From e9fb606121e26d7831bc14f2a74f04e310de4f3e Mon Sep 17 00:00:00 2001
From: Tk-Glitch
Date: Mon, 26 Oct 2020 22:46:56 +0100
Subject: [PATCH] Move to unified builder as default

The previous building scripts and patches will be moved to legacy

---
 .gitignore | 1 +
 linux-tkg/PKGBUILD => PKGBUILD | 2 +-
 linux-tkg/README.md => README.md | 0
 .../customization.cfg => customization.cfg | 0
 linux-tkg/install.sh => install.sh | 0
 .../5.10/90-cleanup.hook | 0
 .../5.10/cleanup | 0
 .../5.10/config.x86_64 | 0
 .../5.4/90-cleanup.hook | 0
 .../5.4/cleanup | 0
 .../5.4/config.x86_64 | 0
 .../5.4/config_hardened.x86_64 | 0
 .../5.7/90-cleanup.hook | 0
 .../5.7/cleanup | 0
 .../5.7/config.x86_64 | 0
 .../5.7/config_hardened.x86_64 | 0
 .../5.8/90-cleanup.hook | 0
 .../5.8/cleanup | 0
 .../5.8/config.x86_64 | 0
 .../5.9/90-cleanup.hook | 0
 .../5.9/cleanup | 0
 .../5.9/config.x86_64 | 0
 .../generic-desktop-profile.cfg | 0
 .../prepare | 2 +-
 .../ryzen-desktop-profile.cfg | 0
 ...sallow-unprivileged-CLONE_NEWUSER-by.patch | 0
 .../5.10/0002-clear-patches.patch | 0
 .../5.10/0003-glitched-base.patch | 0
 .../5.10/0003-glitched-cfs.patch | 0
 .../5.10/0004-5.10-ck1.patch | 0
 .../5.10/0004-glitched-muqss.patch | 0
 .../5.10/0004-glitched-ondemand-muqss.patch | 0
 .../5.10/0005-glitched-pds.patch | 0
 .../5.10/0006-add-acs-overrides_iommu.patch | 0
 .../5.10/0007-v5.10-fsync.patch | 0
 .../5.10/0009-glitched-bmq.patch | 0
 .../5.10/0009-glitched-ondemand-bmq.patch | 0
 .../5.10/0009-prjc_v5.10-r0.patch | 0
 .../5.10/0011-ZFS-fix.patch | 0
 .../5.10/0012-misc-additions.patch | 0
 ...sallow-unprivileged-CLONE_NEWUSER-by.patch | 0
 .../5.4/0002-clear-patches.patch | 0
 .../5.4/0003-glitched-base.patch | 0
 .../5.4/0003-glitched-cfs.patch | 0
 .../5.4/0004-5.4-ck1.patch | 0
 .../5.4/0004-glitched-muqss.patch | 0
 .../5.4/0004-glitched-ondemand-muqss.patch | 0
 .../5.4/0005-glitched-ondemand-pds.patch | 0
 .../5.4/0005-glitched-pds.patch | 0
 .../5.4/0005-v5.4_undead-pds099o.patch | 0
 .../5.4/0006-add-acs-overrides_iommu.patch | 0
 .../5.4/0007-v5.4-fsync.patch | 0
 .../5.4/0009-bmq_v5.4-r2.patch | 0
 .../5.4/0009-glitched-bmq.patch | 0
 .../5.4/0011-ZFS-fix.patch | 0
 .../5.4/0012-linux-hardened.patch | 0
 ...sallow-unprivileged-CLONE_NEWUSER-by.patch | 0
 .../5.7/0002-clear-patches.patch | 0
 .../5.7/0003-glitched-base.patch | 0
 .../5.7/0003-glitched-cfs.patch | 0
 .../5.7/0004-5.7-ck1.patch | 0
 .../5.7/0004-glitched-muqss.patch | 0
 .../5.7/0004-glitched-ondemand-muqss.patch | 0
 .../5.7/0005-glitched-ondemand-pds.patch | 0
 .../5.7/0005-glitched-pds.patch | 0
 .../5.7/0005-v5.7_undead-pds099o.patch | 0
 .../5.7/0006-add-acs-overrides_iommu.patch | 0
 .../5.7/0007-v5.7-fsync.patch | 0
 .../5.7/0008-5.7-bcachefs.patch | 0
 .../5.7/0009-glitched-bmq.patch | 0
 .../5.7/0009-glitched-ondemand-bmq.patch | 0
 .../5.7/0009-prjc_v5.7-r3.patch | 0
 .../5.7/0010-5.7-glitched-cachy.patch | 0
 .../5.7/0011-ZFS-fix.patch | 0
 .../5.7/0012-linux-hardened.patch | 0
 .../5.7/0012-misc-additions.patch | 0
 ...sallow-unprivileged-CLONE_NEWUSER-by.patch | 0
 .../5.8/0002-clear-patches.patch | 0
 .../5.8/0003-glitched-base.patch | 0
 .../5.8/0003-glitched-cfs.patch | 0
 .../5.8/0005-glitched-pds.patch | 0
 .../0005-undead-glitched-ondemand-pds.patch | 0
 .../5.8/0005-undead-glitched-pds.patch | 0
 .../5.8/0005-v5.8_undead-pds099o.patch | 0
 .../5.8/0006-add-acs-overrides_iommu.patch | 0
 .../5.8/0007-v5.8-fsync.patch | 0
 .../5.8/0008-5.8-bcachefs.patch | 0
 .../5.8/0009-glitched-bmq.patch | 0
 .../5.8/0009-glitched-ondemand-bmq.patch | 0
 .../5.8/0009-prjc_v5.8-r3.patch | 0
 .../5.8/0011-ZFS-fix.patch | 0
.../5.8/0012-misc-additions.patch | 0 ...sallow-unprivileged-CLONE_NEWUSER-by.patch | 0 .../5.9/0002-clear-patches.patch | 0 .../5.9/0003-glitched-base.patch | 0 .../5.9/0003-glitched-cfs.patch | 0 .../5.9/0004-5.9-ck1.patch | 0 .../5.9}/0004-glitched-muqss.patch | 0 .../5.9/0004-glitched-ondemand-muqss.patch | 0 .../5.9/0005-glitched-pds.patch | 0 .../5.9/0006-add-acs-overrides_iommu.patch | 0 .../5.9/0007-v5.9-fsync.patch | 0 .../5.9/0008-5.9-bcachefs.patch | 0 .../5.9/0009-glitched-bmq.patch | 0 .../5.9/0009-glitched-ondemand-bmq.patch | 0 .../5.9/0009-prjc_v5.9-r1.patch | 0 .../5.9/0011-ZFS-fix.patch | 0 .../5.9/0012-misc-additions.patch | 0 .../5.9/0013-remove-debian-deps-cross.patch | 0 .../5.9/0004-glitched-muqss.patch | 78 - linux510-rc-tkg/PKGBUILD | 284 - linux510-rc-tkg/README.md | 52 - linux510-rc-tkg/customization.cfg | 189 - linux510-rc-tkg/install.sh | 283 - .../linux510-tkg-config/90-cleanup.hook | 14 - linux510-rc-tkg/linux510-tkg-config/cleanup | 10 - .../linux510-tkg-config/config.x86_64 | 11179 --- .../generic-desktop-profile.cfg | 35 - linux510-rc-tkg/linux510-tkg-config/prepare | 991 - .../ryzen-desktop-profile.cfg | 38 - ...sallow-unprivileged-CLONE_NEWUSER-by.patch | 156 - .../0002-clear-patches.patch | 360 - .../0003-glitched-base.patch | 678 - .../0003-glitched-cfs.patch | 72 - .../linux510-tkg-patches/0004-5.10-ck1.patch | 13369 --- .../0004-glitched-ondemand-muqss.patch | 18 - .../0005-glitched-pds.patch | 90 - .../0006-add-acs-overrides_iommu.patch | 193 - .../0007-v5.10-fsync.patch | 597 - .../0009-glitched-bmq.patch | 90 - .../0009-glitched-ondemand-bmq.patch | 18 - .../0009-prjc_v5.10-r0.patch | 8809 -- .../linux510-tkg-patches/0011-ZFS-fix.patch | 43 - .../0012-misc-additions.patch | 54 - linux54-tkg/PKGBUILD | 423 - linux54-tkg/README.md | 45 - linux54-tkg/customization.cfg | 172 - .../linux54-tkg-config/90-cleanup.hook | 14 - linux54-tkg/linux54-tkg-config/cleanup | 10 - linux54-tkg/linux54-tkg-config/config.x86_64 | 10598 --- .../linux54-tkg-config/config_hardened.x86_64 | 10527 --- .../generic-desktop-profile.cfg | 55 - linux54-tkg/linux54-tkg-config/prepare | 733 - .../ryzen-desktop-profile.cfg | 58 - ...sallow-unprivileged-CLONE_NEWUSER-by.patch | 156 - .../0002-clear-patches.patch | 354 - .../0003-glitched-base.patch | 4612 - .../0003-glitched-cfs.patch | 72 - .../linux54-tkg-patches/0004-5.4-ck1.patch | 17684 ---- .../0004-glitched-muqss.patch | 78 - .../0004-glitched-ondemand-muqss.patch | 18 - .../0005-glitched-ondemand-pds.patch | 18 - .../0005-glitched-pds.patch | 213 - .../0005-v5.4_undead-pds099o.patch | 8387 -- .../0006-add-acs-overrides_iommu.patch | 193 - .../linux54-tkg-patches/0007-v5.4-fsync.patch | 419 - .../0009-bmq_v5.4-r2.patch | 7601 -- .../0009-glitched-bmq.patch | 108 - .../linux54-tkg-patches/0011-ZFS-fix.patch | 43 - .../0012-linux-hardened.patch | 2806 - linux57-tkg/PKGBUILD | 282 - linux57-tkg/README.md | 69 - linux57-tkg/customization.cfg | 188 - linux57-tkg/install.sh | 283 - .../linux57-tkg-config/90-cleanup.hook | 14 - linux57-tkg/linux57-tkg-config/cleanup | 10 - linux57-tkg/linux57-tkg-config/config.x86_64 | 10864 --- .../linux57-tkg-config/config_hardened.x86_64 | 10839 --- .../generic-desktop-profile.cfg | 55 - linux57-tkg/linux57-tkg-config/prepare | 983 - .../ryzen-desktop-profile.cfg | 58 - ...sallow-unprivileged-CLONE_NEWUSER-by.patch | 156 - .../0002-clear-patches.patch | 354 - .../0003-glitched-base.patch | 545 - .../0003-glitched-cfs.patch | 72 - .../linux57-tkg-patches/0004-5.7-ck1.patch | 13147 --- 
.../0004-glitched-muqss.patch | 78 - .../0004-glitched-ondemand-muqss.patch | 18 - .../0005-glitched-ondemand-pds.patch | 18 - .../0005-glitched-pds.patch | 166 - .../0005-v5.7_undead-pds099o.patch | 8400 -- .../0006-add-acs-overrides_iommu.patch | 193 - .../linux57-tkg-patches/0007-v5.7-fsync.patch | 908 - .../0008-5.7-bcachefs.patch | 71085 ---------------- .../0009-glitched-bmq.patch | 90 - .../0009-glitched-ondemand-bmq.patch | 18 - .../0009-prjc_v5.7-r3.patch | 7817 -- .../linux57-tkg-patches/0011-ZFS-fix.patch | 43 - .../0012-linux-hardened.patch | 2916 - .../0012-misc-additions.patch | 55 - linux58-tkg/PKGBUILD | 285 - linux58-tkg/README.md | 55 - linux58-tkg/customization.cfg | 188 - linux58-tkg/install.sh | 283 - .../linux58-tkg-config/90-cleanup.hook | 14 - linux58-tkg/linux58-tkg-config/cleanup | 10 - linux58-tkg/linux58-tkg-config/config.x86_64 | 11020 --- .../generic-desktop-profile.cfg | 35 - linux58-tkg/linux58-tkg-config/prepare | 1015 - .../ryzen-desktop-profile.cfg | 38 - ...sallow-unprivileged-CLONE_NEWUSER-by.patch | 156 - .../0002-clear-patches.patch | 360 - .../0003-glitched-base.patch | 708 - .../0003-glitched-cfs.patch | 72 - .../0005-glitched-pds.patch | 90 - .../0005-undead-glitched-ondemand-pds.patch | 18 - .../0005-undead-glitched-pds.patch | 166 - .../0005-v5.8_undead-pds099o.patch | 8530 -- .../0006-add-acs-overrides_iommu.patch | 193 - .../linux58-tkg-patches/0007-v5.8-fsync.patch | 908 - .../0008-5.8-bcachefs.patch | 70598 --------------- .../0009-glitched-bmq.patch | 90 - .../0009-glitched-ondemand-bmq.patch | 18 - .../0009-prjc_v5.8-r3.patch | 8582 -- .../linux58-tkg-patches/0011-ZFS-fix.patch | 43 - .../0012-misc-additions.patch | 54 - linux59-tkg/PKGBUILD | 285 - linux59-tkg/README.md | 52 - linux59-tkg/customization.cfg | 189 - linux59-tkg/install.sh | 283 - .../linux59-tkg-config/90-cleanup.hook | 14 - linux59-tkg/linux59-tkg-config/cleanup | 10 - linux59-tkg/linux59-tkg-config/config.x86_64 | 11076 --- .../generic-desktop-profile.cfg | 35 - linux59-tkg/linux59-tkg-config/prepare | 991 - .../ryzen-desktop-profile.cfg | 38 - ...sallow-unprivileged-CLONE_NEWUSER-by.patch | 156 - .../0002-clear-patches.patch | 360 - .../0003-glitched-base.patch | 708 - .../0003-glitched-cfs.patch | 72 - .../linux59-tkg-patches/0004-5.9-ck1.patch | 13384 --- .../0004-glitched-muqss.patch | 90 - .../0004-glitched-ondemand-muqss.patch | 18 - .../0005-glitched-pds.patch | 90 - .../0006-add-acs-overrides_iommu.patch | 193 - .../linux59-tkg-patches/0007-v5.9-fsync.patch | 597 - .../0008-5.9-bcachefs.patch | 70821 --------------- .../0009-glitched-bmq.patch | 90 - .../0009-glitched-ondemand-bmq.patch | 18 - .../0009-prjc_v5.9-r0.patch | 8809 -- .../linux59-tkg-patches/0011-ZFS-fix.patch | 43 - .../0012-misc-additions.patch | 54 - 242 files changed, 3 insertions(+), 444828 deletions(-) rename linux-tkg/PKGBUILD => PKGBUILD (99%) rename linux-tkg/README.md => README.md (100%) rename linux-tkg/customization.cfg => customization.cfg (100%) rename linux-tkg/install.sh => install.sh (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.10/90-cleanup.hook (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.10/cleanup (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.10/config.x86_64 (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.4/90-cleanup.hook (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.4/cleanup (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.4/config.x86_64 (100%) rename 
{linux-tkg/linux-tkg-config => linux-tkg-config}/5.4/config_hardened.x86_64 (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.7/90-cleanup.hook (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.7/cleanup (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.7/config.x86_64 (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.7/config_hardened.x86_64 (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.8/90-cleanup.hook (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.8/cleanup (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.8/config.x86_64 (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.9/90-cleanup.hook (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.9/cleanup (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/5.9/config.x86_64 (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/generic-desktop-profile.cfg (100%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/prepare (99%) rename {linux-tkg/linux-tkg-config => linux-tkg-config}/ryzen-desktop-profile.cfg (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0002-clear-patches.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0003-glitched-base.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0003-glitched-cfs.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0004-5.10-ck1.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0004-glitched-muqss.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0004-glitched-ondemand-muqss.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0005-glitched-pds.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0006-add-acs-overrides_iommu.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0007-v5.10-fsync.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0009-glitched-bmq.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0009-glitched-ondemand-bmq.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0009-prjc_v5.10-r0.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0011-ZFS-fix.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.10/0012-misc-additions.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0002-clear-patches.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0003-glitched-base.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0003-glitched-cfs.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0004-5.4-ck1.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0004-glitched-muqss.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0004-glitched-ondemand-muqss.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0005-glitched-ondemand-pds.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0005-glitched-pds.patch (100%) rename {linux-tkg/linux-tkg-patches => 
linux-tkg-patches}/5.4/0005-v5.4_undead-pds099o.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0006-add-acs-overrides_iommu.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0007-v5.4-fsync.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0009-bmq_v5.4-r2.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0009-glitched-bmq.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0011-ZFS-fix.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.4/0012-linux-hardened.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0002-clear-patches.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0003-glitched-base.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0003-glitched-cfs.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0004-5.7-ck1.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0004-glitched-muqss.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0004-glitched-ondemand-muqss.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0005-glitched-ondemand-pds.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0005-glitched-pds.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0005-v5.7_undead-pds099o.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0006-add-acs-overrides_iommu.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0007-v5.7-fsync.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0008-5.7-bcachefs.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0009-glitched-bmq.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0009-glitched-ondemand-bmq.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0009-prjc_v5.7-r3.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0010-5.7-glitched-cachy.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0011-ZFS-fix.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0012-linux-hardened.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.7/0012-misc-additions.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0002-clear-patches.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0003-glitched-base.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0003-glitched-cfs.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0005-glitched-pds.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0005-undead-glitched-ondemand-pds.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0005-undead-glitched-pds.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0005-v5.8_undead-pds099o.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0006-add-acs-overrides_iommu.patch (100%) rename {linux-tkg/linux-tkg-patches => 
linux-tkg-patches}/5.8/0007-v5.8-fsync.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0008-5.8-bcachefs.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0009-glitched-bmq.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0009-glitched-ondemand-bmq.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0009-prjc_v5.8-r3.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0011-ZFS-fix.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.8/0012-misc-additions.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0002-clear-patches.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0003-glitched-base.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0003-glitched-cfs.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0004-5.9-ck1.patch (100%) rename {linux510-rc-tkg/linux510-tkg-patches => linux-tkg-patches/5.9}/0004-glitched-muqss.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0004-glitched-ondemand-muqss.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0005-glitched-pds.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0006-add-acs-overrides_iommu.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0007-v5.9-fsync.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0008-5.9-bcachefs.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0009-glitched-bmq.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0009-glitched-ondemand-bmq.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0009-prjc_v5.9-r1.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0011-ZFS-fix.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0012-misc-additions.patch (100%) rename {linux-tkg/linux-tkg-patches => linux-tkg-patches}/5.9/0013-remove-debian-deps-cross.patch (100%) delete mode 100644 linux-tkg/linux-tkg-patches/5.9/0004-glitched-muqss.patch delete mode 100644 linux510-rc-tkg/PKGBUILD delete mode 100644 linux510-rc-tkg/README.md delete mode 100644 linux510-rc-tkg/customization.cfg delete mode 100755 linux510-rc-tkg/install.sh delete mode 100644 linux510-rc-tkg/linux510-tkg-config/90-cleanup.hook delete mode 100755 linux510-rc-tkg/linux510-tkg-config/cleanup delete mode 100644 linux510-rc-tkg/linux510-tkg-config/config.x86_64 delete mode 100644 linux510-rc-tkg/linux510-tkg-config/generic-desktop-profile.cfg delete mode 100644 linux510-rc-tkg/linux510-tkg-config/prepare delete mode 100644 linux510-rc-tkg/linux510-tkg-config/ryzen-desktop-profile.cfg delete mode 100644 linux510-rc-tkg/linux510-tkg-patches/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch delete mode 100644 linux510-rc-tkg/linux510-tkg-patches/0002-clear-patches.patch delete mode 100644 linux510-rc-tkg/linux510-tkg-patches/0003-glitched-base.patch delete mode 100644 linux510-rc-tkg/linux510-tkg-patches/0003-glitched-cfs.patch delete mode 100644 linux510-rc-tkg/linux510-tkg-patches/0004-5.10-ck1.patch delete mode 100644 linux510-rc-tkg/linux510-tkg-patches/0004-glitched-ondemand-muqss.patch delete mode 100644 
linux510-rc-tkg/linux510-tkg-patches/0005-glitched-pds.patch delete mode 100644 linux510-rc-tkg/linux510-tkg-patches/0006-add-acs-overrides_iommu.patch delete mode 100644 linux510-rc-tkg/linux510-tkg-patches/0007-v5.10-fsync.patch delete mode 100644 linux510-rc-tkg/linux510-tkg-patches/0009-glitched-bmq.patch delete mode 100644 linux510-rc-tkg/linux510-tkg-patches/0009-glitched-ondemand-bmq.patch delete mode 100644 linux510-rc-tkg/linux510-tkg-patches/0009-prjc_v5.10-r0.patch delete mode 100644 linux510-rc-tkg/linux510-tkg-patches/0011-ZFS-fix.patch delete mode 100644 linux510-rc-tkg/linux510-tkg-patches/0012-misc-additions.patch delete mode 100644 linux54-tkg/PKGBUILD delete mode 100644 linux54-tkg/README.md delete mode 100644 linux54-tkg/customization.cfg delete mode 100644 linux54-tkg/linux54-tkg-config/90-cleanup.hook delete mode 100755 linux54-tkg/linux54-tkg-config/cleanup delete mode 100644 linux54-tkg/linux54-tkg-config/config.x86_64 delete mode 100644 linux54-tkg/linux54-tkg-config/config_hardened.x86_64 delete mode 100644 linux54-tkg/linux54-tkg-config/generic-desktop-profile.cfg delete mode 100644 linux54-tkg/linux54-tkg-config/prepare delete mode 100644 linux54-tkg/linux54-tkg-config/ryzen-desktop-profile.cfg delete mode 100644 linux54-tkg/linux54-tkg-patches/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0002-clear-patches.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0003-glitched-base.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0003-glitched-cfs.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0004-5.4-ck1.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0004-glitched-muqss.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0004-glitched-ondemand-muqss.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0005-glitched-ondemand-pds.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0005-glitched-pds.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0005-v5.4_undead-pds099o.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0006-add-acs-overrides_iommu.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0007-v5.4-fsync.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0009-bmq_v5.4-r2.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0009-glitched-bmq.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0011-ZFS-fix.patch delete mode 100644 linux54-tkg/linux54-tkg-patches/0012-linux-hardened.patch delete mode 100644 linux57-tkg/PKGBUILD delete mode 100644 linux57-tkg/README.md delete mode 100644 linux57-tkg/customization.cfg delete mode 100755 linux57-tkg/install.sh delete mode 100644 linux57-tkg/linux57-tkg-config/90-cleanup.hook delete mode 100755 linux57-tkg/linux57-tkg-config/cleanup delete mode 100644 linux57-tkg/linux57-tkg-config/config.x86_64 delete mode 100644 linux57-tkg/linux57-tkg-config/config_hardened.x86_64 delete mode 100644 linux57-tkg/linux57-tkg-config/generic-desktop-profile.cfg delete mode 100644 linux57-tkg/linux57-tkg-config/prepare delete mode 100644 linux57-tkg/linux57-tkg-config/ryzen-desktop-profile.cfg delete mode 100644 linux57-tkg/linux57-tkg-patches/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0002-clear-patches.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0003-glitched-base.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0003-glitched-cfs.patch delete mode 100644 
linux57-tkg/linux57-tkg-patches/0004-5.7-ck1.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0004-glitched-muqss.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0004-glitched-ondemand-muqss.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0005-glitched-ondemand-pds.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0005-glitched-pds.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0005-v5.7_undead-pds099o.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0006-add-acs-overrides_iommu.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0007-v5.7-fsync.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0008-5.7-bcachefs.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0009-glitched-bmq.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0009-glitched-ondemand-bmq.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0009-prjc_v5.7-r3.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0011-ZFS-fix.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0012-linux-hardened.patch delete mode 100644 linux57-tkg/linux57-tkg-patches/0012-misc-additions.patch delete mode 100644 linux58-tkg/PKGBUILD delete mode 100644 linux58-tkg/README.md delete mode 100644 linux58-tkg/customization.cfg delete mode 100755 linux58-tkg/install.sh delete mode 100644 linux58-tkg/linux58-tkg-config/90-cleanup.hook delete mode 100755 linux58-tkg/linux58-tkg-config/cleanup delete mode 100644 linux58-tkg/linux58-tkg-config/config.x86_64 delete mode 100644 linux58-tkg/linux58-tkg-config/generic-desktop-profile.cfg delete mode 100644 linux58-tkg/linux58-tkg-config/prepare delete mode 100644 linux58-tkg/linux58-tkg-config/ryzen-desktop-profile.cfg delete mode 100644 linux58-tkg/linux58-tkg-patches/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0002-clear-patches.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0003-glitched-base.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0003-glitched-cfs.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0005-glitched-pds.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0005-undead-glitched-ondemand-pds.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0005-undead-glitched-pds.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0005-v5.8_undead-pds099o.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0006-add-acs-overrides_iommu.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0007-v5.8-fsync.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0008-5.8-bcachefs.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0009-glitched-bmq.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0009-glitched-ondemand-bmq.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0009-prjc_v5.8-r3.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0011-ZFS-fix.patch delete mode 100644 linux58-tkg/linux58-tkg-patches/0012-misc-additions.patch delete mode 100644 linux59-tkg/PKGBUILD delete mode 100644 linux59-tkg/README.md delete mode 100644 linux59-tkg/customization.cfg delete mode 100755 linux59-tkg/install.sh delete mode 100644 linux59-tkg/linux59-tkg-config/90-cleanup.hook delete mode 100755 linux59-tkg/linux59-tkg-config/cleanup delete mode 100644 linux59-tkg/linux59-tkg-config/config.x86_64 delete mode 100644 linux59-tkg/linux59-tkg-config/generic-desktop-profile.cfg delete mode 100644 linux59-tkg/linux59-tkg-config/prepare delete mode 100644 
linux59-tkg/linux59-tkg-config/ryzen-desktop-profile.cfg delete mode 100644 linux59-tkg/linux59-tkg-patches/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0002-clear-patches.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0003-glitched-base.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0003-glitched-cfs.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0004-5.9-ck1.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0004-glitched-muqss.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0004-glitched-ondemand-muqss.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0005-glitched-pds.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0006-add-acs-overrides_iommu.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0007-v5.9-fsync.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0008-5.9-bcachefs.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0009-glitched-bmq.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0009-glitched-ondemand-bmq.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0009-prjc_v5.9-r0.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0011-ZFS-fix.patch delete mode 100644 linux59-tkg/linux59-tkg-patches/0012-misc-additions.patch diff --git a/.gitignore b/.gitignore index f9fd432..d37f231 100644 --- a/.gitignore +++ b/.gitignore @@ -14,5 +14,6 @@ *.db *.files */src/ +src/* */pkg/ */linux-5*/ diff --git a/linux-tkg/PKGBUILD b/PKGBUILD similarity index 99% rename from linux-tkg/PKGBUILD rename to PKGBUILD index f56fcde..7b38182 100644 --- a/linux-tkg/PKGBUILD +++ b/PKGBUILD @@ -271,7 +271,7 @@ case $_basever in 'b9ebe0ae69bc2b2091d6bfcf6c7875a87ea7969fcfa4e306c48d47a60f9ef4d6' '7058e57fd68367b029adc77f2a82928f1433daaf02c8c279cb2d13556c8804d7' 'c605f638d74c61861ebdc36ebd4cb8b6475eae2f6273e1ccb2bbb3e10a2ec3fe' - 'bc69d6e5ee8172b0242c8fa72d13cfe2b8d2b6601468836908a7dfe8b78a3bbb' + '2bbbac963b6ca44ef3f8a71ec7c5cad7d66df860869a73059087ee236775970a' '45a9ab99215ab3313be6e66e073d29154aac55bc58975a4df2dad116c918d27c' 'fca63d15ca4502aebd73e76d7499b243d2c03db71ff5ab0bf5cf268b2e576320' '19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a' diff --git a/linux-tkg/README.md b/README.md similarity index 100% rename from linux-tkg/README.md rename to README.md diff --git a/linux-tkg/customization.cfg b/customization.cfg similarity index 100% rename from linux-tkg/customization.cfg rename to customization.cfg diff --git a/linux-tkg/install.sh b/install.sh similarity index 100% rename from linux-tkg/install.sh rename to install.sh diff --git a/linux-tkg/linux-tkg-config/5.10/90-cleanup.hook b/linux-tkg-config/5.10/90-cleanup.hook similarity index 100% rename from linux-tkg/linux-tkg-config/5.10/90-cleanup.hook rename to linux-tkg-config/5.10/90-cleanup.hook diff --git a/linux-tkg/linux-tkg-config/5.10/cleanup b/linux-tkg-config/5.10/cleanup similarity index 100% rename from linux-tkg/linux-tkg-config/5.10/cleanup rename to linux-tkg-config/5.10/cleanup diff --git a/linux-tkg/linux-tkg-config/5.10/config.x86_64 b/linux-tkg-config/5.10/config.x86_64 similarity index 100% rename from linux-tkg/linux-tkg-config/5.10/config.x86_64 rename to linux-tkg-config/5.10/config.x86_64 diff --git a/linux-tkg/linux-tkg-config/5.4/90-cleanup.hook b/linux-tkg-config/5.4/90-cleanup.hook similarity index 100% rename from linux-tkg/linux-tkg-config/5.4/90-cleanup.hook rename to linux-tkg-config/5.4/90-cleanup.hook diff --git 
a/linux-tkg/linux-tkg-config/5.4/cleanup b/linux-tkg-config/5.4/cleanup similarity index 100% rename from linux-tkg/linux-tkg-config/5.4/cleanup rename to linux-tkg-config/5.4/cleanup diff --git a/linux-tkg/linux-tkg-config/5.4/config.x86_64 b/linux-tkg-config/5.4/config.x86_64 similarity index 100% rename from linux-tkg/linux-tkg-config/5.4/config.x86_64 rename to linux-tkg-config/5.4/config.x86_64 diff --git a/linux-tkg/linux-tkg-config/5.4/config_hardened.x86_64 b/linux-tkg-config/5.4/config_hardened.x86_64 similarity index 100% rename from linux-tkg/linux-tkg-config/5.4/config_hardened.x86_64 rename to linux-tkg-config/5.4/config_hardened.x86_64 diff --git a/linux-tkg/linux-tkg-config/5.7/90-cleanup.hook b/linux-tkg-config/5.7/90-cleanup.hook similarity index 100% rename from linux-tkg/linux-tkg-config/5.7/90-cleanup.hook rename to linux-tkg-config/5.7/90-cleanup.hook diff --git a/linux-tkg/linux-tkg-config/5.7/cleanup b/linux-tkg-config/5.7/cleanup similarity index 100% rename from linux-tkg/linux-tkg-config/5.7/cleanup rename to linux-tkg-config/5.7/cleanup diff --git a/linux-tkg/linux-tkg-config/5.7/config.x86_64 b/linux-tkg-config/5.7/config.x86_64 similarity index 100% rename from linux-tkg/linux-tkg-config/5.7/config.x86_64 rename to linux-tkg-config/5.7/config.x86_64 diff --git a/linux-tkg/linux-tkg-config/5.7/config_hardened.x86_64 b/linux-tkg-config/5.7/config_hardened.x86_64 similarity index 100% rename from linux-tkg/linux-tkg-config/5.7/config_hardened.x86_64 rename to linux-tkg-config/5.7/config_hardened.x86_64 diff --git a/linux-tkg/linux-tkg-config/5.8/90-cleanup.hook b/linux-tkg-config/5.8/90-cleanup.hook similarity index 100% rename from linux-tkg/linux-tkg-config/5.8/90-cleanup.hook rename to linux-tkg-config/5.8/90-cleanup.hook diff --git a/linux-tkg/linux-tkg-config/5.8/cleanup b/linux-tkg-config/5.8/cleanup similarity index 100% rename from linux-tkg/linux-tkg-config/5.8/cleanup rename to linux-tkg-config/5.8/cleanup diff --git a/linux-tkg/linux-tkg-config/5.8/config.x86_64 b/linux-tkg-config/5.8/config.x86_64 similarity index 100% rename from linux-tkg/linux-tkg-config/5.8/config.x86_64 rename to linux-tkg-config/5.8/config.x86_64 diff --git a/linux-tkg/linux-tkg-config/5.9/90-cleanup.hook b/linux-tkg-config/5.9/90-cleanup.hook similarity index 100% rename from linux-tkg/linux-tkg-config/5.9/90-cleanup.hook rename to linux-tkg-config/5.9/90-cleanup.hook diff --git a/linux-tkg/linux-tkg-config/5.9/cleanup b/linux-tkg-config/5.9/cleanup similarity index 100% rename from linux-tkg/linux-tkg-config/5.9/cleanup rename to linux-tkg-config/5.9/cleanup diff --git a/linux-tkg/linux-tkg-config/5.9/config.x86_64 b/linux-tkg-config/5.9/config.x86_64 similarity index 100% rename from linux-tkg/linux-tkg-config/5.9/config.x86_64 rename to linux-tkg-config/5.9/config.x86_64 diff --git a/linux-tkg/linux-tkg-config/generic-desktop-profile.cfg b/linux-tkg-config/generic-desktop-profile.cfg similarity index 100% rename from linux-tkg/linux-tkg-config/generic-desktop-profile.cfg rename to linux-tkg-config/generic-desktop-profile.cfg diff --git a/linux-tkg/linux-tkg-config/prepare b/linux-tkg-config/prepare similarity index 99% rename from linux-tkg/linux-tkg-config/prepare rename to linux-tkg-config/prepare index 6e8bace..e6d2ef2 100644 --- a/linux-tkg/linux-tkg-config/prepare +++ b/linux-tkg-config/prepare @@ -132,7 +132,7 @@ _tkg_initscript() { echo "_cpusched=\"MuQSS\"" > "${_path}"/cpuschedset elif [ "$_basever" = "58" ]; then echo "_cpusched=\"pds\"" > "${_path}"/cpuschedset - 
else [ "$_basever" = "59" ]; then + else echo "_cpusched=\"bmq\"" > "${_path}"/cpuschedset fi elif [ "$CONDITION" = "3" ]; then diff --git a/linux-tkg/linux-tkg-config/ryzen-desktop-profile.cfg b/linux-tkg-config/ryzen-desktop-profile.cfg similarity index 100% rename from linux-tkg/linux-tkg-config/ryzen-desktop-profile.cfg rename to linux-tkg-config/ryzen-desktop-profile.cfg diff --git a/linux-tkg/linux-tkg-patches/5.10/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch b/linux-tkg-patches/5.10/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch rename to linux-tkg-patches/5.10/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch diff --git a/linux-tkg/linux-tkg-patches/5.10/0002-clear-patches.patch b/linux-tkg-patches/5.10/0002-clear-patches.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0002-clear-patches.patch rename to linux-tkg-patches/5.10/0002-clear-patches.patch diff --git a/linux-tkg/linux-tkg-patches/5.10/0003-glitched-base.patch b/linux-tkg-patches/5.10/0003-glitched-base.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0003-glitched-base.patch rename to linux-tkg-patches/5.10/0003-glitched-base.patch diff --git a/linux-tkg/linux-tkg-patches/5.10/0003-glitched-cfs.patch b/linux-tkg-patches/5.10/0003-glitched-cfs.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0003-glitched-cfs.patch rename to linux-tkg-patches/5.10/0003-glitched-cfs.patch diff --git a/linux-tkg/linux-tkg-patches/5.10/0004-5.10-ck1.patch b/linux-tkg-patches/5.10/0004-5.10-ck1.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0004-5.10-ck1.patch rename to linux-tkg-patches/5.10/0004-5.10-ck1.patch diff --git a/linux-tkg/linux-tkg-patches/5.10/0004-glitched-muqss.patch b/linux-tkg-patches/5.10/0004-glitched-muqss.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0004-glitched-muqss.patch rename to linux-tkg-patches/5.10/0004-glitched-muqss.patch diff --git a/linux-tkg/linux-tkg-patches/5.10/0004-glitched-ondemand-muqss.patch b/linux-tkg-patches/5.10/0004-glitched-ondemand-muqss.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0004-glitched-ondemand-muqss.patch rename to linux-tkg-patches/5.10/0004-glitched-ondemand-muqss.patch diff --git a/linux-tkg/linux-tkg-patches/5.10/0005-glitched-pds.patch b/linux-tkg-patches/5.10/0005-glitched-pds.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0005-glitched-pds.patch rename to linux-tkg-patches/5.10/0005-glitched-pds.patch diff --git a/linux-tkg/linux-tkg-patches/5.10/0006-add-acs-overrides_iommu.patch b/linux-tkg-patches/5.10/0006-add-acs-overrides_iommu.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0006-add-acs-overrides_iommu.patch rename to linux-tkg-patches/5.10/0006-add-acs-overrides_iommu.patch diff --git a/linux-tkg/linux-tkg-patches/5.10/0007-v5.10-fsync.patch b/linux-tkg-patches/5.10/0007-v5.10-fsync.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0007-v5.10-fsync.patch rename to linux-tkg-patches/5.10/0007-v5.10-fsync.patch diff --git a/linux-tkg/linux-tkg-patches/5.10/0009-glitched-bmq.patch b/linux-tkg-patches/5.10/0009-glitched-bmq.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0009-glitched-bmq.patch rename to 
linux-tkg-patches/5.10/0009-glitched-bmq.patch diff --git a/linux-tkg/linux-tkg-patches/5.10/0009-glitched-ondemand-bmq.patch b/linux-tkg-patches/5.10/0009-glitched-ondemand-bmq.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0009-glitched-ondemand-bmq.patch rename to linux-tkg-patches/5.10/0009-glitched-ondemand-bmq.patch diff --git a/linux-tkg/linux-tkg-patches/5.10/0009-prjc_v5.10-r0.patch b/linux-tkg-patches/5.10/0009-prjc_v5.10-r0.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0009-prjc_v5.10-r0.patch rename to linux-tkg-patches/5.10/0009-prjc_v5.10-r0.patch diff --git a/linux-tkg/linux-tkg-patches/5.10/0011-ZFS-fix.patch b/linux-tkg-patches/5.10/0011-ZFS-fix.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0011-ZFS-fix.patch rename to linux-tkg-patches/5.10/0011-ZFS-fix.patch diff --git a/linux-tkg/linux-tkg-patches/5.10/0012-misc-additions.patch b/linux-tkg-patches/5.10/0012-misc-additions.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.10/0012-misc-additions.patch rename to linux-tkg-patches/5.10/0012-misc-additions.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch b/linux-tkg-patches/5.4/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch rename to linux-tkg-patches/5.4/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0002-clear-patches.patch b/linux-tkg-patches/5.4/0002-clear-patches.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0002-clear-patches.patch rename to linux-tkg-patches/5.4/0002-clear-patches.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0003-glitched-base.patch b/linux-tkg-patches/5.4/0003-glitched-base.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0003-glitched-base.patch rename to linux-tkg-patches/5.4/0003-glitched-base.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0003-glitched-cfs.patch b/linux-tkg-patches/5.4/0003-glitched-cfs.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0003-glitched-cfs.patch rename to linux-tkg-patches/5.4/0003-glitched-cfs.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0004-5.4-ck1.patch b/linux-tkg-patches/5.4/0004-5.4-ck1.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0004-5.4-ck1.patch rename to linux-tkg-patches/5.4/0004-5.4-ck1.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0004-glitched-muqss.patch b/linux-tkg-patches/5.4/0004-glitched-muqss.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0004-glitched-muqss.patch rename to linux-tkg-patches/5.4/0004-glitched-muqss.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0004-glitched-ondemand-muqss.patch b/linux-tkg-patches/5.4/0004-glitched-ondemand-muqss.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0004-glitched-ondemand-muqss.patch rename to linux-tkg-patches/5.4/0004-glitched-ondemand-muqss.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0005-glitched-ondemand-pds.patch b/linux-tkg-patches/5.4/0005-glitched-ondemand-pds.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0005-glitched-ondemand-pds.patch rename to linux-tkg-patches/5.4/0005-glitched-ondemand-pds.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0005-glitched-pds.patch 
b/linux-tkg-patches/5.4/0005-glitched-pds.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0005-glitched-pds.patch rename to linux-tkg-patches/5.4/0005-glitched-pds.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0005-v5.4_undead-pds099o.patch b/linux-tkg-patches/5.4/0005-v5.4_undead-pds099o.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0005-v5.4_undead-pds099o.patch rename to linux-tkg-patches/5.4/0005-v5.4_undead-pds099o.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0006-add-acs-overrides_iommu.patch b/linux-tkg-patches/5.4/0006-add-acs-overrides_iommu.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0006-add-acs-overrides_iommu.patch rename to linux-tkg-patches/5.4/0006-add-acs-overrides_iommu.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0007-v5.4-fsync.patch b/linux-tkg-patches/5.4/0007-v5.4-fsync.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0007-v5.4-fsync.patch rename to linux-tkg-patches/5.4/0007-v5.4-fsync.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0009-bmq_v5.4-r2.patch b/linux-tkg-patches/5.4/0009-bmq_v5.4-r2.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0009-bmq_v5.4-r2.patch rename to linux-tkg-patches/5.4/0009-bmq_v5.4-r2.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0009-glitched-bmq.patch b/linux-tkg-patches/5.4/0009-glitched-bmq.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0009-glitched-bmq.patch rename to linux-tkg-patches/5.4/0009-glitched-bmq.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0011-ZFS-fix.patch b/linux-tkg-patches/5.4/0011-ZFS-fix.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0011-ZFS-fix.patch rename to linux-tkg-patches/5.4/0011-ZFS-fix.patch diff --git a/linux-tkg/linux-tkg-patches/5.4/0012-linux-hardened.patch b/linux-tkg-patches/5.4/0012-linux-hardened.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.4/0012-linux-hardened.patch rename to linux-tkg-patches/5.4/0012-linux-hardened.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch b/linux-tkg-patches/5.7/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch rename to linux-tkg-patches/5.7/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0002-clear-patches.patch b/linux-tkg-patches/5.7/0002-clear-patches.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0002-clear-patches.patch rename to linux-tkg-patches/5.7/0002-clear-patches.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0003-glitched-base.patch b/linux-tkg-patches/5.7/0003-glitched-base.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0003-glitched-base.patch rename to linux-tkg-patches/5.7/0003-glitched-base.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0003-glitched-cfs.patch b/linux-tkg-patches/5.7/0003-glitched-cfs.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0003-glitched-cfs.patch rename to linux-tkg-patches/5.7/0003-glitched-cfs.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0004-5.7-ck1.patch b/linux-tkg-patches/5.7/0004-5.7-ck1.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0004-5.7-ck1.patch rename to linux-tkg-patches/5.7/0004-5.7-ck1.patch diff 
--git a/linux-tkg/linux-tkg-patches/5.7/0004-glitched-muqss.patch b/linux-tkg-patches/5.7/0004-glitched-muqss.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0004-glitched-muqss.patch rename to linux-tkg-patches/5.7/0004-glitched-muqss.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0004-glitched-ondemand-muqss.patch b/linux-tkg-patches/5.7/0004-glitched-ondemand-muqss.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0004-glitched-ondemand-muqss.patch rename to linux-tkg-patches/5.7/0004-glitched-ondemand-muqss.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0005-glitched-ondemand-pds.patch b/linux-tkg-patches/5.7/0005-glitched-ondemand-pds.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0005-glitched-ondemand-pds.patch rename to linux-tkg-patches/5.7/0005-glitched-ondemand-pds.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0005-glitched-pds.patch b/linux-tkg-patches/5.7/0005-glitched-pds.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0005-glitched-pds.patch rename to linux-tkg-patches/5.7/0005-glitched-pds.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0005-v5.7_undead-pds099o.patch b/linux-tkg-patches/5.7/0005-v5.7_undead-pds099o.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0005-v5.7_undead-pds099o.patch rename to linux-tkg-patches/5.7/0005-v5.7_undead-pds099o.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0006-add-acs-overrides_iommu.patch b/linux-tkg-patches/5.7/0006-add-acs-overrides_iommu.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0006-add-acs-overrides_iommu.patch rename to linux-tkg-patches/5.7/0006-add-acs-overrides_iommu.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0007-v5.7-fsync.patch b/linux-tkg-patches/5.7/0007-v5.7-fsync.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0007-v5.7-fsync.patch rename to linux-tkg-patches/5.7/0007-v5.7-fsync.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0008-5.7-bcachefs.patch b/linux-tkg-patches/5.7/0008-5.7-bcachefs.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0008-5.7-bcachefs.patch rename to linux-tkg-patches/5.7/0008-5.7-bcachefs.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0009-glitched-bmq.patch b/linux-tkg-patches/5.7/0009-glitched-bmq.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0009-glitched-bmq.patch rename to linux-tkg-patches/5.7/0009-glitched-bmq.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0009-glitched-ondemand-bmq.patch b/linux-tkg-patches/5.7/0009-glitched-ondemand-bmq.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0009-glitched-ondemand-bmq.patch rename to linux-tkg-patches/5.7/0009-glitched-ondemand-bmq.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0009-prjc_v5.7-r3.patch b/linux-tkg-patches/5.7/0009-prjc_v5.7-r3.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0009-prjc_v5.7-r3.patch rename to linux-tkg-patches/5.7/0009-prjc_v5.7-r3.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0010-5.7-glitched-cachy.patch b/linux-tkg-patches/5.7/0010-5.7-glitched-cachy.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0010-5.7-glitched-cachy.patch rename to linux-tkg-patches/5.7/0010-5.7-glitched-cachy.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0011-ZFS-fix.patch b/linux-tkg-patches/5.7/0011-ZFS-fix.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0011-ZFS-fix.patch 
rename to linux-tkg-patches/5.7/0011-ZFS-fix.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0012-linux-hardened.patch b/linux-tkg-patches/5.7/0012-linux-hardened.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0012-linux-hardened.patch rename to linux-tkg-patches/5.7/0012-linux-hardened.patch diff --git a/linux-tkg/linux-tkg-patches/5.7/0012-misc-additions.patch b/linux-tkg-patches/5.7/0012-misc-additions.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.7/0012-misc-additions.patch rename to linux-tkg-patches/5.7/0012-misc-additions.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch b/linux-tkg-patches/5.8/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch rename to linux-tkg-patches/5.8/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0002-clear-patches.patch b/linux-tkg-patches/5.8/0002-clear-patches.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0002-clear-patches.patch rename to linux-tkg-patches/5.8/0002-clear-patches.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0003-glitched-base.patch b/linux-tkg-patches/5.8/0003-glitched-base.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0003-glitched-base.patch rename to linux-tkg-patches/5.8/0003-glitched-base.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0003-glitched-cfs.patch b/linux-tkg-patches/5.8/0003-glitched-cfs.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0003-glitched-cfs.patch rename to linux-tkg-patches/5.8/0003-glitched-cfs.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0005-glitched-pds.patch b/linux-tkg-patches/5.8/0005-glitched-pds.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0005-glitched-pds.patch rename to linux-tkg-patches/5.8/0005-glitched-pds.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0005-undead-glitched-ondemand-pds.patch b/linux-tkg-patches/5.8/0005-undead-glitched-ondemand-pds.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0005-undead-glitched-ondemand-pds.patch rename to linux-tkg-patches/5.8/0005-undead-glitched-ondemand-pds.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0005-undead-glitched-pds.patch b/linux-tkg-patches/5.8/0005-undead-glitched-pds.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0005-undead-glitched-pds.patch rename to linux-tkg-patches/5.8/0005-undead-glitched-pds.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0005-v5.8_undead-pds099o.patch b/linux-tkg-patches/5.8/0005-v5.8_undead-pds099o.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0005-v5.8_undead-pds099o.patch rename to linux-tkg-patches/5.8/0005-v5.8_undead-pds099o.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0006-add-acs-overrides_iommu.patch b/linux-tkg-patches/5.8/0006-add-acs-overrides_iommu.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0006-add-acs-overrides_iommu.patch rename to linux-tkg-patches/5.8/0006-add-acs-overrides_iommu.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0007-v5.8-fsync.patch b/linux-tkg-patches/5.8/0007-v5.8-fsync.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0007-v5.8-fsync.patch rename to linux-tkg-patches/5.8/0007-v5.8-fsync.patch diff 
--git a/linux-tkg/linux-tkg-patches/5.8/0008-5.8-bcachefs.patch b/linux-tkg-patches/5.8/0008-5.8-bcachefs.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0008-5.8-bcachefs.patch rename to linux-tkg-patches/5.8/0008-5.8-bcachefs.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0009-glitched-bmq.patch b/linux-tkg-patches/5.8/0009-glitched-bmq.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0009-glitched-bmq.patch rename to linux-tkg-patches/5.8/0009-glitched-bmq.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0009-glitched-ondemand-bmq.patch b/linux-tkg-patches/5.8/0009-glitched-ondemand-bmq.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0009-glitched-ondemand-bmq.patch rename to linux-tkg-patches/5.8/0009-glitched-ondemand-bmq.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0009-prjc_v5.8-r3.patch b/linux-tkg-patches/5.8/0009-prjc_v5.8-r3.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0009-prjc_v5.8-r3.patch rename to linux-tkg-patches/5.8/0009-prjc_v5.8-r3.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0011-ZFS-fix.patch b/linux-tkg-patches/5.8/0011-ZFS-fix.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0011-ZFS-fix.patch rename to linux-tkg-patches/5.8/0011-ZFS-fix.patch diff --git a/linux-tkg/linux-tkg-patches/5.8/0012-misc-additions.patch b/linux-tkg-patches/5.8/0012-misc-additions.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.8/0012-misc-additions.patch rename to linux-tkg-patches/5.8/0012-misc-additions.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch b/linux-tkg-patches/5.9/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch rename to linux-tkg-patches/5.9/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0002-clear-patches.patch b/linux-tkg-patches/5.9/0002-clear-patches.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0002-clear-patches.patch rename to linux-tkg-patches/5.9/0002-clear-patches.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0003-glitched-base.patch b/linux-tkg-patches/5.9/0003-glitched-base.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0003-glitched-base.patch rename to linux-tkg-patches/5.9/0003-glitched-base.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0003-glitched-cfs.patch b/linux-tkg-patches/5.9/0003-glitched-cfs.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0003-glitched-cfs.patch rename to linux-tkg-patches/5.9/0003-glitched-cfs.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0004-5.9-ck1.patch b/linux-tkg-patches/5.9/0004-5.9-ck1.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0004-5.9-ck1.patch rename to linux-tkg-patches/5.9/0004-5.9-ck1.patch diff --git a/linux510-rc-tkg/linux510-tkg-patches/0004-glitched-muqss.patch b/linux-tkg-patches/5.9/0004-glitched-muqss.patch similarity index 100% rename from linux510-rc-tkg/linux510-tkg-patches/0004-glitched-muqss.patch rename to linux-tkg-patches/5.9/0004-glitched-muqss.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0004-glitched-ondemand-muqss.patch b/linux-tkg-patches/5.9/0004-glitched-ondemand-muqss.patch similarity index 100% rename from 
linux-tkg/linux-tkg-patches/5.9/0004-glitched-ondemand-muqss.patch rename to linux-tkg-patches/5.9/0004-glitched-ondemand-muqss.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0005-glitched-pds.patch b/linux-tkg-patches/5.9/0005-glitched-pds.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0005-glitched-pds.patch rename to linux-tkg-patches/5.9/0005-glitched-pds.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0006-add-acs-overrides_iommu.patch b/linux-tkg-patches/5.9/0006-add-acs-overrides_iommu.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0006-add-acs-overrides_iommu.patch rename to linux-tkg-patches/5.9/0006-add-acs-overrides_iommu.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0007-v5.9-fsync.patch b/linux-tkg-patches/5.9/0007-v5.9-fsync.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0007-v5.9-fsync.patch rename to linux-tkg-patches/5.9/0007-v5.9-fsync.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0008-5.9-bcachefs.patch b/linux-tkg-patches/5.9/0008-5.9-bcachefs.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0008-5.9-bcachefs.patch rename to linux-tkg-patches/5.9/0008-5.9-bcachefs.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0009-glitched-bmq.patch b/linux-tkg-patches/5.9/0009-glitched-bmq.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0009-glitched-bmq.patch rename to linux-tkg-patches/5.9/0009-glitched-bmq.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0009-glitched-ondemand-bmq.patch b/linux-tkg-patches/5.9/0009-glitched-ondemand-bmq.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0009-glitched-ondemand-bmq.patch rename to linux-tkg-patches/5.9/0009-glitched-ondemand-bmq.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0009-prjc_v5.9-r1.patch b/linux-tkg-patches/5.9/0009-prjc_v5.9-r1.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0009-prjc_v5.9-r1.patch rename to linux-tkg-patches/5.9/0009-prjc_v5.9-r1.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0011-ZFS-fix.patch b/linux-tkg-patches/5.9/0011-ZFS-fix.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0011-ZFS-fix.patch rename to linux-tkg-patches/5.9/0011-ZFS-fix.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0012-misc-additions.patch b/linux-tkg-patches/5.9/0012-misc-additions.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0012-misc-additions.patch rename to linux-tkg-patches/5.9/0012-misc-additions.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0013-remove-debian-deps-cross.patch b/linux-tkg-patches/5.9/0013-remove-debian-deps-cross.patch similarity index 100% rename from linux-tkg/linux-tkg-patches/5.9/0013-remove-debian-deps-cross.patch rename to linux-tkg-patches/5.9/0013-remove-debian-deps-cross.patch diff --git a/linux-tkg/linux-tkg-patches/5.9/0004-glitched-muqss.patch b/linux-tkg/linux-tkg-patches/5.9/0004-glitched-muqss.patch deleted file mode 100644 index 2c4837e..0000000 --- a/linux-tkg/linux-tkg-patches/5.9/0004-glitched-muqss.patch +++ /dev/null @@ -1,78 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - MuQSS - -diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c -index 84a1d08d68551..57c3036a68952 100644 ---- a/kernel/sched/MuQSS.c -+++ b/kernel/sched/MuQSS.c -@@ -163,7 +167,11 @@ int sched_interactive __read_mostly = 1; - * are allowed to run five 
seconds as real time tasks. This is the total over - * all online cpus. - */ -+#ifdef CONFIG_ZENIFY -+int sched_iso_cpu __read_mostly = 25; -+#else - int sched_iso_cpu __read_mostly = 70; -+#endif - - /* - * sched_yield_type - Choose what sort of yield sched_yield will perform. - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -5,7 +5,7 @@ - choice - prompt "Timer frequency" - default HZ_100 if SCHED_MUQSS -- default HZ_250_NODEF if !SCHED_MUQSS -+ default HZ_500_NODEF if !SCHED_MUQSS - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -50,6 +50,20 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500_NODEF -+ bool "500 HZ" -+ help -+ 500 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ -+ config HZ_750_NODEF -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ - config HZ_1000_NODEF - bool "1000 HZ" - help -@@ -63,6 +70,8 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250_NODEF - default 300 if HZ_300_NODEF -+ default 500 if HZ_500_NODEF -+ default 750 if HZ_750_NODEF - default 1000 if HZ_1000_NODEF - - config SCHED_HRTICK - -diff --git a/Makefile b/Makefile -index d4d36c61940b..4a9dfe471f1f 100644 ---- a/Makefile -+++ b/Makefile -@@ -15,7 +15,6 @@ NAME = Kleptomaniac Octopus - - CKVERSION = -ck1 - CKNAME = MuQSS Powered --EXTRAVERSION := $(EXTRAVERSION)$(CKVERSION) - - # We are using a recursive build, so we need to do a little thinking - # to get the ordering right. diff --git a/linux510-rc-tkg/PKGBUILD b/linux510-rc-tkg/PKGBUILD deleted file mode 100644 index b211543..0000000 --- a/linux510-rc-tkg/PKGBUILD +++ /dev/null @@ -1,284 +0,0 @@ -# Based on the file created for Arch Linux by: -# Tobias Powalowski -# Thomas Baechler - -# Contributor: Tk-Glitch - -plain ' .---.` `.---.' -plain ' `/syhhhyso- -osyhhhys/`' -plain ' .syNMdhNNhss/``.---.``/sshNNhdMNys.' -plain ' +sdMh.`+MNsssssssssssssssNM+`.hMds+' -plain ' :syNNdhNNhssssssssssssssshNNhdNNys:' -plain ' /ssyhhhysssssssssssssssssyhhhyss/' -plain ' .ossssssssssssssssssssssssssssso.' 
-plain ' :sssssssssssssssssssssssssssssssss:' -plain ' /sssssssssssssssssssssssssssssssssss/' -plain ' :sssssssssssssoosssssssoosssssssssssss:' -plain ' osssssssssssssoosssssssoossssssssssssso' -plain ' osssssssssssyyyyhhhhhhhyyyyssssssssssso' -plain ' /yyyyyyhhdmmmmNNNNNNNNNNNmmmmdhhyyyyyy/' -plain ' smmmNNNNNNNNNNNNNNNNNNNNNNNNNNNNNmmms' -plain ' /dNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNd/' -plain ' `:sdNNNNNNNNNNNNNNNNNNNNNNNNNds:`' -plain ' `-+shdNNNNNNNNNNNNNNNdhs+-`' -plain ' `.-:///////:-.`' - -_where="$PWD" # track basedir as different Arch based distros are moving srcdir around - -source "$_where"/customization.cfg # load default configuration from file -source "$_where"/linux*-tkg-config/prepare - -if [[ "$_sub" = rc* ]]; then - _srcpath="linux-${_basekernel}-${_sub}" -else - _srcpath="linux-${_basekernel}" -fi - -_tkg_initscript - -_distro="Arch" - -if [ -n "$_custom_pkgbase" ]; then - pkgbase="${_custom_pkgbase}" -else - pkgbase=linux"${_basever}"-tkg-"${_cpusched}" -fi -pkgname=("${pkgbase}" "${pkgbase}-headers") -pkgver="${_basekernel}"."${_sub}" -pkgrel=1 -pkgdesc='Linux-tkg' -arch=('x86_64') # no i686 in here -url="http://www.kernel.org/" -license=('GPL2') -makedepends=('xmlto' 'docbook-xsl' 'kmod' 'inetutils' 'bc' 'libelf' 'pahole' 'patchutils' 'flex' 'python-sphinx' 'python-sphinx_rtd_theme' 'graphviz' 'imagemagick' 'git') -optdepends=('schedtool') -options=('!strip' 'docs') -source=("https://git.kernel.org/torvalds/t/linux-${_basekernel}-${_sub}.tar.gz" - #"https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-${_basekernel}.tar.xz" - #"https://cdn.kernel.org/pub/linux/kernel/v5.x/patch-${pkgver}.xz" - "https://raw.githubusercontent.com/graysky2/kernel_gcc_patch/master/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v5.8%2B.patch" - 'config.x86_64' # stock Arch config - #'config_hardened.x86_64' # hardened Arch config - 90-cleanup.hook - cleanup - # ARCH Patches - 0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch - # TkG - 0002-clear-patches.patch - 0003-glitched-base.patch - 0003-glitched-cfs.patch - 0004-glitched-ondemand-muqss.patch - 0004-glitched-muqss.patch - 0004-5.10-ck1.patch - #0005-undead-glitched-ondemand-pds.patch - #0005-undead-glitched-pds.patch - #0005-v5.8_undead-pds099o.patch - 0005-glitched-pds.patch - 0006-add-acs-overrides_iommu.patch - 0007-v5.10-fsync.patch - #0008-5.10-bcachefs.patch - 0009-glitched-ondemand-bmq.patch - 0009-glitched-bmq.patch - 0009-prjc_v5.10-r0.patch - 0011-ZFS-fix.patch - #0012-linux-hardened.patch - 0012-misc-additions.patch -) -sha256sums=('483d8b3945963ea375026c4dde019da36f5d2116241036b09493e63e92e39ee8' - '5ab29eb64e57df83b395a29a6a4f89030d142feffbfbf73b3afc6d97a2a7fd12' - '834247434877e4e76201ada7df35ebd4622116737e9650e0772f22d03083b426' - '1e15fc2ef3fa770217ecc63a220e5df2ddbcf3295eb4a021171e7edd4c6cc898' - '66a03c246037451a77b4d448565b1d7e9368270c7d02872fbd0b5d024ed0a997' - 'f6383abef027fd9a430fd33415355e0df492cdc3c90e9938bf2d98f4f63b32e6' - '35a7cde86fb94939c0f25a62b8c47f3de0dbd3c65f876f460b263181b3e92fc0' - '1ac97da07e72ec7e2b0923d32daacacfaa632a44c714d6942d9f143fe239e1b5' - '7058e57fd68367b029adc77f2a82928f1433daaf02c8c279cb2d13556c8804d7' - 'c605f638d74c61861ebdc36ebd4cb8b6475eae2f6273e1ccb2bbb3e10a2ec3fe' - '2bbbac963b6ca44ef3f8a71ec7c5cad7d66df860869a73059087ee236775970a' - '4231bd331289f5678b49d084698f0a80a3ae602eccb41d89e4f85ff4465eb971' - 'fca63d15ca4502aebd73e76d7499b243d2c03db71ff5ab0bf5cf268b2e576320' - '19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a' - 
'b302ba6c5bbe8ed19b20207505d513208fae1e678cf4d8e7ac0b154e5fe3f456' - '9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177' - 'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911' - '88c7e308e474c845e0cc09e09bd223fc39876eca757abf6d6c3b8321f49ce1f1' - '49262ce4a8089fa70275aad742fc914baa28d9c384f710c9a62f64796d13e104' - '433b919e6a0be26784fb4304c43b1811a28f12ad3de9e26c0af827f64c0c316e') - -export KBUILD_BUILD_HOST=archlinux -export KBUILD_BUILD_USER=$pkgbase -export KBUILD_BUILD_TIMESTAMP="$(date -Ru${SOURCE_DATE_EPOCH:+d @$SOURCE_DATE_EPOCH})" - -prepare() { - rm -rf $pkgdir # Nuke the entire pkg folder so it'll get regenerated clean on next build - - ln -s "${_where}/customization.cfg" "${srcdir}" # workaround - - cd "${srcdir}/${_srcpath}" - - _tkg_srcprep -} - -build() { - cd "${srcdir}/${_srcpath}" - - # Use custom compiler paths if defined - if [ -n "${CUSTOM_GCC_PATH}" ]; then - PATH=${CUSTOM_GCC_PATH}/bin:${CUSTOM_GCC_PATH}/lib:${CUSTOM_GCC_PATH}/include:${PATH} - fi - - if [ "$_force_all_threads" = "true" ]; then - _force_all_threads="-j$((`nproc`*2))" - else - _force_all_threads="${MAKEFLAGS}" - fi - - # ccache - if [ "$_noccache" != "true" ] && pacman -Qq ccache &> /dev/null; then - export PATH="/usr/lib/ccache/bin/:$PATH" - export CCACHE_SLOPPINESS="file_macro,locale,time_macros" - export CCACHE_NOHASHDIR="true" - msg2 'ccache was found and will be used' - fi - - # document the TkG variables, excluding "_", "_EXT_CONFIG_PATH", and "_where". - declare -p | cut -d ' ' -f 3 | grep -P '^_(?!=|EXT_CONFIG_PATH|where)' > "${srcdir}/customization-full.cfg" - - # build! - _runtime=$( time ( schedtool -B -n 1 -e ionice -n 1 make ${_force_all_threads} LOCALVERSION= bzImage modules 2>&1 ) 3>&1 1>&2 2>&3 ) || _runtime=$( time ( make ${_force_all_threads} LOCALVERSION= bzImage modules 2>&1 ) 3>&1 1>&2 2>&3 ) -} - -hackbase() { - pkgdesc="The $pkgdesc kernel and modules" - depends=('coreutils' 'kmod' 'initramfs') - optdepends=('linux-docs: Kernel hackers manual - HTML documentation that comes with the Linux kernel.' - 'crda: to set the correct wireless channels of your country.' - 'linux-firmware: Firmware files for Linux' - 'modprobed-db: Keeps track of EVERY kernel module that has ever been probed. Useful for make localmodconfig.' - 'nvidia-tkg: NVIDIA drivers for all installed kernels - non-dkms version.' - 'nvidia-dkms-tkg: NVIDIA drivers for all installed kernels - dkms version.' 
- 'update-grub: Simple wrapper around grub-mkconfig.') - provides=("linux=${pkgver}" "${pkgbase}" VIRTUALBOX-GUEST-MODULES WIREGUARD-MODULE) - replaces=(virtualbox-guest-modules-arch wireguard-arch) - - cd "${srcdir}/${_srcpath}" - - # get kernel version - local _kernver="$(\033[1;0m \033[1;1m$1\033[1;0m" >&2 -} - -error() { - echo -e " \033[1;31m==> ERROR: $1\033[1;0m" >&2 -} - -warning() { - echo -e " \033[1;33m==> WARNING: $1\033[1;0m" >&2 -} - -plain() { - echo "$1" >&2 -} - -# Stop the script at any encountered error -set -e - -_where=`pwd` -srcdir="$_where" - -source linux*-tkg-config/prepare - -_cpu_opt_patch_link="https://raw.githubusercontent.com/graysky2/kernel_gcc_patch/master/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v5.8%2B.patch" - -source customization.cfg - -if [ "$1" != "install" ] && [ "$1" != "config" ] && [ "$1" != "uninstall-help" ]; then - msg2 "Argument not recognised, options are: - - config : shallow clones the linux ${_basekernel}.x git tree into the folder linux-${_basekernel}, then applies on it the extra patches and prepares the .config file - by copying the one from the current linux system in /boot/config-`uname -r` and updates it. - - install : [RPM and DEB based distros only], does the config step, proceeds to compile, then prompts to install - - uninstall-help : [RPM and DEB based distros only], lists the installed kernels in this system, then gives a hint on how to uninstall them manually." - exit 0 -fi - -# Load external configuration file if present. Available variable values will overwrite customization.cfg ones. -if [ -e "$_EXT_CONFIG_PATH" ]; then - msg2 "External configuration file $_EXT_CONFIG_PATH will be used and will override customization.cfg values." - source "$_EXT_CONFIG_PATH" -fi - -_misc_adds="false" # We currently don't want this enabled on non-Arch - -if [ "$1" = "install" ] || [ "$1" = "config" ]; then - - if [ -z $_distro ] && [ "$1" = "install" ]; then - while true; do - echo "Which linux distribution are you running ?" - echo "if it's not on the list, choose the closest one to it: Fedora/Suse for RPM, Ubuntu/Debian for DEB" - echo " 1) Debian" - echo " 2) Fedora" - echo " 3) Suse" - echo " 4) Ubuntu" - read -p "[1-4]: " _distro_index - - if [ "$_distro_index" = "1" ]; then - _distro="Debian" - break - elif [ "$_distro_index" = "2" ]; then - _distro="Fedora" - break - elif [ "$_distro_index" = "3" ]; then - _distro="Suse" - break - elif [ "$_distro_index" = "4" ]; then - _distro="Ubuntu" - break - else - echo "Wrong index." - fi - done - fi - - if [[ $1 = "install" && "$_distro" != "Ubuntu" && "$_distro" != "Debian" && "$_distro" != "Fedora" && "$_distro" != "Suse" ]]; then - msg2 "Variable \"_distro\" in \"customization.cfg\" hasn't been set to \"Ubuntu\", \"Debian\", \"Fedora\" or \"Suse\"" - msg2 "This script can only install custom kernels for RPM and DEB based distros, though only those keywords are permitted. Exiting..."
- exit 0 - fi - - if [ "$_distro" = "Ubuntu" ] || [ "$_distro" = "Debian" ]; then - msg2 "Installing dependencies" - sudo apt install git build-essential kernel-package fakeroot libncurses5-dev libssl-dev ccache bison flex qtbase5-dev -y - elif [ "$_distro" = "Fedora" ]; then - msg2 "Installing dependencies" - sudo dnf install fedpkg fedora-packager rpmdevtools ncurses-devel pesign grubby qt5-devel libXi-devel gcc-c++ git ccache flex bison elfutils-libelf-devel openssl-devel dwarves rpm-build -y - elif [ "$_distro" = "Suse" ]; then - msg2 "Installing dependencies" - sudo zypper install -y rpmdevtools ncurses-devel pesign libXi-devel gcc-c++ git ccache flex bison elfutils libelf-devel openssl-devel dwarves make patch bc rpm-build libqt5-qtbase-common-devel libqt5-qtbase-devel lz4 - fi - - # Force prepare script to avoid Arch specific commands if the user is using `config` - if [ "$1" = "config" ]; then - _distro="" - fi - - if [ -d linux-${_basekernel}.orig ]; then - rm -rf linux-${_basekernel}.orig - fi - - if [ -d linux-${_basekernel} ]; then - msg2 "Resetting files in linux-$_basekernel to their original state and getting latest updates" - cd "$_where"/linux-${_basekernel} - git checkout --force linux-$_basekernel.y - git clean -f -d -x - git pull - msg2 "Done" - cd "$_where" - else - msg2 "Shallow git cloning linux $_basekernel" - git clone --branch linux-$_basekernel.y --single-branch --depth=1 https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git linux-${_basekernel} - msg2 "Done" - fi - - # Define current kernel subversion - if [ -z $_kernel_subver ]; then - cd "$_where"/linux-${_basekernel} - _kernelverstr=`git describe` - _kernel_subver=${_kernelverstr:5} - cd "$_where" - fi - - - # Run init script that is also run in PKGBUILD, it will define some env vars that we will use - _tkg_initscript - - cd "$_where" - msg2 "Downloading Graysky2's CPU optimisations patch" - wget "$_cpu_opt_patch_link" - - # Follow Ubuntu install instructions in https://wiki.ubuntu.com/KernelTeam/GitKernelBuild - - # cd in linux folder, copy Ubuntu's current config file, update with new params - cd "$_where"/linux-${_basekernel} - - msg2 "Copying current kernel's config and running make oldconfig..." - cp /boot/config-`uname -r` .config - if [ "$_distro" = "Debian" ]; then #Help Debian cert problem. - sed -i -e 's#CONFIG_SYSTEM_TRUSTED_KEYS="debian/certs/test-signing-certs.pem"#CONFIG_SYSTEM_TRUSTED_KEYS=""#g' .config - sed -i -e 's#CONFIG_SYSTEM_TRUSTED_KEYS="debian/certs/debian-uefi-certs.pem"#CONFIG_SYSTEM_TRUSTED_KEYS=""#g' .config - fi - yes '' | make oldconfig - msg2 "Done" - - # apply linux-tkg patching script - _tkg_srcprep - - msg2 "Configuration done."
-fi - -if [ "$1" = "install" ]; then - - # Use custom compiler paths if defined - if [ -n "${CUSTOM_GCC_PATH}" ]; then - PATH=${CUSTOM_GCC_PATH}/bin:${CUSTOM_GCC_PATH}/lib:${CUSTOM_GCC_PATH}/include:${PATH} - fi - - if [ "$_force_all_threads" = "true" ]; then - _thread_num=`nproc` - else - _thread_num=`expr \`nproc\` / 4` - if [ "$_thread_num" = "0" ]; then - _thread_num=1 - fi - fi - - # ccache - if [ "$_noccache" != "true" ]; then - - if [ "$_distro" = "Ubuntu" ] || [ "$_distro" = "Debian" ]; then - export PATH="/usr/lib/ccache/bin/:$PATH" - elif [ "$_distro" = "Fedora" ] || [ "$_distro" = "Suse" ]; then - export PATH="/usr/lib64/ccache/:$PATH" - fi - - export CCACHE_SLOPPINESS="file_macro,locale,time_macros" - export CCACHE_NOHASHDIR="true" - msg2 'ccache was found and will be used' - - fi - - if [ -z $_kernel_localversion ]; then - _kernel_flavor="tkg-${_cpusched}" - else - _kernel_flavor="tkg-${_kernel_localversion}" - fi - - if [ "$_distro" = "Ubuntu" ] || [ "$_distro" = "Debian" ]; then - - if make -j ${_thread_num} deb-pkg LOCALVERSION=-${_kernel_flavor}; then - msg2 "Building successfully finished!" - - cd "$_where" - - # Create DEBS folder if it doesn't exist - mkdir -p DEBS - - # Move deb files to DEBS folder inside the linux-tkg folder - mv "$_where"/*.deb "$_where"/DEBS/ - - read -p "Do you want to install the new Kernel ? y/[n]: " _install - if [[ $_install =~ [yY] ]] || [ $_install = "yes" ] || [ $_install = "Yes" ]; then - cd "$_where" - _kernelname=$_basekernel.$_kernel_subver-$_kernel_flavor - _headers_deb="linux-headers-${_kernelname}*.deb" - _image_deb="linux-image-${_kernelname}_*.deb" - _kernel_devel_deb="linux-libc-dev_${_kernelname}*.deb" - - cd DEBS - sudo dpkg -i $_headers_deb $_image_deb $_kernel_devel_deb - fi - fi - - elif [[ "$_distro" = "Fedora" || "$_distro" = "Suse" ]]; then - - # Replace dashes with underscores, it seems that it's being done by binrpm-pkg - # So we can actually refer properly to the rpm files. - _kernel_flavor=${_kernel_flavor//-/_} - - if make -j ${_thread_num} rpm-pkg EXTRAVERSION="_${_kernel_flavor}"; then - msg2 "Building successfully finished!" - - cd "$_where" - - # Create RPMS folder if it doesn't exist - mkdir -p RPMS - - # Move rpm files to RPMS folder inside the linux-tkg folder - mv ~/rpmbuild/RPMS/x86_64/* "$_where"/RPMS/ - - #Clean up the original folder, unneeded and takes a lot of space - rm -rf ~/rpmbuild/ - - read -p "Do you want to install the new Kernel ? y/[n]: " _install - if [ "$_install" = "y" ] || [ "$_install" = "Y" ] || [ "$_install" = "yes" ] || [ "$_install" = "Yes" ]; then - - _kernelname=$_basekernel.${_kernel_subver}_$_kernel_flavor - _headers_rpm="kernel-headers-${_kernelname}*.rpm" - _kernel_rpm="kernel-${_kernelname}*.rpm" - _kernel_devel_rpm="kernel-devel-${_kernelname}*.rpm" - - cd RPMS - if [ "$_distro" = "Fedora" ]; then - sudo dnf install $_headers_rpm $_kernel_rpm $_kernel_devel_rpm - elif [ "$_distro" = "Suse" ]; then - msg2 "Some files from 'linux-glibc-devel' will be replaced by files from the custom kernel-headers package" - msg2 "To revert back to the original kernel headers do 'sudo zypper install -f linux-glibc-devel'" - sudo zypper install --replacefiles --allow-unsigned-rpm $_headers_rpm $_kernel_rpm $_kernel_devel_rpm - fi - - msg2 "Install successful" - fi - fi - fi -fi - -if [ "$1" = "uninstall-help" ]; then - - cd "$_where" - msg2 "List of installed custom tkg kernels: " - - if [ "$_distro" = "Ubuntu" ]; then - dpkg -l "*tkg*" | grep "linux.*tkg" - dpkg -l "*linux-libc-dev*" | grep "linux.*tkg" - msg2 "To uninstall a version, you should remove the linux-image, linux-headers and linux-libc-dev associated to it (if installed), with: " - msg2 " sudo apt remove linux-image-VERSION linux-headers-VERSION linux-libc-dev-VERSION" - msg2 " where VERSION is displayed in the lists above, uninstall only versions that have \"tkg\" in its name" - elif [ "$_distro" = "Fedora" ]; then - dnf list --installed kernel* - msg2 "To uninstall a version, you should remove the kernel, kernel-headers and kernel-devel associated to it (if installed), with: " - msg2 " sudo dnf remove --noautoremove kernel-VERSION kernel-devel-VERSION kernel-headers-VERSION" - msg2 " where VERSION is displayed in the second column" - elif [ "$_distro" = "Suse" ]; then - zypper packages --installed-only | grep "kernel.*tkg" - msg2 "To uninstall a version, you should remove the kernel, kernel-headers and kernel-devel associated to it (if installed), with: " - msg2 " sudo zypper remove --no-clean-deps kernel-VERSION kernel-devel-VERSION kernel-headers-VERSION" - msg2 " where VERSION is displayed in the second to last column" - fi - -fi diff --git a/linux510-rc-tkg/linux510-tkg-config/90-cleanup.hook b/linux510-rc-tkg/linux510-tkg-config/90-cleanup.hook deleted file mode 100644 index 99f5221..0000000 --- a/linux510-rc-tkg/linux510-tkg-config/90-cleanup.hook +++ /dev/null @@ -1,14 +0,0 @@ -[Trigger] -Type = File -Operation = Install -Operation = Upgrade -Operation = Remove -Target = usr/lib/modules/*/ -Target = !usr/lib/modules/*/?* - -[Action] -Description = Cleaning up... -When = PostTransaction -Exec = /usr/share/libalpm/scripts/cleanup -NeedsTargets - diff --git a/linux510-rc-tkg/linux510-tkg-config/cleanup b/linux510-rc-tkg/linux510-tkg-config/cleanup deleted file mode 100755 index c00c08d..0000000 --- a/linux510-rc-tkg/linux510-tkg-config/cleanup +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -for _f in /usr/lib/modules/*tkg*; do - if [[ ! -e ${_f}/vmlinuz ]]; then - rm -rf "$_f" - fi -done - -# vim:set ft=sh sw=2 et: - diff --git a/linux510-rc-tkg/linux510-tkg-config/config.x86_64 b/linux510-rc-tkg/linux510-tkg-config/config.x86_64 deleted file mode 100644 index 5d15c18..0000000 --- a/linux510-rc-tkg/linux510-tkg-config/config.x86_64 +++ /dev/null @@ -1,11179 +0,0 @@ -# -# Automatically generated file; DO NOT EDIT.
-# Linux/x86 5.10.0-rc1 Kernel Configuration -# -CONFIG_CC_VERSION_TEXT="gcc (GCC) 10.2.0" -CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=100200 -CONFIG_LD_VERSION=235010000 -CONFIG_CLANG_VERSION=0 -CONFIG_CC_CAN_LINK=y -CONFIG_CC_CAN_LINK_STATIC=y -CONFIG_CC_HAS_ASM_GOTO=y -CONFIG_CC_HAS_ASM_INLINE=y -CONFIG_IRQ_WORK=y -CONFIG_BUILDTIME_TABLE_SORT=y -CONFIG_THREAD_INFO_IN_TASK=y - -# -# General setup -# -CONFIG_INIT_ENV_ARG_LIMIT=32 -# CONFIG_COMPILE_TEST is not set -CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y -CONFIG_BUILD_SALT="" -CONFIG_HAVE_KERNEL_GZIP=y -CONFIG_HAVE_KERNEL_BZIP2=y -CONFIG_HAVE_KERNEL_LZMA=y -CONFIG_HAVE_KERNEL_XZ=y -CONFIG_HAVE_KERNEL_LZO=y -CONFIG_HAVE_KERNEL_LZ4=y -CONFIG_HAVE_KERNEL_ZSTD=y -# CONFIG_KERNEL_GZIP is not set -# CONFIG_KERNEL_BZIP2 is not set -# CONFIG_KERNEL_LZMA is not set -CONFIG_KERNEL_XZ=y -# CONFIG_KERNEL_LZO is not set -# CONFIG_KERNEL_LZ4 is not set -# CONFIG_KERNEL_ZSTD is not set -CONFIG_DEFAULT_INIT="" -CONFIG_DEFAULT_HOSTNAME="archlinux" -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -CONFIG_SYSVIPC_SYSCTL=y -CONFIG_POSIX_MQUEUE=y -CONFIG_POSIX_MQUEUE_SYSCTL=y -CONFIG_WATCH_QUEUE=y -CONFIG_CROSS_MEMORY_ATTACH=y -# CONFIG_USELIB is not set -CONFIG_AUDIT=y -CONFIG_HAVE_ARCH_AUDITSYSCALL=y -CONFIG_AUDITSYSCALL=y - -# -# IRQ subsystem -# -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_GENERIC_IRQ_SHOW=y -CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y -CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_GENERIC_IRQ_MIGRATION=y -CONFIG_HARDIRQS_SW_RESEND=y -CONFIG_GENERIC_IRQ_CHIP=y -CONFIG_IRQ_DOMAIN=y -CONFIG_IRQ_SIM=y -CONFIG_IRQ_DOMAIN_HIERARCHY=y -CONFIG_GENERIC_MSI_IRQ=y -CONFIG_GENERIC_MSI_IRQ_DOMAIN=y -CONFIG_IRQ_MSI_IOMMU=y -CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y -CONFIG_GENERIC_IRQ_RESERVATION_MODE=y -CONFIG_IRQ_FORCED_THREADING=y -CONFIG_SPARSE_IRQ=y -# CONFIG_GENERIC_IRQ_DEBUGFS is not set -# end of IRQ subsystem - -CONFIG_CLOCKSOURCE_WATCHDOG=y -CONFIG_ARCH_CLOCKSOURCE_INIT=y -CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y -CONFIG_GENERIC_TIME_VSYSCALL=y -CONFIG_GENERIC_CLOCKEVENTS=y -CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y -CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y -CONFIG_GENERIC_CMOS_UPDATE=y -CONFIG_HAVE_POSIX_CPU_TIMERS_TASK_WORK=y -CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y - -# -# Timers subsystem -# -CONFIG_TICK_ONESHOT=y -CONFIG_NO_HZ_COMMON=y -# CONFIG_HZ_PERIODIC is not set -CONFIG_NO_HZ_IDLE=y -# CONFIG_NO_HZ_FULL is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -# end of Timers subsystem - -# CONFIG_PREEMPT_NONE is not set -# CONFIG_PREEMPT_VOLUNTARY is not set -CONFIG_PREEMPT=y -CONFIG_PREEMPT_COUNT=y -CONFIG_PREEMPTION=y - -# -# CPU/Task time and stats accounting -# -CONFIG_TICK_CPU_ACCOUNTING=y -# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set -CONFIG_IRQ_TIME_ACCOUNTING=y -CONFIG_HAVE_SCHED_AVG_IRQ=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BSD_PROCESS_ACCT_V3=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_PSI=y -# CONFIG_PSI_DEFAULT_DISABLED is not set -# end of CPU/Task time and stats accounting - -CONFIG_CPU_ISOLATION=y - -# -# RCU Subsystem -# -CONFIG_TREE_RCU=y -CONFIG_PREEMPT_RCU=y -CONFIG_RCU_EXPERT=y -CONFIG_SRCU=y -CONFIG_TREE_SRCU=y -CONFIG_TASKS_RCU_GENERIC=y -CONFIG_TASKS_RCU=y -CONFIG_TASKS_RUDE_RCU=y -CONFIG_TASKS_TRACE_RCU=y -CONFIG_RCU_STALL_COMMON=y -CONFIG_RCU_NEED_SEGCBLIST=y -CONFIG_RCU_FANOUT=64 -CONFIG_RCU_FANOUT_LEAF=16 -CONFIG_RCU_FAST_NO_HZ=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_DELAY=500 -# CONFIG_RCU_NOCB_CPU is not set -# CONFIG_TASKS_TRACE_RCU_READ_MB is not set -# end of RCU Subsystem - 
-CONFIG_BUILD_BIN2C=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -# CONFIG_IKHEADERS is not set -CONFIG_LOG_BUF_SHIFT=17 -CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 -CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 -CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y - -# -# Scheduler features -# -CONFIG_UCLAMP_TASK=y -CONFIG_UCLAMP_BUCKETS_COUNT=5 -# end of Scheduler features - -CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y -CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y -CONFIG_CC_HAS_INT128=y -CONFIG_ARCH_SUPPORTS_INT128=y -CONFIG_NUMA_BALANCING=y -CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y -CONFIG_CGROUPS=y -CONFIG_PAGE_COUNTER=y -CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y -CONFIG_MEMCG_KMEM=y -CONFIG_BLK_CGROUP=y -CONFIG_CGROUP_WRITEBACK=y -CONFIG_CGROUP_SCHED=y -CONFIG_FAIR_GROUP_SCHED=y -CONFIG_CFS_BANDWIDTH=y -# CONFIG_RT_GROUP_SCHED is not set -CONFIG_UCLAMP_TASK_GROUP=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_RDMA=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_HUGETLB=y -CONFIG_CPUSETS=y -CONFIG_PROC_PID_CPUSET=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_CGROUP_PERF=y -CONFIG_CGROUP_BPF=y -# CONFIG_CGROUP_DEBUG is not set -CONFIG_SOCK_CGROUP_DATA=y -CONFIG_NAMESPACES=y -CONFIG_UTS_NS=y -CONFIG_TIME_NS=y -CONFIG_IPC_NS=y -CONFIG_USER_NS=y -CONFIG_USER_NS_UNPRIVILEGED=y -CONFIG_PID_NS=y -CONFIG_NET_NS=y -CONFIG_CHECKPOINT_RESTORE=y -CONFIG_SCHED_AUTOGROUP=y -# CONFIG_SYSFS_DEPRECATED is not set -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -CONFIG_RD_GZIP=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_RD_XZ=y -CONFIG_RD_LZO=y -CONFIG_RD_LZ4=y -CONFIG_RD_ZSTD=y -CONFIG_BOOT_CONFIG=y -CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SYSCTL=y -CONFIG_HAVE_UID16=y -CONFIG_SYSCTL_EXCEPTION_TRACE=y -CONFIG_HAVE_PCSPKR_PLATFORM=y -CONFIG_BPF=y -CONFIG_EXPERT=y -# CONFIG_UID16 is not set -CONFIG_MULTIUSER=y -CONFIG_SGETMASK_SYSCALL=y -# CONFIG_SYSFS_SYSCALL is not set -CONFIG_FHANDLE=y -CONFIG_POSIX_TIMERS=y -CONFIG_PRINTK=y -CONFIG_PRINTK_NMI=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_PCSPKR_PLATFORM=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -CONFIG_FUTEX_PI=y -CONFIG_EPOLL=y -CONFIG_SIGNALFD=y -CONFIG_TIMERFD=y -CONFIG_EVENTFD=y -CONFIG_SHMEM=y -CONFIG_AIO=y -CONFIG_IO_URING=y -CONFIG_ADVISE_SYSCALLS=y -CONFIG_HAVE_ARCH_USERFAULTFD_WP=y -CONFIG_MEMBARRIER=y -CONFIG_KALLSYMS=y -CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y -CONFIG_KALLSYMS_BASE_RELATIVE=y -CONFIG_BPF_LSM=y -CONFIG_BPF_SYSCALL=y -CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y -CONFIG_BPF_JIT_ALWAYS_ON=y -CONFIG_BPF_JIT_DEFAULT_ON=y -# CONFIG_BPF_PRELOAD is not set -CONFIG_USERFAULTFD=y -CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y -CONFIG_RSEQ=y -# CONFIG_DEBUG_RSEQ is not set -# CONFIG_EMBEDDED is not set -CONFIG_HAVE_PERF_EVENTS=y -# CONFIG_PC104 is not set - -# -# Kernel Performance Events And Counters -# -CONFIG_PERF_EVENTS=y -# CONFIG_DEBUG_PERF_USE_VMALLOC is not set -# end of Kernel Performance Events And Counters - -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLUB_DEBUG=y -# CONFIG_SLUB_MEMCG_SYSFS_ON is not set -# CONFIG_COMPAT_BRK is not set -# CONFIG_SLAB is not set -CONFIG_SLUB=y -# CONFIG_SLOB is not set -CONFIG_SLAB_MERGE_DEFAULT=y -CONFIG_SLAB_FREELIST_RANDOM=y -CONFIG_SLAB_FREELIST_HARDENED=y -CONFIG_SHUFFLE_PAGE_ALLOCATOR=y -CONFIG_SLUB_CPU_PARTIAL=y -CONFIG_SYSTEM_DATA_VERIFICATION=y -CONFIG_PROFILING=y -CONFIG_TRACEPOINTS=y -# end of General setup - -CONFIG_64BIT=y -CONFIG_X86_64=y -CONFIG_X86=y -CONFIG_INSTRUCTION_DECODER=y -CONFIG_OUTPUT_FORMAT="elf64-x86-64" -CONFIG_LOCKDEP_SUPPORT=y -CONFIG_STACKTRACE_SUPPORT=y 
-CONFIG_MMU=y -CONFIG_ARCH_MMAP_RND_BITS_MIN=28 -CONFIG_ARCH_MMAP_RND_BITS_MAX=32 -CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 -CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 -CONFIG_GENERIC_ISA_DMA=y -CONFIG_GENERIC_BUG=y -CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_ARCH_HAS_CPU_RELAX=y -CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y -CONFIG_ARCH_HAS_FILTER_PGPROT=y -CONFIG_HAVE_SETUP_PER_CPU_AREA=y -CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y -CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y -CONFIG_ARCH_HIBERNATION_POSSIBLE=y -CONFIG_ARCH_SUSPEND_POSSIBLE=y -CONFIG_ARCH_WANT_GENERAL_HUGETLB=y -CONFIG_ZONE_DMA32=y -CONFIG_AUDIT_ARCH=y -CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y -CONFIG_HAVE_INTEL_TXT=y -CONFIG_X86_64_SMP=y -CONFIG_ARCH_SUPPORTS_UPROBES=y -CONFIG_FIX_EARLYCON_MEM=y -CONFIG_DYNAMIC_PHYSICAL_MASK=y -CONFIG_PGTABLE_LEVELS=5 -CONFIG_CC_HAS_SANE_STACKPROTECTOR=y - -# -# Processor type and features -# -CONFIG_ZONE_DMA=y -CONFIG_SMP=y -CONFIG_X86_FEATURE_NAMES=y -CONFIG_X86_X2APIC=y -CONFIG_X86_MPPARSE=y -# CONFIG_GOLDFISH is not set -CONFIG_RETPOLINE=y -CONFIG_X86_CPU_RESCTRL=y -# CONFIG_X86_EXTENDED_PLATFORM is not set -CONFIG_X86_INTEL_LPSS=y -CONFIG_X86_AMD_PLATFORM_DEVICE=y -CONFIG_IOSF_MBI=y -# CONFIG_IOSF_MBI_DEBUG is not set -CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y -CONFIG_SCHED_OMIT_FRAME_POINTER=y -CONFIG_HYPERVISOR_GUEST=y -CONFIG_PARAVIRT=y -CONFIG_PARAVIRT_XXL=y -# CONFIG_PARAVIRT_DEBUG is not set -CONFIG_PARAVIRT_SPINLOCKS=y -CONFIG_X86_HV_CALLBACK_VECTOR=y -CONFIG_XEN=y -CONFIG_XEN_PV=y -CONFIG_XEN_PV_SMP=y -CONFIG_XEN_DOM0=y -CONFIG_XEN_PVHVM=y -CONFIG_XEN_PVHVM_SMP=y -CONFIG_XEN_512GB=y -CONFIG_XEN_SAVE_RESTORE=y -# CONFIG_XEN_DEBUG_FS is not set -CONFIG_XEN_PVH=y -CONFIG_KVM_GUEST=y -CONFIG_ARCH_CPUIDLE_HALTPOLL=y -CONFIG_PVH=y -CONFIG_PARAVIRT_TIME_ACCOUNTING=y -CONFIG_PARAVIRT_CLOCK=y -CONFIG_JAILHOUSE_GUEST=y -CONFIG_ACRN_GUEST=y -# CONFIG_MK8 is not set -# CONFIG_MPSC is not set -# CONFIG_MCORE2 is not set -# CONFIG_MATOM is not set -CONFIG_GENERIC_CPU=y -CONFIG_X86_INTERNODE_CACHE_SHIFT=6 -CONFIG_X86_L1_CACHE_SHIFT=6 -CONFIG_X86_TSC=y -CONFIG_X86_CMPXCHG64=y -CONFIG_X86_CMOV=y -CONFIG_X86_MINIMUM_CPU_FAMILY=64 -CONFIG_X86_DEBUGCTLMSR=y -CONFIG_IA32_FEAT_CTL=y -CONFIG_X86_VMX_FEATURE_NAMES=y -CONFIG_PROCESSOR_SELECT=y -CONFIG_CPU_SUP_INTEL=y -CONFIG_CPU_SUP_AMD=y -CONFIG_CPU_SUP_HYGON=y -CONFIG_CPU_SUP_CENTAUR=y -CONFIG_CPU_SUP_ZHAOXIN=y -CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y -CONFIG_DMI=y -CONFIG_GART_IOMMU=y -# CONFIG_MAXSMP is not set -CONFIG_NR_CPUS_RANGE_BEGIN=2 -CONFIG_NR_CPUS_RANGE_END=512 -CONFIG_NR_CPUS_DEFAULT=64 -CONFIG_NR_CPUS=320 -CONFIG_SCHED_SMT=y -CONFIG_SCHED_MC=y -CONFIG_SCHED_MC_PRIO=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y -CONFIG_X86_MCE=y -# CONFIG_X86_MCELOG_LEGACY is not set -CONFIG_X86_MCE_INTEL=y -CONFIG_X86_MCE_AMD=y -CONFIG_X86_MCE_THRESHOLD=y -CONFIG_X86_MCE_INJECT=m -CONFIG_X86_THERMAL_VECTOR=y - -# -# Performance monitoring -# -CONFIG_PERF_EVENTS_INTEL_UNCORE=m -CONFIG_PERF_EVENTS_INTEL_RAPL=m -CONFIG_PERF_EVENTS_INTEL_CSTATE=m -CONFIG_PERF_EVENTS_AMD_POWER=m -# end of Performance monitoring - -CONFIG_X86_16BIT=y -CONFIG_X86_ESPFIX64=y -CONFIG_X86_VSYSCALL_EMULATION=y -CONFIG_X86_IOPL_IOPERM=y -CONFIG_I8K=m -CONFIG_MICROCODE=y -CONFIG_MICROCODE_INTEL=y -CONFIG_MICROCODE_AMD=y -CONFIG_MICROCODE_OLD_INTERFACE=y -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -CONFIG_X86_5LEVEL=y -CONFIG_X86_DIRECT_GBPAGES=y -# CONFIG_X86_CPA_STATISTICS is not set 
-CONFIG_AMD_MEM_ENCRYPT=y -# CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT is not set -CONFIG_NUMA=y -CONFIG_AMD_NUMA=y -CONFIG_X86_64_ACPI_NUMA=y -# CONFIG_NUMA_EMU is not set -CONFIG_NODES_SHIFT=5 -CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_ARCH_SPARSEMEM_DEFAULT=y -CONFIG_ARCH_SELECT_MEMORY_MODEL=y -CONFIG_ARCH_MEMORY_PROBE=y -CONFIG_ARCH_PROC_KCORE_TEXT=y -CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 -CONFIG_X86_PMEM_LEGACY_DEVICE=y -CONFIG_X86_PMEM_LEGACY=m -CONFIG_X86_CHECK_BIOS_CORRUPTION=y -CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y -CONFIG_X86_RESERVE_LOW=64 -CONFIG_MTRR=y -CONFIG_MTRR_SANITIZER=y -CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=1 -CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=0 -CONFIG_X86_PAT=y -CONFIG_ARCH_USES_PG_UNCACHED=y -CONFIG_ARCH_RANDOM=y -CONFIG_X86_SMAP=y -CONFIG_X86_UMIP=y -CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y -# CONFIG_X86_INTEL_TSX_MODE_OFF is not set -# CONFIG_X86_INTEL_TSX_MODE_ON is not set -CONFIG_X86_INTEL_TSX_MODE_AUTO=y -CONFIG_EFI=y -CONFIG_EFI_STUB=y -CONFIG_EFI_MIXED=y -# CONFIG_HZ_100 is not set -# CONFIG_HZ_250 is not set -CONFIG_HZ_300=y -# CONFIG_HZ_1000 is not set -CONFIG_HZ=300 -CONFIG_SCHED_HRTICK=y -CONFIG_KEXEC=y -CONFIG_KEXEC_FILE=y -CONFIG_ARCH_HAS_KEXEC_PURGATORY=y -# CONFIG_KEXEC_SIG is not set -CONFIG_CRASH_DUMP=y -CONFIG_KEXEC_JUMP=y -CONFIG_PHYSICAL_START=0x1000000 -CONFIG_RELOCATABLE=y -CONFIG_RANDOMIZE_BASE=y -CONFIG_X86_NEED_RELOCS=y -CONFIG_PHYSICAL_ALIGN=0x200000 -CONFIG_DYNAMIC_MEMORY_LAYOUT=y -CONFIG_RANDOMIZE_MEMORY=y -CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0x1 -CONFIG_HOTPLUG_CPU=y -# CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set -# CONFIG_DEBUG_HOTPLUG_CPU0 is not set -# CONFIG_COMPAT_VDSO is not set -# CONFIG_LEGACY_VSYSCALL_EMULATE is not set -CONFIG_LEGACY_VSYSCALL_XONLY=y -# CONFIG_LEGACY_VSYSCALL_NONE is not set -# CONFIG_CMDLINE_BOOL is not set -CONFIG_MODIFY_LDT_SYSCALL=y -CONFIG_HAVE_LIVEPATCH=y -# CONFIG_LIVEPATCH is not set -# end of Processor type and features - -CONFIG_ARCH_HAS_ADD_PAGES=y -CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y -CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y -CONFIG_USE_PERCPU_NUMA_NODE_ID=y -CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y -CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y -CONFIG_ARCH_ENABLE_THP_MIGRATION=y - -# -# Power management and ACPI options -# -CONFIG_ARCH_HIBERNATION_HEADER=y -CONFIG_SUSPEND=y -CONFIG_SUSPEND_FREEZER=y -# CONFIG_SUSPEND_SKIP_SYNC is not set -CONFIG_HIBERNATE_CALLBACKS=y -CONFIG_HIBERNATION=y -CONFIG_HIBERNATION_SNAPSHOT_DEV=y -CONFIG_PM_STD_PARTITION="" -CONFIG_PM_SLEEP=y -CONFIG_PM_SLEEP_SMP=y -CONFIG_PM_AUTOSLEEP=y -CONFIG_PM_WAKELOCKS=y -CONFIG_PM_WAKELOCKS_LIMIT=100 -CONFIG_PM_WAKELOCKS_GC=y -CONFIG_PM=y -CONFIG_PM_DEBUG=y -CONFIG_PM_ADVANCED_DEBUG=y -# CONFIG_PM_TEST_SUSPEND is not set -CONFIG_PM_SLEEP_DEBUG=y -# CONFIG_DPM_WATCHDOG is not set -CONFIG_PM_TRACE=y -CONFIG_PM_TRACE_RTC=y -CONFIG_PM_CLK=y -CONFIG_PM_GENERIC_DOMAINS=y -CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y -CONFIG_PM_GENERIC_DOMAINS_SLEEP=y -CONFIG_PM_GENERIC_DOMAINS_OF=y -CONFIG_ENERGY_MODEL=y -CONFIG_ARCH_SUPPORTS_ACPI=y -CONFIG_ACPI=y -CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y -CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y -CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y -# CONFIG_ACPI_DEBUGGER is not set -CONFIG_ACPI_SPCR_TABLE=y -CONFIG_ACPI_LPIT=y -CONFIG_ACPI_SLEEP=y -CONFIG_ACPI_REV_OVERRIDE_POSSIBLE=y -CONFIG_ACPI_EC_DEBUGFS=y -CONFIG_ACPI_AC=m -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=y -CONFIG_ACPI_VIDEO=y -CONFIG_ACPI_FAN=y -CONFIG_ACPI_TAD=m -CONFIG_ACPI_DOCK=y -CONFIG_ACPI_CPU_FREQ_PSS=y -CONFIG_ACPI_PROCESSOR_CSTATE=y 
-CONFIG_ACPI_PROCESSOR_IDLE=y -CONFIG_ACPI_CPPC_LIB=y -CONFIG_ACPI_PROCESSOR=y -CONFIG_ACPI_IPMI=m -CONFIG_ACPI_HOTPLUG_CPU=y -CONFIG_ACPI_PROCESSOR_AGGREGATOR=y -CONFIG_ACPI_THERMAL=y -CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y -CONFIG_ACPI_TABLE_UPGRADE=y -CONFIG_ACPI_DEBUG=y -CONFIG_ACPI_PCI_SLOT=y -CONFIG_ACPI_CONTAINER=y -CONFIG_ACPI_HOTPLUG_MEMORY=y -CONFIG_ACPI_HOTPLUG_IOAPIC=y -CONFIG_ACPI_SBS=m -CONFIG_ACPI_HED=y -CONFIG_ACPI_CUSTOM_METHOD=m -CONFIG_ACPI_BGRT=y -# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set -CONFIG_ACPI_NFIT=m -# CONFIG_NFIT_SECURITY_DEBUG is not set -CONFIG_ACPI_NUMA=y -CONFIG_ACPI_HMAT=y -CONFIG_HAVE_ACPI_APEI=y -CONFIG_HAVE_ACPI_APEI_NMI=y -CONFIG_ACPI_APEI=y -CONFIG_ACPI_APEI_GHES=y -CONFIG_ACPI_APEI_PCIEAER=y -CONFIG_ACPI_APEI_MEMORY_FAILURE=y -CONFIG_ACPI_APEI_EINJ=m -CONFIG_ACPI_APEI_ERST_DEBUG=m -# CONFIG_ACPI_DPTF is not set -CONFIG_ACPI_WATCHDOG=y -CONFIG_ACPI_EXTLOG=m -CONFIG_ACPI_ADXL=y -CONFIG_ACPI_CONFIGFS=m -CONFIG_PMIC_OPREGION=y -CONFIG_BYTCRC_PMIC_OPREGION=y -CONFIG_CHTCRC_PMIC_OPREGION=y -CONFIG_XPOWER_PMIC_OPREGION=y -CONFIG_BXT_WC_PMIC_OPREGION=y -CONFIG_CHT_WC_PMIC_OPREGION=y -CONFIG_CHT_DC_TI_PMIC_OPREGION=y -CONFIG_TPS68470_PMIC_OPREGION=y -CONFIG_X86_PM_TIMER=y -CONFIG_SFI=y - -# -# CPU Frequency scaling -# -CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_GOV_ATTR_SET=y -CONFIG_CPU_FREQ_GOV_COMMON=y -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y -CONFIG_CPU_FREQ_GOV_PERFORMANCE=y -CONFIG_CPU_FREQ_GOV_POWERSAVE=m -CONFIG_CPU_FREQ_GOV_USERSPACE=m -CONFIG_CPU_FREQ_GOV_ONDEMAND=m -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m -CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y - -# -# CPU frequency scaling drivers -# -CONFIG_CPUFREQ_DT=m -CONFIG_CPUFREQ_DT_PLATDEV=y -CONFIG_X86_INTEL_PSTATE=y -CONFIG_X86_PCC_CPUFREQ=m -CONFIG_X86_ACPI_CPUFREQ=m -CONFIG_X86_ACPI_CPUFREQ_CPB=y -CONFIG_X86_POWERNOW_K8=m -CONFIG_X86_AMD_FREQ_SENSITIVITY=m -# CONFIG_X86_SPEEDSTEP_CENTRINO is not set -CONFIG_X86_P4_CLOCKMOD=m - -# -# shared options -# -CONFIG_X86_SPEEDSTEP_LIB=m -# end of CPU Frequency scaling - -# -# CPU Idle -# -CONFIG_CPU_IDLE=y -CONFIG_CPU_IDLE_GOV_LADDER=y -CONFIG_CPU_IDLE_GOV_MENU=y -CONFIG_CPU_IDLE_GOV_TEO=y -CONFIG_CPU_IDLE_GOV_HALTPOLL=y -CONFIG_HALTPOLL_CPUIDLE=m -# end of CPU Idle - -CONFIG_INTEL_IDLE=y -# end of Power management and ACPI options - -# -# Bus options (PCI etc.) -# -CONFIG_PCI_DIRECT=y -CONFIG_PCI_MMCONFIG=y -CONFIG_PCI_XEN=y -CONFIG_MMCONF_FAM10H=y -# CONFIG_PCI_CNB20LE_QUIRK is not set -# CONFIG_ISA_BUS is not set -CONFIG_ISA_DMA_API=y -CONFIG_AMD_NB=y -# CONFIG_X86_SYSFB is not set -# end of Bus options (PCI etc.) 
- -# -# Binary Emulations -# -CONFIG_IA32_EMULATION=y -# CONFIG_X86_X32 is not set -CONFIG_COMPAT_32=y -CONFIG_COMPAT=y -CONFIG_COMPAT_FOR_U64_ALIGNMENT=y -CONFIG_SYSVIPC_COMPAT=y -# end of Binary Emulations - -# -# Firmware Drivers -# -CONFIG_EDD=m -# CONFIG_EDD_OFF is not set -CONFIG_FIRMWARE_MEMMAP=y -CONFIG_DMIID=y -CONFIG_DMI_SYSFS=m -CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y -CONFIG_ISCSI_IBFT_FIND=y -CONFIG_ISCSI_IBFT=m -CONFIG_FW_CFG_SYSFS=m -# CONFIG_FW_CFG_SYSFS_CMDLINE is not set -CONFIG_GOOGLE_FIRMWARE=y -# CONFIG_GOOGLE_SMI is not set -CONFIG_GOOGLE_COREBOOT_TABLE=m -CONFIG_GOOGLE_MEMCONSOLE=m -# CONFIG_GOOGLE_MEMCONSOLE_X86_LEGACY is not set -CONFIG_GOOGLE_FRAMEBUFFER_COREBOOT=m -CONFIG_GOOGLE_MEMCONSOLE_COREBOOT=m -CONFIG_GOOGLE_VPD=m - -# -# EFI (Extensible Firmware Interface) Support -# -# CONFIG_EFI_VARS is not set -CONFIG_EFI_ESRT=y -CONFIG_EFI_VARS_PSTORE=y -# CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE is not set -CONFIG_EFI_RUNTIME_MAP=y -# CONFIG_EFI_FAKE_MEMMAP is not set -CONFIG_EFI_SOFT_RESERVE=y -CONFIG_EFI_RUNTIME_WRAPPERS=y -CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y -# CONFIG_EFI_BOOTLOADER_CONTROL is not set -CONFIG_EFI_CAPSULE_LOADER=m -# CONFIG_EFI_TEST is not set -CONFIG_APPLE_PROPERTIES=y -# CONFIG_RESET_ATTACK_MITIGATION is not set -CONFIG_EFI_RCI2_TABLE=y -# CONFIG_EFI_DISABLE_PCI_DMA is not set -# end of EFI (Extensible Firmware Interface) Support - -CONFIG_EFI_EMBEDDED_FIRMWARE=y -CONFIG_UEFI_CPER=y -CONFIG_UEFI_CPER_X86=y -CONFIG_EFI_DEV_PATH_PARSER=y -CONFIG_EFI_EARLYCON=y -CONFIG_EFI_CUSTOM_SSDT_OVERLAYS=y - -# -# Tegra firmware driver -# -# end of Tegra firmware driver -# end of Firmware Drivers - -CONFIG_HAVE_KVM=y -CONFIG_HAVE_KVM_IRQCHIP=y -CONFIG_HAVE_KVM_IRQFD=y -CONFIG_HAVE_KVM_IRQ_ROUTING=y -CONFIG_HAVE_KVM_EVENTFD=y -CONFIG_KVM_MMIO=y -CONFIG_KVM_ASYNC_PF=y -CONFIG_HAVE_KVM_MSI=y -CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y -CONFIG_KVM_VFIO=y -CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y -CONFIG_KVM_COMPAT=y -CONFIG_HAVE_KVM_IRQ_BYPASS=y -CONFIG_HAVE_KVM_NO_POLL=y -CONFIG_KVM_XFER_TO_GUEST_WORK=y -CONFIG_VIRTUALIZATION=y -CONFIG_KVM=m -CONFIG_KVM_WERROR=y -CONFIG_KVM_INTEL=m -CONFIG_KVM_AMD=m -CONFIG_KVM_AMD_SEV=y -CONFIG_KVM_MMU_AUDIT=y -CONFIG_AS_AVX512=y -CONFIG_AS_SHA1_NI=y -CONFIG_AS_SHA256_NI=y -CONFIG_AS_TPAUSE=y - -# -# General architecture-dependent options -# -CONFIG_CRASH_CORE=y -CONFIG_KEXEC_CORE=y -CONFIG_HOTPLUG_SMT=y -CONFIG_GENERIC_ENTRY=y -CONFIG_OPROFILE=m -# CONFIG_OPROFILE_EVENT_MULTIPLEX is not set -CONFIG_HAVE_OPROFILE=y -CONFIG_OPROFILE_NMI_TIMER=y -CONFIG_KPROBES=y -CONFIG_JUMP_LABEL=y -# CONFIG_STATIC_KEYS_SELFTEST is not set -# CONFIG_STATIC_CALL_SELFTEST is not set -CONFIG_OPTPROBES=y -CONFIG_KPROBES_ON_FTRACE=y -CONFIG_UPROBES=y -CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y -CONFIG_ARCH_USE_BUILTIN_BSWAP=y -CONFIG_KRETPROBES=y -CONFIG_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_IOREMAP_PROT=y -CONFIG_HAVE_KPROBES=y -CONFIG_HAVE_KRETPROBES=y -CONFIG_HAVE_OPTPROBES=y -CONFIG_HAVE_KPROBES_ON_FTRACE=y -CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y -CONFIG_HAVE_NMI=y -CONFIG_HAVE_ARCH_TRACEHOOK=y -CONFIG_HAVE_DMA_CONTIGUOUS=y -CONFIG_GENERIC_SMP_IDLE_THREAD=y -CONFIG_ARCH_HAS_FORTIFY_SOURCE=y -CONFIG_ARCH_HAS_SET_MEMORY=y -CONFIG_ARCH_HAS_SET_DIRECT_MAP=y -CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y -CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y -CONFIG_HAVE_ASM_MODVERSIONS=y -CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y -CONFIG_HAVE_RSEQ=y -CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y -CONFIG_HAVE_HW_BREAKPOINT=y -CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y 
-CONFIG_HAVE_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_PERF_EVENTS_NMI=y -CONFIG_HAVE_HARDLOCKUP_DETECTOR_PERF=y -CONFIG_HAVE_PERF_REGS=y -CONFIG_HAVE_PERF_USER_STACK_DUMP=y -CONFIG_HAVE_ARCH_JUMP_LABEL=y -CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y -CONFIG_MMU_GATHER_TABLE_FREE=y -CONFIG_MMU_GATHER_RCU_TABLE_FREE=y -CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y -CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y -CONFIG_HAVE_CMPXCHG_LOCAL=y -CONFIG_HAVE_CMPXCHG_DOUBLE=y -CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y -CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y -CONFIG_HAVE_ARCH_SECCOMP=y -CONFIG_HAVE_ARCH_SECCOMP_FILTER=y -CONFIG_SECCOMP=y -CONFIG_SECCOMP_FILTER=y -CONFIG_HAVE_ARCH_STACKLEAK=y -CONFIG_HAVE_STACKPROTECTOR=y -CONFIG_STACKPROTECTOR=y -CONFIG_STACKPROTECTOR_STRONG=y -CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y -CONFIG_HAVE_CONTEXT_TRACKING=y -CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y -CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y -CONFIG_HAVE_MOVE_PMD=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y -CONFIG_HAVE_ARCH_HUGE_VMAP=y -CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y -CONFIG_HAVE_ARCH_SOFT_DIRTY=y -CONFIG_HAVE_MOD_ARCH_SPECIFIC=y -CONFIG_MODULES_USE_ELF_RELA=y -CONFIG_ARCH_HAS_ELF_RANDOMIZE=y -CONFIG_HAVE_ARCH_MMAP_RND_BITS=y -CONFIG_HAVE_EXIT_THREAD=y -CONFIG_ARCH_MMAP_RND_BITS=28 -CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y -CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 -CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES=y -CONFIG_HAVE_STACK_VALIDATION=y -CONFIG_HAVE_RELIABLE_STACKTRACE=y -CONFIG_ISA_BUS_API=y -CONFIG_OLD_SIGSUSPEND3=y -CONFIG_COMPAT_OLD_SIGACTION=y -CONFIG_COMPAT_32BIT_TIME=y -CONFIG_HAVE_ARCH_VMAP_STACK=y -CONFIG_VMAP_STACK=y -CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y -CONFIG_STRICT_KERNEL_RWX=y -CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y -CONFIG_STRICT_MODULE_RWX=y -CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y -CONFIG_ARCH_USE_MEMREMAP_PROT=y -CONFIG_LOCK_EVENT_COUNTS=y -CONFIG_ARCH_HAS_MEM_ENCRYPT=y -CONFIG_HAVE_STATIC_CALL=y -CONFIG_HAVE_STATIC_CALL_INLINE=y - -# -# GCOV-based kernel profiling -# -# CONFIG_GCOV_KERNEL is not set -CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y -# end of GCOV-based kernel profiling - -CONFIG_HAVE_GCC_PLUGINS=y -CONFIG_GCC_PLUGINS=y -# CONFIG_GCC_PLUGIN_CYC_COMPLEXITY is not set -# CONFIG_GCC_PLUGIN_LATENT_ENTROPY is not set -# CONFIG_GCC_PLUGIN_RANDSTRUCT is not set -# end of General architecture-dependent options - -CONFIG_RT_MUTEXES=y -CONFIG_BASE_SMALL=0 -CONFIG_MODULE_SIG_FORMAT=y -CONFIG_MODULES=y -CONFIG_MODULE_FORCE_LOAD=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_MODVERSIONS is not set -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG=y -# CONFIG_MODULE_SIG_FORCE is not set -CONFIG_MODULE_SIG_ALL=y -# CONFIG_MODULE_SIG_SHA1 is not set -# CONFIG_MODULE_SIG_SHA224 is not set -# CONFIG_MODULE_SIG_SHA256 is not set -# CONFIG_MODULE_SIG_SHA384 is not set -CONFIG_MODULE_SIG_SHA512=y -CONFIG_MODULE_SIG_HASH="sha512" -CONFIG_MODULE_COMPRESS=y -# CONFIG_MODULE_COMPRESS_GZIP is not set -CONFIG_MODULE_COMPRESS_XZ=y -CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS=y -CONFIG_UNUSED_SYMBOLS=y -CONFIG_MODULES_TREE_LOOKUP=y -CONFIG_BLOCK=y -CONFIG_BLK_RQ_ALLOC_TIME=y -CONFIG_BLK_SCSI_REQUEST=y -CONFIG_BLK_CGROUP_RWSTAT=y -CONFIG_BLK_DEV_BSG=y -CONFIG_BLK_DEV_BSGLIB=y -CONFIG_BLK_DEV_INTEGRITY=y -CONFIG_BLK_DEV_INTEGRITY_T10=y -CONFIG_BLK_DEV_ZONED=y -CONFIG_BLK_DEV_THROTTLING=y -CONFIG_BLK_DEV_THROTTLING_LOW=y -# CONFIG_BLK_CMDLINE_PARSER is not set -CONFIG_BLK_WBT=y -CONFIG_BLK_CGROUP_IOLATENCY=y -CONFIG_BLK_CGROUP_IOCOST=y -CONFIG_BLK_WBT_MQ=y -CONFIG_BLK_DEBUG_FS=y -CONFIG_BLK_DEBUG_FS_ZONED=y 
-CONFIG_BLK_SED_OPAL=y -CONFIG_BLK_INLINE_ENCRYPTION=y -CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_AIX_PARTITION=y -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -# CONFIG_UNIXWARE_DISKLABEL is not set -CONFIG_LDM_PARTITION=y -# CONFIG_LDM_DEBUG is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -CONFIG_KARMA_PARTITION=y -CONFIG_EFI_PARTITION=y -# CONFIG_SYSV68_PARTITION is not set -# CONFIG_CMDLINE_PARTITION is not set -# end of Partition Types - -CONFIG_BLOCK_COMPAT=y -CONFIG_BLK_MQ_PCI=y -CONFIG_BLK_MQ_VIRTIO=y -CONFIG_BLK_MQ_RDMA=y -CONFIG_BLK_PM=y - -# -# IO Schedulers -# -CONFIG_MQ_IOSCHED_DEADLINE=y -CONFIG_MQ_IOSCHED_KYBER=y -CONFIG_IOSCHED_BFQ=y -CONFIG_BFQ_GROUP_IOSCHED=y -# CONFIG_BFQ_CGROUP_DEBUG is not set -# end of IO Schedulers - -CONFIG_PREEMPT_NOTIFIERS=y -CONFIG_PADATA=y -CONFIG_ASN1=y -CONFIG_UNINLINE_SPIN_UNLOCK=y -CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y -CONFIG_MUTEX_SPIN_ON_OWNER=y -CONFIG_RWSEM_SPIN_ON_OWNER=y -CONFIG_LOCK_SPIN_ON_OWNER=y -CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y -CONFIG_QUEUED_SPINLOCKS=y -CONFIG_ARCH_USE_QUEUED_RWLOCKS=y -CONFIG_QUEUED_RWLOCKS=y -CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y -CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y -CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y -CONFIG_FREEZER=y - -# -# Executable file formats -# -CONFIG_BINFMT_ELF=y -CONFIG_COMPAT_BINFMT_ELF=y -CONFIG_ELFCORE=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y -CONFIG_BINFMT_SCRIPT=y -CONFIG_BINFMT_MISC=y -CONFIG_COREDUMP=y -# end of Executable file formats - -# -# Memory Management options -# -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_SPARSEMEM_MANUAL=y -CONFIG_SPARSEMEM=y -CONFIG_NEED_MULTIPLE_NODES=y -CONFIG_SPARSEMEM_EXTREME=y -CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y -CONFIG_SPARSEMEM_VMEMMAP=y -CONFIG_HAVE_FAST_GUP=y -CONFIG_NUMA_KEEP_MEMINFO=y -CONFIG_MEMORY_ISOLATION=y -CONFIG_HAVE_BOOTMEM_INFO_NODE=y -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTPLUG_SPARSE=y -CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y -CONFIG_MEMORY_HOTREMOVE=y -CONFIG_SPLIT_PTLOCK_CPUS=4 -CONFIG_MEMORY_BALLOON=y -CONFIG_BALLOON_COMPACTION=y -CONFIG_COMPACTION=y -CONFIG_PAGE_REPORTING=y -CONFIG_MIGRATION=y -CONFIG_CONTIG_ALLOC=y -CONFIG_PHYS_ADDR_T_64BIT=y -CONFIG_BOUNCE=y -CONFIG_VIRT_TO_BUS=y -CONFIG_MMU_NOTIFIER=y -CONFIG_KSM=y -CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 -CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y -CONFIG_MEMORY_FAILURE=y -CONFIG_HWPOISON_INJECT=m -CONFIG_TRANSPARENT_HUGEPAGE=y -# CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS is not set -CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y -CONFIG_ARCH_WANTS_THP_SWAP=y -CONFIG_THP_SWAP=y -CONFIG_CLEANCACHE=y -CONFIG_FRONTSWAP=y -# CONFIG_CMA is not set -CONFIG_MEM_SOFT_DIRTY=y -CONFIG_ZSWAP=y -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set -CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4=y -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4HC is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD is not set -CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lz4" -# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD is not set -CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD=y -# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set -CONFIG_ZSWAP_ZPOOL_DEFAULT="z3fold" -CONFIG_ZSWAP_DEFAULT_ON=y -CONFIG_ZPOOL=y -CONFIG_ZBUD=y 
-CONFIG_Z3FOLD=y -CONFIG_ZSMALLOC=y -# CONFIG_ZSMALLOC_PGTABLE_MAPPING is not set -# CONFIG_ZSMALLOC_STAT is not set -CONFIG_GENERIC_EARLY_IOREMAP=y -# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set -# CONFIG_IDLE_PAGE_TRACKING is not set -CONFIG_ARCH_HAS_PTE_DEVMAP=y -CONFIG_ZONE_DEVICE=y -CONFIG_DEV_PAGEMAP_OPS=y -CONFIG_HMM_MIRROR=y -CONFIG_DEVICE_PRIVATE=y -CONFIG_VMAP_PFN=y -CONFIG_FRAME_VECTOR=y -CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y -CONFIG_ARCH_HAS_PKEYS=y -# CONFIG_PERCPU_STATS is not set -# CONFIG_GUP_BENCHMARK is not set -CONFIG_READ_ONLY_THP_FOR_FS=y -CONFIG_ARCH_HAS_PTE_SPECIAL=y -CONFIG_MAPPING_DIRTY_HELPERS=y -# end of Memory Management options - -CONFIG_NET=y -CONFIG_COMPAT_NETLINK_MESSAGES=y -CONFIG_NET_INGRESS=y -CONFIG_NET_EGRESS=y -CONFIG_NET_REDIRECT=y -CONFIG_SKB_EXTENSIONS=y - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_DIAG=y -CONFIG_UNIX=y -CONFIG_UNIX_SCM=y -CONFIG_UNIX_DIAG=y -CONFIG_TLS=m -CONFIG_TLS_DEVICE=y -# CONFIG_TLS_TOE is not set -CONFIG_XFRM=y -CONFIG_XFRM_OFFLOAD=y -CONFIG_XFRM_ALGO=m -CONFIG_XFRM_USER=m -# CONFIG_XFRM_USER_COMPAT is not set -CONFIG_XFRM_INTERFACE=m -CONFIG_XFRM_SUB_POLICY=y -CONFIG_XFRM_MIGRATE=y -CONFIG_XFRM_STATISTICS=y -CONFIG_XFRM_AH=m -CONFIG_XFRM_ESP=m -CONFIG_XFRM_IPCOMP=m -CONFIG_NET_KEY=m -CONFIG_NET_KEY_MIGRATE=y -CONFIG_XFRM_ESPINTCP=y -CONFIG_SMC=m -CONFIG_SMC_DIAG=m -CONFIG_XDP_SOCKETS=y -CONFIG_XDP_SOCKETS_DIAG=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -# CONFIG_IP_FIB_TRIE_STATS is not set -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_CLASSID=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE_DEMUX=m -CONFIG_NET_IP_TUNNEL=m -CONFIG_NET_IPGRE=m -# CONFIG_NET_IPGRE_BROADCAST is not set -CONFIG_IP_MROUTE_COMMON=y -CONFIG_IP_MROUTE=y -CONFIG_IP_MROUTE_MULTIPLE_TABLES=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -CONFIG_NET_IPVTI=m -CONFIG_NET_UDP_TUNNEL=m -CONFIG_NET_FOU=m -CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_ESP_OFFLOAD=m -CONFIG_INET_ESPINTCP=y -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_TUNNEL=m -CONFIG_INET_TUNNEL=m -CONFIG_INET_DIAG=m -CONFIG_INET_TCP_DIAG=m -CONFIG_INET_UDP_DIAG=m -CONFIG_INET_RAW_DIAG=m -CONFIG_INET_DIAG_DESTROY=y -CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_BIC=m -CONFIG_TCP_CONG_CUBIC=y -CONFIG_TCP_CONG_WESTWOOD=m -CONFIG_TCP_CONG_HTCP=m -CONFIG_TCP_CONG_HSTCP=m -CONFIG_TCP_CONG_HYBLA=m -CONFIG_TCP_CONG_VEGAS=m -CONFIG_TCP_CONG_NV=m -CONFIG_TCP_CONG_SCALABLE=m -CONFIG_TCP_CONG_LP=m -CONFIG_TCP_CONG_VENO=m -CONFIG_TCP_CONG_YEAH=m -CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_TCP_CONG_DCTCP=m -CONFIG_TCP_CONG_CDG=m -CONFIG_TCP_CONG_BBR=m -CONFIG_DEFAULT_CUBIC=y -# CONFIG_DEFAULT_RENO is not set -CONFIG_DEFAULT_TCP_CONG="cubic" -CONFIG_TCP_MD5SIG=y -CONFIG_IPV6=y -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_IPV6_ROUTE_INFO=y -CONFIG_IPV6_OPTIMISTIC_DAD=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_ESPINTCP=y -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_MIP6=m -CONFIG_IPV6_ILA=m -CONFIG_INET6_XFRM_TUNNEL=m -CONFIG_INET6_TUNNEL=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_SIT=m -CONFIG_IPV6_SIT_6RD=y -CONFIG_IPV6_NDISC_NODETYPE=y -CONFIG_IPV6_TUNNEL=m -CONFIG_IPV6_GRE=m -CONFIG_IPV6_FOU=m -CONFIG_IPV6_FOU_TUNNEL=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_IPV6_MROUTE=y -CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y -CONFIG_IPV6_PIMSM_V2=y -CONFIG_IPV6_SEG6_LWTUNNEL=y -CONFIG_IPV6_SEG6_HMAC=y -CONFIG_IPV6_SEG6_BPF=y 
-CONFIG_IPV6_RPL_LWTUNNEL=y -CONFIG_NETLABEL=y -CONFIG_MPTCP=y -CONFIG_INET_MPTCP_DIAG=m -CONFIG_MPTCP_IPV6=y -CONFIG_NETWORK_SECMARK=y -CONFIG_NET_PTP_CLASSIFY=y -CONFIG_NETWORK_PHY_TIMESTAMPING=y -CONFIG_NETFILTER=y -CONFIG_NETFILTER_ADVANCED=y -CONFIG_BRIDGE_NETFILTER=m - -# -# Core Netfilter Configuration -# -CONFIG_NETFILTER_INGRESS=y -CONFIG_NETFILTER_NETLINK=m -CONFIG_NETFILTER_FAMILY_BRIDGE=y -CONFIG_NETFILTER_FAMILY_ARP=y -CONFIG_NETFILTER_NETLINK_ACCT=m -CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_NETLINK_LOG=m -CONFIG_NETFILTER_NETLINK_OSF=m -CONFIG_NF_CONNTRACK=m -CONFIG_NF_LOG_COMMON=m -CONFIG_NF_LOG_NETDEV=m -CONFIG_NETFILTER_CONNCOUNT=m -CONFIG_NF_CONNTRACK_MARK=y -CONFIG_NF_CONNTRACK_SECMARK=y -CONFIG_NF_CONNTRACK_ZONES=y -CONFIG_NF_CONNTRACK_PROCFS=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_TIMEOUT=y -CONFIG_NF_CONNTRACK_TIMESTAMP=y -CONFIG_NF_CONNTRACK_LABELS=y -CONFIG_NF_CT_PROTO_DCCP=y -CONFIG_NF_CT_PROTO_GRE=y -CONFIG_NF_CT_PROTO_SCTP=y -CONFIG_NF_CT_PROTO_UDPLITE=y -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_BROADCAST=m -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_SNMP=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_NETLINK_HELPER=m -CONFIG_NETFILTER_NETLINK_GLUE_CT=y -CONFIG_NF_NAT=m -CONFIG_NF_NAT_AMANDA=m -CONFIG_NF_NAT_FTP=m -CONFIG_NF_NAT_IRC=m -CONFIG_NF_NAT_SIP=m -CONFIG_NF_NAT_TFTP=m -CONFIG_NF_NAT_REDIRECT=y -CONFIG_NF_NAT_MASQUERADE=y -CONFIG_NETFILTER_SYNPROXY=m -CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y -CONFIG_NF_TABLES_NETDEV=y -CONFIG_NFT_NUMGEN=m -CONFIG_NFT_CT=m -CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m -CONFIG_NFT_CONNLIMIT=m -CONFIG_NFT_LOG=m -CONFIG_NFT_LIMIT=m -CONFIG_NFT_MASQ=m -CONFIG_NFT_REDIR=m -CONFIG_NFT_NAT=m -CONFIG_NFT_TUNNEL=m -CONFIG_NFT_OBJREF=m -CONFIG_NFT_QUEUE=m -CONFIG_NFT_QUOTA=m -CONFIG_NFT_REJECT=m -CONFIG_NFT_REJECT_INET=m -CONFIG_NFT_COMPAT=m -CONFIG_NFT_HASH=m -CONFIG_NFT_FIB=m -CONFIG_NFT_FIB_INET=m -CONFIG_NFT_XFRM=m -CONFIG_NFT_SOCKET=m -CONFIG_NFT_OSF=m -CONFIG_NFT_TPROXY=m -CONFIG_NFT_SYNPROXY=m -CONFIG_NF_DUP_NETDEV=m -CONFIG_NFT_DUP_NETDEV=m -CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NF_FLOW_TABLE_INET=m -CONFIG_NF_FLOW_TABLE=m -CONFIG_NETFILTER_XTABLES=m - -# -# Xtables combined modules -# -CONFIG_NETFILTER_XT_MARK=m -CONFIG_NETFILTER_XT_CONNMARK=m -CONFIG_NETFILTER_XT_SET=m - -# -# Xtables targets -# -CONFIG_NETFILTER_XT_TARGET_AUDIT=m -CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m -CONFIG_NETFILTER_XT_TARGET_CT=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_HL=m -CONFIG_NETFILTER_XT_TARGET_HMARK=m -CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m -CONFIG_NETFILTER_XT_TARGET_LED=m -CONFIG_NETFILTER_XT_TARGET_LOG=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_NAT=m -CONFIG_NETFILTER_XT_TARGET_NETMAP=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m -CONFIG_NETFILTER_XT_TARGET_RATEEST=m -CONFIG_NETFILTER_XT_TARGET_REDIRECT=m -CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m -CONFIG_NETFILTER_XT_TARGET_TEE=m -CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m -CONFIG_NETFILTER_XT_TARGET_SECMARK=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m 
-CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m - -# -# Xtables matches -# -CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_CGROUP=m -CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_CPU=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ECN=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_HL=m -CONFIG_NETFILTER_XT_MATCH_IPCOMP=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_IPVS=m -CONFIG_NETFILTER_XT_MATCH_L2TP=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_NFACCT=m -CONFIG_NETFILTER_XT_MATCH_OSF=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_SOCKET=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -# end of Core Netfilter Configuration - -CONFIG_IP_SET=m -CONFIG_IP_SET_MAX=256 -CONFIG_IP_SET_BITMAP_IP=m -CONFIG_IP_SET_BITMAP_IPMAC=m -CONFIG_IP_SET_BITMAP_PORT=m -CONFIG_IP_SET_HASH_IP=m -CONFIG_IP_SET_HASH_IPMARK=m -CONFIG_IP_SET_HASH_IPPORT=m -CONFIG_IP_SET_HASH_IPPORTIP=m -CONFIG_IP_SET_HASH_IPPORTNET=m -CONFIG_IP_SET_HASH_IPMAC=m -CONFIG_IP_SET_HASH_MAC=m -CONFIG_IP_SET_HASH_NETPORTNET=m -CONFIG_IP_SET_HASH_NET=m -CONFIG_IP_SET_HASH_NETNET=m -CONFIG_IP_SET_HASH_NETPORT=m -CONFIG_IP_SET_HASH_NETIFACE=m -CONFIG_IP_SET_LIST_SET=m -CONFIG_IP_VS=m -CONFIG_IP_VS_IPV6=y -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=15 - -# -# IPVS transport protocol load balancing support -# -CONFIG_IP_VS_PROTO_TCP=y -CONFIG_IP_VS_PROTO_UDP=y -CONFIG_IP_VS_PROTO_AH_ESP=y -CONFIG_IP_VS_PROTO_ESP=y -CONFIG_IP_VS_PROTO_AH=y -CONFIG_IP_VS_PROTO_SCTP=y - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_FO=m -CONFIG_IP_VS_OVF=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_MH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m - -# -# IPVS SH scheduler -# -CONFIG_IP_VS_SH_TAB_BITS=8 - -# -# IPVS MH scheduler -# -CONFIG_IP_VS_MH_TAB_INDEX=12 - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -CONFIG_IP_VS_NFCT=y -CONFIG_IP_VS_PE_SIP=m - -# -# IP: Netfilter Configuration -# -CONFIG_NF_DEFRAG_IPV4=m -CONFIG_NF_SOCKET_IPV4=m -CONFIG_NF_TPROXY_IPV4=m -CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_REJECT_IPV4=m -CONFIG_NFT_DUP_IPV4=m -CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m -CONFIG_NF_DUP_IPV4=m -CONFIG_NF_LOG_ARP=m -CONFIG_NF_LOG_IPV4=m -CONFIG_NF_REJECT_IPV4=m -CONFIG_NF_NAT_SNMP_BASIC=m -CONFIG_NF_NAT_PPTP=m -CONFIG_NF_NAT_H323=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m 
-CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_SYNPROXY=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -# end of IP: Netfilter Configuration - -# -# IPv6: Netfilter Configuration -# -CONFIG_NF_SOCKET_IPV6=m -CONFIG_NF_TPROXY_IPV6=m -CONFIG_NF_TABLES_IPV6=y -CONFIG_NFT_REJECT_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m -CONFIG_NF_DUP_IPV6=m -CONFIG_NF_REJECT_IPV6=m -CONFIG_NF_LOG_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m -# end of IPv6: Netfilter Configuration - -CONFIG_NF_DEFRAG_IPV6=m -CONFIG_NF_TABLES_BRIDGE=m -CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m -CONFIG_NF_LOG_BRIDGE=m -CONFIG_NF_CONNTRACK_BRIDGE=m -CONFIG_BRIDGE_NF_EBTABLES=m -CONFIG_BRIDGE_EBT_BROUTE=m -CONFIG_BRIDGE_EBT_T_FILTER=m -CONFIG_BRIDGE_EBT_T_NAT=m -CONFIG_BRIDGE_EBT_802_3=m -CONFIG_BRIDGE_EBT_AMONG=m -CONFIG_BRIDGE_EBT_ARP=m -CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m -CONFIG_BRIDGE_EBT_LIMIT=m -CONFIG_BRIDGE_EBT_MARK=m -CONFIG_BRIDGE_EBT_PKTTYPE=m -CONFIG_BRIDGE_EBT_STP=m -CONFIG_BRIDGE_EBT_VLAN=m -CONFIG_BRIDGE_EBT_ARPREPLY=m -CONFIG_BRIDGE_EBT_DNAT=m -CONFIG_BRIDGE_EBT_MARK_T=m -CONFIG_BRIDGE_EBT_REDIRECT=m -CONFIG_BRIDGE_EBT_SNAT=m -CONFIG_BRIDGE_EBT_LOG=m -CONFIG_BRIDGE_EBT_NFLOG=m -# CONFIG_BPFILTER is not set -CONFIG_IP_DCCP=m -CONFIG_INET_DCCP_DIAG=m - -# -# DCCP CCIDs Configuration -# -# CONFIG_IP_DCCP_CCID2_DEBUG is not set -CONFIG_IP_DCCP_CCID3=y -# CONFIG_IP_DCCP_CCID3_DEBUG is not set -CONFIG_IP_DCCP_TFRC_LIB=y -# end of DCCP CCIDs Configuration - -# -# DCCP Kernel Hacking -# -# CONFIG_IP_DCCP_DEBUG is not set -# end of DCCP Kernel Hacking - -CONFIG_IP_SCTP=m -# CONFIG_SCTP_DBG_OBJCNT is not set -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5 is not set -CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_NONE is not set -CONFIG_SCTP_COOKIE_HMAC_MD5=y -CONFIG_SCTP_COOKIE_HMAC_SHA1=y -CONFIG_INET_SCTP_DIAG=m -CONFIG_RDS=m -CONFIG_RDS_RDMA=m -CONFIG_RDS_TCP=m -# CONFIG_RDS_DEBUG is not set -CONFIG_TIPC=m -CONFIG_TIPC_MEDIA_IB=y -CONFIG_TIPC_MEDIA_UDP=y -CONFIG_TIPC_CRYPTO=y -CONFIG_TIPC_DIAG=m -CONFIG_ATM=m -CONFIG_ATM_CLIP=m -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -# CONFIG_ATM_BR2684_IPFILTER is not set -CONFIG_L2TP=m -# CONFIG_L2TP_DEBUGFS is not set -CONFIG_L2TP_V3=y -CONFIG_L2TP_IP=m -CONFIG_L2TP_ETH=m -CONFIG_STP=m -CONFIG_GARP=m -CONFIG_MRP=m -CONFIG_BRIDGE=m -CONFIG_BRIDGE_IGMP_SNOOPING=y -CONFIG_BRIDGE_VLAN_FILTERING=y -CONFIG_BRIDGE_MRP=y -CONFIG_HAVE_NET_DSA=y -CONFIG_NET_DSA=m -CONFIG_NET_DSA_TAG_8021Q=m -CONFIG_NET_DSA_TAG_AR9331=m 
-CONFIG_NET_DSA_TAG_BRCM_COMMON=m -CONFIG_NET_DSA_TAG_BRCM=m -CONFIG_NET_DSA_TAG_BRCM_PREPEND=m -CONFIG_NET_DSA_TAG_GSWIP=m -CONFIG_NET_DSA_TAG_DSA=m -CONFIG_NET_DSA_TAG_EDSA=m -CONFIG_NET_DSA_TAG_MTK=m -CONFIG_NET_DSA_TAG_KSZ=m -CONFIG_NET_DSA_TAG_RTL4_A=m -CONFIG_NET_DSA_TAG_OCELOT=m -CONFIG_NET_DSA_TAG_QCA=m -CONFIG_NET_DSA_TAG_LAN9303=m -CONFIG_NET_DSA_TAG_SJA1105=m -CONFIG_NET_DSA_TAG_TRAILER=m -CONFIG_VLAN_8021Q=m -CONFIG_VLAN_8021Q_GVRP=y -CONFIG_VLAN_8021Q_MVRP=y -# CONFIG_DECNET is not set -CONFIG_LLC=m -CONFIG_LLC2=m -CONFIG_ATALK=m -CONFIG_DEV_APPLETALK=m -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -CONFIG_PHONET=m -CONFIG_6LOWPAN=m -# CONFIG_6LOWPAN_DEBUGFS is not set -CONFIG_6LOWPAN_NHC=m -CONFIG_6LOWPAN_NHC_DEST=m -CONFIG_6LOWPAN_NHC_FRAGMENT=m -CONFIG_6LOWPAN_NHC_HOP=m -CONFIG_6LOWPAN_NHC_IPV6=m -CONFIG_6LOWPAN_NHC_MOBILITY=m -CONFIG_6LOWPAN_NHC_ROUTING=m -CONFIG_6LOWPAN_NHC_UDP=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m -CONFIG_IEEE802154=m -CONFIG_IEEE802154_NL802154_EXPERIMENTAL=y -CONFIG_IEEE802154_SOCKET=m -CONFIG_IEEE802154_6LOWPAN=m -CONFIG_MAC802154=m -CONFIG_NET_SCHED=y - -# -# Queueing/Scheduling -# -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_HFSC=m -CONFIG_NET_SCH_ATM=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_MULTIQ=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFB=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_CBS=m -CONFIG_NET_SCH_ETF=m -CONFIG_NET_SCH_TAPRIO=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_MQPRIO=m -CONFIG_NET_SCH_SKBPRIO=m -CONFIG_NET_SCH_CHOKE=m -CONFIG_NET_SCH_QFQ=m -CONFIG_NET_SCH_CODEL=m -CONFIG_NET_SCH_FQ_CODEL=y -CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_FQ=m -CONFIG_NET_SCH_HHF=m -CONFIG_NET_SCH_PIE=m -CONFIG_NET_SCH_FQ_PIE=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_SCH_PLUG=m -CONFIG_NET_SCH_ETS=m -CONFIG_NET_SCH_DEFAULT=y -# CONFIG_DEFAULT_FQ is not set -# CONFIG_DEFAULT_CODEL is not set -CONFIG_DEFAULT_FQ_CODEL=y -# CONFIG_DEFAULT_FQ_PIE is not set -# CONFIG_DEFAULT_SFQ is not set -# CONFIG_DEFAULT_PFIFO_FAST is not set -CONFIG_DEFAULT_NET_SCH="fq_codel" - -# -# Classification -# -CONFIG_NET_CLS=y -CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_FLOW=m -CONFIG_NET_CLS_CGROUP=m -CONFIG_NET_CLS_BPF=m -CONFIG_NET_CLS_FLOWER=m -CONFIG_NET_CLS_MATCHALL=m -CONFIG_NET_EMATCH=y -CONFIG_NET_EMATCH_STACK=32 -CONFIG_NET_EMATCH_CMP=m -CONFIG_NET_EMATCH_NBYTE=m -CONFIG_NET_EMATCH_U32=m -CONFIG_NET_EMATCH_META=m -CONFIG_NET_EMATCH_TEXT=m -CONFIG_NET_EMATCH_CANID=m -CONFIG_NET_EMATCH_IPSET=m -CONFIG_NET_EMATCH_IPT=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=m -CONFIG_NET_ACT_GACT=m -CONFIG_GACT_PROB=y -CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_SAMPLE=m -CONFIG_NET_ACT_IPT=m -CONFIG_NET_ACT_NAT=m -CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_ACT_SIMP=m -CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_ACT_CSUM=m -CONFIG_NET_ACT_MPLS=m -CONFIG_NET_ACT_VLAN=m -CONFIG_NET_ACT_BPF=m -CONFIG_NET_ACT_CONNMARK=m -CONFIG_NET_ACT_CTINFO=m -CONFIG_NET_ACT_SKBMOD=m -CONFIG_NET_ACT_IFE=m -CONFIG_NET_ACT_TUNNEL_KEY=m -CONFIG_NET_ACT_CT=m -CONFIG_NET_ACT_GATE=m -CONFIG_NET_IFE_SKBMARK=m -CONFIG_NET_IFE_SKBPRIO=m 
-CONFIG_NET_IFE_SKBTCINDEX=m -CONFIG_NET_TC_SKB_EXT=y -CONFIG_NET_SCH_FIFO=y -CONFIG_DCB=y -CONFIG_DNS_RESOLVER=m -CONFIG_BATMAN_ADV=m -CONFIG_BATMAN_ADV_BATMAN_V=y -CONFIG_BATMAN_ADV_BLA=y -CONFIG_BATMAN_ADV_DAT=y -CONFIG_BATMAN_ADV_NC=y -CONFIG_BATMAN_ADV_MCAST=y -CONFIG_BATMAN_ADV_DEBUGFS=y -# CONFIG_BATMAN_ADV_DEBUG is not set -CONFIG_BATMAN_ADV_SYSFS=y -# CONFIG_BATMAN_ADV_TRACING is not set -CONFIG_OPENVSWITCH=m -CONFIG_OPENVSWITCH_GRE=m -CONFIG_OPENVSWITCH_VXLAN=m -CONFIG_OPENVSWITCH_GENEVE=m -CONFIG_VSOCKETS=m -CONFIG_VSOCKETS_DIAG=m -CONFIG_VSOCKETS_LOOPBACK=m -CONFIG_VMWARE_VMCI_VSOCKETS=m -CONFIG_VIRTIO_VSOCKETS=m -CONFIG_VIRTIO_VSOCKETS_COMMON=m -CONFIG_HYPERV_VSOCKETS=m -CONFIG_NETLINK_DIAG=m -CONFIG_MPLS=y -CONFIG_NET_MPLS_GSO=m -CONFIG_MPLS_ROUTING=m -CONFIG_MPLS_IPTUNNEL=m -CONFIG_NET_NSH=m -CONFIG_HSR=m -CONFIG_NET_SWITCHDEV=y -CONFIG_NET_L3_MASTER_DEV=y -CONFIG_QRTR=m -CONFIG_QRTR_SMD=m -CONFIG_QRTR_TUN=m -CONFIG_QRTR_MHI=m -CONFIG_NET_NCSI=y -CONFIG_NCSI_OEM_CMD_GET_MAC=y -CONFIG_RPS=y -CONFIG_RFS_ACCEL=y -CONFIG_XPS=y -CONFIG_CGROUP_NET_PRIO=y -CONFIG_CGROUP_NET_CLASSID=y -CONFIG_NET_RX_BUSY_POLL=y -CONFIG_BQL=y -CONFIG_BPF_JIT=y -CONFIG_BPF_STREAM_PARSER=y -CONFIG_NET_FLOW_LIMIT=y - -# -# Network testing -# -CONFIG_NET_PKTGEN=m -CONFIG_NET_DROP_MONITOR=y -# end of Network testing -# end of Networking options - -CONFIG_HAMRADIO=y - -# -# Packet Radio protocols -# -CONFIG_AX25=m -CONFIG_AX25_DAMA_SLAVE=y -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# -CONFIG_MKISS=m -CONFIG_6PACK=m -CONFIG_BPQETHER=m -CONFIG_BAYCOM_SER_FDX=m -CONFIG_BAYCOM_SER_HDX=m -CONFIG_BAYCOM_PAR=m -CONFIG_YAM=m -# end of AX.25 network device drivers - -CONFIG_CAN=m -CONFIG_CAN_RAW=m -CONFIG_CAN_BCM=m -CONFIG_CAN_GW=m -CONFIG_CAN_J1939=m -# CONFIG_CAN_ISOTP is not set - -# -# CAN Device Drivers -# -CONFIG_CAN_VCAN=m -CONFIG_CAN_VXCAN=m -CONFIG_CAN_SLCAN=m -CONFIG_CAN_DEV=m -CONFIG_CAN_CALC_BITTIMING=y -CONFIG_CAN_FLEXCAN=m -CONFIG_CAN_GRCAN=m -CONFIG_CAN_JANZ_ICAN3=m -CONFIG_CAN_KVASER_PCIEFD=m -CONFIG_CAN_C_CAN=m -CONFIG_CAN_C_CAN_PLATFORM=m -CONFIG_CAN_C_CAN_PCI=m -CONFIG_CAN_CC770=m -# CONFIG_CAN_CC770_ISA is not set -CONFIG_CAN_CC770_PLATFORM=m -CONFIG_CAN_IFI_CANFD=m -CONFIG_CAN_M_CAN=m -CONFIG_CAN_M_CAN_PLATFORM=m -CONFIG_CAN_M_CAN_TCAN4X5X=m -CONFIG_CAN_PEAK_PCIEFD=m -CONFIG_CAN_SJA1000=m -CONFIG_CAN_EMS_PCI=m -# CONFIG_CAN_EMS_PCMCIA is not set -CONFIG_CAN_F81601=m -CONFIG_CAN_KVASER_PCI=m -CONFIG_CAN_PEAK_PCI=m -CONFIG_CAN_PEAK_PCIEC=y -CONFIG_CAN_PEAK_PCMCIA=m -CONFIG_CAN_PLX_PCI=m -# CONFIG_CAN_SJA1000_ISA is not set -CONFIG_CAN_SJA1000_PLATFORM=m -CONFIG_CAN_SOFTING=m -CONFIG_CAN_SOFTING_CS=m - -# -# CAN SPI interfaces -# -CONFIG_CAN_HI311X=m -CONFIG_CAN_MCP251X=m -# CONFIG_CAN_MCP251XFD is not set -# end of CAN SPI interfaces - -# -# CAN USB interfaces -# -CONFIG_CAN_8DEV_USB=m -CONFIG_CAN_EMS_USB=m -CONFIG_CAN_ESD_USB2=m -CONFIG_CAN_GS_USB=m -CONFIG_CAN_KVASER_USB=m -CONFIG_CAN_MCBA_USB=m -CONFIG_CAN_PEAK_USB=m -CONFIG_CAN_UCAN=m -# end of CAN USB interfaces - -# CONFIG_CAN_DEBUG_DEVICES is not set -# end of CAN Device Drivers - -CONFIG_BT=m -CONFIG_BT_BREDR=y -CONFIG_BT_RFCOMM=m -CONFIG_BT_RFCOMM_TTY=y -CONFIG_BT_BNEP=m -CONFIG_BT_BNEP_MC_FILTER=y -CONFIG_BT_BNEP_PROTO_FILTER=y -CONFIG_BT_CMTP=m -CONFIG_BT_HIDP=m -CONFIG_BT_HS=y -CONFIG_BT_LE=y -CONFIG_BT_6LOWPAN=m -CONFIG_BT_LEDS=y -CONFIG_BT_MSFTEXT=y -CONFIG_BT_DEBUGFS=y -# CONFIG_BT_SELFTEST is not set - -# -# Bluetooth device drivers -# -CONFIG_BT_INTEL=m -CONFIG_BT_BCM=m -CONFIG_BT_RTL=m -CONFIG_BT_QCA=m 
-CONFIG_BT_HCIBTUSB=m -CONFIG_BT_HCIBTUSB_AUTOSUSPEND=y -CONFIG_BT_HCIBTUSB_BCM=y -CONFIG_BT_HCIBTUSB_MTK=y -CONFIG_BT_HCIBTUSB_RTL=y -CONFIG_BT_HCIBTSDIO=m -CONFIG_BT_HCIUART=m -CONFIG_BT_HCIUART_SERDEV=y -CONFIG_BT_HCIUART_H4=y -CONFIG_BT_HCIUART_NOKIA=m -CONFIG_BT_HCIUART_BCSP=y -CONFIG_BT_HCIUART_ATH3K=y -CONFIG_BT_HCIUART_LL=y -CONFIG_BT_HCIUART_3WIRE=y -CONFIG_BT_HCIUART_INTEL=y -CONFIG_BT_HCIUART_BCM=y -CONFIG_BT_HCIUART_RTL=y -CONFIG_BT_HCIUART_QCA=y -CONFIG_BT_HCIUART_AG6XX=y -CONFIG_BT_HCIUART_MRVL=y -CONFIG_BT_HCIBCM203X=m -CONFIG_BT_HCIBPA10X=m -CONFIG_BT_HCIBFUSB=m -CONFIG_BT_HCIDTL1=m -CONFIG_BT_HCIBT3C=m -CONFIG_BT_HCIBLUECARD=m -CONFIG_BT_HCIVHCI=m -CONFIG_BT_MRVL=m -CONFIG_BT_MRVL_SDIO=m -CONFIG_BT_ATH3K=m -CONFIG_BT_MTKSDIO=m -CONFIG_BT_MTKUART=m -CONFIG_BT_HCIRSI=m -# end of Bluetooth device drivers - -CONFIG_AF_RXRPC=m -CONFIG_AF_RXRPC_IPV6=y -# CONFIG_AF_RXRPC_INJECT_LOSS is not set -CONFIG_AF_RXRPC_DEBUG=y -CONFIG_RXKAD=y -CONFIG_AF_KCM=m -CONFIG_STREAM_PARSER=y -CONFIG_FIB_RULES=y -CONFIG_WIRELESS=y -CONFIG_WIRELESS_EXT=y -CONFIG_WEXT_CORE=y -CONFIG_WEXT_PROC=y -CONFIG_WEXT_SPY=y -CONFIG_WEXT_PRIV=y -CONFIG_CFG80211=m -# CONFIG_NL80211_TESTMODE is not set -# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set -# CONFIG_CFG80211_CERTIFICATION_ONUS is not set -CONFIG_CFG80211_REQUIRE_SIGNED_REGDB=y -CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS=y -CONFIG_CFG80211_DEFAULT_PS=y -CONFIG_CFG80211_DEBUGFS=y -CONFIG_CFG80211_CRDA_SUPPORT=y -CONFIG_CFG80211_WEXT=y -CONFIG_CFG80211_WEXT_EXPORT=y -CONFIG_LIB80211=m -CONFIG_LIB80211_CRYPT_WEP=m -CONFIG_LIB80211_CRYPT_CCMP=m -CONFIG_LIB80211_CRYPT_TKIP=m -# CONFIG_LIB80211_DEBUG is not set -CONFIG_MAC80211=m -CONFIG_MAC80211_HAS_RC=y -CONFIG_MAC80211_RC_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT="minstrel_ht" -CONFIG_MAC80211_MESH=y -CONFIG_MAC80211_LEDS=y -CONFIG_MAC80211_DEBUGFS=y -# CONFIG_MAC80211_MESSAGE_TRACING is not set -# CONFIG_MAC80211_DEBUG_MENU is not set -CONFIG_MAC80211_STA_HASH_MAX_SIZE=0 -CONFIG_WIMAX=m -CONFIG_WIMAX_DEBUG_LEVEL=8 -CONFIG_RFKILL=m -CONFIG_RFKILL_LEDS=y -CONFIG_RFKILL_INPUT=y -CONFIG_RFKILL_GPIO=m -CONFIG_NET_9P=m -CONFIG_NET_9P_VIRTIO=m -CONFIG_NET_9P_XEN=m -CONFIG_NET_9P_RDMA=m -# CONFIG_NET_9P_DEBUG is not set -CONFIG_CAIF=m -# CONFIG_CAIF_DEBUG is not set -CONFIG_CAIF_NETDEV=m -CONFIG_CAIF_USB=m -CONFIG_CEPH_LIB=m -CONFIG_CEPH_LIB_PRETTYDEBUG=y -CONFIG_CEPH_LIB_USE_DNS_RESOLVER=y -CONFIG_NFC=m -CONFIG_NFC_DIGITAL=m -CONFIG_NFC_NCI=m -CONFIG_NFC_NCI_SPI=m -CONFIG_NFC_NCI_UART=m -CONFIG_NFC_HCI=m -CONFIG_NFC_SHDLC=y - -# -# Near Field Communication (NFC) devices -# -CONFIG_NFC_TRF7970A=m -CONFIG_NFC_MEI_PHY=m -CONFIG_NFC_SIM=m -CONFIG_NFC_PORT100=m -CONFIG_NFC_FDP=m -CONFIG_NFC_FDP_I2C=m -CONFIG_NFC_PN544=m -CONFIG_NFC_PN544_I2C=m -CONFIG_NFC_PN544_MEI=m -CONFIG_NFC_PN533=m -CONFIG_NFC_PN533_USB=m -CONFIG_NFC_PN533_I2C=m -CONFIG_NFC_PN532_UART=m -CONFIG_NFC_MICROREAD=m -CONFIG_NFC_MICROREAD_I2C=m -CONFIG_NFC_MICROREAD_MEI=m -CONFIG_NFC_MRVL=m -CONFIG_NFC_MRVL_USB=m -CONFIG_NFC_MRVL_UART=m -CONFIG_NFC_MRVL_I2C=m -CONFIG_NFC_MRVL_SPI=m -CONFIG_NFC_ST21NFCA=m -CONFIG_NFC_ST21NFCA_I2C=m -CONFIG_NFC_ST_NCI=m -CONFIG_NFC_ST_NCI_I2C=m -CONFIG_NFC_ST_NCI_SPI=m -CONFIG_NFC_NXP_NCI=m -CONFIG_NFC_NXP_NCI_I2C=m -CONFIG_NFC_S3FWRN5=m -CONFIG_NFC_S3FWRN5_I2C=m -CONFIG_NFC_ST95HF=m -# end of Near Field Communication (NFC) devices - -CONFIG_PSAMPLE=m -CONFIG_NET_IFE=m -CONFIG_LWTUNNEL=y -CONFIG_LWTUNNEL_BPF=y -CONFIG_DST_CACHE=y -CONFIG_GRO_CELLS=y -CONFIG_SOCK_VALIDATE_XMIT=y 
-CONFIG_NET_SOCK_MSG=y -CONFIG_NET_DEVLINK=y -CONFIG_PAGE_POOL=y -CONFIG_FAILOVER=m -CONFIG_ETHTOOL_NETLINK=y -CONFIG_HAVE_EBPF_JIT=y - -# -# Device Drivers -# -CONFIG_HAVE_EISA=y -# CONFIG_EISA is not set -CONFIG_HAVE_PCI=y -CONFIG_PCI=y -CONFIG_PCI_DOMAINS=y -CONFIG_PCIEPORTBUS=y -CONFIG_HOTPLUG_PCI_PCIE=y -CONFIG_PCIEAER=y -# CONFIG_PCIEAER_INJECT is not set -CONFIG_PCIE_ECRC=y -CONFIG_PCIEASPM=y -CONFIG_PCIEASPM_DEFAULT=y -# CONFIG_PCIEASPM_POWERSAVE is not set -# CONFIG_PCIEASPM_POWER_SUPERSAVE is not set -# CONFIG_PCIEASPM_PERFORMANCE is not set -CONFIG_PCIE_PME=y -CONFIG_PCIE_DPC=y -CONFIG_PCIE_PTM=y -# CONFIG_PCIE_BW is not set -CONFIG_PCIE_EDR=y -CONFIG_PCI_MSI=y -CONFIG_PCI_MSI_IRQ_DOMAIN=y -CONFIG_PCI_MSI_ARCH_FALLBACKS=y -CONFIG_PCI_QUIRKS=y -# CONFIG_PCI_DEBUG is not set -CONFIG_PCI_REALLOC_ENABLE_AUTO=y -CONFIG_PCI_STUB=y -CONFIG_PCI_PF_STUB=m -CONFIG_XEN_PCIDEV_FRONTEND=m -CONFIG_PCI_ATS=y -CONFIG_PCI_ECAM=y -CONFIG_PCI_LOCKLESS_CONFIG=y -CONFIG_PCI_IOV=y -CONFIG_PCI_PRI=y -CONFIG_PCI_PASID=y -CONFIG_PCI_P2PDMA=y -CONFIG_PCI_LABEL=y -CONFIG_PCI_HYPERV=m -# CONFIG_PCIE_BUS_TUNE_OFF is not set -CONFIG_PCIE_BUS_DEFAULT=y -# CONFIG_PCIE_BUS_SAFE is not set -# CONFIG_PCIE_BUS_PERFORMANCE is not set -# CONFIG_PCIE_BUS_PEER2PEER is not set -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_ACPI=y -CONFIG_HOTPLUG_PCI_ACPI_IBM=m -CONFIG_HOTPLUG_PCI_CPCI=y -CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m -CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m -CONFIG_HOTPLUG_PCI_SHPC=y - -# -# PCI controller drivers -# -CONFIG_PCI_FTPCI100=y -CONFIG_PCI_HOST_COMMON=y -CONFIG_PCI_HOST_GENERIC=y -CONFIG_PCIE_XILINX=y -CONFIG_VMD=m -CONFIG_PCI_HYPERV_INTERFACE=m - -# -# DesignWare PCI Core Support -# -CONFIG_PCIE_DW=y -CONFIG_PCIE_DW_HOST=y -CONFIG_PCIE_DW_EP=y -CONFIG_PCIE_DW_PLAT=y -CONFIG_PCIE_DW_PLAT_HOST=y -CONFIG_PCIE_DW_PLAT_EP=y -CONFIG_PCIE_INTEL_GW=y -CONFIG_PCI_MESON=y -# end of DesignWare PCI Core Support - -# -# Mobiveil PCIe Core Support -# -# end of Mobiveil PCIe Core Support - -# -# Cadence PCIe controllers support -# -CONFIG_PCIE_CADENCE=y -CONFIG_PCIE_CADENCE_HOST=y -CONFIG_PCIE_CADENCE_EP=y -CONFIG_PCIE_CADENCE_PLAT=y -CONFIG_PCIE_CADENCE_PLAT_HOST=y -CONFIG_PCIE_CADENCE_PLAT_EP=y -# CONFIG_PCI_J721E_HOST is not set -# CONFIG_PCI_J721E_EP is not set -# end of Cadence PCIe controllers support -# end of PCI controller drivers - -# -# PCI Endpoint -# -CONFIG_PCI_ENDPOINT=y -CONFIG_PCI_ENDPOINT_CONFIGFS=y -# CONFIG_PCI_EPF_TEST is not set -# end of PCI Endpoint - -# -# PCI switch controller drivers -# -CONFIG_PCI_SW_SWITCHTEC=m -# end of PCI switch controller drivers - -CONFIG_PCCARD=m -CONFIG_PCMCIA=m -CONFIG_PCMCIA_LOAD_CIS=y -CONFIG_CARDBUS=y - -# -# PC-card bridges -# -CONFIG_YENTA=m -CONFIG_YENTA_O2=y -CONFIG_YENTA_RICOH=y -CONFIG_YENTA_TI=y -CONFIG_YENTA_ENE_TUNE=y -CONFIG_YENTA_TOSHIBA=y -CONFIG_PD6729=m -CONFIG_I82092=m -CONFIG_PCCARD_NONSTATIC=y -CONFIG_RAPIDIO=m -CONFIG_RAPIDIO_TSI721=m -CONFIG_RAPIDIO_DISC_TIMEOUT=30 -CONFIG_RAPIDIO_ENABLE_RX_TX_PORTS=y -CONFIG_RAPIDIO_DMA_ENGINE=y -# CONFIG_RAPIDIO_DEBUG is not set -CONFIG_RAPIDIO_ENUM_BASIC=m -CONFIG_RAPIDIO_CHMAN=m -CONFIG_RAPIDIO_MPORT_CDEV=m - -# -# RapidIO Switch drivers -# -CONFIG_RAPIDIO_TSI57X=m -CONFIG_RAPIDIO_CPS_XX=m -CONFIG_RAPIDIO_TSI568=m -CONFIG_RAPIDIO_CPS_GEN2=m -CONFIG_RAPIDIO_RXS_GEN3=m -# end of RapidIO Switch drivers - -# -# Generic Driver Options -# -# CONFIG_UEVENT_HELPER is not set -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y - -# -# Firmware loader -# -CONFIG_FW_LOADER=y 
-CONFIG_FW_LOADER_PAGED_BUF=y -CONFIG_EXTRA_FIRMWARE="" -# CONFIG_FW_LOADER_USER_HELPER is not set -CONFIG_FW_LOADER_COMPRESS=y -CONFIG_FW_CACHE=y -# end of Firmware loader - -CONFIG_WANT_DEV_COREDUMP=y -CONFIG_ALLOW_DEV_COREDUMP=y -CONFIG_DEV_COREDUMP=y -# CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set -# CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set -CONFIG_HMEM_REPORTING=y -# CONFIG_TEST_ASYNC_DRIVER_PROBE is not set -CONFIG_SYS_HYPERVISOR=y -CONFIG_GENERIC_CPU_AUTOPROBE=y -CONFIG_GENERIC_CPU_VULNERABILITIES=y -CONFIG_REGMAP=y -CONFIG_REGMAP_I2C=y -CONFIG_REGMAP_SLIMBUS=m -CONFIG_REGMAP_SPI=y -CONFIG_REGMAP_SPMI=m -CONFIG_REGMAP_W1=m -CONFIG_REGMAP_MMIO=y -CONFIG_REGMAP_IRQ=y -CONFIG_REGMAP_SOUNDWIRE=m -CONFIG_REGMAP_SCCB=m -CONFIG_REGMAP_I3C=m -CONFIG_DMA_SHARED_BUFFER=y -# CONFIG_DMA_FENCE_TRACE is not set -# end of Generic Driver Options - -# -# Bus devices -# -CONFIG_MOXTET=m -CONFIG_SIMPLE_PM_BUS=y -CONFIG_MHI_BUS=m -# CONFIG_MHI_BUS_DEBUG is not set -# end of Bus devices - -CONFIG_CONNECTOR=y -CONFIG_PROC_EVENTS=y -CONFIG_GNSS=m -CONFIG_GNSS_SERIAL=m -CONFIG_GNSS_MTK_SERIAL=m -CONFIG_GNSS_SIRF_SERIAL=m -CONFIG_GNSS_UBX_SERIAL=m -CONFIG_MTD=m -CONFIG_MTD_TESTS=m - -# -# Partition parsers -# -CONFIG_MTD_AR7_PARTS=m -CONFIG_MTD_CMDLINE_PARTS=m -CONFIG_MTD_OF_PARTS=m -CONFIG_MTD_REDBOOT_PARTS=m -CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1 -# CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED is not set -# CONFIG_MTD_REDBOOT_PARTS_READONLY is not set -# end of Partition parsers - -# -# User Modules And Translation Layers -# -CONFIG_MTD_BLKDEVS=m -CONFIG_MTD_BLOCK=m -CONFIG_MTD_BLOCK_RO=m -CONFIG_FTL=m -CONFIG_NFTL=m -CONFIG_NFTL_RW=y -CONFIG_INFTL=m -CONFIG_RFD_FTL=m -CONFIG_SSFDC=m -CONFIG_SM_FTL=m -CONFIG_MTD_OOPS=m -CONFIG_MTD_PSTORE=m -CONFIG_MTD_SWAP=m -CONFIG_MTD_PARTITIONED_MASTER=y - -# -# RAM/ROM/Flash chip drivers -# -CONFIG_MTD_CFI=m -CONFIG_MTD_JEDECPROBE=m -CONFIG_MTD_GEN_PROBE=m -# CONFIG_MTD_CFI_ADV_OPTIONS is not set -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y -CONFIG_MTD_CFI_INTELEXT=m -CONFIG_MTD_CFI_AMDSTD=m -CONFIG_MTD_CFI_STAA=m -CONFIG_MTD_CFI_UTIL=m -CONFIG_MTD_RAM=m -CONFIG_MTD_ROM=m -CONFIG_MTD_ABSENT=m -# end of RAM/ROM/Flash chip drivers - -# -# Mapping drivers for chip access -# -CONFIG_MTD_COMPLEX_MAPPINGS=y -CONFIG_MTD_PHYSMAP=m -# CONFIG_MTD_PHYSMAP_COMPAT is not set -CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_PHYSMAP_VERSATILE=y -CONFIG_MTD_PHYSMAP_GEMINI=y -CONFIG_MTD_PHYSMAP_GPIO_ADDR=y -CONFIG_MTD_SBC_GXX=m -CONFIG_MTD_AMD76XROM=m -CONFIG_MTD_ICHXROM=m -CONFIG_MTD_ESB2ROM=m -CONFIG_MTD_CK804XROM=m -CONFIG_MTD_SCB2_FLASH=m -CONFIG_MTD_NETtel=m -CONFIG_MTD_L440GX=m -CONFIG_MTD_PCI=m -CONFIG_MTD_PCMCIA=m -# CONFIG_MTD_PCMCIA_ANONYMOUS is not set -CONFIG_MTD_INTEL_VR_NOR=m -CONFIG_MTD_PLATRAM=m -# end of Mapping drivers for chip access - -# -# Self-contained MTD device drivers -# -CONFIG_MTD_PMC551=m -# CONFIG_MTD_PMC551_BUGFIX is not set -# CONFIG_MTD_PMC551_DEBUG is not set -CONFIG_MTD_DATAFLASH=m -# CONFIG_MTD_DATAFLASH_WRITE_VERIFY is not set -CONFIG_MTD_DATAFLASH_OTP=y -CONFIG_MTD_MCHP23K256=m -CONFIG_MTD_SST25L=m -CONFIG_MTD_SLRAM=m -CONFIG_MTD_PHRAM=m -CONFIG_MTD_MTDRAM=m -CONFIG_MTDRAM_TOTAL_SIZE=4096 -CONFIG_MTDRAM_ERASE_SIZE=128 -CONFIG_MTD_BLOCK2MTD=m - -# -# Disk-On-Chip Device Drivers -# -CONFIG_MTD_DOCG3=m -CONFIG_BCH_CONST_M=14 -CONFIG_BCH_CONST_T=4 -# end of Self-contained MTD device drivers - -# -# NAND -# -CONFIG_MTD_NAND_CORE=m -CONFIG_MTD_ONENAND=m -# 
CONFIG_MTD_ONENAND_VERIFY_WRITE is not set -CONFIG_MTD_ONENAND_GENERIC=m -CONFIG_MTD_ONENAND_OTP=y -CONFIG_MTD_ONENAND_2X_PROGRAM=y -CONFIG_MTD_NAND_ECC_SW_HAMMING=m -CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y -CONFIG_MTD_RAW_NAND=m -CONFIG_MTD_NAND_ECC_SW_BCH=y - -# -# Raw/parallel NAND flash controllers -# -CONFIG_MTD_NAND_DENALI=m -CONFIG_MTD_NAND_DENALI_PCI=m -CONFIG_MTD_NAND_DENALI_DT=m -CONFIG_MTD_NAND_CAFE=m -CONFIG_MTD_NAND_MXIC=m -CONFIG_MTD_NAND_GPIO=m -CONFIG_MTD_NAND_PLATFORM=m -CONFIG_MTD_NAND_CADENCE=m -CONFIG_MTD_NAND_ARASAN=m - -# -# Misc -# -CONFIG_MTD_SM_COMMON=m -CONFIG_MTD_NAND_NANDSIM=m -CONFIG_MTD_NAND_RICOH=m -CONFIG_MTD_NAND_DISKONCHIP=m -# CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADVANCED is not set -CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADDRESS=0 -CONFIG_MTD_NAND_DISKONCHIP_BBTWRITE=y -CONFIG_MTD_SPI_NAND=m - -# -# ECC engine support -# -CONFIG_MTD_NAND_ECC=y -# end of ECC engine support -# end of NAND - -# -# LPDDR & LPDDR2 PCM memory drivers -# -CONFIG_MTD_LPDDR=m -CONFIG_MTD_QINFO_PROBE=m -# end of LPDDR & LPDDR2 PCM memory drivers - -CONFIG_MTD_SPI_NOR=m -CONFIG_MTD_SPI_NOR_USE_4K_SECTORS=y -CONFIG_SPI_INTEL_SPI=m -CONFIG_SPI_INTEL_SPI_PCI=m -CONFIG_SPI_INTEL_SPI_PLATFORM=m -CONFIG_MTD_UBI=m -CONFIG_MTD_UBI_WL_THRESHOLD=4096 -CONFIG_MTD_UBI_BEB_LIMIT=20 -CONFIG_MTD_UBI_FASTMAP=y -CONFIG_MTD_UBI_GLUEBI=m -CONFIG_MTD_UBI_BLOCK=y -CONFIG_MTD_HYPERBUS=m -CONFIG_DTC=y -CONFIG_OF=y -# CONFIG_OF_UNITTEST is not set -CONFIG_OF_FLATTREE=y -CONFIG_OF_EARLY_FLATTREE=y -CONFIG_OF_KOBJ=y -CONFIG_OF_DYNAMIC=y -CONFIG_OF_ADDRESS=y -CONFIG_OF_IRQ=y -CONFIG_OF_NET=y -CONFIG_OF_RESERVED_MEM=y -CONFIG_OF_RESOLVE=y -CONFIG_OF_OVERLAY=y -CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_SERIAL=m -CONFIG_PARPORT_PC_FIFO=y -CONFIG_PARPORT_PC_SUPERIO=y -CONFIG_PARPORT_PC_PCMCIA=m -CONFIG_PARPORT_AX88796=m -CONFIG_PARPORT_1284=y -CONFIG_PARPORT_NOT_PC=y -CONFIG_PNP=y -CONFIG_PNP_DEBUG_MESSAGES=y - -# -# Protocols -# -CONFIG_PNPACPI=y -CONFIG_BLK_DEV=y -# CONFIG_BLK_DEV_NULL_BLK is not set -CONFIG_BLK_DEV_FD=m -CONFIG_CDROM=m -# CONFIG_PARIDE is not set -CONFIG_BLK_DEV_PCIESSD_MTIP32XX=m -CONFIG_ZRAM=m -CONFIG_ZRAM_WRITEBACK=y -# CONFIG_ZRAM_MEMORY_TRACKING is not set -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_BLK_DEV_DRBD=m -# CONFIG_DRBD_FAULT_INJECTION is not set -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_SKD=m -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_RAM=m -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=16384 -CONFIG_CDROM_PKTCDVD=m -CONFIG_CDROM_PKTCDVD_BUFFERS=8 -# CONFIG_CDROM_PKTCDVD_WCACHE is not set -CONFIG_ATA_OVER_ETH=m -CONFIG_XEN_BLKDEV_FRONTEND=m -CONFIG_XEN_BLKDEV_BACKEND=m -CONFIG_VIRTIO_BLK=m -CONFIG_BLK_DEV_RBD=m -CONFIG_BLK_DEV_RSXX=m -CONFIG_BLK_DEV_RNBD=y -CONFIG_BLK_DEV_RNBD_CLIENT=m -CONFIG_BLK_DEV_RNBD_SERVER=m - -# -# NVME Support -# -CONFIG_NVME_CORE=y -CONFIG_BLK_DEV_NVME=y -CONFIG_NVME_MULTIPATH=y -CONFIG_NVME_HWMON=y -CONFIG_NVME_FABRICS=m -CONFIG_NVME_RDMA=m -CONFIG_NVME_FC=m -CONFIG_NVME_TCP=m -CONFIG_NVME_TARGET=m -# CONFIG_NVME_TARGET_PASSTHRU is not set -CONFIG_NVME_TARGET_LOOP=m -CONFIG_NVME_TARGET_RDMA=m -CONFIG_NVME_TARGET_FC=m -CONFIG_NVME_TARGET_FCLOOP=m -CONFIG_NVME_TARGET_TCP=m -# end of NVME Support - -# -# Misc devices -# -CONFIG_SENSORS_LIS3LV02D=m -CONFIG_AD525X_DPOT=m -CONFIG_AD525X_DPOT_I2C=m -CONFIG_AD525X_DPOT_SPI=m -# CONFIG_DUMMY_IRQ is not set -CONFIG_IBM_ASM=m -CONFIG_PHANTOM=m -CONFIG_TIFM_CORE=m -CONFIG_TIFM_7XX1=m -CONFIG_ICS932S401=m 
-CONFIG_ENCLOSURE_SERVICES=m -CONFIG_HP_ILO=m -CONFIG_APDS9802ALS=m -CONFIG_ISL29003=m -CONFIG_ISL29020=m -CONFIG_SENSORS_TSL2550=m -CONFIG_SENSORS_BH1770=m -CONFIG_SENSORS_APDS990X=m -CONFIG_HMC6352=m -CONFIG_DS1682=m -CONFIG_VMWARE_BALLOON=m -CONFIG_LATTICE_ECP3_CONFIG=m -# CONFIG_SRAM is not set -CONFIG_PCI_ENDPOINT_TEST=m -CONFIG_XILINX_SDFEC=m -CONFIG_MISC_RTSX=m -CONFIG_PVPANIC=m -# CONFIG_HISI_HIKEY_USB is not set -CONFIG_C2PORT=m -CONFIG_C2PORT_DURAMAR_2150=m - -# -# EEPROM support -# -CONFIG_EEPROM_AT24=m -# CONFIG_EEPROM_AT25 is not set -CONFIG_EEPROM_LEGACY=m -CONFIG_EEPROM_MAX6875=m -CONFIG_EEPROM_93CX6=m -# CONFIG_EEPROM_93XX46 is not set -CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EEPROM_EE1004=m -# end of EEPROM support - -CONFIG_CB710_CORE=m -# CONFIG_CB710_DEBUG is not set -CONFIG_CB710_DEBUG_ASSUMPTIONS=y - -# -# Texas Instruments shared transport line discipline -# -CONFIG_TI_ST=m -# end of Texas Instruments shared transport line discipline - -CONFIG_SENSORS_LIS3_I2C=m -CONFIG_ALTERA_STAPL=m -CONFIG_INTEL_MEI=m -CONFIG_INTEL_MEI_ME=m -CONFIG_INTEL_MEI_TXE=m -# CONFIG_INTEL_MEI_VIRTIO is not set -CONFIG_INTEL_MEI_HDCP=m -CONFIG_VMWARE_VMCI=m - -# -# Intel MIC & related support -# -CONFIG_INTEL_MIC_BUS=m -CONFIG_SCIF_BUS=m -CONFIG_VOP_BUS=m -CONFIG_INTEL_MIC_HOST=m -CONFIG_INTEL_MIC_CARD=m -CONFIG_SCIF=m -CONFIG_MIC_COSM=m -CONFIG_VOP=m -# end of Intel MIC & related support - -CONFIG_GENWQE=m -CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY=0 -CONFIG_ECHO=m -CONFIG_MISC_ALCOR_PCI=m -CONFIG_MISC_RTSX_PCI=m -CONFIG_MISC_RTSX_USB=m -CONFIG_HABANA_AI=m -CONFIG_UACCE=m -# end of Misc devices - -CONFIG_HAVE_IDE=y -# CONFIG_IDE is not set - -# -# SCSI device support -# -CONFIG_SCSI_MOD=y -CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y -CONFIG_SCSI_DMA=y -CONFIG_SCSI_NETLINK=y -CONFIG_SCSI_PROC_FS=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=m -CONFIG_BLK_DEV_SR=m -CONFIG_CHR_DEV_SG=m -CONFIG_CHR_DEV_SCH=m -CONFIG_SCSI_ENCLOSURE=m -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SCAN_ASYNC=y - -# -# SCSI Transports -# -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=m -CONFIG_SCSI_ISCSI_ATTRS=m -CONFIG_SCSI_SAS_ATTRS=m -CONFIG_SCSI_SAS_LIBSAS=m -CONFIG_SCSI_SAS_ATA=y -CONFIG_SCSI_SAS_HOST_SMP=y -CONFIG_SCSI_SRP_ATTRS=m -# end of SCSI Transports - -CONFIG_SCSI_LOWLEVEL=y -CONFIG_ISCSI_TCP=m -CONFIG_ISCSI_BOOT_SYSFS=m -CONFIG_SCSI_CXGB3_ISCSI=m -CONFIG_SCSI_CXGB4_ISCSI=m -CONFIG_SCSI_BNX2_ISCSI=m -CONFIG_SCSI_BNX2X_FCOE=m -CONFIG_BE2ISCSI=m -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_HPSA=m -CONFIG_SCSI_3W_9XXX=m -CONFIG_SCSI_3W_SAS=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -CONFIG_AIC7XXX_DEBUG_ENABLE=y -CONFIG_AIC7XXX_DEBUG_MASK=0 -CONFIG_AIC7XXX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -CONFIG_AIC79XX_DEBUG_ENABLE=y -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_AIC79XX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC94XX=m -CONFIG_AIC94XX_DEBUG=y -CONFIG_SCSI_MVSAS=m -CONFIG_SCSI_MVSAS_DEBUG=y -CONFIG_SCSI_MVSAS_TASKLET=y -CONFIG_SCSI_MVUMI=m -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_ARCMSR=m -CONFIG_SCSI_ESAS2R=m -CONFIG_MEGARAID_NEWGEN=y -CONFIG_MEGARAID_MM=m -CONFIG_MEGARAID_MAILBOX=m -CONFIG_MEGARAID_LEGACY=m -CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_MPT3SAS=m -CONFIG_SCSI_MPT2SAS_MAX_SGE=128 -CONFIG_SCSI_MPT3SAS_MAX_SGE=128 -CONFIG_SCSI_MPT2SAS=m -CONFIG_SCSI_SMARTPQI=m 
-CONFIG_SCSI_UFSHCD=m -CONFIG_SCSI_UFSHCD_PCI=m -# CONFIG_SCSI_UFS_DWC_TC_PCI is not set -CONFIG_SCSI_UFSHCD_PLATFORM=m -CONFIG_SCSI_UFS_CDNS_PLATFORM=m -# CONFIG_SCSI_UFS_DWC_TC_PLATFORM is not set -CONFIG_SCSI_UFS_BSG=y -# CONFIG_SCSI_UFS_CRYPTO is not set -CONFIG_SCSI_HPTIOP=m -CONFIG_SCSI_BUSLOGIC=m -CONFIG_SCSI_FLASHPOINT=y -CONFIG_SCSI_MYRB=m -CONFIG_SCSI_MYRS=m -CONFIG_VMWARE_PVSCSI=m -CONFIG_XEN_SCSI_FRONTEND=m -CONFIG_HYPERV_STORAGE=m -CONFIG_LIBFC=m -CONFIG_LIBFCOE=m -CONFIG_FCOE=m -CONFIG_FCOE_FNIC=m -CONFIG_SCSI_SNIC=m -# CONFIG_SCSI_SNIC_DEBUG_FS is not set -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_FDOMAIN=m -CONFIG_SCSI_FDOMAIN_PCI=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_ISCI=m -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_SCSI_STEX=m -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -CONFIG_SCSI_SYM53C8XX_MMIO=y -CONFIG_SCSI_IPR=m -CONFIG_SCSI_IPR_TRACE=y -CONFIG_SCSI_IPR_DUMP=y -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_QLA_FC=m -CONFIG_TCM_QLA2XXX=m -# CONFIG_TCM_QLA2XXX_DEBUG is not set -CONFIG_SCSI_QLA_ISCSI=m -CONFIG_QEDI=m -CONFIG_QEDF=m -CONFIG_SCSI_LPFC=m -# CONFIG_SCSI_LPFC_DEBUG_FS is not set -CONFIG_SCSI_DC395x=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_WD719X=m -CONFIG_SCSI_DEBUG=m -CONFIG_SCSI_PMCRAID=m -CONFIG_SCSI_PM8001=m -CONFIG_SCSI_BFA_FC=m -CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_CHELSIO_FCOE=m -CONFIG_SCSI_LOWLEVEL_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_QLOGIC=m -CONFIG_PCMCIA_SYM53C500=m -CONFIG_SCSI_DH=y -CONFIG_SCSI_DH_RDAC=m -CONFIG_SCSI_DH_HP_SW=m -CONFIG_SCSI_DH_EMC=m -CONFIG_SCSI_DH_ALUA=m -# end of SCSI device support - -CONFIG_ATA=y -CONFIG_SATA_HOST=y -CONFIG_PATA_TIMINGS=y -CONFIG_ATA_VERBOSE_ERROR=y -CONFIG_ATA_FORCE=y -CONFIG_ATA_ACPI=y -CONFIG_SATA_ZPODD=y -CONFIG_SATA_PMP=y - -# -# Controllers with non-SFF native interface -# -CONFIG_SATA_AHCI=y -CONFIG_SATA_MOBILE_LPM_POLICY=3 -CONFIG_SATA_AHCI_PLATFORM=m -CONFIG_AHCI_CEVA=m -CONFIG_AHCI_QORIQ=m -CONFIG_SATA_INIC162X=m -CONFIG_SATA_ACARD_AHCI=m -CONFIG_SATA_SIL24=m -CONFIG_ATA_SFF=y - -# -# SFF controllers with custom DMA interface -# -CONFIG_PDC_ADMA=m -CONFIG_SATA_QSTOR=m -CONFIG_SATA_SX4=m -CONFIG_ATA_BMDMA=y - -# -# SATA SFF controllers with BMDMA -# -CONFIG_ATA_PIIX=m -CONFIG_SATA_DWC=m -# CONFIG_SATA_DWC_OLD_DMA is not set -# CONFIG_SATA_DWC_DEBUG is not set -CONFIG_SATA_MV=m -CONFIG_SATA_NV=m -CONFIG_SATA_PROMISE=m -CONFIG_SATA_SIL=m -CONFIG_SATA_SIS=m -CONFIG_SATA_SVW=m -CONFIG_SATA_ULI=m -CONFIG_SATA_VIA=m -CONFIG_SATA_VITESSE=m - -# -# PATA SFF controllers with BMDMA -# -CONFIG_PATA_ALI=m -CONFIG_PATA_AMD=m -CONFIG_PATA_ARTOP=m -CONFIG_PATA_ATIIXP=m -CONFIG_PATA_ATP867X=m -CONFIG_PATA_CMD64X=m -CONFIG_PATA_CYPRESS=m -CONFIG_PATA_EFAR=m -CONFIG_PATA_HPT366=m -CONFIG_PATA_HPT37X=m -CONFIG_PATA_HPT3X2N=m -CONFIG_PATA_HPT3X3=m -CONFIG_PATA_HPT3X3_DMA=y -CONFIG_PATA_IT8213=m -CONFIG_PATA_IT821X=m -CONFIG_PATA_JMICRON=m -CONFIG_PATA_MARVELL=m -CONFIG_PATA_NETCELL=m -CONFIG_PATA_NINJA32=m -CONFIG_PATA_NS87415=m -CONFIG_PATA_OLDPIIX=m -CONFIG_PATA_OPTIDMA=m -CONFIG_PATA_PDC2027X=m -CONFIG_PATA_PDC_OLD=m -CONFIG_PATA_RADISYS=m -CONFIG_PATA_RDC=m -CONFIG_PATA_SCH=m -CONFIG_PATA_SERVERWORKS=m -CONFIG_PATA_SIL680=m -CONFIG_PATA_SIS=m -CONFIG_PATA_TOSHIBA=m -CONFIG_PATA_TRIFLEX=m -CONFIG_PATA_VIA=m -CONFIG_PATA_WINBOND=m - -# -# PIO-only SFF 
controllers -# -CONFIG_PATA_CMD640_PCI=m -CONFIG_PATA_MPIIX=m -CONFIG_PATA_NS87410=m -CONFIG_PATA_OPTI=m -CONFIG_PATA_PCMCIA=m -# CONFIG_PATA_PLATFORM is not set -CONFIG_PATA_RZ1000=m - -# -# Generic fallback / legacy drivers -# -CONFIG_PATA_ACPI=m -CONFIG_ATA_GENERIC=m -CONFIG_PATA_LEGACY=m -CONFIG_MD=y -CONFIG_BLK_DEV_MD=m -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID10=m -CONFIG_MD_RAID456=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m -CONFIG_MD_CLUSTER=m -CONFIG_BCACHE=m -# CONFIG_BCACHE_DEBUG is not set -# CONFIG_BCACHE_CLOSURES_DEBUG is not set -CONFIG_BCACHE_ASYNC_REGISTRATION=y -CONFIG_BLK_DEV_DM_BUILTIN=y -CONFIG_BLK_DEV_DM=m -CONFIG_DM_DEBUG=y -CONFIG_DM_BUFIO=m -# CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING is not set -CONFIG_DM_BIO_PRISON=m -CONFIG_DM_PERSISTENT_DATA=m -CONFIG_DM_UNSTRIPED=m -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_CACHE=m -CONFIG_DM_CACHE_SMQ=m -CONFIG_DM_WRITECACHE=m -CONFIG_DM_EBS=m -CONFIG_DM_ERA=m -CONFIG_DM_CLONE=m -CONFIG_DM_MIRROR=m -CONFIG_DM_LOG_USERSPACE=m -CONFIG_DM_RAID=m -CONFIG_DM_ZERO=m -CONFIG_DM_MULTIPATH=m -CONFIG_DM_MULTIPATH_QL=m -CONFIG_DM_MULTIPATH_ST=m -CONFIG_DM_MULTIPATH_HST=m -CONFIG_DM_DELAY=m -CONFIG_DM_DUST=m -CONFIG_DM_UEVENT=y -CONFIG_DM_FLAKEY=m -CONFIG_DM_VERITY=m -CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y -CONFIG_DM_VERITY_FEC=y -CONFIG_DM_SWITCH=m -CONFIG_DM_LOG_WRITES=m -CONFIG_DM_INTEGRITY=m -CONFIG_DM_ZONED=m -CONFIG_TARGET_CORE=m -CONFIG_TCM_IBLOCK=m -CONFIG_TCM_FILEIO=m -CONFIG_TCM_PSCSI=m -CONFIG_TCM_USER2=m -CONFIG_LOOPBACK_TARGET=m -CONFIG_TCM_FC=m -CONFIG_ISCSI_TARGET=m -CONFIG_ISCSI_TARGET_CXGB4=m -CONFIG_SBP_TARGET=m -CONFIG_FUSION=y -CONFIG_FUSION_SPI=m -CONFIG_FUSION_FC=m -CONFIG_FUSION_SAS=m -CONFIG_FUSION_MAX_SGE=128 -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -# CONFIG_FUSION_LOGGING is not set - -# -# IEEE 1394 (FireWire) support -# -CONFIG_FIREWIRE=m -CONFIG_FIREWIRE_OHCI=m -CONFIG_FIREWIRE_SBP2=m -CONFIG_FIREWIRE_NET=m -CONFIG_FIREWIRE_NOSY=m -# end of IEEE 1394 (FireWire) support - -CONFIG_MACINTOSH_DRIVERS=y -CONFIG_MAC_EMUMOUSEBTN=m -CONFIG_NETDEVICES=y -CONFIG_MII=m -CONFIG_NET_CORE=y -CONFIG_BONDING=m -CONFIG_DUMMY=m -CONFIG_WIREGUARD=m -# CONFIG_WIREGUARD_DEBUG is not set -CONFIG_EQUALIZER=m -CONFIG_NET_FC=y -CONFIG_IFB=m -CONFIG_NET_TEAM=m -CONFIG_NET_TEAM_MODE_BROADCAST=m -CONFIG_NET_TEAM_MODE_ROUNDROBIN=m -CONFIG_NET_TEAM_MODE_RANDOM=m -CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m -CONFIG_NET_TEAM_MODE_LOADBALANCE=m -CONFIG_MACVLAN=m -CONFIG_MACVTAP=m -CONFIG_IPVLAN_L3S=y -CONFIG_IPVLAN=m -CONFIG_IPVTAP=m -CONFIG_VXLAN=m -CONFIG_GENEVE=m -CONFIG_BAREUDP=m -CONFIG_GTP=m -CONFIG_MACSEC=m -CONFIG_NETCONSOLE=m -CONFIG_NETCONSOLE_DYNAMIC=y -CONFIG_NETPOLL=y -CONFIG_NET_POLL_CONTROLLER=y -CONFIG_NTB_NETDEV=m -CONFIG_RIONET=m -CONFIG_RIONET_TX_SIZE=128 -CONFIG_RIONET_RX_SIZE=128 -CONFIG_TUN=m -CONFIG_TAP=m -# CONFIG_TUN_VNET_CROSS_LE is not set -CONFIG_VETH=m -CONFIG_VIRTIO_NET=m -CONFIG_NLMON=m -CONFIG_NET_VRF=m -CONFIG_VSOCKMON=m -CONFIG_SUNGEM_PHY=m -# CONFIG_ARCNET is not set -CONFIG_ATM_DRIVERS=y -# CONFIG_ATM_DUMMY is not set -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_NICSTAR=m -# CONFIG_ATM_NICSTAR_USE_SUNI is not set -# CONFIG_ATM_NICSTAR_USE_IDT77105 is not set -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -# CONFIG_ATM_IDT77252_RCV_ALL is not set 
-CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E=m -CONFIG_ATM_FORE200E_USE_TASKLET=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_HE=m -CONFIG_ATM_HE_USE_SUNI=y -CONFIG_ATM_SOLOS=m -CONFIG_CAIF_DRIVERS=y -CONFIG_CAIF_TTY=m -CONFIG_CAIF_HSI=m -CONFIG_CAIF_VIRTIO=m - -# -# Distributed Switch Architecture drivers -# -CONFIG_B53=m -# CONFIG_B53_SPI_DRIVER is not set -CONFIG_B53_MDIO_DRIVER=m -CONFIG_B53_MMAP_DRIVER=m -CONFIG_B53_SRAB_DRIVER=m -CONFIG_B53_SERDES=m -CONFIG_NET_DSA_BCM_SF2=m -CONFIG_NET_DSA_LOOP=m -CONFIG_NET_DSA_LANTIQ_GSWIP=m -CONFIG_NET_DSA_MT7530=m -CONFIG_NET_DSA_MV88E6060=m -CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON=m -CONFIG_NET_DSA_MICROCHIP_KSZ9477=m -CONFIG_NET_DSA_MICROCHIP_KSZ9477_I2C=m -CONFIG_NET_DSA_MICROCHIP_KSZ9477_SPI=m -CONFIG_NET_DSA_MICROCHIP_KSZ8795=m -CONFIG_NET_DSA_MICROCHIP_KSZ8795_SPI=m -CONFIG_NET_DSA_MV88E6XXX=m -CONFIG_NET_DSA_MV88E6XXX_GLOBAL2=y -CONFIG_NET_DSA_MV88E6XXX_PTP=y -# CONFIG_NET_DSA_MSCC_SEVILLE is not set -CONFIG_NET_DSA_AR9331=m -CONFIG_NET_DSA_SJA1105=m -CONFIG_NET_DSA_SJA1105_PTP=y -CONFIG_NET_DSA_SJA1105_TAS=y -CONFIG_NET_DSA_SJA1105_VL=y -CONFIG_NET_DSA_QCA8K=m -CONFIG_NET_DSA_REALTEK_SMI=m -CONFIG_NET_DSA_SMSC_LAN9303=m -CONFIG_NET_DSA_SMSC_LAN9303_I2C=m -CONFIG_NET_DSA_SMSC_LAN9303_MDIO=m -CONFIG_NET_DSA_VITESSE_VSC73XX=m -CONFIG_NET_DSA_VITESSE_VSC73XX_SPI=m -CONFIG_NET_DSA_VITESSE_VSC73XX_PLATFORM=m -# end of Distributed Switch Architecture drivers - -CONFIG_ETHERNET=y -CONFIG_MDIO=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_3C589=m -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_NET_VENDOR_ADAPTEC=y -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_NET_VENDOR_AGERE=y -CONFIG_ET131X=m -CONFIG_NET_VENDOR_ALACRITECH=y -CONFIG_SLICOSS=m -CONFIG_NET_VENDOR_ALTEON=y -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_ALTERA_TSE=m -CONFIG_NET_VENDOR_AMAZON=y -CONFIG_ENA_ETHERNET=m -CONFIG_NET_VENDOR_AMD=y -CONFIG_AMD8111_ETH=m -CONFIG_PCNET32=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_AMD_XGBE=m -CONFIG_AMD_XGBE_DCB=y -CONFIG_AMD_XGBE_HAVE_ECC=y -CONFIG_NET_VENDOR_AQUANTIA=y -CONFIG_AQTION=m -CONFIG_NET_VENDOR_ARC=y -CONFIG_NET_VENDOR_ATHEROS=y -CONFIG_ATL2=m -CONFIG_ATL1=m -CONFIG_ATL1E=m -CONFIG_ATL1C=m -CONFIG_ALX=m -CONFIG_NET_VENDOR_AURORA=y -CONFIG_AURORA_NB8800=m -CONFIG_NET_VENDOR_BROADCOM=y -CONFIG_B44=m -CONFIG_B44_PCI_AUTOSELECT=y -CONFIG_B44_PCICORE_AUTOSELECT=y -CONFIG_B44_PCI=y -CONFIG_BCMGENET=m -CONFIG_BNX2=m -CONFIG_CNIC=m -CONFIG_TIGON3=m -CONFIG_TIGON3_HWMON=y -CONFIG_BNX2X=m -CONFIG_BNX2X_SRIOV=y -CONFIG_SYSTEMPORT=m -CONFIG_BNXT=m -CONFIG_BNXT_SRIOV=y -CONFIG_BNXT_FLOWER_OFFLOAD=y -CONFIG_BNXT_DCB=y -CONFIG_BNXT_HWMON=y -CONFIG_NET_VENDOR_BROCADE=y -CONFIG_BNA=m -CONFIG_NET_VENDOR_CADENCE=y -CONFIG_MACB=m -CONFIG_MACB_USE_HWSTAMP=y -CONFIG_MACB_PCI=m -CONFIG_NET_VENDOR_CAVIUM=y -CONFIG_THUNDER_NIC_PF=m -CONFIG_THUNDER_NIC_VF=m -CONFIG_THUNDER_NIC_BGX=m -CONFIG_THUNDER_NIC_RGX=m -CONFIG_CAVIUM_PTP=m -CONFIG_LIQUIDIO=m -CONFIG_LIQUIDIO_VF=m -CONFIG_NET_VENDOR_CHELSIO=y -CONFIG_CHELSIO_T1=m -CONFIG_CHELSIO_T1_1G=y -CONFIG_CHELSIO_T3=m -CONFIG_CHELSIO_T4=m -CONFIG_CHELSIO_T4_DCB=y -CONFIG_CHELSIO_T4_FCOE=y -CONFIG_CHELSIO_T4VF=m -CONFIG_CHELSIO_LIB=m -CONFIG_CHELSIO_INLINE_CRYPTO=y -CONFIG_CHELSIO_IPSEC_INLINE=m -CONFIG_CHELSIO_TLS_DEVICE=m -CONFIG_NET_VENDOR_CISCO=y -CONFIG_ENIC=m 
-CONFIG_NET_VENDOR_CORTINA=y -CONFIG_GEMINI_ETHERNET=m -CONFIG_CX_ECAT=m -CONFIG_DNET=m -CONFIG_NET_VENDOR_DEC=y -CONFIG_NET_TULIP=y -CONFIG_DE2104X=m -CONFIG_DE2104X_DSL=0 -CONFIG_TULIP=m -CONFIG_TULIP_MWI=y -CONFIG_TULIP_MMIO=y -CONFIG_TULIP_NAPI=y -CONFIG_TULIP_NAPI_HW_MITIGATION=y -CONFIG_DE4X5=m -CONFIG_WINBOND_840=m -CONFIG_DM9102=m -CONFIG_ULI526X=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_NET_VENDOR_DLINK=y -CONFIG_DL2K=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_NET_VENDOR_EMULEX=y -CONFIG_BE2NET=m -CONFIG_BE2NET_HWMON=y -CONFIG_BE2NET_BE2=y -CONFIG_BE2NET_BE3=y -CONFIG_BE2NET_LANCER=y -CONFIG_BE2NET_SKYHAWK=y -CONFIG_NET_VENDOR_EZCHIP=y -CONFIG_EZCHIP_NPS_MANAGEMENT_ENET=m -CONFIG_NET_VENDOR_FUJITSU=y -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_NET_VENDOR_GOOGLE=y -CONFIG_GVE=m -CONFIG_NET_VENDOR_HUAWEI=y -CONFIG_HINIC=m -CONFIG_NET_VENDOR_I825XX=y -CONFIG_NET_VENDOR_INTEL=y -CONFIG_E100=m -CONFIG_E1000=m -CONFIG_E1000E=m -CONFIG_E1000E_HWTS=y -CONFIG_IGB=m -CONFIG_IGB_HWMON=y -CONFIG_IGB_DCA=y -CONFIG_IGBVF=m -CONFIG_IXGB=m -CONFIG_IXGBE=m -CONFIG_IXGBE_HWMON=y -CONFIG_IXGBE_DCA=y -CONFIG_IXGBE_DCB=y -# CONFIG_IXGBE_IPSEC is not set -CONFIG_IXGBEVF=m -CONFIG_IXGBEVF_IPSEC=y -CONFIG_I40E=m -CONFIG_I40E_DCB=y -CONFIG_IAVF=m -CONFIG_I40EVF=m -CONFIG_ICE=m -CONFIG_FM10K=m -CONFIG_IGC=m -CONFIG_JME=m -CONFIG_NET_VENDOR_MARVELL=y -CONFIG_MVMDIO=m -CONFIG_SKGE=m -# CONFIG_SKGE_DEBUG is not set -CONFIG_SKGE_GENESIS=y -CONFIG_SKY2=m -# CONFIG_SKY2_DEBUG is not set -# CONFIG_PRESTERA is not set -CONFIG_NET_VENDOR_MELLANOX=y -CONFIG_MLX4_EN=m -CONFIG_MLX4_EN_DCB=y -CONFIG_MLX4_CORE=m -CONFIG_MLX4_DEBUG=y -CONFIG_MLX4_CORE_GEN2=y -CONFIG_MLX5_CORE=m -CONFIG_MLX5_ACCEL=y -CONFIG_MLX5_FPGA=y -CONFIG_MLX5_CORE_EN=y -CONFIG_MLX5_EN_ARFS=y -CONFIG_MLX5_EN_RXNFC=y -CONFIG_MLX5_MPFS=y -CONFIG_MLX5_ESWITCH=y -CONFIG_MLX5_CLS_ACT=y -CONFIG_MLX5_TC_CT=y -CONFIG_MLX5_CORE_EN_DCB=y -CONFIG_MLX5_CORE_IPOIB=y -CONFIG_MLX5_FPGA_IPSEC=y -# CONFIG_MLX5_IPSEC is not set -CONFIG_MLX5_EN_IPSEC=y -CONFIG_MLX5_FPGA_TLS=y -CONFIG_MLX5_TLS=y -CONFIG_MLX5_EN_TLS=y -CONFIG_MLX5_SW_STEERING=y -CONFIG_MLXSW_CORE=m -CONFIG_MLXSW_CORE_HWMON=y -CONFIG_MLXSW_CORE_THERMAL=y -CONFIG_MLXSW_PCI=m -CONFIG_MLXSW_I2C=m -CONFIG_MLXSW_SWITCHIB=m -CONFIG_MLXSW_SWITCHX2=m -CONFIG_MLXSW_SPECTRUM=m -CONFIG_MLXSW_SPECTRUM_DCB=y -CONFIG_MLXSW_MINIMAL=m -CONFIG_MLXFW=m -CONFIG_NET_VENDOR_MICREL=y -CONFIG_KS8842=m -CONFIG_KS8851=m -CONFIG_KS8851_MLL=m -CONFIG_KSZ884X_PCI=m -CONFIG_NET_VENDOR_MICROCHIP=y -CONFIG_ENC28J60=m -# CONFIG_ENC28J60_WRITEVERIFY is not set -CONFIG_ENCX24J600=m -CONFIG_LAN743X=m -CONFIG_NET_VENDOR_MICROSEMI=y -CONFIG_MSCC_OCELOT_SWITCH_LIB=m -CONFIG_MSCC_OCELOT_SWITCH=m -CONFIG_NET_VENDOR_MYRI=y -CONFIG_MYRI10GE=m -CONFIG_MYRI10GE_DCA=y -CONFIG_FEALNX=m -CONFIG_NET_VENDOR_NATSEMI=y -CONFIG_NATSEMI=m -CONFIG_NS83820=m -CONFIG_NET_VENDOR_NETERION=y -CONFIG_S2IO=m -CONFIG_VXGE=m -# CONFIG_VXGE_DEBUG_TRACE_ALL is not set -CONFIG_NET_VENDOR_NETRONOME=y -CONFIG_NFP=m -CONFIG_NFP_APP_FLOWER=y -CONFIG_NFP_APP_ABM_NIC=y -# CONFIG_NFP_DEBUG is not set -CONFIG_NET_VENDOR_NI=y -CONFIG_NI_XGE_MANAGEMENT_ENET=m -CONFIG_NET_VENDOR_8390=y -CONFIG_PCMCIA_AXNET=m -CONFIG_NE2K_PCI=m -CONFIG_PCMCIA_PCNET=m -CONFIG_NET_VENDOR_NVIDIA=y -CONFIG_FORCEDETH=m -CONFIG_NET_VENDOR_OKI=y -CONFIG_ETHOC=m -CONFIG_NET_VENDOR_PACKET_ENGINES=y -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_NET_VENDOR_PENSANDO=y -CONFIG_IONIC=m -CONFIG_NET_VENDOR_QLOGIC=y -CONFIG_QLA3XXX=m -CONFIG_QLCNIC=m -CONFIG_QLCNIC_SRIOV=y -CONFIG_QLCNIC_DCB=y 
-CONFIG_QLCNIC_HWMON=y -CONFIG_NETXEN_NIC=m -CONFIG_QED=m -CONFIG_QED_LL2=y -CONFIG_QED_SRIOV=y -CONFIG_QEDE=m -CONFIG_QED_RDMA=y -CONFIG_QED_ISCSI=y -CONFIG_QED_FCOE=y -CONFIG_QED_OOO=y -CONFIG_NET_VENDOR_QUALCOMM=y -CONFIG_QCA7000=m -CONFIG_QCA7000_SPI=m -CONFIG_QCA7000_UART=m -CONFIG_QCOM_EMAC=m -CONFIG_RMNET=m -CONFIG_NET_VENDOR_RDC=y -CONFIG_R6040=m -CONFIG_NET_VENDOR_REALTEK=y -CONFIG_ATP=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -CONFIG_8139TOO_TUNE_TWISTER=y -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_R8169=m -CONFIG_NET_VENDOR_RENESAS=y -CONFIG_NET_VENDOR_ROCKER=y -CONFIG_ROCKER=m -CONFIG_NET_VENDOR_SAMSUNG=y -CONFIG_SXGBE_ETH=m -CONFIG_NET_VENDOR_SEEQ=y -CONFIG_NET_VENDOR_SOLARFLARE=y -CONFIG_SFC=m -CONFIG_SFC_MTD=y -CONFIG_SFC_MCDI_MON=y -CONFIG_SFC_SRIOV=y -CONFIG_SFC_MCDI_LOGGING=y -CONFIG_SFC_FALCON=m -CONFIG_SFC_FALCON_MTD=y -CONFIG_NET_VENDOR_SILAN=y -CONFIG_SC92031=m -CONFIG_NET_VENDOR_SIS=y -CONFIG_SIS900=m -CONFIG_SIS190=m -CONFIG_NET_VENDOR_SMSC=y -CONFIG_PCMCIA_SMC91C92=m -CONFIG_EPIC100=m -CONFIG_SMSC911X=m -CONFIG_SMSC9420=m -CONFIG_NET_VENDOR_SOCIONEXT=y -CONFIG_NET_VENDOR_STMICRO=y -CONFIG_STMMAC_ETH=m -# CONFIG_STMMAC_SELFTESTS is not set -CONFIG_STMMAC_PLATFORM=m -CONFIG_DWMAC_DWC_QOS_ETH=m -CONFIG_DWMAC_GENERIC=m -# CONFIG_DWMAC_INTEL_PLAT is not set -CONFIG_DWMAC_INTEL=m -CONFIG_STMMAC_PCI=m -CONFIG_NET_VENDOR_SUN=y -CONFIG_HAPPYMEAL=m -CONFIG_SUNGEM=m -CONFIG_CASSINI=m -CONFIG_NIU=m -CONFIG_NET_VENDOR_SYNOPSYS=y -CONFIG_DWC_XLGMAC=m -CONFIG_DWC_XLGMAC_PCI=m -CONFIG_NET_VENDOR_TEHUTI=y -CONFIG_TEHUTI=m -CONFIG_NET_VENDOR_TI=y -# CONFIG_TI_CPSW_PHY_SEL is not set -CONFIG_TLAN=m -CONFIG_NET_VENDOR_VIA=y -CONFIG_VIA_RHINE=m -CONFIG_VIA_RHINE_MMIO=y -CONFIG_VIA_VELOCITY=m -CONFIG_NET_VENDOR_WIZNET=y -CONFIG_WIZNET_W5100=m -CONFIG_WIZNET_W5300=m -# CONFIG_WIZNET_BUS_DIRECT is not set -# CONFIG_WIZNET_BUS_INDIRECT is not set -CONFIG_WIZNET_BUS_ANY=y -CONFIG_WIZNET_W5100_SPI=m -CONFIG_NET_VENDOR_XILINX=y -CONFIG_XILINX_AXI_EMAC=m -CONFIG_XILINX_LL_TEMAC=m -CONFIG_NET_VENDOR_XIRCOM=y -CONFIG_PCMCIA_XIRC2PS=m -CONFIG_FDDI=m -CONFIG_DEFXX=m -CONFIG_DEFXX_MMIO=y -CONFIG_SKFP=m -# CONFIG_HIPPI is not set -CONFIG_NET_SB1000=m -CONFIG_PHYLINK=m -CONFIG_PHYLIB=m -CONFIG_SWPHY=y -CONFIG_LED_TRIGGER_PHY=y -CONFIG_FIXED_PHY=m -CONFIG_SFP=m - -# -# MII PHY device drivers -# -CONFIG_AMD_PHY=m -CONFIG_ADIN_PHY=m -CONFIG_AQUANTIA_PHY=m -CONFIG_AX88796B_PHY=m -CONFIG_BROADCOM_PHY=m -CONFIG_BCM54140_PHY=m -CONFIG_BCM7XXX_PHY=m -CONFIG_BCM84881_PHY=m -CONFIG_BCM87XX_PHY=m -CONFIG_BCM_NET_PHYLIB=m -CONFIG_CICADA_PHY=m -CONFIG_CORTINA_PHY=m -CONFIG_DAVICOM_PHY=m -CONFIG_ICPLUS_PHY=m -CONFIG_LXT_PHY=m -CONFIG_INTEL_XWAY_PHY=m -CONFIG_LSI_ET1011C_PHY=m -CONFIG_MARVELL_PHY=m -CONFIG_MARVELL_10G_PHY=m -CONFIG_MICREL_PHY=m -CONFIG_MICROCHIP_PHY=m -CONFIG_MICROCHIP_T1_PHY=m -CONFIG_MICROSEMI_PHY=m -CONFIG_NATIONAL_PHY=m -CONFIG_NXP_TJA11XX_PHY=m -CONFIG_AT803X_PHY=m -CONFIG_QSEMI_PHY=m -CONFIG_REALTEK_PHY=m -CONFIG_RENESAS_PHY=m -CONFIG_ROCKCHIP_PHY=m -CONFIG_SMSC_PHY=m -CONFIG_STE10XP=m -CONFIG_TERANETICS_PHY=m -CONFIG_DP83822_PHY=m -CONFIG_DP83TC811_PHY=m -CONFIG_DP83848_PHY=m -CONFIG_DP83867_PHY=m -CONFIG_DP83869_PHY=m -CONFIG_VITESSE_PHY=m -CONFIG_XILINX_GMII2RGMII=m -CONFIG_MICREL_KS8995MA=m -CONFIG_MDIO_DEVICE=m -CONFIG_MDIO_BUS=m -CONFIG_OF_MDIO=m -CONFIG_MDIO_DEVRES=m -CONFIG_MDIO_BITBANG=m -CONFIG_MDIO_BCM_UNIMAC=m -CONFIG_MDIO_CAVIUM=m -CONFIG_MDIO_GPIO=m -CONFIG_MDIO_HISI_FEMAC=m -CONFIG_MDIO_I2C=m -CONFIG_MDIO_MVUSB=m 
-CONFIG_MDIO_MSCC_MIIM=m -CONFIG_MDIO_OCTEON=m -CONFIG_MDIO_IPQ4019=m -CONFIG_MDIO_IPQ8064=m -CONFIG_MDIO_THUNDER=m - -# -# MDIO Multiplexers -# -CONFIG_MDIO_BUS_MUX=m -CONFIG_MDIO_BUS_MUX_GPIO=m -CONFIG_MDIO_BUS_MUX_MULTIPLEXER=m -CONFIG_MDIO_BUS_MUX_MMIOREG=m - -# -# PCS device drivers -# -CONFIG_PCS_XPCS=m -# end of PCS device drivers - -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_MPPE=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPPOATM=m -CONFIG_PPPOE=m -CONFIG_PPTP=m -CONFIG_PPPOL2TP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_SLIP=m -CONFIG_SLHC=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y -CONFIG_USB_NET_DRIVERS=m -CONFIG_USB_CATC=m -CONFIG_USB_KAWETH=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_RTL8152=m -CONFIG_USB_LAN78XX=m -CONFIG_USB_USBNET=m -CONFIG_USB_NET_AX8817X=m -CONFIG_USB_NET_AX88179_178A=m -CONFIG_USB_NET_CDCETHER=m -CONFIG_USB_NET_CDC_EEM=m -CONFIG_USB_NET_CDC_NCM=m -CONFIG_USB_NET_HUAWEI_CDC_NCM=m -CONFIG_USB_NET_CDC_MBIM=m -CONFIG_USB_NET_DM9601=m -CONFIG_USB_NET_SR9700=m -CONFIG_USB_NET_SR9800=m -CONFIG_USB_NET_SMSC75XX=m -CONFIG_USB_NET_SMSC95XX=m -CONFIG_USB_NET_GL620A=m -CONFIG_USB_NET_NET1080=m -CONFIG_USB_NET_PLUSB=m -CONFIG_USB_NET_MCS7830=m -CONFIG_USB_NET_RNDIS_HOST=m -CONFIG_USB_NET_CDC_SUBSET_ENABLE=m -CONFIG_USB_NET_CDC_SUBSET=m -CONFIG_USB_ALI_M5632=y -CONFIG_USB_AN2720=y -CONFIG_USB_BELKIN=y -CONFIG_USB_ARMLINUX=y -CONFIG_USB_EPSON2888=y -CONFIG_USB_KC2190=y -CONFIG_USB_NET_ZAURUS=m -CONFIG_USB_NET_CX82310_ETH=m -CONFIG_USB_NET_KALMIA=m -CONFIG_USB_NET_QMI_WWAN=m -CONFIG_USB_HSO=m -CONFIG_USB_NET_INT51X1=m -CONFIG_USB_CDC_PHONET=m -CONFIG_USB_IPHETH=m -CONFIG_USB_SIERRA_NET=m -CONFIG_USB_VL600=m -CONFIG_USB_NET_CH9200=m -CONFIG_USB_NET_AQC111=m -CONFIG_WLAN=y -# CONFIG_WIRELESS_WDS is not set -CONFIG_WLAN_VENDOR_ADMTEK=y -CONFIG_ADM8211=m -CONFIG_ATH_COMMON=m -CONFIG_WLAN_VENDOR_ATH=y -# CONFIG_ATH_DEBUG is not set -CONFIG_ATH5K=m -CONFIG_ATH5K_DEBUG=y -CONFIG_ATH5K_TRACER=y -CONFIG_ATH5K_PCI=y -CONFIG_ATH9K_HW=m -CONFIG_ATH9K_COMMON=m -CONFIG_ATH9K_COMMON_DEBUG=y -CONFIG_ATH9K_BTCOEX_SUPPORT=y -CONFIG_ATH9K=m -CONFIG_ATH9K_PCI=y -CONFIG_ATH9K_AHB=y -CONFIG_ATH9K_DEBUGFS=y -CONFIG_ATH9K_STATION_STATISTICS=y -CONFIG_ATH9K_DYNACK=y -CONFIG_ATH9K_WOW=y -CONFIG_ATH9K_RFKILL=y -CONFIG_ATH9K_CHANNEL_CONTEXT=y -CONFIG_ATH9K_PCOEM=y -CONFIG_ATH9K_PCI_NO_EEPROM=m -CONFIG_ATH9K_HTC=m -CONFIG_ATH9K_HTC_DEBUGFS=y -CONFIG_ATH9K_HWRNG=y -CONFIG_ATH9K_COMMON_SPECTRAL=y -CONFIG_CARL9170=m -CONFIG_CARL9170_LEDS=y -CONFIG_CARL9170_DEBUGFS=y -CONFIG_CARL9170_WPC=y -# CONFIG_CARL9170_HWRNG is not set -CONFIG_ATH6KL=m -CONFIG_ATH6KL_SDIO=m -CONFIG_ATH6KL_USB=m -CONFIG_ATH6KL_DEBUG=y -CONFIG_ATH6KL_TRACING=y -CONFIG_AR5523=m -CONFIG_WIL6210=m -CONFIG_WIL6210_ISR_COR=y -CONFIG_WIL6210_TRACING=y -CONFIG_WIL6210_DEBUGFS=y -CONFIG_ATH10K=m -CONFIG_ATH10K_CE=y -CONFIG_ATH10K_PCI=m -CONFIG_ATH10K_AHB=y -CONFIG_ATH10K_SDIO=m -CONFIG_ATH10K_USB=m -CONFIG_ATH10K_DEBUG=y -CONFIG_ATH10K_DEBUGFS=y -CONFIG_ATH10K_SPECTRAL=y -CONFIG_ATH10K_TRACING=y -CONFIG_WCN36XX=m -CONFIG_WCN36XX_DEBUGFS=y -# CONFIG_ATH11K is not set -CONFIG_WLAN_VENDOR_ATMEL=y -CONFIG_ATMEL=m -CONFIG_PCI_ATMEL=m -CONFIG_PCMCIA_ATMEL=m -CONFIG_AT76C50X_USB=m -CONFIG_WLAN_VENDOR_BROADCOM=y -CONFIG_B43=m -CONFIG_B43_BCMA=y -CONFIG_B43_SSB=y -CONFIG_B43_BUSES_BCMA_AND_SSB=y -# CONFIG_B43_BUSES_BCMA is not set -# CONFIG_B43_BUSES_SSB is not set -CONFIG_B43_PCI_AUTOSELECT=y -CONFIG_B43_PCICORE_AUTOSELECT=y 
-CONFIG_B43_SDIO=y -CONFIG_B43_BCMA_PIO=y -CONFIG_B43_PIO=y -CONFIG_B43_PHY_G=y -CONFIG_B43_PHY_N=y -CONFIG_B43_PHY_LP=y -CONFIG_B43_PHY_HT=y -CONFIG_B43_LEDS=y -CONFIG_B43_HWRNG=y -# CONFIG_B43_DEBUG is not set -CONFIG_B43LEGACY=m -CONFIG_B43LEGACY_PCI_AUTOSELECT=y -CONFIG_B43LEGACY_PCICORE_AUTOSELECT=y -CONFIG_B43LEGACY_LEDS=y -CONFIG_B43LEGACY_HWRNG=y -CONFIG_B43LEGACY_DEBUG=y -CONFIG_B43LEGACY_DMA=y -CONFIG_B43LEGACY_PIO=y -CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y -# CONFIG_B43LEGACY_DMA_MODE is not set -# CONFIG_B43LEGACY_PIO_MODE is not set -CONFIG_BRCMUTIL=m -CONFIG_BRCMSMAC=m -CONFIG_BRCMFMAC=m -CONFIG_BRCMFMAC_PROTO_BCDC=y -CONFIG_BRCMFMAC_PROTO_MSGBUF=y -CONFIG_BRCMFMAC_SDIO=y -CONFIG_BRCMFMAC_USB=y -CONFIG_BRCMFMAC_PCIE=y -CONFIG_BRCM_TRACING=y -CONFIG_BRCMDBG=y -CONFIG_WLAN_VENDOR_CISCO=y -CONFIG_AIRO=m -CONFIG_AIRO_CS=m -CONFIG_WLAN_VENDOR_INTEL=y -CONFIG_IPW2100=m -CONFIG_IPW2100_MONITOR=y -# CONFIG_IPW2100_DEBUG is not set -CONFIG_IPW2200=m -CONFIG_IPW2200_MONITOR=y -CONFIG_IPW2200_RADIOTAP=y -CONFIG_IPW2200_PROMISCUOUS=y -CONFIG_IPW2200_QOS=y -# CONFIG_IPW2200_DEBUG is not set -CONFIG_LIBIPW=m -# CONFIG_LIBIPW_DEBUG is not set -CONFIG_IWLEGACY=m -CONFIG_IWL4965=m -CONFIG_IWL3945=m - -# -# iwl3945 / iwl4965 Debugging Options -# -CONFIG_IWLEGACY_DEBUG=y -CONFIG_IWLEGACY_DEBUGFS=y -# end of iwl3945 / iwl4965 Debugging Options - -CONFIG_IWLWIFI=m -CONFIG_IWLWIFI_LEDS=y -CONFIG_IWLDVM=m -CONFIG_IWLMVM=m -CONFIG_IWLWIFI_OPMODE_MODULAR=y -# CONFIG_IWLWIFI_BCAST_FILTERING is not set - -# -# Debugging Options -# -CONFIG_IWLWIFI_DEBUG=y -CONFIG_IWLWIFI_DEBUGFS=y -CONFIG_IWLWIFI_DEVICE_TRACING=y -# end of Debugging Options - -CONFIG_WLAN_VENDOR_INTERSIL=y -CONFIG_HOSTAP=m -CONFIG_HOSTAP_FIRMWARE=y -CONFIG_HOSTAP_FIRMWARE_NVRAM=y -CONFIG_HOSTAP_PLX=m -CONFIG_HOSTAP_PCI=m -CONFIG_HOSTAP_CS=m -CONFIG_HERMES=m -CONFIG_HERMES_PRISM=y -CONFIG_HERMES_CACHE_FW_ON_INIT=y -CONFIG_PLX_HERMES=m -CONFIG_TMD_HERMES=m -CONFIG_NORTEL_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_PCMCIA_SPECTRUM=m -CONFIG_ORINOCO_USB=m -CONFIG_P54_COMMON=m -CONFIG_P54_USB=m -CONFIG_P54_PCI=m -CONFIG_P54_SPI=m -# CONFIG_P54_SPI_DEFAULT_EEPROM is not set -CONFIG_P54_LEDS=y -CONFIG_PRISM54=m -CONFIG_WLAN_VENDOR_MARVELL=y -CONFIG_LIBERTAS=m -CONFIG_LIBERTAS_USB=m -CONFIG_LIBERTAS_CS=m -CONFIG_LIBERTAS_SDIO=m -CONFIG_LIBERTAS_SPI=m -# CONFIG_LIBERTAS_DEBUG is not set -CONFIG_LIBERTAS_MESH=y -CONFIG_LIBERTAS_THINFIRM=m -# CONFIG_LIBERTAS_THINFIRM_DEBUG is not set -CONFIG_LIBERTAS_THINFIRM_USB=m -CONFIG_MWIFIEX=m -CONFIG_MWIFIEX_SDIO=m -CONFIG_MWIFIEX_PCIE=m -CONFIG_MWIFIEX_USB=m -CONFIG_MWL8K=m -CONFIG_WLAN_VENDOR_MEDIATEK=y -CONFIG_MT7601U=m -CONFIG_MT76_CORE=m -CONFIG_MT76_LEDS=y -CONFIG_MT76_USB=m -CONFIG_MT76x02_LIB=m -CONFIG_MT76x02_USB=m -CONFIG_MT76x0_COMMON=m -CONFIG_MT76x0U=m -CONFIG_MT76x0E=m -CONFIG_MT76x2_COMMON=m -CONFIG_MT76x2E=m -CONFIG_MT76x2U=m -CONFIG_MT7603E=m -CONFIG_MT7615_COMMON=m -CONFIG_MT7615E=m -CONFIG_MT7663_USB_SDIO_COMMON=m -CONFIG_MT7663U=m -# CONFIG_MT7663S is not set -CONFIG_MT7915E=m -CONFIG_WLAN_VENDOR_MICROCHIP=y -CONFIG_WILC1000=m -CONFIG_WILC1000_SDIO=m -CONFIG_WILC1000_SPI=m -# CONFIG_WILC1000_HW_OOB_INTR is not set -CONFIG_WLAN_VENDOR_RALINK=y -CONFIG_RT2X00=m -CONFIG_RT2400PCI=m -CONFIG_RT2500PCI=m -CONFIG_RT61PCI=m -CONFIG_RT2800PCI=m -CONFIG_RT2800PCI_RT33XX=y -CONFIG_RT2800PCI_RT35XX=y -CONFIG_RT2800PCI_RT53XX=y -CONFIG_RT2800PCI_RT3290=y -CONFIG_RT2500USB=m -CONFIG_RT73USB=m -CONFIG_RT2800USB=m -CONFIG_RT2800USB_RT33XX=y -CONFIG_RT2800USB_RT35XX=y 
-CONFIG_RT2800USB_RT3573=y -CONFIG_RT2800USB_RT53XX=y -CONFIG_RT2800USB_RT55XX=y -CONFIG_RT2800USB_UNKNOWN=y -CONFIG_RT2800_LIB=m -CONFIG_RT2800_LIB_MMIO=m -CONFIG_RT2X00_LIB_MMIO=m -CONFIG_RT2X00_LIB_PCI=m -CONFIG_RT2X00_LIB_USB=m -CONFIG_RT2X00_LIB=m -CONFIG_RT2X00_LIB_FIRMWARE=y -CONFIG_RT2X00_LIB_CRYPTO=y -CONFIG_RT2X00_LIB_LEDS=y -CONFIG_RT2X00_LIB_DEBUGFS=y -# CONFIG_RT2X00_DEBUG is not set -CONFIG_WLAN_VENDOR_REALTEK=y -CONFIG_RTL8180=m -CONFIG_RTL8187=m -CONFIG_RTL8187_LEDS=y -CONFIG_RTL_CARDS=m -CONFIG_RTL8192CE=m -CONFIG_RTL8192SE=m -CONFIG_RTL8192DE=m -CONFIG_RTL8723AE=m -CONFIG_RTL8723BE=m -CONFIG_RTL8188EE=m -CONFIG_RTL8192EE=m -CONFIG_RTL8821AE=m -CONFIG_RTL8192CU=m -CONFIG_RTLWIFI=m -CONFIG_RTLWIFI_PCI=m -CONFIG_RTLWIFI_USB=m -CONFIG_RTLWIFI_DEBUG=y -CONFIG_RTL8192C_COMMON=m -CONFIG_RTL8723_COMMON=m -CONFIG_RTLBTCOEXIST=m -CONFIG_RTL8XXXU=m -CONFIG_RTL8XXXU_UNTESTED=y -CONFIG_RTW88=m -CONFIG_RTW88_CORE=m -CONFIG_RTW88_PCI=m -CONFIG_RTW88_8822B=m -CONFIG_RTW88_8822C=m -CONFIG_RTW88_8723D=m -CONFIG_RTW88_8821C=m -CONFIG_RTW88_8822BE=m -CONFIG_RTW88_8822CE=m -CONFIG_RTW88_8723DE=m -CONFIG_RTW88_8821CE=m -CONFIG_RTW88_DEBUG=y -CONFIG_RTW88_DEBUGFS=y -CONFIG_WLAN_VENDOR_RSI=y -CONFIG_RSI_91X=m -CONFIG_RSI_DEBUGFS=y -CONFIG_RSI_SDIO=m -CONFIG_RSI_USB=m -CONFIG_RSI_COEX=y -CONFIG_WLAN_VENDOR_ST=y -CONFIG_CW1200=m -CONFIG_CW1200_WLAN_SDIO=m -CONFIG_CW1200_WLAN_SPI=m -CONFIG_WLAN_VENDOR_TI=y -CONFIG_WL1251=m -CONFIG_WL1251_SPI=m -CONFIG_WL1251_SDIO=m -CONFIG_WL12XX=m -CONFIG_WL18XX=m -CONFIG_WLCORE=m -CONFIG_WLCORE_SPI=m -CONFIG_WLCORE_SDIO=m -CONFIG_WILINK_PLATFORM_DATA=y -CONFIG_WLAN_VENDOR_ZYDAS=y -CONFIG_USB_ZD1201=m -CONFIG_ZD1211RW=m -# CONFIG_ZD1211RW_DEBUG is not set -CONFIG_WLAN_VENDOR_QUANTENNA=y -CONFIG_QTNFMAC=m -CONFIG_QTNFMAC_PCIE=m -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_WL3501=m -CONFIG_MAC80211_HWSIM=m -CONFIG_USB_NET_RNDIS_WLAN=m -CONFIG_VIRT_WIFI=m - -# -# WiMAX Wireless Broadband devices -# -CONFIG_WIMAX_I2400M=m -CONFIG_WIMAX_I2400M_USB=m -CONFIG_WIMAX_I2400M_DEBUG_LEVEL=8 -# end of WiMAX Wireless Broadband devices - -# CONFIG_WAN is not set -CONFIG_IEEE802154_DRIVERS=m -CONFIG_IEEE802154_FAKELB=m -CONFIG_IEEE802154_AT86RF230=m -# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set -CONFIG_IEEE802154_MRF24J40=m -CONFIG_IEEE802154_CC2520=m -CONFIG_IEEE802154_ATUSB=m -CONFIG_IEEE802154_ADF7242=m -CONFIG_IEEE802154_CA8210=m -# CONFIG_IEEE802154_CA8210_DEBUGFS is not set -CONFIG_IEEE802154_MCR20A=m -CONFIG_IEEE802154_HWSIM=m -CONFIG_XEN_NETDEV_FRONTEND=m -CONFIG_XEN_NETDEV_BACKEND=m -CONFIG_VMXNET3=m -CONFIG_FUJITSU_ES=m -CONFIG_USB4_NET=m -CONFIG_HYPERV_NET=m -CONFIG_NETDEVSIM=m -CONFIG_NET_FAILOVER=m -CONFIG_ISDN=y -CONFIG_ISDN_CAPI=y -CONFIG_CAPI_TRACE=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_MISDN=m -CONFIG_MISDN_DSP=m -CONFIG_MISDN_L1OIP=m - -# -# mISDN hardware drivers -# -CONFIG_MISDN_HFCPCI=m -CONFIG_MISDN_HFCMULTI=m -CONFIG_MISDN_HFCUSB=m -CONFIG_MISDN_AVMFRITZ=m -CONFIG_MISDN_SPEEDFAX=m -CONFIG_MISDN_INFINEON=m -CONFIG_MISDN_W6692=m -CONFIG_MISDN_NETJET=m -CONFIG_MISDN_HDLC=m -CONFIG_MISDN_IPAC=m -CONFIG_MISDN_ISAR=m -CONFIG_NVM=y -CONFIG_NVM_PBLK=m -# CONFIG_NVM_PBLK_DEBUG is not set - -# -# Input device support -# -CONFIG_INPUT=y -CONFIG_INPUT_LEDS=m -CONFIG_INPUT_FF_MEMLESS=m -CONFIG_INPUT_POLLDEV=m -CONFIG_INPUT_SPARSEKMAP=m -CONFIG_INPUT_MATRIXKMAP=m - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_PSAUX=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m -# 
CONFIG_INPUT_EVBUG is not set - -# -# Input Device Drivers -# -CONFIG_INPUT_KEYBOARD=y -CONFIG_KEYBOARD_ADC=m -CONFIG_KEYBOARD_ADP5520=m -CONFIG_KEYBOARD_ADP5588=m -CONFIG_KEYBOARD_ADP5589=m -CONFIG_KEYBOARD_APPLESPI=m -CONFIG_KEYBOARD_ATKBD=m -CONFIG_KEYBOARD_QT1050=m -CONFIG_KEYBOARD_QT1070=m -CONFIG_KEYBOARD_QT2160=m -CONFIG_KEYBOARD_DLINK_DIR685=m -CONFIG_KEYBOARD_LKKBD=m -CONFIG_KEYBOARD_GPIO=m -CONFIG_KEYBOARD_GPIO_POLLED=m -CONFIG_KEYBOARD_TCA6416=m -CONFIG_KEYBOARD_TCA8418=m -CONFIG_KEYBOARD_MATRIX=m -CONFIG_KEYBOARD_LM8323=m -CONFIG_KEYBOARD_LM8333=m -CONFIG_KEYBOARD_MAX7359=m -CONFIG_KEYBOARD_MCS=m -CONFIG_KEYBOARD_MPR121=m -CONFIG_KEYBOARD_NEWTON=m -CONFIG_KEYBOARD_OPENCORES=m -CONFIG_KEYBOARD_SAMSUNG=m -CONFIG_KEYBOARD_STOWAWAY=m -CONFIG_KEYBOARD_SUNKBD=m -CONFIG_KEYBOARD_STMPE=m -CONFIG_KEYBOARD_IQS62X=m -CONFIG_KEYBOARD_OMAP4=m -CONFIG_KEYBOARD_TC3589X=m -CONFIG_KEYBOARD_TM2_TOUCHKEY=m -CONFIG_KEYBOARD_TWL4030=m -CONFIG_KEYBOARD_XTKBD=m -CONFIG_KEYBOARD_CROS_EC=m -CONFIG_KEYBOARD_CAP11XX=m -CONFIG_KEYBOARD_BCM=m -CONFIG_KEYBOARD_MTK_PMIC=m -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=m -CONFIG_MOUSE_PS2_ALPS=y -CONFIG_MOUSE_PS2_BYD=y -CONFIG_MOUSE_PS2_LOGIPS2PP=y -CONFIG_MOUSE_PS2_SYNAPTICS=y -CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS=y -CONFIG_MOUSE_PS2_CYPRESS=y -CONFIG_MOUSE_PS2_LIFEBOOK=y -CONFIG_MOUSE_PS2_TRACKPOINT=y -CONFIG_MOUSE_PS2_ELANTECH=y -CONFIG_MOUSE_PS2_ELANTECH_SMBUS=y -CONFIG_MOUSE_PS2_SENTELIC=y -CONFIG_MOUSE_PS2_TOUCHKIT=y -CONFIG_MOUSE_PS2_FOCALTECH=y -CONFIG_MOUSE_PS2_VMMOUSE=y -CONFIG_MOUSE_PS2_SMBUS=y -CONFIG_MOUSE_SERIAL=m -CONFIG_MOUSE_APPLETOUCH=m -CONFIG_MOUSE_BCM5974=m -CONFIG_MOUSE_CYAPA=m -CONFIG_MOUSE_ELAN_I2C=m -CONFIG_MOUSE_ELAN_I2C_I2C=y -CONFIG_MOUSE_ELAN_I2C_SMBUS=y -CONFIG_MOUSE_VSXXXAA=m -CONFIG_MOUSE_GPIO=m -CONFIG_MOUSE_SYNAPTICS_I2C=m -CONFIG_MOUSE_SYNAPTICS_USB=m -CONFIG_INPUT_JOYSTICK=y -CONFIG_JOYSTICK_ANALOG=m -CONFIG_JOYSTICK_A3D=m -# CONFIG_JOYSTICK_ADC is not set -CONFIG_JOYSTICK_ADI=m -CONFIG_JOYSTICK_COBRA=m -CONFIG_JOYSTICK_GF2K=m -CONFIG_JOYSTICK_GRIP=m -CONFIG_JOYSTICK_GRIP_MP=m -CONFIG_JOYSTICK_GUILLEMOT=m -CONFIG_JOYSTICK_INTERACT=m -CONFIG_JOYSTICK_SIDEWINDER=m -CONFIG_JOYSTICK_TMDC=m -CONFIG_JOYSTICK_IFORCE=m -CONFIG_JOYSTICK_IFORCE_USB=m -CONFIG_JOYSTICK_IFORCE_232=m -CONFIG_JOYSTICK_WARRIOR=m -CONFIG_JOYSTICK_MAGELLAN=m -CONFIG_JOYSTICK_SPACEORB=m -CONFIG_JOYSTICK_SPACEBALL=m -CONFIG_JOYSTICK_STINGER=m -CONFIG_JOYSTICK_TWIDJOY=m -CONFIG_JOYSTICK_ZHENHUA=m -CONFIG_JOYSTICK_DB9=m -CONFIG_JOYSTICK_GAMECON=m -CONFIG_JOYSTICK_TURBOGRAFX=m -CONFIG_JOYSTICK_AS5011=m -CONFIG_JOYSTICK_JOYDUMP=m -CONFIG_JOYSTICK_XPAD=m -CONFIG_JOYSTICK_XPAD_FF=y -CONFIG_JOYSTICK_XPAD_LEDS=y -CONFIG_JOYSTICK_WALKERA0701=m -CONFIG_JOYSTICK_PSXPAD_SPI=m -CONFIG_JOYSTICK_PSXPAD_SPI_FF=y -CONFIG_JOYSTICK_PXRC=m -CONFIG_JOYSTICK_FSIA6B=m -CONFIG_INPUT_TABLET=y -CONFIG_TABLET_USB_ACECAD=m -CONFIG_TABLET_USB_AIPTEK=m -CONFIG_TABLET_USB_GTCO=m -CONFIG_TABLET_USB_HANWANG=m -CONFIG_TABLET_USB_KBTAB=m -CONFIG_TABLET_USB_PEGASUS=m -CONFIG_TABLET_SERIAL_WACOM4=m -CONFIG_INPUT_TOUCHSCREEN=y -CONFIG_TOUCHSCREEN_PROPERTIES=y -CONFIG_TOUCHSCREEN_88PM860X=m -CONFIG_TOUCHSCREEN_ADS7846=m -CONFIG_TOUCHSCREEN_AD7877=m -CONFIG_TOUCHSCREEN_AD7879=m -CONFIG_TOUCHSCREEN_AD7879_I2C=m -CONFIG_TOUCHSCREEN_AD7879_SPI=m -CONFIG_TOUCHSCREEN_ADC=m -CONFIG_TOUCHSCREEN_AR1021_I2C=m -CONFIG_TOUCHSCREEN_ATMEL_MXT=m -CONFIG_TOUCHSCREEN_ATMEL_MXT_T37=y -CONFIG_TOUCHSCREEN_AUO_PIXCIR=m -CONFIG_TOUCHSCREEN_BU21013=m -CONFIG_TOUCHSCREEN_BU21029=m -CONFIG_TOUCHSCREEN_CHIPONE_ICN8318=m 
-CONFIG_TOUCHSCREEN_CHIPONE_ICN8505=m -CONFIG_TOUCHSCREEN_CY8CTMA140=m -CONFIG_TOUCHSCREEN_CY8CTMG110=m -CONFIG_TOUCHSCREEN_CYTTSP_CORE=m -CONFIG_TOUCHSCREEN_CYTTSP_I2C=m -CONFIG_TOUCHSCREEN_CYTTSP_SPI=m -CONFIG_TOUCHSCREEN_CYTTSP4_CORE=m -CONFIG_TOUCHSCREEN_CYTTSP4_I2C=m -CONFIG_TOUCHSCREEN_CYTTSP4_SPI=m -CONFIG_TOUCHSCREEN_DA9034=m -CONFIG_TOUCHSCREEN_DA9052=m -CONFIG_TOUCHSCREEN_DYNAPRO=m -CONFIG_TOUCHSCREEN_HAMPSHIRE=m -CONFIG_TOUCHSCREEN_EETI=m -CONFIG_TOUCHSCREEN_EGALAX=m -CONFIG_TOUCHSCREEN_EGALAX_SERIAL=m -CONFIG_TOUCHSCREEN_EXC3000=m -CONFIG_TOUCHSCREEN_FUJITSU=m -CONFIG_TOUCHSCREEN_GOODIX=m -CONFIG_TOUCHSCREEN_HIDEEP=m -CONFIG_TOUCHSCREEN_ILI210X=m -CONFIG_TOUCHSCREEN_S6SY761=m -CONFIG_TOUCHSCREEN_GUNZE=m -CONFIG_TOUCHSCREEN_EKTF2127=m -CONFIG_TOUCHSCREEN_ELAN=m -CONFIG_TOUCHSCREEN_ELO=m -CONFIG_TOUCHSCREEN_WACOM_W8001=m -CONFIG_TOUCHSCREEN_WACOM_I2C=m -CONFIG_TOUCHSCREEN_MAX11801=m -CONFIG_TOUCHSCREEN_MCS5000=m -CONFIG_TOUCHSCREEN_MMS114=m -CONFIG_TOUCHSCREEN_MELFAS_MIP4=m -CONFIG_TOUCHSCREEN_MTOUCH=m -CONFIG_TOUCHSCREEN_IMX6UL_TSC=m -CONFIG_TOUCHSCREEN_INEXIO=m -CONFIG_TOUCHSCREEN_MK712=m -CONFIG_TOUCHSCREEN_PENMOUNT=m -CONFIG_TOUCHSCREEN_EDT_FT5X06=m -CONFIG_TOUCHSCREEN_TOUCHRIGHT=m -CONFIG_TOUCHSCREEN_TOUCHWIN=m -CONFIG_TOUCHSCREEN_TI_AM335X_TSC=m -CONFIG_TOUCHSCREEN_UCB1400=m -CONFIG_TOUCHSCREEN_PIXCIR=m -CONFIG_TOUCHSCREEN_WDT87XX_I2C=m -CONFIG_TOUCHSCREEN_WM831X=m -CONFIG_TOUCHSCREEN_WM97XX=m -CONFIG_TOUCHSCREEN_WM9705=y -CONFIG_TOUCHSCREEN_WM9712=y -CONFIG_TOUCHSCREEN_WM9713=y -CONFIG_TOUCHSCREEN_USB_COMPOSITE=m -CONFIG_TOUCHSCREEN_MC13783=m -CONFIG_TOUCHSCREEN_USB_EGALAX=y -CONFIG_TOUCHSCREEN_USB_PANJIT=y -CONFIG_TOUCHSCREEN_USB_3M=y -CONFIG_TOUCHSCREEN_USB_ITM=y -CONFIG_TOUCHSCREEN_USB_ETURBO=y -CONFIG_TOUCHSCREEN_USB_GUNZE=y -CONFIG_TOUCHSCREEN_USB_DMC_TSC10=y -CONFIG_TOUCHSCREEN_USB_IRTOUCH=y -CONFIG_TOUCHSCREEN_USB_IDEALTEK=y -CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH=y -CONFIG_TOUCHSCREEN_USB_GOTOP=y -CONFIG_TOUCHSCREEN_USB_JASTEC=y -CONFIG_TOUCHSCREEN_USB_ELO=y -CONFIG_TOUCHSCREEN_USB_E2I=y -CONFIG_TOUCHSCREEN_USB_ZYTRONIC=y -CONFIG_TOUCHSCREEN_USB_ETT_TC45USB=y -CONFIG_TOUCHSCREEN_USB_NEXIO=y -CONFIG_TOUCHSCREEN_USB_EASYTOUCH=y -CONFIG_TOUCHSCREEN_TOUCHIT213=m -CONFIG_TOUCHSCREEN_TSC_SERIO=m -CONFIG_TOUCHSCREEN_TSC200X_CORE=m -CONFIG_TOUCHSCREEN_TSC2004=m -CONFIG_TOUCHSCREEN_TSC2005=m -CONFIG_TOUCHSCREEN_TSC2007=m -CONFIG_TOUCHSCREEN_TSC2007_IIO=y -CONFIG_TOUCHSCREEN_PCAP=m -CONFIG_TOUCHSCREEN_RM_TS=m -CONFIG_TOUCHSCREEN_SILEAD=m -CONFIG_TOUCHSCREEN_SIS_I2C=m -CONFIG_TOUCHSCREEN_ST1232=m -CONFIG_TOUCHSCREEN_STMFTS=m -CONFIG_TOUCHSCREEN_STMPE=m -CONFIG_TOUCHSCREEN_SUR40=m -CONFIG_TOUCHSCREEN_SURFACE3_SPI=m -CONFIG_TOUCHSCREEN_SX8654=m -CONFIG_TOUCHSCREEN_TPS6507X=m -CONFIG_TOUCHSCREEN_ZET6223=m -CONFIG_TOUCHSCREEN_ZFORCE=m -CONFIG_TOUCHSCREEN_COLIBRI_VF50=m -CONFIG_TOUCHSCREEN_ROHM_BU21023=m -CONFIG_TOUCHSCREEN_IQS5XX=m -# CONFIG_TOUCHSCREEN_ZINITIX is not set -CONFIG_INPUT_MISC=y -CONFIG_INPUT_88PM860X_ONKEY=m -CONFIG_INPUT_88PM80X_ONKEY=m -CONFIG_INPUT_AD714X=m -CONFIG_INPUT_AD714X_I2C=m -CONFIG_INPUT_AD714X_SPI=m -CONFIG_INPUT_ARIZONA_HAPTICS=m -CONFIG_INPUT_ATMEL_CAPTOUCH=m -CONFIG_INPUT_BMA150=m -CONFIG_INPUT_E3X0_BUTTON=m -CONFIG_INPUT_PCSPKR=m -CONFIG_INPUT_MAX77650_ONKEY=m -CONFIG_INPUT_MAX77693_HAPTIC=m -CONFIG_INPUT_MAX8925_ONKEY=m -CONFIG_INPUT_MAX8997_HAPTIC=m -CONFIG_INPUT_MC13783_PWRBUTTON=m -CONFIG_INPUT_MMA8450=m -CONFIG_INPUT_APANEL=m -CONFIG_INPUT_GPIO_BEEPER=m -CONFIG_INPUT_GPIO_DECODER=m -CONFIG_INPUT_GPIO_VIBRA=m -CONFIG_INPUT_CPCAP_PWRBUTTON=m 
-CONFIG_INPUT_ATLAS_BTNS=m -CONFIG_INPUT_ATI_REMOTE2=m -CONFIG_INPUT_KEYSPAN_REMOTE=m -CONFIG_INPUT_KXTJ9=m -CONFIG_INPUT_POWERMATE=m -CONFIG_INPUT_YEALINK=m -CONFIG_INPUT_CM109=m -CONFIG_INPUT_REGULATOR_HAPTIC=m -CONFIG_INPUT_RETU_PWRBUTTON=m -CONFIG_INPUT_TPS65218_PWRBUTTON=m -CONFIG_INPUT_AXP20X_PEK=m -CONFIG_INPUT_TWL4030_PWRBUTTON=m -CONFIG_INPUT_TWL4030_VIBRA=m -CONFIG_INPUT_TWL6040_VIBRA=m -CONFIG_INPUT_UINPUT=m -CONFIG_INPUT_PALMAS_PWRBUTTON=m -CONFIG_INPUT_PCF50633_PMU=m -CONFIG_INPUT_PCF8574=m -CONFIG_INPUT_PWM_BEEPER=m -CONFIG_INPUT_PWM_VIBRA=m -CONFIG_INPUT_RK805_PWRKEY=m -CONFIG_INPUT_GPIO_ROTARY_ENCODER=m -CONFIG_INPUT_DA9052_ONKEY=m -CONFIG_INPUT_DA9055_ONKEY=m -CONFIG_INPUT_DA9063_ONKEY=m -CONFIG_INPUT_WM831X_ON=m -CONFIG_INPUT_PCAP=m -CONFIG_INPUT_ADXL34X=m -CONFIG_INPUT_ADXL34X_I2C=m -CONFIG_INPUT_ADXL34X_SPI=m -CONFIG_INPUT_IMS_PCU=m -CONFIG_INPUT_IQS269A=m -CONFIG_INPUT_CMA3000=m -CONFIG_INPUT_CMA3000_I2C=m -CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m -CONFIG_INPUT_IDEAPAD_SLIDEBAR=m -CONFIG_INPUT_SOC_BUTTON_ARRAY=m -CONFIG_INPUT_DRV260X_HAPTICS=m -CONFIG_INPUT_DRV2665_HAPTICS=m -CONFIG_INPUT_DRV2667_HAPTICS=m -CONFIG_INPUT_RAVE_SP_PWRBUTTON=m -CONFIG_INPUT_STPMIC1_ONKEY=m -CONFIG_RMI4_CORE=m -CONFIG_RMI4_I2C=m -CONFIG_RMI4_SPI=m -CONFIG_RMI4_SMB=m -CONFIG_RMI4_F03=y -CONFIG_RMI4_F03_SERIO=m -CONFIG_RMI4_2D_SENSOR=y -CONFIG_RMI4_F11=y -CONFIG_RMI4_F12=y -CONFIG_RMI4_F30=y -CONFIG_RMI4_F34=y -# CONFIG_RMI4_F3A is not set -# CONFIG_RMI4_F54 is not set -CONFIG_RMI4_F55=y - -# -# Hardware I/O ports -# -CONFIG_SERIO=m -CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y -CONFIG_SERIO_I8042=m -CONFIG_SERIO_SERPORT=m -CONFIG_SERIO_CT82C710=m -CONFIG_SERIO_PARKBD=m -CONFIG_SERIO_PCIPS2=m -CONFIG_SERIO_LIBPS2=m -CONFIG_SERIO_RAW=m -CONFIG_SERIO_ALTERA_PS2=m -CONFIG_SERIO_PS2MULT=m -CONFIG_SERIO_ARC_PS2=m -# CONFIG_SERIO_APBPS2 is not set -CONFIG_HYPERV_KEYBOARD=m -CONFIG_SERIO_GPIO_PS2=m -CONFIG_USERIO=m -CONFIG_GAMEPORT=m -CONFIG_GAMEPORT_NS558=m -CONFIG_GAMEPORT_L4=m -CONFIG_GAMEPORT_EMU10K1=m -CONFIG_GAMEPORT_FM801=m -# end of Hardware I/O ports -# end of Input device support - -# -# Character devices -# -CONFIG_TTY=y -CONFIG_VT=y -CONFIG_CONSOLE_TRANSLATIONS=y -CONFIG_VT_CONSOLE=y -CONFIG_VT_CONSOLE_SLEEP=y -CONFIG_HW_CONSOLE=y -CONFIG_VT_HW_CONSOLE_BINDING=y -CONFIG_UNIX98_PTYS=y -# CONFIG_LEGACY_PTYS is not set -CONFIG_LDISC_AUTOLOAD=y - -# -# Serial drivers -# -CONFIG_SERIAL_EARLYCON=y -CONFIG_SERIAL_8250=y -# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set -CONFIG_SERIAL_8250_PNP=y -# CONFIG_SERIAL_8250_16550A_VARIANTS is not set -CONFIG_SERIAL_8250_FINTEK=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_DMA=y -CONFIG_SERIAL_8250_PCI=y -CONFIG_SERIAL_8250_EXAR=m -CONFIG_SERIAL_8250_CS=m -CONFIG_SERIAL_8250_MEN_MCB=m -CONFIG_SERIAL_8250_NR_UARTS=32 -CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -CONFIG_SERIAL_8250_EXTENDED=y -CONFIG_SERIAL_8250_MANY_PORTS=y -CONFIG_SERIAL_8250_ASPEED_VUART=m -CONFIG_SERIAL_8250_SHARE_IRQ=y -# CONFIG_SERIAL_8250_DETECT_IRQ is not set -CONFIG_SERIAL_8250_RSA=y -CONFIG_SERIAL_8250_DWLIB=y -CONFIG_SERIAL_8250_DW=m -CONFIG_SERIAL_8250_RT288X=y -CONFIG_SERIAL_8250_LPSS=y -CONFIG_SERIAL_8250_MID=y -CONFIG_SERIAL_OF_PLATFORM=m - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_MAX3100=m -CONFIG_SERIAL_MAX310X=m -CONFIG_SERIAL_UARTLITE=m -CONFIG_SERIAL_UARTLITE_NR_UARTS=1 -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_SERIAL_JSM=m -CONFIG_SERIAL_SIFIVE=m -CONFIG_SERIAL_LANTIQ=m -CONFIG_SERIAL_SCCNXP=m -CONFIG_SERIAL_SC16IS7XX_CORE=m -CONFIG_SERIAL_SC16IS7XX=m 
-CONFIG_SERIAL_SC16IS7XX_I2C=y -CONFIG_SERIAL_SC16IS7XX_SPI=y -CONFIG_SERIAL_ALTERA_JTAGUART=m -CONFIG_SERIAL_ALTERA_UART=m -CONFIG_SERIAL_ALTERA_UART_MAXPORTS=4 -CONFIG_SERIAL_ALTERA_UART_BAUDRATE=115200 -CONFIG_SERIAL_IFX6X60=m -CONFIG_SERIAL_XILINX_PS_UART=m -CONFIG_SERIAL_ARC=m -CONFIG_SERIAL_ARC_NR_PORTS=1 -CONFIG_SERIAL_RP2=m -CONFIG_SERIAL_RP2_NR_UARTS=32 -CONFIG_SERIAL_FSL_LPUART=m -CONFIG_SERIAL_FSL_LINFLEXUART=m -CONFIG_SERIAL_CONEXANT_DIGICOLOR=m -CONFIG_SERIAL_MEN_Z135=m -CONFIG_SERIAL_SPRD=m -# end of Serial drivers - -CONFIG_SERIAL_MCTRL_GPIO=y -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -CONFIG_CYZ_INTR=y -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_SYNCLINK=m -CONFIG_SYNCLINKMP=m -CONFIG_SYNCLINK_GT=m -CONFIG_ISI=m -CONFIG_N_HDLC=m -CONFIG_N_GSM=m -CONFIG_NOZOMI=m -CONFIG_NULL_TTY=m -CONFIG_TRACE_ROUTER=m -CONFIG_TRACE_SINK=m -CONFIG_HVC_DRIVER=y -CONFIG_HVC_IRQ=y -CONFIG_HVC_XEN=y -CONFIG_HVC_XEN_FRONTEND=y -CONFIG_SERIAL_DEV_BUS=y -CONFIG_SERIAL_DEV_CTRL_TTYPORT=y -# CONFIG_TTY_PRINTK is not set -CONFIG_PRINTER=m -# CONFIG_LP_CONSOLE is not set -CONFIG_PPDEV=m -CONFIG_VIRTIO_CONSOLE=m -CONFIG_IPMI_HANDLER=m -CONFIG_IPMI_DMI_DECODE=y -CONFIG_IPMI_PLAT_DATA=y -# CONFIG_IPMI_PANIC_EVENT is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_SI=m -CONFIG_IPMI_SSIF=m -CONFIG_IPMI_WATCHDOG=m -CONFIG_IPMI_POWEROFF=m -CONFIG_IPMB_DEVICE_INTERFACE=m -CONFIG_HW_RANDOM=m -CONFIG_HW_RANDOM_TIMERIOMEM=m -CONFIG_HW_RANDOM_INTEL=m -CONFIG_HW_RANDOM_AMD=m -# CONFIG_HW_RANDOM_BA431 is not set -CONFIG_HW_RANDOM_VIA=m -CONFIG_HW_RANDOM_VIRTIO=m -CONFIG_HW_RANDOM_CCTRNG=m -# CONFIG_HW_RANDOM_XIPHERA is not set -CONFIG_APPLICOM=m - -# -# PCMCIA character devices -# -CONFIG_SYNCLINK_CS=m -CONFIG_CARDMAN_4000=m -CONFIG_CARDMAN_4040=m -CONFIG_SCR24X=m -CONFIG_IPWIRELESS=m -# end of PCMCIA character devices - -CONFIG_MWAVE=m -CONFIG_DEVMEM=y -# CONFIG_DEVKMEM is not set -CONFIG_NVRAM=m -CONFIG_RAW_DRIVER=m -CONFIG_MAX_RAW_DEVS=256 -CONFIG_DEVPORT=y -CONFIG_HPET=y -CONFIG_HPET_MMAP=y -CONFIG_HPET_MMAP_DEFAULT=y -CONFIG_HANGCHECK_TIMER=m -CONFIG_TCG_TPM=m -CONFIG_HW_RANDOM_TPM=y -CONFIG_TCG_TIS_CORE=m -CONFIG_TCG_TIS=m -CONFIG_TCG_TIS_SPI=m -CONFIG_TCG_TIS_SPI_CR50=y -CONFIG_TCG_TIS_I2C_ATMEL=m -CONFIG_TCG_TIS_I2C_INFINEON=m -CONFIG_TCG_TIS_I2C_NUVOTON=m -CONFIG_TCG_NSC=m -CONFIG_TCG_ATMEL=m -CONFIG_TCG_INFINEON=m -CONFIG_TCG_XEN=m -CONFIG_TCG_CRB=m -CONFIG_TCG_VTPM_PROXY=m -CONFIG_TCG_TIS_ST33ZP24=m -CONFIG_TCG_TIS_ST33ZP24_I2C=m -CONFIG_TCG_TIS_ST33ZP24_SPI=m -CONFIG_TELCLOCK=m -CONFIG_XILLYBUS=m -CONFIG_XILLYBUS_PCIE=m -CONFIG_XILLYBUS_OF=m -# end of Character devices - -# CONFIG_RANDOM_TRUST_CPU is not set -# CONFIG_RANDOM_TRUST_BOOTLOADER is not set - -# -# I2C support -# -CONFIG_I2C=y -CONFIG_ACPI_I2C_OPREGION=y -CONFIG_I2C_BOARDINFO=y -CONFIG_I2C_COMPAT=y -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_MUX=m - -# -# Multiplexer I2C Chip support -# -CONFIG_I2C_ARB_GPIO_CHALLENGE=m -CONFIG_I2C_MUX_GPIO=m -CONFIG_I2C_MUX_GPMUX=m -CONFIG_I2C_MUX_LTC4306=m -CONFIG_I2C_MUX_PCA9541=m -CONFIG_I2C_MUX_PCA954x=m -CONFIG_I2C_MUX_PINCTRL=m -CONFIG_I2C_MUX_REG=m -CONFIG_I2C_DEMUX_PINCTRL=m -CONFIG_I2C_MUX_MLXCPLD=m -# end of Multiplexer I2C Chip support - -CONFIG_I2C_HELPER_AUTO=y -CONFIG_I2C_SMBUS=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_ALGOPCA=m - -# -# I2C Hardware Bus support -# - -# -# PC SMBus host controller drivers -# -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI1563=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_AMD756=m -CONFIG_I2C_AMD756_S4882=m -CONFIG_I2C_AMD8111=m -CONFIG_I2C_AMD_MP2=m 
-CONFIG_I2C_I801=m -CONFIG_I2C_ISCH=m -CONFIG_I2C_ISMT=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_CHT_WC=m -CONFIG_I2C_NFORCE2=m -CONFIG_I2C_NFORCE2_S4985=m -CONFIG_I2C_NVIDIA_GPU=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_SIS630=m -CONFIG_I2C_SIS96X=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m - -# -# ACPI drivers -# -CONFIG_I2C_SCMI=m - -# -# I2C system bus drivers (mostly embedded / system-on-chip) -# -CONFIG_I2C_CBUS_GPIO=m -CONFIG_I2C_DESIGNWARE_CORE=y -CONFIG_I2C_DESIGNWARE_SLAVE=y -CONFIG_I2C_DESIGNWARE_PLATFORM=y -CONFIG_I2C_DESIGNWARE_BAYTRAIL=y -CONFIG_I2C_DESIGNWARE_PCI=m -CONFIG_I2C_EMEV2=m -CONFIG_I2C_GPIO=m -# CONFIG_I2C_GPIO_FAULT_INJECTOR is not set -CONFIG_I2C_KEMPLD=m -CONFIG_I2C_OCORES=m -CONFIG_I2C_PCA_PLATFORM=m -CONFIG_I2C_RK3X=m -CONFIG_I2C_SIMTEC=m -CONFIG_I2C_XILINX=m - -# -# External I2C/SMBus adapter drivers -# -CONFIG_I2C_DIOLAN_U2C=m -CONFIG_I2C_DLN2=m -CONFIG_I2C_PARPORT=m -CONFIG_I2C_ROBOTFUZZ_OSIF=m -CONFIG_I2C_TAOS_EVM=m -CONFIG_I2C_TINY_USB=m -CONFIG_I2C_VIPERBOARD=m - -# -# Other I2C/SMBus bus drivers -# -CONFIG_I2C_MLXCPLD=m -CONFIG_I2C_CROS_EC_TUNNEL=m -CONFIG_I2C_FSI=m -# end of I2C Hardware Bus support - -CONFIG_I2C_STUB=m -CONFIG_I2C_SLAVE=y -CONFIG_I2C_SLAVE_EEPROM=m -# CONFIG_I2C_SLAVE_TESTUNIT is not set -# CONFIG_I2C_DEBUG_CORE is not set -# CONFIG_I2C_DEBUG_ALGO is not set -# CONFIG_I2C_DEBUG_BUS is not set -# end of I2C support - -CONFIG_I3C=m -CONFIG_CDNS_I3C_MASTER=m -CONFIG_DW_I3C_MASTER=m -CONFIG_SPI=y -# CONFIG_SPI_DEBUG is not set -CONFIG_SPI_MASTER=y -CONFIG_SPI_MEM=y - -# -# SPI Master Controller Drivers -# -CONFIG_SPI_ALTERA=m -CONFIG_SPI_AXI_SPI_ENGINE=m -CONFIG_SPI_BITBANG=m -CONFIG_SPI_BUTTERFLY=m -CONFIG_SPI_CADENCE=m -CONFIG_SPI_DESIGNWARE=m -CONFIG_SPI_DW_DMA=y -CONFIG_SPI_DW_PCI=m -CONFIG_SPI_DW_MMIO=m -CONFIG_SPI_DLN2=m -CONFIG_SPI_FSI=m -CONFIG_SPI_NXP_FLEXSPI=m -CONFIG_SPI_GPIO=m -CONFIG_SPI_LM70_LLP=m -CONFIG_SPI_FSL_LIB=m -CONFIG_SPI_FSL_SPI=m -# CONFIG_SPI_LANTIQ_SSC is not set -CONFIG_SPI_OC_TINY=m -CONFIG_SPI_PXA2XX=m -CONFIG_SPI_PXA2XX_PCI=m -CONFIG_SPI_ROCKCHIP=m -CONFIG_SPI_SC18IS602=m -CONFIG_SPI_SIFIVE=m -CONFIG_SPI_MXIC=m -CONFIG_SPI_XCOMM=m -CONFIG_SPI_XILINX=m -CONFIG_SPI_ZYNQMP_GQSPI=m -CONFIG_SPI_AMD=m - -# -# SPI Multiplexer support -# -CONFIG_SPI_MUX=m - -# -# SPI Protocol Masters -# -CONFIG_SPI_SPIDEV=m -CONFIG_SPI_LOOPBACK_TEST=m -CONFIG_SPI_TLE62X0=m -CONFIG_SPI_SLAVE=y -CONFIG_SPI_SLAVE_TIME=m -CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m -CONFIG_SPI_DYNAMIC=y -CONFIG_SPMI=m -CONFIG_HSI=m -CONFIG_HSI_BOARDINFO=y - -# -# HSI controllers -# - -# -# HSI clients -# -CONFIG_HSI_CHAR=m -CONFIG_PPS=y -# CONFIG_PPS_DEBUG is not set - -# -# PPS clients support -# -CONFIG_PPS_CLIENT_KTIMER=m -CONFIG_PPS_CLIENT_LDISC=m -CONFIG_PPS_CLIENT_PARPORT=m -CONFIG_PPS_CLIENT_GPIO=m - -# -# PPS generators support -# - -# -# PTP clock support -# -CONFIG_PTP_1588_CLOCK=y -CONFIG_DP83640_PHY=m -CONFIG_PTP_1588_CLOCK_INES=m -CONFIG_PTP_1588_CLOCK_KVM=m -CONFIG_PTP_1588_CLOCK_IDT82P33=m -CONFIG_PTP_1588_CLOCK_IDTCM=m -CONFIG_PTP_1588_CLOCK_VMW=m -# end of PTP clock support - -CONFIG_PINCTRL=y -CONFIG_GENERIC_PINCTRL_GROUPS=y -CONFIG_PINMUX=y -CONFIG_GENERIC_PINMUX_FUNCTIONS=y -CONFIG_PINCONF=y -CONFIG_GENERIC_PINCONF=y -# CONFIG_DEBUG_PINCTRL is not set -CONFIG_PINCTRL_AS3722=m -CONFIG_PINCTRL_AXP209=m -CONFIG_PINCTRL_AMD=m -CONFIG_PINCTRL_DA9062=m -CONFIG_PINCTRL_MCP23S08_I2C=m -CONFIG_PINCTRL_MCP23S08_SPI=m -CONFIG_PINCTRL_MCP23S08=m -CONFIG_PINCTRL_SINGLE=m -CONFIG_PINCTRL_SX150X=y -CONFIG_PINCTRL_STMFX=m -CONFIG_PINCTRL_MAX77620=m -CONFIG_PINCTRL_PALMAS=m 
-CONFIG_PINCTRL_RK805=m -CONFIG_PINCTRL_OCELOT=y -CONFIG_PINCTRL_BAYTRAIL=y -CONFIG_PINCTRL_CHERRYVIEW=y -CONFIG_PINCTRL_LYNXPOINT=y -CONFIG_PINCTRL_INTEL=y -CONFIG_PINCTRL_BROXTON=y -CONFIG_PINCTRL_CANNONLAKE=y -CONFIG_PINCTRL_CEDARFORK=y -CONFIG_PINCTRL_DENVERTON=y -# CONFIG_PINCTRL_EMMITSBURG is not set -CONFIG_PINCTRL_GEMINILAKE=y -CONFIG_PINCTRL_ICELAKE=y -CONFIG_PINCTRL_JASPERLAKE=y -CONFIG_PINCTRL_LEWISBURG=y -CONFIG_PINCTRL_SUNRISEPOINT=y -CONFIG_PINCTRL_TIGERLAKE=y - -# -# Renesas pinctrl drivers -# -# end of Renesas pinctrl drivers - -CONFIG_PINCTRL_LOCHNAGAR=m -CONFIG_PINCTRL_MADERA=m -CONFIG_PINCTRL_CS47L15=y -CONFIG_PINCTRL_CS47L35=y -CONFIG_PINCTRL_CS47L85=y -CONFIG_PINCTRL_CS47L90=y -CONFIG_PINCTRL_CS47L92=y -CONFIG_PINCTRL_EQUILIBRIUM=m -CONFIG_GPIOLIB=y -CONFIG_GPIOLIB_FASTPATH_LIMIT=512 -CONFIG_OF_GPIO=y -CONFIG_GPIO_ACPI=y -CONFIG_GPIOLIB_IRQCHIP=y -# CONFIG_DEBUG_GPIO is not set -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_CDEV=y -CONFIG_GPIO_CDEV_V1=y -CONFIG_GPIO_GENERIC=y -CONFIG_GPIO_MAX730X=m - -# -# Memory mapped GPIO drivers -# -CONFIG_GPIO_74XX_MMIO=m -CONFIG_GPIO_ALTERA=m -CONFIG_GPIO_AMDPT=m -CONFIG_GPIO_CADENCE=m -CONFIG_GPIO_DWAPB=m -CONFIG_GPIO_EXAR=m -CONFIG_GPIO_FTGPIO010=y -CONFIG_GPIO_GENERIC_PLATFORM=m -CONFIG_GPIO_GRGPIO=m -CONFIG_GPIO_HLWD=m -CONFIG_GPIO_ICH=m -CONFIG_GPIO_LOGICVC=m -CONFIG_GPIO_MB86S7X=m -CONFIG_GPIO_MENZ127=m -CONFIG_GPIO_SAMA5D2_PIOBU=m -CONFIG_GPIO_SIFIVE=y -CONFIG_GPIO_SIOX=m -CONFIG_GPIO_SYSCON=m -CONFIG_GPIO_VX855=m -CONFIG_GPIO_WCD934X=m -CONFIG_GPIO_XILINX=m -CONFIG_GPIO_AMD_FCH=m -# end of Memory mapped GPIO drivers - -# -# Port-mapped I/O GPIO drivers -# -CONFIG_GPIO_F7188X=m -CONFIG_GPIO_IT87=m -CONFIG_GPIO_SCH=m -CONFIG_GPIO_SCH311X=m -CONFIG_GPIO_WINBOND=m -CONFIG_GPIO_WS16C48=m -# end of Port-mapped I/O GPIO drivers - -# -# I2C GPIO expanders -# -CONFIG_GPIO_ADP5588=m -CONFIG_GPIO_ADNP=m -CONFIG_GPIO_GW_PLD=m -CONFIG_GPIO_MAX7300=m -CONFIG_GPIO_MAX732X=m -CONFIG_GPIO_PCA953X=m -CONFIG_GPIO_PCA953X_IRQ=y -CONFIG_GPIO_PCA9570=m -CONFIG_GPIO_PCF857X=m -CONFIG_GPIO_TPIC2810=m -# end of I2C GPIO expanders - -# -# MFD GPIO expanders -# -CONFIG_GPIO_ADP5520=m -CONFIG_GPIO_ARIZONA=m -CONFIG_GPIO_BD70528=m -CONFIG_GPIO_BD71828=m -CONFIG_GPIO_BD9571MWV=m -CONFIG_GPIO_CRYSTAL_COVE=m -CONFIG_GPIO_DA9052=m -CONFIG_GPIO_DA9055=m -CONFIG_GPIO_DLN2=m -CONFIG_GPIO_JANZ_TTL=m -CONFIG_GPIO_KEMPLD=m -CONFIG_GPIO_LP3943=m -CONFIG_GPIO_LP873X=m -CONFIG_GPIO_LP87565=m -CONFIG_GPIO_MADERA=m -CONFIG_GPIO_MAX77620=m -CONFIG_GPIO_MAX77650=m -CONFIG_GPIO_MSIC=y -CONFIG_GPIO_PALMAS=y -CONFIG_GPIO_RC5T583=y -CONFIG_GPIO_STMPE=y -CONFIG_GPIO_TC3589X=y -CONFIG_GPIO_TPS65086=m -CONFIG_GPIO_TPS65218=m -CONFIG_GPIO_TPS6586X=y -CONFIG_GPIO_TPS65910=y -CONFIG_GPIO_TPS65912=m -CONFIG_GPIO_TPS68470=y -CONFIG_GPIO_TQMX86=m -CONFIG_GPIO_TWL4030=m -CONFIG_GPIO_TWL6040=m -CONFIG_GPIO_UCB1400=m -CONFIG_GPIO_WHISKEY_COVE=m -CONFIG_GPIO_WM831X=m -CONFIG_GPIO_WM8350=m -CONFIG_GPIO_WM8994=m -# end of MFD GPIO expanders - -# -# PCI GPIO expanders -# -CONFIG_GPIO_AMD8111=m -CONFIG_GPIO_ML_IOH=m -CONFIG_GPIO_PCI_IDIO_16=m -CONFIG_GPIO_PCIE_IDIO_24=m -CONFIG_GPIO_RDC321X=m -CONFIG_GPIO_SODAVILLE=y -# end of PCI GPIO expanders - -# -# SPI GPIO expanders -# -CONFIG_GPIO_74X164=m -CONFIG_GPIO_MAX3191X=m -CONFIG_GPIO_MAX7301=m -CONFIG_GPIO_MC33880=m -CONFIG_GPIO_PISOSR=m -CONFIG_GPIO_XRA1403=m -CONFIG_GPIO_MOXTET=m -# end of SPI GPIO expanders - -# -# USB GPIO expanders -# -CONFIG_GPIO_VIPERBOARD=m -# end of USB GPIO expanders - -CONFIG_GPIO_AGGREGATOR=m -CONFIG_GPIO_MOCKUP=m 
-CONFIG_W1=m -CONFIG_W1_CON=y - -# -# 1-wire Bus Masters -# -CONFIG_W1_MASTER_MATROX=m -CONFIG_W1_MASTER_DS2490=m -CONFIG_W1_MASTER_DS2482=m -CONFIG_W1_MASTER_DS1WM=m -CONFIG_W1_MASTER_GPIO=m -CONFIG_W1_MASTER_SGI=m -# end of 1-wire Bus Masters - -# -# 1-wire Slaves -# -CONFIG_W1_SLAVE_THERM=m -CONFIG_W1_SLAVE_SMEM=m -CONFIG_W1_SLAVE_DS2405=m -CONFIG_W1_SLAVE_DS2408=m -# CONFIG_W1_SLAVE_DS2408_READBACK is not set -CONFIG_W1_SLAVE_DS2413=m -CONFIG_W1_SLAVE_DS2406=m -CONFIG_W1_SLAVE_DS2423=m -CONFIG_W1_SLAVE_DS2805=m -CONFIG_W1_SLAVE_DS2430=m -CONFIG_W1_SLAVE_DS2431=m -CONFIG_W1_SLAVE_DS2433=m -# CONFIG_W1_SLAVE_DS2433_CRC is not set -CONFIG_W1_SLAVE_DS2438=m -CONFIG_W1_SLAVE_DS250X=m -CONFIG_W1_SLAVE_DS2780=m -CONFIG_W1_SLAVE_DS2781=m -CONFIG_W1_SLAVE_DS28E04=m -CONFIG_W1_SLAVE_DS28E17=m -# end of 1-wire Slaves - -CONFIG_POWER_RESET=y -CONFIG_POWER_RESET_AS3722=y -CONFIG_POWER_RESET_GPIO=y -CONFIG_POWER_RESET_GPIO_RESTART=y -CONFIG_POWER_RESET_LTC2952=y -CONFIG_POWER_RESET_MT6323=y -CONFIG_POWER_RESET_RESTART=y -CONFIG_POWER_RESET_SYSCON=y -CONFIG_POWER_RESET_SYSCON_POWEROFF=y -CONFIG_REBOOT_MODE=m -CONFIG_SYSCON_REBOOT_MODE=m -CONFIG_NVMEM_REBOOT_MODE=m -CONFIG_POWER_SUPPLY=y -# CONFIG_POWER_SUPPLY_DEBUG is not set -CONFIG_POWER_SUPPLY_HWMON=y -CONFIG_PDA_POWER=m -CONFIG_GENERIC_ADC_BATTERY=m -CONFIG_MAX8925_POWER=m -CONFIG_WM831X_BACKUP=m -CONFIG_WM831X_POWER=m -CONFIG_WM8350_POWER=m -CONFIG_TEST_POWER=m -CONFIG_BATTERY_88PM860X=m -CONFIG_CHARGER_ADP5061=m -CONFIG_BATTERY_ACT8945A=m -CONFIG_BATTERY_CPCAP=m -CONFIG_BATTERY_CW2015=m -CONFIG_BATTERY_DS2760=m -CONFIG_BATTERY_DS2780=m -CONFIG_BATTERY_DS2781=m -CONFIG_BATTERY_DS2782=m -CONFIG_BATTERY_SBS=m -CONFIG_CHARGER_SBS=m -CONFIG_MANAGER_SBS=m -CONFIG_BATTERY_BQ27XXX=m -CONFIG_BATTERY_BQ27XXX_I2C=m -CONFIG_BATTERY_BQ27XXX_HDQ=m -# CONFIG_BATTERY_BQ27XXX_DT_UPDATES_NVM is not set -CONFIG_BATTERY_DA9030=m -CONFIG_BATTERY_DA9052=m -CONFIG_CHARGER_DA9150=m -CONFIG_BATTERY_DA9150=m -CONFIG_CHARGER_AXP20X=m -CONFIG_BATTERY_AXP20X=m -CONFIG_AXP20X_POWER=m -CONFIG_AXP288_CHARGER=m -CONFIG_AXP288_FUEL_GAUGE=m -CONFIG_BATTERY_MAX17040=m -CONFIG_BATTERY_MAX17042=m -CONFIG_BATTERY_MAX1721X=m -CONFIG_BATTERY_TWL4030_MADC=m -CONFIG_CHARGER_88PM860X=m -CONFIG_CHARGER_PCF50633=m -CONFIG_BATTERY_RX51=m -CONFIG_CHARGER_ISP1704=m -CONFIG_CHARGER_MAX8903=m -CONFIG_CHARGER_TWL4030=m -CONFIG_CHARGER_LP8727=m -CONFIG_CHARGER_LP8788=m -CONFIG_CHARGER_GPIO=m -CONFIG_CHARGER_MANAGER=y -CONFIG_CHARGER_LT3651=m -CONFIG_CHARGER_MAX14577=m -CONFIG_CHARGER_DETECTOR_MAX14656=m -CONFIG_CHARGER_MAX77650=m -CONFIG_CHARGER_MAX77693=m -CONFIG_CHARGER_MAX8997=m -CONFIG_CHARGER_MAX8998=m -CONFIG_CHARGER_MP2629=m -CONFIG_CHARGER_BQ2415X=m -CONFIG_CHARGER_BQ24190=m -CONFIG_CHARGER_BQ24257=m -CONFIG_CHARGER_BQ24735=m -# CONFIG_CHARGER_BQ2515X is not set -CONFIG_CHARGER_BQ25890=m -# CONFIG_CHARGER_BQ25980 is not set -CONFIG_CHARGER_SMB347=m -CONFIG_CHARGER_TPS65090=m -CONFIG_CHARGER_TPS65217=m -CONFIG_BATTERY_GAUGE_LTC2941=m -CONFIG_BATTERY_RT5033=m -CONFIG_CHARGER_RT9455=m -CONFIG_CHARGER_CROS_USBPD=m -CONFIG_CHARGER_UCS1002=m -CONFIG_CHARGER_BD70528=m -CONFIG_CHARGER_BD99954=m -CONFIG_CHARGER_WILCO=m -# CONFIG_RN5T618_POWER is not set -CONFIG_HWMON=y -CONFIG_HWMON_VID=m -# CONFIG_HWMON_DEBUG_CHIP is not set - -# -# Native drivers -# -CONFIG_SENSORS_ABITUGURU=m -CONFIG_SENSORS_ABITUGURU3=m -CONFIG_SENSORS_AD7314=m -CONFIG_SENSORS_AD7414=m -CONFIG_SENSORS_AD7418=m -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM1026=m -CONFIG_SENSORS_ADM1029=m 
-CONFIG_SENSORS_ADM1031=m -CONFIG_SENSORS_ADM1177=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_ADT7X10=m -CONFIG_SENSORS_ADT7310=m -CONFIG_SENSORS_ADT7410=m -CONFIG_SENSORS_ADT7411=m -CONFIG_SENSORS_ADT7462=m -CONFIG_SENSORS_ADT7470=m -CONFIG_SENSORS_ADT7475=m -CONFIG_SENSORS_AS370=m -CONFIG_SENSORS_ASC7621=m -CONFIG_SENSORS_AXI_FAN_CONTROL=m -CONFIG_SENSORS_K8TEMP=m -CONFIG_SENSORS_K10TEMP=m -CONFIG_SENSORS_FAM15H_POWER=m -CONFIG_SENSORS_AMD_ENERGY=m -CONFIG_SENSORS_APPLESMC=m -CONFIG_SENSORS_ASB100=m -CONFIG_SENSORS_ASPEED=m -CONFIG_SENSORS_ATXP1=m -CONFIG_SENSORS_CORSAIR_CPRO=m -CONFIG_SENSORS_DRIVETEMP=m -CONFIG_SENSORS_DS620=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_DELL_SMM=m -CONFIG_SENSORS_DA9052_ADC=m -CONFIG_SENSORS_DA9055=m -CONFIG_SENSORS_I5K_AMB=m -CONFIG_SENSORS_F71805F=m -CONFIG_SENSORS_F71882FG=m -CONFIG_SENSORS_F75375S=m -CONFIG_SENSORS_GSC=m -CONFIG_SENSORS_MC13783_ADC=m -CONFIG_SENSORS_FSCHMD=m -CONFIG_SENSORS_FTSTEUTATES=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_G760A=m -CONFIG_SENSORS_G762=m -CONFIG_SENSORS_GPIO_FAN=m -CONFIG_SENSORS_HIH6130=m -CONFIG_SENSORS_IBMAEM=m -CONFIG_SENSORS_IBMPEX=m -CONFIG_SENSORS_IIO_HWMON=m -CONFIG_SENSORS_I5500=m -CONFIG_SENSORS_CORETEMP=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_JC42=m -CONFIG_SENSORS_POWR1220=m -CONFIG_SENSORS_LINEAGE=m -CONFIG_SENSORS_LOCHNAGAR=m -CONFIG_SENSORS_LTC2945=m -CONFIG_SENSORS_LTC2947=m -CONFIG_SENSORS_LTC2947_I2C=m -CONFIG_SENSORS_LTC2947_SPI=m -CONFIG_SENSORS_LTC2990=m -CONFIG_SENSORS_LTC4151=m -CONFIG_SENSORS_LTC4215=m -CONFIG_SENSORS_LTC4222=m -CONFIG_SENSORS_LTC4245=m -CONFIG_SENSORS_LTC4260=m -CONFIG_SENSORS_LTC4261=m -CONFIG_SENSORS_MAX1111=m -CONFIG_SENSORS_MAX16065=m -CONFIG_SENSORS_MAX1619=m -CONFIG_SENSORS_MAX1668=m -CONFIG_SENSORS_MAX197=m -CONFIG_SENSORS_MAX31722=m -CONFIG_SENSORS_MAX31730=m -CONFIG_SENSORS_MAX6621=m -CONFIG_SENSORS_MAX6639=m -CONFIG_SENSORS_MAX6642=m -CONFIG_SENSORS_MAX6650=m -CONFIG_SENSORS_MAX6697=m -CONFIG_SENSORS_MAX31790=m -CONFIG_SENSORS_MCP3021=m -CONFIG_SENSORS_MLXREG_FAN=m -CONFIG_SENSORS_TC654=m -CONFIG_SENSORS_MENF21BMC_HWMON=m -# CONFIG_SENSORS_MR75203 is not set -CONFIG_SENSORS_ADCXX=m -CONFIG_SENSORS_LM63=m -CONFIG_SENSORS_LM70=m -CONFIG_SENSORS_LM73=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM77=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM83=m -CONFIG_SENSORS_LM85=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM90=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_LM93=m -CONFIG_SENSORS_LM95234=m -CONFIG_SENSORS_LM95241=m -CONFIG_SENSORS_LM95245=m -CONFIG_SENSORS_PC87360=m -CONFIG_SENSORS_PC87427=m -CONFIG_SENSORS_NTC_THERMISTOR=m -CONFIG_SENSORS_NCT6683=m -CONFIG_SENSORS_NCT6775=m -CONFIG_SENSORS_NCT7802=m -CONFIG_SENSORS_NCT7904=m -CONFIG_SENSORS_NPCM7XX=m -CONFIG_SENSORS_PCF8591=m -CONFIG_PMBUS=m -CONFIG_SENSORS_PMBUS=m -# CONFIG_SENSORS_ADM1266 is not set -CONFIG_SENSORS_ADM1275=m -CONFIG_SENSORS_BEL_PFE=m -CONFIG_SENSORS_IBM_CFFPS=m -CONFIG_SENSORS_INSPUR_IPSPS=m -CONFIG_SENSORS_IR35221=m -CONFIG_SENSORS_IR38064=m -CONFIG_SENSORS_IRPS5401=m -CONFIG_SENSORS_ISL68137=m -CONFIG_SENSORS_LM25066=m -CONFIG_SENSORS_LTC2978=m -# CONFIG_SENSORS_LTC2978_REGULATOR is not set -CONFIG_SENSORS_LTC3815=m -CONFIG_SENSORS_MAX16064=m -CONFIG_SENSORS_MAX16601=m -CONFIG_SENSORS_MAX20730=m -CONFIG_SENSORS_MAX20751=m -CONFIG_SENSORS_MAX31785=m -CONFIG_SENSORS_MAX34440=m -CONFIG_SENSORS_MAX8688=m -# CONFIG_SENSORS_MP2975 is not set -CONFIG_SENSORS_PXE1610=m -CONFIG_SENSORS_TPS40422=m -CONFIG_SENSORS_TPS53679=m -CONFIG_SENSORS_UCD9000=m 
-CONFIG_SENSORS_UCD9200=m -CONFIG_SENSORS_XDPE122=m -CONFIG_SENSORS_ZL6100=m -CONFIG_SENSORS_PWM_FAN=m -CONFIG_SENSORS_SHT15=m -CONFIG_SENSORS_SHT21=m -CONFIG_SENSORS_SHT3x=m -CONFIG_SENSORS_SHTC1=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_DME1737=m -CONFIG_SENSORS_EMC1403=m -CONFIG_SENSORS_EMC2103=m -CONFIG_SENSORS_EMC6W201=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_SMSC47M192=m -CONFIG_SENSORS_SMSC47B397=m -CONFIG_SENSORS_SCH56XX_COMMON=m -CONFIG_SENSORS_SCH5627=m -CONFIG_SENSORS_SCH5636=m -CONFIG_SENSORS_STTS751=m -CONFIG_SENSORS_SMM665=m -CONFIG_SENSORS_ADC128D818=m -CONFIG_SENSORS_ADS7828=m -CONFIG_SENSORS_ADS7871=m -CONFIG_SENSORS_AMC6821=m -CONFIG_SENSORS_INA209=m -CONFIG_SENSORS_INA2XX=m -CONFIG_SENSORS_INA3221=m -CONFIG_SENSORS_TC74=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_TMP102=m -CONFIG_SENSORS_TMP103=m -CONFIG_SENSORS_TMP108=m -CONFIG_SENSORS_TMP401=m -CONFIG_SENSORS_TMP421=m -CONFIG_SENSORS_TMP513=m -CONFIG_SENSORS_VIA_CPUTEMP=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83773G=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_W83791D=m -CONFIG_SENSORS_W83792D=m -CONFIG_SENSORS_W83793=m -CONFIG_SENSORS_W83795=m -# CONFIG_SENSORS_W83795_FANCTRL is not set -CONFIG_SENSORS_W83L785TS=m -CONFIG_SENSORS_W83L786NG=m -CONFIG_SENSORS_W83627HF=m -CONFIG_SENSORS_W83627EHF=m -CONFIG_SENSORS_WM831X=m -CONFIG_SENSORS_WM8350=m -CONFIG_SENSORS_XGENE=m - -# -# ACPI drivers -# -CONFIG_SENSORS_ACPI_POWER=m -CONFIG_SENSORS_ATK0110=m -CONFIG_THERMAL=y -# CONFIG_THERMAL_NETLINK is not set -# CONFIG_THERMAL_STATISTICS is not set -CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=100 -CONFIG_THERMAL_HWMON=y -CONFIG_THERMAL_OF=y -CONFIG_THERMAL_WRITABLE_TRIPS=y -CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y -# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set -# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set -# CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR is not set -CONFIG_THERMAL_GOV_FAIR_SHARE=y -CONFIG_THERMAL_GOV_STEP_WISE=y -CONFIG_THERMAL_GOV_BANG_BANG=y -CONFIG_THERMAL_GOV_USER_SPACE=y -CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y -CONFIG_CPU_THERMAL=y -CONFIG_CPU_FREQ_THERMAL=y -CONFIG_CPU_IDLE_THERMAL=y -CONFIG_DEVFREQ_THERMAL=y -# CONFIG_THERMAL_EMULATION is not set -CONFIG_THERMAL_MMIO=m -CONFIG_MAX77620_THERMAL=m -CONFIG_DA9062_THERMAL=m - -# -# Intel thermal drivers -# -CONFIG_INTEL_POWERCLAMP=m -CONFIG_X86_PKG_TEMP_THERMAL=m -CONFIG_INTEL_SOC_DTS_IOSF_CORE=m -CONFIG_INTEL_SOC_DTS_THERMAL=m - -# -# ACPI INT340X thermal drivers -# -CONFIG_INT340X_THERMAL=m -CONFIG_ACPI_THERMAL_REL=m -CONFIG_INT3406_THERMAL=m -CONFIG_PROC_THERMAL_MMIO_RAPL=y -# end of ACPI INT340X thermal drivers - -CONFIG_INTEL_BXT_PMIC_THERMAL=m -CONFIG_INTEL_PCH_THERMAL=m -# end of Intel thermal drivers - -# CONFIG_TI_SOC_THERMAL is not set -CONFIG_GENERIC_ADC_THERMAL=m -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_CORE=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y -CONFIG_WATCHDOG_OPEN_TIMEOUT=0 -CONFIG_WATCHDOG_SYSFS=y - -# -# Watchdog Pretimeout Governors -# -CONFIG_WATCHDOG_PRETIMEOUT_GOV=y -CONFIG_WATCHDOG_PRETIMEOUT_GOV_SEL=m -CONFIG_WATCHDOG_PRETIMEOUT_GOV_NOOP=m -CONFIG_WATCHDOG_PRETIMEOUT_GOV_PANIC=y -# CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_NOOP is not set -CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_PANIC=y - -# -# Watchdog Device Drivers -# -CONFIG_SOFT_WATCHDOG=m -# CONFIG_SOFT_WATCHDOG_PRETIMEOUT is not set -CONFIG_BD70528_WATCHDOG=m -CONFIG_DA9052_WATCHDOG=m -CONFIG_DA9055_WATCHDOG=m -CONFIG_DA9063_WATCHDOG=m -CONFIG_DA9062_WATCHDOG=m -CONFIG_GPIO_WATCHDOG=m 
-CONFIG_MENF21BMC_WATCHDOG=m -CONFIG_MENZ069_WATCHDOG=m -CONFIG_WDAT_WDT=m -CONFIG_WM831X_WATCHDOG=m -CONFIG_WM8350_WATCHDOG=m -CONFIG_XILINX_WATCHDOG=m -CONFIG_ZIIRAVE_WATCHDOG=m -CONFIG_RAVE_SP_WATCHDOG=m -CONFIG_MLX_WDT=m -CONFIG_CADENCE_WATCHDOG=m -CONFIG_DW_WATCHDOG=m -CONFIG_RN5T618_WATCHDOG=m -CONFIG_TWL4030_WATCHDOG=m -CONFIG_MAX63XX_WATCHDOG=m -CONFIG_MAX77620_WATCHDOG=m -CONFIG_RETU_WATCHDOG=m -CONFIG_STPMIC1_WATCHDOG=m -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_EBC_C384_WDT=m -CONFIG_F71808E_WDT=m -CONFIG_SP5100_TCO=m -CONFIG_SBC_FITPC2_WATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_IBMASR=m -CONFIG_WAFER_WDT=m -CONFIG_I6300ESB_WDT=m -CONFIG_IE6XX_WDT=m -CONFIG_ITCO_WDT=m -CONFIG_ITCO_VENDOR_SUPPORT=y -CONFIG_IT8712F_WDT=m -CONFIG_IT87_WDT=m -CONFIG_HP_WATCHDOG=m -CONFIG_HPWDT_NMI_DECODING=y -CONFIG_KEMPLD_WDT=m -CONFIG_SC1200_WDT=m -CONFIG_PC87413_WDT=m -CONFIG_NV_TCO=m -CONFIG_60XX_WDT=m -CONFIG_CPU5_WDT=m -CONFIG_SMSC_SCH311X_WDT=m -CONFIG_SMSC37B787_WDT=m -CONFIG_TQMX86_WDT=m -CONFIG_VIA_WDT=m -CONFIG_W83627HF_WDT=m -CONFIG_W83877F_WDT=m -CONFIG_W83977F_WDT=m -CONFIG_MACHZ_WDT=m -CONFIG_SBC_EPX_C3_WATCHDOG=m -CONFIG_INTEL_MEI_WDT=m -CONFIG_NI903X_WDT=m -CONFIG_NIC7018_WDT=m -CONFIG_MEN_A21_WDT=m -CONFIG_XEN_WDT=m - -# -# PCI-based Watchdog Cards -# -CONFIG_PCIPCWATCHDOG=m -CONFIG_WDTPCI=m - -# -# USB-based Watchdog Cards -# -CONFIG_USBPCWATCHDOG=m -CONFIG_SSB_POSSIBLE=y -CONFIG_SSB=m -CONFIG_SSB_SPROM=y -CONFIG_SSB_BLOCKIO=y -CONFIG_SSB_PCIHOST_POSSIBLE=y -CONFIG_SSB_PCIHOST=y -CONFIG_SSB_B43_PCI_BRIDGE=y -CONFIG_SSB_PCMCIAHOST_POSSIBLE=y -CONFIG_SSB_PCMCIAHOST=y -CONFIG_SSB_SDIOHOST_POSSIBLE=y -CONFIG_SSB_SDIOHOST=y -CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y -CONFIG_SSB_DRIVER_PCICORE=y -CONFIG_SSB_DRIVER_GPIO=y -CONFIG_BCMA_POSSIBLE=y -CONFIG_BCMA=m -CONFIG_BCMA_BLOCKIO=y -CONFIG_BCMA_HOST_PCI_POSSIBLE=y -CONFIG_BCMA_HOST_PCI=y -# CONFIG_BCMA_HOST_SOC is not set -CONFIG_BCMA_DRIVER_PCI=y -CONFIG_BCMA_DRIVER_GMAC_CMN=y -CONFIG_BCMA_DRIVER_GPIO=y -# CONFIG_BCMA_DEBUG is not set - -# -# Multifunction device drivers -# -CONFIG_MFD_CORE=y -CONFIG_MFD_ACT8945A=m -CONFIG_MFD_AS3711=y -CONFIG_MFD_AS3722=m -CONFIG_PMIC_ADP5520=y -CONFIG_MFD_AAT2870_CORE=y -CONFIG_MFD_ATMEL_FLEXCOM=m -CONFIG_MFD_ATMEL_HLCDC=m -CONFIG_MFD_BCM590XX=m -CONFIG_MFD_BD9571MWV=m -CONFIG_MFD_AXP20X=m -CONFIG_MFD_AXP20X_I2C=m -CONFIG_MFD_CROS_EC_DEV=m -CONFIG_MFD_MADERA=m -CONFIG_MFD_MADERA_I2C=m -CONFIG_MFD_MADERA_SPI=m -CONFIG_MFD_CS47L15=y -CONFIG_MFD_CS47L35=y -CONFIG_MFD_CS47L85=y -CONFIG_MFD_CS47L90=y -CONFIG_MFD_CS47L92=y -CONFIG_PMIC_DA903X=y -CONFIG_PMIC_DA9052=y -CONFIG_MFD_DA9052_SPI=y -CONFIG_MFD_DA9052_I2C=y -CONFIG_MFD_DA9055=y -CONFIG_MFD_DA9062=m -CONFIG_MFD_DA9063=m -CONFIG_MFD_DA9150=m -CONFIG_MFD_DLN2=m -CONFIG_MFD_GATEWORKS_GSC=m -CONFIG_MFD_MC13XXX=m -CONFIG_MFD_MC13XXX_SPI=m -CONFIG_MFD_MC13XXX_I2C=m -CONFIG_MFD_MP2629=m -CONFIG_MFD_HI6421_PMIC=m -CONFIG_HTC_PASIC3=m -CONFIG_HTC_I2CPLD=y -CONFIG_MFD_INTEL_QUARK_I2C_GPIO=m -CONFIG_LPC_ICH=m -CONFIG_LPC_SCH=m -CONFIG_INTEL_SOC_PMIC=y -CONFIG_INTEL_SOC_PMIC_BXTWC=m -CONFIG_INTEL_SOC_PMIC_CHTWC=y -CONFIG_INTEL_SOC_PMIC_CHTDC_TI=m -CONFIG_INTEL_SOC_PMIC_MRFLD=m -CONFIG_MFD_INTEL_LPSS=m -CONFIG_MFD_INTEL_LPSS_ACPI=m -CONFIG_MFD_INTEL_LPSS_PCI=m -CONFIG_MFD_INTEL_MSIC=y -CONFIG_MFD_INTEL_PMC_BXT=m -CONFIG_MFD_IQS62X=m -CONFIG_MFD_JANZ_CMODIO=m -CONFIG_MFD_KEMPLD=m -CONFIG_MFD_88PM800=m -CONFIG_MFD_88PM805=m -CONFIG_MFD_88PM860X=y -CONFIG_MFD_MAX14577=m -CONFIG_MFD_MAX77620=y 
-CONFIG_MFD_MAX77650=m -CONFIG_MFD_MAX77686=m -CONFIG_MFD_MAX77693=m -CONFIG_MFD_MAX77843=y -CONFIG_MFD_MAX8907=m -CONFIG_MFD_MAX8925=y -CONFIG_MFD_MAX8997=y -CONFIG_MFD_MAX8998=y -CONFIG_MFD_MT6360=m -CONFIG_MFD_MT6397=m -CONFIG_MFD_MENF21BMC=m -CONFIG_EZX_PCAP=y -CONFIG_MFD_CPCAP=m -CONFIG_MFD_VIPERBOARD=m -CONFIG_MFD_RETU=m -CONFIG_MFD_PCF50633=m -CONFIG_PCF50633_ADC=m -CONFIG_PCF50633_GPIO=m -CONFIG_UCB1400_CORE=m -CONFIG_MFD_RDC321X=m -CONFIG_MFD_RT5033=m -CONFIG_MFD_RC5T583=y -CONFIG_MFD_RK808=m -CONFIG_MFD_RN5T618=m -CONFIG_MFD_SEC_CORE=y -CONFIG_MFD_SI476X_CORE=m -# CONFIG_MFD_SL28CPLD is not set -CONFIG_MFD_SM501=m -CONFIG_MFD_SM501_GPIO=y -CONFIG_MFD_SKY81452=m -CONFIG_ABX500_CORE=y -CONFIG_AB3100_CORE=y -CONFIG_AB3100_OTP=y -CONFIG_MFD_STMPE=y - -# -# STMicroelectronics STMPE Interface Drivers -# -CONFIG_STMPE_I2C=y -CONFIG_STMPE_SPI=y -# end of STMicroelectronics STMPE Interface Drivers - -CONFIG_MFD_SYSCON=y -CONFIG_MFD_TI_AM335X_TSCADC=m -CONFIG_MFD_LP3943=m -CONFIG_MFD_LP8788=y -CONFIG_MFD_TI_LMU=m -CONFIG_MFD_PALMAS=y -CONFIG_TPS6105X=m -CONFIG_TPS65010=m -CONFIG_TPS6507X=m -CONFIG_MFD_TPS65086=m -CONFIG_MFD_TPS65090=y -CONFIG_MFD_TPS65217=m -CONFIG_MFD_TPS68470=y -CONFIG_MFD_TI_LP873X=m -CONFIG_MFD_TI_LP87565=m -CONFIG_MFD_TPS65218=m -CONFIG_MFD_TPS6586X=y -CONFIG_MFD_TPS65910=y -CONFIG_MFD_TPS65912=m -CONFIG_MFD_TPS65912_I2C=m -CONFIG_MFD_TPS65912_SPI=m -CONFIG_MFD_TPS80031=y -CONFIG_TWL4030_CORE=y -CONFIG_MFD_TWL4030_AUDIO=y -CONFIG_TWL6040_CORE=y -CONFIG_MFD_WL1273_CORE=m -CONFIG_MFD_LM3533=m -CONFIG_MFD_TC3589X=y -CONFIG_MFD_TQMX86=m -CONFIG_MFD_VX855=m -CONFIG_MFD_LOCHNAGAR=y -CONFIG_MFD_ARIZONA=y -CONFIG_MFD_ARIZONA_I2C=m -CONFIG_MFD_ARIZONA_SPI=m -CONFIG_MFD_CS47L24=y -CONFIG_MFD_WM5102=y -CONFIG_MFD_WM5110=y -CONFIG_MFD_WM8997=y -CONFIG_MFD_WM8998=y -CONFIG_MFD_WM8400=y -CONFIG_MFD_WM831X=y -CONFIG_MFD_WM831X_I2C=y -CONFIG_MFD_WM831X_SPI=y -CONFIG_MFD_WM8350=y -CONFIG_MFD_WM8350_I2C=y -CONFIG_MFD_WM8994=m -CONFIG_MFD_ROHM_BD718XX=m -CONFIG_MFD_ROHM_BD70528=m -CONFIG_MFD_ROHM_BD71828=m -CONFIG_MFD_STPMIC1=m -CONFIG_MFD_STMFX=m -CONFIG_MFD_WCD934X=m -CONFIG_RAVE_SP_CORE=m -# CONFIG_MFD_INTEL_M10_BMC is not set -# end of Multifunction device drivers - -CONFIG_REGULATOR=y -# CONFIG_REGULATOR_DEBUG is not set -CONFIG_REGULATOR_FIXED_VOLTAGE=m -CONFIG_REGULATOR_VIRTUAL_CONSUMER=m -CONFIG_REGULATOR_USERSPACE_CONSUMER=m -CONFIG_REGULATOR_88PG86X=m -CONFIG_REGULATOR_88PM800=m -CONFIG_REGULATOR_88PM8607=m -CONFIG_REGULATOR_ACT8865=m -CONFIG_REGULATOR_ACT8945A=m -CONFIG_REGULATOR_AD5398=m -CONFIG_REGULATOR_AAT2870=m -CONFIG_REGULATOR_AB3100=m -CONFIG_REGULATOR_ARIZONA_LDO1=m -CONFIG_REGULATOR_ARIZONA_MICSUPP=m -CONFIG_REGULATOR_AS3711=m -CONFIG_REGULATOR_AS3722=m -CONFIG_REGULATOR_AXP20X=m -CONFIG_REGULATOR_BCM590XX=m -CONFIG_REGULATOR_BD70528=m -CONFIG_REGULATOR_BD71828=m -CONFIG_REGULATOR_BD718XX=m -CONFIG_REGULATOR_BD9571MWV=m -CONFIG_REGULATOR_CPCAP=m -CONFIG_REGULATOR_CROS_EC=m -CONFIG_REGULATOR_DA903X=m -CONFIG_REGULATOR_DA9052=m -CONFIG_REGULATOR_DA9055=m -CONFIG_REGULATOR_DA9062=m -CONFIG_REGULATOR_DA9063=m -CONFIG_REGULATOR_DA9210=m -CONFIG_REGULATOR_DA9211=m -CONFIG_REGULATOR_FAN53555=m -CONFIG_REGULATOR_FAN53880=m -CONFIG_REGULATOR_GPIO=m -CONFIG_REGULATOR_HI6421=m -CONFIG_REGULATOR_HI6421V530=m -CONFIG_REGULATOR_ISL9305=m -CONFIG_REGULATOR_ISL6271A=m -CONFIG_REGULATOR_LM363X=m -CONFIG_REGULATOR_LOCHNAGAR=m -CONFIG_REGULATOR_LP3971=m -CONFIG_REGULATOR_LP3972=m -CONFIG_REGULATOR_LP872X=m -CONFIG_REGULATOR_LP873X=m -CONFIG_REGULATOR_LP8755=m 
-CONFIG_REGULATOR_LP87565=m -CONFIG_REGULATOR_LP8788=m -CONFIG_REGULATOR_LTC3589=m -CONFIG_REGULATOR_LTC3676=m -CONFIG_REGULATOR_MAX14577=m -CONFIG_REGULATOR_MAX1586=m -CONFIG_REGULATOR_MAX77620=m -CONFIG_REGULATOR_MAX77650=m -CONFIG_REGULATOR_MAX8649=m -CONFIG_REGULATOR_MAX8660=m -CONFIG_REGULATOR_MAX8907=m -CONFIG_REGULATOR_MAX8925=m -CONFIG_REGULATOR_MAX8952=m -CONFIG_REGULATOR_MAX8973=m -CONFIG_REGULATOR_MAX8997=m -CONFIG_REGULATOR_MAX8998=m -CONFIG_REGULATOR_MAX77686=m -CONFIG_REGULATOR_MAX77693=m -CONFIG_REGULATOR_MAX77802=m -CONFIG_REGULATOR_MAX77826=m -CONFIG_REGULATOR_MC13XXX_CORE=m -CONFIG_REGULATOR_MC13783=m -CONFIG_REGULATOR_MC13892=m -CONFIG_REGULATOR_MCP16502=m -CONFIG_REGULATOR_MP5416=m -CONFIG_REGULATOR_MP8859=m -CONFIG_REGULATOR_MP886X=m -CONFIG_REGULATOR_MPQ7920=m -CONFIG_REGULATOR_MT6311=m -CONFIG_REGULATOR_MT6323=m -CONFIG_REGULATOR_MT6358=m -# CONFIG_REGULATOR_MT6360 is not set -CONFIG_REGULATOR_MT6397=m -CONFIG_REGULATOR_PALMAS=m -CONFIG_REGULATOR_PCA9450=m -CONFIG_REGULATOR_PCAP=m -CONFIG_REGULATOR_PCF50633=m -CONFIG_REGULATOR_PFUZE100=m -CONFIG_REGULATOR_PV88060=m -CONFIG_REGULATOR_PV88080=m -CONFIG_REGULATOR_PV88090=m -CONFIG_REGULATOR_PWM=m -CONFIG_REGULATOR_QCOM_SPMI=m -CONFIG_REGULATOR_QCOM_USB_VBUS=m -# CONFIG_REGULATOR_RASPBERRYPI_TOUCHSCREEN_ATTINY is not set -CONFIG_REGULATOR_RC5T583=m -CONFIG_REGULATOR_RK808=m -CONFIG_REGULATOR_RN5T618=m -CONFIG_REGULATOR_ROHM=m -# CONFIG_REGULATOR_RT4801 is not set -CONFIG_REGULATOR_RT5033=m -# CONFIG_REGULATOR_RTMV20 is not set -CONFIG_REGULATOR_S2MPA01=m -CONFIG_REGULATOR_S2MPS11=m -CONFIG_REGULATOR_S5M8767=m -CONFIG_REGULATOR_SKY81452=m -CONFIG_REGULATOR_SLG51000=m -CONFIG_REGULATOR_STPMIC1=m -CONFIG_REGULATOR_SY8106A=m -CONFIG_REGULATOR_SY8824X=m -CONFIG_REGULATOR_SY8827N=m -CONFIG_REGULATOR_TPS51632=m -CONFIG_REGULATOR_TPS6105X=m -CONFIG_REGULATOR_TPS62360=m -CONFIG_REGULATOR_TPS65023=m -CONFIG_REGULATOR_TPS6507X=m -CONFIG_REGULATOR_TPS65086=m -CONFIG_REGULATOR_TPS65090=m -CONFIG_REGULATOR_TPS65132=m -CONFIG_REGULATOR_TPS65217=m -CONFIG_REGULATOR_TPS65218=m -CONFIG_REGULATOR_TPS6524X=m -CONFIG_REGULATOR_TPS6586X=m -CONFIG_REGULATOR_TPS65910=m -CONFIG_REGULATOR_TPS65912=m -CONFIG_REGULATOR_TPS80031=m -CONFIG_REGULATOR_TWL4030=m -CONFIG_REGULATOR_VCTRL=m -CONFIG_REGULATOR_WM831X=m -CONFIG_REGULATOR_WM8350=m -CONFIG_REGULATOR_WM8400=m -CONFIG_REGULATOR_WM8994=m -CONFIG_REGULATOR_QCOM_LABIBB=m -CONFIG_RC_CORE=m -CONFIG_RC_MAP=m -CONFIG_LIRC=y -CONFIG_RC_DECODERS=y -CONFIG_IR_NEC_DECODER=m -CONFIG_IR_RC5_DECODER=m -CONFIG_IR_RC6_DECODER=m -CONFIG_IR_JVC_DECODER=m -CONFIG_IR_SONY_DECODER=m -CONFIG_IR_SANYO_DECODER=m -CONFIG_IR_SHARP_DECODER=m -CONFIG_IR_MCE_KBD_DECODER=m -CONFIG_IR_XMP_DECODER=m -CONFIG_IR_IMON_DECODER=m -CONFIG_IR_RCMM_DECODER=m -CONFIG_RC_DEVICES=y -CONFIG_RC_ATI_REMOTE=m -CONFIG_IR_ENE=m -CONFIG_IR_HIX5HD2=m -CONFIG_IR_IMON=m -CONFIG_IR_IMON_RAW=m -CONFIG_IR_MCEUSB=m -CONFIG_IR_ITE_CIR=m -CONFIG_IR_FINTEK=m -CONFIG_IR_NUVOTON=m -CONFIG_IR_REDRAT3=m -CONFIG_IR_SPI=m -CONFIG_IR_STREAMZAP=m -CONFIG_IR_WINBOND_CIR=m -CONFIG_IR_IGORPLUGUSB=m -CONFIG_IR_IGUANA=m -CONFIG_IR_TTUSBIR=m -CONFIG_RC_LOOPBACK=m -CONFIG_IR_GPIO_CIR=m -CONFIG_IR_GPIO_TX=m -CONFIG_IR_PWM_TX=m -CONFIG_IR_SERIAL=m -CONFIG_IR_SERIAL_TRANSMITTER=y -CONFIG_IR_SIR=m -CONFIG_RC_XBOX_DVD=m -CONFIG_IR_TOY=m -CONFIG_CEC_CORE=m -CONFIG_CEC_NOTIFIER=y -CONFIG_CEC_PIN=y -CONFIG_MEDIA_CEC_RC=y -# CONFIG_CEC_PIN_ERROR_INJ is not set -CONFIG_MEDIA_CEC_SUPPORT=y -CONFIG_CEC_CH7322=m -CONFIG_CEC_CROS_EC=m -CONFIG_CEC_GPIO=m -CONFIG_CEC_SECO=m 
-CONFIG_CEC_SECO_RC=y -CONFIG_USB_PULSE8_CEC=m -CONFIG_USB_RAINSHADOW_CEC=m -CONFIG_MEDIA_SUPPORT=m -# CONFIG_MEDIA_SUPPORT_FILTER is not set -CONFIG_MEDIA_SUBDRV_AUTOSELECT=y - -# -# Media device types -# -CONFIG_MEDIA_CAMERA_SUPPORT=y -CONFIG_MEDIA_ANALOG_TV_SUPPORT=y -CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y -CONFIG_MEDIA_RADIO_SUPPORT=y -CONFIG_MEDIA_SDR_SUPPORT=y -CONFIG_MEDIA_PLATFORM_SUPPORT=y -CONFIG_MEDIA_TEST_SUPPORT=y -# end of Media device types - -# -# Media core support -# -CONFIG_VIDEO_DEV=m -CONFIG_MEDIA_CONTROLLER=y -CONFIG_DVB_CORE=m -# end of Media core support - -# -# Video4Linux options -# -CONFIG_VIDEO_V4L2=m -CONFIG_VIDEO_V4L2_I2C=y -CONFIG_VIDEO_V4L2_SUBDEV_API=y -# CONFIG_VIDEO_ADV_DEBUG is not set -# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set -CONFIG_VIDEO_TUNER=m -CONFIG_V4L2_MEM2MEM_DEV=m -CONFIG_V4L2_FLASH_LED_CLASS=m -CONFIG_V4L2_FWNODE=m -CONFIG_VIDEOBUF_GEN=m -CONFIG_VIDEOBUF_DMA_SG=m -CONFIG_VIDEOBUF_VMALLOC=m -# end of Video4Linux options - -# -# Media controller options -# -CONFIG_MEDIA_CONTROLLER_DVB=y -CONFIG_MEDIA_CONTROLLER_REQUEST_API=y - -# -# Please notice that the enabled Media controller Request API is EXPERIMENTAL -# -# end of Media controller options - -# -# Digital TV options -# -CONFIG_DVB_MMAP=y -CONFIG_DVB_NET=y -CONFIG_DVB_MAX_ADAPTERS=16 -# CONFIG_DVB_DYNAMIC_MINORS is not set -# CONFIG_DVB_DEMUX_SECTION_LOSS_LOG is not set -# CONFIG_DVB_ULE_DEBUG is not set -# end of Digital TV options - -# -# Media drivers -# -CONFIG_TTPCI_EEPROM=m -CONFIG_MEDIA_USB_SUPPORT=y - -# -# Webcam devices -# -CONFIG_USB_VIDEO_CLASS=m -CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y -CONFIG_USB_GSPCA=m -CONFIG_USB_M5602=m -CONFIG_USB_STV06XX=m -CONFIG_USB_GL860=m -CONFIG_USB_GSPCA_BENQ=m -CONFIG_USB_GSPCA_CONEX=m -CONFIG_USB_GSPCA_CPIA1=m -CONFIG_USB_GSPCA_DTCS033=m -CONFIG_USB_GSPCA_ETOMS=m -CONFIG_USB_GSPCA_FINEPIX=m -CONFIG_USB_GSPCA_JEILINJ=m -CONFIG_USB_GSPCA_JL2005BCD=m -CONFIG_USB_GSPCA_KINECT=m -CONFIG_USB_GSPCA_KONICA=m -CONFIG_USB_GSPCA_MARS=m -CONFIG_USB_GSPCA_MR97310A=m -CONFIG_USB_GSPCA_NW80X=m -CONFIG_USB_GSPCA_OV519=m -CONFIG_USB_GSPCA_OV534=m -CONFIG_USB_GSPCA_OV534_9=m -CONFIG_USB_GSPCA_PAC207=m -CONFIG_USB_GSPCA_PAC7302=m -CONFIG_USB_GSPCA_PAC7311=m -CONFIG_USB_GSPCA_SE401=m -CONFIG_USB_GSPCA_SN9C2028=m -CONFIG_USB_GSPCA_SN9C20X=m -CONFIG_USB_GSPCA_SONIXB=m -CONFIG_USB_GSPCA_SONIXJ=m -CONFIG_USB_GSPCA_SPCA500=m -CONFIG_USB_GSPCA_SPCA501=m -CONFIG_USB_GSPCA_SPCA505=m -CONFIG_USB_GSPCA_SPCA506=m -CONFIG_USB_GSPCA_SPCA508=m -CONFIG_USB_GSPCA_SPCA561=m -CONFIG_USB_GSPCA_SPCA1528=m -CONFIG_USB_GSPCA_SQ905=m -CONFIG_USB_GSPCA_SQ905C=m -CONFIG_USB_GSPCA_SQ930X=m -CONFIG_USB_GSPCA_STK014=m -CONFIG_USB_GSPCA_STK1135=m -CONFIG_USB_GSPCA_STV0680=m -CONFIG_USB_GSPCA_SUNPLUS=m -CONFIG_USB_GSPCA_T613=m -CONFIG_USB_GSPCA_TOPRO=m -CONFIG_USB_GSPCA_TOUPTEK=m -CONFIG_USB_GSPCA_TV8532=m -CONFIG_USB_GSPCA_VC032X=m -CONFIG_USB_GSPCA_VICAM=m -CONFIG_USB_GSPCA_XIRLINK_CIT=m -CONFIG_USB_GSPCA_ZC3XX=m -CONFIG_USB_PWC=m -# CONFIG_USB_PWC_DEBUG is not set -CONFIG_USB_PWC_INPUT_EVDEV=y -CONFIG_VIDEO_CPIA2=m -CONFIG_USB_ZR364XX=m -CONFIG_USB_STKWEBCAM=m -CONFIG_USB_S2255=m -CONFIG_VIDEO_USBTV=m - -# -# Analog TV USB devices -# -CONFIG_VIDEO_PVRUSB2=m -CONFIG_VIDEO_PVRUSB2_SYSFS=y -CONFIG_VIDEO_PVRUSB2_DVB=y -# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set -CONFIG_VIDEO_HDPVR=m -CONFIG_VIDEO_STK1160_COMMON=m -CONFIG_VIDEO_STK1160=m -CONFIG_VIDEO_GO7007=m -CONFIG_VIDEO_GO7007_USB=m -CONFIG_VIDEO_GO7007_LOADER=m -CONFIG_VIDEO_GO7007_USB_S2250_BOARD=m - -# -# Analog/digital TV USB devices -# 
-CONFIG_VIDEO_AU0828=m -CONFIG_VIDEO_AU0828_V4L2=y -CONFIG_VIDEO_AU0828_RC=y -CONFIG_VIDEO_CX231XX=m -CONFIG_VIDEO_CX231XX_RC=y -CONFIG_VIDEO_CX231XX_ALSA=m -CONFIG_VIDEO_CX231XX_DVB=m -CONFIG_VIDEO_TM6000=m -CONFIG_VIDEO_TM6000_ALSA=m -CONFIG_VIDEO_TM6000_DVB=m - -# -# Digital TV USB devices -# -CONFIG_DVB_USB=m -# CONFIG_DVB_USB_DEBUG is not set -CONFIG_DVB_USB_DIB3000MC=m -CONFIG_DVB_USB_A800=m -CONFIG_DVB_USB_DIBUSB_MB=m -CONFIG_DVB_USB_DIBUSB_MB_FAULTY=y -CONFIG_DVB_USB_DIBUSB_MC=m -CONFIG_DVB_USB_DIB0700=m -CONFIG_DVB_USB_UMT_010=m -CONFIG_DVB_USB_CXUSB=m -CONFIG_DVB_USB_CXUSB_ANALOG=y -CONFIG_DVB_USB_M920X=m -CONFIG_DVB_USB_DIGITV=m -CONFIG_DVB_USB_VP7045=m -CONFIG_DVB_USB_VP702X=m -CONFIG_DVB_USB_GP8PSK=m -CONFIG_DVB_USB_NOVA_T_USB2=m -CONFIG_DVB_USB_TTUSB2=m -CONFIG_DVB_USB_DTT200U=m -CONFIG_DVB_USB_OPERA1=m -CONFIG_DVB_USB_AF9005=m -CONFIG_DVB_USB_AF9005_REMOTE=m -CONFIG_DVB_USB_PCTV452E=m -CONFIG_DVB_USB_DW2102=m -CONFIG_DVB_USB_CINERGY_T2=m -CONFIG_DVB_USB_DTV5100=m -CONFIG_DVB_USB_AZ6027=m -CONFIG_DVB_USB_TECHNISAT_USB2=m -CONFIG_DVB_USB_V2=m -CONFIG_DVB_USB_AF9015=m -CONFIG_DVB_USB_AF9035=m -CONFIG_DVB_USB_ANYSEE=m -CONFIG_DVB_USB_AU6610=m -CONFIG_DVB_USB_AZ6007=m -CONFIG_DVB_USB_CE6230=m -CONFIG_DVB_USB_EC168=m -CONFIG_DVB_USB_GL861=m -CONFIG_DVB_USB_LME2510=m -CONFIG_DVB_USB_MXL111SF=m -CONFIG_DVB_USB_RTL28XXU=m -CONFIG_DVB_USB_DVBSKY=m -CONFIG_DVB_USB_ZD1301=m -CONFIG_DVB_TTUSB_BUDGET=m -CONFIG_DVB_TTUSB_DEC=m -CONFIG_SMS_USB_DRV=m -CONFIG_DVB_B2C2_FLEXCOP_USB=m -# CONFIG_DVB_B2C2_FLEXCOP_USB_DEBUG is not set -CONFIG_DVB_AS102=m - -# -# Webcam, TV (analog/digital) USB devices -# -CONFIG_VIDEO_EM28XX=m -CONFIG_VIDEO_EM28XX_V4L2=m -CONFIG_VIDEO_EM28XX_ALSA=m -CONFIG_VIDEO_EM28XX_DVB=m -CONFIG_VIDEO_EM28XX_RC=m - -# -# Software defined radio USB devices -# -CONFIG_USB_AIRSPY=m -CONFIG_USB_HACKRF=m -CONFIG_USB_MSI2500=m -CONFIG_MEDIA_PCI_SUPPORT=y - -# -# Media capture support -# -CONFIG_VIDEO_MEYE=m -CONFIG_VIDEO_SOLO6X10=m -CONFIG_VIDEO_TW5864=m -CONFIG_VIDEO_TW68=m -CONFIG_VIDEO_TW686X=m - -# -# Media capture/analog TV support -# -CONFIG_VIDEO_IVTV=m -# CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS is not set -CONFIG_VIDEO_IVTV_ALSA=m -CONFIG_VIDEO_FB_IVTV=m -# CONFIG_VIDEO_FB_IVTV_FORCE_PAT is not set -CONFIG_VIDEO_HEXIUM_GEMINI=m -CONFIG_VIDEO_HEXIUM_ORION=m -CONFIG_VIDEO_MXB=m -CONFIG_VIDEO_DT3155=m - -# -# Media capture/analog/hybrid TV support -# -CONFIG_VIDEO_CX18=m -CONFIG_VIDEO_CX18_ALSA=m -CONFIG_VIDEO_CX23885=m -CONFIG_MEDIA_ALTERA_CI=m -CONFIG_VIDEO_CX25821=m -CONFIG_VIDEO_CX25821_ALSA=m -CONFIG_VIDEO_CX88=m -CONFIG_VIDEO_CX88_ALSA=m -CONFIG_VIDEO_CX88_BLACKBIRD=m -CONFIG_VIDEO_CX88_DVB=m -CONFIG_VIDEO_CX88_ENABLE_VP3054=y -CONFIG_VIDEO_CX88_VP3054=m -CONFIG_VIDEO_CX88_MPEG=m -CONFIG_VIDEO_BT848=m -CONFIG_DVB_BT8XX=m -CONFIG_VIDEO_SAA7134=m -CONFIG_VIDEO_SAA7134_ALSA=m -CONFIG_VIDEO_SAA7134_RC=y -CONFIG_VIDEO_SAA7134_DVB=m -CONFIG_VIDEO_SAA7134_GO7007=m -CONFIG_VIDEO_SAA7164=m - -# -# Media digital TV PCI Adapters -# -CONFIG_DVB_AV7110_IR=y -CONFIG_DVB_AV7110=m -CONFIG_DVB_AV7110_OSD=y -CONFIG_DVB_BUDGET_CORE=m -CONFIG_DVB_BUDGET=m -CONFIG_DVB_BUDGET_CI=m -CONFIG_DVB_BUDGET_AV=m -CONFIG_DVB_BUDGET_PATCH=m -CONFIG_DVB_B2C2_FLEXCOP_PCI=m -# CONFIG_DVB_B2C2_FLEXCOP_PCI_DEBUG is not set -CONFIG_DVB_PLUTO2=m -CONFIG_DVB_DM1105=m -CONFIG_DVB_PT1=m -CONFIG_DVB_PT3=m -CONFIG_MANTIS_CORE=m -CONFIG_DVB_MANTIS=m -CONFIG_DVB_HOPPER=m -CONFIG_DVB_NGENE=m -CONFIG_DVB_DDBRIDGE=m -# CONFIG_DVB_DDBRIDGE_MSIENABLE is not set -CONFIG_DVB_SMIPCIE=m -CONFIG_DVB_NETUP_UNIDVB=m 
-CONFIG_VIDEO_IPU3_CIO2=m -CONFIG_RADIO_ADAPTERS=y -CONFIG_RADIO_TEA575X=m -CONFIG_RADIO_SI470X=m -CONFIG_USB_SI470X=m -CONFIG_I2C_SI470X=m -CONFIG_RADIO_SI4713=m -CONFIG_USB_SI4713=m -CONFIG_PLATFORM_SI4713=m -CONFIG_I2C_SI4713=m -CONFIG_RADIO_SI476X=m -CONFIG_USB_MR800=m -CONFIG_USB_DSBR=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_SHARK=m -CONFIG_RADIO_SHARK2=m -CONFIG_USB_KEENE=m -CONFIG_USB_RAREMONO=m -CONFIG_USB_MA901=m -CONFIG_RADIO_TEA5764=m -CONFIG_RADIO_SAA7706H=m -CONFIG_RADIO_TEF6862=m -CONFIG_RADIO_WL1273=m -CONFIG_RADIO_WL128X=m -CONFIG_MEDIA_COMMON_OPTIONS=y - -# -# common driver options -# -CONFIG_VIDEO_CX2341X=m -CONFIG_VIDEO_TVEEPROM=m -CONFIG_CYPRESS_FIRMWARE=m -CONFIG_VIDEOBUF2_CORE=m -CONFIG_VIDEOBUF2_V4L2=m -CONFIG_VIDEOBUF2_MEMOPS=m -CONFIG_VIDEOBUF2_DMA_CONTIG=m -CONFIG_VIDEOBUF2_VMALLOC=m -CONFIG_VIDEOBUF2_DMA_SG=m -CONFIG_VIDEOBUF2_DVB=m -CONFIG_DVB_B2C2_FLEXCOP=m -CONFIG_VIDEO_SAA7146=m -CONFIG_VIDEO_SAA7146_VV=m -CONFIG_SMS_SIANO_MDTV=m -CONFIG_SMS_SIANO_RC=y -# CONFIG_SMS_SIANO_DEBUGFS is not set -CONFIG_VIDEO_V4L2_TPG=m -CONFIG_V4L_PLATFORM_DRIVERS=y -CONFIG_VIDEO_CAFE_CCIC=m -CONFIG_VIDEO_CADENCE=y -CONFIG_VIDEO_CADENCE_CSI2RX=m -CONFIG_VIDEO_CADENCE_CSI2TX=m -CONFIG_VIDEO_ASPEED=m -CONFIG_VIDEO_MUX=m -CONFIG_VIDEO_XILINX=m -# CONFIG_VIDEO_XILINX_CSI2RXSS is not set -CONFIG_VIDEO_XILINX_TPG=m -CONFIG_VIDEO_XILINX_VTC=m -CONFIG_V4L_MEM2MEM_DRIVERS=y -CONFIG_VIDEO_MEM2MEM_DEINTERLACE=m -CONFIG_DVB_PLATFORM_DRIVERS=y -CONFIG_SDR_PLATFORM_DRIVERS=y - -# -# MMC/SDIO DVB adapters -# -CONFIG_SMS_SDIO_DRV=m -CONFIG_V4L_TEST_DRIVERS=y -CONFIG_VIDEO_VIMC=m -CONFIG_VIDEO_VIVID=m -CONFIG_VIDEO_VIVID_CEC=y -CONFIG_VIDEO_VIVID_MAX_DEVS=64 -CONFIG_VIDEO_VIM2M=m -CONFIG_VIDEO_VICODEC=m -# CONFIG_DVB_TEST_DRIVERS is not set - -# -# FireWire (IEEE 1394) Adapters -# -CONFIG_DVB_FIREDTV=m -CONFIG_DVB_FIREDTV_INPUT=y -# end of Media drivers - -# -# Media ancillary drivers -# -CONFIG_MEDIA_ATTACH=y - -# -# IR I2C driver auto-selected by 'Autoselect ancillary drivers' -# -CONFIG_VIDEO_IR_I2C=m - -# -# Audio decoders, processors and mixers -# -CONFIG_VIDEO_TVAUDIO=m -CONFIG_VIDEO_TDA7432=m -CONFIG_VIDEO_TDA9840=m -CONFIG_VIDEO_TDA1997X=m -CONFIG_VIDEO_TEA6415C=m -CONFIG_VIDEO_TEA6420=m -CONFIG_VIDEO_MSP3400=m -CONFIG_VIDEO_CS3308=m -CONFIG_VIDEO_CS5345=m -CONFIG_VIDEO_CS53L32A=m -CONFIG_VIDEO_TLV320AIC23B=m -CONFIG_VIDEO_UDA1342=m -CONFIG_VIDEO_WM8775=m -CONFIG_VIDEO_WM8739=m -CONFIG_VIDEO_VP27SMPX=m -CONFIG_VIDEO_SONY_BTF_MPX=m -# end of Audio decoders, processors and mixers - -# -# RDS decoders -# -CONFIG_VIDEO_SAA6588=m -# end of RDS decoders - -# -# Video decoders -# -CONFIG_VIDEO_ADV7180=m -CONFIG_VIDEO_ADV7183=m -CONFIG_VIDEO_ADV748X=m -CONFIG_VIDEO_ADV7604=m -CONFIG_VIDEO_ADV7604_CEC=y -CONFIG_VIDEO_ADV7842=m -CONFIG_VIDEO_ADV7842_CEC=y -CONFIG_VIDEO_BT819=m -CONFIG_VIDEO_BT856=m -CONFIG_VIDEO_BT866=m -CONFIG_VIDEO_KS0127=m -CONFIG_VIDEO_ML86V7667=m -CONFIG_VIDEO_SAA7110=m -CONFIG_VIDEO_SAA711X=m -CONFIG_VIDEO_TC358743=m -CONFIG_VIDEO_TC358743_CEC=y -CONFIG_VIDEO_TVP514X=m -CONFIG_VIDEO_TVP5150=m -CONFIG_VIDEO_TVP7002=m -CONFIG_VIDEO_TW2804=m -CONFIG_VIDEO_TW9903=m -CONFIG_VIDEO_TW9906=m -CONFIG_VIDEO_TW9910=m -CONFIG_VIDEO_VPX3220=m -# CONFIG_VIDEO_MAX9286 is not set - -# -# Video and audio decoders -# -CONFIG_VIDEO_SAA717X=m -CONFIG_VIDEO_CX25840=m -# end of Video decoders - -# -# Video encoders -# -CONFIG_VIDEO_SAA7127=m -CONFIG_VIDEO_SAA7185=m -CONFIG_VIDEO_ADV7170=m -CONFIG_VIDEO_ADV7175=m -CONFIG_VIDEO_ADV7343=m -CONFIG_VIDEO_ADV7393=m -CONFIG_VIDEO_AD9389B=m 
-CONFIG_VIDEO_AK881X=m -CONFIG_VIDEO_THS8200=m -# end of Video encoders - -# -# Video improvement chips -# -CONFIG_VIDEO_UPD64031A=m -CONFIG_VIDEO_UPD64083=m -# end of Video improvement chips - -# -# Audio/Video compression chips -# -CONFIG_VIDEO_SAA6752HS=m -# end of Audio/Video compression chips - -# -# SDR tuner chips -# -CONFIG_SDR_MAX2175=m -# end of SDR tuner chips - -# -# Miscellaneous helper chips -# -CONFIG_VIDEO_THS7303=m -CONFIG_VIDEO_M52790=m -CONFIG_VIDEO_I2C=m -CONFIG_VIDEO_ST_MIPID02=m -# end of Miscellaneous helper chips - -# -# Camera sensor devices -# -CONFIG_VIDEO_APTINA_PLL=m -CONFIG_VIDEO_SMIAPP_PLL=m -CONFIG_VIDEO_HI556=m -CONFIG_VIDEO_IMX214=m -CONFIG_VIDEO_IMX219=m -CONFIG_VIDEO_IMX258=m -CONFIG_VIDEO_IMX274=m -CONFIG_VIDEO_IMX290=m -CONFIG_VIDEO_IMX319=m -CONFIG_VIDEO_IMX355=m -CONFIG_VIDEO_OV2640=m -CONFIG_VIDEO_OV2659=m -CONFIG_VIDEO_OV2680=m -CONFIG_VIDEO_OV2685=m -CONFIG_VIDEO_OV2740=m -CONFIG_VIDEO_OV5640=m -CONFIG_VIDEO_OV5645=m -CONFIG_VIDEO_OV5647=m -CONFIG_VIDEO_OV6650=m -CONFIG_VIDEO_OV5670=m -CONFIG_VIDEO_OV5675=m -CONFIG_VIDEO_OV5695=m -CONFIG_VIDEO_OV7251=m -CONFIG_VIDEO_OV772X=m -CONFIG_VIDEO_OV7640=m -CONFIG_VIDEO_OV7670=m -CONFIG_VIDEO_OV7740=m -CONFIG_VIDEO_OV8856=m -CONFIG_VIDEO_OV9640=m -CONFIG_VIDEO_OV9650=m -CONFIG_VIDEO_OV13858=m -CONFIG_VIDEO_VS6624=m -CONFIG_VIDEO_MT9M001=m -CONFIG_VIDEO_MT9M032=m -CONFIG_VIDEO_MT9M111=m -CONFIG_VIDEO_MT9P031=m -CONFIG_VIDEO_MT9T001=m -CONFIG_VIDEO_MT9T112=m -CONFIG_VIDEO_MT9V011=m -CONFIG_VIDEO_MT9V032=m -CONFIG_VIDEO_MT9V111=m -CONFIG_VIDEO_SR030PC30=m -CONFIG_VIDEO_NOON010PC30=m -CONFIG_VIDEO_M5MOLS=m -# CONFIG_VIDEO_RDACM20 is not set -CONFIG_VIDEO_RJ54N1=m -CONFIG_VIDEO_S5K6AA=m -CONFIG_VIDEO_S5K6A3=m -CONFIG_VIDEO_S5K4ECGX=m -CONFIG_VIDEO_S5K5BAF=m -CONFIG_VIDEO_SMIAPP=m -CONFIG_VIDEO_ET8EK8=m -CONFIG_VIDEO_S5C73M3=m -# end of Camera sensor devices - -# -# Lens drivers -# -CONFIG_VIDEO_AD5820=m -CONFIG_VIDEO_AK7375=m -CONFIG_VIDEO_DW9714=m -CONFIG_VIDEO_DW9768=m -CONFIG_VIDEO_DW9807_VCM=m -# end of Lens drivers - -# -# Flash devices -# -CONFIG_VIDEO_ADP1653=m -CONFIG_VIDEO_LM3560=m -CONFIG_VIDEO_LM3646=m -# end of Flash devices - -# -# SPI helper chips -# -CONFIG_VIDEO_GS1662=m -# end of SPI helper chips - -# -# Media SPI Adapters -# -CONFIG_CXD2880_SPI_DRV=m -# end of Media SPI Adapters - -CONFIG_MEDIA_TUNER=m - -# -# Customize TV tuners -# -CONFIG_MEDIA_TUNER_SIMPLE=m -CONFIG_MEDIA_TUNER_TDA18250=m -CONFIG_MEDIA_TUNER_TDA8290=m -CONFIG_MEDIA_TUNER_TDA827X=m -CONFIG_MEDIA_TUNER_TDA18271=m -CONFIG_MEDIA_TUNER_TDA9887=m -CONFIG_MEDIA_TUNER_TEA5761=m -CONFIG_MEDIA_TUNER_TEA5767=m -CONFIG_MEDIA_TUNER_MSI001=m -CONFIG_MEDIA_TUNER_MT20XX=m -CONFIG_MEDIA_TUNER_MT2060=m -CONFIG_MEDIA_TUNER_MT2063=m -CONFIG_MEDIA_TUNER_MT2266=m -CONFIG_MEDIA_TUNER_MT2131=m -CONFIG_MEDIA_TUNER_QT1010=m -CONFIG_MEDIA_TUNER_XC2028=m -CONFIG_MEDIA_TUNER_XC5000=m -CONFIG_MEDIA_TUNER_XC4000=m -CONFIG_MEDIA_TUNER_MXL5005S=m -CONFIG_MEDIA_TUNER_MXL5007T=m -CONFIG_MEDIA_TUNER_MC44S803=m -CONFIG_MEDIA_TUNER_MAX2165=m -CONFIG_MEDIA_TUNER_TDA18218=m -CONFIG_MEDIA_TUNER_FC0011=m -CONFIG_MEDIA_TUNER_FC0012=m -CONFIG_MEDIA_TUNER_FC0013=m -CONFIG_MEDIA_TUNER_TDA18212=m -CONFIG_MEDIA_TUNER_E4000=m -CONFIG_MEDIA_TUNER_FC2580=m -CONFIG_MEDIA_TUNER_M88RS6000T=m -CONFIG_MEDIA_TUNER_TUA9001=m -CONFIG_MEDIA_TUNER_SI2157=m -CONFIG_MEDIA_TUNER_IT913X=m -CONFIG_MEDIA_TUNER_R820T=m -CONFIG_MEDIA_TUNER_MXL301RF=m -CONFIG_MEDIA_TUNER_QM1D1C0042=m -CONFIG_MEDIA_TUNER_QM1D1B0004=m -# end of Customize TV tuners - -# -# Customise DVB Frontends -# - -# -# 
Multistandard (satellite) frontends -# -CONFIG_DVB_STB0899=m -CONFIG_DVB_STB6100=m -CONFIG_DVB_STV090x=m -CONFIG_DVB_STV0910=m -CONFIG_DVB_STV6110x=m -CONFIG_DVB_STV6111=m -CONFIG_DVB_MXL5XX=m -CONFIG_DVB_M88DS3103=m - -# -# Multistandard (cable + terrestrial) frontends -# -CONFIG_DVB_DRXK=m -CONFIG_DVB_TDA18271C2DD=m -CONFIG_DVB_SI2165=m -CONFIG_DVB_MN88472=m -CONFIG_DVB_MN88473=m - -# -# DVB-S (satellite) frontends -# -CONFIG_DVB_CX24110=m -CONFIG_DVB_CX24123=m -CONFIG_DVB_MT312=m -CONFIG_DVB_ZL10036=m -CONFIG_DVB_ZL10039=m -CONFIG_DVB_S5H1420=m -CONFIG_DVB_STV0288=m -CONFIG_DVB_STB6000=m -CONFIG_DVB_STV0299=m -CONFIG_DVB_STV6110=m -CONFIG_DVB_STV0900=m -CONFIG_DVB_TDA8083=m -CONFIG_DVB_TDA10086=m -CONFIG_DVB_TDA8261=m -CONFIG_DVB_VES1X93=m -CONFIG_DVB_TUNER_ITD1000=m -CONFIG_DVB_TUNER_CX24113=m -CONFIG_DVB_TDA826X=m -CONFIG_DVB_TUA6100=m -CONFIG_DVB_CX24116=m -CONFIG_DVB_CX24117=m -CONFIG_DVB_CX24120=m -CONFIG_DVB_SI21XX=m -CONFIG_DVB_TS2020=m -CONFIG_DVB_DS3000=m -CONFIG_DVB_MB86A16=m -CONFIG_DVB_TDA10071=m - -# -# DVB-T (terrestrial) frontends -# -CONFIG_DVB_SP8870=m -CONFIG_DVB_SP887X=m -CONFIG_DVB_CX22700=m -CONFIG_DVB_CX22702=m -CONFIG_DVB_S5H1432=m -CONFIG_DVB_DRXD=m -CONFIG_DVB_L64781=m -CONFIG_DVB_TDA1004X=m -CONFIG_DVB_NXT6000=m -CONFIG_DVB_MT352=m -CONFIG_DVB_ZL10353=m -CONFIG_DVB_DIB3000MB=m -CONFIG_DVB_DIB3000MC=m -CONFIG_DVB_DIB7000M=m -CONFIG_DVB_DIB7000P=m -CONFIG_DVB_DIB9000=m -CONFIG_DVB_TDA10048=m -CONFIG_DVB_AF9013=m -CONFIG_DVB_EC100=m -CONFIG_DVB_STV0367=m -CONFIG_DVB_CXD2820R=m -CONFIG_DVB_CXD2841ER=m -CONFIG_DVB_RTL2830=m -CONFIG_DVB_RTL2832=m -CONFIG_DVB_RTL2832_SDR=m -CONFIG_DVB_SI2168=m -CONFIG_DVB_AS102_FE=m -CONFIG_DVB_ZD1301_DEMOD=m -CONFIG_DVB_GP8PSK_FE=m -CONFIG_DVB_CXD2880=m - -# -# DVB-C (cable) frontends -# -CONFIG_DVB_VES1820=m -CONFIG_DVB_TDA10021=m -CONFIG_DVB_TDA10023=m -CONFIG_DVB_STV0297=m - -# -# ATSC (North American/Korean Terrestrial/Cable DTV) frontends -# -CONFIG_DVB_NXT200X=m -CONFIG_DVB_OR51211=m -CONFIG_DVB_OR51132=m -CONFIG_DVB_BCM3510=m -CONFIG_DVB_LGDT330X=m -CONFIG_DVB_LGDT3305=m -CONFIG_DVB_LGDT3306A=m -CONFIG_DVB_LG2160=m -CONFIG_DVB_S5H1409=m -CONFIG_DVB_AU8522=m -CONFIG_DVB_AU8522_DTV=m -CONFIG_DVB_AU8522_V4L=m -CONFIG_DVB_S5H1411=m - -# -# ISDB-T (terrestrial) frontends -# -CONFIG_DVB_S921=m -CONFIG_DVB_DIB8000=m -CONFIG_DVB_MB86A20S=m - -# -# ISDB-S (satellite) & ISDB-T (terrestrial) frontends -# -CONFIG_DVB_TC90522=m -CONFIG_DVB_MN88443X=m - -# -# Digital terrestrial only tuners/PLL -# -CONFIG_DVB_PLL=m -CONFIG_DVB_TUNER_DIB0070=m -CONFIG_DVB_TUNER_DIB0090=m - -# -# SEC control devices for DVB-S -# -CONFIG_DVB_DRX39XYJ=m -CONFIG_DVB_LNBH25=m -CONFIG_DVB_LNBH29=m -CONFIG_DVB_LNBP21=m -CONFIG_DVB_LNBP22=m -CONFIG_DVB_ISL6405=m -CONFIG_DVB_ISL6421=m -CONFIG_DVB_ISL6423=m -CONFIG_DVB_A8293=m -CONFIG_DVB_LGS8GL5=m -CONFIG_DVB_LGS8GXX=m -CONFIG_DVB_ATBM8830=m -CONFIG_DVB_TDA665x=m -CONFIG_DVB_IX2505V=m -CONFIG_DVB_M88RS2000=m -CONFIG_DVB_AF9033=m -CONFIG_DVB_HORUS3A=m -CONFIG_DVB_ASCOT2E=m -CONFIG_DVB_HELENE=m - -# -# Common Interface (EN50221) controller drivers -# -CONFIG_DVB_CXD2099=m -CONFIG_DVB_SP2=m -# end of Customise DVB Frontends - -# -# Tools to develop new frontends -# -CONFIG_DVB_DUMMY_FE=m -# end of Media ancillary drivers - -# -# Graphics support -# -CONFIG_AGP=m -CONFIG_AGP_AMD64=m -CONFIG_AGP_INTEL=m -CONFIG_AGP_SIS=m -CONFIG_AGP_VIA=m -CONFIG_INTEL_GTT=m -CONFIG_VGA_ARB=y -CONFIG_VGA_ARB_MAX_GPUS=10 -CONFIG_VGA_SWITCHEROO=y -CONFIG_DRM=m -CONFIG_DRM_MIPI_DBI=m -CONFIG_DRM_MIPI_DSI=y -CONFIG_DRM_DP_AUX_CHARDEV=y -# 
CONFIG_DRM_DEBUG_SELFTEST is not set -CONFIG_DRM_KMS_HELPER=m -CONFIG_DRM_KMS_FB_HELPER=y -# CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS is not set -CONFIG_DRM_FBDEV_EMULATION=y -CONFIG_DRM_FBDEV_OVERALLOC=100 -# CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM is not set -CONFIG_DRM_LOAD_EDID_FIRMWARE=y -CONFIG_DRM_DP_CEC=y -CONFIG_DRM_TTM=m -CONFIG_DRM_TTM_DMA_PAGE_POOL=y -CONFIG_DRM_VRAM_HELPER=m -CONFIG_DRM_TTM_HELPER=m -CONFIG_DRM_GEM_CMA_HELPER=y -CONFIG_DRM_KMS_CMA_HELPER=y -CONFIG_DRM_GEM_SHMEM_HELPER=y -CONFIG_DRM_SCHED=m - -# -# I2C encoder or helper chips -# -CONFIG_DRM_I2C_CH7006=m -CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_I2C_NXP_TDA998X=m -CONFIG_DRM_I2C_NXP_TDA9950=m -# end of I2C encoder or helper chips - -# -# ARM devices -# -CONFIG_DRM_KOMEDA=m -# end of ARM devices - -CONFIG_DRM_RADEON=m -CONFIG_DRM_RADEON_USERPTR=y -CONFIG_DRM_AMDGPU=m -CONFIG_DRM_AMDGPU_SI=y -CONFIG_DRM_AMDGPU_CIK=y -CONFIG_DRM_AMDGPU_USERPTR=y -# CONFIG_DRM_AMDGPU_GART_DEBUGFS is not set - -# -# ACP (Audio CoProcessor) Configuration -# -CONFIG_DRM_AMD_ACP=y -# end of ACP (Audio CoProcessor) Configuration - -# -# Display Engine Configuration -# -CONFIG_DRM_AMD_DC=y -CONFIG_DRM_AMD_DC_DCN=y -CONFIG_DRM_AMD_DC_DCN3_0=y -CONFIG_DRM_AMD_DC_HDCP=y -CONFIG_DRM_AMD_DC_SI=y -# CONFIG_DEBUG_KERNEL_DC is not set -# end of Display Engine Configuration - -CONFIG_HSA_AMD=y -CONFIG_DRM_NOUVEAU=m -# CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT is not set -CONFIG_NOUVEAU_DEBUG=5 -CONFIG_NOUVEAU_DEBUG_DEFAULT=3 -# CONFIG_NOUVEAU_DEBUG_MMU is not set -# CONFIG_NOUVEAU_DEBUG_PUSH is not set -CONFIG_DRM_NOUVEAU_BACKLIGHT=y -CONFIG_DRM_NOUVEAU_SVM=y -CONFIG_DRM_I915=m -CONFIG_DRM_I915_FORCE_PROBE="*" -CONFIG_DRM_I915_CAPTURE_ERROR=y -CONFIG_DRM_I915_COMPRESS_ERROR=y -CONFIG_DRM_I915_USERPTR=y -CONFIG_DRM_I915_GVT=y -CONFIG_DRM_I915_GVT_KVMGT=m - -# -# drm/i915 Debugging -# -# CONFIG_DRM_I915_WERROR is not set -# CONFIG_DRM_I915_DEBUG is not set -# CONFIG_DRM_I915_DEBUG_MMIO is not set -# CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS is not set -# CONFIG_DRM_I915_SW_FENCE_CHECK_DAG is not set -# CONFIG_DRM_I915_DEBUG_GUC is not set -# CONFIG_DRM_I915_SELFTEST is not set -# CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS is not set -# CONFIG_DRM_I915_DEBUG_VBLANK_EVADE is not set -# CONFIG_DRM_I915_DEBUG_RUNTIME_PM is not set -# end of drm/i915 Debugging - -# -# drm/i915 Profile Guided Optimisation -# -CONFIG_DRM_I915_FENCE_TIMEOUT=10000 -CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND=250 -CONFIG_DRM_I915_HEARTBEAT_INTERVAL=2500 -CONFIG_DRM_I915_PREEMPT_TIMEOUT=640 -CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT=8000 -CONFIG_DRM_I915_STOP_TIMEOUT=100 -CONFIG_DRM_I915_TIMESLICE_DURATION=1 -# end of drm/i915 Profile Guided Optimisation - -CONFIG_DRM_VGEM=m -CONFIG_DRM_VKMS=m -CONFIG_DRM_VMWGFX=m -CONFIG_DRM_VMWGFX_FBCON=y -CONFIG_DRM_GMA500=m -CONFIG_DRM_GMA600=y -CONFIG_DRM_GMA3600=y -CONFIG_DRM_UDL=m -CONFIG_DRM_AST=m -CONFIG_DRM_MGAG200=m -CONFIG_DRM_RCAR_DW_HDMI=m -CONFIG_DRM_RCAR_LVDS=m -CONFIG_DRM_QXL=m -CONFIG_DRM_BOCHS=m -CONFIG_DRM_VIRTIO_GPU=m -CONFIG_DRM_PANEL=y - -# -# Display Panels -# -CONFIG_DRM_PANEL_ARM_VERSATILE=m -CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596=m -CONFIG_DRM_PANEL_BOE_HIMAX8279D=m -CONFIG_DRM_PANEL_BOE_TV101WUM_NL6=m -CONFIG_DRM_PANEL_LVDS=m -CONFIG_DRM_PANEL_SIMPLE=m -CONFIG_DRM_PANEL_ELIDA_KD35T133=m -CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02=m -CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m -CONFIG_DRM_PANEL_ILITEK_IL9322=m -CONFIG_DRM_PANEL_ILITEK_ILI9881C=m -CONFIG_DRM_PANEL_INNOLUX_P079ZCA=m -CONFIG_DRM_PANEL_JDI_LT070ME05000=m -CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04=m 
-CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W=m -CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829=m -CONFIG_DRM_PANEL_SAMSUNG_LD9040=m -CONFIG_DRM_PANEL_LG_LB035Q02=m -CONFIG_DRM_PANEL_LG_LG4573=m -CONFIG_DRM_PANEL_NEC_NL8048HL11=m -CONFIG_DRM_PANEL_NOVATEK_NT35510=m -CONFIG_DRM_PANEL_NOVATEK_NT39016=m -# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set -CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO=m -CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m -CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS=m -CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m -CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m -CONFIG_DRM_PANEL_RAYDIUM_RM67191=m -CONFIG_DRM_PANEL_RAYDIUM_RM68200=m -CONFIG_DRM_PANEL_RONBO_RB070D30=m -CONFIG_DRM_PANEL_SAMSUNG_S6D16D0=m -CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2=m -CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03=m -CONFIG_DRM_PANEL_SAMSUNG_S6E63M0=m -CONFIG_DRM_PANEL_SAMSUNG_S6E63M0_SPI=m -# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0_DSI is not set -CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01=m -CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0=m -CONFIG_DRM_PANEL_SEIKO_43WVF1G=m -CONFIG_DRM_PANEL_SHARP_LQ101R1SX01=m -CONFIG_DRM_PANEL_SHARP_LS037V7DW01=m -CONFIG_DRM_PANEL_SHARP_LS043T1LE01=m -CONFIG_DRM_PANEL_SITRONIX_ST7701=m -# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set -CONFIG_DRM_PANEL_SITRONIX_ST7789V=m -CONFIG_DRM_PANEL_SONY_ACX424AKP=m -CONFIG_DRM_PANEL_SONY_ACX565AKM=m -CONFIG_DRM_PANEL_TPO_TD028TTEC1=m -CONFIG_DRM_PANEL_TPO_TD043MTEA1=m -CONFIG_DRM_PANEL_TPO_TPG110=m -CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA=m -CONFIG_DRM_PANEL_VISIONOX_RM69299=m -CONFIG_DRM_PANEL_XINPENG_XPP055C272=m -# end of Display Panels - -CONFIG_DRM_BRIDGE=y -CONFIG_DRM_PANEL_BRIDGE=y - -# -# Display Interface Bridges -# -CONFIG_DRM_CDNS_DSI=m -CONFIG_DRM_CHRONTEL_CH7033=m -CONFIG_DRM_DISPLAY_CONNECTOR=m -# CONFIG_DRM_LONTIUM_LT9611 is not set -CONFIG_DRM_LVDS_CODEC=m -CONFIG_DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW=m -CONFIG_DRM_NWL_MIPI_DSI=m -CONFIG_DRM_NXP_PTN3460=m -CONFIG_DRM_PARADE_PS8622=m -CONFIG_DRM_PARADE_PS8640=m -CONFIG_DRM_SIL_SII8620=m -CONFIG_DRM_SII902X=m -CONFIG_DRM_SII9234=m -CONFIG_DRM_SIMPLE_BRIDGE=m -CONFIG_DRM_THINE_THC63LVD1024=m -# CONFIG_DRM_TOSHIBA_TC358762 is not set -CONFIG_DRM_TOSHIBA_TC358764=m -CONFIG_DRM_TOSHIBA_TC358767=m -CONFIG_DRM_TOSHIBA_TC358768=m -# CONFIG_DRM_TOSHIBA_TC358775 is not set -CONFIG_DRM_TI_TFP410=m -CONFIG_DRM_TI_SN65DSI86=m -CONFIG_DRM_TI_TPD12S015=m -CONFIG_DRM_ANALOGIX_ANX6345=m -CONFIG_DRM_ANALOGIX_ANX78XX=m -CONFIG_DRM_ANALOGIX_DP=m -CONFIG_DRM_I2C_ADV7511=m -CONFIG_DRM_I2C_ADV7511_AUDIO=y -CONFIG_DRM_I2C_ADV7511_CEC=y -# CONFIG_DRM_CDNS_MHDP8546 is not set -CONFIG_DRM_DW_HDMI=m -CONFIG_DRM_DW_HDMI_AHB_AUDIO=m -CONFIG_DRM_DW_HDMI_I2S_AUDIO=m -CONFIG_DRM_DW_HDMI_CEC=m -# end of Display Interface Bridges - -# CONFIG_DRM_ETNAVIV is not set -CONFIG_DRM_ARCPGU=m -CONFIG_DRM_MXS=y -CONFIG_DRM_MXSFB=m -CONFIG_DRM_CIRRUS_QEMU=m -CONFIG_DRM_GM12U320=m -CONFIG_TINYDRM_HX8357D=m -CONFIG_TINYDRM_ILI9225=m -CONFIG_TINYDRM_ILI9341=m -CONFIG_TINYDRM_ILI9486=m -CONFIG_TINYDRM_MI0283QT=m -CONFIG_TINYDRM_REPAPER=m -CONFIG_TINYDRM_ST7586=m -CONFIG_TINYDRM_ST7735R=m -CONFIG_DRM_XEN=y -CONFIG_DRM_XEN_FRONTEND=m -CONFIG_DRM_VBOXVIDEO=m -# CONFIG_DRM_LEGACY is not set -CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y - -# -# Frame buffer Devices -# -CONFIG_FB_CMDLINE=y -CONFIG_FB_NOTIFY=y -CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y -CONFIG_FB_BOOT_VESA_SUPPORT=y -CONFIG_FB_CFB_FILLRECT=y -CONFIG_FB_CFB_COPYAREA=y -CONFIG_FB_CFB_IMAGEBLIT=y -CONFIG_FB_SYS_FILLRECT=m -CONFIG_FB_SYS_COPYAREA=m -CONFIG_FB_SYS_IMAGEBLIT=m -# CONFIG_FB_FOREIGN_ENDIAN is not set -CONFIG_FB_SYS_FOPS=m 
-CONFIG_FB_DEFERRED_IO=y -CONFIG_FB_BACKLIGHT=m -CONFIG_FB_MODE_HELPERS=y -CONFIG_FB_TILEBLITTING=y - -# -# Frame buffer hardware drivers -# -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ARC is not set -# CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_VGA16 is not set -# CONFIG_FB_UVESA is not set -CONFIG_FB_VESA=y -CONFIG_FB_EFI=y -# CONFIG_FB_N411 is not set -# CONFIG_FB_HGA is not set -# CONFIG_FB_OPENCORES is not set -# CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set -# CONFIG_FB_I740 is not set -# CONFIG_FB_LE80578 is not set -# CONFIG_FB_INTEL is not set -# CONFIG_FB_MATROX is not set -# CONFIG_FB_RADEON is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_S3 is not set -# CONFIG_FB_SAVAGE is not set -# CONFIG_FB_SIS is not set -# CONFIG_FB_VIA is not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VOODOO1 is not set -# CONFIG_FB_VT8623 is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_ARK is not set -# CONFIG_FB_PM3 is not set -# CONFIG_FB_CARMINE is not set -# CONFIG_FB_SM501 is not set -# CONFIG_FB_SMSCUFX is not set -# CONFIG_FB_UDL is not set -# CONFIG_FB_IBM_GXT4500 is not set -# CONFIG_FB_VIRTUAL is not set -CONFIG_XEN_FBDEV_FRONTEND=m -# CONFIG_FB_METRONOME is not set -# CONFIG_FB_MB862XX is not set -CONFIG_FB_HYPERV=m -CONFIG_FB_SIMPLE=y -# CONFIG_FB_SSD1307 is not set -# CONFIG_FB_SM712 is not set -# end of Frame buffer Devices - -# -# Backlight & LCD device support -# -CONFIG_LCD_CLASS_DEVICE=m -CONFIG_LCD_L4F00242T03=m -CONFIG_LCD_LMS283GF05=m -CONFIG_LCD_LTV350QV=m -CONFIG_LCD_ILI922X=m -CONFIG_LCD_ILI9320=m -CONFIG_LCD_TDO24M=m -CONFIG_LCD_VGG2432A4=m -CONFIG_LCD_PLATFORM=m -CONFIG_LCD_AMS369FG06=m -CONFIG_LCD_LMS501KF03=m -CONFIG_LCD_HX8357=m -CONFIG_LCD_OTM3225A=m -CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_BACKLIGHT_KTD253 is not set -CONFIG_BACKLIGHT_LM3533=m -CONFIG_BACKLIGHT_PWM=m -CONFIG_BACKLIGHT_DA903X=m -CONFIG_BACKLIGHT_DA9052=m -CONFIG_BACKLIGHT_MAX8925=m -CONFIG_BACKLIGHT_APPLE=m -CONFIG_BACKLIGHT_QCOM_WLED=m -CONFIG_BACKLIGHT_SAHARA=m -CONFIG_BACKLIGHT_WM831X=m -CONFIG_BACKLIGHT_ADP5520=m -CONFIG_BACKLIGHT_ADP8860=m -CONFIG_BACKLIGHT_ADP8870=m -CONFIG_BACKLIGHT_88PM860X=m -CONFIG_BACKLIGHT_PCF50633=m -CONFIG_BACKLIGHT_AAT2870=m -CONFIG_BACKLIGHT_LM3630A=m -CONFIG_BACKLIGHT_LM3639=m -CONFIG_BACKLIGHT_LP855X=m -CONFIG_BACKLIGHT_LP8788=m -CONFIG_BACKLIGHT_PANDORA=m -CONFIG_BACKLIGHT_SKY81452=m -CONFIG_BACKLIGHT_TPS65217=m -CONFIG_BACKLIGHT_AS3711=m -CONFIG_BACKLIGHT_GPIO=m -CONFIG_BACKLIGHT_LV5207LP=m -CONFIG_BACKLIGHT_BD6107=m -CONFIG_BACKLIGHT_ARCXCNN=m -CONFIG_BACKLIGHT_RAVE_SP=m -CONFIG_BACKLIGHT_LED=m -# end of Backlight & LCD device support - -CONFIG_VIDEOMODE_HELPERS=y -CONFIG_HDMI=y - -# -# Console display driver support -# -CONFIG_VGA_CONSOLE=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_DUMMY_CONSOLE_COLUMNS=80 -CONFIG_DUMMY_CONSOLE_ROWS=25 -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y -CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER=y -# end of Console display driver support - -# CONFIG_LOGO is not set -# end of Graphics support - -CONFIG_SOUND=m -CONFIG_SOUND_OSS_CORE=y -# CONFIG_SOUND_OSS_CORE_PRECLAIM is not set -CONFIG_SND=m -CONFIG_SND_TIMER=m -CONFIG_SND_PCM=m -CONFIG_SND_PCM_ELD=y -CONFIG_SND_PCM_IEC958=y -CONFIG_SND_DMAENGINE_PCM=m -CONFIG_SND_HWDEP=m 
-CONFIG_SND_SEQ_DEVICE=m -CONFIG_SND_RAWMIDI=m -CONFIG_SND_COMPRESS_OFFLOAD=m -CONFIG_SND_JACK=y -CONFIG_SND_JACK_INPUT_DEV=y -CONFIG_SND_OSSEMUL=y -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_PCM_OSS_PLUGINS=y -CONFIG_SND_PCM_TIMER=y -CONFIG_SND_HRTIMER=m -CONFIG_SND_DYNAMIC_MINORS=y -CONFIG_SND_MAX_CARDS=32 -# CONFIG_SND_SUPPORT_OLD_API is not set -CONFIG_SND_PROC_FS=y -CONFIG_SND_VERBOSE_PROCFS=y -CONFIG_SND_VERBOSE_PRINTK=y -CONFIG_SND_DEBUG=y -# CONFIG_SND_DEBUG_VERBOSE is not set -# CONFIG_SND_PCM_XRUN_DEBUG is not set -# CONFIG_SND_CTL_VALIDATION is not set -CONFIG_SND_VMASTER=y -CONFIG_SND_DMA_SGBUF=y -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_SEQUENCER_OSS=m -CONFIG_SND_SEQ_HRTIMER_DEFAULT=y -CONFIG_SND_SEQ_MIDI_EVENT=m -CONFIG_SND_SEQ_MIDI=m -CONFIG_SND_SEQ_MIDI_EMUL=m -CONFIG_SND_SEQ_VIRMIDI=m -CONFIG_SND_MPU401_UART=m -CONFIG_SND_OPL3_LIB=m -CONFIG_SND_OPL3_LIB_SEQ=m -CONFIG_SND_VX_LIB=m -CONFIG_SND_AC97_CODEC=m -CONFIG_SND_DRIVERS=y -# CONFIG_SND_PCSP is not set -CONFIG_SND_DUMMY=m -CONFIG_SND_ALOOP=m -CONFIG_SND_VIRMIDI=m -CONFIG_SND_MTPAV=m -CONFIG_SND_MTS64=m -CONFIG_SND_SERIAL_U16550=m -CONFIG_SND_MPU401=m -CONFIG_SND_PORTMAN2X4=m -CONFIG_SND_AC97_POWER_SAVE=y -CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 -CONFIG_SND_SB_COMMON=m -CONFIG_SND_PCI=y -CONFIG_SND_AD1889=m -CONFIG_SND_ALS300=m -CONFIG_SND_ALS4000=m -CONFIG_SND_ALI5451=m -CONFIG_SND_ASIHPI=m -CONFIG_SND_ATIIXP=m -CONFIG_SND_ATIIXP_MODEM=m -CONFIG_SND_AU8810=m -CONFIG_SND_AU8820=m -CONFIG_SND_AU8830=m -CONFIG_SND_AW2=m -CONFIG_SND_AZT3328=m -CONFIG_SND_BT87X=m -# CONFIG_SND_BT87X_OVERCLOCK is not set -CONFIG_SND_CA0106=m -CONFIG_SND_CMIPCI=m -CONFIG_SND_OXYGEN_LIB=m -CONFIG_SND_OXYGEN=m -CONFIG_SND_CS4281=m -CONFIG_SND_CS46XX=m -CONFIG_SND_CS46XX_NEW_DSP=y -CONFIG_SND_CTXFI=m -CONFIG_SND_DARLA20=m -CONFIG_SND_GINA20=m -CONFIG_SND_LAYLA20=m -CONFIG_SND_DARLA24=m -CONFIG_SND_GINA24=m -CONFIG_SND_LAYLA24=m -CONFIG_SND_MONA=m -CONFIG_SND_MIA=m -CONFIG_SND_ECHO3G=m -CONFIG_SND_INDIGO=m -CONFIG_SND_INDIGOIO=m -CONFIG_SND_INDIGODJ=m -CONFIG_SND_INDIGOIOX=m -CONFIG_SND_INDIGODJX=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_EMU10K1_SEQ=m -CONFIG_SND_EMU10K1X=m -CONFIG_SND_ENS1370=m -CONFIG_SND_ENS1371=m -CONFIG_SND_ES1938=m -CONFIG_SND_ES1968=m -CONFIG_SND_ES1968_INPUT=y -CONFIG_SND_ES1968_RADIO=y -CONFIG_SND_FM801=m -CONFIG_SND_FM801_TEA575X_BOOL=y -CONFIG_SND_HDSP=m -CONFIG_SND_HDSPM=m -CONFIG_SND_ICE1712=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_INTEL8X0M=m -CONFIG_SND_KORG1212=m -CONFIG_SND_LOLA=m -CONFIG_SND_LX6464ES=m -CONFIG_SND_MAESTRO3=m -CONFIG_SND_MAESTRO3_INPUT=y -CONFIG_SND_MIXART=m -CONFIG_SND_NM256=m -CONFIG_SND_PCXHR=m -CONFIG_SND_RIPTIDE=m -CONFIG_SND_RME32=m -CONFIG_SND_RME96=m -CONFIG_SND_RME9652=m -CONFIG_SND_SONICVIBES=m -CONFIG_SND_TRIDENT=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VIA82XX_MODEM=m -CONFIG_SND_VIRTUOSO=m -CONFIG_SND_VX222=m -CONFIG_SND_YMFPCI=m - -# -# HD-Audio -# -CONFIG_SND_HDA=m -CONFIG_SND_HDA_GENERIC_LEDS=y -CONFIG_SND_HDA_INTEL=m -CONFIG_SND_HDA_HWDEP=y -CONFIG_SND_HDA_RECONFIG=y -CONFIG_SND_HDA_INPUT_BEEP=y -CONFIG_SND_HDA_INPUT_BEEP_MODE=1 -CONFIG_SND_HDA_PATCH_LOADER=y -CONFIG_SND_HDA_CODEC_REALTEK=m -CONFIG_SND_HDA_CODEC_ANALOG=m -CONFIG_SND_HDA_CODEC_SIGMATEL=m -CONFIG_SND_HDA_CODEC_VIA=m -CONFIG_SND_HDA_CODEC_HDMI=m -CONFIG_SND_HDA_CODEC_CIRRUS=m -CONFIG_SND_HDA_CODEC_CONEXANT=m -CONFIG_SND_HDA_CODEC_CA0110=m -CONFIG_SND_HDA_CODEC_CA0132=m -CONFIG_SND_HDA_CODEC_CA0132_DSP=y -CONFIG_SND_HDA_CODEC_CMEDIA=m -CONFIG_SND_HDA_CODEC_SI3054=m 
-CONFIG_SND_HDA_GENERIC=m -CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 -# CONFIG_SND_HDA_INTEL_HDMI_SILENT_STREAM is not set -# end of HD-Audio - -CONFIG_SND_HDA_CORE=m -CONFIG_SND_HDA_DSP_LOADER=y -CONFIG_SND_HDA_COMPONENT=y -CONFIG_SND_HDA_I915=y -CONFIG_SND_HDA_EXT_CORE=m -CONFIG_SND_HDA_PREALLOC_SIZE=0 -CONFIG_SND_INTEL_NHLT=y -CONFIG_SND_INTEL_DSP_CONFIG=m -CONFIG_SND_SPI=y -CONFIG_SND_USB=y -CONFIG_SND_USB_AUDIO=m -CONFIG_SND_USB_AUDIO_USE_MEDIA_CONTROLLER=y -CONFIG_SND_USB_UA101=m -CONFIG_SND_USB_USX2Y=m -CONFIG_SND_USB_CAIAQ=m -CONFIG_SND_USB_CAIAQ_INPUT=y -CONFIG_SND_USB_US122L=m -CONFIG_SND_USB_6FIRE=m -CONFIG_SND_USB_HIFACE=m -CONFIG_SND_BCD2000=m -CONFIG_SND_USB_LINE6=m -CONFIG_SND_USB_POD=m -CONFIG_SND_USB_PODHD=m -CONFIG_SND_USB_TONEPORT=m -CONFIG_SND_USB_VARIAX=m -CONFIG_SND_FIREWIRE=y -CONFIG_SND_FIREWIRE_LIB=m -CONFIG_SND_DICE=m -CONFIG_SND_OXFW=m -CONFIG_SND_ISIGHT=m -CONFIG_SND_FIREWORKS=m -CONFIG_SND_BEBOB=m -CONFIG_SND_FIREWIRE_DIGI00X=m -CONFIG_SND_FIREWIRE_TASCAM=m -CONFIG_SND_FIREWIRE_MOTU=m -CONFIG_SND_FIREFACE=m -CONFIG_SND_PCMCIA=y -CONFIG_SND_VXPOCKET=m -CONFIG_SND_PDAUDIOCF=m -CONFIG_SND_SOC=m -CONFIG_SND_SOC_AC97_BUS=y -CONFIG_SND_SOC_GENERIC_DMAENGINE_PCM=y -CONFIG_SND_SOC_COMPRESS=y -CONFIG_SND_SOC_TOPOLOGY=y -CONFIG_SND_SOC_ACPI=m -CONFIG_SND_SOC_AMD_ACP=m -CONFIG_SND_SOC_AMD_CZ_DA7219MX98357_MACH=m -CONFIG_SND_SOC_AMD_CZ_RT5645_MACH=m -CONFIG_SND_SOC_AMD_ACP3x=m -CONFIG_SND_SOC_AMD_RV_RT5682_MACH=m -CONFIG_SND_SOC_AMD_RENOIR=m -CONFIG_SND_SOC_AMD_RENOIR_MACH=m -CONFIG_SND_ATMEL_SOC=m -CONFIG_SND_SOC_MIKROE_PROTO=m -CONFIG_SND_BCM63XX_I2S_WHISTLER=m -CONFIG_SND_DESIGNWARE_I2S=m -CONFIG_SND_DESIGNWARE_PCM=y - -# -# SoC Audio for Freescale CPUs -# - -# -# Common SoC Audio options for Freescale CPUs: -# -# CONFIG_SND_SOC_FSL_ASRC is not set -# CONFIG_SND_SOC_FSL_SAI is not set -# CONFIG_SND_SOC_FSL_AUDMIX is not set -# CONFIG_SND_SOC_FSL_SSI is not set -# CONFIG_SND_SOC_FSL_SPDIF is not set -# CONFIG_SND_SOC_FSL_ESAI is not set -# CONFIG_SND_SOC_FSL_MICFIL is not set -# CONFIG_SND_SOC_IMX_AUDMUX is not set -# end of SoC Audio for Freescale CPUs - -CONFIG_SND_I2S_HI6210_I2S=m -CONFIG_SND_SOC_IMG=y -CONFIG_SND_SOC_IMG_I2S_IN=m -CONFIG_SND_SOC_IMG_I2S_OUT=m -CONFIG_SND_SOC_IMG_PARALLEL_OUT=m -CONFIG_SND_SOC_IMG_SPDIF_IN=m -CONFIG_SND_SOC_IMG_SPDIF_OUT=m -CONFIG_SND_SOC_IMG_PISTACHIO_INTERNAL_DAC=m -CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y -CONFIG_SND_SST_IPC=m -CONFIG_SND_SST_IPC_PCI=m -CONFIG_SND_SST_IPC_ACPI=m -CONFIG_SND_SOC_INTEL_SST=m -CONFIG_SND_SOC_INTEL_CATPT=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_PCI=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_ACPI=m -CONFIG_SND_SOC_INTEL_SKYLAKE=m -CONFIG_SND_SOC_INTEL_SKL=m -CONFIG_SND_SOC_INTEL_APL=m -CONFIG_SND_SOC_INTEL_KBL=m -CONFIG_SND_SOC_INTEL_GLK=m -CONFIG_SND_SOC_INTEL_CNL=m -CONFIG_SND_SOC_INTEL_CFL=m -CONFIG_SND_SOC_INTEL_CML_H=m -CONFIG_SND_SOC_INTEL_CML_LP=m -CONFIG_SND_SOC_INTEL_SKYLAKE_FAMILY=m -CONFIG_SND_SOC_INTEL_SKYLAKE_SSP_CLK=m -# CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC is not set -CONFIG_SND_SOC_INTEL_SKYLAKE_COMMON=m -CONFIG_SND_SOC_ACPI_INTEL_MATCH=m -CONFIG_SND_SOC_INTEL_MACH=y -# CONFIG_SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES is not set -CONFIG_SND_SOC_INTEL_HASWELL_MACH=m -CONFIG_SND_SOC_INTEL_BDW_RT5650_MACH=m -CONFIG_SND_SOC_INTEL_BDW_RT5677_MACH=m -CONFIG_SND_SOC_INTEL_BROADWELL_MACH=m -CONFIG_SND_SOC_INTEL_BYTCR_RT5640_MACH=m -CONFIG_SND_SOC_INTEL_BYTCR_RT5651_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_RT5672_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_RT5645_MACH=m 
-CONFIG_SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_NAU8824_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_CX2072X_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_DA7213_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_ES8316_MACH=m -# CONFIG_SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH is not set -CONFIG_SND_SOC_INTEL_SKL_RT286_MACH=m -CONFIG_SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH=m -CONFIG_SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_DA7219_MAX98357A_GENERIC=m -CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_COMMON=m -CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_BXT_RT298_MACH=m -CONFIG_SND_SOC_INTEL_SOF_WM8804_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5660_MACH=m -CONFIG_SND_SOC_INTEL_GLK_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_GLK_RT5682_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_SKL_HDA_DSP_GENERIC_MACH=m -CONFIG_SND_SOC_INTEL_SOF_RT5682_MACH=m -CONFIG_SND_SOC_INTEL_SOF_PCM512x_MACH=m -CONFIG_SND_SOC_INTEL_CML_LP_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m -CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m -CONFIG_SND_SOC_INTEL_EHL_RT5660_MACH=m -CONFIG_SND_SOC_MTK_BTCVSD=m -CONFIG_SND_SOC_SOF_TOPLEVEL=y -CONFIG_SND_SOC_SOF_PCI=m -CONFIG_SND_SOC_SOF_ACPI=m -CONFIG_SND_SOC_SOF_OF=m -# CONFIG_SND_SOC_SOF_DEBUG_PROBES is not set -# CONFIG_SND_SOC_SOF_DEVELOPER_SUPPORT is not set -CONFIG_SND_SOC_SOF=m -CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE=y -CONFIG_SND_SOC_SOF_INTEL_TOPLEVEL=y -CONFIG_SND_SOC_SOF_INTEL_ACPI=m -CONFIG_SND_SOC_SOF_INTEL_PCI=m -CONFIG_SND_SOC_SOF_INTEL_HIFI_EP_IPC=m -CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP=m -CONFIG_SND_SOC_SOF_INTEL_COMMON=m -CONFIG_SND_SOC_SOF_BROADWELL_SUPPORT=y -CONFIG_SND_SOC_SOF_BROADWELL=m -CONFIG_SND_SOC_SOF_MERRIFIELD_SUPPORT=y -CONFIG_SND_SOC_SOF_MERRIFIELD=m -CONFIG_SND_SOC_SOF_APOLLOLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_APOLLOLAKE=m -CONFIG_SND_SOC_SOF_GEMINILAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_GEMINILAKE=m -CONFIG_SND_SOC_SOF_CANNONLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_CANNONLAKE=m -CONFIG_SND_SOC_SOF_COFFEELAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_COFFEELAKE=m -CONFIG_SND_SOC_SOF_ICELAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_ICELAKE=m -CONFIG_SND_SOC_SOF_COMETLAKE=m -CONFIG_SND_SOC_SOF_COMETLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_COMETLAKE_LP_SUPPORT=y -CONFIG_SND_SOC_SOF_TIGERLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_TIGERLAKE=m -CONFIG_SND_SOC_SOF_ELKHARTLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_ELKHARTLAKE=m -CONFIG_SND_SOC_SOF_JASPERLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_JASPERLAKE=m -CONFIG_SND_SOC_SOF_HDA_COMMON=m -CONFIG_SND_SOC_SOF_HDA_LINK=y -CONFIG_SND_SOC_SOF_HDA_AUDIO_CODEC=y -# CONFIG_SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1 is not set -CONFIG_SND_SOC_SOF_HDA_LINK_BASELINE=m -CONFIG_SND_SOC_SOF_HDA=m -CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE_LINK=y -CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE=m -CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE=m -CONFIG_SND_SOC_SOF_XTENSA=m - -# -# STMicroelectronics STM32 SOC audio support -# -# end of STMicroelectronics STM32 SOC audio support - -CONFIG_SND_SOC_XILINX_I2S=m -CONFIG_SND_SOC_XILINX_AUDIO_FORMATTER=m -CONFIG_SND_SOC_XILINX_SPDIF=m -CONFIG_SND_SOC_XTFPGA_I2S=m -CONFIG_ZX_TDM=m -CONFIG_SND_SOC_I2C_AND_SPI=m - -# -# CODEC drivers -# -CONFIG_SND_SOC_AC97_CODEC=m -CONFIG_SND_SOC_ADAU_UTILS=m -CONFIG_SND_SOC_ADAU1701=m -CONFIG_SND_SOC_ADAU17X1=m -CONFIG_SND_SOC_ADAU1761=m -CONFIG_SND_SOC_ADAU1761_I2C=m 
-CONFIG_SND_SOC_ADAU1761_SPI=m -CONFIG_SND_SOC_ADAU7002=m -CONFIG_SND_SOC_ADAU7118=m -CONFIG_SND_SOC_ADAU7118_HW=m -CONFIG_SND_SOC_ADAU7118_I2C=m -CONFIG_SND_SOC_AK4104=m -CONFIG_SND_SOC_AK4118=m -CONFIG_SND_SOC_AK4458=m -CONFIG_SND_SOC_AK4554=m -CONFIG_SND_SOC_AK4613=m -CONFIG_SND_SOC_AK4642=m -CONFIG_SND_SOC_AK5386=m -CONFIG_SND_SOC_AK5558=m -CONFIG_SND_SOC_ALC5623=m -CONFIG_SND_SOC_BD28623=m -# CONFIG_SND_SOC_BT_SCO is not set -CONFIG_SND_SOC_CPCAP=m -CONFIG_SND_SOC_CROS_EC_CODEC=m -CONFIG_SND_SOC_CS35L32=m -CONFIG_SND_SOC_CS35L33=m -CONFIG_SND_SOC_CS35L34=m -CONFIG_SND_SOC_CS35L35=m -CONFIG_SND_SOC_CS35L36=m -CONFIG_SND_SOC_CS42L42=m -CONFIG_SND_SOC_CS42L51=m -CONFIG_SND_SOC_CS42L51_I2C=m -CONFIG_SND_SOC_CS42L52=m -CONFIG_SND_SOC_CS42L56=m -CONFIG_SND_SOC_CS42L73=m -# CONFIG_SND_SOC_CS4234 is not set -CONFIG_SND_SOC_CS4265=m -CONFIG_SND_SOC_CS4270=m -CONFIG_SND_SOC_CS4271=m -CONFIG_SND_SOC_CS4271_I2C=m -CONFIG_SND_SOC_CS4271_SPI=m -CONFIG_SND_SOC_CS42XX8=m -CONFIG_SND_SOC_CS42XX8_I2C=m -CONFIG_SND_SOC_CS43130=m -CONFIG_SND_SOC_CS4341=m -CONFIG_SND_SOC_CS4349=m -CONFIG_SND_SOC_CS53L30=m -CONFIG_SND_SOC_CX2072X=m -CONFIG_SND_SOC_DA7213=m -CONFIG_SND_SOC_DA7219=m -CONFIG_SND_SOC_DMIC=m -CONFIG_SND_SOC_HDMI_CODEC=m -CONFIG_SND_SOC_ES7134=m -CONFIG_SND_SOC_ES7241=m -CONFIG_SND_SOC_ES8316=m -CONFIG_SND_SOC_ES8328=m -CONFIG_SND_SOC_ES8328_I2C=m -CONFIG_SND_SOC_ES8328_SPI=m -CONFIG_SND_SOC_GTM601=m -CONFIG_SND_SOC_HDAC_HDMI=m -CONFIG_SND_SOC_HDAC_HDA=m -CONFIG_SND_SOC_INNO_RK3036=m -CONFIG_SND_SOC_LOCHNAGAR_SC=m -CONFIG_SND_SOC_MAX98088=m -CONFIG_SND_SOC_MAX98090=m -CONFIG_SND_SOC_MAX98357A=m -CONFIG_SND_SOC_MAX98504=m -CONFIG_SND_SOC_MAX9867=m -CONFIG_SND_SOC_MAX98927=m -CONFIG_SND_SOC_MAX98373=m -CONFIG_SND_SOC_MAX98373_I2C=m -# CONFIG_SND_SOC_MAX98373_SDW is not set -CONFIG_SND_SOC_MAX98390=m -CONFIG_SND_SOC_MAX9860=m -CONFIG_SND_SOC_MSM8916_WCD_ANALOG=m -CONFIG_SND_SOC_MSM8916_WCD_DIGITAL=m -CONFIG_SND_SOC_PCM1681=m -CONFIG_SND_SOC_PCM1789=m -CONFIG_SND_SOC_PCM1789_I2C=m -CONFIG_SND_SOC_PCM179X=m -CONFIG_SND_SOC_PCM179X_I2C=m -CONFIG_SND_SOC_PCM179X_SPI=m -CONFIG_SND_SOC_PCM186X=m -CONFIG_SND_SOC_PCM186X_I2C=m -CONFIG_SND_SOC_PCM186X_SPI=m -CONFIG_SND_SOC_PCM3060=m -CONFIG_SND_SOC_PCM3060_I2C=m -CONFIG_SND_SOC_PCM3060_SPI=m -CONFIG_SND_SOC_PCM3168A=m -CONFIG_SND_SOC_PCM3168A_I2C=m -CONFIG_SND_SOC_PCM3168A_SPI=m -CONFIG_SND_SOC_PCM512x=m -CONFIG_SND_SOC_PCM512x_I2C=m -CONFIG_SND_SOC_PCM512x_SPI=m -CONFIG_SND_SOC_RK3328=m -CONFIG_SND_SOC_RL6231=m -CONFIG_SND_SOC_RL6347A=m -CONFIG_SND_SOC_RT286=m -CONFIG_SND_SOC_RT298=m -CONFIG_SND_SOC_RT1011=m -CONFIG_SND_SOC_RT1015=m -CONFIG_SND_SOC_RT1308_SDW=m -CONFIG_SND_SOC_RT5514=m -CONFIG_SND_SOC_RT5514_SPI=m -CONFIG_SND_SOC_RT5616=m -CONFIG_SND_SOC_RT5631=m -CONFIG_SND_SOC_RT5640=m -CONFIG_SND_SOC_RT5645=m -CONFIG_SND_SOC_RT5651=m -CONFIG_SND_SOC_RT5660=m -CONFIG_SND_SOC_RT5663=m -CONFIG_SND_SOC_RT5670=m -CONFIG_SND_SOC_RT5677=m -CONFIG_SND_SOC_RT5677_SPI=m -CONFIG_SND_SOC_RT5682=m -CONFIG_SND_SOC_RT5682_I2C=m -CONFIG_SND_SOC_RT5682_SDW=m -CONFIG_SND_SOC_RT700=m -CONFIG_SND_SOC_RT700_SDW=m -CONFIG_SND_SOC_RT711=m -CONFIG_SND_SOC_RT711_SDW=m -CONFIG_SND_SOC_RT715=m -CONFIG_SND_SOC_RT715_SDW=m -CONFIG_SND_SOC_SGTL5000=m -CONFIG_SND_SOC_SI476X=m -CONFIG_SND_SOC_SIGMADSP=m -CONFIG_SND_SOC_SIGMADSP_I2C=m -CONFIG_SND_SOC_SIGMADSP_REGMAP=m -CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m -CONFIG_SND_SOC_SIRF_AUDIO_CODEC=m -CONFIG_SND_SOC_SPDIF=m -CONFIG_SND_SOC_SSM2305=m -CONFIG_SND_SOC_SSM2602=m -CONFIG_SND_SOC_SSM2602_SPI=m -CONFIG_SND_SOC_SSM2602_I2C=m 
-CONFIG_SND_SOC_SSM4567=m -CONFIG_SND_SOC_STA32X=m -CONFIG_SND_SOC_STA350=m -CONFIG_SND_SOC_STI_SAS=m -CONFIG_SND_SOC_TAS2552=m -CONFIG_SND_SOC_TAS2562=m -# CONFIG_SND_SOC_TAS2764 is not set -CONFIG_SND_SOC_TAS2770=m -CONFIG_SND_SOC_TAS5086=m -CONFIG_SND_SOC_TAS571X=m -CONFIG_SND_SOC_TAS5720=m -CONFIG_SND_SOC_TAS6424=m -CONFIG_SND_SOC_TDA7419=m -CONFIG_SND_SOC_TFA9879=m -CONFIG_SND_SOC_TLV320AIC23=m -CONFIG_SND_SOC_TLV320AIC23_I2C=m -CONFIG_SND_SOC_TLV320AIC23_SPI=m -CONFIG_SND_SOC_TLV320AIC31XX=m -CONFIG_SND_SOC_TLV320AIC32X4=m -CONFIG_SND_SOC_TLV320AIC32X4_I2C=m -CONFIG_SND_SOC_TLV320AIC32X4_SPI=m -CONFIG_SND_SOC_TLV320AIC3X=m -CONFIG_SND_SOC_TLV320ADCX140=m -CONFIG_SND_SOC_TS3A227E=m -CONFIG_SND_SOC_TSCS42XX=m -CONFIG_SND_SOC_TSCS454=m -CONFIG_SND_SOC_UDA1334=m -CONFIG_SND_SOC_WCD9335=m -CONFIG_SND_SOC_WCD934X=m -CONFIG_SND_SOC_WM8510=m -CONFIG_SND_SOC_WM8523=m -CONFIG_SND_SOC_WM8524=m -CONFIG_SND_SOC_WM8580=m -CONFIG_SND_SOC_WM8711=m -CONFIG_SND_SOC_WM8728=m -CONFIG_SND_SOC_WM8731=m -CONFIG_SND_SOC_WM8737=m -CONFIG_SND_SOC_WM8741=m -CONFIG_SND_SOC_WM8750=m -CONFIG_SND_SOC_WM8753=m -CONFIG_SND_SOC_WM8770=m -CONFIG_SND_SOC_WM8776=m -CONFIG_SND_SOC_WM8782=m -CONFIG_SND_SOC_WM8804=m -CONFIG_SND_SOC_WM8804_I2C=m -CONFIG_SND_SOC_WM8804_SPI=m -CONFIG_SND_SOC_WM8903=m -CONFIG_SND_SOC_WM8904=m -CONFIG_SND_SOC_WM8960=m -CONFIG_SND_SOC_WM8962=m -CONFIG_SND_SOC_WM8974=m -CONFIG_SND_SOC_WM8978=m -CONFIG_SND_SOC_WM8985=m -CONFIG_SND_SOC_WSA881X=m -CONFIG_SND_SOC_ZL38060=m -CONFIG_SND_SOC_ZX_AUD96P22=m -CONFIG_SND_SOC_MAX9759=m -CONFIG_SND_SOC_MT6351=m -CONFIG_SND_SOC_MT6358=m -CONFIG_SND_SOC_MT6660=m -CONFIG_SND_SOC_NAU8540=m -CONFIG_SND_SOC_NAU8810=m -CONFIG_SND_SOC_NAU8822=m -CONFIG_SND_SOC_NAU8824=m -CONFIG_SND_SOC_NAU8825=m -CONFIG_SND_SOC_TPA6130A2=m -# end of CODEC drivers - -CONFIG_SND_SIMPLE_CARD_UTILS=m -CONFIG_SND_SIMPLE_CARD=m -CONFIG_SND_AUDIO_GRAPH_CARD=m -CONFIG_SND_X86=y -CONFIG_HDMI_LPE_AUDIO=m -CONFIG_SND_SYNTH_EMUX=m -CONFIG_SND_XEN_FRONTEND=m -CONFIG_AC97_BUS=m - -# -# HID support -# -CONFIG_HID=m -CONFIG_HID_BATTERY_STRENGTH=y -CONFIG_HIDRAW=y -CONFIG_UHID=m -CONFIG_HID_GENERIC=m - -# -# Special HID drivers -# -CONFIG_HID_A4TECH=m -CONFIG_HID_ACCUTOUCH=m -CONFIG_HID_ACRUX=m -CONFIG_HID_ACRUX_FF=y -CONFIG_HID_APPLE=m -CONFIG_HID_APPLEIR=m -CONFIG_HID_ASUS=m -CONFIG_HID_AUREAL=m -CONFIG_HID_BELKIN=m -CONFIG_HID_BETOP_FF=m -CONFIG_HID_BIGBEN_FF=m -CONFIG_HID_CHERRY=m -CONFIG_HID_CHICONY=m -CONFIG_HID_CORSAIR=m -CONFIG_HID_COUGAR=m -CONFIG_HID_MACALLY=m -CONFIG_HID_PRODIKEYS=m -CONFIG_HID_CMEDIA=m -CONFIG_HID_CP2112=m -CONFIG_HID_CREATIVE_SB0540=m -CONFIG_HID_CYPRESS=m -CONFIG_HID_DRAGONRISE=m -CONFIG_DRAGONRISE_FF=y -CONFIG_HID_EMS_FF=m -CONFIG_HID_ELAN=m -CONFIG_HID_ELECOM=m -CONFIG_HID_ELO=m -CONFIG_HID_EZKEY=m -CONFIG_HID_GEMBIRD=m -CONFIG_HID_GFRM=m -CONFIG_HID_GLORIOUS=m -CONFIG_HID_HOLTEK=m -CONFIG_HOLTEK_FF=y -CONFIG_HID_GOOGLE_HAMMER=m -# CONFIG_HID_VIVALDI is not set -CONFIG_HID_GT683R=m -CONFIG_HID_KEYTOUCH=m -CONFIG_HID_KYE=m -CONFIG_HID_UCLOGIC=m -CONFIG_HID_WALTOP=m -CONFIG_HID_VIEWSONIC=m -CONFIG_HID_GYRATION=m -CONFIG_HID_ICADE=m -CONFIG_HID_ITE=m -CONFIG_HID_JABRA=m -CONFIG_HID_TWINHAN=m -CONFIG_HID_KENSINGTON=m -CONFIG_HID_LCPOWER=m -CONFIG_HID_LED=m -CONFIG_HID_LENOVO=m -CONFIG_HID_LOGITECH=m -CONFIG_HID_LOGITECH_DJ=m -CONFIG_HID_LOGITECH_HIDPP=m -CONFIG_LOGITECH_FF=y -CONFIG_LOGIRUMBLEPAD2_FF=y -CONFIG_LOGIG940_FF=y -CONFIG_LOGIWHEELS_FF=y -CONFIG_HID_MAGICMOUSE=m -CONFIG_HID_MALTRON=m -CONFIG_HID_MAYFLASH=m -CONFIG_HID_REDRAGON=m -CONFIG_HID_MICROSOFT=m 
-CONFIG_HID_MONTEREY=m -CONFIG_HID_MULTITOUCH=m -CONFIG_HID_NTI=m -CONFIG_HID_NTRIG=m -CONFIG_HID_ORTEK=m -CONFIG_HID_PANTHERLORD=m -CONFIG_PANTHERLORD_FF=y -CONFIG_HID_PENMOUNT=m -CONFIG_HID_PETALYNX=m -CONFIG_HID_PICOLCD=m -CONFIG_HID_PICOLCD_FB=y -CONFIG_HID_PICOLCD_BACKLIGHT=y -CONFIG_HID_PICOLCD_LCD=y -CONFIG_HID_PICOLCD_LEDS=y -CONFIG_HID_PICOLCD_CIR=y -CONFIG_HID_PLANTRONICS=m -CONFIG_HID_PRIMAX=m -CONFIG_HID_RETRODE=m -CONFIG_HID_ROCCAT=m -CONFIG_HID_SAITEK=m -CONFIG_HID_SAMSUNG=m -CONFIG_HID_SONY=m -CONFIG_SONY_FF=y -CONFIG_HID_SPEEDLINK=m -CONFIG_HID_STEAM=m -CONFIG_HID_STEELSERIES=m -CONFIG_HID_SUNPLUS=m -CONFIG_HID_RMI=m -CONFIG_HID_GREENASIA=m -CONFIG_GREENASIA_FF=y -CONFIG_HID_HYPERV_MOUSE=m -CONFIG_HID_SMARTJOYPLUS=m -CONFIG_SMARTJOYPLUS_FF=y -CONFIG_HID_TIVO=m -CONFIG_HID_TOPSEED=m -CONFIG_HID_THINGM=m -CONFIG_HID_THRUSTMASTER=m -CONFIG_THRUSTMASTER_FF=y -CONFIG_HID_UDRAW_PS3=m -CONFIG_HID_U2FZERO=m -CONFIG_HID_WACOM=m -CONFIG_HID_WIIMOTE=m -CONFIG_HID_XINMO=m -CONFIG_HID_ZEROPLUS=m -CONFIG_ZEROPLUS_FF=y -CONFIG_HID_ZYDACRON=m -CONFIG_HID_SENSOR_HUB=m -# CONFIG_HID_SENSOR_CUSTOM_SENSOR is not set -CONFIG_HID_ALPS=m -CONFIG_HID_MCP2221=m -# end of Special HID drivers - -# -# USB HID support -# -CONFIG_USB_HID=m -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y - -# -# USB HID Boot Protocol drivers -# -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -# end of USB HID Boot Protocol drivers -# end of USB HID support - -# -# I2C HID support -# -CONFIG_I2C_HID=m -# end of I2C HID support - -# -# Intel ISH HID support -# -CONFIG_INTEL_ISH_HID=m -CONFIG_INTEL_ISH_FIRMWARE_DOWNLOADER=m -# end of Intel ISH HID support -# end of HID support - -CONFIG_USB_OHCI_LITTLE_ENDIAN=y -CONFIG_USB_SUPPORT=y -CONFIG_USB_COMMON=y -CONFIG_USB_LED_TRIG=y -CONFIG_USB_ULPI_BUS=m -CONFIG_USB_CONN_GPIO=m -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB=y -CONFIG_USB_PCI=y -CONFIG_USB_ANNOUNCE_NEW_DEVICES=y - -# -# Miscellaneous USB options -# -CONFIG_USB_DEFAULT_PERSIST=y -# CONFIG_USB_FEW_INIT_RETRIES is not set -CONFIG_USB_DYNAMIC_MINORS=y -# CONFIG_USB_OTG is not set -# CONFIG_USB_OTG_PRODUCTLIST is not set -# CONFIG_USB_OTG_DISABLE_EXTERNAL_HUB is not set -CONFIG_USB_LEDS_TRIGGER_USBPORT=m -CONFIG_USB_AUTOSUSPEND_DELAY=2 -CONFIG_USB_MON=m - -# -# USB Host Controller Drivers -# -CONFIG_USB_C67X00_HCD=m -CONFIG_USB_XHCI_HCD=m -# CONFIG_USB_XHCI_DBGCAP is not set -CONFIG_USB_XHCI_PCI=m -CONFIG_USB_XHCI_PCI_RENESAS=m -CONFIG_USB_XHCI_PLATFORM=m -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_EHCI_TT_NEWSCHED=y -CONFIG_USB_EHCI_PCI=m -CONFIG_USB_EHCI_FSL=m -CONFIG_USB_EHCI_HCD_PLATFORM=m -CONFIG_USB_OXU210HP_HCD=m -CONFIG_USB_ISP116X_HCD=m -CONFIG_USB_FOTG210_HCD=m -CONFIG_USB_MAX3421_HCD=m -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_OHCI_HCD_PCI=m -# CONFIG_USB_OHCI_HCD_SSB is not set -CONFIG_USB_OHCI_HCD_PLATFORM=m -CONFIG_USB_UHCI_HCD=m -CONFIG_USB_U132_HCD=m -CONFIG_USB_SL811_HCD=m -# CONFIG_USB_SL811_HCD_ISO is not set -CONFIG_USB_SL811_CS=m -CONFIG_USB_R8A66597_HCD=m -CONFIG_USB_HCD_BCMA=m -CONFIG_USB_HCD_SSB=m -# CONFIG_USB_HCD_TEST_MODE is not set - -# -# USB Device Class drivers -# -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_WDM=m -CONFIG_USB_TMC=m - -# -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may -# - -# -# also be needed; see USB_STORAGE Help for more info -# -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_REALTEK=m -CONFIG_REALTEK_AUTOPM=y -CONFIG_USB_STORAGE_DATAFAB=m -CONFIG_USB_STORAGE_FREECOM=m -CONFIG_USB_STORAGE_ISD200=m 
-CONFIG_USB_STORAGE_USBAT=m -CONFIG_USB_STORAGE_SDDR09=m -CONFIG_USB_STORAGE_SDDR55=m -CONFIG_USB_STORAGE_JUMPSHOT=m -CONFIG_USB_STORAGE_ALAUDA=m -CONFIG_USB_STORAGE_ONETOUCH=m -CONFIG_USB_STORAGE_KARMA=m -CONFIG_USB_STORAGE_CYPRESS_ATACB=m -CONFIG_USB_STORAGE_ENE_UB6250=m -CONFIG_USB_UAS=m - -# -# USB Imaging devices -# -CONFIG_USB_MDC800=m -CONFIG_USB_MICROTEK=m -CONFIG_USBIP_CORE=m -CONFIG_USBIP_VHCI_HCD=m -CONFIG_USBIP_VHCI_HC_PORTS=8 -CONFIG_USBIP_VHCI_NR_HCS=1 -CONFIG_USBIP_HOST=m -CONFIG_USBIP_VUDC=m -# CONFIG_USBIP_DEBUG is not set -CONFIG_USB_CDNS3=m -CONFIG_USB_CDNS3_GADGET=y -CONFIG_USB_CDNS3_HOST=y -CONFIG_USB_CDNS3_PCI_WRAP=m -CONFIG_USB_MUSB_HDRC=m -# CONFIG_USB_MUSB_HOST is not set -# CONFIG_USB_MUSB_GADGET is not set -CONFIG_USB_MUSB_DUAL_ROLE=y - -# -# Platform Glue Layer -# - -# -# MUSB DMA mode -# -# CONFIG_MUSB_PIO_ONLY is not set -CONFIG_USB_DWC3=m -CONFIG_USB_DWC3_ULPI=y -# CONFIG_USB_DWC3_HOST is not set -# CONFIG_USB_DWC3_GADGET is not set -CONFIG_USB_DWC3_DUAL_ROLE=y - -# -# Platform Glue Driver Support -# -CONFIG_USB_DWC3_PCI=m -CONFIG_USB_DWC3_HAPS=m -CONFIG_USB_DWC3_OF_SIMPLE=m -CONFIG_USB_DWC2=m -# CONFIG_USB_DWC2_HOST is not set - -# -# Gadget/Dual-role mode requires USB Gadget support to be enabled -# -# CONFIG_USB_DWC2_PERIPHERAL is not set -CONFIG_USB_DWC2_DUAL_ROLE=y -CONFIG_USB_DWC2_PCI=m -# CONFIG_USB_DWC2_DEBUG is not set -# CONFIG_USB_DWC2_TRACK_MISSED_SOFS is not set -CONFIG_USB_CHIPIDEA=m -CONFIG_USB_CHIPIDEA_UDC=y -CONFIG_USB_CHIPIDEA_HOST=y -CONFIG_USB_CHIPIDEA_PCI=m -CONFIG_USB_CHIPIDEA_MSM=m -CONFIG_USB_CHIPIDEA_IMX=m -CONFIG_USB_CHIPIDEA_GENERIC=m -CONFIG_USB_CHIPIDEA_TEGRA=m -CONFIG_USB_ISP1760=m -CONFIG_USB_ISP1760_HCD=y -CONFIG_USB_ISP1761_UDC=y -# CONFIG_USB_ISP1760_HOST_ROLE is not set -# CONFIG_USB_ISP1760_GADGET_ROLE is not set -CONFIG_USB_ISP1760_DUAL_ROLE=y - -# -# USB port drivers -# -CONFIG_USB_USS720=m -CONFIG_USB_SERIAL=y -CONFIG_USB_SERIAL_CONSOLE=y -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_SIMPLE=m -CONFIG_USB_SERIAL_AIRCABLE=m -CONFIG_USB_SERIAL_ARK3116=m -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_CH341=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_CP210X=m -CONFIG_USB_SERIAL_CYPRESS_M8=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_F81232=m -CONFIG_USB_SERIAL_F8153X=m -CONFIG_USB_SERIAL_GARMIN=m -CONFIG_USB_SERIAL_IPW=m -CONFIG_USB_SERIAL_IUU=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_METRO=m -CONFIG_USB_SERIAL_MOS7720=m -CONFIG_USB_SERIAL_MOS7715_PARPORT=y -CONFIG_USB_SERIAL_MOS7840=m -CONFIG_USB_SERIAL_MXUPORT=m -CONFIG_USB_SERIAL_NAVMAN=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_OTI6858=m -CONFIG_USB_SERIAL_QCAUX=m -CONFIG_USB_SERIAL_QUALCOMM=m -CONFIG_USB_SERIAL_SPCP8X5=m -CONFIG_USB_SERIAL_SAFE=m -# CONFIG_USB_SERIAL_SAFE_PADDED is not set -CONFIG_USB_SERIAL_SIERRAWIRELESS=m -CONFIG_USB_SERIAL_SYMBOL=m -CONFIG_USB_SERIAL_TI=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_WWAN=m -CONFIG_USB_SERIAL_OPTION=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_SERIAL_OPTICON=m -CONFIG_USB_SERIAL_XSENS_MT=m -CONFIG_USB_SERIAL_WISHBONE=m -CONFIG_USB_SERIAL_SSU100=m -CONFIG_USB_SERIAL_QT2=m -CONFIG_USB_SERIAL_UPD78F0730=m -CONFIG_USB_SERIAL_DEBUG=m - -# -# USB 
Miscellaneous drivers -# -CONFIG_USB_EMI62=m -CONFIG_USB_EMI26=m -CONFIG_USB_ADUTUX=m -CONFIG_USB_SEVSEG=m -CONFIG_USB_LEGOTOWER=m -CONFIG_USB_LCD=m -CONFIG_USB_CYPRESS_CY7C63=m -CONFIG_USB_CYTHERM=m -CONFIG_USB_IDMOUSE=m -CONFIG_USB_FTDI_ELAN=m -CONFIG_USB_APPLEDISPLAY=m -CONFIG_APPLE_MFI_FASTCHARGE=m -CONFIG_USB_SISUSBVGA=m -CONFIG_USB_SISUSBVGA_CON=y -CONFIG_USB_LD=m -CONFIG_USB_TRANCEVIBRATOR=m -CONFIG_USB_IOWARRIOR=m -CONFIG_USB_TEST=m -CONFIG_USB_EHSET_TEST_FIXTURE=m -CONFIG_USB_ISIGHTFW=m -CONFIG_USB_YUREX=m -CONFIG_USB_EZUSB_FX2=m -CONFIG_USB_HUB_USB251XB=m -CONFIG_USB_HSIC_USB3503=m -CONFIG_USB_HSIC_USB4604=m -CONFIG_USB_LINK_LAYER_TEST=m -CONFIG_USB_CHAOSKEY=m -CONFIG_USB_ATM=m -CONFIG_USB_SPEEDTOUCH=m -CONFIG_USB_CXACRU=m -CONFIG_USB_UEAGLEATM=m -CONFIG_USB_XUSBATM=m - -# -# USB Physical Layer drivers -# -CONFIG_USB_PHY=y -CONFIG_NOP_USB_XCEIV=m -CONFIG_USB_GPIO_VBUS=m -CONFIG_TAHVO_USB=m -# CONFIG_TAHVO_USB_HOST_BY_DEFAULT is not set -CONFIG_USB_ISP1301=m -# end of USB Physical Layer drivers - -CONFIG_USB_GADGET=m -# CONFIG_USB_GADGET_DEBUG is not set -# CONFIG_USB_GADGET_DEBUG_FILES is not set -# CONFIG_USB_GADGET_DEBUG_FS is not set -CONFIG_USB_GADGET_VBUS_DRAW=2 -CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 -CONFIG_U_SERIAL_CONSOLE=y - -# -# USB Peripheral Controller -# -CONFIG_USB_FOTG210_UDC=m -CONFIG_USB_GR_UDC=m -CONFIG_USB_R8A66597=m -CONFIG_USB_PXA27X=m -CONFIG_USB_MV_UDC=m -CONFIG_USB_MV_U3D=m -CONFIG_USB_SNP_CORE=m -CONFIG_USB_SNP_UDC_PLAT=m -CONFIG_USB_M66592=m -CONFIG_USB_BDC_UDC=m - -# -# Platform Support -# -CONFIG_USB_BDC_PCI=m -CONFIG_USB_AMD5536UDC=m -CONFIG_USB_NET2272=m -CONFIG_USB_NET2272_DMA=y -CONFIG_USB_NET2280=m -CONFIG_USB_GOKU=m -CONFIG_USB_EG20T=m -CONFIG_USB_GADGET_XILINX=m -CONFIG_USB_MAX3420_UDC=m -CONFIG_USB_DUMMY_HCD=m -# end of USB Peripheral Controller - -CONFIG_USB_LIBCOMPOSITE=m -CONFIG_USB_F_ACM=m -CONFIG_USB_F_SS_LB=m -CONFIG_USB_U_SERIAL=m -CONFIG_USB_U_ETHER=m -CONFIG_USB_U_AUDIO=m -CONFIG_USB_F_SERIAL=m -CONFIG_USB_F_OBEX=m -CONFIG_USB_F_NCM=m -CONFIG_USB_F_ECM=m -CONFIG_USB_F_PHONET=m -CONFIG_USB_F_EEM=m -CONFIG_USB_F_SUBSET=m -CONFIG_USB_F_RNDIS=m -CONFIG_USB_F_MASS_STORAGE=m -CONFIG_USB_F_FS=m -CONFIG_USB_F_UAC1=m -CONFIG_USB_F_UAC1_LEGACY=m -CONFIG_USB_F_UAC2=m -CONFIG_USB_F_UVC=m -CONFIG_USB_F_MIDI=m -CONFIG_USB_F_HID=m -CONFIG_USB_F_PRINTER=m -CONFIG_USB_F_TCM=m -CONFIG_USB_CONFIGFS=m -CONFIG_USB_CONFIGFS_SERIAL=y -CONFIG_USB_CONFIGFS_ACM=y -CONFIG_USB_CONFIGFS_OBEX=y -CONFIG_USB_CONFIGFS_NCM=y -CONFIG_USB_CONFIGFS_ECM=y -CONFIG_USB_CONFIGFS_ECM_SUBSET=y -CONFIG_USB_CONFIGFS_RNDIS=y -CONFIG_USB_CONFIGFS_EEM=y -CONFIG_USB_CONFIGFS_PHONET=y -CONFIG_USB_CONFIGFS_MASS_STORAGE=y -CONFIG_USB_CONFIGFS_F_LB_SS=y -CONFIG_USB_CONFIGFS_F_FS=y -CONFIG_USB_CONFIGFS_F_UAC1=y -CONFIG_USB_CONFIGFS_F_UAC1_LEGACY=y -CONFIG_USB_CONFIGFS_F_UAC2=y -CONFIG_USB_CONFIGFS_F_MIDI=y -CONFIG_USB_CONFIGFS_F_HID=y -CONFIG_USB_CONFIGFS_F_UVC=y -CONFIG_USB_CONFIGFS_F_PRINTER=y -CONFIG_USB_CONFIGFS_F_TCM=y - -# -# USB Gadget precomposed configurations -# -CONFIG_USB_ZERO=m -CONFIG_USB_AUDIO=m -# CONFIG_GADGET_UAC1 is not set -CONFIG_USB_ETH=m -CONFIG_USB_ETH_RNDIS=y -CONFIG_USB_ETH_EEM=y -CONFIG_USB_G_NCM=m -CONFIG_USB_GADGETFS=m -CONFIG_USB_FUNCTIONFS=m -CONFIG_USB_FUNCTIONFS_ETH=y -CONFIG_USB_FUNCTIONFS_RNDIS=y -CONFIG_USB_FUNCTIONFS_GENERIC=y -CONFIG_USB_MASS_STORAGE=m -CONFIG_USB_GADGET_TARGET=m -CONFIG_USB_G_SERIAL=m -CONFIG_USB_MIDI_GADGET=m -CONFIG_USB_G_PRINTER=m -CONFIG_USB_CDC_COMPOSITE=m -CONFIG_USB_G_NOKIA=m -CONFIG_USB_G_ACM_MS=m -CONFIG_USB_G_MULTI=m 
-CONFIG_USB_G_MULTI_RNDIS=y -CONFIG_USB_G_MULTI_CDC=y -CONFIG_USB_G_HID=m -CONFIG_USB_G_DBGP=m -# CONFIG_USB_G_DBGP_PRINTK is not set -CONFIG_USB_G_DBGP_SERIAL=y -CONFIG_USB_G_WEBCAM=m -CONFIG_USB_RAW_GADGET=m -# end of USB Gadget precomposed configurations - -CONFIG_TYPEC=m -CONFIG_TYPEC_TCPM=m -CONFIG_TYPEC_TCPCI=m -CONFIG_TYPEC_RT1711H=m -# CONFIG_TYPEC_MT6360 is not set -# CONFIG_TYPEC_TCPCI_MAXIM is not set -CONFIG_TYPEC_FUSB302=m -CONFIG_TYPEC_WCOVE=m -CONFIG_TYPEC_UCSI=m -CONFIG_UCSI_CCG=m -CONFIG_UCSI_ACPI=m -CONFIG_TYPEC_HD3SS3220=m -CONFIG_TYPEC_TPS6598X=m -# CONFIG_TYPEC_STUSB160X is not set - -# -# USB Type-C Multiplexer/DeMultiplexer Switch support -# -CONFIG_TYPEC_MUX_PI3USB30532=m -CONFIG_TYPEC_MUX_INTEL_PMC=m -# end of USB Type-C Multiplexer/DeMultiplexer Switch support - -# -# USB Type-C Alternate Mode drivers -# -CONFIG_TYPEC_DP_ALTMODE=m -CONFIG_TYPEC_NVIDIA_ALTMODE=m -# end of USB Type-C Alternate Mode drivers - -CONFIG_USB_ROLE_SWITCH=m -CONFIG_USB_ROLES_INTEL_XHCI=m -CONFIG_MMC=m -CONFIG_PWRSEQ_EMMC=m -CONFIG_PWRSEQ_SD8787=m -CONFIG_PWRSEQ_SIMPLE=m -CONFIG_MMC_BLOCK=m -CONFIG_MMC_BLOCK_MINORS=8 -CONFIG_SDIO_UART=m -CONFIG_MMC_TEST=m - -# -# MMC/SD/SDIO Host Controller Drivers -# -# CONFIG_MMC_DEBUG is not set -CONFIG_MMC_SDHCI=m -CONFIG_MMC_SDHCI_IO_ACCESSORS=y -CONFIG_MMC_SDHCI_PCI=m -CONFIG_MMC_RICOH_MMC=y -CONFIG_MMC_SDHCI_ACPI=m -CONFIG_MMC_SDHCI_PLTFM=m -CONFIG_MMC_SDHCI_OF_ARASAN=m -CONFIG_MMC_SDHCI_OF_ASPEED=m -CONFIG_MMC_SDHCI_OF_AT91=m -CONFIG_MMC_SDHCI_OF_DWCMSHC=m -CONFIG_MMC_SDHCI_CADENCE=m -CONFIG_MMC_SDHCI_F_SDH30=m -CONFIG_MMC_SDHCI_MILBEAUT=m -CONFIG_MMC_WBSD=m -CONFIG_MMC_ALCOR=m -CONFIG_MMC_TIFM_SD=m -CONFIG_MMC_SPI=m -CONFIG_MMC_SDRICOH_CS=m -CONFIG_MMC_CB710=m -CONFIG_MMC_VIA_SDMMC=m -CONFIG_MMC_VUB300=m -CONFIG_MMC_USHC=m -CONFIG_MMC_USDHI6ROL0=m -CONFIG_MMC_REALTEK_PCI=m -CONFIG_MMC_REALTEK_USB=m -CONFIG_MMC_CQHCI=m -CONFIG_MMC_HSQ=m -CONFIG_MMC_TOSHIBA_PCI=m -CONFIG_MMC_MTK=m -CONFIG_MMC_SDHCI_XENON=m -CONFIG_MMC_SDHCI_OMAP=m -CONFIG_MMC_SDHCI_AM654=m -CONFIG_MMC_SDHCI_EXTERNAL_DMA=y -CONFIG_MEMSTICK=m -# CONFIG_MEMSTICK_DEBUG is not set - -# -# MemoryStick drivers -# -# CONFIG_MEMSTICK_UNSAFE_RESUME is not set -CONFIG_MSPRO_BLOCK=m -CONFIG_MS_BLOCK=m - -# -# MemoryStick Host Controller Drivers -# -CONFIG_MEMSTICK_TIFM_MS=m -CONFIG_MEMSTICK_JMICRON_38X=m -CONFIG_MEMSTICK_R592=m -CONFIG_MEMSTICK_REALTEK_PCI=m -CONFIG_MEMSTICK_REALTEK_USB=m -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_CLASS_FLASH=m -CONFIG_LEDS_CLASS_MULTICOLOR=m -CONFIG_LEDS_BRIGHTNESS_HW_CHANGED=y - -# -# LED drivers -# -CONFIG_LEDS_88PM860X=m -CONFIG_LEDS_AAT1290=m -CONFIG_LEDS_AN30259A=m -CONFIG_LEDS_APU=m -CONFIG_LEDS_AS3645A=m -CONFIG_LEDS_AW2013=m -CONFIG_LEDS_BCM6328=m -CONFIG_LEDS_BCM6358=m -CONFIG_LEDS_CPCAP=m -CONFIG_LEDS_CR0014114=m -CONFIG_LEDS_EL15203000=m -CONFIG_LEDS_LM3530=m -CONFIG_LEDS_LM3532=m -CONFIG_LEDS_LM3533=m -CONFIG_LEDS_LM3642=m -CONFIG_LEDS_LM3692X=m -CONFIG_LEDS_LM3601X=m -CONFIG_LEDS_MT6323=m -CONFIG_LEDS_PCA9532=m -CONFIG_LEDS_PCA9532_GPIO=y -CONFIG_LEDS_GPIO=m -CONFIG_LEDS_LP3944=m -CONFIG_LEDS_LP3952=m -# CONFIG_LEDS_LP50XX is not set -# CONFIG_LEDS_LP55XX_COMMON is not set -CONFIG_LEDS_LP8788=m -CONFIG_LEDS_LP8860=m -CONFIG_LEDS_CLEVO_MAIL=m -CONFIG_LEDS_PCA955X=m -CONFIG_LEDS_PCA955X_GPIO=y -CONFIG_LEDS_PCA963X=m -CONFIG_LEDS_WM831X_STATUS=m -CONFIG_LEDS_WM8350=m -CONFIG_LEDS_DA903X=m -CONFIG_LEDS_DA9052=m -CONFIG_LEDS_DAC124S085=m -CONFIG_LEDS_PWM=m -CONFIG_LEDS_REGULATOR=m -CONFIG_LEDS_BD2802=m -CONFIG_LEDS_INTEL_SS4200=m 
-CONFIG_LEDS_LT3593=m -CONFIG_LEDS_ADP5520=m -CONFIG_LEDS_MC13783=m -CONFIG_LEDS_TCA6507=m -CONFIG_LEDS_TLC591XX=m -CONFIG_LEDS_MAX77650=m -CONFIG_LEDS_MAX77693=m -CONFIG_LEDS_MAX8997=m -CONFIG_LEDS_LM355x=m -CONFIG_LEDS_MENF21BMC=m -CONFIG_LEDS_KTD2692=m -CONFIG_LEDS_IS31FL319X=m -CONFIG_LEDS_IS31FL32XX=m - -# -# LED driver for blink(1) USB RGB LED is under Special HID drivers (HID_THINGM) -# -CONFIG_LEDS_BLINKM=m -CONFIG_LEDS_SYSCON=y -CONFIG_LEDS_MLXCPLD=m -CONFIG_LEDS_MLXREG=m -CONFIG_LEDS_USER=m -CONFIG_LEDS_NIC78BX=m -CONFIG_LEDS_SPI_BYTE=m -CONFIG_LEDS_TI_LMU_COMMON=m -CONFIG_LEDS_LM3697=m -CONFIG_LEDS_LM36274=m -CONFIG_LEDS_TPS6105X=m -CONFIG_LEDS_SGM3140=m - -# -# LED Triggers -# -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=m -CONFIG_LEDS_TRIGGER_ONESHOT=m -CONFIG_LEDS_TRIGGER_DISK=y -CONFIG_LEDS_TRIGGER_MTD=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=m -CONFIG_LEDS_TRIGGER_BACKLIGHT=m -CONFIG_LEDS_TRIGGER_CPU=y -CONFIG_LEDS_TRIGGER_ACTIVITY=m -CONFIG_LEDS_TRIGGER_GPIO=m -CONFIG_LEDS_TRIGGER_DEFAULT_ON=m - -# -# iptables trigger is under Netfilter config (LED target) -# -CONFIG_LEDS_TRIGGER_TRANSIENT=m -CONFIG_LEDS_TRIGGER_CAMERA=m -CONFIG_LEDS_TRIGGER_PANIC=y -CONFIG_LEDS_TRIGGER_NETDEV=m -CONFIG_LEDS_TRIGGER_PATTERN=m -CONFIG_LEDS_TRIGGER_AUDIO=m -CONFIG_ACCESSIBILITY=y -CONFIG_A11Y_BRAILLE_CONSOLE=y - -# -# Speakup console speech -# -CONFIG_SPEAKUP=m -CONFIG_SPEAKUP_SYNTH_ACNTSA=m -CONFIG_SPEAKUP_SYNTH_APOLLO=m -CONFIG_SPEAKUP_SYNTH_AUDPTR=m -CONFIG_SPEAKUP_SYNTH_BNS=m -CONFIG_SPEAKUP_SYNTH_DECTLK=m -CONFIG_SPEAKUP_SYNTH_DECEXT=m -CONFIG_SPEAKUP_SYNTH_LTLK=m -CONFIG_SPEAKUP_SYNTH_SOFT=m -CONFIG_SPEAKUP_SYNTH_SPKOUT=m -CONFIG_SPEAKUP_SYNTH_TXPRT=m -CONFIG_SPEAKUP_SYNTH_DUMMY=m -# end of Speakup console speech - -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_MAD=m -CONFIG_INFINIBAND_USER_ACCESS=m -CONFIG_INFINIBAND_USER_MEM=y -CONFIG_INFINIBAND_ON_DEMAND_PAGING=y -CONFIG_INFINIBAND_ADDR_TRANS=y -CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y -CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_MTHCA_DEBUG=y -CONFIG_INFINIBAND_QIB=m -CONFIG_INFINIBAND_QIB_DCA=y -CONFIG_INFINIBAND_CXGB4=m -CONFIG_INFINIBAND_EFA=m -CONFIG_INFINIBAND_I40IW=m -CONFIG_MLX4_INFINIBAND=m -CONFIG_MLX5_INFINIBAND=m -CONFIG_INFINIBAND_OCRDMA=m -CONFIG_INFINIBAND_VMWARE_PVRDMA=m -CONFIG_INFINIBAND_USNIC=m -CONFIG_INFINIBAND_BNXT_RE=m -CONFIG_INFINIBAND_HFI1=m -# CONFIG_HFI1_DEBUG_SDMA_ORDER is not set -# CONFIG_SDMA_VERBOSITY is not set -CONFIG_INFINIBAND_QEDR=m -CONFIG_INFINIBAND_RDMAVT=m -CONFIG_RDMA_RXE=m -CONFIG_RDMA_SIW=m -CONFIG_INFINIBAND_IPOIB=m -CONFIG_INFINIBAND_IPOIB_CM=y -CONFIG_INFINIBAND_IPOIB_DEBUG=y -# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set -CONFIG_INFINIBAND_SRP=m -CONFIG_INFINIBAND_SRPT=m -CONFIG_INFINIBAND_ISER=m -CONFIG_INFINIBAND_ISERT=m -CONFIG_INFINIBAND_RTRS=m -CONFIG_INFINIBAND_RTRS_CLIENT=m -CONFIG_INFINIBAND_RTRS_SERVER=m -CONFIG_INFINIBAND_OPA_VNIC=m -CONFIG_EDAC_ATOMIC_SCRUB=y -CONFIG_EDAC_SUPPORT=y -CONFIG_EDAC=y -CONFIG_EDAC_LEGACY_SYSFS=y -# CONFIG_EDAC_DEBUG is not set -CONFIG_EDAC_DECODE_MCE=m -CONFIG_EDAC_GHES=y -CONFIG_EDAC_AMD64=m -# CONFIG_EDAC_AMD64_ERROR_INJECTION is not set -CONFIG_EDAC_E752X=m -CONFIG_EDAC_I82975X=m -CONFIG_EDAC_I3000=m -CONFIG_EDAC_I3200=m -CONFIG_EDAC_IE31200=m -CONFIG_EDAC_X38=m -CONFIG_EDAC_I5400=m -CONFIG_EDAC_I7CORE=m -CONFIG_EDAC_I5000=m -CONFIG_EDAC_I5100=m -CONFIG_EDAC_I7300=m -CONFIG_EDAC_SBRIDGE=m -CONFIG_EDAC_SKX=m -CONFIG_EDAC_I10NM=m -CONFIG_EDAC_PND2=m -CONFIG_RTC_LIB=y -CONFIG_RTC_MC146818_LIB=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_HCTOSYS=y 
-CONFIG_RTC_HCTOSYS_DEVICE="rtc0" -CONFIG_RTC_SYSTOHC=y -CONFIG_RTC_SYSTOHC_DEVICE="rtc0" -# CONFIG_RTC_DEBUG is not set -CONFIG_RTC_NVMEM=y - -# -# RTC interfaces -# -CONFIG_RTC_INTF_SYSFS=y -CONFIG_RTC_INTF_PROC=y -CONFIG_RTC_INTF_DEV=y -CONFIG_RTC_INTF_DEV_UIE_EMUL=y -# CONFIG_RTC_DRV_TEST is not set - -# -# I2C RTC drivers -# -CONFIG_RTC_DRV_88PM860X=m -CONFIG_RTC_DRV_88PM80X=m -CONFIG_RTC_DRV_ABB5ZES3=m -CONFIG_RTC_DRV_ABEOZ9=m -CONFIG_RTC_DRV_ABX80X=m -CONFIG_RTC_DRV_AS3722=m -CONFIG_RTC_DRV_DS1307=m -CONFIG_RTC_DRV_DS1307_CENTURY=y -CONFIG_RTC_DRV_DS1374=m -CONFIG_RTC_DRV_DS1374_WDT=y -CONFIG_RTC_DRV_DS1672=m -CONFIG_RTC_DRV_HYM8563=m -CONFIG_RTC_DRV_LP8788=m -CONFIG_RTC_DRV_MAX6900=m -CONFIG_RTC_DRV_MAX8907=m -CONFIG_RTC_DRV_MAX8925=m -CONFIG_RTC_DRV_MAX8998=m -CONFIG_RTC_DRV_MAX8997=m -CONFIG_RTC_DRV_MAX77686=m -CONFIG_RTC_DRV_RK808=m -CONFIG_RTC_DRV_RS5C372=m -CONFIG_RTC_DRV_ISL1208=m -CONFIG_RTC_DRV_ISL12022=m -CONFIG_RTC_DRV_ISL12026=m -CONFIG_RTC_DRV_X1205=m -CONFIG_RTC_DRV_PCF8523=m -CONFIG_RTC_DRV_PCF85063=m -CONFIG_RTC_DRV_PCF85363=m -CONFIG_RTC_DRV_PCF8563=m -CONFIG_RTC_DRV_PCF8583=m -CONFIG_RTC_DRV_M41T80=m -CONFIG_RTC_DRV_M41T80_WDT=y -CONFIG_RTC_DRV_BD70528=m -CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_TWL4030=m -CONFIG_RTC_DRV_PALMAS=m -CONFIG_RTC_DRV_TPS6586X=m -CONFIG_RTC_DRV_TPS65910=m -CONFIG_RTC_DRV_TPS80031=m -CONFIG_RTC_DRV_RC5T583=m -CONFIG_RTC_DRV_RC5T619=m -CONFIG_RTC_DRV_S35390A=m -CONFIG_RTC_DRV_FM3130=m -CONFIG_RTC_DRV_RX8010=m -CONFIG_RTC_DRV_RX8581=m -CONFIG_RTC_DRV_RX8025=m -CONFIG_RTC_DRV_EM3027=m -CONFIG_RTC_DRV_RV3028=m -# CONFIG_RTC_DRV_RV3032 is not set -CONFIG_RTC_DRV_RV8803=m -CONFIG_RTC_DRV_S5M=m -CONFIG_RTC_DRV_SD3078=m - -# -# SPI RTC drivers -# -CONFIG_RTC_DRV_M41T93=m -CONFIG_RTC_DRV_M41T94=m -CONFIG_RTC_DRV_DS1302=m -CONFIG_RTC_DRV_DS1305=m -CONFIG_RTC_DRV_DS1343=m -CONFIG_RTC_DRV_DS1347=m -CONFIG_RTC_DRV_DS1390=m -CONFIG_RTC_DRV_MAX6916=m -CONFIG_RTC_DRV_R9701=m -CONFIG_RTC_DRV_RX4581=m -CONFIG_RTC_DRV_RX6110=m -CONFIG_RTC_DRV_RS5C348=m -CONFIG_RTC_DRV_MAX6902=m -CONFIG_RTC_DRV_PCF2123=m -CONFIG_RTC_DRV_MCP795=m -CONFIG_RTC_I2C_AND_SPI=y - -# -# SPI and I2C RTC drivers -# -CONFIG_RTC_DRV_DS3232=m -CONFIG_RTC_DRV_DS3232_HWMON=y -CONFIG_RTC_DRV_PCF2127=m -CONFIG_RTC_DRV_RV3029C2=m -CONFIG_RTC_DRV_RV3029_HWMON=y - -# -# Platform RTC drivers -# -CONFIG_RTC_DRV_CMOS=y -CONFIG_RTC_DRV_DS1286=m -CONFIG_RTC_DRV_DS1511=m -CONFIG_RTC_DRV_DS1553=m -CONFIG_RTC_DRV_DS1685_FAMILY=m -CONFIG_RTC_DRV_DS1685=y -# CONFIG_RTC_DRV_DS1689 is not set -# CONFIG_RTC_DRV_DS17285 is not set -# CONFIG_RTC_DRV_DS17485 is not set -# CONFIG_RTC_DRV_DS17885 is not set -CONFIG_RTC_DRV_DS1742=m -CONFIG_RTC_DRV_DS2404=m -CONFIG_RTC_DRV_DA9052=m -CONFIG_RTC_DRV_DA9055=m -CONFIG_RTC_DRV_DA9063=m -CONFIG_RTC_DRV_STK17TA8=m -CONFIG_RTC_DRV_M48T86=m -CONFIG_RTC_DRV_M48T35=m -CONFIG_RTC_DRV_M48T59=m -CONFIG_RTC_DRV_MSM6242=m -CONFIG_RTC_DRV_BQ4802=m -CONFIG_RTC_DRV_RP5C01=m -CONFIG_RTC_DRV_V3020=m -CONFIG_RTC_DRV_WM831X=m -CONFIG_RTC_DRV_WM8350=m -CONFIG_RTC_DRV_PCF50633=m -CONFIG_RTC_DRV_AB3100=m -CONFIG_RTC_DRV_ZYNQMP=m -CONFIG_RTC_DRV_CROS_EC=m - -# -# on-CPU RTC drivers -# -CONFIG_RTC_DRV_CADENCE=m -CONFIG_RTC_DRV_FTRTC010=m -CONFIG_RTC_DRV_PCAP=m -CONFIG_RTC_DRV_MC13XXX=m -CONFIG_RTC_DRV_MT6397=m -CONFIG_RTC_DRV_R7301=m -CONFIG_RTC_DRV_CPCAP=m - -# -# HID Sensor RTC drivers -# -CONFIG_RTC_DRV_HID_SENSOR_TIME=m -CONFIG_RTC_DRV_WILCO_EC=m -CONFIG_DMADEVICES=y -# CONFIG_DMADEVICES_DEBUG is not set - -# -# DMA Devices -# -CONFIG_DMA_ENGINE=y -CONFIG_DMA_VIRTUAL_CHANNELS=y 
-CONFIG_DMA_ACPI=y -CONFIG_DMA_OF=y -CONFIG_ALTERA_MSGDMA=m -CONFIG_DW_AXI_DMAC=m -CONFIG_FSL_EDMA=m -CONFIG_INTEL_IDMA64=m -CONFIG_INTEL_IDXD=m -CONFIG_INTEL_IOATDMA=m -CONFIG_INTEL_MIC_X100_DMA=m -CONFIG_PLX_DMA=m -# CONFIG_XILINX_ZYNQMP_DPDMA is not set -CONFIG_QCOM_HIDMA_MGMT=m -CONFIG_QCOM_HIDMA=m -CONFIG_DW_DMAC_CORE=y -CONFIG_DW_DMAC=y -CONFIG_DW_DMAC_PCI=y -CONFIG_DW_EDMA=m -CONFIG_DW_EDMA_PCIE=m -CONFIG_HSU_DMA=y -CONFIG_SF_PDMA=m - -# -# DMA Clients -# -CONFIG_ASYNC_TX_DMA=y -# CONFIG_DMATEST is not set -CONFIG_DMA_ENGINE_RAID=y - -# -# DMABUF options -# -CONFIG_SYNC_FILE=y -# CONFIG_SW_SYNC is not set -CONFIG_UDMABUF=y -# CONFIG_DMABUF_MOVE_NOTIFY is not set -# CONFIG_DMABUF_SELFTESTS is not set -CONFIG_DMABUF_HEAPS=y -CONFIG_DMABUF_HEAPS_SYSTEM=y -# end of DMABUF options - -CONFIG_DCA=m -CONFIG_AUXDISPLAY=y -CONFIG_HD44780=m -CONFIG_KS0108=m -CONFIG_KS0108_PORT=0x378 -CONFIG_KS0108_DELAY=2 -CONFIG_CFAG12864B=m -CONFIG_CFAG12864B_RATE=20 -CONFIG_IMG_ASCII_LCD=m -CONFIG_HT16K33=m -CONFIG_PARPORT_PANEL=m -CONFIG_PANEL_PARPORT=0 -CONFIG_PANEL_PROFILE=5 -# CONFIG_PANEL_CHANGE_MESSAGE is not set -# CONFIG_CHARLCD_BL_OFF is not set -# CONFIG_CHARLCD_BL_ON is not set -CONFIG_CHARLCD_BL_FLASH=y -CONFIG_PANEL=m -CONFIG_CHARLCD=m -CONFIG_UIO=m -CONFIG_UIO_CIF=m -CONFIG_UIO_PDRV_GENIRQ=m -CONFIG_UIO_DMEM_GENIRQ=m -CONFIG_UIO_AEC=m -CONFIG_UIO_SERCOS3=m -CONFIG_UIO_PCI_GENERIC=m -CONFIG_UIO_NETX=m -CONFIG_UIO_PRUSS=m -CONFIG_UIO_MF624=m -CONFIG_UIO_HV_GENERIC=m -CONFIG_VFIO_IOMMU_TYPE1=m -CONFIG_VFIO_VIRQFD=m -CONFIG_VFIO=m -# CONFIG_VFIO_NOIOMMU is not set -CONFIG_VFIO_PCI=m -CONFIG_VFIO_PCI_VGA=y -CONFIG_VFIO_PCI_MMAP=y -CONFIG_VFIO_PCI_INTX=y -CONFIG_VFIO_PCI_IGD=y -CONFIG_VFIO_MDEV=m -CONFIG_VFIO_MDEV_DEVICE=m -CONFIG_IRQ_BYPASS_MANAGER=m -CONFIG_VIRT_DRIVERS=y -CONFIG_VBOXGUEST=m -# CONFIG_NITRO_ENCLAVES is not set -CONFIG_VIRTIO=y -CONFIG_VIRTIO_MENU=y -CONFIG_VIRTIO_PCI=m -CONFIG_VIRTIO_PCI_LEGACY=y -CONFIG_VIRTIO_VDPA=m -CONFIG_VIRTIO_PMEM=m -CONFIG_VIRTIO_BALLOON=m -CONFIG_VIRTIO_MEM=m -CONFIG_VIRTIO_INPUT=m -CONFIG_VIRTIO_MMIO=m -CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_VIRTIO_DMA_SHARED_BUFFER=m -CONFIG_VDPA=m -CONFIG_VDPA_SIM=m -CONFIG_IFCVF=m -CONFIG_MLX5_VDPA=y -CONFIG_MLX5_VDPA_NET=m -CONFIG_VHOST_IOTLB=m -CONFIG_VHOST_RING=m -CONFIG_VHOST=m -CONFIG_VHOST_MENU=y -CONFIG_VHOST_NET=m -CONFIG_VHOST_SCSI=m -CONFIG_VHOST_VSOCK=m -CONFIG_VHOST_VDPA=m -# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set - -# -# Microsoft Hyper-V guest support -# -CONFIG_HYPERV=m -CONFIG_HYPERV_TIMER=y -CONFIG_HYPERV_UTILS=m -CONFIG_HYPERV_BALLOON=m -# end of Microsoft Hyper-V guest support - -# -# Xen driver support -# -CONFIG_XEN_BALLOON=y -CONFIG_XEN_BALLOON_MEMORY_HOTPLUG=y -CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT=512 -CONFIG_XEN_SCRUB_PAGES_DEFAULT=y -CONFIG_XEN_DEV_EVTCHN=m -CONFIG_XEN_BACKEND=y -CONFIG_XENFS=m -CONFIG_XEN_COMPAT_XENFS=y -CONFIG_XEN_SYS_HYPERVISOR=y -CONFIG_XEN_XENBUS_FRONTEND=y -CONFIG_XEN_GNTDEV=m -CONFIG_XEN_GNTDEV_DMABUF=y -CONFIG_XEN_GRANT_DEV_ALLOC=m -CONFIG_XEN_GRANT_DMA_ALLOC=y -CONFIG_SWIOTLB_XEN=y -CONFIG_XEN_PCIDEV_BACKEND=m -CONFIG_XEN_PVCALLS_FRONTEND=m -CONFIG_XEN_PVCALLS_BACKEND=y -CONFIG_XEN_SCSI_BACKEND=m -CONFIG_XEN_PRIVCMD=m -CONFIG_XEN_ACPI_PROCESSOR=m -CONFIG_XEN_MCE_LOG=y -CONFIG_XEN_HAVE_PVMMU=y -CONFIG_XEN_EFI=y -CONFIG_XEN_AUTO_XLATE=y -CONFIG_XEN_ACPI=y -CONFIG_XEN_SYMS=y -CONFIG_XEN_HAVE_VPMU=y -CONFIG_XEN_FRONT_PGDIR_SHBUF=m -CONFIG_XEN_UNPOPULATED_ALLOC=y -# end of Xen driver support - -# CONFIG_GREYBUS is not set -CONFIG_STAGING=y 
-CONFIG_PRISM2_USB=m -CONFIG_COMEDI=m -# CONFIG_COMEDI_DEBUG is not set -CONFIG_COMEDI_DEFAULT_BUF_SIZE_KB=2048 -CONFIG_COMEDI_DEFAULT_BUF_MAXSIZE_KB=20480 -CONFIG_COMEDI_MISC_DRIVERS=y -CONFIG_COMEDI_BOND=m -CONFIG_COMEDI_TEST=m -CONFIG_COMEDI_PARPORT=m -# CONFIG_COMEDI_ISA_DRIVERS is not set -CONFIG_COMEDI_PCI_DRIVERS=m -CONFIG_COMEDI_8255_PCI=m -CONFIG_COMEDI_ADDI_WATCHDOG=m -CONFIG_COMEDI_ADDI_APCI_1032=m -CONFIG_COMEDI_ADDI_APCI_1500=m -CONFIG_COMEDI_ADDI_APCI_1516=m -CONFIG_COMEDI_ADDI_APCI_1564=m -CONFIG_COMEDI_ADDI_APCI_16XX=m -CONFIG_COMEDI_ADDI_APCI_2032=m -CONFIG_COMEDI_ADDI_APCI_2200=m -CONFIG_COMEDI_ADDI_APCI_3120=m -CONFIG_COMEDI_ADDI_APCI_3501=m -CONFIG_COMEDI_ADDI_APCI_3XXX=m -CONFIG_COMEDI_ADL_PCI6208=m -CONFIG_COMEDI_ADL_PCI7X3X=m -CONFIG_COMEDI_ADL_PCI8164=m -CONFIG_COMEDI_ADL_PCI9111=m -CONFIG_COMEDI_ADL_PCI9118=m -CONFIG_COMEDI_ADV_PCI1710=m -CONFIG_COMEDI_ADV_PCI1720=m -CONFIG_COMEDI_ADV_PCI1723=m -CONFIG_COMEDI_ADV_PCI1724=m -CONFIG_COMEDI_ADV_PCI1760=m -CONFIG_COMEDI_ADV_PCI_DIO=m -CONFIG_COMEDI_AMPLC_DIO200_PCI=m -CONFIG_COMEDI_AMPLC_PC236_PCI=m -CONFIG_COMEDI_AMPLC_PC263_PCI=m -CONFIG_COMEDI_AMPLC_PCI224=m -CONFIG_COMEDI_AMPLC_PCI230=m -CONFIG_COMEDI_CONTEC_PCI_DIO=m -CONFIG_COMEDI_DAS08_PCI=m -CONFIG_COMEDI_DT3000=m -CONFIG_COMEDI_DYNA_PCI10XX=m -CONFIG_COMEDI_GSC_HPDI=m -CONFIG_COMEDI_MF6X4=m -CONFIG_COMEDI_ICP_MULTI=m -CONFIG_COMEDI_DAQBOARD2000=m -CONFIG_COMEDI_JR3_PCI=m -CONFIG_COMEDI_KE_COUNTER=m -CONFIG_COMEDI_CB_PCIDAS64=m -CONFIG_COMEDI_CB_PCIDAS=m -CONFIG_COMEDI_CB_PCIDDA=m -CONFIG_COMEDI_CB_PCIMDAS=m -CONFIG_COMEDI_CB_PCIMDDA=m -CONFIG_COMEDI_ME4000=m -CONFIG_COMEDI_ME_DAQ=m -CONFIG_COMEDI_NI_6527=m -CONFIG_COMEDI_NI_65XX=m -CONFIG_COMEDI_NI_660X=m -CONFIG_COMEDI_NI_670X=m -CONFIG_COMEDI_NI_LABPC_PCI=m -CONFIG_COMEDI_NI_PCIDIO=m -CONFIG_COMEDI_NI_PCIMIO=m -CONFIG_COMEDI_RTD520=m -CONFIG_COMEDI_S626=m -CONFIG_COMEDI_MITE=m -CONFIG_COMEDI_NI_TIOCMD=m -CONFIG_COMEDI_PCMCIA_DRIVERS=m -CONFIG_COMEDI_CB_DAS16_CS=m -CONFIG_COMEDI_DAS08_CS=m -CONFIG_COMEDI_NI_DAQ_700_CS=m -CONFIG_COMEDI_NI_DAQ_DIO24_CS=m -CONFIG_COMEDI_NI_LABPC_CS=m -CONFIG_COMEDI_NI_MIO_CS=m -CONFIG_COMEDI_QUATECH_DAQP_CS=m -CONFIG_COMEDI_USB_DRIVERS=m -CONFIG_COMEDI_DT9812=m -CONFIG_COMEDI_NI_USB6501=m -CONFIG_COMEDI_USBDUX=m -CONFIG_COMEDI_USBDUXFAST=m -CONFIG_COMEDI_USBDUXSIGMA=m -CONFIG_COMEDI_VMK80XX=m -CONFIG_COMEDI_8254=m -CONFIG_COMEDI_8255=m -CONFIG_COMEDI_8255_SA=m -CONFIG_COMEDI_KCOMEDILIB=m -CONFIG_COMEDI_AMPLC_DIO200=m -CONFIG_COMEDI_AMPLC_PC236=m -CONFIG_COMEDI_DAS08=m -CONFIG_COMEDI_NI_LABPC=m -CONFIG_COMEDI_NI_TIO=m -CONFIG_COMEDI_NI_ROUTING=m -CONFIG_RTL8192U=m -CONFIG_RTLLIB=m -CONFIG_RTLLIB_CRYPTO_CCMP=m -CONFIG_RTLLIB_CRYPTO_TKIP=m -CONFIG_RTLLIB_CRYPTO_WEP=m -CONFIG_RTL8192E=m -CONFIG_RTL8723BS=m -CONFIG_R8712U=m -CONFIG_R8188EU=m -CONFIG_88EU_AP_MODE=y -CONFIG_RTS5208=m -CONFIG_VT6655=m -CONFIG_VT6656=m - -# -# IIO staging drivers -# - -# -# Accelerometers -# -CONFIG_ADIS16203=m -CONFIG_ADIS16240=m -# end of Accelerometers - -# -# Analog to digital converters -# -CONFIG_AD7816=m -CONFIG_AD7280=m -# end of Analog to digital converters - -# -# Analog digital bi-direction converters -# -CONFIG_ADT7316=m -CONFIG_ADT7316_SPI=m -CONFIG_ADT7316_I2C=m -# end of Analog digital bi-direction converters - -# -# Capacitance to digital converters -# -CONFIG_AD7150=m -CONFIG_AD7746=m -# end of Capacitance to digital converters - -# -# Direct Digital Synthesis -# -CONFIG_AD9832=m -CONFIG_AD9834=m -# end of Direct Digital Synthesis - -# -# Network Analyzer, Impedance Converters -# 
-CONFIG_AD5933=m -# end of Network Analyzer, Impedance Converters - -# -# Active energy metering IC -# -CONFIG_ADE7854=m -CONFIG_ADE7854_I2C=m -CONFIG_ADE7854_SPI=m -# end of Active energy metering IC - -# -# Resolver to digital converters -# -CONFIG_AD2S1210=m -# end of Resolver to digital converters -# end of IIO staging drivers - -# CONFIG_FB_SM750 is not set -CONFIG_STAGING_MEDIA=y -CONFIG_INTEL_ATOMISP=y -CONFIG_VIDEO_ATOMISP=m -CONFIG_VIDEO_ATOMISP_ISP2401=y -CONFIG_VIDEO_ATOMISP_OV2722=m -CONFIG_VIDEO_ATOMISP_GC2235=m -CONFIG_VIDEO_ATOMISP_MSRLIST_HELPER=m -CONFIG_VIDEO_ATOMISP_MT9M114=m -CONFIG_VIDEO_ATOMISP_GC0310=m -CONFIG_VIDEO_ATOMISP_OV2680=m -CONFIG_VIDEO_ATOMISP_OV5693=m -CONFIG_VIDEO_ATOMISP_LM3554=m -# CONFIG_VIDEO_ZORAN is not set -CONFIG_VIDEO_IPU3_IMGU=m - -# -# Android -# -# end of Android - -CONFIG_STAGING_BOARD=y -CONFIG_LTE_GDM724X=m -CONFIG_FIREWIRE_SERIAL=m -CONFIG_FWTTY_MAX_TOTAL_PORTS=64 -CONFIG_FWTTY_MAX_CARD_PORTS=32 -CONFIG_GS_FPGABOOT=m -CONFIG_UNISYSSPAR=y -CONFIG_UNISYS_VISORNIC=m -CONFIG_UNISYS_VISORINPUT=m -CONFIG_UNISYS_VISORHBA=m -# CONFIG_FB_TFT is not set -CONFIG_MOST_COMPONENTS=m -CONFIG_MOST_NET=m -CONFIG_MOST_SOUND=m -CONFIG_MOST_VIDEO=m -CONFIG_MOST_DIM2=m -CONFIG_MOST_I2C=m -CONFIG_KS7010=m -CONFIG_PI433=m - -# -# Gasket devices -# -CONFIG_STAGING_GASKET_FRAMEWORK=m -CONFIG_STAGING_APEX_DRIVER=m -# end of Gasket devices - -CONFIG_XIL_AXIS_FIFO=m -CONFIG_FIELDBUS_DEV=m -CONFIG_HMS_ANYBUSS_BUS=m -CONFIG_ARCX_ANYBUS_CONTROLLER=m -CONFIG_HMS_PROFINET=m -CONFIG_KPC2000=y -CONFIG_KPC2000_CORE=m -CONFIG_KPC2000_SPI=m -CONFIG_KPC2000_I2C=m -CONFIG_KPC2000_DMA=m -CONFIG_QLGE=m -CONFIG_WFX=m -# CONFIG_SPMI_HISI3670 is not set -# CONFIG_MFD_HI6421_SPMI is not set -CONFIG_X86_PLATFORM_DEVICES=y -CONFIG_ACPI_WMI=m -CONFIG_WMI_BMOF=m -CONFIG_ALIENWARE_WMI=m -CONFIG_HUAWEI_WMI=m -CONFIG_INTEL_WMI_SBL_FW_UPDATE=m -CONFIG_INTEL_WMI_THUNDERBOLT=m -CONFIG_MXM_WMI=m -CONFIG_PEAQ_WMI=m -CONFIG_XIAOMI_WMI=m -CONFIG_ACERHDF=m -CONFIG_ACER_WIRELESS=m -CONFIG_ACER_WMI=m -CONFIG_APPLE_GMUX=m -CONFIG_ASUS_LAPTOP=m -CONFIG_ASUS_WIRELESS=m -CONFIG_ASUS_WMI=m -CONFIG_ASUS_NB_WMI=m -CONFIG_EEEPC_LAPTOP=m -CONFIG_EEEPC_WMI=m -CONFIG_DCDBAS=m -CONFIG_DELL_SMBIOS=m -CONFIG_DELL_SMBIOS_WMI=y -CONFIG_DELL_SMBIOS_SMM=y -CONFIG_DELL_LAPTOP=m -CONFIG_DELL_RBTN=m -# CONFIG_DELL_RBU is not set -CONFIG_DELL_SMO8800=m -CONFIG_DELL_WMI=m -CONFIG_DELL_WMI_DESCRIPTOR=m -CONFIG_DELL_WMI_AIO=m -CONFIG_DELL_WMI_LED=m -CONFIG_AMILO_RFKILL=m -CONFIG_FUJITSU_LAPTOP=m -CONFIG_FUJITSU_TABLET=m -CONFIG_GPD_POCKET_FAN=m -CONFIG_HP_ACCEL=m -CONFIG_HP_WIRELESS=m -CONFIG_HP_WMI=m -CONFIG_IBM_RTL=m -CONFIG_IDEAPAD_LAPTOP=m -CONFIG_SENSORS_HDAPS=m -CONFIG_THINKPAD_ACPI=m -CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y -# CONFIG_THINKPAD_ACPI_DEBUGFACILITIES is not set -# CONFIG_THINKPAD_ACPI_DEBUG is not set -# CONFIG_THINKPAD_ACPI_UNSAFE_LEDS is not set -CONFIG_THINKPAD_ACPI_VIDEO=y -CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y -CONFIG_INTEL_ATOMISP2_LED=m -CONFIG_INTEL_CHT_INT33FE=m -CONFIG_INTEL_HID_EVENT=m -CONFIG_INTEL_INT0002_VGPIO=m -CONFIG_INTEL_MENLOW=m -CONFIG_INTEL_OAKTRAIL=m -CONFIG_INTEL_VBTN=m -CONFIG_SURFACE3_WMI=m -CONFIG_SURFACE_3_BUTTON=m -CONFIG_SURFACE_3_POWER_OPREGION=m -CONFIG_SURFACE_PRO3_BUTTON=m -CONFIG_MSI_LAPTOP=m -CONFIG_MSI_WMI=m -CONFIG_PCENGINES_APU2=m -CONFIG_SAMSUNG_LAPTOP=m -CONFIG_SAMSUNG_Q10=m -CONFIG_ACPI_TOSHIBA=m -CONFIG_TOSHIBA_BT_RFKILL=m -CONFIG_TOSHIBA_HAPS=m -CONFIG_TOSHIBA_WMI=m -CONFIG_ACPI_CMPC=m -CONFIG_COMPAL_LAPTOP=m -CONFIG_LG_LAPTOP=m -CONFIG_PANASONIC_LAPTOP=m 
-CONFIG_SONY_LAPTOP=m -CONFIG_SONYPI_COMPAT=y -CONFIG_SYSTEM76_ACPI=m -CONFIG_TOPSTAR_LAPTOP=m -CONFIG_I2C_MULTI_INSTANTIATE=m -CONFIG_MLX_PLATFORM=m -CONFIG_TOUCHSCREEN_DMI=y -CONFIG_INTEL_IPS=m -CONFIG_INTEL_RST=m -CONFIG_INTEL_SMARTCONNECT=m - -# -# Intel Speed Select Technology interface support -# -CONFIG_INTEL_SPEED_SELECT_INTERFACE=m -# end of Intel Speed Select Technology interface support - -CONFIG_INTEL_TURBO_MAX_3=y -CONFIG_INTEL_UNCORE_FREQ_CONTROL=m -CONFIG_INTEL_BXTWC_PMIC_TMU=m -CONFIG_INTEL_CHTDC_TI_PWRBTN=m -CONFIG_INTEL_MFLD_THERMAL=m -CONFIG_INTEL_MID_POWER_BUTTON=m -CONFIG_INTEL_MRFLD_PWRBTN=m -CONFIG_INTEL_PMC_CORE=y -CONFIG_INTEL_PUNIT_IPC=m -CONFIG_INTEL_SCU_IPC=y -CONFIG_INTEL_SCU=y -CONFIG_INTEL_SCU_PCI=y -CONFIG_INTEL_SCU_PLATFORM=m -CONFIG_INTEL_SCU_IPC_UTIL=m -CONFIG_INTEL_TELEMETRY=m -CONFIG_PMC_ATOM=y -CONFIG_CHROME_PLATFORMS=y -CONFIG_CHROMEOS_LAPTOP=m -CONFIG_CHROMEOS_PSTORE=m -CONFIG_CHROMEOS_TBMC=m -CONFIG_CROS_EC=m -CONFIG_CROS_EC_I2C=m -CONFIG_CROS_EC_RPMSG=m -CONFIG_CROS_EC_ISHTP=m -CONFIG_CROS_EC_SPI=m -CONFIG_CROS_EC_LPC=m -CONFIG_CROS_EC_PROTO=y -CONFIG_CROS_KBD_LED_BACKLIGHT=m -CONFIG_CROS_EC_CHARDEV=m -CONFIG_CROS_EC_LIGHTBAR=m -CONFIG_CROS_EC_VBC=m -CONFIG_CROS_EC_DEBUGFS=m -CONFIG_CROS_EC_SENSORHUB=m -CONFIG_CROS_EC_SYSFS=m -CONFIG_CROS_EC_TYPEC=m -CONFIG_CROS_USBPD_LOGGER=m -CONFIG_CROS_USBPD_NOTIFY=m -CONFIG_WILCO_EC=m -CONFIG_WILCO_EC_DEBUGFS=m -CONFIG_WILCO_EC_EVENTS=m -CONFIG_WILCO_EC_TELEMETRY=m -CONFIG_MELLANOX_PLATFORM=y -CONFIG_MLXREG_HOTPLUG=m -CONFIG_MLXREG_IO=m -CONFIG_HAVE_CLK=y -CONFIG_CLKDEV_LOOKUP=y -CONFIG_HAVE_CLK_PREPARE=y -CONFIG_COMMON_CLK=y -CONFIG_COMMON_CLK_WM831X=m -CONFIG_COMMON_CLK_MAX77686=m -CONFIG_COMMON_CLK_MAX9485=m -CONFIG_COMMON_CLK_RK808=m -CONFIG_COMMON_CLK_SI5341=m -CONFIG_COMMON_CLK_SI5351=m -CONFIG_COMMON_CLK_SI514=m -CONFIG_COMMON_CLK_SI544=m -CONFIG_COMMON_CLK_SI570=m -CONFIG_COMMON_CLK_CDCE706=m -CONFIG_COMMON_CLK_CDCE925=m -CONFIG_COMMON_CLK_CS2000_CP=m -CONFIG_COMMON_CLK_S2MPS11=m -CONFIG_CLK_TWL6040=m -CONFIG_COMMON_CLK_LOCHNAGAR=m -CONFIG_COMMON_CLK_PALMAS=m -CONFIG_COMMON_CLK_PWM=m -CONFIG_COMMON_CLK_VC5=m -CONFIG_COMMON_CLK_BD718XX=m -CONFIG_COMMON_CLK_FIXED_MMIO=y -CONFIG_CLK_LGM_CGU=y -CONFIG_HWSPINLOCK=y - -# -# Clock Source drivers -# -CONFIG_TIMER_OF=y -CONFIG_TIMER_PROBE=y -CONFIG_CLKEVT_I8253=y -CONFIG_I8253_LOCK=y -CONFIG_CLKBLD_I8253=y -CONFIG_CLKSRC_MMIO=y -CONFIG_MICROCHIP_PIT64B=y -# end of Clock Source drivers - -CONFIG_MAILBOX=y -CONFIG_PLATFORM_MHU=m -CONFIG_PCC=y -CONFIG_ALTERA_MBOX=m -CONFIG_MAILBOX_TEST=m -CONFIG_IOMMU_IOVA=y -CONFIG_IOASID=y -CONFIG_IOMMU_API=y -CONFIG_IOMMU_SUPPORT=y - -# -# Generic IOMMU Pagetable Support -# -# end of Generic IOMMU Pagetable Support - -# CONFIG_IOMMU_DEBUGFS is not set -# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -CONFIG_OF_IOMMU=y -CONFIG_IOMMU_DMA=y -CONFIG_AMD_IOMMU=y -CONFIG_AMD_IOMMU_V2=y -CONFIG_DMAR_TABLE=y -CONFIG_INTEL_IOMMU=y -CONFIG_INTEL_IOMMU_SVM=y -# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set -CONFIG_INTEL_IOMMU_FLOPPY_WA=y -# CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON is not set -CONFIG_IRQ_REMAP=y -CONFIG_HYPERV_IOMMU=y - -# -# Remoteproc drivers -# -CONFIG_REMOTEPROC=y -# CONFIG_REMOTEPROC_CDEV is not set -# end of Remoteproc drivers - -# -# Rpmsg drivers -# -CONFIG_RPMSG=m -CONFIG_RPMSG_CHAR=m -CONFIG_RPMSG_QCOM_GLINK=m -CONFIG_RPMSG_QCOM_GLINK_RPM=m -CONFIG_RPMSG_VIRTIO=m -# end of Rpmsg drivers - -CONFIG_SOUNDWIRE=m - -# -# SoundWire Devices -# -CONFIG_SOUNDWIRE_CADENCE=m -CONFIG_SOUNDWIRE_INTEL=m -CONFIG_SOUNDWIRE_QCOM=m 
-CONFIG_SOUNDWIRE_GENERIC_ALLOCATION=m - -# -# SOC (System On Chip) specific Drivers -# - -# -# Amlogic SoC drivers -# -# end of Amlogic SoC drivers - -# -# Aspeed SoC drivers -# -# end of Aspeed SoC drivers - -# -# Broadcom SoC drivers -# -# end of Broadcom SoC drivers - -# -# NXP/Freescale QorIQ SoC drivers -# -# end of NXP/Freescale QorIQ SoC drivers - -# -# i.MX SoC drivers -# -# end of i.MX SoC drivers - -# -# Qualcomm SoC drivers -# -# end of Qualcomm SoC drivers - -CONFIG_SOC_TI=y - -# -# Xilinx SoC drivers -# -CONFIG_XILINX_VCU=m -# end of Xilinx SoC drivers -# end of SOC (System On Chip) specific Drivers - -CONFIG_PM_DEVFREQ=y - -# -# DEVFREQ Governors -# -CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=m -CONFIG_DEVFREQ_GOV_PERFORMANCE=m -CONFIG_DEVFREQ_GOV_POWERSAVE=m -CONFIG_DEVFREQ_GOV_USERSPACE=m -CONFIG_DEVFREQ_GOV_PASSIVE=m - -# -# DEVFREQ Drivers -# -CONFIG_PM_DEVFREQ_EVENT=y -CONFIG_EXTCON=y - -# -# Extcon Device Drivers -# -CONFIG_EXTCON_ADC_JACK=m -CONFIG_EXTCON_ARIZONA=m -CONFIG_EXTCON_AXP288=m -CONFIG_EXTCON_FSA9480=m -CONFIG_EXTCON_GPIO=m -CONFIG_EXTCON_INTEL_INT3496=m -CONFIG_EXTCON_INTEL_CHT_WC=m -CONFIG_EXTCON_INTEL_MRFLD=m -CONFIG_EXTCON_MAX14577=m -CONFIG_EXTCON_MAX3355=m -CONFIG_EXTCON_MAX77693=m -CONFIG_EXTCON_MAX77843=m -CONFIG_EXTCON_MAX8997=m -CONFIG_EXTCON_PALMAS=m -CONFIG_EXTCON_PTN5150=m -CONFIG_EXTCON_RT8973A=m -CONFIG_EXTCON_SM5502=m -CONFIG_EXTCON_USB_GPIO=m -CONFIG_EXTCON_USBC_CROS_EC=m -CONFIG_MEMORY=y -CONFIG_IIO=m -CONFIG_IIO_BUFFER=y -CONFIG_IIO_BUFFER_CB=m -CONFIG_IIO_BUFFER_DMA=m -CONFIG_IIO_BUFFER_DMAENGINE=m -CONFIG_IIO_BUFFER_HW_CONSUMER=m -CONFIG_IIO_KFIFO_BUF=m -CONFIG_IIO_TRIGGERED_BUFFER=m -CONFIG_IIO_CONFIGFS=m -CONFIG_IIO_TRIGGER=y -CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 -CONFIG_IIO_SW_DEVICE=m -CONFIG_IIO_SW_TRIGGER=m -CONFIG_IIO_TRIGGERED_EVENT=m - -# -# Accelerometers -# -CONFIG_ADIS16201=m -CONFIG_ADIS16209=m -CONFIG_ADXL372=m -CONFIG_ADXL372_SPI=m -CONFIG_ADXL372_I2C=m -CONFIG_BMA220=m -CONFIG_BMA400=m -CONFIG_BMA400_I2C=m -CONFIG_BMA400_SPI=m -CONFIG_BMC150_ACCEL=m -CONFIG_BMC150_ACCEL_I2C=m -CONFIG_BMC150_ACCEL_SPI=m -CONFIG_DA280=m -CONFIG_DA311=m -CONFIG_DMARD06=m -CONFIG_DMARD09=m -CONFIG_DMARD10=m -CONFIG_HID_SENSOR_ACCEL_3D=m -CONFIG_IIO_CROS_EC_ACCEL_LEGACY=m -CONFIG_IIO_ST_ACCEL_3AXIS=m -CONFIG_IIO_ST_ACCEL_I2C_3AXIS=m -CONFIG_IIO_ST_ACCEL_SPI_3AXIS=m -CONFIG_KXSD9=m -CONFIG_KXSD9_SPI=m -CONFIG_KXSD9_I2C=m -CONFIG_KXCJK1013=m -CONFIG_MC3230=m -CONFIG_MMA7455=m -CONFIG_MMA7455_I2C=m -CONFIG_MMA7455_SPI=m -CONFIG_MMA7660=m -CONFIG_MMA8452=m -CONFIG_MMA9551_CORE=m -CONFIG_MMA9551=m -CONFIG_MMA9553=m -CONFIG_MXC4005=m -CONFIG_MXC6255=m -CONFIG_SCA3000=m -CONFIG_STK8312=m -CONFIG_STK8BA50=m -# end of Accelerometers - -# -# Analog to digital converters -# -CONFIG_AD_SIGMA_DELTA=m -CONFIG_AD7091R5=m -CONFIG_AD7124=m -CONFIG_AD7192=m -CONFIG_AD7266=m -CONFIG_AD7291=m -CONFIG_AD7292=m -CONFIG_AD7298=m -CONFIG_AD7476=m -CONFIG_AD7606=m -CONFIG_AD7606_IFACE_PARALLEL=m -CONFIG_AD7606_IFACE_SPI=m -CONFIG_AD7766=m -CONFIG_AD7768_1=m -CONFIG_AD7780=m -CONFIG_AD7791=m -CONFIG_AD7793=m -CONFIG_AD7887=m -CONFIG_AD7923=m -CONFIG_AD7949=m -CONFIG_AD799X=m -CONFIG_AD9467=m -CONFIG_ADI_AXI_ADC=m -CONFIG_AXP20X_ADC=m -CONFIG_AXP288_ADC=m -CONFIG_CC10001_ADC=m -CONFIG_CPCAP_ADC=m -CONFIG_DA9150_GPADC=m -CONFIG_DLN2_ADC=m -CONFIG_ENVELOPE_DETECTOR=m -CONFIG_HI8435=m -CONFIG_HX711=m -CONFIG_INA2XX_ADC=m -CONFIG_INTEL_MRFLD_ADC=m -CONFIG_LP8788_ADC=m -CONFIG_LTC2471=m -CONFIG_LTC2485=m -CONFIG_LTC2496=m -CONFIG_LTC2497=m -CONFIG_MAX1027=m -CONFIG_MAX11100=m 
-CONFIG_MAX1118=m -CONFIG_MAX1241=m -CONFIG_MAX1363=m -CONFIG_MAX9611=m -CONFIG_MCP320X=m -CONFIG_MCP3422=m -CONFIG_MCP3911=m -CONFIG_MEN_Z188_ADC=m -CONFIG_MP2629_ADC=m -CONFIG_NAU7802=m -CONFIG_PALMAS_GPADC=m -CONFIG_QCOM_VADC_COMMON=m -CONFIG_QCOM_SPMI_IADC=m -CONFIG_QCOM_SPMI_VADC=m -CONFIG_QCOM_SPMI_ADC5=m -CONFIG_RN5T618_ADC=m -CONFIG_SD_ADC_MODULATOR=m -CONFIG_STMPE_ADC=m -CONFIG_TI_ADC081C=m -CONFIG_TI_ADC0832=m -CONFIG_TI_ADC084S021=m -CONFIG_TI_ADC12138=m -CONFIG_TI_ADC108S102=m -CONFIG_TI_ADC128S052=m -CONFIG_TI_ADC161S626=m -CONFIG_TI_ADS1015=m -CONFIG_TI_ADS7950=m -CONFIG_TI_ADS8344=m -CONFIG_TI_ADS8688=m -CONFIG_TI_ADS124S08=m -CONFIG_TI_AM335X_ADC=m -CONFIG_TI_TLC4541=m -CONFIG_TWL4030_MADC=m -CONFIG_TWL6030_GPADC=m -CONFIG_VF610_ADC=m -CONFIG_VIPERBOARD_ADC=m -CONFIG_XILINX_XADC=m -# end of Analog to digital converters - -# -# Analog Front Ends -# -CONFIG_IIO_RESCALE=m -# end of Analog Front Ends - -# -# Amplifiers -# -CONFIG_AD8366=m -CONFIG_HMC425=m -# end of Amplifiers - -# -# Chemical Sensors -# -CONFIG_ATLAS_PH_SENSOR=m -CONFIG_ATLAS_EZO_SENSOR=m -CONFIG_BME680=m -CONFIG_BME680_I2C=m -CONFIG_BME680_SPI=m -CONFIG_CCS811=m -CONFIG_IAQCORE=m -CONFIG_PMS7003=m -# CONFIG_SCD30_CORE is not set -CONFIG_SENSIRION_SGP30=m -CONFIG_SPS30=m -CONFIG_VZ89X=m -# end of Chemical Sensors - -CONFIG_IIO_CROS_EC_SENSORS_CORE=m -CONFIG_IIO_CROS_EC_SENSORS=m -CONFIG_IIO_CROS_EC_SENSORS_LID_ANGLE=m - -# -# Hid Sensor IIO Common -# -CONFIG_HID_SENSOR_IIO_COMMON=m -CONFIG_HID_SENSOR_IIO_TRIGGER=m -# end of Hid Sensor IIO Common - -CONFIG_IIO_MS_SENSORS_I2C=m - -# -# SSP Sensor Common -# -CONFIG_IIO_SSP_SENSORS_COMMONS=m -CONFIG_IIO_SSP_SENSORHUB=m -# end of SSP Sensor Common - -CONFIG_IIO_ST_SENSORS_I2C=m -CONFIG_IIO_ST_SENSORS_SPI=m -CONFIG_IIO_ST_SENSORS_CORE=m - -# -# Digital to analog converters -# -CONFIG_AD5064=m -CONFIG_AD5360=m -CONFIG_AD5380=m -CONFIG_AD5421=m -CONFIG_AD5446=m -CONFIG_AD5449=m -CONFIG_AD5592R_BASE=m -CONFIG_AD5592R=m -CONFIG_AD5593R=m -CONFIG_AD5504=m -CONFIG_AD5624R_SPI=m -CONFIG_AD5686=m -CONFIG_AD5686_SPI=m -CONFIG_AD5696_I2C=m -CONFIG_AD5755=m -CONFIG_AD5758=m -CONFIG_AD5761=m -CONFIG_AD5764=m -CONFIG_AD5770R=m -CONFIG_AD5791=m -CONFIG_AD7303=m -CONFIG_AD8801=m -CONFIG_DPOT_DAC=m -CONFIG_DS4424=m -CONFIG_LTC1660=m -CONFIG_LTC2632=m -CONFIG_M62332=m -CONFIG_MAX517=m -CONFIG_MAX5821=m -CONFIG_MCP4725=m -CONFIG_MCP4922=m -CONFIG_TI_DAC082S085=m -CONFIG_TI_DAC5571=m -CONFIG_TI_DAC7311=m -CONFIG_TI_DAC7612=m -CONFIG_VF610_DAC=m -# end of Digital to analog converters - -# -# IIO dummy driver -# -# CONFIG_IIO_SIMPLE_DUMMY is not set -# end of IIO dummy driver - -# -# Frequency Synthesizers DDS/PLL -# - -# -# Clock Generator/Distribution -# -CONFIG_AD9523=m -# end of Clock Generator/Distribution - -# -# Phase-Locked Loop (PLL) frequency synthesizers -# -CONFIG_ADF4350=m -CONFIG_ADF4371=m -# end of Phase-Locked Loop (PLL) frequency synthesizers -# end of Frequency Synthesizers DDS/PLL - -# -# Digital gyroscope sensors -# -CONFIG_ADIS16080=m -CONFIG_ADIS16130=m -CONFIG_ADIS16136=m -CONFIG_ADIS16260=m -# CONFIG_ADXRS290 is not set -CONFIG_ADXRS450=m -CONFIG_BMG160=m -CONFIG_BMG160_I2C=m -CONFIG_BMG160_SPI=m -CONFIG_FXAS21002C=m -CONFIG_FXAS21002C_I2C=m -CONFIG_FXAS21002C_SPI=m -CONFIG_HID_SENSOR_GYRO_3D=m -CONFIG_MPU3050=m -CONFIG_MPU3050_I2C=m -CONFIG_IIO_ST_GYRO_3AXIS=m -CONFIG_IIO_ST_GYRO_I2C_3AXIS=m -CONFIG_IIO_ST_GYRO_SPI_3AXIS=m -CONFIG_ITG3200=m -# end of Digital gyroscope sensors - -# -# Health Sensors -# - -# -# Heart Rate Monitors -# -CONFIG_AFE4403=m 
-CONFIG_AFE4404=m -CONFIG_MAX30100=m -CONFIG_MAX30102=m -# end of Heart Rate Monitors -# end of Health Sensors - -# -# Humidity sensors -# -CONFIG_AM2315=m -CONFIG_DHT11=m -CONFIG_HDC100X=m -# CONFIG_HDC2010 is not set -CONFIG_HID_SENSOR_HUMIDITY=m -CONFIG_HTS221=m -CONFIG_HTS221_I2C=m -CONFIG_HTS221_SPI=m -CONFIG_HTU21=m -CONFIG_SI7005=m -CONFIG_SI7020=m -# end of Humidity sensors - -# -# Inertial measurement units -# -CONFIG_ADIS16400=m -CONFIG_ADIS16460=m -CONFIG_ADIS16475=m -CONFIG_ADIS16480=m -CONFIG_BMI160=m -CONFIG_BMI160_I2C=m -CONFIG_BMI160_SPI=m -CONFIG_FXOS8700=m -CONFIG_FXOS8700_I2C=m -CONFIG_FXOS8700_SPI=m -CONFIG_KMX61=m -CONFIG_INV_ICM42600=m -CONFIG_INV_ICM42600_I2C=m -CONFIG_INV_ICM42600_SPI=m -CONFIG_INV_MPU6050_IIO=m -CONFIG_INV_MPU6050_I2C=m -CONFIG_INV_MPU6050_SPI=m -CONFIG_IIO_ST_LSM6DSX=m -CONFIG_IIO_ST_LSM6DSX_I2C=m -CONFIG_IIO_ST_LSM6DSX_SPI=m -CONFIG_IIO_ST_LSM6DSX_I3C=m -# end of Inertial measurement units - -CONFIG_IIO_ADIS_LIB=m -CONFIG_IIO_ADIS_LIB_BUFFER=y - -# -# Light sensors -# -CONFIG_ACPI_ALS=m -CONFIG_ADJD_S311=m -CONFIG_ADUX1020=m -CONFIG_AL3010=m -CONFIG_AL3320A=m -CONFIG_APDS9300=m -CONFIG_APDS9960=m -# CONFIG_AS73211 is not set -CONFIG_BH1750=m -CONFIG_BH1780=m -CONFIG_CM32181=m -CONFIG_CM3232=m -CONFIG_CM3323=m -CONFIG_CM3605=m -CONFIG_CM36651=m -CONFIG_IIO_CROS_EC_LIGHT_PROX=m -CONFIG_GP2AP002=m -CONFIG_GP2AP020A00F=m -CONFIG_IQS621_ALS=m -CONFIG_SENSORS_ISL29018=m -CONFIG_SENSORS_ISL29028=m -CONFIG_ISL29125=m -CONFIG_HID_SENSOR_ALS=m -CONFIG_HID_SENSOR_PROX=m -CONFIG_JSA1212=m -CONFIG_RPR0521=m -CONFIG_SENSORS_LM3533=m -CONFIG_LTR501=m -CONFIG_LV0104CS=m -CONFIG_MAX44000=m -CONFIG_MAX44009=m -CONFIG_NOA1305=m -CONFIG_OPT3001=m -CONFIG_PA12203001=m -CONFIG_SI1133=m -CONFIG_SI1145=m -CONFIG_STK3310=m -CONFIG_ST_UVIS25=m -CONFIG_ST_UVIS25_I2C=m -CONFIG_ST_UVIS25_SPI=m -CONFIG_TCS3414=m -CONFIG_TCS3472=m -CONFIG_SENSORS_TSL2563=m -CONFIG_TSL2583=m -CONFIG_TSL2772=m -CONFIG_TSL4531=m -CONFIG_US5182D=m -CONFIG_VCNL4000=m -CONFIG_VCNL4035=m -CONFIG_VEML6030=m -CONFIG_VEML6070=m -CONFIG_VL6180=m -CONFIG_ZOPT2201=m -# end of Light sensors - -# -# Magnetometer sensors -# -CONFIG_AK8974=m -CONFIG_AK8975=m -CONFIG_AK09911=m -CONFIG_BMC150_MAGN=m -CONFIG_BMC150_MAGN_I2C=m -CONFIG_BMC150_MAGN_SPI=m -CONFIG_MAG3110=m -CONFIG_HID_SENSOR_MAGNETOMETER_3D=m -CONFIG_MMC35240=m -CONFIG_IIO_ST_MAGN_3AXIS=m -CONFIG_IIO_ST_MAGN_I2C_3AXIS=m -CONFIG_IIO_ST_MAGN_SPI_3AXIS=m -CONFIG_SENSORS_HMC5843=m -CONFIG_SENSORS_HMC5843_I2C=m -CONFIG_SENSORS_HMC5843_SPI=m -CONFIG_SENSORS_RM3100=m -CONFIG_SENSORS_RM3100_I2C=m -CONFIG_SENSORS_RM3100_SPI=m -# end of Magnetometer sensors - -# -# Multiplexers -# -CONFIG_IIO_MUX=m -# end of Multiplexers - -# -# Inclinometer sensors -# -CONFIG_HID_SENSOR_INCLINOMETER_3D=m -CONFIG_HID_SENSOR_DEVICE_ROTATION=m -# end of Inclinometer sensors - -# -# Triggers - standalone -# -CONFIG_IIO_HRTIMER_TRIGGER=m -CONFIG_IIO_INTERRUPT_TRIGGER=m -CONFIG_IIO_TIGHTLOOP_TRIGGER=m -CONFIG_IIO_SYSFS_TRIGGER=m -# end of Triggers - standalone - -# -# Linear and angular position sensors -# -CONFIG_IQS624_POS=m -# end of Linear and angular position sensors - -# -# Digital potentiometers -# -CONFIG_AD5272=m -CONFIG_DS1803=m -CONFIG_MAX5432=m -CONFIG_MAX5481=m -CONFIG_MAX5487=m -CONFIG_MCP4018=m -CONFIG_MCP4131=m -CONFIG_MCP4531=m -CONFIG_MCP41010=m -CONFIG_TPL0102=m -# end of Digital potentiometers - -# -# Digital potentiostats -# -CONFIG_LMP91000=m -# end of Digital potentiostats - -# -# Pressure sensors -# -CONFIG_ABP060MG=m -CONFIG_BMP280=m -CONFIG_BMP280_I2C=m 
-CONFIG_BMP280_SPI=m -CONFIG_IIO_CROS_EC_BARO=m -CONFIG_DLHL60D=m -CONFIG_DPS310=m -CONFIG_HID_SENSOR_PRESS=m -CONFIG_HP03=m -CONFIG_ICP10100=m -CONFIG_MPL115=m -CONFIG_MPL115_I2C=m -CONFIG_MPL115_SPI=m -CONFIG_MPL3115=m -CONFIG_MS5611=m -CONFIG_MS5611_I2C=m -CONFIG_MS5611_SPI=m -CONFIG_MS5637=m -CONFIG_IIO_ST_PRESS=m -CONFIG_IIO_ST_PRESS_I2C=m -CONFIG_IIO_ST_PRESS_SPI=m -CONFIG_T5403=m -CONFIG_HP206C=m -CONFIG_ZPA2326=m -CONFIG_ZPA2326_I2C=m -CONFIG_ZPA2326_SPI=m -# end of Pressure sensors - -# -# Lightning sensors -# -CONFIG_AS3935=m -# end of Lightning sensors - -# -# Proximity and distance sensors -# -CONFIG_ISL29501=m -CONFIG_LIDAR_LITE_V2=m -CONFIG_MB1232=m -CONFIG_PING=m -CONFIG_RFD77402=m -CONFIG_SRF04=m -CONFIG_SX9310=m -CONFIG_SX9500=m -CONFIG_SRF08=m -CONFIG_VCNL3020=m -CONFIG_VL53L0X_I2C=m -# end of Proximity and distance sensors - -# -# Resolver to digital converters -# -CONFIG_AD2S90=m -CONFIG_AD2S1200=m -# end of Resolver to digital converters - -# -# Temperature sensors -# -CONFIG_IQS620AT_TEMP=m -CONFIG_LTC2983=m -CONFIG_MAXIM_THERMOCOUPLE=m -CONFIG_HID_SENSOR_TEMP=m -CONFIG_MLX90614=m -CONFIG_MLX90632=m -CONFIG_TMP006=m -CONFIG_TMP007=m -CONFIG_TSYS01=m -CONFIG_TSYS02D=m -CONFIG_MAX31856=m -# end of Temperature sensors - -CONFIG_NTB=m -CONFIG_NTB_MSI=y -CONFIG_NTB_AMD=m -CONFIG_NTB_IDT=m -CONFIG_NTB_INTEL=m -CONFIG_NTB_SWITCHTEC=m -# CONFIG_NTB_PINGPONG is not set -# CONFIG_NTB_TOOL is not set -# CONFIG_NTB_PERF is not set -# CONFIG_NTB_MSI_TEST is not set -CONFIG_NTB_TRANSPORT=m -CONFIG_VME_BUS=y - -# -# VME Bridge Drivers -# -CONFIG_VME_CA91CX42=m -CONFIG_VME_TSI148=m -# CONFIG_VME_FAKE is not set - -# -# VME Board Drivers -# -CONFIG_VMIVME_7805=m - -# -# VME Device Drivers -# -CONFIG_VME_USER=m -CONFIG_PWM=y -CONFIG_PWM_SYSFS=y -# CONFIG_PWM_DEBUG is not set -CONFIG_PWM_ATMEL_HLCDC_PWM=m -CONFIG_PWM_CRC=y -CONFIG_PWM_CROS_EC=m -CONFIG_PWM_FSL_FTM=m -CONFIG_PWM_IQS620A=m -CONFIG_PWM_LP3943=m -CONFIG_PWM_LPSS=m -CONFIG_PWM_LPSS_PCI=m -CONFIG_PWM_LPSS_PLATFORM=m -CONFIG_PWM_PCA9685=m -CONFIG_PWM_STMPE=y -CONFIG_PWM_TWL=m -CONFIG_PWM_TWL_LED=m - -# -# IRQ chip support -# -CONFIG_IRQCHIP=y -CONFIG_AL_FIC=y -CONFIG_MADERA_IRQ=m -# CONFIG_MST_IRQ is not set -# end of IRQ chip support - -CONFIG_IPACK_BUS=m -CONFIG_BOARD_TPCI200=m -CONFIG_SERIAL_IPOCTAL=m -CONFIG_RESET_CONTROLLER=y -CONFIG_RESET_BRCMSTB_RESCAL=y -CONFIG_RESET_INTEL_GW=y -CONFIG_RESET_TI_SYSCON=m - -# -# PHY Subsystem -# -CONFIG_GENERIC_PHY=y -CONFIG_GENERIC_PHY_MIPI_DPHY=y -# CONFIG_USB_LGM_PHY is not set -CONFIG_BCM_KONA_USB2_PHY=m -CONFIG_PHY_CADENCE_TORRENT=m -CONFIG_PHY_CADENCE_DPHY=m -CONFIG_PHY_CADENCE_SIERRA=m -CONFIG_PHY_CADENCE_SALVO=m -CONFIG_PHY_FSL_IMX8MQ_USB=m -CONFIG_PHY_MIXEL_MIPI_DPHY=m -CONFIG_PHY_PXA_28NM_HSIC=m -CONFIG_PHY_PXA_28NM_USB2=m -CONFIG_PHY_CPCAP_USB=m -CONFIG_PHY_MAPPHONE_MDM6600=m -CONFIG_PHY_OCELOT_SERDES=m -CONFIG_PHY_QCOM_USB_HS=m -CONFIG_PHY_QCOM_USB_HSIC=m -CONFIG_PHY_SAMSUNG_USB2=m -CONFIG_PHY_TUSB1210=m -# CONFIG_PHY_INTEL_LGM_COMBO is not set -# CONFIG_PHY_INTEL_LGM_EMMC is not set -# end of PHY Subsystem - -CONFIG_POWERCAP=y -CONFIG_INTEL_RAPL_CORE=m -CONFIG_INTEL_RAPL=m -CONFIG_IDLE_INJECT=y -CONFIG_MCB=m -CONFIG_MCB_PCI=m -CONFIG_MCB_LPC=m - -# -# Performance monitor support -# -# end of Performance monitor support - -CONFIG_RAS=y -CONFIG_RAS_CEC=y -# CONFIG_RAS_CEC_DEBUG is not set -CONFIG_USB4=m -# CONFIG_USB4_DEBUGFS_WRITE is not set - -# -# Android -# -# CONFIG_ANDROID is not set -# end of Android - -CONFIG_LIBNVDIMM=y -CONFIG_BLK_DEV_PMEM=m -CONFIG_ND_BLK=m 
-CONFIG_ND_CLAIM=y -CONFIG_ND_BTT=m -CONFIG_BTT=y -CONFIG_ND_PFN=m -CONFIG_NVDIMM_PFN=y -CONFIG_NVDIMM_DAX=y -CONFIG_OF_PMEM=m -CONFIG_DAX_DRIVER=y -CONFIG_DAX=y -CONFIG_DEV_DAX=m -CONFIG_DEV_DAX_PMEM=m -CONFIG_DEV_DAX_HMEM=m -CONFIG_DEV_DAX_HMEM_DEVICES=y -CONFIG_DEV_DAX_KMEM=m -CONFIG_DEV_DAX_PMEM_COMPAT=m -CONFIG_NVMEM=y -CONFIG_NVMEM_SYSFS=y -CONFIG_NVMEM_SPMI_SDAM=m -CONFIG_RAVE_SP_EEPROM=m - -# -# HW tracing support -# -CONFIG_STM=m -CONFIG_STM_PROTO_BASIC=m -CONFIG_STM_PROTO_SYS_T=m -# CONFIG_STM_DUMMY is not set -CONFIG_STM_SOURCE_CONSOLE=m -CONFIG_STM_SOURCE_HEARTBEAT=m -CONFIG_STM_SOURCE_FTRACE=m -CONFIG_INTEL_TH=m -CONFIG_INTEL_TH_PCI=m -CONFIG_INTEL_TH_ACPI=m -CONFIG_INTEL_TH_GTH=m -CONFIG_INTEL_TH_STH=m -CONFIG_INTEL_TH_MSU=m -CONFIG_INTEL_TH_PTI=m -# CONFIG_INTEL_TH_DEBUG is not set -# end of HW tracing support - -CONFIG_FPGA=m -CONFIG_ALTERA_PR_IP_CORE=m -CONFIG_ALTERA_PR_IP_CORE_PLAT=m -CONFIG_FPGA_MGR_ALTERA_PS_SPI=m -CONFIG_FPGA_MGR_ALTERA_CVP=m -CONFIG_FPGA_MGR_XILINX_SPI=m -CONFIG_FPGA_MGR_ICE40_SPI=m -CONFIG_FPGA_MGR_MACHXO2_SPI=m -CONFIG_FPGA_BRIDGE=m -CONFIG_ALTERA_FREEZE_BRIDGE=m -CONFIG_XILINX_PR_DECOUPLER=m -CONFIG_FPGA_REGION=m -CONFIG_OF_FPGA_REGION=m -CONFIG_FPGA_DFL=m -CONFIG_FPGA_DFL_FME=m -CONFIG_FPGA_DFL_FME_MGR=m -CONFIG_FPGA_DFL_FME_BRIDGE=m -CONFIG_FPGA_DFL_FME_REGION=m -CONFIG_FPGA_DFL_AFU=m -CONFIG_FPGA_DFL_PCI=m -CONFIG_FSI=m -CONFIG_FSI_NEW_DEV_NODE=y -CONFIG_FSI_MASTER_GPIO=m -CONFIG_FSI_MASTER_HUB=m -CONFIG_FSI_MASTER_ASPEED=m -CONFIG_FSI_SCOM=m -CONFIG_FSI_SBEFIFO=m -CONFIG_FSI_OCC=m -CONFIG_TEE=m - -# -# TEE drivers -# -CONFIG_AMDTEE=m -# end of TEE drivers - -CONFIG_MULTIPLEXER=m - -# -# Multiplexer drivers -# -CONFIG_MUX_ADG792A=m -CONFIG_MUX_ADGS1408=m -CONFIG_MUX_GPIO=m -CONFIG_MUX_MMIO=m -# end of Multiplexer drivers - -CONFIG_PM_OPP=y -CONFIG_UNISYS_VISORBUS=m -CONFIG_SIOX=m -CONFIG_SIOX_BUS_GPIO=m -CONFIG_SLIMBUS=m -CONFIG_SLIM_QCOM_CTRL=m -CONFIG_INTERCONNECT=y -CONFIG_COUNTER=m -CONFIG_FTM_QUADDEC=m -CONFIG_MICROCHIP_TCB_CAPTURE=m -CONFIG_MOST=m -# CONFIG_MOST_USB_HDM is not set -CONFIG_MOST_CDEV=m -# end of Device Drivers - -# -# File systems -# -CONFIG_DCACHE_WORD_ACCESS=y -CONFIG_VALIDATE_FS_PARSER=y -CONFIG_FS_IOMAP=y -# CONFIG_EXT2_FS is not set -# CONFIG_EXT3_FS is not set -CONFIG_EXT4_FS=m -CONFIG_EXT4_USE_FOR_EXT2=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -# CONFIG_EXT4_DEBUG is not set -CONFIG_JBD2=m -# CONFIG_JBD2_DEBUG is not set -CONFIG_FS_MBCACHE=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -# CONFIG_JFS_DEBUG is not set -CONFIG_JFS_STATISTICS=y -CONFIG_XFS_FS=m -CONFIG_XFS_SUPPORT_V4=y -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_XFS_RT=y -CONFIG_XFS_ONLINE_SCRUB=y -CONFIG_XFS_ONLINE_REPAIR=y -# CONFIG_XFS_WARN is not set -# CONFIG_XFS_DEBUG is not set -CONFIG_GFS2_FS=m -CONFIG_GFS2_FS_LOCKING_DLM=y -CONFIG_OCFS2_FS=m -CONFIG_OCFS2_FS_O2CB=m -CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m -CONFIG_OCFS2_FS_STATS=y -CONFIG_OCFS2_DEBUG_MASKLOG=y -# CONFIG_OCFS2_DEBUG_FS is not set -CONFIG_BTRFS_FS=m -CONFIG_BTRFS_FS_POSIX_ACL=y -# CONFIG_BTRFS_FS_CHECK_INTEGRITY is not set -# CONFIG_BTRFS_FS_RUN_SANITY_TESTS is not set -# CONFIG_BTRFS_DEBUG is not set -# CONFIG_BTRFS_ASSERT is not set -# CONFIG_BTRFS_FS_REF_VERIFY is not set -CONFIG_NILFS2_FS=m -CONFIG_F2FS_FS=m -CONFIG_F2FS_STAT_FS=y -CONFIG_F2FS_FS_XATTR=y 
-CONFIG_F2FS_FS_POSIX_ACL=y -CONFIG_F2FS_FS_SECURITY=y -CONFIG_F2FS_CHECK_FS=y -# CONFIG_F2FS_IO_TRACE is not set -# CONFIG_F2FS_FAULT_INJECTION is not set -CONFIG_F2FS_FS_COMPRESSION=y -CONFIG_F2FS_FS_LZO=y -CONFIG_F2FS_FS_LZ4=y -CONFIG_F2FS_FS_ZSTD=y -CONFIG_F2FS_FS_LZORLE=y -CONFIG_ZONEFS_FS=m -CONFIG_FS_DAX=y -CONFIG_FS_DAX_PMD=y -CONFIG_FS_POSIX_ACL=y -CONFIG_EXPORTFS=y -CONFIG_EXPORTFS_BLOCK_OPS=y -CONFIG_FILE_LOCKING=y -# CONFIG_MANDATORY_FILE_LOCKING is not set -CONFIG_FS_ENCRYPTION=y -CONFIG_FS_ENCRYPTION_ALGS=m -# CONFIG_FS_ENCRYPTION_INLINE_CRYPT is not set -CONFIG_FS_VERITY=y -# CONFIG_FS_VERITY_DEBUG is not set -CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y -CONFIG_FSNOTIFY=y -CONFIG_DNOTIFY=y -CONFIG_INOTIFY_USER=y -CONFIG_FANOTIFY=y -CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y -CONFIG_QUOTA=y -CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set -# CONFIG_QUOTA_DEBUG is not set -CONFIG_QUOTA_TREE=m -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_QUOTACTL=y -CONFIG_AUTOFS4_FS=y -CONFIG_AUTOFS_FS=y -CONFIG_FUSE_FS=m -CONFIG_CUSE=m -CONFIG_VIRTIO_FS=m -CONFIG_FUSE_DAX=y -CONFIG_OVERLAY_FS=m -CONFIG_OVERLAY_FS_REDIRECT_DIR=y -# CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW is not set -CONFIG_OVERLAY_FS_INDEX=y -CONFIG_OVERLAY_FS_XINO_AUTO=y -CONFIG_OVERLAY_FS_METACOPY=y - -# -# Caches -# -CONFIG_FSCACHE=m -CONFIG_FSCACHE_STATS=y -CONFIG_FSCACHE_HISTOGRAM=y -# CONFIG_FSCACHE_DEBUG is not set -# CONFIG_FSCACHE_OBJECT_LIST is not set -CONFIG_CACHEFILES=m -# CONFIG_CACHEFILES_DEBUG is not set -# CONFIG_CACHEFILES_HISTOGRAM is not set -# end of Caches - -# -# CD-ROM/DVD Filesystems -# -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -# end of CD-ROM/DVD Filesystems - -# -# DOS/FAT/EXFAT/NT Filesystems -# -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_CODEPAGE=437 -CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" -CONFIG_FAT_DEFAULT_UTF8=y -CONFIG_EXFAT_FS=m -CONFIG_EXFAT_DEFAULT_IOCHARSET="utf8" -CONFIG_NTFS_FS=m -# CONFIG_NTFS_DEBUG is not set -CONFIG_NTFS_RW=y -# end of DOS/FAT/EXFAT/NT Filesystems - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_KCORE=y -CONFIG_PROC_VMCORE=y -CONFIG_PROC_VMCORE_DEVICE_DUMP=y -CONFIG_PROC_SYSCTL=y -CONFIG_PROC_PAGE_MONITOR=y -CONFIG_PROC_CHILDREN=y -CONFIG_PROC_PID_ARCH_STATUS=y -CONFIG_PROC_CPU_RESCTRL=y -CONFIG_KERNFS=y -CONFIG_SYSFS=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_TMPFS_XATTR=y -# CONFIG_TMPFS_INODE64 is not set -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -CONFIG_MEMFD_CREATE=y -CONFIG_ARCH_HAS_GIGANTIC_PAGE=y -CONFIG_CONFIGFS_FS=y -CONFIG_EFIVAR_FS=y -# end of Pseudo filesystems - -CONFIG_MISC_FILESYSTEMS=y -CONFIG_ORANGEFS_FS=m -# CONFIG_ADFS_FS is not set -CONFIG_AFFS_FS=m -CONFIG_ECRYPT_FS=m -# CONFIG_ECRYPT_FS_MESSAGING is not set -CONFIG_HFS_FS=m -CONFIG_HFSPLUS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -CONFIG_JFFS2_FS=m -CONFIG_JFFS2_FS_DEBUG=0 -CONFIG_JFFS2_FS_WRITEBUFFER=y -# CONFIG_JFFS2_FS_WBUF_VERIFY is not set -CONFIG_JFFS2_SUMMARY=y -CONFIG_JFFS2_FS_XATTR=y -CONFIG_JFFS2_FS_POSIX_ACL=y -CONFIG_JFFS2_FS_SECURITY=y -# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set -CONFIG_JFFS2_ZLIB=y -CONFIG_JFFS2_RTIME=y -CONFIG_UBIFS_FS=m -# CONFIG_UBIFS_FS_ADVANCED_COMPR is not set -CONFIG_UBIFS_FS_LZO=y -CONFIG_UBIFS_FS_ZLIB=y -CONFIG_UBIFS_FS_ZSTD=y -CONFIG_UBIFS_ATIME_SUPPORT=y -CONFIG_UBIFS_FS_XATTR=y -CONFIG_UBIFS_FS_SECURITY=y -CONFIG_UBIFS_FS_AUTHENTICATION=y -CONFIG_CRAMFS=m -CONFIG_CRAMFS_BLOCKDEV=y 
-CONFIG_CRAMFS_MTD=y -CONFIG_SQUASHFS=m -# CONFIG_SQUASHFS_FILE_CACHE is not set -CONFIG_SQUASHFS_FILE_DIRECT=y -# CONFIG_SQUASHFS_DECOMP_SINGLE is not set -CONFIG_SQUASHFS_DECOMP_MULTI=y -# CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set -CONFIG_SQUASHFS_XATTR=y -CONFIG_SQUASHFS_ZLIB=y -CONFIG_SQUASHFS_LZ4=y -CONFIG_SQUASHFS_LZO=y -CONFIG_SQUASHFS_XZ=y -CONFIG_SQUASHFS_ZSTD=y -# CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set -# CONFIG_SQUASHFS_EMBEDDED is not set -CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 -# CONFIG_VXFS_FS is not set -CONFIG_MINIX_FS=m -CONFIG_OMFS_FS=m -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX6FS_FS is not set -CONFIG_ROMFS_FS=m -CONFIG_ROMFS_BACKED_BY_BLOCK=y -# CONFIG_ROMFS_BACKED_BY_MTD is not set -# CONFIG_ROMFS_BACKED_BY_BOTH is not set -CONFIG_ROMFS_ON_BLOCK=y -CONFIG_PSTORE=y -CONFIG_PSTORE_DEFLATE_COMPRESS=m -CONFIG_PSTORE_LZO_COMPRESS=m -CONFIG_PSTORE_LZ4_COMPRESS=m -CONFIG_PSTORE_LZ4HC_COMPRESS=m -# CONFIG_PSTORE_842_COMPRESS is not set -CONFIG_PSTORE_ZSTD_COMPRESS=y -CONFIG_PSTORE_COMPRESS=y -# CONFIG_PSTORE_DEFLATE_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZO_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZ4_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZ4HC_COMPRESS_DEFAULT is not set -CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y -CONFIG_PSTORE_COMPRESS_DEFAULT="zstd" -# CONFIG_PSTORE_CONSOLE is not set -# CONFIG_PSTORE_PMSG is not set -# CONFIG_PSTORE_FTRACE is not set -CONFIG_PSTORE_RAM=y -CONFIG_PSTORE_ZONE=m -CONFIG_PSTORE_BLK=m -CONFIG_PSTORE_BLK_BLKDEV="" -CONFIG_PSTORE_BLK_KMSG_SIZE=64 -CONFIG_PSTORE_BLK_MAX_REASON=2 -# CONFIG_SYSV_FS is not set -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set -# CONFIG_UFS_DEBUG is not set -CONFIG_EROFS_FS=m -# CONFIG_EROFS_FS_DEBUG is not set -CONFIG_EROFS_FS_XATTR=y -CONFIG_EROFS_FS_POSIX_ACL=y -CONFIG_EROFS_FS_SECURITY=y -CONFIG_EROFS_FS_ZIP=y -CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT=2 -CONFIG_VBOXSF_FS=m -CONFIG_NETWORK_FILESYSTEMS=y -CONFIG_NFS_FS=m -CONFIG_NFS_V2=m -CONFIG_NFS_V3=m -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=m -CONFIG_NFS_SWAP=y -CONFIG_NFS_V4_1=y -CONFIG_NFS_V4_2=y -CONFIG_PNFS_FILE_LAYOUT=m -CONFIG_PNFS_BLOCK=m -CONFIG_PNFS_FLEXFILE_LAYOUT=m -CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" -CONFIG_NFS_V4_1_MIGRATION=y -CONFIG_NFS_V4_SECURITY_LABEL=y -CONFIG_NFS_FSCACHE=y -# CONFIG_NFS_USE_LEGACY_DNS is not set -CONFIG_NFS_USE_KERNEL_DNS=y -CONFIG_NFS_DEBUG=y -# CONFIG_NFS_DISABLE_UDP_SUPPORT is not set -CONFIG_NFSD=m -CONFIG_NFSD_V2_ACL=y -CONFIG_NFSD_V3=y -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V4=y -CONFIG_NFSD_PNFS=y -CONFIG_NFSD_BLOCKLAYOUT=y -CONFIG_NFSD_SCSILAYOUT=y -# CONFIG_NFSD_FLEXFILELAYOUT is not set -# CONFIG_NFSD_V4_2_INTER_SSC is not set -CONFIG_NFSD_V4_SECURITY_LABEL=y -CONFIG_GRACE_PERIOD=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_NFS_ACL_SUPPORT=m -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=m -CONFIG_SUNRPC_GSS=m -CONFIG_SUNRPC_BACKCHANNEL=y -CONFIG_SUNRPC_SWAP=y -CONFIG_RPCSEC_GSS_KRB5=m -CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES=y -CONFIG_SUNRPC_DEBUG=y -CONFIG_SUNRPC_XPRT_RDMA=m -CONFIG_CEPH_FS=m -CONFIG_CEPH_FSCACHE=y -CONFIG_CEPH_FS_POSIX_ACL=y -CONFIG_CEPH_FS_SECURITY_LABEL=y -CONFIG_CIFS=m -# CONFIG_CIFS_STATS2 is not set -CONFIG_CIFS_ALLOW_INSECURE_LEGACY=y -# CONFIG_CIFS_WEAK_PW_HASH is not set -CONFIG_CIFS_UPCALL=y -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -CONFIG_CIFS_DEBUG=y -# CONFIG_CIFS_DEBUG2 is not set -# CONFIG_CIFS_DEBUG_DUMP_KEYS is not set -CONFIG_CIFS_DFS_UPCALL=y -# CONFIG_CIFS_SMB_DIRECT is not set -CONFIG_CIFS_FSCACHE=y -CONFIG_CODA_FS=m 
-CONFIG_AFS_FS=m -# CONFIG_AFS_DEBUG is not set -CONFIG_AFS_FSCACHE=y -# CONFIG_AFS_DEBUG_CURSOR is not set -CONFIG_9P_FS=m -CONFIG_9P_FSCACHE=y -CONFIG_9P_FS_POSIX_ACL=y -CONFIG_9P_FS_SECURITY=y -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_MAC_ROMAN=m -CONFIG_NLS_MAC_CELTIC=m -CONFIG_NLS_MAC_CENTEURO=m -CONFIG_NLS_MAC_CROATIAN=m -CONFIG_NLS_MAC_CYRILLIC=m -CONFIG_NLS_MAC_GAELIC=m -CONFIG_NLS_MAC_GREEK=m -CONFIG_NLS_MAC_ICELAND=m -CONFIG_NLS_MAC_INUIT=m -CONFIG_NLS_MAC_ROMANIAN=m -CONFIG_NLS_MAC_TURKISH=m -CONFIG_NLS_UTF8=m -CONFIG_DLM=m -# CONFIG_DLM_DEBUG is not set -CONFIG_UNICODE=y -# CONFIG_UNICODE_NORMALIZATION_SELFTEST is not set -CONFIG_IO_WQ=y -# end of File systems - -# -# Security options -# -CONFIG_KEYS=y -CONFIG_KEYS_REQUEST_CACHE=y -CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_TRUSTED_KEYS=m -CONFIG_ENCRYPTED_KEYS=m -CONFIG_KEY_DH_OPERATIONS=y -CONFIG_KEY_NOTIFICATIONS=y -# CONFIG_SECURITY_DMESG_RESTRICT is not set -CONFIG_SECURITY=y -CONFIG_SECURITYFS=y -CONFIG_SECURITY_NETWORK=y -CONFIG_PAGE_TABLE_ISOLATION=y -CONFIG_SECURITY_INFINIBAND=y -CONFIG_SECURITY_NETWORK_XFRM=y -CONFIG_SECURITY_PATH=y -# CONFIG_INTEL_TXT is not set -CONFIG_LSM_MMAP_MIN_ADDR=65536 -CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y -CONFIG_HARDENED_USERCOPY=y -CONFIG_HARDENED_USERCOPY_FALLBACK=y -# CONFIG_HARDENED_USERCOPY_PAGESPAN is not set -CONFIG_FORTIFY_SOURCE=y -# CONFIG_STATIC_USERMODEHELPER is not set -CONFIG_SECURITY_SELINUX=y -CONFIG_SECURITY_SELINUX_BOOTPARAM=y -# CONFIG_SECURITY_SELINUX_DISABLE is not set -CONFIG_SECURITY_SELINUX_DEVELOP=y -CONFIG_SECURITY_SELINUX_AVC_STATS=y -CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=0 -CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9 -CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256 -CONFIG_SECURITY_SMACK=y -CONFIG_SECURITY_SMACK_BRINGUP=y -CONFIG_SECURITY_SMACK_NETFILTER=y -CONFIG_SECURITY_SMACK_APPEND_SIGNALS=y -CONFIG_SECURITY_TOMOYO=y -CONFIG_SECURITY_TOMOYO_MAX_ACCEPT_ENTRY=2048 -CONFIG_SECURITY_TOMOYO_MAX_AUDIT_LOG=1024 -# CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER is not set -CONFIG_SECURITY_TOMOYO_POLICY_LOADER="/sbin/tomoyo-init" -CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER="/sbin/init" -# CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING is not set -CONFIG_SECURITY_APPARMOR=y -CONFIG_SECURITY_APPARMOR_HASH=y -CONFIG_SECURITY_APPARMOR_HASH_DEFAULT=y -# CONFIG_SECURITY_APPARMOR_DEBUG is not set -# CONFIG_SECURITY_LOADPIN is not set -CONFIG_SECURITY_YAMA=y -CONFIG_SECURITY_SAFESETID=y -CONFIG_SECURITY_LOCKDOWN_LSM=y -# CONFIG_SECURITY_LOCKDOWN_LSM_EARLY is not set -CONFIG_LOCK_DOWN_KERNEL_FORCE_NONE=y -# 
CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY is not set -# CONFIG_LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY is not set -# CONFIG_INTEGRITY is not set -# CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT is not set -# CONFIG_DEFAULT_SECURITY_SELINUX is not set -# CONFIG_DEFAULT_SECURITY_SMACK is not set -# CONFIG_DEFAULT_SECURITY_TOMOYO is not set -# CONFIG_DEFAULT_SECURITY_APPARMOR is not set -CONFIG_DEFAULT_SECURITY_DAC=y -CONFIG_LSM="lockdown,yama" - -# -# Kernel hardening options -# -CONFIG_GCC_PLUGIN_STRUCTLEAK=y - -# -# Memory initialization -# -# CONFIG_INIT_STACK_NONE is not set -# CONFIG_GCC_PLUGIN_STRUCTLEAK_USER is not set -# CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF is not set -CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL=y -# CONFIG_GCC_PLUGIN_STRUCTLEAK_VERBOSE is not set -# CONFIG_GCC_PLUGIN_STACKLEAK is not set -CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y -# CONFIG_INIT_ON_FREE_DEFAULT_ON is not set -# end of Memory initialization -# end of Kernel hardening options -# end of Security options - -CONFIG_XOR_BLOCKS=m -CONFIG_ASYNC_CORE=m -CONFIG_ASYNC_MEMCPY=m -CONFIG_ASYNC_XOR=m -CONFIG_ASYNC_PQ=m -CONFIG_ASYNC_RAID6_RECOV=m -CONFIG_CRYPTO=y - -# -# Crypto core or helper -# -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_ALGAPI2=y -CONFIG_CRYPTO_AEAD=y -CONFIG_CRYPTO_AEAD2=y -CONFIG_CRYPTO_SKCIPHER=y -CONFIG_CRYPTO_SKCIPHER2=y -CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_HASH2=y -CONFIG_CRYPTO_RNG=y -CONFIG_CRYPTO_RNG2=y -CONFIG_CRYPTO_RNG_DEFAULT=y -CONFIG_CRYPTO_AKCIPHER2=y -CONFIG_CRYPTO_AKCIPHER=y -CONFIG_CRYPTO_KPP2=y -CONFIG_CRYPTO_KPP=y -CONFIG_CRYPTO_ACOMP2=y -CONFIG_CRYPTO_MANAGER=y -CONFIG_CRYPTO_MANAGER2=y -CONFIG_CRYPTO_USER=m -CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y -CONFIG_CRYPTO_GF128MUL=y -CONFIG_CRYPTO_NULL=y -CONFIG_CRYPTO_NULL2=y -CONFIG_CRYPTO_PCRYPT=m -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_AUTHENC=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_SIMD=m -CONFIG_CRYPTO_GLUE_HELPER_X86=m -CONFIG_CRYPTO_ENGINE=m - -# -# Public-key cryptography -# -CONFIG_CRYPTO_RSA=y -CONFIG_CRYPTO_DH=y -CONFIG_CRYPTO_ECC=m -CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_ECRDSA=m -# CONFIG_CRYPTO_SM2 is not set -CONFIG_CRYPTO_CURVE25519=m -CONFIG_CRYPTO_CURVE25519_X86=m - -# -# Authenticated Encryption with Associated Data -# -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=y -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128_AESNI_SSE2=m -CONFIG_CRYPTO_SEQIV=y -CONFIG_CRYPTO_ECHAINIV=m - -# -# Block modes -# -CONFIG_CRYPTO_CBC=m -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_CTR=y -CONFIG_CRYPTO_CTS=m -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_NHPOLY1305=m -CONFIG_CRYPTO_NHPOLY1305_SSE2=m -CONFIG_CRYPTO_NHPOLY1305_AVX2=m -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_ESSIV=m - -# -# Hash modes -# -CONFIG_CRYPTO_CMAC=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m - -# -# Digest -# -CONFIG_CRYPTO_CRC32C=m -CONFIG_CRYPTO_CRC32C_INTEL=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRC32_PCLMUL=m -CONFIG_CRYPTO_XXHASH=m -CONFIG_CRYPTO_BLAKE2B=m -CONFIG_CRYPTO_BLAKE2S=m -CONFIG_CRYPTO_BLAKE2S_X86=m -CONFIG_CRYPTO_CRCT10DIF=y -CONFIG_CRYPTO_CRCT10DIF_PCLMUL=m -CONFIG_CRYPTO_GHASH=y -CONFIG_CRYPTO_POLY1305=m -CONFIG_CRYPTO_POLY1305_X86_64=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA1_SSSE3=m -CONFIG_CRYPTO_SHA256_SSSE3=m -CONFIG_CRYPTO_SHA512_SSSE3=m -CONFIG_CRYPTO_SHA256=y 
-CONFIG_CRYPTO_SHA512=y -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL=m - -# -# Ciphers -# -CONFIG_CRYPTO_AES=y -CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_AES_NI_INTEL=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_BLOWFISH_COMMON=m -CONFIG_CRYPTO_BLOWFISH_X86_64=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_CAMELLIA_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64=m -CONFIG_CRYPTO_CAST_COMMON=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST5_AVX_X86_64=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_CAST6_AVX_X86_64=m -CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_DES3_EDE_X86_64=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_CHACHA20=m -CONFIG_CRYPTO_CHACHA20_X86_64=m -CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SERPENT_SSE2_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX2_X86_64=m -CONFIG_CRYPTO_SM4=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_TWOFISH_COMMON=m -CONFIG_CRYPTO_TWOFISH_X86_64=m -CONFIG_CRYPTO_TWOFISH_X86_64_3WAY=m -CONFIG_CRYPTO_TWOFISH_AVX_X86_64=m - -# -# Compression -# -CONFIG_CRYPTO_DEFLATE=m -CONFIG_CRYPTO_LZO=m -CONFIG_CRYPTO_842=m -CONFIG_CRYPTO_LZ4=y -CONFIG_CRYPTO_LZ4HC=m -CONFIG_CRYPTO_ZSTD=y - -# -# Random Number Generation -# -CONFIG_CRYPTO_ANSI_CPRNG=m -CONFIG_CRYPTO_DRBG_MENU=y -CONFIG_CRYPTO_DRBG_HMAC=y -CONFIG_CRYPTO_DRBG_HASH=y -CONFIG_CRYPTO_DRBG_CTR=y -CONFIG_CRYPTO_DRBG=y -CONFIG_CRYPTO_JITTERENTROPY=y -CONFIG_CRYPTO_USER_API=m -CONFIG_CRYPTO_USER_API_HASH=m -CONFIG_CRYPTO_USER_API_SKCIPHER=m -CONFIG_CRYPTO_USER_API_RNG=m -# CONFIG_CRYPTO_USER_API_RNG_CAVP is not set -CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_USER_API_ENABLE_OBSOLETE=y -# CONFIG_CRYPTO_STATS is not set -CONFIG_CRYPTO_HASH_INFO=y - -# -# Crypto library routines -# -CONFIG_CRYPTO_LIB_AES=y -CONFIG_CRYPTO_LIB_ARC4=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=m -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA=m -CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m -CONFIG_CRYPTO_LIB_CHACHA=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_DES=m -CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11 -CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m -CONFIG_CRYPTO_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRYPTO_LIB_SHA256=y -CONFIG_CRYPTO_HW=y -CONFIG_CRYPTO_DEV_PADLOCK=m -CONFIG_CRYPTO_DEV_PADLOCK_AES=m -CONFIG_CRYPTO_DEV_PADLOCK_SHA=m -CONFIG_CRYPTO_DEV_ATMEL_I2C=m -CONFIG_CRYPTO_DEV_ATMEL_ECC=m -CONFIG_CRYPTO_DEV_ATMEL_SHA204A=m -CONFIG_CRYPTO_DEV_CCP=y -CONFIG_CRYPTO_DEV_CCP_DD=m -CONFIG_CRYPTO_DEV_SP_CCP=y -CONFIG_CRYPTO_DEV_CCP_CRYPTO=m -CONFIG_CRYPTO_DEV_SP_PSP=y -CONFIG_CRYPTO_DEV_CCP_DEBUGFS=y -CONFIG_CRYPTO_DEV_QAT=m -CONFIG_CRYPTO_DEV_QAT_DH895xCC=m -CONFIG_CRYPTO_DEV_QAT_C3XXX=m -CONFIG_CRYPTO_DEV_QAT_C62X=m -CONFIG_CRYPTO_DEV_QAT_DH895xCCVF=m -CONFIG_CRYPTO_DEV_QAT_C3XXXVF=m -CONFIG_CRYPTO_DEV_QAT_C62XVF=m -CONFIG_CRYPTO_DEV_NITROX=m -CONFIG_CRYPTO_DEV_NITROX_CNN55XX=m -CONFIG_CRYPTO_DEV_CHELSIO=m -CONFIG_CRYPTO_DEV_VIRTIO=m -CONFIG_CRYPTO_DEV_SAFEXCEL=m -CONFIG_CRYPTO_DEV_CCREE=m -CONFIG_CRYPTO_DEV_AMLOGIC_GXL=m -CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG=y -CONFIG_ASYMMETRIC_KEY_TYPE=y -CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y -CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m 
-CONFIG_X509_CERTIFICATE_PARSER=y -CONFIG_PKCS8_PRIVATE_KEY_PARSER=m -CONFIG_TPM_KEY_PARSER=m -CONFIG_PKCS7_MESSAGE_PARSER=y -# CONFIG_PKCS7_TEST_KEY is not set -CONFIG_SIGNED_PE_FILE_VERIFICATION=y - -# -# Certificates for signature checking -# -CONFIG_MODULE_SIG_KEY="certs/signing_key.pem" -CONFIG_SYSTEM_TRUSTED_KEYRING=y -CONFIG_SYSTEM_TRUSTED_KEYS="" -# CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set -CONFIG_SECONDARY_TRUSTED_KEYRING=y -CONFIG_SYSTEM_BLACKLIST_KEYRING=y -CONFIG_SYSTEM_BLACKLIST_HASH_LIST="" -# end of Certificates for signature checking - -CONFIG_BINARY_PRINTF=y - -# -# Library routines -# -CONFIG_RAID6_PQ=m -CONFIG_RAID6_PQ_BENCHMARK=y -CONFIG_LINEAR_RANGES=y -CONFIG_PACKING=y -CONFIG_BITREVERSE=y -CONFIG_GENERIC_STRNCPY_FROM_USER=y -CONFIG_GENERIC_STRNLEN_USER=y -CONFIG_GENERIC_NET_UTILS=y -CONFIG_GENERIC_FIND_FIRST_BIT=y -CONFIG_CORDIC=m -# CONFIG_PRIME_NUMBERS is not set -CONFIG_RATIONAL=y -CONFIG_GENERIC_PCI_IOMAP=y -CONFIG_GENERIC_IOMAP=y -CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y -CONFIG_ARCH_HAS_FAST_MULTIPLIER=y -CONFIG_ARCH_USE_SYM_ANNOTATIONS=y -CONFIG_CRC_CCITT=y -CONFIG_CRC16=m -CONFIG_CRC_T10DIF=y -CONFIG_CRC_ITU_T=m -CONFIG_CRC32=y -# CONFIG_CRC32_SELFTEST is not set -CONFIG_CRC32_SLICEBY8=y -# CONFIG_CRC32_SLICEBY4 is not set -# CONFIG_CRC32_SARWATE is not set -# CONFIG_CRC32_BIT is not set -CONFIG_CRC64=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_LIBCRC32C=m -CONFIG_CRC8=m -CONFIG_XXHASH=y -# CONFIG_RANDOM32_SELFTEST is not set -CONFIG_842_COMPRESS=m -CONFIG_842_DECOMPRESS=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_LZO_COMPRESS=y -CONFIG_LZO_DECOMPRESS=y -CONFIG_LZ4_COMPRESS=y -CONFIG_LZ4HC_COMPRESS=m -CONFIG_LZ4_DECOMPRESS=y -CONFIG_ZSTD_COMPRESS=y -CONFIG_ZSTD_DECOMPRESS=y -CONFIG_XZ_DEC=y -CONFIG_XZ_DEC_X86=y -CONFIG_XZ_DEC_POWERPC=y -CONFIG_XZ_DEC_IA64=y -CONFIG_XZ_DEC_ARM=y -CONFIG_XZ_DEC_ARMTHUMB=y -CONFIG_XZ_DEC_SPARC=y -CONFIG_XZ_DEC_BCJ=y -# CONFIG_XZ_DEC_TEST is not set -CONFIG_DECOMPRESS_GZIP=y -CONFIG_DECOMPRESS_BZIP2=y -CONFIG_DECOMPRESS_LZMA=y -CONFIG_DECOMPRESS_XZ=y -CONFIG_DECOMPRESS_LZO=y -CONFIG_DECOMPRESS_LZ4=y -CONFIG_DECOMPRESS_ZSTD=y -CONFIG_GENERIC_ALLOCATOR=y -CONFIG_REED_SOLOMON=y -CONFIG_REED_SOLOMON_ENC8=y -CONFIG_REED_SOLOMON_DEC8=y -CONFIG_REED_SOLOMON_DEC16=y -CONFIG_BCH=m -CONFIG_TEXTSEARCH=y -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m -CONFIG_BTREE=y -CONFIG_INTERVAL_TREE=y -CONFIG_XARRAY_MULTI=y -CONFIG_ASSOCIATIVE_ARRAY=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT_MAP=y -CONFIG_HAS_DMA=y -CONFIG_DMA_OPS=y -CONFIG_NEED_SG_DMA_LENGTH=y -CONFIG_NEED_DMA_MAP_STATE=y -CONFIG_ARCH_DMA_ADDR_T_64BIT=y -CONFIG_DMA_DECLARE_COHERENT=y -CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED=y -CONFIG_DMA_VIRT_OPS=y -CONFIG_SWIOTLB=y -CONFIG_DMA_COHERENT_POOL=y -# CONFIG_DMA_API_DEBUG is not set -CONFIG_SGL_ALLOC=y -CONFIG_IOMMU_HELPER=y -CONFIG_CHECK_SIGNATURE=y -CONFIG_CPU_RMAP=y -CONFIG_DQL=y -CONFIG_GLOB=y -# CONFIG_GLOB_SELFTEST is not set -CONFIG_NLATTR=y -CONFIG_LRU_CACHE=m -CONFIG_CLZ_TAB=y -CONFIG_IRQ_POLL=y -CONFIG_MPILIB=y -CONFIG_DIMLIB=y -CONFIG_LIBFDT=y -CONFIG_OID_REGISTRY=y -CONFIG_UCS2_STRING=y -CONFIG_HAVE_GENERIC_VDSO=y -CONFIG_GENERIC_GETTIMEOFDAY=y -CONFIG_GENERIC_VDSO_TIME_NS=y -CONFIG_FONT_SUPPORT=y -CONFIG_FONTS=y -# CONFIG_FONT_8x8 is not set -CONFIG_FONT_8x16=y -# CONFIG_FONT_6x11 is not set -# CONFIG_FONT_7x14 is not set -# CONFIG_FONT_PEARL_8x8 is not set -# CONFIG_FONT_ACORN_8x8 is not set -# CONFIG_FONT_MINI_4x6 is not set -# CONFIG_FONT_6x10 is not set -# CONFIG_FONT_10x18 is not set -# 
CONFIG_FONT_SUN8x16 is not set -# CONFIG_FONT_SUN12x22 is not set -CONFIG_FONT_TER16x32=y -# CONFIG_FONT_6x8 is not set -CONFIG_SG_POOL=y -CONFIG_ARCH_HAS_PMEM_API=y -CONFIG_MEMREGION=y -CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y -CONFIG_ARCH_HAS_COPY_MC=y -CONFIG_ARCH_STACKWALK=y -CONFIG_SBITMAP=y -CONFIG_PARMAN=m -CONFIG_OBJAGG=m -# CONFIG_STRING_SELFTEST is not set -# end of Library routines - -CONFIG_PLDMFW=y - -# -# Kernel hacking -# - -# -# printk and dmesg options -# -CONFIG_PRINTK_TIME=y -# CONFIG_PRINTK_CALLER is not set -CONFIG_CONSOLE_LOGLEVEL_DEFAULT=4 -CONFIG_CONSOLE_LOGLEVEL_QUIET=1 -CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 -# CONFIG_BOOT_PRINTK_DELAY is not set -CONFIG_DYNAMIC_DEBUG=y -CONFIG_DYNAMIC_DEBUG_CORE=y -CONFIG_SYMBOLIC_ERRNAME=y -CONFIG_DEBUG_BUGVERBOSE=y -# end of printk and dmesg options - -# -# Compile-time checks and compiler options -# -CONFIG_DEBUG_INFO=y -# CONFIG_DEBUG_INFO_REDUCED is not set -# CONFIG_DEBUG_INFO_COMPRESSED is not set -# CONFIG_DEBUG_INFO_SPLIT is not set -CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y -# CONFIG_GDB_SCRIPTS is not set -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_FRAME_WARN=2048 -CONFIG_STRIP_ASM_SYMS=y -# CONFIG_READABLE_ASM is not set -# CONFIG_HEADERS_INSTALL is not set -# CONFIG_DEBUG_SECTION_MISMATCH is not set -CONFIG_SECTION_MISMATCH_WARN_ONLY=y -# CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B is not set -CONFIG_STACK_VALIDATION=y -# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set -# end of Compile-time checks and compiler options - -# -# Generic Kernel Debugging Instruments -# -CONFIG_MAGIC_SYSRQ=y -CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x0 -CONFIG_MAGIC_SYSRQ_SERIAL=y -CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE="" -CONFIG_DEBUG_FS=y -CONFIG_DEBUG_FS_ALLOW_ALL=y -# CONFIG_DEBUG_FS_DISALLOW_MOUNT is not set -# CONFIG_DEBUG_FS_ALLOW_NONE is not set -CONFIG_HAVE_ARCH_KGDB=y -# CONFIG_KGDB is not set -CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y -# CONFIG_UBSAN is not set -CONFIG_HAVE_ARCH_KCSAN=y -# end of Generic Kernel Debugging Instruments - -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_MISC=y - -# -# Memory Debugging -# -# CONFIG_PAGE_EXTENSION is not set -# CONFIG_DEBUG_PAGEALLOC is not set -# CONFIG_PAGE_OWNER is not set -CONFIG_PAGE_POISONING=y -CONFIG_PAGE_POISONING_NO_SANITY=y -CONFIG_PAGE_POISONING_ZERO=y -# CONFIG_DEBUG_PAGE_REF is not set -# CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_ARCH_HAS_DEBUG_WX=y -CONFIG_DEBUG_WX=y -CONFIG_GENERIC_PTDUMP=y -CONFIG_PTDUMP_CORE=y -# CONFIG_PTDUMP_DEBUGFS is not set -# CONFIG_DEBUG_OBJECTS is not set -# CONFIG_SLUB_DEBUG_ON is not set -# CONFIG_SLUB_STATS is not set -CONFIG_HAVE_DEBUG_KMEMLEAK=y -# CONFIG_DEBUG_KMEMLEAK is not set -# CONFIG_DEBUG_STACK_USAGE is not set -CONFIG_SCHED_STACK_END_CHECK=y -CONFIG_ARCH_HAS_DEBUG_VM_PGTABLE=y -# CONFIG_DEBUG_VM is not set -# CONFIG_DEBUG_VM_PGTABLE is not set -CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y -# CONFIG_DEBUG_VIRTUAL is not set -CONFIG_DEBUG_MEMORY_INIT=y -# CONFIG_DEBUG_PER_CPU_MAPS is not set -CONFIG_HAVE_ARCH_KASAN=y -CONFIG_HAVE_ARCH_KASAN_VMALLOC=y -CONFIG_CC_HAS_KASAN_GENERIC=y -CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y -# CONFIG_KASAN is not set -# end of Memory Debugging - -# CONFIG_DEBUG_SHIRQ is not set - -# -# Debug Oops, Lockups and Hangs -# -# CONFIG_PANIC_ON_OOPS is not set -CONFIG_PANIC_ON_OOPS_VALUE=0 -CONFIG_PANIC_TIMEOUT=0 -CONFIG_LOCKUP_DETECTOR=y -CONFIG_SOFTLOCKUP_DETECTOR=y -# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 -CONFIG_HARDLOCKUP_DETECTOR_PERF=y -CONFIG_HARDLOCKUP_CHECK_TIMESTAMP=y 
-CONFIG_HARDLOCKUP_DETECTOR=y -# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set -CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 -CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 -# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set -CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 -# CONFIG_WQ_WATCHDOG is not set -# CONFIG_TEST_LOCKUP is not set -# end of Debug Oops, Lockups and Hangs - -# -# Scheduler Debugging -# -CONFIG_SCHED_DEBUG=y -CONFIG_SCHED_INFO=y -CONFIG_SCHEDSTATS=y -# end of Scheduler Debugging - -# CONFIG_DEBUG_TIMEKEEPING is not set -CONFIG_DEBUG_PREEMPT=y - -# -# Lock Debugging (spinlocks, mutexes, etc...) -# -CONFIG_LOCK_DEBUGGING_SUPPORT=y -# CONFIG_PROVE_LOCKING is not set -# CONFIG_LOCK_STAT is not set -# CONFIG_DEBUG_RT_MUTEXES is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_MUTEXES is not set -# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set -# CONFIG_DEBUG_RWSEMS is not set -# CONFIG_DEBUG_LOCK_ALLOC is not set -# CONFIG_DEBUG_ATOMIC_SLEEP is not set -# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set -# CONFIG_LOCK_TORTURE_TEST is not set -# CONFIG_WW_MUTEX_SELFTEST is not set -# CONFIG_SCF_TORTURE_TEST is not set -# CONFIG_CSD_LOCK_WAIT_DEBUG is not set -# end of Lock Debugging (spinlocks, mutexes, etc...) - -CONFIG_STACKTRACE=y -# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set -# CONFIG_DEBUG_KOBJECT is not set - -# -# Debug kernel data structures -# -# CONFIG_DEBUG_LIST is not set -# CONFIG_DEBUG_PLIST is not set -# CONFIG_DEBUG_SG is not set -# CONFIG_DEBUG_NOTIFIERS is not set -# CONFIG_BUG_ON_DATA_CORRUPTION is not set -# end of Debug kernel data structures - -# CONFIG_DEBUG_CREDENTIALS is not set - -# -# RCU Debugging -# -# CONFIG_RCU_SCALE_TEST is not set -# CONFIG_RCU_TORTURE_TEST is not set -# CONFIG_RCU_REF_SCALE_TEST is not set -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -# CONFIG_RCU_TRACE is not set -# CONFIG_RCU_EQS_DEBUG is not set -# CONFIG_RCU_STRICT_GRACE_PERIOD is not set -# end of RCU Debugging - -# CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set -# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set -# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set -CONFIG_LATENCYTOP=y -CONFIG_USER_STACKTRACE_SUPPORT=y -CONFIG_NOP_TRACER=y -CONFIG_HAVE_FUNCTION_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y -CONFIG_HAVE_DYNAMIC_FTRACE=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y -CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y -CONFIG_HAVE_SYSCALL_TRACEPOINTS=y -CONFIG_HAVE_FENTRY=y -CONFIG_HAVE_C_RECORDMCOUNT=y -CONFIG_TRACER_MAX_TRACE=y -CONFIG_TRACE_CLOCK=y -CONFIG_RING_BUFFER=y -CONFIG_EVENT_TRACING=y -CONFIG_CONTEXT_SWITCH_TRACER=y -CONFIG_RING_BUFFER_ALLOW_SWAP=y -CONFIG_TRACING=y -CONFIG_GENERIC_TRACER=y -CONFIG_TRACING_SUPPORT=y -CONFIG_FTRACE=y -# CONFIG_BOOTTIME_TRACING is not set -CONFIG_FUNCTION_TRACER=y -CONFIG_FUNCTION_GRAPH_TRACER=y -CONFIG_DYNAMIC_FTRACE=y -CONFIG_DYNAMIC_FTRACE_WITH_REGS=y -CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y -CONFIG_FUNCTION_PROFILER=y -CONFIG_STACK_TRACER=y -# CONFIG_IRQSOFF_TRACER is not set -# CONFIG_PREEMPT_TRACER is not set -CONFIG_SCHED_TRACER=y -CONFIG_HWLAT_TRACER=y -CONFIG_MMIOTRACE=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_TRACER_SNAPSHOT=y -# CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP is not set -CONFIG_BRANCH_PROFILE_NONE=y -# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_KPROBE_EVENTS=y -# CONFIG_KPROBE_EVENTS_ON_NOTRACE is not set -CONFIG_UPROBE_EVENTS=y -CONFIG_BPF_EVENTS=y -CONFIG_DYNAMIC_EVENTS=y -CONFIG_PROBE_EVENTS=y -CONFIG_BPF_KPROBE_OVERRIDE=y -CONFIG_FTRACE_MCOUNT_RECORD=y 
-CONFIG_TRACING_MAP=y -CONFIG_SYNTH_EVENTS=y -CONFIG_HIST_TRIGGERS=y -# CONFIG_TRACE_EVENT_INJECT is not set -# CONFIG_TRACEPOINT_BENCHMARK is not set -# CONFIG_RING_BUFFER_BENCHMARK is not set -# CONFIG_TRACE_EVAL_MAP_FILE is not set -# CONFIG_FTRACE_STARTUP_TEST is not set -# CONFIG_RING_BUFFER_STARTUP_TEST is not set -# CONFIG_MMIOTRACE_TEST is not set -# CONFIG_PREEMPTIRQ_DELAY_TEST is not set -# CONFIG_SYNTH_EVENT_GEN_TEST is not set -# CONFIG_KPROBE_EVENT_GEN_TEST is not set -# CONFIG_HIST_TRIGGERS_DEBUG is not set -# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set -# CONFIG_SAMPLES is not set -CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y -CONFIG_STRICT_DEVMEM=y -CONFIG_IO_STRICT_DEVMEM=y - -# -# x86 Debugging -# -CONFIG_TRACE_IRQFLAGS_SUPPORT=y -CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y -# CONFIG_X86_VERBOSE_BOOTUP is not set -CONFIG_EARLY_PRINTK=y -# CONFIG_EARLY_PRINTK_DBGP is not set -# CONFIG_EARLY_PRINTK_USB_XDBC is not set -# CONFIG_EFI_PGT_DUMP is not set -# CONFIG_DEBUG_TLBFLUSH is not set -# CONFIG_IOMMU_DEBUG is not set -CONFIG_HAVE_MMIOTRACE_SUPPORT=y -# CONFIG_X86_DECODER_SELFTEST is not set -CONFIG_IO_DELAY_0X80=y -# CONFIG_IO_DELAY_0XED is not set -# CONFIG_IO_DELAY_UDELAY is not set -# CONFIG_IO_DELAY_NONE is not set -CONFIG_DEBUG_BOOT_PARAMS=y -# CONFIG_CPA_DEBUG is not set -# CONFIG_DEBUG_ENTRY is not set -# CONFIG_DEBUG_NMI_SELFTEST is not set -# CONFIG_X86_DEBUG_FPU is not set -# CONFIG_PUNIT_ATOM_DEBUG is not set -CONFIG_UNWINDER_ORC=y -# CONFIG_UNWINDER_FRAME_POINTER is not set -# CONFIG_UNWINDER_GUESS is not set -# end of x86 Debugging - -# -# Kernel Testing and Coverage -# -# CONFIG_KUNIT is not set -# CONFIG_NOTIFIER_ERROR_INJECTION is not set -CONFIG_FUNCTION_ERROR_INJECTION=y -# CONFIG_FAULT_INJECTION is not set -CONFIG_ARCH_HAS_KCOV=y -CONFIG_CC_HAS_SANCOV_TRACE_PC=y -# CONFIG_KCOV is not set -CONFIG_RUNTIME_TESTING_MENU=y -CONFIG_LKDTM=m -# CONFIG_TEST_LIST_SORT is not set -# CONFIG_TEST_MIN_HEAP is not set -# CONFIG_TEST_SORT is not set -# CONFIG_KPROBES_SANITY_TEST is not set -# CONFIG_BACKTRACE_SELF_TEST is not set -# CONFIG_RBTREE_TEST is not set -# CONFIG_REED_SOLOMON_TEST is not set -# CONFIG_INTERVAL_TREE_TEST is not set -# CONFIG_PERCPU_TEST is not set -# CONFIG_ATOMIC64_SELFTEST is not set -# CONFIG_ASYNC_RAID6_TEST is not set -# CONFIG_TEST_HEXDUMP is not set -# CONFIG_TEST_STRING_HELPERS is not set -# CONFIG_TEST_STRSCPY is not set -# CONFIG_TEST_KSTRTOX is not set -# CONFIG_TEST_PRINTF is not set -# CONFIG_TEST_BITMAP is not set -# CONFIG_TEST_UUID is not set -# CONFIG_TEST_XARRAY is not set -# CONFIG_TEST_OVERFLOW is not set -# CONFIG_TEST_RHASHTABLE is not set -# CONFIG_TEST_HASH is not set -# CONFIG_TEST_IDA is not set -# CONFIG_TEST_PARMAN is not set -# CONFIG_TEST_LKM is not set -# CONFIG_TEST_BITOPS is not set -# CONFIG_TEST_VMALLOC is not set -# CONFIG_TEST_USER_COPY is not set -# CONFIG_TEST_BPF is not set -# CONFIG_TEST_BLACKHOLE_DEV is not set -# CONFIG_FIND_BIT_BENCHMARK is not set -# CONFIG_TEST_FIRMWARE is not set -# CONFIG_TEST_SYSCTL is not set -# CONFIG_TEST_UDELAY is not set -# CONFIG_TEST_STATIC_KEYS is not set -# CONFIG_TEST_KMOD is not set -# CONFIG_TEST_MEMCAT_P is not set -# CONFIG_TEST_OBJAGG is not set -# CONFIG_TEST_STACKINIT is not set -# CONFIG_TEST_MEMINIT is not set -# CONFIG_TEST_HMM is not set -# CONFIG_TEST_FREE_PAGES is not set -# CONFIG_TEST_FPU is not set -# CONFIG_MEMTEST is not set -# CONFIG_HYPERV_TESTING is not set -# end of Kernel Testing and Coverage -# end of Kernel hacking diff --git 
a/linux510-rc-tkg/linux510-tkg-config/generic-desktop-profile.cfg b/linux510-rc-tkg/linux510-tkg-config/generic-desktop-profile.cfg deleted file mode 100644 index 4e0af37..0000000 --- a/linux510-rc-tkg/linux510-tkg-config/generic-desktop-profile.cfg +++ /dev/null @@ -1,35 +0,0 @@ -# linux510-TkG config file -# Generic Desktop - - -#### KERNEL OPTIONS #### - -# Disable some non-module debugging - See PKGBUILD for the list -_debugdisable="false" - -# LEAVE AN EMPTY VALUE TO BE PROMPTED ABOUT FOLLOWING OPTIONS AT BUILD TIME - -# Set to "true" to disable FUNCTION_TRACER/GRAPH_TRACER, lowering overhead but limiting debugging and analyzing of kernel functions - Kernel default is "false" -_ftracedisable="false" - -# Set to "true" to disable NUMA, lowering overhead, but breaking CUDA/NvEnc on Nvidia equipped systems - Kernel default is "false" -_numadisable="false" - -# Set to "true" to use explicit preemption points to lower latency at the cost of a small throughput loss - Can give a nice perf boost in VMs - Kernel default is "false" -_voluntary_preempt="false" - -# A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience (ZENIFY) - Default is "true" -_zenify="true" - -# compiler optimization level - 1. Optimize for performance (-O2); 2. Optimize harder (-O3); 3. Optimize for size (-Os) - Kernel default is "2" -_compileroptlevel="1" - -# Trust the CPU manufacturer to initialize Linux's CRNG (RANDOM_TRUST_CPU) - Kernel default is "false" -_random_trust_cpu="false" - -# CPU scheduler runqueue sharing - No sharing (RQ_NONE), SMT (hyperthread) siblings (RQ_SMT), Multicore siblings (RQ_MC), Symmetric Multi-Processing (RQ_SMP), NUMA (RQ_ALL) -# Valid values are "none", "smt", "mc", "mc-llc"(for zen), "smp", "all" - Kernel default is "mc" -_runqueue_sharing="mc" - -# Timer frequency - "500", "750" or "1000" - More options available in kernel config prompt when left empty depending on selected cpusched - Kernel default is "750" -_timer_freq="500" diff --git a/linux510-rc-tkg/linux510-tkg-config/prepare b/linux510-rc-tkg/linux510-tkg-config/prepare deleted file mode 100644 index 5153c7a..0000000 --- a/linux510-rc-tkg/linux510-tkg-config/prepare +++ /dev/null @@ -1,991 +0,0 @@ -#!/bin/bash - -_basever=510 -_basekernel=5.10 -_sub=rc1 - -_tkg_initscript() { - - cp "$_where"/linux"$_basever"-tkg-patches/* "$_where" # copy patches inside the PKGBUILD's dir to preserve makepkg sourcing and md5sum checking - cp "$_where"/linux"$_basever"-tkg-config/* "$_where" # copy config files and hooks inside the PKGBUILD's dir to preserve makepkg sourcing and md5sum checking - - # Load external configuration file if present. Available variable values will overwrite customization.cfg ones. - if [ -e "$_EXT_CONFIG_PATH" ]; then - source "$_EXT_CONFIG_PATH" && msg2 "External configuration file $_EXT_CONFIG_PATH will be used to override customization.cfg values." && msg2 "" - fi - - if [ -z "$_OPTIPROFILE" ] && [ ! -e "$_where"/cpuschedset ]; then - # Prompt about optimized configurations. Available variable values will overwrite customization.cfg/external config ones. - plain "Do you want to use a predefined optimized profile?" - read -rp "`echo $' > 1.Custom\n 2.Ryzen Desktop (Performance)\n 3.Other Desktop (Performance)\nchoice[1-3?]: '`" _OPTIPROFILE; - fi - if [ "$_OPTIPROFILE" = "2" ]; then - source "$_where"/ryzen-desktop-profile.cfg && msg2 "Ryzen Desktop (Performance) profile will be used." 
&& msg2 "" - elif [ "$_OPTIPROFILE" = "3" ]; then - source "$_where"/generic-desktop-profile.cfg && msg2 "Generic Desktop (Performance) profile will be used." && msg2 "" - fi - - # source cpuschedset early if present - if [ -e "$_where"/cpuschedset ]; then - source "$_where"/cpuschedset - fi - - # CPU SCHED selector - if [ -z "$_cpusched" ] && [ ! -e "$_where"/cpuschedset ]; then - plain "What CPU sched variant do you want to build/install?" - read -rp "`echo $' > 1.Project C / PDS\n 2.Project C / BMQ\n 3.MuQSS\n 4.CFS\nchoice[1-4?]: '`" CONDITION; - if [ "$CONDITION" = "2" ]; then - echo "_cpusched=\"bmq\"" > "$_where"/cpuschedset - elif [ "$CONDITION" = "3" ]; then - echo "_cpusched=\"MuQSS\"" > "$_where"/cpuschedset - elif [ "$CONDITION" = "4" ]; then - echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - else - echo "_cpusched=\"pds\"" > "$_where"/cpuschedset - fi - if [ -n "$_custom_pkgbase" ]; then - echo "_custom_pkgbase=\"${_custom_pkgbase}\"" >> "$_where"/cpuschedset - fi - elif [ "$_cpusched" = "muqss" ] || [ "$_cpusched" = "MuQSS" ]; then - echo "_cpusched=\"MuQSS\"" > "$_where"/cpuschedset - elif [ "$_cpusched" = "pds" ]; then - echo "_cpusched=\"pds\"" > "$_where"/cpuschedset - elif [ "$_cpusched" = "cfs" ]; then - echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - elif [ "$_cpusched" = "bmq" ]; then - echo "_cpusched=\"bmq\"" > "$_where"/cpuschedset - else - if [ "$_nofallback" != "true" ]; then - warning "Something is wrong with your cpusched selection. Do you want to fallback to CFS (default)?" - read -rp "`echo $' > N/y : '`" _fallback; - fi - if [[ "$_fallback" =~ [yY] ]] || [ "$_nofallback" = "true" ]; then - echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - else - error "Exiting..." - exit 1 - fi - fi - - source "$_where"/cpuschedset -} - -user_patcher() { - # To patch the user because all your base are belong to us - local _patches=("$_where"/*."${_userpatch_ext}revert") - if [ ${#_patches[@]} -ge 2 ] || [ -e "${_patches}" ]; then - if [ "$_user_patches_no_confirm" != "true" ]; then - msg2 "Found ${#_patches[@]} 'to revert' userpatches for ${_userpatch_target}:" - printf '%s\n' "${_patches[@]}" - read -rp "Do you want to install it/them? - Be careful with that ;)"$'\n> N/y : ' _CONDITION; - fi - if [[ "$_CONDITION" =~ [yY] ]] || [ "$_user_patches_no_confirm" = "true" ]; then - for _f in "${_patches[@]}"; do - if [ -e "${_f}" ]; then - msg2 "######################################################" - msg2 "" - msg2 "Reverting your own ${_userpatch_target} patch ${_f}" - msg2 "" - msg2 "######################################################" - patch -Np1 -R < "${_f}" - echo "Reverted your own patch ${_f}" >> "$_where"/last_build_config.log - fi - done - fi - fi - - _patches=("$_where"/*."${_userpatch_ext}patch") - if [ ${#_patches[@]} -ge 2 ] || [ -e "${_patches}" ]; then - if [ "$_user_patches_no_confirm" != "true" ]; then - msg2 "Found ${#_patches[@]} userpatches for ${_userpatch_target}:" - printf '%s\n' "${_patches[@]}" - read -rp "Do you want to install it/them? 
- Be careful with that ;)"$'\n> N/y : ' _CONDITION; - fi - if [[ "$_CONDITION" =~ [yY] ]] || [ "$_user_patches_no_confirm" = "true" ]; then - for _f in "${_patches[@]}"; do - if [ -e "${_f}" ]; then - msg2 "######################################################" - msg2 "" - msg2 "Applying your own ${_userpatch_target} patch ${_f}" - msg2 "" - msg2 "######################################################" - patch -Np1 < "${_f}" - echo "Applied your own patch ${_f}" >> "$_where"/last_build_config.log - fi - done - fi - fi -} - -_tkg_srcprep() { - - if [ "${_distro}" = "Arch" ]; then - msg2 "Setting version..." - scripts/setlocalversion --save-scmversion - echo "-$pkgrel-tkg-${_cpusched}" > localversion.10-pkgrel - echo "" > localversion.20-pkgname - - # add upstream patch - #msg2 "Patching from $_basekernel to $pkgver" - #patch -p1 -i "$srcdir"/patch-"${pkgver}" - - # ARCH Patches - if [ "${_configfile}" = "config_hardened.x86_64" ] && [ "${_cpusched}" = "cfs" ]; then - msg2 "Using linux hardened patchset" - patch -Np1 -i "$srcdir"/0012-linux-hardened.patch - else - patch -Np1 -i "$srcdir"/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch - fi - fi - - # graysky's cpu opts - https://github.com/graysky2/kernel_gcc_patch - msg2 "Applying graysky's cpu opts patch" - if [ "${_distro}" = "Arch" ]; then - patch -Np1 -i "$srcdir"/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v5.8%2B.patch - else - patch -Np1 -i "$srcdir"/enable_additional_cpu_optimizations_for_gcc_v10.1+_kernel_v5.8+.patch - fi - - # TkG - msg2 "Applying clear linux patches" - patch -Np1 -i "$srcdir"/0002-clear-patches.patch - - msg2 "Applying glitched base patch" - patch -Np1 -i "$srcdir"/0003-glitched-base.patch - - if [ -z $_misc_adds ]; then - plain "Enable misc additions ? May contain temporary fixes pending upstream or changes that can break on non-Arch. 
" - read -rp "`echo $' > [Y]/n : '`" _interactive_misc_adds; - if [ "$_interactive_misc_adds" != "n" ] && [ "$_interactive_misc_adds" != "N" ]; then - _misc_adds="true" - fi - fi - - if [ "$_misc_adds" = "true" ]; then - msg2 "Applying misc additions patch" - patch -Np1 -i "$srcdir"/0012-misc-additions.patch - fi - - if [ "${_cpusched}" = "MuQSS" ]; then - # MuQSS - msg2 "Applying MuQSS base patch" - patch -Np1 -i "$srcdir"/0004-5.10-ck1.patch - - if [ "${_aggressive_ondemand}" = "true" ]; then - msg2 "Applying MuQSS agressive ondemand governor patch" - patch -Np1 -i "$srcdir"/0004-glitched-ondemand-muqss.patch - fi - - msg2 "Applying Glitched MuQSS patch" - patch -Np1 -i "$srcdir"/0004-glitched-muqss.patch - - elif [ "${_cpusched}" = "pds" ]; then - # PDS-mq - msg2 "Applying PDS base patch" - patch -Np1 -i "$srcdir"/0009-prjc_v5.10-r0.patch - - if [ "${_aggressive_ondemand}" = "true" ]; then - msg2 "Applying PDS agressive ondemand governor patch" - patch -Np1 -i "$srcdir"/0009-glitched-ondemand-bmq.patch - fi - - msg2 "Applying Glitched PDS patch" - patch -Np1 -i "$srcdir"/0005-glitched-pds.patch - - elif [ "${_cpusched}" = "bmq" ]; then - # Project C / BMQ - msg2 "Applying Project C / BMQ base patch" - - patch -Np1 -i "$srcdir"/0009-prjc_v5.10-r0.patch - - if [ "${_aggressive_ondemand}" = "true" ]; then - msg2 "Applying BMQ agressive ondemand governor patch" - patch -Np1 -i "$srcdir"/0009-glitched-ondemand-bmq.patch - fi - - msg2 "Applying Glitched BMQ patch" - patch -Np1 -i "$srcdir"/0009-glitched-bmq.patch - - elif [ "${_cpusched}" = "cfs" ]; then - msg2 "Applying Glitched CFS patch" - patch -Np1 -i "$srcdir"/0003-glitched-cfs.patch - fi - - if [ "${_distro}" = "Arch" ]; then - if [ -z "${_configfile}" ]; then - _configfile="config.x86_64" - fi - - cat "${srcdir}/${_configfile}" > ./.config - fi - - - # Set some -tkg defaults - echo "# CONFIG_DYNAMIC_FAULT is not set" >> ./.config - sed -i -e 's/CONFIG_DEFAULT_FQ_CODEL=y/# CONFIG_DEFAULT_FQ_CODEL is not set/' ./.config - echo "CONFIG_DEFAULT_CAKE=y" >> ./.config - echo "CONFIG_NR_TTY_DEVICES=63" >> ./.config - echo "# CONFIG_NTP_PPS is not set" >> ./.config - sed -i -e 's/CONFIG_CRYPTO_LZ4=m/CONFIG_CRYPTO_LZ4=y/' ./.config - sed -i -e 's/CONFIG_CRYPTO_LZ4HC=m/CONFIG_CRYPTO_LZ4HC=y/' ./.config - sed -i -e 's/CONFIG_LZ4_COMPRESS=m/CONFIG_LZ4_COMPRESS=y/' ./.config - sed -i -e 's/CONFIG_LZ4HC_COMPRESS=m/CONFIG_LZ4HC_COMPRESS=y/' ./.config - sed -i -e 's/CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO=y/# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set/' ./.config - sed -i -e 's/# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4 is not set/CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4=y/' ./.config - sed -i -e 's/CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lzo"/CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lz4"/' ./.config - sed -i -e 's/CONFIG_RCU_BOOST_DELAY=500/CONFIG_RCU_BOOST_DELAY=0/' ./.config - sed -i -e 's/# CONFIG_CMDLINE_BOOL is not set/CONFIG_CMDLINE_BOOL=y/' ./.config - echo "CONFIG_CMDLINE=\"${_custom_commandline}\"" >> ./.config - echo "# CONFIG_CMDLINE_OVERRIDE is not set" >> ./.config - echo "# CONFIG_X86_P6_NOP is not set" >> ./.config - - # openrgb - echo "CONFIG_I2C_NCT6775=m" >> ./.config - - # ccache fix - if [ "$_noccache" != "true" ]; then - if { [ "$_distro" = "Arch" ] && pacman -Qq ccache &> /dev/null; } || { [ "$_distro" = "Ubuntu" ] && dpkg -l ccache > /dev/null; }; then - sed -i -e 's/CONFIG_GCC_PLUGINS=y/# CONFIG_GCC_PLUGINS is not set/' ./.config - fi - fi - # Skip dbg package creation on non-Arch - if [ "$_distro" != "Arch" ]; then - sed -i -e 
's/CONFIG_DEBUG_INFO.*/CONFIG_DEBUG_INFO=n/' ./.config - fi - - if [ "$_font_autoselect" != "false" ]; then - sed -i -e 's/CONFIG_FONT_TER16x32=y/# CONFIG_FONT_TER16x32 is not set\nCONFIG_FONT_AUTOSELECT=y/' ./.config - fi - - # Inject cpuopts options - echo "# CONFIG_MK8SSE3 is not set" >> ./.config - echo "# CONFIG_MK10 is not set" >> ./.config - echo "# CONFIG_MBARCELONA is not set" >> ./.config - echo "# CONFIG_MBOBCAT is not set" >> ./.config - echo "# CONFIG_MJAGUAR is not set" >> ./.config - echo "# CONFIG_MBULLDOZER is not set" >> ./.config - echo "# CONFIG_MPILEDRIVER is not set" >> ./.config - echo "# CONFIG_MSTEAMROLLER is not set" >> ./.config - echo "# CONFIG_MEXCAVATOR is not set" >> ./.config - echo "# CONFIG_MZEN is not set" >> ./.config - echo "# CONFIG_MZEN2 is not set" >> ./.config - echo "# CONFIG_MATOM is not set" >> ./.config - echo "# CONFIG_MNEHALEM is not set" >> ./.config - echo "# CONFIG_MWESTMERE is not set" >> ./.config - echo "# CONFIG_MSILVERMONT is not set" >> ./.config - echo "# CONFIG_MSANDYBRIDGE is not set" >> ./.config - echo "# CONFIG_MIVYBRIDGE is not set" >> ./.config - echo "# CONFIG_MHASWELL is not set" >> ./.config - echo "# CONFIG_MBROADWELL is not set" >> ./.config - echo "# CONFIG_MSKYLAKE is not set" >> ./.config - echo "# CONFIG_MSKYLAKEX is not set" >> ./.config - echo "# CONFIG_MCANNONLAKE is not set" >> ./.config - echo "# CONFIG_MICELAKE is not set" >> ./.config - echo "# CONFIG_MGOLDMONT is not set" >> ./.config - echo "# CONFIG_MGOLDMONTPLUS is not set" >> ./.config - echo "# CONFIG_MCASCADELAKE is not set" >> ./.config - echo "# CONFIG_MCOOPERLAKE is not set" >> ./.config - echo "# CONFIG_MTIGERLAKE is not set" >> ./.config - - # Disable some debugging - if [ "${_debugdisable}" = "true" ]; then - sed -i -e 's/CONFIG_SLUB_DEBUG=y/# CONFIG_SLUB_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_DEBUG=y/# CONFIG_PM_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_ADVANCED_DEBUG=y/# CONFIG_PM_ADVANCED_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_SLEEP_DEBUG=y/# CONFIG_PM_SLEEP_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_ACPI_DEBUG=y/# CONFIG_ACPI_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_SCHED_DEBUG=y/# CONFIG_SCHED_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_LATENCYTOP=y/# CONFIG_LATENCYTOP is not set/' ./.config - sed -i -e 's/CONFIG_DEBUG_PREEMPT=y/# CONFIG_DEBUG_PREEMPT is not set/' ./.config - fi - - if [ "${_cpusched}" = "MuQSS" ]; then - # MuQSS default config - echo "CONFIG_SCHED_MUQSS=y" >> ./.config - elif [ "${_cpusched}" = "pds" ]; then - # PDS default config - echo "CONFIG_SCHED_ALT=y" >> ./.config - echo "CONFIG_SCHED_PDS=y" >> ./.config - echo "# CONFIG_SCHED_BMQ is not set" >> ./.config - elif [ "${_cpusched}" = "bmq" ]; then - # BMQ default config - echo "CONFIG_SCHED_ALT=y" >> ./.config - echo "CONFIG_SCHED_BMQ=y" >> ./.config - echo "# CONFIG_SCHED_PDS is not set" >> ./.config - fi - - if [ "${_cpusched}" = "MuQSS" ] || [ "${_cpusched}" = "pds" ] || [ "${_cpusched}" = "bmq" ]; then - # Disable CFS - sed -i -e 's/CONFIG_FAIR_GROUP_SCHED=y/# CONFIG_FAIR_GROUP_SCHED is not set/' ./.config - sed -i -e 's/CONFIG_CFS_BANDWIDTH=y/# CONFIG_CFS_BANDWIDTH is not set/' ./.config - # sched yield type - if [ -n "$_sched_yield_type" ]; then - CONDITION0="$_sched_yield_type" - else - plain "" - plain "CPU sched_yield_type - Choose what sort of yield sched_yield will perform." - plain "" - plain "For PDS and MuQSS:" - plain "0: No yield." - plain "1: Yield only to better priority/deadline tasks." 
- plain "2: Expire timeslice and recalculate deadline." - plain "" - plain "For BMQ (experimental) - No recommended value yet, so try for yourself x) :" - plain "0: No yield." - plain "1: Deboost and requeue task. (default)" - plain "2: Set rq skip task." - if [ "${_cpusched}" = "MuQSS" ]; then - read -rp "`echo $'\n 0. Supposedly best option for gaming performance - could lead to stability issues on some (AMD) platforms when combined with MuQSS\n > 1. Default and recommended option for MuQSS - could lead to stability issues on some (Intel) platforms\n 2. Can be a good option with low rr_interval on MuQSS\n [0-2?]: '`" CONDITION0; - else - read -rp "`echo $'\n > 0. Recommended option for gaming on PDS - "tkg" default\n 1. Default, but can lead to stability issues on some platforms\n 2. Can be a good option with low rr_interval on MuQSS\n [0-2?]: '`" CONDITION0; - fi - fi - if [ "$CONDITION0" = "0" ]; then - if [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/alt_core.c - else - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/"${_cpusched}".c - fi - elif [ "$CONDITION0" = "1" ]; then - msg2 "Using default CPU sched yield type (1)" - elif [ "$CONDITION0" = "2" ]; then - if [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 2;/' ./kernel/sched/alt_core.c - else - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 2;/' ./kernel/sched/"${_cpusched}".c - fi - else - if [ "${_cpusched}" = "MuQSS" ]; then - msg2 "Using default CPU sched yield type (1)" - elif [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/alt_core.c - else - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/"${_cpusched}".c - fi - fi - fi - - # Round Robin interval - if [ "${_cpusched}" = "MuQSS" ] || [ "${_cpusched}" = "pds" ] || [ "${_cpusched}" = "bmq" ]; then - if [ -n "$_rr_interval" ]; then - CONDITION1="$_rr_interval" - else - plain "" - plain "Round Robin interval is the longest duration two tasks with the same nice level will" - plain "be delayed for. When CPU time is requested by a task, it receives a time slice equal" - plain "to the rr_interval in addition to a virtual deadline. When using yield_type 2, a low" - plain "value can help offset the disadvantages of rescheduling a process that has yielded." 
- plain "" - plain "MuQSS default: 6ms" - plain "PDS default: 4ms" - plain "BMQ default: 2ms" - read -rp "`echo $'\n > 0.Keep defaults\n 1.2ms\n 2.4ms\n 3.6ms\n 4.8ms\n [0-4?]: '`" CONDITION1; - fi - if [ "$CONDITION1" = "1" ]; then - msg2 "Using 2ms rr_interval" - _rrvalue="2" - elif [ "$CONDITION1" = "2" ]; then - msg2 "Using 4ms rr_interval" - _rrvalue="4" - elif [ "$CONDITION1" = "3" ]; then - msg2 "Using 6ms rr_interval" - _rrvalue="6" - elif [ "$CONDITION1" = "4" ]; then - msg2 "Using 8ms rr_interval" - _rrvalue="8" - else - msg2 "Using default rr_interval" - _rrvalue="default" - fi - if [ "$_rrvalue" != "default" ]; then - if [ "${_cpusched}" = "MuQSS" ]; then - sed -i -e "s/int rr_interval __read_mostly = 6;/int rr_interval __read_mostly = ${_rrvalue};/" ./kernel/sched/"${_cpusched}".c - elif [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e "s/u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000);/u64 sched_timeslice_ns __read_mostly = (${_rrvalue} * 1000 * 1000);/" ./kernel/sched/alt_core.c - fi - else - if [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e "s/u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000);/u64 sched_timeslice_ns __read_mostly = (2 * 1000 * 1000);/" ./kernel/sched/alt_core.c - fi - fi - fi - - # zenify - if [ "$_zenify" = "true" ]; then - echo "CONFIG_ZENIFY=y" >> ./.config - elif [ "$_zenify" = "false" ]; then - echo "# CONFIG_ZENIFY is not set" >> ./.config - fi - - # compiler optimization level - if [ "$_compileroptlevel" = "1" ]; then - echo "# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 is not set" >> ./.config - elif [ "$_compileroptlevel" = "2" ]; then - sed -i -e 's/CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y/# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set/' ./.config - echo "CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y" >> ./.config - elif [ "$_compileroptlevel" = "3" ]; then - sed -i -e 's/CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y/# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set/' ./.config - sed -i -e 's/# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set/CONFIG_CC_OPTIMIZE_FOR_SIZE=y/' ./.config - echo "# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 is not set" >> ./.config - fi - - # cpu opt - if [ -n "$_processor_opt" ] && [ "$_processor_opt" != "native" ]; then - echo "# CONFIG_MNATIVE is not set" >> ./.config - fi - - if [ -n "$_processor_opt" ] && [ "$_processor_opt" != "generic" ]; then - sed -i -e 's/CONFIG_GENERIC_CPU=y/# CONFIG_GENERIC_CPU is not set/' ./.config - fi - - if [ "$_processor_opt" = "native" ]; then - echo "CONFIG_MNATIVE=y" >> ./.config - elif [ "$_processor_opt" = "k8" ]; then - sed -i -e 's/# CONFIG_MK8 is not set/CONFIG_MK8=y/' ./.config - elif [ "$_processor_opt" = "k8sse3" ]; then - sed -i -e 's/# CONFIG_MK8SSE3 is not set/CONFIG_MK8SSE3=y/' ./.config - elif [ "$_processor_opt" = "k10" ]; then - sed -i -e 's/# CONFIG_MK10 is not set/CONFIG_MK10=y/' ./.config - elif [ "$_processor_opt" = "barcelona" ]; then - sed -i -e 's/# CONFIG_MBARCELONA is not set/CONFIG_MBARCELONA=y/' ./.config - elif [ "$_processor_opt" = "bobcat" ]; then - sed -i -e 's/# CONFIG_MBOBCAT is not set/CONFIG_MBOBCAT=y/' ./.config - elif [ "$_processor_opt" = "jaguar" ]; then - sed -i -e 's/# CONFIG_MJAGUAR is not set/CONFIG_MJAGUAR=y/' ./.config - elif [ "$_processor_opt" = "bulldozer" ]; then - sed -i -e 's/# CONFIG_MBULLDOZER is not set/CONFIG_MBULLDOZER=y/' ./.config - elif [ "$_processor_opt" = "piledriver" ]; then - sed -i -e 's/# CONFIG_MPILEDRIVER is not set/CONFIG_MPILEDRIVER=y/' ./.config - elif [ "$_processor_opt" = 
"steamroller" ]; then - sed -i -e 's/# CONFIG_MSTEAMROLLER is not set/CONFIG_MSTEAMROLLER=y/' ./.config - elif [ "$_processor_opt" = "excavator" ]; then - sed -i -e 's/# CONFIG_MEXCAVATOR is not set/CONFIG_MEXCAVATOR=y/' ./.config - elif [ "$_processor_opt" = "zen" ]; then - sed -i -e 's/# CONFIG_MZEN is not set/CONFIG_MZEN=y/' ./.config - elif [ "$_processor_opt" = "zen2" ]; then - sed -i -e 's/# CONFIG_MZEN2 is not set/CONFIG_MZEN2=y/' ./.config - elif [ "$_processor_opt" = "mpsc" ]; then - sed -i -e 's/# CONFIG_MPSC is not set/CONFIG_MPSC=y/' ./.config - elif [ "$_processor_opt" = "atom" ]; then - sed -i -e 's/# CONFIG_MATOM is not set/CONFIG_MATOM=y/' ./.config - elif [ "$_processor_opt" = "core2" ]; then - sed -i -e 's/# CONFIG_MCORE2 is not set/CONFIG_MCORE2=y/' ./.config - elif [ "$_processor_opt" = "nehalem" ]; then - sed -i -e 's/# CONFIG_MNEHALEM is not set/CONFIG_MNEHALEM=y/' ./.config - elif [ "$_processor_opt" = "westmere" ]; then - sed -i -e 's/# CONFIG_MWESTMERE is not set/CONFIG_MWESTMERE=y/' ./.config - elif [ "$_processor_opt" = "silvermont" ]; then - sed -i -e 's/# CONFIG_MSILVERMONT is not set/CONFIG_MSILVERMONT=y/' ./.config - elif [ "$_processor_opt" = "sandybridge" ]; then - sed -i -e 's/# CONFIG_MSANDYBRIDGE is not set/CONFIG_MSANDYBRIDGE=y/' ./.config - elif [ "$_processor_opt" = "ivybridge" ]; then - sed -i -e 's/# CONFIG_MIVYBRIDGE is not set/CONFIG_MIVYBRIDGE=y/' ./.config - elif [ "$_processor_opt" = "haswell" ]; then - sed -i -e 's/# CONFIG_MHASWELL is not set/CONFIG_MHASWELL=y/' ./.config - elif [ "$_processor_opt" = "broadwell" ]; then - sed -i -e 's/# CONFIG_MBROADWELL is not set/CONFIG_MBROADWELL=y/' ./.config - elif [ "$_processor_opt" = "skylake" ]; then - sed -i -e 's/# CONFIG_MSKYLAKE is not set/CONFIG_MSKYLAKE=y/' ./.config - elif [ "$_processor_opt" = "skylakex" ]; then - sed -i -e 's/# CONFIG_MSKYLAKEX is not set/CONFIG_MSKYLAKEX=y/' ./.config - elif [ "$_processor_opt" = "cannonlake" ]; then - sed -i -e 's/# CONFIG_MCANNONLAKE is not set/CONFIG_MCANNONLAKE=y/' ./.config - elif [ "$_processor_opt" = "icelake" ]; then - sed -i -e 's/# CONFIG_MICELAKE is not set/CONFIG_MICELAKE=y/' ./.config - elif [ "$_processor_opt" = "goldmont" ]; then - sed -i -e 's/# CONFIG_MGOLDMONT is not set/CONFIG_MGOLDMONT=y/' ./.config - elif [ "$_processor_opt" = "goldmontplus" ]; then - sed -i -e 's/# CONFIG_MGOLDMONTPLUS is not set/CONFIG_MGOLDMONTPLUS=y/' ./.config - elif [ "$_processor_opt" = "cascadelake" ]; then - sed -i -e 's/# CONFIG_MCASCADELAKE is not set/CONFIG_MCASCADELAKE=y/' ./.config - elif [ "$_processor_opt" = "cooperlake" ]; then - sed -i -e 's/# CONFIG_MCOOPERLAKE is not set/CONFIG_MCOOPERLAKE=y/' ./.config - elif [ "$_processor_opt" = "tigerlake" ]; then - sed -i -e 's/# CONFIG_MTIGERLAKE is not set/CONFIG_MTIGERLAKE=y/' ./.config - fi - - # irq threading - if [ "$_irq_threading" = "true" ]; then - echo "CONFIG_FORCE_IRQ_THREADING=y" >> ./.config - elif [ "$_irq_threading" = "false" ]; then - echo "# CONFIG_FORCE_IRQ_THREADING is not set" >> ./.config - fi - - # smt nice - if [ "$_smt_nice" = "true" ]; then - echo "CONFIG_SMT_NICE=y" >> ./.config - elif [ "$_smt_nice" = "false" ]; then - echo "# CONFIG_SMT_NICE is not set" >> ./.config - fi - - # random trust cpu - if [ "$_random_trust_cpu" = "true" ]; then - sed -i -e 's/# CONFIG_RANDOM_TRUST_CPU is not set/CONFIG_RANDOM_TRUST_CPU=y/' ./.config - fi - - # rq sharing - if [ "$_runqueue_sharing" = "none" ]; then - echo -e "CONFIG_RQ_NONE=y\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# 
CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ -z "$_runqueue_sharing" ] || [ "$_runqueue_sharing" = "smt" ]; then - echo -e "# CONFIG_RQ_NONE is not set\nCONFIG_RQ_SMT=y\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" = "mc" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\nCONFIG_RQ_MC=y\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" = "smp" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\nCONFIG_RQ_SMP=y\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" = "all" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\nCONFIG_RQ_ALL=y" >> ./.config - elif [ "$_runqueue_sharing" = "mc-llc" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\nCONFIG_RQ_MC_LLC=y\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - fi - - # timer freq - if [ -n "$_timer_freq" ] && [ "$_timer_freq" != "300" ]; then - sed -i -e 's/CONFIG_HZ_300=y/# CONFIG_HZ_300 is not set/' ./.config - sed -i -e 's/CONFIG_HZ_300_NODEF=y/# CONFIG_HZ_300_NODEF is not set/' ./.config - if [ "$_timer_freq" = "1000" ]; then - sed -i -e 's/# CONFIG_HZ_1000 is not set/CONFIG_HZ_1000=y/' ./.config - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=1000/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "CONFIG_HZ_1000_NODEF=y" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" = "750" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=750/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "CONFIG_HZ_750=y" >> ./.config - echo "CONFIG_HZ_750_NODEF=y" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" = "500" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=500/' ./.config - echo "CONFIG_HZ_500=y" >> ./.config - echo "CONFIG_HZ_500_NODEF=y" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" = "100" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=100/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - echo "CONFIG_HZ_100=y" >> ./.config - echo "CONFIG_HZ_100_NODEF=y" >> ./.config - fi - elif [ "${_cpusched}" = "MuQSS" ] && [ -z 
"$_timer_freq" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=100/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - echo "CONFIG_HZ_100=y" >> ./.config - echo "CONFIG_HZ_100_NODEF=y" >> ./.config - else - sed -i -e 's/CONFIG_HZ_300=y/# CONFIG_HZ_300 is not set/' ./.config - sed -i -e 's/CONFIG_HZ_300_NODEF=y/# CONFIG_HZ_300_NODEF is not set/' ./.config - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=500/' ./.config - echo "CONFIG_HZ_500=y" >> ./.config - echo "CONFIG_HZ_500_NODEF=y" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - fi - - # default cpu gov - if [ "$_default_cpu_gov" = "performance" ]; then - sed -i -e 's/CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y/# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set/' ./.config - sed -i -e 's/# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set/CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y/' ./.config - elif [ "$_default_cpu_gov" = "ondemand" ]; then - sed -i -e 's/CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y/# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set/' ./.config - sed -i -e 's/# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set/CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y/' ./.config - fi - - # ACPI_CPUFREQ disablement - if [ "$_disable_acpi_cpufreq" = "true" ]; then - sed -i -e 's/CONFIG_X86_ACPI_CPUFREQ=m/# CONFIG_X86_ACPI_CPUFREQ is not set/' ./.config - fi - - # ftrace - if [ -z "$_ftracedisable" ]; then - plain "" - plain "Disable FUNCTION_TRACER/GRAPH_TRACER? Lowers overhead but limits debugging" - plain "and analyzing of kernel functions." - read -rp "`echo $' > N/y : '`" CONDITION2; - fi - if [[ "$CONDITION2" =~ [yY] ]] || [ "$_ftracedisable" = "true" ]; then - sed -i -e 's/CONFIG_FUNCTION_TRACER=y/# CONFIG_FUNCTION_TRACER is not set/' ./.config - sed -i -e 's/CONFIG_FUNCTION_GRAPH_TRACER=y/# CONFIG_FUNCTION_GRAPH_TRACER is not set/' ./.config - fi - - # disable numa - if [ -z "$_numadisable" ]; then - plain "" - plain "Disable NUMA? Lowers overhead, but breaks CUDA/NvEnc on Nvidia if disabled." - plain "https://bbs.archlinux.org/viewtopic.php?id=239174" - read -rp "`echo $' > N/y : '`" CONDITION3; - fi - if [[ "$CONDITION3" =~ [yY] ]] || [ "$_numadisable" = "true" ]; then - # disable NUMA since 99.9% of users do not have multiple CPUs but do have multiple cores in one CPU - sed -i -e 's/CONFIG_NUMA=y/# CONFIG_NUMA is not set/' \ - -i -e '/CONFIG_AMD_NUMA=y/d' \ - -i -e '/CONFIG_X86_64_ACPI_NUMA=y/d' \ - -i -e '/CONFIG_NODES_SPAN_OTHER_NODES=y/d' \ - -i -e '/# CONFIG_NUMA_EMU is not set/d' \ - -i -e '/CONFIG_NODES_SHIFT=6/d' \ - -i -e '/CONFIG_NEED_MULTIPLE_NODES=y/d' \ - -i -e '/CONFIG_USE_PERCPU_NUMA_NODE_ID=y/d' \ - -i -e '/CONFIG_ACPI_NUMA=y/d' ./.config - fi - - # tickless - if [ -z "$_tickless" ]; then - plain "" - plain "Use CattaRappa mode (Tickless/Dynticks) ?" - plain "Can give higher performances in many cases but lower consistency on some hardware." - plain "Just tickless idle can perform better with some platforms (mostly AMD) or CPU schedulers (mostly MuQSS)." 
- if [ "${_cpusched}" = "MuQSS" ]; then - read -rp "`echo $'\n 0.No, use periodic ticks\n 1.Yes, full tickless baby!\n > 2.Just tickless idle plz\n [0-2?]: '`" CONDITION4; - else - read -rp "`echo $'\n 0.No, use periodic ticks\n > 1.Yes, full tickless baby!\n 2.Just tickless idle plz\n [0-2?]: '`" CONDITION4; - fi - fi - if [ "$CONDITION4" = "0" ] || [ "$_tickless" = "0" ]; then - echo "# CONFIG_NO_HZ_FULL_NODEF is not set" >> ./.config - sed -i -e 's/# CONFIG_HZ_PERIODIC is not set/CONFIG_HZ_PERIODIC=y/' ./.config - sed -i -e 's/CONFIG_NO_HZ_IDLE=y/# CONFIG_NO_HZ_IDLE is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_FULL=y/# CONFIG_NO_HZ_FULL is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ=y/# CONFIG_NO_HZ is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_COMMON=y/# CONFIG_NO_HZ_COMMON is not set/' ./.config - elif [ "$CONDITION4" = "2" ] || [ "$_tickless" = "2" ]; then - echo "# CONFIG_NO_HZ_FULL_NODEF is not set" >> ./.config - sed -i -e 's/CONFIG_HZ_PERIODIC=y/# CONFIG_HZ_PERIODIC is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_IDLE is not set/CONFIG_NO_HZ_IDLE=y/' ./.config - sed -i -e 's/CONFIG_NO_HZ_FULL=y/# CONFIG_NO_HZ_FULL is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ is not set/CONFIG_NO_HZ=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_COMMON is not set/CONFIG_NO_HZ_COMMON=y/' ./.config - else - if [ "${_cpusched}" = "MuQSS" ]; then - echo "# CONFIG_NO_HZ_FULL_NODEF is not set" >> ./.config - sed -i -e 's/CONFIG_HZ_PERIODIC=y/# CONFIG_HZ_PERIODIC is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_IDLE is not set/CONFIG_NO_HZ_IDLE=y/' ./.config - sed -i -e 's/CONFIG_NO_HZ_FULL=y/# CONFIG_NO_HZ_FULL is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ is not set/CONFIG_NO_HZ=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_COMMON is not set/CONFIG_NO_HZ_COMMON=y/' ./.config - else - echo "CONFIG_NO_HZ_FULL_NODEF=y" >> ./.config - sed -i -e 's/CONFIG_HZ_PERIODIC=y/# CONFIG_HZ_PERIODIC is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_IDLE=y/# CONFIG_NO_HZ_IDLE is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_FULL is not set/CONFIG_NO_HZ_FULL=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ is not set/CONFIG_NO_HZ=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_COMMON is not set/CONFIG_NO_HZ_COMMON=y/' ./.config - echo "CONFIG_CONTEXT_TRACKING=y" >> ./.config - echo "# CONFIG_CONTEXT_TRACKING_FORCE is not set" >> ./.config - fi - fi - - # voluntary preempt - if [ -z "$_voluntary_preempt" ]; then - plain "" - plain "Use explicit preemption points?" - plain "It can improve latency on PDS (at the cost of throughput)" - plain "and improve throughput on other schedulers (at the cost of latency)" - read -rp "`echo $' > N/y : '`" CONDITION5; - fi - if [[ "$CONDITION5" =~ [yY] ]] || [ "$_voluntary_preempt" = "true" ]; then - sed -i -e 's/CONFIG_PREEMPT=y/# CONFIG_PREEMPT is not set/' ./.config - sed -i -e 's/CONFIG_PREEMPT_LL=y/# CONFIG_PREEMPT_LL is not set/' ./.config - sed -i -e 's/# CONFIG_PREEMPT_VOLUNTARY is not set/CONFIG_PREEMPT_VOLUNTARY=y/' ./.config - fi - - # Open Firmware support - if [ -z "$_OFenable" ]; then - plain "" - plain "Enable Device Tree and Open Firmware support?" - read -rp "`echo $' > N/y : '`" CONDITION6; - fi - if [[ "$CONDITION6" =~ [yY] ]] || [ "$_OFenable" = "true" ]; then - sed -i -e 's/# CONFIG_OF is not set/CONFIG_OF=y/' ./.config - fi - - # acs override - if [ -z "$_acs_override" ]; then - plain "" - plain "Use ACS override patch?" 
- plain "https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29" - read -rp "`echo $' > N/y : '`" CONDITION7; - fi - if [[ "$CONDITION7" =~ [yY] ]] || [ "$_acs_override" = "true" ]; then - msg2 "Patching ACS override" - patch -Np1 -i "$srcdir"/0006-add-acs-overrides_iommu.patch - fi - - # bcachefs - #if [ -z "$_bcachefs" ]; then - # plain "" - # plain "Add Bcache filesystem support? You'll have to install bcachefs-tools-git from AUR for utilities." - # plain "https://bcachefs.org/" - # read -rp "`echo $' > N/y : '`" CONDITION8; - #fi - #if [[ "$CONDITION8" =~ [yY] ]] || [ "$_bcachefs" = "true" ]; then - # msg2 "Patching Bcache filesystem support override" - # patch -Np1 -i "$srcdir"/0008-5.10-bcachefs.patch - # echo "CONFIG_BCACHEFS_FS=m" >> ./.config - # echo "CONFIG_BCACHEFS_QUOTA=y" >> ./.config - # echo "CONFIG_BCACHEFS_POSIX_ACL=y" >> ./.config - # echo "# CONFIG_BCACHEFS_DEBUG is not set" >> ./.config - # echo "# CONFIG_BCACHEFS_TESTS is not set" >> ./.config - # echo "# CONFIG_DEBUG_CLOSURES is not set" >> ./.config - #fi - - # fsync support - if [ -z "$_fsync" ]; then - plain "" - plain "Enable support for fsync, an experimental replacement for esync in Valve Proton 4.11+" - plain "https://steamcommunity.com/games/221410/announcements/detail/2957094910196249305" - read -rp "`echo $' > N/y : '`" CONDITION9; - fi - if [[ "$CONDITION9" =~ [yY] ]] || [ "$_fsync" = "true" ]; then - msg2 "Patching Fsync support" - patch -Np1 -i "$srcdir"/0007-v5.10-fsync.patch - fi - - # ZFS fix - if [ -z "$_zfsfix" ]; then - plain "" - plain "Add back missing symbol for AES-NI/AVX support on ZFS" - plain "https://github.com/NixOS/nixpkgs/blob/master/pkgs/os-specific/linux/kernel/export_kernel_fpu_functions_5_3.patch" - read -rp "`echo $' > N/y : '`" CONDITION11; - fi - if [[ "$CONDITION11" =~ [yY] ]] || [ "$_zfsfix" = "true" ]; then - msg2 "Patching missing symbol for AES-NI/AVX support on ZFS" - patch -Np1 -i "$srcdir"/0011-ZFS-fix.patch - fi - - # Community patches - if [ -n "$_community_patches" ]; then - if [ ! -d "$_where/../../community-patches" ]; then - cd "$_where/../.." && git clone https://github.com/Frogging-Family/community-patches.git && cd "${srcdir}/${_srcpath}" - fi - _community_patches=($_community_patches) - for _p in ${_community_patches[@]}; do - ln -s "$_where"/../../community-patches/linux"$_basever"-tkg/$_p "$_where"/ - done - fi - - # userpatches - if [ "$_user_patches" = "true" ]; then - _userpatch_target="linux-${_basekernel}" - _userpatch_ext="my" - user_patcher - fi - - # Community patches removal - for _p in ${_community_patches[@]}; do - rm -f "$_where"/$_p - done - - if [ "$_distro" = "Arch" ]; then - # don't run depmod on 'make install'. We'll do this ourselves in packaging - sed -i '2iexit 0' scripts/depmod.sh - - # get kernel version - make prepare - fi - - # modprobed-db - if [ -z "$_modprobeddb" ]; then - plain "" - plain "Use modprobed db to clean config from unneeded modules?" - plain "Speeds up compilation considerably. Requires root." - plain "https://wiki.archlinux.org/index.php/Modprobed-db" - plain "!!!! Make sure to have a well populated db !!!!" 
- read -rp "`echo $' > N/y : '`" CONDITIONMPDB; - fi - if [[ "$CONDITIONMPDB" =~ [yY] ]] || [ "$_modprobeddb" = "true" ]; then - sudo modprobed-db recall - yes "" | make localmodconfig - fi - - if [ true = "$_config_fragments" ]; then - local fragments=() - mapfile -d '' -t fragments < <(find "$_where"/ -type f -name "*.myfrag" -print0) - - if [ true = "$_config_fragments_no_confirm" ]; then - printf 'Using config fragment %s\n' "${fragments[@]#$_where/}" - else - for i in "${!fragments[@]}"; do - while true; do - read -r -p 'Found config fragment '"${fragments[$i]#$_where/}"', apply it? [y/N] ' CONDITIONMPDB - CONDITIONMPDB="$(printf '%s' "$CONDITIONMPDB" | tr '[:upper:]' '[:lower:]')" - case "$CONDITIONMPDB" in - y|yes) - break;; - n|no|'') - unset fragments[$i] - break;; - *) - echo 'Please answer with yes or no' - esac - done - done - fi - - if [ 0 -lt "${#fragments[@]}" ]; then - scripts/kconfig/merge_config.sh -m .config "${fragments[@]}" - fi - fi - - # menuconfig / nconfig - if [ -z "$_menunconfig" ]; then - plain "" - plain "*Optional* For advanced users - Do you want to use make menuconfig or nconfig" - plain "to configure the kernel before building it?" - plain "If you do, make sure your terminal is currently" - plain "at least 19 lines by 80 columns large or you'll get an error :D" - read -rp "`echo $' > 0. nope\n 1. menuconfig\n 2. nconfig\n 3. xconfig\n choice[0-3?]: '`" CONDITIONMNC; - _menunconfig="$CONDITIONMNC" - fi - if [ 1 = "$_menunconfig" ]; then - cp .config .config.orig - make menuconfig - elif [ 2 = "$_menunconfig" ]; then - cp .config .config.orig - make nconfig - elif [ 3 = "$_menunconfig" ]; then - cp .config .config.orig - make xconfig - else - # rewrite configuration - yes "" | make config >/dev/null - fi - if [ 1 = "$_menunconfig" ] || [ 2 = "$_menunconfig" ] || [ 3 = "$_menunconfig" ]; then - if [ -z "${_diffconfig}" ]; then - while true; do - read -r -p 'Generate a config fragment from your changes? [y/N] ' CONDITIONF - CONDITIONF="$(printf '%s' "$CONDITIONF" | tr '[:upper:]' '[:lower:]')" - case "$CONDITIONF" in - y|yes) - _diffconfig=true - break;; - n|no|'') - _diffconfig=false - break;; - *) - echo 'Please answer with yes or no' - esac - done - fi - if [ true = "$_diffconfig" ]; then - if [ -z "$_diffconfig_name" ]; then - IFS= read -r -p 'Filename for the config fragment [leave empty to not generate fragment]: ' _diffconfig_name - fi - if [ -z "$_diffconfig_name" ]; then - echo 'No file name given, not generating config fragment.' 
- else ( - prev_pwd="${PWD:-$(pwd)}" - cd "$_where" - "${prev_pwd}/scripts/diffconfig" -m "${prev_pwd}/.config.orig" "${prev_pwd}/.config" > "$_diffconfig_name" - ) fi - fi - rm .config.orig - fi - - if [ "$_distro" = "Arch" ]; then - make -s kernelrelease > version - msg2 "Prepared %s version %s" "$pkgbase" "$( -From: Serge Hallyn -Date: Fri, 31 May 2013 19:12:12 +0100 -Subject: [PATCH] add sysctl to disallow unprivileged CLONE_NEWUSER by default - -Signed-off-by: Serge Hallyn -[bwh: Remove unneeded binary sysctl bits] -Signed-off-by: Daniel Micay ---- - kernel/fork.c | 15 +++++++++++++++ - kernel/sysctl.c | 12 ++++++++++++ - kernel/user_namespace.c | 3 +++ - 3 files changed, 30 insertions(+) - -diff --git a/kernel/fork.c b/kernel/fork.c -index 07cc743698d3668e..4011d68a8ff9305c 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -102,6 +102,11 @@ - - #define CREATE_TRACE_POINTS - #include -+#ifdef CONFIG_USER_NS -+extern int unprivileged_userns_clone; -+#else -+#define unprivileged_userns_clone 0 -+#endif - - /* - * Minimum number of threads to boot the kernel -@@ -1555,6 +1560,10 @@ static __latent_entropy struct task_struct *copy_process( - if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) - return ERR_PTR(-EINVAL); - -+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) -+ if (!capable(CAP_SYS_ADMIN)) -+ return ERR_PTR(-EPERM); -+ - /* - * Thread groups must share signals as well, and detached threads - * can only be started up within the thread group. -@@ -2348,6 +2357,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) - if (unshare_flags & CLONE_NEWNS) - unshare_flags |= CLONE_FS; - -+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { -+ err = -EPERM; -+ if (!capable(CAP_SYS_ADMIN)) -+ goto bad_unshare_out; -+ } -+ - err = check_unshare_flags(unshare_flags); - if (err) - goto bad_unshare_out; -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index b86520ed3fb60fbf..f7dab3760839f1a1 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -105,6 +105,9 @@ extern int core_uses_pid; - - #if defined(CONFIG_SYSCTL) - -+#ifdef CONFIG_USER_NS -+extern int unprivileged_userns_clone; -+#endif - /* Constants used for minimum and maximum */ - #ifdef CONFIG_LOCKUP_DETECTOR - static int sixty = 60; -@@ -513,6 +516,15 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_USER_NS -+ { -+ .procname = "unprivileged_userns_clone", -+ .data = &unprivileged_userns_clone, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+#endif - #ifdef CONFIG_PROC_SYSCTL - { - .procname = "tainted", -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index c490f1e4313b998a..dd03bd39d7bf194d 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -24,6 +24,9 @@ - #include - #include - -+/* sysctl */ -+int unprivileged_userns_clone; -+ - static struct kmem_cache *user_ns_cachep __read_mostly; - static DEFINE_MUTEX(userns_state_mutex); - --- -2.15.1 - -From b5202296055dd333db4425120d3f93ef4e6a0573 Mon Sep 17 00:00:00 2001 -From: "Jan Alexander Steffens (heftig)" -Date: Thu, 7 Dec 2017 13:50:48 +0100 -Subject: ZEN: Add CONFIG for unprivileged_userns_clone - -This way our default behavior continues to match the vanilla kernel. 
---- - init/Kconfig | 16 ++++++++++++++++ - kernel/user_namespace.c | 4 ++++ - 2 files changed, 20 insertions(+) - -diff --git a/init/Kconfig b/init/Kconfig -index 4592bf7997c0..f3df02990aff 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1004,6 +1004,22 @@ config USER_NS - - If unsure, say N. - -+config USER_NS_UNPRIVILEGED -+ bool "Allow unprivileged users to create namespaces" -+ default y -+ depends on USER_NS -+ help -+ When disabled, unprivileged users will not be able to create -+ new namespaces. Allowing users to create their own namespaces -+ has been part of several recent local privilege escalation -+ exploits, so if you need user namespaces but are -+ paranoid^Wsecurity-conscious you want to disable this. -+ -+ This setting can be overridden at runtime via the -+ kernel.unprivileged_userns_clone sysctl. -+ -+ If unsure, say Y. -+ - config PID_NS - bool "PID Namespaces" - default y -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 6b9dbc257e34..107b17f0d528 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -27,7 +27,11 @@ - #include - - /* sysctl */ -+#ifdef CONFIG_USER_NS_UNPRIVILEGED -+int unprivileged_userns_clone = 1; -+#else - int unprivileged_userns_clone; -+#endif - - static struct kmem_cache *user_ns_cachep __read_mostly; - static DEFINE_MUTEX(userns_state_mutex); diff --git a/linux510-rc-tkg/linux510-tkg-patches/0002-clear-patches.patch b/linux510-rc-tkg/linux510-tkg-patches/0002-clear-patches.patch deleted file mode 100644 index 22a32f5..0000000 --- a/linux510-rc-tkg/linux510-tkg-patches/0002-clear-patches.patch +++ /dev/null @@ -1,360 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Mon, 14 Mar 2016 11:10:58 -0600 -Subject: [PATCH] pci pme wakeups - -Reduce wakeups for PME checks, which are a workaround for miswired -boards (sadly, too many of them) in laptops. 
---- - drivers/pci/pci.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c -index c9338f9..6974fbf 100644 ---- a/drivers/pci/pci.c -+++ b/drivers/pci/pci.c -@@ -62,7 +62,7 @@ struct pci_pme_device { - struct pci_dev *dev; - }; - --#define PME_TIMEOUT 1000 /* How long between PME checks */ -+#define PME_TIMEOUT 4000 /* How long between PME checks */ - - static void pci_dev_d3_sleep(struct pci_dev *dev) - { --- -https://clearlinux.org - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Sat, 19 Mar 2016 21:32:19 -0400 -Subject: [PATCH] intel_idle: tweak cpuidle cstates - -Increase target_residency in cpuidle cstate - -Tune intel_idle to be a bit less agressive; -Clear linux is cleaner in hygiene (wakupes) than the average linux, -so we can afford changing these in a way that increases -performance while keeping power efficiency ---- - drivers/idle/intel_idle.c | 44 +++++++++++++++++++-------------------- - 1 file changed, 22 insertions(+), 22 deletions(-) - -diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c -index f449584..c994d24 100644 ---- a/drivers/idle/intel_idle.c -+++ b/drivers/idle/intel_idle.c -@@ -531,7 +531,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -539,7 +539,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 33, -- .target_residency = 100, -+ .target_residency = 900, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -547,7 +547,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 133, -- .target_residency = 400, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -555,7 +555,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x32", - .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 166, -- .target_residency = 500, -+ .target_residency = 1500, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -563,7 +563,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 300, -- .target_residency = 900, -+ .target_residency = 2000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -571,7 +571,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 600, -- .target_residency = 1800, -+ .target_residency = 5000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -579,7 +579,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 2600, -- .target_residency = 7700, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -599,7 +599,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, - 
.exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -607,7 +607,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 40, -- .target_residency = 100, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -615,7 +615,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 133, -- .target_residency = 400, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -623,7 +623,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x32", - .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 166, -- .target_residency = 500, -+ .target_residency = 2000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -631,7 +631,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 300, -- .target_residency = 900, -+ .target_residency = 4000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -639,7 +639,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 600, -- .target_residency = 1800, -+ .target_residency = 7000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -647,7 +647,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 2600, -- .target_residency = 7700, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -668,7 +668,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -676,7 +676,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 70, -- .target_residency = 100, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -684,7 +684,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 85, -- .target_residency = 200, -+ .target_residency = 600, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -692,7 +692,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x33", - .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 124, -- .target_residency = 800, -+ .target_residency = 3000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -700,7 +700,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 200, -- .target_residency = 800, -+ .target_residency = 3200, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -708,7 +708,7 @@ static struct cpuidle_state 
skl_cstates[] __initdata = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 480, -- .target_residency = 5000, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -716,7 +716,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 890, -- .target_residency = 5000, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -737,7 +737,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 300, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { --- -https://clearlinux.org - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Fri, 6 Jan 2017 15:34:09 +0000 -Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little - bigger than default - ---- - net/ipv4/tcp.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index 30c1142..4345075 100644 ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -4201,8 +4201,8 @@ void __init tcp_init(void) - tcp_init_mem(); - /* Set per-socket limits to no more than 1/128 the pressure threshold */ - limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); -- max_wshare = min(4UL*1024*1024, limit); -- max_rshare = min(6UL*1024*1024, limit); -+ max_wshare = min(16UL*1024*1024, limit); -+ max_rshare = min(16UL*1024*1024, limit); - - init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; - init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; --- -https://clearlinux.org - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Sun, 18 Feb 2018 23:35:41 +0000 -Subject: [PATCH] locking: rwsem: spin faster - -tweak rwsem owner spinning a bit ---- - kernel/locking/rwsem.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c -index f11b9bd..1bbfcc1 100644 ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -717,6 +717,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) - struct task_struct *new, *owner; - unsigned long flags, new_flags; - enum owner_state state; -+ int i = 0; - - owner = rwsem_owner_flags(sem, &flags); - state = rwsem_owner_state(owner, flags, nonspinnable); -@@ -750,7 +751,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) - break; - } - -- cpu_relax(); -+ if (i++ > 1000) -+ cpu_relax(); - } - rcu_read_unlock(); - --- -https://clearlinux.org - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Thu, 2 Jun 2016 23:36:32 -0500 -Subject: [PATCH] initialize ata before graphics - -ATA init is the long pole in the boot process, and its asynchronous. 
-move the graphics init after it so that ata and graphics initialize -in parallel ---- - drivers/Makefile | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/drivers/Makefile b/drivers/Makefile -index c0cd1b9..af1e2fb 100644 ---- a/drivers/Makefile -+++ b/drivers/Makefile -@@ -59,15 +59,8 @@ obj-y += char/ - # iommu/ comes before gpu as gpu are using iommu controllers - obj-y += iommu/ - --# gpu/ comes after char for AGP vs DRM startup and after iommu --obj-y += gpu/ -- - obj-$(CONFIG_CONNECTOR) += connector/ - --# i810fb and intelfb depend on char/agp/ --obj-$(CONFIG_FB_I810) += video/fbdev/i810/ --obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ -- - obj-$(CONFIG_PARPORT) += parport/ - obj-$(CONFIG_NVM) += lightnvm/ - obj-y += base/ block/ misc/ mfd/ nfc/ -@@ -80,6 +73,14 @@ obj-$(CONFIG_IDE) += ide/ - obj-y += scsi/ - obj-y += nvme/ - obj-$(CONFIG_ATA) += ata/ -+ -+# gpu/ comes after char for AGP vs DRM startup and after iommu -+obj-y += gpu/ -+ -+# i810fb and intelfb depend on char/agp/ -+obj-$(CONFIG_FB_I810) += video/fbdev/i810/ -+obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ -+ - obj-$(CONFIG_TARGET_CORE) += target/ - obj-$(CONFIG_MTD) += mtd/ - obj-$(CONFIG_SPI) += spi/ --- -https://clearlinux.org - diff --git a/linux510-rc-tkg/linux510-tkg-patches/0003-glitched-base.patch b/linux510-rc-tkg/linux510-tkg-patches/0003-glitched-base.patch deleted file mode 100644 index d0bb7d3..0000000 --- a/linux510-rc-tkg/linux510-tkg-patches/0003-glitched-base.patch +++ /dev/null @@ -1,678 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: [PATCH 01/17] glitched - ---- - scripts/mkcompile_h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h -index baf3ab8d9d49..854e32e6aec7 100755 ---- a/scripts/mkcompile_h -+++ b/scripts/mkcompile_h -@@ -41,8 +41,8 @@ else - fi - - UTS_VERSION="#$VERSION" --CONFIG_FLAGS="" --if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi -+CONFIG_FLAGS="TKG" -+if [ -n "$SMP" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS SMP"; fi - if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi - if [ -n "$PREEMPT_RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT_RT"; fi - --- -2.28.0 - - -From c304f43d14e98d4bf1215fc10bc5012f554bdd8a Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 29 Jan 2018 16:59:22 +0000 -Subject: [PATCH 02/17] dcache: cache_pressure = 50 decreases the rate at which - VFS caches are reclaimed - -Signed-off-by: Alexandre Frade ---- - fs/dcache.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/dcache.c b/fs/dcache.c -index 361ea7ab30ea..0c5cf69b241a 100644 ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -71,7 +71,7 @@ - * If no ancestor relationship: - * arbitrary, since it's serialized on rename_lock - */ --int sysctl_vfs_cache_pressure __read_mostly = 100; -+int sysctl_vfs_cache_pressure __read_mostly = 50; - EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); - - __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); --- -2.28.0 - - -From 28f32f59d9d55ac7ec3a20b79bdd02d2a0a5f7e1 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 29 Jan 2018 18:29:13 +0000 -Subject: [PATCH 03/17] sched/core: nr_migrate = 128 increases number of tasks - to iterate in a single balance run. 
- -Signed-off-by: Alexandre Frade ---- - kernel/sched/core.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index f788cd61df21..2bfbb4213707 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -59,7 +59,7 @@ const_debug unsigned int sysctl_sched_features = - * Number of tasks to iterate in a single balance run. - * Limited because this is done with IRQs disabled. - */ --const_debug unsigned int sysctl_sched_nr_migrate = 32; -+const_debug unsigned int sysctl_sched_nr_migrate = 128; - - /* - * period over which we measure -rt task CPU usage in us. -@@ -71,9 +71,9 @@ __read_mostly int scheduler_running; - - /* - * part of the period that we allow rt tasks to run in us. -- * default: 0.95s -+ * XanMod default: 0.98s - */ --int sysctl_sched_rt_runtime = 950000; -+int sysctl_sched_rt_runtime = 980000; - - /* - * __task_rq_lock - lock the rq @p resides on. --- -2.28.0 - - -From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 29 Jan 2018 17:41:29 +0000 -Subject: [PATCH 04/17] scripts: disable the localversion "+" tag of a git repo - -Signed-off-by: Alexandre Frade ---- - scripts/setlocalversion | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/scripts/setlocalversion b/scripts/setlocalversion -index 20f2efd57b11..0552d8b9f582 100755 ---- a/scripts/setlocalversion -+++ b/scripts/setlocalversion -@@ -54,7 +54,7 @@ scm_version() - # If only the short version is requested, don't bother - # running further git commands - if $short; then -- echo "+" -+ # echo "+" - return - fi - # If we are past a tagged commit (like --- -2.28.0 - - -From 61fcb33fb0de8bc0f060e0a1ada38ed149217f4d Mon Sep 17 00:00:00 2001 -From: Oleksandr Natalenko -Date: Wed, 11 Dec 2019 11:46:19 +0100 -Subject: [PATCH 05/17] init/Kconfig: enable -O3 for all arches - -Building a kernel with -O3 may help in hunting bugs like [1] and thus -using this switch should not be restricted to one specific arch only. - -With that, lets expose it for everyone. - -[1] https://lore.kernel.org/lkml/673b885183fb64f1cbb3ed2387524077@natalenko.name/ - -Signed-off-by: Oleksandr Natalenko ---- - init/Kconfig | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/init/Kconfig b/init/Kconfig -index 0498af567f70..3ae8678e1145 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1278,7 +1278,6 @@ config CC_OPTIMIZE_FOR_PERFORMANCE - - config CC_OPTIMIZE_FOR_PERFORMANCE_O3 - bool "Optimize more for performance (-O3)" -- depends on ARC - help - Choosing this option will pass "-O3" to your compiler to optimize - the kernel yet more for performance. 
--- -2.28.0 - - -From 360c6833e07cc9fdef5746f6bc45bdbc7212288d Mon Sep 17 00:00:00 2001 -From: "Jan Alexander Steffens (heftig)" -Date: Fri, 26 Oct 2018 11:22:33 +0100 -Subject: [PATCH 06/17] infiniband: Fix __read_overflow2 error with -O3 - inlining - ---- - drivers/infiniband/core/addr.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 3a98439bba83..6efc4f907f58 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -820,6 +820,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, - union { - struct sockaddr_in _sockaddr_in; - struct sockaddr_in6 _sockaddr_in6; -+ struct sockaddr_ib _sockaddr_ib; - } sgid_addr, dgid_addr; - int ret; - --- -2.28.0 - - -From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001 -From: Etienne Juvigny -Date: Mon, 3 Sep 2018 17:36:25 +0200 -Subject: [PATCH 07/17] Zenify & stuff - ---- - init/Kconfig | 32 ++++++++++++++++++++++++++++++++ - kernel/sched/fair.c | 25 +++++++++++++++++++++++++ - mm/page-writeback.c | 8 ++++++++ - 3 files changed, 65 insertions(+) - -diff --git a/init/Kconfig b/init/Kconfig -index 3ae8678e1145..da708eed0f1e 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -92,6 +92,38 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config ZENIFY -+ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience" -+ default y -+ help -+ Tunes the kernel for responsiveness at the cost of throughput and power usage. -+ -+ --- Virtual Memory Subsystem --------------------------- -+ -+ Mem dirty before bg writeback..: 10 % -> 20 % -+ Mem dirty before sync writeback: 20 % -> 50 % -+ -+ --- Block Layer ---------------------------------------- -+ -+ Queue depth...............: 128 -> 512 -+ Default MQ scheduler......: mq-deadline -> bfq -+ -+ --- CFS CPU Scheduler ---------------------------------- -+ -+ Scheduling latency.............: 6 -> 3 ms -+ Minimal granularity............: 0.75 -> 0.3 ms -+ Wakeup granularity.............: 1 -> 0.5 ms -+ CPU migration cost.............: 0.5 -> 0.25 ms -+ Bandwidth slice size...........: 5 -> 3 ms -+ Ondemand fine upscaling limit..: 95 % -> 85 % -+ -+ --- MuQSS CPU Scheduler -------------------------------- -+ -+ Scheduling interval............: 6 -> 3 ms -+ ISO task max realtime use......: 70 % -> 25 % -+ Ondemand coarse upscaling limit: 80 % -> 45 % -+ Ondemand fine upscaling limit..: 95 % -> 45 % -+ - config BROKEN - bool - -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 6b3b59cc51d6..2a0072192c3d 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -37,8 +37,13 @@ - * - * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_latency = 3000000ULL; -+static unsigned int normalized_sysctl_sched_latency = 3000000ULL; -+#else - unsigned int sysctl_sched_latency = 6000000ULL; - static unsigned int normalized_sysctl_sched_latency = 6000000ULL; -+#endif - - /* - * The initial- and re-scaling of tunables is configurable -@@ -58,13 +63,22 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L - * - * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_min_granularity = 300000ULL; -+static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL; -+#else - unsigned int sysctl_sched_min_granularity = 750000ULL; - static unsigned int 
normalized_sysctl_sched_min_granularity = 750000ULL; -+#endif - - /* - * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity - */ -+#ifdef CONFIG_ZENIFY -+static unsigned int sched_nr_latency = 10; -+#else - static unsigned int sched_nr_latency = 8; -+#endif - - /* - * After fork, child runs first. If set to 0 (default) then -@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; - * - * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_wakeup_granularity = 500000UL; -+static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL; -+ -+const_debug unsigned int sysctl_sched_migration_cost = 50000UL; -+#else - unsigned int sysctl_sched_wakeup_granularity = 1000000UL; - static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; - - const_debug unsigned int sysctl_sched_migration_cost = 500000UL; -+#endif - - int sched_thermal_decay_shift; - static int __init setup_sched_thermal_decay_shift(char *str) -@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu) - * - * (default: 5 msec, units: microseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; -+#else - unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; - #endif -+#endif - - static inline void update_load_add(struct load_weight *lw, unsigned long inc) - { -diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index 28b3e7a67565..01a1aef2b9b1 100644 ---- a/mm/page-writeback.c -+++ b/mm/page-writeback.c -@@ -71,7 +71,11 @@ static long ratelimit_pages = 32; - /* - * Start background writeback (via writeback threads) at this percentage - */ -+#ifdef CONFIG_ZENIFY -+int dirty_background_ratio = 20; -+#else - int dirty_background_ratio = 10; -+#endif - - /* - * dirty_background_bytes starts at 0 (disabled) so that it is a function of -@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable; - /* - * The generator of dirty data starts writeback at this percentage - */ -+#ifdef CONFIG_ZENIFY -+int vm_dirty_ratio = 50; -+#else - int vm_dirty_ratio = 20; -+#endif - - /* - * vm_dirty_bytes starts at 0 (disabled) so that it is a function of --- -2.28.0 - - -From e92e67143385cf285851e12aa8b7f083dd38dd24 Mon Sep 17 00:00:00 2001 -From: Steven Barrett -Date: Sun, 16 Jan 2011 18:57:32 -0600 -Subject: [PATCH 08/17] ZEN: Allow TCP YeAH as default congestion control - -4.4: In my tests YeAH dramatically slowed down transfers over a WLAN, - reducing throughput from ~65Mbps (CUBIC) to ~7MBps (YeAH) over 10 - seconds (netperf TCP_STREAM) including long stalls. - - Be careful when choosing this. 
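The writeback half of the ZENIFY option maps onto plain VM sysctls, and the CFS figures are exposed as sysctls on ~5.10 kernels built with CONFIG_SCHED_DEBUG, so the tuning can be approximated at runtime before rebuilding. A rough equivalent of the defaults listed in the help text above:

    # ZENIFY writeback thresholds (mainline defaults: 10 / 20)
    sysctl -w vm.dirty_background_ratio=20
    sysctl -w vm.dirty_ratio=50
    # CFS latency and granularity, values in nanoseconds (needs CONFIG_SCHED_DEBUG)
    sysctl -w kernel.sched_latency_ns=3000000
    sysctl -w kernel.sched_min_granularity_ns=300000
    sysctl -w kernel.sched_wakeup_granularity_ns=500000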
~heftig ---- - net/ipv4/Kconfig | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig -index e64e59b536d3..bfb55ef7ebbe 100644 ---- a/net/ipv4/Kconfig -+++ b/net/ipv4/Kconfig -@@ -691,6 +691,9 @@ choice - config DEFAULT_VEGAS - bool "Vegas" if TCP_CONG_VEGAS=y - -+ config DEFAULT_YEAH -+ bool "YeAH" if TCP_CONG_YEAH=y -+ - config DEFAULT_VENO - bool "Veno" if TCP_CONG_VENO=y - -@@ -724,6 +727,7 @@ config DEFAULT_TCP_CONG - default "htcp" if DEFAULT_HTCP - default "hybla" if DEFAULT_HYBLA - default "vegas" if DEFAULT_VEGAS -+ default "yeah" if DEFAULT_YEAH - default "westwood" if DEFAULT_WESTWOOD - default "veno" if DEFAULT_VENO - default "reno" if DEFAULT_RENO --- -2.28.0 - - -From 76dbe7477bfde1b5e8bf29a71b5af7ab2be9b98e Mon Sep 17 00:00:00 2001 -From: Steven Barrett -Date: Wed, 28 Nov 2018 19:01:27 -0600 -Subject: [PATCH 09/17] zen: Use [defer+madvise] as default khugepaged defrag - strategy - -For some reason, the default strategy to respond to THP fault fallbacks -is still just madvise, meaning stall if the program wants transparent -hugepages, but don't trigger a background reclaim / compaction if THP -begins to fail allocations. This creates a snowball affect where we -still use the THP code paths, but we almost always fail once a system -has been active and busy for a while. - -The option "defer" was created for interactive systems where THP can -still improve performance. If we have to fallback to a regular page due -to an allocation failure or anything else, we will trigger a background -reclaim and compaction so future THP attempts succeed and previous -attempts eventually have their smaller pages combined without stalling -running applications. - -We still want madvise to stall applications that explicitely want THP, -so defer+madvise _does_ make a ton of sense. Make it the default for -interactive systems, especially if the kernel maintainer left -transparent hugepages on "always". - -Reasoning and details in the original patch: https://lwn.net/Articles/711248/ ---- - mm/huge_memory.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index 74300e337c3c..9277f22c10a7 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly = - #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE - (1< -Date: Wed, 24 Oct 2018 16:58:52 -0300 -Subject: [PATCH 10/17] net/sched: allow configuring cake qdisc as default - -Signed-off-by: Alexandre Frade ---- - net/sched/Kconfig | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/net/sched/Kconfig b/net/sched/Kconfig -index 84badf00647e..6a922bca9f39 100644 ---- a/net/sched/Kconfig -+++ b/net/sched/Kconfig -@@ -471,6 +471,9 @@ choice - config DEFAULT_SFQ - bool "Stochastic Fair Queue" if NET_SCH_SFQ - -+ config DEFAULT_CAKE -+ bool "Common Applications Kept Enhanced" if NET_SCH_CAKE -+ - config DEFAULT_PFIFO_FAST - bool "Priority FIFO Fast" - endchoice -@@ -481,6 +484,7 @@ config DEFAULT_NET_SCH - default "fq" if DEFAULT_FQ - default "fq_codel" if DEFAULT_FQ_CODEL - default "sfq" if DEFAULT_SFQ -+ default "cake" if DEFAULT_CAKE - default "pfifo_fast" - endif - --- -2.28.0 - - -From 816ee502759e954304693813bd03d94986b28dba Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Mon, 18 Feb 2019 17:40:57 +0100 -Subject: [PATCH 11/17] mm: Set watermark_scale_factor to 200 (from 10) - -Multiple users have reported it's helping reducing/eliminating stuttering -with DXVK. 
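The three defaults changed in patches 08-10 above (YeAH congestion control, the defer+madvise khugepaged strategy and the cake qdisc) all have runtime switches on stock kernels, which makes them easy to test before rebuilding. A sketch, assuming the tcp_yeah and sch_cake modules are available:

    # patch 08: TCP congestion control
    modprobe tcp_yeah
    sysctl -w net.ipv4.tcp_congestion_control=yeah
    # patch 09: khugepaged defrag strategy
    echo defer+madvise > /sys/kernel/mm/transparent_hugepage/defrag
    # patch 10: default qdisc for newly created interfaces
    sysctl -w net.core.default_qdisc=cake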
---- - mm/page_alloc.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 898ff44f2c7b..e72074034793 100644 ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -330,7 +330,7 @@ int watermark_boost_factor __read_mostly; - #else - int watermark_boost_factor __read_mostly = 15000; - #endif --int watermark_scale_factor = 10; -+int watermark_scale_factor = 200; - - static unsigned long nr_kernel_pages __initdata; - static unsigned long nr_all_pages __initdata; --- -2.28.0 - - -From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Fri, 19 Apr 2019 12:33:38 +0200 -Subject: [PATCH 12/17] Set vm.max_map_count to 262144 by default - -The value is still pretty low, and AMD64-ABI and ELF extended numbering -supports that, so we should be fine on modern x86 systems. - -This fixes crashes in some applications using more than 65535 vmas (also -affects some windows games running in wine, such as Star Citizen). ---- - include/linux/mm.h | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/include/linux/mm.h b/include/linux/mm.h -index bc05c3588aa3..b0cefe94920d 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -190,8 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page) - * not a hard limit any more. Although some userspace tools can be surprised by - * that. - */ --#define MAPCOUNT_ELF_CORE_MARGIN (5) --#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) -+#define DEFAULT_MAX_MAP_COUNT (262144) - - extern int sysctl_max_map_count; - --- -2.28.0 - - -From 3a34034dba5efe91bcec491efe8c66e8087f509b Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Mon, 27 Jul 2020 00:19:18 +0200 -Subject: [PATCH 13/17] mm: bump DEFAULT_MAX_MAP_COUNT - -Some games such as Detroit: Become Human tend to be very crash prone with -lower values. ---- - include/linux/mm.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/linux/mm.h b/include/linux/mm.h -index b0cefe94920d..890165099b07 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -190,7 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page) - * not a hard limit any more. Although some userspace tools can be surprised by - * that. - */ --#define DEFAULT_MAX_MAP_COUNT (262144) -+#define DEFAULT_MAX_MAP_COUNT (524288) - - extern int sysctl_max_map_count; - --- -2.28.0 - - -From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 25 Nov 2019 15:13:06 -0300 -Subject: [PATCH 14/17] elevator: set default scheduler to bfq for blk-mq - -Signed-off-by: Alexandre Frade ---- - block/elevator.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/block/elevator.c b/block/elevator.c -index 4eab3d70e880..79669aa39d79 100644 ---- a/block/elevator.c -+++ b/block/elevator.c -@@ -623,15 +623,15 @@ static inline bool elv_support_iosched(struct request_queue *q) - } - - /* -- * For single queue devices, default to using mq-deadline. If we have multiple -- * queues or mq-deadline is not available, default to "none". -+ * For single queue devices, default to using bfq. If we have multiple -+ * queues or bfq is not available, default to "none". 
- */ - static struct elevator_type *elevator_get_default(struct request_queue *q) - { - if (q->nr_hw_queues != 1) - return NULL; - -- return elevator_get(q, "mq-deadline", false); -+ return elevator_get(q, "bfq", false); - } - - /* --- -2.28.0 - -From 3c229f434aca65c4ca61772bc03c3e0370817b92 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 3 Aug 2020 17:05:04 +0000 -Subject: [PATCH 16/17] mm: set 2 megabytes for address_space-level file - read-ahead pages size - -Signed-off-by: Alexandre Frade ---- - include/linux/pagemap.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h -index cf2468da68e9..007dea784451 100644 ---- a/include/linux/pagemap.h -+++ b/include/linux/pagemap.h -@@ -655,7 +655,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); - void delete_from_page_cache_batch(struct address_space *mapping, - struct pagevec *pvec); - --#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) -+#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE) - - void page_cache_sync_readahead(struct address_space *, struct file_ra_state *, - struct file *, pgoff_t index, unsigned long req_count); --- -2.28.0 - - -From 716f41cf6631f3a85834dcb67b4ce99185b6387f Mon Sep 17 00:00:00 2001 -From: Steven Barrett -Date: Wed, 15 Jan 2020 20:43:56 -0600 -Subject: [PATCH 17/17] ZEN: intel-pstate: Implement "enable" parameter - -If intel-pstate is compiled into the kernel, it will preempt the loading -of acpi-cpufreq so you can take advantage of hardware p-states without -any friction. - -However, intel-pstate is not completely superior to cpufreq's ondemand -for one reason. There's no concept of an up_threshold property. - -In ondemand, up_threshold essentially reduces the maximum utilization to -compare against, allowing you to hit max frequencies and turbo boost -from a much lower core utilization. - -With intel-pstate, you have the concept of minimum and maximum -performance, but no tunable that lets you define, maximum frequency -means 50% core utilization. For just this oversight, there's reasons -you may want ondemand. - -Lets support setting "enable" in kernel boot parameters. This lets -kernel maintainers include "intel_pstate=disable" statically in the -static boot parameters, but let users of the kernel override this -selection. 
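Most of the defaults changed in patches 11-16 above can likewise be tested on an unpatched kernel through sysctl and sysfs; only the "enable" keyword added by patch 17 is specific to this series. A rough sketch, using /dev/sda purely as an example device:

    # patch 11: watermark_scale_factor 10 -> 200
    sysctl -w vm.watermark_scale_factor=200
    # patches 12/13: raise the vma ceiling
    sysctl -w vm.max_map_count=524288
    # patch 14: per-device equivalent of defaulting to bfq (module must be available)
    echo bfq > /sys/block/sda/queue/scheduler
    # patch 16: 2 MiB readahead; sysfs takes KiB
    echo 2048 > /sys/block/sda/queue/read_ahead_kb
    # patch 17: current intel_pstate state, if the driver is present
    cat /sys/devices/system/cpu/intel_pstate/status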
---- - Documentation/admin-guide/kernel-parameters.txt | 3 +++ - drivers/cpufreq/intel_pstate.c | 2 ++ - 2 files changed, 5 insertions(+) - -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index fb95fad81c79..3e92fee81e33 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -1857,6 +1857,9 @@ - disable - Do not enable intel_pstate as the default - scaling driver for the supported processors -+ enable -+ Enable intel_pstate in-case "disable" was passed -+ previously in the kernel boot parameters - passive - Use intel_pstate as a scaling driver, but configure it - to work with generic cpufreq governors (instead of -diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index 36a469150ff9..aee891c9b78a 100644 ---- a/drivers/cpufreq/intel_pstate.c -+++ b/drivers/cpufreq/intel_pstate.c -@@ -2845,6 +2845,8 @@ static int __init intel_pstate_setup(char *str) - pr_info("HWP disabled\n"); - no_hwp = 1; - } -+ if (!strcmp(str, "enable")) -+ no_load = 0; - if (!strcmp(str, "force")) - force_load = 1; - if (!strcmp(str, "hwp_only")) --- -2.28.0 - diff --git a/linux510-rc-tkg/linux510-tkg-patches/0003-glitched-cfs.patch b/linux510-rc-tkg/linux510-tkg-patches/0003-glitched-cfs.patch deleted file mode 100644 index 06b7f02..0000000 --- a/linux510-rc-tkg/linux510-tkg-patches/0003-glitched-cfs.patch +++ /dev/null @@ -1,72 +0,0 @@ -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. 
-+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - diff --git a/linux510-rc-tkg/linux510-tkg-patches/0004-5.10-ck1.patch b/linux510-rc-tkg/linux510-tkg-patches/0004-5.10-ck1.patch deleted file mode 100644 index 00e7c4d..0000000 --- a/linux510-rc-tkg/linux510-tkg-patches/0004-5.10-ck1.patch +++ /dev/null @@ -1,13369 +0,0 @@ -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index a1068742a6df..d2a8f1c637d2 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -4595,6 +4595,14 @@ - Memory area to be used by remote processor image, - managed by CMA. - -+ rqshare= [X86] Select the MuQSS scheduler runqueue sharing type. -+ Format: -+ smt -- Share SMT (hyperthread) sibling runqueues -+ mc -- Share MC (multicore) sibling runqueues -+ smp -- Share SMP runqueues -+ none -- So not share any runqueues -+ Default value is mc -+ - rw [KNL] Mount root device read-write on boot - - S [KNL] Run init in single mode -diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index d4b32cc32bb7..9e1e71fc66d0 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -436,6 +436,16 @@ this allows system administrators to override the - ``IA64_THREAD_UAC_NOPRINT`` ``prctl`` and avoid logs being flooded. - - -+iso_cpu: (MuQSS CPU scheduler only) -+=================================== -+ -+This sets the percentage cpu that the unprivileged SCHED_ISO tasks can -+run effectively at realtime priority, averaged over a rolling five -+seconds over the -whole- system, meaning all cpus. -+ -+Set to 70 (percent) by default. -+ -+ - kexec_load_disabled - =================== - -@@ -1077,6 +1087,20 @@ ROM/Flash boot loader. Maybe to tell it what to do after - rebooting. ??? - - -+rr_interval: (MuQSS CPU scheduler only) -+======================================= -+ -+This is the smallest duration that any cpu process scheduling unit -+will run for. Increasing this value can increase throughput of cpu -+bound tasks substantially but at the expense of increased latencies -+overall. Conversely decreasing it will decrease average and maximum -+latencies but at the expense of throughput. This value is in -+milliseconds and the default value chosen depends on the number of -+cpus available at scheduler initialisation with a minimum of 6. -+ -+Valid values are from 1-1000. -+ -+ - sched_energy_aware - ================== - -@@ -1515,3 +1539,13 @@ is 10 seconds. - - The softlockup threshold is (``2 * watchdog_thresh``). Setting this - tunable to zero will disable lockup detection altogether. -+ -+ -+yield_type: (MuQSS CPU scheduler only) -+====================================== -+ -+This determines what type of yield calls to sched_yield will perform. -+ -+ 0: No yield. -+ 1: Yield only to better priority/deadline tasks. (default) -+ 2: Expire timeslice and recalculate deadline. -diff --git a/Documentation/scheduler/sched-BFS.txt b/Documentation/scheduler/sched-BFS.txt -new file mode 100644 -index 000000000000..c0282002a079 ---- /dev/null -+++ b/Documentation/scheduler/sched-BFS.txt -@@ -0,0 +1,351 @@ -+BFS - The Brain Fuck Scheduler by Con Kolivas. -+ -+Goals. 
-+ -+The goal of the Brain Fuck Scheduler, referred to as BFS from here on, is to -+completely do away with the complex designs of the past for the cpu process -+scheduler and instead implement one that is very simple in basic design. -+The main focus of BFS is to achieve excellent desktop interactivity and -+responsiveness without heuristics and tuning knobs that are difficult to -+understand, impossible to model and predict the effect of, and when tuned to -+one workload cause massive detriment to another. -+ -+ -+Design summary. -+ -+BFS is best described as a single runqueue, O(n) lookup, earliest effective -+virtual deadline first design, loosely based on EEVDF (earliest eligible virtual -+deadline first) and my previous Staircase Deadline scheduler. Each component -+shall be described in order to understand the significance of, and reasoning for -+it. The codebase when the first stable version was released was approximately -+9000 lines less code than the existing mainline linux kernel scheduler (in -+2.6.31). This does not even take into account the removal of documentation and -+the cgroups code that is not used. -+ -+Design reasoning. -+ -+The single runqueue refers to the queued but not running processes for the -+entire system, regardless of the number of CPUs. The reason for going back to -+a single runqueue design is that once multiple runqueues are introduced, -+per-CPU or otherwise, there will be complex interactions as each runqueue will -+be responsible for the scheduling latency and fairness of the tasks only on its -+own runqueue, and to achieve fairness and low latency across multiple CPUs, any -+advantage in throughput of having CPU local tasks causes other disadvantages. -+This is due to requiring a very complex balancing system to at best achieve some -+semblance of fairness across CPUs and can only maintain relatively low latency -+for tasks bound to the same CPUs, not across them. To increase said fairness -+and latency across CPUs, the advantage of local runqueue locking, which makes -+for better scalability, is lost due to having to grab multiple locks. -+ -+A significant feature of BFS is that all accounting is done purely based on CPU -+used and nowhere is sleep time used in any way to determine entitlement or -+interactivity. Interactivity "estimators" that use some kind of sleep/run -+algorithm are doomed to fail to detect all interactive tasks, and to falsely tag -+tasks that aren't interactive as being so. The reason for this is that it is -+close to impossible to determine that when a task is sleeping, whether it is -+doing it voluntarily, as in a userspace application waiting for input in the -+form of a mouse click or otherwise, or involuntarily, because it is waiting for -+another thread, process, I/O, kernel activity or whatever. Thus, such an -+estimator will introduce corner cases, and more heuristics will be required to -+cope with those corner cases, introducing more corner cases and failed -+interactivity detection and so on. Interactivity in BFS is built into the design -+by virtue of the fact that tasks that are waking up have not used up their quota -+of CPU time, and have earlier effective deadlines, thereby making it very likely -+they will preempt any CPU bound task of equivalent nice level. See below for -+more information on the virtual deadline mechanism. 
Even if they do not preempt -+a running task, because the rr interval is guaranteed to have a bound upper -+limit on how long a task will wait for, it will be scheduled within a timeframe -+that will not cause visible interface jitter. -+ -+ -+Design details. -+ -+Task insertion. -+ -+BFS inserts tasks into each relevant queue as an O(1) insertion into a double -+linked list. On insertion, *every* running queue is checked to see if the newly -+queued task can run on any idle queue, or preempt the lowest running task on the -+system. This is how the cross-CPU scheduling of BFS achieves significantly lower -+latency per extra CPU the system has. In this case the lookup is, in the worst -+case scenario, O(n) where n is the number of CPUs on the system. -+ -+Data protection. -+ -+BFS has one single lock protecting the process local data of every task in the -+global queue. Thus every insertion, removal and modification of task data in the -+global runqueue needs to grab the global lock. However, once a task is taken by -+a CPU, the CPU has its own local data copy of the running process' accounting -+information which only that CPU accesses and modifies (such as during a -+timer tick) thus allowing the accounting data to be updated lockless. Once a -+CPU has taken a task to run, it removes it from the global queue. Thus the -+global queue only ever has, at most, -+ -+ (number of tasks requesting cpu time) - (number of logical CPUs) + 1 -+ -+tasks in the global queue. This value is relevant for the time taken to look up -+tasks during scheduling. This will increase if many tasks with CPU affinity set -+in their policy to limit which CPUs they're allowed to run on if they outnumber -+the number of CPUs. The +1 is because when rescheduling a task, the CPU's -+currently running task is put back on the queue. Lookup will be described after -+the virtual deadline mechanism is explained. -+ -+Virtual deadline. -+ -+The key to achieving low latency, scheduling fairness, and "nice level" -+distribution in BFS is entirely in the virtual deadline mechanism. The one -+tunable in BFS is the rr_interval, or "round robin interval". This is the -+maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy) -+tasks of the same nice level will be running for, or looking at it the other -+way around, the longest duration two tasks of the same nice level will be -+delayed for. When a task requests cpu time, it is given a quota (time_slice) -+equal to the rr_interval and a virtual deadline. The virtual deadline is -+offset from the current time in jiffies by this equation: -+ -+ jiffies + (prio_ratio * rr_interval) -+ -+The prio_ratio is determined as a ratio compared to the baseline of nice -20 -+and increases by 10% per nice level. The deadline is a virtual one only in that -+no guarantee is placed that a task will actually be scheduled by this time, but -+it is used to compare which task should go next. There are three components to -+how a task is next chosen. First is time_slice expiration. If a task runs out -+of its time_slice, it is descheduled, the time_slice is refilled, and the -+deadline reset to that formula above. Second is sleep, where a task no longer -+is requesting CPU for whatever reason. The time_slice and deadline are _not_ -+adjusted in this case and are just carried over for when the task is next -+scheduled. 
Third is preemption, and that is when a newly waking task is deemed -+higher priority than a currently running task on any cpu by virtue of the fact -+that it has an earlier virtual deadline than the currently running task. The -+earlier deadline is the key to which task is next chosen for the first and -+second cases. Once a task is descheduled, it is put back on the queue, and an -+O(n) lookup of all queued-but-not-running tasks is done to determine which has -+the earliest deadline and that task is chosen to receive CPU next. -+ -+The CPU proportion of different nice tasks works out to be approximately the -+ -+ (prio_ratio difference)^2 -+ -+The reason it is squared is that a task's deadline does not change while it is -+running unless it runs out of time_slice. Thus, even if the time actually -+passes the deadline of another task that is queued, it will not get CPU time -+unless the current running task deschedules, and the time "base" (jiffies) is -+constantly moving. -+ -+Task lookup. -+ -+BFS has 103 priority queues. 100 of these are dedicated to the static priority -+of realtime tasks, and the remaining 3 are, in order of best to worst priority, -+SCHED_ISO (isochronous), SCHED_NORMAL, and SCHED_IDLEPRIO (idle priority -+scheduling). When a task of these priorities is queued, a bitmap of running -+priorities is set showing which of these priorities has tasks waiting for CPU -+time. When a CPU is made to reschedule, the lookup for the next task to get -+CPU time is performed in the following way: -+ -+First the bitmap is checked to see what static priority tasks are queued. If -+any realtime priorities are found, the corresponding queue is checked and the -+first task listed there is taken (provided CPU affinity is suitable) and lookup -+is complete. If the priority corresponds to a SCHED_ISO task, they are also -+taken in FIFO order (as they behave like SCHED_RR). If the priority corresponds -+to either SCHED_NORMAL or SCHED_IDLEPRIO, then the lookup becomes O(n). At this -+stage, every task in the runlist that corresponds to that priority is checked -+to see which has the earliest set deadline, and (provided it has suitable CPU -+affinity) it is taken off the runqueue and given the CPU. If a task has an -+expired deadline, it is taken and the rest of the lookup aborted (as they are -+chosen in FIFO order). -+ -+Thus, the lookup is O(n) in the worst case only, where n is as described -+earlier, as tasks may be chosen before the whole task list is looked over. -+ -+ -+Scalability. -+ -+The major limitations of BFS will be that of scalability, as the separate -+runqueue designs will have less lock contention as the number of CPUs rises. -+However they do not scale linearly even with separate runqueues as multiple -+runqueues will need to be locked concurrently on such designs to be able to -+achieve fair CPU balancing, to try and achieve some sort of nice-level fairness -+across CPUs, and to achieve low enough latency for tasks on a busy CPU when -+other CPUs would be more suited. BFS has the advantage that it requires no -+balancing algorithm whatsoever, as balancing occurs by proxy simply because -+all CPUs draw off the global runqueue, in priority and deadline order. Despite -+the fact that scalability is _not_ the prime concern of BFS, it both shows very -+good scalability to smaller numbers of CPUs and is likely a more scalable design -+at these numbers of CPUs. 
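Taking the two rules above literally (prio_ratio grows roughly 10% per nice level, CPU share follows the squared prio_ratio difference), a rough worked example, not a figure taken from the scheduler code:

    nice 0 vs nice 5: prio_ratio difference ~ 1.1^5 ~ 1.61
    CPU proportion ~ 1.61^2 ~ 2.6

so the nice 0 task can be expected to receive roughly 2.6 times the CPU of the nice 5 task while both stay runnable.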
-+ -+It also has some very low overhead scalability features built into the design -+when it has been deemed their overhead is so marginal that they're worth adding. -+The first is the local copy of the running process' data to the CPU it's running -+on to allow that data to be updated lockless where possible. Then there is -+deference paid to the last CPU a task was running on, by trying that CPU first -+when looking for an idle CPU to use the next time it's scheduled. Finally there -+is the notion of cache locality beyond the last running CPU. The sched_domains -+information is used to determine the relative virtual "cache distance" that -+other CPUs have from the last CPU a task was running on. CPUs with shared -+caches, such as SMT siblings, or multicore CPUs with shared caches, are treated -+as cache local. CPUs without shared caches are treated as not cache local, and -+CPUs on different NUMA nodes are treated as very distant. This "relative cache -+distance" is used by modifying the virtual deadline value when doing lookups. -+Effectively, the deadline is unaltered between "cache local" CPUs, doubled for -+"cache distant" CPUs, and quadrupled for "very distant" CPUs. The reasoning -+behind the doubling of deadlines is as follows. The real cost of migrating a -+task from one CPU to another is entirely dependant on the cache footprint of -+the task, how cache intensive the task is, how long it's been running on that -+CPU to take up the bulk of its cache, how big the CPU cache is, how fast and -+how layered the CPU cache is, how fast a context switch is... and so on. In -+other words, it's close to random in the real world where we do more than just -+one sole workload. The only thing we can be sure of is that it's not free. So -+BFS uses the principle that an idle CPU is a wasted CPU and utilising idle CPUs -+is more important than cache locality, and cache locality only plays a part -+after that. Doubling the effective deadline is based on the premise that the -+"cache local" CPUs will tend to work on the same tasks up to double the number -+of cache local CPUs, and once the workload is beyond that amount, it is likely -+that none of the tasks are cache warm anywhere anyway. The quadrupling for NUMA -+is a value I pulled out of my arse. -+ -+When choosing an idle CPU for a waking task, the cache locality is determined -+according to where the task last ran and then idle CPUs are ranked from best -+to worst to choose the most suitable idle CPU based on cache locality, NUMA -+node locality and hyperthread sibling business. They are chosen in the -+following preference (if idle): -+ -+* Same core, idle or busy cache, idle threads -+* Other core, same cache, idle or busy cache, idle threads. -+* Same node, other CPU, idle cache, idle threads. -+* Same node, other CPU, busy cache, idle threads. -+* Same core, busy threads. -+* Other core, same cache, busy threads. -+* Same node, other CPU, busy threads. -+* Other node, other CPU, idle cache, idle threads. -+* Other node, other CPU, busy cache, idle threads. -+* Other node, other CPU, busy threads. -+ -+This shows the SMT or "hyperthread" awareness in the design as well which will -+choose a real idle core first before a logical SMT sibling which already has -+tasks on the physical CPU. -+ -+Early benchmarking of BFS suggested scalability dropped off at the 16 CPU mark. 
-+However this benchmarking was performed on an earlier design that was far less -+scalable than the current one so it's hard to know how scalable it is in terms -+of both CPUs (due to the global runqueue) and heavily loaded machines (due to -+O(n) lookup) at this stage. Note that in terms of scalability, the number of -+_logical_ CPUs matters, not the number of _physical_ CPUs. Thus, a dual (2x) -+quad core (4X) hyperthreaded (2X) machine is effectively a 16X. Newer benchmark -+results are very promising indeed, without needing to tweak any knobs, features -+or options. Benchmark contributions are most welcome. -+ -+ -+Features -+ -+As the initial prime target audience for BFS was the average desktop user, it -+was designed to not need tweaking, tuning or have features set to obtain benefit -+from it. Thus the number of knobs and features has been kept to an absolute -+minimum and should not require extra user input for the vast majority of cases. -+There are precisely 2 tunables, and 2 extra scheduling policies. The rr_interval -+and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO policies. In addition -+to this, BFS also uses sub-tick accounting. What BFS does _not_ now feature is -+support for CGROUPS. The average user should neither need to know what these -+are, nor should they need to be using them to have good desktop behaviour. -+ -+rr_interval -+ -+There is only one "scheduler" tunable, the round robin interval. This can be -+accessed in -+ -+ /proc/sys/kernel/rr_interval -+ -+The value is in milliseconds, and the default value is set to 6 on a -+uniprocessor machine, and automatically set to a progressively higher value on -+multiprocessor machines. The reasoning behind increasing the value on more CPUs -+is that the effective latency is decreased by virtue of there being more CPUs on -+BFS (for reasons explained above), and increasing the value allows for less -+cache contention and more throughput. Valid values are from 1 to 1000 -+Decreasing the value will decrease latencies at the cost of decreasing -+throughput, while increasing it will improve throughput, but at the cost of -+worsening latencies. The accuracy of the rr interval is limited by HZ resolution -+of the kernel configuration. Thus, the worst case latencies are usually slightly -+higher than this actual value. The default value of 6 is not an arbitrary one. -+It is based on the fact that humans can detect jitter at approximately 7ms, so -+aiming for much lower latencies is pointless under most circumstances. It is -+worth noting this fact when comparing the latency performance of BFS to other -+schedulers. Worst case latencies being higher than 7ms are far worse than -+average latencies not being in the microsecond range. -+ -+Isochronous scheduling. -+ -+Isochronous scheduling is a unique scheduling policy designed to provide -+near-real-time performance to unprivileged (ie non-root) users without the -+ability to starve the machine indefinitely. Isochronous tasks (which means -+"same time") are set using, for example, the schedtool application like so: -+ -+ schedtool -I -e amarok -+ -+This will start the audio application "amarok" as SCHED_ISO. How SCHED_ISO works -+is that it has a priority level between true realtime tasks and SCHED_NORMAL -+which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie, -+if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval -+rate). 
However if ISO tasks run for more than a tunable finite amount of time, -+they are then demoted back to SCHED_NORMAL scheduling. This finite amount of -+time is the percentage of _total CPU_ available across the machine, configurable -+as a percentage in the following "resource handling" tunable (as opposed to a -+scheduler tunable): -+ -+ /proc/sys/kernel/iso_cpu -+ -+and is set to 70% by default. It is calculated over a rolling 5 second average -+Because it is the total CPU available, it means that on a multi CPU machine, it -+is possible to have an ISO task running as realtime scheduling indefinitely on -+just one CPU, as the other CPUs will be available. Setting this to 100 is the -+equivalent of giving all users SCHED_RR access and setting it to 0 removes the -+ability to run any pseudo-realtime tasks. -+ -+A feature of BFS is that it detects when an application tries to obtain a -+realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the -+appropriate privileges to use those policies. When it detects this, it will -+give the task SCHED_ISO policy instead. Thus it is transparent to the user. -+Because some applications constantly set their policy as well as their nice -+level, there is potential for them to undo the override specified by the user -+on the command line of setting the policy to SCHED_ISO. To counter this, once -+a task has been set to SCHED_ISO policy, it needs superuser privileges to set -+it back to SCHED_NORMAL. This will ensure the task remains ISO and all child -+processes and threads will also inherit the ISO policy. -+ -+Idleprio scheduling. -+ -+Idleprio scheduling is a scheduling policy designed to give out CPU to a task -+_only_ when the CPU would be otherwise idle. The idea behind this is to allow -+ultra low priority tasks to be run in the background that have virtually no -+effect on the foreground tasks. This is ideally suited to distributed computing -+clients (like setiathome, folding, mprime etc) but can also be used to start -+a video encode or so on without any slowdown of other tasks. To avoid this -+policy from grabbing shared resources and holding them indefinitely, if it -+detects a state where the task is waiting on I/O, the machine is about to -+suspend to ram and so on, it will transiently schedule them as SCHED_NORMAL. As -+per the Isochronous task management, once a task has been scheduled as IDLEPRIO, -+it cannot be put back to SCHED_NORMAL without superuser privileges. Tasks can -+be set to start as SCHED_IDLEPRIO with the schedtool command like so: -+ -+ schedtool -D -e ./mprime -+ -+Subtick accounting. -+ -+It is surprisingly difficult to get accurate CPU accounting, and in many cases, -+the accounting is done by simply determining what is happening at the precise -+moment a timer tick fires off. This becomes increasingly inaccurate as the -+timer tick frequency (HZ) is lowered. It is possible to create an application -+which uses almost 100% CPU, yet by being descheduled at the right time, records -+zero CPU usage. While the main problem with this is that there are possible -+security implications, it is also difficult to determine how much CPU a task -+really does use. BFS tries to use the sub-tick accounting from the TSC clock, -+where possible, to determine real CPU usage. This is not entirely reliable, but -+is far more likely to produce accurate CPU usage data than the existing designs -+and will not show tasks as consuming no CPU usage when they actually are. 
Thus, -+the amount of CPU reported as being used by BFS will more accurately represent -+how much CPU the task itself is using (as is shown for example by the 'time' -+application), so the reported values may be quite different to other schedulers. -+Values reported as the 'load' are more prone to problems with this design, but -+per process values are closer to real usage. When comparing throughput of BFS -+to other designs, it is important to compare the actual completed work in terms -+of total wall clock time taken and total work done, rather than the reported -+"cpu usage". -+ -+ -+Con Kolivas Fri Aug 27 2010 -diff --git a/Documentation/scheduler/sched-MuQSS.txt b/Documentation/scheduler/sched-MuQSS.txt -new file mode 100644 -index 000000000000..ae28b85c9995 ---- /dev/null -+++ b/Documentation/scheduler/sched-MuQSS.txt -@@ -0,0 +1,373 @@ -+MuQSS - The Multiple Queue Skiplist Scheduler by Con Kolivas. -+ -+MuQSS is a per-cpu runqueue variant of the original BFS scheduler with -+one 8 level skiplist per runqueue, and fine grained locking for much more -+scalability. -+ -+ -+Goals. -+ -+The goal of the Multiple Queue Skiplist Scheduler, referred to as MuQSS from -+here on (pronounced mux) is to completely do away with the complex designs of -+the past for the cpu process scheduler and instead implement one that is very -+simple in basic design. The main focus of MuQSS is to achieve excellent desktop -+interactivity and responsiveness without heuristics and tuning knobs that are -+difficult to understand, impossible to model and predict the effect of, and when -+tuned to one workload cause massive detriment to another, while still being -+scalable to many CPUs and processes. -+ -+ -+Design summary. -+ -+MuQSS is best described as per-cpu multiple runqueue, O(log n) insertion, O(1) -+lookup, earliest effective virtual deadline first tickless design, loosely based -+on EEVDF (earliest eligible virtual deadline first) and my previous Staircase -+Deadline scheduler, and evolved from the single runqueue O(n) BFS scheduler. -+Each component shall be described in order to understand the significance of, -+and reasoning for it. -+ -+ -+Design reasoning. -+ -+In BFS, the use of a single runqueue across all CPUs meant that each CPU would -+need to scan the entire runqueue looking for the process with the earliest -+deadline and schedule that next, regardless of which CPU it originally came -+from. This made BFS deterministic with respect to latency and provided -+guaranteed latencies dependent on number of processes and CPUs. The single -+runqueue, however, meant that all CPUs would compete for the single lock -+protecting it, which would lead to increasing lock contention as the number of -+CPUs rose and appeared to limit scalability of common workloads beyond 16 -+logical CPUs. Additionally, the O(n) lookup of the runqueue list obviously -+increased overhead proportionate to the number of queued proecesses and led to -+cache thrashing while iterating over the linked list. -+ -+MuQSS is an evolution of BFS, designed to maintain the same scheduling -+decision mechanism and be virtually deterministic without relying on the -+constrained design of the single runqueue by splitting out the single runqueue -+to be per-CPU and use skiplists instead of linked lists. 
-+ -+The original reason for going back to a single runqueue design for BFS was that -+once multiple runqueues are introduced, per-CPU or otherwise, there will be -+complex interactions as each runqueue will be responsible for the scheduling -+latency and fairness of the tasks only on its own runqueue, and to achieve -+fairness and low latency across multiple CPUs, any advantage in throughput of -+having CPU local tasks causes other disadvantages. This is due to requiring a -+very complex balancing system to at best achieve some semblance of fairness -+across CPUs and can only maintain relatively low latency for tasks bound to the -+same CPUs, not across them. To increase said fairness and latency across CPUs, -+the advantage of local runqueue locking, which makes for better scalability, is -+lost due to having to grab multiple locks. -+ -+MuQSS works around the problems inherent in multiple runqueue designs by -+making its skip lists priority ordered and through novel use of lockless -+examination of each other runqueue it can decide if it should take the earliest -+deadline task from another runqueue for latency reasons, or for CPU balancing -+reasons. It still does not have a balancing system, choosing to allow the -+next task scheduling decision and task wakeup CPU choice to allow balancing to -+happen by virtue of its choices. -+ -+As a further evolution of the design, MuQSS normally configures sharing of -+runqueues in a logical fashion for when CPU resources are shared for improved -+latency and throughput. By default it shares runqueues and locks between -+multicore siblings. Optionally it can be configured to run with sharing of -+SMT siblings only, all SMP packages or no sharing at all. Additionally it can -+be selected at boot time. -+ -+ -+Design details. -+ -+Custom skip list implementation: -+ -+To avoid the overhead of building up and tearing down skip list structures, -+the variant used by MuQSS has a number of optimisations making it specific for -+its use case in the scheduler. It uses static arrays of 8 'levels' instead of -+building up and tearing down structures dynamically. This makes each runqueue -+only scale O(log N) up to 64k tasks. However as there is one runqueue per CPU -+it means that it scales O(log N) up to 64k x number of logical CPUs which is -+far beyond the realistic task limits each CPU could handle. By being 8 levels -+it also makes the array exactly one cacheline in size. Additionally, each -+skip list node is bidirectional making insertion and removal amortised O(1), -+being O(k) where k is 1-8. Uniquely, we are only ever interested in the very -+first entry in each list at all times with MuQSS, so there is never a need to -+do a search and thus look up is always O(1). In interactive mode, the queues -+will be searched beyond their first entry if the first task is not suitable -+for affinity or SMT nice reasons. -+ -+Task insertion: -+ -+MuQSS inserts tasks into a per CPU runqueue as an O(log N) insertion into -+a custom skip list as described above (based on the original design by William -+Pugh). Insertion is ordered in such a way that there is never a need to do a -+search by ordering tasks according to static priority primarily, and then -+virtual deadline at the time of insertion. -+ -+Niffies: -+ -+Niffies are a monotonic forward moving timer not unlike the "jiffies" but are -+of nanosecond resolution. 
Niffies are calculated per-runqueue from the high -+resolution TSC timers, and in order to maintain fairness are synchronised -+between CPUs whenever both runqueues are locked concurrently. -+ -+Virtual deadline: -+ -+The key to achieving low latency, scheduling fairness, and "nice level" -+distribution in MuQSS is entirely in the virtual deadline mechanism. The one -+tunable in MuQSS is the rr_interval, or "round robin interval". This is the -+maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy) -+tasks of the same nice level will be running for, or looking at it the other -+way around, the longest duration two tasks of the same nice level will be -+delayed for. When a task requests cpu time, it is given a quota (time_slice) -+equal to the rr_interval and a virtual deadline. The virtual deadline is -+offset from the current time in niffies by this equation: -+ -+ niffies + (prio_ratio * rr_interval) -+ -+The prio_ratio is determined as a ratio compared to the baseline of nice -20 -+and increases by 10% per nice level. The deadline is a virtual one only in that -+no guarantee is placed that a task will actually be scheduled by this time, but -+it is used to compare which task should go next. There are three components to -+how a task is next chosen. First is time_slice expiration. If a task runs out -+of its time_slice, it is descheduled, the time_slice is refilled, and the -+deadline reset to that formula above. Second is sleep, where a task no longer -+is requesting CPU for whatever reason. The time_slice and deadline are _not_ -+adjusted in this case and are just carried over for when the task is next -+scheduled. Third is preemption, and that is when a newly waking task is deemed -+higher priority than a currently running task on any cpu by virtue of the fact -+that it has an earlier virtual deadline than the currently running task. The -+earlier deadline is the key to which task is next chosen for the first and -+second cases. -+ -+The CPU proportion of different nice tasks works out to be approximately the -+ -+ (prio_ratio difference)^2 -+ -+The reason it is squared is that a task's deadline does not change while it is -+running unless it runs out of time_slice. Thus, even if the time actually -+passes the deadline of another task that is queued, it will not get CPU time -+unless the current running task deschedules, and the time "base" (niffies) is -+constantly moving. -+ -+Task lookup: -+ -+As tasks are already pre-ordered according to anticipated scheduling order in -+the skip lists, lookup for the next suitable task per-runqueue is always a -+matter of simply selecting the first task in the 0th level skip list entry. -+In order to maintain optimal latency and fairness across CPUs, MuQSS does a -+novel examination of every other runqueue in cache locality order, choosing the -+best task across all runqueues. This provides near-determinism of how long any -+task across the entire system may wait before receiving CPU time. The other -+runqueues are first examine lockless and then trylocked to minimise the -+potential lock contention if they are likely to have a suitable better task. -+Each other runqueue lock is only held for as long as it takes to examine the -+entry for suitability. In "interactive" mode, the default setting, MuQSS will -+look for the best deadline task across all CPUs, while in !interactive mode, -+it will only select a better deadline task from another CPU if it is more -+heavily laden than the current one. 
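On a kernel actually running MuQSS, the selection behaviour described here is steered by the rr_interval and interactive sysctls covered later in this document; neither file exists on a mainline kernel. A quick check, assuming a MuQSS build:

    # present only when the MuQSS patch is applied
    cat /proc/sys/kernel/rr_interval    # default 6 (milliseconds)
    cat /proc/sys/kernel/interactive    # 1 = search all CPUs for the earliest deadline
    # trade some latency determinism for throughput
    sysctl -w kernel.interactive=0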
-+ -+Lookup is therefore O(k) where k is number of CPUs. -+ -+ -+Latency. -+ -+Through the use of virtual deadlines to govern the scheduling order of normal -+tasks, queue-to-activation latency per runqueue is guaranteed to be bound by -+the rr_interval tunable which is set to 6ms by default. This means that the -+longest a CPU bound task will wait for more CPU is proportional to the number -+of running tasks and in the common case of 0-2 running tasks per CPU, will be -+under the 7ms threshold for human perception of jitter. Additionally, as newly -+woken tasks will have an early deadline from their previous runtime, the very -+tasks that are usually latency sensitive will have the shortest interval for -+activation, usually preempting any existing CPU bound tasks. -+ -+Tickless expiry: -+ -+A feature of MuQSS is that it is not tied to the resolution of the chosen tick -+rate in Hz, instead depending entirely on the high resolution timers where -+possible for sub-millisecond accuracy on timeouts regarless of the underlying -+tick rate. This allows MuQSS to be run with the low overhead of low Hz rates -+such as 100 by default, benefiting from the improved throughput and lower -+power usage it provides. Another advantage of this approach is that in -+combination with the Full No HZ option, which disables ticks on running task -+CPUs instead of just idle CPUs, the tick can be disabled at all times -+regardless of how many tasks are running instead of being limited to just one -+running task. Note that this option is NOT recommended for regular desktop -+users. -+ -+ -+Scalability and balancing. -+ -+Unlike traditional approaches where balancing is a combination of CPU selection -+at task wakeup and intermittent balancing based on a vast array of rules set -+according to architecture, busyness calculations and special case management, -+MuQSS indirectly balances on the fly at task wakeup and next task selection. -+During initialisation, MuQSS creates a cache coherency ordered list of CPUs for -+each logical CPU and uses this to aid task/CPU selection when CPUs are busy. -+Additionally it selects any idle CPUs, if they are available, at any time over -+busy CPUs according to the following preference: -+ -+ * Same thread, idle or busy cache, idle or busy threads -+ * Other core, same cache, idle or busy cache, idle threads. -+ * Same node, other CPU, idle cache, idle threads. -+ * Same node, other CPU, busy cache, idle threads. -+ * Other core, same cache, busy threads. -+ * Same node, other CPU, busy threads. -+ * Other node, other CPU, idle cache, idle threads. -+ * Other node, other CPU, busy cache, idle threads. -+ * Other node, other CPU, busy threads. -+ -+Mux is therefore SMT, MC and Numa aware without the need for extra -+intermittent balancing to maintain CPUs busy and make the most of cache -+coherency. -+ -+ -+Features -+ -+As the initial prime target audience for MuQSS was the average desktop user, it -+was designed to not need tweaking, tuning or have features set to obtain benefit -+from it. Thus the number of knobs and features has been kept to an absolute -+minimum and should not require extra user input for the vast majority of cases. -+There are 3 optional tunables, and 2 extra scheduling policies. The rr_interval, -+interactive, and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO -+policies. In addition to this, MuQSS also uses sub-tick accounting. What MuQSS -+does _not_ now feature is support for CGROUPS. 
The average user should neither
-+need to know what these are, nor should they need to be using them to have good
-+desktop behaviour. However since some applications refuse to work without
-+cgroups, one can enable them with MuQSS as a stub and the filesystem will be
-+created which will allow the applications to work.
-+
-+rr_interval:
-+
-+ /proc/sys/kernel/rr_interval
-+
-+The value is in milliseconds, and the default value is set to 6. Valid values
-+are from 1 to 1000. Decreasing the value will decrease latencies at the cost of
-+decreasing throughput, while increasing it will improve throughput, but at the
-+cost of worsening latencies. It is based on the fact that humans can detect
-+jitter at approximately 7ms, so aiming for much lower latencies is pointless
-+under most circumstances. It is worth noting this fact when comparing the
-+latency performance of MuQSS to other schedulers. Worst case latencies being
-+higher than 7ms are far worse than average latencies not being in the
-+microsecond range.
-+
-+interactive:
-+
-+ /proc/sys/kernel/interactive
-+
-+The value is a simple boolean of 1 for on and 0 for off and is set to on by
-+default. Disabling this will disable the near-determinism of MuQSS when
-+selecting the next task by not examining all CPUs for the earliest deadline
-+task, or which CPU to wake to, instead prioritising CPU balancing for improved
-+throughput. Latency will still be bound by rr_interval, but on a per-CPU basis
-+instead of across the whole system.
-+
-+Runqueue sharing.
-+
-+By default MuQSS chooses to share runqueue resources (specifically the skip
-+list and locking) between multicore siblings. It is configurable at build time
-+to select between None, SMT, MC and SMP, corresponding to no sharing, sharing
-+only between simultaneous multithreading siblings, multicore siblings, or
-+symmetric multiprocessing physical packages. Additionally it can be set at
-+boot time with the use of the rqshare parameter. The reason for configurability
-+is that some architectures have CPUs with many multicore siblings (>= 16)
-+where it may be detrimental to throughput to share runqueues and another
-+sharing option may be desirable. Additionally, more sharing than usual can
-+improve latency on a system-wide level at the expense of throughput if desired.
-+
-+The options are:
-+none, smt, mc, smp
-+
-+eg:
-+ rqshare=mc
-+
-+Isochronous scheduling:
-+
-+Isochronous scheduling is a unique scheduling policy designed to provide
-+near-real-time performance to unprivileged (ie non-root) users without the
-+ability to starve the machine indefinitely. Isochronous tasks (which means
-+"same time") are set using, for example, the schedtool application like so:
-+
-+ schedtool -I -e amarok
-+
-+This will start the audio application "amarok" as SCHED_ISO. How SCHED_ISO works
-+is that it has a priority level between true realtime tasks and SCHED_NORMAL
-+which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie,
-+if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval
-+rate). However if ISO tasks run for more than a tunable finite amount of time,
-+they are then demoted back to SCHED_NORMAL scheduling. This finite amount of
-+time is the percentage of CPU available per CPU, configurable as a percentage in
-+the following "resource handling" tunable (as opposed to a scheduler tunable):
-+
-+iso_cpu:
-+
-+ /proc/sys/kernel/iso_cpu
-+
-+and is set to 70% by default. It is calculated over a rolling 5 second average.
-+Because it is the total CPU available, it means that on a multi CPU machine, it
-+is possible to have an ISO task running as realtime scheduling indefinitely on
-+just one CPU, as the other CPUs will be available. Setting this to 100 is the
-+equivalent of giving all users SCHED_RR access and setting it to 0 removes the
-+ability to run any pseudo-realtime tasks.
-+
-+A feature of MuQSS is that it detects when an application tries to obtain a
-+realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the
-+appropriate privileges to use those policies. When it detects this, it will
-+give the task SCHED_ISO policy instead. Thus it is transparent to the user.
-+
-+
-+Idleprio scheduling:
-+
-+Idleprio scheduling is a scheduling policy designed to give out CPU to a task
-+_only_ when the CPU would be otherwise idle. The idea behind this is to allow
-+ultra low priority tasks to be run in the background that have virtually no
-+effect on the foreground tasks. This is ideally suited to distributed computing
-+clients (like setiathome, folding, mprime etc) but can also be used to start a
-+video encode or so on without any slowdown of other tasks. To prevent this policy
-+from grabbing shared resources and holding them indefinitely, if it detects a
-+state where the task is waiting on I/O, the machine is about to suspend to ram
-+and so on, it will transiently schedule them as SCHED_NORMAL. Once a task has
-+been scheduled as IDLEPRIO, it cannot be put back to SCHED_NORMAL without
-+superuser privileges since it is effectively a lower scheduling policy. Tasks
-+can be set to start as SCHED_IDLEPRIO with the schedtool command like so:
-+
-+schedtool -D -e ./mprime
-+
-+Subtick accounting:
-+
-+It is surprisingly difficult to get accurate CPU accounting, and in many cases,
-+the accounting is done by simply determining what is happening at the precise
-+moment a timer tick fires off. This becomes increasingly inaccurate as the timer
-+tick frequency (HZ) is lowered. It is possible to create an application which
-+uses almost 100% CPU, yet by being descheduled at the right time, records zero
-+CPU usage. While the main problem with this is that there are possible security
-+implications, it is also difficult to determine how much CPU a task really does
-+use. MuQSS uses sub-tick accounting from the TSC clock to determine real CPU
-+usage. Thus, the amount of CPU reported as being used by MuQSS will more
-+accurately represent how much CPU the task itself is using (as is shown for
-+example by the 'time' application), so the reported values may be quite
-+different to other schedulers. When comparing throughput of MuQSS to other
-+designs, it is important to compare the actual completed work in terms of total
-+wall clock time taken and total work done, rather than the reported "cpu usage".
-+
-+Symmetric MultiThreading (SMT) aware nice:
-+
-+SMT, a.k.a. hyperthreading, is a very common feature on modern CPUs. While the
-+logical CPU count rises by adding thread units to each CPU core, allowing more
-+than one task to be run simultaneously on the same core, the disadvantage of it
-+is that the CPU power is shared between the tasks, not summating to the power
-+of two CPUs. The practical upshot of this is that two tasks running on
-+separate threads of the same core run significantly slower than if they had one
-+core each to run on. While smart CPU selection allows each task to have a core
-+to itself whenever available (as is done on MuQSS), it cannot offset the
-+slowdown that occurs when the cores are all loaded and only a thread is left.
-+Most of the time this is harmless as the CPU is effectively overloaded at this
-+point and the extra thread is of benefit. However when running a niced task in
-+the presence of an un-niced task (say nice 19 v nice 0), the nice task gets
-+precisely the same amount of CPU power as the unniced one. MuQSS has an
-+optional configuration feature known as SMT-NICE which selectively idles the
-+secondary niced thread for a period proportional to the nice difference,
-+allowing CPU distribution according to nice level to be maintained, at the
-+expense of a small amount of extra overhead. If this is configured in on a
-+machine without SMT threads, the overhead is minimal.
-+
-+
-+Con Kolivas Sat, 29th October 2016
-diff --git a/Makefile b/Makefile
-index 51540b291738..ab8c480660a6 100644
---- a/Makefile
-+++ b/Makefile
-@@ -18,6 +18,10 @@ $(if $(filter __%, $(MAKECMDGOALS)), \
- PHONY := __all
- __all:
-
-+CKVERSION = -ck1
-+CKNAME = MuQSS Powered
-+EXTRAVERSION := $(EXTRAVERSION)$(CKVERSION)
-+
- # We are using a recursive build, so we need to do a little thinking
- # to get the ordering right.
- #
-diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
-index 9c5f06e8eb9b..0d1069eee09c 100644
---- a/arch/alpha/Kconfig
-+++ b/arch/alpha/Kconfig
-@@ -666,6 +666,8 @@ config HZ
- default 1200 if HZ_1200
- default 1024
-
-+source "kernel/Kconfig.MuQSS"
-+
- config SRM_ENV
- tristate "SRM environment through procfs"
- depends on PROC_FS
-diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig
-index a12656ec0072..b46b6ddc7636 100644
---- a/arch/arc/configs/tb10x_defconfig
-+++ b/arch/arc/configs/tb10x_defconfig
-@@ -29,7 +29,7 @@ CONFIG_ARC_PLAT_TB10X=y
- CONFIG_ARC_CACHE_LINE_SHIFT=5
- CONFIG_HZ=250
- CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk"
--CONFIG_PREEMPT_VOLUNTARY=y
-+CONFIG_PREEMPT=y
- # CONFIG_COMPACTION is not set
- CONFIG_NET=y
- CONFIG_PACKET=y
-diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
-index e00d94b16658..efabbd09475a 100644
---- a/arch/arm/Kconfig
-+++ b/arch/arm/Kconfig
-@@ -1236,6 +1236,8 @@ config SCHED_SMT
- MultiThreading at a cost of slightly increased overhead in some
- places. If unsure say N here.
- -+source "kernel/Kconfig.MuQSS" -+ - config HAVE_ARM_SCU - bool - help -diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig -index 44ff9cd88d81..9c639c998015 100644 ---- a/arch/arm/configs/bcm2835_defconfig -+++ b/arch/arm/configs/bcm2835_defconfig -@@ -29,7 +29,7 @@ CONFIG_MODULE_UNLOAD=y - CONFIG_ARCH_MULTI_V6=y - CONFIG_ARCH_BCM=y - CONFIG_ARCH_BCM2835=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_AEABI=y - CONFIG_KSM=y - CONFIG_CLEANCACHE=y -diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig -index 82d3ffb18e70..bb05667427a6 100644 ---- a/arch/arm/configs/imx_v6_v7_defconfig -+++ b/arch/arm/configs/imx_v6_v7_defconfig -@@ -45,6 +45,7 @@ CONFIG_PCI_MSI=y - CONFIG_PCI_IMX6=y - CONFIG_SMP=y - CONFIG_ARM_PSCI=y -+CONFIG_PREEMPT=y - CONFIG_HIGHMEM=y - CONFIG_FORCE_MAX_ZONEORDER=14 - CONFIG_CMDLINE="noinitrd console=ttymxc0,115200" -diff --git a/arch/arm/configs/mps2_defconfig b/arch/arm/configs/mps2_defconfig -index 1d923dbb9928..9c1931f1fafd 100644 ---- a/arch/arm/configs/mps2_defconfig -+++ b/arch/arm/configs/mps2_defconfig -@@ -18,7 +18,7 @@ CONFIG_ARCH_MPS2=y - CONFIG_SET_MEM_PARAM=y - CONFIG_DRAM_BASE=0x21000000 - CONFIG_DRAM_SIZE=0x1000000 --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - # CONFIG_ATAGS is not set - CONFIG_ZBOOT_ROM_TEXT=0x0 - CONFIG_ZBOOT_ROM_BSS=0x0 -diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig -index a9c6f32a9b1c..870866aaa39d 100644 ---- a/arch/arm/configs/mxs_defconfig -+++ b/arch/arm/configs/mxs_defconfig -@@ -1,7 +1,7 @@ - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT_VOLUNTARY=n - CONFIG_TASKSTATS=y - CONFIG_TASK_DELAY_ACCT=y - CONFIG_TASK_XACCT=y -@@ -25,6 +25,13 @@ CONFIG_MODULE_UNLOAD=y - CONFIG_MODULE_FORCE_UNLOAD=y - CONFIG_MODVERSIONS=y - CONFIG_BLK_DEV_INTEGRITY=y -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+# CONFIG_ARCH_MULTI_V7 is not set -+CONFIG_ARCH_MXS=y -+# CONFIG_ARM_THUMB is not set -+CONFIG_PREEMPT=y -+CONFIG_AEABI=y - CONFIG_NET=y - CONFIG_PACKET=y - CONFIG_UNIX=y -diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig -index 6d232837cbee..052cae73d674 100644 ---- a/arch/arm64/Kconfig -+++ b/arch/arm64/Kconfig -@@ -945,6 +945,8 @@ config SCHED_SMT - MultiThreading at a cost of slightly increased overhead in some - places. If unsure say N here. 
- -+source "kernel/Kconfig.MuQSS" -+ - config NR_CPUS - int "Maximum number of CPUs (2-4096)" - range 2 4096 -diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig -index 023b4e644b1c..013e630b96a6 100644 ---- a/arch/mips/configs/fuloong2e_defconfig -+++ b/arch/mips/configs/fuloong2e_defconfig -@@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y -diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig -index 9085f4d6c698..fb23111d45f6 100644 ---- a/arch/mips/configs/gpr_defconfig -+++ b/arch/mips/configs/gpr_defconfig -@@ -1,8 +1,8 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_BSD_PROCESS_ACCT_V3=y - CONFIG_RELAY=y -diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig -index 21a1168ae301..529a1b1007cf 100644 ---- a/arch/mips/configs/ip22_defconfig -+++ b/arch/mips/configs/ip22_defconfig -@@ -1,7 +1,7 @@ - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y - CONFIG_LOG_BUF_SHIFT=14 -diff --git a/arch/mips/configs/ip28_defconfig b/arch/mips/configs/ip28_defconfig -index 0921ef38e9fb..6da05cef46f8 100644 ---- a/arch/mips/configs/ip28_defconfig -+++ b/arch/mips/configs/ip28_defconfig -@@ -1,5 +1,5 @@ - CONFIG_SYSVIPC=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y - CONFIG_LOG_BUF_SHIFT=14 -diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig -index 8c223035921f..a3bf87450343 100644 ---- a/arch/mips/configs/jazz_defconfig -+++ b/arch/mips/configs/jazz_defconfig -@@ -1,8 +1,8 @@ -+CONFIG_PREEMPT=y - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_LOG_BUF_SHIFT=14 - CONFIG_RELAY=y -diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig -index 914af125a7fa..76a64290373f 100644 ---- a/arch/mips/configs/mtx1_defconfig -+++ b/arch/mips/configs/mtx1_defconfig -@@ -1,8 +1,8 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_AUDIT=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_BSD_PROCESS_ACCT_V3=y - CONFIG_RELAY=y -diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig -index 4ecb157e56d4..ea7309283b01 100644 ---- a/arch/mips/configs/nlm_xlr_defconfig -+++ b/arch/mips/configs/nlm_xlr_defconfig -@@ -1,10 +1,10 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_AUDIT=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_BSD_PROCESS_ACCT_V3=y - CONFIG_TASKSTATS=y -diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig -index 63fe2da1b37f..7f08ee237345 100644 ---- a/arch/mips/configs/pic32mzda_defconfig -+++ b/arch/mips/configs/pic32mzda_defconfig -@@ -1,7 +1,7 @@ -+CONFIG_PREEMPT=y - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y - 
CONFIG_LOG_BUF_SHIFT=14 -diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig -index b9adf15ebbec..0025b56dc300 100644 ---- a/arch/mips/configs/pistachio_defconfig -+++ b/arch/mips/configs/pistachio_defconfig -@@ -1,9 +1,9 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_DEFAULT_HOSTNAME="localhost" - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_IKCONFIG=m - CONFIG_IKCONFIG_PROC=y - CONFIG_LOG_BUF_SHIFT=18 -diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig -index 30d7c3db884e..9e68acfa0d0e 100644 ---- a/arch/mips/configs/rm200_defconfig -+++ b/arch/mips/configs/rm200_defconfig -@@ -1,6 +1,6 @@ -+CONFIG_PREEMPT=y - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y -diff --git a/arch/parisc/configs/712_defconfig b/arch/parisc/configs/712_defconfig -new file mode 100644 -index 000000000000..578524f80cc4 ---- /dev/null -+++ b/arch/parisc/configs/712_defconfig -@@ -0,0 +1,181 @@ -+# CONFIG_LOCALVERSION_AUTO is not set -+CONFIG_SYSVIPC=y -+CONFIG_POSIX_MQUEUE=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=16 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_KALLSYMS_ALL=y -+CONFIG_SLAB=y -+CONFIG_PROFILING=y -+CONFIG_OPROFILE=m -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+CONFIG_MODULE_FORCE_UNLOAD=y -+CONFIG_PA7100LC=y -+CONFIG_PREEMPT=y -+CONFIG_GSC_LASI=y -+# CONFIG_PDC_CHASSIS is not set -+CONFIG_BINFMT_MISC=m -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_XFRM_USER=m -+CONFIG_NET_KEY=m -+CONFIG_INET=y -+CONFIG_IP_MULTICAST=y -+CONFIG_IP_PNP=y -+CONFIG_IP_PNP_DHCP=y -+CONFIG_IP_PNP_BOOTP=y -+CONFIG_INET_AH=m -+CONFIG_INET_ESP=m -+CONFIG_INET_DIAG=m -+# CONFIG_IPV6 is not set -+CONFIG_NETFILTER=y -+CONFIG_LLC2=m -+CONFIG_NET_PKTGEN=m -+CONFIG_DEVTMPFS=y -+CONFIG_DEVTMPFS_MOUNT=y -+# CONFIG_STANDALONE is not set -+# CONFIG_PREVENT_FIRMWARE_BUILD is not set -+CONFIG_PARPORT=y -+CONFIG_PARPORT_PC=m -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_CRYPTOLOOP=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=6144 -+CONFIG_ATA_OVER_ETH=m -+CONFIG_SCSI=y -+CONFIG_BLK_DEV_SD=y -+CONFIG_CHR_DEV_ST=y -+CONFIG_BLK_DEV_SR=y -+CONFIG_CHR_DEV_SG=y -+CONFIG_SCSI_ISCSI_ATTRS=m -+CONFIG_SCSI_LASI700=y -+CONFIG_SCSI_DEBUG=m -+CONFIG_MD=y -+CONFIG_BLK_DEV_MD=m -+CONFIG_MD_LINEAR=m -+CONFIG_MD_RAID0=m -+CONFIG_MD_RAID1=m -+CONFIG_NETDEVICES=y -+CONFIG_BONDING=m -+CONFIG_DUMMY=m -+CONFIG_TUN=m -+CONFIG_LASI_82596=y -+CONFIG_PPP=m -+CONFIG_PPP_BSDCOMP=m -+CONFIG_PPP_DEFLATE=m -+CONFIG_PPP_MPPE=m -+CONFIG_PPPOE=m -+CONFIG_PPP_ASYNC=m -+CONFIG_PPP_SYNC_TTY=m -+# CONFIG_KEYBOARD_HIL_OLD is not set -+CONFIG_MOUSE_SERIAL=m -+CONFIG_LEGACY_PTY_COUNT=64 -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_SERIAL_8250_NR_UARTS=17 -+CONFIG_SERIAL_8250_EXTENDED=y -+CONFIG_SERIAL_8250_MANY_PORTS=y -+CONFIG_SERIAL_8250_SHARE_IRQ=y -+# CONFIG_SERIAL_MUX is not set -+CONFIG_PDC_CONSOLE=y -+CONFIG_PRINTER=m -+CONFIG_PPDEV=m -+# CONFIG_HW_RANDOM is not set -+CONFIG_RAW_DRIVER=y -+# CONFIG_HWMON is not set -+CONFIG_FB=y -+CONFIG_FB_MODE_HELPERS=y -+CONFIG_FB_TILEBLITTING=y -+CONFIG_DUMMY_CONSOLE_COLUMNS=128 -+CONFIG_DUMMY_CONSOLE_ROWS=48 -+CONFIG_FRAMEBUFFER_CONSOLE=y -+CONFIG_LOGO=y -+# CONFIG_LOGO_LINUX_MONO is not set -+# CONFIG_LOGO_LINUX_VGA16 is not set -+# CONFIG_LOGO_LINUX_CLUT224 is not set -+CONFIG_SOUND=y -+CONFIG_SND=y -+CONFIG_SND_SEQUENCER=y 
-+CONFIG_SND_HARMONY=y -+CONFIG_EXT2_FS=y -+CONFIG_EXT3_FS=y -+CONFIG_JFS_FS=m -+CONFIG_XFS_FS=m -+CONFIG_AUTOFS4_FS=y -+CONFIG_ISO9660_FS=y -+CONFIG_JOLIET=y -+CONFIG_UDF_FS=m -+CONFIG_MSDOS_FS=m -+CONFIG_VFAT_FS=m -+CONFIG_PROC_KCORE=y -+CONFIG_TMPFS=y -+CONFIG_UFS_FS=m -+CONFIG_NFS_FS=y -+CONFIG_NFS_V4=y -+CONFIG_ROOT_NFS=y -+CONFIG_NFSD=m -+CONFIG_NFSD_V4=y -+CONFIG_CIFS=m -+CONFIG_NLS_CODEPAGE_437=m -+CONFIG_NLS_CODEPAGE_737=m -+CONFIG_NLS_CODEPAGE_775=m -+CONFIG_NLS_CODEPAGE_850=m -+CONFIG_NLS_CODEPAGE_852=m -+CONFIG_NLS_CODEPAGE_855=m -+CONFIG_NLS_CODEPAGE_857=m -+CONFIG_NLS_CODEPAGE_860=m -+CONFIG_NLS_CODEPAGE_861=m -+CONFIG_NLS_CODEPAGE_862=m -+CONFIG_NLS_CODEPAGE_863=m -+CONFIG_NLS_CODEPAGE_864=m -+CONFIG_NLS_CODEPAGE_865=m -+CONFIG_NLS_CODEPAGE_866=m -+CONFIG_NLS_CODEPAGE_869=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_CODEPAGE_950=m -+CONFIG_NLS_CODEPAGE_932=m -+CONFIG_NLS_CODEPAGE_949=m -+CONFIG_NLS_CODEPAGE_874=m -+CONFIG_NLS_ISO8859_8=m -+CONFIG_NLS_CODEPAGE_1250=m -+CONFIG_NLS_CODEPAGE_1251=m -+CONFIG_NLS_ASCII=m -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_ISO8859_2=m -+CONFIG_NLS_ISO8859_3=m -+CONFIG_NLS_ISO8859_4=m -+CONFIG_NLS_ISO8859_5=m -+CONFIG_NLS_ISO8859_6=m -+CONFIG_NLS_ISO8859_7=m -+CONFIG_NLS_ISO8859_9=m -+CONFIG_NLS_ISO8859_13=m -+CONFIG_NLS_ISO8859_14=m -+CONFIG_NLS_ISO8859_15=m -+CONFIG_NLS_KOI8_R=m -+CONFIG_NLS_KOI8_U=m -+CONFIG_NLS_UTF8=m -+CONFIG_DEBUG_FS=y -+CONFIG_MAGIC_SYSRQ=y -+CONFIG_DEBUG_KERNEL=y -+CONFIG_DEBUG_MUTEXES=y -+CONFIG_CRYPTO_TEST=m -+CONFIG_CRYPTO_HMAC=y -+CONFIG_CRYPTO_MICHAEL_MIC=m -+CONFIG_CRYPTO_SHA512=m -+CONFIG_CRYPTO_TGR192=m -+CONFIG_CRYPTO_WP512=m -+CONFIG_CRYPTO_ANUBIS=m -+CONFIG_CRYPTO_BLOWFISH=m -+CONFIG_CRYPTO_CAST6=m -+CONFIG_CRYPTO_KHAZAD=m -+CONFIG_CRYPTO_SERPENT=m -+CONFIG_CRYPTO_TEA=m -+CONFIG_CRYPTO_TWOFISH=m -+CONFIG_CRYPTO_DEFLATE=m -+# CONFIG_CRYPTO_HW is not set -+CONFIG_FONTS=y -+CONFIG_FONT_8x8=y -+CONFIG_FONT_8x16=y -diff --git a/arch/parisc/configs/c3000_defconfig b/arch/parisc/configs/c3000_defconfig -new file mode 100644 -index 000000000000..d1bdfad94048 ---- /dev/null -+++ b/arch/parisc/configs/c3000_defconfig -@@ -0,0 +1,151 @@ -+# CONFIG_LOCALVERSION_AUTO is not set -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=16 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_EXPERT=y -+CONFIG_KALLSYMS_ALL=y -+CONFIG_SLAB=y -+CONFIG_PROFILING=y -+CONFIG_OPROFILE=m -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+CONFIG_MODULE_FORCE_UNLOAD=y -+CONFIG_PA8X00=y -+CONFIG_PREEMPT=y -+# CONFIG_GSC is not set -+CONFIG_PCI=y -+CONFIG_PCI_LBA=y -+# CONFIG_PDC_CHASSIS is not set -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_XFRM_USER=m -+CONFIG_NET_KEY=m -+CONFIG_INET=y -+CONFIG_IP_MULTICAST=y -+CONFIG_IP_PNP=y -+CONFIG_IP_PNP_BOOTP=y -+# CONFIG_INET_DIAG is not set -+CONFIG_INET6_IPCOMP=m -+CONFIG_IPV6_TUNNEL=m -+CONFIG_NETFILTER=y -+CONFIG_NET_PKTGEN=m -+CONFIG_DEVTMPFS=y -+CONFIG_DEVTMPFS_MOUNT=y -+# CONFIG_STANDALONE is not set -+# CONFIG_PREVENT_FIRMWARE_BUILD is not set -+CONFIG_BLK_DEV_UMEM=m -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_CRYPTOLOOP=m -+CONFIG_IDE=y -+CONFIG_BLK_DEV_IDECD=y -+CONFIG_BLK_DEV_NS87415=y -+CONFIG_SCSI=y -+CONFIG_BLK_DEV_SD=y -+CONFIG_CHR_DEV_ST=y -+CONFIG_BLK_DEV_SR=y -+CONFIG_CHR_DEV_SG=y -+CONFIG_SCSI_ISCSI_ATTRS=m -+CONFIG_SCSI_SYM53C8XX_2=y -+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 -+CONFIG_SCSI_DEBUG=m -+CONFIG_MD=y -+CONFIG_BLK_DEV_MD=y -+CONFIG_MD_LINEAR=y -+CONFIG_MD_RAID0=y -+CONFIG_MD_RAID1=y -+CONFIG_BLK_DEV_DM=m -+CONFIG_DM_CRYPT=m -+CONFIG_DM_SNAPSHOT=m 
-+CONFIG_DM_MIRROR=m -+CONFIG_DM_ZERO=m -+CONFIG_DM_MULTIPATH=m -+CONFIG_FUSION=y -+CONFIG_FUSION_SPI=m -+CONFIG_FUSION_CTL=m -+CONFIG_NETDEVICES=y -+CONFIG_BONDING=m -+CONFIG_DUMMY=m -+CONFIG_TUN=m -+CONFIG_ACENIC=m -+CONFIG_TIGON3=m -+CONFIG_NET_TULIP=y -+CONFIG_DE2104X=m -+CONFIG_TULIP=y -+CONFIG_TULIP_MMIO=y -+CONFIG_E100=m -+CONFIG_E1000=m -+CONFIG_PPP=m -+CONFIG_PPP_BSDCOMP=m -+CONFIG_PPP_DEFLATE=m -+CONFIG_PPPOE=m -+CONFIG_PPP_ASYNC=m -+CONFIG_PPP_SYNC_TTY=m -+# CONFIG_KEYBOARD_ATKBD is not set -+# CONFIG_MOUSE_PS2 is not set -+CONFIG_SERIO=m -+CONFIG_SERIO_LIBPS2=m -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_SERIAL_8250_NR_UARTS=13 -+CONFIG_SERIAL_8250_EXTENDED=y -+CONFIG_SERIAL_8250_MANY_PORTS=y -+CONFIG_SERIAL_8250_SHARE_IRQ=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_RAW_DRIVER=y -+# CONFIG_HWMON is not set -+CONFIG_FB=y -+CONFIG_FRAMEBUFFER_CONSOLE=y -+CONFIG_LOGO=y -+# CONFIG_LOGO_LINUX_MONO is not set -+# CONFIG_LOGO_LINUX_VGA16 is not set -+# CONFIG_LOGO_LINUX_CLUT224 is not set -+CONFIG_SOUND=y -+CONFIG_SND=y -+CONFIG_SND_SEQUENCER=y -+CONFIG_SND_AD1889=y -+CONFIG_USB_HIDDEV=y -+CONFIG_USB=y -+CONFIG_USB_OHCI_HCD=y -+CONFIG_USB_PRINTER=m -+CONFIG_USB_STORAGE=m -+CONFIG_USB_STORAGE_USBAT=m -+CONFIG_USB_STORAGE_SDDR09=m -+CONFIG_USB_STORAGE_SDDR55=m -+CONFIG_USB_STORAGE_JUMPSHOT=m -+CONFIG_USB_MDC800=m -+CONFIG_USB_MICROTEK=m -+CONFIG_USB_LEGOTOWER=m -+CONFIG_EXT2_FS=y -+CONFIG_EXT3_FS=y -+CONFIG_XFS_FS=m -+CONFIG_AUTOFS4_FS=y -+CONFIG_ISO9660_FS=y -+CONFIG_JOLIET=y -+CONFIG_MSDOS_FS=m -+CONFIG_VFAT_FS=m -+CONFIG_PROC_KCORE=y -+CONFIG_TMPFS=y -+CONFIG_NFS_FS=y -+CONFIG_ROOT_NFS=y -+CONFIG_NFSD=y -+CONFIG_NFSD_V3=y -+CONFIG_NLS_CODEPAGE_437=m -+CONFIG_NLS_CODEPAGE_850=m -+CONFIG_NLS_ASCII=m -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_ISO8859_15=m -+CONFIG_NLS_UTF8=m -+CONFIG_DEBUG_FS=y -+CONFIG_HEADERS_INSTALL=y -+CONFIG_HEADERS_CHECK=y -+CONFIG_MAGIC_SYSRQ=y -+CONFIG_DEBUG_MUTEXES=y -+# CONFIG_DEBUG_BUGVERBOSE is not set -+CONFIG_CRYPTO_TEST=m -+CONFIG_CRYPTO_MD5=m -+CONFIG_CRYPTO_BLOWFISH=m -+CONFIG_CRYPTO_DES=m -+# CONFIG_CRYPTO_HW is not set -diff --git a/arch/parisc/configs/defconfig b/arch/parisc/configs/defconfig -new file mode 100644 -index 000000000000..0d976614934c ---- /dev/null -+++ b/arch/parisc/configs/defconfig -@@ -0,0 +1,206 @@ -+# CONFIG_LOCALVERSION_AUTO is not set -+CONFIG_SYSVIPC=y -+CONFIG_POSIX_MQUEUE=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=16 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_KALLSYMS_ALL=y -+CONFIG_SLAB=y -+CONFIG_PROFILING=y -+CONFIG_OPROFILE=m -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+CONFIG_MODULE_FORCE_UNLOAD=y -+# CONFIG_BLK_DEV_BSG is not set -+CONFIG_PA7100LC=y -+CONFIG_PREEMPT=y -+CONFIG_IOMMU_CCIO=y -+CONFIG_GSC_LASI=y -+CONFIG_GSC_WAX=y -+CONFIG_EISA=y -+CONFIG_PCI=y -+CONFIG_GSC_DINO=y -+CONFIG_PCI_LBA=y -+CONFIG_PCCARD=y -+CONFIG_YENTA=y -+CONFIG_PD6729=y -+CONFIG_I82092=y -+CONFIG_BINFMT_MISC=m -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_XFRM_USER=m -+CONFIG_NET_KEY=m -+CONFIG_INET=y -+CONFIG_IP_MULTICAST=y -+CONFIG_IP_PNP=y -+CONFIG_IP_PNP_DHCP=y -+CONFIG_IP_PNP_BOOTP=y -+CONFIG_INET_AH=m -+CONFIG_INET_ESP=m -+CONFIG_INET_DIAG=m -+CONFIG_INET6_AH=y -+CONFIG_INET6_ESP=y -+CONFIG_INET6_IPCOMP=y -+CONFIG_LLC2=m -+CONFIG_DEVTMPFS=y -+CONFIG_DEVTMPFS_MOUNT=y -+# CONFIG_STANDALONE is not set -+# CONFIG_PREVENT_FIRMWARE_BUILD is not set -+CONFIG_PARPORT=y -+CONFIG_PARPORT_PC=m -+CONFIG_PARPORT_PC_PCMCIA=m -+CONFIG_PARPORT_1284=y -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_CRYPTOLOOP=y 
-+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=6144 -+CONFIG_IDE=y -+CONFIG_BLK_DEV_IDECS=y -+CONFIG_BLK_DEV_IDECD=y -+CONFIG_BLK_DEV_GENERIC=y -+CONFIG_BLK_DEV_NS87415=y -+CONFIG_SCSI=y -+CONFIG_BLK_DEV_SD=y -+CONFIG_CHR_DEV_ST=y -+CONFIG_BLK_DEV_SR=y -+CONFIG_CHR_DEV_SG=y -+CONFIG_SCSI_LASI700=y -+CONFIG_SCSI_SYM53C8XX_2=y -+CONFIG_SCSI_ZALON=y -+CONFIG_MD=y -+CONFIG_BLK_DEV_MD=y -+CONFIG_MD_LINEAR=y -+CONFIG_MD_RAID0=y -+CONFIG_MD_RAID1=y -+CONFIG_MD_RAID10=y -+CONFIG_BLK_DEV_DM=y -+CONFIG_NETDEVICES=y -+CONFIG_BONDING=m -+CONFIG_DUMMY=m -+CONFIG_TUN=m -+CONFIG_ACENIC=y -+CONFIG_TIGON3=y -+CONFIG_NET_TULIP=y -+CONFIG_TULIP=y -+CONFIG_LASI_82596=y -+CONFIG_PPP=m -+CONFIG_PPP_BSDCOMP=m -+CONFIG_PPP_DEFLATE=m -+CONFIG_PPPOE=m -+CONFIG_PPP_ASYNC=m -+CONFIG_PPP_SYNC_TTY=m -+# CONFIG_KEYBOARD_HIL_OLD is not set -+CONFIG_MOUSE_SERIAL=y -+CONFIG_LEGACY_PTY_COUNT=64 -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_SERIAL_8250_CS=y -+CONFIG_SERIAL_8250_NR_UARTS=17 -+CONFIG_SERIAL_8250_EXTENDED=y -+CONFIG_SERIAL_8250_MANY_PORTS=y -+CONFIG_SERIAL_8250_SHARE_IRQ=y -+CONFIG_PRINTER=m -+CONFIG_PPDEV=m -+# CONFIG_HW_RANDOM is not set -+# CONFIG_HWMON is not set -+CONFIG_FB=y -+CONFIG_FB_MODE_HELPERS=y -+CONFIG_FB_TILEBLITTING=y -+CONFIG_DUMMY_CONSOLE_COLUMNS=128 -+CONFIG_DUMMY_CONSOLE_ROWS=48 -+CONFIG_FRAMEBUFFER_CONSOLE=y -+CONFIG_LOGO=y -+# CONFIG_LOGO_LINUX_MONO is not set -+# CONFIG_LOGO_LINUX_VGA16 is not set -+# CONFIG_LOGO_LINUX_CLUT224 is not set -+CONFIG_SOUND=y -+CONFIG_SND=y -+CONFIG_SND_DYNAMIC_MINORS=y -+CONFIG_SND_SEQUENCER=y -+CONFIG_SND_AD1889=y -+CONFIG_SND_HARMONY=y -+CONFIG_HID_GYRATION=y -+CONFIG_HID_NTRIG=y -+CONFIG_HID_PANTHERLORD=y -+CONFIG_HID_PETALYNX=y -+CONFIG_HID_SAMSUNG=y -+CONFIG_HID_SUNPLUS=y -+CONFIG_HID_TOPSEED=y -+CONFIG_USB=y -+CONFIG_USB_MON=y -+CONFIG_USB_OHCI_HCD=y -+CONFIG_USB_UHCI_HCD=y -+CONFIG_EXT2_FS=y -+CONFIG_EXT3_FS=y -+CONFIG_ISO9660_FS=y -+CONFIG_JOLIET=y -+CONFIG_VFAT_FS=y -+CONFIG_PROC_KCORE=y -+CONFIG_TMPFS=y -+CONFIG_NFS_FS=y -+CONFIG_ROOT_NFS=y -+CONFIG_NFSD=y -+CONFIG_NFSD_V4=y -+CONFIG_CIFS=m -+CONFIG_NLS_CODEPAGE_437=y -+CONFIG_NLS_CODEPAGE_737=m -+CONFIG_NLS_CODEPAGE_775=m -+CONFIG_NLS_CODEPAGE_850=m -+CONFIG_NLS_CODEPAGE_852=m -+CONFIG_NLS_CODEPAGE_855=m -+CONFIG_NLS_CODEPAGE_857=m -+CONFIG_NLS_CODEPAGE_860=m -+CONFIG_NLS_CODEPAGE_861=m -+CONFIG_NLS_CODEPAGE_862=m -+CONFIG_NLS_CODEPAGE_863=m -+CONFIG_NLS_CODEPAGE_864=m -+CONFIG_NLS_CODEPAGE_865=m -+CONFIG_NLS_CODEPAGE_866=m -+CONFIG_NLS_CODEPAGE_869=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_CODEPAGE_950=m -+CONFIG_NLS_CODEPAGE_932=m -+CONFIG_NLS_CODEPAGE_949=m -+CONFIG_NLS_CODEPAGE_874=m -+CONFIG_NLS_ISO8859_8=m -+CONFIG_NLS_CODEPAGE_1250=y -+CONFIG_NLS_CODEPAGE_1251=m -+CONFIG_NLS_ASCII=m -+CONFIG_NLS_ISO8859_1=y -+CONFIG_NLS_ISO8859_2=m -+CONFIG_NLS_ISO8859_3=m -+CONFIG_NLS_ISO8859_4=m -+CONFIG_NLS_ISO8859_5=m -+CONFIG_NLS_ISO8859_6=m -+CONFIG_NLS_ISO8859_7=m -+CONFIG_NLS_ISO8859_9=m -+CONFIG_NLS_ISO8859_13=m -+CONFIG_NLS_ISO8859_14=m -+CONFIG_NLS_ISO8859_15=m -+CONFIG_NLS_KOI8_R=m -+CONFIG_NLS_KOI8_U=m -+CONFIG_NLS_UTF8=y -+CONFIG_DEBUG_FS=y -+CONFIG_HEADERS_INSTALL=y -+CONFIG_HEADERS_CHECK=y -+CONFIG_MAGIC_SYSRQ=y -+CONFIG_DEBUG_KERNEL=y -+CONFIG_DEBUG_MUTEXES=y -+CONFIG_KEYS=y -+CONFIG_CRYPTO_TEST=m -+CONFIG_CRYPTO_MICHAEL_MIC=m -+CONFIG_CRYPTO_SHA512=m -+CONFIG_CRYPTO_TGR192=m -+CONFIG_CRYPTO_WP512=m -+CONFIG_CRYPTO_ANUBIS=m -+CONFIG_CRYPTO_BLOWFISH=m -+CONFIG_CRYPTO_CAST6=m -+CONFIG_CRYPTO_KHAZAD=m -+CONFIG_CRYPTO_SERPENT=m -+CONFIG_CRYPTO_TEA=m -+CONFIG_CRYPTO_TWOFISH=m 
-+# CONFIG_CRYPTO_HW is not set -+CONFIG_LIBCRC32C=m -+CONFIG_FONTS=y -diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig -index 787e829b6f25..22914bbb4caa 100644 ---- a/arch/powerpc/Kconfig -+++ b/arch/powerpc/Kconfig -@@ -882,6 +882,8 @@ config SCHED_SMT - when dealing with POWER5 cpus at a cost of slightly increased - overhead in some places. If unsure say N here. - -+source "kernel/Kconfig.MuQSS" -+ - config PPC_DENORMALISATION - bool "PowerPC denormalisation exception handling" - depends on PPC_BOOK3S_64 -diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig -index 66e9a0fd64ff..c8531232efb7 100644 ---- a/arch/powerpc/configs/ppc6xx_defconfig -+++ b/arch/powerpc/configs/ppc6xx_defconfig -@@ -73,7 +73,7 @@ CONFIG_QE_GPIO=y - CONFIG_MCU_MPC8349EMITX=y - CONFIG_HIGHMEM=y - CONFIG_HZ_1000=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_BINFMT_MISC=y - CONFIG_HIBERNATION=y - CONFIG_PM_DEBUG=y -diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c -index f18d5067cd0f..fe489fc01c73 100644 ---- a/arch/powerpc/platforms/cell/spufs/sched.c -+++ b/arch/powerpc/platforms/cell/spufs/sched.c -@@ -51,11 +51,6 @@ static struct task_struct *spusched_task; - static struct timer_list spusched_timer; - static struct timer_list spuloadavg_timer; - --/* -- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). -- */ --#define NORMAL_PRIO 120 -- - /* - * Frequency of the spu scheduler tick. By default we do one SPU scheduler - * tick for every 10 CPU scheduler ticks. -diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig -index ee6d28ae08de..827e4693c5b2 100644 ---- a/arch/sh/configs/se7712_defconfig -+++ b/arch/sh/configs/se7712_defconfig -@@ -21,7 +21,7 @@ CONFIG_FLATMEM_MANUAL=y - CONFIG_SH_SOLUTION_ENGINE=y - CONFIG_SH_PCLK_FREQ=66666666 - CONFIG_HEARTBEAT=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_CMDLINE_OVERWRITE=y - CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda1" - CONFIG_NET=y -diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig -index bad921bc10f8..e8f42bc0d370 100644 ---- a/arch/sh/configs/se7721_defconfig -+++ b/arch/sh/configs/se7721_defconfig -@@ -21,7 +21,7 @@ CONFIG_FLATMEM_MANUAL=y - CONFIG_SH_7721_SOLUTION_ENGINE=y - CONFIG_SH_PCLK_FREQ=33333333 - CONFIG_HEARTBEAT=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_CMDLINE_OVERWRITE=y - CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda2" - CONFIG_NET=y -diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig -index ba887f1351be..4434e93b70bc 100644 ---- a/arch/sh/configs/titan_defconfig -+++ b/arch/sh/configs/titan_defconfig -@@ -19,7 +19,7 @@ CONFIG_SH_TITAN=y - CONFIG_SH_PCLK_FREQ=30000000 - CONFIG_SH_DMA=y - CONFIG_SH_DMA_API=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_CMDLINE_OVERWRITE=y - CONFIG_CMDLINE="console=ttySC1,38400N81 root=/dev/nfs ip=:::::eth1:autoconf rw" - CONFIG_PCI=y -diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig -index bde4d21a8ac8..c054ec82d91b 100644 ---- a/arch/sparc/configs/sparc64_defconfig -+++ b/arch/sparc/configs/sparc64_defconfig -@@ -22,7 +22,7 @@ CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y - CONFIG_NUMA=y - CONFIG_DEFAULT_MMAP_MIN_ADDR=8192 --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_SUN_LDOMS=y - CONFIG_PCI=y - CONFIG_PCI_MSI=y -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 7101ac64bb20..6f56ad1894d1 100644 
---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1005,6 +1005,22 @@ config NR_CPUS - config SCHED_SMT - def_bool y if SMP - -+config SMT_NICE -+ bool "SMT (Hyperthreading) aware nice priority and policy support" -+ depends on SCHED_MUQSS && SCHED_SMT -+ default y -+ help -+ Enabling Hyperthreading on Intel CPUs decreases the effectiveness -+ of the use of 'nice' levels and different scheduling policies -+ (e.g. realtime) due to sharing of CPU power between hyperthreads. -+ SMT nice support makes each logical CPU aware of what is running on -+ its hyperthread siblings, maintaining appropriate distribution of -+ CPU according to nice levels and scheduling policies at the expense -+ of slightly increased overhead. -+ -+ If unsure say Y here. -+ -+ - config SCHED_MC - def_bool y - prompt "Multi-core scheduler support" -@@ -1035,6 +1051,8 @@ config SCHED_MC_PRIO - - If unsure say Y here. - -+source "kernel/Kconfig.MuQSS" -+ - config UP_LATE_INIT - def_bool y - depends on !SMP && X86_LOCAL_APIC -@@ -1419,7 +1437,7 @@ config HIGHMEM64G - endchoice - - choice -- prompt "Memory split" if EXPERT -+ prompt "Memory split" - default VMSPLIT_3G - depends on X86_32 - help -@@ -1439,17 +1457,17 @@ choice - option alone! - - config VMSPLIT_3G -- bool "3G/1G user/kernel split" -+ bool "Default 896MB lowmem (3G/1G user/kernel split)" - config VMSPLIT_3G_OPT - depends on !X86_PAE -- bool "3G/1G user/kernel split (for full 1G low memory)" -+ bool "1GB lowmem (3G/1G user/kernel split)" - config VMSPLIT_2G -- bool "2G/2G user/kernel split" -+ bool "2GB lowmem (2G/2G user/kernel split)" - config VMSPLIT_2G_OPT - depends on !X86_PAE -- bool "2G/2G user/kernel split (for full 2G low memory)" -+ bool "2GB lowmem (2G/2G user/kernel split)" - config VMSPLIT_1G -- bool "1G/3G user/kernel split" -+ bool "3GB lowmem (1G/3G user/kernel split)" - endchoice - - config PAGE_OFFSET -diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig -index 78210793d357..0c4415b23002 100644 ---- a/arch/x86/configs/i386_defconfig -+++ b/arch/x86/configs/i386_defconfig -@@ -23,6 +23,8 @@ CONFIG_PROFILING=y - CONFIG_SMP=y - CONFIG_X86_GENERIC=y - CONFIG_HPET_TIMER=y -+CONFIG_SCHED_SMT=y -+CONFIG_PREEMPT=y - CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y - CONFIG_X86_REBOOTFIXUPS=y - CONFIG_MICROCODE_AMD=y -diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig -index 9936528e1939..328c7d0a38a1 100644 ---- a/arch/x86/configs/x86_64_defconfig -+++ b/arch/x86/configs/x86_64_defconfig -@@ -20,6 +20,9 @@ CONFIG_BLK_DEV_INITRD=y - # CONFIG_COMPAT_BRK is not set - CONFIG_PROFILING=y - CONFIG_SMP=y -+CONFIG_NR_CPUS=64 -+CONFIG_SCHED_SMT=y -+CONFIG_PREEMPT=y - CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y - CONFIG_MICROCODE_AMD=y - CONFIG_X86_MSR=y -diff --git a/drivers/accessibility/speakup/speakup_acntpc.c b/drivers/accessibility/speakup/speakup_acntpc.c -index c94328a5bd4a..6e7d4671aa69 100644 ---- a/drivers/accessibility/speakup/speakup_acntpc.c -+++ b/drivers/accessibility/speakup/speakup_acntpc.c -@@ -198,7 +198,7 @@ static void do_catch_up(struct spk_synth *synth) - full_time_val = full_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth_full()) { -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout((full_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -@@ -226,7 +226,7 @@ static void do_catch_up(struct spk_synth *synth) - jiffy_delta_val = jiffy_delta->u.n.value; - delay_time_val = delay_time->u.n.value; - 
spin_unlock_irqrestore(&speakup_info.spinlock, flags); -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - jiff_max = jiffies + jiffy_delta_val; - } - } -diff --git a/drivers/accessibility/speakup/speakup_apollo.c b/drivers/accessibility/speakup/speakup_apollo.c -index 0877b4044c28..627102d048c1 100644 ---- a/drivers/accessibility/speakup/speakup_apollo.c -+++ b/drivers/accessibility/speakup/speakup_apollo.c -@@ -165,7 +165,7 @@ static void do_catch_up(struct spk_synth *synth) - if (!synth->io_ops->synth_out(synth, ch)) { - synth->io_ops->tiocmset(0, UART_MCR_RTS); - synth->io_ops->tiocmset(UART_MCR_RTS, 0); -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout(full_time_val); - continue; - } - if (time_after_eq(jiffies, jiff_max) && (ch == SPACE)) { -diff --git a/drivers/accessibility/speakup/speakup_decext.c b/drivers/accessibility/speakup/speakup_decext.c -index 7408eb29cf38..938a0c35968f 100644 ---- a/drivers/accessibility/speakup/speakup_decext.c -+++ b/drivers/accessibility/speakup/speakup_decext.c -@@ -180,7 +180,7 @@ static void do_catch_up(struct spk_synth *synth) - if (ch == '\n') - ch = 0x0D; - if (synth_full() || !synth->io_ops->synth_out(synth, ch)) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - continue; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/accessibility/speakup/speakup_decpc.c b/drivers/accessibility/speakup/speakup_decpc.c -index 96f24c848cc5..1130dfe4da6c 100644 ---- a/drivers/accessibility/speakup/speakup_decpc.c -+++ b/drivers/accessibility/speakup/speakup_decpc.c -@@ -398,7 +398,7 @@ static void do_catch_up(struct spk_synth *synth) - if (ch == '\n') - ch = 0x0D; - if (dt_sendchar(ch)) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout((delay_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/accessibility/speakup/speakup_dectlk.c b/drivers/accessibility/speakup/speakup_dectlk.c -index 780214b5ca16..7b91594c57aa 100644 ---- a/drivers/accessibility/speakup/speakup_dectlk.c -+++ b/drivers/accessibility/speakup/speakup_dectlk.c -@@ -247,7 +247,7 @@ static void do_catch_up(struct spk_synth *synth) - if (ch == '\n') - ch = 0x0D; - if (synth_full_val || !synth->io_ops->synth_out(synth, ch)) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - continue; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/accessibility/speakup/speakup_dtlk.c b/drivers/accessibility/speakup/speakup_dtlk.c -index dbebed0eeeec..6d83c13ca4a6 100644 ---- a/drivers/accessibility/speakup/speakup_dtlk.c -+++ b/drivers/accessibility/speakup/speakup_dtlk.c -@@ -211,7 +211,7 @@ static void do_catch_up(struct spk_synth *synth) - delay_time_val = delay_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth_full()) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout((delay_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -@@ -227,7 +227,7 @@ static void do_catch_up(struct spk_synth *synth) - delay_time_val = delay_time->u.n.value; - jiffy_delta_val = jiffy_delta->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout((delay_time_val)); - jiff_max = jiffies + jiffy_delta_val; - } - } -diff --git a/drivers/accessibility/speakup/speakup_keypc.c 
b/drivers/accessibility/speakup/speakup_keypc.c -index 414827e888fc..cb31c9176daa 100644 ---- a/drivers/accessibility/speakup/speakup_keypc.c -+++ b/drivers/accessibility/speakup/speakup_keypc.c -@@ -199,7 +199,7 @@ static void do_catch_up(struct spk_synth *synth) - full_time_val = full_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth_full()) { -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout((full_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -@@ -232,7 +232,7 @@ static void do_catch_up(struct spk_synth *synth) - jiffy_delta_val = jiffy_delta->u.n.value; - delay_time_val = delay_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - jiff_max = jiffies + jiffy_delta_val; - } - } -diff --git a/drivers/accessibility/speakup/synth.c b/drivers/accessibility/speakup/synth.c -index ac47dbac7207..09f6ba829dfd 100644 ---- a/drivers/accessibility/speakup/synth.c -+++ b/drivers/accessibility/speakup/synth.c -@@ -93,12 +93,8 @@ static void _spk_do_catch_up(struct spk_synth *synth, int unicode) - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (ch == '\n') - ch = synth->procspeech; -- if (unicode) -- ret = synth->io_ops->synth_out_unicode(synth, ch); -- else -- ret = synth->io_ops->synth_out(synth, ch); -- if (!ret) { -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ if (!synth->io_ops->synth_out(synth, ch)) { -+ schedule_msec_hrtimeout(full_time_val); - continue; - } - if (time_after_eq(jiffies, jiff_max) && (ch == SPACE)) { -@@ -108,11 +104,9 @@ static void _spk_do_catch_up(struct spk_synth *synth, int unicode) - full_time_val = full_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth->io_ops->synth_out(synth, synth->procspeech)) -- schedule_timeout( -- msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - else -- schedule_timeout( -- msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout(full_time_val); - jiff_max = jiffies + jiffy_delta_val; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/block/swim.c b/drivers/block/swim.c -index dd34504382e5..0caa1c7e9223 100644 ---- a/drivers/block/swim.c -+++ b/drivers/block/swim.c -@@ -328,7 +328,7 @@ static inline void swim_motor(struct swim __iomem *base, - if (swim_readbit(base, MOTOR_ON)) - break; - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - } else if (action == OFF) { - swim_action(base, MOTOR_OFF); -@@ -347,7 +347,7 @@ static inline void swim_eject(struct swim __iomem *base) - if (!swim_readbit(base, DISK_IN)) - break; - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - swim_select(base, RELAX); - } -@@ -372,6 +372,7 @@ static inline int swim_step(struct swim __iomem *base) - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); -+ schedule_min_hrtimeout(); - - swim_select(base, RELAX); - if (!swim_readbit(base, STEP)) -diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c -index 737c0b6b24ea..a3db1f42bb3b 100644 ---- a/drivers/char/ipmi/ipmi_msghandler.c -+++ b/drivers/char/ipmi/ipmi_msghandler.c -@@ -3542,7 +3542,7 @@ static void cleanup_smi_msgs(struct ipmi_smi *intf) - /* Current message first, to preserve order */ - while (intf->curr_msg && !list_empty(&intf->waiting_rcv_msgs)) { - /* Wait for the message 
to clear out. */ -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - - /* No need for locks, the interface is down. */ -diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c -index 0416b9c9d410..9ce5fae0f1cf 100644 ---- a/drivers/char/ipmi/ipmi_ssif.c -+++ b/drivers/char/ipmi/ipmi_ssif.c -@@ -1288,7 +1288,7 @@ static void shutdown_ssif(void *send_info) - - /* make sure the driver is not looking for flags any more. */ - while (ssif_info->ssif_state != SSIF_NORMAL) -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - - ssif_info->stopping = true; - del_timer_sync(&ssif_info->watch_timer); -diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c -index a95156fc5db7..8f07c8900184 100644 ---- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c -+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c -@@ -235,7 +235,7 @@ static int vmw_fifo_wait_noirq(struct vmw_private *dev_priv, - DRM_ERROR("SVGA device lockup.\n"); - break; - } -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - if (interruptible && signal_pending(current)) { - ret = -ERESTARTSYS; - break; -diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c -index 75f3efee21a4..09b1932ce85b 100644 ---- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c -+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c -@@ -203,7 +203,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, - break; - } - if (lazy) -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - else if ((++count & 0x0F) == 0) { - /** - * FIXME: Use schedule_hr_timeout here for -diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c -index 29f5fed28c2a..974cb08c7aa7 100644 ---- a/drivers/hwmon/fam15h_power.c -+++ b/drivers/hwmon/fam15h_power.c -@@ -221,7 +221,7 @@ static ssize_t power1_average_show(struct device *dev, - prev_ptsc[cu] = data->cpu_sw_pwr_ptsc[cu]; - } - -- leftover = schedule_timeout_interruptible(msecs_to_jiffies(data->power_period)); -+ leftover = schedule_msec_hrtimeout_interruptible((data->power_period)); - if (leftover) - return 0; - -diff --git a/drivers/iio/light/tsl2563.c b/drivers/iio/light/tsl2563.c -index abc8d7db8dc1..baa9d6338a52 100644 ---- a/drivers/iio/light/tsl2563.c -+++ b/drivers/iio/light/tsl2563.c -@@ -269,11 +269,7 @@ static void tsl2563_wait_adc(struct tsl2563_chip *chip) - default: - delay = 402; - } -- /* -- * TODO: Make sure that we wait at least required delay but why we -- * have to extend it one tick more? 
-- */ -- schedule_timeout_interruptible(msecs_to_jiffies(delay) + 2); -+ schedule_msec_hrtimeout_interruptible(delay + 1); - } - - static int tsl2563_adjust_gainlevel(struct tsl2563_chip *chip, u16 adc) -diff --git a/drivers/media/i2c/msp3400-driver.c b/drivers/media/i2c/msp3400-driver.c -index 39530d43590e..a7caf2eb5771 100644 ---- a/drivers/media/i2c/msp3400-driver.c -+++ b/drivers/media/i2c/msp3400-driver.c -@@ -170,7 +170,7 @@ static int msp_read(struct i2c_client *client, int dev, int addr) - break; - dev_warn(&client->dev, "I/O error #%d (read 0x%02x/0x%02x)\n", err, - dev, addr); -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - } - if (err == 3) { - dev_warn(&client->dev, "resetting chip, sound will go off.\n"); -@@ -211,7 +211,7 @@ static int msp_write(struct i2c_client *client, int dev, int addr, int val) - break; - dev_warn(&client->dev, "I/O error #%d (write 0x%02x/0x%02x)\n", err, - dev, addr); -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - } - if (err == 3) { - dev_warn(&client->dev, "resetting chip, sound will go off.\n"); -diff --git a/drivers/media/pci/cx18/cx18-gpio.c b/drivers/media/pci/cx18/cx18-gpio.c -index cf7cfda94107..f63e17489547 100644 ---- a/drivers/media/pci/cx18/cx18-gpio.c -+++ b/drivers/media/pci/cx18/cx18-gpio.c -@@ -81,11 +81,11 @@ static void gpio_reset_seq(struct cx18 *cx, u32 active_lo, u32 active_hi, - - /* Assert */ - gpio_update(cx, mask, ~active_lo); -- schedule_timeout_uninterruptible(msecs_to_jiffies(assert_msecs)); -+ schedule_msec_hrtimeout_uninterruptible((assert_msecs)); - - /* Deassert */ - gpio_update(cx, mask, ~active_hi); -- schedule_timeout_uninterruptible(msecs_to_jiffies(recovery_msecs)); -+ schedule_msec_hrtimeout_uninterruptible((recovery_msecs)); - } - - /* -diff --git a/drivers/media/pci/ivtv/ivtv-gpio.c b/drivers/media/pci/ivtv/ivtv-gpio.c -index 856e7ab7f33e..766a26251337 100644 ---- a/drivers/media/pci/ivtv/ivtv-gpio.c -+++ b/drivers/media/pci/ivtv/ivtv-gpio.c -@@ -105,7 +105,7 @@ void ivtv_reset_ir_gpio(struct ivtv *itv) - curout = (curout & ~0xF) | 1; - write_reg(curout, IVTV_REG_GPIO_OUT); - /* We could use something else for smaller time */ -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible((1)); - curout |= 2; - write_reg(curout, IVTV_REG_GPIO_OUT); - curdir &= ~0x80; -@@ -125,11 +125,11 @@ int ivtv_reset_tuner_gpio(void *dev, int component, int cmd, int value) - curout = read_reg(IVTV_REG_GPIO_OUT); - curout &= ~(1 << itv->card->xceive_pin); - write_reg(curout, IVTV_REG_GPIO_OUT); -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible((1)); - - curout |= 1 << itv->card->xceive_pin; - write_reg(curout, IVTV_REG_GPIO_OUT); -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible((1)); - return 0; - } - -diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.c b/drivers/media/pci/ivtv/ivtv-ioctl.c -index 35dccb31174c..8181cd65e876 100644 ---- a/drivers/media/pci/ivtv/ivtv-ioctl.c -+++ b/drivers/media/pci/ivtv/ivtv-ioctl.c -@@ -1139,7 +1139,7 @@ void ivtv_s_std_dec(struct ivtv *itv, v4l2_std_id std) - TASK_UNINTERRUPTIBLE); - if ((read_reg(IVTV_REG_DEC_LINE_FIELD) >> 16) < 100) - break; -- schedule_timeout(msecs_to_jiffies(25)); -+ schedule_msec_hrtimeout((25)); - } - finish_wait(&itv->vsync_waitq, &wait); - mutex_lock(&itv->serialize_lock); -diff --git a/drivers/media/pci/ivtv/ivtv-streams.c 
b/drivers/media/pci/ivtv/ivtv-streams.c -index f04ee84bab5f..c4469b4b8f99 100644 ---- a/drivers/media/pci/ivtv/ivtv-streams.c -+++ b/drivers/media/pci/ivtv/ivtv-streams.c -@@ -849,7 +849,7 @@ int ivtv_stop_v4l2_encode_stream(struct ivtv_stream *s, int gop_end) - while (!test_bit(IVTV_F_I_EOS, &itv->i_flags) && - time_before(jiffies, - then + msecs_to_jiffies(2000))) { -- schedule_timeout(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout((10)); - } - - /* To convert jiffies to ms, we must multiply by 1000 -diff --git a/drivers/media/radio/radio-mr800.c b/drivers/media/radio/radio-mr800.c -index cb0437b4c331..163fffc0e1d4 100644 ---- a/drivers/media/radio/radio-mr800.c -+++ b/drivers/media/radio/radio-mr800.c -@@ -366,7 +366,7 @@ static int vidioc_s_hw_freq_seek(struct file *file, void *priv, - retval = -ENODATA; - break; - } -- if (schedule_timeout_interruptible(msecs_to_jiffies(10))) { -+ if (schedule_msec_hrtimeout_interruptible((10))) { - retval = -ERESTARTSYS; - break; - } -diff --git a/drivers/media/radio/radio-tea5777.c b/drivers/media/radio/radio-tea5777.c -index fb9de7bbcd19..e53cf45e7f3f 100644 ---- a/drivers/media/radio/radio-tea5777.c -+++ b/drivers/media/radio/radio-tea5777.c -@@ -235,7 +235,7 @@ static int radio_tea5777_update_read_reg(struct radio_tea5777 *tea, int wait) - } - - if (wait) { -- if (schedule_timeout_interruptible(msecs_to_jiffies(wait))) -+ if (schedule_msec_hrtimeout_interruptible((wait))) - return -ERESTARTSYS; - } - -diff --git a/drivers/media/radio/tea575x.c b/drivers/media/radio/tea575x.c -index c37315226c42..e73e6393403c 100644 ---- a/drivers/media/radio/tea575x.c -+++ b/drivers/media/radio/tea575x.c -@@ -401,7 +401,7 @@ int snd_tea575x_s_hw_freq_seek(struct file *file, struct snd_tea575x *tea, - for (;;) { - if (time_after(jiffies, timeout)) - break; -- if (schedule_timeout_interruptible(msecs_to_jiffies(10))) { -+ if (schedule_msec_hrtimeout_interruptible((10))) { - /* some signal arrived, stop search */ - tea->val &= ~TEA575X_BIT_SEARCH; - snd_tea575x_set_freq(tea); -diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c -index b690796d24d4..448b13da62b4 100644 ---- a/drivers/mfd/ucb1x00-core.c -+++ b/drivers/mfd/ucb1x00-core.c -@@ -250,7 +250,7 @@ unsigned int ucb1x00_adc_read(struct ucb1x00 *ucb, int adc_channel, int sync) - break; - /* yield to other processes */ - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - - return UCB_ADC_DAT(val); -diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c -index 8e6607fc8a67..b9ab770bbdb5 100644 ---- a/drivers/misc/sgi-xp/xpc_channel.c -+++ b/drivers/misc/sgi-xp/xpc_channel.c -@@ -834,7 +834,7 @@ xpc_allocate_msg_wait(struct xpc_channel *ch) - - atomic_inc(&ch->n_on_msg_allocate_wq); - prepare_to_wait(&ch->msg_allocate_wq, &wait, TASK_INTERRUPTIBLE); -- ret = schedule_timeout(1); -+ ret = schedule_min_hrtimeout(); - finish_wait(&ch->msg_allocate_wq, &wait); - atomic_dec(&ch->n_on_msg_allocate_wq); - -diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c -index 4a33ec4fc089..da85f847ebb4 100644 ---- a/drivers/net/caif/caif_hsi.c -+++ b/drivers/net/caif/caif_hsi.c -@@ -939,7 +939,7 @@ static void cfhsi_wake_down(struct work_struct *work) - break; - - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - retry--; - } - -diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c -index 66d0198e7834..ce1c7bf9be87 100644 ---- 
a/drivers/net/can/usb/peak_usb/pcan_usb.c -+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c -@@ -242,7 +242,7 @@ static int pcan_usb_write_mode(struct peak_usb_device *dev, u8 onoff) - } else { - /* the PCAN-USB needs time to init */ - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(PCAN_USB_STARTUP_TIMEOUT)); -+ schedule_msec_hrtimeout((PCAN_USB_STARTUP_TIMEOUT)); - } - - return err; -diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c -index 65b315bc60ab..2b3f71086f5f 100644 ---- a/drivers/net/usb/lan78xx.c -+++ b/drivers/net/usb/lan78xx.c -@@ -2666,7 +2666,7 @@ static void lan78xx_terminate_urbs(struct lan78xx_net *dev) - while (!skb_queue_empty(&dev->rxq) && - !skb_queue_empty(&dev->txq) && - !skb_queue_empty(&dev->done)) { -- schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS)); -+ schedule_msec_hrtimeout((UNLINK_TIMEOUT_MS)); - set_current_state(TASK_UNINTERRUPTIBLE); - netif_dbg(dev, ifdown, dev->net, - "waited for %d urb completions\n", temp); -diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c -index 2b2a841cd938..1a4d27179db1 100644 ---- a/drivers/net/usb/usbnet.c -+++ b/drivers/net/usb/usbnet.c -@@ -767,7 +767,7 @@ static void wait_skb_queue_empty(struct sk_buff_head *q) - spin_lock_irqsave(&q->lock, flags); - while (!skb_queue_empty(q)) { - spin_unlock_irqrestore(&q->lock, flags); -- schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS)); -+ schedule_msec_hrtimeout((UNLINK_TIMEOUT_MS)); - set_current_state(TASK_UNINTERRUPTIBLE); - spin_lock_irqsave(&q->lock, flags); - } -diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c -index 461e955aa259..5ab8e7396ea4 100644 ---- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c -+++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c -@@ -816,7 +816,7 @@ static int ipw2100_hw_send_command(struct ipw2100_priv *priv, - * doesn't seem to have as many firmware restart cycles... - * - * As a test, we're sticking in a 1/100s delay here */ -- schedule_timeout_uninterruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_uninterruptible((10)); - - return 0; - -@@ -1267,7 +1267,7 @@ static int ipw2100_start_adapter(struct ipw2100_priv *priv) - IPW_DEBUG_FW("Waiting for f/w initialization to complete...\n"); - i = 5000; - do { -- schedule_timeout_uninterruptible(msecs_to_jiffies(40)); -+ schedule_msec_hrtimeout_uninterruptible((40)); - /* Todo... wait for sync command ... */ - - read_register(priv->net_dev, IPW_REG_INTA, &inta); -diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c -index 4547ac44c8d4..8fa1a7fdf12c 100644 ---- a/drivers/parport/ieee1284.c -+++ b/drivers/parport/ieee1284.c -@@ -202,7 +202,7 @@ int parport_wait_peripheral(struct parport *port, - /* parport_wait_event didn't time out, but the - * peripheral wasn't actually ready either. - * Wait for another 10ms. */ -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - } - } - -diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c -index 2c11bd3fe1fd..8cb6b61c0880 100644 ---- a/drivers/parport/ieee1284_ops.c -+++ b/drivers/parport/ieee1284_ops.c -@@ -520,7 +520,7 @@ size_t parport_ieee1284_ecp_read_data (struct parport *port, - /* Yield the port for a while. 
*/ - if (count && dev->port->irq != PARPORT_IRQ_NONE) { - parport_release (dev); -- schedule_timeout_interruptible(msecs_to_jiffies(40)); -+ schedule_msec_hrtimeout_interruptible((40)); - parport_claim_or_block (dev); - } - else -diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c -index bffe548187ee..c2918ee3e100 100644 ---- a/drivers/platform/x86/intel_ips.c -+++ b/drivers/platform/x86/intel_ips.c -@@ -798,7 +798,7 @@ static int ips_adjust(void *data) - ips_gpu_lower(ips); - - sleep: -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_ADJUST_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_ADJUST_PERIOD)); - } while (!kthread_should_stop()); - - dev_dbg(ips->dev, "ips-adjust thread stopped\n"); -@@ -974,7 +974,7 @@ static int ips_monitor(void *data) - seqno_timestamp = get_jiffies_64(); - - old_cpu_power = thm_readl(THM_CEC); -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_SAMPLE_PERIOD)); - - /* Collect an initial average */ - for (i = 0; i < IPS_SAMPLE_COUNT; i++) { -@@ -1001,7 +1001,7 @@ static int ips_monitor(void *data) - mchp_samples[i] = mchp; - } - -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_SAMPLE_PERIOD)); - if (kthread_should_stop()) - break; - } -@@ -1028,7 +1028,7 @@ static int ips_monitor(void *data) - * us to reduce the sample frequency if the CPU and GPU are idle. - */ - old_cpu_power = thm_readl(THM_CEC); -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_SAMPLE_PERIOD)); - last_sample_period = IPS_SAMPLE_PERIOD; - - timer_setup(&ips->timer, monitor_timeout, TIMER_DEFERRABLE); -diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c -index 2018614f258f..fc19b312c345 100644 ---- a/drivers/rtc/rtc-wm8350.c -+++ b/drivers/rtc/rtc-wm8350.c -@@ -114,7 +114,7 @@ static int wm8350_rtc_settime(struct device *dev, struct rtc_time *tm) - /* Wait until confirmation of stopping */ - do { - rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); -- schedule_timeout_uninterruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_uninterruptible((1)); - } while (--retries && !(rtc_ctrl & WM8350_RTC_STS)); - - if (!retries) { -@@ -197,7 +197,7 @@ static int wm8350_rtc_stop_alarm(struct wm8350 *wm8350) - /* Wait until confirmation of stopping */ - do { - rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); -- schedule_timeout_uninterruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_uninterruptible((1)); - } while (retries-- && !(rtc_ctrl & WM8350_RTC_ALMSTS)); - - if (!(rtc_ctrl & WM8350_RTC_ALMSTS)) -@@ -220,7 +220,7 @@ static int wm8350_rtc_start_alarm(struct wm8350 *wm8350) - /* Wait until confirmation */ - do { - rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); -- schedule_timeout_uninterruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_uninterruptible((1)); - } while (retries-- && rtc_ctrl & WM8350_RTC_ALMSTS); - - if (rtc_ctrl & WM8350_RTC_ALMSTS) -diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c -index 03b1805b106c..41ee54ff304a 100644 ---- a/drivers/scsi/fnic/fnic_scsi.c -+++ b/drivers/scsi/fnic/fnic_scsi.c -@@ -217,7 +217,7 @@ int fnic_fw_reset_handler(struct fnic *fnic) - - /* wait for io cmpl */ - while (atomic_read(&fnic->in_flight)) -- schedule_timeout(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout((1)); - - spin_lock_irqsave(&fnic->wq_copy_lock[0], 
flags); - -@@ -2278,7 +2278,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, - } - } - -- schedule_timeout(msecs_to_jiffies(2 * fnic->config.ed_tov)); -+ schedule_msec_hrtimeout((2 * fnic->config.ed_tov)); - - /* walk again to check, if IOs are still pending in fw */ - if (fnic_is_abts_pending(fnic, lr_sc)) -diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c -index 983eeb0e3d07..007966930f94 100644 ---- a/drivers/scsi/lpfc/lpfc_scsi.c -+++ b/drivers/scsi/lpfc/lpfc_scsi.c -@@ -5194,7 +5194,7 @@ lpfc_reset_flush_io_context(struct lpfc_vport *vport, uint16_t tgt_id, - tgt_id, lun_id, context); - later = msecs_to_jiffies(2 * vport->cfg_devloss_tmo * 1000) + jiffies; - while (time_after(later, jiffies) && cnt) { -- schedule_timeout_uninterruptible(msecs_to_jiffies(20)); -+ schedule_msec_hrtimeout_uninterruptible((20)); - cnt = lpfc_sli_sum_iocb(vport, tgt_id, lun_id, context); - } - if (cnt) { -diff --git a/drivers/scsi/snic/snic_scsi.c b/drivers/scsi/snic/snic_scsi.c -index b3650c989ed4..7ed1fb285754 100644 ---- a/drivers/scsi/snic/snic_scsi.c -+++ b/drivers/scsi/snic/snic_scsi.c -@@ -2353,7 +2353,7 @@ snic_reset(struct Scsi_Host *shost, struct scsi_cmnd *sc) - - /* Wait for all the IOs that are entered in Qcmd */ - while (atomic_read(&snic->ios_inflight)) -- schedule_timeout(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout((1)); - - ret = snic_issue_hba_reset(snic, sc); - if (ret) { -diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c -index 9266e13f6271..df5c53216d78 100644 ---- a/drivers/staging/comedi/drivers/ni_mio_common.c -+++ b/drivers/staging/comedi/drivers/ni_mio_common.c -@@ -4748,7 +4748,7 @@ static int cs5529_wait_for_idle(struct comedi_device *dev) - if ((status & NI67XX_CAL_STATUS_BUSY) == 0) - break; - set_current_state(TASK_INTERRUPTIBLE); -- if (schedule_timeout(1)) -+ if (schedule_min_hrtimeout()) - return -EIO; - } - if (i == timeout) { -diff --git a/drivers/staging/rts5208/rtsx.c b/drivers/staging/rts5208/rtsx.c -index 898add4d1fc8..0aa9dd467349 100644 ---- a/drivers/staging/rts5208/rtsx.c -+++ b/drivers/staging/rts5208/rtsx.c -@@ -477,7 +477,7 @@ static int rtsx_polling_thread(void *__dev) - - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(POLLING_INTERVAL)); -+ schedule_msec_hrtimeout((POLLING_INTERVAL)); - - /* lock the device pointers */ - mutex_lock(&dev->dev_mutex); -diff --git a/drivers/staging/unisys/visornic/visornic_main.c b/drivers/staging/unisys/visornic/visornic_main.c -index 0433536930a9..d8726f28843f 100644 ---- a/drivers/staging/unisys/visornic/visornic_main.c -+++ b/drivers/staging/unisys/visornic/visornic_main.c -@@ -549,7 +549,7 @@ static int visornic_disable_with_timeout(struct net_device *netdev, - } - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(&devdata->priv_lock, flags); -- wait += schedule_timeout(msecs_to_jiffies(10)); -+ wait += schedule_msec_hrtimeout((10)); - spin_lock_irqsave(&devdata->priv_lock, flags); - } - -@@ -560,7 +560,7 @@ static int visornic_disable_with_timeout(struct net_device *netdev, - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(&devdata->priv_lock, flags); -- schedule_timeout(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout((10)); - spin_lock_irqsave(&devdata->priv_lock, flags); - if (atomic_read(&devdata->usage)) - break; -@@ -714,7 +714,7 @@ static int visornic_enable_with_timeout(struct net_device *netdev, - } - 
set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(&devdata->priv_lock, flags); -- wait += schedule_timeout(msecs_to_jiffies(10)); -+ wait += schedule_msec_hrtimeout((10)); - spin_lock_irqsave(&devdata->priv_lock, flags); - } - -diff --git a/drivers/video/fbdev/omap/hwa742.c b/drivers/video/fbdev/omap/hwa742.c -index cfe63932f825..71c00ef772a3 100644 ---- a/drivers/video/fbdev/omap/hwa742.c -+++ b/drivers/video/fbdev/omap/hwa742.c -@@ -913,7 +913,7 @@ static void hwa742_resume(void) - if (hwa742_read_reg(HWA742_PLL_DIV_REG) & (1 << 7)) - break; - set_current_state(TASK_UNINTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(5)); -+ schedule_msec_hrtimeout((5)); - } - hwa742_set_update_mode(hwa742.update_mode_before_suspend); - } -diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c -index f1551e00eb12..f0f651e92504 100644 ---- a/drivers/video/fbdev/pxafb.c -+++ b/drivers/video/fbdev/pxafb.c -@@ -1287,7 +1287,7 @@ static int pxafb_smart_thread(void *arg) - mutex_unlock(&fbi->ctrlr_lock); - - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(30)); -+ schedule_msec_hrtimeout((30)); - } - - pr_debug("%s(): task ending\n", __func__); -diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c -index 76d2e43817ea..6ba0604e2162 100644 ---- a/fs/btrfs/inode-map.c -+++ b/fs/btrfs/inode-map.c -@@ -91,7 +91,7 @@ static int caching_kthread(void *data) - btrfs_release_path(path); - root->ino_cache_progress = last; - up_read(&fs_info->commit_root_sem); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - goto again; - } else - continue; -diff --git a/fs/proc/base.c b/fs/proc/base.c -index 617db4e0faa0..f85926764f9a 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -479,7 +479,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, - seq_puts(m, "0 0 0\n"); - else - seq_printf(m, "%llu %llu %lu\n", -- (unsigned long long)task->se.sum_exec_runtime, -+ (unsigned long long)tsk_seruntime(task), - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); - -diff --git a/include/linux/freezer.h b/include/linux/freezer.h -index 27828145ca09..504cc97bf475 100644 ---- a/include/linux/freezer.h -+++ b/include/linux/freezer.h -@@ -311,6 +311,7 @@ static inline void set_freezable(void) {} - #define wait_event_freezekillable_unsafe(wq, condition) \ - wait_event_killable(wq, condition) - -+#define pm_freezing (false) - #endif /* !CONFIG_FREEZER */ - - #endif /* FREEZER_H_INCLUDED */ -diff --git a/include/linux/init_task.h b/include/linux/init_task.h -index 2c620d7ac432..73417df5daa2 100644 ---- a/include/linux/init_task.h -+++ b/include/linux/init_task.h -@@ -36,7 +36,11 @@ extern struct cred init_cred; - #define INIT_PREV_CPUTIME(x) - #endif - -+#ifdef CONFIG_SCHED_MUQSS -+#define INIT_TASK_COMM "MuQSS" -+#else - #define INIT_TASK_COMM "swapper" -+#endif - - /* Attach to the init_task data structure for proper alignment */ - #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK -diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h -index e9bfe6972aed..16ba1c7e5bde 100644 ---- a/include/linux/ioprio.h -+++ b/include/linux/ioprio.h -@@ -53,6 +53,8 @@ enum { - */ - static inline int task_nice_ioprio(struct task_struct *task) - { -+ if (iso_task(task)) -+ return 0; - return (task_nice(task) + 20) / 5; - } - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index afe01e232935..139e4535fcc6 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -35,6 +35,10 @@ - #include - #include - -+#ifdef 
CONFIG_SCHED_MUQSS -+#include -+#endif -+ - /* task_struct member predeclarations (sorted alphabetically): */ - struct audit_context; - struct backing_dev_info; -@@ -213,13 +217,40 @@ struct task_group; - - extern void scheduler_tick(void); - --#define MAX_SCHEDULE_TIMEOUT LONG_MAX -- -+#define MAX_SCHEDULE_TIMEOUT LONG_MAX - extern long schedule_timeout(long timeout); - extern long schedule_timeout_interruptible(long timeout); - extern long schedule_timeout_killable(long timeout); - extern long schedule_timeout_uninterruptible(long timeout); - extern long schedule_timeout_idle(long timeout); -+ -+#ifdef CONFIG_HIGH_RES_TIMERS -+extern long schedule_msec_hrtimeout(long timeout); -+extern long schedule_min_hrtimeout(void); -+extern long schedule_msec_hrtimeout_interruptible(long timeout); -+extern long schedule_msec_hrtimeout_uninterruptible(long timeout); -+#else -+static inline long schedule_msec_hrtimeout(long timeout) -+{ -+ return schedule_timeout(msecs_to_jiffies(timeout)); -+} -+ -+static inline long schedule_min_hrtimeout(void) -+{ -+ return schedule_timeout(1); -+} -+ -+static inline long schedule_msec_hrtimeout_interruptible(long timeout) -+{ -+ return schedule_timeout_interruptible(msecs_to_jiffies(timeout)); -+} -+ -+static inline long schedule_msec_hrtimeout_uninterruptible(long timeout) -+{ -+ return schedule_timeout_uninterruptible(msecs_to_jiffies(timeout)); -+} -+#endif -+ - asmlinkage void schedule(void); - extern void schedule_preempt_disabled(void); - asmlinkage void preempt_schedule_irq(void); -@@ -651,8 +682,10 @@ struct task_struct { - unsigned int flags; - unsigned int ptrace; - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_MUQSS) - int on_cpu; -+#endif -+#ifdef CONFIG_SMP - struct __call_single_node wake_entry; - #ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ -@@ -678,10 +711,25 @@ struct task_struct { - int static_prio; - int normal_prio; - unsigned int rt_priority; -+#ifdef CONFIG_SCHED_MUQSS -+ int time_slice; -+ u64 deadline; -+ skiplist_node node; /* Skip list node */ -+ u64 last_ran; -+ u64 sched_time; /* sched_clock time spent running */ -+#ifdef CONFIG_SMT_NICE -+ int smt_bias; /* Policy/nice level bias across smt siblings */ -+#endif -+#ifdef CONFIG_HOTPLUG_CPU -+ bool zerobound; /* Bound to CPU0 for hotplug */ -+#endif -+ unsigned long rt_timeout; -+#else /* CONFIG_SCHED_MUQSS */ - - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; -+#endif - #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; - #endif -@@ -863,6 +911,10 @@ struct task_struct { - #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME - u64 utimescaled; - u64 stimescaled; -+#endif -+#ifdef CONFIG_SCHED_MUQSS -+ /* Unbanked cpu time */ -+ unsigned long utime_ns, stime_ns; - #endif - u64 gtime; - struct prev_cputime prev_cputime; -@@ -1332,6 +1384,40 @@ struct task_struct { - */ - }; - -+#ifdef CONFIG_SCHED_MUQSS -+#define tsk_seruntime(t) ((t)->sched_time) -+#define tsk_rttimeout(t) ((t)->rt_timeout) -+ -+static inline void tsk_cpus_current(struct task_struct *p) -+{ -+} -+ -+void print_scheduler_version(void); -+ -+static inline bool iso_task(struct task_struct *p) -+{ -+ return (p->policy == SCHED_ISO); -+} -+#else /* CFS */ -+#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) -+#define tsk_rttimeout(t) ((t)->rt.timeout) -+ -+static inline void tsk_cpus_current(struct task_struct *p) -+{ -+ p->nr_cpus_allowed = current->nr_cpus_allowed; -+} -+ -+static inline void print_scheduler_version(void) -+{ -+ 
printk(KERN_INFO "CFS CPU scheduler.\n"); -+} -+ -+static inline bool iso_task(struct task_struct *p) -+{ -+ return false; -+} -+#endif /* CONFIG_SCHED_MUQSS */ -+ - static inline struct pid *task_pid(struct task_struct *task) - { - return task->thread_pid; -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 1aff00b65f3c..73d6319a856a 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -28,7 +28,16 @@ static inline bool dl_time_before(u64 a, u64 b) - #ifdef CONFIG_SMP - - struct root_domain; -+#ifdef CONFIG_SCHED_MUQSS -+static inline void dl_clear_root_domain(struct root_domain *rd) -+{ -+} -+static inline void dl_add_task_root_domain(struct task_struct *p) -+{ -+} -+#else /* CONFIG_SCHED_MUQSS */ - extern void dl_add_task_root_domain(struct task_struct *p); - extern void dl_clear_root_domain(struct root_domain *rd); -+#endif /* CONFIG_SCHED_MUQSS */ - - #endif /* CONFIG_SMP */ -diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h -index 6d67e9a5af6b..101fe470aa8f 100644 ---- a/include/linux/sched/nohz.h -+++ b/include/linux/sched/nohz.h -@@ -13,7 +13,7 @@ extern int get_nohz_timer_target(void); - static inline void nohz_balance_enter_idle(int cpu) { } - #endif - --#ifdef CONFIG_NO_HZ_COMMON -+#if defined(CONFIG_NO_HZ_COMMON) && !defined(CONFIG_SCHED_MUQSS) - void calc_load_nohz_start(void); - void calc_load_nohz_remote(struct rq *rq); - void calc_load_nohz_stop(void); -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 7d64feafc408..43c9d9e50c09 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -20,8 +20,20 @@ - */ - - #define MAX_USER_RT_PRIO 100 -+ -+#ifdef CONFIG_SCHED_MUQSS -+/* Note different MAX_RT_PRIO */ -+#define MAX_RT_PRIO (MAX_USER_RT_PRIO + 1) -+ -+#define ISO_PRIO (MAX_RT_PRIO) -+#define NORMAL_PRIO (MAX_RT_PRIO + 1) -+#define IDLE_PRIO (MAX_RT_PRIO + 2) -+#define PRIO_LIMIT ((IDLE_PRIO) + 1) -+#else /* CONFIG_SCHED_MUQSS */ - #define MAX_RT_PRIO MAX_USER_RT_PRIO - -+#endif /* CONFIG_SCHED_MUQSS */ -+ - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..010b2244e0b6 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return true; -+#ifndef CONFIG_SCHED_MUQSS - if (policy == SCHED_DEADLINE) - return true; -+#endif - return false; - } - -diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h -index a98965007eef..743f67fd012e 100644 ---- a/include/linux/sched/task.h -+++ b/include/linux/sched/task.h -@@ -93,7 +93,7 @@ int kernel_wait(pid_t pid, int *stat); - extern void free_task(struct task_struct *tsk); - - /* sched_exec is called by processes performing an exec */ --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_MUQSS) - extern void sched_exec(void); - #else - #define sched_exec() {} -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -new file mode 100644 -index 000000000000..d4be84ba273b ---- /dev/null -+++ b/include/linux/skip_list.h -@@ -0,0 +1,33 @@ -+#ifndef _LINUX_SKIP_LISTS_H -+#define _LINUX_SKIP_LISTS_H -+typedef u64 keyType; -+typedef void *valueType; -+ -+typedef struct nodeStructure skiplist_node; -+ -+struct nodeStructure { -+ int level; /* Levels in this structure */ -+ keyType key; -+ 
valueType value; -+ skiplist_node *next[8]; -+ skiplist_node *prev[8]; -+}; -+ -+typedef struct listStructure { -+ int entries; -+ int level; /* Maximum level of the list -+ (1 more than the number of levels in the list) */ -+ skiplist_node *header; /* pointer to header */ -+} skiplist; -+ -+void skiplist_init(skiplist_node *slnode); -+skiplist *new_skiplist(skiplist_node *slnode); -+void free_skiplist(skiplist *l); -+void skiplist_node_init(skiplist_node *node); -+void skiplist_insert(skiplist *l, skiplist_node *node, keyType key, valueType value, unsigned int randseed); -+void skiplist_delete(skiplist *l, skiplist_node *node); -+ -+static inline bool skiplist_node_empty(skiplist_node *node) { -+ return (!node->next[0]); -+} -+#endif /* _LINUX_SKIP_LISTS_H */ -diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h -index 3bac0a8ceab2..f48c5c5da651 100644 ---- a/include/uapi/linux/sched.h -+++ b/include/uapi/linux/sched.h -@@ -115,9 +115,16 @@ struct clone_args { - #define SCHED_FIFO 1 - #define SCHED_RR 2 - #define SCHED_BATCH 3 --/* SCHED_ISO: reserved but not implemented yet */ -+/* SCHED_ISO: Implemented on MuQSS only */ - #define SCHED_IDLE 5 -+#ifdef CONFIG_SCHED_MUQSS -+#define SCHED_ISO 4 -+#define SCHED_IDLEPRIO SCHED_IDLE -+#define SCHED_MAX (SCHED_IDLEPRIO) -+#define SCHED_RANGE(policy) ((policy) <= SCHED_MAX) -+#else /* CONFIG_SCHED_MUQSS */ - #define SCHED_DEADLINE 6 -+#endif /* CONFIG_SCHED_MUQSS */ - - /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ - #define SCHED_RESET_ON_FORK 0x40000000 -diff --git a/init/Kconfig b/init/Kconfig -index d6a0b31b13dc..7e0eb99bd607 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -92,6 +92,18 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config SCHED_MUQSS -+ bool "MuQSS cpu scheduler" -+ select HIGH_RES_TIMERS -+ help -+ The Multiple Queue Skiplist Scheduler for excellent interactivity and -+ responsiveness on the desktop and highly scalable deterministic -+ low latency on any hardware. -+ -+ Say Y here. -+ default y -+ -+ - config BROKEN - bool - -@@ -510,6 +522,7 @@ config SCHED_THERMAL_PRESSURE - default y if ARM64 - depends on SMP - depends on CPU_FREQ_THERMAL -+ depends on !SCHED_MUQSS - help - Select this option to enable thermal pressure accounting in the - scheduler. Thermal pressure is the value conveyed to the scheduler -@@ -858,6 +871,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION -+ depends on !SCHED_MUQSS - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -942,9 +956,13 @@ menuconfig CGROUP_SCHED - help - This feature lets CPU scheduler recognize task groups and control CPU - bandwidth allocation to such task groups. It uses cgroups to group -- tasks. -+ tasks. In combination with MuQSS this is purely a STUB to create the -+ files associated with the CPU controller cgroup but most of the -+ controls do nothing. This is useful for working in environments and -+ with applications that will only work if this control group is -+ present. 
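
For reference, a minimal userspace sketch (not part of this patch) of how a task could request the SCHED_ISO policy that the uapi hunk above exposes. The policy number 4 is taken from that hunk; since the CONFIG_SCHED_MUQSS guard only exists in-kernel, the program defines the constant itself and treats a rejected call as the non-MuQSS case.

#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

#ifndef SCHED_ISO
#define SCHED_ISO 4    /* policy number from the uapi change above */
#endif

int main(void)
{
    struct sched_param sp = { .sched_priority = 0 };  /* ISO takes no rt priority */

    if (sched_setscheduler(0, SCHED_ISO, &sp) == -1) {
        /* Kernels without MuQSS reject the unknown policy */
        fprintf(stderr, "SCHED_ISO unavailable: %s\n", strerror(errno));
        return 1;
    }
    printf("running as SCHED_ISO (pseudo-realtime, capped by sched_iso_cpu)\n");
    return 0;
}
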
- --if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_MUQSS - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED -@@ -1073,6 +1091,7 @@ config CGROUP_DEVICE - - config CGROUP_CPUACCT - bool "Simple CPU accounting controller" -+ depends on !SCHED_MUQSS - help - Provides a simple controller for monitoring the - total CPU consumed by the tasks in a cgroup. -@@ -1200,6 +1219,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -+ depends on !SCHED_MUQSS - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -diff --git a/init/init_task.c b/init/init_task.c -index f6889fce64af..2557beb609c0 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -75,9 +75,17 @@ struct task_struct init_task - .stack = init_stack, - .usage = REFCOUNT_INIT(2), - .flags = PF_KTHREAD, -+#ifdef CONFIG_SCHED_MUQSS -+ .prio = NORMAL_PRIO, -+ .static_prio = MAX_PRIO - 20, -+ .normal_prio = NORMAL_PRIO, -+ .deadline = 0, -+ .time_slice = 1000000, -+#else - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, - .normal_prio = MAX_PRIO - 20, -+#endif - .policy = SCHED_NORMAL, - .cpus_ptr = &init_task.cpus_mask, - .cpus_mask = CPU_MASK_ALL, -@@ -87,6 +95,7 @@ struct task_struct init_task - .restart_block = { - .fn = do_no_restart_syscall, - }, -+#ifndef CONFIG_SCHED_MUQSS - .se = { - .group_node = LIST_HEAD_INIT(init_task.se.group_node), - }, -@@ -94,6 +103,7 @@ struct task_struct init_task - .run_list = LIST_HEAD_INIT(init_task.rt.run_list), - .time_slice = RR_TIMESLICE, - }, -+#endif - .tasks = LIST_HEAD_INIT(init_task.tasks), - #ifdef CONFIG_SMP - .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), -diff --git a/init/main.c b/init/main.c -index e880b4ecb314..fe0a705e83f2 100644 ---- a/init/main.c -+++ b/init/main.c -@@ -1421,6 +1421,8 @@ static int __ref kernel_init(void *unused) - - do_sysctl_args(); - -+ print_scheduler_version(); -+ - if (ramdisk_execute_command) { - ret = run_init_process(ramdisk_execute_command); - if (!ret) -diff --git a/kernel/Kconfig.MuQSS b/kernel/Kconfig.MuQSS -new file mode 100644 -index 000000000000..a6a58781ef91 ---- /dev/null -+++ b/kernel/Kconfig.MuQSS -@@ -0,0 +1,105 @@ -+choice -+ prompt "CPU scheduler runqueue sharing" -+ default RQ_MC if SCHED_MUQSS -+ default RQ_NONE -+ -+config RQ_NONE -+ bool "No sharing" -+ help -+ This is the default behaviour where the CPU scheduler has one runqueue -+ per CPU, whether it is a physical or logical CPU (hyperthread). -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=none -+ -+ If unsure, say N. -+ -+config RQ_SMT -+ bool "SMT (hyperthread) siblings" -+ depends on SCHED_SMT && SCHED_MUQSS -+ -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by SMT (hyperthread) siblings. As these logical cores share -+ one physical core, sharing the runqueue resource can lead to decreased -+ overhead, lower latency and higher throughput. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=smt -+ -+ If unsure, say N. -+ -+config RQ_MC -+ bool "Multicore siblings" -+ depends on SCHED_MC && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by multicore siblings in addition to any SMT siblings. -+ As these physical cores share caches, sharing the runqueue resource -+ will lead to lower latency, but its effects on overhead and throughput -+ are less predictable. 
As a general rule, 6 or fewer cores will likely -+ benefit from this, while larger CPUs will only derive a latency -+ benefit. If your workloads are primarily single threaded, this will -+ possibly worsen throughput. If you are only concerned about latency -+ then enable this regardless of how many cores you have. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=mc -+ -+ If unsure, say Y. -+ -+config RQ_MC_LLC -+ bool "Multicore siblings (LLC)" -+ depends on SCHED_MC && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will behave similarly as -+ with "Multicore siblings". -+ This option takes LLC cache into account when scheduling tasks. -+ Option may benefit CPUs with multiple LLC caches, such as Ryzen -+ and Xeon CPUs. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=llc -+ -+ If unsure, say N. -+ -+config RQ_SMP -+ bool "Symmetric Multi-Processing" -+ depends on SMP && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by all physical CPUs unless they are on separate NUMA nodes. -+ As physical CPUs usually do not share resources, sharing the runqueue -+ will normally worsen throughput but improve latency. If you only -+ care about latency enable this. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=smp -+ -+ If unsure, say N. -+ -+config RQ_ALL -+ bool "NUMA" -+ depends on SMP && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ regardless of the architecture configuration, including across NUMA -+ nodes. This can substantially decrease throughput in NUMA -+ configurations, but light NUMA designs will not be dramatically -+ affected. This option should only be chosen if latency is the prime -+ concern. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=all -+ -+ If unsure, say N. -+endchoice -+ -+config SHARERQ -+ int -+ default 0 if RQ_NONE -+ default 1 if RQ_SMT -+ default 2 if RQ_MC -+ default 3 if RQ_MC_LLC -+ default 4 if RQ_SMP -+ default 5 if RQ_ALL -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 38ef6d06888e..89ed751ac4e4 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -5,7 +5,8 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_100 if SCHED_MUQSS -+ default HZ_250_NODEF if !SCHED_MUQSS - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -20,11 +21,18 @@ choice - config HZ_100 - bool "100 HZ" - help -+ 100 Hz is a suitable choice in combination with MuQSS which does -+ not rely on ticks for rescheduling interrupts, and is not Hz limited -+ for timeouts and sleeps from both the kernel and userspace. -+ This allows us to benefit from the lower overhead and higher -+ throughput of fewer timer ticks. -+ -+ Non-MuQSS kernels: - 100 Hz is a typical choice for servers, SMP and NUMA systems - with lots of processors that may show reduced performance if - too many timer interrupts are occurring. - -- config HZ_250 -+ config HZ_250_NODEF - bool "250 HZ" - help - 250 Hz is a good compromise choice allowing server performance -@@ -32,7 +40,10 @@ choice - on SMP and NUMA systems. If you are going to be using NTSC video - or multimedia, selected 300Hz instead. - -- config HZ_300 -+ 250 Hz is the default choice for the mainline scheduler but not -+ advantageous in combination with MuQSS. 
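
To put numbers on the "not Hz limited" point made in the HZ_100 help text above, here is a small illustrative calculation (not from the patch) of how jiffy-based sleeps quantise a 1 ms timeout at different HZ values; the schedule_msec_hrtimeout() conversions earlier in this patch avoid that rounding by using hrtimers.

#include <stdio.h>

/* Round-up conversion in the spirit of the kernel's msecs_to_jiffies(). */
static unsigned long ms_to_jiffies(unsigned int ms, unsigned int hz)
{
    return ((unsigned long)ms * hz + 999) / 1000;
}

int main(void)
{
    const unsigned int hz[] = { 100, 250, 1000 };

    for (int i = 0; i < 3; i++) {
        unsigned long j = ms_to_jiffies(1, hz[i]);

        printf("HZ=%-4u: schedule_timeout(msecs_to_jiffies(1)) waits >= %lu jiffy = %lu ms\n",
               hz[i], j, j * (1000UL / hz[i]));
    }
    printf("schedule_msec_hrtimeout(1): ~1 ms at any HZ (hrtimer backed)\n");
    return 0;
}
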
-+ -+ config HZ_300_NODEF - bool "300 HZ" - help - 300 Hz is a good compromise choice allowing server performance -@@ -40,7 +51,7 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -- config HZ_1000 -+ config HZ_1000_NODEF - bool "1000 HZ" - help - 1000 Hz is the preferred choice for desktop systems and other -@@ -51,9 +62,9 @@ endchoice - config HZ - int - default 100 if HZ_100 -- default 250 if HZ_250 -- default 300 if HZ_300 -- default 1000 if HZ_1000 -+ default 250 if HZ_250_NODEF -+ default 300 if HZ_300_NODEF -+ default 1000 if HZ_1000_NODEF - - config SCHED_HRTICK - def_bool HIGH_RES_TIMERS -diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt -index bf82259cff96..d9438eb6f91c 100644 ---- a/kernel/Kconfig.preempt -+++ b/kernel/Kconfig.preempt -@@ -2,7 +2,7 @@ - - choice - prompt "Preemption Model" -- default PREEMPT_NONE -+ default PREEMPT - - config PREEMPT_NONE - bool "No Forced Preemption (Server)" -@@ -18,7 +18,7 @@ config PREEMPT_NONE - latencies. - - config PREEMPT_VOLUNTARY -- bool "Voluntary Kernel Preemption (Desktop)" -+ bool "Voluntary Kernel Preemption (Nothing)" - depends on !ARCH_NO_PREEMPT - help - This option reduces the latency of the kernel by adding more -@@ -33,7 +33,8 @@ config PREEMPT_VOLUNTARY - applications to run more 'smoothly' even when the system is - under load. - -- Select this if you are building a kernel for a desktop system. -+ Select this for no system in particular (choose Preemptible -+ instead on a desktop if you know what's good for you). - - config PREEMPT - bool "Preemptible Kernel (Low-Latency Desktop)" -diff --git a/kernel/Makefile b/kernel/Makefile -index 9a20016d4900..a2640d78eadb 100644 ---- a/kernel/Makefile -+++ b/kernel/Makefile -@@ -10,7 +10,8 @@ obj-y = fork.o exec_domain.o panic.o \ - extable.o params.o \ - kthread.o sys_ni.o nsproxy.o \ - notifier.o ksysfs.o cred.o reboot.o \ -- async.o range.o smpboot.o ucount.o regset.o -+ async.o range.o smpboot.o ucount.o regset.o \ -+ skip_list.o - - obj-$(CONFIG_BPFILTER) += usermode_driver.o - obj-$(CONFIG_MODULES) += kmod.o -diff --git a/kernel/delayacct.c b/kernel/delayacct.c -index 27725754ac99..769d773c7182 100644 ---- a/kernel/delayacct.c -+++ b/kernel/delayacct.c -@@ -106,7 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) - */ - t1 = tsk->sched_info.pcount; - t2 = tsk->sched_info.run_delay; -- t3 = tsk->se.sum_exec_runtime; -+ t3 = tsk_seruntime(tsk); - - d->cpu_count += t1; - -diff --git a/kernel/exit.c b/kernel/exit.c -index 733e80f334e7..3f3506c851fd 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -121,7 +121,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->curr_target = next_thread(tsk); - } - -- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, -+ add_device_randomness((const void*) &tsk_seruntime(tsk), - sizeof(unsigned long long)); - - /* -@@ -142,7 +142,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->inblock += task_io_get_inblock(tsk); - sig->oublock += task_io_get_oublock(tsk); - task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; -+ sig->sum_sched_runtime += tsk_seruntime(tsk); - sig->nr_threads--; - __unhash_process(tsk, group_dead); - write_sequnlock(&sig->stats_lock); -diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig -index 10a5aff4eecc..ce3bcc66b48d 100644 ---- a/kernel/irq/Kconfig -+++ b/kernel/irq/Kconfig -@@ -112,6 +112,23 @@ config 
GENERIC_IRQ_RESERVATION_MODE - config IRQ_FORCED_THREADING - bool - -+config FORCE_IRQ_THREADING -+ bool "Make IRQ threading compulsory" -+ depends on IRQ_FORCED_THREADING -+ default n -+ help -+ -+ Make IRQ threading mandatory for any IRQ handlers that support it -+ instead of being optional and requiring the threadirqs kernel -+ parameter. Instead they can be optionally disabled with the -+ nothreadirqs kernel parameter. -+ -+ Enabling this may make some architectures not boot with runqueue -+ sharing and MuQSS. -+ -+ Enable if you are building for a desktop or low latency system, -+ otherwise say N. -+ - config SPARSE_IRQ - bool "Support sparse irq numbering" if MAY_HAVE_SPARSE_IRQ - help -diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c -index 5df903fccb60..17a0dd194582 100644 ---- a/kernel/irq/manage.c -+++ b/kernel/irq/manage.c -@@ -25,9 +25,20 @@ - #include "internals.h" - - #if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT) -+#ifdef CONFIG_FORCE_IRQ_THREADING -+__read_mostly bool force_irqthreads = true; -+#else - __read_mostly bool force_irqthreads; -+#endif - EXPORT_SYMBOL_GPL(force_irqthreads); - -+static int __init setup_noforced_irqthreads(char *arg) -+{ -+ force_irqthreads = false; -+ return 0; -+} -+early_param("nothreadirqs", setup_noforced_irqthreads); -+ - static int __init setup_forced_irqthreads(char *arg) - { - force_irqthreads = true; -diff --git a/kernel/kthread.c b/kernel/kthread.c -index 3edaa380dc7b..a1712699726b 100644 ---- a/kernel/kthread.c -+++ b/kernel/kthread.c -@@ -471,6 +471,34 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) - } - EXPORT_SYMBOL(kthread_bind); - -+#if defined(CONFIG_SCHED_MUQSS) && defined(CONFIG_SMP) -+extern void __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); -+ -+/* -+ * new_kthread_bind is a special variant of __kthread_bind_mask. -+ * For new threads to work on muqss we want to call do_set_cpus_allowed -+ * without the task_cpu being set and the task rescheduled until they're -+ * rescheduled on their own so we call __do_set_cpus_allowed directly which -+ * only changes the cpumask. This is particularly important for smpboot threads -+ * to work. -+ */ -+static void new_kthread_bind(struct task_struct *p, unsigned int cpu) -+{ -+ unsigned long flags; -+ -+ if (WARN_ON(!wait_task_inactive(p, TASK_UNINTERRUPTIBLE))) -+ return; -+ -+ /* It's safe because the task is inactive. */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ __do_set_cpus_allowed(p, cpumask_of(cpu)); -+ p->flags |= PF_NO_SETAFFINITY; -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+#else -+#define new_kthread_bind(p, cpu) kthread_bind(p, cpu) -+#endif -+ - /** - * kthread_create_on_cpu - Create a cpu bound kthread - * @threadfn: the function to run until signal_pending(current). -@@ -491,7 +519,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), - cpu); - if (IS_ERR(p)) - return p; -- kthread_bind(p, cpu); -+ new_kthread_bind(p, cpu); - /* CPU hotplug need to bind once again when unparking the thread. 
*/ - set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags); - to_kthread(p)->cpu = cpu; -diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c -index f6310f848f34..825f9b8e228f 100644 ---- a/kernel/livepatch/transition.c -+++ b/kernel/livepatch/transition.c -@@ -282,7 +282,7 @@ static bool klp_try_switch_task(struct task_struct *task) - { - static char err_buf[STACK_ERR_BUF_SIZE]; - struct rq *rq; -- struct rq_flags flags; -+ struct rq_flags rf; - int ret; - bool success = false; - -@@ -304,7 +304,7 @@ static bool klp_try_switch_task(struct task_struct *task) - * functions. If all goes well, switch the task to the target patch - * state. - */ -- rq = task_rq_lock(task, &flags); -+ rq = task_rq_lock(task, &rf); - - if (task_running(rq, task) && task != current) { - snprintf(err_buf, STACK_ERR_BUF_SIZE, -@@ -323,7 +323,7 @@ static bool klp_try_switch_task(struct task_struct *task) - task->patch_state = klp_target_state; - - done: -- task_rq_unlock(rq, task, &flags); -+ task_rq_unlock(rq, task, &rf); - - /* - * Due to console deadlock issues, pr_debug() can't be used while -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 5fc9c9b70862..1ff14a21193d 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -22,15 +22,23 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) - CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer - endif - -+ifdef CONFIG_SCHED_MUQSS -+obj-y += MuQSS.o clock.o cputime.o -+obj-y += idle.o -+obj-y += wait.o wait_bit.o swait.o completion.o -+ -+obj-$(CONFIG_SMP) += topology.o -+else - obj-y += core.o loadavg.o clock.o cputime.o - obj-y += idle.o fair.o rt.o deadline.o - obj-y += wait.o wait_bit.o swait.o completion.o - - obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o --obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o - obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o -+endif -+obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o - obj-$(CONFIG_MEMBARRIER) += membarrier.o -diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c -new file mode 100644 -index 000000000000..8da537d5226c ---- /dev/null -+++ b/kernel/sched/MuQSS.c -@@ -0,0 +1,7855 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * kernel/sched/MuQSS.c, was kernel/sched.c -+ * -+ * Kernel scheduler and related syscalls -+ * -+ * Copyright (C) 1991-2002 Linus Torvalds -+ * -+ * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and -+ * make semaphores SMP safe -+ * 1998-11-19 Implemented schedule_timeout() and related stuff -+ * by Andrea Arcangeli -+ * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar: -+ * hybrid priority-list and round-robin design with -+ * an array-switch method of distributing timeslices -+ * and per-CPU runqueues. Cleanups and useful suggestions -+ * by Davide Libenzi, preemptible kernel bits by Robert Love. -+ * 2003-09-03 Interactivity tuning by Con Kolivas. -+ * 2004-04-02 Scheduler domains code by Nick Piggin -+ * 2007-04-15 Work begun on replacing all interactivity tuning with a -+ * fair scheduling design by Con Kolivas. 
-+ * 2007-05-05 Load balancing (smp-nice) and other improvements -+ * by Peter Williams -+ * 2007-05-06 Interactivity improvements to CFS by Mike Galbraith -+ * 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri -+ * 2007-11-29 RT balancing improvements by Steven Rostedt, Gregory Haskins, -+ * Thomas Gleixner, Mike Kravetz -+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes -+ * a whole lot of those previous things. -+ * 2016-10-01 Multiple Queue Skiplist Scheduler scalable evolution of BFS -+ * scheduler by Con Kolivas. -+ * 2019-08-31 LLC bits by Eduards Bezverhijs -+ */ -+#define CREATE_TRACE_POINTS -+#include -+#undef CREATE_TRACE_POINTS -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "../workqueue_internal.h" -+#include "../../fs/io-wq.h" -+#include "../smpboot.h" -+ -+#include "MuQSS.h" -+#include "smp.h" -+ -+#define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO) -+#define rt_task(p) rt_prio((p)->prio) -+#define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) -+#define is_rt_policy(policy) ((policy) == SCHED_FIFO || \ -+ (policy) == SCHED_RR) -+#define has_rt_policy(p) unlikely(is_rt_policy((p)->policy)) -+ -+#define is_idle_policy(policy) ((policy) == SCHED_IDLEPRIO) -+#define idleprio_task(p) unlikely(is_idle_policy((p)->policy)) -+#define task_running_idle(p) unlikely((p)->prio == IDLE_PRIO) -+ -+#define is_iso_policy(policy) ((policy) == SCHED_ISO) -+#define iso_task(p) unlikely(is_iso_policy((p)->policy)) -+#define task_running_iso(p) unlikely((p)->prio == ISO_PRIO) -+ -+#define rq_idle(rq) ((rq)->rq_prio == PRIO_LIMIT) -+ -+#define ISO_PERIOD (5 * HZ) -+ -+#define STOP_PRIO (MAX_RT_PRIO - 1) -+ -+/* -+ * Some helpers for converting to/from various scales. Use shifts to get -+ * approximate multiples of ten for less overhead. 
-+ */ -+#define APPROX_NS_PS (1073741824) /* Approximate ns per second */ -+#define JIFFIES_TO_NS(TIME) ((TIME) * (APPROX_NS_PS / HZ)) -+#define JIFFY_NS (APPROX_NS_PS / HZ) -+#define JIFFY_US (1048576 / HZ) -+#define NS_TO_JIFFIES(TIME) ((TIME) / JIFFY_NS) -+#define HALF_JIFFY_NS (APPROX_NS_PS / HZ / 2) -+#define HALF_JIFFY_US (1048576 / HZ / 2) -+#define MS_TO_NS(TIME) ((TIME) << 20) -+#define MS_TO_US(TIME) ((TIME) << 10) -+#define NS_TO_MS(TIME) ((TIME) >> 20) -+#define NS_TO_US(TIME) ((TIME) >> 10) -+#define US_TO_NS(TIME) ((TIME) << 10) -+#define TICK_APPROX_NS ((APPROX_NS_PS+HZ/2)/HZ) -+ -+#define RESCHED_US (100) /* Reschedule if less than this many μs left */ -+ -+void print_scheduler_version(void) -+{ -+ printk(KERN_INFO "MuQSS CPU scheduler v0.204 by Con Kolivas.\n"); -+} -+ -+/* Define RQ share levels */ -+#define RQSHARE_NONE 0 -+#define RQSHARE_SMT 1 -+#define RQSHARE_MC 2 -+#define RQSHARE_MC_LLC 3 -+#define RQSHARE_SMP 4 -+#define RQSHARE_ALL 5 -+ -+/* Define locality levels */ -+#define LOCALITY_SAME 0 -+#define LOCALITY_SMT 1 -+#define LOCALITY_MC_LLC 2 -+#define LOCALITY_MC 3 -+#define LOCALITY_SMP 4 -+#define LOCALITY_DISTANT 5 -+ -+/* -+ * This determines what level of runqueue sharing will be done and is -+ * configurable at boot time with the bootparam rqshare = -+ */ -+static int rqshare __read_mostly = CONFIG_SHARERQ; /* Default RQSHARE_MC */ -+ -+static int __init set_rqshare(char *str) -+{ -+ if (!strncmp(str, "none", 4)) { -+ rqshare = RQSHARE_NONE; -+ return 0; -+ } -+ if (!strncmp(str, "smt", 3)) { -+ rqshare = RQSHARE_SMT; -+ return 0; -+ } -+ if (!strncmp(str, "mc", 2)) { -+ rqshare = RQSHARE_MC; -+ return 0; -+ } -+ if (!strncmp(str, "llc", 3)) { -+ rqshare = RQSHARE_MC_LLC; -+ return 0; -+ } -+ if (!strncmp(str, "smp", 3)) { -+ rqshare = RQSHARE_SMP; -+ return 0; -+ } -+ if (!strncmp(str, "all", 3)) { -+ rqshare = RQSHARE_ALL; -+ return 0; -+ } -+ return 1; -+} -+__setup("rqshare=", set_rqshare); -+ -+/* -+ * This is the time all tasks within the same priority round robin. -+ * Value is in ms and set to a minimum of 6ms. -+ * Tunable via /proc interface. -+ */ -+int rr_interval __read_mostly = 6; -+ -+/* -+ * Tunable to choose whether to prioritise latency or throughput, simple -+ * binary yes or no -+ */ -+int sched_interactive __read_mostly = 1; -+ -+/* -+ * sched_iso_cpu - sysctl which determines the cpu percentage SCHED_ISO tasks -+ * are allowed to run five seconds as real time tasks. This is the total over -+ * all online cpus. -+ */ -+int sched_iso_cpu __read_mostly = 70; -+ -+/* -+ * sched_yield_type - Choose what sort of yield sched_yield will perform. -+ * 0: No yield. -+ * 1: Yield only to better priority/deadline tasks. (default) -+ * 2: Expire timeslice and recalculate deadline. -+ */ -+int sched_yield_type __read_mostly = 1; -+ -+/* -+ * The relative length of deadline for each priority(nice) level. -+ */ -+static int prio_ratios[NICE_WIDTH] __read_mostly; -+ -+ -+/* -+ * The quota handed out to tasks of all priority levels when refilling their -+ * time_slice. -+ */ -+static inline int timeslice(void) -+{ -+ return MS_TO_US(rr_interval); -+} -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+ -+#ifdef CONFIG_SMP -+/* -+ * Total number of runqueues. Equals number of CPUs when there is no runqueue -+ * sharing but is usually less with SMT/MC sharing of runqueues. 
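
A quick illustrative calculation (not part of the patch) of how far the shift-based approximations above sit from their decimal counterparts; the scheduler only needs these values to be cheap and self-consistent, not exact.

#include <stdio.h>

int main(void)
{
    const long long approx_ns_ps = 1LL << 30;   /* APPROX_NS_PS          */
    const long long ms_to_ns_6   = 6LL << 20;   /* MS_TO_NS(6), 6 "ms"   */

    printf("APPROX_NS_PS = %lld ns  (~%.1f%% above 10^9)\n",
           approx_ns_ps, (approx_ns_ps - 1000000000LL) * 100.0 / 1e9);
    printf("MS_TO_NS(6)  = %lld ns  (~%.2f real ms)\n",
           ms_to_ns_6, ms_to_ns_6 / 1e6);
    return 0;
}
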
-+ */ -+static int total_runqueues __read_mostly = 1; -+ -+static cpumask_t cpu_idle_map ____cacheline_aligned_in_smp; -+ -+struct rq *cpu_rq(int cpu) -+{ -+ return &per_cpu(runqueues, (cpu)); -+} -+#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -+ -+/* -+ * For asym packing, by default the lower numbered cpu has higher priority. -+ */ -+int __weak arch_asym_cpu_priority(int cpu) -+{ -+ return -cpu; -+} -+ -+int __weak arch_sd_sibling_asym_packing(void) -+{ -+ return 0*SD_ASYM_PACKING; -+} -+ -+#ifdef CONFIG_SCHED_SMT -+DEFINE_STATIC_KEY_FALSE(sched_smt_present); -+EXPORT_SYMBOL_GPL(sched_smt_present); -+#endif -+ -+#else -+struct rq *uprq; -+#endif /* CONFIG_SMP */ -+ -+#include "stats.h" -+ -+/* -+ * All common locking functions performed on rq->lock. rq->clock is local to -+ * the CPU accessing it so it can be modified just with interrupts disabled -+ * when we're not updating niffies. -+ * Looking up task_rq must be done under rq->lock to be safe. -+ */ -+ -+/* -+ * RQ-clock updating methods: -+ */ -+ -+#ifdef HAVE_SCHED_AVG_IRQ -+static void update_irq_load_avg(struct rq *rq, long delta); -+#else -+static inline void update_irq_load_avg(struct rq *rq, long delta) {} -+#endif -+ -+static void update_rq_clock_task(struct rq *rq, s64 delta) -+{ -+/* -+ * In theory, the compile should just see 0 here, and optimize out the call -+ * to sched_rt_avg_update. But I don't trust it... -+ */ -+ s64 __maybe_unused steal = 0, irq_delta = 0; -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; -+ -+ /* -+ * Since irq_time is only updated on {soft,}irq_exit, we might run into -+ * this case when a previous update_rq_clock() happened inside a -+ * {soft,}irq region. -+ * -+ * When this happens, we stop ->clock_task and only update the -+ * prev_irq_time stamp to account for the part that fit, so that a next -+ * update will consume the rest. This ensures ->clock_task is -+ * monotonic. -+ * -+ * It does however cause some slight miss-attribution of {soft,}irq -+ * time, a more accurate solution would be to update the irq_time using -+ * the current rq->clock timestamp, except that would require using -+ * atomic ops. -+ */ -+ if (irq_delta > delta) -+ irq_delta = delta; -+ -+ rq->prev_irq_time += irq_delta; -+ delta -= irq_delta; -+#endif -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ if (static_key_false((¶virt_steal_rq_enabled))) { -+ steal = paravirt_steal_clock(cpu_of(rq)); -+ steal -= rq->prev_steal_time_rq; -+ -+ if (unlikely(steal > delta)) -+ steal = delta; -+ -+ rq->prev_steal_time_rq += steal; -+ delta -= steal; -+ } -+#endif -+ rq->clock_task += delta; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ if (irq_delta + steal) -+ update_irq_load_avg(rq, irq_delta + steal); -+#endif -+} -+ -+static inline void update_rq_clock(struct rq *rq) -+{ -+ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; -+ -+ if (unlikely(delta < 0)) -+ return; -+ rq->clock += delta; -+ update_rq_clock_task(rq, delta); -+} -+ -+/* -+ * Niffies are a globally increasing nanosecond counter. They're only used by -+ * update_load_avg and time_slice_expired, however deadlines are based on them -+ * across CPUs. Update them whenever we will call one of those functions, and -+ * synchronise them across CPUs whenever we hold both runqueue locks. 
-+ */ -+static inline void update_clocks(struct rq *rq) -+{ -+ s64 ndiff, minndiff; -+ long jdiff; -+ -+ update_rq_clock(rq); -+ ndiff = rq->clock - rq->old_clock; -+ rq->old_clock = rq->clock; -+ jdiff = jiffies - rq->last_jiffy; -+ -+ /* Subtract any niffies added by balancing with other rqs */ -+ ndiff -= rq->niffies - rq->last_niffy; -+ minndiff = JIFFIES_TO_NS(jdiff) - rq->niffies + rq->last_jiffy_niffies; -+ if (minndiff < 0) -+ minndiff = 0; -+ ndiff = max(ndiff, minndiff); -+ rq->niffies += ndiff; -+ rq->last_niffy = rq->niffies; -+ if (jdiff) { -+ rq->last_jiffy += jdiff; -+ rq->last_jiffy_niffies = rq->niffies; -+ } -+} -+ -+/* -+ * Any time we have two runqueues locked we use that as an opportunity to -+ * synchronise niffies to the highest value as idle ticks may have artificially -+ * kept niffies low on one CPU and the truth can only be later. -+ */ -+static inline void synchronise_niffies(struct rq *rq1, struct rq *rq2) -+{ -+ if (rq1->niffies > rq2->niffies) -+ rq2->niffies = rq1->niffies; -+ else -+ rq1->niffies = rq2->niffies; -+} -+ -+/* -+ * double_rq_lock - safely lock two runqueues -+ * -+ * Note this does not disable interrupts like task_rq_lock, -+ * you need to do so manually before calling. -+ */ -+ -+/* For when we know rq1 != rq2 */ -+static inline void __double_rq_lock(struct rq *rq1, struct rq *rq2) -+ __acquires(rq1->lock) -+ __acquires(rq2->lock) -+{ -+ if (rq1 < rq2) { -+ raw_spin_lock(rq1->lock); -+ raw_spin_lock_nested(rq2->lock, SINGLE_DEPTH_NESTING); -+ } else { -+ raw_spin_lock(rq2->lock); -+ raw_spin_lock_nested(rq1->lock, SINGLE_DEPTH_NESTING); -+ } -+} -+ -+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) -+ __acquires(rq1->lock) -+ __acquires(rq2->lock) -+{ -+ BUG_ON(!irqs_disabled()); -+ if (rq1->lock == rq2->lock) { -+ raw_spin_lock(rq1->lock); -+ __acquire(rq2->lock); /* Fake it out ;) */ -+ } else -+ __double_rq_lock(rq1, rq2); -+ synchronise_niffies(rq1, rq2); -+} -+ -+/* -+ * double_rq_unlock - safely unlock two runqueues -+ * -+ * Note this does not restore interrupts like task_rq_unlock, -+ * you need to do so manually after calling. 
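
Why __double_rq_lock() above always takes the lower-addressed lock first: it is the standard ABBA-deadlock avoidance rule. If two CPUs could each hold one runqueue lock while spinning on the other, neither would make progress; a fixed global ordering guarantees one of them acquires both. A userspace sketch of the same rule follows (illustrative only; lock_pair_ordered() is a hypothetical helper, not from the patch).

#include <pthread.h>
#include <stdint.h>

/* Hypothetical helper mirroring the ordering rule in __double_rq_lock(). */
static void lock_pair_ordered(pthread_mutex_t *a, pthread_mutex_t *b)
{
    if (a == b) {                           /* shared-runqueue case: one lock */
        pthread_mutex_lock(a);
        return;
    }
    if ((uintptr_t)a < (uintptr_t)b) {      /* lower address always first */
        pthread_mutex_lock(a);
        pthread_mutex_lock(b);
    } else {
        pthread_mutex_lock(b);
        pthread_mutex_lock(a);
    }
}

int main(void)
{
    pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
    pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

    lock_pair_ordered(&m2, &m1);    /* argument order does not matter */
    pthread_mutex_unlock(&m1);
    pthread_mutex_unlock(&m2);
    return 0;
}
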
-+ */ -+static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) -+ __releases(rq1->lock) -+ __releases(rq2->lock) -+{ -+ raw_spin_unlock(rq1->lock); -+ if (rq1->lock != rq2->lock) -+ raw_spin_unlock(rq2->lock); -+ else -+ __release(rq2->lock); -+} -+ -+static inline void lock_all_rqs(void) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ for_each_possible_cpu(cpu) { -+ struct rq *rq = cpu_rq(cpu); -+ -+ do_raw_spin_lock(rq->lock); -+ } -+} -+ -+static inline void unlock_all_rqs(void) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) { -+ struct rq *rq = cpu_rq(cpu); -+ -+ do_raw_spin_unlock(rq->lock); -+ } -+ preempt_enable(); -+} -+ -+/* Specially nest trylock an rq */ -+static inline bool trylock_rq(struct rq *this_rq, struct rq *rq) -+{ -+ if (unlikely(!do_raw_spin_trylock(rq->lock))) -+ return false; -+ spin_acquire(&rq->lock->dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ synchronise_niffies(this_rq, rq); -+ return true; -+} -+ -+/* Unlock a specially nested trylocked rq */ -+static inline void unlock_rq(struct rq *rq) -+{ -+ spin_release(&rq->lock->dep_map, _RET_IP_); -+ do_raw_spin_unlock(rq->lock); -+} -+ -+/* -+ * cmpxchg based fetch_or, macro so it works for different integer types -+ */ -+#define fetch_or(ptr, mask) \ -+ ({ \ -+ typeof(ptr) _ptr = (ptr); \ -+ typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _old, _val = *_ptr; \ -+ \ -+ for (;;) { \ -+ _old = cmpxchg(_ptr, _val, _val | _mask); \ -+ if (_old == _val) \ -+ break; \ -+ _val = _old; \ -+ } \ -+ _old; \ -+}) -+ -+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) -+/* -+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * this avoids any races wrt polling state changes and thereby avoids -+ * spurious IPIs. -+ */ -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+} -+ -+/* -+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. -+ * -+ * If this returns true, then the idle task promises to call -+ * sched_ttwu_pending() and reschedule soon. -+ */ -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); -+ -+ for (;;) { -+ if (!(val & _TIF_POLLING_NRFLAG)) -+ return false; -+ if (val & _TIF_NEED_RESCHED) -+ return true; -+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); -+ if (old == val) -+ break; -+ val = old; -+ } -+ return true; -+} -+ -+#else -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ return false; -+} -+#endif -+#endif -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. -+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. 
-+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. -+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); -+ /* Task can safely be re-inserted now */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ -+static inline void smp_sched_reschedule(int cpu) -+{ -+ if (likely(cpu_online(cpu))) -+ smp_send_reschedule(cpu); -+} -+ -+/* -+ * resched_task - mark a task 'to be rescheduled now'. -+ * -+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. -+ */ -+void resched_task(struct task_struct *p) -+{ -+ int cpu; -+#ifdef CONFIG_LOCKDEP -+ /* Kernel threads call this when creating workqueues while still -+ * inactive from __kthread_bind_mask, holding only the pi_lock */ -+ if (!(p->flags & PF_KTHREAD)) { -+ struct rq *rq = task_rq(p); -+ -+ lockdep_assert_held(rq->lock); -+ } -+#endif -+ if (test_tsk_need_resched(p)) -+ return; -+ -+ cpu = task_cpu(p); -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(p)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+/* -+ * A task that is not running or queued will not have a node set. -+ * A task that is queued but not running will have a node set. -+ * A task that is currently running will have ->on_cpu set but no node set. 
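
The wake_q helpers above are meant to be used in two phases: wakeups are queued while a lock is held and issued only after the lock is dropped, so freshly woken tasks do not immediately contend on that lock. A hedged kernel-style sketch of the calling convention follows; struct my_waiter and wake_all_waiters() are hypothetical, only the wake_q calls reflect the API defined above.

#include <linux/list.h>
#include <linux/sched.h>
#include <linux/sched/wake_q.h>
#include <linux/spinlock.h>

struct my_waiter {                  /* hypothetical example structure */
    struct list_head list;
    struct task_struct *task;
};

static void wake_all_waiters(spinlock_t *lock, struct list_head *waiters)
{
    DEFINE_WAKE_Q(wake_q);
    struct my_waiter *w;

    spin_lock(lock);
    list_for_each_entry(w, waiters, list)
        wake_q_add(&wake_q, w->task);   /* takes a task reference */
    spin_unlock(lock);

    wake_up_q(&wake_q);             /* the actual wake_up_process() calls */
}
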
-+ */ -+static inline bool task_queued(struct task_struct *p) -+{ -+ return !skiplist_node_empty(&p->node); -+} -+ -+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags); -+static inline void resched_if_idle(struct rq *rq); -+ -+static inline bool deadline_before(u64 deadline, u64 time) -+{ -+ return (deadline < time); -+} -+ -+/* -+ * Deadline is "now" in niffies + (offset by priority). Setting the deadline -+ * is the key to everything. It distributes cpu fairly amongst tasks of the -+ * same nice value, it proportions cpu according to nice level, it means the -+ * task that last woke up the longest ago has the earliest deadline, thus -+ * ensuring that interactive tasks get low latency on wake up. The CPU -+ * proportion works out to the square of the virtual deadline difference, so -+ * this equation will give nice 19 3% CPU compared to nice 0. -+ */ -+static inline u64 prio_deadline_diff(int user_prio) -+{ -+ return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128)); -+} -+ -+static inline u64 task_deadline_diff(struct task_struct *p) -+{ -+ return prio_deadline_diff(TASK_USER_PRIO(p)); -+} -+ -+static inline u64 static_deadline_diff(int static_prio) -+{ -+ return prio_deadline_diff(USER_PRIO(static_prio)); -+} -+ -+static inline int longest_deadline_diff(void) -+{ -+ return prio_deadline_diff(39); -+} -+ -+static inline int ms_longest_deadline_diff(void) -+{ -+ return NS_TO_MS(longest_deadline_diff()); -+} -+ -+static inline bool rq_local(struct rq *rq); -+ -+#ifndef SCHED_CAPACITY_SCALE -+#define SCHED_CAPACITY_SCALE 1024 -+#endif -+ -+static inline int rq_load(struct rq *rq) -+{ -+ return rq->nr_running; -+} -+ -+/* -+ * Update the load average for feeding into cpu frequency governors. Use a -+ * rough estimate of a rolling average with ~ time constant of 32ms. -+ * 80/128 ~ 0.63. * 80 / 32768 / 128 == * 5 / 262144 -+ * Make sure a call to update_clocks has been made before calling this to get -+ * an updated rq->niffies. -+ */ -+static void update_load_avg(struct rq *rq, unsigned int flags) -+{ -+ long us_interval, load; -+ -+ us_interval = NS_TO_US(rq->niffies - rq->load_update); -+ if (unlikely(us_interval <= 0)) -+ return; -+ -+ load = rq->load_avg - (rq->load_avg * us_interval * 5 / 262144); -+ if (unlikely(load < 0)) -+ load = 0; -+ load += rq_load(rq) * SCHED_CAPACITY_SCALE * us_interval * 5 / 262144; -+ rq->load_avg = load; -+ -+ rq->load_update = rq->niffies; -+ update_irq_load_avg(rq, 0); -+ if (likely(rq_local(rq))) -+ cpufreq_trigger(rq, flags); -+} -+ -+#ifdef HAVE_SCHED_AVG_IRQ -+/* -+ * IRQ variant of update_load_avg below. delta is actually time in nanoseconds -+ * here so we scale curload to how long it's been since the last update. -+ */ -+static void update_irq_load_avg(struct rq *rq, long delta) -+{ -+ long us_interval, load; -+ -+ us_interval = NS_TO_US(rq->niffies - rq->irq_load_update); -+ if (unlikely(us_interval <= 0)) -+ return; -+ -+ load = rq->irq_load_avg - (rq->irq_load_avg * us_interval * 5 / 262144); -+ if (unlikely(load < 0)) -+ load = 0; -+ load += NS_TO_US(delta) * SCHED_CAPACITY_SCALE * 5 / 262144; -+ rq->irq_load_avg = load; -+ -+ rq->irq_load_update = rq->niffies; -+} -+#endif -+ -+/* -+ * Removing from the runqueue. Enter with rq locked. Deleting a task -+ * from the skip list is done via the stored node reference in the task struct -+ * and does not require a full look up. Thus it occurs in O(k) time where k -+ * is the "level" of the list the task was stored at - usually < 4, max 8. 
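
The "nice 19 gets about 3% relative to nice 0" figure in the deadline comment above follows from the squared-ratio behaviour it describes. Assuming the usual MuQSS prio_ratios[] table of roughly +10% per nice level (it is initialised outside this excerpt), the arithmetic works out as below (illustrative only; build with -lm).

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* prio_ratios[] grows ~10% per nice level, and the two tasks sit
     * 19 nice levels apart: */
    double offset_ratio = pow(1.1, 19);                 /* ~6.12x longer deadline */
    double cpu_ratio    = offset_ratio * offset_ratio;  /* ~37.4 : 1 CPU share    */

    printf("deadline offset, nice 19 vs nice 0: %.2fx\n", offset_ratio);
    printf("resulting CPU ratio:                %.1f : 1\n", cpu_ratio);
    printf("nice 19 share vs nice 0:            %.1f%%\n", 100.0 / cpu_ratio);
    return 0;
}

The result, roughly 2.7%, is what the comment rounds to 3%.
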
-+ */ -+static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ skiplist_delete(rq->sl, &p->node); -+ rq->best_key = rq->node->next[0]->key; -+ update_clocks(rq); -+ -+ if (!(flags & DEQUEUE_SAVE)) { -+ sched_info_dequeued(rq, p); -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); -+ } -+ rq->nr_running--; -+ if (rt_task(p)) -+ rq->rt_nr_running--; -+ update_load_avg(rq, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_RCU -+static bool rcu_read_critical(struct task_struct *p) -+{ -+ return p->rcu_read_unlock_special.b.blocked; -+} -+#else /* CONFIG_PREEMPT_RCU */ -+#define rcu_read_critical(p) (false) -+#endif /* CONFIG_PREEMPT_RCU */ -+ -+/* -+ * To determine if it's safe for a task of SCHED_IDLEPRIO to actually run as -+ * an idle task, we ensure none of the following conditions are met. -+ */ -+static bool idleprio_suitable(struct task_struct *p) -+{ -+ return (!(p->sched_contributes_to_load) && !(p->flags & (PF_EXITING)) && -+ !signal_pending(p) && !rcu_read_critical(p) && !freezing(p)); -+} -+ -+/* -+ * To determine if a task of SCHED_ISO can run in pseudo-realtime, we check -+ * that the iso_refractory flag is not set. -+ */ -+static inline bool isoprio_suitable(struct rq *rq) -+{ -+ return !rq->iso_refractory; -+} -+ -+static inline void inc_nr_running(struct rq *rq) -+{ -+ rq->nr_running++; -+ if (trace_sched_update_nr_running_tp_enabled()) { -+ call_trace_sched_update_nr_running(rq, 1); -+ } -+} -+ -+static inline void dec_nr_running(struct rq *rq) -+{ -+ rq->nr_running--; -+ if (trace_sched_update_nr_running_tp_enabled()) { -+ call_trace_sched_update_nr_running(rq, -1); -+ } -+} -+ -+/* -+ * Adding to the runqueue. Enter with rq locked. -+ */ -+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ unsigned int randseed, cflags = 0; -+ u64 sl_id; -+ -+ if (!rt_task(p)) { -+ /* Check it hasn't gotten rt from PI */ -+ if ((idleprio_task(p) && idleprio_suitable(p)) || -+ (iso_task(p) && isoprio_suitable(rq))) -+ p->prio = p->normal_prio; -+ else -+ p->prio = NORMAL_PRIO; -+ } else -+ rq->rt_nr_running++; -+ /* -+ * The sl_id key passed to the skiplist generates a sorted list. -+ * Realtime and sched iso tasks run FIFO so they only need be sorted -+ * according to priority. The skiplist will put tasks of the same -+ * key inserted later in FIFO order. Tasks of sched normal, batch -+ * and idleprio are sorted according to their deadlines. Idleprio -+ * tasks are offset by an impossibly large deadline value ensuring -+ * they get sorted into last positions, but still according to their -+ * own deadlines. This creates a "landscape" of skiplists running -+ * from priority 0 realtime in first place to the lowest priority -+ * idleprio tasks last. Skiplist insertion is an O(log n) process. -+ */ -+ if (p->prio <= ISO_PRIO) { -+ sl_id = p->prio; -+ } else { -+ sl_id = p->deadline; -+ if (idleprio_task(p)) { -+ if (p->prio == IDLE_PRIO) -+ sl_id |= 0xF000000000000000; -+ else -+ sl_id += longest_deadline_diff(); -+ } -+ } -+ /* -+ * Some architectures don't have better than microsecond resolution -+ * so mask out ~microseconds as the random seed for skiplist insertion. 
-+ */ -+ update_clocks(rq); -+ if (!(flags & ENQUEUE_RESTORE)) { -+ sched_info_queued(rq, p); -+ psi_enqueue(p, flags & ENQUEUE_WAKEUP); -+ } -+ -+ randseed = (rq->niffies >> 10) & 0xFFFFFFFF; -+ skiplist_insert(rq->sl, &p->node, sl_id, p, randseed); -+ rq->best_key = rq->node->next[0]->key; -+ if (p->in_iowait) -+ cflags |= SCHED_CPUFREQ_IOWAIT; -+ inc_nr_running(rq); -+ update_load_avg(rq, cflags); -+} -+ -+/* -+ * Returns the relative length of deadline all compared to the shortest -+ * deadline which is that of nice -20. -+ */ -+static inline int task_prio_ratio(struct task_struct *p) -+{ -+ return prio_ratios[TASK_USER_PRIO(p)]; -+} -+ -+/* -+ * task_timeslice - all tasks of all priorities get the exact same timeslice -+ * length. CPU distribution is handled by giving different deadlines to -+ * tasks of different priorities. Use 128 as the base value for fast shifts. -+ */ -+static inline int task_timeslice(struct task_struct *p) -+{ -+ return (rr_interval * task_prio_ratio(p) / 128); -+} -+ -+#ifdef CONFIG_SMP -+/* Entered with rq locked */ -+static inline void resched_if_idle(struct rq *rq) -+{ -+ if (rq_idle(rq)) -+ resched_task(rq->curr); -+} -+ -+static inline bool rq_local(struct rq *rq) -+{ -+ return (rq->cpu == smp_processor_id()); -+} -+#ifdef CONFIG_SMT_NICE -+static const cpumask_t *thread_cpumask(int cpu); -+ -+/* Find the best real time priority running on any SMT siblings of cpu and if -+ * none are running, the static priority of the best deadline task running. -+ * The lookups to the other runqueues is done lockless as the occasional wrong -+ * value would be harmless. */ -+static int best_smt_bias(struct rq *this_rq) -+{ -+ int other_cpu, best_bias = 0; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct rq *rq = cpu_rq(other_cpu); -+ -+ if (rq_idle(rq)) -+ continue; -+ if (unlikely(!rq->online)) -+ continue; -+ if (!rq->rq_mm) -+ continue; -+ if (likely(rq->rq_smt_bias > best_bias)) -+ best_bias = rq->rq_smt_bias; -+ } -+ return best_bias; -+} -+ -+static int task_prio_bias(struct task_struct *p) -+{ -+ if (rt_task(p)) -+ return 1 << 30; -+ else if (task_running_iso(p)) -+ return 1 << 29; -+ else if (task_running_idle(p)) -+ return 0; -+ return MAX_PRIO - p->static_prio; -+} -+ -+static bool smt_always_schedule(struct task_struct __maybe_unused *p, struct rq __maybe_unused *this_rq) -+{ -+ return true; -+} -+ -+static bool (*smt_schedule)(struct task_struct *p, struct rq *this_rq) = &smt_always_schedule; -+ -+/* We've already decided p can run on CPU, now test if it shouldn't for SMT -+ * nice reasons. 
*/ -+static bool smt_should_schedule(struct task_struct *p, struct rq *this_rq) -+{ -+ int best_bias, task_bias; -+ -+ /* Kernel threads always run */ -+ if (unlikely(!p->mm)) -+ return true; -+ if (rt_task(p)) -+ return true; -+ if (!idleprio_suitable(p)) -+ return true; -+ best_bias = best_smt_bias(this_rq); -+ /* The smt siblings are all idle or running IDLEPRIO */ -+ if (best_bias < 1) -+ return true; -+ task_bias = task_prio_bias(p); -+ if (task_bias < 1) -+ return false; -+ if (task_bias >= best_bias) -+ return true; -+ /* Dither 25% cpu of normal tasks regardless of nice difference */ -+ if (best_bias % 4 == 1) -+ return true; -+ /* Sorry, you lose */ -+ return false; -+} -+#else /* CONFIG_SMT_NICE */ -+#define smt_schedule(p, this_rq) (true) -+#endif /* CONFIG_SMT_NICE */ -+ -+static inline void atomic_set_cpu(int cpu, cpumask_t *cpumask) -+{ -+ set_bit(cpu, (volatile unsigned long *)cpumask); -+} -+ -+/* -+ * The cpu_idle_map stores a bitmap of all the CPUs currently idle to -+ * allow easy lookup of whether any suitable idle CPUs are available. -+ * It's cheaper to maintain a binary yes/no if there are any idle CPUs on the -+ * idle_cpus variable than to do a full bitmask check when we are busy. The -+ * bits are set atomically but read locklessly as occasional false positive / -+ * negative is harmless. -+ */ -+static inline void set_cpuidle_map(int cpu) -+{ -+ if (likely(cpu_online(cpu))) -+ atomic_set_cpu(cpu, &cpu_idle_map); -+} -+ -+static inline void atomic_clear_cpu(int cpu, cpumask_t *cpumask) -+{ -+ clear_bit(cpu, (volatile unsigned long *)cpumask); -+} -+ -+static inline void clear_cpuidle_map(int cpu) -+{ -+ atomic_clear_cpu(cpu, &cpu_idle_map); -+} -+ -+static bool suitable_idle_cpus(struct task_struct *p) -+{ -+ return (cpumask_intersects(p->cpus_ptr, &cpu_idle_map)); -+} -+ -+/* -+ * Resched current on rq. We don't know if rq is local to this CPU nor if it -+ * is locked so we do not use an intermediate variable for the task to avoid -+ * having it dereferenced. -+ */ -+static void resched_curr(struct rq *rq) -+{ -+ int cpu; -+ -+ if (test_tsk_need_resched(rq->curr)) -+ return; -+ -+ rq->preempt = rq->curr; -+ cpu = rq->cpu; -+ -+ /* We're doing this without holding the rq lock if it's not task_rq */ -+ -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(rq->curr); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(rq->curr)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+#define CPUIDLE_DIFF_THREAD (1) -+#define CPUIDLE_DIFF_CORE_LLC (2) -+#define CPUIDLE_DIFF_CORE (4) -+#define CPUIDLE_CACHE_BUSY (8) -+#define CPUIDLE_DIFF_CPU (16) -+#define CPUIDLE_THREAD_BUSY (32) -+#define CPUIDLE_DIFF_NODE (64) -+ -+/* -+ * The best idle CPU is chosen according to the CPUIDLE ranking above where the -+ * lowest value would give the most suitable CPU to schedule p onto next. The -+ * order works out to be the following: -+ * -+ * Same thread, idle or busy cache, idle or busy threads -+ * Other core, same cache, idle or busy cache, idle threads. -+ * Same node, other CPU, idle cache, idle threads. -+ * Same node, other CPU, busy cache, idle threads. -+ * Other core, same cache, busy threads. -+ * Same node, other CPU, busy threads. -+ * Other node, other CPU, idle cache, idle threads. -+ * Other node, other CPU, busy cache, idle threads. -+ * Other node, other CPU, busy threads. 
-+ */ -+static int best_mask_cpu(int best_cpu, struct rq *rq, cpumask_t *tmpmask) -+{ -+ int best_ranking = CPUIDLE_DIFF_NODE | CPUIDLE_THREAD_BUSY | -+ CPUIDLE_DIFF_CPU | CPUIDLE_CACHE_BUSY | CPUIDLE_DIFF_CORE | -+ CPUIDLE_DIFF_CORE_LLC | CPUIDLE_DIFF_THREAD; -+ int cpu_tmp; -+ -+ if (cpumask_test_cpu(best_cpu, tmpmask)) -+ goto out; -+ -+ for_each_cpu(cpu_tmp, tmpmask) { -+ int ranking, locality; -+ struct rq *tmp_rq; -+ -+ ranking = 0; -+ tmp_rq = cpu_rq(cpu_tmp); -+ -+ locality = rq->cpu_locality[cpu_tmp]; -+#ifdef CONFIG_NUMA -+ if (locality > LOCALITY_SMP) -+ ranking |= CPUIDLE_DIFF_NODE; -+ else -+#endif -+ if (locality > LOCALITY_MC) -+ ranking |= CPUIDLE_DIFF_CPU; -+#ifdef CONFIG_SCHED_MC -+ else if (locality == LOCALITY_MC_LLC) -+ ranking |= CPUIDLE_DIFF_CORE_LLC; -+ else if (locality == LOCALITY_MC) -+ ranking |= CPUIDLE_DIFF_CORE; -+ if (!(tmp_rq->cache_idle(tmp_rq))) -+ ranking |= CPUIDLE_CACHE_BUSY; -+#endif -+#ifdef CONFIG_SCHED_SMT -+ if (locality == LOCALITY_SMT) -+ ranking |= CPUIDLE_DIFF_THREAD; -+#endif -+ if (ranking < best_ranking -+#ifdef CONFIG_SCHED_SMT -+ || (ranking == best_ranking && (tmp_rq->siblings_idle(tmp_rq))) -+#endif -+ ) { -+ best_cpu = cpu_tmp; -+ best_ranking = ranking; -+ } -+ } -+out: -+ return best_cpu; -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ struct rq *this_rq = cpu_rq(this_cpu); -+ -+ return (this_rq->cpu_locality[that_cpu] < LOCALITY_SMP); -+} -+ -+/* As per resched_curr but only will resched idle task */ -+static inline void resched_idle(struct rq *rq) -+{ -+ if (test_tsk_need_resched(rq->idle)) -+ return; -+ -+ rq->preempt = rq->idle; -+ -+ set_tsk_need_resched(rq->idle); -+ -+ if (rq_local(rq)) { -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ smp_sched_reschedule(rq->cpu); -+} -+ -+DEFINE_PER_CPU(cpumask_t, idlemask); -+ -+static struct rq *resched_best_idle(struct task_struct *p, int cpu) -+{ -+ cpumask_t *tmpmask = &(per_cpu(idlemask, cpu)); -+ struct rq *rq; -+ int best_cpu; -+ -+ cpumask_and(tmpmask, p->cpus_ptr, &cpu_idle_map); -+ best_cpu = best_mask_cpu(cpu, task_rq(p), tmpmask); -+ rq = cpu_rq(best_cpu); -+ if (!smt_schedule(p, rq)) -+ return NULL; -+ rq->preempt = p; -+ resched_idle(rq); -+ return rq; -+} -+ -+static inline void resched_suitable_idle(struct task_struct *p) -+{ -+ if (suitable_idle_cpus(p)) -+ resched_best_idle(p, task_cpu(p)); -+} -+ -+static inline struct rq *rq_order(struct rq *rq, int cpu) -+{ -+ return rq->rq_order[cpu]; -+} -+#else /* CONFIG_SMP */ -+static inline void set_cpuidle_map(int cpu) -+{ -+} -+ -+static inline void clear_cpuidle_map(int cpu) -+{ -+} -+ -+static inline bool suitable_idle_cpus(struct task_struct *p) -+{ -+ return uprq->curr == uprq->idle; -+} -+ -+static inline void resched_suitable_idle(struct task_struct *p) -+{ -+} -+ -+static inline void resched_curr(struct rq *rq) -+{ -+ resched_task(rq->curr); -+} -+ -+static inline void resched_if_idle(struct rq *rq) -+{ -+} -+ -+static inline bool rq_local(struct rq *rq) -+{ -+ return true; -+} -+ -+static inline struct rq *rq_order(struct rq *rq, int cpu) -+{ -+ return rq; -+} -+ -+static inline bool smt_schedule(struct task_struct *p, struct rq *rq) -+{ -+ return true; -+} -+#endif /* CONFIG_SMP */ -+ -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ if (idleprio_task(p)) -+ return IDLE_PRIO; -+ if (iso_task(p)) -+ return ISO_PRIO; -+ return NORMAL_PRIO; -+} -+ -+/* -+ * Calculate the current priority, i.e. 
the priority -+ * taken into account by the scheduler. This value might -+ * be boosted by RT tasks as it will be RT if the task got -+ * RT-boosted. If not then it returns p->normal_prio. -+ */ -+static int effective_prio(struct task_struct *p) -+{ -+ p->normal_prio = normal_prio(p); -+ /* -+ * If we are RT tasks or we were boosted to RT priority, -+ * keep the priority unchanged. Otherwise, update priority -+ * to the normal priority: -+ */ -+ if (!rt_prio(p->prio)) -+ return p->normal_prio; -+ return p->prio; -+} -+ -+/* -+ * activate_task - move a task to the runqueue. Enter with rq locked. -+ */ -+static void activate_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ resched_if_idle(rq); -+ -+ /* -+ * Sleep time is in units of nanosecs, so shift by 20 to get a -+ * milliseconds-range estimation of the amount of time that the task -+ * spent sleeping: -+ */ -+ if (unlikely(prof_on == SLEEP_PROFILING)) { -+ if (p->state == TASK_UNINTERRUPTIBLE) -+ profile_hits(SLEEP_PROFILING, (void *)get_wchan(p), -+ (rq->niffies - p->last_ran) >> 20); -+ } -+ -+ p->prio = effective_prio(p); -+ enqueue_task(rq, p, flags); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+} -+ -+/* -+ * deactivate_task - If it's running, it's not on the runqueue and we can just -+ * decrement the nr_running. Enter with rq locked. -+ */ -+static inline void deactivate_task(struct task_struct *p, struct rq *rq) -+{ -+ p->on_rq = 0; -+ sched_info_dequeued(rq, p); -+ /* deactivate_task is always DEQUEUE_SLEEP in muqss */ -+ psi_dequeue(p, DEQUEUE_SLEEP); -+} -+ -+#ifdef CONFIG_SMP -+void set_task_cpu(struct task_struct *p, unsigned int new_cpu) -+{ -+ struct rq *rq; -+ -+ if (task_cpu(p) == new_cpu) -+ return; -+ -+ /* Do NOT call set_task_cpu on a currently queued task as we will not -+ * be reliably holding the rq lock after changing CPU. */ -+ BUG_ON(task_queued(p)); -+ rq = task_rq(p); -+ -+#ifdef CONFIG_LOCKDEP -+ /* -+ * The caller should hold either p->pi_lock or rq->lock, when changing -+ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. -+ * -+ * Furthermore, all task_rq users should acquire both locks, see -+ * task_rq_lock(). -+ */ -+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || -+ lockdep_is_held(rq->lock))); -+#endif -+ -+ trace_sched_migrate_task(p, new_cpu); -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ -+ /* -+ * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be -+ * successfully executed on another CPU. We must ensure that updates of -+ * per-task data have been completed by this moment. -+ */ -+ smp_wmb(); -+ -+ p->wake_cpu = new_cpu; -+ -+ if (task_running(rq, p)) { -+ /* -+ * We should only be calling this on a running task if we're -+ * holding rq lock. -+ */ -+ lockdep_assert_held(rq->lock); -+ -+ /* -+ * We can't change the task_thread_info CPU on a running task -+ * as p will still be protected by the rq lock of the CPU it -+ * is still running on so we only set the wake_cpu for it to be -+ * lazily updated once off the CPU. -+ */ -+ return; -+ } -+ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ WRITE_ONCE(p->cpu, new_cpu); -+#else -+ WRITE_ONCE(task_thread_info(p)->cpu, new_cpu); -+#endif -+ /* We're no longer protecting p after this point since we're holding -+ * the wrong runqueue lock. */ -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Move a task off the runqueue and take it to a cpu for it will -+ * become the running task. 
-+ */ -+static inline void take_task(struct rq *rq, int cpu, struct task_struct *p) -+{ -+ struct rq *p_rq = task_rq(p); -+ -+ dequeue_task(p_rq, p, DEQUEUE_SAVE); -+ if (p_rq != rq) { -+ sched_info_dequeued(p_rq, p); -+ sched_info_queued(rq, p); -+ } -+ set_task_cpu(p, cpu); -+} -+ -+/* -+ * Returns a descheduling task to the runqueue unless it is being -+ * deactivated. -+ */ -+static inline void return_task(struct task_struct *p, struct rq *rq, -+ int cpu, bool deactivate) -+{ -+ if (deactivate) -+ deactivate_task(p, rq); -+ else { -+#ifdef CONFIG_SMP -+ /* -+ * set_task_cpu was called on the running task that doesn't -+ * want to deactivate so it has to be enqueued to a different -+ * CPU and we need its lock. Tag it to be moved with as the -+ * lock is dropped in finish_lock_switch. -+ */ -+ if (unlikely(p->wake_cpu != cpu)) -+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); -+ else -+#endif -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ } -+} -+ -+/* Enter with rq lock held. We know p is on the local cpu */ -+static inline void __set_tsk_resched(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ -+/** -+ * task_curr - is this task currently executing on a CPU? -+ * @p: the task in question. -+ * -+ * Return: 1 if the task is currently executing. 0 otherwise. -+ */ -+inline int task_curr(const struct task_struct *p) -+{ -+ return cpu_curr(task_cpu(p)) == p; -+} -+ -+#ifdef CONFIG_SMP -+/* -+ * wait_task_inactive - wait for a thread to unschedule. -+ * -+ * If @match_state is nonzero, it's the @p->state value just checked and -+ * not expected to change. If it changes, i.e. @p might have woken up, -+ * then return zero. When we succeed in waiting for @p to be off its CPU, -+ * we return a positive number (its total switch count). If a second call -+ * a short while later returns the same number, the caller can be sure that -+ * @p has remained unscheduled the whole time. -+ * -+ * The caller must ensure that the task *will* unschedule sometime soon, -+ * else this function might spin for a *long* time. This function can't -+ * be called with interrupts off, or it may introduce deadlock with -+ * smp_call_function() if an IPI is sent by the same process we are -+ * waiting to become inactive. -+ */ -+unsigned long wait_task_inactive(struct task_struct *p, long match_state) -+{ -+ int running, queued; -+ struct rq_flags rf; -+ unsigned long ncsw; -+ struct rq *rq; -+ -+ for (;;) { -+ rq = task_rq(p); -+ -+ /* -+ * If the task is actively running on another CPU -+ * still, just relax and busy-wait without holding -+ * any locks. -+ * -+ * NOTE! Since we don't hold any locks, it's not -+ * even sure that "rq" stays as the right runqueue! -+ * But we don't care, since this will return false -+ * if the runqueue has changed and p is actually now -+ * running somewhere else! -+ */ -+ while (task_running(rq, p)) { -+ if (match_state && unlikely(p->state != match_state)) -+ return 0; -+ cpu_relax(); -+ } -+ -+ /* -+ * Ok, time to look more closely! We need the rq -+ * lock now, to be *sure*. If we're wrong, we'll -+ * just go back and repeat. -+ */ -+ rq = task_rq_lock(p, &rf); -+ trace_sched_wait_task(p); -+ running = task_running(rq, p); -+ queued = task_on_rq_queued(p); -+ ncsw = 0; -+ if (!match_state || p->state == match_state) -+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ -+ task_rq_unlock(rq, p, &rf); -+ -+ /* -+ * If it changed from the expected state, bail out now. 
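The ncsw = p->nvcsw | LONG_MIN sample above is the part worth pausing on: setting the sign bit reserves 0 for "the task state no longer matched", so a caller can both bail out on a zero return and compare two successive non-zero samples to prove the task never rescheduled in between. A minimal userspace sketch of the same encoding (hypothetical helper, not a kernel interface):

#include <limits.h>
#include <stdio.h>

/*
 * Sample a (fake) voluntary context switch count.  Returning 0 is
 * reserved for "the state no longer matched"; a successful sample is
 * made unmistakably non-zero by setting the sign bit, exactly like
 * ncsw = p->nvcsw | LONG_MIN in the hunk above.
 */
static unsigned long sample_switch_count(unsigned long nvcsw, int state_matches)
{
	if (!state_matches)
		return 0;			/* caller bails out */
	return nvcsw | LONG_MIN;		/* non-zero even when nvcsw == 0 */
}

int main(void)
{
	unsigned long ok = sample_switch_count(0, 1);	/* valid sample, raw count 0 */
	unsigned long bad = sample_switch_count(0, 0);	/* state changed underneath  */

	printf("%d %d\n", ok != 0, bad != 0);		/* prints "1 0" */
	return 0;
}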
-+ */ -+ if (unlikely(!ncsw)) -+ break; -+ -+ /* -+ * Was it really running after all now that we -+ * checked with the proper locks actually held? -+ * -+ * Oops. Go back and try again.. -+ */ -+ if (unlikely(running)) { -+ cpu_relax(); -+ continue; -+ } -+ -+ /* -+ * It's not enough that it's not actively running, -+ * it must be off the runqueue _entirely_, and not -+ * preempted! -+ * -+ * So if it was still runnable (but just not actively -+ * running right now), it's preempted, and we should -+ * yield - it could be a while. -+ */ -+ if (unlikely(queued)) { -+ ktime_t to = NSEC_PER_SEC / HZ; -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ continue; -+ } -+ -+ /* -+ * Ahh, all good. It wasn't running, and it wasn't -+ * runnable, which means that it will never become -+ * running in the future either. We're all done! -+ */ -+ break; -+ } -+ -+ return ncsw; -+} -+ -+/*** -+ * kick_process - kick a running thread to enter/exit the kernel -+ * @p: the to-be-kicked thread -+ * -+ * Cause a process which is running on another CPU to enter -+ * kernel-mode, without any delay. (to get signals handled.) -+ * -+ * NOTE: this function doesn't have to take the runqueue lock, -+ * because all it wants to ensure is that the remote task enters -+ * the kernel. If the IPI races and the task has been migrated -+ * to another CPU then no harm is done and the purpose has been -+ * achieved as well. -+ */ -+void kick_process(struct task_struct *p) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ cpu = task_cpu(p); -+ if ((cpu != smp_processor_id()) && task_curr(p)) -+ smp_sched_reschedule(cpu); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(kick_process); -+#endif -+ -+/* -+ * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the -+ * basis of earlier deadlines. SCHED_IDLEPRIO don't preempt anything else or -+ * between themselves, they cooperatively multitask. An idle rq scores as -+ * prio PRIO_LIMIT so it is always preempted. -+ */ -+static inline bool -+can_preempt(struct task_struct *p, int prio, u64 deadline) -+{ -+ /* Better static priority RT task or better policy preemption */ -+ if (p->prio < prio) -+ return true; -+ if (p->prio > prio) -+ return false; -+ if (p->policy == SCHED_BATCH) -+ return false; -+ /* SCHED_NORMAL and ISO will preempt based on deadline */ -+ if (!deadline_before(p->deadline, deadline)) -+ return false; -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+ -+/* -+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see -+ * __set_cpus_allowed_ptr(). -+ */ -+static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -+{ -+ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ -+ if (is_per_cpu_kthread(p)) -+ return cpu_online(cpu); -+ -+ return cpu_active(cpu); -+} -+ -+/* -+ * Check to see if p can run on cpu, and if not, whether there are any online -+ * CPUs it can run on instead. This only happens with the hotplug threads that -+ * bring up the CPUs. 
-+ */ -+static inline bool sched_other_cpu(struct task_struct *p, int cpu) -+{ -+ if (likely(cpumask_test_cpu(cpu, p->cpus_ptr))) -+ return false; -+ if (p->nr_cpus_allowed == 1) { -+ cpumask_t valid_mask; -+ -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_online_mask); -+ if (unlikely(cpumask_empty(&valid_mask))) -+ return false; -+ } -+ return true; -+} -+ -+static inline bool needs_other_cpu(struct task_struct *p, int cpu) -+{ -+ if (cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ return true; -+} -+ -+#define cpu_online_map (*(cpumask_t *)cpu_online_mask) -+ -+static void try_preempt(struct task_struct *p, struct rq *this_rq) -+{ -+ int i, this_entries = rq_load(this_rq); -+ cpumask_t tmp; -+ -+ if (suitable_idle_cpus(p) && resched_best_idle(p, task_cpu(p))) -+ return; -+ -+ /* IDLEPRIO tasks never preempt anything but idle */ -+ if (p->policy == SCHED_IDLEPRIO) -+ return; -+ -+ cpumask_and(&tmp, &cpu_online_map, p->cpus_ptr); -+ -+ for (i = 0; i < num_online_cpus(); i++) { -+ struct rq *rq = this_rq->cpu_order[i]; -+ -+ if (!cpumask_test_cpu(rq->cpu, &tmp)) -+ continue; -+ -+ if (!sched_interactive && rq != this_rq && rq_load(rq) <= this_entries) -+ continue; -+ if (smt_schedule(p, rq) && can_preempt(p, rq->rq_prio, rq->rq_deadline)) { -+ /* We set rq->preempting lockless, it's a hint only */ -+ rq->preempting = p; -+ resched_curr(rq); -+ return; -+ } -+ } -+} -+ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check); -+#else /* CONFIG_SMP */ -+static inline bool needs_other_cpu(struct task_struct *p, int cpu) -+{ -+ return false; -+} -+ -+static void try_preempt(struct task_struct *p, struct rq *this_rq) -+{ -+ if (p->policy == SCHED_IDLEPRIO) -+ return; -+ if (can_preempt(p, uprq->rq_prio, uprq->rq_deadline)) -+ resched_curr(uprq); -+} -+ -+static inline int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+#endif /* CONFIG_SMP */ -+ -+static void -+ttwu_stat(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq; -+ -+ if (!schedstat_enabled()) -+ return; -+ -+ rq = this_rq(); -+ -+#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) { -+ __schedstat_inc(rq->ttwu_local); -+ } else { -+ struct sched_domain *sd; -+ -+ rcu_read_lock(); -+ for_each_domain(rq->cpu, sd) { -+ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { -+ __schedstat_inc(sd->ttwu_wake_remote); -+ break; -+ } -+ } -+ rcu_read_unlock(); -+ } -+ -+#endif /* CONFIG_SMP */ -+ -+ __schedstat_inc(rq->ttwu_count); -+} -+ -+/* -+ * Mark the task runnable and perform wakeup-preemption. -+ */ -+static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ /* -+ * Sync wakeups (i.e. those types of wakeups where the waker -+ * has indicated that it will leave the CPU in short order) -+ * don't trigger a preemption if there are no idle cpus, -+ * instead waiting for current to deschedule. 
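Whether the non-sync wakeup path above actually preempts anything comes down to can_preempt() earlier in this hunk: static priority decides first, SCHED_BATCH never preempts on deadline, and between equal-priority SCHED_NORMAL/ISO tasks only an earlier virtual deadline wins. A compact standalone sketch with made-up values (a plain less-than comparison stands in for the wrap-safe deadline_before()):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { POLICY_NORMAL, POLICY_BATCH };

struct fake_task {
	int prio;		/* lower number == higher priority */
	int policy;
	uint64_t deadline;	/* niffies; earlier == more urgent  */
};

static bool sketch_can_preempt(const struct fake_task *p, int rq_prio,
			       uint64_t rq_deadline)
{
	if (p->prio < rq_prio)
		return true;
	if (p->prio > rq_prio)
		return false;
	if (p->policy == POLICY_BATCH)
		return false;
	return p->deadline < rq_deadline;	/* simplified deadline_before() */
}

int main(void)
{
	struct fake_task p = { .prio = 120, .policy = POLICY_NORMAL, .deadline = 1000 };

	printf("%d\n", sketch_can_preempt(&p, 120, 2000));	/* 1: same prio, earlier deadline */
	printf("%d\n", sketch_can_preempt(&p, 110, 2000));	/* 0: running task has better prio */
	return 0;
}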
-+ */ -+ if (wake_flags & WF_SYNC) -+ resched_suitable_idle(p); -+ else -+ try_preempt(p, rq); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+} -+ -+static void -+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ int en_flags = ENQUEUE_WAKEUP; -+ -+ lockdep_assert_held(rq->lock); -+ -+ if (p->sched_contributes_to_load) -+ rq->nr_uninterruptible--; -+ -+#ifdef CONFIG_SMP -+ if (wake_flags & WF_MIGRATED) -+ en_flags |= ENQUEUE_MIGRATED; -+#endif -+ -+ activate_task(rq, p, en_flags); -+ ttwu_do_wakeup(rq, p, wake_flags); -+} -+ -+/* -+ * Consider @p being inside a wait loop: -+ * -+ * for (;;) { -+ * set_current_state(TASK_UNINTERRUPTIBLE); -+ * -+ * if (CONDITION) -+ * break; -+ * -+ * schedule(); -+ * } -+ * __set_current_state(TASK_RUNNING); -+ * -+ * between set_current_state() and schedule(). In this case @p is still -+ * runnable, so all that needs doing is change p->state back to TASK_RUNNING in -+ * an atomic manner. -+ * -+ * By taking task_rq(p)->lock we serialize against schedule(), if @p->on_rq -+ * then schedule() must still happen and p->state can be changed to -+ * TASK_RUNNING. Otherwise we lost the race, schedule() has happened, and we -+ * need to do a full wakeup with enqueue. -+ * -+ * Returns: %true when the wakeup is done, -+ * %false otherwise. -+ */ -+static int ttwu_runnable(struct task_struct *p, int wake_flags) -+{ -+ struct rq *rq; -+ int ret = 0; -+ -+ rq = __task_rq_lock(p, NULL); -+ if (likely(task_on_rq_queued(p))) { -+ ttwu_do_wakeup(rq, p, wake_flags); -+ ret = 1; -+ } -+ __task_rq_unlock(rq, NULL); -+ -+ return ret; -+} -+ -+#ifdef CONFIG_SMP -+void sched_ttwu_pending(void *arg) -+{ -+ struct llist_node *llist = arg; -+ struct rq *rq = this_rq(); -+ struct task_struct *p, *t; -+ struct rq_flags rf; -+ -+ if (!llist) -+ return; -+ -+ /* -+ * rq::ttwu_pending racy indication of out-standing wakeups. -+ * Races such that false-negatives are possible, since they -+ * are shorter lived that false-positives would be. -+ */ -+ WRITE_ONCE(rq->ttwu_pending, 0); -+ -+ rq_lock_irqsave(rq, &rf); -+ -+ llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { -+ if (WARN_ON_ONCE(p->on_cpu)) -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq))) -+ set_task_cpu(p, cpu_of(rq)); -+ -+ ttwu_do_activate(rq, p, 0); -+ } -+ -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+void send_call_function_single_ipi(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (!set_nr_if_polling(rq->idle)) -+ arch_send_call_function_single_ipi(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+/* -+ * Queue a task on the target CPUs wake_list and wake the CPU via IPI if -+ * necessary. The wakee CPU on receipt of the IPI will queue the task -+ * via sched_ttwu_wakeup() for activation so the wakee incurs the cost -+ * of the wakeup instead of the waker. 
-+ */ -+static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ WRITE_ONCE(rq->ttwu_pending, 1); -+ __smp_call_single_queue(cpu, &p->wake_entry.llist); -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ rq_lock_irqsave(rq, &rf); -+ if (likely(is_idle_task(rq->curr))) -+ smp_sched_reschedule(cpu); -+ /* Else cpu is not in idle, do nothing here */ -+ rq_unlock_irqrestore(rq, &rf); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+ -+static inline bool ttwu_queue_cond(int cpu, int wake_flags) -+{ -+ /* -+ * If the CPU does not share cache, then queue the task on the -+ * remote rqs wakelist to avoid accessing remote data. -+ */ -+ if (!cpus_share_cache(smp_processor_id(), cpu)) -+ return true; -+ -+ /* -+ * If the task is descheduling and the only running task on the -+ * CPU then use the wakelist to offload the task activation to -+ * the soon-to-be-idle CPU as the current CPU is likely busy. -+ * nr_running is checked to avoid unnecessary task stacking. -+ */ -+ if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) -+ return true; -+ -+ return false; -+} -+ -+static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) { -+ if (WARN_ON_ONCE(cpu == smp_processor_id())) -+ return false; -+ -+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ -+ __ttwu_queue_wakelist(p, cpu, wake_flags); -+ return true; -+ } -+ -+ return false; -+} -+ -+static int valid_task_cpu(struct task_struct *p) -+{ -+ cpumask_t valid_mask; -+ -+ if (p->flags & PF_KTHREAD) -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_all_mask); -+ else -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_active_mask); -+ -+ if (unlikely(!cpumask_weight(&valid_mask))) { -+ /* We shouldn't be hitting this any more */ -+ printk(KERN_WARNING "SCHED: No cpumask for %s/%d weight %d\n", p->comm, -+ p->pid, cpumask_weight(p->cpus_ptr)); -+ return cpumask_any(p->cpus_ptr); -+ } -+ return cpumask_any(&valid_mask); -+} -+ -+/* -+ * For a task that's just being woken up we have a valuable balancing -+ * opportunity so choose the nearest cache most lightly loaded runqueue. -+ * Entered with rq locked and returns with the chosen runqueue locked. 
-+ */ -+static inline int select_best_cpu(struct task_struct *p) -+{ -+ unsigned int idlest = ~0U; -+ struct rq *rq = NULL; -+ int i; -+ -+ if (suitable_idle_cpus(p)) { -+ int cpu = task_cpu(p); -+ -+ if (unlikely(needs_other_cpu(p, cpu))) -+ cpu = valid_task_cpu(p); -+ rq = resched_best_idle(p, cpu); -+ if (likely(rq)) -+ return rq->cpu; -+ } -+ -+ for (i = 0; i < num_online_cpus(); i++) { -+ struct rq *other_rq = task_rq(p)->cpu_order[i]; -+ int entries; -+ -+ if (!other_rq->online) -+ continue; -+ if (needs_other_cpu(p, other_rq->cpu)) -+ continue; -+ entries = rq_load(other_rq); -+ if (entries >= idlest) -+ continue; -+ idlest = entries; -+ rq = other_rq; -+ } -+ if (unlikely(!rq)) -+ return task_cpu(p); -+ return rq->cpu; -+} -+#else /* CONFIG_SMP */ -+ -+static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ return false; -+} -+ -+static int valid_task_cpu(struct task_struct *p) -+{ -+ return 0; -+} -+ -+static inline int select_best_cpu(struct task_struct *p) -+{ -+ return 0; -+} -+ -+static struct rq *resched_best_idle(struct task_struct *p, int cpu) -+{ -+ return NULL; -+} -+#endif /* CONFIG_SMP */ -+ -+static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (ttwu_queue_wakelist(p, cpu, wake_flags)) -+ return; -+ -+ rq_lock(rq); -+ update_rq_clock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ rq_unlock(rq); -+} -+ -+/*** -+ * try_to_wake_up - wake up a thread -+ * @p: the thread to be awakened -+ * @state: the mask of task states that can be woken -+ * @wake_flags: wake modifier flags (WF_*) -+ * -+ * Put it on the run-queue if it's not already there. The "current" -+ * thread is always on the run-queue (except when the actual -+ * re-schedule is in progress), and as such you're allowed to do -+ * the simpler "current->state = TASK_RUNNING" to mark yourself -+ * runnable without the overhead of this. -+ * -+ * Return: %true if @p was woken up, %false if it was already running. -+ * or @state didn't match @p's state. -+ */ -+static int -+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) -+{ -+ unsigned long flags; -+ int cpu, success = 0; -+ -+ preempt_disable(); -+ if (p == current) { -+ /* -+ * We're waking current, this means 'p->on_rq' and 'task_cpu(p) -+ * == smp_processor_id()'. Together this means we can special -+ * case the whole 'p->on_rq && ttwu_runnable()' case below -+ * without taking any locks. -+ * -+ * In particular: -+ * - we rely on Program-Order guarantees for all the ordering, -+ * - we're serialized against set_special_state() by virtue of -+ * it disabling IRQs (this allows not taking ->pi_lock). -+ */ -+ if (!(p->state & state)) -+ goto out; -+ -+ success = 1; -+ trace_sched_waking(p); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+ goto out; -+ } -+ -+ /* -+ * If we are going to wake up a thread waiting for CONDITION we -+ * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with smp_store_mb() -+ * in set_current_state() that the waiting thread does. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ smp_mb__after_spinlock(); -+ if (!(p->state & state)) -+ goto unlock; -+ -+ trace_sched_waking(p); -+ -+ /* We're going to change ->state: */ -+ success = 1; -+ -+ /* -+ * Ensure we load p->on_rq _after_ p->state, otherwise it would -+ * be possible to, falsely, observe p->on_rq == 0 and get stuck -+ * in smp_cond_load_acquire() below. 
-+ * -+ * sched_ttwu_pending() try_to_wake_up() -+ * STORE p->on_rq = 1 LOAD p->state -+ * UNLOCK rq->lock -+ * -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * UNLOCK rq->lock -+ * -+ * [task p] -+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) -+ goto unlock; -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -+ * possible to, falsely, observe p->on_cpu == 0. -+ * -+ * One must be running (->on_cpu == 1) in order to remove oneself -+ * from the runqueue. -+ * -+ * __schedule() (switch to task 'p') try_to_wake_up() -+ * STORE p->on_cpu = 1 LOAD p->on_rq -+ * UNLOCK rq->lock -+ * -+ * __schedule() (put 'p' to sleep) -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * STORE p->on_rq = 0 LOAD p->on_cpu -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ * -+ * Form a control-dep-acquire with p->on_rq == 0 above, to ensure -+ * schedule()'s deactivate_task() has 'happened' and p will no longer -+ * care about it's own p->state. See the comment in __schedule(). -+ */ -+ smp_acquire__after_ctrl_dep(); -+ -+ /* -+ * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq -+ * == 0), which means we need to do an enqueue, change p->state to -+ * TASK_WAKING such that we can unlock p->pi_lock before doing the -+ * enqueue, such as ttwu_queue_wakelist(). -+ */ -+ p->state = TASK_WAKING; -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, considering queueing p on the remote CPUs wake_list -+ * which potentially sends an IPI instead of spinning on p->on_cpu to -+ * let the waker make forward progress. This is safe because IRQs are -+ * disabled and the IPI will deliver after on_cpu is cleared. -+ * -+ * Ensure we load task_cpu(p) after p->on_cpu: -+ * -+ * set_task_cpu(p, cpu); -+ * STORE p->cpu = @cpu -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock -+ * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu) -+ * STORE p->on_cpu = 1 LOAD p->cpu -+ * -+ * to ensure we observe the correct CPU on which the task is currently -+ * scheduling. -+ */ -+ if (smp_load_acquire(&p->on_cpu) && -+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) -+ goto unlock; -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until its done referencing the task. -+ * -+ * Pairs with the smp_store_release() in finish_task(). -+ * -+ * This ensures that tasks getting woken will be fully ordered against -+ * their previous state and preserve Program Order. 
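The ordering described above is one half of a classic release/acquire handshake: finish_task(), further down in this hunk, publishes the CPU hand-off with smp_store_release(&prev->on_cpu, 0), and the waker spins with an acquire load until it observes 0, at which point everything the previous owner wrote while it was current is guaranteed visible. A userspace analogue using C11 atomics (illustrative only; the kernel primitives differ in detail):

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static atomic_int on_cpu = 1;
static int task_state;			/* ordinary, non-atomic data */

static void *previous_owner(void *arg)
{
	(void)arg;
	task_state = 42;				/* last write made as "current"      */
	atomic_store_explicit(&on_cpu, 0, memory_order_release);
	return NULL;					/* analogue of finish_task()         */
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, previous_owner, NULL);
	while (atomic_load_explicit(&on_cpu, memory_order_acquire))
		;					/* analogue of smp_cond_load_acquire */
	printf("%d\n", task_state);			/* guaranteed to print 42            */
	pthread_join(t, NULL);
	return 0;
}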
-+ */ -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ cpu = select_best_cpu(p); -+ if (task_cpu(p) != cpu) { -+ wake_flags |= WF_MIGRATED; -+ psi_ttwu_dequeue(p); -+ set_task_cpu(p, cpu); -+ } -+ -+#else -+ cpu = task_cpu(p); -+#endif /* CONFIG_SMP */ -+ -+ ttwu_queue(p, cpu, wake_flags); -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+out: -+ if (success) -+ ttwu_stat(p, task_cpu(p), wake_flags); -+ preempt_enable(); -+ -+ return success; -+} -+ -+/** -+ * try_invoke_on_locked_down_task - Invoke a function on task in fixed state -+ * @p: Process for which the function is to be invoked. -+ * @func: Function to invoke. -+ * @arg: Argument to function. -+ * -+ * If the specified task can be quickly locked into a definite state -+ * (either sleeping or on a given runqueue), arrange to keep it in that -+ * state while invoking @func(@arg). This function can use ->on_rq and -+ * task_curr() to work out what the state is, if required. Given that -+ * @func can be invoked with a runqueue lock held, it had better be quite -+ * lightweight. -+ * -+ * Returns: -+ * @false if the task slipped out from under the locks. -+ * @true if the task was locked onto a runqueue or is sleeping. -+ * However, @func can override this by returning @false. -+ */ -+bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) -+{ -+ bool ret = false; -+ struct rq *rq; -+ -+ lockdep_assert_irqs_enabled(); -+ raw_spin_lock_irq(&p->pi_lock); -+ if (p->on_rq) { -+ rq = __task_rq_lock(p, NULL); -+ if (task_rq(p) == rq) -+ ret = func(p, arg); -+ rq_unlock(rq); -+ } else { -+ switch (p->state) { -+ case TASK_RUNNING: -+ case TASK_WAKING: -+ break; -+ default: -+ smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). -+ if (!p->on_rq) -+ ret = func(p, arg); -+ } -+ } -+ raw_spin_unlock_irq(&p->pi_lock); -+ return ret; -+} -+ -+/** -+ * wake_up_process - Wake up a specific process -+ * @p: The process to be woken up. -+ * -+ * Attempt to wake up the nominated process and move it to the set of runnable -+ * processes. -+ * -+ * Return: 1 if the process was woken up, 0 if it was already running. -+ * -+ * This function executes a full memory barrier before accessing the task state. -+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+int wake_up_state(struct task_struct *p, unsigned int state) -+{ -+ return try_to_wake_up(p, state, 0); -+} -+ -+static void time_slice_expired(struct task_struct *p, struct rq *rq); -+ -+/* -+ * Perform scheduler related setup for a newly forked process p. -+ * p is forked by current. -+ */ -+int sched_fork(unsigned long __maybe_unused clone_flags, struct task_struct *p) -+{ -+ unsigned long flags; -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ INIT_HLIST_HEAD(&p->preempt_notifiers); -+#endif -+ -+#ifdef CONFIG_COMPACTION -+ p->capture_control = NULL; -+#endif -+ -+#ifdef CONFIG_SMP -+ p->wake_entry.u_flags = CSD_TYPE_TTWU; -+#endif -+ /* -+ * We mark the process as NEW here. This guarantees that -+ * nobody will actually run it, and a signal or other external -+ * event cannot wake it up and insert it on the runqueue either. -+ */ -+ p->state = TASK_NEW; -+ -+ /* -+ * The process state is set to the same value of the process executing -+ * do_fork() code. That is running. This guarantees that nobody will -+ * actually run it, and a signal or other external event cannot wake -+ * it up and insert it on the runqueue either. 
-+ */ -+ -+ /* Should be reset in fork.c but done here for ease of MuQSS patching */ -+ p->on_cpu = -+ p->on_rq = -+ p->utime = -+ p->stime = -+ p->sched_time = -+ p->stime_ns = -+ p->utime_ns = 0; -+ skiplist_node_init(&p->node); -+ -+ /* -+ * Revert to default priority/policy on fork if requested. -+ */ -+ if (unlikely(p->sched_reset_on_fork)) { -+ if (p->policy == SCHED_FIFO || p->policy == SCHED_RR || p-> policy == SCHED_ISO) { -+ p->policy = SCHED_NORMAL; -+ p->normal_prio = normal_prio(p); -+ } -+ -+ if (PRIO_TO_NICE(p->static_prio) < 0) { -+ p->static_prio = NICE_TO_PRIO(0); -+ p->normal_prio = p->static_prio; -+ } -+ -+ /* -+ * We don't need the reset flag anymore after the fork. It has -+ * fulfilled its duty: -+ */ -+ p->sched_reset_on_fork = 0; -+ } -+ -+ /* -+ * Silence PROVE_RCU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rseq_migrate(p); -+ set_task_cpu(p, smp_processor_id()); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ return 0; -+} -+ -+void sched_post_fork(struct task_struct *p) -+{ -+} -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+static bool __initdata __sched_schedstats = false; -+ -+static void set_schedstats(bool enabled) -+{ -+ if (enabled) -+ static_branch_enable(&sched_schedstats); -+ else -+ static_branch_disable(&sched_schedstats); -+} -+ -+void force_schedstat_enabled(void) -+{ -+ if (!schedstat_enabled()) { -+ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); -+ static_branch_enable(&sched_schedstats); -+ } -+} -+ -+static int __init setup_schedstats(char *str) -+{ -+ int ret = 0; -+ if (!str) -+ goto out; -+ -+ /* -+ * This code is called before jump labels have been set up, so we can't -+ * change the static branch directly just yet. Instead set a temporary -+ * variable so init_schedstats() can do it later. 
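A userspace analogue of the two-phase setup described above, not part of the patch: the early parser only records the requested value in a plain variable because the static-branch machinery is not usable that early, and init_schedstats() applies it once jump labels are functional (a plain bool stands in for the static key):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool pending_schedstats;		/* analogue of __sched_schedstats    */
static bool schedstats_key;		/* analogue of the static key        */

static int parse_schedstats(const char *str)	/* "early boot": record only */
{
	if (!strcmp(str, "enable"))
		pending_schedstats = true;
	else if (!strcmp(str, "disable"))
		pending_schedstats = false;
	else
		return 0;		/* unparsable, like the pr_warn() path */
	return 1;
}

static void apply_schedstats(void)	/* analogue of init_schedstats() */
{
	schedstats_key = pending_schedstats;
}

int main(void)
{
	parse_schedstats("enable");
	apply_schedstats();
	printf("%d\n", schedstats_key);		/* prints 1 */
	return 0;
}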
-+ */ -+ if (!strcmp(str, "enable")) { -+ __sched_schedstats = true; -+ ret = 1; -+ } else if (!strcmp(str, "disable")) { -+ __sched_schedstats = false; -+ ret = 1; -+ } -+out: -+ if (!ret) -+ pr_warn("Unable to parse schedstats=\n"); -+ -+ return ret; -+} -+__setup("schedstats=", setup_schedstats); -+ -+static void __init init_schedstats(void) -+{ -+ set_schedstats(__sched_schedstats); -+} -+ -+#ifdef CONFIG_PROC_SYSCTL -+int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, -+ size_t *lenp, loff_t *ppos) -+{ -+ struct ctl_table t; -+ int err; -+ int state = static_branch_likely(&sched_schedstats); -+ -+ if (write && !capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ t = *table; -+ t.data = &state; -+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); -+ if (err < 0) -+ return err; -+ if (write) -+ set_schedstats(state); -+ return err; -+} -+#endif /* CONFIG_PROC_SYSCTL */ -+#else /* !CONFIG_SCHEDSTATS */ -+static inline void init_schedstats(void) {} -+#endif /* CONFIG_SCHEDSTATS */ -+ -+static void update_cpu_clock_switch(struct rq *rq, struct task_struct *p); -+ -+static void account_task_cpu(struct rq *rq, struct task_struct *p) -+{ -+ update_clocks(rq); -+ /* This isn't really a context switch but accounting is the same */ -+ update_cpu_clock_switch(rq, p); -+ p->last_ran = rq->niffies; -+} -+ -+bool sched_smp_initialized __read_mostly; -+ -+static inline int hrexpiry_enabled(struct rq *rq) -+{ -+ if (unlikely(!cpu_active(cpu_of(rq)) || !sched_smp_initialized)) -+ return 0; -+ return hrtimer_is_hres_active(&rq->hrexpiry_timer); -+} -+ -+/* -+ * Use HR-timers to deliver accurate preemption points. -+ */ -+static inline void hrexpiry_clear(struct rq *rq) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ if (hrtimer_active(&rq->hrexpiry_timer)) -+ hrtimer_cancel(&rq->hrexpiry_timer); -+} -+ -+/* -+ * High-resolution time_slice expiry. -+ * Runs from hardirq context with interrupts disabled. -+ */ -+static enum hrtimer_restart hrexpiry(struct hrtimer *timer) -+{ -+ struct rq *rq = container_of(timer, struct rq, hrexpiry_timer); -+ struct task_struct *p; -+ -+ /* This can happen during CPU hotplug / resume */ -+ if (unlikely(cpu_of(rq) != smp_processor_id())) -+ goto out; -+ -+ /* -+ * We're doing this without the runqueue lock but this should always -+ * be run on the local CPU. Time slice should run out in __schedule -+ * but we set it to zero here in case niffies is slightly less. -+ */ -+ p = rq->curr; -+ p->time_slice = 0; -+ __set_tsk_resched(p); -+out: -+ return HRTIMER_NORESTART; -+} -+ -+/* -+ * Called to set the hrexpiry timer state. -+ * -+ * called with irqs disabled from the local CPU only -+ */ -+static void hrexpiry_start(struct rq *rq, u64 delay) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ -+ hrtimer_start(&rq->hrexpiry_timer, ns_to_ktime(delay), -+ HRTIMER_MODE_REL_PINNED); -+} -+ -+static void init_rq_hrexpiry(struct rq *rq) -+{ -+ hrtimer_init(&rq->hrexpiry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ rq->hrexpiry_timer.function = hrexpiry; -+} -+ -+static inline int rq_dither(struct rq *rq) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return HALF_JIFFY_US; -+ return 0; -+} -+ -+/* -+ * wake_up_new_task - wake up a newly created task for the first time. -+ * -+ * This function will do some initial scheduler statistics housekeeping -+ * that must be done for every newly created context, then puts the task -+ * on the runqueue and wakes it. 
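Concretely, the slice handoff that wake_up_new_task() below implements splits the parent's remaining timeslice in half; if the halved slice has fallen under the reschedule threshold, the parent is marked for rescheduling and the child instead starts from a fresh slice with a slightly earlier deadline so it runs first. A standalone sketch of that arithmetic (the SKETCH_RESCHED_US value below is an assumed stand-in, not the patch's constant):

#include <stdbool.h>
#include <stdio.h>

#define SKETCH_RESCHED_US 100	/* hypothetical threshold in microseconds */

struct split {
	int parent_us;
	int child_us;
	bool resched_parent;	/* parent has effectively run out */
};

static struct split split_timeslice(int parent_us, int fresh_slice_us)
{
	struct split s;

	s.parent_us = parent_us / 2;
	if (s.parent_us < SKETCH_RESCHED_US) {
		s.child_us = fresh_slice_us;	/* like the time_slice_expired() refill */
		s.resched_parent = true;
	} else {
		s.child_us = s.parent_us;	/* parent and child share the remainder */
		s.resched_parent = false;
	}
	return s;
}

int main(void)
{
	struct split s = split_timeslice(6000, 6000);

	printf("parent=%dus child=%dus resched=%d\n",
	       s.parent_us, s.child_us, s.resched_parent);	/* parent=3000us child=3000us resched=0 */
	return 0;
}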
-+ */ -+void wake_up_new_task(struct task_struct *p) -+{ -+ struct task_struct *parent, *rq_curr; -+ struct rq *rq, *new_rq; -+ unsigned long flags; -+ -+ parent = p->parent; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ p->state = TASK_RUNNING; -+ /* Task_rq can't change yet on a new task */ -+ new_rq = rq = task_rq(p); -+ if (unlikely(needs_other_cpu(p, task_cpu(p)))) { -+ set_task_cpu(p, valid_task_cpu(p)); -+ new_rq = task_rq(p); -+ } -+ -+ double_rq_lock(rq, new_rq); -+ rq_curr = rq->curr; -+ -+ /* -+ * Make sure we do not leak PI boosting priority to the child. -+ */ -+ p->prio = rq_curr->normal_prio; -+ -+ trace_sched_wakeup_new(p); -+ -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. If it's negative, it won't -+ * matter since that's the same as being 0. rq->rq_deadline is only -+ * modified within schedule() so it is always equal to -+ * current->deadline. -+ */ -+ account_task_cpu(rq, rq_curr); -+ p->last_ran = rq_curr->last_ran; -+ if (likely(rq_curr->policy != SCHED_FIFO)) { -+ rq_curr->time_slice /= 2; -+ if (rq_curr->time_slice < RESCHED_US) { -+ /* -+ * Forking task has run out of timeslice. Reschedule it and -+ * start its child with a new time slice and deadline. The -+ * child will end up running first because its deadline will -+ * be slightly earlier. -+ */ -+ __set_tsk_resched(rq_curr); -+ time_slice_expired(p, new_rq); -+ if (suitable_idle_cpus(p)) -+ resched_best_idle(p, task_cpu(p)); -+ else if (unlikely(rq != new_rq)) -+ try_preempt(p, new_rq); -+ } else { -+ p->time_slice = rq_curr->time_slice; -+ if (rq_curr == parent && rq == new_rq && !suitable_idle_cpus(p)) { -+ /* -+ * The VM isn't cloned, so we're in a good position to -+ * do child-runs-first in anticipation of an exec. This -+ * usually avoids a lot of COW overhead. -+ */ -+ __set_tsk_resched(rq_curr); -+ } else { -+ /* -+ * Adjust the hrexpiry since rq_curr will keep -+ * running and its timeslice has been shortened. -+ */ -+ hrexpiry_start(rq, US_TO_NS(rq_curr->time_slice)); -+ try_preempt(p, new_rq); -+ } -+ } -+ } else { -+ time_slice_expired(p, new_rq); -+ try_preempt(p, new_rq); -+ } -+ activate_task(new_rq, p, 0); -+ double_rq_unlock(rq, new_rq); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ -+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); -+ -+void preempt_notifier_inc(void) -+{ -+ static_branch_inc(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_inc); -+ -+void preempt_notifier_dec(void) -+{ -+ static_branch_dec(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_dec); -+ -+/** -+ * preempt_notifier_register - tell me when current is being preempted & rescheduled -+ * @notifier: notifier struct to register -+ */ -+void preempt_notifier_register(struct preempt_notifier *notifier) -+{ -+ if (!static_branch_unlikely(&preempt_notifier_key)) -+ WARN(1, "registering preempt_notifier while notifiers disabled\n"); -+ -+ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_register); -+ -+/** -+ * preempt_notifier_unregister - no longer interested in preemption notifications -+ * @notifier: notifier struct to unregister -+ * -+ * This is *not* safe to call from within a preemption notifier. 
-+ */ -+void preempt_notifier_unregister(struct preempt_notifier *notifier) -+{ -+ hlist_del(¬ifier->link); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_unregister); -+ -+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_in(notifier, raw_smp_processor_id()); -+} -+ -+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_in_preempt_notifiers(curr); -+} -+ -+static void -+__fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_out(notifier, next); -+} -+ -+static __always_inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_out_preempt_notifiers(curr, next); -+} -+ -+#else /* !CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+} -+ -+static inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+} -+ -+#endif /* CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void prepare_task(struct task_struct *next) -+{ -+ /* -+ * Claim the task as running, we do this before switching to it -+ * such that any running task will have this set. -+ */ -+ next->on_cpu = 1; -+} -+ -+static inline void finish_task(struct task_struct *prev) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * This must be the very last reference to @prev from this CPU. After -+ * p->on_cpu is cleared, the task can be moved to a different CPU. We -+ * must ensure this doesn't happen until the switch is completely -+ * finished. -+ * -+ * In particular, the load of prev->state in finish_task_switch() must -+ * happen before this. -+ * -+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). -+ */ -+ smp_store_release(&prev->on_cpu, 0); -+#endif -+} -+ -+static inline void -+prepare_lock_switch(struct rq *rq, struct task_struct *next) -+{ -+ /* -+ * Since the runqueue lock will be released by the next -+ * task (which is an invalid locking op but in the case -+ * of the scheduler it's an obvious special-case), so we -+ * do an early lockdep release here: -+ */ -+ spin_release(&rq->lock->dep_map, _THIS_IP_); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* this is a valid case when another task releases the spinlock */ -+ rq->lock->owner = next; -+#endif -+} -+ -+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) -+{ -+ /* -+ * If we are tracking spinlock dependencies then we have to -+ * fix up the runqueue lock - which gets 'carried over' from -+ * prev into current: -+ */ -+ spin_acquire(&rq->lock->dep_map, 0, 0, _THIS_IP_); -+ -+#ifdef CONFIG_SMP -+ /* -+ * If prev was marked as migrating to another CPU in return_task, drop -+ * the local runqueue lock but leave interrupts disabled and grab the -+ * remote lock we're migrating it to before enabling them. -+ */ -+ if (unlikely(task_on_rq_migrating(prev))) { -+ sched_info_dequeued(rq, prev); -+ /* -+ * We move the ownership of prev to the new cpu now. ttwu can't -+ * activate prev to the wrong cpu since it has to grab this -+ * runqueue in ttwu_remote. 
-+ */ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ prev->cpu = prev->wake_cpu; -+#else -+ task_thread_info(prev)->cpu = prev->wake_cpu; -+#endif -+ raw_spin_unlock(rq->lock); -+ -+ raw_spin_lock(&prev->pi_lock); -+ rq = __task_rq_lock(prev, NULL); -+ /* Check that someone else hasn't already queued prev */ -+ if (likely(!task_queued(prev))) { -+ enqueue_task(rq, prev, 0); -+ prev->on_rq = TASK_ON_RQ_QUEUED; -+ /* Wake up the CPU if it's not already running */ -+ resched_if_idle(rq); -+ } -+ raw_spin_unlock(&prev->pi_lock); -+ } -+#endif -+ rq_unlock(rq); -+ local_irq_enable(); -+} -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+#ifndef finish_arch_switch -+# define finish_arch_switch(prev) do { } while (0) -+#endif -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+/** -+ * prepare_task_switch - prepare to switch tasks -+ * @rq: the runqueue preparing to switch -+ * @next: the task we are going to switch to. -+ * -+ * This is called with the rq lock held and interrupts off. It must -+ * be paired with a subsequent finish_task_switch after the context -+ * switch. -+ * -+ * prepare_task_switch sets up locking and calls architecture specific -+ * hooks. -+ */ -+static inline void -+prepare_task_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ kcov_prepare_switch(prev); -+ sched_info_switch(rq, prev, next); -+ perf_event_task_sched_out(prev, next); -+ rseq_preempt(prev); -+ fire_sched_out_preempt_notifiers(prev, next); -+ prepare_task(next); -+ prepare_arch_switch(next); -+} -+ -+/** -+ * finish_task_switch - clean up after a task-switch -+ * @rq: runqueue associated with task-switch -+ * @prev: the thread we just switched away from. -+ * -+ * finish_task_switch must be called after the context switch, paired -+ * with a prepare_task_switch call before the context switch. -+ * finish_task_switch will reconcile locking set up by prepare_task_switch, -+ * and do any other architecture-specific cleanup actions. -+ * -+ * Note that we may have delayed dropping an mm in context_switch(). If -+ * so, we finish that here outside of the runqueue lock. (Doing it -+ * with the lock held can cause deadlocks; see schedule() for -+ * details.) -+ * -+ * The context switch have flipped the stack from under us and restored the -+ * local variables which were saved when this task called schedule() in the -+ * past. prev == current is still correct but we need to recalculate this_rq -+ * because prev may have moved to another CPU. -+ */ -+static void finish_task_switch(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq = this_rq(); -+ struct mm_struct *mm = rq->prev_mm; -+ long prev_state; -+ -+ /* -+ * The previous task will have left us with a preempt_count of 2 -+ * because it left us after: -+ * -+ * schedule() -+ * preempt_disable(); // 1 -+ * __schedule() -+ * raw_spin_lock_irq(rq->lock) // 2 -+ * -+ * Also, see FORK_PREEMPT_COUNT. -+ */ -+ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, -+ "corrupted preempt_count: %s/%d/0x%x\n", -+ current->comm, current->pid, preempt_count())) -+ preempt_count_set(FORK_PREEMPT_COUNT); -+ -+ rq->prev_mm = NULL; -+ -+ /* -+ * A task struct has one reference for the use as "current". -+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls -+ * schedule one last time. The schedule call will never return, and -+ * the scheduled task must drop that reference. 
-+ * -+ * We must observe prev->state before clearing prev->on_cpu (in -+ * finish_task), otherwise a concurrent wakeup can get prev -+ * running on another CPU and we could rave with its RUNNING -> DEAD -+ * transition, resulting in a double drop. -+ */ -+ prev_state = prev->state; -+ vtime_task_switch(prev); -+ perf_event_task_sched_in(prev, current); -+ finish_task(prev); -+ finish_lock_switch(rq, prev); -+ finish_arch_post_lock_switch(); -+ kcov_finish_switch(current); -+ -+ fire_sched_in_preempt_notifiers(current); -+ /* -+ * When switching through a kernel thread, the loop in -+ * membarrier_{private,global}_expedited() may have observed that -+ * kernel thread and not issued an IPI. It is therefore possible to -+ * schedule between user->kernel->user threads without passing though -+ * switch_mm(). Membarrier requires a barrier after storing to -+ * rq->curr, before returning to userspace, so provide them here: -+ * -+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly -+ * provided by mmdrop(), -+ * - a sync_core for SYNC_CORE. -+ */ -+ if (mm) { -+ membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop(mm); -+ } -+ if (unlikely(prev_state == TASK_DEAD)) { -+ /* -+ * Remove function-return probe instances associated with this -+ * task and put them back on the free list. -+ */ -+ kprobe_flush_task(prev); -+ -+ /* Task is done with its stack. */ -+ put_task_stack(prev); -+ -+ put_task_struct_rcu_user(prev); -+ } -+} -+ -+/** -+ * schedule_tail - first thing a freshly forked thread must call. -+ * @prev: the thread we just switched away from. -+ */ -+asmlinkage __visible void schedule_tail(struct task_struct *prev) -+{ -+ /* -+ * New tasks start with FORK_PREEMPT_COUNT, see there and -+ * finish_task_switch() for details. -+ * -+ * finish_task_switch() will drop rq->lock() and lower preempt_count -+ * and the preempt_enable() will end up enabling preemption (on -+ * PREEMPT_COUNT kernels). -+ */ -+ -+ finish_task_switch(prev); -+ preempt_enable(); -+ -+ if (current->set_child_tid) -+ put_user(task_pid_vnr(current), current->set_child_tid); -+ -+ calculate_sigpending(); -+} -+ -+/* -+ * context_switch - switch to the new MM and the new thread's register state. -+ */ -+static __always_inline void -+context_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ prepare_task_switch(rq, prev, next); -+ -+ /* -+ * For paravirt, this is coupled with an exit in switch_to to -+ * combine the page table reload and the switch backend into -+ * one hypercall. -+ */ -+ arch_start_context_switch(prev); -+ -+ /* -+ * kernel -> kernel lazy + transfer active -+ * user -> kernel lazy + mmgrab() active -+ * -+ * kernel -> user switch + mmdrop() active -+ * user -> user switch -+ */ -+ if (!next->mm) { // to kernel -+ enter_lazy_tlb(prev->active_mm, next); -+ -+ next->active_mm = prev->active_mm; -+ if (prev->mm) // from user -+ mmgrab(prev->active_mm); -+ else -+ prev->active_mm = NULL; -+ } else { // to user -+ membarrier_switch_mm(rq, prev->active_mm, next->mm); -+ /* -+ * sys_membarrier() requires an smp_mb() between setting -+ * rq->curr / membarrier_switch_mm() and returning to userspace. -+ * -+ * The below provides this either through switch_mm(), or in -+ * case 'prev->active_mm == next->mm' through -+ * finish_task_switch()'s mmdrop(). -+ */ -+ switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ -+ if (!prev->mm) { // from kernel -+ /* will mmdrop() in finish_task_switch(). 
*/ -+ rq->prev_mm = prev->active_mm; -+ prev->active_mm = NULL; -+ } -+ } -+ prepare_lock_switch(rq, next); -+ -+ /* Here we just switch the register state and the stack. */ -+ switch_to(prev, next, prev); -+ barrier(); -+ -+ finish_task_switch(prev); -+} -+ -+/* -+ * nr_running, nr_uninterruptible and nr_context_switches: -+ * -+ * externally visible scheduler statistics: current number of runnable -+ * threads, total number of context switches performed since bootup. -+ */ -+unsigned long nr_running(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_running; -+ -+ return sum; -+} -+ -+static unsigned long nr_uninterruptible(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_uninterruptible; -+ -+ return sum; -+} -+ -+/* -+ * Check if only the current task is running on the CPU. -+ * -+ * Caution: this function does not check that the caller has disabled -+ * preemption, thus the result might have a time-of-check-to-time-of-use -+ * race. The caller is responsible to use it correctly, for example: -+ * -+ * - from a non-preemptible section (of course) -+ * -+ * - from a thread that is bound to a single CPU -+ * -+ * - in a loop with very short iterations (e.g. a polling loop) -+ */ -+bool single_task_running(void) -+{ -+ if (rq_load(raw_rq()) == 1) -+ return true; -+ else -+ return false; -+} -+EXPORT_SYMBOL(single_task_running); -+ -+unsigned long long nr_context_switches(void) -+{ -+ int cpu; -+ unsigned long long sum = 0; -+ -+ for_each_possible_cpu(cpu) -+ sum += cpu_rq(cpu)->nr_switches; -+ -+ return sum; -+} -+ -+/* -+ * Consumers of these two interfaces, like for example the cpufreq menu -+ * governor are using nonsensical data. Boosting frequency for a CPU that has -+ * IO-wait which might not even end up running the task when it does become -+ * runnable. -+ */ -+ -+unsigned long nr_iowait_cpu(int cpu) -+{ -+ return atomic_read(&cpu_rq(cpu)->nr_iowait); -+} -+ -+/* -+ * IO-wait accounting, and how its mostly bollocks (on SMP). -+ * -+ * The idea behind IO-wait account is to account the idle time that we could -+ * have spend running if it were not for IO. That is, if we were to improve the -+ * storage performance, we'd have a proportional reduction in IO-wait time. -+ * -+ * This all works nicely on UP, where, when a task blocks on IO, we account -+ * idle time as IO-wait, because if the storage were faster, it could've been -+ * running and we'd not be idle. -+ * -+ * This has been extended to SMP, by doing the same for each CPU. This however -+ * is broken. -+ * -+ * Imagine for instance the case where two tasks block on one CPU, only the one -+ * CPU will have IO-wait accounted, while the other has regular idle. Even -+ * though, if the storage were faster, both could've ran at the same time, -+ * utilising both CPUs. -+ * -+ * This means, that when looking globally, the current IO-wait accounting on -+ * SMP is a lower bound, by reason of under accounting. -+ * -+ * Worse, since the numbers are provided per CPU, they are sometimes -+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly -+ * associated with any one particular CPU, it can wake to another CPU than it -+ * blocked on. This means the per CPU IO-wait number is meaningless. -+ * -+ * Task CPU affinities can make all that even more 'interesting'. 
-+ */ -+ -+unsigned long nr_iowait(void) -+{ -+ unsigned long cpu, sum = 0; -+ -+ for_each_possible_cpu(cpu) -+ sum += nr_iowait_cpu(cpu); -+ -+ return sum; -+} -+ -+unsigned long nr_active(void) -+{ -+ return nr_running() + nr_uninterruptible(); -+} -+ -+/* Variables and functions for calc_load */ -+static unsigned long calc_load_update; -+unsigned long avenrun[3]; -+EXPORT_SYMBOL(avenrun); -+ -+/** -+ * get_avenrun - get the load average array -+ * @loads: pointer to dest load array -+ * @offset: offset to add -+ * @shift: shift count to shift the result left -+ * -+ * These values are estimates at best, so no need for locking. -+ */ -+void get_avenrun(unsigned long *loads, unsigned long offset, int shift) -+{ -+ loads[0] = (avenrun[0] + offset) << shift; -+ loads[1] = (avenrun[1] + offset) << shift; -+ loads[2] = (avenrun[2] + offset) << shift; -+} -+ -+/* -+ * calc_load - update the avenrun load estimates every LOAD_FREQ seconds. -+ */ -+void calc_global_load(void) -+{ -+ long active; -+ -+ if (time_before(jiffies, READ_ONCE(calc_load_update))) -+ return; -+ active = nr_active() * FIXED_1; -+ -+ avenrun[0] = calc_load(avenrun[0], EXP_1, active); -+ avenrun[1] = calc_load(avenrun[1], EXP_5, active); -+ avenrun[2] = calc_load(avenrun[2], EXP_15, active); -+ -+ calc_load_update = jiffies + LOAD_FREQ; -+} -+ -+/** -+ * fixed_power_int - compute: x^n, in O(log n) time -+ * -+ * @x: base of the power -+ * @frac_bits: fractional bits of @x -+ * @n: power to raise @x to. -+ * -+ * By exploiting the relation between the definition of the natural power -+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and -+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i, -+ * (where: n_i \elem {0, 1}, the binary vector representing n), -+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is -+ * of course trivially computable in O(log_2 n), the length of our binary -+ * vector. -+ */ -+static unsigned long -+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) -+{ -+ unsigned long result = 1UL << frac_bits; -+ -+ if (n) { -+ for (;;) { -+ if (n & 1) { -+ result *= x; -+ result += 1UL << (frac_bits - 1); -+ result >>= frac_bits; -+ } -+ n >>= 1; -+ if (!n) -+ break; -+ x *= x; -+ x += 1UL << (frac_bits - 1); -+ x >>= frac_bits; -+ } -+ } -+ -+ return result; -+} -+ -+/* -+ * a1 = a0 * e + a * (1 - e) -+ * -+ * a2 = a1 * e + a * (1 - e) -+ * = (a0 * e + a * (1 - e)) * e + a * (1 - e) -+ * = a0 * e^2 + a * (1 - e) * (1 + e) -+ * -+ * a3 = a2 * e + a * (1 - e) -+ * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e) -+ * = a0 * e^3 + a * (1 - e) * (1 + e + e^2) -+ * -+ * ... -+ * -+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... 
+ e^n-1) [1] -+ * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e) -+ * = a0 * e^n + a * (1 - e^n) -+ * -+ * [1] application of the geometric series: -+ * -+ * n 1 - x^(n+1) -+ * S_n := \Sum x^i = ------------- -+ * i=0 1 - x -+ */ -+unsigned long -+calc_load_n(unsigned long load, unsigned long exp, -+ unsigned long active, unsigned int n) -+{ -+ return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); -+} -+ -+DEFINE_PER_CPU(struct kernel_stat, kstat); -+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -+ -+EXPORT_PER_CPU_SYMBOL(kstat); -+EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -+ -+#ifdef CONFIG_PARAVIRT -+static inline u64 steal_ticks(u64 steal) -+{ -+ if (unlikely(steal > NSEC_PER_SEC)) -+ return div_u64(steal, TICK_NSEC); -+ -+ return __iter_div_u64_rem(steal, TICK_NSEC, &steal); -+} -+#endif -+ -+#ifndef nsecs_to_cputime -+# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) -+#endif -+ -+/* -+ * On each tick, add the number of nanoseconds to the unbanked variables and -+ * once one tick's worth has accumulated, account it allowing for accurate -+ * sub-tick accounting and totals. Use the TICK_APPROX_NS to match the way we -+ * deduct nanoseconds. -+ */ -+static void pc_idle_time(struct rq *rq, struct task_struct *idle, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ if (atomic_read(&rq->nr_iowait) > 0) { -+ rq->iowait_ns += ns; -+ if (rq->iowait_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->iowait_ns); -+ cpustat[CPUTIME_IOWAIT] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->iowait_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->idle_ns += ns; -+ if (rq->idle_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->idle_ns); -+ cpustat[CPUTIME_IDLE] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->idle_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(idle); -+} -+ -+static void pc_system_time(struct rq *rq, struct task_struct *p, -+ int hardirq_offset, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ p->stime_ns += ns; -+ if (p->stime_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(p->stime_ns); -+ p->stime_ns %= JIFFY_NS; -+ p->stime += (__force u64)TICK_APPROX_NS * ticks; -+ account_group_system_time(p, TICK_APPROX_NS * ticks); -+ } -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ if (hardirq_count() - hardirq_offset) { -+ rq->irq_ns += ns; -+ if (rq->irq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->irq_ns); -+ cpustat[CPUTIME_IRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->irq_ns %= JIFFY_NS; -+ } -+ } else if (in_serving_softirq()) { -+ rq->softirq_ns += ns; -+ if (rq->softirq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->softirq_ns); -+ cpustat[CPUTIME_SOFTIRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->softirq_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->system_ns += ns; -+ if (rq->system_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->system_ns); -+ cpustat[CPUTIME_SYSTEM] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->system_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(p); -+} -+ -+static void pc_user_time(struct rq *rq, struct task_struct *p, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ p->utime_ns += ns; -+ if (p->utime_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(p->utime_ns); -+ p->utime_ns %= JIFFY_NS; -+ p->utime += (__force u64)TICK_APPROX_NS * ticks; -+ account_group_user_time(p, TICK_APPROX_NS * ticks); -+ } -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ if 
(this_cpu_ksoftirqd() == p) { -+ /* -+ * ksoftirqd time do not get accounted in cpu_softirq_time. -+ * So, we have to handle it separately here. -+ */ -+ rq->softirq_ns += ns; -+ if (rq->softirq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->softirq_ns); -+ cpustat[CPUTIME_SOFTIRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->softirq_ns %= JIFFY_NS; -+ } -+ } -+ -+ if (task_nice(p) > 0 || idleprio_task(p)) { -+ rq->nice_ns += ns; -+ if (rq->nice_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->nice_ns); -+ cpustat[CPUTIME_NICE] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->nice_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->user_ns += ns; -+ if (rq->user_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->user_ns); -+ cpustat[CPUTIME_USER] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->user_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(p); -+} -+ -+/* -+ * This is called on clock ticks. -+ * Bank in p->sched_time the ns elapsed since the last tick or switch. -+ * CPU scheduler quota accounting is also performed here in microseconds. -+ */ -+static void update_cpu_clock_tick(struct rq *rq, struct task_struct *p) -+{ -+ s64 account_ns = rq->niffies - p->last_ran; -+ struct task_struct *idle = rq->idle; -+ -+ /* Accurate tick timekeeping */ -+ if (user_mode(get_irq_regs())) -+ pc_user_time(rq, p, account_ns); -+ else if (p != idle || (irq_count() != HARDIRQ_OFFSET)) { -+ pc_system_time(rq, p, HARDIRQ_OFFSET, account_ns); -+ } else -+ pc_idle_time(rq, idle, account_ns); -+ -+ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ -+ if (p->policy != SCHED_FIFO && p != idle) -+ p->time_slice -= NS_TO_US(account_ns); -+ -+ p->last_ran = rq->niffies; -+} -+ -+/* -+ * This is called on context switches. -+ * Bank in p->sched_time the ns elapsed since the last tick or switch. -+ * CPU scheduler quota accounting is also performed here in microseconds. -+ */ -+static void update_cpu_clock_switch(struct rq *rq, struct task_struct *p) -+{ -+ s64 account_ns = rq->niffies - p->last_ran; -+ struct task_struct *idle = rq->idle; -+ -+ /* Accurate subtick timekeeping */ -+ if (p != idle) -+ pc_user_time(rq, p, account_ns); -+ else -+ pc_idle_time(rq, idle, account_ns); -+ -+ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ -+ if (p->policy != SCHED_FIFO && p != idle) -+ p->time_slice -= NS_TO_US(account_ns); -+} -+ -+/* -+ * Return any ns on the sched_clock that have not yet been accounted in -+ * @p in case that task is currently running. -+ * -+ * Called with task_rq_lock(p) held. -+ */ -+static inline u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) -+{ -+ u64 ns = 0; -+ -+ /* -+ * Must be ->curr _and_ ->on_rq. If dequeued, we would -+ * project cycles that may never be accounted to this -+ * thread, breaking clock_gettime(). -+ */ -+ if (p == rq->curr && task_on_rq_queued(p)) { -+ update_clocks(rq); -+ ns = rq->niffies - p->last_ran; -+ } -+ -+ return ns; -+} -+ -+/* -+ * Return accounted runtime for the task. -+ * Return separately the current's pending runtime that have not been -+ * accounted yet. -+ */ -+unsigned long long task_sched_runtime(struct task_struct *p) -+{ -+ struct rq_flags rf; -+ struct rq *rq; -+ u64 ns; -+ -+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) -+ /* -+ * 64-bit doesn't need locks to atomically read a 64-bit value. -+ * So we have a optimisation chance when the task's delta_exec is 0. -+ * Reading ->on_cpu is racy, but this is ok. -+ * -+ * If we race with it leaving CPU, we'll take a lock. So we're correct. 
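
For reference, the avenrun/calc_load/fixed_power_int code a little earlier in this hunk is plain fixed-point arithmetic and can be exercised outside the kernel. A minimal userspace sketch, assuming the stock FSHIFT/FIXED_1/EXP_* constants from the mainline loadavg header (well-known defaults, not values taken from this patch):

	/* Standalone sketch of the fixed-point load-average math. */
	#include <stdio.h>

	#define FSHIFT   11                  /* bits of fractional precision */
	#define FIXED_1  (1UL << FSHIFT)     /* 1.0 in fixed point */
	#define EXP_1    1884                /* 1/exp(5s/1min) in fixed point */
	#define EXP_5    2014                /* 1/exp(5s/5min) */
	#define EXP_15   2037                /* 1/exp(5s/15min) */

	/* a1 = a0 * e + a * (1 - e), rounded up when load is rising */
	static unsigned long calc_load(unsigned long load, unsigned long exp,
				       unsigned long active)
	{
		unsigned long newload = load * exp + active * (FIXED_1 - exp);

		if (active >= load)
			newload += FIXED_1 - 1;
		return newload / FIXED_1;
	}

	/* x^n in fixed point, O(log n) by squaring -- mirrors fixed_power_int() */
	static unsigned long fixed_power_int(unsigned long x,
					     unsigned int frac_bits, unsigned int n)
	{
		unsigned long result = 1UL << frac_bits;

		while (n) {
			if (n & 1) {
				result *= x;
				result += 1UL << (frac_bits - 1);
				result >>= frac_bits;
			}
			n >>= 1;
			if (!n)
				break;
			x *= x;
			x += 1UL << (frac_bits - 1);
			x >>= frac_bits;
		}
		return result;
	}

	int main(void)
	{
		unsigned long avenrun[3] = { 0, 0, 0 };
		unsigned long active = 3 * FIXED_1;	/* pretend 3 runnable tasks */
		int i;

		/* simulate twelve 5-second LOAD_FREQ intervals (one minute) */
		for (i = 0; i < 12; i++) {
			avenrun[0] = calc_load(avenrun[0], EXP_1, active);
			avenrun[1] = calc_load(avenrun[1], EXP_5, active);
			avenrun[2] = calc_load(avenrun[2], EXP_15, active);
		}
		printf("1min %.2f  5min %.2f  15min %.2f\n",
		       avenrun[0] / (double)FIXED_1,
		       avenrun[1] / (double)FIXED_1,
		       avenrun[2] / (double)FIXED_1);

		/* catching up after n missed intervals uses e^n, computed by squaring */
		printf("EXP_1^12 = %lu (scale %lu)\n",
		       fixed_power_int(EXP_1, FSHIFT, 12), FIXED_1);
		return 0;
	}

The @shift argument of get_avenrun() is what lets readers such as /proc/loadavg and sysinfo() rescale these fixed-point values to their own formats.
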
-+ * If we race with it entering CPU, unaccounted time is 0. This is -+ * indistinguishable from the read occurring a few cycles earlier. -+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has -+ * been accounted, so we're correct here as well. -+ */ -+ if (!p->on_cpu || !task_on_rq_queued(p)) -+ return tsk_seruntime(p); -+#endif -+ -+ rq = task_rq_lock(p, &rf); -+ ns = p->sched_time + do_task_delta_exec(p, rq); -+ task_rq_unlock(rq, p, &rf); -+ -+ return ns; -+} -+ -+/* -+ * Functions to test for when SCHED_ISO tasks have used their allocated -+ * quota as real time scheduling and convert them back to SCHED_NORMAL. All -+ * data is modified only by the local runqueue during scheduler_tick with -+ * interrupts disabled. -+ */ -+ -+/* -+ * Test if SCHED_ISO tasks have run longer than their alloted period as RT -+ * tasks and set the refractory flag if necessary. There is 10% hysteresis -+ * for unsetting the flag. 115/128 is ~90/100 as a fast shift instead of a -+ * slow division. -+ */ -+static inline void iso_tick(struct rq *rq) -+{ -+ rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD; -+ rq->iso_ticks += 100; -+ if (rq->iso_ticks > ISO_PERIOD * sched_iso_cpu) { -+ rq->iso_refractory = true; -+ if (unlikely(rq->iso_ticks > ISO_PERIOD * 100)) -+ rq->iso_ticks = ISO_PERIOD * 100; -+ } -+} -+ -+/* No SCHED_ISO task was running so decrease rq->iso_ticks */ -+static inline void no_iso_tick(struct rq *rq, int ticks) -+{ -+ if (rq->iso_ticks > 0 || rq->iso_refractory) { -+ rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - ticks) / ISO_PERIOD; -+ if (rq->iso_ticks < ISO_PERIOD * (sched_iso_cpu * 115 / 128)) { -+ rq->iso_refractory = false; -+ if (unlikely(rq->iso_ticks < 0)) -+ rq->iso_ticks = 0; -+ } -+ } -+} -+ -+/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static void task_running_tick(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ /* -+ * If a SCHED_ISO task is running we increment the iso_ticks. In -+ * order to prevent SCHED_ISO tasks from causing starvation in the -+ * presence of true RT tasks we account those as iso_ticks as well. -+ */ -+ if (rt_task(p) || task_running_iso(p)) -+ iso_tick(rq); -+ else -+ no_iso_tick(rq, 1); -+ -+ /* SCHED_FIFO tasks never run out of timeslice. */ -+ if (p->policy == SCHED_FIFO) -+ return; -+ -+ if (iso_task(p)) { -+ if (task_running_iso(p)) { -+ if (rq->iso_refractory) { -+ /* -+ * SCHED_ISO task is running as RT and limit -+ * has been hit. Force it to reschedule as -+ * SCHED_NORMAL by zeroing its time_slice -+ */ -+ p->time_slice = 0; -+ } -+ } else if (!rq->iso_refractory) { -+ /* Can now run again ISO. Reschedule to pick up prio */ -+ goto out_resched; -+ } -+ } -+ -+ /* -+ * Tasks that were scheduled in the first half of a tick are not -+ * allowed to run into the 2nd half of the next tick if they will -+ * run out of time slice in the interim. Otherwise, if they have -+ * less than RESCHED_US μs of time slice left they will be rescheduled. -+ * Dither is used as a backup for when hrexpiry is disabled or high res -+ * timers not configured in. 
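
The iso_tick()/no_iso_tick() pair above amounts to a decaying per-runqueue counter with roughly 10% hysteresis, 115/128 standing in for 90/100 to avoid a division. A standalone sketch of just that loop, with ISO_PERIOD and sched_iso_cpu reduced to illustrative placeholder values and the clamping dropped:

	/* Userspace sketch of the SCHED_ISO refractory hysteresis. */
	#include <stdbool.h>
	#include <stdio.h>

	#define ISO_PERIOD	300	/* illustrative: ticks in the averaging window */
	static int sched_iso_cpu = 70;	/* illustrative: allowed ISO cpu percentage */

	struct rq_iso {
		long iso_ticks;
		bool iso_refractory;
	};

	/* An ISO task ran this tick: decay the counter and add 100 units. */
	static void iso_tick(struct rq_iso *rq)
	{
		rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD;
		rq->iso_ticks += 100;
		if (rq->iso_ticks > ISO_PERIOD * sched_iso_cpu)
			rq->iso_refractory = true;
	}

	/* No ISO task ran for 'ticks': decay, clear the flag below ~90% of the limit. */
	static void no_iso_tick(struct rq_iso *rq, int ticks)
	{
		if (rq->iso_ticks > 0 || rq->iso_refractory) {
			rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - ticks) / ISO_PERIOD;
			if (rq->iso_ticks < ISO_PERIOD * (sched_iso_cpu * 115 / 128))
				rq->iso_refractory = false;
		}
	}

	int main(void)
	{
		struct rq_iso rq = { 0, false };
		int t;

		for (t = 0; t < 1000; t++)
			iso_tick(&rq);		/* saturate the ISO budget */
		printf("refractory after busy period: %d\n", rq.iso_refractory);

		for (t = 0; t < 300; t++)
			no_iso_tick(&rq, 1);	/* idle until we drop below ~90% */
		printf("refractory after cooling off: %d\n", rq.iso_refractory);
		return 0;
	}
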
-+ */ -+ if (p->time_slice - rq->dither >= RESCHED_US) -+ return; -+out_resched: -+ rq_lock(rq); -+ __set_tsk_resched(p); -+ rq_unlock(rq); -+} -+ -+static inline void task_tick(struct rq *rq) -+{ -+ if (!rq_idle(rq)) -+ task_running_tick(rq); -+ else if (rq->last_jiffy > rq->last_scheduler_tick) -+ no_iso_tick(rq, rq->last_jiffy - rq->last_scheduler_tick); -+} -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * We can stop the timer tick any time highres timers are active since -+ * we rely entirely on highres timeouts for task expiry rescheduling. -+ */ -+static void sched_stop_tick(struct rq *rq, int cpu) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ if (!tick_nohz_full_enabled()) -+ return; -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+ -+static inline void sched_start_tick(struct rq *rq, int cpu) -+{ -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+ -+struct tick_work { -+ int cpu; -+ atomic_t state; -+ struct delayed_work work; -+}; -+/* Values for ->state, see diagram below. */ -+#define TICK_SCHED_REMOTE_OFFLINE 0 -+#define TICK_SCHED_REMOTE_OFFLINING 1 -+#define TICK_SCHED_REMOTE_RUNNING 2 -+ -+/* -+ * State diagram for ->state: -+ * -+ * -+ * TICK_SCHED_REMOTE_OFFLINE -+ * | ^ -+ * | | -+ * | | sched_tick_remote() -+ * | | -+ * | | -+ * +--TICK_SCHED_REMOTE_OFFLINING -+ * | ^ -+ * | | -+ * sched_tick_start() | | sched_tick_stop() -+ * | | -+ * V | -+ * TICK_SCHED_REMOTE_RUNNING -+ * -+ * -+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() -+ * and sched_tick_start() are happy to leave the state in RUNNING. -+ */ -+ -+static struct tick_work __percpu *tick_work_cpu; -+ -+static void sched_tick_remote(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct tick_work *twork = container_of(dwork, struct tick_work, work); -+ int cpu = twork->cpu; -+ struct rq *rq = cpu_rq(cpu); -+ struct task_struct *curr; -+ u64 delta; -+ int os; -+ -+ /* -+ * Handle the tick only if it appears the remote CPU is running in full -+ * dynticks mode. The check is racy by nature, but missing a tick or -+ * having one too much is no big deal because the scheduler tick updates -+ * statistics and checks timeslices in a time-independent way, regardless -+ * of when exactly it is running. -+ */ -+ if (!tick_nohz_tick_stopped_cpu(cpu)) -+ goto out_requeue; -+ -+ rq_lock_irq(rq); -+ if (cpu_is_offline(cpu)) -+ goto out_unlock; -+ -+ curr = rq->curr; -+ update_rq_clock(rq); -+ -+ if (!is_idle_task(curr)) { -+ /* -+ * Make sure the next tick runs within a reasonable -+ * amount of time. -+ */ -+ delta = rq_clock_task(rq) - curr->last_ran; -+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); -+ } -+ task_tick(rq); -+ -+out_unlock: -+ rq_unlock_irq(rq, NULL); -+ -+out_requeue: -+ -+ /* -+ * Run the remote tick once per second (1Hz). This arbitrary -+ * frequency is large enough to avoid overload but short enough -+ * to keep scheduler internal stats reasonably up to date. But -+ * first update state to reflect hotplug activity if required. 
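
The hotplug handshake described above (and the atomic_fetch_add_unless() call just below) can be modelled with C11 atomics. This is only a userspace illustration of the TICK_SCHED_REMOTE_* transitions, not kernel code:

	/* Sketch of the TICK_SCHED_REMOTE_* state machine with C11 atomics. */
	#include <stdatomic.h>
	#include <stdio.h>

	#define TICK_SCHED_REMOTE_OFFLINE	0
	#define TICK_SCHED_REMOTE_OFFLINING	1
	#define TICK_SCHED_REMOTE_RUNNING	2

	/* Add 'delta' unless the current value is 'unless'; return the old value. */
	static int fetch_add_unless(atomic_int *v, int delta, int unless)
	{
		int old = atomic_load(v);

		while (old != unless &&
		       !atomic_compare_exchange_weak(v, &old, old + delta))
			;
		return old;
	}

	int main(void)
	{
		atomic_int state = TICK_SCHED_REMOTE_OFFLINE;
		int os;

		/* sched_tick_start(): OFFLINE -> RUNNING, queue the work once */
		os = atomic_exchange(&state, TICK_SCHED_REMOTE_RUNNING);
		printf("start: saw %d, now RUNNING\n", os);

		/* sched_tick_stop(): RUNNING -> OFFLINING */
		os = atomic_exchange(&state, TICK_SCHED_REMOTE_OFFLINING);
		printf("stop:  saw %d, now OFFLINING\n", os);

		/* sched_tick_remote(): requeue only if still RUNNING, otherwise
		 * step back one state (OFFLINING -> OFFLINE) */
		os = fetch_add_unless(&state, -1, TICK_SCHED_REMOTE_RUNNING);
		printf("remote tick saw %d, state now %d\n", os, atomic_load(&state));
		return 0;
	}
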
-+ */ -+ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); -+ if (os == TICK_SCHED_REMOTE_RUNNING) -+ queue_delayed_work(system_unbound_wq, dwork, HZ); -+} -+ -+static void sched_tick_start(int cpu) -+{ -+ struct tick_work *twork; -+ int os; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); -+ if (os == TICK_SCHED_REMOTE_OFFLINE) { -+ twork->cpu = cpu; -+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); -+ queue_delayed_work(system_unbound_wq, &twork->work, HZ); -+ } -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void sched_tick_stop(int cpu) -+{ -+ struct tick_work *twork; -+ int os; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ /* There cannot be competing actions, but don't rely on stop-machine. */ -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_OFFLINING); -+ WARN_ON_ONCE(os != TICK_SCHED_REMOTE_RUNNING); -+ /* Don't cancel, as this would mess up the state machine. */ -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+int __init sched_tick_offload_init(void) -+{ -+ tick_work_cpu = alloc_percpu(struct tick_work); -+ BUG_ON(!tick_work_cpu); -+ return 0; -+} -+ -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_stop_tick(struct rq *rq, int cpu) {} -+static inline void sched_start_tick(struct rq *rq, int cpu) {} -+static inline void sched_tick_start(int cpu) { } -+static inline void sched_tick_stop(int cpu) { } -+#endif -+ -+/* -+ * This function gets called by the timer code, with HZ frequency. -+ * We call it with interrupts disabled. -+ */ -+void scheduler_tick(void) -+{ -+ int cpu __maybe_unused = smp_processor_id(); -+ struct rq *rq = cpu_rq(cpu); -+ -+ arch_scale_freq_tick(); -+ sched_clock_tick(); -+ update_clocks(rq); -+ update_load_avg(rq, 0); -+ update_cpu_clock_tick(rq, rq->curr); -+ task_tick(rq); -+ rq->last_scheduler_tick = rq->last_jiffy; -+ rq->last_tick = rq->clock; -+ psi_task_tick(rq); -+ perf_event_task_tick(); -+ sched_stop_tick(rq, cpu); -+} -+ -+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ -+ defined(CONFIG_TRACE_PREEMPT_TOGGLE)) -+/* -+ * If the value passed in is equal to the current preempt count -+ * then we just disabled preemption. Start timing the latency. -+ */ -+static inline void preempt_latency_start(int val) -+{ -+ if (preempt_count() == val) { -+ unsigned long ip = get_lock_parent_ip(); -+#ifdef CONFIG_DEBUG_PREEMPT -+ current->preempt_disable_ip = ip; -+#endif -+ trace_preempt_off(CALLER_ADDR0, ip); -+ } -+} -+ -+void preempt_count_add(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) -+ return; -+#endif -+ __preempt_count_add(val); -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Spinlock count overflowing soon? -+ */ -+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= -+ PREEMPT_MASK - 10); -+#endif -+ preempt_latency_start(val); -+} -+EXPORT_SYMBOL(preempt_count_add); -+NOKPROBE_SYMBOL(preempt_count_add); -+ -+/* -+ * If the value passed in equals to the current preempt count -+ * then we just enabled preemption. Stop timing the latency. 
-+ */ -+static inline void preempt_latency_stop(int val) -+{ -+ if (preempt_count() == val) -+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); -+} -+ -+void preempt_count_sub(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) -+ return; -+ /* -+ * Is the spinlock portion underflowing? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && -+ !(preempt_count() & PREEMPT_MASK))) -+ return; -+#endif -+ -+ preempt_latency_stop(val); -+ __preempt_count_sub(val); -+} -+EXPORT_SYMBOL(preempt_count_sub); -+NOKPROBE_SYMBOL(preempt_count_sub); -+ -+#else -+static inline void preempt_latency_start(int val) { } -+static inline void preempt_latency_stop(int val) { } -+#endif -+ -+static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ return p->preempt_disable_ip; -+#else -+ return 0; -+#endif -+} -+ -+/* -+ * The time_slice is only refilled when it is empty and that is when we set a -+ * new deadline. Make sure update_clocks has been called recently to update -+ * rq->niffies. -+ */ -+static void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ p->time_slice = timeslice(); -+ p->deadline = rq->niffies + task_deadline_diff(p); -+#ifdef CONFIG_SMT_NICE -+ if (!p->mm) -+ p->smt_bias = 0; -+ else if (rt_task(p)) -+ p->smt_bias = 1 << 30; -+ else if (task_running_iso(p)) -+ p->smt_bias = 1 << 29; -+ else if (idleprio_task(p)) { -+ if (task_running_idle(p)) -+ p->smt_bias = 0; -+ else -+ p->smt_bias = 1; -+ } else if (--p->smt_bias < 1) -+ p->smt_bias = MAX_PRIO - p->static_prio; -+#endif -+} -+ -+/* -+ * Timeslices below RESCHED_US are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. SCHED_BATCH tasks -+ * have been flagged be not latency sensitive and likely to be fully CPU -+ * bound so every time they're rescheduled they have their time_slice -+ * refilled, but get a new later deadline to have little effect on -+ * SCHED_NORMAL tasks. -+ -+ */ -+static inline void check_deadline(struct task_struct *p, struct rq *rq) -+{ -+ if (p->time_slice < RESCHED_US || batch_task(p)) -+ time_slice_expired(p, rq); -+} -+ -+/* -+ * Task selection with skiplists is a simple matter of picking off the first -+ * task in the sorted list, an O(1) operation. The lookup is amortised O(1) -+ * being bound to the number of processors. -+ * -+ * Runqueues are selectively locked based on their unlocked data and then -+ * unlocked if not needed. At most 3 locks will be held at any time and are -+ * released as soon as they're no longer needed. All balancing between CPUs -+ * is thus done here in an extremely simple first come best fit manner. -+ * -+ * This iterates over runqueues in cache locality order. In interactive mode -+ * it iterates over all CPUs and finds the task with the best key/deadline. -+ * In non-interactive mode it will only take a task if it's from the current -+ * runqueue or a runqueue with more tasks than the current one with a better -+ * key/deadline. 
-+ */ -+#ifdef CONFIG_SMP -+static inline struct task_struct -+*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle) -+{ -+ struct rq *locked = NULL, *chosen = NULL; -+ struct task_struct *edt = idle; -+ int i, best_entries = 0; -+ u64 best_key = ~0ULL; -+ -+ for (i = 0; i < total_runqueues; i++) { -+ struct rq *other_rq = rq_order(rq, i); -+ skiplist_node *next; -+ int entries; -+ -+ entries = other_rq->sl->entries; -+ /* -+ * Check for queued entres lockless first. The local runqueue -+ * is locked so entries will always be accurate. -+ */ -+ if (!sched_interactive) { -+ /* -+ * Don't reschedule balance across nodes unless the CPU -+ * is idle. -+ */ -+ if (edt != idle && rq->cpu_locality[other_rq->cpu] > LOCALITY_SMP) -+ break; -+ if (entries <= best_entries) -+ continue; -+ } else if (!entries) -+ continue; -+ -+ /* if (i) implies other_rq != rq */ -+ if (i) { -+ /* Check for best id queued lockless first */ -+ if (other_rq->best_key >= best_key) -+ continue; -+ -+ if (unlikely(!trylock_rq(rq, other_rq))) -+ continue; -+ -+ /* Need to reevaluate entries after locking */ -+ entries = other_rq->sl->entries; -+ if (unlikely(!entries)) { -+ unlock_rq(other_rq); -+ continue; -+ } -+ } -+ -+ next = other_rq->node; -+ /* -+ * In interactive mode we check beyond the best entry on other -+ * runqueues if we can't get the best for smt or affinity -+ * reasons. -+ */ -+ while ((next = next->next[0]) != other_rq->node) { -+ struct task_struct *p; -+ u64 key = next->key; -+ -+ /* Reevaluate key after locking */ -+ if (key >= best_key) -+ break; -+ -+ p = next->value; -+ if (!smt_schedule(p, rq)) { -+ if (i && !sched_interactive) -+ break; -+ continue; -+ } -+ -+ if (sched_other_cpu(p, cpu)) { -+ if (sched_interactive || !i) -+ continue; -+ break; -+ } -+ /* Make sure affinity is ok */ -+ if (i) { -+ /* From this point on p is the best so far */ -+ if (locked) -+ unlock_rq(locked); -+ chosen = locked = other_rq; -+ } -+ best_entries = entries; -+ best_key = key; -+ edt = p; -+ break; -+ } -+ /* rq->preempting is a hint only as the state may have changed -+ * since it was set with the resched call but if we have met -+ * the condition we can break out here. 
*/ -+ if (edt == rq->preempting) -+ break; -+ if (i && other_rq != chosen) -+ unlock_rq(other_rq); -+ } -+ -+ if (likely(edt != idle)) -+ take_task(rq, cpu, edt); -+ -+ if (locked) -+ unlock_rq(locked); -+ -+ rq->preempting = NULL; -+ -+ return edt; -+} -+#else /* CONFIG_SMP */ -+static inline struct task_struct -+*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle) -+{ -+ struct task_struct *edt; -+ -+ if (unlikely(!rq->sl->entries)) -+ return idle; -+ edt = rq->node->next[0]->value; -+ take_task(rq, cpu, edt); -+ return edt; -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Print scheduling while atomic bug: -+ */ -+static noinline void __schedule_bug(struct task_struct *prev) -+{ -+ /* Save this before calling printk(), since that will clobber it */ -+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ if (oops_in_progress) -+ return; -+ -+ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", -+ prev->comm, prev->pid, preempt_count()); -+ -+ debug_show_held_locks(prev); -+ print_modules(); -+ if (irqs_disabled()) -+ print_irqtrace_events(prev); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && in_atomic_preempt_off()) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); -+ } -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+ -+/* -+ * Various schedule()-time debugging checks and statistics: -+ */ -+static inline void schedule_debug(struct task_struct *prev, bool preempt) -+{ -+#ifdef CONFIG_SCHED_STACK_END_CHECK -+ if (task_stack_end_corrupted(prev)) -+ panic("corrupted stack end detected inside scheduler\n"); -+ -+ if (task_scs_end_corrupted(prev)) -+ panic("corrupted shadow stack detected inside scheduler\n"); -+#endif -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && prev->state && prev->non_block_count) { -+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", -+ prev->comm, prev->pid, prev->non_block_count); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+ } -+#endif -+ -+ if (unlikely(in_atomic_preempt_off())) { -+ __schedule_bug(prev); -+ preempt_count_set(PREEMPT_DISABLED); -+ } -+ rcu_sleep_check(); -+ -+ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -+ -+ schedstat_inc(this_rq()->sched_count); -+} -+ -+/* -+ * The currently running task's information is all stored in rq local data -+ * which is only modified by the local CPU. -+ */ -+static inline void set_rq_task(struct rq *rq, struct task_struct *p) -+{ -+ if (p == rq->idle || p->policy == SCHED_FIFO) -+ hrexpiry_clear(rq); -+ else -+ hrexpiry_start(rq, US_TO_NS(p->time_slice)); -+ if (rq->clock - rq->last_tick > HALF_JIFFY_NS) -+ rq->dither = 0; -+ else -+ rq->dither = rq_dither(rq); -+ -+ rq->rq_deadline = p->deadline; -+ rq->rq_prio = p->prio; -+#ifdef CONFIG_SMT_NICE -+ rq->rq_mm = p->mm; -+ rq->rq_smt_bias = p->smt_bias; -+#endif -+} -+ -+#ifdef CONFIG_SMT_NICE -+static void check_no_siblings(struct rq __maybe_unused *this_rq) {} -+static void wake_no_siblings(struct rq __maybe_unused *this_rq) {} -+static void (*check_siblings)(struct rq *this_rq) = &check_no_siblings; -+static void (*wake_siblings)(struct rq *this_rq) = &wake_no_siblings; -+ -+/* Iterate over smt siblings when we've scheduled a process on cpu and decide -+ * whether they should continue running or be descheduled. 
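
For intuition, the selection policy documented before earliest_deadline_task() boils down to "scan runqueues in locality order and take the smallest deadline key". A deliberately tiny toy version, with the skiplists, locking, interactive mode, SMT and affinity filtering all omitted:

	/* Toy sketch of earliest-deadline selection across per-CPU queues. */
	#include <stdint.h>
	#include <stdio.h>

	#define NR_CPUS 4
	#define QLEN    3

	struct toy_task {
		const char *comm;
		uint64_t deadline;	/* lower key == more urgent, 0 == empty slot */
	};

	/* Per-CPU "runqueues", already sorted so slot 0 holds the best key. */
	static struct toy_task rqs[NR_CPUS][QLEN] = {
		{ { "kworker", 900 } },
		{ { "firefox", 400 }, { "make", 700 } },
		{ { 0 } },		/* idle CPU: nothing queued */
		{ { "cc1", 500 } },
	};

	/* Pick the globally earliest deadline, preferring the local CPU on ties. */
	static struct toy_task *earliest_deadline_task(int this_cpu)
	{
		struct toy_task *best = NULL;
		uint64_t best_key = UINT64_MAX;
		int cpu;

		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			/* crude stand-in for "cache locality order": local first */
			int c = (this_cpu + cpu) % NR_CPUS;
			struct toy_task *p = &rqs[c][0];

			if (!p->deadline)	/* nothing queued */
				continue;
			if (p->deadline < best_key) {
				best_key = p->deadline;
				best = p;
			}
		}
		return best;
	}

	int main(void)
	{
		struct toy_task *p = earliest_deadline_task(0);

		printf("picked %s (deadline %llu)\n",
		       p ? p->comm : "idle",
		       p ? (unsigned long long)p->deadline : 0ULL);
		return 0;
	}
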
*/ -+static void check_smt_siblings(struct rq *this_rq) -+{ -+ int other_cpu; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct task_struct *p; -+ struct rq *rq; -+ -+ rq = cpu_rq(other_cpu); -+ if (rq_idle(rq)) -+ continue; -+ p = rq->curr; -+ if (!smt_schedule(p, this_rq)) -+ resched_curr(rq); -+ } -+} -+ -+static void wake_smt_siblings(struct rq *this_rq) -+{ -+ int other_cpu; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct rq *rq; -+ -+ rq = cpu_rq(other_cpu); -+ if (rq_idle(rq)) -+ resched_idle(rq); -+ } -+} -+#else -+static void check_siblings(struct rq __maybe_unused *this_rq) {} -+static void wake_siblings(struct rq __maybe_unused *this_rq) {} -+#endif -+ -+/* -+ * schedule() is the main scheduler function. -+ * -+ * The main means of driving the scheduler and thus entering this function are: -+ * -+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. -+ * -+ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return -+ * paths. For example, see arch/x86/entry_64.S. -+ * -+ * To drive preemption between tasks, the scheduler sets the flag in timer -+ * interrupt handler scheduler_tick(). -+ * -+ * 3. Wakeups don't really cause entry into schedule(). They add a -+ * task to the run-queue and that's it. -+ * -+ * Now, if the new task added to the run-queue preempts the current -+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets -+ * called on the nearest possible occasion: -+ * -+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): -+ * -+ * - in syscall or exception context, at the next outmost -+ * preempt_enable(). (this might be as soon as the wake_up()'s -+ * spin_unlock()!) -+ * -+ * - in IRQ context, return from interrupt-handler to -+ * preemptible context -+ * -+ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) -+ * then at the next: -+ * -+ * - cond_resched() call -+ * - explicit schedule() call -+ * - return from syscall or exception to user-space -+ * - return from interrupt-handler to user-space -+ * -+ * WARNING: must be called with preemption disabled! -+ */ -+static void __sched notrace __schedule(bool preempt) -+{ -+ struct task_struct *prev, *next, *idle; -+ unsigned long *switch_count; -+ unsigned long prev_state; -+ bool deactivate = false; -+ struct rq *rq; -+ u64 niffies; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ rq = cpu_rq(cpu); -+ prev = rq->curr; -+ idle = rq->idle; -+ -+ schedule_debug(prev, preempt); -+ -+ local_irq_disable(); -+ rcu_note_context_switch(preempt); -+ -+ /* -+ * Make sure that signal_pending_state()->signal_pending() below -+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(): -+ * -+ * __set_current_state(@state) signal_wake_up() -+ * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) -+ * wake_up_state(p, state) -+ * LOCK rq->lock LOCK p->pi_state -+ * smp_mb__after_spinlock() smp_mb__after_spinlock() -+ * if (signal_pending_state()) if (p->state & @state) -+ * -+ * Also, the membarrier system call requires a full memory barrier -+ * after coming from user-space, before storing to rq->curr. -+ */ -+ rq_lock(rq); -+ smp_mb__after_spinlock(); -+#ifdef CONFIG_SMP -+ if (rq->preempt) { -+ /* -+ * Make sure resched_curr hasn't triggered a preemption -+ * locklessly on a task that has since scheduled away. Spurious -+ * wakeup of idle is okay though. 
-+ */ -+ if (unlikely(preempt && prev != idle && !test_tsk_need_resched(prev))) { -+ rq->preempt = NULL; -+ clear_preempt_need_resched(); -+ rq_unlock_irq(rq, NULL); -+ return; -+ } -+ rq->preempt = NULL; -+ } -+#endif -+ -+ switch_count = &prev->nivcsw; -+ -+ /* -+ * We must load prev->state once (task_struct::state is volatile), such -+ * that: -+ * -+ * - we form a control dependency vs deactivate_task() below. -+ * - ptrace_{,un}freeze_traced() can change ->state underneath us. -+ */ -+ prev_state = prev->state; -+ if (!preempt && prev_state) { -+ if (signal_pending_state(prev_state, prev)) { -+ prev->state = TASK_RUNNING; -+ } else { -+ prev->sched_contributes_to_load = -+ (prev_state & TASK_UNINTERRUPTIBLE) && -+ !(prev_state & TASK_NOLOAD) && -+ !(prev->flags & PF_FROZEN); -+ -+ if (prev->sched_contributes_to_load) -+ rq->nr_uninterruptible++; -+ -+ /* -+ * __schedule() ttwu() -+ * prev_state = prev->state; if (p->on_rq && ...) -+ * if (prev_state) goto out; -+ * p->on_rq = 0; smp_acquire__after_ctrl_dep(); -+ * p->state = TASK_WAKING -+ * -+ * Where __schedule() and ttwu() have matching control dependencies. -+ * -+ * After this, schedule() must not care about p->state any more. -+ */ -+ deactivate = true; -+ -+ if (prev->in_iowait) { -+ atomic_inc(&rq->nr_iowait); -+ delayacct_blkio_start(); -+ } -+ } -+ switch_count = &prev->nvcsw; -+ } -+ -+ /* -+ * Store the niffy value here for use by the next task's last_ran -+ * below to avoid losing niffies due to update_clocks being called -+ * again after this point. -+ */ -+ update_clocks(rq); -+ niffies = rq->niffies; -+ update_cpu_clock_switch(rq, prev); -+ -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ -+ if (idle != prev) { -+ check_deadline(prev, rq); -+ return_task(prev, rq, cpu, deactivate); -+ } -+ -+ next = earliest_deadline_task(rq, cpu, idle); -+ if (likely(next->prio != PRIO_LIMIT)) -+ clear_cpuidle_map(cpu); -+ else { -+ set_cpuidle_map(cpu); -+ update_load_avg(rq, 0); -+ } -+ -+ set_rq_task(rq, next); -+ next->last_ran = niffies; -+ -+ if (likely(prev != next)) { -+ /* -+ * Don't reschedule an idle task or deactivated tasks -+ */ -+ if (prev == idle) { -+ inc_nr_running(rq); -+ if (rt_task(next)) -+ rq->rt_nr_running++; -+ } else if (!deactivate) -+ resched_suitable_idle(prev); -+ if (unlikely(next == idle)) { -+ dec_nr_running(rq); -+ if (rt_task(prev)) -+ rq->rt_nr_running--; -+ wake_siblings(rq); -+ } else -+ check_siblings(rq); -+ rq->nr_switches++; -+ /* -+ * RCU users of rcu_dereference(rq->curr) may not see -+ * changes to task_struct made by pick_next_task(). -+ */ -+ RCU_INIT_POINTER(rq->curr, next); -+ /* -+ * The membarrier system call requires each architecture -+ * to have a full memory barrier after updating -+ * rq->curr, before returning to user-space. -+ * -+ * Here are the schemes providing that barrier on the -+ * various architectures: -+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. -+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
-+ * - finish_lock_switch() for weakly-ordered -+ * architectures where spin_unlock is a full barrier, -+ * - switch_to() for arm64 (weakly-ordered, spin_unlock -+ * is a RELEASE barrier), -+ */ -+ ++*switch_count; -+ -+ psi_sched_switch(prev, next, !task_on_rq_queued(prev)); -+ -+ trace_sched_switch(preempt, prev, next); -+ context_switch(rq, prev, next); /* unlocks the rq */ -+ } else { -+ check_siblings(rq); -+ rq_unlock(rq); -+ local_irq_enable(); -+ } -+} -+ -+void __noreturn do_task_dead(void) -+{ -+ /* Causes final put_task_struct in finish_task_switch(). */ -+ set_special_state(TASK_DEAD); -+ -+ /* Tell freezer to ignore us: */ -+ current->flags |= PF_NOFREEZE; -+ __schedule(false); -+ BUG(); -+ -+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -+ for (;;) -+ cpu_relax(); -+} -+ -+static inline void sched_submit_work(struct task_struct *tsk) -+{ -+ if (!tsk->state) -+ return; -+ -+ /* -+ * If a worker went to sleep, notify and ask workqueue whether -+ * it wants to wake up a task to maintain concurrency. -+ * As this function is called inside the schedule() context, -+ * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker and because wq_worker_sleeping() -+ * requires it. -+ */ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ preempt_disable(); -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_sleeping(tsk); -+ else -+ io_wq_worker_sleeping(tsk); -+ preempt_enable_no_resched(); -+ } -+ -+ if (tsk_is_pi_blocked(tsk)) -+ return; -+ -+ /* -+ * If we are going to sleep and we have plugged IO queued, -+ * make sure to submit it to avoid deadlocks. -+ */ -+ if (blk_needs_flush_plug(tsk)) -+ blk_schedule_flush_plug(tsk); -+} -+ -+static inline void sched_update_worker(struct task_struct *tsk) -+{ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_running(tsk); -+ else -+ io_wq_worker_running(tsk); -+ } -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ struct task_struct *tsk = current; -+ -+ sched_submit_work(tsk); -+ do { -+ preempt_disable(); -+ __schedule(false); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ sched_update_worker(tsk); -+} -+ -+EXPORT_SYMBOL(schedule); -+ -+/* -+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted -+ * state (have scheduled out non-voluntarily) by making sure that all -+ * tasks have either left the run queue or have gone into user space. -+ * As idle tasks do not do either, they must not ever be preempted -+ * (schedule out non-voluntarily). -+ * -+ * schedule_idle() is similar to schedule_preempt_disable() except that it -+ * never enables preemption because it does not call sched_submit_work(). -+ */ -+void __sched schedule_idle(void) -+{ -+ /* -+ * As this skips calling sched_submit_work(), which the idle task does -+ * regardless because that function is a nop when the task is in a -+ * TASK_RUNNING state, make sure this isn't used someplace that the -+ * current task can be in any other state. Note, idle is always in the -+ * TASK_RUNNING state. -+ */ -+ WARN_ON_ONCE(current->state); -+ do { -+ __schedule(false); -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_CONTEXT_TRACKING -+asmlinkage __visible void __sched schedule_user(void) -+{ -+ /* -+ * If we come here after a random call to set_need_resched(), -+ * or we have been woken up remotely but the IPI has not yet arrived, -+ * we haven't yet exited the RCU idle mode. 
Do it here manually until -+ * we find a better solution. -+ * -+ * NB: There are buggy callers of this function. Ideally we -+ * should warn if prev_state != IN_USER, but that will trigger -+ * too frequently to make sense yet. -+ */ -+ enum ctx_state prev_state = exception_enter(); -+ schedule(); -+ exception_exit(prev_state); -+} -+#endif -+ -+/** -+ * schedule_preempt_disabled - called with preemption disabled -+ * -+ * Returns with preemption disabled. Note: preempt_count must be 1 -+ */ -+void __sched schedule_preempt_disabled(void) -+{ -+ sched_preempt_enable_no_resched(); -+ schedule(); -+ preempt_disable(); -+} -+ -+static void __sched notrace preempt_schedule_common(void) -+{ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ __schedule(true); -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ -+ /* -+ * Check again in case we missed a preemption opportunity -+ * between schedule and now. -+ */ -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_PREEMPTION -+/* -+ * This is the entry point to schedule() from in-kernel preemption -+ * off of preempt_enable. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule(void) -+{ -+ /* -+ * If there is a non-zero preempt_count or interrupts are disabled, -+ * we do not want to preempt the current task. Just return.. -+ */ -+ if (likely(!preemptible())) -+ return; -+ -+ preempt_schedule_common(); -+} -+NOKPROBE_SYMBOL(preempt_schedule); -+EXPORT_SYMBOL(preempt_schedule); -+ -+/** -+ * preempt_schedule_notrace - preempt_schedule called by tracing -+ * -+ * The tracing infrastructure uses preempt_enable_notrace to prevent -+ * recursion and tracing preempt enabling caused by the tracing -+ * infrastructure itself. But as tracing can happen in areas coming -+ * from userspace or just about to enter userspace, a preempt enable -+ * can occur before user_exit() is called. This will cause the scheduler -+ * to be called when the system is still in usermode. -+ * -+ * To prevent this, the preempt_enable_notrace will use this function -+ * instead of preempt_schedule() to exit user context if needed before -+ * calling the scheduler. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) -+{ -+ enum ctx_state prev_ctx; -+ -+ if (likely(!preemptible())) -+ return; -+ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. 
-+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ /* -+ * Needs preempt disabled in case user_exit() is traced -+ * and the tracer calls preempt_enable_notrace() causing -+ * an infinite recursion. -+ */ -+ prev_ctx = exception_enter(); -+ __schedule(true); -+ exception_exit(prev_ctx); -+ -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ } while (need_resched()); -+} -+EXPORT_SYMBOL_GPL(preempt_schedule_notrace); -+ -+#endif /* CONFIG_PREEMPTION */ -+ -+/* -+ * This is the entry point to schedule() from kernel preemption -+ * off of irq context. -+ * Note, that this is called and return with irqs disabled. This will -+ * protect us against recursive calling from irq. -+ */ -+asmlinkage __visible void __sched preempt_schedule_irq(void) -+{ -+ enum ctx_state prev_state; -+ -+ /* Catch callers which need to be fixed */ -+ BUG_ON(preempt_count() || !irqs_disabled()); -+ -+ prev_state = exception_enter(); -+ -+ do { -+ preempt_disable(); -+ local_irq_enable(); -+ __schedule(true); -+ local_irq_disable(); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ -+ exception_exit(prev_state); -+} -+ -+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, -+ void *key) -+{ -+ WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC); -+ return try_to_wake_up(curr->private, mode, wake_flags); -+} -+EXPORT_SYMBOL(default_wake_function); -+ -+#ifdef CONFIG_RT_MUTEXES -+ -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ -+/* -+ * rt_mutex_setprio - set the current priority of a task -+ * @p: task to boost -+ * @pi_task: donor task -+ * -+ * This function changes the 'effective' priority of a task. It does -+ * not touch ->normal_prio like __setscheduler(). -+ * -+ * Used by the rt_mutex code to implement priority inheritance -+ * logic. Call site only calls if the priority of the task changed. -+ */ -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) -+{ -+ int prio, oldprio; -+ struct rq *rq; -+ -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. -+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio) -+ return; -+ -+ rq = __task_rq_lock(p, NULL); -+ update_rq_clock(rq); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio) -+ goto out_unlock; -+ -+ /* -+ * Idle task boosting is a nono in general. 
There is one -+ * exception, when PREEMPT_RT and NOHZ is active: -+ * -+ * The idle task calls get_next_timer_interrupt() and holds -+ * the timer wheel base->lock on the CPU and another CPU wants -+ * to access the timer (probably to cancel it). We can safely -+ * ignore the boosting request, as the idle CPU runs this code -+ * with interrupts disabled and will complete the lock -+ * protected section without being interrupted. So there is no -+ * real need to boost. -+ */ -+ if (unlikely(p == rq->idle)) { -+ WARN_ON(p != rq->curr); -+ WARN_ON(p->pi_blocked_on); -+ goto out_unlock; -+ } -+ -+ trace_sched_pi_setprio(p, pi_task); -+ oldprio = p->prio; -+ p->prio = prio; -+ if (task_running(rq, p)){ -+ if (prio > oldprio) -+ resched_task(p); -+ } else if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (prio < oldprio) -+ try_preempt(p, rq); -+ } -+out_unlock: -+ __task_rq_unlock(rq, NULL); -+} -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} -+#endif -+ -+/* -+ * Adjust the deadline for when the priority is to change, before it's -+ * changed. -+ */ -+static inline void adjust_deadline(struct task_struct *p, int new_prio) -+{ -+ p->deadline += static_deadline_diff(new_prio) - task_deadline_diff(p); -+} -+ -+void set_user_nice(struct task_struct *p, long nice) -+{ -+ int new_static, old_static; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) -+ return; -+ new_static = NICE_TO_PRIO(nice); -+ /* -+ * We have to be careful, if called from sys_setpriority(), -+ * the task might be in the middle of scheduling on another CPU. -+ */ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ /* -+ * The RT priorities are set via sched_setscheduler(), but we still -+ * allow the 'normal' nice value to be set - but as expected -+ * it wont have any effect on scheduling until the task is -+ * not SCHED_NORMAL/SCHED_BATCH: -+ */ -+ if (has_rt_policy(p)) { -+ p->static_prio = new_static; -+ goto out_unlock; -+ } -+ -+ adjust_deadline(p, new_static); -+ old_static = p->static_prio; -+ p->static_prio = new_static; -+ p->prio = effective_prio(p); -+ -+ if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (new_static < old_static) -+ try_preempt(p, rq); -+ } else if (task_running(rq, p)) { -+ set_rq_task(rq, p); -+ if (old_static < new_static) -+ resched_task(p); -+ } -+out_unlock: -+ task_rq_unlock(rq, p, &rf); -+} -+EXPORT_SYMBOL(set_user_nice); -+ -+/* -+ * can_nice - check if a task can reduce its nice value -+ * @p: task -+ * @nice: nice value -+ */ -+int can_nice(const struct task_struct *p, const int nice) -+{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); -+} -+ -+#ifdef __ARCH_WANT_SYS_NICE -+ -+/* -+ * sys_nice - change the priority of the current process. -+ * @increment: priority increment -+ * -+ * sys_setpriority is a more generic, but much slower function that -+ * does similar things. -+ */ -+SYSCALL_DEFINE1(nice, int, increment) -+{ -+ long nice, retval; -+ -+ /* -+ * Setpriority might change our priority at the same moment. -+ * We don't have to worry. Conceptually one call occurs first -+ * and we have a single winner. 
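
can_nice() above relies on the inverted RLIMIT_NICE scale; a quick sketch of that mapping, assuming the usual nice_to_rlimit() definition of MAX_NICE - nice + 1:

	/* Sketch of the nice <-> RLIMIT_NICE mapping used by can_nice(). */
	#include <stdio.h>

	#define MAX_NICE  19
	#define MIN_NICE -20

	/* nice value [19..-20] -> rlimit style [1..40] */
	static long nice_to_rlimit(long nice)
	{
		return MAX_NICE - nice + 1;
	}

	int main(void)
	{
		long nice;

		for (nice = MIN_NICE; nice <= MAX_NICE; nice += 13)
			printf("nice %3ld -> rlimit %2ld\n", nice, nice_to_rlimit(nice));

		/* e.g. an RLIMIT_NICE soft limit of 30 permits nice values >= -10 */
		printf("limit 30 allows nice >= %d\n", MAX_NICE - 30 + 1);
		return 0;
	}
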
-+ */ -+ -+ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); -+ nice = task_nice(current) + increment; -+ -+ nice = clamp_val(nice, MIN_NICE, MAX_NICE); -+ if (increment < 0 && !can_nice(current, nice)) -+ return -EPERM; -+ -+ retval = security_task_setnice(current, nice); -+ if (retval) -+ return retval; -+ -+ set_user_nice(current, nice); -+ return 0; -+} -+ -+#endif -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0 (SCHED_ISO) up to 82 (nice +19 SCHED_IDLEPRIO). -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ int delta, prio = p->prio - MAX_RT_PRIO; -+ -+ /* rt tasks and iso tasks */ -+ if (prio <= 0) -+ goto out; -+ -+ /* Convert to ms to avoid overflows */ -+ delta = NS_TO_MS(p->deadline - task_rq(p)->niffies); -+ if (unlikely(delta < 0)) -+ delta = 0; -+ delta = delta * 40 / ms_longest_deadline_diff(); -+ if (delta <= 80) -+ prio += delta; -+ if (idleprio_task(p)) -+ prio += 40; -+out: -+ return prio; -+} -+ -+/** -+ * idle_cpu - is a given CPU idle currently? -+ * @cpu: the processor in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int idle_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (rq->curr != rq->idle) -+ return 0; -+ -+ if (rq->nr_running) -+ return 0; -+ -+#ifdef CONFIG_SMP -+ if (rq->ttwu_pending) -+ return 0; -+#endif -+ -+ return 1; -+} -+ -+/** -+ * available_idle_cpu - is a given CPU idle for enqueuing work. -+ * @cpu: the CPU in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int available_idle_cpu(int cpu) -+{ -+ if (!idle_cpu(cpu)) -+ return 0; -+ -+ if (vcpu_is_preempted(cpu)) -+ return 0; -+ -+ return 1; -+} -+ -+/** -+ * idle_task - return the idle task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * Return: The idle task for the CPU @cpu. -+ */ -+struct task_struct *idle_task(int cpu) -+{ -+ return cpu_rq(cpu)->idle; -+} -+ -+/** -+ * find_process_by_pid - find a process with a matching PID value. -+ * @pid: the pid in question. -+ * -+ * The task of @pid, if found. %NULL otherwise. -+ */ -+static inline struct task_struct *find_process_by_pid(pid_t pid) -+{ -+ return pid ? find_task_by_vpid(pid) : current; -+} -+ -+/* Actually do priority change: must hold rq lock. */ -+static void __setscheduler(struct task_struct *p, struct rq *rq, int policy, -+ int prio, const struct sched_attr *attr, -+ bool keep_boost) -+{ -+ int oldrtprio, oldprio; -+ -+ /* -+ * If params can't change scheduling class changes aren't allowed -+ * either. -+ */ -+ if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS) -+ return; -+ -+ p->policy = policy; -+ oldrtprio = p->rt_priority; -+ p->rt_priority = prio; -+ p->normal_prio = normal_prio(p); -+ oldprio = p->prio; -+ /* -+ * Keep a potential priority boosting if called from -+ * sched_setscheduler(). 
-+ */ -+ p->prio = normal_prio(p); -+ if (keep_boost) -+ p->prio = rt_effective_prio(p, p->prio); -+ -+ if (task_running(rq, p)) { -+ set_rq_task(rq, p); -+ resched_task(p); -+ } else if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (p->prio < oldprio || p->rt_priority > oldrtprio) -+ try_preempt(p, rq); -+ } -+} -+ -+/* -+ * Check the target process has a UID that matches the current process's -+ */ -+static bool check_same_owner(struct task_struct *p) -+{ -+ const struct cred *cred = current_cred(), *pcred; -+ bool match; -+ -+ rcu_read_lock(); -+ pcred = __task_cred(p); -+ match = (uid_eq(cred->euid, pcred->euid) || -+ uid_eq(cred->euid, pcred->uid)); -+ rcu_read_unlock(); -+ return match; -+} -+ -+static int __sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, -+ bool user, bool pi) -+{ -+ int retval, policy = attr->sched_policy, oldpolicy = -1, priority = attr->sched_priority; -+ unsigned long rlim_rtprio = 0; -+ struct rq_flags rf; -+ int reset_on_fork; -+ struct rq *rq; -+ -+ /* The pi code expects interrupts enabled */ -+ BUG_ON(pi && in_interrupt()); -+ -+ if (is_rt_policy(policy) && !capable(CAP_SYS_NICE)) { -+ unsigned long lflags; -+ -+ if (!lock_task_sighand(p, &lflags)) -+ return -ESRCH; -+ rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO); -+ unlock_task_sighand(p, &lflags); -+ if (rlim_rtprio) -+ goto recheck; -+ /* -+ * If the caller requested an RT policy without having the -+ * necessary rights, we downgrade the policy to SCHED_ISO. -+ * We also set the parameter to zero to pass the checks. -+ */ -+ policy = SCHED_ISO; -+ priority = 0; -+ } -+recheck: -+ /* Double check policy once rq lock held */ -+ if (policy < 0) { -+ reset_on_fork = p->sched_reset_on_fork; -+ policy = oldpolicy = p->policy; -+ } else { -+ reset_on_fork = !!(policy & SCHED_RESET_ON_FORK); -+ policy &= ~SCHED_RESET_ON_FORK; -+ -+ if (!SCHED_RANGE(policy)) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV)) -+ return -EINVAL; -+ -+ /* -+ * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * SCHED_BATCH is 0. 
-+ */ -+ if (priority < 0 || -+ (p->mm && priority > MAX_USER_RT_PRIO - 1) || -+ (!p->mm && priority > MAX_RT_PRIO - 1)) -+ return -EINVAL; -+ if (is_rt_policy(policy) != (priority != 0)) -+ return -EINVAL; -+ -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (is_rt_policy(policy)) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (priority > p->rt_priority && -+ priority > rlim_rtprio) -+ return -EPERM; -+ } else { -+ switch (p->policy) { -+ /* -+ * Can only downgrade policies but not back to -+ * SCHED_NORMAL -+ */ -+ case SCHED_ISO: -+ if (policy == SCHED_ISO) -+ goto out; -+ if (policy != SCHED_NORMAL) -+ return -EPERM; -+ break; -+ case SCHED_BATCH: -+ if (policy == SCHED_BATCH) -+ goto out; -+ if (policy != SCHED_IDLEPRIO) -+ return -EPERM; -+ break; -+ case SCHED_IDLEPRIO: -+ if (policy == SCHED_IDLEPRIO) -+ goto out; -+ return -EPERM; -+ default: -+ break; -+ } -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag: */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ -+ if (user) { -+ retval = security_task_setscheduler(p); -+ if (retval) -+ return retval; -+ } -+ -+ if (pi) -+ cpuset_read_lock(); -+ -+ /* -+ * Make sure no PI-waiters arrive (or leave) while we are -+ * changing the priority of the task: -+ * -+ * To be able to change p->policy safely, the runqueue lock must be -+ * held. -+ */ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ /* -+ * Changing the policy of the stop threads its a very bad idea: -+ */ -+ if (p == rq->stop) { -+ retval = -EINVAL; -+ goto unlock; -+ } -+ -+ /* -+ * If not changing anything there's no need to proceed further: -+ */ -+ if (unlikely(policy == p->policy && (!is_rt_policy(policy) || -+ priority == p->rt_priority))) { -+ retval = 0; -+ goto unlock; -+ } -+ -+ /* Re-check policy now with rq lock held */ -+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { -+ policy = oldpolicy = -1; -+ task_rq_unlock(rq, p, &rf); -+ if (pi) -+ cpuset_read_unlock(); -+ goto recheck; -+ } -+ p->sched_reset_on_fork = reset_on_fork; -+ -+ __setscheduler(p, rq, policy, priority, attr, pi); -+ -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ task_rq_unlock(rq, p, &rf); -+ -+ if (pi) { -+ cpuset_read_unlock(); -+ rt_mutex_adjust_pi(p); -+ } -+ preempt_enable(); -+out: -+ return 0; -+ -+unlock: -+ task_rq_unlock(rq, p, &rf); -+ if (pi) -+ cpuset_read_unlock(); -+ return retval; -+} -+ -+static int _sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param, bool check) -+{ -+ struct sched_attr attr = { -+ .sched_policy = policy, -+ .sched_priority = param->sched_priority, -+ .sched_nice = PRIO_TO_NICE(p->static_prio), -+ }; -+ -+ return __sched_setscheduler(p, &attr, check, true); -+} -+/** -+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Use sched_set_fifo(), read its comment. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * -+ * NOTE that the task may be already dead. 
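
The __sched_setscheduler() machinery above backs the ordinary POSIX interface; for context, a minimal userspace caller looks like the sketch below (it needs CAP_SYS_NICE or a suitable RLIMIT_RTPRIO to succeed, per the permission checks above):

	/* Minimal userspace caller of the policy/priority path handled above. */
	#include <sched.h>
	#include <stdio.h>
	#include <string.h>
	#include <errno.h>

	int main(void)
	{
		struct sched_param sp = { .sched_priority = 10 };
		struct sched_param got;

		/* pid 0 == the calling thread; EPERM is the expected outcome for
		 * unprivileged callers without an RLIMIT_RTPRIO allowance. */
		if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1)
			fprintf(stderr, "sched_setscheduler: %s\n", strerror(errno));

		sched_getparam(0, &got);
		printf("policy %d, rt priority %d\n",
		       sched_getscheduler(0), got.sched_priority);
		return 0;
	}
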
-+ */ -+int sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, true); -+} -+ -+ -+int sched_setattr(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, true, true); -+} -+ -+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, false, true); -+} -+ -+/** -+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Just like sched_setscheduler, only don't bother checking if the -+ * current context has permission. For example, this is needed in -+ * stop_machine(): we create temporary high priority worker threads, -+ * but our caller might not have that capability. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+int sched_setscheduler_nocheck(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, false); -+} -+ -+/* -+ * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally -+ * incapable of resource management, which is the one thing an OS really should -+ * be doing. -+ * -+ * This is of course the reason it is limited to privileged users only. -+ * -+ * Worse still; it is fundamentally impossible to compose static priority -+ * workloads. You cannot take two correctly working static prio workloads -+ * and smash them together and still expect them to work. -+ * -+ * For this reason 'all' FIFO tasks the kernel creates are basically at: -+ * -+ * MAX_RT_PRIO / 2 -+ * -+ * The administrator _MUST_ configure the system, the kernel simply doesn't -+ * know enough information to make a sensible choice. -+ */ -+void sched_set_fifo(struct task_struct *p) -+{ -+ struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 }; -+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_fifo); -+ -+/* -+ * For when you don't much care about FIFO, but want to be above SCHED_NORMAL. -+ */ -+void sched_set_fifo_low(struct task_struct *p) -+{ -+ struct sched_param sp = { .sched_priority = 1 }; -+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_fifo_low); -+ -+void sched_set_normal(struct task_struct *p, int nice) -+{ -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ .sched_nice = nice, -+ }; -+ WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_normal); -+ -+static int -+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) -+{ -+ struct sched_param lparam; -+ struct task_struct *p; -+ int retval; -+ -+ if (!param || pid < 0) -+ return -EINVAL; -+ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) -+ return -EFAULT; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setscheduler(p, policy, &lparam); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/* -+ * Mimics kernel/events/core.c perf_copy_attr(). 
-+ */ -+static int sched_copy_attr(struct sched_attr __user *uattr, -+ struct sched_attr *attr) -+{ -+ u32 size; -+ int ret; -+ -+ /* Zero the full structure, so that a short copy will be nice: */ -+ memset(attr, 0, sizeof(*attr)); -+ -+ ret = get_user(size, &uattr->size); -+ if (ret) -+ return ret; -+ -+ /* ABI compatibility quirk: */ -+ if (!size) -+ size = SCHED_ATTR_SIZE_VER0; -+ -+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) -+ goto err_size; -+ -+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); -+ if (ret) { -+ if (ret == -E2BIG) -+ goto err_size; -+ return ret; -+ } -+ -+ /* -+ * XXX: Do we want to be lenient like existing syscalls; or do we want -+ * to be strict and return an error on out-of-bounds values? -+ */ -+ attr->sched_nice = clamp(attr->sched_nice, -20, 19); -+ -+ /* sched/core.c uses zero here but we already know ret is zero */ -+ return 0; -+ -+err_size: -+ put_user(sizeof(*attr), &uattr->size); -+ return -E2BIG; -+} -+ -+/* -+ * sched_setparam() passes in -1 for its policy, to let the functions -+ * it calls know not to change it. -+ */ -+#define SETPARAM_POLICY -1 -+ -+/** -+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority -+ * @pid: the pid in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) -+{ -+ if (policy < 0) -+ return -EINVAL; -+ -+ return do_sched_setscheduler(pid, policy, param); -+} -+ -+/** -+ * sys_sched_setparam - set/change the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); -+} -+ -+/** -+ * sys_sched_setattr - same as above, but with extended sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ */ -+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, flags) -+{ -+ struct sched_attr attr; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || flags) -+ return -EINVAL; -+ -+ retval = sched_copy_attr(uattr, &attr); -+ if (retval) -+ return retval; -+ -+ if ((int)attr.sched_policy < 0) -+ return -EINVAL; -+ if (attr.sched_flags & SCHED_FLAG_KEEP_POLICY) -+ attr.sched_policy = SETPARAM_POLICY; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setattr(p, &attr); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread -+ * @pid: the pid in question. -+ * -+ * Return: On success, the policy of the thread. Otherwise, a negative error -+ * code. 
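/*
 * Illustrative userspace sketch (not part of the patch above): sched_setattr()
 * is often not wrapped by libc, so callers invoke the syscall directly with a
 * locally declared struct sched_attr.  The .size field is what
 * sched_copy_attr() above uses for forward/backward ABI compatibility (0 is
 * taken as SCHED_ATTR_SIZE_VER0), and sched_nice is clamped to [-20, 19]
 * in-kernel.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;	/* for SCHED_FIFO/SCHED_RR */
	uint64_t sched_runtime;		/* deadline fields, unused here */
	uint64_t sched_deadline;
	uint64_t sched_period;
};

int main(void)
{
	struct sched_attr attr = {
		.size         = sizeof(attr),
		.sched_policy = 0,	/* SCHED_NORMAL */
		.sched_nice   = 5,
	};

	if (syscall(SYS_sched_setattr, 0, &attr, 0))
		perror("sched_setattr");
	return 0;
}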
-+ */ -+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) -+{ -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (pid < 0) -+ goto out_nounlock; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (p) { -+ retval = security_task_getscheduler(p); -+ if (!retval) -+ retval = p->policy; -+ } -+ rcu_read_unlock(); -+ -+out_nounlock: -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the RT priority. -+ * -+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error -+ * code. -+ */ -+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ struct sched_param lp = { .sched_priority = 0 }; -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (!param || pid < 0) -+ goto out_nounlock; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ if (has_rt_policy(p)) -+ lp.sched_priority = p->rt_priority; -+ rcu_read_unlock(); -+ -+ /* -+ * This one might sleep, we cannot do it with a spinlock held ... -+ */ -+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; -+ -+out_nounlock: -+ return retval; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/* -+ * Copy the kernel size attribute structure (which might be larger -+ * than what user-space knows about) to user-space. -+ * -+ * Note that all cases are valid: user-space buffer can be larger or -+ * smaller than the kernel-space buffer. The usual case is that both -+ * have the same size. -+ */ -+static int -+sched_attr_copy_to_user(struct sched_attr __user *uattr, -+ struct sched_attr *kattr, -+ unsigned int usize) -+{ -+ unsigned int ksize = sizeof(*kattr); -+ -+ if (!access_ok(uattr, usize)) -+ return -EFAULT; -+ -+ /* -+ * sched_getattr() ABI forwards and backwards compatibility: -+ * -+ * If usize == ksize then we just copy everything to user-space and all is good. -+ * -+ * If usize < ksize then we only copy as much as user-space has space for, -+ * this keeps ABI compatibility as well. We skip the rest. -+ * -+ * If usize > ksize then user-space is using a newer version of the ABI, -+ * which part the kernel doesn't know about. Just ignore it - tooling can -+ * detect the kernel's knowledge of attributes from the attr->size value -+ * which is set to ksize in this case. -+ */ -+ kattr->size = min(usize, ksize); -+ -+ if (copy_to_user(uattr, kattr, kattr->size)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ * @usize: sizeof(attr) for fwd/bwd comp. -+ * @flags: for future extension. 
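/*
 * Illustrative userspace sketch (not part of the patch above): the read side
 * of the two syscalls implemented above, via the standard libc wrappers.
 */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	struct sched_param sp;
	int policy = sched_getscheduler(0);

	if (policy < 0 || sched_getparam(0, &sp) != 0)
		perror("sched_getscheduler/sched_getparam");
	else
		printf("policy %d, rt_priority %d\n", policy, sp.sched_priority);
	return 0;
}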
-+ */ -+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, usize, unsigned int, flags) -+{ -+ struct sched_attr kattr = { }; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || usize > PAGE_SIZE || -+ usize < SCHED_ATTR_SIZE_VER0 || flags) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ kattr.sched_policy = p->policy; -+ if (rt_task(p)) -+ kattr.sched_priority = p->rt_priority; -+ else -+ kattr.sched_nice = task_nice(p); -+ -+ rcu_read_unlock(); -+ -+ return sched_attr_copy_to_user(uattr, &kattr, usize); -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ -+ cpumask_var_t cpus_allowed, new_mask; -+ struct task_struct *p; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ p = find_process_by_pid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ return -ESRCH; -+ } -+ -+ /* Prevent p going away */ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->flags & PF_NO_SETAFFINITY) { -+ retval = -EINVAL; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ retval = -EPERM; -+ if (!check_same_owner(p)) { -+ rcu_read_lock(); -+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { -+ rcu_read_unlock(); -+ goto out_unlock; -+ } -+ rcu_read_unlock(); -+ } -+ -+ retval = security_task_setscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ cpuset_cpus_allowed(p, cpus_allowed); -+ cpumask_and(new_mask, in_mask, cpus_allowed); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ -+ if (!retval) { -+ cpuset_cpus_allowed(p, cpus_allowed); -+ if (!cpumask_subset(new_mask, cpus_allowed)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. Just reset the cpus_allowed to the -+ * cpuset's cpus_allowed -+ */ -+ cpumask_copy(new_mask, cpus_allowed); -+ goto again; -+ } -+ } -+out_unlock: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_allowed); -+out_put_task: -+ put_task_struct(p); -+ return retval; -+} -+ -+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, -+ cpumask_t *new_mask) -+{ -+ if (len < cpumask_size()) -+ cpumask_clear(new_mask); -+ else if (len > cpumask_size()) -+ len = cpumask_size(); -+ -+ return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; -+} -+ -+ -+/** -+ * sys_sched_setaffinity - set the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to the new CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. 
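/*
 * Illustrative userspace sketch (not part of the patch above): pinning the
 * calling thread to CPU 0 goes through sys_sched_setaffinity() and then the
 * sched_setaffinity()/__set_cpus_allowed_ptr() path above; note that the
 * requested mask is additionally intersected with the task's cpuset.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);

	if (sched_setaffinity(0, sizeof(set), &set) != 0)
		perror("sched_setaffinity");
	else
		puts("pinned to CPU 0");
	return 0;
}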
-+ */ -+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ cpumask_var_t new_mask; -+ int retval; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); -+ if (retval == 0) -+ retval = sched_setaffinity(pid, new_mask); -+ free_cpumask_var(new_mask); -+ return retval; -+} -+ -+long sched_getaffinity(pid_t pid, cpumask_t *mask) -+{ -+ struct task_struct *p; -+ unsigned long flags; -+ int retval; -+ -+ get_online_cpus(); -+ rcu_read_lock(); -+ -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+out_unlock: -+ rcu_read_unlock(); -+ put_online_cpus(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getaffinity - get the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to hold the current CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ int ret; -+ cpumask_var_t mask; -+ -+ if ((len * BITS_PER_BYTE) < nr_cpu_ids) -+ return -EINVAL; -+ if (len & (sizeof(unsigned long)-1)) -+ return -EINVAL; -+ -+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ ret = sched_getaffinity(pid, mask); -+ if (ret == 0) { -+ unsigned int retlen = min(len, cpumask_size()); -+ -+ if (copy_to_user(user_mask_ptr, mask, retlen)) -+ ret = -EFAULT; -+ else -+ ret = retlen; -+ } -+ free_cpumask_var(mask); -+ -+ return ret; -+} -+ -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. It does this by -+ * scheduling away the current task. If it still has the earliest deadline -+ * it will be scheduled again as the next task. -+ * -+ * Return: 0. -+ */ -+static void do_sched_yield(void) -+{ -+ struct rq *rq; -+ -+ if (!sched_yield_type) -+ return; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ rq_lock(rq); -+ -+ if (sched_yield_type > 1) -+ time_slice_expired(current, rq); -+ schedstat_inc(rq->yld_count); -+ -+ /* -+ * Since we are going to call schedule() anyway, there's -+ * no need to preempt or enable interrupts: -+ */ -+ preempt_disable(); -+ rq_unlock(rq); -+ sched_preempt_enable_no_resched(); -+ -+ schedule(); -+} -+ -+SYSCALL_DEFINE0(sched_yield) -+{ -+ do_sched_yield(); -+ return 0; -+} -+ -+#ifndef CONFIG_PREEMPTION -+int __sched _cond_resched(void) -+{ -+ if (should_resched(0)) { -+ preempt_schedule_common(); -+ return 1; -+ } -+ rcu_all_qs(); -+ return 0; -+} -+EXPORT_SYMBOL(_cond_resched); -+#endif -+ -+/* -+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, -+ * call schedule, and on return reacquire the lock. -+ * -+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level -+ * operations here to prevent schedule() from being called twice (once via -+ * spin_unlock(), once by hand). 
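/*
 * Illustrative userspace sketch (not part of the patch above): sched_yield()
 * ends up in do_sched_yield() above, so under MuQSS its effect depends on the
 * sched_yield_type tunable: 0 makes the call return without rescheduling,
 * and values above 1 also expire the current timeslice before schedule().
 */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	/* Relinquish the CPU once; never rely on this as a progress guarantee. */
	if (sched_yield() != 0)
		perror("sched_yield");
	return 0;
}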
-+ */ -+int __cond_resched_lock(spinlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held(lock); -+ -+ if (spin_needbreak(lock) || resched) { -+ spin_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ spin_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_lock); -+ -+/** -+ * yield - yield the current processor to other threads. -+ * -+ * Do not ever use this function, there's a 99% chance you're doing it wrong. -+ * -+ * The scheduler is at all times free to pick the calling task as the most -+ * eligible task to run, if removing the yield() call from your code breaks -+ * it, its already broken. -+ * -+ * Typical broken usage is: -+ * -+ * while (!event) -+ * yield(); -+ * -+ * where one assumes that yield() will let 'the other' process run that will -+ * make event true. If the current task is a SCHED_FIFO task that will never -+ * happen. Never use yield() as a progress guarantee!! -+ * -+ * If you want to use yield() to wait for something, use wait_event(). -+ * If you want to use yield() to be 'nice' for others, use cond_resched(). -+ * If you still want to use yield(), do not! -+ */ -+void __sched yield(void) -+{ -+ set_current_state(TASK_RUNNING); -+ do_sched_yield(); -+} -+EXPORT_SYMBOL(yield); -+ -+/** -+ * yield_to - yield the current processor to another thread in -+ * your thread group, or accelerate that thread toward the -+ * processor it's on. -+ * @p: target task -+ * @preempt: whether task preemption is allowed or not -+ * -+ * It's the caller's job to ensure that the target task struct -+ * can't go away on us before we can do any checks. -+ * -+ * Return: -+ * true (>0) if we indeed boosted the target task. -+ * false (0) if we failed to boost the target. -+ * -ESRCH if there's no task to yield to. -+ */ -+int __sched yield_to(struct task_struct *p, bool preempt) -+{ -+ struct task_struct *rq_p; -+ struct rq *rq, *p_rq; -+ unsigned long flags; -+ int yielded = 0; -+ -+ local_irq_save(flags); -+ rq = this_rq(); -+ -+again: -+ p_rq = task_rq(p); -+ /* -+ * If we're the only runnable task on the rq and target rq also -+ * has only one task, there's absolutely no point in yielding. -+ */ -+ if (task_running(p_rq, p) || p->state) { -+ yielded = -ESRCH; -+ goto out_irq; -+ } -+ -+ double_rq_lock(rq, p_rq); -+ if (unlikely(task_rq(p) != p_rq)) { -+ double_rq_unlock(rq, p_rq); -+ goto again; -+ } -+ -+ yielded = 1; -+ schedstat_inc(rq->yld_count); -+ rq_p = rq->curr; -+ if (p->deadline > rq_p->deadline) -+ p->deadline = rq_p->deadline; -+ p->time_slice += rq_p->time_slice; -+ if (p->time_slice > timeslice()) -+ p->time_slice = timeslice(); -+ time_slice_expired(rq_p, rq); -+ if (preempt && rq != p_rq) -+ resched_task(p_rq->curr); -+ double_rq_unlock(rq, p_rq); -+out_irq: -+ local_irq_restore(flags); -+ -+ if (yielded > 0) -+ schedule(); -+ return yielded; -+} -+EXPORT_SYMBOL_GPL(yield_to); -+ -+int io_schedule_prepare(void) -+{ -+ int old_iowait = current->in_iowait; -+ -+ current->in_iowait = 1; -+ blk_schedule_flush_plug(current); -+ -+ return old_iowait; -+} -+ -+void io_schedule_finish(int token) -+{ -+ current->in_iowait = token; -+} -+ -+/* -+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so -+ * that process accounting knows that this is a task in IO wait state. 
-+ * -+ * But don't do that if it is a deliberate, throttling IO wait (this task -+ * has set its backing_dev_info: the queue against which it should throttle) -+ */ -+ -+long __sched io_schedule_timeout(long timeout) -+{ -+ int token; -+ long ret; -+ -+ token = io_schedule_prepare(); -+ ret = schedule_timeout(timeout); -+ io_schedule_finish(token); -+ -+ return ret; -+} -+EXPORT_SYMBOL(io_schedule_timeout); -+ -+void __sched io_schedule(void) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ schedule(); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(io_schedule); -+ -+/** -+ * sys_sched_get_priority_max - return maximum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the maximum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_max, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = MAX_USER_RT_PRIO-1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLEPRIO: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * sys_sched_get_priority_min - return minimum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the minimum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_min, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLEPRIO: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) -+{ -+ struct task_struct *p; -+ unsigned int time_slice; -+ struct rq_flags rf; -+ struct rq *rq; -+ int retval; -+ -+ if (pid < 0) -+ return -EINVAL; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ rq = task_rq_lock(p, &rf); -+ time_slice = p->policy == SCHED_FIFO ? 0 : MS_TO_NS(task_timeslice(p)); -+ task_rq_unlock(rq, p, &rf); -+ -+ rcu_read_unlock(); -+ *t = ns_to_timespec64(time_slice); -+ return 0; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/** -+ * sys_sched_rr_get_interval - return the default timeslice of a process. -+ * @pid: pid of the process. -+ * @interval: userspace pointer to the timeslice value. -+ * -+ * this syscall writes the default timeslice value of a given process -+ * into the user-space timespec buffer. A value of '0' means infinity. -+ * -+ * Return: On success, 0 and the timeslice is in @interval. Otherwise, -+ * an error code. 
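/*
 * Illustrative userspace sketch (not part of the patch above): querying the
 * timeslice reported by the helper above.  MuQSS reports task_timeslice()
 * converted to nanoseconds, and 0 (returned for SCHED_FIFO) means "infinite".
 */
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	if (sched_rr_get_interval(0, &ts) != 0)
		perror("sched_rr_get_interval");
	else
		printf("timeslice: %ld.%09ld s\n", (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}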
-+ */ -+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, -+ struct __kernel_timespec __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_timespec64(&t, interval); -+ -+ return retval; -+} -+ -+#ifdef CONFIG_COMPAT_32BIT_TIME -+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, -+ struct old_timespec32 __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_old_timespec32(&t, interval); -+ return retval; -+} -+#endif -+ -+void sched_show_task(struct task_struct *p) -+{ -+ unsigned long free = 0; -+ int ppid; -+ -+ if (!try_get_task_stack(p)) -+ return; -+ -+ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); -+ -+ if (p->state == TASK_RUNNING) -+ printk(KERN_CONT " running task "); -+#ifdef CONFIG_DEBUG_STACK_USAGE -+ free = stack_not_used(p); -+#endif -+ ppid = 0; -+ rcu_read_lock(); -+ if (pid_alive(p)) -+ ppid = task_pid_nr(rcu_dereference(p->real_parent)); -+ rcu_read_unlock(); -+ pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", -+ free, task_pid_nr(p), ppid, -+ (unsigned long)task_thread_info(p)->flags); -+ -+ print_worker_info(KERN_INFO, p); -+ show_stack(p, NULL, KERN_INFO); -+ put_task_stack(p); -+} -+EXPORT_SYMBOL_GPL(sched_show_task); -+ -+static inline bool -+state_filter_match(unsigned long state_filter, struct task_struct *p) -+{ -+ /* no filter, everything matches */ -+ if (!state_filter) -+ return true; -+ -+ /* filter, but doesn't match */ -+ if (!(p->state & state_filter)) -+ return false; -+ -+ /* -+ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows -+ * TASK_KILLABLE). -+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) -+ return false; -+ -+ return true; -+} -+ -+void show_state_filter(unsigned long state_filter) -+{ -+ struct task_struct *g, *p; -+ -+ rcu_read_lock(); -+ for_each_process_thread(g, p) { -+ /* -+ * reset the NMI-timeout, listing all files on a slow -+ * console might take a lot of time: -+ * Also, reset softlockup watchdogs on all CPUs, because -+ * another CPU might be blocked waiting for us to process -+ * an IPI. -+ */ -+ touch_nmi_watchdog(); -+ touch_all_softlockup_watchdogs(); -+ if (state_filter_match(state_filter, p)) -+ sched_show_task(p); -+ } -+ -+ rcu_read_unlock(); -+ /* -+ * Only show locks if all tasks are dumped: -+ */ -+ if (!state_filter) -+ debug_show_all_locks(); -+} -+ -+void dump_cpu_task(int cpu) -+{ -+ pr_info("Task dump for CPU %d:\n", cpu); -+ sched_show_task(cpu_curr(cpu)); -+} -+ -+#ifdef CONFIG_SMP -+void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ p->nr_cpus_allowed = cpumask_weight(new_mask); -+} -+ -+void __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ struct rq *rq = task_rq(p); -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ -+ if (task_queued(p)) { -+ /* -+ * Because __kthread_bind() calls this on blocked tasks without -+ * holding rq->lock. -+ */ -+ lockdep_assert_held(rq->lock); -+ } -+} -+ -+/* -+ * Calling do_set_cpus_allowed from outside the scheduler code should not be -+ * called on a running or queued task. We should be holding pi_lock. 
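/*
 * Illustrative sketch (not part of the patch above): sched_show_task() and
 * show_state_filter() above produce the per-task lines of a task dump in the
 * kernel log.  One common way to trigger such a dump (as root, with sysrq
 * enabled) is the 't' SysRq via /proc/sysrq-trigger:
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/sysrq-trigger", O_WRONLY);

	if (fd < 0 || write(fd, "t", 1) != 1)
		perror("sysrq task dump");
	if (fd >= 0)
		close(fd);
	return 0;	/* the dump lands in the kernel log (dmesg) */
}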
-+ */ -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ __do_set_cpus_allowed(p, new_mask); -+ if (needs_other_cpu(p, task_cpu(p))) { -+ struct rq *rq; -+ -+ rq = __task_rq_lock(p, NULL); -+ set_task_cpu(p, valid_task_cpu(p)); -+ resched_task(p); -+ __task_rq_unlock(rq, NULL); -+ } -+} -+#endif -+ -+/** -+ * init_idle - set up an idle thread for a given CPU -+ * @idle: task in question -+ * @cpu: cpu the idle task belongs to -+ * -+ * NOTE: this function does not set the idle thread's NEED_RESCHED -+ * flag, to make booting more robust. -+ */ -+void init_idle(struct task_struct *idle, int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&idle->pi_lock, flags); -+ raw_spin_lock(rq->lock); -+ idle->last_ran = rq->niffies; -+ time_slice_expired(idle, rq); -+ idle->state = TASK_RUNNING; -+ /* Setting prio to illegal value shouldn't matter when never queued */ -+ idle->prio = PRIO_LIMIT; -+ idle->flags |= PF_IDLE; -+ -+ scs_task_reset(idle); -+ kasan_unpoison_task_stack(idle); -+ -+#ifdef CONFIG_SMP -+ /* -+ * It's possible that init_idle() gets called multiple times on a task, -+ * in that case do_set_cpus_allowed() will not do the right thing. -+ * -+ * And since this is boot we can forgo the serialisation. -+ */ -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); -+#ifdef CONFIG_SMT_NICE -+ idle->smt_bias = 0; -+#endif -+#endif -+ set_rq_task(rq, idle); -+ -+ /* Silence PROVE_RCU */ -+ rcu_read_lock(); -+ set_task_cpu(idle, cpu); -+ rcu_read_unlock(); -+ -+ rq->idle = idle; -+ rcu_assign_pointer(rq->curr, idle); -+ idle->on_rq = TASK_ON_RQ_QUEUED; -+ raw_spin_unlock(rq->lock); -+ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); -+ -+ /* Set the preempt count _outside_ the spinlocks! */ -+ init_idle_preempt_count(idle, cpu); -+ -+ ftrace_graph_init_idle_task(idle, cpu); -+ vtime_init_idle(idle, cpu); -+#ifdef CONFIG_SMP -+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -+#endif -+} -+ -+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, -+ const struct cpumask __maybe_unused *trial) -+{ -+ return 1; -+} -+ -+int task_can_attach(struct task_struct *p, -+ const struct cpumask *cs_cpus_allowed) -+{ -+ int ret = 0; -+ -+ /* -+ * Kthreads which disallow setaffinity shouldn't be moved -+ * to a new cpuset; we don't want to change their CPU -+ * affinity and isolating such threads by their set of -+ * allowed nodes is unnecessary. Thus, cpusets are not -+ * applicable for such threads. This prevents checking for -+ * success of set_cpus_allowed_ptr() on all attached tasks -+ * before cpus_mask may be changed. -+ */ -+ if (p->flags & PF_NO_SETAFFINITY) -+ ret = -EINVAL; -+ -+ return ret; -+} -+ -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+ rq_lock_irqsave(rq, &rf); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(rq); -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+#ifdef CONFIG_SMP -+#ifdef CONFIG_NO_HZ_COMMON -+void select_nohz_load_balancer(int stop_tick) -+{ -+} -+ -+void set_cpu_sd_state_idle(void) {} -+void nohz_balance_enter_idle(int cpu) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. -+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). 
-+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(), default_cpu = -1; -+ struct sched_domain *sd; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { -+ if (!idle_cpu(cpu)) -+ return cpu; -+ default_cpu = cpu; -+ } -+ -+ rcu_read_lock(); -+ for_each_domain(cpu, sd) { -+ for_each_cpu_and(i, sched_domain_span(sd), -+ housekeeping_cpumask(HK_FLAG_TIMER)) { -+ if (cpu == i) -+ continue; -+ -+ if (!idle_cpu(i)) { -+ cpu = i; -+ goto unlock; -+ } -+ } -+ } -+ -+ if (default_cpu == -1) -+ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+ cpu = default_cpu; -+unlock: -+ rcu_read_unlock(); -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. -+ */ -+void wake_up_idle_cpu(int cpu) -+{ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ if (set_nr_and_not_polling(cpu_rq(cpu)->idle)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+static bool wake_up_full_nohz_cpu(int cpu) -+{ -+ /* -+ * We just need the target to call irq_exit() and re-evaluate -+ * the next tick. The nohz full kick at least implies that. -+ * If needed we can still optimize that later with an -+ * empty IRQ. -+ */ -+ if (cpu_is_offline(cpu)) -+ return true; /* Don't try to wake offline CPUs. */ -+ if (tick_nohz_full_cpu(cpu)) { -+ if (cpu != smp_processor_id() || -+ tick_nohz_tick_stopped()) -+ tick_nohz_full_kick_cpu(cpu); -+ return true; -+ } -+ -+ return false; -+} -+ -+/* -+ * Wake up the specified CPU. If the CPU is going offline, it is the -+ * caller's responsibility to deal with the lost wakeup, for example, -+ * by hooking into the CPU_DEAD notifier like timers and hrtimers do. -+ */ -+void wake_up_nohz_cpu(int cpu) -+{ -+ if (!wake_up_full_nohz_cpu(cpu)) -+ wake_up_idle_cpu(cpu); -+} -+#endif /* CONFIG_NO_HZ_COMMON */ -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ bool queued = false, running_wrong = false, kthread; -+ unsigned int dest_cpu; -+ struct rq_flags rf; -+ struct rq *rq; -+ int ret = 0; -+ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ kthread = !!(p->flags & PF_KTHREAD); -+ if (kthread) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. 
-+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (cpumask_equal(&p->cpus_mask, new_mask)) -+ goto out; -+ -+ /* -+ * Picking a ~random cpu helps in cases where we are changing affinity -+ * for groups of tasks (ie. cpuset), so that load balancing is not -+ * immediately required to distribute the tasks within their new mask. -+ */ -+ dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ queued = task_queued(p); -+ __do_set_cpus_allowed(p, new_mask); -+ -+ if (kthread) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. -+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(rq, p)) { -+ /* Task is running on the wrong cpu now, reschedule it. */ -+ if (rq == this_rq()) { -+ set_task_cpu(p, dest_cpu); -+ set_tsk_need_resched(p); -+ running_wrong = true; -+ } else -+ resched_task(p); -+ } else { -+ if (queued) { -+ /* -+ * Switch runqueue locks after dequeueing the task -+ * here while still holding the pi_lock to be holding -+ * the correct lock for enqueueing. -+ */ -+ dequeue_task(rq, p, 0); -+ rq_unlock(rq); -+ -+ rq = cpu_rq(dest_cpu); -+ rq_lock(rq); -+ } -+ set_task_cpu(p, dest_cpu); -+ if (queued) -+ enqueue_task(rq, p, 0); -+ } -+ if (queued) -+ try_preempt(p, rq); -+ if (running_wrong) -+ preempt_disable(); -+out: -+ task_rq_unlock(rq, p, &rf); -+ -+ if (running_wrong) { -+ __schedule(true); -+ preempt_enable(); -+ } -+ -+ return ret; -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+#ifdef CONFIG_HOTPLUG_CPU -+/* -+ * Run through task list and find tasks affined to the dead cpu, then remove -+ * that cpu from the list, enable cpu0 and set the zerobound flag. Must hold -+ * cpu 0 and src_cpu's runqueue locks. We should be holding both rq lock and -+ * pi_lock to change cpus_mask but it's not going to matter here. -+ */ -+static void bind_zero(int src_cpu) -+{ -+ struct task_struct *p, *t; -+ struct rq *rq0; -+ int bound = 0; -+ -+ if (src_cpu == 0) -+ return; -+ -+ rq0 = cpu_rq(0); -+ -+ do_each_thread(t, p) { -+ if (cpumask_test_cpu(src_cpu, p->cpus_ptr)) { -+ bool local = (task_cpu(p) == src_cpu); -+ struct rq *rq = task_rq(p); -+ -+ /* task_running is the cpu stopper thread */ -+ if (local && task_running(rq, p)) -+ continue; -+ atomic_clear_cpu(src_cpu, &p->cpus_mask); -+ atomic_set_cpu(0, &p->cpus_mask); -+ p->zerobound = true; -+ bound++; -+ if (local) { -+ bool queued = task_queued(p); -+ -+ if (queued) -+ dequeue_task(rq, p, 0); -+ set_task_cpu(p, 0); -+ if (queued) -+ enqueue_task(rq0, p, 0); -+ } -+ } -+ } while_each_thread(t, p); -+ -+ if (bound) { -+ printk(KERN_INFO "MuQSS removed affinity for %d processes to cpu %d\n", -+ bound, src_cpu); -+ } -+} -+ -+/* Find processes with the zerobound flag and reenable their affinity for the -+ * CPU coming alive. 
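/*
 * Illustrative sketch (not part of the patch above): bind_zero() runs when a
 * CPU goes down, dropping that CPU from affected affinity masks, allowing
 * CPU 0 instead and flagging the tasks "zerobound" so the affinity can be
 * restored when the CPU returns.  Offlining a CPU from userspace (as root,
 * assuming cpu1 exists and is hotpluggable) uses the standard sysfs interface:
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/devices/system/cpu/cpu1/online", O_WRONLY);

	if (fd < 0 || write(fd, "0", 1) != 1)
		perror("offline cpu1");
	if (fd >= 0)
		close(fd);
	return 0;	/* the MuQSS affinity messages appear in the kernel log */
}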
*/ -+static void unbind_zero(int src_cpu) -+{ -+ int unbound = 0, zerobound = 0; -+ struct task_struct *p, *t; -+ -+ if (src_cpu == 0) -+ return; -+ -+ do_each_thread(t, p) { -+ if (!p->mm) -+ p->zerobound = false; -+ if (p->zerobound) { -+ unbound++; -+ cpumask_set_cpu(src_cpu, &p->cpus_mask); -+ /* Once every CPU affinity has been re-enabled, remove -+ * the zerobound flag */ -+ if (cpumask_subset(cpu_possible_mask, p->cpus_ptr)) { -+ p->zerobound = false; -+ zerobound++; -+ } -+ } -+ } while_each_thread(t, p); -+ -+ if (unbound) { -+ printk(KERN_INFO "MuQSS added affinity for %d processes to cpu %d\n", -+ unbound, src_cpu); -+ } -+ if (zerobound) { -+ printk(KERN_INFO "MuQSS released forced binding to cpu0 for %d processes\n", -+ zerobound); -+ } -+} -+ -+/* -+ * Ensure that the idle task is using init_mm right before its cpu goes -+ * offline. -+ */ -+void idle_task_exit(void) -+{ -+ struct mm_struct *mm = current->active_mm; -+ -+ BUG_ON(cpu_online(smp_processor_id())); -+ BUG_ON(current != this_rq()->idle); -+ -+ if (mm != &init_mm) { -+ switch_mm(mm, &init_mm, current); -+ finish_arch_post_lock_switch(); -+ } -+ -+ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ -+} -+#else /* CONFIG_HOTPLUG_CPU */ -+static void unbind_zero(int src_cpu) {} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. -+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. -+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+ -+static struct ctl_table sd_ctl_dir[] = { -+ { -+ .procname = "sched_domain", -+ .mode = 0555, -+ }, -+ {} -+}; -+ -+static struct ctl_table sd_ctl_root[] = { -+ { -+ .procname = "kernel", -+ .mode = 0555, -+ .child = sd_ctl_dir, -+ }, -+ {} -+}; -+ -+static struct ctl_table *sd_alloc_ctl_entry(int n) -+{ -+ struct ctl_table *entry = -+ kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL); -+ -+ return entry; -+} -+ -+static void sd_free_ctl_entry(struct ctl_table **tablep) -+{ -+ struct ctl_table *entry; -+ -+ /* -+ * In the intermediate directories, both the child directory and -+ * procname are dynamically allocated and could fail but the mode -+ * will always be set. In the lowest directory the names are -+ * static strings and all have proc handlers. 
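/*
 * Illustrative sketch (not part of the patch above): with CONFIG_SCHED_DEBUG
 * and CONFIG_SYSCTL, the "kernel" -> "sched_domain" tree declared here and
 * populated per CPU and domain below appears as
 * /proc/sys/kernel/sched_domain/cpu<N>/domain<M>/, so entries such as "name"
 * or "imbalance_pct" can be read like ordinary files:
 */
#include <stdio.h>

int main(void)
{
	char buf[64];
	FILE *f = fopen("/proc/sys/kernel/sched_domain/cpu0/domain0/name", "r");

	if (f && fgets(buf, sizeof(buf), f))
		printf("cpu0 domain0 name: %s", buf);
	else
		puts("sched_domain sysctl tree not available");
	if (f)
		fclose(f);
	return 0;
}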
-+ */ -+ for (entry = *tablep; entry->mode; entry++) { -+ if (entry->child) -+ sd_free_ctl_entry(&entry->child); -+ if (entry->proc_handler == NULL) -+ kfree(entry->procname); -+ } -+ -+ kfree(*tablep); -+ *tablep = NULL; -+} -+ -+static void -+set_table_entry(struct ctl_table *entry, -+ const char *procname, void *data, int maxlen, -+ umode_t mode, proc_handler *proc_handler) -+{ -+ entry->procname = procname; -+ entry->data = data; -+ entry->maxlen = maxlen; -+ entry->mode = mode; -+ entry->proc_handler = proc_handler; -+} -+ -+static struct ctl_table * -+sd_alloc_ctl_domain_table(struct sched_domain *sd) -+{ -+ struct ctl_table *table = sd_alloc_ctl_entry(9); -+ -+ if (table == NULL) -+ return NULL; -+ -+ set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring); -+ /* &table[8] is terminator */ -+ -+ return table; -+} -+ -+static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu) -+{ -+ struct ctl_table *entry, *table; -+ struct sched_domain *sd; -+ int domain_num = 0, i; -+ char buf[32]; -+ -+ for_each_domain(cpu, sd) -+ domain_num++; -+ entry = table = sd_alloc_ctl_entry(domain_num + 1); -+ if (table == NULL) -+ return NULL; -+ -+ i = 0; -+ for_each_domain(cpu, sd) { -+ snprintf(buf, 32, "domain%d", i); -+ entry->procname = kstrdup(buf, GFP_KERNEL); -+ entry->mode = 0555; -+ entry->child = sd_alloc_ctl_domain_table(sd); -+ entry++; -+ i++; -+ } -+ return table; -+} -+ -+static cpumask_var_t sd_sysctl_cpus; -+static struct ctl_table_header *sd_sysctl_header; -+ -+void register_sched_domain_sysctl(void) -+{ -+ static struct ctl_table *cpu_entries; -+ static struct ctl_table **cpu_idx; -+ char buf[32]; -+ int i; -+ -+ if (!cpu_entries) { -+ cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1); -+ if (!cpu_entries) -+ return; -+ -+ WARN_ON(sd_ctl_dir[0].child); -+ sd_ctl_dir[0].child = cpu_entries; -+ } -+ -+ if (!cpu_idx) { -+ struct ctl_table *e = cpu_entries; -+ -+ cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL); -+ if (!cpu_idx) -+ return; -+ -+ /* deal with sparse possible map */ -+ for_each_possible_cpu(i) { -+ cpu_idx[i] = e; -+ e++; -+ } -+ } -+ -+ if (!cpumask_available(sd_sysctl_cpus)) { -+ if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL)) -+ return; -+ -+ /* init to possible to not have holes in @cpu_entries */ -+ cpumask_copy(sd_sysctl_cpus, cpu_possible_mask); -+ } -+ -+ for_each_cpu(i, sd_sysctl_cpus) { -+ struct ctl_table *e = cpu_idx[i]; -+ -+ if (e->child) -+ sd_free_ctl_entry(&e->child); -+ -+ if (!e->procname) { -+ snprintf(buf, 32, "cpu%d", i); -+ e->procname = kstrdup(buf, GFP_KERNEL); -+ } -+ e->mode = 0555; -+ e->child = sd_alloc_ctl_cpu_table(i); -+ -+ __cpumask_clear_cpu(i, sd_sysctl_cpus); -+ } -+ -+ WARN_ON(sd_sysctl_header); -+ sd_sysctl_header = 
register_sysctl_table(sd_ctl_root); -+} -+ -+void dirty_sched_domain_sysctl(int cpu) -+{ -+ if (cpumask_available(sd_sysctl_cpus)) -+ __cpumask_set_cpu(cpu, sd_sysctl_cpus); -+} -+ -+/* may be called multiple times per register */ -+void unregister_sched_domain_sysctl(void) -+{ -+ unregister_sysctl_table(sd_sysctl_header); -+ sd_sysctl_header = NULL; -+} -+#endif /* CONFIG_SYSCTL */ -+ -+void set_rq_online(struct rq *rq) -+{ -+ if (!rq->online) { -+ cpumask_set_cpu(cpu_of(rq), rq->rd->online); -+ rq->online = true; -+ } -+} -+ -+void set_rq_offline(struct rq *rq) -+{ -+ if (rq->online) { -+ int cpu = cpu_of(rq); -+ -+ cpumask_clear_cpu(cpu, rq->rd->online); -+ rq->online = false; -+ clear_cpuidle_map(cpu); -+ } -+} -+ -+/* -+ * used to mark begin/end of suspend/resume: -+ */ -+static int num_cpus_frozen; -+ -+/* -+ * Update cpusets according to cpu_active mask. If cpusets are -+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper -+ * around partition_sched_domains(). -+ * -+ * If we come here as part of a suspend/resume, don't touch cpusets because we -+ * want to restore it back to its original state upon resume anyway. -+ */ -+static void cpuset_cpu_active(void) -+{ -+ if (cpuhp_tasks_frozen) { -+ /* -+ * num_cpus_frozen tracks how many CPUs are involved in suspend -+ * resume sequence. As long as this is not the last online -+ * operation in the resume sequence, just build a single sched -+ * domain, ignoring cpusets. -+ */ -+ partition_sched_domains(1, NULL, NULL); -+ if (--num_cpus_frozen) -+ return; -+ /* -+ * This is the last CPU online operation. So fall through and -+ * restore the original sched domains by considering the -+ * cpuset configurations. -+ */ -+ cpuset_force_rebuild(); -+ } -+ -+ cpuset_update_active_cpus(); -+} -+ -+static int cpuset_cpu_inactive(unsigned int cpu) -+{ -+ if (!cpuhp_tasks_frozen) { -+ cpuset_update_active_cpus(); -+ } else { -+ num_cpus_frozen++; -+ partition_sched_domains(1, NULL, NULL); -+ } -+ return 0; -+} -+ -+int sched_cpu_activate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going up, increment the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_inc_cpuslocked(&sched_smt_present); -+#endif -+ set_cpu_active(cpu, true); -+ -+ if (sched_smp_initialized) { -+ sched_domains_numa_masks_set(cpu); -+ cpuset_cpu_active(); -+ } -+ -+ /* -+ * Put the rq online, if not already. This happens: -+ * -+ * 1) In the early boot process, because we build the real domains -+ * after all CPUs have been brought up. -+ * -+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the -+ * domains. -+ */ -+ rq_lock_irqsave(rq, &rf); -+ if (rq->rd) { -+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); -+ set_rq_online(rq); -+ } -+ unbind_zero(cpu); -+ rq_unlock_irqrestore(rq, &rf); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ int ret; -+ -+ set_cpu_active(cpu, false); -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. -+ */ -+ synchronize_rcu(); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going down, decrement the number of cores with SMT present. 
-+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_dec_cpuslocked(&sched_smt_present); -+#endif -+ -+ if (!sched_smp_initialized) -+ return 0; -+ -+ ret = cpuset_cpu_inactive(cpu); -+ if (ret) { -+ set_cpu_active(cpu, true); -+ return ret; -+ } -+ sched_domains_numa_masks_clear(cpu); -+ return 0; -+} -+ -+int sched_cpu_starting(unsigned int cpu) -+{ -+ sched_tick_start(cpu); -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+int sched_cpu_dying(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ /* Handle pending wakeups and then migrate everything off */ -+ sched_tick_stop(cpu); -+ -+ local_irq_save(flags); -+ double_rq_lock(rq, cpu_rq(0)); -+ if (rq->rd) { -+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); -+ set_rq_offline(rq); -+ } -+ bind_zero(cpu); -+ double_rq_unlock(rq, cpu_rq(0)); -+ sched_start_tick(rq, cpu); -+ hrexpiry_clear(rq); -+ local_irq_restore(flags); -+ -+ return 0; -+} -+#endif -+ -+#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC) -+/* -+ * Cheaper version of the below functions in case support for SMT and MC is -+ * compiled in but CPUs have no siblings. -+ */ -+static bool sole_cpu_idle(struct rq *rq) -+{ -+ return rq_idle(rq); -+} -+#endif -+#ifdef CONFIG_SCHED_SMT -+static const cpumask_t *thread_cpumask(int cpu) -+{ -+ return topology_sibling_cpumask(cpu); -+} -+/* All this CPU's SMT siblings are idle */ -+static bool siblings_cpu_idle(struct rq *rq) -+{ -+ return cpumask_subset(&rq->thread_mask, &cpu_idle_map); -+} -+#endif -+#ifdef CONFIG_SCHED_MC -+static const cpumask_t *core_cpumask(int cpu) -+{ -+ return topology_core_cpumask(cpu); -+} -+/* All this CPU's shared cache siblings are idle */ -+static bool cache_cpu_idle(struct rq *rq) -+{ -+ return cpumask_subset(&rq->core_mask, &cpu_idle_map); -+} -+/* MC siblings CPU mask which share the same LLC */ -+static const cpumask_t *llc_core_cpumask(int cpu) -+{ -+#ifdef CONFIG_X86 -+ return per_cpu(cpu_llc_shared_map, cpu); -+#else -+ return topology_core_cpumask(cpu); -+#endif -+} -+#endif -+ -+enum sched_domain_level { -+ SD_LV_NONE = 0, -+ SD_LV_SIBLING, -+ SD_LV_MC, -+ SD_LV_BOOK, -+ SD_LV_CPU, -+ SD_LV_NODE, -+ SD_LV_ALLNODES, -+ SD_LV_MAX -+}; -+ -+/* -+ * Set up the relative cache distance of each online cpu from each -+ * other in a simple array for quick lookup. Locality is determined -+ * by the closest sched_domain that CPUs are separated by. CPUs with -+ * shared cache in SMT and MC are treated as local. Separate CPUs -+ * (within the same package or physically) within the same node are -+ * treated as not local. CPUs not even in the same domain (different -+ * nodes) are treated as very distant. 
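/*
 * Illustrative userspace sketch (not part of the patch above): the SMT and MC
 * sibling masks used above (topology_sibling_cpumask()/topology_core_cpumask())
 * are mirrored in sysfs, so the CPU groups MuQSS will treat as cache-local can
 * be inspected without kernel access:
 */
#include <stdio.h>

static void show(const char *path)
{
	char buf[256];
	FILE *f = fopen(path, "r");

	if (f && fgets(buf, sizeof(buf), f))
		printf("%-60s %s", path, buf);
	if (f)
		fclose(f);
}

int main(void)
{
	show("/sys/devices/system/cpu/cpu0/topology/thread_siblings_list");
	show("/sys/devices/system/cpu/cpu0/topology/core_siblings_list");
	return 0;
}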
-+ */ -+static void __init select_leaders(void) -+{ -+ struct rq *rq, *other_rq, *leader; -+ struct sched_domain *sd; -+ int cpu, other_cpu; -+#ifdef CONFIG_SCHED_SMT -+ bool smt_threads = false; -+#endif -+ -+ for (cpu = 0; cpu < num_online_cpus(); cpu++) { -+ rq = cpu_rq(cpu); -+ leader = NULL; -+ /* First check if this cpu is in the same node */ -+ for_each_domain(cpu, sd) { -+ if (sd->level > SD_LV_MC) -+ continue; -+ if (rqshare != RQSHARE_ALL) -+ leader = NULL; -+ /* Set locality to local node if not already found lower */ -+ for_each_cpu(other_cpu, sched_domain_span(sd)) { -+ if (rqshare >= RQSHARE_SMP) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the smp_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ if (!other_rq->smp_leader) -+ other_rq->smp_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_SMP) -+ rq->cpu_locality[other_cpu] = LOCALITY_SMP; -+ } -+ } -+ -+ /* -+ * Each runqueue has its own function in case it doesn't have -+ * siblings of its own allowing mixed topologies. -+ */ -+#ifdef CONFIG_SCHED_MC -+ leader = NULL; -+ if (cpumask_weight(core_cpumask(cpu)) > 1) { -+ cpumask_copy(&rq->core_mask, llc_core_cpumask(cpu)); -+ cpumask_clear_cpu(cpu, &rq->core_mask); -+ for_each_cpu(other_cpu, core_cpumask(cpu)) { -+ if (rqshare == RQSHARE_MC || -+ (rqshare == RQSHARE_MC_LLC && cpumask_test_cpu(other_cpu, llc_core_cpumask(cpu)))) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the mc_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ if (!other_rq->mc_leader) -+ other_rq->mc_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_MC) { -+ /* this is to get LLC into play even in case LLC sharing is not used */ -+ if (cpumask_test_cpu(other_cpu, llc_core_cpumask(cpu))) -+ rq->cpu_locality[other_cpu] = LOCALITY_MC_LLC; -+ else -+ rq->cpu_locality[other_cpu] = LOCALITY_MC; -+ } -+ } -+ rq->cache_idle = cache_cpu_idle; -+ } -+#endif -+#ifdef CONFIG_SCHED_SMT -+ leader = NULL; -+ if (cpumask_weight(thread_cpumask(cpu)) > 1) { -+ cpumask_copy(&rq->thread_mask, thread_cpumask(cpu)); -+ cpumask_clear_cpu(cpu, &rq->thread_mask); -+ for_each_cpu(other_cpu, thread_cpumask(cpu)) { -+ if (rqshare == RQSHARE_SMT) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the smt_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ if (!other_rq->smt_leader) -+ other_rq->smt_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_SMT) -+ rq->cpu_locality[other_cpu] = LOCALITY_SMT; -+ } -+ rq->siblings_idle = siblings_cpu_idle; -+ smt_threads = true; -+ } -+#endif -+ } -+ -+#ifdef CONFIG_SMT_NICE -+ if (smt_threads) { -+ check_siblings = &check_smt_siblings; -+ wake_siblings = &wake_smt_siblings; -+ smt_schedule = &smt_should_schedule; -+ } -+#endif -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for_each_online_cpu(other_cpu) { -+ printk(KERN_DEBUG "MuQSS locality CPU %d to %d: %d\n", cpu, other_cpu, rq->cpu_locality[other_cpu]); -+ } -+ } -+} -+ -+/* FIXME freeing locked spinlock */ -+static void __init share_and_free_rq(struct rq *leader, struct rq *rq) -+{ -+ WARN_ON(rq->nr_running > 0); -+ -+ kfree(rq->node); -+ kfree(rq->sl); -+ kfree(rq->lock); -+ rq->node = leader->node; -+ rq->sl = leader->sl; -+ rq->lock = leader->lock; -+ rq->is_leader = false; -+ barrier(); -+ /* To make up for not unlocking the freed runlock */ -+ preempt_enable(); -+} -+ -+static void __init share_rqs(void) -+{ -+ struct rq *rq, *leader; -+ int cpu; -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ leader = rq->smp_leader; -+ -+ 
rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing SMP runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ share_and_free_rq(leader, rq); -+ } else -+ rq_unlock(rq); -+ } -+ -+#ifdef CONFIG_SCHED_MC -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ leader = rq->mc_leader; -+ -+ rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing MC runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ share_and_free_rq(leader, rq); -+ } else -+ rq_unlock(rq); -+ } -+#endif /* CONFIG_SCHED_MC */ -+ -+#ifdef CONFIG_SCHED_SMT -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ leader = rq->smt_leader; -+ -+ rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing SMT runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ share_and_free_rq(leader, rq); -+ } else -+ rq_unlock(rq); -+ } -+#endif /* CONFIG_SCHED_SMT */ -+} -+ -+static void __init setup_rq_orders(void) -+{ -+ int *selected_cpus, *ordered_cpus; -+ struct rq *rq, *other_rq; -+ int cpu, other_cpu, i; -+ -+ selected_cpus = kmalloc(sizeof(int) * NR_CPUS, GFP_ATOMIC); -+ ordered_cpus = kmalloc(sizeof(int) * NR_CPUS, GFP_ATOMIC); -+ -+ total_runqueues = 0; -+ for_each_online_cpu(cpu) { -+ int locality, total_rqs = 0, total_cpus = 0; -+ -+ rq = cpu_rq(cpu); -+ if (rq->is_leader) -+ total_runqueues++; -+ -+ for (locality = LOCALITY_SAME; locality <= LOCALITY_DISTANT; locality++) { -+ int selected_cpu_cnt, selected_cpu_idx, test_cpu_idx, cpu_idx, best_locality, test_cpu; -+ int ordered_cpus_idx; -+ -+ ordered_cpus_idx = -1; -+ selected_cpu_cnt = 0; -+ -+ for_each_online_cpu(test_cpu) { -+ if (cpu < num_online_cpus() / 2) -+ other_cpu = cpu + test_cpu; -+ else -+ other_cpu = cpu - test_cpu; -+ if (other_cpu < 0) -+ other_cpu += num_online_cpus(); -+ else -+ other_cpu %= num_online_cpus(); -+ /* gather CPUs of the same locality */ -+ if (rq->cpu_locality[other_cpu] == locality) { -+ selected_cpus[selected_cpu_cnt] = other_cpu; -+ selected_cpu_cnt++; -+ } -+ } -+ -+ /* reserve first CPU as starting point */ -+ if (selected_cpu_cnt > 0) { -+ ordered_cpus_idx++; -+ ordered_cpus[ordered_cpus_idx] = selected_cpus[ordered_cpus_idx]; -+ selected_cpus[ordered_cpus_idx] = -1; -+ } -+ -+ /* take each CPU and sort it within the same locality based on each inter-CPU localities */ -+ for (test_cpu_idx = 1; test_cpu_idx < selected_cpu_cnt; test_cpu_idx++) { -+ /* starting point with worst locality and current CPU */ -+ best_locality = LOCALITY_DISTANT; -+ selected_cpu_idx = test_cpu_idx; -+ -+ /* try to find the best locality within group */ -+ for (cpu_idx = 1; cpu_idx < selected_cpu_cnt; cpu_idx++) { -+ /* if CPU has not been used and locality is better */ -+ if (selected_cpus[cpu_idx] > -1) { -+ other_rq = cpu_rq(ordered_cpus[ordered_cpus_idx]); -+ if (best_locality > other_rq->cpu_locality[selected_cpus[cpu_idx]]) { -+ /* assign best locality and best CPU idx in array */ -+ best_locality = other_rq->cpu_locality[selected_cpus[cpu_idx]]; -+ selected_cpu_idx = cpu_idx; -+ } -+ } -+ } -+ -+ /* add our next best CPU to ordered list */ -+ ordered_cpus_idx++; -+ ordered_cpus[ordered_cpus_idx] = selected_cpus[selected_cpu_idx]; -+ /* mark this CPU as used */ -+ selected_cpus[selected_cpu_idx] = -1; -+ } -+ -+ /* set up RQ and CPU orders */ -+ for (test_cpu = 0; test_cpu <= ordered_cpus_idx; test_cpu++) { -+ other_rq = cpu_rq(ordered_cpus[test_cpu]); -+ /* set up cpu orders */ -+ rq->cpu_order[total_cpus++] = other_rq; -+ if (other_rq->is_leader) { -+ /* set 
up RQ orders */ -+ rq->rq_order[total_rqs++] = other_rq; -+ } -+ } -+ } -+ } -+ -+ kfree(selected_cpus); -+ kfree(ordered_cpus); -+ -+#ifdef CONFIG_X86 -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for (i = 0; i < total_runqueues; i++) { -+ printk(KERN_DEBUG "MuQSS CPU %d llc %d RQ order %d RQ %d llc %d\n", cpu, per_cpu(cpu_llc_id, cpu), i, -+ rq->rq_order[i]->cpu, per_cpu(cpu_llc_id, rq->rq_order[i]->cpu)); -+ } -+ } -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for (i = 0; i < num_online_cpus(); i++) { -+ printk(KERN_DEBUG "MuQSS CPU %d llc %d CPU order %d RQ %d llc %d\n", cpu, per_cpu(cpu_llc_id, cpu), i, -+ rq->cpu_order[i]->cpu, per_cpu(cpu_llc_id, rq->cpu_order[i]->cpu)); -+ } -+ } -+#endif -+} -+ -+void __init sched_init_smp(void) -+{ -+ sched_init_numa(); -+ -+ /* -+ * There's no userspace yet to cause hotplug operations; hence all the -+ * cpu masks are stable and all blatant races in the below code cannot -+ * happen. -+ */ -+ mutex_lock(&sched_domains_mutex); -+ sched_init_domains(cpu_active_mask); -+ mutex_unlock(&sched_domains_mutex); -+ -+ /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -+ BUG(); -+ -+ local_irq_disable(); -+ mutex_lock(&sched_domains_mutex); -+ lock_all_rqs(); -+ -+ printk(KERN_INFO "MuQSS possible/present/online CPUs: %d/%d/%d\n", -+ num_possible_cpus(), num_present_cpus(), num_online_cpus()); -+ -+ select_leaders(); -+ -+ unlock_all_rqs(); -+ mutex_unlock(&sched_domains_mutex); -+ -+ share_rqs(); -+ -+ local_irq_enable(); -+ -+ setup_rq_orders(); -+ -+ switch (rqshare) { -+ case RQSHARE_ALL: -+ /* This should only ever read 1 */ -+ printk(KERN_INFO "MuQSS runqueue share type ALL total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_SMP: -+ printk(KERN_INFO "MuQSS runqueue share type SMP total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_MC: -+ printk(KERN_INFO "MuQSS runqueue share type MC total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_MC_LLC: -+ printk(KERN_INFO "MuQSS runqueue share type LLC total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_SMT: -+ printk(KERN_INFO "MuQSS runqueue share type SMT total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_NONE: -+ printk(KERN_INFO "MuQSS runqueue share type NONE total runqueues: %d\n", -+ total_runqueues); -+ break; -+ } -+ -+ sched_smp_initialized = true; -+} -+#else -+void __init sched_init_smp(void) -+{ -+ sched_smp_initialized = true; -+} -+#endif /* CONFIG_SMP */ -+ -+int in_sched_functions(unsigned long addr) -+{ -+ return in_lock_functions(addr) || -+ (addr >= (unsigned long)__sched_text_start -+ && addr < (unsigned long)__sched_text_end); -+} -+ -+#ifdef CONFIG_CGROUP_SCHED -+/* task group related information */ -+struct task_group { -+ struct cgroup_subsys_state css; -+ -+ struct rcu_head rcu; -+ struct list_head list; -+ -+ struct task_group *parent; -+ struct list_head siblings; -+ struct list_head children; -+}; -+ -+/* -+ * Default task group. -+ * Every task in system belongs to this group at bootup. 
-+ */ -+struct task_group root_task_group; -+LIST_HEAD(task_groups); -+ -+/* Cacheline aligned slab cache for task_group */ -+static struct kmem_cache *task_group_cache __read_mostly; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void __init sched_init(void) -+{ -+#ifdef CONFIG_SMP -+ int cpu_ids; -+#endif -+ int i; -+ struct rq *rq; -+ -+ wait_bit_init(); -+ -+ prio_ratios[0] = 128; -+ for (i = 1 ; i < NICE_WIDTH ; i++) -+ prio_ratios[i] = prio_ratios[i - 1] * 11 / 10; -+ -+ skiplist_node_init(&init_task.node); -+ -+#ifdef CONFIG_SMP -+ init_defrootdomain(); -+ cpumask_clear(&cpu_idle_map); -+#else -+ uprq = &per_cpu(runqueues, 0); -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+ task_group_cache = KMEM_CACHE(task_group, 0); -+ -+ list_add(&root_task_group.list, &task_groups); -+ INIT_LIST_HEAD(&root_task_group.children); -+ INIT_LIST_HEAD(&root_task_group.siblings); -+#endif /* CONFIG_CGROUP_SCHED */ -+ for_each_possible_cpu(i) { -+ rq = cpu_rq(i); -+ rq->node = kmalloc(sizeof(skiplist_node), GFP_ATOMIC); -+ skiplist_init(rq->node); -+ rq->sl = new_skiplist(rq->node); -+ rq->lock = kmalloc(sizeof(raw_spinlock_t), GFP_ATOMIC); -+ raw_spin_lock_init(rq->lock); -+ rq->nr_running = 0; -+ rq->nr_uninterruptible = 0; -+ rq->nr_switches = 0; -+ rq->clock = rq->old_clock = rq->last_niffy = rq->niffies = 0; -+ rq->last_jiffy = jiffies; -+ rq->user_ns = rq->nice_ns = rq->softirq_ns = rq->system_ns = -+ rq->iowait_ns = rq->idle_ns = 0; -+ rq->dither = 0; -+ set_rq_task(rq, &init_task); -+ rq->iso_ticks = 0; -+ rq->iso_refractory = false; -+#ifdef CONFIG_SMP -+ rq->is_leader = true; -+ rq->smp_leader = NULL; -+#ifdef CONFIG_SCHED_MC -+ rq->mc_leader = NULL; -+#endif -+#ifdef CONFIG_SCHED_SMT -+ rq->smt_leader = NULL; -+#endif -+ rq->sd = NULL; -+ rq->rd = NULL; -+ rq->online = false; -+ rq->cpu = i; -+ rq_attach_root(rq, &def_root_domain); -+#endif -+ init_rq_hrexpiry(rq); -+ atomic_set(&rq->nr_iowait, 0); -+ } -+ -+#ifdef CONFIG_SMP -+ cpu_ids = i; -+ /* -+ * Set the base locality for cpu cache distance calculation to -+ * "distant" (3). Make sure the distance from a CPU to itself is 0. -+ */ -+ for_each_possible_cpu(i) { -+ int j; -+ -+ rq = cpu_rq(i); -+#ifdef CONFIG_SCHED_SMT -+ rq->siblings_idle = sole_cpu_idle; -+#endif -+#ifdef CONFIG_SCHED_MC -+ rq->cache_idle = sole_cpu_idle; -+#endif -+ rq->cpu_locality = kmalloc(cpu_ids * sizeof(int *), GFP_ATOMIC); -+ for_each_possible_cpu(j) { -+ if (i == j) -+ rq->cpu_locality[j] = LOCALITY_SAME; -+ else -+ rq->cpu_locality[j] = LOCALITY_DISTANT; -+ } -+ rq->rq_order = kmalloc(cpu_ids * sizeof(struct rq *), GFP_ATOMIC); -+ rq->cpu_order = kmalloc(cpu_ids * sizeof(struct rq *), GFP_ATOMIC); -+ rq->rq_order[0] = rq->cpu_order[0] = rq; -+ for (j = 1; j < cpu_ids; j++) -+ rq->rq_order[j] = rq->cpu_order[j] = cpu_rq(j); -+ } -+#endif -+ -+ /* -+ * The boot idle thread does lazy MMU switching as well: -+ */ -+ mmgrab(&init_mm); -+ enter_lazy_tlb(&init_mm, current); -+ -+ /* -+ * Make us the idle thread. Technically, schedule() should not be -+ * called from this thread, however somewhere below it might be, -+ * but because we are the idle thread, we just pick up running again -+ * when this runqueue becomes "idle". 
-+ */ -+ init_idle(current, smp_processor_id()); -+ -+#ifdef CONFIG_SMP -+ idle_thread_set_boot_cpu(); -+#endif /* SMP */ -+ -+ init_schedstats(); -+ -+ psi_init(); -+} -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+static inline int preempt_count_equals(int preempt_offset) -+{ -+ int nested = preempt_count() + rcu_preempt_depth(); -+ -+ return (nested == preempt_offset); -+} -+ -+void __might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* -+ * Blocking primitives will set (and therefore destroy) current->state, -+ * since we will exit with TASK_RUNNING make sure we enter with it, -+ * otherwise we will destroy state. -+ */ -+ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, -+ "do not call blocking ops when !TASK_RUNNING; " -+ "state=%lx set at [<%p>] %pS\n", -+ current->state, -+ (void *)current->task_state_change, -+ (void *)current->task_state_change); -+ -+ ___might_sleep(file, line, preempt_offset); -+} -+EXPORT_SYMBOL(__might_sleep); -+ -+void __cant_sleep(const char *file, int line, int preempt_offset) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > preempt_offset) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); -+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_sleep); -+ -+void ___might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* Ratelimiting timestamp: */ -+ static unsigned long prev_jiffy; -+ -+ unsigned long preempt_disable_ip; -+ -+ /* WARN_ON_ONCE() by default, no rate limit required: */ -+ rcu_sleep_check(); -+ -+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ !is_idle_task(current) && !current->non_block_count) || -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ /* Save this before calling printk(), since that will clobber it: */ -+ preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ printk(KERN_ERR -+ "BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ printk(KERN_ERR -+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); -+ -+ if (task_stack_end_corrupted(current)) -+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ -+ debug_show_held_locks(current); -+ if (irqs_disabled()) -+ print_irqtrace_events(current); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && !preempt_count_equals(preempt_offset)) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); -+ } -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL(___might_sleep); -+#endif -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+static inline void normalise_rt_tasks(void) -+{ -+ struct sched_attr attr = {}; -+ struct task_struct *g, *p; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ read_lock(&tasklist_lock); -+ for_each_process_thread(g, p) { -+ /* -+ * Only normalize user tasks: -+ */ -+ if (p->flags & 
PF_KTHREAD) -+ continue; -+ -+ if (!rt_task(p) && !iso_task(p)) -+ continue; -+ -+ rq = task_rq_lock(p, &rf); -+ __setscheduler(p, rq, SCHED_NORMAL, 0, &attr, false); -+ task_rq_unlock(rq, p, &rf); -+ } -+ read_unlock(&tasklist_lock); -+} -+ -+void normalize_rt_tasks(void) -+{ -+ normalise_rt_tasks(); -+} -+#endif /* CONFIG_MAGIC_SYSRQ */ -+ -+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) -+/* -+ * These functions are only useful for the IA64 MCA handling, or kdb. -+ * -+ * They can only be called when the whole system has been -+ * stopped - every CPU needs to be quiescent, and no scheduling -+ * activity can take place. Using them for anything else would -+ * be a serious bug, and as a result, they aren't even visible -+ * under any other configuration. -+ */ -+ -+/** -+ * curr_task - return the current task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ * -+ * Return: The current task for @cpu. -+ */ -+struct task_struct *curr_task(int cpu) -+{ -+ return cpu_curr(cpu); -+} -+ -+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ -+ -+#ifdef CONFIG_IA64 -+/** -+ * ia64_set_curr_task - set the current task for a given CPU. -+ * @cpu: the processor in question. -+ * @p: the task pointer to set. -+ * -+ * Description: This function must only be used when non-maskable interrupts -+ * are serviced on a separate stack. It allows the architecture to switch the -+ * notion of the current task on a CPU in a non-blocking manner. This function -+ * must be called with all CPU's synchronised, and interrupts disabled, the -+ * and caller must save the original value of the current task (see -+ * curr_task() above) and restore that value before reenabling interrupts and -+ * re-starting the system. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ */ -+void ia64_set_curr_task(int cpu, struct task_struct *p) -+{ -+ cpu_curr(cpu) = p; -+} -+ -+#endif -+ -+void init_idle_bootup_task(struct task_struct *idle) -+{} -+ -+#ifdef CONFIG_SCHED_DEBUG -+__read_mostly bool sched_debug_enabled; -+ -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{ -+ seq_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+static void sched_free_group(struct task_group *tg) -+{ -+ kmem_cache_free(task_group_cache, tg); -+} -+ -+/* allocate runqueue etc for a new task group */ -+struct task_group *sched_create_group(struct task_group *parent) -+{ -+ struct task_group *tg; -+ -+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); -+ if (!tg) -+ return ERR_PTR(-ENOMEM); -+ -+ return tg; -+} -+ -+void sched_online_group(struct task_group *tg, struct task_group *parent) -+{ -+} -+ -+/* rcu callback to free various structures associated with a task group */ -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ /* Now it should be safe to free those cfs_rqs */ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+void sched_destroy_group(struct task_group *tg) -+{ -+ /* Wait for possible concurrent references to cfs_rqs complete */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ -+void sched_offline_group(struct task_group *tg) -+{ -+} -+ -+static inline struct task_group *css_tg(struct cgroup_subsys_state *css) -+{ -+ return css ? 
container_of(css, struct task_group, css) : NULL; -+} -+ -+static struct cgroup_subsys_state * -+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -+{ -+ struct task_group *parent = css_tg(parent_css); -+ struct task_group *tg; -+ -+ if (!parent) { -+ /* This is early initialization for the top cgroup */ -+ return &root_task_group.css; -+ } -+ -+ tg = sched_create_group(parent); -+ if (IS_ERR(tg)) -+ return ERR_PTR(-ENOMEM); -+ return &tg->css; -+} -+ -+/* Expose task group only after completing cgroup initialization */ -+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ struct task_group *parent = css_tg(css->parent); -+ -+ if (parent) -+ sched_online_group(tg, parent); -+ return 0; -+} -+ -+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ sched_offline_group(tg); -+} -+ -+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ /* -+ * Relies on the RCU grace period between css_released() and this. -+ */ -+ sched_free_group(tg); -+} -+ -+static void cpu_cgroup_fork(struct task_struct *task) -+{ -+} -+ -+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) -+{ -+ return 0; -+} -+ -+static void cpu_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+ -+static struct cftype cpu_legacy_files[] = { -+ { } /* Terminate */ -+}; -+ -+static struct cftype cpu_files[] = { -+ { } /* terminate */ -+}; -+ -+static int cpu_extra_stat_show(struct seq_file *sf, -+ struct cgroup_subsys_state *css) -+{ -+ return 0; -+} -+ -+struct cgroup_subsys cpu_cgrp_subsys = { -+ .css_alloc = cpu_cgroup_css_alloc, -+ .css_online = cpu_cgroup_css_online, -+ .css_released = cpu_cgroup_css_released, -+ .css_free = cpu_cgroup_css_free, -+ .css_extra_stat_show = cpu_extra_stat_show, -+ .fork = cpu_cgroup_fork, -+ .can_attach = cpu_cgroup_can_attach, -+ .attach = cpu_cgroup_attach, -+ .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, -+ .early_init = true, -+ .threaded = true, -+}; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void call_trace_sched_update_nr_running(struct rq *rq, int count) -+{ -+ trace_sched_update_nr_running_tp(rq, count); -+} -+ -+/* CFS Compat */ -+#ifdef CONFIG_RCU_TORTURE_TEST -+int sysctl_sched_rt_runtime; -+#endif -diff --git a/kernel/sched/MuQSS.h b/kernel/sched/MuQSS.h -new file mode 100644 -index 000000000000..09a1f2fe64ba ---- /dev/null -+++ b/kernel/sched/MuQSS.h -@@ -0,0 +1,1070 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef MUQSS_SCHED_H -+#define MUQSS_SCHED_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_PARAVIRT -+#include -+#endif -+ -+#include "cpupri.h" -+ -+#include -+ -+#ifdef CONFIG_SCHED_DEBUG -+# define SCHED_WARN_ON(x) WARN_ONCE(x, #x) -+#else -+# define SCHED_WARN_ON(x) ((void)(x)) -+#endif -+ -+/* -+ * wake flags -+ */ -+#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -+#define WF_FORK 0x02 /* child wakeup after fork */ -+#define WF_MIGRATED 0x04 /* internal use, task got migrated */ 
-+#define WF_ON_CPU 0x08 /* Wakee is on_cpu */ -+ -+/* task_struct::on_rq states: */ -+#define TASK_ON_RQ_QUEUED 1 -+#define TASK_ON_RQ_MIGRATING 2 -+ -+extern void call_trace_sched_update_nr_running(struct rq *rq, int count); -+ -+struct rq; -+ -+#ifdef CONFIG_SMP -+ -+static inline bool sched_asym_prefer(int a, int b) -+{ -+ return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b); -+} -+ -+struct perf_domain { -+ struct em_perf_domain *em_pd; -+ struct perf_domain *next; -+ struct rcu_head rcu; -+}; -+ -+/* Scheduling group status flags */ -+#define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */ -+#define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */ -+ -+/* -+ * We add the notion of a root-domain which will be used to define per-domain -+ * variables. Each exclusive cpuset essentially defines an island domain by -+ * fully partitioning the member cpus from any other cpuset. Whenever a new -+ * exclusive cpuset is created, we also create and attach a new root-domain -+ * object. -+ * -+ */ -+struct root_domain { -+ atomic_t refcount; -+ atomic_t rto_count; -+ struct rcu_head rcu; -+ cpumask_var_t span; -+ cpumask_var_t online; -+ -+ /* -+ * Indicate pullable load on at least one CPU, e.g: -+ * - More than one runnable task -+ * - Running task is misfit -+ */ -+ int overload; -+ -+ /* Indicate one or more cpus over-utilized (tipping point) */ -+ int overutilized; -+ -+ /* -+ * The bit corresponding to a CPU gets set here if such CPU has more -+ * than one runnable -deadline task (as it is below for RT tasks). -+ */ -+ cpumask_var_t dlo_mask; -+ atomic_t dlo_count; -+ /* Replace unused CFS structures with void */ -+ //struct dl_bw dl_bw; -+ //struct cpudl cpudl; -+ void *dl_bw; -+ void *cpudl; -+ -+ /* -+ * The "RT overload" flag: it gets set if a CPU has more than -+ * one runnable RT task. -+ */ -+ cpumask_var_t rto_mask; -+ //struct cpupri cpupri; -+ void *cpupri; -+ -+ unsigned long max_cpu_capacity; -+ -+ /* -+ * NULL-terminated list of performance domains intersecting with the -+ * CPUs of the rd. Protected by RCU. -+ */ -+ struct perf_domain *pd; -+}; -+ -+extern void init_defrootdomain(void); -+extern int sched_init_domains(const struct cpumask *cpu_map); -+extern void rq_attach_root(struct rq *rq, struct root_domain *rd); -+ -+static inline void cpupri_cleanup(void __maybe_unused *cpupri) -+{ -+} -+ -+static inline void cpudl_cleanup(void __maybe_unused *cpudl) -+{ -+} -+ -+static inline void init_dl_bw(void __maybe_unused *dl_bw) -+{ -+} -+ -+static inline int cpudl_init(void __maybe_unused *dl_bw) -+{ -+ return 0; -+} -+ -+static inline int cpupri_init(void __maybe_unused *cpupri) -+{ -+ return 0; -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * This is the main, per-CPU runqueue data structure. -+ * This data should only be modified by the local cpu. -+ */ -+struct rq { -+ raw_spinlock_t *lock; -+ raw_spinlock_t *orig_lock; -+ -+ struct task_struct __rcu *curr; -+ struct task_struct *idle; -+ struct task_struct *stop; -+ struct mm_struct *prev_mm; -+ -+ unsigned int nr_running; -+ /* -+ * This is part of a global counter where only the total sum -+ * over all CPUs matters. A task can increase this counter on -+ * one CPU and if it got migrated afterwards it may decrease -+ * it on another CPU. 
Always updated under the runqueue lock: -+ */ -+ unsigned long nr_uninterruptible; -+#ifdef CONFIG_SMP -+ unsigned int ttwu_pending; -+#endif -+ u64 nr_switches; -+ -+ /* Stored data about rq->curr to work outside rq lock */ -+ u64 rq_deadline; -+ int rq_prio; -+ -+ /* Best queued id for use outside lock */ -+ u64 best_key; -+ -+ unsigned long last_scheduler_tick; /* Last jiffy this RQ ticked */ -+ unsigned long last_jiffy; /* Last jiffy this RQ updated rq clock */ -+ u64 niffies; /* Last time this RQ updated rq clock */ -+ u64 last_niffy; /* Last niffies as updated by local clock */ -+ u64 last_jiffy_niffies; /* Niffies @ last_jiffy */ -+ -+ u64 load_update; /* When we last updated load */ -+ unsigned long load_avg; /* Rolling load average */ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ u64 irq_load_update; /* When we last updated IRQ load */ -+ unsigned long irq_load_avg; /* Rolling IRQ load average */ -+#endif -+#ifdef CONFIG_SMT_NICE -+ struct mm_struct *rq_mm; -+ int rq_smt_bias; /* Policy/nice level bias across smt siblings */ -+#endif -+ /* Accurate timekeeping data */ -+ unsigned long user_ns, nice_ns, irq_ns, softirq_ns, system_ns, -+ iowait_ns, idle_ns; -+ atomic_t nr_iowait; -+ -+#ifdef CONFIG_MEMBARRIER -+ int membarrier_state; -+#endif -+ -+ skiplist_node *node; -+ skiplist *sl; -+#ifdef CONFIG_SMP -+ struct task_struct *preempt; /* Preempt triggered on this task */ -+ struct task_struct *preempting; /* Hint only, what task is preempting */ -+ -+ int cpu; /* cpu of this runqueue */ -+ bool online; -+ -+ struct root_domain *rd; -+ struct sched_domain *sd; -+ -+ unsigned long cpu_capacity_orig; -+ -+ int *cpu_locality; /* CPU relative cache distance */ -+ struct rq **rq_order; /* Shared RQs ordered by relative cache distance */ -+ struct rq **cpu_order; /* RQs of discrete CPUs ordered by distance */ -+ -+ bool is_leader; -+ struct rq *smp_leader; /* First physical CPU per node */ -+#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+ struct sched_avg avg_thermal; -+#endif /* CONFIG_SCHED_THERMAL_PRESSURE */ -+#ifdef CONFIG_SCHED_SMT -+ struct rq *smt_leader; /* First logical CPU in SMT siblings */ -+ cpumask_t thread_mask; -+ bool (*siblings_idle)(struct rq *rq); -+ /* See if all smt siblings are idle */ -+#endif /* CONFIG_SCHED_SMT */ -+#ifdef CONFIG_SCHED_MC -+ struct rq *mc_leader; /* First logical CPU in MC siblings */ -+ cpumask_t core_mask; -+ bool (*cache_idle)(struct rq *rq); -+ /* See if all cache siblings are idle */ -+#endif /* CONFIG_SCHED_MC */ -+#endif /* CONFIG_SMP */ -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ u64 prev_irq_time; -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+#ifdef CONFIG_PARAVIRT -+ u64 prev_steal_time; -+#endif /* CONFIG_PARAVIRT */ -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ u64 prev_steal_time_rq; -+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ -+ -+ u64 clock, old_clock, last_tick; -+ /* Ensure that all clocks are in the same cache line */ -+ u64 clock_task ____cacheline_aligned; -+ int dither; -+ -+ int iso_ticks; -+ bool iso_refractory; -+ -+#ifdef CONFIG_HIGH_RES_TIMERS -+ struct hrtimer hrexpiry_timer; -+#endif -+ -+ int rt_nr_running; /* Number real time tasks running */ -+#ifdef CONFIG_SCHEDSTATS -+ -+ /* latency stats */ -+ struct sched_info rq_sched_info; -+ unsigned long long rq_cpu_time; -+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? 
*/ -+ -+ /* sys_sched_yield() stats */ -+ unsigned int yld_count; -+ -+ /* schedule() stats */ -+ unsigned int sched_switch; -+ unsigned int sched_count; -+ unsigned int sched_goidle; -+ -+ /* try_to_wake_up() stats */ -+ unsigned int ttwu_count; -+ unsigned int ttwu_local; -+#endif /* CONFIG_SCHEDSTATS */ -+ -+#ifdef CONFIG_CPU_IDLE -+ /* Must be inspected within a rcu lock section */ -+ struct cpuidle_state *idle_state; -+#endif -+}; -+ -+static inline u64 __rq_clock_broken(struct rq *rq) -+{ -+ return READ_ONCE(rq->clock); -+} -+ -+static inline u64 rq_clock(struct rq *rq) -+{ -+ lockdep_assert_held(rq->lock); -+ -+ return rq->clock; -+} -+ -+static inline u64 rq_clock_task(struct rq *rq) -+{ -+ lockdep_assert_held(rq->lock); -+ -+ return rq->clock_task; -+} -+ -+/** -+ * By default the decay is the default pelt decay period. -+ * The decay shift can change the decay period in -+ * multiples of 32. -+ * Decay shift Decay period(ms) -+ * 0 32 -+ * 1 64 -+ * 2 128 -+ * 3 256 -+ * 4 512 -+ */ -+extern int sched_thermal_decay_shift; -+ -+static inline u64 rq_clock_thermal(struct rq *rq) -+{ -+ return rq_clock_task(rq) >> sched_thermal_decay_shift; -+} -+ -+struct rq_flags { -+ unsigned long flags; -+}; -+ -+#ifdef CONFIG_SMP -+struct rq *cpu_rq(int cpu); -+#endif -+ -+#ifndef CONFIG_SMP -+extern struct rq *uprq; -+#define cpu_rq(cpu) (uprq) -+#define this_rq() (uprq) -+#define raw_rq() (uprq) -+#define task_rq(p) (uprq) -+#define cpu_curr(cpu) ((uprq)->curr) -+#else /* CONFIG_SMP */ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+#define this_rq() this_cpu_ptr(&runqueues) -+#define raw_rq() raw_cpu_ptr(&runqueues) -+#define task_rq(p) cpu_rq(task_cpu(p)) -+#endif /* CONFIG_SMP */ -+ -+static inline int task_current(struct rq *rq, struct task_struct *p) -+{ -+ return rq->curr == p; -+} -+ -+static inline int task_running(struct rq *rq, struct task_struct *p) -+{ -+#ifdef CONFIG_SMP -+ return p->on_cpu; -+#else -+ return task_current(rq, p); -+#endif -+} -+ -+static inline int task_on_rq_queued(struct task_struct *p) -+{ -+ return p->on_rq == TASK_ON_RQ_QUEUED; -+} -+ -+static inline int task_on_rq_migrating(struct task_struct *p) -+{ -+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+} -+ -+static inline void rq_lock(struct rq *rq) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock(rq->lock); -+} -+ -+static inline void rq_unlock(struct rq *rq) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(rq->lock); -+} -+ -+static inline void rq_lock_irq(struct rq *rq) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irq(rq->lock); -+} -+ -+static inline void rq_unlock_irq(struct rq *rq, struct rq_flags __always_unused *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irq(rq->lock); -+} -+ -+static inline void rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irqsave(rq->lock, rf->flags); -+} -+ -+static inline void rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irqrestore(rq->lock, rf->flags); -+} -+ -+static inline struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ while (42) { -+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); -+ rq = task_rq(p); -+ raw_spin_lock(rq->lock); -+ if (likely(rq == task_rq(p))) -+ break; -+ raw_spin_unlock(rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+ } -+ return rq; -+} -+ -+static inline void task_rq_unlock(struct rq *rq, struct task_struct *p, 
struct rq_flags *rf) -+ __releases(rq->lock) -+ __releases(p->pi_lock) -+{ -+ rq_unlock(rq); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+} -+ -+static inline struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags __always_unused *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ while (42) { -+ rq = task_rq(p); -+ raw_spin_lock(rq->lock); -+ if (likely(rq == task_rq(p))) -+ break; -+ raw_spin_unlock(rq->lock); -+ } -+ return rq; -+} -+ -+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags __always_unused *rf) -+{ -+ rq_unlock(rq); -+} -+ -+static inline struct rq * -+this_rq_lock_irq(struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ rq_lock(rq); -+ return rq; -+} -+ -+/* -+ * {de,en}queue flags: Most not used on MuQSS. -+ * -+ * DEQUEUE_SLEEP - task is no longer runnable -+ * ENQUEUE_WAKEUP - task just became runnable -+ * -+ * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks -+ * are in a known state which allows modification. Such pairs -+ * should preserve as much state as possible. -+ * -+ * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location -+ * in the runqueue. -+ * -+ * ENQUEUE_HEAD - place at front of runqueue (tail if not specified) -+ * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline) -+ * ENQUEUE_MIGRATED - the task was migrated during wakeup -+ * -+ */ -+ -+#define DEQUEUE_SLEEP 0x01 -+#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */ -+ -+#define ENQUEUE_WAKEUP 0x01 -+#define ENQUEUE_RESTORE 0x02 -+ -+#ifdef CONFIG_SMP -+#define ENQUEUE_MIGRATED 0x40 -+#else -+#define ENQUEUE_MIGRATED 0x00 -+#endif -+ -+#ifdef CONFIG_NUMA -+enum numa_topology_type { -+ NUMA_DIRECT, -+ NUMA_GLUELESS_MESH, -+ NUMA_BACKPLANE, -+}; -+extern enum numa_topology_type sched_numa_topology_type; -+extern int sched_max_numa_distance; -+extern bool find_numa_distance(int distance); -+extern void sched_init_numa(void); -+extern void sched_domains_numa_masks_set(unsigned int cpu); -+extern void sched_domains_numa_masks_clear(unsigned int cpu); -+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); -+#else -+static inline void sched_init_numa(void) { } -+static inline void sched_domains_numa_masks_set(unsigned int cpu) { } -+static inline void sched_domains_numa_masks_clear(unsigned int cpu) { } -+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return nr_cpu_ids; -+} -+#endif -+ -+extern struct mutex sched_domains_mutex; -+extern struct static_key_false sched_schedstats; -+ -+#define rcu_dereference_check_sched_domain(p) \ -+ rcu_dereference_check((p), \ -+ lockdep_is_held(&sched_domains_mutex)) -+ -+#ifdef CONFIG_SMP -+ -+/* -+ * The domain tree (rq->sd) is protected by RCU's quiescent state transition. -+ * See destroy_sched_domains: call_rcu for details. -+ * -+ * The domain tree of any CPU may only be accessed from within -+ * preempt-disabled sections. -+ */ -+#define for_each_domain(cpu, __sd) \ -+ for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \ -+ __sd; __sd = __sd->parent) -+ -+/** -+ * highest_flag_domain - Return highest sched_domain containing flag. -+ * @cpu: The cpu whose highest level of sched domain is to -+ * be returned. -+ * @flag: The flag to check for the highest sched_domain -+ * for the given cpu. -+ * -+ * Returns the highest sched_domain of a cpu which contains the given flag. 
-+ */ -+static inline struct sched_domain *highest_flag_domain(int cpu, int flag) -+{ -+ struct sched_domain *sd, *hsd = NULL; -+ -+ for_each_domain(cpu, sd) { -+ if (!(sd->flags & flag)) -+ break; -+ hsd = sd; -+ } -+ -+ return hsd; -+} -+ -+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) -+{ -+ struct sched_domain *sd; -+ -+ for_each_domain(cpu, sd) { -+ if (sd->flags & flag) -+ break; -+ } -+ -+ return sd; -+} -+ -+DECLARE_PER_CPU(struct sched_domain *, sd_llc); -+DECLARE_PER_CPU(int, sd_llc_size); -+DECLARE_PER_CPU(int, sd_llc_id); -+DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); -+DECLARE_PER_CPU(struct sched_domain *, sd_numa); -+DECLARE_PER_CPU(struct sched_domain *, sd_asym_packing); -+DECLARE_PER_CPU(struct sched_domain *, sd_asym_cpucapacity); -+ -+struct sched_group_capacity { -+ atomic_t ref; -+ /* -+ * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity -+ * for a single CPU. -+ */ -+ unsigned long capacity; -+ unsigned long min_capacity; /* Min per-CPU capacity in group */ -+ unsigned long max_capacity; /* Max per-CPU capacity in group */ -+ unsigned long next_update; -+ int imbalance; /* XXX unrelated to capacity but shared group state */ -+ -+#ifdef CONFIG_SCHED_DEBUG -+ int id; -+#endif -+ -+ unsigned long cpumask[]; /* balance mask */ -+}; -+ -+struct sched_group { -+ struct sched_group *next; /* Must be a circular list */ -+ atomic_t ref; -+ -+ unsigned int group_weight; -+ struct sched_group_capacity *sgc; -+ int asym_prefer_cpu; /* cpu of highest priority in group */ -+ -+ /* -+ * The CPUs this group covers. -+ * -+ * NOTE: this field is variable length. (Allocated dynamically -+ * by attaching extra space to the end of the structure, -+ * depending on how many CPUs the kernel has booted up with) -+ */ -+ unsigned long cpumask[0]; -+}; -+ -+static inline struct cpumask *sched_group_span(struct sched_group *sg) -+{ -+ return to_cpumask(sg->cpumask); -+} -+ -+/* -+ * See build_balance_mask(). -+ */ -+static inline struct cpumask *group_balance_mask(struct sched_group *sg) -+{ -+ return to_cpumask(sg->sgc->cpumask); -+} -+ -+/** -+ * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. -+ * @group: The group whose first cpu is to be returned. 
-+ */ -+static inline unsigned int group_first_cpu(struct sched_group *group) -+{ -+ return cpumask_first(sched_group_span(group)); -+} -+ -+ -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+void register_sched_domain_sysctl(void); -+void dirty_sched_domain_sysctl(int cpu); -+void unregister_sched_domain_sysctl(void); -+#else -+static inline void register_sched_domain_sysctl(void) -+{ -+} -+static inline void dirty_sched_domain_sysctl(int cpu) -+{ -+} -+static inline void unregister_sched_domain_sysctl(void) -+{ -+} -+#endif -+ -+extern void flush_smp_call_function_from_idle(void); -+ -+extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask); -+extern void set_rq_online (struct rq *rq); -+extern void set_rq_offline(struct rq *rq); -+extern bool sched_smp_initialized; -+ -+static inline void update_group_capacity(struct sched_domain *sd, int cpu) -+{ -+} -+ -+static inline void trigger_load_balance(struct rq *rq) -+{ -+} -+ -+#define sched_feat(x) 0 -+ -+#else /* CONFIG_SMP */ -+ -+static inline void flush_smp_call_function_from_idle(void) { } -+ -+#endif /* CONFIG_SMP */ -+ -+#ifdef CONFIG_CPU_IDLE -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+ rq->idle_state = idle_state; -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ SCHED_WARN_ON(!rcu_read_lock_held()); -+ return rq->idle_state; -+} -+#else -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ return NULL; -+} -+#endif -+ -+#ifdef CONFIG_SCHED_DEBUG -+extern bool sched_debug_enabled; -+#endif -+ -+extern void schedule_idle(void); -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+struct irqtime { -+ u64 total; -+ u64 tick_delta; -+ u64 irq_start_time; -+ struct u64_stats_sync sync; -+}; -+ -+DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -+ -+/* -+ * Returns the irqtime minus the softirq time computed by ksoftirqd. -+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime -+ * and never move forward. 
-+ */ -+static inline u64 irq_time_read(int cpu) -+{ -+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); -+ unsigned int seq; -+ u64 total; -+ -+ do { -+ seq = __u64_stats_fetch_begin(&irqtime->sync); -+ total = irqtime->total; -+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); -+ -+ return total; -+} -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+ -+static inline bool sched_stop_runnable(struct rq *rq) -+{ -+ return rq->stop && task_on_rq_queued(rq->stop); -+} -+ -+#ifdef CONFIG_SMP -+static inline int cpu_of(struct rq *rq) -+{ -+ return rq->cpu; -+} -+#else /* CONFIG_SMP */ -+static inline int cpu_of(struct rq *rq) -+{ -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_CPU_FREQ -+DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); -+ -+static inline void cpufreq_trigger(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, -+ cpu_of(rq))); -+ -+ if (data) -+ data->func(data, rq->niffies, flags); -+} -+#else -+static inline void cpufreq_trigger(struct rq *rq, unsigned int flag) -+{ -+} -+#endif /* CONFIG_CPU_FREQ */ -+ -+static __always_inline -+unsigned int uclamp_rq_util_with(struct rq __maybe_unused *rq, unsigned int util, -+ struct task_struct __maybe_unused *p) -+{ -+ return util; -+} -+ -+static inline bool uclamp_is_used(void) -+{ -+ return false; -+} -+ -+#ifndef arch_scale_freq_tick -+static __always_inline -+void arch_scale_freq_tick(void) -+{ -+} -+#endif -+ -+#ifdef arch_scale_freq_capacity -+#ifndef arch_scale_freq_invariant -+#define arch_scale_freq_invariant() (true) -+#endif -+#else /* arch_scale_freq_capacity */ -+#define arch_scale_freq_invariant() (false) -+#endif -+ -+#ifdef CONFIG_64BIT -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ return tsk_seruntime(t); -+} -+#else -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ struct rq_flags rf; -+ u64 ns; -+ struct rq *rq; -+ -+ rq = task_rq_lock(t, &rf); -+ ns = tsk_seruntime(t); -+ task_rq_unlock(rq, t, &rf); -+ -+ return ns; -+} -+#endif -+ -+#ifndef arch_scale_freq_capacity -+/** -+ * arch_scale_freq_capacity - get the frequency scale factor of a given CPU. -+ * @cpu: the CPU in question. -+ * -+ * Return: the frequency scale factor normalized against SCHED_CAPACITY_SCALE, i.e. -+ * -+ * f_curr -+ * ------ * SCHED_CAPACITY_SCALE -+ * f_max -+ */ -+static __always_inline -+unsigned long arch_scale_freq_capacity(int cpu) -+{ -+ return SCHED_CAPACITY_SCALE; -+} -+#endif -+ -+#ifdef CONFIG_NO_HZ_FULL -+extern bool sched_can_stop_tick(struct rq *rq); -+extern int __init sched_tick_offload_init(void); -+ -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out of -+ * nohz mode if necessary. 
-+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (sched_can_stop_tick(rq)) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else -+static inline int sched_tick_offload_init(void) { return 0; } -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+#define SCHED_FLAG_SUGOV 0x10000000 -+ -+static inline bool rt_rq_is_runnable(struct rq *rt_rq) -+{ -+ return rt_rq->rt_nr_running; -+} -+ -+/** -+ * enum schedutil_type - CPU utilization type -+ * @FREQUENCY_UTIL: Utilization used to select frequency -+ * @ENERGY_UTIL: Utilization used during energy calculation -+ * -+ * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time -+ * need to be aggregated differently depending on the usage made of them. This -+ * enum is used within schedutil_freq_util() to differentiate the types of -+ * utilization expected by the callers, and adjust the aggregation accordingly. -+ */ -+enum schedutil_type { -+ FREQUENCY_UTIL, -+ ENERGY_UTIL, -+}; -+ -+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL -+ -+unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, -+ unsigned long max, enum schedutil_type type, -+ struct task_struct *p); -+ -+static inline unsigned long cpu_bw_dl(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline unsigned long cpu_util_dl(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline unsigned long cpu_util_cfs(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->load_avg); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+static inline unsigned long cpu_util_rt(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->rt_nr_running); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+static inline unsigned long cpu_util_irq(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->irq_load_avg); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+static inline -+unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) -+{ -+ util *= (max - irq); -+ util /= max; -+ -+ return util; -+ -+} -+#else -+static inline unsigned long cpu_util_irq(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline -+unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) -+{ -+ return util; -+} -+#endif -+#endif -+ -+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -+#define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus))) -+ -+DECLARE_STATIC_KEY_FALSE(sched_energy_present); -+ -+static inline bool sched_energy_enabled(void) -+{ -+ return static_branch_unlikely(&sched_energy_present); -+} -+ -+#else /* ! (CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */ -+ -+#define perf_domain_span(pd) NULL -+static inline bool sched_energy_enabled(void) { return false; } -+ -+#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */ -+ -+#ifdef CONFIG_MEMBARRIER -+/* -+ * The scheduler provides memory barriers required by membarrier between: -+ * - prior user-space memory accesses and store to rq->membarrier_state, -+ * - store to rq->membarrier_state and following user-space memory accesses. -+ * In the same way it provides those guarantees around store to rq->curr. 
-+ */ -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+ int membarrier_state; -+ -+ if (prev_mm == next_mm) -+ return; -+ -+ membarrier_state = atomic_read(&next_mm->membarrier_state); -+ if (READ_ONCE(rq->membarrier_state) == membarrier_state) -+ return; -+ -+ WRITE_ONCE(rq->membarrier_state, membarrier_state); -+} -+#else -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+} -+#endif -+ -+#ifdef CONFIG_SMP -+static inline bool is_per_cpu_kthread(struct task_struct *p) -+{ -+ if (!(p->flags & PF_KTHREAD)) -+ return false; -+ -+ if (p->nr_cpus_allowed != 1) -+ return false; -+ -+ return true; -+} -+#endif -+ -+void swake_up_all_locked(struct swait_queue_head *q); -+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+/* pelt.h compat CONFIG_SCHED_THERMAL_PRESSURE impossible with MUQSS */ -+static inline int -+update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) -+{ -+ return 0; -+} -+ -+static inline u64 thermal_load_avg(struct rq *rq) -+{ -+ return 0; -+} -+ -+#ifdef CONFIG_RCU_TORTURE_TEST -+extern int sysctl_sched_rt_runtime; -+#endif -+ -+#endif /* MUQSS_SCHED_H */ -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index e39008242cf4..146a3dfe626f 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -183,6 +183,12 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - -+#ifdef CONFIG_SCHED_MUQSS -+#define rt_rq_runnable(rq_rt) rt_rq_is_runnable(rq) -+#else -+#define rt_rq_runnable(rq_rt) rt_rq_is_runnable(&rq->rt) -+#endif -+ - /* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. 
-@@ -211,7 +217,7 @@ unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, - struct rq *rq = cpu_rq(cpu); - - if (!uclamp_is_used() && -- type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) { -+ type == FREQUENCY_UTIL && rt_rq_runnable(rq)) { - return max; - } - -@@ -656,7 +662,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - struct task_struct *thread; - struct sched_attr attr = { - .size = sizeof(struct sched_attr), -+#ifdef CONFIG_SCHED_MUQSS -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, -+#endif - .sched_flags = SCHED_FLAG_SUGOV, - .sched_nice = 0, - .sched_priority = 0, -diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h -index efbb492bb94c..f0288c32ab17 100644 ---- a/kernel/sched/cpupri.h -+++ b/kernel/sched/cpupri.h -@@ -17,6 +17,7 @@ struct cpupri { - int *cpu_to_pri; - }; - -+#ifndef CONFIG_SCHED_MUQSS - #ifdef CONFIG_SMP - int cpupri_find(struct cpupri *cp, struct task_struct *p, - struct cpumask *lowest_mask); -@@ -27,3 +28,4 @@ void cpupri_set(struct cpupri *cp, int cpu, int pri); - int cpupri_init(struct cpupri *cp); - void cpupri_cleanup(struct cpupri *cp); - #endif -+#endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index 5a55d2300452..283a580754a7 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -266,26 +266,6 @@ static inline u64 account_other_time(u64 max) - return accounted; - } - --#ifdef CONFIG_64BIT --static inline u64 read_sum_exec_runtime(struct task_struct *t) --{ -- return t->se.sum_exec_runtime; --} --#else --static u64 read_sum_exec_runtime(struct task_struct *t) --{ -- u64 ns; -- struct rq_flags rf; -- struct rq *rq; -- -- rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -- task_rq_unlock(rq, t, &rf); -- -- return ns; --} --#endif -- - /* - * Accumulate raw cputime values of dead tasks (sig->[us]time) and live - * tasks (sum on group iteration) belonging to @tsk's group. -@@ -614,7 +594,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - task_cputime(p, &cputime.utime, &cputime.stime); -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index f324dc36fc43..43ca13ed9ab0 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -369,6 +369,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_MUQSS - /* - * idle-task scheduling class. 
- */ -@@ -482,3 +483,4 @@ const struct sched_class idle_sched_class - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif /* CONFIG_SCHED_MUQSS */ -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 28709f6b0975..4478c11cb51a 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2,6 +2,19 @@ - /* - * Scheduler internal types and methods: - */ -+#ifdef CONFIG_SCHED_MUQSS -+#include "MuQSS.h" -+ -+/* Begin compatibility wrappers for MuQSS/CFS differences */ -+#define rq_rt_nr_running(rq) ((rq)->rt_nr_running) -+#define rq_h_nr_running(rq) ((rq)->nr_running) -+ -+#else /* CONFIG_SCHED_MUQSS */ -+ -+#define rq_rt_nr_running(rq) ((rq)->rt.rt_nr_running) -+#define rq_h_nr_running(rq) ((rq)->cfs.h_nr_running) -+ -+ - #include - - #include -@@ -2626,3 +2639,25 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) - - void swake_up_all_locked(struct swait_queue_head *q); - void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+/* MuQSS compatibility functions */ -+#ifdef CONFIG_64BIT -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ return t->se.sum_exec_runtime; -+} -+#else -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ u64 ns; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ rq = task_rq_lock(t, &rf); -+ ns = t->se.sum_exec_runtime; -+ task_rq_unlock(rq, t, &rf); -+ -+ return ns; -+} -+#endif -+#endif /* CONFIG_SCHED_MUQSS */ -diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 1bd7e3af904f..a1dc490c15e4 100644 ---- a/kernel/sched/topology.c -+++ b/kernel/sched/topology.c -@@ -440,7 +440,11 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) - struct root_domain *old_rd = NULL; - unsigned long flags; - -+#ifdef CONFIG_SCHED_MUQSS -+ raw_spin_lock_irqsave(rq->lock, flags); -+#else - raw_spin_lock_irqsave(&rq->lock, flags); -+#endif - - if (rq->rd) { - old_rd = rq->rd; -@@ -466,7 +470,11 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) - if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) - set_rq_online(rq); - -+#ifdef CONFIG_SCHED_MUQSS -+ raw_spin_unlock_irqrestore(rq->lock, flags); -+#else - raw_spin_unlock_irqrestore(&rq->lock, flags); -+#endif - - if (old_rd) - call_rcu(&old_rd->rcu, free_rootdomain); -diff --git a/kernel/skip_list.c b/kernel/skip_list.c -new file mode 100644 -index 000000000000..bf5c6e97e139 ---- /dev/null -+++ b/kernel/skip_list.c -@@ -0,0 +1,148 @@ -+/* -+ Copyright (C) 2011,2016 Con Kolivas. -+ -+ Code based on example originally by William Pugh. -+ -+Skip Lists are a probabilistic alternative to balanced trees, as -+described in the June 1990 issue of CACM and were invented by -+William Pugh in 1987. -+ -+A couple of comments about this implementation: -+The routine randomLevel has been hard-coded to generate random -+levels using p=0.25. It can be easily changed. -+ -+The insertion routine has been implemented so as to use the -+dirty hack described in the CACM paper: if a random level is -+generated that is more than the current maximum level, the -+current maximum level plus one is used instead. -+ -+Levels start at zero and go up to MaxLevel (which is equal to -+MaxNumberOfLevels-1). -+ -+The routines defined in this file are: -+ -+init: defines slnode -+ -+new_skiplist: returns a new, empty list -+ -+randomLevel: Returns a random level based on a u64 random seed passed to it. -+In MuQSS, the "niffy" time is used for this purpose. 
-+ -+insert(l,key, value): inserts the binding (key, value) into l. This operation -+occurs in O(log n) time. -+ -+delnode(slnode, l, node): deletes any binding of key from the l based on the -+actual node value. This operation occurs in O(k) time where k is the -+number of levels of the node in question (max 8). The original delete -+function occurred in O(log n) time and involved a search. -+ -+MuQSS Notes: In this implementation of skiplists, there are bidirectional -+next/prev pointers and the insert function returns a pointer to the actual -+node the value is stored. The key here is chosen by the scheduler so as to -+sort tasks according to the priority list requirements and is no longer used -+by the scheduler after insertion. The scheduler lookup, however, occurs in -+O(1) time because it is always the first item in the level 0 linked list. -+Since the task struct stores a copy of the node pointer upon skiplist_insert, -+it can also remove it much faster than the original implementation with the -+aid of prev<->next pointer manipulation and no searching. -+ -+*/ -+ -+#include -+#include -+ -+#define MaxNumberOfLevels 8 -+#define MaxLevel (MaxNumberOfLevels - 1) -+ -+void skiplist_init(skiplist_node *slnode) -+{ -+ int i; -+ -+ slnode->key = 0xFFFFFFFFFFFFFFFF; -+ slnode->level = 0; -+ slnode->value = NULL; -+ for (i = 0; i < MaxNumberOfLevels; i++) -+ slnode->next[i] = slnode->prev[i] = slnode; -+} -+ -+skiplist *new_skiplist(skiplist_node *slnode) -+{ -+ skiplist *l = kzalloc(sizeof(skiplist), GFP_ATOMIC); -+ -+ BUG_ON(!l); -+ l->header = slnode; -+ return l; -+} -+ -+void free_skiplist(skiplist *l) -+{ -+ skiplist_node *p, *q; -+ -+ p = l->header; -+ do { -+ q = p->next[0]; -+ p->next[0]->prev[0] = q->prev[0]; -+ skiplist_node_init(p); -+ p = q; -+ } while (p != l->header); -+ kfree(l); -+} -+ -+void skiplist_node_init(skiplist_node *node) -+{ -+ memset(node, 0, sizeof(skiplist_node)); -+} -+ -+static inline unsigned int randomLevel(const long unsigned int randseed) -+{ -+ return find_first_bit(&randseed, MaxLevel) / 2; -+} -+ -+void skiplist_insert(skiplist *l, skiplist_node *node, keyType key, valueType value, unsigned int randseed) -+{ -+ skiplist_node *update[MaxNumberOfLevels]; -+ skiplist_node *p, *q; -+ int k = l->level; -+ -+ p = l->header; -+ do { -+ while (q = p->next[k], q->key <= key) -+ p = q; -+ update[k] = p; -+ } while (--k >= 0); -+ -+ ++l->entries; -+ k = randomLevel(randseed); -+ if (k > l->level) { -+ k = ++l->level; -+ update[k] = l->header; -+ } -+ -+ node->level = k; -+ node->key = key; -+ node->value = value; -+ do { -+ p = update[k]; -+ node->next[k] = p->next[k]; -+ p->next[k] = node; -+ node->prev[k] = p; -+ node->next[k]->prev[k] = node; -+ } while (--k >= 0); -+} -+ -+void skiplist_delete(skiplist *l, skiplist_node *node) -+{ -+ int k, m = node->level; -+ -+ for (k = 0; k <= m; k++) { -+ node->prev[k]->next[k] = node->next[k]; -+ node->next[k]->prev[k] = node->prev[k]; -+ } -+ skiplist_node_init(node); -+ if (m == l->level) { -+ while (l->header->next[m] == l->header && l->header->prev[m] == l->header && m > 0) -+ m--; -+ l->level = m; -+ } -+ l->entries--; -+} -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index afad085960b8..d2e35cd54f94 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -120,7 +120,17 @@ static unsigned long long_max = LONG_MAX; - static int one_hundred = 100; - static int two_hundred = 200; - static int one_thousand = 1000; --#ifdef CONFIG_PRINTK -+static int zero = 0; -+static int one = 1; -+#ifdef CONFIG_SCHED_MUQSS 
-+extern int rr_interval; -+extern int sched_interactive; -+extern int sched_iso_cpu; -+extern int sched_yield_type; -+#endif -+extern int hrtimer_granularity_us; -+extern int hrtimeout_min_us; -+#if defined(CONFIG_PRINTK) || defined(CONFIG_SCHED_MUQSS) - static int ten_thousand = 10000; - #endif - #ifdef CONFIG_PERF_EVENTS -@@ -184,7 +194,7 @@ static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT; - int sysctl_legacy_va_layout; - #endif - --#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_MUQSS) - static int min_sched_granularity_ns = 100000; /* 100 usecs */ - static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_wakeup_granularity_ns; /* 0 usecs */ -@@ -193,7 +203,7 @@ static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; - static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; - #endif /* CONFIG_SMP */ --#endif /* CONFIG_SCHED_DEBUG */ -+#endif /* CONFIG_SCHED_DEBUG && !CONFIG_SCHED_MUQSS */ - - #ifdef CONFIG_COMPACTION - static int min_extfrag_threshold; -@@ -1652,6 +1662,7 @@ int proc_do_static_key(struct ctl_table *table, int write, - } - - static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_MUQSS - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, -@@ -1843,6 +1854,73 @@ static struct ctl_table kern_table[] = { - .extra1 = SYSCTL_ONE, - }, - #endif -+#elif defined(CONFIG_SCHED_MUQSS) -+ { -+ .procname = "rr_interval", -+ .data = &rr_interval, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &one, -+ .extra2 = &one_thousand, -+ }, -+ { -+ .procname = "interactive", -+ .data = &sched_interactive, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &one, -+ }, -+ { -+ .procname = "iso_cpu", -+ .data = &sched_iso_cpu, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &one_hundred, -+ }, -+ { -+ .procname = "yield_type", -+ .data = &sched_yield_type, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &two, -+ }, -+#if defined(CONFIG_SMP) && defined(CONFIG_SCHEDSTATS) -+ { -+ .procname = "sched_schedstats", -+ .data = NULL, -+ .maxlen = sizeof(unsigned int), -+ .mode = 0644, -+ .proc_handler = sysctl_schedstats, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+#endif /* CONFIG_SMP && CONFIG_SCHEDSTATS */ -+#endif /* CONFIG_SCHED_MUQSS */ -+ { -+ .procname = "hrtimer_granularity_us", -+ .data = &hrtimer_granularity_us, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &one, -+ .extra2 = &ten_thousand, -+ }, -+ { -+ .procname = "hrtimeout_min_us", -+ .data = &hrtimeout_min_us, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &one, -+ .extra2 = &ten_thousand, -+ }, - #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) - { - .procname = "sched_energy_aware", -diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig -index a09b1d61df6a..e7662101fcc3 100644 ---- a/kernel/time/Kconfig -+++ b/kernel/time/Kconfig -@@ -75,6 +75,9 @@ config NO_HZ_COMMON - depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - select TICK_ONESHOT - -+config NO_HZ_FULL -+ bool -+ - choice - prompt "Timer tick 
handling" - default NO_HZ_IDLE if NO_HZ -@@ -96,8 +99,9 @@ config NO_HZ_IDLE - - Most of the time you want to say Y here. - --config NO_HZ_FULL -+config NO_HZ_FULL_NODEF - bool "Full dynticks system (tickless)" -+ select NO_HZ_FULL - # NO_HZ_COMMON dependency - depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - # We need at least one periodic CPU for timekeeping -@@ -123,6 +127,8 @@ config NO_HZ_FULL - transitions: syscalls, exceptions and interrupts. Even when it's - dynamically off. - -+ Not recommended for desktops,laptops, or mobile devices. -+ - Say N. - - endchoice -@@ -132,7 +138,7 @@ config CONTEXT_TRACKING - - config CONTEXT_TRACKING_FORCE - bool "Force context tracking" -- depends on CONTEXT_TRACKING -+ depends on CONTEXT_TRACKING && !SCHED_MUQSS - default y if !NO_HZ_FULL - help - The major pre-requirement for full dynticks to work is to -diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c -index f5490222e134..544c58c29267 100644 ---- a/kernel/time/clockevents.c -+++ b/kernel/time/clockevents.c -@@ -190,8 +190,9 @@ int clockevents_tick_resume(struct clock_event_device *dev) - - #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST - --/* Limit min_delta to a jiffie */ --#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) -+int __read_mostly hrtimer_granularity_us = 100; -+/* Limit min_delta to 100us */ -+#define MIN_DELTA_LIMIT (hrtimer_granularity_us * NSEC_PER_USEC) - - /** - * clockevents_increase_min_delta - raise minimum delta of a clock event device -diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index 95b6a708b040..19918cf649b0 100644 ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -2223,3 +2223,113 @@ int __sched schedule_hrtimeout(ktime_t *expires, - return schedule_hrtimeout_range(expires, 0, mode); - } - EXPORT_SYMBOL_GPL(schedule_hrtimeout); -+ -+/* -+ * As per schedule_hrtimeout but taskes a millisecond value and returns how -+ * many milliseconds are left. -+ */ -+long __sched schedule_msec_hrtimeout(long timeout) -+{ -+ struct hrtimer_sleeper t; -+ int delta, jiffs; -+ ktime_t expires; -+ -+ if (!timeout) { -+ __set_current_state(TASK_RUNNING); -+ return 0; -+ } -+ -+ jiffs = msecs_to_jiffies(timeout); -+ /* -+ * If regular timer resolution is adequate or hrtimer resolution is not -+ * (yet) better than Hz, as would occur during startup, use regular -+ * timers. -+ */ -+ if (jiffs > 4 || hrtimer_resolution >= NSEC_PER_SEC / HZ || pm_freezing) -+ return schedule_timeout(jiffs); -+ -+ delta = (timeout % 1000) * NSEC_PER_MSEC; -+ expires = ktime_set(0, delta); -+ -+ hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ hrtimer_set_expires_range_ns(&t.timer, expires, delta); -+ -+ hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL); -+ -+ if (likely(t.task)) -+ schedule(); -+ -+ hrtimer_cancel(&t.timer); -+ destroy_hrtimer_on_stack(&t.timer); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ expires = hrtimer_expires_remaining(&t.timer); -+ timeout = ktime_to_ms(expires); -+ return timeout < 0 ? 
0 : timeout; -+} -+ -+EXPORT_SYMBOL(schedule_msec_hrtimeout); -+ -+#define USECS_PER_SEC 1000000 -+extern int hrtimer_granularity_us; -+ -+static inline long schedule_usec_hrtimeout(long timeout) -+{ -+ struct hrtimer_sleeper t; -+ ktime_t expires; -+ int delta; -+ -+ if (!timeout) { -+ __set_current_state(TASK_RUNNING); -+ return 0; -+ } -+ -+ if (hrtimer_resolution >= NSEC_PER_SEC / HZ) -+ return schedule_timeout(usecs_to_jiffies(timeout)); -+ -+ if (timeout < hrtimer_granularity_us) -+ timeout = hrtimer_granularity_us; -+ delta = (timeout % USECS_PER_SEC) * NSEC_PER_USEC; -+ expires = ktime_set(0, delta); -+ -+ hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ hrtimer_set_expires_range_ns(&t.timer, expires, delta); -+ -+ hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL); -+ -+ if (likely(t.task)) -+ schedule(); -+ -+ hrtimer_cancel(&t.timer); -+ destroy_hrtimer_on_stack(&t.timer); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ expires = hrtimer_expires_remaining(&t.timer); -+ timeout = ktime_to_us(expires); -+ return timeout < 0 ? 0 : timeout; -+} -+ -+int __read_mostly hrtimeout_min_us = 500; -+ -+long __sched schedule_min_hrtimeout(void) -+{ -+ return usecs_to_jiffies(schedule_usec_hrtimeout(hrtimeout_min_us)); -+} -+ -+EXPORT_SYMBOL(schedule_min_hrtimeout); -+ -+long __sched schedule_msec_hrtimeout_interruptible(long timeout) -+{ -+ __set_current_state(TASK_INTERRUPTIBLE); -+ return schedule_msec_hrtimeout(timeout); -+} -+EXPORT_SYMBOL(schedule_msec_hrtimeout_interruptible); -+ -+long __sched schedule_msec_hrtimeout_uninterruptible(long timeout) -+{ -+ __set_current_state(TASK_UNINTERRUPTIBLE); -+ return schedule_msec_hrtimeout(timeout); -+} -+EXPORT_SYMBOL(schedule_msec_hrtimeout_uninterruptible); -diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c -index a71758e34e45..ebb84a65d928 100644 ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -216,7 +216,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) - u64 stime, utime; - - task_cputime(p, &utime, &stime); -- store_samples(samples, stime, utime, p->se.sum_exec_runtime); -+ store_samples(samples, stime, utime, tsk_seruntime(p)); - } - - static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, -@@ -850,7 +850,7 @@ static void check_thread_timers(struct task_struct *tsk, - soft = task_rlimit(tsk, RLIMIT_RTTIME); - if (soft != RLIM_INFINITY) { - /* Task RT timeout is accounted in jiffies. RTTIME is usec */ -- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); -+ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); - unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - - /* At the hard limit, send SIGKILL. No further action. 
*/ -diff --git a/kernel/time/timer.c b/kernel/time/timer.c -index a50364df1054..a86e4530e530 100644 ---- a/kernel/time/timer.c -+++ b/kernel/time/timer.c -@@ -44,6 +44,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -1587,7 +1588,7 @@ static unsigned long __next_timer_interrupt(struct timer_base *base) - * Check, if the next hrtimer event is before the next timer wheel - * event: - */ --static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) -+static u64 cmp_next_hrtimer_event(struct timer_base *base, u64 basem, u64 expires) - { - u64 nextevt = hrtimer_get_next_event(); - -@@ -1605,6 +1606,9 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) - if (nextevt <= basem) - return basem; - -+ if (nextevt < expires && nextevt - basem <= TICK_NSEC) -+ base->is_idle = false; -+ - /* - * Round up to the next jiffie. High resolution timers are - * off, so the hrtimers are expired in the tick and we need to -@@ -1674,7 +1678,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) - } - raw_spin_unlock(&base->lock); - -- return cmp_next_hrtimer_event(basem, expires); -+ return cmp_next_hrtimer_event(base, basem, expires); - } - - /** -@@ -1873,6 +1877,18 @@ signed long __sched schedule_timeout(signed long timeout) - - expire = timeout + jiffies; - -+#ifdef CONFIG_HIGH_RES_TIMERS -+ if (timeout == 1 && hrtimer_resolution < NSEC_PER_SEC / HZ) { -+ /* -+ * Special case 1 as being a request for the minimum timeout -+ * and use highres timers to timeout after 1ms to workaround -+ * the granularity of low Hz tick timers. -+ */ -+ if (!schedule_min_hrtimeout()) -+ return 0; -+ goto out_timeout; -+ } -+#endif - timer.task = current; - timer_setup_on_stack(&timer.timer, process_timeout, 0); - __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING); -@@ -1881,10 +1897,10 @@ signed long __sched schedule_timeout(signed long timeout) - - /* Remove the timer from the object tracker */ - destroy_timer_on_stack(&timer.timer); -- -+out_timeout: - timeout = expire - jiffies; - -- out: -+out: - return timeout < 0 ? 0 : timeout; - } - EXPORT_SYMBOL(schedule_timeout); -@@ -2027,7 +2043,19 @@ void __init init_timers(void) - */ - void msleep(unsigned int msecs) - { -- unsigned long timeout = msecs_to_jiffies(msecs) + 1; -+ int jiffs = msecs_to_jiffies(msecs); -+ unsigned long timeout; -+ -+ /* -+ * Use high resolution timers where the resolution of tick based -+ * timers is inadequate. 
-+ */ -+ if (jiffs < 5 && hrtimer_resolution < NSEC_PER_SEC / HZ && !pm_freezing) { -+ while (msecs) -+ msecs = schedule_msec_hrtimeout_uninterruptible(msecs); -+ return; -+ } -+ timeout = jiffs + 1; - - while (timeout) - timeout = schedule_timeout_uninterruptible(timeout); -@@ -2041,7 +2069,15 @@ EXPORT_SYMBOL(msleep); - */ - unsigned long msleep_interruptible(unsigned int msecs) - { -- unsigned long timeout = msecs_to_jiffies(msecs) + 1; -+ int jiffs = msecs_to_jiffies(msecs); -+ unsigned long timeout; -+ -+ if (jiffs < 5 && hrtimer_resolution < NSEC_PER_SEC / HZ && !pm_freezing) { -+ while (msecs && !signal_pending(current)) -+ msecs = schedule_msec_hrtimeout_interruptible(msecs); -+ return msecs; -+ } -+ timeout = jiffs + 1; - - while (timeout && !signal_pending(current)) - timeout = schedule_timeout_interruptible(timeout); -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index b5e3496cf803..68930e7f4d28 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) - { - /* Make this a -deadline thread */ - static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_MUQSS -+ /* No deadline on MuQSS, use RR */ -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, - .sched_runtime = 100000ULL, - .sched_deadline = 10000000ULL, - .sched_period = 10000000ULL -+#endif - }; - struct wakeup_test_data *x = data; - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 466fc3144fff..27224c2d7674 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -169,7 +169,7 @@ struct scan_control { - /* - * From 0 .. 200. Higher means more swappy. - */ --int vm_swappiness = 60; -+int vm_swappiness = 33; - - static void set_task_reclaim_state(struct task_struct *task, - struct reclaim_state *rs) -diff --git a/net/core/pktgen.c b/net/core/pktgen.c -index 44fdbb9c6e53..ae0adfc677c2 100644 ---- a/net/core/pktgen.c -+++ b/net/core/pktgen.c -@@ -1894,7 +1894,7 @@ static void pktgen_mark_device(const struct pktgen_net *pn, const char *ifname) - mutex_unlock(&pktgen_thread_lock); - pr_debug("%s: waiting for %s to disappear....\n", - __func__, ifname); -- schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try)); -+ schedule_msec_hrtimeout_interruptible((msec_per_try)); - mutex_lock(&pktgen_thread_lock); - - if (++i >= max_tries) { -diff --git a/sound/pci/maestro3.c b/sound/pci/maestro3.c -index 40232a278b1a..d87fae1113aa 100644 ---- a/sound/pci/maestro3.c -+++ b/sound/pci/maestro3.c -@@ -1995,7 +1995,7 @@ static void snd_m3_ac97_reset(struct snd_m3 *chip) - outw(0, io + GPIO_DATA); - outw(dir | GPO_PRIMARY_AC97, io + GPIO_DIRECTION); - -- schedule_timeout_uninterruptible(msecs_to_jiffies(delay1)); -+ schedule_msec_hrtimeout_uninterruptible((delay1)); - - outw(GPO_PRIMARY_AC97, io + GPIO_DATA); - udelay(5); -@@ -2003,7 +2003,7 @@ static void snd_m3_ac97_reset(struct snd_m3 *chip) - outw(IO_SRAM_ENABLE | SERIAL_AC_LINK_ENABLE, io + RING_BUS_CTRL_A); - outw(~0, io + GPIO_MASK); - -- schedule_timeout_uninterruptible(msecs_to_jiffies(delay2)); -+ schedule_msec_hrtimeout_uninterruptible((delay2)); - - if (! 
snd_m3_try_read_vendor(chip)) - break; -diff --git a/sound/soc/codecs/rt5631.c b/sound/soc/codecs/rt5631.c -index 653da3eaf355..d77d12902594 100644 ---- a/sound/soc/codecs/rt5631.c -+++ b/sound/soc/codecs/rt5631.c -@@ -417,7 +417,7 @@ static void onebit_depop_mute_stage(struct snd_soc_component *component, int ena - hp_zc = snd_soc_component_read(component, RT5631_INT_ST_IRQ_CTRL_2); - snd_soc_component_write(component, RT5631_INT_ST_IRQ_CTRL_2, hp_zc & 0xf7ff); - if (enable) { -- schedule_timeout_uninterruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_uninterruptible((10)); - /* config one-bit depop parameter */ - rt5631_write_index(component, RT5631_SPK_INTL_CTRL, 0x307f); - snd_soc_component_update_bits(component, RT5631_HP_OUT_VOL, -@@ -529,7 +529,7 @@ static void depop_seq_mute_stage(struct snd_soc_component *component, int enable - hp_zc = snd_soc_component_read(component, RT5631_INT_ST_IRQ_CTRL_2); - snd_soc_component_write(component, RT5631_INT_ST_IRQ_CTRL_2, hp_zc & 0xf7ff); - if (enable) { -- schedule_timeout_uninterruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_uninterruptible((10)); - - /* config depop sequence parameter */ - rt5631_write_index(component, RT5631_SPK_INTL_CTRL, 0x302f); -diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c -index a6aa212fa0c8..8bfa549b38db 100644 ---- a/sound/soc/codecs/wm8350.c -+++ b/sound/soc/codecs/wm8350.c -@@ -233,10 +233,10 @@ static void wm8350_pga_work(struct work_struct *work) - out2->ramp == WM8350_RAMP_UP) { - /* delay is longer over 0dB as increases are larger */ - if (i >= WM8350_OUTn_0dB) -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (2)); - else -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (1)); - } else - udelay(50); /* doesn't matter if we delay longer */ -@@ -1120,7 +1120,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - (platform->dis_out4 << 6)); - - /* wait for discharge */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform-> - cap_discharge_msecs)); - -@@ -1136,7 +1136,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - WM8350_VBUFEN); - - /* wait for vmid */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform-> - vmid_charge_msecs)); - -@@ -1187,7 +1187,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, pm1); - - /* wait */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform-> - vmid_discharge_msecs)); - -@@ -1205,7 +1205,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - pm1 | WM8350_OUTPUT_DRAIN_EN); - - /* wait */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform->drain_msecs)); - - pm1 &= ~WM8350_BIASEN; -diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c -index a9a6d766a176..45bf31de6282 100644 ---- a/sound/soc/codecs/wm8900.c -+++ b/sound/soc/codecs/wm8900.c -@@ -1104,7 +1104,7 @@ static int wm8900_set_bias_level(struct snd_soc_component *component, - /* Need to let things settle before stopping the clock - * to ensure that restart works, see "Stopping the - * master clock" in the datasheet. 
*/ -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible(1); - snd_soc_component_write(component, WM8900_REG_POWER2, - WM8900_REG_POWER2_SYSCLK_ENA); - break; -diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c -index 7072ffacbdfd..e8414ec4759c 100644 ---- a/sound/soc/codecs/wm9713.c -+++ b/sound/soc/codecs/wm9713.c -@@ -199,7 +199,7 @@ static int wm9713_voice_shutdown(struct snd_soc_dapm_widget *w, - - /* Gracefully shut down the voice interface. */ - snd_soc_component_update_bits(component, AC97_HANDSET_RATE, 0x0f00, 0x0200); -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible(1); - snd_soc_component_update_bits(component, AC97_HANDSET_RATE, 0x0f00, 0x0f00); - snd_soc_component_update_bits(component, AC97_EXTENDED_MID, 0x1000, 0x1000); - -@@ -868,7 +868,7 @@ static int wm9713_set_pll(struct snd_soc_component *component, - wm9713->pll_in = freq_in; - - /* wait 10ms AC97 link frames for the link to stabilise */ -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - return 0; - } - -diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c -index 3273161e2787..7fb9b4c6dd7b 100644 ---- a/sound/soc/soc-dapm.c -+++ b/sound/soc/soc-dapm.c -@@ -154,7 +154,7 @@ static void dapm_assert_locked(struct snd_soc_dapm_context *dapm) - static void pop_wait(u32 pop_time) - { - if (pop_time) -- schedule_timeout_uninterruptible(msecs_to_jiffies(pop_time)); -+ schedule_msec_hrtimeout_uninterruptible((pop_time)); - } - - __printf(3, 4) -diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c -index fdbdfb7bce92..fa8e8faf3eb3 100644 ---- a/sound/usb/line6/pcm.c -+++ b/sound/usb/line6/pcm.c -@@ -127,7 +127,7 @@ static void line6_wait_clear_audio_urbs(struct snd_line6_pcm *line6pcm, - if (!alive) - break; - set_current_state(TASK_UNINTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } while (--timeout > 0); - if (alive) - dev_err(line6pcm->line6->ifcdev, diff --git a/linux510-rc-tkg/linux510-tkg-patches/0004-glitched-ondemand-muqss.patch b/linux510-rc-tkg/linux510-tkg-patches/0004-glitched-ondemand-muqss.patch deleted file mode 100644 index 02933e4..0000000 --- a/linux510-rc-tkg/linux510-tkg-patches/0004-glitched-ondemand-muqss.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 6b423eebfd5d..61e3271675d6 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -21,10 +21,10 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) --#define DEF_SAMPLING_DOWN_FACTOR (1) -+#define DEF_FREQUENCY_UP_THRESHOLD (45) -+#define DEF_SAMPLING_DOWN_FACTOR (5) - #define MAX_SAMPLING_DOWN_FACTOR (100000) --#define MICRO_FREQUENCY_UP_THRESHOLD (95) -+#define MICRO_FREQUENCY_UP_THRESHOLD (45) - #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) - #define MIN_FREQUENCY_UP_THRESHOLD (1) - #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/linux510-rc-tkg/linux510-tkg-patches/0005-glitched-pds.patch b/linux510-rc-tkg/linux510-tkg-patches/0005-glitched-pds.patch deleted file mode 100644 index 08c9ef3..0000000 --- a/linux510-rc-tkg/linux510-tkg-patches/0005-glitched-pds.patch +++ /dev/null @@ -1,90 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - PDS - -diff --git a/kernel/Kconfig.hz 
b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9270a4370d54..30d01e647417 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -169,7 +169,7 @@ - /* - * From 0 .. 200. Higher means more swappy. - */ --int vm_swappiness = 60; -+int vm_swappiness = 20; - - static void set_task_reclaim_state(struct task_struct *task, - struct reclaim_state *rs) diff --git a/linux510-rc-tkg/linux510-tkg-patches/0006-add-acs-overrides_iommu.patch b/linux510-rc-tkg/linux510-tkg-patches/0006-add-acs-overrides_iommu.patch deleted file mode 100644 index d1303a5..0000000 --- a/linux510-rc-tkg/linux510-tkg-patches/0006-add-acs-overrides_iommu.patch +++ /dev/null @@ -1,193 +0,0 @@ -From cdeab384f48dd9c88e2dff2e9ad8d57dca1a1b1c Mon Sep 17 00:00:00 2001 -From: Mark Weiman -Date: Sun, 12 Aug 2018 11:36:21 -0400 -Subject: [PATCH] pci: Enable overrides for missing ACS capabilities - -This an updated version of Alex Williamson's patch from: -https://lkml.org/lkml/2013/5/30/513 - -Original commit message follows: - -PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that -allows us to control whether transactions are allowed to be redirected -in various subnodes of a PCIe topology. For instance, if two -endpoints are below a root port or downsteam switch port, the -downstream port may optionally redirect transactions between the -devices, bypassing upstream devices. The same can happen internally -on multifunction devices. The transaction may never be visible to the -upstream devices. - -One upstream device that we particularly care about is the IOMMU. If -a redirection occurs in the topology below the IOMMU, then the IOMMU -cannot provide isolation between devices. 
This is why the PCIe spec -encourages topologies to include ACS support. Without it, we have to -assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation. - -Unfortunately, far too many topologies do not support ACS to make this -a steadfast requirement. Even the latest chipsets from Intel are only -sporadically supporting ACS. We have trouble getting interconnect -vendors to include the PCIe spec required PCIe capability, let alone -suggested features. - -Therefore, we need to add some flexibility. The pcie_acs_override= -boot option lets users opt-in specific devices or sets of devices to -assume ACS support. The "downstream" option assumes full ACS support -on root ports and downstream switch ports. The "multifunction" -option assumes the subset of ACS features available on multifunction -endpoints and upstream switch ports are supported. The "id:nnnn:nnnn" -option enables ACS support on devices matching the provided vendor -and device IDs, allowing more strategic ACS overrides. These options -may be combined in any order. A maximum of 16 id specific overrides -are available. It's suggested to use the most limited set of options -necessary to avoid completely disabling ACS across the topology. -Note to hardware vendors, we have facilities to permanently quirk -specific devices which enforce isolation but not provide an ACS -capability. Please contact me to have your devices added and save -your customers the hassle of this boot option. - -Signed-off-by: Mark Weiman ---- - .../admin-guide/kernel-parameters.txt | 9 ++ - drivers/pci/quirks.c | 101 ++++++++++++++++++ - 2 files changed, 110 insertions(+) - -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index aefd358a5ca3..173b3596fd9e 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -3190,6 +3190,15 @@ - nomsi [MSI] If the PCI_MSI kernel config parameter is - enabled, this kernel boot option can be used to - disable the use of MSI interrupts system-wide. -+ pcie_acs_override = -+ [PCIE] Override missing PCIe ACS support for: -+ downstream -+ All downstream ports - full ACS capabilities -+ multifunction -+ All multifunction devices - multifunction ACS subset -+ id:nnnn:nnnn -+ Specific device - full ACS capabilities -+ Specified as vid:did (vendor/device ID) in hex - noioapicquirk [APIC] Disable all boot interrupt quirks. - Safety option to keep boot IRQs enabled. This - should never be necessary. 
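A minimal usage sketch for the boot option documented above, assuming a GRUB-based setup; the /etc/default/grub path, the example 10de:1c82 vendor/device ID, and the grub-mkconfig invocation are illustrative placeholders and not part of this patch set:

# /etc/default/grub: enable ACS overrides on downstream ports, on multifunction
# devices, and on one specific device (vendor:device ID in hex), all combined
# in a single comma-separated pcie_acs_override= value on the kernel command line
GRUB_CMDLINE_LINUX_DEFAULT="quiet pcie_acs_override=downstream,multifunction,id:10de:1c82"
# regenerate the bootloader config afterwards, e.g.:
# grub-mkconfig -o /boot/grub/grub.cfg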
-diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index 4700d24e5d55..8f7a3d7fd9c1 100644 ---- a/drivers/pci/quirks.c -+++ b/drivers/pci/quirks.c -@@ -3372,6 +3372,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) - dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; - } - -+static bool acs_on_downstream; -+static bool acs_on_multifunction; -+ -+#define NUM_ACS_IDS 16 -+struct acs_on_id { -+ unsigned short vendor; -+ unsigned short device; -+}; -+static struct acs_on_id acs_on_ids[NUM_ACS_IDS]; -+static u8 max_acs_id; -+ -+static __init int pcie_acs_override_setup(char *p) -+{ -+ if (!p) -+ return -EINVAL; -+ -+ while (*p) { -+ if (!strncmp(p, "downstream", 10)) -+ acs_on_downstream = true; -+ if (!strncmp(p, "multifunction", 13)) -+ acs_on_multifunction = true; -+ if (!strncmp(p, "id:", 3)) { -+ char opt[5]; -+ int ret; -+ long val; -+ -+ if (max_acs_id >= NUM_ACS_IDS - 1) { -+ pr_warn("Out of PCIe ACS override slots (%d)\n", -+ NUM_ACS_IDS); -+ goto next; -+ } -+ -+ p += 3; -+ snprintf(opt, 5, "%s", p); -+ ret = kstrtol(opt, 16, &val); -+ if (ret) { -+ pr_warn("PCIe ACS ID parse error %d\n", ret); -+ goto next; -+ } -+ acs_on_ids[max_acs_id].vendor = val; -+ -+ p += strcspn(p, ":"); -+ if (*p != ':') { -+ pr_warn("PCIe ACS invalid ID\n"); -+ goto next; -+ } -+ -+ p++; -+ snprintf(opt, 5, "%s", p); -+ ret = kstrtol(opt, 16, &val); -+ if (ret) { -+ pr_warn("PCIe ACS ID parse error %d\n", ret); -+ goto next; -+ } -+ acs_on_ids[max_acs_id].device = val; -+ max_acs_id++; -+ } -+next: -+ p += strcspn(p, ","); -+ if (*p == ',') -+ p++; -+ } -+ -+ if (acs_on_downstream || acs_on_multifunction || max_acs_id) -+ pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n"); -+ -+ return 0; -+} -+early_param("pcie_acs_override", pcie_acs_override_setup); -+ -+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags) -+{ -+ int i; -+ -+ /* Never override ACS for legacy devices or devices with ACS caps */ -+ if (!pci_is_pcie(dev) || -+ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS)) -+ return -ENOTTY; -+ -+ for (i = 0; i < max_acs_id; i++) -+ if (acs_on_ids[i].vendor == dev->vendor && -+ acs_on_ids[i].device == dev->device) -+ return 1; -+ -+ switch (pci_pcie_type(dev)) { -+ case PCI_EXP_TYPE_DOWNSTREAM: -+ case PCI_EXP_TYPE_ROOT_PORT: -+ if (acs_on_downstream) -+ return 1; -+ break; -+ case PCI_EXP_TYPE_ENDPOINT: -+ case PCI_EXP_TYPE_UPSTREAM: -+ case PCI_EXP_TYPE_LEG_END: -+ case PCI_EXP_TYPE_RC_END: -+ if (acs_on_multifunction && dev->multifunction) -+ return 1; -+ } -+ -+ return -ENOTTY; -+} - /* - * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset. 
- * The device will throw a Link Down error on AER-capable systems and -@@ -4513,6 +4613,7 @@ static const struct pci_dev_acs_enabled { - { PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs }, - /* Zhaoxin Root/Downstream Ports */ - { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, -+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides }, - { 0 } - }; - - diff --git a/linux510-rc-tkg/linux510-tkg-patches/0007-v5.10-fsync.patch b/linux510-rc-tkg/linux510-tkg-patches/0007-v5.10-fsync.patch deleted file mode 100644 index 47badbb..0000000 --- a/linux510-rc-tkg/linux510-tkg-patches/0007-v5.10-fsync.patch +++ /dev/null @@ -1,597 +0,0 @@ -From 7b5df0248ce255ef5b7204d65a7b3783ebb76a3d Mon Sep 17 00:00:00 2001 -From: Gabriel Krisman Bertazi -Date: Fri, 13 Dec 2019 11:08:02 -0300 -Subject: [PATCH 1/2] futex: Implement mechanism to wait on any of several - futexes -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This is a new futex operation, called FUTEX_WAIT_MULTIPLE, which allows -a thread to wait on several futexes at the same time, and be awoken by -any of them. In a sense, it implements one of the features that was -supported by pooling on the old FUTEX_FD interface. - -The use case lies in the Wine implementation of the Windows NT interface -WaitMultipleObjects. This Windows API function allows a thread to sleep -waiting on the first of a set of event sources (mutexes, timers, signal, -console input, etc) to signal. Considering this is a primitive -synchronization operation for Windows applications, being able to quickly -signal events on the producer side, and quickly go to sleep on the -consumer side is essential for good performance of those running over Wine. - -Wine developers have an implementation that uses eventfd, but it suffers -from FD exhaustion (there is applications that go to the order of -multi-milion FDs), and higher CPU utilization than this new operation. - -The futex list is passed as an array of `struct futex_wait_block` -(pointer, value, bitset) to the kernel, which will enqueue all of them -and sleep if none was already triggered. It returns a hint of which -futex caused the wake up event to userspace, but the hint doesn't -guarantee that is the only futex triggered. Before calling the syscall -again, userspace should traverse the list, trying to re-acquire any of -the other futexes, to prevent an immediate -EWOULDBLOCK return code from -the kernel. - -This was tested using three mechanisms: - -1) By reimplementing FUTEX_WAIT in terms of FUTEX_WAIT_MULTIPLE and -running the unmodified tools/testing/selftests/futex and a full linux -distro on top of this kernel. - -2) By an example code that exercises the FUTEX_WAIT_MULTIPLE path on a -multi-threaded, event-handling setup. - -3) By running the Wine fsync implementation and executing multi-threaded -applications, in particular modern games, on top of this implementation. - -Changes were tested for the following ABIs: x86_64, i386 and x32. -Support for x32 applications is not implemented since it would -take a major rework adding a new entry point and splitting the current -futex 64 entry point in two and we can't change the current x32 syscall -number without breaking user space compatibility. - -CC: Steven Rostedt -Cc: Richard Yao -Cc: Thomas Gleixner -Cc: Peter Zijlstra -Co-developed-by: Zebediah Figura -Signed-off-by: Zebediah Figura -Co-developed-by: Steven Noonan -Signed-off-by: Steven Noonan -Co-developed-by: Pierre-Loup A. 
Griffais -Signed-off-by: Pierre-Loup A. Griffais -Signed-off-by: Gabriel Krisman Bertazi -[Added compatibility code] -Co-developed-by: André Almeida -Signed-off-by: André Almeida - -Adjusted for v5.9: Removed `put_futex_key` calls. ---- - include/uapi/linux/futex.h | 20 +++ - kernel/futex.c | 352 ++++++++++++++++++++++++++++++++++++- - 2 files changed, 370 insertions(+), 2 deletions(-) - -diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index a89eb0accd5e2..580001e89c6ca 100644 ---- a/include/uapi/linux/futex.h -+++ b/include/uapi/linux/futex.h -@@ -21,6 +21,7 @@ - #define FUTEX_WAKE_BITSET 10 - #define FUTEX_WAIT_REQUEUE_PI 11 - #define FUTEX_CMP_REQUEUE_PI 12 -+#define FUTEX_WAIT_MULTIPLE 13 - - #define FUTEX_PRIVATE_FLAG 128 - #define FUTEX_CLOCK_REALTIME 256 -@@ -40,6 +41,8 @@ - FUTEX_PRIVATE_FLAG) - #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ - FUTEX_PRIVATE_FLAG) -+#define FUTEX_WAIT_MULTIPLE_PRIVATE (FUTEX_WAIT_MULTIPLE | \ -+ FUTEX_PRIVATE_FLAG) - - /* - * Support for robust futexes: the kernel cleans up held futexes at -@@ -150,4 +153,21 @@ struct robust_list_head { - (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \ - | ((oparg & 0xfff) << 12) | (cmparg & 0xfff)) - -+/* -+ * Maximum number of multiple futexes to wait for -+ */ -+#define FUTEX_MULTIPLE_MAX_COUNT 128 -+ -+/** -+ * struct futex_wait_block - Block of futexes to be waited for -+ * @uaddr: User address of the futex -+ * @val: Futex value expected by userspace -+ * @bitset: Bitset for the optional bitmasked wakeup -+ */ -+struct futex_wait_block { -+ __u32 __user *uaddr; -+ __u32 val; -+ __u32 bitset; -+}; -+ - #endif /* _UAPI_LINUX_FUTEX_H */ -diff --git a/kernel/futex.c b/kernel/futex.c -index a5876694a60eb..6f4bea76df460 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -197,6 +197,8 @@ struct futex_pi_state { - * @rt_waiter: rt_waiter storage for use with requeue_pi - * @requeue_pi_key: the requeue_pi target futex key - * @bitset: bitset for the optional bitmasked wakeup -+ * @uaddr: userspace address of futex -+ * @uval: expected futex's value - * - * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so - * we can wake only the relevant ones (hashed queues may be shared). -@@ -219,6 +221,8 @@ struct futex_q { - struct rt_mutex_waiter *rt_waiter; - union futex_key *requeue_pi_key; - u32 bitset; -+ u32 __user *uaddr; -+ u32 uval; - } __randomize_layout; - - static const struct futex_q futex_q_init = { -@@ -2304,6 +2308,29 @@ static int unqueue_me(struct futex_q *q) - return ret; - } - -+/** -+ * unqueue_multiple() - Remove several futexes from their futex_hash_bucket -+ * @q: The list of futexes to unqueue -+ * @count: Number of futexes in the list -+ * -+ * Helper to unqueue a list of futexes. This can't fail. -+ * -+ * Return: -+ * - >=0 - Index of the last futex that was awoken; -+ * - -1 - If no futex was awoken -+ */ -+static int unqueue_multiple(struct futex_q *q, int count) -+{ -+ int ret = -1; -+ int i; -+ -+ for (i = 0; i < count; i++) { -+ if (!unqueue_me(&q[i])) -+ ret = i; -+ } -+ return ret; -+} -+ - /* - * PI futexes can not be requeued and must remove themself from the - * hash bucket. The hash bucket lock (i.e. 
lock_ptr) is held on entry -@@ -2662,6 +2689,205 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, - return ret; - } - -+/** -+ * futex_wait_multiple_setup() - Prepare to wait and enqueue multiple futexes -+ * @qs: The corresponding futex list -+ * @count: The size of the lists -+ * @flags: Futex flags (FLAGS_SHARED, etc.) -+ * @awaken: Index of the last awoken futex -+ * -+ * Prepare multiple futexes in a single step and enqueue them. This may fail if -+ * the futex list is invalid or if any futex was already awoken. On success the -+ * task is ready to interruptible sleep. -+ * -+ * Return: -+ * - 1 - One of the futexes was awaken by another thread -+ * - 0 - Success -+ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL -+ */ -+static int futex_wait_multiple_setup(struct futex_q *qs, int count, -+ unsigned int flags, int *awaken) -+{ -+ struct futex_hash_bucket *hb; -+ int ret, i; -+ u32 uval; -+ -+ /* -+ * Enqueuing multiple futexes is tricky, because we need to -+ * enqueue each futex in the list before dealing with the next -+ * one to avoid deadlocking on the hash bucket. But, before -+ * enqueuing, we need to make sure that current->state is -+ * TASK_INTERRUPTIBLE, so we don't absorb any awake events, which -+ * cannot be done before the get_futex_key of the next key, -+ * because it calls get_user_pages, which can sleep. Thus, we -+ * fetch the list of futexes keys in two steps, by first pinning -+ * all the memory keys in the futex key, and only then we read -+ * each key and queue the corresponding futex. -+ */ -+retry: -+ for (i = 0; i < count; i++) { -+ qs[i].key = FUTEX_KEY_INIT; -+ ret = get_futex_key(qs[i].uaddr, flags & FLAGS_SHARED, -+ &qs[i].key, FUTEX_READ); -+ if (unlikely(ret)) { -+ return ret; -+ } -+ } -+ -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ for (i = 0; i < count; i++) { -+ struct futex_q *q = &qs[i]; -+ -+ hb = queue_lock(q); -+ -+ ret = get_futex_value_locked(&uval, q->uaddr); -+ if (ret) { -+ /* -+ * We need to try to handle the fault, which -+ * cannot be done without sleep, so we need to -+ * undo all the work already done, to make sure -+ * we don't miss any wake ups. Therefore, clean -+ * up, handle the fault and retry from the -+ * beginning. -+ */ -+ queue_unlock(hb); -+ -+ /* -+ * Keys 0..(i-1) are implicitly put -+ * on unqueue_multiple. -+ */ -+ *awaken = unqueue_multiple(qs, i); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ /* -+ * On a real fault, prioritize the error even if -+ * some other futex was awoken. Userspace gave -+ * us a bad address, -EFAULT them. -+ */ -+ ret = get_user(uval, q->uaddr); -+ if (ret) -+ return ret; -+ -+ /* -+ * Even if the page fault was handled, If -+ * something was already awaken, we can safely -+ * give up and succeed to give a hint for userspace to -+ * acquire the right futex faster. -+ */ -+ if (*awaken >= 0) -+ return 1; -+ -+ goto retry; -+ } -+ -+ if (uval != q->uval) { -+ queue_unlock(hb); -+ -+ /* -+ * If something was already awaken, we can -+ * safely ignore the error and succeed. -+ */ -+ *awaken = unqueue_multiple(qs, i); -+ __set_current_state(TASK_RUNNING); -+ if (*awaken >= 0) -+ return 1; -+ -+ return -EWOULDBLOCK; -+ } -+ -+ /* -+ * The bucket lock can't be held while dealing with the -+ * next futex. Queue each futex at this moment so hb can -+ * be unlocked. 
-+ */ -+ queue_me(&qs[i], hb); -+ } -+ return 0; -+} -+ -+/** -+ * futex_wait_multiple() - Prepare to wait on and enqueue several futexes -+ * @qs: The list of futexes to wait on -+ * @op: Operation code from futex's syscall -+ * @count: The number of objects -+ * @abs_time: Timeout before giving up and returning to userspace -+ * -+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function -+ * sleeps on a group of futexes and returns on the first futex that -+ * triggered, or after the timeout has elapsed. -+ * -+ * Return: -+ * - >=0 - Hint to the futex that was awoken -+ * - <0 - On error -+ */ -+static int futex_wait_multiple(struct futex_q *qs, int op, -+ u32 count, ktime_t *abs_time) -+{ -+ struct hrtimer_sleeper timeout, *to; -+ int ret, flags = 0, hint = 0; -+ unsigned int i; -+ -+ if (!(op & FUTEX_PRIVATE_FLAG)) -+ flags |= FLAGS_SHARED; -+ -+ if (op & FUTEX_CLOCK_REALTIME) -+ flags |= FLAGS_CLOCKRT; -+ -+ to = futex_setup_timer(abs_time, &timeout, flags, 0); -+ while (1) { -+ ret = futex_wait_multiple_setup(qs, count, flags, &hint); -+ if (ret) { -+ if (ret > 0) { -+ /* A futex was awaken during setup */ -+ ret = hint; -+ } -+ break; -+ } -+ -+ if (to) -+ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); -+ -+ /* -+ * Avoid sleeping if another thread already tried to -+ * wake us. -+ */ -+ for (i = 0; i < count; i++) { -+ if (plist_node_empty(&qs[i].list)) -+ break; -+ } -+ -+ if (i == count && (!to || to->task)) -+ freezable_schedule(); -+ -+ ret = unqueue_multiple(qs, count); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ if (ret >= 0) -+ break; -+ if (to && !to->task) { -+ ret = -ETIMEDOUT; -+ break; -+ } else if (signal_pending(current)) { -+ ret = -ERESTARTSYS; -+ break; -+ } -+ /* -+ * The final case is a spurious wakeup, for -+ * which just retry. -+ */ -+ } -+ -+ if (to) { -+ hrtimer_cancel(&to->timer); -+ destroy_hrtimer_on_stack(&to->timer); -+ } -+ -+ return ret; -+} -+ - static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, - ktime_t *abs_time, u32 bitset) - { -@@ -3774,6 +4000,43 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - return -ENOSYS; - } - -+/** -+ * futex_read_wait_block - Read an array of futex_wait_block from userspace -+ * @uaddr: Userspace address of the block -+ * @count: Number of blocks to be read -+ * -+ * This function creates and allocate an array of futex_q (we zero it to -+ * initialize the fields) and then, for each futex_wait_block element from -+ * userspace, fill a futex_q element with proper values. 
-+ */ -+inline struct futex_q *futex_read_wait_block(u32 __user *uaddr, u32 count) -+{ -+ unsigned int i; -+ struct futex_q *qs; -+ struct futex_wait_block fwb; -+ struct futex_wait_block __user *entry = -+ (struct futex_wait_block __user *)uaddr; -+ -+ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) -+ return ERR_PTR(-EINVAL); -+ -+ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); -+ if (!qs) -+ return ERR_PTR(-ENOMEM); -+ -+ for (i = 0; i < count; i++) { -+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { -+ kfree(qs); -+ return ERR_PTR(-EFAULT); -+ } -+ -+ qs[i].uaddr = fwb.uaddr; -+ qs[i].uval = fwb.val; -+ qs[i].bitset = fwb.bitset; -+ } -+ -+ return qs; -+} - - SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, -@@ -3786,7 +4049,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || -- cmd == FUTEX_WAIT_REQUEUE_PI)) { -+ cmd == FUTEX_WAIT_REQUEUE_PI || -+ cmd == FUTEX_WAIT_MULTIPLE)) { - if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) - return -EFAULT; - if (get_timespec64(&ts, utime)) -@@ -3807,6 +4071,25 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) - val2 = (u32) (unsigned long) utime; - -+ if (cmd == FUTEX_WAIT_MULTIPLE) { -+ int ret; -+ struct futex_q *qs; -+ -+#ifdef CONFIG_X86_X32 -+ if (unlikely(in_x32_syscall())) -+ return -ENOSYS; -+#endif -+ qs = futex_read_wait_block(uaddr, val); -+ -+ if (IS_ERR(qs)) -+ return PTR_ERR(qs); -+ -+ ret = futex_wait_multiple(qs, op, val, tp); -+ kfree(qs); -+ -+ return ret; -+ } -+ - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); - } - -@@ -3969,6 +4252,57 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, - #endif /* CONFIG_COMPAT */ - - #ifdef CONFIG_COMPAT_32BIT_TIME -+/** -+ * struct compat_futex_wait_block - Block of futexes to be waited for -+ * @uaddr: User address of the futex (compatible pointer) -+ * @val: Futex value expected by userspace -+ * @bitset: Bitset for the optional bitmasked wakeup -+ */ -+struct compat_futex_wait_block { -+ compat_uptr_t uaddr; -+ __u32 val; -+ __u32 bitset; -+}; -+ -+/** -+ * compat_futex_read_wait_block - Read an array of futex_wait_block from -+ * userspace -+ * @uaddr: Userspace address of the block -+ * @count: Number of blocks to be read -+ * -+ * This function does the same as futex_read_wait_block(), except that it -+ * converts the pointer to the futex from the compat version to the regular one. 
-+ */ -+inline struct futex_q *compat_futex_read_wait_block(u32 __user *uaddr, -+ u32 count) -+{ -+ unsigned int i; -+ struct futex_q *qs; -+ struct compat_futex_wait_block fwb; -+ struct compat_futex_wait_block __user *entry = -+ (struct compat_futex_wait_block __user *)uaddr; -+ -+ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) -+ return ERR_PTR(-EINVAL); -+ -+ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); -+ if (!qs) -+ return ERR_PTR(-ENOMEM); -+ -+ for (i = 0; i < count; i++) { -+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { -+ kfree(qs); -+ return ERR_PTR(-EFAULT); -+ } -+ -+ qs[i].uaddr = compat_ptr(fwb.uaddr); -+ qs[i].uval = fwb.val; -+ qs[i].bitset = fwb.bitset; -+ } -+ -+ return qs; -+} -+ - SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - struct old_timespec32 __user *, utime, u32 __user *, uaddr2, - u32, val3) -@@ -3980,7 +4314,8 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || -- cmd == FUTEX_WAIT_REQUEUE_PI)) { -+ cmd == FUTEX_WAIT_REQUEUE_PI || -+ cmd == FUTEX_WAIT_MULTIPLE)) { - if (get_old_timespec32(&ts, utime)) - return -EFAULT; - if (!timespec64_valid(&ts)) -@@ -3995,6 +4330,19 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) - val2 = (int) (unsigned long) utime; - -+ if (cmd == FUTEX_WAIT_MULTIPLE) { -+ int ret; -+ struct futex_q *qs = compat_futex_read_wait_block(uaddr, val); -+ -+ if (IS_ERR(qs)) -+ return PTR_ERR(qs); -+ -+ ret = futex_wait_multiple(qs, op, val, tp); -+ kfree(qs); -+ -+ return ret; -+ } -+ - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); - } - #endif /* CONFIG_COMPAT_32BIT_TIME */ - -From ccdddb50d330d2ee1a4d2cbfdd27bdd7fb10eec3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Andr=C3=A9=20Almeida?= -Date: Fri, 7 Feb 2020 23:28:02 -0300 -Subject: [PATCH 2/2] futex: Add Proton compatibility code - ---- - include/uapi/linux/futex.h | 2 +- - kernel/futex.c | 5 +++-- - 2 files changed, 4 insertions(+), 3 deletions(-) - -diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index 580001e89c6ca..a3e760886b8e7 100644 ---- a/include/uapi/linux/futex.h -+++ b/include/uapi/linux/futex.h -@@ -21,7 +21,7 @@ - #define FUTEX_WAKE_BITSET 10 - #define FUTEX_WAIT_REQUEUE_PI 11 - #define FUTEX_CMP_REQUEUE_PI 12 --#define FUTEX_WAIT_MULTIPLE 13 -+#define FUTEX_WAIT_MULTIPLE 31 - - #define FUTEX_PRIVATE_FLAG 128 - #define FUTEX_CLOCK_REALTIME 256 -diff --git a/kernel/futex.c b/kernel/futex.c -index 6f4bea76df460..03d89fe7b8392 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -4059,7 +4059,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - return -EINVAL; - - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } -@@ -4260,6 +4260,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, - */ - struct compat_futex_wait_block { - compat_uptr_t uaddr; -+ __u32 pad; - __u32 val; - __u32 bitset; - }; -@@ -4322,7 +4323,7 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - return -EINVAL; - - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } diff --git a/linux510-rc-tkg/linux510-tkg-patches/0009-glitched-bmq.patch 
b/linux510-rc-tkg/linux510-tkg-patches/0009-glitched-bmq.patch deleted file mode 100644 index e42e522..0000000 --- a/linux510-rc-tkg/linux510-tkg-patches/0009-glitched-bmq.patch +++ /dev/null @@ -1,90 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - BMQ - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9270a4370d54..30d01e647417 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -169,7 +169,7 @@ - /* - * From 0 .. 200. Higher means more swappy. 
- */ --int vm_swappiness = 60; -+int vm_swappiness = 20; - - static void set_task_reclaim_state(struct task_struct *task, - struct reclaim_state *rs) diff --git a/linux510-rc-tkg/linux510-tkg-patches/0009-glitched-ondemand-bmq.patch b/linux510-rc-tkg/linux510-tkg-patches/0009-glitched-ondemand-bmq.patch deleted file mode 100644 index a926040..0000000 --- a/linux510-rc-tkg/linux510-tkg-patches/0009-glitched-ondemand-bmq.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 6b423eebfd5d..61e3271675d6 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -21,10 +21,10 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) --#define DEF_SAMPLING_DOWN_FACTOR (1) -+#define DEF_FREQUENCY_UP_THRESHOLD (55) -+#define DEF_SAMPLING_DOWN_FACTOR (5) - #define MAX_SAMPLING_DOWN_FACTOR (100000) --#define MICRO_FREQUENCY_UP_THRESHOLD (95) -+#define MICRO_FREQUENCY_UP_THRESHOLD (63) - #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) - #define MIN_FREQUENCY_UP_THRESHOLD (1) - #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/linux510-rc-tkg/linux510-tkg-patches/0009-prjc_v5.10-r0.patch b/linux510-rc-tkg/linux510-tkg-patches/0009-prjc_v5.10-r0.patch deleted file mode 100644 index 550d29c..0000000 --- a/linux510-rc-tkg/linux510-tkg-patches/0009-prjc_v5.10-r0.patch +++ /dev/null @@ -1,8809 +0,0 @@ -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index a1068742a6df..b97a9697fde4 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -4611,6 +4611,12 @@ - - sbni= [NET] Granch SBNI12 leased line adapter - -+ sched_timeslice= -+ [KNL] Time slice in us for BMQ/PDS scheduler. -+ Format: (must be >= 1000) -+ Default: 4000 -+ See Documentation/scheduler/sched-BMQ.txt -+ - sched_debug [KNL] Enables verbose scheduler debug messages. - - schedstats= [KNL,X86] Enable or disable scheduled statistics. -diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index d4b32cc32bb7..14118e5168ef 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -1515,3 +1515,13 @@ is 10 seconds. - - The softlockup threshold is (``2 * watchdog_thresh``). Setting this - tunable to zero will disable lockup detection altogether. -+ -+yield_type: -+=========== -+ -+BMQ/PDS CPU scheduler only. This determines what type of yield calls -+to sched_yield will perform. -+ -+ 0 - No yield. -+ 1 - Deboost and requeue task. (default) -+ 2 - Set run queue skip task. -diff --git a/Documentation/scheduler/sched-BMQ.txt b/Documentation/scheduler/sched-BMQ.txt -new file mode 100644 -index 000000000000..05c84eec0f31 ---- /dev/null -+++ b/Documentation/scheduler/sched-BMQ.txt -@@ -0,0 +1,110 @@ -+ BitMap queue CPU Scheduler -+ -------------------------- -+ -+CONTENT -+======== -+ -+ Background -+ Design -+ Overview -+ Task policy -+ Priority management -+ BitMap Queue -+ CPU Assignment and Migration -+ -+ -+Background -+========== -+ -+BitMap Queue CPU scheduler, referred to as BMQ from here on, is an evolution -+of previous Priority and Deadline based Skiplist multiple queue scheduler(PDS), -+and inspired by Zircon scheduler. 
The goal of it is to keep the scheduler code -+simple, while efficiency and scalable for interactive tasks, such as desktop, -+movie playback and gaming etc. -+ -+Design -+====== -+ -+Overview -+-------- -+ -+BMQ use per CPU run queue design, each CPU(logical) has it's own run queue, -+each CPU is responsible for scheduling the tasks that are putting into it's -+run queue. -+ -+The run queue is a set of priority queues. Note that these queues are fifo -+queue for non-rt tasks or priority queue for rt tasks in data structure. See -+BitMap Queue below for details. BMQ is optimized for non-rt tasks in the fact -+that most applications are non-rt tasks. No matter the queue is fifo or -+priority, In each queue is an ordered list of runnable tasks awaiting execution -+and the data structures are the same. When it is time for a new task to run, -+the scheduler simply looks the lowest numbered queueue that contains a task, -+and runs the first task from the head of that queue. And per CPU idle task is -+also in the run queue, so the scheduler can always find a task to run on from -+its run queue. -+ -+Each task will assigned the same timeslice(default 4ms) when it is picked to -+start running. Task will be reinserted at the end of the appropriate priority -+queue when it uses its whole timeslice. When the scheduler selects a new task -+from the priority queue it sets the CPU's preemption timer for the remainder of -+the previous timeslice. When that timer fires the scheduler will stop execution -+on that task, select another task and start over again. -+ -+If a task blocks waiting for a shared resource then it's taken out of its -+priority queue and is placed in a wait queue for the shared resource. When it -+is unblocked it will be reinserted in the appropriate priority queue of an -+eligible CPU. -+ -+Task policy -+----------- -+ -+BMQ supports DEADLINE, FIFO, RR, NORMAL, BATCH and IDLE task policy like the -+mainline CFS scheduler. But BMQ is heavy optimized for non-rt task, that's -+NORMAL/BATCH/IDLE policy tasks. Below is the implementation detail of each -+policy. -+ -+DEADLINE -+ It is squashed as priority 0 FIFO task. -+ -+FIFO/RR -+ All RT tasks share one single priority queue in BMQ run queue designed. The -+complexity of insert operation is O(n). BMQ is not designed for system runs -+with major rt policy tasks. -+ -+NORMAL/BATCH/IDLE -+ BATCH and IDLE tasks are treated as the same policy. They compete CPU with -+NORMAL policy tasks, but they just don't boost. To control the priority of -+NORMAL/BATCH/IDLE tasks, simply use nice level. -+ -+ISO -+ ISO policy is not supported in BMQ. Please use nice level -20 NORMAL policy -+task instead. -+ -+Priority management -+------------------- -+ -+RT tasks have priority from 0-99. For non-rt tasks, there are three different -+factors used to determine the effective priority of a task. The effective -+priority being what is used to determine which queue it will be in. -+ -+The first factor is simply the task’s static priority. Which is assigned from -+task's nice level, within [-20, 19] in userland's point of view and [0, 39] -+internally. -+ -+The second factor is the priority boost. This is a value bounded between -+[-MAX_PRIORITY_ADJ, MAX_PRIORITY_ADJ] used to offset the base priority, it is -+modified by the following cases: -+ -+*When a thread has used up its entire timeslice, always deboost its boost by -+increasing by one. 
-+*When a thread gives up cpu control(voluntary or non-voluntary) to reschedule, -+and its switch-in time(time after last switch and run) below the thredhold -+based on its priority boost, will boost its boost by decreasing by one buti is -+capped at 0 (won’t go negative). -+ -+The intent in this system is to ensure that interactive threads are serviced -+quickly. These are usually the threads that interact directly with the user -+and cause user-perceivable latency. These threads usually do little work and -+spend most of their time blocked awaiting another user event. So they get the -+priority boost from unblocking while background threads that do most of the -+processing receive the priority penalty for using their entire timeslice. -diff --git a/fs/proc/base.c b/fs/proc/base.c -index 617db4e0faa0..f85926764f9a 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -479,7 +479,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, - seq_puts(m, "0 0 0\n"); - else - seq_printf(m, "%llu %llu %lu\n", -- (unsigned long long)task->se.sum_exec_runtime, -+ (unsigned long long)tsk_seruntime(task), - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); - -diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h -index 8874f681b056..59eb72bf7d5f 100644 ---- a/include/asm-generic/resource.h -+++ b/include/asm-generic/resource.h -@@ -23,7 +23,7 @@ - [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, \ - [RLIMIT_SIGPENDING] = { 0, 0 }, \ - [RLIMIT_MSGQUEUE] = { MQ_BYTES_MAX, MQ_BYTES_MAX }, \ -- [RLIMIT_NICE] = { 0, 0 }, \ -+ [RLIMIT_NICE] = { 30, 30 }, \ - [RLIMIT_RTPRIO] = { 0, 0 }, \ - [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \ - } -diff --git a/include/linux/sched.h b/include/linux/sched.h -index afe01e232935..8918609cb9f0 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -34,6 +34,7 @@ - #include - #include - #include -+#include - - /* task_struct member predeclarations (sorted alphabetically): */ - struct audit_context; -@@ -652,12 +653,18 @@ struct task_struct { - unsigned int ptrace; - - #ifdef CONFIG_SMP -- int on_cpu; - struct __call_single_node wake_entry; -+#endif -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) -+ int on_cpu; -+#endif -+ -+#ifdef CONFIG_SMP - #ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ - unsigned int cpu; - #endif -+#ifndef CONFIG_SCHED_ALT - unsigned int wakee_flips; - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; -@@ -671,6 +678,7 @@ struct task_struct { - */ - int recent_used_cpu; - int wake_cpu; -+#endif /* !CONFIG_SCHED_ALT */ - #endif - int on_rq; - -@@ -679,13 +687,33 @@ struct task_struct { - int normal_prio; - unsigned int rt_priority; - -+#ifdef CONFIG_SCHED_ALT -+ u64 last_ran; -+ s64 time_slice; -+#ifdef CONFIG_SCHED_BMQ -+ int boost_prio; -+ int bmq_idx; -+ struct list_head bmq_node; -+#endif /* CONFIG_SCHED_BMQ */ -+#ifdef CONFIG_SCHED_PDS -+ u64 deadline; -+ u64 priodl; -+ /* skip list level */ -+ int sl_level; -+ /* skip list node */ -+ struct skiplist_node sl_node; -+#endif /* CONFIG_SCHED_PDS */ -+ /* sched_clock time spent running */ -+ u64 sched_time; -+#else /* !CONFIG_SCHED_ALT */ - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; -+ struct sched_dl_entity dl; -+#endif - #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; - #endif -- struct sched_dl_entity dl; - - #ifdef CONFIG_UCLAMP_TASK - /* -@@ -1332,6 +1360,15 @@ struct task_struct { - */ - }; - 
-+#ifdef CONFIG_SCHED_ALT -+#define tsk_seruntime(t) ((t)->sched_time) -+/* replace the uncertian rt_timeout with 0UL */ -+#define tsk_rttimeout(t) (0UL) -+#else /* CFS */ -+#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) -+#define tsk_rttimeout(t) ((t)->rt.timeout) -+#endif /* !CONFIG_SCHED_ALT */ -+ - static inline struct pid *task_pid(struct task_struct *task) - { - return task->thread_pid; -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 1aff00b65f3c..179d77c8360e 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -1,5 +1,24 @@ - /* SPDX-License-Identifier: GPL-2.0 */ - -+#ifdef CONFIG_SCHED_ALT -+ -+static inline int dl_task(struct task_struct *p) -+{ -+ return 0; -+} -+ -+#ifdef CONFIG_SCHED_BMQ -+#define __tsk_deadline(p) (0UL) -+#endif -+ -+#ifdef CONFIG_SCHED_PDS -+#define __tsk_deadline(p) ((p)->priodl) -+#endif -+ -+#else -+ -+#define __tsk_deadline(p) ((p)->dl.deadline) -+ - /* - * SCHED_DEADLINE tasks has negative priorities, reflecting - * the fact that any of them has higher prio than RT and -@@ -19,6 +38,7 @@ static inline int dl_task(struct task_struct *p) - { - return dl_prio(p->prio); - } -+#endif /* CONFIG_SCHED_ALT */ - - static inline bool dl_time_before(u64 a, u64 b) - { -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 7d64feafc408..42730d27ceb5 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -20,11 +20,20 @@ - */ - - #define MAX_USER_RT_PRIO 100 -+ - #define MAX_RT_PRIO MAX_USER_RT_PRIO - - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - -+/* +/- priority levels from the base priority */ -+#ifdef CONFIG_SCHED_BMQ -+#define MAX_PRIORITY_ADJ 7 -+#endif -+#ifdef CONFIG_SCHED_PDS -+#define MAX_PRIORITY_ADJ 0 -+#endif -+ - /* - * Convert user-nice values [ -20 ... 0 ... 19 ] - * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..0a7565d0d3cf 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return true; -+#ifndef CONFIG_SCHED_ALT - if (policy == SCHED_DEADLINE) - return true; -+#endif - return false; - } - -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -new file mode 100644 -index 000000000000..47ca955a451d ---- /dev/null -+++ b/include/linux/skip_list.h -@@ -0,0 +1,177 @@ -+/* -+ * Copyright (C) 2016 Alfred Chen. -+ * -+ * Code based on Con Kolivas's skip list implementation for BFS, and -+ * which is based on example originally by William Pugh. -+ * -+ * Skip Lists are a probabilistic alternative to balanced trees, as -+ * described in the June 1990 issue of CACM and were invented by -+ * William Pugh in 1987. -+ * -+ * A couple of comments about this implementation: -+ * -+ * This file only provides a infrastructure of skip list. -+ * -+ * skiplist_node is embedded into container data structure, to get rid -+ * the dependency of kmalloc/kfree operation in scheduler code. -+ * -+ * A customized search function should be defined using DEFINE_SKIPLIST_INSERT -+ * macro and be used for skip list insert operation. -+ * -+ * Random Level is also not defined in this file, instead, it should be -+ * customized implemented and set to node->level then pass to the customized -+ * skiplist_insert function. 
-+ * -+ * Levels start at zero and go up to (NUM_SKIPLIST_LEVEL -1) -+ * -+ * NUM_SKIPLIST_LEVEL in this implementation is 8 instead of origin 16, -+ * considering that there will be 256 entries to enable the top level when using -+ * random level p=0.5, and that number is more than enough for a run queue usage -+ * in a scheduler usage. And it also help to reduce the memory usage of the -+ * embedded skip list node in task_struct to about 50%. -+ * -+ * The insertion routine has been implemented so as to use the -+ * dirty hack described in the CACM paper: if a random level is -+ * generated that is more than the current maximum level, the -+ * current maximum level plus one is used instead. -+ * -+ * BFS Notes: In this implementation of skiplists, there are bidirectional -+ * next/prev pointers and the insert function returns a pointer to the actual -+ * node the value is stored. The key here is chosen by the scheduler so as to -+ * sort tasks according to the priority list requirements and is no longer used -+ * by the scheduler after insertion. The scheduler lookup, however, occurs in -+ * O(1) time because it is always the first item in the level 0 linked list. -+ * Since the task struct stores a copy of the node pointer upon skiplist_insert, -+ * it can also remove it much faster than the original implementation with the -+ * aid of prev<->next pointer manipulation and no searching. -+ */ -+#ifndef _LINUX_SKIP_LIST_H -+#define _LINUX_SKIP_LIST_H -+ -+#include -+ -+#define NUM_SKIPLIST_LEVEL (8) -+ -+struct skiplist_node { -+ int level; /* Levels in this node */ -+ struct skiplist_node *next[NUM_SKIPLIST_LEVEL]; -+ struct skiplist_node *prev[NUM_SKIPLIST_LEVEL]; -+}; -+ -+#define SKIPLIST_NODE_INIT(name) { 0,\ -+ {&name, &name, &name, &name,\ -+ &name, &name, &name, &name},\ -+ {&name, &name, &name, &name,\ -+ &name, &name, &name, &name},\ -+ } -+ -+static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) -+{ -+ /* only level 0 ->next matters in skiplist_empty() */ -+ WRITE_ONCE(node->next[0], node); -+} -+ -+/** -+ * FULL_INIT_SKIPLIST_NODE -- fully init a skiplist_node, expecially for header -+ * @node: the skip list node to be inited. -+ */ -+static inline void FULL_INIT_SKIPLIST_NODE(struct skiplist_node *node) -+{ -+ int i; -+ -+ node->level = 0; -+ for (i = 0; i < NUM_SKIPLIST_LEVEL; i++) { -+ WRITE_ONCE(node->next[i], node); -+ node->prev[i] = node; -+ } -+} -+ -+/** -+ * skiplist_empty - test whether a skip list is empty -+ * @head: the skip list to test. -+ */ -+static inline int skiplist_empty(const struct skiplist_node *head) -+{ -+ return READ_ONCE(head->next[0]) == head; -+} -+ -+/** -+ * skiplist_entry - get the struct for this entry -+ * @ptr: the &struct skiplist_node pointer. -+ * @type: the type of the struct this is embedded in. -+ * @member: the name of the skiplist_node within the struct. -+ */ -+#define skiplist_entry(ptr, type, member) \ -+ container_of(ptr, type, member) -+ -+/** -+ * DEFINE_SKIPLIST_INSERT_FUNC -- macro to define a customized skip list insert -+ * function, which takes two parameters, first one is the header node of the -+ * skip list, second one is the skip list node to be inserted -+ * @func_name: the customized skip list insert function name -+ * @search_func: the search function to be used, which takes two parameters, -+ * 1st one is the itrator of skiplist_node in the list, the 2nd is the skip list -+ * node to be inserted, the function should return true if search should be -+ * continued, otherwise return false. 
-+ * Returns 1 if @node is inserted as the first item of skip list at level zero, -+ * otherwise 0 -+ */ -+#define DEFINE_SKIPLIST_INSERT_FUNC(func_name, search_func)\ -+static inline int func_name(struct skiplist_node *head, struct skiplist_node *node)\ -+{\ -+ struct skiplist_node *update[NUM_SKIPLIST_LEVEL];\ -+ struct skiplist_node *p, *q;\ -+ int k = head->level;\ -+\ -+ p = head;\ -+ do {\ -+ while (q = p->next[k], q != head && search_func(q, node))\ -+ p = q;\ -+ update[k] = p;\ -+ } while (--k >= 0);\ -+\ -+ k = node->level;\ -+ if (unlikely(k > head->level)) {\ -+ node->level = k = ++head->level;\ -+ update[k] = head;\ -+ }\ -+\ -+ do {\ -+ p = update[k];\ -+ q = p->next[k];\ -+ node->next[k] = q;\ -+ p->next[k] = node;\ -+ node->prev[k] = p;\ -+ q->prev[k] = node;\ -+ } while (--k >= 0);\ -+\ -+ return (p == head);\ -+} -+ -+/** -+ * skiplist_del_init -- delete skip list node from a skip list and reset it's -+ * init state -+ * @head: the header node of the skip list to be deleted from. -+ * @node: the skip list node to be deleted, the caller need to ensure @node is -+ * in skip list which @head represent. -+ * Returns 1 if @node is the first item of skip level at level zero, otherwise 0 -+ */ -+static inline int -+skiplist_del_init(struct skiplist_node *head, struct skiplist_node *node) -+{ -+ int l, m = node->level; -+ -+ for (l = 0; l <= m; l++) { -+ node->prev[l]->next[l] = node->next[l]; -+ node->next[l]->prev[l] = node->prev[l]; -+ } -+ if (m == head->level && m > 0) { -+ while (head->next[m] == head && m > 0) -+ m--; -+ head->level = m; -+ } -+ INIT_SKIPLIST_NODE(node); -+ -+ return (node->prev[0] == head); -+} -+#endif /* _LINUX_SKIP_LIST_H */ -diff --git a/init/Kconfig b/init/Kconfig -index d6a0b31b13dc..2122dba5596f 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -770,9 +770,39 @@ config GENERIC_SCHED_CLOCK - - menu "Scheduler features" - -+menuconfig SCHED_ALT -+ bool "Alternative CPU Schedulers" -+ default y -+ help -+ This feature enable alternative CPU scheduler" -+ -+if SCHED_ALT -+ -+choice -+ prompt "Alternative CPU Scheduler" -+ default SCHED_BMQ -+ -+config SCHED_BMQ -+ bool "BMQ CPU scheduler" -+ help -+ The BitMap Queue CPU scheduler for excellent interactivity and -+ responsiveness on the desktop and solid scalability on normal -+ hardware and commodity servers. -+ -+config SCHED_PDS -+ bool "PDS CPU scheduler" -+ help -+ The Priority and Deadline based Skip list multiple queue CPU -+ Scheduler. -+ -+endchoice -+ -+endif -+ - config UCLAMP_TASK - bool "Enable utilization clamping for RT/FAIR tasks" - depends on CPU_FREQ_GOV_SCHEDUTIL -+ depends on !SCHED_ALT - help - This feature enables the scheduler to track the clamped utilization - of each CPU based on RUNNABLE tasks scheduled on that CPU. -@@ -858,6 +888,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION -+ depends on !SCHED_ALT - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -944,7 +975,7 @@ menuconfig CGROUP_SCHED - bandwidth allocation to such task groups. It uses cgroups to group - tasks. 
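The skip-list header above is small enough that its intended usage can be shown outside the kernel. Below is a minimal userspace sketch, not part of the patch, that unrolls the DEFINE_SKIPLIST_INSERT_FUNC() expansion with only 4 levels and the caller-chosen level fixed at 0; struct item, item_search, the sl_* names and the key values are illustrative stand-ins for the scheduler's task_struct embedding and its priority/deadline sort key.

/* userspace sketch -- build with: cc -std=c11 -o skiplist_demo skiplist_demo.c */
#include <stdio.h>
#include <stddef.h>

#define LEVELS 4                 /* the kernel header uses 8; 4 is plenty for a demo */

struct sl_node {
    int level;
    struct sl_node *next[LEVELS];
    struct sl_node *prev[LEVELS];
};

static void sl_init_head(struct sl_node *n)
{
    n->level = 0;
    for (int i = 0; i < LEVELS; i++) {
        n->next[i] = n;
        n->prev[i] = n;
    }
}

/* container with an embedded node, like task_struct embeds its skiplist node */
struct item {
    unsigned long key;           /* stand-in for a priority/deadline sort key */
    struct sl_node node;
};

#define sl_entry(ptr) ((struct item *)((char *)(ptr) - offsetof(struct item, node)))

/* "keep searching" while the iterator sorts before the node being inserted */
static int item_search(struct sl_node *it, struct sl_node *node)
{
    return sl_entry(it)->key <= sl_entry(node)->key;
}

/* same shape as the DEFINE_SKIPLIST_INSERT_FUNC() expansion, unrolled */
static int sl_insert(struct sl_node *head, struct sl_node *node)
{
    struct sl_node *update[LEVELS], *p = head, *q;
    int k = head->level;

    do {
        while (q = p->next[k], q != head && item_search(q, node))
            p = q;
        update[k] = p;
    } while (--k >= 0);

    k = node->level;                     /* caller-chosen "random" level */
    if (k > head->level) {
        node->level = k = ++head->level;
        update[k] = head;
    }

    do {
        p = update[k];
        q = p->next[k];
        node->next[k] = q;
        p->next[k] = node;
        node->prev[k] = p;
        q->prev[k] = node;
    } while (--k >= 0);

    return p == head;                    /* 1: node is now first at level 0 */
}

int main(void)
{
    struct sl_node head;
    struct item a = { .key = 30 }, b = { .key = 10 }, c = { .key = 20 };

    sl_init_head(&head);
    a.node.level = b.node.level = c.node.level = 0;

    sl_insert(&head, &a.node);
    sl_insert(&head, &b.node);
    sl_insert(&head, &c.node);

    /* the scheduler's O(1) lookup: the first level-0 entry is the best key */
    for (struct sl_node *n = head.next[0]; n != &head; n = n->next[0])
        printf("key %lu\n", sl_entry(n)->key);          /* prints 10 20 30 */

    return 0;
}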
- --if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_ALT - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED -@@ -1200,6 +1231,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -+ depends on !SCHED_ALT - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -diff --git a/init/init_task.c b/init/init_task.c -index f6889fce64af..5a23122f3d2c 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -75,9 +75,15 @@ struct task_struct init_task - .stack = init_stack, - .usage = REFCOUNT_INIT(2), - .flags = PF_KTHREAD, -+#ifdef CONFIG_SCHED_ALT -+ .prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, -+ .static_prio = DEFAULT_PRIO, -+ .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, -+#else - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, - .normal_prio = MAX_PRIO - 20, -+#endif - .policy = SCHED_NORMAL, - .cpus_ptr = &init_task.cpus_mask, - .cpus_mask = CPU_MASK_ALL, -@@ -87,6 +93,19 @@ struct task_struct init_task - .restart_block = { - .fn = do_no_restart_syscall, - }, -+#ifdef CONFIG_SCHED_ALT -+#ifdef CONFIG_SCHED_BMQ -+ .boost_prio = 0, -+ .bmq_idx = 15, -+ .bmq_node = LIST_HEAD_INIT(init_task.bmq_node), -+#endif -+#ifdef CONFIG_SCHED_PDS -+ .deadline = 0, -+ .sl_level = 0, -+ .sl_node = SKIPLIST_NODE_INIT(init_task.sl_node), -+#endif -+ .time_slice = HZ, -+#else - .se = { - .group_node = LIST_HEAD_INIT(init_task.se.group_node), - }, -@@ -94,6 +113,7 @@ struct task_struct init_task - .run_list = LIST_HEAD_INIT(init_task.rt.run_list), - .time_slice = RR_TIMESLICE, - }, -+#endif - .tasks = LIST_HEAD_INIT(init_task.tasks), - #ifdef CONFIG_SMP - .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), -diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index 642415b8c3c9..7e0e1fe18035 100644 ---- a/kernel/cgroup/cpuset.c -+++ b/kernel/cgroup/cpuset.c -@@ -636,7 +636,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) - return ret; - } - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_ALT) - /* - * Helper routine for generate_sched_domains(). - * Do cpusets a, b have overlapping effective cpus_allowed masks? 
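For orientation, the init_task values in the hunk above work out as follows with the stock constants (MAX_RT_PRIO = 100 and the usual NICE_WIDTH of 40, neither of which this patch changes); the snippet below merely reproduces that arithmetic and is not kernel code.

#include <stdio.h>

#define MAX_RT_PRIO   100
#define NICE_WIDTH    40                              /* nice levels -20..19 */
#define MAX_PRIO      (MAX_RT_PRIO + NICE_WIDTH)      /* 140 */
#define DEFAULT_PRIO  (MAX_RT_PRIO + NICE_WIDTH / 2)  /* 120 */

int main(void)
{
    printf("CFS  init_task prio: %d\n", MAX_PRIO - 20);      /* 120 */
    printf("BMQ  init_task prio: %d\n", DEFAULT_PRIO + 7);   /* 127 (MAX_PRIORITY_ADJ = 7) */
    printf("PDS  init_task prio: %d\n", DEFAULT_PRIO + 0);   /* 120 (MAX_PRIORITY_ADJ = 0) */
    return 0;
}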
-@@ -1009,7 +1009,7 @@ static void rebuild_sched_domains_locked(void) - /* Have scheduler rebuild the domains */ - partition_and_rebuild_sched_domains(ndoms, doms, attr); - } --#else /* !CONFIG_SMP */ -+#else /* !CONFIG_SMP || CONFIG_SCHED_ALT */ - static void rebuild_sched_domains_locked(void) - { - } -diff --git a/kernel/delayacct.c b/kernel/delayacct.c -index 27725754ac99..769d773c7182 100644 ---- a/kernel/delayacct.c -+++ b/kernel/delayacct.c -@@ -106,7 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) - */ - t1 = tsk->sched_info.pcount; - t2 = tsk->sched_info.run_delay; -- t3 = tsk->se.sum_exec_runtime; -+ t3 = tsk_seruntime(tsk); - - d->cpu_count += t1; - -diff --git a/kernel/exit.c b/kernel/exit.c -index 733e80f334e7..3f3506c851fd 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -121,7 +121,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->curr_target = next_thread(tsk); - } - -- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, -+ add_device_randomness((const void*) &tsk_seruntime(tsk), - sizeof(unsigned long long)); - - /* -@@ -142,7 +142,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->inblock += task_io_get_inblock(tsk); - sig->oublock += task_io_get_oublock(tsk); - task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; -+ sig->sum_sched_runtime += tsk_seruntime(tsk); - sig->nr_threads--; - __unhash_process(tsk, group_dead); - write_sequnlock(&sig->stats_lock); -diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c -index f6310f848f34..4176ad070bc9 100644 ---- a/kernel/livepatch/transition.c -+++ b/kernel/livepatch/transition.c -@@ -306,7 +306,11 @@ static bool klp_try_switch_task(struct task_struct *task) - */ - rq = task_rq_lock(task, &flags); - -+#ifdef CONFIG_SCHED_ALT -+ if (task_running(task) && task != current) { -+#else - if (task_running(rq, task) && task != current) { -+#endif - snprintf(err_buf, STACK_ERR_BUF_SIZE, - "%s: %s:%d is running\n", __func__, task->comm, - task->pid); -diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index cfdd5b93264d..84c284eb544a 100644 ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -227,15 +227,19 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, - * Only use with rt_mutex_waiter_{less,equal}() - */ - #define task_to_waiter(p) \ -- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } -+ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = __tsk_deadline(p) } - - static inline int - rt_mutex_waiter_less(struct rt_mutex_waiter *left, - struct rt_mutex_waiter *right) - { -+#ifdef CONFIG_SCHED_PDS -+ return (left->deadline < right->deadline); -+#else - if (left->prio < right->prio) - return 1; - -+#ifndef CONFIG_SCHED_BMQ - /* - * If both waiters have dl_prio(), we check the deadlines of the - * associated tasks. -@@ -244,17 +248,23 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left, - */ - if (dl_prio(left->prio)) - return dl_time_before(left->deadline, right->deadline); -+#endif - - return 0; -+#endif - } - - static inline int - rt_mutex_waiter_equal(struct rt_mutex_waiter *left, - struct rt_mutex_waiter *right) - { -+#ifdef CONFIG_SCHED_PDS -+ return (left->deadline == right->deadline); -+#else - if (left->prio != right->prio) - return 0; - -+#ifndef CONFIG_SCHED_BMQ - /* - * If both waiters have dl_prio(), we check the deadlines of the - * associated tasks. 
-@@ -263,8 +273,10 @@ rt_mutex_waiter_equal(struct rt_mutex_waiter *left, - */ - if (dl_prio(left->prio)) - return left->deadline == right->deadline; -+#endif - - return 1; -+#endif - } - - static void -@@ -678,7 +690,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, - * the values of the node being removed. - */ - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - rt_mutex_enqueue(lock, waiter); - -@@ -951,7 +963,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, - waiter->task = task; - waiter->lock = lock; - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - /* Get the top priority waiter on the lock */ - if (rt_mutex_has_waiters(lock)) -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 5fc9c9b70862..eb6d7d87779f 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -22,14 +22,20 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) - CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer - endif - --obj-y += core.o loadavg.o clock.o cputime.o --obj-y += idle.o fair.o rt.o deadline.o --obj-y += wait.o wait_bit.o swait.o completion.o -- --obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o -+ifdef CONFIG_SCHED_ALT -+obj-y += alt_core.o alt_debug.o -+else -+obj-y += core.o -+obj-y += fair.o rt.o deadline.o -+obj-$(CONFIG_SMP) += cpudeadline.o stop_task.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o --obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o -+endif -+obj-y += loadavg.o clock.o cputime.o -+obj-y += idle.o -+obj-y += wait.o wait_bit.o swait.o completion.o -+obj-$(CONFIG_SMP) += cpupri.o pelt.o topology.o -+obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -new file mode 100644 -index 000000000000..f36264fea75c ---- /dev/null -+++ b/kernel/sched/alt_core.c -@@ -0,0 +1,6360 @@ -+/* -+ * kernel/sched/alt_core.c -+ * -+ * Core alternative kernel scheduler code and related syscalls -+ * -+ * Copyright (C) 1991-2002 Linus Torvalds -+ * -+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes -+ * a whole lot of those previous things. -+ * 2017-09-06 Priority and Deadline based Skip list multiple queue kernel -+ * scheduler by Alfred Chen. -+ * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. 
-+ */ -+#include "sched.h" -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+ -+#include "../workqueue_internal.h" -+#include "../../fs/io-wq.h" -+#include "../smpboot.h" -+ -+#include "pelt.h" -+#include "smp.h" -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+#define ALT_SCHED_VERSION "v5.9-r0" -+ -+/* rt_prio(prio) defined in include/linux/sched/rt.h */ -+#define rt_task(p) rt_prio((p)->prio) -+#define rt_policy(policy) ((policy) == SCHED_FIFO || (policy) == SCHED_RR) -+#define task_has_rt_policy(p) (rt_policy((p)->policy)) -+ -+#define STOP_PRIO (MAX_RT_PRIO - 1) -+ -+/* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ -+u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000); -+ -+static int __init sched_timeslice(char *str) -+{ -+ int timeslice_us; -+ -+ get_option(&str, ×lice_us); -+ if (timeslice_us >= 1000) -+ sched_timeslice_ns = timeslice_us * 1000; -+ -+ return 0; -+} -+early_param("sched_timeslice", sched_timeslice); -+ -+/* Reschedule if less than this many μs left */ -+#define RESCHED_NS (100 * 1000) -+ -+/** -+ * sched_yield_type - Choose what sort of yield sched_yield will perform. -+ * 0: No yield. -+ * 1: Deboost and requeue task. (default) -+ * 2: Set rq skip task. -+ */ -+int sched_yield_type __read_mostly = 1; -+ -+#ifdef CONFIG_SMP -+static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; -+ -+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); -+ -+#ifdef CONFIG_SCHED_SMT -+DEFINE_STATIC_KEY_FALSE(sched_smt_present); -+EXPORT_SYMBOL_GPL(sched_smt_present); -+#endif -+ -+/* -+ * Keep a unique ID per domain (we use the first CPUs number in the cpumask of -+ * the domain), this allows us to quickly tell if two cpus are in the same cache -+ * domain, see cpus_share_cache(). 
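The sched_timeslice handling above is easy to misread because the parameter is taken in microseconds while the variable holds nanoseconds. A small userspace sketch of the same logic follows; parse_sched_timeslice is a made-up name standing in for the early_param handler.

#include <stdio.h>
#include <stdlib.h>

/* default of 4 ms, matching sched_timeslice_ns above */
static unsigned long long sched_timeslice_ns = 4ULL * 1000 * 1000;

/* mirrors the early_param("sched_timeslice", ...) handler: the argument is in
 * microseconds and anything below 1000 us is ignored */
static void parse_sched_timeslice(const char *str)
{
    int timeslice_us = atoi(str);

    if (timeslice_us >= 1000)
        sched_timeslice_ns = (unsigned long long)timeslice_us * 1000;
}

int main(void)
{
    parse_sched_timeslice("2000");        /* as if booted with sched_timeslice=2000 */
    printf("%llu ns (%llu ms)\n", sched_timeslice_ns, sched_timeslice_ns / 1000000);
    return 0;
}

So booting with sched_timeslice=2000 selects 2 ms slices, while a value below 1000 is silently ignored and the 4 ms default stays in effect.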
-+ */ -+DEFINE_PER_CPU(int, sd_llc_id); -+#endif /* CONFIG_SMP */ -+ -+static DEFINE_MUTEX(sched_hotcpu_mutex); -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+#define IDLE_WM (IDLE_TASK_SCHED_PRIO) -+ -+#ifdef CONFIG_SCHED_SMT -+static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; -+#endif -+static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; -+ -+#ifdef CONFIG_SCHED_BMQ -+#include "bmq_imp.h" -+#endif -+#ifdef CONFIG_SCHED_PDS -+#include "pds_imp.h" -+#endif -+ -+static inline void update_sched_rq_watermark(struct rq *rq) -+{ -+ unsigned long watermark = sched_queue_watermark(rq); -+ unsigned long last_wm = rq->watermark; -+ unsigned long i; -+ int cpu; -+ -+ /*printk(KERN_INFO "sched: watermark(%d) %d, last %d\n", -+ cpu_of(rq), watermark, last_wm);*/ -+ if (watermark == last_wm) -+ return; -+ -+ rq->watermark = watermark; -+ cpu = cpu_of(rq); -+ if (watermark < last_wm) { -+ for (i = watermark + 1; i <= last_wm; i++) -+ cpumask_andnot(&sched_rq_watermark[i], -+ &sched_rq_watermark[i], cpumask_of(cpu)); -+#ifdef CONFIG_SCHED_SMT -+ if (!static_branch_likely(&sched_smt_present)) -+ return; -+ if (IDLE_WM == last_wm) -+ cpumask_andnot(&sched_sg_idle_mask, -+ &sched_sg_idle_mask, cpu_smt_mask(cpu)); -+#endif -+ return; -+ } -+ /* last_wm < watermark */ -+ for (i = last_wm + 1; i <= watermark; i++) -+ cpumask_set_cpu(cpu, &sched_rq_watermark[i]); -+#ifdef CONFIG_SCHED_SMT -+ if (!static_branch_likely(&sched_smt_present)) -+ return; -+ if (IDLE_WM == watermark) { -+ cpumask_t tmp; -+ cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[IDLE_WM]); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), -+ &sched_sg_idle_mask); -+ } -+#endif -+} -+ -+static inline struct task_struct *rq_runnable_task(struct rq *rq) -+{ -+ struct task_struct *next = sched_rq_first_task(rq); -+ -+ if (unlikely(next == rq->skip)) -+ next = sched_rq_next_task(next, rq); -+ -+ return next; -+} -+ -+/* -+ * Serialization rules: -+ * -+ * Lock order: -+ * -+ * p->pi_lock -+ * rq->lock -+ * hrtimer_cpu_base->lock (hrtimer_start() for bandwidth controls) -+ * -+ * rq1->lock -+ * rq2->lock where: rq1 < rq2 -+ * -+ * Regular state: -+ * -+ * Normal scheduling state is serialized by rq->lock. __schedule() takes the -+ * local CPU's rq->lock, it optionally removes the task from the runqueue and -+ * always looks at the local rq data structures to find the most elegible task -+ * to run next. -+ * -+ * Task enqueue is also under rq->lock, possibly taken from another CPU. -+ * Wakeups from another LLC domain might use an IPI to transfer the enqueue to -+ * the local CPU to avoid bouncing the runqueue state around [ see -+ * ttwu_queue_wakelist() ] -+ * -+ * Task wakeup, specifically wakeups that involve migration, are horribly -+ * complicated to avoid having to take two rq->locks. -+ * -+ * Special state: -+ * -+ * System-calls and anything external will use task_rq_lock() which acquires -+ * both p->pi_lock and rq->lock. 
As a consequence the state they change is -+ * stable while holding either lock: -+ * -+ * - sched_setaffinity()/ -+ * set_cpus_allowed_ptr(): p->cpus_ptr, p->nr_cpus_allowed -+ * - set_user_nice(): p->se.load, p->*prio -+ * - __sched_setscheduler(): p->sched_class, p->policy, p->*prio, -+ * p->se.load, p->rt_priority, -+ * p->dl.dl_{runtime, deadline, period, flags, bw, density} -+ * - sched_setnuma(): p->numa_preferred_nid -+ * - sched_move_task()/ -+ * cpu_cgroup_fork(): p->sched_task_group -+ * - uclamp_update_active() p->uclamp* -+ * -+ * p->state <- TASK_*: -+ * -+ * is changed locklessly using set_current_state(), __set_current_state() or -+ * set_special_state(), see their respective comments, or by -+ * try_to_wake_up(). This latter uses p->pi_lock to serialize against -+ * concurrent self. -+ * -+ * p->on_rq <- { 0, 1 = TASK_ON_RQ_QUEUED, 2 = TASK_ON_RQ_MIGRATING }: -+ * -+ * is set by activate_task() and cleared by deactivate_task(), under -+ * rq->lock. Non-zero indicates the task is runnable, the special -+ * ON_RQ_MIGRATING state is used for migration without holding both -+ * rq->locks. It indicates task_cpu() is not stable, see task_rq_lock(). -+ * -+ * p->on_cpu <- { 0, 1 }: -+ * -+ * is set by prepare_task() and cleared by finish_task() such that it will be -+ * set before p is scheduled-in and cleared after p is scheduled-out, both -+ * under rq->lock. Non-zero indicates the task is running on its CPU. -+ * -+ * [ The astute reader will observe that it is possible for two tasks on one -+ * CPU to have ->on_cpu = 1 at the same time. ] -+ * -+ * task_cpu(p): is changed by set_task_cpu(), the rules are: -+ * -+ * - Don't call set_task_cpu() on a blocked task: -+ * -+ * We don't care what CPU we're not running on, this simplifies hotplug, -+ * the CPU assignment of blocked tasks isn't required to be valid. -+ * -+ * - for try_to_wake_up(), called under p->pi_lock: -+ * -+ * This allows try_to_wake_up() to only take one rq->lock, see its comment. 
-+ * -+ * - for migration called under rq->lock: -+ * [ see task_on_rq_migrating() in task_rq_lock() ] -+ * -+ * o move_queued_task() -+ * o detach_task() -+ * -+ * - for migration called under double_rq_lock(): -+ * -+ * o __migrate_swap_task() -+ * o push_rt_task() / pull_rt_task() -+ * o push_dl_task() / pull_dl_task() -+ * o dl_task_offline_migration() -+ * -+ */ -+ -+/* -+ * Context: p->pi_lock -+ */ -+static inline struct rq -+*__task_access_lock(struct task_struct *p, raw_spinlock_t **plock) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock(&rq->lock); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ *plock = NULL; -+ return rq; -+ } -+ } -+} -+ -+static inline void -+__task_access_unlock(struct task_struct *p, raw_spinlock_t *lock) -+{ -+ if (NULL != lock) -+ raw_spin_unlock(lock); -+} -+ -+static inline struct rq -+*task_access_lock_irqsave(struct task_struct *p, raw_spinlock_t **plock, -+ unsigned long *flags) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock_irqsave(&rq->lock, *flags); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&rq->lock, *flags); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ raw_spin_lock_irqsave(&p->pi_lock, *flags); -+ if (likely(!p->on_cpu && !p->on_rq && -+ rq == task_rq(p))) { -+ *plock = &p->pi_lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags); -+ } -+ } -+} -+ -+static inline void -+task_access_unlock_irqrestore(struct task_struct *p, raw_spinlock_t *lock, -+ unsigned long *flags) -+{ -+ raw_spin_unlock_irqrestore(lock, *flags); -+} -+ -+/* -+ * __task_rq_lock - lock the rq @p resides on. -+ */ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ for (;;) { -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) -+ return rq; -+ raw_spin_unlock(&rq->lock); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+/* -+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. -+ */ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ for (;;) { -+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ /* -+ * move_queued_task() task_rq_lock() -+ * -+ * ACQUIRE (rq->lock) -+ * [S] ->on_rq = MIGRATING [L] rq = task_rq() -+ * WMB (__set_task_cpu()) ACQUIRE (rq->lock); -+ * [S] ->cpu = new_cpu [L] task_rq() -+ * [L] ->on_rq -+ * RELEASE (rq->lock) -+ * -+ * If we observe the old CPU in task_rq_lock(), the acquire of -+ * the old rq->lock will fully serialize against the stores. -+ * -+ * If we observe the new CPU in task_rq_lock(), the address -+ * dependency headed by '[L] rq = task_rq()' and the acquire -+ * will pair with the WMB to ensure we then also see migrating. 
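The *task_access_lock/task_rq_lock helpers above all follow the same validate-and-retry idiom: read which rq the task is on, lock it, then re-check that the task did not move in between. The toy pthread analogue below shows only that idiom, without the on_cpu/on_rq and migrating checks of the real code; struct rq, struct task and task_lock_rq are stand-ins invented for the demo (compile with -pthread).

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct rq   { pthread_mutex_t lock; };                /* stand-in run queue */
struct task { _Atomic(struct rq *) rq; };             /* stand-in task */

/* lock whatever rq the task currently points at, then re-check that it did
 * not migrate between the read and the lock; if it did, drop and retry */
static struct rq *task_lock_rq(struct task *t)
{
    for (;;) {
        struct rq *rq = atomic_load(&t->rq);

        pthread_mutex_lock(&rq->lock);
        if (atomic_load(&t->rq) == rq)
            return rq;                     /* pinned: holding rq->lock keeps it stable */
        pthread_mutex_unlock(&rq->lock);   /* lost the race to a migration */
    }
}

int main(void)
{
    struct rq rq0;
    struct task t;
    struct rq *locked;

    pthread_mutex_init(&rq0.lock, NULL);
    atomic_init(&t.rq, &rq0);

    locked = task_lock_rq(&t);
    printf("locked rq %p\n", (void *)locked);
    pthread_mutex_unlock(&locked->lock);
    return 0;
}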
-+ */ -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+static inline void -+rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irqsave(&rq->lock, rf->flags); -+} -+ -+static inline void -+rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irqrestore(&rq->lock, rf->flags); -+} -+ -+/* -+ * RQ-clock updating methods: -+ */ -+ -+static void update_rq_clock_task(struct rq *rq, s64 delta) -+{ -+/* -+ * In theory, the compile should just see 0 here, and optimize out the call -+ * to sched_rt_avg_update. But I don't trust it... -+ */ -+ s64 __maybe_unused steal = 0, irq_delta = 0; -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; -+ -+ /* -+ * Since irq_time is only updated on {soft,}irq_exit, we might run into -+ * this case when a previous update_rq_clock() happened inside a -+ * {soft,}irq region. -+ * -+ * When this happens, we stop ->clock_task and only update the -+ * prev_irq_time stamp to account for the part that fit, so that a next -+ * update will consume the rest. This ensures ->clock_task is -+ * monotonic. -+ * -+ * It does however cause some slight miss-attribution of {soft,}irq -+ * time, a more accurate solution would be to update the irq_time using -+ * the current rq->clock timestamp, except that would require using -+ * atomic ops. -+ */ -+ if (irq_delta > delta) -+ irq_delta = delta; -+ -+ rq->prev_irq_time += irq_delta; -+ delta -= irq_delta; -+#endif -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ if (static_key_false((¶virt_steal_rq_enabled))) { -+ steal = paravirt_steal_clock(cpu_of(rq)); -+ steal -= rq->prev_steal_time_rq; -+ -+ if (unlikely(steal > delta)) -+ steal = delta; -+ -+ rq->prev_steal_time_rq += steal; -+ delta -= steal; -+ } -+#endif -+ -+ rq->clock_task += delta; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ if ((irq_delta + steal)) -+ update_irq_load_avg(rq, irq_delta + steal); -+#endif -+} -+ -+static inline void update_rq_clock(struct rq *rq) -+{ -+ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; -+ -+ if (unlikely(delta <= 0)) -+ return; -+ rq->clock += delta; -+ update_rq_clock_task(rq, delta); -+} -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out -+ * of nohz mode if necessary. 
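The clamping in update_rq_clock_task() above is plain bookkeeping arithmetic: IRQ time and then paravirt steal time are carved out of the raw delta so that clock_task never moves backwards. A standalone sketch of just that arithmetic, with made-up numbers and an invented charge_task_time helper:

#include <stdio.h>

/* carve IRQ time, then steal time, out of the raw clock delta */
static long long charge_task_time(long long delta, long long irq_delta, long long steal)
{
    if (irq_delta > delta)
        irq_delta = delta;
    delta -= irq_delta;

    if (steal > delta)
        steal = delta;
    delta -= steal;

    return delta;            /* nanoseconds actually charged to the running task */
}

int main(void)
{
    /* a 1 ms tick during which 200 us went to IRQs and 300 us were stolen */
    printf("charged %lld ns\n", charge_task_time(1000000, 200000, 300000));  /* 500000 */
    return 0;
}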
-+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (rq->nr_running < 2) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+/* -+ * Add/Remove/Requeue task to/from the runqueue routines -+ * Context: rq->lock -+ */ -+static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ /*printk(KERN_INFO "sched: dequeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ -+ WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ __SCHED_DEQUEUE_TASK(p, rq, flags, update_sched_rq_watermark(rq)); -+ --rq->nr_running; -+#ifdef CONFIG_SMP -+ if (1 == rq->nr_running) -+ cpumask_clear_cpu(cpu_of(rq), &sched_rq_pending_mask); -+#endif -+ -+ sched_update_tick_dependency(rq); -+} -+ -+static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ /*printk(KERN_INFO "sched: enqueue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ -+ WARN_ONCE(task_rq(p) != rq, "sched: enqueue task reside on cpu%d to cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ __SCHED_ENQUEUE_TASK(p, rq, flags); -+ update_sched_rq_watermark(rq); -+ ++rq->nr_running; -+#ifdef CONFIG_SMP -+ if (2 == rq->nr_running) -+ cpumask_set_cpu(cpu_of(rq), &sched_rq_pending_mask); -+#endif -+ -+ sched_update_tick_dependency(rq); -+ -+ /* -+ * If in_iowait is set, the code below may not trigger any cpufreq -+ * utilization updates, so do it here explicitly with the IOWAIT flag -+ * passed. -+ */ -+ if (p->in_iowait) -+ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq) -+{ -+ lockdep_assert_held(&rq->lock); -+ /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ -+ WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", -+ cpu_of(rq), task_cpu(p)); -+ -+ __SCHED_REQUEUE_TASK(p, rq, update_sched_rq_watermark(rq)); -+} -+ -+/* -+ * cmpxchg based fetch_or, macro so it works for different integer types -+ */ -+#define fetch_or(ptr, mask) \ -+ ({ \ -+ typeof(ptr) _ptr = (ptr); \ -+ typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _old, _val = *_ptr; \ -+ \ -+ for (;;) { \ -+ _old = cmpxchg(_ptr, _val, _val | _mask); \ -+ if (_old == _val) \ -+ break; \ -+ _val = _old; \ -+ } \ -+ _old; \ -+}) -+ -+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) -+/* -+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * this avoids any races wrt polling state changes and thereby avoids -+ * spurious IPIs. -+ */ -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+} -+ -+/* -+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. -+ * -+ * If this returns true, then the idle task promises to call -+ * sched_ttwu_pending() and reschedule soon. 
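fetch_or() and set_nr_and_not_polling() above are a plain cmpxchg loop; in userspace C11 the same loop can be written with atomic_compare_exchange_weak(). The DEMO_* bit values and fetch_or_u32 below are invented for the demo, not the real _TIF_* flags.

#include <stdatomic.h>
#include <stdio.h>

#define DEMO_POLLING  0x8u
#define DEMO_RESCHED  0x1u

/* returns the old value so callers can test which bits were already set */
static unsigned int fetch_or_u32(_Atomic unsigned int *ptr, unsigned int mask)
{
    unsigned int val = atomic_load(ptr);

    while (!atomic_compare_exchange_weak(ptr, &val, val | mask))
        ;                    /* val was refreshed with the current value; retry */
    return val;
}

int main(void)
{
    _Atomic unsigned int flags = DEMO_POLLING;
    unsigned int old = fetch_or_u32(&flags, DEMO_RESCHED);

    /* mirrors set_nr_and_not_polling(): an IPI is only needed when the target
     * was not polling, otherwise its idle loop will notice the flag itself */
    printf("%s\n", (old & DEMO_POLLING) ? "target was polling, skip IPI"
                                        : "send reschedule IPI");
    return 0;
}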
-+ */ -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); -+ -+ for (;;) { -+ if (!(val & _TIF_POLLING_NRFLAG)) -+ return false; -+ if (val & _TIF_NEED_RESCHED) -+ return true; -+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); -+ if (old == val) -+ break; -+ val = old; -+ } -+ return true; -+} -+ -+#else -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ return false; -+} -+#endif -+#endif -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. -+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. -+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. -+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); -+ /* task can safely be re-inserted now: */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ -+/* -+ * resched_curr - mark rq's current task 'to be rescheduled now'. 
-+ * -+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. -+ */ -+void resched_curr(struct rq *rq) -+{ -+ struct task_struct *curr = rq->curr; -+ int cpu; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ if (test_tsk_need_resched(curr)) -+ return; -+ -+ cpu = cpu_of(rq); -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(curr); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(curr)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(cpu_rq(cpu)); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+} -+ -+#ifdef CONFIG_SMP -+#ifdef CONFIG_NO_HZ_COMMON -+void nohz_balance_enter_idle(int cpu) {} -+ -+void select_nohz_load_balancer(int stop_tick) {} -+ -+void set_cpu_sd_state_idle(void) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. -+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). -+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(), default_cpu = -1; -+ struct cpumask *mask; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { -+ if (!idle_cpu(cpu)) -+ return cpu; -+ default_cpu = cpu; -+ } -+ -+ for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) -+ for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) -+ if (!idle_cpu(i)) -+ return i; -+ -+ if (default_cpu == -1) -+ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+ cpu = default_cpu; -+ -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. -+ */ -+static inline void wake_up_idle_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ if (set_nr_and_not_polling(rq->idle)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+static inline bool wake_up_full_nohz_cpu(int cpu) -+{ -+ /* -+ * We just need the target to call irq_exit() and re-evaluate -+ * the next tick. The nohz full kick at least implies that. -+ * If needed we can still optimize that later with an -+ * empty IRQ. -+ */ -+ if (cpu_is_offline(cpu)) -+ return true; /* Don't try to wake offline CPUs. 
*/ -+ if (tick_nohz_full_cpu(cpu)) { -+ if (cpu != smp_processor_id() || -+ tick_nohz_tick_stopped()) -+ tick_nohz_full_kick_cpu(cpu); -+ return true; -+ } -+ -+ return false; -+} -+ -+void wake_up_nohz_cpu(int cpu) -+{ -+ if (!wake_up_full_nohz_cpu(cpu)) -+ wake_up_idle_cpu(cpu); -+} -+ -+static void nohz_csd_func(void *info) -+{ -+ struct rq *rq = info; -+ int cpu = cpu_of(rq); -+ unsigned int flags; -+ -+ /* -+ * Release the rq::nohz_csd. -+ */ -+ flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); -+ WARN_ON(!(flags & NOHZ_KICK_MASK)); -+ -+ rq->idle_balance = idle_cpu(cpu); -+ if (rq->idle_balance && !need_resched()) { -+ rq->nohz_idle_balance = flags; -+ raise_softirq_irqoff(SCHED_SOFTIRQ); -+ } -+} -+ -+#endif /* CONFIG_NO_HZ_COMMON */ -+#endif /* CONFIG_SMP */ -+ -+static inline void check_preempt_curr(struct rq *rq) -+{ -+ if (sched_rq_first_task(rq) != rq->curr) -+ resched_curr(rq); -+} -+ -+static inline void -+rq_csd_init(struct rq *rq, call_single_data_t *csd, smp_call_func_t func) -+{ -+ csd->flags = 0; -+ csd->func = func; -+ csd->info = rq; -+} -+ -+#ifdef CONFIG_SCHED_HRTICK -+/* -+ * Use HR-timers to deliver accurate preemption points. -+ */ -+ -+static void hrtick_clear(struct rq *rq) -+{ -+ if (hrtimer_active(&rq->hrtick_timer)) -+ hrtimer_cancel(&rq->hrtick_timer); -+} -+ -+/* -+ * High-resolution timer tick. -+ * Runs from hardirq context with interrupts disabled. -+ */ -+static enum hrtimer_restart hrtick(struct hrtimer *timer) -+{ -+ struct rq *rq = container_of(timer, struct rq, hrtick_timer); -+ struct task_struct *p; -+ -+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); -+ -+ raw_spin_lock(&rq->lock); -+ p = rq->curr; -+ p->time_slice = 0; -+ resched_curr(rq); -+ raw_spin_unlock(&rq->lock); -+ -+ return HRTIMER_NORESTART; -+} -+ -+/* -+ * Use hrtick when: -+ * - enabled by features -+ * - hrtimer is actually high res -+ */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ /** -+ * Alt schedule FW doesn't support sched_feat yet -+ if (!sched_feat(HRTICK)) -+ return 0; -+ */ -+ if (!cpu_active(cpu_of(rq))) -+ return 0; -+ return hrtimer_is_hres_active(&rq->hrtick_timer); -+} -+ -+#ifdef CONFIG_SMP -+ -+static void __hrtick_restart(struct rq *rq) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ -+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); -+} -+ -+/* -+ * called from hardirq (IPI) context -+ */ -+static void __hrtick_start(void *arg) -+{ -+ struct rq *rq = arg; -+ -+ raw_spin_lock(&rq->lock); -+ __hrtick_restart(rq); -+ raw_spin_unlock(&rq->lock); -+} -+ -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ ktime_t time; -+ s64 delta; -+ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense and can cause timer DoS. -+ */ -+ delta = max_t(s64, delay, 10000LL); -+ time = ktime_add_ns(timer->base->get_time(), delta); -+ -+ hrtimer_set_expires(timer, time); -+ -+ if (rq == this_rq()) -+ __hrtick_restart(rq); -+ else -+ smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); -+} -+ -+#else -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense. Rely on vruntime for fairness. 
-+ */ -+ delay = max_t(u64, delay, 10000LL); -+ hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), -+ HRTIMER_MODE_REL_PINNED_HARD); -+} -+#endif /* CONFIG_SMP */ -+ -+static void hrtick_rq_init(struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start); -+#endif -+ -+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); -+ rq->hrtick_timer.function = hrtick; -+} -+#else /* CONFIG_SCHED_HRTICK */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline void hrtick_clear(struct rq *rq) -+{ -+} -+ -+static inline void hrtick_rq_init(struct rq *rq) -+{ -+} -+#endif /* CONFIG_SCHED_HRTICK */ -+ -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (task_has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ -+ return p->static_prio + MAX_PRIORITY_ADJ; -+} -+ -+/* -+ * Calculate the current priority, i.e. the priority -+ * taken into account by the scheduler. This value might -+ * be boosted by RT tasks as it will be RT if the task got -+ * RT-boosted. If not then it returns p->normal_prio. -+ */ -+static int effective_prio(struct task_struct *p) -+{ -+ p->normal_prio = normal_prio(p); -+ /* -+ * If we are RT tasks or we were boosted to RT priority, -+ * keep the priority unchanged. Otherwise, update priority -+ * to the normal priority: -+ */ -+ if (!rt_prio(p->prio)) -+ return p->normal_prio; -+ return p->prio; -+} -+ -+/* -+ * activate_task - move a task to the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static void activate_task(struct task_struct *p, struct rq *rq) -+{ -+ enqueue_task(p, rq, ENQUEUE_WAKEUP); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ cpufreq_update_util(rq, 0); -+} -+ -+/* -+ * deactivate_task - remove a task from the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static inline void deactivate_task(struct task_struct *p, struct rq *rq) -+{ -+ dequeue_task(p, rq, DEQUEUE_SLEEP); -+ p->on_rq = 0; -+ cpufreq_update_util(rq, 0); -+} -+ -+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->cpu is set up to a new value, task_access_lock(p, ...) can be -+ * successfully executed on another CPU. We must ensure that updates of -+ * per-task data have been completed by this moment. -+ */ -+ smp_wmb(); -+ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ WRITE_ONCE(p->cpu, cpu); -+#else -+ WRITE_ONCE(task_thread_info(p)->cpu, cpu); -+#endif -+#endif -+} -+ -+#ifdef CONFIG_SMP -+void set_task_cpu(struct task_struct *p, unsigned int new_cpu) -+{ -+#ifdef CONFIG_SCHED_DEBUG -+ /* -+ * We should never call set_task_cpu() on a blocked task, -+ * ttwu() will sort out the placement. -+ */ -+ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && -+ !p->on_rq); -+#ifdef CONFIG_LOCKDEP -+ /* -+ * The caller should hold either p->pi_lock or rq->lock, when changing -+ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. -+ * -+ * sched_move_task() holds both and thus holding either pins the cgroup, -+ * see task_group(). -+ */ -+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || -+ lockdep_is_held(&task_rq(p)->lock))); -+#endif -+ /* -+ * Clearly, migrating tasks to offline CPUs is a fairly daft thing. 
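normal_prio()/effective_prio() above boil down to two mappings: SCHED_FIFO/RR priorities 1..99 invert into prio values 98..0, and SCHED_NORMAL tasks get static_prio shifted up by MAX_PRIORITY_ADJ. A throwaway sketch of that mapping (normal_prio_sketch is invented; the BMQ adjustment of 7 is shown, PDS uses 0):

#include <stdio.h>

#define MAX_RT_PRIO       100
#define MAX_PRIORITY_ADJ  7        /* BMQ; PDS defines this as 0 */

/* lower number = higher priority */
static int normal_prio_sketch(int has_rt_policy, int rt_priority, int static_prio)
{
    if (has_rt_policy)
        return MAX_RT_PRIO - 1 - rt_priority;   /* FIFO/RR 1..99 -> prio 98..0 */
    return static_prio + MAX_PRIORITY_ADJ;      /* SCHED_NORMAL, shifted by the boost range */
}

int main(void)
{
    printf("SCHED_FIFO rt_priority=50 -> prio %d\n", normal_prio_sketch(1, 50, 0));   /* 49 */
    printf("nice 0 (static_prio 120)  -> prio %d\n", normal_prio_sketch(0, 0, 120));  /* 127 */
    return 0;
}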
-+ */ -+ WARN_ON_ONCE(!cpu_online(new_cpu)); -+#endif -+ if (task_cpu(p) == new_cpu) -+ return; -+ trace_sched_migrate_task(p, new_cpu); -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ -+ __set_task_cpu(p, new_cpu); -+} -+ -+static inline bool is_per_cpu_kthread(struct task_struct *p) -+{ -+ return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); -+} -+ -+/* -+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see -+ * __set_cpus_allowed_ptr() and select_fallback_rq(). -+ */ -+static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -+{ -+ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ -+ if (is_per_cpu_kthread(p)) -+ return cpu_online(cpu); -+ -+ return cpu_active(cpu); -+} -+ -+/* -+ * This is how migration works: -+ * -+ * 1) we invoke migration_cpu_stop() on the target CPU using -+ * stop_one_cpu(). -+ * 2) stopper starts to run (implicitly forcing the migrated thread -+ * off the CPU) -+ * 3) it checks whether the migrated task is still in the wrong runqueue. -+ * 4) if it's in the wrong runqueue then the migration thread removes -+ * it and puts it into the right queue. -+ * 5) stopper completes and stop_one_cpu() returns and the migration -+ * is done. -+ */ -+ -+/* -+ * move_queued_task - move a queued task to new rq. -+ * -+ * Returns (locked) new rq. Old rq's lock is released. -+ */ -+static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int -+ new_cpu) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); -+ dequeue_task(p, rq, 0); -+ set_task_cpu(p, new_cpu); -+ raw_spin_unlock(&rq->lock); -+ -+ rq = cpu_rq(new_cpu); -+ -+ raw_spin_lock(&rq->lock); -+ BUG_ON(task_cpu(p) != new_cpu); -+ enqueue_task(p, rq, 0); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ check_preempt_curr(rq); -+ -+ return rq; -+} -+ -+struct migration_arg { -+ struct task_struct *task; -+ int dest_cpu; -+}; -+ -+/* -+ * Move (not current) task off this CPU, onto the destination CPU. We're doing -+ * this because either it can't run here any more (set_cpus_allowed() -+ * away from this CPU, or CPU going down), or because we're -+ * attempting to rebalance this task on exec (sched_exec). -+ * -+ * So we race with normal scheduler movements, but that's OK, as long -+ * as the task is no longer on this CPU. -+ */ -+static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int -+ dest_cpu) -+{ -+ /* Affinity changed (again). */ -+ if (!is_cpu_allowed(p, dest_cpu)) -+ return rq; -+ -+ update_rq_clock(rq); -+ return move_queued_task(rq, p, dest_cpu); -+} -+ -+/* -+ * migration_cpu_stop - this will be executed by a highprio stopper thread -+ * and performs thread migration by bumping thread off CPU then -+ * 'pushing' onto another runqueue. -+ */ -+static int migration_cpu_stop(void *data) -+{ -+ struct migration_arg *arg = data; -+ struct task_struct *p = arg->task; -+ struct rq *rq = this_rq(); -+ -+ /* -+ * The original target CPU might have gone down and we might -+ * be on another CPU but it doesn't matter. -+ */ -+ local_irq_disable(); -+ /* -+ * We need to explicitly wake pending tasks before running -+ * __migrate_task() such that we will not miss enforcing cpus_ptr -+ * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. 
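is_cpu_allowed() above encodes a small decision tree: the CPU must be in the affinity mask, per-CPU kthreads only need it to be online, everything else needs it to be active. The toy version below uses stub masks; PF_KTHREAD here is a made-up bit, the online/active sets are demo assumptions, and is_cpu_allowed_sketch is not kernel code.

#include <stdbool.h>
#include <stdio.h>

#define PF_KTHREAD (1u << 0)        /* toy flag bit standing in for the real PF_KTHREAD */

struct task {
    unsigned int flags;
    int nr_cpus_allowed;
    unsigned long cpus_mask;        /* toy cpumask: one bit per CPU */
};

/* assumption for the demo: CPUs 0-7 are online, but only 0-5 are active yet */
static bool cpu_online_stub(int cpu) { return cpu >= 0 && cpu < 8; }
static bool cpu_active_stub(int cpu) { return cpu >= 0 && cpu < 6; }

/* same decision tree as is_cpu_allowed() above */
static bool is_cpu_allowed_sketch(const struct task *p, int cpu)
{
    if (!(p->cpus_mask & (1UL << cpu)))
        return false;                                   /* outside the affinity mask */

    if ((p->flags & PF_KTHREAD) && p->nr_cpus_allowed == 1)
        return cpu_online_stub(cpu);                    /* per-CPU kthreads may use !active CPUs */

    return cpu_active_stub(cpu);                        /* ordinary tasks need an active CPU */
}

int main(void)
{
    struct task worker  = { .flags = PF_KTHREAD, .nr_cpus_allowed = 1, .cpus_mask = 1UL << 7 };
    struct task regular = { .flags = 0, .nr_cpus_allowed = 8, .cpus_mask = 0xFF };

    printf("per-CPU kthread on CPU 7: %d\n", is_cpu_allowed_sketch(&worker, 7));   /* 1 */
    printf("regular task on CPU 7:    %d\n", is_cpu_allowed_sketch(&regular, 7));  /* 0 */
    return 0;
}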
-+ */ -+ flush_smp_call_function_from_idle(); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ /* -+ * If task_rq(p) != rq, it cannot be migrated here, because we're -+ * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because -+ * we're holding p->pi_lock. -+ */ -+ if (task_rq(p) == rq && task_on_rq_queued(p)) -+ rq = __migrate_task(rq, p, arg->dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_enable(); -+ return 0; -+} -+ -+static inline void -+set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ p->nr_cpus_allowed = cpumask_weight(new_mask); -+} -+ -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ set_cpus_allowed_common(p, new_mask); -+} -+#endif -+ -+/** -+ * task_curr - is this task currently executing on a CPU? -+ * @p: the task in question. -+ * -+ * Return: 1 if the task is currently executing. 0 otherwise. -+ */ -+inline int task_curr(const struct task_struct *p) -+{ -+ return cpu_curr(task_cpu(p)) == p; -+} -+ -+#ifdef CONFIG_SMP -+/* -+ * wait_task_inactive - wait for a thread to unschedule. -+ * -+ * If @match_state is nonzero, it's the @p->state value just checked and -+ * not expected to change. If it changes, i.e. @p might have woken up, -+ * then return zero. When we succeed in waiting for @p to be off its CPU, -+ * we return a positive number (its total switch count). If a second call -+ * a short while later returns the same number, the caller can be sure that -+ * @p has remained unscheduled the whole time. -+ * -+ * The caller must ensure that the task *will* unschedule sometime soon, -+ * else this function might spin for a *long* time. This function can't -+ * be called with interrupts off, or it may introduce deadlock with -+ * smp_call_function() if an IPI is sent by the same process we are -+ * waiting to become inactive. -+ */ -+unsigned long wait_task_inactive(struct task_struct *p, long match_state) -+{ -+ unsigned long flags; -+ bool running, on_rq; -+ unsigned long ncsw; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ for (;;) { -+ rq = task_rq(p); -+ -+ /* -+ * If the task is actively running on another CPU -+ * still, just relax and busy-wait without holding -+ * any locks. -+ * -+ * NOTE! Since we don't hold any locks, it's not -+ * even sure that "rq" stays as the right runqueue! -+ * But we don't care, since this will return false -+ * if the runqueue has changed and p is actually now -+ * running somewhere else! -+ */ -+ while (task_running(p) && p == rq->curr) { -+ if (match_state && unlikely(p->state != match_state)) -+ return 0; -+ cpu_relax(); -+ } -+ -+ /* -+ * Ok, time to look more closely! We need the rq -+ * lock now, to be *sure*. If we're wrong, we'll -+ * just go back and repeat. -+ */ -+ task_access_lock_irqsave(p, &lock, &flags); -+ trace_sched_wait_task(p); -+ running = task_running(p); -+ on_rq = p->on_rq; -+ ncsw = 0; -+ if (!match_state || p->state == match_state) -+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ /* -+ * If it changed from the expected state, bail out now. -+ */ -+ if (unlikely(!ncsw)) -+ break; -+ -+ /* -+ * Was it really running after all now that we -+ * checked with the proper locks actually held? -+ * -+ * Oops. Go back and try again.. 
-+ */ -+ if (unlikely(running)) { -+ cpu_relax(); -+ continue; -+ } -+ -+ /* -+ * It's not enough that it's not actively running, -+ * it must be off the runqueue _entirely_, and not -+ * preempted! -+ * -+ * So if it was still runnable (but just not actively -+ * running right now), it's preempted, and we should -+ * yield - it could be a while. -+ */ -+ if (unlikely(on_rq)) { -+ ktime_t to = NSEC_PER_SEC / HZ; -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ continue; -+ } -+ -+ /* -+ * Ahh, all good. It wasn't running, and it wasn't -+ * runnable, which means that it will never become -+ * running in the future either. We're all done! -+ */ -+ break; -+ } -+ -+ return ncsw; -+} -+ -+/*** -+ * kick_process - kick a running thread to enter/exit the kernel -+ * @p: the to-be-kicked thread -+ * -+ * Cause a process which is running on another CPU to enter -+ * kernel-mode, without any delay. (to get signals handled.) -+ * -+ * NOTE: this function doesn't have to take the runqueue lock, -+ * because all it wants to ensure is that the remote task enters -+ * the kernel. If the IPI races and the task has been migrated -+ * to another CPU then no harm is done and the purpose has been -+ * achieved as well. -+ */ -+void kick_process(struct task_struct *p) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ cpu = task_cpu(p); -+ if ((cpu != smp_processor_id()) && task_curr(p)) -+ smp_send_reschedule(cpu); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(kick_process); -+ -+/* -+ * ->cpus_ptr is protected by both rq->lock and p->pi_lock -+ * -+ * A few notes on cpu_active vs cpu_online: -+ * -+ * - cpu_active must be a subset of cpu_online -+ * -+ * - on CPU-up we allow per-CPU kthreads on the online && !active CPU, -+ * see __set_cpus_allowed_ptr(). At this point the newly online -+ * CPU isn't yet part of the sched domains, and balancing will not -+ * see it. -+ * -+ * - on cpu-down we clear cpu_active() to mask the sched domains and -+ * avoid the load balancer to place new tasks on the to be removed -+ * CPU. Existing tasks will remain running there and will be taken -+ * off. -+ * -+ * This means that fallback selection must not select !active CPUs. -+ * And can assume that any active CPU must be online. Conversely -+ * select_task_rq() below may allow selection of !active CPUs in order -+ * to satisfy the above rules. -+ */ -+static int select_fallback_rq(int cpu, struct task_struct *p) -+{ -+ int nid = cpu_to_node(cpu); -+ const struct cpumask *nodemask = NULL; -+ enum { cpuset, possible, fail } state = cpuset; -+ int dest_cpu; -+ -+ /* -+ * If the node that the CPU is on has been offlined, cpu_to_node() -+ * will return -1. There is no CPU on the node, and we should -+ * select the CPU on the other node. -+ */ -+ if (nid != -1) { -+ nodemask = cpumask_of_node(nid); -+ -+ /* Look for allowed, online CPU in same node. */ -+ for_each_cpu(dest_cpu, nodemask) { -+ if (!cpu_active(dest_cpu)) -+ continue; -+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) -+ return dest_cpu; -+ } -+ } -+ -+ for (;;) { -+ /* Any allowed, online CPU? */ -+ for_each_cpu(dest_cpu, p->cpus_ptr) { -+ if (!is_cpu_allowed(p, dest_cpu)) -+ continue; -+ goto out; -+ } -+ -+ /* No more Mr. Nice Guy. 
*/ -+ switch (state) { -+ case cpuset: -+ if (IS_ENABLED(CONFIG_CPUSETS)) { -+ cpuset_cpus_allowed_fallback(p); -+ state = possible; -+ break; -+ } -+ fallthrough; -+ case possible: -+ do_set_cpus_allowed(p, cpu_possible_mask); -+ state = fail; -+ break; -+ -+ case fail: -+ BUG(); -+ break; -+ } -+ } -+ -+out: -+ if (state != cpuset) { -+ /* -+ * Don't tell them about moving exiting tasks or -+ * kernel threads (both mm NULL), since they never -+ * leave kernel. -+ */ -+ if (p->mm && printk_ratelimit()) { -+ printk_deferred("process %d (%s) no longer affine to cpu%d\n", -+ task_pid_nr(p), p->comm, cpu); -+ } -+ } -+ -+ return dest_cpu; -+} -+ -+static inline int select_task_rq(struct task_struct *p, struct rq *rq) -+{ -+ cpumask_t chk_mask, tmp; -+ -+ if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_online_mask))) -+ return select_fallback_rq(task_cpu(p), p); -+ -+ if ( -+#ifdef CONFIG_SCHED_SMT -+ cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || -+#endif -+ cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || -+ cpumask_and(&tmp, &chk_mask, -+ &sched_rq_watermark[task_sched_prio(p, rq) + 1])) -+ return best_mask_cpu(task_cpu(p), &tmp); -+ -+ return best_mask_cpu(task_cpu(p), &chk_mask); -+} -+ -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. -+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. -+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ int dest_cpu; -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. 
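select_task_rq() above is a cascade of cpumask intersections, from the most desirable candidates (a fully idle SMT core) down to anything in the affinity mask; the real code then picks the topologically closest CPU via best_mask_cpu(), which the sketch below skips. The masks, the first-set-bit policy, and the names select_cpu_sketch/first_cpu are all demo assumptions.

#include <stdio.h>

typedef unsigned long cpumask_t;     /* toy cpumask: one bit per CPU */

static int first_cpu(cpumask_t m)
{
    for (int cpu = 0; cpu < 64; cpu++)
        if (m & (1UL << cpu))
            return cpu;
    return -1;
}

/* take the first non-empty intersection, best candidates first */
static int select_cpu_sketch(cpumask_t allowed, cpumask_t sg_idle,
                             cpumask_t idle_wm, cpumask_t prio_wm)
{
    cpumask_t tmp;

    tmp = allowed & sg_idle;         /* 1st: a whole SMT core is idle */
    if (tmp)
        return first_cpu(tmp);
    tmp = allowed & idle_wm;         /* 2nd: some allowed CPU is idle */
    if (tmp)
        return first_cpu(tmp);
    tmp = allowed & prio_wm;         /* 3rd: an allowed CPU only runs lower-priority work */
    if (tmp)
        return first_cpu(tmp);
    return first_cpu(allowed);       /* else: anything in the affinity mask */
}

int main(void)
{
    /* CPUs 0-3 allowed, no fully idle core, CPU 2 idle, CPU 1 below the watermark */
    printf("picked CPU %d\n", select_cpu_sketch(0xF, 0x0, 0x4, 0x2));   /* -> 2 */
    return 0;
}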
-+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (cpumask_equal(&p->cpus_mask, new_mask)) -+ goto out; -+ -+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ do_set_cpus_allowed(p, new_mask); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. -+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(p) || p->state == TASK_WAKING) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ /* Need help from migration thread: drop lock and wait. */ -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -+ return 0; -+ } -+ if (task_on_rq_queued(p)) { -+ /* -+ * OK, since we're going to drop the lock immediately -+ * afterwards anyway. -+ */ -+ update_rq_clock(rq); -+ rq = move_queued_task(rq, p, dest_cpu); -+ lock = &rq->lock; -+ } -+ -+out: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ return ret; -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+#else /* CONFIG_SMP */ -+ -+static inline int select_task_rq(struct task_struct *p, struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline int -+__set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+ -+#endif /* CONFIG_SMP */ -+ -+static void -+ttwu_stat(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq; -+ -+ if (!schedstat_enabled()) -+ return; -+ -+ rq= this_rq(); -+ -+#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) -+ __schedstat_inc(rq->ttwu_local); -+ else { -+ /** Alt schedule FW ToDo: -+ * How to do ttwu_wake_remote -+ */ -+ } -+#endif /* CONFIG_SMP */ -+ -+ __schedstat_inc(rq->ttwu_count); -+} -+ -+/* -+ * Mark the task runnable and perform wakeup-preemption. -+ */ -+static inline void -+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ check_preempt_curr(rq); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+} -+ -+static inline void -+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ if (p->sched_contributes_to_load) -+ rq->nr_uninterruptible--; -+ -+ activate_task(p, rq); -+ ttwu_do_wakeup(rq, p, 0); -+} -+ -+/* -+ * Consider @p being inside a wait loop: -+ * -+ * for (;;) { -+ * set_current_state(TASK_UNINTERRUPTIBLE); -+ * -+ * if (CONDITION) -+ * break; -+ * -+ * schedule(); -+ * } -+ * __set_current_state(TASK_RUNNING); -+ * -+ * between set_current_state() and schedule(). In this case @p is still -+ * runnable, so all that needs doing is change p->state back to TASK_RUNNING in -+ * an atomic manner. -+ * -+ * By taking task_rq(p)->lock we serialize against schedule(), if @p->on_rq -+ * then schedule() must still happen and p->state can be changed to -+ * TASK_RUNNING. Otherwise we lost the race, schedule() has happened, and we -+ * need to do a full wakeup with enqueue. 
-+ * -+ * Returns: %true when the wakeup is done, -+ * %false otherwise. -+ */ -+static int ttwu_runnable(struct task_struct *p, int wake_flags) -+{ -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ rq = __task_access_lock(p, &lock); -+ if (task_on_rq_queued(p)) { -+ /* check_preempt_curr() may use rq clock */ -+ update_rq_clock(rq); -+ ttwu_do_wakeup(rq, p, wake_flags); -+ ret = 1; -+ } -+ __task_access_unlock(p, lock); -+ -+ return ret; -+} -+ -+#ifdef CONFIG_SMP -+void sched_ttwu_pending(void *arg) -+{ -+ struct llist_node *llist = arg; -+ struct rq *rq = this_rq(); -+ struct task_struct *p, *t; -+ struct rq_flags rf; -+ -+ if (!llist) -+ return; -+ -+ /* -+ * rq::ttwu_pending racy indication of out-standing wakeups. -+ * Races such that false-negatives are possible, since they -+ * are shorter lived that false-positives would be. -+ */ -+ WRITE_ONCE(rq->ttwu_pending, 0); -+ -+ rq_lock_irqsave(rq, &rf); -+ update_rq_clock(rq); -+ -+ llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { -+ if (WARN_ON_ONCE(p->on_cpu)) -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq))) -+ set_task_cpu(p, cpu_of(rq)); -+ -+ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0); -+ } -+ -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+void send_call_function_single_ipi(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (!set_nr_if_polling(rq->idle)) -+ arch_send_call_function_single_ipi(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+/* -+ * Queue a task on the target CPUs wake_list and wake the CPU via IPI if -+ * necessary. The wakee CPU on receipt of the IPI will queue the task -+ * via sched_ttwu_wakeup() for activation so the wakee incurs the cost -+ * of the wakeup instead of the waker. -+ */ -+static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); -+ -+ WRITE_ONCE(rq->ttwu_pending, 1); -+ __smp_call_single_queue(cpu, &p->wake_entry.llist); -+} -+ -+static inline bool ttwu_queue_cond(int cpu, int wake_flags) -+{ -+ /* -+ * If the CPU does not share cache, then queue the task on the -+ * remote rqs wakelist to avoid accessing remote data. -+ */ -+ if (!cpus_share_cache(smp_processor_id(), cpu)) -+ return true; -+ -+ /* -+ * If the task is descheduling and the only running task on the -+ * CPU then use the wakelist to offload the task activation to -+ * the soon-to-be-idle CPU as the current CPU is likely busy. -+ * nr_running is checked to avoid unnecessary task stacking. 
-+ */ -+ if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) -+ return true; -+ -+ return false; -+} -+ -+static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ if (__is_defined(ALT_SCHED_TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) { -+ if (WARN_ON_ONCE(cpu == smp_processor_id())) -+ return false; -+ -+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ -+ __ttwu_queue_wakelist(p, cpu, wake_flags); -+ return true; -+ } -+ -+ return false; -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (is_idle_task(rq->curr)) -+ smp_send_reschedule(cpu); -+ /* Else CPU is not idle, do nothing here */ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); -+} -+#else /* !CONFIG_SMP */ -+ -+static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ return false; -+} -+ -+#endif /* CONFIG_SMP */ -+ -+static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (ttwu_queue_wakelist(p, cpu, wake_flags)) -+ return; -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ raw_spin_unlock(&rq->lock); -+} -+ -+/* -+ * Notes on Program-Order guarantees on SMP systems. -+ * -+ * MIGRATION -+ * -+ * The basic program-order guarantee on SMP systems is that when a task [t] -+ * migrates, all its activity on its old CPU [c0] happens-before any subsequent -+ * execution on its new CPU [c1]. -+ * -+ * For migration (of runnable tasks) this is provided by the following means: -+ * -+ * A) UNLOCK of the rq(c0)->lock scheduling out task t -+ * B) migration for t is required to synchronize *both* rq(c0)->lock and -+ * rq(c1)->lock (if not at the same time, then in that order). -+ * C) LOCK of the rq(c1)->lock scheduling in task -+ * -+ * Transitivity guarantees that B happens after A and C after B. -+ * Note: we only require RCpc transitivity. -+ * Note: the CPU doing B need not be c0 or c1 -+ * -+ * Example: -+ * -+ * CPU0 CPU1 CPU2 -+ * -+ * LOCK rq(0)->lock -+ * sched-out X -+ * sched-in Y -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(0)->lock // orders against CPU0 -+ * dequeue X -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(1)->lock -+ * enqueue X -+ * UNLOCK rq(1)->lock -+ * -+ * LOCK rq(1)->lock // orders against CPU2 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(1)->lock -+ * -+ * -+ * BLOCKING -- aka. SLEEP + WAKEUP -+ * -+ * For blocking we (obviously) need to provide the same guarantee as for -+ * migration. However the means are completely different as there is no lock -+ * chain to provide order. 
Instead we do: -+ * -+ * 1) smp_store_release(X->on_cpu, 0) -- finish_task() -+ * 2) smp_cond_load_acquire(!X->on_cpu) -- try_to_wake_up() -+ * -+ * Example: -+ * -+ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule) -+ * -+ * LOCK rq(0)->lock LOCK X->pi_lock -+ * dequeue X -+ * sched-out X -+ * smp_store_release(X->on_cpu, 0); -+ * -+ * smp_cond_load_acquire(&X->on_cpu, !VAL); -+ * X->state = WAKING -+ * set_task_cpu(X,2) -+ * -+ * LOCK rq(2)->lock -+ * enqueue X -+ * X->state = RUNNING -+ * UNLOCK rq(2)->lock -+ * -+ * LOCK rq(2)->lock // orders against CPU1 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(2)->lock -+ * -+ * UNLOCK X->pi_lock -+ * UNLOCK rq(0)->lock -+ * -+ * -+ * However; for wakeups there is a second guarantee we must provide, namely we -+ * must observe the state that lead to our wakeup. That is, not only must our -+ * task observe its own prior state, it must also observe the stores prior to -+ * its wakeup. -+ * -+ * This means that any means of doing remote wakeups must order the CPU doing -+ * the wakeup against the CPU the task is going to end up running on. This, -+ * however, is already required for the regular Program-Order guarantee above, -+ * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire). -+ * -+ */ -+ -+/** -+ * try_to_wake_up - wake up a thread -+ * @p: the thread to be awakened -+ * @state: the mask of task states that can be woken -+ * @wake_flags: wake modifier flags (WF_*) -+ * -+ * Conceptually does: -+ * -+ * If (@state & @p->state) @p->state = TASK_RUNNING. -+ * -+ * If the task was not queued/runnable, also place it back on a runqueue. -+ * -+ * This function is atomic against schedule() which would dequeue the task. -+ * -+ * It issues a full memory barrier before accessing @p->state, see the comment -+ * with set_current_state(). -+ * -+ * Uses p->pi_lock to serialize against concurrent wake-ups. -+ * -+ * Relies on p->pi_lock stabilizing: -+ * - p->sched_class -+ * - p->cpus_ptr -+ * - p->sched_task_group -+ * in order to do migration, see its use of select_task_rq()/set_task_cpu(). -+ * -+ * Tries really hard to only take one task_rq(p)->lock for performance. -+ * Takes rq->lock in: -+ * - ttwu_runnable() -- old rq, unavoidable, see comment there; -+ * - ttwu_queue() -- new rq, for enqueue of the task; -+ * - psi_ttwu_dequeue() -- much sadness :-( accounting will kill us. -+ * -+ * As a consequence we race really badly with just about everything. See the -+ * many memory barriers and their comments for details. -+ * -+ * Return: %true if @p->state changes (an actual wakeup was done), -+ * %false otherwise. -+ */ -+static int try_to_wake_up(struct task_struct *p, unsigned int state, -+ int wake_flags) -+{ -+ unsigned long flags; -+ int cpu, success = 0; -+ -+ preempt_disable(); -+ if (p == current) { -+ /* -+ * We're waking current, this means 'p->on_rq' and 'task_cpu(p) -+ * == smp_processor_id()'. Together this means we can special -+ * case the whole 'p->on_rq && ttwu_runnable()' case below -+ * without taking any locks. -+ * -+ * In particular: -+ * - we rely on Program-Order guarantees for all the ordering, -+ * - we're serialized against set_special_state() by virtue of -+ * it disabling IRQs (this allows not taking ->pi_lock). 
-+ */ -+ if (!(p->state & state)) -+ goto out; -+ -+ success = 1; -+ trace_sched_waking(p); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+ goto out; -+ } -+ -+ /* -+ * If we are going to wake up a thread waiting for CONDITION we -+ * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with smp_store_mb() -+ * in set_current_state() that the waiting thread does. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ smp_mb__after_spinlock(); -+ if (!(p->state & state)) -+ goto unlock; -+ -+ trace_sched_waking(p); -+ -+ /* We're going to change ->state: */ -+ success = 1; -+ -+ /* -+ * Ensure we load p->on_rq _after_ p->state, otherwise it would -+ * be possible to, falsely, observe p->on_rq == 0 and get stuck -+ * in smp_cond_load_acquire() below. -+ * -+ * sched_ttwu_pending() try_to_wake_up() -+ * STORE p->on_rq = 1 LOAD p->state -+ * UNLOCK rq->lock -+ * -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * UNLOCK rq->lock -+ * -+ * [task p] -+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ * -+ * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). -+ */ -+ smp_rmb(); -+ if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) -+ goto unlock; -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -+ * possible to, falsely, observe p->on_cpu == 0. -+ * -+ * One must be running (->on_cpu == 1) in order to remove oneself -+ * from the runqueue. -+ * -+ * __schedule() (switch to task 'p') try_to_wake_up() -+ * STORE p->on_cpu = 1 LOAD p->on_rq -+ * UNLOCK rq->lock -+ * -+ * __schedule() (put 'p' to sleep) -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * STORE p->on_rq = 0 LOAD p->on_cpu -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ * -+ * Form a control-dep-acquire with p->on_rq == 0 above, to ensure -+ * schedule()'s deactivate_task() has 'happened' and p will no longer -+ * care about it's own p->state. See the comment in __schedule(). -+ */ -+ smp_acquire__after_ctrl_dep(); -+ -+ /* -+ * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq -+ * == 0), which means we need to do an enqueue, change p->state to -+ * TASK_WAKING such that we can unlock p->pi_lock before doing the -+ * enqueue, such as ttwu_queue_wakelist(). -+ */ -+ p->state = TASK_WAKING; -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, considering queueing p on the remote CPUs wake_list -+ * which potentially sends an IPI instead of spinning on p->on_cpu to -+ * let the waker make forward progress. This is safe because IRQs are -+ * disabled and the IPI will deliver after on_cpu is cleared. -+ * -+ * Ensure we load task_cpu(p) after p->on_cpu: -+ * -+ * set_task_cpu(p, cpu); -+ * STORE p->cpu = @cpu -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock -+ * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu) -+ * STORE p->on_cpu = 1 LOAD p->cpu -+ * -+ * to ensure we observe the correct CPU on which the task is currently -+ * scheduling. 
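The on_cpu handshake used by try_to_wake_up() here (pairing with the smp_store_release() in finish_task() further down) is a plain release/acquire pairing; reduced to a toy with made-up variables, purely for illustration and not part of the patch:

#include <linux/compiler.h>
#include <linux/bug.h>
#include <asm/barrier.h>

static int flag;        /* hypothetical: stands in for p->on_cpu */
static int payload;     /* hypothetical: stands in for the woken task's prior state */

static void release_side(void)                  /* cf. finish_task() */
{
        payload = 42;                           /* stores before the release ... */
        smp_store_release(&flag, 1);
}

static void acquire_side(void)                  /* cf. try_to_wake_up() */
{
        smp_cond_load_acquire(&flag, VAL);      /* spin until flag != 0, with acquire */
        BUG_ON(payload != 42);                  /* ... are guaranteed visible here */
}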
-+ */ -+ if (smp_load_acquire(&p->on_cpu) && -+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) -+ goto unlock; -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until its done referencing the task. -+ * -+ * Pairs with the smp_store_release() in finish_task(). -+ * -+ * This ensures that tasks getting woken will be fully ordered against -+ * their previous state and preserve Program Order. -+ */ -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ sched_task_ttwu(p); -+ -+ cpu = select_task_rq(p, this_rq()); -+ -+ if (cpu != task_cpu(p)) { -+ wake_flags |= WF_MIGRATED; -+ psi_ttwu_dequeue(p); -+ set_task_cpu(p, cpu); -+ } -+#else -+ cpu = task_cpu(p); -+#endif /* CONFIG_SMP */ -+ -+ ttwu_queue(p, cpu, wake_flags); -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+out: -+ if (success) -+ ttwu_stat(p, task_cpu(p), wake_flags); -+ preempt_enable(); -+ -+ return success; -+} -+ -+/** -+ * try_invoke_on_locked_down_task - Invoke a function on task in fixed state -+ * @p: Process for which the function is to be invoked. -+ * @func: Function to invoke. -+ * @arg: Argument to function. -+ * -+ * If the specified task can be quickly locked into a definite state -+ * (either sleeping or on a given runqueue), arrange to keep it in that -+ * state while invoking @func(@arg). This function can use ->on_rq and -+ * task_curr() to work out what the state is, if required. Given that -+ * @func can be invoked with a runqueue lock held, it had better be quite -+ * lightweight. -+ * -+ * Returns: -+ * @false if the task slipped out from under the locks. -+ * @true if the task was locked onto a runqueue or is sleeping. -+ * However, @func can override this by returning @false. -+ */ -+bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) -+{ -+ bool ret = false; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ lockdep_assert_irqs_enabled(); -+ raw_spin_lock_irq(&p->pi_lock); -+ if (p->on_rq) { -+ rq = __task_rq_lock(p, &rf); -+ if (task_rq(p) == rq) -+ ret = func(p, arg); -+ __task_rq_unlock(rq, &rf); -+ } else { -+ switch (p->state) { -+ case TASK_RUNNING: -+ case TASK_WAKING: -+ break; -+ default: -+ smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). -+ if (!p->on_rq) -+ ret = func(p, arg); -+ } -+ } -+ raw_spin_unlock_irq(&p->pi_lock); -+ return ret; -+} -+ -+/** -+ * wake_up_process - Wake up a specific process -+ * @p: The process to be woken up. -+ * -+ * Attempt to wake up the nominated process and move it to the set of runnable -+ * processes. -+ * -+ * Return: 1 if the process was woken up, 0 if it was already running. -+ * -+ * This function executes a full memory barrier before accessing the task state. -+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+int wake_up_state(struct task_struct *p, unsigned int state) -+{ -+ return try_to_wake_up(p, state, 0); -+} -+ -+/* -+ * Perform scheduler related setup for a newly forked process p. -+ * p is forked by current. 
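wake_up_process() above is the waker half of the wait-loop pattern described in the ttwu comments; a minimal hypothetical waiter/waker pair (illustrative only, not part of this patch) looks like:

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/compiler.h>

static struct task_struct *waiter;      /* hypothetical */
static bool work_ready;                 /* hypothetical CONDITION */

static int waiter_fn(void *unused)
{
        for (;;) {
                set_current_state(TASK_UNINTERRUPTIBLE);        /* publish state first */
                if (kthread_should_stop() || READ_ONCE(work_ready)) {
                        __set_current_state(TASK_RUNNING);      /* no longer sleeping */
                        if (kthread_should_stop())
                                break;
                        WRITE_ONCE(work_ready, false);
                        /* ... handle the work ... */
                        continue;
                }
                schedule();     /* really sleep; woken via try_to_wake_up() */
        }
        return 0;
}

static void waker(void)
{
        WRITE_ONCE(work_ready, true);   /* CONDITION = 1 ... */
        wake_up_process(waiter);        /* ... then wake; ordering per the comments above */
}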
-+ * -+ * __sched_fork() is basic setup used by init_idle() too: -+ */ -+static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p) -+{ -+ p->on_rq = 0; -+ p->on_cpu = 0; -+ p->utime = 0; -+ p->stime = 0; -+ p->sched_time = 0; -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ INIT_HLIST_HEAD(&p->preempt_notifiers); -+#endif -+ -+#ifdef CONFIG_COMPACTION -+ p->capture_control = NULL; -+#endif -+#ifdef CONFIG_SMP -+ p->wake_entry.u_flags = CSD_TYPE_TTWU; -+#endif -+} -+ -+/* -+ * fork()/clone()-time setup: -+ */ -+int sched_fork(unsigned long clone_flags, struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ -+ __sched_fork(clone_flags, p); -+ /* -+ * We mark the process as NEW here. This guarantees that -+ * nobody will actually run it, and a signal or other external -+ * event cannot wake it up and insert it on the runqueue either. -+ */ -+ p->state = TASK_NEW; -+ -+ /* -+ * Make sure we do not leak PI boosting priority to the child. -+ */ -+ p->prio = current->normal_prio; -+ -+ /* -+ * Revert to default priority/policy on fork if requested. -+ */ -+ if (unlikely(p->sched_reset_on_fork)) { -+ if (task_has_rt_policy(p)) { -+ p->policy = SCHED_NORMAL; -+ p->static_prio = NICE_TO_PRIO(0); -+ p->rt_priority = 0; -+ } else if (PRIO_TO_NICE(p->static_prio) < 0) -+ p->static_prio = NICE_TO_PRIO(0); -+ -+ p->prio = p->normal_prio = normal_prio(p); -+ -+ /* -+ * We don't need the reset flag anymore after the fork. It has -+ * fulfilled its duty: -+ */ -+ p->sched_reset_on_fork = 0; -+ } -+ -+ /* -+ * The child is not yet in the pid-hash so no cgroup attach races, -+ * and the cgroup is pinned to this child due to cgroup_fork() -+ * is ran before sched_fork(). -+ * -+ * Silence PROVE_RCU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. -+ */ -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ -+ rq->curr->time_slice /= 2; -+ p->time_slice = rq->curr->time_slice; -+#ifdef CONFIG_SCHED_HRTICK -+ hrtick_start(rq, rq->curr->time_slice); -+#endif -+ -+ if (p->time_slice < RESCHED_NS) { -+ p->time_slice = sched_timeslice_ns; -+ resched_curr(rq); -+ } -+ sched_task_fork(p, rq); -+ raw_spin_unlock(&rq->lock); -+ -+ rseq_migrate(p); -+ /* -+ * We're setting the CPU for the first time, we don't migrate, -+ * so use __set_task_cpu(). -+ */ -+ __set_task_cpu(p, cpu_of(rq)); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ return 0; -+} -+ -+void sched_post_fork(struct task_struct *p) {} -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+static bool __initdata __sched_schedstats = false; -+ -+static void set_schedstats(bool enabled) -+{ -+ if (enabled) -+ static_branch_enable(&sched_schedstats); -+ else -+ static_branch_disable(&sched_schedstats); -+} -+ -+void force_schedstat_enabled(void) -+{ -+ if (!schedstat_enabled()) { -+ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); -+ static_branch_enable(&sched_schedstats); -+ } -+} -+ -+static int __init setup_schedstats(char *str) -+{ -+ int ret = 0; -+ if (!str) -+ goto out; -+ -+ /* -+ * This code is called before jump labels have been set up, so we can't -+ * change the static branch directly just yet. 
Instead set a temporary -+ * variable so init_schedstats() can do it later. -+ */ -+ if (!strcmp(str, "enable")) { -+ __sched_schedstats = true; -+ ret = 1; -+ } else if (!strcmp(str, "disable")) { -+ __sched_schedstats = false; -+ ret = 1; -+ } -+out: -+ if (!ret) -+ pr_warn("Unable to parse schedstats=\n"); -+ -+ return ret; -+} -+__setup("schedstats=", setup_schedstats); -+ -+static void __init init_schedstats(void) -+{ -+ set_schedstats(__sched_schedstats); -+} -+ -+#ifdef CONFIG_PROC_SYSCTL -+int sysctl_schedstats(struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) -+{ -+ struct ctl_table t; -+ int err; -+ int state = static_branch_likely(&sched_schedstats); -+ -+ if (write && !capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ t = *table; -+ t.data = &state; -+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); -+ if (err < 0) -+ return err; -+ if (write) -+ set_schedstats(state); -+ return err; -+} -+#endif /* CONFIG_PROC_SYSCTL */ -+#else /* !CONFIG_SCHEDSTATS */ -+static inline void init_schedstats(void) {} -+#endif /* CONFIG_SCHEDSTATS */ -+ -+/* -+ * wake_up_new_task - wake up a newly created task for the first time. -+ * -+ * This function will do some initial scheduler statistics housekeeping -+ * that must be done for every newly created context, then puts the task -+ * on the runqueue and wakes it. -+ */ -+void wake_up_new_task(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ p->state = TASK_RUNNING; -+ -+ rq = cpu_rq(select_task_rq(p, this_rq())); -+#ifdef CONFIG_SMP -+ rseq_migrate(p); -+ /* -+ * Fork balancing, do it here and not earlier because: -+ * - cpus_ptr can change in the fork path -+ * - any previously selected CPU might disappear through hotplug -+ * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, -+ * as we're not fully set-up yet. -+ */ -+ __set_task_cpu(p, cpu_of(rq)); -+#endif -+ -+ raw_spin_lock(&rq->lock); -+ -+ update_rq_clock(rq); -+ activate_task(p, rq); -+ trace_sched_wakeup_new(p); -+ check_preempt_curr(rq); -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ -+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); -+ -+void preempt_notifier_inc(void) -+{ -+ static_branch_inc(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_inc); -+ -+void preempt_notifier_dec(void) -+{ -+ static_branch_dec(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_dec); -+ -+/** -+ * preempt_notifier_register - tell me when current is being preempted & rescheduled -+ * @notifier: notifier struct to register -+ */ -+void preempt_notifier_register(struct preempt_notifier *notifier) -+{ -+ if (!static_branch_unlikely(&preempt_notifier_key)) -+ WARN(1, "registering preempt_notifier while notifiers disabled\n"); -+ -+ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_register); -+ -+/** -+ * preempt_notifier_unregister - no longer interested in preemption notifications -+ * @notifier: notifier struct to unregister -+ * -+ * This is *not* safe to call from within a preemption notifier. 
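The schedstats plumbing earlier in this hunk can be driven from the command line (schedstats=enable) or at runtime through the sysctl registered by sysctl_schedstats(); a hypothetical userspace helper doing the latter, assuming procfs is mounted and CONFIG_SCHEDSTATS is set:

/* Userspace, not kernel code. */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/kernel/sched_schedstats", "w");

        if (!f) {
                perror("sched_schedstats");
                return 1;
        }
        fputs("1\n", f);
        return fclose(f) ? 1 : 0;
}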
-+ */ -+void preempt_notifier_unregister(struct preempt_notifier *notifier) -+{ -+ hlist_del(¬ifier->link); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_unregister); -+ -+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_in(notifier, raw_smp_processor_id()); -+} -+ -+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_in_preempt_notifiers(curr); -+} -+ -+static void -+__fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_out(notifier, next); -+} -+ -+static __always_inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_out_preempt_notifiers(curr, next); -+} -+ -+#else /* !CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+} -+ -+static inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+} -+ -+#endif /* CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void prepare_task(struct task_struct *next) -+{ -+ /* -+ * Claim the task as running, we do this before switching to it -+ * such that any running task will have this set. -+ * -+ * See the ttwu() WF_ON_CPU case and its ordering comment. -+ */ -+ WRITE_ONCE(next->on_cpu, 1); -+} -+ -+static inline void finish_task(struct task_struct *prev) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * This must be the very last reference to @prev from this CPU. After -+ * p->on_cpu is cleared, the task can be moved to a different CPU. We -+ * must ensure this doesn't happen until the switch is completely -+ * finished. -+ * -+ * In particular, the load of prev->state in finish_task_switch() must -+ * happen before this. -+ * -+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). -+ */ -+ smp_store_release(&prev->on_cpu, 0); -+#else -+ prev->on_cpu = 0; -+#endif -+} -+ -+static inline void -+prepare_lock_switch(struct rq *rq, struct task_struct *next) -+{ -+ /* -+ * Since the runqueue lock will be released by the next -+ * task (which is an invalid locking op but in the case -+ * of the scheduler it's an obvious special-case), so we -+ * do an early lockdep release here: -+ */ -+ spin_release(&rq->lock.dep_map, _THIS_IP_); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* this is a valid case when another task releases the spinlock */ -+ rq->lock.owner = next; -+#endif -+} -+ -+static inline void finish_lock_switch(struct rq *rq) -+{ -+ /* -+ * If we are tracking spinlock dependencies then we have to -+ * fix up the runqueue lock - which gets 'carried over' from -+ * prev into current: -+ */ -+ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+/** -+ * prepare_task_switch - prepare to switch tasks -+ * @rq: the runqueue preparing to switch -+ * @next: the task we are going to switch to. -+ * -+ * This is called with the rq lock held and interrupts off. It must -+ * be paired with a subsequent finish_task_switch after the context -+ * switch. -+ * -+ * prepare_task_switch sets up locking and calls architecture specific -+ * hooks. 
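For reference, the preempt notifier API retained above is consumed roughly like this (a sketch with hypothetical names, mirroring the way in-tree users such as KVM attach per-task notifiers):

#include <linux/preempt.h>

static void my_sched_in(struct preempt_notifier *pn, int cpu)
{
        /* current was just scheduled back in on 'cpu' */
}

static void my_sched_out(struct preempt_notifier *pn, struct task_struct *next)
{
        /* current is being scheduled out in favour of 'next' */
}

static struct preempt_ops my_preempt_ops = {
        .sched_in       = my_sched_in,
        .sched_out      = my_sched_out,
};

static struct preempt_notifier my_notifier;     /* hypothetical, normally per task */

static void my_attach_to_current(void)
{
        preempt_notifier_inc();                         /* enable the static key */
        preempt_notifier_init(&my_notifier, &my_preempt_ops);
        preempt_notifier_register(&my_notifier);        /* fires on current's switches */
}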
-+ */ -+static inline void -+prepare_task_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ kcov_prepare_switch(prev); -+ sched_info_switch(rq, prev, next); -+ perf_event_task_sched_out(prev, next); -+ rseq_preempt(prev); -+ fire_sched_out_preempt_notifiers(prev, next); -+ prepare_task(next); -+ prepare_arch_switch(next); -+} -+ -+/** -+ * finish_task_switch - clean up after a task-switch -+ * @rq: runqueue associated with task-switch -+ * @prev: the thread we just switched away from. -+ * -+ * finish_task_switch must be called after the context switch, paired -+ * with a prepare_task_switch call before the context switch. -+ * finish_task_switch will reconcile locking set up by prepare_task_switch, -+ * and do any other architecture-specific cleanup actions. -+ * -+ * Note that we may have delayed dropping an mm in context_switch(). If -+ * so, we finish that here outside of the runqueue lock. (Doing it -+ * with the lock held can cause deadlocks; see schedule() for -+ * details.) -+ * -+ * The context switch have flipped the stack from under us and restored the -+ * local variables which were saved when this task called schedule() in the -+ * past. prev == current is still correct but we need to recalculate this_rq -+ * because prev may have moved to another CPU. -+ */ -+static struct rq *finish_task_switch(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq = this_rq(); -+ struct mm_struct *mm = rq->prev_mm; -+ long prev_state; -+ -+ /* -+ * The previous task will have left us with a preempt_count of 2 -+ * because it left us after: -+ * -+ * schedule() -+ * preempt_disable(); // 1 -+ * __schedule() -+ * raw_spin_lock_irq(&rq->lock) // 2 -+ * -+ * Also, see FORK_PREEMPT_COUNT. -+ */ -+ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, -+ "corrupted preempt_count: %s/%d/0x%x\n", -+ current->comm, current->pid, preempt_count())) -+ preempt_count_set(FORK_PREEMPT_COUNT); -+ -+ rq->prev_mm = NULL; -+ -+ /* -+ * A task struct has one reference for the use as "current". -+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls -+ * schedule one last time. The schedule call will never return, and -+ * the scheduled task must drop that reference. -+ * -+ * We must observe prev->state before clearing prev->on_cpu (in -+ * finish_task), otherwise a concurrent wakeup can get prev -+ * running on another CPU and we could rave with its RUNNING -> DEAD -+ * transition, resulting in a double drop. -+ */ -+ prev_state = prev->state; -+ vtime_task_switch(prev); -+ perf_event_task_sched_in(prev, current); -+ finish_task(prev); -+ finish_lock_switch(rq); -+ finish_arch_post_lock_switch(); -+ kcov_finish_switch(current); -+ -+ fire_sched_in_preempt_notifiers(current); -+ /* -+ * When switching through a kernel thread, the loop in -+ * membarrier_{private,global}_expedited() may have observed that -+ * kernel thread and not issued an IPI. It is therefore possible to -+ * schedule between user->kernel->user threads without passing though -+ * switch_mm(). Membarrier requires a barrier after storing to -+ * rq->curr, before returning to userspace, so provide them here: -+ * -+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly -+ * provided by mmdrop(), -+ * - a sync_core for SYNC_CORE. 
-+ */ -+ if (mm) { -+ membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop(mm); -+ } -+ if (unlikely(prev_state == TASK_DEAD)) { -+ /* -+ * Remove function-return probe instances associated with this -+ * task and put them back on the free list. -+ */ -+ kprobe_flush_task(prev); -+ -+ /* Task is done with its stack. */ -+ put_task_stack(prev); -+ -+ put_task_struct_rcu_user(prev); -+ } -+ -+ tick_nohz_task_switch(); -+ return rq; -+} -+ -+/** -+ * schedule_tail - first thing a freshly forked thread must call. -+ * @prev: the thread we just switched away from. -+ */ -+asmlinkage __visible void schedule_tail(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq; -+ -+ /* -+ * New tasks start with FORK_PREEMPT_COUNT, see there and -+ * finish_task_switch() for details. -+ * -+ * finish_task_switch() will drop rq->lock() and lower preempt_count -+ * and the preempt_enable() will end up enabling preemption (on -+ * PREEMPT_COUNT kernels). -+ */ -+ -+ rq = finish_task_switch(prev); -+ preempt_enable(); -+ -+ if (current->set_child_tid) -+ put_user(task_pid_vnr(current), current->set_child_tid); -+ -+ calculate_sigpending(); -+} -+ -+/* -+ * context_switch - switch to the new MM and the new thread's register state. -+ */ -+static __always_inline struct rq * -+context_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ prepare_task_switch(rq, prev, next); -+ -+ /* -+ * For paravirt, this is coupled with an exit in switch_to to -+ * combine the page table reload and the switch backend into -+ * one hypercall. -+ */ -+ arch_start_context_switch(prev); -+ -+ /* -+ * kernel -> kernel lazy + transfer active -+ * user -> kernel lazy + mmgrab() active -+ * -+ * kernel -> user switch + mmdrop() active -+ * user -> user switch -+ */ -+ if (!next->mm) { // to kernel -+ enter_lazy_tlb(prev->active_mm, next); -+ -+ next->active_mm = prev->active_mm; -+ if (prev->mm) // from user -+ mmgrab(prev->active_mm); -+ else -+ prev->active_mm = NULL; -+ } else { // to user -+ membarrier_switch_mm(rq, prev->active_mm, next->mm); -+ /* -+ * sys_membarrier() requires an smp_mb() between setting -+ * rq->curr / membarrier_switch_mm() and returning to userspace. -+ * -+ * The below provides this either through switch_mm(), or in -+ * case 'prev->active_mm == next->mm' through -+ * finish_task_switch()'s mmdrop(). -+ */ -+ switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ -+ if (!prev->mm) { // from kernel -+ /* will mmdrop() in finish_task_switch(). */ -+ rq->prev_mm = prev->active_mm; -+ prev->active_mm = NULL; -+ } -+ } -+ -+ prepare_lock_switch(rq, next); -+ -+ /* Here we just switch the register state and the stack. */ -+ switch_to(prev, next, prev); -+ barrier(); -+ -+ return finish_task_switch(prev); -+} -+ -+/* -+ * nr_running, nr_uninterruptible and nr_context_switches: -+ * -+ * externally visible scheduler statistics: current number of runnable -+ * threads, total number of context switches performed since bootup. -+ */ -+unsigned long nr_running(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_running; -+ -+ return sum; -+} -+ -+/* -+ * Check if only the current task is running on the CPU. -+ * -+ * Caution: this function does not check that the caller has disabled -+ * preemption, thus the result might have a time-of-check-to-time-of-use -+ * race. 
The caller is responsible to use it correctly, for example: -+ * -+ * - from a non-preemptible section (of course) -+ * -+ * - from a thread that is bound to a single CPU -+ * -+ * - in a loop with very short iterations (e.g. a polling loop) -+ */ -+bool single_task_running(void) -+{ -+ return raw_rq()->nr_running == 1; -+} -+EXPORT_SYMBOL(single_task_running); -+ -+unsigned long long nr_context_switches(void) -+{ -+ int i; -+ unsigned long long sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += cpu_rq(i)->nr_switches; -+ -+ return sum; -+} -+ -+/* -+ * Consumers of these two interfaces, like for example the cpuidle menu -+ * governor, are using nonsensical data. Preferring shallow idle state selection -+ * for a CPU that has IO-wait which might not even end up running the task when -+ * it does become runnable. -+ */ -+ -+unsigned long nr_iowait_cpu(int cpu) -+{ -+ return atomic_read(&cpu_rq(cpu)->nr_iowait); -+} -+ -+/* -+ * IO-wait accounting, and how its mostly bollocks (on SMP). -+ * -+ * The idea behind IO-wait account is to account the idle time that we could -+ * have spend running if it were not for IO. That is, if we were to improve the -+ * storage performance, we'd have a proportional reduction in IO-wait time. -+ * -+ * This all works nicely on UP, where, when a task blocks on IO, we account -+ * idle time as IO-wait, because if the storage were faster, it could've been -+ * running and we'd not be idle. -+ * -+ * This has been extended to SMP, by doing the same for each CPU. This however -+ * is broken. -+ * -+ * Imagine for instance the case where two tasks block on one CPU, only the one -+ * CPU will have IO-wait accounted, while the other has regular idle. Even -+ * though, if the storage were faster, both could've ran at the same time, -+ * utilising both CPUs. -+ * -+ * This means, that when looking globally, the current IO-wait accounting on -+ * SMP is a lower bound, by reason of under accounting. -+ * -+ * Worse, since the numbers are provided per CPU, they are sometimes -+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly -+ * associated with any one particular CPU, it can wake to another CPU than it -+ * blocked on. This means the per CPU IO-wait number is meaningless. -+ * -+ * Task CPU affinities can make all that even more 'interesting'. -+ */ -+ -+unsigned long nr_iowait(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += nr_iowait_cpu(i); -+ -+ return sum; -+} -+ -+#ifdef CONFIG_SMP -+ -+/* -+ * sched_exec - execve() is a valuable balancing opportunity, because at -+ * this point the task has the smallest effective memory and cache -+ * footprint. 
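single_task_running() above is meant for exactly the kind of short polling loop its comment describes; a hypothetical sketch (not part of this patch) of such a caller:

#include <linux/sched.h>
#include <linux/sched/stat.h>
#include <linux/ktime.h>

/* Hypothetical poller: spin briefly for an event, but give up the CPU as soon
 * as another task becomes runnable on it. Per the caveats above, this only
 * makes sense preemption-disabled or from a CPU-bound thread. */
static bool poll_for_event(bool (*event_pending)(void), u64 timeout_ns)
{
        u64 deadline = ktime_get_ns() + timeout_ns;

        do {
                if (event_pending())
                        return true;
                if (!single_task_running())     /* someone else wants this CPU */
                        break;
                cpu_relax();
        } while (ktime_get_ns() < deadline);

        return false;
}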
-+ */ -+void sched_exec(void) -+{ -+ struct task_struct *p = current; -+ unsigned long flags; -+ int dest_cpu; -+ struct rq *rq; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = this_rq(); -+ -+ if (rq != task_rq(p) || rq->nr_running < 2) -+ goto unlock; -+ -+ dest_cpu = select_task_rq(p, task_rq(p)); -+ if (dest_cpu == smp_processor_id()) -+ goto unlock; -+ -+ if (likely(cpu_active(dest_cpu))) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); -+ return; -+ } -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#endif -+ -+DEFINE_PER_CPU(struct kernel_stat, kstat); -+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -+ -+EXPORT_PER_CPU_SYMBOL(kstat); -+EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -+ -+static inline void update_curr(struct rq *rq, struct task_struct *p) -+{ -+ s64 ns = rq->clock_task - p->last_ran; -+ -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ p->time_slice -= ns; -+ p->last_ran = rq->clock_task; -+} -+ -+/* -+ * Return accounted runtime for the task. -+ * Return separately the current's pending runtime that have not been -+ * accounted yet. -+ */ -+unsigned long long task_sched_runtime(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ u64 ns; -+ -+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) -+ /* -+ * 64-bit doesn't need locks to atomically read a 64-bit value. -+ * So we have a optimization chance when the task's delta_exec is 0. -+ * Reading ->on_cpu is racy, but this is ok. -+ * -+ * If we race with it leaving CPU, we'll take a lock. So we're correct. -+ * If we race with it entering CPU, unaccounted time is 0. This is -+ * indistinguishable from the read occurring a few cycles earlier. -+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has -+ * been accounted, so we're correct here as well. -+ */ -+ if (!p->on_cpu || !task_on_rq_queued(p)) -+ return tsk_seruntime(p); -+#endif -+ -+ rq = task_access_lock_irqsave(p, &lock, &flags); -+ /* -+ * Must be ->curr _and_ ->on_rq. If dequeued, we would -+ * project cycles that may never be accounted to this -+ * thread, breaking clock_gettime(). -+ */ -+ if (p == rq->curr && task_on_rq_queued(p)) { -+ update_rq_clock(rq); -+ update_curr(rq, p); -+ } -+ ns = tsk_seruntime(p); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ return ns; -+} -+ -+/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static inline void scheduler_task_tick(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ if (is_idle_task(p)) -+ return; -+ -+ update_curr(rq, p); -+ cpufreq_update_util(rq, 0); -+ -+ /* -+ * Tasks have less than RESCHED_NS of time slice left they will be -+ * rescheduled. -+ */ -+ if (p->time_slice >= RESCHED_NS) -+ return; -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ -+/* -+ * This function gets called by the timer code, with HZ frequency. -+ * We call it with interrupts disabled. 
-+ */ -+void scheduler_tick(void) -+{ -+ int cpu __maybe_unused = smp_processor_id(); -+ struct rq *rq = cpu_rq(cpu); -+ -+ arch_scale_freq_tick(); -+ sched_clock_tick(); -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ scheduler_task_tick(rq); -+ calc_global_load_tick(rq); -+ psi_task_tick(rq); -+ -+ rq->last_tick = rq->clock; -+ raw_spin_unlock(&rq->lock); -+ -+ perf_event_task_tick(); -+} -+ -+#ifdef CONFIG_SCHED_SMT -+static inline int active_load_balance_cpu_stop(void *data) -+{ -+ struct rq *rq = this_rq(); -+ struct task_struct *p = data; -+ cpumask_t tmp; -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ rq->active_balance = 0; -+ /* _something_ may have changed the task, double check again */ -+ if (task_on_rq_queued(p) && task_rq(p) == rq && -+ cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) { -+ int cpu = cpu_of(rq); -+ int dcpu = __best_mask_cpu(cpu, &tmp, -+ per_cpu(sched_cpu_llc_mask, cpu)); -+ rq = move_queued_task(rq, p, dcpu); -+ } -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_restore(flags); -+ -+ return 0; -+} -+ -+/* sg_balance_trigger - trigger slibing group balance for @cpu */ -+static inline int sg_balance_trigger(const int cpu) -+{ -+ struct rq *rq= cpu_rq(cpu); -+ unsigned long flags; -+ struct task_struct *curr; -+ int res; -+ -+ if (!raw_spin_trylock_irqsave(&rq->lock, flags)) -+ return 0; -+ curr = rq->curr; -+ res = (!is_idle_task(curr)) && (1 == rq->nr_running) &&\ -+ cpumask_intersects(curr->cpus_ptr, &sched_sg_idle_mask) &&\ -+ (!rq->active_balance); -+ -+ if (res) -+ rq->active_balance = 1; -+ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ if (res) -+ stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop, -+ curr, &rq->active_balance_work); -+ return res; -+} -+ -+/* -+ * sg_balance_check - slibing group balance check for run queue @rq -+ */ -+static inline void sg_balance_check(struct rq *rq) -+{ -+ cpumask_t chk; -+ int cpu; -+ -+ /* exit when no sg in idle */ -+ if (cpumask_empty(&sched_sg_idle_mask)) -+ return; -+ -+ cpu = cpu_of(rq); -+ /* -+ * Only cpu in slibing idle group will do the checking and then -+ * find potential cpus which can migrate the current running task -+ */ -+ if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && -+ cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) && -+ cpumask_andnot(&chk, &chk, &sched_rq_watermark[IDLE_WM])) { -+ int i, tried = 0; -+ -+ for_each_cpu_wrap(i, &chk, cpu) { -+ if (cpumask_subset(cpu_smt_mask(i), &chk)) { -+ if (sg_balance_trigger(i)) -+ return; -+ if (tried) -+ return; -+ tried++; -+ } -+ } -+ } -+} -+#endif /* CONFIG_SCHED_SMT */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+ -+struct tick_work { -+ int cpu; -+ atomic_t state; -+ struct delayed_work work; -+}; -+/* Values for ->state, see diagram below. */ -+#define TICK_SCHED_REMOTE_OFFLINE 0 -+#define TICK_SCHED_REMOTE_OFFLINING 1 -+#define TICK_SCHED_REMOTE_RUNNING 2 -+ -+/* -+ * State diagram for ->state: -+ * -+ * -+ * TICK_SCHED_REMOTE_OFFLINE -+ * | ^ -+ * | | -+ * | | sched_tick_remote() -+ * | | -+ * | | -+ * +--TICK_SCHED_REMOTE_OFFLINING -+ * | ^ -+ * | | -+ * sched_tick_start() | | sched_tick_stop() -+ * | | -+ * V | -+ * TICK_SCHED_REMOTE_RUNNING -+ * -+ * -+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() -+ * and sched_tick_start() are happy to leave the state in RUNNING. 
-+ */ -+ -+static struct tick_work __percpu *tick_work_cpu; -+ -+static void sched_tick_remote(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct tick_work *twork = container_of(dwork, struct tick_work, work); -+ int cpu = twork->cpu; -+ struct rq *rq = cpu_rq(cpu); -+ struct task_struct *curr; -+ unsigned long flags; -+ u64 delta; -+ int os; -+ -+ /* -+ * Handle the tick only if it appears the remote CPU is running in full -+ * dynticks mode. The check is racy by nature, but missing a tick or -+ * having one too much is no big deal because the scheduler tick updates -+ * statistics and checks timeslices in a time-independent way, regardless -+ * of when exactly it is running. -+ */ -+ if (!tick_nohz_tick_stopped_cpu(cpu)) -+ goto out_requeue; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ curr = rq->curr; -+ if (cpu_is_offline(cpu)) -+ goto out_unlock; -+ -+ update_rq_clock(rq); -+ if (!is_idle_task(curr)) { -+ /* -+ * Make sure the next tick runs within a reasonable -+ * amount of time. -+ */ -+ delta = rq_clock_task(rq) - curr->last_ran; -+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); -+ } -+ scheduler_task_tick(rq); -+ -+ calc_load_nohz_remote(rq); -+out_unlock: -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+out_requeue: -+ /* -+ * Run the remote tick once per second (1Hz). This arbitrary -+ * frequency is large enough to avoid overload but short enough -+ * to keep scheduler internal stats reasonably up to date. But -+ * first update state to reflect hotplug activity if required. -+ */ -+ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); -+ if (os == TICK_SCHED_REMOTE_RUNNING) -+ queue_delayed_work(system_unbound_wq, dwork, HZ); -+} -+ -+static void sched_tick_start(int cpu) -+{ -+ int os; -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); -+ if (os == TICK_SCHED_REMOTE_OFFLINE) { -+ twork->cpu = cpu; -+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); -+ queue_delayed_work(system_unbound_wq, &twork->work, HZ); -+ } -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void sched_tick_stop(int cpu) -+{ -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ cancel_delayed_work_sync(&twork->work); -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+int __init sched_tick_offload_init(void) -+{ -+ tick_work_cpu = alloc_percpu(struct tick_work); -+ BUG_ON(!tick_work_cpu); -+ return 0; -+} -+ -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_tick_start(int cpu) { } -+static inline void sched_tick_stop(int cpu) { } -+#endif -+ -+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ -+ defined(CONFIG_PREEMPT_TRACER)) -+/* -+ * If the value passed in is equal to the current preempt count -+ * then we just disabled preemption. Start timing the latency. -+ */ -+static inline void preempt_latency_start(int val) -+{ -+ if (preempt_count() == val) { -+ unsigned long ip = get_lock_parent_ip(); -+#ifdef CONFIG_DEBUG_PREEMPT -+ current->preempt_disable_ip = ip; -+#endif -+ trace_preempt_off(CALLER_ADDR0, ip); -+ } -+} -+ -+void preempt_count_add(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? 
-+ */ -+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) -+ return; -+#endif -+ __preempt_count_add(val); -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Spinlock count overflowing soon? -+ */ -+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= -+ PREEMPT_MASK - 10); -+#endif -+ preempt_latency_start(val); -+} -+EXPORT_SYMBOL(preempt_count_add); -+NOKPROBE_SYMBOL(preempt_count_add); -+ -+/* -+ * If the value passed in equals to the current preempt count -+ * then we just enabled preemption. Stop timing the latency. -+ */ -+static inline void preempt_latency_stop(int val) -+{ -+ if (preempt_count() == val) -+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); -+} -+ -+void preempt_count_sub(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) -+ return; -+ /* -+ * Is the spinlock portion underflowing? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && -+ !(preempt_count() & PREEMPT_MASK))) -+ return; -+#endif -+ -+ preempt_latency_stop(val); -+ __preempt_count_sub(val); -+} -+EXPORT_SYMBOL(preempt_count_sub); -+NOKPROBE_SYMBOL(preempt_count_sub); -+ -+#else -+static inline void preempt_latency_start(int val) { } -+static inline void preempt_latency_stop(int val) { } -+#endif -+ -+static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ return p->preempt_disable_ip; -+#else -+ return 0; -+#endif -+} -+ -+/* -+ * Print scheduling while atomic bug: -+ */ -+static noinline void __schedule_bug(struct task_struct *prev) -+{ -+ /* Save this before calling printk(), since that will clobber it */ -+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ if (oops_in_progress) -+ return; -+ -+ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", -+ prev->comm, prev->pid, preempt_count()); -+ -+ debug_show_held_locks(prev); -+ print_modules(); -+ if (irqs_disabled()) -+ print_irqtrace_events(prev); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && in_atomic_preempt_off()) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); -+ } -+ if (panic_on_warn) -+ panic("scheduling while atomic\n"); -+ -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+ -+/* -+ * Various schedule()-time debugging checks and statistics: -+ */ -+static inline void schedule_debug(struct task_struct *prev, bool preempt) -+{ -+#ifdef CONFIG_SCHED_STACK_END_CHECK -+ if (task_stack_end_corrupted(prev)) -+ panic("corrupted stack end detected inside scheduler\n"); -+ -+ if (task_scs_end_corrupted(prev)) -+ panic("corrupted shadow stack detected inside scheduler\n"); -+#endif -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && prev->state && prev->non_block_count) { -+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", -+ prev->comm, prev->pid, prev->non_block_count); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+ } -+#endif -+ -+ if (unlikely(in_atomic_preempt_off())) { -+ __schedule_bug(prev); -+ preempt_count_set(PREEMPT_DISABLED); -+ } -+ rcu_sleep_check(); -+ -+ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -+ -+ schedstat_inc(this_rq()->sched_count); -+} -+ -+/* -+ * Compile time debug macro -+ * #define ALT_SCHED_DEBUG -+ */ -+ -+#ifdef ALT_SCHED_DEBUG -+void alt_sched_debug(void) -+{ -+ printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n", -+ sched_rq_pending_mask.bits[0], -+ sched_rq_watermark[IDLE_WM].bits[0], -+ sched_sg_idle_mask.bits[0]); -+} 
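preempt_count_add()/preempt_count_sub() above are what an ordinary preempt_disable()/preempt_enable() pair expands to on debug/tracing kernels; a trivial illustrative user (hypothetical per-CPU counter, not part of this patch):

#include <linux/preempt.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, my_counter);       /* hypothetical */

static void bump_my_counter(void)
{
        preempt_disable();              /* preempt_count_add(1) on debug/tracing kernels */
        __this_cpu_inc(my_counter);     /* safe: no migration while non-preemptible */
        preempt_enable();               /* preempt_count_sub(1), may call preempt_schedule() */
}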
-+#else -+inline void alt_sched_debug(void) {} -+#endif -+ -+#ifdef CONFIG_SMP -+ -+#define SCHED_RQ_NR_MIGRATION (32UL) -+/* -+ * Migrate pending tasks in @rq to @dest_cpu -+ * Will try to migrate mininal of half of @rq nr_running tasks and -+ * SCHED_RQ_NR_MIGRATION to @dest_cpu -+ */ -+static inline int -+migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) -+{ -+ struct task_struct *p, *skip = rq->curr; -+ int nr_migrated = 0; -+ int nr_tries = min(rq->nr_running / 2, SCHED_RQ_NR_MIGRATION); -+ -+ while (skip != rq->idle && nr_tries && -+ (p = sched_rq_next_task(skip, rq)) != rq->idle) { -+ skip = sched_rq_next_task(p, rq); -+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { -+ __SCHED_DEQUEUE_TASK(p, rq, 0, ); -+ set_task_cpu(p, dest_cpu); -+ __SCHED_ENQUEUE_TASK(p, dest_rq, 0); -+ nr_migrated++; -+ } -+ nr_tries--; -+ } -+ -+ return nr_migrated; -+} -+ -+static inline int take_other_rq_tasks(struct rq *rq, int cpu) -+{ -+ struct cpumask *affinity_mask, *end_mask; -+ -+ if (unlikely(!rq->online)) -+ return 0; -+ -+ if (cpumask_empty(&sched_rq_pending_mask)) -+ return 0; -+ -+ affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); -+ do { -+ int i; -+ for_each_cpu_and(i, &sched_rq_pending_mask, affinity_mask) { -+ int nr_migrated; -+ struct rq *src_rq; -+ -+ src_rq = cpu_rq(i); -+ if (!do_raw_spin_trylock(&src_rq->lock)) -+ continue; -+ spin_acquire(&src_rq->lock.dep_map, -+ SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ -+ if ((nr_migrated = migrate_pending_tasks(src_rq, rq, cpu))) { -+ src_rq->nr_running -= nr_migrated; -+#ifdef CONFIG_SMP -+ if (src_rq->nr_running < 2) -+ cpumask_clear_cpu(i, &sched_rq_pending_mask); -+#endif -+ rq->nr_running += nr_migrated; -+#ifdef CONFIG_SMP -+ if (rq->nr_running > 1) -+ cpumask_set_cpu(cpu, &sched_rq_pending_mask); -+#endif -+ update_sched_rq_watermark(rq); -+ cpufreq_update_util(rq, 0); -+ -+ spin_release(&src_rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ -+ return 1; -+ } -+ -+ spin_release(&src_rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ } -+ } while (++affinity_mask < end_mask); -+ -+ return 0; -+} -+#endif -+ -+/* -+ * Timeslices below RESCHED_NS are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. 
-+ */ -+static inline void check_curr(struct task_struct *p, struct rq *rq) -+{ -+ if (unlikely(rq->idle == p)) -+ return; -+ -+ update_curr(rq, p); -+ -+ if (p->time_slice < RESCHED_NS) -+ time_slice_expired(p, rq); -+} -+ -+static inline struct task_struct * -+choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) -+{ -+ struct task_struct *next; -+ -+ if (unlikely(rq->skip)) { -+ next = rq_runnable_task(rq); -+ if (next == rq->idle) { -+#ifdef CONFIG_SMP -+ if (!take_other_rq_tasks(rq, cpu)) { -+#endif -+ rq->skip = NULL; -+ schedstat_inc(rq->sched_goidle); -+ return next; -+#ifdef CONFIG_SMP -+ } -+ next = rq_runnable_task(rq); -+#endif -+ } -+ rq->skip = NULL; -+#ifdef CONFIG_HIGH_RES_TIMERS -+ hrtick_start(rq, next->time_slice); -+#endif -+ return next; -+ } -+ -+ next = sched_rq_first_task(rq); -+ if (next == rq->idle) { -+#ifdef CONFIG_SMP -+ if (!take_other_rq_tasks(rq, cpu)) { -+#endif -+ schedstat_inc(rq->sched_goidle); -+ /*printk(KERN_INFO "sched: choose_next_task(%d) idle %px\n", cpu, next);*/ -+ return next; -+#ifdef CONFIG_SMP -+ } -+ next = sched_rq_first_task(rq); -+#endif -+ } -+#ifdef CONFIG_HIGH_RES_TIMERS -+ hrtick_start(rq, next->time_slice); -+#endif -+ /*printk(KERN_INFO "sched: choose_next_task(%d) next %px\n", cpu, -+ * next);*/ -+ return next; -+} -+ -+/* -+ * schedule() is the main scheduler function. -+ * -+ * The main means of driving the scheduler and thus entering this function are: -+ * -+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. -+ * -+ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return -+ * paths. For example, see arch/x86/entry_64.S. -+ * -+ * To drive preemption between tasks, the scheduler sets the flag in timer -+ * interrupt handler scheduler_tick(). -+ * -+ * 3. Wakeups don't really cause entry into schedule(). They add a -+ * task to the run-queue and that's it. -+ * -+ * Now, if the new task added to the run-queue preempts the current -+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets -+ * called on the nearest possible occasion: -+ * -+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): -+ * -+ * - in syscall or exception context, at the next outmost -+ * preempt_enable(). (this might be as soon as the wake_up()'s -+ * spin_unlock()!) -+ * -+ * - in IRQ context, return from interrupt-handler to -+ * preemptible context -+ * -+ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) -+ * then at the next: -+ * -+ * - cond_resched() call -+ * - explicit schedule() call -+ * - return from syscall or exception to user-space -+ * - return from interrupt-handler to user-space -+ * -+ * WARNING: must be called with preemption disabled! 
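Two of the entry paths into __schedule() enumerated above, shown as a hypothetical sketch (not part of this patch): explicit blocking on a mutex, and a voluntary cond_resched() inside a long-running loop:

#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/sched.h>

static DEFINE_MUTEX(my_lock);           /* hypothetical */

static void long_running_work(struct list_head *items)
{
        struct list_head *pos;

        mutex_lock(&my_lock);           /* may sleep: enters __schedule() via the mutex slowpath */
        list_for_each(pos, items) {
                /* ... process one entry ... */
                cond_resched();         /* voluntary scheduling point, mainly for CONFIG_PREEMPTION=n */
        }
        mutex_unlock(&my_lock);
}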
-+ */ -+static void __sched notrace __schedule(bool preempt) -+{ -+ struct task_struct *prev, *next; -+ unsigned long *switch_count; -+ unsigned long prev_state; -+ struct rq *rq; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ rq = cpu_rq(cpu); -+ prev = rq->curr; -+ -+ schedule_debug(prev, preempt); -+ -+ /* by passing sched_feat(HRTICK) checking which Alt schedule FW doesn't support */ -+ hrtick_clear(rq); -+ -+ local_irq_disable(); -+ rcu_note_context_switch(preempt); -+ -+ /* -+ * Make sure that signal_pending_state()->signal_pending() below -+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(): -+ * -+ * __set_current_state(@state) signal_wake_up() -+ * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) -+ * wake_up_state(p, state) -+ * LOCK rq->lock LOCK p->pi_state -+ * smp_mb__after_spinlock() smp_mb__after_spinlock() -+ * if (signal_pending_state()) if (p->state & @state) -+ * -+ * Also, the membarrier system call requires a full memory barrier -+ * after coming from user-space, before storing to rq->curr. -+ */ -+ raw_spin_lock(&rq->lock); -+ smp_mb__after_spinlock(); -+ -+ update_rq_clock(rq); -+ -+ switch_count = &prev->nivcsw; -+ /* -+ * We must load prev->state once (task_struct::state is volatile), such -+ * that: -+ * -+ * - we form a control dependency vs deactivate_task() below. -+ * - ptrace_{,un}freeze_traced() can change ->state underneath us. -+ */ -+ prev_state = prev->state; -+ if (!preempt && prev_state && prev_state == prev->state) { -+ if (signal_pending_state(prev_state, prev)) { -+ prev->state = TASK_RUNNING; -+ } else { -+ prev->sched_contributes_to_load = -+ (prev_state & TASK_UNINTERRUPTIBLE) && -+ !(prev_state & TASK_NOLOAD) && -+ !(prev->flags & PF_FROZEN); -+ -+ if (prev->sched_contributes_to_load) -+ rq->nr_uninterruptible++; -+ -+ /* -+ * __schedule() ttwu() -+ * prev_state = prev->state; if (p->on_rq && ...) -+ * if (prev_state) goto out; -+ * p->on_rq = 0; smp_acquire__after_ctrl_dep(); -+ * p->state = TASK_WAKING -+ * -+ * Where __schedule() and ttwu() have matching control dependencies. -+ * -+ * After this, schedule() must not care about p->state any more. -+ */ -+ sched_task_deactivate(prev, rq); -+ deactivate_task(prev, rq); -+ -+ if (prev->in_iowait) { -+ atomic_inc(&rq->nr_iowait); -+ delayacct_blkio_start(); -+ } -+ } -+ switch_count = &prev->nvcsw; -+ } -+ -+ check_curr(prev, rq); -+ -+ next = choose_next_task(rq, cpu, prev); -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ -+ -+ if (likely(prev != next)) { -+ next->last_ran = rq->clock_task; -+ rq->last_ts_switch = rq->clock; -+ -+ rq->nr_switches++; -+ /* -+ * RCU users of rcu_dereference(rq->curr) may not see -+ * changes to task_struct made by pick_next_task(). -+ */ -+ RCU_INIT_POINTER(rq->curr, next); -+ /* -+ * The membarrier system call requires each architecture -+ * to have a full memory barrier after updating -+ * rq->curr, before returning to user-space. -+ * -+ * Here are the schemes providing that barrier on the -+ * various architectures: -+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. -+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
-+ * - finish_lock_switch() for weakly-ordered -+ * architectures where spin_unlock is a full barrier, -+ * - switch_to() for arm64 (weakly-ordered, spin_unlock -+ * is a RELEASE barrier), -+ */ -+ ++*switch_count; -+ -+ psi_sched_switch(prev, next, !task_on_rq_queued(prev)); -+ -+ trace_sched_switch(preempt, prev, next); -+ -+ /* Also unlocks the rq: */ -+ rq = context_switch(rq, prev, next); -+ } else -+ raw_spin_unlock_irq(&rq->lock); -+ -+#ifdef CONFIG_SCHED_SMT -+ sg_balance_check(rq); -+#endif -+} -+ -+void __noreturn do_task_dead(void) -+{ -+ /* Causes final put_task_struct in finish_task_switch(): */ -+ set_special_state(TASK_DEAD); -+ -+ /* Tell freezer to ignore us: */ -+ current->flags |= PF_NOFREEZE; -+ -+ __schedule(false); -+ BUG(); -+ -+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -+ for (;;) -+ cpu_relax(); -+} -+ -+static inline void sched_submit_work(struct task_struct *tsk) -+{ -+ if (!tsk->state) -+ return; -+ -+ /* -+ * If a worker went to sleep, notify and ask workqueue whether -+ * it wants to wake up a task to maintain concurrency. -+ * As this function is called inside the schedule() context, -+ * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker and because wq_worker_sleeping() -+ * requires it. -+ */ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ preempt_disable(); -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_sleeping(tsk); -+ else -+ io_wq_worker_sleeping(tsk); -+ preempt_enable_no_resched(); -+ } -+ -+ if (tsk_is_pi_blocked(tsk)) -+ return; -+ -+ /* -+ * If we are going to sleep and we have plugged IO queued, -+ * make sure to submit it to avoid deadlocks. -+ */ -+ if (blk_needs_flush_plug(tsk)) -+ blk_schedule_flush_plug(tsk); -+} -+ -+static void sched_update_worker(struct task_struct *tsk) -+{ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_running(tsk); -+ else -+ io_wq_worker_running(tsk); -+ } -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ struct task_struct *tsk = current; -+ -+ sched_submit_work(tsk); -+ do { -+ preempt_disable(); -+ __schedule(false); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ sched_update_worker(tsk); -+} -+EXPORT_SYMBOL(schedule); -+ -+/* -+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted -+ * state (have scheduled out non-voluntarily) by making sure that all -+ * tasks have either left the run queue or have gone into user space. -+ * As idle tasks do not do either, they must not ever be preempted -+ * (schedule out non-voluntarily). -+ * -+ * schedule_idle() is similar to schedule_preempt_disable() except that it -+ * never enables preemption because it does not call sched_submit_work(). -+ */ -+void __sched schedule_idle(void) -+{ -+ /* -+ * As this skips calling sched_submit_work(), which the idle task does -+ * regardless because that function is a nop when the task is in a -+ * TASK_RUNNING state, make sure this isn't used someplace that the -+ * current task can be in any other state. Note, idle is always in the -+ * TASK_RUNNING state. 
-+ */ -+ WARN_ON_ONCE(current->state); -+ do { -+ __schedule(false); -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_CONTEXT_TRACKING -+asmlinkage __visible void __sched schedule_user(void) -+{ -+ /* -+ * If we come here after a random call to set_need_resched(), -+ * or we have been woken up remotely but the IPI has not yet arrived, -+ * we haven't yet exited the RCU idle mode. Do it here manually until -+ * we find a better solution. -+ * -+ * NB: There are buggy callers of this function. Ideally we -+ * should warn if prev_state != CONTEXT_USER, but that will trigger -+ * too frequently to make sense yet. -+ */ -+ enum ctx_state prev_state = exception_enter(); -+ schedule(); -+ exception_exit(prev_state); -+} -+#endif -+ -+/** -+ * schedule_preempt_disabled - called with preemption disabled -+ * -+ * Returns with preemption disabled. Note: preempt_count must be 1 -+ */ -+void __sched schedule_preempt_disabled(void) -+{ -+ sched_preempt_enable_no_resched(); -+ schedule(); -+ preempt_disable(); -+} -+ -+static void __sched notrace preempt_schedule_common(void) -+{ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ __schedule(true); -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ -+ /* -+ * Check again in case we missed a preemption opportunity -+ * between schedule and now. -+ */ -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_PREEMPTION -+/* -+ * This is the entry point to schedule() from in-kernel preemption -+ * off of preempt_enable. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule(void) -+{ -+ /* -+ * If there is a non-zero preempt_count or interrupts are disabled, -+ * we do not want to preempt the current task. Just return.. -+ */ -+ if (likely(!preemptible())) -+ return; -+ -+ preempt_schedule_common(); -+} -+NOKPROBE_SYMBOL(preempt_schedule); -+EXPORT_SYMBOL(preempt_schedule); -+ -+/** -+ * preempt_schedule_notrace - preempt_schedule called by tracing -+ * -+ * The tracing infrastructure uses preempt_enable_notrace to prevent -+ * recursion and tracing preempt enabling caused by the tracing -+ * infrastructure itself. But as tracing can happen in areas coming -+ * from userspace or just about to enter userspace, a preempt enable -+ * can occur before user_exit() is called. This will cause the scheduler -+ * to be called when the system is still in usermode. -+ * -+ * To prevent this, the preempt_enable_notrace will use this function -+ * instead of preempt_schedule() to exit user context if needed before -+ * calling the scheduler. 
-+ */ -+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) -+{ -+ enum ctx_state prev_ctx; -+ -+ if (likely(!preemptible())) -+ return; -+ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ /* -+ * Needs preempt disabled in case user_exit() is traced -+ * and the tracer calls preempt_enable_notrace() causing -+ * an infinite recursion. -+ */ -+ prev_ctx = exception_enter(); -+ __schedule(true); -+ exception_exit(prev_ctx); -+ -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ } while (need_resched()); -+} -+EXPORT_SYMBOL_GPL(preempt_schedule_notrace); -+ -+#endif /* CONFIG_PREEMPTION */ -+ -+/* -+ * This is the entry point to schedule() from kernel preemption -+ * off of irq context. -+ * Note, that this is called and return with irqs disabled. This will -+ * protect us against recursive calling from irq. -+ */ -+asmlinkage __visible void __sched preempt_schedule_irq(void) -+{ -+ enum ctx_state prev_state; -+ -+ /* Catch callers which need to be fixed */ -+ BUG_ON(preempt_count() || !irqs_disabled()); -+ -+ prev_state = exception_enter(); -+ -+ do { -+ preempt_disable(); -+ local_irq_enable(); -+ __schedule(true); -+ local_irq_disable(); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ -+ exception_exit(prev_state); -+} -+ -+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, -+ void *key) -+{ -+ WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC); -+ return try_to_wake_up(curr->private, mode, wake_flags); -+} -+EXPORT_SYMBOL(default_wake_function); -+ -+static inline void check_task_changed(struct rq *rq, struct task_struct *p) -+{ -+ /* Trigger resched if task sched_prio has been modified. */ -+ if (task_on_rq_queued(p) && sched_task_need_requeue(p, rq)) { -+ requeue_task(p, rq); -+ check_preempt_curr(rq); -+ } -+} -+ -+#ifdef CONFIG_RT_MUTEXES -+ -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ -+/* -+ * rt_mutex_setprio - set the current priority of a task -+ * @p: task to boost -+ * @pi_task: donor task -+ * -+ * This function changes the 'effective' priority of a task. It does -+ * not touch ->normal_prio like __setscheduler(). -+ * -+ * Used by the rt_mutex code to implement priority inheritance -+ * logic. Call site only calls if the priority of the task changed. -+ */ -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) -+{ -+ int prio; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. 
-+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio) -+ return; -+ -+ rq = __task_access_lock(p, &lock); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio) -+ goto out_unlock; -+ -+ /* -+ * Idle task boosting is a nono in general. There is one -+ * exception, when PREEMPT_RT and NOHZ is active: -+ * -+ * The idle task calls get_next_timer_interrupt() and holds -+ * the timer wheel base->lock on the CPU and another CPU wants -+ * to access the timer (probably to cancel it). We can safely -+ * ignore the boosting request, as the idle CPU runs this code -+ * with interrupts disabled and will complete the lock -+ * protected section without being interrupted. So there is no -+ * real need to boost. -+ */ -+ if (unlikely(p == rq->idle)) { -+ WARN_ON(p != rq->curr); -+ WARN_ON(p->pi_blocked_on); -+ goto out_unlock; -+ } -+ -+ trace_sched_pi_setprio(p, pi_task); -+ p->prio = prio; -+ update_task_priodl(p); -+ -+ check_task_changed(rq, p); -+out_unlock: -+ __task_access_unlock(p, lock); -+} -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} -+#endif -+ -+void set_user_nice(struct task_struct *p, long nice) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) -+ return; -+ /* -+ * We have to be careful, if called from sys_setpriority(), -+ * the task might be in the middle of scheduling on another CPU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ p->static_prio = NICE_TO_PRIO(nice); -+ /* -+ * The RT priorities are set via sched_setscheduler(), but we still -+ * allow the 'normal' nice value to be set - but as expected -+ * it wont have any effect on scheduling until the task is -+ * not SCHED_NORMAL/SCHED_BATCH: -+ */ -+ if (task_has_rt_policy(p)) -+ goto out_unlock; -+ -+ p->prio = effective_prio(p); -+ update_task_priodl(p); -+ -+ check_task_changed(rq, p); -+out_unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+EXPORT_SYMBOL(set_user_nice); -+ -+/* -+ * can_nice - check if a task can reduce its nice value -+ * @p: task -+ * @nice: nice value -+ */ -+int can_nice(const struct task_struct *p, const int nice) -+{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); -+} -+ -+#ifdef __ARCH_WANT_SYS_NICE -+ -+/* -+ * sys_nice - change the priority of the current process. -+ * @increment: priority increment -+ * -+ * sys_setpriority is a more generic, but much slower function that -+ * does similar things. -+ */ -+SYSCALL_DEFINE1(nice, int, increment) -+{ -+ long nice, retval; -+ -+ /* -+ * Setpriority might change our priority at the same moment. -+ * We don't have to worry. Conceptually one call occurs first -+ * and we have a single winner. 
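The rt_mutex_setprio() path above is the kernel half of priority inheritance: the owner of a PI lock is temporarily boosted to the priority of its highest-priority waiter. As an illustrative userspace sketch only (standard pthreads, nothing taken from this patch), a program opts into that path by using a PTHREAD_PRIO_INHERIT mutex, which is backed by a PI futex and hence by the rt_mutex code:

    /* build with: cc -pthread pi_mutex.c */
    #define _GNU_SOURCE
    #include <pthread.h>

    int main(void)
    {
        pthread_mutexattr_t attr;
        pthread_mutex_t lock;

        pthread_mutexattr_init(&attr);
        /* PTHREAD_PRIO_INHERIT maps to a PI futex, i.e. the kernel rt_mutex
         * boosting implemented by rt_mutex_setprio() above. */
        pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
        pthread_mutex_init(&lock, &attr);

        pthread_mutex_lock(&lock);
        /* ... if a SCHED_FIFO thread blocks on "lock" here, the owner
         * runs at that thread's priority until it unlocks ... */
        pthread_mutex_unlock(&lock);

        pthread_mutex_destroy(&lock);
        pthread_mutexattr_destroy(&attr);
        return 0;
    }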
-+ */ -+ -+ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); -+ nice = task_nice(current) + increment; -+ -+ nice = clamp_val(nice, MIN_NICE, MAX_NICE); -+ if (increment < 0 && !can_nice(current, nice)) -+ return -EPERM; -+ -+ retval = security_task_setnice(current, nice); -+ if (retval) -+ return retval; -+ -+ set_user_nice(current, nice); -+ return 0; -+} -+ -+#endif -+ -+/** -+ * idle_cpu - is a given CPU idle currently? -+ * @cpu: the processor in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int idle_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (rq->curr != rq->idle) -+ return 0; -+ -+ if (rq->nr_running) -+ return 0; -+ -+#ifdef CONFIG_SMP -+ if (rq->ttwu_pending) -+ return 0; -+#endif -+ -+ return 1; -+} -+ -+/** -+ * idle_task - return the idle task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * Return: The idle task for the cpu @cpu. -+ */ -+struct task_struct *idle_task(int cpu) -+{ -+ return cpu_rq(cpu)->idle; -+} -+ -+/** -+ * find_process_by_pid - find a process with a matching PID value. -+ * @pid: the pid in question. -+ * -+ * The task of @pid, if found. %NULL otherwise. -+ */ -+static inline struct task_struct *find_process_by_pid(pid_t pid) -+{ -+ return pid ? find_task_by_vpid(pid) : current; -+} -+ -+/* -+ * sched_setparam() passes in -1 for its policy, to let the functions -+ * it calls know not to change it. -+ */ -+#define SETPARAM_POLICY -1 -+ -+static void __setscheduler_params(struct task_struct *p, -+ const struct sched_attr *attr) -+{ -+ int policy = attr->sched_policy; -+ -+ if (policy == SETPARAM_POLICY) -+ policy = p->policy; -+ -+ p->policy = policy; -+ -+ /* -+ * allow normal nice value to be set, but will not have any -+ * effect on scheduling until the task not SCHED_NORMAL/ -+ * SCHED_BATCH -+ */ -+ p->static_prio = NICE_TO_PRIO(attr->sched_nice); -+ -+ /* -+ * __sched_setscheduler() ensures attr->sched_priority == 0 when -+ * !rt_policy. Always setting this ensures that things like -+ * getparam()/getattr() don't report silly values for !rt tasks. -+ */ -+ p->rt_priority = attr->sched_priority; -+ p->normal_prio = normal_prio(p); -+} -+ -+/* Actually do priority change: must hold rq lock. */ -+static void __setscheduler(struct rq *rq, struct task_struct *p, -+ const struct sched_attr *attr, bool keep_boost) -+{ -+ __setscheduler_params(p, attr); -+ -+ /* -+ * Keep a potential priority boosting if called from -+ * sched_setscheduler(). 
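can_nice() above converts the requested nice level to the rlimit scale (nice 19 maps to 1, nice -20 maps to 40) and allows the change only if it stays within RLIMIT_NICE or the caller has CAP_SYS_NICE. A minimal userspace sketch of that rule, using plain glibc calls and assuming a finite limit (illustrative, not part of the patch):

    #include <stdio.h>
    #include <sys/resource.h>
    #include <sys/time.h>

    int main(void)
    {
        struct rlimit rl;
        int cur, floor;

        getrlimit(RLIMIT_NICE, &rl);
        cur = getpriority(PRIO_PROCESS, 0);
        /* can_nice(): nice value n is permitted when (20 - n) <= RLIMIT_NICE,
         * so the lowest reachable nice value without CAP_SYS_NICE is: */
        floor = 20 - (int)rl.rlim_cur;
        printf("current nice %d, unprivileged floor %d\n", cur, floor);

        /* Being "nicer" (raising the value) is always allowed; going below
         * the floor fails with EPERM unless the caller has CAP_SYS_NICE. */
        if (setpriority(PRIO_PROCESS, 0, cur + 1) != 0)
            perror("setpriority");
        return 0;
    }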
-+ */ -+ p->prio = normal_prio(p); -+ if (keep_boost) -+ p->prio = rt_effective_prio(p, p->prio); -+ update_task_priodl(p); -+} -+ -+/* -+ * check the target process has a UID that matches the current process's -+ */ -+static bool check_same_owner(struct task_struct *p) -+{ -+ const struct cred *cred = current_cred(), *pcred; -+ bool match; -+ -+ rcu_read_lock(); -+ pcred = __task_cred(p); -+ match = (uid_eq(cred->euid, pcred->euid) || -+ uid_eq(cred->euid, pcred->uid)); -+ rcu_read_unlock(); -+ return match; -+} -+ -+static int __sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, -+ bool user, bool pi) -+{ -+ const struct sched_attr dl_squash_attr = { -+ .size = sizeof(struct sched_attr), -+ .sched_policy = SCHED_FIFO, -+ .sched_nice = 0, -+ .sched_priority = 99, -+ }; -+ int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ int retval, oldpolicy = -1; -+ int policy = attr->sched_policy; -+ unsigned long flags; -+ struct rq *rq; -+ int reset_on_fork; -+ raw_spinlock_t *lock; -+ -+ /* The pi code expects interrupts enabled */ -+ BUG_ON(pi && in_interrupt()); -+ -+ /* -+ * Alt schedule FW supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO -+ */ -+ if (unlikely(SCHED_DEADLINE == policy)) { -+ attr = &dl_squash_attr; -+ policy = attr->sched_policy; -+ newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ } -+recheck: -+ /* Double check policy once rq lock held */ -+ if (policy < 0) { -+ reset_on_fork = p->sched_reset_on_fork; -+ policy = oldpolicy = p->policy; -+ } else { -+ reset_on_fork = !!(attr->sched_flags & SCHED_RESET_ON_FORK); -+ -+ if (policy > SCHED_IDLE) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & ~(SCHED_FLAG_ALL)) -+ return -EINVAL; -+ -+ /* -+ * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * SCHED_BATCH and SCHED_IDLE is 0. -+ */ -+ if (attr->sched_priority < 0 || -+ (p->mm && attr->sched_priority > MAX_USER_RT_PRIO - 1) || -+ (!p->mm && attr->sched_priority > MAX_RT_PRIO - 1)) -+ return -EINVAL; -+ if ((SCHED_RR == policy || SCHED_FIFO == policy) != -+ (attr->sched_priority != 0)) -+ return -EINVAL; -+ -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (SCHED_FIFO == policy || SCHED_RR == policy) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (attr->sched_priority > p->rt_priority && -+ attr->sched_priority > rlim_rtprio) -+ return -EPERM; -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ -+ if (user) { -+ retval = security_task_setscheduler(p); -+ if (retval) -+ return retval; -+ } -+ -+ if (pi) -+ cpuset_read_lock(); -+ -+ /* -+ * Make sure no PI-waiters arrive (or leave) while we are -+ * changing the priority of the task: -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ /* -+ * To be able to change p->policy safely, task_access_lock() -+ * must be called. -+ * IF use task_access_lock() here: -+ * For the task p which is not running, reading rq->stop is -+ * racy but acceptable as ->stop doesn't change much. -+ * An enhancemnet can be made to read rq->stop saftly. 
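For unprivileged callers, the permission block above gates SCHED_FIFO/SCHED_RR on RLIMIT_RTPRIO: with a zero limit the RT policy cannot be acquired at all, and the priority can never be raised above max(current rt_priority, limit). A hedged userspace probe of that limit, using only standard glibc wrappers:

    #include <sched.h>
    #include <stdio.h>
    #include <sys/resource.h>

    int main(void)
    {
        struct rlimit rl;
        struct sched_param sp = { .sched_priority = 1 };

        getrlimit(RLIMIT_RTPRIO, &rl);
        printf("RLIMIT_RTPRIO soft limit: %lu\n", (unsigned long)rl.rlim_cur);

        /* Without CAP_SYS_NICE this succeeds only if the limit is >= 1;
         * with a limit of 0 the kernel refuses the policy change (EPERM). */
        if (sched_setscheduler(0, SCHED_FIFO, &sp) != 0)
            perror("sched_setscheduler(SCHED_FIFO, 1)");
        else
            puts("now running SCHED_FIFO at priority 1");
        return 0;
    }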
-+ */ -+ rq = __task_access_lock(p, &lock); -+ -+ /* -+ * Changing the policy of the stop threads its a very bad idea -+ */ -+ if (p == rq->stop) { -+ retval = -EINVAL; -+ goto unlock; -+ } -+ -+ /* -+ * If not changing anything there's no need to proceed further: -+ */ -+ if (unlikely(policy == p->policy)) { -+ if (rt_policy(policy) && attr->sched_priority != p->rt_priority) -+ goto change; -+ if (!rt_policy(policy) && -+ NICE_TO_PRIO(attr->sched_nice) != p->static_prio) -+ goto change; -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ retval = 0; -+ goto unlock; -+ } -+change: -+ -+ /* Re-check policy now with rq lock held */ -+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { -+ policy = oldpolicy = -1; -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ goto recheck; -+ } -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ -+ if (pi) { -+ /* -+ * Take priority boosted tasks into account. If the new -+ * effective priority is unchanged, we just store the new -+ * normal parameters and do not touch the scheduler class and -+ * the runqueue. This will be done when the task deboost -+ * itself. -+ */ -+ if (rt_effective_prio(p, newprio) == p->prio) { -+ __setscheduler_params(p, attr); -+ retval = 0; -+ goto unlock; -+ } -+ } -+ -+ __setscheduler(rq, p, attr, pi); -+ -+ check_task_changed(rq, p); -+ -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ if (pi) { -+ cpuset_read_unlock(); -+ rt_mutex_adjust_pi(p); -+ } -+ -+ preempt_enable(); -+ -+ return 0; -+ -+unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ return retval; -+} -+ -+static int _sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param, bool check) -+{ -+ struct sched_attr attr = { -+ .sched_policy = policy, -+ .sched_priority = param->sched_priority, -+ .sched_nice = PRIO_TO_NICE(p->static_prio), -+ }; -+ -+ /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ -+ if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { -+ attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ policy &= ~SCHED_RESET_ON_FORK; -+ attr.sched_policy = policy; -+ } -+ -+ return __sched_setscheduler(p, &attr, check, true); -+} -+ -+/** -+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Use sched_set_fifo(), read its comment. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * -+ * NOTE that the task may be already dead. -+ */ -+int sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, true); -+} -+ -+int sched_setattr(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, true, true); -+} -+ -+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, false, true); -+} -+ -+/** -+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. 
-+ * -+ * Just like sched_setscheduler, only don't bother checking if the -+ * current context has permission. For example, this is needed in -+ * stop_machine(): we create temporary high priority worker threads, -+ * but our caller might not have that capability. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+int sched_setscheduler_nocheck(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, false); -+} -+ -+/* -+ * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally -+ * incapable of resource management, which is the one thing an OS really should -+ * be doing. -+ * -+ * This is of course the reason it is limited to privileged users only. -+ * -+ * Worse still; it is fundamentally impossible to compose static priority -+ * workloads. You cannot take two correctly working static prio workloads -+ * and smash them together and still expect them to work. -+ * -+ * For this reason 'all' FIFO tasks the kernel creates are basically at: -+ * -+ * MAX_RT_PRIO / 2 -+ * -+ * The administrator _MUST_ configure the system, the kernel simply doesn't -+ * know enough information to make a sensible choice. -+ */ -+void sched_set_fifo(struct task_struct *p) -+{ -+ struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 }; -+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_fifo); -+ -+/* -+ * For when you don't much care about FIFO, but want to be above SCHED_NORMAL. -+ */ -+void sched_set_fifo_low(struct task_struct *p) -+{ -+ struct sched_param sp = { .sched_priority = 1 }; -+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_fifo_low); -+ -+void sched_set_normal(struct task_struct *p, int nice) -+{ -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ .sched_nice = nice, -+ }; -+ WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_normal); -+ -+static int -+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) -+{ -+ struct sched_param lparam; -+ struct task_struct *p; -+ int retval; -+ -+ if (!param || pid < 0) -+ return -EINVAL; -+ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) -+ return -EFAULT; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setscheduler(p, policy, &lparam); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/* -+ * Mimics kernel/events/core.c perf_copy_attr(). -+ */ -+static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr) -+{ -+ u32 size; -+ int ret; -+ -+ /* Zero the full structure, so that a short copy will be nice: */ -+ memset(attr, 0, sizeof(*attr)); -+ -+ ret = get_user(size, &uattr->size); -+ if (ret) -+ return ret; -+ -+ /* ABI compatibility quirk: */ -+ if (!size) -+ size = SCHED_ATTR_SIZE_VER0; -+ -+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) -+ goto err_size; -+ -+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); -+ if (ret) { -+ if (ret == -E2BIG) -+ goto err_size; -+ return ret; -+ } -+ -+ /* -+ * XXX: Do we want to be lenient like existing syscalls; or do we want -+ * to be strict and return an error on out-of-bounds values? 
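sched_setattr()/sched_copy_attr() above implement the extended-attribute ABI. Older glibc has no wrapper, so userspace typically calls the raw syscall with a hand-declared structure; the layout below follows the sched_setattr(2) man page and is an assumption, not something defined in this patch. Note also that, per the dl_squash_attr handling earlier in __sched_setscheduler(), this scheduler accepts SCHED_DEADLINE but runs such tasks as SCHED_FIFO priority 99.

    #include <sched.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Mirror of the kernel's struct sched_attr (SCHED_ATTR_SIZE_VER0, 48 bytes),
     * given a local name to avoid clashing with wrappers newer glibc ships. */
    struct sched_attr_v0 {
        uint32_t size;
        uint32_t sched_policy;
        uint64_t sched_flags;
        int32_t  sched_nice;      /* SCHED_NORMAL/SCHED_BATCH */
        uint32_t sched_priority;  /* SCHED_FIFO/SCHED_RR */
        uint64_t sched_runtime;   /* SCHED_DEADLINE fields; squashed to */
        uint64_t sched_deadline;  /* FIFO:99 by this scheduler          */
        uint64_t sched_period;
    };

    int main(void)
    {
        struct sched_attr_v0 attr = {
            .size           = sizeof(attr),
            .sched_policy   = SCHED_FIFO,
            .sched_priority = 1,
        };

        /* pid 0 means the calling thread; the last argument is the flags word,
         * which sys_sched_setattr() above requires to be zero. */
        if (syscall(SYS_sched_setattr, 0, &attr, 0) != 0)
            perror("sched_setattr");
        return 0;
    }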
-+ */ -+ attr->sched_nice = clamp(attr->sched_nice, -20, 19); -+ -+ /* sched/core.c uses zero here but we already know ret is zero */ -+ return 0; -+ -+err_size: -+ put_user(sizeof(*attr), &uattr->size); -+ return -E2BIG; -+} -+ -+/** -+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority -+ * @pid: the pid in question. -+ * @policy: new policy. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * @param: structure containing the new RT priority. -+ */ -+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) -+{ -+ if (policy < 0) -+ return -EINVAL; -+ -+ return do_sched_setscheduler(pid, policy, param); -+} -+ -+/** -+ * sys_sched_setparam - set/change the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); -+} -+ -+/** -+ * sys_sched_setattr - same as above, but with extended sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ */ -+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, flags) -+{ -+ struct sched_attr attr; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || flags) -+ return -EINVAL; -+ -+ retval = sched_copy_attr(uattr, &attr); -+ if (retval) -+ return retval; -+ -+ if ((int)attr.sched_policy < 0) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (p != NULL) -+ retval = sched_setattr(p, &attr); -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread -+ * @pid: the pid in question. -+ * -+ * Return: On success, the policy of the thread. Otherwise, a negative error -+ * code. -+ */ -+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) -+{ -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (pid < 0) -+ goto out_nounlock; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (p) { -+ retval = security_task_getscheduler(p); -+ if (!retval) -+ retval = p->policy; -+ } -+ rcu_read_unlock(); -+ -+out_nounlock: -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the RT priority. -+ * -+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error -+ * code. -+ */ -+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ struct sched_param lp = { .sched_priority = 0 }; -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (!param || pid < 0) -+ goto out_nounlock; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ if (task_has_rt_policy(p)) -+ lp.sched_priority = p->rt_priority; -+ rcu_read_unlock(); -+ -+ /* -+ * This one might sleep, we cannot do it with a spinlock held ... -+ */ -+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; -+ -+out_nounlock: -+ return retval; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/* -+ * Copy the kernel size attribute structure (which might be larger -+ * than what user-space knows about) to user-space. 
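A quick way to exercise the getter syscalls above from userspace: read the policy and RT priority back with the plain glibc wrappers (illustrative sketch only).

    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
        struct sched_param sp;
        int policy = sched_getscheduler(0);   /* 0 == calling process */

        if (policy < 0 || sched_getparam(0, &sp) != 0) {
            perror("sched_get*");
            return 1;
        }
        /* Non-RT policies report rt_priority 0, since sys_sched_getparam()
         * above only fills it in for tasks with an RT policy. */
        printf("policy=%d rt_priority=%d\n", policy, sp.sched_priority);
        return 0;
    }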
-+ * -+ * Note that all cases are valid: user-space buffer can be larger or -+ * smaller than the kernel-space buffer. The usual case is that both -+ * have the same size. -+ */ -+static int -+sched_attr_copy_to_user(struct sched_attr __user *uattr, -+ struct sched_attr *kattr, -+ unsigned int usize) -+{ -+ unsigned int ksize = sizeof(*kattr); -+ -+ if (!access_ok(uattr, usize)) -+ return -EFAULT; -+ -+ /* -+ * sched_getattr() ABI forwards and backwards compatibility: -+ * -+ * If usize == ksize then we just copy everything to user-space and all is good. -+ * -+ * If usize < ksize then we only copy as much as user-space has space for, -+ * this keeps ABI compatibility as well. We skip the rest. -+ * -+ * If usize > ksize then user-space is using a newer version of the ABI, -+ * which part the kernel doesn't know about. Just ignore it - tooling can -+ * detect the kernel's knowledge of attributes from the attr->size value -+ * which is set to ksize in this case. -+ */ -+ kattr->size = min(usize, ksize); -+ -+ if (copy_to_user(uattr, kattr, kattr->size)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ * @usize: sizeof(attr) for fwd/bwd comp. -+ * @flags: for future extension. -+ */ -+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, usize, unsigned int, flags) -+{ -+ struct sched_attr kattr = { }; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || usize > PAGE_SIZE || -+ usize < SCHED_ATTR_SIZE_VER0 || flags) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ kattr.sched_policy = p->policy; -+ if (p->sched_reset_on_fork) -+ kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ if (task_has_rt_policy(p)) -+ kattr.sched_priority = p->rt_priority; -+ else -+ kattr.sched_nice = task_nice(p); -+ -+#ifdef CONFIG_UCLAMP_TASK -+ kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; -+ kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; -+#endif -+ -+ rcu_read_unlock(); -+ -+ return sched_attr_copy_to_user(uattr, &kattr, usize); -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ -+ cpumask_var_t cpus_allowed, new_mask; -+ struct task_struct *p; -+ int retval; -+ -+ get_online_cpus(); -+ rcu_read_lock(); -+ -+ p = find_process_by_pid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ put_online_cpus(); -+ return -ESRCH; -+ } -+ -+ /* Prevent p going away */ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->flags & PF_NO_SETAFFINITY) { -+ retval = -EINVAL; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ retval = -EPERM; -+ if (!check_same_owner(p)) { -+ rcu_read_lock(); -+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { -+ rcu_read_unlock(); -+ goto out_unlock; -+ } -+ rcu_read_unlock(); -+ } -+ -+ retval = security_task_setscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ cpuset_cpus_allowed(p, cpus_allowed); -+ cpumask_and(new_mask, in_mask, cpus_allowed); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ 
-+ if (!retval) { -+ cpuset_cpus_allowed(p, cpus_allowed); -+ if (!cpumask_subset(new_mask, cpus_allowed)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. Just reset the cpus_allowed to the -+ * cpuset's cpus_allowed -+ */ -+ cpumask_copy(new_mask, cpus_allowed); -+ goto again; -+ } -+ } -+out_unlock: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_allowed); -+out_put_task: -+ put_task_struct(p); -+ put_online_cpus(); -+ return retval; -+} -+ -+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, -+ struct cpumask *new_mask) -+{ -+ if (len < cpumask_size()) -+ cpumask_clear(new_mask); -+ else if (len > cpumask_size()) -+ len = cpumask_size(); -+ -+ return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; -+} -+ -+/** -+ * sys_sched_setaffinity - set the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to the new CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ cpumask_var_t new_mask; -+ int retval; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); -+ if (retval == 0) -+ retval = sched_setaffinity(pid, new_mask); -+ free_cpumask_var(new_mask); -+ return retval; -+} -+ -+long sched_getaffinity(pid_t pid, cpumask_t *mask) -+{ -+ struct task_struct *p; -+ raw_spinlock_t *lock; -+ unsigned long flags; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ task_access_lock_irqsave(p, &lock, &flags); -+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+out_unlock: -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getaffinity - get the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to hold the current CPU mask -+ * -+ * Return: size of CPU mask copied to user_mask_ptr on success. An -+ * error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ int ret; -+ cpumask_var_t mask; -+ -+ if ((len * BITS_PER_BYTE) < nr_cpu_ids) -+ return -EINVAL; -+ if (len & (sizeof(unsigned long)-1)) -+ return -EINVAL; -+ -+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ ret = sched_getaffinity(pid, mask); -+ if (ret == 0) { -+ unsigned int retlen = min_t(size_t, len, cpumask_size()); -+ -+ if (copy_to_user(user_mask_ptr, mask, retlen)) -+ ret = -EFAULT; -+ else -+ ret = retlen; -+ } -+ free_cpumask_var(mask); -+ -+ return ret; -+} -+ -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. It does this by -+ * scheduling away the current task. If it still has the earliest deadline -+ * it will be scheduled again as the next task. -+ * -+ * Return: 0. 
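The affinity syscalls above take a raw bitmask and, on the get side, return the number of bytes copied; glibc's wrappers hide that and operate on cpu_set_t. A minimal sketch pinning the caller to CPU 0 (assumes CPU 0 is online; nothing here is specific to this patch):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
        cpu_set_t set;

        CPU_ZERO(&set);
        CPU_SET(0, &set);                       /* pin to CPU 0 */
        if (sched_setaffinity(0, sizeof(set), &set) != 0) {
            perror("sched_setaffinity");
            return 1;
        }

        CPU_ZERO(&set);
        if (sched_getaffinity(0, sizeof(set), &set) != 0) {
            perror("sched_getaffinity");
            return 1;
        }
        /* The kernel side intersects cpus_mask with cpu_active_mask, so this
         * reflects where the task may actually run right now. */
        printf("affinity mask contains %d CPU(s)\n", CPU_COUNT(&set));
        return 0;
    }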
-+ */ -+static void do_sched_yield(void) -+{ -+ struct rq *rq; -+ struct rq_flags rf; -+ -+ if (!sched_yield_type) -+ return; -+ -+ rq = this_rq_lock_irq(&rf); -+ -+ schedstat_inc(rq->yld_count); -+ -+ if (1 == sched_yield_type) { -+ if (!rt_task(current)) -+ do_sched_yield_type_1(current, rq); -+ } else if (2 == sched_yield_type) { -+ if (rq->nr_running > 1) -+ rq->skip = current; -+ } -+ -+ /* -+ * Since we are going to call schedule() anyway, there's -+ * no need to preempt or enable interrupts: -+ */ -+ preempt_disable(); -+ raw_spin_unlock(&rq->lock); -+ sched_preempt_enable_no_resched(); -+ -+ schedule(); -+} -+ -+SYSCALL_DEFINE0(sched_yield) -+{ -+ do_sched_yield(); -+ return 0; -+} -+ -+#ifndef CONFIG_PREEMPTION -+int __sched _cond_resched(void) -+{ -+ if (should_resched(0)) { -+ preempt_schedule_common(); -+ return 1; -+ } -+ rcu_all_qs(); -+ return 0; -+} -+EXPORT_SYMBOL(_cond_resched); -+#endif -+ -+/* -+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, -+ * call schedule, and on return reacquire the lock. -+ * -+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level -+ * operations here to prevent schedule() from being called twice (once via -+ * spin_unlock(), once by hand). -+ */ -+int __cond_resched_lock(spinlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held(lock); -+ -+ if (spin_needbreak(lock) || resched) { -+ spin_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ spin_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_lock); -+ -+/** -+ * yield - yield the current processor to other threads. -+ * -+ * Do not ever use this function, there's a 99% chance you're doing it wrong. -+ * -+ * The scheduler is at all times free to pick the calling task as the most -+ * eligible task to run, if removing the yield() call from your code breaks -+ * it, its already broken. -+ * -+ * Typical broken usage is: -+ * -+ * while (!event) -+ * yield(); -+ * -+ * where one assumes that yield() will let 'the other' process run that will -+ * make event true. If the current task is a SCHED_FIFO task that will never -+ * happen. Never use yield() as a progress guarantee!! -+ * -+ * If you want to use yield() to wait for something, use wait_event(). -+ * If you want to use yield() to be 'nice' for others, use cond_resched(). -+ * If you still want to use yield(), do not! -+ */ -+void __sched yield(void) -+{ -+ set_current_state(TASK_RUNNING); -+ do_sched_yield(); -+} -+EXPORT_SYMBOL(yield); -+ -+/** -+ * yield_to - yield the current processor to another thread in -+ * your thread group, or accelerate that thread toward the -+ * processor it's on. -+ * @p: target task -+ * @preempt: whether task preemption is allowed or not -+ * -+ * It's the caller's job to ensure that the target task struct -+ * can't go away on us before we can do any checks. -+ * -+ * In Alt schedule FW, yield_to is not supported. -+ * -+ * Return: -+ * true (>0) if we indeed boosted the target task. -+ * false (0) if we failed to boost the target. -+ * -ESRCH if there's no task to yield to. 
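The yield() comment above warns against the `while (!event) yield();` pattern, and do_sched_yield() makes the behaviour tunable anyway (sched_yield_type 0/1/2, where 0 turns yield into a no-op), so the same advice applies to userspace sched_yield(). A hedged illustration of the preferred shape, sleeping on a pthread condition variable instead of yield-spinning (build with -pthread; not part of the patch):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
    static bool event;

    /* Broken shape warned about above (can spin forever against a
     * SCHED_FIFO producer):  while (!event) sched_yield();  */

    static void *producer(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&lock);
        event = true;
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, producer, NULL);

        pthread_mutex_lock(&lock);
        while (!event)
            pthread_cond_wait(&cond, &lock);   /* sleep, don't yield-spin */
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        puts("event received");
        return 0;
    }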
-+ */ -+int __sched yield_to(struct task_struct *p, bool preempt) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(yield_to); -+ -+int io_schedule_prepare(void) -+{ -+ int old_iowait = current->in_iowait; -+ -+ current->in_iowait = 1; -+ blk_schedule_flush_plug(current); -+ -+ return old_iowait; -+} -+ -+void io_schedule_finish(int token) -+{ -+ current->in_iowait = token; -+} -+ -+/* -+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so -+ * that process accounting knows that this is a task in IO wait state. -+ * -+ * But don't do that if it is a deliberate, throttling IO wait (this task -+ * has set its backing_dev_info: the queue against which it should throttle) -+ */ -+ -+long __sched io_schedule_timeout(long timeout) -+{ -+ int token; -+ long ret; -+ -+ token = io_schedule_prepare(); -+ ret = schedule_timeout(timeout); -+ io_schedule_finish(token); -+ -+ return ret; -+} -+EXPORT_SYMBOL(io_schedule_timeout); -+ -+void __sched io_schedule(void) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ schedule(); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(io_schedule); -+ -+/** -+ * sys_sched_get_priority_max - return maximum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the maximum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_max, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = MAX_USER_RT_PRIO-1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * sys_sched_get_priority_min - return minimum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the minimum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_min, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) -+{ -+ struct task_struct *p; -+ int retval; -+ -+ alt_sched_debug(); -+ -+ if (pid < 0) -+ return -EINVAL; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ rcu_read_unlock(); -+ -+ *t = ns_to_timespec64(sched_timeslice_ns); -+ return 0; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/** -+ * sys_sched_rr_get_interval - return the default timeslice of a process. -+ * @pid: pid of the process. -+ * @interval: userspace pointer to the timeslice value. -+ * -+ * -+ * Return: On success, 0 and the timeslice is in @interval. Otherwise, -+ * an error code. 
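The helpers above report a fixed priority range per policy, and sched_rr_get_interval() returns the scheduler's single global timeslice (sched_timeslice_ns) rather than a per-task value. Querying both from userspace with the standard wrappers (illustrative sketch):

    #include <sched.h>
    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        struct timespec ts;

        printf("SCHED_FIFO priority range: %d..%d\n",
               sched_get_priority_min(SCHED_FIFO),
               sched_get_priority_max(SCHED_FIFO));

        /* Under this scheduler every task reports the same default timeslice. */
        if (sched_rr_get_interval(0, &ts) == 0)
            printf("timeslice: %ld.%09ld s\n", (long)ts.tv_sec, ts.tv_nsec);
        return 0;
    }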
-+ */ -+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, -+ struct __kernel_timespec __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_timespec64(&t, interval); -+ -+ return retval; -+} -+ -+#ifdef CONFIG_COMPAT_32BIT_TIME -+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, -+ struct old_timespec32 __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_old_timespec32(&t, interval); -+ return retval; -+} -+#endif -+ -+void sched_show_task(struct task_struct *p) -+{ -+ unsigned long free = 0; -+ int ppid; -+ -+ if (!try_get_task_stack(p)) -+ return; -+ -+ pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p)); -+ -+ if (p->state == TASK_RUNNING) -+ pr_cont(" running task "); -+#ifdef CONFIG_DEBUG_STACK_USAGE -+ free = stack_not_used(p); -+#endif -+ ppid = 0; -+ rcu_read_lock(); -+ if (pid_alive(p)) -+ ppid = task_pid_nr(rcu_dereference(p->real_parent)); -+ rcu_read_unlock(); -+ pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", -+ free, task_pid_nr(p), ppid, -+ (unsigned long)task_thread_info(p)->flags); -+ -+ print_worker_info(KERN_INFO, p); -+ show_stack(p, NULL, KERN_INFO); -+ put_task_stack(p); -+} -+EXPORT_SYMBOL_GPL(sched_show_task); -+ -+static inline bool -+state_filter_match(unsigned long state_filter, struct task_struct *p) -+{ -+ /* no filter, everything matches */ -+ if (!state_filter) -+ return true; -+ -+ /* filter, but doesn't match */ -+ if (!(p->state & state_filter)) -+ return false; -+ -+ /* -+ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows -+ * TASK_KILLABLE). -+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) -+ return false; -+ -+ return true; -+} -+ -+ -+void show_state_filter(unsigned long state_filter) -+{ -+ struct task_struct *g, *p; -+ -+ rcu_read_lock(); -+ for_each_process_thread(g, p) { -+ /* -+ * reset the NMI-timeout, listing all files on a slow -+ * console might take a lot of time: -+ * Also, reset softlockup watchdogs on all CPUs, because -+ * another CPU might be blocked waiting for us to process -+ * an IPI. -+ */ -+ touch_nmi_watchdog(); -+ touch_all_softlockup_watchdogs(); -+ if (state_filter_match(state_filter, p)) -+ sched_show_task(p); -+ } -+ -+#ifdef CONFIG_SCHED_DEBUG -+ /* TODO: Alt schedule FW should support this -+ if (!state_filter) -+ sysrq_sched_debug_show(); -+ */ -+#endif -+ rcu_read_unlock(); -+ /* -+ * Only show locks if all tasks are dumped: -+ */ -+ if (!state_filter) -+ debug_show_all_locks(); -+} -+ -+void dump_cpu_task(int cpu) -+{ -+ pr_info("Task dump for CPU %d:\n", cpu); -+ sched_show_task(cpu_curr(cpu)); -+} -+ -+/** -+ * init_idle - set up an idle thread for a given CPU -+ * @idle: task in question -+ * @cpu: CPU the idle task belongs to -+ * -+ * NOTE: this function does not set the idle thread's NEED_RESCHED -+ * flag, to make booting more robust. 
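sched_show_task()/show_state_filter() above are what the magic-SysRq 't' handler reaches in mainline, and this patch keeps that path. The dump can be requested at runtime through /proc/sysrq-trigger (root only; a standard Linux interface, not something added by this patch), for example:

    #include <stdio.h>

    int main(void)
    {
        /* Requires root and CONFIG_MAGIC_SYSRQ; equivalent to pressing SysRq-t.
         * The resulting task/stack dump lands in the kernel log (dmesg). */
        FILE *f = fopen("/proc/sysrq-trigger", "w");

        if (!f) {
            perror("/proc/sysrq-trigger");
            return 1;
        }
        fputc('t', f);
        fclose(f);
        return 0;
    }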
-+ */ -+void init_idle(struct task_struct *idle, int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ __sched_fork(0, idle); -+ -+ raw_spin_lock_irqsave(&idle->pi_lock, flags); -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ idle->last_ran = rq->clock_task; -+ idle->state = TASK_RUNNING; -+ idle->flags |= PF_IDLE; -+ sched_queue_init_idle(rq, idle); -+ -+ scs_task_reset(idle); -+ kasan_unpoison_task_stack(idle); -+ -+#ifdef CONFIG_SMP -+ /* -+ * It's possible that init_idle() gets called multiple times on a task, -+ * in that case do_set_cpus_allowed() will not do the right thing. -+ * -+ * And since this is boot we can forgo the serialisation. -+ */ -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); -+#endif -+ -+ /* Silence PROVE_RCU */ -+ rcu_read_lock(); -+ __set_task_cpu(idle, cpu); -+ rcu_read_unlock(); -+ -+ rq->idle = idle; -+ rcu_assign_pointer(rq->curr, idle); -+ idle->on_cpu = 1; -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); -+ -+ /* Set the preempt count _outside_ the spinlocks! */ -+ init_idle_preempt_count(idle, cpu); -+ -+ ftrace_graph_init_idle_task(idle, cpu); -+ vtime_init_idle(idle, cpu); -+#ifdef CONFIG_SMP -+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -+#endif -+} -+ -+#ifdef CONFIG_SMP -+ -+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, -+ const struct cpumask __maybe_unused *trial) -+{ -+ return 1; -+} -+ -+int task_can_attach(struct task_struct *p, -+ const struct cpumask *cs_cpus_allowed) -+{ -+ int ret = 0; -+ -+ /* -+ * Kthreads which disallow setaffinity shouldn't be moved -+ * to a new cpuset; we don't want to change their CPU -+ * affinity and isolating such threads by their set of -+ * allowed nodes is unnecessary. Thus, cpusets are not -+ * applicable for such threads. This prevents checking for -+ * success of set_cpus_allowed_ptr() on all attached tasks -+ * before cpus_mask may be changed. -+ */ -+ if (p->flags & PF_NO_SETAFFINITY) -+ ret = -EINVAL; -+ -+ return ret; -+} -+ -+bool sched_smp_initialized __read_mostly; -+ -+#ifdef CONFIG_HOTPLUG_CPU -+/* -+ * Ensures that the idle task is using init_mm right before its CPU goes -+ * offline. -+ */ -+void idle_task_exit(void) -+{ -+ struct mm_struct *mm = current->active_mm; -+ -+ BUG_ON(current != this_rq()->idle); -+ -+ if (mm != &init_mm) { -+ switch_mm(mm, &init_mm, current); -+ finish_arch_post_lock_switch(); -+ } -+ -+ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ -+} -+ -+/* -+ * Migrate all tasks from the rq, sleeping tasks will be migrated by -+ * try_to_wake_up()->select_task_rq(). -+ * -+ * Called with rq->lock held even though we'er in stop_machine() and -+ * there's no concurrency possible, we hold the required locks anyway -+ * because of lock validation efforts. -+ */ -+static void migrate_tasks(struct rq *dead_rq) -+{ -+ struct rq *rq = dead_rq; -+ struct task_struct *p, *stop = rq->stop; -+ int count = 0; -+ -+ /* -+ * Fudge the rq selection such that the below task selection loop -+ * doesn't get stuck on the currently eligible stop task. -+ * -+ * We're currently inside stop_machine() and the rq is either stuck -+ * in the stop_machine_cpu_stop() loop, or we're executing this code, -+ * either way we should never end up calling schedule() until we're -+ * done here. 
-+ */ -+ rq->stop = NULL; -+ -+ p = sched_rq_first_task(rq); -+ while (p != rq->idle) { -+ int dest_cpu; -+ -+ /* skip the running task */ -+ if (task_running(p) || 1 == p->nr_cpus_allowed) { -+ p = sched_rq_next_task(p, rq); -+ continue; -+ } -+ -+ /* -+ * Rules for changing task_struct::cpus_allowed are holding -+ * both pi_lock and rq->lock, such that holding either -+ * stabilizes the mask. -+ * -+ * Drop rq->lock is not quite as disastrous as it usually is -+ * because !cpu_active at this point, which means load-balance -+ * will not interfere. Also, stop-machine. -+ */ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ /* -+ * Since we're inside stop-machine, _nothing_ should have -+ * changed the task, WARN if weird stuff happened, because in -+ * that case the above rq->lock drop is a fail too. -+ */ -+ if (WARN_ON(task_rq(p) != rq || !task_on_rq_queued(p))) { -+ raw_spin_unlock(&p->pi_lock); -+ p = sched_rq_next_task(p, rq); -+ continue; -+ } -+ -+ count++; -+ /* Find suitable destination for @next, with force if needed. */ -+ dest_cpu = select_fallback_rq(dead_rq->cpu, p); -+ rq = __migrate_task(rq, p, dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ rq = dead_rq; -+ raw_spin_lock(&rq->lock); -+ /* Check queued task all over from the header again */ -+ p = sched_rq_first_task(rq); -+ } -+ -+ rq->stop = stop; -+} -+ -+static void set_rq_offline(struct rq *rq) -+{ -+ if (rq->online) -+ rq->online = false; -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+static void set_rq_online(struct rq *rq) -+{ -+ if (!rq->online) -+ rq->online = true; -+} -+ -+/* -+ * used to mark begin/end of suspend/resume: -+ */ -+static int num_cpus_frozen; -+ -+/* -+ * Update cpusets according to cpu_active mask. If cpusets are -+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper -+ * around partition_sched_domains(). -+ * -+ * If we come here as part of a suspend/resume, don't touch cpusets because we -+ * want to restore it back to its original state upon resume anyway. -+ */ -+static void cpuset_cpu_active(void) -+{ -+ if (cpuhp_tasks_frozen) { -+ /* -+ * num_cpus_frozen tracks how many CPUs are involved in suspend -+ * resume sequence. As long as this is not the last online -+ * operation in the resume sequence, just build a single sched -+ * domain, ignoring cpusets. -+ */ -+ partition_sched_domains(1, NULL, NULL); -+ if (--num_cpus_frozen) -+ return; -+ /* -+ * This is the last CPU online operation. So fall through and -+ * restore the original sched domains by considering the -+ * cpuset configurations. -+ */ -+ cpuset_force_rebuild(); -+ } -+ -+ cpuset_update_active_cpus(); -+} -+ -+static int cpuset_cpu_inactive(unsigned int cpu) -+{ -+ if (!cpuhp_tasks_frozen) { -+ cpuset_update_active_cpus(); -+ } else { -+ num_cpus_frozen++; -+ partition_sched_domains(1, NULL, NULL); -+ } -+ return 0; -+} -+ -+int sched_cpu_activate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going up, increment the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_inc_cpuslocked(&sched_smt_present); -+#endif -+ set_cpu_active(cpu, true); -+ -+ if (sched_smp_initialized) -+ cpuset_cpu_active(); -+ -+ /* -+ * Put the rq online, if not already. This happens: -+ * -+ * 1) In the early boot process, because we build the real domains -+ * after all cpus have been brought up. 
-+ * -+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the -+ * domains. -+ */ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_online(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ int ret; -+ -+ set_cpu_active(cpu, false); -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. -+ */ -+ synchronize_rcu(); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going down, decrement the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) { -+ static_branch_dec_cpuslocked(&sched_smt_present); -+ if (!static_branch_likely(&sched_smt_present)) -+ cpumask_clear(&sched_sg_idle_mask); -+ } -+#endif -+ -+ if (!sched_smp_initialized) -+ return 0; -+ -+ ret = cpuset_cpu_inactive(cpu); -+ if (ret) { -+ set_cpu_active(cpu, true); -+ return ret; -+ } -+ return 0; -+} -+ -+static void sched_rq_cpu_starting(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ rq->calc_load_update = calc_load_update; -+} -+ -+int sched_cpu_starting(unsigned int cpu) -+{ -+ sched_rq_cpu_starting(cpu); -+ sched_tick_start(cpu); -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+int sched_cpu_dying(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ /* Handle pending wakeups and then migrate everything off */ -+ sched_tick_stop(cpu); -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_offline(rq); -+ migrate_tasks(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ hrtick_clear(rq); -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_SMP -+static void sched_init_topology_cpumask_early(void) -+{ -+ int cpu, level; -+ cpumask_t *tmp; -+ -+ for_each_possible_cpu(cpu) { -+ for (level = 0; level < NR_CPU_AFFINITY_CHK_LEVEL; level++) { -+ tmp = &(per_cpu(sched_cpu_affinity_masks, cpu)[level]); -+ cpumask_copy(tmp, cpu_possible_mask); -+ cpumask_clear_cpu(cpu, tmp); -+ } -+ per_cpu(sched_cpu_llc_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ per_cpu(sched_cpu_affinity_end_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); -+ /*per_cpu(sd_llc_id, cpu) = cpu;*/ -+ } -+} -+ -+#define TOPOLOGY_CPUMASK(name, mask, last) \ -+ if (cpumask_and(chk, chk, mask)) \ -+ printk(KERN_INFO "sched: cpu#%02d affinity mask: 0x%08lx - "#name,\ -+ cpu, (chk++)->bits[0]); \ -+ if (!last) \ -+ cpumask_complement(chk, mask) -+ -+static void sched_init_topology_cpumask(void) -+{ -+ int cpu; -+ cpumask_t *chk; -+ -+ for_each_online_cpu(cpu) { -+ /* take chance to reset time slice for idle tasks */ -+ cpu_rq(cpu)->idle->time_slice = sched_timeslice_ns; -+ -+ chk = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ -+ cpumask_complement(chk, cpumask_of(cpu)); -+#ifdef CONFIG_SCHED_SMT -+ TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); -+#endif -+ per_cpu(sd_llc_id, cpu) = cpumask_first(cpu_coregroup_mask(cpu)); -+ per_cpu(sched_cpu_llc_mask, cpu) = chk; -+ TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask(cpu), false); -+ -+ TOPOLOGY_CPUMASK(core, topology_core_cpumask(cpu), false); -+ -+ TOPOLOGY_CPUMASK(others, cpu_online_mask, true); -+ -+ per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; -+ printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", -+ cpu, per_cpu(sd_llc_id, cpu), -+ (int) (per_cpu(sched_cpu_llc_mask, cpu) - -+ 
&(per_cpu(sched_cpu_affinity_masks, cpu)[0]))); -+ } -+} -+#endif -+ -+void __init sched_init_smp(void) -+{ -+ /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -+ BUG(); -+ -+ sched_init_topology_cpumask(); -+ -+ sched_smp_initialized = true; -+} -+#else -+void __init sched_init_smp(void) -+{ -+ cpu_rq(0)->idle->time_slice = sched_timeslice_ns; -+} -+#endif /* CONFIG_SMP */ -+ -+int in_sched_functions(unsigned long addr) -+{ -+ return in_lock_functions(addr) || -+ (addr >= (unsigned long)__sched_text_start -+ && addr < (unsigned long)__sched_text_end); -+} -+ -+#ifdef CONFIG_CGROUP_SCHED -+/* task group related information */ -+struct task_group { -+ struct cgroup_subsys_state css; -+ -+ struct rcu_head rcu; -+ struct list_head list; -+ -+ struct task_group *parent; -+ struct list_head siblings; -+ struct list_head children; -+}; -+ -+/* -+ * Default task group. -+ * Every task in system belongs to this group at bootup. -+ */ -+struct task_group root_task_group; -+LIST_HEAD(task_groups); -+ -+/* Cacheline aligned slab cache for task_group */ -+static struct kmem_cache *task_group_cache __read_mostly; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void __init sched_init(void) -+{ -+ int i; -+ struct rq *rq; -+ -+ printk(KERN_INFO ALT_SCHED_VERSION_MSG); -+ -+ wait_bit_init(); -+ -+#ifdef CONFIG_SMP -+ for (i = 0; i < SCHED_BITS; i++) -+ cpumask_copy(&sched_rq_watermark[i], cpu_present_mask); -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+ task_group_cache = KMEM_CACHE(task_group, 0); -+ -+ list_add(&root_task_group.list, &task_groups); -+ INIT_LIST_HEAD(&root_task_group.children); -+ INIT_LIST_HEAD(&root_task_group.siblings); -+#endif /* CONFIG_CGROUP_SCHED */ -+ for_each_possible_cpu(i) { -+ rq = cpu_rq(i); -+ -+ sched_queue_init(rq); -+ rq->watermark = IDLE_WM; -+ rq->skip = NULL; -+ -+ raw_spin_lock_init(&rq->lock); -+ rq->nr_running = rq->nr_uninterruptible = 0; -+ rq->calc_load_active = 0; -+ rq->calc_load_update = jiffies + LOAD_FREQ; -+#ifdef CONFIG_SMP -+ rq->online = false; -+ rq->cpu = i; -+ -+#ifdef CONFIG_SCHED_SMT -+ rq->active_balance = 0; -+#endif -+ -+#ifdef CONFIG_NO_HZ_COMMON -+ rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); -+#endif -+#endif /* CONFIG_SMP */ -+ rq->nr_switches = 0; -+ -+ hrtick_rq_init(rq); -+ atomic_set(&rq->nr_iowait, 0); -+ } -+#ifdef CONFIG_SMP -+ /* Set rq->online for cpu 0 */ -+ cpu_rq(0)->online = true; -+#endif -+ /* -+ * The boot idle thread does lazy MMU switching as well: -+ */ -+ mmgrab(&init_mm); -+ enter_lazy_tlb(&init_mm, current); -+ -+ /* -+ * Make us the idle thread. Technically, schedule() should not be -+ * called from this thread, however somewhere below it might be, -+ * but because we are the idle thread, we just pick up running again -+ * when this runqueue becomes "idle". 
-+ */ -+ init_idle(current, smp_processor_id()); -+ -+ calc_load_update = jiffies + LOAD_FREQ; -+ -+#ifdef CONFIG_SMP -+ idle_thread_set_boot_cpu(); -+ -+ sched_init_topology_cpumask_early(); -+#endif /* SMP */ -+ -+ init_schedstats(); -+ -+ psi_init(); -+} -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+static inline int preempt_count_equals(int preempt_offset) -+{ -+ int nested = preempt_count() + rcu_preempt_depth(); -+ -+ return (nested == preempt_offset); -+} -+ -+void __might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* -+ * Blocking primitives will set (and therefore destroy) current->state, -+ * since we will exit with TASK_RUNNING make sure we enter with it, -+ * otherwise we will destroy state. -+ */ -+ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, -+ "do not call blocking ops when !TASK_RUNNING; " -+ "state=%lx set at [<%p>] %pS\n", -+ current->state, -+ (void *)current->task_state_change, -+ (void *)current->task_state_change); -+ -+ ___might_sleep(file, line, preempt_offset); -+} -+EXPORT_SYMBOL(__might_sleep); -+ -+void ___might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* Ratelimiting timestamp: */ -+ static unsigned long prev_jiffy; -+ -+ unsigned long preempt_disable_ip; -+ -+ /* WARN_ON_ONCE() by default, no rate limit required: */ -+ rcu_sleep_check(); -+ -+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ !is_idle_task(current) && !current->non_block_count) || -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) -+ return; -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ /* Save this before calling printk(), since that will clobber it: */ -+ preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ printk(KERN_ERR -+ "BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ printk(KERN_ERR -+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); -+ -+ if (task_stack_end_corrupted(current)) -+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ -+ debug_show_held_locks(current); -+ if (irqs_disabled()) -+ print_irqtrace_events(current); -+#ifdef CONFIG_DEBUG_PREEMPT -+ if (!preempt_count_equals(preempt_offset)) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); -+ } -+#endif -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL(___might_sleep); -+ -+void __cant_sleep(const char *file, int line, int preempt_offset) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > preempt_offset) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); -+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_sleep); -+#endif -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+void normalize_rt_tasks(void) -+{ -+ struct task_struct *g, *p; -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ }; -+ -+ read_lock(&tasklist_lock); -+ for_each_process_thread(g, p) { 
-+ /* -+ * Only normalize user tasks: -+ */ -+ if (p->flags & PF_KTHREAD) -+ continue; -+ -+ if (!rt_task(p)) { -+ /* -+ * Renice negative nice level userspace -+ * tasks back to 0: -+ */ -+ if (task_nice(p) < 0) -+ set_user_nice(p, 0); -+ continue; -+ } -+ -+ __sched_setscheduler(p, &attr, false, false); -+ } -+ read_unlock(&tasklist_lock); -+} -+#endif /* CONFIG_MAGIC_SYSRQ */ -+ -+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) -+/* -+ * These functions are only useful for the IA64 MCA handling, or kdb. -+ * -+ * They can only be called when the whole system has been -+ * stopped - every CPU needs to be quiescent, and no scheduling -+ * activity can take place. Using them for anything else would -+ * be a serious bug, and as a result, they aren't even visible -+ * under any other configuration. -+ */ -+ -+/** -+ * curr_task - return the current task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ * -+ * Return: The current task for @cpu. -+ */ -+struct task_struct *curr_task(int cpu) -+{ -+ return cpu_curr(cpu); -+} -+ -+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ -+ -+#ifdef CONFIG_IA64 -+/** -+ * ia64_set_curr_task - set the current task for a given CPU. -+ * @cpu: the processor in question. -+ * @p: the task pointer to set. -+ * -+ * Description: This function must only be used when non-maskable interrupts -+ * are serviced on a separate stack. It allows the architecture to switch the -+ * notion of the current task on a CPU in a non-blocking manner. This function -+ * must be called with all CPU's synchronised, and interrupts disabled, the -+ * and caller must save the original value of the current task (see -+ * curr_task() above) and restore that value before reenabling interrupts and -+ * re-starting the system. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ */ -+void ia64_set_curr_task(int cpu, struct task_struct *p) -+{ -+ cpu_curr(cpu) = p; -+} -+ -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+static void sched_free_group(struct task_group *tg) -+{ -+ kmem_cache_free(task_group_cache, tg); -+} -+ -+/* allocate runqueue etc for a new task group */ -+struct task_group *sched_create_group(struct task_group *parent) -+{ -+ struct task_group *tg; -+ -+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); -+ if (!tg) -+ return ERR_PTR(-ENOMEM); -+ -+ return tg; -+} -+ -+void sched_online_group(struct task_group *tg, struct task_group *parent) -+{ -+} -+ -+/* rcu callback to free various structures associated with a task group */ -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ /* Now it should be safe to free those cfs_rqs */ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+void sched_destroy_group(struct task_group *tg) -+{ -+ /* Wait for possible concurrent references to cfs_rqs complete */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ -+void sched_offline_group(struct task_group *tg) -+{ -+} -+ -+static inline struct task_group *css_tg(struct cgroup_subsys_state *css) -+{ -+ return css ? 
container_of(css, struct task_group, css) : NULL; -+} -+ -+static struct cgroup_subsys_state * -+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -+{ -+ struct task_group *parent = css_tg(parent_css); -+ struct task_group *tg; -+ -+ if (!parent) { -+ /* This is early initialization for the top cgroup */ -+ return &root_task_group.css; -+ } -+ -+ tg = sched_create_group(parent); -+ if (IS_ERR(tg)) -+ return ERR_PTR(-ENOMEM); -+ return &tg->css; -+} -+ -+/* Expose task group only after completing cgroup initialization */ -+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ struct task_group *parent = css_tg(css->parent); -+ -+ if (parent) -+ sched_online_group(tg, parent); -+ return 0; -+} -+ -+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ sched_offline_group(tg); -+} -+ -+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ /* -+ * Relies on the RCU grace period between css_released() and this. -+ */ -+ sched_free_group(tg); -+} -+ -+static void cpu_cgroup_fork(struct task_struct *task) -+{ -+} -+ -+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) -+{ -+ return 0; -+} -+ -+static void cpu_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+ -+static struct cftype cpu_legacy_files[] = { -+ { } /* Terminate */ -+}; -+ -+ -+static struct cftype cpu_files[] = { -+ { } /* terminate */ -+}; -+ -+static int cpu_extra_stat_show(struct seq_file *sf, -+ struct cgroup_subsys_state *css) -+{ -+ return 0; -+} -+ -+struct cgroup_subsys cpu_cgrp_subsys = { -+ .css_alloc = cpu_cgroup_css_alloc, -+ .css_online = cpu_cgroup_css_online, -+ .css_released = cpu_cgroup_css_released, -+ .css_free = cpu_cgroup_css_free, -+ .css_extra_stat_show = cpu_extra_stat_show, -+ .fork = cpu_cgroup_fork, -+ .can_attach = cpu_cgroup_can_attach, -+ .attach = cpu_cgroup_attach, -+ .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, -+ .early_init = true, -+ .threaded = true, -+}; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+#undef CREATE_TRACE_POINTS -diff --git a/kernel/sched/alt_debug.c b/kernel/sched/alt_debug.c -new file mode 100644 -index 000000000000..1212a031700e ---- /dev/null -+++ b/kernel/sched/alt_debug.c -@@ -0,0 +1,31 @@ -+/* -+ * kernel/sched/alt_debug.c -+ * -+ * Print the alt scheduler debugging details -+ * -+ * Author: Alfred Chen -+ * Date : 2020 -+ */ -+#include "sched.h" -+ -+/* -+ * This allows printing both to /proc/sched_debug and -+ * to the console -+ */ -+#define SEQ_printf(m, x...) 
\ -+ do { \ -+ if (m) \ -+ seq_printf(m, x); \ -+ else \ -+ pr_cont(x); \ -+ } while (0) -+ -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{ -+ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -new file mode 100644 -index 000000000000..99be2c51c88d ---- /dev/null -+++ b/kernel/sched/alt_sched.h -@@ -0,0 +1,555 @@ -+#ifndef ALT_SCHED_H -+#define ALT_SCHED_H -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#ifdef CONFIG_PARAVIRT -+# include -+#endif -+ -+#include "cpupri.h" -+ -+#ifdef CONFIG_SCHED_BMQ -+#include "bmq.h" -+#endif -+#ifdef CONFIG_SCHED_PDS -+#include "pds.h" -+#endif -+ -+/* task_struct::on_rq states: */ -+#define TASK_ON_RQ_QUEUED 1 -+#define TASK_ON_RQ_MIGRATING 2 -+ -+static inline int task_on_rq_queued(struct task_struct *p) -+{ -+ return p->on_rq == TASK_ON_RQ_QUEUED; -+} -+ -+static inline int task_on_rq_migrating(struct task_struct *p) -+{ -+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+} -+ -+/* -+ * wake flags -+ */ -+#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -+#define WF_FORK 0x02 /* child wakeup after fork */ -+#define WF_MIGRATED 0x04 /* internal use, task got migrated */ -+#define WF_ON_CPU 0x08 /* Wakee is on_rq */ -+ -+/* -+ * This is the main, per-CPU runqueue data structure. -+ * This data should only be modified by the local cpu. 
-+ */ -+struct rq { -+ /* runqueue lock: */ -+ raw_spinlock_t lock; -+ -+ struct task_struct __rcu *curr; -+ struct task_struct *idle, *stop, *skip; -+ struct mm_struct *prev_mm; -+ -+#ifdef CONFIG_SCHED_BMQ -+ struct bmq queue; -+#endif -+#ifdef CONFIG_SCHED_PDS -+ struct skiplist_node sl_header; -+#endif -+ unsigned long watermark; -+ -+ /* switch count */ -+ u64 nr_switches; -+ -+ atomic_t nr_iowait; -+ -+#ifdef CONFIG_MEMBARRIER -+ int membarrier_state; -+#endif -+ -+#ifdef CONFIG_SMP -+ int cpu; /* cpu of this runqueue */ -+ bool online; -+ -+ unsigned int ttwu_pending; -+ unsigned char nohz_idle_balance; -+ unsigned char idle_balance; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ struct sched_avg avg_irq; -+#endif -+ -+#ifdef CONFIG_SCHED_SMT -+ int active_balance; -+ struct cpu_stop_work active_balance_work; -+#endif -+#endif /* CONFIG_SMP */ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ u64 prev_irq_time; -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+#ifdef CONFIG_PARAVIRT -+ u64 prev_steal_time; -+#endif /* CONFIG_PARAVIRT */ -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ u64 prev_steal_time_rq; -+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ -+ -+ /* calc_load related fields */ -+ unsigned long calc_load_update; -+ long calc_load_active; -+ -+ u64 clock, last_tick; -+ u64 last_ts_switch; -+ u64 clock_task; -+ -+ unsigned long nr_running; -+ unsigned long nr_uninterruptible; -+ -+#ifdef CONFIG_SCHED_HRTICK -+#ifdef CONFIG_SMP -+ call_single_data_t hrtick_csd; -+#endif -+ struct hrtimer hrtick_timer; -+#endif -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+ /* latency stats */ -+ struct sched_info rq_sched_info; -+ unsigned long long rq_cpu_time; -+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ -+ -+ /* sys_sched_yield() stats */ -+ unsigned int yld_count; -+ -+ /* schedule() stats */ -+ unsigned int sched_switch; -+ unsigned int sched_count; -+ unsigned int sched_goidle; -+ -+ /* try_to_wake_up() stats */ -+ unsigned int ttwu_count; -+ unsigned int ttwu_local; -+#endif /* CONFIG_SCHEDSTATS */ -+ -+#ifdef CONFIG_CPU_IDLE -+ /* Must be inspected within a rcu lock section */ -+ struct cpuidle_state *idle_state; -+#endif -+ -+#ifdef CONFIG_NO_HZ_COMMON -+#ifdef CONFIG_SMP -+ call_single_data_t nohz_csd; -+#endif -+ atomic_t nohz_flags; -+#endif /* CONFIG_NO_HZ_COMMON */ -+}; -+ -+extern unsigned long calc_load_update; -+extern atomic_long_t calc_load_tasks; -+ -+extern void calc_global_load_tick(struct rq *this_rq); -+extern long calc_load_fold_active(struct rq *this_rq, long adjust); -+ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) -+#define this_rq() this_cpu_ptr(&runqueues) -+#define task_rq(p) cpu_rq(task_cpu(p)) -+#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -+#define raw_rq() raw_cpu_ptr(&runqueues) -+ -+#ifdef CONFIG_SMP -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+void register_sched_domain_sysctl(void); -+void unregister_sched_domain_sysctl(void); -+#else -+static inline void register_sched_domain_sysctl(void) -+{ -+} -+static inline void unregister_sched_domain_sysctl(void) -+{ -+} -+#endif -+ -+extern bool sched_smp_initialized; -+ -+enum { -+ BASE_CPU_AFFINITY_CHK_LEVEL = 1, -+#ifdef CONFIG_SCHED_SMT -+ SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+#ifdef CONFIG_SCHED_MC -+ MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+ NR_CPU_AFFINITY_CHK_LEVEL -+}; -+ -+DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); -+ -+static inline int __best_mask_cpu(int cpu, const 
cpumask_t *cpumask, -+ const cpumask_t *mask) -+{ -+ while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) -+ mask++; -+ return cpu; -+} -+ -+static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) -+{ -+ return cpumask_test_cpu(cpu, cpumask)? cpu : -+ __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); -+} -+ -+extern void flush_smp_call_function_from_idle(void); -+ -+#else /* !CONFIG_SMP */ -+static inline void flush_smp_call_function_from_idle(void) { } -+#endif -+ -+#ifndef arch_scale_freq_tick -+static __always_inline -+void arch_scale_freq_tick(void) -+{ -+} -+#endif -+ -+#ifndef arch_scale_freq_capacity -+static __always_inline -+unsigned long arch_scale_freq_capacity(int cpu) -+{ -+ return SCHED_CAPACITY_SCALE; -+} -+#endif -+ -+static inline u64 __rq_clock_broken(struct rq *rq) -+{ -+ return READ_ONCE(rq->clock); -+} -+ -+static inline u64 rq_clock(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock; -+} -+ -+static inline u64 rq_clock_task(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock_task; -+} -+ -+/* -+ * {de,en}queue flags: -+ * -+ * DEQUEUE_SLEEP - task is no longer runnable -+ * ENQUEUE_WAKEUP - task just became runnable -+ * -+ */ -+ -+#define DEQUEUE_SLEEP 0x01 -+ -+#define ENQUEUE_WAKEUP 0x01 -+ -+ -+/* -+ * Below are scheduler API which using in other kernel code -+ * It use the dummy rq_flags -+ * ToDo : BMQ need to support these APIs for compatibility with mainline -+ * scheduler code. -+ */ -+struct rq_flags { -+ unsigned long flags; -+}; -+ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock); -+ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock); -+ -+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(&rq->lock); -+} -+ -+static inline void -+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) -+ __releases(rq->lock) -+ __releases(p->pi_lock) -+{ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+} -+ -+static inline void -+rq_unlock_irq(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+static inline struct rq * -+this_rq_lock_irq(struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ -+ return rq; -+} -+ -+static inline int task_current(struct rq *rq, struct task_struct *p) -+{ -+ return rq->curr == p; -+} -+ -+static inline bool task_running(struct task_struct *p) -+{ -+ return p->on_cpu; -+} -+ -+extern struct static_key_false sched_schedstats; -+ -+#ifdef CONFIG_CPU_IDLE -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+ rq->idle_state = idle_state; -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ WARN_ON(!rcu_read_lock_held()); -+ return rq->idle_state; -+} -+#else -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ return NULL; -+} -+#endif -+ -+static inline 
int cpu_of(const struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ return rq->cpu; -+#else -+ return 0; -+#endif -+} -+ -+#include "stats.h" -+ -+#ifdef CONFIG_NO_HZ_COMMON -+#define NOHZ_BALANCE_KICK_BIT 0 -+#define NOHZ_STATS_KICK_BIT 1 -+ -+#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) -+#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) -+ -+#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK) -+ -+#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) -+ -+/* TODO: needed? -+extern void nohz_balance_exit_idle(struct rq *rq); -+#else -+static inline void nohz_balance_exit_idle(struct rq *rq) { } -+*/ -+#endif -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+struct irqtime { -+ u64 total; -+ u64 tick_delta; -+ u64 irq_start_time; -+ struct u64_stats_sync sync; -+}; -+ -+DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -+ -+/* -+ * Returns the irqtime minus the softirq time computed by ksoftirqd. -+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime -+ * and never move forward. -+ */ -+static inline u64 irq_time_read(int cpu) -+{ -+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); -+ unsigned int seq; -+ u64 total; -+ -+ do { -+ seq = __u64_stats_fetch_begin(&irqtime->sync); -+ total = irqtime->total; -+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); -+ -+ return total; -+} -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+ -+#ifdef CONFIG_CPU_FREQ -+DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); -+ -+/** -+ * cpufreq_update_util - Take a note about CPU utilization changes. -+ * @rq: Runqueue to carry out the update for. -+ * @flags: Update reason flags. -+ * -+ * This function is called by the scheduler on the CPU whose utilization is -+ * being updated. -+ * -+ * It can only be called from RCU-sched read-side critical sections. -+ * -+ * The way cpufreq is currently arranged requires it to evaluate the CPU -+ * performance state (frequency/voltage) on a regular basis to prevent it from -+ * being stuck in a completely inadequate performance level for too long. -+ * That is not guaranteed to happen if the updates are only triggered from CFS -+ * and DL, though, because they may not be coming in if only RT tasks are -+ * active all the time (or there are RT tasks only). -+ * -+ * As a workaround for that issue, this function is called periodically by the -+ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, -+ * but that really is a band-aid. Going forward it should be replaced with -+ * solutions targeted more specifically at RT tasks. -+ */ -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+ data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); -+ if (data) -+ data->func(data, rq_clock(rq), flags); -+} -+#else -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} -+#endif /* CONFIG_CPU_FREQ */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+extern int __init sched_tick_offload_init(void); -+#else -+static inline int sched_tick_offload_init(void) { return 0; } -+#endif -+ -+#ifdef arch_scale_freq_capacity -+#ifndef arch_scale_freq_invariant -+#define arch_scale_freq_invariant() (true) -+#endif -+#else /* arch_scale_freq_capacity */ -+#define arch_scale_freq_invariant() (false) -+#endif -+ -+extern void schedule_idle(void); -+ -+/* -+ * !! For sched_setattr_nocheck() (kernel) only !! -+ * -+ * This is actually gross. 
:( -+ * -+ * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE -+ * tasks, but still be able to sleep. We need this on platforms that cannot -+ * atomically change clock frequency. Remove once fast switching will be -+ * available on such platforms. -+ * -+ * SUGOV stands for SchedUtil GOVernor. -+ */ -+#define SCHED_FLAG_SUGOV 0x10000000 -+ -+#ifdef CONFIG_MEMBARRIER -+/* -+ * The scheduler provides memory barriers required by membarrier between: -+ * - prior user-space memory accesses and store to rq->membarrier_state, -+ * - store to rq->membarrier_state and following user-space memory accesses. -+ * In the same way it provides those guarantees around store to rq->curr. -+ */ -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+ int membarrier_state; -+ -+ if (prev_mm == next_mm) -+ return; -+ -+ membarrier_state = atomic_read(&next_mm->membarrier_state); -+ if (READ_ONCE(rq->membarrier_state) == membarrier_state) -+ return; -+ -+ WRITE_ONCE(rq->membarrier_state, membarrier_state); -+} -+#else -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+} -+#endif -+ -+#ifdef CONFIG_NUMA -+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); -+#else -+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return nr_cpu_ids; -+} -+#endif -+ -+void swake_up_all_locked(struct swait_queue_head *q); -+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+#endif /* ALT_SCHED_H */ -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -new file mode 100644 -index 000000000000..aff0bb30a884 ---- /dev/null -+++ b/kernel/sched/bmq.h -@@ -0,0 +1,20 @@ -+#ifndef BMQ_H -+#define BMQ_H -+ -+/* bits: -+ * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ -+#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1) -+#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) -+ -+struct bmq { -+ DECLARE_BITMAP(bitmap, SCHED_BITS); -+ struct list_head heads[SCHED_BITS]; -+}; -+ -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); -+} -+ -+#endif -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -new file mode 100644 -index 000000000000..ad9a7c448da7 ---- /dev/null -+++ b/kernel/sched/bmq_imp.h -@@ -0,0 +1,185 @@ -+#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" -+ -+/* -+ * BMQ only routines -+ */ -+#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) -+#define boost_threshold(p) (sched_timeslice_ns >>\ -+ (15 - MAX_PRIORITY_ADJ - (p)->boost_prio)) -+ -+static inline void boost_task(struct task_struct *p) -+{ -+ int limit; -+ -+ switch (p->policy) { -+ case SCHED_NORMAL: -+ limit = -MAX_PRIORITY_ADJ; -+ break; -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ limit = 0; -+ break; -+ default: -+ return; -+ } -+ -+ if (p->boost_prio > limit) -+ p->boost_prio--; -+} -+ -+static inline void deboost_task(struct task_struct *p) -+{ -+ if (p->boost_prio < MAX_PRIORITY_ADJ) -+ p->boost_prio++; -+} -+ -+/* -+ * Common interfaces -+ */ -+static inline int task_sched_prio(struct task_struct *p, struct rq *rq) -+{ -+ return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq); -+ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ p->time_slice = sched_timeslice_ns; -+ -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { -+ if (SCHED_RR != p->policy) -+ deboost_task(p); -+ requeue_task(p, rq); -+ } -+} -+ -+static inline void update_task_priodl(struct task_struct *p) {} -+ -+static inline unsigned long sched_queue_watermark(struct rq *rq) -+{ -+ return find_first_bit(rq->queue.bitmap, SCHED_BITS); -+} -+ -+static inline void sched_queue_init(struct rq *rq) -+{ -+ struct bmq *q = &rq->queue; -+ int i; -+ -+ bitmap_zero(q->bitmap, SCHED_BITS); -+ for(i = 0; i < SCHED_BITS; i++) -+ INIT_LIST_HEAD(&q->heads[i]); -+} -+ -+static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) -+{ -+ struct bmq *q = &rq->queue; -+ -+ idle->bmq_idx = IDLE_TASK_SCHED_PRIO; -+ INIT_LIST_HEAD(&q->heads[idle->bmq_idx]); -+ list_add(&idle->bmq_node, &q->heads[idle->bmq_idx]); -+ set_bit(idle->bmq_idx, q->bitmap); -+} -+ -+/* -+ * This routine used in bmq scheduler only which assume the idle task in the bmq -+ */ -+static inline struct task_struct *sched_rq_first_task(struct rq *rq) -+{ -+ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); -+ const struct list_head *head = &rq->queue.heads[idx]; -+ -+ return list_first_entry(head, struct task_struct, bmq_node); -+} -+ -+static inline struct task_struct * -+sched_rq_next_task(struct task_struct *p, struct rq *rq) -+{ -+ unsigned long idx = p->bmq_idx; -+ struct list_head *head = &rq->queue.heads[idx]; -+ -+ if (list_is_last(&p->bmq_node, head)) { -+ idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); -+ head = &rq->queue.heads[idx]; -+ -+ return list_first_entry(head, struct task_struct, bmq_node); -+ } -+ -+ return list_next_entry(p, bmq_node); -+} -+ -+#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -+ sched_info_dequeued(rq, p); \ -+ \ -+ list_del(&p->bmq_node); \ -+ if (list_empty(&rq->queue.heads[p->bmq_idx])) { \ -+ clear_bit(p->bmq_idx, rq->queue.bitmap);\ -+ func; \ -+ } -+ -+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -+ sched_info_queued(rq, p); \ -+ psi_enqueue(p, flags); \ -+ \ -+ p->bmq_idx = task_sched_prio(p, rq); \ -+ list_add_tail(&p->bmq_node, &rq->queue.heads[p->bmq_idx]); \ -+ set_bit(p->bmq_idx, rq->queue.bitmap) -+ -+#define __SCHED_REQUEUE_TASK(p, rq, func) \ -+{ \ -+ int idx = task_sched_prio(p, rq); \ -+\ -+ list_del(&p->bmq_node); \ -+ list_add_tail(&p->bmq_node, &rq->queue.heads[idx]); \ -+ if (idx != p->bmq_idx) { \ -+ if (list_empty(&rq->queue.heads[p->bmq_idx])) \ -+ clear_bit(p->bmq_idx, rq->queue.bitmap); \ -+ p->bmq_idx = idx; \ -+ set_bit(p->bmq_idx, rq->queue.bitmap); \ -+ func; \ -+ } \ -+} -+ -+static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) -+{ -+ return (task_sched_prio(p, rq) != p->bmq_idx); -+} -+ -+static void sched_task_fork(struct task_struct *p, struct rq *rq) -+{ -+ p->boost_prio = (p->boost_prio < 0) ? -+ p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; -+} -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). 
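/*
 * Standalone sketch (not part of the patch) of the BMQ priority-to-queue
 * mapping used by task_sched_prio() above: RT tasks keep their priority as
 * the bitmap/queue index, while normal tasks land at
 * MAX_RT_PRIO/2 + (prio + boost_prio)/2, so each boost_prio step moves a
 * task by half an index.  MAX_RT_PRIO and NICE_TO_PRIO are the mainline
 * values (DEFAULT_PRIO == 120); the boost values below are only example
 * inputs, the real range being bounded by MAX_PRIORITY_ADJ defined
 * elsewhere in the patch.
 */
#include <stdio.h>

#define MAX_RT_PRIO	100
#define NICE_TO_PRIO(n)	((n) + MAX_RT_PRIO + 20)

static int bmq_queue_idx(int prio, int boost_prio)
{
	return prio < MAX_RT_PRIO ? prio
				  : MAX_RT_PRIO / 2 + (prio + boost_prio) / 2;
}

int main(void)
{
	int nice, boost;

	for (nice = -20; nice <= 19; nice += 13)
		for (boost = -2; boost <= 2; boost += 2)
			printf("nice %3d, boost %2d -> queue %d\n", nice,
			       boost, bmq_queue_idx(NICE_TO_PRIO(nice), boost));
	return 0;
}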
-+ */ -+int task_prio(const struct task_struct *p) -+{ -+ if (p->prio < MAX_RT_PRIO) -+ return (p->prio - MAX_RT_PRIO); -+ return (p->prio - MAX_RT_PRIO + p->boost_prio); -+} -+ -+static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) -+{ -+ p->boost_prio = MAX_PRIORITY_ADJ; -+} -+ -+static void sched_task_ttwu(struct task_struct *p) -+{ -+ if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) -+ boost_task(p); -+} -+ -+static void sched_task_deactivate(struct task_struct *p, struct rq *rq) -+{ -+ if (rq_switch_time(rq) < boost_threshold(p)) -+ boost_task(p); -+} -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index e39008242cf4..5963716fe391 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -183,6 +183,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - -+#ifndef CONFIG_SCHED_ALT - /* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. -@@ -300,6 +301,13 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) - - return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); - } -+#else /* CONFIG_SCHED_ALT */ -+static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) -+{ -+ sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); -+ return sg_cpu->max; -+} -+#endif - - /** - * sugov_iowait_reset() - Reset the IO boost status of a CPU. -@@ -443,7 +451,9 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } - */ - static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) - { -+#ifndef CONFIG_SCHED_ALT - if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) -+#endif - sg_policy->limits_changed = true; - } - -@@ -686,6 +696,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - } - - ret = sched_setattr_nocheck(thread, &attr); -+ - if (ret) { - kthread_stop(thread); - pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); -@@ -912,6 +923,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) - cpufreq_governor_init(schedutil_gov); - - #ifdef CONFIG_ENERGY_MODEL -+#ifndef CONFIG_SCHED_ALT - extern bool sched_energy_update; - extern struct mutex sched_energy_mutex; - -@@ -942,4 +954,10 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - } - - } -+#else /* CONFIG_SCHED_ALT */ -+void sched_cpufreq_governor_change(struct cpufreq_policy *policy, -+ struct cpufreq_governor *old_gov) -+{ -+} -+#endif - #endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index 5a55d2300452..66a0ab7165f0 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -122,7 +122,7 @@ void account_user_time(struct task_struct *p, u64 cputime) - p->utime += cputime; - account_group_user_time(p, cputime); - -- index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; -+ index = task_running_nice(p) ? CPUTIME_NICE : CPUTIME_USER; - - /* Add user time to cpustat. */ - task_group_account_field(p, index, cputime); -@@ -146,7 +146,7 @@ void account_guest_time(struct task_struct *p, u64 cputime) - p->gtime += cputime; - - /* Add guest time to cpustat. 
*/ -- if (task_nice(p) > 0) { -+ if (task_running_nice(p)) { - cpustat[CPUTIME_NICE] += cputime; - cpustat[CPUTIME_GUEST_NICE] += cputime; - } else { -@@ -269,7 +269,7 @@ static inline u64 account_other_time(u64 max) - #ifdef CONFIG_64BIT - static inline u64 read_sum_exec_runtime(struct task_struct *t) - { -- return t->se.sum_exec_runtime; -+ return tsk_seruntime(t); - } - #else - static u64 read_sum_exec_runtime(struct task_struct *t) -@@ -279,7 +279,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) - struct rq *rq; - - rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -+ ns = tsk_seruntime(t); - task_rq_unlock(rq, t, &rf); - - return ns; -@@ -614,7 +614,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - task_cputime(p, &cputime.utime, &cputime.stime); -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index f324dc36fc43..a6b566bda65b 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -369,6 +369,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_ALT - /* - * idle-task scheduling class. - */ -@@ -482,3 +483,4 @@ const struct sched_class idle_sched_class - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -new file mode 100644 -index 000000000000..7fdeace7e8a5 ---- /dev/null -+++ b/kernel/sched/pds.h -@@ -0,0 +1,14 @@ -+#ifndef PDS_H -+#define PDS_H -+ -+/* bits: -+ * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ -+#define SCHED_BITS (MAX_RT_PRIO + 20 + 1) -+#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio > DEFAULT_PRIO); -+} -+ -+#endif -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -new file mode 100644 -index 000000000000..6baee5e961b9 ---- /dev/null -+++ b/kernel/sched/pds_imp.h -@@ -0,0 +1,257 @@ -+#define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" -+ -+static const u64 user_prio2deadline[NICE_WIDTH] = { -+/* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, -+/* -15 */ 6754965, 7430461, 8173507, 8990857, 9889942, -+/* -10 */ 10878936, 11966829, 13163511, 14479862, 15927848, -+/* -5 */ 17520632, 19272695, 21199964, 23319960, 25651956, -+/* 0 */ 28217151, 31038866, 34142752, 37557027, 41312729, -+/* 5 */ 45444001, 49988401, 54987241, 60485965, 66534561, -+/* 10 */ 73188017, 80506818, 88557499, 97413248, 107154572, -+/* 15 */ 117870029, 129657031, 142622734, 156885007, 172573507 -+}; -+ -+static const unsigned char dl_level_map[] = { -+/* 0 4 8 12 */ -+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, -+/* 16 20 24 28 */ -+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, -+/* 32 36 40 44 */ -+ 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, -+/* 48 52 56 60 */ -+ 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, -+/* 64 68 72 76 */ -+ 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 7, 6, 5, 4, 3, 2, -+/* 80 84 88 92 */ -+ 1, 0 -+}; -+ -+static inline int -+task_sched_prio(const struct task_struct *p, const struct rq *rq) -+{ -+ size_t delta; -+ -+ if (p == rq->idle) -+ return IDLE_TASK_SCHED_PRIO; -+ -+ if (p->prio < MAX_RT_PRIO) -+ return p->prio; -+ -+ 
delta = (rq->clock + user_prio2deadline[39] - p->deadline) >> 21; -+ delta = min((size_t)delta, ARRAY_SIZE(dl_level_map) - 1); -+ -+ return MAX_RT_PRIO + dl_level_map[delta]; -+} -+ -+static inline void update_task_priodl(struct task_struct *p) -+{ -+ p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq); -+ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ -+ p->time_slice = sched_timeslice_ns; -+ -+ if (p->prio >= MAX_RT_PRIO) -+ p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; -+ update_task_priodl(p); -+ -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) -+ requeue_task(p, rq); -+} -+ -+/* -+ * pds_skiplist_task_search -- search function used in PDS run queue skip list -+ * node insert operation. -+ * @it: iterator pointer to the node in the skip list -+ * @node: pointer to the skiplist_node to be inserted -+ * -+ * Returns true if key of @it is less or equal to key value of @node, otherwise -+ * false. -+ */ -+static inline bool -+pds_skiplist_task_search(struct skiplist_node *it, struct skiplist_node *node) -+{ -+ return (skiplist_entry(it, struct task_struct, sl_node)->priodl <= -+ skiplist_entry(node, struct task_struct, sl_node)->priodl); -+} -+ -+/* -+ * Define the skip list insert function for PDS -+ */ -+DEFINE_SKIPLIST_INSERT_FUNC(pds_skiplist_insert, pds_skiplist_task_search); -+ -+/* -+ * Init the queue structure in rq -+ */ -+static inline void sched_queue_init(struct rq *rq) -+{ -+ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); -+} -+ -+/* -+ * Init idle task and put into queue structure of rq -+ * IMPORTANT: may be called multiple times for a single cpu -+ */ -+static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) -+{ -+ /*printk(KERN_INFO "sched: init(%d) - %px\n", cpu_of(rq), idle);*/ -+ int default_prio = idle->prio; -+ -+ idle->prio = MAX_PRIO; -+ idle->deadline = 0ULL; -+ update_task_priodl(idle); -+ -+ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); -+ -+ idle->sl_node.level = idle->sl_level; -+ pds_skiplist_insert(&rq->sl_header, &idle->sl_node); -+ -+ idle->prio = default_prio; -+} -+ -+/* -+ * This routine assume that the idle task always in queue -+ */ -+static inline struct task_struct *sched_rq_first_task(struct rq *rq) -+{ -+ struct skiplist_node *node = rq->sl_header.next[0]; -+ -+ BUG_ON(node == &rq->sl_header); -+ return skiplist_entry(node, struct task_struct, sl_node); -+} -+ -+static inline struct task_struct * -+sched_rq_next_task(struct task_struct *p, struct rq *rq) -+{ -+ struct skiplist_node *next = p->sl_node.next[0]; -+ -+ BUG_ON(next == &rq->sl_header); -+ return skiplist_entry(next, struct task_struct, sl_node); -+} -+ -+static inline unsigned long sched_queue_watermark(struct rq *rq) -+{ -+ return task_sched_prio(sched_rq_first_task(rq), rq); -+} -+ -+#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -+ sched_info_dequeued(rq, p); \ -+ \ -+ if (skiplist_del_init(&rq->sl_header, &p->sl_node)) { \ -+ func; \ -+ } -+ -+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -+ sched_info_queued(rq, p); \ -+ psi_enqueue(p, flags); \ -+ \ -+ p->sl_node.level = p->sl_level; \ -+ pds_skiplist_insert(&rq->sl_header, &p->sl_node) -+ -+/* -+ * Requeue a task @p to @rq -+ */ -+#define __SCHED_REQUEUE_TASK(p, rq, func) \ -+{\ -+ bool b_first = skiplist_del_init(&rq->sl_header, &p->sl_node); \ 
-+\ -+ p->sl_node.level = p->sl_level; \ -+ if (pds_skiplist_insert(&rq->sl_header, &p->sl_node) || b_first) { \ -+ func; \ -+ } \ -+} -+ -+static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) -+{ -+ struct skiplist_node *node = p->sl_node.prev[0]; -+ -+ if (node != &rq->sl_header) { -+ struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); -+ -+ if (t->priodl > p->priodl) -+ return true; -+ } -+ -+ node = p->sl_node.next[0]; -+ if (node != &rq->sl_header) { -+ struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); -+ -+ if (t->priodl < p->priodl) -+ return true; -+ } -+ -+ return false; -+} -+ -+/* -+ * pds_skiplist_random_level -- Returns a pseudo-random level number for skip -+ * list node which is used in PDS run queue. -+ * -+ * In current implementation, based on testing, the first 8 bits in microseconds -+ * of niffies are suitable for random level population. -+ * find_first_bit() is used to satisfy p = 0.5 between each levels, and there -+ * should be platform hardware supported instruction(known as ctz/clz) to speed -+ * up this function. -+ * The skiplist level for a task is populated when task is created and doesn't -+ * change in task's life time. When task is being inserted into run queue, this -+ * skiplist level is set to task's sl_node->level, the skiplist insert function -+ * may change it based on current level of the skip lsit. -+ */ -+static inline int pds_skiplist_random_level(const struct task_struct *p) -+{ -+ long unsigned int randseed; -+ -+ /* -+ * 1. Some architectures don't have better than microsecond resolution -+ * so mask out ~microseconds as a factor of the random seed for skiplist -+ * insertion. -+ * 2. Use address of task structure pointer as another factor of the -+ * random seed for task burst forking scenario. -+ */ -+ randseed = (task_rq(p)->clock ^ (long unsigned int)p) >> 10; -+ -+ return find_first_bit(&randseed, NUM_SKIPLIST_LEVEL - 1); -+} -+ -+static void sched_task_fork(struct task_struct *p, struct rq *rq) -+{ -+ p->sl_level = pds_skiplist_random_level(p); -+ if (p->prio >= MAX_RT_PRIO) -+ p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; -+ update_task_priodl(p); -+} -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). 
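/*
 * Quick userspace check (not part of the patch) of the property that
 * pds_skiplist_random_level() above relies on: the index of the lowest set
 * bit of a (pseudo)random word is geometrically distributed, so each skip
 * list level is chosen about half as often as the one below it (p = 0.5).
 * NUM_SKIPLIST_LEVEL is assumed to be 8 here purely for illustration (the
 * real constant comes from the skip list header added elsewhere in the
 * patch), and a small xorshift PRNG stands in for the clock ^ task-pointer
 * seed.
 */
#include <stdio.h>
#include <stdint.h>

#define NUM_SKIPLIST_LEVEL 8	/* assumed for this sketch */

static int random_level(uint64_t randseed)
{
	int level = 0;

	/* equivalent of find_first_bit(&randseed, NUM_SKIPLIST_LEVEL - 1) */
	while (level < NUM_SKIPLIST_LEVEL - 1 && !(randseed & 1)) {
		randseed >>= 1;
		level++;
	}
	return level;
}

int main(void)
{
	uint64_t x = 88172645463325252ULL;	/* xorshift64 state */
	long hist[NUM_SKIPLIST_LEVEL] = { 0 };
	int i;

	for (i = 0; i < 1000000; i++) {
		x ^= x << 13; x ^= x >> 7; x ^= x << 17;
		hist[random_level(x)]++;
	}
	for (i = 0; i < NUM_SKIPLIST_LEVEL; i++)
		printf("level %d: %ld\n", i, hist[i]);
	return 0;
}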
-+ */ -+int task_prio(const struct task_struct *p) -+{ -+ int ret; -+ -+ if (p->prio < MAX_RT_PRIO) -+ return (p->prio - MAX_RT_PRIO); -+ -+ preempt_disable(); -+ ret = task_sched_prio(p, this_rq()) - MAX_RT_PRIO; -+ preempt_enable(); -+ -+ return ret; -+} -+ -+static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) -+{ -+ time_slice_expired(p, rq); -+} -+ -+static void sched_task_ttwu(struct task_struct *p) {} -+static void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} -diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c -index 2c613e1cff3a..0103b2a7201d 100644 ---- a/kernel/sched/pelt.c -+++ b/kernel/sched/pelt.c -@@ -270,6 +270,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load) - WRITE_ONCE(sa->util_avg, sa->util_sum / divider); - } - -+#ifndef CONFIG_SCHED_ALT - /* - * sched_entity: - * -@@ -387,8 +388,9 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - - return 0; - } -+#endif - --#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) - /* - * thermal: - * -diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h -index 795e43e02afc..856163dac896 100644 ---- a/kernel/sched/pelt.h -+++ b/kernel/sched/pelt.h -@@ -1,13 +1,15 @@ - #ifdef CONFIG_SMP - #include "sched-pelt.h" - -+#ifndef CONFIG_SCHED_ALT - int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); - int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); - int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); - int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); - int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); -+#endif - --#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) - int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity); - - static inline u64 thermal_load_avg(struct rq *rq) -@@ -42,6 +44,7 @@ static inline u32 get_pelt_divider(struct sched_avg *avg) - return LOAD_AVG_MAX - 1024 + avg->period_contrib; - } - -+#ifndef CONFIG_SCHED_ALT - /* - * When a task is dequeued, its estimated utilization should not be update if - * its util_avg has not been updated at least once. 
-@@ -162,9 +165,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) - return rq_clock_pelt(rq_of(cfs_rq)); - } - #endif -+#endif /* CONFIG_SCHED_ALT */ - - #else - -+#ifndef CONFIG_SCHED_ALT - static inline int - update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) - { -@@ -182,6 +187,7 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - { - return 0; - } -+#endif - - static inline int - update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 28709f6b0975..6bc68bacbac8 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2,6 +2,10 @@ - /* - * Scheduler internal types and methods: - */ -+#ifdef CONFIG_SCHED_ALT -+#include "alt_sched.h" -+#else -+ - #include - - #include -@@ -2626,3 +2630,9 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) - - void swake_up_all_locked(struct swait_queue_head *q); - void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (task_nice(p) > 0); -+} -+#endif /* !CONFIG_SCHED_ALT */ -diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c -index 750fb3c67eed..108422ebc7bf 100644 ---- a/kernel/sched/stats.c -+++ b/kernel/sched/stats.c -@@ -22,8 +22,10 @@ static int show_schedstat(struct seq_file *seq, void *v) - } else { - struct rq *rq; - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_ALT - struct sched_domain *sd; - int dcount = 0; -+#endif - #endif - cpu = (unsigned long)(v - 2); - rq = cpu_rq(cpu); -@@ -40,6 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - seq_printf(seq, "\n"); - - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_ALT - /* domain-specific stats */ - rcu_read_lock(); - for_each_domain(cpu, sd) { -@@ -68,6 +71,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - sd->ttwu_move_balance); - } - rcu_read_unlock(); -+#endif - #endif - } - return 0; -diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 1bd7e3af904f..cc946a9bd550 100644 ---- a/kernel/sched/topology.c -+++ b/kernel/sched/topology.c -@@ -4,6 +4,7 @@ - */ - #include "sched.h" - -+#ifndef CONFIG_SCHED_ALT - DEFINE_MUTEX(sched_domains_mutex); - - /* Protected by sched_domains_mutex: */ -@@ -1180,8 +1181,10 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) - */ - - static int default_relax_domain_level = -1; -+#endif /* CONFIG_SCHED_ALT */ - int sched_domain_level_max; - -+#ifndef CONFIG_SCHED_ALT - static int __init setup_relax_domain_level(char *str) - { - if (kstrtoint(str, 0, &default_relax_domain_level)) -@@ -1413,6 +1416,7 @@ sd_init(struct sched_domain_topology_level *tl, - - return sd; - } -+#endif /* CONFIG_SCHED_ALT */ - - /* - * Topology list, bottom-up. 
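/*
 * Why the cputime.c hunks above can switch from "task_nice(p) > 0" to
 * task_running_nice(p): each scheduler flavour supplies its own definition
 * (the CFS fallback added to sched.h keeps task_nice(p) > 0, PDS uses
 * p->prio > DEFAULT_PRIO, BMQ additionally folds in boost_prio).  A minimal
 * sketch, using the mainline prio/nice relation, showing that the PDS form
 * selects exactly the nice > 0 tasks for ordinary (non-RT) tasks:
 */
#include <stdio.h>

#define MAX_RT_PRIO	100
#define DEFAULT_PRIO	(MAX_RT_PRIO + 20)	/* 120 */
#define NICE_TO_PRIO(n)	((n) + DEFAULT_PRIO)

int main(void)
{
	int nice;

	for (nice = -2; nice <= 2; nice++)
		printf("nice %2d -> prio %3d, accounted as CPUTIME_NICE: %s\n",
		       nice, NICE_TO_PRIO(nice),
		       NICE_TO_PRIO(nice) > DEFAULT_PRIO ? "yes" : "no");
	return 0;
}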
-@@ -1442,6 +1446,7 @@ void set_sched_topology(struct sched_domain_topology_level *tl) - sched_domain_topology = tl; - } - -+#ifndef CONFIG_SCHED_ALT - #ifdef CONFIG_NUMA - - static const struct cpumask *sd_numa_mask(int cpu) -@@ -2316,3 +2321,17 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); - mutex_unlock(&sched_domains_mutex); - } -+#else /* CONFIG_SCHED_ALT */ -+void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], -+ struct sched_domain_attr *dattr_new) -+{} -+ -+#ifdef CONFIG_NUMA -+int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; -+ -+int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return best_mask_cpu(cpu, cpus); -+} -+#endif /* CONFIG_NUMA */ -+#endif -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index afad085960b8..e91b4cb3042b 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -120,6 +120,10 @@ static unsigned long long_max = LONG_MAX; - static int one_hundred = 100; - static int two_hundred = 200; - static int one_thousand = 1000; -+#ifdef CONFIG_SCHED_ALT -+static int __maybe_unused zero = 0; -+extern int sched_yield_type; -+#endif - #ifdef CONFIG_PRINTK - static int ten_thousand = 10000; - #endif -@@ -184,7 +188,7 @@ static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT; - int sysctl_legacy_va_layout; - #endif - --#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_ALT) - static int min_sched_granularity_ns = 100000; /* 100 usecs */ - static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_wakeup_granularity_ns; /* 0 usecs */ -@@ -1652,6 +1656,7 @@ int proc_do_static_key(struct ctl_table *table, int write, - } - - static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_ALT - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, -@@ -1854,6 +1859,7 @@ static struct ctl_table kern_table[] = { - .extra2 = SYSCTL_ONE, - }, - #endif -+#endif /* !CONFIG_SCHED_ALT */ - #ifdef CONFIG_PROVE_LOCKING - { - .procname = "prove_locking", -@@ -2430,6 +2436,17 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_SCHED_ALT -+ { -+ .procname = "yield_type", -+ .data = &sched_yield_type, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &two, -+ }, -+#endif - #if defined(CONFIG_S390) && defined(CONFIG_SMP) - { - .procname = "spin_retry", -diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index 95b6a708b040..81f2ee62c807 100644 ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -1927,8 +1927,10 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, - int ret = 0; - u64 slack; - -+#ifndef CONFIG_SCHED_ALT - slack = current->timer_slack_ns; - if (dl_task(current) || rt_task(current)) -+#endif - slack = 0; - - hrtimer_init_sleeper_on_stack(&t, clockid, mode); -diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c -index a71758e34e45..d20c347df861 100644 ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -216,7 +216,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) - u64 stime, utime; - - task_cputime(p, &utime, &stime); -- store_samples(samples, stime, utime, p->se.sum_exec_runtime); -+ store_samples(samples, stime, utime, tsk_seruntime(p)); - } - - static void proc_sample_cputime_atomic(struct 
task_cputime_atomic *at, -@@ -801,6 +801,7 @@ static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, - } - } - -+#ifndef CONFIG_SCHED_ALT - static inline void check_dl_overrun(struct task_struct *tsk) - { - if (tsk->dl.dl_overrun) { -@@ -808,6 +809,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) - __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); - } - } -+#endif - - static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) - { -@@ -835,8 +837,10 @@ static void check_thread_timers(struct task_struct *tsk, - u64 samples[CPUCLOCK_MAX]; - unsigned long soft; - -+#ifndef CONFIG_SCHED_ALT - if (dl_task(tsk)) - check_dl_overrun(tsk); -+#endif - - if (expiry_cache_is_inactive(pct)) - return; -@@ -850,7 +854,7 @@ static void check_thread_timers(struct task_struct *tsk, - soft = task_rlimit(tsk, RLIMIT_RTTIME); - if (soft != RLIM_INFINITY) { - /* Task RT timeout is accounted in jiffies. RTTIME is usec */ -- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); -+ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); - unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - - /* At the hard limit, send SIGKILL. No further action. */ -@@ -1086,8 +1090,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) - return true; - } - -+#ifndef CONFIG_SCHED_ALT - if (dl_task(tsk) && tsk->dl.dl_overrun) - return true; -+#endif - - return false; - } -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index b5e3496cf803..65f60c77bc50 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) - { - /* Make this a -deadline thread */ - static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_ALT -+ /* No deadline on BMQ/PDS, use RR */ -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, - .sched_runtime = 100000ULL, - .sched_deadline = 10000000ULL, - .sched_period = 10000000ULL -+#endif - }; - struct wakeup_test_data *x = data; - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f36264fea75c6ca7c34eaa259c0bff829cbf6ac0..d43ca62fd00fe442bda9b4ad548fae432a7436de 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -11,6 +11,10 @@ - * scheduler by Alfred Chen. - * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. - */ -+#define CREATE_TRACE_POINTS -+#include -+#undef CREATE_TRACE_POINTS -+ - #include "sched.h" - - #include -@@ -42,8 +46,11 @@ - #include "pelt.h" - #include "smp.h" - --#define CREATE_TRACE_POINTS --#include -+/* -+ * Export tracepoints that act as a bare tracehook (ie: have no trace event -+ * associated with them) to allow external modules to probe them. -+ */ -+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - - #define ALT_SCHED_VERSION "v5.9-r0" - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 99be2c51c88d0406cced20b36d7230da12930a5c..03f8b8b1aa27eeb15989af25b4050c767da12aad 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -46,6 +46,8 @@ - - #include "cpupri.h" - -+#include -+ - #ifdef CONFIG_SCHED_BMQ - #include "bmq.h" - #endif -@@ -496,6 +498,8 @@ static inline int sched_tick_offload_init(void) { return 0; } - - extern void schedule_idle(void); - -+#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) -+ - /* - * !! For sched_setattr_nocheck() (kernel) only !! 
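/*
 * The kernel/sysctl.c hunk above registers a "yield_type" knob (range 0-2,
 * backed by sched_yield_type in alt_core.c) in kern_table, so on a kernel
 * actually built with CONFIG_SCHED_ALT it should appear as
 * /proc/sys/kernel/yield_type.  A minimal sketch that only reads the
 * current value; the exact meaning of each value is left to the upstream
 * BMQ/PDS documentation (the do_sched_yield_type_1() helpers above cover
 * the "1" case).
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/yield_type", "r");
	int val;

	if (!f) {
		perror("yield_type");	/* not a CONFIG_SCHED_ALT kernel? */
		return 1;
	}
	if (fscanf(f, "%d", &val) != 1) {
		fclose(f);
		return 1;
	}
	printf("yield_type = %d\n", val);
	fclose(f);
	return 0;
}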
- * diff --git a/linux510-rc-tkg/linux510-tkg-patches/0011-ZFS-fix.patch b/linux510-rc-tkg/linux510-tkg-patches/0011-ZFS-fix.patch deleted file mode 100644 index af71d04..0000000 --- a/linux510-rc-tkg/linux510-tkg-patches/0011-ZFS-fix.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 1e010beda2896bdf3082fb37a3e49f8ce20e04d8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= -Date: Thu, 2 May 2019 05:28:08 +0100 -Subject: [PATCH] x86/fpu: Export kernel_fpu_{begin,end}() with - EXPORT_SYMBOL_GPL -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -We need these symbols in zfs as the fpu implementation breaks userspace: - -https://github.com/zfsonlinux/zfs/issues/9346 -Signed-off-by: Jörg Thalheim ---- - arch/x86/kernel/fpu/core.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c -index 12c70840980e..352538b3bb5d 100644 ---- a/arch/x86/kernel/fpu/core.c -+++ b/arch/x86/kernel/fpu/core.c -@@ -102,7 +102,7 @@ void kernel_fpu_begin(void) - } - __cpu_invalidate_fpregs_state(); - } --EXPORT_SYMBOL_GPL(kernel_fpu_begin); -+EXPORT_SYMBOL(kernel_fpu_begin); - - void kernel_fpu_end(void) - { -@@ -111,7 +111,7 @@ void kernel_fpu_end(void) - this_cpu_write(in_kernel_fpu, false); - preempt_enable(); - } --EXPORT_SYMBOL_GPL(kernel_fpu_end); -+EXPORT_SYMBOL(kernel_fpu_end); - - /* - * Save the FPU state (mark it for reload if necessary): --- -2.23.0 - - diff --git a/linux510-rc-tkg/linux510-tkg-patches/0012-misc-additions.patch b/linux510-rc-tkg/linux510-tkg-patches/0012-misc-additions.patch deleted file mode 100644 index a4efaef..0000000 --- a/linux510-rc-tkg/linux510-tkg-patches/0012-misc-additions.patch +++ /dev/null @@ -1,54 +0,0 @@ -diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig -index 0840d27381ea..73aba9a31064 100644 ---- a/drivers/tty/Kconfig -+++ b/drivers/tty/Kconfig -@@ -75,6 +75,19 @@ config VT_CONSOLE_SLEEP - def_bool y - depends on VT_CONSOLE && PM_SLEEP - -+config NR_TTY_DEVICES -+ int "Maximum tty device number" -+ depends on VT -+ range 12 63 -+ default 63 -+ help -+ This option is used to change the number of tty devices in /dev. -+ The default value is 63. The lowest number you can set is 12, -+ 63 is also the upper limit so we don't overrun the serial -+ consoles. -+ -+ If unsure, say 63. -+ - config HW_CONSOLE - bool - depends on VT && !UML -diff --git a/include/uapi/linux/vt.h b/include/uapi/linux/vt.h -index e9d39c48520a..3bceead8da40 100644 ---- a/include/uapi/linux/vt.h -+++ b/include/uapi/linux/vt.h -@@ -3,12 +3,25 @@ - #define _UAPI_LINUX_VT_H - - -+/* -+ * We will make this definition solely for the purpose of making packages -+ * such as splashutils build, because they can not understand that -+ * NR_TTY_DEVICES is defined in the kernel configuration. -+ */ -+#ifndef CONFIG_NR_TTY_DEVICES -+#define CONFIG_NR_TTY_DEVICES 63 -+#endif -+ - /* - * These constants are also useful for user-level apps (e.g., VC - * resizing). 
- */ - #define MIN_NR_CONSOLES 1 /* must be at least 1 */ --#define MAX_NR_CONSOLES 63 /* serial lines start at 64 */ -+/* -+ * NR_TTY_DEVICES: -+ * Value MUST be at least 12 and must never be higher then 63 -+ */ -+#define MAX_NR_CONSOLES CONFIG_NR_TTY_DEVICES /* serial lines start above this */ - /* Note: the ioctl VT_GETSTATE does not work for - consoles 16 and higher (since it returns a short) */ \ No newline at end of file diff --git a/linux54-tkg/PKGBUILD b/linux54-tkg/PKGBUILD deleted file mode 100644 index 867a107..0000000 --- a/linux54-tkg/PKGBUILD +++ /dev/null @@ -1,423 +0,0 @@ -# Based on the file created for Arch Linux by: -# Tobias Powalowski -# Thomas Baechler - -# Contributor: Tk-Glitch - -plain ' .---.` `.---.' -plain ' `/syhhhyso- -osyhhhys/`' -plain ' .syNMdhNNhss/``.---.``/sshNNhdMNys.' -plain ' +sdMh.`+MNsssssssssssssssNM+`.hMds+' -plain ' :syNNdhNNhssssssssssssssshNNhdNNys:' -plain ' /ssyhhhysssssssssssssssssyhhhyss/' -plain ' .ossssssssssssssssssssssssssssso.' -plain ' :sssssssssssssssssssssssssssssssss:' -plain ' /sssssssssssssssssssssssssssssssssss/' -plain ' :sssssssssssssoosssssssoosssssssssssss:' -plain ' osssssssssssssoosssssssoossssssssssssso' -plain ' osssssssssssyyyyhhhhhhhyyyyssssssssssso' -plain ' /yyyyyyhhdmmmmNNNNNNNNNNNmmmmdhhyyyyyy/' -plain ' smmmNNNNNNNNNNNNNNNNNNNNNNNNNNNNNmmms' -plain ' /dNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNd/' -plain ' `:sdNNNNNNNNNNNNNNNNNNNNNNNNNds:`' -plain ' `-+shdNNNNNNNNNNNNNNNdhs+-`' -plain ' `.-:///////:-.`' - -_where="$PWD" # track basedir as different Arch based distros are moving srcdir around - -cp "$_where"/linux54-tkg-patches/* "$_where" # copy patches inside the PKGBUILD's dir to preserve makepkg sourcing and md5sum checking -cp "$_where"/linux54-tkg-config/* "$_where" # copy config files and hooks inside the PKGBUILD's dir to preserve makepkg sourcing and md5sum checking - -source "$_where"/customization.cfg # load default configuration from file - -# Load external configuration file if present. Available variable values will overwrite customization.cfg ones. -if [ -e "$_EXT_CONFIG_PATH" ]; then - source "$_EXT_CONFIG_PATH" && msg2 "External configuration file $_EXT_CONFIG_PATH will be used to override customization.cfg values." && msg2 "" -fi - -if [ -z "$_OPTIPROFILE" ] && [ ! -e "$_where"/cpuschedset ]; then - # Prompt about optimized configurations. Available variable values will overwrite customization.cfg/external config ones. - plain "Do you want to use a predefined optimized profile?" - read -rp "`echo $' > 1.Custom\n 2.Ryzen Desktop (Performance)\n 3.Other Desktop (Performance)\nchoice[1-3?]: '`" _OPTIPROFILE; -fi -if [ "$_OPTIPROFILE" == "2" ]; then - source "$_where"/ryzen-desktop-profile.cfg && msg2 "Ryzen Desktop (Performance) profile will be used." && msg2 "" -elif [ "$_OPTIPROFILE" == "3" ]; then - source "$_where"/generic-desktop-profile.cfg && msg2 "Generic Desktop (Performance) profile will be used." && msg2 "" -fi - -# source cpuschedset early if present -if [ -e "$_where"/cpuschedset ]; then - source "$_where"/cpuschedset -fi - -# CPU SCHED selector -if [ -z "$_cpusched" ] && [ ! -e "$_where"/cpuschedset ]; then - plain "What CPU sched variant do you want to build/install?" 
- read -rp "`echo $' > 1.PDS\n 2.MuQSS\n 3.BMQ\n 4.CFS\nchoice[1-4?]: '`" CONDITION; - if [ "$CONDITION" == "2" ]; then - echo "_cpusched=\"MuQSS\"" > "$_where"/cpuschedset - elif [ "$CONDITION" == "3" ]; then - echo "_cpusched=\"bmq\"" > "$_where"/cpuschedset - elif [ "$CONDITION" == "4" ]; then - echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - else - echo "_cpusched=\"pds\"" > "$_where"/cpuschedset - fi - if [ -n "$_custom_pkgbase" ]; then - echo "_custom_pkgbase=\"${_custom_pkgbase}\"" >> "$_where"/cpuschedset - fi -elif [ "$_cpusched" == "muqss" ] || [ "$_cpusched" == "MuQSS" ]; then - echo "_cpusched=\"MuQSS\"" > "$_where"/cpuschedset -elif [ "$_cpusched" == "pds" ]; then - echo "_cpusched=\"pds\"" > "$_where"/cpuschedset -elif [ "$_cpusched" == "bmq" ]; then - echo "_cpusched=\"bmq\"" > "$_where"/cpuschedset -else - if [ "$_nofallback" != "true" ]; then - warning "Something is wrong with your cpusched selection. Do you want to fallback to CFS (default)?" - read -rp "`echo $' > N/y : '`" _fallback; - fi - if [[ "$_fallback" =~ [yY] ]] || [ "$_nofallback" == "true" ]; then - echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - else - error "Exiting..." - exit 1 - fi -fi - -source "$_where"/cpuschedset - -_basever=54 -if [ -n "$_custom_pkgbase" ]; then - pkgbase="${_custom_pkgbase}" -else - pkgbase=linux"${_basever}"-tkg-"${_cpusched}" -fi -pkgname=("${pkgbase}" "${pkgbase}-headers") -_basekernel=5.4 -_sub=72 -pkgver="${_basekernel}"."${_sub}" -pkgrel=91 -pkgdesc='Linux-tkg' -arch=('x86_64') # no i686 in here -url="http://www.kernel.org/" -license=('GPL2') -makedepends=('xmlto' 'docbook-xsl' 'kmod' 'inetutils' 'bc' 'libelf' 'patchutils' 'flex' 'python-sphinx' 'python-sphinx_rtd_theme' 'graphviz' 'imagemagick' 'git') -optdepends=('schedtool') -options=('!strip' 'docs') -source=("https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-${_basekernel}.tar.xz" - "https://cdn.kernel.org/pub/linux/kernel/v5.x/patch-${pkgver}.xz" - "https://raw.githubusercontent.com/graysky2/kernel_gcc_patch/master/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v4.19-v5.4.patch" - 'config.x86_64' # stock Arch config - 'config_hardened.x86_64' # hardened Arch config - 90-cleanup.hook - cleanup - # ARCH Patches - 0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch - # TkG - 0002-clear-patches.patch - 0003-glitched-base.patch - 0003-glitched-cfs.patch - 0004-glitched-ondemand-muqss.patch - 0004-glitched-muqss.patch - 0004-5.4-ck1.patch - 0005-glitched-ondemand-pds.patch - 0005-glitched-pds.patch - 0005-v5.4_undead-pds099o.patch - 0006-add-acs-overrides_iommu.patch - 0007-v5.4-fsync.patch - #0008-5.4-bcachefs.patch - 0009-glitched-bmq.patch - 0009-bmq_v5.4-r2.patch - 0011-ZFS-fix.patch - 0012-linux-hardened.patch -) -sha256sums=('bf338980b1670bca287f9994b7441c2361907635879169c64ae78364efc5f491' - 'bce941bcb6c8148ac19cd2fa4f1e19c6c75f699a3bcdfd452df7484cff2a2353' - '27b7fc535ade94b636c3ec4e809e141831e9465a0ef55215a9852b87048629e2' - '55dd5117c1da17c9ec38d7bc995958958bcc8b7ebcfd81de1d4c7650b85537ab' - '1f4a20d6eaaa0d969af93152a65191492400c6aa838fc1c290b0dd29bb6019d8' - '1e15fc2ef3fa770217ecc63a220e5df2ddbcf3295eb4a021171e7edd4c6cc898' - '66a03c246037451a77b4d448565b1d7e9368270c7d02872fbd0b5d024ed0a997' - '31dc68e84aecfb7d069efb1305049122c65694676be8b955634abcf0675922a2' - 'd02bf5ca08fd610394b9d3a0c3b176d74af206f897dee826e5cbaec97bb4a4aa' - '156a2c75fd228920e3c3da5e04a110afa403951bdfbb85772c2fd4b82fd24d61' - '7058e57fd68367b029adc77f2a82928f1433daaf02c8c279cb2d13556c8804d7' - 
'c605f638d74c61861ebdc36ebd4cb8b6475eae2f6273e1ccb2bbb3e10a2ec3fe' - 'bc69d6e5ee8172b0242c8fa72d13cfe2b8d2b6601468836908a7dfe8b78a3bbb' - '815974c65f47301d2a5d1577bf95e8a4b54cad7d77f226e0065f83e763837c48' - '62496f9ca788996181ef145f96ad26291282fcc3fb95cdc04080dcf84365be33' - 'eac7e5d6201528e64f4bdf5e286c842511e1afc52e1518dc8e7d11932bbe0a99' - 'db03fbd179ec78941eefe1c0edde4c19071bc603511d0b5c06c04e412994b62e' - '19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a' - '2d9260b80b43bbd605cf420d6bd53aa7262103dfd77196ba590ece5600b6dc0d' - '3832f828a9f402b153fc9a6829c5a4eaf6091804bcda3a0423c8e1b57e26420d' - '6a6a736cf1b3513d108bfd36f60baf50bb36b33aec21ab0d0ffad13602b7ff75' - '49262ce4a8089fa70275aad742fc914baa28d9c384f710c9a62f64796d13e104' - 'aeb31404c26ee898d007b1f66cb9572c9884ad8eca14edc4587d68f6cba6de46') - -export KBUILD_BUILD_HOST=archlinux -export KBUILD_BUILD_USER=$pkgbase -export KBUILD_BUILD_TIMESTAMP="$(date -Ru${SOURCE_DATE_EPOCH:+d @$SOURCE_DATE_EPOCH})" - -prepare() { - rm -rf $pkgdir # Nuke the entire pkg folder so it'll get regenerated clean on next build - - ln -s "${_where}/customization.cfg" "${srcdir}" # workaround - - cd "${srcdir}/linux-${_basekernel}" - - source "$_where/linux$_basever-tkg-config/prepare" - _tkg_srcprep -} - -build() { - cd "${srcdir}/linux-${_basekernel}" - - # Use custom compiler paths if defined - if [ -n "${CUSTOM_GCC_PATH}" ]; then - PATH=${CUSTOM_GCC_PATH}/bin:${CUSTOM_GCC_PATH}/lib:${CUSTOM_GCC_PATH}/include:${PATH} - fi - - if [ "$_force_all_threads" == "true" ]; then - _force_all_threads=-j$(nproc) - else - _force_all_threads=${MAKEFLAGS} - fi - - # ccache - if [ "$_noccache" != "true" ] && pacman -Qq ccache &> /dev/null; then - export PATH="/usr/lib/ccache/bin/:$PATH" - export CCACHE_SLOPPINESS="file_macro,locale,time_macros" - export CCACHE_NOHASHDIR="true" - msg2 'ccache was found and will be used' - fi - - # document the TkG variables, excluding "_", "_EXT_CONFIG_PATH", and "_where". - declare -p | cut -d ' ' -f 3 | grep -P '^_(?!=|EXT_CONFIG_PATH|where)' > "${srcdir}/customization-full.cfg" - - # build! - _runtime=$( time ( schedtool -B -n 1 -e ionice -n 1 make $_force_all_threads LOCALVERSION= bzImage modules 2>&1 ) 3>&1 1>&2 2>&3 ) || _runtime=$( time ( make $_force_all_threads LOCALVERSION= bzImage modules 2>&1 ) 3>&1 1>&2 2>&3 ) -} - -hackbase() { - pkgdesc="The $pkgdesc kernel and modules" - depends=('coreutils' 'kmod' 'initramfs') - optdepends=('linux-docs: Kernel hackers manual - HTML documentation that comes with the Linux kernel.' - 'crda: to set the correct wireless channels of your country.' - 'linux-firmware: Firmware files for Linux' - 'modprobed-db: Keeps track of EVERY kernel module that has ever been probed. Useful for make localmodconfig.' - 'nvidia-tkg: NVIDIA drivers for all installed kernels - non-dkms version.' - 'nvidia-dkms-tkg: NVIDIA drivers for all installed kernels - dkms version.' 
- 'update-grub: Simple wrapper around grub-mkconfig.') - provides=("linux=${pkgver}" "${pkgbase}") - - cd "${srcdir}/linux-${_basekernel}" - - # get kernel version - local _kernver="$( N/y : ' _CONDITION; - fi - if [[ "$_CONDITION" =~ [yY] ]] || [ "$_user_patches_no_confirm" == "true" ]; then - for _f in "${_patches[@]}"; do - if [ -e "${_f}" ]; then - msg2 "######################################################" - msg2 "" - msg2 "Reverting your own ${_userpatch_target} patch ${_f}" - msg2 "" - msg2 "######################################################" - patch -Np1 -R < "${_f}" - echo "Reverted your own patch ${_f}" >> "$_where"/last_build_config.log - fi - done - fi - fi - - _patches=("$_where"/*."${_userpatch_ext}patch") - if [ ${#_patches[@]} -ge 2 ] || [ -e "${_patches}" ]; then - if [ "$_user_patches_no_confirm" != "true" ]; then - msg2 "Found ${#_patches[@]} userpatches for ${_userpatch_target}:" - printf '%s\n' "${_patches[@]}" - read -rp "Do you want to install it/them? - Be careful with that ;)"$'\n> N/y : ' _CONDITION; - fi - if [[ "$_CONDITION" =~ [yY] ]] || [ "$_user_patches_no_confirm" == "true" ]; then - for _f in "${_patches[@]}"; do - if [ -e "${_f}" ]; then - msg2 "######################################################" - msg2 "" - msg2 "Applying your own ${_userpatch_target} patch ${_f}" - msg2 "" - msg2 "######################################################" - patch -Np1 < "${_f}" - echo "Applied your own patch ${_f}" >> "$_where"/last_build_config.log - fi - done - fi - fi -} - -_tkg_srcprep() { - msg2 "Setting version..." - scripts/setlocalversion --save-scmversion - echo "-$pkgrel-tkg-${_cpusched}" > localversion.10-pkgrel - echo "" > localversion.20-pkgname - - # add upstream patch - patch -p1 -i ../patch-"${pkgver}" - - # ARCH Patches - if [ "${_configfile}" == "config_hardened.x86_64" ] && [ "${_cpusched}" == "cfs" ]; then - msg2 "Using linux hardened patchset" - patch -Np1 -i ../0012-linux-hardened.patch - else - patch -Np1 -i ../0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch - fi - - # graysky's cpu opts - https://github.com/graysky2/kernel_gcc_patch - msg2 "Applying graysky's cpu opts patch" - patch -Np1 -i ../enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v4.19-v5.4.patch - - # TkG - msg2 "Applying clear linux patches" - patch -Np1 -i ../0002-clear-patches.patch - - msg2 "Applying glitched base patch" - patch -Np1 -i ../0003-glitched-base.patch - - if [ "${_cpusched}" == "MuQSS" ]; then - # MuQSS - patch -Np1 -i ../0004-5.4-ck1.patch - if [ "${_aggressive_ondemand}" == "true" ]; then - patch -Np1 -i ../0004-glitched-ondemand-muqss.patch - fi - patch -Np1 -i ../0004-glitched-muqss.patch - elif [ "${_cpusched}" == "pds" ]; then - # PDS-mq - patch -Np1 -i ../0005-v5.4_undead-pds099o.patch - if [ "${_aggressive_ondemand}" == "true" ]; then - patch -Np1 -i ../0005-glitched-ondemand-pds.patch - fi - patch -Np1 -i ../0005-glitched-pds.patch - elif [ "${_cpusched}" == "bmq" ]; then - # BMQ - patch -Np1 -i ../0009-bmq_v5.4-r2.patch - patch -Np1 -i ../0009-glitched-bmq.patch - elif [ "${_cpusched}" == "cfs" ]; then - patch -Np1 -i ../0003-glitched-cfs.patch - fi - - if [ -z "${_configfile}" ]; then - _configfile="config.x86_64" - fi - - cat "${srcdir}/${_configfile}" > ./.config - - # Set some -tkg defaults - echo "# CONFIG_DYNAMIC_FAULT is not set" >> ./.config - sed -i -e 's/CONFIG_DEFAULT_FQ_CODEL=y/# CONFIG_DEFAULT_FQ_CODEL is not set/' ./.config - echo "CONFIG_DEFAULT_CAKE=y" >> ./.config - echo "CONFIG_NR_TTY_DEVICES=63" >> 
./.config - echo "CONFIG_TP_SMAPI=m" >> ./.config - echo "CONFIG_RAID6_USE_PREFER_GEN=y" >> ./.config - echo "# CONFIG_NTP_PPS is not set" >> ./.config - echo "# CONFIG_X86_P6_NOP is not set" >> ./.config - sed -i -e 's/CONFIG_CRYPTO_LZ4=m/CONFIG_CRYPTO_LZ4=y/' ./.config - sed -i -e 's/CONFIG_CRYPTO_LZ4HC=m/CONFIG_CRYPTO_LZ4HC=y/' ./.config - sed -i -e 's/CONFIG_LZ4_COMPRESS=m/CONFIG_LZ4_COMPRESS=y/' ./.config - sed -i -e 's/CONFIG_LZ4HC_COMPRESS=m/CONFIG_LZ4HC_COMPRESS=y/' ./.config - sed -i -e 's/CONFIG_RCU_BOOST_DELAY=500/CONFIG_RCU_BOOST_DELAY=0/' ./.config - if [ "$_noccache" != "true" ] && pacman -Qq ccache &> /dev/null; then - sed -i -e 's/CONFIG_GCC_PLUGINS=y/# CONFIG_GCC_PLUGINS is not set/' ./.config - fi - # Skip dbg package creation on non-Arch - #if [ "$_distro" != "Arch" ]; then - # sed -i -e 's/CONFIG_DEBUG_INFO.*/CONFIG_DEBUG_INFO=n/' ./.config - #fi - - # Inject cpuopts options - echo "# CONFIG_MK8SSE3 is not set" >> ./.config - echo "# CONFIG_MK10 is not set" >> ./.config - echo "# CONFIG_MBARCELONA is not set" >> ./.config - echo "# CONFIG_MBOBCAT is not set" >> ./.config - echo "# CONFIG_MJAGUAR is not set" >> ./.config - echo "# CONFIG_MBULLDOZER is not set" >> ./.config - echo "# CONFIG_MPILEDRIVER is not set" >> ./.config - echo "# CONFIG_MSTEAMROLLER is not set" >> ./.config - echo "# CONFIG_MEXCAVATOR is not set" >> ./.config - echo "# CONFIG_MZEN is not set" >> ./.config - echo "# CONFIG_MZEN2 is not set" >> ./.config - echo "# CONFIG_MATOM is not set" >> ./.config - echo "# CONFIG_MNEHALEM is not set" >> ./.config - echo "# CONFIG_MWESTMERE is not set" >> ./.config - echo "# CONFIG_MSILVERMONT is not set" >> ./.config - echo "# CONFIG_MSANDYBRIDGE is not set" >> ./.config - echo "# CONFIG_MIVYBRIDGE is not set" >> ./.config - echo "# CONFIG_MHASWELL is not set" >> ./.config - echo "# CONFIG_MBROADWELL is not set" >> ./.config - echo "# CONFIG_MSKYLAKE is not set" >> ./.config - echo "# CONFIG_MSKYLAKEX is not set" >> ./.config - echo "# CONFIG_MCANNONLAKE is not set" >> ./.config - echo "# CONFIG_MICELAKE is not set" >> ./.config - echo "# CONFIG_MGOLDMONT is not set" >> ./.config - echo "# CONFIG_MGOLDMONTPLUS is not set" >> ./.config - echo "# CONFIG_MCASCADELAKE is not set" >> ./.config - echo "# CONFIG_MCOOPERLAKE is not set" >> ./.config - echo "# CONFIG_MTIGERLAKE is not set" >> ./.config - - # Disable some debugging - if [ "${_debugdisable}" == "true" ]; then - sed -i -e 's/CONFIG_SLUB_DEBUG=y/# CONFIG_SLUB_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_DEBUG=y/# CONFIG_PM_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_ADVANCED_DEBUG=y/# CONFIG_PM_ADVANCED_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_SLEEP_DEBUG=y/# CONFIG_PM_SLEEP_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_ACPI_DEBUG=y/# CONFIG_ACPI_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_SCHED_DEBUG=y/# CONFIG_SCHED_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_LATENCYTOP=y/# CONFIG_LATENCYTOP is not set/' ./.config - sed -i -e 's/CONFIG_DEBUG_PREEMPT=y/# CONFIG_DEBUG_PREEMPT is not set/' ./.config - fi - - if [ "${_cpusched}" == "MuQSS" ]; then - # MuQSS default config - echo "CONFIG_SCHED_MUQSS=y" >> ./.config - elif [ "${_cpusched}" == "pds" ]; then - # PDS default config - echo "CONFIG_SCHED_PDS=y" >> ./.config - elif [ "${_cpusched}" == "bmq" ]; then - # BMQ default config - echo "CONFIG_SCHED_BMQ=y" >> ./.config - fi - - if [ "${_cpusched}" == "MuQSS" ] || [ "${_cpusched}" == "pds" ] || [ "${_cpusched}" == "bmq" ]; then - # Disable CFS - sed -i -e 
's/CONFIG_FAIR_GROUP_SCHED=y/# CONFIG_FAIR_GROUP_SCHED is not set/' ./.config - sed -i -e 's/CONFIG_CFS_BANDWIDTH=y/# CONFIG_CFS_BANDWIDTH is not set/' ./.config - sed -i -e 's/CONFIG_CGROUP_CPUACCT=y/# CONFIG_CGROUP_CPUACCT is not set/' ./.config - sed -i -e 's/CONFIG_SCHED_AUTOGROUP=y/# CONFIG_SCHED_AUTOGROUP is not set/' ./.config - # sched yield type - if [ -n "$_sched_yield_type" ]; then - CONDITION0="$_sched_yield_type" - else - plain "" - plain "CPU sched_yield_type - Choose what sort of yield sched_yield will perform." - plain "" - plain "For PDS and MuQSS:" - plain "0: No yield." - plain "1: Yield only to better priority/deadline tasks." - plain "2: Expire timeslice and recalculate deadline." - plain "" - plain "For BMQ (experimental) - No recommended value yet, so try for yourself x) :" - plain "0: No yield." - plain "1: Deboost and requeue task. (default)" - plain "2: Set rq skip task." - read -rp "`echo $'\n > 0. Recommended option for gaming on PDS and MuQSS - "tkg" default\n 1. Default, but can lead to stability issues on some platforms\n 2. Can be a good option with low rr_interval on MuQSS\n [0-2?]: '`" CONDITION0; - fi - if [ "$CONDITION0" == "1" ]; then - msg2 "Using default CPU sched yield type (1)" - elif [ "$CONDITION0" == "2" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 2;/' ./kernel/sched/"${_cpusched}".c - else - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/"${_cpusched}".c - fi - fi - - # Round Robin interval - if [ "${_cpusched}" == "MuQSS" ] || [ "${_cpusched}" == "pds" ] || [ "${_cpusched}" == "bmq" ]; then - if [ -n "$_rr_interval" ]; then - CONDITION1="$_rr_interval" - else - plain "" - plain "Round Robin interval is the longest duration two tasks with the same nice level will" - plain "be delayed for. When CPU time is requested by a task, it receives a time slice equal" - plain "to the rr_interval in addition to a virtual deadline. When using yield_type 2, a low" - plain "value can help offset the disadvantages of rescheduling a process that has yielded." 
- plain "" - plain "MuQSS default: 6ms" - plain "PDS default: 4ms" - plain "BMQ default: 2ms" - read -rp "`echo $'\n > 0.Keep defaults\n 1.2ms (worth a shot with MuQSS + yield_type 2)\n 2.4ms\n 3.6ms\n 4.8ms\n [0-4?]: '`" CONDITION1; - fi - if [ "$CONDITION1" == "1" ]; then - msg2 "Using 2ms rr_interval" - _rrvalue="2" - elif [ "$CONDITION1" == "2" ]; then - msg2 "Using 4ms rr_interval" - _rrvalue="4" - elif [ "$CONDITION1" == "3" ]; then - msg2 "Using 6ms rr_interval" - _rrvalue="6" - elif [ "$CONDITION1" == "4" ]; then - msg2 "Using 8ms rr_interval" - _rrvalue="8" - else - msg2 "Using default rr_interval" - _rrvalue="default" - fi - if [ "$_rrvalue" != "default" ]; then - if [ "${_cpusched}" == "MuQSS" ]; then - sed -i -e "s/int rr_interval __read_mostly = 6;/int rr_interval __read_mostly = ${_rrvalue};/" ./kernel/sched/"${_cpusched}".c - elif [ "${_cpusched}" == "pds" ]; then - sed -i -e "s/#define SCHED_DEFAULT_RR (4)/#define SCHED_DEFAULT_RR (${_rrvalue})/" ./kernel/sched/"${_cpusched}".c - elif [ "${_cpusched}" == "bmq" ]; then - echo "CONFIG_SCHED_TIMESLICE=${_rrvalue}" >> ./.config - fi - else - if [ "${_cpusched}" == "bmq" ]; then - echo "CONFIG_SCHED_TIMESLICE=2" >> ./.config - fi - fi - fi - - # zenify - if [ "$_zenify" == "true" ]; then - echo "CONFIG_ZENIFY=y" >> ./.config - elif [ "$_zenify" == "false" ]; then - echo "# CONFIG_ZENIFY is not set" >> ./.config - fi - - # compiler optimization level - if [ "$_compileroptlevel" == "1" ]; then - echo "# CONFIG_CC_OPTIMIZE_HARDER is not set" >> ./.config - elif [ "$_compileroptlevel" == "2" ]; then - sed -i -e 's/CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y/# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set/' ./.config - echo "CONFIG_CC_OPTIMIZE_HARDER=y" >> ./.config - elif [ "$_compileroptlevel" == "3" ]; then - sed -i -e 's/CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y/# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set/' ./.config - sed -i -e 's/# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set/CONFIG_CC_OPTIMIZE_FOR_SIZE=y/' ./.config - echo "# CONFIG_CC_OPTIMIZE_HARDER is not set" >> ./.config - fi - - # cpu opt - if [ -n "$_processor_opt" ] && [ "$_processor_opt" != "native" ]; then - echo "# CONFIG_MNATIVE is not set" >> ./.config - fi - - if [ -n "$_processor_opt" ] && [ "$_processor_opt" != "generic" ]; then - sed -i -e 's/CONFIG_GENERIC_CPU=y/# CONFIG_GENERIC_CPU is not set/' ./.config - fi - - if [ "$_processor_opt" == "native" ]; then - echo "CONFIG_MNATIVE=y" >> ./.config - elif [ "$_processor_opt" == "k8" ]; then - sed -i -e 's/# CONFIG_MK8 is not set/CONFIG_MK8=y/' ./.config - elif [ "$_processor_opt" == "k8sse3" ]; then - sed -i -e 's/# CONFIG_MK8SSE3 is not set/CONFIG_MK8SSE3=y/' ./.config - elif [ "$_processor_opt" == "k10" ]; then - sed -i -e 's/# CONFIG_MK10 is not set/CONFIG_MK10=y/' ./.config - elif [ "$_processor_opt" == "barcelona" ]; then - sed -i -e 's/# CONFIG_MBARCELONA is not set/CONFIG_MBARCELONA=y/' ./.config - elif [ "$_processor_opt" == "bobcat" ]; then - sed -i -e 's/# CONFIG_MBOBCAT is not set/CONFIG_MBOBCAT=y/' ./.config - elif [ "$_processor_opt" == "jaguar" ]; then - sed -i -e 's/# CONFIG_MJAGUAR is not set/CONFIG_MJAGUAR=y/' ./.config - elif [ "$_processor_opt" == "bulldozer" ]; then - sed -i -e 's/# CONFIG_MBULLDOZER is not set/CONFIG_MBULLDOZER=y/' ./.config - elif [ "$_processor_opt" == "piledriver" ]; then - sed -i -e 's/# CONFIG_MPILEDRIVER is not set/CONFIG_MPILEDRIVER=y/' ./.config - elif [ "$_processor_opt" == "steamroller" ]; then - sed -i -e 's/# CONFIG_MSTEAMROLLER is not set/CONFIG_MSTEAMROLLER=y/' ./.config - 
elif [ "$_processor_opt" == "excavator" ]; then - sed -i -e 's/# CONFIG_MEXCAVATOR is not set/CONFIG_MEXCAVATOR=y/' ./.config - elif [ "$_processor_opt" == "zen" ]; then - sed -i -e 's/# CONFIG_MZEN is not set/CONFIG_MZEN=y/' ./.config - elif [ "$_processor_opt" == "zen2" ]; then - sed -i -e 's/# CONFIG_MZEN2 is not set/CONFIG_MZEN2=y/' ./.config - elif [ "$_processor_opt" == "mpsc" ]; then - sed -i -e 's/# CONFIG_MPSC is not set/CONFIG_MPSC=y/' ./.config - elif [ "$_processor_opt" == "atom" ]; then - sed -i -e 's/# CONFIG_MATOM is not set/CONFIG_MATOM=y/' ./.config - elif [ "$_processor_opt" == "core2" ]; then - sed -i -e 's/# CONFIG_MCORE2 is not set/CONFIG_MCORE2=y/' ./.config - elif [ "$_processor_opt" == "nehalem" ]; then - sed -i -e 's/# CONFIG_MNEHALEM is not set/CONFIG_MNEHALEM=y/' ./.config - elif [ "$_processor_opt" == "westmere" ]; then - sed -i -e 's/# CONFIG_MWESTMERE is not set/CONFIG_MWESTMERE=y/' ./.config - elif [ "$_processor_opt" == "silvermont" ]; then - sed -i -e 's/# CONFIG_MSILVERMONT is not set/CONFIG_MSILVERMONT=y/' ./.config - elif [ "$_processor_opt" == "sandybridge" ]; then - sed -i -e 's/# CONFIG_MSANDYBRIDGE is not set/CONFIG_MSANDYBRIDGE=y/' ./.config - elif [ "$_processor_opt" == "ivybridge" ]; then - sed -i -e 's/# CONFIG_MIVYBRIDGE is not set/CONFIG_MIVYBRIDGE=y/' ./.config - elif [ "$_processor_opt" == "haswell" ]; then - sed -i -e 's/# CONFIG_MHASWELL is not set/CONFIG_MHASWELL=y/' ./.config - elif [ "$_processor_opt" == "broadwell" ]; then - sed -i -e 's/# CONFIG_MBROADWELL is not set/CONFIG_MBROADWELL=y/' ./.config - elif [ "$_processor_opt" == "skylake" ]; then - sed -i -e 's/# CONFIG_MSKYLAKE is not set/CONFIG_MSKYLAKE=y/' ./.config - elif [ "$_processor_opt" == "skylakex" ]; then - sed -i -e 's/# CONFIG_MSKYLAKEX is not set/CONFIG_MSKYLAKEX=y/' ./.config - elif [ "$_processor_opt" == "cannonlake" ]; then - sed -i -e 's/# CONFIG_MCANNONLAKE is not set/CONFIG_MCANNONLAKE=y/' ./.config - elif [ "$_processor_opt" == "icelake" ]; then - sed -i -e 's/# CONFIG_MICELAKE is not set/CONFIG_MICELAKE=y/' ./.config - elif [ "$_processor_opt" == "goldmont" ]; then - sed -i -e 's/# CONFIG_MGOLDMONT is not set/CONFIG_MGOLDMONT=y/' ./.config - elif [ "$_processor_opt" == "goldmontplus" ]; then - sed -i -e 's/# CONFIG_MGOLDMONTPLUS is not set/CONFIG_MGOLDMONTPLUS=y/' ./.config - elif [ "$_processor_opt" == "cascadelake" ]; then - sed -i -e 's/# CONFIG_MCASCADELAKE is not set/CONFIG_MCASCADELAKE=y/' ./.config - elif [ "$_processor_opt" == "cooperlake" ]; then - sed -i -e 's/# CONFIG_MCOOPERLAKE is not set/CONFIG_MCOOPERLAKE=y/' ./.config - elif [ "$_processor_opt" == "tigerlake" ]; then - sed -i -e 's/# CONFIG_MTIGERLAKE is not set/CONFIG_MTIGERLAKE=y/' ./.config - fi - - # irq threading - if [ "$_irq_threading" == "true" ]; then - echo "CONFIG_FORCE_IRQ_THREADING=y" >> ./.config - elif [ "$_irq_threading" == "false" ]; then - echo "# CONFIG_FORCE_IRQ_THREADING is not set" >> ./.config - fi - - # smt nice - if [ "$_smt_nice" == "true" ]; then - echo "CONFIG_SMT_NICE=y" >> ./.config - elif [ "$_smt_nice" == "false" ]; then - echo "# CONFIG_SMT_NICE is not set" >> ./.config - fi - - # random trust cpu - if [ "$_random_trust_cpu" == "true" ]; then - sed -i -e 's/# CONFIG_RANDOM_TRUST_CPU is not set/CONFIG_RANDOM_TRUST_CPU=y/' ./.config - fi - - # rq sharing - if [ "$_runqueue_sharing" == "none" ]; then - echo -e "CONFIG_RQ_NONE=y\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" 
>> ./.config - elif [ "$_runqueue_sharing" == "smt" ]; then - echo -e "# CONFIG_RQ_NONE is not set\nCONFIG_RQ_SMT=y\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" == "mc" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\nCONFIG_RQ_MC=y\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" == "smp" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\nCONFIG_RQ_SMP=y\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" == "all" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\nCONFIG_RQ_ALL=y" >> ./.config - elif [ "$_runqueue_sharing" == "mc-llc" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\nCONFIG_RQ_MC_LLC=y\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - fi - - # timer freq - if [ -n "$_timer_freq" ] && [ "$_timer_freq" != "300" ]; then - sed -i -e 's/CONFIG_HZ_300=y/# CONFIG_HZ_300 is not set/' ./.config - sed -i -e 's/CONFIG_HZ_300_NODEF=y/# CONFIG_HZ_300_NODEF is not set/' ./.config - if [ "$_timer_freq" == "1000" ]; then - sed -i -e 's/# CONFIG_HZ_1000 is not set/CONFIG_HZ_1000=y/' ./.config - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=1000/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "CONFIG_HZ_1000_NODEF=y" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" == "750" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=750/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "CONFIG_HZ_750=y" >> ./.config - echo "CONFIG_HZ_750_NODEF=y" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" == "500" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=500/' ./.config - echo "CONFIG_HZ_500=y" >> ./.config - echo "CONFIG_HZ_500_NODEF=y" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" == "100" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=100/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - echo "CONFIG_HZ_100=y" >> ./.config - echo "CONFIG_HZ_100_NODEF=y" >> ./.config - fi - elif [ "${_cpusched}" == "MuQSS" ] && [ -z "$_timer_freq" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=100/' ./.config - echo "# CONFIG_HZ_500 is not set" >> 
./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - echo "CONFIG_HZ_100=y" >> ./.config - echo "CONFIG_HZ_100_NODEF=y" >> ./.config - else - sed -i -e 's/CONFIG_HZ_300=y/# CONFIG_HZ_300 is not set/' ./.config - sed -i -e 's/CONFIG_HZ_300_NODEF=y/# CONFIG_HZ_300_NODEF is not set/' ./.config - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=500/' ./.config - echo "CONFIG_HZ_500=y" >> ./.config - echo "CONFIG_HZ_500_NODEF=y" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - fi - - # default cpu gov - if [ "$_default_cpu_gov" == "performance" ]; then - sed -i -e 's/CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y/# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set/' ./.config - sed -i -e 's/# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set/CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y/' ./.config - elif [ "$_default_cpu_gov" == "ondemand" ]; then - sed -i -e 's/CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y/# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set/' ./.config - sed -i -e 's/# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set/CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y/' ./.config - fi - - # ACPI_CPUFREQ disablement - if [ "$_disable_acpi_cpufreq" == "true" ]; then - sed -i -e 's/CONFIG_X86_ACPI_CPUFREQ=m/# CONFIG_X86_ACPI_CPUFREQ is not set/' ./.config - fi - - # ftrace - if [ -z "$_ftracedisable" ]; then - plain "" - plain "Disable FUNCTION_TRACER/GRAPH_TRACER? Lowers overhead but limits debugging" - plain "and analyzing of kernel functions." - read -rp "`echo $' > N/y : '`" CONDITION2; - fi - if [[ "$CONDITION2" =~ [yY] ]] || [ "$_ftracedisable" == "true" ]; then - sed -i -e 's/CONFIG_FUNCTION_TRACER=y/# CONFIG_FUNCTION_TRACER is not set/' ./.config - sed -i -e 's/CONFIG_FUNCTION_GRAPH_TRACER=y/# CONFIG_FUNCTION_GRAPH_TRACER is not set/' ./.config - fi - - # disable numa - if [ -z "$_numadisable" ]; then - plain "" - plain "Disable NUMA? Lowers overhead, but breaks CUDA/NvEnc on Nvidia if disabled." - plain "https://bbs.archlinux.org/viewtopic.php?id=239174" - read -rp "`echo $' > N/y : '`" CONDITION3; - fi - if [[ "$CONDITION3" =~ [yY] ]] || [ "$_numadisable" == "true" ]; then - # disable NUMA since 99.9% of users do not have multiple CPUs but do have multiple cores in one CPU - sed -i -e 's/CONFIG_NUMA=y/# CONFIG_NUMA is not set/' \ - -i -e '/CONFIG_AMD_NUMA=y/d' \ - -i -e '/CONFIG_X86_64_ACPI_NUMA=y/d' \ - -i -e '/CONFIG_NODES_SPAN_OTHER_NODES=y/d' \ - -i -e '/# CONFIG_NUMA_EMU is not set/d' \ - -i -e '/CONFIG_NODES_SHIFT=6/d' \ - -i -e '/CONFIG_NEED_MULTIPLE_NODES=y/d' \ - -i -e '/CONFIG_USE_PERCPU_NUMA_NODE_ID=y/d' \ - -i -e '/CONFIG_ACPI_NUMA=y/d' ./.config - fi - - # tickless - if [ -z "$_tickless" ]; then - plain "" - plain "Use CattaRappa mode (Tickless/Dynticks) ?" - plain "Can give higher performances in many cases but lower consistency on some hardware." - plain "Just tickless idle can perform better on some platforms (mostly AMD based)." 
- read -rp "`echo $'\n 0.No, use periodic ticks\n > 1.Yes, full tickless baby!\n 2.Just tickless idle plz\n [0-2?]: '`" CONDITION4; - fi - if [ "$CONDITION4" == "0" ] || [ "$_tickless" == "0" ]; then - echo "# CONFIG_NO_HZ_FULL_NODEF is not set" >> ./.config - sed -i -e 's/# CONFIG_HZ_PERIODIC is not set/CONFIG_HZ_PERIODIC=y/' ./.config - sed -i -e 's/CONFIG_NO_HZ_IDLE=y/# CONFIG_NO_HZ_IDLE is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_FULL=y/# CONFIG_NO_HZ_FULL is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ=y/# CONFIG_NO_HZ is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_COMMON=y/# CONFIG_NO_HZ_COMMON is not set/' ./.config - elif [ "$CONDITION4" == "2" ] || [ "$_tickless" == "2" ]; then - echo "# CONFIG_NO_HZ_FULL_NODEF is not set" >> ./.config - sed -i -e 's/CONFIG_HZ_PERIODIC=y/# CONFIG_HZ_PERIODIC is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_IDLE is not set/CONFIG_NO_HZ_IDLE=y/' ./.config - sed -i -e 's/CONFIG_NO_HZ_FULL=y/# CONFIG_NO_HZ_FULL is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ is not set/CONFIG_NO_HZ=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_COMMON is not set/CONFIG_NO_HZ_COMMON=y/' ./.config - else - echo "CONFIG_NO_HZ_FULL_NODEF=y" >> ./.config - sed -i -e 's/CONFIG_HZ_PERIODIC=y/# CONFIG_HZ_PERIODIC is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_IDLE=y/# CONFIG_NO_HZ_IDLE is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_FULL is not set/CONFIG_NO_HZ_FULL=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ is not set/CONFIG_NO_HZ=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_COMMON is not set/CONFIG_NO_HZ_COMMON=y/' ./.config - echo "CONFIG_CONTEXT_TRACKING=y" >> ./.config - echo "# CONFIG_CONTEXT_TRACKING_FORCE is not set" >> ./.config - fi - - # voluntary preempt - if [ -z "$_voluntary_preempt" ]; then - plain "" - plain "Use explicit preemption points?" - plain "It can improve latency on PDS (at the cost of throughput)" - plain "and improve throughput on other schedulers (at the cost of latency)" - read -rp "`echo $' > N/y : '`" CONDITION5; - fi - if [[ "$CONDITION5" =~ [yY] ]] || [ "$_voluntary_preempt" == "true" ]; then - sed -i -e 's/CONFIG_PREEMPT=y/# CONFIG_PREEMPT is not set/' ./.config - sed -i -e 's/CONFIG_PREEMPT_LL=y/# CONFIG_PREEMPT_LL is not set/' ./.config - sed -i -e 's/# CONFIG_PREEMPT_VOLUNTARY is not set/CONFIG_PREEMPT_VOLUNTARY=y/' ./.config - fi - - # Open Firmware support - if [ -z "$_OFenable" ]; then - plain "" - plain "Enable Device Tree and Open Firmware support?" - read -rp "`echo $' > N/y : '`" CONDITION6; - fi - if [[ "$CONDITION6" =~ [yY] ]] || [ "$_OFenable" == "true" ]; then - sed -i -e 's/# CONFIG_OF is not set/CONFIG_OF=y/' ./.config - fi - - # acs override - if [ -z "$_acs_override" ]; then - plain "" - plain "Use ACS override patch?" - plain "https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29" - read -rp "`echo $' > N/y : '`" CONDITION7; - fi - if [[ "$CONDITION7" =~ [yY] ]] || [ "$_acs_override" == "true" ]; then - patch -Np1 -i ../0006-add-acs-overrides_iommu.patch - fi - - # bcachefs -# if [ -z "$_bcachefs" ]; then -# plain "" -# plain "Add Bcache filesystem support? You'll have to install bcachefs-tools-git from AUR for utilities." 
-# plain "https://bcachefs.org/" -# read -rp "`echo $' > N/y : '`" CONDITION8; -# fi -# if [[ "$CONDITION8" =~ [yY] ]] || [ "$_bcachefs" == "true" ]; then -# patch -Np1 -i ../0008-5.4-bcachefs.patch -# echo "CONFIG_BCACHEFS_FS=m" >> ./.config -# echo "CONFIG_BCACHEFS_QUOTA=y" >> ./.config -# echo "CONFIG_BCACHEFS_POSIX_ACL=y" >> ./.config -# echo "# CONFIG_BCACHEFS_DEBUG is not set" >> ./.config -# echo "# CONFIG_BCACHEFS_TESTS is not set" >> ./.config -# echo "# CONFIG_DEBUG_CLOSURES is not set" >> ./.config -# fi - - # fsync support - if [ -z "$_fsync" ]; then - plain "" - plain "Enable support for fsync, an experimental replacement for esync in Valve Proton 4.11+" - plain "https://steamcommunity.com/games/221410/announcements/detail/2957094910196249305" - read -rp "`echo $' > N/y : '`" CONDITION9; - fi - if [[ "$CONDITION9" =~ [yY] ]] || [ "$_fsync" == "true" ]; then - patch -Np1 -i ../0007-v5.4-fsync.patch - fi - - # ZFS fix - if [ -z "$_zfsfix" ]; then - plain "" - plain "Add back missing symbol for AES-NI/AVX support on ZFS" - plain "https://github.com/NixOS/nixpkgs/blob/master/pkgs/os-specific/linux/kernel/export_kernel_fpu_functions_5_3.patch" - read -rp "`echo $' > N/y : '`" CONDITION11; - fi - if [[ "$CONDITION11" =~ [yY] ]] || [ "$_zfsfix" == "true" ]; then - patch -Np1 -i ../0011-ZFS-fix.patch - fi - - # Community patches - if [ -n "$_community_patches" ]; then - if [ ! -d "$_where/../../community-patches" ]; then - cd "$_where/../.." && git clone https://github.com/Frogging-Family/community-patches.git && cd "${srcdir}/linux-${_basekernel}" - fi - _community_patches=($_community_patches) - for _p in ${_community_patches[@]}; do - ln -s "$_where"/../../community-patches/linux54-tkg/$_p "$_where"/ - done - fi - - # userpatches - if [ "$_user_patches" == "true" ]; then - _userpatch_target="linux-${_basekernel}" - _userpatch_ext="my" - user_patcher - fi - - # Community patches removal - for _p in ${_community_patches[@]}; do - rm -f "$_where"/$_p - done - - # don't run depmod on 'make install'. We'll do this ourselves in packaging - sed -i '2iexit 0' scripts/depmod.sh - - # get kernel version - make prepare - - # modprobed-db - if [ -z "$_modprobeddb" ]; then - plain "" - plain "Use modprobed db to clean config from unneeded modules?" - plain "Speeds up compilation considerably. Requires root." - plain "https://wiki.archlinux.org/index.php/Modprobed-db" - plain "!!!! Make sure to have a well populated db !!!!" - read -rp "`echo $' > N/y : '`" CONDITIONMPDB; - fi - if [[ "$CONDITIONMPDB" =~ [yY] ]] || [ "$_modprobeddb" == "true" ]; then - sudo modprobed-db recall - yes "" | make localmodconfig - fi - - if [ true = "$_config_fragments" ]; then - local fragments=() - mapfile -d '' -t fragments < <(find "$_where"/ -type f -name "*.myfrag" -print0) - - if [ true = "$_config_fragments_no_confirm" ]; then - printf 'Using config fragment %s\n' "${fragments[@]#$_where/}" - else - for i in "${!fragments[@]}"; do - while true; do - read -r -p 'Found config fragment '"${fragments[$i]#$_where/}"', apply it? 
[y/N] ' CONDITIONMPDB - CONDITIONMPDB="$(printf '%s' "$CONDITIONMPDB" | tr '[:upper:]' '[:lower:]')" - case "$CONDITIONMPDB" in - y|yes) - break;; - n|no|'') - unset fragments[$i] - break;; - *) - echo 'Please answer with yes or no' - esac - done - done - fi - - if [ 0 -lt "${#fragments[@]}" ]; then - scripts/kconfig/merge_config.sh -m .config "${fragments[@]}" - fi - fi - - # menuconfig / nconfig - if [ -z "$_menunconfig" ]; then - plain "" - plain "*Optional* For advanced users - Do you want to use make menuconfig or nconfig" - plain "to configure the kernel before building it?" - plain "If you do, make sure your terminal is currently" - plain "at least 19 lines by 80 columns large or you'll get an error :D" - read -rp "`echo $' > 0. nope\n 1. menuconfig\n 2. nconfig\n choice[0-2?]: '`" CONDITIONMNC; - _menunconfig="$CONDITIONMNC" - fi - if [ 1 = "$_menunconfig" ]; then - cp .config .config.orig - make menuconfig - elif [ 2 = "$_menunconfig" ]; then - cp .config .config.orig - make nconfig - else - # rewrite configuration - yes "" | make config >/dev/null - fi - if [ 1 = "$_menunconfig" ] || [ 2 = "$_menunconfig" ]; then - if [ -z "${_diffconfig}" ]; then - while true; do - read -r -p 'Generate a config fragment from your changes? [y/N] ' CONDITIONF - CONDITIONF="$(printf '%s' "$CONDITIONF" | tr '[:upper:]' '[:lower:]')" - case "$CONDITIONF" in - y|yes) - _diffconfig=true - break;; - n|no|'') - _diffconfig=false - break;; - *) - echo 'Please answer with yes or no' - esac - done - fi - if [ true = "$_diffconfig" ]; then - if [ -z "$_diffconfig_name" ]; then - IFS= read -r -p 'Filename for the config fragment [leave empty to not generate fragment]: ' _diffconfig_name - fi - if [ -z "$_diffconfig_name" ]; then - echo 'No file name given, not generating config fragment.' - else ( - prev_pwd="${PWD:-$(pwd)}" - cd "$_where" - "${prev_pwd}/scripts/diffconfig" -m "${prev_pwd}/.config.orig" "${prev_pwd}/.config" > "$_diffconfig_name" - ) fi - fi - rm .config.orig - fi - - make -s kernelrelease > version - msg2 "Prepared %s version %s" "$pkgbase" "$( -From: Serge Hallyn -Date: Fri, 31 May 2013 19:12:12 +0100 -Subject: [PATCH] add sysctl to disallow unprivileged CLONE_NEWUSER by default - -Signed-off-by: Serge Hallyn -[bwh: Remove unneeded binary sysctl bits] -Signed-off-by: Daniel Micay ---- - kernel/fork.c | 15 +++++++++++++++ - kernel/sysctl.c | 12 ++++++++++++ - kernel/user_namespace.c | 3 +++ - 3 files changed, 30 insertions(+) - -diff --git a/kernel/fork.c b/kernel/fork.c -index 07cc743698d3668e..4011d68a8ff9305c 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -102,6 +102,11 @@ - - #define CREATE_TRACE_POINTS - #include -+#ifdef CONFIG_USER_NS -+extern int unprivileged_userns_clone; -+#else -+#define unprivileged_userns_clone 0 -+#endif - - /* - * Minimum number of threads to boot the kernel -@@ -1555,6 +1560,10 @@ static __latent_entropy struct task_struct *copy_process( - if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) - return ERR_PTR(-EINVAL); - -+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) -+ if (!capable(CAP_SYS_ADMIN)) -+ return ERR_PTR(-EPERM); -+ - /* - * Thread groups must share signals as well, and detached threads - * can only be started up within the thread group. 
-@@ -2348,6 +2357,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) - if (unshare_flags & CLONE_NEWNS) - unshare_flags |= CLONE_FS; - -+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { -+ err = -EPERM; -+ if (!capable(CAP_SYS_ADMIN)) -+ goto bad_unshare_out; -+ } -+ - err = check_unshare_flags(unshare_flags); - if (err) - goto bad_unshare_out; -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index b86520ed3fb60fbf..f7dab3760839f1a1 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -105,6 +105,9 @@ extern int core_uses_pid; - extern char core_pattern[]; - extern unsigned int core_pipe_limit; - #endif -+#ifdef CONFIG_USER_NS -+extern int unprivileged_userns_clone; -+#endif - extern int pid_max; - extern int pid_max_min, pid_max_max; - extern int percpu_pagelist_fraction; -@@ -513,6 +516,15 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_USER_NS -+ { -+ .procname = "unprivileged_userns_clone", -+ .data = &unprivileged_userns_clone, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+#endif - #ifdef CONFIG_PROC_SYSCTL - { - .procname = "tainted", -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index c490f1e4313b998a..dd03bd39d7bf194d 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -24,6 +24,9 @@ - #include - #include - -+/* sysctl */ -+int unprivileged_userns_clone; -+ - static struct kmem_cache *user_ns_cachep __read_mostly; - static DEFINE_MUTEX(userns_state_mutex); - --- -2.15.1 - -From b5202296055dd333db4425120d3f93ef4e6a0573 Mon Sep 17 00:00:00 2001 -From: "Jan Alexander Steffens (heftig)" -Date: Thu, 7 Dec 2017 13:50:48 +0100 -Subject: ZEN: Add CONFIG for unprivileged_userns_clone - -This way our default behavior continues to match the vanilla kernel. ---- - init/Kconfig | 16 ++++++++++++++++ - kernel/user_namespace.c | 4 ++++ - 2 files changed, 20 insertions(+) - -diff --git a/init/Kconfig b/init/Kconfig -index 4592bf7997c0..f3df02990aff 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1004,6 +1004,22 @@ config USER_NS - - If unsure, say N. - -+config USER_NS_UNPRIVILEGED -+ bool "Allow unprivileged users to create namespaces" -+ default y -+ depends on USER_NS -+ help -+ When disabled, unprivileged users will not be able to create -+ new namespaces. Allowing users to create their own namespaces -+ has been part of several recent local privilege escalation -+ exploits, so if you need user namespaces but are -+ paranoid^Wsecurity-conscious you want to disable this. -+ -+ This setting can be overridden at runtime via the -+ kernel.unprivileged_userns_clone sysctl. -+ -+ If unsure, say Y. 
-+ - config PID_NS - bool "PID Namespaces" - default y -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 6b9dbc257e34..107b17f0d528 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -27,7 +27,11 @@ - #include - - /* sysctl */ -+#ifdef CONFIG_USER_NS_UNPRIVILEGED -+int unprivileged_userns_clone = 1; -+#else - int unprivileged_userns_clone; -+#endif - - static struct kmem_cache *user_ns_cachep __read_mostly; - static DEFINE_MUTEX(userns_state_mutex); diff --git a/linux54-tkg/linux54-tkg-patches/0002-clear-patches.patch b/linux54-tkg/linux54-tkg-patches/0002-clear-patches.patch deleted file mode 100644 index a7c9d4a..0000000 --- a/linux54-tkg/linux54-tkg-patches/0002-clear-patches.patch +++ /dev/null @@ -1,354 +0,0 @@ -From 2ac70785613ef4c6b16414986bb18bd7b60d2a13 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Mon, 14 Mar 2016 11:10:58 -0600 -Subject: [PATCH] pci pme wakeups - -Reduce wakeups for PME checks, which are a workaround for miswired -boards (sadly, too many of them) in laptops. ---- - drivers/pci/pci.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c -index c25acace7d91..0ddebdad9f5b 100644 ---- a/drivers/pci/pci.c -+++ b/drivers/pci/pci.c -@@ -61,7 +61,7 @@ struct pci_pme_device { - struct pci_dev *dev; - }; - --#define PME_TIMEOUT 1000 /* How long between PME checks */ -+#define PME_TIMEOUT 4000 /* How long between PME checks */ - - static void pci_dev_d3_sleep(struct pci_dev *dev) - { --- -2.20.1 - -From 7e7e36c67aa71d6a1ec5676d99d37c1fea389ceb Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Sat, 19 Mar 2016 21:32:19 -0400 -Subject: [PATCH] intel_idle: tweak cpuidle cstates - -Increase target_residency in cpuidle cstate - -Tune intel_idle to be a bit less agressive; -Clear linux is cleaner in hygiene (wakupes) than the average linux, -so we can afford changing these in a way that increases -performance while keeping power efficiency ---- - drivers/idle/intel_idle.c | 44 +++++++++++++++++++-------------------- - 1 file changed, 22 insertions(+), 22 deletions(-) - -diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c -index 8b5d85c91e9d..5e2d813a048d 100644 ---- a/drivers/idle/intel_idle.c -+++ b/drivers/idle/intel_idle.c -@@ -466,7 +466,7 @@ static struct cpuidle_state hsw_cstates[] = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -474,7 +474,7 @@ static struct cpuidle_state hsw_cstates[] = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 33, -- .target_residency = 100, -+ .target_residency = 900, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -482,7 +482,7 @@ static struct cpuidle_state hsw_cstates[] = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 133, -- .target_residency = 400, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -490,7 +490,7 @@ static struct cpuidle_state hsw_cstates[] = { - .desc = "MWAIT 0x32", - .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 166, -- .target_residency = 500, -+ .target_residency = 1500, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -498,7 +498,7 @@ static struct cpuidle_state hsw_cstates[] = { - .desc = "MWAIT 0x40", - .flags = 
MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 300, -- .target_residency = 900, -+ .target_residency = 2000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -506,7 +506,7 @@ static struct cpuidle_state hsw_cstates[] = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 600, -- .target_residency = 1800, -+ .target_residency = 5000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -514,7 +514,7 @@ static struct cpuidle_state hsw_cstates[] = { - .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 2600, -- .target_residency = 7700, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -534,7 +534,7 @@ static struct cpuidle_state bdw_cstates[] = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -542,7 +542,7 @@ static struct cpuidle_state bdw_cstates[] = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 40, -- .target_residency = 100, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -550,7 +550,7 @@ static struct cpuidle_state bdw_cstates[] = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 133, -- .target_residency = 400, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -558,7 +558,7 @@ static struct cpuidle_state bdw_cstates[] = { - .desc = "MWAIT 0x32", - .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 166, -- .target_residency = 500, -+ .target_residency = 2000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -566,7 +566,7 @@ static struct cpuidle_state bdw_cstates[] = { - .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 300, -- .target_residency = 900, -+ .target_residency = 4000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -574,7 +574,7 @@ static struct cpuidle_state bdw_cstates[] = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 600, -- .target_residency = 1800, -+ .target_residency = 7000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -582,7 +582,7 @@ static struct cpuidle_state bdw_cstates[] = { - .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 2600, -- .target_residency = 7700, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -603,7 +603,7 @@ static struct cpuidle_state skl_cstates[] = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -611,7 +611,7 @@ static struct cpuidle_state skl_cstates[] = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 70, -- .target_residency = 100, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -619,7 +619,7 @@ static struct cpuidle_state skl_cstates[] = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 85, -- 
.target_residency = 200, -+ .target_residency = 600, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -627,7 +627,7 @@ static struct cpuidle_state skl_cstates[] = { - .desc = "MWAIT 0x33", - .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 124, -- .target_residency = 800, -+ .target_residency = 3000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -635,7 +635,7 @@ static struct cpuidle_state skl_cstates[] = { - .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 200, -- .target_residency = 800, -+ .target_residency = 3200, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -643,7 +643,7 @@ static struct cpuidle_state skl_cstates[] = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 480, -- .target_residency = 5000, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -651,7 +651,7 @@ static struct cpuidle_state skl_cstates[] = { - .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 890, -- .target_residency = 5000, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -672,7 +672,7 @@ static struct cpuidle_state skx_cstates[] = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 300, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { --- -2.20.1 - -From b8211d4f79dd88dfc2d4bd52be46103ea0b70e3e Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Fri, 6 Jan 2017 15:34:09 +0000 -Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little - bigger than default - ---- - net/ipv4/tcp.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index cf3c5095c10e..b30d51837b2d 100644 ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -3897,8 +3897,8 @@ void __init tcp_init(void) - tcp_init_mem(); - /* Set per-socket limits to no more than 1/128 the pressure threshold */ - limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); -- max_wshare = min(4UL*1024*1024, limit); -- max_rshare = min(6UL*1024*1024, limit); -+ max_wshare = min(16UL*1024*1024, limit); -+ max_rshare = min(16UL*1024*1024, limit); - - init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; - init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; --- -2.20.1 - -From 050223869257b87e22636158a80da38d877248ed Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Sun, 18 Feb 2018 23:35:41 +0000 -Subject: [PATCH] locking: rwsem: spin faster - -tweak rwsem owner spinning a bit ---- - kernel/locking/rwsem.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c -index eef04551eae7..1ec5ab4c8ff7 100644 ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -720,6 +720,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) - struct task_struct *new, *owner; - unsigned long flags, new_flags; - enum owner_state state; -+ int i = 0; - - owner = rwsem_owner_flags(sem, &flags); - state = rwsem_owner_state(owner, flags, nonspinnable); -@@ -753,7 +754,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) - break; - } - -- cpu_relax(); -+ if (i++ > 1000) -+ cpu_relax(); - } - rcu_read_unlock(); - -From b836ea320114643d4354b43acb6ec8bb06ada487 Mon Sep 17 00:00:00 2001 -From: 
Arjan van de Ven -Date: Thu, 2 Jun 2016 23:36:32 -0500 -Subject: [PATCH] drivers: Initialize ata before graphics - -ATA init is the long pole in the boot process, and its asynchronous. -move the graphics init after it so that ata and graphics initialize -in parallel ---- - drivers/Makefile | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/drivers/Makefile b/drivers/Makefile -index aaef17cc6512..d08f3a394929 100644 ---- a/drivers/Makefile -+++ b/drivers/Makefile -@@ -58,15 +58,8 @@ obj-y += char/ - # iommu/ comes before gpu as gpu are using iommu controllers - obj-y += iommu/ - --# gpu/ comes after char for AGP vs DRM startup and after iommu --obj-y += gpu/ -- - obj-$(CONFIG_CONNECTOR) += connector/ - --# i810fb and intelfb depend on char/agp/ --obj-$(CONFIG_FB_I810) += video/fbdev/i810/ --obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ -- - obj-$(CONFIG_PARPORT) += parport/ - obj-$(CONFIG_NVM) += lightnvm/ - obj-y += base/ block/ misc/ mfd/ nfc/ -@@ -79,6 +72,14 @@ obj-$(CONFIG_IDE) += ide/ - obj-y += scsi/ - obj-y += nvme/ - obj-$(CONFIG_ATA) += ata/ -+ -+# gpu/ comes after char for AGP vs DRM startup and after iommu -+obj-y += gpu/ -+ -+# i810fb and intelfb depend on char/agp/ -+obj-$(CONFIG_FB_I810) += video/fbdev/i810/ -+obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ -+ - obj-$(CONFIG_TARGET_CORE) += target/ - obj-$(CONFIG_MTD) += mtd/ - obj-$(CONFIG_SPI) += spi/ diff --git a/linux54-tkg/linux54-tkg-patches/0003-glitched-base.patch b/linux54-tkg/linux54-tkg-patches/0003-glitched-base.patch deleted file mode 100644 index 4cbf12d..0000000 --- a/linux54-tkg/linux54-tkg-patches/0003-glitched-base.patch +++ /dev/null @@ -1,4612 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - -diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h -index 87f1fc9..b3be470 100755 ---- a/scripts/mkcompile_h -+++ b/scripts/mkcompile_h -@@ -50,8 +50,8 @@ else - fi - - UTS_VERSION="#$VERSION" --CONFIG_FLAGS="" --if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi -+CONFIG_FLAGS="TKG" -+if [ -n "$SMP" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS SMP"; fi - if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi - UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP" - -diff --git a/fs/dcache.c b/fs/dcache.c -index 2acfc69878f5..3f1131431e06 100644 ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -69,7 +69,7 @@ - * If no ancestor relationship: - * arbitrary, since it's serialized on rename_lock - */ --int sysctl_vfs_cache_pressure __read_mostly = 100; -+int sysctl_vfs_cache_pressure __read_mostly = 50; - EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); - - __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 211890edf37e..37121563407d 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -41,7 +41,7 @@ const_debug unsigned int sysctl_sched_features = - * Number of tasks to iterate in a single balance run. - * Limited because this is done with IRQs disabled. - */ --const_debug unsigned int sysctl_sched_nr_migrate = 32; -+const_debug unsigned int sysctl_sched_nr_migrate = 128; - - /* - * period over which we average the RT time consumption, measured -@@ -61,9 +61,9 @@ __read_mostly int scheduler_running; - - /* - * part of the period that we allow rt tasks to run in us. 
-- * default: 0.95s -+ * XanMod default: 0.98s - */ --int sysctl_sched_rt_runtime = 950000; -+int sysctl_sched_rt_runtime = 980000; - - /* - * __task_rq_lock - lock the rq @p resides on. -diff --git a/lib/Kconfig b/lib/Kconfig -index 5fe577673b98..c44c27cd6e05 100644 ---- a/lib/Kconfig -+++ b/lib/Kconfig -@@ -10,6 +10,16 @@ menu "Library routines" - config RAID6_PQ - tristate - -+config RAID6_USE_PREFER_GEN -+ bool "Use prefered raid6 gen function." -+ default n -+ depends on RAID6_PQ -+ help -+ This option is provided for using prefered raid6 gen function -+ directly instead of calculating the best durning boot-up. -+ The prefered function should be the same as the best one from -+ calculating. -+ - config BITREVERSE - tristate - -diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c -index 5065b1e7e327..1bf3c712a4ca 100644 ---- a/lib/raid6/algos.c -+++ b/lib/raid6/algos.c -@@ -150,6 +150,29 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) - return best; - } - -+#ifdef CONFIG_RAID6_USE_PREFER_GEN -+static inline const struct raid6_calls *raid6_choose_prefer_gen(void) -+{ -+ const struct raid6_calls *const *algo; -+ const struct raid6_calls *best; -+ -+ for (best = NULL, algo = raid6_algos; *algo; algo++) { -+ if (!best || (*algo)->prefer >= best->prefer) { -+ if ((*algo)->valid && !(*algo)->valid()) -+ continue; -+ best = *algo; -+ } -+ } -+ -+ if (best) { -+ printk("raid6: using algorithm %s\n", best->name); -+ raid6_call = *best; -+ } else -+ printk("raid6: Yikes! No algorithm found!\n"); -+ -+ return best; -+} -+#else - static inline const struct raid6_calls *raid6_choose_gen( - void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) - { -@@ -221,6 +244,7 @@ static inline const struct raid6_calls *raid6_choose_gen( - - return best; - } -+#endif - - - /* Try to pick the best algorithm */ -@@ -228,10 +252,11 @@ static inline const struct raid6_calls *raid6_choose_gen( - - int __init raid6_select_algo(void) - { -- const int disks = (65536/PAGE_SIZE)+2; -- - const struct raid6_calls *gen_best; - const struct raid6_recov_calls *rec_best; -+#ifndef CONFIG_RAID6_USE_PREFER_GEN -+ const int disks = (65536/PAGE_SIZE)+2; -+ - char *syndromes; - void *dptrs[(65536/PAGE_SIZE)+2]; - int i; -@@ -252,11 +277,16 @@ int __init raid6_select_algo(void) - - /* select raid gen_syndrome function */ - gen_best = raid6_choose_gen(&dptrs, disks); -+#else -+ gen_best = raid6_choose_prefer_gen(); -+#endif - - /* select raid recover functions */ - rec_best = raid6_choose_recov(); - -+#ifndef CONFIG_RAID6_USE_PREFER_GEN - free_pages((unsigned long)syndromes, 1); -+#endif - - return gen_best && rec_best ? 
0 : -EINVAL; - } -diff --git a/mm/zswap.c b/mm/zswap.c -index 61a5c41972db..2674c2806130 100644 ---- a/mm/zswap.c -+++ b/mm/zswap.c -@@ -91,7 +91,7 @@ static struct kernel_param_ops zswap_enabled_param_ops = { - module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644); - - /* Crypto compressor to use */ --#define ZSWAP_COMPRESSOR_DEFAULT "lzo" -+#define ZSWAP_COMPRESSOR_DEFAULT "lz4" - static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT; - static int zswap_compressor_param_set(const char *, - const struct kernel_param *); -diff --git a/scripts/setlocalversion b/scripts/setlocalversion -index 71f39410691b..288f9679e883 100755 ---- a/scripts/setlocalversion -+++ b/scripts/setlocalversion -@@ -54,7 +54,7 @@ scm_version() - # If only the short version is requested, don't bother - # running further git commands - if $short; then -- echo "+" -+ # echo "+" - return - fi - # If we are past a tagged commit (like - -From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001 -From: Etienne Juvigny -Date: Mon, 3 Sep 2018 17:36:25 +0200 -Subject: Zenify & stuff - - -diff --git a/Documentation/tp_smapi.txt b/Documentation/tp_smapi.txt -new file mode 100644 -index 000000000000..a249678a8866 ---- /dev/null -+++ b/Documentation/tp_smapi.txt -@@ -0,0 +1,275 @@ -+tp_smapi version 0.42 -+IBM ThinkPad hardware functions driver -+ -+Author: Shem Multinymous -+Project: http://sourceforge.net/projects/tpctl -+Wiki: http://thinkwiki.org/wiki/tp_smapi -+List: linux-thinkpad@linux-thinkpad.org -+ (http://mailman.linux-thinkpad.org/mailman/listinfo/linux-thinkpad) -+ -+Description -+----------- -+ -+ThinkPad laptops include a proprietary interface called SMAPI BIOS -+(System Management Application Program Interface) which provides some -+hardware control functionality that is not accessible by other means. -+ -+This driver exposes some features of the SMAPI BIOS through a sysfs -+interface. It is suitable for newer models, on which SMAPI is invoked -+through IO port writes. Older models use a different SMAPI interface; -+for those, try the "thinkpad" module from the "tpctl" package. -+ -+WARNING: -+This driver uses undocumented features and direct hardware access. -+It thus cannot be guaranteed to work, and may cause arbitrary damage -+(especially on models it wasn't tested on). -+ -+ -+Module parameters -+----------------- -+ -+thinkpad_ec module: -+ force_io=1 lets thinkpad_ec load on some recent ThinkPad models -+ (e.g., T400 and T500) whose BIOS's ACPI DSDT reserves the ports we need. -+tp_smapi module: -+ debug=1 enables verbose dmesg output. -+ -+ -+Usage -+----- -+ -+Control of battery charging thresholds (in percents of current full charge -+capacity): -+ -+# echo 40 > /sys/devices/platform/smapi/BAT0/start_charge_thresh -+# echo 70 > /sys/devices/platform/smapi/BAT0/stop_charge_thresh -+# cat /sys/devices/platform/smapi/BAT0/*_charge_thresh -+ -+ (This is useful since Li-Ion batteries wear out much faster at very -+ high or low charge levels. The driver will also keeps the thresholds -+ across suspend-to-disk with AC disconnected; this isn't done -+ automatically by the hardware.) -+ -+Inhibiting battery charging for 17 minutes (overrides thresholds): -+ -+# echo 17 > /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes -+# echo 0 > /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes # stop -+# cat /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes -+ -+ (This can be used to control which battery is charged when using an -+ Ultrabay battery.) 
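-+ (Illustrative only, not taken from this patch set: the charge-control
-+ attributes documented above can also be driven from a small C program
-+ instead of the shell; the BAT0 paths and the 40/70 values mirror the
-+ examples above, and root privileges are assumed. A minimal sketch:
-+
-+    /* Hypothetical helper: writes one BAT0 charge-control attribute. */
-+    #include <stdio.h>
-+
-+    static int smapi_write(const char *attr, int value)
-+    {
-+        char path[128];
-+        FILE *f;
-+
-+        snprintf(path, sizeof(path),
-+                 "/sys/devices/platform/smapi/BAT0/%s", attr);
-+        f = fopen(path, "w");
-+        if (!f)
-+            return -1;
-+        fprintf(f, "%d\n", value);
-+        return fclose(f);
-+    }
-+
-+    int main(void)
-+    {
-+        /* Mirrors the shell example: charge only between 40% and 70%. */
-+        if (smapi_write("start_charge_thresh", 40) ||
-+            smapi_write("stop_charge_thresh", 70))
-+            perror("tp_smapi sysfs write");
-+        return 0;
-+    }
-+
-+ Built with any C compiler, this performs the same writes as the two
-+ "echo" commands shown earlier.)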
-+ -+Forcing battery discharging even if AC power available: -+ -+# echo 1 > /sys/devices/platform/smapi/BAT0/force_discharge # start discharge -+# echo 0 > /sys/devices/platform/smapi/BAT0/force_discharge # stop discharge -+# cat /sys/devices/platform/smapi/BAT0/force_discharge -+ -+ (When AC is connected, forced discharging will automatically stop -+ when battery is fully depleted -- this is useful for calibration. -+ Also, this attribute can be used to control which battery is discharged -+ when both a system battery and an Ultrabay battery are connected.) -+ -+Misc read-only battery status attributes (see note about HDAPS below): -+ -+/sys/devices/platform/smapi/BAT0/installed # 0 or 1 -+/sys/devices/platform/smapi/BAT0/state # idle/charging/discharging -+/sys/devices/platform/smapi/BAT0/cycle_count # integer counter -+/sys/devices/platform/smapi/BAT0/current_now # instantaneous current -+/sys/devices/platform/smapi/BAT0/current_avg # last minute average -+/sys/devices/platform/smapi/BAT0/power_now # instantaneous power -+/sys/devices/platform/smapi/BAT0/power_avg # last minute average -+/sys/devices/platform/smapi/BAT0/last_full_capacity # in mWh -+/sys/devices/platform/smapi/BAT0/remaining_percent # remaining percent of energy (set by calibration) -+/sys/devices/platform/smapi/BAT0/remaining_percent_error # error range of remaing_percent (not reset by calibration) -+/sys/devices/platform/smapi/BAT0/remaining_running_time # in minutes, by last minute average power -+/sys/devices/platform/smapi/BAT0/remaining_running_time_now # in minutes, by instantenous power -+/sys/devices/platform/smapi/BAT0/remaining_charging_time # in minutes -+/sys/devices/platform/smapi/BAT0/remaining_capacity # in mWh -+/sys/devices/platform/smapi/BAT0/design_capacity # in mWh -+/sys/devices/platform/smapi/BAT0/voltage # in mV -+/sys/devices/platform/smapi/BAT0/design_voltage # in mV -+/sys/devices/platform/smapi/BAT0/charging_max_current # max charging current -+/sys/devices/platform/smapi/BAT0/charging_max_voltage # max charging voltage -+/sys/devices/platform/smapi/BAT0/group{0,1,2,3}_voltage # see below -+/sys/devices/platform/smapi/BAT0/manufacturer # string -+/sys/devices/platform/smapi/BAT0/model # string -+/sys/devices/platform/smapi/BAT0/barcoding # string -+/sys/devices/platform/smapi/BAT0/chemistry # string -+/sys/devices/platform/smapi/BAT0/serial # integer -+/sys/devices/platform/smapi/BAT0/manufacture_date # YYYY-MM-DD -+/sys/devices/platform/smapi/BAT0/first_use_date # YYYY-MM-DD -+/sys/devices/platform/smapi/BAT0/temperature # in milli-Celsius -+/sys/devices/platform/smapi/BAT0/dump # see below -+/sys/devices/platform/smapi/ac_connected # 0 or 1 -+ -+The BAT0/group{0,1,2,3}_voltage attribute refers to the separate cell groups -+in each battery. For example, on the ThinkPad 600, X3x, T4x and R5x models, -+the battery contains 3 cell groups in series, where each group consisting of 2 -+or 3 cells connected in parallel. The voltage of each group is given by these -+attributes, and their sum (roughly) equals the "voltage" attribute. -+(The effective performance of the battery is determined by the weakest group, -+i.e., the one those voltage changes most rapidly during dis/charging.) -+ -+The "BAT0/dump" attribute gives a a hex dump of the raw status data, which -+contains additional data now in the above (if you can figure it out). Some -+unused values are autodetected and replaced by "--": -+ -+In all of the above, replace BAT0 with BAT1 to address the 2nd battery (e.g. -+in the UltraBay). 
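-+ (Likewise illustrative and not part of the patch: the read-only status
-+ attributes listed above can be read the same way; only paths documented
-+ in this file are used, everything else in the sketch is hypothetical.
-+
-+    #include <stdio.h>
-+
-+    int main(void)
-+    {
-+        char state[32] = "unknown";
-+        int percent = -1;
-+        FILE *f;
-+
-+        f = fopen("/sys/devices/platform/smapi/BAT0/remaining_percent", "r");
-+        if (f) {
-+            if (fscanf(f, "%d", &percent) != 1)
-+                percent = -1;
-+            fclose(f);
-+        }
-+        f = fopen("/sys/devices/platform/smapi/BAT0/state", "r");
-+        if (f) {
-+            if (fscanf(f, "%31s", state) != 1)
-+                state[0] = '\0';
-+            fclose(f);
-+        }
-+        printf("BAT0: %d%% (%s)\n", percent, state);
-+        return 0;
-+    }
-+ )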
-+ -+ -+Raw SMAPI calls: -+ -+/sys/devices/platform/smapi/smapi_request -+This performs raw SMAPI calls. It uses a bad interface that cannot handle -+multiple simultaneous access. Don't touch it, it's for development only. -+If you did touch it, you would so something like -+# echo '211a 100 0 0' > /sys/devices/platform/smapi/smapi_request -+# cat /sys/devices/platform/smapi/smapi_request -+and notice that in the output "211a 34b b2 0 0 0 'OK'", the "4b" in the 2nd -+value, converted to decimal is 75: the current charge stop threshold. -+ -+ -+Model-specific status -+--------------------- -+ -+Works (at least partially) on the following ThinkPad model: -+* A30 -+* G41 -+* R40, R50p, R51, R52 -+* T23, T40, T40p, T41, T41p, T42, T42p, T43, T43p, T60, T61, T400, T410, T420 (partially) -+* X24, X31, X32, X40, X41, X60, X61, X200, X201, X220 (partially) -+* Z60t, Z61m -+ -+Does not work on: -+* X230 and newer -+* T430 and newer -+* Any ThinkPad Edge -+* Any ThinkPad Yoga -+* Any ThinkPad L series -+* Any ThinkPad P series -+ -+Not all functions are available on all models; for detailed status, see: -+ http://thinkwiki.org/wiki/tp_smapi -+ -+Please report success/failure by e-mail or on the Wiki. -+If you get a "not implemented" or "not supported" message, your laptop -+probably just can't do that (at least not via the SMAPI BIOS). -+For negative reports, follow the bug reporting guidelines below. -+If you send me the necessary technical data (i.e., SMAPI function -+interfaces), I will support additional models. -+ -+ -+Additional HDAPS features -+------------------------- -+ -+The modified hdaps driver has several improvements on the one in mainline -+(beyond resolving the conflict with thinkpad_ec and tp_smapi): -+ -+- Fixes reliability and improves support for recent ThinkPad models -+ (especially *60 and newer). Unlike the mainline driver, the modified hdaps -+ correctly follows the Embedded Controller communication protocol. -+ -+- Extends the "invert" parameter to cover all possible axis orientations. -+ The possible values are as follows. -+ Let X,Y denote the hardware readouts. -+ Let R denote the laptop's roll (tilt left/right). -+ Let P denote the laptop's pitch (tilt forward/backward). -+ invert=0: R= X P= Y (same as mainline) -+ invert=1: R=-X P=-Y (same as mainline) -+ invert=2: R=-X P= Y (new) -+ invert=3: R= X P=-Y (new) -+ invert=4: R= Y P= X (new) -+ invert=5: R=-Y P=-X (new) -+ invert=6: R=-Y P= X (new) -+ invert=7: R= Y P=-X (new) -+ It's probably easiest to just try all 8 possibilities and see which yields -+ correct results (e.g., in the hdaps-gl visualisation). -+ -+- Adds a whitelist which automatically sets the correct axis orientation for -+ some models. If the value for your model is wrong or missing, you can override -+ it using the "invert" parameter. Please also update the tables at -+ http://www.thinkwiki.org/wiki/tp_smapi and -+ http://www.thinkwiki.org/wiki/List_of_DMI_IDs -+ and submit a patch for the whitelist in hdaps.c. -+ -+- Provides new attributes: -+ /sys/devices/platform/hdaps/sampling_rate: -+ This determines the frequency at which the host queries the embedded -+ controller for accelerometer data (and informs the hdaps input devices). -+ Default=50. -+ /sys/devices/platform/hdaps/oversampling_ratio: -+ When set to X, the embedded controller is told to do physical accelerometer -+ measurements at a rate that is X times higher than the rate at which -+ the driver reads those measurements (i.e., X*sampling_rate). 
This -+ makes the readouts from the embedded controller more fresh, and is also -+ useful for the running average filter (see next). Default=5 -+ /sys/devices/platform/hdaps/running_avg_filter_order: -+ When set to X, reported readouts will be the average of the last X physical -+ accelerometer measurements. Current firmware allows 1<=X<=8. Setting to a -+ high value decreases readout fluctuations. The averaging is handled by the -+ embedded controller, so no CPU resources are used. Higher values make the -+ readouts smoother, since it averages out both sensor noise (good) and abrupt -+ changes (bad). Default=2. -+ -+- Provides a second input device, which publishes the raw accelerometer -+ measurements (without the fuzzing needed for joystick emulation). This input -+ device can be matched by a udev rule such as the following (all on one line): -+ KERNEL=="event[0-9]*", ATTRS{phys}=="hdaps/input1", -+ ATTRS{modalias}=="input:b0019v1014p5054e4801-*", -+ SYMLINK+="input/hdaps/accelerometer-event -+ -+A new version of the hdapsd userspace daemon, which uses the input device -+interface instead of polling sysfs, is available seprately. Using this reduces -+the total interrupts per second generated by hdaps+hdapsd (on tickless kernels) -+to 50, down from a value that fluctuates between 50 and 100. Set the -+sampling_rate sysfs attribute to a lower value to further reduce interrupts, -+at the expense of response latency. -+ -+Licensing note: all my changes to the HDAPS driver are licensed under the -+GPL version 2 or, at your option and to the extent allowed by derivation from -+prior works, any later version. My version of hdaps is derived work from the -+mainline version, which at the time of writing is available only under -+GPL version 2. -+ -+Bug reporting -+------------- -+ -+Mail . Please include: -+* Details about your model, -+* Relevant "dmesg" output. Make sure thinkpad_ec and tp_smapi are loaded with -+ the "debug=1" parameter (e.g., use "make load HDAPS=1 DEBUG=1"). -+* Output of "dmidecode | grep -C5 Product" -+* Does the failed functionality works under Windows? -+ -+ -+More about SMAPI -+---------------- -+ -+For hints about what may be possible via the SMAPI BIOS and how, see: -+ -+* IBM Technical Reference Manual for the ThinkPad 770 -+ (http://www-307.ibm.com/pc/support/site.wss/document.do?lndocid=PFAN-3TUQQD) -+* Exported symbols in PWRMGRIF.DLL or TPPWRW32.DLL (e.g., use "objdump -x"). -+* drivers/char/mwave/smapi.c in the Linux kernel tree.* -+* The "thinkpad" SMAPI module (http://tpctl.sourceforge.net). -+* The SMAPI_* constants in tp_smapi.c. -+ -+Note that in the above Technical Reference and in the "thinkpad" module, -+SMAPI is invoked through a function call to some physical address. However, -+the interface used by tp_smapi and the above mwave drive, and apparently -+required by newer ThinkPad, is different: you set the parameters up in the -+CPU's registers and write to ports 0xB2 (the APM control port) and 0x4F; this -+triggers an SMI (System Management Interrupt), causing the CPU to enter -+SMM (System Management Mode) and run the BIOS firmware; the results are -+returned in the CPU's registers. It is not clear what is the relation between -+the two variants of SMAPI, though the assignment of error codes seems to be -+similar. -+ -+In addition, the embedded controller on ThinkPad laptops has a non-standard -+interface at IO ports 0x1600-0x161F (mapped to LCP channel 3 of the H8S chip). 
-+The interface provides various system management services (currently known: -+battery information and accelerometer readouts). For more information see the -+thinkpad_ec module and the H8S hardware documentation: -+http://documentation.renesas.com/eng/products/mpumcu/rej09b0300_2140bhm.pdf -diff --git a/Makefile b/Makefile -index 863f58503bee..f33cf760af6d 100644 ---- a/Makefile -+++ b/Makefile -@@ -682,12 +682,16 @@ ifdef CONFIG_FUNCTION_TRACER - KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) - KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) - -+ifdef CONFIG_CC_OPTIMIZE_HARDER -+KBUILD_CFLAGS += -O3 $(call cc-disable-warning,maybe-uninitialized,) -+else - ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE - KBUILD_CFLAGS += -O2 - else ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 - KBUILD_CFLAGS += -O3 - else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE - KBUILD_CFLAGS += -Os - endif -+endif - - ifdef CONFIG_CC_DISABLE_WARN_MAYBE_UNINITIALIZED - KBUILD_CFLAGS += -Wno-maybe-uninitialized - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 4f32c4062fb6..c0bf039e1b40 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -721,6 +721,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, - struct sockaddr _sockaddr; - struct sockaddr_in _sockaddr_in; - struct sockaddr_in6 _sockaddr_in6; -+ struct sockaddr_ib _sockaddr_ib; - } sgid_addr, dgid_addr; - int ret; - -diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c -index 55d33500d55e..744e84228a1f 100644 ---- a/drivers/input/mouse/synaptics.c -+++ b/drivers/input/mouse/synaptics.c -@@ -1338,7 +1338,9 @@ static int set_input_params(struct psmouse *psmouse, - if (psmouse_matches_pnp_id(psmouse, topbuttonpad_pnp_ids) && - !SYN_CAP_EXT_BUTTONS_STICK(info->ext_cap_10)) - __set_bit(INPUT_PROP_TOPBUTTONPAD, dev->propbit); -- } -+ } else if (SYN_CAP_CLICKPAD2BTN(info->ext_cap_0c) || -+ SYN_CAP_CLICKPAD2BTN2(info->ext_cap_0c)) -+ __set_bit(INPUT_PROP_BUTTONPAD, dev->propbit); - - return 0; - } -diff --git a/drivers/input/mouse/synaptics.h b/drivers/input/mouse/synaptics.h -index fc00e005c611..4cfbeec3ae4c 100644 ---- a/drivers/input/mouse/synaptics.h -+++ b/drivers/input/mouse/synaptics.h -@@ -86,6 +86,7 @@ - */ - #define SYN_CAP_CLICKPAD(ex0c) ((ex0c) & BIT(20)) /* 1-button ClickPad */ - #define SYN_CAP_CLICKPAD2BTN(ex0c) ((ex0c) & BIT(8)) /* 2-button ClickPad */ -+#define SYN_CAP_CLICKPAD2BTN2(ex0c) ((ex0c) & BIT(21)) /* 2-button ClickPad */ - #define SYN_CAP_MAX_DIMENSIONS(ex0c) ((ex0c) & BIT(17)) - #define SYN_CAP_MIN_DIMENSIONS(ex0c) ((ex0c) & BIT(13)) - #define SYN_CAP_ADV_GESTURE(ex0c) ((ex0c) & BIT(19)) -diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig -index 97a420c11eed..c8621e9b2e4a 100644 ---- a/drivers/macintosh/Kconfig -+++ b/drivers/macintosh/Kconfig -@@ -159,6 +159,13 @@ config INPUT_ADBHID - - If unsure, say Y. - -+config ADB_TRACKPAD_ABSOLUTE -+ bool "Enable absolute mode for adb trackpads" -+ depends on INPUT_ADBHID -+ help -+ Enable absolute mode in adb-base trackpads. This feature adds -+ compatibility with synaptics Xorg / Xfree drivers. 
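- (Tying back to the raw accelerometer input device and udev rule described
- in the tp_smapi/HDAPS documentation above: a minimal evdev consumer could
- look like the following. This is a hypothetical userspace sketch, not part
- of the patch; the device path is the SYMLINK created by that udev rule and
- may differ on systems where the rule is not installed.
-
-    #include <fcntl.h>
-    #include <stdio.h>
-    #include <unistd.h>
-    #include <linux/input.h>
-
-    int main(void)
-    {
-        struct input_event ev;
-        int x = 0, y = 0;
-        int fd = open("/dev/input/hdaps/accelerometer-event", O_RDONLY);
-
-        if (fd < 0) {
-            perror("open");
-            return 1;
-        }
-        /* Print one line per sync event with the latest raw readouts. */
-        while (read(fd, &ev, sizeof(ev)) == sizeof(ev)) {
-            if (ev.type == EV_ABS && ev.code == ABS_X)
-                x = ev.value;
-            else if (ev.type == EV_ABS && ev.code == ABS_Y)
-                y = ev.value;
-            else if (ev.type == EV_SYN)
-                printf("x=%d y=%d\n", x, y);
-        }
-        close(fd);
-        return 0;
-    }
- )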
-+ - config MAC_EMUMOUSEBTN - tristate "Support for mouse button 2+3 emulation" - depends on SYSCTL && INPUT -diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c -index a261892c03b3..a85192de840c 100644 ---- a/drivers/macintosh/adbhid.c -+++ b/drivers/macintosh/adbhid.c -@@ -262,6 +262,15 @@ static struct adb_ids buttons_ids; - #define ADBMOUSE_MS_A3 8 /* Mouse systems A3 trackball (handler 3) */ - #define ADBMOUSE_MACALLY2 9 /* MacAlly 2-button mouse */ - -+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE -+#define ABS_XMIN 310 -+#define ABS_XMAX 1700 -+#define ABS_YMIN 200 -+#define ABS_YMAX 1000 -+#define ABS_ZMIN 0 -+#define ABS_ZMAX 55 -+#endif -+ - static void - adbhid_keyboard_input(unsigned char *data, int nb, int apoll) - { -@@ -405,6 +414,9 @@ static void - adbhid_mouse_input(unsigned char *data, int nb, int autopoll) - { - int id = (data[0] >> 4) & 0x0f; -+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE -+ int btn = 0; int x_axis = 0; int y_axis = 0; int z_axis = 0; -+#endif - - if (!adbhid[id]) { - pr_err("ADB HID on ID %d not yet registered\n", id); -@@ -436,6 +448,17 @@ adbhid_mouse_input(unsigned char *data, int nb, int autopoll) - high bits of y-axis motion. XY is additional - high bits of x-axis motion. - -+ For ADB Absolute motion protocol the data array will contain the -+ following values: -+ -+ BITS COMMENTS -+ data[0] = dddd 1100 ADB command: Talk, register 0, for device dddd. -+ data[1] = byyy yyyy Left button and y-axis motion. -+ data[2] = bxxx xxxx Second button and x-axis motion. -+ data[3] = 1yyy 1xxx Half bits of y-axis and x-axis motion. -+ data[4] = 1yyy 1xxx Higher bits of y-axis and x-axis motion. -+ data[5] = 1zzz 1zzz Higher and lower bits of z-pressure. -+ - MacAlly 2-button mouse protocol. - - For MacAlly 2-button mouse protocol the data array will contain the -@@ -458,8 +481,17 @@ adbhid_mouse_input(unsigned char *data, int nb, int autopoll) - switch (adbhid[id]->mouse_kind) - { - case ADBMOUSE_TRACKPAD: -+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE -+ x_axis = (data[2] & 0x7f) | ((data[3] & 0x07) << 7) | -+ ((data[4] & 0x07) << 10); -+ y_axis = (data[1] & 0x7f) | ((data[3] & 0x70) << 3) | -+ ((data[4] & 0x70) << 6); -+ z_axis = (data[5] & 0x07) | ((data[5] & 0x70) >> 1); -+ btn = (!(data[1] >> 7)) & 1; -+#else - data[1] = (data[1] & 0x7f) | ((data[1] & data[2]) & 0x80); - data[2] = data[2] | 0x80; -+#endif - break; - case ADBMOUSE_MICROSPEED: - data[1] = (data[1] & 0x7f) | ((data[3] & 0x01) << 7); -@@ -485,17 +517,39 @@ adbhid_mouse_input(unsigned char *data, int nb, int autopoll) - break; - } - -- input_report_key(adbhid[id]->input, BTN_LEFT, !((data[1] >> 7) & 1)); -- input_report_key(adbhid[id]->input, BTN_MIDDLE, !((data[2] >> 7) & 1)); -+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE -+ if ( adbhid[id]->mouse_kind == ADBMOUSE_TRACKPAD ) { - -- if (nb >= 4 && adbhid[id]->mouse_kind != ADBMOUSE_TRACKPAD) -- input_report_key(adbhid[id]->input, BTN_RIGHT, !((data[3] >> 7) & 1)); -+ if(z_axis > 30) input_report_key(adbhid[id]->input, BTN_TOUCH, 1); -+ if(z_axis < 25) input_report_key(adbhid[id]->input, BTN_TOUCH, 0); - -- input_report_rel(adbhid[id]->input, REL_X, -- ((data[2]&0x7f) < 64 ? (data[2]&0x7f) : (data[2]&0x7f)-128 )); -- input_report_rel(adbhid[id]->input, REL_Y, -- ((data[1]&0x7f) < 64 ? 
(data[1]&0x7f) : (data[1]&0x7f)-128 )); -+ if(z_axis > 0){ -+ input_report_abs(adbhid[id]->input, ABS_X, x_axis); -+ input_report_abs(adbhid[id]->input, ABS_Y, y_axis); -+ input_report_key(adbhid[id]->input, BTN_TOOL_FINGER, 1); -+ input_report_key(adbhid[id]->input, ABS_TOOL_WIDTH, 5); -+ } else { -+ input_report_key(adbhid[id]->input, BTN_TOOL_FINGER, 0); -+ input_report_key(adbhid[id]->input, ABS_TOOL_WIDTH, 0); -+ } -+ -+ input_report_abs(adbhid[id]->input, ABS_PRESSURE, z_axis); -+ input_report_key(adbhid[id]->input, BTN_LEFT, btn); -+ } else { -+#endif -+ input_report_key(adbhid[id]->input, BTN_LEFT, !((data[1] >> 7) & 1)); -+ input_report_key(adbhid[id]->input, BTN_MIDDLE, !((data[2] >> 7) & 1)); -+ -+ if (nb >= 4 && adbhid[id]->mouse_kind != ADBMOUSE_TRACKPAD) -+ input_report_key(adbhid[id]->input, BTN_RIGHT, !((data[3] >> 7) & 1)); - -+ input_report_rel(adbhid[id]->input, REL_X, -+ ((data[2]&0x7f) < 64 ? (data[2]&0x7f) : (data[2]&0x7f)-128 )); -+ input_report_rel(adbhid[id]->input, REL_Y, -+ ((data[1]&0x7f) < 64 ? (data[1]&0x7f) : (data[1]&0x7f)-128 )); -+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE -+ } -+#endif - input_sync(adbhid[id]->input); - } - -@@ -849,6 +903,15 @@ adbhid_input_register(int id, int default_id, int original_handler_id, - input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) | - BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT); - input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y); -+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE -+ set_bit(EV_ABS, input_dev->evbit); -+ input_set_abs_params(input_dev, ABS_X, ABS_XMIN, ABS_XMAX, 0, 0); -+ input_set_abs_params(input_dev, ABS_Y, ABS_YMIN, ABS_YMAX, 0, 0); -+ input_set_abs_params(input_dev, ABS_PRESSURE, ABS_ZMIN, ABS_ZMAX, 0, 0); -+ set_bit(BTN_TOUCH, input_dev->keybit); -+ set_bit(BTN_TOOL_FINGER, input_dev->keybit); -+ set_bit(ABS_TOOL_WIDTH, input_dev->absbit); -+#endif - break; - - case ADB_MISC: -@@ -1132,7 +1195,11 @@ init_trackpad(int id) - r1_buffer[3], - r1_buffer[4], - r1_buffer[5], -+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE -+ 0x00, /* Enable absolute mode */ -+#else - 0x03, /*r1_buffer[6],*/ -+#endif - r1_buffer[7]); - - /* Without this flush, the trackpad may be locked up */ -diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig -index ac4d48830415..b272132ac742 100644 ---- a/drivers/platform/x86/Kconfig -+++ b/drivers/platform/x86/Kconfig -@@ -573,9 +573,28 @@ config THINKPAD_ACPI_HOTKEY_POLL - If you are not sure, say Y here. The driver enables polling only if - it is strictly necessary to do so. - -+config THINKPAD_EC -+ tristate -+ ---help--- -+ This is a low-level driver for accessing the ThinkPad H8S embedded -+ controller over the LPC bus (not to be confused with the ACPI Embedded -+ Controller interface). -+ -+config TP_SMAPI -+ tristate "ThinkPad SMAPI Support" -+ select THINKPAD_EC -+ default n -+ help -+ This adds SMAPI support on Lenovo/IBM ThinkPads, for features such -+ as battery charging control. For more information about this driver -+ see . -+ -+ If you have a Lenovo/IBM ThinkPad laptop, say Y or M here. 
-+ - config SENSORS_HDAPS - tristate "Thinkpad Hard Drive Active Protection System (hdaps)" - depends on INPUT -+ select THINKPAD_EC - select INPUT_POLLDEV - help - This driver provides support for the IBM Hard Drive Active Protection -diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile -index 2ba6cb795338..399f8b88646f 100644 ---- a/drivers/platform/x86/Makefile -+++ b/drivers/platform/x86/Makefile -@@ -35,6 +35,8 @@ obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o - obj-$(CONFIG_SONY_LAPTOP) += sony-laptop.o - obj-$(CONFIG_IDEAPAD_LAPTOP) += ideapad-laptop.o - obj-$(CONFIG_THINKPAD_ACPI) += thinkpad_acpi.o -+obj-$(CONFIG_THINKPAD_EC) += thinkpad_ec.o -+obj-$(CONFIG_TP_SMAPI) += tp_smapi.o - obj-$(CONFIG_SENSORS_HDAPS) += hdaps.o - obj-$(CONFIG_FUJITSU_LAPTOP) += fujitsu-laptop.o - obj-$(CONFIG_FUJITSU_TABLET) += fujitsu-tablet.o -diff --git a/drivers/platform/x86/hdaps.c b/drivers/platform/x86/hdaps.c -index c26baf77938e..1814614f240c 100644 ---- a/drivers/platform/x86/hdaps.c -+++ b/drivers/platform/x86/hdaps.c -@@ -2,7 +2,7 @@ - * hdaps.c - driver for IBM's Hard Drive Active Protection System - * - * Copyright (C) 2005 Robert Love -- * Copyright (C) 2005 Jesper Juhl -+ * Copyright (C) 2005 Jesper Juhl - * - * The HardDisk Active Protection System (hdaps) is present in IBM ThinkPads - * starting with the R40, T41, and X40. It provides a basic two-axis -@@ -30,266 +30,384 @@ - - #include - #include --#include -+#include - #include --#include - #include - #include - #include - #include --#include -- --#define HDAPS_LOW_PORT 0x1600 /* first port used by hdaps */ --#define HDAPS_NR_PORTS 0x30 /* number of ports: 0x1600 - 0x162f */ -- --#define HDAPS_PORT_STATE 0x1611 /* device state */ --#define HDAPS_PORT_YPOS 0x1612 /* y-axis position */ --#define HDAPS_PORT_XPOS 0x1614 /* x-axis position */ --#define HDAPS_PORT_TEMP1 0x1616 /* device temperature, in Celsius */ --#define HDAPS_PORT_YVAR 0x1617 /* y-axis variance (what is this?) */ --#define HDAPS_PORT_XVAR 0x1619 /* x-axis variance (what is this?) */ --#define HDAPS_PORT_TEMP2 0x161b /* device temperature (again?) */ --#define HDAPS_PORT_UNKNOWN 0x161c /* what is this? */ --#define HDAPS_PORT_KMACT 0x161d /* keyboard or mouse activity */ -- --#define STATE_FRESH 0x50 /* accelerometer data is fresh */ -+#include -+#include -+#include -+ -+/* Embedded controller accelerometer read command and its result: */ -+static const struct thinkpad_ec_row ec_accel_args = -+ { .mask = 0x0001, .val = {0x11} }; -+#define EC_ACCEL_IDX_READOUTS 0x1 /* readouts included in this read */ -+ /* First readout, if READOUTS>=1: */ -+#define EC_ACCEL_IDX_YPOS1 0x2 /* y-axis position word */ -+#define EC_ACCEL_IDX_XPOS1 0x4 /* x-axis position word */ -+#define EC_ACCEL_IDX_TEMP1 0x6 /* device temperature in Celsius */ -+ /* Second readout, if READOUTS>=2: */ -+#define EC_ACCEL_IDX_XPOS2 0x7 /* y-axis position word */ -+#define EC_ACCEL_IDX_YPOS2 0x9 /* x-axis position word */ -+#define EC_ACCEL_IDX_TEMP2 0xb /* device temperature in Celsius */ -+#define EC_ACCEL_IDX_QUEUED 0xc /* Number of queued readouts left */ -+#define EC_ACCEL_IDX_KMACT 0xd /* keyboard or mouse activity */ -+#define EC_ACCEL_IDX_RETVAL 0xf /* command return value, good=0x00 */ - - #define KEYBD_MASK 0x20 /* set if keyboard activity */ - #define MOUSE_MASK 0x40 /* set if mouse activity */ --#define KEYBD_ISSET(n) (!! (n & KEYBD_MASK)) /* keyboard used? */ --#define MOUSE_ISSET(n) (!! (n & MOUSE_MASK)) /* mouse used? 
*/ - --#define INIT_TIMEOUT_MSECS 4000 /* wait up to 4s for device init ... */ --#define INIT_WAIT_MSECS 200 /* ... in 200ms increments */ -+#define READ_TIMEOUT_MSECS 100 /* wait this long for device read */ -+#define RETRY_MSECS 3 /* retry delay */ - --#define HDAPS_POLL_INTERVAL 50 /* poll for input every 1/20s (50 ms)*/ - #define HDAPS_INPUT_FUZZ 4 /* input event threshold */ - #define HDAPS_INPUT_FLAT 4 -- --#define HDAPS_X_AXIS (1 << 0) --#define HDAPS_Y_AXIS (1 << 1) --#define HDAPS_BOTH_AXES (HDAPS_X_AXIS | HDAPS_Y_AXIS) -- -+#define KMACT_REMEMBER_PERIOD (HZ/10) /* keyboard/mouse persistence */ -+ -+/* Input IDs */ -+#define HDAPS_INPUT_VENDOR PCI_VENDOR_ID_IBM -+#define HDAPS_INPUT_PRODUCT 0x5054 /* "TP", shared with thinkpad_acpi */ -+#define HDAPS_INPUT_JS_VERSION 0x6801 /* Joystick emulation input device */ -+#define HDAPS_INPUT_RAW_VERSION 0x4801 /* Raw accelerometer input device */ -+ -+/* Axis orientation. */ -+/* The unnatural bit-representation of inversions is for backward -+ * compatibility with the"invert=1" module parameter. */ -+#define HDAPS_ORIENT_INVERT_XY 0x01 /* Invert both X and Y axes. */ -+#define HDAPS_ORIENT_INVERT_X 0x02 /* Invert the X axis (uninvert if -+ * already inverted by INVERT_XY). */ -+#define HDAPS_ORIENT_SWAP 0x04 /* Swap the axes. The swap occurs -+ * before inverting X or Y. */ -+#define HDAPS_ORIENT_MAX 0x07 -+#define HDAPS_ORIENT_UNDEFINED 0xFF /* Placeholder during initialization */ -+#define HDAPS_ORIENT_INVERT_Y (HDAPS_ORIENT_INVERT_XY | HDAPS_ORIENT_INVERT_X) -+ -+static struct timer_list hdaps_timer; - static struct platform_device *pdev; --static struct input_polled_dev *hdaps_idev; --static unsigned int hdaps_invert; --static u8 km_activity; --static int rest_x; --static int rest_y; -- --static DEFINE_MUTEX(hdaps_mtx); -- --/* -- * __get_latch - Get the value from a given port. Callers must hold hdaps_mtx. -- */ --static inline u8 __get_latch(u16 port) -+static struct input_dev *hdaps_idev; /* joystick-like device with fuzz */ -+static struct input_dev *hdaps_idev_raw; /* raw hdaps sensor readouts */ -+static unsigned int hdaps_invert = HDAPS_ORIENT_UNDEFINED; -+static int needs_calibration; -+ -+/* Configuration: */ -+static int sampling_rate = 50; /* Sampling rate */ -+static int oversampling_ratio = 5; /* Ratio between our sampling rate and -+ * EC accelerometer sampling rate */ -+static int running_avg_filter_order = 2; /* EC running average filter order */ -+ -+/* Latest state readout: */ -+static int pos_x, pos_y; /* position */ -+static int temperature; /* temperature */ -+static int stale_readout = 1; /* last read invalid */ -+static int rest_x, rest_y; /* calibrated rest position */ -+ -+/* Last time we saw keyboard and mouse activity: */ -+static u64 last_keyboard_jiffies = INITIAL_JIFFIES; -+static u64 last_mouse_jiffies = INITIAL_JIFFIES; -+static u64 last_update_jiffies = INITIAL_JIFFIES; -+ -+/* input device use count */ -+static int hdaps_users; -+static DEFINE_MUTEX(hdaps_users_mtx); -+ -+/* Some models require an axis transformation to the standard representation */ -+static void transform_axes(int *x, int *y) - { -- return inb(port) & 0xff; -+ if (hdaps_invert & HDAPS_ORIENT_SWAP) { -+ int z; -+ z = *x; -+ *x = *y; -+ *y = z; -+ } -+ if (hdaps_invert & HDAPS_ORIENT_INVERT_XY) { -+ *x = -*x; -+ *y = -*y; -+ } -+ if (hdaps_invert & HDAPS_ORIENT_INVERT_X) -+ *x = -*x; - } - --/* -- * __check_latch - Check a port latch for a given value. Returns zero if the -- * port contains the given value. 
Callers must hold hdaps_mtx. -+/** -+ * __hdaps_update - query current state, with locks already acquired -+ * @fast: if nonzero, do one quick attempt without retries. -+ * -+ * Query current accelerometer state and update global state variables. -+ * Also prefetches the next query. Caller must hold controller lock. - */ --static inline int __check_latch(u16 port, u8 val) -+static int __hdaps_update(int fast) - { -- if (__get_latch(port) == val) -- return 0; -- return -EINVAL; --} -+ /* Read data: */ -+ struct thinkpad_ec_row data; -+ int ret; - --/* -- * __wait_latch - Wait up to 100us for a port latch to get a certain value, -- * returning zero if the value is obtained. Callers must hold hdaps_mtx. -- */ --static int __wait_latch(u16 port, u8 val) --{ -- unsigned int i; -+ data.mask = (1 << EC_ACCEL_IDX_READOUTS) | (1 << EC_ACCEL_IDX_KMACT) | -+ (3 << EC_ACCEL_IDX_YPOS1) | (3 << EC_ACCEL_IDX_XPOS1) | -+ (1 << EC_ACCEL_IDX_TEMP1) | (1 << EC_ACCEL_IDX_RETVAL); -+ if (fast) -+ ret = thinkpad_ec_try_read_row(&ec_accel_args, &data); -+ else -+ ret = thinkpad_ec_read_row(&ec_accel_args, &data); -+ thinkpad_ec_prefetch_row(&ec_accel_args); /* Prefetch even if error */ -+ if (ret) -+ return ret; - -- for (i = 0; i < 20; i++) { -- if (!__check_latch(port, val)) -- return 0; -- udelay(5); -+ /* Check status: */ -+ if (data.val[EC_ACCEL_IDX_RETVAL] != 0x00) { -+ pr_warn("read RETVAL=0x%02x\n", -+ data.val[EC_ACCEL_IDX_RETVAL]); -+ return -EIO; - } - -- return -EIO; -+ if (data.val[EC_ACCEL_IDX_READOUTS] < 1) -+ return -EBUSY; /* no pending readout, try again later */ -+ -+ /* Parse position data: */ -+ pos_x = *(s16 *)(data.val+EC_ACCEL_IDX_XPOS1); -+ pos_y = *(s16 *)(data.val+EC_ACCEL_IDX_YPOS1); -+ transform_axes(&pos_x, &pos_y); -+ -+ /* Keyboard and mouse activity status is cleared as soon as it's read, -+ * so applications will eat each other's events. Thus we remember any -+ * event for KMACT_REMEMBER_PERIOD jiffies. -+ */ -+ if (data.val[EC_ACCEL_IDX_KMACT] & KEYBD_MASK) -+ last_keyboard_jiffies = get_jiffies_64(); -+ if (data.val[EC_ACCEL_IDX_KMACT] & MOUSE_MASK) -+ last_mouse_jiffies = get_jiffies_64(); -+ -+ temperature = data.val[EC_ACCEL_IDX_TEMP1]; -+ -+ last_update_jiffies = get_jiffies_64(); -+ stale_readout = 0; -+ if (needs_calibration) { -+ rest_x = pos_x; -+ rest_y = pos_y; -+ needs_calibration = 0; -+ } -+ -+ return 0; - } - --/* -- * __device_refresh - request a refresh from the accelerometer. Does not wait -- * for refresh to complete. Callers must hold hdaps_mtx. -+/** -+ * hdaps_update - acquire locks and query current state -+ * -+ * Query current accelerometer state and update global state variables. -+ * Also prefetches the next query. -+ * Retries until timeout if the accelerometer is not in ready status (common). -+ * Does its own locking. - */ --static void __device_refresh(void) -+static int hdaps_update(void) - { -- udelay(200); -- if (inb(0x1604) != STATE_FRESH) { -- outb(0x11, 0x1610); -- outb(0x01, 0x161f); -+ u64 age = get_jiffies_64() - last_update_jiffies; -+ int total, ret; -+ -+ if (!stale_readout && age < (9*HZ)/(10*sampling_rate)) -+ return 0; /* already updated recently */ -+ for (total = 0; total < READ_TIMEOUT_MSECS; total += RETRY_MSECS) { -+ ret = thinkpad_ec_lock(); -+ if (ret) -+ return ret; -+ ret = __hdaps_update(0); -+ thinkpad_ec_unlock(); -+ -+ if (!ret) -+ return 0; -+ if (ret != -EBUSY) -+ break; -+ msleep(RETRY_MSECS); - } -+ return ret; - } - --/* -- * __device_refresh_sync - request a synchronous refresh from the -- * accelerometer. 
We wait for the refresh to complete. Returns zero if -- * successful and nonzero on error. Callers must hold hdaps_mtx. -+/** -+ * hdaps_set_power - enable or disable power to the accelerometer. -+ * Returns zero on success and negative error code on failure. Can sleep. - */ --static int __device_refresh_sync(void) -+static int hdaps_set_power(int on) - { -- __device_refresh(); -- return __wait_latch(0x1604, STATE_FRESH); -+ struct thinkpad_ec_row args = -+ { .mask = 0x0003, .val = {0x14, on?0x01:0x00} }; -+ struct thinkpad_ec_row data = { .mask = 0x8000 }; -+ int ret = thinkpad_ec_read_row(&args, &data); -+ if (ret) -+ return ret; -+ if (data.val[0xF] != 0x00) -+ return -EIO; -+ return 0; - } - --/* -- * __device_complete - indicate to the accelerometer that we are done reading -- * data, and then initiate an async refresh. Callers must hold hdaps_mtx. -+/** -+ * hdaps_set_ec_config - set accelerometer parameters. -+ * @ec_rate: embedded controller sampling rate -+ * @order: embedded controller running average filter order -+ * (Normally we have @ec_rate = sampling_rate * oversampling_ratio.) -+ * Returns zero on success and negative error code on failure. Can sleep. - */ --static inline void __device_complete(void) -+static int hdaps_set_ec_config(int ec_rate, int order) - { -- inb(0x161f); -- inb(0x1604); -- __device_refresh(); -+ struct thinkpad_ec_row args = { .mask = 0x000F, -+ .val = {0x10, (u8)ec_rate, (u8)(ec_rate>>8), order} }; -+ struct thinkpad_ec_row data = { .mask = 0x8000 }; -+ int ret = thinkpad_ec_read_row(&args, &data); -+ pr_debug("setting ec_rate=%d, filter_order=%d\n", ec_rate, order); -+ if (ret) -+ return ret; -+ if (data.val[0xF] == 0x03) { -+ pr_warn("config param out of range\n"); -+ return -EINVAL; -+ } -+ if (data.val[0xF] == 0x06) { -+ pr_warn("config change already pending\n"); -+ return -EBUSY; -+ } -+ if (data.val[0xF] != 0x00) { -+ pr_warn("config change error, ret=%d\n", -+ data.val[0xF]); -+ return -EIO; -+ } -+ return 0; - } - --/* -- * hdaps_readb_one - reads a byte from a single I/O port, placing the value in -- * the given pointer. Returns zero on success or a negative error on failure. -- * Can sleep. -+/** -+ * hdaps_get_ec_config - get accelerometer parameters. -+ * @ec_rate: embedded controller sampling rate -+ * @order: embedded controller running average filter order -+ * Returns zero on success and negative error code on failure. Can sleep. - */ --static int hdaps_readb_one(unsigned int port, u8 *val) -+static int hdaps_get_ec_config(int *ec_rate, int *order) - { -- int ret; -- -- mutex_lock(&hdaps_mtx); -- -- /* do a sync refresh -- we need to be sure that we read fresh data */ -- ret = __device_refresh_sync(); -+ const struct thinkpad_ec_row args = -+ { .mask = 0x0003, .val = {0x17, 0x82} }; -+ struct thinkpad_ec_row data = { .mask = 0x801F }; -+ int ret = thinkpad_ec_read_row(&args, &data); - if (ret) -- goto out; -- -- *val = inb(port); -- __device_complete(); -- --out: -- mutex_unlock(&hdaps_mtx); -- return ret; -+ return ret; -+ if (data.val[0xF] != 0x00) -+ return -EIO; -+ if (!(data.val[0x1] & 0x01)) -+ return -ENXIO; /* accelerometer polling not enabled */ -+ if (data.val[0x1] & 0x02) -+ return -EBUSY; /* config change in progress, retry later */ -+ *ec_rate = data.val[0x2] | ((int)(data.val[0x3]) << 8); -+ *order = data.val[0x4]; -+ return 0; - } - --/* __hdaps_read_pair - internal lockless helper for hdaps_read_pair(). 
*/ --static int __hdaps_read_pair(unsigned int port1, unsigned int port2, -- int *x, int *y) -+/** -+ * hdaps_get_ec_mode - get EC accelerometer mode -+ * Returns zero on success and negative error code on failure. Can sleep. -+ */ -+static int hdaps_get_ec_mode(u8 *mode) - { -- /* do a sync refresh -- we need to be sure that we read fresh data */ -- if (__device_refresh_sync()) -+ const struct thinkpad_ec_row args = -+ { .mask = 0x0001, .val = {0x13} }; -+ struct thinkpad_ec_row data = { .mask = 0x8002 }; -+ int ret = thinkpad_ec_read_row(&args, &data); -+ if (ret) -+ return ret; -+ if (data.val[0xF] != 0x00) { -+ pr_warn("accelerometer not implemented (0x%02x)\n", -+ data.val[0xF]); - return -EIO; -- -- *y = inw(port2); -- *x = inw(port1); -- km_activity = inb(HDAPS_PORT_KMACT); -- __device_complete(); -- -- /* hdaps_invert is a bitvector to negate the axes */ -- if (hdaps_invert & HDAPS_X_AXIS) -- *x = -*x; -- if (hdaps_invert & HDAPS_Y_AXIS) -- *y = -*y; -- -+ } -+ *mode = data.val[0x1]; - return 0; - } - --/* -- * hdaps_read_pair - reads the values from a pair of ports, placing the values -- * in the given pointers. Returns zero on success. Can sleep. -+/** -+ * hdaps_check_ec - checks something about the EC. -+ * Follows the clean-room spec for HDAPS; we don't know what it means. -+ * Returns zero on success and negative error code on failure. Can sleep. - */ --static int hdaps_read_pair(unsigned int port1, unsigned int port2, -- int *val1, int *val2) -+static int hdaps_check_ec(void) - { -- int ret; -- -- mutex_lock(&hdaps_mtx); -- ret = __hdaps_read_pair(port1, port2, val1, val2); -- mutex_unlock(&hdaps_mtx); -- -- return ret; -+ const struct thinkpad_ec_row args = -+ { .mask = 0x0003, .val = {0x17, 0x81} }; -+ struct thinkpad_ec_row data = { .mask = 0x800E }; -+ int ret = thinkpad_ec_read_row(&args, &data); -+ if (ret) -+ return ret; -+ if (!((data.val[0x1] == 0x00 && data.val[0x2] == 0x60) || /* cleanroom spec */ -+ (data.val[0x1] == 0x01 && data.val[0x2] == 0x00)) || /* seen on T61 */ -+ data.val[0x3] != 0x00 || data.val[0xF] != 0x00) { -+ pr_warn("hdaps_check_ec: bad response (0x%x,0x%x,0x%x,0x%x)\n", -+ data.val[0x1], data.val[0x2], -+ data.val[0x3], data.val[0xF]); -+ return -EIO; -+ } -+ return 0; - } - --/* -- * hdaps_device_init - initialize the accelerometer. Returns zero on success -- * and negative error code on failure. Can sleep. -+/** -+ * hdaps_device_init - initialize the accelerometer. -+ * -+ * Call several embedded controller functions to test and initialize the -+ * accelerometer. -+ * Returns zero on success and negative error code on failure. Can sleep. - */ -+#define FAILED_INIT(msg) pr_err("init failed at: %s\n", msg) - static int hdaps_device_init(void) - { -- int total, ret = -ENXIO; -+ int ret; -+ u8 mode; - -- mutex_lock(&hdaps_mtx); -+ ret = thinkpad_ec_lock(); -+ if (ret) -+ return ret; - -- outb(0x13, 0x1610); -- outb(0x01, 0x161f); -- if (__wait_latch(0x161f, 0x00)) -- goto out; -+ if (hdaps_get_ec_mode(&mode)) -+ { FAILED_INIT("hdaps_get_ec_mode failed"); goto bad; } - -- /* -- * Most ThinkPads return 0x01. -- * -- * Others--namely the R50p, T41p, and T42p--return 0x03. These laptops -- * have "inverted" axises. -- * -- * The 0x02 value occurs when the chip has been previously initialized. 
-- */ -- if (__check_latch(0x1611, 0x03) && -- __check_latch(0x1611, 0x02) && -- __check_latch(0x1611, 0x01)) -- goto out; -+ pr_debug("initial mode latch is 0x%02x\n", mode); -+ if (mode == 0x00) -+ { FAILED_INIT("accelerometer not available"); goto bad; } - -- printk(KERN_DEBUG "hdaps: initial latch check good (0x%02x)\n", -- __get_latch(0x1611)); -+ if (hdaps_check_ec()) -+ { FAILED_INIT("hdaps_check_ec failed"); goto bad; } - -- outb(0x17, 0x1610); -- outb(0x81, 0x1611); -- outb(0x01, 0x161f); -- if (__wait_latch(0x161f, 0x00)) -- goto out; -- if (__wait_latch(0x1611, 0x00)) -- goto out; -- if (__wait_latch(0x1612, 0x60)) -- goto out; -- if (__wait_latch(0x1613, 0x00)) -- goto out; -- outb(0x14, 0x1610); -- outb(0x01, 0x1611); -- outb(0x01, 0x161f); -- if (__wait_latch(0x161f, 0x00)) -- goto out; -- outb(0x10, 0x1610); -- outb(0xc8, 0x1611); -- outb(0x00, 0x1612); -- outb(0x02, 0x1613); -- outb(0x01, 0x161f); -- if (__wait_latch(0x161f, 0x00)) -- goto out; -- if (__device_refresh_sync()) -- goto out; -- if (__wait_latch(0x1611, 0x00)) -- goto out; -- -- /* we have done our dance, now let's wait for the applause */ -- for (total = INIT_TIMEOUT_MSECS; total > 0; total -= INIT_WAIT_MSECS) { -- int x, y; -+ if (hdaps_set_power(1)) -+ { FAILED_INIT("hdaps_set_power failed"); goto bad; } - -- /* a read of the device helps push it into action */ -- __hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y); -- if (!__wait_latch(0x1611, 0x02)) { -- ret = 0; -- break; -- } -+ if (hdaps_set_ec_config(sampling_rate*oversampling_ratio, -+ running_avg_filter_order)) -+ { FAILED_INIT("hdaps_set_ec_config failed"); goto bad; } - -- msleep(INIT_WAIT_MSECS); -- } -+ thinkpad_ec_invalidate(); -+ udelay(200); - --out: -- mutex_unlock(&hdaps_mtx); -+ /* Just prefetch instead of reading, to avoid ~1sec delay on load */ -+ ret = thinkpad_ec_prefetch_row(&ec_accel_args); -+ if (ret) -+ { FAILED_INIT("initial prefetch failed"); goto bad; } -+ goto good; -+bad: -+ thinkpad_ec_invalidate(); -+ ret = -ENXIO; -+good: -+ stale_readout = 1; -+ thinkpad_ec_unlock(); - return ret; - } - -+/** -+ * hdaps_device_shutdown - power off the accelerometer -+ * Returns nonzero on failure. Can sleep. -+ */ -+static int hdaps_device_shutdown(void) -+{ -+ int ret; -+ ret = hdaps_set_power(0); -+ if (ret) { -+ pr_warn("cannot power off\n"); -+ return ret; -+ } -+ ret = hdaps_set_ec_config(0, 1); -+ if (ret) -+ pr_warn("cannot stop EC sampling\n"); -+ return ret; -+} - - /* Device model stuff */ - -@@ -306,13 +424,29 @@ static int hdaps_probe(struct platform_device *dev) - } - - #ifdef CONFIG_PM_SLEEP -+static int hdaps_suspend(struct device *dev) -+{ -+ /* Don't do hdaps polls until resume re-initializes the sensor. */ -+ del_timer_sync(&hdaps_timer); -+ hdaps_device_shutdown(); /* ignore errors, effect is negligible */ -+ return 0; -+} -+ - static int hdaps_resume(struct device *dev) - { -- return hdaps_device_init(); -+ int ret = hdaps_device_init(); -+ if (ret) -+ return ret; -+ -+ mutex_lock(&hdaps_users_mtx); -+ if (hdaps_users) -+ mod_timer(&hdaps_timer, jiffies + HZ/sampling_rate); -+ mutex_unlock(&hdaps_users_mtx); -+ return 0; - } - #endif - --static SIMPLE_DEV_PM_OPS(hdaps_pm, NULL, hdaps_resume); -+static SIMPLE_DEV_PM_OPS(hdaps_pm, hdaps_suspend, hdaps_resume); - - static struct platform_driver hdaps_driver = { - .probe = hdaps_probe, -@@ -322,30 +456,51 @@ static struct platform_driver hdaps_driver = { - }, - }; - --/* -- * hdaps_calibrate - Set our "resting" values. Callers must hold hdaps_mtx. 
-+/** -+ * hdaps_calibrate - set our "resting" values. -+ * Does its own locking. - */ - static void hdaps_calibrate(void) - { -- __hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &rest_x, &rest_y); -+ needs_calibration = 1; -+ hdaps_update(); -+ /* If that fails, the mousedev poll will take care of things later. */ - } - --static void hdaps_mousedev_poll(struct input_polled_dev *dev) -+/* Timer handler for updating the input device. Runs in softirq context, -+ * so avoid lenghty or blocking operations. -+ */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0) -+static void hdaps_mousedev_poll(unsigned long unused) -+#else -+static void hdaps_mousedev_poll(struct timer_list *unused) -+#endif - { -- struct input_dev *input_dev = dev->input; -- int x, y; -+ int ret; - -- mutex_lock(&hdaps_mtx); -+ stale_readout = 1; - -- if (__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y)) -- goto out; -+ /* Cannot sleep. Try nonblockingly. If we fail, try again later. */ -+ if (thinkpad_ec_try_lock()) -+ goto keep_active; - -- input_report_abs(input_dev, ABS_X, x - rest_x); -- input_report_abs(input_dev, ABS_Y, y - rest_y); -- input_sync(input_dev); -+ ret = __hdaps_update(1); /* fast update, we're in softirq context */ -+ thinkpad_ec_unlock(); -+ /* Any of "successful", "not yet ready" and "not prefetched"? */ -+ if (ret != 0 && ret != -EBUSY && ret != -ENODATA) { -+ pr_err("poll failed, disabling updates\n"); -+ return; -+ } - --out: -- mutex_unlock(&hdaps_mtx); -+keep_active: -+ /* Even if we failed now, pos_x,y may have been updated earlier: */ -+ input_report_abs(hdaps_idev, ABS_X, pos_x - rest_x); -+ input_report_abs(hdaps_idev, ABS_Y, pos_y - rest_y); -+ input_sync(hdaps_idev); -+ input_report_abs(hdaps_idev_raw, ABS_X, pos_x); -+ input_report_abs(hdaps_idev_raw, ABS_Y, pos_y); -+ input_sync(hdaps_idev_raw); -+ mod_timer(&hdaps_timer, jiffies + HZ/sampling_rate); - } - - -@@ -354,65 +509,41 @@ static void hdaps_mousedev_poll(struct input_polled_dev *dev) - static ssize_t hdaps_position_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -- int ret, x, y; -- -- ret = hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y); -+ int ret = hdaps_update(); - if (ret) - return ret; -- -- return sprintf(buf, "(%d,%d)\n", x, y); --} -- --static ssize_t hdaps_variance_show(struct device *dev, -- struct device_attribute *attr, char *buf) --{ -- int ret, x, y; -- -- ret = hdaps_read_pair(HDAPS_PORT_XVAR, HDAPS_PORT_YVAR, &x, &y); -- if (ret) -- return ret; -- -- return sprintf(buf, "(%d,%d)\n", x, y); -+ return sprintf(buf, "(%d,%d)\n", pos_x, pos_y); - } - - static ssize_t hdaps_temp1_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -- u8 uninitialized_var(temp); -- int ret; -- -- ret = hdaps_readb_one(HDAPS_PORT_TEMP1, &temp); -- if (ret) -- return ret; -- -- return sprintf(buf, "%u\n", temp); --} -- --static ssize_t hdaps_temp2_show(struct device *dev, -- struct device_attribute *attr, char *buf) --{ -- u8 uninitialized_var(temp); -- int ret; -- -- ret = hdaps_readb_one(HDAPS_PORT_TEMP2, &temp); -+ int ret = hdaps_update(); - if (ret) - return ret; -- -- return sprintf(buf, "%u\n", temp); -+ return sprintf(buf, "%d\n", temperature); - } - - static ssize_t hdaps_keyboard_activity_show(struct device *dev, - struct device_attribute *attr, - char *buf) - { -- return sprintf(buf, "%u\n", KEYBD_ISSET(km_activity)); -+ int ret = hdaps_update(); -+ if (ret) -+ return ret; -+ return sprintf(buf, "%u\n", -+ get_jiffies_64() < last_keyboard_jiffies + 
KMACT_REMEMBER_PERIOD); - } - - static ssize_t hdaps_mouse_activity_show(struct device *dev, - struct device_attribute *attr, - char *buf) - { -- return sprintf(buf, "%u\n", MOUSE_ISSET(km_activity)); -+ int ret = hdaps_update(); -+ if (ret) -+ return ret; -+ return sprintf(buf, "%u\n", -+ get_jiffies_64() < last_mouse_jiffies + KMACT_REMEMBER_PERIOD); - } - - static ssize_t hdaps_calibrate_show(struct device *dev, -@@ -425,10 +556,7 @@ static ssize_t hdaps_calibrate_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) - { -- mutex_lock(&hdaps_mtx); - hdaps_calibrate(); -- mutex_unlock(&hdaps_mtx); -- - return count; - } - -@@ -445,7 +573,7 @@ static ssize_t hdaps_invert_store(struct device *dev, - int invert; - - if (sscanf(buf, "%d", &invert) != 1 || -- invert < 0 || invert > HDAPS_BOTH_AXES) -+ invert < 0 || invert > HDAPS_ORIENT_MAX) - return -EINVAL; - - hdaps_invert = invert; -@@ -454,24 +582,128 @@ static ssize_t hdaps_invert_store(struct device *dev, - return count; - } - -+static ssize_t hdaps_sampling_rate_show( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%d\n", sampling_rate); -+} -+ -+static ssize_t hdaps_sampling_rate_store( -+ struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int rate, ret; -+ if (sscanf(buf, "%d", &rate) != 1 || rate > HZ || rate <= 0) { -+ pr_warn("must have 0ident); -- return 1; --} -- - /* hdaps_dmi_match_invert - found an inverted match. */ - static int __init hdaps_dmi_match_invert(const struct dmi_system_id *id) - { -- hdaps_invert = (unsigned long)id->driver_data; -- pr_info("inverting axis (%u) readings\n", hdaps_invert); -- return hdaps_dmi_match(id); -+ unsigned int orient = (kernel_ulong_t) id->driver_data; -+ hdaps_invert = orient; -+ pr_info("%s detected, setting orientation %u\n", id->ident, orient); -+ return 1; /* stop enumeration */ - } - --#define HDAPS_DMI_MATCH_INVERT(vendor, model, axes) { \ -+#define HDAPS_DMI_MATCH_INVERT(vendor, model, orient) { \ - .ident = vendor " " model, \ - .callback = hdaps_dmi_match_invert, \ -- .driver_data = (void *)axes, \ -+ .driver_data = (void *)(orient), \ - .matches = { \ - DMI_MATCH(DMI_BOARD_VENDOR, vendor), \ - DMI_MATCH(DMI_PRODUCT_VERSION, model) \ - } \ - } - --#define HDAPS_DMI_MATCH_NORMAL(vendor, model) \ -- HDAPS_DMI_MATCH_INVERT(vendor, model, 0) -- --/* Note that HDAPS_DMI_MATCH_NORMAL("ThinkPad T42") would match -- "ThinkPad T42p", so the order of the entries matters. -- If your ThinkPad is not recognized, please update to latest -- BIOS. This is especially the case for some R52 ThinkPads. 
*/ --static const struct dmi_system_id hdaps_whitelist[] __initconst = { -- HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad R50p", HDAPS_BOTH_AXES), -- HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad R50"), -- HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad R51"), -- HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad R52"), -- HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R61i", HDAPS_BOTH_AXES), -- HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R61", HDAPS_BOTH_AXES), -- HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T41p", HDAPS_BOTH_AXES), -- HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T41"), -- HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T42p", HDAPS_BOTH_AXES), -- HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T42"), -- HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T43"), -- HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T400", HDAPS_BOTH_AXES), -- HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T60", HDAPS_BOTH_AXES), -- HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T61p", HDAPS_BOTH_AXES), -- HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T61", HDAPS_BOTH_AXES), -- HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad X40"), -- HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad X41", HDAPS_Y_AXIS), -- HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60", HDAPS_BOTH_AXES), -- HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X61s", HDAPS_BOTH_AXES), -- HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X61", HDAPS_BOTH_AXES), -- HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad Z60m"), -- HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad Z61m", HDAPS_BOTH_AXES), -- HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad Z61p", HDAPS_BOTH_AXES), -+/* List of models with abnormal axis configuration. -+ Note that HDAPS_DMI_MATCH_NORMAL("ThinkPad T42") would match -+ "ThinkPad T42p", and enumeration stops after first match, -+ so the order of the entries matters. */ -+const struct dmi_system_id hdaps_whitelist[] __initconst = { -+ HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad R50p", HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad R60", HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T41p", HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T42p", HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad X40", HDAPS_ORIENT_INVERT_Y), -+ HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad X41", HDAPS_ORIENT_INVERT_Y), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R60", HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R61", HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R400", HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R500", HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T60", HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T61", HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60 Tablet", HDAPS_ORIENT_INVERT_Y), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60s", HDAPS_ORIENT_INVERT_Y), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X61", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T400s", HDAPS_ORIENT_INVERT_X), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T400", HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T410s", HDAPS_ORIENT_SWAP), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T410", HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T500", HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T510", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X | 
HDAPS_ORIENT_INVERT_Y), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad W510", HDAPS_ORIENT_MAX), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad W520", HDAPS_ORIENT_MAX), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X200s", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X200", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X | HDAPS_ORIENT_INVERT_Y), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X201 Tablet", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X201s", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_XY), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X201", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X), -+ HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X220", HDAPS_ORIENT_SWAP), - { .ident = NULL } - }; - - static int __init hdaps_init(void) - { -- struct input_dev *idev; - int ret; - -- if (!dmi_check_system(hdaps_whitelist)) { -- pr_warn("supported laptop not found!\n"); -- ret = -ENODEV; -- goto out; -- } -- -- if (!request_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS, "hdaps")) { -- ret = -ENXIO; -- goto out; -- } -- -+ /* Determine axis orientation orientation */ -+ if (hdaps_invert == HDAPS_ORIENT_UNDEFINED) /* set by module param? */ -+ if (dmi_check_system(hdaps_whitelist) < 1) /* in whitelist? */ -+ hdaps_invert = 0; /* default */ -+ -+ /* Init timer before platform_driver_register, in case of suspend */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0) -+ init_timer(&hdaps_timer); -+ hdaps_timer.function = hdaps_mousedev_poll; -+#else -+ timer_setup(&hdaps_timer, hdaps_mousedev_poll, 0); -+#endif - ret = platform_driver_register(&hdaps_driver); - if (ret) -- goto out_region; -+ goto out; - - pdev = platform_device_register_simple("hdaps", -1, NULL, 0); - if (IS_ERR(pdev)) { -@@ -571,47 +801,79 @@ static int __init hdaps_init(void) - if (ret) - goto out_device; - -- hdaps_idev = input_allocate_polled_device(); -+ hdaps_idev = input_allocate_device(); - if (!hdaps_idev) { - ret = -ENOMEM; - goto out_group; - } - -- hdaps_idev->poll = hdaps_mousedev_poll; -- hdaps_idev->poll_interval = HDAPS_POLL_INTERVAL; -- -- /* initial calibrate for the input device */ -- hdaps_calibrate(); -+ hdaps_idev_raw = input_allocate_device(); -+ if (!hdaps_idev_raw) { -+ ret = -ENOMEM; -+ goto out_idev_first; -+ } - -- /* initialize the input class */ -- idev = hdaps_idev->input; -- idev->name = "hdaps"; -- idev->phys = "isa1600/input0"; -- idev->id.bustype = BUS_ISA; -- idev->dev.parent = &pdev->dev; -- idev->evbit[0] = BIT_MASK(EV_ABS); -- input_set_abs_params(idev, ABS_X, -+ /* calibration for the input device (deferred to avoid delay) */ -+ needs_calibration = 1; -+ -+ /* initialize the joystick-like fuzzed input device */ -+ hdaps_idev->name = "ThinkPad HDAPS joystick emulation"; -+ hdaps_idev->phys = "hdaps/input0"; -+ hdaps_idev->id.bustype = BUS_HOST; -+ hdaps_idev->id.vendor = HDAPS_INPUT_VENDOR; -+ hdaps_idev->id.product = HDAPS_INPUT_PRODUCT; -+ hdaps_idev->id.version = HDAPS_INPUT_JS_VERSION; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) -+ hdaps_idev->cdev.dev = &pdev->dev; -+#endif -+ hdaps_idev->evbit[0] = BIT(EV_ABS); -+ hdaps_idev->open = hdaps_mousedev_open; -+ hdaps_idev->close = hdaps_mousedev_close; -+ input_set_abs_params(hdaps_idev, ABS_X, - -256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT); -- input_set_abs_params(idev, ABS_Y, -+ input_set_abs_params(hdaps_idev, ABS_Y, - -256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT); - -- ret = input_register_polled_device(hdaps_idev); -+ ret = input_register_device(hdaps_idev); 
- if (ret) - goto out_idev; - -- pr_info("driver successfully loaded\n"); -+ /* initialize the raw data input device */ -+ hdaps_idev_raw->name = "ThinkPad HDAPS accelerometer data"; -+ hdaps_idev_raw->phys = "hdaps/input1"; -+ hdaps_idev_raw->id.bustype = BUS_HOST; -+ hdaps_idev_raw->id.vendor = HDAPS_INPUT_VENDOR; -+ hdaps_idev_raw->id.product = HDAPS_INPUT_PRODUCT; -+ hdaps_idev_raw->id.version = HDAPS_INPUT_RAW_VERSION; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) -+ hdaps_idev_raw->cdev.dev = &pdev->dev; -+#endif -+ hdaps_idev_raw->evbit[0] = BIT(EV_ABS); -+ hdaps_idev_raw->open = hdaps_mousedev_open; -+ hdaps_idev_raw->close = hdaps_mousedev_close; -+ input_set_abs_params(hdaps_idev_raw, ABS_X, -32768, 32767, 0, 0); -+ input_set_abs_params(hdaps_idev_raw, ABS_Y, -32768, 32767, 0, 0); -+ -+ ret = input_register_device(hdaps_idev_raw); -+ if (ret) -+ goto out_idev_reg_first; -+ -+ pr_info("driver successfully loaded.\n"); - return 0; - -+out_idev_reg_first: -+ input_unregister_device(hdaps_idev); - out_idev: -- input_free_polled_device(hdaps_idev); -+ input_free_device(hdaps_idev_raw); -+out_idev_first: -+ input_free_device(hdaps_idev); - out_group: - sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group); - out_device: - platform_device_unregister(pdev); - out_driver: - platform_driver_unregister(&hdaps_driver); --out_region: -- release_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS); -+ hdaps_device_shutdown(); - out: - pr_warn("driver init failed (ret=%d)!\n", ret); - return ret; -@@ -619,12 +881,12 @@ static int __init hdaps_init(void) - - static void __exit hdaps_exit(void) - { -- input_unregister_polled_device(hdaps_idev); -- input_free_polled_device(hdaps_idev); -+ input_unregister_device(hdaps_idev_raw); -+ input_unregister_device(hdaps_idev); -+ hdaps_device_shutdown(); /* ignore errors, effect is negligible */ - sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group); - platform_device_unregister(pdev); - platform_driver_unregister(&hdaps_driver); -- release_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS); - - pr_info("driver unloaded\n"); - } -@@ -632,9 +894,8 @@ static void __exit hdaps_exit(void) - module_init(hdaps_init); - module_exit(hdaps_exit); - --module_param_named(invert, hdaps_invert, int, 0); --MODULE_PARM_DESC(invert, "invert data along each axis. 1 invert x-axis, " -- "2 invert y-axis, 3 invert both axes."); -+module_param_named(invert, hdaps_invert, uint, 0); -+MODULE_PARM_DESC(invert, "axis orientation code"); - - MODULE_AUTHOR("Robert Love"); - MODULE_DESCRIPTION("IBM Hard Drive Active Protection System (HDAPS) driver"); -diff --git a/drivers/platform/x86/thinkpad_ec.c b/drivers/platform/x86/thinkpad_ec.c -new file mode 100644 -index 000000000000..597614bc17e6 ---- /dev/null -+++ b/drivers/platform/x86/thinkpad_ec.c -@@ -0,0 +1,513 @@ -+/* -+ * thinkpad_ec.c - ThinkPad embedded controller LPC3 functions -+ * -+ * The embedded controller on ThinkPad laptops has a non-standard interface, -+ * where LPC channel 3 of the H8S EC chip is hooked up to IO ports -+ * 0x1600-0x161F and implements (a special case of) the H8S LPC protocol. -+ * The EC LPC interface provides various system management services (currently -+ * known: battery information and accelerometer readouts). This driver -+ * provides access and mutual exclusion for the EC interface. 
-+* -+ * The LPC protocol and terminology are documented here: -+ * "H8S/2104B Group Hardware Manual", -+ * http://documentation.renesas.com/eng/products/mpumcu/rej09b0300_2140bhm.pdf -+ * -+ * Copyright (C) 2006-2007 Shem Multinymous -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) -+ #include -+#else -+ #include -+#endif -+ -+#define TP_VERSION "0.42" -+ -+MODULE_AUTHOR("Shem Multinymous"); -+MODULE_DESCRIPTION("ThinkPad embedded controller hardware access"); -+MODULE_VERSION(TP_VERSION); -+MODULE_LICENSE("GPL"); -+ -+/* IO ports used by embedded controller LPC channel 3: */ -+#define TPC_BASE_PORT 0x1600 -+#define TPC_NUM_PORTS 0x20 -+#define TPC_STR3_PORT 0x1604 /* Reads H8S EC register STR3 */ -+#define TPC_TWR0_PORT 0x1610 /* Mapped to H8S EC register TWR0MW/SW */ -+#define TPC_TWR15_PORT 0x161F /* Mapped to H8S EC register TWR15. */ -+ /* (and port TPC_TWR0_PORT+i is mapped to H8S reg TWRi for 00x%02x", \ -+ msg, args->val[0x0], args->val[0xF], code) -+ -+/* State of request prefetching: */ -+static u8 prefetch_arg0, prefetch_argF; /* Args of last prefetch */ -+static u64 prefetch_jiffies; /* time of prefetch, or: */ -+#define TPC_PREFETCH_NONE INITIAL_JIFFIES /* No prefetch */ -+#define TPC_PREFETCH_JUNK (INITIAL_JIFFIES+1) /* Ignore prefetch */ -+ -+/* Locking: */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) -+static DECLARE_MUTEX(thinkpad_ec_mutex); -+#else -+static DEFINE_SEMAPHORE(thinkpad_ec_mutex); -+#endif -+ -+/* Kludge in case the ACPI DSDT reserves the ports we need. */ -+static bool force_io; /* Willing to do IO to ports we couldn't reserve? */ -+static int reserved_io; /* Successfully reserved the ports? */ -+module_param_named(force_io, force_io, bool, 0600); -+MODULE_PARM_DESC(force_io, "Force IO even if region already reserved (0=off, 1=on)"); -+ -+/** -+ * thinkpad_ec_lock - get lock on the ThinkPad EC -+ * -+ * Get exclusive lock for accesing the ThinkPad embedded controller LPC3 -+ * interface. Returns 0 iff lock acquired. -+ */ -+int thinkpad_ec_lock(void) -+{ -+ int ret; -+ ret = down_interruptible(&thinkpad_ec_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(thinkpad_ec_lock); -+ -+/** -+ * thinkpad_ec_try_lock - try getting lock on the ThinkPad EC -+ * -+ * Try getting an exclusive lock for accesing the ThinkPad embedded -+ * controller LPC3. Returns immediately if lock is not available; neither -+ * blocks nor sleeps. Returns 0 iff lock acquired . 
-+ */ -+int thinkpad_ec_try_lock(void) -+{ -+ return down_trylock(&thinkpad_ec_mutex); -+} -+EXPORT_SYMBOL_GPL(thinkpad_ec_try_lock); -+ -+/** -+ * thinkpad_ec_unlock - release lock on ThinkPad EC -+ * -+ * Release a previously acquired exclusive lock on the ThinkPad ebmedded -+ * controller LPC3 interface. -+ */ -+void thinkpad_ec_unlock(void) -+{ -+ up(&thinkpad_ec_mutex); -+} -+EXPORT_SYMBOL_GPL(thinkpad_ec_unlock); -+ -+/** -+ * thinkpad_ec_request_row - tell embedded controller to prepare a row -+ * @args Input register arguments -+ * -+ * Requests a data row by writing to H8S LPC registers TRW0 through TWR15 (or -+ * a subset thereof) following the protocol prescribed by the "H8S/2104B Group -+ * Hardware Manual". Does sanity checks via status register STR3. -+ */ -+static int thinkpad_ec_request_row(const struct thinkpad_ec_row *args) -+{ -+ u8 str3; -+ int i; -+ -+ /* EC protocol requires write to TWR0 (function code): */ -+ if (!(args->mask & 0x0001)) { -+ printk(KERN_ERR MSG_FMT("bad args->mask=0x%02x", args->mask)); -+ return -EINVAL; -+ } -+ -+ /* Check initial STR3 status: */ -+ str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK; -+ if (str3 & H8S_STR3_OBF3B) { /* data already pending */ -+ inb(TPC_TWR15_PORT); /* marks end of previous transaction */ -+ if (prefetch_jiffies == TPC_PREFETCH_NONE) -+ printk(KERN_WARNING REQ_FMT( -+ "EC has result from unrequested transaction", -+ str3)); -+ return -EBUSY; /* EC will be ready in a few usecs */ -+ } else if (str3 == H8S_STR3_SWMF) { /* busy with previous request */ -+ if (prefetch_jiffies == TPC_PREFETCH_NONE) -+ printk(KERN_WARNING REQ_FMT( -+ "EC is busy with unrequested transaction", -+ str3)); -+ return -EBUSY; /* data will be pending in a few usecs */ -+ } else if (str3 != 0x00) { /* unexpected status? */ -+ printk(KERN_WARNING REQ_FMT("unexpected initial STR3", str3)); -+ return -EIO; -+ } -+ -+ /* Send TWR0MW: */ -+ outb(args->val[0], TPC_TWR0_PORT); -+ str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK; -+ if (str3 != H8S_STR3_MWMF) { /* not accepted? */ -+ printk(KERN_WARNING REQ_FMT("arg0 rejected", str3)); -+ return -EIO; -+ } -+ -+ /* Send TWR1 through TWR14: */ -+ for (i = 1; i < TP_CONTROLLER_ROW_LEN-1; i++) -+ if ((args->mask>>i)&1) -+ outb(args->val[i], TPC_TWR0_PORT+i); -+ -+ /* Send TWR15 (default to 0x01). This marks end of command. */ -+ outb((args->mask & 0x8000) ? args->val[0xF] : 0x01, TPC_TWR15_PORT); -+ -+ /* Wait until EC starts writing its reply (~60ns on average). -+ * Releasing locks before this happens may cause an EC hang -+ * due to firmware bug! -+ */ -+ for (i = 0; i < TPC_REQUEST_RETRIES; i++) { -+ str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK; -+ if (str3 & H8S_STR3_SWMF) /* EC started replying */ -+ return 0; -+ else if (!(str3 & ~(H8S_STR3_IBF3B|H8S_STR3_MWMF))) -+ /* Normal progress (the EC hasn't seen the request -+ * yet, or is processing it). Wait it out. */ -+ ndelay(TPC_REQUEST_NDELAY); -+ else { /* weird EC status */ -+ printk(KERN_WARNING -+ REQ_FMT("bad end STR3", str3)); -+ return -EIO; -+ } -+ } -+ printk(KERN_WARNING REQ_FMT("EC is mysteriously silent", str3)); -+ return -EIO; -+} -+ -+/** -+ * thinkpad_ec_read_data - read pre-requested row-data from EC -+ * @args Input register arguments of pre-requested rows -+ * @data Output register values -+ * -+ * Reads current row data from the controller, assuming it's already -+ * requested. Follows the H8S spec for register access and status checks. 
-+ */ -+static int thinkpad_ec_read_data(const struct thinkpad_ec_row *args, -+ struct thinkpad_ec_row *data) -+{ -+ int i; -+ u8 str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK; -+ /* Once we make a request, STR3 assumes the sequence of values listed -+ * in the following 'if' as it reads the request and writes its data. -+ * It takes about a few dozen nanosecs total, with very high variance. -+ */ -+ if (str3 == (H8S_STR3_IBF3B|H8S_STR3_MWMF) || -+ str3 == 0x00 || /* the 0x00 is indistinguishable from idle EC! */ -+ str3 == H8S_STR3_SWMF) -+ return -EBUSY; /* not ready yet */ -+ /* Finally, the EC signals output buffer full: */ -+ if (str3 != (H8S_STR3_OBF3B|H8S_STR3_SWMF)) { -+ printk(KERN_WARNING -+ REQ_FMT("bad initial STR3", str3)); -+ return -EIO; -+ } -+ -+ /* Read first byte (signals start of read transactions): */ -+ data->val[0] = inb(TPC_TWR0_PORT); -+ /* Optionally read 14 more bytes: */ -+ for (i = 1; i < TP_CONTROLLER_ROW_LEN-1; i++) -+ if ((data->mask >> i)&1) -+ data->val[i] = inb(TPC_TWR0_PORT+i); -+ /* Read last byte from 0x161F (signals end of read transaction): */ -+ data->val[0xF] = inb(TPC_TWR15_PORT); -+ -+ /* Readout still pending? */ -+ str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK; -+ if (str3 & H8S_STR3_OBF3B) -+ printk(KERN_WARNING -+ REQ_FMT("OBF3B=1 after read", str3)); -+ /* If port 0x161F returns 0x80 too often, the EC may lock up. Warn: */ -+ if (data->val[0xF] == 0x80) -+ printk(KERN_WARNING -+ REQ_FMT("0x161F reports error", data->val[0xF])); -+ return 0; -+} -+ -+/** -+ * thinkpad_ec_is_row_fetched - is the given row currently prefetched? -+ * -+ * To keep things simple we compare only the first and last args; -+ * this suffices for all known cases. -+ */ -+static int thinkpad_ec_is_row_fetched(const struct thinkpad_ec_row *args) -+{ -+ return (prefetch_jiffies != TPC_PREFETCH_NONE) && -+ (prefetch_jiffies != TPC_PREFETCH_JUNK) && -+ (prefetch_arg0 == args->val[0]) && -+ (prefetch_argF == args->val[0xF]) && -+ (get_jiffies_64() < prefetch_jiffies + TPC_PREFETCH_TIMEOUT); -+} -+ -+/** -+ * thinkpad_ec_read_row - request and read data from ThinkPad EC -+ * @args Input register arguments -+ * @data Output register values -+ * -+ * Read a data row from the ThinkPad embedded controller LPC3 interface. -+ * Does fetching and retrying if needed. The row is specified by an -+ * array of 16 bytes, some of which may be undefined (but the first is -+ * mandatory). These bytes are given in @args->val[], where @args->val[i] is -+ * used iff (@args->mask>>i)&1). The resulting row data is stored in -+ * @data->val[], but is only guaranteed to be valid for indices corresponding -+ * to set bit in @data->mask. That is, if @data->mask&(1<val[i] is undefined. -+ * -+ * Returns -EBUSY on transient error and -EIO on abnormal condition. -+ * Caller must hold controller lock. 
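The kernel-doc above pins down the calling convention, but the masking rule is easy to misread: a transaction is a 16-byte register row (TWR0..TWR15) plus a 16-bit mask, and val[i] is only meaningful when bit i of the mask is set, with val[0] (the function code) always required. A standalone sketch of just that convention, using a stand-in struct rather than the real thinkpad_ec_row and the same 0x8001 battery-status mask that thinkpad_ec_test() uses further down:

/* Row/mask convention sketch; compile with: gcc -o rowmask rowmask.c */
#include <stdio.h>
#include <stdint.h>

#define ROW_LEN 16                      /* TWR0..TWR15 */

struct ec_row {                         /* stand-in for struct thinkpad_ec_row */
        uint16_t mask;                  /* bit i set => val[i] is meaningful */
        uint8_t  val[ROW_LEN];
};

int main(void)
{
        /* mask 0x8001 selects val[0] (function code) and val[15] only */
        struct ec_row args = { .mask = 0x8001, .val = { 0x01 } };
        int i;

        for (i = 0; i < ROW_LEN; i++) {
                if ((args.mask >> i) & 1)
                        printf("TWR%-2d <- 0x%02x\n", i, args.val[i]);
                else
                        printf("TWR%-2d    (not written)\n", i);
        }
        return 0;
}

The same rule applies on output: after a read, only the bytes whose bit is set in data->mask are defined.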
-+ */ -+int thinkpad_ec_read_row(const struct thinkpad_ec_row *args, -+ struct thinkpad_ec_row *data) -+{ -+ int retries, ret; -+ -+ if (thinkpad_ec_is_row_fetched(args)) -+ goto read_row; /* already requested */ -+ -+ /* Request the row */ -+ for (retries = 0; retries < TPC_READ_RETRIES; ++retries) { -+ ret = thinkpad_ec_request_row(args); -+ if (!ret) -+ goto read_row; -+ if (ret != -EBUSY) -+ break; -+ ndelay(TPC_READ_NDELAY); -+ } -+ printk(KERN_ERR REQ_FMT("failed requesting row", ret)); -+ goto out; -+ -+read_row: -+ /* Read the row's data */ -+ for (retries = 0; retries < TPC_READ_RETRIES; ++retries) { -+ ret = thinkpad_ec_read_data(args, data); -+ if (!ret) -+ goto out; -+ if (ret != -EBUSY) -+ break; -+ ndelay(TPC_READ_NDELAY); -+ } -+ -+ printk(KERN_ERR REQ_FMT("failed waiting for data", ret)); -+ -+out: -+ prefetch_jiffies = TPC_PREFETCH_JUNK; -+ return ret; -+} -+EXPORT_SYMBOL_GPL(thinkpad_ec_read_row); -+ -+/** -+ * thinkpad_ec_try_read_row - try reading prefetched data from ThinkPad EC -+ * @args Input register arguments -+ * @data Output register values -+ * -+ * Try reading a data row from the ThinkPad embedded controller LPC3 -+ * interface, if this raw was recently prefetched using -+ * thinkpad_ec_prefetch_row(). Does not fetch, retry or block. -+ * The parameters have the same meaning as in thinkpad_ec_read_row(). -+ * -+ * Returns -EBUSY is data not ready and -ENODATA if row not prefetched. -+ * Caller must hold controller lock. -+ */ -+int thinkpad_ec_try_read_row(const struct thinkpad_ec_row *args, -+ struct thinkpad_ec_row *data) -+{ -+ int ret; -+ if (!thinkpad_ec_is_row_fetched(args)) { -+ ret = -ENODATA; -+ } else { -+ ret = thinkpad_ec_read_data(args, data); -+ if (!ret) -+ prefetch_jiffies = TPC_PREFETCH_NONE; /* eaten up */ -+ } -+ return ret; -+} -+EXPORT_SYMBOL_GPL(thinkpad_ec_try_read_row); -+ -+/** -+ * thinkpad_ec_prefetch_row - prefetch data from ThinkPad EC -+ * @args Input register arguments -+ * -+ * Prefetch a data row from the ThinkPad embedded controller LCP3 -+ * interface. A subsequent call to thinkpad_ec_read_row() with the -+ * same arguments will be faster, and a subsequent call to -+ * thinkpad_ec_try_read_row() stands a good chance of succeeding if -+ * done neither too soon nor too late. See -+ * thinkpad_ec_read_row() for the meaning of @args. -+ * -+ * Returns -EBUSY on transient error and -EIO on abnormal condition. -+ * Caller must hold controller lock. -+ */ -+int thinkpad_ec_prefetch_row(const struct thinkpad_ec_row *args) -+{ -+ int ret; -+ ret = thinkpad_ec_request_row(args); -+ if (ret) { -+ prefetch_jiffies = TPC_PREFETCH_JUNK; -+ } else { -+ prefetch_jiffies = get_jiffies_64(); -+ prefetch_arg0 = args->val[0x0]; -+ prefetch_argF = args->val[0xF]; -+ } -+ return ret; -+} -+EXPORT_SYMBOL_GPL(thinkpad_ec_prefetch_row); -+ -+/** -+ * thinkpad_ec_invalidate - invalidate prefetched ThinkPad EC data -+ * -+ * Invalidate the data prefetched via thinkpad_ec_prefetch_row() from the -+ * ThinkPad embedded controller LPC3 interface. -+ * Must be called before unlocking by any code that accesses the controller -+ * ports directly. -+ */ -+void thinkpad_ec_invalidate(void) -+{ -+ prefetch_jiffies = TPC_PREFETCH_JUNK; -+} -+EXPORT_SYMBOL_GPL(thinkpad_ec_invalidate); -+ -+ -+/*** Checking for EC hardware ***/ -+ -+/** -+ * thinkpad_ec_test - verify the EC is present and follows protocol -+ * -+ * Ensure the EC LPC3 channel really works on this machine by making -+ * an EC request and seeing if the EC follows the documented H8S protocol. 
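thinkpad_ec_read_row() above wraps both the request and the readout in the same bounded retry loop: -EBUSY is treated as transient and retried after a short delay, any other error aborts immediately, and the prefetch bookkeeping is marked junk on the way out. A minimal userspace rendering of that control flow, with a fake EC standing in for the port I/O and illustrative retry/delay values rather than the patch's TPC_* constants:

/* Bounded retry on transient errors, modelled on thinkpad_ec_read_row(). */
#include <stdio.h>
#include <errno.h>
#include <unistd.h>

#define READ_RETRIES 12                 /* illustrative, not TPC_READ_RETRIES */

static int fake_ec_op(void)             /* reports "busy" three times, then succeeds */
{
        static int calls;
        return (++calls < 4) ? -EBUSY : 0;
}

static int read_with_retries(void)
{
        int retries, ret = -EIO;

        for (retries = 0; retries < READ_RETRIES; ++retries) {
                ret = fake_ec_op();
                if (!ret)
                        return 0;       /* done */
                if (ret != -EBUSY)
                        break;          /* hard error: do not retry */
                usleep(100);            /* stand-in for ndelay(TPC_READ_NDELAY) */
        }
        fprintf(stderr, "failed waiting for data (ret=%d)\n", ret);
        return ret;
}

int main(void)
{
        printf("result: %d\n", read_with_retries());
        return 0;
}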
-+ * The requested row just reads battery status, so it should be harmless to -+ * access it (on a correct EC). -+ * This test writes to IO ports, so execute only after checking DMI. -+ */ -+static int __init thinkpad_ec_test(void) -+{ -+ int ret; -+ const struct thinkpad_ec_row args = /* battery 0 basic status */ -+ { .mask = 0x8001, .val = {0x01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x00} }; -+ struct thinkpad_ec_row data = { .mask = 0x0000 }; -+ ret = thinkpad_ec_lock(); -+ if (ret) -+ return ret; -+ ret = thinkpad_ec_read_row(&args, &data); -+ thinkpad_ec_unlock(); -+ return ret; -+} -+ -+/* Search all DMI device names of a given type for a substring */ -+static int __init dmi_find_substring(int type, const char *substr) -+{ -+ const struct dmi_device *dev = NULL; -+ while ((dev = dmi_find_device(type, NULL, dev))) { -+ if (strstr(dev->name, substr)) -+ return 1; -+ } -+ return 0; -+} -+ -+#define TP_DMI_MATCH(vendor,model) { \ -+ .ident = vendor " " model, \ -+ .matches = { \ -+ DMI_MATCH(DMI_BOARD_VENDOR, vendor), \ -+ DMI_MATCH(DMI_PRODUCT_VERSION, model) \ -+ } \ -+} -+ -+/* Check DMI for existence of ThinkPad embedded controller */ -+static int __init check_dmi_for_ec(void) -+{ -+ /* A few old models that have a good EC but don't report it in DMI */ -+ struct dmi_system_id tp_whitelist[] = { -+ TP_DMI_MATCH("IBM", "ThinkPad A30"), -+ TP_DMI_MATCH("IBM", "ThinkPad T23"), -+ TP_DMI_MATCH("IBM", "ThinkPad X24"), -+ TP_DMI_MATCH("LENOVO", "ThinkPad"), -+ { .ident = NULL } -+ }; -+ return dmi_find_substring(DMI_DEV_TYPE_OEM_STRING, -+ "IBM ThinkPad Embedded Controller") || -+ dmi_check_system(tp_whitelist); -+} -+ -+/*** Init and cleanup ***/ -+ -+static int __init thinkpad_ec_init(void) -+{ -+ if (!check_dmi_for_ec()) { -+ printk(KERN_WARNING -+ "thinkpad_ec: no ThinkPad embedded controller!\n"); -+ return -ENODEV; -+ } -+ -+ if (request_region(TPC_BASE_PORT, TPC_NUM_PORTS, "thinkpad_ec")) { -+ reserved_io = 1; -+ } else { -+ printk(KERN_ERR "thinkpad_ec: cannot claim IO ports %#x-%#x... ", -+ TPC_BASE_PORT, -+ TPC_BASE_PORT + TPC_NUM_PORTS - 1); -+ if (force_io) { -+ printk("forcing use of unreserved IO ports.\n"); -+ } else { -+ printk("consider using force_io=1.\n"); -+ return -ENXIO; -+ } -+ } -+ prefetch_jiffies = TPC_PREFETCH_JUNK; -+ if (thinkpad_ec_test()) { -+ printk(KERN_ERR "thinkpad_ec: initial ec test failed\n"); -+ if (reserved_io) -+ release_region(TPC_BASE_PORT, TPC_NUM_PORTS); -+ return -ENXIO; -+ } -+ printk(KERN_INFO "thinkpad_ec: thinkpad_ec " TP_VERSION " loaded.\n"); -+ return 0; -+} -+ -+static void __exit thinkpad_ec_exit(void) -+{ -+ if (reserved_io) -+ release_region(TPC_BASE_PORT, TPC_NUM_PORTS); -+ printk(KERN_INFO "thinkpad_ec: unloaded.\n"); -+} -+ -+module_init(thinkpad_ec_init); -+module_exit(thinkpad_ec_exit); -diff --git a/drivers/platform/x86/tp_smapi.c b/drivers/platform/x86/tp_smapi.c -new file mode 100644 -index 000000000000..209cb6487e24 ---- /dev/null -+++ b/drivers/platform/x86/tp_smapi.c -@@ -0,0 +1,1493 @@ -+/* -+ * tp_smapi.c - ThinkPad SMAPI support -+ * -+ * This driver exposes some features of the System Management Application -+ * Program Interface (SMAPI) BIOS found on ThinkPad laptops. It works on -+ * models in which the SMAPI BIOS runs in SMM and is invoked by writing -+ * to the APM control port 0xB2. -+ * It also exposes battery status information, obtained from the ThinkPad -+ * embedded controller (via the thinkpad_ec module). -+ * Ancient ThinkPad models use a different interface, supported by the -+ * "thinkpad" module from "tpctl". 
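From userspace, everything this driver exposes ends up as small text files under the sysfs hierarchy declared further down ("/sys/devices/platform/smapi/" plus the per-battery "BAT0"/"BAT1" groups). A minimal sketch of reading a few of those attributes and setting a stop threshold, assuming the module is loaded, sysfs is mounted at /sys, and the write is done as root:

/* Query tp_smapi sysfs attributes and cap charging at 80%. */
#include <stdio.h>

static void show_attr(const char *path)
{
        char buf[128];
        FILE *f = fopen(path, "r");

        if (f && fgets(buf, sizeof(buf), f))
                printf("%s: %s", path, buf);    /* sysfs values end in '\n' */
        else
                printf("%s: unavailable\n", path);
        if (f)
                fclose(f);
}

int main(void)
{
        FILE *f;

        show_attr("/sys/devices/platform/smapi/ac_connected");
        show_attr("/sys/devices/platform/smapi/BAT0/remaining_percent");
        show_attr("/sys/devices/platform/smapi/BAT0/stop_charge_thresh");

        /* store_battery_stop_charge_thresh() expects a percentage, 1..100 */
        f = fopen("/sys/devices/platform/smapi/BAT0/stop_charge_thresh", "w");
        if (f) {
                fprintf(f, "80\n");
                fclose(f);
        }
        return 0;
}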
-+ * -+ * Many of the battery status values obtained from the EC simply mirror -+ * values provided by the battery's Smart Battery System (SBS) interface, so -+ * their meaning is defined by the Smart Battery Data Specification (see -+ * http://sbs-forum.org/specs/sbdat110.pdf). References to this SBS spec -+ * are given in the code where relevant. -+ * -+ * Copyright (C) 2006 Shem Multinymous . -+ * SMAPI access code based on the mwave driver by Mike Sullivan. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include /* CMOS defines */ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define TP_VERSION "0.42" -+#define TP_DESC "ThinkPad SMAPI Support" -+#define TP_DIR "smapi" -+ -+MODULE_AUTHOR("Shem Multinymous"); -+MODULE_DESCRIPTION(TP_DESC); -+MODULE_VERSION(TP_VERSION); -+MODULE_LICENSE("GPL"); -+ -+static struct platform_device *pdev; -+ -+static int tp_debug; -+module_param_named(debug, tp_debug, int, 0600); -+MODULE_PARM_DESC(debug, "Debug level (0=off, 1=on)"); -+ -+/* A few macros for printk()ing: */ -+#define TPRINTK(level, fmt, args...) \ -+ dev_printk(level, &(pdev->dev), "%s: " fmt "\n", __func__, ## args) -+#define DPRINTK(fmt, args...) \ -+ do { if (tp_debug) TPRINTK(KERN_DEBUG, fmt, ## args); } while (0) -+ -+/********************************************************************* -+ * SMAPI interface -+ */ -+ -+/* SMAPI functions (register BX when making the SMM call). */ -+#define SMAPI_GET_INHIBIT_CHARGE 0x2114 -+#define SMAPI_SET_INHIBIT_CHARGE 0x2115 -+#define SMAPI_GET_THRESH_START 0x2116 -+#define SMAPI_SET_THRESH_START 0x2117 -+#define SMAPI_GET_FORCE_DISCHARGE 0x2118 -+#define SMAPI_SET_FORCE_DISCHARGE 0x2119 -+#define SMAPI_GET_THRESH_STOP 0x211a -+#define SMAPI_SET_THRESH_STOP 0x211b -+ -+/* SMAPI error codes (see ThinkPad 770 Technical Reference Manual p.83 at -+ http://www-307.ibm.com/pc/support/site.wss/document.do?lndocid=PFAN-3TUQQD */ -+#define SMAPI_RETCODE_EOF 0xff -+static struct { u8 rc; char *msg; int ret; } smapi_retcode[] = -+{ -+ {0x00, "OK", 0}, -+ {0x53, "SMAPI function is not available", -ENXIO}, -+ {0x81, "Invalid parameter", -EINVAL}, -+ {0x86, "Function is not supported by SMAPI BIOS", -EOPNOTSUPP}, -+ {0x90, "System error", -EIO}, -+ {0x91, "System is invalid", -EIO}, -+ {0x92, "System is busy, -EBUSY"}, -+ {0xa0, "Device error (disk read error)", -EIO}, -+ {0xa1, "Device is busy", -EBUSY}, -+ {0xa2, "Device is not attached", -ENXIO}, -+ {0xa3, "Device is disbled", -EIO}, -+ {0xa4, "Request parameter is out of range", -EINVAL}, -+ {0xa5, "Request parameter is not accepted", -EINVAL}, -+ {0xa6, "Transient error", -EBUSY}, /* ? 
*/ -+ {SMAPI_RETCODE_EOF, "Unknown error code", -EIO} -+}; -+ -+ -+#define SMAPI_MAX_RETRIES 10 -+#define SMAPI_PORT2 0x4F /* fixed port, meaning unclear */ -+static unsigned short smapi_port; /* APM control port, normally 0xB2 */ -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) -+static DECLARE_MUTEX(smapi_mutex); -+#else -+static DEFINE_SEMAPHORE(smapi_mutex); -+#endif -+ -+/** -+ * find_smapi_port - read SMAPI port from NVRAM -+ */ -+static int __init find_smapi_port(void) -+{ -+ u16 smapi_id = 0; -+ unsigned short port = 0; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&rtc_lock, flags); -+ smapi_id = CMOS_READ(0x7C); -+ smapi_id |= (CMOS_READ(0x7D) << 8); -+ spin_unlock_irqrestore(&rtc_lock, flags); -+ -+ if (smapi_id != 0x5349) { -+ printk(KERN_ERR "SMAPI not supported (ID=0x%x)\n", smapi_id); -+ return -ENXIO; -+ } -+ spin_lock_irqsave(&rtc_lock, flags); -+ port = CMOS_READ(0x7E); -+ port |= (CMOS_READ(0x7F) << 8); -+ spin_unlock_irqrestore(&rtc_lock, flags); -+ if (port == 0) { -+ printk(KERN_ERR "unable to read SMAPI port number\n"); -+ return -ENXIO; -+ } -+ return port; -+} -+ -+/** -+ * smapi_request - make a SMAPI call -+ * @inEBX, @inECX, @inEDI, @inESI: input registers -+ * @outEBX, @outECX, @outEDX, @outEDI, @outESI: outputs registers -+ * @msg: textual error message -+ * Invokes the SMAPI SMBIOS with the given input and outpu args. -+ * All outputs are optional (can be %NULL). -+ * Returns 0 when successful, and a negative errno constant -+ * (see smapi_retcode above) upon failure. -+ */ -+static int smapi_request(u32 inEBX, u32 inECX, -+ u32 inEDI, u32 inESI, -+ u32 *outEBX, u32 *outECX, u32 *outEDX, -+ u32 *outEDI, u32 *outESI, const char **msg) -+{ -+ int ret = 0; -+ int i; -+ int retries; -+ u8 rc; -+ /* Must use local vars for output regs, due to reg pressure. */ -+ u32 tmpEAX, tmpEBX, tmpECX, tmpEDX, tmpEDI, tmpESI; -+ -+ for (retries = 0; retries < SMAPI_MAX_RETRIES; ++retries) { -+ DPRINTK("req_in: BX=%x CX=%x DI=%x SI=%x", -+ inEBX, inECX, inEDI, inESI); -+ -+ /* SMAPI's SMBIOS call and thinkpad_ec end up using use -+ * different interfaces to the same chip, so play it safe. */ -+ ret = thinkpad_ec_lock(); -+ if (ret) -+ return ret; -+ -+ __asm__ __volatile__( -+ "movl $0x00005380,%%eax\n\t" -+ "movl %6,%%ebx\n\t" -+ "movl %7,%%ecx\n\t" -+ "movl %8,%%edi\n\t" -+ "movl %9,%%esi\n\t" -+ "xorl %%edx,%%edx\n\t" -+ "movw %10,%%dx\n\t" -+ "out %%al,%%dx\n\t" /* trigger SMI to SMBIOS */ -+ "out %%al,$0x4F\n\t" -+ "movl %%eax,%0\n\t" -+ "movl %%ebx,%1\n\t" -+ "movl %%ecx,%2\n\t" -+ "movl %%edx,%3\n\t" -+ "movl %%edi,%4\n\t" -+ "movl %%esi,%5\n\t" -+ :"=m"(tmpEAX), -+ "=m"(tmpEBX), -+ "=m"(tmpECX), -+ "=m"(tmpEDX), -+ "=m"(tmpEDI), -+ "=m"(tmpESI) -+ :"m"(inEBX), "m"(inECX), "m"(inEDI), "m"(inESI), -+ "m"((u16)smapi_port) -+ :"%eax", "%ebx", "%ecx", "%edx", "%edi", -+ "%esi"); -+ -+ thinkpad_ec_invalidate(); -+ thinkpad_ec_unlock(); -+ -+ /* Don't let the next SMAPI access happen too quickly, -+ * may case problems. (We're hold smapi_mutex). 
*/ -+ msleep(50); -+ -+ if (outEBX) *outEBX = tmpEBX; -+ if (outECX) *outECX = tmpECX; -+ if (outEDX) *outEDX = tmpEDX; -+ if (outESI) *outESI = tmpESI; -+ if (outEDI) *outEDI = tmpEDI; -+ -+ /* Look up error code */ -+ rc = (tmpEAX>>8)&0xFF; -+ for (i = 0; smapi_retcode[i].rc != SMAPI_RETCODE_EOF && -+ smapi_retcode[i].rc != rc; ++i) {} -+ ret = smapi_retcode[i].ret; -+ if (msg) -+ *msg = smapi_retcode[i].msg; -+ -+ DPRINTK("req_out: AX=%x BX=%x CX=%x DX=%x DI=%x SI=%x r=%d", -+ tmpEAX, tmpEBX, tmpECX, tmpEDX, tmpEDI, tmpESI, ret); -+ if (ret) -+ TPRINTK(KERN_NOTICE, "SMAPI error: %s (func=%x)", -+ smapi_retcode[i].msg, inEBX); -+ -+ if (ret != -EBUSY) -+ return ret; -+ } -+ return ret; -+} -+ -+/* Convenience wrapper: discard output arguments */ -+static int smapi_write(u32 inEBX, u32 inECX, -+ u32 inEDI, u32 inESI, const char **msg) -+{ -+ return smapi_request(inEBX, inECX, inEDI, inESI, -+ NULL, NULL, NULL, NULL, NULL, msg); -+} -+ -+ -+/********************************************************************* -+ * Specific SMAPI services -+ * All of these functions return 0 upon success, and a negative errno -+ * constant (see smapi_retcode) on failure. -+ */ -+ -+enum thresh_type { -+ THRESH_STOP = 0, /* the code assumes this is 0 for brevity */ -+ THRESH_START -+}; -+#define THRESH_NAME(which) ((which == THRESH_START) ? "start" : "stop") -+ -+/** -+ * __get_real_thresh - read battery charge start/stop threshold from SMAPI -+ * @bat: battery number (0 or 1) -+ * @which: THRESH_START or THRESH_STOP -+ * @thresh: 1..99, 0=default 1..99, 0=default (pass this as-is to SMAPI) -+ * @outEDI: some additional state that needs to be preserved, meaning unknown -+ * @outESI: some additional state that needs to be preserved, meaning unknown -+ */ -+static int __get_real_thresh(int bat, enum thresh_type which, int *thresh, -+ u32 *outEDI, u32 *outESI) -+{ -+ u32 ebx = (which == THRESH_START) ? SMAPI_GET_THRESH_START -+ : SMAPI_GET_THRESH_STOP; -+ u32 ecx = (bat+1)<<8; -+ const char *msg; -+ int ret = smapi_request(ebx, ecx, 0, 0, NULL, -+ &ecx, NULL, outEDI, outESI, &msg); -+ if (ret) { -+ TPRINTK(KERN_NOTICE, "cannot get %s_thresh of bat=%d: %s", -+ THRESH_NAME(which), bat, msg); -+ return ret; -+ } -+ if (!(ecx&0x00000100)) { -+ TPRINTK(KERN_NOTICE, "cannot get %s_thresh of bat=%d: ecx=0%x", -+ THRESH_NAME(which), bat, ecx); -+ return -EIO; -+ } -+ if (thresh) -+ *thresh = ecx&0xFF; -+ return 0; -+} -+ -+/** -+ * get_real_thresh - read battery charge start/stop threshold from SMAPI -+ * @bat: battery number (0 or 1) -+ * @which: THRESH_START or THRESH_STOP -+ * @thresh: 1..99, 0=default (passes as-is to SMAPI) -+ */ -+static int get_real_thresh(int bat, enum thresh_type which, int *thresh) -+{ -+ return __get_real_thresh(bat, which, thresh, NULL, NULL); -+} -+ -+/** -+ * set_real_thresh - write battery start/top charge threshold to SMAPI -+ * @bat: battery number (0 or 1) -+ * @which: THRESH_START or THRESH_STOP -+ * @thresh: 1..99, 0=default (passes as-is to SMAPI) -+ */ -+static int set_real_thresh(int bat, enum thresh_type which, int thresh) -+{ -+ u32 ebx = (which == THRESH_START) ? 
SMAPI_SET_THRESH_START -+ : SMAPI_SET_THRESH_STOP; -+ u32 ecx = ((bat+1)<<8) + thresh; -+ u32 getDI, getSI; -+ const char *msg; -+ int ret; -+ -+ /* verify read before writing */ -+ ret = __get_real_thresh(bat, which, NULL, &getDI, &getSI); -+ if (ret) -+ return ret; -+ -+ ret = smapi_write(ebx, ecx, getDI, getSI, &msg); -+ if (ret) -+ TPRINTK(KERN_NOTICE, "set %s to %d for bat=%d failed: %s", -+ THRESH_NAME(which), thresh, bat, msg); -+ else -+ TPRINTK(KERN_INFO, "set %s to %d for bat=%d", -+ THRESH_NAME(which), thresh, bat); -+ return ret; -+} -+ -+/** -+ * __get_inhibit_charge_minutes - get inhibit charge period from SMAPI -+ * @bat: battery number (0 or 1) -+ * @minutes: period in minutes (1..65535 minutes, 0=disabled) -+ * @outECX: some additional state that needs to be preserved, meaning unknown -+ * Note that @minutes is the originally set value, it does not count down. -+ */ -+static int __get_inhibit_charge_minutes(int bat, int *minutes, u32 *outECX) -+{ -+ u32 ecx = (bat+1)<<8; -+ u32 esi; -+ const char *msg; -+ int ret = smapi_request(SMAPI_GET_INHIBIT_CHARGE, ecx, 0, 0, -+ NULL, &ecx, NULL, NULL, &esi, &msg); -+ if (ret) { -+ TPRINTK(KERN_NOTICE, "failed for bat=%d: %s", bat, msg); -+ return ret; -+ } -+ if (!(ecx&0x0100)) { -+ TPRINTK(KERN_NOTICE, "bad ecx=0x%x for bat=%d", ecx, bat); -+ return -EIO; -+ } -+ if (minutes) -+ *minutes = (ecx&0x0001)?esi:0; -+ if (outECX) -+ *outECX = ecx; -+ return 0; -+} -+ -+/** -+ * get_inhibit_charge_minutes - get inhibit charge period from SMAPI -+ * @bat: battery number (0 or 1) -+ * @minutes: period in minutes (1..65535 minutes, 0=disabled) -+ * Note that @minutes is the originally set value, it does not count down. -+ */ -+static int get_inhibit_charge_minutes(int bat, int *minutes) -+{ -+ return __get_inhibit_charge_minutes(bat, minutes, NULL); -+} -+ -+/** -+ * set_inhibit_charge_minutes - write inhibit charge period to SMAPI -+ * @bat: battery number (0 or 1) -+ * @minutes: period in minutes (1..65535 minutes, 0=disabled) -+ */ -+static int set_inhibit_charge_minutes(int bat, int minutes) -+{ -+ u32 ecx; -+ const char *msg; -+ int ret; -+ -+ /* verify read before writing */ -+ ret = __get_inhibit_charge_minutes(bat, NULL, &ecx); -+ if (ret) -+ return ret; -+ -+ ecx = ((bat+1)<<8) | (ecx&0x00FE) | (minutes > 0 ? 
0x0001 : 0x0000); -+ if (minutes > 0xFFFF) -+ minutes = 0xFFFF; -+ ret = smapi_write(SMAPI_SET_INHIBIT_CHARGE, ecx, 0, minutes, &msg); -+ if (ret) -+ TPRINTK(KERN_NOTICE, -+ "set to %d failed for bat=%d: %s", minutes, bat, msg); -+ else -+ TPRINTK(KERN_INFO, "set to %d for bat=%d\n", minutes, bat); -+ return ret; -+} -+ -+ -+/** -+ * get_force_discharge - get status of forced discharging from SMAPI -+ * @bat: battery number (0 or 1) -+ * @enabled: 1 if forced discharged is enabled, 0 if not -+ */ -+static int get_force_discharge(int bat, int *enabled) -+{ -+ u32 ecx = (bat+1)<<8; -+ const char *msg; -+ int ret = smapi_request(SMAPI_GET_FORCE_DISCHARGE, ecx, 0, 0, -+ NULL, &ecx, NULL, NULL, NULL, &msg); -+ if (ret) { -+ TPRINTK(KERN_NOTICE, "failed for bat=%d: %s", bat, msg); -+ return ret; -+ } -+ *enabled = (!(ecx&0x00000100) && (ecx&0x00000001))?1:0; -+ return 0; -+} -+ -+/** -+ * set_force_discharge - write status of forced discharging to SMAPI -+ * @bat: battery number (0 or 1) -+ * @enabled: 1 if forced discharged is enabled, 0 if not -+ */ -+static int set_force_discharge(int bat, int enabled) -+{ -+ u32 ecx = (bat+1)<<8; -+ const char *msg; -+ int ret = smapi_request(SMAPI_GET_FORCE_DISCHARGE, ecx, 0, 0, -+ NULL, &ecx, NULL, NULL, NULL, &msg); -+ if (ret) { -+ TPRINTK(KERN_NOTICE, "get failed for bat=%d: %s", bat, msg); -+ return ret; -+ } -+ if (ecx&0x00000100) { -+ TPRINTK(KERN_NOTICE, "cannot force discharge bat=%d", bat); -+ return -EIO; -+ } -+ -+ ecx = ((bat+1)<<8) | (ecx&0x000000FA) | (enabled?0x00000001:0); -+ ret = smapi_write(SMAPI_SET_FORCE_DISCHARGE, ecx, 0, 0, &msg); -+ if (ret) -+ TPRINTK(KERN_NOTICE, "set to %d failed for bat=%d: %s", -+ enabled, bat, msg); -+ else -+ TPRINTK(KERN_INFO, "set to %d for bat=%d", enabled, bat); -+ return ret; -+} -+ -+ -+/********************************************************************* -+ * Wrappers to threshold-related SMAPI functions, which handle default -+ * thresholds and related quirks. -+ */ -+ -+/* Minimum, default and minimum difference for battery charging thresholds: */ -+#define MIN_THRESH_DELTA 4 /* Min delta between start and stop thresh */ -+#define MIN_THRESH_START 2 -+#define MAX_THRESH_START (100-MIN_THRESH_DELTA) -+#define MIN_THRESH_STOP (MIN_THRESH_START + MIN_THRESH_DELTA) -+#define MAX_THRESH_STOP 100 -+#define DEFAULT_THRESH_START MAX_THRESH_START -+#define DEFAULT_THRESH_STOP MAX_THRESH_STOP -+ -+/* The GUI of IBM's Battery Maximizer seems to show a start threshold that -+ * is 1 more than the value we set/get via SMAPI. Since the threshold is -+ * maintained across reboot, this can be confusing. So we kludge our -+ * interface for interoperability: */ -+#define BATMAX_FIX 1 -+ -+/* Get charge start/stop threshold (1..100), -+ * substituting default values if needed and applying BATMAT_FIX. */ -+static int get_thresh(int bat, enum thresh_type which, int *thresh) -+{ -+ int ret = get_real_thresh(bat, which, thresh); -+ if (ret) -+ return ret; -+ if (*thresh == 0) -+ *thresh = (which == THRESH_START) ? DEFAULT_THRESH_START -+ : DEFAULT_THRESH_STOP; -+ else if (which == THRESH_START) -+ *thresh += BATMAX_FIX; -+ return 0; -+} -+ -+ -+/* Set charge start/stop threshold (1..100), -+ * substituting default values if needed and applying BATMAT_FIX. 
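get_thresh() above maps the raw SMAPI threshold to the user-visible one: a raw 0 is replaced by the default (96 for start, 100 for stop), and a non-zero start value is bumped by BATMAX_FIX so it matches what IBM's Battery Maximizer GUI shows. A small standalone rendering of that mapping, with invented raw values:

/* User-visible thresholds as derived by get_thresh(). */
#include <stdio.h>

#define MIN_THRESH_DELTA     4
#define MAX_THRESH_START     (100 - MIN_THRESH_DELTA)
#define DEFAULT_THRESH_START MAX_THRESH_START   /* 96 */
#define DEFAULT_THRESH_STOP  100
#define BATMAX_FIX           1

static int visible_start(int raw)       /* raw value as read via SMAPI */
{
        return raw == 0 ? DEFAULT_THRESH_START : raw + BATMAX_FIX;
}

static int visible_stop(int raw)
{
        return raw == 0 ? DEFAULT_THRESH_STOP : raw;
}

int main(void)
{
        int starts[] = { 0, 39, 74 }, stops[] = { 0, 80 };
        int i;

        for (i = 0; i < 3; i++)
                printf("raw start %2d -> shown %3d%%\n", starts[i], visible_start(starts[i]));
        for (i = 0; i < 2; i++)
                printf("raw stop  %2d -> shown %3d%%\n", stops[i], visible_stop(stops[i]));
        return 0;
}

set_thresh(), just below, applies the inverse: it subtracts BATMAX_FIX from the start value and stores a stop value of 100 as raw 0, since 100 itself is out of range for the SMAPI call.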
*/ -+static int set_thresh(int bat, enum thresh_type which, int thresh) -+{ -+ if (which == THRESH_STOP && thresh == DEFAULT_THRESH_STOP) -+ thresh = 0; /* 100 is out of range, but default means 100 */ -+ if (which == THRESH_START) -+ thresh -= BATMAX_FIX; -+ return set_real_thresh(bat, which, thresh); -+} -+ -+/********************************************************************* -+ * ThinkPad embedded controller readout and basic functions -+ */ -+ -+/** -+ * read_tp_ec_row - read data row from the ThinkPad embedded controller -+ * @arg0: EC command code -+ * @bat: battery number, 0 or 1 -+ * @j: the byte value to be used for "junk" (unused) input/outputs -+ * @dataval: result vector -+ */ -+static int read_tp_ec_row(u8 arg0, int bat, u8 j, u8 *dataval) -+{ -+ int ret; -+ const struct thinkpad_ec_row args = { .mask = 0xFFFF, -+ .val = {arg0, j,j,j,j,j,j,j,j,j,j,j,j,j,j, (u8)bat} }; -+ struct thinkpad_ec_row data = { .mask = 0xFFFF }; -+ -+ ret = thinkpad_ec_lock(); -+ if (ret) -+ return ret; -+ ret = thinkpad_ec_read_row(&args, &data); -+ thinkpad_ec_unlock(); -+ memcpy(dataval, &data.val, TP_CONTROLLER_ROW_LEN); -+ return ret; -+} -+ -+/** -+ * power_device_present - check for presence of battery or AC power -+ * @bat: 0 for battery 0, 1 for battery 1, otherwise AC power -+ * Returns 1 if present, 0 if not present, negative if error. -+ */ -+static int power_device_present(int bat) -+{ -+ u8 row[TP_CONTROLLER_ROW_LEN]; -+ u8 test; -+ int ret = read_tp_ec_row(1, bat, 0, row); -+ if (ret) -+ return ret; -+ switch (bat) { -+ case 0: test = 0x40; break; /* battery 0 */ -+ case 1: test = 0x20; break; /* battery 1 */ -+ default: test = 0x80; /* AC power */ -+ } -+ return (row[0] & test) ? 1 : 0; -+} -+ -+/** -+ * bat_has_status - check if battery can report detailed status -+ * @bat: 0 for battery 0, 1 for battery 1 -+ * Returns 1 if yes, 0 if no, negative if error. -+ */ -+static int bat_has_status(int bat) -+{ -+ u8 row[TP_CONTROLLER_ROW_LEN]; -+ int ret = read_tp_ec_row(1, bat, 0, row); -+ if (ret) -+ return ret; -+ if ((row[0] & (bat?0x20:0x40)) == 0) /* no battery */ -+ return 0; -+ if ((row[1] & (0x60)) == 0) /* no status */ -+ return 0; -+ return 1; -+} -+ -+/** -+ * get_tp_ec_bat_16 - read a 16-bit value from EC battery status data -+ * @arg0: first argument to EC -+ * @off: offset in row returned from EC -+ * @bat: battery (0 or 1) -+ * @val: the 16-bit value obtained -+ * Returns nonzero on error. 
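power_device_present() and bat_has_status() above reduce the EC status row to a few bit tests on its first two bytes: 0x80/0x40/0x20 in row[0] flag AC, battery 0 and battery 1, and row[1]&0x60 tells whether detailed status is reported at all. A standalone decoder using those masks plus the state nibble values (0xc0/0xd0/0xe0) that show_battery_state() uses further down; the sample bytes are invented:

/* Decode the presence/state bits of an EC status row (arg0 = 1). */
#include <stdio.h>
#include <stdint.h>

static void decode_status(uint8_t b0, uint8_t b1)
{
        printf("AC connected: %d\n", !!(b0 & 0x80));
        printf("battery 0   : %d\n", !!(b0 & 0x40));
        printf("battery 1   : %d\n", !!(b0 & 0x20));

        if (!(b1 & 0x60)) {             /* no detailed status available */
                printf("state       : none\n");
                return;
        }
        switch (b1 & 0xf0) {
        case 0xc0: printf("state       : idle\n"); break;
        case 0xd0: printf("state       : discharging\n"); break;
        case 0xe0: printf("state       : charging\n"); break;
        default:   printf("state       : unknown (0x%x)\n", b1); break;
        }
}

int main(void)
{
        decode_status(0xc0, 0xe0);      /* hypothetical: AC + battery 0, charging */
        return 0;
}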
-+ */ -+static int get_tp_ec_bat_16(u8 arg0, int offset, int bat, u16 *val) -+{ -+ u8 row[TP_CONTROLLER_ROW_LEN]; -+ int ret; -+ if (bat_has_status(bat) != 1) -+ return -ENXIO; -+ ret = read_tp_ec_row(arg0, bat, 0, row); -+ if (ret) -+ return ret; -+ *val = *(u16 *)(row+offset); -+ return 0; -+} -+ -+/********************************************************************* -+ * sysfs attributes for batteries - -+ * definitions and helper functions -+ */ -+ -+/* A custom device attribute struct which holds a battery number */ -+struct bat_device_attribute { -+ struct device_attribute dev_attr; -+ int bat; -+}; -+ -+/** -+ * attr_get_bat - get the battery to which the attribute belongs -+ */ -+static int attr_get_bat(struct device_attribute *attr) -+{ -+ return container_of(attr, struct bat_device_attribute, dev_attr)->bat; -+} -+ -+/** -+ * show_tp_ec_bat_u16 - show an unsigned 16-bit battery attribute -+ * @arg0: specified 1st argument of EC raw to read -+ * @offset: byte offset in EC raw data -+ * @mul: correction factor to multiply by -+ * @na_msg: string to output is value not available (0xFFFFFFFF) -+ * @attr: battery attribute -+ * @buf: output buffer -+ * The 16-bit value is read from the EC, treated as unsigned, -+ * transformed as x->mul*x, and printed to the buffer. -+ * If the value is 0xFFFFFFFF and na_msg!=%NULL, na_msg is printed instead. -+ */ -+static ssize_t show_tp_ec_bat_u16(u8 arg0, int offset, int mul, -+ const char *na_msg, -+ struct device_attribute *attr, char *buf) -+{ -+ u16 val; -+ int ret = get_tp_ec_bat_16(arg0, offset, attr_get_bat(attr), &val); -+ if (ret) -+ return ret; -+ if (na_msg && val == 0xFFFF) -+ return sprintf(buf, "%s\n", na_msg); -+ else -+ return sprintf(buf, "%u\n", mul*(unsigned int)val); -+} -+ -+/** -+ * show_tp_ec_bat_s16 - show an signed 16-bit battery attribute -+ * @arg0: specified 1st argument of EC raw to read -+ * @offset: byte offset in EC raw data -+ * @mul: correction factor to multiply by -+ * @add: correction term to add after multiplication -+ * @attr: battery attribute -+ * @buf: output buffer -+ * The 16-bit value is read from the EC, treated as signed, -+ * transformed as x->mul*x+add, and printed to the buffer. 
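The u16/s16 helpers above only apply a linear x -> mul*x + add transform; the physical meaning comes from the factors each attribute passes in. Two worked conversions using factors that appear further down in this file (temperature: with mul=100 and add=-273100 a raw reading in tenths of a kelvin comes out in millicelsius; power: millivolts times signed milliamps divided by 1000 gives milliwatts), with invented raw readings:

/* Worked examples of the raw-value scaling used by the sysfs helpers. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* show_battery_temperature(): mul=100, add=-273100 */
        int16_t raw_temp = 2981;        /* 298.1 K, sample value */
        int mc = 100 * raw_temp - 273100;
        printf("temperature: %d mC (%.1f C)\n", mc, mc / 1000.0);

        /* show_tp_ec_bat_power(): voltage (mV) * current (mA, signed) / 1000 */
        uint16_t mv = 12300;            /* sample voltage */
        int16_t  ma = -1500;            /* sample current, negative = discharging */
        printf("power: %d mW\n", (int)ma * mv / 1000);

        return 0;
}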
-+ */ -+static ssize_t show_tp_ec_bat_s16(u8 arg0, int offset, int mul, int add, -+ struct device_attribute *attr, char *buf) -+{ -+ u16 val; -+ int ret = get_tp_ec_bat_16(arg0, offset, attr_get_bat(attr), &val); -+ if (ret) -+ return ret; -+ return sprintf(buf, "%d\n", mul*(s16)val+add); -+} -+ -+/** -+ * show_tp_ec_bat_str - show a string from EC battery status data -+ * @arg0: specified 1st argument of EC raw to read -+ * @offset: byte offset in EC raw data -+ * @maxlen: maximum string length -+ * @attr: battery attribute -+ * @buf: output buffer -+ */ -+static ssize_t show_tp_ec_bat_str(u8 arg0, int offset, int maxlen, -+ struct device_attribute *attr, char *buf) -+{ -+ int bat = attr_get_bat(attr); -+ u8 row[TP_CONTROLLER_ROW_LEN]; -+ int ret; -+ if (bat_has_status(bat) != 1) -+ return -ENXIO; -+ ret = read_tp_ec_row(arg0, bat, 0, row); -+ if (ret) -+ return ret; -+ strncpy(buf, (char *)row+offset, maxlen); -+ buf[maxlen] = 0; -+ strcat(buf, "\n"); -+ return strlen(buf); -+} -+ -+/** -+ * show_tp_ec_bat_power - show a power readout from EC battery status data -+ * @arg0: specified 1st argument of EC raw to read -+ * @offV: byte offset of voltage in EC raw data -+ * @offI: byte offset of current in EC raw data -+ * @attr: battery attribute -+ * @buf: output buffer -+ * Computes the power as current*voltage from the two given readout offsets. -+ */ -+static ssize_t show_tp_ec_bat_power(u8 arg0, int offV, int offI, -+ struct device_attribute *attr, char *buf) -+{ -+ u8 row[TP_CONTROLLER_ROW_LEN]; -+ int milliamp, millivolt, ret; -+ int bat = attr_get_bat(attr); -+ if (bat_has_status(bat) != 1) -+ return -ENXIO; -+ ret = read_tp_ec_row(1, bat, 0, row); -+ if (ret) -+ return ret; -+ millivolt = *(u16 *)(row+offV); -+ milliamp = *(s16 *)(row+offI); -+ return sprintf(buf, "%d\n", milliamp*millivolt/1000); /* units: mW */ -+} -+ -+/** -+ * show_tp_ec_bat_date - decode and show a date from EC battery status data -+ * @arg0: specified 1st argument of EC raw to read -+ * @offset: byte offset in EC raw data -+ * @attr: battery attribute -+ * @buf: output buffer -+ */ -+static ssize_t show_tp_ec_bat_date(u8 arg0, int offset, -+ struct device_attribute *attr, char *buf) -+{ -+ u8 row[TP_CONTROLLER_ROW_LEN]; -+ u16 v; -+ int ret; -+ int day, month, year; -+ int bat = attr_get_bat(attr); -+ if (bat_has_status(bat) != 1) -+ return -ENXIO; -+ ret = read_tp_ec_row(arg0, bat, 0, row); -+ if (ret) -+ return ret; -+ -+ /* Decode bit-packed: v = day | (month<<5) | ((year-1980)<<9) */ -+ v = *(u16 *)(row+offset); -+ day = v & 0x1F; -+ month = (v >> 5) & 0xF; -+ year = (v >> 9) + 1980; -+ -+ return sprintf(buf, "%04d-%02d-%02d\n", year, month, day); -+} -+ -+ -+/********************************************************************* -+ * sysfs attribute I/O for batteries - -+ * the actual attribute show/store functions -+ */ -+ -+static ssize_t show_battery_start_charge_thresh(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ int thresh; -+ int bat = attr_get_bat(attr); -+ int ret = get_thresh(bat, THRESH_START, &thresh); -+ if (ret) -+ return ret; -+ return sprintf(buf, "%d\n", thresh); /* units: percent */ -+} -+ -+static ssize_t show_battery_stop_charge_thresh(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ int thresh; -+ int bat = attr_get_bat(attr); -+ int ret = get_thresh(bat, THRESH_STOP, &thresh); -+ if (ret) -+ return ret; -+ return sprintf(buf, "%d\n", thresh); /* units: percent */ -+} -+ -+/** -+ * store_battery_start_charge_thresh - store 
battery_start_charge_thresh attr -+ * Since this is a kernel<->user interface, we ensure a valid state for -+ * the hardware. We do this by clamping the requested threshold to the -+ * valid range and, if necessary, moving the other threshold so that -+ * it's MIN_THRESH_DELTA away from this one. -+ */ -+static ssize_t store_battery_start_charge_thresh(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ int thresh, other_thresh, ret; -+ int bat = attr_get_bat(attr); -+ -+ if (sscanf(buf, "%d", &thresh) != 1 || thresh < 1 || thresh > 100) -+ return -EINVAL; -+ -+ if (thresh < MIN_THRESH_START) /* clamp up to MIN_THRESH_START */ -+ thresh = MIN_THRESH_START; -+ if (thresh > MAX_THRESH_START) /* clamp down to MAX_THRESH_START */ -+ thresh = MAX_THRESH_START; -+ -+ down(&smapi_mutex); -+ ret = get_thresh(bat, THRESH_STOP, &other_thresh); -+ if (ret != -EOPNOTSUPP && ret != -ENXIO) { -+ if (ret) /* other threshold is set? */ -+ goto out; -+ ret = get_real_thresh(bat, THRESH_START, NULL); -+ if (ret) /* this threshold is set? */ -+ goto out; -+ if (other_thresh < thresh+MIN_THRESH_DELTA) { -+ /* move other thresh to keep it above this one */ -+ ret = set_thresh(bat, THRESH_STOP, -+ thresh+MIN_THRESH_DELTA); -+ if (ret) -+ goto out; -+ } -+ } -+ ret = set_thresh(bat, THRESH_START, thresh); -+out: -+ up(&smapi_mutex); -+ return count; -+ -+} -+ -+/** -+ * store_battery_stop_charge_thresh - store battery_stop_charge_thresh attr -+ * Since this is a kernel<->user interface, we ensure a valid state for -+ * the hardware. We do this by clamping the requested threshold to the -+ * valid range and, if necessary, moving the other threshold so that -+ * it's MIN_THRESH_DELTA away from this one. -+ */ -+static ssize_t store_battery_stop_charge_thresh(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ int thresh, other_thresh, ret; -+ int bat = attr_get_bat(attr); -+ -+ if (sscanf(buf, "%d", &thresh) != 1 || thresh < 1 || thresh > 100) -+ return -EINVAL; -+ -+ if (thresh < MIN_THRESH_STOP) /* clamp up to MIN_THRESH_STOP */ -+ thresh = MIN_THRESH_STOP; -+ -+ down(&smapi_mutex); -+ ret = get_thresh(bat, THRESH_START, &other_thresh); -+ if (ret != -EOPNOTSUPP && ret != -ENXIO) { /* other threshold exists? */ -+ if (ret) -+ goto out; -+ /* this threshold exists? 
*/ -+ ret = get_real_thresh(bat, THRESH_STOP, NULL); -+ if (ret) -+ goto out; -+ if (other_thresh >= thresh-MIN_THRESH_DELTA) { -+ /* move other thresh to be below this one */ -+ ret = set_thresh(bat, THRESH_START, -+ thresh-MIN_THRESH_DELTA); -+ if (ret) -+ goto out; -+ } -+ } -+ ret = set_thresh(bat, THRESH_STOP, thresh); -+out: -+ up(&smapi_mutex); -+ return count; -+} -+ -+static ssize_t show_battery_inhibit_charge_minutes(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ int minutes; -+ int bat = attr_get_bat(attr); -+ int ret = get_inhibit_charge_minutes(bat, &minutes); -+ if (ret) -+ return ret; -+ return sprintf(buf, "%d\n", minutes); /* units: minutes */ -+} -+ -+static ssize_t store_battery_inhibit_charge_minutes(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int ret; -+ int minutes; -+ int bat = attr_get_bat(attr); -+ if (sscanf(buf, "%d", &minutes) != 1 || minutes < 0) { -+ TPRINTK(KERN_ERR, "inhibit_charge_minutes: " -+ "must be a non-negative integer"); -+ return -EINVAL; -+ } -+ ret = set_inhibit_charge_minutes(bat, minutes); -+ if (ret) -+ return ret; -+ return count; -+} -+ -+static ssize_t show_battery_force_discharge(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ int enabled; -+ int bat = attr_get_bat(attr); -+ int ret = get_force_discharge(bat, &enabled); -+ if (ret) -+ return ret; -+ return sprintf(buf, "%d\n", enabled); /* type: boolean */ -+} -+ -+static ssize_t store_battery_force_discharge(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ int ret; -+ int enabled; -+ int bat = attr_get_bat(attr); -+ if (sscanf(buf, "%d", &enabled) != 1 || enabled < 0 || enabled > 1) -+ return -EINVAL; -+ ret = set_force_discharge(bat, enabled); -+ if (ret) -+ return ret; -+ return count; -+} -+ -+static ssize_t show_battery_installed( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ int bat = attr_get_bat(attr); -+ int ret = power_device_present(bat); -+ if (ret < 0) -+ return ret; -+ return sprintf(buf, "%d\n", ret); /* type: boolean */ -+} -+ -+static ssize_t show_battery_state( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ u8 row[TP_CONTROLLER_ROW_LEN]; -+ const char *txt; -+ int ret; -+ int bat = attr_get_bat(attr); -+ if (bat_has_status(bat) != 1) -+ return sprintf(buf, "none\n"); -+ ret = read_tp_ec_row(1, bat, 0, row); -+ if (ret) -+ return ret; -+ switch (row[1] & 0xf0) { -+ case 0xc0: txt = "idle"; break; -+ case 0xd0: txt = "discharging"; break; -+ case 0xe0: txt = "charging"; break; -+ default: return sprintf(buf, "unknown (0x%x)\n", row[1]); -+ } -+ return sprintf(buf, "%s\n", txt); /* type: string from fixed set */ -+} -+ -+static ssize_t show_battery_manufacturer( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* type: string. SBS spec v1.1 p34: ManufacturerName() */ -+ return show_tp_ec_bat_str(4, 2, TP_CONTROLLER_ROW_LEN-2, attr, buf); -+} -+ -+static ssize_t show_battery_model( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* type: string. 
SBS spec v1.1 p34: DeviceName() */ -+ return show_tp_ec_bat_str(5, 2, TP_CONTROLLER_ROW_LEN-2, attr, buf); -+} -+ -+static ssize_t show_battery_barcoding( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* type: string */ -+ return show_tp_ec_bat_str(7, 2, TP_CONTROLLER_ROW_LEN-2, attr, buf); -+} -+ -+static ssize_t show_battery_chemistry( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* type: string. SBS spec v1.1 p34-35: DeviceChemistry() */ -+ return show_tp_ec_bat_str(6, 2, 5, attr, buf); -+} -+ -+static ssize_t show_battery_voltage( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mV. SBS spec v1.1 p24: Voltage() */ -+ return show_tp_ec_bat_u16(1, 6, 1, NULL, attr, buf); -+} -+ -+static ssize_t show_battery_design_voltage( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mV. SBS spec v1.1 p32: DesignVoltage() */ -+ return show_tp_ec_bat_u16(3, 4, 1, NULL, attr, buf); -+} -+ -+static ssize_t show_battery_charging_max_voltage( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mV. SBS spec v1.1 p37,39: ChargingVoltage() */ -+ return show_tp_ec_bat_u16(9, 8, 1, NULL, attr, buf); -+} -+ -+static ssize_t show_battery_group0_voltage( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mV */ -+ return show_tp_ec_bat_u16(0xA, 12, 1, NULL, attr, buf); -+} -+ -+static ssize_t show_battery_group1_voltage( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mV */ -+ return show_tp_ec_bat_u16(0xA, 10, 1, NULL, attr, buf); -+} -+ -+static ssize_t show_battery_group2_voltage( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mV */ -+ return show_tp_ec_bat_u16(0xA, 8, 1, NULL, attr, buf); -+} -+ -+static ssize_t show_battery_group3_voltage( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mV */ -+ return show_tp_ec_bat_u16(0xA, 6, 1, NULL, attr, buf); -+} -+ -+static ssize_t show_battery_current_now( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mA. SBS spec v1.1 p24: Current() */ -+ return show_tp_ec_bat_s16(1, 8, 1, 0, attr, buf); -+} -+ -+static ssize_t show_battery_current_avg( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mA. SBS spec v1.1 p24: AverageCurrent() */ -+ return show_tp_ec_bat_s16(1, 10, 1, 0, attr, buf); -+} -+ -+static ssize_t show_battery_charging_max_current( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mA. SBS spec v1.1 p36,38: ChargingCurrent() */ -+ return show_tp_ec_bat_s16(9, 6, 1, 0, attr, buf); -+} -+ -+static ssize_t show_battery_power_now( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mW. SBS spec v1.1: Voltage()*Current() */ -+ return show_tp_ec_bat_power(1, 6, 8, attr, buf); -+} -+ -+static ssize_t show_battery_power_avg( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mW. SBS spec v1.1: Voltage()*AverageCurrent() */ -+ return show_tp_ec_bat_power(1, 6, 10, attr, buf); -+} -+ -+static ssize_t show_battery_remaining_percent( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: percent. 
SBS spec v1.1 p25: RelativeStateOfCharge() */ -+ return show_tp_ec_bat_u16(1, 12, 1, NULL, attr, buf); -+} -+ -+static ssize_t show_battery_remaining_percent_error( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: percent. SBS spec v1.1 p25: MaxError() */ -+ return show_tp_ec_bat_u16(9, 4, 1, NULL, attr, buf); -+} -+ -+static ssize_t show_battery_remaining_charging_time( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: minutes. SBS spec v1.1 p27: AverageTimeToFull() */ -+ return show_tp_ec_bat_u16(2, 8, 1, "not_charging", attr, buf); -+} -+ -+static ssize_t show_battery_remaining_running_time( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: minutes. SBS spec v1.1 p27: RunTimeToEmpty() */ -+ return show_tp_ec_bat_u16(2, 6, 1, "not_discharging", attr, buf); -+} -+ -+static ssize_t show_battery_remaining_running_time_now( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: minutes. SBS spec v1.1 p27: RunTimeToEmpty() */ -+ return show_tp_ec_bat_u16(2, 4, 1, "not_discharging", attr, buf); -+} -+ -+static ssize_t show_battery_remaining_capacity( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mWh. SBS spec v1.1 p26. */ -+ return show_tp_ec_bat_u16(1, 14, 10, "", attr, buf); -+} -+ -+static ssize_t show_battery_last_full_capacity( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mWh. SBS spec v1.1 p26: FullChargeCapacity() */ -+ return show_tp_ec_bat_u16(2, 2, 10, "", attr, buf); -+} -+ -+static ssize_t show_battery_design_capacity( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: mWh. SBS spec v1.1 p32: DesignCapacity() */ -+ return show_tp_ec_bat_u16(3, 2, 10, "", attr, buf); -+} -+ -+static ssize_t show_battery_cycle_count( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: ordinal. SBS spec v1.1 p32: CycleCount() */ -+ return show_tp_ec_bat_u16(2, 12, 1, "", attr, buf); -+} -+ -+static ssize_t show_battery_temperature( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* units: millicelsius. SBS spec v1.1: Temperature()*10 */ -+ return show_tp_ec_bat_s16(1, 4, 100, -273100, attr, buf); -+} -+ -+static ssize_t show_battery_serial( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* type: int. SBS spec v1.1 p34: SerialNumber() */ -+ return show_tp_ec_bat_u16(3, 10, 1, "", attr, buf); -+} -+ -+static ssize_t show_battery_manufacture_date( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* type: YYYY-MM-DD. SBS spec v1.1 p34: ManufactureDate() */ -+ return show_tp_ec_bat_date(3, 8, attr, buf); -+} -+ -+static ssize_t show_battery_first_use_date( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ /* type: YYYY-MM-DD */ -+ return show_tp_ec_bat_date(8, 2, attr, buf); -+} -+ -+/** -+ * show_battery_dump - show the battery's dump attribute -+ * The dump attribute gives a hex dump of all EC readouts related to a -+ * battery. Some of the enumerated values don't really exist (i.e., the -+ * EC function just leaves them untouched); we use a kludge to detect and -+ * denote these. 
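The "kludge" mentioned above is implemented just below: each row is read twice, once with the unused input bytes filled with 0xAA and once with 0x55, and an output byte that comes back equal to the junk filler both times was evidently never written by the EC. A standalone run of that detection step over two invented readouts:

/* Mark EC output bytes left untouched, as show_battery_dump() does. */
#include <stdio.h>
#include <stdint.h>

#define ROW_LEN 16

int main(void)
{
        const uint8_t junka = 0xAA, junkb = 0x55;
        /* hypothetical readouts of one row with the two junk fillers */
        uint8_t rowa[ROW_LEN] = { 0x01, 0x3c, 0xAA, 0x10, 0x2e, 0xAA, 0xAA, 0x7f,
                                  0x00, 0xAA, 0x55, 0x12, 0xAA, 0xAA, 0x09, 0x80 };
        uint8_t rowb[ROW_LEN] = { 0x01, 0x3c, 0x55, 0x10, 0x2e, 0x55, 0x55, 0x7f,
                                  0x00, 0x55, 0x55, 0x12, 0x55, 0x55, 0x09, 0x80 };
        int i;

        for (i = 0; i < ROW_LEN; i++) {
                if (rowa[i] == junka && rowb[i] == junkb)
                        printf("-- ");  /* unused by the EC */
                else
                        printf("%02x ", rowa[i]);
        }
        printf("\n");
        return 0;
}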
-+ */ -+#define MIN_DUMP_ARG0 0x00 -+#define MAX_DUMP_ARG0 0x0a /* 0x0b is useful too but hangs old EC firmware */ -+static ssize_t show_battery_dump( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ int i; -+ char *p = buf; -+ int bat = attr_get_bat(attr); -+ u8 arg0; /* first argument to EC */ -+ u8 rowa[TP_CONTROLLER_ROW_LEN], -+ rowb[TP_CONTROLLER_ROW_LEN]; -+ const u8 junka = 0xAA, -+ junkb = 0x55; /* junk values for testing changes */ -+ int ret; -+ -+ for (arg0 = MIN_DUMP_ARG0; arg0 <= MAX_DUMP_ARG0; ++arg0) { -+ if ((p-buf) > PAGE_SIZE-TP_CONTROLLER_ROW_LEN*5) -+ return -ENOMEM; /* don't overflow sysfs buf */ -+ /* Read raw twice with different junk values, -+ * to detect unused output bytes which are left unchaged: */ -+ ret = read_tp_ec_row(arg0, bat, junka, rowa); -+ if (ret) -+ return ret; -+ ret = read_tp_ec_row(arg0, bat, junkb, rowb); -+ if (ret) -+ return ret; -+ for (i = 0; i < TP_CONTROLLER_ROW_LEN; i++) { -+ if (rowa[i] == junka && rowb[i] == junkb) -+ p += sprintf(p, "-- "); /* unused by EC */ -+ else -+ p += sprintf(p, "%02x ", rowa[i]); -+ } -+ p += sprintf(p, "\n"); -+ } -+ return p-buf; -+} -+ -+ -+/********************************************************************* -+ * sysfs attribute I/O, other than batteries -+ */ -+ -+static ssize_t show_ac_connected( -+ struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ int ret = power_device_present(0xFF); -+ if (ret < 0) -+ return ret; -+ return sprintf(buf, "%d\n", ret); /* type: boolean */ -+} -+ -+/********************************************************************* -+ * The the "smapi_request" sysfs attribute executes a raw SMAPI call. -+ * You write to make a request and read to get the result. The state -+ * is saved globally rather than per fd (sysfs limitation), so -+ * simultaenous requests may get each other's results! So this is for -+ * development and debugging only. -+ */ -+#define MAX_SMAPI_ATTR_ANSWER_LEN 128 -+static char smapi_attr_answer[MAX_SMAPI_ATTR_ANSWER_LEN] = ""; -+ -+static ssize_t show_smapi_request(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ int ret = snprintf(buf, PAGE_SIZE, "%s", smapi_attr_answer); -+ smapi_attr_answer[0] = '\0'; -+ return ret; -+} -+ -+static ssize_t store_smapi_request(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ unsigned int inEBX, inECX, inEDI, inESI; -+ u32 outEBX, outECX, outEDX, outEDI, outESI; -+ const char *msg; -+ int ret; -+ if (sscanf(buf, "%x %x %x %x", &inEBX, &inECX, &inEDI, &inESI) != 4) { -+ smapi_attr_answer[0] = '\0'; -+ return -EINVAL; -+ } -+ ret = smapi_request( -+ inEBX, inECX, inEDI, inESI, -+ &outEBX, &outECX, &outEDX, &outEDI, &outESI, &msg); -+ snprintf(smapi_attr_answer, MAX_SMAPI_ATTR_ANSWER_LEN, -+ "%x %x %x %x %x %d '%s'\n", -+ (unsigned int)outEBX, (unsigned int)outECX, -+ (unsigned int)outEDX, (unsigned int)outEDI, -+ (unsigned int)outESI, ret, msg); -+ if (ret) -+ return ret; -+ else -+ return count; -+} -+ -+/********************************************************************* -+ * Power management: the embedded controller forgets the battery -+ * thresholds when the system is suspended to disk and unplugged from -+ * AC and battery, so we restore it upon resume. 
-+ */ -+ -+static int saved_threshs[4] = {-1, -1, -1, -1}; /* -1 = don't know */ -+ -+static int tp_suspend(struct platform_device *dev, pm_message_t state) -+{ -+ int restore = (state.event == PM_EVENT_HIBERNATE || -+ state.event == PM_EVENT_FREEZE); -+ if (!restore || get_real_thresh(0, THRESH_STOP , &saved_threshs[0])) -+ saved_threshs[0] = -1; -+ if (!restore || get_real_thresh(0, THRESH_START, &saved_threshs[1])) -+ saved_threshs[1] = -1; -+ if (!restore || get_real_thresh(1, THRESH_STOP , &saved_threshs[2])) -+ saved_threshs[2] = -1; -+ if (!restore || get_real_thresh(1, THRESH_START, &saved_threshs[3])) -+ saved_threshs[3] = -1; -+ DPRINTK("suspend saved: %d %d %d %d", saved_threshs[0], -+ saved_threshs[1], saved_threshs[2], saved_threshs[3]); -+ return 0; -+} -+ -+static int tp_resume(struct platform_device *dev) -+{ -+ DPRINTK("resume restoring: %d %d %d %d", saved_threshs[0], -+ saved_threshs[1], saved_threshs[2], saved_threshs[3]); -+ if (saved_threshs[0] >= 0) -+ set_real_thresh(0, THRESH_STOP , saved_threshs[0]); -+ if (saved_threshs[1] >= 0) -+ set_real_thresh(0, THRESH_START, saved_threshs[1]); -+ if (saved_threshs[2] >= 0) -+ set_real_thresh(1, THRESH_STOP , saved_threshs[2]); -+ if (saved_threshs[3] >= 0) -+ set_real_thresh(1, THRESH_START, saved_threshs[3]); -+ return 0; -+} -+ -+ -+/********************************************************************* -+ * Driver model -+ */ -+ -+static struct platform_driver tp_driver = { -+ .suspend = tp_suspend, -+ .resume = tp_resume, -+ .driver = { -+ .name = "smapi", -+ .owner = THIS_MODULE -+ }, -+}; -+ -+ -+/********************************************************************* -+ * Sysfs device model -+ */ -+ -+/* Attributes in /sys/devices/platform/smapi/ */ -+ -+static DEVICE_ATTR(ac_connected, 0444, show_ac_connected, NULL); -+static DEVICE_ATTR(smapi_request, 0600, show_smapi_request, -+ store_smapi_request); -+ -+static struct attribute *tp_root_attributes[] = { -+ &dev_attr_ac_connected.attr, -+ &dev_attr_smapi_request.attr, -+ NULL -+}; -+static struct attribute_group tp_root_attribute_group = { -+ .attrs = tp_root_attributes -+}; -+ -+/* Attributes under /sys/devices/platform/smapi/BAT{0,1}/ : -+ * Every attribute needs to be defined (i.e., statically allocated) for -+ * each battery, and then referenced in the attribute list of each battery. -+ * We use preprocessor voodoo to avoid duplicating the list of attributes 4 -+ * times. The preprocessor output is just normal sysfs attributes code. 
-+ */ -+ -+/** -+ * FOREACH_BAT_ATTR - invoke the given macros on all our battery attributes -+ * @_BAT: battery number (0 or 1) -+ * @_ATTR_RW: macro to invoke for each read/write attribute -+ * @_ATTR_R: macro to invoke for each read-only attribute -+ */ -+#define FOREACH_BAT_ATTR(_BAT, _ATTR_RW, _ATTR_R) \ -+ _ATTR_RW(_BAT, start_charge_thresh) \ -+ _ATTR_RW(_BAT, stop_charge_thresh) \ -+ _ATTR_RW(_BAT, inhibit_charge_minutes) \ -+ _ATTR_RW(_BAT, force_discharge) \ -+ _ATTR_R(_BAT, installed) \ -+ _ATTR_R(_BAT, state) \ -+ _ATTR_R(_BAT, manufacturer) \ -+ _ATTR_R(_BAT, model) \ -+ _ATTR_R(_BAT, barcoding) \ -+ _ATTR_R(_BAT, chemistry) \ -+ _ATTR_R(_BAT, voltage) \ -+ _ATTR_R(_BAT, group0_voltage) \ -+ _ATTR_R(_BAT, group1_voltage) \ -+ _ATTR_R(_BAT, group2_voltage) \ -+ _ATTR_R(_BAT, group3_voltage) \ -+ _ATTR_R(_BAT, current_now) \ -+ _ATTR_R(_BAT, current_avg) \ -+ _ATTR_R(_BAT, charging_max_current) \ -+ _ATTR_R(_BAT, power_now) \ -+ _ATTR_R(_BAT, power_avg) \ -+ _ATTR_R(_BAT, remaining_percent) \ -+ _ATTR_R(_BAT, remaining_percent_error) \ -+ _ATTR_R(_BAT, remaining_charging_time) \ -+ _ATTR_R(_BAT, remaining_running_time) \ -+ _ATTR_R(_BAT, remaining_running_time_now) \ -+ _ATTR_R(_BAT, remaining_capacity) \ -+ _ATTR_R(_BAT, last_full_capacity) \ -+ _ATTR_R(_BAT, design_voltage) \ -+ _ATTR_R(_BAT, charging_max_voltage) \ -+ _ATTR_R(_BAT, design_capacity) \ -+ _ATTR_R(_BAT, cycle_count) \ -+ _ATTR_R(_BAT, temperature) \ -+ _ATTR_R(_BAT, serial) \ -+ _ATTR_R(_BAT, manufacture_date) \ -+ _ATTR_R(_BAT, first_use_date) \ -+ _ATTR_R(_BAT, dump) -+ -+/* Define several macros we will feed into FOREACH_BAT_ATTR: */ -+ -+#define DEFINE_BAT_ATTR_RW(_BAT,_NAME) \ -+ static struct bat_device_attribute dev_attr_##_NAME##_##_BAT = { \ -+ .dev_attr = __ATTR(_NAME, 0644, show_battery_##_NAME, \ -+ store_battery_##_NAME), \ -+ .bat = _BAT \ -+ }; -+ -+#define DEFINE_BAT_ATTR_R(_BAT,_NAME) \ -+ static struct bat_device_attribute dev_attr_##_NAME##_##_BAT = { \ -+ .dev_attr = __ATTR(_NAME, 0644, show_battery_##_NAME, 0), \ -+ .bat = _BAT \ -+ }; -+ -+#define REF_BAT_ATTR(_BAT,_NAME) \ -+ &dev_attr_##_NAME##_##_BAT.dev_attr.attr, -+ -+/* This provide all attributes for one battery: */ -+ -+#define PROVIDE_BAT_ATTRS(_BAT) \ -+ FOREACH_BAT_ATTR(_BAT, DEFINE_BAT_ATTR_RW, DEFINE_BAT_ATTR_R) \ -+ static struct attribute *tp_bat##_BAT##_attributes[] = { \ -+ FOREACH_BAT_ATTR(_BAT, REF_BAT_ATTR, REF_BAT_ATTR) \ -+ NULL \ -+ }; \ -+ static struct attribute_group tp_bat##_BAT##_attribute_group = { \ -+ .name = "BAT" #_BAT, \ -+ .attrs = tp_bat##_BAT##_attributes \ -+ }; -+ -+/* Finally genereate the attributes: */ -+ -+PROVIDE_BAT_ATTRS(0) -+PROVIDE_BAT_ATTRS(1) -+ -+/* List of attribute groups */ -+ -+static struct attribute_group *attr_groups[] = { -+ &tp_root_attribute_group, -+ &tp_bat0_attribute_group, -+ &tp_bat1_attribute_group, -+ NULL -+}; -+ -+ -+/********************************************************************* -+ * Init and cleanup -+ */ -+ -+static struct attribute_group **next_attr_group; /* next to register */ -+ -+static int __init tp_init(void) -+{ -+ int ret; -+ printk(KERN_INFO "tp_smapi " TP_VERSION " loading...\n"); -+ -+ ret = find_smapi_port(); -+ if (ret < 0) -+ goto err; -+ else -+ smapi_port = ret; -+ -+ if (!request_region(smapi_port, 1, "smapi")) { -+ printk(KERN_ERR "tp_smapi cannot claim port 0x%x\n", -+ smapi_port); -+ ret = -ENXIO; -+ goto err; -+ } -+ -+ if (!request_region(SMAPI_PORT2, 1, "smapi")) { -+ printk(KERN_ERR "tp_smapi cannot claim port 0x%x\n", -+ SMAPI_PORT2); 
-+ ret = -ENXIO; -+ goto err_port1; -+ } -+ -+ ret = platform_driver_register(&tp_driver); -+ if (ret) -+ goto err_port2; -+ -+ pdev = platform_device_alloc("smapi", -1); -+ if (!pdev) { -+ ret = -ENOMEM; -+ goto err_driver; -+ } -+ -+ ret = platform_device_add(pdev); -+ if (ret) -+ goto err_device_free; -+ -+ for (next_attr_group = attr_groups; *next_attr_group; -+ ++next_attr_group) { -+ ret = sysfs_create_group(&pdev->dev.kobj, *next_attr_group); -+ if (ret) -+ goto err_attr; -+ } -+ -+ printk(KERN_INFO "tp_smapi successfully loaded (smapi_port=0x%x).\n", -+ smapi_port); -+ return 0; -+ -+err_attr: -+ while (--next_attr_group >= attr_groups) -+ sysfs_remove_group(&pdev->dev.kobj, *next_attr_group); -+ platform_device_unregister(pdev); -+err_device_free: -+ platform_device_put(pdev); -+err_driver: -+ platform_driver_unregister(&tp_driver); -+err_port2: -+ release_region(SMAPI_PORT2, 1); -+err_port1: -+ release_region(smapi_port, 1); -+err: -+ printk(KERN_ERR "tp_smapi init failed (ret=%d)!\n", ret); -+ return ret; -+} -+ -+static void __exit tp_exit(void) -+{ -+ while (next_attr_group && --next_attr_group >= attr_groups) -+ sysfs_remove_group(&pdev->dev.kobj, *next_attr_group); -+ platform_device_unregister(pdev); -+ platform_driver_unregister(&tp_driver); -+ release_region(SMAPI_PORT2, 1); -+ if (smapi_port) -+ release_region(smapi_port, 1); -+ -+ printk(KERN_INFO "tp_smapi unloaded.\n"); -+} -+ -+module_init(tp_init); -+module_exit(tp_exit); -diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig -index 0840d27381ea..73aba9a31064 100644 ---- a/drivers/tty/Kconfig -+++ b/drivers/tty/Kconfig -@@ -75,6 +75,19 @@ config VT_CONSOLE_SLEEP - def_bool y - depends on VT_CONSOLE && PM_SLEEP - -+config NR_TTY_DEVICES -+ int "Maximum tty device number" -+ depends on VT -+ range 12 63 -+ default 63 -+ ---help--- -+ This option is used to change the number of tty devices in /dev. -+ The default value is 63. The lowest number you can set is 12, -+ 63 is also the upper limit so we don't overrun the serial -+ consoles. -+ -+ If unsure, say 63. 
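As a rough illustration of what the NR_TTY_DEVICES option above controls (this sketch is not part of the patch): the chosen value becomes the MAX_NR_CONSOLES limit that userspace sees through <linux/vt.h>, and the companion change to include/uapi/linux/vt.h later in this patch keeps a fallback of 63 so packages that cannot see the kernel configuration still build. A minimal userspace check could look like:

	/* Illustrative sketch, not taken from the patch: print the console limits
	 * exposed by the uapi header. In a userspace build CONFIG_NR_TTY_DEVICES
	 * is not visible, so MAX_NR_CONSOLES falls back to 63. */
	#include <stdio.h>
	#include <linux/vt.h>

	int main(void)
	{
		printf("virtual consoles: %d..%d\n", MIN_NR_CONSOLES, MAX_NR_CONSOLES);
		return 0;
	}
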
-+ - config HW_CONSOLE - bool - depends on VT && !UML -diff --git a/fs/exec.c b/fs/exec.c -index 65eaacaba4f4..1d3b310bd5f0 100644 ---- a/fs/exec.c -+++ b/fs/exec.c -@@ -63,6 +63,8 @@ - #include - #include - -+#include -+ - #include - #include - #include -@@ -866,9 +868,12 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) - if (err) - goto exit; - -- if (name->name[0] != '\0') -+ if (name->name[0] != '\0') { - fsnotify_open(file); - -+ trace_open_exec(name->name); -+ } -+ - out: - return file; - -diff --git a/fs/open.c b/fs/open.c -index cb81623a8b09..a92b0f6061ac 100644 ---- a/fs/open.c -+++ b/fs/open.c -@@ -34,6 +34,9 @@ - - #include "internal.h" - -+#define CREATE_TRACE_POINTS -+#include -+ - int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, - struct file *filp) - { -@@ -1068,6 +1071,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) - } else { - fsnotify_open(f); - fd_install(fd, f); -+ trace_do_sys_open(tmp->name, flags, mode); - } - } - putname(tmp); -diff --git a/include/trace/events/fs.h b/include/trace/events/fs.h -new file mode 100644 -index 000000000000..fb634b74adf3 ---- /dev/null -+++ b/include/trace/events/fs.h -@@ -0,0 +1,53 @@ -+#undef TRACE_SYSTEM -+#define TRACE_SYSTEM fs -+ -+#if !defined(_TRACE_FS_H) || defined(TRACE_HEADER_MULTI_READ) -+#define _TRACE_FS_H -+ -+#include -+#include -+ -+TRACE_EVENT(do_sys_open, -+ -+ TP_PROTO(const char *filename, int flags, int mode), -+ -+ TP_ARGS(filename, flags, mode), -+ -+ TP_STRUCT__entry( -+ __string( filename, filename ) -+ __field( int, flags ) -+ __field( int, mode ) -+ ), -+ -+ TP_fast_assign( -+ __assign_str(filename, filename); -+ __entry->flags = flags; -+ __entry->mode = mode; -+ ), -+ -+ TP_printk("\"%s\" %x %o", -+ __get_str(filename), __entry->flags, __entry->mode) -+); -+ -+TRACE_EVENT(open_exec, -+ -+ TP_PROTO(const char *filename), -+ -+ TP_ARGS(filename), -+ -+ TP_STRUCT__entry( -+ __string( filename, filename ) -+ ), -+ -+ TP_fast_assign( -+ __assign_str(filename, filename); -+ ), -+ -+ TP_printk("\"%s\"", -+ __get_str(filename)) -+); -+ -+#endif /* _TRACE_FS_H */ -+ -+/* This part must be outside protection */ -+#include -diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index 79226ca8f80f..2a30060e7e1d 100644 ---- a/include/linux/blkdev.h -+++ b/include/linux/blkdev.h -@@ -47,7 +47,11 @@ struct blk_queue_stats; - struct blk_stat_callback; - - #define BLKDEV_MIN_RQ 4 -+#ifdef CONFIG_ZENIFY -+#define BLKDEV_MAX_RQ 512 -+#else - #define BLKDEV_MAX_RQ 128 /* Default maximum */ -+#endif - - /* Must be consistent with blk_mq_poll_stats_bkt() */ - #define BLK_MQ_POLL_STATS_BKTS 16 -diff --git a/include/linux/thinkpad_ec.h b/include/linux/thinkpad_ec.h -new file mode 100644 -index 000000000000..1b80d7ee5493 ---- /dev/null -+++ b/include/linux/thinkpad_ec.h -@@ -0,0 +1,47 @@ -+/* -+ * thinkpad_ec.h - interface to ThinkPad embedded controller LPC3 functions -+ * -+ * Copyright (C) 2005 Shem Multinymous -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ */ -+ -+#ifndef _THINKPAD_EC_H -+#define _THINKPAD_EC_H -+ -+#ifdef __KERNEL__ -+ -+#define TP_CONTROLLER_ROW_LEN 16 -+ -+/* EC transactions input and output (possibly partial) vectors of 16 bytes. */ -+struct thinkpad_ec_row { -+ u16 mask; /* bitmap of which entries of val[] are meaningful */ -+ u8 val[TP_CONTROLLER_ROW_LEN]; -+}; -+ -+extern int __must_check thinkpad_ec_lock(void); -+extern int __must_check thinkpad_ec_try_lock(void); -+extern void thinkpad_ec_unlock(void); -+ -+extern int thinkpad_ec_read_row(const struct thinkpad_ec_row *args, -+ struct thinkpad_ec_row *data); -+extern int thinkpad_ec_try_read_row(const struct thinkpad_ec_row *args, -+ struct thinkpad_ec_row *mask); -+extern int thinkpad_ec_prefetch_row(const struct thinkpad_ec_row *args); -+extern void thinkpad_ec_invalidate(void); -+ -+ -+#endif /* __KERNEL */ -+#endif /* _THINKPAD_EC_H */ -diff --git a/include/uapi/linux/vt.h b/include/uapi/linux/vt.h -index e9d39c48520a..3bceead8da40 100644 ---- a/include/uapi/linux/vt.h -+++ b/include/uapi/linux/vt.h -@@ -3,12 +3,25 @@ - #define _UAPI_LINUX_VT_H - - -+/* -+ * We will make this definition solely for the purpose of making packages -+ * such as splashutils build, because they can not understand that -+ * NR_TTY_DEVICES is defined in the kernel configuration. -+ */ -+#ifndef CONFIG_NR_TTY_DEVICES -+#define CONFIG_NR_TTY_DEVICES 63 -+#endif -+ - /* - * These constants are also useful for user-level apps (e.g., VC - * resizing). - */ - #define MIN_NR_CONSOLES 1 /* must be at least 1 */ --#define MAX_NR_CONSOLES 63 /* serial lines start at 64 */ -+/* -+ * NR_TTY_DEVICES: -+ * Value MUST be at least 12 and must never be higher then 63 -+ */ -+#define MAX_NR_CONSOLES CONFIG_NR_TTY_DEVICES /* serial lines start above this */ - /* Note: the ioctl VT_GETSTATE does not work for - consoles 16 and higher (since it returns a short) */ - -diff --git a/init/Kconfig b/init/Kconfig -index 041f3a022122..5ed70eb1ad3a 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -45,6 +45,38 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config ZENIFY -+ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience" -+ default y -+ help -+ Tunes the kernel for responsiveness at the cost of throughput and power usage. 
-+ -+ --- Virtual Memory Subsystem --------------------------- -+ -+ Mem dirty before bg writeback..: 10 % -> 20 % -+ Mem dirty before sync writeback: 20 % -> 50 % -+ -+ --- Block Layer ---------------------------------------- -+ -+ Queue depth...............: 128 -> 512 -+ Default MQ scheduler......: mq-deadline -> bfq -+ -+ --- CFS CPU Scheduler ---------------------------------- -+ -+ Scheduling latency.............: 6 -> 3 ms -+ Minimal granularity............: 0.75 -> 0.3 ms -+ Wakeup granularity.............: 1 -> 0.5 ms -+ CPU migration cost.............: 0.5 -> 0.25 ms -+ Bandwidth slice size...........: 5 -> 3 ms -+ Ondemand fine upscaling limit..: 95 % -> 85 % -+ -+ --- MuQSS CPU Scheduler -------------------------------- -+ -+ Scheduling interval............: 6 -> 3 ms -+ ISO task max realtime use......: 70 % -> 25 % -+ Ondemand coarse upscaling limit: 80 % -> 45 % -+ Ondemand fine upscaling limit..: 95 % -> 45 % -+ - config BROKEN - bool - -@@ -1026,6 +1058,13 @@ config CC_OPTIMIZE_FOR_PERFORMANCE - with the "-O2" compiler flag for best performance and most - helpful compile-time warnings. - -+config CC_OPTIMIZE_HARDER -+ bool "Optimize harder" -+ help -+ This option will pass "-O3" to your compiler resulting in a -+ larger and faster kernel. The more complex optimizations also -+ increase compilation time and may affect stability. -+ - config CC_OPTIMIZE_FOR_SIZE - bool "Optimize for size" - help -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 2f0a0be4d344..bada807c7e59 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -37,8 +37,13 @@ - * - * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_latency = 3000000ULL; -+static unsigned int normalized_sysctl_sched_latency = 3000000ULL; -+#else - unsigned int sysctl_sched_latency = 6000000ULL; - static unsigned int normalized_sysctl_sched_latency = 6000000ULL; -+#endif - - /* - * The initial- and re-scaling of tunables is configurable -@@ -58,13 +63,22 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L - * - * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_min_granularity = 300000ULL; -+static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL; -+#else - unsigned int sysctl_sched_min_granularity = 750000ULL; - static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; -+#endif - - /* - * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity - */ -+#ifdef CONFIG_ZENIFY -+static unsigned int sched_nr_latency = 10; -+#else - static unsigned int sched_nr_latency = 8; -+#endif - - /* - * After fork, child runs first. 
If set to 0 (default) then -@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; - * - * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_wakeup_granularity = 500000UL; -+static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL; -+ -+const_debug unsigned int sysctl_sched_migration_cost = 50000UL; -+#else - unsigned int sysctl_sched_wakeup_granularity = 1000000UL; - static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; - - const_debug unsigned int sysctl_sched_migration_cost = 500000UL; -+#endif - - #ifdef CONFIG_SMP - /* -@@ -107,8 +128,12 @@ int __weak arch_asym_cpu_priority(int cpu) - * - * (default: 5 msec, units: microseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; -+#else - unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; - #endif -+#endif - - /* - * The margin used when comparing utilization with CPU capacity: -diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index 337c6afb3345..9315e358f292 100644 ---- a/mm/page-writeback.c -+++ b/mm/page-writeback.c -@@ -71,7 +71,11 @@ static long ratelimit_pages = 32; - /* - * Start background writeback (via writeback threads) at this percentage - */ -+#ifdef CONFIG_ZENIFY -+int dirty_background_ratio = 20; -+#else - int dirty_background_ratio = 10; -+#endif - - /* - * dirty_background_bytes starts at 0 (disabled) so that it is a function of -@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable; - /* - * The generator of dirty data starts writeback at this percentage - */ -+#ifdef CONFIG_ZENIFY -+int vm_dirty_ratio = 50; -+#else - int vm_dirty_ratio = 20; -+#endif - - /* - * vm_dirty_bytes starts at 0 (disabled) so that it is a function of -diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig -index 80dad301361d..42b7fa7d01f8 100644 ---- a/net/ipv4/Kconfig -+++ b/net/ipv4/Kconfig -@@ -702,6 +702,9 @@ choice - config DEFAULT_VEGAS - bool "Vegas" if TCP_CONG_VEGAS=y - -+ config DEFAULT_YEAH -+ bool "YeAH" if TCP_CONG_YEAH=y -+ - config DEFAULT_VENO - bool "Veno" if TCP_CONG_VENO=y - -@@ -735,6 +738,7 @@ config DEFAULT_TCP_CONG - default "htcp" if DEFAULT_HTCP - default "hybla" if DEFAULT_HYBLA - default "vegas" if DEFAULT_VEGAS -+ default "yeah" if DEFAULT_YEAH - default "westwood" if DEFAULT_WESTWOOD - default "veno" if DEFAULT_VENO - default "reno" if DEFAULT_RENO - -From: Nick Desaulniers -Date: Mon, 24 Dec 2018 13:37:41 +0200 -Subject: include/linux/compiler*.h: define asm_volatile_goto - -asm_volatile_goto should also be defined for other compilers that -support asm goto. - -Fixes commit 815f0dd ("include/linux/compiler*.h: make compiler-*.h -mutually exclusive"). - -Signed-off-by: Nick Desaulniers -Signed-off-by: Miguel Ojeda - -diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h -index ba814f1..e77eeb0 100644 ---- a/include/linux/compiler_types.h -+++ b/include/linux/compiler_types.h -@@ -188,6 +188,10 @@ struct ftrace_likely_data { - #define asm_volatile_goto(x...) asm goto(x) - #endif - -+#ifndef asm_volatile_goto -+#define asm_volatile_goto(x...) asm goto(x) -+#endif -+ - /* Are two types/vars the same type (ignoring qualifiers)? 
*/ - #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) - -From: Andy Lavr -Date: Mon, 24 Dec 2018 14:57:47 +0200 -Subject: avl: Use [defer+madvise] as default khugepaged defrag strategy - -For some reason, the default strategy to respond to THP fault fallbacks -is still just madvise, meaning stall if the program wants transparent -hugepages, but don't trigger a background reclaim / compaction if THP -begins to fail allocations. This creates a snowball affect where we -still use the THP code paths, but we almost always fail once a system -has been active and busy for a while. - -The option "defer" was created for interactive systems where THP can -still improve performance. If we have to fallback to a regular page due -to an allocation failure or anything else, we will trigger a background -reclaim and compaction so future THP attempts succeed and previous -attempts eventually have their smaller pages combined without stalling -running applications. - -We still want madvise to stall applications that explicitely want THP, -so defer+madvise _does_ make a ton of sense. Make it the default for -interactive systems, especially if the kernel maintainer left -transparent hugepages on "always". - -Reasoning and details in the original patch: -https://lwn.net/Articles/711248/ - -Signed-off-by: Andy Lavr - -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index e84a10b..21d62b7 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly = - #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE - (1<hw.mac; - struct e1000_phy_info *phy = &adapter->hw.phy; - struct e1000_ring *tx_ring = adapter->tx_ring; -- u32 dmoff_exit_timeout = 100, tries = 0; - struct e1000_hw *hw = &adapter->hw; -+ u32 link, tctl; -- u32 link, tctl, pcim_state; - - if (test_bit(__E1000_DOWN, &adapter->state)) - return; -@@ -5188,21 +5187,6 @@ static void e1000_watchdog_task(struct work_struct *work) - /* Cancel scheduled suspend requests. */ - pm_runtime_resume(netdev->dev.parent); - -- /* Checking if MAC is in DMoff state*/ -- pcim_state = er32(STATUS); -- while (pcim_state & E1000_STATUS_PCIM_STATE) { -- if (tries++ == dmoff_exit_timeout) { -- e_dbg("Error in exiting dmoff\n"); -- break; -- } -- usleep_range(10000, 20000); -- pcim_state = er32(STATUS); -- -- /* Checking if MAC exited DMoff state */ -- if (!(pcim_state & E1000_STATUS_PCIM_STATE)) -- e1000_phy_hw_reset(&adapter->hw); -- } -- - /* update snapshot of PHY registers on LSC */ - e1000_phy_read_status(adapter); - mac->ops.get_link_up_info(&adapter->hw, -From adb1f9df27f08e6488bcd80b1607987c6114a77a Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 25 Nov 2019 15:13:06 -0300 -Subject: [PATCH] elevator: set default scheduler to bfq for blk-mq - -Signed-off-by: Alexandre Frade ---- - block/elevator.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/block/elevator.c b/block/elevator.c -index 076ba7308e65..81f89095aa77 100644 ---- a/block/elevator.c -+++ b/block/elevator.c -@@ -623,15 +623,15 @@ static inline bool elv_support_iosched(struct request_queue *q) - } - - /* -- * For single queue devices, default to using mq-deadline. If we have multiple -- * queues or mq-deadline is not available, default to "none". -+ * For single queue devices, default to using bfq. If we have multiple -+ * queues or bfq is not available, default to "none". 
- */ - static struct elevator_type *elevator_get_default(struct request_queue *q) - { - if (q->nr_hw_queues != 1) - return NULL; - -- return elevator_get(q, "mq-deadline", false); -+ return elevator_get(q, "bfq", false); - } - - /* -From c3ec05777c46e19a8a26d0fc4ca0c0db8a19de97 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Fri, 10 May 2019 16:45:59 -0300 -Subject: [PATCH] block: set rq_affinity = 2 for full multithreading I/O - requests - -Signed-off-by: Alexandre Frade ---- - include/linux/blkdev.h | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index f3ea78b0c91c..4dbacc6b073b 100644 ---- a/include/linux/blkdev.h -+++ b/include/linux/blkdev.h -@@ -621,7 +621,8 @@ struct request_queue { - #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ - - #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ -- (1 << QUEUE_FLAG_SAME_COMP)) -+ (1 << QUEUE_FLAG_SAME_COMP) | \ -+ (1 << QUEUE_FLAG_SAME_FORCE)) - - void blk_queue_flag_set(unsigned int flag, struct request_queue *q); - void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); -From 8171d33d0b84a953649863538fdbe4c26c035e4f Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Fri, 10 May 2019 14:32:50 -0300 -Subject: [PATCH] mm: set 2 megabytes for address_space-level file read-ahead - pages size - -Signed-off-by: Alexandre Frade ---- - include/linux/mm.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/linux/mm.h b/include/linux/mm.h -index a2adf95b3f9c..e804d9f7583a 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -2416,7 +2416,7 @@ int __must_check write_one_page(struct page *page); - void task_dirty_inc(struct task_struct *tsk); - - /* readahead.c */ --#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) -+#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE) - - int force_page_cache_readahead(struct address_space *mapping, struct file *filp, - pgoff_t offset, unsigned long nr_to_read); diff --git a/linux54-tkg/linux54-tkg-patches/0003-glitched-cfs.patch b/linux54-tkg/linux54-tkg-patches/0003-glitched-cfs.patch deleted file mode 100644 index 06b7f02..0000000 --- a/linux54-tkg/linux54-tkg-patches/0003-glitched-cfs.patch +++ /dev/null @@ -1,72 +0,0 @@ -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. 
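Read as tick lengths, the trade-off between these timer frequencies is easier to see, since one tick lasts 1000/HZ milliseconds. The following small calculation is illustrative only (not part of the patch); it prints the period for each choice, including the HZ_750 option added by the second hunk of this patch:

	/* Illustrative only: tick period in milliseconds for each HZ choice. */
	#include <stdio.h>

	int main(void)
	{
		const int hz[] = { 100, 250, 300, 500, 750, 1000 };
		unsigned int i;

		for (i = 0; i < sizeof(hz) / sizeof(hz[0]); i++)
			printf("HZ=%-4d -> %.2f ms per tick\n", hz[i], 1000.0 / hz[i]);
		return 0;
	}
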
-+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - diff --git a/linux54-tkg/linux54-tkg-patches/0004-5.4-ck1.patch b/linux54-tkg/linux54-tkg-patches/0004-5.4-ck1.patch deleted file mode 100644 index f3fbde8..0000000 --- a/linux54-tkg/linux54-tkg-patches/0004-5.4-ck1.patch +++ /dev/null @@ -1,17684 +0,0 @@ -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 8dee8f68fe15..e56fb275f607 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -4277,6 +4277,14 @@ - Memory area to be used by remote processor image, - managed by CMA. - -+ rqshare= [X86] Select the MuQSS scheduler runqueue sharing type. -+ Format: -+ smt -- Share SMT (hyperthread) sibling runqueues -+ mc -- Share MC (multicore) sibling runqueues -+ smp -- Share SMP runqueues -+ none -- So not share any runqueues -+ Default value is mc -+ - rw [KNL] Mount root device read-write on boot - - S [KNL] Run init in single mode -diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index 032c7cd3cede..ff41dfacb34b 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -46,6 +46,7 @@ show up in /proc/sys/kernel: - - hung_task_check_interval_secs - - hung_task_warnings - - hyperv_record_panic_msg -+- iso_cpu - - kexec_load_disabled - - kptr_restrict - - l2cr [ PPC only ] -@@ -82,6 +83,7 @@ show up in /proc/sys/kernel: - - randomize_va_space - - real-root-dev ==> Documentation/admin-guide/initrd.rst - - reboot-cmd [ SPARC only ] -+- rr_interval - - rtsig-max - - rtsig-nr - - sched_energy_aware -@@ -105,6 +107,7 @@ show up in /proc/sys/kernel: - - unknown_nmi_panic - - watchdog - - watchdog_thresh -+- yield_type - - version - - -@@ -438,6 +441,16 @@ When kptr_restrict is set to (2), kernel pointers printed using - %pK will be replaced with 0's regardless of privileges. - - -+iso_cpu: (MuQSS CPU scheduler only) -+=================================== -+ -+This sets the percentage cpu that the unprivileged SCHED_ISO tasks can -+run effectively at realtime priority, averaged over a rolling five -+seconds over the -whole- system, meaning all cpus. -+ -+Set to 70 (percent) by default. -+ -+ - l2cr: (PPC only) - ================ - -@@ -905,6 +918,20 @@ ROM/Flash boot loader. Maybe to tell it what to do after - rebooting. ??? 
- - -+rr_interval: (MuQSS CPU scheduler only) -+======================================= -+ -+This is the smallest duration that any cpu process scheduling unit -+will run for. Increasing this value can increase throughput of cpu -+bound tasks substantially but at the expense of increased latencies -+overall. Conversely decreasing it will decrease average and maximum -+latencies but at the expense of throughput. This value is in -+milliseconds and the default value chosen depends on the number of -+cpus available at scheduler initialisation with a minimum of 6. -+ -+Valid values are from 1-1000. -+ -+ - rtsig-max & rtsig-nr: - ===================== - -@@ -1175,3 +1202,13 @@ is 10 seconds. - - The softlockup threshold is (2 * watchdog_thresh). Setting this - tunable to zero will disable lockup detection altogether. -+ -+ -+yield_type: (MuQSS CPU scheduler only) -+====================================== -+ -+This determines what type of yield calls to sched_yield will perform. -+ -+ 0: No yield. -+ 1: Yield only to better priority/deadline tasks. (default) -+ 2: Expire timeslice and recalculate deadline. -diff --git a/Documentation/scheduler/sched-BFS.txt b/Documentation/scheduler/sched-BFS.txt -new file mode 100644 -index 000000000000..c0282002a079 ---- /dev/null -+++ b/Documentation/scheduler/sched-BFS.txt -@@ -0,0 +1,351 @@ -+BFS - The Brain Fuck Scheduler by Con Kolivas. -+ -+Goals. -+ -+The goal of the Brain Fuck Scheduler, referred to as BFS from here on, is to -+completely do away with the complex designs of the past for the cpu process -+scheduler and instead implement one that is very simple in basic design. -+The main focus of BFS is to achieve excellent desktop interactivity and -+responsiveness without heuristics and tuning knobs that are difficult to -+understand, impossible to model and predict the effect of, and when tuned to -+one workload cause massive detriment to another. -+ -+ -+Design summary. -+ -+BFS is best described as a single runqueue, O(n) lookup, earliest effective -+virtual deadline first design, loosely based on EEVDF (earliest eligible virtual -+deadline first) and my previous Staircase Deadline scheduler. Each component -+shall be described in order to understand the significance of, and reasoning for -+it. The codebase when the first stable version was released was approximately -+9000 lines less code than the existing mainline linux kernel scheduler (in -+2.6.31). This does not even take into account the removal of documentation and -+the cgroups code that is not used. -+ -+Design reasoning. -+ -+The single runqueue refers to the queued but not running processes for the -+entire system, regardless of the number of CPUs. The reason for going back to -+a single runqueue design is that once multiple runqueues are introduced, -+per-CPU or otherwise, there will be complex interactions as each runqueue will -+be responsible for the scheduling latency and fairness of the tasks only on its -+own runqueue, and to achieve fairness and low latency across multiple CPUs, any -+advantage in throughput of having CPU local tasks causes other disadvantages. -+This is due to requiring a very complex balancing system to at best achieve some -+semblance of fairness across CPUs and can only maintain relatively low latency -+for tasks bound to the same CPUs, not across them. To increase said fairness -+and latency across CPUs, the advantage of local runqueue locking, which makes -+for better scalability, is lost due to having to grab multiple locks. 
-+ -+A significant feature of BFS is that all accounting is done purely based on CPU -+used and nowhere is sleep time used in any way to determine entitlement or -+interactivity. Interactivity "estimators" that use some kind of sleep/run -+algorithm are doomed to fail to detect all interactive tasks, and to falsely tag -+tasks that aren't interactive as being so. The reason for this is that it is -+close to impossible to determine that when a task is sleeping, whether it is -+doing it voluntarily, as in a userspace application waiting for input in the -+form of a mouse click or otherwise, or involuntarily, because it is waiting for -+another thread, process, I/O, kernel activity or whatever. Thus, such an -+estimator will introduce corner cases, and more heuristics will be required to -+cope with those corner cases, introducing more corner cases and failed -+interactivity detection and so on. Interactivity in BFS is built into the design -+by virtue of the fact that tasks that are waking up have not used up their quota -+of CPU time, and have earlier effective deadlines, thereby making it very likely -+they will preempt any CPU bound task of equivalent nice level. See below for -+more information on the virtual deadline mechanism. Even if they do not preempt -+a running task, because the rr interval is guaranteed to have a bound upper -+limit on how long a task will wait for, it will be scheduled within a timeframe -+that will not cause visible interface jitter. -+ -+ -+Design details. -+ -+Task insertion. -+ -+BFS inserts tasks into each relevant queue as an O(1) insertion into a double -+linked list. On insertion, *every* running queue is checked to see if the newly -+queued task can run on any idle queue, or preempt the lowest running task on the -+system. This is how the cross-CPU scheduling of BFS achieves significantly lower -+latency per extra CPU the system has. In this case the lookup is, in the worst -+case scenario, O(n) where n is the number of CPUs on the system. -+ -+Data protection. -+ -+BFS has one single lock protecting the process local data of every task in the -+global queue. Thus every insertion, removal and modification of task data in the -+global runqueue needs to grab the global lock. However, once a task is taken by -+a CPU, the CPU has its own local data copy of the running process' accounting -+information which only that CPU accesses and modifies (such as during a -+timer tick) thus allowing the accounting data to be updated lockless. Once a -+CPU has taken a task to run, it removes it from the global queue. Thus the -+global queue only ever has, at most, -+ -+ (number of tasks requesting cpu time) - (number of logical CPUs) + 1 -+ -+tasks in the global queue. This value is relevant for the time taken to look up -+tasks during scheduling. This will increase if many tasks with CPU affinity set -+in their policy to limit which CPUs they're allowed to run on if they outnumber -+the number of CPUs. The +1 is because when rescheduling a task, the CPU's -+currently running task is put back on the queue. Lookup will be described after -+the virtual deadline mechanism is explained. -+ -+Virtual deadline. -+ -+The key to achieving low latency, scheduling fairness, and "nice level" -+distribution in BFS is entirely in the virtual deadline mechanism. The one -+tunable in BFS is the rr_interval, or "round robin interval". 
This is the -+maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy) -+tasks of the same nice level will be running for, or looking at it the other -+way around, the longest duration two tasks of the same nice level will be -+delayed for. When a task requests cpu time, it is given a quota (time_slice) -+equal to the rr_interval and a virtual deadline. The virtual deadline is -+offset from the current time in jiffies by this equation: -+ -+ jiffies + (prio_ratio * rr_interval) -+ -+The prio_ratio is determined as a ratio compared to the baseline of nice -20 -+and increases by 10% per nice level. The deadline is a virtual one only in that -+no guarantee is placed that a task will actually be scheduled by this time, but -+it is used to compare which task should go next. There are three components to -+how a task is next chosen. First is time_slice expiration. If a task runs out -+of its time_slice, it is descheduled, the time_slice is refilled, and the -+deadline reset to that formula above. Second is sleep, where a task no longer -+is requesting CPU for whatever reason. The time_slice and deadline are _not_ -+adjusted in this case and are just carried over for when the task is next -+scheduled. Third is preemption, and that is when a newly waking task is deemed -+higher priority than a currently running task on any cpu by virtue of the fact -+that it has an earlier virtual deadline than the currently running task. The -+earlier deadline is the key to which task is next chosen for the first and -+second cases. Once a task is descheduled, it is put back on the queue, and an -+O(n) lookup of all queued-but-not-running tasks is done to determine which has -+the earliest deadline and that task is chosen to receive CPU next. -+ -+The CPU proportion of different nice tasks works out to be approximately the -+ -+ (prio_ratio difference)^2 -+ -+The reason it is squared is that a task's deadline does not change while it is -+running unless it runs out of time_slice. Thus, even if the time actually -+passes the deadline of another task that is queued, it will not get CPU time -+unless the current running task deschedules, and the time "base" (jiffies) is -+constantly moving. -+ -+Task lookup. -+ -+BFS has 103 priority queues. 100 of these are dedicated to the static priority -+of realtime tasks, and the remaining 3 are, in order of best to worst priority, -+SCHED_ISO (isochronous), SCHED_NORMAL, and SCHED_IDLEPRIO (idle priority -+scheduling). When a task of these priorities is queued, a bitmap of running -+priorities is set showing which of these priorities has tasks waiting for CPU -+time. When a CPU is made to reschedule, the lookup for the next task to get -+CPU time is performed in the following way: -+ -+First the bitmap is checked to see what static priority tasks are queued. If -+any realtime priorities are found, the corresponding queue is checked and the -+first task listed there is taken (provided CPU affinity is suitable) and lookup -+is complete. If the priority corresponds to a SCHED_ISO task, they are also -+taken in FIFO order (as they behave like SCHED_RR). If the priority corresponds -+to either SCHED_NORMAL or SCHED_IDLEPRIO, then the lookup becomes O(n). At this -+stage, every task in the runlist that corresponds to that priority is checked -+to see which has the earliest set deadline, and (provided it has suitable CPU -+affinity) it is taken off the runqueue and given the CPU. 
If a task has an -+expired deadline, it is taken and the rest of the lookup aborted (as they are -+chosen in FIFO order). -+ -+Thus, the lookup is O(n) in the worst case only, where n is as described -+earlier, as tasks may be chosen before the whole task list is looked over. -+ -+ -+Scalability. -+ -+The major limitations of BFS will be that of scalability, as the separate -+runqueue designs will have less lock contention as the number of CPUs rises. -+However they do not scale linearly even with separate runqueues as multiple -+runqueues will need to be locked concurrently on such designs to be able to -+achieve fair CPU balancing, to try and achieve some sort of nice-level fairness -+across CPUs, and to achieve low enough latency for tasks on a busy CPU when -+other CPUs would be more suited. BFS has the advantage that it requires no -+balancing algorithm whatsoever, as balancing occurs by proxy simply because -+all CPUs draw off the global runqueue, in priority and deadline order. Despite -+the fact that scalability is _not_ the prime concern of BFS, it both shows very -+good scalability to smaller numbers of CPUs and is likely a more scalable design -+at these numbers of CPUs. -+ -+It also has some very low overhead scalability features built into the design -+when it has been deemed their overhead is so marginal that they're worth adding. -+The first is the local copy of the running process' data to the CPU it's running -+on to allow that data to be updated lockless where possible. Then there is -+deference paid to the last CPU a task was running on, by trying that CPU first -+when looking for an idle CPU to use the next time it's scheduled. Finally there -+is the notion of cache locality beyond the last running CPU. The sched_domains -+information is used to determine the relative virtual "cache distance" that -+other CPUs have from the last CPU a task was running on. CPUs with shared -+caches, such as SMT siblings, or multicore CPUs with shared caches, are treated -+as cache local. CPUs without shared caches are treated as not cache local, and -+CPUs on different NUMA nodes are treated as very distant. This "relative cache -+distance" is used by modifying the virtual deadline value when doing lookups. -+Effectively, the deadline is unaltered between "cache local" CPUs, doubled for -+"cache distant" CPUs, and quadrupled for "very distant" CPUs. The reasoning -+behind the doubling of deadlines is as follows. The real cost of migrating a -+task from one CPU to another is entirely dependant on the cache footprint of -+the task, how cache intensive the task is, how long it's been running on that -+CPU to take up the bulk of its cache, how big the CPU cache is, how fast and -+how layered the CPU cache is, how fast a context switch is... and so on. In -+other words, it's close to random in the real world where we do more than just -+one sole workload. The only thing we can be sure of is that it's not free. So -+BFS uses the principle that an idle CPU is a wasted CPU and utilising idle CPUs -+is more important than cache locality, and cache locality only plays a part -+after that. Doubling the effective deadline is based on the premise that the -+"cache local" CPUs will tend to work on the same tasks up to double the number -+of cache local CPUs, and once the workload is beyond that amount, it is likely -+that none of the tasks are cache warm anywhere anyway. The quadrupling for NUMA -+is a value I pulled out of my arse. 
-+ -+When choosing an idle CPU for a waking task, the cache locality is determined -+according to where the task last ran and then idle CPUs are ranked from best -+to worst to choose the most suitable idle CPU based on cache locality, NUMA -+node locality and hyperthread sibling business. They are chosen in the -+following preference (if idle): -+ -+* Same core, idle or busy cache, idle threads -+* Other core, same cache, idle or busy cache, idle threads. -+* Same node, other CPU, idle cache, idle threads. -+* Same node, other CPU, busy cache, idle threads. -+* Same core, busy threads. -+* Other core, same cache, busy threads. -+* Same node, other CPU, busy threads. -+* Other node, other CPU, idle cache, idle threads. -+* Other node, other CPU, busy cache, idle threads. -+* Other node, other CPU, busy threads. -+ -+This shows the SMT or "hyperthread" awareness in the design as well which will -+choose a real idle core first before a logical SMT sibling which already has -+tasks on the physical CPU. -+ -+Early benchmarking of BFS suggested scalability dropped off at the 16 CPU mark. -+However this benchmarking was performed on an earlier design that was far less -+scalable than the current one so it's hard to know how scalable it is in terms -+of both CPUs (due to the global runqueue) and heavily loaded machines (due to -+O(n) lookup) at this stage. Note that in terms of scalability, the number of -+_logical_ CPUs matters, not the number of _physical_ CPUs. Thus, a dual (2x) -+quad core (4X) hyperthreaded (2X) machine is effectively a 16X. Newer benchmark -+results are very promising indeed, without needing to tweak any knobs, features -+or options. Benchmark contributions are most welcome. -+ -+ -+Features -+ -+As the initial prime target audience for BFS was the average desktop user, it -+was designed to not need tweaking, tuning or have features set to obtain benefit -+from it. Thus the number of knobs and features has been kept to an absolute -+minimum and should not require extra user input for the vast majority of cases. -+There are precisely 2 tunables, and 2 extra scheduling policies. The rr_interval -+and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO policies. In addition -+to this, BFS also uses sub-tick accounting. What BFS does _not_ now feature is -+support for CGROUPS. The average user should neither need to know what these -+are, nor should they need to be using them to have good desktop behaviour. -+ -+rr_interval -+ -+There is only one "scheduler" tunable, the round robin interval. This can be -+accessed in -+ -+ /proc/sys/kernel/rr_interval -+ -+The value is in milliseconds, and the default value is set to 6 on a -+uniprocessor machine, and automatically set to a progressively higher value on -+multiprocessor machines. The reasoning behind increasing the value on more CPUs -+is that the effective latency is decreased by virtue of there being more CPUs on -+BFS (for reasons explained above), and increasing the value allows for less -+cache contention and more throughput. Valid values are from 1 to 1000 -+Decreasing the value will decrease latencies at the cost of decreasing -+throughput, while increasing it will improve throughput, but at the cost of -+worsening latencies. The accuracy of the rr interval is limited by HZ resolution -+of the kernel configuration. Thus, the worst case latencies are usually slightly -+higher than this actual value. The default value of 6 is not an arbitrary one. 
-+It is based on the fact that humans can detect jitter at approximately 7ms, so -+aiming for much lower latencies is pointless under most circumstances. It is -+worth noting this fact when comparing the latency performance of BFS to other -+schedulers. Worst case latencies being higher than 7ms are far worse than -+average latencies not being in the microsecond range. -+ -+Isochronous scheduling. -+ -+Isochronous scheduling is a unique scheduling policy designed to provide -+near-real-time performance to unprivileged (ie non-root) users without the -+ability to starve the machine indefinitely. Isochronous tasks (which means -+"same time") are set using, for example, the schedtool application like so: -+ -+ schedtool -I -e amarok -+ -+This will start the audio application "amarok" as SCHED_ISO. How SCHED_ISO works -+is that it has a priority level between true realtime tasks and SCHED_NORMAL -+which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie, -+if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval -+rate). However if ISO tasks run for more than a tunable finite amount of time, -+they are then demoted back to SCHED_NORMAL scheduling. This finite amount of -+time is the percentage of _total CPU_ available across the machine, configurable -+as a percentage in the following "resource handling" tunable (as opposed to a -+scheduler tunable): -+ -+ /proc/sys/kernel/iso_cpu -+ -+and is set to 70% by default. It is calculated over a rolling 5 second average -+Because it is the total CPU available, it means that on a multi CPU machine, it -+is possible to have an ISO task running as realtime scheduling indefinitely on -+just one CPU, as the other CPUs will be available. Setting this to 100 is the -+equivalent of giving all users SCHED_RR access and setting it to 0 removes the -+ability to run any pseudo-realtime tasks. -+ -+A feature of BFS is that it detects when an application tries to obtain a -+realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the -+appropriate privileges to use those policies. When it detects this, it will -+give the task SCHED_ISO policy instead. Thus it is transparent to the user. -+Because some applications constantly set their policy as well as their nice -+level, there is potential for them to undo the override specified by the user -+on the command line of setting the policy to SCHED_ISO. To counter this, once -+a task has been set to SCHED_ISO policy, it needs superuser privileges to set -+it back to SCHED_NORMAL. This will ensure the task remains ISO and all child -+processes and threads will also inherit the ISO policy. -+ -+Idleprio scheduling. -+ -+Idleprio scheduling is a scheduling policy designed to give out CPU to a task -+_only_ when the CPU would be otherwise idle. The idea behind this is to allow -+ultra low priority tasks to be run in the background that have virtually no -+effect on the foreground tasks. This is ideally suited to distributed computing -+clients (like setiathome, folding, mprime etc) but can also be used to start -+a video encode or so on without any slowdown of other tasks. To avoid this -+policy from grabbing shared resources and holding them indefinitely, if it -+detects a state where the task is waiting on I/O, the machine is about to -+suspend to ram and so on, it will transiently schedule them as SCHED_NORMAL. As -+per the Isochronous task management, once a task has been scheduled as IDLEPRIO, -+it cannot be put back to SCHED_NORMAL without superuser privileges. 
Tasks can -+be set to start as SCHED_IDLEPRIO with the schedtool command like so: -+ -+ schedtool -D -e ./mprime -+ -+Subtick accounting. -+ -+It is surprisingly difficult to get accurate CPU accounting, and in many cases, -+the accounting is done by simply determining what is happening at the precise -+moment a timer tick fires off. This becomes increasingly inaccurate as the -+timer tick frequency (HZ) is lowered. It is possible to create an application -+which uses almost 100% CPU, yet by being descheduled at the right time, records -+zero CPU usage. While the main problem with this is that there are possible -+security implications, it is also difficult to determine how much CPU a task -+really does use. BFS tries to use the sub-tick accounting from the TSC clock, -+where possible, to determine real CPU usage. This is not entirely reliable, but -+is far more likely to produce accurate CPU usage data than the existing designs -+and will not show tasks as consuming no CPU usage when they actually are. Thus, -+the amount of CPU reported as being used by BFS will more accurately represent -+how much CPU the task itself is using (as is shown for example by the 'time' -+application), so the reported values may be quite different to other schedulers. -+Values reported as the 'load' are more prone to problems with this design, but -+per process values are closer to real usage. When comparing throughput of BFS -+to other designs, it is important to compare the actual completed work in terms -+of total wall clock time taken and total work done, rather than the reported -+"cpu usage". -+ -+ -+Con Kolivas Fri Aug 27 2010 -diff --git a/Documentation/scheduler/sched-MuQSS.txt b/Documentation/scheduler/sched-MuQSS.txt -new file mode 100644 -index 000000000000..ae28b85c9995 ---- /dev/null -+++ b/Documentation/scheduler/sched-MuQSS.txt -@@ -0,0 +1,373 @@ -+MuQSS - The Multiple Queue Skiplist Scheduler by Con Kolivas. -+ -+MuQSS is a per-cpu runqueue variant of the original BFS scheduler with -+one 8 level skiplist per runqueue, and fine grained locking for much more -+scalability. -+ -+ -+Goals. -+ -+The goal of the Multiple Queue Skiplist Scheduler, referred to as MuQSS from -+here on (pronounced mux) is to completely do away with the complex designs of -+the past for the cpu process scheduler and instead implement one that is very -+simple in basic design. The main focus of MuQSS is to achieve excellent desktop -+interactivity and responsiveness without heuristics and tuning knobs that are -+difficult to understand, impossible to model and predict the effect of, and when -+tuned to one workload cause massive detriment to another, while still being -+scalable to many CPUs and processes. -+ -+ -+Design summary. -+ -+MuQSS is best described as per-cpu multiple runqueue, O(log n) insertion, O(1) -+lookup, earliest effective virtual deadline first tickless design, loosely based -+on EEVDF (earliest eligible virtual deadline first) and my previous Staircase -+Deadline scheduler, and evolved from the single runqueue O(n) BFS scheduler. -+Each component shall be described in order to understand the significance of, -+and reasoning for it. -+ -+ -+Design reasoning. -+ -+In BFS, the use of a single runqueue across all CPUs meant that each CPU would -+need to scan the entire runqueue looking for the process with the earliest -+deadline and schedule that next, regardless of which CPU it originally came -+from. 
This made BFS deterministic with respect to latency and provided -+guaranteed latencies dependent on number of processes and CPUs. The single -+runqueue, however, meant that all CPUs would compete for the single lock -+protecting it, which would lead to increasing lock contention as the number of -+CPUs rose and appeared to limit scalability of common workloads beyond 16 -+logical CPUs. Additionally, the O(n) lookup of the runqueue list obviously -+increased overhead proportionate to the number of queued proecesses and led to -+cache thrashing while iterating over the linked list. -+ -+MuQSS is an evolution of BFS, designed to maintain the same scheduling -+decision mechanism and be virtually deterministic without relying on the -+constrained design of the single runqueue by splitting out the single runqueue -+to be per-CPU and use skiplists instead of linked lists. -+ -+The original reason for going back to a single runqueue design for BFS was that -+once multiple runqueues are introduced, per-CPU or otherwise, there will be -+complex interactions as each runqueue will be responsible for the scheduling -+latency and fairness of the tasks only on its own runqueue, and to achieve -+fairness and low latency across multiple CPUs, any advantage in throughput of -+having CPU local tasks causes other disadvantages. This is due to requiring a -+very complex balancing system to at best achieve some semblance of fairness -+across CPUs and can only maintain relatively low latency for tasks bound to the -+same CPUs, not across them. To increase said fairness and latency across CPUs, -+the advantage of local runqueue locking, which makes for better scalability, is -+lost due to having to grab multiple locks. -+ -+MuQSS works around the problems inherent in multiple runqueue designs by -+making its skip lists priority ordered and through novel use of lockless -+examination of each other runqueue it can decide if it should take the earliest -+deadline task from another runqueue for latency reasons, or for CPU balancing -+reasons. It still does not have a balancing system, choosing to allow the -+next task scheduling decision and task wakeup CPU choice to allow balancing to -+happen by virtue of its choices. -+ -+As a further evolution of the design, MuQSS normally configures sharing of -+runqueues in a logical fashion for when CPU resources are shared for improved -+latency and throughput. By default it shares runqueues and locks between -+multicore siblings. Optionally it can be configured to run with sharing of -+SMT siblings only, all SMP packages or no sharing at all. Additionally it can -+be selected at boot time. -+ -+ -+Design details. -+ -+Custom skip list implementation: -+ -+To avoid the overhead of building up and tearing down skip list structures, -+the variant used by MuQSS has a number of optimisations making it specific for -+its use case in the scheduler. It uses static arrays of 8 'levels' instead of -+building up and tearing down structures dynamically. This makes each runqueue -+only scale O(log N) up to 64k tasks. However as there is one runqueue per CPU -+it means that it scales O(log N) up to 64k x number of logical CPUs which is -+far beyond the realistic task limits each CPU could handle. By being 8 levels -+it also makes the array exactly one cacheline in size. Additionally, each -+skip list node is bidirectional making insertion and removal amortised O(1), -+being O(k) where k is 1-8. 
Uniquely, we are only ever interested in the very -+first entry in each list at all times with MuQSS, so there is never a need to -+do a search and thus look up is always O(1). In interactive mode, the queues -+will be searched beyond their first entry if the first task is not suitable -+for affinity or SMT nice reasons. -+ -+Task insertion: -+ -+MuQSS inserts tasks into a per CPU runqueue as an O(log N) insertion into -+a custom skip list as described above (based on the original design by William -+Pugh). Insertion is ordered in such a way that there is never a need to do a -+search by ordering tasks according to static priority primarily, and then -+virtual deadline at the time of insertion. -+ -+Niffies: -+ -+Niffies are a monotonic forward moving timer not unlike the "jiffies" but are -+of nanosecond resolution. Niffies are calculated per-runqueue from the high -+resolution TSC timers, and in order to maintain fairness are synchronised -+between CPUs whenever both runqueues are locked concurrently. -+ -+Virtual deadline: -+ -+The key to achieving low latency, scheduling fairness, and "nice level" -+distribution in MuQSS is entirely in the virtual deadline mechanism. The one -+tunable in MuQSS is the rr_interval, or "round robin interval". This is the -+maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy) -+tasks of the same nice level will be running for, or looking at it the other -+way around, the longest duration two tasks of the same nice level will be -+delayed for. When a task requests cpu time, it is given a quota (time_slice) -+equal to the rr_interval and a virtual deadline. The virtual deadline is -+offset from the current time in niffies by this equation: -+ -+ niffies + (prio_ratio * rr_interval) -+ -+The prio_ratio is determined as a ratio compared to the baseline of nice -20 -+and increases by 10% per nice level. The deadline is a virtual one only in that -+no guarantee is placed that a task will actually be scheduled by this time, but -+it is used to compare which task should go next. There are three components to -+how a task is next chosen. First is time_slice expiration. If a task runs out -+of its time_slice, it is descheduled, the time_slice is refilled, and the -+deadline reset to that formula above. Second is sleep, where a task no longer -+is requesting CPU for whatever reason. The time_slice and deadline are _not_ -+adjusted in this case and are just carried over for when the task is next -+scheduled. Third is preemption, and that is when a newly waking task is deemed -+higher priority than a currently running task on any cpu by virtue of the fact -+that it has an earlier virtual deadline than the currently running task. The -+earlier deadline is the key to which task is next chosen for the first and -+second cases. -+ -+The CPU proportion of different nice tasks works out to be approximately the -+ -+ (prio_ratio difference)^2 -+ -+The reason it is squared is that a task's deadline does not change while it is -+running unless it runs out of time_slice. Thus, even if the time actually -+passes the deadline of another task that is queued, it will not get CPU time -+unless the current running task deschedules, and the time "base" (niffies) is -+constantly moving. -+ -+Task lookup: -+ -+As tasks are already pre-ordered according to anticipated scheduling order in -+the skip lists, lookup for the next suitable task per-runqueue is always a -+matter of simply selecting the first task in the 0th level skip list entry. 
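A rough userspace sketch of the virtual deadline arithmetic described under "Virtual deadline" above (illustrative only: the nice -20 baseline of 1.0, the compounding 10% prio_ratio and nanosecond-resolution niffies are assumptions drawn from the wording above, not values lifted from the kernel source):

    /* deadline_sketch.c - toy model of: niffies + (prio_ratio * rr_interval) */
    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>

    #define RR_INTERVAL_NS (6ULL * 1000 * 1000)   /* default rr_interval of 6ms */

    /* Assumed: ratio 1.0 at nice -20, compounding by 10% per nice level. */
    static double prio_ratio(int nice)
    {
            return pow(1.1, nice + 20);
    }

    /* Virtual deadline: offset from the current niffies by prio_ratio * rr_interval. */
    static uint64_t virtual_deadline(uint64_t niffies, int nice)
    {
            return niffies + (uint64_t)(prio_ratio(nice) * RR_INTERVAL_NS);
    }

    int main(void)
    {
            const uint64_t now = 0;   /* stand-in for the current niffies value */

            for (int nice = -20; nice <= 19; nice += 13)
                    printf("nice %3d -> deadline offset %12llu ns\n", nice,
                           (unsigned long long)(virtual_deadline(now, nice) - now));
            return 0;
    }

Built with something like cc deadline_sketch.c -lm, this only shows how higher nice levels receive later deadlines; the roughly squared CPU proportion noted above falls out of the fact that a running task's deadline is not recalculated until its time_slice expires.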
-+In order to maintain optimal latency and fairness across CPUs, MuQSS does a -+novel examination of every other runqueue in cache locality order, choosing the -+best task across all runqueues. This provides near-determinism of how long any -+task across the entire system may wait before receiving CPU time. The other -+runqueues are first examined lockless and then trylocked to minimise the -+potential lock contention if they are likely to have a suitable better task. -+Each other runqueue lock is only held for as long as it takes to examine the -+entry for suitability. In "interactive" mode, the default setting, MuQSS will -+look for the best deadline task across all CPUs, while in !interactive mode, -+it will only select a better deadline task from another CPU if it is more -+heavily laden than the current one. -+ -+Lookup is therefore O(k) where k is number of CPUs. -+ -+ -+Latency. -+ -+Through the use of virtual deadlines to govern the scheduling order of normal -+tasks, queue-to-activation latency per runqueue is guaranteed to be bound by -+the rr_interval tunable which is set to 6ms by default. This means that the -+longest a CPU bound task will wait for more CPU is proportional to the number -+of running tasks and in the common case of 0-2 running tasks per CPU, will be -+under the 7ms threshold for human perception of jitter. Additionally, as newly -+woken tasks will have an early deadline from their previous runtime, the very -+tasks that are usually latency sensitive will have the shortest interval for -+activation, usually preempting any existing CPU bound tasks. -+ -+Tickless expiry: -+ -+A feature of MuQSS is that it is not tied to the resolution of the chosen tick -+rate in Hz, instead depending entirely on the high resolution timers where -+possible for sub-millisecond accuracy on timeouts regardless of the underlying -+tick rate. This allows MuQSS to be run with the low overhead of low Hz rates -+such as 100 by default, benefiting from the improved throughput and lower -+power usage it provides. Another advantage of this approach is that in -+combination with the Full No HZ option, which disables ticks on running task -+CPUs instead of just idle CPUs, the tick can be disabled at all times -+regardless of how many tasks are running instead of being limited to just one -+running task. Note that this option is NOT recommended for regular desktop -+users. -+ -+ -+Scalability and balancing. -+ -+Unlike traditional approaches where balancing is a combination of CPU selection -+at task wakeup and intermittent balancing based on a vast array of rules set -+according to architecture, busyness calculations and special case management, -+MuQSS indirectly balances on the fly at task wakeup and next task selection. -+During initialisation, MuQSS creates a cache coherency ordered list of CPUs for -+each logical CPU and uses this to aid task/CPU selection when CPUs are busy. -+Additionally it selects any idle CPUs, if they are available, at any time over -+busy CPUs according to the following preference: -+ -+ * Same thread, idle or busy cache, idle or busy threads -+ * Other core, same cache, idle or busy cache, idle threads. -+ * Same node, other CPU, idle cache, idle threads. -+ * Same node, other CPU, busy cache, idle threads. -+ * Other core, same cache, busy threads. -+ * Same node, other CPU, busy threads. -+ * Other node, other CPU, idle cache, idle threads. -+ * Other node, other CPU, busy cache, idle threads. -+ * Other node, other CPU, busy threads.
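The preference order above can be read as a simple ranking with the lowest value winning. A purely illustrative restatement follows (the enum, struct and selection loop are invented for clarity; as described above, the scheduler works from a cache coherency ordered CPU list built at initialisation rather than scoring candidates at wakeup):

    /* cpu_pref_sketch.c - the wakeup preference list above, restated as a ranking */
    #include <stdio.h>

    /* One value per line of the list above; a lower rank is preferred. */
    enum cpu_pref {
            PREF_SAME_THREAD = 0,        /* same thread, idle or busy cache/threads         */
            PREF_SAME_CACHE_IDLE,        /* other core, same cache, idle threads            */
            PREF_SAME_NODE_IDLE_CACHE,   /* same node, other CPU, idle cache, idle threads  */
            PREF_SAME_NODE_BUSY_CACHE,   /* same node, other CPU, busy cache, idle threads  */
            PREF_SAME_CACHE_BUSY,        /* other core, same cache, busy threads            */
            PREF_SAME_NODE_BUSY,         /* same node, other CPU, busy threads              */
            PREF_OTHER_NODE_IDLE_CACHE,  /* other node, other CPU, idle cache, idle threads */
            PREF_OTHER_NODE_BUSY_CACHE,  /* other node, other CPU, busy cache, idle threads */
            PREF_OTHER_NODE_BUSY,        /* other node, other CPU, busy threads             */
    };

    struct candidate { int cpu; enum cpu_pref rank; };

    /* Pick the most preferred CPU out of a candidate set. */
    static int pick_cpu(const struct candidate *c, int n)
    {
            int best = 0;

            for (int i = 1; i < n; i++)
                    if (c[i].rank < c[best].rank)
                            best = i;
            return c[best].cpu;
    }

    int main(void)
    {
            const struct candidate cands[] = {
                    { 3, PREF_OTHER_NODE_BUSY },
                    { 1, PREF_SAME_CACHE_IDLE },
                    { 2, PREF_SAME_NODE_BUSY_CACHE },
            };

            printf("wake task on cpu %d\n", pick_cpu(cands, 3));   /* prints cpu 1 */
            return 0;
    }

Only the ordering is mirrored here; as stated above, an available idle CPU is still taken over a busy one at any time.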
-+ -+Mux is therefore SMT, MC and NUMA aware without the need for extra -+intermittent balancing to maintain CPUs busy and make the most of cache -+coherency. -+ -+ -+Features -+ -+As the initial prime target audience for MuQSS was the average desktop user, it -+was designed to not need tweaking, tuning or have features set to obtain benefit -+from it. Thus the number of knobs and features has been kept to an absolute -+minimum and should not require extra user input for the vast majority of cases. -+There are 3 optional tunables, and 2 extra scheduling policies. The rr_interval, -+interactive, and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO -+policies. In addition to this, MuQSS also uses sub-tick accounting. What MuQSS -+does _not_ now feature is support for CGROUPS. The average user should neither -+need to know what these are, nor should they need to be using them to have good -+desktop behaviour. However since some applications refuse to work without -+cgroups, one can enable them with MuQSS as a stub and the filesystem will be -+created which will allow the applications to work. -+ -+rr_interval: -+ -+ /proc/sys/kernel/rr_interval -+ -+The value is in milliseconds, and the default value is set to 6. Valid values -+are from 1 to 1000. Decreasing the value will decrease latencies at the cost of -+decreasing throughput, while increasing it will improve throughput, but at the -+cost of worsening latencies. It is based on the fact that humans can detect -+jitter at approximately 7ms, so aiming for much lower latencies is pointless -+under most circumstances. It is worth noting this fact when comparing the -+latency performance of MuQSS to other schedulers. Worst case latencies being -+higher than 7ms are far worse than average latencies not being in the -+microsecond range. -+ -+interactive: -+ -+ /proc/sys/kernel/interactive -+ -+The value is a simple boolean of 1 for on and 0 for off and is set to on by -+default. Disabling this will disable the near-determinism of MuQSS when -+selecting the next task by not examining all CPUs for the earliest deadline -+task, or which CPU to wake to, instead prioritising CPU balancing for improved -+throughput. Latency will still be bound by rr_interval, but on a per-CPU basis -+instead of across the whole system. -+ -+Runqueue sharing. -+ -+By default MuQSS chooses to share runqueue resources (specifically the skip -+list and locking) between multicore siblings. It is configurable at build time -+to select between None, SMT, MC and SMP, corresponding to no sharing, sharing -+only between simultaneous multithreading siblings, multicore siblings, or -+symmetric multiprocessing physical packages. Additionally it can be set at -+boot time with the use of the rqshare parameter. The reason for configurability -+is that some architectures have CPUs with many multicore siblings (>= 16) -+where it may be detrimental to throughput to share runqueues and another -+sharing option may be desirable. Additionally, more sharing than usual can -+improve latency on a system-wide level at the expense of throughput if desired. -+ -+The options are: -+none, smt, mc, smp -+ -+eg: -+ rqshare=mc -+ -+Isochronous scheduling: -+ -+Isochronous scheduling is a unique scheduling policy designed to provide -+near-real-time performance to unprivileged (ie non-root) users without the -+ability to starve the machine indefinitely.
Isochronous tasks (which means -+"same time") are set using, for example, the schedtool application like so: -+ -+ schedtool -I -e amarok -+ -+This will start the audio application "amarok" as SCHED_ISO. How SCHED_ISO works -+is that it has a priority level between true realtime tasks and SCHED_NORMAL -+which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie, -+if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval -+rate). However if ISO tasks run for more than a tunable finite amount of time, -+they are then demoted back to SCHED_NORMAL scheduling. This finite amount of -+time is the percentage of CPU available per CPU, configurable as a percentage in -+the following "resource handling" tunable (as opposed to a scheduler tunable): -+ -+iso_cpu: -+ -+ /proc/sys/kernel/iso_cpu -+ -+and is set to 70% by default. It is calculated over a rolling 5 second average. -+Because it is the total CPU available, it means that on a multi CPU machine, it -+is possible to have an ISO task running as realtime scheduling indefinitely on -+just one CPU, as the other CPUs will be available. Setting this to 100 is the -+equivalent of giving all users SCHED_RR access and setting it to 0 removes the -+ability to run any pseudo-realtime tasks. -+ -+A feature of MuQSS is that it detects when an application tries to obtain a -+realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the -+appropriate privileges to use those policies. When it detects this, it will -+give the task SCHED_ISO policy instead. Thus it is transparent to the user. -+ -+ -+Idleprio scheduling: -+ -+Idleprio scheduling is a scheduling policy designed to give out CPU to a task -+_only_ when the CPU would be otherwise idle. The idea behind this is to allow -+ultra low priority tasks to be run in the background that have virtually no -+effect on the foreground tasks. This is ideally suited to distributed computing -+clients (like setiathome, folding, mprime etc) but can also be used to start a -+video encode or so on without any slowdown of other tasks. To prevent this policy -+from grabbing shared resources and holding them indefinitely, if it detects a -+state where the task is waiting on I/O, the machine is about to suspend to ram -+and so on, it will transiently schedule them as SCHED_NORMAL. Once a task has -+been scheduled as IDLEPRIO, it cannot be put back to SCHED_NORMAL without -+superuser privileges since it is effectively a lower scheduling policy. Tasks -+can be set to start as SCHED_IDLEPRIO with the schedtool command like so: -+ -+ schedtool -D -e ./mprime -+ -+Subtick accounting: -+ -+It is surprisingly difficult to get accurate CPU accounting, and in many cases, -+the accounting is done by simply determining what is happening at the precise -+moment a timer tick fires off. This becomes increasingly inaccurate as the timer -+tick frequency (HZ) is lowered. It is possible to create an application which -+uses almost 100% CPU, yet by being descheduled at the right time, records zero -+CPU usage. While the main problem with this is that there are possible security -+implications, it is also difficult to determine how much CPU a task really does -+use. Mux uses sub-tick accounting from the TSC clock to determine real CPU -+usage. Thus, the amount of CPU reported as being used by MuQSS will more -+accurately represent how much CPU the task itself is using (as is shown for -+example by the 'time' application), so the reported values may be quite -+different to other schedulers.
When comparing throughput of MuQSS to other -+designs, it is important to compare the actual completed work in terms of total -+wall clock time taken and total work done, rather than the reported "cpu usage". -+ -+Symmetric MultiThreading (SMT) aware nice: -+ -+SMT, a.k.a. hyperthreading, is a very common feature on modern CPUs. While the -+logical CPU count rises by adding thread units to each CPU core, allowing more -+than one task to be run simultaneously on the same core, the disadvantage of it -+is that the CPU power is shared between the tasks, not summating to the power -+of two CPUs. The practical upshot of this is that two tasks running on -+separate threads of the same core run significantly slower than if they had one -+core each to run on. While smart CPU selection allows each task to have a core -+to itself whenever available (as is done on MuQSS), it cannot offset the -+slowdown that occurs when the cores are all loaded and only a thread is left. -+Most of the time this is harmless as the CPU is effectively overloaded at this -+point and the extra thread is of benefit. However when running a niced task in -+the presence of an un-niced task (say nice 19 v nice 0), the nice task gets -+precisely the same amount of CPU power as the unniced one. MuQSS has an -+optional configuration feature known as SMT-NICE which selectively idles the -+secondary niced thread for a period proportional to the nice difference, -+allowing CPU distribution according to nice level to be maintained, at the -+expense of a small amount of extra overhead. If this is configured in on a -+machine without SMT threads, the overhead is minimal. -+ -+ -+Con Kolivas Sat, 29th October 2016 -diff --git a/Makefile b/Makefile -index d4d36c61940b..4a9dfe471f1f 100644 ---- a/Makefile -+++ b/Makefile -@@ -15,6 +15,10 @@ NAME = Kleptomaniac Octopus - PHONY := _all - _all: - -+CKVERSION = -ck1 -+CKNAME = MuQSS Powered -+EXTRAVERSION := $(EXTRAVERSION)$(CKVERSION) -+ - # We are using a recursive build, so we need to do a little thinking - # to get the ordering right. - # -diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig -index ef179033a7c2..14b576a531ad 100644 ---- a/arch/alpha/Kconfig -+++ b/arch/alpha/Kconfig -@@ -665,6 +665,8 @@ config HZ - default 1200 if HZ_1200 - default 1024 - -+source "kernel/Kconfig.MuQSS" -+ - config SRM_ENV - tristate "SRM environment through procfs" - depends on PROC_FS -diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig -index 3a138f8c7299..65f44e309a08 100644 ---- a/arch/arc/configs/tb10x_defconfig -+++ b/arch/arc/configs/tb10x_defconfig -@@ -30,7 +30,7 @@ CONFIG_ARC_PLAT_TB10X=y - CONFIG_ARC_CACHE_LINE_SHIFT=5 - CONFIG_HZ=250 - CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk" --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - # CONFIG_COMPACTION is not set - CONFIG_NET=y - CONFIG_PACKET=y -diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig -index 8a50efb559f3..d8507d20c258 100644 ---- a/arch/arm/Kconfig -+++ b/arch/arm/Kconfig -@@ -1238,6 +1238,8 @@ config SCHED_SMT - MultiThreading at a cost of slightly increased overhead in some - places. If unsure say N here. 
- -+source "kernel/Kconfig.MuQSS" -+ - config HAVE_ARM_SCU - bool - help -diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig -index 519ff58e67b3..b2a05b6f7d80 100644 ---- a/arch/arm/configs/bcm2835_defconfig -+++ b/arch/arm/configs/bcm2835_defconfig -@@ -29,7 +29,7 @@ CONFIG_MODULE_UNLOAD=y - CONFIG_ARCH_MULTI_V6=y - CONFIG_ARCH_BCM=y - CONFIG_ARCH_BCM2835=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_AEABI=y - CONFIG_KSM=y - CONFIG_CLEANCACHE=y -diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig -index 0f7381ee0c37..3d747237bfed 100644 ---- a/arch/arm/configs/imx_v6_v7_defconfig -+++ b/arch/arm/configs/imx_v6_v7_defconfig -@@ -45,6 +45,7 @@ CONFIG_PCI_MSI=y - CONFIG_PCI_IMX6=y - CONFIG_SMP=y - CONFIG_ARM_PSCI=y -+CONFIG_PREEMPT=y - CONFIG_HIGHMEM=y - CONFIG_FORCE_MAX_ZONEORDER=14 - CONFIG_CMDLINE="noinitrd console=ttymxc0,115200" -diff --git a/arch/arm/configs/mps2_defconfig b/arch/arm/configs/mps2_defconfig -index 1d923dbb9928..9c1931f1fafd 100644 ---- a/arch/arm/configs/mps2_defconfig -+++ b/arch/arm/configs/mps2_defconfig -@@ -18,7 +18,7 @@ CONFIG_ARCH_MPS2=y - CONFIG_SET_MEM_PARAM=y - CONFIG_DRAM_BASE=0x21000000 - CONFIG_DRAM_SIZE=0x1000000 --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - # CONFIG_ATAGS is not set - CONFIG_ZBOOT_ROM_TEXT=0x0 - CONFIG_ZBOOT_ROM_BSS=0x0 -diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig -index 2773899c21b3..870866aaa39d 100644 ---- a/arch/arm/configs/mxs_defconfig -+++ b/arch/arm/configs/mxs_defconfig -@@ -1,7 +1,7 @@ - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT_VOLUNTARY=n - CONFIG_TASKSTATS=y - CONFIG_TASK_DELAY_ACCT=y - CONFIG_TASK_XACCT=y -@@ -27,6 +27,11 @@ CONFIG_MODVERSIONS=y - CONFIG_BLK_DEV_INTEGRITY=y - # CONFIG_IOSCHED_DEADLINE is not set - # CONFIG_IOSCHED_CFQ is not set -+# CONFIG_ARCH_MULTI_V7 is not set -+CONFIG_ARCH_MXS=y -+# CONFIG_ARM_THUMB is not set -+CONFIG_PREEMPT=y -+CONFIG_AEABI=y - CONFIG_NET=y - CONFIG_PACKET=y - CONFIG_UNIX=y -diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig -index 3f047afb982c..d35eae0a5c7d 100644 ---- a/arch/arm64/Kconfig -+++ b/arch/arm64/Kconfig -@@ -864,6 +864,8 @@ config SCHED_SMT - MultiThreading at a cost of slightly increased overhead in some - places. If unsure say N here. 
- -+source "kernel/Kconfig.MuQSS" -+ - config NR_CPUS - int "Maximum number of CPUs (2-4096)" - range 2 4096 -diff --git a/arch/blackfin/configs/BF518F-EZBRD_defconfig b/arch/blackfin/configs/BF518F-EZBRD_defconfig -new file mode 100644 -index 000000000000..39b91dfa55b5 ---- /dev/null -+++ b/arch/blackfin/configs/BF518F-EZBRD_defconfig -@@ -0,0 +1,121 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_EXPERT=y -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+CONFIG_PREEMPT=y -+CONFIG_BF518=y -+CONFIG_IRQ_TIMER0=12 -+# CONFIG_CYCLES_CLOCKSOURCE is not set -+# CONFIG_SCHEDULE_L1 is not set -+# CONFIG_MEMSET_L1 is not set -+# CONFIG_MEMCPY_L1 is not set -+# CONFIG_SYS_BFIN_SPINLOCK_L1 is not set -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BFIN_GPTIMERS=m -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_3=0x99B2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_JEDECPROBE=m -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=m -+CONFIG_MTD_COMPLEX_MAPPINGS=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_NETDEVICES=y -+CONFIG_NET_BFIN=y -+CONFIG_BFIN_MAC=y -+# CONFIG_NET_VENDOR_BROADCOM is not set -+# CONFIG_NET_VENDOR_CHELSIO is not set -+# CONFIG_NET_VENDOR_INTEL is not set -+# CONFIG_NET_VENDOR_MARVELL is not set -+# CONFIG_NET_VENDOR_MICREL is not set -+# CONFIG_NET_VENDOR_MICROCHIP is not set -+# CONFIG_NET_VENDOR_NATSEMI is not set -+# CONFIG_NET_VENDOR_SEEQ is not set -+# CONFIG_NET_VENDOR_SMSC is not set -+# CONFIG_NET_VENDOR_STMICRO is not set -+# CONFIG_WLAN is not set -+# CONFIG_INPUT is not set -+# CONFIG_SERIO is not set -+# CONFIG_VT is not set -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_BFIN_JTAG_COMM=m -+# CONFIG_DEVKMEM is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+CONFIG_SERIAL_BFIN_UART0=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_I2C=y -+CONFIG_I2C_CHARDEV=y -+CONFIG_I2C_BLACKFIN_TWI=y -+CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+# CONFIG_USB_SUPPORT is not set -+CONFIG_MMC=y -+CONFIG_SDH_BFIN=y -+CONFIG_SDH_BFIN_MISSING_CMD_PULLUP_WORKAROUND=y -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_BFIN=y -+CONFIG_EXT2_FS=m -+# CONFIG_DNOTIFY is not set -+CONFIG_VFAT_FS=m -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+CONFIG_NLS_CODEPAGE_437=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_UTF8=m -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_INFO=y -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_MMRS=y -+CONFIG_DEBUG_HWERR=y -+CONFIG_EXACT_HWERR=y -+CONFIG_DEBUG_DOUBLEFAULT=y -+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y -+CONFIG_EARLY_PRINTK=y 
-+CONFIG_CPLB_INFO=y -+CONFIG_BFIN_PSEUDODBG_INSNS=y -+CONFIG_CRYPTO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -+CONFIG_CRC_CCITT=m -diff --git a/arch/blackfin/configs/BF526-EZBRD_defconfig b/arch/blackfin/configs/BF526-EZBRD_defconfig -new file mode 100644 -index 000000000000..675cadb3a0c4 ---- /dev/null -+++ b/arch/blackfin/configs/BF526-EZBRD_defconfig -@@ -0,0 +1,158 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_EXPERT=y -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+CONFIG_PREEMPT=y -+CONFIG_BF526=y -+CONFIG_IRQ_TIMER0=12 -+CONFIG_BFIN526_EZBRD=y -+CONFIG_IRQ_USB_INT0=11 -+CONFIG_IRQ_USB_INT1=11 -+CONFIG_IRQ_USB_INT2=11 -+CONFIG_IRQ_USB_DMA=11 -+# CONFIG_CYCLES_CLOCKSOURCE is not set -+# CONFIG_SCHEDULE_L1 is not set -+# CONFIG_MEMSET_L1 is not set -+# CONFIG_MEMCPY_L1 is not set -+# CONFIG_SYS_BFIN_SPINLOCK_L1 is not set -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BFIN_GPTIMERS=m -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_3=0x99B2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_CFI=y -+CONFIG_MTD_CFI_INTELEXT=y -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=m -+CONFIG_MTD_COMPLEX_MAPPINGS=y -+CONFIG_MTD_PHYSMAP=y -+CONFIG_MTD_M25P80=y -+CONFIG_MTD_NAND=m -+CONFIG_MTD_SPI_NOR=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_SCSI=y -+# CONFIG_SCSI_PROC_FS is not set -+CONFIG_BLK_DEV_SD=y -+CONFIG_BLK_DEV_SR=m -+# CONFIG_SCSI_LOWLEVEL is not set -+CONFIG_NETDEVICES=y -+CONFIG_NET_BFIN=y -+CONFIG_BFIN_MAC=y -+# CONFIG_NET_VENDOR_BROADCOM is not set -+# CONFIG_NET_VENDOR_CHELSIO is not set -+# CONFIG_NET_VENDOR_INTEL is not set -+# CONFIG_NET_VENDOR_MARVELL is not set -+# CONFIG_NET_VENDOR_MICREL is not set -+# CONFIG_NET_VENDOR_MICROCHIP is not set -+# CONFIG_NET_VENDOR_NATSEMI is not set -+# CONFIG_NET_VENDOR_SEEQ is not set -+# CONFIG_NET_VENDOR_SMSC is not set -+# CONFIG_NET_VENDOR_STMICRO is not set -+# CONFIG_WLAN is not set -+CONFIG_INPUT_FF_MEMLESS=m -+# CONFIG_INPUT_MOUSEDEV is not set -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+CONFIG_INPUT_MISC=y -+# CONFIG_SERIO is not set -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_BFIN_JTAG_COMM=m -+# CONFIG_DEVKMEM is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+CONFIG_SERIAL_BFIN_UART1=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_I2C=y -+CONFIG_I2C_CHARDEV=m -+CONFIG_I2C_BLACKFIN_TWI=y -+CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+CONFIG_HID_A4TECH=y -+CONFIG_HID_APPLE=y -+CONFIG_HID_BELKIN=y -+CONFIG_HID_CHERRY=y -+CONFIG_HID_CHICONY=y -+CONFIG_HID_CYPRESS=y -+CONFIG_HID_EZKEY=y -+CONFIG_HID_GYRATION=y 
-+CONFIG_HID_LOGITECH=y -+CONFIG_HID_MICROSOFT=y -+CONFIG_HID_MONTEREY=y -+CONFIG_HID_PANTHERLORD=y -+CONFIG_HID_PETALYNX=y -+CONFIG_HID_SAMSUNG=y -+CONFIG_HID_SONY=y -+CONFIG_HID_SUNPLUS=y -+CONFIG_USB=y -+# CONFIG_USB_DEVICE_CLASS is not set -+CONFIG_USB_OTG_BLACKLIST_HUB=y -+CONFIG_USB_MON=y -+CONFIG_USB_STORAGE=y -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_BFIN=y -+CONFIG_EXT2_FS=m -+# CONFIG_DNOTIFY is not set -+CONFIG_ISO9660_FS=m -+CONFIG_JOLIET=y -+CONFIG_VFAT_FS=m -+CONFIG_JFFS2_FS=m -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+CONFIG_NLS_CODEPAGE_437=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_UTF8=m -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_INFO=y -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_MMRS=y -+CONFIG_DEBUG_HWERR=y -+CONFIG_EXACT_HWERR=y -+CONFIG_DEBUG_DOUBLEFAULT=y -+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y -+CONFIG_EARLY_PRINTK=y -+CONFIG_CPLB_INFO=y -+CONFIG_BFIN_PSEUDODBG_INSNS=y -+CONFIG_CRYPTO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -+CONFIG_CRC_CCITT=m -diff --git a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig -new file mode 100644 -index 000000000000..4c517c443af5 ---- /dev/null -+++ b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig -@@ -0,0 +1,188 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_EXPERT=y -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+CONFIG_PREEMPT=y -+CONFIG_BF527=y -+CONFIG_BF_REV_0_2=y -+CONFIG_BFIN527_EZKIT_V2=y -+CONFIG_IRQ_USB_INT0=11 -+CONFIG_IRQ_USB_INT1=11 -+CONFIG_IRQ_USB_INT2=11 -+CONFIG_IRQ_USB_DMA=11 -+# CONFIG_CYCLES_CLOCKSOURCE is not set -+# CONFIG_SCHEDULE_L1 is not set -+# CONFIG_MEMSET_L1 is not set -+# CONFIG_MEMCPY_L1 is not set -+# CONFIG_SYS_BFIN_SPINLOCK_L1 is not set -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_3=0x99B2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+CONFIG_IRDA=m -+CONFIG_IRLAN=m -+CONFIG_IRCOMM=m -+CONFIG_IRTTY_SIR=m -+CONFIG_BFIN_SIR=m -+CONFIG_BFIN_SIR0=y -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_JEDECPROBE=m -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=m -+CONFIG_MTD_COMPLEX_MAPPINGS=y -+CONFIG_MTD_M25P80=y -+CONFIG_MTD_NAND=m -+CONFIG_MTD_SPI_NOR=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_SCSI=y -+# CONFIG_SCSI_PROC_FS is not set -+CONFIG_BLK_DEV_SD=y -+CONFIG_BLK_DEV_SR=m -+# CONFIG_SCSI_LOWLEVEL is not set -+CONFIG_NETDEVICES=y -+CONFIG_NET_BFIN=y -+CONFIG_BFIN_MAC=y -+# CONFIG_NET_VENDOR_BROADCOM is not set -+# CONFIG_NET_VENDOR_CHELSIO is not set -+# CONFIG_NET_VENDOR_INTEL is not set -+# CONFIG_NET_VENDOR_MARVELL is not set -+# CONFIG_NET_VENDOR_MICREL is not set -+# CONFIG_NET_VENDOR_MICROCHIP is not set -+# 
CONFIG_NET_VENDOR_NATSEMI is not set -+# CONFIG_NET_VENDOR_SEEQ is not set -+# CONFIG_NET_VENDOR_SMSC is not set -+# CONFIG_NET_VENDOR_STMICRO is not set -+# CONFIG_WLAN is not set -+CONFIG_INPUT_FF_MEMLESS=m -+# CONFIG_INPUT_MOUSEDEV is not set -+CONFIG_INPUT_EVDEV=y -+CONFIG_KEYBOARD_ADP5520=y -+# CONFIG_KEYBOARD_ATKBD is not set -+# CONFIG_INPUT_MOUSE is not set -+CONFIG_INPUT_TOUCHSCREEN=y -+CONFIG_TOUCHSCREEN_AD7879=y -+CONFIG_TOUCHSCREEN_AD7879_I2C=y -+CONFIG_INPUT_MISC=y -+# CONFIG_SERIO is not set -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_BFIN_JTAG_COMM=m -+# CONFIG_DEVKMEM is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+CONFIG_SERIAL_BFIN_UART1=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_I2C=y -+CONFIG_I2C_CHARDEV=m -+CONFIG_I2C_BLACKFIN_TWI=y -+CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+CONFIG_PMIC_ADP5520=y -+CONFIG_FB=y -+CONFIG_FB_BFIN_LQ035Q1=y -+CONFIG_BACKLIGHT_LCD_SUPPORT=y -+CONFIG_FRAMEBUFFER_CONSOLE=y -+CONFIG_LOGO=y -+# CONFIG_LOGO_LINUX_MONO is not set -+# CONFIG_LOGO_LINUX_VGA16 is not set -+# CONFIG_LOGO_LINUX_CLUT224 is not set -+# CONFIG_LOGO_BLACKFIN_VGA16 is not set -+CONFIG_SOUND=y -+CONFIG_SND=y -+CONFIG_SND_SOC=y -+CONFIG_SND_BF5XX_I2S=y -+CONFIG_SND_BF5XX_SOC_SSM2602=y -+CONFIG_HID_A4TECH=y -+CONFIG_HID_APPLE=y -+CONFIG_HID_BELKIN=y -+CONFIG_HID_CHERRY=y -+CONFIG_HID_CHICONY=y -+CONFIG_HID_CYPRESS=y -+CONFIG_HID_EZKEY=y -+CONFIG_HID_GYRATION=y -+CONFIG_HID_LOGITECH=y -+CONFIG_HID_MICROSOFT=y -+CONFIG_HID_MONTEREY=y -+CONFIG_HID_PANTHERLORD=y -+CONFIG_HID_PETALYNX=y -+CONFIG_HID_SAMSUNG=y -+CONFIG_HID_SONY=y -+CONFIG_HID_SUNPLUS=y -+CONFIG_USB=y -+# CONFIG_USB_DEVICE_CLASS is not set -+CONFIG_USB_OTG_BLACKLIST_HUB=y -+CONFIG_USB_MON=y -+CONFIG_USB_MUSB_HDRC=y -+CONFIG_USB_MUSB_BLACKFIN=y -+CONFIG_USB_STORAGE=y -+CONFIG_USB_GADGET=y -+CONFIG_NEW_LEDS=y -+CONFIG_LEDS_CLASS=y -+CONFIG_LEDS_ADP5520=y -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_BFIN=y -+CONFIG_EXT2_FS=m -+# CONFIG_DNOTIFY is not set -+CONFIG_ISO9660_FS=m -+CONFIG_JOLIET=y -+CONFIG_UDF_FS=m -+CONFIG_VFAT_FS=m -+CONFIG_JFFS2_FS=m -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+CONFIG_NLS_CODEPAGE_437=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_UTF8=m -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_INFO=y -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_MMRS=y -+CONFIG_DEBUG_HWERR=y -+CONFIG_EXACT_HWERR=y -+CONFIG_DEBUG_DOUBLEFAULT=y -+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y -+CONFIG_EARLY_PRINTK=y -+CONFIG_CPLB_INFO=y -+CONFIG_BFIN_PSEUDODBG_INSNS=y -+CONFIG_CRYPTO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -diff --git a/arch/blackfin/configs/BF527-EZKIT_defconfig b/arch/blackfin/configs/BF527-EZKIT_defconfig -new file mode 100644 -index 000000000000..bf8df3e6cf02 ---- /dev/null -+++ b/arch/blackfin/configs/BF527-EZKIT_defconfig -@@ -0,0 +1,181 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_EXPERT=y -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set 
-+CONFIG_PREEMPT=y -+CONFIG_BF527=y -+CONFIG_BF_REV_0_1=y -+CONFIG_IRQ_USB_INT0=11 -+CONFIG_IRQ_USB_INT1=11 -+CONFIG_IRQ_USB_INT2=11 -+CONFIG_IRQ_USB_DMA=11 -+# CONFIG_CYCLES_CLOCKSOURCE is not set -+# CONFIG_SCHEDULE_L1 is not set -+# CONFIG_MEMSET_L1 is not set -+# CONFIG_MEMCPY_L1 is not set -+# CONFIG_SYS_BFIN_SPINLOCK_L1 is not set -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_3=0x99B2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+CONFIG_IRDA=m -+CONFIG_IRLAN=m -+CONFIG_IRCOMM=m -+CONFIG_IRTTY_SIR=m -+CONFIG_BFIN_SIR=m -+CONFIG_BFIN_SIR0=y -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_JEDECPROBE=m -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=m -+CONFIG_MTD_COMPLEX_MAPPINGS=y -+CONFIG_MTD_M25P80=y -+CONFIG_MTD_NAND=m -+CONFIG_MTD_SPI_NOR=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_SCSI=y -+# CONFIG_SCSI_PROC_FS is not set -+CONFIG_BLK_DEV_SD=y -+CONFIG_BLK_DEV_SR=m -+# CONFIG_SCSI_LOWLEVEL is not set -+CONFIG_NETDEVICES=y -+CONFIG_NET_BFIN=y -+CONFIG_BFIN_MAC=y -+# CONFIG_NET_VENDOR_BROADCOM is not set -+# CONFIG_NET_VENDOR_CHELSIO is not set -+# CONFIG_NET_VENDOR_INTEL is not set -+# CONFIG_NET_VENDOR_MARVELL is not set -+# CONFIG_NET_VENDOR_MICREL is not set -+# CONFIG_NET_VENDOR_MICROCHIP is not set -+# CONFIG_NET_VENDOR_NATSEMI is not set -+# CONFIG_NET_VENDOR_SEEQ is not set -+# CONFIG_NET_VENDOR_SMSC is not set -+# CONFIG_NET_VENDOR_STMICRO is not set -+# CONFIG_WLAN is not set -+CONFIG_INPUT_FF_MEMLESS=m -+# CONFIG_INPUT_MOUSEDEV is not set -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+CONFIG_INPUT_MISC=y -+# CONFIG_SERIO is not set -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_BFIN_JTAG_COMM=m -+# CONFIG_DEVKMEM is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+CONFIG_SERIAL_BFIN_UART1=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_I2C=y -+CONFIG_I2C_CHARDEV=m -+CONFIG_I2C_BLACKFIN_TWI=y -+CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+CONFIG_FB=y -+CONFIG_FB_BFIN_T350MCQB=y -+CONFIG_BACKLIGHT_LCD_SUPPORT=y -+CONFIG_LCD_LTV350QV=m -+CONFIG_FRAMEBUFFER_CONSOLE=y -+CONFIG_LOGO=y -+# CONFIG_LOGO_LINUX_MONO is not set -+# CONFIG_LOGO_LINUX_VGA16 is not set -+# CONFIG_LOGO_LINUX_CLUT224 is not set -+# CONFIG_LOGO_BLACKFIN_VGA16 is not set -+CONFIG_SOUND=y -+CONFIG_SND=y -+CONFIG_SND_SOC=y -+CONFIG_SND_BF5XX_I2S=y -+CONFIG_SND_BF5XX_SOC_SSM2602=y -+CONFIG_HID_A4TECH=y -+CONFIG_HID_APPLE=y -+CONFIG_HID_BELKIN=y -+CONFIG_HID_CHERRY=y -+CONFIG_HID_CHICONY=y -+CONFIG_HID_CYPRESS=y -+CONFIG_HID_EZKEY=y -+CONFIG_HID_GYRATION=y -+CONFIG_HID_LOGITECH=y -+CONFIG_HID_MICROSOFT=y -+CONFIG_HID_MONTEREY=y -+CONFIG_HID_PANTHERLORD=y -+CONFIG_HID_PETALYNX=y -+CONFIG_HID_SAMSUNG=y -+CONFIG_HID_SONY=y -+CONFIG_HID_SUNPLUS=y -+CONFIG_USB=y -+# CONFIG_USB_DEVICE_CLASS is not set -+CONFIG_USB_OTG_BLACKLIST_HUB=y -+CONFIG_USB_MON=y -+CONFIG_USB_MUSB_HDRC=y -+CONFIG_MUSB_PIO_ONLY=y -+CONFIG_USB_MUSB_BLACKFIN=y -+CONFIG_MUSB_PIO_ONLY=y -+CONFIG_USB_STORAGE=y -+CONFIG_USB_GADGET=y -+CONFIG_RTC_CLASS=y 
-+CONFIG_RTC_DRV_BFIN=y -+CONFIG_EXT2_FS=m -+# CONFIG_DNOTIFY is not set -+CONFIG_ISO9660_FS=m -+CONFIG_JOLIET=y -+CONFIG_UDF_FS=m -+CONFIG_VFAT_FS=m -+CONFIG_JFFS2_FS=m -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+CONFIG_NLS_CODEPAGE_437=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_UTF8=m -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_INFO=y -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_MMRS=y -+CONFIG_DEBUG_HWERR=y -+CONFIG_EXACT_HWERR=y -+CONFIG_DEBUG_DOUBLEFAULT=y -+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y -+CONFIG_EARLY_PRINTK=y -+CONFIG_CPLB_INFO=y -+CONFIG_BFIN_PSEUDODBG_INSNS=y -+CONFIG_CRYPTO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -diff --git a/arch/blackfin/configs/BF527-TLL6527M_defconfig b/arch/blackfin/configs/BF527-TLL6527M_defconfig -new file mode 100644 -index 000000000000..0220b3b15c53 ---- /dev/null -+++ b/arch/blackfin/configs/BF527-TLL6527M_defconfig -@@ -0,0 +1,178 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_LOCALVERSION="DEV_0-1_pre2010" -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -+CONFIG_EXPERT=y -+# CONFIG_SYSCTL_SYSCALL is not set -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+CONFIG_PREEMPT=y -+CONFIG_BF527=y -+CONFIG_BF_REV_0_2=y -+CONFIG_BFIN527_TLL6527M=y -+CONFIG_BF527_UART1_PORTG=y -+CONFIG_IRQ_USB_INT0=11 -+CONFIG_IRQ_USB_INT1=11 -+CONFIG_IRQ_USB_INT2=11 -+CONFIG_IRQ_USB_DMA=11 -+CONFIG_BOOT_LOAD=0x400000 -+# CONFIG_CYCLES_CLOCKSOURCE is not set -+# CONFIG_SCHEDULE_L1 is not set -+# CONFIG_MEMSET_L1 is not set -+# CONFIG_MEMCPY_L1 is not set -+# CONFIG_SYS_BFIN_SPINLOCK_L1 is not set -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BFIN_GPTIMERS=y -+CONFIG_DMA_UNCACHED_2M=y -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_0=0xFFC2 -+CONFIG_BANK_1=0xFFC2 -+CONFIG_BANK_2=0xFFC2 -+CONFIG_BANK_3=0xFFC2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+CONFIG_IRDA=m -+CONFIG_IRLAN=m -+CONFIG_IRCOMM=m -+CONFIG_IRTTY_SIR=m -+CONFIG_BFIN_SIR=m -+CONFIG_BFIN_SIR0=y -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_CFI=y -+CONFIG_MTD_CFI_INTELEXT=y -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=y -+CONFIG_MTD_COMPLEX_MAPPINGS=y -+CONFIG_MTD_GPIO_ADDR=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_SCSI=y -+# CONFIG_SCSI_PROC_FS is not set -+CONFIG_BLK_DEV_SD=y -+CONFIG_BLK_DEV_SR=m -+# CONFIG_SCSI_LOWLEVEL is not set -+CONFIG_NETDEVICES=y -+CONFIG_NET_ETHERNET=y -+CONFIG_BFIN_MAC=y -+# CONFIG_NETDEV_1000 is not set -+# CONFIG_NETDEV_10000 is not set -+# CONFIG_WLAN is not set -+# CONFIG_INPUT_MOUSEDEV is not set -+CONFIG_INPUT_EVDEV=y -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+CONFIG_INPUT_TOUCHSCREEN=y -+CONFIG_TOUCHSCREEN_AD7879=m -+CONFIG_INPUT_MISC=y -+CONFIG_INPUT_AD714X=y -+CONFIG_INPUT_ADXL34X=y -+# 
CONFIG_SERIO is not set -+CONFIG_BFIN_PPI=m -+CONFIG_BFIN_SIMPLE_TIMER=m -+CONFIG_BFIN_SPORT=m -+# CONFIG_CONSOLE_TRANSLATIONS is not set -+# CONFIG_DEVKMEM is not set -+CONFIG_BFIN_JTAG_COMM=m -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+CONFIG_SERIAL_BFIN_UART1=y -+# CONFIG_LEGACY_PTYS is not set -+# CONFIG_HW_RANDOM is not set -+CONFIG_I2C_CHARDEV=y -+# CONFIG_I2C_HELPER_AUTO is not set -+CONFIG_I2C_SMBUS=y -+CONFIG_I2C_BLACKFIN_TWI=y -+CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+CONFIG_MEDIA_SUPPORT=y -+CONFIG_VIDEO_DEV=y -+# CONFIG_MEDIA_TUNER_CUSTOMISE is not set -+CONFIG_VIDEO_HELPER_CHIPS_AUTO=y -+CONFIG_VIDEO_BLACKFIN_CAM=m -+CONFIG_OV9655=y -+CONFIG_FB=y -+CONFIG_BACKLIGHT_LCD_SUPPORT=y -+CONFIG_FRAMEBUFFER_CONSOLE=y -+CONFIG_FONTS=y -+CONFIG_FONT_6x11=y -+CONFIG_LOGO=y -+# CONFIG_LOGO_LINUX_MONO is not set -+# CONFIG_LOGO_LINUX_VGA16 is not set -+# CONFIG_LOGO_LINUX_CLUT224 is not set -+# CONFIG_LOGO_BLACKFIN_VGA16 is not set -+CONFIG_SOUND=y -+CONFIG_SND=y -+CONFIG_SND_MIXER_OSS=y -+CONFIG_SND_PCM_OSS=y -+CONFIG_SND_SOC=y -+CONFIG_SND_BF5XX_I2S=y -+CONFIG_SND_BF5XX_SOC_SSM2602=y -+# CONFIG_HID_SUPPORT is not set -+# CONFIG_USB_SUPPORT is not set -+CONFIG_MMC=m -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_BFIN=y -+CONFIG_EXT2_FS=y -+# CONFIG_DNOTIFY is not set -+CONFIG_ISO9660_FS=m -+CONFIG_JOLIET=y -+CONFIG_UDF_FS=m -+CONFIG_MSDOS_FS=y -+CONFIG_VFAT_FS=y -+CONFIG_JFFS2_FS=y -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+# CONFIG_RPCSEC_GSS_KRB5 is not set -+CONFIG_NLS_CODEPAGE_437=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_UTF8=m -+CONFIG_DEBUG_KERNEL=y -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_INFO=y -+# CONFIG_RCU_CPU_STALL_DETECTOR is not set -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_MMRS=y -+CONFIG_DEBUG_HWERR=y -+CONFIG_EXACT_HWERR=y -+CONFIG_DEBUG_DOUBLEFAULT=y -+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y -+CONFIG_EARLY_PRINTK=y -+CONFIG_CPLB_INFO=y -+CONFIG_CRYPTO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -+CONFIG_CRC7=m -diff --git a/arch/blackfin/configs/BF533-EZKIT_defconfig b/arch/blackfin/configs/BF533-EZKIT_defconfig -new file mode 100644 -index 000000000000..6023e3fd2c48 ---- /dev/null -+++ b/arch/blackfin/configs/BF533-EZKIT_defconfig -@@ -0,0 +1,114 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_EXPERT=y -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+CONFIG_PREEMPT=y -+CONFIG_BFIN533_EZKIT=y -+CONFIG_TIMER0=11 -+CONFIG_CLKIN_HZ=27000000 -+CONFIG_HIGH_RES_TIMERS=y -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BFIN_GPTIMERS=m -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_3=0xAAC2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+CONFIG_IRDA=m -+CONFIG_IRLAN=m 
-+CONFIG_IRCOMM=m -+CONFIG_IRDA_CACHE_LAST_LSAP=y -+CONFIG_IRTTY_SIR=m -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_JEDECPROBE=y -+CONFIG_MTD_CFI_AMDSTD=y -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=y -+CONFIG_MTD_COMPLEX_MAPPINGS=y -+CONFIG_MTD_PHYSMAP=y -+CONFIG_MTD_PLATRAM=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_NETDEVICES=y -+# CONFIG_NET_VENDOR_BROADCOM is not set -+# CONFIG_NET_VENDOR_CHELSIO is not set -+# CONFIG_NET_VENDOR_INTEL is not set -+# CONFIG_NET_VENDOR_MARVELL is not set -+# CONFIG_NET_VENDOR_MICREL is not set -+# CONFIG_NET_VENDOR_MICROCHIP is not set -+# CONFIG_NET_VENDOR_NATSEMI is not set -+# CONFIG_NET_VENDOR_SEEQ is not set -+CONFIG_SMC91X=y -+# CONFIG_NET_VENDOR_STMICRO is not set -+# CONFIG_WLAN is not set -+CONFIG_INPUT=m -+# CONFIG_INPUT_MOUSEDEV is not set -+CONFIG_INPUT_EVDEV=m -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+# CONFIG_SERIO is not set -+# CONFIG_VT is not set -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_BFIN_JTAG_COMM=m -+# CONFIG_DEVKMEM is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+# CONFIG_USB_SUPPORT is not set -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_BFIN=y -+# CONFIG_DNOTIFY is not set -+CONFIG_JFFS2_FS=m -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_INFO=y -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_MMRS=y -+CONFIG_DEBUG_HWERR=y -+CONFIG_EXACT_HWERR=y -+CONFIG_DEBUG_DOUBLEFAULT=y -+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y -+CONFIG_EARLY_PRINTK=y -+CONFIG_CPLB_INFO=y -+CONFIG_BFIN_PSEUDODBG_INSNS=y -+CONFIG_CRYPTO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -diff --git a/arch/blackfin/configs/BF533-STAMP_defconfig b/arch/blackfin/configs/BF533-STAMP_defconfig -new file mode 100644 -index 000000000000..f5cd0f18b711 ---- /dev/null -+++ b/arch/blackfin/configs/BF533-STAMP_defconfig -@@ -0,0 +1,124 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_EXPERT=y -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+CONFIG_PREEMPT=y -+CONFIG_TIMER0=11 -+CONFIG_HIGH_RES_TIMERS=y -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BFIN_GPTIMERS=m -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_3=0xAAC2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+CONFIG_IRDA=m -+CONFIG_IRLAN=m -+CONFIG_IRCOMM=m -+CONFIG_IRDA_CACHE_LAST_LSAP=y -+CONFIG_IRTTY_SIR=m -+CONFIG_BFIN_SIR=m -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_CMDLINE_PARTS=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_CFI=m 
-+CONFIG_MTD_CFI_AMDSTD=m -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=m -+CONFIG_MTD_COMPLEX_MAPPINGS=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_NETDEVICES=y -+# CONFIG_NET_VENDOR_BROADCOM is not set -+# CONFIG_NET_VENDOR_CHELSIO is not set -+# CONFIG_NET_VENDOR_INTEL is not set -+# CONFIG_NET_VENDOR_MARVELL is not set -+# CONFIG_NET_VENDOR_MICREL is not set -+# CONFIG_NET_VENDOR_MICROCHIP is not set -+# CONFIG_NET_VENDOR_NATSEMI is not set -+# CONFIG_NET_VENDOR_SEEQ is not set -+CONFIG_SMC91X=y -+# CONFIG_NET_VENDOR_STMICRO is not set -+# CONFIG_WLAN is not set -+# CONFIG_INPUT_MOUSEDEV is not set -+CONFIG_INPUT_EVDEV=m -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+CONFIG_INPUT_MISC=y -+# CONFIG_SERIO is not set -+# CONFIG_VT is not set -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_BFIN_JTAG_COMM=m -+# CONFIG_DEVKMEM is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_I2C=m -+CONFIG_I2C_CHARDEV=m -+CONFIG_I2C_GPIO=m -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+CONFIG_FB=m -+CONFIG_FIRMWARE_EDID=y -+CONFIG_SOUND=m -+CONFIG_SND=m -+CONFIG_SND_MIXER_OSS=m -+CONFIG_SND_PCM_OSS=m -+CONFIG_SND_SOC=m -+CONFIG_SND_BF5XX_I2S=m -+CONFIG_SND_BF5XX_SOC_AD73311=m -+# CONFIG_USB_SUPPORT is not set -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_BFIN=y -+# CONFIG_DNOTIFY is not set -+CONFIG_JFFS2_FS=m -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_INFO=y -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_MMRS=y -+CONFIG_DEBUG_HWERR=y -+CONFIG_EXACT_HWERR=y -+CONFIG_DEBUG_DOUBLEFAULT=y -+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y -+CONFIG_EARLY_PRINTK=y -+CONFIG_CPLB_INFO=y -+CONFIG_BFIN_PSEUDODBG_INSNS=y -+CONFIG_CRYPTO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -diff --git a/arch/blackfin/configs/BF537-STAMP_defconfig b/arch/blackfin/configs/BF537-STAMP_defconfig -new file mode 100644 -index 000000000000..48085fde7f9e ---- /dev/null -+++ b/arch/blackfin/configs/BF537-STAMP_defconfig -@@ -0,0 +1,136 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_EXPERT=y -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+CONFIG_PREEMPT=y -+CONFIG_BF537=y -+CONFIG_HIGH_RES_TIMERS=y -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BFIN_GPTIMERS=m -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_3=0x99B2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+CONFIG_CAN=m -+CONFIG_CAN_RAW=m -+CONFIG_CAN_BCM=m -+CONFIG_CAN_BFIN=m -+CONFIG_IRDA=m -+CONFIG_IRLAN=m -+CONFIG_IRCOMM=m -+CONFIG_IRDA_CACHE_LAST_LSAP=y -+CONFIG_IRTTY_SIR=m -+CONFIG_BFIN_SIR=m -+CONFIG_BFIN_SIR1=y -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y 
-+CONFIG_MTD_CMDLINE_PARTS=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_CFI=m -+CONFIG_MTD_CFI_AMDSTD=m -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=m -+CONFIG_MTD_PHYSMAP=m -+CONFIG_MTD_M25P80=y -+CONFIG_MTD_SPI_NOR=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_NETDEVICES=y -+CONFIG_NET_BFIN=y -+CONFIG_BFIN_MAC=y -+# CONFIG_NET_VENDOR_BROADCOM is not set -+# CONFIG_NET_VENDOR_CHELSIO is not set -+# CONFIG_NET_VENDOR_INTEL is not set -+# CONFIG_NET_VENDOR_MARVELL is not set -+# CONFIG_NET_VENDOR_MICREL is not set -+# CONFIG_NET_VENDOR_MICROCHIP is not set -+# CONFIG_NET_VENDOR_NATSEMI is not set -+# CONFIG_NET_VENDOR_SEEQ is not set -+# CONFIG_NET_VENDOR_SMSC is not set -+# CONFIG_NET_VENDOR_STMICRO is not set -+# CONFIG_WLAN is not set -+# CONFIG_INPUT_MOUSEDEV is not set -+CONFIG_INPUT_EVDEV=m -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+CONFIG_INPUT_MISC=y -+# CONFIG_SERIO is not set -+# CONFIG_VT is not set -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_BFIN_JTAG_COMM=m -+# CONFIG_DEVKMEM is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+CONFIG_SERIAL_BFIN_UART0=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_I2C=m -+CONFIG_I2C_CHARDEV=m -+CONFIG_I2C_BLACKFIN_TWI=m -+CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+CONFIG_FB=m -+CONFIG_FIRMWARE_EDID=y -+CONFIG_BACKLIGHT_LCD_SUPPORT=y -+CONFIG_SOUND=m -+CONFIG_SND=m -+CONFIG_SND_MIXER_OSS=m -+CONFIG_SND_PCM_OSS=m -+CONFIG_SND_SOC=m -+CONFIG_SND_BF5XX_I2S=m -+CONFIG_SND_BF5XX_SOC_AD73311=m -+# CONFIG_USB_SUPPORT is not set -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_BFIN=y -+# CONFIG_DNOTIFY is not set -+CONFIG_JFFS2_FS=m -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_INFO=y -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_MMRS=y -+CONFIG_DEBUG_HWERR=y -+CONFIG_EXACT_HWERR=y -+CONFIG_DEBUG_DOUBLEFAULT=y -+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y -+CONFIG_EARLY_PRINTK=y -+CONFIG_CPLB_INFO=y -+CONFIG_BFIN_PSEUDODBG_INSNS=y -+CONFIG_CRYPTO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -diff --git a/arch/blackfin/configs/BF538-EZKIT_defconfig b/arch/blackfin/configs/BF538-EZKIT_defconfig -new file mode 100644 -index 000000000000..12deeaaef3cb ---- /dev/null -+++ b/arch/blackfin/configs/BF538-EZKIT_defconfig -@@ -0,0 +1,133 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -+CONFIG_EXPERT=y -+# CONFIG_SYSCTL_SYSCALL is not set -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+CONFIG_PREEMPT=y -+CONFIG_BF538=y -+CONFIG_IRQ_TIMER0=12 -+CONFIG_IRQ_TIMER1=12 -+CONFIG_IRQ_TIMER2=12 -+CONFIG_HIGH_RES_TIMERS=y -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_3=0x99B2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_PM=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is 
not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+CONFIG_CAN=m -+CONFIG_CAN_RAW=m -+CONFIG_CAN_BCM=m -+CONFIG_CAN_DEV=m -+CONFIG_CAN_BFIN=m -+CONFIG_IRDA=m -+CONFIG_IRLAN=m -+CONFIG_IRCOMM=m -+CONFIG_IRDA_CACHE_LAST_LSAP=y -+CONFIG_IRTTY_SIR=m -+CONFIG_BFIN_SIR=m -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_CMDLINE_PARTS=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_CFI=m -+CONFIG_MTD_CFI_AMDSTD=m -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=m -+CONFIG_MTD_PHYSMAP=m -+CONFIG_MTD_NAND=m -+CONFIG_BLK_DEV_RAM=y -+CONFIG_NETDEVICES=y -+CONFIG_PHYLIB=y -+CONFIG_SMSC_PHY=y -+CONFIG_NET_ETHERNET=y -+CONFIG_SMC91X=y -+# CONFIG_NETDEV_1000 is not set -+# CONFIG_NETDEV_10000 is not set -+# CONFIG_WLAN is not set -+# CONFIG_INPUT_MOUSEDEV is not set -+CONFIG_INPUT_EVDEV=m -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+CONFIG_INPUT_TOUCHSCREEN=y -+CONFIG_TOUCHSCREEN_AD7879=y -+CONFIG_TOUCHSCREEN_AD7879_SPI=y -+CONFIG_INPUT_MISC=y -+# CONFIG_SERIO is not set -+# CONFIG_VT is not set -+# CONFIG_DEVKMEM is not set -+CONFIG_BFIN_JTAG_COMM=m -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+CONFIG_SERIAL_BFIN_UART0=y -+CONFIG_SERIAL_BFIN_UART1=y -+CONFIG_SERIAL_BFIN_UART2=y -+# CONFIG_LEGACY_PTYS is not set -+# CONFIG_HW_RANDOM is not set -+CONFIG_I2C=m -+CONFIG_I2C_BLACKFIN_TWI=m -+CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+CONFIG_FB=m -+CONFIG_FB_BFIN_LQ035Q1=m -+# CONFIG_USB_SUPPORT is not set -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_BFIN=y -+# CONFIG_DNOTIFY is not set -+CONFIG_JFFS2_FS=m -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+CONFIG_SMB_FS=m -+CONFIG_DEBUG_KERNEL=y -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_INFO=y -+# CONFIG_RCU_CPU_STALL_DETECTOR is not set -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_MMRS=y -+CONFIG_DEBUG_HWERR=y -+CONFIG_EXACT_HWERR=y -+CONFIG_DEBUG_DOUBLEFAULT=y -+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y -+CONFIG_EARLY_PRINTK=y -+CONFIG_CPLB_INFO=y -+CONFIG_BFIN_PSEUDODBG_INSNS=y -+CONFIG_CRYPTO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -diff --git a/arch/blackfin/configs/BF548-EZKIT_defconfig b/arch/blackfin/configs/BF548-EZKIT_defconfig -new file mode 100644 -index 000000000000..6a68ffc55b5a ---- /dev/null -+++ b/arch/blackfin/configs/BF548-EZKIT_defconfig -@@ -0,0 +1,207 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_EXPERT=y -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+CONFIG_PREEMPT=y -+CONFIG_BF548_std=y -+CONFIG_IRQ_TIMER0=11 -+# CONFIG_CYCLES_CLOCKSOURCE is not set -+# CONFIG_SCHEDULE_L1 is not set -+# CONFIG_MEMSET_L1 is not set -+# CONFIG_MEMCPY_L1 is not set -+# CONFIG_SYS_BFIN_SPINLOCK_L1 is not set -+CONFIG_CACHELINE_ALIGNED_L1=y -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BFIN_GPTIMERS=m -+CONFIG_DMA_UNCACHED_2M=y -+CONFIG_BFIN_EXTMEM_WRITETHROUGH=y -+CONFIG_BANK_3=0x99B2 -+CONFIG_EBIU_MBSCTLVAL=0x0 -+CONFIG_EBIU_MODEVAL=0x1 
-+CONFIG_EBIU_FCTLVAL=0x6 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+CONFIG_CAN=m -+CONFIG_CAN_RAW=m -+CONFIG_CAN_BCM=m -+CONFIG_CAN_BFIN=m -+CONFIG_IRDA=m -+CONFIG_IRLAN=m -+CONFIG_IRCOMM=m -+CONFIG_IRTTY_SIR=m -+CONFIG_BFIN_SIR=m -+CONFIG_BFIN_SIR3=y -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+CONFIG_FW_LOADER=m -+CONFIG_MTD=y -+CONFIG_MTD_CMDLINE_PARTS=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_CFI=y -+CONFIG_MTD_CFI_INTELEXT=y -+CONFIG_MTD_RAM=y -+CONFIG_MTD_COMPLEX_MAPPINGS=y -+CONFIG_MTD_PHYSMAP=y -+CONFIG_MTD_M25P80=y -+CONFIG_MTD_NAND=y -+CONFIG_MTD_NAND_BF5XX=y -+# CONFIG_MTD_NAND_BF5XX_HWECC is not set -+CONFIG_MTD_SPI_NOR=y -+CONFIG_BLK_DEV_RAM=y -+# CONFIG_SCSI_PROC_FS is not set -+CONFIG_BLK_DEV_SD=y -+CONFIG_BLK_DEV_SR=m -+# CONFIG_SCSI_LOWLEVEL is not set -+CONFIG_ATA=y -+# CONFIG_SATA_PMP is not set -+CONFIG_PATA_BF54X=y -+CONFIG_NETDEVICES=y -+# CONFIG_NET_VENDOR_BROADCOM is not set -+# CONFIG_NET_VENDOR_CHELSIO is not set -+# CONFIG_NET_VENDOR_INTEL is not set -+# CONFIG_NET_VENDOR_MARVELL is not set -+# CONFIG_NET_VENDOR_MICREL is not set -+# CONFIG_NET_VENDOR_MICROCHIP is not set -+# CONFIG_NET_VENDOR_NATSEMI is not set -+# CONFIG_NET_VENDOR_SEEQ is not set -+CONFIG_SMSC911X=y -+# CONFIG_NET_VENDOR_STMICRO is not set -+# CONFIG_WLAN is not set -+CONFIG_INPUT_FF_MEMLESS=m -+# CONFIG_INPUT_MOUSEDEV is not set -+CONFIG_INPUT_EVDEV=m -+CONFIG_INPUT_EVBUG=m -+# CONFIG_KEYBOARD_ATKBD is not set -+CONFIG_KEYBOARD_BFIN=y -+# CONFIG_INPUT_MOUSE is not set -+CONFIG_INPUT_TOUCHSCREEN=y -+CONFIG_TOUCHSCREEN_AD7877=m -+CONFIG_INPUT_MISC=y -+# CONFIG_SERIO is not set -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_BFIN_JTAG_COMM=m -+# CONFIG_DEVKMEM is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+CONFIG_SERIAL_BFIN_UART1=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_I2C=y -+CONFIG_I2C_CHARDEV=y -+CONFIG_I2C_BLACKFIN_TWI=y -+CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+CONFIG_FB=y -+CONFIG_FIRMWARE_EDID=y -+CONFIG_FB_BF54X_LQ043=y -+CONFIG_FRAMEBUFFER_CONSOLE=y -+CONFIG_FONTS=y -+CONFIG_FONT_6x11=y -+CONFIG_LOGO=y -+# CONFIG_LOGO_LINUX_MONO is not set -+# CONFIG_LOGO_LINUX_VGA16 is not set -+# CONFIG_LOGO_LINUX_CLUT224 is not set -+# CONFIG_LOGO_BLACKFIN_VGA16 is not set -+CONFIG_SOUND=y -+CONFIG_SND=y -+CONFIG_SND_MIXER_OSS=y -+CONFIG_SND_PCM_OSS=y -+CONFIG_SND_SOC=y -+CONFIG_SND_BF5XX_AC97=y -+CONFIG_SND_BF5XX_SOC_AD1980=y -+CONFIG_HID_A4TECH=y -+CONFIG_HID_APPLE=y -+CONFIG_HID_BELKIN=y -+CONFIG_HID_CHERRY=y -+CONFIG_HID_CHICONY=y -+CONFIG_HID_CYPRESS=y -+CONFIG_HID_EZKEY=y -+CONFIG_HID_GYRATION=y -+CONFIG_HID_LOGITECH=y -+CONFIG_HID_MICROSOFT=y -+CONFIG_HID_MONTEREY=y -+CONFIG_HID_PANTHERLORD=y -+CONFIG_HID_PETALYNX=y -+CONFIG_HID_SAMSUNG=y -+CONFIG_HID_SONY=y -+CONFIG_HID_SUNPLUS=y -+CONFIG_USB=y -+# CONFIG_USB_DEVICE_CLASS is not set -+CONFIG_USB_OTG_BLACKLIST_HUB=y -+CONFIG_USB_MON=y -+CONFIG_USB_MUSB_HDRC=y -+CONFIG_USB_MUSB_BLACKFIN=y -+CONFIG_USB_STORAGE=y -+CONFIG_USB_GADGET=y -+CONFIG_MMC=y -+CONFIG_MMC_BLOCK=m -+CONFIG_SDH_BFIN=y -+CONFIG_SDH_BFIN_MISSING_CMD_PULLUP_WORKAROUND=y 
-+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_BFIN=y -+CONFIG_EXT2_FS=y -+CONFIG_EXT2_FS_XATTR=y -+# CONFIG_DNOTIFY is not set -+CONFIG_ISO9660_FS=m -+CONFIG_JOLIET=y -+CONFIG_ZISOFS=y -+CONFIG_MSDOS_FS=m -+CONFIG_VFAT_FS=m -+CONFIG_NTFS_FS=m -+CONFIG_NTFS_RW=y -+CONFIG_JFFS2_FS=m -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+CONFIG_NFSD=m -+CONFIG_NFSD_V3=y -+CONFIG_CIFS=y -+CONFIG_NLS_CODEPAGE_437=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_UTF8=m -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_INFO=y -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_MMRS=y -+CONFIG_DEBUG_HWERR=y -+CONFIG_EXACT_HWERR=y -+CONFIG_DEBUG_DOUBLEFAULT=y -+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y -+CONFIG_EARLY_PRINTK=y -+CONFIG_CPLB_INFO=y -+CONFIG_BFIN_PSEUDODBG_INSNS=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -diff --git a/arch/blackfin/configs/BF561-ACVILON_defconfig b/arch/blackfin/configs/BF561-ACVILON_defconfig -new file mode 100644 -index 000000000000..e9f3ba783a4e ---- /dev/null -+++ b/arch/blackfin/configs/BF561-ACVILON_defconfig -@@ -0,0 +1,149 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_SYSFS_DEPRECATED_V2=y -+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -+CONFIG_EXPERT=y -+# CONFIG_SYSCTL_SYSCALL is not set -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+CONFIG_PREEMPT=y -+CONFIG_BF561=y -+CONFIG_BF_REV_0_5=y -+CONFIG_IRQ_TIMER0=10 -+CONFIG_BFIN561_ACVILON=y -+# CONFIG_BF561_COREB is not set -+CONFIG_CLKIN_HZ=12000000 -+CONFIG_HIGH_RES_TIMERS=y -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BFIN_GPTIMERS=y -+CONFIG_DMA_UNCACHED_4M=y -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_0=0x99b2 -+CONFIG_BANK_1=0x3350 -+CONFIG_BANK_3=0xAAC2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+CONFIG_SYN_COOKIES=y -+# CONFIG_INET_LRO is not set -+# CONFIG_IPV6 is not set -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_CMDLINE_PARTS=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_PLATRAM=y -+CONFIG_MTD_PHRAM=y -+CONFIG_MTD_BLOCK2MTD=y -+CONFIG_MTD_NAND=y -+CONFIG_MTD_NAND_PLATFORM=y -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_COUNT=2 -+CONFIG_BLK_DEV_RAM_SIZE=16384 -+CONFIG_SCSI=y -+# CONFIG_SCSI_PROC_FS is not set -+CONFIG_BLK_DEV_SD=y -+# CONFIG_SCSI_LOWLEVEL is not set -+CONFIG_NETDEVICES=y -+CONFIG_NET_ETHERNET=y -+CONFIG_SMSC911X=y -+# CONFIG_NETDEV_1000 is not set -+# CONFIG_NETDEV_10000 is not set -+# CONFIG_WLAN is not set -+# CONFIG_INPUT is not set -+# CONFIG_SERIO is not set -+# CONFIG_VT is not set -+# CONFIG_DEVKMEM is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+CONFIG_SERIAL_BFIN_PIO=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_I2C=y -+CONFIG_I2C_CHARDEV=y -+CONFIG_I2C_PCA_PLATFORM=y -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_SPI_SPIDEV=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+CONFIG_GPIO_PCF857X=y -+CONFIG_SENSORS_LM75=y -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+CONFIG_SOUND=y -+CONFIG_SND=y -+CONFIG_SND_MIXER_OSS=y -+CONFIG_SND_PCM_OSS=y -+# CONFIG_SND_DRIVERS is not set -+# 
CONFIG_SND_USB is not set -+CONFIG_SND_SOC=y -+CONFIG_SND_BF5XX_I2S=y -+CONFIG_SND_BF5XX_SPORT_NUM=1 -+CONFIG_USB=y -+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -+# CONFIG_USB_DEVICE_CLASS is not set -+CONFIG_USB_MON=y -+CONFIG_USB_STORAGE=y -+CONFIG_USB_SERIAL=y -+CONFIG_USB_SERIAL_FTDI_SIO=y -+CONFIG_USB_SERIAL_PL2303=y -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_DS1307=y -+CONFIG_EXT2_FS=y -+CONFIG_EXT2_FS_XATTR=y -+CONFIG_EXT2_FS_POSIX_ACL=y -+CONFIG_EXT2_FS_SECURITY=y -+# CONFIG_DNOTIFY is not set -+CONFIG_MSDOS_FS=y -+CONFIG_VFAT_FS=y -+CONFIG_FAT_DEFAULT_CODEPAGE=866 -+CONFIG_FAT_DEFAULT_IOCHARSET="cp1251" -+CONFIG_NTFS_FS=y -+CONFIG_CONFIGFS_FS=y -+CONFIG_JFFS2_FS=y -+CONFIG_JFFS2_COMPRESSION_OPTIONS=y -+# CONFIG_JFFS2_ZLIB is not set -+CONFIG_JFFS2_LZO=y -+# CONFIG_JFFS2_RTIME is not set -+CONFIG_JFFS2_CMODE_FAVOURLZO=y -+CONFIG_CRAMFS=y -+CONFIG_MINIX_FS=y -+CONFIG_NFS_FS=y -+CONFIG_NFS_V3=y -+CONFIG_ROOT_NFS=y -+CONFIG_NLS_DEFAULT="cp1251" -+CONFIG_NLS_CODEPAGE_866=y -+CONFIG_NLS_CODEPAGE_1251=y -+CONFIG_NLS_KOI8_R=y -+CONFIG_NLS_UTF8=y -+CONFIG_DEBUG_KERNEL=y -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+# CONFIG_DEBUG_BUGVERBOSE is not set -+CONFIG_DEBUG_INFO=y -+# CONFIG_RCU_CPU_STALL_DETECTOR is not set -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_MMRS=y -+# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set -+CONFIG_CPLB_INFO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -diff --git a/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig b/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig -new file mode 100644 -index 000000000000..89b75a6c3fab ---- /dev/null -+++ b/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig -@@ -0,0 +1,112 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_EXPERT=y -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+CONFIG_PREEMPT=y -+CONFIG_BF561=y -+CONFIG_SMP=y -+CONFIG_IRQ_TIMER0=10 -+CONFIG_CLKIN_HZ=30000000 -+CONFIG_HIGH_RES_TIMERS=y -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BFIN_GPTIMERS=m -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_3=0xAAC2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+CONFIG_IRDA=m -+CONFIG_IRLAN=m -+CONFIG_IRCOMM=m -+CONFIG_IRDA_CACHE_LAST_LSAP=y -+CONFIG_IRTTY_SIR=m -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_CMDLINE_PARTS=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_CFI=y -+CONFIG_MTD_CFI_AMDSTD=y -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=m -+CONFIG_MTD_PHYSMAP=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_NETDEVICES=y -+# CONFIG_NET_VENDOR_BROADCOM is not set -+# CONFIG_NET_VENDOR_CHELSIO is not set -+# CONFIG_NET_VENDOR_INTEL is not set -+# CONFIG_NET_VENDOR_MARVELL is not set -+# CONFIG_NET_VENDOR_MICREL is not set -+# CONFIG_NET_VENDOR_MICROCHIP is not set -+# CONFIG_NET_VENDOR_NATSEMI is not set -+# CONFIG_NET_VENDOR_SEEQ is not set 
-+CONFIG_SMC91X=y -+# CONFIG_NET_VENDOR_STMICRO is not set -+# CONFIG_WLAN is not set -+CONFIG_INPUT=m -+# CONFIG_INPUT_MOUSEDEV is not set -+CONFIG_INPUT_EVDEV=m -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+# CONFIG_SERIO is not set -+# CONFIG_VT is not set -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_BFIN_JTAG_COMM=m -+# CONFIG_DEVKMEM is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+# CONFIG_USB_SUPPORT is not set -+# CONFIG_DNOTIFY is not set -+CONFIG_JFFS2_FS=m -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_INFO=y -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_MMRS=y -+CONFIG_DEBUG_HWERR=y -+CONFIG_EXACT_HWERR=y -+CONFIG_DEBUG_DOUBLEFAULT=y -+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y -+CONFIG_EARLY_PRINTK=y -+CONFIG_CPLB_INFO=y -+CONFIG_BFIN_PSEUDODBG_INSNS=y -+CONFIG_CRYPTO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -diff --git a/arch/blackfin/configs/BF561-EZKIT_defconfig b/arch/blackfin/configs/BF561-EZKIT_defconfig -new file mode 100644 -index 000000000000..67b3d2f419ba ---- /dev/null -+++ b/arch/blackfin/configs/BF561-EZKIT_defconfig -@@ -0,0 +1,114 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_EXPERT=y -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+CONFIG_PREEMPT=y -+CONFIG_BF561=y -+CONFIG_IRQ_TIMER0=10 -+CONFIG_CLKIN_HZ=30000000 -+CONFIG_HIGH_RES_TIMERS=y -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BFIN_GPTIMERS=m -+CONFIG_BFIN_EXTMEM_WRITETHROUGH=y -+CONFIG_BFIN_L2_DCACHEABLE=y -+CONFIG_BFIN_L2_WRITETHROUGH=y -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_3=0xAAC2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+CONFIG_IRDA=m -+CONFIG_IRLAN=m -+CONFIG_IRCOMM=m -+CONFIG_IRDA_CACHE_LAST_LSAP=y -+CONFIG_IRTTY_SIR=m -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_CMDLINE_PARTS=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_CFI=y -+CONFIG_MTD_CFI_AMDSTD=y -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=m -+CONFIG_MTD_PHYSMAP=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_NETDEVICES=y -+# CONFIG_NET_VENDOR_BROADCOM is not set -+# CONFIG_NET_VENDOR_CHELSIO is not set -+# CONFIG_NET_VENDOR_INTEL is not set -+# CONFIG_NET_VENDOR_MARVELL is not set -+# CONFIG_NET_VENDOR_MICREL is not set -+# CONFIG_NET_VENDOR_MICROCHIP is not set -+# CONFIG_NET_VENDOR_NATSEMI is not set -+# CONFIG_NET_VENDOR_SEEQ is not set -+CONFIG_SMC91X=y -+# CONFIG_NET_VENDOR_STMICRO is not set -+# CONFIG_WLAN is not set -+CONFIG_INPUT=m -+# CONFIG_INPUT_MOUSEDEV is not set -+CONFIG_INPUT_EVDEV=m -+# CONFIG_INPUT_KEYBOARD is not set -+# 
CONFIG_INPUT_MOUSE is not set -+# CONFIG_SERIO is not set -+# CONFIG_VT is not set -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_BFIN_JTAG_COMM=m -+# CONFIG_DEVKMEM is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+# CONFIG_USB_SUPPORT is not set -+# CONFIG_DNOTIFY is not set -+CONFIG_JFFS2_FS=m -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_INFO=y -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_MMRS=y -+CONFIG_DEBUG_HWERR=y -+CONFIG_EXACT_HWERR=y -+CONFIG_DEBUG_DOUBLEFAULT=y -+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y -+CONFIG_EARLY_PRINTK=y -+CONFIG_CPLB_INFO=y -+CONFIG_BFIN_PSEUDODBG_INSNS=y -+CONFIG_CRYPTO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -diff --git a/arch/blackfin/configs/BF609-EZKIT_defconfig b/arch/blackfin/configs/BF609-EZKIT_defconfig -new file mode 100644 -index 000000000000..8cc75d4218fb ---- /dev/null -+++ b/arch/blackfin/configs/BF609-EZKIT_defconfig -@@ -0,0 +1,154 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_HIGH_RES_TIMERS=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_EXPERT=y -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+CONFIG_PREEMPT=y -+CONFIG_BF609=y -+CONFIG_PINT1_ASSIGN=0x01010000 -+CONFIG_PINT2_ASSIGN=0x07000101 -+CONFIG_PINT3_ASSIGN=0x02020303 -+CONFIG_IP_CHECKSUM_L1=y -+CONFIG_SYSCALL_TAB_L1=y -+CONFIG_CPLB_SWITCH_TAB_L1=y -+# CONFIG_APP_STACK_L1 is not set -+# CONFIG_BFIN_INS_LOWOVERHEAD is not set -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_PM_BFIN_WAKE_PE12=y -+CONFIG_PM_BFIN_WAKE_PE12_POL=1 -+CONFIG_CPU_FREQ=y -+CONFIG_CPU_FREQ_GOV_POWERSAVE=y -+CONFIG_CPU_FREQ_GOV_ONDEMAND=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+CONFIG_IP_PNP_DHCP=y -+CONFIG_IP_PNP_BOOTP=y -+CONFIG_IP_PNP_RARP=y -+# CONFIG_IPV6 is not set -+CONFIG_NETFILTER=y -+CONFIG_CAN=y -+CONFIG_CAN_BFIN=y -+CONFIG_IRDA=y -+CONFIG_IRTTY_SIR=y -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+CONFIG_FW_LOADER=m -+CONFIG_MTD=y -+CONFIG_MTD_CMDLINE_PARTS=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_CFI=y -+CONFIG_MTD_CFI_INTELEXT=y -+CONFIG_MTD_CFI_STAA=y -+CONFIG_MTD_COMPLEX_MAPPINGS=y -+CONFIG_MTD_PHYSMAP=y -+CONFIG_MTD_M25P80=y -+CONFIG_MTD_SPI_NOR=y -+CONFIG_MTD_UBI=m -+CONFIG_SCSI=y -+CONFIG_BLK_DEV_SD=y -+CONFIG_NETDEVICES=y -+# CONFIG_NET_VENDOR_BROADCOM is not set -+# CONFIG_NET_VENDOR_CHELSIO is not set -+# CONFIG_NET_VENDOR_INTEL is not set -+# CONFIG_NET_VENDOR_MARVELL is not set -+# CONFIG_NET_VENDOR_MICREL is not set -+# CONFIG_NET_VENDOR_MICROCHIP is not set -+# CONFIG_NET_VENDOR_NATSEMI is not set -+# CONFIG_NET_VENDOR_SEEQ is not set -+# CONFIG_NET_VENDOR_SMSC is not set -+CONFIG_STMMAC_ETH=y -+CONFIG_STMMAC_IEEE1588=y -+# CONFIG_WLAN is not set -+# CONFIG_INPUT_MOUSEDEV is not set -+CONFIG_INPUT_EVDEV=y -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+CONFIG_INPUT_MISC=y 
-+CONFIG_INPUT_BFIN_ROTARY=y -+# CONFIG_SERIO is not set -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_BFIN_SIMPLE_TIMER=m -+# CONFIG_BFIN_CRC is not set -+CONFIG_BFIN_LINKPORT=y -+# CONFIG_DEVKMEM is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+CONFIG_SERIAL_BFIN_UART0=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_I2C=y -+CONFIG_I2C_CHARDEV=y -+CONFIG_I2C_BLACKFIN_TWI=y -+CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 -+CONFIG_SPI=y -+CONFIG_SPI_ADI_V3=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+CONFIG_PINCTRL_MCP23S08=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+CONFIG_SOUND=m -+CONFIG_SND=m -+CONFIG_SND_MIXER_OSS=m -+CONFIG_SND_PCM_OSS=m -+# CONFIG_SND_DRIVERS is not set -+# CONFIG_SND_SPI is not set -+# CONFIG_SND_USB is not set -+CONFIG_SND_SOC=m -+CONFIG_USB=y -+CONFIG_USB_MUSB_HDRC=y -+CONFIG_USB_MUSB_BLACKFIN=m -+CONFIG_USB_STORAGE=y -+CONFIG_USB_GADGET=y -+CONFIG_USB_GADGET_MUSB_HDRC=y -+CONFIG_USB_ZERO=y -+CONFIG_MMC=y -+CONFIG_SDH_BFIN=y -+# CONFIG_IOMMU_SUPPORT is not set -+CONFIG_EXT2_FS=y -+# CONFIG_DNOTIFY is not set -+CONFIG_MSDOS_FS=y -+CONFIG_VFAT_FS=y -+CONFIG_JFFS2_FS=m -+CONFIG_UBIFS_FS=m -+CONFIG_NFS_FS=m -+CONFIG_NLS_CODEPAGE_437=y -+CONFIG_NLS_ISO8859_1=y -+CONFIG_DEBUG_FS=y -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_INFO=y -+CONFIG_FRAME_POINTER=y -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y -+CONFIG_EARLY_PRINTK=y -+CONFIG_CPLB_INFO=y -+CONFIG_BFIN_PSEUDODBG_INSNS=y -+CONFIG_CRYPTO_HMAC=m -+CONFIG_CRYPTO_MD4=m -+CONFIG_CRYPTO_MD5=m -+CONFIG_CRYPTO_ARC4=m -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -+CONFIG_CRYPTO_DEV_BFIN_CRC=m -diff --git a/arch/blackfin/configs/BlackStamp_defconfig b/arch/blackfin/configs/BlackStamp_defconfig -new file mode 100644 -index 000000000000..9faf0ec7007f ---- /dev/null -+++ b/arch/blackfin/configs/BlackStamp_defconfig -@@ -0,0 +1,108 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_SYSFS_DEPRECATED_V2=y -+CONFIG_BLK_DEV_INITRD=y -+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -+CONFIG_EXPERT=y -+# CONFIG_SYSCTL_SYSCALL is not set -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+CONFIG_MODULE_FORCE_UNLOAD=y -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+CONFIG_PREEMPT=y -+CONFIG_BF532=y -+CONFIG_BF_REV_0_5=y -+CONFIG_BLACKSTAMP=y -+CONFIG_TIMER0=11 -+# CONFIG_CYCLES_CLOCKSOURCE is not set -+CONFIG_HIGH_RES_TIMERS=y -+CONFIG_ROMKERNEL=y -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BFIN_GPTIMERS=y -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_3=0xAAC2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_BINFMT_SHARED_FLAT=y -+CONFIG_PM=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_LRO is not set -+# CONFIG_IPV6 is not set -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_CMDLINE_PARTS=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_CFI=m -+CONFIG_MTD_CFI_AMDSTD=m -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=m -+CONFIG_MTD_COMPLEX_MAPPINGS=y -+CONFIG_MTD_M25P80=y -+CONFIG_MTD_SPI_NOR=y -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_NBD=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_MISC_DEVICES=y -+CONFIG_EEPROM_AT25=y -+CONFIG_NETDEVICES=y -+CONFIG_NET_ETHERNET=y -+CONFIG_SMC91X=y -+# CONFIG_NETDEV_1000 is not set -+# CONFIG_NETDEV_10000 is not set -+# CONFIG_WLAN is 
not set -+# CONFIG_INPUT_MOUSEDEV is not set -+CONFIG_INPUT_EVDEV=m -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+# CONFIG_SERIO is not set -+# CONFIG_VT is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_HW_RANDOM=y -+CONFIG_I2C=m -+CONFIG_I2C_CHARDEV=m -+CONFIG_I2C_GPIO=m -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_SPI_SPIDEV=m -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+# CONFIG_USB_SUPPORT is not set -+CONFIG_MMC=y -+CONFIG_MMC_SPI=y -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_BFIN=y -+# CONFIG_DNOTIFY is not set -+CONFIG_MSDOS_FS=y -+CONFIG_VFAT_FS=y -+CONFIG_JFFS2_FS=y -+CONFIG_NFS_FS=y -+CONFIG_NFS_V3=y -+CONFIG_NFS_V4=y -+CONFIG_SMB_FS=y -+CONFIG_CIFS=y -+CONFIG_NLS_CODEPAGE_437=y -+CONFIG_NLS_ASCII=y -+CONFIG_NLS_UTF8=y -+CONFIG_SYSCTL_SYSCALL_CHECK=y -+CONFIG_DEBUG_MMRS=y -+# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set -+CONFIG_EARLY_PRINTK=y -+CONFIG_CPLB_INFO=y -+CONFIG_CRC_CCITT=m -diff --git a/arch/blackfin/configs/CM-BF527_defconfig b/arch/blackfin/configs/CM-BF527_defconfig -new file mode 100644 -index 000000000000..4a1ad4fd7bb2 ---- /dev/null -+++ b/arch/blackfin/configs/CM-BF527_defconfig -@@ -0,0 +1,129 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_KERNEL_LZMA=y -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+# CONFIG_RD_GZIP is not set -+CONFIG_RD_LZMA=y -+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -+CONFIG_EXPERT=y -+# CONFIG_SYSCTL_SYSCALL is not set -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+CONFIG_PREEMPT=y -+CONFIG_BF527=y -+CONFIG_BF_REV_0_1=y -+CONFIG_IRQ_TIMER0=12 -+CONFIG_BFIN527_BLUETECHNIX_CM=y -+CONFIG_IRQ_USB_INT0=11 -+CONFIG_IRQ_USB_INT1=11 -+CONFIG_IRQ_USB_INT2=11 -+CONFIG_IRQ_USB_DMA=11 -+# CONFIG_CYCLES_CLOCKSOURCE is not set -+# CONFIG_SCHEDULE_L1 is not set -+# CONFIG_MEMSET_L1 is not set -+# CONFIG_MEMCPY_L1 is not set -+# CONFIG_SYS_BFIN_SPINLOCK_L1 is not set -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BFIN_GPTIMERS=y -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_3=0xFFC0 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_CMDLINE_PARTS=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_CFI=y -+CONFIG_MTD_CFI_INTELEXT=y -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=m -+CONFIG_MTD_COMPLEX_MAPPINGS=y -+CONFIG_MTD_GPIO_ADDR=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_SCSI=y -+CONFIG_BLK_DEV_SD=y -+# CONFIG_SCSI_LOWLEVEL is not set -+CONFIG_NETDEVICES=y -+CONFIG_NET_ETHERNET=y -+CONFIG_BFIN_MAC=y -+# CONFIG_NETDEV_1000 is not set -+# CONFIG_NETDEV_10000 is not set -+# CONFIG_WLAN is not set -+# CONFIG_INPUT is not set -+# CONFIG_SERIO is not set -+# CONFIG_VT is not set -+# CONFIG_DEVKMEM is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+CONFIG_SERIAL_BFIN_UART0=y -+CONFIG_SERIAL_BFIN_UART1=y -+# CONFIG_LEGACY_PTYS is not set -+# CONFIG_HW_RANDOM is not set -+CONFIG_I2C=y 
-+CONFIG_I2C_CHARDEV=m -+CONFIG_I2C_BLACKFIN_TWI=m -+CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+CONFIG_USB=m -+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -+# CONFIG_USB_DEVICE_CLASS is not set -+CONFIG_USB_OTG_BLACKLIST_HUB=y -+CONFIG_USB_MON=m -+CONFIG_USB_MUSB_HDRC=m -+CONFIG_USB_MUSB_PERIPHERAL=y -+CONFIG_USB_GADGET_MUSB_HDRC=y -+CONFIG_MUSB_PIO_ONLY=y -+CONFIG_USB_STORAGE=m -+CONFIG_USB_GADGET=m -+CONFIG_USB_ETH=m -+CONFIG_USB_MASS_STORAGE=m -+CONFIG_USB_G_SERIAL=m -+CONFIG_USB_G_PRINTER=m -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_BFIN=y -+# CONFIG_DNOTIFY is not set -+CONFIG_MSDOS_FS=y -+CONFIG_VFAT_FS=y -+CONFIG_JFFS2_FS=y -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+CONFIG_SMB_FS=m -+CONFIG_NLS_CODEPAGE_437=y -+CONFIG_NLS_ISO8859_1=y -+CONFIG_DEBUG_FS=y -+# CONFIG_RCU_CPU_STALL_DETECTOR is not set -+# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set -+CONFIG_EARLY_PRINTK=y -+CONFIG_CRYPTO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -+CONFIG_CRC_CCITT=m -+CONFIG_CRC_ITU_T=y -+CONFIG_CRC7=y -diff --git a/arch/blackfin/configs/PNAV-10_defconfig b/arch/blackfin/configs/PNAV-10_defconfig -new file mode 100644 -index 000000000000..9d787e28bbe8 ---- /dev/null -+++ b/arch/blackfin/configs/PNAV-10_defconfig -@@ -0,0 +1,111 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_LOG_BUF_SHIFT=14 -+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -+CONFIG_EXPERT=y -+# CONFIG_SYSCTL_SYSCALL is not set -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+CONFIG_PREEMPT=y -+CONFIG_BF537=y -+CONFIG_IRQ_TIMER0=12 -+CONFIG_PNAV10=y -+# CONFIG_CYCLES_CLOCKSOURCE is not set -+CONFIG_IP_CHECKSUM_L1=y -+CONFIG_SYSCALL_TAB_L1=y -+CONFIG_CPLB_SWITCH_TAB_L1=y -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BFIN_GPTIMERS=y -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_1=0x33B0 -+CONFIG_BANK_2=0x33B0 -+CONFIG_BANK_3=0x99B2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_RAM=y -+CONFIG_MTD_COMPLEX_MAPPINGS=y -+CONFIG_MTD_UCLINUX=y -+CONFIG_MTD_NAND=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_NETDEVICES=y -+CONFIG_NET_ETHERNET=y -+CONFIG_BFIN_MAC=y -+# CONFIG_BFIN_MAC_USE_L1 is not set -+CONFIG_BFIN_TX_DESC_NUM=100 -+CONFIG_BFIN_RX_DESC_NUM=100 -+# CONFIG_NETDEV_1000 is not set -+# CONFIG_NETDEV_10000 is not set -+# CONFIG_WLAN is not set -+# CONFIG_INPUT_MOUSEDEV is not set -+CONFIG_INPUT_EVDEV=y -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+CONFIG_INPUT_TOUCHSCREEN=y -+CONFIG_TOUCHSCREEN_AD7877=y -+CONFIG_INPUT_MISC=y -+CONFIG_INPUT_UINPUT=y -+# CONFIG_SERIO is not set -+# CONFIG_VT is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+CONFIG_SERIAL_BFIN_UART0=y -+CONFIG_SERIAL_BFIN_UART1=y -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_HW_RANDOM=y -+CONFIG_I2C=y -+CONFIG_I2C_CHARDEV=y 
-+CONFIG_I2C_BLACKFIN_TWI=y -+CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_FB=y -+CONFIG_FIRMWARE_EDID=y -+CONFIG_BACKLIGHT_LCD_SUPPORT=y -+CONFIG_LCD_CLASS_DEVICE=y -+CONFIG_BACKLIGHT_CLASS_DEVICE=y -+CONFIG_SOUND=y -+CONFIG_SND=m -+# CONFIG_SND_SUPPORT_OLD_API is not set -+# CONFIG_SND_VERBOSE_PROCFS is not set -+CONFIG_SOUND_PRIME=y -+# CONFIG_HID is not set -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_BFIN=y -+CONFIG_EXT2_FS=y -+CONFIG_EXT2_FS_XATTR=y -+# CONFIG_DNOTIFY is not set -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+CONFIG_SMB_FS=m -+# CONFIG_RCU_CPU_STALL_DETECTOR is not set -+# CONFIG_DEBUG_HUNT_FOR_ZERO is not set -+# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set -+# CONFIG_ACCESS_CHECK is not set -+CONFIG_CRYPTO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -+CONFIG_CRC_CCITT=m -diff --git a/arch/blackfin/configs/SRV1_defconfig b/arch/blackfin/configs/SRV1_defconfig -new file mode 100644 -index 000000000000..225df32dc9a8 ---- /dev/null -+++ b/arch/blackfin/configs/SRV1_defconfig -@@ -0,0 +1,88 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_SYSVIPC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -+CONFIG_EXPERT=y -+# CONFIG_SYSCTL_SYSCALL is not set -+CONFIG_KALLSYMS_ALL=y -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_IOSCHED_DEADLINE is not set -+CONFIG_PREEMPT=y -+CONFIG_BF537=y -+CONFIG_IRQ_TIMER0=12 -+CONFIG_BOOT_LOAD=0x400000 -+CONFIG_CLKIN_HZ=22118400 -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_DMA_UNCACHED_2M=y -+CONFIG_C_CDPRIO=y -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_PM=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_IPV6 is not set -+CONFIG_IRDA=m -+CONFIG_IRLAN=m -+CONFIG_IRCOMM=m -+CONFIG_IRDA_CACHE_LAST_LSAP=y -+CONFIG_IRTTY_SIR=m -+# CONFIG_WIRELESS is not set -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_JEDECPROBE=m -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=m -+CONFIG_MTD_COMPLEX_MAPPINGS=y -+CONFIG_MTD_UCLINUX=y -+CONFIG_MTD_NAND=m -+CONFIG_BLK_DEV_RAM=y -+CONFIG_MISC_DEVICES=y -+CONFIG_EEPROM_AT25=m -+CONFIG_NETDEVICES=y -+# CONFIG_NETDEV_1000 is not set -+# CONFIG_NETDEV_10000 is not set -+# CONFIG_WLAN is not set -+# CONFIG_INPUT_MOUSEDEV is not set -+CONFIG_INPUT_EVDEV=m -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+CONFIG_INPUT_MISC=y -+CONFIG_INPUT_UINPUT=y -+# CONFIG_SERIO is not set -+# CONFIG_VT is not set -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+CONFIG_SERIAL_BFIN_UART0=y -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_I2C=y -+CONFIG_I2C_CHARDEV=y -+CONFIG_I2C_BLACKFIN_TWI=y -+CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_HWMON=m -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+# CONFIG_HID is not set -+CONFIG_EXT2_FS=y -+CONFIG_EXT2_FS_XATTR=y -+# CONFIG_DNOTIFY is not set -+CONFIG_JFFS2_FS=m -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3=y -+CONFIG_SMB_FS=m -+CONFIG_DEBUG_KERNEL=y -+# CONFIG_DEBUG_BUGVERBOSE is not set -+CONFIG_DEBUG_INFO=y -+# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set -+CONFIG_CPLB_INFO=y -diff --git a/arch/blackfin/configs/TCM-BF518_defconfig b/arch/blackfin/configs/TCM-BF518_defconfig -new file mode 100644 -index 000000000000..425c24e43c34 ---- /dev/null -+++ b/arch/blackfin/configs/TCM-BF518_defconfig -@@ -0,0 +1,131 @@ -+CONFIG_EXPERIMENTAL=y -+CONFIG_KERNEL_LZMA=y -+CONFIG_SYSVIPC=y 
-+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=14 -+CONFIG_BLK_DEV_INITRD=y -+# CONFIG_RD_GZIP is not set -+CONFIG_RD_LZMA=y -+CONFIG_EXPERT=y -+# CONFIG_SYSCTL_SYSCALL is not set -+# CONFIG_ELF_CORE is not set -+# CONFIG_FUTEX is not set -+# CONFIG_SIGNALFD is not set -+# CONFIG_TIMERFD is not set -+# CONFIG_EVENTFD is not set -+# CONFIG_AIO is not set -+CONFIG_SLAB=y -+CONFIG_MMAP_ALLOW_UNINITIALIZED=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+# CONFIG_LBDAF is not set -+# CONFIG_BLK_DEV_BSG is not set -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+CONFIG_PREEMPT=y -+CONFIG_BF518=y -+CONFIG_BF_REV_0_1=y -+CONFIG_BFIN518F_TCM=y -+CONFIG_IRQ_TIMER0=12 -+# CONFIG_CYCLES_CLOCKSOURCE is not set -+# CONFIG_SCHEDULE_L1 is not set -+# CONFIG_MEMSET_L1 is not set -+# CONFIG_MEMCPY_L1 is not set -+# CONFIG_SYS_BFIN_SPINLOCK_L1 is not set -+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=0 -+CONFIG_BFIN_GPTIMERS=m -+CONFIG_C_CDPRIO=y -+CONFIG_BANK_3=0x99B2 -+CONFIG_BINFMT_FLAT=y -+CONFIG_BINFMT_ZFLAT=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IP_PNP=y -+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -+# CONFIG_INET_XFRM_MODE_TUNNEL is not set -+# CONFIG_INET_XFRM_MODE_BEET is not set -+# CONFIG_INET_LRO is not set -+# CONFIG_INET_DIAG is not set -+# CONFIG_IPV6 is not set -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+# CONFIG_FW_LOADER is not set -+CONFIG_MTD=y -+CONFIG_MTD_CMDLINE_PARTS=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_CFI=y -+CONFIG_MTD_CFI_ADV_OPTIONS=y -+CONFIG_MTD_CFI_GEOMETRY=y -+# CONFIG_MTD_MAP_BANK_WIDTH_1 is not set -+# CONFIG_MTD_MAP_BANK_WIDTH_4 is not set -+# CONFIG_MTD_CFI_I2 is not set -+CONFIG_MTD_CFI_INTELEXT=y -+CONFIG_MTD_RAM=y -+CONFIG_MTD_ROM=m -+CONFIG_MTD_PHYSMAP=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_NETDEVICES=y -+CONFIG_NET_ETHERNET=y -+CONFIG_BFIN_MAC=y -+# CONFIG_NETDEV_1000 is not set -+# CONFIG_NETDEV_10000 is not set -+# CONFIG_WLAN is not set -+# CONFIG_INPUT_MOUSEDEV is not set -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+CONFIG_INPUT_MISC=y -+# CONFIG_SERIO is not set -+# CONFIG_DEVKMEM is not set -+CONFIG_BFIN_JTAG_COMM=m -+CONFIG_SERIAL_BFIN=y -+CONFIG_SERIAL_BFIN_CONSOLE=y -+CONFIG_SERIAL_BFIN_UART0=y -+# CONFIG_LEGACY_PTYS is not set -+# CONFIG_HW_RANDOM is not set -+CONFIG_I2C=y -+CONFIG_I2C_CHARDEV=y -+CONFIG_I2C_BLACKFIN_TWI=y -+CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 -+CONFIG_SPI=y -+CONFIG_SPI_BFIN5XX=y -+CONFIG_GPIOLIB=y -+CONFIG_GPIO_SYSFS=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_BFIN_WDT=y -+# CONFIG_HID_SUPPORT is not set -+# CONFIG_USB_SUPPORT is not set -+CONFIG_MMC=y -+CONFIG_MMC_DEBUG=y -+CONFIG_MMC_SPI=y -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_BFIN=y -+CONFIG_EXT2_FS=y -+# CONFIG_DNOTIFY is not set -+CONFIG_VFAT_FS=m -+# CONFIG_MISC_FILESYSTEMS is not set -+CONFIG_NFS_FS=y -+CONFIG_NFS_V3=y -+CONFIG_ROOT_NFS=y -+CONFIG_NLS_CODEPAGE_437=m -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_UTF8=m -+CONFIG_DEBUG_KERNEL=y -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_INFO=y -+# CONFIG_RCU_CPU_STALL_DETECTOR is not set -+# CONFIG_FTRACE is not set -+CONFIG_DEBUG_MMRS=y -+CONFIG_DEBUG_HWERR=y -+CONFIG_EXACT_HWERR=y -+CONFIG_DEBUG_DOUBLEFAULT=y -+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y -+CONFIG_EARLY_PRINTK=y -+CONFIG_CPLB_INFO=y -+CONFIG_CRYPTO=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -+CONFIG_CRC_CCITT=m -diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig -index 
7a7af706e898..be19bf122fde 100644 ---- a/arch/mips/configs/fuloong2e_defconfig -+++ b/arch/mips/configs/fuloong2e_defconfig -@@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y -diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig -index 9085f4d6c698..fb23111d45f6 100644 ---- a/arch/mips/configs/gpr_defconfig -+++ b/arch/mips/configs/gpr_defconfig -@@ -1,8 +1,8 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_BSD_PROCESS_ACCT_V3=y - CONFIG_RELAY=y -diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig -index 21a1168ae301..529a1b1007cf 100644 ---- a/arch/mips/configs/ip22_defconfig -+++ b/arch/mips/configs/ip22_defconfig -@@ -1,7 +1,7 @@ - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y - CONFIG_LOG_BUF_SHIFT=14 -diff --git a/arch/mips/configs/ip28_defconfig b/arch/mips/configs/ip28_defconfig -index 0921ef38e9fb..6da05cef46f8 100644 ---- a/arch/mips/configs/ip28_defconfig -+++ b/arch/mips/configs/ip28_defconfig -@@ -1,5 +1,5 @@ - CONFIG_SYSVIPC=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y - CONFIG_LOG_BUF_SHIFT=14 -diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig -index 328d4dfeb4cb..e17cb23173ea 100644 ---- a/arch/mips/configs/jazz_defconfig -+++ b/arch/mips/configs/jazz_defconfig -@@ -1,6 +1,6 @@ -+CONFIG_PREEMPT=y - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y -diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig -index 914af125a7fa..76a64290373f 100644 ---- a/arch/mips/configs/mtx1_defconfig -+++ b/arch/mips/configs/mtx1_defconfig -@@ -1,8 +1,8 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_AUDIT=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_BSD_PROCESS_ACCT_V3=y - CONFIG_RELAY=y -diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig -index 4ecb157e56d4..ea7309283b01 100644 ---- a/arch/mips/configs/nlm_xlr_defconfig -+++ b/arch/mips/configs/nlm_xlr_defconfig -@@ -1,10 +1,10 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_AUDIT=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_BSD_PROCESS_ACCT_V3=y - CONFIG_TASKSTATS=y -diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig -index 63fe2da1b37f..7f08ee237345 100644 ---- a/arch/mips/configs/pic32mzda_defconfig -+++ b/arch/mips/configs/pic32mzda_defconfig -@@ -1,7 +1,7 @@ -+CONFIG_PREEMPT=y - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y - CONFIG_LOG_BUF_SHIFT=14 -diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig -index 24e07180c57d..38582e8f71c4 100644 ---- a/arch/mips/configs/pistachio_defconfig -+++ b/arch/mips/configs/pistachio_defconfig -@@ -1,9 +1,9 @@ 
-+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_DEFAULT_HOSTNAME="localhost" - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_IKCONFIG=m - CONFIG_IKCONFIG_PROC=y - CONFIG_LOG_BUF_SHIFT=18 -diff --git a/arch/mips/configs/pnx8335_stb225_defconfig b/arch/mips/configs/pnx8335_stb225_defconfig -index 738ba3b1374b..6a3267e8aa0d 100644 ---- a/arch/mips/configs/pnx8335_stb225_defconfig -+++ b/arch/mips/configs/pnx8335_stb225_defconfig -@@ -1,9 +1,9 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - # CONFIG_SWAP is not set - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_LOG_BUF_SHIFT=14 - CONFIG_EXPERT=y - CONFIG_SLAB=y -diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig -index 2c7adea7638f..1c82d62bee72 100644 ---- a/arch/mips/configs/rm200_defconfig -+++ b/arch/mips/configs/rm200_defconfig -@@ -1,6 +1,6 @@ -+CONFIG_PREEMPT=y - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y -diff --git a/arch/parisc/configs/712_defconfig b/arch/parisc/configs/712_defconfig -index d3e3d94e90c3..578524f80cc4 100644 ---- a/arch/parisc/configs/712_defconfig -+++ b/arch/parisc/configs/712_defconfig -@@ -13,7 +13,7 @@ CONFIG_MODULES=y - CONFIG_MODULE_UNLOAD=y - CONFIG_MODULE_FORCE_UNLOAD=y - CONFIG_PA7100LC=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_GSC_LASI=y - # CONFIG_PDC_CHASSIS is not set - CONFIG_BINFMT_MISC=m -diff --git a/arch/parisc/configs/c3000_defconfig b/arch/parisc/configs/c3000_defconfig -index 64d45a8b6ca0..d1bdfad94048 100644 ---- a/arch/parisc/configs/c3000_defconfig -+++ b/arch/parisc/configs/c3000_defconfig -@@ -13,7 +13,7 @@ CONFIG_MODULES=y - CONFIG_MODULE_UNLOAD=y - CONFIG_MODULE_FORCE_UNLOAD=y - CONFIG_PA8X00=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - # CONFIG_GSC is not set - CONFIG_PCI=y - CONFIG_PCI_LBA=y -diff --git a/arch/parisc/configs/defconfig b/arch/parisc/configs/defconfig -index 5b877ca34ebf..0d976614934c 100644 ---- a/arch/parisc/configs/defconfig -+++ b/arch/parisc/configs/defconfig -@@ -14,7 +14,7 @@ CONFIG_MODULE_UNLOAD=y - CONFIG_MODULE_FORCE_UNLOAD=y - # CONFIG_BLK_DEV_BSG is not set - CONFIG_PA7100LC=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_IOMMU_CCIO=y - CONFIG_GSC_LASI=y - CONFIG_GSC_WAX=y -diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig -index 3e56c9c2f16e..ecee9c2a0062 100644 ---- a/arch/powerpc/Kconfig -+++ b/arch/powerpc/Kconfig -@@ -853,6 +853,8 @@ config SCHED_SMT - when dealing with POWER5 cpus at a cost of slightly increased - overhead in some places. If unsure say N here. 
- -+source "kernel/Kconfig.MuQSS" -+ - config PPC_DENORMALISATION - bool "PowerPC denormalisation exception handling" - depends on PPC_BOOK3S_64 -diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig -new file mode 100644 -index 000000000000..04fee07ea6c5 ---- /dev/null -+++ b/arch/powerpc/configs/c2k_defconfig -@@ -0,0 +1,389 @@ -+CONFIG_SYSVIPC=y -+CONFIG_POSIX_MQUEUE=y -+CONFIG_AUDIT=y -+CONFIG_BSD_PROCESS_ACCT=y -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_PROFILING=y -+CONFIG_OPROFILE=m -+CONFIG_KPROBES=y -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+CONFIG_MODVERSIONS=y -+CONFIG_PARTITION_ADVANCED=y -+CONFIG_OSF_PARTITION=y -+CONFIG_MAC_PARTITION=y -+CONFIG_BSD_DISKLABEL=y -+CONFIG_MINIX_SUBPARTITION=y -+CONFIG_SOLARIS_X86_PARTITION=y -+CONFIG_UNIXWARE_DISKLABEL=y -+CONFIG_SGI_PARTITION=y -+CONFIG_SUN_PARTITION=y -+# CONFIG_PPC_CHRP is not set -+# CONFIG_PPC_PMAC is not set -+CONFIG_EMBEDDED6xx=y -+CONFIG_PPC_C2K=y -+CONFIG_CPU_FREQ=y -+CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y -+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y -+CONFIG_CPU_FREQ_GOV_POWERSAVE=m -+CONFIG_CPU_FREQ_GOV_ONDEMAND=m -+CONFIG_GEN_RTC=y -+CONFIG_HIGHMEM=y -+CONFIG_PREEMPT=y -+CONFIG_BINFMT_MISC=y -+CONFIG_PM=y -+CONFIG_PCI_MSI=y -+CONFIG_HOTPLUG_PCI=y -+CONFIG_HOTPLUG_PCI_SHPC=m -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_XFRM_USER=y -+CONFIG_NET_KEY=m -+CONFIG_INET=y -+CONFIG_IP_MULTICAST=y -+CONFIG_IP_ADVANCED_ROUTER=y -+CONFIG_IP_MULTIPLE_TABLES=y -+CONFIG_IP_ROUTE_MULTIPATH=y -+CONFIG_IP_ROUTE_VERBOSE=y -+CONFIG_IP_PNP=y -+CONFIG_IP_PNP_DHCP=y -+CONFIG_NET_IPIP=m -+CONFIG_IP_MROUTE=y -+CONFIG_IP_PIMSM_V1=y -+CONFIG_IP_PIMSM_V2=y -+CONFIG_SYN_COOKIES=y -+CONFIG_INET_AH=m -+CONFIG_INET_ESP=m -+CONFIG_INET_IPCOMP=m -+CONFIG_INET6_AH=m -+CONFIG_INET6_ESP=m -+CONFIG_INET6_IPCOMP=m -+CONFIG_IPV6_TUNNEL=m -+CONFIG_NETFILTER=y -+# CONFIG_NETFILTER_XT_MATCH_SCTP is not set -+CONFIG_IP_NF_IPTABLES=m -+CONFIG_IP_NF_MATCH_ECN=m -+CONFIG_IP_NF_MATCH_TTL=m -+CONFIG_IP_NF_FILTER=m -+CONFIG_IP_NF_TARGET_REJECT=m -+CONFIG_IP_NF_MANGLE=m -+CONFIG_IP_NF_TARGET_ECN=m -+CONFIG_IP_NF_RAW=m -+CONFIG_IP_NF_ARPTABLES=m -+CONFIG_IP_NF_ARPFILTER=m -+CONFIG_IP_NF_ARP_MANGLE=m -+CONFIG_IP6_NF_IPTABLES=m -+CONFIG_IP6_NF_MATCH_EUI64=m -+CONFIG_IP6_NF_MATCH_FRAG=m -+CONFIG_IP6_NF_MATCH_OPTS=m -+CONFIG_IP6_NF_MATCH_HL=m -+CONFIG_IP6_NF_MATCH_IPV6HEADER=m -+CONFIG_IP6_NF_MATCH_RT=m -+CONFIG_IP6_NF_FILTER=m -+CONFIG_IP6_NF_MANGLE=m -+CONFIG_IP6_NF_RAW=m -+CONFIG_BRIDGE_NF_EBTABLES=m -+CONFIG_BRIDGE_EBT_BROUTE=m -+CONFIG_BRIDGE_EBT_T_FILTER=m -+CONFIG_BRIDGE_EBT_T_NAT=m -+CONFIG_BRIDGE_EBT_802_3=m -+CONFIG_BRIDGE_EBT_AMONG=m -+CONFIG_BRIDGE_EBT_ARP=m -+CONFIG_BRIDGE_EBT_IP=m -+CONFIG_BRIDGE_EBT_LIMIT=m -+CONFIG_BRIDGE_EBT_MARK=m -+CONFIG_BRIDGE_EBT_PKTTYPE=m -+CONFIG_BRIDGE_EBT_STP=m -+CONFIG_BRIDGE_EBT_VLAN=m -+CONFIG_BRIDGE_EBT_ARPREPLY=m -+CONFIG_BRIDGE_EBT_DNAT=m -+CONFIG_BRIDGE_EBT_MARK_T=m -+CONFIG_BRIDGE_EBT_REDIRECT=m -+CONFIG_BRIDGE_EBT_SNAT=m -+CONFIG_BRIDGE_EBT_LOG=m -+CONFIG_IP_SCTP=m -+CONFIG_ATM=m -+CONFIG_ATM_CLIP=m -+CONFIG_ATM_LANE=m -+CONFIG_ATM_BR2684=m -+CONFIG_BRIDGE=m -+CONFIG_VLAN_8021Q=m -+CONFIG_NET_SCHED=y -+CONFIG_NET_SCH_CBQ=m -+CONFIG_NET_SCH_HTB=m -+CONFIG_NET_SCH_HFSC=m -+CONFIG_NET_SCH_ATM=m -+CONFIG_NET_SCH_PRIO=m -+CONFIG_NET_SCH_RED=m -+CONFIG_NET_SCH_SFQ=m -+CONFIG_NET_SCH_TEQL=m -+CONFIG_NET_SCH_TBF=m -+CONFIG_NET_SCH_GRED=m -+CONFIG_NET_SCH_DSMARK=m -+CONFIG_NET_SCH_NETEM=m -+CONFIG_NET_CLS_TCINDEX=m -+CONFIG_NET_CLS_ROUTE4=m -+CONFIG_NET_CLS_FW=m -+CONFIG_NET_CLS_U32=m 
-+CONFIG_CLS_U32_PERF=y -+CONFIG_NET_CLS_RSVP=m -+CONFIG_NET_CLS_RSVP6=m -+CONFIG_NET_CLS_IND=y -+CONFIG_BT=m -+CONFIG_BT_RFCOMM=m -+CONFIG_BT_RFCOMM_TTY=y -+CONFIG_BT_BNEP=m -+CONFIG_BT_BNEP_MC_FILTER=y -+CONFIG_BT_BNEP_PROTO_FILTER=y -+CONFIG_BT_HIDP=m -+CONFIG_BT_HCIUART=m -+CONFIG_BT_HCIUART_H4=y -+CONFIG_BT_HCIUART_BCSP=y -+CONFIG_BT_HCIBCM203X=m -+CONFIG_BT_HCIBFUSB=m -+CONFIG_BT_HCIVHCI=m -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+CONFIG_MTD=y -+CONFIG_MTD_BLOCK=y -+CONFIG_MTD_CFI=y -+CONFIG_MTD_CFI_AMDSTD=y -+CONFIG_MTD_COMPLEX_MAPPINGS=y -+CONFIG_MTD_PHYSMAP_OF=y -+CONFIG_BLK_DEV_LOOP=m -+CONFIG_BLK_DEV_CRYPTOLOOP=m -+CONFIG_BLK_DEV_NBD=m -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=16384 -+CONFIG_SCSI=m -+CONFIG_BLK_DEV_SD=m -+CONFIG_CHR_DEV_ST=m -+CONFIG_CHR_DEV_OSST=m -+CONFIG_BLK_DEV_SR=m -+CONFIG_BLK_DEV_SR_VENDOR=y -+CONFIG_CHR_DEV_SG=m -+CONFIG_SCSI_CONSTANTS=y -+CONFIG_SCSI_LOGGING=y -+CONFIG_SCSI_ISCSI_ATTRS=m -+CONFIG_BLK_DEV_3W_XXXX_RAID=m -+CONFIG_SCSI_3W_9XXX=m -+CONFIG_SCSI_ACARD=m -+CONFIG_SCSI_AACRAID=m -+CONFIG_SCSI_AIC7XXX=m -+CONFIG_AIC7XXX_CMDS_PER_DEVICE=4 -+CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -+# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -+# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set -+CONFIG_SCSI_AIC79XX=m -+CONFIG_AIC79XX_CMDS_PER_DEVICE=4 -+CONFIG_AIC79XX_RESET_DELAY_MS=15000 -+# CONFIG_AIC79XX_DEBUG_ENABLE is not set -+# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -+CONFIG_SCSI_ARCMSR=m -+CONFIG_MEGARAID_NEWGEN=y -+CONFIG_MEGARAID_MM=m -+CONFIG_MEGARAID_MAILBOX=m -+CONFIG_MEGARAID_SAS=m -+CONFIG_SCSI_GDTH=m -+CONFIG_SCSI_IPS=m -+CONFIG_SCSI_INITIO=m -+CONFIG_SCSI_SYM53C8XX_2=m -+CONFIG_SCSI_QLOGIC_1280=m -+CONFIG_NETDEVICES=y -+CONFIG_BONDING=m -+CONFIG_DUMMY=m -+CONFIG_NETCONSOLE=m -+CONFIG_TUN=m -+# CONFIG_ATM_DRIVERS is not set -+CONFIG_MV643XX_ETH=y -+CONFIG_VITESSE_PHY=y -+CONFIG_INPUT_EVDEV=y -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+CONFIG_INPUT_MISC=y -+CONFIG_INPUT_UINPUT=m -+# CONFIG_SERIO is not set -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_SERIAL_NONSTANDARD=y -+CONFIG_SERIAL_MPSC=y -+CONFIG_SERIAL_MPSC_CONSOLE=y -+CONFIG_NVRAM=m -+CONFIG_RAW_DRIVER=y -+CONFIG_MAX_RAW_DEVS=8192 -+CONFIG_I2C=m -+CONFIG_I2C_CHARDEV=m -+CONFIG_I2C_MV64XXX=m -+CONFIG_HWMON=m -+CONFIG_SENSORS_ADM1021=m -+CONFIG_SENSORS_ADM1025=m -+CONFIG_SENSORS_ADM1026=m -+CONFIG_SENSORS_ADM1031=m -+CONFIG_SENSORS_DS1621=m -+CONFIG_SENSORS_GL518SM=m -+CONFIG_SENSORS_MAX1619=m -+CONFIG_SENSORS_LM75=m -+CONFIG_SENSORS_LM77=m -+CONFIG_SENSORS_LM78=m -+CONFIG_SENSORS_LM80=m -+CONFIG_SENSORS_LM83=m -+CONFIG_SENSORS_LM85=m -+CONFIG_SENSORS_LM87=m -+CONFIG_SENSORS_LM90=m -+CONFIG_SENSORS_PCF8591=m -+CONFIG_SENSORS_VIA686A=m -+CONFIG_SENSORS_W83781D=m -+CONFIG_SENSORS_W83L785TS=m -+CONFIG_WATCHDOG=y -+CONFIG_SOFT_WATCHDOG=m -+CONFIG_PCIPCWATCHDOG=m -+CONFIG_WDTPCI=m -+CONFIG_USBPCWATCHDOG=m -+# CONFIG_VGA_CONSOLE is not set -+CONFIG_USB=m -+CONFIG_USB_MON=m -+CONFIG_USB_EHCI_HCD=m -+CONFIG_USB_EHCI_ROOT_HUB_TT=y -+CONFIG_USB_OHCI_HCD=m -+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y -+CONFIG_USB_UHCI_HCD=m -+CONFIG_USB_ACM=m -+CONFIG_USB_PRINTER=m -+CONFIG_USB_STORAGE=m -+CONFIG_USB_STORAGE_DATAFAB=m -+CONFIG_USB_STORAGE_FREECOM=m -+CONFIG_USB_STORAGE_ISD200=m -+CONFIG_USB_STORAGE_SDDR09=m -+CONFIG_USB_STORAGE_SDDR55=m -+CONFIG_USB_STORAGE_JUMPSHOT=m -+CONFIG_USB_MDC800=m -+CONFIG_USB_MICROTEK=m -+CONFIG_USB_SERIAL=m -+CONFIG_USB_SERIAL_GENERIC=y -+CONFIG_USB_SERIAL_BELKIN=m -+CONFIG_USB_SERIAL_WHITEHEAT=m -+CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m 
-+CONFIG_USB_SERIAL_EMPEG=m -+CONFIG_USB_SERIAL_FTDI_SIO=m -+CONFIG_USB_SERIAL_VISOR=m -+CONFIG_USB_SERIAL_IPAQ=m -+CONFIG_USB_SERIAL_IR=m -+CONFIG_USB_SERIAL_EDGEPORT=m -+CONFIG_USB_SERIAL_EDGEPORT_TI=m -+CONFIG_USB_SERIAL_KEYSPAN_PDA=m -+CONFIG_USB_SERIAL_KEYSPAN=m -+CONFIG_USB_SERIAL_KLSI=m -+CONFIG_USB_SERIAL_KOBIL_SCT=m -+CONFIG_USB_SERIAL_MCT_U232=m -+CONFIG_USB_SERIAL_PL2303=m -+CONFIG_USB_SERIAL_SAFE=m -+CONFIG_USB_SERIAL_SAFE_PADDED=y -+CONFIG_USB_SERIAL_CYBERJACK=m -+CONFIG_USB_SERIAL_XIRCOM=m -+CONFIG_USB_SERIAL_OMNINET=m -+CONFIG_USB_EMI62=m -+CONFIG_USB_RIO500=m -+CONFIG_USB_LEGOTOWER=m -+CONFIG_USB_LCD=m -+CONFIG_USB_LED=m -+CONFIG_USB_TEST=m -+CONFIG_USB_ATM=m -+CONFIG_USB_SPEEDTOUCH=m -+CONFIG_INFINIBAND=m -+CONFIG_INFINIBAND_USER_MAD=m -+CONFIG_INFINIBAND_USER_ACCESS=m -+CONFIG_INFINIBAND_MTHCA=m -+CONFIG_INFINIBAND_IPOIB=m -+CONFIG_INFINIBAND_IPOIB_CM=y -+CONFIG_INFINIBAND_SRP=m -+CONFIG_DMADEVICES=y -+CONFIG_EXT4_FS=m -+CONFIG_EXT4_FS_POSIX_ACL=y -+CONFIG_EXT4_FS_SECURITY=y -+CONFIG_QUOTA=y -+CONFIG_QFMT_V2=y -+CONFIG_AUTOFS4_FS=m -+CONFIG_UDF_FS=m -+CONFIG_MSDOS_FS=m -+CONFIG_VFAT_FS=m -+CONFIG_FAT_DEFAULT_IOCHARSET="ascii" -+CONFIG_PROC_KCORE=y -+CONFIG_TMPFS=y -+CONFIG_HFS_FS=m -+CONFIG_HFSPLUS_FS=m -+CONFIG_JFFS2_FS=y -+CONFIG_CRAMFS=m -+CONFIG_VXFS_FS=m -+CONFIG_NFS_FS=y -+CONFIG_NFS_V3_ACL=y -+CONFIG_NFS_V4=y -+CONFIG_ROOT_NFS=y -+CONFIG_CIFS=m -+CONFIG_CIFS_XATTR=y -+CONFIG_CIFS_POSIX=y -+CONFIG_NLS=y -+CONFIG_NLS_DEFAULT="utf8" -+CONFIG_NLS_CODEPAGE_437=y -+CONFIG_NLS_CODEPAGE_737=m -+CONFIG_NLS_CODEPAGE_775=m -+CONFIG_NLS_CODEPAGE_850=m -+CONFIG_NLS_CODEPAGE_852=m -+CONFIG_NLS_CODEPAGE_855=m -+CONFIG_NLS_CODEPAGE_857=m -+CONFIG_NLS_CODEPAGE_860=m -+CONFIG_NLS_CODEPAGE_861=m -+CONFIG_NLS_CODEPAGE_862=m -+CONFIG_NLS_CODEPAGE_863=m -+CONFIG_NLS_CODEPAGE_864=m -+CONFIG_NLS_CODEPAGE_865=m -+CONFIG_NLS_CODEPAGE_866=m -+CONFIG_NLS_CODEPAGE_869=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_CODEPAGE_950=m -+CONFIG_NLS_CODEPAGE_932=m -+CONFIG_NLS_CODEPAGE_949=m -+CONFIG_NLS_CODEPAGE_874=m -+CONFIG_NLS_ISO8859_8=m -+CONFIG_NLS_CODEPAGE_1250=m -+CONFIG_NLS_CODEPAGE_1251=m -+CONFIG_NLS_ASCII=y -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_ISO8859_2=m -+CONFIG_NLS_ISO8859_3=m -+CONFIG_NLS_ISO8859_4=m -+CONFIG_NLS_ISO8859_5=m -+CONFIG_NLS_ISO8859_6=m -+CONFIG_NLS_ISO8859_7=m -+CONFIG_NLS_ISO8859_9=m -+CONFIG_NLS_ISO8859_13=m -+CONFIG_NLS_ISO8859_14=m -+CONFIG_NLS_ISO8859_15=m -+CONFIG_NLS_KOI8_R=m -+CONFIG_NLS_KOI8_U=m -+CONFIG_CRC_CCITT=m -+CONFIG_CRC_T10DIF=m -+CONFIG_DEBUG_INFO=y -+CONFIG_MAGIC_SYSRQ=y -+CONFIG_DEBUG_KERNEL=y -+CONFIG_DEBUG_STACK_USAGE=y -+CONFIG_DEBUG_HIGHMEM=y -+CONFIG_DEBUG_STACKOVERFLOW=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_DEBUG_SPINLOCK=y -+CONFIG_BOOTX_TEXT=y -+CONFIG_PPC_EARLY_DEBUG=y -+CONFIG_SECURITY=y -+CONFIG_SECURITY_NETWORK=y -+CONFIG_SECURITY_SELINUX=y -+CONFIG_SECURITY_SELINUX_BOOTPARAM=y -+CONFIG_SECURITY_SELINUX_DISABLE=y -+CONFIG_CRYPTO_HMAC=y -+CONFIG_CRYPTO_MICHAEL_MIC=m -+CONFIG_CRYPTO_SHA1=y -+CONFIG_CRYPTO_SHA512=m -+CONFIG_CRYPTO_WP512=m -+CONFIG_CRYPTO_BLOWFISH=m -+CONFIG_CRYPTO_CAST6=m -+CONFIG_CRYPTO_KHAZAD=m -+CONFIG_CRYPTO_SERPENT=m -+CONFIG_CRYPTO_TEA=m -+CONFIG_CRYPTO_TWOFISH=m -diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig -index 9dca4cffa623..09d38c3e59a5 100644 ---- a/arch/powerpc/configs/ppc6xx_defconfig -+++ b/arch/powerpc/configs/ppc6xx_defconfig -@@ -74,7 +74,7 @@ CONFIG_QE_GPIO=y - CONFIG_MCU_MPC8349EMITX=y - CONFIG_HIGHMEM=y - CONFIG_HZ_1000=y --CONFIG_PREEMPT_VOLUNTARY=y 
-+CONFIG_PREEMPT=y - CONFIG_BINFMT_MISC=y - CONFIG_HIBERNATION=y - CONFIG_PM_DEBUG=y -diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c -index f18d5067cd0f..fe489fc01c73 100644 ---- a/arch/powerpc/platforms/cell/spufs/sched.c -+++ b/arch/powerpc/platforms/cell/spufs/sched.c -@@ -51,11 +51,6 @@ static struct task_struct *spusched_task; - static struct timer_list spusched_timer; - static struct timer_list spuloadavg_timer; - --/* -- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). -- */ --#define NORMAL_PRIO 120 -- - /* - * Frequency of the spu scheduler tick. By default we do one SPU scheduler - * tick for every 10 CPU scheduler ticks. -diff --git a/arch/score/configs/spct6600_defconfig b/arch/score/configs/spct6600_defconfig -new file mode 100644 -index 000000000000..46434ca1fa10 ---- /dev/null -+++ b/arch/score/configs/spct6600_defconfig -@@ -0,0 +1,84 @@ -+CONFIG_HZ_100=y -+CONFIG_PREEMPT=y -+CONFIG_EXPERIMENTAL=y -+# CONFIG_LOCALVERSION_AUTO is not set -+CONFIG_SYSVIPC=y -+CONFIG_POSIX_MQUEUE=y -+CONFIG_BSD_PROCESS_ACCT=y -+CONFIG_LOG_BUF_SHIFT=12 -+CONFIG_SYSFS_DEPRECATED_V2=y -+CONFIG_BLK_DEV_INITRD=y -+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -+CONFIG_EXPERT=y -+# CONFIG_KALLSYMS is not set -+# CONFIG_HOTPLUG is not set -+CONFIG_SLAB=y -+CONFIG_MODULES=y -+CONFIG_MODULE_FORCE_LOAD=y -+CONFIG_MODULE_UNLOAD=y -+CONFIG_MODULE_FORCE_UNLOAD=y -+# CONFIG_BLK_DEV_BSG is not set -+CONFIG_BINFMT_MISC=y -+CONFIG_NET=y -+CONFIG_UNIX=y -+CONFIG_NET_KEY=y -+CONFIG_INET=y -+CONFIG_IP_MULTICAST=y -+CONFIG_ARPD=y -+# CONFIG_INET_LRO is not set -+# CONFIG_IPV6 is not set -+# CONFIG_STANDALONE is not set -+# CONFIG_PREVENT_FIRMWARE_BUILD is not set -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_CRYPTOLOOP=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_COUNT=1 -+# CONFIG_MISC_DEVICES is not set -+CONFIG_NETDEVICES=y -+# CONFIG_NETDEV_1000 is not set -+# CONFIG_NETDEV_10000 is not set -+# CONFIG_INPUT_MOUSEDEV is not set -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+# CONFIG_SERIO is not set -+CONFIG_SERIAL_NONSTANDARD=y -+CONFIG_STALDRV=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_RAW_DRIVER=y -+CONFIG_MAX_RAW_DEVS=8192 -+# CONFIG_HWMON is not set -+# CONFIG_VGA_CONSOLE is not set -+# CONFIG_HID_SUPPORT is not set -+# CONFIG_USB_SUPPORT is not set -+CONFIG_EXT2_FS=y -+CONFIG_EXT2_FS_XATTR=y -+CONFIG_EXT2_FS_POSIX_ACL=y -+CONFIG_EXT3_FS=y -+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -+CONFIG_EXT3_FS_POSIX_ACL=y -+CONFIG_AUTOFS_FS=y -+CONFIG_AUTOFS4_FS=y -+CONFIG_PROC_KCORE=y -+# CONFIG_PROC_PAGE_MONITOR is not set -+CONFIG_TMPFS=y -+CONFIG_TMPFS_POSIX_ACL=y -+CONFIG_NFS_FS=y -+CONFIG_NFS_V3=y -+CONFIG_NFS_V3_ACL=y -+CONFIG_NFS_V4=y -+CONFIG_NFSD=y -+CONFIG_NFSD_V3_ACL=y -+CONFIG_NFSD_V4=y -+# CONFIG_RCU_CPU_STALL_DETECTOR is not set -+CONFIG_SECURITY=y -+CONFIG_SECURITY_NETWORK=y -+CONFIG_CRYPTO_NULL=y -+CONFIG_CRYPTO_CRYPTD=y -+CONFIG_CRYPTO_SEQIV=y -+CONFIG_CRYPTO_MD4=y -+CONFIG_CRYPTO_MICHAEL_MIC=y -+# CONFIG_CRYPTO_ANSI_CPRNG is not set -+# CONFIG_CRYPTO_HW is not set -+CONFIG_CRC_CCITT=y -+CONFIG_CRC16=y -+CONFIG_LIBCRC32C=y -diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig -index 9a527f978106..5895f2cc726e 100644 ---- a/arch/sh/configs/se7712_defconfig -+++ b/arch/sh/configs/se7712_defconfig -@@ -23,7 +23,7 @@ CONFIG_FLATMEM_MANUAL=y - CONFIG_SH_SOLUTION_ENGINE=y - CONFIG_SH_PCLK_FREQ=66666666 - CONFIG_HEARTBEAT=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - 
CONFIG_CMDLINE_OVERWRITE=y - CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda1" - CONFIG_NET=y -diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig -index 3b0e1eb6e874..e296a2cd9903 100644 ---- a/arch/sh/configs/se7721_defconfig -+++ b/arch/sh/configs/se7721_defconfig -@@ -23,7 +23,7 @@ CONFIG_FLATMEM_MANUAL=y - CONFIG_SH_7721_SOLUTION_ENGINE=y - CONFIG_SH_PCLK_FREQ=33333333 - CONFIG_HEARTBEAT=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_CMDLINE_OVERWRITE=y - CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda2" - CONFIG_NET=y -diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig -index 4ec961ace688..a03a1ad670a0 100644 ---- a/arch/sh/configs/titan_defconfig -+++ b/arch/sh/configs/titan_defconfig -@@ -20,7 +20,7 @@ CONFIG_SH_TITAN=y - CONFIG_SH_PCLK_FREQ=30000000 - CONFIG_SH_DMA=y - CONFIG_SH_DMA_API=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_CMDLINE_OVERWRITE=y - CONFIG_CMDLINE="console=ttySC1,38400N81 root=/dev/nfs ip=:::::eth1:autoconf rw" - CONFIG_PCI=y -diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig -index 6c325d53a20a..98d4ef3d76cf 100644 ---- a/arch/sparc/configs/sparc64_defconfig -+++ b/arch/sparc/configs/sparc64_defconfig -@@ -22,7 +22,7 @@ CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y - CONFIG_NUMA=y - CONFIG_DEFAULT_MMAP_MIN_ADDR=8192 --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_SUN_LDOMS=y - CONFIG_PCI=y - CONFIG_PCI_MSI=y -diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig -new file mode 100644 -index 000000000000..939c63ba7e6e ---- /dev/null -+++ b/arch/tile/configs/tilegx_defconfig -@@ -0,0 +1,411 @@ -+CONFIG_TILEGX=y -+CONFIG_SYSVIPC=y -+CONFIG_POSIX_MQUEUE=y -+CONFIG_FHANDLE=y -+CONFIG_AUDIT=y -+CONFIG_NO_HZ=y -+CONFIG_BSD_PROCESS_ACCT=y -+CONFIG_BSD_PROCESS_ACCT_V3=y -+CONFIG_TASKSTATS=y -+CONFIG_TASK_DELAY_ACCT=y -+CONFIG_TASK_XACCT=y -+CONFIG_TASK_IO_ACCOUNTING=y -+CONFIG_LOG_BUF_SHIFT=19 -+CONFIG_CGROUPS=y -+CONFIG_CGROUP_DEBUG=y -+CONFIG_CGROUP_DEVICE=y -+CONFIG_CPUSETS=y -+CONFIG_CGROUP_CPUACCT=y -+CONFIG_CGROUP_SCHED=y -+CONFIG_RT_GROUP_SCHED=y -+CONFIG_BLK_CGROUP=y -+CONFIG_NAMESPACES=y -+CONFIG_RELAY=y -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_RD_XZ=y -+CONFIG_SYSCTL_SYSCALL=y -+CONFIG_EMBEDDED=y -+# CONFIG_COMPAT_BRK is not set -+CONFIG_PROFILING=y -+CONFIG_KPROBES=y -+CONFIG_MODULES=y -+CONFIG_MODULE_FORCE_LOAD=y -+CONFIG_MODULE_UNLOAD=y -+CONFIG_BLK_DEV_INTEGRITY=y -+CONFIG_PARTITION_ADVANCED=y -+CONFIG_OSF_PARTITION=y -+CONFIG_AMIGA_PARTITION=y -+CONFIG_MAC_PARTITION=y -+CONFIG_BSD_DISKLABEL=y -+CONFIG_MINIX_SUBPARTITION=y -+CONFIG_SOLARIS_X86_PARTITION=y -+CONFIG_UNIXWARE_DISKLABEL=y -+CONFIG_SGI_PARTITION=y -+CONFIG_SUN_PARTITION=y -+CONFIG_KARMA_PARTITION=y -+CONFIG_CFQ_GROUP_IOSCHED=y -+CONFIG_NR_CPUS=100 -+CONFIG_HZ_100=y -+# CONFIG_COMPACTION is not set -+CONFIG_PREEMPT=y -+CONFIG_TILE_PCI_IO=y -+CONFIG_PCI_DEBUG=y -+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -+CONFIG_BINFMT_MISC=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_XFRM_USER=y -+CONFIG_XFRM_SUB_POLICY=y -+CONFIG_XFRM_STATISTICS=y -+CONFIG_NET_KEY=m -+CONFIG_NET_KEY_MIGRATE=y -+CONFIG_INET=y -+CONFIG_IP_MULTICAST=y -+CONFIG_IP_ADVANCED_ROUTER=y -+CONFIG_IP_MULTIPLE_TABLES=y -+CONFIG_IP_ROUTE_MULTIPATH=y -+CONFIG_IP_ROUTE_VERBOSE=y -+CONFIG_NET_IPIP=m -+CONFIG_IP_MROUTE=y -+CONFIG_IP_PIMSM_V1=y -+CONFIG_IP_PIMSM_V2=y -+CONFIG_SYN_COOKIES=y -+CONFIG_INET_AH=m -+CONFIG_INET_ESP=m -+CONFIG_INET_IPCOMP=m 
-+CONFIG_INET_XFRM_MODE_TRANSPORT=m -+CONFIG_INET_XFRM_MODE_TUNNEL=m -+CONFIG_INET_XFRM_MODE_BEET=m -+CONFIG_INET_DIAG=m -+CONFIG_TCP_CONG_ADVANCED=y -+CONFIG_TCP_CONG_HSTCP=m -+CONFIG_TCP_CONG_HYBLA=m -+CONFIG_TCP_CONG_SCALABLE=m -+CONFIG_TCP_CONG_LP=m -+CONFIG_TCP_CONG_VENO=m -+CONFIG_TCP_CONG_YEAH=m -+CONFIG_TCP_CONG_ILLINOIS=m -+CONFIG_TCP_MD5SIG=y -+CONFIG_IPV6=y -+CONFIG_IPV6_ROUTER_PREF=y -+CONFIG_IPV6_ROUTE_INFO=y -+CONFIG_IPV6_OPTIMISTIC_DAD=y -+CONFIG_INET6_AH=m -+CONFIG_INET6_ESP=m -+CONFIG_INET6_IPCOMP=m -+CONFIG_IPV6_MIP6=m -+CONFIG_INET6_XFRM_MODE_TRANSPORT=m -+CONFIG_INET6_XFRM_MODE_TUNNEL=m -+CONFIG_INET6_XFRM_MODE_BEET=m -+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m -+CONFIG_IPV6_SIT=m -+CONFIG_IPV6_TUNNEL=m -+CONFIG_IPV6_MULTIPLE_TABLES=y -+CONFIG_IPV6_MROUTE=y -+CONFIG_IPV6_PIMSM_V2=y -+CONFIG_NETLABEL=y -+CONFIG_RDS=m -+CONFIG_RDS_TCP=m -+CONFIG_BRIDGE=m -+CONFIG_VLAN_8021Q=m -+CONFIG_VLAN_8021Q_GVRP=y -+CONFIG_PHONET=m -+CONFIG_NET_SCHED=y -+CONFIG_NET_SCH_CBQ=m -+CONFIG_NET_SCH_HTB=m -+CONFIG_NET_SCH_HFSC=m -+CONFIG_NET_SCH_PRIO=m -+CONFIG_NET_SCH_MULTIQ=m -+CONFIG_NET_SCH_RED=m -+CONFIG_NET_SCH_SFQ=m -+CONFIG_NET_SCH_TEQL=m -+CONFIG_NET_SCH_TBF=m -+CONFIG_NET_SCH_GRED=m -+CONFIG_NET_SCH_DSMARK=m -+CONFIG_NET_SCH_NETEM=m -+CONFIG_NET_SCH_DRR=m -+CONFIG_NET_SCH_INGRESS=m -+CONFIG_NET_CLS_BASIC=m -+CONFIG_NET_CLS_TCINDEX=m -+CONFIG_NET_CLS_ROUTE4=m -+CONFIG_NET_CLS_FW=m -+CONFIG_NET_CLS_U32=m -+CONFIG_CLS_U32_PERF=y -+CONFIG_CLS_U32_MARK=y -+CONFIG_NET_CLS_RSVP=m -+CONFIG_NET_CLS_RSVP6=m -+CONFIG_NET_CLS_FLOW=m -+CONFIG_NET_CLS_CGROUP=y -+CONFIG_NET_EMATCH=y -+CONFIG_NET_EMATCH_CMP=m -+CONFIG_NET_EMATCH_NBYTE=m -+CONFIG_NET_EMATCH_U32=m -+CONFIG_NET_EMATCH_META=m -+CONFIG_NET_EMATCH_TEXT=m -+CONFIG_NET_CLS_ACT=y -+CONFIG_NET_ACT_POLICE=m -+CONFIG_NET_ACT_GACT=m -+CONFIG_GACT_PROB=y -+CONFIG_NET_ACT_MIRRED=m -+CONFIG_NET_ACT_NAT=m -+CONFIG_NET_ACT_PEDIT=m -+CONFIG_NET_ACT_SIMP=m -+CONFIG_NET_ACT_SKBEDIT=m -+CONFIG_NET_CLS_IND=y -+CONFIG_DCB=y -+CONFIG_DNS_RESOLVER=y -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+CONFIG_DEVTMPFS=y -+CONFIG_DEVTMPFS_MOUNT=y -+CONFIG_CONNECTOR=y -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_CRYPTOLOOP=m -+CONFIG_BLK_DEV_SX8=m -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=16384 -+CONFIG_ATA_OVER_ETH=m -+CONFIG_RAID_ATTRS=m -+CONFIG_BLK_DEV_SD=y -+CONFIG_SCSI_CONSTANTS=y -+CONFIG_SCSI_LOGGING=y -+CONFIG_SCSI_SAS_ATA=y -+CONFIG_ISCSI_TCP=m -+CONFIG_SCSI_MVSAS=y -+# CONFIG_SCSI_MVSAS_DEBUG is not set -+CONFIG_SCSI_MVSAS_TASKLET=y -+CONFIG_ATA=y -+CONFIG_SATA_AHCI=y -+CONFIG_SATA_SIL24=y -+# CONFIG_ATA_SFF is not set -+CONFIG_MD=y -+CONFIG_BLK_DEV_MD=y -+CONFIG_MD_LINEAR=m -+CONFIG_MD_RAID0=m -+CONFIG_MD_RAID1=m -+CONFIG_MD_RAID10=m -+CONFIG_MD_RAID456=m -+CONFIG_MD_FAULTY=m -+CONFIG_BLK_DEV_DM=m -+CONFIG_DM_DEBUG=y -+CONFIG_DM_CRYPT=m -+CONFIG_DM_SNAPSHOT=m -+CONFIG_DM_MIRROR=m -+CONFIG_DM_LOG_USERSPACE=m -+CONFIG_DM_ZERO=m -+CONFIG_DM_MULTIPATH=m -+CONFIG_DM_MULTIPATH_QL=m -+CONFIG_DM_MULTIPATH_ST=m -+CONFIG_DM_DELAY=m -+CONFIG_DM_UEVENT=y -+CONFIG_TARGET_CORE=m -+CONFIG_TCM_IBLOCK=m -+CONFIG_TCM_FILEIO=m -+CONFIG_TCM_PSCSI=m -+CONFIG_LOOPBACK_TARGET=m -+CONFIG_ISCSI_TARGET=m -+CONFIG_FUSION=y -+CONFIG_FUSION_SAS=y -+CONFIG_NETDEVICES=y -+CONFIG_BONDING=m -+CONFIG_DUMMY=m -+CONFIG_IFB=m -+CONFIG_MACVLAN=m -+CONFIG_MACVTAP=m -+CONFIG_NETCONSOLE=m -+CONFIG_NETCONSOLE_DYNAMIC=y -+CONFIG_TUN=y -+CONFIG_VETH=m -+CONFIG_NET_DSA_MV88E6060=y -+CONFIG_NET_DSA_MV88E6XXX=y -+CONFIG_SKY2=y -+CONFIG_PTP_1588_CLOCK_TILEGX=y -+# 
CONFIG_WLAN is not set -+# CONFIG_INPUT_MOUSEDEV is not set -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+# CONFIG_SERIO is not set -+# CONFIG_VT is not set -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_SERIAL_TILEGX=y -+CONFIG_HW_RANDOM=y -+CONFIG_HW_RANDOM_TIMERIOMEM=m -+CONFIG_I2C=y -+CONFIG_I2C_CHARDEV=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_WATCHDOG_NOWAYOUT=y -+# CONFIG_VGA_ARB is not set -+CONFIG_DRM=m -+CONFIG_DRM_TDFX=m -+CONFIG_DRM_R128=m -+CONFIG_DRM_MGA=m -+CONFIG_DRM_VIA=m -+CONFIG_DRM_SAVAGE=m -+CONFIG_USB=y -+CONFIG_USB_EHCI_HCD=y -+CONFIG_USB_OHCI_HCD=y -+CONFIG_USB_STORAGE=y -+CONFIG_EDAC=y -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_TILE=y -+CONFIG_EXT2_FS=y -+CONFIG_EXT2_FS_XATTR=y -+CONFIG_EXT2_FS_POSIX_ACL=y -+CONFIG_EXT2_FS_SECURITY=y -+CONFIG_EXT2_FS_XIP=y -+CONFIG_EXT3_FS=y -+CONFIG_EXT3_FS_POSIX_ACL=y -+CONFIG_EXT3_FS_SECURITY=y -+CONFIG_EXT4_FS=y -+CONFIG_EXT4_FS_POSIX_ACL=y -+CONFIG_EXT4_FS_SECURITY=y -+CONFIG_XFS_FS=y -+CONFIG_XFS_QUOTA=y -+CONFIG_XFS_POSIX_ACL=y -+CONFIG_GFS2_FS=m -+CONFIG_GFS2_FS_LOCKING_DLM=y -+CONFIG_BTRFS_FS=m -+CONFIG_BTRFS_FS_POSIX_ACL=y -+CONFIG_QUOTA=y -+CONFIG_QUOTA_NETLINK_INTERFACE=y -+# CONFIG_PRINT_QUOTA_WARNING is not set -+CONFIG_QFMT_V2=y -+CONFIG_AUTOFS4_FS=m -+CONFIG_FUSE_FS=y -+CONFIG_CUSE=m -+CONFIG_FSCACHE=m -+CONFIG_FSCACHE_STATS=y -+CONFIG_CACHEFILES=m -+CONFIG_ISO9660_FS=m -+CONFIG_JOLIET=y -+CONFIG_ZISOFS=y -+CONFIG_UDF_FS=m -+CONFIG_MSDOS_FS=m -+CONFIG_VFAT_FS=m -+CONFIG_FAT_DEFAULT_IOCHARSET="ascii" -+CONFIG_PROC_KCORE=y -+CONFIG_TMPFS=y -+CONFIG_TMPFS_POSIX_ACL=y -+CONFIG_HUGETLBFS=y -+CONFIG_ECRYPT_FS=m -+CONFIG_CRAMFS=m -+CONFIG_SQUASHFS=m -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3_ACL=y -+CONFIG_NFS_V4=m -+CONFIG_NFS_V4_1=y -+CONFIG_NFS_FSCACHE=y -+CONFIG_NFSD=m -+CONFIG_NFSD_V3_ACL=y -+CONFIG_NFSD_V4=y -+CONFIG_CIFS=m -+CONFIG_CIFS_STATS=y -+CONFIG_CIFS_WEAK_PW_HASH=y -+CONFIG_CIFS_UPCALL=y -+CONFIG_CIFS_XATTR=y -+CONFIG_CIFS_POSIX=y -+CONFIG_CIFS_DFS_UPCALL=y -+CONFIG_CIFS_FSCACHE=y -+CONFIG_NLS_DEFAULT="utf8" -+CONFIG_NLS_CODEPAGE_437=y -+CONFIG_NLS_CODEPAGE_737=m -+CONFIG_NLS_CODEPAGE_775=m -+CONFIG_NLS_CODEPAGE_850=m -+CONFIG_NLS_CODEPAGE_852=m -+CONFIG_NLS_CODEPAGE_855=m -+CONFIG_NLS_CODEPAGE_857=m -+CONFIG_NLS_CODEPAGE_860=m -+CONFIG_NLS_CODEPAGE_861=m -+CONFIG_NLS_CODEPAGE_862=m -+CONFIG_NLS_CODEPAGE_863=m -+CONFIG_NLS_CODEPAGE_864=m -+CONFIG_NLS_CODEPAGE_865=m -+CONFIG_NLS_CODEPAGE_866=m -+CONFIG_NLS_CODEPAGE_869=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_CODEPAGE_950=m -+CONFIG_NLS_CODEPAGE_932=m -+CONFIG_NLS_CODEPAGE_949=m -+CONFIG_NLS_CODEPAGE_874=m -+CONFIG_NLS_ISO8859_8=m -+CONFIG_NLS_CODEPAGE_1250=m -+CONFIG_NLS_CODEPAGE_1251=m -+CONFIG_NLS_ASCII=y -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_ISO8859_2=m -+CONFIG_NLS_ISO8859_3=m -+CONFIG_NLS_ISO8859_4=m -+CONFIG_NLS_ISO8859_5=m -+CONFIG_NLS_ISO8859_6=m -+CONFIG_NLS_ISO8859_7=m -+CONFIG_NLS_ISO8859_9=m -+CONFIG_NLS_ISO8859_13=m -+CONFIG_NLS_ISO8859_14=m -+CONFIG_NLS_ISO8859_15=m -+CONFIG_NLS_KOI8_R=m -+CONFIG_NLS_KOI8_U=m -+CONFIG_NLS_UTF8=m -+CONFIG_DLM=m -+CONFIG_DLM_DEBUG=y -+CONFIG_DYNAMIC_DEBUG=y -+CONFIG_DEBUG_INFO=y -+CONFIG_DEBUG_INFO_REDUCED=y -+# CONFIG_ENABLE_WARN_DEPRECATED is not set -+CONFIG_STRIP_ASM_SYMS=y -+CONFIG_DEBUG_FS=y -+CONFIG_HEADERS_CHECK=y -+# CONFIG_FRAME_POINTER is not set -+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y -+CONFIG_DEBUG_VM=y -+CONFIG_DEBUG_MEMORY_INIT=y -+CONFIG_DEBUG_STACKOVERFLOW=y -+CONFIG_LOCKUP_DETECTOR=y -+CONFIG_SCHEDSTATS=y -+CONFIG_TIMER_STATS=y -+CONFIG_DEBUG_LIST=y 
-+CONFIG_DEBUG_CREDENTIALS=y -+CONFIG_RCU_CPU_STALL_TIMEOUT=60 -+CONFIG_ASYNC_RAID6_TEST=m -+CONFIG_KGDB=y -+CONFIG_SECURITY=y -+CONFIG_SECURITYFS=y -+CONFIG_SECURITY_NETWORK=y -+CONFIG_SECURITY_NETWORK_XFRM=y -+CONFIG_SECURITY_SELINUX=y -+CONFIG_SECURITY_SELINUX_BOOTPARAM=y -+CONFIG_SECURITY_SELINUX_DISABLE=y -+CONFIG_CRYPTO_PCRYPT=m -+CONFIG_CRYPTO_CRYPTD=m -+CONFIG_CRYPTO_TEST=m -+CONFIG_CRYPTO_CCM=m -+CONFIG_CRYPTO_GCM=m -+CONFIG_CRYPTO_CTS=m -+CONFIG_CRYPTO_LRW=m -+CONFIG_CRYPTO_PCBC=m -+CONFIG_CRYPTO_XTS=m -+CONFIG_CRYPTO_HMAC=y -+CONFIG_CRYPTO_XCBC=m -+CONFIG_CRYPTO_VMAC=m -+CONFIG_CRYPTO_MICHAEL_MIC=m -+CONFIG_CRYPTO_RMD128=m -+CONFIG_CRYPTO_RMD160=m -+CONFIG_CRYPTO_RMD256=m -+CONFIG_CRYPTO_RMD320=m -+CONFIG_CRYPTO_SHA1=y -+CONFIG_CRYPTO_SHA512=m -+CONFIG_CRYPTO_TGR192=m -+CONFIG_CRYPTO_WP512=m -+CONFIG_CRYPTO_ANUBIS=m -+CONFIG_CRYPTO_BLOWFISH=m -+CONFIG_CRYPTO_CAMELLIA=m -+CONFIG_CRYPTO_CAST5=m -+CONFIG_CRYPTO_CAST6=m -+CONFIG_CRYPTO_FCRYPT=m -+CONFIG_CRYPTO_KHAZAD=m -+CONFIG_CRYPTO_SEED=m -+CONFIG_CRYPTO_SERPENT=m -+CONFIG_CRYPTO_TEA=m -+CONFIG_CRYPTO_TWOFISH=m -+CONFIG_CRYPTO_LZO=m -diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig -new file mode 100644 -index 000000000000..e8c4003cbd81 ---- /dev/null -+++ b/arch/tile/configs/tilepro_defconfig -@@ -0,0 +1,524 @@ -+CONFIG_SYSVIPC=y -+CONFIG_POSIX_MQUEUE=y -+CONFIG_AUDIT=y -+CONFIG_NO_HZ=y -+CONFIG_HIGH_RES_TIMERS=y -+CONFIG_BSD_PROCESS_ACCT=y -+CONFIG_BSD_PROCESS_ACCT_V3=y -+CONFIG_TASKSTATS=y -+CONFIG_TASK_DELAY_ACCT=y -+CONFIG_TASK_XACCT=y -+CONFIG_TASK_IO_ACCOUNTING=y -+CONFIG_LOG_BUF_SHIFT=19 -+CONFIG_CGROUPS=y -+CONFIG_CGROUP_DEBUG=y -+CONFIG_CGROUP_DEVICE=y -+CONFIG_CPUSETS=y -+CONFIG_CGROUP_CPUACCT=y -+CONFIG_CGROUP_SCHED=y -+CONFIG_RT_GROUP_SCHED=y -+CONFIG_BLK_CGROUP=y -+CONFIG_NAMESPACES=y -+CONFIG_RELAY=y -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_RD_XZ=y -+CONFIG_SYSCTL_SYSCALL=y -+CONFIG_EMBEDDED=y -+# CONFIG_COMPAT_BRK is not set -+CONFIG_PROFILING=y -+CONFIG_MODULES=y -+CONFIG_MODULE_FORCE_LOAD=y -+CONFIG_MODULE_UNLOAD=y -+CONFIG_BLK_DEV_INTEGRITY=y -+CONFIG_PARTITION_ADVANCED=y -+CONFIG_OSF_PARTITION=y -+CONFIG_AMIGA_PARTITION=y -+CONFIG_MAC_PARTITION=y -+CONFIG_BSD_DISKLABEL=y -+CONFIG_MINIX_SUBPARTITION=y -+CONFIG_SOLARIS_X86_PARTITION=y -+CONFIG_UNIXWARE_DISKLABEL=y -+CONFIG_SGI_PARTITION=y -+CONFIG_SUN_PARTITION=y -+CONFIG_KARMA_PARTITION=y -+CONFIG_CFQ_GROUP_IOSCHED=y -+CONFIG_HZ_100=y -+# CONFIG_COMPACTION is not set -+CONFIG_PREEMPT=y -+CONFIG_PCI_DEBUG=y -+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -+CONFIG_BINFMT_MISC=y -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_XFRM_USER=y -+CONFIG_XFRM_SUB_POLICY=y -+CONFIG_XFRM_STATISTICS=y -+CONFIG_NET_KEY=m -+CONFIG_NET_KEY_MIGRATE=y -+CONFIG_INET=y -+CONFIG_IP_MULTICAST=y -+CONFIG_IP_ADVANCED_ROUTER=y -+CONFIG_IP_MULTIPLE_TABLES=y -+CONFIG_IP_ROUTE_MULTIPATH=y -+CONFIG_IP_ROUTE_VERBOSE=y -+CONFIG_NET_IPIP=m -+CONFIG_IP_MROUTE=y -+CONFIG_IP_PIMSM_V1=y -+CONFIG_IP_PIMSM_V2=y -+CONFIG_SYN_COOKIES=y -+CONFIG_INET_AH=m -+CONFIG_INET_ESP=m -+CONFIG_INET_IPCOMP=m -+CONFIG_INET_XFRM_MODE_TRANSPORT=m -+CONFIG_INET_XFRM_MODE_TUNNEL=m -+CONFIG_INET_XFRM_MODE_BEET=m -+CONFIG_INET_DIAG=m -+CONFIG_TCP_CONG_ADVANCED=y -+CONFIG_TCP_CONG_HSTCP=m -+CONFIG_TCP_CONG_HYBLA=m -+CONFIG_TCP_CONG_SCALABLE=m -+CONFIG_TCP_CONG_LP=m -+CONFIG_TCP_CONG_VENO=m -+CONFIG_TCP_CONG_YEAH=m -+CONFIG_TCP_CONG_ILLINOIS=m -+CONFIG_TCP_MD5SIG=y -+CONFIG_IPV6=y -+CONFIG_IPV6_ROUTER_PREF=y -+CONFIG_IPV6_ROUTE_INFO=y -+CONFIG_IPV6_OPTIMISTIC_DAD=y 
-+CONFIG_INET6_AH=m -+CONFIG_INET6_ESP=m -+CONFIG_INET6_IPCOMP=m -+CONFIG_IPV6_MIP6=m -+CONFIG_INET6_XFRM_MODE_TRANSPORT=m -+CONFIG_INET6_XFRM_MODE_TUNNEL=m -+CONFIG_INET6_XFRM_MODE_BEET=m -+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m -+CONFIG_IPV6_SIT=m -+CONFIG_IPV6_TUNNEL=m -+CONFIG_IPV6_MULTIPLE_TABLES=y -+CONFIG_IPV6_MROUTE=y -+CONFIG_IPV6_PIMSM_V2=y -+CONFIG_NETLABEL=y -+CONFIG_NETFILTER=y -+CONFIG_NF_CONNTRACK=m -+CONFIG_NF_CONNTRACK_SECMARK=y -+CONFIG_NF_CONNTRACK_ZONES=y -+CONFIG_NF_CONNTRACK_EVENTS=y -+CONFIG_NF_CT_PROTO_DCCP=m -+CONFIG_NF_CT_PROTO_UDPLITE=m -+CONFIG_NF_CONNTRACK_AMANDA=m -+CONFIG_NF_CONNTRACK_FTP=m -+CONFIG_NF_CONNTRACK_H323=m -+CONFIG_NF_CONNTRACK_IRC=m -+CONFIG_NF_CONNTRACK_NETBIOS_NS=m -+CONFIG_NF_CONNTRACK_PPTP=m -+CONFIG_NF_CONNTRACK_SANE=m -+CONFIG_NF_CONNTRACK_SIP=m -+CONFIG_NF_CONNTRACK_TFTP=m -+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m -+CONFIG_NETFILTER_XT_TARGET_DSCP=m -+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m -+CONFIG_NETFILTER_XT_TARGET_MARK=m -+CONFIG_NETFILTER_XT_TARGET_NFLOG=m -+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -+CONFIG_NETFILTER_XT_TARGET_NOTRACK=m -+CONFIG_NETFILTER_XT_TARGET_TEE=m -+CONFIG_NETFILTER_XT_TARGET_TPROXY=m -+CONFIG_NETFILTER_XT_TARGET_TRACE=m -+CONFIG_NETFILTER_XT_TARGET_SECMARK=m -+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m -+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -+CONFIG_NETFILTER_XT_MATCH_COMMENT=m -+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -+CONFIG_NETFILTER_XT_MATCH_DCCP=m -+CONFIG_NETFILTER_XT_MATCH_DSCP=m -+CONFIG_NETFILTER_XT_MATCH_ESP=m -+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -+CONFIG_NETFILTER_XT_MATCH_HELPER=m -+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -+CONFIG_NETFILTER_XT_MATCH_IPVS=m -+CONFIG_NETFILTER_XT_MATCH_LENGTH=m -+CONFIG_NETFILTER_XT_MATCH_LIMIT=m -+CONFIG_NETFILTER_XT_MATCH_MAC=m -+CONFIG_NETFILTER_XT_MATCH_MARK=m -+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -+CONFIG_NETFILTER_XT_MATCH_OSF=m -+CONFIG_NETFILTER_XT_MATCH_OWNER=m -+CONFIG_NETFILTER_XT_MATCH_POLICY=m -+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -+CONFIG_NETFILTER_XT_MATCH_QUOTA=m -+CONFIG_NETFILTER_XT_MATCH_RATEEST=m -+CONFIG_NETFILTER_XT_MATCH_REALM=m -+CONFIG_NETFILTER_XT_MATCH_RECENT=m -+CONFIG_NETFILTER_XT_MATCH_SOCKET=m -+CONFIG_NETFILTER_XT_MATCH_STATE=m -+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -+CONFIG_NETFILTER_XT_MATCH_STRING=m -+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -+CONFIG_NETFILTER_XT_MATCH_TIME=m -+CONFIG_NETFILTER_XT_MATCH_U32=m -+CONFIG_IP_VS=m -+CONFIG_IP_VS_IPV6=y -+CONFIG_IP_VS_PROTO_TCP=y -+CONFIG_IP_VS_PROTO_UDP=y -+CONFIG_IP_VS_PROTO_ESP=y -+CONFIG_IP_VS_PROTO_AH=y -+CONFIG_IP_VS_PROTO_SCTP=y -+CONFIG_IP_VS_RR=m -+CONFIG_IP_VS_WRR=m -+CONFIG_IP_VS_LC=m -+CONFIG_IP_VS_WLC=m -+CONFIG_IP_VS_LBLC=m -+CONFIG_IP_VS_LBLCR=m -+CONFIG_IP_VS_SED=m -+CONFIG_IP_VS_NQ=m -+CONFIG_NF_CONNTRACK_IPV4=m -+# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set -+CONFIG_IP_NF_IPTABLES=y -+CONFIG_IP_NF_MATCH_AH=m -+CONFIG_IP_NF_MATCH_ECN=m -+CONFIG_IP_NF_MATCH_TTL=m -+CONFIG_IP_NF_FILTER=y -+CONFIG_IP_NF_TARGET_REJECT=y -+CONFIG_IP_NF_MANGLE=m -+CONFIG_IP_NF_TARGET_ECN=m -+CONFIG_IP_NF_TARGET_TTL=m -+CONFIG_IP_NF_RAW=m -+CONFIG_IP_NF_SECURITY=m -+CONFIG_IP_NF_ARPTABLES=m -+CONFIG_IP_NF_ARPFILTER=m -+CONFIG_IP_NF_ARP_MANGLE=m -+CONFIG_NF_CONNTRACK_IPV6=m -+CONFIG_IP6_NF_MATCH_AH=m -+CONFIG_IP6_NF_MATCH_EUI64=m 
-+CONFIG_IP6_NF_MATCH_FRAG=m -+CONFIG_IP6_NF_MATCH_OPTS=m -+CONFIG_IP6_NF_MATCH_HL=m -+CONFIG_IP6_NF_MATCH_IPV6HEADER=m -+CONFIG_IP6_NF_MATCH_MH=m -+CONFIG_IP6_NF_MATCH_RT=m -+CONFIG_IP6_NF_TARGET_HL=m -+CONFIG_IP6_NF_FILTER=m -+CONFIG_IP6_NF_TARGET_REJECT=m -+CONFIG_IP6_NF_MANGLE=m -+CONFIG_IP6_NF_RAW=m -+CONFIG_IP6_NF_SECURITY=m -+CONFIG_BRIDGE_NF_EBTABLES=m -+CONFIG_BRIDGE_EBT_BROUTE=m -+CONFIG_BRIDGE_EBT_T_FILTER=m -+CONFIG_BRIDGE_EBT_T_NAT=m -+CONFIG_BRIDGE_EBT_802_3=m -+CONFIG_BRIDGE_EBT_AMONG=m -+CONFIG_BRIDGE_EBT_ARP=m -+CONFIG_BRIDGE_EBT_IP=m -+CONFIG_BRIDGE_EBT_IP6=m -+CONFIG_BRIDGE_EBT_LIMIT=m -+CONFIG_BRIDGE_EBT_MARK=m -+CONFIG_BRIDGE_EBT_PKTTYPE=m -+CONFIG_BRIDGE_EBT_STP=m -+CONFIG_BRIDGE_EBT_VLAN=m -+CONFIG_BRIDGE_EBT_ARPREPLY=m -+CONFIG_BRIDGE_EBT_DNAT=m -+CONFIG_BRIDGE_EBT_MARK_T=m -+CONFIG_BRIDGE_EBT_REDIRECT=m -+CONFIG_BRIDGE_EBT_SNAT=m -+CONFIG_BRIDGE_EBT_LOG=m -+CONFIG_BRIDGE_EBT_ULOG=m -+CONFIG_BRIDGE_EBT_NFLOG=m -+CONFIG_RDS=m -+CONFIG_RDS_TCP=m -+CONFIG_BRIDGE=m -+CONFIG_VLAN_8021Q=m -+CONFIG_VLAN_8021Q_GVRP=y -+CONFIG_PHONET=m -+CONFIG_NET_SCHED=y -+CONFIG_NET_SCH_CBQ=m -+CONFIG_NET_SCH_HTB=m -+CONFIG_NET_SCH_HFSC=m -+CONFIG_NET_SCH_PRIO=m -+CONFIG_NET_SCH_MULTIQ=m -+CONFIG_NET_SCH_RED=m -+CONFIG_NET_SCH_SFQ=m -+CONFIG_NET_SCH_TEQL=m -+CONFIG_NET_SCH_TBF=m -+CONFIG_NET_SCH_GRED=m -+CONFIG_NET_SCH_DSMARK=m -+CONFIG_NET_SCH_NETEM=m -+CONFIG_NET_SCH_DRR=m -+CONFIG_NET_SCH_INGRESS=m -+CONFIG_NET_CLS_BASIC=m -+CONFIG_NET_CLS_TCINDEX=m -+CONFIG_NET_CLS_ROUTE4=m -+CONFIG_NET_CLS_FW=m -+CONFIG_NET_CLS_U32=m -+CONFIG_CLS_U32_PERF=y -+CONFIG_CLS_U32_MARK=y -+CONFIG_NET_CLS_RSVP=m -+CONFIG_NET_CLS_RSVP6=m -+CONFIG_NET_CLS_FLOW=m -+CONFIG_NET_CLS_CGROUP=y -+CONFIG_NET_EMATCH=y -+CONFIG_NET_EMATCH_CMP=m -+CONFIG_NET_EMATCH_NBYTE=m -+CONFIG_NET_EMATCH_U32=m -+CONFIG_NET_EMATCH_META=m -+CONFIG_NET_EMATCH_TEXT=m -+CONFIG_NET_CLS_ACT=y -+CONFIG_NET_ACT_POLICE=m -+CONFIG_NET_ACT_GACT=m -+CONFIG_GACT_PROB=y -+CONFIG_NET_ACT_MIRRED=m -+CONFIG_NET_ACT_IPT=m -+CONFIG_NET_ACT_NAT=m -+CONFIG_NET_ACT_PEDIT=m -+CONFIG_NET_ACT_SIMP=m -+CONFIG_NET_ACT_SKBEDIT=m -+CONFIG_NET_CLS_IND=y -+CONFIG_DCB=y -+CONFIG_DNS_RESOLVER=y -+# CONFIG_WIRELESS is not set -+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -+CONFIG_DEVTMPFS=y -+CONFIG_DEVTMPFS_MOUNT=y -+CONFIG_CONNECTOR=y -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_CRYPTOLOOP=m -+CONFIG_BLK_DEV_SX8=m -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=16384 -+CONFIG_ATA_OVER_ETH=m -+CONFIG_RAID_ATTRS=m -+CONFIG_BLK_DEV_SD=y -+CONFIG_SCSI_CONSTANTS=y -+CONFIG_SCSI_LOGGING=y -+CONFIG_ATA=y -+CONFIG_SATA_SIL24=y -+# CONFIG_ATA_SFF is not set -+CONFIG_MD=y -+CONFIG_BLK_DEV_MD=y -+CONFIG_MD_LINEAR=m -+CONFIG_MD_RAID0=m -+CONFIG_MD_RAID1=m -+CONFIG_MD_RAID10=m -+CONFIG_MD_RAID456=m -+CONFIG_MD_FAULTY=m -+CONFIG_BLK_DEV_DM=m -+CONFIG_DM_DEBUG=y -+CONFIG_DM_CRYPT=m -+CONFIG_DM_SNAPSHOT=m -+CONFIG_DM_MIRROR=m -+CONFIG_DM_LOG_USERSPACE=m -+CONFIG_DM_ZERO=m -+CONFIG_DM_MULTIPATH=m -+CONFIG_DM_MULTIPATH_QL=m -+CONFIG_DM_MULTIPATH_ST=m -+CONFIG_DM_DELAY=m -+CONFIG_DM_UEVENT=y -+CONFIG_FUSION=y -+CONFIG_FUSION_SAS=y -+CONFIG_NETDEVICES=y -+CONFIG_BONDING=m -+CONFIG_DUMMY=m -+CONFIG_IFB=m -+CONFIG_MACVLAN=m -+CONFIG_MACVTAP=m -+CONFIG_NETCONSOLE=m -+CONFIG_NETCONSOLE_DYNAMIC=y -+CONFIG_TUN=y -+CONFIG_VETH=m -+CONFIG_NET_DSA_MV88E6060=y -+CONFIG_NET_DSA_MV88E6XXX=y -+# CONFIG_NET_VENDOR_3COM is not set -+CONFIG_E1000E=y -+# CONFIG_WLAN is not set -+# CONFIG_INPUT_MOUSEDEV is not set -+# CONFIG_INPUT_KEYBOARD is not set -+# CONFIG_INPUT_MOUSE is not set -+# CONFIG_SERIO is not 
set -+# CONFIG_VT is not set -+# CONFIG_LEGACY_PTYS is not set -+CONFIG_HW_RANDOM=y -+CONFIG_HW_RANDOM_TIMERIOMEM=m -+CONFIG_I2C=y -+CONFIG_I2C_CHARDEV=y -+# CONFIG_HWMON is not set -+CONFIG_WATCHDOG=y -+CONFIG_WATCHDOG_NOWAYOUT=y -+# CONFIG_VGA_ARB is not set -+# CONFIG_USB_SUPPORT is not set -+CONFIG_EDAC=y -+CONFIG_RTC_CLASS=y -+CONFIG_RTC_DRV_TILE=y -+CONFIG_EXT2_FS=y -+CONFIG_EXT2_FS_XATTR=y -+CONFIG_EXT2_FS_POSIX_ACL=y -+CONFIG_EXT2_FS_SECURITY=y -+CONFIG_EXT2_FS_XIP=y -+CONFIG_EXT3_FS=y -+CONFIG_EXT3_FS_POSIX_ACL=y -+CONFIG_EXT3_FS_SECURITY=y -+CONFIG_EXT4_FS=y -+CONFIG_EXT4_FS_POSIX_ACL=y -+CONFIG_EXT4_FS_SECURITY=y -+CONFIG_XFS_FS=y -+CONFIG_XFS_QUOTA=y -+CONFIG_XFS_POSIX_ACL=y -+CONFIG_GFS2_FS=m -+CONFIG_GFS2_FS_LOCKING_DLM=y -+CONFIG_BTRFS_FS=m -+CONFIG_BTRFS_FS_POSIX_ACL=y -+CONFIG_QUOTA=y -+CONFIG_QUOTA_NETLINK_INTERFACE=y -+# CONFIG_PRINT_QUOTA_WARNING is not set -+CONFIG_QFMT_V2=y -+CONFIG_AUTOFS4_FS=m -+CONFIG_FUSE_FS=y -+CONFIG_CUSE=m -+CONFIG_FSCACHE=m -+CONFIG_FSCACHE_STATS=y -+CONFIG_CACHEFILES=m -+CONFIG_ISO9660_FS=m -+CONFIG_JOLIET=y -+CONFIG_ZISOFS=y -+CONFIG_UDF_FS=m -+CONFIG_MSDOS_FS=m -+CONFIG_VFAT_FS=m -+CONFIG_FAT_DEFAULT_IOCHARSET="ascii" -+CONFIG_PROC_KCORE=y -+CONFIG_TMPFS=y -+CONFIG_TMPFS_POSIX_ACL=y -+CONFIG_HUGETLBFS=y -+CONFIG_CONFIGFS_FS=m -+CONFIG_ECRYPT_FS=m -+CONFIG_CRAMFS=m -+CONFIG_SQUASHFS=m -+CONFIG_NFS_FS=m -+CONFIG_NFS_V3_ACL=y -+CONFIG_NFS_V4=m -+CONFIG_NFS_V4_1=y -+CONFIG_NFS_FSCACHE=y -+CONFIG_NFSD=m -+CONFIG_NFSD_V3_ACL=y -+CONFIG_NFSD_V4=y -+CONFIG_CIFS=m -+CONFIG_CIFS_STATS=y -+CONFIG_CIFS_WEAK_PW_HASH=y -+CONFIG_CIFS_UPCALL=y -+CONFIG_CIFS_XATTR=y -+CONFIG_CIFS_POSIX=y -+CONFIG_CIFS_DFS_UPCALL=y -+CONFIG_CIFS_FSCACHE=y -+CONFIG_NLS=y -+CONFIG_NLS_DEFAULT="utf8" -+CONFIG_NLS_CODEPAGE_437=y -+CONFIG_NLS_CODEPAGE_737=m -+CONFIG_NLS_CODEPAGE_775=m -+CONFIG_NLS_CODEPAGE_850=m -+CONFIG_NLS_CODEPAGE_852=m -+CONFIG_NLS_CODEPAGE_855=m -+CONFIG_NLS_CODEPAGE_857=m -+CONFIG_NLS_CODEPAGE_860=m -+CONFIG_NLS_CODEPAGE_861=m -+CONFIG_NLS_CODEPAGE_862=m -+CONFIG_NLS_CODEPAGE_863=m -+CONFIG_NLS_CODEPAGE_864=m -+CONFIG_NLS_CODEPAGE_865=m -+CONFIG_NLS_CODEPAGE_866=m -+CONFIG_NLS_CODEPAGE_869=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_CODEPAGE_950=m -+CONFIG_NLS_CODEPAGE_932=m -+CONFIG_NLS_CODEPAGE_949=m -+CONFIG_NLS_CODEPAGE_874=m -+CONFIG_NLS_ISO8859_8=m -+CONFIG_NLS_CODEPAGE_1250=m -+CONFIG_NLS_CODEPAGE_1251=m -+CONFIG_NLS_ASCII=y -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_ISO8859_2=m -+CONFIG_NLS_ISO8859_3=m -+CONFIG_NLS_ISO8859_4=m -+CONFIG_NLS_ISO8859_5=m -+CONFIG_NLS_ISO8859_6=m -+CONFIG_NLS_ISO8859_7=m -+CONFIG_NLS_ISO8859_9=m -+CONFIG_NLS_ISO8859_13=m -+CONFIG_NLS_ISO8859_14=m -+CONFIG_NLS_ISO8859_15=m -+CONFIG_NLS_KOI8_R=m -+CONFIG_NLS_KOI8_U=m -+CONFIG_NLS_UTF8=m -+CONFIG_DLM=m -+CONFIG_DLM_DEBUG=y -+CONFIG_DYNAMIC_DEBUG=y -+CONFIG_DEBUG_INFO=y -+CONFIG_DEBUG_INFO_REDUCED=y -+# CONFIG_ENABLE_WARN_DEPRECATED is not set -+CONFIG_FRAME_WARN=2048 -+CONFIG_STRIP_ASM_SYMS=y -+CONFIG_DEBUG_FS=y -+CONFIG_HEADERS_CHECK=y -+# CONFIG_FRAME_POINTER is not set -+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y -+CONFIG_MAGIC_SYSRQ=y -+CONFIG_DEBUG_VM=y -+CONFIG_DEBUG_MEMORY_INIT=y -+CONFIG_DEBUG_STACKOVERFLOW=y -+CONFIG_LOCKUP_DETECTOR=y -+CONFIG_SCHEDSTATS=y -+CONFIG_TIMER_STATS=y -+CONFIG_DEBUG_LIST=y -+CONFIG_DEBUG_CREDENTIALS=y -+CONFIG_RCU_CPU_STALL_TIMEOUT=60 -+CONFIG_ASYNC_RAID6_TEST=m -+CONFIG_SECURITY=y -+CONFIG_SECURITYFS=y -+CONFIG_SECURITY_NETWORK=y -+CONFIG_SECURITY_NETWORK_XFRM=y -+CONFIG_SECURITY_SELINUX=y -+CONFIG_SECURITY_SELINUX_BOOTPARAM=y 
-+CONFIG_SECURITY_SELINUX_DISABLE=y -+CONFIG_CRYPTO_PCRYPT=m -+CONFIG_CRYPTO_CRYPTD=m -+CONFIG_CRYPTO_TEST=m -+CONFIG_CRYPTO_CCM=m -+CONFIG_CRYPTO_GCM=m -+CONFIG_CRYPTO_CTS=m -+CONFIG_CRYPTO_LRW=m -+CONFIG_CRYPTO_PCBC=m -+CONFIG_CRYPTO_XTS=m -+CONFIG_CRYPTO_HMAC=y -+CONFIG_CRYPTO_XCBC=m -+CONFIG_CRYPTO_VMAC=m -+CONFIG_CRYPTO_MICHAEL_MIC=m -+CONFIG_CRYPTO_RMD128=m -+CONFIG_CRYPTO_RMD160=m -+CONFIG_CRYPTO_RMD256=m -+CONFIG_CRYPTO_RMD320=m -+CONFIG_CRYPTO_SHA1=y -+CONFIG_CRYPTO_SHA512=m -+CONFIG_CRYPTO_TGR192=m -+CONFIG_CRYPTO_WP512=m -+CONFIG_CRYPTO_ANUBIS=m -+CONFIG_CRYPTO_BLOWFISH=m -+CONFIG_CRYPTO_CAMELLIA=m -+CONFIG_CRYPTO_CAST5=m -+CONFIG_CRYPTO_CAST6=m -+CONFIG_CRYPTO_FCRYPT=m -+CONFIG_CRYPTO_KHAZAD=m -+CONFIG_CRYPTO_SEED=m -+CONFIG_CRYPTO_SERPENT=m -+CONFIG_CRYPTO_TEA=m -+CONFIG_CRYPTO_TWOFISH=m -+CONFIG_CRYPTO_LZO=m -+CONFIG_CRC_CCITT=m -+CONFIG_CRC7=m -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 8ef85139553f..6f6ecda60d5b 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1034,6 +1034,22 @@ config NR_CPUS - config SCHED_SMT - def_bool y if SMP - -+config SMT_NICE -+ bool "SMT (Hyperthreading) aware nice priority and policy support" -+ depends on SCHED_MUQSS && SCHED_SMT -+ default y -+ ---help--- -+ Enabling Hyperthreading on Intel CPUs decreases the effectiveness -+ of the use of 'nice' levels and different scheduling policies -+ (e.g. realtime) due to sharing of CPU power between hyperthreads. -+ SMT nice support makes each logical CPU aware of what is running on -+ its hyperthread siblings, maintaining appropriate distribution of -+ CPU according to nice levels and scheduling policies at the expense -+ of slightly increased overhead. -+ -+ If unsure say Y here. -+ -+ - config SCHED_MC - def_bool y - prompt "Multi-core scheduler support" -@@ -1064,6 +1080,8 @@ config SCHED_MC_PRIO - - If unsure say Y here. - -+source "kernel/Kconfig.MuQSS" -+ - config UP_LATE_INIT - def_bool y - depends on !SMP && X86_LOCAL_APIC -@@ -1433,7 +1451,7 @@ config HIGHMEM64G - endchoice - - choice -- prompt "Memory split" if EXPERT -+ prompt "Memory split" - default VMSPLIT_3G - depends on X86_32 - ---help--- -@@ -1453,17 +1471,17 @@ choice - option alone! 
- - config VMSPLIT_3G -- bool "3G/1G user/kernel split" -+ bool "Default 896MB lowmem (3G/1G user/kernel split)" - config VMSPLIT_3G_OPT - depends on !X86_PAE -- bool "3G/1G user/kernel split (for full 1G low memory)" -+ bool "1GB lowmem (3G/1G user/kernel split)" - config VMSPLIT_2G -- bool "2G/2G user/kernel split" -+ bool "2GB lowmem (2G/2G user/kernel split)" - config VMSPLIT_2G_OPT - depends on !X86_PAE -- bool "2G/2G user/kernel split (for full 2G low memory)" -+ bool "2GB lowmem (2G/2G user/kernel split)" - config VMSPLIT_1G -- bool "1G/3G user/kernel split" -+ bool "3GB lowmem (1G/3G user/kernel split)" - endchoice - - config PAGE_OFFSET -diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig -index 59ce9ed58430..f19741b0f43d 100644 ---- a/arch/x86/configs/i386_defconfig -+++ b/arch/x86/configs/i386_defconfig -@@ -29,7 +29,7 @@ CONFIG_SMP=y - CONFIG_X86_GENERIC=y - CONFIG_HPET_TIMER=y - CONFIG_SCHED_SMT=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y - CONFIG_X86_MCE=y - CONFIG_X86_REBOOTFIXUPS=y -diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig -index d0a5ffeae8df..63f1fb92590c 100644 ---- a/arch/x86/configs/x86_64_defconfig -+++ b/arch/x86/configs/x86_64_defconfig -@@ -28,7 +28,7 @@ CONFIG_SMP=y - CONFIG_CALGARY_IOMMU=y - CONFIG_NR_CPUS=64 - CONFIG_SCHED_SMT=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y - CONFIG_X86_MCE=y - CONFIG_MICROCODE=y -diff --git a/drivers/block/swim.c b/drivers/block/swim.c -index 4c297f69171d..5bc4f1be2617 100644 ---- a/drivers/block/swim.c -+++ b/drivers/block/swim.c -@@ -328,7 +328,7 @@ static inline void swim_motor(struct swim __iomem *base, - if (swim_readbit(base, MOTOR_ON)) - break; - current->state = TASK_INTERRUPTIBLE; -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - } else if (action == OFF) { - swim_action(base, MOTOR_OFF); -@@ -347,7 +347,7 @@ static inline void swim_eject(struct swim __iomem *base) - if (!swim_readbit(base, DISK_IN)) - break; - current->state = TASK_INTERRUPTIBLE; -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - swim_select(base, RELAX); - } -@@ -371,7 +371,7 @@ static inline int swim_step(struct swim __iomem *base) - for (wait = 0; wait < HZ; wait++) { - - current->state = TASK_INTERRUPTIBLE; -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - - swim_select(base, RELAX); - if (!swim_readbit(base, STEP)) -diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c -index 2aab80e19ae0..6200dbb3b5ef 100644 ---- a/drivers/char/ipmi/ipmi_msghandler.c -+++ b/drivers/char/ipmi/ipmi_msghandler.c -@@ -3544,7 +3544,7 @@ static void cleanup_smi_msgs(struct ipmi_smi *intf) - /* Current message first, to preserve order */ - while (intf->curr_msg && !list_empty(&intf->waiting_rcv_msgs)) { - /* Wait for the message to clear out. */ -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - - /* No need for locks, the interface is down. */ -diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c -index 22c6a2e61236..c4bccd444cbf 100644 ---- a/drivers/char/ipmi/ipmi_ssif.c -+++ b/drivers/char/ipmi/ipmi_ssif.c -@@ -1289,7 +1289,7 @@ static void shutdown_ssif(void *send_info) - - /* make sure the driver is not looking for flags any more. 
*/ - while (ssif_info->ssif_state != SSIF_NORMAL) -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - - ssif_info->stopping = true; - del_timer_sync(&ssif_info->watch_timer); -diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c -new file mode 100644 -index 000000000000..5228e78df804 ---- /dev/null -+++ b/drivers/char/snsc.c -@@ -0,0 +1,469 @@ -+/* -+ * SN Platform system controller communication support -+ * -+ * This file is subject to the terms and conditions of the GNU General Public -+ * License. See the file "COPYING" in the main directory of this archive -+ * for more details. -+ * -+ * Copyright (C) 2004, 2006 Silicon Graphics, Inc. All rights reserved. -+ */ -+ -+/* -+ * System controller communication driver -+ * -+ * This driver allows a user process to communicate with the system -+ * controller (a.k.a. "IRouter") network in an SGI SN system. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "snsc.h" -+ -+#define SYSCTL_BASENAME "snsc" -+ -+#define SCDRV_BUFSZ 2048 -+#define SCDRV_TIMEOUT 1000 -+ -+static DEFINE_MUTEX(scdrv_mutex); -+static irqreturn_t -+scdrv_interrupt(int irq, void *subch_data) -+{ -+ struct subch_data_s *sd = subch_data; -+ unsigned long flags; -+ int status; -+ -+ spin_lock_irqsave(&sd->sd_rlock, flags); -+ spin_lock(&sd->sd_wlock); -+ status = ia64_sn_irtr_intr(sd->sd_nasid, sd->sd_subch); -+ -+ if (status > 0) { -+ if (status & SAL_IROUTER_INTR_RECV) { -+ wake_up(&sd->sd_rq); -+ } -+ if (status & SAL_IROUTER_INTR_XMIT) { -+ ia64_sn_irtr_intr_disable -+ (sd->sd_nasid, sd->sd_subch, -+ SAL_IROUTER_INTR_XMIT); -+ wake_up(&sd->sd_wq); -+ } -+ } -+ spin_unlock(&sd->sd_wlock); -+ spin_unlock_irqrestore(&sd->sd_rlock, flags); -+ return IRQ_HANDLED; -+} -+ -+/* -+ * scdrv_open -+ * -+ * Reserve a subchannel for system controller communication. -+ */ -+ -+static int -+scdrv_open(struct inode *inode, struct file *file) -+{ -+ struct sysctl_data_s *scd; -+ struct subch_data_s *sd; -+ int rv; -+ -+ /* look up device info for this device file */ -+ scd = container_of(inode->i_cdev, struct sysctl_data_s, scd_cdev); -+ -+ /* allocate memory for subchannel data */ -+ sd = kzalloc(sizeof (struct subch_data_s), GFP_KERNEL); -+ if (sd == NULL) { -+ printk("%s: couldn't allocate subchannel data\n", -+ __func__); -+ return -ENOMEM; -+ } -+ -+ /* initialize subch_data_s fields */ -+ sd->sd_nasid = scd->scd_nasid; -+ sd->sd_subch = ia64_sn_irtr_open(scd->scd_nasid); -+ -+ if (sd->sd_subch < 0) { -+ kfree(sd); -+ printk("%s: couldn't allocate subchannel\n", __func__); -+ return -EBUSY; -+ } -+ -+ spin_lock_init(&sd->sd_rlock); -+ spin_lock_init(&sd->sd_wlock); -+ init_waitqueue_head(&sd->sd_rq); -+ init_waitqueue_head(&sd->sd_wq); -+ sema_init(&sd->sd_rbs, 1); -+ sema_init(&sd->sd_wbs, 1); -+ -+ file->private_data = sd; -+ -+ /* hook this subchannel up to the system controller interrupt */ -+ mutex_lock(&scdrv_mutex); -+ rv = request_irq(SGI_UART_VECTOR, scdrv_interrupt, -+ IRQF_SHARED, SYSCTL_BASENAME, sd); -+ if (rv) { -+ ia64_sn_irtr_close(sd->sd_nasid, sd->sd_subch); -+ kfree(sd); -+ printk("%s: irq request failed (%d)\n", __func__, rv); -+ mutex_unlock(&scdrv_mutex); -+ return -EBUSY; -+ } -+ mutex_unlock(&scdrv_mutex); -+ return 0; -+} -+ -+/* -+ * scdrv_release -+ * -+ * Release a previously-reserved subchannel. 
-+ */ -+ -+static int -+scdrv_release(struct inode *inode, struct file *file) -+{ -+ struct subch_data_s *sd = (struct subch_data_s *) file->private_data; -+ int rv; -+ -+ /* free the interrupt */ -+ free_irq(SGI_UART_VECTOR, sd); -+ -+ /* ask SAL to close the subchannel */ -+ rv = ia64_sn_irtr_close(sd->sd_nasid, sd->sd_subch); -+ -+ kfree(sd); -+ return rv; -+} -+ -+/* -+ * scdrv_read -+ * -+ * Called to read bytes from the open IRouter pipe. -+ * -+ */ -+ -+static inline int -+read_status_check(struct subch_data_s *sd, int *len) -+{ -+ return ia64_sn_irtr_recv(sd->sd_nasid, sd->sd_subch, sd->sd_rb, len); -+} -+ -+static ssize_t -+scdrv_read(struct file *file, char __user *buf, size_t count, loff_t *f_pos) -+{ -+ int status; -+ int len; -+ unsigned long flags; -+ struct subch_data_s *sd = (struct subch_data_s *) file->private_data; -+ -+ /* try to get control of the read buffer */ -+ if (down_trylock(&sd->sd_rbs)) { -+ /* somebody else has it now; -+ * if we're non-blocking, then exit... -+ */ -+ if (file->f_flags & O_NONBLOCK) { -+ return -EAGAIN; -+ } -+ /* ...or if we want to block, then do so here */ -+ if (down_interruptible(&sd->sd_rbs)) { -+ /* something went wrong with wait */ -+ return -ERESTARTSYS; -+ } -+ } -+ -+ /* anything to read? */ -+ len = CHUNKSIZE; -+ spin_lock_irqsave(&sd->sd_rlock, flags); -+ status = read_status_check(sd, &len); -+ -+ /* if not, and we're blocking I/O, loop */ -+ while (status < 0) { -+ DECLARE_WAITQUEUE(wait, current); -+ -+ if (file->f_flags & O_NONBLOCK) { -+ spin_unlock_irqrestore(&sd->sd_rlock, flags); -+ up(&sd->sd_rbs); -+ return -EAGAIN; -+ } -+ -+ len = CHUNKSIZE; -+ set_current_state(TASK_INTERRUPTIBLE); -+ add_wait_queue(&sd->sd_rq, &wait); -+ spin_unlock_irqrestore(&sd->sd_rlock, flags); -+ -+ schedule_msec_hrtimeout((SCDRV_TIMEOUT)); -+ -+ remove_wait_queue(&sd->sd_rq, &wait); -+ if (signal_pending(current)) { -+ /* wait was interrupted */ -+ up(&sd->sd_rbs); -+ return -ERESTARTSYS; -+ } -+ -+ spin_lock_irqsave(&sd->sd_rlock, flags); -+ status = read_status_check(sd, &len); -+ } -+ spin_unlock_irqrestore(&sd->sd_rlock, flags); -+ -+ if (len > 0) { -+ /* we read something in the last read_status_check(); copy -+ * it out to user space -+ */ -+ if (count < len) { -+ pr_debug("%s: only accepting %d of %d bytes\n", -+ __func__, (int) count, len); -+ } -+ len = min((int) count, len); -+ if (copy_to_user(buf, sd->sd_rb, len)) -+ len = -EFAULT; -+ } -+ -+ /* release the read buffer and wake anyone who might be -+ * waiting for it -+ */ -+ up(&sd->sd_rbs); -+ -+ /* return the number of characters read in */ -+ return len; -+} -+ -+/* -+ * scdrv_write -+ * -+ * Writes a chunk of an IRouter packet (or other system controller data) -+ * to the system controller. -+ * -+ */ -+static inline int -+write_status_check(struct subch_data_s *sd, int count) -+{ -+ return ia64_sn_irtr_send(sd->sd_nasid, sd->sd_subch, sd->sd_wb, count); -+} -+ -+static ssize_t -+scdrv_write(struct file *file, const char __user *buf, -+ size_t count, loff_t *f_pos) -+{ -+ unsigned long flags; -+ int status; -+ struct subch_data_s *sd = (struct subch_data_s *) file->private_data; -+ -+ /* try to get control of the write buffer */ -+ if (down_trylock(&sd->sd_wbs)) { -+ /* somebody else has it now; -+ * if we're non-blocking, then exit... 
-+ */ -+ if (file->f_flags & O_NONBLOCK) { -+ return -EAGAIN; -+ } -+ /* ...or if we want to block, then do so here */ -+ if (down_interruptible(&sd->sd_wbs)) { -+ /* something went wrong with wait */ -+ return -ERESTARTSYS; -+ } -+ } -+ -+ count = min((int) count, CHUNKSIZE); -+ if (copy_from_user(sd->sd_wb, buf, count)) { -+ up(&sd->sd_wbs); -+ return -EFAULT; -+ } -+ -+ /* try to send the buffer */ -+ spin_lock_irqsave(&sd->sd_wlock, flags); -+ status = write_status_check(sd, count); -+ -+ /* if we failed, and we want to block, then loop */ -+ while (status <= 0) { -+ DECLARE_WAITQUEUE(wait, current); -+ -+ if (file->f_flags & O_NONBLOCK) { -+ spin_unlock_irqrestore(&sd->sd_wlock, flags); -+ up(&sd->sd_wbs); -+ return -EAGAIN; -+ } -+ -+ set_current_state(TASK_INTERRUPTIBLE); -+ add_wait_queue(&sd->sd_wq, &wait); -+ spin_unlock_irqrestore(&sd->sd_wlock, flags); -+ -+ schedule_msec_hrtimeout((SCDRV_TIMEOUT)); -+ -+ remove_wait_queue(&sd->sd_wq, &wait); -+ if (signal_pending(current)) { -+ /* wait was interrupted */ -+ up(&sd->sd_wbs); -+ return -ERESTARTSYS; -+ } -+ -+ spin_lock_irqsave(&sd->sd_wlock, flags); -+ status = write_status_check(sd, count); -+ } -+ spin_unlock_irqrestore(&sd->sd_wlock, flags); -+ -+ /* release the write buffer and wake anyone who's waiting for it */ -+ up(&sd->sd_wbs); -+ -+ /* return the number of characters accepted (should be the complete -+ * "chunk" as requested) -+ */ -+ if ((status >= 0) && (status < count)) { -+ pr_debug("Didn't accept the full chunk; %d of %d\n", -+ status, (int) count); -+ } -+ return status; -+} -+ -+static __poll_t -+scdrv_poll(struct file *file, struct poll_table_struct *wait) -+{ -+ __poll_t mask = 0; -+ int status = 0; -+ struct subch_data_s *sd = (struct subch_data_s *) file->private_data; -+ unsigned long flags; -+ -+ poll_wait(file, &sd->sd_rq, wait); -+ poll_wait(file, &sd->sd_wq, wait); -+ -+ spin_lock_irqsave(&sd->sd_rlock, flags); -+ spin_lock(&sd->sd_wlock); -+ status = ia64_sn_irtr_intr(sd->sd_nasid, sd->sd_subch); -+ spin_unlock(&sd->sd_wlock); -+ spin_unlock_irqrestore(&sd->sd_rlock, flags); -+ -+ if (status > 0) { -+ if (status & SAL_IROUTER_INTR_RECV) { -+ mask |= EPOLLIN | EPOLLRDNORM; -+ } -+ if (status & SAL_IROUTER_INTR_XMIT) { -+ mask |= EPOLLOUT | EPOLLWRNORM; -+ } -+ } -+ -+ return mask; -+} -+ -+static const struct file_operations scdrv_fops = { -+ .owner = THIS_MODULE, -+ .read = scdrv_read, -+ .write = scdrv_write, -+ .poll = scdrv_poll, -+ .open = scdrv_open, -+ .release = scdrv_release, -+ .llseek = noop_llseek, -+}; -+ -+static struct class *snsc_class; -+ -+/* -+ * scdrv_init -+ * -+ * Called at boot time to initialize the system controller communication -+ * facility. 
-+ */ -+int __init -+scdrv_init(void) -+{ -+ geoid_t geoid; -+ cnodeid_t cnode; -+ char devname[32]; -+ char *devnamep; -+ struct sysctl_data_s *scd; -+ void *salbuf; -+ dev_t first_dev, dev; -+ nasid_t event_nasid; -+ -+ if (!ia64_platform_is("sn2")) -+ return -ENODEV; -+ -+ event_nasid = ia64_sn_get_console_nasid(); -+ -+ snsc_class = class_create(THIS_MODULE, SYSCTL_BASENAME); -+ if (IS_ERR(snsc_class)) { -+ printk("%s: failed to allocate class\n", __func__); -+ return PTR_ERR(snsc_class); -+ } -+ -+ if (alloc_chrdev_region(&first_dev, 0, num_cnodes, -+ SYSCTL_BASENAME) < 0) { -+ printk("%s: failed to register SN system controller device\n", -+ __func__); -+ return -ENODEV; -+ } -+ -+ for (cnode = 0; cnode < num_cnodes; cnode++) { -+ geoid = cnodeid_get_geoid(cnode); -+ devnamep = devname; -+ format_module_id(devnamep, geo_module(geoid), -+ MODULE_FORMAT_BRIEF); -+ devnamep = devname + strlen(devname); -+ sprintf(devnamep, "^%d#%d", geo_slot(geoid), -+ geo_slab(geoid)); -+ -+ /* allocate sysctl device data */ -+ scd = kzalloc(sizeof (struct sysctl_data_s), -+ GFP_KERNEL); -+ if (!scd) { -+ printk("%s: failed to allocate device info" -+ "for %s/%s\n", __func__, -+ SYSCTL_BASENAME, devname); -+ continue; -+ } -+ -+ /* initialize sysctl device data fields */ -+ scd->scd_nasid = cnodeid_to_nasid(cnode); -+ if (!(salbuf = kmalloc(SCDRV_BUFSZ, GFP_KERNEL))) { -+ printk("%s: failed to allocate driver buffer" -+ "(%s%s)\n", __func__, -+ SYSCTL_BASENAME, devname); -+ kfree(scd); -+ continue; -+ } -+ -+ if (ia64_sn_irtr_init(scd->scd_nasid, salbuf, -+ SCDRV_BUFSZ) < 0) { -+ printk -+ ("%s: failed to initialize SAL for" -+ " system controller communication" -+ " (%s/%s): outdated PROM?\n", -+ __func__, SYSCTL_BASENAME, devname); -+ kfree(scd); -+ kfree(salbuf); -+ continue; -+ } -+ -+ dev = first_dev + cnode; -+ cdev_init(&scd->scd_cdev, &scdrv_fops); -+ if (cdev_add(&scd->scd_cdev, dev, 1)) { -+ printk("%s: failed to register system" -+ " controller device (%s%s)\n", -+ __func__, SYSCTL_BASENAME, devname); -+ kfree(scd); -+ kfree(salbuf); -+ continue; -+ } -+ -+ device_create(snsc_class, NULL, dev, NULL, -+ "%s", devname); -+ -+ ia64_sn_irtr_intr_enable(scd->scd_nasid, -+ 0 /*ignored */ , -+ SAL_IROUTER_INTR_RECV); -+ -+ /* on the console nasid, prepare to receive -+ * system controller environmental events -+ */ -+ if(scd->scd_nasid == event_nasid) { -+ scdrv_event_init(scd); -+ } -+ } -+ return 0; -+} -+device_initcall(scdrv_init); -diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c -index e5252ef3812f..6ae6241185ea 100644 ---- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c -+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c -@@ -237,7 +237,7 @@ static int vmw_fifo_wait_noirq(struct vmw_private *dev_priv, - DRM_ERROR("SVGA device lockup.\n"); - break; - } -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - if (interruptible && signal_pending(current)) { - ret = -ERESTARTSYS; - break; -diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c -index 75f3efee21a4..09b1932ce85b 100644 ---- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c -+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c -@@ -203,7 +203,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, - break; - } - if (lazy) -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - else if ((++count & 0x0F) == 0) { - /** - * FIXME: Use schedule_hr_timeout here for -diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c -index 267eac00a3fb..352af68c6cd7 100644 ---- 
a/drivers/hwmon/fam15h_power.c -+++ b/drivers/hwmon/fam15h_power.c -@@ -225,7 +225,7 @@ static ssize_t power1_average_show(struct device *dev, - prev_ptsc[cu] = data->cpu_sw_pwr_ptsc[cu]; - } - -- leftover = schedule_timeout_interruptible(msecs_to_jiffies(data->power_period)); -+ leftover = schedule_msec_hrtimeout_interruptible((data->power_period)); - if (leftover) - return 0; - -diff --git a/drivers/iio/light/tsl2563.c b/drivers/iio/light/tsl2563.c -index d8c40a83097d..8332baf4961c 100644 ---- a/drivers/iio/light/tsl2563.c -+++ b/drivers/iio/light/tsl2563.c -@@ -269,11 +269,7 @@ static void tsl2563_wait_adc(struct tsl2563_chip *chip) - default: - delay = 402; - } -- /* -- * TODO: Make sure that we wait at least required delay but why we -- * have to extend it one tick more? -- */ -- schedule_timeout_interruptible(msecs_to_jiffies(delay) + 2); -+ schedule_msec_hrtimeout_interruptible(delay + 1); - } - - static int tsl2563_adjust_gainlevel(struct tsl2563_chip *chip, u16 adc) -diff --git a/drivers/media/i2c/msp3400-driver.c b/drivers/media/i2c/msp3400-driver.c -index 39530d43590e..a7caf2eb5771 100644 ---- a/drivers/media/i2c/msp3400-driver.c -+++ b/drivers/media/i2c/msp3400-driver.c -@@ -170,7 +170,7 @@ static int msp_read(struct i2c_client *client, int dev, int addr) - break; - dev_warn(&client->dev, "I/O error #%d (read 0x%02x/0x%02x)\n", err, - dev, addr); -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - } - if (err == 3) { - dev_warn(&client->dev, "resetting chip, sound will go off.\n"); -@@ -211,7 +211,7 @@ static int msp_write(struct i2c_client *client, int dev, int addr, int val) - break; - dev_warn(&client->dev, "I/O error #%d (write 0x%02x/0x%02x)\n", err, - dev, addr); -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - } - if (err == 3) { - dev_warn(&client->dev, "resetting chip, sound will go off.\n"); -diff --git a/drivers/media/pci/cx18/cx18-gpio.c b/drivers/media/pci/cx18/cx18-gpio.c -index cf7cfda94107..f63e17489547 100644 ---- a/drivers/media/pci/cx18/cx18-gpio.c -+++ b/drivers/media/pci/cx18/cx18-gpio.c -@@ -81,11 +81,11 @@ static void gpio_reset_seq(struct cx18 *cx, u32 active_lo, u32 active_hi, - - /* Assert */ - gpio_update(cx, mask, ~active_lo); -- schedule_timeout_uninterruptible(msecs_to_jiffies(assert_msecs)); -+ schedule_msec_hrtimeout_uninterruptible((assert_msecs)); - - /* Deassert */ - gpio_update(cx, mask, ~active_hi); -- schedule_timeout_uninterruptible(msecs_to_jiffies(recovery_msecs)); -+ schedule_msec_hrtimeout_uninterruptible((recovery_msecs)); - } - - /* -diff --git a/drivers/media/pci/ivtv/ivtv-gpio.c b/drivers/media/pci/ivtv/ivtv-gpio.c -index 856e7ab7f33e..766a26251337 100644 ---- a/drivers/media/pci/ivtv/ivtv-gpio.c -+++ b/drivers/media/pci/ivtv/ivtv-gpio.c -@@ -105,7 +105,7 @@ void ivtv_reset_ir_gpio(struct ivtv *itv) - curout = (curout & ~0xF) | 1; - write_reg(curout, IVTV_REG_GPIO_OUT); - /* We could use something else for smaller time */ -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible((1)); - curout |= 2; - write_reg(curout, IVTV_REG_GPIO_OUT); - curdir &= ~0x80; -@@ -125,11 +125,11 @@ int ivtv_reset_tuner_gpio(void *dev, int component, int cmd, int value) - curout = read_reg(IVTV_REG_GPIO_OUT); - curout &= ~(1 << itv->card->xceive_pin); - write_reg(curout, IVTV_REG_GPIO_OUT); -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible((1)); - - curout 
|= 1 << itv->card->xceive_pin; - write_reg(curout, IVTV_REG_GPIO_OUT); -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible((1)); - return 0; - } - -diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.c b/drivers/media/pci/ivtv/ivtv-ioctl.c -index 137853944e46..76830892f373 100644 ---- a/drivers/media/pci/ivtv/ivtv-ioctl.c -+++ b/drivers/media/pci/ivtv/ivtv-ioctl.c -@@ -1137,7 +1137,7 @@ void ivtv_s_std_dec(struct ivtv *itv, v4l2_std_id std) - TASK_UNINTERRUPTIBLE); - if ((read_reg(IVTV_REG_DEC_LINE_FIELD) >> 16) < 100) - break; -- schedule_timeout(msecs_to_jiffies(25)); -+ schedule_msec_hrtimeout((25)); - } - finish_wait(&itv->vsync_waitq, &wait); - mutex_lock(&itv->serialize_lock); -diff --git a/drivers/media/pci/ivtv/ivtv-streams.c b/drivers/media/pci/ivtv/ivtv-streams.c -index f7de9118f609..f39ad2952c0f 100644 ---- a/drivers/media/pci/ivtv/ivtv-streams.c -+++ b/drivers/media/pci/ivtv/ivtv-streams.c -@@ -849,7 +849,7 @@ int ivtv_stop_v4l2_encode_stream(struct ivtv_stream *s, int gop_end) - while (!test_bit(IVTV_F_I_EOS, &itv->i_flags) && - time_before(jiffies, - then + msecs_to_jiffies(2000))) { -- schedule_timeout(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout((10)); - } - - /* To convert jiffies to ms, we must multiply by 1000 -diff --git a/drivers/media/radio/radio-mr800.c b/drivers/media/radio/radio-mr800.c -index cb0437b4c331..163fffc0e1d4 100644 ---- a/drivers/media/radio/radio-mr800.c -+++ b/drivers/media/radio/radio-mr800.c -@@ -366,7 +366,7 @@ static int vidioc_s_hw_freq_seek(struct file *file, void *priv, - retval = -ENODATA; - break; - } -- if (schedule_timeout_interruptible(msecs_to_jiffies(10))) { -+ if (schedule_msec_hrtimeout_interruptible((10))) { - retval = -ERESTARTSYS; - break; - } -diff --git a/drivers/media/radio/radio-tea5777.c b/drivers/media/radio/radio-tea5777.c -index fb9de7bbcd19..e53cf45e7f3f 100644 ---- a/drivers/media/radio/radio-tea5777.c -+++ b/drivers/media/radio/radio-tea5777.c -@@ -235,7 +235,7 @@ static int radio_tea5777_update_read_reg(struct radio_tea5777 *tea, int wait) - } - - if (wait) { -- if (schedule_timeout_interruptible(msecs_to_jiffies(wait))) -+ if (schedule_msec_hrtimeout_interruptible((wait))) - return -ERESTARTSYS; - } - -diff --git a/drivers/media/radio/tea575x.c b/drivers/media/radio/tea575x.c -index b0303cf00387..0925b5065147 100644 ---- a/drivers/media/radio/tea575x.c -+++ b/drivers/media/radio/tea575x.c -@@ -401,7 +401,7 @@ int snd_tea575x_s_hw_freq_seek(struct file *file, struct snd_tea575x *tea, - for (;;) { - if (time_after(jiffies, timeout)) - break; -- if (schedule_timeout_interruptible(msecs_to_jiffies(10))) { -+ if (schedule_msec_hrtimeout_interruptible((10))) { - /* some signal arrived, stop search */ - tea->val &= ~TEA575X_BIT_SEARCH; - snd_tea575x_set_freq(tea); -diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c -index b690796d24d4..448b13da62b4 100644 ---- a/drivers/mfd/ucb1x00-core.c -+++ b/drivers/mfd/ucb1x00-core.c -@@ -250,7 +250,7 @@ unsigned int ucb1x00_adc_read(struct ucb1x00 *ucb, int adc_channel, int sync) - break; - /* yield to other processes */ - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - - return UCB_ADC_DAT(val); -diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c -index 8e6607fc8a67..b9ab770bbdb5 100644 ---- a/drivers/misc/sgi-xp/xpc_channel.c -+++ b/drivers/misc/sgi-xp/xpc_channel.c -@@ -834,7 +834,7 @@ xpc_allocate_msg_wait(struct xpc_channel *ch) - - 
atomic_inc(&ch->n_on_msg_allocate_wq); - prepare_to_wait(&ch->msg_allocate_wq, &wait, TASK_INTERRUPTIBLE); -- ret = schedule_timeout(1); -+ ret = schedule_min_hrtimeout(); - finish_wait(&ch->msg_allocate_wq, &wait); - atomic_dec(&ch->n_on_msg_allocate_wq); - -diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c -index bbb2575d4728..637757144221 100644 ---- a/drivers/net/caif/caif_hsi.c -+++ b/drivers/net/caif/caif_hsi.c -@@ -939,7 +939,7 @@ static void cfhsi_wake_down(struct work_struct *work) - break; - - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - retry--; - } - -diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c -index d2539c95adb6..0c2f31a03ce9 100644 ---- a/drivers/net/can/usb/peak_usb/pcan_usb.c -+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c -@@ -242,7 +242,7 @@ static int pcan_usb_write_mode(struct peak_usb_device *dev, u8 onoff) - } else { - /* the PCAN-USB needs time to init */ - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(PCAN_USB_STARTUP_TIMEOUT)); -+ schedule_msec_hrtimeout((PCAN_USB_STARTUP_TIMEOUT)); - } - - return err; -diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c -index f24a1b0b801f..972313b92b0a 100644 ---- a/drivers/net/usb/lan78xx.c -+++ b/drivers/net/usb/lan78xx.c -@@ -2676,7 +2676,7 @@ static void lan78xx_terminate_urbs(struct lan78xx_net *dev) - while (!skb_queue_empty(&dev->rxq) && - !skb_queue_empty(&dev->txq) && - !skb_queue_empty(&dev->done)) { -- schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS)); -+ schedule_msec_hrtimeout((UNLINK_TIMEOUT_MS)); - set_current_state(TASK_UNINTERRUPTIBLE); - netif_dbg(dev, ifdown, dev->net, - "waited for %d urb completions\n", temp); -diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c -index dde05e2fdc3e..fa6c1581136e 100644 ---- a/drivers/net/usb/usbnet.c -+++ b/drivers/net/usb/usbnet.c -@@ -767,7 +767,7 @@ static void wait_skb_queue_empty(struct sk_buff_head *q) - spin_lock_irqsave(&q->lock, flags); - while (!skb_queue_empty(q)) { - spin_unlock_irqrestore(&q->lock, flags); -- schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS)); -+ schedule_msec_hrtimeout((UNLINK_TIMEOUT_MS)); - set_current_state(TASK_UNINTERRUPTIBLE); - spin_lock_irqsave(&q->lock, flags); - } -diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c -index 8dfbaff2d1fe..d1d6b9777f47 100644 ---- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c -+++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c -@@ -816,7 +816,7 @@ static int ipw2100_hw_send_command(struct ipw2100_priv *priv, - * doesn't seem to have as many firmware restart cycles... - * - * As a test, we're sticking in a 1/100s delay here */ -- schedule_timeout_uninterruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_uninterruptible((10)); - - return 0; - -@@ -1267,7 +1267,7 @@ static int ipw2100_start_adapter(struct ipw2100_priv *priv) - IPW_DEBUG_FW("Waiting for f/w initialization to complete...\n"); - i = 5000; - do { -- schedule_timeout_uninterruptible(msecs_to_jiffies(40)); -+ schedule_msec_hrtimeout_uninterruptible((40)); - /* Todo... wait for sync command ... 
*/ - - read_register(priv->net_dev, IPW_REG_INTA, &inta); -diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c -index 90fb73575495..c94048b048a5 100644 ---- a/drivers/parport/ieee1284.c -+++ b/drivers/parport/ieee1284.c -@@ -208,7 +208,7 @@ int parport_wait_peripheral(struct parport *port, - /* parport_wait_event didn't time out, but the - * peripheral wasn't actually ready either. - * Wait for another 10ms. */ -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - } - } - -diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c -index 5d41dda6da4e..34705f6b423f 100644 ---- a/drivers/parport/ieee1284_ops.c -+++ b/drivers/parport/ieee1284_ops.c -@@ -537,7 +537,7 @@ size_t parport_ieee1284_ecp_read_data (struct parport *port, - /* Yield the port for a while. */ - if (count && dev->port->irq != PARPORT_IRQ_NONE) { - parport_release (dev); -- schedule_timeout_interruptible(msecs_to_jiffies(40)); -+ schedule_msec_hrtimeout_interruptible((40)); - parport_claim_or_block (dev); - } - else -diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c -index bffe548187ee..c2918ee3e100 100644 ---- a/drivers/platform/x86/intel_ips.c -+++ b/drivers/platform/x86/intel_ips.c -@@ -798,7 +798,7 @@ static int ips_adjust(void *data) - ips_gpu_lower(ips); - - sleep: -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_ADJUST_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_ADJUST_PERIOD)); - } while (!kthread_should_stop()); - - dev_dbg(ips->dev, "ips-adjust thread stopped\n"); -@@ -974,7 +974,7 @@ static int ips_monitor(void *data) - seqno_timestamp = get_jiffies_64(); - - old_cpu_power = thm_readl(THM_CEC); -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_SAMPLE_PERIOD)); - - /* Collect an initial average */ - for (i = 0; i < IPS_SAMPLE_COUNT; i++) { -@@ -1001,7 +1001,7 @@ static int ips_monitor(void *data) - mchp_samples[i] = mchp; - } - -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_SAMPLE_PERIOD)); - if (kthread_should_stop()) - break; - } -@@ -1028,7 +1028,7 @@ static int ips_monitor(void *data) - * us to reduce the sample frequency if the CPU and GPU are idle. 
- */ - old_cpu_power = thm_readl(THM_CEC); -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_SAMPLE_PERIOD)); - last_sample_period = IPS_SAMPLE_PERIOD; - - timer_setup(&ips->timer, monitor_timeout, TIMER_DEFERRABLE); -diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c -index 2018614f258f..fc19b312c345 100644 ---- a/drivers/rtc/rtc-wm8350.c -+++ b/drivers/rtc/rtc-wm8350.c -@@ -114,7 +114,7 @@ static int wm8350_rtc_settime(struct device *dev, struct rtc_time *tm) - /* Wait until confirmation of stopping */ - do { - rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); -- schedule_timeout_uninterruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_uninterruptible((1)); - } while (--retries && !(rtc_ctrl & WM8350_RTC_STS)); - - if (!retries) { -@@ -197,7 +197,7 @@ static int wm8350_rtc_stop_alarm(struct wm8350 *wm8350) - /* Wait until confirmation of stopping */ - do { - rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); -- schedule_timeout_uninterruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_uninterruptible((1)); - } while (retries-- && !(rtc_ctrl & WM8350_RTC_ALMSTS)); - - if (!(rtc_ctrl & WM8350_RTC_ALMSTS)) -@@ -220,7 +220,7 @@ static int wm8350_rtc_start_alarm(struct wm8350 *wm8350) - /* Wait until confirmation */ - do { - rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); -- schedule_timeout_uninterruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_uninterruptible((1)); - } while (retries-- && rtc_ctrl & WM8350_RTC_ALMSTS); - - if (rtc_ctrl & WM8350_RTC_ALMSTS) -diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c -index 80608b53897b..84051b538fa8 100644 ---- a/drivers/scsi/fnic/fnic_scsi.c -+++ b/drivers/scsi/fnic/fnic_scsi.c -@@ -216,7 +216,7 @@ int fnic_fw_reset_handler(struct fnic *fnic) - - /* wait for io cmpl */ - while (atomic_read(&fnic->in_flight)) -- schedule_timeout(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout((1)); - - spin_lock_irqsave(&fnic->wq_copy_lock[0], flags); - -@@ -2273,7 +2273,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, - } - } - -- schedule_timeout(msecs_to_jiffies(2 * fnic->config.ed_tov)); -+ schedule_msec_hrtimeout((2 * fnic->config.ed_tov)); - - /* walk again to check, if IOs are still pending in fw */ - if (fnic_is_abts_pending(fnic, lr_sc)) -diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c -index 6822cd9ff8f1..ac3ad534be1a 100644 ---- a/drivers/scsi/lpfc/lpfc_scsi.c -+++ b/drivers/scsi/lpfc/lpfc_scsi.c -@@ -5176,7 +5176,7 @@ lpfc_reset_flush_io_context(struct lpfc_vport *vport, uint16_t tgt_id, - tgt_id, lun_id, context); - later = msecs_to_jiffies(2 * vport->cfg_devloss_tmo * 1000) + jiffies; - while (time_after(later, jiffies) && cnt) { -- schedule_timeout_uninterruptible(msecs_to_jiffies(20)); -+ schedule_msec_hrtimeout_uninterruptible((20)); - cnt = lpfc_sli_sum_iocb(vport, tgt_id, lun_id, context); - } - if (cnt) { -diff --git a/drivers/scsi/snic/snic_scsi.c b/drivers/scsi/snic/snic_scsi.c -index b3650c989ed4..7ed1fb285754 100644 ---- a/drivers/scsi/snic/snic_scsi.c -+++ b/drivers/scsi/snic/snic_scsi.c -@@ -2353,7 +2353,7 @@ snic_reset(struct Scsi_Host *shost, struct scsi_cmnd *sc) - - /* Wait for all the IOs that are entered in Qcmd */ - while (atomic_read(&snic->ios_inflight)) -- schedule_timeout(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout((1)); - - ret = snic_issue_hba_reset(snic, sc); - if (ret) { -diff --git 
a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c -index f98e3ae27bff..0741c8352a6d 100644 ---- a/drivers/staging/comedi/drivers/ni_mio_common.c -+++ b/drivers/staging/comedi/drivers/ni_mio_common.c -@@ -4742,7 +4742,7 @@ static int cs5529_wait_for_idle(struct comedi_device *dev) - if ((status & NI67XX_CAL_STATUS_BUSY) == 0) - break; - set_current_state(TASK_INTERRUPTIBLE); -- if (schedule_timeout(1)) -+ if (schedule_min_hrtimeout()) - return -EIO; - } - if (i == timeout) { -diff --git a/drivers/staging/lustre/lnet/lnet/lib-eq.c b/drivers/staging/lustre/lnet/lnet/lib-eq.c -new file mode 100644 -index 000000000000..8cca151741b2 ---- /dev/null -+++ b/drivers/staging/lustre/lnet/lnet/lib-eq.c -@@ -0,0 +1,426 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * GPL HEADER START -+ * -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 only, -+ * as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but -+ * WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License version 2 for more details (a copy is included -+ * in the LICENSE file that accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License -+ * version 2 along with this program; If not, see -+ * http://www.gnu.org/licenses/gpl-2.0.html -+ * -+ * GPL HEADER END -+ */ -+/* -+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Use is subject to license terms. -+ * -+ * Copyright (c) 2012, Intel Corporation. -+ */ -+/* -+ * This file is part of Lustre, http://www.lustre.org/ -+ * Lustre is a trademark of Sun Microsystems, Inc. -+ * -+ * lnet/lnet/lib-eq.c -+ * -+ * Library level Event queue management routines -+ */ -+ -+#define DEBUG_SUBSYSTEM S_LNET -+ -+#include -+ -+/** -+ * Create an event queue that has room for \a count number of events. -+ * -+ * The event queue is circular and older events will be overwritten by new -+ * ones if they are not removed in time by the user using the functions -+ * LNetEQGet(), LNetEQWait(), or LNetEQPoll(). It is up to the user to -+ * determine the appropriate size of the event queue to prevent this loss -+ * of events. Note that when EQ handler is specified in \a callback, no -+ * event loss can happen, since the handler is run for each event deposited -+ * into the EQ. -+ * -+ * \param count The number of events to be stored in the event queue. It -+ * will be rounded up to the next power of two. -+ * \param callback A handler function that runs when an event is deposited -+ * into the EQ. The constant value LNET_EQ_HANDLER_NONE can be used to -+ * indicate that no event handler is desired. -+ * \param handle On successful return, this location will hold a handle for -+ * the newly created EQ. -+ * -+ * \retval 0 On success. -+ * \retval -EINVAL If an parameter is not valid. -+ * \retval -ENOMEM If memory for the EQ can't be allocated. -+ * -+ * \see lnet_eq_handler_t for the discussion on EQ handler semantics. 
-+ */ -+int -+LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback, -+ struct lnet_handle_eq *handle) -+{ -+ struct lnet_eq *eq; -+ -+ LASSERT(the_lnet.ln_refcount > 0); -+ -+ /* -+ * We need count to be a power of 2 so that when eq_{enq,deq}_seq -+ * overflow, they don't skip entries, so the queue has the same -+ * apparent capacity at all times -+ */ -+ if (count) -+ count = roundup_pow_of_two(count); -+ -+ if (callback != LNET_EQ_HANDLER_NONE && count) -+ CWARN("EQ callback is guaranteed to get every event, do you still want to set eqcount %d for polling event which will have locking overhead? Please contact with developer to confirm\n", count); -+ -+ /* -+ * count can be 0 if only need callback, we can eliminate -+ * overhead of enqueue event -+ */ -+ if (!count && callback == LNET_EQ_HANDLER_NONE) -+ return -EINVAL; -+ -+ eq = kzalloc(sizeof(*eq), GFP_NOFS); -+ if (!eq) -+ return -ENOMEM; -+ -+ if (count) { -+ eq->eq_events = kvmalloc_array(count, sizeof(struct lnet_event), -+ GFP_KERNEL | __GFP_ZERO); -+ if (!eq->eq_events) -+ goto failed; -+ /* -+ * NB allocator has set all event sequence numbers to 0, -+ * so all them should be earlier than eq_deq_seq -+ */ -+ } -+ -+ eq->eq_deq_seq = 1; -+ eq->eq_enq_seq = 1; -+ eq->eq_size = count; -+ eq->eq_callback = callback; -+ -+ eq->eq_refs = cfs_percpt_alloc(lnet_cpt_table(), -+ sizeof(*eq->eq_refs[0])); -+ if (!eq->eq_refs) -+ goto failed; -+ -+ /* MUST hold both exclusive lnet_res_lock */ -+ lnet_res_lock(LNET_LOCK_EX); -+ /* -+ * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do -+ * both EQ lookup and poll event with only lnet_eq_wait_lock -+ */ -+ lnet_eq_wait_lock(); -+ -+ lnet_res_lh_initialize(&the_lnet.ln_eq_container, &eq->eq_lh); -+ list_add(&eq->eq_list, &the_lnet.ln_eq_container.rec_active); -+ -+ lnet_eq_wait_unlock(); -+ lnet_res_unlock(LNET_LOCK_EX); -+ -+ lnet_eq2handle(handle, eq); -+ return 0; -+ -+failed: -+ kvfree(eq->eq_events); -+ -+ if (eq->eq_refs) -+ cfs_percpt_free(eq->eq_refs); -+ -+ kfree(eq); -+ return -ENOMEM; -+} -+EXPORT_SYMBOL(LNetEQAlloc); -+ -+/** -+ * Release the resources associated with an event queue if it's idle; -+ * otherwise do nothing and it's up to the user to try again. -+ * -+ * \param eqh A handle for the event queue to be released. -+ * -+ * \retval 0 If the EQ is not in use and freed. -+ * \retval -ENOENT If \a eqh does not point to a valid EQ. -+ * \retval -EBUSY If the EQ is still in use by some MDs. 
-+ */ -+int -+LNetEQFree(struct lnet_handle_eq eqh) -+{ -+ struct lnet_eq *eq; -+ struct lnet_event *events = NULL; -+ int **refs = NULL; -+ int *ref; -+ int rc = 0; -+ int size = 0; -+ int i; -+ -+ LASSERT(the_lnet.ln_refcount > 0); -+ -+ lnet_res_lock(LNET_LOCK_EX); -+ /* -+ * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do -+ * both EQ lookup and poll event with only lnet_eq_wait_lock -+ */ -+ lnet_eq_wait_lock(); -+ -+ eq = lnet_handle2eq(&eqh); -+ if (!eq) { -+ rc = -ENOENT; -+ goto out; -+ } -+ -+ cfs_percpt_for_each(ref, i, eq->eq_refs) { -+ LASSERT(*ref >= 0); -+ if (!*ref) -+ continue; -+ -+ CDEBUG(D_NET, "Event equeue (%d: %d) busy on destroy.\n", -+ i, *ref); -+ rc = -EBUSY; -+ goto out; -+ } -+ -+ /* stash for free after lock dropped */ -+ events = eq->eq_events; -+ size = eq->eq_size; -+ refs = eq->eq_refs; -+ -+ lnet_res_lh_invalidate(&eq->eq_lh); -+ list_del(&eq->eq_list); -+ kfree(eq); -+ out: -+ lnet_eq_wait_unlock(); -+ lnet_res_unlock(LNET_LOCK_EX); -+ -+ kvfree(events); -+ if (refs) -+ cfs_percpt_free(refs); -+ -+ return rc; -+} -+EXPORT_SYMBOL(LNetEQFree); -+ -+void -+lnet_eq_enqueue_event(struct lnet_eq *eq, struct lnet_event *ev) -+{ -+ /* MUST called with resource lock hold but w/o lnet_eq_wait_lock */ -+ int index; -+ -+ if (!eq->eq_size) { -+ LASSERT(eq->eq_callback != LNET_EQ_HANDLER_NONE); -+ eq->eq_callback(ev); -+ return; -+ } -+ -+ lnet_eq_wait_lock(); -+ ev->sequence = eq->eq_enq_seq++; -+ -+ LASSERT(eq->eq_size == LOWEST_BIT_SET(eq->eq_size)); -+ index = ev->sequence & (eq->eq_size - 1); -+ -+ eq->eq_events[index] = *ev; -+ -+ if (eq->eq_callback != LNET_EQ_HANDLER_NONE) -+ eq->eq_callback(ev); -+ -+ /* Wake anyone waiting in LNetEQPoll() */ -+ if (waitqueue_active(&the_lnet.ln_eq_waitq)) -+ wake_up_all(&the_lnet.ln_eq_waitq); -+ lnet_eq_wait_unlock(); -+} -+ -+static int -+lnet_eq_dequeue_event(struct lnet_eq *eq, struct lnet_event *ev) -+{ -+ int new_index = eq->eq_deq_seq & (eq->eq_size - 1); -+ struct lnet_event *new_event = &eq->eq_events[new_index]; -+ int rc; -+ -+ /* must called with lnet_eq_wait_lock hold */ -+ if (LNET_SEQ_GT(eq->eq_deq_seq, new_event->sequence)) -+ return 0; -+ -+ /* We've got a new event... */ -+ *ev = *new_event; -+ -+ CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n", -+ new_event, eq->eq_deq_seq, eq->eq_size); -+ -+ /* ...but did it overwrite an event we've not seen yet? */ -+ if (eq->eq_deq_seq == new_event->sequence) { -+ rc = 1; -+ } else { -+ /* -+ * don't complain with CERROR: some EQs are sized small -+ * anyway; if it's important, the caller should complain -+ */ -+ CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n", -+ eq->eq_deq_seq, new_event->sequence); -+ rc = -EOVERFLOW; -+ } -+ -+ eq->eq_deq_seq = new_event->sequence + 1; -+ return rc; -+} -+ -+/** -+ * A nonblocking function that can be used to get the next event in an EQ. -+ * If an event handler is associated with the EQ, the handler will run before -+ * this function returns successfully. The event is removed from the queue. -+ * -+ * \param eventq A handle for the event queue. -+ * \param event On successful return (1 or -EOVERFLOW), this location will -+ * hold the next event in the EQ. -+ * -+ * \retval 0 No pending event in the EQ. -+ * \retval 1 Indicates success. -+ * \retval -ENOENT If \a eventq does not point to a valid EQ. 
-+ * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that -+ * at least one event between this event and the last event obtained from the -+ * EQ has been dropped due to limited space in the EQ. -+ */ -+ -+/** -+ * Block the calling process until there is an event in the EQ. -+ * If an event handler is associated with the EQ, the handler will run before -+ * this function returns successfully. This function returns the next event -+ * in the EQ and removes it from the EQ. -+ * -+ * \param eventq A handle for the event queue. -+ * \param event On successful return (1 or -EOVERFLOW), this location will -+ * hold the next event in the EQ. -+ * -+ * \retval 1 Indicates success. -+ * \retval -ENOENT If \a eventq does not point to a valid EQ. -+ * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that -+ * at least one event between this event and the last event obtained from the -+ * EQ has been dropped due to limited space in the EQ. -+ */ -+ -+static int -+lnet_eq_wait_locked(int *timeout_ms, long state) -+__must_hold(&the_lnet.ln_eq_wait_lock) -+{ -+ int tms = *timeout_ms; -+ int wait; -+ wait_queue_entry_t wl; -+ unsigned long now; -+ -+ if (!tms) -+ return -ENXIO; /* don't want to wait and no new event */ -+ -+ init_waitqueue_entry(&wl, current); -+ set_current_state(state); -+ add_wait_queue(&the_lnet.ln_eq_waitq, &wl); -+ -+ lnet_eq_wait_unlock(); -+ -+ if (tms < 0) { -+ schedule(); -+ } else { -+ now = jiffies; -+ schedule_msec_hrtimeout((tms)); -+ tms -= jiffies_to_msecs(jiffies - now); -+ if (tms < 0) /* no more wait but may have new event */ -+ tms = 0; -+ } -+ -+ wait = tms; /* might need to call here again */ -+ *timeout_ms = tms; -+ -+ lnet_eq_wait_lock(); -+ remove_wait_queue(&the_lnet.ln_eq_waitq, &wl); -+ -+ return wait; -+} -+ -+/** -+ * Block the calling process until there's an event from a set of EQs or -+ * timeout happens. -+ * -+ * If an event handler is associated with the EQ, the handler will run before -+ * this function returns successfully, in which case the corresponding event -+ * is consumed. -+ * -+ * LNetEQPoll() provides a timeout to allow applications to poll, block for a -+ * fixed period, or block indefinitely. -+ * -+ * \param eventqs,neq An array of EQ handles, and size of the array. -+ * \param timeout_ms Time in milliseconds to wait for an event to occur on -+ * one of the EQs. The constant LNET_TIME_FOREVER can be used to indicate an -+ * infinite timeout. -+ * \param interruptible, if true, use TASK_INTERRUPTIBLE, else TASK_NOLOAD -+ * \param event,which On successful return (1 or -EOVERFLOW), \a event will -+ * hold the next event in the EQs, and \a which will contain the index of the -+ * EQ from which the event was taken. -+ * -+ * \retval 0 No pending event in the EQs after timeout. -+ * \retval 1 Indicates success. -+ * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that -+ * at least one event between this event and the last event obtained from the -+ * EQ indicated by \a which has been dropped due to limited space in the EQ. -+ * \retval -ENOENT If there's an invalid handle in \a eventqs. 
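As a minimal usage sketch of the EQ API documented above (illustrative only, not part of the patch): a hypothetical LNet caller would allocate an event queue, poll it, then release it roughly as follows. LNetEQAlloc(), LNetEQPoll() and LNetEQFree() are the functions added by this file; the wrapper function, the 64-slot size and the error handling are assumptions.

    /* Illustrative sketch only - not part of the patch. Assumes the LNet
     * headers declaring struct lnet_handle_eq, struct lnet_event and
     * LNET_EQ_HANDLER_NONE are already included by the caller. */
    static int example_eq_roundtrip(void)
    {
            struct lnet_handle_eq eqh;
            struct lnet_event ev;
            int which, rc;

            /* 64 slots, no handler: events must be drained with LNetEQPoll() */
            rc = LNetEQAlloc(64, LNET_EQ_HANDLER_NONE, &eqh);
            if (rc)
                    return rc;

            /* Block up to 100 ms, interruptibly; 1 or -EOVERFLOW mean an event
             * was returned in ev, 0 means the timeout expired with no event. */
            rc = LNetEQPoll(&eqh, 1, 100, 1, &ev, &which);

            /* -EBUSY from LNetEQFree() would mean MDs still reference the EQ */
            return LNetEQFree(eqh);
    }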
-+ */ -+int -+LNetEQPoll(struct lnet_handle_eq *eventqs, int neq, int timeout_ms, -+ int interruptible, -+ struct lnet_event *event, int *which) -+{ -+ int wait = 1; -+ int rc; -+ int i; -+ -+ LASSERT(the_lnet.ln_refcount > 0); -+ -+ if (neq < 1) -+ return -ENOENT; -+ -+ lnet_eq_wait_lock(); -+ -+ for (;;) { -+ for (i = 0; i < neq; i++) { -+ struct lnet_eq *eq = lnet_handle2eq(&eventqs[i]); -+ -+ if (!eq) { -+ lnet_eq_wait_unlock(); -+ return -ENOENT; -+ } -+ -+ rc = lnet_eq_dequeue_event(eq, event); -+ if (rc) { -+ lnet_eq_wait_unlock(); -+ *which = i; -+ return rc; -+ } -+ } -+ -+ if (!wait) -+ break; -+ -+ /* -+ * return value of lnet_eq_wait_locked: -+ * -1 : did nothing and it's sure no new event -+ * 1 : sleep inside and wait until new event -+ * 0 : don't want to wait anymore, but might have new event -+ * so need to call dequeue again -+ */ -+ wait = lnet_eq_wait_locked(&timeout_ms, -+ interruptible ? TASK_INTERRUPTIBLE -+ : TASK_NOLOAD); -+ if (wait < 0) /* no new event */ -+ break; -+ } -+ -+ lnet_eq_wait_unlock(); -+ return 0; -+} -diff --git a/drivers/staging/rts5208/rtsx.c b/drivers/staging/rts5208/rtsx.c -index fa597953e9a0..685cf842badc 100644 ---- a/drivers/staging/rts5208/rtsx.c -+++ b/drivers/staging/rts5208/rtsx.c -@@ -490,7 +490,7 @@ static int rtsx_polling_thread(void *__dev) - - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(POLLING_INTERVAL)); -+ schedule_msec_hrtimeout((POLLING_INTERVAL)); - - /* lock the device pointers */ - mutex_lock(&dev->dev_mutex); -diff --git a/drivers/staging/speakup/speakup_acntpc.c b/drivers/staging/speakup/speakup_acntpc.c -index c94328a5bd4a..6e7d4671aa69 100644 ---- a/drivers/staging/speakup/speakup_acntpc.c -+++ b/drivers/staging/speakup/speakup_acntpc.c -@@ -198,7 +198,7 @@ static void do_catch_up(struct spk_synth *synth) - full_time_val = full_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth_full()) { -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout((full_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -@@ -226,7 +226,7 @@ static void do_catch_up(struct spk_synth *synth) - jiffy_delta_val = jiffy_delta->u.n.value; - delay_time_val = delay_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - jiff_max = jiffies + jiffy_delta_val; - } - } -diff --git a/drivers/staging/speakup/speakup_apollo.c b/drivers/staging/speakup/speakup_apollo.c -index 0877b4044c28..627102d048c1 100644 ---- a/drivers/staging/speakup/speakup_apollo.c -+++ b/drivers/staging/speakup/speakup_apollo.c -@@ -165,7 +165,7 @@ static void do_catch_up(struct spk_synth *synth) - if (!synth->io_ops->synth_out(synth, ch)) { - synth->io_ops->tiocmset(0, UART_MCR_RTS); - synth->io_ops->tiocmset(UART_MCR_RTS, 0); -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout(full_time_val); - continue; - } - if (time_after_eq(jiffies, jiff_max) && (ch == SPACE)) { -diff --git a/drivers/staging/speakup/speakup_decext.c b/drivers/staging/speakup/speakup_decext.c -index ddbb7e97d118..f9502addc765 100644 ---- a/drivers/staging/speakup/speakup_decext.c -+++ b/drivers/staging/speakup/speakup_decext.c -@@ -176,7 +176,7 @@ static void do_catch_up(struct spk_synth *synth) - if (ch == '\n') - ch = 0x0D; - if (synth_full() || !synth->io_ops->synth_out(synth, ch)) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ 
schedule_msec_hrtimeout(delay_time_val); - continue; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/staging/speakup/speakup_decpc.c b/drivers/staging/speakup/speakup_decpc.c -index 798c42dfa16c..d85b41db67a3 100644 ---- a/drivers/staging/speakup/speakup_decpc.c -+++ b/drivers/staging/speakup/speakup_decpc.c -@@ -394,7 +394,7 @@ static void do_catch_up(struct spk_synth *synth) - if (ch == '\n') - ch = 0x0D; - if (dt_sendchar(ch)) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout((delay_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/staging/speakup/speakup_dectlk.c b/drivers/staging/speakup/speakup_dectlk.c -index dccb4ea29d37..8ecead307d04 100644 ---- a/drivers/staging/speakup/speakup_dectlk.c -+++ b/drivers/staging/speakup/speakup_dectlk.c -@@ -244,7 +244,7 @@ static void do_catch_up(struct spk_synth *synth) - if (ch == '\n') - ch = 0x0D; - if (synth_full_val || !synth->io_ops->synth_out(synth, ch)) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - continue; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/staging/speakup/speakup_dtlk.c b/drivers/staging/speakup/speakup_dtlk.c -index dbebed0eeeec..6d83c13ca4a6 100644 ---- a/drivers/staging/speakup/speakup_dtlk.c -+++ b/drivers/staging/speakup/speakup_dtlk.c -@@ -211,7 +211,7 @@ static void do_catch_up(struct spk_synth *synth) - delay_time_val = delay_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth_full()) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout((delay_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -@@ -227,7 +227,7 @@ static void do_catch_up(struct spk_synth *synth) - delay_time_val = delay_time->u.n.value; - jiffy_delta_val = jiffy_delta->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout((delay_time_val)); - jiff_max = jiffies + jiffy_delta_val; - } - } -diff --git a/drivers/staging/speakup/speakup_keypc.c b/drivers/staging/speakup/speakup_keypc.c -index 414827e888fc..cb31c9176daa 100644 ---- a/drivers/staging/speakup/speakup_keypc.c -+++ b/drivers/staging/speakup/speakup_keypc.c -@@ -199,7 +199,7 @@ static void do_catch_up(struct spk_synth *synth) - full_time_val = full_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth_full()) { -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout((full_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -@@ -232,7 +232,7 @@ static void do_catch_up(struct spk_synth *synth) - jiffy_delta_val = jiffy_delta->u.n.value; - delay_time_val = delay_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - jiff_max = jiffies + jiffy_delta_val; - } - } -diff --git a/drivers/staging/speakup/synth.c b/drivers/staging/speakup/synth.c -index 3568bfb89912..0a80b3b098b2 100644 ---- a/drivers/staging/speakup/synth.c -+++ b/drivers/staging/speakup/synth.c -@@ -93,12 +93,8 @@ static void _spk_do_catch_up(struct spk_synth *synth, int unicode) - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (ch == '\n') - ch = synth->procspeech; -- if (unicode) -- ret = synth->io_ops->synth_out_unicode(synth, ch); -- else -- ret = synth->io_ops->synth_out(synth, ch); -- if (!ret) { -- 
schedule_timeout(msecs_to_jiffies(full_time_val)); -+ if (!synth->io_ops->synth_out(synth, ch)) { -+ schedule_msec_hrtimeout(full_time_val); - continue; - } - if (time_after_eq(jiffies, jiff_max) && (ch == SPACE)) { -@@ -108,11 +104,9 @@ static void _spk_do_catch_up(struct spk_synth *synth, int unicode) - full_time_val = full_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth->io_ops->synth_out(synth, synth->procspeech)) -- schedule_timeout( -- msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - else -- schedule_timeout( -- msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout(full_time_val); - jiff_max = jiffies + jiffy_delta_val; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/staging/unisys/visornic/visornic_main.c b/drivers/staging/unisys/visornic/visornic_main.c -index 1d1440d43002..52fe89ae1d9d 100644 ---- a/drivers/staging/unisys/visornic/visornic_main.c -+++ b/drivers/staging/unisys/visornic/visornic_main.c -@@ -549,7 +549,7 @@ static int visornic_disable_with_timeout(struct net_device *netdev, - } - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(&devdata->priv_lock, flags); -- wait += schedule_timeout(msecs_to_jiffies(10)); -+ wait += schedule_msec_hrtimeout((10)); - spin_lock_irqsave(&devdata->priv_lock, flags); - } - -@@ -560,7 +560,7 @@ static int visornic_disable_with_timeout(struct net_device *netdev, - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(&devdata->priv_lock, flags); -- schedule_timeout(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout((10)); - spin_lock_irqsave(&devdata->priv_lock, flags); - if (atomic_read(&devdata->usage)) - break; -@@ -714,7 +714,7 @@ static int visornic_enable_with_timeout(struct net_device *netdev, - } - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(&devdata->priv_lock, flags); -- wait += schedule_timeout(msecs_to_jiffies(10)); -+ wait += schedule_msec_hrtimeout((10)); - spin_lock_irqsave(&devdata->priv_lock, flags); - } - -diff --git a/drivers/video/fbdev/omap/hwa742.c b/drivers/video/fbdev/omap/hwa742.c -index cfe63932f825..71c00ef772a3 100644 ---- a/drivers/video/fbdev/omap/hwa742.c -+++ b/drivers/video/fbdev/omap/hwa742.c -@@ -913,7 +913,7 @@ static void hwa742_resume(void) - if (hwa742_read_reg(HWA742_PLL_DIV_REG) & (1 << 7)) - break; - set_current_state(TASK_UNINTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(5)); -+ schedule_msec_hrtimeout((5)); - } - hwa742_set_update_mode(hwa742.update_mode_before_suspend); - } -diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c -index f70c9f79622e..0b363eaee24f 100644 ---- a/drivers/video/fbdev/pxafb.c -+++ b/drivers/video/fbdev/pxafb.c -@@ -1287,7 +1287,7 @@ static int pxafb_smart_thread(void *arg) - mutex_unlock(&fbi->ctrlr_lock); - - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(30)); -+ schedule_msec_hrtimeout((30)); - } - - pr_debug("%s(): task ending\n", __func__); -diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c -index 37345fb6191d..3874c17d1bc5 100644 ---- a/fs/btrfs/inode-map.c -+++ b/fs/btrfs/inode-map.c -@@ -91,7 +91,7 @@ static int caching_kthread(void *data) - btrfs_release_path(path); - root->ino_cache_progress = last; - up_read(&fs_info->commit_root_sem); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - goto again; - } else - continue; -diff --git a/fs/proc/base.c b/fs/proc/base.c -index ebea9501afb8..51c9346a69fe 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ 
-477,7 +477,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, - seq_puts(m, "0 0 0\n"); - else - seq_printf(m, "%llu %llu %lu\n", -- (unsigned long long)task->se.sum_exec_runtime, -+ (unsigned long long)tsk_seruntime(task), - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); - -diff --git a/include/linux/freezer.h b/include/linux/freezer.h -index 21f5aa0b217f..ee9b46394fdf 100644 ---- a/include/linux/freezer.h -+++ b/include/linux/freezer.h -@@ -297,6 +297,7 @@ static inline void set_freezable(void) {} - #define wait_event_freezekillable_unsafe(wq, condition) \ - wait_event_killable(wq, condition) - -+#define pm_freezing (false) - #endif /* !CONFIG_FREEZER */ - - #endif /* FREEZER_H_INCLUDED */ -diff --git a/include/linux/init_task.h b/include/linux/init_task.h -index 2c620d7ac432..73417df5daa2 100644 ---- a/include/linux/init_task.h -+++ b/include/linux/init_task.h -@@ -36,7 +36,11 @@ extern struct cred init_cred; - #define INIT_PREV_CPUTIME(x) - #endif - -+#ifdef CONFIG_SCHED_MUQSS -+#define INIT_TASK_COMM "MuQSS" -+#else - #define INIT_TASK_COMM "swapper" -+#endif - - /* Attach to the init_task data structure for proper alignment */ - #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK -diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h -index e9bfe6972aed..16ba1c7e5bde 100644 ---- a/include/linux/ioprio.h -+++ b/include/linux/ioprio.h -@@ -53,6 +53,8 @@ enum { - */ - static inline int task_nice_ioprio(struct task_struct *task) - { -+ if (iso_task(task)) -+ return 0; - return (task_nice(task) + 20) / 5; - } - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 67a1d86981a9..95b427fdbb2e 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -31,6 +31,9 @@ - #include - #include - #include -+#ifdef CONFIG_SCHED_MUQSS -+#include -+#endif - - /* task_struct member predeclarations (sorted alphabetically): */ - struct audit_context; -@@ -214,13 +217,40 @@ struct task_group; - - extern void scheduler_tick(void); - --#define MAX_SCHEDULE_TIMEOUT LONG_MAX -- -+#define MAX_SCHEDULE_TIMEOUT LONG_MAX - extern long schedule_timeout(long timeout); - extern long schedule_timeout_interruptible(long timeout); - extern long schedule_timeout_killable(long timeout); - extern long schedule_timeout_uninterruptible(long timeout); - extern long schedule_timeout_idle(long timeout); -+ -+#ifdef CONFIG_HIGH_RES_TIMERS -+extern long schedule_msec_hrtimeout(long timeout); -+extern long schedule_min_hrtimeout(void); -+extern long schedule_msec_hrtimeout_interruptible(long timeout); -+extern long schedule_msec_hrtimeout_uninterruptible(long timeout); -+#else -+static inline long schedule_msec_hrtimeout(long timeout) -+{ -+ return schedule_timeout(msecs_to_jiffies(timeout)); -+} -+ -+static inline long schedule_min_hrtimeout(void) -+{ -+ return schedule_timeout(1); -+} -+ -+static inline long schedule_msec_hrtimeout_interruptible(long timeout) -+{ -+ return schedule_timeout_interruptible(msecs_to_jiffies(timeout)); -+} -+ -+static inline long schedule_msec_hrtimeout_uninterruptible(long timeout) -+{ -+ return schedule_timeout_uninterruptible(msecs_to_jiffies(timeout)); -+} -+#endif -+ - asmlinkage void schedule(void); - extern void schedule_preempt_disabled(void); - asmlinkage void preempt_schedule_irq(void); -@@ -644,9 +674,11 @@ struct task_struct { - unsigned int flags; - unsigned int ptrace; - -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_MUQSS) -+ int on_cpu; -+#endif - #ifdef CONFIG_SMP - struct llist_node wake_entry; -- int 
on_cpu; - #ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ - unsigned int cpu; -@@ -671,10 +703,25 @@ struct task_struct { - int static_prio; - int normal_prio; - unsigned int rt_priority; -+#ifdef CONFIG_SCHED_MUQSS -+ int time_slice; -+ u64 deadline; -+ skiplist_node node; /* Skip list node */ -+ u64 last_ran; -+ u64 sched_time; /* sched_clock time spent running */ -+#ifdef CONFIG_SMT_NICE -+ int smt_bias; /* Policy/nice level bias across smt siblings */ -+#endif -+#ifdef CONFIG_HOTPLUG_CPU -+ bool zerobound; /* Bound to CPU0 for hotplug */ -+#endif -+ unsigned long rt_timeout; -+#else /* CONFIG_SCHED_MUQSS */ - - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; -+#endif - #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; - #endif -@@ -839,6 +886,10 @@ struct task_struct { - #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME - u64 utimescaled; - u64 stimescaled; -+#endif -+#ifdef CONFIG_SCHED_MUQSS -+ /* Unbanked cpu time */ -+ unsigned long utime_ns, stime_ns; - #endif - u64 gtime; - struct prev_cputime prev_cputime; -@@ -1283,6 +1334,40 @@ struct task_struct { - */ - }; - -+#ifdef CONFIG_SCHED_MUQSS -+#define tsk_seruntime(t) ((t)->sched_time) -+#define tsk_rttimeout(t) ((t)->rt_timeout) -+ -+static inline void tsk_cpus_current(struct task_struct *p) -+{ -+} -+ -+void print_scheduler_version(void); -+ -+static inline bool iso_task(struct task_struct *p) -+{ -+ return (p->policy == SCHED_ISO); -+} -+#else /* CFS */ -+#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) -+#define tsk_rttimeout(t) ((t)->rt.timeout) -+ -+static inline void tsk_cpus_current(struct task_struct *p) -+{ -+ p->nr_cpus_allowed = current->nr_cpus_allowed; -+} -+ -+static inline void print_scheduler_version(void) -+{ -+ printk(KERN_INFO "CFS CPU scheduler.\n"); -+} -+ -+static inline bool iso_task(struct task_struct *p) -+{ -+ return false; -+} -+#endif /* CONFIG_SCHED_MUQSS */ -+ - static inline struct pid *task_pid(struct task_struct *task) - { - return task->thread_pid; -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 1aff00b65f3c..73d6319a856a 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -28,7 +28,16 @@ static inline bool dl_time_before(u64 a, u64 b) - #ifdef CONFIG_SMP - - struct root_domain; -+#ifdef CONFIG_SCHED_MUQSS -+static inline void dl_clear_root_domain(struct root_domain *rd) -+{ -+} -+static inline void dl_add_task_root_domain(struct task_struct *p) -+{ -+} -+#else /* CONFIG_SCHED_MUQSS */ - extern void dl_add_task_root_domain(struct task_struct *p); - extern void dl_clear_root_domain(struct root_domain *rd); -+#endif /* CONFIG_SCHED_MUQSS */ - - #endif /* CONFIG_SMP */ -diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h -index 1abe91ff6e4a..20ba383562b0 100644 ---- a/include/linux/sched/nohz.h -+++ b/include/linux/sched/nohz.h -@@ -13,7 +13,7 @@ extern int get_nohz_timer_target(void); - static inline void nohz_balance_enter_idle(int cpu) { } - #endif - --#ifdef CONFIG_NO_HZ_COMMON -+#if defined(CONFIG_NO_HZ_COMMON) && !defined(CONFIG_SCHED_MUQSS) - void calc_load_nohz_start(void); - void calc_load_nohz_stop(void); - #else -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 7d64feafc408..43c9d9e50c09 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -20,8 +20,20 @@ - */ - - #define MAX_USER_RT_PRIO 100 -+ -+#ifdef CONFIG_SCHED_MUQSS -+/* Note different MAX_RT_PRIO */ -+#define MAX_RT_PRIO 
(MAX_USER_RT_PRIO + 1) -+ -+#define ISO_PRIO (MAX_RT_PRIO) -+#define NORMAL_PRIO (MAX_RT_PRIO + 1) -+#define IDLE_PRIO (MAX_RT_PRIO + 2) -+#define PRIO_LIMIT ((IDLE_PRIO) + 1) -+#else /* CONFIG_SCHED_MUQSS */ - #define MAX_RT_PRIO MAX_USER_RT_PRIO - -+#endif /* CONFIG_SCHED_MUQSS */ -+ - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..010b2244e0b6 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return true; -+#ifndef CONFIG_SCHED_MUQSS - if (policy == SCHED_DEADLINE) - return true; -+#endif - return false; - } - -diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h -index 4b1c3b664f51..a9671b48799c 100644 ---- a/include/linux/sched/task.h -+++ b/include/linux/sched/task.h -@@ -99,7 +99,7 @@ extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); - extern void free_task(struct task_struct *tsk); - - /* sched_exec is called by processes performing an exec */ --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_MUQSS) - extern void sched_exec(void); - #else - #define sched_exec() {} -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -new file mode 100644 -index 000000000000..d4be84ba273b ---- /dev/null -+++ b/include/linux/skip_list.h -@@ -0,0 +1,33 @@ -+#ifndef _LINUX_SKIP_LISTS_H -+#define _LINUX_SKIP_LISTS_H -+typedef u64 keyType; -+typedef void *valueType; -+ -+typedef struct nodeStructure skiplist_node; -+ -+struct nodeStructure { -+ int level; /* Levels in this structure */ -+ keyType key; -+ valueType value; -+ skiplist_node *next[8]; -+ skiplist_node *prev[8]; -+}; -+ -+typedef struct listStructure { -+ int entries; -+ int level; /* Maximum level of the list -+ (1 more than the number of levels in the list) */ -+ skiplist_node *header; /* pointer to header */ -+} skiplist; -+ -+void skiplist_init(skiplist_node *slnode); -+skiplist *new_skiplist(skiplist_node *slnode); -+void free_skiplist(skiplist *l); -+void skiplist_node_init(skiplist_node *node); -+void skiplist_insert(skiplist *l, skiplist_node *node, keyType key, valueType value, unsigned int randseed); -+void skiplist_delete(skiplist *l, skiplist_node *node); -+ -+static inline bool skiplist_node_empty(skiplist_node *node) { -+ return (!node->next[0]); -+} -+#endif /* _LINUX_SKIP_LISTS_H */ -diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h -index 25b4fa00bad1..c2503cd28025 100644 ---- a/include/uapi/linux/sched.h -+++ b/include/uapi/linux/sched.h -@@ -84,9 +84,16 @@ struct clone_args { - #define SCHED_FIFO 1 - #define SCHED_RR 2 - #define SCHED_BATCH 3 --/* SCHED_ISO: reserved but not implemented yet */ -+/* SCHED_ISO: Implemented on MuQSS only */ - #define SCHED_IDLE 5 -+#ifdef CONFIG_SCHED_MUQSS -+#define SCHED_ISO 4 -+#define SCHED_IDLEPRIO SCHED_IDLE -+#define SCHED_MAX (SCHED_IDLEPRIO) -+#define SCHED_RANGE(policy) ((policy) <= SCHED_MAX) -+#else /* CONFIG_SCHED_MUQSS */ - #define SCHED_DEADLINE 6 -+#endif /* CONFIG_SCHED_MUQSS */ - - /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ - #define SCHED_RESET_ON_FORK 0x40000000 -diff --git a/init/Kconfig b/init/Kconfig -index b4daad2bac23..da90d33ba4b3 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -73,6 +73,18 @@ config THREAD_INFO_IN_TASK - - menu "General 
setup" - -+config SCHED_MUQSS -+ bool "MuQSS cpu scheduler" -+ select HIGH_RES_TIMERS -+ ---help--- -+ The Multiple Queue Skiplist Scheduler for excellent interactivity and -+ responsiveness on the desktop and highly scalable deterministic -+ low latency on any hardware. -+ -+ Say Y here. -+ default y -+ -+ - config BROKEN - bool - -@@ -802,6 +814,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION -+ depends on !SCHED_MUQSS - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -901,9 +914,13 @@ menuconfig CGROUP_SCHED - help - This feature lets CPU scheduler recognize task groups and control CPU - bandwidth allocation to such task groups. It uses cgroups to group -- tasks. -+ tasks. In combination with MuQSS this is purely a STUB to create the -+ files associated with the CPU controller cgroup but most of the -+ controls do nothing. This is useful for working in environments and -+ with applications that will only work if this control group is -+ present. - --if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_MUQSS - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED -@@ -1032,6 +1049,7 @@ config CGROUP_DEVICE - - config CGROUP_CPUACCT - bool "Simple CPU accounting controller" -+ depends on !SCHED_MUQSS - help - Provides a simple controller for monitoring the - total CPU consumed by the tasks in a cgroup. -@@ -1150,6 +1168,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -+ depends on !SCHED_MUQSS - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -diff --git a/init/init_task.c b/init/init_task.c -index 9e5cbe5eab7b..5c2bcbf25add 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -66,9 +66,17 @@ struct task_struct init_task - .stack = init_stack, - .usage = REFCOUNT_INIT(2), - .flags = PF_KTHREAD, -+#ifdef CONFIG_SCHED_MUQSS -+ .prio = NORMAL_PRIO, -+ .static_prio = MAX_PRIO - 20, -+ .normal_prio = NORMAL_PRIO, -+ .deadline = 0, -+ .time_slice = 1000000, -+#else - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, - .normal_prio = MAX_PRIO - 20, -+#endif - .policy = SCHED_NORMAL, - .cpus_ptr = &init_task.cpus_mask, - .cpus_mask = CPU_MASK_ALL, -@@ -78,6 +86,7 @@ struct task_struct init_task - .restart_block = { - .fn = do_no_restart_syscall, - }, -+#ifndef CONFIG_SCHED_MUQSS - .se = { - .group_node = LIST_HEAD_INIT(init_task.se.group_node), - }, -@@ -85,6 +94,7 @@ struct task_struct init_task - .run_list = LIST_HEAD_INIT(init_task.rt.run_list), - .time_slice = RR_TIMESLICE, - }, -+#endif - .tasks = LIST_HEAD_INIT(init_task.tasks), - #ifdef CONFIG_SMP - .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), -diff --git a/init/main.c b/init/main.c -index 91f6ebb30ef0..22792032de64 100644 ---- a/init/main.c -+++ b/init/main.c -@@ -1124,6 +1124,8 @@ static int __ref kernel_init(void *unused) - - rcu_end_inkernel_boot(); - -+ print_scheduler_version(); -+ - if (ramdisk_execute_command) { - ret = run_init_process(ramdisk_execute_command); - if (!ret) -diff --git a/kernel/Kconfig.MuQSS b/kernel/Kconfig.MuQSS -new file mode 100644 -index 000000000000..a6a58781ef91 ---- /dev/null -+++ b/kernel/Kconfig.MuQSS -@@ -0,0 +1,105 @@ -+choice -+ prompt "CPU scheduler runqueue sharing" -+ default RQ_MC if SCHED_MUQSS -+ default RQ_NONE -+ -+config RQ_NONE -+ bool "No sharing" 
-+ help -+ This is the default behaviour where the CPU scheduler has one runqueue -+ per CPU, whether it is a physical or logical CPU (hyperthread). -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=none -+ -+ If unsure, say N. -+ -+config RQ_SMT -+ bool "SMT (hyperthread) siblings" -+ depends on SCHED_SMT && SCHED_MUQSS -+ -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by SMT (hyperthread) siblings. As these logical cores share -+ one physical core, sharing the runqueue resource can lead to decreased -+ overhead, lower latency and higher throughput. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=smt -+ -+ If unsure, say N. -+ -+config RQ_MC -+ bool "Multicore siblings" -+ depends on SCHED_MC && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by multicore siblings in addition to any SMT siblings. -+ As these physical cores share caches, sharing the runqueue resource -+ will lead to lower latency, but its effects on overhead and throughput -+ are less predictable. As a general rule, 6 or fewer cores will likely -+ benefit from this, while larger CPUs will only derive a latency -+ benefit. If your workloads are primarily single threaded, this will -+ possibly worsen throughput. If you are only concerned about latency -+ then enable this regardless of how many cores you have. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=mc -+ -+ If unsure, say Y. -+ -+config RQ_MC_LLC -+ bool "Multicore siblings (LLC)" -+ depends on SCHED_MC && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will behave similarly as -+ with "Multicore siblings". -+ This option takes LLC cache into account when scheduling tasks. -+ Option may benefit CPUs with multiple LLC caches, such as Ryzen -+ and Xeon CPUs. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=llc -+ -+ If unsure, say N. -+ -+config RQ_SMP -+ bool "Symmetric Multi-Processing" -+ depends on SMP && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by all physical CPUs unless they are on separate NUMA nodes. -+ As physical CPUs usually do not share resources, sharing the runqueue -+ will normally worsen throughput but improve latency. If you only -+ care about latency enable this. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=smp -+ -+ If unsure, say N. -+ -+config RQ_ALL -+ bool "NUMA" -+ depends on SMP && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ regardless of the architecture configuration, including across NUMA -+ nodes. This can substantially decrease throughput in NUMA -+ configurations, but light NUMA designs will not be dramatically -+ affected. This option should only be chosen if latency is the prime -+ concern. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=all -+ -+ If unsure, say N. 
-+endchoice -+ -+config SHARERQ -+ int -+ default 0 if RQ_NONE -+ default 1 if RQ_SMT -+ default 2 if RQ_MC -+ default 3 if RQ_MC_LLC -+ default 4 if RQ_SMP -+ default 5 if RQ_ALL -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 38ef6d06888e..89ed751ac4e4 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -5,7 +5,8 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_100 if SCHED_MUQSS -+ default HZ_250_NODEF if !SCHED_MUQSS - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -20,11 +21,18 @@ choice - config HZ_100 - bool "100 HZ" - help -+ 100 Hz is a suitable choice in combination with MuQSS which does -+ not rely on ticks for rescheduling interrupts, and is not Hz limited -+ for timeouts and sleeps from both the kernel and userspace. -+ This allows us to benefit from the lower overhead and higher -+ throughput of fewer timer ticks. -+ -+ Non-MuQSS kernels: - 100 Hz is a typical choice for servers, SMP and NUMA systems - with lots of processors that may show reduced performance if - too many timer interrupts are occurring. - -- config HZ_250 -+ config HZ_250_NODEF - bool "250 HZ" - help - 250 Hz is a good compromise choice allowing server performance -@@ -32,7 +40,10 @@ choice - on SMP and NUMA systems. If you are going to be using NTSC video - or multimedia, selected 300Hz instead. - -- config HZ_300 -+ 250 Hz is the default choice for the mainline scheduler but not -+ advantageous in combination with MuQSS. -+ -+ config HZ_300_NODEF - bool "300 HZ" - help - 300 Hz is a good compromise choice allowing server performance -@@ -40,7 +51,7 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -- config HZ_1000 -+ config HZ_1000_NODEF - bool "1000 HZ" - help - 1000 Hz is the preferred choice for desktop systems and other -@@ -51,9 +62,9 @@ endchoice - config HZ - int - default 100 if HZ_100 -- default 250 if HZ_250 -- default 300 if HZ_300 -- default 1000 if HZ_1000 -+ default 250 if HZ_250_NODEF -+ default 300 if HZ_300_NODEF -+ default 1000 if HZ_1000_NODEF - - config SCHED_HRTICK - def_bool HIGH_RES_TIMERS -diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt -index deff97217496..883998dd0437 100644 ---- a/kernel/Kconfig.preempt -+++ b/kernel/Kconfig.preempt -@@ -2,7 +2,7 @@ - - choice - prompt "Preemption Model" -- default PREEMPT_NONE -+ default PREEMPT - - config PREEMPT_NONE - bool "No Forced Preemption (Server)" -@@ -18,7 +18,7 @@ config PREEMPT_NONE - latencies. - - config PREEMPT_VOLUNTARY -- bool "Voluntary Kernel Preemption (Desktop)" -+ bool "Voluntary Kernel Preemption (Nothing)" - depends on !ARCH_NO_PREEMPT - help - This option reduces the latency of the kernel by adding more -@@ -33,7 +33,8 @@ config PREEMPT_VOLUNTARY - applications to run more 'smoothly' even when the system is - under load. - -- Select this if you are building a kernel for a desktop system. -+ Select this for no system in particular (choose Preemptible -+ instead on a desktop if you know what's good for you). 
- - config PREEMPT - bool "Preemptible Kernel (Low-Latency Desktop)" -diff --git a/kernel/Makefile b/kernel/Makefile -index daad787fb795..9bb44fc4ef5b 100644 ---- a/kernel/Makefile -+++ b/kernel/Makefile -@@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o \ - extable.o params.o \ - kthread.o sys_ni.o nsproxy.o \ - notifier.o ksysfs.o cred.o reboot.o \ -- async.o range.o smpboot.o ucount.o -+ async.o range.o smpboot.o ucount.o skip_list.o - - obj-$(CONFIG_MODULES) += kmod.o - obj-$(CONFIG_MULTIUSER) += groups.o -diff --git a/kernel/delayacct.c b/kernel/delayacct.c -index 27725754ac99..769d773c7182 100644 ---- a/kernel/delayacct.c -+++ b/kernel/delayacct.c -@@ -106,7 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) - */ - t1 = tsk->sched_info.pcount; - t2 = tsk->sched_info.run_delay; -- t3 = tsk->se.sum_exec_runtime; -+ t3 = tsk_seruntime(tsk); - - d->cpu_count += t1; - -diff --git a/kernel/exit.c b/kernel/exit.c -index a46a50d67002..58043176b285 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -131,7 +131,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->curr_target = next_thread(tsk); - } - -- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, -+ add_device_randomness((const void*) &tsk_seruntime(tsk), - sizeof(unsigned long long)); - - /* -@@ -152,7 +152,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->inblock += task_io_get_inblock(tsk); - sig->oublock += task_io_get_oublock(tsk); - task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; -+ sig->sum_sched_runtime += tsk_seruntime(tsk); - sig->nr_threads--; - __unhash_process(tsk, group_dead); - write_sequnlock(&sig->stats_lock); -diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig -index f92d9a687372..d17db0ff775f 100644 ---- a/kernel/irq/Kconfig -+++ b/kernel/irq/Kconfig -@@ -111,6 +111,23 @@ config GENERIC_IRQ_RESERVATION_MODE - config IRQ_FORCED_THREADING - bool - -+config FORCE_IRQ_THREADING -+ bool "Make IRQ threading compulsory" -+ depends on IRQ_FORCED_THREADING -+ default n -+ ---help--- -+ -+ Make IRQ threading mandatory for any IRQ handlers that support it -+ instead of being optional and requiring the threadirqs kernel -+ parameter. Instead they can be optionally disabled with the -+ nothreadirqs kernel parameter. -+ -+ Enabling this may make some architectures not boot with runqueue -+ sharing and MuQSS. -+ -+ Enable if you are building for a desktop or low latency system, -+ otherwise say N. 
-+ - config SPARSE_IRQ - bool "Support sparse irq numbering" if MAY_HAVE_SPARSE_IRQ - ---help--- -diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c -index 1753486b440c..f43423737493 100644 ---- a/kernel/irq/manage.c -+++ b/kernel/irq/manage.c -@@ -24,9 +24,20 @@ - #include "internals.h" - - #if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT) -+#ifdef CONFIG_FORCE_IRQ_THREADING -+__read_mostly bool force_irqthreads = true; -+#else - __read_mostly bool force_irqthreads; -+#endif - EXPORT_SYMBOL_GPL(force_irqthreads); - -+static int __init setup_noforced_irqthreads(char *arg) -+{ -+ force_irqthreads = false; -+ return 0; -+} -+early_param("nothreadirqs", setup_noforced_irqthreads); -+ - static int __init setup_forced_irqthreads(char *arg) - { - force_irqthreads = true; -diff --git a/kernel/kthread.c b/kernel/kthread.c -index b262f47046ca..9797ad652268 100644 ---- a/kernel/kthread.c -+++ b/kernel/kthread.c -@@ -433,6 +433,34 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) - } - EXPORT_SYMBOL(kthread_bind); - -+#if defined(CONFIG_SCHED_MUQSS) && defined(CONFIG_SMP) -+extern void __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); -+ -+/* -+ * new_kthread_bind is a special variant of __kthread_bind_mask. -+ * For new threads to work on muqss we want to call do_set_cpus_allowed -+ * without the task_cpu being set and the task rescheduled until they're -+ * rescheduled on their own so we call __do_set_cpus_allowed directly which -+ * only changes the cpumask. This is particularly important for smpboot threads -+ * to work. -+ */ -+static void new_kthread_bind(struct task_struct *p, unsigned int cpu) -+{ -+ unsigned long flags; -+ -+ if (WARN_ON(!wait_task_inactive(p, TASK_UNINTERRUPTIBLE))) -+ return; -+ -+ /* It's safe because the task is inactive. */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ __do_set_cpus_allowed(p, cpumask_of(cpu)); -+ p->flags |= PF_NO_SETAFFINITY; -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+#else -+#define new_kthread_bind(p, cpu) kthread_bind(p, cpu) -+#endif -+ - /** - * kthread_create_on_cpu - Create a cpu bound kthread - * @threadfn: the function to run until signal_pending(current). -@@ -454,7 +482,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), - cpu); - if (IS_ERR(p)) - return p; -- kthread_bind(p, cpu); -+ new_kthread_bind(p, cpu); - /* CPU hotplug need to bind once again when unparking the thread. */ - set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags); - to_kthread(p)->cpu = cpu; -diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c -index cdf318d86dd6..304c0c8c2bea 100644 ---- a/kernel/livepatch/transition.c -+++ b/kernel/livepatch/transition.c -@@ -282,7 +282,7 @@ static bool klp_try_switch_task(struct task_struct *task) - { - static char err_buf[STACK_ERR_BUF_SIZE]; - struct rq *rq; -- struct rq_flags flags; -+ struct rq_flags rf; - int ret; - bool success = false; - -@@ -304,7 +304,7 @@ static bool klp_try_switch_task(struct task_struct *task) - * functions. If all goes well, switch the task to the target patch - * state. 
- */ -- rq = task_rq_lock(task, &flags); -+ rq = task_rq_lock(task, &rf); - - if (task_running(rq, task) && task != current) { - snprintf(err_buf, STACK_ERR_BUF_SIZE, -@@ -323,7 +323,7 @@ static bool klp_try_switch_task(struct task_struct *task) - task->patch_state = klp_target_state; - - done: -- task_rq_unlock(rq, task, &flags); -+ task_rq_unlock(rq, task, &rf); - - /* - * Due to console deadlock issues, pr_debug() can't be used while -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 21fb5a5662b5..a04ffebc6b7a 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -16,15 +16,23 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) - CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer - endif - -+ifdef CONFIG_SCHED_MUQSS -+obj-y += MuQSS.o clock.o cputime.o -+obj-y += idle.o -+obj-y += wait.o wait_bit.o swait.o completion.o -+ -+obj-$(CONFIG_SMP) += topology.o -+else - obj-y += core.o loadavg.o clock.o cputime.o - obj-y += idle.o fair.o rt.o deadline.o - obj-y += wait.o wait_bit.o swait.o completion.o - - obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o --obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o - obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o -+endif -+obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o - obj-$(CONFIG_MEMBARRIER) += membarrier.o -diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c -new file mode 100644 -index 000000000000..fafb5a790cf1 ---- /dev/null -+++ b/kernel/sched/MuQSS.c -@@ -0,0 +1,7606 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * kernel/sched/MuQSS.c, was kernel/sched.c -+ * -+ * Kernel scheduler and related syscalls -+ * -+ * Copyright (C) 1991-2002 Linus Torvalds -+ * -+ * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and -+ * make semaphores SMP safe -+ * 1998-11-19 Implemented schedule_timeout() and related stuff -+ * by Andrea Arcangeli -+ * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar: -+ * hybrid priority-list and round-robin design with -+ * an array-switch method of distributing timeslices -+ * and per-CPU runqueues. Cleanups and useful suggestions -+ * by Davide Libenzi, preemptible kernel bits by Robert Love. -+ * 2003-09-03 Interactivity tuning by Con Kolivas. -+ * 2004-04-02 Scheduler domains code by Nick Piggin -+ * 2007-04-15 Work begun on replacing all interactivity tuning with a -+ * fair scheduling design by Con Kolivas. -+ * 2007-05-05 Load balancing (smp-nice) and other improvements -+ * by Peter Williams -+ * 2007-05-06 Interactivity improvements to CFS by Mike Galbraith -+ * 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri -+ * 2007-11-29 RT balancing improvements by Steven Rostedt, Gregory Haskins, -+ * Thomas Gleixner, Mike Kravetz -+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes -+ * a whole lot of those previous things. -+ * 2016-10-01 Multiple Queue Skiplist Scheduler scalable evolution of BFS -+ * scheduler by Con Kolivas. 
-+ * 2019-08-31 LLC bits by Eduards Bezverhijs -+ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "../workqueue_internal.h" -+#include "../smpboot.h" -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+#include "MuQSS.h" -+ -+#define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO) -+#define rt_task(p) rt_prio((p)->prio) -+#define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) -+#define is_rt_policy(policy) ((policy) == SCHED_FIFO || \ -+ (policy) == SCHED_RR) -+#define has_rt_policy(p) unlikely(is_rt_policy((p)->policy)) -+ -+#define is_idle_policy(policy) ((policy) == SCHED_IDLEPRIO) -+#define idleprio_task(p) unlikely(is_idle_policy((p)->policy)) -+#define task_running_idle(p) unlikely((p)->prio == IDLE_PRIO) -+ -+#define is_iso_policy(policy) ((policy) == SCHED_ISO) -+#define iso_task(p) unlikely(is_iso_policy((p)->policy)) -+#define task_running_iso(p) unlikely((p)->prio == ISO_PRIO) -+ -+#define rq_idle(rq) ((rq)->rq_prio == PRIO_LIMIT) -+ -+#define ISO_PERIOD (5 * HZ) -+ -+#define STOP_PRIO (MAX_RT_PRIO - 1) -+ -+/* -+ * Some helpers for converting to/from various scales. Use shifts to get -+ * approximate multiples of ten for less overhead. -+ */ -+#define APPROX_NS_PS (1073741824) /* Approximate ns per second */ -+#define JIFFIES_TO_NS(TIME) ((TIME) * (APPROX_NS_PS / HZ)) -+#define JIFFY_NS (APPROX_NS_PS / HZ) -+#define JIFFY_US (1048576 / HZ) -+#define NS_TO_JIFFIES(TIME) ((TIME) / JIFFY_NS) -+#define HALF_JIFFY_NS (APPROX_NS_PS / HZ / 2) -+#define HALF_JIFFY_US (1048576 / HZ / 2) -+#define MS_TO_NS(TIME) ((TIME) << 20) -+#define MS_TO_US(TIME) ((TIME) << 10) -+#define NS_TO_MS(TIME) ((TIME) >> 20) -+#define NS_TO_US(TIME) ((TIME) >> 10) -+#define US_TO_NS(TIME) ((TIME) << 10) -+#define TICK_APPROX_NS ((APPROX_NS_PS+HZ/2)/HZ) -+ -+#define RESCHED_US (100) /* Reschedule if less than this many μs left */ -+ -+void print_scheduler_version(void) -+{ -+ printk(KERN_INFO "MuQSS CPU scheduler v0.196 by Con Kolivas.\n"); -+} -+ -+/* Define RQ share levels */ -+#define RQSHARE_NONE 0 -+#define RQSHARE_SMT 1 -+#define RQSHARE_MC 2 -+#define RQSHARE_MC_LLC 3 -+#define RQSHARE_SMP 4 -+#define RQSHARE_ALL 5 -+ -+/* Define locality levels */ -+#define LOCALITY_SAME 0 -+#define LOCALITY_SMT 1 -+#define LOCALITY_MC_LLC 2 -+#define LOCALITY_MC 3 -+#define LOCALITY_SMP 4 -+#define LOCALITY_DISTANT 5 -+ -+/* -+ * This determines what level of runqueue sharing will be done and is -+ * configurable at boot time with the bootparam rqshare = -+ */ -+static int rqshare __read_mostly = CONFIG_SHARERQ; /* Default RQSHARE_MC */ -+ -+static int __init set_rqshare(char *str) -+{ -+ if (!strncmp(str, "none", 4)) { -+ rqshare = RQSHARE_NONE; -+ return 0; -+ } -+ if (!strncmp(str, "smt", 3)) { -+ rqshare = RQSHARE_SMT; -+ return 0; -+ } -+ if (!strncmp(str, "mc", 2)) { -+ rqshare = RQSHARE_MC; -+ return 0; -+ } -+ if (!strncmp(str, "llc", 3)) { -+ rqshare = RQSHARE_MC_LLC; -+ return 0; -+ } -+ if (!strncmp(str, "smp", 3)) { -+ rqshare = RQSHARE_SMP; -+ return 0; -+ } -+ if (!strncmp(str, "all", 3)) { -+ rqshare = RQSHARE_ALL; -+ return 0; -+ } -+ return 1; -+} -+__setup("rqshare=", set_rqshare); -+ -+/* -+ * This is the time all tasks within the same priority round robin. -+ * Value is in ms and set to a minimum of 6ms. -+ * Tunable via /proc interface. 
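A quick worked example of the shift-based conversions defined above (illustrative only; HZ=100 is assumed for the printout, matching the Kconfig.hz default this patch selects for MuQSS): MS_TO_US multiplies by 1024 instead of 1000, so the 6 ms rr_interval below becomes a 6144 us timeslice, and APPROX_NS_PS is 2^30, about 1.074e9 ns rather than exactly 1e9. A standalone userspace sketch:

    /* Illustrative only: the MuQSS scale macros copied here so the program is
     * self-contained; HZ=100 is an assumption for the demonstration. */
    #include <stdio.h>

    #define HZ 100
    #define APPROX_NS_PS (1073741824)          /* 2^30, vs the exact 1000000000 */
    #define JIFFY_NS (APPROX_NS_PS / HZ)
    #define MS_TO_US(TIME) ((TIME) << 10)      /* ~*1000 done as *1024 */
    #define MS_TO_NS(TIME) ((TIME) << 20)      /* ~*1000000 done as *1048576 */

    int main(void)
    {
            printf("JIFFY_NS    = %d ns  (exact: %d ns)\n", JIFFY_NS, 1000000000 / HZ);
            printf("MS_TO_US(6) = %d us  (exact: 6000 us)\n", MS_TO_US(6));
            printf("MS_TO_NS(1) = %d ns  (exact: 1000000 ns)\n", MS_TO_NS(1));
            return 0;
    }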
-+ */ -+int rr_interval __read_mostly = 6; -+ -+/* -+ * Tunable to choose whether to prioritise latency or throughput, simple -+ * binary yes or no -+ */ -+int sched_interactive __read_mostly = 1; -+ -+/* -+ * sched_iso_cpu - sysctl which determines the cpu percentage SCHED_ISO tasks -+ * are allowed to run five seconds as real time tasks. This is the total over -+ * all online cpus. -+ */ -+int sched_iso_cpu __read_mostly = 70; -+ -+/* -+ * sched_yield_type - Choose what sort of yield sched_yield will perform. -+ * 0: No yield. -+ * 1: Yield only to better priority/deadline tasks. (default) -+ * 2: Expire timeslice and recalculate deadline. -+ */ -+int sched_yield_type __read_mostly = 1; -+ -+/* -+ * The relative length of deadline for each priority(nice) level. -+ */ -+static int prio_ratios[NICE_WIDTH] __read_mostly; -+ -+ -+/* -+ * The quota handed out to tasks of all priority levels when refilling their -+ * time_slice. -+ */ -+static inline int timeslice(void) -+{ -+ return MS_TO_US(rr_interval); -+} -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+ -+#ifdef CONFIG_SMP -+/* -+ * Total number of runqueues. Equals number of CPUs when there is no runqueue -+ * sharing but is usually less with SMT/MC sharing of runqueues. -+ */ -+static int total_runqueues __read_mostly = 1; -+ -+static cpumask_t cpu_idle_map ____cacheline_aligned_in_smp; -+ -+struct rq *cpu_rq(int cpu) -+{ -+ return &per_cpu(runqueues, (cpu)); -+} -+#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -+ -+/* -+ * For asym packing, by default the lower numbered cpu has higher priority. -+ */ -+int __weak arch_asym_cpu_priority(int cpu) -+{ -+ return -cpu; -+} -+ -+int __weak arch_sd_sibling_asym_packing(void) -+{ -+ return 0*SD_ASYM_PACKING; -+} -+ -+#ifdef CONFIG_SCHED_SMT -+DEFINE_STATIC_KEY_FALSE(sched_smt_present); -+EXPORT_SYMBOL_GPL(sched_smt_present); -+#endif -+ -+#else -+struct rq *uprq; -+#endif /* CONFIG_SMP */ -+ -+#include "stats.h" -+ -+/* -+ * All common locking functions performed on rq->lock. rq->clock is local to -+ * the CPU accessing it so it can be modified just with interrupts disabled -+ * when we're not updating niffies. -+ * Looking up task_rq must be done under rq->lock to be safe. -+ */ -+ -+/* -+ * RQ-clock updating methods: -+ */ -+ -+#ifdef HAVE_SCHED_AVG_IRQ -+static void update_irq_load_avg(struct rq *rq, long delta); -+#else -+static inline void update_irq_load_avg(struct rq *rq, long delta) {} -+#endif -+ -+static void update_rq_clock_task(struct rq *rq, s64 delta) -+{ -+/* -+ * In theory, the compile should just see 0 here, and optimize out the call -+ * to sched_rt_avg_update. But I don't trust it... -+ */ -+ s64 __maybe_unused steal = 0, irq_delta = 0; -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; -+ -+ /* -+ * Since irq_time is only updated on {soft,}irq_exit, we might run into -+ * this case when a previous update_rq_clock() happened inside a -+ * {soft,}irq region. -+ * -+ * When this happens, we stop ->clock_task and only update the -+ * prev_irq_time stamp to account for the part that fit, so that a next -+ * update will consume the rest. This ensures ->clock_task is -+ * monotonic. -+ * -+ * It does however cause some slight miss-attribution of {soft,}irq -+ * time, a more accurate solution would be to update the irq_time using -+ * the current rq->clock timestamp, except that would require using -+ * atomic ops. 
-+ */ -+ if (irq_delta > delta) -+ irq_delta = delta; -+ -+ rq->prev_irq_time += irq_delta; -+ delta -= irq_delta; -+#endif -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ if (static_key_false((¶virt_steal_rq_enabled))) { -+ steal = paravirt_steal_clock(cpu_of(rq)); -+ steal -= rq->prev_steal_time_rq; -+ -+ if (unlikely(steal > delta)) -+ steal = delta; -+ -+ rq->prev_steal_time_rq += steal; -+ delta -= steal; -+ } -+#endif -+ rq->clock_task += delta; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ if (irq_delta + steal) -+ update_irq_load_avg(rq, irq_delta + steal); -+#endif -+} -+ -+static inline void update_rq_clock(struct rq *rq) -+{ -+ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; -+ -+ if (unlikely(delta < 0)) -+ return; -+ rq->clock += delta; -+ update_rq_clock_task(rq, delta); -+} -+ -+/* -+ * Niffies are a globally increasing nanosecond counter. They're only used by -+ * update_load_avg and time_slice_expired, however deadlines are based on them -+ * across CPUs. Update them whenever we will call one of those functions, and -+ * synchronise them across CPUs whenever we hold both runqueue locks. -+ */ -+static inline void update_clocks(struct rq *rq) -+{ -+ s64 ndiff, minndiff; -+ long jdiff; -+ -+ update_rq_clock(rq); -+ ndiff = rq->clock - rq->old_clock; -+ rq->old_clock = rq->clock; -+ jdiff = jiffies - rq->last_jiffy; -+ -+ /* Subtract any niffies added by balancing with other rqs */ -+ ndiff -= rq->niffies - rq->last_niffy; -+ minndiff = JIFFIES_TO_NS(jdiff) - rq->niffies + rq->last_jiffy_niffies; -+ if (minndiff < 0) -+ minndiff = 0; -+ ndiff = max(ndiff, minndiff); -+ rq->niffies += ndiff; -+ rq->last_niffy = rq->niffies; -+ if (jdiff) { -+ rq->last_jiffy += jdiff; -+ rq->last_jiffy_niffies = rq->niffies; -+ } -+} -+ -+/* -+ * Any time we have two runqueues locked we use that as an opportunity to -+ * synchronise niffies to the highest value as idle ticks may have artificially -+ * kept niffies low on one CPU and the truth can only be later. -+ */ -+static inline void synchronise_niffies(struct rq *rq1, struct rq *rq2) -+{ -+ if (rq1->niffies > rq2->niffies) -+ rq2->niffies = rq1->niffies; -+ else -+ rq1->niffies = rq2->niffies; -+} -+ -+/* -+ * double_rq_lock - safely lock two runqueues -+ * -+ * Note this does not disable interrupts like task_rq_lock, -+ * you need to do so manually before calling. -+ */ -+ -+/* For when we know rq1 != rq2 */ -+static inline void __double_rq_lock(struct rq *rq1, struct rq *rq2) -+ __acquires(rq1->lock) -+ __acquires(rq2->lock) -+{ -+ if (rq1 < rq2) { -+ raw_spin_lock(rq1->lock); -+ raw_spin_lock_nested(rq2->lock, SINGLE_DEPTH_NESTING); -+ } else { -+ raw_spin_lock(rq2->lock); -+ raw_spin_lock_nested(rq1->lock, SINGLE_DEPTH_NESTING); -+ } -+} -+ -+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) -+ __acquires(rq1->lock) -+ __acquires(rq2->lock) -+{ -+ BUG_ON(!irqs_disabled()); -+ if (rq1->lock == rq2->lock) { -+ raw_spin_lock(rq1->lock); -+ __acquire(rq2->lock); /* Fake it out ;) */ -+ } else -+ __double_rq_lock(rq1, rq2); -+ synchronise_niffies(rq1, rq2); -+} -+ -+/* -+ * double_rq_unlock - safely unlock two runqueues -+ * -+ * Note this does not restore interrupts like task_rq_unlock, -+ * you need to do so manually after calling. 
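The calling convention these comments describe can be summarised with a short sketch (illustrative only, not part of the patch; it would only compile inside MuQSS.c since the helpers are static, and the wrapper function itself is an assumption): interrupts are disabled by the caller, both locks are taken in address order, and niffies are synchronised as a side effect.

    /* Illustrative sketch, not part of the patch */
    static void example_with_both_rqs_locked(struct rq *rq1, struct rq *rq2)
    {
            unsigned long flags;

            local_irq_save(flags);          /* double_rq_lock() asserts irqs_disabled() */
            double_rq_lock(rq1, rq2);       /* address-ordered, also syncs niffies */

            /* ... inspect or migrate tasks while both runqueues are pinned ... */

            double_rq_unlock(rq1, rq2);
            local_irq_restore(flags);       /* double_rq_unlock() does not restore irqs */
    }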
-+ */ -+static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) -+ __releases(rq1->lock) -+ __releases(rq2->lock) -+{ -+ raw_spin_unlock(rq1->lock); -+ if (rq1->lock != rq2->lock) -+ raw_spin_unlock(rq2->lock); -+ else -+ __release(rq2->lock); -+} -+ -+static inline void lock_all_rqs(void) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ for_each_possible_cpu(cpu) { -+ struct rq *rq = cpu_rq(cpu); -+ -+ do_raw_spin_lock(rq->lock); -+ } -+} -+ -+static inline void unlock_all_rqs(void) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) { -+ struct rq *rq = cpu_rq(cpu); -+ -+ do_raw_spin_unlock(rq->lock); -+ } -+ preempt_enable(); -+} -+ -+/* Specially nest trylock an rq */ -+static inline bool trylock_rq(struct rq *this_rq, struct rq *rq) -+{ -+ if (unlikely(!do_raw_spin_trylock(rq->lock))) -+ return false; -+ spin_acquire(&rq->lock->dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ synchronise_niffies(this_rq, rq); -+ return true; -+} -+ -+/* Unlock a specially nested trylocked rq */ -+static inline void unlock_rq(struct rq *rq) -+{ -+ spin_release(&rq->lock->dep_map, 1, _RET_IP_); -+ do_raw_spin_unlock(rq->lock); -+} -+ -+/* -+ * cmpxchg based fetch_or, macro so it works for different integer types -+ */ -+#define fetch_or(ptr, mask) \ -+ ({ \ -+ typeof(ptr) _ptr = (ptr); \ -+ typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _old, _val = *_ptr; \ -+ \ -+ for (;;) { \ -+ _old = cmpxchg(_ptr, _val, _val | _mask); \ -+ if (_old == _val) \ -+ break; \ -+ _val = _old; \ -+ } \ -+ _old; \ -+}) -+ -+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) -+/* -+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * this avoids any races wrt polling state changes and thereby avoids -+ * spurious IPIs. -+ */ -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+} -+ -+/* -+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. -+ * -+ * If this returns true, then the idle task promises to call -+ * sched_ttwu_pending() and reschedule soon. -+ */ -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); -+ -+ for (;;) { -+ if (!(val & _TIF_POLLING_NRFLAG)) -+ return false; -+ if (val & _TIF_NEED_RESCHED) -+ return true; -+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); -+ if (old == val) -+ break; -+ val = old; -+ } -+ return true; -+} -+ -+#else -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ return false; -+} -+#endif -+#endif -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. -+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. 
-+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. -+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); -+ /* Task can safely be re-inserted now */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ -+static inline void smp_sched_reschedule(int cpu) -+{ -+ if (likely(cpu_online(cpu))) -+ smp_send_reschedule(cpu); -+} -+ -+/* -+ * resched_task - mark a task 'to be rescheduled now'. -+ * -+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. -+ */ -+void resched_task(struct task_struct *p) -+{ -+ int cpu; -+#ifdef CONFIG_LOCKDEP -+ /* Kernel threads call this when creating workqueues while still -+ * inactive from __kthread_bind_mask, holding only the pi_lock */ -+ if (!(p->flags & PF_KTHREAD)) { -+ struct rq *rq = task_rq(p); -+ -+ lockdep_assert_held(rq->lock); -+ } -+#endif -+ if (test_tsk_need_resched(p)) -+ return; -+ -+ cpu = task_cpu(p); -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(p)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+/* -+ * A task that is not running or queued will not have a node set. -+ * A task that is queued but not running will have a node set. -+ * A task that is currently running will have ->on_cpu set but no node set. 
-+ */ -+static inline bool task_queued(struct task_struct *p) -+{ -+ return !skiplist_node_empty(&p->node); -+} -+ -+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags); -+static inline void resched_if_idle(struct rq *rq); -+ -+/* Dodgy workaround till we figure out where the softirqs are going */ -+static inline void do_pending_softirq(struct rq *rq, struct task_struct *next) -+{ -+ if (unlikely(next == rq->idle && local_softirq_pending() && !in_interrupt())) -+ do_softirq_own_stack(); -+} -+ -+static inline bool deadline_before(u64 deadline, u64 time) -+{ -+ return (deadline < time); -+} -+ -+/* -+ * Deadline is "now" in niffies + (offset by priority). Setting the deadline -+ * is the key to everything. It distributes cpu fairly amongst tasks of the -+ * same nice value, it proportions cpu according to nice level, it means the -+ * task that last woke up the longest ago has the earliest deadline, thus -+ * ensuring that interactive tasks get low latency on wake up. The CPU -+ * proportion works out to the square of the virtual deadline difference, so -+ * this equation will give nice 19 3% CPU compared to nice 0. -+ */ -+static inline u64 prio_deadline_diff(int user_prio) -+{ -+ return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128)); -+} -+ -+static inline u64 task_deadline_diff(struct task_struct *p) -+{ -+ return prio_deadline_diff(TASK_USER_PRIO(p)); -+} -+ -+static inline u64 static_deadline_diff(int static_prio) -+{ -+ return prio_deadline_diff(USER_PRIO(static_prio)); -+} -+ -+static inline int longest_deadline_diff(void) -+{ -+ return prio_deadline_diff(39); -+} -+ -+static inline int ms_longest_deadline_diff(void) -+{ -+ return NS_TO_MS(longest_deadline_diff()); -+} -+ -+static inline bool rq_local(struct rq *rq); -+ -+#ifndef SCHED_CAPACITY_SCALE -+#define SCHED_CAPACITY_SCALE 1024 -+#endif -+ -+static inline int rq_load(struct rq *rq) -+{ -+ return rq->nr_running; -+} -+ -+/* -+ * Update the load average for feeding into cpu frequency governors. Use a -+ * rough estimate of a rolling average with ~ time constant of 32ms. -+ * 80/128 ~ 0.63. * 80 / 32768 / 128 == * 5 / 262144 -+ * Make sure a call to update_clocks has been made before calling this to get -+ * an updated rq->niffies. -+ */ -+static void update_load_avg(struct rq *rq, unsigned int flags) -+{ -+ long us_interval, load; -+ unsigned long curload; -+ -+ us_interval = NS_TO_US(rq->niffies - rq->load_update); -+ if (unlikely(us_interval <= 0)) -+ return; -+ -+ curload = rq_load(rq); -+ load = rq->load_avg - (rq->load_avg * us_interval * 5 / 262144); -+ if (unlikely(load < 0)) -+ load = 0; -+ load += curload * curload * SCHED_CAPACITY_SCALE * us_interval * 5 / 262144; -+ rq->load_avg = load; -+ -+ rq->load_update = rq->niffies; -+ update_irq_load_avg(rq, 0); -+ if (likely(rq_local(rq))) -+ cpufreq_trigger(rq, flags); -+} -+ -+#ifdef HAVE_SCHED_AVG_IRQ -+/* -+ * IRQ variant of update_load_avg below. delta is actually time in nanoseconds -+ * here so we scale curload to how long it's been since the last update. 
-+ */ -+static void update_irq_load_avg(struct rq *rq, long delta) -+{ -+ long us_interval, load; -+ unsigned long curload; -+ -+ us_interval = NS_TO_US(rq->niffies - rq->irq_load_update); -+ if (unlikely(us_interval <= 0)) -+ return; -+ -+ curload = NS_TO_US(delta) / us_interval; -+ load = rq->irq_load_avg - (rq->irq_load_avg * us_interval * 5 / 262144); -+ if (unlikely(load < 0)) -+ load = 0; -+ load += curload * curload * SCHED_CAPACITY_SCALE * us_interval * 5 / 262144; -+ rq->irq_load_avg = load; -+ -+ rq->irq_load_update = rq->niffies; -+} -+#endif -+ -+/* -+ * Removing from the runqueue. Enter with rq locked. Deleting a task -+ * from the skip list is done via the stored node reference in the task struct -+ * and does not require a full look up. Thus it occurs in O(k) time where k -+ * is the "level" of the list the task was stored at - usually < 4, max 8. -+ */ -+static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ skiplist_delete(rq->sl, &p->node); -+ rq->best_key = rq->node->next[0]->key; -+ update_clocks(rq); -+ -+ if (!(flags & DEQUEUE_SAVE)) { -+ sched_info_dequeued(rq, p); -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); -+ } -+ rq->nr_running--; -+ if (rt_task(p)) -+ rq->rt_nr_running--; -+ update_load_avg(rq, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_RCU -+static bool rcu_read_critical(struct task_struct *p) -+{ -+ return p->rcu_read_unlock_special.b.blocked; -+} -+#else /* CONFIG_PREEMPT_RCU */ -+#define rcu_read_critical(p) (false) -+#endif /* CONFIG_PREEMPT_RCU */ -+ -+/* -+ * To determine if it's safe for a task of SCHED_IDLEPRIO to actually run as -+ * an idle task, we ensure none of the following conditions are met. -+ */ -+static bool idleprio_suitable(struct task_struct *p) -+{ -+ return (!(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING)) && -+ !signal_pending(p) && !rcu_read_critical(p) && !freezing(p)); -+} -+ -+/* -+ * To determine if a task of SCHED_ISO can run in pseudo-realtime, we check -+ * that the iso_refractory flag is not set. -+ */ -+static inline bool isoprio_suitable(struct rq *rq) -+{ -+ return !rq->iso_refractory; -+} -+ -+/* -+ * Adding to the runqueue. Enter with rq locked. -+ */ -+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ unsigned int randseed, cflags = 0; -+ u64 sl_id; -+ -+ if (!rt_task(p)) { -+ /* Check it hasn't gotten rt from PI */ -+ if ((idleprio_task(p) && idleprio_suitable(p)) || -+ (iso_task(p) && isoprio_suitable(rq))) -+ p->prio = p->normal_prio; -+ else -+ p->prio = NORMAL_PRIO; -+ } else -+ rq->rt_nr_running++; -+ /* -+ * The sl_id key passed to the skiplist generates a sorted list. -+ * Realtime and sched iso tasks run FIFO so they only need be sorted -+ * according to priority. The skiplist will put tasks of the same -+ * key inserted later in FIFO order. Tasks of sched normal, batch -+ * and idleprio are sorted according to their deadlines. Idleprio -+ * tasks are offset by an impossibly large deadline value ensuring -+ * they get sorted into last positions, but still according to their -+ * own deadlines. This creates a "landscape" of skiplists running -+ * from priority 0 realtime in first place to the lowest priority -+ * idleprio tasks last. Skiplist insertion is an O(log n) process. 
-+ */ -+ if (p->prio <= ISO_PRIO) { -+ sl_id = p->prio; -+ } else { -+ sl_id = p->deadline; -+ if (idleprio_task(p)) { -+ if (p->prio == IDLE_PRIO) -+ sl_id |= 0xF000000000000000; -+ else -+ sl_id += longest_deadline_diff(); -+ } -+ } -+ /* -+ * Some architectures don't have better than microsecond resolution -+ * so mask out ~microseconds as the random seed for skiplist insertion. -+ */ -+ update_clocks(rq); -+ if (!(flags & ENQUEUE_RESTORE)) { -+ sched_info_queued(rq, p); -+ psi_enqueue(p, flags & ENQUEUE_WAKEUP); -+ } -+ -+ randseed = (rq->niffies >> 10) & 0xFFFFFFFF; -+ skiplist_insert(rq->sl, &p->node, sl_id, p, randseed); -+ rq->best_key = rq->node->next[0]->key; -+ if (p->in_iowait) -+ cflags |= SCHED_CPUFREQ_IOWAIT; -+ rq->nr_running++; -+ update_load_avg(rq, cflags); -+} -+ -+/* -+ * Returns the relative length of deadline all compared to the shortest -+ * deadline which is that of nice -20. -+ */ -+static inline int task_prio_ratio(struct task_struct *p) -+{ -+ return prio_ratios[TASK_USER_PRIO(p)]; -+} -+ -+/* -+ * task_timeslice - all tasks of all priorities get the exact same timeslice -+ * length. CPU distribution is handled by giving different deadlines to -+ * tasks of different priorities. Use 128 as the base value for fast shifts. -+ */ -+static inline int task_timeslice(struct task_struct *p) -+{ -+ return (rr_interval * task_prio_ratio(p) / 128); -+} -+ -+#ifdef CONFIG_SMP -+/* Entered with rq locked */ -+static inline void resched_if_idle(struct rq *rq) -+{ -+ if (rq_idle(rq)) -+ resched_task(rq->curr); -+} -+ -+static inline bool rq_local(struct rq *rq) -+{ -+ return (rq->cpu == smp_processor_id()); -+} -+#ifdef CONFIG_SMT_NICE -+static const cpumask_t *thread_cpumask(int cpu); -+ -+/* Find the best real time priority running on any SMT siblings of cpu and if -+ * none are running, the static priority of the best deadline task running. -+ * The lookups to the other runqueues is done lockless as the occasional wrong -+ * value would be harmless. */ -+static int best_smt_bias(struct rq *this_rq) -+{ -+ int other_cpu, best_bias = 0; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct rq *rq = cpu_rq(other_cpu); -+ -+ if (rq_idle(rq)) -+ continue; -+ if (unlikely(!rq->online)) -+ continue; -+ if (!rq->rq_mm) -+ continue; -+ if (likely(rq->rq_smt_bias > best_bias)) -+ best_bias = rq->rq_smt_bias; -+ } -+ return best_bias; -+} -+ -+static int task_prio_bias(struct task_struct *p) -+{ -+ if (rt_task(p)) -+ return 1 << 30; -+ else if (task_running_iso(p)) -+ return 1 << 29; -+ else if (task_running_idle(p)) -+ return 0; -+ return MAX_PRIO - p->static_prio; -+} -+ -+static bool smt_always_schedule(struct task_struct __maybe_unused *p, struct rq __maybe_unused *this_rq) -+{ -+ return true; -+} -+ -+static bool (*smt_schedule)(struct task_struct *p, struct rq *this_rq) = &smt_always_schedule; -+ -+/* We've already decided p can run on CPU, now test if it shouldn't for SMT -+ * nice reasons. 
*/ -+static bool smt_should_schedule(struct task_struct *p, struct rq *this_rq) -+{ -+ int best_bias, task_bias; -+ -+ /* Kernel threads always run */ -+ if (unlikely(!p->mm)) -+ return true; -+ if (rt_task(p)) -+ return true; -+ if (!idleprio_suitable(p)) -+ return true; -+ best_bias = best_smt_bias(this_rq); -+ /* The smt siblings are all idle or running IDLEPRIO */ -+ if (best_bias < 1) -+ return true; -+ task_bias = task_prio_bias(p); -+ if (task_bias < 1) -+ return false; -+ if (task_bias >= best_bias) -+ return true; -+ /* Dither 25% cpu of normal tasks regardless of nice difference */ -+ if (best_bias % 4 == 1) -+ return true; -+ /* Sorry, you lose */ -+ return false; -+} -+#else /* CONFIG_SMT_NICE */ -+#define smt_schedule(p, this_rq) (true) -+#endif /* CONFIG_SMT_NICE */ -+ -+static inline void atomic_set_cpu(int cpu, cpumask_t *cpumask) -+{ -+ set_bit(cpu, (volatile unsigned long *)cpumask); -+} -+ -+/* -+ * The cpu_idle_map stores a bitmap of all the CPUs currently idle to -+ * allow easy lookup of whether any suitable idle CPUs are available. -+ * It's cheaper to maintain a binary yes/no if there are any idle CPUs on the -+ * idle_cpus variable than to do a full bitmask check when we are busy. The -+ * bits are set atomically but read locklessly as occasional false positive / -+ * negative is harmless. -+ */ -+static inline void set_cpuidle_map(int cpu) -+{ -+ if (likely(cpu_online(cpu))) -+ atomic_set_cpu(cpu, &cpu_idle_map); -+} -+ -+static inline void atomic_clear_cpu(int cpu, cpumask_t *cpumask) -+{ -+ clear_bit(cpu, (volatile unsigned long *)cpumask); -+} -+ -+static inline void clear_cpuidle_map(int cpu) -+{ -+ atomic_clear_cpu(cpu, &cpu_idle_map); -+} -+ -+static bool suitable_idle_cpus(struct task_struct *p) -+{ -+ return (cpumask_intersects(p->cpus_ptr, &cpu_idle_map)); -+} -+ -+/* -+ * Resched current on rq. We don't know if rq is local to this CPU nor if it -+ * is locked so we do not use an intermediate variable for the task to avoid -+ * having it dereferenced. -+ */ -+static void resched_curr(struct rq *rq) -+{ -+ int cpu; -+ -+ if (test_tsk_need_resched(rq->curr)) -+ return; -+ -+ rq->preempt = rq->curr; -+ cpu = rq->cpu; -+ -+ /* We're doing this without holding the rq lock if it's not task_rq */ -+ -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(rq->curr); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(rq->curr)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+#define CPUIDLE_DIFF_THREAD (1) -+#define CPUIDLE_DIFF_CORE_LLC (2) -+#define CPUIDLE_DIFF_CORE (4) -+#define CPUIDLE_CACHE_BUSY (8) -+#define CPUIDLE_DIFF_CPU (16) -+#define CPUIDLE_THREAD_BUSY (32) -+#define CPUIDLE_DIFF_NODE (64) -+ -+/* -+ * The best idle CPU is chosen according to the CPUIDLE ranking above where the -+ * lowest value would give the most suitable CPU to schedule p onto next. The -+ * order works out to be the following: -+ * -+ * Same thread, idle or busy cache, idle or busy threads -+ * Other core, same cache, idle or busy cache, idle threads. -+ * Same node, other CPU, idle cache, idle threads. -+ * Same node, other CPU, busy cache, idle threads. -+ * Other core, same cache, busy threads. -+ * Same node, other CPU, busy threads. -+ * Other node, other CPU, idle cache, idle threads. -+ * Other node, other CPU, busy cache, idle threads. -+ * Other node, other CPU, busy threads. 
-+ */ -+static int best_mask_cpu(int best_cpu, struct rq *rq, cpumask_t *tmpmask) -+{ -+ int best_ranking = CPUIDLE_DIFF_NODE | CPUIDLE_THREAD_BUSY | -+ CPUIDLE_DIFF_CPU | CPUIDLE_CACHE_BUSY | CPUIDLE_DIFF_CORE | -+ CPUIDLE_DIFF_CORE_LLC | CPUIDLE_DIFF_THREAD; -+ int cpu_tmp; -+ -+ if (cpumask_test_cpu(best_cpu, tmpmask)) -+ goto out; -+ -+ for_each_cpu(cpu_tmp, tmpmask) { -+ int ranking, locality; -+ struct rq *tmp_rq; -+ -+ ranking = 0; -+ tmp_rq = cpu_rq(cpu_tmp); -+ -+ locality = rq->cpu_locality[cpu_tmp]; -+#ifdef CONFIG_NUMA -+ if (locality > LOCALITY_SMP) -+ ranking |= CPUIDLE_DIFF_NODE; -+ else -+#endif -+ if (locality > LOCALITY_MC) -+ ranking |= CPUIDLE_DIFF_CPU; -+#ifdef CONFIG_SCHED_MC -+ else if (locality == LOCALITY_MC_LLC) -+ ranking |= CPUIDLE_DIFF_CORE_LLC; -+ else if (locality == LOCALITY_MC) -+ ranking |= CPUIDLE_DIFF_CORE; -+ if (!(tmp_rq->cache_idle(tmp_rq))) -+ ranking |= CPUIDLE_CACHE_BUSY; -+#endif -+#ifdef CONFIG_SCHED_SMT -+ if (locality == LOCALITY_SMT) -+ ranking |= CPUIDLE_DIFF_THREAD; -+#endif -+ if (ranking < best_ranking -+#ifdef CONFIG_SCHED_SMT -+ || (ranking == best_ranking && (tmp_rq->siblings_idle(tmp_rq))) -+#endif -+ ) { -+ best_cpu = cpu_tmp; -+ best_ranking = ranking; -+ } -+ } -+out: -+ return best_cpu; -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ struct rq *this_rq = cpu_rq(this_cpu); -+ -+ return (this_rq->cpu_locality[that_cpu] < LOCALITY_SMP); -+} -+ -+/* As per resched_curr but only will resched idle task */ -+static inline void resched_idle(struct rq *rq) -+{ -+ if (test_tsk_need_resched(rq->idle)) -+ return; -+ -+ rq->preempt = rq->idle; -+ -+ set_tsk_need_resched(rq->idle); -+ -+ if (rq_local(rq)) { -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ smp_sched_reschedule(rq->cpu); -+} -+ -+static struct rq *resched_best_idle(struct task_struct *p, int cpu) -+{ -+ cpumask_t tmpmask; -+ struct rq *rq; -+ int best_cpu; -+ -+ cpumask_and(&tmpmask, p->cpus_ptr, &cpu_idle_map); -+ best_cpu = best_mask_cpu(cpu, task_rq(p), &tmpmask); -+ rq = cpu_rq(best_cpu); -+ if (!smt_schedule(p, rq)) -+ return NULL; -+ rq->preempt = p; -+ resched_idle(rq); -+ return rq; -+} -+ -+static inline void resched_suitable_idle(struct task_struct *p) -+{ -+ if (suitable_idle_cpus(p)) -+ resched_best_idle(p, task_cpu(p)); -+} -+ -+static inline struct rq *rq_order(struct rq *rq, int cpu) -+{ -+ return rq->rq_order[cpu]; -+} -+#else /* CONFIG_SMP */ -+static inline void set_cpuidle_map(int cpu) -+{ -+} -+ -+static inline void clear_cpuidle_map(int cpu) -+{ -+} -+ -+static inline bool suitable_idle_cpus(struct task_struct *p) -+{ -+ return uprq->curr == uprq->idle; -+} -+ -+static inline void resched_suitable_idle(struct task_struct *p) -+{ -+} -+ -+static inline void resched_curr(struct rq *rq) -+{ -+ resched_task(rq->curr); -+} -+ -+static inline void resched_if_idle(struct rq *rq) -+{ -+} -+ -+static inline bool rq_local(struct rq *rq) -+{ -+ return true; -+} -+ -+static inline struct rq *rq_order(struct rq *rq, int cpu) -+{ -+ return rq; -+} -+ -+static inline bool smt_schedule(struct task_struct *p, struct rq *rq) -+{ -+ return true; -+} -+#endif /* CONFIG_SMP */ -+ -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ if (idleprio_task(p)) -+ return IDLE_PRIO; -+ if (iso_task(p)) -+ return ISO_PRIO; -+ return NORMAL_PRIO; -+} -+ -+/* -+ * Calculate the current priority, i.e. the priority -+ * taken into account by the scheduler. 
This value might -+ * be boosted by RT tasks as it will be RT if the task got -+ * RT-boosted. If not then it returns p->normal_prio. -+ */ -+static int effective_prio(struct task_struct *p) -+{ -+ p->normal_prio = normal_prio(p); -+ /* -+ * If we are RT tasks or we were boosted to RT priority, -+ * keep the priority unchanged. Otherwise, update priority -+ * to the normal priority: -+ */ -+ if (!rt_prio(p->prio)) -+ return p->normal_prio; -+ return p->prio; -+} -+ -+/* -+ * activate_task - move a task to the runqueue. Enter with rq locked. -+ */ -+static void activate_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ resched_if_idle(rq); -+ -+ /* -+ * Sleep time is in units of nanosecs, so shift by 20 to get a -+ * milliseconds-range estimation of the amount of time that the task -+ * spent sleeping: -+ */ -+ if (unlikely(prof_on == SLEEP_PROFILING)) { -+ if (p->state == TASK_UNINTERRUPTIBLE) -+ profile_hits(SLEEP_PROFILING, (void *)get_wchan(p), -+ (rq->niffies - p->last_ran) >> 20); -+ } -+ -+ p->prio = effective_prio(p); -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible--; -+ -+ enqueue_task(rq, p, flags); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+} -+ -+/* -+ * deactivate_task - If it's running, it's not on the runqueue and we can just -+ * decrement the nr_running. Enter with rq locked. -+ */ -+static inline void deactivate_task(struct task_struct *p, struct rq *rq) -+{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible++; -+ -+ p->on_rq = 0; -+ sched_info_dequeued(rq, p); -+ /* deactivate_task is always DEQUEUE_SLEEP in muqss */ -+ psi_dequeue(p, DEQUEUE_SLEEP); -+} -+ -+#ifdef CONFIG_SMP -+void set_task_cpu(struct task_struct *p, unsigned int new_cpu) -+{ -+ struct rq *rq; -+ -+ if (task_cpu(p) == new_cpu) -+ return; -+ -+ /* Do NOT call set_task_cpu on a currently queued task as we will not -+ * be reliably holding the rq lock after changing CPU. */ -+ BUG_ON(task_queued(p)); -+ rq = task_rq(p); -+ -+#ifdef CONFIG_LOCKDEP -+ /* -+ * The caller should hold either p->pi_lock or rq->lock, when changing -+ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. -+ * -+ * Furthermore, all task_rq users should acquire both locks, see -+ * task_rq_lock(). -+ */ -+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || -+ lockdep_is_held(rq->lock))); -+#endif -+ -+ trace_sched_migrate_task(p, new_cpu); -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ -+ /* -+ * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be -+ * successfully executed on another CPU. We must ensure that updates of -+ * per-task data have been completed by this moment. -+ */ -+ smp_wmb(); -+ -+ p->wake_cpu = new_cpu; -+ -+ if (task_running(rq, p)) { -+ /* -+ * We should only be calling this on a running task if we're -+ * holding rq lock. -+ */ -+ lockdep_assert_held(rq->lock); -+ -+ /* -+ * We can't change the task_thread_info CPU on a running task -+ * as p will still be protected by the rq lock of the CPU it -+ * is still running on so we only set the wake_cpu for it to be -+ * lazily updated once off the CPU. -+ */ -+ return; -+ } -+ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ WRITE_ONCE(p->cpu, new_cpu); -+#else -+ WRITE_ONCE(task_thread_info(p)->cpu, new_cpu); -+#endif -+ /* We're no longer protecting p after this point since we're holding -+ * the wrong runqueue lock. */ -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Move a task off the runqueue and take it to a cpu for it will -+ * become the running task. 
-+ */ -+static inline void take_task(struct rq *rq, int cpu, struct task_struct *p) -+{ -+ struct rq *p_rq = task_rq(p); -+ -+ dequeue_task(p_rq, p, DEQUEUE_SAVE); -+ if (p_rq != rq) { -+ sched_info_dequeued(p_rq, p); -+ sched_info_queued(rq, p); -+ } -+ set_task_cpu(p, cpu); -+} -+ -+/* -+ * Returns a descheduling task to the runqueue unless it is being -+ * deactivated. -+ */ -+static inline void return_task(struct task_struct *p, struct rq *rq, -+ int cpu, bool deactivate) -+{ -+ if (deactivate) -+ deactivate_task(p, rq); -+ else { -+#ifdef CONFIG_SMP -+ /* -+ * set_task_cpu was called on the running task that doesn't -+ * want to deactivate so it has to be enqueued to a different -+ * CPU and we need its lock. Tag it to be moved with as the -+ * lock is dropped in finish_lock_switch. -+ */ -+ if (unlikely(p->wake_cpu != cpu)) -+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); -+ else -+#endif -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ } -+} -+ -+/* Enter with rq lock held. We know p is on the local cpu */ -+static inline void __set_tsk_resched(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ -+/** -+ * task_curr - is this task currently executing on a CPU? -+ * @p: the task in question. -+ * -+ * Return: 1 if the task is currently executing. 0 otherwise. -+ */ -+inline int task_curr(const struct task_struct *p) -+{ -+ return cpu_curr(task_cpu(p)) == p; -+} -+ -+#ifdef CONFIG_SMP -+/* -+ * wait_task_inactive - wait for a thread to unschedule. -+ * -+ * If @match_state is nonzero, it's the @p->state value just checked and -+ * not expected to change. If it changes, i.e. @p might have woken up, -+ * then return zero. When we succeed in waiting for @p to be off its CPU, -+ * we return a positive number (its total switch count). If a second call -+ * a short while later returns the same number, the caller can be sure that -+ * @p has remained unscheduled the whole time. -+ * -+ * The caller must ensure that the task *will* unschedule sometime soon, -+ * else this function might spin for a *long* time. This function can't -+ * be called with interrupts off, or it may introduce deadlock with -+ * smp_call_function() if an IPI is sent by the same process we are -+ * waiting to become inactive. -+ */ -+unsigned long wait_task_inactive(struct task_struct *p, long match_state) -+{ -+ int running, queued; -+ struct rq_flags rf; -+ unsigned long ncsw; -+ struct rq *rq; -+ -+ for (;;) { -+ rq = task_rq(p); -+ -+ /* -+ * If the task is actively running on another CPU -+ * still, just relax and busy-wait without holding -+ * any locks. -+ * -+ * NOTE! Since we don't hold any locks, it's not -+ * even sure that "rq" stays as the right runqueue! -+ * But we don't care, since this will return false -+ * if the runqueue has changed and p is actually now -+ * running somewhere else! -+ */ -+ while (task_running(rq, p)) { -+ if (match_state && unlikely(p->state != match_state)) -+ return 0; -+ cpu_relax(); -+ } -+ -+ /* -+ * Ok, time to look more closely! We need the rq -+ * lock now, to be *sure*. If we're wrong, we'll -+ * just go back and repeat. -+ */ -+ rq = task_rq_lock(p, &rf); -+ trace_sched_wait_task(p); -+ running = task_running(rq, p); -+ queued = task_on_rq_queued(p); -+ ncsw = 0; -+ if (!match_state || p->state == match_state) -+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ -+ task_rq_unlock(rq, p, &rf); -+ -+ /* -+ * If it changed from the expected state, bail out now. 
-+ */ -+ if (unlikely(!ncsw)) -+ break; -+ -+ /* -+ * Was it really running after all now that we -+ * checked with the proper locks actually held? -+ * -+ * Oops. Go back and try again.. -+ */ -+ if (unlikely(running)) { -+ cpu_relax(); -+ continue; -+ } -+ -+ /* -+ * It's not enough that it's not actively running, -+ * it must be off the runqueue _entirely_, and not -+ * preempted! -+ * -+ * So if it was still runnable (but just not actively -+ * running right now), it's preempted, and we should -+ * yield - it could be a while. -+ */ -+ if (unlikely(queued)) { -+ ktime_t to = NSEC_PER_SEC / HZ; -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ continue; -+ } -+ -+ /* -+ * Ahh, all good. It wasn't running, and it wasn't -+ * runnable, which means that it will never become -+ * running in the future either. We're all done! -+ */ -+ break; -+ } -+ -+ return ncsw; -+} -+ -+/*** -+ * kick_process - kick a running thread to enter/exit the kernel -+ * @p: the to-be-kicked thread -+ * -+ * Cause a process which is running on another CPU to enter -+ * kernel-mode, without any delay. (to get signals handled.) -+ * -+ * NOTE: this function doesn't have to take the runqueue lock, -+ * because all it wants to ensure is that the remote task enters -+ * the kernel. If the IPI races and the task has been migrated -+ * to another CPU then no harm is done and the purpose has been -+ * achieved as well. -+ */ -+void kick_process(struct task_struct *p) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ cpu = task_cpu(p); -+ if ((cpu != smp_processor_id()) && task_curr(p)) -+ smp_sched_reschedule(cpu); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(kick_process); -+#endif -+ -+/* -+ * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the -+ * basis of earlier deadlines. SCHED_IDLEPRIO don't preempt anything else or -+ * between themselves, they cooperatively multitask. An idle rq scores as -+ * prio PRIO_LIMIT so it is always preempted. -+ */ -+static inline bool -+can_preempt(struct task_struct *p, int prio, u64 deadline) -+{ -+ /* Better static priority RT task or better policy preemption */ -+ if (p->prio < prio) -+ return true; -+ if (p->prio > prio) -+ return false; -+ if (p->policy == SCHED_BATCH) -+ return false; -+ /* SCHED_NORMAL and ISO will preempt based on deadline */ -+ if (!deadline_before(p->deadline, deadline)) -+ return false; -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+ -+static inline bool is_per_cpu_kthread(struct task_struct *p) -+{ -+ if (!(p->flags & PF_KTHREAD)) -+ return false; -+ -+ if (p->nr_cpus_allowed != 1) -+ return false; -+ -+ return true; -+} -+ -+/* -+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see -+ * __set_cpus_allowed_ptr(). -+ */ -+static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -+{ -+ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ -+ if (is_per_cpu_kthread(p)) -+ return cpu_online(cpu); -+ -+ return cpu_active(cpu); -+} -+ -+/* -+ * Check to see if p can run on cpu, and if not, whether there are any online -+ * CPUs it can run on instead. This only happens with the hotplug threads that -+ * bring up the CPUs. 
-+ */ -+static inline bool sched_other_cpu(struct task_struct *p, int cpu) -+{ -+ if (likely(cpumask_test_cpu(cpu, p->cpus_ptr))) -+ return false; -+ if (p->nr_cpus_allowed == 1) { -+ cpumask_t valid_mask; -+ -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_online_mask); -+ if (unlikely(cpumask_empty(&valid_mask))) -+ return false; -+ } -+ return true; -+} -+ -+static inline bool needs_other_cpu(struct task_struct *p, int cpu) -+{ -+ if (cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ return true; -+} -+ -+#define cpu_online_map (*(cpumask_t *)cpu_online_mask) -+ -+static void try_preempt(struct task_struct *p, struct rq *this_rq) -+{ -+ int i, this_entries = rq_load(this_rq); -+ cpumask_t tmp; -+ -+ if (suitable_idle_cpus(p) && resched_best_idle(p, task_cpu(p))) -+ return; -+ -+ /* IDLEPRIO tasks never preempt anything but idle */ -+ if (p->policy == SCHED_IDLEPRIO) -+ return; -+ -+ cpumask_and(&tmp, &cpu_online_map, p->cpus_ptr); -+ -+ for (i = 0; i < num_online_cpus(); i++) { -+ struct rq *rq = this_rq->cpu_order[i]; -+ -+ if (!cpumask_test_cpu(rq->cpu, &tmp)) -+ continue; -+ -+ if (!sched_interactive && rq != this_rq && rq_load(rq) <= this_entries) -+ continue; -+ if (smt_schedule(p, rq) && can_preempt(p, rq->rq_prio, rq->rq_deadline)) { -+ /* We set rq->preempting lockless, it's a hint only */ -+ rq->preempting = p; -+ resched_curr(rq); -+ return; -+ } -+ } -+} -+ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check); -+#else /* CONFIG_SMP */ -+static inline bool needs_other_cpu(struct task_struct *p, int cpu) -+{ -+ return false; -+} -+ -+static void try_preempt(struct task_struct *p, struct rq *this_rq) -+{ -+ if (p->policy == SCHED_IDLEPRIO) -+ return; -+ if (can_preempt(p, uprq->rq_prio, uprq->rq_deadline)) -+ resched_curr(uprq); -+} -+ -+static inline int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * wake flags -+ */ -+#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -+#define WF_FORK 0x02 /* child wakeup after fork */ -+#define WF_MIGRATED 0x04 /* internal use, task got migrated */ -+ -+static void -+ttwu_stat(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq; -+ -+ if (!schedstat_enabled()) -+ return; -+ -+ rq = this_rq(); -+ -+#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) { -+ __schedstat_inc(rq->ttwu_local); -+ } else { -+ struct sched_domain *sd; -+ -+ rcu_read_lock(); -+ for_each_domain(rq->cpu, sd) { -+ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { -+ __schedstat_inc(sd->ttwu_wake_remote); -+ break; -+ } -+ } -+ rcu_read_unlock(); -+ } -+ -+#endif /* CONFIG_SMP */ -+ -+ __schedstat_inc(rq->ttwu_count); -+} -+ -+/* -+ * Mark the task runnable and perform wakeup-preemption. -+ */ -+static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ /* -+ * Sync wakeups (i.e. those types of wakeups where the waker -+ * has indicated that it will leave the CPU in short order) -+ * don't trigger a preemption if there are no idle cpus, -+ * instead waiting for current to deschedule. 
-+ */ -+ if (wake_flags & WF_SYNC) -+ resched_suitable_idle(p); -+ else -+ try_preempt(p, rq); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+} -+ -+static void -+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ int en_flags = ENQUEUE_WAKEUP; -+ -+ lockdep_assert_held(rq->lock); -+ -+#ifdef CONFIG_SMP -+ if (p->sched_contributes_to_load) -+ rq->nr_uninterruptible--; -+ -+ if (wake_flags & WF_MIGRATED) -+ en_flags |= ENQUEUE_MIGRATED; -+#endif -+ -+ activate_task(rq, p, en_flags); -+ ttwu_do_wakeup(rq, p, wake_flags); -+} -+ -+/* -+ * Called in case the task @p isn't fully descheduled from its runqueue, -+ * in this case we must do a remote wakeup. Its a 'light' wakeup though, -+ * since all we need to do is flip p->state to TASK_RUNNING, since -+ * the task is still ->on_rq. -+ */ -+static int ttwu_remote(struct task_struct *p, int wake_flags) -+{ -+ struct rq *rq; -+ int ret = 0; -+ -+ rq = __task_rq_lock(p, NULL); -+ if (likely(task_on_rq_queued(p))) { -+ ttwu_do_wakeup(rq, p, wake_flags); -+ ret = 1; -+ } -+ __task_rq_unlock(rq, NULL); -+ -+ return ret; -+} -+ -+#ifdef CONFIG_SMP -+void sched_ttwu_pending(void) -+{ -+ struct rq *rq = this_rq(); -+ struct llist_node *llist = llist_del_all(&rq->wake_list); -+ struct task_struct *p, *t; -+ struct rq_flags rf; -+ -+ if (!llist) -+ return; -+ -+ rq_lock_irqsave(rq, &rf); -+ -+ llist_for_each_entry_safe(p, t, llist, wake_entry) -+ ttwu_do_activate(rq, p, 0); -+ -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+void scheduler_ipi(void) -+{ -+ /* -+ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting -+ * TIF_NEED_RESCHED remotely (for the first time) will also send -+ * this IPI. -+ */ -+ preempt_fold_need_resched(); -+ -+ if (llist_empty(&this_rq()->wake_list) && (!idle_cpu(smp_processor_id()) || need_resched())) -+ return; -+ -+ /* -+ * Not all reschedule IPI handlers call irq_enter/irq_exit, since -+ * traditionally all their work was done from the interrupt return -+ * path. Now that we actually do some work, we need to make sure -+ * we do call them. -+ * -+ * Some archs already do call them, luckily irq_enter/exit nest -+ * properly. -+ * -+ * Arguably we should visit all archs and update all handlers, -+ * however a fair share of IPIs are still resched only so this would -+ * somewhat pessimize the simple resched case. 
-+ */ -+ irq_enter(); -+ sched_ttwu_pending(); -+ irq_exit(); -+} -+ -+static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { -+ if (!set_nr_if_polling(rq->idle)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+ } -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ rq_lock_irqsave(rq, &rf); -+ if (likely(is_idle_task(rq->curr))) -+ smp_sched_reschedule(cpu); -+ /* Else cpu is not in idle, do nothing here */ -+ rq_unlock_irqrestore(rq, &rf); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+ -+static int valid_task_cpu(struct task_struct *p) -+{ -+ cpumask_t valid_mask; -+ -+ if (p->flags & PF_KTHREAD) -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_all_mask); -+ else -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_active_mask); -+ -+ if (unlikely(!cpumask_weight(&valid_mask))) { -+ /* We shouldn't be hitting this any more */ -+ printk(KERN_WARNING "SCHED: No cpumask for %s/%d weight %d\n", p->comm, -+ p->pid, cpumask_weight(p->cpus_ptr)); -+ return cpumask_any(p->cpus_ptr); -+ } -+ return cpumask_any(&valid_mask); -+} -+ -+/* -+ * For a task that's just being woken up we have a valuable balancing -+ * opportunity so choose the nearest cache most lightly loaded runqueue. -+ * Entered with rq locked and returns with the chosen runqueue locked. -+ */ -+static inline int select_best_cpu(struct task_struct *p) -+{ -+ unsigned int idlest = ~0U; -+ struct rq *rq = NULL; -+ int i; -+ -+ if (suitable_idle_cpus(p)) { -+ int cpu = task_cpu(p); -+ -+ if (unlikely(needs_other_cpu(p, cpu))) -+ cpu = valid_task_cpu(p); -+ rq = resched_best_idle(p, cpu); -+ if (likely(rq)) -+ return rq->cpu; -+ } -+ -+ for (i = 0; i < num_online_cpus(); i++) { -+ struct rq *other_rq = task_rq(p)->cpu_order[i]; -+ int entries; -+ -+ if (!other_rq->online) -+ continue; -+ if (needs_other_cpu(p, other_rq->cpu)) -+ continue; -+ entries = rq_load(other_rq); -+ if (entries >= idlest) -+ continue; -+ idlest = entries; -+ rq = other_rq; -+ } -+ if (unlikely(!rq)) -+ return task_cpu(p); -+ return rq->cpu; -+} -+#else /* CONFIG_SMP */ -+static int valid_task_cpu(struct task_struct *p) -+{ -+ return 0; -+} -+ -+static inline int select_best_cpu(struct task_struct *p) -+{ -+ return 0; -+} -+ -+static struct rq *resched_best_idle(struct task_struct *p, int cpu) -+{ -+ return NULL; -+} -+#endif /* CONFIG_SMP */ -+ -+static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+#if defined(CONFIG_SMP) -+ if (!cpus_share_cache(smp_processor_id(), cpu)) { -+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ -+ ttwu_queue_remote(p, cpu, wake_flags); -+ return; -+ } -+#endif -+ rq_lock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ rq_unlock(rq); -+} -+ -+/*** -+ * try_to_wake_up - wake up a thread -+ * @p: the thread to be awakened -+ * @state: the mask of task states that can be woken -+ * @wake_flags: wake modifier flags (WF_*) -+ * -+ * Put it on the run-queue if it's not already there. 
The "current" -+ * thread is always on the run-queue (except when the actual -+ * re-schedule is in progress), and as such you're allowed to do -+ * the simpler "current->state = TASK_RUNNING" to mark yourself -+ * runnable without the overhead of this. -+ * -+ * Return: %true if @p was woken up, %false if it was already running. -+ * or @state didn't match @p's state. -+ */ -+static int -+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) -+{ -+ unsigned long flags; -+ int cpu, success = 0; -+ -+ preempt_disable(); -+ if (p == current) { -+ /* -+ * We're waking current, this means 'p->on_rq' and 'task_cpu(p) -+ * == smp_processor_id()'. Together this means we can special -+ * case the whole 'p->on_rq && ttwu_remote()' case below -+ * without taking any locks. -+ * -+ * In particular: -+ * - we rely on Program-Order guarantees for all the ordering, -+ * - we're serialized against set_special_state() by virtue of -+ * it disabling IRQs (this allows not taking ->pi_lock). -+ */ -+ if (!(p->state & state)) -+ goto out; -+ -+ success = 1; -+ cpu = task_cpu(p); -+ trace_sched_waking(p); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+ goto out; -+ } -+ -+ /* -+ * If we are going to wake up a thread waiting for CONDITION we -+ * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with mb() in -+ * set_current_state() the waiting thread does. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ smp_mb__after_spinlock(); -+ if (!(p->state & state)) -+ goto unlock; -+ -+ trace_sched_waking(p); -+ -+ /* We're going to change ->state: */ -+ success = 1; -+ cpu = task_cpu(p); -+ -+ /* -+ * Ensure we load p->on_rq _after_ p->state, otherwise it would -+ * be possible to, falsely, observe p->on_rq == 0 and get stuck -+ * in smp_cond_load_acquire() below. -+ * -+ * sched_ttwu_pending() try_to_wake_up() -+ * STORE p->on_rq = 1 LOAD p->state -+ * UNLOCK rq->lock -+ * -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * UNLOCK rq->lock -+ * -+ * [task p] -+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ if (p->on_rq && ttwu_remote(p, wake_flags)) -+ goto unlock; -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -+ * possible to, falsely, observe p->on_cpu == 0. -+ * -+ * One must be running (->on_cpu == 1) in order to remove oneself -+ * from the runqueue. -+ * -+ * __schedule() (switch to task 'p') try_to_wake_up() -+ * STORE p->on_cpu = 1 LOAD p->on_rq -+ * UNLOCK rq->lock -+ * -+ * __schedule() (put 'p' to sleep) -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * STORE p->on_rq = 0 LOAD p->on_cpu -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until its done referencing the task. -+ * -+ * Pairs with the smp_store_release() in finish_task(). -+ * -+ * This ensures that tasks getting woken will be fully ordered against -+ * their previous state and preserve Program Order. 
-+ */ -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ p->sched_contributes_to_load = !!task_contributes_to_load(p); -+ p->state = TASK_WAKING; -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+ cpu = select_best_cpu(p); -+ if (task_cpu(p) != cpu) { -+ wake_flags |= WF_MIGRATED; -+ psi_ttwu_dequeue(p); -+ set_task_cpu(p, cpu); -+ } -+ -+#else /* CONFIG_SMP */ -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+#endif /* CONFIG_SMP */ -+ -+ ttwu_queue(p, cpu, wake_flags); -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+out: -+ if (success) -+ ttwu_stat(p, cpu, wake_flags); -+ preempt_enable(); -+ -+ return success; -+} -+ -+/** -+ * wake_up_process - Wake up a specific process -+ * @p: The process to be woken up. -+ * -+ * Attempt to wake up the nominated process and move it to the set of runnable -+ * processes. -+ * -+ * Return: 1 if the process was woken up, 0 if it was already running. -+ * -+ * This function executes a full memory barrier before accessing the task state. -+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+int wake_up_state(struct task_struct *p, unsigned int state) -+{ -+ return try_to_wake_up(p, state, 0); -+} -+ -+static void time_slice_expired(struct task_struct *p, struct rq *rq); -+ -+/* -+ * Perform scheduler related setup for a newly forked process p. -+ * p is forked by current. -+ */ -+int sched_fork(unsigned long __maybe_unused clone_flags, struct task_struct *p) -+{ -+ unsigned long flags; -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ INIT_HLIST_HEAD(&p->preempt_notifiers); -+#endif -+ -+#ifdef CONFIG_COMPACTION -+ p->capture_control = NULL; -+#endif -+ -+ /* -+ * We mark the process as NEW here. This guarantees that -+ * nobody will actually run it, and a signal or other external -+ * event cannot wake it up and insert it on the runqueue either. -+ */ -+ p->state = TASK_NEW; -+ -+ /* -+ * The process state is set to the same value of the process executing -+ * do_fork() code. That is running. This guarantees that nobody will -+ * actually run it, and a signal or other external event cannot wake -+ * it up and insert it on the runqueue either. -+ */ -+ -+ /* Should be reset in fork.c but done here for ease of MuQSS patching */ -+ p->on_cpu = -+ p->on_rq = -+ p->utime = -+ p->stime = -+ p->sched_time = -+ p->stime_ns = -+ p->utime_ns = 0; -+ skiplist_node_init(&p->node); -+ -+ /* -+ * Revert to default priority/policy on fork if requested. -+ */ -+ if (unlikely(p->sched_reset_on_fork)) { -+ if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) { -+ p->policy = SCHED_NORMAL; -+ p->normal_prio = normal_prio(p); -+ } -+ -+ if (PRIO_TO_NICE(p->static_prio) < 0) { -+ p->static_prio = NICE_TO_PRIO(0); -+ p->normal_prio = p->static_prio; -+ } -+ -+ /* -+ * We don't need the reset flag anymore after the fork. It has -+ * fulfilled its duty: -+ */ -+ p->sched_reset_on_fork = 0; -+ } -+ -+ /* -+ * Silence PROVE_RCU. 
-+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ set_task_cpu(p, smp_processor_id()); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ return 0; -+} -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+static bool __initdata __sched_schedstats = false; -+ -+static void set_schedstats(bool enabled) -+{ -+ if (enabled) -+ static_branch_enable(&sched_schedstats); -+ else -+ static_branch_disable(&sched_schedstats); -+} -+ -+void force_schedstat_enabled(void) -+{ -+ if (!schedstat_enabled()) { -+ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); -+ static_branch_enable(&sched_schedstats); -+ } -+} -+ -+static int __init setup_schedstats(char *str) -+{ -+ int ret = 0; -+ if (!str) -+ goto out; -+ -+ /* -+ * This code is called before jump labels have been set up, so we can't -+ * change the static branch directly just yet. Instead set a temporary -+ * variable so init_schedstats() can do it later. -+ */ -+ if (!strcmp(str, "enable")) { -+ __sched_schedstats = true; -+ ret = 1; -+ } else if (!strcmp(str, "disable")) { -+ __sched_schedstats = false; -+ ret = 1; -+ } -+out: -+ if (!ret) -+ pr_warn("Unable to parse schedstats=\n"); -+ -+ return ret; -+} -+__setup("schedstats=", setup_schedstats); -+ -+static void __init init_schedstats(void) -+{ -+ set_schedstats(__sched_schedstats); -+} -+ -+#ifdef CONFIG_PROC_SYSCTL -+int sysctl_schedstats(struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) -+{ -+ struct ctl_table t; -+ int err; -+ int state = static_branch_likely(&sched_schedstats); -+ -+ if (write && !capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ t = *table; -+ t.data = &state; -+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); -+ if (err < 0) -+ return err; -+ if (write) -+ set_schedstats(state); -+ return err; -+} -+#endif /* CONFIG_PROC_SYSCTL */ -+#else /* !CONFIG_SCHEDSTATS */ -+static inline void init_schedstats(void) {} -+#endif /* CONFIG_SCHEDSTATS */ -+ -+static void update_cpu_clock_switch(struct rq *rq, struct task_struct *p); -+ -+static void account_task_cpu(struct rq *rq, struct task_struct *p) -+{ -+ update_clocks(rq); -+ /* This isn't really a context switch but accounting is the same */ -+ update_cpu_clock_switch(rq, p); -+ p->last_ran = rq->niffies; -+} -+ -+bool sched_smp_initialized __read_mostly; -+ -+static inline int hrexpiry_enabled(struct rq *rq) -+{ -+ if (unlikely(!cpu_active(cpu_of(rq)) || !sched_smp_initialized)) -+ return 0; -+ return hrtimer_is_hres_active(&rq->hrexpiry_timer); -+} -+ -+/* -+ * Use HR-timers to deliver accurate preemption points. -+ */ -+static inline void hrexpiry_clear(struct rq *rq) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ if (hrtimer_active(&rq->hrexpiry_timer)) -+ hrtimer_cancel(&rq->hrexpiry_timer); -+} -+ -+/* -+ * High-resolution time_slice expiry. -+ * Runs from hardirq context with interrupts disabled. -+ */ -+static enum hrtimer_restart hrexpiry(struct hrtimer *timer) -+{ -+ struct rq *rq = container_of(timer, struct rq, hrexpiry_timer); -+ struct task_struct *p; -+ -+ /* This can happen during CPU hotplug / resume */ -+ if (unlikely(cpu_of(rq) != smp_processor_id())) -+ goto out; -+ -+ /* -+ * We're doing this without the runqueue lock but this should always -+ * be run on the local CPU. 
Time slice should run out in __schedule -+ * but we set it to zero here in case niffies is slightly less. -+ */ -+ p = rq->curr; -+ p->time_slice = 0; -+ __set_tsk_resched(p); -+out: -+ return HRTIMER_NORESTART; -+} -+ -+/* -+ * Called to set the hrexpiry timer state. -+ * -+ * called with irqs disabled from the local CPU only -+ */ -+static void hrexpiry_start(struct rq *rq, u64 delay) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ -+ hrtimer_start(&rq->hrexpiry_timer, ns_to_ktime(delay), -+ HRTIMER_MODE_REL_PINNED); -+} -+ -+static void init_rq_hrexpiry(struct rq *rq) -+{ -+ hrtimer_init(&rq->hrexpiry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ rq->hrexpiry_timer.function = hrexpiry; -+} -+ -+static inline int rq_dither(struct rq *rq) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return HALF_JIFFY_US; -+ return 0; -+} -+ -+/* -+ * wake_up_new_task - wake up a newly created task for the first time. -+ * -+ * This function will do some initial scheduler statistics housekeeping -+ * that must be done for every newly created context, then puts the task -+ * on the runqueue and wakes it. -+ */ -+void wake_up_new_task(struct task_struct *p) -+{ -+ struct task_struct *parent, *rq_curr; -+ struct rq *rq, *new_rq; -+ unsigned long flags; -+ -+ parent = p->parent; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ p->state = TASK_RUNNING; -+ /* Task_rq can't change yet on a new task */ -+ new_rq = rq = task_rq(p); -+ if (unlikely(needs_other_cpu(p, task_cpu(p)))) { -+ set_task_cpu(p, valid_task_cpu(p)); -+ new_rq = task_rq(p); -+ } -+ -+ double_rq_lock(rq, new_rq); -+ rq_curr = rq->curr; -+ -+ /* -+ * Make sure we do not leak PI boosting priority to the child. -+ */ -+ p->prio = rq_curr->normal_prio; -+ -+ trace_sched_wakeup_new(p); -+ -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. If it's negative, it won't -+ * matter since that's the same as being 0. rq->rq_deadline is only -+ * modified within schedule() so it is always equal to -+ * current->deadline. -+ */ -+ account_task_cpu(rq, rq_curr); -+ p->last_ran = rq_curr->last_ran; -+ if (likely(rq_curr->policy != SCHED_FIFO)) { -+ rq_curr->time_slice /= 2; -+ if (rq_curr->time_slice < RESCHED_US) { -+ /* -+ * Forking task has run out of timeslice. Reschedule it and -+ * start its child with a new time slice and deadline. The -+ * child will end up running first because its deadline will -+ * be slightly earlier. -+ */ -+ __set_tsk_resched(rq_curr); -+ time_slice_expired(p, new_rq); -+ if (suitable_idle_cpus(p)) -+ resched_best_idle(p, task_cpu(p)); -+ else if (unlikely(rq != new_rq)) -+ try_preempt(p, new_rq); -+ } else { -+ p->time_slice = rq_curr->time_slice; -+ if (rq_curr == parent && rq == new_rq && !suitable_idle_cpus(p)) { -+ /* -+ * The VM isn't cloned, so we're in a good position to -+ * do child-runs-first in anticipation of an exec. This -+ * usually avoids a lot of COW overhead. -+ */ -+ __set_tsk_resched(rq_curr); -+ } else { -+ /* -+ * Adjust the hrexpiry since rq_curr will keep -+ * running and its timeslice has been shortened. 
-+ */ -+ hrexpiry_start(rq, US_TO_NS(rq_curr->time_slice)); -+ try_preempt(p, new_rq); -+ } -+ } -+ } else { -+ time_slice_expired(p, new_rq); -+ try_preempt(p, new_rq); -+ } -+ activate_task(new_rq, p, 0); -+ double_rq_unlock(rq, new_rq); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ -+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); -+ -+void preempt_notifier_inc(void) -+{ -+ static_branch_inc(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_inc); -+ -+void preempt_notifier_dec(void) -+{ -+ static_branch_dec(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_dec); -+ -+/** -+ * preempt_notifier_register - tell me when current is being preempted & rescheduled -+ * @notifier: notifier struct to register -+ */ -+void preempt_notifier_register(struct preempt_notifier *notifier) -+{ -+ if (!static_branch_unlikely(&preempt_notifier_key)) -+ WARN(1, "registering preempt_notifier while notifiers disabled\n"); -+ -+ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_register); -+ -+/** -+ * preempt_notifier_unregister - no longer interested in preemption notifications -+ * @notifier: notifier struct to unregister -+ * -+ * This is *not* safe to call from within a preemption notifier. -+ */ -+void preempt_notifier_unregister(struct preempt_notifier *notifier) -+{ -+ hlist_del(¬ifier->link); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_unregister); -+ -+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_in(notifier, raw_smp_processor_id()); -+} -+ -+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_in_preempt_notifiers(curr); -+} -+ -+static void -+__fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_out(notifier, next); -+} -+ -+static __always_inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_out_preempt_notifiers(curr, next); -+} -+ -+#else /* !CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+} -+ -+static inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+} -+ -+#endif /* CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void prepare_task(struct task_struct *next) -+{ -+ /* -+ * Claim the task as running, we do this before switching to it -+ * such that any running task will have this set. -+ */ -+ next->on_cpu = 1; -+} -+ -+static inline void finish_task(struct task_struct *prev) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->on_cpu is cleared, the task can be moved to a different CPU. -+ * We must ensure this doesn't happen until the switch is completely -+ * finished. -+ * -+ * In particular, the load of prev->state in finish_task_switch() must -+ * happen before this. -+ * -+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). 
-+ */ -+ smp_store_release(&prev->on_cpu, 0); -+#endif -+} -+ -+static inline void -+prepare_lock_switch(struct rq *rq, struct task_struct *next) -+{ -+ /* -+ * Since the runqueue lock will be released by the next -+ * task (which is an invalid locking op but in the case -+ * of the scheduler it's an obvious special-case), so we -+ * do an early lockdep release here: -+ */ -+ spin_release(&rq->lock->dep_map, 1, _THIS_IP_); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* this is a valid case when another task releases the spinlock */ -+ rq->lock->owner = next; -+#endif -+} -+ -+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) -+{ -+ /* -+ * If we are tracking spinlock dependencies then we have to -+ * fix up the runqueue lock - which gets 'carried over' from -+ * prev into current: -+ */ -+ spin_acquire(&rq->lock->dep_map, 0, 0, _THIS_IP_); -+ -+#ifdef CONFIG_SMP -+ /* -+ * If prev was marked as migrating to another CPU in return_task, drop -+ * the local runqueue lock but leave interrupts disabled and grab the -+ * remote lock we're migrating it to before enabling them. -+ */ -+ if (unlikely(task_on_rq_migrating(prev))) { -+ sched_info_dequeued(rq, prev); -+ /* -+ * We move the ownership of prev to the new cpu now. ttwu can't -+ * activate prev to the wrong cpu since it has to grab this -+ * runqueue in ttwu_remote. -+ */ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ prev->cpu = prev->wake_cpu; -+#else -+ task_thread_info(prev)->cpu = prev->wake_cpu; -+#endif -+ raw_spin_unlock(rq->lock); -+ -+ raw_spin_lock(&prev->pi_lock); -+ rq = __task_rq_lock(prev, NULL); -+ /* Check that someone else hasn't already queued prev */ -+ if (likely(!task_queued(prev))) { -+ enqueue_task(rq, prev, 0); -+ prev->on_rq = TASK_ON_RQ_QUEUED; -+ /* Wake up the CPU if it's not already running */ -+ resched_if_idle(rq); -+ } -+ raw_spin_unlock(&prev->pi_lock); -+ } -+#endif -+ rq_unlock(rq); -+ -+ do_pending_softirq(rq, current); -+ -+ local_irq_enable(); -+} -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+#ifndef finish_arch_switch -+# define finish_arch_switch(prev) do { } while (0) -+#endif -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+/** -+ * prepare_task_switch - prepare to switch tasks -+ * @rq: the runqueue preparing to switch -+ * @next: the task we are going to switch to. -+ * -+ * This is called with the rq lock held and interrupts off. It must -+ * be paired with a subsequent finish_task_switch after the context -+ * switch. -+ * -+ * prepare_task_switch sets up locking and calls architecture specific -+ * hooks. -+ */ -+static inline void -+prepare_task_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ kcov_prepare_switch(prev); -+ sched_info_switch(rq, prev, next); -+ perf_event_task_sched_out(prev, next); -+ rseq_preempt(prev); -+ fire_sched_out_preempt_notifiers(prev, next); -+ prepare_task(next); -+ prepare_arch_switch(next); -+} -+ -+/** -+ * finish_task_switch - clean up after a task-switch -+ * @rq: runqueue associated with task-switch -+ * @prev: the thread we just switched away from. -+ * -+ * finish_task_switch must be called after the context switch, paired -+ * with a prepare_task_switch call before the context switch. -+ * finish_task_switch will reconcile locking set up by prepare_task_switch, -+ * and do any other architecture-specific cleanup actions. -+ * -+ * Note that we may have delayed dropping an mm in context_switch(). 
If -+ * so, we finish that here outside of the runqueue lock. (Doing it -+ * with the lock held can cause deadlocks; see schedule() for -+ * details.) -+ * -+ * The context switch have flipped the stack from under us and restored the -+ * local variables which were saved when this task called schedule() in the -+ * past. prev == current is still correct but we need to recalculate this_rq -+ * because prev may have moved to another CPU. -+ */ -+static void finish_task_switch(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq = this_rq(); -+ struct mm_struct *mm = rq->prev_mm; -+ long prev_state; -+ -+ /* -+ * The previous task will have left us with a preempt_count of 2 -+ * because it left us after: -+ * -+ * schedule() -+ * preempt_disable(); // 1 -+ * __schedule() -+ * raw_spin_lock_irq(rq->lock) // 2 -+ * -+ * Also, see FORK_PREEMPT_COUNT. -+ */ -+ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, -+ "corrupted preempt_count: %s/%d/0x%x\n", -+ current->comm, current->pid, preempt_count())) -+ preempt_count_set(FORK_PREEMPT_COUNT); -+ -+ rq->prev_mm = NULL; -+ -+ /* -+ * A task struct has one reference for the use as "current". -+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls -+ * schedule one last time. The schedule call will never return, and -+ * the scheduled task must drop that reference. -+ * -+ * We must observe prev->state before clearing prev->on_cpu (in -+ * finish_task), otherwise a concurrent wakeup can get prev -+ * running on another CPU and we could rave with its RUNNING -> DEAD -+ * transition, resulting in a double drop. -+ */ -+ prev_state = prev->state; -+ vtime_task_switch(prev); -+ perf_event_task_sched_in(prev, current); -+ finish_task(prev); -+ finish_lock_switch(rq, prev); -+ finish_arch_post_lock_switch(); -+ kcov_finish_switch(current); -+ -+ fire_sched_in_preempt_notifiers(current); -+ /* -+ * When switching through a kernel thread, the loop in -+ * membarrier_{private,global}_expedited() may have observed that -+ * kernel thread and not issued an IPI. It is therefore possible to -+ * schedule between user->kernel->user threads without passing though -+ * switch_mm(). Membarrier requires a barrier after storing to -+ * rq->curr, before returning to userspace, so provide them here: -+ * -+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly -+ * provided by mmdrop(), -+ * - a sync_core for SYNC_CORE. -+ */ -+ if (mm) { -+ membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop(mm); -+ } -+ if (unlikely(prev_state == TASK_DEAD)) { -+ /* -+ * Remove function-return probe instances associated with this -+ * task and put them back on the free list. -+ */ -+ kprobe_flush_task(prev); -+ -+ /* Task is done with its stack. */ -+ put_task_stack(prev); -+ -+ put_task_struct_rcu_user(prev); -+ } -+} -+ -+/** -+ * schedule_tail - first thing a freshly forked thread must call. -+ * @prev: the thread we just switched away from. -+ */ -+asmlinkage __visible void schedule_tail(struct task_struct *prev) -+{ -+ /* -+ * New tasks start with FORK_PREEMPT_COUNT, see there and -+ * finish_task_switch() for details. -+ * -+ * finish_task_switch() will drop rq->lock() and lower preempt_count -+ * and the preempt_enable() will end up enabling preemption (on -+ * PREEMPT_COUNT kernels). 
-+ */ -+ -+ finish_task_switch(prev); -+ preempt_enable(); -+ -+ if (current->set_child_tid) -+ put_user(task_pid_vnr(current), current->set_child_tid); -+ -+ calculate_sigpending(); -+} -+ -+/* -+ * context_switch - switch to the new MM and the new thread's register state. -+ */ -+static __always_inline void -+context_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ prepare_task_switch(rq, prev, next); -+ -+ /* -+ * For paravirt, this is coupled with an exit in switch_to to -+ * combine the page table reload and the switch backend into -+ * one hypercall. -+ */ -+ arch_start_context_switch(prev); -+ -+ /* -+ * kernel -> kernel lazy + transfer active -+ * user -> kernel lazy + mmgrab() active -+ * -+ * kernel -> user switch + mmdrop() active -+ * user -> user switch -+ */ -+ if (!next->mm) { // to kernel -+ enter_lazy_tlb(prev->active_mm, next); -+ -+ next->active_mm = prev->active_mm; -+ if (prev->mm) // from user -+ mmgrab(prev->active_mm); -+ else -+ prev->active_mm = NULL; -+ } else { // to user -+ membarrier_switch_mm(rq, prev->active_mm, next->mm); -+ /* -+ * sys_membarrier() requires an smp_mb() between setting -+ * rq->curr / membarrier_switch_mm() and returning to userspace. -+ * -+ * The below provides this either through switch_mm(), or in -+ * case 'prev->active_mm == next->mm' through -+ * finish_task_switch()'s mmdrop(). -+ */ -+ switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ -+ if (!prev->mm) { // from kernel -+ /* will mmdrop() in finish_task_switch(). */ -+ rq->prev_mm = prev->active_mm; -+ prev->active_mm = NULL; -+ } -+ } -+ prepare_lock_switch(rq, next); -+ -+ /* Here we just switch the register state and the stack. */ -+ switch_to(prev, next, prev); -+ barrier(); -+ -+ finish_task_switch(prev); -+} -+ -+/* -+ * nr_running, nr_uninterruptible and nr_context_switches: -+ * -+ * externally visible scheduler statistics: current number of runnable -+ * threads, total number of context switches performed since bootup. -+ */ -+unsigned long nr_running(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_running; -+ -+ return sum; -+} -+ -+static unsigned long nr_uninterruptible(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_uninterruptible; -+ -+ return sum; -+} -+ -+/* -+ * Check if only the current task is running on the CPU. -+ * -+ * Caution: this function does not check that the caller has disabled -+ * preemption, thus the result might have a time-of-check-to-time-of-use -+ * race. The caller is responsible to use it correctly, for example: -+ * -+ * - from a non-preemptible section (of course) -+ * -+ * - from a thread that is bound to a single CPU -+ * -+ * - in a loop with very short iterations (e.g. a polling loop) -+ */ -+bool single_task_running(void) -+{ -+ if (rq_load(raw_rq()) == 1) -+ return true; -+ else -+ return false; -+} -+EXPORT_SYMBOL(single_task_running); -+ -+unsigned long long nr_context_switches(void) -+{ -+ int cpu; -+ unsigned long long sum = 0; -+ -+ for_each_possible_cpu(cpu) -+ sum += cpu_rq(cpu)->nr_switches; -+ -+ return sum; -+} -+ -+/* -+ * Consumers of these two interfaces, like for example the cpufreq menu -+ * governor are using nonsensical data. Boosting frequency for a CPU that has -+ * IO-wait which might not even end up running the task when it does become -+ * runnable. 
-+ */ -+ -+unsigned long nr_iowait_cpu(int cpu) -+{ -+ return atomic_read(&cpu_rq(cpu)->nr_iowait); -+} -+ -+/* -+ * IO-wait accounting, and how its mostly bollocks (on SMP). -+ * -+ * The idea behind IO-wait account is to account the idle time that we could -+ * have spend running if it were not for IO. That is, if we were to improve the -+ * storage performance, we'd have a proportional reduction in IO-wait time. -+ * -+ * This all works nicely on UP, where, when a task blocks on IO, we account -+ * idle time as IO-wait, because if the storage were faster, it could've been -+ * running and we'd not be idle. -+ * -+ * This has been extended to SMP, by doing the same for each CPU. This however -+ * is broken. -+ * -+ * Imagine for instance the case where two tasks block on one CPU, only the one -+ * CPU will have IO-wait accounted, while the other has regular idle. Even -+ * though, if the storage were faster, both could've ran at the same time, -+ * utilising both CPUs. -+ * -+ * This means, that when looking globally, the current IO-wait accounting on -+ * SMP is a lower bound, by reason of under accounting. -+ * -+ * Worse, since the numbers are provided per CPU, they are sometimes -+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly -+ * associated with any one particular CPU, it can wake to another CPU than it -+ * blocked on. This means the per CPU IO-wait number is meaningless. -+ * -+ * Task CPU affinities can make all that even more 'interesting'. -+ */ -+ -+unsigned long nr_iowait(void) -+{ -+ unsigned long cpu, sum = 0; -+ -+ for_each_possible_cpu(cpu) -+ sum += nr_iowait_cpu(cpu); -+ -+ return sum; -+} -+ -+unsigned long nr_active(void) -+{ -+ return nr_running() + nr_uninterruptible(); -+} -+ -+/* Variables and functions for calc_load */ -+static unsigned long calc_load_update; -+unsigned long avenrun[3]; -+EXPORT_SYMBOL(avenrun); -+ -+/** -+ * get_avenrun - get the load average array -+ * @loads: pointer to dest load array -+ * @offset: offset to add -+ * @shift: shift count to shift the result left -+ * -+ * These values are estimates at best, so no need for locking. -+ */ -+void get_avenrun(unsigned long *loads, unsigned long offset, int shift) -+{ -+ loads[0] = (avenrun[0] + offset) << shift; -+ loads[1] = (avenrun[1] + offset) << shift; -+ loads[2] = (avenrun[2] + offset) << shift; -+} -+ -+/* -+ * calc_load - update the avenrun load estimates every LOAD_FREQ seconds. -+ */ -+void calc_global_load(unsigned long ticks) -+{ -+ long active; -+ -+ if (time_before(jiffies, READ_ONCE(calc_load_update))) -+ return; -+ active = nr_active() * FIXED_1; -+ -+ avenrun[0] = calc_load(avenrun[0], EXP_1, active); -+ avenrun[1] = calc_load(avenrun[1], EXP_5, active); -+ avenrun[2] = calc_load(avenrun[2], EXP_15, active); -+ -+ calc_load_update = jiffies + LOAD_FREQ; -+} -+ -+/** -+ * fixed_power_int - compute: x^n, in O(log n) time -+ * -+ * @x: base of the power -+ * @frac_bits: fractional bits of @x -+ * @n: power to raise @x to. -+ * -+ * By exploiting the relation between the definition of the natural power -+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and -+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i, -+ * (where: n_i \elem {0, 1}, the binary vector representing n), -+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is -+ * of course trivially computable in O(log_2 n), the length of our binary -+ * vector. 
-+ */ -+static unsigned long -+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) -+{ -+ unsigned long result = 1UL << frac_bits; -+ -+ if (n) { -+ for (;;) { -+ if (n & 1) { -+ result *= x; -+ result += 1UL << (frac_bits - 1); -+ result >>= frac_bits; -+ } -+ n >>= 1; -+ if (!n) -+ break; -+ x *= x; -+ x += 1UL << (frac_bits - 1); -+ x >>= frac_bits; -+ } -+ } -+ -+ return result; -+} -+ -+/* -+ * a1 = a0 * e + a * (1 - e) -+ * -+ * a2 = a1 * e + a * (1 - e) -+ * = (a0 * e + a * (1 - e)) * e + a * (1 - e) -+ * = a0 * e^2 + a * (1 - e) * (1 + e) -+ * -+ * a3 = a2 * e + a * (1 - e) -+ * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e) -+ * = a0 * e^3 + a * (1 - e) * (1 + e + e^2) -+ * -+ * ... -+ * -+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1] -+ * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e) -+ * = a0 * e^n + a * (1 - e^n) -+ * -+ * [1] application of the geometric series: -+ * -+ * n 1 - x^(n+1) -+ * S_n := \Sum x^i = ------------- -+ * i=0 1 - x -+ */ -+unsigned long -+calc_load_n(unsigned long load, unsigned long exp, -+ unsigned long active, unsigned int n) -+{ -+ return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); -+} -+ -+DEFINE_PER_CPU(struct kernel_stat, kstat); -+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -+ -+EXPORT_PER_CPU_SYMBOL(kstat); -+EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -+ -+#ifdef CONFIG_PARAVIRT -+static inline u64 steal_ticks(u64 steal) -+{ -+ if (unlikely(steal > NSEC_PER_SEC)) -+ return div_u64(steal, TICK_NSEC); -+ -+ return __iter_div_u64_rem(steal, TICK_NSEC, &steal); -+} -+#endif -+ -+#ifndef nsecs_to_cputime -+# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) -+#endif -+ -+/* -+ * On each tick, add the number of nanoseconds to the unbanked variables and -+ * once one tick's worth has accumulated, account it allowing for accurate -+ * sub-tick accounting and totals. Use the TICK_APPROX_NS to match the way we -+ * deduct nanoseconds. 
-+ */ -+static void pc_idle_time(struct rq *rq, struct task_struct *idle, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ if (atomic_read(&rq->nr_iowait) > 0) { -+ rq->iowait_ns += ns; -+ if (rq->iowait_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->iowait_ns); -+ cpustat[CPUTIME_IOWAIT] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->iowait_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->idle_ns += ns; -+ if (rq->idle_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->idle_ns); -+ cpustat[CPUTIME_IDLE] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->idle_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(idle); -+} -+ -+static void pc_system_time(struct rq *rq, struct task_struct *p, -+ int hardirq_offset, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ p->stime_ns += ns; -+ if (p->stime_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(p->stime_ns); -+ p->stime_ns %= JIFFY_NS; -+ p->stime += (__force u64)TICK_APPROX_NS * ticks; -+ account_group_system_time(p, TICK_APPROX_NS * ticks); -+ } -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ if (hardirq_count() - hardirq_offset) { -+ rq->irq_ns += ns; -+ if (rq->irq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->irq_ns); -+ cpustat[CPUTIME_IRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->irq_ns %= JIFFY_NS; -+ } -+ } else if (in_serving_softirq()) { -+ rq->softirq_ns += ns; -+ if (rq->softirq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->softirq_ns); -+ cpustat[CPUTIME_SOFTIRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->softirq_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->system_ns += ns; -+ if (rq->system_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->system_ns); -+ cpustat[CPUTIME_SYSTEM] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->system_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(p); -+} -+ -+static void pc_user_time(struct rq *rq, struct task_struct *p, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ p->utime_ns += ns; -+ if (p->utime_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(p->utime_ns); -+ p->utime_ns %= JIFFY_NS; -+ p->utime += (__force u64)TICK_APPROX_NS * ticks; -+ account_group_user_time(p, TICK_APPROX_NS * ticks); -+ } -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ if (this_cpu_ksoftirqd() == p) { -+ /* -+ * ksoftirqd time do not get accounted in cpu_softirq_time. -+ * So, we have to handle it separately here. -+ */ -+ rq->softirq_ns += ns; -+ if (rq->softirq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->softirq_ns); -+ cpustat[CPUTIME_SOFTIRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->softirq_ns %= JIFFY_NS; -+ } -+ } -+ -+ if (task_nice(p) > 0 || idleprio_task(p)) { -+ rq->nice_ns += ns; -+ if (rq->nice_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->nice_ns); -+ cpustat[CPUTIME_NICE] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->nice_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->user_ns += ns; -+ if (rq->user_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->user_ns); -+ cpustat[CPUTIME_USER] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->user_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(p); -+} -+ -+/* -+ * This is called on clock ticks. -+ * Bank in p->sched_time the ns elapsed since the last tick or switch. -+ * CPU scheduler quota accounting is also performed here in microseconds. 
-+ */ -+static void update_cpu_clock_tick(struct rq *rq, struct task_struct *p) -+{ -+ s64 account_ns = rq->niffies - p->last_ran; -+ struct task_struct *idle = rq->idle; -+ -+ /* Accurate tick timekeeping */ -+ if (user_mode(get_irq_regs())) -+ pc_user_time(rq, p, account_ns); -+ else if (p != idle || (irq_count() != HARDIRQ_OFFSET)) { -+ pc_system_time(rq, p, HARDIRQ_OFFSET, account_ns); -+ } else -+ pc_idle_time(rq, idle, account_ns); -+ -+ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ -+ if (p->policy != SCHED_FIFO && p != idle) -+ p->time_slice -= NS_TO_US(account_ns); -+ -+ p->last_ran = rq->niffies; -+} -+ -+/* -+ * This is called on context switches. -+ * Bank in p->sched_time the ns elapsed since the last tick or switch. -+ * CPU scheduler quota accounting is also performed here in microseconds. -+ */ -+static void update_cpu_clock_switch(struct rq *rq, struct task_struct *p) -+{ -+ s64 account_ns = rq->niffies - p->last_ran; -+ struct task_struct *idle = rq->idle; -+ -+ /* Accurate subtick timekeeping */ -+ if (p != idle) -+ pc_user_time(rq, p, account_ns); -+ else -+ pc_idle_time(rq, idle, account_ns); -+ -+ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ -+ if (p->policy != SCHED_FIFO && p != idle) -+ p->time_slice -= NS_TO_US(account_ns); -+} -+ -+/* -+ * Return any ns on the sched_clock that have not yet been accounted in -+ * @p in case that task is currently running. -+ * -+ * Called with task_rq_lock(p) held. -+ */ -+static inline u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) -+{ -+ u64 ns = 0; -+ -+ /* -+ * Must be ->curr _and_ ->on_rq. If dequeued, we would -+ * project cycles that may never be accounted to this -+ * thread, breaking clock_gettime(). -+ */ -+ if (p == rq->curr && task_on_rq_queued(p)) { -+ update_clocks(rq); -+ ns = rq->niffies - p->last_ran; -+ } -+ -+ return ns; -+} -+ -+/* -+ * Return accounted runtime for the task. -+ * Return separately the current's pending runtime that have not been -+ * accounted yet. -+ * -+ */ -+unsigned long long task_sched_runtime(struct task_struct *p) -+{ -+ struct rq_flags rf; -+ struct rq *rq; -+ u64 ns; -+ -+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) -+ /* -+ * 64-bit doesn't need locks to atomically read a 64-bit value. -+ * So we have a optimisation chance when the task's delta_exec is 0. -+ * Reading ->on_cpu is racy, but this is ok. -+ * -+ * If we race with it leaving CPU, we'll take a lock. So we're correct. -+ * If we race with it entering CPU, unaccounted time is 0. This is -+ * indistinguishable from the read occurring a few cycles earlier. -+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has -+ * been accounted, so we're correct here as well. -+ */ -+ if (!p->on_cpu || !task_on_rq_queued(p)) -+ return tsk_seruntime(p); -+#endif -+ -+ rq = task_rq_lock(p, &rf); -+ ns = p->sched_time + do_task_delta_exec(p, rq); -+ task_rq_unlock(rq, p, &rf); -+ -+ return ns; -+} -+ -+/* -+ * Functions to test for when SCHED_ISO tasks have used their allocated -+ * quota as real time scheduling and convert them back to SCHED_NORMAL. All -+ * data is modified only by the local runqueue during scheduler_tick with -+ * interrupts disabled. -+ */ -+ -+/* -+ * Test if SCHED_ISO tasks have run longer than their alloted period as RT -+ * tasks and set the refractory flag if necessary. There is 10% hysteresis -+ * for unsetting the flag. 115/128 is ~90/100 as a fast shift instead of a -+ * slow division. 
-+ */ -+static inline void iso_tick(struct rq *rq) -+{ -+ rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD; -+ rq->iso_ticks += 100; -+ if (rq->iso_ticks > ISO_PERIOD * sched_iso_cpu) { -+ rq->iso_refractory = true; -+ if (unlikely(rq->iso_ticks > ISO_PERIOD * 100)) -+ rq->iso_ticks = ISO_PERIOD * 100; -+ } -+} -+ -+/* No SCHED_ISO task was running so decrease rq->iso_ticks */ -+static inline void no_iso_tick(struct rq *rq, int ticks) -+{ -+ if (rq->iso_ticks > 0 || rq->iso_refractory) { -+ rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - ticks) / ISO_PERIOD; -+ if (rq->iso_ticks < ISO_PERIOD * (sched_iso_cpu * 115 / 128)) { -+ rq->iso_refractory = false; -+ if (unlikely(rq->iso_ticks < 0)) -+ rq->iso_ticks = 0; -+ } -+ } -+} -+ -+/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static void task_running_tick(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ /* -+ * If a SCHED_ISO task is running we increment the iso_ticks. In -+ * order to prevent SCHED_ISO tasks from causing starvation in the -+ * presence of true RT tasks we account those as iso_ticks as well. -+ */ -+ if (rt_task(p) || task_running_iso(p)) -+ iso_tick(rq); -+ else -+ no_iso_tick(rq, 1); -+ -+ /* SCHED_FIFO tasks never run out of timeslice. */ -+ if (p->policy == SCHED_FIFO) -+ return; -+ -+ if (iso_task(p)) { -+ if (task_running_iso(p)) { -+ if (rq->iso_refractory) { -+ /* -+ * SCHED_ISO task is running as RT and limit -+ * has been hit. Force it to reschedule as -+ * SCHED_NORMAL by zeroing its time_slice -+ */ -+ p->time_slice = 0; -+ } -+ } else if (!rq->iso_refractory) { -+ /* Can now run again ISO. Reschedule to pick up prio */ -+ goto out_resched; -+ } -+ } -+ -+ /* -+ * Tasks that were scheduled in the first half of a tick are not -+ * allowed to run into the 2nd half of the next tick if they will -+ * run out of time slice in the interim. Otherwise, if they have -+ * less than RESCHED_US μs of time slice left they will be rescheduled. -+ * Dither is used as a backup for when hrexpiry is disabled or high res -+ * timers not configured in. -+ */ -+ if (p->time_slice - rq->dither >= RESCHED_US) -+ return; -+out_resched: -+ rq_lock(rq); -+ __set_tsk_resched(p); -+ rq_unlock(rq); -+} -+ -+static inline void task_tick(struct rq *rq) -+{ -+ if (!rq_idle(rq)) -+ task_running_tick(rq); -+ else if (rq->last_jiffy > rq->last_scheduler_tick) -+ no_iso_tick(rq, rq->last_jiffy - rq->last_scheduler_tick); -+} -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * We can stop the timer tick any time highres timers are active since -+ * we rely entirely on highres timeouts for task expiry rescheduling. -+ */ -+static void sched_stop_tick(struct rq *rq, int cpu) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ if (!tick_nohz_full_enabled()) -+ return; -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+ -+static inline void sched_start_tick(struct rq *rq, int cpu) -+{ -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+ -+struct tick_work { -+ int cpu; -+ atomic_t state; -+ struct delayed_work work; -+}; -+/* Values for ->state, see diagram below. 
*/ -+#define TICK_SCHED_REMOTE_OFFLINE 0 -+#define TICK_SCHED_REMOTE_OFFLINING 1 -+#define TICK_SCHED_REMOTE_RUNNING 2 -+ -+/* -+ * State diagram for ->state: -+ * -+ * -+ * TICK_SCHED_REMOTE_OFFLINE -+ * | ^ -+ * | | -+ * | | sched_tick_remote() -+ * | | -+ * | | -+ * +--TICK_SCHED_REMOTE_OFFLINING -+ * | ^ -+ * | | -+ * sched_tick_start() | | sched_tick_stop() -+ * | | -+ * V | -+ * TICK_SCHED_REMOTE_RUNNING -+ * -+ * -+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() -+ * and sched_tick_start() are happy to leave the state in RUNNING. -+ */ -+ -+static struct tick_work __percpu *tick_work_cpu; -+ -+static void sched_tick_remote(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct tick_work *twork = container_of(dwork, struct tick_work, work); -+ int cpu = twork->cpu; -+ struct rq *rq = cpu_rq(cpu); -+ struct task_struct *curr; -+ u64 delta; -+ int os; -+ -+ /* -+ * Handle the tick only if it appears the remote CPU is running in full -+ * dynticks mode. The check is racy by nature, but missing a tick or -+ * having one too much is no big deal because the scheduler tick updates -+ * statistics and checks timeslices in a time-independent way, regardless -+ * of when exactly it is running. -+ */ -+ if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) -+ goto out_requeue; -+ -+ rq_lock_irq(rq); -+ curr = rq->curr; -+ if (is_idle_task(curr) || cpu_is_offline(cpu)) -+ goto out_unlock; -+ -+ update_rq_clock(rq); -+ delta = rq_clock_task(rq) - curr->last_ran; -+ -+ /* -+ * Make sure the next tick runs within a reasonable -+ * amount of time. -+ */ -+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); -+ task_tick(rq); -+ -+out_unlock: -+ rq_unlock_irq(rq, NULL); -+ -+out_requeue: -+ /* -+ * Run the remote tick once per second (1Hz). This arbitrary -+ * frequency is large enough to avoid overload but short enough -+ * to keep scheduler internal stats reasonably up to date. But -+ * first update state to reflect hotplug activity if required. -+ */ -+ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); -+ if (os == TICK_SCHED_REMOTE_RUNNING) -+ queue_delayed_work(system_unbound_wq, dwork, HZ); -+} -+ -+static void sched_tick_start(int cpu) -+{ -+ struct tick_work *twork; -+ int os; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); -+ if (os == TICK_SCHED_REMOTE_OFFLINE) { -+ twork->cpu = cpu; -+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); -+ queue_delayed_work(system_unbound_wq, &twork->work, HZ); -+ } -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void sched_tick_stop(int cpu) -+{ -+ struct tick_work *twork; -+ int os; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ /* There cannot be competing actions, but don't rely on stop-machine. */ -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_OFFLINING); -+ WARN_ON_ONCE(os != TICK_SCHED_REMOTE_RUNNING); -+ /* Don't cancel, as this would mess up the state machine. 
*/ -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+int __init sched_tick_offload_init(void) -+{ -+ tick_work_cpu = alloc_percpu(struct tick_work); -+ BUG_ON(!tick_work_cpu); -+ return 0; -+} -+ -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_stop_tick(struct rq *rq, int cpu) {} -+static inline void sched_start_tick(struct rq *rq, int cpu) {} -+static inline void sched_tick_start(int cpu) { } -+static inline void sched_tick_stop(int cpu) { } -+#endif -+ -+/* -+ * This function gets called by the timer code, with HZ frequency. -+ * We call it with interrupts disabled. -+ */ -+void scheduler_tick(void) -+{ -+ int cpu __maybe_unused = smp_processor_id(); -+ struct rq *rq = cpu_rq(cpu); -+ -+ sched_clock_tick(); -+ update_clocks(rq); -+ update_load_avg(rq, 0); -+ update_cpu_clock_tick(rq, rq->curr); -+ task_tick(rq); -+ rq->last_scheduler_tick = rq->last_jiffy; -+ rq->last_tick = rq->clock; -+ psi_task_tick(rq); -+ perf_event_task_tick(); -+ sched_stop_tick(rq, cpu); -+} -+ -+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ -+ defined(CONFIG_TRACE_PREEMPT_TOGGLE)) -+/* -+ * If the value passed in is equal to the current preempt count -+ * then we just disabled preemption. Start timing the latency. -+ */ -+static inline void preempt_latency_start(int val) -+{ -+ if (preempt_count() == val) { -+ unsigned long ip = get_lock_parent_ip(); -+#ifdef CONFIG_DEBUG_PREEMPT -+ current->preempt_disable_ip = ip; -+#endif -+ trace_preempt_off(CALLER_ADDR0, ip); -+ } -+} -+ -+void preempt_count_add(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) -+ return; -+#endif -+ __preempt_count_add(val); -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Spinlock count overflowing soon? -+ */ -+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= -+ PREEMPT_MASK - 10); -+#endif -+ preempt_latency_start(val); -+} -+EXPORT_SYMBOL(preempt_count_add); -+NOKPROBE_SYMBOL(preempt_count_add); -+ -+/* -+ * If the value passed in equals to the current preempt count -+ * then we just enabled preemption. Stop timing the latency. -+ */ -+static inline void preempt_latency_stop(int val) -+{ -+ if (preempt_count() == val) -+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); -+} -+ -+void preempt_count_sub(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) -+ return; -+ /* -+ * Is the spinlock portion underflowing? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && -+ !(preempt_count() & PREEMPT_MASK))) -+ return; -+#endif -+ -+ preempt_latency_stop(val); -+ __preempt_count_sub(val); -+} -+EXPORT_SYMBOL(preempt_count_sub); -+NOKPROBE_SYMBOL(preempt_count_sub); -+ -+#else -+static inline void preempt_latency_start(int val) { } -+static inline void preempt_latency_stop(int val) { } -+#endif -+ -+static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ return p->preempt_disable_ip; -+#else -+ return 0; -+#endif -+} -+ -+/* -+ * The time_slice is only refilled when it is empty and that is when we set a -+ * new deadline. Make sure update_clocks has been called recently to update -+ * rq->niffies. 
-+ */ -+static void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ p->time_slice = timeslice(); -+ p->deadline = rq->niffies + task_deadline_diff(p); -+#ifdef CONFIG_SMT_NICE -+ if (!p->mm) -+ p->smt_bias = 0; -+ else if (rt_task(p)) -+ p->smt_bias = 1 << 30; -+ else if (task_running_iso(p)) -+ p->smt_bias = 1 << 29; -+ else if (idleprio_task(p)) { -+ if (task_running_idle(p)) -+ p->smt_bias = 0; -+ else -+ p->smt_bias = 1; -+ } else if (--p->smt_bias < 1) -+ p->smt_bias = MAX_PRIO - p->static_prio; -+#endif -+} -+ -+/* -+ * Timeslices below RESCHED_US are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. SCHED_BATCH tasks -+ * have been flagged be not latency sensitive and likely to be fully CPU -+ * bound so every time they're rescheduled they have their time_slice -+ * refilled, but get a new later deadline to have little effect on -+ * SCHED_NORMAL tasks. -+ -+ */ -+static inline void check_deadline(struct task_struct *p, struct rq *rq) -+{ -+ if (p->time_slice < RESCHED_US || batch_task(p)) -+ time_slice_expired(p, rq); -+} -+ -+/* -+ * Task selection with skiplists is a simple matter of picking off the first -+ * task in the sorted list, an O(1) operation. The lookup is amortised O(1) -+ * being bound to the number of processors. -+ * -+ * Runqueues are selectively locked based on their unlocked data and then -+ * unlocked if not needed. At most 3 locks will be held at any time and are -+ * released as soon as they're no longer needed. All balancing between CPUs -+ * is thus done here in an extremely simple first come best fit manner. -+ * -+ * This iterates over runqueues in cache locality order. In interactive mode -+ * it iterates over all CPUs and finds the task with the best key/deadline. -+ * In non-interactive mode it will only take a task if it's from the current -+ * runqueue or a runqueue with more tasks than the current one with a better -+ * key/deadline. -+ */ -+#ifdef CONFIG_SMP -+static inline struct task_struct -+*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle) -+{ -+ struct rq *locked = NULL, *chosen = NULL; -+ struct task_struct *edt = idle; -+ int i, best_entries = 0; -+ u64 best_key = ~0ULL; -+ -+ for (i = 0; i < total_runqueues; i++) { -+ struct rq *other_rq = rq_order(rq, i); -+ skiplist_node *next; -+ int entries; -+ -+ entries = other_rq->sl->entries; -+ /* -+ * Check for queued entres lockless first. The local runqueue -+ * is locked so entries will always be accurate. -+ */ -+ if (!sched_interactive) { -+ /* -+ * Don't reschedule balance across nodes unless the CPU -+ * is idle. -+ */ -+ if (edt != idle && rq->cpu_locality[other_rq->cpu] > LOCALITY_SMP) -+ break; -+ if (entries <= best_entries) -+ continue; -+ } else if (!entries) -+ continue; -+ -+ /* if (i) implies other_rq != rq */ -+ if (i) { -+ /* Check for best id queued lockless first */ -+ if (other_rq->best_key >= best_key) -+ continue; -+ -+ if (unlikely(!trylock_rq(rq, other_rq))) -+ continue; -+ -+ /* Need to reevaluate entries after locking */ -+ entries = other_rq->sl->entries; -+ if (unlikely(!entries)) { -+ unlock_rq(other_rq); -+ continue; -+ } -+ } -+ -+ next = other_rq->node; -+ /* -+ * In interactive mode we check beyond the best entry on other -+ * runqueues if we can't get the best for smt or affinity -+ * reasons. 
-+ */ -+ while ((next = next->next[0]) != other_rq->node) { -+ struct task_struct *p; -+ u64 key = next->key; -+ -+ /* Reevaluate key after locking */ -+ if (key >= best_key) -+ break; -+ -+ p = next->value; -+ if (!smt_schedule(p, rq)) { -+ if (i && !sched_interactive) -+ break; -+ continue; -+ } -+ -+ if (sched_other_cpu(p, cpu)) { -+ if (sched_interactive || !i) -+ continue; -+ break; -+ } -+ /* Make sure affinity is ok */ -+ if (i) { -+ /* From this point on p is the best so far */ -+ if (locked) -+ unlock_rq(locked); -+ chosen = locked = other_rq; -+ } -+ best_entries = entries; -+ best_key = key; -+ edt = p; -+ break; -+ } -+ /* rq->preempting is a hint only as the state may have changed -+ * since it was set with the resched call but if we have met -+ * the condition we can break out here. */ -+ if (edt == rq->preempting) -+ break; -+ if (i && other_rq != chosen) -+ unlock_rq(other_rq); -+ } -+ -+ if (likely(edt != idle)) -+ take_task(rq, cpu, edt); -+ -+ if (locked) -+ unlock_rq(locked); -+ -+ rq->preempting = NULL; -+ -+ return edt; -+} -+#else /* CONFIG_SMP */ -+static inline struct task_struct -+*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle) -+{ -+ struct task_struct *edt; -+ -+ if (unlikely(!rq->sl->entries)) -+ return idle; -+ edt = rq->node->next[0]->value; -+ take_task(rq, cpu, edt); -+ return edt; -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Print scheduling while atomic bug: -+ */ -+static noinline void __schedule_bug(struct task_struct *prev) -+{ -+ /* Save this before calling printk(), since that will clobber it */ -+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ if (oops_in_progress) -+ return; -+ -+ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", -+ prev->comm, prev->pid, preempt_count()); -+ -+ debug_show_held_locks(prev); -+ print_modules(); -+ if (irqs_disabled()) -+ print_irqtrace_events(prev); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && in_atomic_preempt_off()) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+ -+/* -+ * Various schedule()-time debugging checks and statistics: -+ */ -+static inline void schedule_debug(struct task_struct *prev, bool preempt) -+{ -+#ifdef CONFIG_SCHED_STACK_END_CHECK -+ if (task_stack_end_corrupted(prev)) -+ panic("corrupted stack end detected inside scheduler\n"); -+#endif -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && prev->state && prev->non_block_count) { -+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", -+ prev->comm, prev->pid, prev->non_block_count); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+ } -+#endif -+ -+ if (unlikely(in_atomic_preempt_off())) { -+ __schedule_bug(prev); -+ preempt_count_set(PREEMPT_DISABLED); -+ } -+ rcu_sleep_check(); -+ -+ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -+ -+ schedstat_inc(this_rq()->sched_count); -+} -+ -+/* -+ * The currently running task's information is all stored in rq local data -+ * which is only modified by the local CPU. 
-+ */ -+static inline void set_rq_task(struct rq *rq, struct task_struct *p) -+{ -+ if (p == rq->idle || p->policy == SCHED_FIFO) -+ hrexpiry_clear(rq); -+ else -+ hrexpiry_start(rq, US_TO_NS(p->time_slice)); -+ if (rq->clock - rq->last_tick > HALF_JIFFY_NS) -+ rq->dither = 0; -+ else -+ rq->dither = rq_dither(rq); -+ -+ rq->rq_deadline = p->deadline; -+ rq->rq_prio = p->prio; -+#ifdef CONFIG_SMT_NICE -+ rq->rq_mm = p->mm; -+ rq->rq_smt_bias = p->smt_bias; -+#endif -+} -+ -+#ifdef CONFIG_SMT_NICE -+static void check_no_siblings(struct rq __maybe_unused *this_rq) {} -+static void wake_no_siblings(struct rq __maybe_unused *this_rq) {} -+static void (*check_siblings)(struct rq *this_rq) = &check_no_siblings; -+static void (*wake_siblings)(struct rq *this_rq) = &wake_no_siblings; -+ -+/* Iterate over smt siblings when we've scheduled a process on cpu and decide -+ * whether they should continue running or be descheduled. */ -+static void check_smt_siblings(struct rq *this_rq) -+{ -+ int other_cpu; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct task_struct *p; -+ struct rq *rq; -+ -+ rq = cpu_rq(other_cpu); -+ if (rq_idle(rq)) -+ continue; -+ p = rq->curr; -+ if (!smt_schedule(p, this_rq)) -+ resched_curr(rq); -+ } -+} -+ -+static void wake_smt_siblings(struct rq *this_rq) -+{ -+ int other_cpu; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct rq *rq; -+ -+ rq = cpu_rq(other_cpu); -+ if (rq_idle(rq)) -+ resched_idle(rq); -+ } -+} -+#else -+static void check_siblings(struct rq __maybe_unused *this_rq) {} -+static void wake_siblings(struct rq __maybe_unused *this_rq) {} -+#endif -+ -+/* -+ * schedule() is the main scheduler function. -+ * -+ * The main means of driving the scheduler and thus entering this function are: -+ * -+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. -+ * -+ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return -+ * paths. For example, see arch/x86/entry_64.S. -+ * -+ * To drive preemption between tasks, the scheduler sets the flag in timer -+ * interrupt handler scheduler_tick(). -+ * -+ * 3. Wakeups don't really cause entry into schedule(). They add a -+ * task to the run-queue and that's it. -+ * -+ * Now, if the new task added to the run-queue preempts the current -+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets -+ * called on the nearest possible occasion: -+ * -+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): -+ * -+ * - in syscall or exception context, at the next outmost -+ * preempt_enable(). (this might be as soon as the wake_up()'s -+ * spin_unlock()!) -+ * -+ * - in IRQ context, return from interrupt-handler to -+ * preemptible context -+ * -+ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) -+ * then at the next: -+ * -+ * - cond_resched() call -+ * - explicit schedule() call -+ * - return from syscall or exception to user-space -+ * - return from interrupt-handler to user-space -+ * -+ * WARNING: must be called with preemption disabled! 
-+ */ -+static void __sched notrace __schedule(bool preempt) -+{ -+ struct task_struct *prev, *next, *idle; -+ unsigned long *switch_count; -+ bool deactivate = false; -+ struct rq *rq; -+ u64 niffies; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ rq = cpu_rq(cpu); -+ prev = rq->curr; -+ idle = rq->idle; -+ -+ schedule_debug(prev, preempt); -+ -+ local_irq_disable(); -+ rcu_note_context_switch(preempt); -+ -+ /* -+ * Make sure that signal_pending_state()->signal_pending() below -+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(). -+ * -+ * The membarrier system call requires a full memory barrier -+ * after coming from user-space, before storing to rq->curr. -+ */ -+ rq_lock(rq); -+ smp_mb__after_spinlock(); -+#ifdef CONFIG_SMP -+ if (rq->preempt) { -+ /* -+ * Make sure resched_curr hasn't triggered a preemption -+ * locklessly on a task that has since scheduled away. Spurious -+ * wakeup of idle is okay though. -+ */ -+ if (unlikely(preempt && prev != idle && !test_tsk_need_resched(prev))) { -+ rq->preempt = NULL; -+ clear_preempt_need_resched(); -+ rq_unlock_irq(rq, NULL); -+ return; -+ } -+ rq->preempt = NULL; -+ } -+#endif -+ -+ switch_count = &prev->nivcsw; -+ if (!preempt && prev->state) { -+ if (signal_pending_state(prev->state, prev)) { -+ prev->state = TASK_RUNNING; -+ } else { -+ deactivate = true; -+ -+ if (prev->in_iowait) { -+ atomic_inc(&rq->nr_iowait); -+ delayacct_blkio_start(); -+ } -+ } -+ switch_count = &prev->nvcsw; -+ } -+ -+ /* -+ * Store the niffy value here for use by the next task's last_ran -+ * below to avoid losing niffies due to update_clocks being called -+ * again after this point. -+ */ -+ update_clocks(rq); -+ niffies = rq->niffies; -+ update_cpu_clock_switch(rq, prev); -+ -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ -+ if (idle != prev) { -+ check_deadline(prev, rq); -+ return_task(prev, rq, cpu, deactivate); -+ } -+ -+ next = earliest_deadline_task(rq, cpu, idle); -+ if (likely(next->prio != PRIO_LIMIT)) -+ clear_cpuidle_map(cpu); -+ else { -+ set_cpuidle_map(cpu); -+ update_load_avg(rq, 0); -+ } -+ -+ set_rq_task(rq, next); -+ next->last_ran = niffies; -+ -+ if (likely(prev != next)) { -+ /* -+ * Don't reschedule an idle task or deactivated tasks -+ */ -+ if (prev == idle) { -+ rq->nr_running++; -+ if (rt_task(next)) -+ rq->rt_nr_running++; -+ } else if (!deactivate) -+ resched_suitable_idle(prev); -+ if (unlikely(next == idle)) { -+ rq->nr_running--; -+ if (rt_task(prev)) -+ rq->rt_nr_running--; -+ wake_siblings(rq); -+ } else -+ check_siblings(rq); -+ rq->nr_switches++; -+ /* -+ * RCU users of rcu_dereference(rq->curr) may not see -+ * changes to task_struct made by pick_next_task(). -+ */ -+ RCU_INIT_POINTER(rq->curr, next); -+ /* -+ * The membarrier system call requires each architecture -+ * to have a full memory barrier after updating -+ * rq->curr, before returning to user-space. -+ * -+ * Here are the schemes providing that barrier on the -+ * various architectures: -+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. -+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
-+ * - finish_lock_switch() for weakly-ordered -+ * architectures where spin_unlock is a full barrier, -+ * - switch_to() for arm64 (weakly-ordered, spin_unlock -+ * is a RELEASE barrier), -+ */ -+ ++*switch_count; -+ -+ trace_sched_switch(preempt, prev, next); -+ context_switch(rq, prev, next); /* unlocks the rq */ -+ } else { -+ check_siblings(rq); -+ rq_unlock(rq); -+ do_pending_softirq(rq, next); -+ local_irq_enable(); -+ } -+} -+ -+void __noreturn do_task_dead(void) -+{ -+ /* Causes final put_task_struct in finish_task_switch(). */ -+ set_special_state(TASK_DEAD); -+ -+ /* Tell freezer to ignore us: */ -+ current->flags |= PF_NOFREEZE; -+ __schedule(false); -+ BUG(); -+ -+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -+ for (;;) -+ cpu_relax(); -+} -+ -+static inline void sched_submit_work(struct task_struct *tsk) -+{ -+ if (!tsk->state) -+ return; -+ -+ /* -+ * If a worker went to sleep, notify and ask workqueue whether -+ * it wants to wake up a task to maintain concurrency. -+ * As this function is called inside the schedule() context, -+ * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker. -+ */ -+ if (tsk->flags & PF_WQ_WORKER) { -+ preempt_disable(); -+ wq_worker_sleeping(tsk); -+ preempt_enable_no_resched(); -+ } -+ -+ if (tsk_is_pi_blocked(tsk)) -+ return; -+ -+ /* -+ * If we are going to sleep and we have plugged IO queued, -+ * make sure to submit it to avoid deadlocks. -+ */ -+ if (blk_needs_flush_plug(tsk)) -+ blk_schedule_flush_plug(tsk); -+} -+ -+static inline void sched_update_worker(struct task_struct *tsk) -+{ -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_running(tsk); -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ struct task_struct *tsk = current; -+ -+ sched_submit_work(tsk); -+ do { -+ preempt_disable(); -+ __schedule(false); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ sched_update_worker(tsk); -+} -+ -+EXPORT_SYMBOL(schedule); -+ -+/* -+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted -+ * state (have scheduled out non-voluntarily) by making sure that all -+ * tasks have either left the run queue or have gone into user space. -+ * As idle tasks do not do either, they must not ever be preempted -+ * (schedule out non-voluntarily). -+ * -+ * schedule_idle() is similar to schedule_preempt_disable() except that it -+ * never enables preemption because it does not call sched_submit_work(). -+ */ -+void __sched schedule_idle(void) -+{ -+ /* -+ * As this skips calling sched_submit_work(), which the idle task does -+ * regardless because that function is a nop when the task is in a -+ * TASK_RUNNING state, make sure this isn't used someplace that the -+ * current task can be in any other state. Note, idle is always in the -+ * TASK_RUNNING state. -+ */ -+ WARN_ON_ONCE(current->state); -+ do { -+ __schedule(false); -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_CONTEXT_TRACKING -+asmlinkage __visible void __sched schedule_user(void) -+{ -+ /* -+ * If we come here after a random call to set_need_resched(), -+ * or we have been woken up remotely but the IPI has not yet arrived, -+ * we haven't yet exited the RCU idle mode. Do it here manually until -+ * we find a better solution. -+ * -+ * NB: There are buggy callers of this function. Ideally we -+ * should warn if prev_state != IN_USER, but that will trigger -+ * too frequently to make sense yet. 
-+ */ -+ enum ctx_state prev_state = exception_enter(); -+ schedule(); -+ exception_exit(prev_state); -+} -+#endif -+ -+/** -+ * schedule_preempt_disabled - called with preemption disabled -+ * -+ * Returns with preemption disabled. Note: preempt_count must be 1 -+ */ -+void __sched schedule_preempt_disabled(void) -+{ -+ sched_preempt_enable_no_resched(); -+ schedule(); -+ preempt_disable(); -+} -+ -+static void __sched notrace preempt_schedule_common(void) -+{ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ __schedule(true); -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ -+ /* -+ * Check again in case we missed a preemption opportunity -+ * between schedule and now. -+ */ -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_PREEMPTION -+/* -+ * This is the entry point to schedule() from in-kernel preemption -+ * off of preempt_enable. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule(void) -+{ -+ /* -+ * If there is a non-zero preempt_count or interrupts are disabled, -+ * we do not want to preempt the current task. Just return.. -+ */ -+ if (likely(!preemptible())) -+ return; -+ -+ preempt_schedule_common(); -+} -+NOKPROBE_SYMBOL(preempt_schedule); -+EXPORT_SYMBOL(preempt_schedule); -+ -+/** -+ * preempt_schedule_notrace - preempt_schedule called by tracing -+ * -+ * The tracing infrastructure uses preempt_enable_notrace to prevent -+ * recursion and tracing preempt enabling caused by the tracing -+ * infrastructure itself. But as tracing can happen in areas coming -+ * from userspace or just about to enter userspace, a preempt enable -+ * can occur before user_exit() is called. This will cause the scheduler -+ * to be called when the system is still in usermode. -+ * -+ * To prevent this, the preempt_enable_notrace will use this function -+ * instead of preempt_schedule() to exit user context if needed before -+ * calling the scheduler. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) -+{ -+ enum ctx_state prev_ctx; -+ -+ if (likely(!preemptible())) -+ return; -+ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ /* -+ * Needs preempt disabled in case user_exit() is traced -+ * and the tracer calls preempt_enable_notrace() causing -+ * an infinite recursion. 
-+ */ -+ prev_ctx = exception_enter(); -+ __schedule(true); -+ exception_exit(prev_ctx); -+ -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ } while (need_resched()); -+} -+EXPORT_SYMBOL_GPL(preempt_schedule_notrace); -+ -+#endif /* CONFIG_PREEMPTION */ -+ -+/* -+ * This is the entry point to schedule() from kernel preemption -+ * off of irq context. -+ * Note, that this is called and return with irqs disabled. This will -+ * protect us against recursive calling from irq. -+ */ -+asmlinkage __visible void __sched preempt_schedule_irq(void) -+{ -+ enum ctx_state prev_state; -+ -+ /* Catch callers which need to be fixed */ -+ BUG_ON(preempt_count() || !irqs_disabled()); -+ -+ prev_state = exception_enter(); -+ -+ do { -+ preempt_disable(); -+ local_irq_enable(); -+ __schedule(true); -+ local_irq_disable(); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ -+ exception_exit(prev_state); -+} -+ -+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, -+ void *key) -+{ -+ return try_to_wake_up(curr->private, mode, wake_flags); -+} -+EXPORT_SYMBOL(default_wake_function); -+ -+#ifdef CONFIG_RT_MUTEXES -+ -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ -+/* -+ * rt_mutex_setprio - set the current priority of a task -+ * @p: task to boost -+ * @pi_task: donor task -+ * -+ * This function changes the 'effective' priority of a task. It does -+ * not touch ->normal_prio like __setscheduler(). -+ * -+ * Used by the rt_mutex code to implement priority inheritance -+ * logic. Call site only calls if the priority of the task changed. -+ */ -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) -+{ -+ int prio, oldprio; -+ struct rq *rq; -+ -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. -+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio) -+ return; -+ -+ rq = __task_rq_lock(p, NULL); -+ update_rq_clock(rq); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio) -+ goto out_unlock; -+ -+ /* -+ * Idle task boosting is a nono in general. There is one -+ * exception, when PREEMPT_RT and NOHZ is active: -+ * -+ * The idle task calls get_next_timer_interrupt() and holds -+ * the timer wheel base->lock on the CPU and another CPU wants -+ * to access the timer (probably to cancel it). We can safely -+ * ignore the boosting request, as the idle CPU runs this code -+ * with interrupts disabled and will complete the lock -+ * protected section without being interrupted. So there is no -+ * real need to boost. 
-+ */ -+ if (unlikely(p == rq->idle)) { -+ WARN_ON(p != rq->curr); -+ WARN_ON(p->pi_blocked_on); -+ goto out_unlock; -+ } -+ -+ trace_sched_pi_setprio(p, pi_task); -+ oldprio = p->prio; -+ p->prio = prio; -+ if (task_running(rq, p)){ -+ if (prio > oldprio) -+ resched_task(p); -+ } else if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (prio < oldprio) -+ try_preempt(p, rq); -+ } -+out_unlock: -+ __task_rq_unlock(rq, NULL); -+} -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} -+#endif -+ -+/* -+ * Adjust the deadline for when the priority is to change, before it's -+ * changed. -+ */ -+static inline void adjust_deadline(struct task_struct *p, int new_prio) -+{ -+ p->deadline += static_deadline_diff(new_prio) - task_deadline_diff(p); -+} -+ -+void set_user_nice(struct task_struct *p, long nice) -+{ -+ int new_static, old_static; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) -+ return; -+ new_static = NICE_TO_PRIO(nice); -+ /* -+ * We have to be careful, if called from sys_setpriority(), -+ * the task might be in the middle of scheduling on another CPU. -+ */ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ /* -+ * The RT priorities are set via sched_setscheduler(), but we still -+ * allow the 'normal' nice value to be set - but as expected -+ * it wont have any effect on scheduling until the task is -+ * not SCHED_NORMAL/SCHED_BATCH: -+ */ -+ if (has_rt_policy(p)) { -+ p->static_prio = new_static; -+ goto out_unlock; -+ } -+ -+ adjust_deadline(p, new_static); -+ old_static = p->static_prio; -+ p->static_prio = new_static; -+ p->prio = effective_prio(p); -+ -+ if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (new_static < old_static) -+ try_preempt(p, rq); -+ } else if (task_running(rq, p)) { -+ set_rq_task(rq, p); -+ if (old_static < new_static) -+ resched_task(p); -+ } -+out_unlock: -+ task_rq_unlock(rq, p, &rf); -+} -+EXPORT_SYMBOL(set_user_nice); -+ -+/* -+ * can_nice - check if a task can reduce its nice value -+ * @p: task -+ * @nice: nice value -+ */ -+int can_nice(const struct task_struct *p, const int nice) -+{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); -+} -+ -+#ifdef __ARCH_WANT_SYS_NICE -+ -+/* -+ * sys_nice - change the priority of the current process. -+ * @increment: priority increment -+ * -+ * sys_setpriority is a more generic, but much slower function that -+ * does similar things. -+ */ -+SYSCALL_DEFINE1(nice, int, increment) -+{ -+ long nice, retval; -+ -+ /* -+ * Setpriority might change our priority at the same moment. -+ * We don't have to worry. Conceptually one call occurs first -+ * and we have a single winner. -+ */ -+ -+ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); -+ nice = task_nice(current) + increment; -+ -+ nice = clamp_val(nice, MIN_NICE, MAX_NICE); -+ if (increment < 0 && !can_nice(current, nice)) -+ return -EPERM; -+ -+ retval = security_task_setnice(current, nice); -+ if (retval) -+ return retval; -+ -+ set_user_nice(current, nice); -+ return 0; -+} -+ -+#endif -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. 
Normal tasks are centered around 1, value goes -+ * from 0 (SCHED_ISO) up to 82 (nice +19 SCHED_IDLEPRIO). -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ int delta, prio = p->prio - MAX_RT_PRIO; -+ -+ /* rt tasks and iso tasks */ -+ if (prio <= 0) -+ goto out; -+ -+ /* Convert to ms to avoid overflows */ -+ delta = NS_TO_MS(p->deadline - task_rq(p)->niffies); -+ if (unlikely(delta < 0)) -+ delta = 0; -+ delta = delta * 40 / ms_longest_deadline_diff(); -+ if (delta <= 80) -+ prio += delta; -+ if (idleprio_task(p)) -+ prio += 40; -+out: -+ return prio; -+} -+ -+/** -+ * idle_cpu - is a given CPU idle currently? -+ * @cpu: the processor in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int idle_cpu(int cpu) -+{ -+ return cpu_curr(cpu) == cpu_rq(cpu)->idle; -+} -+ -+/** -+ * available_idle_cpu - is a given CPU idle for enqueuing work. -+ * @cpu: the CPU in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int available_idle_cpu(int cpu) -+{ -+ if (!idle_cpu(cpu)) -+ return 0; -+ -+ if (vcpu_is_preempted(cpu)) -+ return 0; -+ -+ return 1; -+} -+ -+/** -+ * idle_task - return the idle task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * Return: The idle task for the CPU @cpu. -+ */ -+struct task_struct *idle_task(int cpu) -+{ -+ return cpu_rq(cpu)->idle; -+} -+ -+/** -+ * find_process_by_pid - find a process with a matching PID value. -+ * @pid: the pid in question. -+ * -+ * The task of @pid, if found. %NULL otherwise. -+ */ -+static inline struct task_struct *find_process_by_pid(pid_t pid) -+{ -+ return pid ? find_task_by_vpid(pid) : current; -+} -+ -+/* Actually do priority change: must hold rq lock. */ -+static void __setscheduler(struct task_struct *p, struct rq *rq, int policy, -+ int prio, const struct sched_attr *attr, -+ bool keep_boost) -+{ -+ int oldrtprio, oldprio; -+ -+ /* -+ * If params can't change scheduling class changes aren't allowed -+ * either. -+ */ -+ if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS) -+ return; -+ -+ p->policy = policy; -+ oldrtprio = p->rt_priority; -+ p->rt_priority = prio; -+ p->normal_prio = normal_prio(p); -+ oldprio = p->prio; -+ /* -+ * Keep a potential priority boosting if called from -+ * sched_setscheduler(). 
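/*
 * Editor's sketch (not from the patch): the value computed by task_prio()
 * above is what shows up as the "priority" column, field 18 of
 * /proc/<pid>/stat per proc(5).  Parsing scans from the last ')' so a comm
 * containing spaces does not break the field count; simplified otherwise.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[4096];
	FILE *f = fopen("/proc/self/stat", "r");

	if (!f || !fgets(buf, sizeof(buf), f)) {
		perror("/proc/self/stat");
		return 1;
	}
	fclose(f);

	char *p = strrchr(buf, ')');	/* skip "pid (comm" */
	long prio = 0, niceval = 0;
	/* state ppid pgrp session tty tpgid flags minflt cminflt majflt
	 * cmajflt utime stime cutime cstime, then priority and nice */
	if (p && sscanf(p + 1,
			"%*s %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %*u %*u "
			"%*d %*d %ld %ld", &prio, &niceval) == 2)
		printf("priority=%ld nice=%ld\n", prio, niceval);
	return 0;
}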
-+ */ -+ p->prio = normal_prio(p); -+ if (keep_boost) -+ p->prio = rt_effective_prio(p, p->prio); -+ -+ if (task_running(rq, p)) { -+ set_rq_task(rq, p); -+ resched_task(p); -+ } else if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (p->prio < oldprio || p->rt_priority > oldrtprio) -+ try_preempt(p, rq); -+ } -+} -+ -+/* -+ * Check the target process has a UID that matches the current process's -+ */ -+static bool check_same_owner(struct task_struct *p) -+{ -+ const struct cred *cred = current_cred(), *pcred; -+ bool match; -+ -+ rcu_read_lock(); -+ pcred = __task_cred(p); -+ match = (uid_eq(cred->euid, pcred->euid) || -+ uid_eq(cred->euid, pcred->uid)); -+ rcu_read_unlock(); -+ return match; -+} -+ -+static int __sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, -+ bool user, bool pi) -+{ -+ int retval, policy = attr->sched_policy, oldpolicy = -1, priority = attr->sched_priority; -+ unsigned long rlim_rtprio = 0; -+ struct rq_flags rf; -+ int reset_on_fork; -+ struct rq *rq; -+ -+ /* The pi code expects interrupts enabled */ -+ BUG_ON(pi && in_interrupt()); -+ -+ if (is_rt_policy(policy) && !capable(CAP_SYS_NICE)) { -+ unsigned long lflags; -+ -+ if (!lock_task_sighand(p, &lflags)) -+ return -ESRCH; -+ rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO); -+ unlock_task_sighand(p, &lflags); -+ if (rlim_rtprio) -+ goto recheck; -+ /* -+ * If the caller requested an RT policy without having the -+ * necessary rights, we downgrade the policy to SCHED_ISO. -+ * We also set the parameter to zero to pass the checks. -+ */ -+ policy = SCHED_ISO; -+ priority = 0; -+ } -+recheck: -+ /* Double check policy once rq lock held */ -+ if (policy < 0) { -+ reset_on_fork = p->sched_reset_on_fork; -+ policy = oldpolicy = p->policy; -+ } else { -+ reset_on_fork = !!(policy & SCHED_RESET_ON_FORK); -+ policy &= ~SCHED_RESET_ON_FORK; -+ -+ if (!SCHED_RANGE(policy)) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV)) -+ return -EINVAL; -+ -+ /* -+ * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * SCHED_BATCH is 0. 
-+ */ -+ if (priority < 0 || -+ (p->mm && priority > MAX_USER_RT_PRIO - 1) || -+ (!p->mm && priority > MAX_RT_PRIO - 1)) -+ return -EINVAL; -+ if (is_rt_policy(policy) != (priority != 0)) -+ return -EINVAL; -+ -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (is_rt_policy(policy)) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (priority > p->rt_priority && -+ priority > rlim_rtprio) -+ return -EPERM; -+ } else { -+ switch (p->policy) { -+ /* -+ * Can only downgrade policies but not back to -+ * SCHED_NORMAL -+ */ -+ case SCHED_ISO: -+ if (policy == SCHED_ISO) -+ goto out; -+ if (policy != SCHED_NORMAL) -+ return -EPERM; -+ break; -+ case SCHED_BATCH: -+ if (policy == SCHED_BATCH) -+ goto out; -+ if (policy != SCHED_IDLEPRIO) -+ return -EPERM; -+ break; -+ case SCHED_IDLEPRIO: -+ if (policy == SCHED_IDLEPRIO) -+ goto out; -+ return -EPERM; -+ default: -+ break; -+ } -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag: */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ -+ if (user) { -+ retval = security_task_setscheduler(p); -+ if (retval) -+ return retval; -+ } -+ -+ if (pi) -+ cpuset_read_lock(); -+ -+ /* -+ * Make sure no PI-waiters arrive (or leave) while we are -+ * changing the priority of the task: -+ * -+ * To be able to change p->policy safely, the runqueue lock must be -+ * held. -+ */ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ /* -+ * Changing the policy of the stop threads its a very bad idea: -+ */ -+ if (p == rq->stop) { -+ retval = -EINVAL; -+ goto unlock; -+ } -+ -+ /* -+ * If not changing anything there's no need to proceed further: -+ */ -+ if (unlikely(policy == p->policy && (!is_rt_policy(policy) || -+ priority == p->rt_priority))) { -+ retval = 0; -+ goto unlock; -+ } -+ -+ /* Re-check policy now with rq lock held */ -+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { -+ policy = oldpolicy = -1; -+ task_rq_unlock(rq, p, &rf); -+ if (pi) -+ cpuset_read_unlock(); -+ goto recheck; -+ } -+ p->sched_reset_on_fork = reset_on_fork; -+ -+ __setscheduler(p, rq, policy, priority, attr, pi); -+ -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ task_rq_unlock(rq, p, &rf); -+ -+ if (pi) { -+ cpuset_read_unlock(); -+ rt_mutex_adjust_pi(p); -+ } -+ preempt_enable(); -+out: -+ return 0; -+ -+unlock: -+ task_rq_unlock(rq, p, &rf); -+ if (pi) -+ cpuset_read_unlock(); -+ return retval; -+} -+ -+static int _sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param, bool check) -+{ -+ struct sched_attr attr = { -+ .sched_policy = policy, -+ .sched_priority = param->sched_priority, -+ .sched_nice = PRIO_TO_NICE(p->static_prio), -+ }; -+ -+ return __sched_setscheduler(p, &attr, check, true); -+} -+/** -+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * -+ * NOTE that the task may be already dead. 
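/*
 * Editor's sketch (not part of the patch): how the permission checks above
 * look from userspace.  Priority 10 is an arbitrary example; SCHED_ISO is a
 * MuQSS-specific policy not declared in glibc headers, so it is only
 * mentioned in comments here.
 */
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit rl;
	struct sched_param sp = { .sched_priority = 10 };

	if (getrlimit(RLIMIT_RTPRIO, &rl) == 0)
		printf("RLIMIT_RTPRIO soft limit: %llu\n",
		       (unsigned long long)rl.rlim_cur);

	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1)
		perror("sched_setscheduler(SCHED_FIFO)"); /* e.g. above the rlimit */

	/*
	 * Per __sched_setscheduler() above, an unprivileged request with
	 * RLIMIT_RTPRIO == 0 is not rejected but quietly downgraded to
	 * SCHED_ISO, so check which policy was actually applied.
	 */
	printf("requested SCHED_FIFO (%d), got policy %d\n",
	       SCHED_FIFO, sched_getscheduler(0));
	return 0;
}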
-+ */ -+int sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, true); -+} -+ -+EXPORT_SYMBOL_GPL(sched_setscheduler); -+ -+int sched_setattr(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, true, true); -+} -+EXPORT_SYMBOL_GPL(sched_setattr); -+ -+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, false, true); -+} -+ -+/** -+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Just like sched_setscheduler, only don't bother checking if the -+ * current context has permission. For example, this is needed in -+ * stop_machine(): we create temporary high priority worker threads, -+ * but our caller might not have that capability. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+int sched_setscheduler_nocheck(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, false); -+} -+EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); -+ -+static int -+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) -+{ -+ struct sched_param lparam; -+ struct task_struct *p; -+ int retval; -+ -+ if (!param || pid < 0) -+ return -EINVAL; -+ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) -+ return -EFAULT; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setscheduler(p, policy, &lparam); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/* -+ * Mimics kernel/events/core.c perf_copy_attr(). -+ */ -+static int sched_copy_attr(struct sched_attr __user *uattr, -+ struct sched_attr *attr) -+{ -+ u32 size; -+ int ret; -+ -+ /* Zero the full structure, so that a short copy will be nice: */ -+ memset(attr, 0, sizeof(*attr)); -+ -+ ret = get_user(size, &uattr->size); -+ if (ret) -+ return ret; -+ -+ /* ABI compatibility quirk: */ -+ if (!size) -+ size = SCHED_ATTR_SIZE_VER0; -+ -+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) -+ goto err_size; -+ -+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); -+ if (ret) { -+ if (ret == -E2BIG) -+ goto err_size; -+ return ret; -+ } -+ -+ /* -+ * XXX: Do we want to be lenient like existing syscalls; or do we want -+ * to be strict and return an error on out-of-bounds values? -+ */ -+ attr->sched_nice = clamp(attr->sched_nice, -20, 19); -+ -+ /* sched/core.c uses zero here but we already know ret is zero */ -+ return 0; -+ -+err_size: -+ put_user(sizeof(*attr), &uattr->size); -+ return -E2BIG; -+} -+ -+/* -+ * sched_setparam() passes in -1 for its policy, to let the functions -+ * it calls know not to change it. -+ */ -+#define SETPARAM_POLICY -1 -+ -+/** -+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority -+ * @pid: the pid in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. 
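/*
 * Editor's sketch (not part of the patch): sched_setattr() often has no glibc
 * wrapper, so callers go through syscall(2) with a hand-rolled struct
 * sched_attr whose ->size drives the versioned-ABI handling in
 * sched_copy_attr() above.  Layout below matches SCHED_ATTR_SIZE_VER0
 * (48 bytes); the nice value 5 is an example.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;		/* SCHED_NORMAL/SCHED_BATCH */
	uint32_t sched_priority;	/* SCHED_FIFO/SCHED_RR */
	uint64_t sched_runtime;		/* deadline fields exist in the ABI; the */
	uint64_t sched_deadline;	/* MuQSS path above appears to consume   */
	uint64_t sched_period;		/* only policy, priority and nice        */
};

int main(void)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);	/* 48 bytes == SCHED_ATTR_SIZE_VER0 */
	attr.sched_policy = 0;		/* SCHED_NORMAL */
	attr.sched_nice = 5;

	if (syscall(SYS_sched_setattr, 0, &attr, 0))
		perror("sched_setattr");
	else
		printf("nice set to %d via sched_setattr\n", attr.sched_nice);
	return 0;
}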
-+ */ -+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) -+{ -+ if (policy < 0) -+ return -EINVAL; -+ -+ return do_sched_setscheduler(pid, policy, param); -+} -+ -+/** -+ * sys_sched_setparam - set/change the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); -+} -+ -+/** -+ * sys_sched_setattr - same as above, but with extended sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ */ -+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, flags) -+{ -+ struct sched_attr attr; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || flags) -+ return -EINVAL; -+ -+ retval = sched_copy_attr(uattr, &attr); -+ if (retval) -+ return retval; -+ -+ if ((int)attr.sched_policy < 0) -+ return -EINVAL; -+ if (attr.sched_flags & SCHED_FLAG_KEEP_POLICY) -+ attr.sched_policy = SETPARAM_POLICY; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setattr(p, &attr); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread -+ * @pid: the pid in question. -+ * -+ * Return: On success, the policy of the thread. Otherwise, a negative error -+ * code. -+ */ -+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) -+{ -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (pid < 0) -+ goto out_nounlock; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (p) { -+ retval = security_task_getscheduler(p); -+ if (!retval) -+ retval = p->policy; -+ } -+ rcu_read_unlock(); -+ -+out_nounlock: -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the RT priority. -+ * -+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error -+ * code. -+ */ -+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ struct sched_param lp = { .sched_priority = 0 }; -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (!param || pid < 0) -+ goto out_nounlock; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ if (has_rt_policy(p)) -+ lp.sched_priority = p->rt_priority; -+ rcu_read_unlock(); -+ -+ /* -+ * This one might sleep, we cannot do it with a spinlock held ... -+ */ -+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; -+ -+out_nounlock: -+ return retval; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/* -+ * Copy the kernel size attribute structure (which might be larger -+ * than what user-space knows about) to user-space. -+ * -+ * Note that all cases are valid: user-space buffer can be larger or -+ * smaller than the kernel-space buffer. The usual case is that both -+ * have the same size. 
-+ */ -+static int -+sched_attr_copy_to_user(struct sched_attr __user *uattr, -+ struct sched_attr *kattr, -+ unsigned int usize) -+{ -+ unsigned int ksize = sizeof(*kattr); -+ -+ if (!access_ok(uattr, usize)) -+ return -EFAULT; -+ -+ /* -+ * sched_getattr() ABI forwards and backwards compatibility: -+ * -+ * If usize == ksize then we just copy everything to user-space and all is good. -+ * -+ * If usize < ksize then we only copy as much as user-space has space for, -+ * this keeps ABI compatibility as well. We skip the rest. -+ * -+ * If usize > ksize then user-space is using a newer version of the ABI, -+ * which part the kernel doesn't know about. Just ignore it - tooling can -+ * detect the kernel's knowledge of attributes from the attr->size value -+ * which is set to ksize in this case. -+ */ -+ kattr->size = min(usize, ksize); -+ -+ if (copy_to_user(uattr, kattr, kattr->size)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ * @usize: sizeof(attr) for fwd/bwd comp. -+ * @flags: for future extension. -+ */ -+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, usize, unsigned int, flags) -+{ -+ struct sched_attr kattr = { }; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || usize > PAGE_SIZE || -+ usize < SCHED_ATTR_SIZE_VER0 || flags) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ kattr.sched_policy = p->policy; -+ if (rt_task(p)) -+ kattr.sched_priority = p->rt_priority; -+ else -+ kattr.sched_nice = task_nice(p); -+ -+ rcu_read_unlock(); -+ -+ return sched_attr_copy_to_user(uattr, &kattr, usize); -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ -+ cpumask_var_t cpus_allowed, new_mask; -+ struct task_struct *p; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ p = find_process_by_pid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ return -ESRCH; -+ } -+ -+ /* Prevent p going away */ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->flags & PF_NO_SETAFFINITY) { -+ retval = -EINVAL; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ retval = -EPERM; -+ if (!check_same_owner(p)) { -+ rcu_read_lock(); -+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { -+ rcu_read_unlock(); -+ goto out_unlock; -+ } -+ rcu_read_unlock(); -+ } -+ -+ retval = security_task_setscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ cpuset_cpus_allowed(p, cpus_allowed); -+ cpumask_and(new_mask, in_mask, cpus_allowed); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ -+ if (!retval) { -+ cpuset_cpus_allowed(p, cpus_allowed); -+ if (!cpumask_subset(new_mask, cpus_allowed)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. 
Just reset the cpus_allowed to the -+ * cpuset's cpus_allowed -+ */ -+ cpumask_copy(new_mask, cpus_allowed); -+ goto again; -+ } -+ } -+out_unlock: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_allowed); -+out_put_task: -+ put_task_struct(p); -+ return retval; -+} -+ -+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, -+ cpumask_t *new_mask) -+{ -+ if (len < cpumask_size()) -+ cpumask_clear(new_mask); -+ else if (len > cpumask_size()) -+ len = cpumask_size(); -+ -+ return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; -+} -+ -+ -+/** -+ * sys_sched_setaffinity - set the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to the new CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ cpumask_var_t new_mask; -+ int retval; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); -+ if (retval == 0) -+ retval = sched_setaffinity(pid, new_mask); -+ free_cpumask_var(new_mask); -+ return retval; -+} -+ -+long sched_getaffinity(pid_t pid, cpumask_t *mask) -+{ -+ struct task_struct *p; -+ unsigned long flags; -+ int retval; -+ -+ get_online_cpus(); -+ rcu_read_lock(); -+ -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+out_unlock: -+ rcu_read_unlock(); -+ put_online_cpus(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getaffinity - get the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to hold the current CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ int ret; -+ cpumask_var_t mask; -+ -+ if ((len * BITS_PER_BYTE) < nr_cpu_ids) -+ return -EINVAL; -+ if (len & (sizeof(unsigned long)-1)) -+ return -EINVAL; -+ -+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ ret = sched_getaffinity(pid, mask); -+ if (ret == 0) { -+ unsigned int retlen = min(len, cpumask_size()); -+ -+ if (copy_to_user(user_mask_ptr, mask, retlen)) -+ ret = -EFAULT; -+ else -+ ret = retlen; -+ } -+ free_cpumask_var(mask); -+ -+ return ret; -+} -+ -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. It does this by -+ * scheduling away the current task. If it still has the earliest deadline -+ * it will be scheduled again as the next task. -+ * -+ * Return: 0. 
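/*
 * Editor's sketch (not part of the patch): the sys_sched_setaffinity() and
 * sys_sched_getaffinity() entry points above are what the glibc cpu_set_t
 * helpers end up calling.  Pinning to CPU 0 is just an example.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);
	if (sched_setaffinity(0, sizeof(set), &set)) {
		perror("sched_setaffinity");
		return 1;
	}

	CPU_ZERO(&set);
	if (sched_getaffinity(0, sizeof(set), &set)) {
		perror("sched_getaffinity");
		return 1;
	}
	/* Note: the raw syscall above returns the copied mask size on success;
	 * the glibc wrapper hides that and returns 0. */
	for (int cpu = 0; cpu < CPU_SETSIZE; cpu++)
		if (CPU_ISSET(cpu, &set))
			printf("allowed on CPU %d\n", cpu);
	return 0;
}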
-+ */ -+static void do_sched_yield(void) -+{ -+ struct rq *rq; -+ -+ if (!sched_yield_type) -+ return; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ rq_lock(rq); -+ -+ if (sched_yield_type > 1) -+ time_slice_expired(current, rq); -+ schedstat_inc(rq->yld_count); -+ -+ /* -+ * Since we are going to call schedule() anyway, there's -+ * no need to preempt or enable interrupts: -+ */ -+ preempt_disable(); -+ rq_unlock(rq); -+ sched_preempt_enable_no_resched(); -+ -+ schedule(); -+} -+ -+SYSCALL_DEFINE0(sched_yield) -+{ -+ do_sched_yield(); -+ return 0; -+} -+ -+#ifndef CONFIG_PREEMPTION -+int __sched _cond_resched(void) -+{ -+ if (should_resched(0)) { -+ preempt_schedule_common(); -+ return 1; -+ } -+ rcu_all_qs(); -+ return 0; -+} -+EXPORT_SYMBOL(_cond_resched); -+#endif -+ -+/* -+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, -+ * call schedule, and on return reacquire the lock. -+ * -+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level -+ * operations here to prevent schedule() from being called twice (once via -+ * spin_unlock(), once by hand). -+ */ -+int __cond_resched_lock(spinlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held(lock); -+ -+ if (spin_needbreak(lock) || resched) { -+ spin_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ spin_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_lock); -+ -+/** -+ * yield - yield the current processor to other threads. -+ * -+ * Do not ever use this function, there's a 99% chance you're doing it wrong. -+ * -+ * The scheduler is at all times free to pick the calling task as the most -+ * eligible task to run, if removing the yield() call from your code breaks -+ * it, its already broken. -+ * -+ * Typical broken usage is: -+ * -+ * while (!event) -+ * yield(); -+ * -+ * where one assumes that yield() will let 'the other' process run that will -+ * make event true. If the current task is a SCHED_FIFO task that will never -+ * happen. Never use yield() as a progress guarantee!! -+ * -+ * If you want to use yield() to wait for something, use wait_event(). -+ * If you want to use yield() to be 'nice' for others, use cond_resched(). -+ * If you still want to use yield(), do not! -+ */ -+void __sched yield(void) -+{ -+ set_current_state(TASK_RUNNING); -+ do_sched_yield(); -+} -+EXPORT_SYMBOL(yield); -+ -+/** -+ * yield_to - yield the current processor to another thread in -+ * your thread group, or accelerate that thread toward the -+ * processor it's on. -+ * @p: target task -+ * @preempt: whether task preemption is allowed or not -+ * -+ * It's the caller's job to ensure that the target task struct -+ * can't go away on us before we can do any checks. -+ * -+ * Return: -+ * true (>0) if we indeed boosted the target task. -+ * false (0) if we failed to boost the target. -+ * -ESRCH if there's no task to yield to. -+ */ -+int __sched yield_to(struct task_struct *p, bool preempt) -+{ -+ struct task_struct *rq_p; -+ struct rq *rq, *p_rq; -+ unsigned long flags; -+ int yielded = 0; -+ -+ local_irq_save(flags); -+ rq = this_rq(); -+ -+again: -+ p_rq = task_rq(p); -+ /* -+ * If we're the only runnable task on the rq and target rq also -+ * has only one task, there's absolutely no point in yielding. 
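/*
 * Editor's sketch (not part of the patch): the userspace side of
 * do_sched_yield() above.  Under MuQSS the effect depends on the yield_type
 * sysctl (0 = no-op, 1 = plain reschedule, 2 = expire the timeslice); the
 * /proc/sys/kernel/yield_type path is assumed here.  The loop only shows the
 * call -- as the yield() comment above warns, real code should block
 * (futex/condvar) rather than spin on sched_yield().
 */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/yield_type", "r");
	int yield_type = -1;

	if (f) {
		if (fscanf(f, "%d", &yield_type) != 1)
			yield_type = -1;
		fclose(f);
	}
	printf("kernel.yield_type = %d\n", yield_type);

	for (int i = 0; i < 3; i++)
		sched_yield();	/* may be a no-op when yield_type == 0 */
	return 0;
}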
-+ */ -+ if (task_running(p_rq, p) || p->state) { -+ yielded = -ESRCH; -+ goto out_irq; -+ } -+ -+ double_rq_lock(rq, p_rq); -+ if (unlikely(task_rq(p) != p_rq)) { -+ double_rq_unlock(rq, p_rq); -+ goto again; -+ } -+ -+ yielded = 1; -+ schedstat_inc(rq->yld_count); -+ rq_p = rq->curr; -+ if (p->deadline > rq_p->deadline) -+ p->deadline = rq_p->deadline; -+ p->time_slice += rq_p->time_slice; -+ if (p->time_slice > timeslice()) -+ p->time_slice = timeslice(); -+ time_slice_expired(rq_p, rq); -+ if (preempt && rq != p_rq) -+ resched_task(p_rq->curr); -+ double_rq_unlock(rq, p_rq); -+out_irq: -+ local_irq_restore(flags); -+ -+ if (yielded > 0) -+ schedule(); -+ return yielded; -+} -+EXPORT_SYMBOL_GPL(yield_to); -+ -+int io_schedule_prepare(void) -+{ -+ int old_iowait = current->in_iowait; -+ -+ current->in_iowait = 1; -+ blk_schedule_flush_plug(current); -+ -+ return old_iowait; -+} -+ -+void io_schedule_finish(int token) -+{ -+ current->in_iowait = token; -+} -+ -+/* -+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so -+ * that process accounting knows that this is a task in IO wait state. -+ * -+ * But don't do that if it is a deliberate, throttling IO wait (this task -+ * has set its backing_dev_info: the queue against which it should throttle) -+ */ -+ -+long __sched io_schedule_timeout(long timeout) -+{ -+ int token; -+ long ret; -+ -+ token = io_schedule_prepare(); -+ ret = schedule_timeout(timeout); -+ io_schedule_finish(token); -+ -+ return ret; -+} -+EXPORT_SYMBOL(io_schedule_timeout); -+ -+void __sched io_schedule(void) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ schedule(); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(io_schedule); -+ -+/** -+ * sys_sched_get_priority_max - return maximum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the maximum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_max, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = MAX_USER_RT_PRIO-1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLEPRIO: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * sys_sched_get_priority_min - return minimum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the minimum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_min, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLEPRIO: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) -+{ -+ struct task_struct *p; -+ unsigned int time_slice; -+ struct rq_flags rf; -+ struct rq *rq; -+ int retval; -+ -+ if (pid < 0) -+ return -EINVAL; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ rq = task_rq_lock(p, &rf); -+ time_slice = p->policy == SCHED_FIFO ? 
0 : MS_TO_NS(task_timeslice(p)); -+ task_rq_unlock(rq, p, &rf); -+ -+ rcu_read_unlock(); -+ *t = ns_to_timespec64(time_slice); -+ return 0; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/** -+ * sys_sched_rr_get_interval - return the default timeslice of a process. -+ * @pid: pid of the process. -+ * @interval: userspace pointer to the timeslice value. -+ * -+ * this syscall writes the default timeslice value of a given process -+ * into the user-space timespec buffer. A value of '0' means infinity. -+ * -+ * Return: On success, 0 and the timeslice is in @interval. Otherwise, -+ * an error code. -+ */ -+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, -+ struct __kernel_timespec __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_timespec64(&t, interval); -+ -+ return retval; -+} -+ -+#ifdef CONFIG_COMPAT_32BIT_TIME -+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, -+ struct old_timespec32 __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_old_timespec32(&t, interval); -+ return retval; -+} -+#endif -+ -+void sched_show_task(struct task_struct *p) -+{ -+ unsigned long free = 0; -+ int ppid; -+ -+ if (!try_get_task_stack(p)) -+ return; -+ -+ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); -+ -+ if (p->state == TASK_RUNNING) -+ printk(KERN_CONT " running task "); -+#ifdef CONFIG_DEBUG_STACK_USAGE -+ free = stack_not_used(p); -+#endif -+ ppid = 0; -+ rcu_read_lock(); -+ if (pid_alive(p)) -+ ppid = task_pid_nr(rcu_dereference(p->real_parent)); -+ rcu_read_unlock(); -+ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, -+ task_pid_nr(p), ppid, -+ (unsigned long)task_thread_info(p)->flags); -+ -+ print_worker_info(KERN_INFO, p); -+ show_stack(p, NULL); -+ put_task_stack(p); -+} -+EXPORT_SYMBOL_GPL(sched_show_task); -+ -+static inline bool -+state_filter_match(unsigned long state_filter, struct task_struct *p) -+{ -+ /* no filter, everything matches */ -+ if (!state_filter) -+ return true; -+ -+ /* filter, but doesn't match */ -+ if (!(p->state & state_filter)) -+ return false; -+ -+ /* -+ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows -+ * TASK_KILLABLE). -+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) -+ return false; -+ -+ return true; -+} -+ -+void show_state_filter(unsigned long state_filter) -+{ -+ struct task_struct *g, *p; -+ -+#if BITS_PER_LONG == 32 -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#else -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#endif -+ rcu_read_lock(); -+ for_each_process_thread(g, p) { -+ /* -+ * reset the NMI-timeout, listing all files on a slow -+ * console might take a lot of time: -+ * Also, reset softlockup watchdogs on all CPUs, because -+ * another CPU might be blocked waiting for us to process -+ * an IPI. 
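/*
 * Editor's sketch (not part of the patch): sched_rr_get_interval() above
 * reports MuQSS's per-task timeslice (and 0 for SCHED_FIFO).  pid 0 means
 * "the calling task", as in the syscall implementation.
 */
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	if (sched_rr_get_interval(0, &ts)) {
		perror("sched_rr_get_interval");
		return 1;
	}
	printf("timeslice: %ld.%09ld s\n", (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}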
-+ */ -+ touch_nmi_watchdog(); -+ touch_all_softlockup_watchdogs(); -+ if (state_filter_match(state_filter, p)) -+ sched_show_task(p); -+ } -+ -+ rcu_read_unlock(); -+ /* -+ * Only show locks if all tasks are dumped: -+ */ -+ if (!state_filter) -+ debug_show_all_locks(); -+} -+ -+void dump_cpu_task(int cpu) -+{ -+ pr_info("Task dump for CPU %d:\n", cpu); -+ sched_show_task(cpu_curr(cpu)); -+} -+ -+#ifdef CONFIG_SMP -+void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ p->nr_cpus_allowed = cpumask_weight(new_mask); -+} -+ -+void __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ struct rq *rq = task_rq(p); -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ -+ if (task_queued(p)) { -+ /* -+ * Because __kthread_bind() calls this on blocked tasks without -+ * holding rq->lock. -+ */ -+ lockdep_assert_held(rq->lock); -+ } -+} -+ -+/* -+ * Calling do_set_cpus_allowed from outside the scheduler code should not be -+ * called on a running or queued task. We should be holding pi_lock. -+ */ -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ __do_set_cpus_allowed(p, new_mask); -+ if (needs_other_cpu(p, task_cpu(p))) { -+ struct rq *rq; -+ -+ rq = __task_rq_lock(p, NULL); -+ set_task_cpu(p, valid_task_cpu(p)); -+ resched_task(p); -+ __task_rq_unlock(rq, NULL); -+ } -+} -+#endif -+ -+/** -+ * init_idle - set up an idle thread for a given CPU -+ * @idle: task in question -+ * @cpu: cpu the idle task belongs to -+ * -+ * NOTE: this function does not set the idle thread's NEED_RESCHED -+ * flag, to make booting more robust. -+ */ -+void init_idle(struct task_struct *idle, int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&idle->pi_lock, flags); -+ raw_spin_lock(rq->lock); -+ idle->last_ran = rq->niffies; -+ time_slice_expired(idle, rq); -+ idle->state = TASK_RUNNING; -+ /* Setting prio to illegal value shouldn't matter when never queued */ -+ idle->prio = PRIO_LIMIT; -+ -+ kasan_unpoison_task_stack(idle); -+ -+#ifdef CONFIG_SMP -+ /* -+ * It's possible that init_idle() gets called multiple times on a task, -+ * in that case do_set_cpus_allowed() will not do the right thing. -+ * -+ * And since this is boot we can forgo the serialisation. -+ */ -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); -+#ifdef CONFIG_SMT_NICE -+ idle->smt_bias = 0; -+#endif -+#endif -+ set_rq_task(rq, idle); -+ -+ /* Silence PROVE_RCU */ -+ rcu_read_lock(); -+ set_task_cpu(idle, cpu); -+ rcu_read_unlock(); -+ -+ rq->idle = idle; -+ rcu_assign_pointer(rq->curr, idle); -+ idle->on_rq = TASK_ON_RQ_QUEUED; -+ raw_spin_unlock(rq->lock); -+ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); -+ -+ /* Set the preempt count _outside_ the spinlocks! 
*/ -+ init_idle_preempt_count(idle, cpu); -+ -+ ftrace_graph_init_idle_task(idle, cpu); -+ vtime_init_idle(idle, cpu); -+#ifdef CONFIG_SMP -+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -+#endif -+} -+ -+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, -+ const struct cpumask __maybe_unused *trial) -+{ -+ return 1; -+} -+ -+int task_can_attach(struct task_struct *p, -+ const struct cpumask *cs_cpus_allowed) -+{ -+ int ret = 0; -+ -+ /* -+ * Kthreads which disallow setaffinity shouldn't be moved -+ * to a new cpuset; we don't want to change their CPU -+ * affinity and isolating such threads by their set of -+ * allowed nodes is unnecessary. Thus, cpusets are not -+ * applicable for such threads. This prevents checking for -+ * success of set_cpus_allowed_ptr() on all attached tasks -+ * before cpus_mask may be changed. -+ */ -+ if (p->flags & PF_NO_SETAFFINITY) -+ ret = -EINVAL; -+ -+ return ret; -+} -+ -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+ rq_lock_irqsave(rq, &rf); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(rq); -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+#ifdef CONFIG_SMP -+#ifdef CONFIG_NO_HZ_COMMON -+void select_nohz_load_balancer(int stop_tick) -+{ -+} -+ -+void set_cpu_sd_state_idle(void) {} -+void nohz_balance_enter_idle(int cpu) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. -+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). -+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(); -+ struct sched_domain *sd; -+ -+ if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) -+ return cpu; -+ -+ rcu_read_lock(); -+ for_each_domain(cpu, sd) { -+ for_each_cpu(i, sched_domain_span(sd)) { -+ if (cpu == i) -+ continue; -+ -+ if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) { -+ cpu = i; -+ cpu = i; -+ goto unlock; -+ } -+ } -+ } -+ -+ if (!housekeeping_cpu(cpu, HK_FLAG_TIMER)) -+ cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+unlock: -+ rcu_read_unlock(); -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. -+ */ -+void wake_up_idle_cpu(int cpu) -+{ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ if (set_nr_and_not_polling(cpu_rq(cpu)->idle)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+static bool wake_up_full_nohz_cpu(int cpu) -+{ -+ /* -+ * We just need the target to call irq_exit() and re-evaluate -+ * the next tick. The nohz full kick at least implies that. -+ * If needed we can still optimize that later with an -+ * empty IRQ. -+ */ -+ if (cpu_is_offline(cpu)) -+ return true; /* Don't try to wake offline CPUs. 
*/ -+ if (tick_nohz_full_cpu(cpu)) { -+ if (cpu != smp_processor_id() || -+ tick_nohz_tick_stopped()) -+ tick_nohz_full_kick_cpu(cpu); -+ return true; -+ } -+ -+ return false; -+} -+ -+/* -+ * Wake up the specified CPU. If the CPU is going offline, it is the -+ * caller's responsibility to deal with the lost wakeup, for example, -+ * by hooking into the CPU_DEAD notifier like timers and hrtimers do. -+ */ -+void wake_up_nohz_cpu(int cpu) -+{ -+ if (!wake_up_full_nohz_cpu(cpu)) -+ wake_up_idle_cpu(cpu); -+} -+#endif /* CONFIG_NO_HZ_COMMON */ -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ bool queued = false, running_wrong = false, kthread; -+ struct cpumask old_mask; -+ unsigned int dest_cpu; -+ struct rq_flags rf; -+ struct rq *rq; -+ int ret = 0; -+ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ kthread = !!(p->flags & PF_KTHREAD); -+ if (kthread) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. -+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ cpumask_copy(&old_mask, p->cpus_ptr); -+ if (cpumask_equal(&old_mask, new_mask)) -+ goto out; -+ -+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ queued = task_queued(p); -+ __do_set_cpus_allowed(p, new_mask); -+ -+ if (kthread) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. -+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(rq, p)) { -+ /* Task is running on the wrong cpu now, reschedule it. */ -+ if (rq == this_rq()) { -+ set_task_cpu(p, dest_cpu); -+ set_tsk_need_resched(p); -+ running_wrong = true; -+ } else -+ resched_task(p); -+ } else { -+ if (queued) { -+ /* -+ * Switch runqueue locks after dequeueing the task -+ * here while still holding the pi_lock to be holding -+ * the correct lock for enqueueing. 
-+ */ -+ dequeue_task(rq, p, 0); -+ rq_unlock(rq); -+ -+ rq = cpu_rq(dest_cpu); -+ rq_lock(rq); -+ } -+ set_task_cpu(p, dest_cpu); -+ if (queued) -+ enqueue_task(rq, p, 0); -+ } -+ if (queued) -+ try_preempt(p, rq); -+ if (running_wrong) -+ preempt_disable(); -+out: -+ task_rq_unlock(rq, p, &rf); -+ -+ if (running_wrong) { -+ __schedule(true); -+ preempt_enable(); -+ } -+ -+ return ret; -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+#ifdef CONFIG_HOTPLUG_CPU -+/* -+ * Run through task list and find tasks affined to the dead cpu, then remove -+ * that cpu from the list, enable cpu0 and set the zerobound flag. Must hold -+ * cpu 0 and src_cpu's runqueue locks. We should be holding both rq lock and -+ * pi_lock to change cpus_mask but it's not going to matter here. -+ */ -+static void bind_zero(int src_cpu) -+{ -+ struct task_struct *p, *t; -+ struct rq *rq0; -+ int bound = 0; -+ -+ if (src_cpu == 0) -+ return; -+ -+ rq0 = cpu_rq(0); -+ -+ do_each_thread(t, p) { -+ if (cpumask_test_cpu(src_cpu, p->cpus_ptr)) { -+ bool local = (task_cpu(p) == src_cpu); -+ struct rq *rq = task_rq(p); -+ -+ /* task_running is the cpu stopper thread */ -+ if (local && task_running(rq, p)) -+ continue; -+ atomic_clear_cpu(src_cpu, &p->cpus_mask); -+ atomic_set_cpu(0, &p->cpus_mask); -+ p->zerobound = true; -+ bound++; -+ if (local) { -+ bool queued = task_queued(p); -+ -+ if (queued) -+ dequeue_task(rq, p, 0); -+ set_task_cpu(p, 0); -+ if (queued) -+ enqueue_task(rq0, p, 0); -+ } -+ } -+ } while_each_thread(t, p); -+ -+ if (bound) { -+ printk(KERN_INFO "MuQSS removed affinity for %d processes to cpu %d\n", -+ bound, src_cpu); -+ } -+} -+ -+/* Find processes with the zerobound flag and reenable their affinity for the -+ * CPU coming alive. */ -+static void unbind_zero(int src_cpu) -+{ -+ int unbound = 0, zerobound = 0; -+ struct task_struct *p, *t; -+ -+ if (src_cpu == 0) -+ return; -+ -+ do_each_thread(t, p) { -+ if (!p->mm) -+ p->zerobound = false; -+ if (p->zerobound) { -+ unbound++; -+ cpumask_set_cpu(src_cpu, &p->cpus_mask); -+ /* Once every CPU affinity has been re-enabled, remove -+ * the zerobound flag */ -+ if (cpumask_subset(cpu_possible_mask, p->cpus_ptr)) { -+ p->zerobound = false; -+ zerobound++; -+ } -+ } -+ } while_each_thread(t, p); -+ -+ if (unbound) { -+ printk(KERN_INFO "MuQSS added affinity for %d processes to cpu %d\n", -+ unbound, src_cpu); -+ } -+ if (zerobound) { -+ printk(KERN_INFO "MuQSS released forced binding to cpu0 for %d processes\n", -+ zerobound); -+ } -+} -+ -+/* -+ * Ensure that the idle task is using init_mm right before its cpu goes -+ * offline. -+ */ -+void idle_task_exit(void) -+{ -+ struct mm_struct *mm = current->active_mm; -+ -+ BUG_ON(cpu_online(smp_processor_id())); -+ -+ if (mm != &init_mm) { -+ switch_mm(mm, &init_mm, current); -+ current->active_mm = &init_mm; -+ finish_arch_post_lock_switch(); -+ } -+ mmdrop(mm); -+} -+#else /* CONFIG_HOTPLUG_CPU */ -+static void unbind_zero(int src_cpu) {} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. 
-+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. -+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+ -+static struct ctl_table sd_ctl_dir[] = { -+ { -+ .procname = "sched_domain", -+ .mode = 0555, -+ }, -+ {} -+}; -+ -+static struct ctl_table sd_ctl_root[] = { -+ { -+ .procname = "kernel", -+ .mode = 0555, -+ .child = sd_ctl_dir, -+ }, -+ {} -+}; -+ -+static struct ctl_table *sd_alloc_ctl_entry(int n) -+{ -+ struct ctl_table *entry = -+ kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL); -+ -+ return entry; -+} -+ -+static void sd_free_ctl_entry(struct ctl_table **tablep) -+{ -+ struct ctl_table *entry; -+ -+ /* -+ * In the intermediate directories, both the child directory and -+ * procname are dynamically allocated and could fail but the mode -+ * will always be set. In the lowest directory the names are -+ * static strings and all have proc handlers. -+ */ -+ for (entry = *tablep; entry->mode; entry++) { -+ if (entry->child) -+ sd_free_ctl_entry(&entry->child); -+ if (entry->proc_handler == NULL) -+ kfree(entry->procname); -+ } -+ -+ kfree(*tablep); -+ *tablep = NULL; -+} -+ -+static void -+set_table_entry(struct ctl_table *entry, -+ const char *procname, void *data, int maxlen, -+ umode_t mode, proc_handler *proc_handler) -+{ -+ entry->procname = procname; -+ entry->data = data; -+ entry->maxlen = maxlen; -+ entry->mode = mode; -+ entry->proc_handler = proc_handler; -+} -+ -+static struct ctl_table * -+sd_alloc_ctl_domain_table(struct sched_domain *sd) -+{ -+ struct ctl_table *table = sd_alloc_ctl_entry(9); -+ -+ if (table == NULL) -+ return NULL; -+ -+ set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring); -+ /* &table[8] is terminator */ -+ -+ return table; -+} -+ -+static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu) -+{ -+ struct ctl_table *entry, *table; -+ struct sched_domain *sd; -+ int domain_num = 0, i; -+ char buf[32]; -+ -+ for_each_domain(cpu, sd) -+ domain_num++; -+ entry = table = sd_alloc_ctl_entry(domain_num + 1); -+ if (table == NULL) -+ return NULL; -+ -+ i = 0; -+ for_each_domain(cpu, sd) { -+ snprintf(buf, 32, "domain%d", i); -+ entry->procname = kstrdup(buf, GFP_KERNEL); -+ entry->mode = 0555; -+ entry->child = sd_alloc_ctl_domain_table(sd); -+ entry++; -+ i++; -+ } -+ return table; -+} -+ -+static cpumask_var_t sd_sysctl_cpus; -+static struct ctl_table_header 
*sd_sysctl_header; -+ -+void register_sched_domain_sysctl(void) -+{ -+ static struct ctl_table *cpu_entries; -+ static struct ctl_table **cpu_idx; -+ char buf[32]; -+ int i; -+ -+ if (!cpu_entries) { -+ cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1); -+ if (!cpu_entries) -+ return; -+ -+ WARN_ON(sd_ctl_dir[0].child); -+ sd_ctl_dir[0].child = cpu_entries; -+ } -+ -+ if (!cpu_idx) { -+ struct ctl_table *e = cpu_entries; -+ -+ cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL); -+ if (!cpu_idx) -+ return; -+ -+ /* deal with sparse possible map */ -+ for_each_possible_cpu(i) { -+ cpu_idx[i] = e; -+ e++; -+ } -+ } -+ -+ if (!cpumask_available(sd_sysctl_cpus)) { -+ if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL)) -+ return; -+ -+ /* init to possible to not have holes in @cpu_entries */ -+ cpumask_copy(sd_sysctl_cpus, cpu_possible_mask); -+ } -+ -+ for_each_cpu(i, sd_sysctl_cpus) { -+ struct ctl_table *e = cpu_idx[i]; -+ -+ if (e->child) -+ sd_free_ctl_entry(&e->child); -+ -+ if (!e->procname) { -+ snprintf(buf, 32, "cpu%d", i); -+ e->procname = kstrdup(buf, GFP_KERNEL); -+ } -+ e->mode = 0555; -+ e->child = sd_alloc_ctl_cpu_table(i); -+ -+ __cpumask_clear_cpu(i, sd_sysctl_cpus); -+ } -+ -+ WARN_ON(sd_sysctl_header); -+ sd_sysctl_header = register_sysctl_table(sd_ctl_root); -+} -+ -+void dirty_sched_domain_sysctl(int cpu) -+{ -+ if (cpumask_available(sd_sysctl_cpus)) -+ __cpumask_set_cpu(cpu, sd_sysctl_cpus); -+} -+ -+/* may be called multiple times per register */ -+void unregister_sched_domain_sysctl(void) -+{ -+ unregister_sysctl_table(sd_sysctl_header); -+ sd_sysctl_header = NULL; -+} -+#endif /* CONFIG_SYSCTL */ -+ -+void set_rq_online(struct rq *rq) -+{ -+ if (!rq->online) { -+ cpumask_set_cpu(cpu_of(rq), rq->rd->online); -+ rq->online = true; -+ } -+} -+ -+void set_rq_offline(struct rq *rq) -+{ -+ if (rq->online) { -+ int cpu = cpu_of(rq); -+ -+ cpumask_clear_cpu(cpu, rq->rd->online); -+ rq->online = false; -+ clear_cpuidle_map(cpu); -+ } -+} -+ -+/* -+ * used to mark begin/end of suspend/resume: -+ */ -+static int num_cpus_frozen; -+ -+/* -+ * Update cpusets according to cpu_active mask. If cpusets are -+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper -+ * around partition_sched_domains(). -+ * -+ * If we come here as part of a suspend/resume, don't touch cpusets because we -+ * want to restore it back to its original state upon resume anyway. -+ */ -+static void cpuset_cpu_active(void) -+{ -+ if (cpuhp_tasks_frozen) { -+ /* -+ * num_cpus_frozen tracks how many CPUs are involved in suspend -+ * resume sequence. As long as this is not the last online -+ * operation in the resume sequence, just build a single sched -+ * domain, ignoring cpusets. -+ */ -+ partition_sched_domains(1, NULL, NULL); -+ if (--num_cpus_frozen) -+ return; -+ /* -+ * This is the last CPU online operation. So fall through and -+ * restore the original sched domains by considering the -+ * cpuset configurations. -+ */ -+ cpuset_force_rebuild(); -+ } -+ -+ cpuset_update_active_cpus(); -+} -+ -+static int cpuset_cpu_inactive(unsigned int cpu) -+{ -+ if (!cpuhp_tasks_frozen) { -+ cpuset_update_active_cpus(); -+ } else { -+ num_cpus_frozen++; -+ partition_sched_domains(1, NULL, NULL); -+ } -+ return 0; -+} -+ -+int sched_cpu_activate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going up, increment the number of cores with SMT present. 
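/*
 * Editor's sketch (not part of the patch): the sysctl table assembled above
 * surfaces per-CPU scheduler domains under
 * /proc/sys/kernel/sched_domain/cpu<N>/domain<M>/ when CONFIG_SCHED_DEBUG and
 * CONFIG_SYSCTL are enabled.  The cpu0/domain0 path below is an assumption
 * for illustration and may not exist on a given system.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/kernel/sched_domain/cpu0/domain0/name";
	char name[64] = "";
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);	/* absent without SCHED_DEBUG or without domains */
		return 1;
	}
	if (fgets(name, sizeof(name), f))
		printf("cpu0 domain0: %s", name);
	fclose(f);
	return 0;
}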
-+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_inc_cpuslocked(&sched_smt_present); -+#endif -+ set_cpu_active(cpu, true); -+ -+ if (sched_smp_initialized) { -+ sched_domains_numa_masks_set(cpu); -+ cpuset_cpu_active(); -+ } -+ -+ /* -+ * Put the rq online, if not already. This happens: -+ * -+ * 1) In the early boot process, because we build the real domains -+ * after all CPUs have been brought up. -+ * -+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the -+ * domains. -+ */ -+ rq_lock_irqsave(rq, &rf); -+ if (rq->rd) { -+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); -+ set_rq_online(rq); -+ } -+ unbind_zero(cpu); -+ rq_unlock_irqrestore(rq, &rf); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ int ret; -+ -+ set_cpu_active(cpu, false); -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. -+ */ -+ synchronize_rcu(); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going down, decrement the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_dec_cpuslocked(&sched_smt_present); -+#endif -+ -+ if (!sched_smp_initialized) -+ return 0; -+ -+ ret = cpuset_cpu_inactive(cpu); -+ if (ret) { -+ set_cpu_active(cpu, true); -+ return ret; -+ } -+ sched_domains_numa_masks_clear(cpu); -+ return 0; -+} -+ -+int sched_cpu_starting(unsigned int cpu) -+{ -+ sched_tick_start(cpu); -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+int sched_cpu_dying(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ /* Handle pending wakeups and then migrate everything off */ -+ sched_ttwu_pending(); -+ sched_tick_stop(cpu); -+ -+ local_irq_save(flags); -+ double_rq_lock(rq, cpu_rq(0)); -+ if (rq->rd) { -+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); -+ set_rq_offline(rq); -+ } -+ bind_zero(cpu); -+ double_rq_unlock(rq, cpu_rq(0)); -+ sched_start_tick(rq, cpu); -+ hrexpiry_clear(rq); -+ local_irq_restore(flags); -+ -+ return 0; -+} -+#endif -+ -+#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC) -+/* -+ * Cheaper version of the below functions in case support for SMT and MC is -+ * compiled in but CPUs have no siblings. 
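/*
 * Editor's sketch (not part of the patch): sched_cpu_activate() and
 * sched_cpu_deactivate()/sched_cpu_dying() above run as CPU-hotplug
 * callbacks, which the standard sysfs knob triggers.  CPU 1 is an arbitrary
 * example; this needs root, and CPU 0 is typically not hot-removable.
 */
#include <stdio.h>

static int cpu_online_write(int cpu, int online)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/online", cpu);
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%d\n", online);
	return fclose(f);	/* 0 on success */
}

int main(void)
{
	if (cpu_online_write(1, 0) == 0)	/* offline: bind_zero() side */
		cpu_online_write(1, 1);		/* back online: unbind_zero() */
	return 0;
}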
-+ */ -+static bool sole_cpu_idle(struct rq *rq) -+{ -+ return rq_idle(rq); -+} -+#endif -+#ifdef CONFIG_SCHED_SMT -+static const cpumask_t *thread_cpumask(int cpu) -+{ -+ return topology_sibling_cpumask(cpu); -+} -+/* All this CPU's SMT siblings are idle */ -+static bool siblings_cpu_idle(struct rq *rq) -+{ -+ return cpumask_subset(&rq->thread_mask, &cpu_idle_map); -+} -+#endif -+#ifdef CONFIG_SCHED_MC -+static const cpumask_t *core_cpumask(int cpu) -+{ -+ return topology_core_cpumask(cpu); -+} -+/* All this CPU's shared cache siblings are idle */ -+static bool cache_cpu_idle(struct rq *rq) -+{ -+ return cpumask_subset(&rq->core_mask, &cpu_idle_map); -+} -+/* MC siblings CPU mask which share the same LLC */ -+static const cpumask_t *llc_core_cpumask(int cpu) -+{ -+ return per_cpu(cpu_llc_shared_map, cpu); -+} -+#endif -+ -+enum sched_domain_level { -+ SD_LV_NONE = 0, -+ SD_LV_SIBLING, -+ SD_LV_MC, -+ SD_LV_BOOK, -+ SD_LV_CPU, -+ SD_LV_NODE, -+ SD_LV_ALLNODES, -+ SD_LV_MAX -+}; -+ -+void __init sched_init_smp(void) -+{ -+ struct rq *rq, *other_rq, *leader = cpu_rq(0); -+ struct sched_domain *sd; -+ int cpu, other_cpu, i; -+#ifdef CONFIG_SCHED_SMT -+ bool smt_threads = false; -+#endif -+ sched_init_numa(); -+ -+ /* -+ * There's no userspace yet to cause hotplug operations; hence all the -+ * cpu masks are stable and all blatant races in the below code cannot -+ * happen. -+ */ -+ mutex_lock(&sched_domains_mutex); -+ sched_init_domains(cpu_active_mask); -+ mutex_unlock(&sched_domains_mutex); -+ -+ /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -+ BUG(); -+ -+ local_irq_disable(); -+ mutex_lock(&sched_domains_mutex); -+ lock_all_rqs(); -+ -+ printk(KERN_INFO "MuQSS possible/present/online CPUs: %d/%d/%d\n", -+ num_possible_cpus(), num_present_cpus(), num_online_cpus()); -+ -+ /* -+ * Set up the relative cache distance of each online cpu from each -+ * other in a simple array for quick lookup. Locality is determined -+ * by the closest sched_domain that CPUs are separated by. CPUs with -+ * shared cache in SMT and MC are treated as local. Separate CPUs -+ * (within the same package or physically) within the same node are -+ * treated as not local. CPUs not even in the same domain (different -+ * nodes) are treated as very distant. -+ */ -+ for (cpu = num_online_cpus() - 1; cpu >= 0; cpu--) { -+ rq = cpu_rq(cpu); -+ leader = NULL; -+ /* First check if this cpu is in the same node */ -+ for_each_domain(cpu, sd) { -+ if (sd->level > SD_LV_MC) -+ continue; -+ if (rqshare != RQSHARE_ALL) -+ leader = NULL; -+ /* Set locality to local node if not already found lower */ -+ for_each_cpu(other_cpu, sched_domain_span(sd)) { -+ if (rqshare >= RQSHARE_SMP) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the smp_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ other_rq->smp_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_SMP) -+ rq->cpu_locality[other_cpu] = LOCALITY_SMP; -+ } -+ } -+ -+ /* -+ * Each runqueue has its own function in case it doesn't have -+ * siblings of its own allowing mixed topologies. 
-+ */ -+#ifdef CONFIG_SCHED_MC -+ leader = NULL; -+ if (cpumask_weight(core_cpumask(cpu)) > 1) { -+ cpumask_copy(&rq->core_mask, llc_core_cpumask(cpu)); -+ cpumask_clear_cpu(cpu, &rq->core_mask); -+ for_each_cpu(other_cpu, core_cpumask(cpu)) { -+ if (rqshare == RQSHARE_MC || -+ (rqshare == RQSHARE_MC_LLC && cpumask_test_cpu(other_cpu, llc_core_cpumask(cpu)))) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the mc_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ other_rq->mc_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_MC) { -+ /* this is to get LLC into play even in case LLC sharing is not used */ -+ if (cpumask_test_cpu(other_cpu, llc_core_cpumask(cpu))) -+ rq->cpu_locality[other_cpu] = LOCALITY_MC_LLC; -+ else -+ rq->cpu_locality[other_cpu] = LOCALITY_MC; -+ } -+ } -+ rq->cache_idle = cache_cpu_idle; -+ } -+#endif -+#ifdef CONFIG_SCHED_SMT -+ leader = NULL; -+ if (cpumask_weight(thread_cpumask(cpu)) > 1) { -+ cpumask_copy(&rq->thread_mask, thread_cpumask(cpu)); -+ cpumask_clear_cpu(cpu, &rq->thread_mask); -+ for_each_cpu(other_cpu, thread_cpumask(cpu)) { -+ if (rqshare == RQSHARE_SMT) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the smt_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ other_rq->smt_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_SMT) -+ rq->cpu_locality[other_cpu] = LOCALITY_SMT; -+ } -+ rq->siblings_idle = siblings_cpu_idle; -+ smt_threads = true; -+ } -+#endif -+ } -+ -+#ifdef CONFIG_SMT_NICE -+ if (smt_threads) { -+ check_siblings = &check_smt_siblings; -+ wake_siblings = &wake_smt_siblings; -+ smt_schedule = &smt_should_schedule; -+ } -+#endif -+ unlock_all_rqs(); -+ mutex_unlock(&sched_domains_mutex); -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for_each_online_cpu(other_cpu) { -+ printk(KERN_DEBUG "MuQSS locality CPU %d to %d: %d\n", cpu, other_cpu, rq->cpu_locality[other_cpu]); -+ } -+ } -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ leader = rq->smp_leader; -+ -+ rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing SMP runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ kfree(rq->node); -+ kfree(rq->sl); -+ kfree(rq->lock); -+ rq->node = leader->node; -+ rq->sl = leader->sl; -+ rq->lock = leader->lock; -+ barrier(); -+ /* To make up for not unlocking the freed runlock */ -+ preempt_enable(); -+ } else -+ rq_unlock(rq); -+ } -+ -+#ifdef CONFIG_SCHED_MC -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ leader = rq->mc_leader; -+ -+ rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing MC runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ kfree(rq->node); -+ kfree(rq->sl); -+ kfree(rq->lock); -+ rq->node = leader->node; -+ rq->sl = leader->sl; -+ rq->lock = leader->lock; -+ barrier(); -+ /* To make up for not unlocking the freed runlock */ -+ preempt_enable(); -+ } else -+ rq_unlock(rq); -+ } -+#endif /* CONFIG_SCHED_MC */ -+ -+#ifdef CONFIG_SCHED_SMT -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ -+ leader = rq->smt_leader; -+ -+ rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing SMT runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ kfree(rq->node); -+ kfree(rq->sl); -+ kfree(rq->lock); -+ rq->node = leader->node; -+ rq->sl = leader->sl; -+ rq->lock = leader->lock; -+ barrier(); -+ /* To make up for not unlocking the freed runlock */ -+ preempt_enable(); -+ } else -+ rq_unlock(rq); -+ } -+#endif /* CONFIG_SCHED_SMT */ -+ -+ 
local_irq_enable(); -+ -+ total_runqueues = 0; -+ for_each_online_cpu(cpu) { -+ int locality, total_rqs = 0, total_cpus = 0; -+ -+ rq = cpu_rq(cpu); -+ if ( -+#ifdef CONFIG_SCHED_MC -+ (rq->mc_leader == rq) && -+#endif -+#ifdef CONFIG_SCHED_SMT -+ (rq->smt_leader == rq) && -+#endif -+ (rq->smp_leader == rq)) { -+ total_runqueues++; -+ } -+ -+ for (locality = LOCALITY_SAME; locality <= LOCALITY_DISTANT; locality++) { -+ int selected_cpus[NR_CPUS], selected_cpu_cnt, selected_cpu_idx, test_cpu_idx, cpu_idx, best_locality, test_cpu; -+ int ordered_cpus[NR_CPUS], ordered_cpus_idx; -+ -+ ordered_cpus_idx = -1; -+ selected_cpu_cnt = 0; -+ -+ for_each_online_cpu(test_cpu) { -+ if (cpu < num_online_cpus() / 2) -+ other_cpu = cpu + test_cpu; -+ else -+ other_cpu = cpu - test_cpu; -+ if (other_cpu < 0) -+ other_cpu += num_online_cpus(); -+ else -+ other_cpu %= num_online_cpus(); -+ /* gather CPUs of the same locality */ -+ if (rq->cpu_locality[other_cpu] == locality) { -+ selected_cpus[selected_cpu_cnt] = other_cpu; -+ selected_cpu_cnt++; -+ } -+ } -+ -+ /* reserve first CPU as starting point */ -+ if (selected_cpu_cnt > 0) { -+ ordered_cpus_idx++; -+ ordered_cpus[ordered_cpus_idx] = selected_cpus[ordered_cpus_idx]; -+ selected_cpus[ordered_cpus_idx] = -1; -+ } -+ -+ /* take each CPU and sort it within the same locality based on each inter-CPU localities */ -+ for(test_cpu_idx = 1; test_cpu_idx < selected_cpu_cnt; test_cpu_idx++) { -+ /* starting point with worst locality and current CPU */ -+ best_locality = LOCALITY_DISTANT; -+ selected_cpu_idx = test_cpu_idx; -+ -+ /* try to find the best locality within group */ -+ for(cpu_idx = 1; cpu_idx < selected_cpu_cnt; cpu_idx++) { -+ /* if CPU has not been used and locality is better */ -+ if (selected_cpus[cpu_idx] > -1) { -+ other_rq = cpu_rq(ordered_cpus[ordered_cpus_idx]); -+ if (best_locality > other_rq->cpu_locality[selected_cpus[cpu_idx]]) { -+ /* assign best locality and best CPU idx in array */ -+ best_locality = other_rq->cpu_locality[selected_cpus[cpu_idx]]; -+ selected_cpu_idx = cpu_idx; -+ } -+ } -+ } -+ -+ /* add our next best CPU to ordered list */ -+ ordered_cpus_idx++; -+ ordered_cpus[ordered_cpus_idx] = selected_cpus[selected_cpu_idx]; -+ /* mark this CPU as used */ -+ selected_cpus[selected_cpu_idx] = -1; -+ } -+ -+ /* set up RQ and CPU orders */ -+ for (test_cpu = 0; test_cpu <= ordered_cpus_idx; test_cpu++) { -+ other_rq = cpu_rq(ordered_cpus[test_cpu]); -+ /* set up cpu orders */ -+ rq->cpu_order[total_cpus++] = other_rq; -+ if ( -+#ifdef CONFIG_SCHED_MC -+ (other_rq->mc_leader == other_rq) && -+#endif -+#ifdef CONFIG_SCHED_SMT -+ (other_rq->smt_leader == other_rq) && -+#endif -+ (other_rq->smp_leader == other_rq)) { -+ /* set up RQ orders */ -+ rq->rq_order[total_rqs++] = other_rq; -+ } -+ } -+ } -+ } -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for (i = 0; i < total_runqueues; i++) { -+ printk(KERN_DEBUG "MuQSS CPU %d llc %d RQ order %d RQ %d llc %d\n", cpu, per_cpu(cpu_llc_id, cpu), i, -+ rq->rq_order[i]->cpu, per_cpu(cpu_llc_id, rq->rq_order[i]->cpu)); -+ } -+ } -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for (i = 0; i < num_online_cpus(); i++) { -+ printk(KERN_DEBUG "MuQSS CPU %d llc %d CPU order %d RQ %d llc %d\n", cpu, per_cpu(cpu_llc_id, cpu), i, -+ rq->cpu_order[i]->cpu, per_cpu(cpu_llc_id, rq->cpu_order[i]->cpu)); -+ } -+ } -+ -+ switch (rqshare) { -+ case RQSHARE_ALL: -+ /* This should only ever read 1 */ -+ printk(KERN_INFO "MuQSS runqueue share type ALL total runqueues: %d\n", -+ 
total_runqueues); -+ break; -+ case RQSHARE_SMP: -+ printk(KERN_INFO "MuQSS runqueue share type SMP total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_MC: -+ printk(KERN_INFO "MuQSS runqueue share type MC total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_MC_LLC: -+ printk(KERN_INFO "MuQSS runqueue share type LLC total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_SMT: -+ printk(KERN_INFO "MuQSS runqueue share type SMT total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_NONE: -+ printk(KERN_INFO "MuQSS runqueue share type NONE total runqueues: %d\n", -+ total_runqueues); -+ break; -+ } -+ -+ sched_smp_initialized = true; -+} -+#else -+void __init sched_init_smp(void) -+{ -+ sched_smp_initialized = true; -+} -+#endif /* CONFIG_SMP */ -+ -+int in_sched_functions(unsigned long addr) -+{ -+ return in_lock_functions(addr) || -+ (addr >= (unsigned long)__sched_text_start -+ && addr < (unsigned long)__sched_text_end); -+} -+ -+#ifdef CONFIG_CGROUP_SCHED -+/* task group related information */ -+struct task_group { -+ struct cgroup_subsys_state css; -+ -+ struct rcu_head rcu; -+ struct list_head list; -+ -+ struct task_group *parent; -+ struct list_head siblings; -+ struct list_head children; -+}; -+ -+/* -+ * Default task group. -+ * Every task in system belongs to this group at bootup. -+ */ -+struct task_group root_task_group; -+LIST_HEAD(task_groups); -+ -+/* Cacheline aligned slab cache for task_group */ -+static struct kmem_cache *task_group_cache __read_mostly; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void __init sched_init(void) -+{ -+#ifdef CONFIG_SMP -+ int cpu_ids; -+#endif -+ int i; -+ struct rq *rq; -+ -+ wait_bit_init(); -+ -+ prio_ratios[0] = 128; -+ for (i = 1 ; i < NICE_WIDTH ; i++) -+ prio_ratios[i] = prio_ratios[i - 1] * 11 / 10; -+ -+ skiplist_node_init(&init_task.node); -+ -+#ifdef CONFIG_SMP -+ init_defrootdomain(); -+ cpumask_clear(&cpu_idle_map); -+#else -+ uprq = &per_cpu(runqueues, 0); -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+ task_group_cache = KMEM_CACHE(task_group, 0); -+ -+ list_add(&root_task_group.list, &task_groups); -+ INIT_LIST_HEAD(&root_task_group.children); -+ INIT_LIST_HEAD(&root_task_group.siblings); -+#endif /* CONFIG_CGROUP_SCHED */ -+ for_each_possible_cpu(i) { -+ rq = cpu_rq(i); -+ rq->node = kmalloc(sizeof(skiplist_node), GFP_ATOMIC); -+ skiplist_init(rq->node); -+ rq->sl = new_skiplist(rq->node); -+ rq->lock = kmalloc(sizeof(raw_spinlock_t), GFP_ATOMIC); -+ raw_spin_lock_init(rq->lock); -+ rq->nr_running = 0; -+ rq->nr_uninterruptible = 0; -+ rq->nr_switches = 0; -+ rq->clock = rq->old_clock = rq->last_niffy = rq->niffies = 0; -+ rq->last_jiffy = jiffies; -+ rq->user_ns = rq->nice_ns = rq->softirq_ns = rq->system_ns = -+ rq->iowait_ns = rq->idle_ns = 0; -+ rq->dither = 0; -+ set_rq_task(rq, &init_task); -+ rq->iso_ticks = 0; -+ rq->iso_refractory = false; -+#ifdef CONFIG_SMP -+ rq->smp_leader = rq; -+#ifdef CONFIG_SCHED_MC -+ rq->mc_leader = rq; -+#endif -+#ifdef CONFIG_SCHED_SMT -+ rq->smt_leader = rq; -+#endif -+ rq->sd = NULL; -+ rq->rd = NULL; -+ rq->online = false; -+ rq->cpu = i; -+ rq_attach_root(rq, &def_root_domain); -+#endif -+ init_rq_hrexpiry(rq); -+ atomic_set(&rq->nr_iowait, 0); -+ } -+ -+#ifdef CONFIG_SMP -+ cpu_ids = i; -+ /* -+ * Set the base locality for cpu cache distance calculation to -+ * "distant" (3). Make sure the distance from a CPU to itself is 0. 
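 *
 * Worked illustration (hypothetical 4-CPU package: two cores, two SMT
 * threads each, one shared LLC): CPU 0's row starts here as
 * { SAME, DISTANT, DISTANT, DISTANT } and is lowered at SMP bringup by
 * sched_init_smp() (earlier in this file) to { SAME, SMT, MC_LLC, MC_LLC },
 * i.e. the SMT sibling ends at LOCALITY_SMT and the other core's threads at
 * LOCALITY_MC_LLC. The locality values are only ever lowered, never raised.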
-+ */ -+ for_each_possible_cpu(i) { -+ int j; -+ -+ rq = cpu_rq(i); -+#ifdef CONFIG_SCHED_SMT -+ rq->siblings_idle = sole_cpu_idle; -+#endif -+#ifdef CONFIG_SCHED_MC -+ rq->cache_idle = sole_cpu_idle; -+#endif -+ rq->cpu_locality = kmalloc(cpu_ids * sizeof(int *), GFP_ATOMIC); -+ for_each_possible_cpu(j) { -+ if (i == j) -+ rq->cpu_locality[j] = LOCALITY_SAME; -+ else -+ rq->cpu_locality[j] = LOCALITY_DISTANT; -+ } -+ rq->rq_order = kmalloc(cpu_ids * sizeof(struct rq *), GFP_ATOMIC); -+ rq->cpu_order = kmalloc(cpu_ids * sizeof(struct rq *), GFP_ATOMIC); -+ rq->rq_order[0] = rq->cpu_order[0] = rq; -+ for (j = 1; j < cpu_ids; j++) -+ rq->rq_order[j] = rq->cpu_order[j] = cpu_rq(j); -+ } -+#endif -+ -+ /* -+ * The boot idle thread does lazy MMU switching as well: -+ */ -+ mmgrab(&init_mm); -+ enter_lazy_tlb(&init_mm, current); -+ -+ /* -+ * Make us the idle thread. Technically, schedule() should not be -+ * called from this thread, however somewhere below it might be, -+ * but because we are the idle thread, we just pick up running again -+ * when this runqueue becomes "idle". -+ */ -+ init_idle(current, smp_processor_id()); -+ -+#ifdef CONFIG_SMP -+ idle_thread_set_boot_cpu(); -+#endif /* SMP */ -+ -+ init_schedstats(); -+ -+ psi_init(); -+} -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+static inline int preempt_count_equals(int preempt_offset) -+{ -+ int nested = preempt_count() + rcu_preempt_depth(); -+ -+ return (nested == preempt_offset); -+} -+ -+void __might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* -+ * Blocking primitives will set (and therefore destroy) current->state, -+ * since we will exit with TASK_RUNNING make sure we enter with it, -+ * otherwise we will destroy state. -+ */ -+ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, -+ "do not call blocking ops when !TASK_RUNNING; " -+ "state=%lx set at [<%p>] %pS\n", -+ current->state, -+ (void *)current->task_state_change, -+ (void *)current->task_state_change); -+ -+ ___might_sleep(file, line, preempt_offset); -+} -+EXPORT_SYMBOL(__might_sleep); -+ -+void __cant_sleep(const char *file, int line, int preempt_offset) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > preempt_offset) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); -+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_sleep); -+ -+void ___might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* Ratelimiting timestamp: */ -+ static unsigned long prev_jiffy; -+ -+ unsigned long preempt_disable_ip; -+ -+ /* WARN_ON_ONCE() by default, no rate limit required: */ -+ rcu_sleep_check(); -+ -+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ !is_idle_task(current) && !current->non_block_count) || -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ /* Save this before calling printk(), since that will clobber it: */ -+ preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ printk(KERN_ERR 
-+ "BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ printk(KERN_ERR -+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); -+ -+ if (task_stack_end_corrupted(current)) -+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ -+ debug_show_held_locks(current); -+ if (irqs_disabled()) -+ print_irqtrace_events(current); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && !preempt_count_equals(preempt_offset)) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL(___might_sleep); -+#endif -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+static inline void normalise_rt_tasks(void) -+{ -+ struct sched_attr attr = {}; -+ struct task_struct *g, *p; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ read_lock(&tasklist_lock); -+ for_each_process_thread(g, p) { -+ /* -+ * Only normalize user tasks: -+ */ -+ if (p->flags & PF_KTHREAD) -+ continue; -+ -+ if (!rt_task(p) && !iso_task(p)) -+ continue; -+ -+ rq = task_rq_lock(p, &rf); -+ __setscheduler(p, rq, SCHED_NORMAL, 0, &attr, false); -+ task_rq_unlock(rq, p, &rf); -+ } -+ read_unlock(&tasklist_lock); -+} -+ -+void normalize_rt_tasks(void) -+{ -+ normalise_rt_tasks(); -+} -+#endif /* CONFIG_MAGIC_SYSRQ */ -+ -+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) -+/* -+ * These functions are only useful for the IA64 MCA handling, or kdb. -+ * -+ * They can only be called when the whole system has been -+ * stopped - every CPU needs to be quiescent, and no scheduling -+ * activity can take place. Using them for anything else would -+ * be a serious bug, and as a result, they aren't even visible -+ * under any other configuration. -+ */ -+ -+/** -+ * curr_task - return the current task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ * -+ * Return: The current task for @cpu. -+ */ -+struct task_struct *curr_task(int cpu) -+{ -+ return cpu_curr(cpu); -+} -+ -+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ -+ -+#ifdef CONFIG_IA64 -+/** -+ * ia64_set_curr_task - set the current task for a given CPU. -+ * @cpu: the processor in question. -+ * @p: the task pointer to set. -+ * -+ * Description: This function must only be used when non-maskable interrupts -+ * are serviced on a separate stack. It allows the architecture to switch the -+ * notion of the current task on a CPU in a non-blocking manner. This function -+ * must be called with all CPU's synchronised, and interrupts disabled, the -+ * and caller must save the original value of the current task (see -+ * curr_task() above) and restore that value before reenabling interrupts and -+ * re-starting the system. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 
-+ */ -+void ia64_set_curr_task(int cpu, struct task_struct *p) -+{ -+ cpu_curr(cpu) = p; -+} -+ -+#endif -+ -+void init_idle_bootup_task(struct task_struct *idle) -+{} -+ -+#ifdef CONFIG_SCHED_DEBUG -+__read_mostly bool sched_debug_enabled; -+ -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{ -+ seq_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+static void sched_free_group(struct task_group *tg) -+{ -+ kmem_cache_free(task_group_cache, tg); -+} -+ -+/* allocate runqueue etc for a new task group */ -+struct task_group *sched_create_group(struct task_group *parent) -+{ -+ struct task_group *tg; -+ -+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); -+ if (!tg) -+ return ERR_PTR(-ENOMEM); -+ -+ return tg; -+} -+ -+void sched_online_group(struct task_group *tg, struct task_group *parent) -+{ -+} -+ -+/* rcu callback to free various structures associated with a task group */ -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ /* Now it should be safe to free those cfs_rqs */ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+void sched_destroy_group(struct task_group *tg) -+{ -+ /* Wait for possible concurrent references to cfs_rqs complete */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ -+void sched_offline_group(struct task_group *tg) -+{ -+} -+ -+static inline struct task_group *css_tg(struct cgroup_subsys_state *css) -+{ -+ return css ? container_of(css, struct task_group, css) : NULL; -+} -+ -+static struct cgroup_subsys_state * -+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -+{ -+ struct task_group *parent = css_tg(parent_css); -+ struct task_group *tg; -+ -+ if (!parent) { -+ /* This is early initialization for the top cgroup */ -+ return &root_task_group.css; -+ } -+ -+ tg = sched_create_group(parent); -+ if (IS_ERR(tg)) -+ return ERR_PTR(-ENOMEM); -+ return &tg->css; -+} -+ -+/* Expose task group only after completing cgroup initialization */ -+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ struct task_group *parent = css_tg(css->parent); -+ -+ if (parent) -+ sched_online_group(tg, parent); -+ return 0; -+} -+ -+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ sched_offline_group(tg); -+} -+ -+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ /* -+ * Relies on the RCU grace period between css_released() and this. 
-+ */ -+ sched_free_group(tg); -+} -+ -+static void cpu_cgroup_fork(struct task_struct *task) -+{ -+} -+ -+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) -+{ -+ return 0; -+} -+ -+static void cpu_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+ -+static struct cftype cpu_legacy_files[] = { -+ { } /* Terminate */ -+}; -+ -+static struct cftype cpu_files[] = { -+ { } /* terminate */ -+}; -+ -+static int cpu_extra_stat_show(struct seq_file *sf, -+ struct cgroup_subsys_state *css) -+{ -+ return 0; -+} -+ -+struct cgroup_subsys cpu_cgrp_subsys = { -+ .css_alloc = cpu_cgroup_css_alloc, -+ .css_online = cpu_cgroup_css_online, -+ .css_released = cpu_cgroup_css_released, -+ .css_free = cpu_cgroup_css_free, -+ .css_extra_stat_show = cpu_extra_stat_show, -+ .fork = cpu_cgroup_fork, -+ .can_attach = cpu_cgroup_can_attach, -+ .attach = cpu_cgroup_attach, -+ .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, -+ .early_init = true, -+ .threaded = true, -+}; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+#undef CREATE_TRACE_POINTS -diff --git a/kernel/sched/MuQSS.h b/kernel/sched/MuQSS.h -new file mode 100644 -index 000000000000..5214b158d82f ---- /dev/null -+++ b/kernel/sched/MuQSS.h -@@ -0,0 +1,1010 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef MUQSS_SCHED_H -+#define MUQSS_SCHED_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_PARAVIRT -+#include -+#endif -+ -+#include "cpupri.h" -+ -+#ifdef CONFIG_SCHED_DEBUG -+# define SCHED_WARN_ON(x) WARN_ONCE(x, #x) -+#else -+# define SCHED_WARN_ON(x) ((void)(x)) -+#endif -+ -+/* task_struct::on_rq states: */ -+#define TASK_ON_RQ_QUEUED 1 -+#define TASK_ON_RQ_MIGRATING 2 -+ -+struct rq; -+ -+#ifdef CONFIG_SMP -+ -+static inline bool sched_asym_prefer(int a, int b) -+{ -+ return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b); -+} -+ -+struct perf_domain { -+ struct em_perf_domain *em_pd; -+ struct perf_domain *next; -+ struct rcu_head rcu; -+}; -+ -+/* Scheduling group status flags */ -+#define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */ -+#define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */ -+ -+/* -+ * We add the notion of a root-domain which will be used to define per-domain -+ * variables. Each exclusive cpuset essentially defines an island domain by -+ * fully partitioning the member cpus from any other cpuset. Whenever a new -+ * exclusive cpuset is created, we also create and attach a new root-domain -+ * object. -+ * -+ */ -+struct root_domain { -+ atomic_t refcount; -+ atomic_t rto_count; -+ struct rcu_head rcu; -+ cpumask_var_t span; -+ cpumask_var_t online; -+ -+ /* -+ * Indicate pullable load on at least one CPU, e.g: -+ * - More than one runnable task -+ * - Running task is misfit -+ */ -+ int overload; -+ -+ /* Indicate one or more cpus over-utilized (tipping point) */ -+ int overutilized; -+ -+ /* -+ * The bit corresponding to a CPU gets set here if such CPU has more -+ * than one runnable -deadline task (as it is below for RT tasks). 
-+ */ -+ cpumask_var_t dlo_mask; -+ atomic_t dlo_count; -+ /* Replace unused CFS structures with void */ -+ //struct dl_bw dl_bw; -+ //struct cpudl cpudl; -+ void *dl_bw; -+ void *cpudl; -+ -+ /* -+ * The "RT overload" flag: it gets set if a CPU has more than -+ * one runnable RT task. -+ */ -+ cpumask_var_t rto_mask; -+ //struct cpupri cpupri; -+ void *cpupri; -+ -+ unsigned long max_cpu_capacity; -+ -+ /* -+ * NULL-terminated list of performance domains intersecting with the -+ * CPUs of the rd. Protected by RCU. -+ */ -+ struct perf_domain *pd; -+}; -+ -+extern void init_defrootdomain(void); -+extern int sched_init_domains(const struct cpumask *cpu_map); -+extern void rq_attach_root(struct rq *rq, struct root_domain *rd); -+ -+static inline void cpupri_cleanup(void __maybe_unused *cpupri) -+{ -+} -+ -+static inline void cpudl_cleanup(void __maybe_unused *cpudl) -+{ -+} -+ -+static inline void init_dl_bw(void __maybe_unused *dl_bw) -+{ -+} -+ -+static inline int cpudl_init(void __maybe_unused *dl_bw) -+{ -+ return 0; -+} -+ -+static inline int cpupri_init(void __maybe_unused *cpupri) -+{ -+ return 0; -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * This is the main, per-CPU runqueue data structure. -+ * This data should only be modified by the local cpu. -+ */ -+struct rq { -+ raw_spinlock_t *lock; -+ raw_spinlock_t *orig_lock; -+ -+ struct task_struct *curr, *idle, *stop; -+ struct mm_struct *prev_mm; -+ -+ unsigned int nr_running; -+ /* -+ * This is part of a global counter where only the total sum -+ * over all CPUs matters. A task can increase this counter on -+ * one CPU and if it got migrated afterwards it may decrease -+ * it on another CPU. Always updated under the runqueue lock: -+ */ -+ unsigned long nr_uninterruptible; -+ u64 nr_switches; -+ -+ /* Stored data about rq->curr to work outside rq lock */ -+ u64 rq_deadline; -+ int rq_prio; -+ -+ /* Best queued id for use outside lock */ -+ u64 best_key; -+ -+ unsigned long last_scheduler_tick; /* Last jiffy this RQ ticked */ -+ unsigned long last_jiffy; /* Last jiffy this RQ updated rq clock */ -+ u64 niffies; /* Last time this RQ updated rq clock */ -+ u64 last_niffy; /* Last niffies as updated by local clock */ -+ u64 last_jiffy_niffies; /* Niffies @ last_jiffy */ -+ -+ u64 load_update; /* When we last updated load */ -+ unsigned long load_avg; /* Rolling load average */ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ u64 irq_load_update; /* When we last updated IRQ load */ -+ unsigned long irq_load_avg; /* Rolling IRQ load average */ -+#endif -+#ifdef CONFIG_SMT_NICE -+ struct mm_struct *rq_mm; -+ int rq_smt_bias; /* Policy/nice level bias across smt siblings */ -+#endif -+ /* Accurate timekeeping data */ -+ unsigned long user_ns, nice_ns, irq_ns, softirq_ns, system_ns, -+ iowait_ns, idle_ns; -+ atomic_t nr_iowait; -+ -+#ifdef CONFIG_MEMBARRIER -+ int membarrier_state; -+#endif -+ -+ skiplist_node *node; -+ skiplist *sl; -+#ifdef CONFIG_SMP -+ struct task_struct *preempt; /* Preempt triggered on this task */ -+ struct task_struct *preempting; /* Hint only, what task is preempting */ -+ -+ int cpu; /* cpu of this runqueue */ -+ bool online; -+ -+ struct root_domain *rd; -+ struct sched_domain *sd; -+ -+ unsigned long cpu_capacity_orig; -+ -+ int *cpu_locality; /* CPU relative cache distance */ -+ struct rq **rq_order; /* Shared RQs ordered by relative cache distance */ -+ struct rq **cpu_order; /* RQs of discrete CPUs ordered by distance */ -+ -+ struct rq *smp_leader; /* First physical CPU per node */ -+#ifdef CONFIG_SCHED_SMT -+ struct rq 
*smt_leader; /* First logical CPU in SMT siblings */ -+ cpumask_t thread_mask; -+ bool (*siblings_idle)(struct rq *rq); -+ /* See if all smt siblings are idle */ -+#endif /* CONFIG_SCHED_SMT */ -+#ifdef CONFIG_SCHED_MC -+ struct rq *mc_leader; /* First logical CPU in MC siblings */ -+ cpumask_t core_mask; -+ bool (*cache_idle)(struct rq *rq); -+ /* See if all cache siblings are idle */ -+#endif /* CONFIG_SCHED_MC */ -+#endif /* CONFIG_SMP */ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ u64 prev_irq_time; -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+#ifdef CONFIG_PARAVIRT -+ u64 prev_steal_time; -+#endif /* CONFIG_PARAVIRT */ -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ u64 prev_steal_time_rq; -+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ -+ -+ u64 clock, old_clock, last_tick; -+ /* Ensure that all clocks are in the same cache line */ -+ u64 clock_task ____cacheline_aligned; -+ int dither; -+ -+ int iso_ticks; -+ bool iso_refractory; -+ -+#ifdef CONFIG_HIGH_RES_TIMERS -+ struct hrtimer hrexpiry_timer; -+#endif -+ -+ int rt_nr_running; /* Number real time tasks running */ -+#ifdef CONFIG_SCHEDSTATS -+ -+ /* latency stats */ -+ struct sched_info rq_sched_info; -+ unsigned long long rq_cpu_time; -+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ -+ -+ /* sys_sched_yield() stats */ -+ unsigned int yld_count; -+ -+ /* schedule() stats */ -+ unsigned int sched_switch; -+ unsigned int sched_count; -+ unsigned int sched_goidle; -+ -+ /* try_to_wake_up() stats */ -+ unsigned int ttwu_count; -+ unsigned int ttwu_local; -+#endif /* CONFIG_SCHEDSTATS */ -+ -+#ifdef CONFIG_SMP -+ struct llist_head wake_list; -+#endif -+ -+#ifdef CONFIG_CPU_IDLE -+ /* Must be inspected within a rcu lock section */ -+ struct cpuidle_state *idle_state; -+#endif -+}; -+ -+struct rq_flags { -+ unsigned long flags; -+}; -+ -+#ifdef CONFIG_SMP -+struct rq *cpu_rq(int cpu); -+#endif -+ -+#ifndef CONFIG_SMP -+extern struct rq *uprq; -+#define cpu_rq(cpu) (uprq) -+#define this_rq() (uprq) -+#define raw_rq() (uprq) -+#define task_rq(p) (uprq) -+#define cpu_curr(cpu) ((uprq)->curr) -+#else /* CONFIG_SMP */ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+#define this_rq() this_cpu_ptr(&runqueues) -+#define raw_rq() raw_cpu_ptr(&runqueues) -+#define task_rq(p) cpu_rq(task_cpu(p)) -+#endif /* CONFIG_SMP */ -+ -+static inline int task_current(struct rq *rq, struct task_struct *p) -+{ -+ return rq->curr == p; -+} -+ -+static inline int task_running(struct rq *rq, struct task_struct *p) -+{ -+#ifdef CONFIG_SMP -+ return p->on_cpu; -+#else -+ return task_current(rq, p); -+#endif -+} -+ -+static inline int task_on_rq_queued(struct task_struct *p) -+{ -+ return p->on_rq == TASK_ON_RQ_QUEUED; -+} -+ -+static inline int task_on_rq_migrating(struct task_struct *p) -+{ -+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+} -+ -+static inline void rq_lock(struct rq *rq) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock(rq->lock); -+} -+ -+static inline void rq_unlock(struct rq *rq) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(rq->lock); -+} -+ -+static inline void rq_lock_irq(struct rq *rq) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irq(rq->lock); -+} -+ -+static inline void rq_unlock_irq(struct rq *rq, struct rq_flags __always_unused *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irq(rq->lock); -+} -+ -+static inline void rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irqsave(rq->lock, rf->flags); -+} -+ -+static inline void rq_unlock_irqrestore(struct rq *rq, 
struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irqrestore(rq->lock, rf->flags); -+} -+ -+static inline struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ while (42) { -+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); -+ rq = task_rq(p); -+ raw_spin_lock(rq->lock); -+ if (likely(rq == task_rq(p))) -+ break; -+ raw_spin_unlock(rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+ } -+ return rq; -+} -+ -+static inline void task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) -+ __releases(rq->lock) -+ __releases(p->pi_lock) -+{ -+ rq_unlock(rq); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+} -+ -+static inline struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags __always_unused *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ while (42) { -+ rq = task_rq(p); -+ raw_spin_lock(rq->lock); -+ if (likely(rq == task_rq(p))) -+ break; -+ raw_spin_unlock(rq->lock); -+ } -+ return rq; -+} -+ -+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags __always_unused *rf) -+{ -+ rq_unlock(rq); -+} -+ -+static inline struct rq * -+this_rq_lock_irq(struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ rq_lock(rq); -+ return rq; -+} -+ -+/* -+ * {de,en}queue flags: Most not used on MuQSS. -+ * -+ * DEQUEUE_SLEEP - task is no longer runnable -+ * ENQUEUE_WAKEUP - task just became runnable -+ * -+ * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks -+ * are in a known state which allows modification. Such pairs -+ * should preserve as much state as possible. -+ * -+ * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location -+ * in the runqueue. 
-+ * -+ * ENQUEUE_HEAD - place at front of runqueue (tail if not specified) -+ * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline) -+ * ENQUEUE_MIGRATED - the task was migrated during wakeup -+ * -+ */ -+ -+#define DEQUEUE_SLEEP 0x01 -+#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */ -+ -+#define ENQUEUE_WAKEUP 0x01 -+#define ENQUEUE_RESTORE 0x02 -+ -+#ifdef CONFIG_SMP -+#define ENQUEUE_MIGRATED 0x40 -+#else -+#define ENQUEUE_MIGRATED 0x00 -+#endif -+ -+static inline u64 __rq_clock_broken(struct rq *rq) -+{ -+ return READ_ONCE(rq->clock); -+} -+ -+static inline u64 rq_clock(struct rq *rq) -+{ -+ lockdep_assert_held(rq->lock); -+ -+ return rq->clock; -+} -+ -+static inline u64 rq_clock_task(struct rq *rq) -+{ -+ lockdep_assert_held(rq->lock); -+ -+ return rq->clock_task; -+} -+ -+#ifdef CONFIG_NUMA -+enum numa_topology_type { -+ NUMA_DIRECT, -+ NUMA_GLUELESS_MESH, -+ NUMA_BACKPLANE, -+}; -+extern enum numa_topology_type sched_numa_topology_type; -+extern int sched_max_numa_distance; -+extern bool find_numa_distance(int distance); -+extern void sched_init_numa(void); -+extern void sched_domains_numa_masks_set(unsigned int cpu); -+extern void sched_domains_numa_masks_clear(unsigned int cpu); -+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); -+#else -+static inline void sched_init_numa(void) { } -+static inline void sched_domains_numa_masks_set(unsigned int cpu) { } -+static inline void sched_domains_numa_masks_clear(unsigned int cpu) { } -+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return nr_cpu_ids; -+} -+#endif -+ -+extern struct mutex sched_domains_mutex; -+extern struct static_key_false sched_schedstats; -+ -+#define rcu_dereference_check_sched_domain(p) \ -+ rcu_dereference_check((p), \ -+ lockdep_is_held(&sched_domains_mutex)) -+ -+#ifdef CONFIG_SMP -+ -+/* -+ * The domain tree (rq->sd) is protected by RCU's quiescent state transition. -+ * See destroy_sched_domains: call_rcu for details. -+ * -+ * The domain tree of any CPU may only be accessed from within -+ * preempt-disabled sections. -+ */ -+#define for_each_domain(cpu, __sd) \ -+ for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \ -+ __sd; __sd = __sd->parent) -+ -+#define for_each_lower_domain(sd) for (; sd; sd = sd->child) -+ -+/** -+ * highest_flag_domain - Return highest sched_domain containing flag. -+ * @cpu: The cpu whose highest level of sched domain is to -+ * be returned. -+ * @flag: The flag to check for the highest sched_domain -+ * for the given cpu. -+ * -+ * Returns the highest sched_domain of a cpu which contains the given flag. 
-+ */ -+static inline struct sched_domain *highest_flag_domain(int cpu, int flag) -+{ -+ struct sched_domain *sd, *hsd = NULL; -+ -+ for_each_domain(cpu, sd) { -+ if (!(sd->flags & flag)) -+ break; -+ hsd = sd; -+ } -+ -+ return hsd; -+} -+ -+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) -+{ -+ struct sched_domain *sd; -+ -+ for_each_domain(cpu, sd) { -+ if (sd->flags & flag) -+ break; -+ } -+ -+ return sd; -+} -+ -+DECLARE_PER_CPU(struct sched_domain *, sd_llc); -+DECLARE_PER_CPU(int, sd_llc_size); -+DECLARE_PER_CPU(int, sd_llc_id); -+DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); -+DECLARE_PER_CPU(struct sched_domain *, sd_numa); -+DECLARE_PER_CPU(struct sched_domain *, sd_asym_packing); -+DECLARE_PER_CPU(struct sched_domain *, sd_asym_cpucapacity); -+ -+struct sched_group_capacity { -+ atomic_t ref; -+ /* -+ * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity -+ * for a single CPU. -+ */ -+ unsigned long capacity; -+ unsigned long min_capacity; /* Min per-CPU capacity in group */ -+ unsigned long max_capacity; /* Max per-CPU capacity in group */ -+ unsigned long next_update; -+ int imbalance; /* XXX unrelated to capacity but shared group state */ -+ -+#ifdef CONFIG_SCHED_DEBUG -+ int id; -+#endif -+ -+ unsigned long cpumask[0]; /* balance mask */ -+}; -+ -+struct sched_group { -+ struct sched_group *next; /* Must be a circular list */ -+ atomic_t ref; -+ -+ unsigned int group_weight; -+ struct sched_group_capacity *sgc; -+ int asym_prefer_cpu; /* cpu of highest priority in group */ -+ -+ /* -+ * The CPUs this group covers. -+ * -+ * NOTE: this field is variable length. (Allocated dynamically -+ * by attaching extra space to the end of the structure, -+ * depending on how many CPUs the kernel has booted up with) -+ */ -+ unsigned long cpumask[0]; -+}; -+ -+static inline struct cpumask *sched_group_span(struct sched_group *sg) -+{ -+ return to_cpumask(sg->cpumask); -+} -+ -+/* -+ * See build_balance_mask(). -+ */ -+static inline struct cpumask *group_balance_mask(struct sched_group *sg) -+{ -+ return to_cpumask(sg->sgc->cpumask); -+} -+ -+/** -+ * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. -+ * @group: The group whose first cpu is to be returned. 
-+ */ -+static inline unsigned int group_first_cpu(struct sched_group *group) -+{ -+ return cpumask_first(sched_group_span(group)); -+} -+ -+ -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+void register_sched_domain_sysctl(void); -+void dirty_sched_domain_sysctl(int cpu); -+void unregister_sched_domain_sysctl(void); -+#else -+static inline void register_sched_domain_sysctl(void) -+{ -+} -+static inline void dirty_sched_domain_sysctl(int cpu) -+{ -+} -+static inline void unregister_sched_domain_sysctl(void) -+{ -+} -+#endif -+ -+extern void sched_ttwu_pending(void); -+extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask); -+extern void set_rq_online (struct rq *rq); -+extern void set_rq_offline(struct rq *rq); -+extern bool sched_smp_initialized; -+ -+static inline void update_group_capacity(struct sched_domain *sd, int cpu) -+{ -+} -+ -+static inline void trigger_load_balance(struct rq *rq) -+{ -+} -+ -+#define sched_feat(x) 0 -+ -+#else /* CONFIG_SMP */ -+ -+static inline void sched_ttwu_pending(void) { } -+ -+#endif /* CONFIG_SMP */ -+ -+#ifdef CONFIG_CPU_IDLE -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+ rq->idle_state = idle_state; -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ SCHED_WARN_ON(!rcu_read_lock_held()); -+ return rq->idle_state; -+} -+#else -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ return NULL; -+} -+#endif -+ -+#ifdef CONFIG_SCHED_DEBUG -+extern bool sched_debug_enabled; -+#endif -+ -+extern void schedule_idle(void); -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+struct irqtime { -+ u64 total; -+ u64 tick_delta; -+ u64 irq_start_time; -+ struct u64_stats_sync sync; -+}; -+ -+DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -+ -+/* -+ * Returns the irqtime minus the softirq time computed by ksoftirqd. -+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime -+ * and never move forward. 
-+ */ -+static inline u64 irq_time_read(int cpu) -+{ -+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); -+ unsigned int seq; -+ u64 total; -+ -+ do { -+ seq = __u64_stats_fetch_begin(&irqtime->sync); -+ total = irqtime->total; -+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); -+ -+ return total; -+} -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+ -+static inline bool sched_stop_runnable(struct rq *rq) -+{ -+ return rq->stop && task_on_rq_queued(rq->stop); -+} -+ -+#ifdef CONFIG_SMP -+static inline int cpu_of(struct rq *rq) -+{ -+ return rq->cpu; -+} -+#else /* CONFIG_SMP */ -+static inline int cpu_of(struct rq *rq) -+{ -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_CPU_FREQ -+DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); -+ -+static inline void cpufreq_trigger(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, -+ cpu_of(rq))); -+ -+ if (data) -+ data->func(data, rq->niffies, flags); -+} -+#else -+static inline void cpufreq_trigger(struct rq *rq, unsigned int flag) -+{ -+} -+#endif /* CONFIG_CPU_FREQ */ -+ -+static inline bool uclamp_is_used(void) -+{ -+ return false; -+} -+ -+static __always_inline -+unsigned int uclamp_util_with(struct rq __maybe_unused *rq, unsigned int util, -+ struct task_struct __maybe_unused *p) -+{ -+ return util; -+} -+ -+static inline unsigned int uclamp_util(struct rq *rq, unsigned int util) -+{ -+ return util; -+} -+ -+#ifdef arch_scale_freq_capacity -+#ifndef arch_scale_freq_invariant -+#define arch_scale_freq_invariant() (true) -+#endif -+#else /* arch_scale_freq_capacity */ -+#define arch_scale_freq_invariant() (false) -+#endif -+ -+/* -+ * This should only be called when current == rq->idle. Dodgy workaround for -+ * when softirqs are pending and we are in the idle loop. Setting current to -+ * resched will kick us out of the idle loop and the softirqs will be serviced -+ * on our next pass through schedule(). -+ */ -+static inline bool softirq_pending(int cpu) -+{ -+ if (likely(!local_softirq_pending())) -+ return false; -+ set_tsk_need_resched(current); -+ return true; -+} -+ -+#ifdef CONFIG_64BIT -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ return tsk_seruntime(t); -+} -+#else -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ struct rq_flags rf; -+ u64 ns; -+ struct rq *rq; -+ -+ rq = task_rq_lock(t, &rf); -+ ns = tsk_seruntime(t); -+ task_rq_unlock(rq, t, &rf); -+ -+ return ns; -+} -+#endif -+ -+#ifndef arch_scale_freq_capacity -+static __always_inline -+unsigned long arch_scale_freq_capacity(int cpu) -+{ -+ return SCHED_CAPACITY_SCALE; -+} -+#endif -+ -+#ifdef CONFIG_NO_HZ_FULL -+extern bool sched_can_stop_tick(struct rq *rq); -+extern int __init sched_tick_offload_init(void); -+ -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out of -+ * nohz mode if necessary. 
-+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu; -+ -+ if (!tick_nohz_full_enabled()) -+ return; -+ -+ cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (sched_can_stop_tick(rq)) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else -+static inline int sched_tick_offload_init(void) { return 0; } -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+#define SCHED_FLAG_SUGOV 0x10000000 -+ -+static inline bool rt_rq_is_runnable(struct rq *rt_rq) -+{ -+ return rt_rq->rt_nr_running; -+} -+ -+/** -+ * enum schedutil_type - CPU utilization type -+ * @FREQUENCY_UTIL: Utilization used to select frequency -+ * @ENERGY_UTIL: Utilization used during energy calculation -+ * -+ * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time -+ * need to be aggregated differently depending on the usage made of them. This -+ * enum is used within schedutil_freq_util() to differentiate the types of -+ * utilization expected by the callers, and adjust the aggregation accordingly. -+ */ -+enum schedutil_type { -+ FREQUENCY_UTIL, -+ ENERGY_UTIL, -+}; -+ -+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL -+ -+unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, -+ unsigned long max, enum schedutil_type type, -+ struct task_struct *p); -+ -+static inline unsigned long cpu_bw_dl(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline unsigned long cpu_util_dl(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline unsigned long cpu_util_cfs(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->load_avg); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+static inline unsigned long cpu_util_rt(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->rt_nr_running); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+static inline unsigned long cpu_util_irq(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->irq_load_avg); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+static inline -+unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) -+{ -+ util *= (max - irq); -+ util /= max; -+ -+ return util; -+ -+} -+#else -+static inline unsigned long cpu_util_irq(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline -+unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) -+{ -+ return util; -+} -+#endif -+#endif -+ -+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -+#define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus))) -+ -+DECLARE_STATIC_KEY_FALSE(sched_energy_present); -+ -+static inline bool sched_energy_enabled(void) -+{ -+ return static_branch_unlikely(&sched_energy_present); -+} -+ -+#else /* ! (CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */ -+ -+#define perf_domain_span(pd) NULL -+static inline bool sched_energy_enabled(void) { return false; } -+ -+#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */ -+ -+#ifdef CONFIG_MEMBARRIER -+/* -+ * The scheduler provides memory barriers required by membarrier between: -+ * - prior user-space memory accesses and store to rq->membarrier_state, -+ * - store to rq->membarrier_state and following user-space memory accesses. -+ * In the same way it provides those guarantees around store to rq->curr. 
-+ */ -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+ int membarrier_state; -+ -+ if (prev_mm == next_mm) -+ return; -+ -+ membarrier_state = atomic_read(&next_mm->membarrier_state); -+ if (READ_ONCE(rq->membarrier_state) == membarrier_state) -+ return; -+ -+ WRITE_ONCE(rq->membarrier_state, membarrier_state); -+} -+#else -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+} -+#endif -+ -+#endif /* MUQSS_SCHED_H */ -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 86800b4d5453..f3d8dca0538a 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -185,6 +185,12 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - -+#ifdef CONFIG_SCHED_MUQSS -+#define rt_rq_runnable(rq_rt) rt_rq_is_runnable(rq) -+#else -+#define rt_rq_runnable(rq_rt) rt_rq_is_runnable(&rq->rt) -+#endif -+ - /* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. -@@ -213,7 +219,7 @@ unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, - struct rq *rq = cpu_rq(cpu); - - if (!uclamp_is_used() && -- type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) { -+ type == FREQUENCY_UTIL && rt_rq_runnable(rq)) { - return max; - } - -@@ -658,7 +664,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - struct task_struct *thread; - struct sched_attr attr = { - .size = sizeof(struct sched_attr), -+#ifdef CONFIG_SCHED_MUQSS -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, -+#endif - .sched_flags = SCHED_FLAG_SUGOV, - .sched_nice = 0, - .sched_priority = 0, -diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h -index 7dc20a3232e7..e733a0a53b0a 100644 ---- a/kernel/sched/cpupri.h -+++ b/kernel/sched/cpupri.h -@@ -17,9 +17,11 @@ struct cpupri { - int *cpu_to_pri; - }; - -+#ifndef CONFIG_SCHED_MUQSS - #ifdef CONFIG_SMP - int cpupri_find(struct cpupri *cp, struct task_struct *p, struct cpumask *lowest_mask); - void cpupri_set(struct cpupri *cp, int cpu, int pri); - int cpupri_init(struct cpupri *cp); - void cpupri_cleanup(struct cpupri *cp); - #endif -+#endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index 46ed4e1383e2..f077fcd22d2b 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -266,26 +266,6 @@ static inline u64 account_other_time(u64 max) - return accounted; - } - --#ifdef CONFIG_64BIT --static inline u64 read_sum_exec_runtime(struct task_struct *t) --{ -- return t->se.sum_exec_runtime; --} --#else --static u64 read_sum_exec_runtime(struct task_struct *t) --{ -- u64 ns; -- struct rq_flags rf; -- struct rq *rq; -- -- rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -- task_rq_unlock(rq, t, &rf); -- -- return ns; --} --#endif -- - /* - * Accumulate raw cputime values of dead tasks (sig->[us]time) and live - * tasks (sum on group iteration) belonging to @tsk's group. 
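/*
 * Aside on the rt_rq_runnable() wrapper added to cpufreq_schedutil.c above
 * (illustrative expansion only): the call site is rt_rq_runnable(rq), and
 * both variants ignore their parameter and expand against the caller's rq:
 *
 *	rt_rq_is_runnable(rq)		with CONFIG_SCHED_MUQSS, i.e. the
 *					MuQSS.h helper testing rq->rt_nr_running
 *	rt_rq_is_runnable(&rq->rt)	without it, i.e. the stock RT helper
 */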
-@@ -663,7 +643,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - task_cputime(p, &cputime.utime, &cputime.stime); -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index f65ef1e2f204..e0aa6c73a5fa 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -225,6 +225,8 @@ static void cpuidle_idle_call(void) - static void do_idle(void) - { - int cpu = smp_processor_id(); -+ bool pending = false; -+ - /* - * If the arch has a polling bit, we maintain an invariant: - * -@@ -235,7 +237,10 @@ static void do_idle(void) - */ - - __current_set_polling(); -- tick_nohz_idle_enter(); -+ if (unlikely(softirq_pending(cpu))) -+ pending = true; -+ else -+ tick_nohz_idle_enter(); - - while (!need_resched()) { - rmb(); -@@ -273,7 +278,8 @@ static void do_idle(void) - * an IPI to fold the state for us. - */ - preempt_set_need_resched(); -- tick_nohz_idle_exit(); -+ if (!pending) -+ tick_nohz_idle_exit(); - __current_clr_polling(); - - /* -@@ -355,6 +361,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_MUQSS - /* - * idle-task scheduling class. - */ -@@ -479,3 +486,4 @@ const struct sched_class idle_sched_class = { - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif /* CONFIG_SCHED_MUQSS */ -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index c8870c5bd7df..add1d74c2e91 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2,6 +2,19 @@ - /* - * Scheduler internal types and methods: - */ -+#ifdef CONFIG_SCHED_MUQSS -+#include "MuQSS.h" -+ -+/* Begin compatibility wrappers for MuQSS/CFS differences */ -+#define rq_rt_nr_running(rq) ((rq)->rt_nr_running) -+#define rq_h_nr_running(rq) ((rq)->nr_running) -+ -+#else /* CONFIG_SCHED_MUQSS */ -+ -+#define rq_rt_nr_running(rq) ((rq)->rt.rt_nr_running) -+#define rq_h_nr_running(rq) ((rq)->cfs.h_nr_running) -+ -+ - #include - - #include -@@ -2496,3 +2509,30 @@ static inline void membarrier_switch_mm(struct rq *rq, - { - } - #endif -+ -+/* MuQSS compatibility functions */ -+static inline bool softirq_pending(int cpu) -+{ -+ return false; -+} -+ -+#ifdef CONFIG_64BIT -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ return t->se.sum_exec_runtime; -+} -+#else -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ u64 ns; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ rq = task_rq_lock(t, &rf); -+ ns = t->se.sum_exec_runtime; -+ task_rq_unlock(rq, t, &rf); -+ -+ return ns; -+} -+#endif -+#endif /* CONFIG_SCHED_MUQSS */ -diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 49b835f1305f..0253ea846c0d 100644 ---- a/kernel/sched/topology.c -+++ b/kernel/sched/topology.c -@@ -3,6 +3,7 @@ - * Scheduler topology setup/handling methods - */ - #include "sched.h" -+#include "linux/sched/deadline.h" - - DEFINE_MUTEX(sched_domains_mutex); - -@@ -442,7 +443,11 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) - struct root_domain *old_rd = NULL; - unsigned long flags; - -+#ifdef CONFIG_SCHED_MUQSS -+ raw_spin_lock_irqsave(rq->lock, flags); -+#else - raw_spin_lock_irqsave(&rq->lock, flags); -+#endif - - if (rq->rd) { - old_rd = rq->rd; -@@ -468,7 +473,11 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) - if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) - 
set_rq_online(rq); - -+#ifdef CONFIG_SCHED_MUQSS -+ raw_spin_unlock_irqrestore(rq->lock, flags); -+#else - raw_spin_unlock_irqrestore(&rq->lock, flags); -+#endif - - if (old_rd) - call_rcu(&old_rd->rcu, free_rootdomain); -diff --git a/kernel/skip_list.c b/kernel/skip_list.c -new file mode 100644 -index 000000000000..bf5c6e97e139 ---- /dev/null -+++ b/kernel/skip_list.c -@@ -0,0 +1,148 @@ -+/* -+ Copyright (C) 2011,2016 Con Kolivas. -+ -+ Code based on example originally by William Pugh. -+ -+Skip Lists are a probabilistic alternative to balanced trees, as -+described in the June 1990 issue of CACM and were invented by -+William Pugh in 1987. -+ -+A couple of comments about this implementation: -+The routine randomLevel has been hard-coded to generate random -+levels using p=0.25. It can be easily changed. -+ -+The insertion routine has been implemented so as to use the -+dirty hack described in the CACM paper: if a random level is -+generated that is more than the current maximum level, the -+current maximum level plus one is used instead. -+ -+Levels start at zero and go up to MaxLevel (which is equal to -+MaxNumberOfLevels-1). -+ -+The routines defined in this file are: -+ -+init: defines slnode -+ -+new_skiplist: returns a new, empty list -+ -+randomLevel: Returns a random level based on a u64 random seed passed to it. -+In MuQSS, the "niffy" time is used for this purpose. -+ -+insert(l,key, value): inserts the binding (key, value) into l. This operation -+occurs in O(log n) time. -+ -+delnode(slnode, l, node): deletes any binding of key from the l based on the -+actual node value. This operation occurs in O(k) time where k is the -+number of levels of the node in question (max 8). The original delete -+function occurred in O(log n) time and involved a search. -+ -+MuQSS Notes: In this implementation of skiplists, there are bidirectional -+next/prev pointers and the insert function returns a pointer to the actual -+node the value is stored. The key here is chosen by the scheduler so as to -+sort tasks according to the priority list requirements and is no longer used -+by the scheduler after insertion. The scheduler lookup, however, occurs in -+O(1) time because it is always the first item in the level 0 linked list. -+Since the task struct stores a copy of the node pointer upon skiplist_insert, -+it can also remove it much faster than the original implementation with the -+aid of prev<->next pointer manipulation and no searching. 
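An illustrative usage sketch of the routines below (key, value and randseed
are placeholders; as noted above, MuQSS picks the key itself and seeds
randomLevel with the niffy time):

	skiplist_node head, node;
	skiplist *l;

	skiplist_init(&head);
	l = new_skiplist(&head);
	skiplist_node_init(&node);
	skiplist_insert(l, &node, key, value, randseed);
	skiplist_delete(l, &node);
	free_skiplist(l);

The lowest key is always the first level-0 node, l->header->next[0], which
is what gives the scheduler its O(1) lookup.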
-+ -+*/ -+ -+#include -+#include -+ -+#define MaxNumberOfLevels 8 -+#define MaxLevel (MaxNumberOfLevels - 1) -+ -+void skiplist_init(skiplist_node *slnode) -+{ -+ int i; -+ -+ slnode->key = 0xFFFFFFFFFFFFFFFF; -+ slnode->level = 0; -+ slnode->value = NULL; -+ for (i = 0; i < MaxNumberOfLevels; i++) -+ slnode->next[i] = slnode->prev[i] = slnode; -+} -+ -+skiplist *new_skiplist(skiplist_node *slnode) -+{ -+ skiplist *l = kzalloc(sizeof(skiplist), GFP_ATOMIC); -+ -+ BUG_ON(!l); -+ l->header = slnode; -+ return l; -+} -+ -+void free_skiplist(skiplist *l) -+{ -+ skiplist_node *p, *q; -+ -+ p = l->header; -+ do { -+ q = p->next[0]; -+ p->next[0]->prev[0] = q->prev[0]; -+ skiplist_node_init(p); -+ p = q; -+ } while (p != l->header); -+ kfree(l); -+} -+ -+void skiplist_node_init(skiplist_node *node) -+{ -+ memset(node, 0, sizeof(skiplist_node)); -+} -+ -+static inline unsigned int randomLevel(const long unsigned int randseed) -+{ -+ return find_first_bit(&randseed, MaxLevel) / 2; -+} -+ -+void skiplist_insert(skiplist *l, skiplist_node *node, keyType key, valueType value, unsigned int randseed) -+{ -+ skiplist_node *update[MaxNumberOfLevels]; -+ skiplist_node *p, *q; -+ int k = l->level; -+ -+ p = l->header; -+ do { -+ while (q = p->next[k], q->key <= key) -+ p = q; -+ update[k] = p; -+ } while (--k >= 0); -+ -+ ++l->entries; -+ k = randomLevel(randseed); -+ if (k > l->level) { -+ k = ++l->level; -+ update[k] = l->header; -+ } -+ -+ node->level = k; -+ node->key = key; -+ node->value = value; -+ do { -+ p = update[k]; -+ node->next[k] = p->next[k]; -+ p->next[k] = node; -+ node->prev[k] = p; -+ node->next[k]->prev[k] = node; -+ } while (--k >= 0); -+} -+ -+void skiplist_delete(skiplist *l, skiplist_node *node) -+{ -+ int k, m = node->level; -+ -+ for (k = 0; k <= m; k++) { -+ node->prev[k]->next[k] = node->next[k]; -+ node->next[k]->prev[k] = node->prev[k]; -+ } -+ skiplist_node_init(node); -+ if (m == l->level) { -+ while (l->header->next[m] == l->header && l->header->prev[m] == l->header && m > 0) -+ m--; -+ l->level = m; -+ } -+ l->entries--; -+} -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index b6f2f35d0bcf..349f5a249593 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -130,9 +130,19 @@ static int __maybe_unused four = 4; - static unsigned long zero_ul; - static unsigned long one_ul = 1; - static unsigned long long_max = LONG_MAX; --static int one_hundred = 100; --static int one_thousand = 1000; --#ifdef CONFIG_PRINTK -+static int __read_mostly one_hundred = 100; -+static int __read_mostly one_thousand = 1000; -+#ifdef CONFIG_SCHED_MUQSS -+static int zero = 0; -+static int one = 1; -+extern int rr_interval; -+extern int sched_interactive; -+extern int sched_iso_cpu; -+extern int sched_yield_type; -+#endif -+extern int hrtimer_granularity_us; -+extern int hrtimeout_min_us; -+#if defined(CONFIG_PRINTK) || defined(CONFIG_SCHED_MUQSS) - static int ten_thousand = 10000; - #endif - #ifdef CONFIG_PERF_EVENTS -@@ -300,7 +310,7 @@ static struct ctl_table sysctl_base_table[] = { - { } - }; - --#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_MUQSS) - static int min_sched_granularity_ns = 100000; /* 100 usecs */ - static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_wakeup_granularity_ns; /* 0 usecs */ -@@ -317,6 +327,7 @@ static int max_extfrag_threshold = 1000; - #endif - - static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_MUQSS - { - .procname = "sched_child_runs_first", - .data = 
&sysctl_sched_child_runs_first, -@@ -498,6 +509,7 @@ static struct ctl_table kern_table[] = { - .extra2 = SYSCTL_ONE, - }, - #endif -+#endif /* !CONFIG_SCHED_MUQSS */ - #ifdef CONFIG_PROVE_LOCKING - { - .procname = "prove_locking", -@@ -1070,6 +1082,62 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_SCHED_MUQSS -+ { -+ .procname = "rr_interval", -+ .data = &rr_interval, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &one, -+ .extra2 = &one_thousand, -+ }, -+ { -+ .procname = "interactive", -+ .data = &sched_interactive, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &one, -+ }, -+ { -+ .procname = "iso_cpu", -+ .data = &sched_iso_cpu, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &one_hundred, -+ }, -+ { -+ .procname = "yield_type", -+ .data = &sched_yield_type, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &two, -+ }, -+#endif -+ { -+ .procname = "hrtimer_granularity_us", -+ .data = &hrtimer_granularity_us, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &one, -+ .extra2 = &ten_thousand, -+ }, -+ { -+ .procname = "hrtimeout_min_us", -+ .data = &hrtimeout_min_us, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &one, -+ .extra2 = &ten_thousand, -+ }, - #if defined(CONFIG_S390) && defined(CONFIG_SMP) - { - .procname = "spin_retry", -diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig -index fcc42353f125..46bb16d3c159 100644 ---- a/kernel/time/Kconfig -+++ b/kernel/time/Kconfig -@@ -66,6 +66,9 @@ config NO_HZ_COMMON - depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - select TICK_ONESHOT - -+config NO_HZ_FULL -+ bool -+ - choice - prompt "Timer tick handling" - default NO_HZ_IDLE if NO_HZ -@@ -87,8 +90,9 @@ config NO_HZ_IDLE - - Most of the time you want to say Y here. - --config NO_HZ_FULL -+config NO_HZ_FULL_NODEF - bool "Full dynticks system (tickless)" -+ select NO_HZ_FULL - # NO_HZ_COMMON dependency - depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - # We need at least one periodic CPU for timekeeping -@@ -114,6 +118,8 @@ config NO_HZ_FULL - transitions: syscalls, exceptions and interrupts. Even when it's - dynamically off. - -+ Not recommended for desktops,laptops, or mobile devices. -+ - Say N. 
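/*
 * Hypothetical userspace sketch for the MuQSS sysctls registered in the
 * kernel/sysctl.c hunk above; the path follows from the kern_table
 * procname, and 4 is just an example value inside the 1..1000 range
 * enforced by proc_dointvec_minmax there.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/rr_interval", "w");

	if (!f)
		return 1;
	fprintf(f, "%d\n", 4);
	return fclose(f) ? 1 : 0;
}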
- - endchoice -diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c -index f5490222e134..544c58c29267 100644 ---- a/kernel/time/clockevents.c -+++ b/kernel/time/clockevents.c -@@ -190,8 +190,9 @@ int clockevents_tick_resume(struct clock_event_device *dev) - - #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST - --/* Limit min_delta to a jiffie */ --#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) -+int __read_mostly hrtimer_granularity_us = 100; -+/* Limit min_delta to 100us */ -+#define MIN_DELTA_LIMIT (hrtimer_granularity_us * NSEC_PER_USEC) - - /** - * clockevents_increase_min_delta - raise minimum delta of a clock event device -diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index 65605530ee34..75e67a12a97b 100644 ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -2206,3 +2206,113 @@ int __sched schedule_hrtimeout(ktime_t *expires, - return schedule_hrtimeout_range(expires, 0, mode); - } - EXPORT_SYMBOL_GPL(schedule_hrtimeout); -+ -+/* -+ * As per schedule_hrtimeout but taskes a millisecond value and returns how -+ * many milliseconds are left. -+ */ -+long __sched schedule_msec_hrtimeout(long timeout) -+{ -+ struct hrtimer_sleeper t; -+ int delta, jiffs; -+ ktime_t expires; -+ -+ if (!timeout) { -+ __set_current_state(TASK_RUNNING); -+ return 0; -+ } -+ -+ jiffs = msecs_to_jiffies(timeout); -+ /* -+ * If regular timer resolution is adequate or hrtimer resolution is not -+ * (yet) better than Hz, as would occur during startup, use regular -+ * timers. -+ */ -+ if (jiffs > 4 || hrtimer_resolution >= NSEC_PER_SEC / HZ || pm_freezing) -+ return schedule_timeout(jiffs); -+ -+ delta = (timeout % 1000) * NSEC_PER_MSEC; -+ expires = ktime_set(0, delta); -+ -+ hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ hrtimer_set_expires_range_ns(&t.timer, expires, delta); -+ -+ hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL); -+ -+ if (likely(t.task)) -+ schedule(); -+ -+ hrtimer_cancel(&t.timer); -+ destroy_hrtimer_on_stack(&t.timer); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ expires = hrtimer_expires_remaining(&t.timer); -+ timeout = ktime_to_ms(expires); -+ return timeout < 0 ? 0 : timeout; -+} -+ -+EXPORT_SYMBOL(schedule_msec_hrtimeout); -+ -+#define USECS_PER_SEC 1000000 -+extern int hrtimer_granularity_us; -+ -+static inline long schedule_usec_hrtimeout(long timeout) -+{ -+ struct hrtimer_sleeper t; -+ ktime_t expires; -+ int delta; -+ -+ if (!timeout) { -+ __set_current_state(TASK_RUNNING); -+ return 0; -+ } -+ -+ if (hrtimer_resolution >= NSEC_PER_SEC / HZ) -+ return schedule_timeout(usecs_to_jiffies(timeout)); -+ -+ if (timeout < hrtimer_granularity_us) -+ timeout = hrtimer_granularity_us; -+ delta = (timeout % USECS_PER_SEC) * NSEC_PER_USEC; -+ expires = ktime_set(0, delta); -+ -+ hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ hrtimer_set_expires_range_ns(&t.timer, expires, delta); -+ -+ hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL); -+ -+ if (likely(t.task)) -+ schedule(); -+ -+ hrtimer_cancel(&t.timer); -+ destroy_hrtimer_on_stack(&t.timer); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ expires = hrtimer_expires_remaining(&t.timer); -+ timeout = ktime_to_us(expires); -+ return timeout < 0 ? 
0 : timeout; -+} -+ -+int __read_mostly hrtimeout_min_us = 500; -+ -+long __sched schedule_min_hrtimeout(void) -+{ -+ return usecs_to_jiffies(schedule_usec_hrtimeout(hrtimeout_min_us)); -+} -+ -+EXPORT_SYMBOL(schedule_min_hrtimeout); -+ -+long __sched schedule_msec_hrtimeout_interruptible(long timeout) -+{ -+ __set_current_state(TASK_INTERRUPTIBLE); -+ return schedule_msec_hrtimeout(timeout); -+} -+EXPORT_SYMBOL(schedule_msec_hrtimeout_interruptible); -+ -+long __sched schedule_msec_hrtimeout_uninterruptible(long timeout) -+{ -+ __set_current_state(TASK_UNINTERRUPTIBLE); -+ return schedule_msec_hrtimeout(timeout); -+} -+EXPORT_SYMBOL(schedule_msec_hrtimeout_uninterruptible); -diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c -index 42d512fcfda2..0db83bdf7f39 100644 ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -226,7 +226,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) - u64 stime, utime; - - task_cputime(p, &utime, &stime); -- store_samples(samples, stime, utime, p->se.sum_exec_runtime); -+ store_samples(samples, stime, utime, tsk_seruntime(p)); - } - - static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, -@@ -845,7 +845,7 @@ static void check_thread_timers(struct task_struct *tsk, - soft = task_rlimit(tsk, RLIMIT_RTTIME); - if (soft != RLIM_INFINITY) { - /* Task RT timeout is accounted in jiffies. RTTIME is usec */ -- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); -+ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); - unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - - /* At the hard limit, send SIGKILL. No further action. */ -diff --git a/kernel/time/timer.c b/kernel/time/timer.c -index 4820823515e9..13034cc7c9a4 100644 ---- a/kernel/time/timer.c -+++ b/kernel/time/timer.c -@@ -43,6 +43,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -1567,7 +1568,7 @@ static unsigned long __next_timer_interrupt(struct timer_base *base) - * Check, if the next hrtimer event is before the next timer wheel - * event: - */ --static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) -+static u64 cmp_next_hrtimer_event(struct timer_base *base, u64 basem, u64 expires) - { - u64 nextevt = hrtimer_get_next_event(); - -@@ -1585,6 +1586,9 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) - if (nextevt <= basem) - return basem; - -+ if (nextevt < expires && nextevt - basem <= TICK_NSEC) -+ base->is_idle = false; -+ - /* - * Round up to the next jiffie. High resolution timers are - * off, so the hrtimers are expired in the tick and we need to -@@ -1654,7 +1658,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) - } - raw_spin_unlock(&base->lock); - -- return cmp_next_hrtimer_event(basem, expires); -+ return cmp_next_hrtimer_event(base, basem, expires); - } - - /** -@@ -1889,6 +1893,18 @@ signed long __sched schedule_timeout(signed long timeout) - - expire = timeout + jiffies; - -+#ifdef CONFIG_HIGH_RES_TIMERS -+ if (timeout == 1 && hrtimer_resolution < NSEC_PER_SEC / HZ) { -+ /* -+ * Special case 1 as being a request for the minimum timeout -+ * and use highres timers to timeout after 1ms to workaround -+ * the granularity of low Hz tick timers. 
-+ */ -+ if (!schedule_min_hrtimeout()) -+ return 0; -+ goto out_timeout; -+ } -+#endif - timer.task = current; - timer_setup_on_stack(&timer.timer, process_timeout, 0); - __mod_timer(&timer.timer, expire, 0); -@@ -1897,10 +1913,10 @@ signed long __sched schedule_timeout(signed long timeout) - - /* Remove the timer from the object tracker */ - destroy_timer_on_stack(&timer.timer); -- -+out_timeout: - timeout = expire - jiffies; - -- out: -+out: - return timeout < 0 ? 0 : timeout; - } - EXPORT_SYMBOL(schedule_timeout); -@@ -2042,7 +2058,19 @@ void __init init_timers(void) - */ - void msleep(unsigned int msecs) - { -- unsigned long timeout = msecs_to_jiffies(msecs) + 1; -+ int jiffs = msecs_to_jiffies(msecs); -+ unsigned long timeout; -+ -+ /* -+ * Use high resolution timers where the resolution of tick based -+ * timers is inadequate. -+ */ -+ if (jiffs < 5 && hrtimer_resolution < NSEC_PER_SEC / HZ && !pm_freezing) { -+ while (msecs) -+ msecs = schedule_msec_hrtimeout_uninterruptible(msecs); -+ return; -+ } -+ timeout = jiffs + 1; - - while (timeout) - timeout = schedule_timeout_uninterruptible(timeout); -@@ -2056,7 +2084,15 @@ EXPORT_SYMBOL(msleep); - */ - unsigned long msleep_interruptible(unsigned int msecs) - { -- unsigned long timeout = msecs_to_jiffies(msecs) + 1; -+ int jiffs = msecs_to_jiffies(msecs); -+ unsigned long timeout; -+ -+ if (jiffs < 5 && hrtimer_resolution < NSEC_PER_SEC / HZ && !pm_freezing) { -+ while (msecs && !signal_pending(current)) -+ msecs = schedule_msec_hrtimeout_interruptible(msecs); -+ return msecs; -+ } -+ timeout = jiffs + 1; - - while (timeout && !signal_pending(current)) - timeout = schedule_timeout_interruptible(timeout); -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index 69ee8ef12cee..6edb01f2fd81 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) - { - /* Make this a -deadline thread */ - static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_MUQSS -+ /* No deadline on MuQSS, use RR */ -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, - .sched_runtime = 100000ULL, - .sched_deadline = 10000000ULL, - .sched_period = 10000000ULL -+#endif - }; - struct wakeup_test_data *x = data; - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index ee4eecc7e1c2..22c1b0469468 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -164,7 +164,7 @@ struct scan_control { - /* - * From 0 .. 100. Higher means more swappy. - */ --int vm_swappiness = 60; -+int vm_swappiness = 33; - /* - * The total number of pages which are beyond the high watermark within all - * zones. 
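 
For a quick look at the knobs the hunks above expose, a small userspace sketch follows; it assumes the conventional sysctl mapping (kern_table entries appear under /proc/sys/kernel/, vm_swappiness under /proc/sys/vm/swappiness) and the first two files will only exist on a kernel built with this patch set.
 
/*
 * Hedged sketch: read back the tunables introduced or retuned above.
 * hrtimer_granularity_us defaults to 100, hrtimeout_min_us to 500,
 * and vm_swappiness is lowered from 60 to 33 by the vmscan.c hunk.
 */
#include <stdio.h>
 
static void show(const char *path)
{
	FILE *f = fopen(path, "r");
	int val;
 
	if (!f) {
		perror(path);
		return;
	}
	if (fscanf(f, "%d", &val) == 1)
		printf("%s = %d\n", path, val);
	fclose(f);
}
 
int main(void)
{
	show("/proc/sys/kernel/hrtimer_granularity_us");
	show("/proc/sys/kernel/hrtimeout_min_us");
	show("/proc/sys/vm/swappiness");
	return 0;
}
 
Writes through the same paths are clamped to the 1..10000 range enforced by proc_dointvec_minmax in the sysctl hunk above.
 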
-diff --git a/net/core/pktgen.c b/net/core/pktgen.c -index 48b1e429857c..908c866bc9fc 100644 ---- a/net/core/pktgen.c -+++ b/net/core/pktgen.c -@@ -1894,7 +1894,7 @@ static void pktgen_mark_device(const struct pktgen_net *pn, const char *ifname) - mutex_unlock(&pktgen_thread_lock); - pr_debug("%s: waiting for %s to disappear....\n", - __func__, ifname); -- schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try)); -+ schedule_msec_hrtimeout_interruptible((msec_per_try)); - mutex_lock(&pktgen_thread_lock); - - if (++i >= max_tries) { -diff --git a/sound/pci/maestro3.c b/sound/pci/maestro3.c -index 19fa73df0846..46caed9b924d 100644 ---- a/sound/pci/maestro3.c -+++ b/sound/pci/maestro3.c -@@ -2001,7 +2001,7 @@ static void snd_m3_ac97_reset(struct snd_m3 *chip) - outw(0, io + GPIO_DATA); - outw(dir | GPO_PRIMARY_AC97, io + GPIO_DIRECTION); - -- schedule_timeout_uninterruptible(msecs_to_jiffies(delay1)); -+ schedule_msec_hrtimeout_uninterruptible((delay1)); - - outw(GPO_PRIMARY_AC97, io + GPIO_DATA); - udelay(5); -@@ -2009,7 +2009,7 @@ static void snd_m3_ac97_reset(struct snd_m3 *chip) - outw(IO_SRAM_ENABLE | SERIAL_AC_LINK_ENABLE, io + RING_BUS_CTRL_A); - outw(~0, io + GPIO_MASK); - -- schedule_timeout_uninterruptible(msecs_to_jiffies(delay2)); -+ schedule_msec_hrtimeout_uninterruptible((delay2)); - - if (! snd_m3_try_read_vendor(chip)) - break; -diff --git a/sound/soc/codecs/rt5631.c b/sound/soc/codecs/rt5631.c -index f70b9f7e68bb..77b65398ca07 100644 ---- a/sound/soc/codecs/rt5631.c -+++ b/sound/soc/codecs/rt5631.c -@@ -415,7 +415,7 @@ static void onebit_depop_mute_stage(struct snd_soc_component *component, int ena - hp_zc = snd_soc_component_read32(component, RT5631_INT_ST_IRQ_CTRL_2); - snd_soc_component_write(component, RT5631_INT_ST_IRQ_CTRL_2, hp_zc & 0xf7ff); - if (enable) { -- schedule_timeout_uninterruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_uninterruptible((10)); - /* config one-bit depop parameter */ - rt5631_write_index(component, RT5631_SPK_INTL_CTRL, 0x307f); - snd_soc_component_update_bits(component, RT5631_HP_OUT_VOL, -@@ -525,7 +525,7 @@ static void depop_seq_mute_stage(struct snd_soc_component *component, int enable - hp_zc = snd_soc_component_read32(component, RT5631_INT_ST_IRQ_CTRL_2); - snd_soc_component_write(component, RT5631_INT_ST_IRQ_CTRL_2, hp_zc & 0xf7ff); - if (enable) { -- schedule_timeout_uninterruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_uninterruptible((10)); - - /* config depop sequence parameter */ - rt5631_write_index(component, RT5631_SPK_INTL_CTRL, 0x302f); -diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c -index fe99584c917f..f1344d532a13 100644 ---- a/sound/soc/codecs/wm8350.c -+++ b/sound/soc/codecs/wm8350.c -@@ -233,10 +233,10 @@ static void wm8350_pga_work(struct work_struct *work) - out2->ramp == WM8350_RAMP_UP) { - /* delay is longer over 0dB as increases are larger */ - if (i >= WM8350_OUTn_0dB) -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (2)); - else -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (1)); - } else - udelay(50); /* doesn't matter if we delay longer */ -@@ -1120,7 +1120,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - (platform->dis_out4 << 6)); - - /* wait for discharge */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform-> - cap_discharge_msecs)); - -@@ -1136,7 +1136,7 @@ static int 
wm8350_set_bias_level(struct snd_soc_component *component, - WM8350_VBUFEN); - - /* wait for vmid */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform-> - vmid_charge_msecs)); - -@@ -1187,7 +1187,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, pm1); - - /* wait */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform-> - vmid_discharge_msecs)); - -@@ -1205,7 +1205,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - pm1 | WM8350_OUTPUT_DRAIN_EN); - - /* wait */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform->drain_msecs)); - - pm1 &= ~WM8350_BIASEN; -diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c -index 271235a69c01..3ec90e1b1eb4 100644 ---- a/sound/soc/codecs/wm8900.c -+++ b/sound/soc/codecs/wm8900.c -@@ -1109,7 +1109,7 @@ static int wm8900_set_bias_level(struct snd_soc_component *component, - /* Need to let things settle before stopping the clock - * to ensure that restart works, see "Stopping the - * master clock" in the datasheet. */ -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible(1); - snd_soc_component_write(component, WM8900_REG_POWER2, - WM8900_REG_POWER2_SYSCLK_ENA); - break; -diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c -index 6497c1ea6228..08fefeca9d82 100644 ---- a/sound/soc/codecs/wm9713.c -+++ b/sound/soc/codecs/wm9713.c -@@ -199,7 +199,7 @@ static int wm9713_voice_shutdown(struct snd_soc_dapm_widget *w, - - /* Gracefully shut down the voice interface. */ - snd_soc_component_update_bits(component, AC97_HANDSET_RATE, 0x0f00, 0x0200); -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible(1); - snd_soc_component_update_bits(component, AC97_HANDSET_RATE, 0x0f00, 0x0f00); - snd_soc_component_update_bits(component, AC97_EXTENDED_MID, 0x1000, 0x1000); - -@@ -868,7 +868,7 @@ static int wm9713_set_pll(struct snd_soc_component *component, - wm9713->pll_in = freq_in; - - /* wait 10ms AC97 link frames for the link to stabilise */ -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - return 0; - } - -diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c -index b6378f025836..5f5e58655d32 100644 ---- a/sound/soc/soc-dapm.c -+++ b/sound/soc/soc-dapm.c -@@ -154,7 +154,7 @@ static void dapm_assert_locked(struct snd_soc_dapm_context *dapm) - static void pop_wait(u32 pop_time) - { - if (pop_time) -- schedule_timeout_uninterruptible(msecs_to_jiffies(pop_time)); -+ schedule_msec_hrtimeout_uninterruptible((pop_time)); - } - - __printf(3, 4) -diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c -index f70211e6b174..5ae4421225e6 100644 ---- a/sound/usb/line6/pcm.c -+++ b/sound/usb/line6/pcm.c -@@ -127,7 +127,7 @@ static void line6_wait_clear_audio_urbs(struct snd_line6_pcm *line6pcm, - if (!alive) - break; - set_current_state(TASK_UNINTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } while (--timeout > 0); - if (alive) - dev_err(line6pcm->line6->ifcdev, diff --git a/linux54-tkg/linux54-tkg-patches/0004-glitched-muqss.patch b/linux54-tkg/linux54-tkg-patches/0004-glitched-muqss.patch deleted file mode 100644 index 2c4837e..0000000 --- a/linux54-tkg/linux54-tkg-patches/0004-glitched-muqss.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 
f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - MuQSS - -diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c -index 84a1d08d68551..57c3036a68952 100644 ---- a/kernel/sched/MuQSS.c -+++ b/kernel/sched/MuQSS.c -@@ -163,7 +167,11 @@ int sched_interactive __read_mostly = 1; - * are allowed to run five seconds as real time tasks. This is the total over - * all online cpus. - */ -+#ifdef CONFIG_ZENIFY -+int sched_iso_cpu __read_mostly = 25; -+#else - int sched_iso_cpu __read_mostly = 70; -+#endif - - /* - * sched_yield_type - Choose what sort of yield sched_yield will perform. - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -5,7 +5,7 @@ - choice - prompt "Timer frequency" - default HZ_100 if SCHED_MUQSS -- default HZ_250_NODEF if !SCHED_MUQSS -+ default HZ_500_NODEF if !SCHED_MUQSS - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -50,6 +50,20 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500_NODEF -+ bool "500 HZ" -+ help -+ 500 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ -+ config HZ_750_NODEF -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ - config HZ_1000_NODEF - bool "1000 HZ" - help -@@ -63,6 +70,8 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250_NODEF - default 300 if HZ_300_NODEF -+ default 500 if HZ_500_NODEF -+ default 750 if HZ_750_NODEF - default 1000 if HZ_1000_NODEF - - config SCHED_HRTICK - -diff --git a/Makefile b/Makefile -index d4d36c61940b..4a9dfe471f1f 100644 ---- a/Makefile -+++ b/Makefile -@@ -15,7 +15,6 @@ NAME = Kleptomaniac Octopus - - CKVERSION = -ck1 - CKNAME = MuQSS Powered --EXTRAVERSION := $(EXTRAVERSION)$(CKVERSION) - - # We are using a recursive build, so we need to do a little thinking - # to get the ordering right. 
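 
The Kconfig.hz hunk above only widens the menu of tick frequencies (adding 500 and 750 Hz next to the stock 100/250/300/1000) and moves the non-MuQSS default to 500 Hz; what changes at runtime is the tick period, 1/HZ. A standalone sketch of that arithmetic (plain userspace C, not taken from the patch):
 
/*
 * Illustrative only: print the tick period for each frequency offered by
 * the Kconfig choices in the hunk above.
 */
#include <stdio.h>
 
int main(void)
{
	const int hz_choices[] = { 100, 250, 300, 500, 750, 1000 };
	size_t i;
 
	for (i = 0; i < sizeof(hz_choices) / sizeof(hz_choices[0]); i++) {
		int hz = hz_choices[i];
 
		/* one tick = 1/HZ seconds */
		printf("HZ=%4d -> tick period %.3f ms (%.0f us)\n",
		       hz, 1000.0 / hz, 1000000.0 / hz);
	}
	return 0;
}
 
At 500 Hz a tick is 2 ms versus 4 ms at the stock 250 Hz, which is the interactivity/throughput trade the help texts above describe.
 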
diff --git a/linux54-tkg/linux54-tkg-patches/0004-glitched-ondemand-muqss.patch b/linux54-tkg/linux54-tkg-patches/0004-glitched-ondemand-muqss.patch deleted file mode 100644 index 02933e4..0000000 --- a/linux54-tkg/linux54-tkg-patches/0004-glitched-ondemand-muqss.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 6b423eebfd5d..61e3271675d6 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -21,10 +21,10 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) --#define DEF_SAMPLING_DOWN_FACTOR (1) -+#define DEF_FREQUENCY_UP_THRESHOLD (45) -+#define DEF_SAMPLING_DOWN_FACTOR (5) - #define MAX_SAMPLING_DOWN_FACTOR (100000) --#define MICRO_FREQUENCY_UP_THRESHOLD (95) -+#define MICRO_FREQUENCY_UP_THRESHOLD (45) - #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) - #define MIN_FREQUENCY_UP_THRESHOLD (1) - #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/linux54-tkg/linux54-tkg-patches/0005-glitched-ondemand-pds.patch b/linux54-tkg/linux54-tkg-patches/0005-glitched-ondemand-pds.patch deleted file mode 100644 index c1929e8..0000000 --- a/linux54-tkg/linux54-tkg-patches/0005-glitched-ondemand-pds.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 6b423eebfd5d..61e3271675d6 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -21,10 +21,10 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (63) --#define DEF_SAMPLING_DOWN_FACTOR (1) -+#define DEF_FREQUENCY_UP_THRESHOLD (55) -+#define DEF_SAMPLING_DOWN_FACTOR (5) - #define MAX_SAMPLING_DOWN_FACTOR (100000) --#define MICRO_FREQUENCY_UP_THRESHOLD (95) -+#define MICRO_FREQUENCY_UP_THRESHOLD (63) - #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) - #define MIN_FREQUENCY_UP_THRESHOLD (1) - #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/linux54-tkg/linux54-tkg-patches/0005-glitched-pds.patch b/linux54-tkg/linux54-tkg-patches/0005-glitched-pds.patch deleted file mode 100644 index 21f2d69..0000000 --- a/linux54-tkg/linux54-tkg-patches/0005-glitched-pds.patch +++ /dev/null @@ -1,213 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - PDS - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. 
-+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9270a4370d54..30d01e647417 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -159,7 +159,7 @@ struct scan_control { - /* - * From 0 .. 100. Higher means more swappy. - */ --int vm_swappiness = 60; -+int vm_swappiness = 20; - /* - * The total number of pages which are beyond the high watermark within all - * zones. - -diff --git a/kernel/sched/pds.c b/kernel/sched/pds.c -index c2d831b242b6d18a47e0d87a9f5433a7748b52ff..5bc8d7a8f920c21feab69b2706a3328dc8d39f9a 100644 ---- a/kernel/sched/pds.c -+++ b/kernel/sched/pds.c -@@ -409,12 +409,11 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) - * [L] ->on_rq - * RELEASE (rq->lock) - * -- * If we observe the old CPU in task_rq_lock(), the acquire of -+ * If we observe the old CPU in task_rq_lock, the acquire of - * the old rq->lock will fully serialize against the stores. - * -- * If we observe the new CPU in task_rq_lock(), the address -- * dependency headed by '[L] rq = task_rq()' and the acquire -- * will pair with the WMB to ensure we then also see migrating. -+ * If we observe the new CPU in task_rq_lock, the acquire will -+ * pair with the WMB to ensure we must then also see migrating. 
- */ - if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { - return rq; -@@ -952,9 +953,9 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) - smp_wmb(); - - #ifdef CONFIG_THREAD_INFO_IN_TASK -- WRITE_ONCE(p->cpu, cpu); -+ p->cpu = cpu; - #else -- WRITE_ONCE(task_thread_info(p)->cpu, cpu); -+ task_thread_info(p)->cpu = cpu; - #endif - #endif - } -@@ -1035,7 +1036,7 @@ static void detach_task(struct rq *rq, struct task_struct *p, int target_cpu) - { - lockdep_assert_held(&rq->lock); - -- WRITE_ONCE(p->on_rq ,TASK_ON_RQ_MIGRATING); -+ p->on_rq = TASK_ON_RQ_MIGRATING; - if (task_contributes_to_load(p)) - rq->nr_uninterruptible++; - dequeue_task(p, rq, 0); -diff --git a/kernel/sched/pds_sched.h b/kernel/sched/pds_sched.h -index 20dcf19ea057627d91be07b4ec20f0827c30084c..24fa90ca63d144cc4f45d82d88407ea70d2d2edf 100644 ---- a/kernel/sched/pds_sched.h -+++ b/kernel/sched/pds_sched.h -@@ -56,7 +56,7 @@ static inline int task_on_rq_queued(struct task_struct *p) - - static inline int task_on_rq_migrating(struct task_struct *p) - { -- return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+ return p->on_rq == TASK_ON_RQ_MIGRATING; - } - - enum { - -diff --git a/init/Kconfig b/init/Kconfig -index 11fd9b502d06..e9bc34d3019b 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -948,7 +948,6 @@ config CGROUP_DEVICE - - config CGROUP_CPUACCT - bool "Simple CPU accounting controller" -- depends on !SCHED_PDS - help - Provides a simple controller for monitoring the - total CPU consumed by the tasks in a cgroup. -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index b23231bae996..cab4e5c5b38e 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -24,13 +24,13 @@ obj-y += fair.o rt.o deadline.o - obj-$(CONFIG_SMP) += cpudeadline.o topology.o stop_task.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o --obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o - endif - obj-y += loadavg.o clock.o cputime.o - obj-y += idle.o - obj-y += wait.o wait_bit.o swait.o completion.o - obj-$(CONFIG_SMP) += cpupri.o pelt.o - obj-$(CONFIG_SCHEDSTATS) += stats.o -+obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o - obj-$(CONFIG_MEMBARRIER) += membarrier.o - -diff --git a/kernel/sched/pds.c b/kernel/sched/pds.c -index 9281ad164..f09a609cf 100644 ---- a/kernel/sched/pds.c -+++ b/kernel/sched/pds.c -@@ -81,6 +81,18 @@ enum { - NR_CPU_AFFINITY_CHK_LEVEL - }; - -+/* -+ * This allows printing both to /proc/sched_debug and -+ * to the console -+ */ -+#define SEQ_printf(m, x...) 
\ -+ do { \ -+ if (m) \ -+ seq_printf(m, x); \ -+ else \ -+ pr_cont(x); \ -+ } while (0) -+ - static inline void print_scheduler_version(void) - { - printk(KERN_INFO "pds: PDS-mq CPU Scheduler 0.99o by Alfred Chen.\n"); -@@ -6353,7 +6365,10 @@ void ia64_set_curr_task(int cpu, struct task_struct *p) - #ifdef CONFIG_SCHED_DEBUG - void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, - struct seq_file *m) --{} -+{ -+ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} - - void proc_sched_set_task(struct task_struct *p) - {} diff --git a/linux54-tkg/linux54-tkg-patches/0005-v5.4_undead-pds099o.patch b/linux54-tkg/linux54-tkg-patches/0005-v5.4_undead-pds099o.patch deleted file mode 100644 index e6db1ad..0000000 --- a/linux54-tkg/linux54-tkg-patches/0005-v5.4_undead-pds099o.patch +++ /dev/null @@ -1,8387 +0,0 @@ -From 89067d28ca90681fc6cf108de79b9aedb93dfa9d Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Mon, 25 Nov 2019 21:46:23 +0100 -Subject: PDS 099o, 5.4 rebase - - -diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index 032c7cd3cede..360a229b0abe 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -82,6 +82,7 @@ show up in /proc/sys/kernel: - - randomize_va_space - - real-root-dev ==> Documentation/admin-guide/initrd.rst - - reboot-cmd [ SPARC only ] -+- rr_interval - - rtsig-max - - rtsig-nr - - sched_energy_aware -@@ -105,6 +106,7 @@ show up in /proc/sys/kernel: - - unknown_nmi_panic - - watchdog - - watchdog_thresh -+- yield_type - - version - - -diff --git a/Documentation/scheduler/sched-PDS-mq.txt b/Documentation/scheduler/sched-PDS-mq.txt -new file mode 100644 -index 000000000000..709e86f6487e ---- /dev/null -+++ b/Documentation/scheduler/sched-PDS-mq.txt -@@ -0,0 +1,56 @@ -+ Priority and Deadline based Skiplist multiple queue Scheduler -+ ------------------------------------------------------------- -+ -+CONTENT -+======== -+ -+ 0. Development -+ 1. Overview -+ 1.1 Design goal -+ 1.2 Design summary -+ 2. Design Detail -+ 2.1 Skip list implementation -+ 2.2 Task preempt -+ 2.3 Task policy, priority and deadline -+ 2.4 Task selection -+ 2.5 Run queue balance -+ 2.6 Task migration -+ -+ -+0. Development -+============== -+ -+Priority and Deadline based Skiplist multiple queue scheduler, referred to as -+PDS from here on, is developed upon the enhancement patchset VRQ(Variable Run -+Queue) for BFS(Brain Fuck Scheduler by Con Kolivas). PDS inherits the existing -+design from VRQ and inspired by the introduction of skiplist data structure -+to the scheduler by Con Kolivas. However, PDS is different from MuQSS(Multiple -+Queue Skiplist Scheduler, the successor after BFS) in many ways. -+ -+1. Overview -+=========== -+ -+1.1 Design goal -+--------------- -+ -+PDS is designed to make the cpu process scheduler code to be simple, but while -+efficiency and scalable. Be Simple, the scheduler code will be easy to be read -+and the behavious of scheduler will be easy to predict. Be efficiency, the -+scheduler shall be well balance the thoughput performance and task interactivity -+at the same time for different properties the tasks behave. Be scalable, the -+performance of the scheduler should be in good shape with the glowing of -+workload or with the growing of the cpu numbers. -+ -+1.2 Design summary -+------------------ -+ -+PDS is described as a multiple run queues cpu scheduler. 
Each cpu has its own -+run queue. A heavry customized skiplist is used as the backend data structure -+of the cpu run queue. Tasks in run queue is sorted by priority then virtual -+deadline(simplfy to just deadline from here on). In PDS, balance action among -+run queues are kept as less as possible to reduce the migration cost. Cpumask -+data structure is widely used in cpu affinity checking and cpu preemption/ -+selection to make PDS scalable with increasing cpu number. -+ -+ -+To be continued... -diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c -index f18d5067cd0f..fe489fc01c73 100644 ---- a/arch/powerpc/platforms/cell/spufs/sched.c -+++ b/arch/powerpc/platforms/cell/spufs/sched.c -@@ -51,11 +51,6 @@ static struct task_struct *spusched_task; - static struct timer_list spusched_timer; - static struct timer_list spuloadavg_timer; - --/* -- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). -- */ --#define NORMAL_PRIO 120 -- - /* - * Frequency of the spu scheduler tick. By default we do one SPU scheduler - * tick for every 10 CPU scheduler ticks. -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 8ef85139553f..9d44d8d78259 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1034,6 +1034,22 @@ config NR_CPUS - config SCHED_SMT - def_bool y if SMP - -+config SMT_NICE -+ bool "SMT (Hyperthreading) aware nice priority and policy support" -+ depends on SCHED_PDS && SCHED_SMT -+ default y -+ ---help--- -+ Enabling Hyperthreading on Intel CPUs decreases the effectiveness -+ of the use of 'nice' levels and different scheduling policies -+ (e.g. realtime) due to sharing of CPU power between hyperthreads. -+ SMT nice support makes each logical CPU aware of what is running on -+ its hyperthread siblings, maintaining appropriate distribution of -+ CPU according to nice levels and scheduling policies at the expense -+ of slightly increased overhead. -+ -+ If unsure say Y here. -+ -+ - config SCHED_MC - def_bool y - prompt "Multi-core scheduler support" -diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c -index b66e81c06a57..a294f8f5fd75 100644 ---- a/drivers/cpufreq/cpufreq_conservative.c -+++ b/drivers/cpufreq/cpufreq_conservative.c -@@ -28,8 +28,8 @@ struct cs_dbs_tuners { - }; - - /* Conservative governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) --#define DEF_FREQUENCY_DOWN_THRESHOLD (20) -+#define DEF_FREQUENCY_UP_THRESHOLD (63) -+#define DEF_FREQUENCY_DOWN_THRESHOLD (26) - #define DEF_FREQUENCY_STEP (5) - #define DEF_SAMPLING_DOWN_FACTOR (1) - #define MAX_SAMPLING_DOWN_FACTOR (10) -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index dced033875bf..d2cd03766b09 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -18,7 +18,7 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) -+#define DEF_FREQUENCY_UP_THRESHOLD (63) - #define DEF_SAMPLING_DOWN_FACTOR (1) - #define MAX_SAMPLING_DOWN_FACTOR (100000) - #define MICRO_FREQUENCY_UP_THRESHOLD (95) -@@ -127,7 +127,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) - } - - /* -- * Every sampling_rate, we check, if current idle time is less than 20% -+ * Every sampling_rate, we check, if current idle time is less than 37% - * (default), then we try to increase frequency. Else, we adjust the frequency - * proportional to load. 
- */ -diff --git a/fs/proc/base.c b/fs/proc/base.c -index ebea9501afb8..51c9346a69fe 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -477,7 +477,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, - seq_puts(m, "0 0 0\n"); - else - seq_printf(m, "%llu %llu %lu\n", -- (unsigned long long)task->se.sum_exec_runtime, -+ (unsigned long long)tsk_seruntime(task), - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); - -diff --git a/include/linux/init_task.h b/include/linux/init_task.h -index 2c620d7ac432..1a7987c40c80 100644 ---- a/include/linux/init_task.h -+++ b/include/linux/init_task.h -@@ -36,7 +36,11 @@ extern struct cred init_cred; - #define INIT_PREV_CPUTIME(x) - #endif - -+#ifdef CONFIG_SCHED_PDS -+#define INIT_TASK_COMM "PDS" -+#else - #define INIT_TASK_COMM "swapper" -+#endif /* !CONFIG_SCHED_PDS */ - - /* Attach to the init_task data structure for proper alignment */ - #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK -diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h -index 1b6d31da7cbc..dea181bdb1dd 100644 ---- a/include/linux/jiffies.h -+++ b/include/linux/jiffies.h -@@ -171,7 +171,7 @@ static inline u64 get_jiffies_64(void) - * Have the 32 bit jiffies value wrap 5 minutes after boot - * so jiffies wrap bugs show up earlier. - */ --#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) -+#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ)) - - /* - * Change timeval to jiffies, trying to avoid the -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 67a1d86981a9..8268cad4b0a2 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include - - /* task_struct member predeclarations (sorted alphabetically): */ - struct audit_context; -@@ -644,9 +645,13 @@ struct task_struct { - unsigned int flags; - unsigned int ptrace; - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_PDS) - struct llist_node wake_entry; -+#endif -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_PDS) - int on_cpu; -+#endif -+#ifdef CONFIG_SMP - #ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ - unsigned int cpu; -@@ -655,6 +660,7 @@ struct task_struct { - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; - -+#ifndef CONFIG_SCHED_PDS - /* - * recent_used_cpu is initially set as the last CPU used by a task - * that wakes affine another task. Waker/wakee relationships can -@@ -663,6 +669,7 @@ struct task_struct { - * used CPU that may be idle. 
- */ - int recent_used_cpu; -+#endif /* CONFIG_SCHED_PDS */ - int wake_cpu; - #endif - int on_rq; -@@ -672,13 +679,27 @@ struct task_struct { - int normal_prio; - unsigned int rt_priority; - -+#ifdef CONFIG_SCHED_PDS -+ int time_slice; -+ u64 deadline; -+ /* skip list level */ -+ int sl_level; -+ /* skip list node */ -+ struct skiplist_node sl_node; -+ /* 8bits prio and 56bits deadline for quick processing */ -+ u64 priodl; -+ u64 last_ran; -+ /* sched_clock time spent running */ -+ u64 sched_time; -+#else /* CONFIG_SCHED_PDS */ - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; -+ struct sched_dl_entity dl; -+#endif - #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; - #endif -- struct sched_dl_entity dl; - - #ifdef CONFIG_UCLAMP_TASK - /* Clamp values requested for a scheduling entity */ -@@ -1283,6 +1304,29 @@ struct task_struct { - */ - }; - -+#ifdef CONFIG_SCHED_PDS -+void cpu_scaling(int cpu); -+void cpu_nonscaling(int cpu); -+#define tsk_seruntime(t) ((t)->sched_time) -+/* replace the uncertian rt_timeout with 0UL */ -+#define tsk_rttimeout(t) (0UL) -+ -+#define task_running_idle(p) ((p)->prio == IDLE_PRIO) -+#else /* CFS */ -+extern int runqueue_is_locked(int cpu); -+static inline void cpu_scaling(int cpu) -+{ -+} -+ -+static inline void cpu_nonscaling(int cpu) -+{ -+} -+#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) -+#define tsk_rttimeout(t) ((t)->rt.timeout) -+ -+#define iso_task(p) (false) -+#endif /* CONFIG_SCHED_PDS */ -+ - static inline struct pid *task_pid(struct task_struct *task) - { - return task->thread_pid; -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 1aff00b65f3c..a5e5fc2c9170 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -1,5 +1,22 @@ - /* SPDX-License-Identifier: GPL-2.0 */ - -+#ifdef CONFIG_SCHED_PDS -+ -+#define __tsk_deadline(p) ((p)->deadline) -+ -+static inline int dl_prio(int prio) -+{ -+ return 1; -+} -+ -+static inline int dl_task(struct task_struct *p) -+{ -+ return 1; -+} -+#else -+ -+#define __tsk_deadline(p) ((p)->dl.deadline) -+ - /* - * SCHED_DEADLINE tasks has negative priorities, reflecting - * the fact that any of them has higher prio than RT and -@@ -19,6 +36,7 @@ static inline int dl_task(struct task_struct *p) - { - return dl_prio(p->prio); - } -+#endif /* CONFIG_SCHED_PDS */ - - static inline bool dl_time_before(u64 a, u64 b) - { -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 7d64feafc408..fba04bb91492 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -20,7 +20,18 @@ - */ - - #define MAX_USER_RT_PRIO 100 -+ -+#ifdef CONFIG_SCHED_PDS -+#define ISO_PRIO (MAX_USER_RT_PRIO) -+ -+#define MAX_RT_PRIO ((MAX_USER_RT_PRIO) + 1) -+ -+#define NORMAL_PRIO (MAX_RT_PRIO) -+#define IDLE_PRIO ((MAX_RT_PRIO) + 1) -+#define PRIO_LIMIT ((IDLE_PRIO) + 1) -+#else /* !CONFIG_SCHED_PDS */ - #define MAX_RT_PRIO MAX_USER_RT_PRIO -+#endif /* CONFIG_SCHED_PDS */ - - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..a96012e6f15e 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return true; -+#ifndef CONFIG_SCHED_PDS - if (policy == SCHED_DEADLINE) - return true; -+#endif - return 
false; - } - -diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h -index 4b1c3b664f51..f186b8119ad6 100644 ---- a/include/linux/sched/task.h -+++ b/include/linux/sched/task.h -@@ -99,7 +99,7 @@ extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); - extern void free_task(struct task_struct *tsk); - - /* sched_exec is called by processes performing an exec */ --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_PDS) - extern void sched_exec(void); - #else - #define sched_exec() {} -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -new file mode 100644 -index 000000000000..713fedd8034f ---- /dev/null -+++ b/include/linux/skip_list.h -@@ -0,0 +1,177 @@ -+/* -+ Copyright (C) 2016 Alfred Chen. -+ -+ Code based on Con Kolivas's skip list implementation for BFS, and -+ which is based on example originally by William Pugh. -+ -+Skip Lists are a probabilistic alternative to balanced trees, as -+described in the June 1990 issue of CACM and were invented by -+William Pugh in 1987. -+ -+A couple of comments about this implementation: -+ -+This file only provides a infrastructure of skip list. -+ -+skiplist_node is embedded into container data structure, to get rid the -+dependency of kmalloc/kfree operation in scheduler code. -+ -+A customized search function should be defined using DEFINE_SKIPLIST_INSERT -+macro and be used for skip list insert operation. -+ -+Random Level is also not defined in this file, instead, it should be customized -+implemented and set to node->level then pass to the customized skiplist_insert -+function. -+ -+Levels start at zero and go up to (NUM_SKIPLIST_LEVEL -1) -+ -+NUM_SKIPLIST_LEVEL in this implementation is 8 instead of origin 16, -+considering that there will be 256 entries to enable the top level when using -+random level p=0.5, and that number is more than enough for a run queue usage -+in a scheduler usage. And it also help to reduce the memory usage of the -+embedded skip list node in task_struct to about 50%. -+ -+The insertion routine has been implemented so as to use the -+dirty hack described in the CACM paper: if a random level is -+generated that is more than the current maximum level, the -+current maximum level plus one is used instead. -+ -+BFS Notes: In this implementation of skiplists, there are bidirectional -+next/prev pointers and the insert function returns a pointer to the actual -+node the value is stored. The key here is chosen by the scheduler so as to -+sort tasks according to the priority list requirements and is no longer used -+by the scheduler after insertion. The scheduler lookup, however, occurs in -+O(1) time because it is always the first item in the level 0 linked list. -+Since the task struct stores a copy of the node pointer upon skiplist_insert, -+it can also remove it much faster than the original implementation with the -+aid of prev<->next pointer manipulation and no searching. 
-+*/ -+#ifndef _LINUX_SKIP_LIST_H -+#define _LINUX_SKIP_LIST_H -+ -+#include -+ -+#define NUM_SKIPLIST_LEVEL (8) -+ -+struct skiplist_node { -+ int level; /* Levels in this node */ -+ struct skiplist_node *next[NUM_SKIPLIST_LEVEL]; -+ struct skiplist_node *prev[NUM_SKIPLIST_LEVEL]; -+}; -+ -+#define SKIPLIST_NODE_INIT(name) { 0,\ -+ {&name, &name, &name, &name,\ -+ &name, &name, &name, &name},\ -+ {&name, &name, &name, &name,\ -+ &name, &name, &name, &name},\ -+ } -+ -+static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) -+{ -+ /* only level 0 ->next matters in skiplist_empty()*/ -+ WRITE_ONCE(node->next[0], node); -+} -+ -+/** -+ * FULL_INIT_SKIPLIST_NODE -- fully init a skiplist_node, expecially for header -+ * @node: the skip list node to be inited. -+ */ -+static inline void FULL_INIT_SKIPLIST_NODE(struct skiplist_node *node) -+{ -+ int i; -+ -+ node->level = 0; -+ for (i = 0; i < NUM_SKIPLIST_LEVEL; i++) { -+ WRITE_ONCE(node->next[i], node); -+ node->prev[i] = node; -+ } -+} -+ -+/** -+ * skiplist_empty - test whether a skip list is empty -+ * @head: the skip list to test. -+ */ -+static inline int skiplist_empty(const struct skiplist_node *head) -+{ -+ return READ_ONCE(head->next[0]) == head; -+} -+ -+/** -+ * skiplist_entry - get the struct for this entry -+ * @ptr: the &struct skiplist_node pointer. -+ * @type: the type of the struct this is embedded in. -+ * @member: the name of the skiplist_node within the struct. -+ */ -+#define skiplist_entry(ptr, type, member) \ -+ container_of(ptr, type, member) -+ -+/** -+ * DEFINE_SKIPLIST_INSERT_FUNC -- macro to define a customized skip list insert -+ * function, which takes two parameters, first one is the header node of the -+ * skip list, second one is the skip list node to be inserted -+ * @func_name: the customized skip list insert function name -+ * @search_func: the search function to be used, which takes two parameters, -+ * 1st one is the itrator of skiplist_node in the list, the 2nd is the skip list -+ * node to be inserted, the function should return true if search should be -+ * continued, otherwise return false. -+ * Returns 1 if @node is inserted as the first item of skip list at level zero, -+ * otherwise 0 -+ */ -+#define DEFINE_SKIPLIST_INSERT_FUNC(func_name, search_func)\ -+static inline int func_name(struct skiplist_node *head, struct skiplist_node *node)\ -+{\ -+ struct skiplist_node *update[NUM_SKIPLIST_LEVEL];\ -+ struct skiplist_node *p, *q;\ -+ int k = head->level;\ -+\ -+ p = head;\ -+ do {\ -+ while (q = p->next[k], q != head && search_func(q, node))\ -+ p = q;\ -+ update[k] = p;\ -+ } while (--k >= 0);\ -+\ -+ k = node->level;\ -+ if (unlikely(k > head->level)) {\ -+ node->level = k = ++head->level;\ -+ update[k] = head;\ -+ }\ -+\ -+ do {\ -+ p = update[k];\ -+ q = p->next[k];\ -+ node->next[k] = q;\ -+ p->next[k] = node;\ -+ node->prev[k] = p;\ -+ q->prev[k] = node;\ -+ } while (--k >= 0);\ -+\ -+ return (p == head);\ -+} -+ -+/** -+ * skiplist_del_init -- delete skip list node from a skip list and reset it's -+ * init state -+ * @head: the header node of the skip list to be deleted from. -+ * @node: the skip list node to be deleted, the caller need to ensure @node is -+ * in skip list which @head represent. 
-+ * Returns 1 if @node is the first item of skip level at level zero, otherwise 0 -+ */ -+static inline int -+skiplist_del_init(struct skiplist_node *head, struct skiplist_node *node) -+{ -+ int l, m = node->level; -+ -+ for (l = 0; l <= m; l++) { -+ node->prev[l]->next[l] = node->next[l]; -+ node->next[l]->prev[l] = node->prev[l]; -+ } -+ if (m == head->level && m > 0) { -+ while (head->next[m] == head && m > 0) -+ m--; -+ head->level = m; -+ } -+ INIT_SKIPLIST_NODE(node); -+ -+ return (node->prev[0] == head); -+} -+#endif /* _LINUX_SKIP_LIST_H */ -diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h -index 25b4fa00bad1..fc0aabdce15f 100644 ---- a/include/uapi/linux/sched.h -+++ b/include/uapi/linux/sched.h -@@ -84,7 +84,10 @@ struct clone_args { - #define SCHED_FIFO 1 - #define SCHED_RR 2 - #define SCHED_BATCH 3 --/* SCHED_ISO: reserved but not implemented yet */ -+/* SCHED_ISO: Implemented in BFS/MuQSSPDS only */ -+#ifdef CONFIG_SCHED_PDS -+#define SCHED_ISO 4 -+#endif - #define SCHED_IDLE 5 - #define SCHED_DEADLINE 6 - -diff --git a/init/Kconfig b/init/Kconfig -index b4daad2bac23..ee3b9957cf3b 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -73,6 +73,21 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config SCHED_PDS -+ bool "PDS-mq cpu scheduler" -+ help -+ The Priority and Deadline based Skip list multiple queue CPU -+ Scheduler for excellent interactivity and responsiveness on the -+ desktop and solid scalability on normal hardware and commodity -+ servers. -+ -+ Currently incompatible with the Group CPU scheduler, and RCU TORTURE -+ TEST so these options are disabled. -+ -+ Say Y here. -+ default y -+ -+ - config BROKEN - bool - -@@ -802,6 +817,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION -+ depends on !SCHED_PDS - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -903,7 +919,7 @@ menuconfig CGROUP_SCHED - bandwidth allocation to such task groups. It uses cgroups to group - tasks. - --if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_PDS - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED -@@ -1032,6 +1048,7 @@ config CGROUP_DEVICE - - config CGROUP_CPUACCT - bool "Simple CPU accounting controller" -+ depends on !SCHED_PDS - help - Provides a simple controller for monitoring the - total CPU consumed by the tasks in a cgroup. 
-@@ -1150,6 +1167,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -+ depends on !SCHED_PDS - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -diff --git a/init/init_task.c b/init/init_task.c -index 9e5cbe5eab7b..89787e2feb60 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -58,6 +58,126 @@ struct task_struct init_task - __init_task_data - #endif - = { -+#ifdef CONFIG_SCHED_PDS -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ .thread_info = INIT_THREAD_INFO(init_task), -+ .stack_refcount = ATOMIC_INIT(1), -+#endif -+ .state = 0, -+ .stack = init_stack, -+ .usage = ATOMIC_INIT(2), -+ .flags = PF_KTHREAD, -+ .prio = NORMAL_PRIO, -+ .static_prio = MAX_PRIO - 20, -+ .normal_prio = NORMAL_PRIO, -+ .deadline = 0, /* PDS only */ -+ .policy = SCHED_NORMAL, -+ .cpus_ptr = &init_task.cpus_mask, -+ .cpus_mask = CPU_MASK_ALL, -+ .nr_cpus_allowed= NR_CPUS, -+ .mm = NULL, -+ .active_mm = &init_mm, -+ .restart_block = { -+ .fn = do_no_restart_syscall, -+ }, -+ .sl_level = 0, /* PDS only */ -+ .sl_node = SKIPLIST_NODE_INIT(init_task.sl_node), /* PDS only */ -+ .time_slice = HZ, /* PDS only */ -+ .tasks = LIST_HEAD_INIT(init_task.tasks), -+#ifdef CONFIG_SMP -+ .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), -+#endif -+#ifdef CONFIG_CGROUP_SCHED -+ .sched_task_group = &root_task_group, -+#endif -+ .ptraced = LIST_HEAD_INIT(init_task.ptraced), -+ .ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry), -+ .real_parent = &init_task, -+ .parent = &init_task, -+ .children = LIST_HEAD_INIT(init_task.children), -+ .sibling = LIST_HEAD_INIT(init_task.sibling), -+ .group_leader = &init_task, -+ RCU_POINTER_INITIALIZER(real_cred, &init_cred), -+ RCU_POINTER_INITIALIZER(cred, &init_cred), -+ .comm = INIT_TASK_COMM, -+ .thread = INIT_THREAD, -+ .fs = &init_fs, -+ .files = &init_files, -+ .signal = &init_signals, -+ .sighand = &init_sighand, -+ .nsproxy = &init_nsproxy, -+ .pending = { -+ .list = LIST_HEAD_INIT(init_task.pending.list), -+ .signal = {{0}} -+ }, -+ .blocked = {{0}}, -+ .alloc_lock = __SPIN_LOCK_UNLOCKED(init_task.alloc_lock), -+ .journal_info = NULL, -+ INIT_CPU_TIMERS(init_task) -+ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), -+ .timer_slack_ns = 50000, /* 50 usec default slack */ -+ .thread_pid = &init_struct_pid, -+ .thread_group = LIST_HEAD_INIT(init_task.thread_group), -+ .thread_node = LIST_HEAD_INIT(init_signals.thread_head), -+#ifdef CONFIG_AUDITSYSCALL -+ .loginuid = INVALID_UID, -+ .sessionid = AUDIT_SID_UNSET, -+#endif -+#ifdef CONFIG_PERF_EVENTS -+ .perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex), -+ .perf_event_list = LIST_HEAD_INIT(init_task.perf_event_list), -+#endif -+#ifdef CONFIG_PREEMPT_RCU -+ .rcu_read_lock_nesting = 0, -+ .rcu_read_unlock_special.s = 0, -+ .rcu_node_entry = LIST_HEAD_INIT(init_task.rcu_node_entry), -+ .rcu_blocked_node = NULL, -+#endif -+#ifdef CONFIG_TASKS_RCU -+ .rcu_tasks_holdout = false, -+ .rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list), -+ .rcu_tasks_idle_cpu = -1, -+#endif -+#ifdef CONFIG_CPUSETS -+ .mems_allowed_seq = SEQCNT_ZERO(init_task.mems_allowed_seq), -+#endif -+#ifdef CONFIG_RT_MUTEXES -+ .pi_waiters = RB_ROOT_CACHED, -+ .pi_top_task = NULL, -+#endif -+ INIT_PREV_CPUTIME(init_task) -+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -+ .vtime.seqcount = SEQCNT_ZERO(init_task.vtime_seqcount), -+ .vtime.starttime = 0, -+ .vtime.state = VTIME_SYS, -+#endif -+#ifdef CONFIG_NUMA_BALANCING -+ .numa_preferred_nid = -1, -+ 
.numa_group = NULL, -+ .numa_faults = NULL, -+#endif -+#ifdef CONFIG_KASAN -+ .kasan_depth = 1, -+#endif -+#ifdef CONFIG_TRACE_IRQFLAGS -+ .softirqs_enabled = 1, -+#endif -+#ifdef CONFIG_LOCKDEP -+ .lockdep_recursion = 0, -+#endif -+#ifdef CONFIG_FUNCTION_GRAPH_TRACER -+ .ret_stack = NULL, -+#endif -+#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPT) -+ .trace_recursion = 0, -+#endif -+#ifdef CONFIG_LIVEPATCH -+ .patch_state = KLP_UNDEFINED, -+#endif -+#ifdef CONFIG_SECURITY -+ .security = NULL, -+#endif -+#else /* CONFIG_SCHED_PDS */ - #ifdef CONFIG_THREAD_INFO_IN_TASK - .thread_info = INIT_THREAD_INFO(init_task), - .stack_refcount = REFCOUNT_INIT(1), -@@ -181,6 +301,7 @@ struct task_struct init_task - #ifdef CONFIG_SECURITY - .security = NULL, - #endif -+#endif /* CONFIG_SCHED_PDS */ - }; - EXPORT_SYMBOL(init_task); - -diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index c87ee6412b36..4045c8532027 100644 ---- a/kernel/cgroup/cpuset.c -+++ b/kernel/cgroup/cpuset.c -@@ -632,7 +632,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) - return ret; - } - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_PDS) - /* - * Helper routine for generate_sched_domains(). - * Do cpusets a, b have overlapping effective cpus_allowed masks? -@@ -1007,7 +1007,7 @@ static void rebuild_sched_domains_locked(void) - /* Have scheduler rebuild the domains */ - partition_and_rebuild_sched_domains(ndoms, doms, attr); - } --#else /* !CONFIG_SMP */ -+#else /* !CONFIG_SMP || CONFIG_SCHED_PDS */ - static void rebuild_sched_domains_locked(void) - { - } -diff --git a/kernel/delayacct.c b/kernel/delayacct.c -index 27725754ac99..769d773c7182 100644 ---- a/kernel/delayacct.c -+++ b/kernel/delayacct.c -@@ -106,7 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) - */ - t1 = tsk->sched_info.pcount; - t2 = tsk->sched_info.run_delay; -- t3 = tsk->se.sum_exec_runtime; -+ t3 = tsk_seruntime(tsk); - - d->cpu_count += t1; - -diff --git a/kernel/exit.c b/kernel/exit.c -index a46a50d67002..58043176b285 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -131,7 +131,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->curr_target = next_thread(tsk); - } - -- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, -+ add_device_randomness((const void*) &tsk_seruntime(tsk), - sizeof(unsigned long long)); - - /* -@@ -152,7 +152,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->inblock += task_io_get_inblock(tsk); - sig->oublock += task_io_get_oublock(tsk); - task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; -+ sig->sum_sched_runtime += tsk_seruntime(tsk); - sig->nr_threads--; - __unhash_process(tsk, group_dead); - write_sequnlock(&sig->stats_lock); -diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c -index cdf318d86dd6..baa525865d5c 100644 ---- a/kernel/livepatch/transition.c -+++ b/kernel/livepatch/transition.c -@@ -306,7 +306,11 @@ static bool klp_try_switch_task(struct task_struct *task) - */ - rq = task_rq_lock(task, &flags); - -+#ifdef CONFIG_SCHED_PDS -+ if (task_running(task) && task != current) { -+#else - if (task_running(rq, task) && task != current) { -+#endif - snprintf(err_buf, STACK_ERR_BUF_SIZE, - "%s: %s:%d is running\n", __func__, task->comm, - task->pid); -diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index 2874bf556162..fad8a279fdfa 100644 ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c 
-@@ -229,7 +229,7 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, - * Only use with rt_mutex_waiter_{less,equal}() - */ - #define task_to_waiter(p) \ -- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } -+ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = __tsk_deadline(p) } - - static inline int - rt_mutex_waiter_less(struct rt_mutex_waiter *left, -@@ -680,7 +680,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, - * the values of the node being removed. - */ - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - rt_mutex_enqueue(lock, waiter); - -@@ -953,7 +953,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, - waiter->task = task; - waiter->lock = lock; - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - /* Get the top priority waiter on the lock */ - if (rt_mutex_has_waiters(lock)) -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 21fb5a5662b5..8ebe4e33fb5f 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -16,15 +16,21 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) - CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer - endif - --obj-y += core.o loadavg.o clock.o cputime.o --obj-y += idle.o fair.o rt.o deadline.o --obj-y += wait.o wait_bit.o swait.o completion.o -- --obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o -+ifdef CONFIG_SCHED_PDS -+obj-y += pds.o -+else -+obj-y += core.o -+obj-y += fair.o rt.o deadline.o -+obj-$(CONFIG_SMP) += cpudeadline.o topology.o stop_task.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o --obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o - obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o -+endif -+obj-y += loadavg.o clock.o cputime.o -+obj-y += idle.o -+obj-y += wait.o wait_bit.o swait.o completion.o -+obj-$(CONFIG_SMP) += cpupri.o pelt.o -+obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o - obj-$(CONFIG_MEMBARRIER) += membarrier.o -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 86800b4d5453..07f278dc3137 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -185,6 +185,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - -+#ifndef CONFIG_SCHED_PDS - /* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. -@@ -302,6 +303,13 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) - - return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); - } -+#else /* CONFIG_SCHED_PDS */ -+static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) -+{ -+ sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); -+ return sg_cpu->max; -+} -+#endif - - /** - * sugov_iowait_reset() - Reset the IO boost status of a CPU. 
-@@ -445,7 +453,9 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } - */ - static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) - { -+#ifndef CONFIG_SCHED_PDS - if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) -+#endif - sg_policy->limits_changed = true; - } - -@@ -688,6 +698,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - } - - ret = sched_setattr_nocheck(thread, &attr); -+ - if (ret) { - kthread_stop(thread); - pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); -@@ -918,6 +929,7 @@ static int __init sugov_register(void) - fs_initcall(sugov_register); - - #ifdef CONFIG_ENERGY_MODEL -+#ifndef CONFIG_SCHED_PDS - extern bool sched_energy_update; - extern struct mutex sched_energy_mutex; - -@@ -948,4 +960,10 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - } - - } -+#else /* CONFIG_SCHED_PDS */ -+void sched_cpufreq_governor_change(struct cpufreq_policy *policy, -+ struct cpufreq_governor *old_gov) -+{ -+} -+#endif - #endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index 46ed4e1383e2..0a9548ee995c 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -122,7 +122,12 @@ void account_user_time(struct task_struct *p, u64 cputime) - p->utime += cputime; - account_group_user_time(p, cputime); - -+#ifdef CONFIG_SCHED_PDS -+ index = (task_nice(p) > 0 || task_running_idle(p)) ? CPUTIME_NICE : -+ CPUTIME_USER; -+#else - index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; -+#endif - - /* Add user time to cpustat. */ - task_group_account_field(p, index, cputime); -@@ -146,7 +151,11 @@ void account_guest_time(struct task_struct *p, u64 cputime) - p->gtime += cputime; - - /* Add guest time to cpustat. */ -+#ifdef CONFIG_SCHED_PDS -+ if (task_nice(p) > 0 || task_running_idle(p)) { -+#else - if (task_nice(p) > 0) { -+#endif - cpustat[CPUTIME_NICE] += cputime; - cpustat[CPUTIME_GUEST_NICE] += cputime; - } else { -@@ -269,7 +278,7 @@ static inline u64 account_other_time(u64 max) - #ifdef CONFIG_64BIT - static inline u64 read_sum_exec_runtime(struct task_struct *t) - { -- return t->se.sum_exec_runtime; -+ return tsk_seruntime(t); - } - #else - static u64 read_sum_exec_runtime(struct task_struct *t) -@@ -279,7 +288,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) - struct rq *rq; - - rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -+ ns = tsk_seruntime(t); - task_rq_unlock(rq, t, &rf); - - return ns; -@@ -663,7 +672,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - task_cputime(p, &cputime.utime, &cputime.stime); -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index f65ef1e2f204..454fa7e460e3 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -355,6 +355,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_PDS - /* - * idle-task scheduling class. 
- */ -@@ -479,3 +480,4 @@ const struct sched_class idle_sched_class = { - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif -diff --git a/kernel/sched/pds.c b/kernel/sched/pds.c -new file mode 100644 -index 000000000000..aefbd9cebcfb ---- /dev/null -+++ b/kernel/sched/pds.c -@@ -0,0 +1,6566 @@ -+/* -+ * kernel/sched/pds.c, was kernel/sched.c -+ * -+ * PDS-mq Core kernel scheduler code and related syscalls -+ * -+ * Copyright (C) 1991-2002 Linus Torvalds -+ * -+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes -+ * a whole lot of those previous things. -+ * 2017-09-06 Priority and Deadline based Skip list multiple queue kernel -+ * scheduler by Alfred Chen. -+ */ -+#include "pds_sched.h" -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+ -+#include "../workqueue_internal.h" -+#include "../smpboot.h" -+ -+#include "pelt.h" -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+ -+#define rt_prio(prio) ((prio) < MAX_RT_PRIO) -+#define rt_task(p) rt_prio((p)->prio) -+#define rt_policy(policy) ((policy) == SCHED_FIFO || \ -+ (policy) == SCHED_RR || \ -+ (policy) == SCHED_ISO) -+#define task_has_rt_policy(p) (rt_policy((p)->policy)) -+ -+#define idle_policy(policy) ((policy) == SCHED_IDLE) -+#define idleprio_task(p) unlikely(idle_policy((p)->policy)) -+ -+#define STOP_PRIO (MAX_RT_PRIO - 1) -+ -+/* -+ * Some helpers for converting to/from various scales. Use shifts to get -+ * approximate multiples of ten for less overhead. -+ */ -+#define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) -+#define JIFFY_NS (1000000000 / HZ) -+#define HALF_JIFFY_NS (1000000000 / HZ / 2) -+#define HALF_JIFFY_US (1000000 / HZ / 2) -+#define MS_TO_NS(TIME) ((TIME) << 20) -+#define MS_TO_US(TIME) ((TIME) << 10) -+#define NS_TO_MS(TIME) ((TIME) >> 20) -+#define NS_TO_US(TIME) ((TIME) >> 10) -+#define US_TO_NS(TIME) ((TIME) << 10) -+ -+#define RESCHED_US (100) /* Reschedule if less than this many μs left */ -+ -+enum { -+ BASE_CPU_AFFINITY_CHK_LEVEL = 1, -+#ifdef CONFIG_SCHED_SMT -+ SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+#ifdef CONFIG_SCHED_MC -+ MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+ NR_CPU_AFFINITY_CHK_LEVEL -+}; -+ -+static inline void print_scheduler_version(void) -+{ -+ printk(KERN_INFO "pds: PDS-mq CPU Scheduler 0.99o by Alfred Chen and kept alive artificially by Tk-Glitch.\n"); -+} -+ -+/* -+ * This is the time all tasks within the same priority round robin. -+ * Value is in ms and set to a minimum of 6ms. Scales with number of cpus. -+ * Tunable via /proc interface. 
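The shift-based conversion macros above trade exactness for cheap arithmetic: MS_TO_US() multiplies by 2^10 (~10^3) and MS_TO_NS() by 2^20 (~10^6), so a scheduler "millisecond" is really 1,048,576 ns. A minimal userspace sketch of the size of that approximation (illustrative only, not part of the patch itself):

/* Illustrative only: the shift-based time conversions used above
 * (2^10 ~ 1e3, 2^20 ~ 1e6). Userspace demo, not kernel code. */
#include <stdio.h>

#define MS_TO_NS(t) ((t) << 20)        /* ~*1e6, actually *1048576 */
#define MS_TO_US(t) ((t) << 10)        /* ~*1e3, actually *1024    */

int main(void)
{
        unsigned long long ms = 4;      /* e.g. the rr_interval default */

        printf("%llu ms -> %llu 'us' (exact %llu)\n", ms, MS_TO_US(ms), ms * 1000ULL);
        printf("%llu ms -> %llu 'ns' (exact %llu)\n", ms, MS_TO_NS(ms), ms * 1000000ULL);
        /* Roughly 2.4% and 4.9% overshoot, accepted for the cost of a shift. */
        return 0;
}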
-+ */ -+#define SCHED_DEFAULT_RR (4) -+int rr_interval __read_mostly = SCHED_DEFAULT_RR; -+ -+static int __init rr_interval_set(char *str) -+{ -+ u32 rr; -+ -+ pr_info("rr_interval: "); -+ if (kstrtouint(str, 0, &rr)) { -+ pr_cont("using default of %u, unable to parse %s\n", -+ rr_interval, str); -+ return 1; -+ } -+ -+ rr_interval = rr; -+ pr_cont("%d\n", rr_interval); -+ -+ return 1; -+} -+__setup("rr_interval=", rr_interval_set); -+ -+ -+static const u64 sched_prio2deadline[NICE_WIDTH] = { -+/* -20 */ 6291456, 6920601, 7612661, 8373927, 9211319, -+/* -15 */ 10132450, 11145695, 12260264, 13486290, 14834919, -+/* -10 */ 16318410, 17950251, 19745276, 21719803, 23891783, -+/* -5 */ 26280961, 28909057, 31799962, 34979958, 38477953, -+/* 0 */ 42325748, 46558322, 51214154, 56335569, 61969125, -+/* 5 */ 68166037, 74982640, 82480904, 90728994, 99801893, -+/* 10 */ 109782082, 120760290, 132836319, 146119950, 160731945, -+/* 15 */ 176805139, 194485652, 213934217, 235327638, 258860401 -+}; -+ -+/** -+ * sched_yield_type - Choose what sort of yield sched_yield will perform. -+ * 0: No yield. -+ * 1: Yield only to better priority/deadline tasks. (default) -+ * 2: Expire timeslice and recalculate deadline. -+ */ -+int sched_yield_type __read_mostly = 1; -+ -+/* -+ * The quota handed out to tasks of all priority levels when refilling their -+ * time_slice. -+ */ -+static inline int timeslice(void) -+{ -+ return MS_TO_US(rr_interval); -+} -+ -+#ifdef CONFIG_SMP -+enum { -+SCHED_RQ_EMPTY = 0, -+SCHED_RQ_IDLE, -+SCHED_RQ_NORMAL_0, -+SCHED_RQ_NORMAL_1, -+SCHED_RQ_NORMAL_2, -+SCHED_RQ_NORMAL_3, -+SCHED_RQ_NORMAL_4, -+SCHED_RQ_NORMAL_5, -+SCHED_RQ_NORMAL_6, -+SCHED_RQ_NORMAL_7, -+SCHED_RQ_ISO, -+SCHED_RQ_RT, -+NR_SCHED_RQ_QUEUED_LEVEL -+}; -+ -+static cpumask_t sched_rq_queued_masks[NR_SCHED_RQ_QUEUED_LEVEL] -+____cacheline_aligned_in_smp; -+ -+static DECLARE_BITMAP(sched_rq_queued_masks_bitmap, NR_SCHED_RQ_QUEUED_LEVEL) -+____cacheline_aligned_in_smp; -+ -+static cpumask_t sched_rq_pending_masks[NR_SCHED_RQ_QUEUED_LEVEL] -+____cacheline_aligned_in_smp; -+ -+static DECLARE_BITMAP(sched_rq_pending_masks_bitmap, NR_SCHED_RQ_QUEUED_LEVEL) -+____cacheline_aligned_in_smp; -+ -+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_chk_masks); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_start_mask); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_chk_end_masks); -+ -+#ifdef CONFIG_SCHED_SMT -+DEFINE_PER_CPU(int, sched_sibling_cpu); -+DEFINE_STATIC_KEY_FALSE(sched_smt_present); -+EXPORT_SYMBOL_GPL(sched_smt_present); -+ -+static cpumask_t sched_cpu_sg_idle_mask ____cacheline_aligned_in_smp; -+ -+#ifdef CONFIG_SMT_NICE -+/* -+ * Preemptible sibling group mask -+ * Which all sibling cpus are running at PRIO_LIMIT or IDLE_PRIO -+ */ -+static cpumask_t sched_cpu_psg_mask ____cacheline_aligned_in_smp; -+/* -+ * SMT supressed mask -+ * When a cpu is running task with NORMAL/ISO/RT policy, its sibling cpu -+ * will be supressed to run IDLE priority task. -+ */ -+static cpumask_t sched_smt_supressed_mask ____cacheline_aligned_in_smp; -+#endif /* CONFIG_SMT_NICE */ -+#endif -+ -+static int sched_rq_prio[NR_CPUS] ____cacheline_aligned; -+ -+/* -+ * Keep a unique ID per domain (we use the first CPUs number in the cpumask of -+ * the domain), this allows us to quickly tell if two cpus are in the same cache -+ * domain, see cpus_share_cache(). 
-+ */ -+DEFINE_PER_CPU(int, sd_llc_id); -+ -+int __weak arch_sd_sibling_asym_packing(void) -+{ -+ return 0*SD_ASYM_PACKING; -+} -+#else -+struct rq *uprq; -+#endif /* CONFIG_SMP */ -+ -+static DEFINE_MUTEX(sched_hotcpu_mutex); -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+/* -+ * Context: p->pi_lock -+ */ -+static inline struct rq -+*__task_access_lock(struct task_struct *p, raw_spinlock_t **plock) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock(&rq->lock); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ *plock = NULL; -+ return rq; -+ } -+ } -+} -+ -+static inline void -+__task_access_unlock(struct task_struct *p, raw_spinlock_t *lock) -+{ -+ if (NULL != lock) -+ raw_spin_unlock(lock); -+} -+ -+static inline struct rq -+*task_access_lock_irqsave(struct task_struct *p, raw_spinlock_t **plock, -+ unsigned long *flags) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock_irqsave(&rq->lock, *flags); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&rq->lock, *flags); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ raw_spin_lock_irqsave(&p->pi_lock, *flags); -+ if (likely(!p->on_cpu && !p->on_rq && -+ rq == task_rq(p))) { -+ *plock = &p->pi_lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags); -+ } -+ } -+} -+ -+static inline void -+task_access_unlock_irqrestore(struct task_struct *p, raw_spinlock_t *lock, -+ unsigned long *flags) -+{ -+ raw_spin_unlock_irqrestore(lock, *flags); -+} -+ -+/* -+ * __task_rq_lock - lock the rq @p resides on. -+ */ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ for (;;) { -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) -+ return rq; -+ raw_spin_unlock(&rq->lock); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+/* -+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. -+ */ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ for (;;) { -+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ /* -+ * move_queued_task() task_rq_lock() -+ * -+ * ACQUIRE (rq->lock) -+ * [S] ->on_rq = MIGRATING [L] rq = task_rq() -+ * WMB (__set_task_cpu()) ACQUIRE (rq->lock); -+ * [S] ->cpu = new_cpu [L] task_rq() -+ * [L] ->on_rq -+ * RELEASE (rq->lock) -+ * -+ * If we observe the old CPU in task_rq_lock(), the acquire of -+ * the old rq->lock will fully serialize against the stores. 
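The access-lock helpers above all share one lock-and-revalidate pattern: read which runqueue the task currently appears to be on, take that runqueue's lock, then re-check that the task did not migrate in the meantime and retry if it did. A stripped-down userspace sketch of the idea, with pthread mutexes and hypothetical struct names standing in for the kernel types:

/* Illustrative lock-and-revalidate loop, loosely modelled on
 * __task_access_lock() above. All names here are hypothetical. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct rq   { pthread_mutex_t lock; };
struct task { _Atomic(struct rq *) rq; };

static struct rq *task_lock_rq(struct task *p)
{
        for (;;) {
                struct rq *rq = atomic_load(&p->rq);

                pthread_mutex_lock(&rq->lock);
                /* Re-check under the lock: did the task migrate while we
                 * were acquiring it? If so, drop the lock and retry. */
                if (atomic_load(&p->rq) == rq)
                        return rq;              /* returned locked */
                pthread_mutex_unlock(&rq->lock);
        }
}

int main(void)
{
        struct rq rq0 = { .lock = PTHREAD_MUTEX_INITIALIZER };
        struct task p = { .rq = &rq0 };
        struct rq *locked = task_lock_rq(&p);

        printf("locked rq %p for task %p\n", (void *)locked, (void *)&p);
        pthread_mutex_unlock(&locked->lock);
        return 0;
}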
-+ * -+ * If we observe the new CPU in task_rq_lock(), the address -+ * dependency headed by '[L] rq = task_rq()' and the acquire -+ * will pair with the WMB to ensure we then also see migrating. -+ */ -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+/* -+ * RQ-clock updating methods: -+ */ -+ -+static void update_rq_clock_task(struct rq *rq, s64 delta) -+{ -+/* -+ * In theory, the compile should just see 0 here, and optimize out the call -+ * to sched_rt_avg_update. But I don't trust it... -+ */ -+ s64 __maybe_unused steal = 0, irq_delta = 0; -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; -+ -+ /* -+ * Since irq_time is only updated on {soft,}irq_exit, we might run into -+ * this case when a previous update_rq_clock() happened inside a -+ * {soft,}irq region. -+ * -+ * When this happens, we stop ->clock_task and only update the -+ * prev_irq_time stamp to account for the part that fit, so that a next -+ * update will consume the rest. This ensures ->clock_task is -+ * monotonic. -+ * -+ * It does however cause some slight miss-attribution of {soft,}irq -+ * time, a more accurate solution would be to update the irq_time using -+ * the current rq->clock timestamp, except that would require using -+ * atomic ops. -+ */ -+ if (irq_delta > delta) -+ irq_delta = delta; -+ -+ rq->prev_irq_time += irq_delta; -+ delta -= irq_delta; -+#endif -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ if (static_key_false((¶virt_steal_rq_enabled))) { -+ steal = paravirt_steal_clock(cpu_of(rq)); -+ steal -= rq->prev_steal_time_rq; -+ -+ if (unlikely(steal > delta)) -+ steal = delta; -+ -+ rq->prev_steal_time_rq += steal; -+ -+ delta -= steal; -+ } -+#endif -+ -+ rq->clock_task += delta; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ if ((irq_delta + steal)) -+ update_irq_load_avg(rq, irq_delta + steal); -+#endif -+} -+ -+static inline void update_rq_clock(struct rq *rq) -+{ -+ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; -+ -+ if (unlikely(delta <= 0)) -+ return; -+ rq->clock += delta; -+ update_rq_clock_task(rq, delta); -+} -+ -+static inline void update_task_priodl(struct task_struct *p) -+{ -+ p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); -+} -+ -+/* -+ * Deadline is "now" in niffies + (offset by priority). Setting the deadline -+ * is the key to everything. It distributes CPU fairly amongst tasks of the -+ * same nice value, it proportions CPU according to nice level, it means the -+ * task that last woke up the longest ago has the earliest deadline, thus -+ * ensuring that interactive tasks get low latency on wake up. The CPU -+ * proportion works out to the square of the virtual deadline difference, so -+ * this equation will give nice 19 3% CPU compared to nice 0. -+ */ -+static inline u64 task_deadline_diff(const struct task_struct *p) -+{ -+ return sched_prio2deadline[TASK_USER_PRIO(p)]; -+} -+ -+static inline u64 static_deadline_diff(int static_prio) -+{ -+ return sched_prio2deadline[USER_PRIO(static_prio)]; -+} -+ -+/* -+ * The time_slice is only refilled when it is empty and that is when we set a -+ * new deadline for non-rt tasks. 
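update_task_priodl() above packs the two scheduling inputs into one 64-bit sort key: the priority occupies the top 8 bits and the deadline, shifted right by 8, fills the remainder, so an unsigned compare orders tasks first by priority and then by earliest virtual deadline. The per-nice deadline offsets come from sched_prio2deadline, where each entry is roughly 1.1x the previous one. A small userspace sketch of that packing (the example prio values are arbitrary):

/* Illustrative priodl packing as in update_task_priodl() above:
 * prio in the top 8 bits, deadline>>8 in the low 56 bits. */
#include <stdint.h>
#include <stdio.h>

static uint64_t make_priodl(unsigned int prio, uint64_t deadline)
{
        return ((uint64_t)prio << 56) | (deadline >> 8);
}

int main(void)
{
        /* Same prio: the earlier deadline compares lower and wins. */
        uint64_t a = make_priodl(120, 42325748ULL);  /* nice 0 offset from the table  */
        uint64_t b = make_priodl(120, 61969125ULL);  /* nice +4 offset from the table */
        /* Lower prio value: the top byte dominates whatever the deadline is. */
        uint64_t c = make_priodl(119, ~0ULL >> 16);

        printf("a < b: %d, c < a: %d\n", a < b, c < a);   /* prints 1, 1 */
        return 0;
}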
-+ */ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ p->time_slice = timeslice(); -+ if (p->prio >= NORMAL_PRIO) -+ p->deadline = rq->clock + task_deadline_diff(p); -+ -+ update_task_priodl(p); -+} -+ -+static inline struct task_struct *rq_first_queued_task(struct rq *rq) -+{ -+ struct skiplist_node *node = rq->sl_header.next[0]; -+ -+ if (node == &rq->sl_header) -+ return rq->idle; -+ -+ return skiplist_entry(node, struct task_struct, sl_node); -+} -+ -+static inline struct task_struct *rq_second_queued_task(struct rq *rq) -+{ -+ struct skiplist_node *node = rq->sl_header.next[0]->next[0]; -+ -+ if (node == &rq->sl_header) -+ return rq->idle; -+ -+ return skiplist_entry(node, struct task_struct, sl_node); -+} -+ -+static inline int is_second_in_rq(struct task_struct *p, struct rq *rq) -+{ -+ return (p->sl_node.prev[0]->prev[0] == &rq->sl_header); -+} -+ -+static const int task_dl_hash_tbl[] = { -+/* 0 4 8 12 */ -+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -+/* 16 20 24 28 */ -+ 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 7 -+}; -+ -+static inline int -+task_deadline_level(const struct task_struct *p, const struct rq *rq) -+{ -+ u64 delta = (rq->clock + sched_prio2deadline[39] - p->deadline) >> 23; -+ -+ delta = min((size_t)delta, ARRAY_SIZE(task_dl_hash_tbl) - 1); -+ return task_dl_hash_tbl[delta]; -+} -+ -+/* -+ * cmpxchg based fetch_or, macro so it works for different integer types -+ */ -+#define fetch_or(ptr, mask) \ -+ ({ \ -+ typeof(ptr) _ptr = (ptr); \ -+ typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _old, _val = *_ptr; \ -+ \ -+ for (;;) { \ -+ _old = cmpxchg(_ptr, _val, _val | _mask); \ -+ if (_old == _val) \ -+ break; \ -+ _val = _old; \ -+ } \ -+ _old; \ -+}) -+ -+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) -+/* -+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * this avoids any races wrt polling state changes and thereby avoids -+ * spurious IPIs. -+ */ -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+} -+ -+/* -+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. -+ * -+ * If this returns true, then the idle task promises to call -+ * sched_ttwu_pending() and reschedule soon. 
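The fetch_or() macro above is the classic compare-and-swap retry loop: keep attempting cmpxchg until the OR of the mask sticks, then hand back the value that was there before. In portable C11 the same operation is a one-line atomic_fetch_or(); a minimal sketch of both forms (illustrative, not the kernel macro):

/* Illustrative CAS-loop fetch_or, mirroring the macro above, next to
 * the C11 one-liner it emulates. Userspace sketch, hypothetical names. */
#include <stdatomic.h>
#include <stdio.h>

static unsigned long cas_fetch_or(_Atomic unsigned long *p, unsigned long mask)
{
        unsigned long old = atomic_load(p);

        /* Retry until no other thread changed *p between load and CAS;
         * 'old' is refreshed with the current value on each failure. */
        while (!atomic_compare_exchange_weak(p, &old, old | mask))
                ;
        return old;                     /* value before the OR */
}

int main(void)
{
        _Atomic unsigned long flags = 0x1;

        printf("before: %#lx\n", cas_fetch_or(&flags, 0x8));     /* 0x1 */
        printf("c11:    %#lx\n", atomic_fetch_or(&flags, 0x2));  /* 0x9 */
        printf("after:  %#lx\n", atomic_load(&flags));           /* 0xb */
        return 0;
}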
-+ */ -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); -+ -+ for (;;) { -+ if (!(val & _TIF_POLLING_NRFLAG)) -+ return false; -+ if (val & _TIF_NEED_RESCHED) -+ return true; -+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); -+ if (old == val) -+ break; -+ val = old; -+ } -+ return true; -+} -+ -+#else -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ return false; -+} -+#endif -+#endif -+ -+#ifdef CONFIG_SMP -+#ifdef CONFIG_SMT_NICE -+static void resched_cpu_if_curr_is(int cpu, int priority) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ rcu_read_lock(); -+ -+ if (rcu_dereference(rq->curr)->prio != priority) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ if (!do_raw_spin_trylock(&rq->lock)) -+ goto out; -+ spin_acquire(&rq->lock.dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ -+ if (priority == rq->curr->prio) -+ smp_send_reschedule(cpu); -+ /* Else CPU is not idle, do nothing here */ -+ -+ spin_release(&rq->lock.dep_map, 1, _RET_IP_); -+ do_raw_spin_unlock(&rq->lock); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+#endif /* CONFIG_SMT_NICE */ -+ -+static inline bool -+__update_cpumasks_bitmap(int cpu, unsigned long *plevel, unsigned long level, -+ cpumask_t cpumasks[], unsigned long bitmap[]) -+{ -+ if (*plevel == level) -+ return false; -+ -+ cpumask_clear_cpu(cpu, cpumasks + *plevel); -+ if (cpumask_empty(cpumasks + *plevel)) -+ clear_bit(*plevel, bitmap); -+ cpumask_set_cpu(cpu, cpumasks + level); -+ set_bit(level, bitmap); -+ -+ *plevel = level; -+ -+ return true; -+} -+ -+static inline int -+task_running_policy_level(const struct task_struct *p, const struct rq *rq) -+{ -+ int prio = p->prio; -+ -+ if (NORMAL_PRIO == prio) -+ return SCHED_RQ_NORMAL_0 + task_deadline_level(p, rq); -+ -+ if (ISO_PRIO == prio) -+ return SCHED_RQ_ISO; -+ if (prio < MAX_RT_PRIO) -+ return SCHED_RQ_RT; -+ return PRIO_LIMIT - prio; -+} -+ -+static inline void update_sched_rq_queued_masks_normal(struct rq *rq) -+{ -+ struct task_struct *p = rq_first_queued_task(rq); -+ -+ if (p->prio != NORMAL_PRIO) -+ return; -+ -+ __update_cpumasks_bitmap(cpu_of(rq), &rq->queued_level, -+ task_running_policy_level(p, rq), -+ &sched_rq_queued_masks[0], -+ &sched_rq_queued_masks_bitmap[0]); -+} -+ -+#ifdef CONFIG_SMT_NICE -+static inline void update_sched_cpu_psg_mask(const int cpu) -+{ -+ cpumask_t tmp; -+ -+ cpumask_or(&tmp, &sched_rq_queued_masks[SCHED_RQ_EMPTY], -+ &sched_rq_queued_masks[SCHED_RQ_IDLE]); -+ cpumask_and(&tmp, &tmp, cpu_smt_mask(cpu)); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_cpu_psg_mask, &sched_cpu_psg_mask, -+ cpu_smt_mask(cpu)); -+ else -+ cpumask_andnot(&sched_cpu_psg_mask, &sched_cpu_psg_mask, -+ cpu_smt_mask(cpu)); -+} -+#endif -+ -+static inline void update_sched_rq_queued_masks(struct rq *rq) -+{ -+ int cpu = cpu_of(rq); -+ struct task_struct *p = rq_first_queued_task(rq); -+ unsigned long level; -+#ifdef CONFIG_SCHED_SMT -+ unsigned long last_level = rq->queued_level; -+#endif -+ -+ level = task_running_policy_level(p, rq); -+ sched_rq_prio[cpu] = p->prio; -+ -+ if (!__update_cpumasks_bitmap(cpu, &rq->queued_level, level, -+ &sched_rq_queued_masks[0], -+ &sched_rq_queued_masks_bitmap[0])) -+ return; -+ -+#ifdef CONFIG_SCHED_SMT -+ if (cpu == 
per_cpu(sched_sibling_cpu, cpu)) -+ return; -+ -+ if (SCHED_RQ_EMPTY == last_level) { -+ cpumask_andnot(&sched_cpu_sg_idle_mask, &sched_cpu_sg_idle_mask, -+ cpu_smt_mask(cpu)); -+ } else if (SCHED_RQ_EMPTY == level) { -+ cpumask_t tmp; -+ -+ cpumask_and(&tmp, cpu_smt_mask(cpu), -+ &sched_rq_queued_masks[SCHED_RQ_EMPTY]); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_cpu_sg_idle_mask, cpu_smt_mask(cpu), -+ &sched_cpu_sg_idle_mask); -+ } -+ -+#ifdef CONFIG_SMT_NICE -+ if (level <= SCHED_RQ_IDLE && last_level > SCHED_RQ_IDLE) { -+ cpumask_clear_cpu(per_cpu(sched_sibling_cpu, cpu), -+ &sched_smt_supressed_mask); -+ update_sched_cpu_psg_mask(cpu); -+ resched_cpu_if_curr_is(per_cpu(sched_sibling_cpu, cpu), PRIO_LIMIT); -+ } else if (last_level <= SCHED_RQ_IDLE && level > SCHED_RQ_IDLE) { -+ cpumask_set_cpu(per_cpu(sched_sibling_cpu, cpu), -+ &sched_smt_supressed_mask); -+ update_sched_cpu_psg_mask(cpu); -+ resched_cpu_if_curr_is(per_cpu(sched_sibling_cpu, cpu), IDLE_PRIO); -+ } -+#endif /* CONFIG_SMT_NICE */ -+#endif -+} -+ -+static inline void update_sched_rq_pending_masks(struct rq *rq) -+{ -+ unsigned long level; -+ struct task_struct *p = rq_second_queued_task(rq); -+ -+ level = task_running_policy_level(p, rq); -+ -+ __update_cpumasks_bitmap(cpu_of(rq), &rq->pending_level, level, -+ &sched_rq_pending_masks[0], -+ &sched_rq_pending_masks_bitmap[0]); -+} -+ -+#else /* CONFIG_SMP */ -+static inline void update_sched_rq_queued_masks(struct rq *rq) {} -+static inline void update_sched_rq_queued_masks_normal(struct rq *rq) {} -+static inline void update_sched_rq_pending_masks(struct rq *rq) {} -+#endif -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out -+ * of nohz mode if necessary. -+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu; -+ -+ if (!tick_nohz_full_enabled()) -+ return; -+ -+ cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (rq->nr_running < 2) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+/* -+ * Removing from the runqueue. Deleting a task from the skip list is done -+ * via the stored node reference in the task struct and does not require a full -+ * look up. Thus it occurs in O(k) time where k is the "level" of the list the -+ * task was stored at - usually < 4, max 16. -+ * -+ * Context: rq->lock -+ */ -+static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "pds: dequeue task reside on cpu%d from cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ if (skiplist_del_init(&rq->sl_header, &p->sl_node)) { -+ update_sched_rq_queued_masks(rq); -+ update_sched_rq_pending_masks(rq); -+ } else if (is_second_in_rq(p, rq)) -+ update_sched_rq_pending_masks(rq); -+ rq->nr_running--; -+ -+ sched_update_tick_dependency(rq); -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); -+ -+ sched_info_dequeued(rq, p); -+} -+ -+/* -+ * To determine if it's safe for a task of SCHED_IDLE to actually run as -+ * an idle task, we ensure none of the following conditions are met. 
-+ */ -+static inline bool idleprio_suitable(struct task_struct *p) -+{ -+ return (!freezing(p) && !signal_pending(p) && -+ !(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING))); -+} -+ -+/* -+ * pds_skiplist_random_level -- Returns a pseudo-random level number for skip -+ * list node which is used in PDS run queue. -+ * -+ * In current implementation, based on testing, the first 8 bits in microseconds -+ * of niffies are suitable for random level population. -+ * find_first_bit() is used to satisfy p = 0.5 between each levels, and there -+ * should be platform hardware supported instruction(known as ctz/clz) to speed -+ * up this function. -+ * The skiplist level for a task is populated when task is created and doesn't -+ * change in task's life time. When task is being inserted into run queue, this -+ * skiplist level is set to task's sl_node->level, the skiplist insert function -+ * may change it based on current level of the skip lsit. -+ */ -+static inline int pds_skiplist_random_level(const struct task_struct *p) -+{ -+ long unsigned int randseed; -+ -+ /* -+ * 1. Some architectures don't have better than microsecond resolution -+ * so mask out ~microseconds as a factor of the random seed for skiplist -+ * insertion. -+ * 2. Use address of task structure pointer as another factor of the -+ * random seed for task burst forking scenario. -+ */ -+ randseed = (task_rq(p)->clock ^ (long unsigned int)p) >> 10; -+ -+ return find_first_bit(&randseed, NUM_SKIPLIST_LEVEL - 1); -+} -+ -+/** -+ * pds_skiplist_task_search -- search function used in PDS run queue skip list -+ * node insert operation. -+ * @it: iterator pointer to the node in the skip list -+ * @node: pointer to the skiplist_node to be inserted -+ * -+ * Returns true if key of @it is less or equal to key value of @node, otherwise -+ * false. -+ */ -+static inline bool -+pds_skiplist_task_search(struct skiplist_node *it, struct skiplist_node *node) -+{ -+ return (skiplist_entry(it, struct task_struct, sl_node)->priodl <= -+ skiplist_entry(node, struct task_struct, sl_node)->priodl); -+} -+ -+/* -+ * Define the skip list insert function for PDS -+ */ -+DEFINE_SKIPLIST_INSERT_FUNC(pds_skiplist_insert, pds_skiplist_task_search); -+ -+/* -+ * Adding task to the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "pds: enqueue task reside on cpu%d to cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ p->sl_node.level = p->sl_level; -+ if (pds_skiplist_insert(&rq->sl_header, &p->sl_node)) { -+ update_sched_rq_queued_masks(rq); -+ update_sched_rq_pending_masks(rq); -+ } else if (is_second_in_rq(p, rq)) -+ update_sched_rq_pending_masks(rq); -+ rq->nr_running++; -+ -+ sched_update_tick_dependency(rq); -+ -+ sched_info_queued(rq, p); -+ psi_enqueue(p, flags); -+ -+ /* -+ * If in_iowait is set, the code below may not trigger any cpufreq -+ * utilization updates, so do it here explicitly with the IOWAIT flag -+ * passed. 
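pds_skiplist_random_level() above draws a geometric level the cheap way: take a pseudo-random word and return the index of its first set bit, which is 0 with probability 1/2, 1 with probability 1/4, and so on, exactly the p = 0.5 distribution a skip list wants. A userspace sketch of the trick, assuming a GCC-style __builtin_ctzl() in place of find_first_bit() and a made-up NUM_LEVELS cap:

/* Illustrative geometric level draw via "index of first set bit",
 * as in pds_skiplist_random_level() above. Userspace sketch. */
#include <stdio.h>
#include <stdlib.h>

#define NUM_LEVELS 8    /* hypothetical cap on skip-list height */

static int random_level(void)
{
        /* OR in the top allowed bit so the result is capped and non-zero. */
        unsigned long seed = (unsigned long)random() | (1UL << (NUM_LEVELS - 1));

        /* __builtin_ctzl() plays the role of find_first_bit():
         * P(level 0) = 1/2, P(level 1) = 1/4, ... */
        return __builtin_ctzl(seed);
}

int main(void)
{
        long hist[NUM_LEVELS] = { 0 };

        srandom(1);
        for (int i = 0; i < 1 << 20; i++)
                hist[random_level()]++;
        for (int l = 0; l < NUM_LEVELS; l++)
                printf("level %d: %ld\n", l, hist[l]);  /* roughly halves per level */
        return 0;
}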
-+ */ -+ if (p->in_iowait) -+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq) -+{ -+ bool b_first, b_second; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "pds: cpu[%d] requeue task reside on cpu%d\n", -+ cpu_of(rq), task_cpu(p)); -+ -+ b_first = skiplist_del_init(&rq->sl_header, &p->sl_node); -+ b_second = is_second_in_rq(p, rq); -+ -+ p->sl_node.level = p->sl_level; -+ if (pds_skiplist_insert(&rq->sl_header, &p->sl_node) || b_first) { -+ update_sched_rq_queued_masks(rq); -+ update_sched_rq_pending_masks(rq); -+ } else if (is_second_in_rq(p, rq) || b_second) -+ update_sched_rq_pending_masks(rq); -+} -+ -+/* -+ * resched_curr - mark rq's current task 'to be rescheduled now'. -+ * -+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. -+ */ -+void resched_curr(struct rq *rq) -+{ -+ struct task_struct *curr = rq->curr; -+ int cpu; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ if (test_tsk_need_resched(curr)) -+ return; -+ -+ cpu = cpu_of(rq); -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(curr); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(curr)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) -+{ -+ struct task_struct *curr = rq->curr; -+ -+ if (curr->prio == PRIO_LIMIT) -+ resched_curr(rq); -+ -+ if (task_running_idle(p)) -+ return; -+ -+ if (p->priodl < curr->priodl) -+ resched_curr(rq); -+} -+ -+#ifdef CONFIG_SCHED_HRTICK -+/* -+ * Use HR-timers to deliver accurate preemption points. -+ */ -+ -+static void hrtick_clear(struct rq *rq) -+{ -+ if (hrtimer_active(&rq->hrtick_timer)) -+ hrtimer_cancel(&rq->hrtick_timer); -+} -+ -+/* -+ * High-resolution timer tick. -+ * Runs from hardirq context with interrupts disabled. -+ */ -+static enum hrtimer_restart hrtick(struct hrtimer *timer) -+{ -+ struct rq *rq = container_of(timer, struct rq, hrtick_timer); -+ struct task_struct *p; -+ -+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); -+ -+ raw_spin_lock(&rq->lock); -+ p = rq->curr; -+ p->time_slice = 0; -+ resched_curr(rq); -+ raw_spin_unlock(&rq->lock); -+ -+ return HRTIMER_NORESTART; -+} -+ -+/* -+ * Use hrtick when: -+ * - enabled by features -+ * - hrtimer is actually high res -+ */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ /** -+ * PDS doesn't support sched_feat yet -+ if (!sched_feat(HRTICK)) -+ return 0; -+ */ -+ if (!cpu_active(cpu_of(rq))) -+ return 0; -+ return hrtimer_is_hres_active(&rq->hrtick_timer); -+} -+ -+#ifdef CONFIG_SMP -+ -+static void __hrtick_restart(struct rq *rq) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ -+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); -+} -+ -+/* -+ * called from hardirq (IPI) context -+ */ -+static void __hrtick_start(void *arg) -+{ -+ struct rq *rq = arg; -+ -+ raw_spin_lock(&rq->lock); -+ __hrtick_restart(rq); -+ rq->hrtick_csd_pending = 0; -+ raw_spin_unlock(&rq->lock); -+} -+ -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ ktime_t time; -+ s64 delta; -+ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense and can cause timer DoS. 
-+ */ -+ delta = max_t(s64, delay, 10000LL); -+ time = ktime_add_ns(timer->base->get_time(), delta); -+ -+ hrtimer_set_expires(timer, time); -+ -+ if (rq == this_rq()) { -+ __hrtick_restart(rq); -+ } else if (!rq->hrtick_csd_pending) { -+ smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); -+ rq->hrtick_csd_pending = 1; -+ } -+} -+ -+#else -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense. Rely on vruntime for fairness. -+ */ -+ delay = max_t(u64, delay, 10000LL); -+ hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), -+ HRTIMER_MODE_REL_PINNED_HARD); -+} -+#endif /* CONFIG_SMP */ -+ -+static void hrtick_rq_init(struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ rq->hrtick_csd_pending = 0; -+ -+ rq->hrtick_csd.flags = 0; -+ rq->hrtick_csd.func = __hrtick_start; -+ rq->hrtick_csd.info = rq; -+#endif -+ -+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); -+ rq->hrtick_timer.function = hrtick; -+} -+ -+static inline int rq_dither(struct rq *rq) -+{ -+ if ((rq->clock - rq->last_tick > HALF_JIFFY_NS) || hrtick_enabled(rq)) -+ return 0; -+ -+ return HALF_JIFFY_NS; -+} -+ -+#else /* CONFIG_SCHED_HRTICK */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline void hrtick_clear(struct rq *rq) -+{ -+} -+ -+static inline void hrtick_rq_init(struct rq *rq) -+{ -+} -+ -+static inline int rq_dither(struct rq *rq) -+{ -+ return (rq->clock - rq->last_tick > HALF_JIFFY_NS)? 0:HALF_JIFFY_NS; -+} -+#endif /* CONFIG_SCHED_HRTICK */ -+ -+static inline int normal_prio(struct task_struct *p) -+{ -+ static const int policy_to_prio[] = { -+ NORMAL_PRIO, /* SCHED_NORMAL */ -+ 0, /* SCHED_FIFO */ -+ 0, /* SCHED_RR */ -+ IDLE_PRIO, /* SCHED_BATCH */ -+ ISO_PRIO, /* SCHED_ISO */ -+ IDLE_PRIO /* SCHED_IDLE */ -+ }; -+ -+ if (task_has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ return policy_to_prio[p->policy]; -+} -+ -+/* -+ * Calculate the current priority, i.e. the priority -+ * taken into account by the scheduler. This value might -+ * be boosted by RT tasks as it will be RT if the task got -+ * RT-boosted. If not then it returns p->normal_prio. -+ */ -+static int effective_prio(struct task_struct *p) -+{ -+ p->normal_prio = normal_prio(p); -+ /* -+ * If we are RT tasks or we were boosted to RT priority, -+ * keep the priority unchanged. Otherwise, update priority -+ * to the normal priority: -+ */ -+ if (!rt_prio(p->prio)) -+ return p->normal_prio; -+ return p->prio; -+} -+ -+/* -+ * activate_task - move a task to the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static void activate_task(struct task_struct *p, struct rq *rq) -+{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible--; -+ enqueue_task(p, rq, ENQUEUE_WAKEUP); -+ p->on_rq = 1; -+ cpufreq_update_this_cpu(rq, 0); -+} -+ -+/* -+ * deactivate_task - remove a task from the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static inline void deactivate_task(struct task_struct *p, struct rq *rq) -+{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible++; -+ dequeue_task(p, rq, DEQUEUE_SLEEP); -+ p->on_rq = 0; -+ cpufreq_update_this_cpu(rq, 0); -+} -+ -+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->cpu is set up to a new value, task_access_lock(p, ...) can be -+ * successfully executed on another CPU. 
We must ensure that updates of -+ * per-task data have been completed by this moment. -+ */ -+ smp_wmb(); -+ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ WRITE_ONCE(p->cpu, cpu); -+#else -+ WRITE_ONCE(task_thread_info(p)->cpu, cpu); -+#endif -+#endif -+} -+ -+#ifdef CONFIG_SMP -+void set_task_cpu(struct task_struct *p, unsigned int new_cpu) -+{ -+#ifdef CONFIG_SCHED_DEBUG -+ /* -+ * We should never call set_task_cpu() on a blocked task, -+ * ttwu() will sort out the placement. -+ */ -+ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && -+ !p->on_rq); -+#ifdef CONFIG_LOCKDEP -+ /* -+ * The caller should hold either p->pi_lock or rq->lock, when changing -+ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. -+ * -+ * sched_move_task() holds both and thus holding either pins the cgroup, -+ * see task_group(). -+ */ -+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || -+ lockdep_is_held(&task_rq(p)->lock))); -+#endif -+ /* -+ * Clearly, migrating tasks to offline CPUs is a fairly daft thing. -+ */ -+ WARN_ON_ONCE(!cpu_online(new_cpu)); -+#endif -+ if (task_cpu(p) == new_cpu) -+ return; -+ trace_sched_migrate_task(p, new_cpu); -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ -+ __set_task_cpu(p, new_cpu); -+} -+ -+static inline bool is_per_cpu_kthread(struct task_struct *p) -+{ -+ return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); -+} -+ -+/* -+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see -+ * __set_cpus_allowed_ptr() and select_fallback_rq(). -+ */ -+static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -+{ -+ if (!cpumask_test_cpu(cpu, &p->cpus_mask)) -+ return false; -+ -+ if (is_per_cpu_kthread(p)) -+ return cpu_online(cpu); -+ -+ return cpu_active(cpu); -+} -+ -+/* -+ * This is how migration works: -+ * -+ * 1) we invoke migration_cpu_stop() on the target CPU using -+ * stop_one_cpu(). -+ * 2) stopper starts to run (implicitly forcing the migrated thread -+ * off the CPU) -+ * 3) it checks whether the migrated task is still in the wrong runqueue. -+ * 4) if it's in the wrong runqueue then the migration thread removes -+ * it and puts it into the right queue. -+ * 5) stopper completes and stop_one_cpu() returns and the migration -+ * is done. -+ */ -+ -+/* -+ * detach_task() -- detach the task for the migration specified in @target_cpu -+ */ -+static void detach_task(struct rq *rq, struct task_struct *p, int target_cpu) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WRITE_ONCE(p->on_rq ,TASK_ON_RQ_MIGRATING); -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible++; -+ dequeue_task(p, rq, 0); -+ -+ set_task_cpu(p, target_cpu); -+} -+ -+/* -+ * attach_task() -- attach the task detached by detach_task() to its new rq. -+ */ -+static void attach_task(struct rq *rq, struct task_struct *p) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ BUG_ON(task_rq(p) != rq); -+ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible--; -+ enqueue_task(p, rq, 0); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ cpufreq_update_this_cpu(rq, 0); -+} -+ -+/* -+ * move_queued_task - move a queued task to new rq. -+ * -+ * Returns (locked) new rq. Old rq's lock is released. 
-+ */ -+static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int -+ new_cpu) -+{ -+ detach_task(rq, p, new_cpu); -+ raw_spin_unlock(&rq->lock); -+ -+ rq = cpu_rq(new_cpu); -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ attach_task(rq, p); -+ -+ check_preempt_curr(rq, p); -+ -+ return rq; -+} -+ -+struct migration_arg { -+ struct task_struct *task; -+ int dest_cpu; -+}; -+ -+/* -+ * Move (not current) task off this CPU, onto the destination CPU. We're doing -+ * this because either it can't run here any more (set_cpus_allowed() -+ * away from this CPU, or CPU going down), or because we're -+ * attempting to rebalance this task on exec (sched_exec). -+ * -+ * So we race with normal scheduler movements, but that's OK, as long -+ * as the task is no longer on this CPU. -+ */ -+static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int -+ dest_cpu) -+{ -+ /* Affinity changed (again). */ -+ if (!is_cpu_allowed(p, dest_cpu)) -+ return rq; -+ -+ update_rq_clock(rq); -+ return move_queued_task(rq, p, dest_cpu); -+} -+ -+/* -+ * migration_cpu_stop - this will be executed by a highprio stopper thread -+ * and performs thread migration by bumping thread off CPU then -+ * 'pushing' onto another runqueue. -+ */ -+static int migration_cpu_stop(void *data) -+{ -+ struct migration_arg *arg = data; -+ struct task_struct *p = arg->task; -+ struct rq *rq = this_rq(); -+ -+ /* -+ * The original target CPU might have gone down and we might -+ * be on another CPU but it doesn't matter. -+ */ -+ local_irq_disable(); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ /* -+ * If task_rq(p) != rq, it cannot be migrated here, because we're -+ * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because -+ * we're holding p->pi_lock. -+ */ -+ if (task_rq(p) == rq) -+ if (task_on_rq_queued(p)) -+ rq = __migrate_task(rq, p, arg->dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_enable(); -+ return 0; -+} -+ -+static inline void -+set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ p->nr_cpus_allowed = cpumask_weight(new_mask); -+} -+ -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ set_cpus_allowed_common(p, new_mask); -+} -+#endif -+ -+/* Enter with rq lock held. We know p is on the local CPU */ -+static inline void __set_tsk_resched(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ -+/** -+ * task_curr - is this task currently executing on a CPU? -+ * @p: the task in question. -+ * -+ * Return: 1 if the task is currently executing. 0 otherwise. -+ */ -+inline int task_curr(const struct task_struct *p) -+{ -+ return cpu_curr(task_cpu(p)) == p; -+} -+ -+#ifdef CONFIG_SMP -+/* -+ * wait_task_inactive - wait for a thread to unschedule. -+ * -+ * If @match_state is nonzero, it's the @p->state value just checked and -+ * not expected to change. If it changes, i.e. @p might have woken up, -+ * then return zero. When we succeed in waiting for @p to be off its CPU, -+ * we return a positive number (its total switch count). If a second call -+ * a short while later returns the same number, the caller can be sure that -+ * @p has remained unscheduled the whole time. -+ * -+ * The caller must ensure that the task *will* unschedule sometime soon, -+ * else this function might spin for a *long* time. 
This function can't -+ * be called with interrupts off, or it may introduce deadlock with -+ * smp_call_function() if an IPI is sent by the same process we are -+ * waiting to become inactive. -+ */ -+unsigned long wait_task_inactive(struct task_struct *p, long match_state) -+{ -+ unsigned long flags; -+ bool running, on_rq; -+ unsigned long ncsw; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ for (;;) { -+ rq = task_rq(p); -+ -+ /* -+ * If the task is actively running on another CPU -+ * still, just relax and busy-wait without holding -+ * any locks. -+ * -+ * NOTE! Since we don't hold any locks, it's not -+ * even sure that "rq" stays as the right runqueue! -+ * But we don't care, since this will return false -+ * if the runqueue has changed and p is actually now -+ * running somewhere else! -+ */ -+ while (task_running(p) && p == rq->curr) { -+ if (match_state && unlikely(p->state != match_state)) -+ return 0; -+ cpu_relax(); -+ } -+ -+ /* -+ * Ok, time to look more closely! We need the rq -+ * lock now, to be *sure*. If we're wrong, we'll -+ * just go back and repeat. -+ */ -+ task_access_lock_irqsave(p, &lock, &flags); -+ trace_sched_wait_task(p); -+ running = task_running(p); -+ on_rq = p->on_rq; -+ ncsw = 0; -+ if (!match_state || p->state == match_state) -+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ /* -+ * If it changed from the expected state, bail out now. -+ */ -+ if (unlikely(!ncsw)) -+ break; -+ -+ /* -+ * Was it really running after all now that we -+ * checked with the proper locks actually held? -+ * -+ * Oops. Go back and try again.. -+ */ -+ if (unlikely(running)) { -+ cpu_relax(); -+ continue; -+ } -+ -+ /* -+ * It's not enough that it's not actively running, -+ * it must be off the runqueue _entirely_, and not -+ * preempted! -+ * -+ * So if it was still runnable (but just not actively -+ * running right now), it's preempted, and we should -+ * yield - it could be a while. -+ */ -+ if (unlikely(on_rq)) { -+ ktime_t to = NSEC_PER_SEC / HZ; -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ continue; -+ } -+ -+ /* -+ * Ahh, all good. It wasn't running, and it wasn't -+ * runnable, which means that it will never become -+ * running in the future either. We're all done! -+ */ -+ break; -+ } -+ -+ return ncsw; -+} -+ -+/*** -+ * kick_process - kick a running thread to enter/exit the kernel -+ * @p: the to-be-kicked thread -+ * -+ * Cause a process which is running on another CPU to enter -+ * kernel-mode, without any delay. (to get signals handled.) -+ * -+ * NOTE: this function doesn't have to take the runqueue lock, -+ * because all it wants to ensure is that the remote task enters -+ * the kernel. If the IPI races and the task has been migrated -+ * to another CPU then no harm is done and the purpose has been -+ * achieved as well. -+ */ -+void kick_process(struct task_struct *p) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ cpu = task_cpu(p); -+ if ((cpu != smp_processor_id()) && task_curr(p)) -+ smp_send_reschedule(cpu); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(kick_process); -+ -+/* -+ * ->cpus_mask is protected by both rq->lock and p->pi_lock -+ * -+ * A few notes on cpu_active vs cpu_online: -+ * -+ * - cpu_active must be a subset of cpu_online -+ * -+ * - on CPU-up we allow per-CPU kthreads on the online && !active CPU, -+ * see __set_cpus_allowed_ptr(). 
At this point the newly online -+ * CPU isn't yet part of the sched domains, and balancing will not -+ * see it. -+ * -+ * - on cpu-down we clear cpu_active() to mask the sched domains and -+ * avoid the load balancer to place new tasks on the to be removed -+ * CPU. Existing tasks will remain running there and will be taken -+ * off. -+ * -+ * This means that fallback selection must not select !active CPUs. -+ * And can assume that any active CPU must be online. Conversely -+ * select_task_rq() below may allow selection of !active CPUs in order -+ * to satisfy the above rules. -+ */ -+static int select_fallback_rq(int cpu, struct task_struct *p) -+{ -+ int nid = cpu_to_node(cpu); -+ const struct cpumask *nodemask = NULL; -+ enum { cpuset, possible, fail } state = cpuset; -+ int dest_cpu; -+ -+ /* -+ * If the node that the CPU is on has been offlined, cpu_to_node() -+ * will return -1. There is no CPU on the node, and we should -+ * select the CPU on the other node. -+ */ -+ if (nid != -1) { -+ nodemask = cpumask_of_node(nid); -+ -+ /* Look for allowed, online CPU in same node. */ -+ for_each_cpu(dest_cpu, nodemask) { -+ if (!cpu_active(dest_cpu)) -+ continue; -+ if (cpumask_test_cpu(dest_cpu, &p->cpus_mask)) -+ return dest_cpu; -+ } -+ } -+ -+ for (;;) { -+ /* Any allowed, online CPU? */ -+ for_each_cpu(dest_cpu, &p->cpus_mask) { -+ if (!is_cpu_allowed(p, dest_cpu)) -+ continue; -+ goto out; -+ } -+ -+ /* No more Mr. Nice Guy. */ -+ switch (state) { -+ case cpuset: -+ if (IS_ENABLED(CONFIG_CPUSETS)) { -+ cpuset_cpus_allowed_fallback(p); -+ state = possible; -+ break; -+ } -+ /* Fall-through */ -+ case possible: -+ do_set_cpus_allowed(p, cpu_possible_mask); -+ state = fail; -+ break; -+ -+ case fail: -+ BUG(); -+ break; -+ } -+ } -+ -+out: -+ if (state != cpuset) { -+ /* -+ * Don't tell them about moving exiting tasks or -+ * kernel threads (both mm NULL), since they never -+ * leave kernel. 
-+ */ -+ if (p->mm && printk_ratelimit()) { -+ printk_deferred("process %d (%s) no longer affine to cpu%d\n", -+ task_pid_nr(p), p->comm, cpu); -+ } -+ } -+ -+ return dest_cpu; -+} -+ -+static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) -+{ -+ cpumask_t *mask; -+ -+ if (cpumask_test_cpu(cpu, cpumask)) -+ return cpu; -+ -+ mask = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) -+ mask++; -+ -+ return cpu; -+} -+ -+/* -+ * task_preemptible_rq - return the rq which the given task can preempt on -+ * @p: task wants to preempt CPU -+ * @only_preempt_low_policy: indicate only preempt rq running low policy than @p -+ */ -+static inline int -+task_preemptible_rq_idle(struct task_struct *p, cpumask_t *chk_mask) -+{ -+ cpumask_t tmp; -+ -+#ifdef CONFIG_SCHED_SMT -+ if (cpumask_and(&tmp, chk_mask, &sched_cpu_sg_idle_mask)) -+ return best_mask_cpu(task_cpu(p), &tmp); -+#endif -+ -+#ifdef CONFIG_SMT_NICE -+ /* Only ttwu on cpu which is not smt supressed */ -+ if (cpumask_andnot(&tmp, chk_mask, &sched_smt_supressed_mask)) { -+ cpumask_t t; -+ if (cpumask_and(&t, &tmp, &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ return best_mask_cpu(task_cpu(p), &t); -+ return best_mask_cpu(task_cpu(p), &tmp); -+ } -+#endif -+ -+ if (cpumask_and(&tmp, chk_mask, &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ return best_mask_cpu(task_cpu(p), &tmp); -+ return best_mask_cpu(task_cpu(p), chk_mask); -+} -+ -+static inline int -+task_preemptible_rq(struct task_struct *p, cpumask_t *chk_mask, -+ int preempt_level) -+{ -+ cpumask_t tmp; -+ int level; -+ -+#ifdef CONFIG_SCHED_SMT -+#ifdef CONFIG_SMT_NICE -+ if (cpumask_and(&tmp, chk_mask, &sched_cpu_psg_mask)) -+ return best_mask_cpu(task_cpu(p), &tmp); -+#else -+ if (cpumask_and(&tmp, chk_mask, &sched_cpu_sg_idle_mask)) -+ return best_mask_cpu(task_cpu(p), &tmp); -+#endif -+#endif -+ -+ level = find_first_bit(sched_rq_queued_masks_bitmap, -+ NR_SCHED_RQ_QUEUED_LEVEL); -+ -+ while (level < preempt_level) { -+ if (cpumask_and(&tmp, chk_mask, &sched_rq_queued_masks[level])) -+ return best_mask_cpu(task_cpu(p), &tmp); -+ -+ level = find_next_bit(sched_rq_queued_masks_bitmap, -+ NR_SCHED_RQ_QUEUED_LEVEL, -+ level + 1); -+ } -+ -+ if (unlikely(SCHED_RQ_RT == level && -+ level == preempt_level && -+ cpumask_and(&tmp, chk_mask, -+ &sched_rq_queued_masks[SCHED_RQ_RT]))) { -+ unsigned int cpu; -+ -+ for_each_cpu (cpu, &tmp) -+ if (p->prio < sched_rq_prio[cpu]) -+ return cpu; -+ } -+ -+ return best_mask_cpu(task_cpu(p), chk_mask); -+} -+ -+/* -+ * wake flags -+ */ -+#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -+#define WF_FORK 0x02 /* child wakeup after fork */ -+#define WF_MIGRATED 0x04 /* internal use, task got migrated */ -+ -+static inline int select_task_rq(struct task_struct *p) -+{ -+ cpumask_t chk_mask; -+ -+ if (unlikely(!cpumask_and(&chk_mask, &p->cpus_mask, cpu_online_mask))) -+ return select_fallback_rq(task_cpu(p), p); -+ -+ /* Check IDLE tasks suitable to run normal priority */ -+ if (idleprio_task(p)) { -+ if (idleprio_suitable(p)) { -+ p->prio = p->normal_prio; -+ update_task_priodl(p); -+ return task_preemptible_rq_idle(p, &chk_mask); -+ } -+ p->prio = NORMAL_PRIO; -+ update_task_priodl(p); -+ } -+ -+ return task_preemptible_rq(p, &chk_mask, -+ task_running_policy_level(p, this_rq())); -+} -+#else /* CONFIG_SMP */ -+static inline int select_task_rq(struct task_struct *p) -+{ -+ return 0; -+} -+#endif /* CONFIG_SMP */ -+ -+static void -+ttwu_stat(struct task_struct *p, int cpu, 
int wake_flags) -+{ -+ struct rq *rq; -+ -+ if (!schedstat_enabled()) -+ return; -+ -+ rq= this_rq(); -+ -+#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) -+ __schedstat_inc(rq->ttwu_local); -+ else { -+ /** PDS ToDo: -+ * How to do ttwu_wake_remote -+ */ -+ } -+#endif /* CONFIG_SMP */ -+ -+ __schedstat_inc(rq->ttwu_count); -+} -+ -+/* -+ * Mark the task runnable and perform wakeup-preemption. -+ */ -+static inline void -+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+} -+ -+static inline void -+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+#ifdef CONFIG_SMP -+ if (p->sched_contributes_to_load) -+ rq->nr_uninterruptible--; -+#endif -+ -+ activate_task(p, rq); -+ ttwu_do_wakeup(rq, p, 0); -+} -+ -+static int ttwu_remote(struct task_struct *p, int wake_flags) -+{ -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ rq = __task_access_lock(p, &lock); -+ if (task_on_rq_queued(p)) { -+ ttwu_do_wakeup(rq, p, wake_flags); -+ ret = 1; -+ } -+ __task_access_unlock(p, lock); -+ -+ return ret; -+} -+ -+/* -+ * Notes on Program-Order guarantees on SMP systems. -+ * -+ * MIGRATION -+ * -+ * The basic program-order guarantee on SMP systems is that when a task [t] -+ * migrates, all its activity on its old CPU [c0] happens-before any subsequent -+ * execution on its new CPU [c1]. -+ * -+ * For migration (of runnable tasks) this is provided by the following means: -+ * -+ * A) UNLOCK of the rq(c0)->lock scheduling out task t -+ * B) migration for t is required to synchronize *both* rq(c0)->lock and -+ * rq(c1)->lock (if not at the same time, then in that order). -+ * C) LOCK of the rq(c1)->lock scheduling in task -+ * -+ * Transitivity guarantees that B happens after A and C after B. -+ * Note: we only require RCpc transitivity. -+ * Note: the CPU doing B need not be c0 or c1 -+ * -+ * Example: -+ * -+ * CPU0 CPU1 CPU2 -+ * -+ * LOCK rq(0)->lock -+ * sched-out X -+ * sched-in Y -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(0)->lock // orders against CPU0 -+ * dequeue X -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(1)->lock -+ * enqueue X -+ * UNLOCK rq(1)->lock -+ * -+ * LOCK rq(1)->lock // orders against CPU2 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(1)->lock -+ * -+ * -+ * BLOCKING -- aka. SLEEP + WAKEUP -+ * -+ * For blocking we (obviously) need to provide the same guarantee as for -+ * migration. However the means are completely different as there is no lock -+ * chain to provide order. Instead we do: -+ * -+ * 1) smp_store_release(X->on_cpu, 0) -+ * 2) smp_cond_load_acquire(!X->on_cpu) -+ * -+ * Example: -+ * -+ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule) -+ * -+ * LOCK rq(0)->lock LOCK X->pi_lock -+ * dequeue X -+ * sched-out X -+ * smp_store_release(X->on_cpu, 0); -+ * -+ * smp_cond_load_acquire(&X->on_cpu, !VAL); -+ * X->state = WAKING -+ * set_task_cpu(X,2) -+ * -+ * LOCK rq(2)->lock -+ * enqueue X -+ * X->state = RUNNING -+ * UNLOCK rq(2)->lock -+ * -+ * LOCK rq(2)->lock // orders against CPU1 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(2)->lock -+ * -+ * UNLOCK X->pi_lock -+ * UNLOCK rq(0)->lock -+ * -+ * -+ * However; for wakeups there is a second guarantee we must provide, namely we -+ * must observe the state that lead to our wakeup. That is, not only must our -+ * task observe its own prior state, it must also observe the stores prior to -+ * its wakeup. 
-+ * -+ * This means that any means of doing remote wakeups must order the CPU doing -+ * the wakeup against the CPU the task is going to end up running on. This, -+ * however, is already required for the regular Program-Order guarantee above, -+ * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire). -+ * -+ */ -+ -+/*** -+ * try_to_wake_up - wake up a thread -+ * @p: the thread to be awakened -+ * @state: the mask of task states that can be woken -+ * @wake_flags: wake modifier flags (WF_*) -+ * -+ * Put it on the run-queue if it's not already there. The "current" -+ * thread is always on the run-queue (except when the actual -+ * re-schedule is in progress), and as such you're allowed to do -+ * the simpler "current->state = TASK_RUNNING" to mark yourself -+ * runnable without the overhead of this. -+ * -+ * Return: %true if @p was woken up, %false if it was already running. -+ * or @state didn't match @p's state. -+ */ -+static int try_to_wake_up(struct task_struct *p, unsigned int state, -+ int wake_flags) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ int cpu, success = 0; -+ -+ /* -+ * If we are going to wake up a thread waiting for CONDITION we -+ * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with mb() in -+ * set_current_state() the waiting thread does. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ smp_mb__after_spinlock(); -+ if (!(p->state & state)) -+ goto out; -+ -+ trace_sched_waking(p); -+ -+ /* We're going to change ->state: */ -+ success = 1; -+ cpu = task_cpu(p); -+ -+ /* -+ * Ensure we load p->on_rq _after_ p->state, otherwise it would -+ * be possible to, falsely, observe p->on_rq == 0 and get stuck -+ * in smp_cond_load_acquire() below. -+ * -+ * sched_ttwu_pending() try_to_wake_up() -+ * STORE p->on_rq = 1 LOAD p->state -+ * UNLOCK rq->lock -+ * -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * UNLOCK rq->lock -+ * -+ * [task p] -+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ if (p->on_rq && ttwu_remote(p, wake_flags)) -+ goto stat; -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -+ * possible to, falsely, observe p->on_cpu == 0. -+ * -+ * One must be running (->on_cpu == 1) in order to remove oneself -+ * from the runqueue. -+ * -+ * __schedule() (switch to task 'p') try_to_wake_up() -+ * STORE p->on_cpu = 1 LOAD p->on_rq -+ * UNLOCK rq->lock -+ * -+ * __schedule() (put 'p' to sleep) -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * STORE p->on_rq = 0 LOAD p->on_cpu -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until its done referencing the task. -+ * -+ * Pairs with the smp_store_release() in finish_task(). -+ * -+ * This ensures that tasks getting woken will be fully ordered against -+ * their previous state and preserve Program Order. 
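The contract described above, where the previous CPU publishes "done with this task" via smp_store_release(&p->on_cpu, 0) and the waker spins in smp_cond_load_acquire() before touching the task, is a plain release/acquire handshake. A reduced userspace sketch of that pairing with C11 atomics and pthreads (hypothetical names, plain spinning instead of the kernel primitives):

/* Illustrative release/acquire handshake standing in for the
 * finish_task() / smp_cond_load_acquire(&p->on_cpu, !VAL) pairing
 * described above. Userspace sketch with hypothetical names. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static _Atomic int on_cpu = 1;
static long task_state;                 /* "prior activity" of the task */

static void *prev_cpu(void *arg)
{
        (void)arg;
        task_state = 42;                /* work done while on_cpu == 1 */
        atomic_store_explicit(&on_cpu, 0, memory_order_release);
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, prev_cpu, NULL);

        /* Waker: acquire-spin until the previous owner is done, which
         * guarantees we also observe everything it wrote before the store. */
        while (atomic_load_explicit(&on_cpu, memory_order_acquire))
                ;
        printf("observed task_state = %ld\n", task_state);   /* always 42 */

        pthread_join(t, NULL);
        return 0;
}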
-+ */ -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ p->sched_contributes_to_load = !!task_contributes_to_load(p); -+ p->state = TASK_WAKING; -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+ if (SCHED_ISO == p->policy && ISO_PRIO != p->prio) { -+ p->prio = ISO_PRIO; -+ p->deadline = 0UL; -+ update_task_priodl(p); -+ } -+ -+ cpu = select_task_rq(p); -+ -+ if (cpu != task_cpu(p)) { -+ wake_flags |= WF_MIGRATED; -+ psi_ttwu_dequeue(p); -+ set_task_cpu(p, cpu); -+ } -+#else /* CONFIG_SMP */ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+#endif -+ -+ rq = cpu_rq(cpu); -+ raw_spin_lock(&rq->lock); -+ -+ update_rq_clock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ check_preempt_curr(rq, p); -+ -+ raw_spin_unlock(&rq->lock); -+ -+stat: -+ ttwu_stat(p, cpu, wake_flags); -+out: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ return success; -+} -+ -+/** -+ * wake_up_process - Wake up a specific process -+ * @p: The process to be woken up. -+ * -+ * Attempt to wake up the nominated process and move it to the set of runnable -+ * processes. -+ * -+ * Return: 1 if the process was woken up, 0 if it was already running. -+ * -+ * This function executes a full memory barrier before accessing the task state. -+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+int wake_up_state(struct task_struct *p, unsigned int state) -+{ -+ return try_to_wake_up(p, state, 0); -+} -+ -+/* -+ * Perform scheduler related setup for a newly forked process p. -+ * p is forked by current. -+ */ -+int sched_fork(unsigned long __maybe_unused clone_flags, struct task_struct *p) -+{ -+ unsigned long flags; -+ int cpu = get_cpu(); -+ struct rq *rq = this_rq(); -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ INIT_HLIST_HEAD(&p->preempt_notifiers); -+#endif -+ /* Should be reset in fork.c but done here for ease of PDS patching */ -+ p->on_cpu = -+ p->on_rq = -+ p->utime = -+ p->stime = -+ p->sched_time = 0; -+ -+ p->sl_level = pds_skiplist_random_level(p); -+ INIT_SKIPLIST_NODE(&p->sl_node); -+ -+#ifdef CONFIG_COMPACTION -+ p->capture_control = NULL; -+#endif -+ -+ /* -+ * We mark the process as NEW here. This guarantees that -+ * nobody will actually run it, and a signal or other external -+ * event cannot wake it up and insert it on the runqueue either. -+ */ -+ p->state = TASK_NEW; -+ -+ /* -+ * Make sure we do not leak PI boosting priority to the child. -+ */ -+ p->prio = current->normal_prio; -+ -+ /* -+ * Revert to default priority/policy on fork if requested. -+ */ -+ if (unlikely(p->sched_reset_on_fork)) { -+ if (task_has_rt_policy(p)) { -+ p->policy = SCHED_NORMAL; -+ p->static_prio = NICE_TO_PRIO(0); -+ p->rt_priority = 0; -+ } else if (PRIO_TO_NICE(p->static_prio) < 0) -+ p->static_prio = NICE_TO_PRIO(0); -+ -+ p->prio = p->normal_prio = normal_prio(p); -+ -+ /* -+ * We don't need the reset flag anymore after the fork. It has -+ * fulfilled its duty: -+ */ -+ p->sched_reset_on_fork = 0; -+ } -+ -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. 
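As a rough illustration of the slice-splitting rule described in the comment above (the actual code follows below): the parent keeps half of its remaining slice, the child inherits the same half, so the total pending slice is conserved; if the halved slice falls below the reschedule threshold the parent is rescheduled. The RESCHED_US value used here is only a placeholder; the real threshold is defined elsewhere in the patch.

        /* Stand-alone sketch of the fork-time slice split; values are illustrative. */
        #include <stdio.h>

        #define RESCHED_US 100   /* placeholder threshold, not the patch's value */

        struct toy_task { int time_slice; /* microseconds */ };

        static int split_slice(struct toy_task *parent, struct toy_task *child)
        {
                parent->time_slice /= 2;
                child->time_slice = parent->time_slice;
                /* total pending slice is conserved: old == parent + child (+/- rounding) */
                return parent->time_slice < RESCHED_US;  /* 1 => parent needs resched */
        }

        int main(void)
        {
                struct toy_task parent = { .time_slice = 4000 }, child = { 0 };
                int resched = split_slice(&parent, &child);

                printf("parent %d us, child %d us, resched=%d\n",
                       parent.time_slice, child.time_slice, resched);
                return 0;
        }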
-+ */ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ rq->curr->time_slice /= 2; -+ p->time_slice = rq->curr->time_slice; -+#ifdef CONFIG_SCHED_HRTICK -+ hrtick_start(rq, US_TO_NS(rq->curr->time_slice)); -+#endif -+ -+ if (p->time_slice < RESCHED_US) { -+ update_rq_clock(rq); -+ time_slice_expired(p, rq); -+ resched_curr(rq); -+ } else -+ update_task_priodl(p); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ /* -+ * The child is not yet in the pid-hash so no cgroup attach races, -+ * and the cgroup is pinned to this child due to cgroup_fork() -+ * is ran before sched_fork(). -+ * -+ * Silence PROVE_RCU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ /* -+ * We're setting the CPU for the first time, we don't migrate, -+ * so use __set_task_cpu(). -+ */ -+ __set_task_cpu(p, cpu); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ put_cpu(); -+ return 0; -+} -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+static bool __initdata __sched_schedstats = false; -+ -+static void set_schedstats(bool enabled) -+{ -+ if (enabled) -+ static_branch_enable(&sched_schedstats); -+ else -+ static_branch_disable(&sched_schedstats); -+} -+ -+void force_schedstat_enabled(void) -+{ -+ if (!schedstat_enabled()) { -+ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); -+ static_branch_enable(&sched_schedstats); -+ } -+} -+ -+static int __init setup_schedstats(char *str) -+{ -+ int ret = 0; -+ if (!str) -+ goto out; -+ -+ /* -+ * This code is called before jump labels have been set up, so we can't -+ * change the static branch directly just yet. Instead set a temporary -+ * variable so init_schedstats() can do it later. -+ */ -+ if (!strcmp(str, "enable")) { -+ __sched_schedstats = true; -+ ret = 1; -+ } else if (!strcmp(str, "disable")) { -+ __sched_schedstats = false; -+ ret = 1; -+ } -+out: -+ if (!ret) -+ pr_warn("Unable to parse schedstats=\n"); -+ -+ return ret; -+} -+__setup("schedstats=", setup_schedstats); -+ -+static void __init init_schedstats(void) -+{ -+ set_schedstats(__sched_schedstats); -+} -+ -+#ifdef CONFIG_PROC_SYSCTL -+int sysctl_schedstats(struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) -+{ -+ struct ctl_table t; -+ int err; -+ int state = static_branch_likely(&sched_schedstats); -+ -+ if (write && !capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ t = *table; -+ t.data = &state; -+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); -+ if (err < 0) -+ return err; -+ if (write) -+ set_schedstats(state); -+ return err; -+} -+#endif /* CONFIG_PROC_SYSCTL */ -+#else /* !CONFIG_SCHEDSTATS */ -+static inline void init_schedstats(void) {} -+#endif /* CONFIG_SCHEDSTATS */ -+ -+/* -+ * wake_up_new_task - wake up a newly created task for the first time. -+ * -+ * This function will do some initial scheduler statistics housekeeping -+ * that must be done for every newly created context, then puts the task -+ * on the runqueue and wakes it. 
-+ */ -+void wake_up_new_task(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ p->state = TASK_RUNNING; -+ -+ rq = cpu_rq(select_task_rq(p)); -+#ifdef CONFIG_SMP -+ /* -+ * Fork balancing, do it here and not earlier because: -+ * - cpus_mask can change in the fork path -+ * - any previously selected CPU might disappear through hotplug -+ * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, -+ * as we're not fully set-up yet. -+ */ -+ __set_task_cpu(p, cpu_of(rq)); -+#endif -+ -+ raw_spin_lock(&rq->lock); -+ -+ update_rq_clock(rq); -+ activate_task(p, rq); -+ trace_sched_wakeup_new(p); -+ check_preempt_curr(rq, p); -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ -+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); -+ -+void preempt_notifier_inc(void) -+{ -+ static_branch_inc(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_inc); -+ -+void preempt_notifier_dec(void) -+{ -+ static_branch_dec(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_dec); -+ -+/** -+ * preempt_notifier_register - tell me when current is being preempted & rescheduled -+ * @notifier: notifier struct to register -+ */ -+void preempt_notifier_register(struct preempt_notifier *notifier) -+{ -+ if (!static_branch_unlikely(&preempt_notifier_key)) -+ WARN(1, "registering preempt_notifier while notifiers disabled\n"); -+ -+ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_register); -+ -+/** -+ * preempt_notifier_unregister - no longer interested in preemption notifications -+ * @notifier: notifier struct to unregister -+ * -+ * This is *not* safe to call from within a preemption notifier. -+ */ -+void preempt_notifier_unregister(struct preempt_notifier *notifier) -+{ -+ hlist_del(¬ifier->link); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_unregister); -+ -+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_in(notifier, raw_smp_processor_id()); -+} -+ -+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_in_preempt_notifiers(curr); -+} -+ -+static void -+__fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_out(notifier, next); -+} -+ -+static __always_inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_out_preempt_notifiers(curr, next); -+} -+ -+#else /* !CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+} -+ -+static inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+} -+ -+#endif /* CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void prepare_task(struct task_struct *next) -+{ -+ /* -+ * Claim the task as running, we do this before switching to it -+ * such that any running task will have this set. 
-+ */ -+ next->on_cpu = 1; -+} -+ -+static inline void finish_task(struct task_struct *prev) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->on_cpu is cleared, the task can be moved to a different CPU. -+ * We must ensure this doesn't happen until the switch is completely -+ * finished. -+ * -+ * In particular, the load of prev->state in finish_task_switch() must -+ * happen before this. -+ * -+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). -+ */ -+ smp_store_release(&prev->on_cpu, 0); -+#else -+ prev->on_cpu = 0; -+#endif -+} -+ -+static inline void -+prepare_lock_switch(struct rq *rq, struct task_struct *next) -+{ -+ /* -+ * Since the runqueue lock will be released by the next -+ * task (which is an invalid locking op but in the case -+ * of the scheduler it's an obvious special-case), so we -+ * do an early lockdep release here: -+ */ -+ spin_release(&rq->lock.dep_map, 1, _THIS_IP_); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* this is a valid case when another task releases the spinlock */ -+ rq->lock.owner = next; -+#endif -+} -+ -+static inline void finish_lock_switch(struct rq *rq) -+{ -+ /* -+ * If we are tracking spinlock dependencies then we have to -+ * fix up the runqueue lock - which gets 'carried over' from -+ * prev into current: -+ */ -+ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+/** -+ * prepare_task_switch - prepare to switch tasks -+ * @rq: the runqueue preparing to switch -+ * @next: the task we are going to switch to. -+ * -+ * This is called with the rq lock held and interrupts off. It must -+ * be paired with a subsequent finish_task_switch after the context -+ * switch. -+ * -+ * prepare_task_switch sets up locking and calls architecture specific -+ * hooks. -+ */ -+static inline void -+prepare_task_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ kcov_prepare_switch(prev); -+ sched_info_switch(rq, prev, next); -+ perf_event_task_sched_out(prev, next); -+ rseq_preempt(prev); -+ fire_sched_out_preempt_notifiers(prev, next); -+ prepare_task(next); -+ prepare_arch_switch(next); -+} -+ -+/** -+ * finish_task_switch - clean up after a task-switch -+ * @rq: runqueue associated with task-switch -+ * @prev: the thread we just switched away from. -+ * -+ * finish_task_switch must be called after the context switch, paired -+ * with a prepare_task_switch call before the context switch. -+ * finish_task_switch will reconcile locking set up by prepare_task_switch, -+ * and do any other architecture-specific cleanup actions. -+ * -+ * Note that we may have delayed dropping an mm in context_switch(). If -+ * so, we finish that here outside of the runqueue lock. (Doing it -+ * with the lock held can cause deadlocks; see schedule() for -+ * details.) -+ * -+ * The context switch have flipped the stack from under us and restored the -+ * local variables which were saved when this task called schedule() in the -+ * past. prev == current is still correct but we need to recalculate this_rq -+ * because prev may have moved to another CPU. -+ */ -+static struct rq *finish_task_switch(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq = this_rq(); -+ struct mm_struct *mm = rq->prev_mm; -+ long prev_state; -+ -+ /* -+ * The previous task will have left us with a preempt_count of 2 -+ * because it left us after: -+ * -+ * schedule() -+ * preempt_disable(); // 1 -+ * __schedule() -+ * raw_spin_lock_irq(&rq->lock) // 2 -+ * -+ * Also, see FORK_PREEMPT_COUNT. 
-+ */ -+ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, -+ "corrupted preempt_count: %s/%d/0x%x\n", -+ current->comm, current->pid, preempt_count())) -+ preempt_count_set(FORK_PREEMPT_COUNT); -+ -+ rq->prev_mm = NULL; -+ -+ /* -+ * A task struct has one reference for the use as "current". -+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls -+ * schedule one last time. The schedule call will never return, and -+ * the scheduled task must drop that reference. -+ * -+ * We must observe prev->state before clearing prev->on_cpu (in -+ * finish_task), otherwise a concurrent wakeup can get prev -+ * running on another CPU and we could rave with its RUNNING -> DEAD -+ * transition, resulting in a double drop. -+ */ -+ prev_state = prev->state; -+ vtime_task_switch(prev); -+ perf_event_task_sched_in(prev, current); -+ finish_task(prev); -+ finish_lock_switch(rq); -+ finish_arch_post_lock_switch(); -+ kcov_finish_switch(current); -+ -+ fire_sched_in_preempt_notifiers(current); -+ /* -+ * When switching through a kernel thread, the loop in -+ * membarrier_{private,global}_expedited() may have observed that -+ * kernel thread and not issued an IPI. It is therefore possible to -+ * schedule between user->kernel->user threads without passing though -+ * switch_mm(). Membarrier requires a barrier after storing to -+ * rq->curr, before returning to userspace, so provide them here: -+ * -+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly -+ * provided by mmdrop(), -+ * - a sync_core for SYNC_CORE. -+ */ -+ if (mm) { -+ membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop(mm); -+ } -+ if (unlikely(prev_state == TASK_DEAD)) { -+ /* -+ * Remove function-return probe instances associated with this -+ * task and put them back on the free list. -+ */ -+ kprobe_flush_task(prev); -+ -+ /* Task is done with its stack. */ -+ put_task_stack(prev); -+ -+ put_task_struct_rcu_user(prev); -+ } -+ -+ tick_nohz_task_switch(); -+ return rq; -+} -+ -+/** -+ * schedule_tail - first thing a freshly forked thread must call. -+ * @prev: the thread we just switched away from. -+ */ -+asmlinkage __visible void schedule_tail(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq; -+ -+ /* -+ * New tasks start with FORK_PREEMPT_COUNT, see there and -+ * finish_task_switch() for details. -+ * -+ * finish_task_switch() will drop rq->lock() and lower preempt_count -+ * and the preempt_enable() will end up enabling preemption (on -+ * PREEMPT_COUNT kernels). -+ */ -+ -+ rq = finish_task_switch(prev); -+ preempt_enable(); -+ -+ if (current->set_child_tid) -+ put_user(task_pid_vnr(current), current->set_child_tid); -+ -+ calculate_sigpending(); -+} -+ -+/* -+ * context_switch - switch to the new MM and the new thread's register state. -+ */ -+static __always_inline struct rq * -+context_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ prepare_task_switch(rq, prev, next); -+ -+ /* -+ * For paravirt, this is coupled with an exit in switch_to to -+ * combine the page table reload and the switch backend into -+ * one hypercall. 
-+ */ -+ arch_start_context_switch(prev); -+ -+ /* -+ * kernel -> kernel lazy + transfer active -+ * user -> kernel lazy + mmgrab() active -+ * -+ * kernel -> user switch + mmdrop() active -+ * user -> user switch -+ */ -+ if (!next->mm) { // to kernel -+ enter_lazy_tlb(prev->active_mm, next); -+ -+ next->active_mm = prev->active_mm; -+ if (prev->mm) // from user -+ mmgrab(prev->active_mm); -+ else -+ prev->active_mm = NULL; -+ } else { // to user -+ membarrier_switch_mm(rq, prev->active_mm, next->mm); -+ /* -+ * sys_membarrier() requires an smp_mb() between setting -+ * rq->curr / membarrier_switch_mm() and returning to userspace. -+ * -+ * The below provides this either through switch_mm(), or in -+ * case 'prev->active_mm == next->mm' through -+ * finish_task_switch()'s mmdrop(). -+ */ -+ switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ -+ if (!prev->mm) { // from kernel -+ /* will mmdrop() in finish_task_switch(). */ -+ rq->prev_mm = prev->active_mm; -+ prev->active_mm = NULL; -+ } -+ } -+ -+ prepare_lock_switch(rq, next); -+ -+ /* Here we just switch the register state and the stack. */ -+ switch_to(prev, next, prev); -+ barrier(); -+ -+ return finish_task_switch(prev); -+} -+ -+/* -+ * nr_running, nr_uninterruptible and nr_context_switches: -+ * -+ * externally visible scheduler statistics: current number of runnable -+ * threads, total number of context switches performed since bootup. -+ */ -+unsigned long nr_running(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_running; -+ -+ return sum; -+} -+ -+/* -+ * Check if only the current task is running on the CPU. -+ * -+ * Caution: this function does not check that the caller has disabled -+ * preemption, thus the result might have a time-of-check-to-time-of-use -+ * race. The caller is responsible to use it correctly, for example: -+ * -+ * - from a non-preemptible section (of course) -+ * -+ * - from a thread that is bound to a single CPU -+ * -+ * - in a loop with very short iterations (e.g. a polling loop) -+ */ -+bool single_task_running(void) -+{ -+ return raw_rq()->nr_running == 1; -+} -+EXPORT_SYMBOL(single_task_running); -+ -+unsigned long long nr_context_switches(void) -+{ -+ int i; -+ unsigned long long sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += cpu_rq(i)->nr_switches; -+ -+ return sum; -+} -+ -+/* -+ * Consumers of these two interfaces, like for example the cpuidle menu -+ * governor, are using nonsensical data. Preferring shallow idle state selection -+ * for a CPU that has IO-wait which might not even end up running the task when -+ * it does become runnable. -+ */ -+ -+unsigned long nr_iowait_cpu(int cpu) -+{ -+ return atomic_read(&cpu_rq(cpu)->nr_iowait); -+} -+ -+/* -+ * IO-wait accounting, and how its mostly bollocks (on SMP). -+ * -+ * The idea behind IO-wait account is to account the idle time that we could -+ * have spend running if it were not for IO. That is, if we were to improve the -+ * storage performance, we'd have a proportional reduction in IO-wait time. -+ * -+ * This all works nicely on UP, where, when a task blocks on IO, we account -+ * idle time as IO-wait, because if the storage were faster, it could've been -+ * running and we'd not be idle. -+ * -+ * This has been extended to SMP, by doing the same for each CPU. This however -+ * is broken. -+ * -+ * Imagine for instance the case where two tasks block on one CPU, only the one -+ * CPU will have IO-wait accounted, while the other has regular idle. 
Even -+ * though, if the storage were faster, both could've ran at the same time, -+ * utilising both CPUs. -+ * -+ * This means, that when looking globally, the current IO-wait accounting on -+ * SMP is a lower bound, by reason of under accounting. -+ * -+ * Worse, since the numbers are provided per CPU, they are sometimes -+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly -+ * associated with any one particular CPU, it can wake to another CPU than it -+ * blocked on. This means the per CPU IO-wait number is meaningless. -+ * -+ * Task CPU affinities can make all that even more 'interesting'. -+ */ -+ -+unsigned long nr_iowait(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += nr_iowait_cpu(i); -+ -+ return sum; -+} -+ -+DEFINE_PER_CPU(struct kernel_stat, kstat); -+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -+ -+EXPORT_PER_CPU_SYMBOL(kstat); -+EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -+ -+static inline void pds_update_curr(struct rq *rq, struct task_struct *p) -+{ -+ s64 ns = rq->clock_task - p->last_ran; -+ -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ -+ p->time_slice -= NS_TO_US(ns); -+ p->last_ran = rq->clock_task; -+} -+ -+/* -+ * Return accounted runtime for the task. -+ * Return separately the current's pending runtime that have not been -+ * accounted yet. -+ */ -+unsigned long long task_sched_runtime(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ u64 ns; -+ -+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) -+ /* -+ * 64-bit doesn't need locks to atomically read a 64-bit value. -+ * So we have a optimization chance when the task's delta_exec is 0. -+ * Reading ->on_cpu is racy, but this is ok. -+ * -+ * If we race with it leaving CPU, we'll take a lock. So we're correct. -+ * If we race with it entering CPU, unaccounted time is 0. This is -+ * indistinguishable from the read occurring a few cycles earlier. -+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has -+ * been accounted, so we're correct here as well. -+ */ -+ if (!p->on_cpu || !task_on_rq_queued(p)) -+ return tsk_seruntime(p); -+#endif -+ -+ rq = task_access_lock_irqsave(p, &lock, &flags); -+ /* -+ * Must be ->curr _and_ ->on_rq. If dequeued, we would -+ * project cycles that may never be accounted to this -+ * thread, breaking clock_gettime(). -+ */ -+ if (p == rq->curr && task_on_rq_queued(p)) { -+ update_rq_clock(rq); -+ pds_update_curr(rq, p); -+ } -+ ns = tsk_seruntime(p); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ return ns; -+} -+ -+/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static inline void pds_scheduler_task_tick(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ if (is_idle_task(p)) -+ return; -+ -+ pds_update_curr(rq, p); -+ -+ cpufreq_update_util(rq, 0); -+ -+ /* -+ * Tasks that were scheduled in the first half of a tick are not -+ * allowed to run into the 2nd half of the next tick if they will -+ * run out of time slice in the interim. Otherwise, if they have -+ * less than RESCHED_US μs of time slice left they will be rescheduled. -+ */ -+ if (p->time_slice - rq->dither >= RESCHED_US) -+ return; -+ -+ /** -+ * p->time_slice < RESCHED_US. 
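The "usecs to avoid overflow on 32bit" remark in pds_update_curr() above is easy to quantify: a signed 32-bit counter overflows after roughly 2.1 seconds when counting nanoseconds, but only after about 35 minutes when counting microseconds, which leaves ample headroom for any realistic time slice. A quick check (illustrative only):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                /* INT32_MAX interpreted as nanoseconds vs. microseconds */
                printf("ns units: %.2f s before overflow\n", INT32_MAX / 1e9);
                printf("us units: %.1f min before overflow\n", INT32_MAX / 1e6 / 60.0);
                return 0;
        }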
We will modify task_struct under -+ * rq lock as p is rq->curr -+ */ -+ __set_tsk_resched(p); -+} -+ -+#ifdef CONFIG_SMP -+ -+#ifdef CONFIG_SCHED_SMT -+static int active_load_balance_cpu_stop(void *data) -+{ -+ struct rq *rq = this_rq(); -+ struct task_struct *p = data; -+ int cpu; -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ rq->active_balance = 0; -+ /* -+ * _something_ may have changed the task, double check again -+ */ -+ if (task_on_rq_queued(p) && task_rq(p) == rq && -+ (cpu = cpumask_any_and(&p->cpus_mask, &sched_cpu_sg_idle_mask)) < nr_cpu_ids) -+ rq = __migrate_task(rq, p, cpu); -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_restore(flags); -+ -+ return 0; -+} -+ -+/* pds_sg_balance_trigger - trigger slibing group balance for @cpu */ -+static void pds_sg_balance_trigger(const int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ struct task_struct *curr; -+ -+ if (!raw_spin_trylock_irqsave(&rq->lock, flags)) -+ return; -+ curr = rq->curr; -+ if (!is_idle_task(curr) && -+ cpumask_intersects(&curr->cpus_mask, &sched_cpu_sg_idle_mask)) { -+ int active_balance = 0; -+ -+ if (likely(!rq->active_balance)) { -+ rq->active_balance = 1; -+ active_balance = 1; -+ } -+ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ if (likely(active_balance)) -+ stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop, -+ curr, &rq->active_balance_work); -+ } else -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+} -+ -+/* -+ * pds_sg_balance_check - slibing group balance check for run queue @rq -+ */ -+static inline void pds_sg_balance_check(const struct rq *rq) -+{ -+ cpumask_t chk; -+ int i; -+ -+ /* Only online cpu will do sg balance checking */ -+ if (unlikely(!rq->online)) -+ return; -+ -+ /* Only cpu in slibing idle group will do the checking */ -+ if (!cpumask_test_cpu(cpu_of(rq), &sched_cpu_sg_idle_mask)) -+ return; -+ -+ /* Find potential cpus which can migrate the currently running task */ -+ if (!cpumask_andnot(&chk, &sched_rq_pending_masks[SCHED_RQ_EMPTY], -+ &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ return; -+ -+ for_each_cpu(i, &chk) { -+ /* skip the cpu which has idle slibing cpu */ -+ if (cpumask_test_cpu(per_cpu(sched_sibling_cpu, i), -+ &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ continue; -+ pds_sg_balance_trigger(i); -+ } -+} -+#endif /* CONFIG_SCHED_SMT */ -+#endif /* CONFIG_SMP */ -+ -+/* -+ * This function gets called by the timer code, with HZ frequency. -+ * We call it with interrupts disabled. -+ */ -+void scheduler_tick(void) -+{ -+ int cpu __maybe_unused = smp_processor_id(); -+ struct rq *rq = cpu_rq(cpu); -+ -+ sched_clock_tick(); -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ pds_scheduler_task_tick(rq); -+ update_sched_rq_queued_masks_normal(rq); -+ calc_global_load_tick(rq); -+ psi_task_tick(rq); -+ -+ rq->last_tick = rq->clock; -+ raw_spin_unlock(&rq->lock); -+ -+ perf_event_task_tick(); -+} -+ -+#ifdef CONFIG_NO_HZ_FULL -+struct tick_work { -+ int cpu; -+ atomic_t state; -+ struct delayed_work work; -+}; -+/* Values for ->state, see diagram below. 
*/ -+#define TICK_SCHED_REMOTE_OFFLINE 0 -+#define TICK_SCHED_REMOTE_OFFLINING 1 -+#define TICK_SCHED_REMOTE_RUNNING 2 -+ -+/* -+ * State diagram for ->state: -+ * -+ * -+ * TICK_SCHED_REMOTE_OFFLINE -+ * | ^ -+ * | | -+ * | | sched_tick_remote() -+ * | | -+ * | | -+ * +--TICK_SCHED_REMOTE_OFFLINING -+ * | ^ -+ * | | -+ * sched_tick_start() | | sched_tick_stop() -+ * | | -+ * V | -+ * TICK_SCHED_REMOTE_RUNNING -+ * -+ * -+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() -+ * and sched_tick_start() are happy to leave the state in RUNNING. -+ */ -+ -+static struct tick_work __percpu *tick_work_cpu; -+ -+static void sched_tick_remote(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct tick_work *twork = container_of(dwork, struct tick_work, work); -+ int cpu = twork->cpu; -+ struct rq *rq = cpu_rq(cpu); -+ struct task_struct *curr; -+ unsigned long flags; -+ u64 delta; -+ int os; -+ -+ /* -+ * Handle the tick only if it appears the remote CPU is running in full -+ * dynticks mode. The check is racy by nature, but missing a tick or -+ * having one too much is no big deal because the scheduler tick updates -+ * statistics and checks timeslices in a time-independent way, regardless -+ * of when exactly it is running. -+ */ -+ if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) -+ goto out_requeue; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ curr = rq->curr; -+ -+ if (is_idle_task(curr) || cpu_is_offline(cpu)) -+ goto out_unlock; -+ -+ update_rq_clock(rq); -+ delta = rq_clock_task(rq) - curr->last_ran; -+ -+ /* -+ * Make sure the next tick runs within a reasonable -+ * amount of time. -+ */ -+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); -+ pds_scheduler_task_tick(rq); -+ update_sched_rq_queued_masks_normal(rq); -+ -+out_unlock: -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+out_requeue: -+ /* -+ * Run the remote tick once per second (1Hz). This arbitrary -+ * frequency is large enough to avoid overload but short enough -+ * to keep scheduler internal stats reasonably up to date. But -+ * first update state to reflect hotplug activity if required. 
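The state diagram above can be mimicked in userspace with C11 atomics. The sketch below is illustrative only; it open-codes a fetch_add_unless() with a compare-and-swap loop, which is roughly what the atomic_fetch_add_unless() call just below does: the remote tick worker only decrements the state when it is not RUNNING, so it completes an OFFLINING -> OFFLINE transition (the stop path in the diagram) but leaves a RUNNING CPU's state alone.

        #include <stdatomic.h>
        #include <stdio.h>

        enum { REMOTE_OFFLINE, REMOTE_OFFLINING, REMOTE_RUNNING };

        /* add 'a' to *v unless *v == unless; return the old value (CAS loop) */
        static int fetch_add_unless(atomic_int *v, int a, int unless)
        {
                int old = atomic_load(v);

                while (old != unless &&
                       !atomic_compare_exchange_weak(v, &old, old + a))
                        ;
                return old;
        }

        int main(void)
        {
                atomic_int state = REMOTE_OFFLINE;
                int os;

                /* start path: unconditionally switch to RUNNING */
                os = atomic_exchange(&state, REMOTE_RUNNING);
                printf("start: %d -> %d\n", os, atomic_load(&state));

                /* remote tick: decrement unless RUNNING -- no effect here */
                os = fetch_add_unless(&state, -1, REMOTE_RUNNING);
                printf("tick : %d -> %d (RUNNING is left alone)\n",
                       os, atomic_load(&state));

                /* if the state were OFFLINING, the same call finishes the transition */
                atomic_store(&state, REMOTE_OFFLINING);
                os = fetch_add_unless(&state, -1, REMOTE_RUNNING);
                printf("tick : %d -> %d (OFFLINING -> OFFLINE)\n",
                       os, atomic_load(&state));
                return 0;
        }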
-+ */ -+ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); -+ if (os == TICK_SCHED_REMOTE_RUNNING) -+ queue_delayed_work(system_unbound_wq, dwork, HZ); -+} -+ -+static void sched_tick_start(int cpu) -+{ -+ int os; -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); -+ if (os == TICK_SCHED_REMOTE_OFFLINE) { -+ twork->cpu = cpu; -+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); -+ queue_delayed_work(system_unbound_wq, &twork->work, HZ); -+ } -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void sched_tick_stop(int cpu) -+{ -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ cancel_delayed_work_sync(&twork->work); -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+int __init sched_tick_offload_init(void) -+{ -+ tick_work_cpu = alloc_percpu(struct tick_work); -+ BUG_ON(!tick_work_cpu); -+ return 0; -+} -+ -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_tick_start(int cpu) { } -+static inline void sched_tick_stop(int cpu) { } -+#endif -+ -+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ -+ defined(CONFIG_PREEMPT_TRACER)) -+/* -+ * If the value passed in is equal to the current preempt count -+ * then we just disabled preemption. Start timing the latency. -+ */ -+static inline void preempt_latency_start(int val) -+{ -+ if (preempt_count() == val) { -+ unsigned long ip = get_lock_parent_ip(); -+#ifdef CONFIG_DEBUG_PREEMPT -+ current->preempt_disable_ip = ip; -+#endif -+ trace_preempt_off(CALLER_ADDR0, ip); -+ } -+} -+ -+void preempt_count_add(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) -+ return; -+#endif -+ __preempt_count_add(val); -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Spinlock count overflowing soon? -+ */ -+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= -+ PREEMPT_MASK - 10); -+#endif -+ preempt_latency_start(val); -+} -+EXPORT_SYMBOL(preempt_count_add); -+NOKPROBE_SYMBOL(preempt_count_add); -+ -+/* -+ * If the value passed in equals to the current preempt count -+ * then we just enabled preemption. Stop timing the latency. -+ */ -+static inline void preempt_latency_stop(int val) -+{ -+ if (preempt_count() == val) -+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); -+} -+ -+void preempt_count_sub(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) -+ return; -+ /* -+ * Is the spinlock portion underflowing? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && -+ !(preempt_count() & PREEMPT_MASK))) -+ return; -+#endif -+ -+ preempt_latency_stop(val); -+ __preempt_count_sub(val); -+} -+EXPORT_SYMBOL(preempt_count_sub); -+NOKPROBE_SYMBOL(preempt_count_sub); -+ -+#else -+static inline void preempt_latency_start(int val) { } -+static inline void preempt_latency_stop(int val) { } -+#endif -+ -+/* -+ * Timeslices below RESCHED_US are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. 
SCHED_BATCH tasks -+ * have been flagged be not latency sensitive and likely to be fully CPU -+ * bound so every time they're rescheduled they have their time_slice -+ * refilled, but get a new later deadline to have little effect on -+ * SCHED_NORMAL tasks. -+ -+ */ -+static inline void check_deadline(struct task_struct *p, struct rq *rq) -+{ -+ if (rq->idle == p) -+ return; -+ -+ pds_update_curr(rq, p); -+ -+ if (p->time_slice < RESCHED_US) { -+ time_slice_expired(p, rq); -+ if (SCHED_ISO == p->policy && ISO_PRIO == p->prio) { -+ p->prio = NORMAL_PRIO; -+ p->deadline = rq->clock + task_deadline_diff(p); -+ update_task_priodl(p); -+ } -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) -+ requeue_task(p, rq); -+ } -+} -+ -+#ifdef CONFIG_SMP -+ -+#define SCHED_RQ_NR_MIGRATION (32UL) -+/* -+ * Migrate pending tasks in @rq to @dest_cpu -+ * Will try to migrate mininal of half of @rq nr_running tasks and -+ * SCHED_RQ_NR_MIGRATION to @dest_cpu -+ */ -+static inline int -+migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, int filter_prio) -+{ -+ struct task_struct *p; -+ int dest_cpu = cpu_of(dest_rq); -+ int nr_migrated = 0; -+ int nr_tries = min((rq->nr_running + 1) / 2, SCHED_RQ_NR_MIGRATION); -+ struct skiplist_node *node = rq->sl_header.next[0]; -+ -+ while (nr_tries && node != &rq->sl_header) { -+ p = skiplist_entry(node, struct task_struct, sl_node); -+ node = node->next[0]; -+ -+ if (task_running(p)) -+ continue; -+ if (p->prio >= filter_prio) -+ break; -+ if (cpumask_test_cpu(dest_cpu, &p->cpus_mask)) { -+ detach_task(rq, p, dest_cpu); -+ attach_task(dest_rq, p); -+ nr_migrated++; -+ } -+ nr_tries--; -+ /* make a jump */ -+ if (node == &rq->sl_header) -+ break; -+ node = node->next[0]; -+ } -+ -+ return nr_migrated; -+} -+ -+static inline int -+take_queued_task_cpumask(struct rq *rq, cpumask_t *chk_mask, int filter_prio) -+{ -+ int src_cpu; -+ -+ for_each_cpu(src_cpu, chk_mask) { -+ int nr_migrated; -+ struct rq *src_rq = cpu_rq(src_cpu); -+ -+ if (!do_raw_spin_trylock(&src_rq->lock)) { -+ if (PRIO_LIMIT == filter_prio) -+ continue; -+ return 0; -+ } -+ spin_acquire(&src_rq->lock.dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ -+ update_rq_clock(src_rq); -+ nr_migrated = migrate_pending_tasks(src_rq, rq, filter_prio); -+ -+ spin_release(&src_rq->lock.dep_map, 1, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ -+ if (nr_migrated || PRIO_LIMIT != filter_prio) -+ return nr_migrated; -+ } -+ return 0; -+} -+ -+static inline int take_other_rq_task(struct rq *rq, int cpu, int filter_prio) -+{ -+ struct cpumask *affinity_mask, *end; -+ struct cpumask chk; -+ -+ if (PRIO_LIMIT == filter_prio) { -+ cpumask_complement(&chk, &sched_rq_pending_masks[SCHED_RQ_EMPTY]); -+#ifdef CONFIG_SMT_NICE -+ { -+ /* also try to take IDLE priority tasks from smt supressed cpu */ -+ struct cpumask t; -+ if (cpumask_and(&t, &sched_smt_supressed_mask, -+ &sched_rq_queued_masks[SCHED_RQ_IDLE])) -+ cpumask_or(&chk, &chk, &t); -+ } -+#endif -+ } else if (NORMAL_PRIO == filter_prio) { -+ cpumask_or(&chk, &sched_rq_pending_masks[SCHED_RQ_RT], -+ &sched_rq_pending_masks[SCHED_RQ_ISO]); -+ } else if (IDLE_PRIO == filter_prio) { -+ cpumask_complement(&chk, &sched_rq_pending_masks[SCHED_RQ_EMPTY]); -+ cpumask_andnot(&chk, &chk, &sched_rq_pending_masks[SCHED_RQ_IDLE]); -+ } else -+ cpumask_copy(&chk, &sched_rq_pending_masks[SCHED_RQ_RT]); -+ -+ if (cpumask_empty(&chk)) -+ return 0; -+ -+ affinity_mask = per_cpu(sched_cpu_llc_start_mask, cpu); -+ end = per_cpu(sched_cpu_affinity_chk_end_masks, cpu); -+ do { -+ 
struct cpumask tmp; -+ -+ if (cpumask_and(&tmp, &chk, affinity_mask) && -+ take_queued_task_cpumask(rq, &tmp, filter_prio)) -+ return 1; -+ } while (++affinity_mask < end); -+ -+ return 0; -+} -+#endif -+ -+static inline struct task_struct * -+choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) -+{ -+ struct task_struct *next = rq_first_queued_task(rq); -+ -+#ifdef CONFIG_SMT_NICE -+ if (cpumask_test_cpu(cpu, &sched_smt_supressed_mask)) { -+ if (next->prio >= IDLE_PRIO) { -+ if (rq->online && -+ take_other_rq_task(rq, cpu, IDLE_PRIO)) -+ return rq_first_queued_task(rq); -+ return rq->idle; -+ } -+ } -+#endif -+ -+#ifdef CONFIG_SMP -+ if (likely(rq->online)) -+ if (take_other_rq_task(rq, cpu, next->prio)) { -+ resched_curr(rq); -+ return rq_first_queued_task(rq); -+ } -+#endif -+ return next; -+} -+ -+static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ return p->preempt_disable_ip; -+#else -+ return 0; -+#endif -+} -+ -+/* -+ * Print scheduling while atomic bug: -+ */ -+static noinline void __schedule_bug(struct task_struct *prev) -+{ -+ /* Save this before calling printk(), since that will clobber it */ -+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ if (oops_in_progress) -+ return; -+ -+ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", -+ prev->comm, prev->pid, preempt_count()); -+ -+ debug_show_held_locks(prev); -+ print_modules(); -+ if (irqs_disabled()) -+ print_irqtrace_events(prev); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && in_atomic_preempt_off()) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+ if (panic_on_warn) -+ panic("scheduling while atomic\n"); -+ -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+ -+/* -+ * Various schedule()-time debugging checks and statistics: -+ */ -+static inline void schedule_debug(struct task_struct *prev, bool preempt) -+{ -+#ifdef CONFIG_SCHED_STACK_END_CHECK -+ if (task_stack_end_corrupted(prev)) -+ panic("corrupted stack end detected inside scheduler\n"); -+#endif -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && prev->state && prev->non_block_count) { -+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", -+ prev->comm, prev->pid, prev->non_block_count); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+ } -+#endif -+ -+ if (unlikely(in_atomic_preempt_off())) { -+ __schedule_bug(prev); -+ preempt_count_set(PREEMPT_DISABLED); -+ } -+ rcu_sleep_check(); -+ -+ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -+ -+ schedstat_inc(this_rq()->sched_count); -+} -+ -+static inline void set_rq_task(struct rq *rq, struct task_struct *p) -+{ -+ p->last_ran = rq->clock_task; -+ -+#ifdef CONFIG_HIGH_RES_TIMERS -+ if (p != rq->idle) -+ hrtick_start(rq, US_TO_NS(p->time_slice)); -+#endif -+ /* update rq->dither */ -+ rq->dither = rq_dither(rq); -+} -+ -+/* -+ * schedule() is the main scheduler function. -+ * -+ * The main means of driving the scheduler and thus entering this function are: -+ * -+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. -+ * -+ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return -+ * paths. For example, see arch/x86/entry_64.S. -+ * -+ * To drive preemption between tasks, the scheduler sets the flag in timer -+ * interrupt handler scheduler_tick(). -+ * -+ * 3. Wakeups don't really cause entry into schedule(). They add a -+ * task to the run-queue and that's it. 
-+ * -+ * Now, if the new task added to the run-queue preempts the current -+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets -+ * called on the nearest possible occasion: -+ * -+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): -+ * -+ * - in syscall or exception context, at the next outmost -+ * preempt_enable(). (this might be as soon as the wake_up()'s -+ * spin_unlock()!) -+ * -+ * - in IRQ context, return from interrupt-handler to -+ * preemptible context -+ * -+ * - If the kernel is not preemptible (CONFIG_PREEMPT is not set) -+ * then at the next: -+ * -+ * - cond_resched() call -+ * - explicit schedule() call -+ * - return from syscall or exception to user-space -+ * - return from interrupt-handler to user-space -+ * -+ * WARNING: must be called with preemption disabled! -+ */ -+static void __sched notrace __schedule(bool preempt) -+{ -+ struct task_struct *prev, *next; -+ unsigned long *switch_count; -+ struct rq *rq; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ rq = cpu_rq(cpu); -+ prev = rq->curr; -+ -+ schedule_debug(prev, preempt); -+ -+ /* by passing sched_feat(HRTICK) checking which PDS doesn't support */ -+ hrtick_clear(rq); -+ -+ local_irq_disable(); -+ rcu_note_context_switch(preempt); -+ -+ /* -+ * Make sure that signal_pending_state()->signal_pending() below -+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(). -+ * -+ * The membarrier system call requires a full memory barrier -+ * after coming from user-space, before storing to rq->curr. -+ */ -+ raw_spin_lock(&rq->lock); -+ smp_mb__after_spinlock(); -+ -+ update_rq_clock(rq); -+ -+ switch_count = &prev->nivcsw; -+ if (!preempt && prev->state) { -+ if (signal_pending_state(prev->state, prev)) { -+ prev->state = TASK_RUNNING; -+ } else { -+ deactivate_task(prev, rq); -+ -+ if (prev->in_iowait) { -+ atomic_inc(&rq->nr_iowait); -+ delayacct_blkio_start(); -+ } -+ } -+ switch_count = &prev->nvcsw; -+ } -+ -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ -+ check_deadline(prev, rq); -+ -+ next = choose_next_task(rq, cpu, prev); -+ -+ set_rq_task(rq, next); -+ -+ if (prev != next) { -+ if (next->prio == PRIO_LIMIT) -+ schedstat_inc(rq->sched_goidle); -+ -+ /* -+ * RCU users of rcu_dereference(rq->curr) may not see -+ * changes to task_struct made by pick_next_task(). -+ */ -+ RCU_INIT_POINTER(rq->curr, next); -+ /* -+ * The membarrier system call requires each architecture -+ * to have a full memory barrier after updating -+ * rq->curr, before returning to user-space. -+ * -+ * Here are the schemes providing that barrier on the -+ * various architectures: -+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. -+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
-+ * - finish_lock_switch() for weakly-ordered -+ * architectures where spin_unlock is a full barrier, -+ * - switch_to() for arm64 (weakly-ordered, spin_unlock -+ * is a RELEASE barrier), -+ */ -+ ++*switch_count; -+ rq->nr_switches++; -+ -+ trace_sched_switch(preempt, prev, next); -+ -+ /* Also unlocks the rq: */ -+ rq = context_switch(rq, prev, next); -+#ifdef CONFIG_SCHED_SMT -+ pds_sg_balance_check(rq); -+#endif -+ } else -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+void __noreturn do_task_dead(void) -+{ -+ /* Causes final put_task_struct in finish_task_switch(): */ -+ set_special_state(TASK_DEAD); -+ -+ /* Tell freezer to ignore us: */ -+ current->flags |= PF_NOFREEZE; -+ __schedule(false); -+ -+ BUG(); -+ -+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -+ for (;;) -+ cpu_relax(); -+} -+ -+static inline void sched_submit_work(struct task_struct *tsk) -+{ -+ if (!tsk->state || tsk_is_pi_blocked(tsk) || -+ signal_pending_state(tsk->state, tsk)) -+ return; -+ -+ /* -+ * If a worker went to sleep, notify and ask workqueue whether -+ * it wants to wake up a task to maintain concurrency. -+ * As this function is called inside the schedule() context, -+ * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker. -+ */ -+ if (tsk->flags & PF_WQ_WORKER) { -+ preempt_disable(); -+ wq_worker_sleeping(tsk); -+ preempt_enable_no_resched(); -+ } -+ -+ /* -+ * If we are going to sleep and we have plugged IO queued, -+ * make sure to submit it to avoid deadlocks. -+ */ -+ if (blk_needs_flush_plug(tsk)) -+ blk_schedule_flush_plug(tsk); -+} -+ -+static void sched_update_worker(struct task_struct *tsk) -+{ -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_running(tsk); -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ struct task_struct *tsk = current; -+ -+ sched_submit_work(tsk); -+ do { -+ preempt_disable(); -+ __schedule(false); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ sched_update_worker(tsk); -+} -+EXPORT_SYMBOL(schedule); -+ -+/* -+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted -+ * state (have scheduled out non-voluntarily) by making sure that all -+ * tasks have either left the run queue or have gone into user space. -+ * As idle tasks do not do either, they must not ever be preempted -+ * (schedule out non-voluntarily). -+ * -+ * schedule_idle() is similar to schedule_preempt_disable() except that it -+ * never enables preemption because it does not call sched_submit_work(). -+ */ -+void __sched schedule_idle(void) -+{ -+ /* -+ * As this skips calling sched_submit_work(), which the idle task does -+ * regardless because that function is a nop when the task is in a -+ * TASK_RUNNING state, make sure this isn't used someplace that the -+ * current task can be in any other state. Note, idle is always in the -+ * TASK_RUNNING state. -+ */ -+ WARN_ON_ONCE(current->state); -+ do { -+ __schedule(false); -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_CONTEXT_TRACKING -+asmlinkage __visible void __sched schedule_user(void) -+{ -+ /* -+ * If we come here after a random call to set_need_resched(), -+ * or we have been woken up remotely but the IPI has not yet arrived, -+ * we haven't yet exited the RCU idle mode. Do it here manually until -+ * we find a better solution. -+ * -+ * NB: There are buggy callers of this function. Ideally we -+ * should warn if prev_state != CONTEXT_USER, but that will trigger -+ * too frequently to make sense yet. 
-+ */ -+ enum ctx_state prev_state = exception_enter(); -+ schedule(); -+ exception_exit(prev_state); -+} -+#endif -+ -+/** -+ * schedule_preempt_disabled - called with preemption disabled -+ * -+ * Returns with preemption disabled. Note: preempt_count must be 1 -+ */ -+void __sched schedule_preempt_disabled(void) -+{ -+ sched_preempt_enable_no_resched(); -+ schedule(); -+ preempt_disable(); -+} -+ -+static void __sched notrace preempt_schedule_common(void) -+{ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ __schedule(true); -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ -+ /* -+ * Check again in case we missed a preemption opportunity -+ * between schedule and now. -+ */ -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_PREEMPTION -+/* -+ * This is the entry point to schedule() from in-kernel preemption -+ * off of preempt_enable. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule(void) -+{ -+ /* -+ * If there is a non-zero preempt_count or interrupts are disabled, -+ * we do not want to preempt the current task. Just return.. -+ */ -+ if (likely(!preemptible())) -+ return; -+ -+ preempt_schedule_common(); -+} -+NOKPROBE_SYMBOL(preempt_schedule); -+EXPORT_SYMBOL(preempt_schedule); -+ -+/** -+ * preempt_schedule_notrace - preempt_schedule called by tracing -+ * -+ * The tracing infrastructure uses preempt_enable_notrace to prevent -+ * recursion and tracing preempt enabling caused by the tracing -+ * infrastructure itself. But as tracing can happen in areas coming -+ * from userspace or just about to enter userspace, a preempt enable -+ * can occur before user_exit() is called. This will cause the scheduler -+ * to be called when the system is still in usermode. -+ * -+ * To prevent this, the preempt_enable_notrace will use this function -+ * instead of preempt_schedule() to exit user context if needed before -+ * calling the scheduler. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) -+{ -+ enum ctx_state prev_ctx; -+ -+ if (likely(!preemptible())) -+ return; -+ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ /* -+ * Needs preempt disabled in case user_exit() is traced -+ * and the tracer calls preempt_enable_notrace() causing -+ * an infinite recursion. 
-+ */ -+ prev_ctx = exception_enter(); -+ __schedule(true); -+ exception_exit(prev_ctx); -+ -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ } while (need_resched()); -+} -+EXPORT_SYMBOL_GPL(preempt_schedule_notrace); -+ -+#endif /* CONFIG_PREEMPTION */ -+ -+/* -+ * This is the entry point to schedule() from kernel preemption -+ * off of irq context. -+ * Note, that this is called and return with irqs disabled. This will -+ * protect us against recursive calling from irq. -+ */ -+asmlinkage __visible void __sched preempt_schedule_irq(void) -+{ -+ enum ctx_state prev_state; -+ -+ /* Catch callers which need to be fixed */ -+ BUG_ON(preempt_count() || !irqs_disabled()); -+ -+ prev_state = exception_enter(); -+ -+ do { -+ preempt_disable(); -+ local_irq_enable(); -+ __schedule(true); -+ local_irq_disable(); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ -+ exception_exit(prev_state); -+} -+ -+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, -+ void *key) -+{ -+ return try_to_wake_up(curr->private, mode, wake_flags); -+} -+EXPORT_SYMBOL(default_wake_function); -+ -+static inline void -+check_task_changed(struct rq *rq, struct task_struct *p) -+{ -+ /* -+ * Trigger changes when task priority/deadline modified. -+ */ -+ if (task_on_rq_queued(p)) { -+ struct task_struct *first; -+ -+ requeue_task(p, rq); -+ -+ /* Resched if first queued task not running and not IDLE */ -+ if ((first = rq_first_queued_task(rq)) != rq->curr && -+ !task_running_idle(first)) -+ resched_curr(rq); -+ } -+} -+ -+#ifdef CONFIG_RT_MUTEXES -+ -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ -+/* -+ * rt_mutex_setprio - set the current priority of a task -+ * @p: task to boost -+ * @pi_task: donor task -+ * -+ * This function changes the 'effective' priority of a task. It does -+ * not touch ->normal_prio like __setscheduler(). -+ * -+ * Used by the rt_mutex code to implement priority inheritance -+ * logic. Call site only calls if the priority of the task changed. -+ */ -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) -+{ -+ int prio; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. -+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio) -+ return; -+ -+ rq = __task_access_lock(p, &lock); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio) -+ goto out_unlock; -+ -+ /* -+ * Idle task boosting is a nono in general. 
There is one -+ * exception, when PREEMPT_RT and NOHZ is active: -+ * -+ * The idle task calls get_next_timer_interrupt() and holds -+ * the timer wheel base->lock on the CPU and another CPU wants -+ * to access the timer (probably to cancel it). We can safely -+ * ignore the boosting request, as the idle CPU runs this code -+ * with interrupts disabled and will complete the lock -+ * protected section without being interrupted. So there is no -+ * real need to boost. -+ */ -+ if (unlikely(p == rq->idle)) { -+ WARN_ON(p != rq->curr); -+ WARN_ON(p->pi_blocked_on); -+ goto out_unlock; -+ } -+ -+ trace_sched_pi_setprio(p, pi_task); -+ p->prio = prio; -+ update_task_priodl(p); -+ -+ check_task_changed(rq, p); -+ -+out_unlock: -+ __task_access_unlock(p, lock); -+} -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} -+#endif -+ -+void set_user_nice(struct task_struct *p, long nice) -+{ -+ int new_static; -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) -+ return; -+ new_static = NICE_TO_PRIO(nice); -+ /* -+ * We have to be careful, if called from sys_setpriority(), -+ * the task might be in the middle of scheduling on another CPU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ /* rq lock may not held!! */ -+ update_rq_clock(rq); -+ -+ p->static_prio = new_static; -+ /* -+ * The RT priorities are set via sched_setscheduler(), but we still -+ * allow the 'normal' nice value to be set - but as expected -+ * it wont have any effect on scheduling until the task is -+ * not SCHED_NORMAL/SCHED_BATCH: -+ */ -+ if (task_has_rt_policy(p)) -+ goto out_unlock; -+ -+ p->deadline -= task_deadline_diff(p); -+ p->deadline += static_deadline_diff(new_static); -+ p->prio = effective_prio(p); -+ update_task_priodl(p); -+ -+ check_task_changed(rq, p); -+out_unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+EXPORT_SYMBOL(set_user_nice); -+ -+/* -+ * can_nice - check if a task can reduce its nice value -+ * @p: task -+ * @nice: nice value -+ */ -+int can_nice(const struct task_struct *p, const int nice) -+{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); -+} -+ -+#ifdef __ARCH_WANT_SYS_NICE -+ -+/* -+ * sys_nice - change the priority of the current process. -+ * @increment: priority increment -+ * -+ * sys_setpriority is a more generic, but much slower function that -+ * does similar things. -+ */ -+SYSCALL_DEFINE1(nice, int, increment) -+{ -+ long nice, retval; -+ -+ /* -+ * Setpriority might change our priority at the same moment. -+ * We don't have to worry. Conceptually one call occurs first -+ * and we have a single winner. -+ */ -+ -+ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); -+ nice = task_nice(current) + increment; -+ -+ nice = clamp_val(nice, MIN_NICE, MAX_NICE); -+ if (increment < 0 && !can_nice(current, nice)) -+ return -EPERM; -+ -+ retval = security_task_setnice(current, nice); -+ if (retval) -+ return retval; -+ -+ set_user_nice(current, nice); -+ return 0; -+} -+ -+#endif -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. 
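can_nice() above compares the requested nice value, converted to the [1,40] RLIMIT_NICE scale, against the task's rlimit. Assuming the usual nice_to_rlimit() mapping of MAX_NICE - nice + 1 (so nice 19 maps to 1 and nice -20 maps to 40), the check can be reproduced as below; the RLIMIT_NICE value of 30 is just an example.

        #include <stdio.h>

        #define MAX_NICE 19

        /* assumed to match the kernel helper: nice 19 -> 1, nice -20 -> 40 */
        static long nice_to_rlimit(long nice)
        {
                return MAX_NICE - nice + 1;
        }

        int main(void)
        {
                long rlimit_nice = 30;   /* example RLIMIT_NICE soft limit */
                long nice;

                for (nice = -20; nice <= 19; nice += 13)
                        printf("nice %3ld -> rlimit scale %2ld : %s\n",
                               nice, nice_to_rlimit(nice),
                               nice_to_rlimit(nice) <= rlimit_nice ?
                               "allowed" : "needs CAP_SYS_NICE");
                return 0;
        }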
Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ int level, prio = p->prio - MAX_RT_PRIO; -+ static const int level_to_nice_prio[] = {39, 33, 26, 20, 14, 7, 0, 0}; -+ -+ /* rt tasks */ -+ if (prio <= 0) -+ goto out; -+ -+ preempt_disable(); -+ level = task_deadline_level(p, this_rq()); -+ preempt_enable(); -+ prio += level_to_nice_prio[level]; -+ if (idleprio_task(p)) -+ prio += NICE_WIDTH; -+out: -+ return prio; -+} -+ -+/** -+ * idle_cpu - is a given CPU idle currently? -+ * @cpu: the processor in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int idle_cpu(int cpu) -+{ -+ return cpu_curr(cpu) == cpu_rq(cpu)->idle; -+} -+ -+/** -+ * idle_task - return the idle task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * Return: The idle task for the cpu @cpu. -+ */ -+struct task_struct *idle_task(int cpu) -+{ -+ return cpu_rq(cpu)->idle; -+} -+ -+/** -+ * find_process_by_pid - find a process with a matching PID value. -+ * @pid: the pid in question. -+ * -+ * The task of @pid, if found. %NULL otherwise. -+ */ -+static inline struct task_struct *find_process_by_pid(pid_t pid) -+{ -+ return pid ? find_task_by_vpid(pid) : current; -+} -+ -+#ifdef CONFIG_SMP -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. -+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. -+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ int dest_cpu; -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. 
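The surrounding __set_cpus_allowed_ptr() picks a destination CPU by intersecting the requested mask with the set of usable CPUs (cpu_active_mask, or cpu_online_mask for kernel threads) and taking any CPU from the result, failing with -EINVAL when the intersection is empty. With plain 64-bit masks the same selection looks roughly like this (illustrative only, nothing here is kernel API):

        #include <stdint.h>
        #include <stdio.h>

        /* return the index of any set bit in (valid & requested), or -1 if none */
        static int pick_dest_cpu(uint64_t valid_mask, uint64_t requested_mask)
        {
                uint64_t both = valid_mask & requested_mask;

                return both ? __builtin_ctzll(both) : -1;   /* lowest set bit */
        }

        int main(void)
        {
                uint64_t active = 0x0fULL;      /* CPUs 0-3 usable          */
                uint64_t wanted = 0x0cULL;      /* caller asks for CPUs 2-3 */

                printf("dest cpu = %d\n", pick_dest_cpu(active, wanted));   /* 2 */
                printf("dest cpu = %d\n", pick_dest_cpu(active, 0x30ULL));  /* -1 -> -EINVAL */
                return 0;
        }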
-+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (cpumask_equal(&p->cpus_mask, new_mask)) -+ goto out; -+ -+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ do_set_cpus_allowed(p, new_mask); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. -+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(p) || p->state == TASK_WAKING) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ /* Need help from migration thread: drop lock and wait. */ -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -+ return 0; -+ } -+ if (task_on_rq_queued(p)) { -+ /* -+ * OK, since we're going to drop the lock immediately -+ * afterwards anyway. -+ */ -+ update_rq_clock(rq); -+ rq = move_queued_task(rq, p, dest_cpu); -+ lock = &rq->lock; -+ } -+ -+out: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ return ret; -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+#else -+static inline int -+__set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+#endif -+ -+static u64 task_init_deadline(const struct task_struct *p) -+{ -+ return task_rq(p)->clock + task_deadline_diff(p); -+} -+ -+u64 (* task_init_deadline_func_tbl[])(const struct task_struct *p) = { -+ task_init_deadline, /* SCHED_NORMAL */ -+ NULL, /* SCHED_FIFO */ -+ NULL, /* SCHED_RR */ -+ task_init_deadline, /* SCHED_BATCH */ -+ NULL, /* SCHED_ISO */ -+ task_init_deadline /* SCHED_IDLE */ -+}; -+ -+/* -+ * sched_setparam() passes in -1 for its policy, to let the functions -+ * it calls know not to change it. -+ */ -+#define SETPARAM_POLICY -1 -+ -+static void __setscheduler_params(struct task_struct *p, -+ const struct sched_attr *attr) -+{ -+ int old_policy = p->policy; -+ int policy = attr->sched_policy; -+ -+ if (policy == SETPARAM_POLICY) -+ policy = p->policy; -+ -+ p->policy = policy; -+ -+ /* -+ * allow normal nice value to be set, but will not have any -+ * effect on scheduling until the task not SCHED_NORMAL/ -+ * SCHED_BATCH -+ */ -+ p->static_prio = NICE_TO_PRIO(attr->sched_nice); -+ -+ /* -+ * __sched_setscheduler() ensures attr->sched_priority == 0 when -+ * !rt_policy. Always setting this ensures that things like -+ * getparam()/getattr() don't report silly values for !rt tasks. -+ */ -+ p->rt_priority = attr->sched_priority; -+ p->normal_prio = normal_prio(p); -+ -+ if (old_policy != policy) -+ p->deadline = (task_init_deadline_func_tbl[p->policy])? -+ task_init_deadline_func_tbl[p->policy](p):0ULL; -+} -+ -+/* Actually do priority change: must hold rq lock. 
*/ -+static void __setscheduler(struct rq *rq, struct task_struct *p, -+ const struct sched_attr *attr, bool keep_boost) -+{ -+ __setscheduler_params(p, attr); -+ -+ /* -+ * Keep a potential priority boosting if called from -+ * sched_setscheduler(). -+ */ -+ p->prio = normal_prio(p); -+ if (keep_boost) -+ p->prio = rt_effective_prio(p, p->prio); -+ update_task_priodl(p); -+} -+ -+/* -+ * check the target process has a UID that matches the current process's -+ */ -+static bool check_same_owner(struct task_struct *p) -+{ -+ const struct cred *cred = current_cred(), *pcred; -+ bool match; -+ -+ rcu_read_lock(); -+ pcred = __task_cred(p); -+ match = (uid_eq(cred->euid, pcred->euid) || -+ uid_eq(cred->euid, pcred->uid)); -+ rcu_read_unlock(); -+ return match; -+} -+ -+static int -+__sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, bool user, bool pi) -+{ -+ const struct sched_attr dl_squash_attr = { -+ .size = sizeof(struct sched_attr), -+ .sched_policy = SCHED_FIFO, -+ .sched_nice = 0, -+ .sched_priority = 99, -+ }; -+ int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ int retval, oldpolicy = -1; -+ int policy = attr->sched_policy; -+ unsigned long flags; -+ struct rq *rq; -+ int reset_on_fork; -+ raw_spinlock_t *lock; -+ -+ /* The pi code expects interrupts enabled */ -+ BUG_ON(pi && in_interrupt()); -+ -+ /* -+ * PDS supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO -+ */ -+ if (unlikely(SCHED_DEADLINE == policy)) { -+ attr = &dl_squash_attr; -+ policy = attr->sched_policy; -+ newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ } -+recheck: -+ /* Double check policy once rq lock held */ -+ if (policy < 0) { -+ reset_on_fork = p->sched_reset_on_fork; -+ policy = oldpolicy = p->policy; -+ } else { -+ reset_on_fork = !!(attr->sched_flags & SCHED_RESET_ON_FORK); -+ -+ if (policy > SCHED_IDLE) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & ~(SCHED_FLAG_ALL)) -+ return -EINVAL; -+ -+ /* -+ * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * SCHED_BATCH and SCHED_IDLE is 0. -+ */ -+ if (attr->sched_priority < 0 || -+ (p->mm && attr->sched_priority > MAX_USER_RT_PRIO - 1) || -+ (!p->mm && attr->sched_priority > MAX_RT_PRIO - 1)) -+ return -EINVAL; -+ if ((SCHED_RR == policy || SCHED_FIFO == policy) != -+ (attr->sched_priority != 0)) -+ return -EINVAL; -+ -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (SCHED_FIFO == policy || SCHED_RR == policy) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (attr->sched_priority > p->rt_priority && -+ attr->sched_priority > rlim_rtprio) -+ return -EPERM; -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ -+ if (user) { -+ retval = security_task_setscheduler(p); -+ if (retval) -+ return retval; -+ } -+ -+ if (pi) -+ cpuset_read_lock(); -+ -+ /* -+ * Make sure no PI-waiters arrive (or leave) while we are -+ * changing the priority of the task: -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ /* -+ * To be able to change p->policy safely, task_access_lock() -+ * must be called. 
-+ * IF use task_access_lock() here: -+ * For the task p which is not running, reading rq->stop is -+ * racy but acceptable as ->stop doesn't change much. -+ * An enhancemnet can be made to read rq->stop saftly. -+ */ -+ rq = __task_access_lock(p, &lock); -+ -+ /* -+ * Changing the policy of the stop threads its a very bad idea -+ */ -+ if (p == rq->stop) { -+ retval = -EINVAL; -+ goto unlock; -+ } -+ -+ /* -+ * If not changing anything there's no need to proceed further: -+ */ -+ if (unlikely(policy == p->policy)) { -+ if (rt_policy(policy) && attr->sched_priority != p->rt_priority) -+ goto change; -+ if (!rt_policy(policy) && -+ NICE_TO_PRIO(attr->sched_nice) != p->static_prio) -+ goto change; -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ retval = 0; -+ goto unlock; -+ } -+change: -+ -+ /* Re-check policy now with rq lock held */ -+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { -+ policy = oldpolicy = -1; -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ goto recheck; -+ } -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ -+ if (pi) { -+ /* -+ * Take priority boosted tasks into account. If the new -+ * effective priority is unchanged, we just store the new -+ * normal parameters and do not touch the scheduler class and -+ * the runqueue. This will be done when the task deboost -+ * itself. -+ */ -+ if (rt_effective_prio(p, newprio) == p->prio) { -+ __setscheduler_params(p, attr); -+ retval = 0; -+ goto unlock; -+ } -+ } -+ -+ __setscheduler(rq, p, attr, pi); -+ -+ check_task_changed(rq, p); -+ -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ if (pi) { -+ cpuset_read_unlock(); -+ rt_mutex_adjust_pi(p); -+ } -+ -+ preempt_enable(); -+ -+ return 0; -+ -+unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ return retval; -+} -+ -+static int _sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param, bool check) -+{ -+ struct sched_attr attr = { -+ .sched_policy = policy, -+ .sched_priority = param->sched_priority, -+ .sched_nice = PRIO_TO_NICE(p->static_prio), -+ }; -+ -+ /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ -+ if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { -+ attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ policy &= ~SCHED_RESET_ON_FORK; -+ attr.sched_policy = policy; -+ } -+ -+ return __sched_setscheduler(p, &attr, check, true); -+} -+ -+/** -+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * -+ * NOTE that the task may be already dead. 
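The permission rules spelled out in the removed __sched_setscheduler() above (RLIMIT_RTPRIO, the same-owner check, CAP_SYS_NICE) are exactly what an ordinary sched_setscheduler(2) caller runs into. A small userspace sketch follows; the priority of 10 is an arbitrary example value.

    /* Build: cc -o fifodemo fifodemo.c */
    #include <sched.h>
    #include <stdio.h>
    #include <sys/resource.h>

    int main(void)
    {
    	struct sched_param sp = { .sched_priority = 10 };
    	struct rlimit rt;

    	/* Unprivileged callers are bounded by RLIMIT_RTPRIO, mirroring the
    	 * checks in __sched_setscheduler(). */
    	if (getrlimit(RLIMIT_RTPRIO, &rt) == 0)
    		printf("RLIMIT_RTPRIO soft limit: %lu\n", (unsigned long)rt.rlim_cur);

    	if (sched_setscheduler(0, SCHED_FIFO, &sp) != 0) {
    		perror("sched_setscheduler(SCHED_FIFO)");
    		return 1;
    	}
    	printf("policy=%d rt_priority=%d\n",
    	       sched_getscheduler(0), sp.sched_priority);
    	return 0;
    }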
-+ */ -+int sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, true); -+} -+ -+EXPORT_SYMBOL_GPL(sched_setscheduler); -+ -+int sched_setattr(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, true, true); -+} -+EXPORT_SYMBOL_GPL(sched_setattr); -+ -+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, false, true); -+} -+ -+/** -+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Just like sched_setscheduler, only don't bother checking if the -+ * current context has permission. For example, this is needed in -+ * stop_machine(): we create temporary high priority worker threads, -+ * but our caller might not have that capability. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+int sched_setscheduler_nocheck(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, false); -+} -+EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); -+ -+static int -+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) -+{ -+ struct sched_param lparam; -+ struct task_struct *p; -+ int retval; -+ -+ if (!param || pid < 0) -+ return -EINVAL; -+ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) -+ return -EFAULT; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setscheduler(p, policy, &lparam); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/* -+ * Mimics kernel/events/core.c perf_copy_attr(). -+ */ -+static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr) -+{ -+ u32 size; -+ int ret; -+ -+ /* Zero the full structure, so that a short copy will be nice: */ -+ memset(attr, 0, sizeof(*attr)); -+ -+ ret = get_user(size, &uattr->size); -+ if (ret) -+ return ret; -+ -+ /* ABI compatibility quirk: */ -+ if (!size) -+ size = SCHED_ATTR_SIZE_VER0; -+ -+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) -+ goto err_size; -+ -+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); -+ if (ret) { -+ if (ret == -E2BIG) -+ goto err_size; -+ return ret; -+ } -+ -+ /* -+ * XXX: Do we want to be lenient like existing syscalls; or do we want -+ * to be strict and return an error on out-of-bounds values? -+ */ -+ attr->sched_nice = clamp(attr->sched_nice, -20, 19); -+ -+ /* sched/core.c uses zero here but we already know ret is zero */ -+ return 0; -+ -+err_size: -+ put_user(sizeof(*attr), &uattr->size); -+ return -E2BIG; -+} -+ -+/** -+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority -+ * @pid: the pid in question. -+ * @policy: new policy. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * @param: structure containing the new RT priority. -+ */ -+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) -+{ -+ if (policy < 0) -+ return -EINVAL; -+ -+ return do_sched_setscheduler(pid, policy, param); -+} -+ -+/** -+ * sys_sched_setparam - set/change the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the new RT priority. 
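sched_copy_attr() above implements the size-versioned sched_attr ABI: userspace passes attr.size, anything between SCHED_ATTR_SIZE_VER0 and PAGE_SIZE is accepted, and unknown trailing fields must be zero. Because glibc has traditionally not wrapped sched_setattr(2), the sketch below calls it through syscall(2) with a locally declared copy of the VER0 layout; the struct declaration and the chosen values are assumptions for illustration only.

    /* Build: cc -o setattrdemo setattrdemo.c */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Local copy of the 48-byte VER0 layout accepted by sched_copy_attr(). */
    struct sched_attr {
    	uint32_t size;
    	uint32_t sched_policy;
    	uint64_t sched_flags;
    	int32_t  sched_nice;
    	uint32_t sched_priority;
    	uint64_t sched_runtime;
    	uint64_t sched_deadline;
    	uint64_t sched_period;
    };

    int main(void)
    {
    	struct sched_attr attr;

    	memset(&attr, 0, sizeof(attr));
    	attr.size = sizeof(attr);
    	attr.sched_policy = 0;   /* SCHED_NORMAL (SCHED_OTHER in userspace) */
    	attr.sched_nice = 5;

    	if (syscall(SYS_sched_setattr, 0, &attr, 0) != 0) {
    		perror("sched_setattr");
    		return 1;
    	}
    	printf("nice set to %d via sched_setattr\n", (int)attr.sched_nice);
    	return 0;
    }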
-+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); -+} -+ -+/** -+ * sys_sched_setattr - same as above, but with extended sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ */ -+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, flags) -+{ -+ struct sched_attr attr; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || flags) -+ return -EINVAL; -+ -+ retval = sched_copy_attr(uattr, &attr); -+ if (retval) -+ return retval; -+ -+ if ((int)attr.sched_policy < 0) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (p != NULL) -+ retval = sched_setattr(p, &attr); -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread -+ * @pid: the pid in question. -+ * -+ * Return: On success, the policy of the thread. Otherwise, a negative error -+ * code. -+ */ -+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) -+{ -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (pid < 0) -+ goto out_nounlock; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (p) { -+ retval = security_task_getscheduler(p); -+ if (!retval) -+ retval = p->policy; -+ } -+ rcu_read_unlock(); -+ -+out_nounlock: -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the RT priority. -+ * -+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error -+ * code. -+ */ -+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ struct sched_param lp = { .sched_priority = 0 }; -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (!param || pid < 0) -+ goto out_nounlock; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ if (task_has_rt_policy(p)) -+ lp.sched_priority = p->rt_priority; -+ rcu_read_unlock(); -+ -+ /* -+ * This one might sleep, we cannot do it with a spinlock held ... -+ */ -+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; -+ -+out_nounlock: -+ return retval; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/* -+ * Copy the kernel size attribute structure (which might be larger -+ * than what user-space knows about) to user-space. -+ * -+ * Note that all cases are valid: user-space buffer can be larger or -+ * smaller than the kernel-space buffer. The usual case is that both -+ * have the same size. -+ */ -+static int -+sched_attr_copy_to_user(struct sched_attr __user *uattr, -+ struct sched_attr *kattr, -+ unsigned int usize) -+{ -+ unsigned int ksize = sizeof(*kattr); -+ -+ if (!access_ok(uattr, usize)) -+ return -EFAULT; -+ -+ /* -+ * sched_getattr() ABI forwards and backwards compatibility: -+ * -+ * If usize == ksize then we just copy everything to user-space and all is good. -+ * -+ * If usize < ksize then we only copy as much as user-space has space for, -+ * this keeps ABI compatibility as well. We skip the rest. -+ * -+ * If usize > ksize then user-space is using a newer version of the ABI, -+ * which part the kernel doesn't know about. 
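The forwards/backwards compatibility described here is completed by sched_attr_copy_to_user() and sys_sched_getattr() just below: the kernel copies back min(usize, ksize) bytes and records that size in attr.size. A hedged userspace sketch of the read side, again using the raw syscall and a local VER0-layout struct, might look like this:

    /* Build: cc -o getattrdemo getattrdemo.c */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    struct sched_attr {              /* same local VER0 layout as above */
    	uint32_t size;
    	uint32_t sched_policy;
    	uint64_t sched_flags;
    	int32_t  sched_nice;
    	uint32_t sched_priority;
    	uint64_t sched_runtime;
    	uint64_t sched_deadline;
    	uint64_t sched_period;
    };

    int main(void)
    {
    	struct sched_attr attr;

    	memset(&attr, 0, sizeof(attr));
    	/* Pass the size we know about; the kernel copies back at most that
    	 * much and stores min(usize, ksize) in attr.size. */
    	if (syscall(SYS_sched_getattr, 0, &attr, sizeof(attr), 0) != 0) {
    		perror("sched_getattr");
    		return 1;
    	}
    	printf("policy=%u nice=%d rt_priority=%u (attr.size=%u)\n",
    	       attr.sched_policy, (int)attr.sched_nice,
    	       attr.sched_priority, attr.size);
    	return 0;
    }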
Just ignore it - tooling can -+ * detect the kernel's knowledge of attributes from the attr->size value -+ * which is set to ksize in this case. -+ */ -+ kattr->size = min(usize, ksize); -+ -+ if (copy_to_user(uattr, kattr, kattr->size)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ * @usize: sizeof(attr) for fwd/bwd comp. -+ * @flags: for future extension. -+ */ -+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, usize, unsigned int, flags) -+{ -+ struct sched_attr kattr = { }; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || usize > PAGE_SIZE || -+ usize < SCHED_ATTR_SIZE_VER0 || flags) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ kattr.sched_policy = p->policy; -+ if (rt_task(p)) -+ kattr.sched_priority = p->rt_priority; -+ else -+ kattr.sched_nice = task_nice(p); -+ -+#ifdef CONFIG_UCLAMP_TASK -+ kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; -+ kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; -+#endif -+ -+ rcu_read_unlock(); -+ -+ return sched_attr_copy_to_user(uattr, &kattr, usize); -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ -+ cpumask_var_t cpus_mask, new_mask; -+ struct task_struct *p; -+ int retval; -+ -+ get_online_cpus(); -+ rcu_read_lock(); -+ -+ p = find_process_by_pid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ put_online_cpus(); -+ return -ESRCH; -+ } -+ -+ /* Prevent p going away */ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->flags & PF_NO_SETAFFINITY) { -+ retval = -EINVAL; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&cpus_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ retval = -EPERM; -+ if (!check_same_owner(p)) { -+ rcu_read_lock(); -+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { -+ rcu_read_unlock(); -+ goto out_unlock; -+ } -+ rcu_read_unlock(); -+ } -+ -+ retval = security_task_setscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ cpuset_cpus_allowed(p, cpus_mask); -+ cpumask_and(new_mask, in_mask, cpus_mask); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ -+ if (!retval) { -+ cpuset_cpus_allowed(p, cpus_mask); -+ if (!cpumask_subset(new_mask, cpus_mask)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. Just reset the cpus_mask to the -+ * cpuset's cpus_mask -+ */ -+ cpumask_copy(new_mask, cpus_mask); -+ goto again; -+ } -+ } -+out_unlock: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_mask); -+out_put_task: -+ put_task_struct(p); -+ put_online_cpus(); -+ return retval; -+} -+ -+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, -+ struct cpumask *new_mask) -+{ -+ if (len < cpumask_size()) -+ cpumask_clear(new_mask); -+ else if (len > cpumask_size()) -+ len = cpumask_size(); -+ -+ return copy_from_user(new_mask, user_mask_ptr, len) ? 
-EFAULT : 0; -+} -+ -+/** -+ * sys_sched_setaffinity - set the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to the new CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ cpumask_var_t new_mask; -+ int retval; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); -+ if (retval == 0) -+ retval = sched_setaffinity(pid, new_mask); -+ free_cpumask_var(new_mask); -+ return retval; -+} -+ -+long sched_getaffinity(pid_t pid, cpumask_t *mask) -+{ -+ struct task_struct *p; -+ raw_spinlock_t *lock; -+ unsigned long flags; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ task_access_lock_irqsave(p, &lock, &flags); -+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+out_unlock: -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getaffinity - get the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to hold the current CPU mask -+ * -+ * Return: size of CPU mask copied to user_mask_ptr on success. An -+ * error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ int ret; -+ cpumask_var_t mask; -+ -+ if ((len * BITS_PER_BYTE) < nr_cpu_ids) -+ return -EINVAL; -+ if (len & (sizeof(unsigned long)-1)) -+ return -EINVAL; -+ -+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ ret = sched_getaffinity(pid, mask); -+ if (ret == 0) { -+ unsigned int retlen = min_t(size_t, len, cpumask_size()); -+ -+ if (copy_to_user(user_mask_ptr, mask, retlen)) -+ ret = -EFAULT; -+ else -+ ret = retlen; -+ } -+ free_cpumask_var(mask); -+ -+ return ret; -+} -+ -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. It does this by -+ * scheduling away the current task. If it still has the earliest deadline -+ * it will be scheduled again as the next task. -+ * -+ * Return: 0. -+ */ -+static void do_sched_yield(void) -+{ -+ struct rq *rq; -+ struct rq_flags rf; -+ -+ if (!sched_yield_type) -+ return; -+ -+ rq = this_rq_lock_irq(&rf); -+ -+ if (sched_yield_type > 1) { -+ time_slice_expired(current, rq); -+ requeue_task(current, rq); -+ } -+ schedstat_inc(rq->yld_count); -+ -+ /* -+ * Since we are going to call schedule() anyway, there's -+ * no need to preempt or enable interrupts: -+ */ -+ preempt_disable(); -+ raw_spin_unlock(&rq->lock); -+ sched_preempt_enable_no_resched(); -+ -+ schedule(); -+} -+ -+SYSCALL_DEFINE0(sched_yield) -+{ -+ do_sched_yield(); -+ return 0; -+} -+ -+#ifndef CONFIG_PREEMPTION -+int __sched _cond_resched(void) -+{ -+ if (should_resched(0)) { -+ preempt_schedule_common(); -+ return 1; -+ } -+ rcu_all_qs(); -+ return 0; -+} -+EXPORT_SYMBOL(_cond_resched); -+#endif -+ -+/* -+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, -+ * call schedule, and on return reacquire the lock. 
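sched_setaffinity(2) and sched_getaffinity(2), whose kernel side appears above, are normally used through the glibc cpu_set_t helpers. A minimal sketch that pins the calling thread to CPU 0 and reads the mask back; the CPU number is an arbitrary example.

    /* Build: cc -o affdemo affdemo.c */
    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
    	cpu_set_t set;

    	CPU_ZERO(&set);
    	CPU_SET(0, &set);                 /* pin ourselves to CPU 0 */
    	if (sched_setaffinity(0, sizeof(set), &set) != 0) {
    		perror("sched_setaffinity");
    		return 1;
    	}

    	CPU_ZERO(&set);
    	if (sched_getaffinity(0, sizeof(set), &set) != 0) {
    		perror("sched_getaffinity");
    		return 1;
    	}
    	printf("%d allowed CPU(s), CPU0 allowed: %s\n",
    	       CPU_COUNT(&set), CPU_ISSET(0, &set) ? "yes" : "no");
    	return 0;
    }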
-+ * -+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level -+ * operations here to prevent schedule() from being called twice (once via -+ * spin_unlock(), once by hand). -+ */ -+int __cond_resched_lock(spinlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held(lock); -+ -+ if (spin_needbreak(lock) || resched) { -+ spin_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ spin_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_lock); -+ -+/** -+ * yield - yield the current processor to other threads. -+ * -+ * Do not ever use this function, there's a 99% chance you're doing it wrong. -+ * -+ * The scheduler is at all times free to pick the calling task as the most -+ * eligible task to run, if removing the yield() call from your code breaks -+ * it, its already broken. -+ * -+ * Typical broken usage is: -+ * -+ * while (!event) -+ * yield(); -+ * -+ * where one assumes that yield() will let 'the other' process run that will -+ * make event true. If the current task is a SCHED_FIFO task that will never -+ * happen. Never use yield() as a progress guarantee!! -+ * -+ * If you want to use yield() to wait for something, use wait_event(). -+ * If you want to use yield() to be 'nice' for others, use cond_resched(). -+ * If you still want to use yield(), do not! -+ */ -+void __sched yield(void) -+{ -+ set_current_state(TASK_RUNNING); -+ do_sched_yield(); -+} -+EXPORT_SYMBOL(yield); -+ -+/** -+ * yield_to - yield the current processor to another thread in -+ * your thread group, or accelerate that thread toward the -+ * processor it's on. -+ * @p: target task -+ * @preempt: whether task preemption is allowed or not -+ * -+ * It's the caller's job to ensure that the target task struct -+ * can't go away on us before we can do any checks. -+ * -+ * In PDS, yield_to is not supported. -+ * -+ * Return: -+ * true (>0) if we indeed boosted the target task. -+ * false (0) if we failed to boost the target. -+ * -ESRCH if there's no task to yield to. -+ */ -+int __sched yield_to(struct task_struct *p, bool preempt) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(yield_to); -+ -+int io_schedule_prepare(void) -+{ -+ int old_iowait = current->in_iowait; -+ -+ current->in_iowait = 1; -+ blk_schedule_flush_plug(current); -+ -+ return old_iowait; -+} -+ -+void io_schedule_finish(int token) -+{ -+ current->in_iowait = token; -+} -+ -+/* -+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so -+ * that process accounting knows that this is a task in IO wait state. -+ * -+ * But don't do that if it is a deliberate, throttling IO wait (this task -+ * has set its backing_dev_info: the queue against which it should throttle) -+ */ -+ -+long __sched io_schedule_timeout(long timeout) -+{ -+ int token; -+ long ret; -+ -+ token = io_schedule_prepare(); -+ ret = schedule_timeout(timeout); -+ io_schedule_finish(token); -+ -+ return ret; -+} -+EXPORT_SYMBOL(io_schedule_timeout); -+ -+void io_schedule(void) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ schedule(); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(io_schedule); -+ -+/** -+ * sys_sched_get_priority_max - return maximum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the maximum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. 
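The removed yield() comment above warns against spinning in a "while (!event) yield();" loop and points kernel code at wait_event() instead. As a userspace analogue of the same advice (block on the condition rather than yielding in a loop), here is a small pthread sketch; it is purely illustrative and not derived from the patch.

    /* Build: cc -pthread -o waitdemo waitdemo.c */
    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static int event;

    static void *producer(void *arg)
    {
    	(void)arg;
    	sleep(1);
    	pthread_mutex_lock(&lock);
    	event = 1;                    /* publish the event ... */
    	pthread_cond_signal(&cond);   /* ... and wake the waiter */
    	pthread_mutex_unlock(&lock);
    	return NULL;
    }

    int main(void)
    {
    	pthread_t thr;

    	pthread_create(&thr, NULL, producer, NULL);

    	pthread_mutex_lock(&lock);
    	while (!event)                /* block instead of spinning on yield */
    		pthread_cond_wait(&cond, &lock);
    	pthread_mutex_unlock(&lock);

    	puts("event observed without busy-waiting");
    	pthread_join(thr, NULL);
    	return 0;
    }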
-+ */ -+SYSCALL_DEFINE1(sched_get_priority_max, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = MAX_USER_RT_PRIO-1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * sys_sched_get_priority_min - return minimum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the minimum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_min, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) -+{ -+ struct task_struct *p; -+ int retval; -+ -+ if (pid < 0) -+ return -EINVAL; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ rcu_read_unlock(); -+ -+ *t = ns_to_timespec64(MS_TO_NS(rr_interval)); -+ return 0; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/** -+ * sys_sched_rr_get_interval - return the default timeslice of a process. -+ * @pid: pid of the process. -+ * @interval: userspace pointer to the timeslice value. -+ * -+ * -+ * Return: On success, 0 and the timeslice is in @interval. Otherwise, -+ * an error code. -+ */ -+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, -+ struct __kernel_timespec __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_timespec64(&t, interval); -+ -+ return retval; -+} -+ -+#ifdef CONFIG_COMPAT_32BIT_TIME -+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, -+ struct old_timespec32 __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_old_timespec32(&t, interval); -+ return retval; -+} -+#endif -+ -+void sched_show_task(struct task_struct *p) -+{ -+ unsigned long free = 0; -+ int ppid; -+ -+ if (!try_get_task_stack(p)) -+ return; -+ -+ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); -+ -+ if (p->state == TASK_RUNNING) -+ printk(KERN_CONT " running task "); -+#ifdef CONFIG_DEBUG_STACK_USAGE -+ free = stack_not_used(p); -+#endif -+ ppid = 0; -+ rcu_read_lock(); -+ if (pid_alive(p)) -+ ppid = task_pid_nr(rcu_dereference(p->real_parent)); -+ rcu_read_unlock(); -+ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, -+ task_pid_nr(p), ppid, -+ (unsigned long)task_thread_info(p)->flags); -+ -+ print_worker_info(KERN_INFO, p); -+ show_stack(p, NULL); -+ put_task_stack(p); -+} -+EXPORT_SYMBOL_GPL(sched_show_task); -+ -+static inline bool -+state_filter_match(unsigned long state_filter, struct task_struct *p) -+{ -+ /* no filter, everything matches */ -+ if (!state_filter) -+ return true; -+ -+ /* filter, but doesn't match */ -+ if (!(p->state & state_filter)) -+ return false; -+ -+ /* -+ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows -+ * TASK_KILLABLE). 
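The priority ranges returned by the two syscalls above (1..MAX_USER_RT_PRIO-1 for SCHED_FIFO/SCHED_RR, 0 for the other policies) and the fixed rr_interval timeslice reported by sched_rr_get_interval() can be inspected from userspace with a short sketch like the following.

    /* Build: cc -o prdemo prdemo.c */
    #include <sched.h>
    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
    	struct timespec ts;

    	printf("SCHED_FIFO priority range: %d..%d\n",
    	       sched_get_priority_min(SCHED_FIFO),
    	       sched_get_priority_max(SCHED_FIFO));
    	printf("SCHED_OTHER priority range: %d..%d\n",
    	       sched_get_priority_min(SCHED_OTHER),
    	       sched_get_priority_max(SCHED_OTHER));

    	if (sched_rr_get_interval(0, &ts) == 0)
    		printf("reported timeslice: %ld.%09ld s\n",
    		       (long)ts.tv_sec, ts.tv_nsec);
    	return 0;
    }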
-+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) -+ return false; -+ -+ return true; -+} -+ -+ -+void show_state_filter(unsigned long state_filter) -+{ -+ struct task_struct *g, *p; -+ -+#if BITS_PER_LONG == 32 -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#else -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#endif -+ rcu_read_lock(); -+ for_each_process_thread(g, p) { -+ /* -+ * reset the NMI-timeout, listing all files on a slow -+ * console might take a lot of time: -+ * Also, reset softlockup watchdogs on all CPUs, because -+ * another CPU might be blocked waiting for us to process -+ * an IPI. -+ */ -+ touch_nmi_watchdog(); -+ touch_all_softlockup_watchdogs(); -+ if (state_filter_match(state_filter, p)) -+ sched_show_task(p); -+ } -+ -+#ifdef CONFIG_SCHED_DEBUG -+ /* PDS TODO: should support this -+ if (!state_filter) -+ sysrq_sched_debug_show(); -+ */ -+#endif -+ rcu_read_unlock(); -+ /* -+ * Only show locks if all tasks are dumped: -+ */ -+ if (!state_filter) -+ debug_show_all_locks(); -+} -+ -+void dump_cpu_task(int cpu) -+{ -+ pr_info("Task dump for CPU %d:\n", cpu); -+ sched_show_task(cpu_curr(cpu)); -+} -+ -+/** -+ * init_idle - set up an idle thread for a given CPU -+ * @idle: task in question -+ * @cpu: cpu the idle task belongs to -+ * -+ * NOTE: this function does not set the idle thread's NEED_RESCHED -+ * flag, to make booting more robust. -+ */ -+void init_idle(struct task_struct *idle, int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&idle->pi_lock, flags); -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ idle->last_ran = rq->clock_task; -+ idle->state = TASK_RUNNING; -+ idle->flags |= PF_IDLE; -+ /* Setting prio to illegal value shouldn't matter when never queued */ -+ idle->prio = PRIO_LIMIT; -+ idle->deadline = rq_clock(rq) + task_deadline_diff(idle); -+ update_task_priodl(idle); -+ -+ kasan_unpoison_task_stack(idle); -+ -+#ifdef CONFIG_SMP -+ /* -+ * It's possible that init_idle() gets called multiple times on a task, -+ * in that case do_set_cpus_allowed() will not do the right thing. -+ * -+ * And since this is boot we can forgo the serialisation. -+ */ -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); -+#endif -+ -+ /* Silence PROVE_RCU */ -+ rcu_read_lock(); -+ __set_task_cpu(idle, cpu); -+ rcu_read_unlock(); -+ -+ rq->idle = idle; -+ rcu_assign_pointer(rq->curr, idle); -+ idle->on_cpu = 1; -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); -+ -+ /* Set the preempt count _outside_ the spinlocks! */ -+ init_idle_preempt_count(idle, cpu); -+ -+ ftrace_graph_init_idle_task(idle, cpu); -+ vtime_init_idle(idle, cpu); -+#ifdef CONFIG_SMP -+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -+#endif -+} -+ -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(cpu_rq(cpu)); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+} -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. 
-+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. -+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. -+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); -+ /* task can safely be re-inserted now: */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ -+#ifdef CONFIG_SMP -+ -+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, -+ const struct cpumask __maybe_unused *trial) -+{ -+ return 1; -+} -+ -+int task_can_attach(struct task_struct *p, -+ const struct cpumask *cs_cpus_allowed) -+{ -+ int ret = 0; -+ -+ /* -+ * Kthreads which disallow setaffinity shouldn't be moved -+ * to a new cpuset; we don't want to change their CPU -+ * affinity and isolating such threads by their set of -+ * allowed nodes is unnecessary. Thus, cpusets are not -+ * applicable for such threads. This prevents checking for -+ * success of set_cpus_allowed_ptr() on all attached tasks -+ * before cpus_mask may be changed. -+ */ -+ if (p->flags & PF_NO_SETAFFINITY) -+ ret = -EINVAL; -+ -+ return ret; -+} -+ -+static bool sched_smp_initialized __read_mostly; -+ -+#ifdef CONFIG_NO_HZ_COMMON -+void nohz_balance_enter_idle(int cpu) -+{ -+} -+ -+void select_nohz_load_balancer(int stop_tick) -+{ -+} -+ -+void set_cpu_sd_state_idle(void) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. 
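To make the wake_q contract above concrete (wake_q_add() may be called with locks held, wake_up_q() later issues the real wake_up_process() calls and drops the task references), here is a kernel-context sketch. It is not a standalone program, and the waiter structure and function names are hypothetical.

    #include <linux/sched/wake_q.h>
    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct demo_waiter {
    	struct list_head node;
    	struct task_struct *task;
    };

    static void demo_release_waiters(spinlock_t *lock, struct list_head *waiters)
    {
    	DEFINE_WAKE_Q(wake_q);
    	struct demo_waiter *w, *tmp;

    	spin_lock(lock);
    	list_for_each_entry_safe(w, tmp, waiters, node) {
    		list_del_init(&w->node);
    		/* Takes a task reference unless the task is already queued. */
    		wake_q_add(&wake_q, w->task);
    	}
    	spin_unlock(lock);

    	/* Wakeups happen here, outside the critical section. */
    	wake_up_q(&wake_q);
    }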
-+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). -+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(); -+ struct cpumask *mask; -+ -+ if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) -+ return cpu; -+ -+ for (mask = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ mask < per_cpu(sched_cpu_affinity_chk_end_masks, cpu); mask++) -+ for_each_cpu(i, mask) -+ if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) -+ return i; -+ -+ if (!housekeeping_cpu(cpu, HK_FLAG_TIMER)) -+ cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+ -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. -+ */ -+void wake_up_idle_cpu(int cpu) -+{ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ set_tsk_need_resched(cpu_rq(cpu)->idle); -+ smp_send_reschedule(cpu); -+} -+ -+void wake_up_nohz_cpu(int cpu) -+{ -+ wake_up_idle_cpu(cpu); -+} -+#endif /* CONFIG_NO_HZ_COMMON */ -+ -+#ifdef CONFIG_HOTPLUG_CPU -+/* -+ * Ensures that the idle task is using init_mm right before its CPU goes -+ * offline. -+ */ -+void idle_task_exit(void) -+{ -+ struct mm_struct *mm = current->active_mm; -+ -+ BUG_ON(cpu_online(smp_processor_id())); -+ -+ if (mm != &init_mm) { -+ switch_mm(mm, &init_mm, current); -+ current->active_mm = &init_mm; -+ finish_arch_post_lock_switch(); -+ } -+ mmdrop(mm); -+} -+ -+/* -+ * Migrate all tasks from the rq, sleeping tasks will be migrated by -+ * try_to_wake_up()->select_task_rq(). -+ * -+ * Called with rq->lock held even though we'er in stop_machine() and -+ * there's no concurrency possible, we hold the required locks anyway -+ * because of lock validation efforts. -+ */ -+static void migrate_tasks(struct rq *dead_rq) -+{ -+ struct rq *rq = dead_rq; -+ struct task_struct *p, *stop = rq->stop; -+ struct skiplist_node *node; -+ int count = 0; -+ -+ /* -+ * Fudge the rq selection such that the below task selection loop -+ * doesn't get stuck on the currently eligible stop task. -+ * -+ * We're currently inside stop_machine() and the rq is either stuck -+ * in the stop_machine_cpu_stop() loop, or we're executing this code, -+ * either way we should never end up calling schedule() until we're -+ * done here. -+ */ -+ rq->stop = NULL; -+ -+ node = &rq->sl_header; -+ while ((node = node->next[0]) != &rq->sl_header) { -+ int dest_cpu; -+ -+ p = skiplist_entry(node, struct task_struct, sl_node); -+ -+ /* skip the running task */ -+ if (task_running(p)) -+ continue; -+ -+ /* -+ * Rules for changing task_struct::cpus_mask are holding -+ * both pi_lock and rq->lock, such that holding either -+ * stabilizes the mask. -+ * -+ * Drop rq->lock is not quite as disastrous as it usually is -+ * because !cpu_active at this point, which means load-balance -+ * will not interfere. Also, stop-machine. 
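The add_timer_on()/wake_up_idle_cpu() interplay described above can be illustrated with another kernel-context sketch; again not a standalone program, and the timer name, callback and one-second expiry are hypothetical.

    #include <linux/timer.h>
    #include <linux/jiffies.h>
    #include <linux/smp.h>
    #include <linux/printk.h>

    static struct timer_list demo_timer;

    static void demo_timer_fn(struct timer_list *t)
    {
    	pr_info("demo timer fired on CPU%d\n", smp_processor_id());
    }

    static void demo_arm_timer_on_cpu(int cpu)
    {
    	timer_setup(&demo_timer, demo_timer_fn, 0);
    	demo_timer.expires = jiffies + HZ;  /* roughly one second from now */
    	/* If @cpu is idle, the path above wakes it so the new expiry is
    	 * noticed rather than slept through. */
    	add_timer_on(&demo_timer, cpu);
    }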
-+ */ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ /* -+ * Since we're inside stop-machine, _nothing_ should have -+ * changed the task, WARN if weird stuff happened, because in -+ * that case the above rq->lock drop is a fail too. -+ */ -+ if (WARN_ON(task_rq(p) != rq || !task_on_rq_queued(p))) { -+ raw_spin_unlock(&p->pi_lock); -+ continue; -+ } -+ -+ count++; -+ /* Find suitable destination for @next, with force if needed. */ -+ dest_cpu = select_fallback_rq(dead_rq->cpu, p); -+ -+ rq = __migrate_task(rq, p, dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ rq = dead_rq; -+ raw_spin_lock(&rq->lock); -+ /* Check queued task all over from the header again */ -+ node = &rq->sl_header; -+ } -+ -+ rq->stop = stop; -+} -+ -+static void set_rq_offline(struct rq *rq) -+{ -+ if (rq->online) -+ rq->online = false; -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+static void set_rq_online(struct rq *rq) -+{ -+ if (!rq->online) -+ rq->online = true; -+} -+ -+#ifdef CONFIG_SCHED_DEBUG -+ -+static __read_mostly int sched_debug_enabled; -+ -+static int __init sched_debug_setup(char *str) -+{ -+ sched_debug_enabled = 1; -+ -+ return 0; -+} -+early_param("sched_debug", sched_debug_setup); -+ -+static inline bool sched_debug(void) -+{ -+ return sched_debug_enabled; -+} -+#else /* !CONFIG_SCHED_DEBUG */ -+static inline bool sched_debug(void) -+{ -+ return false; -+} -+#endif /* CONFIG_SCHED_DEBUG */ -+ -+#ifdef CONFIG_SMP -+void scheduler_ipi(void) -+{ -+ /* -+ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting -+ * TIF_NEED_RESCHED remotely (for the first time) will also send -+ * this IPI. -+ */ -+ preempt_fold_need_resched(); -+ -+ if (!idle_cpu(smp_processor_id()) || need_resched()) -+ return; -+ -+ irq_enter(); -+ irq_exit(); -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (is_idle_task(rq->curr)) -+ smp_send_reschedule(cpu); -+ /* Else CPU is not idle, do nothing here */ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Topology list, bottom-up. -+ */ -+static struct sched_domain_topology_level default_topology[] = { -+#ifdef CONFIG_SCHED_SMT -+ { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, -+#endif -+#ifdef CONFIG_SCHED_MC -+ { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, -+#endif -+ { cpu_cpu_mask, SD_INIT_NAME(DIE) }, -+ { NULL, }, -+}; -+ -+static struct sched_domain_topology_level *sched_domain_topology = -+ default_topology; -+ -+#define for_each_sd_topology(tl) \ -+ for (tl = sched_domain_topology; tl->mask; tl++) -+ -+void set_sched_topology(struct sched_domain_topology_level *tl) -+{ -+ if (WARN_ON_ONCE(sched_smp_initialized)) -+ return; -+ -+ sched_domain_topology = tl; -+} -+ -+/* -+ * Initializers for schedule domains -+ * Non-inlined to reduce accumulated stack pressure in build_sched_domains() -+ */ -+ -+int sched_domain_level_max; -+ -+/* -+ * Partition sched domains as specified by the 'ndoms_new' -+ * cpumasks in the array doms_new[] of cpumasks. 
This compares -+ * doms_new[] to the current sched domain partitioning, doms_cur[]. -+ * It destroys each deleted domain and builds each new domain. -+ * -+ * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. -+ * The masks don't intersect (don't overlap.) We should setup one -+ * sched domain for each mask. CPUs not in any of the cpumasks will -+ * not be load balanced. If the same cpumask appears both in the -+ * current 'doms_cur' domains and in the new 'doms_new', we can leave -+ * it as it is. -+ * -+ * The passed in 'doms_new' should be allocated using -+ * alloc_sched_domains. This routine takes ownership of it and will -+ * free_sched_domains it when done with it. If the caller failed the -+ * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, -+ * and partition_sched_domains() will fallback to the single partition -+ * 'fallback_doms', it also forces the domains to be rebuilt. -+ * -+ * If doms_new == NULL it will be replaced with cpu_online_mask. -+ * ndoms_new == 0 is a special case for destroying existing domains, -+ * and it will not create the default domain. -+ * -+ * Call with hotplug lock held -+ */ -+void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], -+ struct sched_domain_attr *dattr_new) -+{ -+ /** -+ * PDS doesn't depend on sched domains, but just keep this api -+ */ -+} -+ -+/* -+ * used to mark begin/end of suspend/resume: -+ */ -+static int num_cpus_frozen; -+ -+#ifdef CONFIG_NUMA -+int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; -+ -+/* -+ * sched_numa_find_closest() - given the NUMA topology, find the cpu -+ * closest to @cpu from @cpumask. -+ * cpumask: cpumask to find a cpu from -+ * cpu: cpu to be close to -+ * -+ * returns: cpu, or nr_cpu_ids when nothing found. -+ */ -+int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return best_mask_cpu(cpu, cpus); -+} -+#endif /* CONFIG_NUMA */ -+ -+/* -+ * Update cpusets according to cpu_active mask. If cpusets are -+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper -+ * around partition_sched_domains(). -+ * -+ * If we come here as part of a suspend/resume, don't touch cpusets because we -+ * want to restore it back to its original state upon resume anyway. -+ */ -+static void cpuset_cpu_active(void) -+{ -+ if (cpuhp_tasks_frozen) { -+ /* -+ * num_cpus_frozen tracks how many CPUs are involved in suspend -+ * resume sequence. As long as this is not the last online -+ * operation in the resume sequence, just build a single sched -+ * domain, ignoring cpusets. -+ */ -+ partition_sched_domains(1, NULL, NULL); -+ if (--num_cpus_frozen) -+ return; -+ /* -+ * This is the last CPU online operation. So fall through and -+ * restore the original sched domains by considering the -+ * cpuset configurations. -+ */ -+ cpuset_force_rebuild(); -+ } -+ -+ cpuset_update_active_cpus(); -+} -+ -+static int cpuset_cpu_inactive(unsigned int cpu) -+{ -+ if (!cpuhp_tasks_frozen) { -+ cpuset_update_active_cpus(); -+ } else { -+ num_cpus_frozen++; -+ partition_sched_domains(1, NULL, NULL); -+ } -+ return 0; -+} -+ -+int sched_cpu_activate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going up, increment the number of cores with SMT present. 
-+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_inc_cpuslocked(&sched_smt_present); -+#endif -+ set_cpu_active(cpu, true); -+ -+ if (sched_smp_initialized) -+ cpuset_cpu_active(); -+ -+ /* -+ * Put the rq online, if not already. This happens: -+ * -+ * 1) In the early boot process, because we build the real domains -+ * after all cpus have been brought up. -+ * -+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the -+ * domains. -+ */ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_online(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ int ret; -+ -+ set_cpu_active(cpu, false); -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. -+ */ -+ synchronize_rcu(); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going down, decrement the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_dec_cpuslocked(&sched_smt_present); -+#endif -+ -+ if (!sched_smp_initialized) -+ return 0; -+ -+ ret = cpuset_cpu_inactive(cpu); -+ if (ret) { -+ set_cpu_active(cpu, true); -+ return ret; -+ } -+ return 0; -+} -+ -+static void sched_rq_cpu_starting(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ rq->calc_load_update = calc_load_update; -+} -+ -+int sched_cpu_starting(unsigned int cpu) -+{ -+ sched_rq_cpu_starting(cpu); -+ sched_tick_start(cpu); -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+int sched_cpu_dying(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ sched_tick_stop(cpu); -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_offline(rq); -+ migrate_tasks(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ hrtick_clear(rq); -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_SMP -+static void sched_init_topology_cpumask_early(void) -+{ -+ int cpu, level; -+ cpumask_t *tmp; -+ -+ for_each_possible_cpu(cpu) { -+ for (level = 0; level < NR_CPU_AFFINITY_CHK_LEVEL; level++) { -+ tmp = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[level]); -+ cpumask_copy(tmp, cpu_possible_mask); -+ cpumask_clear_cpu(cpu, tmp); -+ } -+ per_cpu(sched_cpu_llc_start_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ per_cpu(sched_cpu_affinity_chk_end_masks, cpu) = -+ &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[1]); -+ } -+} -+ -+static void sched_init_topology_cpumask(void) -+{ -+ int cpu; -+ cpumask_t *chk; -+ -+ for_each_online_cpu(cpu) { -+ chk = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ -+#ifdef CONFIG_SCHED_SMT -+ cpumask_setall(chk); -+ cpumask_clear_cpu(cpu, chk); -+ if (cpumask_and(chk, chk, topology_sibling_cpumask(cpu))) { -+ per_cpu(sched_sibling_cpu, cpu) = cpumask_first(chk); -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - smt 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ } -+#endif -+#ifdef CONFIG_SCHED_MC -+ cpumask_setall(chk); -+ cpumask_clear_cpu(cpu, chk); -+ if (cpumask_and(chk, chk, cpu_coregroup_mask(cpu))) { -+ per_cpu(sched_cpu_llc_start_mask, cpu) = chk; -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - coregroup 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ } -+ cpumask_complement(chk, cpu_coregroup_mask(cpu)); -+ -+ /** -+ * Set up sd_llc_id per CPU -+ */ -+ per_cpu(sd_llc_id, cpu) = -+ cpumask_first(cpu_coregroup_mask(cpu)); -+#else -+ per_cpu(sd_llc_id, cpu) = -+ 
cpumask_first(topology_core_cpumask(cpu)); -+ -+ per_cpu(sched_cpu_llc_start_mask, cpu) = chk; -+ -+ cpumask_setall(chk); -+ cpumask_clear_cpu(cpu, chk); -+#endif /* NOT CONFIG_SCHED_MC */ -+ if (cpumask_and(chk, chk, topology_core_cpumask(cpu))) -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - core 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ cpumask_complement(chk, topology_core_cpumask(cpu)); -+ -+ if (cpumask_and(chk, chk, cpu_online_mask)) -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - others 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ -+ per_cpu(sched_cpu_affinity_chk_end_masks, cpu) = chk; -+ } -+} -+#endif -+ -+void __init sched_init_smp(void) -+{ -+ /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -+ BUG(); -+ -+ cpumask_copy(&sched_rq_queued_masks[SCHED_RQ_EMPTY], cpu_online_mask); -+ -+ sched_init_topology_cpumask(); -+ -+ sched_smp_initialized = true; -+} -+#else -+void __init sched_init_smp(void) -+{ -+} -+#endif /* CONFIG_SMP */ -+ -+int in_sched_functions(unsigned long addr) -+{ -+ return in_lock_functions(addr) || -+ (addr >= (unsigned long)__sched_text_start -+ && addr < (unsigned long)__sched_text_end); -+} -+ -+#ifdef CONFIG_CGROUP_SCHED -+/* task group related information */ -+struct task_group { -+ struct cgroup_subsys_state css; -+ -+ struct rcu_head rcu; -+ struct list_head list; -+ -+ struct task_group *parent; -+ struct list_head siblings; -+ struct list_head children; -+}; -+ -+/* -+ * Default task group. -+ * Every task in system belongs to this group at bootup. -+ */ -+struct task_group root_task_group; -+LIST_HEAD(task_groups); -+ -+/* Cacheline aligned slab cache for task_group */ -+static struct kmem_cache *task_group_cache __read_mostly; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void __init sched_init(void) -+{ -+ int i; -+ struct rq *rq; -+ -+ print_scheduler_version(); -+ -+ wait_bit_init(); -+ -+#ifdef CONFIG_SMP -+ for (i = 0; i < NR_SCHED_RQ_QUEUED_LEVEL; i++) -+ cpumask_clear(&sched_rq_queued_masks[i]); -+ cpumask_setall(&sched_rq_queued_masks[SCHED_RQ_EMPTY]); -+ set_bit(SCHED_RQ_EMPTY, sched_rq_queued_masks_bitmap); -+ -+ cpumask_setall(&sched_rq_pending_masks[SCHED_RQ_EMPTY]); -+ set_bit(SCHED_RQ_EMPTY, sched_rq_pending_masks_bitmap); -+#else -+ uprq = &per_cpu(runqueues, 0); -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+ task_group_cache = KMEM_CACHE(task_group, 0); -+ -+ list_add(&root_task_group.list, &task_groups); -+ INIT_LIST_HEAD(&root_task_group.children); -+ INIT_LIST_HEAD(&root_task_group.siblings); -+#endif /* CONFIG_CGROUP_SCHED */ -+ for_each_possible_cpu(i) { -+ rq = cpu_rq(i); -+ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); -+ raw_spin_lock_init(&rq->lock); -+ rq->dither = 0; -+ rq->nr_running = rq->nr_uninterruptible = 0; -+ rq->calc_load_active = 0; -+ rq->calc_load_update = jiffies + LOAD_FREQ; -+#ifdef CONFIG_SMP -+ rq->online = false; -+ rq->cpu = i; -+ -+ rq->queued_level = SCHED_RQ_EMPTY; -+ rq->pending_level = SCHED_RQ_EMPTY; -+#ifdef CONFIG_SCHED_SMT -+ per_cpu(sched_sibling_cpu, i) = i; -+ rq->active_balance = 0; -+#endif -+#endif -+ rq->nr_switches = 0; -+ atomic_set(&rq->nr_iowait, 0); -+ hrtick_rq_init(rq); -+ } -+#ifdef CONFIG_SMP -+ /* Set rq->online for cpu 0 */ -+ cpu_rq(0)->online = true; -+#endif -+ -+ /* -+ * The boot idle thread does lazy MMU switching as well: -+ */ -+ mmgrab(&init_mm); -+ enter_lazy_tlb(&init_mm, current); -+ -+ /* -+ * Make us the idle thread. 
Technically, schedule() should not be -+ * called from this thread, however somewhere below it might be, -+ * but because we are the idle thread, we just pick up running again -+ * when this runqueue becomes "idle". -+ */ -+ init_idle(current, smp_processor_id()); -+ -+ calc_load_update = jiffies + LOAD_FREQ; -+ -+#ifdef CONFIG_SMP -+ idle_thread_set_boot_cpu(); -+ -+ sched_init_topology_cpumask_early(); -+#endif /* SMP */ -+ -+ init_schedstats(); -+ -+ psi_init(); -+} -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+static inline int preempt_count_equals(int preempt_offset) -+{ -+ int nested = preempt_count() + rcu_preempt_depth(); -+ -+ return (nested == preempt_offset); -+} -+ -+void __might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* -+ * Blocking primitives will set (and therefore destroy) current->state, -+ * since we will exit with TASK_RUNNING make sure we enter with it, -+ * otherwise we will destroy state. -+ */ -+ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, -+ "do not call blocking ops when !TASK_RUNNING; " -+ "state=%lx set at [<%p>] %pS\n", -+ current->state, -+ (void *)current->task_state_change, -+ (void *)current->task_state_change); -+ -+ ___might_sleep(file, line, preempt_offset); -+} -+EXPORT_SYMBOL(__might_sleep); -+ -+void ___might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* Ratelimiting timestamp: */ -+ static unsigned long prev_jiffy; -+ -+ unsigned long preempt_disable_ip; -+ -+ /* WARN_ON_ONCE() by default, no rate limit required: */ -+ rcu_sleep_check(); -+ -+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ !is_idle_task(current) && !current->non_block_count) || -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) -+ return; -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ /* Save this before calling printk(), since that will clobber it: */ -+ preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ printk(KERN_ERR -+ "BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ printk(KERN_ERR -+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); -+ -+ if (task_stack_end_corrupted(current)) -+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ -+ debug_show_held_locks(current); -+ if (irqs_disabled()) -+ print_irqtrace_events(current); -+#ifdef CONFIG_DEBUG_PREEMPT -+ if (!preempt_count_equals(preempt_offset)) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+#endif -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL(___might_sleep); -+ -+void __cant_sleep(const char *file, int line, int preempt_offset) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > preempt_offset) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); -+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_sleep); -+#endif -+ -+#ifdef 
CONFIG_MAGIC_SYSRQ -+void normalize_rt_tasks(void) -+{ -+ struct task_struct *g, *p; -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ }; -+ -+ read_lock(&tasklist_lock); -+ for_each_process_thread(g, p) { -+ /* -+ * Only normalize user tasks: -+ */ -+ if (p->flags & PF_KTHREAD) -+ continue; -+ -+ if (!rt_task(p)) { -+ /* -+ * Renice negative nice level userspace -+ * tasks back to 0: -+ */ -+ if (task_nice(p) < 0) -+ set_user_nice(p, 0); -+ continue; -+ } -+ -+ __sched_setscheduler(p, &attr, false, false); -+ } -+ read_unlock(&tasklist_lock); -+} -+#endif /* CONFIG_MAGIC_SYSRQ */ -+ -+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) -+/* -+ * These functions are only useful for the IA64 MCA handling, or kdb. -+ * -+ * They can only be called when the whole system has been -+ * stopped - every CPU needs to be quiescent, and no scheduling -+ * activity can take place. Using them for anything else would -+ * be a serious bug, and as a result, they aren't even visible -+ * under any other configuration. -+ */ -+ -+/** -+ * curr_task - return the current task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ * -+ * Return: The current task for @cpu. -+ */ -+struct task_struct *curr_task(int cpu) -+{ -+ return cpu_curr(cpu); -+} -+ -+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ -+ -+#ifdef CONFIG_IA64 -+/** -+ * ia64_set_curr_task - set the current task for a given CPU. -+ * @cpu: the processor in question. -+ * @p: the task pointer to set. -+ * -+ * Description: This function must only be used when non-maskable interrupts -+ * are serviced on a separate stack. It allows the architecture to switch the -+ * notion of the current task on a CPU in a non-blocking manner. This function -+ * must be called with all CPU's synchronised, and interrupts disabled, the -+ * and caller must save the original value of the current task (see -+ * curr_task() above) and restore that value before reenabling interrupts and -+ * re-starting the system. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ */ -+void ia64_set_curr_task(int cpu, struct task_struct *p) -+{ -+ cpu_curr(cpu) = p; -+} -+ -+#endif -+ -+#ifdef CONFIG_SCHED_DEBUG -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+static void sched_free_group(struct task_group *tg) -+{ -+ kmem_cache_free(task_group_cache, tg); -+} -+ -+/* allocate runqueue etc for a new task group */ -+struct task_group *sched_create_group(struct task_group *parent) -+{ -+ struct task_group *tg; -+ -+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); -+ if (!tg) -+ return ERR_PTR(-ENOMEM); -+ -+ return tg; -+} -+ -+void sched_online_group(struct task_group *tg, struct task_group *parent) -+{ -+} -+ -+/* rcu callback to free various structures associated with a task group */ -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ /* Now it should be safe to free those cfs_rqs */ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+void sched_destroy_group(struct task_group *tg) -+{ -+ /* Wait for possible concurrent references to cfs_rqs complete */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ -+void sched_offline_group(struct task_group *tg) -+{ -+} -+ -+static inline struct task_group *css_tg(struct cgroup_subsys_state *css) -+{ -+ return css ? 
container_of(css, struct task_group, css) : NULL; -+} -+ -+static struct cgroup_subsys_state * -+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -+{ -+ struct task_group *parent = css_tg(parent_css); -+ struct task_group *tg; -+ -+ if (!parent) { -+ /* This is early initialization for the top cgroup */ -+ return &root_task_group.css; -+ } -+ -+ tg = sched_create_group(parent); -+ if (IS_ERR(tg)) -+ return ERR_PTR(-ENOMEM); -+ return &tg->css; -+} -+ -+/* Expose task group only after completing cgroup initialization */ -+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ struct task_group *parent = css_tg(css->parent); -+ -+ if (parent) -+ sched_online_group(tg, parent); -+ return 0; -+} -+ -+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ sched_offline_group(tg); -+} -+ -+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ /* -+ * Relies on the RCU grace period between css_released() and this. -+ */ -+ sched_free_group(tg); -+} -+ -+static void cpu_cgroup_fork(struct task_struct *task) -+{ -+} -+ -+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) -+{ -+ return 0; -+} -+ -+static void cpu_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+ -+static struct cftype cpu_legacy_files[] = { -+ { } /* Terminate */ -+}; -+ -+static struct cftype cpu_files[] = { -+ { } /* terminate */ -+}; -+ -+static int cpu_extra_stat_show(struct seq_file *sf, -+ struct cgroup_subsys_state *css) -+{ -+ return 0; -+} -+ -+struct cgroup_subsys cpu_cgrp_subsys = { -+ .css_alloc = cpu_cgroup_css_alloc, -+ .css_online = cpu_cgroup_css_online, -+ .css_released = cpu_cgroup_css_released, -+ .css_free = cpu_cgroup_css_free, -+ .css_extra_stat_show = cpu_extra_stat_show, -+ .fork = cpu_cgroup_fork, -+ .can_attach = cpu_cgroup_can_attach, -+ .attach = cpu_cgroup_attach, -+ .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, -+ .early_init = true, -+ .threaded = true, -+}; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+#undef CREATE_TRACE_POINTS -diff --git a/kernel/sched/pds_sched.h b/kernel/sched/pds_sched.h -new file mode 100644 -index 000000000000..b3926a8425b2 ---- /dev/null -+++ b/kernel/sched/pds_sched.h -@@ -0,0 +1,474 @@ -+#ifndef PDS_SCHED_H -+#define PDS_SCHED_H -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#ifdef CONFIG_PARAVIRT -+# include -+#endif -+ -+#include "cpupri.h" -+ -+/* task_struct::on_rq states: */ -+#define TASK_ON_RQ_QUEUED 1 -+#define TASK_ON_RQ_MIGRATING 2 -+ -+static inline int task_on_rq_queued(struct task_struct *p) -+{ -+ return p->on_rq == TASK_ON_RQ_QUEUED; -+} -+ -+static inline int task_on_rq_migrating(struct task_struct *p) -+{ -+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+} -+ -+/* -+ * This is the main, per-CPU runqueue data structure. -+ * This data should only be modified by the local cpu. 
-+ */ -+struct rq { -+ /* runqueue lock: */ -+ raw_spinlock_t lock; -+ -+ struct task_struct *curr, *idle, *stop; -+ struct mm_struct *prev_mm; -+ -+ struct skiplist_node sl_header; -+ -+ /* switch count */ -+ u64 nr_switches; -+ -+ atomic_t nr_iowait; -+ -+#ifdef CONFIG_MEMBARRIER -+ int membarrier_state; -+#endif -+ -+#ifdef CONFIG_SMP -+ int cpu; /* cpu of this runqueue */ -+ bool online; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ struct sched_avg avg_irq; -+#endif -+ -+ unsigned long queued_level; -+ unsigned long pending_level; -+ -+#ifdef CONFIG_SCHED_SMT -+ int active_balance; -+ struct cpu_stop_work active_balance_work; -+#endif -+#endif /* CONFIG_SMP */ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ u64 prev_irq_time; -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+#ifdef CONFIG_PARAVIRT -+ u64 prev_steal_time; -+#endif /* CONFIG_PARAVIRT */ -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ u64 prev_steal_time_rq; -+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ -+ -+ /* calc_load related fields */ -+ unsigned long calc_load_update; -+ long calc_load_active; -+ -+ u64 clock, last_tick; -+ u64 clock_task; -+ int dither; -+ -+ unsigned long nr_running; -+ unsigned long nr_uninterruptible; -+ -+#ifdef CONFIG_SCHED_HRTICK -+#ifdef CONFIG_SMP -+ int hrtick_csd_pending; -+ call_single_data_t hrtick_csd; -+#endif -+ struct hrtimer hrtick_timer; -+#endif -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+ /* latency stats */ -+ struct sched_info rq_sched_info; -+ unsigned long long rq_cpu_time; -+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ -+ -+ /* sys_sched_yield() stats */ -+ unsigned int yld_count; -+ -+ /* schedule() stats */ -+ unsigned int sched_switch; -+ unsigned int sched_count; -+ unsigned int sched_goidle; -+ -+ /* try_to_wake_up() stats */ -+ unsigned int ttwu_count; -+ unsigned int ttwu_local; -+#endif /* CONFIG_SCHEDSTATS */ -+#ifdef CONFIG_CPU_IDLE -+ /* Must be inspected within a rcu lock section */ -+ struct cpuidle_state *idle_state; -+#endif -+}; -+ -+extern unsigned long calc_load_update; -+extern atomic_long_t calc_load_tasks; -+ -+extern void calc_global_load_tick(struct rq *this_rq); -+extern long calc_load_fold_active(struct rq *this_rq, long adjust); -+ -+#ifndef CONFIG_SMP -+extern struct rq *uprq; -+#define cpu_rq(cpu) (uprq) -+#define this_rq() (uprq) -+#define raw_rq() (uprq) -+#define task_rq(p) (uprq) -+#define cpu_curr(cpu) ((uprq)->curr) -+#else /* CONFIG_SMP */ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) -+#define this_rq() this_cpu_ptr(&runqueues) -+#define raw_rq() raw_cpu_ptr(&runqueues) -+#define task_rq(p) cpu_rq(task_cpu(p)) -+#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -+ -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+void register_sched_domain_sysctl(void); -+void unregister_sched_domain_sysctl(void); -+#else -+static inline void register_sched_domain_sysctl(void) -+{ -+} -+static inline void unregister_sched_domain_sysctl(void) -+{ -+} -+#endif -+ -+#endif /* CONFIG_SMP */ -+ -+#ifndef arch_scale_freq_capacity -+static __always_inline -+unsigned long arch_scale_freq_capacity(int cpu) -+{ -+ return SCHED_CAPACITY_SCALE; -+} -+#endif -+ -+static inline u64 __rq_clock_broken(struct rq *rq) -+{ -+ return READ_ONCE(rq->clock); -+} -+ -+static inline u64 rq_clock(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock; -+} -+ -+static inline u64 
rq_clock_task(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock_task; -+} -+ -+/* -+ * {de,en}queue flags: -+ * -+ * DEQUEUE_SLEEP - task is no longer runnable -+ * ENQUEUE_WAKEUP - task just became runnable -+ * -+ */ -+ -+#define DEQUEUE_SLEEP 0x01 -+ -+#define ENQUEUE_WAKEUP 0x01 -+ -+ -+/* -+ * Below are scheduler API which using in other kernel code -+ * It use the dummy rq_flags -+ * ToDo : PDS need to support these APIs for compatibility with mainline -+ * scheduler code. -+ */ -+struct rq_flags { -+ unsigned long flags; -+}; -+ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock); -+ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock); -+ -+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(&rq->lock); -+} -+ -+static inline void -+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) -+ __releases(rq->lock) -+ __releases(p->pi_lock) -+{ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+} -+ -+static inline void -+rq_unlock_irq(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+static inline struct rq * -+this_rq_lock_irq(struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ -+ return rq; -+} -+ -+static inline bool task_running(struct task_struct *p) -+{ -+ return p->on_cpu; -+} -+ -+extern struct static_key_false sched_schedstats; -+ -+static inline void sched_ttwu_pending(void) { } -+ -+#ifdef CONFIG_CPU_IDLE -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+ rq->idle_state = idle_state; -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ WARN_ON(!rcu_read_lock_held()); -+ return rq->idle_state; -+} -+#else -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ return NULL; -+} -+#endif -+ -+static inline int cpu_of(const struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ return rq->cpu; -+#else -+ return 0; -+#endif -+} -+ -+#include "stats.h" -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+struct irqtime { -+ u64 total; -+ u64 tick_delta; -+ u64 irq_start_time; -+ struct u64_stats_sync sync; -+}; -+ -+DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -+ -+/* -+ * Returns the irqtime minus the softirq time computed by ksoftirqd. -+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime -+ * and never move forward. -+ */ -+static inline u64 irq_time_read(int cpu) -+{ -+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); -+ unsigned int seq; -+ u64 total; -+ -+ do { -+ seq = __u64_stats_fetch_begin(&irqtime->sync); -+ total = irqtime->total; -+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); -+ -+ return total; -+} -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+ -+#ifdef CONFIG_CPU_FREQ -+DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); -+ -+/** -+ * cpufreq_update_util - Take a note about CPU utilization changes. -+ * @rq: Runqueue to carry out the update for. -+ * @flags: Update reason flags. 
-+ * -+ * This function is called by the scheduler on the CPU whose utilization is -+ * being updated. -+ * -+ * It can only be called from RCU-sched read-side critical sections. -+ * -+ * The way cpufreq is currently arranged requires it to evaluate the CPU -+ * performance state (frequency/voltage) on a regular basis to prevent it from -+ * being stuck in a completely inadequate performance level for too long. -+ * That is not guaranteed to happen if the updates are only triggered from CFS -+ * and DL, though, because they may not be coming in if only RT tasks are -+ * active all the time (or there are RT tasks only). -+ * -+ * As a workaround for that issue, this function is called periodically by the -+ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, -+ * but that really is a band-aid. Going forward it should be replaced with -+ * solutions targeted more specifically at RT tasks. -+ */ -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+ data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); -+ if (data) -+ data->func(data, rq_clock(rq), flags); -+} -+ -+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) -+{ -+ if (cpu_of(rq) == smp_processor_id()) -+ cpufreq_update_util(rq, flags); -+} -+#else -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} -+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {} -+#endif /* CONFIG_CPU_FREQ */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+extern int __init sched_tick_offload_init(void); -+#else -+static inline int sched_tick_offload_init(void) { return 0; } -+#endif -+ -+#ifdef arch_scale_freq_capacity -+#ifndef arch_scale_freq_invariant -+#define arch_scale_freq_invariant() (true) -+#endif -+#else /* arch_scale_freq_capacity */ -+#define arch_scale_freq_invariant() (false) -+#endif -+ -+extern void schedule_idle(void); -+ -+/* -+ * !! For sched_setattr_nocheck() (kernel) only !! -+ * -+ * This is actually gross. :( -+ * -+ * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE -+ * tasks, but still be able to sleep. We need this on platforms that cannot -+ * atomically change clock frequency. Remove once fast switching will be -+ * available on such platforms. -+ * -+ * SUGOV stands for SchedUtil GOVernor. -+ */ -+#define SCHED_FLAG_SUGOV 0x10000000 -+ -+#ifdef CONFIG_MEMBARRIER -+/* -+ * The scheduler provides memory barriers required by membarrier between: -+ * - prior user-space memory accesses and store to rq->membarrier_state, -+ * - store to rq->membarrier_state and following user-space memory accesses. -+ * In the same way it provides those guarantees around store to rq->curr. 
-+ */ -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+ int membarrier_state; -+ -+ if (prev_mm == next_mm) -+ return; -+ -+ membarrier_state = atomic_read(&next_mm->membarrier_state); -+ if (READ_ONCE(rq->membarrier_state) == membarrier_state) -+ return; -+ -+ WRITE_ONCE(rq->membarrier_state, membarrier_state); -+} -+#else -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+} -+#endif -+ -+#ifdef CONFIG_NUMA -+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); -+#else -+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return nr_cpu_ids; -+} -+#endif -+#endif /* PDS_SCHED_H */ -diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c -index a96db50d40e0..d3d12baa9036 100644 ---- a/kernel/sched/pelt.c -+++ b/kernel/sched/pelt.c -@@ -236,6 +236,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runna - WRITE_ONCE(sa->util_avg, sa->util_sum / divider); - } - -+#ifndef CONFIG_SCHED_PDS - /* - * sched_entity: - * -@@ -352,6 +353,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - - return 0; - } -+#endif - - #ifdef CONFIG_HAVE_SCHED_AVG_IRQ - /* -diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h -index afff644da065..26d6b47fc156 100644 ---- a/kernel/sched/pelt.h -+++ b/kernel/sched/pelt.h -@@ -1,11 +1,13 @@ - #ifdef CONFIG_SMP - #include "sched-pelt.h" - -+#ifndef CONFIG_SCHED_PDS - int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); - int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); - int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); - int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); - int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); -+#endif - - #ifdef CONFIG_HAVE_SCHED_AVG_IRQ - int update_irq_load_avg(struct rq *rq, u64 running); -@@ -17,6 +19,7 @@ update_irq_load_avg(struct rq *rq, u64 running) - } - #endif - -+#ifndef CONFIG_SCHED_PDS - /* - * When a task is dequeued, its estimated utilization should not be update if - * its util_avg has not been updated at least once. 
-@@ -137,9 +140,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) - return rq_clock_pelt(rq_of(cfs_rq)); - } - #endif -+#endif /* CONFIG_SCHED_PDS */ - - #else - -+#ifndef CONFIG_SCHED_PDS - static inline int - update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) - { -@@ -157,6 +162,7 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - { - return 0; - } -+#endif - - static inline int - update_irq_load_avg(struct rq *rq, u64 running) -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index c8870c5bd7df..4fc9f2ead4d2 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2,6 +2,10 @@ - /* - * Scheduler internal types and methods: - */ -+#ifdef CONFIG_SCHED_PDS -+#include "pds_sched.h" -+#else -+ - #include - - #include -@@ -2496,3 +2500,4 @@ static inline void membarrier_switch_mm(struct rq *rq, - { - } - #endif -+#endif /* !CONFIG_SCHED_PDS */ -diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c -index 750fb3c67eed..45bd43942575 100644 ---- a/kernel/sched/stats.c -+++ b/kernel/sched/stats.c -@@ -22,8 +22,10 @@ static int show_schedstat(struct seq_file *seq, void *v) - } else { - struct rq *rq; - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_PDS - struct sched_domain *sd; - int dcount = 0; -+#endif - #endif - cpu = (unsigned long)(v - 2); - rq = cpu_rq(cpu); -@@ -40,6 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - seq_printf(seq, "\n"); - - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_PDS - /* domain-specific stats */ - rcu_read_lock(); - for_each_domain(cpu, sd) { -@@ -68,6 +71,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - sd->ttwu_move_balance); - } - rcu_read_unlock(); -+#endif - #endif - } - return 0; -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index b6f2f35d0bcf..204933ebc95a 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -130,8 +130,12 @@ static int __maybe_unused four = 4; - static unsigned long zero_ul; - static unsigned long one_ul = 1; - static unsigned long long_max = LONG_MAX; --static int one_hundred = 100; --static int one_thousand = 1000; -+static int __read_mostly one_hundred = 100; -+static int __read_mostly one_thousand = 1000; -+#ifdef CONFIG_SCHED_PDS -+extern int rr_interval; -+extern int sched_yield_type; -+#endif - #ifdef CONFIG_PRINTK - static int ten_thousand = 10000; - #endif -@@ -300,7 +304,7 @@ static struct ctl_table sysctl_base_table[] = { - { } - }; - --#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_PDS) - static int min_sched_granularity_ns = 100000; /* 100 usecs */ - static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_wakeup_granularity_ns; /* 0 usecs */ -@@ -317,6 +321,7 @@ static int max_extfrag_threshold = 1000; - #endif - - static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_PDS - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, -@@ -498,6 +503,7 @@ static struct ctl_table kern_table[] = { - .extra2 = SYSCTL_ONE, - }, - #endif -+#endif /* !CONFIG_SCHED_PDS */ - #ifdef CONFIG_PROVE_LOCKING - { - .procname = "prove_locking", -@@ -1070,6 +1076,26 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_SCHED_PDS -+ { -+ .procname = "rr_interval", -+ .data = &rr_interval, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = SYSCTL_ONE, -+ .extra2 = &one_thousand, -+ }, -+ { -+ .procname = "yield_type", -+ .data = &sched_yield_type, -+ .maxlen = sizeof 
(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &two, -+ }, -+#endif - #if defined(CONFIG_S390) && defined(CONFIG_SMP) - { - .procname = "spin_retry", -diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c -index 42d512fcfda2..71af3cd30ccc 100644 ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -226,7 +226,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) - u64 stime, utime; - - task_cputime(p, &utime, &stime); -- store_samples(samples, stime, utime, p->se.sum_exec_runtime); -+ store_samples(samples, stime, utime, tsk_seruntime(p)); - } - - static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, -@@ -796,6 +796,7 @@ static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, - } - } - -+#ifndef CONFIG_SCHED_PDS - static inline void check_dl_overrun(struct task_struct *tsk) - { - if (tsk->dl.dl_overrun) { -@@ -803,6 +804,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) - __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); - } - } -+#endif - - static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) - { -@@ -830,8 +832,10 @@ static void check_thread_timers(struct task_struct *tsk, - u64 samples[CPUCLOCK_MAX]; - unsigned long soft; - -+#ifndef CONFIG_SCHED_PDS - if (dl_task(tsk)) - check_dl_overrun(tsk); -+#endif - - if (expiry_cache_is_inactive(pct)) - return; -@@ -845,7 +849,7 @@ static void check_thread_timers(struct task_struct *tsk, - soft = task_rlimit(tsk, RLIMIT_RTTIME); - if (soft != RLIM_INFINITY) { - /* Task RT timeout is accounted in jiffies. RTTIME is usec */ -- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); -+ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); - unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - - /* At the hard limit, send SIGKILL. No further action. 
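(Illustrative aside, not part of the diff above or below.) Once a kernel carrying this patch is booted, the two table entries just added surface as /proc/sys/kernel/rr_interval and /proc/sys/kernel/yield_type, clamped by proc_dointvec_minmax to 1-1000 and 0-2 respectively. A minimal userspace sketch, assuming only that those proc files exist:

/* tune_sched_knobs.c - sketch only; requires a PDS/BMQ-patched kernel. */
#include <stdio.h>

static long read_knob(const char *path)
{
        long val = -1;
        FILE *f = fopen(path, "r");

        if (!f)
                return -1;              /* knob absent: unpatched kernel */
        if (fscanf(f, "%ld", &val) != 1)
                val = -1;
        fclose(f);
        return val;
}

static int write_knob(const char *path, long val)
{
        FILE *f = fopen(path, "w");     /* needs root, like sysctl -w */

        if (!f)
                return -1;
        fprintf(f, "%ld\n", val);
        return fclose(f);
}

int main(void)
{
        printf("rr_interval = %ld\n", read_knob("/proc/sys/kernel/rr_interval"));
        printf("yield_type  = %ld\n", read_knob("/proc/sys/kernel/yield_type"));

        /* 0 = no yield, 1 = deboost and requeue (default), 2 = set run queue
         * skip task, per the sysctl documentation these schedulers add;
         * values outside 0..2 are rejected by the minmax handler above. */
        if (write_knob("/proc/sys/kernel/yield_type", 1))
                perror("yield_type");
        return 0;
}
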
*/ -@@ -1099,8 +1103,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) - return true; - } - -+#ifndef CONFIG_SCHED_PDS - if (dl_task(tsk) && tsk->dl.dl_overrun) - return true; -+#endif - - return false; - } -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index 69ee8ef12cee..3eaa2a21caa4 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) - { - /* Make this a -deadline thread */ - static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_PDS -+ /* No deadline on BFS, use RR */ -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, - .sched_runtime = 100000ULL, - .sched_deadline = 10000000ULL, - .sched_period = 10000000ULL -+#endif - }; - struct wakeup_test_data *x = data; - diff --git a/linux54-tkg/linux54-tkg-patches/0006-add-acs-overrides_iommu.patch b/linux54-tkg/linux54-tkg-patches/0006-add-acs-overrides_iommu.patch deleted file mode 100644 index d1303a5..0000000 --- a/linux54-tkg/linux54-tkg-patches/0006-add-acs-overrides_iommu.patch +++ /dev/null @@ -1,193 +0,0 @@ -From cdeab384f48dd9c88e2dff2e9ad8d57dca1a1b1c Mon Sep 17 00:00:00 2001 -From: Mark Weiman -Date: Sun, 12 Aug 2018 11:36:21 -0400 -Subject: [PATCH] pci: Enable overrides for missing ACS capabilities - -This an updated version of Alex Williamson's patch from: -https://lkml.org/lkml/2013/5/30/513 - -Original commit message follows: - -PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that -allows us to control whether transactions are allowed to be redirected -in various subnodes of a PCIe topology. For instance, if two -endpoints are below a root port or downsteam switch port, the -downstream port may optionally redirect transactions between the -devices, bypassing upstream devices. The same can happen internally -on multifunction devices. The transaction may never be visible to the -upstream devices. - -One upstream device that we particularly care about is the IOMMU. If -a redirection occurs in the topology below the IOMMU, then the IOMMU -cannot provide isolation between devices. This is why the PCIe spec -encourages topologies to include ACS support. Without it, we have to -assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation. - -Unfortunately, far too many topologies do not support ACS to make this -a steadfast requirement. Even the latest chipsets from Intel are only -sporadically supporting ACS. We have trouble getting interconnect -vendors to include the PCIe spec required PCIe capability, let alone -suggested features. - -Therefore, we need to add some flexibility. The pcie_acs_override= -boot option lets users opt-in specific devices or sets of devices to -assume ACS support. The "downstream" option assumes full ACS support -on root ports and downstream switch ports. The "multifunction" -option assumes the subset of ACS features available on multifunction -endpoints and upstream switch ports are supported. The "id:nnnn:nnnn" -option enables ACS support on devices matching the provided vendor -and device IDs, allowing more strategic ACS overrides. These options -may be combined in any order. A maximum of 16 id specific overrides -are available. It's suggested to use the most limited set of options -necessary to avoid completely disabling ACS across the topology. -Note to hardware vendors, we have facilities to permanently quirk -specific devices which enforce isolation but not provide an ACS -capability. 
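(Illustrative aside, not part of the diff.) Putting the options described above together on the kernel command line might look like the line below, where the id: pair is a made-up placeholder rather than a real device:

    pcie_acs_override=downstream,multifunction,id:1234:5678

As the parser added below shows, the string is treated as a comma-separated list of options, and enabling any override logs a warning that peer-to-peer DMA may then bypass IOMMU isolation.
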
Please contact me to have your devices added and save -your customers the hassle of this boot option. - -Signed-off-by: Mark Weiman ---- - .../admin-guide/kernel-parameters.txt | 9 ++ - drivers/pci/quirks.c | 101 ++++++++++++++++++ - 2 files changed, 110 insertions(+) - -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index aefd358a5ca3..173b3596fd9e 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -3190,6 +3190,15 @@ - nomsi [MSI] If the PCI_MSI kernel config parameter is - enabled, this kernel boot option can be used to - disable the use of MSI interrupts system-wide. -+ pcie_acs_override = -+ [PCIE] Override missing PCIe ACS support for: -+ downstream -+ All downstream ports - full ACS capabilities -+ multifunction -+ All multifunction devices - multifunction ACS subset -+ id:nnnn:nnnn -+ Specific device - full ACS capabilities -+ Specified as vid:did (vendor/device ID) in hex - noioapicquirk [APIC] Disable all boot interrupt quirks. - Safety option to keep boot IRQs enabled. This - should never be necessary. -diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index 4700d24e5d55..8f7a3d7fd9c1 100644 ---- a/drivers/pci/quirks.c -+++ b/drivers/pci/quirks.c -@@ -3372,6 +3372,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) - dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; - } - -+static bool acs_on_downstream; -+static bool acs_on_multifunction; -+ -+#define NUM_ACS_IDS 16 -+struct acs_on_id { -+ unsigned short vendor; -+ unsigned short device; -+}; -+static struct acs_on_id acs_on_ids[NUM_ACS_IDS]; -+static u8 max_acs_id; -+ -+static __init int pcie_acs_override_setup(char *p) -+{ -+ if (!p) -+ return -EINVAL; -+ -+ while (*p) { -+ if (!strncmp(p, "downstream", 10)) -+ acs_on_downstream = true; -+ if (!strncmp(p, "multifunction", 13)) -+ acs_on_multifunction = true; -+ if (!strncmp(p, "id:", 3)) { -+ char opt[5]; -+ int ret; -+ long val; -+ -+ if (max_acs_id >= NUM_ACS_IDS - 1) { -+ pr_warn("Out of PCIe ACS override slots (%d)\n", -+ NUM_ACS_IDS); -+ goto next; -+ } -+ -+ p += 3; -+ snprintf(opt, 5, "%s", p); -+ ret = kstrtol(opt, 16, &val); -+ if (ret) { -+ pr_warn("PCIe ACS ID parse error %d\n", ret); -+ goto next; -+ } -+ acs_on_ids[max_acs_id].vendor = val; -+ -+ p += strcspn(p, ":"); -+ if (*p != ':') { -+ pr_warn("PCIe ACS invalid ID\n"); -+ goto next; -+ } -+ -+ p++; -+ snprintf(opt, 5, "%s", p); -+ ret = kstrtol(opt, 16, &val); -+ if (ret) { -+ pr_warn("PCIe ACS ID parse error %d\n", ret); -+ goto next; -+ } -+ acs_on_ids[max_acs_id].device = val; -+ max_acs_id++; -+ } -+next: -+ p += strcspn(p, ","); -+ if (*p == ',') -+ p++; -+ } -+ -+ if (acs_on_downstream || acs_on_multifunction || max_acs_id) -+ pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n"); -+ -+ return 0; -+} -+early_param("pcie_acs_override", pcie_acs_override_setup); -+ -+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags) -+{ -+ int i; -+ -+ /* Never override ACS for legacy devices or devices with ACS caps */ -+ if (!pci_is_pcie(dev) || -+ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS)) -+ return -ENOTTY; -+ -+ for (i = 0; i < max_acs_id; i++) -+ if (acs_on_ids[i].vendor == dev->vendor && -+ acs_on_ids[i].device == dev->device) -+ return 1; -+ -+ switch (pci_pcie_type(dev)) { -+ case PCI_EXP_TYPE_DOWNSTREAM: -+ case PCI_EXP_TYPE_ROOT_PORT: -+ if (acs_on_downstream) -+ return 1; -+ break; -+ case 
PCI_EXP_TYPE_ENDPOINT: -+ case PCI_EXP_TYPE_UPSTREAM: -+ case PCI_EXP_TYPE_LEG_END: -+ case PCI_EXP_TYPE_RC_END: -+ if (acs_on_multifunction && dev->multifunction) -+ return 1; -+ } -+ -+ return -ENOTTY; -+} - /* - * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset. - * The device will throw a Link Down error on AER-capable systems and -@@ -4513,6 +4613,7 @@ static const struct pci_dev_acs_enabled { - { PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs }, - /* Zhaoxin Root/Downstream Ports */ - { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, -+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides }, - { 0 } - }; - - diff --git a/linux54-tkg/linux54-tkg-patches/0007-v5.4-fsync.patch b/linux54-tkg/linux54-tkg-patches/0007-v5.4-fsync.patch deleted file mode 100644 index 027116f..0000000 --- a/linux54-tkg/linux54-tkg-patches/0007-v5.4-fsync.patch +++ /dev/null @@ -1,419 +0,0 @@ -split the futex key setup from the queue locking and key reading. This -is useful to support the setup of multiple keys at the same time, like -what is done in futex_requeue() and what will be done for the -FUTEX_WAIT_MULTIPLE command. - -Signed-off-by: Gabriel Krisman Bertazi ---- - kernel/futex.c | 71 +++++++++++++++++++++++++++++--------------------- - 1 file changed, 42 insertions(+), 29 deletions(-) - -diff --git a/kernel/futex.c b/kernel/futex.c -index 6d50728ef2e7..91f3db335c57 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -2631,6 +2631,39 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, - __set_current_state(TASK_RUNNING); - } - -+static int __futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, -+ struct futex_q *q, struct futex_hash_bucket **hb) -+{ -+ -+ u32 uval; -+ int ret; -+ -+retry_private: -+ *hb = queue_lock(q); -+ -+ ret = get_futex_value_locked(&uval, uaddr); -+ -+ if (ret) { -+ queue_unlock(*hb); -+ -+ ret = get_user(uval, uaddr); -+ if (ret) -+ return ret; -+ -+ if (!(flags & FLAGS_SHARED)) -+ goto retry_private; -+ -+ return 1; -+ } -+ -+ if (uval != val) { -+ queue_unlock(*hb); -+ ret = -EWOULDBLOCK; -+ } -+ -+ return ret; -+} -+ - /** - * futex_wait_setup() - Prepare to wait on a futex - * @uaddr: the futex userspace address -@@ -2651,7 +2684,6 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, - static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, - struct futex_q *q, struct futex_hash_bucket **hb) - { -- u32 uval; - int ret; - - /* -@@ -2672,38 +2704,19 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, - * absorb a wakeup if *uaddr does not match the desired values - * while the syscall executes. - */ --retry: -- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ); -- if (unlikely(ret != 0)) -- return ret; -- --retry_private: -- *hb = queue_lock(q); -+ do { -+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, -+ &q->key, FUTEX_READ); -+ if (unlikely(ret != 0)) -+ return ret; - -- ret = get_futex_value_locked(&uval, uaddr); -+ ret = __futex_wait_setup(uaddr, val, flags, q, hb); - -- if (ret) { -- queue_unlock(*hb); -- -- ret = get_user(uval, uaddr); -+ /* Drop key reference if retry or error. 
*/ - if (ret) -- goto out; -+ put_futex_key(&q->key); -+ } while (ret > 0); - -- if (!(flags & FLAGS_SHARED)) -- goto retry_private; -- -- put_futex_key(&q->key); -- goto retry; -- } -- -- if (uval != val) { -- queue_unlock(*hb); -- ret = -EWOULDBLOCK; -- } -- --out: -- if (ret) -- put_futex_key(&q->key); - return ret; - } - --- -2.20.1 - -This is a new futex operation, called FUTEX_WAIT_MULTIPLE, which allows -a thread to wait on several futexes at the same time, and be awoken by -any of them. In a sense, it implements one of the features that was -supported by pooling on the old FUTEX_FD interface. - -My use case for this operation lies in Wine, where we want to implement -a similar interface available in Windows, used mainly for event -handling. The wine folks have an implementation that uses eventfd, but -it suffers from FD exhaustion (I was told they have application that go -to the order of multi-milion FDs), and higher CPU utilization. - -In time, we are also proposing modifications to glibc and libpthread to -make this feature available for Linux native multithreaded applications -using libpthread, which can benefit from the behavior of waiting on any -of a group of futexes. - -In particular, using futexes in our Wine use case reduced the CPU -utilization by 4% for the game Beat Saber and by 1.5% for the game -Shadow of Tomb Raider, both running over Proton (a wine based solution -for Windows emulation), when compared to the eventfd interface. This -implementation also doesn't rely of file descriptors, so it doesn't risk -overflowing the resource. - -Technically, the existing FUTEX_WAIT implementation can be easily -reworked by using do_futex_wait_multiple with a count of one, and I -have a patch showing how it works. I'm not proposing it, since -futex is such a tricky code, that I'd be more confortable to have -FUTEX_WAIT_MULTIPLE running upstream for a couple development cycles, -before considering modifying FUTEX_WAIT. - -From an implementation perspective, the futex list is passed as an array -of (pointer,value,bitset) to the kernel, which will enqueue all of them -and sleep if none was already triggered. It returns a hint of which -futex caused the wake up event to userspace, but the hint doesn't -guarantee that is the only futex triggered. Before calling the syscall -again, userspace should traverse the list, trying to re-acquire any of -the other futexes, to prevent an immediate -EWOULDBLOCK return code from -the kernel. - -This was tested using three mechanisms: - -1) By reimplementing FUTEX_WAIT in terms of FUTEX_WAIT_MULTIPLE and -running the unmodified tools/testing/selftests/futex and a full linux -distro on top of this kernel. - -2) By an example code that exercises the FUTEX_WAIT_MULTIPLE path on a -multi-threaded, event-handling setup. - -3) By running the Wine fsync implementation and executing multi-threaded -applications, in particular the modern games mentioned above, on top of -this implementation. - -Signed-off-by: Zebediah Figura -Signed-off-by: Steven Noonan -Signed-off-by: Pierre-Loup A. 
Griffais -Signed-off-by: Gabriel Krisman Bertazi ---- - include/uapi/linux/futex.h | 7 ++ - kernel/futex.c | 161 ++++++++++++++++++++++++++++++++++++- - 2 files changed, 164 insertions(+), 4 deletions(-) - -diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index a89eb0accd5e..2401c4cf5095 100644 ---- a/include/uapi/linux/futex.h -+++ b/include/uapi/linux/futex.h -@@ -21,6 +21,7 @@ - #define FUTEX_WAKE_BITSET 10 - #define FUTEX_WAIT_REQUEUE_PI 11 - #define FUTEX_CMP_REQUEUE_PI 12 -+#define FUTEX_WAIT_MULTIPLE 31 - - #define FUTEX_PRIVATE_FLAG 128 - #define FUTEX_CLOCK_REALTIME 256 -@@ -150,4 +151,10 @@ struct robust_list_head { - (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \ - | ((oparg & 0xfff) << 12) | (cmparg & 0xfff)) - -+struct futex_wait_block { -+ __u32 __user *uaddr; -+ __u32 val; -+ __u32 bitset; -+}; -+ - #endif /* _UAPI_LINUX_FUTEX_H */ -diff --git a/kernel/futex.c b/kernel/futex.c -index 91f3db335c57..2623e8f152cd 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -183,6 +183,7 @@ static int __read_mostly futex_cmpxchg_enabled; - #endif - #define FLAGS_CLOCKRT 0x02 - #define FLAGS_HAS_TIMEOUT 0x04 -+#define FLAGS_WAKE_MULTIPLE 0x08 - - /* - * Priority Inheritance state: -@@ -2720,6 +2721,150 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, - return ret; - } - -+static int do_futex_wait_multiple(struct futex_wait_block *wb, -+ u32 count, unsigned int flags, -+ ktime_t *abs_time) -+{ -+ -+ struct hrtimer_sleeper timeout, *to; -+ struct futex_hash_bucket *hb; -+ struct futex_q *qs = NULL; -+ int ret; -+ int i; -+ -+ qs = kcalloc(count, sizeof(struct futex_q), GFP_KERNEL); -+ if (!qs) -+ return -ENOMEM; -+ -+ to = futex_setup_timer(abs_time, &timeout, flags, -+ current->timer_slack_ns); -+ retry: -+ for (i = 0; i < count; i++) { -+ qs[i].key = FUTEX_KEY_INIT; -+ qs[i].bitset = wb[i].bitset; -+ -+ ret = get_futex_key(wb[i].uaddr, flags & FLAGS_SHARED, -+ &qs[i].key, FUTEX_READ); -+ if (unlikely(ret != 0)) { -+ for (--i; i >= 0; i--) -+ put_futex_key(&qs[i].key); -+ goto out; -+ } -+ } -+ -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ for (i = 0; i < count; i++) { -+ ret = __futex_wait_setup(wb[i].uaddr, wb[i].val, -+ flags, &qs[i], &hb); -+ if (ret) { -+ /* Drop the failed key directly. keys 0..(i-1) -+ * will be put by unqueue_me. -+ */ -+ put_futex_key(&qs[i].key); -+ -+ /* Undo the partial work we did. */ -+ for (--i; i >= 0; i--) -+ unqueue_me(&qs[i]); -+ -+ __set_current_state(TASK_RUNNING); -+ if (ret > 0) -+ goto retry; -+ goto out; -+ } -+ -+ /* We can't hold to the bucket lock when dealing with -+ * the next futex. Queue ourselves now so we can unlock -+ * it before moving on. -+ */ -+ queue_me(&qs[i], hb); -+ } -+ -+ if (to) -+ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); -+ -+ /* There is no easy to way to check if we are wake already on -+ * multiple futexes without waking through each one of them. So -+ * just sleep and let the scheduler handle it. -+ */ -+ if (!to || to->task) -+ freezable_schedule(); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ ret = -ETIMEDOUT; -+ /* If we were woken (and unqueued), we succeeded. */ -+ for (i = 0; i < count; i++) -+ if (!unqueue_me(&qs[i])) -+ ret = i; -+ -+ /* Succeed wakeup */ -+ if (ret >= 0) -+ goto out; -+ -+ /* Woken by triggered timeout */ -+ if (to && !to->task) -+ goto out; -+ -+ /* -+ * We expect signal_pending(current), but we might be the -+ * victim of a spurious wakeup as well. 
-+ */ -+ if (!signal_pending(current)) -+ goto retry; -+ -+ ret = -ERESTARTSYS; -+ if (!abs_time) -+ goto out; -+ -+ ret = -ERESTART_RESTARTBLOCK; -+ out: -+ if (to) { -+ hrtimer_cancel(&to->timer); -+ destroy_hrtimer_on_stack(&to->timer); -+ } -+ -+ kfree(qs); -+ return ret; -+} -+ -+static int futex_wait_multiple(u32 __user *uaddr, unsigned int flags, -+ u32 count, ktime_t *abs_time) -+{ -+ struct futex_wait_block *wb; -+ struct restart_block *restart; -+ int ret; -+ -+ if (!count) -+ return -EINVAL; -+ -+ wb = kcalloc(count, sizeof(struct futex_wait_block), GFP_KERNEL); -+ if (!wb) -+ return -ENOMEM; -+ -+ if (copy_from_user(wb, uaddr, -+ count * sizeof(struct futex_wait_block))) { -+ ret = -EFAULT; -+ goto out; -+ } -+ -+ ret = do_futex_wait_multiple(wb, count, flags, abs_time); -+ -+ if (ret == -ERESTART_RESTARTBLOCK) { -+ restart = ¤t->restart_block; -+ restart->fn = futex_wait_restart; -+ restart->futex.uaddr = uaddr; -+ restart->futex.val = count; -+ restart->futex.time = *abs_time; -+ restart->futex.flags = (flags | FLAGS_HAS_TIMEOUT | -+ FLAGS_WAKE_MULTIPLE); -+ } -+ -+out: -+ kfree(wb); -+ return ret; -+} -+ - static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, - ktime_t *abs_time, u32 bitset) - { -@@ -2797,6 +2942,10 @@ static long futex_wait_restart(struct restart_block *restart) - } - restart->fn = do_no_restart_syscall; - -+ if (restart->futex.flags & FLAGS_WAKE_MULTIPLE) -+ return (long)futex_wait_multiple(uaddr, restart->futex.flags, -+ restart->futex.val, tp); -+ - return (long)futex_wait(uaddr, restart->futex.flags, - restart->futex.val, tp, restart->futex.bitset); - } -@@ -3680,6 +3829,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - uaddr2); - case FUTEX_CMP_REQUEUE_PI: - return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1); -+ case FUTEX_WAIT_MULTIPLE: -+ return futex_wait_multiple(uaddr, flags, val, timeout); - } - return -ENOSYS; - } -@@ -3696,7 +3847,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || -- cmd == FUTEX_WAIT_REQUEUE_PI)) { -+ cmd == FUTEX_WAIT_REQUEUE_PI || -+ cmd == FUTEX_WAIT_MULTIPLE)) { - if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) - return -EFAULT; - if (get_timespec64(&ts, utime)) -@@ -3705,7 +3857,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - return -EINVAL; - - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } -@@ -3889,14 +4041,15 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || -- cmd == FUTEX_WAIT_REQUEUE_PI)) { -+ cmd == FUTEX_WAIT_REQUEUE_PI || -+ cmd == FUTEX_WAIT_MULTIPLE)) { - if (get_old_timespec32(&ts, utime)) - return -EFAULT; - if (!timespec64_valid(&ts)) - return -EINVAL; - - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } --- -2.20.1 diff --git a/linux54-tkg/linux54-tkg-patches/0009-bmq_v5.4-r2.patch b/linux54-tkg/linux54-tkg-patches/0009-bmq_v5.4-r2.patch deleted file mode 100644 index 4d86ca6..0000000 --- a/linux54-tkg/linux54-tkg-patches/0009-bmq_v5.4-r2.patch +++ /dev/null @@ -1,7601 +0,0 @@ -diff --git a/Documentation/admin-guide/sysctl/kernel.rst 
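(Illustrative aside, not part of the diff.) To make the new interface concrete, here is a hypothetical userspace sketch. The opcode value 31 and the (uaddr, val, bitset) layout are taken from the uapi hunk above; the struct is redefined locally because stock headers do not carry it, and the helper name, timeout value and wake-up handling are assumptions, not part of the patch:

/* wait_multiple.c - sketch; only works on a kernel carrying the patch above. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

#define FUTEX_WAIT_MULTIPLE 31          /* from the uapi hunk above */

/* Mirrors struct futex_wait_block added to <linux/futex.h> by the patch. */
struct futex_wait_block {
        uint32_t *uaddr;
        uint32_t val;
        uint32_t bitset;
};

static long futex_wait_multiple(struct futex_wait_block *wb, unsigned int count,
                                const struct timespec *timeout)
{
        /* uaddr carries the array, val its length; the timeout is relative,
         * exactly as for FUTEX_WAIT. */
        return syscall(SYS_futex, wb, FUTEX_WAIT_MULTIPLE, count, timeout,
                       NULL, 0);
}

int main(void)
{
        uint32_t a = 0, b = 0;  /* wait while *uaddr still equals val */
        struct futex_wait_block wb[] = {
                { &a, 0, 0xffffffffu }, /* 0xffffffff == FUTEX_BITSET_MATCH_ANY */
                { &b, 0, 0xffffffffu },
        };
        struct timespec to = { .tv_sec = 1 };
        long ret = futex_wait_multiple(wb, 2, &to);

        if (ret >= 0)
                printf("futex %ld woke us (a hint only; recheck the rest)\n", ret);
        else if (errno == ETIMEDOUT)
                printf("timed out; neither futex was woken\n");
        else
                perror("FUTEX_WAIT_MULTIPLE");
        return 0;
}

As the cover text notes, the returned index is only a hint: before calling the syscall again the caller is expected to walk the list and try to re-acquire the other futexes itself.
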
b/Documentation/admin-guide/sysctl/kernel.rst -index 032c7cd3cede..97ea247cc43a 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -105,6 +105,7 @@ show up in /proc/sys/kernel: - - unknown_nmi_panic - - watchdog - - watchdog_thresh -+- yield_type - - version - - -@@ -1175,3 +1176,13 @@ is 10 seconds. - - The softlockup threshold is (2 * watchdog_thresh). Setting this - tunable to zero will disable lockup detection altogether. -+ -+yield_type: -+=========== -+ -+BMQ CPU scheduler only. This determines what type of yield calls to -+sched_yield will perform. -+ -+ 0 - No yield. -+ 1 - Deboost and requeue task. (default) -+ 2 - Set run queue skip task. -diff --git a/Documentation/scheduler/sched-BMQ.txt b/Documentation/scheduler/sched-BMQ.txt -new file mode 100644 -index 000000000000..05c84eec0f31 ---- /dev/null -+++ b/Documentation/scheduler/sched-BMQ.txt -@@ -0,0 +1,110 @@ -+ BitMap queue CPU Scheduler -+ -------------------------- -+ -+CONTENT -+======== -+ -+ Background -+ Design -+ Overview -+ Task policy -+ Priority management -+ BitMap Queue -+ CPU Assignment and Migration -+ -+ -+Background -+========== -+ -+BitMap Queue CPU scheduler, referred to as BMQ from here on, is an evolution -+of previous Priority and Deadline based Skiplist multiple queue scheduler(PDS), -+and inspired by Zircon scheduler. The goal of it is to keep the scheduler code -+simple, while efficiency and scalable for interactive tasks, such as desktop, -+movie playback and gaming etc. -+ -+Design -+====== -+ -+Overview -+-------- -+ -+BMQ use per CPU run queue design, each CPU(logical) has it's own run queue, -+each CPU is responsible for scheduling the tasks that are putting into it's -+run queue. -+ -+The run queue is a set of priority queues. Note that these queues are fifo -+queue for non-rt tasks or priority queue for rt tasks in data structure. See -+BitMap Queue below for details. BMQ is optimized for non-rt tasks in the fact -+that most applications are non-rt tasks. No matter the queue is fifo or -+priority, In each queue is an ordered list of runnable tasks awaiting execution -+and the data structures are the same. When it is time for a new task to run, -+the scheduler simply looks the lowest numbered queueue that contains a task, -+and runs the first task from the head of that queue. And per CPU idle task is -+also in the run queue, so the scheduler can always find a task to run on from -+its run queue. -+ -+Each task will assigned the same timeslice(default 4ms) when it is picked to -+start running. Task will be reinserted at the end of the appropriate priority -+queue when it uses its whole timeslice. When the scheduler selects a new task -+from the priority queue it sets the CPU's preemption timer for the remainder of -+the previous timeslice. When that timer fires the scheduler will stop execution -+on that task, select another task and start over again. -+ -+If a task blocks waiting for a shared resource then it's taken out of its -+priority queue and is placed in a wait queue for the shared resource. When it -+is unblocked it will be reinserted in the appropriate priority queue of an -+eligible CPU. -+ -+Task policy -+----------- -+ -+BMQ supports DEADLINE, FIFO, RR, NORMAL, BATCH and IDLE task policy like the -+mainline CFS scheduler. But BMQ is heavy optimized for non-rt task, that's -+NORMAL/BATCH/IDLE policy tasks. Below is the implementation detail of each -+policy. -+ -+DEADLINE -+ It is squashed as priority 0 FIFO task. 
-+ -+FIFO/RR -+ All RT tasks share one single priority queue in BMQ run queue designed. The -+complexity of insert operation is O(n). BMQ is not designed for system runs -+with major rt policy tasks. -+ -+NORMAL/BATCH/IDLE -+ BATCH and IDLE tasks are treated as the same policy. They compete CPU with -+NORMAL policy tasks, but they just don't boost. To control the priority of -+NORMAL/BATCH/IDLE tasks, simply use nice level. -+ -+ISO -+ ISO policy is not supported in BMQ. Please use nice level -20 NORMAL policy -+task instead. -+ -+Priority management -+------------------- -+ -+RT tasks have priority from 0-99. For non-rt tasks, there are three different -+factors used to determine the effective priority of a task. The effective -+priority being what is used to determine which queue it will be in. -+ -+The first factor is simply the task’s static priority. Which is assigned from -+task's nice level, within [-20, 19] in userland's point of view and [0, 39] -+internally. -+ -+The second factor is the priority boost. This is a value bounded between -+[-MAX_PRIORITY_ADJ, MAX_PRIORITY_ADJ] used to offset the base priority, it is -+modified by the following cases: -+ -+*When a thread has used up its entire timeslice, always deboost its boost by -+increasing by one. -+*When a thread gives up cpu control(voluntary or non-voluntary) to reschedule, -+and its switch-in time(time after last switch and run) below the thredhold -+based on its priority boost, will boost its boost by decreasing by one buti is -+capped at 0 (won’t go negative). -+ -+The intent in this system is to ensure that interactive threads are serviced -+quickly. These are usually the threads that interact directly with the user -+and cause user-perceivable latency. These threads usually do little work and -+spend most of their time blocked awaiting another user event. So they get the -+priority boost from unblocking while background threads that do most of the -+processing receive the priority penalty for using their entire timeslice. -diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c -index f18d5067cd0f..fe489fc01c73 100644 ---- a/arch/powerpc/platforms/cell/spufs/sched.c -+++ b/arch/powerpc/platforms/cell/spufs/sched.c -@@ -51,11 +51,6 @@ static struct task_struct *spusched_task; - static struct timer_list spusched_timer; - static struct timer_list spuloadavg_timer; - --/* -- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). -- */ --#define NORMAL_PRIO 120 -- - /* - * Frequency of the spu scheduler tick. By default we do one SPU scheduler - * tick for every 10 CPU scheduler ticks. 
-diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c -index b66e81c06a57..a294f8f5fd75 100644 ---- a/drivers/cpufreq/cpufreq_conservative.c -+++ b/drivers/cpufreq/cpufreq_conservative.c -@@ -28,8 +28,8 @@ struct cs_dbs_tuners { - }; - - /* Conservative governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) --#define DEF_FREQUENCY_DOWN_THRESHOLD (20) -+#define DEF_FREQUENCY_UP_THRESHOLD (63) -+#define DEF_FREQUENCY_DOWN_THRESHOLD (26) - #define DEF_FREQUENCY_STEP (5) - #define DEF_SAMPLING_DOWN_FACTOR (1) - #define MAX_SAMPLING_DOWN_FACTOR (10) -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index dced033875bf..d2cd03766b09 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -18,7 +18,7 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) -+#define DEF_FREQUENCY_UP_THRESHOLD (63) - #define DEF_SAMPLING_DOWN_FACTOR (1) - #define MAX_SAMPLING_DOWN_FACTOR (100000) - #define MICRO_FREQUENCY_UP_THRESHOLD (95) -@@ -127,7 +127,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) - } - - /* -- * Every sampling_rate, we check, if current idle time is less than 20% -+ * Every sampling_rate, we check, if current idle time is less than 37% - * (default), then we try to increase frequency. Else, we adjust the frequency - * proportional to load. - */ -diff --git a/fs/proc/base.c b/fs/proc/base.c -index ebea9501afb8..51c9346a69fe 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -477,7 +477,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, - seq_puts(m, "0 0 0\n"); - else - seq_printf(m, "%llu %llu %lu\n", -- (unsigned long long)task->se.sum_exec_runtime, -+ (unsigned long long)tsk_seruntime(task), - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); - -diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h -index 8874f681b056..59eb72bf7d5f 100644 ---- a/include/asm-generic/resource.h -+++ b/include/asm-generic/resource.h -@@ -23,7 +23,7 @@ - [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, \ - [RLIMIT_SIGPENDING] = { 0, 0 }, \ - [RLIMIT_MSGQUEUE] = { MQ_BYTES_MAX, MQ_BYTES_MAX }, \ -- [RLIMIT_NICE] = { 0, 0 }, \ -+ [RLIMIT_NICE] = { 30, 30 }, \ - [RLIMIT_RTPRIO] = { 0, 0 }, \ - [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \ - } -diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h -index 1b6d31da7cbc..dea181bdb1dd 100644 ---- a/include/linux/jiffies.h -+++ b/include/linux/jiffies.h -@@ -171,7 +171,7 @@ static inline u64 get_jiffies_64(void) - * Have the 32 bit jiffies value wrap 5 minutes after boot - * so jiffies wrap bugs show up earlier. 
- */ --#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) -+#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ)) - - /* - * Change timeval to jiffies, trying to avoid the -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 67a1d86981a9..a38ec88efbad 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -644,13 +644,18 @@ struct task_struct { - unsigned int flags; - unsigned int ptrace; - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_BMQ) - struct llist_node wake_entry; -+#endif -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_BMQ) - int on_cpu; -+#endif -+#ifdef CONFIG_SMP - #ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ - unsigned int cpu; - #endif -+#ifndef CONFIG_SCHED_BMQ - unsigned int wakee_flips; - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; -@@ -664,6 +669,7 @@ struct task_struct { - */ - int recent_used_cpu; - int wake_cpu; -+#endif /* !CONFIG_SCHED_BMQ */ - #endif - int on_rq; - -@@ -672,13 +678,23 @@ struct task_struct { - int normal_prio; - unsigned int rt_priority; - -+#ifdef CONFIG_SCHED_BMQ -+ u64 last_ran; -+ s64 time_slice; -+ int boost_prio; -+ int bmq_idx; -+ struct list_head bmq_node; -+ /* sched_clock time spent running */ -+ u64 sched_time; -+#else /* !CONFIG_SCHED_BMQ */ - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; -+ struct sched_dl_entity dl; -+#endif - #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; - #endif -- struct sched_dl_entity dl; - - #ifdef CONFIG_UCLAMP_TASK - /* Clamp values requested for a scheduling entity */ -@@ -1283,6 +1299,15 @@ struct task_struct { - */ - }; - -+#ifdef CONFIG_SCHED_BMQ -+#define tsk_seruntime(t) ((t)->sched_time) -+/* replace the uncertian rt_timeout with 0UL */ -+#define tsk_rttimeout(t) (0UL) -+#else /* CFS */ -+#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) -+#define tsk_rttimeout(t) ((t)->rt.timeout) -+#endif /* !CONFIG_SCHED_BMQ */ -+ - static inline struct pid *task_pid(struct task_struct *task) - { - return task->thread_pid; -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 1aff00b65f3c..02a3c5d34ee4 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -1,5 +1,22 @@ - /* SPDX-License-Identifier: GPL-2.0 */ - -+#ifdef CONFIG_SCHED_BMQ -+ -+#define __tsk_deadline(p) (0UL) -+ -+static inline int dl_prio(int prio) -+{ -+ return 0; -+} -+ -+static inline int dl_task(struct task_struct *p) -+{ -+ return (SCHED_NORMAL == p->policy); -+} -+#else -+ -+#define __tsk_deadline(p) ((p)->dl.deadline) -+ - /* - * SCHED_DEADLINE tasks has negative priorities, reflecting - * the fact that any of them has higher prio than RT and -@@ -19,6 +36,7 @@ static inline int dl_task(struct task_struct *p) - { - return dl_prio(p->prio); - } -+#endif /* CONFIG_SCHED_BMQ */ - - static inline bool dl_time_before(u64 a, u64 b) - { -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 7d64feafc408..d9dc5d3ccd2e 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -20,11 +20,17 @@ - */ - - #define MAX_USER_RT_PRIO 100 -+ - #define MAX_RT_PRIO MAX_USER_RT_PRIO - - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - -+#ifdef CONFIG_SCHED_BMQ -+/* +/- priority levels from the base priority */ -+#define MAX_PRIORITY_ADJ 4 -+#endif -+ - /* - * Convert user-nice values [ -20 ... 0 ... 
19 ] - * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..6387c8ea9832 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return true; -+#ifndef CONFIG_SCHED_BMQ - if (policy == SCHED_DEADLINE) - return true; -+#endif - return false; - } - -diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h -index 4b1c3b664f51..f0f966219695 100644 ---- a/include/linux/sched/task.h -+++ b/include/linux/sched/task.h -@@ -99,7 +99,7 @@ extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); - extern void free_task(struct task_struct *tsk); - - /* sched_exec is called by processes performing an exec */ --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_BMQ) - extern void sched_exec(void); - #else - #define sched_exec() {} -diff --git a/init/Kconfig b/init/Kconfig -index b4daad2bac23..f9faeb82f677 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -717,9 +717,28 @@ config GENERIC_SCHED_CLOCK - - menu "Scheduler features" - -+config SCHED_BMQ -+ bool "BMQ CPU scheduler" -+ help -+ The BitMap Queue CPU scheduler for excellent interactivity and -+ responsiveness on the desktop and solid scalability on normal -+ hardware and commodity servers. -+ -+ Say Y here. -+ default y -+ -+config SCHED_TIMESLICE -+ int "Scheduler Task time slice" -+ depends on SCHED_BMQ -+ help -+ Time slice in ms for BMQ CPU scheduler, default 4 ms. -+ default 2 if PREEMPT -+ default 4 if !PREEMPT -+ - config UCLAMP_TASK - bool "Enable utilization clamping for RT/FAIR tasks" - depends on CPU_FREQ_GOV_SCHEDUTIL -+ depends on !SCHED_BMQ - help - This feature enables the scheduler to track the clamped utilization - of each CPU based on RUNNABLE tasks scheduled on that CPU. -@@ -802,6 +821,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION -+ depends on !SCHED_BMQ - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -903,7 +923,7 @@ menuconfig CGROUP_SCHED - bandwidth allocation to such task groups. It uses cgroups to group - tasks. 
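[Aside, not part of the patch: MAX_PRIORITY_ADJ from the prio.h hunk above is the +/- window BMQ uses around a task's static priority; boost_task()/deboost_task() further down clamp the per-task boost to that window for SCHED_NORMAL tasks, and task_sched_prio() later in bmq.c adds the boost to the runqueue index. A small arithmetic sketch, assuming the mainline values MAX_RT_PRIO = 100 and DEFAULT_PRIO = 120.]

#include <stdio.h>

#define MAX_RT_PRIO       100
#define DEFAULT_PRIO      120                   /* MAX_RT_PRIO + NICE_WIDTH/2 */
#define NICE_TO_PRIO(n)   ((n) + DEFAULT_PRIO)
#define MAX_PRIORITY_ADJ  4                     /* from the prio.h hunk above */

int main(void)
{
    int nice = 0;
    int static_prio = NICE_TO_PRIO(nice);       /* 120 for nice 0 */
    int prio = static_prio + MAX_PRIORITY_ADJ;  /* BMQ normal_prio: 124 */

    /* task_sched_prio() (later in bmq.c) maps a non-RT task to queue index
     * prio - MAX_RT_PRIO + boost_prio + 1, so the +/-4 boost window lets a
     * nice-0 task move across 9 adjacent bitmap levels. */
    for (int boost = -MAX_PRIORITY_ADJ; boost <= MAX_PRIORITY_ADJ; boost++)
        printf("nice %d, boost %+d -> queue index %d\n",
               nice, boost, prio - MAX_RT_PRIO + boost + 1);
    return 0;
}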
- --if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_BMQ - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED -@@ -1150,6 +1170,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -+ depends on !SCHED_BMQ - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -diff --git a/init/init_task.c b/init/init_task.c -index 9e5cbe5eab7b..c293de91d90f 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -66,9 +66,15 @@ struct task_struct init_task - .stack = init_stack, - .usage = REFCOUNT_INIT(2), - .flags = PF_KTHREAD, -+#ifdef CONFIG_SCHED_BMQ -+ .prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, -+ .static_prio = DEFAULT_PRIO, -+ .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, -+#else - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, - .normal_prio = MAX_PRIO - 20, -+#endif - .policy = SCHED_NORMAL, - .cpus_ptr = &init_task.cpus_mask, - .cpus_mask = CPU_MASK_ALL, -@@ -78,6 +84,12 @@ struct task_struct init_task - .restart_block = { - .fn = do_no_restart_syscall, - }, -+#ifdef CONFIG_SCHED_BMQ -+ .boost_prio = 0, -+ .bmq_idx = 15, -+ .bmq_node = LIST_HEAD_INIT(init_task.bmq_node), -+ .time_slice = HZ, -+#else - .se = { - .group_node = LIST_HEAD_INIT(init_task.se.group_node), - }, -@@ -85,6 +97,7 @@ struct task_struct init_task - .run_list = LIST_HEAD_INIT(init_task.rt.run_list), - .time_slice = RR_TIMESLICE, - }, -+#endif - .tasks = LIST_HEAD_INIT(init_task.tasks), - #ifdef CONFIG_SMP - .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), -diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index c87ee6412b36..45fac7b9c940 100644 ---- a/kernel/cgroup/cpuset.c -+++ b/kernel/cgroup/cpuset.c -@@ -632,7 +632,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) - return ret; - } - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_BMQ) - /* - * Helper routine for generate_sched_domains(). - * Do cpusets a, b have overlapping effective cpus_allowed masks? 
-@@ -1007,7 +1007,7 @@ static void rebuild_sched_domains_locked(void) - /* Have scheduler rebuild the domains */ - partition_and_rebuild_sched_domains(ndoms, doms, attr); - } --#else /* !CONFIG_SMP */ -+#else /* !CONFIG_SMP || CONFIG_SCHED_BMQ */ - static void rebuild_sched_domains_locked(void) - { - } -diff --git a/kernel/delayacct.c b/kernel/delayacct.c -index 27725754ac99..769d773c7182 100644 ---- a/kernel/delayacct.c -+++ b/kernel/delayacct.c -@@ -106,7 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) - */ - t1 = tsk->sched_info.pcount; - t2 = tsk->sched_info.run_delay; -- t3 = tsk->se.sum_exec_runtime; -+ t3 = tsk_seruntime(tsk); - - d->cpu_count += t1; - -diff --git a/kernel/exit.c b/kernel/exit.c -index a46a50d67002..58043176b285 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -131,7 +131,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->curr_target = next_thread(tsk); - } - -- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, -+ add_device_randomness((const void*) &tsk_seruntime(tsk), - sizeof(unsigned long long)); - - /* -@@ -152,7 +152,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->inblock += task_io_get_inblock(tsk); - sig->oublock += task_io_get_oublock(tsk); - task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; -+ sig->sum_sched_runtime += tsk_seruntime(tsk); - sig->nr_threads--; - __unhash_process(tsk, group_dead); - write_sequnlock(&sig->stats_lock); -diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c -index cdf318d86dd6..b3bd1e65c002 100644 ---- a/kernel/livepatch/transition.c -+++ b/kernel/livepatch/transition.c -@@ -306,7 +306,11 @@ static bool klp_try_switch_task(struct task_struct *task) - */ - rq = task_rq_lock(task, &flags); - -+#ifdef CONFIG_SCHED_BMQ -+ if (task_running(task) && task != current) { -+#else - if (task_running(rq, task) && task != current) { -+#endif - snprintf(err_buf, STACK_ERR_BUF_SIZE, - "%s: %s:%d is running\n", __func__, task->comm, - task->pid); -diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index 2874bf556162..fad8a279fdfa 100644 ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -229,7 +229,7 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, - * Only use with rt_mutex_waiter_{less,equal}() - */ - #define task_to_waiter(p) \ -- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } -+ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = __tsk_deadline(p) } - - static inline int - rt_mutex_waiter_less(struct rt_mutex_waiter *left, -@@ -680,7 +680,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, - * the values of the node being removed. 
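[Aside, not part of the patch: the fs/proc, delayacct and exit.c hunks above do not change behaviour, they only route reads of se.sum_exec_runtime through the tsk_seruntime() macro so the same call sites compile whether the task carries a CFS sched_entity or BMQ's sched_time counter. A minimal sketch of that pattern with a hypothetical task_stub struct; build with or without -DUSE_ALT to exercise both layouts.]

#include <stdio.h>

/* Two mutually exclusive per-task layouts, mirroring CFS vs. BMQ. */
struct task_stub {
#ifdef USE_ALT
    unsigned long long sched_time;                        /* BMQ-style */
#else
    struct { unsigned long long sum_exec_runtime; } se;   /* CFS-style */
#endif
};

#ifdef USE_ALT
#define tsk_seruntime(t)  ((t)->sched_time)
#else
#define tsk_seruntime(t)  ((t)->se.sum_exec_runtime)
#endif

/* Shared consumer code (think delayacct, /proc schedstat, __exit_signal):
 * it never needs to know which scheduler was compiled in. */
static void report(const struct task_stub *t)
{
    printf("ran for %llu ns\n", tsk_seruntime(t));
}

int main(void)
{
#ifdef USE_ALT
    struct task_stub t = { .sched_time = 123456789ULL };
#else
    struct task_stub t = { .se.sum_exec_runtime = 123456789ULL };
#endif
    report(&t);
    return 0;
}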
- */ - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - rt_mutex_enqueue(lock, waiter); - -@@ -953,7 +953,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, - waiter->task = task; - waiter->lock = lock; - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - /* Get the top priority waiter on the lock */ - if (rt_mutex_has_waiters(lock)) -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 21fb5a5662b5..cab4e5c5b38e 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -16,14 +16,20 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) - CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer - endif - --obj-y += core.o loadavg.o clock.o cputime.o --obj-y += idle.o fair.o rt.o deadline.o --obj-y += wait.o wait_bit.o swait.o completion.o -- --obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o -+ifdef CONFIG_SCHED_BMQ -+obj-y += bmq.o -+else -+obj-y += core.o -+obj-y += fair.o rt.o deadline.o -+obj-$(CONFIG_SMP) += cpudeadline.o topology.o stop_task.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o --obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o -+endif -+obj-y += loadavg.o clock.o cputime.o -+obj-y += idle.o -+obj-y += wait.o wait_bit.o swait.o completion.o -+obj-$(CONFIG_SMP) += cpupri.o pelt.o -+obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o -diff --git a/kernel/sched/bmq.c b/kernel/sched/bmq.c -new file mode 100644 -index 000000000000..42a2a5b3d172 ---- /dev/null -+++ b/kernel/sched/bmq.c -@@ -0,0 +1,6102 @@ -+/* -+ * kernel/sched/bmq.c -+ * -+ * BMQ Core kernel scheduler code and related syscalls -+ * -+ * Copyright (C) 1991-2002 Linus Torvalds -+ * -+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes -+ * a whole lot of those previous things. -+ * 2017-09-06 Priority and Deadline based Skip list multiple queue kernel -+ * scheduler by Alfred Chen. -+ * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. -+ */ -+#include "bmq_sched.h" -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+ -+#include "../workqueue_internal.h" -+#include "../smpboot.h" -+ -+#include "pelt.h" -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+/* rt_prio(prio) defined in include/linux/sched/rt.h */ -+#define rt_task(p) rt_prio((p)->prio) -+#define rt_policy(policy) ((policy) == SCHED_FIFO || (policy) == SCHED_RR) -+#define task_has_rt_policy(p) (rt_policy((p)->policy)) -+ -+#define STOP_PRIO (MAX_RT_PRIO - 1) -+ -+#define SCHED_TIMESLICE_NS (CONFIG_SCHED_TIMESLICE * 1000 * 1000) -+ -+/* Reschedule if less than this many μs left */ -+#define RESCHED_NS (100 * 1000) -+ -+/* -+ * This allows printing both to /proc/sched_debug and -+ * to the console -+ */ -+#define SEQ_printf(m, x...) \ -+ do { \ -+ if (m) \ -+ seq_printf(m, x); \ -+ else \ -+ pr_cont(x); \ -+ } while (0) -+ -+static inline void print_scheduler_version(void) -+{ -+ printk(KERN_INFO "bmq: BMQ CPU Scheduler 5.4-r2 by Alfred Chen.\n"); -+} -+ -+/** -+ * sched_yield_type - Choose what sort of yield sched_yield will perform. -+ * 0: No yield. -+ * 1: Deboost and requeue task. (default) -+ * 2: Set rq skip task. 
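[Aside, not part of the patch: with the Kconfig default of CONFIG_SCHED_TIMESLICE=4 (2 under PREEMPT), SCHED_TIMESLICE_NS above works out to 4,000,000 ns, and RESCHED_NS, despite the "μs" wording in its comment, is a nanosecond count equal to 100 µs. A quick arithmetic check under that assumption.]

#include <stdio.h>

#define CONFIG_SCHED_TIMESLICE 4                    /* Kconfig default (!PREEMPT) */
#define SCHED_TIMESLICE_NS (CONFIG_SCHED_TIMESLICE * 1000 * 1000)
#define RESCHED_NS         (100 * 1000)             /* 100 us, expressed in ns */

int main(void)
{
    printf("time slice: %d ns (%.1f ms)\n", SCHED_TIMESLICE_NS,
           SCHED_TIMESLICE_NS / 1e6);               /* 4000000 ns = 4.0 ms */
    printf("resched threshold: %d ns (%.0f us)\n", RESCHED_NS,
           RESCHED_NS / 1e3);                       /* 100000 ns = 100 us */
    return 0;
}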
-+ */ -+int sched_yield_type __read_mostly = 1; -+ -+#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) -+#define boost_threshold(p) (SCHED_TIMESLICE_NS >>\ -+ (10 - MAX_PRIORITY_ADJ - (p)->boost_prio)) -+ -+static inline void boost_task(struct task_struct *p) -+{ -+ int limit; -+ -+ switch (p->policy) { -+ case SCHED_NORMAL: -+ limit = -MAX_PRIORITY_ADJ; -+ break; -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ limit = 0; -+ break; -+ default: -+ return; -+ } -+ -+ if (p->boost_prio > limit) -+ p->boost_prio--; -+} -+ -+static inline void deboost_task(struct task_struct *p) -+{ -+ if (p->boost_prio < MAX_PRIORITY_ADJ) -+ p->boost_prio++; -+} -+ -+#ifdef CONFIG_SMP -+static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; -+ -+enum { -+ BASE_CPU_AFFINITY_CHK_LEVEL = 1, -+#ifdef CONFIG_SCHED_SMT -+ SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+#ifdef CONFIG_SCHED_MC -+ MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+ NR_CPU_AFFINITY_CHK_LEVEL -+}; -+ -+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); -+ -+#ifdef CONFIG_SCHED_SMT -+DEFINE_STATIC_KEY_FALSE(sched_smt_present); -+EXPORT_SYMBOL_GPL(sched_smt_present); -+#endif -+ -+/* -+ * Keep a unique ID per domain (we use the first CPUs number in the cpumask of -+ * the domain), this allows us to quickly tell if two cpus are in the same cache -+ * domain, see cpus_share_cache(). -+ */ -+DEFINE_PER_CPU(int, sd_llc_id); -+ -+int __weak arch_sd_sibling_asym_packing(void) -+{ -+ return 0*SD_ASYM_PACKING; -+} -+#endif /* CONFIG_SMP */ -+ -+static DEFINE_MUTEX(sched_hotcpu_mutex); -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+#define IDLE_WM (IDLE_TASK_SCHED_PRIO) -+ -+static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; -+static cpumask_t sched_rq_watermark[bmq_BITS] ____cacheline_aligned_in_smp; -+ -+#if (bmq_BITS <= BITS_PER_LONG) -+#define bmq_find_first_bit(bm) __ffs((bm[0])) -+#define bmq_find_next_bit(bm, start) __ffs(BITMAP_FIRST_WORD_MASK(start) & bm[0]) -+#else -+#define bmq_find_first_bit(bm) find_first_bit((bm), bmq_BITS) -+#define bmq_find_next_bit(bm, start) find_next_bit(bm, bmq_BITS, start) -+#endif -+ -+static inline void update_sched_rq_watermark(struct rq *rq) -+{ -+ unsigned long watermark = bmq_find_first_bit(rq->queue.bitmap); -+ unsigned long last_wm = rq->watermark; -+ unsigned long i; -+ int cpu; -+ -+ if (watermark == last_wm) -+ return; -+ -+ rq->watermark = watermark; -+ cpu = cpu_of(rq); -+ if (watermark < last_wm) { -+ for (i = watermark + 1; i <= last_wm; i++) -+ cpumask_andnot(&sched_rq_watermark[i], -+ &sched_rq_watermark[i], cpumask_of(cpu)); -+#ifdef CONFIG_SCHED_SMT -+ if (!static_branch_likely(&sched_smt_present)) -+ return; -+ if (IDLE_WM == last_wm) -+ cpumask_andnot(&sched_sg_idle_mask, -+ &sched_sg_idle_mask, cpu_smt_mask(cpu)); -+#endif -+ return; -+ } -+ /* last_wm < watermark */ -+ for (i = last_wm + 1; i <= watermark; i++) -+ cpumask_set_cpu(cpu, &sched_rq_watermark[i]); -+#ifdef CONFIG_SCHED_SMT -+ if (!static_branch_likely(&sched_smt_present)) -+ return; -+ if (IDLE_WM == watermark) { -+ cpumask_t tmp; -+ cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[IDLE_WM]); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_sg_idle_mask, 
cpu_smt_mask(cpu), -+ &sched_sg_idle_mask); -+ } -+#endif -+} -+ -+static inline int task_sched_prio(struct task_struct *p) -+{ -+ return (p->prio < MAX_RT_PRIO)? 0:p->prio - MAX_RT_PRIO + p->boost_prio + 1; -+} -+ -+static inline void bmq_init(struct bmq *q) -+{ -+ int i; -+ -+ bitmap_zero(q->bitmap, bmq_BITS); -+ for(i = 0; i < bmq_BITS; i++) -+ INIT_LIST_HEAD(&q->heads[i]); -+} -+ -+static inline void bmq_init_idle(struct bmq *q, struct task_struct *idle) -+{ -+ INIT_LIST_HEAD(&q->heads[IDLE_TASK_SCHED_PRIO]); -+ list_add(&idle->bmq_node, &q->heads[IDLE_TASK_SCHED_PRIO]); -+ set_bit(IDLE_TASK_SCHED_PRIO, q->bitmap); -+} -+ -+static inline void bmq_add_task(struct task_struct *p, struct bmq *q, int idx) -+{ -+ struct list_head *n; -+ -+ if (likely(idx)) { -+ list_add_tail(&p->bmq_node, &q->heads[idx]); -+ return; -+ } -+ -+ list_for_each(n, &q->heads[idx]) -+ if (list_entry(n, struct task_struct, bmq_node)->prio > p->prio) -+ break; -+ __list_add(&p->bmq_node, n->prev, n); -+} -+ -+/* -+ * This routine used in bmq scheduler only which assume the idle task in the bmq -+ */ -+static inline struct task_struct *rq_first_bmq_task(struct rq *rq) -+{ -+ unsigned long idx = bmq_find_first_bit(rq->queue.bitmap); -+ const struct list_head *head = &rq->queue.heads[idx]; -+ -+ return list_first_entry(head, struct task_struct, bmq_node); -+} -+ -+static inline struct task_struct * -+rq_next_bmq_task(struct task_struct *p, struct rq *rq) -+{ -+ unsigned long idx = p->bmq_idx; -+ struct list_head *head = &rq->queue.heads[idx]; -+ -+ if (list_is_last(&p->bmq_node, head)) { -+ idx = bmq_find_next_bit(rq->queue.bitmap, idx + 1); -+ head = &rq->queue.heads[idx]; -+ -+ return list_first_entry(head, struct task_struct, bmq_node); -+ } -+ -+ return list_next_entry(p, bmq_node); -+} -+ -+static inline struct task_struct *rq_runnable_task(struct rq *rq) -+{ -+ struct task_struct *next = rq_first_bmq_task(rq); -+ -+ if (unlikely(next == rq->skip)) -+ next = rq_next_bmq_task(next, rq); -+ -+ return next; -+} -+ -+/* -+ * Context: p->pi_lock -+ */ -+static inline struct rq -+*__task_access_lock(struct task_struct *p, raw_spinlock_t **plock) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock(&rq->lock); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ *plock = NULL; -+ return rq; -+ } -+ } -+} -+ -+static inline void -+__task_access_unlock(struct task_struct *p, raw_spinlock_t *lock) -+{ -+ if (NULL != lock) -+ raw_spin_unlock(lock); -+} -+ -+static inline struct rq -+*task_access_lock_irqsave(struct task_struct *p, raw_spinlock_t **plock, -+ unsigned long *flags) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock_irqsave(&rq->lock, *flags); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&rq->lock, *flags); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ raw_spin_lock_irqsave(&p->pi_lock, *flags); -+ if (likely(!p->on_cpu && !p->on_rq && -+ rq == task_rq(p))) { -+ *plock = &p->pi_lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags); -+ } -+ } -+} -+ -+static 
inline void -+task_access_unlock_irqrestore(struct task_struct *p, raw_spinlock_t *lock, -+ unsigned long *flags) -+{ -+ raw_spin_unlock_irqrestore(lock, *flags); -+} -+ -+/* -+ * __task_rq_lock - lock the rq @p resides on. -+ */ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ for (;;) { -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) -+ return rq; -+ raw_spin_unlock(&rq->lock); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+/* -+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. -+ */ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ for (;;) { -+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ /* -+ * move_queued_task() task_rq_lock() -+ * -+ * ACQUIRE (rq->lock) -+ * [S] ->on_rq = MIGRATING [L] rq = task_rq() -+ * WMB (__set_task_cpu()) ACQUIRE (rq->lock); -+ * [S] ->cpu = new_cpu [L] task_rq() -+ * [L] ->on_rq -+ * RELEASE (rq->lock) -+ * -+ * If we observe the old CPU in task_rq_lock(), the acquire of -+ * the old rq->lock will fully serialize against the stores. -+ * -+ * If we observe the new CPU in task_rq_lock(), the address -+ * dependency headed by '[L] rq = task_rq()' and the acquire -+ * will pair with the WMB to ensure we then also see migrating. -+ */ -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+/* -+ * RQ-clock updating methods: -+ */ -+ -+static void update_rq_clock_task(struct rq *rq, s64 delta) -+{ -+/* -+ * In theory, the compile should just see 0 here, and optimize out the call -+ * to sched_rt_avg_update. But I don't trust it... -+ */ -+ s64 __maybe_unused steal = 0, irq_delta = 0; -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; -+ -+ /* -+ * Since irq_time is only updated on {soft,}irq_exit, we might run into -+ * this case when a previous update_rq_clock() happened inside a -+ * {soft,}irq region. -+ * -+ * When this happens, we stop ->clock_task and only update the -+ * prev_irq_time stamp to account for the part that fit, so that a next -+ * update will consume the rest. This ensures ->clock_task is -+ * monotonic. -+ * -+ * It does however cause some slight miss-attribution of {soft,}irq -+ * time, a more accurate solution would be to update the irq_time using -+ * the current rq->clock timestamp, except that would require using -+ * atomic ops. 
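[Aside, not part of the patch: __task_access_lock(), task_access_lock_irqsave() and task_rq_lock() above all share the same shape: read which runqueue the task claims to be on, take that lock, then re-check the claim under the lock and retry if the task migrated in the window. A userspace C11/pthreads sketch of that lock-then-verify loop, using a hypothetical queue/item pair in place of rq/task_struct.]

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* Toy analogue of per-CPU runqueues: an item records which queue owns it,
 * and that field may change while we are not holding any lock. */
struct queue { pthread_mutex_t lock; };
struct item  { _Atomic(struct queue *) owner; };

/* Lock the queue the item currently belongs to: lock, re-check, retry.
 * The ownership test is only trustworthy once the candidate lock is held. */
static struct queue *item_queue_lock(struct item *it)
{
    for (;;) {
        struct queue *q = atomic_load(&it->owner);
        pthread_mutex_lock(&q->lock);
        if (atomic_load(&it->owner) == q)
            return q;                      /* still ours: caller holds q->lock */
        pthread_mutex_unlock(&q->lock);    /* item migrated meanwhile: retry */
    }
}

int main(void)
{
    struct queue q0 = { PTHREAD_MUTEX_INITIALIZER };
    struct item it;
    atomic_init(&it.owner, &q0);

    struct queue *locked = item_queue_lock(&it);
    printf("locked queue %p\n", (void *)locked);
    pthread_mutex_unlock(&locked->lock);
    return 0;
}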
-+ */ -+ if (irq_delta > delta) -+ irq_delta = delta; -+ -+ rq->prev_irq_time += irq_delta; -+ delta -= irq_delta; -+#endif -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ if (static_key_false((¶virt_steal_rq_enabled))) { -+ steal = paravirt_steal_clock(cpu_of(rq)); -+ steal -= rq->prev_steal_time_rq; -+ -+ if (unlikely(steal > delta)) -+ steal = delta; -+ -+ rq->prev_steal_time_rq += steal; -+ delta -= steal; -+ } -+#endif -+ -+ rq->clock_task += delta; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ if ((irq_delta + steal)) -+ update_irq_load_avg(rq, irq_delta + steal); -+#endif -+} -+ -+static inline void update_rq_clock(struct rq *rq) -+{ -+ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; -+ -+ if (unlikely(delta <= 0)) -+ return; -+ rq->clock += delta; -+ update_rq_clock_task(rq, delta); -+} -+ -+/* -+ * cmpxchg based fetch_or, macro so it works for different integer types -+ */ -+#define fetch_or(ptr, mask) \ -+ ({ \ -+ typeof(ptr) _ptr = (ptr); \ -+ typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _old, _val = *_ptr; \ -+ \ -+ for (;;) { \ -+ _old = cmpxchg(_ptr, _val, _val | _mask); \ -+ if (_old == _val) \ -+ break; \ -+ _val = _old; \ -+ } \ -+ _old; \ -+}) -+ -+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) -+/* -+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * this avoids any races wrt polling state changes and thereby avoids -+ * spurious IPIs. -+ */ -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+} -+ -+/* -+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. -+ * -+ * If this returns true, then the idle task promises to call -+ * sched_ttwu_pending() and reschedule soon. -+ */ -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); -+ -+ for (;;) { -+ if (!(val & _TIF_POLLING_NRFLAG)) -+ return false; -+ if (val & _TIF_NEED_RESCHED) -+ return true; -+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); -+ if (old == val) -+ break; -+ val = old; -+ } -+ return true; -+} -+ -+#else -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ return false; -+} -+#endif -+#endif -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out -+ * of nohz mode if necessary. 
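[Aside, not part of the patch: fetch_or() and set_nr_and_not_polling() above atomically OR in TIF_NEED_RESCHED and use the *old* flag word to decide whether the target CPU was polling (no IPI needed) or genuinely needs a reschedule IPI. A userspace C11 analogue with atomic_fetch_or and made-up flag values.]

#include <stdatomic.h>
#include <stdio.h>

#define FLAG_NEED_RESCHED 0x1u   /* stand-in for _TIF_NEED_RESCHED   */
#define FLAG_POLLING      0x2u   /* stand-in for _TIF_POLLING_NRFLAG */

/* Analogue of set_nr_and_not_polling(): set NEED_RESCHED atomically and
 * report whether an IPI is required (i.e. the target was *not* polling). */
static int set_resched_needs_ipi(atomic_uint *flags)
{
    unsigned int old = atomic_fetch_or(flags, FLAG_NEED_RESCHED);
    return !(old & FLAG_POLLING);
}

int main(void)
{
    atomic_uint idle_polling = FLAG_POLLING;   /* idle loop is watching its flags */
    atomic_uint running      = 0;              /* busy task: needs a real IPI     */

    printf("polling cpu -> IPI? %d\n", set_resched_needs_ipi(&idle_polling)); /* 0 */
    printf("running cpu -> IPI? %d\n", set_resched_needs_ipi(&running));      /* 1 */
    return 0;
}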
-+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu; -+ -+ if (!tick_nohz_full_enabled()) -+ return; -+ -+ cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (rq->nr_running < 2) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+/* -+ * Add/Remove/Requeue task to/from the runqueue routines -+ * Context: rq->lock -+ */ -+static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "bmq: dequeue task reside on cpu%d from cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ list_del(&p->bmq_node); -+ if (list_empty(&rq->queue.heads[p->bmq_idx])) { -+ clear_bit(p->bmq_idx, rq->queue.bitmap); -+ update_sched_rq_watermark(rq); -+ } -+ --rq->nr_running; -+#ifdef CONFIG_SMP -+ if (1 == rq->nr_running) -+ cpumask_clear_cpu(cpu_of(rq), &sched_rq_pending_mask); -+#endif -+ -+ sched_update_tick_dependency(rq); -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); -+ -+ sched_info_dequeued(rq, p); -+} -+ -+static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "bmq: enqueue task reside on cpu%d to cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ p->bmq_idx = task_sched_prio(p); -+ bmq_add_task(p, &rq->queue, p->bmq_idx); -+ set_bit(p->bmq_idx, rq->queue.bitmap); -+ update_sched_rq_watermark(rq); -+ ++rq->nr_running; -+#ifdef CONFIG_SMP -+ if (2 == rq->nr_running) -+ cpumask_set_cpu(cpu_of(rq), &sched_rq_pending_mask); -+#endif -+ -+ sched_update_tick_dependency(rq); -+ -+ sched_info_queued(rq, p); -+ psi_enqueue(p, flags); -+ -+ /* -+ * If in_iowait is set, the code below may not trigger any cpufreq -+ * utilization updates, so do it here explicitly with the IOWAIT flag -+ * passed. -+ */ -+ if (p->in_iowait) -+ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq) -+{ -+ int idx = task_sched_prio(p); -+ -+ lockdep_assert_held(&rq->lock); -+ WARN_ONCE(task_rq(p) != rq, "bmq: cpu[%d] requeue task reside on cpu%d\n", -+ cpu_of(rq), task_cpu(p)); -+ -+ list_del(&p->bmq_node); -+ bmq_add_task(p, &rq->queue, idx); -+ if (idx != p->bmq_idx) { -+ if (list_empty(&rq->queue.heads[p->bmq_idx])) -+ clear_bit(p->bmq_idx, rq->queue.bitmap); -+ p->bmq_idx = idx; -+ set_bit(p->bmq_idx, rq->queue.bitmap); -+ update_sched_rq_watermark(rq); -+ } -+} -+ -+/* -+ * resched_curr - mark rq's current task 'to be rescheduled now'. -+ * -+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. 
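[Aside, not part of the patch: the enqueue/dequeue/requeue routines above are the heart of the "bitmap queue" idea: one bit per priority level marks whether that level's list is non-empty, and picking the next task is a find-first-bit over the word. A small self-contained sketch of that bookkeeping, using per-level counters as a stand-in for the task lists.]

#include <stdio.h>

#define NR_LEVELS 8   /* tiny stand-in for the bmq_BITS priority levels */

struct toy_bmq {
    unsigned long bitmap;            /* bit i set => level i is non-empty */
    int           count[NR_LEVELS];  /* stand-in for the per-level lists  */
};

static void enqueue(struct toy_bmq *q, int level)
{
    q->count[level]++;
    q->bitmap |= 1UL << level;               /* mark the level occupied */
}

static void dequeue(struct toy_bmq *q, int level)
{
    if (--q->count[level] == 0)
        q->bitmap &= ~(1UL << level);        /* level drained: clear its bit */
}

/* Lowest set bit = highest-priority non-empty level, like
 * bmq_find_first_bit() over rq->queue.bitmap. */
static int first_level(const struct toy_bmq *q)
{
    for (int i = 0; i < NR_LEVELS; i++)
        if (q->bitmap & (1UL << i))
            return i;
    return -1;                               /* empty runqueue */
}

int main(void)
{
    struct toy_bmq q = { 0 };
    enqueue(&q, 5);
    enqueue(&q, 2);
    printf("next level: %d\n", first_level(&q));  /* 2 */
    dequeue(&q, 2);
    printf("next level: %d\n", first_level(&q));  /* 5 */
    return 0;
}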
-+ */ -+void resched_curr(struct rq *rq) -+{ -+ struct task_struct *curr = rq->curr; -+ int cpu; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ if (test_tsk_need_resched(curr)) -+ return; -+ -+ cpu = cpu_of(rq); -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(curr); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(curr)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(cpu_rq(cpu)); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+} -+ -+static inline void check_preempt_curr(struct rq *rq) -+{ -+ if (rq_first_bmq_task(rq) != rq->curr) -+ resched_curr(rq); -+} -+ -+#ifdef CONFIG_SCHED_HRTICK -+/* -+ * Use HR-timers to deliver accurate preemption points. -+ */ -+ -+static void hrtick_clear(struct rq *rq) -+{ -+ if (hrtimer_active(&rq->hrtick_timer)) -+ hrtimer_cancel(&rq->hrtick_timer); -+} -+ -+/* -+ * High-resolution timer tick. -+ * Runs from hardirq context with interrupts disabled. -+ */ -+static enum hrtimer_restart hrtick(struct hrtimer *timer) -+{ -+ struct rq *rq = container_of(timer, struct rq, hrtick_timer); -+ struct task_struct *p; -+ -+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); -+ -+ raw_spin_lock(&rq->lock); -+ p = rq->curr; -+ p->time_slice = 0; -+ resched_curr(rq); -+ raw_spin_unlock(&rq->lock); -+ -+ return HRTIMER_NORESTART; -+} -+ -+/* -+ * Use hrtick when: -+ * - enabled by features -+ * - hrtimer is actually high res -+ */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ /** -+ * BMQ doesn't support sched_feat yet -+ if (!sched_feat(HRTICK)) -+ return 0; -+ */ -+ if (!cpu_active(cpu_of(rq))) -+ return 0; -+ return hrtimer_is_hres_active(&rq->hrtick_timer); -+} -+ -+#ifdef CONFIG_SMP -+ -+static void __hrtick_restart(struct rq *rq) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ -+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); -+} -+ -+/* -+ * called from hardirq (IPI) context -+ */ -+static void __hrtick_start(void *arg) -+{ -+ struct rq *rq = arg; -+ -+ raw_spin_lock(&rq->lock); -+ __hrtick_restart(rq); -+ rq->hrtick_csd_pending = 0; -+ raw_spin_unlock(&rq->lock); -+} -+ -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ ktime_t time; -+ s64 delta; -+ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense and can cause timer DoS. -+ */ -+ delta = max_t(s64, delay, 10000LL); -+ time = ktime_add_ns(timer->base->get_time(), delta); -+ -+ hrtimer_set_expires(timer, time); -+ -+ if (rq == this_rq()) { -+ __hrtick_restart(rq); -+ } else if (!rq->hrtick_csd_pending) { -+ smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); -+ rq->hrtick_csd_pending = 1; -+ } -+} -+ -+#else -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense. Rely on vruntime for fairness. 
-+ */ -+ delay = max_t(u64, delay, 10000LL); -+ hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), -+ HRTIMER_MODE_REL_PINNED_HARD); -+} -+#endif /* CONFIG_SMP */ -+ -+static void hrtick_rq_init(struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ rq->hrtick_csd_pending = 0; -+ -+ rq->hrtick_csd.flags = 0; -+ rq->hrtick_csd.func = __hrtick_start; -+ rq->hrtick_csd.info = rq; -+#endif -+ -+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); -+ rq->hrtick_timer.function = hrtick; -+} -+#else /* CONFIG_SCHED_HRTICK */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline void hrtick_clear(struct rq *rq) -+{ -+} -+ -+static inline void hrtick_rq_init(struct rq *rq) -+{ -+} -+#endif /* CONFIG_SCHED_HRTICK */ -+ -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (task_has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ -+ return p->static_prio + MAX_PRIORITY_ADJ; -+} -+ -+/* -+ * Calculate the current priority, i.e. the priority -+ * taken into account by the scheduler. This value might -+ * be boosted by RT tasks as it will be RT if the task got -+ * RT-boosted. If not then it returns p->normal_prio. -+ */ -+static int effective_prio(struct task_struct *p) -+{ -+ p->normal_prio = normal_prio(p); -+ /* -+ * If we are RT tasks or we were boosted to RT priority, -+ * keep the priority unchanged. Otherwise, update priority -+ * to the normal priority: -+ */ -+ if (!rt_prio(p->prio)) -+ return p->normal_prio; -+ return p->prio; -+} -+ -+/* -+ * activate_task - move a task to the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static void activate_task(struct task_struct *p, struct rq *rq) -+{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible--; -+ enqueue_task(p, rq, ENQUEUE_WAKEUP); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ cpufreq_update_util(rq, 0); -+} -+ -+/* -+ * deactivate_task - remove a task from the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static inline void deactivate_task(struct task_struct *p, struct rq *rq) -+{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible++; -+ dequeue_task(p, rq, DEQUEUE_SLEEP); -+ p->on_rq = 0; -+ cpufreq_update_util(rq, 0); -+} -+ -+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->cpu is set up to a new value, task_access_lock(p, ...) can be -+ * successfully executed on another CPU. We must ensure that updates of -+ * per-task data have been completed by this moment. -+ */ -+ smp_wmb(); -+ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ WRITE_ONCE(p->cpu, cpu); -+#else -+ WRITE_ONCE(task_thread_info(p)->cpu, cpu); -+#endif -+#endif -+} -+ -+#ifdef CONFIG_SMP -+void set_task_cpu(struct task_struct *p, unsigned int new_cpu) -+{ -+#ifdef CONFIG_SCHED_DEBUG -+ /* -+ * We should never call set_task_cpu() on a blocked task, -+ * ttwu() will sort out the placement. -+ */ -+ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && -+ !p->on_rq); -+#ifdef CONFIG_LOCKDEP -+ /* -+ * The caller should hold either p->pi_lock or rq->lock, when changing -+ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. -+ * -+ * sched_move_task() holds both and thus holding either pins the cgroup, -+ * see task_group(). -+ */ -+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || -+ lockdep_is_held(&task_rq(p)->lock))); -+#endif -+ /* -+ * Clearly, migrating tasks to offline CPUs is a fairly daft thing. 
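[Aside, not part of the patch: normal_prio()/effective_prio() above put RT policies below MAX_RT_PRIO, shift everything else up by MAX_PRIORITY_ADJ, and leave a PI-boosted priority in the RT range untouched. A worked-numbers sketch mirroring those two functions, assuming MAX_RT_PRIO = 100 and a nice-0 static priority of 120.]

#include <stdio.h>

#define MAX_RT_PRIO      100
#define MAX_PRIORITY_ADJ 4

/* Mirrors normal_prio(): RT policies map into [0, MAX_RT_PRIO),
 * everything else sits at static_prio + MAX_PRIORITY_ADJ. */
static int bmq_normal_prio(int has_rt_policy, int rt_priority, int static_prio)
{
    if (has_rt_policy)
        return MAX_RT_PRIO - 1 - rt_priority;
    return static_prio + MAX_PRIORITY_ADJ;
}

/* Mirrors effective_prio(): a prio already in the RT range (PI boost)
 * is kept, otherwise the normal priority is used. */
static int bmq_effective_prio(int cur_prio, int norm_prio)
{
    return (cur_prio < MAX_RT_PRIO) ? cur_prio : norm_prio;
}

int main(void)
{
    printf("SCHED_FIFO rt_priority 50 -> %d\n", bmq_normal_prio(1, 50, 0));    /* 49  */
    printf("SCHED_NORMAL nice 0       -> %d\n", bmq_normal_prio(0, 0, 120));   /* 124 */
    /* A nice-0 task PI-boosted to prio 49 keeps 49 until the boost ends. */
    printf("boosted task keeps prio   -> %d\n", bmq_effective_prio(49, 124));  /* 49  */
    return 0;
}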
-+ */ -+ WARN_ON_ONCE(!cpu_online(new_cpu)); -+#endif -+ if (task_cpu(p) == new_cpu) -+ return; -+ trace_sched_migrate_task(p, new_cpu); -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ -+ __set_task_cpu(p, new_cpu); -+} -+ -+static inline bool is_per_cpu_kthread(struct task_struct *p) -+{ -+ return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); -+} -+ -+/* -+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see -+ * __set_cpus_allowed_ptr() and select_fallback_rq(). -+ */ -+static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -+{ -+ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ -+ if (is_per_cpu_kthread(p)) -+ return cpu_online(cpu); -+ -+ return cpu_active(cpu); -+} -+ -+/* -+ * This is how migration works: -+ * -+ * 1) we invoke migration_cpu_stop() on the target CPU using -+ * stop_one_cpu(). -+ * 2) stopper starts to run (implicitly forcing the migrated thread -+ * off the CPU) -+ * 3) it checks whether the migrated task is still in the wrong runqueue. -+ * 4) if it's in the wrong runqueue then the migration thread removes -+ * it and puts it into the right queue. -+ * 5) stopper completes and stop_one_cpu() returns and the migration -+ * is done. -+ */ -+ -+/* -+ * move_queued_task - move a queued task to new rq. -+ * -+ * Returns (locked) new rq. Old rq's lock is released. -+ */ -+static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int -+ new_cpu) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); -+ dequeue_task(p, rq, 0); -+ set_task_cpu(p, new_cpu); -+ raw_spin_unlock(&rq->lock); -+ -+ rq = cpu_rq(new_cpu); -+ -+ raw_spin_lock(&rq->lock); -+ BUG_ON(task_cpu(p) != new_cpu); -+ enqueue_task(p, rq, 0); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ check_preempt_curr(rq); -+ -+ return rq; -+} -+ -+struct migration_arg { -+ struct task_struct *task; -+ int dest_cpu; -+}; -+ -+/* -+ * Move (not current) task off this CPU, onto the destination CPU. We're doing -+ * this because either it can't run here any more (set_cpus_allowed() -+ * away from this CPU, or CPU going down), or because we're -+ * attempting to rebalance this task on exec (sched_exec). -+ * -+ * So we race with normal scheduler movements, but that's OK, as long -+ * as the task is no longer on this CPU. -+ */ -+static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int -+ dest_cpu) -+{ -+ /* Affinity changed (again). */ -+ if (!is_cpu_allowed(p, dest_cpu)) -+ return rq; -+ -+ update_rq_clock(rq); -+ return move_queued_task(rq, p, dest_cpu); -+} -+ -+/* -+ * migration_cpu_stop - this will be executed by a highprio stopper thread -+ * and performs thread migration by bumping thread off CPU then -+ * 'pushing' onto another runqueue. -+ */ -+static int migration_cpu_stop(void *data) -+{ -+ struct migration_arg *arg = data; -+ struct task_struct *p = arg->task; -+ struct rq *rq = this_rq(); -+ -+ /* -+ * The original target CPU might have gone down and we might -+ * be on another CPU but it doesn't matter. -+ */ -+ local_irq_disable(); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ /* -+ * If task_rq(p) != rq, it cannot be migrated here, because we're -+ * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because -+ * we're holding p->pi_lock. 
-+ */ -+ if (task_rq(p) == rq && task_on_rq_queued(p)) -+ rq = __migrate_task(rq, p, arg->dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_enable(); -+ return 0; -+} -+ -+static inline void -+set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ p->nr_cpus_allowed = cpumask_weight(new_mask); -+} -+ -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ set_cpus_allowed_common(p, new_mask); -+} -+#endif -+ -+/* Enter with rq lock held. We know p is on the local CPU */ -+static inline void __set_tsk_resched(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ -+/** -+ * task_curr - is this task currently executing on a CPU? -+ * @p: the task in question. -+ * -+ * Return: 1 if the task is currently executing. 0 otherwise. -+ */ -+inline int task_curr(const struct task_struct *p) -+{ -+ return cpu_curr(task_cpu(p)) == p; -+} -+ -+#ifdef CONFIG_SMP -+/* -+ * wait_task_inactive - wait for a thread to unschedule. -+ * -+ * If @match_state is nonzero, it's the @p->state value just checked and -+ * not expected to change. If it changes, i.e. @p might have woken up, -+ * then return zero. When we succeed in waiting for @p to be off its CPU, -+ * we return a positive number (its total switch count). If a second call -+ * a short while later returns the same number, the caller can be sure that -+ * @p has remained unscheduled the whole time. -+ * -+ * The caller must ensure that the task *will* unschedule sometime soon, -+ * else this function might spin for a *long* time. This function can't -+ * be called with interrupts off, or it may introduce deadlock with -+ * smp_call_function() if an IPI is sent by the same process we are -+ * waiting to become inactive. -+ */ -+unsigned long wait_task_inactive(struct task_struct *p, long match_state) -+{ -+ unsigned long flags; -+ bool running, on_rq; -+ unsigned long ncsw; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ for (;;) { -+ rq = task_rq(p); -+ -+ /* -+ * If the task is actively running on another CPU -+ * still, just relax and busy-wait without holding -+ * any locks. -+ * -+ * NOTE! Since we don't hold any locks, it's not -+ * even sure that "rq" stays as the right runqueue! -+ * But we don't care, since this will return false -+ * if the runqueue has changed and p is actually now -+ * running somewhere else! -+ */ -+ while (task_running(p) && p == rq->curr) { -+ if (match_state && unlikely(p->state != match_state)) -+ return 0; -+ cpu_relax(); -+ } -+ -+ /* -+ * Ok, time to look more closely! We need the rq -+ * lock now, to be *sure*. If we're wrong, we'll -+ * just go back and repeat. -+ */ -+ task_access_lock_irqsave(p, &lock, &flags); -+ trace_sched_wait_task(p); -+ running = task_running(p); -+ on_rq = p->on_rq; -+ ncsw = 0; -+ if (!match_state || p->state == match_state) -+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ /* -+ * If it changed from the expected state, bail out now. -+ */ -+ if (unlikely(!ncsw)) -+ break; -+ -+ /* -+ * Was it really running after all now that we -+ * checked with the proper locks actually held? -+ * -+ * Oops. Go back and try again.. -+ */ -+ if (unlikely(running)) { -+ cpu_relax(); -+ continue; -+ } -+ -+ /* -+ * It's not enough that it's not actively running, -+ * it must be off the runqueue _entirely_, and not -+ * preempted! 
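[Aside, not part of the patch: the "ncsw = p->nvcsw | LONG_MIN" line in wait_task_inactive() above tags the voluntary context-switch count with the sign bit so a successful observation is never zero, even for a task that has not switched yet; callers compare two successive return values to tell whether the task ran in between. A tiny userspace check of that trick.]

#include <stdio.h>
#include <limits.h>

/* Tag the switch count with the MSB, as wait_task_inactive() does. */
static unsigned long tag_ncsw(unsigned long nvcsw)
{
    return nvcsw | (unsigned long)LONG_MIN;
}

int main(void)
{
    unsigned long first  = tag_ncsw(0);   /* brand-new task: still non-zero  */
    unsigned long second = tag_ncsw(0);   /* sampled again, no switch since  */
    unsigned long later  = tag_ncsw(3);   /* task ran and slept in between   */

    printf("first sample non-zero: %d\n", first != 0);        /* 1 */
    printf("still unscheduled:     %d\n", first == second);   /* 1 */
    printf("ran in the meantime:   %d\n", first == later);    /* 0 */
    return 0;
}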
-+ * -+ * So if it was still runnable (but just not actively -+ * running right now), it's preempted, and we should -+ * yield - it could be a while. -+ */ -+ if (unlikely(on_rq)) { -+ ktime_t to = NSEC_PER_SEC / HZ; -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ continue; -+ } -+ -+ /* -+ * Ahh, all good. It wasn't running, and it wasn't -+ * runnable, which means that it will never become -+ * running in the future either. We're all done! -+ */ -+ break; -+ } -+ -+ return ncsw; -+} -+ -+/*** -+ * kick_process - kick a running thread to enter/exit the kernel -+ * @p: the to-be-kicked thread -+ * -+ * Cause a process which is running on another CPU to enter -+ * kernel-mode, without any delay. (to get signals handled.) -+ * -+ * NOTE: this function doesn't have to take the runqueue lock, -+ * because all it wants to ensure is that the remote task enters -+ * the kernel. If the IPI races and the task has been migrated -+ * to another CPU then no harm is done and the purpose has been -+ * achieved as well. -+ */ -+void kick_process(struct task_struct *p) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ cpu = task_cpu(p); -+ if ((cpu != smp_processor_id()) && task_curr(p)) -+ smp_send_reschedule(cpu); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(kick_process); -+ -+/* -+ * ->cpus_ptr is protected by both rq->lock and p->pi_lock -+ * -+ * A few notes on cpu_active vs cpu_online: -+ * -+ * - cpu_active must be a subset of cpu_online -+ * -+ * - on CPU-up we allow per-CPU kthreads on the online && !active CPU, -+ * see __set_cpus_allowed_ptr(). At this point the newly online -+ * CPU isn't yet part of the sched domains, and balancing will not -+ * see it. -+ * -+ * - on cpu-down we clear cpu_active() to mask the sched domains and -+ * avoid the load balancer to place new tasks on the to be removed -+ * CPU. Existing tasks will remain running there and will be taken -+ * off. -+ * -+ * This means that fallback selection must not select !active CPUs. -+ * And can assume that any active CPU must be online. Conversely -+ * select_task_rq() below may allow selection of !active CPUs in order -+ * to satisfy the above rules. -+ */ -+static int select_fallback_rq(int cpu, struct task_struct *p) -+{ -+ int nid = cpu_to_node(cpu); -+ const struct cpumask *nodemask = NULL; -+ enum { cpuset, possible, fail } state = cpuset; -+ int dest_cpu; -+ -+ /* -+ * If the node that the CPU is on has been offlined, cpu_to_node() -+ * will return -1. There is no CPU on the node, and we should -+ * select the CPU on the other node. -+ */ -+ if (nid != -1) { -+ nodemask = cpumask_of_node(nid); -+ -+ /* Look for allowed, online CPU in same node. */ -+ for_each_cpu(dest_cpu, nodemask) { -+ if (!cpu_active(dest_cpu)) -+ continue; -+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) -+ return dest_cpu; -+ } -+ } -+ -+ for (;;) { -+ /* Any allowed, online CPU? */ -+ for_each_cpu(dest_cpu, p->cpus_ptr) { -+ if (!is_cpu_allowed(p, dest_cpu)) -+ continue; -+ goto out; -+ } -+ -+ /* No more Mr. Nice Guy. */ -+ switch (state) { -+ case cpuset: -+ if (IS_ENABLED(CONFIG_CPUSETS)) { -+ cpuset_cpus_allowed_fallback(p); -+ state = possible; -+ break; -+ } -+ /* Fall-through */ -+ case possible: -+ do_set_cpus_allowed(p, cpu_possible_mask); -+ state = fail; -+ break; -+ -+ case fail: -+ BUG(); -+ break; -+ } -+ } -+ -+out: -+ if (state != cpuset) { -+ /* -+ * Don't tell them about moving exiting tasks or -+ * kernel threads (both mm NULL), since they never -+ * leave kernel. 
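[Aside, not part of the patch: select_fallback_rq() above is a widening cascade: first an allowed, active CPU on the task's node, then any allowed online CPU, then the cpuset fallback mask, and finally the full possible mask. A heavily simplified sketch of that cascade over plain bitmasks; the real code also distinguishes cpu_active() from cpu_online() and rewrites p->cpus_ptr in place.]

#include <stdio.h>

/* Toy CPU masks as bit sets; bit i means CPU i is a member. */
typedef unsigned int cpumask_t;

static int first_cpu(cpumask_t m)
{
    for (int i = 0; i < 32; i++)
        if (m & (1u << i))
            return i;
    return -1;
}

/* Simplified echo of select_fallback_rq(): prefer an allowed CPU on the
 * task's node, then any allowed online CPU, then progressively widen the
 * allowed set (cpuset fallback, then all possible CPUs). */
static int fallback_cpu(cpumask_t allowed, cpumask_t node, cpumask_t online,
                        cpumask_t cpuset_fallback, cpumask_t possible)
{
    int cpu;

    if ((cpu = first_cpu(allowed & node & online)) >= 0)
        return cpu;
    if ((cpu = first_cpu(allowed & online)) >= 0)
        return cpu;
    if ((cpu = first_cpu(cpuset_fallback & online)) >= 0)   /* widen: cpuset */
        return cpu;
    return first_cpu(possible & online);                    /* last resort */
}

int main(void)
{
    /* Task allowed only CPU 6, which is offline; its node holds CPUs 4-7;
     * CPUs 0-3 are online. The cascade ends up on CPU 0. */
    printf("fallback -> CPU %d\n",
           fallback_cpu(1u << 6, 0xF0u, 0x0Fu, 0x0Fu, 0xFFu));
    return 0;
}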
-+ */ -+ if (p->mm && printk_ratelimit()) { -+ printk_deferred("process %d (%s) no longer affine to cpu%d\n", -+ task_pid_nr(p), p->comm, cpu); -+ } -+ } -+ -+ return dest_cpu; -+} -+ -+static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask) -+{ -+ cpumask_t *mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) -+ mask++; -+ return cpu; -+} -+ -+static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) -+{ -+ return cpumask_test_cpu(cpu, cpumask)? cpu:__best_mask_cpu(cpu, cpumask); -+} -+ -+/* -+ * wake flags -+ */ -+#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -+#define WF_FORK 0x02 /* child wakeup after fork */ -+#define WF_MIGRATED 0x04 /* internal use, task got migrated */ -+ -+static inline int select_task_rq(struct task_struct *p) -+{ -+ cpumask_t chk_mask, tmp; -+ -+ if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_online_mask))) -+ return select_fallback_rq(task_cpu(p), p); -+ -+ if ( -+#ifdef CONFIG_SCHED_SMT -+ cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || -+#endif -+ cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || -+ cpumask_and(&tmp, &chk_mask, -+ &sched_rq_watermark[task_sched_prio(p) + 1])) -+ return best_mask_cpu(task_cpu(p), &tmp); -+ -+ return best_mask_cpu(task_cpu(p), &chk_mask); -+} -+#else /* CONFIG_SMP */ -+static inline int select_task_rq(struct task_struct *p) -+{ -+ return 0; -+} -+#endif /* CONFIG_SMP */ -+ -+static void -+ttwu_stat(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq; -+ -+ if (!schedstat_enabled()) -+ return; -+ -+ rq= this_rq(); -+ -+#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) -+ __schedstat_inc(rq->ttwu_local); -+ else { -+ /** BMQ ToDo: -+ * How to do ttwu_wake_remote -+ */ -+ } -+#endif /* CONFIG_SMP */ -+ -+ __schedstat_inc(rq->ttwu_count); -+} -+ -+/* -+ * Mark the task runnable and perform wakeup-preemption. -+ */ -+static inline void -+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+} -+ -+static inline void -+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+#ifdef CONFIG_SMP -+ if (p->sched_contributes_to_load) -+ rq->nr_uninterruptible--; -+#endif -+ -+ activate_task(p, rq); -+ ttwu_do_wakeup(rq, p, 0); -+} -+ -+static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ check_preempt_curr(rq); -+ raw_spin_unlock(&rq->lock); -+} -+ -+static int ttwu_remote(struct task_struct *p, int wake_flags) -+{ -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ rq = __task_access_lock(p, &lock); -+ if (task_on_rq_queued(p)) { -+ ttwu_do_wakeup(rq, p, wake_flags); -+ ret = 1; -+ } -+ __task_access_unlock(p, lock); -+ -+ return ret; -+} -+ -+/* -+ * Notes on Program-Order guarantees on SMP systems. -+ * -+ * MIGRATION -+ * -+ * The basic program-order guarantee on SMP systems is that when a task [t] -+ * migrates, all its activity on its old CPU [c0] happens-before any subsequent -+ * execution on its new CPU [c1]. -+ * -+ * For migration (of runnable tasks) this is provided by the following means: -+ * -+ * A) UNLOCK of the rq(c0)->lock scheduling out task t -+ * B) migration for t is required to synchronize *both* rq(c0)->lock and -+ * rq(c1)->lock (if not at the same time, then in that order). 
-+ * C) LOCK of the rq(c1)->lock scheduling in task -+ * -+ * Transitivity guarantees that B happens after A and C after B. -+ * Note: we only require RCpc transitivity. -+ * Note: the CPU doing B need not be c0 or c1 -+ * -+ * Example: -+ * -+ * CPU0 CPU1 CPU2 -+ * -+ * LOCK rq(0)->lock -+ * sched-out X -+ * sched-in Y -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(0)->lock // orders against CPU0 -+ * dequeue X -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(1)->lock -+ * enqueue X -+ * UNLOCK rq(1)->lock -+ * -+ * LOCK rq(1)->lock // orders against CPU2 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(1)->lock -+ * -+ * -+ * BLOCKING -- aka. SLEEP + WAKEUP -+ * -+ * For blocking we (obviously) need to provide the same guarantee as for -+ * migration. However the means are completely different as there is no lock -+ * chain to provide order. Instead we do: -+ * -+ * 1) smp_store_release(X->on_cpu, 0) -+ * 2) smp_cond_load_acquire(!X->on_cpu) -+ * -+ * Example: -+ * -+ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule) -+ * -+ * LOCK rq(0)->lock LOCK X->pi_lock -+ * dequeue X -+ * sched-out X -+ * smp_store_release(X->on_cpu, 0); -+ * -+ * smp_cond_load_acquire(&X->on_cpu, !VAL); -+ * X->state = WAKING -+ * set_task_cpu(X,2) -+ * -+ * LOCK rq(2)->lock -+ * enqueue X -+ * X->state = RUNNING -+ * UNLOCK rq(2)->lock -+ * -+ * LOCK rq(2)->lock // orders against CPU1 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(2)->lock -+ * -+ * UNLOCK X->pi_lock -+ * UNLOCK rq(0)->lock -+ * -+ * -+ * However; for wakeups there is a second guarantee we must provide, namely we -+ * must observe the state that lead to our wakeup. That is, not only must our -+ * task observe its own prior state, it must also observe the stores prior to -+ * its wakeup. -+ * -+ * This means that any means of doing remote wakeups must order the CPU doing -+ * the wakeup against the CPU the task is going to end up running on. This, -+ * however, is already required for the regular Program-Order guarantee above, -+ * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire). -+ * -+ */ -+ -+/*** -+ * try_to_wake_up - wake up a thread -+ * @p: the thread to be awakened -+ * @state: the mask of task states that can be woken -+ * @wake_flags: wake modifier flags (WF_*) -+ * -+ * Put it on the run-queue if it's not already there. The "current" -+ * thread is always on the run-queue (except when the actual -+ * re-schedule is in progress), and as such you're allowed to do -+ * the simpler "current->state = TASK_RUNNING" to mark yourself -+ * runnable without the overhead of this. -+ * -+ * Return: %true if @p was woken up, %false if it was already running. -+ * or @state didn't match @p's state. -+ */ -+static int try_to_wake_up(struct task_struct *p, unsigned int state, -+ int wake_flags) -+{ -+ unsigned long flags; -+ int cpu, success = 0; -+ -+ preempt_disable(); -+ if (p == current) { -+ /* -+ * We're waking current, this means 'p->on_rq' and 'task_cpu(p) -+ * == smp_processor_id()'. Together this means we can special -+ * case the whole 'p->on_rq && ttwu_remote()' case below -+ * without taking any locks. -+ * -+ * In particular: -+ * - we rely on Program-Order guarantees for all the ordering, -+ * - we're serialized against set_special_state() by virtue of -+ * it disabling IRQs (this allows not taking ->pi_lock). 
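[Aside, not part of the patch: the blocking/wakeup guarantee described above rests on the release store of on_cpu = 0 in the scheduler pairing with the acquire in smp_cond_load_acquire() on the waking CPU, so everything the task wrote before sleeping is visible to whoever wakes it. A userspace C11 analogue of that handshake; the thread names are only illustrative.]

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int on_cpu = 1;
static int task_data;            /* plain data protected by the handshake */

/* "Scheduler" side: publish the task's last write with a release store. */
static void *schedule_out(void *arg)
{
    (void)arg;
    task_data = 42;                                   /* work done before sleeping */
    atomic_store_explicit(&on_cpu, 0, memory_order_release);
    return NULL;
}

/* "Waker" side: spin with acquire loads, like smp_cond_load_acquire(!VAL). */
static void *waker(void *arg)
{
    (void)arg;
    while (atomic_load_explicit(&on_cpu, memory_order_acquire))
        ;
    printf("waker sees task_data = %d\n", task_data); /* always 42 */
    return NULL;
}

int main(void)
{
    pthread_t a, b;
    pthread_create(&a, NULL, waker, NULL);
    pthread_create(&b, NULL, schedule_out, NULL);
    pthread_join(a, NULL);
    pthread_join(b, NULL);
    return 0;
}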
-+ */ -+ if (!(p->state & state)) -+ goto out; -+ -+ success = 1; -+ cpu = task_cpu(p); -+ trace_sched_waking(p); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+ goto out; -+ } -+ -+ /* -+ * If we are going to wake up a thread waiting for CONDITION we -+ * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with mb() in -+ * set_current_state() the waiting thread does. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ smp_mb__after_spinlock(); -+ if (!(p->state & state)) -+ goto unlock; -+ -+ trace_sched_waking(p); -+ -+ /* We're going to change ->state: */ -+ success = 1; -+ cpu = task_cpu(p); -+ -+ /* -+ * Ensure we load p->on_rq _after_ p->state, otherwise it would -+ * be possible to, falsely, observe p->on_rq == 0 and get stuck -+ * in smp_cond_load_acquire() below. -+ * -+ * sched_ttwu_pending() try_to_wake_up() -+ * STORE p->on_rq = 1 LOAD p->state -+ * UNLOCK rq->lock -+ * -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * UNLOCK rq->lock -+ * -+ * [task p] -+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ if (p->on_rq && ttwu_remote(p, wake_flags)) -+ goto unlock; -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -+ * possible to, falsely, observe p->on_cpu == 0. -+ * -+ * One must be running (->on_cpu == 1) in order to remove oneself -+ * from the runqueue. -+ * -+ * __schedule() (switch to task 'p') try_to_wake_up() -+ * STORE p->on_cpu = 1 LOAD p->on_rq -+ * UNLOCK rq->lock -+ * -+ * __schedule() (put 'p' to sleep) -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * STORE p->on_rq = 0 LOAD p->on_cpu -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until its done referencing the task. -+ * -+ * Pairs with the smp_store_release() in finish_task(). -+ * -+ * This ensures that tasks getting woken will be fully ordered against -+ * their previous state and preserve Program Order. -+ */ -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ p->sched_contributes_to_load = !!task_contributes_to_load(p); -+ p->state = TASK_WAKING; -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+ if(cpu_rq(smp_processor_id())->clock - p->last_ran > SCHED_TIMESLICE_NS) -+ boost_task(p); -+ -+ cpu = select_task_rq(p); -+ -+ if (cpu != task_cpu(p)) { -+ wake_flags |= WF_MIGRATED; -+ psi_ttwu_dequeue(p); -+ set_task_cpu(p, cpu); -+ } -+#else /* CONFIG_SMP */ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+#endif /* CONFIG_SMP */ -+ -+ ttwu_queue(p, cpu, wake_flags); -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+out: -+ if (success) -+ ttwu_stat(p, cpu, wake_flags); -+ preempt_enable(); -+ -+ return success; -+} -+ -+/** -+ * wake_up_process - Wake up a specific process -+ * @p: The process to be woken up. -+ * -+ * Attempt to wake up the nominated process and move it to the set of runnable -+ * processes. -+ * -+ * Return: 1 if the process was woken up, 0 if it was already running. 
-+ * -+ * This function executes a full memory barrier before accessing the task state. -+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+int wake_up_state(struct task_struct *p, unsigned int state) -+{ -+ return try_to_wake_up(p, state, 0); -+} -+ -+/* -+ * Perform scheduler related setup for a newly forked process p. -+ * p is forked by current. -+ */ -+int sched_fork(unsigned long __maybe_unused clone_flags, struct task_struct *p) -+{ -+ unsigned long flags; -+ int cpu = get_cpu(); -+ struct rq *rq = this_rq(); -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ INIT_HLIST_HEAD(&p->preempt_notifiers); -+#endif -+ /* Should be reset in fork.c but done here for ease of BMQ patching */ -+ p->on_cpu = -+ p->on_rq = -+ p->utime = -+ p->stime = -+ p->sched_time = 0; -+ -+#ifdef CONFIG_COMPACTION -+ p->capture_control = NULL; -+#endif -+ -+ /* -+ * We mark the process as NEW here. This guarantees that -+ * nobody will actually run it, and a signal or other external -+ * event cannot wake it up and insert it on the runqueue either. -+ */ -+ p->state = TASK_NEW; -+ -+ /* -+ * Make sure we do not leak PI boosting priority to the child. -+ */ -+ p->prio = current->normal_prio; -+ -+ /* -+ * Revert to default priority/policy on fork if requested. -+ */ -+ if (unlikely(p->sched_reset_on_fork)) { -+ if (task_has_rt_policy(p)) { -+ p->policy = SCHED_NORMAL; -+ p->static_prio = NICE_TO_PRIO(0); -+ p->rt_priority = 0; -+ } else if (PRIO_TO_NICE(p->static_prio) < 0) -+ p->static_prio = NICE_TO_PRIO(0); -+ -+ p->prio = p->normal_prio = normal_prio(p); -+ -+ /* -+ * We don't need the reset flag anymore after the fork. It has -+ * fulfilled its duty: -+ */ -+ p->sched_reset_on_fork = 0; -+ } -+ -+ p->boost_prio = (p->boost_prio < 0) ? -+ p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. -+ */ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ rq->curr->time_slice /= 2; -+ p->time_slice = rq->curr->time_slice; -+#ifdef CONFIG_SCHED_HRTICK -+ hrtick_start(rq, rq->curr->time_slice); -+#endif -+ -+ if (p->time_slice < RESCHED_NS) { -+ p->time_slice = SCHED_TIMESLICE_NS; -+ resched_curr(rq); -+ } -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ /* -+ * The child is not yet in the pid-hash so no cgroup attach races, -+ * and the cgroup is pinned to this child due to cgroup_fork() -+ * is ran before sched_fork(). -+ * -+ * Silence PROVE_RCU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ /* -+ * We're setting the CPU for the first time, we don't migrate, -+ * so use __set_task_cpu(). 
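[Aside, not part of the patch: sched_fork() above splits the parent's remaining time slice in half between parent and child, and if the halves fall below RESCHED_NS the child gets a fresh SCHED_TIMESLICE_NS and the parent is flagged for reschedule. A worked-numbers echo of that split; the real code also arms the hrtick timer under CONFIG_SCHED_HRTICK.]

#include <stdio.h>

#define SCHED_TIMESLICE_NS 4000000LL    /* 4 ms, Kconfig default */
#define RESCHED_NS         100000LL     /* 100 us                */

/* Echo of the slice split in sched_fork(). */
static void fork_split(long long *parent_slice, long long *child_slice,
                       int *resched_parent)
{
    *parent_slice /= 2;
    *child_slice = *parent_slice;
    *resched_parent = 0;
    if (*child_slice < RESCHED_NS) {
        *child_slice = SCHED_TIMESLICE_NS;   /* too small: fresh slice for child */
        *resched_parent = 1;                 /* and the parent gets rescheduled  */
    }
}

int main(void)
{
    long long parent = 3000000, child;
    int resched;

    fork_split(&parent, &child, &resched);
    printf("parent %lld ns, child %lld ns, resched=%d\n", parent, child, resched);
    /* -> parent 1500000, child 1500000, resched=0 */

    parent = 150000;
    fork_split(&parent, &child, &resched);
    printf("parent %lld ns, child %lld ns, resched=%d\n", parent, child, resched);
    /* -> parent 75000, child 4000000, resched=1 */
    return 0;
}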
-+ */ -+ __set_task_cpu(p, cpu); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ put_cpu(); -+ return 0; -+} -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+static bool __initdata __sched_schedstats = false; -+ -+static void set_schedstats(bool enabled) -+{ -+ if (enabled) -+ static_branch_enable(&sched_schedstats); -+ else -+ static_branch_disable(&sched_schedstats); -+} -+ -+void force_schedstat_enabled(void) -+{ -+ if (!schedstat_enabled()) { -+ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); -+ static_branch_enable(&sched_schedstats); -+ } -+} -+ -+static int __init setup_schedstats(char *str) -+{ -+ int ret = 0; -+ if (!str) -+ goto out; -+ -+ /* -+ * This code is called before jump labels have been set up, so we can't -+ * change the static branch directly just yet. Instead set a temporary -+ * variable so init_schedstats() can do it later. -+ */ -+ if (!strcmp(str, "enable")) { -+ __sched_schedstats = true; -+ ret = 1; -+ } else if (!strcmp(str, "disable")) { -+ __sched_schedstats = false; -+ ret = 1; -+ } -+out: -+ if (!ret) -+ pr_warn("Unable to parse schedstats=\n"); -+ -+ return ret; -+} -+__setup("schedstats=", setup_schedstats); -+ -+static void __init init_schedstats(void) -+{ -+ set_schedstats(__sched_schedstats); -+} -+ -+#ifdef CONFIG_PROC_SYSCTL -+int sysctl_schedstats(struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) -+{ -+ struct ctl_table t; -+ int err; -+ int state = static_branch_likely(&sched_schedstats); -+ -+ if (write && !capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ t = *table; -+ t.data = &state; -+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); -+ if (err < 0) -+ return err; -+ if (write) -+ set_schedstats(state); -+ return err; -+} -+#endif /* CONFIG_PROC_SYSCTL */ -+#else /* !CONFIG_SCHEDSTATS */ -+static inline void init_schedstats(void) {} -+#endif /* CONFIG_SCHEDSTATS */ -+ -+/* -+ * wake_up_new_task - wake up a newly created task for the first time. -+ * -+ * This function will do some initial scheduler statistics housekeeping -+ * that must be done for every newly created context, then puts the task -+ * on the runqueue and wakes it. -+ */ -+void wake_up_new_task(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ p->state = TASK_RUNNING; -+ -+ rq = cpu_rq(select_task_rq(p)); -+#ifdef CONFIG_SMP -+ /* -+ * Fork balancing, do it here and not earlier because: -+ * - cpus_ptr can change in the fork path -+ * - any previously selected CPU might disappear through hotplug -+ * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, -+ * as we're not fully set-up yet. 
-+ */ -+ __set_task_cpu(p, cpu_of(rq)); -+#endif -+ -+ raw_spin_lock(&rq->lock); -+ -+ update_rq_clock(rq); -+ activate_task(p, rq); -+ trace_sched_wakeup_new(p); -+ check_preempt_curr(rq); -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ -+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); -+ -+void preempt_notifier_inc(void) -+{ -+ static_branch_inc(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_inc); -+ -+void preempt_notifier_dec(void) -+{ -+ static_branch_dec(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_dec); -+ -+/** -+ * preempt_notifier_register - tell me when current is being preempted & rescheduled -+ * @notifier: notifier struct to register -+ */ -+void preempt_notifier_register(struct preempt_notifier *notifier) -+{ -+ if (!static_branch_unlikely(&preempt_notifier_key)) -+ WARN(1, "registering preempt_notifier while notifiers disabled\n"); -+ -+ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_register); -+ -+/** -+ * preempt_notifier_unregister - no longer interested in preemption notifications -+ * @notifier: notifier struct to unregister -+ * -+ * This is *not* safe to call from within a preemption notifier. -+ */ -+void preempt_notifier_unregister(struct preempt_notifier *notifier) -+{ -+ hlist_del(¬ifier->link); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_unregister); -+ -+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_in(notifier, raw_smp_processor_id()); -+} -+ -+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_in_preempt_notifiers(curr); -+} -+ -+static void -+__fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_out(notifier, next); -+} -+ -+static __always_inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_out_preempt_notifiers(curr, next); -+} -+ -+#else /* !CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+} -+ -+static inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+} -+ -+#endif /* CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void prepare_task(struct task_struct *next) -+{ -+ /* -+ * Claim the task as running, we do this before switching to it -+ * such that any running task will have this set. -+ */ -+ next->on_cpu = 1; -+} -+ -+static inline void finish_task(struct task_struct *prev) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->on_cpu is cleared, the task can be moved to a different CPU. -+ * We must ensure this doesn't happen until the switch is completely -+ * finished. -+ * -+ * In particular, the load of prev->state in finish_task_switch() must -+ * happen before this. -+ * -+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). 
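The preempt-notifier hooks kept above are consumed through a small callback API (KVM is the main in-tree user). A minimal sketch of a client, assuming mainline's preempt_notifier_init()/struct preempt_ops interface; my_ctx, my_sched_in/my_sched_out and the attach/detach helpers are made-up names for illustration only:

/* Illustrative consumer of the preempt-notifier hooks; not part of the patch. */
#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/printk.h>
#include <linux/sched.h>

struct my_ctx {
	struct preempt_notifier pn;
	int last_cpu;
};

static void my_sched_in(struct preempt_notifier *pn, int cpu)
{
	struct my_ctx *ctx = container_of(pn, struct my_ctx, pn);

	/* Called with preemption disabled, right before current runs again. */
	ctx->last_cpu = cpu;
}

static void my_sched_out(struct preempt_notifier *pn, struct task_struct *next)
{
	/* Called from the scheduler with the rq lock held: must not sleep. */
	pr_debug("preempted in favour of %s\n", next->comm);
}

static struct preempt_ops my_ops = {
	.sched_in  = my_sched_in,
	.sched_out = my_sched_out,
};

static void my_attach_current(struct my_ctx *ctx)
{
	preempt_notifier_inc();			/* enable the static key */
	preempt_notifier_init(&ctx->pn, &my_ops);
	preempt_disable();
	preempt_notifier_register(&ctx->pn);	/* hooks current's switches */
	preempt_enable();
}

static void my_detach_current(struct my_ctx *ctx)
{
	preempt_disable();
	preempt_notifier_unregister(&ctx->pn);
	preempt_enable();
	preempt_notifier_dec();
}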
-+ */ -+ smp_store_release(&prev->on_cpu, 0); -+#else -+ prev->on_cpu = 0; -+#endif -+} -+ -+static inline void -+prepare_lock_switch(struct rq *rq, struct task_struct *next) -+{ -+ /* -+ * Since the runqueue lock will be released by the next -+ * task (which is an invalid locking op but in the case -+ * of the scheduler it's an obvious special-case), so we -+ * do an early lockdep release here: -+ */ -+ spin_release(&rq->lock.dep_map, 1, _THIS_IP_); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* this is a valid case when another task releases the spinlock */ -+ rq->lock.owner = next; -+#endif -+} -+ -+static inline void finish_lock_switch(struct rq *rq) -+{ -+ /* -+ * If we are tracking spinlock dependencies then we have to -+ * fix up the runqueue lock - which gets 'carried over' from -+ * prev into current: -+ */ -+ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+/** -+ * prepare_task_switch - prepare to switch tasks -+ * @rq: the runqueue preparing to switch -+ * @next: the task we are going to switch to. -+ * -+ * This is called with the rq lock held and interrupts off. It must -+ * be paired with a subsequent finish_task_switch after the context -+ * switch. -+ * -+ * prepare_task_switch sets up locking and calls architecture specific -+ * hooks. -+ */ -+static inline void -+prepare_task_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ kcov_prepare_switch(prev); -+ sched_info_switch(rq, prev, next); -+ perf_event_task_sched_out(prev, next); -+ rseq_preempt(prev); -+ fire_sched_out_preempt_notifiers(prev, next); -+ prepare_task(next); -+ prepare_arch_switch(next); -+} -+ -+/** -+ * finish_task_switch - clean up after a task-switch -+ * @rq: runqueue associated with task-switch -+ * @prev: the thread we just switched away from. -+ * -+ * finish_task_switch must be called after the context switch, paired -+ * with a prepare_task_switch call before the context switch. -+ * finish_task_switch will reconcile locking set up by prepare_task_switch, -+ * and do any other architecture-specific cleanup actions. -+ * -+ * Note that we may have delayed dropping an mm in context_switch(). If -+ * so, we finish that here outside of the runqueue lock. (Doing it -+ * with the lock held can cause deadlocks; see schedule() for -+ * details.) -+ * -+ * The context switch have flipped the stack from under us and restored the -+ * local variables which were saved when this task called schedule() in the -+ * past. prev == current is still correct but we need to recalculate this_rq -+ * because prev may have moved to another CPU. -+ */ -+static struct rq *finish_task_switch(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq = this_rq(); -+ struct mm_struct *mm = rq->prev_mm; -+ long prev_state; -+ -+ /* -+ * The previous task will have left us with a preempt_count of 2 -+ * because it left us after: -+ * -+ * schedule() -+ * preempt_disable(); // 1 -+ * __schedule() -+ * raw_spin_lock_irq(&rq->lock) // 2 -+ * -+ * Also, see FORK_PREEMPT_COUNT. -+ */ -+ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, -+ "corrupted preempt_count: %s/%d/0x%x\n", -+ current->comm, current->pid, preempt_count())) -+ preempt_count_set(FORK_PREEMPT_COUNT); -+ -+ rq->prev_mm = NULL; -+ -+ /* -+ * A task struct has one reference for the use as "current". -+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls -+ * schedule one last time. 
The schedule call will never return, and -+ * the scheduled task must drop that reference. -+ * -+ * We must observe prev->state before clearing prev->on_cpu (in -+ * finish_task), otherwise a concurrent wakeup can get prev -+ * running on another CPU and we could rave with its RUNNING -> DEAD -+ * transition, resulting in a double drop. -+ */ -+ prev_state = prev->state; -+ vtime_task_switch(prev); -+ perf_event_task_sched_in(prev, current); -+ finish_task(prev); -+ finish_lock_switch(rq); -+ finish_arch_post_lock_switch(); -+ kcov_finish_switch(current); -+ -+ fire_sched_in_preempt_notifiers(current); -+ /* -+ * When switching through a kernel thread, the loop in -+ * membarrier_{private,global}_expedited() may have observed that -+ * kernel thread and not issued an IPI. It is therefore possible to -+ * schedule between user->kernel->user threads without passing though -+ * switch_mm(). Membarrier requires a barrier after storing to -+ * rq->curr, before returning to userspace, so provide them here: -+ * -+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly -+ * provided by mmdrop(), -+ * - a sync_core for SYNC_CORE. -+ */ -+ if (mm) { -+ membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop(mm); -+ } -+ if (unlikely(prev_state == TASK_DEAD)) { -+ /* -+ * Remove function-return probe instances associated with this -+ * task and put them back on the free list. -+ */ -+ kprobe_flush_task(prev); -+ -+ /* Task is done with its stack. */ -+ put_task_stack(prev); -+ -+ put_task_struct_rcu_user(prev); -+ } -+ -+ tick_nohz_task_switch(); -+ return rq; -+} -+ -+/** -+ * schedule_tail - first thing a freshly forked thread must call. -+ * @prev: the thread we just switched away from. -+ */ -+asmlinkage __visible void schedule_tail(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq; -+ -+ /* -+ * New tasks start with FORK_PREEMPT_COUNT, see there and -+ * finish_task_switch() for details. -+ * -+ * finish_task_switch() will drop rq->lock() and lower preempt_count -+ * and the preempt_enable() will end up enabling preemption (on -+ * PREEMPT_COUNT kernels). -+ */ -+ -+ rq = finish_task_switch(prev); -+ preempt_enable(); -+ -+ if (current->set_child_tid) -+ put_user(task_pid_vnr(current), current->set_child_tid); -+ -+ calculate_sigpending(); -+} -+ -+/* -+ * context_switch - switch to the new MM and the new thread's register state. -+ */ -+static __always_inline struct rq * -+context_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ prepare_task_switch(rq, prev, next); -+ -+ /* -+ * For paravirt, this is coupled with an exit in switch_to to -+ * combine the page table reload and the switch backend into -+ * one hypercall. -+ */ -+ arch_start_context_switch(prev); -+ -+ /* -+ * kernel -> kernel lazy + transfer active -+ * user -> kernel lazy + mmgrab() active -+ * -+ * kernel -> user switch + mmdrop() active -+ * user -> user switch -+ */ -+ if (!next->mm) { // to kernel -+ enter_lazy_tlb(prev->active_mm, next); -+ -+ next->active_mm = prev->active_mm; -+ if (prev->mm) // from user -+ mmgrab(prev->active_mm); -+ else -+ prev->active_mm = NULL; -+ } else { // to user -+ membarrier_switch_mm(rq, prev->active_mm, next->mm); -+ /* -+ * sys_membarrier() requires an smp_mb() between setting -+ * rq->curr / membarrier_switch_mm() and returning to userspace. -+ * -+ * The below provides this either through switch_mm(), or in -+ * case 'prev->active_mm == next->mm' through -+ * finish_task_switch()'s mmdrop(). 
-+ */ -+ switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ -+ if (!prev->mm) { // from kernel -+ /* will mmdrop() in finish_task_switch(). */ -+ rq->prev_mm = prev->active_mm; -+ prev->active_mm = NULL; -+ } -+ } -+ -+ prepare_lock_switch(rq, next); -+ -+ /* Here we just switch the register state and the stack. */ -+ switch_to(prev, next, prev); -+ barrier(); -+ -+ return finish_task_switch(prev); -+} -+ -+/* -+ * nr_running, nr_uninterruptible and nr_context_switches: -+ * -+ * externally visible scheduler statistics: current number of runnable -+ * threads, total number of context switches performed since bootup. -+ */ -+unsigned long nr_running(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_running; -+ -+ return sum; -+} -+ -+/* -+ * Check if only the current task is running on the CPU. -+ * -+ * Caution: this function does not check that the caller has disabled -+ * preemption, thus the result might have a time-of-check-to-time-of-use -+ * race. The caller is responsible to use it correctly, for example: -+ * -+ * - from a non-preemptible section (of course) -+ * -+ * - from a thread that is bound to a single CPU -+ * -+ * - in a loop with very short iterations (e.g. a polling loop) -+ */ -+bool single_task_running(void) -+{ -+ return raw_rq()->nr_running == 1; -+} -+EXPORT_SYMBOL(single_task_running); -+ -+unsigned long long nr_context_switches(void) -+{ -+ int i; -+ unsigned long long sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += cpu_rq(i)->nr_switches; -+ -+ return sum; -+} -+ -+/* -+ * Consumers of these two interfaces, like for example the cpuidle menu -+ * governor, are using nonsensical data. Preferring shallow idle state selection -+ * for a CPU that has IO-wait which might not even end up running the task when -+ * it does become runnable. -+ */ -+ -+unsigned long nr_iowait_cpu(int cpu) -+{ -+ return atomic_read(&cpu_rq(cpu)->nr_iowait); -+} -+ -+/* -+ * IO-wait accounting, and how its mostly bollocks (on SMP). -+ * -+ * The idea behind IO-wait account is to account the idle time that we could -+ * have spend running if it were not for IO. That is, if we were to improve the -+ * storage performance, we'd have a proportional reduction in IO-wait time. -+ * -+ * This all works nicely on UP, where, when a task blocks on IO, we account -+ * idle time as IO-wait, because if the storage were faster, it could've been -+ * running and we'd not be idle. -+ * -+ * This has been extended to SMP, by doing the same for each CPU. This however -+ * is broken. -+ * -+ * Imagine for instance the case where two tasks block on one CPU, only the one -+ * CPU will have IO-wait accounted, while the other has regular idle. Even -+ * though, if the storage were faster, both could've ran at the same time, -+ * utilising both CPUs. -+ * -+ * This means, that when looking globally, the current IO-wait accounting on -+ * SMP is a lower bound, by reason of under accounting. -+ * -+ * Worse, since the numbers are provided per CPU, they are sometimes -+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly -+ * associated with any one particular CPU, it can wake to another CPU than it -+ * blocked on. This means the per CPU IO-wait number is meaningless. -+ * -+ * Task CPU affinities can make all that even more 'interesting'. 
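Seen from userspace, the per-CPU counters described above surface as the fifth field of each cpuN line in /proc/stat (in USER_HZ ticks). A small, illustrative reader that sums them, keeping in mind the caveat above that the total is only a lower bound and the per-CPU split is not meaningful:

/* Illustrative userspace sketch, not part of the patch.
 * Build with: cc -o iowait iowait.c */
#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/stat", "r");
	char line[512];
	unsigned long long user, nice, sys, idle, iowait, total = 0;

	if (!f)
		return 1;

	while (fgets(line, sizeof(line), f)) {
		int cpu;

		/* per-CPU lines are "cpuN user nice system idle iowait ..." */
		if (strncmp(line, "cpu", 3) || line[3] < '0' || line[3] > '9')
			continue;
		if (sscanf(line, "cpu%d %llu %llu %llu %llu %llu",
			   &cpu, &user, &nice, &sys, &idle, &iowait) == 6) {
			printf("cpu%-3d iowait %llu ticks\n", cpu, iowait);
			total += iowait;
		}
	}
	fclose(f);
	printf("sum    iowait %llu ticks (a lower bound, per the comment above)\n",
	       total);
	return 0;
}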
-+ */ -+ -+unsigned long nr_iowait(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += nr_iowait_cpu(i); -+ -+ return sum; -+} -+ -+DEFINE_PER_CPU(struct kernel_stat, kstat); -+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -+ -+EXPORT_PER_CPU_SYMBOL(kstat); -+EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -+ -+static inline void update_curr(struct rq *rq, struct task_struct *p) -+{ -+ s64 ns = rq->clock_task - p->last_ran; -+ -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ p->time_slice -= ns; -+ p->last_ran = rq->clock_task; -+} -+ -+/* -+ * Return accounted runtime for the task. -+ * Return separately the current's pending runtime that have not been -+ * accounted yet. -+ */ -+unsigned long long task_sched_runtime(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ u64 ns; -+ -+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) -+ /* -+ * 64-bit doesn't need locks to atomically read a 64-bit value. -+ * So we have a optimization chance when the task's delta_exec is 0. -+ * Reading ->on_cpu is racy, but this is ok. -+ * -+ * If we race with it leaving CPU, we'll take a lock. So we're correct. -+ * If we race with it entering CPU, unaccounted time is 0. This is -+ * indistinguishable from the read occurring a few cycles earlier. -+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has -+ * been accounted, so we're correct here as well. -+ */ -+ if (!p->on_cpu || !task_on_rq_queued(p)) -+ return tsk_seruntime(p); -+#endif -+ -+ rq = task_access_lock_irqsave(p, &lock, &flags); -+ /* -+ * Must be ->curr _and_ ->on_rq. If dequeued, we would -+ * project cycles that may never be accounted to this -+ * thread, breaking clock_gettime(). -+ */ -+ if (p == rq->curr && task_on_rq_queued(p)) { -+ update_rq_clock(rq); -+ update_curr(rq, p); -+ } -+ ns = tsk_seruntime(p); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ return ns; -+} -+ -+/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static inline void scheduler_task_tick(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ if (is_idle_task(p)) -+ return; -+ -+ update_curr(rq, p); -+ cpufreq_update_util(rq, 0); -+ -+ /* -+ * Tasks have less than RESCHED_NS of time slice left they will be -+ * rescheduled. 
-+ */ -+ if (p->time_slice >= RESCHED_NS) -+ return; -+ __set_tsk_resched(p); -+} -+ -+#ifdef CONFIG_SCHED_SMT -+static inline int active_load_balance_cpu_stop(void *data) -+{ -+ struct rq *rq = this_rq(); -+ struct task_struct *p = data; -+ cpumask_t tmp; -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ rq->active_balance = 0; -+ /* _something_ may have changed the task, double check again */ -+ if (task_on_rq_queued(p) && task_rq(p) == rq && -+ cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) -+ rq = move_queued_task(rq, p, __best_mask_cpu(cpu_of(rq), &tmp)); -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_restore(flags); -+ -+ return 0; -+} -+ -+/* sg_balance_trigger - trigger slibing group balance for @cpu */ -+static inline int sg_balance_trigger(const int cpu, struct rq *rq) -+{ -+ unsigned long flags; -+ struct task_struct *curr; -+ int res; -+ -+ if (!raw_spin_trylock_irqsave(&rq->lock, flags)) -+ return 0; -+ curr = rq->curr; -+ res = (!is_idle_task(curr)) && (1 == rq->nr_running) &&\ -+ cpumask_intersects(curr->cpus_ptr, &sched_sg_idle_mask) &&\ -+ (!rq->active_balance); -+ -+ if (res) -+ rq->active_balance = 1; -+ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ if (res) -+ stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop, -+ curr, &rq->active_balance_work); -+ return res; -+} -+ -+/* -+ * sg_balance_check - slibing group balance check for run queue @rq -+ */ -+static inline void sg_balance_check(struct rq *rq) -+{ -+ cpumask_t chk; -+ int cpu; -+ -+ /* exit when no sg in idle */ -+ if (cpumask_empty(&sched_sg_idle_mask)) -+ return; -+ -+ cpu = cpu_of(rq); -+ /* Only cpu in slibing idle group will do the checking */ -+ if (cpumask_test_cpu(cpu, &sched_sg_idle_mask)) { -+ /* Find potential cpus which can migrate the currently running task */ -+ if (cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) && -+ cpumask_andnot(&chk, &chk, &sched_rq_watermark[IDLE_WM])) { -+ int i, tried = 0; -+ -+ for_each_cpu_wrap(i, &chk, cpu) { -+ /* skip the cpu which has idle slibing cpu */ -+ if (cpumask_intersects(cpu_smt_mask(i), -+ &sched_rq_watermark[IDLE_WM])) -+ continue; -+ if (cpumask_intersects(cpu_smt_mask(i), -+ &sched_rq_pending_mask)) -+ continue; -+ if (sg_balance_trigger(i, cpu_rq(i))) -+ return; -+ if (tried) -+ return; -+ tried++; -+ } -+ } -+ return; -+ } -+ -+ if (1 != rq->nr_running) -+ return; -+ -+ if (cpumask_andnot(&chk, cpu_smt_mask(cpu), &sched_rq_pending_mask) && -+ cpumask_andnot(&chk, &chk, &sched_rq_watermark[IDLE_WM]) && -+ cpumask_equal(&chk, cpu_smt_mask(cpu))) -+ sg_balance_trigger(cpu, rq); -+} -+#endif /* CONFIG_SCHED_SMT */ -+ -+/* -+ * This function gets called by the timer code, with HZ frequency. -+ * We call it with interrupts disabled. -+ */ -+void scheduler_tick(void) -+{ -+ int cpu __maybe_unused = smp_processor_id(); -+ struct rq *rq = cpu_rq(cpu); -+ -+ sched_clock_tick(); -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ scheduler_task_tick(rq); -+ calc_global_load_tick(rq); -+ psi_task_tick(rq); -+ -+ rq->last_tick = rq->clock; -+ raw_spin_unlock(&rq->lock); -+ -+ perf_event_task_tick(); -+} -+ -+#ifdef CONFIG_NO_HZ_FULL -+struct tick_work { -+ int cpu; -+ atomic_t state; -+ struct delayed_work work; -+}; -+/* Values for ->state, see diagram below. 
*/ -+#define TICK_SCHED_REMOTE_OFFLINE 0 -+#define TICK_SCHED_REMOTE_OFFLINING 1 -+#define TICK_SCHED_REMOTE_RUNNING 2 -+ -+/* -+ * State diagram for ->state: -+ * -+ * -+ * TICK_SCHED_REMOTE_OFFLINE -+ * | ^ -+ * | | -+ * | | sched_tick_remote() -+ * | | -+ * | | -+ * +--TICK_SCHED_REMOTE_OFFLINING -+ * | ^ -+ * | | -+ * sched_tick_start() | | sched_tick_stop() -+ * | | -+ * V | -+ * TICK_SCHED_REMOTE_RUNNING -+ * -+ * -+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() -+ * and sched_tick_start() are happy to leave the state in RUNNING. -+ */ -+ -+static struct tick_work __percpu *tick_work_cpu; -+ -+static void sched_tick_remote(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct tick_work *twork = container_of(dwork, struct tick_work, work); -+ int cpu = twork->cpu; -+ struct rq *rq = cpu_rq(cpu); -+ struct task_struct *curr; -+ unsigned long flags; -+ u64 delta; -+ int os; -+ -+ /* -+ * Handle the tick only if it appears the remote CPU is running in full -+ * dynticks mode. The check is racy by nature, but missing a tick or -+ * having one too much is no big deal because the scheduler tick updates -+ * statistics and checks timeslices in a time-independent way, regardless -+ * of when exactly it is running. -+ */ -+ if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) -+ goto out_requeue; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ curr = rq->curr; -+ -+ if (is_idle_task(curr) || cpu_is_offline(cpu)) -+ goto out_unlock; -+ -+ update_rq_clock(rq); -+ delta = rq_clock_task(rq) - curr->last_ran; -+ -+ /* -+ * Make sure the next tick runs within a reasonable -+ * amount of time. -+ */ -+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); -+ scheduler_task_tick(rq); -+ -+out_unlock: -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+out_requeue: -+ /* -+ * Run the remote tick once per second (1Hz). This arbitrary -+ * frequency is large enough to avoid overload but short enough -+ * to keep scheduler internal stats reasonably up to date. But -+ * first update state to reflect hotplug activity if required. 
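The transitions in the diagram can be modelled in a few lines of plain C11 to make the atomic_xchg()/atomic_fetch_add_unless() usage concrete. This is an illustrative standalone model of the diagram (mainline-style); note that the sched_tick_stop() in this patch takes the simpler cancel_delayed_work_sync() route instead of walking through OFFLINING:

/* Standalone C11 model of the ->state machine; illustrative, not kernel code. */
#include <stdatomic.h>
#include <stdio.h>

enum { OFFLINE, OFFLINING, RUNNING };

static _Atomic int state = OFFLINE;

/* atomic_fetch_add_unless(v, a, u): add a unless *v == u, return old value */
static int fetch_add_unless(_Atomic int *v, int a, int u)
{
	int old = atomic_load(v);

	while (old != u && !atomic_compare_exchange_weak(v, &old, old + a))
		;
	return old;
}

static void tick_start(void) { atomic_exchange(&state, RUNNING); }
static void tick_stop(void)  { fetch_add_unless(&state, -1, OFFLINE); }

/* One remote tick: requeue only if still RUNNING, else finish going offline */
static void tick_remote(void)
{
	int old = fetch_add_unless(&state, -1, RUNNING);

	printf("remote tick saw state %d -> %s\n", old,
	       old == RUNNING ? "requeue" : "stop requeuing");
}

int main(void)
{
	tick_start();	/* OFFLINE   -> RUNNING             */
	tick_remote();	/* RUNNING   -> stays, requeues     */
	tick_stop();	/* RUNNING   -> OFFLINING           */
	tick_remote();	/* OFFLINING -> OFFLINE, no requeue */
	return 0;
}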
-+ */ -+ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); -+ if (os == TICK_SCHED_REMOTE_RUNNING) -+ queue_delayed_work(system_unbound_wq, dwork, HZ); -+} -+ -+static void sched_tick_start(int cpu) -+{ -+ int os; -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); -+ if (os == TICK_SCHED_REMOTE_OFFLINE) { -+ twork->cpu = cpu; -+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); -+ queue_delayed_work(system_unbound_wq, &twork->work, HZ); -+ } -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void sched_tick_stop(int cpu) -+{ -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ cancel_delayed_work_sync(&twork->work); -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+int __init sched_tick_offload_init(void) -+{ -+ tick_work_cpu = alloc_percpu(struct tick_work); -+ BUG_ON(!tick_work_cpu); -+ return 0; -+} -+ -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_tick_start(int cpu) { } -+static inline void sched_tick_stop(int cpu) { } -+#endif -+ -+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ -+ defined(CONFIG_PREEMPT_TRACER)) -+/* -+ * If the value passed in is equal to the current preempt count -+ * then we just disabled preemption. Start timing the latency. -+ */ -+static inline void preempt_latency_start(int val) -+{ -+ if (preempt_count() == val) { -+ unsigned long ip = get_lock_parent_ip(); -+#ifdef CONFIG_DEBUG_PREEMPT -+ current->preempt_disable_ip = ip; -+#endif -+ trace_preempt_off(CALLER_ADDR0, ip); -+ } -+} -+ -+void preempt_count_add(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) -+ return; -+#endif -+ __preempt_count_add(val); -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Spinlock count overflowing soon? -+ */ -+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= -+ PREEMPT_MASK - 10); -+#endif -+ preempt_latency_start(val); -+} -+EXPORT_SYMBOL(preempt_count_add); -+NOKPROBE_SYMBOL(preempt_count_add); -+ -+/* -+ * If the value passed in equals to the current preempt count -+ * then we just enabled preemption. Stop timing the latency. -+ */ -+static inline void preempt_latency_stop(int val) -+{ -+ if (preempt_count() == val) -+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); -+} -+ -+void preempt_count_sub(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) -+ return; -+ /* -+ * Is the spinlock portion underflowing? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && -+ !(preempt_count() & PREEMPT_MASK))) -+ return; -+#endif -+ -+ preempt_latency_stop(val); -+ __preempt_count_sub(val); -+} -+EXPORT_SYMBOL(preempt_count_sub); -+NOKPROBE_SYMBOL(preempt_count_sub); -+ -+#else -+static inline void preempt_latency_start(int val) { } -+static inline void preempt_latency_stop(int val) { } -+#endif -+ -+/* -+ * Timeslices below RESCHED_NS are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. 
-+ */ -+static inline void check_curr(struct task_struct *p, struct rq *rq) -+{ -+ if (rq->idle == p) -+ return; -+ -+ update_curr(rq, p); -+ -+ if (p->time_slice < RESCHED_NS) { -+ p->time_slice = SCHED_TIMESLICE_NS; -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { -+ if (SCHED_RR != p->policy) -+ deboost_task(p); -+ requeue_task(p, rq); -+ } -+ } -+} -+ -+#ifdef CONFIG_SMP -+ -+#define SCHED_RQ_NR_MIGRATION (32UL) -+/* -+ * Migrate pending tasks in @rq to @dest_cpu -+ * Will try to migrate mininal of half of @rq nr_running tasks and -+ * SCHED_RQ_NR_MIGRATION to @dest_cpu -+ */ -+static inline int -+migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) -+{ -+ struct task_struct *p, *skip = rq->curr; -+ int nr_migrated = 0; -+ int nr_tries = min(rq->nr_running / 2, SCHED_RQ_NR_MIGRATION); -+ -+ while (skip != rq->idle && nr_tries && -+ (p = rq_next_bmq_task(skip, rq)) != rq->idle) { -+ skip = rq_next_bmq_task(p, rq); -+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { -+ dequeue_task(p, rq, 0); -+ set_task_cpu(p, dest_cpu); -+ enqueue_task(p, dest_rq, 0); -+ nr_migrated++; -+ } -+ nr_tries--; -+ } -+ -+ return nr_migrated; -+} -+ -+static inline int take_other_rq_tasks(struct rq *rq, int cpu) -+{ -+ struct cpumask *affinity_mask, *end_mask; -+ -+ if (cpumask_empty(&sched_rq_pending_mask)) -+ return 0; -+ -+ affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); -+ do { -+ int i; -+ for_each_cpu_and(i, &sched_rq_pending_mask, affinity_mask) { -+ int nr_migrated; -+ struct rq *src_rq; -+ -+ src_rq = cpu_rq(i); -+ if (!do_raw_spin_trylock(&src_rq->lock)) -+ continue; -+ spin_acquire(&src_rq->lock.dep_map, -+ SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ -+ nr_migrated = migrate_pending_tasks(src_rq, rq, cpu); -+ -+ spin_release(&src_rq->lock.dep_map, 1, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ -+ if (nr_migrated) { -+ cpufreq_update_util(rq, 0); -+ return 1; -+ } -+ } -+ } while (++affinity_mask < end_mask); -+ -+ return 0; -+} -+#endif -+ -+static inline struct task_struct * -+choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) -+{ -+ struct task_struct *next; -+ -+ if (unlikely(rq->skip)) { -+ next = rq_runnable_task(rq); -+#ifdef CONFIG_SMP -+ if (likely(rq->online)) -+ if (next == rq->idle && take_other_rq_tasks(rq, cpu)) -+ next = rq_runnable_task(rq); -+#endif -+ rq->skip = NULL; -+ return next; -+ } -+ -+ next = rq_first_bmq_task(rq); -+#ifdef CONFIG_SMP -+ if (likely(rq->online)) -+ if (next == rq->idle && take_other_rq_tasks(rq, cpu)) -+ return rq_first_bmq_task(rq); -+#endif -+ return next; -+} -+ -+static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ return p->preempt_disable_ip; -+#else -+ return 0; -+#endif -+} -+ -+/* -+ * Print scheduling while atomic bug: -+ */ -+static noinline void __schedule_bug(struct task_struct *prev) -+{ -+ /* Save this before calling printk(), since that will clobber it */ -+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ if (oops_in_progress) -+ return; -+ -+ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", -+ prev->comm, prev->pid, preempt_count()); -+ -+ debug_show_held_locks(prev); -+ print_modules(); -+ if (irqs_disabled()) -+ print_irqtrace_events(prev); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && in_atomic_preempt_off()) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+ if 
(panic_on_warn) -+ panic("scheduling while atomic\n"); -+ -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+ -+/* -+ * Various schedule()-time debugging checks and statistics: -+ */ -+static inline void schedule_debug(struct task_struct *prev, bool preempt) -+{ -+#ifdef CONFIG_SCHED_STACK_END_CHECK -+ if (task_stack_end_corrupted(prev)) -+ panic("corrupted stack end detected inside scheduler\n"); -+#endif -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && prev->state && prev->non_block_count) { -+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", -+ prev->comm, prev->pid, prev->non_block_count); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+ } -+#endif -+ -+ if (unlikely(in_atomic_preempt_off())) { -+ __schedule_bug(prev); -+ preempt_count_set(PREEMPT_DISABLED); -+ } -+ rcu_sleep_check(); -+ -+ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -+ -+ schedstat_inc(this_rq()->sched_count); -+} -+ -+static inline void set_rq_task(struct rq *rq, struct task_struct *p) -+{ -+ p->last_ran = rq->clock_task; -+ -+ if (unlikely(SCHED_TIMESLICE_NS == p->time_slice)) -+ rq->last_ts_switch = rq->clock; -+#ifdef CONFIG_HIGH_RES_TIMERS -+ if (p != rq->idle) -+ hrtick_start(rq, p->time_slice); -+#endif -+} -+ -+/* -+ * schedule() is the main scheduler function. -+ * -+ * The main means of driving the scheduler and thus entering this function are: -+ * -+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. -+ * -+ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return -+ * paths. For example, see arch/x86/entry_64.S. -+ * -+ * To drive preemption between tasks, the scheduler sets the flag in timer -+ * interrupt handler scheduler_tick(). -+ * -+ * 3. Wakeups don't really cause entry into schedule(). They add a -+ * task to the run-queue and that's it. -+ * -+ * Now, if the new task added to the run-queue preempts the current -+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets -+ * called on the nearest possible occasion: -+ * -+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): -+ * -+ * - in syscall or exception context, at the next outmost -+ * preempt_enable(). (this might be as soon as the wake_up()'s -+ * spin_unlock()!) -+ * -+ * - in IRQ context, return from interrupt-handler to -+ * preemptible context -+ * -+ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) -+ * then at the next: -+ * -+ * - cond_resched() call -+ * - explicit schedule() call -+ * - return from syscall or exception to user-space -+ * - return from interrupt-handler to user-space -+ * -+ * WARNING: must be called with preemption disabled! -+ */ -+static void __sched notrace __schedule(bool preempt) -+{ -+ struct task_struct *prev, *next; -+ unsigned long *switch_count; -+ struct rq *rq; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ rq = cpu_rq(cpu); -+ prev = rq->curr; -+ -+ schedule_debug(prev, preempt); -+ -+ /* by passing sched_feat(HRTICK) checking which BMQ doesn't support */ -+ hrtick_clear(rq); -+ -+ local_irq_disable(); -+ rcu_note_context_switch(preempt); -+ -+ /* -+ * Make sure that signal_pending_state()->signal_pending() below -+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(). -+ * -+ * The membarrier system call requires a full memory barrier -+ * after coming from user-space, before storing to rq->curr. 
-+ */ -+ raw_spin_lock(&rq->lock); -+ smp_mb__after_spinlock(); -+ -+ update_rq_clock(rq); -+ -+ switch_count = &prev->nivcsw; -+ if (!preempt && prev->state) { -+ if (signal_pending_state(prev->state, prev)) { -+ prev->state = TASK_RUNNING; -+ } else { -+ if (rq_switch_time(rq) < boost_threshold(prev)) -+ boost_task(prev); -+ deactivate_task(prev, rq); -+ -+ if (prev->in_iowait) { -+ atomic_inc(&rq->nr_iowait); -+ delayacct_blkio_start(); -+ } -+ } -+ switch_count = &prev->nvcsw; -+ } -+ -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ -+ check_curr(prev, rq); -+ -+ next = choose_next_task(rq, cpu, prev); -+ -+ set_rq_task(rq, next); -+ -+ if (prev != next) { -+ if (MAX_PRIO == next->prio) -+ schedstat_inc(rq->sched_goidle); -+ -+ /* -+ * RCU users of rcu_dereference(rq->curr) may not see -+ * changes to task_struct made by pick_next_task(). -+ */ -+ RCU_INIT_POINTER(rq->curr, next); -+ /* -+ * The membarrier system call requires each architecture -+ * to have a full memory barrier after updating -+ * rq->curr, before returning to user-space. -+ * -+ * Here are the schemes providing that barrier on the -+ * various architectures: -+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. -+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. -+ * - finish_lock_switch() for weakly-ordered -+ * architectures where spin_unlock is a full barrier, -+ * - switch_to() for arm64 (weakly-ordered, spin_unlock -+ * is a RELEASE barrier), -+ */ -+ ++*switch_count; -+ rq->nr_switches++; -+ rq->last_ts_switch = rq->clock; -+ -+ trace_sched_switch(preempt, prev, next); -+ -+ /* Also unlocks the rq: */ -+ rq = context_switch(rq, prev, next); -+#ifdef CONFIG_SCHED_SMT -+ sg_balance_check(rq); -+#endif -+ } else -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+void __noreturn do_task_dead(void) -+{ -+ /* Causes final put_task_struct in finish_task_switch(): */ -+ set_special_state(TASK_DEAD); -+ -+ /* Tell freezer to ignore us: */ -+ current->flags |= PF_NOFREEZE; -+ __schedule(false); -+ -+ BUG(); -+ -+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -+ for (;;) -+ cpu_relax(); -+} -+ -+static inline void sched_submit_work(struct task_struct *tsk) -+{ -+ if (!tsk->state) -+ return; -+ -+ /* -+ * If a worker went to sleep, notify and ask workqueue whether -+ * it wants to wake up a task to maintain concurrency. -+ * As this function is called inside the schedule() context, -+ * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker. -+ */ -+ if (tsk->flags & PF_WQ_WORKER) { -+ preempt_disable(); -+ wq_worker_sleeping(tsk); -+ preempt_enable_no_resched(); -+ } -+ -+ if (tsk_is_pi_blocked(tsk)) -+ return; -+ -+ /* -+ * If we are going to sleep and we have plugged IO queued, -+ * make sure to submit it to avoid deadlocks. 
-+ */ -+ if (blk_needs_flush_plug(tsk)) -+ blk_schedule_flush_plug(tsk); -+} -+ -+static void sched_update_worker(struct task_struct *tsk) -+{ -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_running(tsk); -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ struct task_struct *tsk = current; -+ -+ sched_submit_work(tsk); -+ do { -+ preempt_disable(); -+ __schedule(false); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ sched_update_worker(tsk); -+} -+EXPORT_SYMBOL(schedule); -+ -+/* -+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted -+ * state (have scheduled out non-voluntarily) by making sure that all -+ * tasks have either left the run queue or have gone into user space. -+ * As idle tasks do not do either, they must not ever be preempted -+ * (schedule out non-voluntarily). -+ * -+ * schedule_idle() is similar to schedule_preempt_disable() except that it -+ * never enables preemption because it does not call sched_submit_work(). -+ */ -+void __sched schedule_idle(void) -+{ -+ /* -+ * As this skips calling sched_submit_work(), which the idle task does -+ * regardless because that function is a nop when the task is in a -+ * TASK_RUNNING state, make sure this isn't used someplace that the -+ * current task can be in any other state. Note, idle is always in the -+ * TASK_RUNNING state. -+ */ -+ WARN_ON_ONCE(current->state); -+ do { -+ __schedule(false); -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_CONTEXT_TRACKING -+asmlinkage __visible void __sched schedule_user(void) -+{ -+ /* -+ * If we come here after a random call to set_need_resched(), -+ * or we have been woken up remotely but the IPI has not yet arrived, -+ * we haven't yet exited the RCU idle mode. Do it here manually until -+ * we find a better solution. -+ * -+ * NB: There are buggy callers of this function. Ideally we -+ * should warn if prev_state != CONTEXT_USER, but that will trigger -+ * too frequently to make sense yet. -+ */ -+ enum ctx_state prev_state = exception_enter(); -+ schedule(); -+ exception_exit(prev_state); -+} -+#endif -+ -+/** -+ * schedule_preempt_disabled - called with preemption disabled -+ * -+ * Returns with preemption disabled. Note: preempt_count must be 1 -+ */ -+void __sched schedule_preempt_disabled(void) -+{ -+ sched_preempt_enable_no_resched(); -+ schedule(); -+ preempt_disable(); -+} -+ -+static void __sched notrace preempt_schedule_common(void) -+{ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ __schedule(true); -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ -+ /* -+ * Check again in case we missed a preemption opportunity -+ * between schedule and now. -+ */ -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_PREEMPTION -+/* -+ * This is the entry point to schedule() from in-kernel preemption -+ * off of preempt_enable. 
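preempt_schedule() below bails out via preemptible(); for reference, the mainline definition (assumed unchanged by this patch) is a plain preempt_count()/irqs_disabled() test, so "non-zero preempt_count" also covers hardirq and softirq nesting:

/* Mainline definition, shown for reference only. */
#define preemptible()	(preempt_count() == 0 && !irqs_disabled())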
-+ */ -+asmlinkage __visible void __sched notrace preempt_schedule(void) -+{ -+ /* -+ * If there is a non-zero preempt_count or interrupts are disabled, -+ * we do not want to preempt the current task. Just return.. -+ */ -+ if (likely(!preemptible())) -+ return; -+ -+ preempt_schedule_common(); -+} -+NOKPROBE_SYMBOL(preempt_schedule); -+EXPORT_SYMBOL(preempt_schedule); -+ -+/** -+ * preempt_schedule_notrace - preempt_schedule called by tracing -+ * -+ * The tracing infrastructure uses preempt_enable_notrace to prevent -+ * recursion and tracing preempt enabling caused by the tracing -+ * infrastructure itself. But as tracing can happen in areas coming -+ * from userspace or just about to enter userspace, a preempt enable -+ * can occur before user_exit() is called. This will cause the scheduler -+ * to be called when the system is still in usermode. -+ * -+ * To prevent this, the preempt_enable_notrace will use this function -+ * instead of preempt_schedule() to exit user context if needed before -+ * calling the scheduler. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) -+{ -+ enum ctx_state prev_ctx; -+ -+ if (likely(!preemptible())) -+ return; -+ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ /* -+ * Needs preempt disabled in case user_exit() is traced -+ * and the tracer calls preempt_enable_notrace() causing -+ * an infinite recursion. -+ */ -+ prev_ctx = exception_enter(); -+ __schedule(true); -+ exception_exit(prev_ctx); -+ -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ } while (need_resched()); -+} -+EXPORT_SYMBOL_GPL(preempt_schedule_notrace); -+ -+#endif /* CONFIG_PREEMPTION */ -+ -+/* -+ * This is the entry point to schedule() from kernel preemption -+ * off of irq context. -+ * Note, that this is called and return with irqs disabled. This will -+ * protect us against recursive calling from irq. -+ */ -+asmlinkage __visible void __sched preempt_schedule_irq(void) -+{ -+ enum ctx_state prev_state; -+ -+ /* Catch callers which need to be fixed */ -+ BUG_ON(preempt_count() || !irqs_disabled()); -+ -+ prev_state = exception_enter(); -+ -+ do { -+ preempt_disable(); -+ local_irq_enable(); -+ __schedule(true); -+ local_irq_disable(); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ -+ exception_exit(prev_state); -+} -+ -+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, -+ void *key) -+{ -+ return try_to_wake_up(curr->private, mode, wake_flags); -+} -+EXPORT_SYMBOL(default_wake_function); -+ -+static inline void check_task_changed(struct rq *rq, struct task_struct *p) -+{ -+ /* Trigger resched if task sched_prio has been modified. 
*/ -+ if (task_on_rq_queued(p) && task_sched_prio(p) != p->bmq_idx) { -+ requeue_task(p, rq); -+ check_preempt_curr(rq); -+ } -+} -+ -+#ifdef CONFIG_RT_MUTEXES -+ -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ -+/* -+ * rt_mutex_setprio - set the current priority of a task -+ * @p: task to boost -+ * @pi_task: donor task -+ * -+ * This function changes the 'effective' priority of a task. It does -+ * not touch ->normal_prio like __setscheduler(). -+ * -+ * Used by the rt_mutex code to implement priority inheritance -+ * logic. Call site only calls if the priority of the task changed. -+ */ -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) -+{ -+ int prio; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. -+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio) -+ return; -+ -+ rq = __task_access_lock(p, &lock); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio) -+ goto out_unlock; -+ -+ /* -+ * Idle task boosting is a nono in general. There is one -+ * exception, when PREEMPT_RT and NOHZ is active: -+ * -+ * The idle task calls get_next_timer_interrupt() and holds -+ * the timer wheel base->lock on the CPU and another CPU wants -+ * to access the timer (probably to cancel it). We can safely -+ * ignore the boosting request, as the idle CPU runs this code -+ * with interrupts disabled and will complete the lock -+ * protected section without being interrupted. So there is no -+ * real need to boost. -+ */ -+ if (unlikely(p == rq->idle)) { -+ WARN_ON(p != rq->curr); -+ WARN_ON(p->pi_blocked_on); -+ goto out_unlock; -+ } -+ -+ trace_sched_pi_setprio(p, pi_task); -+ p->prio = prio; -+ -+ check_task_changed(rq, p); -+out_unlock: -+ __task_access_unlock(p, lock); -+} -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} -+#endif -+ -+void set_user_nice(struct task_struct *p, long nice) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) -+ return; -+ /* -+ * We have to be careful, if called from sys_setpriority(), -+ * the task might be in the middle of scheduling on another CPU. 
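Effective priority under PI boosting is simply the more urgent (numerically smaller) of the owner's normal prio and the top waiter's prio. A worked example, using the RT mapping visible elsewhere in this patch (prio = MAX_RT_PRIO - 1 - rt_priority) and mainline's usual NICE_TO_PRIO(); illustrative only:

/* Standalone illustration of __rt_effective_prio()'s arithmetic. */
#include <stdio.h>

#define MAX_RT_PRIO	100
#define NICE_TO_PRIO(n)	(MAX_RT_PRIO + (n) + 20)	/* nice 0 -> 120 */

static int rt_effective(int normal_prio, int top_waiter_prio)
{
	/* take the more urgent (smaller) of the two values */
	return top_waiter_prio < normal_prio ? top_waiter_prio : normal_prio;
}

int main(void)
{
	int owner  = NICE_TO_PRIO(0);		/* nice-0 task: prio 120    */
	int waiter = MAX_RT_PRIO - 1 - 50;	/* SCHED_FIFO prio 50 -> 49 */

	printf("boosted owner runs at prio %d\n", rt_effective(owner, waiter));
	return 0;	/* prints 49: the owner inherits the waiter's urgency */
}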
-+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ p->static_prio = NICE_TO_PRIO(nice); -+ /* -+ * The RT priorities are set via sched_setscheduler(), but we still -+ * allow the 'normal' nice value to be set - but as expected -+ * it wont have any effect on scheduling until the task is -+ * not SCHED_NORMAL/SCHED_BATCH: -+ */ -+ if (task_has_rt_policy(p)) -+ goto out_unlock; -+ -+ p->prio = effective_prio(p); -+ check_task_changed(rq, p); -+out_unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+EXPORT_SYMBOL(set_user_nice); -+ -+/* -+ * can_nice - check if a task can reduce its nice value -+ * @p: task -+ * @nice: nice value -+ */ -+int can_nice(const struct task_struct *p, const int nice) -+{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); -+} -+ -+#ifdef __ARCH_WANT_SYS_NICE -+ -+/* -+ * sys_nice - change the priority of the current process. -+ * @increment: priority increment -+ * -+ * sys_setpriority is a more generic, but much slower function that -+ * does similar things. -+ */ -+SYSCALL_DEFINE1(nice, int, increment) -+{ -+ long nice, retval; -+ -+ /* -+ * Setpriority might change our priority at the same moment. -+ * We don't have to worry. Conceptually one call occurs first -+ * and we have a single winner. -+ */ -+ -+ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); -+ nice = task_nice(current) + increment; -+ -+ nice = clamp_val(nice, MIN_NICE, MAX_NICE); -+ if (increment < 0 && !can_nice(current, nice)) -+ return -EPERM; -+ -+ retval = security_task_setnice(current, nice); -+ if (retval) -+ return retval; -+ -+ set_user_nice(current, nice); -+ return 0; -+} -+ -+#endif -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ if (p->prio < MAX_RT_PRIO) -+ return (p->prio - MAX_RT_PRIO); -+ return (p->prio - MAX_RT_PRIO + p->boost_prio); -+} -+ -+/** -+ * idle_cpu - is a given CPU idle currently? -+ * @cpu: the processor in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int idle_cpu(int cpu) -+{ -+ return cpu_curr(cpu) == cpu_rq(cpu)->idle; -+} -+ -+/** -+ * idle_task - return the idle task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * Return: The idle task for the cpu @cpu. -+ */ -+struct task_struct *idle_task(int cpu) -+{ -+ return cpu_rq(cpu)->idle; -+} -+ -+/** -+ * find_process_by_pid - find a process with a matching PID value. -+ * @pid: the pid in question. -+ * -+ * The task of @pid, if found. %NULL otherwise. -+ */ -+static inline struct task_struct *find_process_by_pid(pid_t pid) -+{ -+ return pid ? find_task_by_vpid(pid) : current; -+} -+ -+#ifdef CONFIG_SMP -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. 
-+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. -+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ int dest_cpu; -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. -+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (cpumask_equal(p->cpus_ptr, new_mask)) -+ goto out; -+ -+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ do_set_cpus_allowed(p, new_mask); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. -+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(p) || p->state == TASK_WAKING) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ /* Need help from migration thread: drop lock and wait. */ -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -+ return 0; -+ } -+ if (task_on_rq_queued(p)) { -+ /* -+ * OK, since we're going to drop the lock immediately -+ * afterwards anyway. -+ */ -+ update_rq_clock(rq); -+ rq = move_queued_task(rq, p, dest_cpu); -+ lock = &rq->lock; -+ } -+ -+out: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ return ret; -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+#else -+static inline int -+__set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+#endif -+ -+/* -+ * sched_setparam() passes in -1 for its policy, to let the functions -+ * it calls know not to change it. 
-+ */ -+#define SETPARAM_POLICY -1 -+ -+static void __setscheduler_params(struct task_struct *p, -+ const struct sched_attr *attr) -+{ -+ int policy = attr->sched_policy; -+ -+ if (policy == SETPARAM_POLICY) -+ policy = p->policy; -+ -+ p->policy = policy; -+ -+ /* -+ * allow normal nice value to be set, but will not have any -+ * effect on scheduling until the task not SCHED_NORMAL/ -+ * SCHED_BATCH -+ */ -+ p->static_prio = NICE_TO_PRIO(attr->sched_nice); -+ -+ /* -+ * __sched_setscheduler() ensures attr->sched_priority == 0 when -+ * !rt_policy. Always setting this ensures that things like -+ * getparam()/getattr() don't report silly values for !rt tasks. -+ */ -+ p->rt_priority = attr->sched_priority; -+ p->normal_prio = normal_prio(p); -+} -+ -+/* Actually do priority change: must hold rq lock. */ -+static void __setscheduler(struct rq *rq, struct task_struct *p, -+ const struct sched_attr *attr, bool keep_boost) -+{ -+ __setscheduler_params(p, attr); -+ -+ /* -+ * Keep a potential priority boosting if called from -+ * sched_setscheduler(). -+ */ -+ p->prio = normal_prio(p); -+ if (keep_boost) -+ p->prio = rt_effective_prio(p, p->prio); -+} -+ -+/* -+ * check the target process has a UID that matches the current process's -+ */ -+static bool check_same_owner(struct task_struct *p) -+{ -+ const struct cred *cred = current_cred(), *pcred; -+ bool match; -+ -+ rcu_read_lock(); -+ pcred = __task_cred(p); -+ match = (uid_eq(cred->euid, pcred->euid) || -+ uid_eq(cred->euid, pcred->uid)); -+ rcu_read_unlock(); -+ return match; -+} -+ -+static int __sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, -+ bool user, bool pi) -+{ -+ const struct sched_attr dl_squash_attr = { -+ .size = sizeof(struct sched_attr), -+ .sched_policy = SCHED_FIFO, -+ .sched_nice = 0, -+ .sched_priority = 99, -+ }; -+ int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ int retval, oldpolicy = -1; -+ int policy = attr->sched_policy; -+ unsigned long flags; -+ struct rq *rq; -+ int reset_on_fork; -+ raw_spinlock_t *lock; -+ -+ /* The pi code expects interrupts enabled */ -+ BUG_ON(pi && in_interrupt()); -+ -+ /* -+ * BMQ supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO -+ */ -+ if (unlikely(SCHED_DEADLINE == policy)) { -+ attr = &dl_squash_attr; -+ policy = attr->sched_policy; -+ newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ } -+recheck: -+ /* Double check policy once rq lock held */ -+ if (policy < 0) { -+ reset_on_fork = p->sched_reset_on_fork; -+ policy = oldpolicy = p->policy; -+ } else { -+ reset_on_fork = !!(attr->sched_flags & SCHED_RESET_ON_FORK); -+ -+ if (policy > SCHED_IDLE) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & ~(SCHED_FLAG_ALL)) -+ return -EINVAL; -+ -+ /* -+ * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * SCHED_BATCH and SCHED_IDLE is 0. 
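From userspace, those validation rules are what callers of sched_setscheduler(2) run into: SCHED_FIFO/SCHED_RR need a priority in 1..MAX_USER_RT_PRIO-1 (bounded by RLIMIT_RTPRIO for unprivileged callers), every other policy needs 0. A minimal, illustrative example:

/* Illustrative userspace caller; build with: cc -o setsched setsched.c */
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 10 };

	if (sched_setscheduler(0, SCHED_FIFO, &sp))	/* 0 = this process */
		printf("SCHED_FIFO prio 10 refused: %s\n", strerror(errno));
	else
		printf("now SCHED_FIFO prio 10\n");

	sp.sched_priority = 0;				/* must be 0 here */
	if (sched_setscheduler(0, SCHED_OTHER, &sp))
		printf("back to SCHED_OTHER failed: %s\n", strerror(errno));
	return 0;
}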
-+ */ -+ if (attr->sched_priority < 0 || -+ (p->mm && attr->sched_priority > MAX_USER_RT_PRIO - 1) || -+ (!p->mm && attr->sched_priority > MAX_RT_PRIO - 1)) -+ return -EINVAL; -+ if ((SCHED_RR == policy || SCHED_FIFO == policy) != -+ (attr->sched_priority != 0)) -+ return -EINVAL; -+ -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (SCHED_FIFO == policy || SCHED_RR == policy) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (attr->sched_priority > p->rt_priority && -+ attr->sched_priority > rlim_rtprio) -+ return -EPERM; -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ -+ if (user) { -+ retval = security_task_setscheduler(p); -+ if (retval) -+ return retval; -+ } -+ -+ if (pi) -+ cpuset_read_lock(); -+ -+ /* -+ * Make sure no PI-waiters arrive (or leave) while we are -+ * changing the priority of the task: -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ /* -+ * To be able to change p->policy safely, task_access_lock() -+ * must be called. -+ * IF use task_access_lock() here: -+ * For the task p which is not running, reading rq->stop is -+ * racy but acceptable as ->stop doesn't change much. -+ * An enhancemnet can be made to read rq->stop saftly. -+ */ -+ rq = __task_access_lock(p, &lock); -+ -+ /* -+ * Changing the policy of the stop threads its a very bad idea -+ */ -+ if (p == rq->stop) { -+ retval = -EINVAL; -+ goto unlock; -+ } -+ -+ /* -+ * If not changing anything there's no need to proceed further: -+ */ -+ if (unlikely(policy == p->policy)) { -+ if (rt_policy(policy) && attr->sched_priority != p->rt_priority) -+ goto change; -+ if (!rt_policy(policy) && -+ NICE_TO_PRIO(attr->sched_nice) != p->static_prio) -+ goto change; -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ retval = 0; -+ goto unlock; -+ } -+change: -+ -+ /* Re-check policy now with rq lock held */ -+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { -+ policy = oldpolicy = -1; -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ goto recheck; -+ } -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ -+ if (pi) { -+ /* -+ * Take priority boosted tasks into account. If the new -+ * effective priority is unchanged, we just store the new -+ * normal parameters and do not touch the scheduler class and -+ * the runqueue. This will be done when the task deboost -+ * itself. 
-+ */ -+ if (rt_effective_prio(p, newprio) == p->prio) { -+ __setscheduler_params(p, attr); -+ retval = 0; -+ goto unlock; -+ } -+ } -+ -+ __setscheduler(rq, p, attr, pi); -+ -+ check_task_changed(rq, p); -+ -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ if (pi) { -+ cpuset_read_unlock(); -+ rt_mutex_adjust_pi(p); -+ } -+ -+ preempt_enable(); -+ -+ return 0; -+ -+unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ return retval; -+} -+ -+static int _sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param, bool check) -+{ -+ struct sched_attr attr = { -+ .sched_policy = policy, -+ .sched_priority = param->sched_priority, -+ .sched_nice = PRIO_TO_NICE(p->static_prio), -+ }; -+ -+ /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ -+ if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { -+ attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ policy &= ~SCHED_RESET_ON_FORK; -+ attr.sched_policy = policy; -+ } -+ -+ return __sched_setscheduler(p, &attr, check, true); -+} -+ -+/** -+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * -+ * NOTE that the task may be already dead. -+ */ -+int sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, true); -+} -+ -+EXPORT_SYMBOL_GPL(sched_setscheduler); -+ -+int sched_setattr(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, true, true); -+} -+EXPORT_SYMBOL_GPL(sched_setattr); -+ -+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, false, true); -+} -+ -+/** -+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Just like sched_setscheduler, only don't bother checking if the -+ * current context has permission. For example, this is needed in -+ * stop_machine(): we create temporary high priority worker threads, -+ * but our caller might not have that capability. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+int sched_setscheduler_nocheck(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, false); -+} -+EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); -+ -+static int -+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) -+{ -+ struct sched_param lparam; -+ struct task_struct *p; -+ int retval; -+ -+ if (!param || pid < 0) -+ return -EINVAL; -+ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) -+ return -EFAULT; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setscheduler(p, policy, &lparam); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/* -+ * Mimics kernel/events/core.c perf_copy_attr(). 
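For reference, the legacy path above (sched_setscheduler() and friends) is what the glibc wrapper of the same name ends up in. A minimal usage sketch from userspace; raising priority needs CAP_SYS_NICE or a suitable RLIMIT_RTPRIO:

#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        struct sched_param sp = { .sched_priority = 10 };

        /* pid 0 means "the calling thread" */
        if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
                fprintf(stderr, "sched_setscheduler: %s\n", strerror(errno));
                return 1;
        }
        printf("policy now: %d (SCHED_FIFO = %d)\n",
               sched_getscheduler(0), SCHED_FIFO);
        return 0;
}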
-+ */ -+static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr) -+{ -+ u32 size; -+ int ret; -+ -+ /* Zero the full structure, so that a short copy will be nice: */ -+ memset(attr, 0, sizeof(*attr)); -+ -+ ret = get_user(size, &uattr->size); -+ if (ret) -+ return ret; -+ -+ /* ABI compatibility quirk: */ -+ if (!size) -+ size = SCHED_ATTR_SIZE_VER0; -+ -+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) -+ goto err_size; -+ -+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); -+ if (ret) { -+ if (ret == -E2BIG) -+ goto err_size; -+ return ret; -+ } -+ -+ /* -+ * XXX: Do we want to be lenient like existing syscalls; or do we want -+ * to be strict and return an error on out-of-bounds values? -+ */ -+ attr->sched_nice = clamp(attr->sched_nice, -20, 19); -+ -+ /* sched/core.c uses zero here but we already know ret is zero */ -+ return 0; -+ -+err_size: -+ put_user(sizeof(*attr), &uattr->size); -+ return -E2BIG; -+} -+ -+/** -+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority -+ * @pid: the pid in question. -+ * @policy: new policy. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * @param: structure containing the new RT priority. -+ */ -+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) -+{ -+ if (policy < 0) -+ return -EINVAL; -+ -+ return do_sched_setscheduler(pid, policy, param); -+} -+ -+/** -+ * sys_sched_setparam - set/change the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); -+} -+ -+/** -+ * sys_sched_setattr - same as above, but with extended sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ */ -+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, flags) -+{ -+ struct sched_attr attr; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || flags) -+ return -EINVAL; -+ -+ retval = sched_copy_attr(uattr, &attr); -+ if (retval) -+ return retval; -+ -+ if ((int)attr.sched_policy < 0) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (p != NULL) -+ retval = sched_setattr(p, &attr); -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread -+ * @pid: the pid in question. -+ * -+ * Return: On success, the policy of the thread. Otherwise, a negative error -+ * code. -+ */ -+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) -+{ -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (pid < 0) -+ goto out_nounlock; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (p) { -+ retval = security_task_getscheduler(p); -+ if (!retval) -+ retval = p->policy; -+ } -+ rcu_read_unlock(); -+ -+out_nounlock: -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the RT priority. -+ * -+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error -+ * code. 
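glibc ships no wrapper for sched_setattr()/sched_getattr(), so sys_sched_setattr above is normally reached through syscall(2). A hedged sketch, assuming <sys/syscall.h> provides SYS_sched_setattr; the local struct definition mirrors the VER0 ABI layout that sched_copy_attr() above accepts:

#define _GNU_SOURCE
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/sched.h>        /* SCHED_* policy constants */

/* Minimal local definition matching the VER0 layout of the syscall ABI. */
struct sched_attr {
        uint32_t size;
        uint32_t sched_policy;
        uint64_t sched_flags;
        int32_t  sched_nice;
        uint32_t sched_priority;
        uint64_t sched_runtime;
        uint64_t sched_deadline;
        uint64_t sched_period;
};

int main(void)
{
        struct sched_attr attr = {
                .size         = sizeof(attr),
                .sched_policy = SCHED_BATCH,
                .sched_nice   = 5,
        };

        if (syscall(SYS_sched_setattr, 0, &attr, 0) == -1) {
                fprintf(stderr, "sched_setattr: %s\n", strerror(errno));
                return 1;
        }
        return 0;
}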
-+ */ -+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ struct sched_param lp = { .sched_priority = 0 }; -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (!param || pid < 0) -+ goto out_nounlock; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ if (task_has_rt_policy(p)) -+ lp.sched_priority = p->rt_priority; -+ rcu_read_unlock(); -+ -+ /* -+ * This one might sleep, we cannot do it with a spinlock held ... -+ */ -+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; -+ -+out_nounlock: -+ return retval; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/* -+ * Copy the kernel size attribute structure (which might be larger -+ * than what user-space knows about) to user-space. -+ * -+ * Note that all cases are valid: user-space buffer can be larger or -+ * smaller than the kernel-space buffer. The usual case is that both -+ * have the same size. -+ */ -+static int -+sched_attr_copy_to_user(struct sched_attr __user *uattr, -+ struct sched_attr *kattr, -+ unsigned int usize) -+{ -+ unsigned int ksize = sizeof(*kattr); -+ -+ if (!access_ok(uattr, usize)) -+ return -EFAULT; -+ -+ /* -+ * sched_getattr() ABI forwards and backwards compatibility: -+ * -+ * If usize == ksize then we just copy everything to user-space and all is good. -+ * -+ * If usize < ksize then we only copy as much as user-space has space for, -+ * this keeps ABI compatibility as well. We skip the rest. -+ * -+ * If usize > ksize then user-space is using a newer version of the ABI, -+ * which part the kernel doesn't know about. Just ignore it - tooling can -+ * detect the kernel's knowledge of attributes from the attr->size value -+ * which is set to ksize in this case. -+ */ -+ kattr->size = min(usize, ksize); -+ -+ if (copy_to_user(uattr, kattr, kattr->size)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ * @usize: sizeof(attr) for fwd/bwd comp. -+ * @flags: for future extension. 
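The read-back paths above (sys_sched_getscheduler and sys_sched_getparam) back the classic glibc calls; an illustrative sketch:

#include <sched.h>
#include <stdio.h>

int main(void)
{
        struct sched_param sp;
        int policy = sched_getscheduler(0);     /* 0 == calling thread */

        if (policy == -1 || sched_getparam(0, &sp) == -1) {
                perror("sched_get*");
                return 1;
        }
        /* For SCHED_NORMAL/BATCH/IDLE the kernel reports rt_priority 0,
         * as enforced by __sched_setscheduler() above. */
        printf("policy=%d rt_priority=%d\n", policy, sp.sched_priority);
        return 0;
}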
-+ */ -+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, usize, unsigned int, flags) -+{ -+ struct sched_attr kattr = { }; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || usize > PAGE_SIZE || -+ usize < SCHED_ATTR_SIZE_VER0 || flags) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ kattr.sched_policy = p->policy; -+ if (p->sched_reset_on_fork) -+ kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ if (task_has_rt_policy(p)) -+ kattr.sched_priority = p->rt_priority; -+ else -+ kattr.sched_nice = task_nice(p); -+ -+#ifdef CONFIG_UCLAMP_TASK -+ kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; -+ kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; -+#endif -+ -+ rcu_read_unlock(); -+ -+ return sched_attr_copy_to_user(uattr, &kattr, usize); -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ -+ cpumask_var_t cpus_allowed, new_mask; -+ struct task_struct *p; -+ int retval; -+ -+ get_online_cpus(); -+ rcu_read_lock(); -+ -+ p = find_process_by_pid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ put_online_cpus(); -+ return -ESRCH; -+ } -+ -+ /* Prevent p going away */ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->flags & PF_NO_SETAFFINITY) { -+ retval = -EINVAL; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ retval = -EPERM; -+ if (!check_same_owner(p)) { -+ rcu_read_lock(); -+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { -+ rcu_read_unlock(); -+ goto out_unlock; -+ } -+ rcu_read_unlock(); -+ } -+ -+ retval = security_task_setscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ cpuset_cpus_allowed(p, cpus_allowed); -+ cpumask_and(new_mask, in_mask, cpus_allowed); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ -+ if (!retval) { -+ cpuset_cpus_allowed(p, cpus_allowed); -+ if (!cpumask_subset(new_mask, cpus_allowed)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. Just reset the cpus_allowed to the -+ * cpuset's cpus_allowed -+ */ -+ cpumask_copy(new_mask, cpus_allowed); -+ goto again; -+ } -+ } -+out_unlock: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_allowed); -+out_put_task: -+ put_task_struct(p); -+ put_online_cpus(); -+ return retval; -+} -+ -+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, -+ struct cpumask *new_mask) -+{ -+ if (len < cpumask_size()) -+ cpumask_clear(new_mask); -+ else if (len > cpumask_size()) -+ len = cpumask_size(); -+ -+ return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; -+} -+ -+/** -+ * sys_sched_setaffinity - set the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to the new CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. 
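sys_sched_setaffinity above sits behind the glibc sched_setaffinity() wrapper; a minimal pinning sketch:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
        cpu_set_t set;

        CPU_ZERO(&set);
        CPU_SET(0, &set);               /* pin the calling thread to CPU 0 */

        if (sched_setaffinity(0, sizeof(set), &set) == -1) {
                perror("sched_setaffinity");
                return 1;
        }
        return 0;
}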
-+ */ -+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ cpumask_var_t new_mask; -+ int retval; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); -+ if (retval == 0) -+ retval = sched_setaffinity(pid, new_mask); -+ free_cpumask_var(new_mask); -+ return retval; -+} -+ -+long sched_getaffinity(pid_t pid, cpumask_t *mask) -+{ -+ struct task_struct *p; -+ raw_spinlock_t *lock; -+ unsigned long flags; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ task_access_lock_irqsave(p, &lock, &flags); -+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+out_unlock: -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getaffinity - get the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to hold the current CPU mask -+ * -+ * Return: size of CPU mask copied to user_mask_ptr on success. An -+ * error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ int ret; -+ cpumask_var_t mask; -+ -+ if ((len * BITS_PER_BYTE) < nr_cpu_ids) -+ return -EINVAL; -+ if (len & (sizeof(unsigned long)-1)) -+ return -EINVAL; -+ -+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ ret = sched_getaffinity(pid, mask); -+ if (ret == 0) { -+ unsigned int retlen = min_t(size_t, len, cpumask_size()); -+ -+ if (copy_to_user(user_mask_ptr, mask, retlen)) -+ ret = -EFAULT; -+ else -+ ret = retlen; -+ } -+ free_cpumask_var(mask); -+ -+ return ret; -+} -+ -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. It does this by -+ * scheduling away the current task. If it still has the earliest deadline -+ * it will be scheduled again as the next task. -+ * -+ * Return: 0. -+ */ -+static void do_sched_yield(void) -+{ -+ struct rq *rq; -+ struct rq_flags rf; -+ -+ if (!sched_yield_type) -+ return; -+ -+ rq = this_rq_lock_irq(&rf); -+ -+ schedstat_inc(rq->yld_count); -+ -+ if (1 == sched_yield_type) { -+ if (!rt_task(current)) { -+ current->boost_prio = MAX_PRIORITY_ADJ; -+ requeue_task(current, rq); -+ } -+ } else if (2 == sched_yield_type) { -+ if (rq->nr_running > 1) -+ rq->skip = current; -+ } -+ -+ /* -+ * Since we are going to call schedule() anyway, there's -+ * no need to preempt or enable interrupts: -+ */ -+ preempt_disable(); -+ raw_spin_unlock(&rq->lock); -+ sched_preempt_enable_no_resched(); -+ -+ schedule(); -+} -+ -+SYSCALL_DEFINE0(sched_yield) -+{ -+ do_sched_yield(); -+ return 0; -+} -+ -+#ifndef CONFIG_PREEMPTION -+int __sched _cond_resched(void) -+{ -+ if (should_resched(0)) { -+ preempt_schedule_common(); -+ return 1; -+ } -+ rcu_all_qs(); -+ return 0; -+} -+EXPORT_SYMBOL(_cond_resched); -+#endif -+ -+/* -+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, -+ * call schedule, and on return reacquire the lock. -+ * -+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level -+ * operations here to prevent schedule() from being called twice (once via -+ * spin_unlock(), once by hand). 
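The read side of the affinity path is just as easy to exercise. As an aside, the sched_yield_type switch used by do_sched_yield() above appears to be exposed as a runtime tunable elsewhere in this patch set (the value selects between no-op, requeue, and skip behaviours). Affinity read-back sketch:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
        cpu_set_t set;

        if (sched_getaffinity(0, sizeof(set), &set) == -1) {
                perror("sched_getaffinity");
                return 1;
        }
        printf("runnable on %d CPU(s)\n", CPU_COUNT(&set));
        return 0;
}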
-+ */ -+int __cond_resched_lock(spinlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held(lock); -+ -+ if (spin_needbreak(lock) || resched) { -+ spin_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ spin_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_lock); -+ -+/** -+ * yield - yield the current processor to other threads. -+ * -+ * Do not ever use this function, there's a 99% chance you're doing it wrong. -+ * -+ * The scheduler is at all times free to pick the calling task as the most -+ * eligible task to run, if removing the yield() call from your code breaks -+ * it, its already broken. -+ * -+ * Typical broken usage is: -+ * -+ * while (!event) -+ * yield(); -+ * -+ * where one assumes that yield() will let 'the other' process run that will -+ * make event true. If the current task is a SCHED_FIFO task that will never -+ * happen. Never use yield() as a progress guarantee!! -+ * -+ * If you want to use yield() to wait for something, use wait_event(). -+ * If you want to use yield() to be 'nice' for others, use cond_resched(). -+ * If you still want to use yield(), do not! -+ */ -+void __sched yield(void) -+{ -+ set_current_state(TASK_RUNNING); -+ do_sched_yield(); -+} -+EXPORT_SYMBOL(yield); -+ -+/** -+ * yield_to - yield the current processor to another thread in -+ * your thread group, or accelerate that thread toward the -+ * processor it's on. -+ * @p: target task -+ * @preempt: whether task preemption is allowed or not -+ * -+ * It's the caller's job to ensure that the target task struct -+ * can't go away on us before we can do any checks. -+ * -+ * In BMQ, yield_to is not supported. -+ * -+ * Return: -+ * true (>0) if we indeed boosted the target task. -+ * false (0) if we failed to boost the target. -+ * -ESRCH if there's no task to yield to. -+ */ -+int __sched yield_to(struct task_struct *p, bool preempt) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(yield_to); -+ -+int io_schedule_prepare(void) -+{ -+ int old_iowait = current->in_iowait; -+ -+ current->in_iowait = 1; -+ blk_schedule_flush_plug(current); -+ -+ return old_iowait; -+} -+ -+void io_schedule_finish(int token) -+{ -+ current->in_iowait = token; -+} -+ -+/* -+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so -+ * that process accounting knows that this is a task in IO wait state. -+ * -+ * But don't do that if it is a deliberate, throttling IO wait (this task -+ * has set its backing_dev_info: the queue against which it should throttle) -+ */ -+ -+long __sched io_schedule_timeout(long timeout) -+{ -+ int token; -+ long ret; -+ -+ token = io_schedule_prepare(); -+ ret = schedule_timeout(timeout); -+ io_schedule_finish(token); -+ -+ return ret; -+} -+EXPORT_SYMBOL(io_schedule_timeout); -+ -+void __sched io_schedule(void) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ schedule(); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(io_schedule); -+ -+/** -+ * sys_sched_get_priority_max - return maximum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the maximum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. 
-+ */ -+SYSCALL_DEFINE1(sched_get_priority_max, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = MAX_USER_RT_PRIO-1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * sys_sched_get_priority_min - return minimum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the minimum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_min, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) -+{ -+ struct task_struct *p; -+ int retval; -+ -+ if (pid < 0) -+ return -EINVAL; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ rcu_read_unlock(); -+ -+ *t = ns_to_timespec64(SCHED_TIMESLICE_NS); -+ return 0; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/** -+ * sys_sched_rr_get_interval - return the default timeslice of a process. -+ * @pid: pid of the process. -+ * @interval: userspace pointer to the timeslice value. -+ * -+ * -+ * Return: On success, 0 and the timeslice is in @interval. Otherwise, -+ * an error code. -+ */ -+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, -+ struct __kernel_timespec __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_timespec64(&t, interval); -+ -+ return retval; -+} -+ -+#ifdef CONFIG_COMPAT_32BIT_TIME -+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, -+ struct old_timespec32 __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_old_timespec32(&t, interval); -+ return retval; -+} -+#endif -+ -+void sched_show_task(struct task_struct *p) -+{ -+ unsigned long free = 0; -+ int ppid; -+ -+ if (!try_get_task_stack(p)) -+ return; -+ -+ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); -+ -+ if (p->state == TASK_RUNNING) -+ printk(KERN_CONT " running task "); -+#ifdef CONFIG_DEBUG_STACK_USAGE -+ free = stack_not_used(p); -+#endif -+ ppid = 0; -+ rcu_read_lock(); -+ if (pid_alive(p)) -+ ppid = task_pid_nr(rcu_dereference(p->real_parent)); -+ rcu_read_unlock(); -+ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, -+ task_pid_nr(p), ppid, -+ (unsigned long)task_thread_info(p)->flags); -+ -+ print_worker_info(KERN_INFO, p); -+ show_stack(p, NULL); -+ put_task_stack(p); -+} -+EXPORT_SYMBOL_GPL(sched_show_task); -+ -+static inline bool -+state_filter_match(unsigned long state_filter, struct task_struct *p) -+{ -+ /* no filter, everything matches */ -+ if (!state_filter) -+ return true; -+ -+ /* filter, but doesn't match */ -+ if (!(p->state & state_filter)) -+ return false; -+ -+ /* -+ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows -+ * TASK_KILLABLE). 
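The constant ranges returned above are easy to confirm from userspace; note that in this BMQ code sched_rr_get_interval() reports the fixed SCHED_TIMESLICE_NS rather than a per-task slice. Sketch:

#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
        struct timespec ts;

        printf("SCHED_FIFO priority range: %d..%d\n",
               sched_get_priority_min(SCHED_FIFO),
               sched_get_priority_max(SCHED_FIFO));

        if (sched_rr_get_interval(0, &ts) == 0)
                printf("round-robin timeslice: %ld.%09ld s\n",
                       (long)ts.tv_sec, ts.tv_nsec);
        return 0;
}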
-+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) -+ return false; -+ -+ return true; -+} -+ -+ -+void show_state_filter(unsigned long state_filter) -+{ -+ struct task_struct *g, *p; -+ -+#if BITS_PER_LONG == 32 -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#else -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#endif -+ rcu_read_lock(); -+ for_each_process_thread(g, p) { -+ /* -+ * reset the NMI-timeout, listing all files on a slow -+ * console might take a lot of time: -+ * Also, reset softlockup watchdogs on all CPUs, because -+ * another CPU might be blocked waiting for us to process -+ * an IPI. -+ */ -+ touch_nmi_watchdog(); -+ touch_all_softlockup_watchdogs(); -+ if (state_filter_match(state_filter, p)) -+ sched_show_task(p); -+ } -+ -+#ifdef CONFIG_SCHED_DEBUG -+ /* TODO: BMQ should support this -+ if (!state_filter) -+ sysrq_sched_debug_show(); -+ */ -+#endif -+ rcu_read_unlock(); -+ /* -+ * Only show locks if all tasks are dumped: -+ */ -+ if (!state_filter) -+ debug_show_all_locks(); -+} -+ -+void dump_cpu_task(int cpu) -+{ -+ pr_info("Task dump for CPU %d:\n", cpu); -+ sched_show_task(cpu_curr(cpu)); -+} -+ -+/** -+ * init_idle - set up an idle thread for a given CPU -+ * @idle: task in question -+ * @cpu: cpu the idle task belongs to -+ * -+ * NOTE: this function does not set the idle thread's NEED_RESCHED -+ * flag, to make booting more robust. -+ */ -+void init_idle(struct task_struct *idle, int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&idle->pi_lock, flags); -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ idle->last_ran = rq->clock_task; -+ idle->state = TASK_RUNNING; -+ idle->flags |= PF_IDLE; -+ /* Setting prio to illegal value shouldn't matter when never queued */ -+ idle->prio = MAX_PRIO; -+ -+ idle->bmq_idx = IDLE_TASK_SCHED_PRIO; -+ bmq_init_idle(&rq->queue, idle); -+ -+ kasan_unpoison_task_stack(idle); -+ -+#ifdef CONFIG_SMP -+ /* -+ * It's possible that init_idle() gets called multiple times on a task, -+ * in that case do_set_cpus_allowed() will not do the right thing. -+ * -+ * And since this is boot we can forgo the serialisation. -+ */ -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); -+#endif -+ -+ /* Silence PROVE_RCU */ -+ rcu_read_lock(); -+ __set_task_cpu(idle, cpu); -+ rcu_read_unlock(); -+ -+ rq->idle = idle; -+ rcu_assign_pointer(rq->curr, idle); -+ idle->on_cpu = 1; -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); -+ -+ /* Set the preempt count _outside_ the spinlocks! */ -+ init_idle_preempt_count(idle, cpu); -+ -+ ftrace_graph_init_idle_task(idle, cpu); -+ vtime_init_idle(idle, cpu); -+#ifdef CONFIG_SMP -+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -+#endif -+} -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. -+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. 
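sched_show_task()/show_state_filter() above are what back the SysRq 't' task dump. One way to exercise them, assuming root and a kernel.sysrq setting that permits it:

#include <stdio.h>

int main(void)
{
        /* Writing 't' to /proc/sysrq-trigger asks the kernel to dump all
         * task states to the kernel log (dmesg) via show_state_filter(). */
        FILE *f = fopen("/proc/sysrq-trigger", "w");

        if (!f) {
                perror("fopen");
                return 1;
        }
        fputc('t', f);
        fclose(f);
        return 0;
}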
-+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. -+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); -+ /* task can safely be re-inserted now: */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ -+#ifdef CONFIG_SMP -+ -+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, -+ const struct cpumask __maybe_unused *trial) -+{ -+ return 1; -+} -+ -+int task_can_attach(struct task_struct *p, -+ const struct cpumask *cs_cpus_allowed) -+{ -+ int ret = 0; -+ -+ /* -+ * Kthreads which disallow setaffinity shouldn't be moved -+ * to a new cpuset; we don't want to change their CPU -+ * affinity and isolating such threads by their set of -+ * allowed nodes is unnecessary. Thus, cpusets are not -+ * applicable for such threads. This prevents checking for -+ * success of set_cpus_allowed_ptr() on all attached tasks -+ * before cpus_mask may be changed. -+ */ -+ if (p->flags & PF_NO_SETAFFINITY) -+ ret = -EINVAL; -+ -+ return ret; -+} -+ -+static bool sched_smp_initialized __read_mostly; -+ -+#ifdef CONFIG_NO_HZ_COMMON -+void nohz_balance_enter_idle(int cpu) -+{ -+} -+ -+void select_nohz_load_balancer(int stop_tick) -+{ -+} -+ -+void set_cpu_sd_state_idle(void) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. -+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). 
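wake_q_add()/wake_up_q() above keep their mainline semantics: collect wakeups while a lock is held, then issue them after dropping it. A kernel-context sketch only (not standalone code); struct waiter and wake_all_waiters() are hypothetical names used for illustration:

#include <linux/list.h>
#include <linux/sched.h>
#include <linux/sched/wake_q.h>
#include <linux/spinlock.h>

struct waiter {                         /* hypothetical example structure */
        struct list_head node;
        struct task_struct *task;
};

static void wake_all_waiters(struct list_head *waiters, spinlock_t *lock)
{
        DEFINE_WAKE_Q(wake_q);
        struct waiter *w;

        spin_lock(lock);
        list_for_each_entry(w, waiters, node)
                wake_q_add(&wake_q, w->task);   /* takes a task reference */
        spin_unlock(lock);

        wake_up_q(&wake_q);                     /* wakes tasks, drops references */
}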
-+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(); -+ struct cpumask *mask; -+ -+ if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) -+ return cpu; -+ -+ for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) -+ for_each_cpu(i, mask) -+ if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) -+ return i; -+ -+ if (!housekeeping_cpu(cpu, HK_FLAG_TIMER)) -+ cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+ -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. -+ */ -+void wake_up_idle_cpu(int cpu) -+{ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ set_tsk_need_resched(cpu_rq(cpu)->idle); -+ smp_send_reschedule(cpu); -+} -+ -+void wake_up_nohz_cpu(int cpu) -+{ -+ wake_up_idle_cpu(cpu); -+} -+#endif /* CONFIG_NO_HZ_COMMON */ -+ -+#ifdef CONFIG_HOTPLUG_CPU -+/* -+ * Ensures that the idle task is using init_mm right before its CPU goes -+ * offline. -+ */ -+void idle_task_exit(void) -+{ -+ struct mm_struct *mm = current->active_mm; -+ -+ BUG_ON(cpu_online(smp_processor_id())); -+ -+ if (mm != &init_mm) { -+ switch_mm(mm, &init_mm, current); -+ current->active_mm = &init_mm; -+ finish_arch_post_lock_switch(); -+ } -+ mmdrop(mm); -+} -+ -+/* -+ * Migrate all tasks from the rq, sleeping tasks will be migrated by -+ * try_to_wake_up()->select_task_rq(). -+ * -+ * Called with rq->lock held even though we'er in stop_machine() and -+ * there's no concurrency possible, we hold the required locks anyway -+ * because of lock validation efforts. -+ */ -+static void migrate_tasks(struct rq *dead_rq) -+{ -+ struct rq *rq = dead_rq; -+ struct task_struct *p, *stop = rq->stop; -+ int count = 0; -+ -+ /* -+ * Fudge the rq selection such that the below task selection loop -+ * doesn't get stuck on the currently eligible stop task. -+ * -+ * We're currently inside stop_machine() and the rq is either stuck -+ * in the stop_machine_cpu_stop() loop, or we're executing this code, -+ * either way we should never end up calling schedule() until we're -+ * done here. -+ */ -+ rq->stop = NULL; -+ -+ p = rq_first_bmq_task(rq); -+ while (p != rq->idle) { -+ int dest_cpu; -+ -+ /* skip the running task */ -+ if (task_running(p) || 1 == p->nr_cpus_allowed) { -+ p = rq_next_bmq_task(p, rq); -+ continue; -+ } -+ -+ /* -+ * Rules for changing task_struct::cpus_allowed are holding -+ * both pi_lock and rq->lock, such that holding either -+ * stabilizes the mask. -+ * -+ * Drop rq->lock is not quite as disastrous as it usually is -+ * because !cpu_active at this point, which means load-balance -+ * will not interfere. Also, stop-machine. -+ */ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ /* -+ * Since we're inside stop-machine, _nothing_ should have -+ * changed the task, WARN if weird stuff happened, because in -+ * that case the above rq->lock drop is a fail too. 
-+ */ -+ if (WARN_ON(task_rq(p) != rq || !task_on_rq_queued(p))) { -+ raw_spin_unlock(&p->pi_lock); -+ p = rq_next_bmq_task(p, rq); -+ continue; -+ } -+ -+ count++; -+ /* Find suitable destination for @next, with force if needed. */ -+ dest_cpu = select_fallback_rq(dead_rq->cpu, p); -+ rq = __migrate_task(rq, p, dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ rq = dead_rq; -+ raw_spin_lock(&rq->lock); -+ /* Check queued task all over from the header again */ -+ p = rq_first_bmq_task(rq); -+ } -+ -+ rq->stop = stop; -+} -+ -+static void set_rq_offline(struct rq *rq) -+{ -+ if (rq->online) -+ rq->online = false; -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+static void set_rq_online(struct rq *rq) -+{ -+ if (!rq->online) -+ rq->online = true; -+} -+ -+#ifdef CONFIG_SCHED_DEBUG -+ -+static __read_mostly int sched_debug_enabled; -+ -+static int __init sched_debug_setup(char *str) -+{ -+ sched_debug_enabled = 1; -+ -+ return 0; -+} -+early_param("sched_debug", sched_debug_setup); -+ -+static inline bool sched_debug(void) -+{ -+ return sched_debug_enabled; -+} -+#else /* !CONFIG_SCHED_DEBUG */ -+static inline bool sched_debug(void) -+{ -+ return false; -+} -+#endif /* CONFIG_SCHED_DEBUG */ -+ -+#ifdef CONFIG_SMP -+void scheduler_ipi(void) -+{ -+ /* -+ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting -+ * TIF_NEED_RESCHED remotely (for the first time) will also send -+ * this IPI. -+ */ -+ preempt_fold_need_resched(); -+ -+ if (!idle_cpu(smp_processor_id()) || need_resched()) -+ return; -+ -+ irq_enter(); -+ irq_exit(); -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (is_idle_task(rq->curr)) -+ smp_send_reschedule(cpu); -+ /* Else CPU is not idle, do nothing here */ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Topology list, bottom-up. -+ */ -+static struct sched_domain_topology_level default_topology[] = { -+#ifdef CONFIG_SCHED_SMT -+ { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, -+#endif -+#ifdef CONFIG_SCHED_MC -+ { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, -+#endif -+ { cpu_cpu_mask, SD_INIT_NAME(DIE) }, -+ { NULL, }, -+}; -+ -+static struct sched_domain_topology_level *sched_domain_topology = -+ default_topology; -+ -+#define for_each_sd_topology(tl) \ -+ for (tl = sched_domain_topology; tl->mask; tl++) -+ -+void set_sched_topology(struct sched_domain_topology_level *tl) -+{ -+ if (WARN_ON_ONCE(sched_smp_initialized)) -+ return; -+ -+ sched_domain_topology = tl; -+} -+ -+/* -+ * Initializers for schedule domains -+ * Non-inlined to reduce accumulated stack pressure in build_sched_domains() -+ */ -+ -+int sched_domain_level_max; -+ -+/* -+ * Partition sched domains as specified by the 'ndoms_new' -+ * cpumasks in the array doms_new[] of cpumasks. This compares -+ * doms_new[] to the current sched domain partitioning, doms_cur[]. -+ * It destroys each deleted domain and builds each new domain. -+ * -+ * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. -+ * The masks don't intersect (don't overlap.) 
We should setup one -+ * sched domain for each mask. CPUs not in any of the cpumasks will -+ * not be load balanced. If the same cpumask appears both in the -+ * current 'doms_cur' domains and in the new 'doms_new', we can leave -+ * it as it is. -+ * -+ * The passed in 'doms_new' should be allocated using -+ * alloc_sched_domains. This routine takes ownership of it and will -+ * free_sched_domains it when done with it. If the caller failed the -+ * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, -+ * and partition_sched_domains() will fallback to the single partition -+ * 'fallback_doms', it also forces the domains to be rebuilt. -+ * -+ * If doms_new == NULL it will be replaced with cpu_online_mask. -+ * ndoms_new == 0 is a special case for destroying existing domains, -+ * and it will not create the default domain. -+ * -+ * Call with hotplug lock held -+ */ -+void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], -+ struct sched_domain_attr *dattr_new) -+{ -+ /** -+ * BMQ doesn't depend on sched domains, but just keep this api -+ */ -+} -+ -+/* -+ * used to mark begin/end of suspend/resume: -+ */ -+static int num_cpus_frozen; -+ -+#ifdef CONFIG_NUMA -+int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; -+ -+/* -+ * sched_numa_find_closest() - given the NUMA topology, find the cpu -+ * closest to @cpu from @cpumask. -+ * cpumask: cpumask to find a cpu from -+ * cpu: cpu to be close to -+ * -+ * returns: cpu, or nr_cpu_ids when nothing found. -+ */ -+int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return best_mask_cpu(cpu, cpus); -+} -+#endif /* CONFIG_NUMA */ -+ -+/* -+ * Update cpusets according to cpu_active mask. If cpusets are -+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper -+ * around partition_sched_domains(). -+ * -+ * If we come here as part of a suspend/resume, don't touch cpusets because we -+ * want to restore it back to its original state upon resume anyway. -+ */ -+static void cpuset_cpu_active(void) -+{ -+ if (cpuhp_tasks_frozen) { -+ /* -+ * num_cpus_frozen tracks how many CPUs are involved in suspend -+ * resume sequence. As long as this is not the last online -+ * operation in the resume sequence, just build a single sched -+ * domain, ignoring cpusets. -+ */ -+ partition_sched_domains(1, NULL, NULL); -+ if (--num_cpus_frozen) -+ return; -+ /* -+ * This is the last CPU online operation. So fall through and -+ * restore the original sched domains by considering the -+ * cpuset configurations. -+ */ -+ cpuset_force_rebuild(); -+ } -+ -+ cpuset_update_active_cpus(); -+} -+ -+static int cpuset_cpu_inactive(unsigned int cpu) -+{ -+ if (!cpuhp_tasks_frozen) { -+ cpuset_update_active_cpus(); -+ } else { -+ num_cpus_frozen++; -+ partition_sched_domains(1, NULL, NULL); -+ } -+ return 0; -+} -+ -+int sched_cpu_activate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going up, increment the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_inc_cpuslocked(&sched_smt_present); -+#endif -+ set_cpu_active(cpu, true); -+ -+ if (sched_smp_initialized) -+ cpuset_cpu_active(); -+ -+ /* -+ * Put the rq online, if not already. This happens: -+ * -+ * 1) In the early boot process, because we build the real domains -+ * after all cpus have been brought up. -+ * -+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the -+ * domains. 
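sched_cpu_activate()/sched_cpu_deactivate() above are driven by CPU hotplug; the usual userspace trigger is the sysfs online attribute (root required). A hedged sketch that offlines and re-onlines CPU 1; set_cpu_online() is a helper name invented for this example:

#include <stdio.h>

static int set_cpu_online(int cpu, int online)
{
        char path[64];
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/devices/system/cpu/cpu%d/online", cpu);
        f = fopen(path, "w");
        if (!f) {
                perror(path);
                return -1;
        }
        fprintf(f, "%d\n", online);
        return fclose(f);
}

int main(void)
{
        if (set_cpu_online(1, 0) == 0)          /* exercises sched_cpu_deactivate() */
                set_cpu_online(1, 1);           /* and sched_cpu_activate() */
        return 0;
}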
-+ */ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_online(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ int ret; -+ -+ set_cpu_active(cpu, false); -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. -+ */ -+ synchronize_rcu(); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going down, decrement the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) { -+ static_branch_dec_cpuslocked(&sched_smt_present); -+ if (!static_branch_likely(&sched_smt_present)) -+ cpumask_clear(&sched_sg_idle_mask); -+ } -+#endif -+ -+ if (!sched_smp_initialized) -+ return 0; -+ -+ ret = cpuset_cpu_inactive(cpu); -+ if (ret) { -+ set_cpu_active(cpu, true); -+ return ret; -+ } -+ return 0; -+} -+ -+static void sched_rq_cpu_starting(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ rq->calc_load_update = calc_load_update; -+} -+ -+int sched_cpu_starting(unsigned int cpu) -+{ -+ sched_rq_cpu_starting(cpu); -+ sched_tick_start(cpu); -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+int sched_cpu_dying(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ sched_tick_stop(cpu); -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_offline(rq); -+ migrate_tasks(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ hrtick_clear(rq); -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_SMP -+static void sched_init_topology_cpumask_early(void) -+{ -+ int cpu, level; -+ cpumask_t *tmp; -+ -+ for_each_possible_cpu(cpu) { -+ for (level = 0; level < NR_CPU_AFFINITY_CHK_LEVEL; level++) { -+ tmp = &(per_cpu(sched_cpu_affinity_masks, cpu)[level]); -+ cpumask_copy(tmp, cpu_possible_mask); -+ cpumask_clear_cpu(cpu, tmp); -+ } -+ per_cpu(sched_cpu_affinity_end_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); -+ } -+} -+ -+static void sched_init_topology_cpumask(void) -+{ -+ int cpu; -+ cpumask_t *chk; -+ -+ for_each_online_cpu(cpu) { -+ chk = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ -+#ifdef CONFIG_SCHED_SMT -+ cpumask_setall(chk); -+ cpumask_clear_cpu(cpu, chk); -+ if (cpumask_and(chk, chk, topology_sibling_cpumask(cpu))) { -+ printk(KERN_INFO "bmq: cpu #%d affinity check mask - smt 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ } -+ cpumask_complement(chk, topology_sibling_cpumask(cpu)); -+#else -+ cpumask_clear_cpu(cpu, chk); -+#endif -+#ifdef CONFIG_SCHED_MC -+ if (cpumask_and(chk, chk, cpu_coregroup_mask(cpu))) -+ printk(KERN_INFO "bmq: cpu #%d affinity check mask - coregroup 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ cpumask_complement(chk, cpu_coregroup_mask(cpu)); -+ -+ /** -+ * Set up sd_llc_id per CPU -+ */ -+ per_cpu(sd_llc_id, cpu) = -+ cpumask_first(cpu_coregroup_mask(cpu)); -+#else -+ per_cpu(sd_llc_id, cpu) = -+ cpumask_first(topology_core_cpumask(cpu)); -+ -+ cpumask_setall(chk); -+ cpumask_clear_cpu(cpu, chk); -+#endif /* NOT CONFIG_SCHED_MC */ -+ if (cpumask_and(chk, chk, topology_core_cpumask(cpu))) -+ printk(KERN_INFO "bmq: cpu #%d affinity check mask - core 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ cpumask_complement(chk, topology_core_cpumask(cpu)); -+ -+ if (cpumask_and(chk, chk, cpu_online_mask)) -+ printk(KERN_INFO "bmq: cpu #%d affinity check mask - others 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ -+ per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; -+ 
} -+} -+#endif -+ -+void __init sched_init_smp(void) -+{ -+ /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -+ BUG(); -+ -+ sched_init_topology_cpumask(); -+ -+ sched_smp_initialized = true; -+} -+#else -+void __init sched_init_smp(void) -+{ -+} -+#endif /* CONFIG_SMP */ -+ -+int in_sched_functions(unsigned long addr) -+{ -+ return in_lock_functions(addr) || -+ (addr >= (unsigned long)__sched_text_start -+ && addr < (unsigned long)__sched_text_end); -+} -+ -+#ifdef CONFIG_CGROUP_SCHED -+/* task group related information */ -+struct task_group { -+ struct cgroup_subsys_state css; -+ -+ struct rcu_head rcu; -+ struct list_head list; -+ -+ struct task_group *parent; -+ struct list_head siblings; -+ struct list_head children; -+}; -+ -+/* -+ * Default task group. -+ * Every task in system belongs to this group at bootup. -+ */ -+struct task_group root_task_group; -+LIST_HEAD(task_groups); -+ -+/* Cacheline aligned slab cache for task_group */ -+static struct kmem_cache *task_group_cache __read_mostly; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void __init sched_init(void) -+{ -+ int i; -+ struct rq *rq; -+ -+ print_scheduler_version(); -+ -+ wait_bit_init(); -+ -+#ifdef CONFIG_SMP -+ for (i = 0; i < bmq_BITS; i++) -+ cpumask_copy(&sched_rq_watermark[i], cpu_present_mask); -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+ task_group_cache = KMEM_CACHE(task_group, 0); -+ -+ list_add(&root_task_group.list, &task_groups); -+ INIT_LIST_HEAD(&root_task_group.children); -+ INIT_LIST_HEAD(&root_task_group.siblings); -+#endif /* CONFIG_CGROUP_SCHED */ -+ for_each_possible_cpu(i) { -+ rq = cpu_rq(i); -+ -+ bmq_init(&rq->queue); -+ rq->watermark = IDLE_WM; -+ rq->skip = NULL; -+ -+ raw_spin_lock_init(&rq->lock); -+ rq->nr_running = rq->nr_uninterruptible = 0; -+ rq->calc_load_active = 0; -+ rq->calc_load_update = jiffies + LOAD_FREQ; -+#ifdef CONFIG_SMP -+ rq->online = false; -+ rq->cpu = i; -+ -+#ifdef CONFIG_SCHED_SMT -+ rq->active_balance = 0; -+#endif -+#endif -+ rq->nr_switches = 0; -+ atomic_set(&rq->nr_iowait, 0); -+ hrtick_rq_init(rq); -+ } -+#ifdef CONFIG_SMP -+ /* Set rq->online for cpu 0 */ -+ cpu_rq(0)->online = true; -+#endif -+ -+ /* -+ * The boot idle thread does lazy MMU switching as well: -+ */ -+ mmgrab(&init_mm); -+ enter_lazy_tlb(&init_mm, current); -+ -+ /* -+ * Make us the idle thread. Technically, schedule() should not be -+ * called from this thread, however somewhere below it might be, -+ * but because we are the idle thread, we just pick up running again -+ * when this runqueue becomes "idle". -+ */ -+ init_idle(current, smp_processor_id()); -+ -+ calc_load_update = jiffies + LOAD_FREQ; -+ -+#ifdef CONFIG_SMP -+ idle_thread_set_boot_cpu(); -+ -+ sched_init_topology_cpumask_early(); -+#endif /* SMP */ -+ -+ init_schedstats(); -+ -+ psi_init(); -+} -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+static inline int preempt_count_equals(int preempt_offset) -+{ -+ int nested = preempt_count() + rcu_preempt_depth(); -+ -+ return (nested == preempt_offset); -+} -+ -+void __might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* -+ * Blocking primitives will set (and therefore destroy) current->state, -+ * since we will exit with TASK_RUNNING make sure we enter with it, -+ * otherwise we will destroy state. 
-+ */ -+ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, -+ "do not call blocking ops when !TASK_RUNNING; " -+ "state=%lx set at [<%p>] %pS\n", -+ current->state, -+ (void *)current->task_state_change, -+ (void *)current->task_state_change); -+ -+ ___might_sleep(file, line, preempt_offset); -+} -+EXPORT_SYMBOL(__might_sleep); -+ -+void ___might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* Ratelimiting timestamp: */ -+ static unsigned long prev_jiffy; -+ -+ unsigned long preempt_disable_ip; -+ -+ /* WARN_ON_ONCE() by default, no rate limit required: */ -+ rcu_sleep_check(); -+ -+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ !is_idle_task(current) && !current->non_block_count) || -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) -+ return; -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ /* Save this before calling printk(), since that will clobber it: */ -+ preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ printk(KERN_ERR -+ "BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ printk(KERN_ERR -+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); -+ -+ if (task_stack_end_corrupted(current)) -+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ -+ debug_show_held_locks(current); -+ if (irqs_disabled()) -+ print_irqtrace_events(current); -+#ifdef CONFIG_DEBUG_PREEMPT -+ if (!preempt_count_equals(preempt_offset)) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+#endif -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL(___might_sleep); -+ -+void __cant_sleep(const char *file, int line, int preempt_offset) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > preempt_offset) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); -+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_sleep); -+#endif -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+void normalize_rt_tasks(void) -+{ -+ struct task_struct *g, *p; -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ }; -+ -+ read_lock(&tasklist_lock); -+ for_each_process_thread(g, p) { -+ /* -+ * Only normalize user tasks: -+ */ -+ if (p->flags & PF_KTHREAD) -+ continue; -+ -+ if (!rt_task(p)) { -+ /* -+ * Renice negative nice level userspace -+ * tasks back to 0: -+ */ -+ if (task_nice(p) < 0) -+ set_user_nice(p, 0); -+ continue; -+ } -+ -+ __sched_setscheduler(p, &attr, false, false); -+ } -+ read_unlock(&tasklist_lock); -+} -+#endif /* CONFIG_MAGIC_SYSRQ */ -+ -+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) -+/* -+ * These functions are only useful for the IA64 MCA handling, or kdb. -+ * -+ * They can only be called when the whole system has been -+ * stopped - every CPU needs to be quiescent, and no scheduling -+ * activity can take place. 
Using them for anything else would -+ * be a serious bug, and as a result, they aren't even visible -+ * under any other configuration. -+ */ -+ -+/** -+ * curr_task - return the current task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ * -+ * Return: The current task for @cpu. -+ */ -+struct task_struct *curr_task(int cpu) -+{ -+ return cpu_curr(cpu); -+} -+ -+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ -+ -+#ifdef CONFIG_IA64 -+/** -+ * ia64_set_curr_task - set the current task for a given CPU. -+ * @cpu: the processor in question. -+ * @p: the task pointer to set. -+ * -+ * Description: This function must only be used when non-maskable interrupts -+ * are serviced on a separate stack. It allows the architecture to switch the -+ * notion of the current task on a CPU in a non-blocking manner. This function -+ * must be called with all CPU's synchronised, and interrupts disabled, the -+ * and caller must save the original value of the current task (see -+ * curr_task() above) and restore that value before reenabling interrupts and -+ * re-starting the system. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ */ -+void ia64_set_curr_task(int cpu, struct task_struct *p) -+{ -+ cpu_curr(cpu) = p; -+} -+ -+#endif -+ -+#ifdef CONFIG_SCHED_DEBUG -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{ -+ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+static void sched_free_group(struct task_group *tg) -+{ -+ kmem_cache_free(task_group_cache, tg); -+} -+ -+/* allocate runqueue etc for a new task group */ -+struct task_group *sched_create_group(struct task_group *parent) -+{ -+ struct task_group *tg; -+ -+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); -+ if (!tg) -+ return ERR_PTR(-ENOMEM); -+ -+ return tg; -+} -+ -+void sched_online_group(struct task_group *tg, struct task_group *parent) -+{ -+} -+ -+/* rcu callback to free various structures associated with a task group */ -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ /* Now it should be safe to free those cfs_rqs */ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+void sched_destroy_group(struct task_group *tg) -+{ -+ /* Wait for possible concurrent references to cfs_rqs complete */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ -+void sched_offline_group(struct task_group *tg) -+{ -+} -+ -+static inline struct task_group *css_tg(struct cgroup_subsys_state *css) -+{ -+ return css ? 
container_of(css, struct task_group, css) : NULL; -+} -+ -+static struct cgroup_subsys_state * -+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -+{ -+ struct task_group *parent = css_tg(parent_css); -+ struct task_group *tg; -+ -+ if (!parent) { -+ /* This is early initialization for the top cgroup */ -+ return &root_task_group.css; -+ } -+ -+ tg = sched_create_group(parent); -+ if (IS_ERR(tg)) -+ return ERR_PTR(-ENOMEM); -+ return &tg->css; -+} -+ -+/* Expose task group only after completing cgroup initialization */ -+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ struct task_group *parent = css_tg(css->parent); -+ -+ if (parent) -+ sched_online_group(tg, parent); -+ return 0; -+} -+ -+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ sched_offline_group(tg); -+} -+ -+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ /* -+ * Relies on the RCU grace period between css_released() and this. -+ */ -+ sched_free_group(tg); -+} -+ -+static void cpu_cgroup_fork(struct task_struct *task) -+{ -+} -+ -+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) -+{ -+ return 0; -+} -+ -+static void cpu_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+ -+static struct cftype cpu_legacy_files[] = { -+ { } /* Terminate */ -+}; -+ -+static struct cftype cpu_files[] = { -+ { } /* terminate */ -+}; -+ -+static int cpu_extra_stat_show(struct seq_file *sf, -+ struct cgroup_subsys_state *css) -+{ -+ return 0; -+} -+ -+struct cgroup_subsys cpu_cgrp_subsys = { -+ .css_alloc = cpu_cgroup_css_alloc, -+ .css_online = cpu_cgroup_css_online, -+ .css_released = cpu_cgroup_css_released, -+ .css_free = cpu_cgroup_css_free, -+ .css_extra_stat_show = cpu_extra_stat_show, -+ .fork = cpu_cgroup_fork, -+ .can_attach = cpu_cgroup_can_attach, -+ .attach = cpu_cgroup_attach, -+ .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, -+ .early_init = true, -+ .threaded = true, -+}; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+#undef CREATE_TRACE_POINTS -diff --git a/kernel/sched/bmq_sched.h b/kernel/sched/bmq_sched.h -new file mode 100644 -index 000000000000..ed08dd0b8227 ---- /dev/null -+++ b/kernel/sched/bmq_sched.h -@@ -0,0 +1,472 @@ -+#ifndef BMQ_SCHED_H -+#define BMQ_SCHED_H -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#ifdef CONFIG_PARAVIRT -+# include -+#endif -+ -+#include "cpupri.h" -+ -+/* task_struct::on_rq states: */ -+#define TASK_ON_RQ_QUEUED 1 -+#define TASK_ON_RQ_MIGRATING 2 -+ -+static inline int task_on_rq_queued(struct task_struct *p) -+{ -+ return p->on_rq == TASK_ON_RQ_QUEUED; -+} -+ -+static inline int task_on_rq_migrating(struct task_struct *p) -+{ -+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+} -+ -+/* bits: -+ * RT, Low prio adj range, nice width, high prio adj range, cpu idle task */ -+#define bmq_BITS (NICE_WIDTH + 2 * MAX_PRIORITY_ADJ + 2) -+#define IDLE_TASK_SCHED_PRIO (bmq_BITS - 1) -+ -+struct bmq { -+ DECLARE_BITMAP(bitmap, bmq_BITS); -+ struct list_head heads[bmq_BITS]; -+}; -+ -+/* -+ * 
This is the main, per-CPU runqueue data structure. -+ * This data should only be modified by the local cpu. -+ */ -+struct rq { -+ /* runqueue lock: */ -+ raw_spinlock_t lock; -+ -+ struct task_struct *curr, *idle, *stop, *skip; -+ struct mm_struct *prev_mm; -+ -+ struct bmq queue; -+ unsigned long watermark; -+ -+ /* switch count */ -+ u64 nr_switches; -+ -+ atomic_t nr_iowait; -+ -+#ifdef CONFIG_MEMBARRIER -+ int membarrier_state; -+#endif -+ -+#ifdef CONFIG_SMP -+ int cpu; /* cpu of this runqueue */ -+ bool online; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ struct sched_avg avg_irq; -+#endif -+ -+#ifdef CONFIG_SCHED_SMT -+ int active_balance; -+ struct cpu_stop_work active_balance_work; -+#endif -+#endif /* CONFIG_SMP */ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ u64 prev_irq_time; -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+#ifdef CONFIG_PARAVIRT -+ u64 prev_steal_time; -+#endif /* CONFIG_PARAVIRT */ -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ u64 prev_steal_time_rq; -+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ -+ -+ /* calc_load related fields */ -+ unsigned long calc_load_update; -+ long calc_load_active; -+ -+ u64 clock, last_tick; -+ u64 last_ts_switch; -+ u64 clock_task; -+ -+ unsigned long nr_running; -+ unsigned long nr_uninterruptible; -+ -+#ifdef CONFIG_SCHED_HRTICK -+#ifdef CONFIG_SMP -+ int hrtick_csd_pending; -+ call_single_data_t hrtick_csd; -+#endif -+ struct hrtimer hrtick_timer; -+#endif -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+ /* latency stats */ -+ struct sched_info rq_sched_info; -+ unsigned long long rq_cpu_time; -+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ -+ -+ /* sys_sched_yield() stats */ -+ unsigned int yld_count; -+ -+ /* schedule() stats */ -+ unsigned int sched_switch; -+ unsigned int sched_count; -+ unsigned int sched_goidle; -+ -+ /* try_to_wake_up() stats */ -+ unsigned int ttwu_count; -+ unsigned int ttwu_local; -+#endif /* CONFIG_SCHEDSTATS */ -+#ifdef CONFIG_CPU_IDLE -+ /* Must be inspected within a rcu lock section */ -+ struct cpuidle_state *idle_state; -+#endif -+}; -+ -+extern unsigned long calc_load_update; -+extern atomic_long_t calc_load_tasks; -+ -+extern void calc_global_load_tick(struct rq *this_rq); -+extern long calc_load_fold_active(struct rq *this_rq, long adjust); -+ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) -+#define this_rq() this_cpu_ptr(&runqueues) -+#define task_rq(p) cpu_rq(task_cpu(p)) -+#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -+#define raw_rq() raw_cpu_ptr(&runqueues) -+ -+#ifdef CONFIG_SMP -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+void register_sched_domain_sysctl(void); -+void unregister_sched_domain_sysctl(void); -+#else -+static inline void register_sched_domain_sysctl(void) -+{ -+} -+static inline void unregister_sched_domain_sysctl(void) -+{ -+} -+#endif -+#endif /* CONFIG_SMP */ -+ -+#ifndef arch_scale_freq_capacity -+static __always_inline -+unsigned long arch_scale_freq_capacity(int cpu) -+{ -+ return SCHED_CAPACITY_SCALE; -+} -+#endif -+ -+static inline u64 __rq_clock_broken(struct rq *rq) -+{ -+ return READ_ONCE(rq->clock); -+} -+ -+static inline u64 rq_clock(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock; -+} -+ -+static inline u64 rq_clock_task(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held 
rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock_task; -+} -+ -+/* -+ * {de,en}queue flags: -+ * -+ * DEQUEUE_SLEEP - task is no longer runnable -+ * ENQUEUE_WAKEUP - task just became runnable -+ * -+ */ -+ -+#define DEQUEUE_SLEEP 0x01 -+ -+#define ENQUEUE_WAKEUP 0x01 -+ -+ -+/* -+ * Below are scheduler API which using in other kernel code -+ * It use the dummy rq_flags -+ * ToDo : BMQ need to support these APIs for compatibility with mainline -+ * scheduler code. -+ */ -+struct rq_flags { -+ unsigned long flags; -+}; -+ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock); -+ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock); -+ -+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(&rq->lock); -+} -+ -+static inline void -+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) -+ __releases(rq->lock) -+ __releases(p->pi_lock) -+{ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+} -+ -+static inline void -+rq_unlock_irq(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+static inline struct rq * -+this_rq_lock_irq(struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ -+ return rq; -+} -+ -+static inline bool task_running(struct task_struct *p) -+{ -+ return p->on_cpu; -+} -+ -+extern struct static_key_false sched_schedstats; -+ -+static inline void sched_ttwu_pending(void) { } -+ -+#ifdef CONFIG_CPU_IDLE -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+ rq->idle_state = idle_state; -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ WARN_ON(!rcu_read_lock_held()); -+ return rq->idle_state; -+} -+#else -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ return NULL; -+} -+#endif -+ -+static inline int cpu_of(const struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ return rq->cpu; -+#else -+ return 0; -+#endif -+} -+ -+#include "stats.h" -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+struct irqtime { -+ u64 total; -+ u64 tick_delta; -+ u64 irq_start_time; -+ struct u64_stats_sync sync; -+}; -+ -+DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -+ -+/* -+ * Returns the irqtime minus the softirq time computed by ksoftirqd. -+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime -+ * and never move forward. -+ */ -+static inline u64 irq_time_read(int cpu) -+{ -+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); -+ unsigned int seq; -+ u64 total; -+ -+ do { -+ seq = __u64_stats_fetch_begin(&irqtime->sync); -+ total = irqtime->total; -+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); -+ -+ return total; -+} -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+ -+#ifdef CONFIG_CPU_FREQ -+DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); -+ -+/** -+ * cpufreq_update_util - Take a note about CPU utilization changes. -+ * @rq: Runqueue to carry out the update for. -+ * @flags: Update reason flags. -+ * -+ * This function is called by the scheduler on the CPU whose utilization is -+ * being updated. -+ * -+ * It can only be called from RCU-sched read-side critical sections. 
-+ * -+ * The way cpufreq is currently arranged requires it to evaluate the CPU -+ * performance state (frequency/voltage) on a regular basis to prevent it from -+ * being stuck in a completely inadequate performance level for too long. -+ * That is not guaranteed to happen if the updates are only triggered from CFS -+ * and DL, though, because they may not be coming in if only RT tasks are -+ * active all the time (or there are RT tasks only). -+ * -+ * As a workaround for that issue, this function is called periodically by the -+ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, -+ * but that really is a band-aid. Going forward it should be replaced with -+ * solutions targeted more specifically at RT tasks. -+ */ -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+ data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); -+ if (data) -+ data->func(data, rq_clock(rq), flags); -+} -+#else -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} -+#endif /* CONFIG_CPU_FREQ */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+extern int __init sched_tick_offload_init(void); -+#else -+static inline int sched_tick_offload_init(void) { return 0; } -+#endif -+ -+#ifdef arch_scale_freq_capacity -+#ifndef arch_scale_freq_invariant -+#define arch_scale_freq_invariant() (true) -+#endif -+#else /* arch_scale_freq_capacity */ -+#define arch_scale_freq_invariant() (false) -+#endif -+ -+extern void schedule_idle(void); -+ -+/* -+ * !! For sched_setattr_nocheck() (kernel) only !! -+ * -+ * This is actually gross. :( -+ * -+ * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE -+ * tasks, but still be able to sleep. We need this on platforms that cannot -+ * atomically change clock frequency. Remove once fast switching will be -+ * available on such platforms. -+ * -+ * SUGOV stands for SchedUtil GOVernor. -+ */ -+#define SCHED_FLAG_SUGOV 0x10000000 -+ -+#ifdef CONFIG_MEMBARRIER -+/* -+ * The scheduler provides memory barriers required by membarrier between: -+ * - prior user-space memory accesses and store to rq->membarrier_state, -+ * - store to rq->membarrier_state and following user-space memory accesses. -+ * In the same way it provides those guarantees around store to rq->curr. 
-+ */ -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+ int membarrier_state; -+ -+ if (prev_mm == next_mm) -+ return; -+ -+ membarrier_state = atomic_read(&next_mm->membarrier_state); -+ if (READ_ONCE(rq->membarrier_state) == membarrier_state) -+ return; -+ -+ WRITE_ONCE(rq->membarrier_state, membarrier_state); -+} -+#else -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+} -+#endif -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); -+} -+ -+#ifdef CONFIG_NUMA -+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); -+#else -+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return nr_cpu_ids; -+} -+#endif -+#endif /* BMQ_SCHED_H */ -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 86800b4d5453..a816aafa6ba3 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -185,6 +185,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - -+#ifndef CONFIG_SCHED_BMQ - /* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. -@@ -302,6 +303,13 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) - - return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); - } -+#else /* CONFIG_SCHED_BMQ */ -+static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) -+{ -+ sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); -+ return sg_cpu->max; -+} -+#endif - - /** - * sugov_iowait_reset() - Reset the IO boost status of a CPU. -@@ -445,7 +453,9 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } - */ - static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) - { -+#ifndef CONFIG_SCHED_BMQ - if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) -+#endif - sg_policy->limits_changed = true; - } - -@@ -688,6 +698,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - } - - ret = sched_setattr_nocheck(thread, &attr); -+ - if (ret) { - kthread_stop(thread); - pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); -@@ -918,6 +929,7 @@ static int __init sugov_register(void) - fs_initcall(sugov_register); - - #ifdef CONFIG_ENERGY_MODEL -+#ifndef CONFIG_SCHED_BMQ - extern bool sched_energy_update; - extern struct mutex sched_energy_mutex; - -@@ -948,4 +960,10 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - } - - } -+#else /* CONFIG_SCHED_BMQ */ -+void sched_cpufreq_governor_change(struct cpufreq_policy *policy, -+ struct cpufreq_governor *old_gov) -+{ -+} -+#endif - #endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index 46ed4e1383e2..51460a446da0 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -122,7 +122,7 @@ void account_user_time(struct task_struct *p, u64 cputime) - p->utime += cputime; - account_group_user_time(p, cputime); - -- index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; -+ index = task_running_nice(p) ? CPUTIME_NICE : CPUTIME_USER; - - /* Add user time to cpustat. 
*/ - task_group_account_field(p, index, cputime); -@@ -146,7 +146,7 @@ void account_guest_time(struct task_struct *p, u64 cputime) - p->gtime += cputime; - - /* Add guest time to cpustat. */ -- if (task_nice(p) > 0) { -+ if (task_running_nice(p)) { - cpustat[CPUTIME_NICE] += cputime; - cpustat[CPUTIME_GUEST_NICE] += cputime; - } else { -@@ -269,7 +269,7 @@ static inline u64 account_other_time(u64 max) - #ifdef CONFIG_64BIT - static inline u64 read_sum_exec_runtime(struct task_struct *t) - { -- return t->se.sum_exec_runtime; -+ return tsk_seruntime(t); - } - #else - static u64 read_sum_exec_runtime(struct task_struct *t) -@@ -279,7 +279,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) - struct rq *rq; - - rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -+ ns = tsk_seruntime(t); - task_rq_unlock(rq, t, &rf); - - return ns; -@@ -663,7 +663,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - task_cputime(p, &cputime.utime, &cputime.stime); -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index f65ef1e2f204..77bf219444fa 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -355,6 +355,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_BMQ - /* - * idle-task scheduling class. - */ -@@ -479,3 +480,4 @@ const struct sched_class idle_sched_class = { - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif -diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c -index a96db50d40e0..22c20e28b613 100644 ---- a/kernel/sched/pelt.c -+++ b/kernel/sched/pelt.c -@@ -236,6 +236,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runna - WRITE_ONCE(sa->util_avg, sa->util_sum / divider); - } - -+#ifndef CONFIG_SCHED_BMQ - /* - * sched_entity: - * -@@ -352,6 +353,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - - return 0; - } -+#endif - - #ifdef CONFIG_HAVE_SCHED_AVG_IRQ - /* -diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h -index afff644da065..4da52afaeff8 100644 ---- a/kernel/sched/pelt.h -+++ b/kernel/sched/pelt.h -@@ -1,11 +1,13 @@ - #ifdef CONFIG_SMP - #include "sched-pelt.h" - -+#ifndef CONFIG_SCHED_BMQ - int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); - int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); - int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); - int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); - int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); -+#endif - - #ifdef CONFIG_HAVE_SCHED_AVG_IRQ - int update_irq_load_avg(struct rq *rq, u64 running); -@@ -17,6 +19,7 @@ update_irq_load_avg(struct rq *rq, u64 running) - } - #endif - -+#ifndef CONFIG_SCHED_BMQ - /* - * When a task is dequeued, its estimated utilization should not be update if - * its util_avg has not been updated at least once. 
-@@ -137,9 +140,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) - return rq_clock_pelt(rq_of(cfs_rq)); - } - #endif -+#endif /* CONFIG_SCHED_BMQ */ - - #else - -+#ifndef CONFIG_SCHED_BMQ - static inline int - update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) - { -@@ -157,6 +162,7 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - { - return 0; - } -+#endif - - static inline int - update_irq_load_avg(struct rq *rq, u64 running) -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index c8870c5bd7df..4bca9838b6f0 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2,6 +2,10 @@ - /* - * Scheduler internal types and methods: - */ -+#ifdef CONFIG_SCHED_BMQ -+#include "bmq_sched.h" -+#else -+ - #include - - #include -@@ -2496,3 +2500,9 @@ static inline void membarrier_switch_mm(struct rq *rq, - { - } - #endif -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (task_nice(p) > 0); -+} -+#endif /* !CONFIG_SCHED_BMQ */ -diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c -index 750fb3c67eed..0cc040a28d3f 100644 ---- a/kernel/sched/stats.c -+++ b/kernel/sched/stats.c -@@ -22,8 +22,10 @@ static int show_schedstat(struct seq_file *seq, void *v) - } else { - struct rq *rq; - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_BMQ - struct sched_domain *sd; - int dcount = 0; -+#endif - #endif - cpu = (unsigned long)(v - 2); - rq = cpu_rq(cpu); -@@ -40,6 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - seq_printf(seq, "\n"); - - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_BMQ - /* domain-specific stats */ - rcu_read_lock(); - for_each_domain(cpu, sd) { -@@ -68,6 +71,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - sd->ttwu_move_balance); - } - rcu_read_unlock(); -+#endif - #endif - } - return 0; -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index b6f2f35d0bcf..435440943455 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -132,6 +132,10 @@ static unsigned long one_ul = 1; - static unsigned long long_max = LONG_MAX; - static int one_hundred = 100; - static int one_thousand = 1000; -+#ifdef CONFIG_SCHED_BMQ -+static int __maybe_unused zero = 0; -+extern int sched_yield_type; -+#endif - #ifdef CONFIG_PRINTK - static int ten_thousand = 10000; - #endif -@@ -300,7 +304,7 @@ static struct ctl_table sysctl_base_table[] = { - { } - }; - --#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_BMQ) - static int min_sched_granularity_ns = 100000; /* 100 usecs */ - static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_wakeup_granularity_ns; /* 0 usecs */ -@@ -317,6 +321,7 @@ static int max_extfrag_threshold = 1000; - #endif - - static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_BMQ - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, -@@ -498,6 +503,7 @@ static struct ctl_table kern_table[] = { - .extra2 = SYSCTL_ONE, - }, - #endif -+#endif /* !CONFIG_SCHED_BMQ */ - #ifdef CONFIG_PROVE_LOCKING - { - .procname = "prove_locking", -@@ -1070,6 +1076,17 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_SCHED_BMQ -+ { -+ .procname = "yield_type", -+ .data = &sched_yield_type, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &two, -+ }, -+#endif - #if defined(CONFIG_S390) && defined(CONFIG_SMP) - { - .procname = "spin_retry", -diff --git a/kernel/time/posix-cpu-timers.c 
b/kernel/time/posix-cpu-timers.c -index 42d512fcfda2..70b97fe0ff44 100644 ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -226,7 +226,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) - u64 stime, utime; - - task_cputime(p, &utime, &stime); -- store_samples(samples, stime, utime, p->se.sum_exec_runtime); -+ store_samples(samples, stime, utime, tsk_seruntime(p)); - } - - static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, -@@ -796,6 +796,7 @@ static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, - } - } - -+#ifndef CONFIG_SCHED_BMQ - static inline void check_dl_overrun(struct task_struct *tsk) - { - if (tsk->dl.dl_overrun) { -@@ -803,6 +804,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) - __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); - } - } -+#endif - - static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) - { -@@ -830,8 +832,10 @@ static void check_thread_timers(struct task_struct *tsk, - u64 samples[CPUCLOCK_MAX]; - unsigned long soft; - -+#ifndef CONFIG_SCHED_BMQ - if (dl_task(tsk)) - check_dl_overrun(tsk); -+#endif - - if (expiry_cache_is_inactive(pct)) - return; -@@ -845,7 +849,7 @@ static void check_thread_timers(struct task_struct *tsk, - soft = task_rlimit(tsk, RLIMIT_RTTIME); - if (soft != RLIM_INFINITY) { - /* Task RT timeout is accounted in jiffies. RTTIME is usec */ -- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); -+ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); - unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - - /* At the hard limit, send SIGKILL. No further action. */ -@@ -1099,8 +1103,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) - return true; - } - -+#ifndef CONFIG_SCHED_BMQ - if (dl_task(tsk) && tsk->dl.dl_overrun) - return true; -+#endif - - return false; - } -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index 69ee8ef12cee..208788fcbb0e 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) - { - /* Make this a -deadline thread */ - static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_BMQ -+ /* No deadline on BMQ, use RR */ -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, - .sched_runtime = 100000ULL, - .sched_deadline = 10000000ULL, - .sched_period = 10000000ULL -+#endif - }; - struct wakeup_test_data *x = data; - diff --git a/linux54-tkg/linux54-tkg-patches/0009-glitched-bmq.patch b/linux54-tkg/linux54-tkg-patches/0009-glitched-bmq.patch deleted file mode 100644 index 5e78811..0000000 --- a/linux54-tkg/linux54-tkg-patches/0009-glitched-bmq.patch +++ /dev/null @@ -1,108 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - BMQ - -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 6b423eebfd5d..61e3271675d6 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -21,10 +21,10 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (63) --#define DEF_SAMPLING_DOWN_FACTOR (1) -+#define DEF_FREQUENCY_UP_THRESHOLD (55) -+#define DEF_SAMPLING_DOWN_FACTOR (5) - #define MAX_SAMPLING_DOWN_FACTOR (100000) --#define MICRO_FREQUENCY_UP_THRESHOLD (95) -+#define 
MICRO_FREQUENCY_UP_THRESHOLD (63) - #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) - #define MIN_FREQUENCY_UP_THRESHOLD (1) - #define MAX_FREQUENCY_UP_THRESHOLD (100) -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9270a4370d54..30d01e647417 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -159,7 +159,7 @@ struct scan_control { - /* - * From 0 .. 100. Higher means more swappy. - */ --int vm_swappiness = 60; -+int vm_swappiness = 20; - /* - * The total number of pages which are beyond the high watermark within all - * zones. 
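As a rough sketch of what the 500 Hz and 750 Hz tick options added in the Kconfig.hz hunks above mean in practice, the following standalone program (hypothetical, not part of any patch in this commit) prints the tick period for each timer frequency listed in those hunks; only the HZ values themselves are taken from the patch text, everything else is assumed for illustration.

#include <stdio.h>

int main(void)
{
	/* Timer frequencies named in the Kconfig.hz hunks above. */
	const int hz_choices[] = { 100, 250, 300, 500, 750, 1000 };
	const int n = sizeof(hz_choices) / sizeof(hz_choices[0]);

	for (int i = 0; i < n; i++) {
		/* One scheduler tick lasts 1/HZ seconds; show it in microseconds. */
		printf("HZ=%-4d -> tick period %.0f us\n",
		       hz_choices[i], 1e6 / hz_choices[i]);
	}
	return 0;
}

At 500 Hz the tick fires every 2 ms and at 750 Hz roughly every 1.3 ms, which is the interactivity-versus-throughput trade-off the help texts in the hunks describe.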
diff --git a/linux54-tkg/linux54-tkg-patches/0011-ZFS-fix.patch b/linux54-tkg/linux54-tkg-patches/0011-ZFS-fix.patch deleted file mode 100644 index af71d04..0000000 --- a/linux54-tkg/linux54-tkg-patches/0011-ZFS-fix.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 1e010beda2896bdf3082fb37a3e49f8ce20e04d8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= -Date: Thu, 2 May 2019 05:28:08 +0100 -Subject: [PATCH] x86/fpu: Export kernel_fpu_{begin,end}() with - EXPORT_SYMBOL_GPL -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -We need these symbols in zfs as the fpu implementation breaks userspace: - -https://github.com/zfsonlinux/zfs/issues/9346 -Signed-off-by: Jörg Thalheim ---- - arch/x86/kernel/fpu/core.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c -index 12c70840980e..352538b3bb5d 100644 ---- a/arch/x86/kernel/fpu/core.c -+++ b/arch/x86/kernel/fpu/core.c -@@ -102,7 +102,7 @@ void kernel_fpu_begin(void) - } - __cpu_invalidate_fpregs_state(); - } --EXPORT_SYMBOL_GPL(kernel_fpu_begin); -+EXPORT_SYMBOL(kernel_fpu_begin); - - void kernel_fpu_end(void) - { -@@ -111,7 +111,7 @@ void kernel_fpu_end(void) - this_cpu_write(in_kernel_fpu, false); - preempt_enable(); - } --EXPORT_SYMBOL_GPL(kernel_fpu_end); -+EXPORT_SYMBOL(kernel_fpu_end); - - /* - * Save the FPU state (mark it for reload if necessary): --- -2.23.0 - - diff --git a/linux54-tkg/linux54-tkg-patches/0012-linux-hardened.patch b/linux54-tkg/linux54-tkg-patches/0012-linux-hardened.patch deleted file mode 100644 index b50ec74..0000000 --- a/linux54-tkg/linux54-tkg-patches/0012-linux-hardened.patch +++ /dev/null @@ -1,2806 +0,0 @@ -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 5594c8bf1dcd..ac80978f4629 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -505,16 +505,6 @@ - nosocket -- Disable socket memory accounting. - nokmem -- Disable kernel memory accounting. - -- checkreqprot [SELINUX] Set initial checkreqprot flag value. -- Format: { "0" | "1" } -- See security/selinux/Kconfig help text. -- 0 -- check protection applied by kernel (includes -- any implied execute protection). -- 1 -- check protection requested by application. -- Default value is set via a kernel config option. -- Value can be changed at runtime via -- /selinux/checkreqprot. -- - cio_ignore= [S390] - See Documentation/s390/common_io.rst for details. - clk_ignore_unused -@@ -3345,6 +3335,11 @@ - the specified number of seconds. This is to be used if - your oopses keep scrolling off the screen. - -+ extra_latent_entropy -+ Enable a very simple form of latent entropy extraction -+ from the first 4GB of memory as the bootmem allocator -+ passes the memory pages to the buddy allocator. -+ - pcbit= [HW,ISDN] - - pcd. 
[PARIDE] -diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index 032c7cd3cede..cc3491b05976 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -102,6 +102,7 @@ show up in /proc/sys/kernel: - - sysctl_writes_strict - - tainted ==> Documentation/admin-guide/tainted-kernels.rst - - threads-max -+- tiocsti_restrict - - unknown_nmi_panic - - watchdog - - watchdog_thresh -@@ -1114,6 +1115,25 @@ thread structures would occupy too much (more than 1/8th) of the - available RAM pages threads-max is reduced accordingly. - - -+tiocsti_restrict: -+================= -+ -+This toggle indicates whether unprivileged users are prevented from using the -+TIOCSTI ioctl to inject commands into other processes which share a tty -+session. -+ -+When tiocsti_restrict is set to (0) there are no restrictions(accept the -+default restriction of only being able to injection commands into one's own -+tty). When tiocsti_restrict is set to (1), users must have CAP_SYS_ADMIN to -+use the TIOCSTI ioctl. -+ -+When user namespaces are in use, the check for the capability CAP_SYS_ADMIN is -+done against the user namespace that originally opened the tty. -+ -+The kernel config option CONFIG_SECURITY_TIOCSTI_RESTRICT sets the default -+value of tiocsti_restrict. -+ -+ - unknown_nmi_panic: - ================== - -diff --git a/arch/Kconfig b/arch/Kconfig -index 5f8a5d84dbbe..60103a76d33e 100644 ---- a/arch/Kconfig -+++ b/arch/Kconfig -@@ -653,7 +653,7 @@ config ARCH_MMAP_RND_BITS - int "Number of bits to use for ASLR of mmap base address" if EXPERT - range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX - default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT -- default ARCH_MMAP_RND_BITS_MIN -+ default ARCH_MMAP_RND_BITS_MAX - depends on HAVE_ARCH_MMAP_RND_BITS - help - This value can be used to select the number of bits to use to -@@ -687,7 +687,7 @@ config ARCH_MMAP_RND_COMPAT_BITS - int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT - range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX - default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT -- default ARCH_MMAP_RND_COMPAT_BITS_MIN -+ default ARCH_MMAP_RND_COMPAT_BITS_MAX - depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS - help - This value can be used to select the number of bits to use to -@@ -906,6 +906,7 @@ config ARCH_HAS_REFCOUNT - - config REFCOUNT_FULL - bool "Perform full reference count validation at the expense of speed" -+ default y - help - Enabling this switches the refcounting infrastructure from a fast - unchecked atomic_t implementation to a fully state checked -diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig -index 6ccd2ed30963..56d39ec3c2c3 100644 ---- a/arch/arm64/Kconfig -+++ b/arch/arm64/Kconfig -@@ -1139,6 +1139,7 @@ config RODATA_FULL_DEFAULT_ENABLED - - config ARM64_SW_TTBR0_PAN - bool "Emulate Privileged Access Never using TTBR0_EL1 switching" -+ default y - help - Enabling this option prevents the kernel from accessing - user-space memory directly by pointing TTBR0_EL1 to a reserved -@@ -1538,6 +1539,7 @@ config RANDOMIZE_BASE - bool "Randomize the address of the kernel image" - select ARM64_MODULE_PLTS if MODULES - select RELOCATABLE -+ default y - help - Randomizes the virtual address at which the kernel image is - loaded, as a security feature that deters exploit attempts -diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug 
-index cf09010d825f..dc4083ceff57 100644 ---- a/arch/arm64/Kconfig.debug -+++ b/arch/arm64/Kconfig.debug -@@ -43,6 +43,7 @@ config ARM64_RANDOMIZE_TEXT_OFFSET - config DEBUG_WX - bool "Warn on W+X mappings at boot" - select ARM64_PTDUMP_CORE -+ default y - ---help--- - Generate a warning if any W+X mappings are found at boot. - -diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig -index c9a867ac32d4..5c4d264f6a6e 100644 ---- a/arch/arm64/configs/defconfig -+++ b/arch/arm64/configs/defconfig -@@ -1,4 +1,3 @@ --CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_AUDIT=y - CONFIG_NO_HZ_IDLE=y -diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h -index b618017205a3..0a228dbcad65 100644 ---- a/arch/arm64/include/asm/elf.h -+++ b/arch/arm64/include/asm/elf.h -@@ -103,14 +103,10 @@ - - /* - * This is the base location for PIE (ET_DYN with INTERP) loads. On -- * 64-bit, this is above 4GB to leave the entire 32-bit address -+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address - * space open for things that want to use the area for 32-bit pointers. - */ --#ifdef CONFIG_ARM64_FORCE_52BIT --#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) --#else --#define ELF_ET_DYN_BASE (2 * DEFAULT_MAP_WINDOW_64 / 3) --#endif /* CONFIG_ARM64_FORCE_52BIT */ -+#define ELF_ET_DYN_BASE 0x100000000UL - - #ifndef __ASSEMBLY__ - -@@ -164,10 +160,10 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, - /* 1GB of VA */ - #ifdef CONFIG_COMPAT - #define STACK_RND_MASK (test_thread_flag(TIF_32BIT) ? \ -- 0x7ff >> (PAGE_SHIFT - 12) : \ -- 0x3ffff >> (PAGE_SHIFT - 12)) -+ ((1UL << mmap_rnd_compat_bits) - 1) >> (PAGE_SHIFT - 12) : \ -+ ((1UL << mmap_rnd_bits) - 1) >> (PAGE_SHIFT - 12)) - #else --#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) -+#define STACK_RND_MASK (((1UL << mmap_rnd_bits) - 1) >> (PAGE_SHIFT - 12)) - #endif - - #ifdef __AARCH64EB__ -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 8ef85139553f..e16076b30625 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1219,8 +1219,7 @@ config VM86 - default X86_LEGACY_VM86 - - config X86_16BIT -- bool "Enable support for 16-bit segments" if EXPERT -- default y -+ bool "Enable support for 16-bit segments" - depends on MODIFY_LDT_SYSCALL - ---help--- - This option is required by programs like Wine to run 16-bit -@@ -2365,7 +2364,7 @@ config COMPAT_VDSO - choice - prompt "vsyscall table for legacy applications" - depends on X86_64 -- default LEGACY_VSYSCALL_XONLY -+ default LEGACY_VSYSCALL_NONE - help - Legacy user code that does not know how to find the vDSO expects - to be able to issue three syscalls by calling fixed addresses in -@@ -2461,8 +2460,7 @@ config CMDLINE_OVERRIDE - be set to 'N' under normal conditions. - - config MODIFY_LDT_SYSCALL -- bool "Enable the LDT (local descriptor table)" if EXPERT -- default y -+ bool "Enable the LDT (local descriptor table)" - ---help--- - Linux can allow user programs to install a per-process x86 - Local Descriptor Table (LDT) using the modify_ldt(2) system -diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug -index bf9cd83de777..13ef90f3de52 100644 ---- a/arch/x86/Kconfig.debug -+++ b/arch/x86/Kconfig.debug -@@ -91,6 +91,7 @@ config EFI_PGT_DUMP - config DEBUG_WX - bool "Warn on W+X mappings at boot" - select X86_PTDUMP_CORE -+ default y - ---help--- - Generate a warning if any W+X mappings are found at boot. 
- -diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig -index d0a5ffeae8df..2a91d4a9f640 100644 ---- a/arch/x86/configs/x86_64_defconfig -+++ b/arch/x86/configs/x86_64_defconfig -@@ -1,5 +1,4 @@ - # CONFIG_LOCALVERSION_AUTO is not set --CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_TASKSTATS=y -diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c -index f5937742b290..6655ce228e25 100644 ---- a/arch/x86/entry/vdso/vma.c -+++ b/arch/x86/entry/vdso/vma.c -@@ -198,55 +198,9 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) - } - - #ifdef CONFIG_X86_64 --/* -- * Put the vdso above the (randomized) stack with another randomized -- * offset. This way there is no hole in the middle of address space. -- * To save memory make sure it is still in the same PTE as the stack -- * top. This doesn't give that many random bits. -- * -- * Note that this algorithm is imperfect: the distribution of the vdso -- * start address within a PMD is biased toward the end. -- * -- * Only used for the 64-bit and x32 vdsos. -- */ --static unsigned long vdso_addr(unsigned long start, unsigned len) --{ -- unsigned long addr, end; -- unsigned offset; -- -- /* -- * Round up the start address. It can start out unaligned as a result -- * of stack start randomization. -- */ -- start = PAGE_ALIGN(start); -- -- /* Round the lowest possible end address up to a PMD boundary. */ -- end = (start + len + PMD_SIZE - 1) & PMD_MASK; -- if (end >= TASK_SIZE_MAX) -- end = TASK_SIZE_MAX; -- end -= len; -- -- if (end > start) { -- offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); -- addr = start + (offset << PAGE_SHIFT); -- } else { -- addr = start; -- } -- -- /* -- * Forcibly align the final address in case we have a hardware -- * issue that requires alignment for performance reasons. -- */ -- addr = align_vdso_addr(addr); -- -- return addr; --} -- - static int map_vdso_randomized(const struct vdso_image *image) - { -- unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start); -- -- return map_vdso(image, addr); -+ return map_vdso(image, 0); - } - #endif - -diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h -index 69c0f892e310..f9f7a85bb71e 100644 ---- a/arch/x86/include/asm/elf.h -+++ b/arch/x86/include/asm/elf.h -@@ -248,11 +248,11 @@ extern int force_personality32; - - /* - * This is the base location for PIE (ET_DYN with INTERP) loads. On -- * 64-bit, this is above 4GB to leave the entire 32-bit address -+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address - * space open for things that want to use the area for 32-bit pointers. - */ - #define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ -- (DEFAULT_MAP_WINDOW / 3 * 2)) -+ 0x100000000UL) - - /* This yields a mask that user programs can use to figure out what - instruction set this CPU supports. This could be done in user space, -@@ -312,8 +312,8 @@ extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); - - #ifdef CONFIG_X86_32 - --#define __STACK_RND_MASK(is32bit) (0x7ff) --#define STACK_RND_MASK (0x7ff) -+#define __STACK_RND_MASK(is32bit) ((1UL << mmap_rnd_bits) - 1) -+#define STACK_RND_MASK ((1UL << mmap_rnd_bits) - 1) - - #define ARCH_DLINFO ARCH_DLINFO_IA32 - -@@ -322,7 +322,11 @@ extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); - #else /* CONFIG_X86_32 */ - - /* 1GB for 64bit, 8MB for 32bit */ --#define __STACK_RND_MASK(is32bit) ((is32bit) ? 
0x7ff : 0x3fffff) -+#ifdef CONFIG_COMPAT -+#define __STACK_RND_MASK(is32bit) ((is32bit) ? (1UL << mmap_rnd_compat_bits) - 1 : (1UL << mmap_rnd_bits) - 1) -+#else -+#define __STACK_RND_MASK(is32bit) ((1UL << mmap_rnd_bits) - 1) -+#endif - #define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32()) - - #define ARCH_DLINFO \ -@@ -380,5 +384,4 @@ struct va_alignment { - } ____cacheline_aligned; - - extern struct va_alignment va_align; --extern unsigned long align_vdso_addr(unsigned long); - #endif /* _ASM_X86_ELF_H */ -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 6f66d841262d..b786e7cb395d 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -295,6 +295,7 @@ static inline void cr4_set_bits_irqsoff(unsigned long mask) - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); -+ BUG_ON(cr4 != __read_cr4()); - if ((cr4 | mask) != cr4) - __cr4_set(cr4 | mask); - } -@@ -305,6 +306,7 @@ static inline void cr4_clear_bits_irqsoff(unsigned long mask) - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); -+ BUG_ON(cr4 != __read_cr4()); - if ((cr4 & ~mask) != cr4) - __cr4_set(cr4 & ~mask); - } -@@ -334,6 +336,7 @@ static inline void cr4_toggle_bits_irqsoff(unsigned long mask) - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); -+ BUG_ON(cr4 != __read_cr4()); - __cr4_set(cr4 ^ mask); - } - -@@ -440,6 +443,7 @@ static inline void __native_flush_tlb_global(void) - raw_local_irq_save(flags); - - cr4 = this_cpu_read(cpu_tlbstate.cr4); -+ BUG_ON(cr4 != __read_cr4()); - /* toggle PGE */ - native_write_cr4(cr4 ^ X86_CR4_PGE); - /* write old PGE again and flush TLBs */ -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index fffe21945374..e9e124eb6ccb 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -1854,7 +1854,6 @@ void cpu_init(void) - wrmsrl(MSR_KERNEL_GS_BASE, 0); - barrier(); - -- x86_configure_nx(); - x2apic_setup(); - - /* -diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index 5e94c4354d4e..093bd8ad1130 100644 ---- a/arch/x86/kernel/process.c -+++ b/arch/x86/kernel/process.c -@@ -42,6 +42,8 @@ - #include - #include - #include -+#include -+#include - - #include "process.h" - -@@ -798,7 +800,10 @@ unsigned long arch_align_stack(unsigned long sp) - - unsigned long arch_randomize_brk(struct mm_struct *mm) - { -- return randomize_page(mm->brk, 0x02000000); -+ if (mmap_is_ia32()) -+ return mm->brk + get_random_long() % SZ_32M + PAGE_SIZE; -+ else -+ return mm->brk + get_random_long() % SZ_1G + PAGE_SIZE; - } - - /* -diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c -index f7476ce23b6e..652169a2b23a 100644 ---- a/arch/x86/kernel/sys_x86_64.c -+++ b/arch/x86/kernel/sys_x86_64.c -@@ -54,13 +54,6 @@ static unsigned long get_align_bits(void) - return va_align.bits & get_align_mask(); - } - --unsigned long align_vdso_addr(unsigned long addr) --{ -- unsigned long align_mask = get_align_mask(); -- addr = (addr + align_mask) & ~align_mask; -- return addr | get_align_bits(); --} -- - static int __init control_va_addr_alignment(char *str) - { - /* guard against enabling this on other CPU families */ -@@ -122,10 +115,7 @@ static void find_start_end(unsigned long addr, unsigned long flags, - } - - *begin = get_mmap_base(1); -- if (in_32bit_syscall()) -- *end = task_size_32bit(); -- else -- *end = task_size_64bit(addr > DEFAULT_MAP_WINDOW); -+ *end = get_mmap_base(0); - } - - unsigned long -@@ -210,7 +200,7 @@ 
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, - - info.flags = VM_UNMAPPED_AREA_TOPDOWN; - info.length = len; -- info.low_limit = PAGE_SIZE; -+ info.low_limit = get_mmap_base(1); - info.high_limit = get_mmap_base(0); - - /* -diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c -index 0a74407ef92e..5ceff405c81c 100644 ---- a/arch/x86/mm/init_32.c -+++ b/arch/x86/mm/init_32.c -@@ -560,9 +560,9 @@ static void __init pagetable_init(void) - - #define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL) - /* Bits supported by the hardware: */ --pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK; -+pteval_t __supported_pte_mask __ro_after_init = DEFAULT_PTE_MASK; - /* Bits allowed in normal kernel mappings: */ --pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK; -+pteval_t __default_kernel_pte_mask __ro_after_init = DEFAULT_PTE_MASK; - EXPORT_SYMBOL_GPL(__supported_pte_mask); - /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */ - EXPORT_SYMBOL(__default_kernel_pte_mask); -diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c -index b8541d77452c..a231504e0348 100644 ---- a/arch/x86/mm/init_64.c -+++ b/arch/x86/mm/init_64.c -@@ -97,9 +97,9 @@ DEFINE_ENTRY(pte, pte, init) - */ - - /* Bits supported by the hardware: */ --pteval_t __supported_pte_mask __read_mostly = ~0; -+pteval_t __supported_pte_mask __ro_after_init = ~0; - /* Bits allowed in normal kernel mappings: */ --pteval_t __default_kernel_pte_mask __read_mostly = ~0; -+pteval_t __default_kernel_pte_mask __ro_after_init = ~0; - EXPORT_SYMBOL_GPL(__supported_pte_mask); - /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */ - EXPORT_SYMBOL(__default_kernel_pte_mask); -diff --git a/block/blk-softirq.c b/block/blk-softirq.c -index 457d9ba3eb20..5f987fc1c0a0 100644 ---- a/block/blk-softirq.c -+++ b/block/blk-softirq.c -@@ -20,7 +20,7 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done); - * Softirq action handler - move entries to local list and loop over them - * while passing them to the queue registered handler. - */ --static __latent_entropy void blk_done_softirq(struct softirq_action *h) -+static __latent_entropy void blk_done_softirq(void) - { - struct list_head *cpu_list, local_list; - -diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c -index 84b183a6424e..b83bff5e9ab5 100644 ---- a/drivers/ata/libata-core.c -+++ b/drivers/ata/libata-core.c -@@ -5143,7 +5143,7 @@ void ata_qc_free(struct ata_queued_cmd *qc) - struct ata_port *ap; - unsigned int tag; - -- WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ -+ BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ - ap = qc->ap; - - qc->flags = 0; -@@ -5160,7 +5160,7 @@ void __ata_qc_complete(struct ata_queued_cmd *qc) - struct ata_port *ap; - struct ata_link *link; - -- WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ -+ BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ - WARN_ON_ONCE(!(qc->flags & ATA_QCFLAG_ACTIVE)); - ap = qc->ap; - link = qc->dev->link; -diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig -index df0fc997dc3e..bd8eed8de6c1 100644 ---- a/drivers/char/Kconfig -+++ b/drivers/char/Kconfig -@@ -9,7 +9,6 @@ source "drivers/tty/Kconfig" - - config DEVMEM - bool "/dev/mem virtual device support" -- default y - help - Say Y here if you want to support the /dev/mem device. 
- The /dev/mem device is used to access areas of physical -@@ -514,7 +513,6 @@ config TELCLOCK - config DEVPORT - bool "/dev/port character device" - depends on ISA || PCI -- default y - help - Say Y here if you want to support the /dev/port device. The /dev/port - device is similar to /dev/mem, but for I/O ports. -diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig -index c7623f99ac0f..859c2782c8e2 100644 ---- a/drivers/tty/Kconfig -+++ b/drivers/tty/Kconfig -@@ -122,7 +122,6 @@ config UNIX98_PTYS - - config LEGACY_PTYS - bool "Legacy (BSD) PTY support" -- default y - ---help--- - A pseudo terminal (PTY) is a software device consisting of two - halves: a master and a slave. The slave device behaves identical to -diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c -index 802c1210558f..0cc320f33cdc 100644 ---- a/drivers/tty/tty_io.c -+++ b/drivers/tty/tty_io.c -@@ -173,6 +173,7 @@ static void free_tty_struct(struct tty_struct *tty) - put_device(tty->dev); - kfree(tty->write_buf); - tty->magic = 0xDEADDEAD; -+ put_user_ns(tty->owner_user_ns); - kfree(tty); - } - -@@ -2180,11 +2181,19 @@ static int tty_fasync(int fd, struct file *filp, int on) - * FIXME: may race normal receive processing - */ - -+int tiocsti_restrict = IS_ENABLED(CONFIG_SECURITY_TIOCSTI_RESTRICT); -+ - static int tiocsti(struct tty_struct *tty, char __user *p) - { - char ch, mbz = 0; - struct tty_ldisc *ld; - -+ if (tiocsti_restrict && -+ !ns_capable(tty->owner_user_ns, CAP_SYS_ADMIN)) { -+ dev_warn_ratelimited(tty->dev, -+ "Denied TIOCSTI ioctl for non-privileged process\n"); -+ return -EPERM; -+ } - if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN)) - return -EPERM; - if (get_user(ch, p)) -@@ -3004,6 +3013,7 @@ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx) - tty->index = idx; - tty_line_name(driver, idx, tty->name); - tty->dev = tty_get_device(tty); -+ tty->owner_user_ns = get_user_ns(current_user_ns()); - - return tty; - } -diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c -index 4ac74b354801..7c2cb5b3a449 100644 ---- a/drivers/usb/core/hub.c -+++ b/drivers/usb/core/hub.c -@@ -42,6 +42,8 @@ - #define USB_TP_TRANSMISSION_DELAY 40 /* ns */ - #define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */ - -+extern int deny_new_usb; -+ - /* Protect struct usb_device->state and ->children members - * Note: Both are also protected by ->dev.sem, except that ->state can - * change to USB_STATE_NOTATTACHED even when the semaphore isn't held. 
*/ -@@ -4991,6 +4993,12 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, - goto done; - return; - } -+ -+ if (deny_new_usb) { -+ dev_err(&port_dev->dev, "denied insert of USB device on port %d\n", port1); -+ goto done; -+ } -+ - if (hub_is_superspeed(hub->hdev)) - unit_load = 150; - else -diff --git a/fs/exec.c b/fs/exec.c -index c27231234764..4038334db213 100644 ---- a/fs/exec.c -+++ b/fs/exec.c -@@ -63,6 +63,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -276,6 +277,8 @@ static int __bprm_mm_init(struct linux_binprm *bprm) - arch_bprm_mm_init(mm, vma); - up_write(&mm->mmap_sem); - bprm->p = vma->vm_end - sizeof(void *); -+ if (randomize_va_space) -+ bprm->p ^= get_random_int() & ~PAGE_MASK; - return 0; - err: - up_write(&mm->mmap_sem); -diff --git a/fs/namei.c b/fs/namei.c -index 671c3c1a3425..618ef0b5d000 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -877,10 +877,10 @@ static inline void put_link(struct nameidata *nd) - path_put(&last->link); - } - --int sysctl_protected_symlinks __read_mostly = 0; --int sysctl_protected_hardlinks __read_mostly = 0; --int sysctl_protected_fifos __read_mostly; --int sysctl_protected_regular __read_mostly; -+int sysctl_protected_symlinks __read_mostly = 1; -+int sysctl_protected_hardlinks __read_mostly = 1; -+int sysctl_protected_fifos __read_mostly = 2; -+int sysctl_protected_regular __read_mostly = 2; - - /** - * may_follow_link - Check symlink following for unsafe situations -diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig -index 295a7a21b774..3aed361bc0f9 100644 ---- a/fs/nfs/Kconfig -+++ b/fs/nfs/Kconfig -@@ -195,4 +195,3 @@ config NFS_DEBUG - bool - depends on NFS_FS && SUNRPC_DEBUG - select CRC32 -- default y -diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig -index cb5629bd5fff..bc44606fcc48 100644 ---- a/fs/proc/Kconfig -+++ b/fs/proc/Kconfig -@@ -41,7 +41,6 @@ config PROC_KCORE - config PROC_VMCORE - bool "/proc/vmcore support" - depends on PROC_FS && CRASH_DUMP -- default y - help - Exports the dump image of crashed kernel in ELF format. 
- -diff --git a/fs/stat.c b/fs/stat.c -index c38e4c2e1221..6135fbaf7298 100644 ---- a/fs/stat.c -+++ b/fs/stat.c -@@ -40,8 +40,13 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) - stat->gid = inode->i_gid; - stat->rdev = inode->i_rdev; - stat->size = i_size_read(inode); -- stat->atime = inode->i_atime; -- stat->mtime = inode->i_mtime; -+ if (is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD)) { -+ stat->atime = inode->i_ctime; -+ stat->mtime = inode->i_ctime; -+ } else { -+ stat->atime = inode->i_atime; -+ stat->mtime = inode->i_mtime; -+ } - stat->ctime = inode->i_ctime; - stat->blksize = i_blocksize(inode); - stat->blocks = inode->i_blocks; -@@ -77,9 +82,14 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, - if (IS_AUTOMOUNT(inode)) - stat->attributes |= STATX_ATTR_AUTOMOUNT; - -- if (inode->i_op->getattr) -- return inode->i_op->getattr(path, stat, request_mask, -- query_flags); -+ if (inode->i_op->getattr) { -+ int retval = inode->i_op->getattr(path, stat, request_mask, query_flags); -+ if (!retval && is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD)) { -+ stat->atime = stat->ctime; -+ stat->mtime = stat->ctime; -+ } -+ return retval; -+ } - - generic_fillattr(inode, stat); - return 0; -diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c -index d99d166fd892..7a4f2854feb8 100644 ---- a/fs/userfaultfd.c -+++ b/fs/userfaultfd.c -@@ -28,7 +28,11 @@ - #include - #include - -+#ifdef CONFIG_USERFAULTFD_UNPRIVILEGED - int sysctl_unprivileged_userfaultfd __read_mostly = 1; -+#else -+int sysctl_unprivileged_userfaultfd __read_mostly; -+#endif - - static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; - -diff --git a/include/linux/cache.h b/include/linux/cache.h -index 750621e41d1c..e7157c18c62c 100644 ---- a/include/linux/cache.h -+++ b/include/linux/cache.h -@@ -31,6 +31,8 @@ - #define __ro_after_init __attribute__((__section__(".data..ro_after_init"))) - #endif - -+#define __read_only __ro_after_init -+ - #ifndef ____cacheline_aligned - #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) - #endif -diff --git a/include/linux/capability.h b/include/linux/capability.h -index ecce0f43c73a..e46306dd4401 100644 ---- a/include/linux/capability.h -+++ b/include/linux/capability.h -@@ -208,6 +208,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap); - extern bool has_ns_capability_noaudit(struct task_struct *t, - struct user_namespace *ns, int cap); - extern bool capable(int cap); -+extern bool capable_noaudit(int cap); - extern bool ns_capable(struct user_namespace *ns, int cap); - extern bool ns_capable_noaudit(struct user_namespace *ns, int cap); - extern bool ns_capable_setid(struct user_namespace *ns, int cap); -@@ -234,6 +235,10 @@ static inline bool capable(int cap) - { - return true; - } -+static inline bool capable_noaudit(int cap) -+{ -+ return true; -+} - static inline bool ns_capable(struct user_namespace *ns, int cap) - { - return true; -diff --git a/include/linux/fs.h b/include/linux/fs.h -index 0b4d8fc79e0f..6f318e089249 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -3627,4 +3627,15 @@ static inline int inode_drain_writes(struct inode *inode) - return filemap_write_and_wait(inode->i_mapping); - } - -+extern int device_sidechannel_restrict; -+ -+static inline bool is_sidechannel_device(const struct inode *inode) -+{ -+ umode_t mode; -+ if (!device_sidechannel_restrict) -+ return false; -+ mode = inode->i_mode; -+ return ((S_ISCHR(mode) || S_ISBLK(mode)) && (mode & 
(S_IROTH | S_IWOTH))); -+} -+ - #endif /* _LINUX_FS_H */ -diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h -index a2d5d175d3c1..e91ab06119b0 100644 ---- a/include/linux/fsnotify.h -+++ b/include/linux/fsnotify.h -@@ -233,6 +233,9 @@ static inline void fsnotify_access(struct file *file) - struct inode *inode = file_inode(file); - __u32 mask = FS_ACCESS; - -+ if (is_sidechannel_device(inode)) -+ return; -+ - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - -@@ -249,6 +252,9 @@ static inline void fsnotify_modify(struct file *file) - struct inode *inode = file_inode(file); - __u32 mask = FS_MODIFY; - -+ if (is_sidechannel_device(inode)) -+ return; -+ - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - -diff --git a/include/linux/gfp.h b/include/linux/gfp.h -index 61f2f6ff9467..f9b3e3d675ae 100644 ---- a/include/linux/gfp.h -+++ b/include/linux/gfp.h -@@ -553,9 +553,9 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, - extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); - extern unsigned long get_zeroed_page(gfp_t gfp_mask); - --void *alloc_pages_exact(size_t size, gfp_t gfp_mask); -+void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __attribute__((alloc_size(1))); - void free_pages_exact(void *virt, size_t size); --void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); -+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __attribute__((alloc_size(2))); - - #define __get_free_page(gfp_mask) \ - __get_free_pages((gfp_mask), 0) -diff --git a/include/linux/highmem.h b/include/linux/highmem.h -index ea5cdbd8c2c3..805b84d6bbca 100644 ---- a/include/linux/highmem.h -+++ b/include/linux/highmem.h -@@ -215,6 +215,13 @@ static inline void clear_highpage(struct page *page) - kunmap_atomic(kaddr); - } - -+static inline void verify_zero_highpage(struct page *page) -+{ -+ void *kaddr = kmap_atomic(page); -+ BUG_ON(memchr_inv(kaddr, 0, PAGE_SIZE)); -+ kunmap_atomic(kaddr); -+} -+ - static inline void zero_user_segments(struct page *page, - unsigned start1, unsigned end1, - unsigned start2, unsigned end2) -diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h -index 89fc59dab57d..5f98e14e9470 100644 ---- a/include/linux/interrupt.h -+++ b/include/linux/interrupt.h -@@ -540,7 +540,7 @@ extern const char * const softirq_to_name[NR_SOFTIRQS]; - - struct softirq_action - { -- void (*action)(struct softirq_action *); -+ void (*action)(void); - }; - - asmlinkage void do_softirq(void); -@@ -555,7 +555,7 @@ static inline void do_softirq_own_stack(void) - } - #endif - --extern void open_softirq(int nr, void (*action)(struct softirq_action *)); -+extern void __init open_softirq(int nr, void (*action)(void)); - extern void softirq_init(void); - extern void __raise_softirq_irqoff(unsigned int nr); - -diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h -index 069aa2ebef90..cb9e3637a620 100644 ---- a/include/linux/kobject_ns.h -+++ b/include/linux/kobject_ns.h -@@ -45,7 +45,7 @@ struct kobj_ns_type_operations { - void (*drop_ns)(void *); - }; - --int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); -+int __init kobj_ns_type_register(const struct kobj_ns_type_operations *ops); - int kobj_ns_type_registered(enum kobj_ns_type type); - const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); - const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); -diff --git a/include/linux/mm.h b/include/linux/mm.h -index b249d2e033aa..a4855777d1fa 
100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -664,7 +664,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) - } - #endif - --extern void *kvmalloc_node(size_t size, gfp_t flags, int node); -+extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __attribute__((alloc_size(1))); - static inline void *kvmalloc(size_t size, gfp_t flags) - { - return kvmalloc_node(size, flags, NUMA_NO_NODE); -diff --git a/include/linux/percpu.h b/include/linux/percpu.h -index 5e76af742c80..9a6c682ec127 100644 ---- a/include/linux/percpu.h -+++ b/include/linux/percpu.h -@@ -123,7 +123,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, - pcpu_fc_populate_pte_fn_t populate_pte_fn); - #endif - --extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); -+extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __attribute__((alloc_size(1))); - extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr); - extern bool is_kernel_percpu_address(unsigned long addr); - -@@ -131,8 +131,8 @@ extern bool is_kernel_percpu_address(unsigned long addr); - extern void __init setup_per_cpu_areas(void); - #endif - --extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp); --extern void __percpu *__alloc_percpu(size_t size, size_t align); -+extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __attribute__((alloc_size(1))); -+extern void __percpu *__alloc_percpu(size_t size, size_t align) __attribute__((alloc_size(1))); - extern void free_percpu(void __percpu *__pdata); - extern phys_addr_t per_cpu_ptr_to_phys(void *addr); - -diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h -index 68ccc5b1913b..a7565ea44938 100644 ---- a/include/linux/perf_event.h -+++ b/include/linux/perf_event.h -@@ -1241,6 +1241,11 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, - int perf_event_max_stack_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); - -+static inline bool perf_paranoid_any(void) -+{ -+ return sysctl_perf_event_paranoid > 2; -+} -+ - static inline bool perf_paranoid_tracepoint_raw(void) - { - return sysctl_perf_event_paranoid > -1; -diff --git a/include/linux/slab.h b/include/linux/slab.h -index 4d2a2fa55ed5..be3a8234edde 100644 ---- a/include/linux/slab.h -+++ b/include/linux/slab.h -@@ -184,8 +184,8 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *, struct mem_cgroup *); - /* - * Common kmalloc functions provided by all allocators - */ --void * __must_check __krealloc(const void *, size_t, gfp_t); --void * __must_check krealloc(const void *, size_t, gfp_t); -+void * __must_check __krealloc(const void *, size_t, gfp_t) __attribute__((alloc_size(2))); -+void * __must_check krealloc(const void *, size_t, gfp_t) __attribute((alloc_size(2))); - void kfree(const void *); - void kzfree(const void *); - size_t __ksize(const void *); -@@ -390,7 +390,7 @@ static __always_inline unsigned int kmalloc_index(size_t size) - } - #endif /* !CONFIG_SLOB */ - --void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc; -+void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc __attribute__((alloc_size(1))); - void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment __malloc; - void kmem_cache_free(struct kmem_cache *, void *); - -@@ -414,7 +414,7 @@ static __always_inline void kfree_bulk(size_t size, void **p) - } - - #ifdef 
CONFIG_NUMA --void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc; -+void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc __attribute__((alloc_size(1))); - void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment __malloc; - #else - static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) -@@ -539,7 +539,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) - * Try really hard to succeed the allocation but fail - * eventually. - */ --static __always_inline void *kmalloc(size_t size, gfp_t flags) -+static __always_inline __attribute__((alloc_size(1))) void *kmalloc(size_t size, gfp_t flags) - { - if (__builtin_constant_p(size)) { - #ifndef CONFIG_SLOB -@@ -581,7 +581,7 @@ static __always_inline unsigned int kmalloc_size(unsigned int n) - return 0; - } - --static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) -+static __always_inline __attribute__((alloc_size(1))) void *kmalloc_node(size_t size, gfp_t flags, int node) - { - #ifndef CONFIG_SLOB - if (__builtin_constant_p(size) && -diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h -index d2153789bd9f..97da977d6060 100644 ---- a/include/linux/slub_def.h -+++ b/include/linux/slub_def.h -@@ -121,6 +121,11 @@ struct kmem_cache { - unsigned long random; - #endif - -+#ifdef CONFIG_SLAB_CANARY -+ unsigned long random_active; -+ unsigned long random_inactive; -+#endif -+ - #ifdef CONFIG_NUMA - /* - * Defragmentation by allocating from a remote node. -diff --git a/include/linux/string.h b/include/linux/string.h -index b6ccdc2c7f02..6d66b8740f90 100644 ---- a/include/linux/string.h -+++ b/include/linux/string.h -@@ -268,10 +268,16 @@ void __read_overflow2(void) __compiletime_error("detected read beyond size of ob - void __read_overflow3(void) __compiletime_error("detected read beyond size of object passed as 3rd parameter"); - void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter"); - -+#ifdef CONFIG_FORTIFY_SOURCE_STRICT_STRING -+#define __string_size(p) __builtin_object_size(p, 1) -+#else -+#define __string_size(p) __builtin_object_size(p, 0) -+#endif -+ - #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) - __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) - { -- size_t p_size = __builtin_object_size(p, 0); -+ size_t p_size = __string_size(p); - if (__builtin_constant_p(size) && p_size < size) - __write_overflow(); - if (p_size < size) -@@ -281,7 +287,7 @@ __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) - - __FORTIFY_INLINE char *strcat(char *p, const char *q) - { -- size_t p_size = __builtin_object_size(p, 0); -+ size_t p_size = __string_size(p); - if (p_size == (size_t)-1) - return __builtin_strcat(p, q); - if (strlcat(p, q, p_size) >= p_size) -@@ -292,7 +298,7 @@ __FORTIFY_INLINE char *strcat(char *p, const char *q) - __FORTIFY_INLINE __kernel_size_t strlen(const char *p) - { - __kernel_size_t ret; -- size_t p_size = __builtin_object_size(p, 0); -+ size_t p_size = __string_size(p); - - /* Work around gcc excess stack consumption issue */ - if (p_size == (size_t)-1 || -@@ -307,7 +313,7 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p) - extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); - __FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t 
maxlen) - { -- size_t p_size = __builtin_object_size(p, 0); -+ size_t p_size = __string_size(p); - __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); - if (p_size <= ret && maxlen != ret) - fortify_panic(__func__); -@@ -319,8 +325,8 @@ extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); - __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) - { - size_t ret; -- size_t p_size = __builtin_object_size(p, 0); -- size_t q_size = __builtin_object_size(q, 0); -+ size_t p_size = __string_size(p); -+ size_t q_size = __string_size(q); - if (p_size == (size_t)-1 && q_size == (size_t)-1) - return __real_strlcpy(p, q, size); - ret = strlen(q); -@@ -340,8 +346,8 @@ __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) - __FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count) - { - size_t p_len, copy_len; -- size_t p_size = __builtin_object_size(p, 0); -- size_t q_size = __builtin_object_size(q, 0); -+ size_t p_size = __string_size(p); -+ size_t q_size = __string_size(q); - if (p_size == (size_t)-1 && q_size == (size_t)-1) - return __builtin_strncat(p, q, count); - p_len = strlen(p); -@@ -454,8 +460,8 @@ __FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp) - /* defined after fortified strlen and memcpy to reuse them */ - __FORTIFY_INLINE char *strcpy(char *p, const char *q) - { -- size_t p_size = __builtin_object_size(p, 0); -- size_t q_size = __builtin_object_size(q, 0); -+ size_t p_size = __string_size(p); -+ size_t q_size = __string_size(q); - if (p_size == (size_t)-1 && q_size == (size_t)-1) - return __builtin_strcpy(p, q); - memcpy(p, q, strlen(q) + 1); -diff --git a/include/linux/tty.h b/include/linux/tty.h -index bfa4e2ee94a9..3e18d583fc8d 100644 ---- a/include/linux/tty.h -+++ b/include/linux/tty.h -@@ -14,6 +14,7 @@ - #include - #include - #include -+#include - - - /* -@@ -336,6 +337,7 @@ struct tty_struct { - /* If the tty has a pending do_SAK, queue it here - akpm */ - struct work_struct SAK_work; - struct tty_port *port; -+ struct user_namespace *owner_user_ns; - } __randomize_layout; - - /* Each of a tty's open files has private_data pointing to tty_file_private */ -@@ -345,6 +347,8 @@ struct tty_file_private { - struct list_head list; - }; - -+extern int tiocsti_restrict; -+ - /* tty magic number */ - #define TTY_MAGIC 0x5401 - -diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h -index 4e7809408073..0b58a5176a25 100644 ---- a/include/linux/vmalloc.h -+++ b/include/linux/vmalloc.h -@@ -88,19 +88,19 @@ static inline void vmalloc_init(void) - static inline unsigned long vmalloc_nr_pages(void) { return 0; } - #endif - --extern void *vmalloc(unsigned long size); --extern void *vzalloc(unsigned long size); --extern void *vmalloc_user(unsigned long size); --extern void *vmalloc_node(unsigned long size, int node); --extern void *vzalloc_node(unsigned long size, int node); --extern void *vmalloc_exec(unsigned long size); --extern void *vmalloc_32(unsigned long size); --extern void *vmalloc_32_user(unsigned long size); --extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); -+extern void *vmalloc(unsigned long size) __attribute__((alloc_size(1))); -+extern void *vzalloc(unsigned long size) __attribute__((alloc_size(1))); -+extern void *vmalloc_user(unsigned long size) __attribute__((alloc_size(1))); -+extern void *vmalloc_node(unsigned long size, int node) __attribute__((alloc_size(1))); -+extern void *vzalloc_node(unsigned long size, int node) 
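The __string_size() switch in the string.h hunk above is simply a choice between __builtin_object_size() modes: mode 0 measures up to the end of the whole enclosing object, mode 1 only up to the end of the closest enclosing member, which is what makes intra-object string overflows detectable under CONFIG_FORTIFY_SOURCE_STRICT_STRING. A small userspace sketch of the difference (the struct layout is made up for illustration; build with -O2):

#include <stdio.h>

struct packet {
        char name[8];
        char payload[56];
};

int main(void)
{
        struct packet p;

        /* mode 0: whole surrounding object; mode 1: closest enclosing member */
        printf("mode 0: %zu\n", __builtin_object_size(p.name, 0));  /* expect 64 */
        printf("mode 1: %zu\n", __builtin_object_size(p.name, 1));  /* expect 8  */
        return 0;
}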
__attribute__((alloc_size(1))); -+extern void *vmalloc_exec(unsigned long size) __attribute__((alloc_size(1))); -+extern void *vmalloc_32(unsigned long size) __attribute__((alloc_size(1))); -+extern void *vmalloc_32_user(unsigned long size) __attribute__((alloc_size(1))); -+extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) __attribute__((alloc_size(1))); - extern void *__vmalloc_node_range(unsigned long size, unsigned long align, - unsigned long start, unsigned long end, gfp_t gfp_mask, - pgprot_t prot, unsigned long vm_flags, int node, -- const void *caller); -+ const void *caller) __attribute__((alloc_size(1))); - #ifndef CONFIG_MMU - extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); - static inline void *__vmalloc_node_flags_caller(unsigned long size, int node, -diff --git a/init/Kconfig b/init/Kconfig -index b4daad2bac23..c1016fd960f0 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -381,6 +381,7 @@ config USELIB - config AUDIT - bool "Auditing support" - depends on NET -+ default y - help - Enable auditing infrastructure that can be used with another - kernel subsystem, such as SELinux (which requires this for -@@ -1118,6 +1119,22 @@ config USER_NS - - If unsure, say N. - -+config USER_NS_UNPRIVILEGED -+ bool "Allow unprivileged users to create namespaces" -+ depends on USER_NS -+ default n -+ help -+ When disabled, unprivileged users will not be able to create -+ new namespaces. Allowing users to create their own namespaces -+ has been part of several recent local privilege escalation -+ exploits, so if you need user namespaces but are -+ paranoid^Wsecurity-conscious you want to disable this. -+ -+ This setting can be overridden at runtime via the -+ kernel.unprivileged_userns_clone sysctl. -+ -+ If unsure, say N. -+ - config PID_NS - bool "PID Namespaces" - default y -@@ -1538,8 +1555,7 @@ config SHMEM - which may be appropriate on small systems without swap. - - config AIO -- bool "Enable AIO support" if EXPERT -- default y -+ bool "Enable AIO support" - help - This option enables POSIX asynchronous I/O which may by used - by some high performance threaded applications. Disabling -@@ -1650,6 +1666,23 @@ config USERFAULTFD - Enable the userfaultfd() system call that allows to intercept and - handle page faults in userland. - -+config USERFAULTFD_UNPRIVILEGED -+ bool "Allow unprivileged users to use the userfaultfd syscall" -+ depends on USERFAULTFD -+ default n -+ help -+ When disabled, unprivileged users will not be able to use the userfaultfd -+ syscall. Userfaultfd provide attackers with a way to stall a kernel -+ thread in the middle of memory accesses from userspace by initiating an -+ access on an unmapped page. To avoid various heap grooming and heap -+ spraying techniques for exploiting use-after-free flaws this should be -+ disabled by default. -+ -+ This setting can be overridden at runtime via the -+ vm.unprivileged_userfaultfd sysctl. -+ -+ If unsure, say N. -+ - config ARCH_HAS_MEMBARRIER_CALLBACKS - bool - -@@ -1762,7 +1795,7 @@ config VM_EVENT_COUNTERS - - config SLUB_DEBUG - default y -- bool "Enable SLUB debugging support" if EXPERT -+ bool "Enable SLUB debugging support" - depends on SLUB && SYSFS - help - SLUB has extensive debug support features. 
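A hedged userspace probe for the USERFAULTFD_UNPRIVILEGED behaviour described above: on a kernel that denies unprivileged userfaultfd (whether via this option or the vm.unprivileged_userfaultfd sysctl), the raw syscall should fail with EPERM. Availability of SYS_userfaultfd depends on your libc headers.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
        /* no universal glibc wrapper, so go through syscall(2) */
        long fd = syscall(SYS_userfaultfd, 0);

        if (fd >= 0) {
                printf("unprivileged userfaultfd: allowed (fd %ld)\n", fd);
                close((int)fd);
        } else if (errno == EPERM) {
                printf("unprivileged userfaultfd: denied (EPERM)\n");
        } else {
                printf("userfaultfd failed: %s\n", strerror(errno));
        }
        return 0;
}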
Disabling these can -@@ -1786,7 +1819,6 @@ config SLUB_MEMCG_SYSFS_ON - - config COMPAT_BRK - bool "Disable heap randomization" -- default y - help - Randomizing heap placement makes heap exploits harder, but it - also breaks ancient binaries (including anything libc5 based). -@@ -1833,7 +1865,6 @@ endchoice - - config SLAB_MERGE_DEFAULT - bool "Allow slab caches to be merged" -- default y - help - For reduced kernel memory fragmentation, slab caches can be - merged when they share the same size and other characteristics. -@@ -1846,9 +1877,9 @@ config SLAB_MERGE_DEFAULT - command line. - - config SLAB_FREELIST_RANDOM -- default n - depends on SLAB || SLUB - bool "SLAB freelist randomization" -+ default y - help - Randomizes the freelist order used on creating new pages. This - security feature reduces the predictability of the kernel slab -@@ -1857,12 +1888,30 @@ config SLAB_FREELIST_RANDOM - config SLAB_FREELIST_HARDENED - bool "Harden slab freelist metadata" - depends on SLUB -+ default y - help - Many kernel heap attacks try to target slab cache metadata and - other infrastructure. This options makes minor performance - sacrifices to harden the kernel slab allocator against common - freelist exploit methods. - -+config SLAB_CANARY -+ depends on SLUB -+ depends on !SLAB_MERGE_DEFAULT -+ bool "SLAB canaries" -+ default y -+ help -+ Place canaries at the end of kernel slab allocations, sacrificing -+ some performance and memory usage for security. -+ -+ Canaries can detect some forms of heap corruption when allocations -+ are freed and as part of the HARDENED_USERCOPY feature. It provides -+ basic use-after-free detection for HARDENED_USERCOPY. -+ -+ Canaries absorb small overflows (rendering them harmless), mitigate -+ non-NUL terminated C string overflows on 64-bit via a guaranteed zero -+ byte and provide basic double-free detection. -+ - config SHUFFLE_PAGE_ALLOCATOR - bool "Page allocator randomization" - default SLAB_FREELIST_RANDOM && ACPI_NUMA -diff --git a/kernel/audit.c b/kernel/audit.c -index da8dc0db5bd3..62dda6867dd9 100644 ---- a/kernel/audit.c -+++ b/kernel/audit.c -@@ -1628,6 +1628,9 @@ static int __init audit_enable(char *str) - - if (audit_default == AUDIT_OFF) - audit_initialized = AUDIT_DISABLED; -+ else if (!audit_ever_enabled) -+ audit_initialized = AUDIT_UNINITIALIZED; -+ - if (audit_set_enabled(audit_default)) - pr_err("audit: error setting audit state (%d)\n", - audit_default); -diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c -index ef0e1e3e66f4..d1ddc8695ab8 100644 ---- a/kernel/bpf/core.c -+++ b/kernel/bpf/core.c -@@ -519,7 +519,7 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) - #ifdef CONFIG_BPF_JIT - /* All BPF JIT sysctl knobs here. 
*/ - int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); --int bpf_jit_harden __read_mostly; -+int bpf_jit_harden __read_mostly = 2; - int bpf_jit_kallsyms __read_mostly; - long bpf_jit_limit __read_mostly; - -diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c -index ace1cfaa24b6..37e08fc44a6b 100644 ---- a/kernel/bpf/syscall.c -+++ b/kernel/bpf/syscall.c -@@ -39,7 +39,7 @@ static DEFINE_SPINLOCK(prog_idr_lock); - static DEFINE_IDR(map_idr); - static DEFINE_SPINLOCK(map_idr_lock); - --int sysctl_unprivileged_bpf_disabled __read_mostly; -+int sysctl_unprivileged_bpf_disabled __read_mostly = 1; - - static const struct bpf_map_ops * const bpf_map_types[] = { - #define BPF_PROG_TYPE(_id, _ops) -diff --git a/kernel/capability.c b/kernel/capability.c -index 1444f3954d75..8cc9dd7992f2 100644 ---- a/kernel/capability.c -+++ b/kernel/capability.c -@@ -449,6 +449,12 @@ bool capable(int cap) - return ns_capable(&init_user_ns, cap); - } - EXPORT_SYMBOL(capable); -+ -+bool capable_noaudit(int cap) -+{ -+ return ns_capable_noaudit(&init_user_ns, cap); -+} -+EXPORT_SYMBOL(capable_noaudit); - #endif /* CONFIG_MULTIUSER */ - - /** -diff --git a/kernel/events/core.c b/kernel/events/core.c -index 6c829e22bad3..3063a7239a94 100644 ---- a/kernel/events/core.c -+++ b/kernel/events/core.c -@@ -398,8 +398,13 @@ static cpumask_var_t perf_online_mask; - * 0 - disallow raw tracepoint access for unpriv - * 1 - disallow cpu events for unpriv - * 2 - disallow kernel profiling for unpriv -+ * 3 - disallow all unpriv perf event use - */ -+#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT -+int sysctl_perf_event_paranoid __read_mostly = 3; -+#else - int sysctl_perf_event_paranoid __read_mostly = 2; -+#endif - - /* Minimum for 512 kiB + 1 user control page */ - int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ -@@ -10895,6 +10900,9 @@ SYSCALL_DEFINE5(perf_event_open, - if (flags & ~PERF_FLAG_ALL) - return -EINVAL; - -+ if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN)) -+ return -EACCES; -+ - err = perf_copy_attr(attr_uptr, &attr); - if (err) - return err; -diff --git a/kernel/fork.c b/kernel/fork.c -index 755d8160e001..ed909f8050b2 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -106,6 +106,11 @@ - - #define CREATE_TRACE_POINTS - #include -+#ifdef CONFIG_USER_NS -+extern int unprivileged_userns_clone; -+#else -+#define unprivileged_userns_clone 0 -+#endif - - /* - * Minimum number of threads to boot the kernel -@@ -1779,6 +1784,10 @@ static __latent_entropy struct task_struct *copy_process( - if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) - return ERR_PTR(-EINVAL); - -+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) -+ if (!capable(CAP_SYS_ADMIN)) -+ return ERR_PTR(-EPERM); -+ - /* - * Thread groups must share signals as well, and detached threads - * can only be started up within the thread group. 
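Likewise, the CLONE_NEWUSER gate added to copy_process() above (and to ksys_unshare() in the next hunk) can be observed from userspace: with kernel.unprivileged_userns_clone=0, an unprivileged unshare(CLONE_NEWUSER) is expected to return EPERM. A small check, assuming glibc and _GNU_SOURCE:

#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        if (unshare(CLONE_NEWUSER) == 0) {
                printf("unprivileged user namespaces: allowed\n");
        } else if (errno == EPERM) {
                printf("unprivileged user namespaces: denied (EPERM)\n");
        } else {
                printf("unshare(CLONE_NEWUSER) failed: %s\n", strerror(errno));
        }
        return 0;
}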
-@@ -2836,6 +2845,12 @@ int ksys_unshare(unsigned long unshare_flags) - if (unshare_flags & CLONE_NEWNS) - unshare_flags |= CLONE_FS; - -+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { -+ err = -EPERM; -+ if (!capable(CAP_SYS_ADMIN)) -+ goto bad_unshare_out; -+ } -+ - err = check_unshare_flags(unshare_flags); - if (err) - goto bad_unshare_out; -diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c -index 477b4eb44af5..db28cc3fd301 100644 ---- a/kernel/rcu/tiny.c -+++ b/kernel/rcu/tiny.c -@@ -74,7 +74,7 @@ void rcu_sched_clock_irq(int user) - } - - /* Invoke the RCU callbacks whose grace period has elapsed. */ --static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) -+static __latent_entropy void rcu_process_callbacks(void) - { - struct rcu_head *next, *list; - unsigned long flags; -diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c -index 81105141b6a8..38f04f653d29 100644 ---- a/kernel/rcu/tree.c -+++ b/kernel/rcu/tree.c -@@ -2381,7 +2381,7 @@ static __latent_entropy void rcu_core(void) - trace_rcu_utilization(TPS("End RCU core")); - } - --static void rcu_core_si(struct softirq_action *h) -+static void rcu_core_si(void) - { - rcu_core(); - } -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index c87a798d1456..341c384cc597 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -9889,7 +9889,7 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf) - * run_rebalance_domains is triggered when needed from the scheduler tick. - * Also triggered for nohz idle balancing (with nohz_balancing_kick set). - */ --static __latent_entropy void run_rebalance_domains(struct softirq_action *h) -+static __latent_entropy void run_rebalance_domains(void) - { - struct rq *this_rq = this_rq(); - enum cpu_idle_type idle = this_rq->idle_balance ? 
-diff --git a/kernel/softirq.c b/kernel/softirq.c -index 0427a86743a4..5e6a9b4ccb41 100644 ---- a/kernel/softirq.c -+++ b/kernel/softirq.c -@@ -52,7 +52,7 @@ DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat); - EXPORT_PER_CPU_SYMBOL(irq_stat); - #endif - --static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; -+static struct softirq_action softirq_vec[NR_SOFTIRQS] __ro_after_init __aligned(PAGE_SIZE); - - DEFINE_PER_CPU(struct task_struct *, ksoftirqd); - -@@ -289,7 +289,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) - kstat_incr_softirqs_this_cpu(vec_nr); - - trace_softirq_entry(vec_nr); -- h->action(h); -+ h->action(); - trace_softirq_exit(vec_nr); - if (unlikely(prev_count != preempt_count())) { - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", -@@ -452,7 +452,7 @@ void __raise_softirq_irqoff(unsigned int nr) - or_softirq_pending(1UL << nr); - } - --void open_softirq(int nr, void (*action)(struct softirq_action *)) -+void __init open_softirq(int nr, void (*action)(void)) - { - softirq_vec[nr].action = action; - } -@@ -498,8 +498,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) - } - EXPORT_SYMBOL(__tasklet_hi_schedule); - --static void tasklet_action_common(struct softirq_action *a, -- struct tasklet_head *tl_head, -+static void tasklet_action_common(struct tasklet_head *tl_head, - unsigned int softirq_nr) - { - struct tasklet_struct *list; -@@ -536,14 +535,14 @@ static void tasklet_action_common(struct softirq_action *a, - } - } - --static __latent_entropy void tasklet_action(struct softirq_action *a) -+static __latent_entropy void tasklet_action(void) - { -- tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); -+ tasklet_action_common(this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); - } - --static __latent_entropy void tasklet_hi_action(struct softirq_action *a) -+static __latent_entropy void tasklet_hi_action(void) - { -- tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); -+ tasklet_action_common(this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); - } - - void tasklet_init(struct tasklet_struct *t, -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index 70665934d53e..8ea67d08b926 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -68,6 +68,7 @@ - #include - #include - #include -+#include - - #include "../lib/kstrtox.h" - -@@ -104,12 +105,19 @@ - #if defined(CONFIG_SYSCTL) - - /* External variables not in a header file. 
*/ -+#if IS_ENABLED(CONFIG_USB) -+int deny_new_usb __read_mostly = 0; -+EXPORT_SYMBOL(deny_new_usb); -+#endif - extern int suid_dumpable; - #ifdef CONFIG_COREDUMP - extern int core_uses_pid; - extern char core_pattern[]; - extern unsigned int core_pipe_limit; - #endif -+#ifdef CONFIG_USER_NS -+extern int unprivileged_userns_clone; -+#endif - extern int pid_max; - extern int pid_max_min, pid_max_max; - extern int percpu_pagelist_fraction; -@@ -121,32 +129,32 @@ extern int sysctl_nr_trim_pages; - - /* Constants used for minimum and maximum */ - #ifdef CONFIG_LOCKUP_DETECTOR --static int sixty = 60; -+static int sixty __read_only = 60; - #endif - --static int __maybe_unused neg_one = -1; --static int __maybe_unused two = 2; --static int __maybe_unused four = 4; --static unsigned long zero_ul; --static unsigned long one_ul = 1; --static unsigned long long_max = LONG_MAX; --static int one_hundred = 100; --static int one_thousand = 1000; -+static int __maybe_unused neg_one __read_only = -1; -+static int __maybe_unused two __read_only = 2; -+static int __maybe_unused four __read_only = 4; -+static unsigned long zero_ul __read_only; -+static unsigned long one_ul __read_only = 1; -+static unsigned long long_max __read_only = LONG_MAX; -+static int one_hundred __read_only = 100; -+static int one_thousand __read_only = 1000; - #ifdef CONFIG_PRINTK --static int ten_thousand = 10000; -+static int ten_thousand __read_only = 10000; - #endif - #ifdef CONFIG_PERF_EVENTS --static int six_hundred_forty_kb = 640 * 1024; -+static int six_hundred_forty_kb __read_only = 640 * 1024; - #endif - - /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ --static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; -+static unsigned long dirty_bytes_min __read_only = 2 * PAGE_SIZE; - - /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ --static int maxolduid = 65535; --static int minolduid; -+static int maxolduid __read_only = 65535; -+static int minolduid __read_only; - --static int ngroups_max = NGROUPS_MAX; -+static int ngroups_max __read_only = NGROUPS_MAX; - static const int cap_last_cap = CAP_LAST_CAP; - - /* -@@ -154,9 +162,12 @@ static const int cap_last_cap = CAP_LAST_CAP; - * and hung_task_check_interval_secs - */ - #ifdef CONFIG_DETECT_HUNG_TASK --static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); -+static unsigned long hung_task_timeout_max __read_only = (LONG_MAX/HZ); - #endif - -+int device_sidechannel_restrict __read_mostly = 1; -+EXPORT_SYMBOL(device_sidechannel_restrict); -+ - #ifdef CONFIG_INOTIFY_USER - #include - #endif -@@ -301,19 +312,19 @@ static struct ctl_table sysctl_base_table[] = { - }; - - #ifdef CONFIG_SCHED_DEBUG --static int min_sched_granularity_ns = 100000; /* 100 usecs */ --static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ --static int min_wakeup_granularity_ns; /* 0 usecs */ --static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ -+static int min_sched_granularity_ns __read_only = 100000; /* 100 usecs */ -+static int max_sched_granularity_ns __read_only = NSEC_PER_SEC; /* 1 second */ -+static int min_wakeup_granularity_ns __read_only; /* 0 usecs */ -+static int max_wakeup_granularity_ns __read_only = NSEC_PER_SEC; /* 1 second */ - #ifdef CONFIG_SMP --static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; --static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; -+static int min_sched_tunable_scaling __read_only = SCHED_TUNABLESCALING_NONE; -+static int max_sched_tunable_scaling 
__read_only = SCHED_TUNABLESCALING_END-1; - #endif /* CONFIG_SMP */ - #endif /* CONFIG_SCHED_DEBUG */ - - #ifdef CONFIG_COMPACTION --static int min_extfrag_threshold; --static int max_extfrag_threshold = 1000; -+static int min_extfrag_threshold __read_only; -+static int max_extfrag_threshold __read_only = 1000; - #endif - - static struct ctl_table kern_table[] = { -@@ -546,6 +557,15 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_USER_NS -+ { -+ .procname = "unprivileged_userns_clone", -+ .data = &unprivileged_userns_clone, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+#endif - #ifdef CONFIG_PROC_SYSCTL - { - .procname = "tainted", -@@ -901,6 +921,37 @@ static struct ctl_table kern_table[] = { - .extra1 = SYSCTL_ZERO, - .extra2 = &two, - }, -+#endif -+#if defined CONFIG_TTY -+ { -+ .procname = "tiocsti_restrict", -+ .data = &tiocsti_restrict, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec_minmax_sysadmin, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+#endif -+ { -+ .procname = "device_sidechannel_restrict", -+ .data = &device_sidechannel_restrict, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec_minmax_sysadmin, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+#if IS_ENABLED(CONFIG_USB) -+ { -+ .procname = "deny_new_usb", -+ .data = &deny_new_usb, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec_minmax_sysadmin, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, - #endif - { - .procname = "ngroups_max", -diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index 7f31932216a1..9ede224fc81f 100644 ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -1583,7 +1583,7 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, - } - } - --static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) -+static __latent_entropy void hrtimer_run_softirq(void) - { - struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); - unsigned long flags; -diff --git a/kernel/time/timer.c b/kernel/time/timer.c -index 4820823515e9..1a61e5aa87ae 100644 ---- a/kernel/time/timer.c -+++ b/kernel/time/timer.c -@@ -1779,7 +1779,7 @@ static inline void __run_timers(struct timer_base *base) - /* - * This function runs timers and the timer-tq in bottom half context. - */ --static __latent_entropy void run_timer_softirq(struct softirq_action *h) -+static __latent_entropy void run_timer_softirq(void) - { - struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); - -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 8eadadc478f9..c36ecd19562c 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -21,6 +21,13 @@ - #include - #include - -+/* sysctl */ -+#ifdef CONFIG_USER_NS_UNPRIVILEGED -+int unprivileged_userns_clone = 1; -+#else -+int unprivileged_userns_clone; -+#endif -+ - static struct kmem_cache *user_ns_cachep __read_mostly; - static DEFINE_MUTEX(userns_state_mutex); - -diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug -index 93d97f9b0157..fb923cae2120 100644 ---- a/lib/Kconfig.debug -+++ b/lib/Kconfig.debug -@@ -352,6 +352,9 @@ config SECTION_MISMATCH_WARN_ONLY - - If unsure, say Y. 
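The kern_table entries above expose the new knobs under /proc/sys/kernel/. A quick read-only probe of their state; the paths only exist on kernels carrying this patch set, so a missing file simply means an unpatched kernel:

#include <stdio.h>

static void show(const char *path)
{
        char buf[32];
        FILE *f = fopen(path, "r");

        if (!f) {
                printf("%-45s <not present>\n", path);
                return;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("%-45s %s", path, buf);
        else
                printf("%-45s <unreadable>\n", path);
        fclose(f);
}

int main(void)
{
        show("/proc/sys/kernel/unprivileged_userns_clone");
        show("/proc/sys/kernel/tiocsti_restrict");
        show("/proc/sys/kernel/device_sidechannel_restrict");
        show("/proc/sys/kernel/deny_new_usb");
        return 0;
}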
- -+config DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE -+ bool "Enable verbose reporting of writable function pointers" -+ - # - # Select this config option from the architecture Kconfig, if it - # is preferred to always offer frame pointers as a config -@@ -974,6 +977,7 @@ endmenu # "Debug lockups and hangs" - - config PANIC_ON_OOPS - bool "Panic on Oops" -+ default y - help - Say Y here to enable the kernel to panic when it oopses. This - has the same effect as setting oops=panic on the kernel command -@@ -983,7 +987,7 @@ config PANIC_ON_OOPS - anything erroneous after an oops which could result in data - corruption or other issues. - -- Say N if unsure. -+ Say Y if unsure. - - config PANIC_ON_OOPS_VALUE - int -@@ -1352,6 +1356,7 @@ config DEBUG_BUGVERBOSE - config DEBUG_LIST - bool "Debug linked list manipulation" - depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION -+ default y - help - Enable this to turn on extended checks in the linked-list - walking routines. -@@ -2073,6 +2078,7 @@ config MEMTEST - config BUG_ON_DATA_CORRUPTION - bool "Trigger a BUG when data corruption is detected" - select DEBUG_LIST -+ default y - help - Select this option if the kernel should BUG when it encounters - data corruption in kernel memory structures when they get checked -@@ -2112,6 +2118,7 @@ config STRICT_DEVMEM - config IO_STRICT_DEVMEM - bool "Filter I/O access to /dev/mem" - depends on STRICT_DEVMEM -+ default y - ---help--- - If this option is disabled, you allow userspace (root) access to all - io-memory regardless of whether a driver is actively using that -diff --git a/lib/irq_poll.c b/lib/irq_poll.c -index 2f17b488d58e..b6e7996a0058 100644 ---- a/lib/irq_poll.c -+++ b/lib/irq_poll.c -@@ -75,7 +75,7 @@ void irq_poll_complete(struct irq_poll *iop) - } - EXPORT_SYMBOL(irq_poll_complete); - --static void __latent_entropy irq_poll_softirq(struct softirq_action *h) -+static void __latent_entropy irq_poll_softirq(void) - { - struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); - int rearm = 0, budget = irq_poll_budget; -diff --git a/lib/kobject.c b/lib/kobject.c -index 83198cb37d8d..4a053b7aef42 100644 ---- a/lib/kobject.c -+++ b/lib/kobject.c -@@ -1009,9 +1009,9 @@ EXPORT_SYMBOL_GPL(kset_create_and_add); - - - static DEFINE_SPINLOCK(kobj_ns_type_lock); --static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES]; -+static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES] __ro_after_init; - --int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) -+int __init kobj_ns_type_register(const struct kobj_ns_type_operations *ops) - { - enum kobj_ns_type type = ops->type; - int error; -diff --git a/lib/nlattr.c b/lib/nlattr.c -index cace9b307781..39ba1387045d 100644 ---- a/lib/nlattr.c -+++ b/lib/nlattr.c -@@ -571,6 +571,8 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count) - { - int minlen = min_t(int, count, nla_len(src)); - -+ BUG_ON(minlen < 0); -+ - memcpy(dest, nla_data(src), minlen); - if (count > minlen) - memset(dest + minlen, 0, count - minlen); -diff --git a/lib/vsprintf.c b/lib/vsprintf.c -index e78017a3e1bd..ac5a5b5a439b 100644 ---- a/lib/vsprintf.c -+++ b/lib/vsprintf.c -@@ -771,7 +771,7 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, - return pointer_string(buf, end, (const void *)hashval, spec); - } - --int kptr_restrict __read_mostly; -+int kptr_restrict __read_mostly = 2; - - static noinline_for_stack - char *restricted_pointer(char *buf, char *end, const void *ptr, -diff --git a/mm/Kconfig b/mm/Kconfig 
-index a5dae9a7eb51..0a3070c5a125 100644 ---- a/mm/Kconfig -+++ b/mm/Kconfig -@@ -303,7 +303,8 @@ config KSM - config DEFAULT_MMAP_MIN_ADDR - int "Low address space to protect from user allocation" - depends on MMU -- default 4096 -+ default 32768 if ARM || (ARM64 && COMPAT) -+ default 65536 - help - This is the portion of low virtual memory which should be protected - from userspace allocation. Keeping a user from writing to low pages -diff --git a/mm/mmap.c b/mm/mmap.c -index 4390dbea4aa5..076fd46af68c 100644 ---- a/mm/mmap.c -+++ b/mm/mmap.c -@@ -230,6 +230,13 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) - - newbrk = PAGE_ALIGN(brk); - oldbrk = PAGE_ALIGN(mm->brk); -+ /* properly handle unaligned min_brk as an empty heap */ -+ if (min_brk & ~PAGE_MASK) { -+ if (brk == min_brk) -+ newbrk -= PAGE_SIZE; -+ if (mm->brk == min_brk) -+ oldbrk -= PAGE_SIZE; -+ } - if (oldbrk == newbrk) { - mm->brk = brk; - goto success; -diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 45e39131a716..78b4865f8a1c 100644 ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -68,6 +68,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -106,6 +107,15 @@ struct pcpu_drain { - DEFINE_MUTEX(pcpu_drain_mutex); - DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain); - -+bool __meminitdata extra_latent_entropy; -+ -+static int __init setup_extra_latent_entropy(char *str) -+{ -+ extra_latent_entropy = true; -+ return 0; -+} -+early_param("extra_latent_entropy", setup_extra_latent_entropy); -+ - #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY - volatile unsigned long latent_entropy __latent_entropy; - EXPORT_SYMBOL(latent_entropy); -@@ -1427,6 +1437,25 @@ static void __free_pages_ok(struct page *page, unsigned int order) - local_irq_restore(flags); - } - -+static void __init __gather_extra_latent_entropy(struct page *page, -+ unsigned int nr_pages) -+{ -+ if (extra_latent_entropy && !PageHighMem(page) && page_to_pfn(page) < 0x100000) { -+ unsigned long hash = 0; -+ size_t index, end = PAGE_SIZE * nr_pages / sizeof hash; -+ const unsigned long *data = lowmem_page_address(page); -+ -+ for (index = 0; index < end; index++) -+ hash ^= hash + data[index]; -+#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY -+ latent_entropy ^= hash; -+ add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); -+#else -+ add_device_randomness((const void *)&hash, sizeof(hash)); -+#endif -+ } -+} -+ - void __free_pages_core(struct page *page, unsigned int order) - { - unsigned int nr_pages = 1 << order; -@@ -1441,7 +1470,6 @@ void __free_pages_core(struct page *page, unsigned int order) - } - __ClearPageReserved(p); - set_page_count(p, 0); -- - atomic_long_add(nr_pages, &page_zone(page)->managed_pages); - set_page_refcounted(page); - __free_pages(page, order); -@@ -1492,6 +1520,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, - { - if (early_page_uninitialised(pfn)) - return; -+ __gather_extra_latent_entropy(page, 1 << order); - __free_pages_core(page, order); - } - -@@ -1582,6 +1611,7 @@ static void __init deferred_free_range(unsigned long pfn, - if (nr_pages == pageblock_nr_pages && - (pfn & (pageblock_nr_pages - 1)) == 0) { - set_pageblock_migratetype(page, MIGRATE_MOVABLE); -+ __gather_extra_latent_entropy(page, 1 << pageblock_order); - __free_pages_core(page, pageblock_order); - return; - } -@@ -1589,6 +1619,7 @@ static void __init deferred_free_range(unsigned long pfn, - for (i = 0; i < nr_pages; i++, page++, pfn++) { - if ((pfn & (pageblock_nr_pages - 1)) == 0) - 
set_pageblock_migratetype(page, MIGRATE_MOVABLE); -+ __gather_extra_latent_entropy(page, 1); - __free_pages_core(page, 0); - } - } -@@ -2156,6 +2187,12 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags - { - post_alloc_hook(page, order, gfp_flags); - -+ if (IS_ENABLED(CONFIG_PAGE_SANITIZE_VERIFY) && want_init_on_free()) { -+ int i; -+ for (i = 0; i < (1 << order); i++) -+ verify_zero_highpage(page + i); -+ } -+ - if (!free_pages_prezeroed() && want_init_on_alloc(gfp_flags)) - kernel_init_free_pages(page, 1 << order); - -diff --git a/mm/slab.h b/mm/slab.h -index b2b01694dc43..b531661095a2 100644 ---- a/mm/slab.h -+++ b/mm/slab.h -@@ -470,9 +470,13 @@ static inline struct kmem_cache *virt_to_cache(const void *obj) - struct page *page; - - page = virt_to_head_page(obj); -+#ifdef CONFIG_BUG_ON_DATA_CORRUPTION -+ BUG_ON(!PageSlab(page)); -+#else - if (WARN_ONCE(!PageSlab(page), "%s: Object is not a Slab page!\n", - __func__)) - return NULL; -+#endif - return page->slab_cache; - } - -@@ -518,9 +522,14 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) - return s; - - cachep = virt_to_cache(x); -- WARN_ONCE(cachep && !slab_equal_or_root(cachep, s), -- "%s: Wrong slab cache. %s but object is from %s\n", -- __func__, s->name, cachep->name); -+ if (cachep && !slab_equal_or_root(cachep, s)) { -+#ifdef CONFIG_BUG_ON_DATA_CORRUPTION -+ BUG(); -+#else -+ WARN_ONCE(1, "%s: Wrong slab cache. %s but object is from %s\n", -+ __func__, s->name, cachep->name); -+#endif -+ } - return cachep; - } - -@@ -545,7 +554,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s) - * back there or track user information then we can - * only use the space before that information. - */ -- if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) -+ if ((s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) || IS_ENABLED(CONFIG_SLAB_CANARY)) - return s->inuse; - /* - * Else we can use all the padding etc for the allocation -@@ -674,8 +683,10 @@ static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { } - static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) - { - if (static_branch_unlikely(&init_on_alloc)) { -+#ifndef CONFIG_SLUB - if (c->ctor) - return false; -+#endif - if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) - return flags & __GFP_ZERO; - return true; -@@ -685,9 +696,15 @@ static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) - - static inline bool slab_want_init_on_free(struct kmem_cache *c) - { -- if (static_branch_unlikely(&init_on_free)) -- return !(c->ctor || -- (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))); -+ if (static_branch_unlikely(&init_on_free)) { -+#ifndef CONFIG_SLUB -+ if (c->ctor) -+ return false; -+#endif -+ if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) -+ return false; -+ return true; -+ } - return false; - } - -diff --git a/mm/slab_common.c b/mm/slab_common.c -index ade6c257d4b4..f8f9ebd51296 100644 ---- a/mm/slab_common.c -+++ b/mm/slab_common.c -@@ -28,10 +28,10 @@ - - #include "slab.h" - --enum slab_state slab_state; -+enum slab_state slab_state __ro_after_init; - LIST_HEAD(slab_caches); - DEFINE_MUTEX(slab_mutex); --struct kmem_cache *kmem_cache; -+struct kmem_cache *kmem_cache __ro_after_init; - - #ifdef CONFIG_HARDENED_USERCOPY - bool usercopy_fallback __ro_after_init = -@@ -59,7 +59,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, - /* - * Merge control. If this is set then no merging of slab caches will occur. 
- */ --static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT); -+static bool slab_nomerge __ro_after_init = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT); - - static int __init setup_slab_nomerge(char *str) - { -diff --git a/mm/slub.c b/mm/slub.c -index 20d72cb20515..6690bce322a4 100644 ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -125,6 +125,12 @@ static inline int kmem_cache_debug(struct kmem_cache *s) - #endif - } - -+static inline bool has_sanitize_verify(struct kmem_cache *s) -+{ -+ return IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && -+ slab_want_init_on_free(s); -+} -+ - void *fixup_red_left(struct kmem_cache *s, void *p) - { - if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) -@@ -309,6 +315,35 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) - *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr); - } - -+#ifdef CONFIG_SLAB_CANARY -+static inline unsigned long *get_canary(struct kmem_cache *s, void *object) -+{ -+ if (s->offset) -+ return object + s->offset + sizeof(void *); -+ return object + s->inuse; -+} -+ -+static inline unsigned long get_canary_value(const void *canary, unsigned long value) -+{ -+ return (value ^ (unsigned long)canary) & CANARY_MASK; -+} -+ -+static inline void set_canary(struct kmem_cache *s, void *object, unsigned long value) -+{ -+ unsigned long *canary = get_canary(s, object); -+ *canary = get_canary_value(canary, value); -+} -+ -+static inline void check_canary(struct kmem_cache *s, void *object, unsigned long value) -+{ -+ unsigned long *canary = get_canary(s, object); -+ BUG_ON(*canary != get_canary_value(canary, value)); -+} -+#else -+#define set_canary(s, object, value) -+#define check_canary(s, object, value) -+#endif -+ - /* Loop over all objects in a slab */ - #define for_each_object(__p, __s, __addr, __objects) \ - for (__p = fixup_red_left(__s, __addr); \ -@@ -476,13 +511,13 @@ static inline void *restore_red_left(struct kmem_cache *s, void *p) - * Debug settings: - */ - #if defined(CONFIG_SLUB_DEBUG_ON) --static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS; -+static slab_flags_t slub_debug __ro_after_init = DEBUG_DEFAULT_FLAGS; - #else --static slab_flags_t slub_debug; -+static slab_flags_t slub_debug __ro_after_init; - #endif - --static char *slub_debug_slabs; --static int disable_higher_order_debug; -+static char *slub_debug_slabs __ro_after_init; -+static int disable_higher_order_debug __ro_after_init; - - /* - * slub is about to manipulate internal object metadata. This memory lies -@@ -543,6 +578,9 @@ static struct track *get_track(struct kmem_cache *s, void *object, - else - p = object + s->inuse; - -+ if (IS_ENABLED(CONFIG_SLAB_CANARY)) -+ p = (void *)p + sizeof(void *); -+ - return p + alloc; - } - -@@ -673,6 +711,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) - else - off = s->inuse; - -+ if (IS_ENABLED(CONFIG_SLAB_CANARY)) -+ off += sizeof(void *); -+ - if (s->flags & SLAB_STORE_USER) - off += 2 * sizeof(struct track); - -@@ -802,6 +843,9 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) - /* Freepointer is placed after the object. 
*/ - off += sizeof(void *); - -+ if (IS_ENABLED(CONFIG_SLAB_CANARY)) -+ off += sizeof(void *); -+ - if (s->flags & SLAB_STORE_USER) - /* We also have user information there */ - off += 2 * sizeof(struct track); -@@ -1441,6 +1485,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, - object = next; - next = get_freepointer(s, object); - -+ check_canary(s, object, s->random_active); -+ - if (slab_want_init_on_free(s)) { - /* - * Clear the object and the metadata, but don't touch -@@ -1451,8 +1497,12 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, - : 0; - memset((char *)object + s->inuse, 0, - s->size - s->inuse - rsize); -- -+ if (!IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && s->ctor) -+ s->ctor(object); - } -+ -+ set_canary(s, object, s->random_inactive); -+ - /* If object's reuse doesn't have to be delayed */ - if (!slab_free_hook(s, object)) { - /* Move object to the new freelist */ -@@ -1460,6 +1510,17 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, - *head = object; - if (!*tail) - *tail = object; -+ } else if (slab_want_init_on_free(s) && s->ctor) { -+ /* Objects that are put into quarantine by KASAN will -+ * still undergo free_consistency_checks() and thus -+ * need to show a valid freepointer to check_object(). -+ * -+ * Note that doing this for all caches (not just ctor -+ * ones, which have s->offset != NULL)) causes a GPF, -+ * due to KASAN poisoning and the way set_freepointer() -+ * eventually dereferences the freepointer. -+ */ -+ set_freepointer(s, object, NULL); - } - } while (object != old_tail); - -@@ -1473,8 +1534,9 @@ static void *setup_object(struct kmem_cache *s, struct page *page, - void *object) - { - setup_object_debug(s, page, object); -+ set_canary(s, object, s->random_inactive); - object = kasan_init_slab_obj(s, object); -- if (unlikely(s->ctor)) { -+ if (unlikely(s->ctor) && !has_sanitize_verify(s)) { - kasan_unpoison_object_data(s, object); - s->ctor(object); - kasan_poison_object_data(s, object); -@@ -2752,8 +2814,28 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, - - maybe_wipe_obj_freeptr(s, object); - -- if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) -+ if (has_sanitize_verify(s) && object) { -+ /* KASAN hasn't unpoisoned the object yet (this is done in the -+ * post-alloc hook), so let's do it temporarily. 
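A compact userspace sketch of the SLAB_CANARY scheme used by the slub.c hunks above: a per-cache random value is XOR'ed with the canary's own location, stored just past the object, and re-checked when the object is freed. The names and the CANARY_MASK value here are illustrative assumptions, not the kernel's exact definitions:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* keep one byte zero so a C-string overflow cannot fully forge the canary */
#define CANARY_MASK (~(uintptr_t)0xff)

static uintptr_t cache_random = 0x9e3779b9u;    /* stand-in for get_random_long() */

static uintptr_t *canary_of(void *obj, size_t objsize)
{
        return (uintptr_t *)((char *)obj + objsize);
}

static void *cache_alloc(size_t objsize)
{
        void *obj = malloc(objsize + sizeof(uintptr_t));
        uintptr_t *c;

        if (!obj)
                abort();
        c = canary_of(obj, objsize);
        *c = (cache_random ^ (uintptr_t)c) & CANARY_MASK;
        return obj;
}

static void cache_free(void *obj, size_t objsize)
{
        uintptr_t *c = canary_of(obj, objsize);

        /* the kernel version BUG()s on mismatch; this catches small overflows */
        assert(*c == ((cache_random ^ (uintptr_t)c) & CANARY_MASK));
        free(obj);
}

int main(void)
{
        char *p = cache_alloc(32);

        memset(p, 'A', 32);     /* in-bounds write: the canary survives */
        cache_free(p, 32);
        puts("canary intact");
        return 0;
}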
-+ */ -+ kasan_unpoison_object_data(s, object); -+ BUG_ON(memchr_inv(object, 0, s->object_size)); -+ if (s->ctor) -+ s->ctor(object); -+ kasan_poison_object_data(s, object); -+ } else if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) { - memset(object, 0, s->object_size); -+ if (s->ctor) { -+ kasan_unpoison_object_data(s, object); -+ s->ctor(object); -+ kasan_poison_object_data(s, object); -+ } -+ } -+ -+ if (object) { -+ check_canary(s, object, s->random_inactive); -+ set_canary(s, object, s->random_active); -+ } - - slab_post_alloc_hook(s, gfpflags, 1, &object); - -@@ -3136,7 +3218,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - void **p) - { - struct kmem_cache_cpu *c; -- int i; -+ int i, k; - - /* memcg and kmem_cache debug support */ - s = slab_pre_alloc_hook(s, flags); -@@ -3176,11 +3258,35 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - local_irq_enable(); - - /* Clear memory outside IRQ disabled fastpath loop */ -- if (unlikely(slab_want_init_on_alloc(flags, s))) { -+ if (has_sanitize_verify(s)) { - int j; - -- for (j = 0; j < i; j++) -+ for (j = 0; j < i; j++) { -+ /* KASAN hasn't unpoisoned the object yet (this is done -+ * in the post-alloc hook), so let's do it temporarily. -+ */ -+ kasan_unpoison_object_data(s, p[j]); -+ BUG_ON(memchr_inv(p[j], 0, s->object_size)); -+ if (s->ctor) -+ s->ctor(p[j]); -+ kasan_poison_object_data(s, p[j]); -+ } -+ } else if (unlikely(slab_want_init_on_alloc(flags, s))) { -+ int j; -+ -+ for (j = 0; j < i; j++) { - memset(p[j], 0, s->object_size); -+ if (s->ctor) { -+ kasan_unpoison_object_data(s, p[j]); -+ s->ctor(p[j]); -+ kasan_poison_object_data(s, p[j]); -+ } -+ } -+ } -+ -+ for (k = 0; k < i; k++) { -+ check_canary(s, p[k], s->random_inactive); -+ set_canary(s, p[k], s->random_active); - } - - /* memcg and kmem_cache debug support */ -@@ -3214,9 +3320,9 @@ EXPORT_SYMBOL(kmem_cache_alloc_bulk); - * and increases the number of allocations possible without having to - * take the list_lock. - */ --static unsigned int slub_min_order; --static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; --static unsigned int slub_min_objects; -+static unsigned int slub_min_order __ro_after_init; -+static unsigned int slub_max_order __ro_after_init = PAGE_ALLOC_COSTLY_ORDER; -+static unsigned int slub_min_objects __ro_after_init; - - /* - * Calculate the order of allocation given an slab object size. 
-@@ -3384,6 +3490,7 @@ static void early_kmem_cache_node_alloc(int node) - init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); - init_tracking(kmem_cache_node, n); - #endif -+ set_canary(kmem_cache_node, n, kmem_cache_node->random_active); - n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node), - GFP_KERNEL); - page->freelist = get_freepointer(kmem_cache_node, n); -@@ -3544,6 +3651,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) - size += sizeof(void *); - } - -+ if (IS_ENABLED(CONFIG_SLAB_CANARY)) -+ size += sizeof(void *); -+ - #ifdef CONFIG_SLUB_DEBUG - if (flags & SLAB_STORE_USER) - /* -@@ -3616,6 +3726,10 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) - #ifdef CONFIG_SLAB_FREELIST_HARDENED - s->random = get_random_long(); - #endif -+#ifdef CONFIG_SLAB_CANARY -+ s->random_active = get_random_long(); -+ s->random_inactive = get_random_long(); -+#endif - - if (!calculate_sizes(s, -1)) - goto error; -@@ -3891,6 +4005,8 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, - offset -= s->red_left_pad; - } - -+ check_canary(s, (void *)ptr - offset, s->random_active); -+ - /* Allow address range falling entirely within usercopy region. */ - if (offset >= s->useroffset && - offset - s->useroffset <= s->usersize && -@@ -3924,7 +4040,11 @@ size_t __ksize(const void *object) - page = virt_to_head_page(object); - - if (unlikely(!PageSlab(page))) { -+#ifdef CONFIG_BUG_ON_DATA_CORRUPTION -+ BUG_ON(!PageCompound(page)); -+#else - WARN_ON(!PageCompound(page)); -+#endif - return page_size(page); - } - -@@ -4769,7 +4889,7 @@ enum slab_stat_type { - #define SO_TOTAL (1 << SL_TOTAL) - - #ifdef CONFIG_MEMCG --static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON); -+static bool memcg_sysfs_enabled __ro_after_init = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON); - - static int __init setup_slub_memcg_sysfs(char *str) - { -diff --git a/mm/swap.c b/mm/swap.c -index 38c3fa4308e2..0534c2e348c2 100644 ---- a/mm/swap.c -+++ b/mm/swap.c -@@ -94,6 +94,13 @@ static void __put_compound_page(struct page *page) - if (!PageHuge(page)) - __page_cache_release(page); - dtor = get_compound_page_dtor(page); -+ if (!PageHuge(page)) -+ BUG_ON(dtor != free_compound_page -+#ifdef CONFIG_TRANSPARENT_HUGEPAGE -+ && dtor != free_transhuge_page -+#endif -+ ); -+ - (*dtor)(page); - } - -diff --git a/mm/util.c b/mm/util.c -index 3ad6db9a722e..80209685f67c 100644 ---- a/mm/util.c -+++ b/mm/util.c -@@ -325,9 +325,9 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) - { - /* Is the current task 32bit ? 
*/ - if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task()) -- return randomize_page(mm->brk, SZ_32M); -+ return mm->brk + get_random_long() % SZ_32M + PAGE_SIZE; - -- return randomize_page(mm->brk, SZ_1G); -+ return mm->brk + get_random_long() % SZ_1G + PAGE_SIZE; - } - - unsigned long arch_mmap_rnd(void) -diff --git a/net/core/dev.c b/net/core/dev.c -index 3098c90d60e2..08de516adfd5 100644 ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -4492,7 +4492,7 @@ int netif_rx_ni(struct sk_buff *skb) - } - EXPORT_SYMBOL(netif_rx_ni); - --static __latent_entropy void net_tx_action(struct softirq_action *h) -+static __latent_entropy void net_tx_action(void) - { - struct softnet_data *sd = this_cpu_ptr(&softnet_data); - -@@ -6353,7 +6353,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) - return work; - } - --static __latent_entropy void net_rx_action(struct softirq_action *h) -+static __latent_entropy void net_rx_action(void) - { - struct softnet_data *sd = this_cpu_ptr(&softnet_data); - unsigned long time_limit = jiffies + -diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig -index 03381f3e12ba..8ea409f37436 100644 ---- a/net/ipv4/Kconfig -+++ b/net/ipv4/Kconfig -@@ -267,6 +267,7 @@ config IP_PIMSM_V2 - - config SYN_COOKIES - bool "IP: TCP syncookie support" -+ default y - ---help--- - Normal TCP/IP networking is open to an attack known as "SYN - flooding". This denial-of-service attack prevents legitimate remote -diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost -index 952fff485546..59ffccdb1be4 100644 ---- a/scripts/Makefile.modpost -+++ b/scripts/Makefile.modpost -@@ -54,6 +54,7 @@ MODPOST = scripts/mod/modpost \ - $(if $(KBUILD_EXTMOD),$(addprefix -e ,$(KBUILD_EXTRA_SYMBOLS))) \ - $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ - $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ -+ $(if $(CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE),-f) \ - $(if $(KBUILD_MODPOST_WARN),-w) \ - $(if $(filter nsdeps,$(MAKECMDGOALS)),-d) - -diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig -index e3569543bdac..55cc439b3bc6 100644 ---- a/scripts/gcc-plugins/Kconfig -+++ b/scripts/gcc-plugins/Kconfig -@@ -61,6 +61,11 @@ config GCC_PLUGIN_LATENT_ENTROPY - is some slowdown of the boot process (about 0.5%) and fork and - irq processing. - -+ When extra_latent_entropy is passed on the kernel command line, -+ entropy will be extracted from up to the first 4GB of RAM while the -+ runtime memory allocator is being initialized. This costs even more -+ slowdown of the boot process. -+ - Note that entropy extracted this way is not cryptographically - secure! 
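For reference, the extra_latent_entropy path added to page_alloc.c earlier in this patch accumulates a deliberately cheap hash over early free pages before feeding it to add_device_randomness(). Roughly the same mixing loop, rendered as a standalone userspace toy (not the kernel code, and explicitly not cryptographic):

#include <stdio.h>
#include <string.h>

static unsigned long mix_pages(const unsigned long *data, size_t words)
{
        unsigned long hash = 0;
        size_t i;

        /* same hash ^= hash + word accumulation as the kernel loop */
        for (i = 0; i < words; i++)
                hash ^= hash + data[i];
        return hash;
}

int main(void)
{
        unsigned long page[4096 / sizeof(unsigned long)];

        memset(page, 0x5a, sizeof(page));
        printf("mixed value: %#lx\n", mix_pages(page, sizeof(page) / sizeof(page[0])));
        return 0;
}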
- -diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c -index d2a30a7b3f07..ff57a5fe8029 100644 ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -36,6 +36,8 @@ static int warn_unresolved = 0; - /* How a symbol is exported */ - static int sec_mismatch_count = 0; - static int sec_mismatch_fatal = 0; -+static int writable_fptr_count = 0; -+static int writable_fptr_verbose = 0; - /* ignore missing files */ - static int ignore_missing_files; - /* write namespace dependencies */ -@@ -1019,6 +1021,7 @@ enum mismatch { - ANY_EXIT_TO_ANY_INIT, - EXPORT_TO_INIT_EXIT, - EXTABLE_TO_NON_TEXT, -+ DATA_TO_TEXT - }; - - /** -@@ -1145,6 +1148,12 @@ static const struct sectioncheck sectioncheck[] = { - .good_tosec = {ALL_TEXT_SECTIONS , NULL}, - .mismatch = EXTABLE_TO_NON_TEXT, - .handler = extable_mismatch_handler, -+}, -+/* Do not reference code from writable data */ -+{ -+ .fromsec = { DATA_SECTIONS, NULL }, -+ .bad_tosec = { ALL_TEXT_SECTIONS, NULL }, -+ .mismatch = DATA_TO_TEXT - } - }; - -@@ -1332,10 +1341,10 @@ static Elf_Sym *find_elf_symbol(struct elf_info *elf, Elf64_Sword addr, - continue; - if (!is_valid_name(elf, sym)) - continue; -- if (sym->st_value == addr) -- return sym; - /* Find a symbol nearby - addr are maybe negative */ - d = sym->st_value - addr; -+ if (d == 0) -+ return sym; - if (d < 0) - d = addr - sym->st_value; - if (d < distance) { -@@ -1470,7 +1479,13 @@ static void report_sec_mismatch(const char *modname, - char *prl_from; - char *prl_to; - -- sec_mismatch_count++; -+ if (mismatch->mismatch == DATA_TO_TEXT) { -+ writable_fptr_count++; -+ if (!writable_fptr_verbose) -+ return; -+ } else { -+ sec_mismatch_count++; -+ } - - get_pretty_name(from_is_func, &from, &from_p); - get_pretty_name(to_is_func, &to, &to_p); -@@ -1592,6 +1607,12 @@ static void report_sec_mismatch(const char *modname, - fatal("There's a special handler for this mismatch type, " - "we should never get here."); - break; -+ case DATA_TO_TEXT: -+ fprintf(stderr, -+ "The %s %s:%s references\n" -+ "the %s %s:%s%s\n", -+ from, fromsec, fromsym, to, tosec, tosym, to_p); -+ break; - } - fprintf(stderr, "\n"); - } -@@ -2569,7 +2590,7 @@ int main(int argc, char **argv) - struct ext_sym_list *extsym_iter; - struct ext_sym_list *extsym_start = NULL; - -- while ((opt = getopt(argc, argv, "i:I:e:mnsT:o:awEd")) != -1) { -+ while ((opt = getopt(argc, argv, "i:I:e:fmnsT:o:awEd")) != -1) { - switch (opt) { - case 'i': - kernel_read = optarg; -@@ -2586,6 +2607,9 @@ int main(int argc, char **argv) - extsym_iter->file = optarg; - extsym_start = extsym_iter; - break; -+ case 'f': -+ writable_fptr_verbose = 1; -+ break; - case 'm': - modversions = 1; - break; -@@ -2692,6 +2716,11 @@ int main(int argc, char **argv) - } - - free(buf.p); -+ if (writable_fptr_count && !writable_fptr_verbose) -+ warn("modpost: Found %d writable function pointer%s.\n" -+ "To see full details build your kernel with:\n" -+ "'make CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE=y'\n", -+ writable_fptr_count, (writable_fptr_count == 1 ? "" : "s")); - - return err; - } -diff --git a/security/Kconfig b/security/Kconfig -index 2a1a2d396228..3b7a71410f88 100644 ---- a/security/Kconfig -+++ b/security/Kconfig -@@ -9,7 +9,7 @@ source "security/keys/Kconfig" - - config SECURITY_DMESG_RESTRICT - bool "Restrict unprivileged access to the kernel syslog" -- default n -+ default y - help - This enforces restrictions on unprivileged users reading the kernel - syslog via dmesg(8). 
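The DATA_TO_TEXT section check added to modpost above flags references from writable data sections into text, i.e. function-pointer tables that were left non-const. A standalone illustration of the pattern it is after (userspace, with hypothetical names; in the kernel the usual fix is const plus __ro_after_init):

#include <stdio.h>

static void say_hello(void)
{
        puts("hello");
}

struct ops {
        void (*greet)(void);
};

/* writable table: a stray write primitive can redirect ->greet at runtime */
static struct ops ops_writable = { .greet = say_hello };

/* const table: mapped read-only once the loader finishes relocations
 * (in the kernel: .rodata or __ro_after_init) */
static const struct ops ops_ro = { .greet = say_hello };

int main(void)
{
        ops_writable.greet();
        ops_ro.greet();
        return 0;
}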
-@@ -19,10 +19,34 @@ config SECURITY_DMESG_RESTRICT - - If you are unsure how to answer this question, answer N. - -+config SECURITY_PERF_EVENTS_RESTRICT -+ bool "Restrict unprivileged use of performance events" -+ depends on PERF_EVENTS -+ default y -+ help -+ If you say Y here, the kernel.perf_event_paranoid sysctl -+ will be set to 3 by default, and no unprivileged use of the -+ perf_event_open syscall will be permitted unless it is -+ changed. -+ -+config SECURITY_TIOCSTI_RESTRICT -+ bool "Restrict unprivileged use of tiocsti command injection" -+ default y -+ help -+ This enforces restrictions on unprivileged users injecting commands -+ into other processes which share a tty session using the TIOCSTI -+ ioctl. This option makes TIOCSTI use require CAP_SYS_ADMIN. -+ -+ If this option is not selected, no restrictions will be enforced -+ unless the tiocsti_restrict sysctl is explicitly set to (1). -+ -+ If you are unsure how to answer this question, answer N. -+ - config SECURITY - bool "Enable different security models" - depends on SYSFS - depends on MULTIUSER -+ default y - help - This allows you to choose different security modules to be - configured into your kernel. -@@ -48,6 +72,7 @@ config SECURITYFS - config SECURITY_NETWORK - bool "Socket and Networking Security Hooks" - depends on SECURITY -+ default y - help - This enables the socket and networking security hooks. - If enabled, a security module can use these hooks to -@@ -154,6 +179,7 @@ config HARDENED_USERCOPY - bool "Harden memory copies between kernel and userspace" - depends on HAVE_HARDENED_USERCOPY_ALLOCATOR - imply STRICT_DEVMEM -+ default y - help - This option checks for obviously wrong memory regions when - copying memory to/from the kernel (via copy_to_user() and -@@ -166,7 +192,6 @@ config HARDENED_USERCOPY - config HARDENED_USERCOPY_FALLBACK - bool "Allow usercopy whitelist violations to fallback to object size" - depends on HARDENED_USERCOPY -- default y - help - This is a temporary option that allows missing usercopy whitelists - to be discovered via a WARN() to the kernel log, instead of -@@ -191,10 +216,21 @@ config HARDENED_USERCOPY_PAGESPAN - config FORTIFY_SOURCE - bool "Harden common str/mem functions against buffer overflows" - depends on ARCH_HAS_FORTIFY_SOURCE -+ default y - help - Detect overflows of buffers in common string and memory functions - where the compiler can determine and validate the buffer sizes. - -+config FORTIFY_SOURCE_STRICT_STRING -+ bool "Harden common functions against buffer overflows" -+ depends on FORTIFY_SOURCE -+ depends on EXPERT -+ help -+ Perform stricter overflow checks catching overflows within objects -+ for common C string functions rather than only between objects. -+ -+ This is not yet intended for production use, only bug finding. -+ - config STATIC_USERMODEHELPER - bool "Force all usermode helper calls through a single binary" - help -diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening -index af4c979b38ee..473e40bb8537 100644 ---- a/security/Kconfig.hardening -+++ b/security/Kconfig.hardening -@@ -169,6 +169,7 @@ config STACKLEAK_RUNTIME_DISABLE - - config INIT_ON_ALLOC_DEFAULT_ON - bool "Enable heap memory zeroing on allocation by default" -+ default yes - help - This has the effect of setting "init_on_alloc=1" on the kernel - command line. This can be disabled with "init_on_alloc=0". 
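With SECURITY_PERF_EVENTS_RESTRICT above and the matching kernel.perf_event_paranoid=3 default, unprivileged perf_event_open() should be refused outright. A userspace probe of that behaviour (raw syscall, since glibc provides no wrapper):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
        struct perf_event_attr attr;
        long fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_SOFTWARE;
        attr.config = PERF_COUNT_SW_TASK_CLOCK;
        attr.disabled = 1;

        /* measure the calling task on any CPU */
        fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);

        if (fd >= 0) {
                printf("perf_event_open: allowed (fd %ld)\n", fd);
                close((int)fd);
        } else {
                /* expect EACCES at paranoid level 3 without CAP_SYS_ADMIN */
                printf("perf_event_open: %s\n", strerror(errno));
        }
        return 0;
}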
-@@ -181,6 +182,7 @@ config INIT_ON_ALLOC_DEFAULT_ON - - config INIT_ON_FREE_DEFAULT_ON - bool "Enable heap memory zeroing on free by default" -+ default yes - help - This has the effect of setting "init_on_free=1" on the kernel - command line. This can be disabled with "init_on_free=0". -@@ -196,6 +198,20 @@ config INIT_ON_FREE_DEFAULT_ON - touching "cold" memory areas. Most cases see 3-5% impact. Some - synthetic workloads have measured as high as 8%. - -+config PAGE_SANITIZE_VERIFY -+ bool "Verify sanitized pages" -+ default y -+ help -+ When init_on_free is enabled, verify that newly allocated pages -+ are zeroed to detect write-after-free bugs. -+ -+config SLAB_SANITIZE_VERIFY -+ default y -+ bool "Verify sanitized SLAB allocations" -+ help -+ When init_on_free is enabled, verify that newly allocated slab -+ objects are zeroed to detect write-after-free bugs. -+ - endmenu - - endmenu -diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig -index 5711689deb6a..fab0cb896907 100644 ---- a/security/selinux/Kconfig -+++ b/security/selinux/Kconfig -@@ -3,7 +3,7 @@ config SECURITY_SELINUX - bool "NSA SELinux Support" - depends on SECURITY_NETWORK && AUDIT && NET && INET - select NETWORK_SECMARK -- default n -+ default y - help - This selects NSA Security-Enhanced Linux (SELinux). - You will also need a policy configuration and a labeled filesystem. -@@ -65,23 +65,3 @@ config SECURITY_SELINUX_AVC_STATS - This option collects access vector cache statistics to - /selinux/avc/cache_stats, which may be monitored via - tools such as avcstat. -- --config SECURITY_SELINUX_CHECKREQPROT_VALUE -- int "NSA SELinux checkreqprot default value" -- depends on SECURITY_SELINUX -- range 0 1 -- default 0 -- help -- This option sets the default value for the 'checkreqprot' flag -- that determines whether SELinux checks the protection requested -- by the application or the protection that will be applied by the -- kernel (including any implied execute for read-implies-exec) for -- mmap and mprotect calls. If this option is set to 0 (zero), -- SELinux will default to checking the protection that will be applied -- by the kernel. If this option is set to 1 (one), SELinux will -- default to checking the protection requested by the application. -- The checkreqprot flag may be changed from the default via the -- 'checkreqprot=' boot parameter. It may also be changed at runtime -- via /selinux/checkreqprot if authorized by policy. -- -- If you are unsure how to answer this question, answer 0. -diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c -index 9625b99e677f..daa40da7a8f9 100644 ---- a/security/selinux/hooks.c -+++ b/security/selinux/hooks.c -@@ -135,18 +135,7 @@ static int __init selinux_enabled_setup(char *str) - __setup("selinux=", selinux_enabled_setup); - #endif - --static unsigned int selinux_checkreqprot_boot = -- CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE; -- --static int __init checkreqprot_setup(char *str) --{ -- unsigned long checkreqprot; -- -- if (!kstrtoul(str, 0, &checkreqprot)) -- selinux_checkreqprot_boot = checkreqprot ? 
1 : 0; -- return 1; --} --__setup("checkreqprot=", checkreqprot_setup); -+static const unsigned int selinux_checkreqprot_boot; - - /** - * selinux_secmark_enabled - Check to see if SECMARK is currently enabled -diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c -index e6c7643c3fc0..0e8217f72c5a 100644 ---- a/security/selinux/selinuxfs.c -+++ b/security/selinux/selinuxfs.c -@@ -639,7 +639,6 @@ static ssize_t sel_read_checkreqprot(struct file *filp, char __user *buf, - static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) - { -- struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; - char *page; - ssize_t length; - unsigned int new_value; -@@ -663,10 +662,9 @@ static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, - return PTR_ERR(page); - - length = -EINVAL; -- if (sscanf(page, "%u", &new_value) != 1) -+ if (sscanf(page, "%u", &new_value) != 1 || new_value) - goto out; - -- fsi->state->checkreqprot = new_value ? 1 : 0; - length = count; - out: - kfree(page); -diff --git a/security/yama/Kconfig b/security/yama/Kconfig -index a810304123ca..b809050b25d2 100644 ---- a/security/yama/Kconfig -+++ b/security/yama/Kconfig -@@ -2,7 +2,7 @@ - config SECURITY_YAMA - bool "Yama support" - depends on SECURITY -- default n -+ default y - help - This selects Yama, which extends DAC support with additional - system-wide security settings beyond regular Linux discretionary diff --git a/linux57-tkg/PKGBUILD b/linux57-tkg/PKGBUILD deleted file mode 100644 index a4b9696..0000000 --- a/linux57-tkg/PKGBUILD +++ /dev/null @@ -1,282 +0,0 @@ -# Based on the file created for Arch Linux by: -# Tobias Powalowski -# Thomas Baechler - -# Contributor: Tk-Glitch - -plain ' .---.` `.---.' -plain ' `/syhhhyso- -osyhhhys/`' -plain ' .syNMdhNNhss/``.---.``/sshNNhdMNys.' -plain ' +sdMh.`+MNsssssssssssssssNM+`.hMds+' -plain ' :syNNdhNNhssssssssssssssshNNhdNNys:' -plain ' /ssyhhhysssssssssssssssssyhhhyss/' -plain ' .ossssssssssssssssssssssssssssso.' 
-plain ' :sssssssssssssssssssssssssssssssss:' -plain ' /sssssssssssssssssssssssssssssssssss/' -plain ' :sssssssssssssoosssssssoosssssssssssss:' -plain ' osssssssssssssoosssssssoossssssssssssso' -plain ' osssssssssssyyyyhhhhhhhyyyyssssssssssso' -plain ' /yyyyyyhhdmmmmNNNNNNNNNNNmmmmdhhyyyyyy/' -plain ' smmmNNNNNNNNNNNNNNNNNNNNNNNNNNNNNmmms' -plain ' /dNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNd/' -plain ' `:sdNNNNNNNNNNNNNNNNNNNNNNNNNds:`' -plain ' `-+shdNNNNNNNNNNNNNNNdhs+-`' -plain ' `.-:///////:-.`' - -_where="$PWD" # track basedir as different Arch based distros are moving srcdir around - -source "$_where"/customization.cfg # load default configuration from file -source "$_where"/linux*-tkg-config/prepare - -_tkg_initscript - -_distro="Arch" - -if [ -n "$_custom_pkgbase" ]; then - pkgbase="${_custom_pkgbase}" -else - pkgbase=linux"${_basever}"-tkg-"${_cpusched}" -fi -pkgname=("${pkgbase}" "${pkgbase}-headers") -pkgver="${_basekernel}"."${_sub}" -pkgrel=33 -pkgdesc='Linux-tkg' -arch=('x86_64') # no i686 in here -url="http://www.kernel.org/" -license=('GPL2') -makedepends=('xmlto' 'docbook-xsl' 'kmod' 'inetutils' 'bc' 'libelf' 'pahole' 'patchutils' 'flex' 'python-sphinx' 'python-sphinx_rtd_theme' 'graphviz' 'imagemagick' 'git') -optdepends=('schedtool') -options=('!strip' 'docs') -source=("https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-${_basekernel}.tar.xz" - "https://cdn.kernel.org/pub/linux/kernel/v5.x/patch-${pkgver}.xz" - "https://raw.githubusercontent.com/graysky2/kernel_gcc_patch/master/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v5.7%2B.patch" - 'config.x86_64' # stock Arch config - 'config_hardened.x86_64' # hardened Arch config - 90-cleanup.hook - cleanup - # ARCH Patches - 0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch - # TkG - 0002-clear-patches.patch - 0003-glitched-base.patch - 0003-glitched-cfs.patch - 0004-glitched-ondemand-muqss.patch - 0004-glitched-muqss.patch - 0004-5.7-ck1.patch - 0005-glitched-ondemand-pds.patch - 0005-glitched-pds.patch - 0005-v5.7_undead-pds099o.patch - 0006-add-acs-overrides_iommu.patch - 0007-v5.7-fsync.patch - 0008-5.7-bcachefs.patch - 0009-glitched-ondemand-bmq.patch - 0009-glitched-bmq.patch - 0009-prjc_v5.7-r3.patch - 0011-ZFS-fix.patch - 0012-linux-hardened.patch - 0012-misc-additions.patch -) -sha256sums=('de8163bb62f822d84f7a3983574ec460060bf013a78ff79cd7c979ff1ec1d7e0' - '66a0173a13cd58015f5bf1b14f67bfa15dc1db5d8e7225fcd95ac2e9a5341653' - '1f56a2466bd9b4477925682d8f944fabb38727140e246733214fe50aa326fc47' - '6313ccad7f8e4d8ce09dd5bdb51b8dfa124d0034d7097ba47008380a14a84f09' - '15ce09447b7e9b28425c1df5961c955378f2829e4115037337eef347b1db3d9d' - '1e15fc2ef3fa770217ecc63a220e5df2ddbcf3295eb4a021171e7edd4c6cc898' - '66a03c246037451a77b4d448565b1d7e9368270c7d02872fbd0b5d024ed0a997' - '31dc68e84aecfb7d069efb1305049122c65694676be8b955634abcf0675922a2' - 'd02bf5ca08fd610394b9d3a0c3b176d74af206f897dee826e5cbaec97bb4a4aa' - 'bbf332201423888257c9687bee06916a5dbbac2194f9df5b4126100c40e48d16' - '7058e57fd68367b029adc77f2a82928f1433daaf02c8c279cb2d13556c8804d7' - 'c605f638d74c61861ebdc36ebd4cb8b6475eae2f6273e1ccb2bbb3e10a2ec3fe' - 'bc69d6e5ee8172b0242c8fa72d13cfe2b8d2b6601468836908a7dfe8b78a3bbb' - '8d8aec86e34dbec6cc3a47f2cd55dc9212e95d36b6cd34d6e637c66731e7d838' - '62496f9ca788996181ef145f96ad26291282fcc3fb95cdc04080dcf84365be33' - '7fd8e776209dac98627453fda754bdf9aff4a09f27cb0b3766d7983612eb3c74' - '55be5e4c6254da0a9d34bbfac807a70d8b58b3f7b2ec852026195c4db5e263e2' - 
'19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a' - 'cd225e86d72eaf6c31ef3d7b20df397f4cc44ddd04389850691292cdf292b204' - 'd2214504c43f9d297a8ef68dffc198143bfebf85614b71637a71978d7a86bd78' - '9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177' - '965a517a283f265a012545fbb5cc9e516efc9f6166d2aa1baf7293a32a1086b7' - 'b2a2ae866fc3f1093f67e69ba59738827e336b8f800fb0487599127f7f3ef881' - '49262ce4a8089fa70275aad742fc914baa28d9c384f710c9a62f64796d13e104' - '6821f92bd2bde3a3938d17b070d70f18a2f33cae81647567b5a4d94c9cd75f3d' - 'bdc60c83cd5fbf9912f9201d6e4fe3c84fe5f634e6823bd8e78264ad606b3a9e') - -export KBUILD_BUILD_HOST=archlinux -export KBUILD_BUILD_USER=$pkgbase -export KBUILD_BUILD_TIMESTAMP="$(date -Ru${SOURCE_DATE_EPOCH:+d @$SOURCE_DATE_EPOCH})" - -prepare() { - rm -rf $pkgdir # Nuke the entire pkg folder so it'll get regenerated clean on next build - - ln -s "${_where}/customization.cfg" "${srcdir}" # workaround - - cd "${srcdir}/linux-${_basekernel}" - - _tkg_srcprep -} - -build() { - cd "${srcdir}/linux-${_basekernel}" - - # Use custom compiler paths if defined - if [ -n "${CUSTOM_GCC_PATH}" ]; then - PATH=${CUSTOM_GCC_PATH}/bin:${CUSTOM_GCC_PATH}/lib:${CUSTOM_GCC_PATH}/include:${PATH} - fi - - if [ "$_force_all_threads" = "true" ]; then - _force_all_threads="-j$((`nproc`*2))" - else - _force_all_threads="${MAKEFLAGS}" - fi - - # ccache - if [ "$_noccache" != "true" ] && pacman -Qq ccache &> /dev/null; then - export PATH="/usr/lib/ccache/bin/:$PATH" - export CCACHE_SLOPPINESS="file_macro,locale,time_macros" - export CCACHE_NOHASHDIR="true" - msg2 'ccache was found and will be used' - fi - - # document the TkG variables, excluding "_", "_EXT_CONFIG_PATH", and "_where". - declare -p | cut -d ' ' -f 3 | grep -P '^_(?!=|EXT_CONFIG_PATH|where)' > "${srcdir}/customization-full.cfg" - - # build! - _runtime=$( time ( schedtool -B -n 1 -e ionice -n 1 make ${_force_all_threads} LOCALVERSION= bzImage modules 2>&1 ) 3>&1 1>&2 2>&3 ) || _runtime=$( time ( make ${_force_all_threads} LOCALVERSION= bzImage modules 2>&1 ) 3>&1 1>&2 2>&3 ) -} - -hackbase() { - pkgdesc="The $pkgdesc kernel and modules" - depends=('coreutils' 'kmod' 'initramfs') - optdepends=('linux-docs: Kernel hackers manual - HTML documentation that comes with the Linux kernel.' - 'crda: to set the correct wireless channels of your country.' - 'linux-firmware: Firmware files for Linux' - 'modprobed-db: Keeps track of EVERY kernel module that has ever been probed. Useful for make localmodconfig.' - 'nvidia-tkg: NVIDIA drivers for all installed kernels - non-dkms version.' - 'nvidia-dkms-tkg: NVIDIA drivers for all installed kernels - dkms version.' 
- 'update-grub: Simple wrapper around grub-mkconfig.') - provides=("linux=${pkgver}" "${pkgbase}" VIRTUALBOX-GUEST-MODULES WIREGUARD-MODULE) - replaces=(virtualbox-guest-modules-arch wireguard-arch) - - cd "${srcdir}/linux-${_basekernel}" - - # get kernel version - local _kernver="$(\033[1;0m \033[1;1m$1\033[1;0m" >&2 -} - -error() { - echo -e " \033[1;31m==> ERROR: $1\033[1;0m" >&2 -} - -warning() { - echo -e " \033[1;33m==> WARNING: $1\033[1;0m" >&2 -} - -plain() { - echo "$1" >&2 -} - -# Stop the script at any ecountered error -set -e - -_where=`pwd` -srcdir="$_where" - -source linux*-tkg-config/prepare - -_cpu_opt_patch_link="https://raw.githubusercontent.com/graysky2/kernel_gcc_patch/master/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v${_basekernel}%2B.patch" - -source customization.cfg - -if [ "$1" != "install" ] && [ "$1" != "config" ] && [ "$1" != "uninstall-help" ]; then - msg2 "Argument not recognised, options are: - - config : shallow clones the linux ${_basekernel}.x git tree into the folder linux-${_basekernel}, then applies on it the extra patches and prepares the .config file - by copying the one from the current linux system in /boot/config-`uname -r` and updates it. - - install : [RPM and DEB based distros only], does the config step, proceeds to compile, then prompts to install - - uninstall-help : [RPM and DEB based distros only], lists the installed kernels in this system, then gives a hint on how to uninstall them manually." - exit 0 -fi - -# Load external configuration file if present. Available variable values will overwrite customization.cfg ones. -if [ -e "$_EXT_CONFIG_PATH" ]; then - msg2 "External configuration file $_EXT_CONFIG_PATH will be used and will override customization.cfg values." - source "$_EXT_CONFIG_PATH" -fi - -_misc_adds="false" # We currently don't want this enabled on non-Arch - -if [ "$1" = "install" ] || [ "$1" = "config" ]; then - - if [ -z $_distro ] && [ "$1" = "install" ]; then - while true; do - echo "Which linux distribution are you running ?" - echo "if it's not on the list, chose the closest one to it: Fedora/Suse for RPM, Ubuntu/Debian for DEB" - echo " 1) Debian" - echo " 2) Fedora" - echo " 3) Suse" - echo " 4) Ubuntu" - read -p "[1-4]: " _distro_index - - if [ "$_distro_index" = "1" ]; then - _distro="Debian" - break - elif [ "$_distro_index" = "2" ]; then - _distro="Fedora" - break - elif [ "$_distro_index" = "3" ]; then - _distro="Suse" - break - elif [ "$_distro_index" = "4" ]; then - _distro="Ubuntu" - break - else - echo "Wrong index." - fi - done - fi - - if [[ $1 = "install" && "$_distro" != "Ubuntu" && "$_distro" != "Debian" && "$_distro" != "Fedora" && "$_distro" != "Suse" ]]; then - msg2 "Variable \"_distro\" in \"customization.cfg\" hasn't been set to \"Ubuntu\", \"Debian\", \"Fedora\" or \"Suse\"" - msg2 "This script can only install custom kernels for RPM and DEB based distros, though only those keywords are permitted. Exiting..." 
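A minimal invocation sketch for the three modes validated here, assuming the script is run from the linux57-tkg directory after editing customization.cfg (or the optional file pointed to by _EXT_CONFIG_PATH):

    ./install.sh config          # shallow clone, patch and prepare the .config only
    ./install.sh install         # config step, then build and optionally install DEB/RPM packages
    ./install.sh uninstall-help  # list installed tkg kernels and show removal hints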
- exit 0 - fi - - if [ "$_distro" = "Ubuntu" ] || [ "$_distro" = "Debian" ]; then - msg2 "Installing dependencies" - sudo apt install git build-essential kernel-package fakeroot libncurses5-dev libssl-dev ccache bison flex qtbase5-dev -y - elif [ "$_distro" = "Fedora" ]; then - msg2 "Installing dependencies" - sudo dnf install fedpkg fedora-packager rpmdevtools ncurses-devel pesign grubby qt5-devel libXi-devel gcc-c++ git ccache flex bison elfutils-libelf-devel openssl-devel dwarves rpm-build -y - elif [ "$_distro" = "Suse" ]; then - msg2 "Installing dependencies" - sudo zypper install -y rpmdevtools ncurses-devel pesign libXi-devel gcc-c++ git ccache flex bison elfutils libelf-devel openssl-devel dwarves make patch bc rpm-build libqt5-qtbase-common-devel libqt5-qtbase-devel lz4 - fi - - # Force prepare script to avoid Arch specific commands if the user is using `config` - if [ "$1" = "config" ]; then - _distro="" - fi - - if [ -d linux-${_basekernel}.orig ]; then - rm -rf linux-${_basekernel}.orig - fi - - if [ -d linux-${_basekernel} ]; then - msg2 "Reseting files in linux-$_basekernel to their original state and getting latest updates" - cd "$_where"/linux-${_basekernel} - git checkout --force linux-$_basekernel.y - git clean -f -d -x - git pull - msg2 "Done" - cd "$_where" - else - msg2 "Shallow git cloning linux $_basekernel" - git clone --branch linux-$_basekernel.y --single-branch --depth=1 https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git linux-${_basekernel} - msg2 "Done" - fi - - # Define current kernel subversion - if [ -z $_kernel_subver ]; then - cd "$_where"/linux-${_basekernel} - _kernelverstr=`git describe` - _kernel_subver=${_kernelverstr:5} - cd "$_where" - fi - - - # Run init script that is also run in PKGBUILD, it will define some env vars that we will use - _tkg_initscript - - cd "$_where" - msg2 "Downloading Graysky2's CPU optimisations patch" - wget "$_cpu_opt_patch_link" - - # Follow Ubuntu install isntructions in https://wiki.ubuntu.com/KernelTeam/GitKernelBuild - - # cd in linux folder, copy Ubuntu's current config file, update with new params - cd "$_where"/linux-${_basekernel} - - msg2 "Copying current kernel's config and running make oldconfig..." - cp /boot/config-`uname -r` .config - if [ "$_distro" = "Debian" ]; then #Help Debian cert problem. - sed -i -e 's#CONFIG_SYSTEM_TRUSTED_KEYS="debian/certs/test-signing-certs.pem"#CONFIG_SYSTEM_TRUSTED_KEYS=""#g' .config - sed -i -e 's#CONFIG_SYSTEM_TRUSTED_KEYS="debian/certs/debian-uefi-certs.pem"#CONFIG_SYSTEM_TRUSTED_KEYS=""#g' .config - fi - yes '' | make oldconfig - msg2 "Done" - - # apply linux-tkg patching script - _tkg_srcprep - - msg2 "Configuration done." 
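A condensed, hand-run equivalent of the configuration step performed above, assuming _basekernel=5.7 as in this flavor and an illustrative target directory name (the real flow additionally runs _tkg_initscript/_tkg_srcprep, downloads the CPU-optimisation patch and applies the Debian certificate workaround):

    git clone --branch linux-5.7.y --single-branch --depth=1 \
        https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git linux-5.7
    cd linux-5.7
    cp /boot/config-"$(uname -r)" .config
    yes '' | make oldconfig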
-fi - -if [ "$1" = "install" ]; then - - # Use custom compiler paths if defined - if [ -n "${CUSTOM_GCC_PATH}" ]; then - PATH=${CUSTOM_GCC_PATH}/bin:${CUSTOM_GCC_PATH}/lib:${CUSTOM_GCC_PATH}/include:${PATH} - fi - - if [ "$_force_all_threads" = "true" ]; then - _thread_num=`nproc` - else - _thread_num=`expr \`nproc\` / 4` - if [ "$_thread_num" = "0" ]; then - _thread_num=1 - fi - fi - - # ccache - if [ "$_noccache" != "true" ]; then - - if [ "$_distro" = "Ubuntu" ] || [ "$_distro" = "Debian" ]; then - export PATH="/usr/lib/ccache/bin/:$PATH" - elif [ "$_distro" = "Fedora" ] || [ "$_distro" = "Suse" ]; then - export PATH="/usr/lib64/ccache/:$PATH" - fi - - export CCACHE_SLOPPINESS="file_macro,locale,time_macros" - export CCACHE_NOHASHDIR="true" - msg2 'ccache was found and will be used' - - fi - - if [ -z $_kernel_localversion ]; then - _kernel_flavor="tkg-${_cpusched}" - else - _kernel_flavor="tkg-${_kernel_localversion}" - fi - - if [ "$_distro" = "Ubuntu" ] || [ "$_distro" = "Debian" ]; then - - if make -j ${_thread_num} deb-pkg LOCALVERSION=-${_kernel_flavor}; then - msg2 "Building successfully finished!" - - cd "$_where" - - # Create DEBS folder if it doesn't exist - mkdir -p DEBS - - # Move rpm files to RPMS folder inside the linux-tkg folder - mv "$_where"/*.deb "$_where"/DEBS/ - - read -p "Do you want to install the new Kernel ? y/[n]: " _install - if [[ $_install =~ [yY] ]] || [ $_install = "yes" ] || [ $_install = "Yes" ]; then - cd "$_where" - _kernelname=$_basekernel.$_kernel_subver-$_kernel_flavor - _headers_deb="linux-headers-${_kernelname}*.deb" - _image_deb="linux-image-${_kernelname}_*.deb" - _kernel_devel_deb="linux-libc-dev_${_kernelname}*.deb" - - cd DEBS - sudo dpkg -i $_headers_deb $_image_deb $_kernel_devel_deb - fi - fi - - elif [[ "$_distro" = "Fedora" || "$_distro" = "Suse" ]]; then - - # Replace dashes with underscores, it seems that it's being done by binrpm-pkg - # Se we can actually refer properly to the rpm files. - _kernel_flavor=${_kernel_flavor//-/_} - - if make -j ${_thread_num} rpm-pkg EXTRAVERSION="_${_kernel_flavor}"; then - msg2 "Building successfully finished!" - - cd "$_where" - - # Create RPMS folder if it doesn't exist - mkdir -p RPMS - - # Move rpm files to RPMS folder inside the linux-tkg folder - mv ~/rpmbuild/RPMS/x86_64/* "$_where"/RPMS/ - - #Clean up the original folder, unneeded and takes a lot of space - rm -rf ~/rpmbuild/ - - read -p "Do you want to install the new Kernel ? 
y/[n]: " _install - if [ "$_install" = "y" ] || [ "$_install" = "Y" ] || [ "$_install" = "yes" ] || [ "$_install" = "Yes" ]; then - - _kernelname=$_basekernel.${_kernel_subver}_$_kernel_flavor - _headers_rpm="kernel-headers-${_kernelname}*.rpm" - _kernel_rpm="kernel-${_kernelname}*.rpm" - _kernel_devel_rpm="kernel-devel-${_kernelname}*.rpm" - - cd RPMS - if [ "$_distro" = "Fedora" ]; then - sudo dnf install $_headers_rpm $_kernel_rpm $_kernel_devel_rpm - elif [ "$_distro" = "Suse" ]; then - msg2 "Some files from 'linux-glibc-devel' will be replaced by files from the custom kernel-hearders package" - msg2 "To revert back to the original kernel headers do 'sudo zypper install -f linux-glibc-devel'" - sudo zypper install --replacefiles --allow-unsigned-rpm $_headers_rpm $_kernel_rpm $_kernel_devel_rpm - fi - - msg2 "Install successful" - fi - fi - fi -fi - -if [ "$1" = "uninstall-help" ]; then - - cd "$_where" - msg2 "List of installed custom tkg kernels: " - - if [ "$_distro" = "Ubuntu" ]; then - dpkg -l "*tkg*" | grep "linux.*tkg" - dpkg -l "*linux-libc-dev*" | grep "linux.*tkg" - msg2 "To uninstall a version, you should remove the linux-image, linux-headers and linux-libc-dev associated to it (if installed), with: " - msg2 " sudo apt remove linux-image-VERSION linux-headers-VERSION linux-libc-dev-VERSION" - msg2 " where VERSION is displayed in the lists above, uninstall only versions that have \"tkg\" in its name" - elif [ "$_distro" = "Fedora" ]; then - dnf list --installed kernel* - msg2 "To uninstall a version, you should remove the kernel, kernel-headers and kernel-devel associated to it (if installed), with: " - msg2 " sudo dnf remove --noautoremove kernel-VERSION kernel-devel-VERSION kernel-headers-VERSION" - msg2 " where VERSION is displayed in the second column" - elif [ "$_distro" = "Suse" ]; then - zypper packages --installed-only | grep "kernel.*tkg" - msg2 "To uninstall a version, you should remove the kernel, kernel-headers and kernel-devel associated to it (if installed), with: " - msg2 " sudo zypper remove --no-clean-deps kernel-VERSION kernel-devel-VERSION kernel-headers-VERSION" - msg2 " where VERSION is displayed in the second to last column" - fi - -fi diff --git a/linux57-tkg/linux57-tkg-config/90-cleanup.hook b/linux57-tkg/linux57-tkg-config/90-cleanup.hook deleted file mode 100644 index 99f5221..0000000 --- a/linux57-tkg/linux57-tkg-config/90-cleanup.hook +++ /dev/null @@ -1,14 +0,0 @@ -[Trigger] -Type = File -Operation = Install -Operation = Upgrade -Operation = Remove -Target = usr/lib/modules/*/ -Target = !usr/lib/modules/*/?* - -[Action] -Description = Cleaning up... -When = PostTransaction -Exec = /usr/share/libalpm/scripts/cleanup -NeedsTargets - diff --git a/linux57-tkg/linux57-tkg-config/cleanup b/linux57-tkg/linux57-tkg-config/cleanup deleted file mode 100755 index c00c08d..0000000 --- a/linux57-tkg/linux57-tkg-config/cleanup +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -for _f in /usr/lib/modules/*tkg*; do - if [[ ! -e ${_f}/vmlinuz ]]; then - rm -rf "$_f" - fi -done - -# vim:set ft=sh sw=2 et: - diff --git a/linux57-tkg/linux57-tkg-config/config.x86_64 b/linux57-tkg/linux57-tkg-config/config.x86_64 deleted file mode 100644 index 1014972..0000000 --- a/linux57-tkg/linux57-tkg-config/config.x86_64 +++ /dev/null @@ -1,10864 +0,0 @@ -# -# Automatically generated file; DO NOT EDIT. 
-# Linux/x86 5.7.11-arch1 Kernel Configuration -# - -# -# Compiler: gcc (GCC) 10.1.0 -# -CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=100100 -CONFIG_LD_VERSION=234000000 -CONFIG_CLANG_VERSION=0 -CONFIG_CC_CAN_LINK=y -CONFIG_CC_HAS_ASM_GOTO=y -CONFIG_CC_HAS_ASM_INLINE=y -CONFIG_IRQ_WORK=y -CONFIG_BUILDTIME_TABLE_SORT=y -CONFIG_THREAD_INFO_IN_TASK=y - -# -# General setup -# -CONFIG_INIT_ENV_ARG_LIMIT=32 -# CONFIG_COMPILE_TEST is not set -CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y -CONFIG_BUILD_SALT="" -CONFIG_HAVE_KERNEL_GZIP=y -CONFIG_HAVE_KERNEL_BZIP2=y -CONFIG_HAVE_KERNEL_LZMA=y -CONFIG_HAVE_KERNEL_XZ=y -CONFIG_HAVE_KERNEL_LZO=y -CONFIG_HAVE_KERNEL_LZ4=y -# CONFIG_KERNEL_GZIP is not set -# CONFIG_KERNEL_BZIP2 is not set -# CONFIG_KERNEL_LZMA is not set -CONFIG_KERNEL_XZ=y -# CONFIG_KERNEL_LZO is not set -# CONFIG_KERNEL_LZ4 is not set -CONFIG_DEFAULT_HOSTNAME="archlinux" -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -CONFIG_SYSVIPC_SYSCTL=y -CONFIG_POSIX_MQUEUE=y -CONFIG_POSIX_MQUEUE_SYSCTL=y -CONFIG_CROSS_MEMORY_ATTACH=y -# CONFIG_USELIB is not set -CONFIG_AUDIT=y -CONFIG_HAVE_ARCH_AUDITSYSCALL=y -CONFIG_AUDITSYSCALL=y - -# -# IRQ subsystem -# -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_GENERIC_IRQ_SHOW=y -CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y -CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_GENERIC_IRQ_MIGRATION=y -CONFIG_HARDIRQS_SW_RESEND=y -CONFIG_GENERIC_IRQ_CHIP=y -CONFIG_IRQ_DOMAIN=y -CONFIG_IRQ_SIM=y -CONFIG_IRQ_DOMAIN_HIERARCHY=y -CONFIG_GENERIC_MSI_IRQ=y -CONFIG_GENERIC_MSI_IRQ_DOMAIN=y -CONFIG_IRQ_MSI_IOMMU=y -CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y -CONFIG_GENERIC_IRQ_RESERVATION_MODE=y -CONFIG_IRQ_FORCED_THREADING=y -CONFIG_SPARSE_IRQ=y -# CONFIG_GENERIC_IRQ_DEBUGFS is not set -# end of IRQ subsystem - -CONFIG_CLOCKSOURCE_WATCHDOG=y -CONFIG_ARCH_CLOCKSOURCE_INIT=y -CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y -CONFIG_GENERIC_TIME_VSYSCALL=y -CONFIG_GENERIC_CLOCKEVENTS=y -CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y -CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y -CONFIG_GENERIC_CMOS_UPDATE=y - -# -# Timers subsystem -# -CONFIG_TICK_ONESHOT=y -CONFIG_NO_HZ_COMMON=y -# CONFIG_HZ_PERIODIC is not set -CONFIG_NO_HZ_IDLE=y -# CONFIG_NO_HZ_FULL is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -# end of Timers subsystem - -# CONFIG_PREEMPT_NONE is not set -# CONFIG_PREEMPT_VOLUNTARY is not set -CONFIG_PREEMPT=y -CONFIG_PREEMPT_COUNT=y -CONFIG_PREEMPTION=y - -# -# CPU/Task time and stats accounting -# -CONFIG_TICK_CPU_ACCOUNTING=y -# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set -CONFIG_IRQ_TIME_ACCOUNTING=y -CONFIG_HAVE_SCHED_AVG_IRQ=y -# CONFIG_SCHED_THERMAL_PRESSURE is not set -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BSD_PROCESS_ACCT_V3=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_PSI=y -# CONFIG_PSI_DEFAULT_DISABLED is not set -# end of CPU/Task time and stats accounting - -CONFIG_CPU_ISOLATION=y - -# -# RCU Subsystem -# -CONFIG_TREE_RCU=y -CONFIG_PREEMPT_RCU=y -CONFIG_RCU_EXPERT=y -CONFIG_SRCU=y -CONFIG_TREE_SRCU=y -CONFIG_TASKS_RCU=y -CONFIG_RCU_STALL_COMMON=y -CONFIG_RCU_NEED_SEGCBLIST=y -CONFIG_RCU_FANOUT=64 -CONFIG_RCU_FANOUT_LEAF=16 -CONFIG_RCU_FAST_NO_HZ=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_DELAY=500 -# CONFIG_RCU_NOCB_CPU is not set -# end of RCU Subsystem - -CONFIG_BUILD_BIN2C=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -# CONFIG_IKHEADERS is not set -CONFIG_LOG_BUF_SHIFT=17 -CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 -CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 -CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y - -# -# Scheduler features -# -CONFIG_UCLAMP_TASK=y 
-CONFIG_UCLAMP_BUCKETS_COUNT=5 -# end of Scheduler features - -CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y -CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y -CONFIG_CC_HAS_INT128=y -CONFIG_ARCH_SUPPORTS_INT128=y -CONFIG_NUMA_BALANCING=y -CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y -CONFIG_CGROUPS=y -CONFIG_PAGE_COUNTER=y -CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y -CONFIG_MEMCG_SWAP_ENABLED=y -CONFIG_MEMCG_KMEM=y -CONFIG_BLK_CGROUP=y -CONFIG_CGROUP_WRITEBACK=y -CONFIG_CGROUP_SCHED=y -CONFIG_FAIR_GROUP_SCHED=y -CONFIG_CFS_BANDWIDTH=y -# CONFIG_RT_GROUP_SCHED is not set -CONFIG_UCLAMP_TASK_GROUP=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_RDMA=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_HUGETLB=y -CONFIG_CPUSETS=y -CONFIG_PROC_PID_CPUSET=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_CGROUP_PERF=y -CONFIG_CGROUP_BPF=y -# CONFIG_CGROUP_DEBUG is not set -CONFIG_SOCK_CGROUP_DATA=y -CONFIG_NAMESPACES=y -CONFIG_UTS_NS=y -CONFIG_TIME_NS=y -CONFIG_IPC_NS=y -CONFIG_USER_NS=y -CONFIG_USER_NS_UNPRIVILEGED=y -CONFIG_PID_NS=y -CONFIG_NET_NS=y -CONFIG_CHECKPOINT_RESTORE=y -CONFIG_SCHED_AUTOGROUP=y -# CONFIG_SYSFS_DEPRECATED is not set -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -CONFIG_RD_GZIP=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_RD_XZ=y -CONFIG_RD_LZO=y -CONFIG_RD_LZ4=y -CONFIG_BOOT_CONFIG=y -CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SYSCTL=y -CONFIG_HAVE_UID16=y -CONFIG_SYSCTL_EXCEPTION_TRACE=y -CONFIG_HAVE_PCSPKR_PLATFORM=y -CONFIG_BPF=y -CONFIG_EXPERT=y -# CONFIG_UID16 is not set -CONFIG_MULTIUSER=y -CONFIG_SGETMASK_SYSCALL=y -# CONFIG_SYSFS_SYSCALL is not set -CONFIG_FHANDLE=y -CONFIG_POSIX_TIMERS=y -CONFIG_PRINTK=y -CONFIG_PRINTK_NMI=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_PCSPKR_PLATFORM=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -CONFIG_FUTEX_PI=y -CONFIG_EPOLL=y -CONFIG_SIGNALFD=y -CONFIG_TIMERFD=y -CONFIG_EVENTFD=y -CONFIG_SHMEM=y -CONFIG_AIO=y -CONFIG_IO_URING=y -CONFIG_ADVISE_SYSCALLS=y -CONFIG_MEMBARRIER=y -CONFIG_KALLSYMS=y -CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y -CONFIG_KALLSYMS_BASE_RELATIVE=y -CONFIG_BPF_LSM=y -CONFIG_BPF_SYSCALL=y -CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y -CONFIG_BPF_JIT_ALWAYS_ON=y -CONFIG_BPF_JIT_DEFAULT_ON=y -# CONFIG_USERFAULTFD is not set -CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y -CONFIG_RSEQ=y -# CONFIG_DEBUG_RSEQ is not set -# CONFIG_EMBEDDED is not set -CONFIG_HAVE_PERF_EVENTS=y -# CONFIG_PC104 is not set - -# -# Kernel Performance Events And Counters -# -CONFIG_PERF_EVENTS=y -# CONFIG_DEBUG_PERF_USE_VMALLOC is not set -# end of Kernel Performance Events And Counters - -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLUB_DEBUG=y -# CONFIG_SLUB_MEMCG_SYSFS_ON is not set -# CONFIG_COMPAT_BRK is not set -# CONFIG_SLAB is not set -CONFIG_SLUB=y -# CONFIG_SLOB is not set -CONFIG_SLAB_MERGE_DEFAULT=y -CONFIG_SLAB_FREELIST_RANDOM=y -CONFIG_SLAB_FREELIST_HARDENED=y -CONFIG_SHUFFLE_PAGE_ALLOCATOR=y -CONFIG_SLUB_CPU_PARTIAL=y -CONFIG_SYSTEM_DATA_VERIFICATION=y -CONFIG_PROFILING=y -CONFIG_TRACEPOINTS=y -# end of General setup - -CONFIG_64BIT=y -CONFIG_X86_64=y -CONFIG_X86=y -CONFIG_INSTRUCTION_DECODER=y -CONFIG_OUTPUT_FORMAT="elf64-x86-64" -CONFIG_LOCKDEP_SUPPORT=y -CONFIG_STACKTRACE_SUPPORT=y -CONFIG_MMU=y -CONFIG_ARCH_MMAP_RND_BITS_MIN=28 -CONFIG_ARCH_MMAP_RND_BITS_MAX=32 -CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 -CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 -CONFIG_GENERIC_ISA_DMA=y -CONFIG_GENERIC_BUG=y -CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_GENERIC_CALIBRATE_DELAY=y 
-CONFIG_ARCH_HAS_CPU_RELAX=y -CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y -CONFIG_ARCH_HAS_FILTER_PGPROT=y -CONFIG_HAVE_SETUP_PER_CPU_AREA=y -CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y -CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y -CONFIG_ARCH_HIBERNATION_POSSIBLE=y -CONFIG_ARCH_SUSPEND_POSSIBLE=y -CONFIG_ARCH_WANT_GENERAL_HUGETLB=y -CONFIG_ZONE_DMA32=y -CONFIG_AUDIT_ARCH=y -CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y -CONFIG_HAVE_INTEL_TXT=y -CONFIG_X86_64_SMP=y -CONFIG_ARCH_SUPPORTS_UPROBES=y -CONFIG_FIX_EARLYCON_MEM=y -CONFIG_DYNAMIC_PHYSICAL_MASK=y -CONFIG_PGTABLE_LEVELS=5 -CONFIG_CC_HAS_SANE_STACKPROTECTOR=y - -# -# Processor type and features -# -CONFIG_ZONE_DMA=y -CONFIG_SMP=y -CONFIG_X86_FEATURE_NAMES=y -CONFIG_X86_X2APIC=y -CONFIG_X86_MPPARSE=y -# CONFIG_GOLDFISH is not set -CONFIG_RETPOLINE=y -CONFIG_X86_CPU_RESCTRL=y -# CONFIG_X86_EXTENDED_PLATFORM is not set -CONFIG_X86_INTEL_LPSS=y -CONFIG_X86_AMD_PLATFORM_DEVICE=y -CONFIG_IOSF_MBI=y -# CONFIG_IOSF_MBI_DEBUG is not set -CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y -CONFIG_SCHED_OMIT_FRAME_POINTER=y -CONFIG_HYPERVISOR_GUEST=y -CONFIG_PARAVIRT=y -CONFIG_PARAVIRT_XXL=y -# CONFIG_PARAVIRT_DEBUG is not set -CONFIG_PARAVIRT_SPINLOCKS=y -CONFIG_X86_HV_CALLBACK_VECTOR=y -CONFIG_XEN=y -CONFIG_XEN_PV=y -CONFIG_XEN_PV_SMP=y -CONFIG_XEN_DOM0=y -CONFIG_XEN_PVHVM=y -CONFIG_XEN_PVHVM_SMP=y -CONFIG_XEN_512GB=y -CONFIG_XEN_SAVE_RESTORE=y -# CONFIG_XEN_DEBUG_FS is not set -CONFIG_XEN_PVH=y -CONFIG_KVM_GUEST=y -CONFIG_ARCH_CPUIDLE_HALTPOLL=y -CONFIG_PVH=y -# CONFIG_KVM_DEBUG_FS is not set -CONFIG_PARAVIRT_TIME_ACCOUNTING=y -CONFIG_PARAVIRT_CLOCK=y -CONFIG_JAILHOUSE_GUEST=y -CONFIG_ACRN_GUEST=y -# CONFIG_MK8 is not set -# CONFIG_MPSC is not set -# CONFIG_MCORE2 is not set -# CONFIG_MATOM is not set -CONFIG_GENERIC_CPU=y -CONFIG_X86_INTERNODE_CACHE_SHIFT=6 -CONFIG_X86_L1_CACHE_SHIFT=6 -CONFIG_X86_TSC=y -CONFIG_X86_CMPXCHG64=y -CONFIG_X86_CMOV=y -CONFIG_X86_MINIMUM_CPU_FAMILY=64 -CONFIG_X86_DEBUGCTLMSR=y -CONFIG_IA32_FEAT_CTL=y -CONFIG_X86_VMX_FEATURE_NAMES=y -CONFIG_PROCESSOR_SELECT=y -CONFIG_CPU_SUP_INTEL=y -CONFIG_CPU_SUP_AMD=y -CONFIG_CPU_SUP_HYGON=y -CONFIG_CPU_SUP_CENTAUR=y -CONFIG_CPU_SUP_ZHAOXIN=y -CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y -CONFIG_DMI=y -CONFIG_GART_IOMMU=y -# CONFIG_MAXSMP is not set -CONFIG_NR_CPUS_RANGE_BEGIN=2 -CONFIG_NR_CPUS_RANGE_END=512 -CONFIG_NR_CPUS_DEFAULT=64 -CONFIG_NR_CPUS=320 -CONFIG_SCHED_SMT=y -CONFIG_SCHED_MC=y -CONFIG_SCHED_MC_PRIO=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y -CONFIG_X86_MCE=y -# CONFIG_X86_MCELOG_LEGACY is not set -CONFIG_X86_MCE_INTEL=y -CONFIG_X86_MCE_AMD=y -CONFIG_X86_MCE_THRESHOLD=y -CONFIG_X86_MCE_INJECT=m -CONFIG_X86_THERMAL_VECTOR=y - -# -# Performance monitoring -# -CONFIG_PERF_EVENTS_INTEL_UNCORE=m -CONFIG_PERF_EVENTS_INTEL_RAPL=m -CONFIG_PERF_EVENTS_INTEL_CSTATE=m -CONFIG_PERF_EVENTS_AMD_POWER=m -# end of Performance monitoring - -CONFIG_X86_16BIT=y -CONFIG_X86_ESPFIX64=y -CONFIG_X86_VSYSCALL_EMULATION=y -CONFIG_X86_IOPL_IOPERM=y -CONFIG_I8K=m -CONFIG_MICROCODE=y -CONFIG_MICROCODE_INTEL=y -CONFIG_MICROCODE_AMD=y -CONFIG_MICROCODE_OLD_INTERFACE=y -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -CONFIG_X86_5LEVEL=y -CONFIG_X86_DIRECT_GBPAGES=y -# CONFIG_X86_CPA_STATISTICS is not set -CONFIG_AMD_MEM_ENCRYPT=y -# CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT is not set -CONFIG_NUMA=y -CONFIG_AMD_NUMA=y -CONFIG_X86_64_ACPI_NUMA=y -CONFIG_NODES_SPAN_OTHER_NODES=y -# CONFIG_NUMA_EMU is not set -CONFIG_NODES_SHIFT=5 -CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_ARCH_SPARSEMEM_DEFAULT=y 
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y -CONFIG_ARCH_MEMORY_PROBE=y -CONFIG_ARCH_PROC_KCORE_TEXT=y -CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 -CONFIG_X86_PMEM_LEGACY_DEVICE=y -CONFIG_X86_PMEM_LEGACY=m -CONFIG_X86_CHECK_BIOS_CORRUPTION=y -CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y -CONFIG_X86_RESERVE_LOW=64 -CONFIG_MTRR=y -CONFIG_MTRR_SANITIZER=y -CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=1 -CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=0 -CONFIG_X86_PAT=y -CONFIG_ARCH_USES_PG_UNCACHED=y -CONFIG_ARCH_RANDOM=y -CONFIG_X86_SMAP=y -CONFIG_X86_UMIP=y -CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y -# CONFIG_X86_INTEL_TSX_MODE_OFF is not set -# CONFIG_X86_INTEL_TSX_MODE_ON is not set -CONFIG_X86_INTEL_TSX_MODE_AUTO=y -CONFIG_EFI=y -CONFIG_EFI_STUB=y -CONFIG_EFI_MIXED=y -CONFIG_SECCOMP=y -# CONFIG_HZ_100 is not set -# CONFIG_HZ_250 is not set -CONFIG_HZ_300=y -# CONFIG_HZ_1000 is not set -CONFIG_HZ=300 -CONFIG_SCHED_HRTICK=y -CONFIG_KEXEC=y -CONFIG_KEXEC_FILE=y -CONFIG_ARCH_HAS_KEXEC_PURGATORY=y -# CONFIG_KEXEC_SIG is not set -CONFIG_CRASH_DUMP=y -CONFIG_KEXEC_JUMP=y -CONFIG_PHYSICAL_START=0x1000000 -CONFIG_RELOCATABLE=y -CONFIG_RANDOMIZE_BASE=y -CONFIG_X86_NEED_RELOCS=y -CONFIG_PHYSICAL_ALIGN=0x200000 -CONFIG_DYNAMIC_MEMORY_LAYOUT=y -CONFIG_RANDOMIZE_MEMORY=y -CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0x1 -CONFIG_HOTPLUG_CPU=y -# CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set -# CONFIG_DEBUG_HOTPLUG_CPU0 is not set -# CONFIG_COMPAT_VDSO is not set -# CONFIG_LEGACY_VSYSCALL_EMULATE is not set -CONFIG_LEGACY_VSYSCALL_XONLY=y -# CONFIG_LEGACY_VSYSCALL_NONE is not set -# CONFIG_CMDLINE_BOOL is not set -CONFIG_MODIFY_LDT_SYSCALL=y -CONFIG_HAVE_LIVEPATCH=y -# CONFIG_LIVEPATCH is not set -# end of Processor type and features - -CONFIG_ARCH_HAS_ADD_PAGES=y -CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y -CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y -CONFIG_USE_PERCPU_NUMA_NODE_ID=y -CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y -CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y -CONFIG_ARCH_ENABLE_THP_MIGRATION=y - -# -# Power management and ACPI options -# -CONFIG_ARCH_HIBERNATION_HEADER=y -CONFIG_SUSPEND=y -CONFIG_SUSPEND_FREEZER=y -# CONFIG_SUSPEND_SKIP_SYNC is not set -CONFIG_HIBERNATE_CALLBACKS=y -CONFIG_HIBERNATION=y -CONFIG_PM_STD_PARTITION="" -CONFIG_PM_SLEEP=y -CONFIG_PM_SLEEP_SMP=y -CONFIG_PM_AUTOSLEEP=y -CONFIG_PM_WAKELOCKS=y -CONFIG_PM_WAKELOCKS_LIMIT=100 -CONFIG_PM_WAKELOCKS_GC=y -CONFIG_PM=y -CONFIG_PM_DEBUG=y -CONFIG_PM_ADVANCED_DEBUG=y -# CONFIG_PM_TEST_SUSPEND is not set -CONFIG_PM_SLEEP_DEBUG=y -# CONFIG_DPM_WATCHDOG is not set -CONFIG_PM_TRACE=y -CONFIG_PM_TRACE_RTC=y -CONFIG_PM_CLK=y -CONFIG_PM_GENERIC_DOMAINS=y -CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y -CONFIG_PM_GENERIC_DOMAINS_SLEEP=y -CONFIG_PM_GENERIC_DOMAINS_OF=y -CONFIG_ENERGY_MODEL=y -CONFIG_ARCH_SUPPORTS_ACPI=y -CONFIG_ACPI=y -CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y -CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y -CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y -# CONFIG_ACPI_DEBUGGER is not set -CONFIG_ACPI_SPCR_TABLE=y -CONFIG_ACPI_LPIT=y -CONFIG_ACPI_SLEEP=y -# CONFIG_ACPI_PROCFS_POWER is not set -CONFIG_ACPI_REV_OVERRIDE_POSSIBLE=y -CONFIG_ACPI_EC_DEBUGFS=y -CONFIG_ACPI_AC=m -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=y -CONFIG_ACPI_VIDEO=y -CONFIG_ACPI_FAN=y -CONFIG_ACPI_TAD=m -CONFIG_ACPI_DOCK=y -CONFIG_ACPI_CPU_FREQ_PSS=y -CONFIG_ACPI_PROCESSOR_CSTATE=y -CONFIG_ACPI_PROCESSOR_IDLE=y -CONFIG_ACPI_CPPC_LIB=y -CONFIG_ACPI_PROCESSOR=y -CONFIG_ACPI_IPMI=m -CONFIG_ACPI_HOTPLUG_CPU=y -CONFIG_ACPI_PROCESSOR_AGGREGATOR=y -CONFIG_ACPI_THERMAL=y -CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y 
-CONFIG_ACPI_TABLE_UPGRADE=y -CONFIG_ACPI_DEBUG=y -CONFIG_ACPI_PCI_SLOT=y -CONFIG_ACPI_CONTAINER=y -CONFIG_ACPI_HOTPLUG_MEMORY=y -CONFIG_ACPI_HOTPLUG_IOAPIC=y -CONFIG_ACPI_SBS=m -CONFIG_ACPI_HED=y -CONFIG_ACPI_CUSTOM_METHOD=m -CONFIG_ACPI_BGRT=y -# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set -CONFIG_ACPI_NFIT=m -# CONFIG_NFIT_SECURITY_DEBUG is not set -CONFIG_ACPI_NUMA=y -CONFIG_ACPI_HMAT=y -CONFIG_HAVE_ACPI_APEI=y -CONFIG_HAVE_ACPI_APEI_NMI=y -CONFIG_ACPI_APEI=y -CONFIG_ACPI_APEI_GHES=y -CONFIG_ACPI_APEI_PCIEAER=y -CONFIG_ACPI_APEI_MEMORY_FAILURE=y -CONFIG_ACPI_APEI_EINJ=m -CONFIG_ACPI_APEI_ERST_DEBUG=m -CONFIG_DPTF_POWER=m -CONFIG_ACPI_WATCHDOG=y -CONFIG_ACPI_EXTLOG=m -CONFIG_ACPI_ADXL=y -CONFIG_PMIC_OPREGION=y -CONFIG_BYTCRC_PMIC_OPREGION=y -CONFIG_CHTCRC_PMIC_OPREGION=y -CONFIG_XPOWER_PMIC_OPREGION=y -CONFIG_BXT_WC_PMIC_OPREGION=y -CONFIG_CHT_WC_PMIC_OPREGION=y -CONFIG_CHT_DC_TI_PMIC_OPREGION=y -CONFIG_ACPI_CONFIGFS=m -CONFIG_TPS68470_PMIC_OPREGION=y -CONFIG_X86_PM_TIMER=y -CONFIG_SFI=y - -# -# CPU Frequency scaling -# -CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_GOV_ATTR_SET=y -CONFIG_CPU_FREQ_GOV_COMMON=y -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y -CONFIG_CPU_FREQ_GOV_PERFORMANCE=y -CONFIG_CPU_FREQ_GOV_POWERSAVE=m -CONFIG_CPU_FREQ_GOV_USERSPACE=m -CONFIG_CPU_FREQ_GOV_ONDEMAND=m -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m -CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y - -# -# CPU frequency scaling drivers -# -CONFIG_CPUFREQ_DT=m -CONFIG_CPUFREQ_DT_PLATDEV=y -CONFIG_X86_INTEL_PSTATE=y -CONFIG_X86_PCC_CPUFREQ=m -CONFIG_X86_ACPI_CPUFREQ=m -CONFIG_X86_ACPI_CPUFREQ_CPB=y -CONFIG_X86_POWERNOW_K8=m -CONFIG_X86_AMD_FREQ_SENSITIVITY=m -# CONFIG_X86_SPEEDSTEP_CENTRINO is not set -CONFIG_X86_P4_CLOCKMOD=m - -# -# shared options -# -CONFIG_X86_SPEEDSTEP_LIB=m -# end of CPU Frequency scaling - -# -# CPU Idle -# -CONFIG_CPU_IDLE=y -CONFIG_CPU_IDLE_GOV_LADDER=y -CONFIG_CPU_IDLE_GOV_MENU=y -CONFIG_CPU_IDLE_GOV_TEO=y -CONFIG_CPU_IDLE_GOV_HALTPOLL=y -CONFIG_HALTPOLL_CPUIDLE=m -# end of CPU Idle - -CONFIG_INTEL_IDLE=y -# end of Power management and ACPI options - -# -# Bus options (PCI etc.) -# -CONFIG_PCI_DIRECT=y -CONFIG_PCI_MMCONFIG=y -CONFIG_PCI_XEN=y -CONFIG_MMCONF_FAM10H=y -# CONFIG_PCI_CNB20LE_QUIRK is not set -# CONFIG_ISA_BUS is not set -CONFIG_ISA_DMA_API=y -CONFIG_AMD_NB=y -# CONFIG_X86_SYSFB is not set -# end of Bus options (PCI etc.) 
- -# -# Binary Emulations -# -CONFIG_IA32_EMULATION=y -# CONFIG_X86_X32 is not set -CONFIG_COMPAT_32=y -CONFIG_COMPAT=y -CONFIG_COMPAT_FOR_U64_ALIGNMENT=y -CONFIG_SYSVIPC_COMPAT=y -# end of Binary Emulations - -# -# Firmware Drivers -# -CONFIG_EDD=m -# CONFIG_EDD_OFF is not set -CONFIG_FIRMWARE_MEMMAP=y -CONFIG_DMIID=y -CONFIG_DMI_SYSFS=m -CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y -CONFIG_ISCSI_IBFT_FIND=y -CONFIG_ISCSI_IBFT=m -CONFIG_FW_CFG_SYSFS=m -# CONFIG_FW_CFG_SYSFS_CMDLINE is not set -CONFIG_GOOGLE_FIRMWARE=y -# CONFIG_GOOGLE_SMI is not set -CONFIG_GOOGLE_COREBOOT_TABLE=m -CONFIG_GOOGLE_MEMCONSOLE=m -# CONFIG_GOOGLE_MEMCONSOLE_X86_LEGACY is not set -CONFIG_GOOGLE_FRAMEBUFFER_COREBOOT=m -CONFIG_GOOGLE_MEMCONSOLE_COREBOOT=m -CONFIG_GOOGLE_VPD=m - -# -# EFI (Extensible Firmware Interface) Support -# -# CONFIG_EFI_VARS is not set -CONFIG_EFI_ESRT=y -CONFIG_EFI_RUNTIME_MAP=y -# CONFIG_EFI_FAKE_MEMMAP is not set -CONFIG_EFI_SOFT_RESERVE=y -CONFIG_EFI_RUNTIME_WRAPPERS=y -CONFIG_EFI_CAPSULE_LOADER=m -# CONFIG_EFI_TEST is not set -CONFIG_APPLE_PROPERTIES=y -# CONFIG_RESET_ATTACK_MITIGATION is not set -CONFIG_EFI_RCI2_TABLE=y -# CONFIG_EFI_DISABLE_PCI_DMA is not set -# end of EFI (Extensible Firmware Interface) Support - -CONFIG_EFI_EMBEDDED_FIRMWARE=y -CONFIG_UEFI_CPER=y -CONFIG_UEFI_CPER_X86=y -CONFIG_EFI_DEV_PATH_PARSER=y -CONFIG_EFI_EARLYCON=y - -# -# Tegra firmware driver -# -# end of Tegra firmware driver -# end of Firmware Drivers - -CONFIG_HAVE_KVM=y -CONFIG_HAVE_KVM_IRQCHIP=y -CONFIG_HAVE_KVM_IRQFD=y -CONFIG_HAVE_KVM_IRQ_ROUTING=y -CONFIG_HAVE_KVM_EVENTFD=y -CONFIG_KVM_MMIO=y -CONFIG_KVM_ASYNC_PF=y -CONFIG_HAVE_KVM_MSI=y -CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y -CONFIG_KVM_VFIO=y -CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y -CONFIG_KVM_COMPAT=y -CONFIG_HAVE_KVM_IRQ_BYPASS=y -CONFIG_HAVE_KVM_NO_POLL=y -CONFIG_VIRTUALIZATION=y -CONFIG_KVM=m -CONFIG_KVM_WERROR=y -CONFIG_KVM_INTEL=m -CONFIG_KVM_AMD=m -CONFIG_KVM_AMD_SEV=y -CONFIG_KVM_MMU_AUDIT=y -CONFIG_AS_AVX512=y -CONFIG_AS_SHA1_NI=y -CONFIG_AS_SHA256_NI=y - -# -# General architecture-dependent options -# -CONFIG_CRASH_CORE=y -CONFIG_KEXEC_CORE=y -CONFIG_HOTPLUG_SMT=y -CONFIG_OPROFILE=m -# CONFIG_OPROFILE_EVENT_MULTIPLEX is not set -CONFIG_HAVE_OPROFILE=y -CONFIG_OPROFILE_NMI_TIMER=y -CONFIG_KPROBES=y -CONFIG_JUMP_LABEL=y -# CONFIG_STATIC_KEYS_SELFTEST is not set -CONFIG_OPTPROBES=y -CONFIG_KPROBES_ON_FTRACE=y -CONFIG_UPROBES=y -CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y -CONFIG_ARCH_USE_BUILTIN_BSWAP=y -CONFIG_KRETPROBES=y -CONFIG_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_IOREMAP_PROT=y -CONFIG_HAVE_KPROBES=y -CONFIG_HAVE_KRETPROBES=y -CONFIG_HAVE_OPTPROBES=y -CONFIG_HAVE_KPROBES_ON_FTRACE=y -CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y -CONFIG_HAVE_NMI=y -CONFIG_HAVE_ARCH_TRACEHOOK=y -CONFIG_HAVE_DMA_CONTIGUOUS=y -CONFIG_GENERIC_SMP_IDLE_THREAD=y -CONFIG_ARCH_HAS_FORTIFY_SOURCE=y -CONFIG_ARCH_HAS_SET_MEMORY=y -CONFIG_ARCH_HAS_SET_DIRECT_MAP=y -CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y -CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y -CONFIG_HAVE_ASM_MODVERSIONS=y -CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y -CONFIG_HAVE_RSEQ=y -CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y -CONFIG_HAVE_CLK=y -CONFIG_HAVE_HW_BREAKPOINT=y -CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y -CONFIG_HAVE_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_PERF_EVENTS_NMI=y -CONFIG_HAVE_HARDLOCKUP_DETECTOR_PERF=y -CONFIG_HAVE_PERF_REGS=y -CONFIG_HAVE_PERF_USER_STACK_DUMP=y -CONFIG_HAVE_ARCH_JUMP_LABEL=y -CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y -CONFIG_MMU_GATHER_TABLE_FREE=y -CONFIG_MMU_GATHER_RCU_TABLE_FREE=y 
-CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y -CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y -CONFIG_HAVE_CMPXCHG_LOCAL=y -CONFIG_HAVE_CMPXCHG_DOUBLE=y -CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y -CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y -CONFIG_HAVE_ARCH_SECCOMP_FILTER=y -CONFIG_SECCOMP_FILTER=y -CONFIG_HAVE_ARCH_STACKLEAK=y -CONFIG_HAVE_STACKPROTECTOR=y -CONFIG_CC_HAS_STACKPROTECTOR_NONE=y -CONFIG_STACKPROTECTOR=y -CONFIG_STACKPROTECTOR_STRONG=y -CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y -CONFIG_HAVE_CONTEXT_TRACKING=y -CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y -CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y -CONFIG_HAVE_MOVE_PMD=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y -CONFIG_HAVE_ARCH_HUGE_VMAP=y -CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y -CONFIG_HAVE_ARCH_SOFT_DIRTY=y -CONFIG_HAVE_MOD_ARCH_SPECIFIC=y -CONFIG_MODULES_USE_ELF_RELA=y -CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y -CONFIG_ARCH_HAS_ELF_RANDOMIZE=y -CONFIG_HAVE_ARCH_MMAP_RND_BITS=y -CONFIG_HAVE_EXIT_THREAD=y -CONFIG_ARCH_MMAP_RND_BITS=28 -CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y -CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 -CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES=y -CONFIG_HAVE_COPY_THREAD_TLS=y -CONFIG_HAVE_STACK_VALIDATION=y -CONFIG_HAVE_RELIABLE_STACKTRACE=y -CONFIG_ISA_BUS_API=y -CONFIG_OLD_SIGSUSPEND3=y -CONFIG_COMPAT_OLD_SIGACTION=y -CONFIG_COMPAT_32BIT_TIME=y -CONFIG_HAVE_ARCH_VMAP_STACK=y -CONFIG_VMAP_STACK=y -CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y -CONFIG_STRICT_KERNEL_RWX=y -CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y -CONFIG_STRICT_MODULE_RWX=y -CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y -CONFIG_ARCH_USE_MEMREMAP_PROT=y -CONFIG_LOCK_EVENT_COUNTS=y -CONFIG_ARCH_HAS_MEM_ENCRYPT=y - -# -# GCOV-based kernel profiling -# -# CONFIG_GCOV_KERNEL is not set -CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y -# end of GCOV-based kernel profiling - -CONFIG_HAVE_GCC_PLUGINS=y -CONFIG_GCC_PLUGINS=y -# CONFIG_GCC_PLUGIN_CYC_COMPLEXITY is not set -# CONFIG_GCC_PLUGIN_LATENT_ENTROPY is not set -# CONFIG_GCC_PLUGIN_RANDSTRUCT is not set -# end of General architecture-dependent options - -CONFIG_RT_MUTEXES=y -CONFIG_BASE_SMALL=0 -CONFIG_MODULE_SIG_FORMAT=y -CONFIG_MODULES=y -CONFIG_MODULE_FORCE_LOAD=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_MODVERSIONS is not set -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG=y -# CONFIG_MODULE_SIG_FORCE is not set -CONFIG_MODULE_SIG_ALL=y -# CONFIG_MODULE_SIG_SHA1 is not set -# CONFIG_MODULE_SIG_SHA224 is not set -# CONFIG_MODULE_SIG_SHA256 is not set -# CONFIG_MODULE_SIG_SHA384 is not set -CONFIG_MODULE_SIG_SHA512=y -CONFIG_MODULE_SIG_HASH="sha512" -CONFIG_MODULE_COMPRESS=y -# CONFIG_MODULE_COMPRESS_GZIP is not set -CONFIG_MODULE_COMPRESS_XZ=y -CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS=y -CONFIG_UNUSED_SYMBOLS=y -CONFIG_MODULES_TREE_LOOKUP=y -CONFIG_BLOCK=y -CONFIG_BLK_RQ_ALLOC_TIME=y -CONFIG_BLK_SCSI_REQUEST=y -CONFIG_BLK_CGROUP_RWSTAT=y -CONFIG_BLK_DEV_BSG=y -CONFIG_BLK_DEV_BSGLIB=y -CONFIG_BLK_DEV_INTEGRITY=y -CONFIG_BLK_DEV_INTEGRITY_T10=y -CONFIG_BLK_DEV_ZONED=y -CONFIG_BLK_DEV_THROTTLING=y -CONFIG_BLK_DEV_THROTTLING_LOW=y -# CONFIG_BLK_CMDLINE_PARSER is not set -CONFIG_BLK_WBT=y -CONFIG_BLK_CGROUP_IOLATENCY=y -CONFIG_BLK_CGROUP_IOCOST=y -CONFIG_BLK_WBT_MQ=y -CONFIG_BLK_DEBUG_FS=y -CONFIG_BLK_DEBUG_FS_ZONED=y -CONFIG_BLK_SED_OPAL=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_AIX_PARTITION=y -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y 
-CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -# CONFIG_UNIXWARE_DISKLABEL is not set -CONFIG_LDM_PARTITION=y -# CONFIG_LDM_DEBUG is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -CONFIG_KARMA_PARTITION=y -CONFIG_EFI_PARTITION=y -# CONFIG_SYSV68_PARTITION is not set -# CONFIG_CMDLINE_PARTITION is not set -# end of Partition Types - -CONFIG_BLOCK_COMPAT=y -CONFIG_BLK_MQ_PCI=y -CONFIG_BLK_MQ_VIRTIO=y -CONFIG_BLK_MQ_RDMA=y -CONFIG_BLK_PM=y - -# -# IO Schedulers -# -CONFIG_MQ_IOSCHED_DEADLINE=y -CONFIG_MQ_IOSCHED_KYBER=y -CONFIG_IOSCHED_BFQ=y -CONFIG_BFQ_GROUP_IOSCHED=y -# CONFIG_BFQ_CGROUP_DEBUG is not set -# end of IO Schedulers - -CONFIG_PREEMPT_NOTIFIERS=y -CONFIG_PADATA=y -CONFIG_ASN1=y -CONFIG_UNINLINE_SPIN_UNLOCK=y -CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y -CONFIG_MUTEX_SPIN_ON_OWNER=y -CONFIG_RWSEM_SPIN_ON_OWNER=y -CONFIG_LOCK_SPIN_ON_OWNER=y -CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y -CONFIG_QUEUED_SPINLOCKS=y -CONFIG_ARCH_USE_QUEUED_RWLOCKS=y -CONFIG_QUEUED_RWLOCKS=y -CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y -CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y -CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y -CONFIG_FREEZER=y - -# -# Executable file formats -# -CONFIG_BINFMT_ELF=y -CONFIG_COMPAT_BINFMT_ELF=y -CONFIG_ELFCORE=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y -CONFIG_BINFMT_SCRIPT=y -CONFIG_BINFMT_MISC=y -CONFIG_COREDUMP=y -# end of Executable file formats - -# -# Memory Management options -# -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_SPARSEMEM_MANUAL=y -CONFIG_SPARSEMEM=y -CONFIG_NEED_MULTIPLE_NODES=y -CONFIG_HAVE_MEMORY_PRESENT=y -CONFIG_SPARSEMEM_EXTREME=y -CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y -CONFIG_SPARSEMEM_VMEMMAP=y -CONFIG_HAVE_MEMBLOCK_NODE_MAP=y -CONFIG_HAVE_FAST_GUP=y -CONFIG_NUMA_KEEP_MEMINFO=y -CONFIG_MEMORY_ISOLATION=y -CONFIG_HAVE_BOOTMEM_INFO_NODE=y -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTPLUG_SPARSE=y -CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y -CONFIG_MEMORY_HOTREMOVE=y -CONFIG_SPLIT_PTLOCK_CPUS=4 -CONFIG_MEMORY_BALLOON=y -CONFIG_BALLOON_COMPACTION=y -CONFIG_COMPACTION=y -CONFIG_PAGE_REPORTING=y -CONFIG_MIGRATION=y -CONFIG_CONTIG_ALLOC=y -CONFIG_PHYS_ADDR_T_64BIT=y -CONFIG_BOUNCE=y -CONFIG_VIRT_TO_BUS=y -CONFIG_MMU_NOTIFIER=y -CONFIG_KSM=y -CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 -CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y -CONFIG_MEMORY_FAILURE=y -CONFIG_HWPOISON_INJECT=m -CONFIG_TRANSPARENT_HUGEPAGE=y -# CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS is not set -CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y -CONFIG_ARCH_WANTS_THP_SWAP=y -CONFIG_THP_SWAP=y -CONFIG_CLEANCACHE=y -CONFIG_FRONTSWAP=y -# CONFIG_CMA is not set -# CONFIG_MEM_SOFT_DIRTY is not set -CONFIG_ZSWAP=y -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set -CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4=y -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4HC is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD is not set -CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lz4" -# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD is not set -CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD=y -# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set -CONFIG_ZSWAP_ZPOOL_DEFAULT="z3fold" -CONFIG_ZSWAP_DEFAULT_ON=y -CONFIG_ZPOOL=y -CONFIG_ZBUD=y -CONFIG_Z3FOLD=y -CONFIG_ZSMALLOC=y -# CONFIG_PGTABLE_MAPPING is not set -# CONFIG_ZSMALLOC_STAT is not set -CONFIG_GENERIC_EARLY_IOREMAP=y -# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set -# CONFIG_IDLE_PAGE_TRACKING is not set -CONFIG_ARCH_HAS_PTE_DEVMAP=y -CONFIG_ZONE_DEVICE=y 
-CONFIG_DEV_PAGEMAP_OPS=y -CONFIG_HMM_MIRROR=y -CONFIG_DEVICE_PRIVATE=y -CONFIG_FRAME_VECTOR=y -CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y -CONFIG_ARCH_HAS_PKEYS=y -# CONFIG_PERCPU_STATS is not set -# CONFIG_GUP_BENCHMARK is not set -CONFIG_READ_ONLY_THP_FOR_FS=y -CONFIG_ARCH_HAS_PTE_SPECIAL=y -CONFIG_MAPPING_DIRTY_HELPERS=y -# end of Memory Management options - -CONFIG_NET=y -CONFIG_COMPAT_NETLINK_MESSAGES=y -CONFIG_NET_INGRESS=y -CONFIG_NET_EGRESS=y -CONFIG_NET_REDIRECT=y -CONFIG_SKB_EXTENSIONS=y - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_DIAG=y -CONFIG_UNIX=y -CONFIG_UNIX_SCM=y -CONFIG_UNIX_DIAG=y -CONFIG_TLS=m -CONFIG_TLS_DEVICE=y -# CONFIG_TLS_TOE is not set -CONFIG_XFRM=y -CONFIG_XFRM_OFFLOAD=y -CONFIG_XFRM_ALGO=m -CONFIG_XFRM_USER=m -CONFIG_XFRM_INTERFACE=m -CONFIG_XFRM_SUB_POLICY=y -CONFIG_XFRM_MIGRATE=y -CONFIG_XFRM_STATISTICS=y -CONFIG_XFRM_IPCOMP=m -CONFIG_NET_KEY=m -CONFIG_NET_KEY_MIGRATE=y -CONFIG_SMC=m -CONFIG_SMC_DIAG=m -CONFIG_XDP_SOCKETS=y -CONFIG_XDP_SOCKETS_DIAG=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -# CONFIG_IP_FIB_TRIE_STATS is not set -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_CLASSID=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE_DEMUX=m -CONFIG_NET_IP_TUNNEL=m -CONFIG_NET_IPGRE=m -# CONFIG_NET_IPGRE_BROADCAST is not set -CONFIG_IP_MROUTE_COMMON=y -CONFIG_IP_MROUTE=y -CONFIG_IP_MROUTE_MULTIPLE_TABLES=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -CONFIG_NET_IPVTI=m -CONFIG_NET_UDP_TUNNEL=m -CONFIG_NET_FOU=m -CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_ESP_OFFLOAD=m -CONFIG_INET_ESPINTCP=y -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_TUNNEL=m -CONFIG_INET_TUNNEL=m -CONFIG_INET_DIAG=m -CONFIG_INET_TCP_DIAG=m -CONFIG_INET_UDP_DIAG=m -CONFIG_INET_RAW_DIAG=m -CONFIG_INET_DIAG_DESTROY=y -CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_BIC=m -CONFIG_TCP_CONG_CUBIC=y -CONFIG_TCP_CONG_WESTWOOD=m -CONFIG_TCP_CONG_HTCP=m -CONFIG_TCP_CONG_HSTCP=m -CONFIG_TCP_CONG_HYBLA=m -CONFIG_TCP_CONG_VEGAS=m -CONFIG_TCP_CONG_NV=m -CONFIG_TCP_CONG_SCALABLE=m -CONFIG_TCP_CONG_LP=m -CONFIG_TCP_CONG_VENO=m -CONFIG_TCP_CONG_YEAH=m -CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_TCP_CONG_DCTCP=m -CONFIG_TCP_CONG_CDG=m -CONFIG_TCP_CONG_BBR=m -CONFIG_DEFAULT_CUBIC=y -# CONFIG_DEFAULT_RENO is not set -CONFIG_DEFAULT_TCP_CONG="cubic" -CONFIG_TCP_MD5SIG=y -CONFIG_IPV6=y -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_IPV6_ROUTE_INFO=y -CONFIG_IPV6_OPTIMISTIC_DAD=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_MIP6=m -CONFIG_IPV6_ILA=m -CONFIG_INET6_XFRM_TUNNEL=m -CONFIG_INET6_TUNNEL=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_SIT=m -CONFIG_IPV6_SIT_6RD=y -CONFIG_IPV6_NDISC_NODETYPE=y -CONFIG_IPV6_TUNNEL=m -CONFIG_IPV6_GRE=m -CONFIG_IPV6_FOU=m -CONFIG_IPV6_FOU_TUNNEL=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_IPV6_MROUTE=y -CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y -CONFIG_IPV6_PIMSM_V2=y -CONFIG_IPV6_SEG6_LWTUNNEL=y -CONFIG_IPV6_SEG6_HMAC=y -CONFIG_IPV6_SEG6_BPF=y -CONFIG_IPV6_RPL_LWTUNNEL=y -CONFIG_NETLABEL=y -CONFIG_MPTCP=y -CONFIG_MPTCP_IPV6=y -# CONFIG_MPTCP_HMAC_TEST is not set -CONFIG_NETWORK_SECMARK=y -CONFIG_NET_PTP_CLASSIFY=y -CONFIG_NETWORK_PHY_TIMESTAMPING=y -CONFIG_NETFILTER=y -CONFIG_NETFILTER_ADVANCED=y -CONFIG_BRIDGE_NETFILTER=m - -# -# Core Netfilter Configuration -# -CONFIG_NETFILTER_INGRESS=y -CONFIG_NETFILTER_NETLINK=m -CONFIG_NETFILTER_FAMILY_BRIDGE=y 
-CONFIG_NETFILTER_FAMILY_ARP=y -CONFIG_NETFILTER_NETLINK_ACCT=m -CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_NETLINK_LOG=m -CONFIG_NETFILTER_NETLINK_OSF=m -CONFIG_NF_CONNTRACK=m -CONFIG_NF_LOG_COMMON=m -CONFIG_NF_LOG_NETDEV=m -CONFIG_NETFILTER_CONNCOUNT=m -CONFIG_NF_CONNTRACK_MARK=y -CONFIG_NF_CONNTRACK_SECMARK=y -CONFIG_NF_CONNTRACK_ZONES=y -CONFIG_NF_CONNTRACK_PROCFS=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_TIMEOUT=y -CONFIG_NF_CONNTRACK_TIMESTAMP=y -CONFIG_NF_CONNTRACK_LABELS=y -CONFIG_NF_CT_PROTO_DCCP=y -CONFIG_NF_CT_PROTO_GRE=y -CONFIG_NF_CT_PROTO_SCTP=y -CONFIG_NF_CT_PROTO_UDPLITE=y -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_BROADCAST=m -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_SNMP=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_NETLINK_HELPER=m -CONFIG_NETFILTER_NETLINK_GLUE_CT=y -CONFIG_NF_NAT=m -CONFIG_NF_NAT_AMANDA=m -CONFIG_NF_NAT_FTP=m -CONFIG_NF_NAT_IRC=m -CONFIG_NF_NAT_SIP=m -CONFIG_NF_NAT_TFTP=m -CONFIG_NF_NAT_REDIRECT=y -CONFIG_NF_NAT_MASQUERADE=y -CONFIG_NETFILTER_SYNPROXY=m -CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y -CONFIG_NF_TABLES_NETDEV=y -CONFIG_NFT_NUMGEN=m -CONFIG_NFT_CT=m -CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m -CONFIG_NFT_CONNLIMIT=m -CONFIG_NFT_LOG=m -CONFIG_NFT_LIMIT=m -CONFIG_NFT_MASQ=m -CONFIG_NFT_REDIR=m -CONFIG_NFT_NAT=m -CONFIG_NFT_TUNNEL=m -CONFIG_NFT_OBJREF=m -CONFIG_NFT_QUEUE=m -CONFIG_NFT_QUOTA=m -CONFIG_NFT_REJECT=m -CONFIG_NFT_REJECT_INET=m -CONFIG_NFT_COMPAT=m -CONFIG_NFT_HASH=m -CONFIG_NFT_FIB=m -CONFIG_NFT_FIB_INET=m -CONFIG_NFT_XFRM=m -CONFIG_NFT_SOCKET=m -CONFIG_NFT_OSF=m -CONFIG_NFT_TPROXY=m -CONFIG_NFT_SYNPROXY=m -CONFIG_NF_DUP_NETDEV=m -CONFIG_NFT_DUP_NETDEV=m -CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NF_FLOW_TABLE_INET=m -CONFIG_NF_FLOW_TABLE=m -CONFIG_NETFILTER_XTABLES=m - -# -# Xtables combined modules -# -CONFIG_NETFILTER_XT_MARK=m -CONFIG_NETFILTER_XT_CONNMARK=m -CONFIG_NETFILTER_XT_SET=m - -# -# Xtables targets -# -CONFIG_NETFILTER_XT_TARGET_AUDIT=m -CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m -CONFIG_NETFILTER_XT_TARGET_CT=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_HL=m -CONFIG_NETFILTER_XT_TARGET_HMARK=m -CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m -CONFIG_NETFILTER_XT_TARGET_LED=m -CONFIG_NETFILTER_XT_TARGET_LOG=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_NAT=m -CONFIG_NETFILTER_XT_TARGET_NETMAP=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m -CONFIG_NETFILTER_XT_TARGET_RATEEST=m -CONFIG_NETFILTER_XT_TARGET_REDIRECT=m -CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m -CONFIG_NETFILTER_XT_TARGET_TEE=m -CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m -CONFIG_NETFILTER_XT_TARGET_SECMARK=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m - -# -# Xtables matches -# -CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_CGROUP=m -CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m 
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_CPU=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ECN=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_HL=m -CONFIG_NETFILTER_XT_MATCH_IPCOMP=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_IPVS=m -CONFIG_NETFILTER_XT_MATCH_L2TP=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_NFACCT=m -CONFIG_NETFILTER_XT_MATCH_OSF=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_SOCKET=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -# end of Core Netfilter Configuration - -CONFIG_IP_SET=m -CONFIG_IP_SET_MAX=256 -CONFIG_IP_SET_BITMAP_IP=m -CONFIG_IP_SET_BITMAP_IPMAC=m -CONFIG_IP_SET_BITMAP_PORT=m -CONFIG_IP_SET_HASH_IP=m -CONFIG_IP_SET_HASH_IPMARK=m -CONFIG_IP_SET_HASH_IPPORT=m -CONFIG_IP_SET_HASH_IPPORTIP=m -CONFIG_IP_SET_HASH_IPPORTNET=m -CONFIG_IP_SET_HASH_IPMAC=m -CONFIG_IP_SET_HASH_MAC=m -CONFIG_IP_SET_HASH_NETPORTNET=m -CONFIG_IP_SET_HASH_NET=m -CONFIG_IP_SET_HASH_NETNET=m -CONFIG_IP_SET_HASH_NETPORT=m -CONFIG_IP_SET_HASH_NETIFACE=m -CONFIG_IP_SET_LIST_SET=m -CONFIG_IP_VS=m -CONFIG_IP_VS_IPV6=y -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=15 - -# -# IPVS transport protocol load balancing support -# -CONFIG_IP_VS_PROTO_TCP=y -CONFIG_IP_VS_PROTO_UDP=y -CONFIG_IP_VS_PROTO_AH_ESP=y -CONFIG_IP_VS_PROTO_ESP=y -CONFIG_IP_VS_PROTO_AH=y -CONFIG_IP_VS_PROTO_SCTP=y - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_FO=m -CONFIG_IP_VS_OVF=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_MH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m - -# -# IPVS SH scheduler -# -CONFIG_IP_VS_SH_TAB_BITS=8 - -# -# IPVS MH scheduler -# -CONFIG_IP_VS_MH_TAB_INDEX=12 - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -CONFIG_IP_VS_NFCT=y -CONFIG_IP_VS_PE_SIP=m - -# -# IP: Netfilter Configuration -# -CONFIG_NF_DEFRAG_IPV4=m -CONFIG_NF_SOCKET_IPV4=m -CONFIG_NF_TPROXY_IPV4=m -CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_REJECT_IPV4=m -CONFIG_NFT_DUP_IPV4=m -CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m -CONFIG_NF_DUP_IPV4=m -CONFIG_NF_LOG_ARP=m -CONFIG_NF_LOG_IPV4=m -CONFIG_NF_REJECT_IPV4=m -CONFIG_NF_NAT_SNMP_BASIC=m -CONFIG_NF_NAT_PPTP=m -CONFIG_NF_NAT_H323=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_SYNPROXY=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m 
-CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -# end of IP: Netfilter Configuration - -# -# IPv6: Netfilter Configuration -# -CONFIG_NF_SOCKET_IPV6=m -CONFIG_NF_TPROXY_IPV6=m -CONFIG_NF_TABLES_IPV6=y -CONFIG_NFT_REJECT_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m -CONFIG_NF_DUP_IPV6=m -CONFIG_NF_REJECT_IPV6=m -CONFIG_NF_LOG_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m -# end of IPv6: Netfilter Configuration - -CONFIG_NF_DEFRAG_IPV6=m -CONFIG_NF_TABLES_BRIDGE=m -CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m -CONFIG_NF_LOG_BRIDGE=m -CONFIG_NF_CONNTRACK_BRIDGE=m -CONFIG_BRIDGE_NF_EBTABLES=m -CONFIG_BRIDGE_EBT_BROUTE=m -CONFIG_BRIDGE_EBT_T_FILTER=m -CONFIG_BRIDGE_EBT_T_NAT=m -CONFIG_BRIDGE_EBT_802_3=m -CONFIG_BRIDGE_EBT_AMONG=m -CONFIG_BRIDGE_EBT_ARP=m -CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m -CONFIG_BRIDGE_EBT_LIMIT=m -CONFIG_BRIDGE_EBT_MARK=m -CONFIG_BRIDGE_EBT_PKTTYPE=m -CONFIG_BRIDGE_EBT_STP=m -CONFIG_BRIDGE_EBT_VLAN=m -CONFIG_BRIDGE_EBT_ARPREPLY=m -CONFIG_BRIDGE_EBT_DNAT=m -CONFIG_BRIDGE_EBT_MARK_T=m -CONFIG_BRIDGE_EBT_REDIRECT=m -CONFIG_BRIDGE_EBT_SNAT=m -CONFIG_BRIDGE_EBT_LOG=m -CONFIG_BRIDGE_EBT_NFLOG=m -# CONFIG_BPFILTER is not set -CONFIG_IP_DCCP=m -CONFIG_INET_DCCP_DIAG=m - -# -# DCCP CCIDs Configuration -# -# CONFIG_IP_DCCP_CCID2_DEBUG is not set -CONFIG_IP_DCCP_CCID3=y -# CONFIG_IP_DCCP_CCID3_DEBUG is not set -CONFIG_IP_DCCP_TFRC_LIB=y -# end of DCCP CCIDs Configuration - -# -# DCCP Kernel Hacking -# -# CONFIG_IP_DCCP_DEBUG is not set -# end of DCCP Kernel Hacking - -CONFIG_IP_SCTP=m -# CONFIG_SCTP_DBG_OBJCNT is not set -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5 is not set -CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_NONE is not set -CONFIG_SCTP_COOKIE_HMAC_MD5=y -CONFIG_SCTP_COOKIE_HMAC_SHA1=y -CONFIG_INET_SCTP_DIAG=m -CONFIG_RDS=m -CONFIG_RDS_RDMA=m -CONFIG_RDS_TCP=m -# CONFIG_RDS_DEBUG is not set -CONFIG_TIPC=m -CONFIG_TIPC_MEDIA_IB=y -CONFIG_TIPC_MEDIA_UDP=y -CONFIG_TIPC_CRYPTO=y -CONFIG_TIPC_DIAG=m -CONFIG_ATM=m -CONFIG_ATM_CLIP=m -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -# CONFIG_ATM_BR2684_IPFILTER is not set -CONFIG_L2TP=m -# CONFIG_L2TP_DEBUGFS is not set -CONFIG_L2TP_V3=y -CONFIG_L2TP_IP=m -CONFIG_L2TP_ETH=m -CONFIG_STP=m -CONFIG_GARP=m -CONFIG_MRP=m -CONFIG_BRIDGE=m -CONFIG_BRIDGE_IGMP_SNOOPING=y -CONFIG_BRIDGE_VLAN_FILTERING=y -CONFIG_HAVE_NET_DSA=y -CONFIG_NET_DSA=m -CONFIG_NET_DSA_TAG_8021Q=m -CONFIG_NET_DSA_TAG_AR9331=m -CONFIG_NET_DSA_TAG_BRCM_COMMON=m -CONFIG_NET_DSA_TAG_BRCM=m -CONFIG_NET_DSA_TAG_BRCM_PREPEND=m -CONFIG_NET_DSA_TAG_GSWIP=m -CONFIG_NET_DSA_TAG_DSA=m -CONFIG_NET_DSA_TAG_EDSA=m -CONFIG_NET_DSA_TAG_MTK=m -CONFIG_NET_DSA_TAG_KSZ=m -CONFIG_NET_DSA_TAG_OCELOT=m -CONFIG_NET_DSA_TAG_QCA=m -CONFIG_NET_DSA_TAG_LAN9303=m -CONFIG_NET_DSA_TAG_SJA1105=m -CONFIG_NET_DSA_TAG_TRAILER=m -CONFIG_VLAN_8021Q=m -CONFIG_VLAN_8021Q_GVRP=y 
-CONFIG_VLAN_8021Q_MVRP=y -# CONFIG_DECNET is not set -CONFIG_LLC=m -CONFIG_LLC2=m -CONFIG_ATALK=m -CONFIG_DEV_APPLETALK=m -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -CONFIG_PHONET=m -CONFIG_6LOWPAN=m -# CONFIG_6LOWPAN_DEBUGFS is not set -CONFIG_6LOWPAN_NHC=m -CONFIG_6LOWPAN_NHC_DEST=m -CONFIG_6LOWPAN_NHC_FRAGMENT=m -CONFIG_6LOWPAN_NHC_HOP=m -CONFIG_6LOWPAN_NHC_IPV6=m -CONFIG_6LOWPAN_NHC_MOBILITY=m -CONFIG_6LOWPAN_NHC_ROUTING=m -CONFIG_6LOWPAN_NHC_UDP=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m -CONFIG_IEEE802154=m -CONFIG_IEEE802154_NL802154_EXPERIMENTAL=y -CONFIG_IEEE802154_SOCKET=m -CONFIG_IEEE802154_6LOWPAN=m -CONFIG_MAC802154=m -CONFIG_NET_SCHED=y - -# -# Queueing/Scheduling -# -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_HFSC=m -CONFIG_NET_SCH_ATM=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_MULTIQ=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFB=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_CBS=m -CONFIG_NET_SCH_ETF=m -CONFIG_NET_SCH_TAPRIO=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_MQPRIO=m -CONFIG_NET_SCH_SKBPRIO=m -CONFIG_NET_SCH_CHOKE=m -CONFIG_NET_SCH_QFQ=m -CONFIG_NET_SCH_CODEL=m -CONFIG_NET_SCH_FQ_CODEL=y -CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_FQ=m -CONFIG_NET_SCH_HHF=m -CONFIG_NET_SCH_PIE=m -CONFIG_NET_SCH_FQ_PIE=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_SCH_PLUG=m -CONFIG_NET_SCH_ETS=m -CONFIG_NET_SCH_DEFAULT=y -# CONFIG_DEFAULT_FQ is not set -# CONFIG_DEFAULT_CODEL is not set -CONFIG_DEFAULT_FQ_CODEL=y -# CONFIG_DEFAULT_SFQ is not set -# CONFIG_DEFAULT_PFIFO_FAST is not set -CONFIG_DEFAULT_NET_SCH="fq_codel" - -# -# Classification -# -CONFIG_NET_CLS=y -CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_FLOW=m -CONFIG_NET_CLS_CGROUP=m -CONFIG_NET_CLS_BPF=m -CONFIG_NET_CLS_FLOWER=m -CONFIG_NET_CLS_MATCHALL=m -CONFIG_NET_EMATCH=y -CONFIG_NET_EMATCH_STACK=32 -CONFIG_NET_EMATCH_CMP=m -CONFIG_NET_EMATCH_NBYTE=m -CONFIG_NET_EMATCH_U32=m -CONFIG_NET_EMATCH_META=m -CONFIG_NET_EMATCH_TEXT=m -CONFIG_NET_EMATCH_CANID=m -CONFIG_NET_EMATCH_IPSET=m -CONFIG_NET_EMATCH_IPT=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=m -CONFIG_NET_ACT_GACT=m -CONFIG_GACT_PROB=y -CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_SAMPLE=m -CONFIG_NET_ACT_IPT=m -CONFIG_NET_ACT_NAT=m -CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_ACT_SIMP=m -CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_ACT_CSUM=m -CONFIG_NET_ACT_MPLS=m -CONFIG_NET_ACT_VLAN=m -CONFIG_NET_ACT_BPF=m -CONFIG_NET_ACT_CONNMARK=m -CONFIG_NET_ACT_CTINFO=m -CONFIG_NET_ACT_SKBMOD=m -CONFIG_NET_ACT_IFE=m -CONFIG_NET_ACT_TUNNEL_KEY=m -CONFIG_NET_ACT_CT=m -CONFIG_NET_IFE_SKBMARK=m -CONFIG_NET_IFE_SKBPRIO=m -CONFIG_NET_IFE_SKBTCINDEX=m -CONFIG_NET_TC_SKB_EXT=y -CONFIG_NET_SCH_FIFO=y -CONFIG_DCB=y -CONFIG_DNS_RESOLVER=m -CONFIG_BATMAN_ADV=m -CONFIG_BATMAN_ADV_BATMAN_V=y -CONFIG_BATMAN_ADV_BLA=y -CONFIG_BATMAN_ADV_DAT=y -CONFIG_BATMAN_ADV_NC=y -CONFIG_BATMAN_ADV_MCAST=y -CONFIG_BATMAN_ADV_DEBUGFS=y -# CONFIG_BATMAN_ADV_DEBUG is not set -CONFIG_BATMAN_ADV_SYSFS=y -# CONFIG_BATMAN_ADV_TRACING is not set -CONFIG_OPENVSWITCH=m -CONFIG_OPENVSWITCH_GRE=m -CONFIG_OPENVSWITCH_VXLAN=m -CONFIG_OPENVSWITCH_GENEVE=m 
-CONFIG_VSOCKETS=m -CONFIG_VSOCKETS_DIAG=m -CONFIG_VSOCKETS_LOOPBACK=m -CONFIG_VMWARE_VMCI_VSOCKETS=m -CONFIG_VIRTIO_VSOCKETS=m -CONFIG_VIRTIO_VSOCKETS_COMMON=m -CONFIG_HYPERV_VSOCKETS=m -CONFIG_NETLINK_DIAG=m -CONFIG_MPLS=y -CONFIG_NET_MPLS_GSO=m -CONFIG_MPLS_ROUTING=m -CONFIG_MPLS_IPTUNNEL=m -CONFIG_NET_NSH=m -CONFIG_HSR=m -CONFIG_NET_SWITCHDEV=y -CONFIG_NET_L3_MASTER_DEV=y -CONFIG_NET_NCSI=y -CONFIG_NCSI_OEM_CMD_GET_MAC=y -CONFIG_RPS=y -CONFIG_RFS_ACCEL=y -CONFIG_XPS=y -CONFIG_CGROUP_NET_PRIO=y -CONFIG_CGROUP_NET_CLASSID=y -CONFIG_NET_RX_BUSY_POLL=y -CONFIG_BQL=y -CONFIG_BPF_JIT=y -CONFIG_BPF_STREAM_PARSER=y -CONFIG_NET_FLOW_LIMIT=y - -# -# Network testing -# -CONFIG_NET_PKTGEN=m -CONFIG_NET_DROP_MONITOR=y -# end of Network testing -# end of Networking options - -CONFIG_HAMRADIO=y - -# -# Packet Radio protocols -# -CONFIG_AX25=m -CONFIG_AX25_DAMA_SLAVE=y -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# -CONFIG_MKISS=m -CONFIG_6PACK=m -CONFIG_BPQETHER=m -CONFIG_BAYCOM_SER_FDX=m -CONFIG_BAYCOM_SER_HDX=m -CONFIG_BAYCOM_PAR=m -CONFIG_YAM=m -# end of AX.25 network device drivers - -CONFIG_CAN=m -CONFIG_CAN_RAW=m -CONFIG_CAN_BCM=m -CONFIG_CAN_GW=m -CONFIG_CAN_J1939=m - -# -# CAN Device Drivers -# -CONFIG_CAN_VCAN=m -CONFIG_CAN_VXCAN=m -CONFIG_CAN_SLCAN=m -CONFIG_CAN_DEV=m -CONFIG_CAN_CALC_BITTIMING=y -CONFIG_CAN_FLEXCAN=m -CONFIG_CAN_GRCAN=m -CONFIG_CAN_JANZ_ICAN3=m -CONFIG_CAN_KVASER_PCIEFD=m -CONFIG_CAN_C_CAN=m -CONFIG_CAN_C_CAN_PLATFORM=m -CONFIG_CAN_C_CAN_PCI=m -CONFIG_CAN_CC770=m -# CONFIG_CAN_CC770_ISA is not set -CONFIG_CAN_CC770_PLATFORM=m -CONFIG_CAN_IFI_CANFD=m -CONFIG_CAN_M_CAN=m -CONFIG_CAN_M_CAN_PLATFORM=m -CONFIG_CAN_M_CAN_TCAN4X5X=m -CONFIG_CAN_PEAK_PCIEFD=m -CONFIG_CAN_SJA1000=m -CONFIG_CAN_EMS_PCI=m -# CONFIG_CAN_EMS_PCMCIA is not set -CONFIG_CAN_F81601=m -CONFIG_CAN_KVASER_PCI=m -CONFIG_CAN_PEAK_PCI=m -CONFIG_CAN_PEAK_PCIEC=y -CONFIG_CAN_PEAK_PCMCIA=m -CONFIG_CAN_PLX_PCI=m -# CONFIG_CAN_SJA1000_ISA is not set -CONFIG_CAN_SJA1000_PLATFORM=m -CONFIG_CAN_SOFTING=m -CONFIG_CAN_SOFTING_CS=m - -# -# CAN SPI interfaces -# -CONFIG_CAN_HI311X=m -CONFIG_CAN_MCP251X=m -# end of CAN SPI interfaces - -# -# CAN USB interfaces -# -CONFIG_CAN_8DEV_USB=m -CONFIG_CAN_EMS_USB=m -CONFIG_CAN_ESD_USB2=m -CONFIG_CAN_GS_USB=m -CONFIG_CAN_KVASER_USB=m -CONFIG_CAN_MCBA_USB=m -CONFIG_CAN_PEAK_USB=m -CONFIG_CAN_UCAN=m -# end of CAN USB interfaces - -# CONFIG_CAN_DEBUG_DEVICES is not set -# end of CAN Device Drivers - -CONFIG_BT=m -CONFIG_BT_BREDR=y -CONFIG_BT_RFCOMM=m -CONFIG_BT_RFCOMM_TTY=y -CONFIG_BT_BNEP=m -CONFIG_BT_BNEP_MC_FILTER=y -CONFIG_BT_BNEP_PROTO_FILTER=y -CONFIG_BT_CMTP=m -CONFIG_BT_HIDP=m -CONFIG_BT_HS=y -CONFIG_BT_LE=y -CONFIG_BT_6LOWPAN=m -CONFIG_BT_LEDS=y -# CONFIG_BT_SELFTEST is not set -CONFIG_BT_DEBUGFS=y - -# -# Bluetooth device drivers -# -CONFIG_BT_INTEL=m -CONFIG_BT_BCM=m -CONFIG_BT_RTL=m -CONFIG_BT_QCA=m -CONFIG_BT_HCIBTUSB=m -CONFIG_BT_HCIBTUSB_AUTOSUSPEND=y -CONFIG_BT_HCIBTUSB_BCM=y -CONFIG_BT_HCIBTUSB_MTK=y -CONFIG_BT_HCIBTUSB_RTL=y -CONFIG_BT_HCIBTSDIO=m -CONFIG_BT_HCIUART=m -CONFIG_BT_HCIUART_SERDEV=y -CONFIG_BT_HCIUART_H4=y -CONFIG_BT_HCIUART_NOKIA=m -CONFIG_BT_HCIUART_BCSP=y -CONFIG_BT_HCIUART_ATH3K=y -CONFIG_BT_HCIUART_LL=y -CONFIG_BT_HCIUART_3WIRE=y -CONFIG_BT_HCIUART_INTEL=y -CONFIG_BT_HCIUART_BCM=y -CONFIG_BT_HCIUART_RTL=y -CONFIG_BT_HCIUART_QCA=y -CONFIG_BT_HCIUART_AG6XX=y -CONFIG_BT_HCIUART_MRVL=y -CONFIG_BT_HCIBCM203X=m -CONFIG_BT_HCIBPA10X=m -CONFIG_BT_HCIBFUSB=m -CONFIG_BT_HCIDTL1=m -CONFIG_BT_HCIBT3C=m -CONFIG_BT_HCIBLUECARD=m 
-CONFIG_BT_HCIVHCI=m -CONFIG_BT_MRVL=m -CONFIG_BT_MRVL_SDIO=m -CONFIG_BT_ATH3K=m -CONFIG_BT_MTKSDIO=m -CONFIG_BT_MTKUART=m -CONFIG_BT_HCIRSI=m -# end of Bluetooth device drivers - -CONFIG_AF_RXRPC=m -CONFIG_AF_RXRPC_IPV6=y -# CONFIG_AF_RXRPC_INJECT_LOSS is not set -CONFIG_AF_RXRPC_DEBUG=y -CONFIG_RXKAD=y -CONFIG_AF_KCM=m -CONFIG_STREAM_PARSER=y -CONFIG_FIB_RULES=y -CONFIG_WIRELESS=y -CONFIG_WIRELESS_EXT=y -CONFIG_WEXT_CORE=y -CONFIG_WEXT_PROC=y -CONFIG_WEXT_SPY=y -CONFIG_WEXT_PRIV=y -CONFIG_CFG80211=m -# CONFIG_NL80211_TESTMODE is not set -# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set -# CONFIG_CFG80211_CERTIFICATION_ONUS is not set -CONFIG_CFG80211_REQUIRE_SIGNED_REGDB=y -CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS=y -CONFIG_CFG80211_DEFAULT_PS=y -CONFIG_CFG80211_DEBUGFS=y -CONFIG_CFG80211_CRDA_SUPPORT=y -CONFIG_CFG80211_WEXT=y -CONFIG_CFG80211_WEXT_EXPORT=y -CONFIG_LIB80211=m -CONFIG_LIB80211_CRYPT_WEP=m -CONFIG_LIB80211_CRYPT_CCMP=m -CONFIG_LIB80211_CRYPT_TKIP=m -# CONFIG_LIB80211_DEBUG is not set -CONFIG_MAC80211=m -CONFIG_MAC80211_HAS_RC=y -CONFIG_MAC80211_RC_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT="minstrel_ht" -CONFIG_MAC80211_MESH=y -CONFIG_MAC80211_LEDS=y -CONFIG_MAC80211_DEBUGFS=y -# CONFIG_MAC80211_MESSAGE_TRACING is not set -# CONFIG_MAC80211_DEBUG_MENU is not set -CONFIG_MAC80211_STA_HASH_MAX_SIZE=0 -CONFIG_WIMAX=m -CONFIG_WIMAX_DEBUG_LEVEL=8 -CONFIG_RFKILL=m -CONFIG_RFKILL_LEDS=y -CONFIG_RFKILL_INPUT=y -CONFIG_RFKILL_GPIO=m -CONFIG_NET_9P=m -CONFIG_NET_9P_VIRTIO=m -CONFIG_NET_9P_XEN=m -CONFIG_NET_9P_RDMA=m -# CONFIG_NET_9P_DEBUG is not set -CONFIG_CAIF=m -# CONFIG_CAIF_DEBUG is not set -CONFIG_CAIF_NETDEV=m -CONFIG_CAIF_USB=m -CONFIG_CEPH_LIB=m -CONFIG_CEPH_LIB_PRETTYDEBUG=y -CONFIG_CEPH_LIB_USE_DNS_RESOLVER=y -CONFIG_NFC=m -CONFIG_NFC_DIGITAL=m -CONFIG_NFC_NCI=m -CONFIG_NFC_NCI_SPI=m -CONFIG_NFC_NCI_UART=m -CONFIG_NFC_HCI=m -CONFIG_NFC_SHDLC=y - -# -# Near Field Communication (NFC) devices -# -CONFIG_NFC_TRF7970A=m -CONFIG_NFC_MEI_PHY=m -CONFIG_NFC_SIM=m -CONFIG_NFC_PORT100=m -CONFIG_NFC_FDP=m -CONFIG_NFC_FDP_I2C=m -CONFIG_NFC_PN544=m -CONFIG_NFC_PN544_I2C=m -CONFIG_NFC_PN544_MEI=m -CONFIG_NFC_PN533=m -CONFIG_NFC_PN533_USB=m -CONFIG_NFC_PN533_I2C=m -CONFIG_NFC_PN532_UART=m -CONFIG_NFC_MICROREAD=m -CONFIG_NFC_MICROREAD_I2C=m -CONFIG_NFC_MICROREAD_MEI=m -CONFIG_NFC_MRVL=m -CONFIG_NFC_MRVL_USB=m -CONFIG_NFC_MRVL_UART=m -CONFIG_NFC_MRVL_I2C=m -CONFIG_NFC_MRVL_SPI=m -CONFIG_NFC_ST21NFCA=m -CONFIG_NFC_ST21NFCA_I2C=m -CONFIG_NFC_ST_NCI=m -CONFIG_NFC_ST_NCI_I2C=m -CONFIG_NFC_ST_NCI_SPI=m -CONFIG_NFC_NXP_NCI=m -CONFIG_NFC_NXP_NCI_I2C=m -CONFIG_NFC_S3FWRN5=m -CONFIG_NFC_S3FWRN5_I2C=m -CONFIG_NFC_ST95HF=m -# end of Near Field Communication (NFC) devices - -CONFIG_PSAMPLE=m -CONFIG_NET_IFE=m -CONFIG_LWTUNNEL=y -CONFIG_LWTUNNEL_BPF=y -CONFIG_DST_CACHE=y -CONFIG_GRO_CELLS=y -CONFIG_SOCK_VALIDATE_XMIT=y -CONFIG_NET_SOCK_MSG=y -CONFIG_NET_DEVLINK=y -CONFIG_PAGE_POOL=y -CONFIG_FAILOVER=m -CONFIG_ETHTOOL_NETLINK=y -CONFIG_HAVE_EBPF_JIT=y - -# -# Device Drivers -# -CONFIG_HAVE_EISA=y -# CONFIG_EISA is not set -CONFIG_HAVE_PCI=y -CONFIG_PCI=y -CONFIG_PCI_DOMAINS=y -CONFIG_PCIEPORTBUS=y -CONFIG_HOTPLUG_PCI_PCIE=y -CONFIG_PCIEAER=y -# CONFIG_PCIEAER_INJECT is not set -CONFIG_PCIE_ECRC=y -CONFIG_PCIEASPM=y -CONFIG_PCIEASPM_DEFAULT=y -# CONFIG_PCIEASPM_POWERSAVE is not set -# CONFIG_PCIEASPM_POWER_SUPERSAVE is not set -# CONFIG_PCIEASPM_PERFORMANCE is not set -CONFIG_PCIE_PME=y -CONFIG_PCIE_DPC=y -CONFIG_PCIE_PTM=y -# CONFIG_PCIE_BW is not set -CONFIG_PCIE_EDR=y 
-CONFIG_PCI_MSI=y -CONFIG_PCI_MSI_IRQ_DOMAIN=y -CONFIG_PCI_QUIRKS=y -# CONFIG_PCI_DEBUG is not set -CONFIG_PCI_REALLOC_ENABLE_AUTO=y -CONFIG_PCI_STUB=y -CONFIG_PCI_PF_STUB=m -CONFIG_XEN_PCIDEV_FRONTEND=m -CONFIG_PCI_ATS=y -CONFIG_PCI_ECAM=y -CONFIG_PCI_LOCKLESS_CONFIG=y -CONFIG_PCI_IOV=y -CONFIG_PCI_PRI=y -CONFIG_PCI_PASID=y -CONFIG_PCI_P2PDMA=y -CONFIG_PCI_LABEL=y -CONFIG_PCI_HYPERV=m -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_ACPI=y -CONFIG_HOTPLUG_PCI_ACPI_IBM=m -CONFIG_HOTPLUG_PCI_CPCI=y -CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m -CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m -CONFIG_HOTPLUG_PCI_SHPC=y - -# -# PCI controller drivers -# -CONFIG_PCI_FTPCI100=y -CONFIG_PCI_HOST_COMMON=y -CONFIG_PCI_HOST_GENERIC=y -CONFIG_PCIE_XILINX=y -CONFIG_VMD=m -CONFIG_PCI_HYPERV_INTERFACE=m - -# -# DesignWare PCI Core Support -# -CONFIG_PCIE_DW=y -CONFIG_PCIE_DW_HOST=y -CONFIG_PCIE_DW_EP=y -CONFIG_PCIE_DW_PLAT=y -CONFIG_PCIE_DW_PLAT_HOST=y -CONFIG_PCIE_DW_PLAT_EP=y -CONFIG_PCIE_INTEL_GW=y -CONFIG_PCI_MESON=y -# end of DesignWare PCI Core Support - -# -# Mobiveil PCIe Core Support -# -# end of Mobiveil PCIe Core Support - -# -# Cadence PCIe controllers support -# -CONFIG_PCIE_CADENCE=y -CONFIG_PCIE_CADENCE_HOST=y -CONFIG_PCIE_CADENCE_EP=y -CONFIG_PCIE_CADENCE_PLAT=y -CONFIG_PCIE_CADENCE_PLAT_HOST=y -CONFIG_PCIE_CADENCE_PLAT_EP=y -# end of Cadence PCIe controllers support -# end of PCI controller drivers - -# -# PCI Endpoint -# -CONFIG_PCI_ENDPOINT=y -CONFIG_PCI_ENDPOINT_CONFIGFS=y -# CONFIG_PCI_EPF_TEST is not set -# end of PCI Endpoint - -# -# PCI switch controller drivers -# -CONFIG_PCI_SW_SWITCHTEC=m -# end of PCI switch controller drivers - -CONFIG_PCCARD=m -CONFIG_PCMCIA=m -CONFIG_PCMCIA_LOAD_CIS=y -CONFIG_CARDBUS=y - -# -# PC-card bridges -# -CONFIG_YENTA=m -CONFIG_YENTA_O2=y -CONFIG_YENTA_RICOH=y -CONFIG_YENTA_TI=y -CONFIG_YENTA_ENE_TUNE=y -CONFIG_YENTA_TOSHIBA=y -CONFIG_PD6729=m -CONFIG_I82092=m -CONFIG_PCCARD_NONSTATIC=y -CONFIG_RAPIDIO=m -CONFIG_RAPIDIO_TSI721=m -CONFIG_RAPIDIO_DISC_TIMEOUT=30 -CONFIG_RAPIDIO_ENABLE_RX_TX_PORTS=y -CONFIG_RAPIDIO_DMA_ENGINE=y -# CONFIG_RAPIDIO_DEBUG is not set -CONFIG_RAPIDIO_ENUM_BASIC=m -CONFIG_RAPIDIO_CHMAN=m -CONFIG_RAPIDIO_MPORT_CDEV=m - -# -# RapidIO Switch drivers -# -CONFIG_RAPIDIO_TSI57X=m -CONFIG_RAPIDIO_CPS_XX=m -CONFIG_RAPIDIO_TSI568=m -CONFIG_RAPIDIO_CPS_GEN2=m -CONFIG_RAPIDIO_RXS_GEN3=m -# end of RapidIO Switch drivers - -# -# Generic Driver Options -# -# CONFIG_UEVENT_HELPER is not set -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y - -# -# Firmware loader -# -CONFIG_FW_LOADER=y -CONFIG_FW_LOADER_PAGED_BUF=y -CONFIG_EXTRA_FIRMWARE="" -# CONFIG_FW_LOADER_USER_HELPER is not set -CONFIG_FW_LOADER_COMPRESS=y -CONFIG_FW_CACHE=y -# end of Firmware loader - -CONFIG_WANT_DEV_COREDUMP=y -CONFIG_ALLOW_DEV_COREDUMP=y -CONFIG_DEV_COREDUMP=y -# CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set -# CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set -CONFIG_HMEM_REPORTING=y -# CONFIG_TEST_ASYNC_DRIVER_PROBE is not set -CONFIG_SYS_HYPERVISOR=y -CONFIG_GENERIC_CPU_AUTOPROBE=y -CONFIG_GENERIC_CPU_VULNERABILITIES=y -CONFIG_REGMAP=y -CONFIG_REGMAP_I2C=y -CONFIG_REGMAP_SLIMBUS=m -CONFIG_REGMAP_SPI=y -CONFIG_REGMAP_SPMI=m -CONFIG_REGMAP_W1=m -CONFIG_REGMAP_MMIO=y -CONFIG_REGMAP_IRQ=y -CONFIG_REGMAP_SOUNDWIRE=m -CONFIG_REGMAP_SCCB=m -CONFIG_REGMAP_I3C=m -CONFIG_DMA_SHARED_BUFFER=y -# CONFIG_DMA_FENCE_TRACE is not set -# end of Generic Driver Options - -# -# Bus devices -# -CONFIG_MOXTET=m -CONFIG_SIMPLE_PM_BUS=y -CONFIG_MHI_BUS=m -# 
end of Bus devices - -CONFIG_CONNECTOR=y -CONFIG_PROC_EVENTS=y -CONFIG_GNSS=m -CONFIG_GNSS_SERIAL=m -CONFIG_GNSS_MTK_SERIAL=m -CONFIG_GNSS_SIRF_SERIAL=m -CONFIG_GNSS_UBX_SERIAL=m -CONFIG_MTD=m -CONFIG_MTD_TESTS=m - -# -# Partition parsers -# -CONFIG_MTD_AR7_PARTS=m -CONFIG_MTD_CMDLINE_PARTS=m -CONFIG_MTD_OF_PARTS=m -CONFIG_MTD_REDBOOT_PARTS=m -CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1 -# CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED is not set -# CONFIG_MTD_REDBOOT_PARTS_READONLY is not set -# end of Partition parsers - -# -# User Modules And Translation Layers -# -CONFIG_MTD_BLKDEVS=m -CONFIG_MTD_BLOCK=m -CONFIG_MTD_BLOCK_RO=m -CONFIG_FTL=m -CONFIG_NFTL=m -CONFIG_NFTL_RW=y -CONFIG_INFTL=m -CONFIG_RFD_FTL=m -CONFIG_SSFDC=m -CONFIG_SM_FTL=m -CONFIG_MTD_OOPS=m -CONFIG_MTD_SWAP=m -CONFIG_MTD_PARTITIONED_MASTER=y - -# -# RAM/ROM/Flash chip drivers -# -CONFIG_MTD_CFI=m -CONFIG_MTD_JEDECPROBE=m -CONFIG_MTD_GEN_PROBE=m -# CONFIG_MTD_CFI_ADV_OPTIONS is not set -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y -CONFIG_MTD_CFI_INTELEXT=m -CONFIG_MTD_CFI_AMDSTD=m -CONFIG_MTD_CFI_STAA=m -CONFIG_MTD_CFI_UTIL=m -CONFIG_MTD_RAM=m -CONFIG_MTD_ROM=m -CONFIG_MTD_ABSENT=m -# end of RAM/ROM/Flash chip drivers - -# -# Mapping drivers for chip access -# -CONFIG_MTD_COMPLEX_MAPPINGS=y -CONFIG_MTD_PHYSMAP=m -# CONFIG_MTD_PHYSMAP_COMPAT is not set -CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_PHYSMAP_VERSATILE=y -CONFIG_MTD_PHYSMAP_GEMINI=y -CONFIG_MTD_PHYSMAP_GPIO_ADDR=y -CONFIG_MTD_SBC_GXX=m -CONFIG_MTD_AMD76XROM=m -CONFIG_MTD_ICHXROM=m -CONFIG_MTD_ESB2ROM=m -CONFIG_MTD_CK804XROM=m -CONFIG_MTD_SCB2_FLASH=m -CONFIG_MTD_NETtel=m -CONFIG_MTD_L440GX=m -CONFIG_MTD_PCI=m -CONFIG_MTD_PCMCIA=m -# CONFIG_MTD_PCMCIA_ANONYMOUS is not set -CONFIG_MTD_INTEL_VR_NOR=m -CONFIG_MTD_PLATRAM=m -# end of Mapping drivers for chip access - -# -# Self-contained MTD device drivers -# -CONFIG_MTD_PMC551=m -# CONFIG_MTD_PMC551_BUGFIX is not set -# CONFIG_MTD_PMC551_DEBUG is not set -CONFIG_MTD_DATAFLASH=m -# CONFIG_MTD_DATAFLASH_WRITE_VERIFY is not set -CONFIG_MTD_DATAFLASH_OTP=y -CONFIG_MTD_MCHP23K256=m -CONFIG_MTD_SST25L=m -CONFIG_MTD_SLRAM=m -CONFIG_MTD_PHRAM=m -CONFIG_MTD_MTDRAM=m -CONFIG_MTDRAM_TOTAL_SIZE=4096 -CONFIG_MTDRAM_ERASE_SIZE=128 -CONFIG_MTD_BLOCK2MTD=m - -# -# Disk-On-Chip Device Drivers -# -CONFIG_MTD_DOCG3=m -CONFIG_BCH_CONST_M=14 -CONFIG_BCH_CONST_T=4 -# end of Self-contained MTD device drivers - -CONFIG_MTD_NAND_CORE=m -CONFIG_MTD_ONENAND=m -# CONFIG_MTD_ONENAND_VERIFY_WRITE is not set -CONFIG_MTD_ONENAND_GENERIC=m -CONFIG_MTD_ONENAND_OTP=y -CONFIG_MTD_ONENAND_2X_PROGRAM=y -CONFIG_MTD_NAND_ECC_SW_HAMMING=m -CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y -CONFIG_MTD_RAW_NAND=m -CONFIG_MTD_NAND_ECC_SW_BCH=y - -# -# Raw/parallel NAND flash controllers -# -CONFIG_MTD_NAND_DENALI=m -CONFIG_MTD_NAND_DENALI_PCI=m -CONFIG_MTD_NAND_DENALI_DT=m -CONFIG_MTD_NAND_CAFE=m -CONFIG_MTD_NAND_MXIC=m -CONFIG_MTD_NAND_GPIO=m -CONFIG_MTD_NAND_PLATFORM=m -CONFIG_MTD_NAND_CADENCE=m - -# -# Misc -# -CONFIG_MTD_SM_COMMON=m -CONFIG_MTD_NAND_NANDSIM=m -CONFIG_MTD_NAND_RICOH=m -CONFIG_MTD_NAND_DISKONCHIP=m -# CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADVANCED is not set -CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADDRESS=0 -CONFIG_MTD_NAND_DISKONCHIP_BBTWRITE=y -CONFIG_MTD_SPI_NAND=m - -# -# LPDDR & LPDDR2 PCM memory drivers -# -CONFIG_MTD_LPDDR=m -CONFIG_MTD_QINFO_PROBE=m -# end of LPDDR & LPDDR2 PCM memory drivers - -CONFIG_MTD_SPI_NOR=m -CONFIG_MTD_SPI_NOR_USE_4K_SECTORS=y -CONFIG_SPI_INTEL_SPI=m 
-CONFIG_SPI_INTEL_SPI_PCI=m -CONFIG_SPI_INTEL_SPI_PLATFORM=m -CONFIG_MTD_UBI=m -CONFIG_MTD_UBI_WL_THRESHOLD=4096 -CONFIG_MTD_UBI_BEB_LIMIT=20 -CONFIG_MTD_UBI_FASTMAP=y -CONFIG_MTD_UBI_GLUEBI=m -CONFIG_MTD_UBI_BLOCK=y -CONFIG_MTD_HYPERBUS=m -CONFIG_DTC=y -CONFIG_OF=y -# CONFIG_OF_UNITTEST is not set -CONFIG_OF_FLATTREE=y -CONFIG_OF_KOBJ=y -CONFIG_OF_DYNAMIC=y -CONFIG_OF_ADDRESS=y -CONFIG_OF_IRQ=y -CONFIG_OF_NET=y -CONFIG_OF_MDIO=m -CONFIG_OF_RESOLVE=y -CONFIG_OF_OVERLAY=y -CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_SERIAL=m -CONFIG_PARPORT_PC_FIFO=y -CONFIG_PARPORT_PC_SUPERIO=y -CONFIG_PARPORT_PC_PCMCIA=m -CONFIG_PARPORT_AX88796=m -CONFIG_PARPORT_1284=y -CONFIG_PARPORT_NOT_PC=y -CONFIG_PNP=y -CONFIG_PNP_DEBUG_MESSAGES=y - -# -# Protocols -# -CONFIG_PNPACPI=y -CONFIG_BLK_DEV=y -# CONFIG_BLK_DEV_NULL_BLK is not set -CONFIG_BLK_DEV_FD=m -CONFIG_CDROM=m -# CONFIG_PARIDE is not set -CONFIG_BLK_DEV_PCIESSD_MTIP32XX=m -CONFIG_ZRAM=m -CONFIG_ZRAM_WRITEBACK=y -# CONFIG_ZRAM_MEMORY_TRACKING is not set -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_BLK_DEV_DRBD=m -# CONFIG_DRBD_FAULT_INJECTION is not set -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_SKD=m -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_RAM=m -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=16384 -CONFIG_CDROM_PKTCDVD=m -CONFIG_CDROM_PKTCDVD_BUFFERS=8 -# CONFIG_CDROM_PKTCDVD_WCACHE is not set -CONFIG_ATA_OVER_ETH=m -CONFIG_XEN_BLKDEV_FRONTEND=m -CONFIG_XEN_BLKDEV_BACKEND=m -CONFIG_VIRTIO_BLK=m -CONFIG_BLK_DEV_RBD=m -CONFIG_BLK_DEV_RSXX=m - -# -# NVME Support -# -CONFIG_NVME_CORE=y -CONFIG_BLK_DEV_NVME=y -CONFIG_NVME_MULTIPATH=y -CONFIG_NVME_HWMON=y -CONFIG_NVME_FABRICS=m -CONFIG_NVME_RDMA=m -CONFIG_NVME_FC=m -CONFIG_NVME_TCP=m -CONFIG_NVME_TARGET=m -CONFIG_NVME_TARGET_LOOP=m -CONFIG_NVME_TARGET_RDMA=m -CONFIG_NVME_TARGET_FC=m -CONFIG_NVME_TARGET_FCLOOP=m -CONFIG_NVME_TARGET_TCP=m -# end of NVME Support - -# -# Misc devices -# -CONFIG_SENSORS_LIS3LV02D=m -CONFIG_AD525X_DPOT=m -CONFIG_AD525X_DPOT_I2C=m -CONFIG_AD525X_DPOT_SPI=m -# CONFIG_DUMMY_IRQ is not set -CONFIG_IBM_ASM=m -CONFIG_PHANTOM=m -CONFIG_TIFM_CORE=m -CONFIG_TIFM_7XX1=m -CONFIG_ICS932S401=m -CONFIG_ENCLOSURE_SERVICES=m -CONFIG_HP_ILO=m -CONFIG_APDS9802ALS=m -CONFIG_ISL29003=m -CONFIG_ISL29020=m -CONFIG_SENSORS_TSL2550=m -CONFIG_SENSORS_BH1770=m -CONFIG_SENSORS_APDS990X=m -CONFIG_HMC6352=m -CONFIG_DS1682=m -CONFIG_VMWARE_BALLOON=m -CONFIG_LATTICE_ECP3_CONFIG=m -# CONFIG_SRAM is not set -CONFIG_PCI_ENDPOINT_TEST=m -CONFIG_XILINX_SDFEC=m -CONFIG_MISC_RTSX=m -CONFIG_PVPANIC=m -CONFIG_C2PORT=m -CONFIG_C2PORT_DURAMAR_2150=m - -# -# EEPROM support -# -CONFIG_EEPROM_AT24=m -# CONFIG_EEPROM_AT25 is not set -CONFIG_EEPROM_LEGACY=m -CONFIG_EEPROM_MAX6875=m -CONFIG_EEPROM_93CX6=m -# CONFIG_EEPROM_93XX46 is not set -CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EEPROM_EE1004=m -# end of EEPROM support - -CONFIG_CB710_CORE=m -# CONFIG_CB710_DEBUG is not set -CONFIG_CB710_DEBUG_ASSUMPTIONS=y - -# -# Texas Instruments shared transport line discipline -# -CONFIG_TI_ST=m -# end of Texas Instruments shared transport line discipline - -CONFIG_SENSORS_LIS3_I2C=m -CONFIG_ALTERA_STAPL=m -CONFIG_INTEL_MEI=m -CONFIG_INTEL_MEI_ME=m -CONFIG_INTEL_MEI_TXE=m -CONFIG_INTEL_MEI_HDCP=m -CONFIG_VMWARE_VMCI=m - -# -# Intel MIC & related support -# -CONFIG_INTEL_MIC_BUS=m -CONFIG_SCIF_BUS=m -CONFIG_VOP_BUS=m -CONFIG_INTEL_MIC_HOST=m -CONFIG_INTEL_MIC_CARD=m -CONFIG_SCIF=m -CONFIG_MIC_COSM=m -CONFIG_VOP=m -# end of 
Intel MIC & related support - -CONFIG_GENWQE=m -CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY=0 -CONFIG_ECHO=m -CONFIG_MISC_ALCOR_PCI=m -CONFIG_MISC_RTSX_PCI=m -CONFIG_MISC_RTSX_USB=m -CONFIG_HABANA_AI=m -CONFIG_UACCE=m -# end of Misc devices - -CONFIG_HAVE_IDE=y -# CONFIG_IDE is not set - -# -# SCSI device support -# -CONFIG_SCSI_MOD=y -CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y -CONFIG_SCSI_DMA=y -CONFIG_SCSI_NETLINK=y -CONFIG_SCSI_PROC_FS=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=m -CONFIG_BLK_DEV_SR=m -CONFIG_CHR_DEV_SG=m -CONFIG_CHR_DEV_SCH=m -CONFIG_SCSI_ENCLOSURE=m -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SCAN_ASYNC=y - -# -# SCSI Transports -# -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=m -CONFIG_SCSI_ISCSI_ATTRS=m -CONFIG_SCSI_SAS_ATTRS=m -CONFIG_SCSI_SAS_LIBSAS=m -CONFIG_SCSI_SAS_ATA=y -CONFIG_SCSI_SAS_HOST_SMP=y -CONFIG_SCSI_SRP_ATTRS=m -# end of SCSI Transports - -CONFIG_SCSI_LOWLEVEL=y -CONFIG_ISCSI_TCP=m -CONFIG_ISCSI_BOOT_SYSFS=m -CONFIG_SCSI_CXGB3_ISCSI=m -CONFIG_SCSI_CXGB4_ISCSI=m -CONFIG_SCSI_BNX2_ISCSI=m -CONFIG_SCSI_BNX2X_FCOE=m -CONFIG_BE2ISCSI=m -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_HPSA=m -CONFIG_SCSI_3W_9XXX=m -CONFIG_SCSI_3W_SAS=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -CONFIG_AIC7XXX_DEBUG_ENABLE=y -CONFIG_AIC7XXX_DEBUG_MASK=0 -CONFIG_AIC7XXX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -CONFIG_AIC79XX_DEBUG_ENABLE=y -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_AIC79XX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC94XX=m -CONFIG_AIC94XX_DEBUG=y -CONFIG_SCSI_MVSAS=m -CONFIG_SCSI_MVSAS_DEBUG=y -CONFIG_SCSI_MVSAS_TASKLET=y -CONFIG_SCSI_MVUMI=m -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_ARCMSR=m -CONFIG_SCSI_ESAS2R=m -CONFIG_MEGARAID_NEWGEN=y -CONFIG_MEGARAID_MM=m -CONFIG_MEGARAID_MAILBOX=m -CONFIG_MEGARAID_LEGACY=m -CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_MPT3SAS=m -CONFIG_SCSI_MPT2SAS_MAX_SGE=128 -CONFIG_SCSI_MPT3SAS_MAX_SGE=128 -CONFIG_SCSI_MPT2SAS=m -CONFIG_SCSI_SMARTPQI=m -CONFIG_SCSI_UFSHCD=m -CONFIG_SCSI_UFSHCD_PCI=m -# CONFIG_SCSI_UFS_DWC_TC_PCI is not set -CONFIG_SCSI_UFSHCD_PLATFORM=m -CONFIG_SCSI_UFS_CDNS_PLATFORM=m -# CONFIG_SCSI_UFS_DWC_TC_PLATFORM is not set -CONFIG_SCSI_UFS_BSG=y -CONFIG_SCSI_HPTIOP=m -CONFIG_SCSI_BUSLOGIC=m -CONFIG_SCSI_FLASHPOINT=y -CONFIG_SCSI_MYRB=m -CONFIG_SCSI_MYRS=m -CONFIG_VMWARE_PVSCSI=m -CONFIG_XEN_SCSI_FRONTEND=m -CONFIG_HYPERV_STORAGE=m -CONFIG_LIBFC=m -CONFIG_LIBFCOE=m -CONFIG_FCOE=m -CONFIG_FCOE_FNIC=m -CONFIG_SCSI_SNIC=m -# CONFIG_SCSI_SNIC_DEBUG_FS is not set -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_FDOMAIN=m -CONFIG_SCSI_FDOMAIN_PCI=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_ISCI=m -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_SCSI_STEX=m -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -CONFIG_SCSI_SYM53C8XX_MMIO=y -CONFIG_SCSI_IPR=m -CONFIG_SCSI_IPR_TRACE=y -CONFIG_SCSI_IPR_DUMP=y -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_QLA_FC=m -CONFIG_TCM_QLA2XXX=m -# CONFIG_TCM_QLA2XXX_DEBUG is not set -CONFIG_SCSI_QLA_ISCSI=m -CONFIG_QEDI=m -CONFIG_QEDF=m -CONFIG_SCSI_LPFC=m -# CONFIG_SCSI_LPFC_DEBUG_FS is not set -CONFIG_SCSI_DC395x=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_WD719X=m 
-CONFIG_SCSI_DEBUG=m -CONFIG_SCSI_PMCRAID=m -CONFIG_SCSI_PM8001=m -CONFIG_SCSI_BFA_FC=m -CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_CHELSIO_FCOE=m -CONFIG_SCSI_LOWLEVEL_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_QLOGIC=m -CONFIG_PCMCIA_SYM53C500=m -CONFIG_SCSI_DH=y -CONFIG_SCSI_DH_RDAC=m -CONFIG_SCSI_DH_HP_SW=m -CONFIG_SCSI_DH_EMC=m -CONFIG_SCSI_DH_ALUA=m -# end of SCSI device support - -CONFIG_ATA=y -CONFIG_SATA_HOST=y -CONFIG_PATA_TIMINGS=y -CONFIG_ATA_VERBOSE_ERROR=y -CONFIG_ATA_FORCE=y -CONFIG_ATA_ACPI=y -CONFIG_SATA_ZPODD=y -CONFIG_SATA_PMP=y - -# -# Controllers with non-SFF native interface -# -CONFIG_SATA_AHCI=y -CONFIG_SATA_MOBILE_LPM_POLICY=3 -CONFIG_SATA_AHCI_PLATFORM=m -CONFIG_AHCI_CEVA=m -CONFIG_AHCI_QORIQ=m -CONFIG_SATA_INIC162X=m -CONFIG_SATA_ACARD_AHCI=m -CONFIG_SATA_SIL24=m -CONFIG_ATA_SFF=y - -# -# SFF controllers with custom DMA interface -# -CONFIG_PDC_ADMA=m -CONFIG_SATA_QSTOR=m -CONFIG_SATA_SX4=m -CONFIG_ATA_BMDMA=y - -# -# SATA SFF controllers with BMDMA -# -CONFIG_ATA_PIIX=m -CONFIG_SATA_DWC=m -# CONFIG_SATA_DWC_OLD_DMA is not set -# CONFIG_SATA_DWC_DEBUG is not set -CONFIG_SATA_MV=m -CONFIG_SATA_NV=m -CONFIG_SATA_PROMISE=m -CONFIG_SATA_SIL=m -CONFIG_SATA_SIS=m -CONFIG_SATA_SVW=m -CONFIG_SATA_ULI=m -CONFIG_SATA_VIA=m -CONFIG_SATA_VITESSE=m - -# -# PATA SFF controllers with BMDMA -# -CONFIG_PATA_ALI=m -CONFIG_PATA_AMD=m -CONFIG_PATA_ARTOP=m -CONFIG_PATA_ATIIXP=m -CONFIG_PATA_ATP867X=m -CONFIG_PATA_CMD64X=m -CONFIG_PATA_CYPRESS=m -CONFIG_PATA_EFAR=m -CONFIG_PATA_HPT366=m -CONFIG_PATA_HPT37X=m -CONFIG_PATA_HPT3X2N=m -CONFIG_PATA_HPT3X3=m -CONFIG_PATA_HPT3X3_DMA=y -CONFIG_PATA_IT8213=m -CONFIG_PATA_IT821X=m -CONFIG_PATA_JMICRON=m -CONFIG_PATA_MARVELL=m -CONFIG_PATA_NETCELL=m -CONFIG_PATA_NINJA32=m -CONFIG_PATA_NS87415=m -CONFIG_PATA_OLDPIIX=m -CONFIG_PATA_OPTIDMA=m -CONFIG_PATA_PDC2027X=m -CONFIG_PATA_PDC_OLD=m -CONFIG_PATA_RADISYS=m -CONFIG_PATA_RDC=m -CONFIG_PATA_SCH=m -CONFIG_PATA_SERVERWORKS=m -CONFIG_PATA_SIL680=m -CONFIG_PATA_SIS=m -CONFIG_PATA_TOSHIBA=m -CONFIG_PATA_TRIFLEX=m -CONFIG_PATA_VIA=m -CONFIG_PATA_WINBOND=m - -# -# PIO-only SFF controllers -# -CONFIG_PATA_CMD640_PCI=m -CONFIG_PATA_MPIIX=m -CONFIG_PATA_NS87410=m -CONFIG_PATA_OPTI=m -CONFIG_PATA_PCMCIA=m -# CONFIG_PATA_PLATFORM is not set -CONFIG_PATA_RZ1000=m - -# -# Generic fallback / legacy drivers -# -CONFIG_PATA_ACPI=m -CONFIG_ATA_GENERIC=m -CONFIG_PATA_LEGACY=m -CONFIG_MD=y -CONFIG_BLK_DEV_MD=m -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID10=m -CONFIG_MD_RAID456=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m -CONFIG_MD_CLUSTER=m -CONFIG_BCACHE=m -# CONFIG_BCACHE_DEBUG is not set -# CONFIG_BCACHE_CLOSURES_DEBUG is not set -CONFIG_BLK_DEV_DM_BUILTIN=y -CONFIG_BLK_DEV_DM=m -CONFIG_DM_DEBUG=y -CONFIG_DM_BUFIO=m -# CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING is not set -CONFIG_DM_BIO_PRISON=m -CONFIG_DM_PERSISTENT_DATA=m -CONFIG_DM_UNSTRIPED=m -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_CACHE=m -CONFIG_DM_CACHE_SMQ=m -CONFIG_DM_WRITECACHE=m -CONFIG_DM_ERA=m -CONFIG_DM_CLONE=m -CONFIG_DM_MIRROR=m -CONFIG_DM_LOG_USERSPACE=m -CONFIG_DM_RAID=m -CONFIG_DM_ZERO=m -CONFIG_DM_MULTIPATH=m -CONFIG_DM_MULTIPATH_QL=m -CONFIG_DM_MULTIPATH_ST=m -CONFIG_DM_DELAY=m -CONFIG_DM_DUST=m -CONFIG_DM_UEVENT=y -CONFIG_DM_FLAKEY=m -CONFIG_DM_VERITY=m -CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y -CONFIG_DM_VERITY_FEC=y -CONFIG_DM_SWITCH=m -CONFIG_DM_LOG_WRITES=m -CONFIG_DM_INTEGRITY=m -CONFIG_DM_ZONED=m -CONFIG_TARGET_CORE=m -CONFIG_TCM_IBLOCK=m 
-CONFIG_TCM_FILEIO=m -CONFIG_TCM_PSCSI=m -CONFIG_TCM_USER2=m -CONFIG_LOOPBACK_TARGET=m -CONFIG_TCM_FC=m -CONFIG_ISCSI_TARGET=m -CONFIG_ISCSI_TARGET_CXGB4=m -CONFIG_SBP_TARGET=m -CONFIG_FUSION=y -CONFIG_FUSION_SPI=m -CONFIG_FUSION_FC=m -CONFIG_FUSION_SAS=m -CONFIG_FUSION_MAX_SGE=128 -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -# CONFIG_FUSION_LOGGING is not set - -# -# IEEE 1394 (FireWire) support -# -CONFIG_FIREWIRE=m -CONFIG_FIREWIRE_OHCI=m -CONFIG_FIREWIRE_SBP2=m -CONFIG_FIREWIRE_NET=m -CONFIG_FIREWIRE_NOSY=m -# end of IEEE 1394 (FireWire) support - -CONFIG_MACINTOSH_DRIVERS=y -CONFIG_MAC_EMUMOUSEBTN=m -CONFIG_NETDEVICES=y -CONFIG_MII=m -CONFIG_NET_CORE=y -CONFIG_BONDING=m -CONFIG_DUMMY=m -CONFIG_WIREGUARD=m -# CONFIG_WIREGUARD_DEBUG is not set -CONFIG_EQUALIZER=m -CONFIG_NET_FC=y -CONFIG_IFB=m -CONFIG_NET_TEAM=m -CONFIG_NET_TEAM_MODE_BROADCAST=m -CONFIG_NET_TEAM_MODE_ROUNDROBIN=m -CONFIG_NET_TEAM_MODE_RANDOM=m -CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m -CONFIG_NET_TEAM_MODE_LOADBALANCE=m -CONFIG_MACVLAN=m -CONFIG_MACVTAP=m -CONFIG_IPVLAN_L3S=y -CONFIG_IPVLAN=m -CONFIG_IPVTAP=m -CONFIG_VXLAN=m -CONFIG_GENEVE=m -CONFIG_BAREUDP=m -CONFIG_GTP=m -CONFIG_MACSEC=m -CONFIG_NETCONSOLE=m -CONFIG_NETCONSOLE_DYNAMIC=y -CONFIG_NETPOLL=y -CONFIG_NET_POLL_CONTROLLER=y -CONFIG_NTB_NETDEV=m -CONFIG_RIONET=m -CONFIG_RIONET_TX_SIZE=128 -CONFIG_RIONET_RX_SIZE=128 -CONFIG_TUN=m -CONFIG_TAP=m -# CONFIG_TUN_VNET_CROSS_LE is not set -CONFIG_VETH=m -CONFIG_VIRTIO_NET=m -CONFIG_NLMON=m -CONFIG_NET_VRF=m -CONFIG_VSOCKMON=m -CONFIG_SUNGEM_PHY=m -# CONFIG_ARCNET is not set -CONFIG_ATM_DRIVERS=y -# CONFIG_ATM_DUMMY is not set -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_NICSTAR=m -# CONFIG_ATM_NICSTAR_USE_SUNI is not set -# CONFIG_ATM_NICSTAR_USE_IDT77105 is not set -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -# CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E=m -CONFIG_ATM_FORE200E_USE_TASKLET=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_HE=m -CONFIG_ATM_HE_USE_SUNI=y -CONFIG_ATM_SOLOS=m -CONFIG_CAIF_DRIVERS=y -CONFIG_CAIF_TTY=m -CONFIG_CAIF_SPI_SLAVE=m -CONFIG_CAIF_SPI_SYNC=y -CONFIG_CAIF_HSI=m -CONFIG_CAIF_VIRTIO=m - -# -# Distributed Switch Architecture drivers -# -CONFIG_B53=m -# CONFIG_B53_SPI_DRIVER is not set -CONFIG_B53_MDIO_DRIVER=m -CONFIG_B53_MMAP_DRIVER=m -CONFIG_B53_SRAB_DRIVER=m -CONFIG_B53_SERDES=m -CONFIG_NET_DSA_BCM_SF2=m -CONFIG_NET_DSA_LOOP=m -CONFIG_NET_DSA_LANTIQ_GSWIP=m -CONFIG_NET_DSA_MT7530=m -CONFIG_NET_DSA_MV88E6060=m -CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON=m -CONFIG_NET_DSA_MICROCHIP_KSZ9477=m -CONFIG_NET_DSA_MICROCHIP_KSZ9477_I2C=m -CONFIG_NET_DSA_MICROCHIP_KSZ9477_SPI=m -CONFIG_NET_DSA_MICROCHIP_KSZ8795=m -CONFIG_NET_DSA_MICROCHIP_KSZ8795_SPI=m -CONFIG_NET_DSA_MV88E6XXX=m -CONFIG_NET_DSA_MV88E6XXX_GLOBAL2=y -CONFIG_NET_DSA_MV88E6XXX_PTP=y -CONFIG_NET_DSA_AR9331=m -CONFIG_NET_DSA_SJA1105=m -CONFIG_NET_DSA_SJA1105_PTP=y -CONFIG_NET_DSA_SJA1105_TAS=y -CONFIG_NET_DSA_QCA8K=m -CONFIG_NET_DSA_REALTEK_SMI=m -CONFIG_NET_DSA_SMSC_LAN9303=m -CONFIG_NET_DSA_SMSC_LAN9303_I2C=m -CONFIG_NET_DSA_SMSC_LAN9303_MDIO=m -CONFIG_NET_DSA_VITESSE_VSC73XX=m 
-CONFIG_NET_DSA_VITESSE_VSC73XX_SPI=m -CONFIG_NET_DSA_VITESSE_VSC73XX_PLATFORM=m -# end of Distributed Switch Architecture drivers - -CONFIG_ETHERNET=y -CONFIG_MDIO=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_3C589=m -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_NET_VENDOR_ADAPTEC=y -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_NET_VENDOR_AGERE=y -CONFIG_ET131X=m -CONFIG_NET_VENDOR_ALACRITECH=y -CONFIG_SLICOSS=m -CONFIG_NET_VENDOR_ALTEON=y -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_ALTERA_TSE=m -CONFIG_NET_VENDOR_AMAZON=y -CONFIG_ENA_ETHERNET=m -CONFIG_NET_VENDOR_AMD=y -CONFIG_AMD8111_ETH=m -CONFIG_PCNET32=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_AMD_XGBE=m -CONFIG_AMD_XGBE_DCB=y -CONFIG_AMD_XGBE_HAVE_ECC=y -CONFIG_NET_VENDOR_AQUANTIA=y -CONFIG_AQTION=m -CONFIG_NET_VENDOR_ARC=y -CONFIG_NET_VENDOR_ATHEROS=y -CONFIG_ATL2=m -CONFIG_ATL1=m -CONFIG_ATL1E=m -CONFIG_ATL1C=m -CONFIG_ALX=m -CONFIG_NET_VENDOR_AURORA=y -CONFIG_AURORA_NB8800=m -CONFIG_NET_VENDOR_BROADCOM=y -CONFIG_B44=m -CONFIG_B44_PCI_AUTOSELECT=y -CONFIG_B44_PCICORE_AUTOSELECT=y -CONFIG_B44_PCI=y -CONFIG_BCMGENET=m -CONFIG_BNX2=m -CONFIG_CNIC=m -CONFIG_TIGON3=m -CONFIG_TIGON3_HWMON=y -CONFIG_BNX2X=m -CONFIG_BNX2X_SRIOV=y -CONFIG_SYSTEMPORT=m -CONFIG_BNXT=m -CONFIG_BNXT_SRIOV=y -CONFIG_BNXT_FLOWER_OFFLOAD=y -CONFIG_BNXT_DCB=y -CONFIG_BNXT_HWMON=y -CONFIG_NET_VENDOR_BROCADE=y -CONFIG_BNA=m -CONFIG_NET_VENDOR_CADENCE=y -CONFIG_MACB=m -CONFIG_MACB_USE_HWSTAMP=y -CONFIG_MACB_PCI=m -CONFIG_NET_VENDOR_CAVIUM=y -CONFIG_THUNDER_NIC_PF=m -CONFIG_THUNDER_NIC_VF=m -CONFIG_THUNDER_NIC_BGX=m -CONFIG_THUNDER_NIC_RGX=m -CONFIG_CAVIUM_PTP=m -CONFIG_LIQUIDIO=m -CONFIG_LIQUIDIO_VF=m -CONFIG_NET_VENDOR_CHELSIO=y -CONFIG_CHELSIO_T1=m -CONFIG_CHELSIO_T1_1G=y -CONFIG_CHELSIO_T3=m -CONFIG_CHELSIO_T4=m -CONFIG_CHELSIO_T4_DCB=y -CONFIG_CHELSIO_T4_FCOE=y -CONFIG_CHELSIO_T4VF=m -CONFIG_CHELSIO_LIB=m -CONFIG_NET_VENDOR_CISCO=y -CONFIG_ENIC=m -CONFIG_NET_VENDOR_CORTINA=y -CONFIG_GEMINI_ETHERNET=m -CONFIG_CX_ECAT=m -CONFIG_DNET=m -CONFIG_NET_VENDOR_DEC=y -CONFIG_NET_TULIP=y -CONFIG_DE2104X=m -CONFIG_DE2104X_DSL=0 -CONFIG_TULIP=m -CONFIG_TULIP_MWI=y -CONFIG_TULIP_MMIO=y -CONFIG_TULIP_NAPI=y -CONFIG_TULIP_NAPI_HW_MITIGATION=y -CONFIG_DE4X5=m -CONFIG_WINBOND_840=m -CONFIG_DM9102=m -CONFIG_ULI526X=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_NET_VENDOR_DLINK=y -CONFIG_DL2K=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_NET_VENDOR_EMULEX=y -CONFIG_BE2NET=m -CONFIG_BE2NET_HWMON=y -CONFIG_BE2NET_BE2=y -CONFIG_BE2NET_BE3=y -CONFIG_BE2NET_LANCER=y -CONFIG_BE2NET_SKYHAWK=y -CONFIG_NET_VENDOR_EZCHIP=y -CONFIG_EZCHIP_NPS_MANAGEMENT_ENET=m -CONFIG_NET_VENDOR_FUJITSU=y -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_NET_VENDOR_GOOGLE=y -CONFIG_GVE=m -CONFIG_NET_VENDOR_HUAWEI=y -CONFIG_HINIC=m -CONFIG_NET_VENDOR_I825XX=y -CONFIG_NET_VENDOR_INTEL=y -CONFIG_E100=m -CONFIG_E1000=m -CONFIG_E1000E=m -CONFIG_E1000E_HWTS=y -CONFIG_IGB=m -CONFIG_IGB_HWMON=y -CONFIG_IGB_DCA=y -CONFIG_IGBVF=m -CONFIG_IXGB=m -CONFIG_IXGBE=m -CONFIG_IXGBE_HWMON=y -CONFIG_IXGBE_DCA=y -CONFIG_IXGBE_DCB=y -# CONFIG_IXGBE_IPSEC is not set -CONFIG_IXGBEVF=m -CONFIG_IXGBEVF_IPSEC=y -CONFIG_I40E=m -CONFIG_I40E_DCB=y -CONFIG_IAVF=m -CONFIG_I40EVF=m -CONFIG_ICE=m -CONFIG_FM10K=m -CONFIG_IGC=m -CONFIG_JME=m -CONFIG_NET_VENDOR_MARVELL=y -CONFIG_MVMDIO=m -CONFIG_SKGE=m -# CONFIG_SKGE_DEBUG is not set -CONFIG_SKGE_GENESIS=y -CONFIG_SKY2=m -# CONFIG_SKY2_DEBUG is not set -CONFIG_NET_VENDOR_MELLANOX=y -CONFIG_MLX4_EN=m -CONFIG_MLX4_EN_DCB=y -CONFIG_MLX4_CORE=m -CONFIG_MLX4_DEBUG=y -CONFIG_MLX4_CORE_GEN2=y 
-CONFIG_MLX5_CORE=m -CONFIG_MLX5_ACCEL=y -CONFIG_MLX5_FPGA=y -CONFIG_MLX5_CORE_EN=y -CONFIG_MLX5_EN_ARFS=y -CONFIG_MLX5_EN_RXNFC=y -CONFIG_MLX5_MPFS=y -CONFIG_MLX5_ESWITCH=y -CONFIG_MLX5_TC_CT=y -CONFIG_MLX5_CORE_EN_DCB=y -CONFIG_MLX5_CORE_IPOIB=y -CONFIG_MLX5_FPGA_IPSEC=y -CONFIG_MLX5_EN_IPSEC=y -CONFIG_MLX5_FPGA_TLS=y -CONFIG_MLX5_TLS=y -CONFIG_MLX5_EN_TLS=y -CONFIG_MLX5_SW_STEERING=y -CONFIG_MLXSW_CORE=m -CONFIG_MLXSW_CORE_HWMON=y -CONFIG_MLXSW_CORE_THERMAL=y -CONFIG_MLXSW_PCI=m -CONFIG_MLXSW_I2C=m -CONFIG_MLXSW_SWITCHIB=m -CONFIG_MLXSW_SWITCHX2=m -CONFIG_MLXSW_SPECTRUM=m -CONFIG_MLXSW_SPECTRUM_DCB=y -CONFIG_MLXSW_MINIMAL=m -CONFIG_MLXFW=m -CONFIG_NET_VENDOR_MICREL=y -CONFIG_KS8842=m -CONFIG_KS8851=m -CONFIG_KS8851_MLL=m -CONFIG_KSZ884X_PCI=m -CONFIG_NET_VENDOR_MICROCHIP=y -CONFIG_ENC28J60=m -# CONFIG_ENC28J60_WRITEVERIFY is not set -CONFIG_ENCX24J600=m -CONFIG_LAN743X=m -CONFIG_NET_VENDOR_MICROSEMI=y -CONFIG_MSCC_OCELOT_SWITCH=m -CONFIG_MSCC_OCELOT_SWITCH_OCELOT=m -CONFIG_NET_VENDOR_MYRI=y -CONFIG_MYRI10GE=m -CONFIG_MYRI10GE_DCA=y -CONFIG_FEALNX=m -CONFIG_NET_VENDOR_NATSEMI=y -CONFIG_NATSEMI=m -CONFIG_NS83820=m -CONFIG_NET_VENDOR_NETERION=y -CONFIG_S2IO=m -CONFIG_VXGE=m -# CONFIG_VXGE_DEBUG_TRACE_ALL is not set -CONFIG_NET_VENDOR_NETRONOME=y -CONFIG_NFP=m -CONFIG_NFP_APP_FLOWER=y -CONFIG_NFP_APP_ABM_NIC=y -# CONFIG_NFP_DEBUG is not set -CONFIG_NET_VENDOR_NI=y -CONFIG_NI_XGE_MANAGEMENT_ENET=m -CONFIG_NET_VENDOR_8390=y -CONFIG_PCMCIA_AXNET=m -CONFIG_NE2K_PCI=m -CONFIG_PCMCIA_PCNET=m -CONFIG_NET_VENDOR_NVIDIA=y -CONFIG_FORCEDETH=m -CONFIG_NET_VENDOR_OKI=y -CONFIG_ETHOC=m -CONFIG_NET_VENDOR_PACKET_ENGINES=y -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_NET_VENDOR_PENSANDO=y -CONFIG_IONIC=m -CONFIG_NET_VENDOR_QLOGIC=y -CONFIG_QLA3XXX=m -CONFIG_QLCNIC=m -CONFIG_QLCNIC_SRIOV=y -CONFIG_QLCNIC_DCB=y -CONFIG_QLCNIC_HWMON=y -CONFIG_NETXEN_NIC=m -CONFIG_QED=m -CONFIG_QED_LL2=y -CONFIG_QED_SRIOV=y -CONFIG_QEDE=m -CONFIG_QED_RDMA=y -CONFIG_QED_ISCSI=y -CONFIG_QED_FCOE=y -CONFIG_QED_OOO=y -CONFIG_NET_VENDOR_QUALCOMM=y -CONFIG_QCA7000=m -CONFIG_QCA7000_SPI=m -CONFIG_QCA7000_UART=m -CONFIG_QCOM_EMAC=m -CONFIG_RMNET=m -CONFIG_NET_VENDOR_RDC=y -CONFIG_R6040=m -CONFIG_NET_VENDOR_REALTEK=y -CONFIG_ATP=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -CONFIG_8139TOO_TUNE_TWISTER=y -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_R8169=m -CONFIG_NET_VENDOR_RENESAS=y -CONFIG_NET_VENDOR_ROCKER=y -CONFIG_ROCKER=m -CONFIG_NET_VENDOR_SAMSUNG=y -CONFIG_SXGBE_ETH=m -CONFIG_NET_VENDOR_SEEQ=y -CONFIG_NET_VENDOR_SOLARFLARE=y -CONFIG_SFC=m -CONFIG_SFC_MTD=y -CONFIG_SFC_MCDI_MON=y -CONFIG_SFC_SRIOV=y -CONFIG_SFC_MCDI_LOGGING=y -CONFIG_SFC_FALCON=m -CONFIG_SFC_FALCON_MTD=y -CONFIG_NET_VENDOR_SILAN=y -CONFIG_SC92031=m -CONFIG_NET_VENDOR_SIS=y -CONFIG_SIS900=m -CONFIG_SIS190=m -CONFIG_NET_VENDOR_SMSC=y -CONFIG_PCMCIA_SMC91C92=m -CONFIG_EPIC100=m -CONFIG_SMSC911X=m -CONFIG_SMSC9420=m -CONFIG_NET_VENDOR_SOCIONEXT=y -CONFIG_NET_VENDOR_STMICRO=y -CONFIG_STMMAC_ETH=m -# CONFIG_STMMAC_SELFTESTS is not set -CONFIG_STMMAC_PLATFORM=m -CONFIG_DWMAC_DWC_QOS_ETH=m -CONFIG_DWMAC_GENERIC=m -CONFIG_DWMAC_INTEL=m -CONFIG_STMMAC_PCI=m -CONFIG_NET_VENDOR_SUN=y -CONFIG_HAPPYMEAL=m -CONFIG_SUNGEM=m -CONFIG_CASSINI=m -CONFIG_NIU=m -CONFIG_NET_VENDOR_SYNOPSYS=y -CONFIG_DWC_XLGMAC=m -CONFIG_DWC_XLGMAC_PCI=m -CONFIG_NET_VENDOR_TEHUTI=y -CONFIG_TEHUTI=m -CONFIG_NET_VENDOR_TI=y -# CONFIG_TI_CPSW_PHY_SEL is not set -CONFIG_TLAN=m -CONFIG_NET_VENDOR_VIA=y -CONFIG_VIA_RHINE=m -CONFIG_VIA_RHINE_MMIO=y 
-CONFIG_VIA_VELOCITY=m -CONFIG_NET_VENDOR_WIZNET=y -CONFIG_WIZNET_W5100=m -CONFIG_WIZNET_W5300=m -# CONFIG_WIZNET_BUS_DIRECT is not set -# CONFIG_WIZNET_BUS_INDIRECT is not set -CONFIG_WIZNET_BUS_ANY=y -CONFIG_WIZNET_W5100_SPI=m -CONFIG_NET_VENDOR_XILINX=y -CONFIG_XILINX_AXI_EMAC=m -CONFIG_XILINX_LL_TEMAC=m -CONFIG_NET_VENDOR_XIRCOM=y -CONFIG_PCMCIA_XIRC2PS=m -CONFIG_FDDI=m -CONFIG_DEFXX=m -CONFIG_DEFXX_MMIO=y -CONFIG_SKFP=m -# CONFIG_HIPPI is not set -CONFIG_NET_SB1000=m -CONFIG_MDIO_DEVICE=m -CONFIG_MDIO_BUS=m -CONFIG_MDIO_BCM_UNIMAC=m -CONFIG_MDIO_BITBANG=m -CONFIG_MDIO_BUS_MUX=m -CONFIG_MDIO_BUS_MUX_GPIO=m -CONFIG_MDIO_BUS_MUX_MMIOREG=m -CONFIG_MDIO_BUS_MUX_MULTIPLEXER=m -CONFIG_MDIO_CAVIUM=m -CONFIG_MDIO_GPIO=m -CONFIG_MDIO_HISI_FEMAC=m -CONFIG_MDIO_I2C=m -CONFIG_MDIO_IPQ8064=m -CONFIG_MDIO_MSCC_MIIM=m -CONFIG_MDIO_MVUSB=m -CONFIG_MDIO_OCTEON=m -CONFIG_MDIO_THUNDER=m -CONFIG_MDIO_XPCS=m -CONFIG_PHYLINK=m -CONFIG_PHYLIB=m -CONFIG_SWPHY=y -CONFIG_LED_TRIGGER_PHY=y - -# -# MII PHY device drivers -# -CONFIG_SFP=m -CONFIG_ADIN_PHY=m -CONFIG_AMD_PHY=m -CONFIG_AQUANTIA_PHY=m -CONFIG_AX88796B_PHY=m -CONFIG_BCM7XXX_PHY=m -CONFIG_BCM87XX_PHY=m -CONFIG_BCM_NET_PHYLIB=m -CONFIG_BROADCOM_PHY=m -CONFIG_BCM84881_PHY=m -CONFIG_CICADA_PHY=m -CONFIG_CORTINA_PHY=m -CONFIG_DAVICOM_PHY=m -CONFIG_DP83822_PHY=m -CONFIG_DP83TC811_PHY=m -CONFIG_DP83848_PHY=m -CONFIG_DP83867_PHY=m -CONFIG_DP83869_PHY=m -CONFIG_FIXED_PHY=m -CONFIG_ICPLUS_PHY=m -CONFIG_INTEL_XWAY_PHY=m -CONFIG_LSI_ET1011C_PHY=m -CONFIG_LXT_PHY=m -CONFIG_MARVELL_PHY=m -CONFIG_MARVELL_10G_PHY=m -CONFIG_MICREL_PHY=m -CONFIG_MICROCHIP_PHY=m -CONFIG_MICROCHIP_T1_PHY=m -CONFIG_MICROSEMI_PHY=m -CONFIG_NATIONAL_PHY=m -CONFIG_NXP_TJA11XX_PHY=m -CONFIG_AT803X_PHY=m -CONFIG_QSEMI_PHY=m -CONFIG_REALTEK_PHY=m -CONFIG_RENESAS_PHY=m -CONFIG_ROCKCHIP_PHY=m -CONFIG_SMSC_PHY=m -CONFIG_STE10XP=m -CONFIG_TERANETICS_PHY=m -CONFIG_VITESSE_PHY=m -CONFIG_XILINX_GMII2RGMII=m -CONFIG_MICREL_KS8995MA=m -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_MPPE=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPPOATM=m -CONFIG_PPPOE=m -CONFIG_PPTP=m -CONFIG_PPPOL2TP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_SLIP=m -CONFIG_SLHC=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y -CONFIG_USB_NET_DRIVERS=m -CONFIG_USB_CATC=m -CONFIG_USB_KAWETH=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_RTL8152=m -CONFIG_USB_LAN78XX=m -CONFIG_USB_USBNET=m -CONFIG_USB_NET_AX8817X=m -CONFIG_USB_NET_AX88179_178A=m -CONFIG_USB_NET_CDCETHER=m -CONFIG_USB_NET_CDC_EEM=m -CONFIG_USB_NET_CDC_NCM=m -CONFIG_USB_NET_HUAWEI_CDC_NCM=m -CONFIG_USB_NET_CDC_MBIM=m -CONFIG_USB_NET_DM9601=m -CONFIG_USB_NET_SR9700=m -CONFIG_USB_NET_SR9800=m -CONFIG_USB_NET_SMSC75XX=m -CONFIG_USB_NET_SMSC95XX=m -CONFIG_USB_NET_GL620A=m -CONFIG_USB_NET_NET1080=m -CONFIG_USB_NET_PLUSB=m -CONFIG_USB_NET_MCS7830=m -CONFIG_USB_NET_RNDIS_HOST=m -CONFIG_USB_NET_CDC_SUBSET_ENABLE=m -CONFIG_USB_NET_CDC_SUBSET=m -CONFIG_USB_ALI_M5632=y -CONFIG_USB_AN2720=y -CONFIG_USB_BELKIN=y -CONFIG_USB_ARMLINUX=y -CONFIG_USB_EPSON2888=y -CONFIG_USB_KC2190=y -CONFIG_USB_NET_ZAURUS=m -CONFIG_USB_NET_CX82310_ETH=m -CONFIG_USB_NET_KALMIA=m -CONFIG_USB_NET_QMI_WWAN=m -CONFIG_USB_HSO=m -CONFIG_USB_NET_INT51X1=m -CONFIG_USB_CDC_PHONET=m -CONFIG_USB_IPHETH=m -CONFIG_USB_SIERRA_NET=m -CONFIG_USB_VL600=m -CONFIG_USB_NET_CH9200=m -CONFIG_USB_NET_AQC111=m -CONFIG_WLAN=y -# CONFIG_WIRELESS_WDS is not set -CONFIG_WLAN_VENDOR_ADMTEK=y -CONFIG_ADM8211=m -CONFIG_ATH_COMMON=m 
-CONFIG_WLAN_VENDOR_ATH=y -# CONFIG_ATH_DEBUG is not set -CONFIG_ATH5K=m -CONFIG_ATH5K_DEBUG=y -CONFIG_ATH5K_TRACER=y -CONFIG_ATH5K_PCI=y -CONFIG_ATH9K_HW=m -CONFIG_ATH9K_COMMON=m -CONFIG_ATH9K_COMMON_DEBUG=y -CONFIG_ATH9K_BTCOEX_SUPPORT=y -CONFIG_ATH9K=m -CONFIG_ATH9K_PCI=y -CONFIG_ATH9K_AHB=y -CONFIG_ATH9K_DEBUGFS=y -CONFIG_ATH9K_STATION_STATISTICS=y -CONFIG_ATH9K_DYNACK=y -CONFIG_ATH9K_WOW=y -CONFIG_ATH9K_RFKILL=y -CONFIG_ATH9K_CHANNEL_CONTEXT=y -CONFIG_ATH9K_PCOEM=y -CONFIG_ATH9K_PCI_NO_EEPROM=m -CONFIG_ATH9K_HTC=m -CONFIG_ATH9K_HTC_DEBUGFS=y -CONFIG_ATH9K_HWRNG=y -CONFIG_ATH9K_COMMON_SPECTRAL=y -CONFIG_CARL9170=m -CONFIG_CARL9170_LEDS=y -CONFIG_CARL9170_DEBUGFS=y -CONFIG_CARL9170_WPC=y -# CONFIG_CARL9170_HWRNG is not set -CONFIG_ATH6KL=m -CONFIG_ATH6KL_SDIO=m -CONFIG_ATH6KL_USB=m -CONFIG_ATH6KL_DEBUG=y -CONFIG_ATH6KL_TRACING=y -CONFIG_AR5523=m -CONFIG_WIL6210=m -CONFIG_WIL6210_ISR_COR=y -CONFIG_WIL6210_TRACING=y -CONFIG_WIL6210_DEBUGFS=y -CONFIG_ATH10K=m -CONFIG_ATH10K_CE=y -CONFIG_ATH10K_PCI=m -CONFIG_ATH10K_AHB=y -CONFIG_ATH10K_SDIO=m -CONFIG_ATH10K_USB=m -CONFIG_ATH10K_DEBUG=y -CONFIG_ATH10K_DEBUGFS=y -CONFIG_ATH10K_SPECTRAL=y -CONFIG_ATH10K_TRACING=y -CONFIG_WCN36XX=m -CONFIG_WCN36XX_DEBUGFS=y -CONFIG_WLAN_VENDOR_ATMEL=y -CONFIG_ATMEL=m -CONFIG_PCI_ATMEL=m -CONFIG_PCMCIA_ATMEL=m -CONFIG_AT76C50X_USB=m -CONFIG_WLAN_VENDOR_BROADCOM=y -CONFIG_B43=m -CONFIG_B43_BCMA=y -CONFIG_B43_SSB=y -CONFIG_B43_BUSES_BCMA_AND_SSB=y -# CONFIG_B43_BUSES_BCMA is not set -# CONFIG_B43_BUSES_SSB is not set -CONFIG_B43_PCI_AUTOSELECT=y -CONFIG_B43_PCICORE_AUTOSELECT=y -CONFIG_B43_SDIO=y -CONFIG_B43_BCMA_PIO=y -CONFIG_B43_PIO=y -CONFIG_B43_PHY_G=y -CONFIG_B43_PHY_N=y -CONFIG_B43_PHY_LP=y -CONFIG_B43_PHY_HT=y -CONFIG_B43_LEDS=y -CONFIG_B43_HWRNG=y -# CONFIG_B43_DEBUG is not set -CONFIG_B43LEGACY=m -CONFIG_B43LEGACY_PCI_AUTOSELECT=y -CONFIG_B43LEGACY_PCICORE_AUTOSELECT=y -CONFIG_B43LEGACY_LEDS=y -CONFIG_B43LEGACY_HWRNG=y -CONFIG_B43LEGACY_DEBUG=y -CONFIG_B43LEGACY_DMA=y -CONFIG_B43LEGACY_PIO=y -CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y -# CONFIG_B43LEGACY_DMA_MODE is not set -# CONFIG_B43LEGACY_PIO_MODE is not set -CONFIG_BRCMUTIL=m -CONFIG_BRCMSMAC=m -CONFIG_BRCMFMAC=m -CONFIG_BRCMFMAC_PROTO_BCDC=y -CONFIG_BRCMFMAC_PROTO_MSGBUF=y -CONFIG_BRCMFMAC_SDIO=y -CONFIG_BRCMFMAC_USB=y -CONFIG_BRCMFMAC_PCIE=y -CONFIG_BRCM_TRACING=y -CONFIG_BRCMDBG=y -CONFIG_WLAN_VENDOR_CISCO=y -CONFIG_AIRO=m -CONFIG_AIRO_CS=m -CONFIG_WLAN_VENDOR_INTEL=y -CONFIG_IPW2100=m -CONFIG_IPW2100_MONITOR=y -# CONFIG_IPW2100_DEBUG is not set -CONFIG_IPW2200=m -CONFIG_IPW2200_MONITOR=y -CONFIG_IPW2200_RADIOTAP=y -CONFIG_IPW2200_PROMISCUOUS=y -CONFIG_IPW2200_QOS=y -# CONFIG_IPW2200_DEBUG is not set -CONFIG_LIBIPW=m -# CONFIG_LIBIPW_DEBUG is not set -CONFIG_IWLEGACY=m -CONFIG_IWL4965=m -CONFIG_IWL3945=m - -# -# iwl3945 / iwl4965 Debugging Options -# -CONFIG_IWLEGACY_DEBUG=y -CONFIG_IWLEGACY_DEBUGFS=y -# end of iwl3945 / iwl4965 Debugging Options - -CONFIG_IWLWIFI=m -CONFIG_IWLWIFI_LEDS=y -CONFIG_IWLDVM=m -CONFIG_IWLMVM=m -CONFIG_IWLWIFI_OPMODE_MODULAR=y -# CONFIG_IWLWIFI_BCAST_FILTERING is not set - -# -# Debugging Options -# -CONFIG_IWLWIFI_DEBUG=y -CONFIG_IWLWIFI_DEBUGFS=y -CONFIG_IWLWIFI_DEVICE_TRACING=y -# end of Debugging Options - -CONFIG_WLAN_VENDOR_INTERSIL=y -CONFIG_HOSTAP=m -CONFIG_HOSTAP_FIRMWARE=y -CONFIG_HOSTAP_FIRMWARE_NVRAM=y -CONFIG_HOSTAP_PLX=m -CONFIG_HOSTAP_PCI=m -CONFIG_HOSTAP_CS=m -CONFIG_HERMES=m -CONFIG_HERMES_PRISM=y -CONFIG_HERMES_CACHE_FW_ON_INIT=y -CONFIG_PLX_HERMES=m -CONFIG_TMD_HERMES=m -CONFIG_NORTEL_HERMES=m 
-CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_PCMCIA_SPECTRUM=m -CONFIG_ORINOCO_USB=m -CONFIG_P54_COMMON=m -CONFIG_P54_USB=m -CONFIG_P54_PCI=m -CONFIG_P54_SPI=m -# CONFIG_P54_SPI_DEFAULT_EEPROM is not set -CONFIG_P54_LEDS=y -CONFIG_PRISM54=m -CONFIG_WLAN_VENDOR_MARVELL=y -CONFIG_LIBERTAS=m -CONFIG_LIBERTAS_USB=m -CONFIG_LIBERTAS_CS=m -CONFIG_LIBERTAS_SDIO=m -CONFIG_LIBERTAS_SPI=m -# CONFIG_LIBERTAS_DEBUG is not set -CONFIG_LIBERTAS_MESH=y -CONFIG_LIBERTAS_THINFIRM=m -# CONFIG_LIBERTAS_THINFIRM_DEBUG is not set -CONFIG_LIBERTAS_THINFIRM_USB=m -CONFIG_MWIFIEX=m -CONFIG_MWIFIEX_SDIO=m -CONFIG_MWIFIEX_PCIE=m -CONFIG_MWIFIEX_USB=m -CONFIG_MWL8K=m -CONFIG_WLAN_VENDOR_MEDIATEK=y -CONFIG_MT7601U=m -CONFIG_MT76_CORE=m -CONFIG_MT76_LEDS=y -CONFIG_MT76_USB=m -CONFIG_MT76x02_LIB=m -CONFIG_MT76x02_USB=m -CONFIG_MT76x0_COMMON=m -CONFIG_MT76x0U=m -CONFIG_MT76x0E=m -CONFIG_MT76x2_COMMON=m -CONFIG_MT76x2E=m -CONFIG_MT76x2U=m -CONFIG_MT7603E=m -CONFIG_MT7615E=m -CONFIG_WLAN_VENDOR_RALINK=y -CONFIG_RT2X00=m -CONFIG_RT2400PCI=m -CONFIG_RT2500PCI=m -CONFIG_RT61PCI=m -CONFIG_RT2800PCI=m -CONFIG_RT2800PCI_RT33XX=y -CONFIG_RT2800PCI_RT35XX=y -CONFIG_RT2800PCI_RT53XX=y -CONFIG_RT2800PCI_RT3290=y -CONFIG_RT2500USB=m -CONFIG_RT73USB=m -CONFIG_RT2800USB=m -CONFIG_RT2800USB_RT33XX=y -CONFIG_RT2800USB_RT35XX=y -CONFIG_RT2800USB_RT3573=y -CONFIG_RT2800USB_RT53XX=y -CONFIG_RT2800USB_RT55XX=y -CONFIG_RT2800USB_UNKNOWN=y -CONFIG_RT2800_LIB=m -CONFIG_RT2800_LIB_MMIO=m -CONFIG_RT2X00_LIB_MMIO=m -CONFIG_RT2X00_LIB_PCI=m -CONFIG_RT2X00_LIB_USB=m -CONFIG_RT2X00_LIB=m -CONFIG_RT2X00_LIB_FIRMWARE=y -CONFIG_RT2X00_LIB_CRYPTO=y -CONFIG_RT2X00_LIB_LEDS=y -CONFIG_RT2X00_LIB_DEBUGFS=y -# CONFIG_RT2X00_DEBUG is not set -CONFIG_WLAN_VENDOR_REALTEK=y -CONFIG_RTL8180=m -CONFIG_RTL8187=m -CONFIG_RTL8187_LEDS=y -CONFIG_RTL_CARDS=m -CONFIG_RTL8192CE=m -CONFIG_RTL8192SE=m -CONFIG_RTL8192DE=m -CONFIG_RTL8723AE=m -CONFIG_RTL8723BE=m -CONFIG_RTL8188EE=m -CONFIG_RTL8192EE=m -CONFIG_RTL8821AE=m -CONFIG_RTL8192CU=m -CONFIG_RTLWIFI=m -CONFIG_RTLWIFI_PCI=m -CONFIG_RTLWIFI_USB=m -CONFIG_RTLWIFI_DEBUG=y -CONFIG_RTL8192C_COMMON=m -CONFIG_RTL8723_COMMON=m -CONFIG_RTLBTCOEXIST=m -CONFIG_RTL8XXXU=m -CONFIG_RTL8XXXU_UNTESTED=y -CONFIG_RTW88=m -CONFIG_RTW88_CORE=m -CONFIG_RTW88_PCI=m -CONFIG_RTW88_8822BE=y -CONFIG_RTW88_8822CE=y -CONFIG_RTW88_DEBUG=y -CONFIG_RTW88_DEBUGFS=y -CONFIG_WLAN_VENDOR_RSI=y -CONFIG_RSI_91X=m -CONFIG_RSI_DEBUGFS=y -CONFIG_RSI_SDIO=m -CONFIG_RSI_USB=m -CONFIG_RSI_COEX=y -CONFIG_WLAN_VENDOR_ST=y -CONFIG_CW1200=m -CONFIG_CW1200_WLAN_SDIO=m -CONFIG_CW1200_WLAN_SPI=m -CONFIG_WLAN_VENDOR_TI=y -CONFIG_WL1251=m -CONFIG_WL1251_SPI=m -CONFIG_WL1251_SDIO=m -CONFIG_WL12XX=m -CONFIG_WL18XX=m -CONFIG_WLCORE=m -CONFIG_WLCORE_SPI=m -CONFIG_WLCORE_SDIO=m -CONFIG_WILINK_PLATFORM_DATA=y -CONFIG_WLAN_VENDOR_ZYDAS=y -CONFIG_USB_ZD1201=m -CONFIG_ZD1211RW=m -# CONFIG_ZD1211RW_DEBUG is not set -CONFIG_WLAN_VENDOR_QUANTENNA=y -CONFIG_QTNFMAC=m -CONFIG_QTNFMAC_PCIE=m -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_WL3501=m -CONFIG_MAC80211_HWSIM=m -CONFIG_USB_NET_RNDIS_WLAN=m -CONFIG_VIRT_WIFI=m - -# -# WiMAX Wireless Broadband devices -# -CONFIG_WIMAX_I2400M=m -CONFIG_WIMAX_I2400M_USB=m -CONFIG_WIMAX_I2400M_DEBUG_LEVEL=8 -# end of WiMAX Wireless Broadband devices - -# CONFIG_WAN is not set -CONFIG_IEEE802154_DRIVERS=m -CONFIG_IEEE802154_FAKELB=m -CONFIG_IEEE802154_AT86RF230=m -# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set -CONFIG_IEEE802154_MRF24J40=m -CONFIG_IEEE802154_CC2520=m -CONFIG_IEEE802154_ATUSB=m -CONFIG_IEEE802154_ADF7242=m 
-CONFIG_IEEE802154_CA8210=m -# CONFIG_IEEE802154_CA8210_DEBUGFS is not set -CONFIG_IEEE802154_MCR20A=m -CONFIG_IEEE802154_HWSIM=m -CONFIG_XEN_NETDEV_FRONTEND=m -CONFIG_XEN_NETDEV_BACKEND=m -CONFIG_VMXNET3=m -CONFIG_FUJITSU_ES=m -CONFIG_USB4_NET=m -CONFIG_HYPERV_NET=m -CONFIG_NETDEVSIM=m -CONFIG_NET_FAILOVER=m -CONFIG_ISDN=y -CONFIG_ISDN_CAPI=y -CONFIG_CAPI_TRACE=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_MISDN=m -CONFIG_MISDN_DSP=m -CONFIG_MISDN_L1OIP=m - -# -# mISDN hardware drivers -# -CONFIG_MISDN_HFCPCI=m -CONFIG_MISDN_HFCMULTI=m -CONFIG_MISDN_HFCUSB=m -CONFIG_MISDN_AVMFRITZ=m -CONFIG_MISDN_SPEEDFAX=m -CONFIG_MISDN_INFINEON=m -CONFIG_MISDN_W6692=m -CONFIG_MISDN_NETJET=m -CONFIG_MISDN_HDLC=m -CONFIG_MISDN_IPAC=m -CONFIG_MISDN_ISAR=m -CONFIG_NVM=y -CONFIG_NVM_PBLK=m -# CONFIG_NVM_PBLK_DEBUG is not set - -# -# Input device support -# -CONFIG_INPUT=y -CONFIG_INPUT_LEDS=m -CONFIG_INPUT_FF_MEMLESS=m -CONFIG_INPUT_POLLDEV=m -CONFIG_INPUT_SPARSEKMAP=m -CONFIG_INPUT_MATRIXKMAP=m - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_PSAUX=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m -# CONFIG_INPUT_EVBUG is not set - -# -# Input Device Drivers -# -CONFIG_INPUT_KEYBOARD=y -CONFIG_KEYBOARD_ADC=m -CONFIG_KEYBOARD_ADP5520=m -CONFIG_KEYBOARD_ADP5588=m -CONFIG_KEYBOARD_ADP5589=m -CONFIG_KEYBOARD_APPLESPI=m -CONFIG_KEYBOARD_ATKBD=m -CONFIG_KEYBOARD_QT1050=m -CONFIG_KEYBOARD_QT1070=m -CONFIG_KEYBOARD_QT2160=m -CONFIG_KEYBOARD_DLINK_DIR685=m -CONFIG_KEYBOARD_LKKBD=m -CONFIG_KEYBOARD_GPIO=m -CONFIG_KEYBOARD_GPIO_POLLED=m -CONFIG_KEYBOARD_TCA6416=m -CONFIG_KEYBOARD_TCA8418=m -CONFIG_KEYBOARD_MATRIX=m -CONFIG_KEYBOARD_LM8323=m -CONFIG_KEYBOARD_LM8333=m -CONFIG_KEYBOARD_MAX7359=m -CONFIG_KEYBOARD_MCS=m -CONFIG_KEYBOARD_MPR121=m -CONFIG_KEYBOARD_NEWTON=m -CONFIG_KEYBOARD_OPENCORES=m -CONFIG_KEYBOARD_SAMSUNG=m -CONFIG_KEYBOARD_STOWAWAY=m -CONFIG_KEYBOARD_SUNKBD=m -CONFIG_KEYBOARD_STMPE=m -CONFIG_KEYBOARD_IQS62X=m -CONFIG_KEYBOARD_OMAP4=m -CONFIG_KEYBOARD_TC3589X=m -CONFIG_KEYBOARD_TM2_TOUCHKEY=m -CONFIG_KEYBOARD_TWL4030=m -CONFIG_KEYBOARD_XTKBD=m -CONFIG_KEYBOARD_CROS_EC=m -CONFIG_KEYBOARD_CAP11XX=m -CONFIG_KEYBOARD_BCM=m -CONFIG_KEYBOARD_MTK_PMIC=m -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=m -CONFIG_MOUSE_PS2_ALPS=y -CONFIG_MOUSE_PS2_BYD=y -CONFIG_MOUSE_PS2_LOGIPS2PP=y -CONFIG_MOUSE_PS2_SYNAPTICS=y -CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS=y -CONFIG_MOUSE_PS2_CYPRESS=y -CONFIG_MOUSE_PS2_LIFEBOOK=y -CONFIG_MOUSE_PS2_TRACKPOINT=y -CONFIG_MOUSE_PS2_ELANTECH=y -CONFIG_MOUSE_PS2_ELANTECH_SMBUS=y -CONFIG_MOUSE_PS2_SENTELIC=y -CONFIG_MOUSE_PS2_TOUCHKIT=y -CONFIG_MOUSE_PS2_FOCALTECH=y -CONFIG_MOUSE_PS2_VMMOUSE=y -CONFIG_MOUSE_PS2_SMBUS=y -CONFIG_MOUSE_SERIAL=m -CONFIG_MOUSE_APPLETOUCH=m -CONFIG_MOUSE_BCM5974=m -CONFIG_MOUSE_CYAPA=m -CONFIG_MOUSE_ELAN_I2C=m -CONFIG_MOUSE_ELAN_I2C_I2C=y -CONFIG_MOUSE_ELAN_I2C_SMBUS=y -CONFIG_MOUSE_VSXXXAA=m -CONFIG_MOUSE_GPIO=m -CONFIG_MOUSE_SYNAPTICS_I2C=m -CONFIG_MOUSE_SYNAPTICS_USB=m -CONFIG_INPUT_JOYSTICK=y -CONFIG_JOYSTICK_ANALOG=m -CONFIG_JOYSTICK_A3D=m -CONFIG_JOYSTICK_ADI=m -CONFIG_JOYSTICK_COBRA=m -CONFIG_JOYSTICK_GF2K=m -CONFIG_JOYSTICK_GRIP=m -CONFIG_JOYSTICK_GRIP_MP=m -CONFIG_JOYSTICK_GUILLEMOT=m -CONFIG_JOYSTICK_INTERACT=m -CONFIG_JOYSTICK_SIDEWINDER=m -CONFIG_JOYSTICK_TMDC=m -CONFIG_JOYSTICK_IFORCE=m -CONFIG_JOYSTICK_IFORCE_USB=m -CONFIG_JOYSTICK_IFORCE_232=m -CONFIG_JOYSTICK_WARRIOR=m -CONFIG_JOYSTICK_MAGELLAN=m -CONFIG_JOYSTICK_SPACEORB=m -CONFIG_JOYSTICK_SPACEBALL=m 
-CONFIG_JOYSTICK_STINGER=m -CONFIG_JOYSTICK_TWIDJOY=m -CONFIG_JOYSTICK_ZHENHUA=m -CONFIG_JOYSTICK_DB9=m -CONFIG_JOYSTICK_GAMECON=m -CONFIG_JOYSTICK_TURBOGRAFX=m -CONFIG_JOYSTICK_AS5011=m -CONFIG_JOYSTICK_JOYDUMP=m -CONFIG_JOYSTICK_XPAD=m -CONFIG_JOYSTICK_XPAD_FF=y -CONFIG_JOYSTICK_XPAD_LEDS=y -CONFIG_JOYSTICK_WALKERA0701=m -CONFIG_JOYSTICK_PSXPAD_SPI=m -CONFIG_JOYSTICK_PSXPAD_SPI_FF=y -CONFIG_JOYSTICK_PXRC=m -CONFIG_JOYSTICK_FSIA6B=m -CONFIG_INPUT_TABLET=y -CONFIG_TABLET_USB_ACECAD=m -CONFIG_TABLET_USB_AIPTEK=m -CONFIG_TABLET_USB_GTCO=m -CONFIG_TABLET_USB_HANWANG=m -CONFIG_TABLET_USB_KBTAB=m -CONFIG_TABLET_USB_PEGASUS=m -CONFIG_TABLET_SERIAL_WACOM4=m -CONFIG_INPUT_TOUCHSCREEN=y -CONFIG_TOUCHSCREEN_PROPERTIES=y -CONFIG_TOUCHSCREEN_88PM860X=m -CONFIG_TOUCHSCREEN_ADS7846=m -CONFIG_TOUCHSCREEN_AD7877=m -CONFIG_TOUCHSCREEN_AD7879=m -CONFIG_TOUCHSCREEN_AD7879_I2C=m -CONFIG_TOUCHSCREEN_AD7879_SPI=m -CONFIG_TOUCHSCREEN_ADC=m -CONFIG_TOUCHSCREEN_AR1021_I2C=m -CONFIG_TOUCHSCREEN_ATMEL_MXT=m -CONFIG_TOUCHSCREEN_ATMEL_MXT_T37=y -CONFIG_TOUCHSCREEN_AUO_PIXCIR=m -CONFIG_TOUCHSCREEN_BU21013=m -CONFIG_TOUCHSCREEN_BU21029=m -CONFIG_TOUCHSCREEN_CHIPONE_ICN8318=m -CONFIG_TOUCHSCREEN_CHIPONE_ICN8505=m -CONFIG_TOUCHSCREEN_CY8CTMG110=m -CONFIG_TOUCHSCREEN_CYTTSP_CORE=m -CONFIG_TOUCHSCREEN_CYTTSP_I2C=m -CONFIG_TOUCHSCREEN_CYTTSP_SPI=m -CONFIG_TOUCHSCREEN_CYTTSP4_CORE=m -CONFIG_TOUCHSCREEN_CYTTSP4_I2C=m -CONFIG_TOUCHSCREEN_CYTTSP4_SPI=m -CONFIG_TOUCHSCREEN_DA9034=m -CONFIG_TOUCHSCREEN_DA9052=m -CONFIG_TOUCHSCREEN_DYNAPRO=m -CONFIG_TOUCHSCREEN_HAMPSHIRE=m -CONFIG_TOUCHSCREEN_EETI=m -CONFIG_TOUCHSCREEN_EGALAX=m -CONFIG_TOUCHSCREEN_EGALAX_SERIAL=m -CONFIG_TOUCHSCREEN_EXC3000=m -CONFIG_TOUCHSCREEN_FUJITSU=m -CONFIG_TOUCHSCREEN_GOODIX=m -CONFIG_TOUCHSCREEN_HIDEEP=m -CONFIG_TOUCHSCREEN_ILI210X=m -CONFIG_TOUCHSCREEN_S6SY761=m -CONFIG_TOUCHSCREEN_GUNZE=m -CONFIG_TOUCHSCREEN_EKTF2127=m -CONFIG_TOUCHSCREEN_ELAN=m -CONFIG_TOUCHSCREEN_ELO=m -CONFIG_TOUCHSCREEN_WACOM_W8001=m -CONFIG_TOUCHSCREEN_WACOM_I2C=m -CONFIG_TOUCHSCREEN_MAX11801=m -CONFIG_TOUCHSCREEN_MCS5000=m -CONFIG_TOUCHSCREEN_MMS114=m -CONFIG_TOUCHSCREEN_MELFAS_MIP4=m -CONFIG_TOUCHSCREEN_MTOUCH=m -CONFIG_TOUCHSCREEN_IMX6UL_TSC=m -CONFIG_TOUCHSCREEN_INEXIO=m -CONFIG_TOUCHSCREEN_MK712=m -CONFIG_TOUCHSCREEN_PENMOUNT=m -CONFIG_TOUCHSCREEN_EDT_FT5X06=m -CONFIG_TOUCHSCREEN_TOUCHRIGHT=m -CONFIG_TOUCHSCREEN_TOUCHWIN=m -CONFIG_TOUCHSCREEN_TI_AM335X_TSC=m -CONFIG_TOUCHSCREEN_UCB1400=m -CONFIG_TOUCHSCREEN_PIXCIR=m -CONFIG_TOUCHSCREEN_WDT87XX_I2C=m -CONFIG_TOUCHSCREEN_WM831X=m -CONFIG_TOUCHSCREEN_WM97XX=m -CONFIG_TOUCHSCREEN_WM9705=y -CONFIG_TOUCHSCREEN_WM9712=y -CONFIG_TOUCHSCREEN_WM9713=y -CONFIG_TOUCHSCREEN_USB_COMPOSITE=m -CONFIG_TOUCHSCREEN_MC13783=m -CONFIG_TOUCHSCREEN_USB_EGALAX=y -CONFIG_TOUCHSCREEN_USB_PANJIT=y -CONFIG_TOUCHSCREEN_USB_3M=y -CONFIG_TOUCHSCREEN_USB_ITM=y -CONFIG_TOUCHSCREEN_USB_ETURBO=y -CONFIG_TOUCHSCREEN_USB_GUNZE=y -CONFIG_TOUCHSCREEN_USB_DMC_TSC10=y -CONFIG_TOUCHSCREEN_USB_IRTOUCH=y -CONFIG_TOUCHSCREEN_USB_IDEALTEK=y -CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH=y -CONFIG_TOUCHSCREEN_USB_GOTOP=y -CONFIG_TOUCHSCREEN_USB_JASTEC=y -CONFIG_TOUCHSCREEN_USB_ELO=y -CONFIG_TOUCHSCREEN_USB_E2I=y -CONFIG_TOUCHSCREEN_USB_ZYTRONIC=y -CONFIG_TOUCHSCREEN_USB_ETT_TC45USB=y -CONFIG_TOUCHSCREEN_USB_NEXIO=y -CONFIG_TOUCHSCREEN_USB_EASYTOUCH=y -CONFIG_TOUCHSCREEN_TOUCHIT213=m -CONFIG_TOUCHSCREEN_TSC_SERIO=m -CONFIG_TOUCHSCREEN_TSC200X_CORE=m -CONFIG_TOUCHSCREEN_TSC2004=m -CONFIG_TOUCHSCREEN_TSC2005=m -CONFIG_TOUCHSCREEN_TSC2007=m -CONFIG_TOUCHSCREEN_TSC2007_IIO=y 
-CONFIG_TOUCHSCREEN_PCAP=m -CONFIG_TOUCHSCREEN_RM_TS=m -CONFIG_TOUCHSCREEN_SILEAD=m -CONFIG_TOUCHSCREEN_SIS_I2C=m -CONFIG_TOUCHSCREEN_ST1232=m -CONFIG_TOUCHSCREEN_STMFTS=m -CONFIG_TOUCHSCREEN_STMPE=m -CONFIG_TOUCHSCREEN_SUR40=m -CONFIG_TOUCHSCREEN_SURFACE3_SPI=m -CONFIG_TOUCHSCREEN_SX8654=m -CONFIG_TOUCHSCREEN_TPS6507X=m -CONFIG_TOUCHSCREEN_ZET6223=m -CONFIG_TOUCHSCREEN_ZFORCE=m -CONFIG_TOUCHSCREEN_COLIBRI_VF50=m -CONFIG_TOUCHSCREEN_ROHM_BU21023=m -CONFIG_TOUCHSCREEN_IQS5XX=m -CONFIG_INPUT_MISC=y -CONFIG_INPUT_88PM860X_ONKEY=m -CONFIG_INPUT_88PM80X_ONKEY=m -CONFIG_INPUT_AD714X=m -CONFIG_INPUT_AD714X_I2C=m -CONFIG_INPUT_AD714X_SPI=m -CONFIG_INPUT_ARIZONA_HAPTICS=m -CONFIG_INPUT_ATMEL_CAPTOUCH=m -CONFIG_INPUT_BMA150=m -CONFIG_INPUT_E3X0_BUTTON=m -CONFIG_INPUT_MSM_VIBRATOR=m -CONFIG_INPUT_PCSPKR=m -CONFIG_INPUT_MAX77650_ONKEY=m -CONFIG_INPUT_MAX77693_HAPTIC=m -CONFIG_INPUT_MAX8925_ONKEY=m -CONFIG_INPUT_MAX8997_HAPTIC=m -CONFIG_INPUT_MC13783_PWRBUTTON=m -CONFIG_INPUT_MMA8450=m -CONFIG_INPUT_APANEL=m -CONFIG_INPUT_GP2A=m -CONFIG_INPUT_GPIO_BEEPER=m -CONFIG_INPUT_GPIO_DECODER=m -CONFIG_INPUT_GPIO_VIBRA=m -CONFIG_INPUT_CPCAP_PWRBUTTON=m -CONFIG_INPUT_ATLAS_BTNS=m -CONFIG_INPUT_ATI_REMOTE2=m -CONFIG_INPUT_KEYSPAN_REMOTE=m -CONFIG_INPUT_KXTJ9=m -CONFIG_INPUT_POWERMATE=m -CONFIG_INPUT_YEALINK=m -CONFIG_INPUT_CM109=m -CONFIG_INPUT_REGULATOR_HAPTIC=m -CONFIG_INPUT_RETU_PWRBUTTON=m -CONFIG_INPUT_TPS65218_PWRBUTTON=m -CONFIG_INPUT_AXP20X_PEK=m -CONFIG_INPUT_TWL4030_PWRBUTTON=m -CONFIG_INPUT_TWL4030_VIBRA=m -CONFIG_INPUT_TWL6040_VIBRA=m -CONFIG_INPUT_UINPUT=m -CONFIG_INPUT_PALMAS_PWRBUTTON=m -CONFIG_INPUT_PCF50633_PMU=m -CONFIG_INPUT_PCF8574=m -CONFIG_INPUT_PWM_BEEPER=m -CONFIG_INPUT_PWM_VIBRA=m -CONFIG_INPUT_RK805_PWRKEY=m -CONFIG_INPUT_GPIO_ROTARY_ENCODER=m -CONFIG_INPUT_DA9052_ONKEY=m -CONFIG_INPUT_DA9055_ONKEY=m -CONFIG_INPUT_DA9063_ONKEY=m -CONFIG_INPUT_WM831X_ON=m -CONFIG_INPUT_PCAP=m -CONFIG_INPUT_ADXL34X=m -CONFIG_INPUT_ADXL34X_I2C=m -CONFIG_INPUT_ADXL34X_SPI=m -CONFIG_INPUT_IMS_PCU=m -CONFIG_INPUT_CMA3000=m -CONFIG_INPUT_CMA3000_I2C=m -CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m -CONFIG_INPUT_IDEAPAD_SLIDEBAR=m -CONFIG_INPUT_SOC_BUTTON_ARRAY=m -CONFIG_INPUT_DRV260X_HAPTICS=m -CONFIG_INPUT_DRV2665_HAPTICS=m -CONFIG_INPUT_DRV2667_HAPTICS=m -CONFIG_INPUT_RAVE_SP_PWRBUTTON=m -CONFIG_INPUT_STPMIC1_ONKEY=m -CONFIG_RMI4_CORE=m -CONFIG_RMI4_I2C=m -CONFIG_RMI4_SPI=m -CONFIG_RMI4_SMB=m -CONFIG_RMI4_F03=y -CONFIG_RMI4_F03_SERIO=m -CONFIG_RMI4_2D_SENSOR=y -CONFIG_RMI4_F11=y -CONFIG_RMI4_F12=y -CONFIG_RMI4_F30=y -CONFIG_RMI4_F34=y -# CONFIG_RMI4_F54 is not set -CONFIG_RMI4_F55=y - -# -# Hardware I/O ports -# -CONFIG_SERIO=m -CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y -CONFIG_SERIO_I8042=m -CONFIG_SERIO_SERPORT=m -CONFIG_SERIO_CT82C710=m -CONFIG_SERIO_PARKBD=m -CONFIG_SERIO_PCIPS2=m -CONFIG_SERIO_LIBPS2=m -CONFIG_SERIO_RAW=m -CONFIG_SERIO_ALTERA_PS2=m -CONFIG_SERIO_PS2MULT=m -CONFIG_SERIO_ARC_PS2=m -# CONFIG_SERIO_APBPS2 is not set -CONFIG_HYPERV_KEYBOARD=m -CONFIG_SERIO_GPIO_PS2=m -CONFIG_USERIO=m -CONFIG_GAMEPORT=m -CONFIG_GAMEPORT_NS558=m -CONFIG_GAMEPORT_L4=m -CONFIG_GAMEPORT_EMU10K1=m -CONFIG_GAMEPORT_FM801=m -# end of Hardware I/O ports -# end of Input device support - -# -# Character devices -# -CONFIG_TTY=y -CONFIG_VT=y -CONFIG_CONSOLE_TRANSLATIONS=y -CONFIG_VT_CONSOLE=y -CONFIG_VT_CONSOLE_SLEEP=y -CONFIG_HW_CONSOLE=y -CONFIG_VT_HW_CONSOLE_BINDING=y -CONFIG_UNIX98_PTYS=y -# CONFIG_LEGACY_PTYS is not set -CONFIG_LDISC_AUTOLOAD=y - -# -# Serial drivers -# -CONFIG_SERIAL_EARLYCON=y -CONFIG_SERIAL_8250=y -# 
CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set -CONFIG_SERIAL_8250_PNP=y -# CONFIG_SERIAL_8250_16550A_VARIANTS is not set -CONFIG_SERIAL_8250_FINTEK=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_DMA=y -CONFIG_SERIAL_8250_PCI=y -CONFIG_SERIAL_8250_EXAR=m -CONFIG_SERIAL_8250_CS=m -CONFIG_SERIAL_8250_MEN_MCB=m -CONFIG_SERIAL_8250_NR_UARTS=32 -CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -CONFIG_SERIAL_8250_EXTENDED=y -CONFIG_SERIAL_8250_MANY_PORTS=y -CONFIG_SERIAL_8250_ASPEED_VUART=m -CONFIG_SERIAL_8250_SHARE_IRQ=y -# CONFIG_SERIAL_8250_DETECT_IRQ is not set -CONFIG_SERIAL_8250_RSA=y -CONFIG_SERIAL_8250_DWLIB=y -CONFIG_SERIAL_8250_DW=m -CONFIG_SERIAL_8250_RT288X=y -CONFIG_SERIAL_8250_LPSS=y -CONFIG_SERIAL_8250_MID=y -CONFIG_SERIAL_OF_PLATFORM=m - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_MAX3100=m -CONFIG_SERIAL_MAX310X=m -CONFIG_SERIAL_UARTLITE=m -CONFIG_SERIAL_UARTLITE_NR_UARTS=1 -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_SERIAL_JSM=m -CONFIG_SERIAL_SIFIVE=m -CONFIG_SERIAL_SCCNXP=m -CONFIG_SERIAL_SC16IS7XX_CORE=m -CONFIG_SERIAL_SC16IS7XX=m -CONFIG_SERIAL_SC16IS7XX_I2C=y -CONFIG_SERIAL_SC16IS7XX_SPI=y -CONFIG_SERIAL_ALTERA_JTAGUART=m -CONFIG_SERIAL_ALTERA_UART=m -CONFIG_SERIAL_ALTERA_UART_MAXPORTS=4 -CONFIG_SERIAL_ALTERA_UART_BAUDRATE=115200 -CONFIG_SERIAL_IFX6X60=m -CONFIG_SERIAL_XILINX_PS_UART=m -CONFIG_SERIAL_ARC=m -CONFIG_SERIAL_ARC_NR_PORTS=1 -CONFIG_SERIAL_RP2=m -CONFIG_SERIAL_RP2_NR_UARTS=32 -CONFIG_SERIAL_FSL_LPUART=m -CONFIG_SERIAL_FSL_LINFLEXUART=m -CONFIG_SERIAL_CONEXANT_DIGICOLOR=m -CONFIG_SERIAL_MEN_Z135=m -CONFIG_SERIAL_SPRD=m -# end of Serial drivers - -CONFIG_SERIAL_MCTRL_GPIO=y -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -CONFIG_CYZ_INTR=y -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_SYNCLINK=m -CONFIG_SYNCLINKMP=m -CONFIG_SYNCLINK_GT=m -CONFIG_ISI=m -CONFIG_N_HDLC=m -CONFIG_N_GSM=m -CONFIG_NOZOMI=m -CONFIG_NULL_TTY=m -CONFIG_TRACE_ROUTER=m -CONFIG_TRACE_SINK=m -CONFIG_HVC_DRIVER=y -CONFIG_HVC_IRQ=y -CONFIG_HVC_XEN=y -CONFIG_HVC_XEN_FRONTEND=y -CONFIG_SERIAL_DEV_BUS=y -CONFIG_SERIAL_DEV_CTRL_TTYPORT=y -# CONFIG_TTY_PRINTK is not set -CONFIG_PRINTER=m -# CONFIG_LP_CONSOLE is not set -CONFIG_PPDEV=m -CONFIG_VIRTIO_CONSOLE=m -CONFIG_IPMI_HANDLER=m -CONFIG_IPMI_DMI_DECODE=y -CONFIG_IPMI_PLAT_DATA=y -# CONFIG_IPMI_PANIC_EVENT is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_SI=m -CONFIG_IPMI_SSIF=m -CONFIG_IPMI_WATCHDOG=m -CONFIG_IPMI_POWEROFF=m -CONFIG_IPMB_DEVICE_INTERFACE=m -CONFIG_HW_RANDOM=m -CONFIG_HW_RANDOM_TIMERIOMEM=m -CONFIG_HW_RANDOM_INTEL=m -CONFIG_HW_RANDOM_AMD=m -CONFIG_HW_RANDOM_VIA=m -CONFIG_HW_RANDOM_VIRTIO=m -CONFIG_APPLICOM=m - -# -# PCMCIA character devices -# -CONFIG_SYNCLINK_CS=m -CONFIG_CARDMAN_4000=m -CONFIG_CARDMAN_4040=m -CONFIG_SCR24X=m -CONFIG_IPWIRELESS=m -# end of PCMCIA character devices - -CONFIG_MWAVE=m -CONFIG_DEVMEM=y -# CONFIG_DEVKMEM is not set -CONFIG_NVRAM=m -CONFIG_RAW_DRIVER=m -CONFIG_MAX_RAW_DEVS=256 -CONFIG_DEVPORT=y -CONFIG_HPET=y -CONFIG_HPET_MMAP=y -CONFIG_HPET_MMAP_DEFAULT=y -CONFIG_HANGCHECK_TIMER=m -CONFIG_TCG_TPM=m -CONFIG_HW_RANDOM_TPM=y -CONFIG_TCG_TIS_CORE=m -CONFIG_TCG_TIS=m -CONFIG_TCG_TIS_SPI=m -CONFIG_TCG_TIS_SPI_CR50=y -CONFIG_TCG_TIS_I2C_ATMEL=m -CONFIG_TCG_TIS_I2C_INFINEON=m -CONFIG_TCG_TIS_I2C_NUVOTON=m -CONFIG_TCG_NSC=m -CONFIG_TCG_ATMEL=m -CONFIG_TCG_INFINEON=m -CONFIG_TCG_XEN=m -CONFIG_TCG_CRB=m -CONFIG_TCG_VTPM_PROXY=m -CONFIG_TCG_TIS_ST33ZP24=m -CONFIG_TCG_TIS_ST33ZP24_I2C=m -CONFIG_TCG_TIS_ST33ZP24_SPI=m -CONFIG_TELCLOCK=m -CONFIG_XILLYBUS=m 
-CONFIG_XILLYBUS_PCIE=m -CONFIG_XILLYBUS_OF=m -# end of Character devices - -# CONFIG_RANDOM_TRUST_CPU is not set -# CONFIG_RANDOM_TRUST_BOOTLOADER is not set - -# -# I2C support -# -CONFIG_I2C=y -CONFIG_ACPI_I2C_OPREGION=y -CONFIG_I2C_BOARDINFO=y -CONFIG_I2C_COMPAT=y -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_MUX=m - -# -# Multiplexer I2C Chip support -# -CONFIG_I2C_ARB_GPIO_CHALLENGE=m -CONFIG_I2C_MUX_GPIO=m -CONFIG_I2C_MUX_GPMUX=m -CONFIG_I2C_MUX_LTC4306=m -CONFIG_I2C_MUX_PCA9541=m -CONFIG_I2C_MUX_PCA954x=m -CONFIG_I2C_MUX_PINCTRL=m -CONFIG_I2C_MUX_REG=m -CONFIG_I2C_DEMUX_PINCTRL=m -CONFIG_I2C_MUX_MLXCPLD=m -# end of Multiplexer I2C Chip support - -CONFIG_I2C_HELPER_AUTO=y -CONFIG_I2C_SMBUS=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_ALGOPCA=m - -# -# I2C Hardware Bus support -# - -# -# PC SMBus host controller drivers -# -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI1563=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_AMD756=m -CONFIG_I2C_AMD756_S4882=m -CONFIG_I2C_AMD8111=m -CONFIG_I2C_AMD_MP2=m -CONFIG_I2C_I801=m -CONFIG_I2C_ISCH=m -CONFIG_I2C_ISMT=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_CHT_WC=m -CONFIG_I2C_NFORCE2=m -CONFIG_I2C_NFORCE2_S4985=m -CONFIG_I2C_NVIDIA_GPU=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_SIS630=m -CONFIG_I2C_SIS96X=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m - -# -# ACPI drivers -# -CONFIG_I2C_SCMI=m - -# -# I2C system bus drivers (mostly embedded / system-on-chip) -# -CONFIG_I2C_CBUS_GPIO=m -CONFIG_I2C_DESIGNWARE_CORE=y -CONFIG_I2C_DESIGNWARE_PLATFORM=y -CONFIG_I2C_DESIGNWARE_SLAVE=y -CONFIG_I2C_DESIGNWARE_PCI=m -CONFIG_I2C_DESIGNWARE_BAYTRAIL=y -CONFIG_I2C_EMEV2=m -CONFIG_I2C_GPIO=m -# CONFIG_I2C_GPIO_FAULT_INJECTOR is not set -CONFIG_I2C_KEMPLD=m -CONFIG_I2C_OCORES=m -CONFIG_I2C_PCA_PLATFORM=m -CONFIG_I2C_RK3X=m -CONFIG_I2C_SIMTEC=m -CONFIG_I2C_XILINX=m - -# -# External I2C/SMBus adapter drivers -# -CONFIG_I2C_DIOLAN_U2C=m -CONFIG_I2C_DLN2=m -CONFIG_I2C_PARPORT=m -CONFIG_I2C_ROBOTFUZZ_OSIF=m -CONFIG_I2C_TAOS_EVM=m -CONFIG_I2C_TINY_USB=m -CONFIG_I2C_VIPERBOARD=m - -# -# Other I2C/SMBus bus drivers -# -CONFIG_I2C_MLXCPLD=m -CONFIG_I2C_CROS_EC_TUNNEL=m -CONFIG_I2C_FSI=m -# end of I2C Hardware Bus support - -CONFIG_I2C_STUB=m -CONFIG_I2C_SLAVE=y -CONFIG_I2C_SLAVE_EEPROM=m -# CONFIG_I2C_DEBUG_CORE is not set -# CONFIG_I2C_DEBUG_ALGO is not set -# CONFIG_I2C_DEBUG_BUS is not set -# end of I2C support - -CONFIG_I3C=m -CONFIG_CDNS_I3C_MASTER=m -CONFIG_DW_I3C_MASTER=m -CONFIG_SPI=y -# CONFIG_SPI_DEBUG is not set -CONFIG_SPI_MASTER=y -CONFIG_SPI_MEM=y - -# -# SPI Master Controller Drivers -# -CONFIG_SPI_ALTERA=m -CONFIG_SPI_AXI_SPI_ENGINE=m -CONFIG_SPI_BITBANG=m -CONFIG_SPI_BUTTERFLY=m -CONFIG_SPI_CADENCE=m -CONFIG_SPI_DESIGNWARE=m -CONFIG_SPI_DW_PCI=m -CONFIG_SPI_DW_MID_DMA=y -CONFIG_SPI_DW_MMIO=m -CONFIG_SPI_DLN2=m -CONFIG_SPI_FSI=m -CONFIG_SPI_NXP_FLEXSPI=m -CONFIG_SPI_GPIO=m -CONFIG_SPI_LM70_LLP=m -CONFIG_SPI_FSL_LIB=m -CONFIG_SPI_FSL_SPI=m -CONFIG_SPI_OC_TINY=m -CONFIG_SPI_PXA2XX=m -CONFIG_SPI_PXA2XX_PCI=m -CONFIG_SPI_ROCKCHIP=m -CONFIG_SPI_SC18IS602=m -CONFIG_SPI_SIFIVE=m -CONFIG_SPI_MXIC=m -CONFIG_SPI_XCOMM=m -CONFIG_SPI_XILINX=m -CONFIG_SPI_ZYNQMP_GQSPI=m - -# -# SPI Multiplexer support -# -CONFIG_SPI_MUX=m - -# -# SPI Protocol Masters -# -CONFIG_SPI_SPIDEV=m -CONFIG_SPI_LOOPBACK_TEST=m -CONFIG_SPI_TLE62X0=m -CONFIG_SPI_SLAVE=y -CONFIG_SPI_SLAVE_TIME=m -CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m -CONFIG_SPMI=m -CONFIG_HSI=m -CONFIG_HSI_BOARDINFO=y - -# -# HSI controllers -# - -# -# HSI clients -# -CONFIG_HSI_CHAR=m -CONFIG_PPS=y -# CONFIG_PPS_DEBUG is not set - -# -# PPS clients support -# -CONFIG_PPS_CLIENT_KTIMER=m 
-CONFIG_PPS_CLIENT_LDISC=m -CONFIG_PPS_CLIENT_PARPORT=m -CONFIG_PPS_CLIENT_GPIO=m - -# -# PPS generators support -# - -# -# PTP clock support -# -CONFIG_PTP_1588_CLOCK=y -CONFIG_DP83640_PHY=m -CONFIG_PTP_1588_CLOCK_INES=m -CONFIG_PTP_1588_CLOCK_KVM=m -CONFIG_PTP_1588_CLOCK_IDT82P33=m -CONFIG_PTP_1588_CLOCK_IDTCM=m -CONFIG_PTP_1588_CLOCK_VMW=m -# end of PTP clock support - -CONFIG_PINCTRL=y -CONFIG_GENERIC_PINCTRL_GROUPS=y -CONFIG_PINMUX=y -CONFIG_GENERIC_PINMUX_FUNCTIONS=y -CONFIG_PINCONF=y -CONFIG_GENERIC_PINCONF=y -# CONFIG_DEBUG_PINCTRL is not set -CONFIG_PINCTRL_AS3722=m -CONFIG_PINCTRL_AXP209=m -CONFIG_PINCTRL_AMD=m -CONFIG_PINCTRL_DA9062=m -CONFIG_PINCTRL_MCP23S08=m -CONFIG_PINCTRL_SINGLE=m -CONFIG_PINCTRL_SX150X=y -CONFIG_PINCTRL_STMFX=m -CONFIG_PINCTRL_MAX77620=m -CONFIG_PINCTRL_PALMAS=m -CONFIG_PINCTRL_RK805=m -CONFIG_PINCTRL_OCELOT=y -CONFIG_PINCTRL_BAYTRAIL=y -CONFIG_PINCTRL_CHERRYVIEW=y -CONFIG_PINCTRL_LYNXPOINT=y -CONFIG_PINCTRL_INTEL=y -CONFIG_PINCTRL_BROXTON=y -CONFIG_PINCTRL_CANNONLAKE=y -CONFIG_PINCTRL_CEDARFORK=y -CONFIG_PINCTRL_DENVERTON=y -CONFIG_PINCTRL_GEMINILAKE=y -CONFIG_PINCTRL_ICELAKE=y -CONFIG_PINCTRL_LEWISBURG=y -CONFIG_PINCTRL_SUNRISEPOINT=y -CONFIG_PINCTRL_TIGERLAKE=y -CONFIG_PINCTRL_LOCHNAGAR=m -CONFIG_PINCTRL_MADERA=m -CONFIG_PINCTRL_CS47L15=y -CONFIG_PINCTRL_CS47L35=y -CONFIG_PINCTRL_CS47L85=y -CONFIG_PINCTRL_CS47L90=y -CONFIG_PINCTRL_CS47L92=y -CONFIG_PINCTRL_EQUILIBRIUM=m -CONFIG_GPIOLIB=y -CONFIG_GPIOLIB_FASTPATH_LIMIT=512 -CONFIG_OF_GPIO=y -CONFIG_GPIO_ACPI=y -CONFIG_GPIOLIB_IRQCHIP=y -# CONFIG_DEBUG_GPIO is not set -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_GENERIC=y -CONFIG_GPIO_MAX730X=m - -# -# Memory mapped GPIO drivers -# -CONFIG_GPIO_74XX_MMIO=m -CONFIG_GPIO_ALTERA=m -CONFIG_GPIO_AMDPT=m -CONFIG_GPIO_CADENCE=m -CONFIG_GPIO_DWAPB=m -CONFIG_GPIO_EXAR=m -CONFIG_GPIO_FTGPIO010=y -CONFIG_GPIO_GENERIC_PLATFORM=m -CONFIG_GPIO_GRGPIO=m -CONFIG_GPIO_HLWD=m -CONFIG_GPIO_ICH=m -CONFIG_GPIO_LOGICVC=m -CONFIG_GPIO_MB86S7X=m -CONFIG_GPIO_MENZ127=m -CONFIG_GPIO_SAMA5D2_PIOBU=m -CONFIG_GPIO_SIFIVE=y -CONFIG_GPIO_SIOX=m -CONFIG_GPIO_SYSCON=m -CONFIG_GPIO_VX855=m -CONFIG_GPIO_WCD934X=m -CONFIG_GPIO_XILINX=m -CONFIG_GPIO_AMD_FCH=m -# end of Memory mapped GPIO drivers - -# -# Port-mapped I/O GPIO drivers -# -CONFIG_GPIO_F7188X=m -CONFIG_GPIO_IT87=m -CONFIG_GPIO_SCH=m -CONFIG_GPIO_SCH311X=m -CONFIG_GPIO_WINBOND=m -CONFIG_GPIO_WS16C48=m -# end of Port-mapped I/O GPIO drivers - -# -# I2C GPIO expanders -# -CONFIG_GPIO_ADP5588=m -CONFIG_GPIO_ADNP=m -CONFIG_GPIO_GW_PLD=m -CONFIG_GPIO_MAX7300=m -CONFIG_GPIO_MAX732X=m -CONFIG_GPIO_PCA953X=m -CONFIG_GPIO_PCF857X=m -CONFIG_GPIO_TPIC2810=m -# end of I2C GPIO expanders - -# -# MFD GPIO expanders -# -CONFIG_GPIO_ADP5520=m -CONFIG_GPIO_ARIZONA=m -CONFIG_GPIO_BD70528=m -CONFIG_GPIO_BD71828=m -CONFIG_GPIO_BD9571MWV=m -CONFIG_GPIO_CRYSTAL_COVE=m -CONFIG_GPIO_DA9052=m -CONFIG_GPIO_DA9055=m -CONFIG_GPIO_DLN2=m -CONFIG_GPIO_JANZ_TTL=m -CONFIG_GPIO_KEMPLD=m -CONFIG_GPIO_LP3943=m -CONFIG_GPIO_LP873X=m -CONFIG_GPIO_LP87565=m -CONFIG_GPIO_MADERA=m -CONFIG_GPIO_MAX77620=m -CONFIG_GPIO_MAX77650=m -CONFIG_GPIO_PALMAS=y -CONFIG_GPIO_RC5T583=y -CONFIG_GPIO_STMPE=y -CONFIG_GPIO_TC3589X=y -CONFIG_GPIO_TPS65086=m -CONFIG_GPIO_TPS65218=m -CONFIG_GPIO_TPS6586X=y -CONFIG_GPIO_TPS65910=y -CONFIG_GPIO_TPS65912=m -CONFIG_GPIO_TPS68470=y -CONFIG_GPIO_TQMX86=m -CONFIG_GPIO_TWL4030=m -CONFIG_GPIO_TWL6040=m -CONFIG_GPIO_UCB1400=m -CONFIG_GPIO_WHISKEY_COVE=m -CONFIG_GPIO_WM831X=m -CONFIG_GPIO_WM8350=m -CONFIG_GPIO_WM8994=m -# end of MFD GPIO expanders - -# -# PCI 
GPIO expanders -# -CONFIG_GPIO_AMD8111=m -CONFIG_GPIO_ML_IOH=m -CONFIG_GPIO_PCI_IDIO_16=m -CONFIG_GPIO_PCIE_IDIO_24=m -CONFIG_GPIO_RDC321X=m -CONFIG_GPIO_SODAVILLE=y -# end of PCI GPIO expanders - -# -# SPI GPIO expanders -# -CONFIG_GPIO_74X164=m -CONFIG_GPIO_MAX3191X=m -CONFIG_GPIO_MAX7301=m -CONFIG_GPIO_MC33880=m -CONFIG_GPIO_PISOSR=m -CONFIG_GPIO_XRA1403=m -CONFIG_GPIO_MOXTET=m -# end of SPI GPIO expanders - -# -# USB GPIO expanders -# -CONFIG_GPIO_VIPERBOARD=m -# end of USB GPIO expanders - -CONFIG_GPIO_MOCKUP=m -CONFIG_W1=m -CONFIG_W1_CON=y - -# -# 1-wire Bus Masters -# -CONFIG_W1_MASTER_MATROX=m -CONFIG_W1_MASTER_DS2490=m -CONFIG_W1_MASTER_DS2482=m -CONFIG_W1_MASTER_DS1WM=m -CONFIG_W1_MASTER_GPIO=m -CONFIG_W1_MASTER_SGI=m -# end of 1-wire Bus Masters - -# -# 1-wire Slaves -# -CONFIG_W1_SLAVE_THERM=m -CONFIG_W1_SLAVE_SMEM=m -CONFIG_W1_SLAVE_DS2405=m -CONFIG_W1_SLAVE_DS2408=m -# CONFIG_W1_SLAVE_DS2408_READBACK is not set -CONFIG_W1_SLAVE_DS2413=m -CONFIG_W1_SLAVE_DS2406=m -CONFIG_W1_SLAVE_DS2423=m -CONFIG_W1_SLAVE_DS2805=m -CONFIG_W1_SLAVE_DS2430=m -CONFIG_W1_SLAVE_DS2431=m -CONFIG_W1_SLAVE_DS2433=m -# CONFIG_W1_SLAVE_DS2433_CRC is not set -CONFIG_W1_SLAVE_DS2438=m -CONFIG_W1_SLAVE_DS250X=m -CONFIG_W1_SLAVE_DS2780=m -CONFIG_W1_SLAVE_DS2781=m -CONFIG_W1_SLAVE_DS28E04=m -CONFIG_W1_SLAVE_DS28E17=m -# end of 1-wire Slaves - -CONFIG_POWER_AVS=y -CONFIG_QCOM_CPR=m -CONFIG_POWER_RESET=y -CONFIG_POWER_RESET_AS3722=y -CONFIG_POWER_RESET_GPIO=y -CONFIG_POWER_RESET_GPIO_RESTART=y -CONFIG_POWER_RESET_LTC2952=y -CONFIG_POWER_RESET_MT6323=y -CONFIG_POWER_RESET_RESTART=y -CONFIG_POWER_RESET_SYSCON=y -CONFIG_POWER_RESET_SYSCON_POWEROFF=y -CONFIG_REBOOT_MODE=m -CONFIG_SYSCON_REBOOT_MODE=m -CONFIG_NVMEM_REBOOT_MODE=m -CONFIG_POWER_SUPPLY=y -# CONFIG_POWER_SUPPLY_DEBUG is not set -CONFIG_POWER_SUPPLY_HWMON=y -CONFIG_PDA_POWER=m -CONFIG_GENERIC_ADC_BATTERY=m -CONFIG_MAX8925_POWER=m -CONFIG_WM831X_BACKUP=m -CONFIG_WM831X_POWER=m -CONFIG_WM8350_POWER=m -CONFIG_TEST_POWER=m -CONFIG_BATTERY_88PM860X=m -CONFIG_CHARGER_ADP5061=m -CONFIG_BATTERY_ACT8945A=m -CONFIG_BATTERY_CPCAP=m -CONFIG_BATTERY_DS2760=m -CONFIG_BATTERY_DS2780=m -CONFIG_BATTERY_DS2781=m -CONFIG_BATTERY_DS2782=m -CONFIG_BATTERY_LEGO_EV3=m -CONFIG_BATTERY_SBS=m -CONFIG_CHARGER_SBS=m -CONFIG_MANAGER_SBS=m -CONFIG_BATTERY_BQ27XXX=m -CONFIG_BATTERY_BQ27XXX_I2C=m -CONFIG_BATTERY_BQ27XXX_HDQ=m -# CONFIG_BATTERY_BQ27XXX_DT_UPDATES_NVM is not set -CONFIG_BATTERY_DA9030=m -CONFIG_BATTERY_DA9052=m -CONFIG_CHARGER_DA9150=m -CONFIG_BATTERY_DA9150=m -CONFIG_CHARGER_AXP20X=m -CONFIG_BATTERY_AXP20X=m -CONFIG_AXP20X_POWER=m -CONFIG_AXP288_CHARGER=m -CONFIG_AXP288_FUEL_GAUGE=m -CONFIG_BATTERY_MAX17040=m -CONFIG_BATTERY_MAX17042=m -CONFIG_BATTERY_MAX1721X=m -CONFIG_BATTERY_TWL4030_MADC=m -CONFIG_CHARGER_88PM860X=m -CONFIG_CHARGER_PCF50633=m -CONFIG_BATTERY_RX51=m -CONFIG_CHARGER_ISP1704=m -CONFIG_CHARGER_MAX8903=m -CONFIG_CHARGER_TWL4030=m -CONFIG_CHARGER_LP8727=m -CONFIG_CHARGER_LP8788=m -CONFIG_CHARGER_GPIO=m -CONFIG_CHARGER_MANAGER=y -CONFIG_CHARGER_LT3651=m -CONFIG_CHARGER_MAX14577=m -CONFIG_CHARGER_DETECTOR_MAX14656=m -CONFIG_CHARGER_MAX77650=m -CONFIG_CHARGER_MAX77693=m -CONFIG_CHARGER_MAX8997=m -CONFIG_CHARGER_MAX8998=m -CONFIG_CHARGER_BQ2415X=m -CONFIG_CHARGER_BQ24190=m -CONFIG_CHARGER_BQ24257=m -CONFIG_CHARGER_BQ24735=m -CONFIG_CHARGER_BQ25890=m -CONFIG_CHARGER_SMB347=m -CONFIG_CHARGER_TPS65090=m -CONFIG_CHARGER_TPS65217=m -CONFIG_BATTERY_GAUGE_LTC2941=m -CONFIG_BATTERY_RT5033=m -CONFIG_CHARGER_RT9455=m -CONFIG_CHARGER_CROS_USBPD=m 
-CONFIG_CHARGER_UCS1002=m -CONFIG_CHARGER_BD70528=m -CONFIG_CHARGER_WILCO=m -CONFIG_HWMON=y -CONFIG_HWMON_VID=m -# CONFIG_HWMON_DEBUG_CHIP is not set - -# -# Native drivers -# -CONFIG_SENSORS_ABITUGURU=m -CONFIG_SENSORS_ABITUGURU3=m -CONFIG_SENSORS_AD7314=m -CONFIG_SENSORS_AD7414=m -CONFIG_SENSORS_AD7418=m -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM1026=m -CONFIG_SENSORS_ADM1029=m -CONFIG_SENSORS_ADM1031=m -CONFIG_SENSORS_ADM1177=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_ADT7X10=m -CONFIG_SENSORS_ADT7310=m -CONFIG_SENSORS_ADT7410=m -CONFIG_SENSORS_ADT7411=m -CONFIG_SENSORS_ADT7462=m -CONFIG_SENSORS_ADT7470=m -CONFIG_SENSORS_ADT7475=m -CONFIG_SENSORS_AS370=m -CONFIG_SENSORS_ASC7621=m -CONFIG_SENSORS_AXI_FAN_CONTROL=m -CONFIG_SENSORS_K8TEMP=m -CONFIG_SENSORS_K10TEMP=m -CONFIG_SENSORS_FAM15H_POWER=m -CONFIG_SENSORS_APPLESMC=m -CONFIG_SENSORS_ASB100=m -CONFIG_SENSORS_ASPEED=m -CONFIG_SENSORS_ATXP1=m -CONFIG_SENSORS_DRIVETEMP=m -CONFIG_SENSORS_DS620=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_DELL_SMM=m -CONFIG_SENSORS_DA9052_ADC=m -CONFIG_SENSORS_DA9055=m -CONFIG_SENSORS_I5K_AMB=m -CONFIG_SENSORS_F71805F=m -CONFIG_SENSORS_F71882FG=m -CONFIG_SENSORS_F75375S=m -CONFIG_SENSORS_MC13783_ADC=m -CONFIG_SENSORS_FSCHMD=m -CONFIG_SENSORS_FTSTEUTATES=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_G760A=m -CONFIG_SENSORS_G762=m -CONFIG_SENSORS_GPIO_FAN=m -CONFIG_SENSORS_HIH6130=m -CONFIG_SENSORS_IBMAEM=m -CONFIG_SENSORS_IBMPEX=m -CONFIG_SENSORS_IIO_HWMON=m -CONFIG_SENSORS_I5500=m -CONFIG_SENSORS_CORETEMP=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_JC42=m -CONFIG_SENSORS_POWR1220=m -CONFIG_SENSORS_LINEAGE=m -CONFIG_SENSORS_LOCHNAGAR=m -CONFIG_SENSORS_LTC2945=m -CONFIG_SENSORS_LTC2947=m -CONFIG_SENSORS_LTC2947_I2C=m -CONFIG_SENSORS_LTC2947_SPI=m -CONFIG_SENSORS_LTC2990=m -CONFIG_SENSORS_LTC4151=m -CONFIG_SENSORS_LTC4215=m -CONFIG_SENSORS_LTC4222=m -CONFIG_SENSORS_LTC4245=m -CONFIG_SENSORS_LTC4260=m -CONFIG_SENSORS_LTC4261=m -CONFIG_SENSORS_MAX1111=m -CONFIG_SENSORS_MAX16065=m -CONFIG_SENSORS_MAX1619=m -CONFIG_SENSORS_MAX1668=m -CONFIG_SENSORS_MAX197=m -CONFIG_SENSORS_MAX31722=m -CONFIG_SENSORS_MAX31730=m -CONFIG_SENSORS_MAX6621=m -CONFIG_SENSORS_MAX6639=m -CONFIG_SENSORS_MAX6642=m -CONFIG_SENSORS_MAX6650=m -CONFIG_SENSORS_MAX6697=m -CONFIG_SENSORS_MAX31790=m -CONFIG_SENSORS_MCP3021=m -CONFIG_SENSORS_MLXREG_FAN=m -CONFIG_SENSORS_TC654=m -CONFIG_SENSORS_MENF21BMC_HWMON=m -CONFIG_SENSORS_ADCXX=m -CONFIG_SENSORS_LM63=m -CONFIG_SENSORS_LM70=m -CONFIG_SENSORS_LM73=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM77=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM83=m -CONFIG_SENSORS_LM85=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM90=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_LM93=m -CONFIG_SENSORS_LM95234=m -CONFIG_SENSORS_LM95241=m -CONFIG_SENSORS_LM95245=m -CONFIG_SENSORS_PC87360=m -CONFIG_SENSORS_PC87427=m -CONFIG_SENSORS_NTC_THERMISTOR=m -CONFIG_SENSORS_NCT6683=m -CONFIG_SENSORS_NCT6775=m -CONFIG_SENSORS_NCT7802=m -CONFIG_SENSORS_NCT7904=m -CONFIG_SENSORS_NPCM7XX=m -CONFIG_SENSORS_PCF8591=m -CONFIG_PMBUS=m -CONFIG_SENSORS_PMBUS=m -CONFIG_SENSORS_ADM1275=m -CONFIG_SENSORS_BEL_PFE=m -CONFIG_SENSORS_IBM_CFFPS=m -CONFIG_SENSORS_INSPUR_IPSPS=m -CONFIG_SENSORS_IR35221=m -CONFIG_SENSORS_IR38064=m -CONFIG_SENSORS_IRPS5401=m -CONFIG_SENSORS_ISL68137=m -CONFIG_SENSORS_LM25066=m -CONFIG_SENSORS_LTC2978=m -# CONFIG_SENSORS_LTC2978_REGULATOR is not set -CONFIG_SENSORS_LTC3815=m -CONFIG_SENSORS_MAX16064=m -CONFIG_SENSORS_MAX20730=m -CONFIG_SENSORS_MAX20751=m 
-CONFIG_SENSORS_MAX31785=m -CONFIG_SENSORS_MAX34440=m -CONFIG_SENSORS_MAX8688=m -CONFIG_SENSORS_PXE1610=m -CONFIG_SENSORS_TPS40422=m -CONFIG_SENSORS_TPS53679=m -CONFIG_SENSORS_UCD9000=m -CONFIG_SENSORS_UCD9200=m -CONFIG_SENSORS_XDPE122=m -CONFIG_SENSORS_ZL6100=m -CONFIG_SENSORS_PWM_FAN=m -CONFIG_SENSORS_SHT15=m -CONFIG_SENSORS_SHT21=m -CONFIG_SENSORS_SHT3x=m -CONFIG_SENSORS_SHTC1=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_DME1737=m -CONFIG_SENSORS_EMC1403=m -CONFIG_SENSORS_EMC2103=m -CONFIG_SENSORS_EMC6W201=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_SMSC47M192=m -CONFIG_SENSORS_SMSC47B397=m -CONFIG_SENSORS_SCH56XX_COMMON=m -CONFIG_SENSORS_SCH5627=m -CONFIG_SENSORS_SCH5636=m -CONFIG_SENSORS_STTS751=m -CONFIG_SENSORS_SMM665=m -CONFIG_SENSORS_ADC128D818=m -CONFIG_SENSORS_ADS7828=m -CONFIG_SENSORS_ADS7871=m -CONFIG_SENSORS_AMC6821=m -CONFIG_SENSORS_INA209=m -CONFIG_SENSORS_INA2XX=m -CONFIG_SENSORS_INA3221=m -CONFIG_SENSORS_TC74=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_TMP102=m -CONFIG_SENSORS_TMP103=m -CONFIG_SENSORS_TMP108=m -CONFIG_SENSORS_TMP401=m -CONFIG_SENSORS_TMP421=m -CONFIG_SENSORS_TMP513=m -CONFIG_SENSORS_VIA_CPUTEMP=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83773G=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_W83791D=m -CONFIG_SENSORS_W83792D=m -CONFIG_SENSORS_W83793=m -CONFIG_SENSORS_W83795=m -# CONFIG_SENSORS_W83795_FANCTRL is not set -CONFIG_SENSORS_W83L785TS=m -CONFIG_SENSORS_W83L786NG=m -CONFIG_SENSORS_W83627HF=m -CONFIG_SENSORS_W83627EHF=m -CONFIG_SENSORS_WM831X=m -CONFIG_SENSORS_WM8350=m -CONFIG_SENSORS_XGENE=m - -# -# ACPI drivers -# -CONFIG_SENSORS_ACPI_POWER=m -CONFIG_SENSORS_ATK0110=m -CONFIG_THERMAL=y -# CONFIG_THERMAL_STATISTICS is not set -CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=100 -CONFIG_THERMAL_HWMON=y -CONFIG_THERMAL_OF=y -CONFIG_THERMAL_WRITABLE_TRIPS=y -CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y -# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set -# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set -# CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR is not set -CONFIG_THERMAL_GOV_FAIR_SHARE=y -CONFIG_THERMAL_GOV_STEP_WISE=y -CONFIG_THERMAL_GOV_BANG_BANG=y -CONFIG_THERMAL_GOV_USER_SPACE=y -CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y -CONFIG_CPU_THERMAL=y -CONFIG_CPU_FREQ_THERMAL=y -CONFIG_CPU_IDLE_THERMAL=y -CONFIG_CLOCK_THERMAL=y -CONFIG_DEVFREQ_THERMAL=y -# CONFIG_THERMAL_EMULATION is not set -CONFIG_THERMAL_MMIO=m -CONFIG_MAX77620_THERMAL=m -CONFIG_QORIQ_THERMAL=m -CONFIG_DA9062_THERMAL=m - -# -# Intel thermal drivers -# -CONFIG_INTEL_POWERCLAMP=m -CONFIG_X86_PKG_TEMP_THERMAL=m -CONFIG_INTEL_SOC_DTS_IOSF_CORE=m -CONFIG_INTEL_SOC_DTS_THERMAL=m - -# -# ACPI INT340X thermal drivers -# -CONFIG_INT340X_THERMAL=m -CONFIG_ACPI_THERMAL_REL=m -CONFIG_INT3406_THERMAL=m -CONFIG_PROC_THERMAL_MMIO_RAPL=y -# end of ACPI INT340X thermal drivers - -CONFIG_INTEL_BXT_PMIC_THERMAL=m -CONFIG_INTEL_PCH_THERMAL=m -# end of Intel thermal drivers - -# CONFIG_TI_SOC_THERMAL is not set -CONFIG_GENERIC_ADC_THERMAL=m -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_CORE=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y -CONFIG_WATCHDOG_OPEN_TIMEOUT=0 -CONFIG_WATCHDOG_SYSFS=y - -# -# Watchdog Pretimeout Governors -# -CONFIG_WATCHDOG_PRETIMEOUT_GOV=y -CONFIG_WATCHDOG_PRETIMEOUT_GOV_SEL=m -CONFIG_WATCHDOG_PRETIMEOUT_GOV_NOOP=m -CONFIG_WATCHDOG_PRETIMEOUT_GOV_PANIC=y -# CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_NOOP is not set -CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_PANIC=y - -# -# Watchdog Device Drivers -# -CONFIG_SOFT_WATCHDOG=m -# 
CONFIG_SOFT_WATCHDOG_PRETIMEOUT is not set -CONFIG_BD70528_WATCHDOG=m -CONFIG_DA9052_WATCHDOG=m -CONFIG_DA9055_WATCHDOG=m -CONFIG_DA9063_WATCHDOG=m -CONFIG_DA9062_WATCHDOG=m -CONFIG_GPIO_WATCHDOG=m -CONFIG_MENF21BMC_WATCHDOG=m -CONFIG_MENZ069_WATCHDOG=m -CONFIG_WDAT_WDT=m -CONFIG_WM831X_WATCHDOG=m -CONFIG_WM8350_WATCHDOG=m -CONFIG_XILINX_WATCHDOG=m -CONFIG_ZIIRAVE_WATCHDOG=m -CONFIG_RAVE_SP_WATCHDOG=m -CONFIG_MLX_WDT=m -CONFIG_CADENCE_WATCHDOG=m -CONFIG_DW_WATCHDOG=m -CONFIG_RN5T618_WATCHDOG=m -CONFIG_TWL4030_WATCHDOG=m -CONFIG_MAX63XX_WATCHDOG=m -CONFIG_MAX77620_WATCHDOG=m -CONFIG_RETU_WATCHDOG=m -CONFIG_STPMIC1_WATCHDOG=m -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_EBC_C384_WDT=m -CONFIG_F71808E_WDT=m -CONFIG_SP5100_TCO=m -CONFIG_SBC_FITPC2_WATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_IBMASR=m -CONFIG_WAFER_WDT=m -CONFIG_I6300ESB_WDT=m -CONFIG_IE6XX_WDT=m -CONFIG_ITCO_WDT=m -CONFIG_ITCO_VENDOR_SUPPORT=y -CONFIG_IT8712F_WDT=m -CONFIG_IT87_WDT=m -CONFIG_HP_WATCHDOG=m -CONFIG_HPWDT_NMI_DECODING=y -CONFIG_KEMPLD_WDT=m -CONFIG_SC1200_WDT=m -CONFIG_PC87413_WDT=m -CONFIG_NV_TCO=m -CONFIG_60XX_WDT=m -CONFIG_CPU5_WDT=m -CONFIG_SMSC_SCH311X_WDT=m -CONFIG_SMSC37B787_WDT=m -CONFIG_TQMX86_WDT=m -CONFIG_VIA_WDT=m -CONFIG_W83627HF_WDT=m -CONFIG_W83877F_WDT=m -CONFIG_W83977F_WDT=m -CONFIG_MACHZ_WDT=m -CONFIG_SBC_EPX_C3_WATCHDOG=m -CONFIG_INTEL_MEI_WDT=m -CONFIG_NI903X_WDT=m -CONFIG_NIC7018_WDT=m -CONFIG_MEN_A21_WDT=m -CONFIG_XEN_WDT=m - -# -# PCI-based Watchdog Cards -# -CONFIG_PCIPCWATCHDOG=m -CONFIG_WDTPCI=m - -# -# USB-based Watchdog Cards -# -CONFIG_USBPCWATCHDOG=m -CONFIG_SSB_POSSIBLE=y -CONFIG_SSB=m -CONFIG_SSB_SPROM=y -CONFIG_SSB_BLOCKIO=y -CONFIG_SSB_PCIHOST_POSSIBLE=y -CONFIG_SSB_PCIHOST=y -CONFIG_SSB_B43_PCI_BRIDGE=y -CONFIG_SSB_PCMCIAHOST_POSSIBLE=y -CONFIG_SSB_PCMCIAHOST=y -CONFIG_SSB_SDIOHOST_POSSIBLE=y -CONFIG_SSB_SDIOHOST=y -CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y -CONFIG_SSB_DRIVER_PCICORE=y -CONFIG_SSB_DRIVER_GPIO=y -CONFIG_BCMA_POSSIBLE=y -CONFIG_BCMA=m -CONFIG_BCMA_BLOCKIO=y -CONFIG_BCMA_HOST_PCI_POSSIBLE=y -CONFIG_BCMA_HOST_PCI=y -# CONFIG_BCMA_HOST_SOC is not set -CONFIG_BCMA_DRIVER_PCI=y -CONFIG_BCMA_DRIVER_GMAC_CMN=y -CONFIG_BCMA_DRIVER_GPIO=y -# CONFIG_BCMA_DEBUG is not set - -# -# Multifunction device drivers -# -CONFIG_MFD_CORE=y -CONFIG_MFD_ACT8945A=m -CONFIG_MFD_AS3711=y -CONFIG_MFD_AS3722=m -CONFIG_PMIC_ADP5520=y -CONFIG_MFD_AAT2870_CORE=y -CONFIG_MFD_ATMEL_FLEXCOM=m -CONFIG_MFD_ATMEL_HLCDC=m -CONFIG_MFD_BCM590XX=m -CONFIG_MFD_BD9571MWV=m -CONFIG_MFD_AXP20X=m -CONFIG_MFD_AXP20X_I2C=m -CONFIG_MFD_CROS_EC_DEV=m -CONFIG_MFD_MADERA=m -CONFIG_MFD_MADERA_I2C=m -CONFIG_MFD_MADERA_SPI=m -CONFIG_MFD_CS47L15=y -CONFIG_MFD_CS47L35=y -CONFIG_MFD_CS47L85=y -CONFIG_MFD_CS47L90=y -CONFIG_MFD_CS47L92=y -CONFIG_PMIC_DA903X=y -CONFIG_PMIC_DA9052=y -CONFIG_MFD_DA9052_SPI=y -CONFIG_MFD_DA9052_I2C=y -CONFIG_MFD_DA9055=y -CONFIG_MFD_DA9062=m -CONFIG_MFD_DA9063=m -CONFIG_MFD_DA9150=m -CONFIG_MFD_DLN2=m -CONFIG_MFD_MC13XXX=m -CONFIG_MFD_MC13XXX_SPI=m -CONFIG_MFD_MC13XXX_I2C=m -CONFIG_MFD_HI6421_PMIC=m -CONFIG_HTC_PASIC3=m -CONFIG_HTC_I2CPLD=y -CONFIG_MFD_INTEL_QUARK_I2C_GPIO=m -CONFIG_LPC_ICH=m -CONFIG_LPC_SCH=m -CONFIG_INTEL_SOC_PMIC=y -CONFIG_INTEL_SOC_PMIC_BXTWC=m -CONFIG_INTEL_SOC_PMIC_CHTWC=y -CONFIG_INTEL_SOC_PMIC_CHTDC_TI=m -CONFIG_MFD_INTEL_LPSS=m -CONFIG_MFD_INTEL_LPSS_ACPI=m -CONFIG_MFD_INTEL_LPSS_PCI=m -CONFIG_MFD_IQS62X=m -CONFIG_MFD_JANZ_CMODIO=m -CONFIG_MFD_KEMPLD=m -CONFIG_MFD_88PM800=m -CONFIG_MFD_88PM805=m 
-CONFIG_MFD_88PM860X=y -CONFIG_MFD_MAX14577=m -CONFIG_MFD_MAX77620=y -CONFIG_MFD_MAX77650=m -CONFIG_MFD_MAX77686=m -CONFIG_MFD_MAX77693=m -CONFIG_MFD_MAX77843=y -CONFIG_MFD_MAX8907=m -CONFIG_MFD_MAX8925=y -CONFIG_MFD_MAX8997=y -CONFIG_MFD_MAX8998=y -CONFIG_MFD_MT6397=m -CONFIG_MFD_MENF21BMC=m -CONFIG_EZX_PCAP=y -CONFIG_MFD_CPCAP=m -CONFIG_MFD_VIPERBOARD=m -CONFIG_MFD_RETU=m -CONFIG_MFD_PCF50633=m -CONFIG_PCF50633_ADC=m -CONFIG_PCF50633_GPIO=m -CONFIG_UCB1400_CORE=m -CONFIG_MFD_RDC321X=m -CONFIG_MFD_RT5033=m -CONFIG_MFD_RC5T583=y -CONFIG_MFD_RK808=m -CONFIG_MFD_RN5T618=m -CONFIG_MFD_SEC_CORE=y -CONFIG_MFD_SI476X_CORE=m -CONFIG_MFD_SM501=m -CONFIG_MFD_SM501_GPIO=y -CONFIG_MFD_SKY81452=m -CONFIG_MFD_SMSC=y -CONFIG_ABX500_CORE=y -CONFIG_AB3100_CORE=y -CONFIG_AB3100_OTP=y -CONFIG_MFD_STMPE=y - -# -# STMicroelectronics STMPE Interface Drivers -# -CONFIG_STMPE_I2C=y -CONFIG_STMPE_SPI=y -# end of STMicroelectronics STMPE Interface Drivers - -CONFIG_MFD_SYSCON=y -CONFIG_MFD_TI_AM335X_TSCADC=m -CONFIG_MFD_LP3943=m -CONFIG_MFD_LP8788=y -CONFIG_MFD_TI_LMU=m -CONFIG_MFD_PALMAS=y -CONFIG_TPS6105X=m -CONFIG_TPS65010=m -CONFIG_TPS6507X=m -CONFIG_MFD_TPS65086=m -CONFIG_MFD_TPS65090=y -CONFIG_MFD_TPS65217=m -CONFIG_MFD_TPS68470=y -CONFIG_MFD_TI_LP873X=m -CONFIG_MFD_TI_LP87565=m -CONFIG_MFD_TPS65218=m -CONFIG_MFD_TPS6586X=y -CONFIG_MFD_TPS65910=y -CONFIG_MFD_TPS65912=m -CONFIG_MFD_TPS65912_I2C=m -CONFIG_MFD_TPS65912_SPI=m -CONFIG_MFD_TPS80031=y -CONFIG_TWL4030_CORE=y -CONFIG_MFD_TWL4030_AUDIO=y -CONFIG_TWL6040_CORE=y -CONFIG_MFD_WL1273_CORE=m -CONFIG_MFD_LM3533=m -CONFIG_MFD_TC3589X=y -CONFIG_MFD_TQMX86=m -CONFIG_MFD_VX855=m -CONFIG_MFD_LOCHNAGAR=y -CONFIG_MFD_ARIZONA=y -CONFIG_MFD_ARIZONA_I2C=m -CONFIG_MFD_ARIZONA_SPI=m -CONFIG_MFD_CS47L24=y -CONFIG_MFD_WM5102=y -CONFIG_MFD_WM5110=y -CONFIG_MFD_WM8997=y -CONFIG_MFD_WM8998=y -CONFIG_MFD_WM8400=y -CONFIG_MFD_WM831X=y -CONFIG_MFD_WM831X_I2C=y -CONFIG_MFD_WM831X_SPI=y -CONFIG_MFD_WM8350=y -CONFIG_MFD_WM8350_I2C=y -CONFIG_MFD_WM8994=m -CONFIG_MFD_ROHM_BD718XX=m -CONFIG_MFD_ROHM_BD70528=m -CONFIG_MFD_ROHM_BD71828=m -CONFIG_MFD_STPMIC1=m -CONFIG_MFD_STMFX=m -CONFIG_MFD_WCD934X=m -CONFIG_RAVE_SP_CORE=m -# end of Multifunction device drivers - -CONFIG_REGULATOR=y -# CONFIG_REGULATOR_DEBUG is not set -CONFIG_REGULATOR_FIXED_VOLTAGE=m -CONFIG_REGULATOR_VIRTUAL_CONSUMER=m -CONFIG_REGULATOR_USERSPACE_CONSUMER=m -CONFIG_REGULATOR_88PG86X=m -CONFIG_REGULATOR_88PM800=m -CONFIG_REGULATOR_88PM8607=m -CONFIG_REGULATOR_ACT8865=m -CONFIG_REGULATOR_ACT8945A=m -CONFIG_REGULATOR_AD5398=m -CONFIG_REGULATOR_AAT2870=m -CONFIG_REGULATOR_AB3100=m -CONFIG_REGULATOR_ARIZONA_LDO1=m -CONFIG_REGULATOR_ARIZONA_MICSUPP=m -CONFIG_REGULATOR_AS3711=m -CONFIG_REGULATOR_AS3722=m -CONFIG_REGULATOR_AXP20X=m -CONFIG_REGULATOR_BCM590XX=m -CONFIG_REGULATOR_BD70528=m -CONFIG_REGULATOR_BD71828=m -CONFIG_REGULATOR_BD718XX=m -CONFIG_REGULATOR_BD9571MWV=m -CONFIG_REGULATOR_CPCAP=m -CONFIG_REGULATOR_DA903X=m -CONFIG_REGULATOR_DA9052=m -CONFIG_REGULATOR_DA9055=m -CONFIG_REGULATOR_DA9062=m -CONFIG_REGULATOR_DA9063=m -CONFIG_REGULATOR_DA9210=m -CONFIG_REGULATOR_DA9211=m -CONFIG_REGULATOR_FAN53555=m -CONFIG_REGULATOR_GPIO=m -CONFIG_REGULATOR_HI6421=m -CONFIG_REGULATOR_HI6421V530=m -CONFIG_REGULATOR_ISL9305=m -CONFIG_REGULATOR_ISL6271A=m -CONFIG_REGULATOR_LM363X=m -CONFIG_REGULATOR_LOCHNAGAR=m -CONFIG_REGULATOR_LP3971=m -CONFIG_REGULATOR_LP3972=m -CONFIG_REGULATOR_LP872X=m -CONFIG_REGULATOR_LP873X=m -CONFIG_REGULATOR_LP8755=m -CONFIG_REGULATOR_LP87565=m -CONFIG_REGULATOR_LP8788=m -CONFIG_REGULATOR_LTC3589=m 
-CONFIG_REGULATOR_LTC3676=m -CONFIG_REGULATOR_MAX14577=m -CONFIG_REGULATOR_MAX1586=m -CONFIG_REGULATOR_MAX77620=m -CONFIG_REGULATOR_MAX77650=m -CONFIG_REGULATOR_MAX8649=m -CONFIG_REGULATOR_MAX8660=m -CONFIG_REGULATOR_MAX8907=m -CONFIG_REGULATOR_MAX8925=m -CONFIG_REGULATOR_MAX8952=m -CONFIG_REGULATOR_MAX8973=m -CONFIG_REGULATOR_MAX8997=m -CONFIG_REGULATOR_MAX8998=m -CONFIG_REGULATOR_MAX77686=m -CONFIG_REGULATOR_MAX77693=m -CONFIG_REGULATOR_MAX77802=m -CONFIG_REGULATOR_MC13XXX_CORE=m -CONFIG_REGULATOR_MC13783=m -CONFIG_REGULATOR_MC13892=m -CONFIG_REGULATOR_MCP16502=m -CONFIG_REGULATOR_MP5416=m -CONFIG_REGULATOR_MP8859=m -CONFIG_REGULATOR_MP886X=m -CONFIG_REGULATOR_MPQ7920=m -CONFIG_REGULATOR_MT6311=m -CONFIG_REGULATOR_MT6323=m -CONFIG_REGULATOR_MT6397=m -CONFIG_REGULATOR_PALMAS=m -CONFIG_REGULATOR_PCAP=m -CONFIG_REGULATOR_PCF50633=m -CONFIG_REGULATOR_PFUZE100=m -CONFIG_REGULATOR_PV88060=m -CONFIG_REGULATOR_PV88080=m -CONFIG_REGULATOR_PV88090=m -CONFIG_REGULATOR_PWM=m -CONFIG_REGULATOR_QCOM_SPMI=m -CONFIG_REGULATOR_RC5T583=m -CONFIG_REGULATOR_RK808=m -CONFIG_REGULATOR_RN5T618=m -CONFIG_REGULATOR_ROHM=m -CONFIG_REGULATOR_RT5033=m -CONFIG_REGULATOR_S2MPA01=m -CONFIG_REGULATOR_S2MPS11=m -CONFIG_REGULATOR_S5M8767=m -CONFIG_REGULATOR_SKY81452=m -CONFIG_REGULATOR_SLG51000=m -CONFIG_REGULATOR_STPMIC1=m -CONFIG_REGULATOR_SY8106A=m -CONFIG_REGULATOR_SY8824X=m -CONFIG_REGULATOR_TPS51632=m -CONFIG_REGULATOR_TPS6105X=m -CONFIG_REGULATOR_TPS62360=m -CONFIG_REGULATOR_TPS65023=m -CONFIG_REGULATOR_TPS6507X=m -CONFIG_REGULATOR_TPS65086=m -CONFIG_REGULATOR_TPS65090=m -CONFIG_REGULATOR_TPS65132=m -CONFIG_REGULATOR_TPS65217=m -CONFIG_REGULATOR_TPS65218=m -CONFIG_REGULATOR_TPS6524X=m -CONFIG_REGULATOR_TPS6586X=m -CONFIG_REGULATOR_TPS65910=m -CONFIG_REGULATOR_TPS65912=m -CONFIG_REGULATOR_TPS80031=m -CONFIG_REGULATOR_TWL4030=m -CONFIG_REGULATOR_VCTRL=m -CONFIG_REGULATOR_WM831X=m -CONFIG_REGULATOR_WM8350=m -CONFIG_REGULATOR_WM8400=m -CONFIG_REGULATOR_WM8994=m -CONFIG_CEC_CORE=m -CONFIG_CEC_NOTIFIER=y -CONFIG_CEC_PIN=y -CONFIG_RC_CORE=m -CONFIG_RC_MAP=m -CONFIG_LIRC=y -CONFIG_RC_DECODERS=y -CONFIG_IR_NEC_DECODER=m -CONFIG_IR_RC5_DECODER=m -CONFIG_IR_RC6_DECODER=m -CONFIG_IR_JVC_DECODER=m -CONFIG_IR_SONY_DECODER=m -CONFIG_IR_SANYO_DECODER=m -CONFIG_IR_SHARP_DECODER=m -CONFIG_IR_MCE_KBD_DECODER=m -CONFIG_IR_XMP_DECODER=m -CONFIG_IR_IMON_DECODER=m -CONFIG_IR_RCMM_DECODER=m -CONFIG_RC_DEVICES=y -CONFIG_RC_ATI_REMOTE=m -CONFIG_IR_ENE=m -CONFIG_IR_HIX5HD2=m -CONFIG_IR_IMON=m -CONFIG_IR_IMON_RAW=m -CONFIG_IR_MCEUSB=m -CONFIG_IR_ITE_CIR=m -CONFIG_IR_FINTEK=m -CONFIG_IR_NUVOTON=m -CONFIG_IR_REDRAT3=m -CONFIG_IR_SPI=m -CONFIG_IR_STREAMZAP=m -CONFIG_IR_WINBOND_CIR=m -CONFIG_IR_IGORPLUGUSB=m -CONFIG_IR_IGUANA=m -CONFIG_IR_TTUSBIR=m -CONFIG_RC_LOOPBACK=m -CONFIG_IR_GPIO_CIR=m -CONFIG_IR_GPIO_TX=m -CONFIG_IR_PWM_TX=m -CONFIG_IR_SERIAL=m -CONFIG_IR_SERIAL_TRANSMITTER=y -CONFIG_IR_SIR=m -CONFIG_RC_XBOX_DVD=m -CONFIG_MEDIA_SUPPORT=m - -# -# Multimedia core support -# -CONFIG_MEDIA_CAMERA_SUPPORT=y -CONFIG_MEDIA_ANALOG_TV_SUPPORT=y -CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y -CONFIG_MEDIA_RADIO_SUPPORT=y -CONFIG_MEDIA_SDR_SUPPORT=y -CONFIG_MEDIA_CEC_SUPPORT=y -CONFIG_MEDIA_CEC_RC=y -# CONFIG_CEC_PIN_ERROR_INJ is not set -CONFIG_MEDIA_CONTROLLER=y -CONFIG_MEDIA_CONTROLLER_DVB=y -# CONFIG_MEDIA_CONTROLLER_REQUEST_API is not set -CONFIG_VIDEO_DEV=m -CONFIG_VIDEO_V4L2_SUBDEV_API=y -CONFIG_VIDEO_V4L2=m -CONFIG_VIDEO_V4L2_I2C=y -# CONFIG_VIDEO_ADV_DEBUG is not set -# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set -CONFIG_VIDEO_TUNER=m 
-CONFIG_V4L2_MEM2MEM_DEV=m -CONFIG_V4L2_FLASH_LED_CLASS=m -CONFIG_V4L2_FWNODE=m -CONFIG_VIDEOBUF_GEN=m -CONFIG_VIDEOBUF_DMA_SG=m -CONFIG_VIDEOBUF_VMALLOC=m -CONFIG_DVB_CORE=m -CONFIG_DVB_MMAP=y -CONFIG_DVB_NET=y -CONFIG_TTPCI_EEPROM=m -CONFIG_DVB_MAX_ADAPTERS=16 -# CONFIG_DVB_DYNAMIC_MINORS is not set -# CONFIG_DVB_DEMUX_SECTION_LOSS_LOG is not set -# CONFIG_DVB_ULE_DEBUG is not set - -# -# Media drivers -# -CONFIG_MEDIA_USB_SUPPORT=y - -# -# Webcam devices -# -CONFIG_USB_VIDEO_CLASS=m -CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y -CONFIG_USB_GSPCA=m -CONFIG_USB_M5602=m -CONFIG_USB_STV06XX=m -CONFIG_USB_GL860=m -CONFIG_USB_GSPCA_BENQ=m -CONFIG_USB_GSPCA_CONEX=m -CONFIG_USB_GSPCA_CPIA1=m -CONFIG_USB_GSPCA_DTCS033=m -CONFIG_USB_GSPCA_ETOMS=m -CONFIG_USB_GSPCA_FINEPIX=m -CONFIG_USB_GSPCA_JEILINJ=m -CONFIG_USB_GSPCA_JL2005BCD=m -CONFIG_USB_GSPCA_KINECT=m -CONFIG_USB_GSPCA_KONICA=m -CONFIG_USB_GSPCA_MARS=m -CONFIG_USB_GSPCA_MR97310A=m -CONFIG_USB_GSPCA_NW80X=m -CONFIG_USB_GSPCA_OV519=m -CONFIG_USB_GSPCA_OV534=m -CONFIG_USB_GSPCA_OV534_9=m -CONFIG_USB_GSPCA_PAC207=m -CONFIG_USB_GSPCA_PAC7302=m -CONFIG_USB_GSPCA_PAC7311=m -CONFIG_USB_GSPCA_SE401=m -CONFIG_USB_GSPCA_SN9C2028=m -CONFIG_USB_GSPCA_SN9C20X=m -CONFIG_USB_GSPCA_SONIXB=m -CONFIG_USB_GSPCA_SONIXJ=m -CONFIG_USB_GSPCA_SPCA500=m -CONFIG_USB_GSPCA_SPCA501=m -CONFIG_USB_GSPCA_SPCA505=m -CONFIG_USB_GSPCA_SPCA506=m -CONFIG_USB_GSPCA_SPCA508=m -CONFIG_USB_GSPCA_SPCA561=m -CONFIG_USB_GSPCA_SPCA1528=m -CONFIG_USB_GSPCA_SQ905=m -CONFIG_USB_GSPCA_SQ905C=m -CONFIG_USB_GSPCA_SQ930X=m -CONFIG_USB_GSPCA_STK014=m -CONFIG_USB_GSPCA_STK1135=m -CONFIG_USB_GSPCA_STV0680=m -CONFIG_USB_GSPCA_SUNPLUS=m -CONFIG_USB_GSPCA_T613=m -CONFIG_USB_GSPCA_TOPRO=m -CONFIG_USB_GSPCA_TOUPTEK=m -CONFIG_USB_GSPCA_TV8532=m -CONFIG_USB_GSPCA_VC032X=m -CONFIG_USB_GSPCA_VICAM=m -CONFIG_USB_GSPCA_XIRLINK_CIT=m -CONFIG_USB_GSPCA_ZC3XX=m -CONFIG_USB_PWC=m -# CONFIG_USB_PWC_DEBUG is not set -CONFIG_USB_PWC_INPUT_EVDEV=y -CONFIG_VIDEO_CPIA2=m -CONFIG_USB_ZR364XX=m -CONFIG_USB_STKWEBCAM=m -CONFIG_USB_S2255=m -CONFIG_VIDEO_USBTV=m - -# -# Analog TV USB devices -# -CONFIG_VIDEO_PVRUSB2=m -CONFIG_VIDEO_PVRUSB2_SYSFS=y -CONFIG_VIDEO_PVRUSB2_DVB=y -# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set -CONFIG_VIDEO_HDPVR=m -CONFIG_VIDEO_STK1160_COMMON=m -CONFIG_VIDEO_STK1160=m -CONFIG_VIDEO_GO7007=m -CONFIG_VIDEO_GO7007_USB=m -CONFIG_VIDEO_GO7007_LOADER=m -CONFIG_VIDEO_GO7007_USB_S2250_BOARD=m - -# -# Analog/digital TV USB devices -# -CONFIG_VIDEO_AU0828=m -CONFIG_VIDEO_AU0828_V4L2=y -CONFIG_VIDEO_AU0828_RC=y -CONFIG_VIDEO_CX231XX=m -CONFIG_VIDEO_CX231XX_RC=y -CONFIG_VIDEO_CX231XX_ALSA=m -CONFIG_VIDEO_CX231XX_DVB=m -CONFIG_VIDEO_TM6000=m -CONFIG_VIDEO_TM6000_ALSA=m -CONFIG_VIDEO_TM6000_DVB=m - -# -# Digital TV USB devices -# -CONFIG_DVB_USB=m -# CONFIG_DVB_USB_DEBUG is not set -CONFIG_DVB_USB_DIB3000MC=m -CONFIG_DVB_USB_A800=m -CONFIG_DVB_USB_DIBUSB_MB=m -CONFIG_DVB_USB_DIBUSB_MB_FAULTY=y -CONFIG_DVB_USB_DIBUSB_MC=m -CONFIG_DVB_USB_DIB0700=m -CONFIG_DVB_USB_UMT_010=m -CONFIG_DVB_USB_CXUSB=m -CONFIG_DVB_USB_CXUSB_ANALOG=y -CONFIG_DVB_USB_M920X=m -CONFIG_DVB_USB_DIGITV=m -CONFIG_DVB_USB_VP7045=m -CONFIG_DVB_USB_VP702X=m -CONFIG_DVB_USB_GP8PSK=m -CONFIG_DVB_USB_NOVA_T_USB2=m -CONFIG_DVB_USB_TTUSB2=m -CONFIG_DVB_USB_DTT200U=m -CONFIG_DVB_USB_OPERA1=m -CONFIG_DVB_USB_AF9005=m -CONFIG_DVB_USB_AF9005_REMOTE=m -CONFIG_DVB_USB_PCTV452E=m -CONFIG_DVB_USB_DW2102=m -CONFIG_DVB_USB_CINERGY_T2=m -CONFIG_DVB_USB_DTV5100=m -CONFIG_DVB_USB_AZ6027=m -CONFIG_DVB_USB_TECHNISAT_USB2=m -CONFIG_DVB_USB_V2=m -CONFIG_DVB_USB_AF9015=m 
-CONFIG_DVB_USB_AF9035=m -CONFIG_DVB_USB_ANYSEE=m -CONFIG_DVB_USB_AU6610=m -CONFIG_DVB_USB_AZ6007=m -CONFIG_DVB_USB_CE6230=m -CONFIG_DVB_USB_EC168=m -CONFIG_DVB_USB_GL861=m -CONFIG_DVB_USB_LME2510=m -CONFIG_DVB_USB_MXL111SF=m -CONFIG_DVB_USB_RTL28XXU=m -CONFIG_DVB_USB_DVBSKY=m -CONFIG_DVB_USB_ZD1301=m -CONFIG_DVB_TTUSB_BUDGET=m -CONFIG_DVB_TTUSB_DEC=m -CONFIG_SMS_USB_DRV=m -CONFIG_DVB_B2C2_FLEXCOP_USB=m -# CONFIG_DVB_B2C2_FLEXCOP_USB_DEBUG is not set -CONFIG_DVB_AS102=m - -# -# Webcam, TV (analog/digital) USB devices -# -CONFIG_VIDEO_EM28XX=m -CONFIG_VIDEO_EM28XX_V4L2=m -CONFIG_VIDEO_EM28XX_ALSA=m -CONFIG_VIDEO_EM28XX_DVB=m -CONFIG_VIDEO_EM28XX_RC=m - -# -# Software defined radio USB devices -# -CONFIG_USB_AIRSPY=m -CONFIG_USB_HACKRF=m -CONFIG_USB_MSI2500=m - -# -# USB HDMI CEC adapters -# -CONFIG_USB_PULSE8_CEC=m -CONFIG_USB_RAINSHADOW_CEC=m -CONFIG_MEDIA_PCI_SUPPORT=y - -# -# Media capture support -# -CONFIG_VIDEO_MEYE=m -CONFIG_VIDEO_SOLO6X10=m -CONFIG_VIDEO_TW5864=m -CONFIG_VIDEO_TW68=m -CONFIG_VIDEO_TW686X=m - -# -# Media capture/analog TV support -# -CONFIG_VIDEO_IVTV=m -# CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS is not set -CONFIG_VIDEO_IVTV_ALSA=m -CONFIG_VIDEO_FB_IVTV=m -# CONFIG_VIDEO_FB_IVTV_FORCE_PAT is not set -CONFIG_VIDEO_HEXIUM_GEMINI=m -CONFIG_VIDEO_HEXIUM_ORION=m -CONFIG_VIDEO_MXB=m -CONFIG_VIDEO_DT3155=m - -# -# Media capture/analog/hybrid TV support -# -CONFIG_VIDEO_CX18=m -CONFIG_VIDEO_CX18_ALSA=m -CONFIG_VIDEO_CX23885=m -CONFIG_MEDIA_ALTERA_CI=m -CONFIG_VIDEO_CX25821=m -CONFIG_VIDEO_CX25821_ALSA=m -CONFIG_VIDEO_CX88=m -CONFIG_VIDEO_CX88_ALSA=m -CONFIG_VIDEO_CX88_BLACKBIRD=m -CONFIG_VIDEO_CX88_DVB=m -CONFIG_VIDEO_CX88_ENABLE_VP3054=y -CONFIG_VIDEO_CX88_VP3054=m -CONFIG_VIDEO_CX88_MPEG=m -CONFIG_VIDEO_BT848=m -CONFIG_DVB_BT8XX=m -CONFIG_VIDEO_SAA7134=m -CONFIG_VIDEO_SAA7134_ALSA=m -CONFIG_VIDEO_SAA7134_RC=y -CONFIG_VIDEO_SAA7134_DVB=m -CONFIG_VIDEO_SAA7134_GO7007=m -CONFIG_VIDEO_SAA7164=m - -# -# Media digital TV PCI Adapters -# -CONFIG_DVB_AV7110_IR=y -CONFIG_DVB_AV7110=m -CONFIG_DVB_AV7110_OSD=y -CONFIG_DVB_BUDGET_CORE=m -CONFIG_DVB_BUDGET=m -CONFIG_DVB_BUDGET_CI=m -CONFIG_DVB_BUDGET_AV=m -CONFIG_DVB_BUDGET_PATCH=m -CONFIG_DVB_B2C2_FLEXCOP_PCI=m -# CONFIG_DVB_B2C2_FLEXCOP_PCI_DEBUG is not set -CONFIG_DVB_PLUTO2=m -CONFIG_DVB_DM1105=m -CONFIG_DVB_PT1=m -CONFIG_DVB_PT3=m -CONFIG_MANTIS_CORE=m -CONFIG_DVB_MANTIS=m -CONFIG_DVB_HOPPER=m -CONFIG_DVB_NGENE=m -CONFIG_DVB_DDBRIDGE=m -# CONFIG_DVB_DDBRIDGE_MSIENABLE is not set -CONFIG_DVB_SMIPCIE=m -CONFIG_DVB_NETUP_UNIDVB=m -CONFIG_VIDEO_IPU3_CIO2=m -CONFIG_V4L_PLATFORM_DRIVERS=y -CONFIG_VIDEO_CAFE_CCIC=m -CONFIG_VIDEO_CADENCE=y -CONFIG_VIDEO_CADENCE_CSI2RX=m -CONFIG_VIDEO_CADENCE_CSI2TX=m -CONFIG_VIDEO_ASPEED=m -CONFIG_VIDEO_MUX=m -CONFIG_VIDEO_XILINX=m -CONFIG_VIDEO_XILINX_TPG=m -CONFIG_VIDEO_XILINX_VTC=m -CONFIG_V4L_MEM2MEM_DRIVERS=y -CONFIG_VIDEO_MEM2MEM_DEINTERLACE=m -CONFIG_VIDEO_SH_VEU=m -CONFIG_V4L_TEST_DRIVERS=y -CONFIG_VIDEO_VIMC=m -CONFIG_VIDEO_VIVID=m -CONFIG_VIDEO_VIVID_CEC=y -CONFIG_VIDEO_VIVID_MAX_DEVS=64 -CONFIG_VIDEO_VIM2M=m -CONFIG_VIDEO_VICODEC=m -CONFIG_DVB_PLATFORM_DRIVERS=y -CONFIG_CEC_PLATFORM_DRIVERS=y -CONFIG_VIDEO_CROS_EC_CEC=m -CONFIG_CEC_GPIO=m -CONFIG_VIDEO_SECO_CEC=m -CONFIG_VIDEO_SECO_RC=y -CONFIG_SDR_PLATFORM_DRIVERS=y - -# -# Supported MMC/SDIO adapters -# -CONFIG_SMS_SDIO_DRV=m -CONFIG_RADIO_ADAPTERS=y -CONFIG_RADIO_TEA575X=m -CONFIG_RADIO_SI470X=m -CONFIG_USB_SI470X=m -CONFIG_I2C_SI470X=m -CONFIG_RADIO_SI4713=m -CONFIG_USB_SI4713=m -CONFIG_PLATFORM_SI4713=m -CONFIG_I2C_SI4713=m 
-CONFIG_RADIO_SI476X=m -CONFIG_USB_MR800=m -CONFIG_USB_DSBR=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_SHARK=m -CONFIG_RADIO_SHARK2=m -CONFIG_USB_KEENE=m -CONFIG_USB_RAREMONO=m -CONFIG_USB_MA901=m -CONFIG_RADIO_TEA5764=m -CONFIG_RADIO_SAA7706H=m -CONFIG_RADIO_TEF6862=m -CONFIG_RADIO_WL1273=m - -# -# Texas Instruments WL128x FM driver (ST based) -# -CONFIG_RADIO_WL128X=m -# end of Texas Instruments WL128x FM driver (ST based) - -# -# Supported FireWire (IEEE 1394) Adapters -# -CONFIG_DVB_FIREDTV=m -CONFIG_DVB_FIREDTV_INPUT=y -CONFIG_MEDIA_COMMON_OPTIONS=y - -# -# common driver options -# -CONFIG_VIDEO_CX2341X=m -CONFIG_VIDEO_TVEEPROM=m -CONFIG_CYPRESS_FIRMWARE=m -CONFIG_VIDEOBUF2_CORE=m -CONFIG_VIDEOBUF2_V4L2=m -CONFIG_VIDEOBUF2_MEMOPS=m -CONFIG_VIDEOBUF2_DMA_CONTIG=m -CONFIG_VIDEOBUF2_VMALLOC=m -CONFIG_VIDEOBUF2_DMA_SG=m -CONFIG_VIDEOBUF2_DVB=m -CONFIG_DVB_B2C2_FLEXCOP=m -CONFIG_VIDEO_SAA7146=m -CONFIG_VIDEO_SAA7146_VV=m -CONFIG_SMS_SIANO_MDTV=m -CONFIG_SMS_SIANO_RC=y -# CONFIG_SMS_SIANO_DEBUGFS is not set -CONFIG_VIDEO_V4L2_TPG=m - -# -# Media ancillary drivers (tuners, sensors, i2c, spi, frontends) -# -CONFIG_MEDIA_SUBDRV_AUTOSELECT=y -CONFIG_MEDIA_ATTACH=y -CONFIG_VIDEO_IR_I2C=m - -# -# I2C Encoders, decoders, sensors and other helper chips -# - -# -# Audio decoders, processors and mixers -# -CONFIG_VIDEO_TVAUDIO=m -CONFIG_VIDEO_TDA7432=m -CONFIG_VIDEO_TDA9840=m -CONFIG_VIDEO_TDA1997X=m -CONFIG_VIDEO_TEA6415C=m -CONFIG_VIDEO_TEA6420=m -CONFIG_VIDEO_MSP3400=m -CONFIG_VIDEO_CS3308=m -CONFIG_VIDEO_CS5345=m -CONFIG_VIDEO_CS53L32A=m -CONFIG_VIDEO_TLV320AIC23B=m -CONFIG_VIDEO_UDA1342=m -CONFIG_VIDEO_WM8775=m -CONFIG_VIDEO_WM8739=m -CONFIG_VIDEO_VP27SMPX=m -CONFIG_VIDEO_SONY_BTF_MPX=m - -# -# RDS decoders -# -CONFIG_VIDEO_SAA6588=m - -# -# Video decoders -# -CONFIG_VIDEO_ADV7180=m -CONFIG_VIDEO_ADV7183=m -CONFIG_VIDEO_ADV748X=m -CONFIG_VIDEO_ADV7604=m -CONFIG_VIDEO_ADV7604_CEC=y -CONFIG_VIDEO_ADV7842=m -CONFIG_VIDEO_ADV7842_CEC=y -CONFIG_VIDEO_BT819=m -CONFIG_VIDEO_BT856=m -CONFIG_VIDEO_BT866=m -CONFIG_VIDEO_KS0127=m -CONFIG_VIDEO_ML86V7667=m -CONFIG_VIDEO_SAA7110=m -CONFIG_VIDEO_SAA711X=m -CONFIG_VIDEO_TC358743=m -CONFIG_VIDEO_TC358743_CEC=y -CONFIG_VIDEO_TVP514X=m -CONFIG_VIDEO_TVP5150=m -CONFIG_VIDEO_TVP7002=m -CONFIG_VIDEO_TW2804=m -CONFIG_VIDEO_TW9903=m -CONFIG_VIDEO_TW9906=m -CONFIG_VIDEO_TW9910=m -CONFIG_VIDEO_VPX3220=m - -# -# Video and audio decoders -# -CONFIG_VIDEO_SAA717X=m -CONFIG_VIDEO_CX25840=m - -# -# Video encoders -# -CONFIG_VIDEO_SAA7127=m -CONFIG_VIDEO_SAA7185=m -CONFIG_VIDEO_ADV7170=m -CONFIG_VIDEO_ADV7175=m -CONFIG_VIDEO_ADV7343=m -CONFIG_VIDEO_ADV7393=m -CONFIG_VIDEO_AD9389B=m -CONFIG_VIDEO_AK881X=m -CONFIG_VIDEO_THS8200=m - -# -# Camera sensor devices -# -CONFIG_VIDEO_APTINA_PLL=m -CONFIG_VIDEO_SMIAPP_PLL=m -CONFIG_VIDEO_HI556=m -CONFIG_VIDEO_IMX214=m -CONFIG_VIDEO_IMX219=m -CONFIG_VIDEO_IMX258=m -CONFIG_VIDEO_IMX274=m -CONFIG_VIDEO_IMX290=m -CONFIG_VIDEO_IMX319=m -CONFIG_VIDEO_IMX355=m -CONFIG_VIDEO_OV2640=m -CONFIG_VIDEO_OV2659=m -CONFIG_VIDEO_OV2680=m -CONFIG_VIDEO_OV2685=m -CONFIG_VIDEO_OV5640=m -CONFIG_VIDEO_OV5645=m -CONFIG_VIDEO_OV5647=m -CONFIG_VIDEO_OV6650=m -CONFIG_VIDEO_OV5670=m -CONFIG_VIDEO_OV5675=m -CONFIG_VIDEO_OV5695=m -CONFIG_VIDEO_OV7251=m -CONFIG_VIDEO_OV772X=m -CONFIG_VIDEO_OV7640=m -CONFIG_VIDEO_OV7670=m -CONFIG_VIDEO_OV7740=m -CONFIG_VIDEO_OV8856=m -CONFIG_VIDEO_OV9640=m -CONFIG_VIDEO_OV9650=m -CONFIG_VIDEO_OV13858=m -CONFIG_VIDEO_VS6624=m -CONFIG_VIDEO_MT9M001=m -CONFIG_VIDEO_MT9M032=m -CONFIG_VIDEO_MT9M111=m -CONFIG_VIDEO_MT9P031=m 
-CONFIG_VIDEO_MT9T001=m -CONFIG_VIDEO_MT9T112=m -CONFIG_VIDEO_MT9V011=m -CONFIG_VIDEO_MT9V032=m -CONFIG_VIDEO_MT9V111=m -CONFIG_VIDEO_SR030PC30=m -CONFIG_VIDEO_NOON010PC30=m -CONFIG_VIDEO_M5MOLS=m -CONFIG_VIDEO_RJ54N1=m -CONFIG_VIDEO_S5K6AA=m -CONFIG_VIDEO_S5K6A3=m -CONFIG_VIDEO_S5K4ECGX=m -CONFIG_VIDEO_S5K5BAF=m -CONFIG_VIDEO_SMIAPP=m -CONFIG_VIDEO_ET8EK8=m -CONFIG_VIDEO_S5C73M3=m - -# -# Lens drivers -# -CONFIG_VIDEO_AD5820=m -CONFIG_VIDEO_AK7375=m -CONFIG_VIDEO_DW9714=m -CONFIG_VIDEO_DW9807_VCM=m - -# -# Flash devices -# -CONFIG_VIDEO_ADP1653=m -CONFIG_VIDEO_LM3560=m -CONFIG_VIDEO_LM3646=m - -# -# Video improvement chips -# -CONFIG_VIDEO_UPD64031A=m -CONFIG_VIDEO_UPD64083=m - -# -# Audio/Video compression chips -# -CONFIG_VIDEO_SAA6752HS=m - -# -# SDR tuner chips -# -CONFIG_SDR_MAX2175=m - -# -# Miscellaneous helper chips -# -CONFIG_VIDEO_THS7303=m -CONFIG_VIDEO_M52790=m -CONFIG_VIDEO_I2C=m -CONFIG_VIDEO_ST_MIPID02=m -# end of I2C Encoders, decoders, sensors and other helper chips - -# -# SPI helper chips -# -CONFIG_VIDEO_GS1662=m -# end of SPI helper chips - -# -# Media SPI Adapters -# -CONFIG_CXD2880_SPI_DRV=m -# end of Media SPI Adapters - -CONFIG_MEDIA_TUNER=m - -# -# Customize TV tuners -# -CONFIG_MEDIA_TUNER_SIMPLE=m -CONFIG_MEDIA_TUNER_TDA18250=m -CONFIG_MEDIA_TUNER_TDA8290=m -CONFIG_MEDIA_TUNER_TDA827X=m -CONFIG_MEDIA_TUNER_TDA18271=m -CONFIG_MEDIA_TUNER_TDA9887=m -CONFIG_MEDIA_TUNER_TEA5761=m -CONFIG_MEDIA_TUNER_TEA5767=m -CONFIG_MEDIA_TUNER_MSI001=m -CONFIG_MEDIA_TUNER_MT20XX=m -CONFIG_MEDIA_TUNER_MT2060=m -CONFIG_MEDIA_TUNER_MT2063=m -CONFIG_MEDIA_TUNER_MT2266=m -CONFIG_MEDIA_TUNER_MT2131=m -CONFIG_MEDIA_TUNER_QT1010=m -CONFIG_MEDIA_TUNER_XC2028=m -CONFIG_MEDIA_TUNER_XC5000=m -CONFIG_MEDIA_TUNER_XC4000=m -CONFIG_MEDIA_TUNER_MXL5005S=m -CONFIG_MEDIA_TUNER_MXL5007T=m -CONFIG_MEDIA_TUNER_MC44S803=m -CONFIG_MEDIA_TUNER_MAX2165=m -CONFIG_MEDIA_TUNER_TDA18218=m -CONFIG_MEDIA_TUNER_FC0011=m -CONFIG_MEDIA_TUNER_FC0012=m -CONFIG_MEDIA_TUNER_FC0013=m -CONFIG_MEDIA_TUNER_TDA18212=m -CONFIG_MEDIA_TUNER_E4000=m -CONFIG_MEDIA_TUNER_FC2580=m -CONFIG_MEDIA_TUNER_M88RS6000T=m -CONFIG_MEDIA_TUNER_TUA9001=m -CONFIG_MEDIA_TUNER_SI2157=m -CONFIG_MEDIA_TUNER_IT913X=m -CONFIG_MEDIA_TUNER_R820T=m -CONFIG_MEDIA_TUNER_MXL301RF=m -CONFIG_MEDIA_TUNER_QM1D1C0042=m -CONFIG_MEDIA_TUNER_QM1D1B0004=m -# end of Customize TV tuners - -# -# Customise DVB Frontends -# - -# -# Multistandard (satellite) frontends -# -CONFIG_DVB_STB0899=m -CONFIG_DVB_STB6100=m -CONFIG_DVB_STV090x=m -CONFIG_DVB_STV0910=m -CONFIG_DVB_STV6110x=m -CONFIG_DVB_STV6111=m -CONFIG_DVB_MXL5XX=m -CONFIG_DVB_M88DS3103=m - -# -# Multistandard (cable + terrestrial) frontends -# -CONFIG_DVB_DRXK=m -CONFIG_DVB_TDA18271C2DD=m -CONFIG_DVB_SI2165=m -CONFIG_DVB_MN88472=m -CONFIG_DVB_MN88473=m - -# -# DVB-S (satellite) frontends -# -CONFIG_DVB_CX24110=m -CONFIG_DVB_CX24123=m -CONFIG_DVB_MT312=m -CONFIG_DVB_ZL10036=m -CONFIG_DVB_ZL10039=m -CONFIG_DVB_S5H1420=m -CONFIG_DVB_STV0288=m -CONFIG_DVB_STB6000=m -CONFIG_DVB_STV0299=m -CONFIG_DVB_STV6110=m -CONFIG_DVB_STV0900=m -CONFIG_DVB_TDA8083=m -CONFIG_DVB_TDA10086=m -CONFIG_DVB_TDA8261=m -CONFIG_DVB_VES1X93=m -CONFIG_DVB_TUNER_ITD1000=m -CONFIG_DVB_TUNER_CX24113=m -CONFIG_DVB_TDA826X=m -CONFIG_DVB_TUA6100=m -CONFIG_DVB_CX24116=m -CONFIG_DVB_CX24117=m -CONFIG_DVB_CX24120=m -CONFIG_DVB_SI21XX=m -CONFIG_DVB_TS2020=m -CONFIG_DVB_DS3000=m -CONFIG_DVB_MB86A16=m -CONFIG_DVB_TDA10071=m - -# -# DVB-T (terrestrial) frontends -# -CONFIG_DVB_SP8870=m -CONFIG_DVB_SP887X=m -CONFIG_DVB_CX22700=m -CONFIG_DVB_CX22702=m 
-CONFIG_DVB_S5H1432=m -CONFIG_DVB_DRXD=m -CONFIG_DVB_L64781=m -CONFIG_DVB_TDA1004X=m -CONFIG_DVB_NXT6000=m -CONFIG_DVB_MT352=m -CONFIG_DVB_ZL10353=m -CONFIG_DVB_DIB3000MB=m -CONFIG_DVB_DIB3000MC=m -CONFIG_DVB_DIB7000M=m -CONFIG_DVB_DIB7000P=m -CONFIG_DVB_DIB9000=m -CONFIG_DVB_TDA10048=m -CONFIG_DVB_AF9013=m -CONFIG_DVB_EC100=m -CONFIG_DVB_STV0367=m -CONFIG_DVB_CXD2820R=m -CONFIG_DVB_CXD2841ER=m -CONFIG_DVB_RTL2830=m -CONFIG_DVB_RTL2832=m -CONFIG_DVB_RTL2832_SDR=m -CONFIG_DVB_SI2168=m -CONFIG_DVB_AS102_FE=m -CONFIG_DVB_ZD1301_DEMOD=m -CONFIG_DVB_GP8PSK_FE=m -CONFIG_DVB_CXD2880=m - -# -# DVB-C (cable) frontends -# -CONFIG_DVB_VES1820=m -CONFIG_DVB_TDA10021=m -CONFIG_DVB_TDA10023=m -CONFIG_DVB_STV0297=m - -# -# ATSC (North American/Korean Terrestrial/Cable DTV) frontends -# -CONFIG_DVB_NXT200X=m -CONFIG_DVB_OR51211=m -CONFIG_DVB_OR51132=m -CONFIG_DVB_BCM3510=m -CONFIG_DVB_LGDT330X=m -CONFIG_DVB_LGDT3305=m -CONFIG_DVB_LGDT3306A=m -CONFIG_DVB_LG2160=m -CONFIG_DVB_S5H1409=m -CONFIG_DVB_AU8522=m -CONFIG_DVB_AU8522_DTV=m -CONFIG_DVB_AU8522_V4L=m -CONFIG_DVB_S5H1411=m - -# -# ISDB-T (terrestrial) frontends -# -CONFIG_DVB_S921=m -CONFIG_DVB_DIB8000=m -CONFIG_DVB_MB86A20S=m - -# -# ISDB-S (satellite) & ISDB-T (terrestrial) frontends -# -CONFIG_DVB_TC90522=m -CONFIG_DVB_MN88443X=m - -# -# Digital terrestrial only tuners/PLL -# -CONFIG_DVB_PLL=m -CONFIG_DVB_TUNER_DIB0070=m -CONFIG_DVB_TUNER_DIB0090=m - -# -# SEC control devices for DVB-S -# -CONFIG_DVB_DRX39XYJ=m -CONFIG_DVB_LNBH25=m -CONFIG_DVB_LNBH29=m -CONFIG_DVB_LNBP21=m -CONFIG_DVB_LNBP22=m -CONFIG_DVB_ISL6405=m -CONFIG_DVB_ISL6421=m -CONFIG_DVB_ISL6423=m -CONFIG_DVB_A8293=m -CONFIG_DVB_LGS8GL5=m -CONFIG_DVB_LGS8GXX=m -CONFIG_DVB_ATBM8830=m -CONFIG_DVB_TDA665x=m -CONFIG_DVB_IX2505V=m -CONFIG_DVB_M88RS2000=m -CONFIG_DVB_AF9033=m -CONFIG_DVB_HORUS3A=m -CONFIG_DVB_ASCOT2E=m -CONFIG_DVB_HELENE=m - -# -# Common Interface (EN50221) controller drivers -# -CONFIG_DVB_CXD2099=m -CONFIG_DVB_SP2=m - -# -# Tools to develop new frontends -# -CONFIG_DVB_DUMMY_FE=m -# end of Customise DVB Frontends - -# -# Graphics support -# -CONFIG_AGP=m -CONFIG_AGP_AMD64=m -CONFIG_AGP_INTEL=m -CONFIG_AGP_SIS=m -CONFIG_AGP_VIA=m -CONFIG_INTEL_GTT=m -CONFIG_VGA_ARB=y -CONFIG_VGA_ARB_MAX_GPUS=10 -CONFIG_VGA_SWITCHEROO=y -CONFIG_DRM=m -CONFIG_DRM_MIPI_DBI=m -CONFIG_DRM_MIPI_DSI=y -CONFIG_DRM_DP_AUX_CHARDEV=y -# CONFIG_DRM_DEBUG_SELFTEST is not set -CONFIG_DRM_KMS_HELPER=m -CONFIG_DRM_KMS_FB_HELPER=y -# CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS is not set -CONFIG_DRM_FBDEV_EMULATION=y -CONFIG_DRM_FBDEV_OVERALLOC=100 -# CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM is not set -CONFIG_DRM_LOAD_EDID_FIRMWARE=y -CONFIG_DRM_DP_CEC=y -CONFIG_DRM_TTM=m -CONFIG_DRM_TTM_DMA_PAGE_POOL=y -CONFIG_DRM_VRAM_HELPER=m -CONFIG_DRM_TTM_HELPER=m -CONFIG_DRM_GEM_CMA_HELPER=y -CONFIG_DRM_KMS_CMA_HELPER=y -CONFIG_DRM_GEM_SHMEM_HELPER=y -CONFIG_DRM_SCHED=m - -# -# I2C encoder or helper chips -# -CONFIG_DRM_I2C_CH7006=m -CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_I2C_NXP_TDA998X=m -CONFIG_DRM_I2C_NXP_TDA9950=m -# end of I2C encoder or helper chips - -# -# ARM devices -# -CONFIG_DRM_KOMEDA=m -# end of ARM devices - -CONFIG_DRM_RADEON=m -CONFIG_DRM_RADEON_USERPTR=y -CONFIG_DRM_AMDGPU=m -CONFIG_DRM_AMDGPU_SI=y -CONFIG_DRM_AMDGPU_CIK=y -CONFIG_DRM_AMDGPU_USERPTR=y -# CONFIG_DRM_AMDGPU_GART_DEBUGFS is not set - -# -# ACP (Audio CoProcessor) Configuration -# -CONFIG_DRM_AMD_ACP=y -# end of ACP (Audio CoProcessor) Configuration - -# -# Display Engine Configuration -# -CONFIG_DRM_AMD_DC=y -CONFIG_DRM_AMD_DC_DCN=y 
-CONFIG_DRM_AMD_DC_HDCP=y -# CONFIG_DEBUG_KERNEL_DC is not set -# end of Display Engine Configuration - -CONFIG_HSA_AMD=y -CONFIG_DRM_NOUVEAU=m -# CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT is not set -CONFIG_NOUVEAU_DEBUG=5 -CONFIG_NOUVEAU_DEBUG_DEFAULT=3 -# CONFIG_NOUVEAU_DEBUG_MMU is not set -CONFIG_DRM_NOUVEAU_BACKLIGHT=y -CONFIG_DRM_NOUVEAU_SVM=y -CONFIG_DRM_I915=m -CONFIG_DRM_I915_FORCE_PROBE="*" -CONFIG_DRM_I915_CAPTURE_ERROR=y -CONFIG_DRM_I915_COMPRESS_ERROR=y -CONFIG_DRM_I915_USERPTR=y -CONFIG_DRM_I915_GVT=y -CONFIG_DRM_I915_GVT_KVMGT=m - -# -# drm/i915 Debugging -# -# CONFIG_DRM_I915_WERROR is not set -# CONFIG_DRM_I915_DEBUG is not set -# CONFIG_DRM_I915_DEBUG_MMIO is not set -# CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS is not set -# CONFIG_DRM_I915_SW_FENCE_CHECK_DAG is not set -# CONFIG_DRM_I915_DEBUG_GUC is not set -# CONFIG_DRM_I915_SELFTEST is not set -# CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS is not set -# CONFIG_DRM_I915_DEBUG_VBLANK_EVADE is not set -# CONFIG_DRM_I915_DEBUG_RUNTIME_PM is not set -# end of drm/i915 Debugging - -# -# drm/i915 Profile Guided Optimisation -# -CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND=250 -CONFIG_DRM_I915_HEARTBEAT_INTERVAL=2500 -CONFIG_DRM_I915_PREEMPT_TIMEOUT=640 -CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT=8000 -CONFIG_DRM_I915_STOP_TIMEOUT=100 -CONFIG_DRM_I915_TIMESLICE_DURATION=1 -# end of drm/i915 Profile Guided Optimisation - -CONFIG_DRM_VGEM=m -CONFIG_DRM_VKMS=m -CONFIG_DRM_VMWGFX=m -CONFIG_DRM_VMWGFX_FBCON=y -CONFIG_DRM_GMA500=m -CONFIG_DRM_GMA600=y -CONFIG_DRM_GMA3600=y -CONFIG_DRM_UDL=m -CONFIG_DRM_AST=m -CONFIG_DRM_MGAG200=m -CONFIG_DRM_CIRRUS_QEMU=m -CONFIG_DRM_RCAR_DW_HDMI=m -CONFIG_DRM_RCAR_LVDS=m -CONFIG_DRM_QXL=m -CONFIG_DRM_BOCHS=m -CONFIG_DRM_VIRTIO_GPU=m -CONFIG_DRM_PANEL=y - -# -# Display Panels -# -CONFIG_DRM_PANEL_ARM_VERSATILE=m -CONFIG_DRM_PANEL_BOE_HIMAX8279D=m -CONFIG_DRM_PANEL_BOE_TV101WUM_NL6=m -CONFIG_DRM_PANEL_LVDS=m -CONFIG_DRM_PANEL_SIMPLE=m -CONFIG_DRM_PANEL_ELIDA_KD35T133=m -CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02=m -CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m -CONFIG_DRM_PANEL_ILITEK_IL9322=m -CONFIG_DRM_PANEL_ILITEK_ILI9881C=m -CONFIG_DRM_PANEL_INNOLUX_P079ZCA=m -CONFIG_DRM_PANEL_JDI_LT070ME05000=m -CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04=m -CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829=m -CONFIG_DRM_PANEL_SAMSUNG_LD9040=m -CONFIG_DRM_PANEL_LG_LB035Q02=m -CONFIG_DRM_PANEL_LG_LG4573=m -CONFIG_DRM_PANEL_NEC_NL8048HL11=m -CONFIG_DRM_PANEL_NOVATEK_NT35510=m -CONFIG_DRM_PANEL_NOVATEK_NT39016=m -CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO=m -CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m -CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS=m -CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m -CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m -CONFIG_DRM_PANEL_RAYDIUM_RM67191=m -CONFIG_DRM_PANEL_RAYDIUM_RM68200=m -CONFIG_DRM_PANEL_ROCKTECH_JH057N00900=m -CONFIG_DRM_PANEL_RONBO_RB070D30=m -CONFIG_DRM_PANEL_SAMSUNG_S6D16D0=m -CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2=m -CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03=m -CONFIG_DRM_PANEL_SAMSUNG_S6E63M0=m -CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01=m -CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0=m -CONFIG_DRM_PANEL_SEIKO_43WVF1G=m -CONFIG_DRM_PANEL_SHARP_LQ101R1SX01=m -CONFIG_DRM_PANEL_SHARP_LS037V7DW01=m -CONFIG_DRM_PANEL_SHARP_LS043T1LE01=m -CONFIG_DRM_PANEL_SITRONIX_ST7701=m -CONFIG_DRM_PANEL_SITRONIX_ST7789V=m -CONFIG_DRM_PANEL_SONY_ACX424AKP=m -CONFIG_DRM_PANEL_SONY_ACX565AKM=m -CONFIG_DRM_PANEL_TPO_TD028TTEC1=m -CONFIG_DRM_PANEL_TPO_TD043MTEA1=m -CONFIG_DRM_PANEL_TPO_TPG110=m -CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA=m -CONFIG_DRM_PANEL_XINPENG_XPP055C272=m -# end of Display 
Panels - -CONFIG_DRM_BRIDGE=y -CONFIG_DRM_PANEL_BRIDGE=y - -# -# Display Interface Bridges -# -CONFIG_DRM_CDNS_DSI=m -CONFIG_DRM_DISPLAY_CONNECTOR=m -CONFIG_DRM_LVDS_CODEC=m -CONFIG_DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW=m -CONFIG_DRM_NXP_PTN3460=m -CONFIG_DRM_PARADE_PS8622=m -CONFIG_DRM_PARADE_PS8640=m -CONFIG_DRM_SIL_SII8620=m -CONFIG_DRM_SII902X=m -CONFIG_DRM_SII9234=m -CONFIG_DRM_SIMPLE_BRIDGE=m -CONFIG_DRM_THINE_THC63LVD1024=m -CONFIG_DRM_TOSHIBA_TC358764=m -CONFIG_DRM_TOSHIBA_TC358767=m -CONFIG_DRM_TOSHIBA_TC358768=m -CONFIG_DRM_TI_TFP410=m -CONFIG_DRM_TI_SN65DSI86=m -CONFIG_DRM_TI_TPD12S015=m -CONFIG_DRM_ANALOGIX_ANX6345=m -CONFIG_DRM_ANALOGIX_ANX78XX=m -CONFIG_DRM_ANALOGIX_DP=m -CONFIG_DRM_I2C_ADV7511=m -CONFIG_DRM_I2C_ADV7511_AUDIO=y -CONFIG_DRM_I2C_ADV7511_CEC=y -CONFIG_DRM_DW_HDMI=m -CONFIG_DRM_DW_HDMI_AHB_AUDIO=m -CONFIG_DRM_DW_HDMI_I2S_AUDIO=m -CONFIG_DRM_DW_HDMI_CEC=m -# end of Display Interface Bridges - -# CONFIG_DRM_ETNAVIV is not set -CONFIG_DRM_ARCPGU=m -CONFIG_DRM_MXS=y -CONFIG_DRM_MXSFB=m -CONFIG_DRM_GM12U320=m -CONFIG_TINYDRM_HX8357D=m -CONFIG_TINYDRM_ILI9225=m -CONFIG_TINYDRM_ILI9341=m -CONFIG_TINYDRM_ILI9486=m -CONFIG_TINYDRM_MI0283QT=m -CONFIG_TINYDRM_REPAPER=m -CONFIG_TINYDRM_ST7586=m -CONFIG_TINYDRM_ST7735R=m -CONFIG_DRM_XEN=y -CONFIG_DRM_XEN_FRONTEND=m -CONFIG_DRM_VBOXVIDEO=m -# CONFIG_DRM_LEGACY is not set -CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y - -# -# Frame buffer Devices -# -CONFIG_FB_CMDLINE=y -CONFIG_FB_NOTIFY=y -CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y -CONFIG_FB_BOOT_VESA_SUPPORT=y -CONFIG_FB_CFB_FILLRECT=y -CONFIG_FB_CFB_COPYAREA=y -CONFIG_FB_CFB_IMAGEBLIT=y -CONFIG_FB_SYS_FILLRECT=m -CONFIG_FB_SYS_COPYAREA=m -CONFIG_FB_SYS_IMAGEBLIT=m -# CONFIG_FB_FOREIGN_ENDIAN is not set -CONFIG_FB_SYS_FOPS=m -CONFIG_FB_DEFERRED_IO=y -CONFIG_FB_BACKLIGHT=m -CONFIG_FB_MODE_HELPERS=y -CONFIG_FB_TILEBLITTING=y - -# -# Frame buffer hardware drivers -# -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ARC is not set -# CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_VGA16 is not set -# CONFIG_FB_UVESA is not set -CONFIG_FB_VESA=y -CONFIG_FB_EFI=y -# CONFIG_FB_N411 is not set -# CONFIG_FB_HGA is not set -# CONFIG_FB_OPENCORES is not set -# CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set -# CONFIG_FB_I740 is not set -# CONFIG_FB_LE80578 is not set -# CONFIG_FB_INTEL is not set -# CONFIG_FB_MATROX is not set -# CONFIG_FB_RADEON is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_S3 is not set -# CONFIG_FB_SAVAGE is not set -# CONFIG_FB_SIS is not set -# CONFIG_FB_VIA is not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VOODOO1 is not set -# CONFIG_FB_VT8623 is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_ARK is not set -# CONFIG_FB_PM3 is not set -# CONFIG_FB_CARMINE is not set -# CONFIG_FB_SM501 is not set -# CONFIG_FB_SMSCUFX is not set -# CONFIG_FB_UDL is not set -# CONFIG_FB_IBM_GXT4500 is not set -# CONFIG_FB_VIRTUAL is not set -CONFIG_XEN_FBDEV_FRONTEND=m -# CONFIG_FB_METRONOME is not set -# CONFIG_FB_MB862XX is not set -CONFIG_FB_HYPERV=m -CONFIG_FB_SIMPLE=y -# CONFIG_FB_SSD1307 is not set -# CONFIG_FB_SM712 is not set -# end of Frame buffer Devices - -# -# Backlight & LCD device support -# -CONFIG_LCD_CLASS_DEVICE=m -CONFIG_LCD_L4F00242T03=m -CONFIG_LCD_LMS283GF05=m -CONFIG_LCD_LTV350QV=m -CONFIG_LCD_ILI922X=m -CONFIG_LCD_ILI9320=m -CONFIG_LCD_TDO24M=m 
-CONFIG_LCD_VGG2432A4=m -CONFIG_LCD_PLATFORM=m -CONFIG_LCD_AMS369FG06=m -CONFIG_LCD_LMS501KF03=m -CONFIG_LCD_HX8357=m -CONFIG_LCD_OTM3225A=m -CONFIG_BACKLIGHT_CLASS_DEVICE=y -CONFIG_BACKLIGHT_GENERIC=m -CONFIG_BACKLIGHT_LM3533=m -CONFIG_BACKLIGHT_PWM=m -CONFIG_BACKLIGHT_DA903X=m -CONFIG_BACKLIGHT_DA9052=m -CONFIG_BACKLIGHT_MAX8925=m -CONFIG_BACKLIGHT_APPLE=m -CONFIG_BACKLIGHT_QCOM_WLED=m -CONFIG_BACKLIGHT_SAHARA=m -CONFIG_BACKLIGHT_WM831X=m -CONFIG_BACKLIGHT_ADP5520=m -CONFIG_BACKLIGHT_ADP8860=m -CONFIG_BACKLIGHT_ADP8870=m -CONFIG_BACKLIGHT_88PM860X=m -CONFIG_BACKLIGHT_PCF50633=m -CONFIG_BACKLIGHT_AAT2870=m -CONFIG_BACKLIGHT_LM3630A=m -CONFIG_BACKLIGHT_LM3639=m -CONFIG_BACKLIGHT_LP855X=m -CONFIG_BACKLIGHT_LP8788=m -CONFIG_BACKLIGHT_PANDORA=m -CONFIG_BACKLIGHT_SKY81452=m -CONFIG_BACKLIGHT_TPS65217=m -CONFIG_BACKLIGHT_AS3711=m -CONFIG_BACKLIGHT_GPIO=m -CONFIG_BACKLIGHT_LV5207LP=m -CONFIG_BACKLIGHT_BD6107=m -CONFIG_BACKLIGHT_ARCXCNN=m -CONFIG_BACKLIGHT_RAVE_SP=m -CONFIG_BACKLIGHT_LED=m -# end of Backlight & LCD device support - -CONFIG_VIDEOMODE_HELPERS=y -CONFIG_HDMI=y - -# -# Console display driver support -# -CONFIG_VGA_CONSOLE=y -CONFIG_VGACON_SOFT_SCROLLBACK=y -CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 -# CONFIG_VGACON_SOFT_SCROLLBACK_PERSISTENT_ENABLE_BY_DEFAULT is not set -CONFIG_DUMMY_CONSOLE=y -CONFIG_DUMMY_CONSOLE_COLUMNS=80 -CONFIG_DUMMY_CONSOLE_ROWS=25 -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y -CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER=y -# end of Console display driver support - -# CONFIG_LOGO is not set -# end of Graphics support - -CONFIG_SOUND=m -CONFIG_SOUND_OSS_CORE=y -# CONFIG_SOUND_OSS_CORE_PRECLAIM is not set -CONFIG_SND=m -CONFIG_SND_TIMER=m -CONFIG_SND_PCM=m -CONFIG_SND_PCM_ELD=y -CONFIG_SND_PCM_IEC958=y -CONFIG_SND_DMAENGINE_PCM=m -CONFIG_SND_HWDEP=m -CONFIG_SND_SEQ_DEVICE=m -CONFIG_SND_RAWMIDI=m -CONFIG_SND_COMPRESS_OFFLOAD=m -CONFIG_SND_JACK=y -CONFIG_SND_JACK_INPUT_DEV=y -CONFIG_SND_OSSEMUL=y -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_PCM_OSS_PLUGINS=y -CONFIG_SND_PCM_TIMER=y -CONFIG_SND_HRTIMER=m -CONFIG_SND_DYNAMIC_MINORS=y -CONFIG_SND_MAX_CARDS=32 -# CONFIG_SND_SUPPORT_OLD_API is not set -CONFIG_SND_PROC_FS=y -CONFIG_SND_VERBOSE_PROCFS=y -CONFIG_SND_VERBOSE_PRINTK=y -CONFIG_SND_DEBUG=y -# CONFIG_SND_DEBUG_VERBOSE is not set -# CONFIG_SND_PCM_XRUN_DEBUG is not set -# CONFIG_SND_CTL_VALIDATION is not set -CONFIG_SND_VMASTER=y -CONFIG_SND_DMA_SGBUF=y -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_SEQUENCER_OSS=m -CONFIG_SND_SEQ_HRTIMER_DEFAULT=y -CONFIG_SND_SEQ_MIDI_EVENT=m -CONFIG_SND_SEQ_MIDI=m -CONFIG_SND_SEQ_MIDI_EMUL=m -CONFIG_SND_SEQ_VIRMIDI=m -CONFIG_SND_MPU401_UART=m -CONFIG_SND_OPL3_LIB=m -CONFIG_SND_OPL3_LIB_SEQ=m -CONFIG_SND_VX_LIB=m -CONFIG_SND_AC97_CODEC=m -CONFIG_SND_DRIVERS=y -# CONFIG_SND_PCSP is not set -CONFIG_SND_DUMMY=m -CONFIG_SND_ALOOP=m -CONFIG_SND_VIRMIDI=m -CONFIG_SND_MTPAV=m -CONFIG_SND_MTS64=m -CONFIG_SND_SERIAL_U16550=m -CONFIG_SND_MPU401=m -CONFIG_SND_PORTMAN2X4=m -CONFIG_SND_AC97_POWER_SAVE=y -CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 -CONFIG_SND_SB_COMMON=m -CONFIG_SND_PCI=y -CONFIG_SND_AD1889=m -CONFIG_SND_ALS300=m -CONFIG_SND_ALS4000=m -CONFIG_SND_ALI5451=m -CONFIG_SND_ASIHPI=m -CONFIG_SND_ATIIXP=m -CONFIG_SND_ATIIXP_MODEM=m -CONFIG_SND_AU8810=m -CONFIG_SND_AU8820=m -CONFIG_SND_AU8830=m -CONFIG_SND_AW2=m -CONFIG_SND_AZT3328=m -CONFIG_SND_BT87X=m -# CONFIG_SND_BT87X_OVERCLOCK is not set -CONFIG_SND_CA0106=m -CONFIG_SND_CMIPCI=m 
-CONFIG_SND_OXYGEN_LIB=m -CONFIG_SND_OXYGEN=m -CONFIG_SND_CS4281=m -CONFIG_SND_CS46XX=m -CONFIG_SND_CS46XX_NEW_DSP=y -CONFIG_SND_CTXFI=m -CONFIG_SND_DARLA20=m -CONFIG_SND_GINA20=m -CONFIG_SND_LAYLA20=m -CONFIG_SND_DARLA24=m -CONFIG_SND_GINA24=m -CONFIG_SND_LAYLA24=m -CONFIG_SND_MONA=m -CONFIG_SND_MIA=m -CONFIG_SND_ECHO3G=m -CONFIG_SND_INDIGO=m -CONFIG_SND_INDIGOIO=m -CONFIG_SND_INDIGODJ=m -CONFIG_SND_INDIGOIOX=m -CONFIG_SND_INDIGODJX=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_EMU10K1_SEQ=m -CONFIG_SND_EMU10K1X=m -CONFIG_SND_ENS1370=m -CONFIG_SND_ENS1371=m -CONFIG_SND_ES1938=m -CONFIG_SND_ES1968=m -CONFIG_SND_ES1968_INPUT=y -CONFIG_SND_ES1968_RADIO=y -CONFIG_SND_FM801=m -CONFIG_SND_FM801_TEA575X_BOOL=y -CONFIG_SND_HDSP=m -CONFIG_SND_HDSPM=m -CONFIG_SND_ICE1712=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_INTEL8X0M=m -CONFIG_SND_KORG1212=m -CONFIG_SND_LOLA=m -CONFIG_SND_LX6464ES=m -CONFIG_SND_MAESTRO3=m -CONFIG_SND_MAESTRO3_INPUT=y -CONFIG_SND_MIXART=m -CONFIG_SND_NM256=m -CONFIG_SND_PCXHR=m -CONFIG_SND_RIPTIDE=m -CONFIG_SND_RME32=m -CONFIG_SND_RME96=m -CONFIG_SND_RME9652=m -CONFIG_SND_SONICVIBES=m -CONFIG_SND_TRIDENT=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VIA82XX_MODEM=m -CONFIG_SND_VIRTUOSO=m -CONFIG_SND_VX222=m -CONFIG_SND_YMFPCI=m - -# -# HD-Audio -# -CONFIG_SND_HDA=m -CONFIG_SND_HDA_INTEL=m -CONFIG_SND_HDA_HWDEP=y -CONFIG_SND_HDA_RECONFIG=y -CONFIG_SND_HDA_INPUT_BEEP=y -CONFIG_SND_HDA_INPUT_BEEP_MODE=1 -CONFIG_SND_HDA_PATCH_LOADER=y -CONFIG_SND_HDA_CODEC_REALTEK=m -CONFIG_SND_HDA_CODEC_ANALOG=m -CONFIG_SND_HDA_CODEC_SIGMATEL=m -CONFIG_SND_HDA_CODEC_VIA=m -CONFIG_SND_HDA_CODEC_HDMI=m -CONFIG_SND_HDA_CODEC_CIRRUS=m -CONFIG_SND_HDA_CODEC_CONEXANT=m -CONFIG_SND_HDA_CODEC_CA0110=m -CONFIG_SND_HDA_CODEC_CA0132=m -CONFIG_SND_HDA_CODEC_CA0132_DSP=y -CONFIG_SND_HDA_CODEC_CMEDIA=m -CONFIG_SND_HDA_CODEC_SI3054=m -CONFIG_SND_HDA_GENERIC=m -CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 -# end of HD-Audio - -CONFIG_SND_HDA_CORE=m -CONFIG_SND_HDA_DSP_LOADER=y -CONFIG_SND_HDA_COMPONENT=y -CONFIG_SND_HDA_I915=y -CONFIG_SND_HDA_EXT_CORE=m -CONFIG_SND_HDA_PREALLOC_SIZE=0 -CONFIG_SND_INTEL_NHLT=y -CONFIG_SND_INTEL_DSP_CONFIG=m -CONFIG_SND_SPI=y -CONFIG_SND_USB=y -CONFIG_SND_USB_AUDIO=m -CONFIG_SND_USB_AUDIO_USE_MEDIA_CONTROLLER=y -CONFIG_SND_USB_UA101=m -CONFIG_SND_USB_USX2Y=m -CONFIG_SND_USB_CAIAQ=m -CONFIG_SND_USB_CAIAQ_INPUT=y -CONFIG_SND_USB_US122L=m -CONFIG_SND_USB_6FIRE=m -CONFIG_SND_USB_HIFACE=m -CONFIG_SND_BCD2000=m -CONFIG_SND_USB_LINE6=m -CONFIG_SND_USB_POD=m -CONFIG_SND_USB_PODHD=m -CONFIG_SND_USB_TONEPORT=m -CONFIG_SND_USB_VARIAX=m -CONFIG_SND_FIREWIRE=y -CONFIG_SND_FIREWIRE_LIB=m -CONFIG_SND_DICE=m -CONFIG_SND_OXFW=m -CONFIG_SND_ISIGHT=m -CONFIG_SND_FIREWORKS=m -CONFIG_SND_BEBOB=m -CONFIG_SND_FIREWIRE_DIGI00X=m -CONFIG_SND_FIREWIRE_TASCAM=m -CONFIG_SND_FIREWIRE_MOTU=m -CONFIG_SND_FIREFACE=m -CONFIG_SND_PCMCIA=y -CONFIG_SND_VXPOCKET=m -CONFIG_SND_PDAUDIOCF=m -CONFIG_SND_SOC=m -CONFIG_SND_SOC_AC97_BUS=y -CONFIG_SND_SOC_GENERIC_DMAENGINE_PCM=y -CONFIG_SND_SOC_COMPRESS=y -CONFIG_SND_SOC_TOPOLOGY=y -CONFIG_SND_SOC_ACPI=m -CONFIG_SND_SOC_AMD_ACP=m -CONFIG_SND_SOC_AMD_CZ_DA7219MX98357_MACH=m -CONFIG_SND_SOC_AMD_CZ_RT5645_MACH=m -CONFIG_SND_SOC_AMD_ACP3x=m -CONFIG_SND_SOC_AMD_RV_RT5682_MACH=m -CONFIG_SND_ATMEL_SOC=m -CONFIG_SND_SOC_MIKROE_PROTO=m -CONFIG_SND_BCM63XX_I2S_WHISTLER=m -CONFIG_SND_DESIGNWARE_I2S=m -CONFIG_SND_DESIGNWARE_PCM=y - -# -# SoC Audio for Freescale CPUs -# - -# -# Common SoC Audio options for Freescale CPUs: -# -# CONFIG_SND_SOC_FSL_ASRC is not set -# CONFIG_SND_SOC_FSL_SAI is not set 
-# CONFIG_SND_SOC_FSL_AUDMIX is not set -# CONFIG_SND_SOC_FSL_SSI is not set -# CONFIG_SND_SOC_FSL_SPDIF is not set -# CONFIG_SND_SOC_FSL_ESAI is not set -# CONFIG_SND_SOC_FSL_MICFIL is not set -# CONFIG_SND_SOC_IMX_AUDMUX is not set -# end of SoC Audio for Freescale CPUs - -CONFIG_SND_I2S_HI6210_I2S=m -CONFIG_SND_SOC_IMG=y -CONFIG_SND_SOC_IMG_I2S_IN=m -CONFIG_SND_SOC_IMG_I2S_OUT=m -CONFIG_SND_SOC_IMG_PARALLEL_OUT=m -CONFIG_SND_SOC_IMG_SPDIF_IN=m -CONFIG_SND_SOC_IMG_SPDIF_OUT=m -CONFIG_SND_SOC_IMG_PISTACHIO_INTERNAL_DAC=m -CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y -CONFIG_SND_SST_IPC=m -CONFIG_SND_SST_IPC_PCI=m -CONFIG_SND_SST_IPC_ACPI=m -CONFIG_SND_SOC_INTEL_SST_ACPI=m -CONFIG_SND_SOC_INTEL_SST=m -CONFIG_SND_SOC_INTEL_SST_FIRMWARE=m -CONFIG_SND_SOC_INTEL_HASWELL=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_PCI=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_ACPI=m -CONFIG_SND_SOC_INTEL_SKYLAKE=m -CONFIG_SND_SOC_INTEL_SKL=m -CONFIG_SND_SOC_INTEL_APL=m -CONFIG_SND_SOC_INTEL_KBL=m -CONFIG_SND_SOC_INTEL_GLK=m -CONFIG_SND_SOC_INTEL_CNL=m -CONFIG_SND_SOC_INTEL_CFL=m -CONFIG_SND_SOC_INTEL_CML_H=m -CONFIG_SND_SOC_INTEL_CML_LP=m -CONFIG_SND_SOC_INTEL_SKYLAKE_FAMILY=m -CONFIG_SND_SOC_INTEL_SKYLAKE_SSP_CLK=m -# CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC is not set -CONFIG_SND_SOC_INTEL_SKYLAKE_COMMON=m -CONFIG_SND_SOC_ACPI_INTEL_MATCH=m -CONFIG_SND_SOC_INTEL_MACH=y -# CONFIG_SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES is not set -CONFIG_SND_SOC_INTEL_HASWELL_MACH=m -CONFIG_SND_SOC_INTEL_BDW_RT5650_MACH=m -CONFIG_SND_SOC_INTEL_BDW_RT5677_MACH=m -CONFIG_SND_SOC_INTEL_BROADWELL_MACH=m -CONFIG_SND_SOC_INTEL_BYTCR_RT5640_MACH=m -CONFIG_SND_SOC_INTEL_BYTCR_RT5651_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_RT5672_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_RT5645_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_NAU8824_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_CX2072X_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_DA7213_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_ES8316_MACH=m -# CONFIG_SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH is not set -CONFIG_SND_SOC_INTEL_SKL_RT286_MACH=m -CONFIG_SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH=m -CONFIG_SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_DA7219_MAX98357A_GENERIC=m -CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_COMMON=m -CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_BXT_RT298_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5660_MACH=m -CONFIG_SND_SOC_INTEL_GLK_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_GLK_RT5682_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_SKL_HDA_DSP_GENERIC_MACH=m -CONFIG_SND_SOC_INTEL_SOF_RT5682_MACH=m -CONFIG_SND_SOC_INTEL_SOF_PCM512x_MACH=m -CONFIG_SND_SOC_INTEL_CML_LP_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m -CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m -CONFIG_SND_SOC_MTK_BTCVSD=m -CONFIG_SND_SOC_SOF_TOPLEVEL=y -CONFIG_SND_SOC_SOF_PCI=m -CONFIG_SND_SOC_SOF_ACPI=m -CONFIG_SND_SOC_SOF_OF=m -# CONFIG_SND_SOC_SOF_DEBUG_PROBES is not set -# CONFIG_SND_SOC_SOF_DEVELOPER_SUPPORT is not set -CONFIG_SND_SOC_SOF=m -CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE=y -CONFIG_SND_SOC_SOF_INTEL_TOPLEVEL=y -CONFIG_SND_SOC_SOF_INTEL_ACPI=m -CONFIG_SND_SOC_SOF_INTEL_PCI=m -CONFIG_SND_SOC_SOF_INTEL_HIFI_EP_IPC=m -CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP=m -CONFIG_SND_SOC_SOF_INTEL_COMMON=m 
-CONFIG_SND_SOC_SOF_MERRIFIELD_SUPPORT=y -CONFIG_SND_SOC_SOF_MERRIFIELD=m -CONFIG_SND_SOC_SOF_APOLLOLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_APOLLOLAKE=m -CONFIG_SND_SOC_SOF_GEMINILAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_GEMINILAKE=m -CONFIG_SND_SOC_SOF_CANNONLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_CANNONLAKE=m -CONFIG_SND_SOC_SOF_COFFEELAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_COFFEELAKE=m -CONFIG_SND_SOC_SOF_ICELAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_ICELAKE=m -CONFIG_SND_SOC_SOF_COMETLAKE_LP=m -CONFIG_SND_SOC_SOF_COMETLAKE_LP_SUPPORT=y -CONFIG_SND_SOC_SOF_COMETLAKE_H=m -CONFIG_SND_SOC_SOF_COMETLAKE_H_SUPPORT=y -CONFIG_SND_SOC_SOF_TIGERLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_TIGERLAKE=m -CONFIG_SND_SOC_SOF_ELKHARTLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_ELKHARTLAKE=m -CONFIG_SND_SOC_SOF_JASPERLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_JASPERLAKE=m -CONFIG_SND_SOC_SOF_HDA_COMMON=m -CONFIG_SND_SOC_SOF_HDA_LINK=y -CONFIG_SND_SOC_SOF_HDA_AUDIO_CODEC=y -# CONFIG_SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1 is not set -CONFIG_SND_SOC_SOF_HDA_LINK_BASELINE=m -CONFIG_SND_SOC_SOF_HDA=m -CONFIG_SND_SOC_SOF_XTENSA=m - -# -# STMicroelectronics STM32 SOC audio support -# -# end of STMicroelectronics STM32 SOC audio support - -CONFIG_SND_SOC_XILINX_I2S=m -CONFIG_SND_SOC_XILINX_AUDIO_FORMATTER=m -CONFIG_SND_SOC_XILINX_SPDIF=m -CONFIG_SND_SOC_XTFPGA_I2S=m -CONFIG_ZX_TDM=m -CONFIG_SND_SOC_I2C_AND_SPI=m - -# -# CODEC drivers -# -CONFIG_SND_SOC_AC97_CODEC=m -CONFIG_SND_SOC_ADAU_UTILS=m -CONFIG_SND_SOC_ADAU1701=m -CONFIG_SND_SOC_ADAU17X1=m -CONFIG_SND_SOC_ADAU1761=m -CONFIG_SND_SOC_ADAU1761_I2C=m -CONFIG_SND_SOC_ADAU1761_SPI=m -CONFIG_SND_SOC_ADAU7002=m -CONFIG_SND_SOC_ADAU7118=m -CONFIG_SND_SOC_ADAU7118_HW=m -CONFIG_SND_SOC_ADAU7118_I2C=m -CONFIG_SND_SOC_AK4104=m -CONFIG_SND_SOC_AK4118=m -CONFIG_SND_SOC_AK4458=m -CONFIG_SND_SOC_AK4554=m -CONFIG_SND_SOC_AK4613=m -CONFIG_SND_SOC_AK4642=m -CONFIG_SND_SOC_AK5386=m -CONFIG_SND_SOC_AK5558=m -CONFIG_SND_SOC_ALC5623=m -CONFIG_SND_SOC_BD28623=m -# CONFIG_SND_SOC_BT_SCO is not set -CONFIG_SND_SOC_CPCAP=m -CONFIG_SND_SOC_CROS_EC_CODEC=m -CONFIG_SND_SOC_CS35L32=m -CONFIG_SND_SOC_CS35L33=m -CONFIG_SND_SOC_CS35L34=m -CONFIG_SND_SOC_CS35L35=m -CONFIG_SND_SOC_CS35L36=m -CONFIG_SND_SOC_CS42L42=m -CONFIG_SND_SOC_CS42L51=m -CONFIG_SND_SOC_CS42L51_I2C=m -CONFIG_SND_SOC_CS42L52=m -CONFIG_SND_SOC_CS42L56=m -CONFIG_SND_SOC_CS42L73=m -CONFIG_SND_SOC_CS4265=m -CONFIG_SND_SOC_CS4270=m -CONFIG_SND_SOC_CS4271=m -CONFIG_SND_SOC_CS4271_I2C=m -CONFIG_SND_SOC_CS4271_SPI=m -CONFIG_SND_SOC_CS42XX8=m -CONFIG_SND_SOC_CS42XX8_I2C=m -CONFIG_SND_SOC_CS43130=m -CONFIG_SND_SOC_CS4341=m -CONFIG_SND_SOC_CS4349=m -CONFIG_SND_SOC_CS53L30=m -CONFIG_SND_SOC_CX2072X=m -CONFIG_SND_SOC_DA7213=m -CONFIG_SND_SOC_DA7219=m -CONFIG_SND_SOC_DMIC=m -CONFIG_SND_SOC_HDMI_CODEC=m -CONFIG_SND_SOC_ES7134=m -CONFIG_SND_SOC_ES7241=m -CONFIG_SND_SOC_ES8316=m -CONFIG_SND_SOC_ES8328=m -CONFIG_SND_SOC_ES8328_I2C=m -CONFIG_SND_SOC_ES8328_SPI=m -CONFIG_SND_SOC_GTM601=m -CONFIG_SND_SOC_HDAC_HDMI=m -CONFIG_SND_SOC_HDAC_HDA=m -CONFIG_SND_SOC_INNO_RK3036=m -CONFIG_SND_SOC_LOCHNAGAR_SC=m -CONFIG_SND_SOC_MAX98088=m -CONFIG_SND_SOC_MAX98090=m -CONFIG_SND_SOC_MAX98357A=m -CONFIG_SND_SOC_MAX98504=m -CONFIG_SND_SOC_MAX9867=m -CONFIG_SND_SOC_MAX98927=m -CONFIG_SND_SOC_MAX98373=m -CONFIG_SND_SOC_MAX9860=m -CONFIG_SND_SOC_MSM8916_WCD_ANALOG=m -CONFIG_SND_SOC_MSM8916_WCD_DIGITAL=m -CONFIG_SND_SOC_PCM1681=m -CONFIG_SND_SOC_PCM1789=m -CONFIG_SND_SOC_PCM1789_I2C=m -CONFIG_SND_SOC_PCM179X=m -CONFIG_SND_SOC_PCM179X_I2C=m -CONFIG_SND_SOC_PCM179X_SPI=m -CONFIG_SND_SOC_PCM186X=m 
-CONFIG_SND_SOC_PCM186X_I2C=m -CONFIG_SND_SOC_PCM186X_SPI=m -CONFIG_SND_SOC_PCM3060=m -CONFIG_SND_SOC_PCM3060_I2C=m -CONFIG_SND_SOC_PCM3060_SPI=m -CONFIG_SND_SOC_PCM3168A=m -CONFIG_SND_SOC_PCM3168A_I2C=m -CONFIG_SND_SOC_PCM3168A_SPI=m -CONFIG_SND_SOC_PCM512x=m -CONFIG_SND_SOC_PCM512x_I2C=m -CONFIG_SND_SOC_PCM512x_SPI=m -CONFIG_SND_SOC_RK3328=m -CONFIG_SND_SOC_RL6231=m -CONFIG_SND_SOC_RL6347A=m -CONFIG_SND_SOC_RT286=m -CONFIG_SND_SOC_RT298=m -CONFIG_SND_SOC_RT1011=m -CONFIG_SND_SOC_RT1015=m -CONFIG_SND_SOC_RT1308_SDW=m -CONFIG_SND_SOC_RT5514=m -CONFIG_SND_SOC_RT5514_SPI=m -CONFIG_SND_SOC_RT5616=m -CONFIG_SND_SOC_RT5631=m -CONFIG_SND_SOC_RT5640=m -CONFIG_SND_SOC_RT5645=m -CONFIG_SND_SOC_RT5651=m -CONFIG_SND_SOC_RT5660=m -CONFIG_SND_SOC_RT5663=m -CONFIG_SND_SOC_RT5670=m -CONFIG_SND_SOC_RT5677=m -CONFIG_SND_SOC_RT5677_SPI=m -CONFIG_SND_SOC_RT5682=m -CONFIG_SND_SOC_RT5682_SDW=m -CONFIG_SND_SOC_RT700=m -CONFIG_SND_SOC_RT700_SDW=m -CONFIG_SND_SOC_RT711=m -CONFIG_SND_SOC_RT711_SDW=m -CONFIG_SND_SOC_RT715=m -CONFIG_SND_SOC_RT715_SDW=m -CONFIG_SND_SOC_SGTL5000=m -CONFIG_SND_SOC_SI476X=m -CONFIG_SND_SOC_SIGMADSP=m -CONFIG_SND_SOC_SIGMADSP_I2C=m -CONFIG_SND_SOC_SIGMADSP_REGMAP=m -CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m -CONFIG_SND_SOC_SIRF_AUDIO_CODEC=m -CONFIG_SND_SOC_SPDIF=m -CONFIG_SND_SOC_SSM2305=m -CONFIG_SND_SOC_SSM2602=m -CONFIG_SND_SOC_SSM2602_SPI=m -CONFIG_SND_SOC_SSM2602_I2C=m -CONFIG_SND_SOC_SSM4567=m -CONFIG_SND_SOC_STA32X=m -CONFIG_SND_SOC_STA350=m -CONFIG_SND_SOC_STI_SAS=m -CONFIG_SND_SOC_TAS2552=m -CONFIG_SND_SOC_TAS2562=m -CONFIG_SND_SOC_TAS2770=m -CONFIG_SND_SOC_TAS5086=m -CONFIG_SND_SOC_TAS571X=m -CONFIG_SND_SOC_TAS5720=m -CONFIG_SND_SOC_TAS6424=m -CONFIG_SND_SOC_TDA7419=m -CONFIG_SND_SOC_TFA9879=m -CONFIG_SND_SOC_TLV320AIC23=m -CONFIG_SND_SOC_TLV320AIC23_I2C=m -CONFIG_SND_SOC_TLV320AIC23_SPI=m -CONFIG_SND_SOC_TLV320AIC31XX=m -CONFIG_SND_SOC_TLV320AIC32X4=m -CONFIG_SND_SOC_TLV320AIC32X4_I2C=m -CONFIG_SND_SOC_TLV320AIC32X4_SPI=m -CONFIG_SND_SOC_TLV320AIC3X=m -CONFIG_SND_SOC_TLV320ADCX140=m -CONFIG_SND_SOC_TS3A227E=m -CONFIG_SND_SOC_TSCS42XX=m -CONFIG_SND_SOC_TSCS454=m -CONFIG_SND_SOC_UDA1334=m -CONFIG_SND_SOC_WCD9335=m -CONFIG_SND_SOC_WCD934X=m -CONFIG_SND_SOC_WM8510=m -CONFIG_SND_SOC_WM8523=m -CONFIG_SND_SOC_WM8524=m -CONFIG_SND_SOC_WM8580=m -CONFIG_SND_SOC_WM8711=m -CONFIG_SND_SOC_WM8728=m -CONFIG_SND_SOC_WM8731=m -CONFIG_SND_SOC_WM8737=m -CONFIG_SND_SOC_WM8741=m -CONFIG_SND_SOC_WM8750=m -CONFIG_SND_SOC_WM8753=m -CONFIG_SND_SOC_WM8770=m -CONFIG_SND_SOC_WM8776=m -CONFIG_SND_SOC_WM8782=m -CONFIG_SND_SOC_WM8804=m -CONFIG_SND_SOC_WM8804_I2C=m -CONFIG_SND_SOC_WM8804_SPI=m -CONFIG_SND_SOC_WM8903=m -CONFIG_SND_SOC_WM8904=m -CONFIG_SND_SOC_WM8960=m -CONFIG_SND_SOC_WM8962=m -CONFIG_SND_SOC_WM8974=m -CONFIG_SND_SOC_WM8978=m -CONFIG_SND_SOC_WM8985=m -CONFIG_SND_SOC_WSA881X=m -CONFIG_SND_SOC_ZX_AUD96P22=m -CONFIG_SND_SOC_MAX9759=m -CONFIG_SND_SOC_MT6351=m -CONFIG_SND_SOC_MT6358=m -CONFIG_SND_SOC_MT6660=m -CONFIG_SND_SOC_NAU8540=m -CONFIG_SND_SOC_NAU8810=m -CONFIG_SND_SOC_NAU8822=m -CONFIG_SND_SOC_NAU8824=m -CONFIG_SND_SOC_NAU8825=m -CONFIG_SND_SOC_TPA6130A2=m -# end of CODEC drivers - -CONFIG_SND_SIMPLE_CARD_UTILS=m -CONFIG_SND_SIMPLE_CARD=m -CONFIG_SND_AUDIO_GRAPH_CARD=m -CONFIG_SND_X86=y -CONFIG_HDMI_LPE_AUDIO=m -CONFIG_SND_SYNTH_EMUX=m -CONFIG_SND_XEN_FRONTEND=m -CONFIG_AC97_BUS=m - -# -# HID support -# -CONFIG_HID=m -CONFIG_HID_BATTERY_STRENGTH=y -CONFIG_HIDRAW=y -CONFIG_UHID=m -CONFIG_HID_GENERIC=m - -# -# Special HID drivers -# -CONFIG_HID_A4TECH=m -CONFIG_HID_ACCUTOUCH=m -CONFIG_HID_ACRUX=m 
-CONFIG_HID_ACRUX_FF=y -CONFIG_HID_APPLE=m -CONFIG_HID_APPLEIR=m -CONFIG_HID_ASUS=m -CONFIG_HID_AUREAL=m -CONFIG_HID_BELKIN=m -CONFIG_HID_BETOP_FF=m -CONFIG_HID_BIGBEN_FF=m -CONFIG_HID_CHERRY=m -CONFIG_HID_CHICONY=m -CONFIG_HID_CORSAIR=m -CONFIG_HID_COUGAR=m -CONFIG_HID_MACALLY=m -CONFIG_HID_PRODIKEYS=m -CONFIG_HID_CMEDIA=m -CONFIG_HID_CP2112=m -CONFIG_HID_CREATIVE_SB0540=m -CONFIG_HID_CYPRESS=m -CONFIG_HID_DRAGONRISE=m -CONFIG_DRAGONRISE_FF=y -CONFIG_HID_EMS_FF=m -CONFIG_HID_ELAN=m -CONFIG_HID_ELECOM=m -CONFIG_HID_ELO=m -CONFIG_HID_EZKEY=m -CONFIG_HID_GEMBIRD=m -CONFIG_HID_GFRM=m -CONFIG_HID_GLORIOUS=m -CONFIG_HID_HOLTEK=m -CONFIG_HOLTEK_FF=y -CONFIG_HID_GOOGLE_HAMMER=m -CONFIG_HID_GT683R=m -CONFIG_HID_KEYTOUCH=m -CONFIG_HID_KYE=m -CONFIG_HID_UCLOGIC=m -CONFIG_HID_WALTOP=m -CONFIG_HID_VIEWSONIC=m -CONFIG_HID_GYRATION=m -CONFIG_HID_ICADE=m -CONFIG_HID_ITE=m -CONFIG_HID_JABRA=m -CONFIG_HID_TWINHAN=m -CONFIG_HID_KENSINGTON=m -CONFIG_HID_LCPOWER=m -CONFIG_HID_LED=m -CONFIG_HID_LENOVO=m -CONFIG_HID_LOGITECH=m -CONFIG_HID_LOGITECH_DJ=m -CONFIG_HID_LOGITECH_HIDPP=m -CONFIG_LOGITECH_FF=y -CONFIG_LOGIRUMBLEPAD2_FF=y -CONFIG_LOGIG940_FF=y -CONFIG_LOGIWHEELS_FF=y -CONFIG_HID_MAGICMOUSE=m -CONFIG_HID_MALTRON=m -CONFIG_HID_MAYFLASH=m -CONFIG_HID_REDRAGON=m -CONFIG_HID_MICROSOFT=m -CONFIG_HID_MONTEREY=m -CONFIG_HID_MULTITOUCH=m -CONFIG_HID_NTI=m -CONFIG_HID_NTRIG=m -CONFIG_HID_ORTEK=m -CONFIG_HID_PANTHERLORD=m -CONFIG_PANTHERLORD_FF=y -CONFIG_HID_PENMOUNT=m -CONFIG_HID_PETALYNX=m -CONFIG_HID_PICOLCD=m -CONFIG_HID_PICOLCD_FB=y -CONFIG_HID_PICOLCD_BACKLIGHT=y -CONFIG_HID_PICOLCD_LCD=y -CONFIG_HID_PICOLCD_LEDS=y -CONFIG_HID_PICOLCD_CIR=y -CONFIG_HID_PLANTRONICS=m -CONFIG_HID_PRIMAX=m -CONFIG_HID_RETRODE=m -CONFIG_HID_ROCCAT=m -CONFIG_HID_SAITEK=m -CONFIG_HID_SAMSUNG=m -CONFIG_HID_SONY=m -CONFIG_SONY_FF=y -CONFIG_HID_SPEEDLINK=m -CONFIG_HID_STEAM=m -CONFIG_HID_STEELSERIES=m -CONFIG_HID_SUNPLUS=m -CONFIG_HID_RMI=m -CONFIG_HID_GREENASIA=m -CONFIG_GREENASIA_FF=y -CONFIG_HID_HYPERV_MOUSE=m -CONFIG_HID_SMARTJOYPLUS=m -CONFIG_SMARTJOYPLUS_FF=y -CONFIG_HID_TIVO=m -CONFIG_HID_TOPSEED=m -CONFIG_HID_THINGM=m -CONFIG_HID_THRUSTMASTER=m -CONFIG_THRUSTMASTER_FF=y -CONFIG_HID_UDRAW_PS3=m -CONFIG_HID_U2FZERO=m -CONFIG_HID_WACOM=m -CONFIG_HID_WIIMOTE=m -CONFIG_HID_XINMO=m -CONFIG_HID_ZEROPLUS=m -CONFIG_ZEROPLUS_FF=y -CONFIG_HID_ZYDACRON=m -CONFIG_HID_SENSOR_HUB=m -# CONFIG_HID_SENSOR_CUSTOM_SENSOR is not set -CONFIG_HID_ALPS=m -CONFIG_HID_MCP2221=m -# end of Special HID drivers - -# -# USB HID support -# -CONFIG_USB_HID=m -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y - -# -# USB HID Boot Protocol drivers -# -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -# end of USB HID Boot Protocol drivers -# end of USB HID support - -# -# I2C HID support -# -CONFIG_I2C_HID=m -# end of I2C HID support - -# -# Intel ISH HID support -# -CONFIG_INTEL_ISH_HID=m -CONFIG_INTEL_ISH_FIRMWARE_DOWNLOADER=m -# end of Intel ISH HID support -# end of HID support - -CONFIG_USB_OHCI_LITTLE_ENDIAN=y -CONFIG_USB_SUPPORT=y -CONFIG_USB_COMMON=y -CONFIG_USB_LED_TRIG=y -CONFIG_USB_ULPI_BUS=m -CONFIG_USB_CONN_GPIO=m -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB=y -CONFIG_USB_PCI=y -CONFIG_USB_ANNOUNCE_NEW_DEVICES=y - -# -# Miscellaneous USB options -# -CONFIG_USB_DEFAULT_PERSIST=y -CONFIG_USB_DYNAMIC_MINORS=y -# CONFIG_USB_OTG is not set -# CONFIG_USB_OTG_WHITELIST is not set -# CONFIG_USB_OTG_BLACKLIST_HUB is not set -CONFIG_USB_LEDS_TRIGGER_USBPORT=m -CONFIG_USB_AUTOSUSPEND_DELAY=2 -CONFIG_USB_MON=m - -# -# USB Host Controller Drivers -# 
-CONFIG_USB_C67X00_HCD=m -CONFIG_USB_XHCI_HCD=m -# CONFIG_USB_XHCI_DBGCAP is not set -CONFIG_USB_XHCI_PCI=m -CONFIG_USB_XHCI_PLATFORM=m -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_EHCI_TT_NEWSCHED=y -CONFIG_USB_EHCI_PCI=m -CONFIG_USB_EHCI_FSL=m -CONFIG_USB_EHCI_HCD_PLATFORM=m -CONFIG_USB_OXU210HP_HCD=m -CONFIG_USB_ISP116X_HCD=m -CONFIG_USB_FOTG210_HCD=m -CONFIG_USB_MAX3421_HCD=m -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_OHCI_HCD_PCI=m -# CONFIG_USB_OHCI_HCD_SSB is not set -CONFIG_USB_OHCI_HCD_PLATFORM=m -CONFIG_USB_UHCI_HCD=m -CONFIG_USB_U132_HCD=m -CONFIG_USB_SL811_HCD=m -# CONFIG_USB_SL811_HCD_ISO is not set -CONFIG_USB_SL811_CS=m -CONFIG_USB_R8A66597_HCD=m -CONFIG_USB_HCD_BCMA=m -CONFIG_USB_HCD_SSB=m -# CONFIG_USB_HCD_TEST_MODE is not set - -# -# USB Device Class drivers -# -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_WDM=m -CONFIG_USB_TMC=m - -# -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may -# - -# -# also be needed; see USB_STORAGE Help for more info -# -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_REALTEK=m -CONFIG_REALTEK_AUTOPM=y -CONFIG_USB_STORAGE_DATAFAB=m -CONFIG_USB_STORAGE_FREECOM=m -CONFIG_USB_STORAGE_ISD200=m -CONFIG_USB_STORAGE_USBAT=m -CONFIG_USB_STORAGE_SDDR09=m -CONFIG_USB_STORAGE_SDDR55=m -CONFIG_USB_STORAGE_JUMPSHOT=m -CONFIG_USB_STORAGE_ALAUDA=m -CONFIG_USB_STORAGE_ONETOUCH=m -CONFIG_USB_STORAGE_KARMA=m -CONFIG_USB_STORAGE_CYPRESS_ATACB=m -CONFIG_USB_STORAGE_ENE_UB6250=m -CONFIG_USB_UAS=m - -# -# USB Imaging devices -# -CONFIG_USB_MDC800=m -CONFIG_USB_MICROTEK=m -CONFIG_USBIP_CORE=m -CONFIG_USBIP_VHCI_HCD=m -CONFIG_USBIP_VHCI_HC_PORTS=8 -CONFIG_USBIP_VHCI_NR_HCS=1 -CONFIG_USBIP_HOST=m -CONFIG_USBIP_VUDC=m -# CONFIG_USBIP_DEBUG is not set -CONFIG_USB_CDNS3=m -CONFIG_USB_CDNS3_GADGET=y -CONFIG_USB_CDNS3_HOST=y -CONFIG_USB_CDNS3_PCI_WRAP=m -CONFIG_USB_MUSB_HDRC=m -# CONFIG_USB_MUSB_HOST is not set -# CONFIG_USB_MUSB_GADGET is not set -CONFIG_USB_MUSB_DUAL_ROLE=y - -# -# Platform Glue Layer -# - -# -# MUSB DMA mode -# -# CONFIG_MUSB_PIO_ONLY is not set -CONFIG_USB_DWC3=m -CONFIG_USB_DWC3_ULPI=y -# CONFIG_USB_DWC3_HOST is not set -# CONFIG_USB_DWC3_GADGET is not set -CONFIG_USB_DWC3_DUAL_ROLE=y - -# -# Platform Glue Driver Support -# -CONFIG_USB_DWC3_PCI=m -CONFIG_USB_DWC3_HAPS=m -CONFIG_USB_DWC3_OF_SIMPLE=m -CONFIG_USB_DWC2=m -# CONFIG_USB_DWC2_HOST is not set - -# -# Gadget/Dual-role mode requires USB Gadget support to be enabled -# -# CONFIG_USB_DWC2_PERIPHERAL is not set -CONFIG_USB_DWC2_DUAL_ROLE=y -CONFIG_USB_DWC2_PCI=m -# CONFIG_USB_DWC2_DEBUG is not set -# CONFIG_USB_DWC2_TRACK_MISSED_SOFS is not set -CONFIG_USB_CHIPIDEA=m -CONFIG_USB_CHIPIDEA_OF=m -CONFIG_USB_CHIPIDEA_PCI=m -CONFIG_USB_CHIPIDEA_UDC=y -CONFIG_USB_CHIPIDEA_HOST=y -CONFIG_USB_ISP1760=m -CONFIG_USB_ISP1760_HCD=y -CONFIG_USB_ISP1761_UDC=y -# CONFIG_USB_ISP1760_HOST_ROLE is not set -# CONFIG_USB_ISP1760_GADGET_ROLE is not set -CONFIG_USB_ISP1760_DUAL_ROLE=y - -# -# USB port drivers -# -CONFIG_USB_USS720=m -CONFIG_USB_SERIAL=y -CONFIG_USB_SERIAL_CONSOLE=y -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_SIMPLE=m -CONFIG_USB_SERIAL_AIRCABLE=m -CONFIG_USB_SERIAL_ARK3116=m -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_CH341=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_CP210X=m -CONFIG_USB_SERIAL_CYPRESS_M8=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m 
-CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_F81232=m -CONFIG_USB_SERIAL_F8153X=m -CONFIG_USB_SERIAL_GARMIN=m -CONFIG_USB_SERIAL_IPW=m -CONFIG_USB_SERIAL_IUU=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_METRO=m -CONFIG_USB_SERIAL_MOS7720=m -CONFIG_USB_SERIAL_MOS7715_PARPORT=y -CONFIG_USB_SERIAL_MOS7840=m -CONFIG_USB_SERIAL_MXUPORT=m -CONFIG_USB_SERIAL_NAVMAN=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_OTI6858=m -CONFIG_USB_SERIAL_QCAUX=m -CONFIG_USB_SERIAL_QUALCOMM=m -CONFIG_USB_SERIAL_SPCP8X5=m -CONFIG_USB_SERIAL_SAFE=m -# CONFIG_USB_SERIAL_SAFE_PADDED is not set -CONFIG_USB_SERIAL_SIERRAWIRELESS=m -CONFIG_USB_SERIAL_SYMBOL=m -CONFIG_USB_SERIAL_TI=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_WWAN=m -CONFIG_USB_SERIAL_OPTION=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_SERIAL_OPTICON=m -CONFIG_USB_SERIAL_XSENS_MT=m -CONFIG_USB_SERIAL_WISHBONE=m -CONFIG_USB_SERIAL_SSU100=m -CONFIG_USB_SERIAL_QT2=m -CONFIG_USB_SERIAL_UPD78F0730=m -CONFIG_USB_SERIAL_DEBUG=m - -# -# USB Miscellaneous drivers -# -CONFIG_USB_EMI62=m -CONFIG_USB_EMI26=m -CONFIG_USB_ADUTUX=m -CONFIG_USB_SEVSEG=m -CONFIG_USB_LEGOTOWER=m -CONFIG_USB_LCD=m -CONFIG_USB_CYPRESS_CY7C63=m -CONFIG_USB_CYTHERM=m -CONFIG_USB_IDMOUSE=m -CONFIG_USB_FTDI_ELAN=m -CONFIG_USB_APPLEDISPLAY=m -CONFIG_APPLE_MFI_FASTCHARGE=m -CONFIG_USB_SISUSBVGA=m -CONFIG_USB_SISUSBVGA_CON=y -CONFIG_USB_LD=m -CONFIG_USB_TRANCEVIBRATOR=m -CONFIG_USB_IOWARRIOR=m -CONFIG_USB_TEST=m -CONFIG_USB_EHSET_TEST_FIXTURE=m -CONFIG_USB_ISIGHTFW=m -CONFIG_USB_YUREX=m -CONFIG_USB_EZUSB_FX2=m -CONFIG_USB_HUB_USB251XB=m -CONFIG_USB_HSIC_USB3503=m -CONFIG_USB_HSIC_USB4604=m -CONFIG_USB_LINK_LAYER_TEST=m -CONFIG_USB_CHAOSKEY=m -CONFIG_USB_ATM=m -CONFIG_USB_SPEEDTOUCH=m -CONFIG_USB_CXACRU=m -CONFIG_USB_UEAGLEATM=m -CONFIG_USB_XUSBATM=m - -# -# USB Physical Layer drivers -# -CONFIG_USB_PHY=y -CONFIG_NOP_USB_XCEIV=m -CONFIG_USB_GPIO_VBUS=m -CONFIG_TAHVO_USB=m -# CONFIG_TAHVO_USB_HOST_BY_DEFAULT is not set -CONFIG_USB_ISP1301=m -# end of USB Physical Layer drivers - -CONFIG_USB_GADGET=m -# CONFIG_USB_GADGET_DEBUG is not set -# CONFIG_USB_GADGET_DEBUG_FILES is not set -# CONFIG_USB_GADGET_DEBUG_FS is not set -CONFIG_USB_GADGET_VBUS_DRAW=2 -CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 -CONFIG_U_SERIAL_CONSOLE=y - -# -# USB Peripheral Controller -# -CONFIG_USB_FOTG210_UDC=m -CONFIG_USB_GR_UDC=m -CONFIG_USB_R8A66597=m -CONFIG_USB_PXA27X=m -CONFIG_USB_MV_UDC=m -CONFIG_USB_MV_U3D=m -CONFIG_USB_SNP_CORE=m -CONFIG_USB_SNP_UDC_PLAT=m -CONFIG_USB_M66592=m -CONFIG_USB_BDC_UDC=m - -# -# Platform Support -# -CONFIG_USB_BDC_PCI=m -CONFIG_USB_AMD5536UDC=m -CONFIG_USB_NET2272=m -CONFIG_USB_NET2272_DMA=y -CONFIG_USB_NET2280=m -CONFIG_USB_GOKU=m -CONFIG_USB_EG20T=m -CONFIG_USB_GADGET_XILINX=m -CONFIG_USB_MAX3420_UDC=m -CONFIG_USB_DUMMY_HCD=m -# end of USB Peripheral Controller - -CONFIG_USB_LIBCOMPOSITE=m -CONFIG_USB_F_ACM=m -CONFIG_USB_F_SS_LB=m -CONFIG_USB_U_SERIAL=m -CONFIG_USB_U_ETHER=m -CONFIG_USB_U_AUDIO=m -CONFIG_USB_F_SERIAL=m -CONFIG_USB_F_OBEX=m -CONFIG_USB_F_NCM=m -CONFIG_USB_F_ECM=m -CONFIG_USB_F_PHONET=m -CONFIG_USB_F_EEM=m -CONFIG_USB_F_SUBSET=m -CONFIG_USB_F_RNDIS=m -CONFIG_USB_F_MASS_STORAGE=m -CONFIG_USB_F_FS=m -CONFIG_USB_F_UAC1=m -CONFIG_USB_F_UAC1_LEGACY=m -CONFIG_USB_F_UAC2=m -CONFIG_USB_F_UVC=m -CONFIG_USB_F_MIDI=m -CONFIG_USB_F_HID=m -CONFIG_USB_F_PRINTER=m -CONFIG_USB_F_TCM=m -CONFIG_USB_CONFIGFS=m 
-CONFIG_USB_CONFIGFS_SERIAL=y -CONFIG_USB_CONFIGFS_ACM=y -CONFIG_USB_CONFIGFS_OBEX=y -CONFIG_USB_CONFIGFS_NCM=y -CONFIG_USB_CONFIGFS_ECM=y -CONFIG_USB_CONFIGFS_ECM_SUBSET=y -CONFIG_USB_CONFIGFS_RNDIS=y -CONFIG_USB_CONFIGFS_EEM=y -CONFIG_USB_CONFIGFS_PHONET=y -CONFIG_USB_CONFIGFS_MASS_STORAGE=y -CONFIG_USB_CONFIGFS_F_LB_SS=y -CONFIG_USB_CONFIGFS_F_FS=y -CONFIG_USB_CONFIGFS_F_UAC1=y -CONFIG_USB_CONFIGFS_F_UAC1_LEGACY=y -CONFIG_USB_CONFIGFS_F_UAC2=y -CONFIG_USB_CONFIGFS_F_MIDI=y -CONFIG_USB_CONFIGFS_F_HID=y -CONFIG_USB_CONFIGFS_F_UVC=y -CONFIG_USB_CONFIGFS_F_PRINTER=y -CONFIG_USB_CONFIGFS_F_TCM=y - -# -# USB Gadget precomposed configurations -# -CONFIG_USB_ZERO=m -CONFIG_USB_AUDIO=m -# CONFIG_GADGET_UAC1 is not set -CONFIG_USB_ETH=m -CONFIG_USB_ETH_RNDIS=y -CONFIG_USB_ETH_EEM=y -CONFIG_USB_G_NCM=m -CONFIG_USB_GADGETFS=m -CONFIG_USB_FUNCTIONFS=m -CONFIG_USB_FUNCTIONFS_ETH=y -CONFIG_USB_FUNCTIONFS_RNDIS=y -CONFIG_USB_FUNCTIONFS_GENERIC=y -CONFIG_USB_MASS_STORAGE=m -CONFIG_USB_GADGET_TARGET=m -CONFIG_USB_G_SERIAL=m -CONFIG_USB_MIDI_GADGET=m -CONFIG_USB_G_PRINTER=m -CONFIG_USB_CDC_COMPOSITE=m -CONFIG_USB_G_NOKIA=m -CONFIG_USB_G_ACM_MS=m -CONFIG_USB_G_MULTI=m -CONFIG_USB_G_MULTI_RNDIS=y -CONFIG_USB_G_MULTI_CDC=y -CONFIG_USB_G_HID=m -CONFIG_USB_G_DBGP=m -# CONFIG_USB_G_DBGP_PRINTK is not set -CONFIG_USB_G_DBGP_SERIAL=y -CONFIG_USB_G_WEBCAM=m -CONFIG_USB_RAW_GADGET=m -# end of USB Gadget precomposed configurations - -CONFIG_TYPEC=m -CONFIG_TYPEC_TCPM=m -CONFIG_TYPEC_TCPCI=m -CONFIG_TYPEC_RT1711H=m -CONFIG_TYPEC_FUSB302=m -CONFIG_TYPEC_WCOVE=m -CONFIG_TYPEC_UCSI=m -CONFIG_UCSI_CCG=m -CONFIG_UCSI_ACPI=m -CONFIG_TYPEC_HD3SS3220=m -CONFIG_TYPEC_TPS6598X=m - -# -# USB Type-C Multiplexer/DeMultiplexer Switch support -# -CONFIG_TYPEC_MUX_PI3USB30532=m -CONFIG_TYPEC_MUX_INTEL_PMC=m -# end of USB Type-C Multiplexer/DeMultiplexer Switch support - -# -# USB Type-C Alternate Mode drivers -# -CONFIG_TYPEC_DP_ALTMODE=m -CONFIG_TYPEC_NVIDIA_ALTMODE=m -# end of USB Type-C Alternate Mode drivers - -CONFIG_USB_ROLE_SWITCH=m -CONFIG_USB_ROLES_INTEL_XHCI=m -CONFIG_MMC=m -CONFIG_PWRSEQ_EMMC=m -CONFIG_PWRSEQ_SD8787=m -CONFIG_PWRSEQ_SIMPLE=m -CONFIG_MMC_BLOCK=m -CONFIG_MMC_BLOCK_MINORS=8 -CONFIG_SDIO_UART=m -CONFIG_MMC_TEST=m - -# -# MMC/SD/SDIO Host Controller Drivers -# -# CONFIG_MMC_DEBUG is not set -CONFIG_MMC_SDHCI=m -CONFIG_MMC_SDHCI_IO_ACCESSORS=y -CONFIG_MMC_SDHCI_PCI=m -CONFIG_MMC_RICOH_MMC=y -CONFIG_MMC_SDHCI_ACPI=m -CONFIG_MMC_SDHCI_PLTFM=m -CONFIG_MMC_SDHCI_OF_ARASAN=m -CONFIG_MMC_SDHCI_OF_ASPEED=m -CONFIG_MMC_SDHCI_OF_AT91=m -CONFIG_MMC_SDHCI_OF_DWCMSHC=m -CONFIG_MMC_SDHCI_CADENCE=m -CONFIG_MMC_SDHCI_F_SDH30=m -CONFIG_MMC_SDHCI_MILBEAUT=m -CONFIG_MMC_WBSD=m -CONFIG_MMC_ALCOR=m -CONFIG_MMC_TIFM_SD=m -CONFIG_MMC_SPI=m -CONFIG_MMC_SDRICOH_CS=m -CONFIG_MMC_CB710=m -CONFIG_MMC_VIA_SDMMC=m -CONFIG_MMC_VUB300=m -CONFIG_MMC_USHC=m -CONFIG_MMC_USDHI6ROL0=m -CONFIG_MMC_REALTEK_PCI=m -CONFIG_MMC_REALTEK_USB=m -CONFIG_MMC_CQHCI=m -CONFIG_MMC_HSQ=m -CONFIG_MMC_TOSHIBA_PCI=m -CONFIG_MMC_MTK=m -CONFIG_MMC_SDHCI_XENON=m -CONFIG_MMC_SDHCI_OMAP=m -CONFIG_MMC_SDHCI_AM654=m -CONFIG_MMC_SDHCI_EXTERNAL_DMA=y -CONFIG_MEMSTICK=m -# CONFIG_MEMSTICK_DEBUG is not set - -# -# MemoryStick drivers -# -# CONFIG_MEMSTICK_UNSAFE_RESUME is not set -CONFIG_MSPRO_BLOCK=m -CONFIG_MS_BLOCK=m - -# -# MemoryStick Host Controller Drivers -# -CONFIG_MEMSTICK_TIFM_MS=m -CONFIG_MEMSTICK_JMICRON_38X=m -CONFIG_MEMSTICK_R592=m -CONFIG_MEMSTICK_REALTEK_PCI=m -CONFIG_MEMSTICK_REALTEK_USB=m -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_CLASS_FLASH=m 
-CONFIG_LEDS_BRIGHTNESS_HW_CHANGED=y - -# -# LED drivers -# -CONFIG_LEDS_88PM860X=m -CONFIG_LEDS_AAT1290=m -CONFIG_LEDS_AN30259A=m -CONFIG_LEDS_APU=m -CONFIG_LEDS_AS3645A=m -CONFIG_LEDS_BCM6328=m -CONFIG_LEDS_BCM6358=m -CONFIG_LEDS_CPCAP=m -CONFIG_LEDS_CR0014114=m -CONFIG_LEDS_EL15203000=m -CONFIG_LEDS_LM3530=m -CONFIG_LEDS_LM3532=m -CONFIG_LEDS_LM3533=m -CONFIG_LEDS_LM3642=m -CONFIG_LEDS_LM3692X=m -CONFIG_LEDS_LM3601X=m -CONFIG_LEDS_MT6323=m -CONFIG_LEDS_PCA9532=m -CONFIG_LEDS_PCA9532_GPIO=y -CONFIG_LEDS_GPIO=m -CONFIG_LEDS_LP3944=m -CONFIG_LEDS_LP3952=m -# CONFIG_LEDS_LP5521 is not set -# CONFIG_LEDS_LP5523 is not set -# CONFIG_LEDS_LP5562 is not set -# CONFIG_LEDS_LP8501 is not set -CONFIG_LEDS_LP8788=m -CONFIG_LEDS_LP8860=m -CONFIG_LEDS_CLEVO_MAIL=m -CONFIG_LEDS_PCA955X=m -CONFIG_LEDS_PCA955X_GPIO=y -CONFIG_LEDS_PCA963X=m -CONFIG_LEDS_WM831X_STATUS=m -CONFIG_LEDS_WM8350=m -CONFIG_LEDS_DA903X=m -CONFIG_LEDS_DA9052=m -CONFIG_LEDS_DAC124S085=m -CONFIG_LEDS_PWM=m -CONFIG_LEDS_REGULATOR=m -CONFIG_LEDS_BD2802=m -CONFIG_LEDS_INTEL_SS4200=m -CONFIG_LEDS_LT3593=m -CONFIG_LEDS_ADP5520=m -CONFIG_LEDS_MC13783=m -CONFIG_LEDS_TCA6507=m -CONFIG_LEDS_TLC591XX=m -CONFIG_LEDS_MAX77650=m -CONFIG_LEDS_MAX77693=m -CONFIG_LEDS_MAX8997=m -CONFIG_LEDS_LM355x=m -CONFIG_LEDS_MENF21BMC=m -CONFIG_LEDS_KTD2692=m -CONFIG_LEDS_IS31FL319X=m -CONFIG_LEDS_IS31FL32XX=m - -# -# LED driver for blink(1) USB RGB LED is under Special HID drivers (HID_THINGM) -# -CONFIG_LEDS_BLINKM=m -CONFIG_LEDS_SYSCON=y -CONFIG_LEDS_MLXCPLD=m -CONFIG_LEDS_MLXREG=m -CONFIG_LEDS_USER=m -CONFIG_LEDS_NIC78BX=m -CONFIG_LEDS_SPI_BYTE=m -CONFIG_LEDS_TI_LMU_COMMON=m -CONFIG_LEDS_LM3697=m -CONFIG_LEDS_LM36274=m -CONFIG_LEDS_TPS6105X=m - -# -# LED Triggers -# -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=m -CONFIG_LEDS_TRIGGER_ONESHOT=m -CONFIG_LEDS_TRIGGER_DISK=y -CONFIG_LEDS_TRIGGER_MTD=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=m -CONFIG_LEDS_TRIGGER_BACKLIGHT=m -CONFIG_LEDS_TRIGGER_CPU=y -CONFIG_LEDS_TRIGGER_ACTIVITY=m -CONFIG_LEDS_TRIGGER_GPIO=m -CONFIG_LEDS_TRIGGER_DEFAULT_ON=m - -# -# iptables trigger is under Netfilter config (LED target) -# -CONFIG_LEDS_TRIGGER_TRANSIENT=m -CONFIG_LEDS_TRIGGER_CAMERA=m -CONFIG_LEDS_TRIGGER_PANIC=y -CONFIG_LEDS_TRIGGER_NETDEV=m -CONFIG_LEDS_TRIGGER_PATTERN=m -CONFIG_LEDS_TRIGGER_AUDIO=m -CONFIG_ACCESSIBILITY=y -CONFIG_A11Y_BRAILLE_CONSOLE=y -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_MAD=m -CONFIG_INFINIBAND_USER_ACCESS=m -# CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI is not set -CONFIG_INFINIBAND_USER_MEM=y -CONFIG_INFINIBAND_ON_DEMAND_PAGING=y -CONFIG_INFINIBAND_ADDR_TRANS=y -CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y -CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_MTHCA_DEBUG=y -CONFIG_INFINIBAND_QIB=m -CONFIG_INFINIBAND_QIB_DCA=y -CONFIG_INFINIBAND_CXGB4=m -CONFIG_INFINIBAND_EFA=m -CONFIG_INFINIBAND_I40IW=m -CONFIG_MLX4_INFINIBAND=m -CONFIG_MLX5_INFINIBAND=m -CONFIG_INFINIBAND_OCRDMA=m -CONFIG_INFINIBAND_VMWARE_PVRDMA=m -CONFIG_INFINIBAND_USNIC=m -CONFIG_INFINIBAND_BNXT_RE=m -CONFIG_INFINIBAND_HFI1=m -# CONFIG_HFI1_DEBUG_SDMA_ORDER is not set -# CONFIG_SDMA_VERBOSITY is not set -CONFIG_INFINIBAND_QEDR=m -CONFIG_INFINIBAND_RDMAVT=m -CONFIG_RDMA_RXE=m -CONFIG_RDMA_SIW=m -CONFIG_INFINIBAND_IPOIB=m -CONFIG_INFINIBAND_IPOIB_CM=y -CONFIG_INFINIBAND_IPOIB_DEBUG=y -# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set -CONFIG_INFINIBAND_SRP=m -CONFIG_INFINIBAND_SRPT=m -CONFIG_INFINIBAND_ISER=m -CONFIG_INFINIBAND_ISERT=m -CONFIG_INFINIBAND_OPA_VNIC=m -CONFIG_EDAC_ATOMIC_SCRUB=y -CONFIG_EDAC_SUPPORT=y -CONFIG_EDAC=y 
-CONFIG_EDAC_LEGACY_SYSFS=y -# CONFIG_EDAC_DEBUG is not set -CONFIG_EDAC_DECODE_MCE=m -CONFIG_EDAC_GHES=y -CONFIG_EDAC_AMD64=m -# CONFIG_EDAC_AMD64_ERROR_INJECTION is not set -CONFIG_EDAC_E752X=m -CONFIG_EDAC_I82975X=m -CONFIG_EDAC_I3000=m -CONFIG_EDAC_I3200=m -CONFIG_EDAC_IE31200=m -CONFIG_EDAC_X38=m -CONFIG_EDAC_I5400=m -CONFIG_EDAC_I7CORE=m -CONFIG_EDAC_I5000=m -CONFIG_EDAC_I5100=m -CONFIG_EDAC_I7300=m -CONFIG_EDAC_SBRIDGE=m -CONFIG_EDAC_SKX=m -CONFIG_EDAC_I10NM=m -CONFIG_EDAC_PND2=m -CONFIG_RTC_LIB=y -CONFIG_RTC_MC146818_LIB=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_HCTOSYS=y -CONFIG_RTC_HCTOSYS_DEVICE="rtc0" -CONFIG_RTC_SYSTOHC=y -CONFIG_RTC_SYSTOHC_DEVICE="rtc0" -# CONFIG_RTC_DEBUG is not set -CONFIG_RTC_NVMEM=y - -# -# RTC interfaces -# -CONFIG_RTC_INTF_SYSFS=y -CONFIG_RTC_INTF_PROC=y -CONFIG_RTC_INTF_DEV=y -CONFIG_RTC_INTF_DEV_UIE_EMUL=y -# CONFIG_RTC_DRV_TEST is not set - -# -# I2C RTC drivers -# -CONFIG_RTC_DRV_88PM860X=m -CONFIG_RTC_DRV_88PM80X=m -CONFIG_RTC_DRV_ABB5ZES3=m -CONFIG_RTC_DRV_ABEOZ9=m -CONFIG_RTC_DRV_ABX80X=m -CONFIG_RTC_DRV_AS3722=m -CONFIG_RTC_DRV_DS1307=m -CONFIG_RTC_DRV_DS1307_CENTURY=y -CONFIG_RTC_DRV_DS1374=m -CONFIG_RTC_DRV_DS1374_WDT=y -CONFIG_RTC_DRV_DS1672=m -CONFIG_RTC_DRV_HYM8563=m -CONFIG_RTC_DRV_LP8788=m -CONFIG_RTC_DRV_MAX6900=m -CONFIG_RTC_DRV_MAX8907=m -CONFIG_RTC_DRV_MAX8925=m -CONFIG_RTC_DRV_MAX8998=m -CONFIG_RTC_DRV_MAX8997=m -CONFIG_RTC_DRV_MAX77686=m -CONFIG_RTC_DRV_RK808=m -CONFIG_RTC_DRV_RS5C372=m -CONFIG_RTC_DRV_ISL1208=m -CONFIG_RTC_DRV_ISL12022=m -CONFIG_RTC_DRV_ISL12026=m -CONFIG_RTC_DRV_X1205=m -CONFIG_RTC_DRV_PCF8523=m -CONFIG_RTC_DRV_PCF85063=m -CONFIG_RTC_DRV_PCF85363=m -CONFIG_RTC_DRV_PCF8563=m -CONFIG_RTC_DRV_PCF8583=m -CONFIG_RTC_DRV_M41T80=m -CONFIG_RTC_DRV_M41T80_WDT=y -CONFIG_RTC_DRV_BD70528=m -CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_TWL4030=m -CONFIG_RTC_DRV_PALMAS=m -CONFIG_RTC_DRV_TPS6586X=m -CONFIG_RTC_DRV_TPS65910=m -CONFIG_RTC_DRV_TPS80031=m -CONFIG_RTC_DRV_RC5T583=m -CONFIG_RTC_DRV_RC5T619=m -CONFIG_RTC_DRV_S35390A=m -CONFIG_RTC_DRV_FM3130=m -CONFIG_RTC_DRV_RX8010=m -CONFIG_RTC_DRV_RX8581=m -CONFIG_RTC_DRV_RX8025=m -CONFIG_RTC_DRV_EM3027=m -CONFIG_RTC_DRV_RV3028=m -CONFIG_RTC_DRV_RV8803=m -CONFIG_RTC_DRV_S5M=m -CONFIG_RTC_DRV_SD3078=m - -# -# SPI RTC drivers -# -CONFIG_RTC_DRV_M41T93=m -CONFIG_RTC_DRV_M41T94=m -CONFIG_RTC_DRV_DS1302=m -CONFIG_RTC_DRV_DS1305=m -CONFIG_RTC_DRV_DS1343=m -CONFIG_RTC_DRV_DS1347=m -CONFIG_RTC_DRV_DS1390=m -CONFIG_RTC_DRV_MAX6916=m -CONFIG_RTC_DRV_R9701=m -CONFIG_RTC_DRV_RX4581=m -CONFIG_RTC_DRV_RX6110=m -CONFIG_RTC_DRV_RS5C348=m -CONFIG_RTC_DRV_MAX6902=m -CONFIG_RTC_DRV_PCF2123=m -CONFIG_RTC_DRV_MCP795=m -CONFIG_RTC_I2C_AND_SPI=y - -# -# SPI and I2C RTC drivers -# -CONFIG_RTC_DRV_DS3232=m -CONFIG_RTC_DRV_DS3232_HWMON=y -CONFIG_RTC_DRV_PCF2127=m -CONFIG_RTC_DRV_RV3029C2=m -CONFIG_RTC_DRV_RV3029_HWMON=y - -# -# Platform RTC drivers -# -CONFIG_RTC_DRV_CMOS=y -CONFIG_RTC_DRV_DS1286=m -CONFIG_RTC_DRV_DS1511=m -CONFIG_RTC_DRV_DS1553=m -CONFIG_RTC_DRV_DS1685_FAMILY=m -CONFIG_RTC_DRV_DS1685=y -# CONFIG_RTC_DRV_DS1689 is not set -# CONFIG_RTC_DRV_DS17285 is not set -# CONFIG_RTC_DRV_DS17485 is not set -# CONFIG_RTC_DRV_DS17885 is not set -CONFIG_RTC_DRV_DS1742=m -CONFIG_RTC_DRV_DS2404=m -CONFIG_RTC_DRV_DA9052=m -CONFIG_RTC_DRV_DA9055=m -CONFIG_RTC_DRV_DA9063=m -CONFIG_RTC_DRV_STK17TA8=m -CONFIG_RTC_DRV_M48T86=m -CONFIG_RTC_DRV_M48T35=m -CONFIG_RTC_DRV_M48T59=m -CONFIG_RTC_DRV_MSM6242=m -CONFIG_RTC_DRV_BQ4802=m -CONFIG_RTC_DRV_RP5C01=m -CONFIG_RTC_DRV_V3020=m -CONFIG_RTC_DRV_WM831X=m -CONFIG_RTC_DRV_WM8350=m 
-CONFIG_RTC_DRV_PCF50633=m -CONFIG_RTC_DRV_AB3100=m -CONFIG_RTC_DRV_ZYNQMP=m -CONFIG_RTC_DRV_CROS_EC=m - -# -# on-CPU RTC drivers -# -CONFIG_RTC_DRV_CADENCE=m -CONFIG_RTC_DRV_FTRTC010=m -CONFIG_RTC_DRV_PCAP=m -CONFIG_RTC_DRV_MC13XXX=m -CONFIG_RTC_DRV_MT6397=m -CONFIG_RTC_DRV_R7301=m -CONFIG_RTC_DRV_CPCAP=m - -# -# HID Sensor RTC drivers -# -CONFIG_RTC_DRV_HID_SENSOR_TIME=m -CONFIG_RTC_DRV_WILCO_EC=m -CONFIG_DMADEVICES=y -# CONFIG_DMADEVICES_DEBUG is not set - -# -# DMA Devices -# -CONFIG_DMA_ENGINE=y -CONFIG_DMA_VIRTUAL_CHANNELS=y -CONFIG_DMA_ACPI=y -CONFIG_DMA_OF=y -CONFIG_ALTERA_MSGDMA=m -CONFIG_DW_AXI_DMAC=m -CONFIG_FSL_EDMA=m -CONFIG_INTEL_IDMA64=m -CONFIG_INTEL_IDXD=m -CONFIG_INTEL_IOATDMA=m -CONFIG_INTEL_MIC_X100_DMA=m -CONFIG_PLX_DMA=m -CONFIG_QCOM_HIDMA_MGMT=m -CONFIG_QCOM_HIDMA=m -CONFIG_DW_DMAC_CORE=y -CONFIG_DW_DMAC=y -CONFIG_DW_DMAC_PCI=y -CONFIG_DW_EDMA=m -CONFIG_DW_EDMA_PCIE=m -CONFIG_HSU_DMA=y -CONFIG_SF_PDMA=m - -# -# DMA Clients -# -CONFIG_ASYNC_TX_DMA=y -# CONFIG_DMATEST is not set -CONFIG_DMA_ENGINE_RAID=y - -# -# DMABUF options -# -CONFIG_SYNC_FILE=y -# CONFIG_SW_SYNC is not set -CONFIG_UDMABUF=y -# CONFIG_DMABUF_MOVE_NOTIFY is not set -# CONFIG_DMABUF_SELFTESTS is not set -CONFIG_DMABUF_HEAPS=y -CONFIG_DMABUF_HEAPS_SYSTEM=y -# end of DMABUF options - -CONFIG_DCA=m -CONFIG_AUXDISPLAY=y -CONFIG_HD44780=m -CONFIG_KS0108=m -CONFIG_KS0108_PORT=0x378 -CONFIG_KS0108_DELAY=2 -CONFIG_CFAG12864B=m -CONFIG_CFAG12864B_RATE=20 -CONFIG_IMG_ASCII_LCD=m -CONFIG_HT16K33=m -CONFIG_PARPORT_PANEL=m -CONFIG_PANEL_PARPORT=0 -CONFIG_PANEL_PROFILE=5 -# CONFIG_PANEL_CHANGE_MESSAGE is not set -# CONFIG_CHARLCD_BL_OFF is not set -# CONFIG_CHARLCD_BL_ON is not set -CONFIG_CHARLCD_BL_FLASH=y -CONFIG_PANEL=m -CONFIG_CHARLCD=m -CONFIG_UIO=m -CONFIG_UIO_CIF=m -CONFIG_UIO_PDRV_GENIRQ=m -CONFIG_UIO_DMEM_GENIRQ=m -CONFIG_UIO_AEC=m -CONFIG_UIO_SERCOS3=m -CONFIG_UIO_PCI_GENERIC=m -CONFIG_UIO_NETX=m -CONFIG_UIO_PRUSS=m -CONFIG_UIO_MF624=m -CONFIG_UIO_HV_GENERIC=m -CONFIG_VFIO_IOMMU_TYPE1=m -CONFIG_VFIO_VIRQFD=m -CONFIG_VFIO=m -# CONFIG_VFIO_NOIOMMU is not set -CONFIG_VFIO_PCI=m -CONFIG_VFIO_PCI_VGA=y -CONFIG_VFIO_PCI_MMAP=y -CONFIG_VFIO_PCI_INTX=y -CONFIG_VFIO_PCI_IGD=y -CONFIG_VFIO_MDEV=m -CONFIG_VFIO_MDEV_DEVICE=m -CONFIG_IRQ_BYPASS_MANAGER=m -CONFIG_VIRT_DRIVERS=y -CONFIG_VBOXGUEST=m -CONFIG_VIRTIO=y -CONFIG_VIRTIO_MENU=y -CONFIG_VIRTIO_PCI=m -CONFIG_VIRTIO_PCI_LEGACY=y -CONFIG_VIRTIO_VDPA=m -CONFIG_VIRTIO_PMEM=m -CONFIG_VIRTIO_BALLOON=m -CONFIG_VIRTIO_INPUT=m -CONFIG_VIRTIO_MMIO=m -CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_VDPA=m -CONFIG_VDPA_SIM=m -CONFIG_IFCVF=m -CONFIG_VHOST_IOTLB=m -CONFIG_VHOST_RING=m -CONFIG_VHOST_DPN=y -CONFIG_VHOST=m -CONFIG_VHOST_MENU=y -CONFIG_VHOST_NET=m -CONFIG_VHOST_SCSI=m -CONFIG_VHOST_VSOCK=m -CONFIG_VHOST_VDPA=m -# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set - -# -# Microsoft Hyper-V guest support -# -CONFIG_HYPERV=m -CONFIG_HYPERV_TIMER=y -CONFIG_HYPERV_UTILS=m -CONFIG_HYPERV_BALLOON=m -# end of Microsoft Hyper-V guest support - -# -# Xen driver support -# -CONFIG_XEN_BALLOON=y -CONFIG_XEN_BALLOON_MEMORY_HOTPLUG=y -CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT=512 -CONFIG_XEN_SCRUB_PAGES_DEFAULT=y -CONFIG_XEN_DEV_EVTCHN=m -CONFIG_XEN_BACKEND=y -CONFIG_XENFS=m -CONFIG_XEN_COMPAT_XENFS=y -CONFIG_XEN_SYS_HYPERVISOR=y -CONFIG_XEN_XENBUS_FRONTEND=y -CONFIG_XEN_GNTDEV=m -CONFIG_XEN_GNTDEV_DMABUF=y -CONFIG_XEN_GRANT_DEV_ALLOC=m -CONFIG_XEN_GRANT_DMA_ALLOC=y -CONFIG_SWIOTLB_XEN=y -CONFIG_XEN_PCIDEV_BACKEND=m -CONFIG_XEN_PVCALLS_FRONTEND=m -CONFIG_XEN_PVCALLS_BACKEND=y 
-CONFIG_XEN_SCSI_BACKEND=m -CONFIG_XEN_PRIVCMD=m -CONFIG_XEN_ACPI_PROCESSOR=m -CONFIG_XEN_MCE_LOG=y -CONFIG_XEN_HAVE_PVMMU=y -CONFIG_XEN_EFI=y -CONFIG_XEN_AUTO_XLATE=y -CONFIG_XEN_ACPI=y -CONFIG_XEN_SYMS=y -CONFIG_XEN_HAVE_VPMU=y -CONFIG_XEN_FRONT_PGDIR_SHBUF=m -# end of Xen driver support - -# CONFIG_GREYBUS is not set -CONFIG_STAGING=y -CONFIG_PRISM2_USB=m -CONFIG_COMEDI=m -# CONFIG_COMEDI_DEBUG is not set -CONFIG_COMEDI_DEFAULT_BUF_SIZE_KB=2048 -CONFIG_COMEDI_DEFAULT_BUF_MAXSIZE_KB=20480 -CONFIG_COMEDI_MISC_DRIVERS=y -CONFIG_COMEDI_BOND=m -CONFIG_COMEDI_TEST=m -CONFIG_COMEDI_PARPORT=m -# CONFIG_COMEDI_ISA_DRIVERS is not set -CONFIG_COMEDI_PCI_DRIVERS=m -CONFIG_COMEDI_8255_PCI=m -CONFIG_COMEDI_ADDI_WATCHDOG=m -CONFIG_COMEDI_ADDI_APCI_1032=m -CONFIG_COMEDI_ADDI_APCI_1500=m -CONFIG_COMEDI_ADDI_APCI_1516=m -CONFIG_COMEDI_ADDI_APCI_1564=m -CONFIG_COMEDI_ADDI_APCI_16XX=m -CONFIG_COMEDI_ADDI_APCI_2032=m -CONFIG_COMEDI_ADDI_APCI_2200=m -CONFIG_COMEDI_ADDI_APCI_3120=m -CONFIG_COMEDI_ADDI_APCI_3501=m -CONFIG_COMEDI_ADDI_APCI_3XXX=m -CONFIG_COMEDI_ADL_PCI6208=m -CONFIG_COMEDI_ADL_PCI7X3X=m -CONFIG_COMEDI_ADL_PCI8164=m -CONFIG_COMEDI_ADL_PCI9111=m -CONFIG_COMEDI_ADL_PCI9118=m -CONFIG_COMEDI_ADV_PCI1710=m -CONFIG_COMEDI_ADV_PCI1720=m -CONFIG_COMEDI_ADV_PCI1723=m -CONFIG_COMEDI_ADV_PCI1724=m -CONFIG_COMEDI_ADV_PCI1760=m -CONFIG_COMEDI_ADV_PCI_DIO=m -CONFIG_COMEDI_AMPLC_DIO200_PCI=m -CONFIG_COMEDI_AMPLC_PC236_PCI=m -CONFIG_COMEDI_AMPLC_PC263_PCI=m -CONFIG_COMEDI_AMPLC_PCI224=m -CONFIG_COMEDI_AMPLC_PCI230=m -CONFIG_COMEDI_CONTEC_PCI_DIO=m -CONFIG_COMEDI_DAS08_PCI=m -CONFIG_COMEDI_DT3000=m -CONFIG_COMEDI_DYNA_PCI10XX=m -CONFIG_COMEDI_GSC_HPDI=m -CONFIG_COMEDI_MF6X4=m -CONFIG_COMEDI_ICP_MULTI=m -CONFIG_COMEDI_DAQBOARD2000=m -CONFIG_COMEDI_JR3_PCI=m -CONFIG_COMEDI_KE_COUNTER=m -CONFIG_COMEDI_CB_PCIDAS64=m -CONFIG_COMEDI_CB_PCIDAS=m -CONFIG_COMEDI_CB_PCIDDA=m -CONFIG_COMEDI_CB_PCIMDAS=m -CONFIG_COMEDI_CB_PCIMDDA=m -CONFIG_COMEDI_ME4000=m -CONFIG_COMEDI_ME_DAQ=m -CONFIG_COMEDI_NI_6527=m -CONFIG_COMEDI_NI_65XX=m -CONFIG_COMEDI_NI_660X=m -CONFIG_COMEDI_NI_670X=m -CONFIG_COMEDI_NI_LABPC_PCI=m -CONFIG_COMEDI_NI_PCIDIO=m -CONFIG_COMEDI_NI_PCIMIO=m -CONFIG_COMEDI_RTD520=m -CONFIG_COMEDI_S626=m -CONFIG_COMEDI_MITE=m -CONFIG_COMEDI_NI_TIOCMD=m -CONFIG_COMEDI_PCMCIA_DRIVERS=m -CONFIG_COMEDI_CB_DAS16_CS=m -CONFIG_COMEDI_DAS08_CS=m -CONFIG_COMEDI_NI_DAQ_700_CS=m -CONFIG_COMEDI_NI_DAQ_DIO24_CS=m -CONFIG_COMEDI_NI_LABPC_CS=m -CONFIG_COMEDI_NI_MIO_CS=m -CONFIG_COMEDI_QUATECH_DAQP_CS=m -CONFIG_COMEDI_USB_DRIVERS=m -CONFIG_COMEDI_DT9812=m -CONFIG_COMEDI_NI_USB6501=m -CONFIG_COMEDI_USBDUX=m -CONFIG_COMEDI_USBDUXFAST=m -CONFIG_COMEDI_USBDUXSIGMA=m -CONFIG_COMEDI_VMK80XX=m -CONFIG_COMEDI_8254=m -CONFIG_COMEDI_8255=m -CONFIG_COMEDI_8255_SA=m -CONFIG_COMEDI_KCOMEDILIB=m -CONFIG_COMEDI_AMPLC_DIO200=m -CONFIG_COMEDI_AMPLC_PC236=m -CONFIG_COMEDI_DAS08=m -CONFIG_COMEDI_NI_LABPC=m -CONFIG_COMEDI_NI_TIO=m -CONFIG_COMEDI_NI_ROUTING=m -CONFIG_RTL8192U=m -CONFIG_RTLLIB=m -CONFIG_RTLLIB_CRYPTO_CCMP=m -CONFIG_RTLLIB_CRYPTO_TKIP=m -CONFIG_RTLLIB_CRYPTO_WEP=m -CONFIG_RTL8192E=m -CONFIG_RTL8723BS=m -CONFIG_R8712U=m -CONFIG_R8188EU=m -CONFIG_88EU_AP_MODE=y -CONFIG_RTS5208=m -CONFIG_VT6655=m -CONFIG_VT6656=m - -# -# IIO staging drivers -# - -# -# Accelerometers -# -CONFIG_ADIS16203=m -CONFIG_ADIS16240=m -# end of Accelerometers - -# -# Analog to digital converters -# -CONFIG_AD7816=m -CONFIG_AD7280=m -# end of Analog to digital converters - -# -# Analog digital bi-direction converters -# -CONFIG_ADT7316=m -CONFIG_ADT7316_SPI=m 
-CONFIG_ADT7316_I2C=m -# end of Analog digital bi-direction converters - -# -# Capacitance to digital converters -# -CONFIG_AD7150=m -CONFIG_AD7746=m -# end of Capacitance to digital converters - -# -# Direct Digital Synthesis -# -CONFIG_AD9832=m -CONFIG_AD9834=m -# end of Direct Digital Synthesis - -# -# Network Analyzer, Impedance Converters -# -CONFIG_AD5933=m -# end of Network Analyzer, Impedance Converters - -# -# Active energy metering IC -# -CONFIG_ADE7854=m -CONFIG_ADE7854_I2C=m -CONFIG_ADE7854_SPI=m -# end of Active energy metering IC - -# -# Resolver to digital converters -# -CONFIG_AD2S1210=m -# end of Resolver to digital converters -# end of IIO staging drivers - -# CONFIG_FB_SM750 is not set - -# -# Speakup console speech -# -CONFIG_SPEAKUP=m -CONFIG_SPEAKUP_SYNTH_ACNTSA=m -CONFIG_SPEAKUP_SYNTH_APOLLO=m -CONFIG_SPEAKUP_SYNTH_AUDPTR=m -CONFIG_SPEAKUP_SYNTH_BNS=m -CONFIG_SPEAKUP_SYNTH_DECTLK=m -CONFIG_SPEAKUP_SYNTH_DECEXT=m -CONFIG_SPEAKUP_SYNTH_LTLK=m -CONFIG_SPEAKUP_SYNTH_SOFT=m -CONFIG_SPEAKUP_SYNTH_SPKOUT=m -CONFIG_SPEAKUP_SYNTH_TXPRT=m -CONFIG_SPEAKUP_SYNTH_DUMMY=m -# end of Speakup console speech - -CONFIG_STAGING_MEDIA=y -CONFIG_VIDEO_IPU3_IMGU=m - -# -# soc_camera sensor drivers -# -CONFIG_VIDEO_USBVISION=m - -# -# Android -# -# end of Android - -CONFIG_STAGING_BOARD=y -CONFIG_LTE_GDM724X=m -CONFIG_FIREWIRE_SERIAL=m -CONFIG_FWTTY_MAX_TOTAL_PORTS=64 -CONFIG_FWTTY_MAX_CARD_PORTS=32 -CONFIG_GS_FPGABOOT=m -CONFIG_UNISYSSPAR=y -CONFIG_UNISYS_VISORNIC=m -CONFIG_UNISYS_VISORINPUT=m -CONFIG_UNISYS_VISORHBA=m -CONFIG_COMMON_CLK_XLNX_CLKWZRD=m -# CONFIG_FB_TFT is not set -CONFIG_WILC1000=m -CONFIG_WILC1000_SDIO=m -CONFIG_WILC1000_SPI=m -# CONFIG_WILC1000_HW_OOB_INTR is not set -CONFIG_MOST_COMPONENTS=m -CONFIG_MOST_CDEV=m -CONFIG_MOST_NET=m -CONFIG_MOST_SOUND=m -CONFIG_MOST_VIDEO=m -CONFIG_MOST_DIM2=m -CONFIG_MOST_I2C=m -CONFIG_MOST_USB=m -CONFIG_KS7010=m -CONFIG_PI433=m - -# -# Gasket devices -# -CONFIG_STAGING_GASKET_FRAMEWORK=m -CONFIG_STAGING_APEX_DRIVER=m -# end of Gasket devices - -CONFIG_XIL_AXIS_FIFO=m -CONFIG_FIELDBUS_DEV=m -CONFIG_HMS_ANYBUSS_BUS=m -CONFIG_ARCX_ANYBUS_CONTROLLER=m -CONFIG_HMS_PROFINET=m -CONFIG_KPC2000=y -CONFIG_KPC2000_CORE=m -CONFIG_KPC2000_SPI=m -CONFIG_KPC2000_I2C=m -CONFIG_KPC2000_DMA=m -CONFIG_QLGE=m -CONFIG_WFX=m -CONFIG_X86_PLATFORM_DEVICES=y -CONFIG_ACPI_WMI=m -CONFIG_WMI_BMOF=m -CONFIG_ALIENWARE_WMI=m -CONFIG_HUAWEI_WMI=m -CONFIG_INTEL_WMI_THUNDERBOLT=m -CONFIG_MXM_WMI=m -CONFIG_PEAQ_WMI=m -CONFIG_XIAOMI_WMI=m -CONFIG_ACERHDF=m -CONFIG_ACER_WIRELESS=m -CONFIG_ACER_WMI=m -CONFIG_APPLE_GMUX=m -CONFIG_ASUS_LAPTOP=m -CONFIG_ASUS_WIRELESS=m -CONFIG_ASUS_WMI=m -CONFIG_ASUS_NB_WMI=m -CONFIG_EEEPC_LAPTOP=m -CONFIG_EEEPC_WMI=m -CONFIG_DCDBAS=m -CONFIG_DELL_SMBIOS=m -CONFIG_DELL_SMBIOS_WMI=y -CONFIG_DELL_SMBIOS_SMM=y -CONFIG_DELL_LAPTOP=m -CONFIG_DELL_RBTN=m -# CONFIG_DELL_RBU is not set -CONFIG_DELL_SMO8800=m -CONFIG_DELL_WMI=m -CONFIG_DELL_WMI_DESCRIPTOR=m -CONFIG_DELL_WMI_AIO=m -CONFIG_DELL_WMI_LED=m -CONFIG_AMILO_RFKILL=m -CONFIG_FUJITSU_LAPTOP=m -CONFIG_FUJITSU_TABLET=m -CONFIG_GPD_POCKET_FAN=m -CONFIG_HP_ACCEL=m -CONFIG_HP_WIRELESS=m -CONFIG_HP_WMI=m -CONFIG_IBM_RTL=m -CONFIG_IDEAPAD_LAPTOP=m -CONFIG_SENSORS_HDAPS=m -CONFIG_THINKPAD_ACPI=m -CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y -# CONFIG_THINKPAD_ACPI_DEBUGFACILITIES is not set -# CONFIG_THINKPAD_ACPI_DEBUG is not set -# CONFIG_THINKPAD_ACPI_UNSAFE_LEDS is not set -CONFIG_THINKPAD_ACPI_VIDEO=y -CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y -CONFIG_INTEL_ATOMISP2_PM=m -CONFIG_INTEL_CHT_INT33FE=m 
-CONFIG_INTEL_HID_EVENT=m -CONFIG_INTEL_INT0002_VGPIO=m -CONFIG_INTEL_MENLOW=m -CONFIG_INTEL_OAKTRAIL=m -CONFIG_INTEL_VBTN=m -CONFIG_SURFACE3_WMI=m -CONFIG_SURFACE_3_BUTTON=m -CONFIG_SURFACE_3_POWER_OPREGION=m -CONFIG_SURFACE_PRO3_BUTTON=m -CONFIG_MSI_LAPTOP=m -CONFIG_MSI_WMI=m -CONFIG_PCENGINES_APU2=m -CONFIG_SAMSUNG_LAPTOP=m -CONFIG_SAMSUNG_Q10=m -CONFIG_ACPI_TOSHIBA=m -CONFIG_TOSHIBA_BT_RFKILL=m -CONFIG_TOSHIBA_HAPS=m -CONFIG_TOSHIBA_WMI=m -CONFIG_ACPI_CMPC=m -CONFIG_COMPAL_LAPTOP=m -CONFIG_LG_LAPTOP=m -CONFIG_PANASONIC_LAPTOP=m -CONFIG_SONY_LAPTOP=m -CONFIG_SONYPI_COMPAT=y -CONFIG_SYSTEM76_ACPI=m -CONFIG_TOPSTAR_LAPTOP=m -CONFIG_I2C_MULTI_INSTANTIATE=m -CONFIG_MLX_PLATFORM=m -CONFIG_TOUCHSCREEN_DMI=y -CONFIG_INTEL_IPS=m -CONFIG_INTEL_RST=m -CONFIG_INTEL_SMARTCONNECT=m - -# -# Intel Speed Select Technology interface support -# -CONFIG_INTEL_SPEED_SELECT_INTERFACE=m -# end of Intel Speed Select Technology interface support - -CONFIG_INTEL_TURBO_MAX_3=y -CONFIG_INTEL_UNCORE_FREQ_CONTROL=m -CONFIG_INTEL_BXTWC_PMIC_TMU=m -CONFIG_INTEL_CHTDC_TI_PWRBTN=m -CONFIG_INTEL_PMC_CORE=y -CONFIG_INTEL_PMC_IPC=m -CONFIG_INTEL_PUNIT_IPC=m -CONFIG_INTEL_TELEMETRY=m -CONFIG_PMC_ATOM=y -CONFIG_MFD_CROS_EC=m -CONFIG_CHROME_PLATFORMS=y -CONFIG_CHROMEOS_LAPTOP=m -CONFIG_CHROMEOS_PSTORE=m -CONFIG_CHROMEOS_TBMC=m -CONFIG_CROS_EC=m -CONFIG_CROS_EC_I2C=m -CONFIG_CROS_EC_RPMSG=m -CONFIG_CROS_EC_ISHTP=m -CONFIG_CROS_EC_SPI=m -CONFIG_CROS_EC_LPC=m -CONFIG_CROS_EC_PROTO=y -CONFIG_CROS_KBD_LED_BACKLIGHT=m -CONFIG_CROS_EC_CHARDEV=m -CONFIG_CROS_EC_LIGHTBAR=m -CONFIG_CROS_EC_VBC=m -CONFIG_CROS_EC_DEBUGFS=m -CONFIG_CROS_EC_SENSORHUB=m -CONFIG_CROS_EC_SYSFS=m -CONFIG_CROS_EC_TYPEC=m -CONFIG_CROS_USBPD_LOGGER=m -CONFIG_CROS_USBPD_NOTIFY=m -CONFIG_WILCO_EC=m -CONFIG_WILCO_EC_DEBUGFS=m -CONFIG_WILCO_EC_EVENTS=m -CONFIG_WILCO_EC_TELEMETRY=m -CONFIG_MELLANOX_PLATFORM=y -CONFIG_MLXREG_HOTPLUG=m -CONFIG_MLXREG_IO=m -CONFIG_CLKDEV_LOOKUP=y -CONFIG_HAVE_CLK_PREPARE=y -CONFIG_COMMON_CLK=y - -# -# Common Clock Framework -# -CONFIG_COMMON_CLK_WM831X=m -CONFIG_CLK_HSDK=y -CONFIG_COMMON_CLK_MAX77686=m -CONFIG_COMMON_CLK_MAX9485=m -CONFIG_COMMON_CLK_RK808=m -CONFIG_COMMON_CLK_SI5341=m -CONFIG_COMMON_CLK_SI5351=m -CONFIG_COMMON_CLK_SI514=m -CONFIG_COMMON_CLK_SI544=m -CONFIG_COMMON_CLK_SI570=m -CONFIG_COMMON_CLK_CDCE706=m -CONFIG_COMMON_CLK_CDCE925=m -CONFIG_COMMON_CLK_CS2000_CP=m -CONFIG_COMMON_CLK_S2MPS11=m -CONFIG_CLK_TWL6040=m -CONFIG_COMMON_CLK_LOCHNAGAR=m -CONFIG_COMMON_CLK_PALMAS=m -CONFIG_COMMON_CLK_PWM=m -CONFIG_COMMON_CLK_VC5=m -CONFIG_COMMON_CLK_BD718XX=m -CONFIG_COMMON_CLK_FIXED_MMIO=y -# end of Common Clock Framework - -CONFIG_HWSPINLOCK=y - -# -# Clock Source drivers -# -CONFIG_TIMER_OF=y -CONFIG_TIMER_PROBE=y -CONFIG_CLKEVT_I8253=y -CONFIG_I8253_LOCK=y -CONFIG_CLKBLD_I8253=y -CONFIG_CLKSRC_MMIO=y -CONFIG_MICROCHIP_PIT64B=y -# end of Clock Source drivers - -CONFIG_MAILBOX=y -CONFIG_PLATFORM_MHU=m -CONFIG_PCC=y -CONFIG_ALTERA_MBOX=m -CONFIG_MAILBOX_TEST=m -CONFIG_IOMMU_IOVA=y -CONFIG_IOASID=y -CONFIG_IOMMU_API=y -CONFIG_IOMMU_SUPPORT=y - -# -# Generic IOMMU Pagetable Support -# -# end of Generic IOMMU Pagetable Support - -# CONFIG_IOMMU_DEBUGFS is not set -# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -CONFIG_OF_IOMMU=y -CONFIG_IOMMU_DMA=y -CONFIG_AMD_IOMMU=y -CONFIG_AMD_IOMMU_V2=y -CONFIG_DMAR_TABLE=y -CONFIG_INTEL_IOMMU=y -CONFIG_INTEL_IOMMU_SVM=y -# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set -CONFIG_INTEL_IOMMU_FLOPPY_WA=y -# CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON is not set -CONFIG_IRQ_REMAP=y -CONFIG_HYPERV_IOMMU=y - 
-# -# Remoteproc drivers -# -CONFIG_REMOTEPROC=y -# end of Remoteproc drivers - -# -# Rpmsg drivers -# -CONFIG_RPMSG=m -CONFIG_RPMSG_CHAR=m -CONFIG_RPMSG_QCOM_GLINK_NATIVE=m -CONFIG_RPMSG_QCOM_GLINK_RPM=m -CONFIG_RPMSG_VIRTIO=m -# end of Rpmsg drivers - -CONFIG_SOUNDWIRE=m - -# -# SoundWire Devices -# -CONFIG_SOUNDWIRE_CADENCE=m -CONFIG_SOUNDWIRE_INTEL=m -CONFIG_SOUNDWIRE_QCOM=m - -# -# SOC (System On Chip) specific Drivers -# - -# -# Amlogic SoC drivers -# -# end of Amlogic SoC drivers - -# -# Aspeed SoC drivers -# -# end of Aspeed SoC drivers - -# -# Broadcom SoC drivers -# -# end of Broadcom SoC drivers - -# -# NXP/Freescale QorIQ SoC drivers -# -# end of NXP/Freescale QorIQ SoC drivers - -# -# i.MX SoC drivers -# -# end of i.MX SoC drivers - -# -# Qualcomm SoC drivers -# -# end of Qualcomm SoC drivers - -CONFIG_SOC_TI=y - -# -# Xilinx SoC drivers -# -CONFIG_XILINX_VCU=m -# end of Xilinx SoC drivers -# end of SOC (System On Chip) specific Drivers - -CONFIG_PM_DEVFREQ=y - -# -# DEVFREQ Governors -# -CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=m -CONFIG_DEVFREQ_GOV_PERFORMANCE=m -CONFIG_DEVFREQ_GOV_POWERSAVE=m -CONFIG_DEVFREQ_GOV_USERSPACE=m -CONFIG_DEVFREQ_GOV_PASSIVE=m - -# -# DEVFREQ Drivers -# -CONFIG_PM_DEVFREQ_EVENT=y -CONFIG_EXTCON=y - -# -# Extcon Device Drivers -# -CONFIG_EXTCON_ADC_JACK=m -CONFIG_EXTCON_ARIZONA=m -CONFIG_EXTCON_AXP288=m -CONFIG_EXTCON_FSA9480=m -CONFIG_EXTCON_GPIO=m -CONFIG_EXTCON_INTEL_INT3496=m -CONFIG_EXTCON_INTEL_CHT_WC=m -CONFIG_EXTCON_MAX14577=m -CONFIG_EXTCON_MAX3355=m -CONFIG_EXTCON_MAX77693=m -CONFIG_EXTCON_MAX77843=m -CONFIG_EXTCON_MAX8997=m -CONFIG_EXTCON_PALMAS=m -CONFIG_EXTCON_PTN5150=m -CONFIG_EXTCON_RT8973A=m -CONFIG_EXTCON_SM5502=m -CONFIG_EXTCON_USB_GPIO=m -CONFIG_EXTCON_USBC_CROS_EC=m -CONFIG_MEMORY=y -CONFIG_IIO=m -CONFIG_IIO_BUFFER=y -CONFIG_IIO_BUFFER_CB=m -CONFIG_IIO_BUFFER_HW_CONSUMER=m -CONFIG_IIO_KFIFO_BUF=m -CONFIG_IIO_TRIGGERED_BUFFER=m -CONFIG_IIO_CONFIGFS=m -CONFIG_IIO_TRIGGER=y -CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 -CONFIG_IIO_SW_DEVICE=m -CONFIG_IIO_SW_TRIGGER=m -CONFIG_IIO_TRIGGERED_EVENT=m - -# -# Accelerometers -# -CONFIG_ADIS16201=m -CONFIG_ADIS16209=m -CONFIG_ADXL372=m -CONFIG_ADXL372_SPI=m -CONFIG_ADXL372_I2C=m -CONFIG_BMA180=m -CONFIG_BMA220=m -CONFIG_BMA400=m -CONFIG_BMA400_I2C=m -CONFIG_BMC150_ACCEL=m -CONFIG_BMC150_ACCEL_I2C=m -CONFIG_BMC150_ACCEL_SPI=m -CONFIG_DA280=m -CONFIG_DA311=m -CONFIG_DMARD06=m -CONFIG_DMARD09=m -CONFIG_DMARD10=m -CONFIG_HID_SENSOR_ACCEL_3D=m -CONFIG_IIO_CROS_EC_ACCEL_LEGACY=m -CONFIG_IIO_ST_ACCEL_3AXIS=m -CONFIG_IIO_ST_ACCEL_I2C_3AXIS=m -CONFIG_IIO_ST_ACCEL_SPI_3AXIS=m -CONFIG_KXSD9=m -CONFIG_KXSD9_SPI=m -CONFIG_KXSD9_I2C=m -CONFIG_KXCJK1013=m -CONFIG_MC3230=m -CONFIG_MMA7455=m -CONFIG_MMA7455_I2C=m -CONFIG_MMA7455_SPI=m -CONFIG_MMA7660=m -CONFIG_MMA8452=m -CONFIG_MMA9551_CORE=m -CONFIG_MMA9551=m -CONFIG_MMA9553=m -CONFIG_MXC4005=m -CONFIG_MXC6255=m -CONFIG_SCA3000=m -CONFIG_STK8312=m -CONFIG_STK8BA50=m -# end of Accelerometers - -# -# Analog to digital converters -# -CONFIG_AD_SIGMA_DELTA=m -CONFIG_AD7091R5=m -CONFIG_AD7124=m -CONFIG_AD7192=m -CONFIG_AD7266=m -CONFIG_AD7291=m -CONFIG_AD7292=m -CONFIG_AD7298=m -CONFIG_AD7476=m -CONFIG_AD7606=m -CONFIG_AD7606_IFACE_PARALLEL=m -CONFIG_AD7606_IFACE_SPI=m -CONFIG_AD7766=m -CONFIG_AD7768_1=m -CONFIG_AD7780=m -CONFIG_AD7791=m -CONFIG_AD7793=m -CONFIG_AD7887=m -CONFIG_AD7923=m -CONFIG_AD7949=m -CONFIG_AD799X=m -CONFIG_AXP20X_ADC=m -CONFIG_AXP288_ADC=m -CONFIG_CC10001_ADC=m -CONFIG_CPCAP_ADC=m -CONFIG_DA9150_GPADC=m -CONFIG_DLN2_ADC=m -CONFIG_ENVELOPE_DETECTOR=m 
-CONFIG_HI8435=m -CONFIG_HX711=m -CONFIG_INA2XX_ADC=m -CONFIG_LP8788_ADC=m -CONFIG_LTC2471=m -CONFIG_LTC2485=m -CONFIG_LTC2496=m -CONFIG_LTC2497=m -CONFIG_MAX1027=m -CONFIG_MAX11100=m -CONFIG_MAX1118=m -CONFIG_MAX1363=m -CONFIG_MAX9611=m -CONFIG_MCP320X=m -CONFIG_MCP3422=m -CONFIG_MCP3911=m -CONFIG_MEN_Z188_ADC=m -CONFIG_NAU7802=m -CONFIG_PALMAS_GPADC=m -CONFIG_QCOM_VADC_COMMON=m -CONFIG_QCOM_SPMI_IADC=m -CONFIG_QCOM_SPMI_VADC=m -CONFIG_QCOM_SPMI_ADC5=m -CONFIG_RN5T618_ADC=m -CONFIG_SD_ADC_MODULATOR=m -CONFIG_STMPE_ADC=m -CONFIG_TI_ADC081C=m -CONFIG_TI_ADC0832=m -CONFIG_TI_ADC084S021=m -CONFIG_TI_ADC12138=m -CONFIG_TI_ADC108S102=m -CONFIG_TI_ADC128S052=m -CONFIG_TI_ADC161S626=m -CONFIG_TI_ADS1015=m -CONFIG_TI_ADS7950=m -CONFIG_TI_ADS8344=m -CONFIG_TI_ADS8688=m -CONFIG_TI_ADS124S08=m -CONFIG_TI_AM335X_ADC=m -CONFIG_TI_TLC4541=m -CONFIG_TWL4030_MADC=m -CONFIG_TWL6030_GPADC=m -CONFIG_VF610_ADC=m -CONFIG_VIPERBOARD_ADC=m -CONFIG_XILINX_XADC=m -# end of Analog to digital converters - -# -# Analog Front Ends -# -CONFIG_IIO_RESCALE=m -# end of Analog Front Ends - -# -# Amplifiers -# -CONFIG_AD8366=m -CONFIG_HMC425=m -# end of Amplifiers - -# -# Chemical Sensors -# -CONFIG_ATLAS_PH_SENSOR=m -CONFIG_BME680=m -CONFIG_BME680_I2C=m -CONFIG_BME680_SPI=m -CONFIG_CCS811=m -CONFIG_IAQCORE=m -CONFIG_PMS7003=m -CONFIG_SENSIRION_SGP30=m -CONFIG_SPS30=m -CONFIG_VZ89X=m -# end of Chemical Sensors - -CONFIG_IIO_CROS_EC_SENSORS_CORE=m -CONFIG_IIO_CROS_EC_SENSORS=m -CONFIG_IIO_CROS_EC_SENSORS_LID_ANGLE=m - -# -# Hid Sensor IIO Common -# -CONFIG_HID_SENSOR_IIO_COMMON=m -CONFIG_HID_SENSOR_IIO_TRIGGER=m -# end of Hid Sensor IIO Common - -CONFIG_IIO_MS_SENSORS_I2C=m - -# -# SSP Sensor Common -# -CONFIG_IIO_SSP_SENSORS_COMMONS=m -CONFIG_IIO_SSP_SENSORHUB=m -# end of SSP Sensor Common - -CONFIG_IIO_ST_SENSORS_I2C=m -CONFIG_IIO_ST_SENSORS_SPI=m -CONFIG_IIO_ST_SENSORS_CORE=m - -# -# Digital to analog converters -# -CONFIG_AD5064=m -CONFIG_AD5360=m -CONFIG_AD5380=m -CONFIG_AD5421=m -CONFIG_AD5446=m -CONFIG_AD5449=m -CONFIG_AD5592R_BASE=m -CONFIG_AD5592R=m -CONFIG_AD5593R=m -CONFIG_AD5504=m -CONFIG_AD5624R_SPI=m -CONFIG_AD5686=m -CONFIG_AD5686_SPI=m -CONFIG_AD5696_I2C=m -CONFIG_AD5755=m -CONFIG_AD5758=m -CONFIG_AD5761=m -CONFIG_AD5764=m -CONFIG_AD5770R=m -CONFIG_AD5791=m -CONFIG_AD7303=m -CONFIG_AD8801=m -CONFIG_DPOT_DAC=m -CONFIG_DS4424=m -CONFIG_LTC1660=m -CONFIG_LTC2632=m -CONFIG_M62332=m -CONFIG_MAX517=m -CONFIG_MAX5821=m -CONFIG_MCP4725=m -CONFIG_MCP4922=m -CONFIG_TI_DAC082S085=m -CONFIG_TI_DAC5571=m -CONFIG_TI_DAC7311=m -CONFIG_TI_DAC7612=m -CONFIG_VF610_DAC=m -# end of Digital to analog converters - -# -# IIO dummy driver -# -# CONFIG_IIO_SIMPLE_DUMMY is not set -# end of IIO dummy driver - -# -# Frequency Synthesizers DDS/PLL -# - -# -# Clock Generator/Distribution -# -CONFIG_AD9523=m -# end of Clock Generator/Distribution - -# -# Phase-Locked Loop (PLL) frequency synthesizers -# -CONFIG_ADF4350=m -CONFIG_ADF4371=m -# end of Phase-Locked Loop (PLL) frequency synthesizers -# end of Frequency Synthesizers DDS/PLL - -# -# Digital gyroscope sensors -# -CONFIG_ADIS16080=m -CONFIG_ADIS16130=m -CONFIG_ADIS16136=m -CONFIG_ADIS16260=m -CONFIG_ADXRS450=m -CONFIG_BMG160=m -CONFIG_BMG160_I2C=m -CONFIG_BMG160_SPI=m -CONFIG_FXAS21002C=m -CONFIG_FXAS21002C_I2C=m -CONFIG_FXAS21002C_SPI=m -CONFIG_HID_SENSOR_GYRO_3D=m -CONFIG_MPU3050=m -CONFIG_MPU3050_I2C=m -CONFIG_IIO_ST_GYRO_3AXIS=m -CONFIG_IIO_ST_GYRO_I2C_3AXIS=m -CONFIG_IIO_ST_GYRO_SPI_3AXIS=m -CONFIG_ITG3200=m -# end of Digital gyroscope sensors - -# -# Health Sensors -# - -# 
-# Heart Rate Monitors -# -CONFIG_AFE4403=m -CONFIG_AFE4404=m -CONFIG_MAX30100=m -CONFIG_MAX30102=m -# end of Heart Rate Monitors -# end of Health Sensors - -# -# Humidity sensors -# -CONFIG_AM2315=m -CONFIG_DHT11=m -CONFIG_HDC100X=m -CONFIG_HID_SENSOR_HUMIDITY=m -CONFIG_HTS221=m -CONFIG_HTS221_I2C=m -CONFIG_HTS221_SPI=m -CONFIG_HTU21=m -CONFIG_SI7005=m -CONFIG_SI7020=m -# end of Humidity sensors - -# -# Inertial measurement units -# -CONFIG_ADIS16400=m -CONFIG_ADIS16460=m -CONFIG_ADIS16480=m -CONFIG_BMI160=m -CONFIG_BMI160_I2C=m -CONFIG_BMI160_SPI=m -CONFIG_FXOS8700=m -CONFIG_FXOS8700_I2C=m -CONFIG_FXOS8700_SPI=m -CONFIG_KMX61=m -CONFIG_INV_MPU6050_IIO=m -CONFIG_INV_MPU6050_I2C=m -CONFIG_INV_MPU6050_SPI=m -CONFIG_IIO_ST_LSM6DSX=m -CONFIG_IIO_ST_LSM6DSX_I2C=m -CONFIG_IIO_ST_LSM6DSX_SPI=m -CONFIG_IIO_ST_LSM6DSX_I3C=m -# end of Inertial measurement units - -CONFIG_IIO_ADIS_LIB=m -CONFIG_IIO_ADIS_LIB_BUFFER=y - -# -# Light sensors -# -CONFIG_ACPI_ALS=m -CONFIG_ADJD_S311=m -CONFIG_ADUX1020=m -CONFIG_AL3010=m -CONFIG_AL3320A=m -CONFIG_APDS9300=m -CONFIG_APDS9960=m -CONFIG_BH1750=m -CONFIG_BH1780=m -CONFIG_CM32181=m -CONFIG_CM3232=m -CONFIG_CM3323=m -CONFIG_CM3605=m -CONFIG_CM36651=m -CONFIG_IIO_CROS_EC_LIGHT_PROX=m -CONFIG_GP2AP002=m -CONFIG_GP2AP020A00F=m -CONFIG_IQS621_ALS=m -CONFIG_SENSORS_ISL29018=m -CONFIG_SENSORS_ISL29028=m -CONFIG_ISL29125=m -CONFIG_HID_SENSOR_ALS=m -CONFIG_HID_SENSOR_PROX=m -CONFIG_JSA1212=m -CONFIG_RPR0521=m -CONFIG_SENSORS_LM3533=m -CONFIG_LTR501=m -CONFIG_LV0104CS=m -CONFIG_MAX44000=m -CONFIG_MAX44009=m -CONFIG_NOA1305=m -CONFIG_OPT3001=m -CONFIG_PA12203001=m -CONFIG_SI1133=m -CONFIG_SI1145=m -CONFIG_STK3310=m -CONFIG_ST_UVIS25=m -CONFIG_ST_UVIS25_I2C=m -CONFIG_ST_UVIS25_SPI=m -CONFIG_TCS3414=m -CONFIG_TCS3472=m -CONFIG_SENSORS_TSL2563=m -CONFIG_TSL2583=m -CONFIG_TSL2772=m -CONFIG_TSL4531=m -CONFIG_US5182D=m -CONFIG_VCNL4000=m -CONFIG_VCNL4035=m -CONFIG_VEML6030=m -CONFIG_VEML6070=m -CONFIG_VL6180=m -CONFIG_ZOPT2201=m -# end of Light sensors - -# -# Magnetometer sensors -# -CONFIG_AK8974=m -CONFIG_AK8975=m -CONFIG_AK09911=m -CONFIG_BMC150_MAGN=m -CONFIG_BMC150_MAGN_I2C=m -CONFIG_BMC150_MAGN_SPI=m -CONFIG_MAG3110=m -CONFIG_HID_SENSOR_MAGNETOMETER_3D=m -CONFIG_MMC35240=m -CONFIG_IIO_ST_MAGN_3AXIS=m -CONFIG_IIO_ST_MAGN_I2C_3AXIS=m -CONFIG_IIO_ST_MAGN_SPI_3AXIS=m -CONFIG_SENSORS_HMC5843=m -CONFIG_SENSORS_HMC5843_I2C=m -CONFIG_SENSORS_HMC5843_SPI=m -CONFIG_SENSORS_RM3100=m -CONFIG_SENSORS_RM3100_I2C=m -CONFIG_SENSORS_RM3100_SPI=m -# end of Magnetometer sensors - -# -# Multiplexers -# -CONFIG_IIO_MUX=m -# end of Multiplexers - -# -# Inclinometer sensors -# -CONFIG_HID_SENSOR_INCLINOMETER_3D=m -CONFIG_HID_SENSOR_DEVICE_ROTATION=m -# end of Inclinometer sensors - -# -# Triggers - standalone -# -CONFIG_IIO_HRTIMER_TRIGGER=m -CONFIG_IIO_INTERRUPT_TRIGGER=m -CONFIG_IIO_TIGHTLOOP_TRIGGER=m -CONFIG_IIO_SYSFS_TRIGGER=m -# end of Triggers - standalone - -# -# Linear and angular position sensors -# -CONFIG_IQS624_POS=m -# end of Linear and angular position sensors - -# -# Digital potentiometers -# -CONFIG_AD5272=m -CONFIG_DS1803=m -CONFIG_MAX5432=m -CONFIG_MAX5481=m -CONFIG_MAX5487=m -CONFIG_MCP4018=m -CONFIG_MCP4131=m -CONFIG_MCP4531=m -CONFIG_MCP41010=m -CONFIG_TPL0102=m -# end of Digital potentiometers - -# -# Digital potentiostats -# -CONFIG_LMP91000=m -# end of Digital potentiostats - -# -# Pressure sensors -# -CONFIG_ABP060MG=m -CONFIG_BMP280=m -CONFIG_BMP280_I2C=m -CONFIG_BMP280_SPI=m -CONFIG_IIO_CROS_EC_BARO=m -CONFIG_DLHL60D=m -CONFIG_DPS310=m -CONFIG_HID_SENSOR_PRESS=m 
-CONFIG_HP03=m -CONFIG_ICP10100=m -CONFIG_MPL115=m -CONFIG_MPL115_I2C=m -CONFIG_MPL115_SPI=m -CONFIG_MPL3115=m -CONFIG_MS5611=m -CONFIG_MS5611_I2C=m -CONFIG_MS5611_SPI=m -CONFIG_MS5637=m -CONFIG_IIO_ST_PRESS=m -CONFIG_IIO_ST_PRESS_I2C=m -CONFIG_IIO_ST_PRESS_SPI=m -CONFIG_T5403=m -CONFIG_HP206C=m -CONFIG_ZPA2326=m -CONFIG_ZPA2326_I2C=m -CONFIG_ZPA2326_SPI=m -# end of Pressure sensors - -# -# Lightning sensors -# -CONFIG_AS3935=m -# end of Lightning sensors - -# -# Proximity and distance sensors -# -CONFIG_ISL29501=m -CONFIG_LIDAR_LITE_V2=m -CONFIG_MB1232=m -CONFIG_PING=m -CONFIG_RFD77402=m -CONFIG_SRF04=m -CONFIG_SX9500=m -CONFIG_SRF08=m -CONFIG_VL53L0X_I2C=m -# end of Proximity and distance sensors - -# -# Resolver to digital converters -# -CONFIG_AD2S90=m -CONFIG_AD2S1200=m -# end of Resolver to digital converters - -# -# Temperature sensors -# -CONFIG_IQS620AT_TEMP=m -CONFIG_LTC2983=m -CONFIG_MAXIM_THERMOCOUPLE=m -CONFIG_HID_SENSOR_TEMP=m -CONFIG_MLX90614=m -CONFIG_MLX90632=m -CONFIG_TMP006=m -CONFIG_TMP007=m -CONFIG_TSYS01=m -CONFIG_TSYS02D=m -CONFIG_MAX31856=m -# end of Temperature sensors - -CONFIG_NTB=m -CONFIG_NTB_MSI=y -CONFIG_NTB_AMD=m -CONFIG_NTB_IDT=m -CONFIG_NTB_INTEL=m -CONFIG_NTB_SWITCHTEC=m -# CONFIG_NTB_PINGPONG is not set -# CONFIG_NTB_TOOL is not set -# CONFIG_NTB_PERF is not set -# CONFIG_NTB_MSI_TEST is not set -CONFIG_NTB_TRANSPORT=m -CONFIG_VME_BUS=y - -# -# VME Bridge Drivers -# -CONFIG_VME_CA91CX42=m -CONFIG_VME_TSI148=m -# CONFIG_VME_FAKE is not set - -# -# VME Board Drivers -# -CONFIG_VMIVME_7805=m - -# -# VME Device Drivers -# -CONFIG_VME_USER=m -CONFIG_PWM=y -CONFIG_PWM_SYSFS=y -# CONFIG_PWM_DEBUG is not set -CONFIG_PWM_ATMEL_HLCDC_PWM=m -CONFIG_PWM_CRC=y -CONFIG_PWM_CROS_EC=m -CONFIG_PWM_FSL_FTM=m -CONFIG_PWM_LP3943=m -CONFIG_PWM_LPSS=m -CONFIG_PWM_LPSS_PCI=m -CONFIG_PWM_LPSS_PLATFORM=m -CONFIG_PWM_PCA9685=m -CONFIG_PWM_STMPE=y -CONFIG_PWM_TWL=m -CONFIG_PWM_TWL_LED=m - -# -# IRQ chip support -# -CONFIG_IRQCHIP=y -CONFIG_AL_FIC=y -CONFIG_MADERA_IRQ=m -# end of IRQ chip support - -CONFIG_IPACK_BUS=m -CONFIG_BOARD_TPCI200=m -CONFIG_SERIAL_IPOCTAL=m -CONFIG_RESET_CONTROLLER=y -CONFIG_RESET_BRCMSTB_RESCAL=y -CONFIG_RESET_INTEL_GW=y -CONFIG_RESET_TI_SYSCON=m - -# -# PHY Subsystem -# -CONFIG_GENERIC_PHY=y -CONFIG_GENERIC_PHY_MIPI_DPHY=y -CONFIG_BCM_KONA_USB2_PHY=m -CONFIG_PHY_CADENCE_TORRENT=m -CONFIG_PHY_CADENCE_DPHY=m -CONFIG_PHY_CADENCE_SIERRA=m -CONFIG_PHY_FSL_IMX8MQ_USB=m -CONFIG_PHY_MIXEL_MIPI_DPHY=m -CONFIG_PHY_PXA_28NM_HSIC=m -CONFIG_PHY_PXA_28NM_USB2=m -CONFIG_PHY_CPCAP_USB=m -CONFIG_PHY_MAPPHONE_MDM6600=m -CONFIG_PHY_OCELOT_SERDES=m -CONFIG_PHY_QCOM_USB_HS=m -CONFIG_PHY_QCOM_USB_HSIC=m -CONFIG_PHY_SAMSUNG_USB2=m -CONFIG_PHY_TUSB1210=m -CONFIG_PHY_INTEL_EMMC=m -# end of PHY Subsystem - -CONFIG_POWERCAP=y -CONFIG_INTEL_RAPL_CORE=m -CONFIG_INTEL_RAPL=m -CONFIG_IDLE_INJECT=y -CONFIG_MCB=m -CONFIG_MCB_PCI=m -CONFIG_MCB_LPC=m - -# -# Performance monitor support -# -# end of Performance monitor support - -CONFIG_RAS=y -CONFIG_RAS_CEC=y -# CONFIG_RAS_CEC_DEBUG is not set -CONFIG_USB4=m - -# -# Android -# -# CONFIG_ANDROID is not set -# end of Android - -CONFIG_LIBNVDIMM=y -CONFIG_BLK_DEV_PMEM=m -CONFIG_ND_BLK=m -CONFIG_ND_CLAIM=y -CONFIG_ND_BTT=m -CONFIG_BTT=y -CONFIG_ND_PFN=m -CONFIG_NVDIMM_PFN=y -CONFIG_NVDIMM_DAX=y -CONFIG_OF_PMEM=m -CONFIG_DAX_DRIVER=y -CONFIG_DAX=y -CONFIG_DEV_DAX=m -CONFIG_DEV_DAX_PMEM=m -CONFIG_DEV_DAX_HMEM=m -CONFIG_DEV_DAX_KMEM=m -CONFIG_DEV_DAX_PMEM_COMPAT=m -CONFIG_NVMEM=y -CONFIG_NVMEM_SYSFS=y -CONFIG_NVMEM_SPMI_SDAM=m 
-CONFIG_RAVE_SP_EEPROM=m - -# -# HW tracing support -# -CONFIG_STM=m -CONFIG_STM_PROTO_BASIC=m -CONFIG_STM_PROTO_SYS_T=m -# CONFIG_STM_DUMMY is not set -CONFIG_STM_SOURCE_CONSOLE=m -CONFIG_STM_SOURCE_HEARTBEAT=m -CONFIG_STM_SOURCE_FTRACE=m -CONFIG_INTEL_TH=m -CONFIG_INTEL_TH_PCI=m -CONFIG_INTEL_TH_ACPI=m -CONFIG_INTEL_TH_GTH=m -CONFIG_INTEL_TH_STH=m -CONFIG_INTEL_TH_MSU=m -CONFIG_INTEL_TH_PTI=m -# CONFIG_INTEL_TH_DEBUG is not set -# end of HW tracing support - -CONFIG_FPGA=m -CONFIG_ALTERA_PR_IP_CORE=m -CONFIG_ALTERA_PR_IP_CORE_PLAT=m -CONFIG_FPGA_MGR_ALTERA_PS_SPI=m -CONFIG_FPGA_MGR_ALTERA_CVP=m -CONFIG_FPGA_MGR_XILINX_SPI=m -CONFIG_FPGA_MGR_ICE40_SPI=m -CONFIG_FPGA_MGR_MACHXO2_SPI=m -CONFIG_FPGA_BRIDGE=m -CONFIG_ALTERA_FREEZE_BRIDGE=m -CONFIG_XILINX_PR_DECOUPLER=m -CONFIG_FPGA_REGION=m -CONFIG_OF_FPGA_REGION=m -CONFIG_FPGA_DFL=m -CONFIG_FPGA_DFL_FME=m -CONFIG_FPGA_DFL_FME_MGR=m -CONFIG_FPGA_DFL_FME_BRIDGE=m -CONFIG_FPGA_DFL_FME_REGION=m -CONFIG_FPGA_DFL_AFU=m -CONFIG_FPGA_DFL_PCI=m -CONFIG_FSI=m -CONFIG_FSI_NEW_DEV_NODE=y -CONFIG_FSI_MASTER_GPIO=m -CONFIG_FSI_MASTER_HUB=m -CONFIG_FSI_MASTER_ASPEED=m -CONFIG_FSI_SCOM=m -CONFIG_FSI_SBEFIFO=m -CONFIG_FSI_OCC=m -CONFIG_TEE=m - -# -# TEE drivers -# -CONFIG_AMDTEE=m -# end of TEE drivers - -CONFIG_MULTIPLEXER=m - -# -# Multiplexer drivers -# -CONFIG_MUX_ADG792A=m -CONFIG_MUX_ADGS1408=m -CONFIG_MUX_GPIO=m -CONFIG_MUX_MMIO=m -# end of Multiplexer drivers - -CONFIG_PM_OPP=y -CONFIG_UNISYS_VISORBUS=m -CONFIG_SIOX=m -CONFIG_SIOX_BUS_GPIO=m -CONFIG_SLIMBUS=m -CONFIG_SLIM_QCOM_CTRL=m -CONFIG_INTERCONNECT=m -CONFIG_COUNTER=m -CONFIG_FTM_QUADDEC=m -CONFIG_MOST=m -# end of Device Drivers - -# -# File systems -# -CONFIG_DCACHE_WORD_ACCESS=y -CONFIG_VALIDATE_FS_PARSER=y -CONFIG_FS_IOMAP=y -# CONFIG_EXT2_FS is not set -# CONFIG_EXT3_FS is not set -CONFIG_EXT4_FS=m -CONFIG_EXT4_USE_FOR_EXT2=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -# CONFIG_EXT4_DEBUG is not set -CONFIG_JBD2=m -# CONFIG_JBD2_DEBUG is not set -CONFIG_FS_MBCACHE=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -# CONFIG_JFS_DEBUG is not set -CONFIG_JFS_STATISTICS=y -CONFIG_XFS_FS=m -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_XFS_RT=y -CONFIG_XFS_ONLINE_SCRUB=y -CONFIG_XFS_ONLINE_REPAIR=y -# CONFIG_XFS_WARN is not set -# CONFIG_XFS_DEBUG is not set -CONFIG_GFS2_FS=m -CONFIG_GFS2_FS_LOCKING_DLM=y -CONFIG_OCFS2_FS=m -CONFIG_OCFS2_FS_O2CB=m -CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m -CONFIG_OCFS2_FS_STATS=y -CONFIG_OCFS2_DEBUG_MASKLOG=y -# CONFIG_OCFS2_DEBUG_FS is not set -CONFIG_BTRFS_FS=m -CONFIG_BTRFS_FS_POSIX_ACL=y -# CONFIG_BTRFS_FS_CHECK_INTEGRITY is not set -# CONFIG_BTRFS_FS_RUN_SANITY_TESTS is not set -# CONFIG_BTRFS_DEBUG is not set -# CONFIG_BTRFS_ASSERT is not set -# CONFIG_BTRFS_FS_REF_VERIFY is not set -CONFIG_NILFS2_FS=m -CONFIG_F2FS_FS=m -CONFIG_F2FS_STAT_FS=y -CONFIG_F2FS_FS_XATTR=y -CONFIG_F2FS_FS_POSIX_ACL=y -CONFIG_F2FS_FS_SECURITY=y -CONFIG_F2FS_CHECK_FS=y -# CONFIG_F2FS_IO_TRACE is not set -# CONFIG_F2FS_FAULT_INJECTION is not set -CONFIG_F2FS_FS_COMPRESSION=y -CONFIG_F2FS_FS_LZO=y -CONFIG_F2FS_FS_LZ4=y -CONFIG_F2FS_FS_ZSTD=y -CONFIG_ZONEFS_FS=m -CONFIG_FS_DAX=y -CONFIG_FS_DAX_PMD=y -CONFIG_FS_POSIX_ACL=y -CONFIG_EXPORTFS=y -CONFIG_EXPORTFS_BLOCK_OPS=y -CONFIG_FILE_LOCKING=y -# CONFIG_MANDATORY_FILE_LOCKING is not set -CONFIG_FS_ENCRYPTION=y 
-CONFIG_FS_ENCRYPTION_ALGS=m -CONFIG_FS_VERITY=y -# CONFIG_FS_VERITY_DEBUG is not set -CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y -CONFIG_FSNOTIFY=y -CONFIG_DNOTIFY=y -CONFIG_INOTIFY_USER=y -CONFIG_FANOTIFY=y -CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y -CONFIG_QUOTA=y -CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set -# CONFIG_QUOTA_DEBUG is not set -CONFIG_QUOTA_TREE=m -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_QUOTACTL=y -CONFIG_QUOTACTL_COMPAT=y -CONFIG_AUTOFS4_FS=y -CONFIG_AUTOFS_FS=y -CONFIG_FUSE_FS=m -CONFIG_CUSE=m -CONFIG_VIRTIO_FS=m -CONFIG_OVERLAY_FS=m -CONFIG_OVERLAY_FS_REDIRECT_DIR=y -# CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW is not set -CONFIG_OVERLAY_FS_INDEX=y -CONFIG_OVERLAY_FS_XINO_AUTO=y -CONFIG_OVERLAY_FS_METACOPY=y - -# -# Caches -# -CONFIG_FSCACHE=m -CONFIG_FSCACHE_STATS=y -CONFIG_FSCACHE_HISTOGRAM=y -# CONFIG_FSCACHE_DEBUG is not set -# CONFIG_FSCACHE_OBJECT_LIST is not set -CONFIG_CACHEFILES=m -# CONFIG_CACHEFILES_DEBUG is not set -# CONFIG_CACHEFILES_HISTOGRAM is not set -# end of Caches - -# -# CD-ROM/DVD Filesystems -# -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -# end of CD-ROM/DVD Filesystems - -# -# DOS/FAT/EXFAT/NT Filesystems -# -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_CODEPAGE=437 -CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" -CONFIG_FAT_DEFAULT_UTF8=y -CONFIG_EXFAT_FS=m -CONFIG_EXFAT_DEFAULT_IOCHARSET="utf8" -CONFIG_NTFS_FS=m -# CONFIG_NTFS_DEBUG is not set -CONFIG_NTFS_RW=y -# end of DOS/FAT/EXFAT/NT Filesystems - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_KCORE=y -CONFIG_PROC_VMCORE=y -CONFIG_PROC_VMCORE_DEVICE_DUMP=y -CONFIG_PROC_SYSCTL=y -CONFIG_PROC_PAGE_MONITOR=y -CONFIG_PROC_CHILDREN=y -CONFIG_PROC_PID_ARCH_STATUS=y -CONFIG_PROC_CPU_RESCTRL=y -CONFIG_KERNFS=y -CONFIG_SYSFS=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_TMPFS_XATTR=y -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -CONFIG_MEMFD_CREATE=y -CONFIG_ARCH_HAS_GIGANTIC_PAGE=y -CONFIG_CONFIGFS_FS=y -CONFIG_EFIVAR_FS=y -# end of Pseudo filesystems - -CONFIG_MISC_FILESYSTEMS=y -CONFIG_ORANGEFS_FS=m -# CONFIG_ADFS_FS is not set -CONFIG_AFFS_FS=m -CONFIG_ECRYPT_FS=m -# CONFIG_ECRYPT_FS_MESSAGING is not set -CONFIG_HFS_FS=m -CONFIG_HFSPLUS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -CONFIG_JFFS2_FS=m -CONFIG_JFFS2_FS_DEBUG=0 -CONFIG_JFFS2_FS_WRITEBUFFER=y -# CONFIG_JFFS2_FS_WBUF_VERIFY is not set -CONFIG_JFFS2_SUMMARY=y -CONFIG_JFFS2_FS_XATTR=y -CONFIG_JFFS2_FS_POSIX_ACL=y -CONFIG_JFFS2_FS_SECURITY=y -# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set -CONFIG_JFFS2_ZLIB=y -CONFIG_JFFS2_RTIME=y -CONFIG_UBIFS_FS=m -# CONFIG_UBIFS_FS_ADVANCED_COMPR is not set -CONFIG_UBIFS_FS_LZO=y -CONFIG_UBIFS_FS_ZLIB=y -CONFIG_UBIFS_FS_ZSTD=y -CONFIG_UBIFS_ATIME_SUPPORT=y -CONFIG_UBIFS_FS_XATTR=y -CONFIG_UBIFS_FS_SECURITY=y -CONFIG_UBIFS_FS_AUTHENTICATION=y -CONFIG_CRAMFS=m -CONFIG_CRAMFS_BLOCKDEV=y -CONFIG_CRAMFS_MTD=y -CONFIG_SQUASHFS=m -# CONFIG_SQUASHFS_FILE_CACHE is not set -CONFIG_SQUASHFS_FILE_DIRECT=y -# CONFIG_SQUASHFS_DECOMP_SINGLE is not set -CONFIG_SQUASHFS_DECOMP_MULTI=y -# CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set -CONFIG_SQUASHFS_XATTR=y -CONFIG_SQUASHFS_ZLIB=y -CONFIG_SQUASHFS_LZ4=y -CONFIG_SQUASHFS_LZO=y -CONFIG_SQUASHFS_XZ=y -CONFIG_SQUASHFS_ZSTD=y -# CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set -# CONFIG_SQUASHFS_EMBEDDED is not set -CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 -# CONFIG_VXFS_FS is not set -CONFIG_MINIX_FS=m -CONFIG_OMFS_FS=m -# 
CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX6FS_FS is not set -CONFIG_ROMFS_FS=m -CONFIG_ROMFS_BACKED_BY_BLOCK=y -# CONFIG_ROMFS_BACKED_BY_MTD is not set -# CONFIG_ROMFS_BACKED_BY_BOTH is not set -CONFIG_ROMFS_ON_BLOCK=y -CONFIG_PSTORE=y -CONFIG_PSTORE_DEFLATE_COMPRESS=m -CONFIG_PSTORE_LZO_COMPRESS=m -CONFIG_PSTORE_LZ4_COMPRESS=m -CONFIG_PSTORE_LZ4HC_COMPRESS=m -# CONFIG_PSTORE_842_COMPRESS is not set -CONFIG_PSTORE_ZSTD_COMPRESS=y -CONFIG_PSTORE_COMPRESS=y -# CONFIG_PSTORE_DEFLATE_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZO_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZ4_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZ4HC_COMPRESS_DEFAULT is not set -CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y -CONFIG_PSTORE_COMPRESS_DEFAULT="zstd" -# CONFIG_PSTORE_CONSOLE is not set -# CONFIG_PSTORE_PMSG is not set -# CONFIG_PSTORE_FTRACE is not set -CONFIG_PSTORE_RAM=y -# CONFIG_SYSV_FS is not set -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set -# CONFIG_UFS_DEBUG is not set -CONFIG_EROFS_FS=m -# CONFIG_EROFS_FS_DEBUG is not set -CONFIG_EROFS_FS_XATTR=y -CONFIG_EROFS_FS_POSIX_ACL=y -CONFIG_EROFS_FS_SECURITY=y -CONFIG_EROFS_FS_ZIP=y -CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT=2 -CONFIG_VBOXSF_FS=m -CONFIG_NETWORK_FILESYSTEMS=y -CONFIG_NFS_FS=m -CONFIG_NFS_V2=m -CONFIG_NFS_V3=m -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=m -CONFIG_NFS_SWAP=y -CONFIG_NFS_V4_1=y -CONFIG_NFS_V4_2=y -CONFIG_PNFS_FILE_LAYOUT=m -CONFIG_PNFS_BLOCK=m -CONFIG_PNFS_FLEXFILE_LAYOUT=m -CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" -CONFIG_NFS_V4_1_MIGRATION=y -CONFIG_NFS_V4_SECURITY_LABEL=y -CONFIG_NFS_FSCACHE=y -# CONFIG_NFS_USE_LEGACY_DNS is not set -CONFIG_NFS_USE_KERNEL_DNS=y -CONFIG_NFS_DEBUG=y -# CONFIG_NFS_DISABLE_UDP_SUPPORT is not set -CONFIG_NFSD=m -CONFIG_NFSD_V2_ACL=y -CONFIG_NFSD_V3=y -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V4=y -CONFIG_NFSD_PNFS=y -CONFIG_NFSD_BLOCKLAYOUT=y -CONFIG_NFSD_SCSILAYOUT=y -# CONFIG_NFSD_FLEXFILELAYOUT is not set -CONFIG_NFSD_V4_SECURITY_LABEL=y -CONFIG_GRACE_PERIOD=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_NFS_ACL_SUPPORT=m -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=m -CONFIG_SUNRPC_GSS=m -CONFIG_SUNRPC_BACKCHANNEL=y -CONFIG_SUNRPC_SWAP=y -CONFIG_RPCSEC_GSS_KRB5=m -CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES=y -CONFIG_SUNRPC_DEBUG=y -CONFIG_SUNRPC_XPRT_RDMA=m -CONFIG_CEPH_FS=m -CONFIG_CEPH_FSCACHE=y -CONFIG_CEPH_FS_POSIX_ACL=y -CONFIG_CEPH_FS_SECURITY_LABEL=y -CONFIG_CIFS=m -# CONFIG_CIFS_STATS2 is not set -CONFIG_CIFS_ALLOW_INSECURE_LEGACY=y -# CONFIG_CIFS_WEAK_PW_HASH is not set -CONFIG_CIFS_UPCALL=y -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -CONFIG_CIFS_DEBUG=y -# CONFIG_CIFS_DEBUG2 is not set -# CONFIG_CIFS_DEBUG_DUMP_KEYS is not set -CONFIG_CIFS_DFS_UPCALL=y -# CONFIG_CIFS_SMB_DIRECT is not set -CONFIG_CIFS_FSCACHE=y -CONFIG_CODA_FS=m -CONFIG_AFS_FS=m -# CONFIG_AFS_DEBUG is not set -CONFIG_AFS_FSCACHE=y -# CONFIG_AFS_DEBUG_CURSOR is not set -CONFIG_9P_FS=m -CONFIG_9P_FSCACHE=y -CONFIG_9P_FS_POSIX_ACL=y -CONFIG_9P_FS_SECURITY=y -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m 
-CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_MAC_ROMAN=m -CONFIG_NLS_MAC_CELTIC=m -CONFIG_NLS_MAC_CENTEURO=m -CONFIG_NLS_MAC_CROATIAN=m -CONFIG_NLS_MAC_CYRILLIC=m -CONFIG_NLS_MAC_GAELIC=m -CONFIG_NLS_MAC_GREEK=m -CONFIG_NLS_MAC_ICELAND=m -CONFIG_NLS_MAC_INUIT=m -CONFIG_NLS_MAC_ROMANIAN=m -CONFIG_NLS_MAC_TURKISH=m -CONFIG_NLS_UTF8=m -CONFIG_DLM=m -# CONFIG_DLM_DEBUG is not set -CONFIG_UNICODE=y -# CONFIG_UNICODE_NORMALIZATION_SELFTEST is not set -CONFIG_IO_WQ=y -# end of File systems - -# -# Security options -# -CONFIG_KEYS=y -CONFIG_KEYS_REQUEST_CACHE=y -CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_BIG_KEYS=y -CONFIG_TRUSTED_KEYS=m -CONFIG_ENCRYPTED_KEYS=m -CONFIG_KEY_DH_OPERATIONS=y -# CONFIG_SECURITY_DMESG_RESTRICT is not set -CONFIG_SECURITY=y -CONFIG_SECURITYFS=y -CONFIG_SECURITY_NETWORK=y -CONFIG_PAGE_TABLE_ISOLATION=y -CONFIG_SECURITY_INFINIBAND=y -CONFIG_SECURITY_NETWORK_XFRM=y -CONFIG_SECURITY_PATH=y -# CONFIG_INTEL_TXT is not set -CONFIG_LSM_MMAP_MIN_ADDR=65536 -CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y -CONFIG_HARDENED_USERCOPY=y -CONFIG_HARDENED_USERCOPY_FALLBACK=y -# CONFIG_HARDENED_USERCOPY_PAGESPAN is not set -CONFIG_FORTIFY_SOURCE=y -# CONFIG_STATIC_USERMODEHELPER is not set -CONFIG_SECURITY_SELINUX=y -CONFIG_SECURITY_SELINUX_BOOTPARAM=y -# CONFIG_SECURITY_SELINUX_DISABLE is not set -CONFIG_SECURITY_SELINUX_DEVELOP=y -CONFIG_SECURITY_SELINUX_AVC_STATS=y -CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=0 -CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9 -CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256 -CONFIG_SECURITY_SMACK=y -CONFIG_SECURITY_SMACK_BRINGUP=y -CONFIG_SECURITY_SMACK_NETFILTER=y -CONFIG_SECURITY_SMACK_APPEND_SIGNALS=y -CONFIG_SECURITY_TOMOYO=y -CONFIG_SECURITY_TOMOYO_MAX_ACCEPT_ENTRY=2048 -CONFIG_SECURITY_TOMOYO_MAX_AUDIT_LOG=1024 -# CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER is not set -CONFIG_SECURITY_TOMOYO_POLICY_LOADER="/sbin/tomoyo-init" -CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER="/sbin/init" -# CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING is not set -CONFIG_SECURITY_APPARMOR=y -CONFIG_SECURITY_APPARMOR_HASH=y -CONFIG_SECURITY_APPARMOR_HASH_DEFAULT=y -# CONFIG_SECURITY_APPARMOR_DEBUG is not set -# CONFIG_SECURITY_LOADPIN is not set -CONFIG_SECURITY_YAMA=y -CONFIG_SECURITY_SAFESETID=y -CONFIG_SECURITY_LOCKDOWN_LSM=y -# CONFIG_SECURITY_LOCKDOWN_LSM_EARLY is not set -CONFIG_LOCK_DOWN_KERNEL_FORCE_NONE=y -# CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY is not set -# CONFIG_LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY is not set -# CONFIG_INTEGRITY is not set -# CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT is not set -# CONFIG_DEFAULT_SECURITY_SELINUX is not set -# CONFIG_DEFAULT_SECURITY_SMACK is not set -# CONFIG_DEFAULT_SECURITY_TOMOYO is not set -# CONFIG_DEFAULT_SECURITY_APPARMOR is not set -CONFIG_DEFAULT_SECURITY_DAC=y -CONFIG_LSM="lockdown,yama" - -# -# Kernel hardening options -# -CONFIG_GCC_PLUGIN_STRUCTLEAK=y - -# -# Memory initialization -# -# CONFIG_INIT_STACK_NONE is not set -# CONFIG_GCC_PLUGIN_STRUCTLEAK_USER is not set -# CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF is not set -CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL=y -# CONFIG_GCC_PLUGIN_STRUCTLEAK_VERBOSE is not set -# 
CONFIG_GCC_PLUGIN_STACKLEAK is not set -CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y -# CONFIG_INIT_ON_FREE_DEFAULT_ON is not set -# end of Memory initialization -# end of Kernel hardening options -# end of Security options - -CONFIG_XOR_BLOCKS=m -CONFIG_ASYNC_CORE=m -CONFIG_ASYNC_MEMCPY=m -CONFIG_ASYNC_XOR=m -CONFIG_ASYNC_PQ=m -CONFIG_ASYNC_RAID6_RECOV=m -CONFIG_CRYPTO=y - -# -# Crypto core or helper -# -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_ALGAPI2=y -CONFIG_CRYPTO_AEAD=y -CONFIG_CRYPTO_AEAD2=y -CONFIG_CRYPTO_SKCIPHER=y -CONFIG_CRYPTO_SKCIPHER2=y -CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_HASH2=y -CONFIG_CRYPTO_RNG=y -CONFIG_CRYPTO_RNG2=y -CONFIG_CRYPTO_RNG_DEFAULT=y -CONFIG_CRYPTO_AKCIPHER2=y -CONFIG_CRYPTO_AKCIPHER=y -CONFIG_CRYPTO_KPP2=y -CONFIG_CRYPTO_KPP=y -CONFIG_CRYPTO_ACOMP2=y -CONFIG_CRYPTO_MANAGER=y -CONFIG_CRYPTO_MANAGER2=y -CONFIG_CRYPTO_USER=m -CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y -CONFIG_CRYPTO_GF128MUL=y -CONFIG_CRYPTO_NULL=y -CONFIG_CRYPTO_NULL2=y -CONFIG_CRYPTO_PCRYPT=m -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_AUTHENC=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_SIMD=m -CONFIG_CRYPTO_GLUE_HELPER_X86=m -CONFIG_CRYPTO_ENGINE=m - -# -# Public-key cryptography -# -CONFIG_CRYPTO_RSA=y -CONFIG_CRYPTO_DH=y -CONFIG_CRYPTO_ECC=m -CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m -CONFIG_CRYPTO_CURVE25519_X86=m - -# -# Authenticated Encryption with Associated Data -# -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=y -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128_AESNI_SSE2=m -CONFIG_CRYPTO_SEQIV=y -CONFIG_CRYPTO_ECHAINIV=m - -# -# Block modes -# -CONFIG_CRYPTO_CBC=m -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_CTR=y -CONFIG_CRYPTO_CTS=m -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_NHPOLY1305=m -CONFIG_CRYPTO_NHPOLY1305_SSE2=m -CONFIG_CRYPTO_NHPOLY1305_AVX2=m -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_ESSIV=m - -# -# Hash modes -# -CONFIG_CRYPTO_CMAC=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m - -# -# Digest -# -CONFIG_CRYPTO_CRC32C=m -CONFIG_CRYPTO_CRC32C_INTEL=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRC32_PCLMUL=m -CONFIG_CRYPTO_XXHASH=m -CONFIG_CRYPTO_BLAKE2B=m -CONFIG_CRYPTO_BLAKE2S=m -CONFIG_CRYPTO_BLAKE2S_X86=m -CONFIG_CRYPTO_CRCT10DIF=y -CONFIG_CRYPTO_CRCT10DIF_PCLMUL=m -CONFIG_CRYPTO_GHASH=y -CONFIG_CRYPTO_POLY1305=m -CONFIG_CRYPTO_POLY1305_X86_64=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA1_SSSE3=m -CONFIG_CRYPTO_SHA256_SSSE3=m -CONFIG_CRYPTO_SHA512_SSSE3=m -CONFIG_CRYPTO_SHA256=y -CONFIG_CRYPTO_SHA512=y -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL=m - -# -# Ciphers -# -CONFIG_CRYPTO_AES=y -CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_AES_NI_INTEL=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_BLOWFISH_COMMON=m -CONFIG_CRYPTO_BLOWFISH_X86_64=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_CAMELLIA_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64=m -CONFIG_CRYPTO_CAST_COMMON=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST5_AVX_X86_64=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_CAST6_AVX_X86_64=m -CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_DES3_EDE_X86_64=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_KHAZAD=m 
-CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_CHACHA20=m -CONFIG_CRYPTO_CHACHA20_X86_64=m -CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SERPENT_SSE2_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX2_X86_64=m -CONFIG_CRYPTO_SM4=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_TWOFISH_COMMON=m -CONFIG_CRYPTO_TWOFISH_X86_64=m -CONFIG_CRYPTO_TWOFISH_X86_64_3WAY=m -CONFIG_CRYPTO_TWOFISH_AVX_X86_64=m - -# -# Compression -# -CONFIG_CRYPTO_DEFLATE=m -CONFIG_CRYPTO_LZO=m -CONFIG_CRYPTO_842=m -CONFIG_CRYPTO_LZ4=y -CONFIG_CRYPTO_LZ4HC=m -CONFIG_CRYPTO_ZSTD=y - -# -# Random Number Generation -# -CONFIG_CRYPTO_ANSI_CPRNG=m -CONFIG_CRYPTO_DRBG_MENU=y -CONFIG_CRYPTO_DRBG_HMAC=y -CONFIG_CRYPTO_DRBG_HASH=y -CONFIG_CRYPTO_DRBG_CTR=y -CONFIG_CRYPTO_DRBG=y -CONFIG_CRYPTO_JITTERENTROPY=y -CONFIG_CRYPTO_USER_API=m -CONFIG_CRYPTO_USER_API_HASH=m -CONFIG_CRYPTO_USER_API_SKCIPHER=m -CONFIG_CRYPTO_USER_API_RNG=m -CONFIG_CRYPTO_USER_API_AEAD=m -# CONFIG_CRYPTO_STATS is not set -CONFIG_CRYPTO_HASH_INFO=y - -# -# Crypto library routines -# -CONFIG_CRYPTO_LIB_AES=y -CONFIG_CRYPTO_LIB_ARC4=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=m -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA=m -CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m -CONFIG_CRYPTO_LIB_CHACHA=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_DES=m -CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11 -CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m -CONFIG_CRYPTO_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRYPTO_LIB_SHA256=y -CONFIG_CRYPTO_HW=y -CONFIG_CRYPTO_DEV_PADLOCK=m -CONFIG_CRYPTO_DEV_PADLOCK_AES=m -CONFIG_CRYPTO_DEV_PADLOCK_SHA=m -CONFIG_CRYPTO_DEV_ATMEL_I2C=m -CONFIG_CRYPTO_DEV_ATMEL_ECC=m -CONFIG_CRYPTO_DEV_ATMEL_SHA204A=m -CONFIG_CRYPTO_DEV_CCP=y -CONFIG_CRYPTO_DEV_CCP_DD=m -CONFIG_CRYPTO_DEV_SP_CCP=y -CONFIG_CRYPTO_DEV_CCP_CRYPTO=m -CONFIG_CRYPTO_DEV_SP_PSP=y -CONFIG_CRYPTO_DEV_CCP_DEBUGFS=y -CONFIG_CRYPTO_DEV_QAT=m -CONFIG_CRYPTO_DEV_QAT_DH895xCC=m -CONFIG_CRYPTO_DEV_QAT_C3XXX=m -CONFIG_CRYPTO_DEV_QAT_C62X=m -CONFIG_CRYPTO_DEV_QAT_DH895xCCVF=m -CONFIG_CRYPTO_DEV_QAT_C3XXXVF=m -CONFIG_CRYPTO_DEV_QAT_C62XVF=m -CONFIG_CRYPTO_DEV_NITROX=m -CONFIG_CRYPTO_DEV_NITROX_CNN55XX=m -CONFIG_CRYPTO_DEV_CHELSIO=m -CONFIG_CHELSIO_IPSEC_INLINE=y -CONFIG_CHELSIO_TLS_DEVICE=y -CONFIG_CRYPTO_DEV_VIRTIO=m -CONFIG_CRYPTO_DEV_SAFEXCEL=m -CONFIG_CRYPTO_DEV_CCREE=m -CONFIG_CRYPTO_DEV_AMLOGIC_GXL=m -CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG=y -CONFIG_ASYMMETRIC_KEY_TYPE=y -CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y -CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m -CONFIG_X509_CERTIFICATE_PARSER=y -CONFIG_PKCS8_PRIVATE_KEY_PARSER=m -CONFIG_TPM_KEY_PARSER=m -CONFIG_PKCS7_MESSAGE_PARSER=y -# CONFIG_PKCS7_TEST_KEY is not set -CONFIG_SIGNED_PE_FILE_VERIFICATION=y - -# -# Certificates for signature checking -# -CONFIG_MODULE_SIG_KEY="certs/signing_key.pem" -CONFIG_SYSTEM_TRUSTED_KEYRING=y -CONFIG_SYSTEM_TRUSTED_KEYS="" -# CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set -CONFIG_SECONDARY_TRUSTED_KEYRING=y -CONFIG_SYSTEM_BLACKLIST_KEYRING=y -CONFIG_SYSTEM_BLACKLIST_HASH_LIST="" -# end of Certificates for signature checking - -CONFIG_BINARY_PRINTF=y - -# -# Library routines -# -CONFIG_RAID6_PQ=m -CONFIG_RAID6_PQ_BENCHMARK=y -CONFIG_PACKING=y -CONFIG_BITREVERSE=y -CONFIG_GENERIC_STRNCPY_FROM_USER=y -CONFIG_GENERIC_STRNLEN_USER=y -CONFIG_GENERIC_NET_UTILS=y -CONFIG_GENERIC_FIND_FIRST_BIT=y -CONFIG_CORDIC=m 
-CONFIG_RATIONAL=y -CONFIG_GENERIC_PCI_IOMAP=y -CONFIG_GENERIC_IOMAP=y -CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y -CONFIG_ARCH_HAS_FAST_MULTIPLIER=y -CONFIG_CRC_CCITT=y -CONFIG_CRC16=m -CONFIG_CRC_T10DIF=y -CONFIG_CRC_ITU_T=m -CONFIG_CRC32=y -# CONFIG_CRC32_SELFTEST is not set -CONFIG_CRC32_SLICEBY8=y -# CONFIG_CRC32_SLICEBY4 is not set -# CONFIG_CRC32_SARWATE is not set -# CONFIG_CRC32_BIT is not set -CONFIG_CRC64=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_LIBCRC32C=m -CONFIG_CRC8=m -CONFIG_XXHASH=y -# CONFIG_RANDOM32_SELFTEST is not set -CONFIG_842_COMPRESS=m -CONFIG_842_DECOMPRESS=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_LZO_COMPRESS=y -CONFIG_LZO_DECOMPRESS=y -CONFIG_LZ4_COMPRESS=y -CONFIG_LZ4HC_COMPRESS=m -CONFIG_LZ4_DECOMPRESS=y -CONFIG_ZSTD_COMPRESS=y -CONFIG_ZSTD_DECOMPRESS=y -CONFIG_XZ_DEC=y -CONFIG_XZ_DEC_X86=y -CONFIG_XZ_DEC_POWERPC=y -CONFIG_XZ_DEC_IA64=y -CONFIG_XZ_DEC_ARM=y -CONFIG_XZ_DEC_ARMTHUMB=y -CONFIG_XZ_DEC_SPARC=y -CONFIG_XZ_DEC_BCJ=y -# CONFIG_XZ_DEC_TEST is not set -CONFIG_DECOMPRESS_GZIP=y -CONFIG_DECOMPRESS_BZIP2=y -CONFIG_DECOMPRESS_LZMA=y -CONFIG_DECOMPRESS_XZ=y -CONFIG_DECOMPRESS_LZO=y -CONFIG_DECOMPRESS_LZ4=y -CONFIG_GENERIC_ALLOCATOR=y -CONFIG_REED_SOLOMON=y -CONFIG_REED_SOLOMON_ENC8=y -CONFIG_REED_SOLOMON_DEC8=y -CONFIG_REED_SOLOMON_DEC16=y -CONFIG_BCH=m -CONFIG_TEXTSEARCH=y -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m -CONFIG_BTREE=y -CONFIG_INTERVAL_TREE=y -CONFIG_XARRAY_MULTI=y -CONFIG_ASSOCIATIVE_ARRAY=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT_MAP=y -CONFIG_HAS_DMA=y -CONFIG_NEED_SG_DMA_LENGTH=y -CONFIG_NEED_DMA_MAP_STATE=y -CONFIG_ARCH_DMA_ADDR_T_64BIT=y -CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED=y -CONFIG_DMA_VIRT_OPS=y -CONFIG_SWIOTLB=y -# CONFIG_DMA_API_DEBUG is not set -CONFIG_SGL_ALLOC=y -CONFIG_IOMMU_HELPER=y -CONFIG_CHECK_SIGNATURE=y -CONFIG_CPU_RMAP=y -CONFIG_DQL=y -CONFIG_GLOB=y -# CONFIG_GLOB_SELFTEST is not set -CONFIG_NLATTR=y -CONFIG_LRU_CACHE=m -CONFIG_CLZ_TAB=y -CONFIG_IRQ_POLL=y -CONFIG_MPILIB=y -CONFIG_DIMLIB=y -CONFIG_LIBFDT=y -CONFIG_OID_REGISTRY=y -CONFIG_UCS2_STRING=y -CONFIG_HAVE_GENERIC_VDSO=y -CONFIG_GENERIC_GETTIMEOFDAY=y -CONFIG_GENERIC_VDSO_TIME_NS=y -CONFIG_FONT_SUPPORT=y -CONFIG_FONTS=y -# CONFIG_FONT_8x8 is not set -CONFIG_FONT_8x16=y -# CONFIG_FONT_6x11 is not set -# CONFIG_FONT_7x14 is not set -# CONFIG_FONT_PEARL_8x8 is not set -# CONFIG_FONT_ACORN_8x8 is not set -# CONFIG_FONT_MINI_4x6 is not set -# CONFIG_FONT_6x10 is not set -# CONFIG_FONT_10x18 is not set -# CONFIG_FONT_SUN8x16 is not set -# CONFIG_FONT_SUN12x22 is not set -CONFIG_FONT_TER16x32=y -CONFIG_SG_POOL=y -CONFIG_ARCH_HAS_PMEM_API=y -CONFIG_MEMREGION=y -CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y -CONFIG_ARCH_HAS_UACCESS_MCSAFE=y -CONFIG_ARCH_STACKWALK=y -CONFIG_SBITMAP=y -CONFIG_PARMAN=m -CONFIG_OBJAGG=m -# CONFIG_STRING_SELFTEST is not set -# end of Library routines - -# -# Kernel hacking -# - -# -# printk and dmesg options -# -CONFIG_PRINTK_TIME=y -# CONFIG_PRINTK_CALLER is not set -CONFIG_CONSOLE_LOGLEVEL_DEFAULT=4 -CONFIG_CONSOLE_LOGLEVEL_QUIET=1 -CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 -# CONFIG_BOOT_PRINTK_DELAY is not set -CONFIG_DYNAMIC_DEBUG=y -CONFIG_SYMBOLIC_ERRNAME=y -CONFIG_DEBUG_BUGVERBOSE=y -# end of printk and dmesg options - -# -# Compile-time checks and compiler options -# -CONFIG_DEBUG_INFO=y -# CONFIG_DEBUG_INFO_REDUCED is not set -# CONFIG_DEBUG_INFO_SPLIT is not set -CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y -# CONFIG_GDB_SCRIPTS is not set -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_FRAME_WARN=2048 
-CONFIG_STRIP_ASM_SYMS=y -# CONFIG_READABLE_ASM is not set -# CONFIG_HEADERS_INSTALL is not set -# CONFIG_DEBUG_SECTION_MISMATCH is not set -CONFIG_SECTION_MISMATCH_WARN_ONLY=y -CONFIG_STACK_VALIDATION=y -# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set -# end of Compile-time checks and compiler options - -# -# Generic Kernel Debugging Instruments -# -CONFIG_MAGIC_SYSRQ=y -CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x0 -CONFIG_MAGIC_SYSRQ_SERIAL=y -CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE="" -CONFIG_DEBUG_FS=y -CONFIG_HAVE_ARCH_KGDB=y -# CONFIG_KGDB is not set -CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y -# CONFIG_UBSAN is not set -# end of Generic Kernel Debugging Instruments - -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_MISC=y - -# -# Memory Debugging -# -# CONFIG_PAGE_EXTENSION is not set -# CONFIG_DEBUG_PAGEALLOC is not set -# CONFIG_PAGE_OWNER is not set -CONFIG_PAGE_POISONING=y -CONFIG_PAGE_POISONING_NO_SANITY=y -CONFIG_PAGE_POISONING_ZERO=y -# CONFIG_DEBUG_PAGE_REF is not set -# CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_GENERIC_PTDUMP=y -CONFIG_PTDUMP_CORE=y -# CONFIG_PTDUMP_DEBUGFS is not set -# CONFIG_DEBUG_OBJECTS is not set -# CONFIG_SLUB_DEBUG_ON is not set -# CONFIG_SLUB_STATS is not set -CONFIG_HAVE_DEBUG_KMEMLEAK=y -# CONFIG_DEBUG_KMEMLEAK is not set -# CONFIG_DEBUG_STACK_USAGE is not set -CONFIG_SCHED_STACK_END_CHECK=y -# CONFIG_DEBUG_VM is not set -CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y -# CONFIG_DEBUG_VIRTUAL is not set -CONFIG_DEBUG_MEMORY_INIT=y -# CONFIG_DEBUG_PER_CPU_MAPS is not set -CONFIG_HAVE_ARCH_KASAN=y -CONFIG_HAVE_ARCH_KASAN_VMALLOC=y -CONFIG_CC_HAS_KASAN_GENERIC=y -# CONFIG_KASAN is not set -CONFIG_KASAN_STACK=1 -# end of Memory Debugging - -# CONFIG_DEBUG_SHIRQ is not set - -# -# Debug Oops, Lockups and Hangs -# -# CONFIG_PANIC_ON_OOPS is not set -CONFIG_PANIC_ON_OOPS_VALUE=0 -CONFIG_PANIC_TIMEOUT=0 -CONFIG_LOCKUP_DETECTOR=y -CONFIG_SOFTLOCKUP_DETECTOR=y -# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 -CONFIG_HARDLOCKUP_DETECTOR_PERF=y -CONFIG_HARDLOCKUP_CHECK_TIMESTAMP=y -CONFIG_HARDLOCKUP_DETECTOR=y -# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set -CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 -CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 -# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set -CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 -# CONFIG_WQ_WATCHDOG is not set -# CONFIG_TEST_LOCKUP is not set -# end of Debug Oops, Lockups and Hangs - -# -# Scheduler Debugging -# -CONFIG_SCHED_DEBUG=y -CONFIG_SCHED_INFO=y -CONFIG_SCHEDSTATS=y -# end of Scheduler Debugging - -# CONFIG_DEBUG_TIMEKEEPING is not set -CONFIG_DEBUG_PREEMPT=y - -# -# Lock Debugging (spinlocks, mutexes, etc...) -# -CONFIG_LOCK_DEBUGGING_SUPPORT=y -# CONFIG_PROVE_LOCKING is not set -# CONFIG_LOCK_STAT is not set -# CONFIG_DEBUG_RT_MUTEXES is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_MUTEXES is not set -# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set -# CONFIG_DEBUG_RWSEMS is not set -# CONFIG_DEBUG_LOCK_ALLOC is not set -# CONFIG_DEBUG_ATOMIC_SLEEP is not set -# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set -# CONFIG_LOCK_TORTURE_TEST is not set -# CONFIG_WW_MUTEX_SELFTEST is not set -# end of Lock Debugging (spinlocks, mutexes, etc...) 
- -CONFIG_STACKTRACE=y -# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set -# CONFIG_DEBUG_KOBJECT is not set - -# -# Debug kernel data structures -# -# CONFIG_DEBUG_LIST is not set -# CONFIG_DEBUG_PLIST is not set -# CONFIG_DEBUG_SG is not set -# CONFIG_DEBUG_NOTIFIERS is not set -# CONFIG_BUG_ON_DATA_CORRUPTION is not set -# end of Debug kernel data structures - -# CONFIG_DEBUG_CREDENTIALS is not set - -# -# RCU Debugging -# -# CONFIG_RCU_PERF_TEST is not set -# CONFIG_RCU_TORTURE_TEST is not set -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -# CONFIG_RCU_TRACE is not set -# CONFIG_RCU_EQS_DEBUG is not set -# end of RCU Debugging - -# CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set -# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set -# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set -CONFIG_LATENCYTOP=y -CONFIG_USER_STACKTRACE_SUPPORT=y -CONFIG_NOP_TRACER=y -CONFIG_HAVE_FUNCTION_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y -CONFIG_HAVE_DYNAMIC_FTRACE=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y -CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y -CONFIG_HAVE_SYSCALL_TRACEPOINTS=y -CONFIG_HAVE_FENTRY=y -CONFIG_HAVE_C_RECORDMCOUNT=y -CONFIG_TRACER_MAX_TRACE=y -CONFIG_TRACE_CLOCK=y -CONFIG_RING_BUFFER=y -CONFIG_EVENT_TRACING=y -CONFIG_CONTEXT_SWITCH_TRACER=y -CONFIG_RING_BUFFER_ALLOW_SWAP=y -CONFIG_TRACING=y -CONFIG_GENERIC_TRACER=y -CONFIG_TRACING_SUPPORT=y -CONFIG_FTRACE=y -# CONFIG_BOOTTIME_TRACING is not set -CONFIG_FUNCTION_TRACER=y -CONFIG_FUNCTION_GRAPH_TRACER=y -CONFIG_DYNAMIC_FTRACE=y -CONFIG_DYNAMIC_FTRACE_WITH_REGS=y -CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y -CONFIG_FUNCTION_PROFILER=y -CONFIG_STACK_TRACER=y -# CONFIG_PREEMPTIRQ_EVENTS is not set -# CONFIG_IRQSOFF_TRACER is not set -# CONFIG_PREEMPT_TRACER is not set -CONFIG_SCHED_TRACER=y -CONFIG_HWLAT_TRACER=y -CONFIG_MMIOTRACE=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_TRACER_SNAPSHOT=y -# CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP is not set -CONFIG_BRANCH_PROFILE_NONE=y -# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_KPROBE_EVENTS=y -# CONFIG_KPROBE_EVENTS_ON_NOTRACE is not set -CONFIG_UPROBE_EVENTS=y -CONFIG_BPF_EVENTS=y -CONFIG_DYNAMIC_EVENTS=y -CONFIG_PROBE_EVENTS=y -CONFIG_BPF_KPROBE_OVERRIDE=y -CONFIG_FTRACE_MCOUNT_RECORD=y -CONFIG_TRACING_MAP=y -CONFIG_HIST_TRIGGERS=y -# CONFIG_TRACE_EVENT_INJECT is not set -# CONFIG_TRACEPOINT_BENCHMARK is not set -# CONFIG_RING_BUFFER_BENCHMARK is not set -# CONFIG_TRACE_EVAL_MAP_FILE is not set -# CONFIG_FTRACE_STARTUP_TEST is not set -# CONFIG_RING_BUFFER_STARTUP_TEST is not set -# CONFIG_MMIOTRACE_TEST is not set -# CONFIG_PREEMPTIRQ_DELAY_TEST is not set -# CONFIG_SYNTH_EVENT_GEN_TEST is not set -# CONFIG_KPROBE_EVENT_GEN_TEST is not set -# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set -# CONFIG_SAMPLES is not set -CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y -CONFIG_STRICT_DEVMEM=y -CONFIG_IO_STRICT_DEVMEM=y - -# -# x86 Debugging -# -CONFIG_TRACE_IRQFLAGS_SUPPORT=y -# CONFIG_X86_VERBOSE_BOOTUP is not set -CONFIG_EARLY_PRINTK=y -# CONFIG_EARLY_PRINTK_DBGP is not set -# CONFIG_EARLY_PRINTK_USB_XDBC is not set -# CONFIG_EFI_PGT_DUMP is not set -CONFIG_DEBUG_WX=y -CONFIG_DOUBLEFAULT=y -# CONFIG_DEBUG_TLBFLUSH is not set -# CONFIG_IOMMU_DEBUG is not set -CONFIG_HAVE_MMIOTRACE_SUPPORT=y -# CONFIG_X86_DECODER_SELFTEST is not set -CONFIG_IO_DELAY_0X80=y -# CONFIG_IO_DELAY_0XED is not set -# CONFIG_IO_DELAY_UDELAY is not set -# CONFIG_IO_DELAY_NONE is not set -CONFIG_DEBUG_BOOT_PARAMS=y -# CONFIG_CPA_DEBUG is not set -# CONFIG_DEBUG_ENTRY is not set -# CONFIG_DEBUG_NMI_SELFTEST is not 
set -# CONFIG_X86_DEBUG_FPU is not set -# CONFIG_PUNIT_ATOM_DEBUG is not set -CONFIG_UNWINDER_ORC=y -# CONFIG_UNWINDER_FRAME_POINTER is not set -# CONFIG_UNWINDER_GUESS is not set -# end of x86 Debugging - -# -# Kernel Testing and Coverage -# -# CONFIG_KUNIT is not set -# CONFIG_NOTIFIER_ERROR_INJECTION is not set -CONFIG_FUNCTION_ERROR_INJECTION=y -# CONFIG_FAULT_INJECTION is not set -CONFIG_ARCH_HAS_KCOV=y -CONFIG_CC_HAS_SANCOV_TRACE_PC=y -# CONFIG_KCOV is not set -CONFIG_RUNTIME_TESTING_MENU=y -CONFIG_LKDTM=m -# CONFIG_TEST_LIST_SORT is not set -# CONFIG_TEST_MIN_HEAP is not set -# CONFIG_TEST_SORT is not set -# CONFIG_KPROBES_SANITY_TEST is not set -# CONFIG_BACKTRACE_SELF_TEST is not set -# CONFIG_RBTREE_TEST is not set -# CONFIG_REED_SOLOMON_TEST is not set -# CONFIG_INTERVAL_TREE_TEST is not set -# CONFIG_PERCPU_TEST is not set -# CONFIG_ATOMIC64_SELFTEST is not set -# CONFIG_ASYNC_RAID6_TEST is not set -# CONFIG_TEST_HEXDUMP is not set -# CONFIG_TEST_STRING_HELPERS is not set -# CONFIG_TEST_STRSCPY is not set -# CONFIG_TEST_KSTRTOX is not set -# CONFIG_TEST_PRINTF is not set -# CONFIG_TEST_BITMAP is not set -# CONFIG_TEST_BITFIELD is not set -# CONFIG_TEST_UUID is not set -# CONFIG_TEST_XARRAY is not set -# CONFIG_TEST_OVERFLOW is not set -# CONFIG_TEST_RHASHTABLE is not set -# CONFIG_TEST_HASH is not set -# CONFIG_TEST_IDA is not set -# CONFIG_TEST_PARMAN is not set -# CONFIG_TEST_LKM is not set -# CONFIG_TEST_VMALLOC is not set -# CONFIG_TEST_USER_COPY is not set -# CONFIG_TEST_BPF is not set -# CONFIG_TEST_BLACKHOLE_DEV is not set -# CONFIG_FIND_BIT_BENCHMARK is not set -# CONFIG_TEST_FIRMWARE is not set -# CONFIG_TEST_SYSCTL is not set -# CONFIG_TEST_UDELAY is not set -# CONFIG_TEST_STATIC_KEYS is not set -# CONFIG_TEST_KMOD is not set -# CONFIG_TEST_MEMCAT_P is not set -# CONFIG_TEST_OBJAGG is not set -# CONFIG_TEST_STACKINIT is not set -# CONFIG_TEST_MEMINIT is not set -# CONFIG_MEMTEST is not set -# CONFIG_HYPERV_TESTING is not set -# end of Kernel Testing and Coverage -# end of Kernel hacking diff --git a/linux57-tkg/linux57-tkg-config/config_hardened.x86_64 b/linux57-tkg/linux57-tkg-config/config_hardened.x86_64 deleted file mode 100644 index 105f167..0000000 --- a/linux57-tkg/linux57-tkg-config/config_hardened.x86_64 +++ /dev/null @@ -1,10839 +0,0 @@ -# -# Automatically generated file; DO NOT EDIT. 
-# Linux/x86 5.7.8 Kernel Configuration -# - -# -# Compiler: gcc (GCC) 10.1.0 -# -CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=100100 -CONFIG_LD_VERSION=234000000 -CONFIG_CLANG_VERSION=0 -CONFIG_CC_CAN_LINK=y -CONFIG_CC_HAS_ASM_GOTO=y -CONFIG_CC_HAS_ASM_INLINE=y -CONFIG_IRQ_WORK=y -CONFIG_BUILDTIME_TABLE_SORT=y -CONFIG_THREAD_INFO_IN_TASK=y - -# -# General setup -# -CONFIG_INIT_ENV_ARG_LIMIT=32 -# CONFIG_COMPILE_TEST is not set -CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y -CONFIG_BUILD_SALT="" -CONFIG_HAVE_KERNEL_GZIP=y -CONFIG_HAVE_KERNEL_BZIP2=y -CONFIG_HAVE_KERNEL_LZMA=y -CONFIG_HAVE_KERNEL_XZ=y -CONFIG_HAVE_KERNEL_LZO=y -CONFIG_HAVE_KERNEL_LZ4=y -# CONFIG_KERNEL_GZIP is not set -# CONFIG_KERNEL_BZIP2 is not set -# CONFIG_KERNEL_LZMA is not set -CONFIG_KERNEL_XZ=y -# CONFIG_KERNEL_LZO is not set -# CONFIG_KERNEL_LZ4 is not set -CONFIG_DEFAULT_HOSTNAME="archlinux" -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -CONFIG_SYSVIPC_SYSCTL=y -CONFIG_POSIX_MQUEUE=y -CONFIG_POSIX_MQUEUE_SYSCTL=y -CONFIG_CROSS_MEMORY_ATTACH=y -# CONFIG_USELIB is not set -CONFIG_AUDIT=y -CONFIG_HAVE_ARCH_AUDITSYSCALL=y -CONFIG_AUDITSYSCALL=y - -# -# IRQ subsystem -# -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_GENERIC_IRQ_SHOW=y -CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y -CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_GENERIC_IRQ_MIGRATION=y -CONFIG_HARDIRQS_SW_RESEND=y -CONFIG_GENERIC_IRQ_CHIP=y -CONFIG_IRQ_DOMAIN=y -CONFIG_IRQ_SIM=y -CONFIG_IRQ_DOMAIN_HIERARCHY=y -CONFIG_GENERIC_MSI_IRQ=y -CONFIG_GENERIC_MSI_IRQ_DOMAIN=y -CONFIG_IRQ_MSI_IOMMU=y -CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y -CONFIG_GENERIC_IRQ_RESERVATION_MODE=y -CONFIG_IRQ_FORCED_THREADING=y -CONFIG_SPARSE_IRQ=y -# CONFIG_GENERIC_IRQ_DEBUGFS is not set -# end of IRQ subsystem - -CONFIG_CLOCKSOURCE_WATCHDOG=y -CONFIG_ARCH_CLOCKSOURCE_INIT=y -CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y -CONFIG_GENERIC_TIME_VSYSCALL=y -CONFIG_GENERIC_CLOCKEVENTS=y -CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y -CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y -CONFIG_GENERIC_CMOS_UPDATE=y - -# -# Timers subsystem -# -CONFIG_TICK_ONESHOT=y -CONFIG_NO_HZ_COMMON=y -# CONFIG_HZ_PERIODIC is not set -CONFIG_NO_HZ_IDLE=y -# CONFIG_NO_HZ_FULL is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -# end of Timers subsystem - -# CONFIG_PREEMPT_NONE is not set -# CONFIG_PREEMPT_VOLUNTARY is not set -CONFIG_PREEMPT=y -CONFIG_PREEMPT_COUNT=y -CONFIG_PREEMPTION=y - -# -# CPU/Task time and stats accounting -# -CONFIG_TICK_CPU_ACCOUNTING=y -# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set -CONFIG_IRQ_TIME_ACCOUNTING=y -CONFIG_HAVE_SCHED_AVG_IRQ=y -# CONFIG_SCHED_THERMAL_PRESSURE is not set -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BSD_PROCESS_ACCT_V3=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_PSI=y -# CONFIG_PSI_DEFAULT_DISABLED is not set -# end of CPU/Task time and stats accounting - -CONFIG_CPU_ISOLATION=y - -# -# RCU Subsystem -# -CONFIG_TREE_RCU=y -CONFIG_PREEMPT_RCU=y -CONFIG_RCU_EXPERT=y -CONFIG_SRCU=y -CONFIG_TREE_SRCU=y -CONFIG_TASKS_RCU=y -CONFIG_RCU_STALL_COMMON=y -CONFIG_RCU_NEED_SEGCBLIST=y -CONFIG_RCU_FANOUT=64 -CONFIG_RCU_FANOUT_LEAF=16 -CONFIG_RCU_FAST_NO_HZ=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_DELAY=500 -# CONFIG_RCU_NOCB_CPU is not set -# end of RCU Subsystem - -CONFIG_BUILD_BIN2C=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -# CONFIG_IKHEADERS is not set -CONFIG_LOG_BUF_SHIFT=17 -CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 -CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 -CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y - -# -# Scheduler features -# -CONFIG_UCLAMP_TASK=y -CONFIG_UCLAMP_BUCKETS_COUNT=5 -# 
end of Scheduler features - -CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y -CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y -CONFIG_CC_HAS_INT128=y -CONFIG_ARCH_SUPPORTS_INT128=y -CONFIG_NUMA_BALANCING=y -CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y -CONFIG_CGROUPS=y -CONFIG_PAGE_COUNTER=y -CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y -CONFIG_MEMCG_SWAP_ENABLED=y -CONFIG_MEMCG_KMEM=y -CONFIG_BLK_CGROUP=y -CONFIG_CGROUP_WRITEBACK=y -CONFIG_CGROUP_SCHED=y -CONFIG_FAIR_GROUP_SCHED=y -CONFIG_CFS_BANDWIDTH=y -# CONFIG_RT_GROUP_SCHED is not set -CONFIG_UCLAMP_TASK_GROUP=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_RDMA=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_HUGETLB=y -CONFIG_CPUSETS=y -CONFIG_PROC_PID_CPUSET=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_CGROUP_PERF=y -CONFIG_CGROUP_BPF=y -# CONFIG_CGROUP_DEBUG is not set -CONFIG_SOCK_CGROUP_DATA=y -CONFIG_NAMESPACES=y -CONFIG_UTS_NS=y -CONFIG_TIME_NS=y -CONFIG_IPC_NS=y -CONFIG_USER_NS=y -# CONFIG_USER_NS_UNPRIVILEGED is not set -CONFIG_PID_NS=y -CONFIG_NET_NS=y -# CONFIG_CHECKPOINT_RESTORE is not set -CONFIG_SCHED_AUTOGROUP=y -# CONFIG_SYSFS_DEPRECATED is not set -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -CONFIG_RD_GZIP=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_RD_XZ=y -CONFIG_RD_LZO=y -CONFIG_RD_LZ4=y -CONFIG_BOOT_CONFIG=y -CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SYSCTL=y -CONFIG_HAVE_UID16=y -CONFIG_SYSCTL_EXCEPTION_TRACE=y -CONFIG_HAVE_PCSPKR_PLATFORM=y -CONFIG_BPF=y -CONFIG_EXPERT=y -# CONFIG_UID16 is not set -CONFIG_MULTIUSER=y -CONFIG_SGETMASK_SYSCALL=y -# CONFIG_SYSFS_SYSCALL is not set -CONFIG_FHANDLE=y -CONFIG_POSIX_TIMERS=y -CONFIG_PRINTK=y -CONFIG_PRINTK_NMI=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_PCSPKR_PLATFORM=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -CONFIG_FUTEX_PI=y -CONFIG_EPOLL=y -CONFIG_SIGNALFD=y -CONFIG_TIMERFD=y -CONFIG_EVENTFD=y -CONFIG_SHMEM=y -CONFIG_AIO=y -CONFIG_IO_URING=y -CONFIG_ADVISE_SYSCALLS=y -CONFIG_MEMBARRIER=y -CONFIG_KALLSYMS=y -CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y -CONFIG_KALLSYMS_BASE_RELATIVE=y -CONFIG_BPF_LSM=y -CONFIG_BPF_SYSCALL=y -CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y -CONFIG_BPF_JIT_ALWAYS_ON=y -CONFIG_BPF_JIT_DEFAULT_ON=y -# CONFIG_USERFAULTFD is not set -CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y -CONFIG_RSEQ=y -# CONFIG_DEBUG_RSEQ is not set -# CONFIG_EMBEDDED is not set -CONFIG_HAVE_PERF_EVENTS=y -# CONFIG_PC104 is not set - -# -# Kernel Performance Events And Counters -# -CONFIG_PERF_EVENTS=y -# CONFIG_DEBUG_PERF_USE_VMALLOC is not set -# end of Kernel Performance Events And Counters - -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLUB_DEBUG=y -# CONFIG_SLUB_MEMCG_SYSFS_ON is not set -# CONFIG_COMPAT_BRK is not set -# CONFIG_SLAB is not set -CONFIG_SLUB=y -# CONFIG_SLOB is not set -# CONFIG_SLAB_MERGE_DEFAULT is not set -CONFIG_SLAB_FREELIST_RANDOM=y -CONFIG_SLAB_FREELIST_HARDENED=y -CONFIG_SLAB_CANARY=y -CONFIG_SHUFFLE_PAGE_ALLOCATOR=y -CONFIG_SLUB_CPU_PARTIAL=y -CONFIG_SYSTEM_DATA_VERIFICATION=y -CONFIG_PROFILING=y -CONFIG_TRACEPOINTS=y -# end of General setup - -CONFIG_64BIT=y -CONFIG_X86_64=y -CONFIG_X86=y -CONFIG_INSTRUCTION_DECODER=y -CONFIG_OUTPUT_FORMAT="elf64-x86-64" -CONFIG_LOCKDEP_SUPPORT=y -CONFIG_STACKTRACE_SUPPORT=y -CONFIG_MMU=y -CONFIG_ARCH_MMAP_RND_BITS_MIN=28 -CONFIG_ARCH_MMAP_RND_BITS_MAX=32 -CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 -CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 -CONFIG_GENERIC_ISA_DMA=y -CONFIG_GENERIC_BUG=y -CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y 
-CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_ARCH_HAS_CPU_RELAX=y -CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y -CONFIG_ARCH_HAS_FILTER_PGPROT=y -CONFIG_HAVE_SETUP_PER_CPU_AREA=y -CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y -CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y -CONFIG_ARCH_HIBERNATION_POSSIBLE=y -CONFIG_ARCH_SUSPEND_POSSIBLE=y -CONFIG_ARCH_WANT_GENERAL_HUGETLB=y -CONFIG_ZONE_DMA32=y -CONFIG_AUDIT_ARCH=y -CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y -CONFIG_HAVE_INTEL_TXT=y -CONFIG_X86_64_SMP=y -CONFIG_ARCH_SUPPORTS_UPROBES=y -CONFIG_FIX_EARLYCON_MEM=y -CONFIG_DYNAMIC_PHYSICAL_MASK=y -CONFIG_PGTABLE_LEVELS=5 -CONFIG_CC_HAS_SANE_STACKPROTECTOR=y - -# -# Processor type and features -# -CONFIG_ZONE_DMA=y -CONFIG_SMP=y -CONFIG_X86_FEATURE_NAMES=y -CONFIG_X86_X2APIC=y -CONFIG_X86_MPPARSE=y -# CONFIG_GOLDFISH is not set -CONFIG_RETPOLINE=y -CONFIG_X86_CPU_RESCTRL=y -# CONFIG_X86_EXTENDED_PLATFORM is not set -CONFIG_X86_INTEL_LPSS=y -CONFIG_X86_AMD_PLATFORM_DEVICE=y -CONFIG_IOSF_MBI=y -# CONFIG_IOSF_MBI_DEBUG is not set -CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y -CONFIG_SCHED_OMIT_FRAME_POINTER=y -CONFIG_HYPERVISOR_GUEST=y -CONFIG_PARAVIRT=y -CONFIG_PARAVIRT_XXL=y -# CONFIG_PARAVIRT_DEBUG is not set -CONFIG_PARAVIRT_SPINLOCKS=y -CONFIG_X86_HV_CALLBACK_VECTOR=y -CONFIG_XEN=y -CONFIG_XEN_PV=y -CONFIG_XEN_PV_SMP=y -CONFIG_XEN_DOM0=y -CONFIG_XEN_PVHVM=y -CONFIG_XEN_PVHVM_SMP=y -CONFIG_XEN_512GB=y -CONFIG_XEN_SAVE_RESTORE=y -# CONFIG_XEN_DEBUG_FS is not set -CONFIG_XEN_PVH=y -CONFIG_KVM_GUEST=y -CONFIG_ARCH_CPUIDLE_HALTPOLL=y -CONFIG_PVH=y -# CONFIG_KVM_DEBUG_FS is not set -CONFIG_PARAVIRT_TIME_ACCOUNTING=y -CONFIG_PARAVIRT_CLOCK=y -CONFIG_JAILHOUSE_GUEST=y -CONFIG_ACRN_GUEST=y -# CONFIG_MK8 is not set -# CONFIG_MPSC is not set -# CONFIG_MCORE2 is not set -# CONFIG_MATOM is not set -CONFIG_GENERIC_CPU=y -CONFIG_X86_INTERNODE_CACHE_SHIFT=6 -CONFIG_X86_L1_CACHE_SHIFT=6 -CONFIG_X86_TSC=y -CONFIG_X86_CMPXCHG64=y -CONFIG_X86_CMOV=y -CONFIG_X86_MINIMUM_CPU_FAMILY=64 -CONFIG_X86_DEBUGCTLMSR=y -CONFIG_IA32_FEAT_CTL=y -CONFIG_X86_VMX_FEATURE_NAMES=y -CONFIG_PROCESSOR_SELECT=y -CONFIG_CPU_SUP_INTEL=y -CONFIG_CPU_SUP_AMD=y -CONFIG_CPU_SUP_HYGON=y -CONFIG_CPU_SUP_CENTAUR=y -CONFIG_CPU_SUP_ZHAOXIN=y -CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y -CONFIG_DMI=y -CONFIG_GART_IOMMU=y -# CONFIG_MAXSMP is not set -CONFIG_NR_CPUS_RANGE_BEGIN=2 -CONFIG_NR_CPUS_RANGE_END=512 -CONFIG_NR_CPUS_DEFAULT=64 -CONFIG_NR_CPUS=320 -CONFIG_SCHED_SMT=y -CONFIG_SCHED_MC=y -CONFIG_SCHED_MC_PRIO=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y -CONFIG_X86_MCE=y -# CONFIG_X86_MCELOG_LEGACY is not set -CONFIG_X86_MCE_INTEL=y -CONFIG_X86_MCE_AMD=y -CONFIG_X86_MCE_THRESHOLD=y -CONFIG_X86_MCE_INJECT=m -CONFIG_X86_THERMAL_VECTOR=y - -# -# Performance monitoring -# -CONFIG_PERF_EVENTS_INTEL_UNCORE=m -CONFIG_PERF_EVENTS_INTEL_RAPL=m -CONFIG_PERF_EVENTS_INTEL_CSTATE=m -CONFIG_PERF_EVENTS_AMD_POWER=m -# end of Performance monitoring - -CONFIG_X86_VSYSCALL_EMULATION=y -CONFIG_X86_IOPL_IOPERM=y -CONFIG_I8K=m -CONFIG_MICROCODE=y -CONFIG_MICROCODE_INTEL=y -CONFIG_MICROCODE_AMD=y -CONFIG_MICROCODE_OLD_INTERFACE=y -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -CONFIG_X86_5LEVEL=y -CONFIG_X86_DIRECT_GBPAGES=y -# CONFIG_X86_CPA_STATISTICS is not set -CONFIG_AMD_MEM_ENCRYPT=y -CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y -CONFIG_NUMA=y -CONFIG_AMD_NUMA=y -CONFIG_X86_64_ACPI_NUMA=y -CONFIG_NODES_SPAN_OTHER_NODES=y -# CONFIG_NUMA_EMU is not set -CONFIG_NODES_SHIFT=5 -CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_ARCH_SPARSEMEM_DEFAULT=y 
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y -CONFIG_ARCH_MEMORY_PROBE=y -CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 -CONFIG_X86_PMEM_LEGACY_DEVICE=y -CONFIG_X86_PMEM_LEGACY=m -CONFIG_X86_CHECK_BIOS_CORRUPTION=y -CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y -CONFIG_X86_RESERVE_LOW=64 -CONFIG_MTRR=y -CONFIG_MTRR_SANITIZER=y -CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=1 -CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=0 -CONFIG_X86_PAT=y -CONFIG_ARCH_USES_PG_UNCACHED=y -CONFIG_ARCH_RANDOM=y -CONFIG_X86_SMAP=y -CONFIG_X86_UMIP=y -CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y -# CONFIG_X86_INTEL_TSX_MODE_OFF is not set -# CONFIG_X86_INTEL_TSX_MODE_ON is not set -CONFIG_X86_INTEL_TSX_MODE_AUTO=y -CONFIG_EFI=y -CONFIG_EFI_STUB=y -CONFIG_EFI_MIXED=y -CONFIG_SECCOMP=y -# CONFIG_HZ_100 is not set -# CONFIG_HZ_250 is not set -CONFIG_HZ_300=y -# CONFIG_HZ_1000 is not set -CONFIG_HZ=300 -CONFIG_SCHED_HRTICK=y -# CONFIG_KEXEC is not set -# CONFIG_KEXEC_FILE is not set -CONFIG_CRASH_DUMP=y -CONFIG_PHYSICAL_START=0x1000000 -CONFIG_RELOCATABLE=y -CONFIG_RANDOMIZE_BASE=y -CONFIG_X86_NEED_RELOCS=y -CONFIG_PHYSICAL_ALIGN=0x1000000 -CONFIG_DYNAMIC_MEMORY_LAYOUT=y -CONFIG_RANDOMIZE_MEMORY=y -CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0x1 -CONFIG_HOTPLUG_CPU=y -# CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set -# CONFIG_DEBUG_HOTPLUG_CPU0 is not set -# CONFIG_COMPAT_VDSO is not set -# CONFIG_LEGACY_VSYSCALL_EMULATE is not set -# CONFIG_LEGACY_VSYSCALL_XONLY is not set -CONFIG_LEGACY_VSYSCALL_NONE=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="pti=on page_alloc.shuffle=1" -# CONFIG_CMDLINE_OVERRIDE is not set -# CONFIG_MODIFY_LDT_SYSCALL is not set -CONFIG_HAVE_LIVEPATCH=y -# CONFIG_LIVEPATCH is not set -# end of Processor type and features - -CONFIG_ARCH_HAS_ADD_PAGES=y -CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y -CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y -CONFIG_USE_PERCPU_NUMA_NODE_ID=y -CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y -CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y -CONFIG_ARCH_ENABLE_THP_MIGRATION=y - -# -# Power management and ACPI options -# -CONFIG_SUSPEND=y -CONFIG_SUSPEND_FREEZER=y -# CONFIG_SUSPEND_SKIP_SYNC is not set -CONFIG_HIBERNATE_CALLBACKS=y -# CONFIG_HIBERNATION is not set -CONFIG_PM_SLEEP=y -CONFIG_PM_SLEEP_SMP=y -CONFIG_PM_AUTOSLEEP=y -CONFIG_PM_WAKELOCKS=y -CONFIG_PM_WAKELOCKS_LIMIT=100 -CONFIG_PM_WAKELOCKS_GC=y -CONFIG_PM=y -CONFIG_PM_DEBUG=y -CONFIG_PM_ADVANCED_DEBUG=y -# CONFIG_PM_TEST_SUSPEND is not set -CONFIG_PM_SLEEP_DEBUG=y -# CONFIG_DPM_WATCHDOG is not set -CONFIG_PM_TRACE=y -CONFIG_PM_TRACE_RTC=y -CONFIG_PM_CLK=y -CONFIG_PM_GENERIC_DOMAINS=y -CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y -CONFIG_PM_GENERIC_DOMAINS_SLEEP=y -CONFIG_PM_GENERIC_DOMAINS_OF=y -CONFIG_ENERGY_MODEL=y -CONFIG_ARCH_SUPPORTS_ACPI=y -CONFIG_ACPI=y -CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y -CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y -CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y -# CONFIG_ACPI_DEBUGGER is not set -CONFIG_ACPI_SPCR_TABLE=y -CONFIG_ACPI_LPIT=y -CONFIG_ACPI_SLEEP=y -# CONFIG_ACPI_PROCFS_POWER is not set -CONFIG_ACPI_REV_OVERRIDE_POSSIBLE=y -# CONFIG_ACPI_EC_DEBUGFS is not set -CONFIG_ACPI_AC=m -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=y -CONFIG_ACPI_VIDEO=y -CONFIG_ACPI_FAN=y -CONFIG_ACPI_TAD=m -CONFIG_ACPI_DOCK=y -CONFIG_ACPI_CPU_FREQ_PSS=y -CONFIG_ACPI_PROCESSOR_CSTATE=y -CONFIG_ACPI_PROCESSOR_IDLE=y -CONFIG_ACPI_CPPC_LIB=y -CONFIG_ACPI_PROCESSOR=y -CONFIG_ACPI_IPMI=m -CONFIG_ACPI_HOTPLUG_CPU=y -CONFIG_ACPI_PROCESSOR_AGGREGATOR=y -CONFIG_ACPI_THERMAL=y -CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y -CONFIG_ACPI_TABLE_UPGRADE=y -# CONFIG_ACPI_DEBUG is not set 
-CONFIG_ACPI_PCI_SLOT=y -CONFIG_ACPI_CONTAINER=y -CONFIG_ACPI_HOTPLUG_MEMORY=y -CONFIG_ACPI_HOTPLUG_IOAPIC=y -CONFIG_ACPI_SBS=m -CONFIG_ACPI_HED=y -# CONFIG_ACPI_CUSTOM_METHOD is not set -CONFIG_ACPI_BGRT=y -# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set -CONFIG_ACPI_NFIT=m -# CONFIG_NFIT_SECURITY_DEBUG is not set -CONFIG_ACPI_NUMA=y -CONFIG_ACPI_HMAT=y -CONFIG_HAVE_ACPI_APEI=y -CONFIG_HAVE_ACPI_APEI_NMI=y -CONFIG_ACPI_APEI=y -CONFIG_ACPI_APEI_GHES=y -CONFIG_ACPI_APEI_PCIEAER=y -CONFIG_ACPI_APEI_MEMORY_FAILURE=y -CONFIG_ACPI_APEI_EINJ=m -CONFIG_ACPI_APEI_ERST_DEBUG=m -CONFIG_DPTF_POWER=m -CONFIG_ACPI_WATCHDOG=y -CONFIG_ACPI_EXTLOG=m -CONFIG_ACPI_ADXL=y -CONFIG_PMIC_OPREGION=y -CONFIG_BYTCRC_PMIC_OPREGION=y -CONFIG_CHTCRC_PMIC_OPREGION=y -CONFIG_XPOWER_PMIC_OPREGION=y -CONFIG_BXT_WC_PMIC_OPREGION=y -CONFIG_CHT_WC_PMIC_OPREGION=y -CONFIG_CHT_DC_TI_PMIC_OPREGION=y -CONFIG_ACPI_CONFIGFS=m -CONFIG_TPS68470_PMIC_OPREGION=y -CONFIG_X86_PM_TIMER=y -CONFIG_SFI=y - -# -# CPU Frequency scaling -# -CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_GOV_ATTR_SET=y -CONFIG_CPU_FREQ_GOV_COMMON=y -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y -CONFIG_CPU_FREQ_GOV_PERFORMANCE=y -CONFIG_CPU_FREQ_GOV_POWERSAVE=m -CONFIG_CPU_FREQ_GOV_USERSPACE=m -CONFIG_CPU_FREQ_GOV_ONDEMAND=m -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m -CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y - -# -# CPU frequency scaling drivers -# -CONFIG_CPUFREQ_DT=m -CONFIG_CPUFREQ_DT_PLATDEV=y -CONFIG_X86_INTEL_PSTATE=y -CONFIG_X86_PCC_CPUFREQ=m -CONFIG_X86_ACPI_CPUFREQ=m -CONFIG_X86_ACPI_CPUFREQ_CPB=y -CONFIG_X86_POWERNOW_K8=m -CONFIG_X86_AMD_FREQ_SENSITIVITY=m -# CONFIG_X86_SPEEDSTEP_CENTRINO is not set -CONFIG_X86_P4_CLOCKMOD=m - -# -# shared options -# -CONFIG_X86_SPEEDSTEP_LIB=m -# end of CPU Frequency scaling - -# -# CPU Idle -# -CONFIG_CPU_IDLE=y -CONFIG_CPU_IDLE_GOV_LADDER=y -CONFIG_CPU_IDLE_GOV_MENU=y -CONFIG_CPU_IDLE_GOV_TEO=y -CONFIG_CPU_IDLE_GOV_HALTPOLL=y -CONFIG_HALTPOLL_CPUIDLE=m -# end of CPU Idle - -CONFIG_INTEL_IDLE=y -# end of Power management and ACPI options - -# -# Bus options (PCI etc.) -# -CONFIG_PCI_DIRECT=y -CONFIG_PCI_MMCONFIG=y -CONFIG_PCI_XEN=y -CONFIG_MMCONF_FAM10H=y -# CONFIG_PCI_CNB20LE_QUIRK is not set -# CONFIG_ISA_BUS is not set -CONFIG_ISA_DMA_API=y -CONFIG_AMD_NB=y -# CONFIG_X86_SYSFB is not set -# end of Bus options (PCI etc.) 
- -# -# Binary Emulations -# -CONFIG_IA32_EMULATION=y -# CONFIG_X86_X32 is not set -CONFIG_COMPAT_32=y -CONFIG_COMPAT=y -CONFIG_COMPAT_FOR_U64_ALIGNMENT=y -CONFIG_SYSVIPC_COMPAT=y -# end of Binary Emulations - -# -# Firmware Drivers -# -CONFIG_EDD=m -# CONFIG_EDD_OFF is not set -CONFIG_FIRMWARE_MEMMAP=y -CONFIG_DMIID=y -CONFIG_DMI_SYSFS=m -CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y -CONFIG_ISCSI_IBFT_FIND=y -CONFIG_ISCSI_IBFT=m -CONFIG_FW_CFG_SYSFS=m -# CONFIG_FW_CFG_SYSFS_CMDLINE is not set -CONFIG_GOOGLE_FIRMWARE=y -# CONFIG_GOOGLE_SMI is not set -CONFIG_GOOGLE_COREBOOT_TABLE=m -CONFIG_GOOGLE_MEMCONSOLE=m -# CONFIG_GOOGLE_MEMCONSOLE_X86_LEGACY is not set -CONFIG_GOOGLE_FRAMEBUFFER_COREBOOT=m -CONFIG_GOOGLE_MEMCONSOLE_COREBOOT=m -CONFIG_GOOGLE_VPD=m - -# -# EFI (Extensible Firmware Interface) Support -# -# CONFIG_EFI_VARS is not set -CONFIG_EFI_ESRT=y -# CONFIG_EFI_FAKE_MEMMAP is not set -CONFIG_EFI_SOFT_RESERVE=y -CONFIG_EFI_RUNTIME_WRAPPERS=y -CONFIG_EFI_CAPSULE_LOADER=m -# CONFIG_EFI_TEST is not set -CONFIG_APPLE_PROPERTIES=y -CONFIG_RESET_ATTACK_MITIGATION=y -CONFIG_EFI_RCI2_TABLE=y -# CONFIG_EFI_DISABLE_PCI_DMA is not set -# end of EFI (Extensible Firmware Interface) Support - -CONFIG_EFI_EMBEDDED_FIRMWARE=y -CONFIG_UEFI_CPER=y -CONFIG_UEFI_CPER_X86=y -CONFIG_EFI_DEV_PATH_PARSER=y -CONFIG_EFI_EARLYCON=y - -# -# Tegra firmware driver -# -# end of Tegra firmware driver -# end of Firmware Drivers - -CONFIG_HAVE_KVM=y -CONFIG_HAVE_KVM_IRQCHIP=y -CONFIG_HAVE_KVM_IRQFD=y -CONFIG_HAVE_KVM_IRQ_ROUTING=y -CONFIG_HAVE_KVM_EVENTFD=y -CONFIG_KVM_MMIO=y -CONFIG_KVM_ASYNC_PF=y -CONFIG_HAVE_KVM_MSI=y -CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y -CONFIG_KVM_VFIO=y -CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y -CONFIG_KVM_COMPAT=y -CONFIG_HAVE_KVM_IRQ_BYPASS=y -CONFIG_HAVE_KVM_NO_POLL=y -CONFIG_VIRTUALIZATION=y -CONFIG_KVM=m -CONFIG_KVM_WERROR=y -CONFIG_KVM_INTEL=m -CONFIG_KVM_AMD=m -CONFIG_KVM_AMD_SEV=y -CONFIG_KVM_MMU_AUDIT=y -CONFIG_AS_AVX512=y -CONFIG_AS_SHA1_NI=y -CONFIG_AS_SHA256_NI=y - -# -# General architecture-dependent options -# -CONFIG_HOTPLUG_SMT=y -CONFIG_OPROFILE=m -# CONFIG_OPROFILE_EVENT_MULTIPLEX is not set -CONFIG_HAVE_OPROFILE=y -CONFIG_OPROFILE_NMI_TIMER=y -CONFIG_KPROBES=y -CONFIG_JUMP_LABEL=y -# CONFIG_STATIC_KEYS_SELFTEST is not set -CONFIG_OPTPROBES=y -CONFIG_KPROBES_ON_FTRACE=y -CONFIG_UPROBES=y -CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y -CONFIG_ARCH_USE_BUILTIN_BSWAP=y -CONFIG_KRETPROBES=y -CONFIG_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_IOREMAP_PROT=y -CONFIG_HAVE_KPROBES=y -CONFIG_HAVE_KRETPROBES=y -CONFIG_HAVE_OPTPROBES=y -CONFIG_HAVE_KPROBES_ON_FTRACE=y -CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y -CONFIG_HAVE_NMI=y -CONFIG_HAVE_ARCH_TRACEHOOK=y -CONFIG_HAVE_DMA_CONTIGUOUS=y -CONFIG_GENERIC_SMP_IDLE_THREAD=y -CONFIG_ARCH_HAS_FORTIFY_SOURCE=y -CONFIG_ARCH_HAS_SET_MEMORY=y -CONFIG_ARCH_HAS_SET_DIRECT_MAP=y -CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y -CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y -CONFIG_HAVE_ASM_MODVERSIONS=y -CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y -CONFIG_HAVE_RSEQ=y -CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y -CONFIG_HAVE_CLK=y -CONFIG_HAVE_HW_BREAKPOINT=y -CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y -CONFIG_HAVE_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_PERF_EVENTS_NMI=y -CONFIG_HAVE_HARDLOCKUP_DETECTOR_PERF=y -CONFIG_HAVE_PERF_REGS=y -CONFIG_HAVE_PERF_USER_STACK_DUMP=y -CONFIG_HAVE_ARCH_JUMP_LABEL=y -CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y -CONFIG_MMU_GATHER_TABLE_FREE=y -CONFIG_MMU_GATHER_RCU_TABLE_FREE=y -CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y -CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y 
-CONFIG_HAVE_CMPXCHG_LOCAL=y -CONFIG_HAVE_CMPXCHG_DOUBLE=y -CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y -CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y -CONFIG_HAVE_ARCH_SECCOMP_FILTER=y -CONFIG_SECCOMP_FILTER=y -CONFIG_HAVE_ARCH_STACKLEAK=y -CONFIG_HAVE_STACKPROTECTOR=y -CONFIG_CC_HAS_STACKPROTECTOR_NONE=y -CONFIG_STACKPROTECTOR=y -CONFIG_STACKPROTECTOR_STRONG=y -CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y -CONFIG_HAVE_CONTEXT_TRACKING=y -CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y -CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y -CONFIG_HAVE_MOVE_PMD=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y -CONFIG_HAVE_ARCH_HUGE_VMAP=y -CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y -CONFIG_HAVE_ARCH_SOFT_DIRTY=y -CONFIG_HAVE_MOD_ARCH_SPECIFIC=y -CONFIG_MODULES_USE_ELF_RELA=y -CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y -CONFIG_ARCH_HAS_ELF_RANDOMIZE=y -CONFIG_HAVE_ARCH_MMAP_RND_BITS=y -CONFIG_HAVE_EXIT_THREAD=y -CONFIG_ARCH_MMAP_RND_BITS=32 -CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y -CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16 -CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES=y -CONFIG_HAVE_COPY_THREAD_TLS=y -CONFIG_HAVE_STACK_VALIDATION=y -CONFIG_HAVE_RELIABLE_STACKTRACE=y -CONFIG_ISA_BUS_API=y -CONFIG_OLD_SIGSUSPEND3=y -CONFIG_COMPAT_OLD_SIGACTION=y -CONFIG_COMPAT_32BIT_TIME=y -CONFIG_HAVE_ARCH_VMAP_STACK=y -CONFIG_VMAP_STACK=y -CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y -CONFIG_STRICT_KERNEL_RWX=y -CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y -CONFIG_STRICT_MODULE_RWX=y -CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y -CONFIG_ARCH_USE_MEMREMAP_PROT=y -CONFIG_LOCK_EVENT_COUNTS=y -CONFIG_ARCH_HAS_MEM_ENCRYPT=y - -# -# GCOV-based kernel profiling -# -# CONFIG_GCOV_KERNEL is not set -CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y -# end of GCOV-based kernel profiling - -CONFIG_HAVE_GCC_PLUGINS=y -CONFIG_GCC_PLUGINS=y -# CONFIG_GCC_PLUGIN_CYC_COMPLEXITY is not set -CONFIG_GCC_PLUGIN_LATENT_ENTROPY=y -# CONFIG_GCC_PLUGIN_RANDSTRUCT is not set -# end of General architecture-dependent options - -CONFIG_RT_MUTEXES=y -CONFIG_BASE_SMALL=0 -CONFIG_MODULE_SIG_FORMAT=y -CONFIG_MODULES=y -CONFIG_MODULE_FORCE_LOAD=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_MODVERSIONS is not set -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG=y -# CONFIG_MODULE_SIG_FORCE is not set -CONFIG_MODULE_SIG_ALL=y -# CONFIG_MODULE_SIG_SHA1 is not set -# CONFIG_MODULE_SIG_SHA224 is not set -# CONFIG_MODULE_SIG_SHA256 is not set -# CONFIG_MODULE_SIG_SHA384 is not set -CONFIG_MODULE_SIG_SHA512=y -CONFIG_MODULE_SIG_HASH="sha512" -CONFIG_MODULE_COMPRESS=y -# CONFIG_MODULE_COMPRESS_GZIP is not set -CONFIG_MODULE_COMPRESS_XZ=y -# CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS is not set -CONFIG_UNUSED_SYMBOLS=y -CONFIG_MODULES_TREE_LOOKUP=y -CONFIG_BLOCK=y -CONFIG_BLK_RQ_ALLOC_TIME=y -CONFIG_BLK_SCSI_REQUEST=y -CONFIG_BLK_CGROUP_RWSTAT=y -CONFIG_BLK_DEV_BSG=y -CONFIG_BLK_DEV_BSGLIB=y -CONFIG_BLK_DEV_INTEGRITY=y -CONFIG_BLK_DEV_INTEGRITY_T10=y -CONFIG_BLK_DEV_ZONED=y -CONFIG_BLK_DEV_THROTTLING=y -CONFIG_BLK_DEV_THROTTLING_LOW=y -# CONFIG_BLK_CMDLINE_PARSER is not set -CONFIG_BLK_WBT=y -CONFIG_BLK_CGROUP_IOLATENCY=y -CONFIG_BLK_CGROUP_IOCOST=y -CONFIG_BLK_WBT_MQ=y -CONFIG_BLK_DEBUG_FS=y -CONFIG_BLK_DEBUG_FS_ZONED=y -CONFIG_BLK_SED_OPAL=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_AIX_PARTITION=y -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -# 
CONFIG_UNIXWARE_DISKLABEL is not set -CONFIG_LDM_PARTITION=y -# CONFIG_LDM_DEBUG is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -CONFIG_KARMA_PARTITION=y -CONFIG_EFI_PARTITION=y -# CONFIG_SYSV68_PARTITION is not set -# CONFIG_CMDLINE_PARTITION is not set -# end of Partition Types - -CONFIG_BLOCK_COMPAT=y -CONFIG_BLK_MQ_PCI=y -CONFIG_BLK_MQ_VIRTIO=y -CONFIG_BLK_MQ_RDMA=y -CONFIG_BLK_PM=y - -# -# IO Schedulers -# -CONFIG_MQ_IOSCHED_DEADLINE=y -CONFIG_MQ_IOSCHED_KYBER=y -CONFIG_IOSCHED_BFQ=y -CONFIG_BFQ_GROUP_IOSCHED=y -# CONFIG_BFQ_CGROUP_DEBUG is not set -# end of IO Schedulers - -CONFIG_PREEMPT_NOTIFIERS=y -CONFIG_PADATA=y -CONFIG_ASN1=y -CONFIG_UNINLINE_SPIN_UNLOCK=y -CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y -CONFIG_MUTEX_SPIN_ON_OWNER=y -CONFIG_RWSEM_SPIN_ON_OWNER=y -CONFIG_LOCK_SPIN_ON_OWNER=y -CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y -CONFIG_QUEUED_SPINLOCKS=y -CONFIG_ARCH_USE_QUEUED_RWLOCKS=y -CONFIG_QUEUED_RWLOCKS=y -CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y -CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y -CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y -CONFIG_FREEZER=y - -# -# Executable file formats -# -CONFIG_BINFMT_ELF=y -CONFIG_COMPAT_BINFMT_ELF=y -CONFIG_ELFCORE=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y -CONFIG_BINFMT_SCRIPT=y -CONFIG_BINFMT_MISC=y -CONFIG_COREDUMP=y -# end of Executable file formats - -# -# Memory Management options -# -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_SPARSEMEM_MANUAL=y -CONFIG_SPARSEMEM=y -CONFIG_NEED_MULTIPLE_NODES=y -CONFIG_HAVE_MEMORY_PRESENT=y -CONFIG_SPARSEMEM_EXTREME=y -CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y -CONFIG_SPARSEMEM_VMEMMAP=y -CONFIG_HAVE_MEMBLOCK_NODE_MAP=y -CONFIG_HAVE_FAST_GUP=y -CONFIG_NUMA_KEEP_MEMINFO=y -CONFIG_MEMORY_ISOLATION=y -CONFIG_HAVE_BOOTMEM_INFO_NODE=y -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTPLUG_SPARSE=y -CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y -CONFIG_MEMORY_HOTREMOVE=y -CONFIG_SPLIT_PTLOCK_CPUS=4 -CONFIG_MEMORY_BALLOON=y -CONFIG_BALLOON_COMPACTION=y -CONFIG_COMPACTION=y -CONFIG_PAGE_REPORTING=y -CONFIG_MIGRATION=y -CONFIG_CONTIG_ALLOC=y -CONFIG_PHYS_ADDR_T_64BIT=y -CONFIG_BOUNCE=y -CONFIG_VIRT_TO_BUS=y -CONFIG_MMU_NOTIFIER=y -CONFIG_KSM=y -CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 -CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y -CONFIG_MEMORY_FAILURE=y -CONFIG_HWPOISON_INJECT=m -CONFIG_TRANSPARENT_HUGEPAGE=y -# CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS is not set -CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y -CONFIG_ARCH_WANTS_THP_SWAP=y -CONFIG_THP_SWAP=y -CONFIG_CLEANCACHE=y -CONFIG_FRONTSWAP=y -# CONFIG_CMA is not set -CONFIG_ZSWAP=y -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set -CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4=y -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4HC is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD is not set -CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lz4" -# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD is not set -CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD=y -# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set -CONFIG_ZSWAP_ZPOOL_DEFAULT="z3fold" -CONFIG_ZSWAP_DEFAULT_ON=y -CONFIG_ZPOOL=y -CONFIG_ZBUD=y -CONFIG_Z3FOLD=y -CONFIG_ZSMALLOC=y -# CONFIG_PGTABLE_MAPPING is not set -# CONFIG_ZSMALLOC_STAT is not set -CONFIG_GENERIC_EARLY_IOREMAP=y -# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set -# CONFIG_IDLE_PAGE_TRACKING is not set -CONFIG_ARCH_HAS_PTE_DEVMAP=y -CONFIG_ZONE_DEVICE=y -CONFIG_DEV_PAGEMAP_OPS=y -CONFIG_HMM_MIRROR=y -CONFIG_DEVICE_PRIVATE=y -CONFIG_FRAME_VECTOR=y -CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y 
-CONFIG_ARCH_HAS_PKEYS=y -# CONFIG_PERCPU_STATS is not set -# CONFIG_GUP_BENCHMARK is not set -CONFIG_READ_ONLY_THP_FOR_FS=y -CONFIG_ARCH_HAS_PTE_SPECIAL=y -CONFIG_MAPPING_DIRTY_HELPERS=y -# end of Memory Management options - -CONFIG_NET=y -CONFIG_COMPAT_NETLINK_MESSAGES=y -CONFIG_NET_INGRESS=y -CONFIG_NET_EGRESS=y -CONFIG_NET_REDIRECT=y -CONFIG_SKB_EXTENSIONS=y - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_DIAG=y -CONFIG_UNIX=y -CONFIG_UNIX_SCM=y -CONFIG_UNIX_DIAG=y -CONFIG_TLS=m -CONFIG_TLS_DEVICE=y -# CONFIG_TLS_TOE is not set -CONFIG_XFRM=y -CONFIG_XFRM_OFFLOAD=y -CONFIG_XFRM_ALGO=m -CONFIG_XFRM_USER=m -CONFIG_XFRM_INTERFACE=m -CONFIG_XFRM_SUB_POLICY=y -CONFIG_XFRM_MIGRATE=y -CONFIG_XFRM_STATISTICS=y -CONFIG_XFRM_IPCOMP=m -CONFIG_NET_KEY=m -CONFIG_NET_KEY_MIGRATE=y -CONFIG_SMC=m -CONFIG_SMC_DIAG=m -CONFIG_XDP_SOCKETS=y -CONFIG_XDP_SOCKETS_DIAG=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -# CONFIG_IP_FIB_TRIE_STATS is not set -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_CLASSID=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE_DEMUX=m -CONFIG_NET_IP_TUNNEL=m -CONFIG_NET_IPGRE=m -# CONFIG_NET_IPGRE_BROADCAST is not set -CONFIG_IP_MROUTE_COMMON=y -CONFIG_IP_MROUTE=y -CONFIG_IP_MROUTE_MULTIPLE_TABLES=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -CONFIG_NET_IPVTI=m -CONFIG_NET_UDP_TUNNEL=m -CONFIG_NET_FOU=m -CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_ESP_OFFLOAD=m -CONFIG_INET_ESPINTCP=y -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_TUNNEL=m -CONFIG_INET_TUNNEL=m -CONFIG_INET_DIAG=m -CONFIG_INET_TCP_DIAG=m -CONFIG_INET_UDP_DIAG=m -CONFIG_INET_RAW_DIAG=m -CONFIG_INET_DIAG_DESTROY=y -CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_BIC=m -CONFIG_TCP_CONG_CUBIC=y -CONFIG_TCP_CONG_WESTWOOD=m -CONFIG_TCP_CONG_HTCP=m -CONFIG_TCP_CONG_HSTCP=m -CONFIG_TCP_CONG_HYBLA=m -CONFIG_TCP_CONG_VEGAS=m -CONFIG_TCP_CONG_NV=m -CONFIG_TCP_CONG_SCALABLE=m -CONFIG_TCP_CONG_LP=m -CONFIG_TCP_CONG_VENO=m -CONFIG_TCP_CONG_YEAH=m -CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_TCP_CONG_DCTCP=m -CONFIG_TCP_CONG_CDG=m -CONFIG_TCP_CONG_BBR=m -CONFIG_DEFAULT_CUBIC=y -# CONFIG_DEFAULT_RENO is not set -CONFIG_DEFAULT_TCP_CONG="cubic" -CONFIG_TCP_MD5SIG=y -# CONFIG_TCP_SIMULT_CONNECT_DEFAULT_ON is not set -CONFIG_IPV6=y -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_IPV6_ROUTE_INFO=y -CONFIG_IPV6_OPTIMISTIC_DAD=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_MIP6=m -CONFIG_IPV6_ILA=m -CONFIG_INET6_XFRM_TUNNEL=m -CONFIG_INET6_TUNNEL=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_SIT=m -CONFIG_IPV6_SIT_6RD=y -CONFIG_IPV6_NDISC_NODETYPE=y -CONFIG_IPV6_TUNNEL=m -CONFIG_IPV6_GRE=m -CONFIG_IPV6_FOU=m -CONFIG_IPV6_FOU_TUNNEL=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_IPV6_MROUTE=y -CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y -CONFIG_IPV6_PIMSM_V2=y -CONFIG_IPV6_SEG6_LWTUNNEL=y -CONFIG_IPV6_SEG6_HMAC=y -CONFIG_IPV6_SEG6_BPF=y -CONFIG_IPV6_RPL_LWTUNNEL=y -CONFIG_NETLABEL=y -CONFIG_MPTCP=y -CONFIG_MPTCP_IPV6=y -# CONFIG_MPTCP_HMAC_TEST is not set -CONFIG_NETWORK_SECMARK=y -CONFIG_NET_PTP_CLASSIFY=y -CONFIG_NETWORK_PHY_TIMESTAMPING=y -CONFIG_NETFILTER=y -CONFIG_NETFILTER_ADVANCED=y -CONFIG_BRIDGE_NETFILTER=m - -# -# Core Netfilter Configuration -# -CONFIG_NETFILTER_INGRESS=y -CONFIG_NETFILTER_NETLINK=m -CONFIG_NETFILTER_FAMILY_BRIDGE=y -CONFIG_NETFILTER_FAMILY_ARP=y -CONFIG_NETFILTER_NETLINK_ACCT=m -CONFIG_NETFILTER_NETLINK_QUEUE=m 
-CONFIG_NETFILTER_NETLINK_LOG=m -CONFIG_NETFILTER_NETLINK_OSF=m -CONFIG_NF_CONNTRACK=m -CONFIG_NF_LOG_COMMON=m -CONFIG_NF_LOG_NETDEV=m -CONFIG_NETFILTER_CONNCOUNT=m -CONFIG_NF_CONNTRACK_MARK=y -CONFIG_NF_CONNTRACK_SECMARK=y -CONFIG_NF_CONNTRACK_ZONES=y -CONFIG_NF_CONNTRACK_PROCFS=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_TIMEOUT=y -CONFIG_NF_CONNTRACK_TIMESTAMP=y -CONFIG_NF_CONNTRACK_LABELS=y -CONFIG_NF_CT_PROTO_DCCP=y -CONFIG_NF_CT_PROTO_GRE=y -CONFIG_NF_CT_PROTO_SCTP=y -CONFIG_NF_CT_PROTO_UDPLITE=y -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_BROADCAST=m -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_SNMP=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_NETLINK_HELPER=m -CONFIG_NETFILTER_NETLINK_GLUE_CT=y -CONFIG_NF_NAT=m -CONFIG_NF_NAT_AMANDA=m -CONFIG_NF_NAT_FTP=m -CONFIG_NF_NAT_IRC=m -CONFIG_NF_NAT_SIP=m -CONFIG_NF_NAT_TFTP=m -CONFIG_NF_NAT_REDIRECT=y -CONFIG_NF_NAT_MASQUERADE=y -CONFIG_NETFILTER_SYNPROXY=m -CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y -CONFIG_NF_TABLES_NETDEV=y -CONFIG_NFT_NUMGEN=m -CONFIG_NFT_CT=m -CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m -CONFIG_NFT_CONNLIMIT=m -CONFIG_NFT_LOG=m -CONFIG_NFT_LIMIT=m -CONFIG_NFT_MASQ=m -CONFIG_NFT_REDIR=m -CONFIG_NFT_NAT=m -CONFIG_NFT_TUNNEL=m -CONFIG_NFT_OBJREF=m -CONFIG_NFT_QUEUE=m -CONFIG_NFT_QUOTA=m -CONFIG_NFT_REJECT=m -CONFIG_NFT_REJECT_INET=m -CONFIG_NFT_COMPAT=m -CONFIG_NFT_HASH=m -CONFIG_NFT_FIB=m -CONFIG_NFT_FIB_INET=m -CONFIG_NFT_XFRM=m -CONFIG_NFT_SOCKET=m -CONFIG_NFT_OSF=m -CONFIG_NFT_TPROXY=m -CONFIG_NFT_SYNPROXY=m -CONFIG_NF_DUP_NETDEV=m -CONFIG_NFT_DUP_NETDEV=m -CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NF_FLOW_TABLE_INET=m -CONFIG_NF_FLOW_TABLE=m -CONFIG_NETFILTER_XTABLES=m - -# -# Xtables combined modules -# -CONFIG_NETFILTER_XT_MARK=m -CONFIG_NETFILTER_XT_CONNMARK=m -CONFIG_NETFILTER_XT_SET=m - -# -# Xtables targets -# -CONFIG_NETFILTER_XT_TARGET_AUDIT=m -CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m -CONFIG_NETFILTER_XT_TARGET_CT=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_HL=m -CONFIG_NETFILTER_XT_TARGET_HMARK=m -CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m -CONFIG_NETFILTER_XT_TARGET_LED=m -CONFIG_NETFILTER_XT_TARGET_LOG=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_NAT=m -CONFIG_NETFILTER_XT_TARGET_NETMAP=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m -CONFIG_NETFILTER_XT_TARGET_RATEEST=m -CONFIG_NETFILTER_XT_TARGET_REDIRECT=m -CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m -CONFIG_NETFILTER_XT_TARGET_TEE=m -CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m -CONFIG_NETFILTER_XT_TARGET_SECMARK=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m - -# -# Xtables matches -# -CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_CGROUP=m -CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_CPU=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m 
-CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ECN=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_HL=m -CONFIG_NETFILTER_XT_MATCH_IPCOMP=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_IPVS=m -CONFIG_NETFILTER_XT_MATCH_L2TP=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_NFACCT=m -CONFIG_NETFILTER_XT_MATCH_OSF=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_SOCKET=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -# end of Core Netfilter Configuration - -CONFIG_IP_SET=m -CONFIG_IP_SET_MAX=256 -CONFIG_IP_SET_BITMAP_IP=m -CONFIG_IP_SET_BITMAP_IPMAC=m -CONFIG_IP_SET_BITMAP_PORT=m -CONFIG_IP_SET_HASH_IP=m -CONFIG_IP_SET_HASH_IPMARK=m -CONFIG_IP_SET_HASH_IPPORT=m -CONFIG_IP_SET_HASH_IPPORTIP=m -CONFIG_IP_SET_HASH_IPPORTNET=m -CONFIG_IP_SET_HASH_IPMAC=m -CONFIG_IP_SET_HASH_MAC=m -CONFIG_IP_SET_HASH_NETPORTNET=m -CONFIG_IP_SET_HASH_NET=m -CONFIG_IP_SET_HASH_NETNET=m -CONFIG_IP_SET_HASH_NETPORT=m -CONFIG_IP_SET_HASH_NETIFACE=m -CONFIG_IP_SET_LIST_SET=m -CONFIG_IP_VS=m -CONFIG_IP_VS_IPV6=y -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=15 - -# -# IPVS transport protocol load balancing support -# -CONFIG_IP_VS_PROTO_TCP=y -CONFIG_IP_VS_PROTO_UDP=y -CONFIG_IP_VS_PROTO_AH_ESP=y -CONFIG_IP_VS_PROTO_ESP=y -CONFIG_IP_VS_PROTO_AH=y -CONFIG_IP_VS_PROTO_SCTP=y - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_FO=m -CONFIG_IP_VS_OVF=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_MH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m - -# -# IPVS SH scheduler -# -CONFIG_IP_VS_SH_TAB_BITS=8 - -# -# IPVS MH scheduler -# -CONFIG_IP_VS_MH_TAB_INDEX=12 - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -CONFIG_IP_VS_NFCT=y -CONFIG_IP_VS_PE_SIP=m - -# -# IP: Netfilter Configuration -# -CONFIG_NF_DEFRAG_IPV4=m -CONFIG_NF_SOCKET_IPV4=m -CONFIG_NF_TPROXY_IPV4=m -CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_REJECT_IPV4=m -CONFIG_NFT_DUP_IPV4=m -CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m -CONFIG_NF_DUP_IPV4=m -CONFIG_NF_LOG_ARP=m -CONFIG_NF_LOG_IPV4=m -CONFIG_NF_REJECT_IPV4=m -CONFIG_NF_NAT_SNMP_BASIC=m -CONFIG_NF_NAT_PPTP=m -CONFIG_NF_NAT_H323=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_SYNPROXY=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -# 
end of IP: Netfilter Configuration - -# -# IPv6: Netfilter Configuration -# -CONFIG_NF_SOCKET_IPV6=m -CONFIG_NF_TPROXY_IPV6=m -CONFIG_NF_TABLES_IPV6=y -CONFIG_NFT_REJECT_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m -CONFIG_NF_DUP_IPV6=m -CONFIG_NF_REJECT_IPV6=m -CONFIG_NF_LOG_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m -# end of IPv6: Netfilter Configuration - -CONFIG_NF_DEFRAG_IPV6=m -CONFIG_NF_TABLES_BRIDGE=m -CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m -CONFIG_NF_LOG_BRIDGE=m -CONFIG_NF_CONNTRACK_BRIDGE=m -CONFIG_BRIDGE_NF_EBTABLES=m -CONFIG_BRIDGE_EBT_BROUTE=m -CONFIG_BRIDGE_EBT_T_FILTER=m -CONFIG_BRIDGE_EBT_T_NAT=m -CONFIG_BRIDGE_EBT_802_3=m -CONFIG_BRIDGE_EBT_AMONG=m -CONFIG_BRIDGE_EBT_ARP=m -CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m -CONFIG_BRIDGE_EBT_LIMIT=m -CONFIG_BRIDGE_EBT_MARK=m -CONFIG_BRIDGE_EBT_PKTTYPE=m -CONFIG_BRIDGE_EBT_STP=m -CONFIG_BRIDGE_EBT_VLAN=m -CONFIG_BRIDGE_EBT_ARPREPLY=m -CONFIG_BRIDGE_EBT_DNAT=m -CONFIG_BRIDGE_EBT_MARK_T=m -CONFIG_BRIDGE_EBT_REDIRECT=m -CONFIG_BRIDGE_EBT_SNAT=m -CONFIG_BRIDGE_EBT_LOG=m -CONFIG_BRIDGE_EBT_NFLOG=m -# CONFIG_BPFILTER is not set -CONFIG_IP_DCCP=m -CONFIG_INET_DCCP_DIAG=m - -# -# DCCP CCIDs Configuration -# -# CONFIG_IP_DCCP_CCID2_DEBUG is not set -CONFIG_IP_DCCP_CCID3=y -# CONFIG_IP_DCCP_CCID3_DEBUG is not set -CONFIG_IP_DCCP_TFRC_LIB=y -# end of DCCP CCIDs Configuration - -# -# DCCP Kernel Hacking -# -# CONFIG_IP_DCCP_DEBUG is not set -# end of DCCP Kernel Hacking - -CONFIG_IP_SCTP=m -# CONFIG_SCTP_DBG_OBJCNT is not set -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5 is not set -CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_NONE is not set -CONFIG_SCTP_COOKIE_HMAC_MD5=y -CONFIG_SCTP_COOKIE_HMAC_SHA1=y -CONFIG_INET_SCTP_DIAG=m -CONFIG_RDS=m -CONFIG_RDS_RDMA=m -CONFIG_RDS_TCP=m -# CONFIG_RDS_DEBUG is not set -CONFIG_TIPC=m -CONFIG_TIPC_MEDIA_IB=y -CONFIG_TIPC_MEDIA_UDP=y -CONFIG_TIPC_CRYPTO=y -CONFIG_TIPC_DIAG=m -CONFIG_ATM=m -CONFIG_ATM_CLIP=m -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -# CONFIG_ATM_BR2684_IPFILTER is not set -CONFIG_L2TP=m -# CONFIG_L2TP_DEBUGFS is not set -CONFIG_L2TP_V3=y -CONFIG_L2TP_IP=m -CONFIG_L2TP_ETH=m -CONFIG_STP=m -CONFIG_GARP=m -CONFIG_MRP=m -CONFIG_BRIDGE=m -CONFIG_BRIDGE_IGMP_SNOOPING=y -CONFIG_BRIDGE_VLAN_FILTERING=y -CONFIG_HAVE_NET_DSA=y -CONFIG_NET_DSA=m -CONFIG_NET_DSA_TAG_8021Q=m -CONFIG_NET_DSA_TAG_AR9331=m -CONFIG_NET_DSA_TAG_BRCM_COMMON=m -CONFIG_NET_DSA_TAG_BRCM=m -CONFIG_NET_DSA_TAG_BRCM_PREPEND=m -CONFIG_NET_DSA_TAG_GSWIP=m -CONFIG_NET_DSA_TAG_DSA=m -CONFIG_NET_DSA_TAG_EDSA=m -CONFIG_NET_DSA_TAG_MTK=m -CONFIG_NET_DSA_TAG_KSZ=m -CONFIG_NET_DSA_TAG_OCELOT=m -CONFIG_NET_DSA_TAG_QCA=m -CONFIG_NET_DSA_TAG_LAN9303=m -CONFIG_NET_DSA_TAG_SJA1105=m -CONFIG_NET_DSA_TAG_TRAILER=m -CONFIG_VLAN_8021Q=m -CONFIG_VLAN_8021Q_GVRP=y -CONFIG_VLAN_8021Q_MVRP=y -# CONFIG_DECNET is not set -CONFIG_LLC=m -CONFIG_LLC2=m -# CONFIG_ATALK is not set -# 
CONFIG_X25 is not set -# CONFIG_LAPB is not set -CONFIG_PHONET=m -CONFIG_6LOWPAN=m -# CONFIG_6LOWPAN_DEBUGFS is not set -CONFIG_6LOWPAN_NHC=m -CONFIG_6LOWPAN_NHC_DEST=m -CONFIG_6LOWPAN_NHC_FRAGMENT=m -CONFIG_6LOWPAN_NHC_HOP=m -CONFIG_6LOWPAN_NHC_IPV6=m -CONFIG_6LOWPAN_NHC_MOBILITY=m -CONFIG_6LOWPAN_NHC_ROUTING=m -CONFIG_6LOWPAN_NHC_UDP=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m -CONFIG_IEEE802154=m -CONFIG_IEEE802154_NL802154_EXPERIMENTAL=y -CONFIG_IEEE802154_SOCKET=m -CONFIG_IEEE802154_6LOWPAN=m -CONFIG_MAC802154=m -CONFIG_NET_SCHED=y - -# -# Queueing/Scheduling -# -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_HFSC=m -CONFIG_NET_SCH_ATM=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_MULTIQ=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFB=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_CBS=m -CONFIG_NET_SCH_ETF=m -CONFIG_NET_SCH_TAPRIO=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_MQPRIO=m -CONFIG_NET_SCH_SKBPRIO=m -CONFIG_NET_SCH_CHOKE=m -CONFIG_NET_SCH_QFQ=m -CONFIG_NET_SCH_CODEL=m -CONFIG_NET_SCH_FQ_CODEL=y -CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_FQ=m -CONFIG_NET_SCH_HHF=m -CONFIG_NET_SCH_PIE=m -CONFIG_NET_SCH_FQ_PIE=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_SCH_PLUG=m -CONFIG_NET_SCH_ETS=m -CONFIG_NET_SCH_DEFAULT=y -# CONFIG_DEFAULT_FQ is not set -# CONFIG_DEFAULT_CODEL is not set -CONFIG_DEFAULT_FQ_CODEL=y -# CONFIG_DEFAULT_SFQ is not set -# CONFIG_DEFAULT_PFIFO_FAST is not set -CONFIG_DEFAULT_NET_SCH="fq_codel" - -# -# Classification -# -CONFIG_NET_CLS=y -CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_FLOW=m -CONFIG_NET_CLS_CGROUP=m -CONFIG_NET_CLS_BPF=m -CONFIG_NET_CLS_FLOWER=m -CONFIG_NET_CLS_MATCHALL=m -CONFIG_NET_EMATCH=y -CONFIG_NET_EMATCH_STACK=32 -CONFIG_NET_EMATCH_CMP=m -CONFIG_NET_EMATCH_NBYTE=m -CONFIG_NET_EMATCH_U32=m -CONFIG_NET_EMATCH_META=m -CONFIG_NET_EMATCH_TEXT=m -CONFIG_NET_EMATCH_CANID=m -CONFIG_NET_EMATCH_IPSET=m -CONFIG_NET_EMATCH_IPT=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=m -CONFIG_NET_ACT_GACT=m -CONFIG_GACT_PROB=y -CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_SAMPLE=m -CONFIG_NET_ACT_IPT=m -CONFIG_NET_ACT_NAT=m -CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_ACT_SIMP=m -CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_ACT_CSUM=m -CONFIG_NET_ACT_MPLS=m -CONFIG_NET_ACT_VLAN=m -CONFIG_NET_ACT_BPF=m -CONFIG_NET_ACT_CONNMARK=m -CONFIG_NET_ACT_CTINFO=m -CONFIG_NET_ACT_SKBMOD=m -CONFIG_NET_ACT_IFE=m -CONFIG_NET_ACT_TUNNEL_KEY=m -CONFIG_NET_ACT_CT=m -CONFIG_NET_IFE_SKBMARK=m -CONFIG_NET_IFE_SKBPRIO=m -CONFIG_NET_IFE_SKBTCINDEX=m -CONFIG_NET_TC_SKB_EXT=y -CONFIG_NET_SCH_FIFO=y -CONFIG_DCB=y -CONFIG_DNS_RESOLVER=m -CONFIG_BATMAN_ADV=m -CONFIG_BATMAN_ADV_BATMAN_V=y -CONFIG_BATMAN_ADV_BLA=y -CONFIG_BATMAN_ADV_DAT=y -CONFIG_BATMAN_ADV_NC=y -CONFIG_BATMAN_ADV_MCAST=y -# CONFIG_BATMAN_ADV_DEBUGFS is not set -# CONFIG_BATMAN_ADV_DEBUG is not set -CONFIG_BATMAN_ADV_SYSFS=y -# CONFIG_BATMAN_ADV_TRACING is not set -CONFIG_OPENVSWITCH=m -CONFIG_OPENVSWITCH_GRE=m -CONFIG_OPENVSWITCH_VXLAN=m -CONFIG_OPENVSWITCH_GENEVE=m -CONFIG_VSOCKETS=m -CONFIG_VSOCKETS_DIAG=m -CONFIG_VSOCKETS_LOOPBACK=m -CONFIG_VMWARE_VMCI_VSOCKETS=m -CONFIG_VIRTIO_VSOCKETS=m 
-CONFIG_VIRTIO_VSOCKETS_COMMON=m -CONFIG_HYPERV_VSOCKETS=m -CONFIG_NETLINK_DIAG=m -CONFIG_MPLS=y -CONFIG_NET_MPLS_GSO=m -CONFIG_MPLS_ROUTING=m -CONFIG_MPLS_IPTUNNEL=m -CONFIG_NET_NSH=m -CONFIG_HSR=m -CONFIG_NET_SWITCHDEV=y -CONFIG_NET_L3_MASTER_DEV=y -CONFIG_NET_NCSI=y -CONFIG_NCSI_OEM_CMD_GET_MAC=y -CONFIG_RPS=y -CONFIG_RFS_ACCEL=y -CONFIG_XPS=y -CONFIG_CGROUP_NET_PRIO=y -CONFIG_CGROUP_NET_CLASSID=y -CONFIG_NET_RX_BUSY_POLL=y -CONFIG_BQL=y -CONFIG_BPF_JIT=y -CONFIG_BPF_STREAM_PARSER=y -CONFIG_NET_FLOW_LIMIT=y - -# -# Network testing -# -CONFIG_NET_PKTGEN=m -CONFIG_NET_DROP_MONITOR=y -# end of Network testing -# end of Networking options - -CONFIG_HAMRADIO=y - -# -# Packet Radio protocols -# -CONFIG_AX25=m -CONFIG_AX25_DAMA_SLAVE=y -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# -CONFIG_MKISS=m -CONFIG_6PACK=m -CONFIG_BPQETHER=m -CONFIG_BAYCOM_SER_FDX=m -CONFIG_BAYCOM_SER_HDX=m -CONFIG_BAYCOM_PAR=m -CONFIG_YAM=m -# end of AX.25 network device drivers - -CONFIG_CAN=m -CONFIG_CAN_RAW=m -CONFIG_CAN_BCM=m -CONFIG_CAN_GW=m -CONFIG_CAN_J1939=m - -# -# CAN Device Drivers -# -CONFIG_CAN_VCAN=m -CONFIG_CAN_VXCAN=m -CONFIG_CAN_SLCAN=m -CONFIG_CAN_DEV=m -CONFIG_CAN_CALC_BITTIMING=y -CONFIG_CAN_FLEXCAN=m -CONFIG_CAN_GRCAN=m -CONFIG_CAN_JANZ_ICAN3=m -CONFIG_CAN_KVASER_PCIEFD=m -CONFIG_CAN_C_CAN=m -CONFIG_CAN_C_CAN_PLATFORM=m -CONFIG_CAN_C_CAN_PCI=m -CONFIG_CAN_CC770=m -# CONFIG_CAN_CC770_ISA is not set -CONFIG_CAN_CC770_PLATFORM=m -CONFIG_CAN_IFI_CANFD=m -CONFIG_CAN_M_CAN=m -CONFIG_CAN_M_CAN_PLATFORM=m -CONFIG_CAN_M_CAN_TCAN4X5X=m -CONFIG_CAN_PEAK_PCIEFD=m -CONFIG_CAN_SJA1000=m -CONFIG_CAN_EMS_PCI=m -# CONFIG_CAN_EMS_PCMCIA is not set -CONFIG_CAN_F81601=m -CONFIG_CAN_KVASER_PCI=m -CONFIG_CAN_PEAK_PCI=m -CONFIG_CAN_PEAK_PCIEC=y -CONFIG_CAN_PEAK_PCMCIA=m -CONFIG_CAN_PLX_PCI=m -# CONFIG_CAN_SJA1000_ISA is not set -CONFIG_CAN_SJA1000_PLATFORM=m -CONFIG_CAN_SOFTING=m -CONFIG_CAN_SOFTING_CS=m - -# -# CAN SPI interfaces -# -CONFIG_CAN_HI311X=m -CONFIG_CAN_MCP251X=m -# end of CAN SPI interfaces - -# -# CAN USB interfaces -# -CONFIG_CAN_8DEV_USB=m -CONFIG_CAN_EMS_USB=m -CONFIG_CAN_ESD_USB2=m -CONFIG_CAN_GS_USB=m -CONFIG_CAN_KVASER_USB=m -CONFIG_CAN_MCBA_USB=m -CONFIG_CAN_PEAK_USB=m -CONFIG_CAN_UCAN=m -# end of CAN USB interfaces - -# CONFIG_CAN_DEBUG_DEVICES is not set -# end of CAN Device Drivers - -CONFIG_BT=m -CONFIG_BT_BREDR=y -CONFIG_BT_RFCOMM=m -CONFIG_BT_RFCOMM_TTY=y -CONFIG_BT_BNEP=m -CONFIG_BT_BNEP_MC_FILTER=y -CONFIG_BT_BNEP_PROTO_FILTER=y -CONFIG_BT_CMTP=m -CONFIG_BT_HIDP=m -CONFIG_BT_HS=y -CONFIG_BT_LE=y -CONFIG_BT_6LOWPAN=m -CONFIG_BT_LEDS=y -# CONFIG_BT_SELFTEST is not set -# CONFIG_BT_DEBUGFS is not set - -# -# Bluetooth device drivers -# -CONFIG_BT_INTEL=m -CONFIG_BT_BCM=m -CONFIG_BT_RTL=m -CONFIG_BT_QCA=m -CONFIG_BT_HCIBTUSB=m -CONFIG_BT_HCIBTUSB_AUTOSUSPEND=y -CONFIG_BT_HCIBTUSB_BCM=y -CONFIG_BT_HCIBTUSB_MTK=y -CONFIG_BT_HCIBTUSB_RTL=y -CONFIG_BT_HCIBTSDIO=m -CONFIG_BT_HCIUART=m -CONFIG_BT_HCIUART_SERDEV=y -CONFIG_BT_HCIUART_H4=y -CONFIG_BT_HCIUART_NOKIA=m -CONFIG_BT_HCIUART_BCSP=y -CONFIG_BT_HCIUART_ATH3K=y -CONFIG_BT_HCIUART_LL=y -CONFIG_BT_HCIUART_3WIRE=y -CONFIG_BT_HCIUART_INTEL=y -CONFIG_BT_HCIUART_BCM=y -CONFIG_BT_HCIUART_RTL=y -CONFIG_BT_HCIUART_QCA=y -CONFIG_BT_HCIUART_AG6XX=y -CONFIG_BT_HCIUART_MRVL=y -CONFIG_BT_HCIBCM203X=m -CONFIG_BT_HCIBPA10X=m -CONFIG_BT_HCIBFUSB=m -CONFIG_BT_HCIDTL1=m -CONFIG_BT_HCIBT3C=m -CONFIG_BT_HCIBLUECARD=m -CONFIG_BT_HCIVHCI=m -CONFIG_BT_MRVL=m -CONFIG_BT_MRVL_SDIO=m -CONFIG_BT_ATH3K=m -CONFIG_BT_MTKSDIO=m -CONFIG_BT_MTKUART=m 
-CONFIG_BT_HCIRSI=m -# end of Bluetooth device drivers - -CONFIG_AF_RXRPC=m -CONFIG_AF_RXRPC_IPV6=y -# CONFIG_AF_RXRPC_INJECT_LOSS is not set -# CONFIG_AF_RXRPC_DEBUG is not set -CONFIG_RXKAD=y -CONFIG_AF_KCM=m -CONFIG_STREAM_PARSER=y -CONFIG_FIB_RULES=y -CONFIG_WIRELESS=y -CONFIG_WIRELESS_EXT=y -CONFIG_WEXT_CORE=y -CONFIG_WEXT_PROC=y -CONFIG_WEXT_SPY=y -CONFIG_WEXT_PRIV=y -CONFIG_CFG80211=m -# CONFIG_NL80211_TESTMODE is not set -# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set -# CONFIG_CFG80211_CERTIFICATION_ONUS is not set -CONFIG_CFG80211_REQUIRE_SIGNED_REGDB=y -CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS=y -CONFIG_CFG80211_DEFAULT_PS=y -# CONFIG_CFG80211_DEBUGFS is not set -CONFIG_CFG80211_CRDA_SUPPORT=y -CONFIG_CFG80211_WEXT=y -CONFIG_CFG80211_WEXT_EXPORT=y -CONFIG_LIB80211=m -CONFIG_LIB80211_CRYPT_WEP=m -CONFIG_LIB80211_CRYPT_CCMP=m -CONFIG_LIB80211_CRYPT_TKIP=m -# CONFIG_LIB80211_DEBUG is not set -CONFIG_MAC80211=m -CONFIG_MAC80211_HAS_RC=y -CONFIG_MAC80211_RC_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT="minstrel_ht" -CONFIG_MAC80211_MESH=y -CONFIG_MAC80211_LEDS=y -# CONFIG_MAC80211_DEBUGFS is not set -# CONFIG_MAC80211_MESSAGE_TRACING is not set -# CONFIG_MAC80211_DEBUG_MENU is not set -CONFIG_MAC80211_STA_HASH_MAX_SIZE=0 -CONFIG_WIMAX=m -CONFIG_WIMAX_DEBUG_LEVEL=8 -CONFIG_RFKILL=m -CONFIG_RFKILL_LEDS=y -CONFIG_RFKILL_INPUT=y -CONFIG_RFKILL_GPIO=m -CONFIG_NET_9P=m -CONFIG_NET_9P_VIRTIO=m -CONFIG_NET_9P_XEN=m -CONFIG_NET_9P_RDMA=m -# CONFIG_NET_9P_DEBUG is not set -CONFIG_CAIF=m -# CONFIG_CAIF_DEBUG is not set -CONFIG_CAIF_NETDEV=m -CONFIG_CAIF_USB=m -CONFIG_CEPH_LIB=m -CONFIG_CEPH_LIB_PRETTYDEBUG=y -CONFIG_CEPH_LIB_USE_DNS_RESOLVER=y -CONFIG_NFC=m -CONFIG_NFC_DIGITAL=m -CONFIG_NFC_NCI=m -CONFIG_NFC_NCI_SPI=m -CONFIG_NFC_NCI_UART=m -CONFIG_NFC_HCI=m -CONFIG_NFC_SHDLC=y - -# -# Near Field Communication (NFC) devices -# -CONFIG_NFC_TRF7970A=m -CONFIG_NFC_MEI_PHY=m -CONFIG_NFC_SIM=m -CONFIG_NFC_PORT100=m -CONFIG_NFC_FDP=m -CONFIG_NFC_FDP_I2C=m -CONFIG_NFC_PN544=m -CONFIG_NFC_PN544_I2C=m -CONFIG_NFC_PN544_MEI=m -CONFIG_NFC_PN533=m -CONFIG_NFC_PN533_USB=m -CONFIG_NFC_PN533_I2C=m -CONFIG_NFC_PN532_UART=m -CONFIG_NFC_MICROREAD=m -CONFIG_NFC_MICROREAD_I2C=m -CONFIG_NFC_MICROREAD_MEI=m -CONFIG_NFC_MRVL=m -CONFIG_NFC_MRVL_USB=m -CONFIG_NFC_MRVL_UART=m -CONFIG_NFC_MRVL_I2C=m -CONFIG_NFC_MRVL_SPI=m -CONFIG_NFC_ST21NFCA=m -CONFIG_NFC_ST21NFCA_I2C=m -CONFIG_NFC_ST_NCI=m -CONFIG_NFC_ST_NCI_I2C=m -CONFIG_NFC_ST_NCI_SPI=m -CONFIG_NFC_NXP_NCI=m -CONFIG_NFC_NXP_NCI_I2C=m -CONFIG_NFC_S3FWRN5=m -CONFIG_NFC_S3FWRN5_I2C=m -CONFIG_NFC_ST95HF=m -# end of Near Field Communication (NFC) devices - -CONFIG_PSAMPLE=m -CONFIG_NET_IFE=m -CONFIG_LWTUNNEL=y -CONFIG_LWTUNNEL_BPF=y -CONFIG_DST_CACHE=y -CONFIG_GRO_CELLS=y -CONFIG_SOCK_VALIDATE_XMIT=y -CONFIG_NET_SOCK_MSG=y -CONFIG_NET_DEVLINK=y -CONFIG_PAGE_POOL=y -CONFIG_FAILOVER=m -CONFIG_ETHTOOL_NETLINK=y -CONFIG_HAVE_EBPF_JIT=y - -# -# Device Drivers -# -CONFIG_HAVE_EISA=y -# CONFIG_EISA is not set -CONFIG_HAVE_PCI=y -CONFIG_PCI=y -CONFIG_PCI_DOMAINS=y -CONFIG_PCIEPORTBUS=y -CONFIG_HOTPLUG_PCI_PCIE=y -CONFIG_PCIEAER=y -# CONFIG_PCIEAER_INJECT is not set -CONFIG_PCIE_ECRC=y -CONFIG_PCIEASPM=y -CONFIG_PCIEASPM_DEFAULT=y -# CONFIG_PCIEASPM_POWERSAVE is not set -# CONFIG_PCIEASPM_POWER_SUPERSAVE is not set -# CONFIG_PCIEASPM_PERFORMANCE is not set -CONFIG_PCIE_PME=y -CONFIG_PCIE_DPC=y -CONFIG_PCIE_PTM=y -# CONFIG_PCIE_BW is not set -CONFIG_PCIE_EDR=y -CONFIG_PCI_MSI=y -CONFIG_PCI_MSI_IRQ_DOMAIN=y -CONFIG_PCI_QUIRKS=y -# CONFIG_PCI_DEBUG is 
not set -CONFIG_PCI_REALLOC_ENABLE_AUTO=y -CONFIG_PCI_STUB=y -CONFIG_PCI_PF_STUB=m -CONFIG_XEN_PCIDEV_FRONTEND=m -CONFIG_PCI_ATS=y -CONFIG_PCI_ECAM=y -CONFIG_PCI_LOCKLESS_CONFIG=y -CONFIG_PCI_IOV=y -CONFIG_PCI_PRI=y -CONFIG_PCI_PASID=y -CONFIG_PCI_P2PDMA=y -CONFIG_PCI_LABEL=y -CONFIG_PCI_HYPERV=m -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_ACPI=y -CONFIG_HOTPLUG_PCI_ACPI_IBM=m -CONFIG_HOTPLUG_PCI_CPCI=y -CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m -CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m -CONFIG_HOTPLUG_PCI_SHPC=y - -# -# PCI controller drivers -# -CONFIG_PCI_FTPCI100=y -CONFIG_PCI_HOST_COMMON=y -CONFIG_PCI_HOST_GENERIC=y -CONFIG_PCIE_XILINX=y -CONFIG_VMD=m -CONFIG_PCI_HYPERV_INTERFACE=m - -# -# DesignWare PCI Core Support -# -CONFIG_PCIE_DW=y -CONFIG_PCIE_DW_HOST=y -CONFIG_PCIE_DW_EP=y -CONFIG_PCIE_DW_PLAT=y -CONFIG_PCIE_DW_PLAT_HOST=y -CONFIG_PCIE_DW_PLAT_EP=y -CONFIG_PCIE_INTEL_GW=y -CONFIG_PCI_MESON=y -# end of DesignWare PCI Core Support - -# -# Mobiveil PCIe Core Support -# -# end of Mobiveil PCIe Core Support - -# -# Cadence PCIe controllers support -# -CONFIG_PCIE_CADENCE=y -CONFIG_PCIE_CADENCE_HOST=y -CONFIG_PCIE_CADENCE_EP=y -CONFIG_PCIE_CADENCE_PLAT=y -CONFIG_PCIE_CADENCE_PLAT_HOST=y -CONFIG_PCIE_CADENCE_PLAT_EP=y -# end of Cadence PCIe controllers support -# end of PCI controller drivers - -# -# PCI Endpoint -# -CONFIG_PCI_ENDPOINT=y -CONFIG_PCI_ENDPOINT_CONFIGFS=y -# CONFIG_PCI_EPF_TEST is not set -# end of PCI Endpoint - -# -# PCI switch controller drivers -# -CONFIG_PCI_SW_SWITCHTEC=m -# end of PCI switch controller drivers - -CONFIG_PCCARD=m -CONFIG_PCMCIA=m -CONFIG_PCMCIA_LOAD_CIS=y -CONFIG_CARDBUS=y - -# -# PC-card bridges -# -CONFIG_YENTA=m -CONFIG_YENTA_O2=y -CONFIG_YENTA_RICOH=y -CONFIG_YENTA_TI=y -CONFIG_YENTA_ENE_TUNE=y -CONFIG_YENTA_TOSHIBA=y -CONFIG_PD6729=m -CONFIG_I82092=m -CONFIG_PCCARD_NONSTATIC=y -CONFIG_RAPIDIO=m -CONFIG_RAPIDIO_TSI721=m -CONFIG_RAPIDIO_DISC_TIMEOUT=30 -CONFIG_RAPIDIO_ENABLE_RX_TX_PORTS=y -CONFIG_RAPIDIO_DMA_ENGINE=y -# CONFIG_RAPIDIO_DEBUG is not set -CONFIG_RAPIDIO_ENUM_BASIC=m -CONFIG_RAPIDIO_CHMAN=m -CONFIG_RAPIDIO_MPORT_CDEV=m - -# -# RapidIO Switch drivers -# -CONFIG_RAPIDIO_TSI57X=m -CONFIG_RAPIDIO_CPS_XX=m -CONFIG_RAPIDIO_TSI568=m -CONFIG_RAPIDIO_CPS_GEN2=m -CONFIG_RAPIDIO_RXS_GEN3=m -# end of RapidIO Switch drivers - -# -# Generic Driver Options -# -# CONFIG_UEVENT_HELPER is not set -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y - -# -# Firmware loader -# -CONFIG_FW_LOADER=y -CONFIG_FW_LOADER_PAGED_BUF=y -CONFIG_EXTRA_FIRMWARE="" -# CONFIG_FW_LOADER_USER_HELPER is not set -CONFIG_FW_LOADER_COMPRESS=y -CONFIG_FW_CACHE=y -# end of Firmware loader - -CONFIG_WANT_DEV_COREDUMP=y -CONFIG_ALLOW_DEV_COREDUMP=y -CONFIG_DEV_COREDUMP=y -# CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set -# CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set -CONFIG_HMEM_REPORTING=y -# CONFIG_TEST_ASYNC_DRIVER_PROBE is not set -CONFIG_SYS_HYPERVISOR=y -CONFIG_GENERIC_CPU_AUTOPROBE=y -CONFIG_GENERIC_CPU_VULNERABILITIES=y -CONFIG_REGMAP=y -CONFIG_REGMAP_I2C=y -CONFIG_REGMAP_SLIMBUS=m -CONFIG_REGMAP_SPI=y -CONFIG_REGMAP_SPMI=m -CONFIG_REGMAP_W1=m -CONFIG_REGMAP_MMIO=y -CONFIG_REGMAP_IRQ=y -CONFIG_REGMAP_SOUNDWIRE=m -CONFIG_REGMAP_SCCB=m -CONFIG_REGMAP_I3C=m -CONFIG_DMA_SHARED_BUFFER=y -# CONFIG_DMA_FENCE_TRACE is not set -# end of Generic Driver Options - -# -# Bus devices -# -CONFIG_MOXTET=m -CONFIG_SIMPLE_PM_BUS=y -CONFIG_MHI_BUS=m -# end of Bus devices - -CONFIG_CONNECTOR=y -CONFIG_PROC_EVENTS=y -CONFIG_GNSS=m 
-CONFIG_GNSS_SERIAL=m -CONFIG_GNSS_MTK_SERIAL=m -CONFIG_GNSS_SIRF_SERIAL=m -CONFIG_GNSS_UBX_SERIAL=m -CONFIG_MTD=m -CONFIG_MTD_TESTS=m - -# -# Partition parsers -# -CONFIG_MTD_AR7_PARTS=m -CONFIG_MTD_CMDLINE_PARTS=m -CONFIG_MTD_OF_PARTS=m -CONFIG_MTD_REDBOOT_PARTS=m -CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1 -# CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED is not set -# CONFIG_MTD_REDBOOT_PARTS_READONLY is not set -# end of Partition parsers - -# -# User Modules And Translation Layers -# -CONFIG_MTD_BLKDEVS=m -CONFIG_MTD_BLOCK=m -CONFIG_MTD_BLOCK_RO=m -CONFIG_FTL=m -CONFIG_NFTL=m -CONFIG_NFTL_RW=y -CONFIG_INFTL=m -CONFIG_RFD_FTL=m -CONFIG_SSFDC=m -CONFIG_SM_FTL=m -CONFIG_MTD_OOPS=m -CONFIG_MTD_SWAP=m -CONFIG_MTD_PARTITIONED_MASTER=y - -# -# RAM/ROM/Flash chip drivers -# -CONFIG_MTD_CFI=m -CONFIG_MTD_JEDECPROBE=m -CONFIG_MTD_GEN_PROBE=m -# CONFIG_MTD_CFI_ADV_OPTIONS is not set -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y -CONFIG_MTD_CFI_INTELEXT=m -CONFIG_MTD_CFI_AMDSTD=m -CONFIG_MTD_CFI_STAA=m -CONFIG_MTD_CFI_UTIL=m -CONFIG_MTD_RAM=m -CONFIG_MTD_ROM=m -CONFIG_MTD_ABSENT=m -# end of RAM/ROM/Flash chip drivers - -# -# Mapping drivers for chip access -# -CONFIG_MTD_COMPLEX_MAPPINGS=y -CONFIG_MTD_PHYSMAP=m -# CONFIG_MTD_PHYSMAP_COMPAT is not set -CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_PHYSMAP_VERSATILE=y -CONFIG_MTD_PHYSMAP_GEMINI=y -CONFIG_MTD_PHYSMAP_GPIO_ADDR=y -CONFIG_MTD_SBC_GXX=m -CONFIG_MTD_AMD76XROM=m -CONFIG_MTD_ICHXROM=m -CONFIG_MTD_ESB2ROM=m -CONFIG_MTD_CK804XROM=m -CONFIG_MTD_SCB2_FLASH=m -CONFIG_MTD_NETtel=m -CONFIG_MTD_L440GX=m -CONFIG_MTD_PCI=m -CONFIG_MTD_PCMCIA=m -# CONFIG_MTD_PCMCIA_ANONYMOUS is not set -CONFIG_MTD_INTEL_VR_NOR=m -CONFIG_MTD_PLATRAM=m -# end of Mapping drivers for chip access - -# -# Self-contained MTD device drivers -# -CONFIG_MTD_PMC551=m -# CONFIG_MTD_PMC551_BUGFIX is not set -# CONFIG_MTD_PMC551_DEBUG is not set -CONFIG_MTD_DATAFLASH=m -# CONFIG_MTD_DATAFLASH_WRITE_VERIFY is not set -CONFIG_MTD_DATAFLASH_OTP=y -CONFIG_MTD_MCHP23K256=m -CONFIG_MTD_SST25L=m -CONFIG_MTD_SLRAM=m -CONFIG_MTD_PHRAM=m -CONFIG_MTD_MTDRAM=m -CONFIG_MTDRAM_TOTAL_SIZE=4096 -CONFIG_MTDRAM_ERASE_SIZE=128 -CONFIG_MTD_BLOCK2MTD=m - -# -# Disk-On-Chip Device Drivers -# -CONFIG_MTD_DOCG3=m -CONFIG_BCH_CONST_M=14 -CONFIG_BCH_CONST_T=4 -# end of Self-contained MTD device drivers - -CONFIG_MTD_NAND_CORE=m -CONFIG_MTD_ONENAND=m -# CONFIG_MTD_ONENAND_VERIFY_WRITE is not set -CONFIG_MTD_ONENAND_GENERIC=m -CONFIG_MTD_ONENAND_OTP=y -CONFIG_MTD_ONENAND_2X_PROGRAM=y -CONFIG_MTD_NAND_ECC_SW_HAMMING=m -CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y -CONFIG_MTD_RAW_NAND=m -CONFIG_MTD_NAND_ECC_SW_BCH=y - -# -# Raw/parallel NAND flash controllers -# -CONFIG_MTD_NAND_DENALI=m -CONFIG_MTD_NAND_DENALI_PCI=m -CONFIG_MTD_NAND_DENALI_DT=m -CONFIG_MTD_NAND_CAFE=m -CONFIG_MTD_NAND_MXIC=m -CONFIG_MTD_NAND_GPIO=m -CONFIG_MTD_NAND_PLATFORM=m -CONFIG_MTD_NAND_CADENCE=m - -# -# Misc -# -CONFIG_MTD_SM_COMMON=m -CONFIG_MTD_NAND_NANDSIM=m -CONFIG_MTD_NAND_RICOH=m -CONFIG_MTD_NAND_DISKONCHIP=m -# CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADVANCED is not set -CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADDRESS=0 -CONFIG_MTD_NAND_DISKONCHIP_BBTWRITE=y -CONFIG_MTD_SPI_NAND=m - -# -# LPDDR & LPDDR2 PCM memory drivers -# -CONFIG_MTD_LPDDR=m -CONFIG_MTD_QINFO_PROBE=m -# end of LPDDR & LPDDR2 PCM memory drivers - -CONFIG_MTD_SPI_NOR=m -CONFIG_MTD_SPI_NOR_USE_4K_SECTORS=y -CONFIG_SPI_INTEL_SPI=m -CONFIG_SPI_INTEL_SPI_PCI=m -CONFIG_SPI_INTEL_SPI_PLATFORM=m -CONFIG_MTD_UBI=m 
-CONFIG_MTD_UBI_WL_THRESHOLD=4096 -CONFIG_MTD_UBI_BEB_LIMIT=20 -CONFIG_MTD_UBI_FASTMAP=y -CONFIG_MTD_UBI_GLUEBI=m -CONFIG_MTD_UBI_BLOCK=y -CONFIG_MTD_HYPERBUS=m -CONFIG_DTC=y -CONFIG_OF=y -# CONFIG_OF_UNITTEST is not set -CONFIG_OF_FLATTREE=y -CONFIG_OF_KOBJ=y -CONFIG_OF_DYNAMIC=y -CONFIG_OF_ADDRESS=y -CONFIG_OF_IRQ=y -CONFIG_OF_NET=y -CONFIG_OF_MDIO=m -CONFIG_OF_RESOLVE=y -CONFIG_OF_OVERLAY=y -CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_SERIAL=m -CONFIG_PARPORT_PC_FIFO=y -CONFIG_PARPORT_PC_SUPERIO=y -CONFIG_PARPORT_PC_PCMCIA=m -CONFIG_PARPORT_AX88796=m -CONFIG_PARPORT_1284=y -CONFIG_PARPORT_NOT_PC=y -CONFIG_PNP=y -CONFIG_PNP_DEBUG_MESSAGES=y - -# -# Protocols -# -CONFIG_PNPACPI=y -CONFIG_BLK_DEV=y -# CONFIG_BLK_DEV_NULL_BLK is not set -CONFIG_BLK_DEV_FD=m -CONFIG_CDROM=m -# CONFIG_PARIDE is not set -CONFIG_BLK_DEV_PCIESSD_MTIP32XX=m -CONFIG_ZRAM=m -CONFIG_ZRAM_WRITEBACK=y -# CONFIG_ZRAM_MEMORY_TRACKING is not set -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_BLK_DEV_DRBD=m -# CONFIG_DRBD_FAULT_INJECTION is not set -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_SKD=m -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_RAM=m -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=16384 -CONFIG_CDROM_PKTCDVD=m -CONFIG_CDROM_PKTCDVD_BUFFERS=8 -# CONFIG_CDROM_PKTCDVD_WCACHE is not set -CONFIG_ATA_OVER_ETH=m -CONFIG_XEN_BLKDEV_FRONTEND=m -CONFIG_XEN_BLKDEV_BACKEND=m -CONFIG_VIRTIO_BLK=m -CONFIG_BLK_DEV_RBD=m -CONFIG_BLK_DEV_RSXX=m - -# -# NVME Support -# -CONFIG_NVME_CORE=y -CONFIG_BLK_DEV_NVME=y -CONFIG_NVME_MULTIPATH=y -CONFIG_NVME_HWMON=y -CONFIG_NVME_FABRICS=m -CONFIG_NVME_RDMA=m -CONFIG_NVME_FC=m -CONFIG_NVME_TCP=m -CONFIG_NVME_TARGET=m -CONFIG_NVME_TARGET_LOOP=m -CONFIG_NVME_TARGET_RDMA=m -CONFIG_NVME_TARGET_FC=m -CONFIG_NVME_TARGET_FCLOOP=m -CONFIG_NVME_TARGET_TCP=m -# end of NVME Support - -# -# Misc devices -# -CONFIG_SENSORS_LIS3LV02D=m -CONFIG_AD525X_DPOT=m -CONFIG_AD525X_DPOT_I2C=m -CONFIG_AD525X_DPOT_SPI=m -# CONFIG_DUMMY_IRQ is not set -CONFIG_IBM_ASM=m -CONFIG_PHANTOM=m -CONFIG_TIFM_CORE=m -CONFIG_TIFM_7XX1=m -CONFIG_ICS932S401=m -CONFIG_ENCLOSURE_SERVICES=m -CONFIG_HP_ILO=m -CONFIG_APDS9802ALS=m -CONFIG_ISL29003=m -CONFIG_ISL29020=m -CONFIG_SENSORS_TSL2550=m -CONFIG_SENSORS_BH1770=m -CONFIG_SENSORS_APDS990X=m -CONFIG_HMC6352=m -CONFIG_DS1682=m -CONFIG_VMWARE_BALLOON=m -CONFIG_LATTICE_ECP3_CONFIG=m -# CONFIG_SRAM is not set -CONFIG_PCI_ENDPOINT_TEST=m -CONFIG_XILINX_SDFEC=m -CONFIG_MISC_RTSX=m -CONFIG_PVPANIC=m -CONFIG_C2PORT=m -CONFIG_C2PORT_DURAMAR_2150=m - -# -# EEPROM support -# -CONFIG_EEPROM_AT24=m -# CONFIG_EEPROM_AT25 is not set -CONFIG_EEPROM_LEGACY=m -CONFIG_EEPROM_MAX6875=m -CONFIG_EEPROM_93CX6=m -# CONFIG_EEPROM_93XX46 is not set -CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EEPROM_EE1004=m -# end of EEPROM support - -CONFIG_CB710_CORE=m -# CONFIG_CB710_DEBUG is not set -CONFIG_CB710_DEBUG_ASSUMPTIONS=y - -# -# Texas Instruments shared transport line discipline -# -CONFIG_TI_ST=m -# end of Texas Instruments shared transport line discipline - -CONFIG_SENSORS_LIS3_I2C=m -CONFIG_ALTERA_STAPL=m -CONFIG_INTEL_MEI=m -CONFIG_INTEL_MEI_ME=m -CONFIG_INTEL_MEI_TXE=m -CONFIG_INTEL_MEI_HDCP=m -CONFIG_VMWARE_VMCI=m - -# -# Intel MIC & related support -# -CONFIG_INTEL_MIC_BUS=m -CONFIG_SCIF_BUS=m -CONFIG_VOP_BUS=m -CONFIG_INTEL_MIC_HOST=m -CONFIG_INTEL_MIC_CARD=m -CONFIG_SCIF=m -CONFIG_MIC_COSM=m -CONFIG_VOP=m -# end of Intel MIC & related support - -CONFIG_GENWQE=m 
-CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY=0 -CONFIG_ECHO=m -CONFIG_MISC_ALCOR_PCI=m -CONFIG_MISC_RTSX_PCI=m -CONFIG_MISC_RTSX_USB=m -CONFIG_HABANA_AI=m -CONFIG_UACCE=m -# end of Misc devices - -CONFIG_HAVE_IDE=y -# CONFIG_IDE is not set - -# -# SCSI device support -# -CONFIG_SCSI_MOD=y -CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y -CONFIG_SCSI_DMA=y -CONFIG_SCSI_NETLINK=y -CONFIG_SCSI_PROC_FS=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=m -CONFIG_BLK_DEV_SR=m -CONFIG_CHR_DEV_SG=m -CONFIG_CHR_DEV_SCH=m -CONFIG_SCSI_ENCLOSURE=m -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SCAN_ASYNC=y - -# -# SCSI Transports -# -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=m -CONFIG_SCSI_ISCSI_ATTRS=m -CONFIG_SCSI_SAS_ATTRS=m -CONFIG_SCSI_SAS_LIBSAS=m -CONFIG_SCSI_SAS_ATA=y -CONFIG_SCSI_SAS_HOST_SMP=y -CONFIG_SCSI_SRP_ATTRS=m -# end of SCSI Transports - -CONFIG_SCSI_LOWLEVEL=y -CONFIG_ISCSI_TCP=m -CONFIG_ISCSI_BOOT_SYSFS=m -CONFIG_SCSI_CXGB3_ISCSI=m -CONFIG_SCSI_CXGB4_ISCSI=m -CONFIG_SCSI_BNX2_ISCSI=m -CONFIG_SCSI_BNX2X_FCOE=m -CONFIG_BE2ISCSI=m -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_HPSA=m -CONFIG_SCSI_3W_9XXX=m -CONFIG_SCSI_3W_SAS=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -CONFIG_AIC7XXX_DEBUG_ENABLE=y -CONFIG_AIC7XXX_DEBUG_MASK=0 -CONFIG_AIC7XXX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -CONFIG_AIC79XX_DEBUG_ENABLE=y -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_AIC79XX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC94XX=m -CONFIG_AIC94XX_DEBUG=y -CONFIG_SCSI_MVSAS=m -CONFIG_SCSI_MVSAS_DEBUG=y -CONFIG_SCSI_MVSAS_TASKLET=y -CONFIG_SCSI_MVUMI=m -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_ARCMSR=m -CONFIG_SCSI_ESAS2R=m -CONFIG_MEGARAID_NEWGEN=y -CONFIG_MEGARAID_MM=m -CONFIG_MEGARAID_MAILBOX=m -CONFIG_MEGARAID_LEGACY=m -CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_MPT3SAS=m -CONFIG_SCSI_MPT2SAS_MAX_SGE=128 -CONFIG_SCSI_MPT3SAS_MAX_SGE=128 -CONFIG_SCSI_MPT2SAS=m -CONFIG_SCSI_SMARTPQI=m -CONFIG_SCSI_UFSHCD=m -CONFIG_SCSI_UFSHCD_PCI=m -# CONFIG_SCSI_UFS_DWC_TC_PCI is not set -CONFIG_SCSI_UFSHCD_PLATFORM=m -CONFIG_SCSI_UFS_CDNS_PLATFORM=m -# CONFIG_SCSI_UFS_DWC_TC_PLATFORM is not set -CONFIG_SCSI_UFS_BSG=y -CONFIG_SCSI_HPTIOP=m -CONFIG_SCSI_BUSLOGIC=m -CONFIG_SCSI_FLASHPOINT=y -CONFIG_SCSI_MYRB=m -CONFIG_SCSI_MYRS=m -CONFIG_VMWARE_PVSCSI=m -CONFIG_XEN_SCSI_FRONTEND=m -CONFIG_HYPERV_STORAGE=m -CONFIG_LIBFC=m -CONFIG_LIBFCOE=m -CONFIG_FCOE=m -CONFIG_FCOE_FNIC=m -CONFIG_SCSI_SNIC=m -# CONFIG_SCSI_SNIC_DEBUG_FS is not set -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_FDOMAIN=m -CONFIG_SCSI_FDOMAIN_PCI=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_ISCI=m -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_SCSI_STEX=m -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -CONFIG_SCSI_SYM53C8XX_MMIO=y -CONFIG_SCSI_IPR=m -CONFIG_SCSI_IPR_TRACE=y -CONFIG_SCSI_IPR_DUMP=y -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_QLA_FC=m -CONFIG_TCM_QLA2XXX=m -# CONFIG_TCM_QLA2XXX_DEBUG is not set -CONFIG_SCSI_QLA_ISCSI=m -CONFIG_QEDI=m -CONFIG_QEDF=m -CONFIG_SCSI_LPFC=m -# CONFIG_SCSI_LPFC_DEBUG_FS is not set -CONFIG_SCSI_DC395x=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_WD719X=m -CONFIG_SCSI_DEBUG=m -CONFIG_SCSI_PMCRAID=m 
-CONFIG_SCSI_PM8001=m -CONFIG_SCSI_BFA_FC=m -CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_CHELSIO_FCOE=m -CONFIG_SCSI_LOWLEVEL_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_QLOGIC=m -CONFIG_PCMCIA_SYM53C500=m -CONFIG_SCSI_DH=y -CONFIG_SCSI_DH_RDAC=m -CONFIG_SCSI_DH_HP_SW=m -CONFIG_SCSI_DH_EMC=m -CONFIG_SCSI_DH_ALUA=m -# end of SCSI device support - -CONFIG_ATA=y -CONFIG_SATA_HOST=y -CONFIG_PATA_TIMINGS=y -CONFIG_ATA_VERBOSE_ERROR=y -CONFIG_ATA_FORCE=y -CONFIG_ATA_ACPI=y -CONFIG_SATA_ZPODD=y -CONFIG_SATA_PMP=y - -# -# Controllers with non-SFF native interface -# -CONFIG_SATA_AHCI=y -CONFIG_SATA_MOBILE_LPM_POLICY=3 -CONFIG_SATA_AHCI_PLATFORM=m -CONFIG_AHCI_CEVA=m -CONFIG_AHCI_QORIQ=m -CONFIG_SATA_INIC162X=m -CONFIG_SATA_ACARD_AHCI=m -CONFIG_SATA_SIL24=m -CONFIG_ATA_SFF=y - -# -# SFF controllers with custom DMA interface -# -CONFIG_PDC_ADMA=m -CONFIG_SATA_QSTOR=m -CONFIG_SATA_SX4=m -CONFIG_ATA_BMDMA=y - -# -# SATA SFF controllers with BMDMA -# -CONFIG_ATA_PIIX=m -CONFIG_SATA_DWC=m -# CONFIG_SATA_DWC_OLD_DMA is not set -# CONFIG_SATA_DWC_DEBUG is not set -CONFIG_SATA_MV=m -CONFIG_SATA_NV=m -CONFIG_SATA_PROMISE=m -CONFIG_SATA_SIL=m -CONFIG_SATA_SIS=m -CONFIG_SATA_SVW=m -CONFIG_SATA_ULI=m -CONFIG_SATA_VIA=m -CONFIG_SATA_VITESSE=m - -# -# PATA SFF controllers with BMDMA -# -CONFIG_PATA_ALI=m -CONFIG_PATA_AMD=m -CONFIG_PATA_ARTOP=m -CONFIG_PATA_ATIIXP=m -CONFIG_PATA_ATP867X=m -CONFIG_PATA_CMD64X=m -CONFIG_PATA_CYPRESS=m -CONFIG_PATA_EFAR=m -CONFIG_PATA_HPT366=m -CONFIG_PATA_HPT37X=m -CONFIG_PATA_HPT3X2N=m -CONFIG_PATA_HPT3X3=m -CONFIG_PATA_HPT3X3_DMA=y -CONFIG_PATA_IT8213=m -CONFIG_PATA_IT821X=m -CONFIG_PATA_JMICRON=m -CONFIG_PATA_MARVELL=m -CONFIG_PATA_NETCELL=m -CONFIG_PATA_NINJA32=m -CONFIG_PATA_NS87415=m -CONFIG_PATA_OLDPIIX=m -CONFIG_PATA_OPTIDMA=m -CONFIG_PATA_PDC2027X=m -CONFIG_PATA_PDC_OLD=m -CONFIG_PATA_RADISYS=m -CONFIG_PATA_RDC=m -CONFIG_PATA_SCH=m -CONFIG_PATA_SERVERWORKS=m -CONFIG_PATA_SIL680=m -CONFIG_PATA_SIS=m -CONFIG_PATA_TOSHIBA=m -CONFIG_PATA_TRIFLEX=m -CONFIG_PATA_VIA=m -CONFIG_PATA_WINBOND=m - -# -# PIO-only SFF controllers -# -CONFIG_PATA_CMD640_PCI=m -CONFIG_PATA_MPIIX=m -CONFIG_PATA_NS87410=m -CONFIG_PATA_OPTI=m -CONFIG_PATA_PCMCIA=m -# CONFIG_PATA_PLATFORM is not set -CONFIG_PATA_RZ1000=m - -# -# Generic fallback / legacy drivers -# -CONFIG_PATA_ACPI=m -CONFIG_ATA_GENERIC=m -CONFIG_PATA_LEGACY=m -CONFIG_MD=y -CONFIG_BLK_DEV_MD=m -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID10=m -CONFIG_MD_RAID456=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m -CONFIG_MD_CLUSTER=m -CONFIG_BCACHE=m -# CONFIG_BCACHE_DEBUG is not set -# CONFIG_BCACHE_CLOSURES_DEBUG is not set -CONFIG_BLK_DEV_DM_BUILTIN=y -CONFIG_BLK_DEV_DM=m -# CONFIG_DM_DEBUG is not set -CONFIG_DM_BUFIO=m -# CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING is not set -CONFIG_DM_BIO_PRISON=m -CONFIG_DM_PERSISTENT_DATA=m -CONFIG_DM_UNSTRIPED=m -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_CACHE=m -CONFIG_DM_CACHE_SMQ=m -CONFIG_DM_WRITECACHE=m -CONFIG_DM_ERA=m -CONFIG_DM_CLONE=m -CONFIG_DM_MIRROR=m -CONFIG_DM_LOG_USERSPACE=m -CONFIG_DM_RAID=m -CONFIG_DM_ZERO=m -CONFIG_DM_MULTIPATH=m -CONFIG_DM_MULTIPATH_QL=m -CONFIG_DM_MULTIPATH_ST=m -CONFIG_DM_DELAY=m -CONFIG_DM_DUST=m -CONFIG_DM_UEVENT=y -CONFIG_DM_FLAKEY=m -CONFIG_DM_VERITY=m -CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y -CONFIG_DM_VERITY_FEC=y -CONFIG_DM_SWITCH=m -CONFIG_DM_LOG_WRITES=m -CONFIG_DM_INTEGRITY=m -CONFIG_DM_ZONED=m -CONFIG_TARGET_CORE=m -CONFIG_TCM_IBLOCK=m -CONFIG_TCM_FILEIO=m -CONFIG_TCM_PSCSI=m 
-CONFIG_TCM_USER2=m -CONFIG_LOOPBACK_TARGET=m -CONFIG_TCM_FC=m -CONFIG_ISCSI_TARGET=m -CONFIG_ISCSI_TARGET_CXGB4=m -CONFIG_SBP_TARGET=m -CONFIG_FUSION=y -CONFIG_FUSION_SPI=m -CONFIG_FUSION_FC=m -CONFIG_FUSION_SAS=m -CONFIG_FUSION_MAX_SGE=128 -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -# CONFIG_FUSION_LOGGING is not set - -# -# IEEE 1394 (FireWire) support -# -CONFIG_FIREWIRE=m -CONFIG_FIREWIRE_OHCI=m -CONFIG_FIREWIRE_SBP2=m -CONFIG_FIREWIRE_NET=m -CONFIG_FIREWIRE_NOSY=m -# end of IEEE 1394 (FireWire) support - -CONFIG_MACINTOSH_DRIVERS=y -CONFIG_MAC_EMUMOUSEBTN=m -CONFIG_NETDEVICES=y -CONFIG_MII=m -CONFIG_NET_CORE=y -CONFIG_BONDING=m -CONFIG_DUMMY=m -CONFIG_WIREGUARD=m -# CONFIG_WIREGUARD_DEBUG is not set -CONFIG_EQUALIZER=m -CONFIG_NET_FC=y -CONFIG_IFB=m -CONFIG_NET_TEAM=m -CONFIG_NET_TEAM_MODE_BROADCAST=m -CONFIG_NET_TEAM_MODE_ROUNDROBIN=m -CONFIG_NET_TEAM_MODE_RANDOM=m -CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m -CONFIG_NET_TEAM_MODE_LOADBALANCE=m -CONFIG_MACVLAN=m -CONFIG_MACVTAP=m -CONFIG_IPVLAN_L3S=y -CONFIG_IPVLAN=m -CONFIG_IPVTAP=m -CONFIG_VXLAN=m -CONFIG_GENEVE=m -CONFIG_BAREUDP=m -CONFIG_GTP=m -CONFIG_MACSEC=m -CONFIG_NETCONSOLE=m -CONFIG_NETCONSOLE_DYNAMIC=y -CONFIG_NETPOLL=y -CONFIG_NET_POLL_CONTROLLER=y -CONFIG_NTB_NETDEV=m -CONFIG_RIONET=m -CONFIG_RIONET_TX_SIZE=128 -CONFIG_RIONET_RX_SIZE=128 -CONFIG_TUN=m -CONFIG_TAP=m -# CONFIG_TUN_VNET_CROSS_LE is not set -CONFIG_VETH=m -CONFIG_VIRTIO_NET=m -CONFIG_NLMON=m -CONFIG_NET_VRF=m -CONFIG_VSOCKMON=m -CONFIG_SUNGEM_PHY=m -# CONFIG_ARCNET is not set -CONFIG_ATM_DRIVERS=y -# CONFIG_ATM_DUMMY is not set -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_NICSTAR=m -# CONFIG_ATM_NICSTAR_USE_SUNI is not set -# CONFIG_ATM_NICSTAR_USE_IDT77105 is not set -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -# CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E=m -CONFIG_ATM_FORE200E_USE_TASKLET=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_HE=m -CONFIG_ATM_HE_USE_SUNI=y -CONFIG_ATM_SOLOS=m -CONFIG_CAIF_DRIVERS=y -CONFIG_CAIF_TTY=m -CONFIG_CAIF_SPI_SLAVE=m -CONFIG_CAIF_SPI_SYNC=y -CONFIG_CAIF_HSI=m -CONFIG_CAIF_VIRTIO=m - -# -# Distributed Switch Architecture drivers -# -CONFIG_B53=m -# CONFIG_B53_SPI_DRIVER is not set -CONFIG_B53_MDIO_DRIVER=m -CONFIG_B53_MMAP_DRIVER=m -CONFIG_B53_SRAB_DRIVER=m -CONFIG_B53_SERDES=m -CONFIG_NET_DSA_BCM_SF2=m -CONFIG_NET_DSA_LOOP=m -CONFIG_NET_DSA_LANTIQ_GSWIP=m -CONFIG_NET_DSA_MT7530=m -CONFIG_NET_DSA_MV88E6060=m -CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON=m -CONFIG_NET_DSA_MICROCHIP_KSZ9477=m -CONFIG_NET_DSA_MICROCHIP_KSZ9477_I2C=m -CONFIG_NET_DSA_MICROCHIP_KSZ9477_SPI=m -CONFIG_NET_DSA_MICROCHIP_KSZ8795=m -CONFIG_NET_DSA_MICROCHIP_KSZ8795_SPI=m -CONFIG_NET_DSA_MV88E6XXX=m -CONFIG_NET_DSA_MV88E6XXX_GLOBAL2=y -CONFIG_NET_DSA_MV88E6XXX_PTP=y -CONFIG_NET_DSA_AR9331=m -CONFIG_NET_DSA_SJA1105=m -CONFIG_NET_DSA_SJA1105_PTP=y -CONFIG_NET_DSA_SJA1105_TAS=y -CONFIG_NET_DSA_QCA8K=m -CONFIG_NET_DSA_REALTEK_SMI=m -CONFIG_NET_DSA_SMSC_LAN9303=m -CONFIG_NET_DSA_SMSC_LAN9303_I2C=m -CONFIG_NET_DSA_SMSC_LAN9303_MDIO=m -CONFIG_NET_DSA_VITESSE_VSC73XX=m -CONFIG_NET_DSA_VITESSE_VSC73XX_SPI=m 
-CONFIG_NET_DSA_VITESSE_VSC73XX_PLATFORM=m -# end of Distributed Switch Architecture drivers - -CONFIG_ETHERNET=y -CONFIG_MDIO=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_3C589=m -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_NET_VENDOR_ADAPTEC=y -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_NET_VENDOR_AGERE=y -CONFIG_ET131X=m -CONFIG_NET_VENDOR_ALACRITECH=y -CONFIG_SLICOSS=m -CONFIG_NET_VENDOR_ALTEON=y -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_ALTERA_TSE=m -CONFIG_NET_VENDOR_AMAZON=y -CONFIG_ENA_ETHERNET=m -CONFIG_NET_VENDOR_AMD=y -CONFIG_AMD8111_ETH=m -CONFIG_PCNET32=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_AMD_XGBE=m -CONFIG_AMD_XGBE_DCB=y -CONFIG_AMD_XGBE_HAVE_ECC=y -CONFIG_NET_VENDOR_AQUANTIA=y -CONFIG_AQTION=m -CONFIG_NET_VENDOR_ARC=y -CONFIG_NET_VENDOR_ATHEROS=y -CONFIG_ATL2=m -CONFIG_ATL1=m -CONFIG_ATL1E=m -CONFIG_ATL1C=m -CONFIG_ALX=m -CONFIG_NET_VENDOR_AURORA=y -CONFIG_AURORA_NB8800=m -CONFIG_NET_VENDOR_BROADCOM=y -CONFIG_B44=m -CONFIG_B44_PCI_AUTOSELECT=y -CONFIG_B44_PCICORE_AUTOSELECT=y -CONFIG_B44_PCI=y -CONFIG_BCMGENET=m -CONFIG_BNX2=m -CONFIG_CNIC=m -CONFIG_TIGON3=m -CONFIG_TIGON3_HWMON=y -CONFIG_BNX2X=m -CONFIG_BNX2X_SRIOV=y -CONFIG_SYSTEMPORT=m -CONFIG_BNXT=m -CONFIG_BNXT_SRIOV=y -CONFIG_BNXT_FLOWER_OFFLOAD=y -CONFIG_BNXT_DCB=y -CONFIG_BNXT_HWMON=y -CONFIG_NET_VENDOR_BROCADE=y -CONFIG_BNA=m -CONFIG_NET_VENDOR_CADENCE=y -CONFIG_MACB=m -CONFIG_MACB_USE_HWSTAMP=y -CONFIG_MACB_PCI=m -CONFIG_NET_VENDOR_CAVIUM=y -CONFIG_THUNDER_NIC_PF=m -CONFIG_THUNDER_NIC_VF=m -CONFIG_THUNDER_NIC_BGX=m -CONFIG_THUNDER_NIC_RGX=m -CONFIG_CAVIUM_PTP=m -CONFIG_LIQUIDIO=m -CONFIG_LIQUIDIO_VF=m -CONFIG_NET_VENDOR_CHELSIO=y -CONFIG_CHELSIO_T1=m -CONFIG_CHELSIO_T1_1G=y -CONFIG_CHELSIO_T3=m -CONFIG_CHELSIO_T4=m -CONFIG_CHELSIO_T4_DCB=y -CONFIG_CHELSIO_T4_FCOE=y -CONFIG_CHELSIO_T4VF=m -CONFIG_CHELSIO_LIB=m -CONFIG_NET_VENDOR_CISCO=y -CONFIG_ENIC=m -CONFIG_NET_VENDOR_CORTINA=y -CONFIG_GEMINI_ETHERNET=m -CONFIG_CX_ECAT=m -CONFIG_DNET=m -CONFIG_NET_VENDOR_DEC=y -CONFIG_NET_TULIP=y -CONFIG_DE2104X=m -CONFIG_DE2104X_DSL=0 -CONFIG_TULIP=m -CONFIG_TULIP_MWI=y -CONFIG_TULIP_MMIO=y -CONFIG_TULIP_NAPI=y -CONFIG_TULIP_NAPI_HW_MITIGATION=y -CONFIG_DE4X5=m -CONFIG_WINBOND_840=m -CONFIG_DM9102=m -CONFIG_ULI526X=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_NET_VENDOR_DLINK=y -CONFIG_DL2K=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_NET_VENDOR_EMULEX=y -CONFIG_BE2NET=m -CONFIG_BE2NET_HWMON=y -CONFIG_BE2NET_BE2=y -CONFIG_BE2NET_BE3=y -CONFIG_BE2NET_LANCER=y -CONFIG_BE2NET_SKYHAWK=y -CONFIG_NET_VENDOR_EZCHIP=y -CONFIG_EZCHIP_NPS_MANAGEMENT_ENET=m -CONFIG_NET_VENDOR_FUJITSU=y -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_NET_VENDOR_GOOGLE=y -CONFIG_GVE=m -CONFIG_NET_VENDOR_HUAWEI=y -CONFIG_HINIC=m -CONFIG_NET_VENDOR_I825XX=y -CONFIG_NET_VENDOR_INTEL=y -CONFIG_E100=m -CONFIG_E1000=m -CONFIG_E1000E=m -CONFIG_E1000E_HWTS=y -CONFIG_IGB=m -CONFIG_IGB_HWMON=y -CONFIG_IGB_DCA=y -CONFIG_IGBVF=m -CONFIG_IXGB=m -CONFIG_IXGBE=m -CONFIG_IXGBE_HWMON=y -CONFIG_IXGBE_DCA=y -CONFIG_IXGBE_DCB=y -# CONFIG_IXGBE_IPSEC is not set -CONFIG_IXGBEVF=m -CONFIG_IXGBEVF_IPSEC=y -CONFIG_I40E=m -CONFIG_I40E_DCB=y -CONFIG_IAVF=m -CONFIG_I40EVF=m -CONFIG_ICE=m -CONFIG_FM10K=m -CONFIG_IGC=m -CONFIG_JME=m -CONFIG_NET_VENDOR_MARVELL=y -CONFIG_MVMDIO=m -CONFIG_SKGE=m -# CONFIG_SKGE_DEBUG is not set -CONFIG_SKGE_GENESIS=y -CONFIG_SKY2=m -# CONFIG_SKY2_DEBUG is not set -CONFIG_NET_VENDOR_MELLANOX=y -CONFIG_MLX4_EN=m -CONFIG_MLX4_EN_DCB=y -CONFIG_MLX4_CORE=m -CONFIG_MLX4_DEBUG=y -CONFIG_MLX4_CORE_GEN2=y -CONFIG_MLX5_CORE=m 
-CONFIG_MLX5_ACCEL=y -CONFIG_MLX5_FPGA=y -CONFIG_MLX5_CORE_EN=y -CONFIG_MLX5_EN_ARFS=y -CONFIG_MLX5_EN_RXNFC=y -CONFIG_MLX5_MPFS=y -CONFIG_MLX5_ESWITCH=y -CONFIG_MLX5_TC_CT=y -CONFIG_MLX5_CORE_EN_DCB=y -CONFIG_MLX5_CORE_IPOIB=y -CONFIG_MLX5_FPGA_IPSEC=y -CONFIG_MLX5_EN_IPSEC=y -CONFIG_MLX5_FPGA_TLS=y -CONFIG_MLX5_TLS=y -CONFIG_MLX5_EN_TLS=y -CONFIG_MLX5_SW_STEERING=y -CONFIG_MLXSW_CORE=m -CONFIG_MLXSW_CORE_HWMON=y -CONFIG_MLXSW_CORE_THERMAL=y -CONFIG_MLXSW_PCI=m -CONFIG_MLXSW_I2C=m -CONFIG_MLXSW_SWITCHIB=m -CONFIG_MLXSW_SWITCHX2=m -CONFIG_MLXSW_SPECTRUM=m -CONFIG_MLXSW_SPECTRUM_DCB=y -CONFIG_MLXSW_MINIMAL=m -CONFIG_MLXFW=m -CONFIG_NET_VENDOR_MICREL=y -CONFIG_KS8842=m -CONFIG_KS8851=m -CONFIG_KS8851_MLL=m -CONFIG_KSZ884X_PCI=m -CONFIG_NET_VENDOR_MICROCHIP=y -CONFIG_ENC28J60=m -# CONFIG_ENC28J60_WRITEVERIFY is not set -CONFIG_ENCX24J600=m -CONFIG_LAN743X=m -CONFIG_NET_VENDOR_MICROSEMI=y -CONFIG_MSCC_OCELOT_SWITCH=m -CONFIG_MSCC_OCELOT_SWITCH_OCELOT=m -CONFIG_NET_VENDOR_MYRI=y -CONFIG_MYRI10GE=m -CONFIG_MYRI10GE_DCA=y -CONFIG_FEALNX=m -CONFIG_NET_VENDOR_NATSEMI=y -CONFIG_NATSEMI=m -CONFIG_NS83820=m -CONFIG_NET_VENDOR_NETERION=y -CONFIG_S2IO=m -CONFIG_VXGE=m -# CONFIG_VXGE_DEBUG_TRACE_ALL is not set -CONFIG_NET_VENDOR_NETRONOME=y -CONFIG_NFP=m -CONFIG_NFP_APP_FLOWER=y -CONFIG_NFP_APP_ABM_NIC=y -# CONFIG_NFP_DEBUG is not set -CONFIG_NET_VENDOR_NI=y -CONFIG_NI_XGE_MANAGEMENT_ENET=m -CONFIG_NET_VENDOR_8390=y -CONFIG_PCMCIA_AXNET=m -CONFIG_NE2K_PCI=m -CONFIG_PCMCIA_PCNET=m -CONFIG_NET_VENDOR_NVIDIA=y -CONFIG_FORCEDETH=m -CONFIG_NET_VENDOR_OKI=y -CONFIG_ETHOC=m -CONFIG_NET_VENDOR_PACKET_ENGINES=y -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_NET_VENDOR_PENSANDO=y -CONFIG_IONIC=m -CONFIG_NET_VENDOR_QLOGIC=y -CONFIG_QLA3XXX=m -CONFIG_QLCNIC=m -CONFIG_QLCNIC_SRIOV=y -CONFIG_QLCNIC_DCB=y -CONFIG_QLCNIC_HWMON=y -CONFIG_NETXEN_NIC=m -CONFIG_QED=m -CONFIG_QED_LL2=y -CONFIG_QED_SRIOV=y -CONFIG_QEDE=m -CONFIG_QED_RDMA=y -CONFIG_QED_ISCSI=y -CONFIG_QED_FCOE=y -CONFIG_QED_OOO=y -CONFIG_NET_VENDOR_QUALCOMM=y -CONFIG_QCA7000=m -CONFIG_QCA7000_SPI=m -CONFIG_QCA7000_UART=m -CONFIG_QCOM_EMAC=m -CONFIG_RMNET=m -CONFIG_NET_VENDOR_RDC=y -CONFIG_R6040=m -CONFIG_NET_VENDOR_REALTEK=y -CONFIG_ATP=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -CONFIG_8139TOO_TUNE_TWISTER=y -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_R8169=m -CONFIG_NET_VENDOR_RENESAS=y -CONFIG_NET_VENDOR_ROCKER=y -CONFIG_ROCKER=m -CONFIG_NET_VENDOR_SAMSUNG=y -CONFIG_SXGBE_ETH=m -CONFIG_NET_VENDOR_SEEQ=y -CONFIG_NET_VENDOR_SOLARFLARE=y -CONFIG_SFC=m -CONFIG_SFC_MTD=y -CONFIG_SFC_MCDI_MON=y -CONFIG_SFC_SRIOV=y -CONFIG_SFC_MCDI_LOGGING=y -CONFIG_SFC_FALCON=m -CONFIG_SFC_FALCON_MTD=y -CONFIG_NET_VENDOR_SILAN=y -CONFIG_SC92031=m -CONFIG_NET_VENDOR_SIS=y -CONFIG_SIS900=m -CONFIG_SIS190=m -CONFIG_NET_VENDOR_SMSC=y -CONFIG_PCMCIA_SMC91C92=m -CONFIG_EPIC100=m -CONFIG_SMSC911X=m -CONFIG_SMSC9420=m -CONFIG_NET_VENDOR_SOCIONEXT=y -CONFIG_NET_VENDOR_STMICRO=y -CONFIG_STMMAC_ETH=m -# CONFIG_STMMAC_SELFTESTS is not set -CONFIG_STMMAC_PLATFORM=m -CONFIG_DWMAC_DWC_QOS_ETH=m -CONFIG_DWMAC_GENERIC=m -CONFIG_DWMAC_INTEL=m -CONFIG_STMMAC_PCI=m -CONFIG_NET_VENDOR_SUN=y -CONFIG_HAPPYMEAL=m -CONFIG_SUNGEM=m -CONFIG_CASSINI=m -CONFIG_NIU=m -CONFIG_NET_VENDOR_SYNOPSYS=y -CONFIG_DWC_XLGMAC=m -CONFIG_DWC_XLGMAC_PCI=m -CONFIG_NET_VENDOR_TEHUTI=y -CONFIG_TEHUTI=m -CONFIG_NET_VENDOR_TI=y -# CONFIG_TI_CPSW_PHY_SEL is not set -CONFIG_TLAN=m -CONFIG_NET_VENDOR_VIA=y -CONFIG_VIA_RHINE=m -CONFIG_VIA_RHINE_MMIO=y -CONFIG_VIA_VELOCITY=m 
-CONFIG_NET_VENDOR_WIZNET=y -CONFIG_WIZNET_W5100=m -CONFIG_WIZNET_W5300=m -# CONFIG_WIZNET_BUS_DIRECT is not set -# CONFIG_WIZNET_BUS_INDIRECT is not set -CONFIG_WIZNET_BUS_ANY=y -CONFIG_WIZNET_W5100_SPI=m -CONFIG_NET_VENDOR_XILINX=y -CONFIG_XILINX_AXI_EMAC=m -CONFIG_XILINX_LL_TEMAC=m -CONFIG_NET_VENDOR_XIRCOM=y -CONFIG_PCMCIA_XIRC2PS=m -CONFIG_FDDI=m -CONFIG_DEFXX=m -CONFIG_DEFXX_MMIO=y -CONFIG_SKFP=m -# CONFIG_HIPPI is not set -CONFIG_NET_SB1000=m -CONFIG_MDIO_DEVICE=m -CONFIG_MDIO_BUS=m -CONFIG_MDIO_BCM_UNIMAC=m -CONFIG_MDIO_BITBANG=m -CONFIG_MDIO_BUS_MUX=m -CONFIG_MDIO_BUS_MUX_GPIO=m -CONFIG_MDIO_BUS_MUX_MMIOREG=m -CONFIG_MDIO_BUS_MUX_MULTIPLEXER=m -CONFIG_MDIO_CAVIUM=m -CONFIG_MDIO_GPIO=m -CONFIG_MDIO_HISI_FEMAC=m -CONFIG_MDIO_I2C=m -CONFIG_MDIO_IPQ8064=m -CONFIG_MDIO_MSCC_MIIM=m -CONFIG_MDIO_MVUSB=m -CONFIG_MDIO_OCTEON=m -CONFIG_MDIO_THUNDER=m -CONFIG_MDIO_XPCS=m -CONFIG_PHYLINK=m -CONFIG_PHYLIB=m -CONFIG_SWPHY=y -CONFIG_LED_TRIGGER_PHY=y - -# -# MII PHY device drivers -# -CONFIG_SFP=m -CONFIG_ADIN_PHY=m -CONFIG_AMD_PHY=m -CONFIG_AQUANTIA_PHY=m -CONFIG_AX88796B_PHY=m -CONFIG_BCM7XXX_PHY=m -CONFIG_BCM87XX_PHY=m -CONFIG_BCM_NET_PHYLIB=m -CONFIG_BROADCOM_PHY=m -CONFIG_BCM84881_PHY=m -CONFIG_CICADA_PHY=m -CONFIG_CORTINA_PHY=m -CONFIG_DAVICOM_PHY=m -CONFIG_DP83822_PHY=m -CONFIG_DP83TC811_PHY=m -CONFIG_DP83848_PHY=m -CONFIG_DP83867_PHY=m -CONFIG_DP83869_PHY=m -CONFIG_FIXED_PHY=m -CONFIG_ICPLUS_PHY=m -CONFIG_INTEL_XWAY_PHY=m -CONFIG_LSI_ET1011C_PHY=m -CONFIG_LXT_PHY=m -CONFIG_MARVELL_PHY=m -CONFIG_MARVELL_10G_PHY=m -CONFIG_MICREL_PHY=m -CONFIG_MICROCHIP_PHY=m -CONFIG_MICROCHIP_T1_PHY=m -CONFIG_MICROSEMI_PHY=m -CONFIG_NATIONAL_PHY=m -CONFIG_NXP_TJA11XX_PHY=m -CONFIG_AT803X_PHY=m -CONFIG_QSEMI_PHY=m -CONFIG_REALTEK_PHY=m -CONFIG_RENESAS_PHY=m -CONFIG_ROCKCHIP_PHY=m -CONFIG_SMSC_PHY=m -CONFIG_STE10XP=m -CONFIG_TERANETICS_PHY=m -CONFIG_VITESSE_PHY=m -CONFIG_XILINX_GMII2RGMII=m -CONFIG_MICREL_KS8995MA=m -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_MPPE=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPPOATM=m -CONFIG_PPPOE=m -CONFIG_PPTP=m -CONFIG_PPPOL2TP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_SLIP=m -CONFIG_SLHC=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y -CONFIG_USB_NET_DRIVERS=m -CONFIG_USB_CATC=m -CONFIG_USB_KAWETH=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_RTL8152=m -CONFIG_USB_LAN78XX=m -CONFIG_USB_USBNET=m -CONFIG_USB_NET_AX8817X=m -CONFIG_USB_NET_AX88179_178A=m -CONFIG_USB_NET_CDCETHER=m -CONFIG_USB_NET_CDC_EEM=m -CONFIG_USB_NET_CDC_NCM=m -CONFIG_USB_NET_HUAWEI_CDC_NCM=m -CONFIG_USB_NET_CDC_MBIM=m -CONFIG_USB_NET_DM9601=m -CONFIG_USB_NET_SR9700=m -CONFIG_USB_NET_SR9800=m -CONFIG_USB_NET_SMSC75XX=m -CONFIG_USB_NET_SMSC95XX=m -CONFIG_USB_NET_GL620A=m -CONFIG_USB_NET_NET1080=m -CONFIG_USB_NET_PLUSB=m -CONFIG_USB_NET_MCS7830=m -CONFIG_USB_NET_RNDIS_HOST=m -CONFIG_USB_NET_CDC_SUBSET_ENABLE=m -CONFIG_USB_NET_CDC_SUBSET=m -CONFIG_USB_ALI_M5632=y -CONFIG_USB_AN2720=y -CONFIG_USB_BELKIN=y -CONFIG_USB_ARMLINUX=y -CONFIG_USB_EPSON2888=y -CONFIG_USB_KC2190=y -CONFIG_USB_NET_ZAURUS=m -CONFIG_USB_NET_CX82310_ETH=m -CONFIG_USB_NET_KALMIA=m -CONFIG_USB_NET_QMI_WWAN=m -CONFIG_USB_HSO=m -CONFIG_USB_NET_INT51X1=m -CONFIG_USB_CDC_PHONET=m -CONFIG_USB_IPHETH=m -CONFIG_USB_SIERRA_NET=m -CONFIG_USB_VL600=m -CONFIG_USB_NET_CH9200=m -CONFIG_USB_NET_AQC111=m -CONFIG_WLAN=y -# CONFIG_WIRELESS_WDS is not set -CONFIG_WLAN_VENDOR_ADMTEK=y -CONFIG_ADM8211=m -CONFIG_ATH_COMMON=m -CONFIG_WLAN_VENDOR_ATH=y -# 
CONFIG_ATH_DEBUG is not set -CONFIG_ATH5K=m -# CONFIG_ATH5K_DEBUG is not set -# CONFIG_ATH5K_TRACER is not set -CONFIG_ATH5K_PCI=y -CONFIG_ATH9K_HW=m -CONFIG_ATH9K_COMMON=m -CONFIG_ATH9K_BTCOEX_SUPPORT=y -CONFIG_ATH9K=m -CONFIG_ATH9K_PCI=y -CONFIG_ATH9K_AHB=y -# CONFIG_ATH9K_DEBUGFS is not set -CONFIG_ATH9K_DYNACK=y -CONFIG_ATH9K_WOW=y -CONFIG_ATH9K_RFKILL=y -CONFIG_ATH9K_CHANNEL_CONTEXT=y -CONFIG_ATH9K_PCOEM=y -CONFIG_ATH9K_PCI_NO_EEPROM=m -CONFIG_ATH9K_HTC=m -# CONFIG_ATH9K_HTC_DEBUGFS is not set -CONFIG_ATH9K_HWRNG=y -CONFIG_CARL9170=m -CONFIG_CARL9170_LEDS=y -CONFIG_CARL9170_WPC=y -# CONFIG_CARL9170_HWRNG is not set -CONFIG_ATH6KL=m -CONFIG_ATH6KL_SDIO=m -CONFIG_ATH6KL_USB=m -# CONFIG_ATH6KL_DEBUG is not set -# CONFIG_ATH6KL_TRACING is not set -CONFIG_AR5523=m -CONFIG_WIL6210=m -CONFIG_WIL6210_ISR_COR=y -CONFIG_WIL6210_TRACING=y -# CONFIG_WIL6210_DEBUGFS is not set -CONFIG_ATH10K=m -CONFIG_ATH10K_CE=y -CONFIG_ATH10K_PCI=m -CONFIG_ATH10K_AHB=y -CONFIG_ATH10K_SDIO=m -CONFIG_ATH10K_USB=m -# CONFIG_ATH10K_DEBUG is not set -# CONFIG_ATH10K_DEBUGFS is not set -# CONFIG_ATH10K_TRACING is not set -CONFIG_WCN36XX=m -# CONFIG_WCN36XX_DEBUGFS is not set -CONFIG_WLAN_VENDOR_ATMEL=y -CONFIG_ATMEL=m -CONFIG_PCI_ATMEL=m -CONFIG_PCMCIA_ATMEL=m -CONFIG_AT76C50X_USB=m -CONFIG_WLAN_VENDOR_BROADCOM=y -CONFIG_B43=m -CONFIG_B43_BCMA=y -CONFIG_B43_SSB=y -CONFIG_B43_BUSES_BCMA_AND_SSB=y -# CONFIG_B43_BUSES_BCMA is not set -# CONFIG_B43_BUSES_SSB is not set -CONFIG_B43_PCI_AUTOSELECT=y -CONFIG_B43_PCICORE_AUTOSELECT=y -CONFIG_B43_SDIO=y -CONFIG_B43_BCMA_PIO=y -CONFIG_B43_PIO=y -CONFIG_B43_PHY_G=y -CONFIG_B43_PHY_N=y -CONFIG_B43_PHY_LP=y -CONFIG_B43_PHY_HT=y -CONFIG_B43_LEDS=y -CONFIG_B43_HWRNG=y -# CONFIG_B43_DEBUG is not set -CONFIG_B43LEGACY=m -CONFIG_B43LEGACY_PCI_AUTOSELECT=y -CONFIG_B43LEGACY_PCICORE_AUTOSELECT=y -CONFIG_B43LEGACY_LEDS=y -CONFIG_B43LEGACY_HWRNG=y -CONFIG_B43LEGACY_DEBUG=y -CONFIG_B43LEGACY_DMA=y -CONFIG_B43LEGACY_PIO=y -CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y -# CONFIG_B43LEGACY_DMA_MODE is not set -# CONFIG_B43LEGACY_PIO_MODE is not set -CONFIG_BRCMUTIL=m -CONFIG_BRCMSMAC=m -CONFIG_BRCMFMAC=m -CONFIG_BRCMFMAC_PROTO_BCDC=y -CONFIG_BRCMFMAC_PROTO_MSGBUF=y -CONFIG_BRCMFMAC_SDIO=y -CONFIG_BRCMFMAC_USB=y -CONFIG_BRCMFMAC_PCIE=y -# CONFIG_BRCM_TRACING is not set -CONFIG_BRCMDBG=y -CONFIG_WLAN_VENDOR_CISCO=y -CONFIG_AIRO=m -CONFIG_AIRO_CS=m -CONFIG_WLAN_VENDOR_INTEL=y -CONFIG_IPW2100=m -CONFIG_IPW2100_MONITOR=y -# CONFIG_IPW2100_DEBUG is not set -CONFIG_IPW2200=m -CONFIG_IPW2200_MONITOR=y -CONFIG_IPW2200_RADIOTAP=y -CONFIG_IPW2200_PROMISCUOUS=y -CONFIG_IPW2200_QOS=y -# CONFIG_IPW2200_DEBUG is not set -CONFIG_LIBIPW=m -# CONFIG_LIBIPW_DEBUG is not set -CONFIG_IWLEGACY=m -CONFIG_IWL4965=m -CONFIG_IWL3945=m - -# -# iwl3945 / iwl4965 Debugging Options -# -# CONFIG_IWLEGACY_DEBUG is not set -# end of iwl3945 / iwl4965 Debugging Options - -CONFIG_IWLWIFI=m -CONFIG_IWLWIFI_LEDS=y -CONFIG_IWLDVM=m -CONFIG_IWLMVM=m -CONFIG_IWLWIFI_OPMODE_MODULAR=y -# CONFIG_IWLWIFI_BCAST_FILTERING is not set - -# -# Debugging Options -# -# CONFIG_IWLWIFI_DEBUG is not set -# CONFIG_IWLWIFI_DEVICE_TRACING is not set -# end of Debugging Options - -CONFIG_WLAN_VENDOR_INTERSIL=y -CONFIG_HOSTAP=m -CONFIG_HOSTAP_FIRMWARE=y -CONFIG_HOSTAP_FIRMWARE_NVRAM=y -CONFIG_HOSTAP_PLX=m -CONFIG_HOSTAP_PCI=m -CONFIG_HOSTAP_CS=m -CONFIG_HERMES=m -CONFIG_HERMES_PRISM=y -CONFIG_HERMES_CACHE_FW_ON_INIT=y -CONFIG_PLX_HERMES=m -CONFIG_TMD_HERMES=m -CONFIG_NORTEL_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_PCMCIA_SPECTRUM=m 
-CONFIG_ORINOCO_USB=m -CONFIG_P54_COMMON=m -CONFIG_P54_USB=m -CONFIG_P54_PCI=m -CONFIG_P54_SPI=m -# CONFIG_P54_SPI_DEFAULT_EEPROM is not set -CONFIG_P54_LEDS=y -CONFIG_PRISM54=m -CONFIG_WLAN_VENDOR_MARVELL=y -CONFIG_LIBERTAS=m -CONFIG_LIBERTAS_USB=m -CONFIG_LIBERTAS_CS=m -CONFIG_LIBERTAS_SDIO=m -CONFIG_LIBERTAS_SPI=m -# CONFIG_LIBERTAS_DEBUG is not set -CONFIG_LIBERTAS_MESH=y -CONFIG_LIBERTAS_THINFIRM=m -# CONFIG_LIBERTAS_THINFIRM_DEBUG is not set -CONFIG_LIBERTAS_THINFIRM_USB=m -CONFIG_MWIFIEX=m -CONFIG_MWIFIEX_SDIO=m -CONFIG_MWIFIEX_PCIE=m -CONFIG_MWIFIEX_USB=m -CONFIG_MWL8K=m -CONFIG_WLAN_VENDOR_MEDIATEK=y -CONFIG_MT7601U=m -CONFIG_MT76_CORE=m -CONFIG_MT76_LEDS=y -CONFIG_MT76_USB=m -CONFIG_MT76x02_LIB=m -CONFIG_MT76x02_USB=m -CONFIG_MT76x0_COMMON=m -CONFIG_MT76x0U=m -CONFIG_MT76x0E=m -CONFIG_MT76x2_COMMON=m -CONFIG_MT76x2E=m -CONFIG_MT76x2U=m -CONFIG_MT7603E=m -CONFIG_MT7615E=m -CONFIG_WLAN_VENDOR_RALINK=y -CONFIG_RT2X00=m -CONFIG_RT2400PCI=m -CONFIG_RT2500PCI=m -CONFIG_RT61PCI=m -CONFIG_RT2800PCI=m -CONFIG_RT2800PCI_RT33XX=y -CONFIG_RT2800PCI_RT35XX=y -CONFIG_RT2800PCI_RT53XX=y -CONFIG_RT2800PCI_RT3290=y -CONFIG_RT2500USB=m -CONFIG_RT73USB=m -CONFIG_RT2800USB=m -CONFIG_RT2800USB_RT33XX=y -CONFIG_RT2800USB_RT35XX=y -CONFIG_RT2800USB_RT3573=y -CONFIG_RT2800USB_RT53XX=y -CONFIG_RT2800USB_RT55XX=y -CONFIG_RT2800USB_UNKNOWN=y -CONFIG_RT2800_LIB=m -CONFIG_RT2800_LIB_MMIO=m -CONFIG_RT2X00_LIB_MMIO=m -CONFIG_RT2X00_LIB_PCI=m -CONFIG_RT2X00_LIB_USB=m -CONFIG_RT2X00_LIB=m -CONFIG_RT2X00_LIB_FIRMWARE=y -CONFIG_RT2X00_LIB_CRYPTO=y -CONFIG_RT2X00_LIB_LEDS=y -# CONFIG_RT2X00_DEBUG is not set -CONFIG_WLAN_VENDOR_REALTEK=y -CONFIG_RTL8180=m -CONFIG_RTL8187=m -CONFIG_RTL8187_LEDS=y -CONFIG_RTL_CARDS=m -CONFIG_RTL8192CE=m -CONFIG_RTL8192SE=m -CONFIG_RTL8192DE=m -CONFIG_RTL8723AE=m -CONFIG_RTL8723BE=m -CONFIG_RTL8188EE=m -CONFIG_RTL8192EE=m -CONFIG_RTL8821AE=m -CONFIG_RTL8192CU=m -CONFIG_RTLWIFI=m -CONFIG_RTLWIFI_PCI=m -CONFIG_RTLWIFI_USB=m -CONFIG_RTLWIFI_DEBUG=y -CONFIG_RTL8192C_COMMON=m -CONFIG_RTL8723_COMMON=m -CONFIG_RTLBTCOEXIST=m -CONFIG_RTL8XXXU=m -CONFIG_RTL8XXXU_UNTESTED=y -CONFIG_RTW88=m -CONFIG_RTW88_CORE=m -CONFIG_RTW88_PCI=m -CONFIG_RTW88_8822BE=y -CONFIG_RTW88_8822CE=y -# CONFIG_RTW88_DEBUG is not set -# CONFIG_RTW88_DEBUGFS is not set -CONFIG_WLAN_VENDOR_RSI=y -CONFIG_RSI_91X=m -# CONFIG_RSI_DEBUGFS is not set -CONFIG_RSI_SDIO=m -CONFIG_RSI_USB=m -CONFIG_RSI_COEX=y -CONFIG_WLAN_VENDOR_ST=y -CONFIG_CW1200=m -CONFIG_CW1200_WLAN_SDIO=m -CONFIG_CW1200_WLAN_SPI=m -CONFIG_WLAN_VENDOR_TI=y -CONFIG_WL1251=m -CONFIG_WL1251_SPI=m -CONFIG_WL1251_SDIO=m -CONFIG_WL12XX=m -CONFIG_WL18XX=m -CONFIG_WLCORE=m -CONFIG_WLCORE_SPI=m -CONFIG_WLCORE_SDIO=m -CONFIG_WILINK_PLATFORM_DATA=y -CONFIG_WLAN_VENDOR_ZYDAS=y -CONFIG_USB_ZD1201=m -CONFIG_ZD1211RW=m -# CONFIG_ZD1211RW_DEBUG is not set -CONFIG_WLAN_VENDOR_QUANTENNA=y -CONFIG_QTNFMAC=m -CONFIG_QTNFMAC_PCIE=m -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_WL3501=m -CONFIG_MAC80211_HWSIM=m -CONFIG_USB_NET_RNDIS_WLAN=m -CONFIG_VIRT_WIFI=m - -# -# WiMAX Wireless Broadband devices -# -CONFIG_WIMAX_I2400M=m -CONFIG_WIMAX_I2400M_USB=m -CONFIG_WIMAX_I2400M_DEBUG_LEVEL=8 -# end of WiMAX Wireless Broadband devices - -# CONFIG_WAN is not set -CONFIG_IEEE802154_DRIVERS=m -CONFIG_IEEE802154_FAKELB=m -CONFIG_IEEE802154_AT86RF230=m -# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set -CONFIG_IEEE802154_MRF24J40=m -CONFIG_IEEE802154_CC2520=m -CONFIG_IEEE802154_ATUSB=m -CONFIG_IEEE802154_ADF7242=m -CONFIG_IEEE802154_CA8210=m -# CONFIG_IEEE802154_CA8210_DEBUGFS is not set 
-CONFIG_IEEE802154_MCR20A=m -CONFIG_IEEE802154_HWSIM=m -CONFIG_XEN_NETDEV_FRONTEND=m -CONFIG_XEN_NETDEV_BACKEND=m -CONFIG_VMXNET3=m -CONFIG_FUJITSU_ES=m -CONFIG_USB4_NET=m -CONFIG_HYPERV_NET=m -CONFIG_NETDEVSIM=m -CONFIG_NET_FAILOVER=m -CONFIG_ISDN=y -CONFIG_ISDN_CAPI=y -CONFIG_CAPI_TRACE=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_MISDN=m -CONFIG_MISDN_DSP=m -CONFIG_MISDN_L1OIP=m - -# -# mISDN hardware drivers -# -CONFIG_MISDN_HFCPCI=m -CONFIG_MISDN_HFCMULTI=m -CONFIG_MISDN_HFCUSB=m -CONFIG_MISDN_AVMFRITZ=m -CONFIG_MISDN_SPEEDFAX=m -CONFIG_MISDN_INFINEON=m -CONFIG_MISDN_W6692=m -CONFIG_MISDN_NETJET=m -CONFIG_MISDN_HDLC=m -CONFIG_MISDN_IPAC=m -CONFIG_MISDN_ISAR=m -CONFIG_NVM=y -CONFIG_NVM_PBLK=m -# CONFIG_NVM_PBLK_DEBUG is not set - -# -# Input device support -# -CONFIG_INPUT=y -CONFIG_INPUT_LEDS=m -CONFIG_INPUT_FF_MEMLESS=m -CONFIG_INPUT_POLLDEV=m -CONFIG_INPUT_SPARSEKMAP=m -CONFIG_INPUT_MATRIXKMAP=m - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_PSAUX=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m -# CONFIG_INPUT_EVBUG is not set - -# -# Input Device Drivers -# -CONFIG_INPUT_KEYBOARD=y -CONFIG_KEYBOARD_ADC=m -CONFIG_KEYBOARD_ADP5520=m -CONFIG_KEYBOARD_ADP5588=m -CONFIG_KEYBOARD_ADP5589=m -CONFIG_KEYBOARD_APPLESPI=m -CONFIG_KEYBOARD_ATKBD=m -CONFIG_KEYBOARD_QT1050=m -CONFIG_KEYBOARD_QT1070=m -CONFIG_KEYBOARD_QT2160=m -CONFIG_KEYBOARD_DLINK_DIR685=m -CONFIG_KEYBOARD_LKKBD=m -CONFIG_KEYBOARD_GPIO=m -CONFIG_KEYBOARD_GPIO_POLLED=m -CONFIG_KEYBOARD_TCA6416=m -CONFIG_KEYBOARD_TCA8418=m -CONFIG_KEYBOARD_MATRIX=m -CONFIG_KEYBOARD_LM8323=m -CONFIG_KEYBOARD_LM8333=m -CONFIG_KEYBOARD_MAX7359=m -CONFIG_KEYBOARD_MCS=m -CONFIG_KEYBOARD_MPR121=m -CONFIG_KEYBOARD_NEWTON=m -CONFIG_KEYBOARD_OPENCORES=m -CONFIG_KEYBOARD_SAMSUNG=m -CONFIG_KEYBOARD_STOWAWAY=m -CONFIG_KEYBOARD_SUNKBD=m -CONFIG_KEYBOARD_STMPE=m -CONFIG_KEYBOARD_IQS62X=m -CONFIG_KEYBOARD_OMAP4=m -CONFIG_KEYBOARD_TC3589X=m -CONFIG_KEYBOARD_TM2_TOUCHKEY=m -CONFIG_KEYBOARD_TWL4030=m -CONFIG_KEYBOARD_XTKBD=m -CONFIG_KEYBOARD_CROS_EC=m -CONFIG_KEYBOARD_CAP11XX=m -CONFIG_KEYBOARD_BCM=m -CONFIG_KEYBOARD_MTK_PMIC=m -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=m -CONFIG_MOUSE_PS2_ALPS=y -CONFIG_MOUSE_PS2_BYD=y -CONFIG_MOUSE_PS2_LOGIPS2PP=y -CONFIG_MOUSE_PS2_SYNAPTICS=y -CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS=y -CONFIG_MOUSE_PS2_CYPRESS=y -CONFIG_MOUSE_PS2_LIFEBOOK=y -CONFIG_MOUSE_PS2_TRACKPOINT=y -CONFIG_MOUSE_PS2_ELANTECH=y -CONFIG_MOUSE_PS2_ELANTECH_SMBUS=y -CONFIG_MOUSE_PS2_SENTELIC=y -CONFIG_MOUSE_PS2_TOUCHKIT=y -CONFIG_MOUSE_PS2_FOCALTECH=y -CONFIG_MOUSE_PS2_VMMOUSE=y -CONFIG_MOUSE_PS2_SMBUS=y -CONFIG_MOUSE_SERIAL=m -CONFIG_MOUSE_APPLETOUCH=m -CONFIG_MOUSE_BCM5974=m -CONFIG_MOUSE_CYAPA=m -CONFIG_MOUSE_ELAN_I2C=m -CONFIG_MOUSE_ELAN_I2C_I2C=y -CONFIG_MOUSE_ELAN_I2C_SMBUS=y -CONFIG_MOUSE_VSXXXAA=m -CONFIG_MOUSE_GPIO=m -CONFIG_MOUSE_SYNAPTICS_I2C=m -CONFIG_MOUSE_SYNAPTICS_USB=m -CONFIG_INPUT_JOYSTICK=y -CONFIG_JOYSTICK_ANALOG=m -CONFIG_JOYSTICK_A3D=m -CONFIG_JOYSTICK_ADI=m -CONFIG_JOYSTICK_COBRA=m -CONFIG_JOYSTICK_GF2K=m -CONFIG_JOYSTICK_GRIP=m -CONFIG_JOYSTICK_GRIP_MP=m -CONFIG_JOYSTICK_GUILLEMOT=m -CONFIG_JOYSTICK_INTERACT=m -CONFIG_JOYSTICK_SIDEWINDER=m -CONFIG_JOYSTICK_TMDC=m -CONFIG_JOYSTICK_IFORCE=m -CONFIG_JOYSTICK_IFORCE_USB=m -CONFIG_JOYSTICK_IFORCE_232=m -CONFIG_JOYSTICK_WARRIOR=m -CONFIG_JOYSTICK_MAGELLAN=m -CONFIG_JOYSTICK_SPACEORB=m -CONFIG_JOYSTICK_SPACEBALL=m -CONFIG_JOYSTICK_STINGER=m -CONFIG_JOYSTICK_TWIDJOY=m -CONFIG_JOYSTICK_ZHENHUA=m 
-CONFIG_JOYSTICK_DB9=m -CONFIG_JOYSTICK_GAMECON=m -CONFIG_JOYSTICK_TURBOGRAFX=m -CONFIG_JOYSTICK_AS5011=m -CONFIG_JOYSTICK_JOYDUMP=m -CONFIG_JOYSTICK_XPAD=m -CONFIG_JOYSTICK_XPAD_FF=y -CONFIG_JOYSTICK_XPAD_LEDS=y -CONFIG_JOYSTICK_WALKERA0701=m -CONFIG_JOYSTICK_PSXPAD_SPI=m -CONFIG_JOYSTICK_PSXPAD_SPI_FF=y -CONFIG_JOYSTICK_PXRC=m -CONFIG_JOYSTICK_FSIA6B=m -CONFIG_INPUT_TABLET=y -CONFIG_TABLET_USB_ACECAD=m -CONFIG_TABLET_USB_AIPTEK=m -CONFIG_TABLET_USB_GTCO=m -CONFIG_TABLET_USB_HANWANG=m -CONFIG_TABLET_USB_KBTAB=m -CONFIG_TABLET_USB_PEGASUS=m -CONFIG_TABLET_SERIAL_WACOM4=m -CONFIG_INPUT_TOUCHSCREEN=y -CONFIG_TOUCHSCREEN_PROPERTIES=y -CONFIG_TOUCHSCREEN_88PM860X=m -CONFIG_TOUCHSCREEN_ADS7846=m -CONFIG_TOUCHSCREEN_AD7877=m -CONFIG_TOUCHSCREEN_AD7879=m -CONFIG_TOUCHSCREEN_AD7879_I2C=m -CONFIG_TOUCHSCREEN_AD7879_SPI=m -CONFIG_TOUCHSCREEN_ADC=m -CONFIG_TOUCHSCREEN_AR1021_I2C=m -CONFIG_TOUCHSCREEN_ATMEL_MXT=m -CONFIG_TOUCHSCREEN_ATMEL_MXT_T37=y -CONFIG_TOUCHSCREEN_AUO_PIXCIR=m -CONFIG_TOUCHSCREEN_BU21013=m -CONFIG_TOUCHSCREEN_BU21029=m -CONFIG_TOUCHSCREEN_CHIPONE_ICN8318=m -CONFIG_TOUCHSCREEN_CHIPONE_ICN8505=m -CONFIG_TOUCHSCREEN_CY8CTMG110=m -CONFIG_TOUCHSCREEN_CYTTSP_CORE=m -CONFIG_TOUCHSCREEN_CYTTSP_I2C=m -CONFIG_TOUCHSCREEN_CYTTSP_SPI=m -CONFIG_TOUCHSCREEN_CYTTSP4_CORE=m -CONFIG_TOUCHSCREEN_CYTTSP4_I2C=m -CONFIG_TOUCHSCREEN_CYTTSP4_SPI=m -CONFIG_TOUCHSCREEN_DA9034=m -CONFIG_TOUCHSCREEN_DA9052=m -CONFIG_TOUCHSCREEN_DYNAPRO=m -CONFIG_TOUCHSCREEN_HAMPSHIRE=m -CONFIG_TOUCHSCREEN_EETI=m -CONFIG_TOUCHSCREEN_EGALAX=m -CONFIG_TOUCHSCREEN_EGALAX_SERIAL=m -CONFIG_TOUCHSCREEN_EXC3000=m -CONFIG_TOUCHSCREEN_FUJITSU=m -CONFIG_TOUCHSCREEN_GOODIX=m -CONFIG_TOUCHSCREEN_HIDEEP=m -CONFIG_TOUCHSCREEN_ILI210X=m -CONFIG_TOUCHSCREEN_S6SY761=m -CONFIG_TOUCHSCREEN_GUNZE=m -CONFIG_TOUCHSCREEN_EKTF2127=m -CONFIG_TOUCHSCREEN_ELAN=m -CONFIG_TOUCHSCREEN_ELO=m -CONFIG_TOUCHSCREEN_WACOM_W8001=m -CONFIG_TOUCHSCREEN_WACOM_I2C=m -CONFIG_TOUCHSCREEN_MAX11801=m -CONFIG_TOUCHSCREEN_MCS5000=m -CONFIG_TOUCHSCREEN_MMS114=m -CONFIG_TOUCHSCREEN_MELFAS_MIP4=m -CONFIG_TOUCHSCREEN_MTOUCH=m -CONFIG_TOUCHSCREEN_IMX6UL_TSC=m -CONFIG_TOUCHSCREEN_INEXIO=m -CONFIG_TOUCHSCREEN_MK712=m -CONFIG_TOUCHSCREEN_PENMOUNT=m -CONFIG_TOUCHSCREEN_EDT_FT5X06=m -CONFIG_TOUCHSCREEN_TOUCHRIGHT=m -CONFIG_TOUCHSCREEN_TOUCHWIN=m -CONFIG_TOUCHSCREEN_TI_AM335X_TSC=m -CONFIG_TOUCHSCREEN_UCB1400=m -CONFIG_TOUCHSCREEN_PIXCIR=m -CONFIG_TOUCHSCREEN_WDT87XX_I2C=m -CONFIG_TOUCHSCREEN_WM831X=m -CONFIG_TOUCHSCREEN_WM97XX=m -CONFIG_TOUCHSCREEN_WM9705=y -CONFIG_TOUCHSCREEN_WM9712=y -CONFIG_TOUCHSCREEN_WM9713=y -CONFIG_TOUCHSCREEN_USB_COMPOSITE=m -CONFIG_TOUCHSCREEN_MC13783=m -CONFIG_TOUCHSCREEN_USB_EGALAX=y -CONFIG_TOUCHSCREEN_USB_PANJIT=y -CONFIG_TOUCHSCREEN_USB_3M=y -CONFIG_TOUCHSCREEN_USB_ITM=y -CONFIG_TOUCHSCREEN_USB_ETURBO=y -CONFIG_TOUCHSCREEN_USB_GUNZE=y -CONFIG_TOUCHSCREEN_USB_DMC_TSC10=y -CONFIG_TOUCHSCREEN_USB_IRTOUCH=y -CONFIG_TOUCHSCREEN_USB_IDEALTEK=y -CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH=y -CONFIG_TOUCHSCREEN_USB_GOTOP=y -CONFIG_TOUCHSCREEN_USB_JASTEC=y -CONFIG_TOUCHSCREEN_USB_ELO=y -CONFIG_TOUCHSCREEN_USB_E2I=y -CONFIG_TOUCHSCREEN_USB_ZYTRONIC=y -CONFIG_TOUCHSCREEN_USB_ETT_TC45USB=y -CONFIG_TOUCHSCREEN_USB_NEXIO=y -CONFIG_TOUCHSCREEN_USB_EASYTOUCH=y -CONFIG_TOUCHSCREEN_TOUCHIT213=m -CONFIG_TOUCHSCREEN_TSC_SERIO=m -CONFIG_TOUCHSCREEN_TSC200X_CORE=m -CONFIG_TOUCHSCREEN_TSC2004=m -CONFIG_TOUCHSCREEN_TSC2005=m -CONFIG_TOUCHSCREEN_TSC2007=m -CONFIG_TOUCHSCREEN_TSC2007_IIO=y -CONFIG_TOUCHSCREEN_PCAP=m -CONFIG_TOUCHSCREEN_RM_TS=m -CONFIG_TOUCHSCREEN_SILEAD=m 
-CONFIG_TOUCHSCREEN_SIS_I2C=m -CONFIG_TOUCHSCREEN_ST1232=m -CONFIG_TOUCHSCREEN_STMFTS=m -CONFIG_TOUCHSCREEN_STMPE=m -CONFIG_TOUCHSCREEN_SUR40=m -CONFIG_TOUCHSCREEN_SURFACE3_SPI=m -CONFIG_TOUCHSCREEN_SX8654=m -CONFIG_TOUCHSCREEN_TPS6507X=m -CONFIG_TOUCHSCREEN_ZET6223=m -CONFIG_TOUCHSCREEN_ZFORCE=m -CONFIG_TOUCHSCREEN_COLIBRI_VF50=m -CONFIG_TOUCHSCREEN_ROHM_BU21023=m -CONFIG_TOUCHSCREEN_IQS5XX=m -CONFIG_INPUT_MISC=y -CONFIG_INPUT_88PM860X_ONKEY=m -CONFIG_INPUT_88PM80X_ONKEY=m -CONFIG_INPUT_AD714X=m -CONFIG_INPUT_AD714X_I2C=m -CONFIG_INPUT_AD714X_SPI=m -CONFIG_INPUT_ARIZONA_HAPTICS=m -CONFIG_INPUT_ATMEL_CAPTOUCH=m -CONFIG_INPUT_BMA150=m -CONFIG_INPUT_E3X0_BUTTON=m -CONFIG_INPUT_MSM_VIBRATOR=m -CONFIG_INPUT_PCSPKR=m -CONFIG_INPUT_MAX77650_ONKEY=m -CONFIG_INPUT_MAX77693_HAPTIC=m -CONFIG_INPUT_MAX8925_ONKEY=m -CONFIG_INPUT_MAX8997_HAPTIC=m -CONFIG_INPUT_MC13783_PWRBUTTON=m -CONFIG_INPUT_MMA8450=m -CONFIG_INPUT_APANEL=m -CONFIG_INPUT_GP2A=m -CONFIG_INPUT_GPIO_BEEPER=m -CONFIG_INPUT_GPIO_DECODER=m -CONFIG_INPUT_GPIO_VIBRA=m -CONFIG_INPUT_CPCAP_PWRBUTTON=m -CONFIG_INPUT_ATLAS_BTNS=m -CONFIG_INPUT_ATI_REMOTE2=m -CONFIG_INPUT_KEYSPAN_REMOTE=m -CONFIG_INPUT_KXTJ9=m -CONFIG_INPUT_POWERMATE=m -CONFIG_INPUT_YEALINK=m -CONFIG_INPUT_CM109=m -CONFIG_INPUT_REGULATOR_HAPTIC=m -CONFIG_INPUT_RETU_PWRBUTTON=m -CONFIG_INPUT_TPS65218_PWRBUTTON=m -CONFIG_INPUT_AXP20X_PEK=m -CONFIG_INPUT_TWL4030_PWRBUTTON=m -CONFIG_INPUT_TWL4030_VIBRA=m -CONFIG_INPUT_TWL6040_VIBRA=m -CONFIG_INPUT_UINPUT=m -CONFIG_INPUT_PALMAS_PWRBUTTON=m -CONFIG_INPUT_PCF50633_PMU=m -CONFIG_INPUT_PCF8574=m -CONFIG_INPUT_PWM_BEEPER=m -CONFIG_INPUT_PWM_VIBRA=m -CONFIG_INPUT_RK805_PWRKEY=m -CONFIG_INPUT_GPIO_ROTARY_ENCODER=m -CONFIG_INPUT_DA9052_ONKEY=m -CONFIG_INPUT_DA9055_ONKEY=m -CONFIG_INPUT_DA9063_ONKEY=m -CONFIG_INPUT_WM831X_ON=m -CONFIG_INPUT_PCAP=m -CONFIG_INPUT_ADXL34X=m -CONFIG_INPUT_ADXL34X_I2C=m -CONFIG_INPUT_ADXL34X_SPI=m -CONFIG_INPUT_IMS_PCU=m -CONFIG_INPUT_CMA3000=m -CONFIG_INPUT_CMA3000_I2C=m -CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m -CONFIG_INPUT_IDEAPAD_SLIDEBAR=m -CONFIG_INPUT_SOC_BUTTON_ARRAY=m -CONFIG_INPUT_DRV260X_HAPTICS=m -CONFIG_INPUT_DRV2665_HAPTICS=m -CONFIG_INPUT_DRV2667_HAPTICS=m -CONFIG_INPUT_RAVE_SP_PWRBUTTON=m -CONFIG_INPUT_STPMIC1_ONKEY=m -CONFIG_RMI4_CORE=m -CONFIG_RMI4_I2C=m -CONFIG_RMI4_SPI=m -CONFIG_RMI4_SMB=m -CONFIG_RMI4_F03=y -CONFIG_RMI4_F03_SERIO=m -CONFIG_RMI4_2D_SENSOR=y -CONFIG_RMI4_F11=y -CONFIG_RMI4_F12=y -CONFIG_RMI4_F30=y -CONFIG_RMI4_F34=y -# CONFIG_RMI4_F54 is not set -CONFIG_RMI4_F55=y - -# -# Hardware I/O ports -# -CONFIG_SERIO=m -CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y -CONFIG_SERIO_I8042=m -CONFIG_SERIO_SERPORT=m -CONFIG_SERIO_CT82C710=m -CONFIG_SERIO_PARKBD=m -CONFIG_SERIO_PCIPS2=m -CONFIG_SERIO_LIBPS2=m -CONFIG_SERIO_RAW=m -CONFIG_SERIO_ALTERA_PS2=m -CONFIG_SERIO_PS2MULT=m -CONFIG_SERIO_ARC_PS2=m -# CONFIG_SERIO_APBPS2 is not set -CONFIG_HYPERV_KEYBOARD=m -CONFIG_SERIO_GPIO_PS2=m -CONFIG_USERIO=m -CONFIG_GAMEPORT=m -CONFIG_GAMEPORT_NS558=m -CONFIG_GAMEPORT_L4=m -CONFIG_GAMEPORT_EMU10K1=m -CONFIG_GAMEPORT_FM801=m -# end of Hardware I/O ports -# end of Input device support - -# -# Character devices -# -CONFIG_TTY=y -CONFIG_VT=y -CONFIG_CONSOLE_TRANSLATIONS=y -CONFIG_VT_CONSOLE=y -CONFIG_VT_CONSOLE_SLEEP=y -CONFIG_HW_CONSOLE=y -CONFIG_VT_HW_CONSOLE_BINDING=y -CONFIG_UNIX98_PTYS=y -# CONFIG_LEGACY_PTYS is not set -CONFIG_LDISC_AUTOLOAD=y - -# -# Serial drivers -# -CONFIG_SERIAL_EARLYCON=y -CONFIG_SERIAL_8250=y -# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set -CONFIG_SERIAL_8250_PNP=y -# 
CONFIG_SERIAL_8250_16550A_VARIANTS is not set -CONFIG_SERIAL_8250_FINTEK=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_DMA=y -CONFIG_SERIAL_8250_PCI=y -CONFIG_SERIAL_8250_EXAR=m -CONFIG_SERIAL_8250_CS=m -CONFIG_SERIAL_8250_MEN_MCB=m -CONFIG_SERIAL_8250_NR_UARTS=32 -CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -CONFIG_SERIAL_8250_EXTENDED=y -CONFIG_SERIAL_8250_MANY_PORTS=y -CONFIG_SERIAL_8250_ASPEED_VUART=m -CONFIG_SERIAL_8250_SHARE_IRQ=y -# CONFIG_SERIAL_8250_DETECT_IRQ is not set -CONFIG_SERIAL_8250_RSA=y -CONFIG_SERIAL_8250_DWLIB=y -CONFIG_SERIAL_8250_DW=m -CONFIG_SERIAL_8250_RT288X=y -CONFIG_SERIAL_8250_LPSS=y -CONFIG_SERIAL_8250_MID=y -CONFIG_SERIAL_OF_PLATFORM=m - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_MAX3100=m -CONFIG_SERIAL_MAX310X=m -CONFIG_SERIAL_UARTLITE=m -CONFIG_SERIAL_UARTLITE_NR_UARTS=1 -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_SERIAL_JSM=m -CONFIG_SERIAL_SIFIVE=m -CONFIG_SERIAL_SCCNXP=m -CONFIG_SERIAL_SC16IS7XX_CORE=m -CONFIG_SERIAL_SC16IS7XX=m -CONFIG_SERIAL_SC16IS7XX_I2C=y -CONFIG_SERIAL_SC16IS7XX_SPI=y -CONFIG_SERIAL_ALTERA_JTAGUART=m -CONFIG_SERIAL_ALTERA_UART=m -CONFIG_SERIAL_ALTERA_UART_MAXPORTS=4 -CONFIG_SERIAL_ALTERA_UART_BAUDRATE=115200 -CONFIG_SERIAL_IFX6X60=m -CONFIG_SERIAL_XILINX_PS_UART=m -CONFIG_SERIAL_ARC=m -CONFIG_SERIAL_ARC_NR_PORTS=1 -CONFIG_SERIAL_RP2=m -CONFIG_SERIAL_RP2_NR_UARTS=32 -CONFIG_SERIAL_FSL_LPUART=m -CONFIG_SERIAL_FSL_LINFLEXUART=m -CONFIG_SERIAL_CONEXANT_DIGICOLOR=m -CONFIG_SERIAL_MEN_Z135=m -CONFIG_SERIAL_SPRD=m -# end of Serial drivers - -CONFIG_SERIAL_MCTRL_GPIO=y -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -CONFIG_CYZ_INTR=y -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_SYNCLINK=m -CONFIG_SYNCLINKMP=m -CONFIG_SYNCLINK_GT=m -CONFIG_ISI=m -CONFIG_N_HDLC=m -CONFIG_N_GSM=m -CONFIG_NOZOMI=m -CONFIG_NULL_TTY=m -CONFIG_TRACE_ROUTER=m -CONFIG_TRACE_SINK=m -CONFIG_HVC_DRIVER=y -CONFIG_HVC_IRQ=y -CONFIG_HVC_XEN=y -CONFIG_HVC_XEN_FRONTEND=y -CONFIG_SERIAL_DEV_BUS=y -CONFIG_SERIAL_DEV_CTRL_TTYPORT=y -# CONFIG_TTY_PRINTK is not set -CONFIG_PRINTER=m -# CONFIG_LP_CONSOLE is not set -CONFIG_PPDEV=m -CONFIG_VIRTIO_CONSOLE=m -CONFIG_IPMI_HANDLER=m -CONFIG_IPMI_DMI_DECODE=y -CONFIG_IPMI_PLAT_DATA=y -# CONFIG_IPMI_PANIC_EVENT is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_SI=m -CONFIG_IPMI_SSIF=m -CONFIG_IPMI_WATCHDOG=m -CONFIG_IPMI_POWEROFF=m -CONFIG_IPMB_DEVICE_INTERFACE=m -CONFIG_HW_RANDOM=m -CONFIG_HW_RANDOM_TIMERIOMEM=m -CONFIG_HW_RANDOM_INTEL=m -CONFIG_HW_RANDOM_AMD=m -CONFIG_HW_RANDOM_VIA=m -CONFIG_HW_RANDOM_VIRTIO=m -CONFIG_APPLICOM=m - -# -# PCMCIA character devices -# -CONFIG_SYNCLINK_CS=m -CONFIG_CARDMAN_4000=m -CONFIG_CARDMAN_4040=m -CONFIG_SCR24X=m -CONFIG_IPWIRELESS=m -# end of PCMCIA character devices - -CONFIG_MWAVE=m -# CONFIG_DEVMEM is not set -# CONFIG_DEVKMEM is not set -CONFIG_NVRAM=m -CONFIG_RAW_DRIVER=m -CONFIG_MAX_RAW_DEVS=256 -# CONFIG_DEVPORT is not set -CONFIG_HPET=y -CONFIG_HPET_MMAP=y -CONFIG_HPET_MMAP_DEFAULT=y -CONFIG_HANGCHECK_TIMER=m -CONFIG_TCG_TPM=m -CONFIG_HW_RANDOM_TPM=y -CONFIG_TCG_TIS_CORE=m -CONFIG_TCG_TIS=m -CONFIG_TCG_TIS_SPI=m -CONFIG_TCG_TIS_SPI_CR50=y -CONFIG_TCG_TIS_I2C_ATMEL=m -CONFIG_TCG_TIS_I2C_INFINEON=m -CONFIG_TCG_TIS_I2C_NUVOTON=m -CONFIG_TCG_NSC=m -CONFIG_TCG_ATMEL=m -CONFIG_TCG_INFINEON=m -CONFIG_TCG_XEN=m -CONFIG_TCG_CRB=m -CONFIG_TCG_VTPM_PROXY=m -CONFIG_TCG_TIS_ST33ZP24=m -CONFIG_TCG_TIS_ST33ZP24_I2C=m -CONFIG_TCG_TIS_ST33ZP24_SPI=m -CONFIG_TELCLOCK=m -CONFIG_XILLYBUS=m -CONFIG_XILLYBUS_PCIE=m -CONFIG_XILLYBUS_OF=m -# end of 
Character devices - -# CONFIG_RANDOM_TRUST_CPU is not set -# CONFIG_RANDOM_TRUST_BOOTLOADER is not set - -# -# I2C support -# -CONFIG_I2C=y -CONFIG_ACPI_I2C_OPREGION=y -CONFIG_I2C_BOARDINFO=y -CONFIG_I2C_COMPAT=y -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_MUX=m - -# -# Multiplexer I2C Chip support -# -CONFIG_I2C_ARB_GPIO_CHALLENGE=m -CONFIG_I2C_MUX_GPIO=m -CONFIG_I2C_MUX_GPMUX=m -CONFIG_I2C_MUX_LTC4306=m -CONFIG_I2C_MUX_PCA9541=m -CONFIG_I2C_MUX_PCA954x=m -CONFIG_I2C_MUX_PINCTRL=m -CONFIG_I2C_MUX_REG=m -CONFIG_I2C_DEMUX_PINCTRL=m -CONFIG_I2C_MUX_MLXCPLD=m -# end of Multiplexer I2C Chip support - -CONFIG_I2C_HELPER_AUTO=y -CONFIG_I2C_SMBUS=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_ALGOPCA=m - -# -# I2C Hardware Bus support -# - -# -# PC SMBus host controller drivers -# -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI1563=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_AMD756=m -CONFIG_I2C_AMD756_S4882=m -CONFIG_I2C_AMD8111=m -CONFIG_I2C_AMD_MP2=m -CONFIG_I2C_I801=m -CONFIG_I2C_ISCH=m -CONFIG_I2C_ISMT=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_CHT_WC=m -CONFIG_I2C_NFORCE2=m -CONFIG_I2C_NFORCE2_S4985=m -CONFIG_I2C_NVIDIA_GPU=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_SIS630=m -CONFIG_I2C_SIS96X=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m - -# -# ACPI drivers -# -CONFIG_I2C_SCMI=m - -# -# I2C system bus drivers (mostly embedded / system-on-chip) -# -CONFIG_I2C_CBUS_GPIO=m -CONFIG_I2C_DESIGNWARE_CORE=y -CONFIG_I2C_DESIGNWARE_PLATFORM=y -CONFIG_I2C_DESIGNWARE_SLAVE=y -CONFIG_I2C_DESIGNWARE_PCI=m -CONFIG_I2C_DESIGNWARE_BAYTRAIL=y -CONFIG_I2C_EMEV2=m -CONFIG_I2C_GPIO=m -# CONFIG_I2C_GPIO_FAULT_INJECTOR is not set -CONFIG_I2C_KEMPLD=m -CONFIG_I2C_OCORES=m -CONFIG_I2C_PCA_PLATFORM=m -CONFIG_I2C_RK3X=m -CONFIG_I2C_SIMTEC=m -CONFIG_I2C_XILINX=m - -# -# External I2C/SMBus adapter drivers -# -CONFIG_I2C_DIOLAN_U2C=m -CONFIG_I2C_DLN2=m -CONFIG_I2C_PARPORT=m -CONFIG_I2C_ROBOTFUZZ_OSIF=m -CONFIG_I2C_TAOS_EVM=m -CONFIG_I2C_TINY_USB=m -CONFIG_I2C_VIPERBOARD=m - -# -# Other I2C/SMBus bus drivers -# -CONFIG_I2C_MLXCPLD=m -CONFIG_I2C_CROS_EC_TUNNEL=m -CONFIG_I2C_FSI=m -# end of I2C Hardware Bus support - -CONFIG_I2C_STUB=m -CONFIG_I2C_SLAVE=y -CONFIG_I2C_SLAVE_EEPROM=m -# CONFIG_I2C_DEBUG_CORE is not set -# CONFIG_I2C_DEBUG_ALGO is not set -# CONFIG_I2C_DEBUG_BUS is not set -# end of I2C support - -CONFIG_I3C=m -CONFIG_CDNS_I3C_MASTER=m -CONFIG_DW_I3C_MASTER=m -CONFIG_SPI=y -# CONFIG_SPI_DEBUG is not set -CONFIG_SPI_MASTER=y -CONFIG_SPI_MEM=y - -# -# SPI Master Controller Drivers -# -CONFIG_SPI_ALTERA=m -CONFIG_SPI_AXI_SPI_ENGINE=m -CONFIG_SPI_BITBANG=m -CONFIG_SPI_BUTTERFLY=m -CONFIG_SPI_CADENCE=m -CONFIG_SPI_DESIGNWARE=m -CONFIG_SPI_DW_PCI=m -CONFIG_SPI_DW_MID_DMA=y -CONFIG_SPI_DW_MMIO=m -CONFIG_SPI_DLN2=m -CONFIG_SPI_FSI=m -CONFIG_SPI_NXP_FLEXSPI=m -CONFIG_SPI_GPIO=m -CONFIG_SPI_LM70_LLP=m -CONFIG_SPI_FSL_LIB=m -CONFIG_SPI_FSL_SPI=m -CONFIG_SPI_OC_TINY=m -CONFIG_SPI_PXA2XX=m -CONFIG_SPI_PXA2XX_PCI=m -CONFIG_SPI_ROCKCHIP=m -CONFIG_SPI_SC18IS602=m -CONFIG_SPI_SIFIVE=m -CONFIG_SPI_MXIC=m -CONFIG_SPI_XCOMM=m -CONFIG_SPI_XILINX=m -CONFIG_SPI_ZYNQMP_GQSPI=m - -# -# SPI Multiplexer support -# -CONFIG_SPI_MUX=m - -# -# SPI Protocol Masters -# -CONFIG_SPI_SPIDEV=m -CONFIG_SPI_LOOPBACK_TEST=m -CONFIG_SPI_TLE62X0=m -CONFIG_SPI_SLAVE=y -CONFIG_SPI_SLAVE_TIME=m -CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m -CONFIG_SPMI=m -CONFIG_HSI=m -CONFIG_HSI_BOARDINFO=y - -# -# HSI controllers -# - -# -# HSI clients -# -CONFIG_HSI_CHAR=m -CONFIG_PPS=y -# CONFIG_PPS_DEBUG is not set - -# -# PPS clients support -# -CONFIG_PPS_CLIENT_KTIMER=m -CONFIG_PPS_CLIENT_LDISC=m -CONFIG_PPS_CLIENT_PARPORT=m 
-CONFIG_PPS_CLIENT_GPIO=m - -# -# PPS generators support -# - -# -# PTP clock support -# -CONFIG_PTP_1588_CLOCK=y -CONFIG_DP83640_PHY=m -CONFIG_PTP_1588_CLOCK_INES=m -CONFIG_PTP_1588_CLOCK_KVM=m -CONFIG_PTP_1588_CLOCK_IDT82P33=m -CONFIG_PTP_1588_CLOCK_IDTCM=m -CONFIG_PTP_1588_CLOCK_VMW=m -# end of PTP clock support - -CONFIG_PINCTRL=y -CONFIG_GENERIC_PINCTRL_GROUPS=y -CONFIG_PINMUX=y -CONFIG_GENERIC_PINMUX_FUNCTIONS=y -CONFIG_PINCONF=y -CONFIG_GENERIC_PINCONF=y -# CONFIG_DEBUG_PINCTRL is not set -CONFIG_PINCTRL_AS3722=m -CONFIG_PINCTRL_AXP209=m -CONFIG_PINCTRL_AMD=m -CONFIG_PINCTRL_DA9062=m -CONFIG_PINCTRL_MCP23S08=m -CONFIG_PINCTRL_SINGLE=m -CONFIG_PINCTRL_SX150X=y -CONFIG_PINCTRL_STMFX=m -CONFIG_PINCTRL_MAX77620=m -CONFIG_PINCTRL_PALMAS=m -CONFIG_PINCTRL_RK805=m -CONFIG_PINCTRL_OCELOT=y -CONFIG_PINCTRL_BAYTRAIL=y -CONFIG_PINCTRL_CHERRYVIEW=y -CONFIG_PINCTRL_LYNXPOINT=y -CONFIG_PINCTRL_INTEL=y -CONFIG_PINCTRL_BROXTON=y -CONFIG_PINCTRL_CANNONLAKE=y -CONFIG_PINCTRL_CEDARFORK=y -CONFIG_PINCTRL_DENVERTON=y -CONFIG_PINCTRL_GEMINILAKE=y -CONFIG_PINCTRL_ICELAKE=y -CONFIG_PINCTRL_LEWISBURG=y -CONFIG_PINCTRL_SUNRISEPOINT=y -CONFIG_PINCTRL_TIGERLAKE=y -CONFIG_PINCTRL_LOCHNAGAR=m -CONFIG_PINCTRL_MADERA=m -CONFIG_PINCTRL_CS47L15=y -CONFIG_PINCTRL_CS47L35=y -CONFIG_PINCTRL_CS47L85=y -CONFIG_PINCTRL_CS47L90=y -CONFIG_PINCTRL_CS47L92=y -CONFIG_PINCTRL_EQUILIBRIUM=m -CONFIG_GPIOLIB=y -CONFIG_GPIOLIB_FASTPATH_LIMIT=512 -CONFIG_OF_GPIO=y -CONFIG_GPIO_ACPI=y -CONFIG_GPIOLIB_IRQCHIP=y -# CONFIG_DEBUG_GPIO is not set -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_GENERIC=y -CONFIG_GPIO_MAX730X=m - -# -# Memory mapped GPIO drivers -# -CONFIG_GPIO_74XX_MMIO=m -CONFIG_GPIO_ALTERA=m -CONFIG_GPIO_AMDPT=m -CONFIG_GPIO_CADENCE=m -CONFIG_GPIO_DWAPB=m -CONFIG_GPIO_EXAR=m -CONFIG_GPIO_FTGPIO010=y -CONFIG_GPIO_GENERIC_PLATFORM=m -CONFIG_GPIO_GRGPIO=m -CONFIG_GPIO_HLWD=m -CONFIG_GPIO_ICH=m -CONFIG_GPIO_LOGICVC=m -CONFIG_GPIO_MB86S7X=m -CONFIG_GPIO_MENZ127=m -CONFIG_GPIO_SAMA5D2_PIOBU=m -CONFIG_GPIO_SIFIVE=y -CONFIG_GPIO_SIOX=m -CONFIG_GPIO_SYSCON=m -CONFIG_GPIO_VX855=m -CONFIG_GPIO_WCD934X=m -CONFIG_GPIO_XILINX=m -CONFIG_GPIO_AMD_FCH=m -# end of Memory mapped GPIO drivers - -# -# Port-mapped I/O GPIO drivers -# -CONFIG_GPIO_F7188X=m -CONFIG_GPIO_IT87=m -CONFIG_GPIO_SCH=m -CONFIG_GPIO_SCH311X=m -CONFIG_GPIO_WINBOND=m -CONFIG_GPIO_WS16C48=m -# end of Port-mapped I/O GPIO drivers - -# -# I2C GPIO expanders -# -CONFIG_GPIO_ADP5588=m -CONFIG_GPIO_ADNP=m -CONFIG_GPIO_GW_PLD=m -CONFIG_GPIO_MAX7300=m -CONFIG_GPIO_MAX732X=m -CONFIG_GPIO_PCA953X=m -CONFIG_GPIO_PCF857X=m -CONFIG_GPIO_TPIC2810=m -# end of I2C GPIO expanders - -# -# MFD GPIO expanders -# -CONFIG_GPIO_ADP5520=m -CONFIG_GPIO_ARIZONA=m -CONFIG_GPIO_BD70528=m -CONFIG_GPIO_BD71828=m -CONFIG_GPIO_BD9571MWV=m -CONFIG_GPIO_CRYSTAL_COVE=m -CONFIG_GPIO_DA9052=m -CONFIG_GPIO_DA9055=m -CONFIG_GPIO_DLN2=m -CONFIG_GPIO_JANZ_TTL=m -CONFIG_GPIO_KEMPLD=m -CONFIG_GPIO_LP3943=m -CONFIG_GPIO_LP873X=m -CONFIG_GPIO_LP87565=m -CONFIG_GPIO_MADERA=m -CONFIG_GPIO_MAX77620=m -CONFIG_GPIO_MAX77650=m -CONFIG_GPIO_PALMAS=y -CONFIG_GPIO_RC5T583=y -CONFIG_GPIO_STMPE=y -CONFIG_GPIO_TC3589X=y -CONFIG_GPIO_TPS65086=m -CONFIG_GPIO_TPS65218=m -CONFIG_GPIO_TPS6586X=y -CONFIG_GPIO_TPS65910=y -CONFIG_GPIO_TPS65912=m -CONFIG_GPIO_TPS68470=y -CONFIG_GPIO_TQMX86=m -CONFIG_GPIO_TWL4030=m -CONFIG_GPIO_TWL6040=m -CONFIG_GPIO_UCB1400=m -CONFIG_GPIO_WHISKEY_COVE=m -CONFIG_GPIO_WM831X=m -CONFIG_GPIO_WM8350=m -CONFIG_GPIO_WM8994=m -# end of MFD GPIO expanders - -# -# PCI GPIO expanders -# -CONFIG_GPIO_AMD8111=m 
-CONFIG_GPIO_ML_IOH=m -CONFIG_GPIO_PCI_IDIO_16=m -CONFIG_GPIO_PCIE_IDIO_24=m -CONFIG_GPIO_RDC321X=m -CONFIG_GPIO_SODAVILLE=y -# end of PCI GPIO expanders - -# -# SPI GPIO expanders -# -CONFIG_GPIO_74X164=m -CONFIG_GPIO_MAX3191X=m -CONFIG_GPIO_MAX7301=m -CONFIG_GPIO_MC33880=m -CONFIG_GPIO_PISOSR=m -CONFIG_GPIO_XRA1403=m -CONFIG_GPIO_MOXTET=m -# end of SPI GPIO expanders - -# -# USB GPIO expanders -# -CONFIG_GPIO_VIPERBOARD=m -# end of USB GPIO expanders - -CONFIG_GPIO_MOCKUP=m -CONFIG_W1=m -CONFIG_W1_CON=y - -# -# 1-wire Bus Masters -# -CONFIG_W1_MASTER_MATROX=m -CONFIG_W1_MASTER_DS2490=m -CONFIG_W1_MASTER_DS2482=m -CONFIG_W1_MASTER_DS1WM=m -CONFIG_W1_MASTER_GPIO=m -CONFIG_W1_MASTER_SGI=m -# end of 1-wire Bus Masters - -# -# 1-wire Slaves -# -CONFIG_W1_SLAVE_THERM=m -CONFIG_W1_SLAVE_SMEM=m -CONFIG_W1_SLAVE_DS2405=m -CONFIG_W1_SLAVE_DS2408=m -# CONFIG_W1_SLAVE_DS2408_READBACK is not set -CONFIG_W1_SLAVE_DS2413=m -CONFIG_W1_SLAVE_DS2406=m -CONFIG_W1_SLAVE_DS2423=m -CONFIG_W1_SLAVE_DS2805=m -CONFIG_W1_SLAVE_DS2430=m -CONFIG_W1_SLAVE_DS2431=m -CONFIG_W1_SLAVE_DS2433=m -# CONFIG_W1_SLAVE_DS2433_CRC is not set -CONFIG_W1_SLAVE_DS2438=m -CONFIG_W1_SLAVE_DS250X=m -CONFIG_W1_SLAVE_DS2780=m -CONFIG_W1_SLAVE_DS2781=m -CONFIG_W1_SLAVE_DS28E04=m -CONFIG_W1_SLAVE_DS28E17=m -# end of 1-wire Slaves - -CONFIG_POWER_AVS=y -CONFIG_QCOM_CPR=m -CONFIG_POWER_RESET=y -CONFIG_POWER_RESET_AS3722=y -CONFIG_POWER_RESET_GPIO=y -CONFIG_POWER_RESET_GPIO_RESTART=y -CONFIG_POWER_RESET_LTC2952=y -CONFIG_POWER_RESET_MT6323=y -CONFIG_POWER_RESET_RESTART=y -CONFIG_POWER_RESET_SYSCON=y -CONFIG_POWER_RESET_SYSCON_POWEROFF=y -CONFIG_REBOOT_MODE=m -CONFIG_SYSCON_REBOOT_MODE=m -CONFIG_NVMEM_REBOOT_MODE=m -CONFIG_POWER_SUPPLY=y -# CONFIG_POWER_SUPPLY_DEBUG is not set -CONFIG_POWER_SUPPLY_HWMON=y -CONFIG_PDA_POWER=m -CONFIG_GENERIC_ADC_BATTERY=m -CONFIG_MAX8925_POWER=m -CONFIG_WM831X_BACKUP=m -CONFIG_WM831X_POWER=m -CONFIG_WM8350_POWER=m -CONFIG_TEST_POWER=m -CONFIG_BATTERY_88PM860X=m -CONFIG_CHARGER_ADP5061=m -CONFIG_BATTERY_ACT8945A=m -CONFIG_BATTERY_CPCAP=m -CONFIG_BATTERY_DS2760=m -CONFIG_BATTERY_DS2780=m -CONFIG_BATTERY_DS2781=m -CONFIG_BATTERY_DS2782=m -CONFIG_BATTERY_LEGO_EV3=m -CONFIG_BATTERY_SBS=m -CONFIG_CHARGER_SBS=m -CONFIG_MANAGER_SBS=m -CONFIG_BATTERY_BQ27XXX=m -CONFIG_BATTERY_BQ27XXX_I2C=m -CONFIG_BATTERY_BQ27XXX_HDQ=m -# CONFIG_BATTERY_BQ27XXX_DT_UPDATES_NVM is not set -CONFIG_BATTERY_DA9030=m -CONFIG_BATTERY_DA9052=m -CONFIG_CHARGER_DA9150=m -CONFIG_BATTERY_DA9150=m -CONFIG_CHARGER_AXP20X=m -CONFIG_BATTERY_AXP20X=m -CONFIG_AXP20X_POWER=m -CONFIG_AXP288_CHARGER=m -CONFIG_AXP288_FUEL_GAUGE=m -CONFIG_BATTERY_MAX17040=m -CONFIG_BATTERY_MAX17042=m -CONFIG_BATTERY_MAX1721X=m -CONFIG_BATTERY_TWL4030_MADC=m -CONFIG_CHARGER_88PM860X=m -CONFIG_CHARGER_PCF50633=m -CONFIG_BATTERY_RX51=m -CONFIG_CHARGER_ISP1704=m -CONFIG_CHARGER_MAX8903=m -CONFIG_CHARGER_TWL4030=m -CONFIG_CHARGER_LP8727=m -CONFIG_CHARGER_LP8788=m -CONFIG_CHARGER_GPIO=m -CONFIG_CHARGER_MANAGER=y -CONFIG_CHARGER_LT3651=m -CONFIG_CHARGER_MAX14577=m -CONFIG_CHARGER_DETECTOR_MAX14656=m -CONFIG_CHARGER_MAX77650=m -CONFIG_CHARGER_MAX77693=m -CONFIG_CHARGER_MAX8997=m -CONFIG_CHARGER_MAX8998=m -CONFIG_CHARGER_BQ2415X=m -CONFIG_CHARGER_BQ24190=m -CONFIG_CHARGER_BQ24257=m -CONFIG_CHARGER_BQ24735=m -CONFIG_CHARGER_BQ25890=m -CONFIG_CHARGER_SMB347=m -CONFIG_CHARGER_TPS65090=m -CONFIG_CHARGER_TPS65217=m -CONFIG_BATTERY_GAUGE_LTC2941=m -CONFIG_BATTERY_RT5033=m -CONFIG_CHARGER_RT9455=m -CONFIG_CHARGER_CROS_USBPD=m -CONFIG_CHARGER_UCS1002=m -CONFIG_CHARGER_BD70528=m 
-CONFIG_CHARGER_WILCO=m -CONFIG_HWMON=y -CONFIG_HWMON_VID=m -# CONFIG_HWMON_DEBUG_CHIP is not set - -# -# Native drivers -# -CONFIG_SENSORS_ABITUGURU=m -CONFIG_SENSORS_ABITUGURU3=m -CONFIG_SENSORS_AD7314=m -CONFIG_SENSORS_AD7414=m -CONFIG_SENSORS_AD7418=m -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM1026=m -CONFIG_SENSORS_ADM1029=m -CONFIG_SENSORS_ADM1031=m -CONFIG_SENSORS_ADM1177=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_ADT7X10=m -CONFIG_SENSORS_ADT7310=m -CONFIG_SENSORS_ADT7410=m -CONFIG_SENSORS_ADT7411=m -CONFIG_SENSORS_ADT7462=m -CONFIG_SENSORS_ADT7470=m -CONFIG_SENSORS_ADT7475=m -CONFIG_SENSORS_AS370=m -CONFIG_SENSORS_ASC7621=m -CONFIG_SENSORS_AXI_FAN_CONTROL=m -CONFIG_SENSORS_K8TEMP=m -CONFIG_SENSORS_K10TEMP=m -CONFIG_SENSORS_FAM15H_POWER=m -CONFIG_SENSORS_APPLESMC=m -CONFIG_SENSORS_ASB100=m -CONFIG_SENSORS_ASPEED=m -CONFIG_SENSORS_ATXP1=m -CONFIG_SENSORS_DRIVETEMP=m -CONFIG_SENSORS_DS620=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_DELL_SMM=m -CONFIG_SENSORS_DA9052_ADC=m -CONFIG_SENSORS_DA9055=m -CONFIG_SENSORS_I5K_AMB=m -CONFIG_SENSORS_F71805F=m -CONFIG_SENSORS_F71882FG=m -CONFIG_SENSORS_F75375S=m -CONFIG_SENSORS_MC13783_ADC=m -CONFIG_SENSORS_FSCHMD=m -CONFIG_SENSORS_FTSTEUTATES=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_G760A=m -CONFIG_SENSORS_G762=m -CONFIG_SENSORS_GPIO_FAN=m -CONFIG_SENSORS_HIH6130=m -CONFIG_SENSORS_IBMAEM=m -CONFIG_SENSORS_IBMPEX=m -CONFIG_SENSORS_IIO_HWMON=m -CONFIG_SENSORS_I5500=m -CONFIG_SENSORS_CORETEMP=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_JC42=m -CONFIG_SENSORS_POWR1220=m -CONFIG_SENSORS_LINEAGE=m -CONFIG_SENSORS_LOCHNAGAR=m -CONFIG_SENSORS_LTC2945=m -CONFIG_SENSORS_LTC2947=m -CONFIG_SENSORS_LTC2947_I2C=m -CONFIG_SENSORS_LTC2947_SPI=m -CONFIG_SENSORS_LTC2990=m -CONFIG_SENSORS_LTC4151=m -CONFIG_SENSORS_LTC4215=m -CONFIG_SENSORS_LTC4222=m -CONFIG_SENSORS_LTC4245=m -CONFIG_SENSORS_LTC4260=m -CONFIG_SENSORS_LTC4261=m -CONFIG_SENSORS_MAX1111=m -CONFIG_SENSORS_MAX16065=m -CONFIG_SENSORS_MAX1619=m -CONFIG_SENSORS_MAX1668=m -CONFIG_SENSORS_MAX197=m -CONFIG_SENSORS_MAX31722=m -CONFIG_SENSORS_MAX31730=m -CONFIG_SENSORS_MAX6621=m -CONFIG_SENSORS_MAX6639=m -CONFIG_SENSORS_MAX6642=m -CONFIG_SENSORS_MAX6650=m -CONFIG_SENSORS_MAX6697=m -CONFIG_SENSORS_MAX31790=m -CONFIG_SENSORS_MCP3021=m -CONFIG_SENSORS_MLXREG_FAN=m -CONFIG_SENSORS_TC654=m -CONFIG_SENSORS_MENF21BMC_HWMON=m -CONFIG_SENSORS_ADCXX=m -CONFIG_SENSORS_LM63=m -CONFIG_SENSORS_LM70=m -CONFIG_SENSORS_LM73=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM77=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM83=m -CONFIG_SENSORS_LM85=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM90=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_LM93=m -CONFIG_SENSORS_LM95234=m -CONFIG_SENSORS_LM95241=m -CONFIG_SENSORS_LM95245=m -CONFIG_SENSORS_PC87360=m -CONFIG_SENSORS_PC87427=m -CONFIG_SENSORS_NTC_THERMISTOR=m -CONFIG_SENSORS_NCT6683=m -CONFIG_SENSORS_NCT6775=m -CONFIG_SENSORS_NCT7802=m -CONFIG_SENSORS_NCT7904=m -CONFIG_SENSORS_NPCM7XX=m -CONFIG_SENSORS_PCF8591=m -CONFIG_PMBUS=m -CONFIG_SENSORS_PMBUS=m -CONFIG_SENSORS_ADM1275=m -CONFIG_SENSORS_BEL_PFE=m -CONFIG_SENSORS_IBM_CFFPS=m -CONFIG_SENSORS_INSPUR_IPSPS=m -CONFIG_SENSORS_IR35221=m -CONFIG_SENSORS_IR38064=m -CONFIG_SENSORS_IRPS5401=m -CONFIG_SENSORS_ISL68137=m -CONFIG_SENSORS_LM25066=m -CONFIG_SENSORS_LTC2978=m -# CONFIG_SENSORS_LTC2978_REGULATOR is not set -CONFIG_SENSORS_LTC3815=m -CONFIG_SENSORS_MAX16064=m -CONFIG_SENSORS_MAX20730=m -CONFIG_SENSORS_MAX20751=m -CONFIG_SENSORS_MAX31785=m -CONFIG_SENSORS_MAX34440=m 
-CONFIG_SENSORS_MAX8688=m -CONFIG_SENSORS_PXE1610=m -CONFIG_SENSORS_TPS40422=m -CONFIG_SENSORS_TPS53679=m -CONFIG_SENSORS_UCD9000=m -CONFIG_SENSORS_UCD9200=m -CONFIG_SENSORS_XDPE122=m -CONFIG_SENSORS_ZL6100=m -CONFIG_SENSORS_PWM_FAN=m -CONFIG_SENSORS_SHT15=m -CONFIG_SENSORS_SHT21=m -CONFIG_SENSORS_SHT3x=m -CONFIG_SENSORS_SHTC1=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_DME1737=m -CONFIG_SENSORS_EMC1403=m -CONFIG_SENSORS_EMC2103=m -CONFIG_SENSORS_EMC6W201=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_SMSC47M192=m -CONFIG_SENSORS_SMSC47B397=m -CONFIG_SENSORS_SCH56XX_COMMON=m -CONFIG_SENSORS_SCH5627=m -CONFIG_SENSORS_SCH5636=m -CONFIG_SENSORS_STTS751=m -CONFIG_SENSORS_SMM665=m -CONFIG_SENSORS_ADC128D818=m -CONFIG_SENSORS_ADS7828=m -CONFIG_SENSORS_ADS7871=m -CONFIG_SENSORS_AMC6821=m -CONFIG_SENSORS_INA209=m -CONFIG_SENSORS_INA2XX=m -CONFIG_SENSORS_INA3221=m -CONFIG_SENSORS_TC74=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_TMP102=m -CONFIG_SENSORS_TMP103=m -CONFIG_SENSORS_TMP108=m -CONFIG_SENSORS_TMP401=m -CONFIG_SENSORS_TMP421=m -CONFIG_SENSORS_TMP513=m -CONFIG_SENSORS_VIA_CPUTEMP=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83773G=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_W83791D=m -CONFIG_SENSORS_W83792D=m -CONFIG_SENSORS_W83793=m -CONFIG_SENSORS_W83795=m -# CONFIG_SENSORS_W83795_FANCTRL is not set -CONFIG_SENSORS_W83L785TS=m -CONFIG_SENSORS_W83L786NG=m -CONFIG_SENSORS_W83627HF=m -CONFIG_SENSORS_W83627EHF=m -CONFIG_SENSORS_WM831X=m -CONFIG_SENSORS_WM8350=m -CONFIG_SENSORS_XGENE=m - -# -# ACPI drivers -# -CONFIG_SENSORS_ACPI_POWER=m -CONFIG_SENSORS_ATK0110=m -CONFIG_THERMAL=y -# CONFIG_THERMAL_STATISTICS is not set -CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=100 -CONFIG_THERMAL_HWMON=y -CONFIG_THERMAL_OF=y -CONFIG_THERMAL_WRITABLE_TRIPS=y -CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y -# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set -# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set -# CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR is not set -CONFIG_THERMAL_GOV_FAIR_SHARE=y -CONFIG_THERMAL_GOV_STEP_WISE=y -CONFIG_THERMAL_GOV_BANG_BANG=y -CONFIG_THERMAL_GOV_USER_SPACE=y -CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y -CONFIG_CPU_THERMAL=y -CONFIG_CPU_FREQ_THERMAL=y -CONFIG_CPU_IDLE_THERMAL=y -CONFIG_CLOCK_THERMAL=y -CONFIG_DEVFREQ_THERMAL=y -# CONFIG_THERMAL_EMULATION is not set -CONFIG_THERMAL_MMIO=m -CONFIG_MAX77620_THERMAL=m -CONFIG_QORIQ_THERMAL=m -CONFIG_DA9062_THERMAL=m - -# -# Intel thermal drivers -# -CONFIG_INTEL_POWERCLAMP=m -CONFIG_X86_PKG_TEMP_THERMAL=m -CONFIG_INTEL_SOC_DTS_IOSF_CORE=m -CONFIG_INTEL_SOC_DTS_THERMAL=m - -# -# ACPI INT340X thermal drivers -# -CONFIG_INT340X_THERMAL=m -CONFIG_ACPI_THERMAL_REL=m -CONFIG_INT3406_THERMAL=m -CONFIG_PROC_THERMAL_MMIO_RAPL=y -# end of ACPI INT340X thermal drivers - -CONFIG_INTEL_BXT_PMIC_THERMAL=m -CONFIG_INTEL_PCH_THERMAL=m -# end of Intel thermal drivers - -# CONFIG_TI_SOC_THERMAL is not set -CONFIG_GENERIC_ADC_THERMAL=m -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_CORE=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y -CONFIG_WATCHDOG_OPEN_TIMEOUT=0 -CONFIG_WATCHDOG_SYSFS=y - -# -# Watchdog Pretimeout Governors -# -CONFIG_WATCHDOG_PRETIMEOUT_GOV=y -CONFIG_WATCHDOG_PRETIMEOUT_GOV_SEL=m -CONFIG_WATCHDOG_PRETIMEOUT_GOV_NOOP=m -CONFIG_WATCHDOG_PRETIMEOUT_GOV_PANIC=y -# CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_NOOP is not set -CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_PANIC=y - -# -# Watchdog Device Drivers -# -CONFIG_SOFT_WATCHDOG=m -# CONFIG_SOFT_WATCHDOG_PRETIMEOUT is not set 
-CONFIG_BD70528_WATCHDOG=m -CONFIG_DA9052_WATCHDOG=m -CONFIG_DA9055_WATCHDOG=m -CONFIG_DA9063_WATCHDOG=m -CONFIG_DA9062_WATCHDOG=m -CONFIG_GPIO_WATCHDOG=m -CONFIG_MENF21BMC_WATCHDOG=m -CONFIG_MENZ069_WATCHDOG=m -CONFIG_WDAT_WDT=m -CONFIG_WM831X_WATCHDOG=m -CONFIG_WM8350_WATCHDOG=m -CONFIG_XILINX_WATCHDOG=m -CONFIG_ZIIRAVE_WATCHDOG=m -CONFIG_RAVE_SP_WATCHDOG=m -CONFIG_MLX_WDT=m -CONFIG_CADENCE_WATCHDOG=m -CONFIG_DW_WATCHDOG=m -CONFIG_RN5T618_WATCHDOG=m -CONFIG_TWL4030_WATCHDOG=m -CONFIG_MAX63XX_WATCHDOG=m -CONFIG_MAX77620_WATCHDOG=m -CONFIG_RETU_WATCHDOG=m -CONFIG_STPMIC1_WATCHDOG=m -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_EBC_C384_WDT=m -CONFIG_F71808E_WDT=m -CONFIG_SP5100_TCO=m -CONFIG_SBC_FITPC2_WATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_IBMASR=m -CONFIG_WAFER_WDT=m -CONFIG_I6300ESB_WDT=m -CONFIG_IE6XX_WDT=m -CONFIG_ITCO_WDT=m -CONFIG_ITCO_VENDOR_SUPPORT=y -CONFIG_IT8712F_WDT=m -CONFIG_IT87_WDT=m -CONFIG_HP_WATCHDOG=m -CONFIG_HPWDT_NMI_DECODING=y -CONFIG_KEMPLD_WDT=m -CONFIG_SC1200_WDT=m -CONFIG_PC87413_WDT=m -CONFIG_NV_TCO=m -CONFIG_60XX_WDT=m -CONFIG_CPU5_WDT=m -CONFIG_SMSC_SCH311X_WDT=m -CONFIG_SMSC37B787_WDT=m -CONFIG_TQMX86_WDT=m -CONFIG_VIA_WDT=m -CONFIG_W83627HF_WDT=m -CONFIG_W83877F_WDT=m -CONFIG_W83977F_WDT=m -CONFIG_MACHZ_WDT=m -CONFIG_SBC_EPX_C3_WATCHDOG=m -CONFIG_INTEL_MEI_WDT=m -CONFIG_NI903X_WDT=m -CONFIG_NIC7018_WDT=m -CONFIG_MEN_A21_WDT=m -CONFIG_XEN_WDT=m - -# -# PCI-based Watchdog Cards -# -CONFIG_PCIPCWATCHDOG=m -CONFIG_WDTPCI=m - -# -# USB-based Watchdog Cards -# -CONFIG_USBPCWATCHDOG=m -CONFIG_SSB_POSSIBLE=y -CONFIG_SSB=m -CONFIG_SSB_SPROM=y -CONFIG_SSB_BLOCKIO=y -CONFIG_SSB_PCIHOST_POSSIBLE=y -CONFIG_SSB_PCIHOST=y -CONFIG_SSB_B43_PCI_BRIDGE=y -CONFIG_SSB_PCMCIAHOST_POSSIBLE=y -CONFIG_SSB_PCMCIAHOST=y -CONFIG_SSB_SDIOHOST_POSSIBLE=y -CONFIG_SSB_SDIOHOST=y -CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y -CONFIG_SSB_DRIVER_PCICORE=y -CONFIG_SSB_DRIVER_GPIO=y -CONFIG_BCMA_POSSIBLE=y -CONFIG_BCMA=m -CONFIG_BCMA_BLOCKIO=y -CONFIG_BCMA_HOST_PCI_POSSIBLE=y -CONFIG_BCMA_HOST_PCI=y -# CONFIG_BCMA_HOST_SOC is not set -CONFIG_BCMA_DRIVER_PCI=y -CONFIG_BCMA_DRIVER_GMAC_CMN=y -CONFIG_BCMA_DRIVER_GPIO=y -# CONFIG_BCMA_DEBUG is not set - -# -# Multifunction device drivers -# -CONFIG_MFD_CORE=y -CONFIG_MFD_ACT8945A=m -CONFIG_MFD_AS3711=y -CONFIG_MFD_AS3722=m -CONFIG_PMIC_ADP5520=y -CONFIG_MFD_AAT2870_CORE=y -CONFIG_MFD_ATMEL_FLEXCOM=m -CONFIG_MFD_ATMEL_HLCDC=m -CONFIG_MFD_BCM590XX=m -CONFIG_MFD_BD9571MWV=m -CONFIG_MFD_AXP20X=m -CONFIG_MFD_AXP20X_I2C=m -CONFIG_MFD_CROS_EC_DEV=m -CONFIG_MFD_MADERA=m -CONFIG_MFD_MADERA_I2C=m -CONFIG_MFD_MADERA_SPI=m -CONFIG_MFD_CS47L15=y -CONFIG_MFD_CS47L35=y -CONFIG_MFD_CS47L85=y -CONFIG_MFD_CS47L90=y -CONFIG_MFD_CS47L92=y -CONFIG_PMIC_DA903X=y -CONFIG_PMIC_DA9052=y -CONFIG_MFD_DA9052_SPI=y -CONFIG_MFD_DA9052_I2C=y -CONFIG_MFD_DA9055=y -CONFIG_MFD_DA9062=m -CONFIG_MFD_DA9063=m -CONFIG_MFD_DA9150=m -CONFIG_MFD_DLN2=m -CONFIG_MFD_MC13XXX=m -CONFIG_MFD_MC13XXX_SPI=m -CONFIG_MFD_MC13XXX_I2C=m -CONFIG_MFD_HI6421_PMIC=m -CONFIG_HTC_PASIC3=m -CONFIG_HTC_I2CPLD=y -CONFIG_MFD_INTEL_QUARK_I2C_GPIO=m -CONFIG_LPC_ICH=m -CONFIG_LPC_SCH=m -CONFIG_INTEL_SOC_PMIC=y -CONFIG_INTEL_SOC_PMIC_BXTWC=m -CONFIG_INTEL_SOC_PMIC_CHTWC=y -CONFIG_INTEL_SOC_PMIC_CHTDC_TI=m -CONFIG_MFD_INTEL_LPSS=m -CONFIG_MFD_INTEL_LPSS_ACPI=m -CONFIG_MFD_INTEL_LPSS_PCI=m -CONFIG_MFD_IQS62X=m -CONFIG_MFD_JANZ_CMODIO=m -CONFIG_MFD_KEMPLD=m -CONFIG_MFD_88PM800=m -CONFIG_MFD_88PM805=m -CONFIG_MFD_88PM860X=y -CONFIG_MFD_MAX14577=m 
-CONFIG_MFD_MAX77620=y -CONFIG_MFD_MAX77650=m -CONFIG_MFD_MAX77686=m -CONFIG_MFD_MAX77693=m -CONFIG_MFD_MAX77843=y -CONFIG_MFD_MAX8907=m -CONFIG_MFD_MAX8925=y -CONFIG_MFD_MAX8997=y -CONFIG_MFD_MAX8998=y -CONFIG_MFD_MT6397=m -CONFIG_MFD_MENF21BMC=m -CONFIG_EZX_PCAP=y -CONFIG_MFD_CPCAP=m -CONFIG_MFD_VIPERBOARD=m -CONFIG_MFD_RETU=m -CONFIG_MFD_PCF50633=m -CONFIG_PCF50633_ADC=m -CONFIG_PCF50633_GPIO=m -CONFIG_UCB1400_CORE=m -CONFIG_MFD_RDC321X=m -CONFIG_MFD_RT5033=m -CONFIG_MFD_RC5T583=y -CONFIG_MFD_RK808=m -CONFIG_MFD_RN5T618=m -CONFIG_MFD_SEC_CORE=y -CONFIG_MFD_SI476X_CORE=m -CONFIG_MFD_SM501=m -CONFIG_MFD_SM501_GPIO=y -CONFIG_MFD_SKY81452=m -CONFIG_MFD_SMSC=y -CONFIG_ABX500_CORE=y -CONFIG_AB3100_CORE=y -CONFIG_AB3100_OTP=y -CONFIG_MFD_STMPE=y - -# -# STMicroelectronics STMPE Interface Drivers -# -CONFIG_STMPE_I2C=y -CONFIG_STMPE_SPI=y -# end of STMicroelectronics STMPE Interface Drivers - -CONFIG_MFD_SYSCON=y -CONFIG_MFD_TI_AM335X_TSCADC=m -CONFIG_MFD_LP3943=m -CONFIG_MFD_LP8788=y -CONFIG_MFD_TI_LMU=m -CONFIG_MFD_PALMAS=y -CONFIG_TPS6105X=m -CONFIG_TPS65010=m -CONFIG_TPS6507X=m -CONFIG_MFD_TPS65086=m -CONFIG_MFD_TPS65090=y -CONFIG_MFD_TPS65217=m -CONFIG_MFD_TPS68470=y -CONFIG_MFD_TI_LP873X=m -CONFIG_MFD_TI_LP87565=m -CONFIG_MFD_TPS65218=m -CONFIG_MFD_TPS6586X=y -CONFIG_MFD_TPS65910=y -CONFIG_MFD_TPS65912=m -CONFIG_MFD_TPS65912_I2C=m -CONFIG_MFD_TPS65912_SPI=m -CONFIG_MFD_TPS80031=y -CONFIG_TWL4030_CORE=y -CONFIG_MFD_TWL4030_AUDIO=y -CONFIG_TWL6040_CORE=y -CONFIG_MFD_WL1273_CORE=m -CONFIG_MFD_LM3533=m -CONFIG_MFD_TC3589X=y -CONFIG_MFD_TQMX86=m -CONFIG_MFD_VX855=m -CONFIG_MFD_LOCHNAGAR=y -CONFIG_MFD_ARIZONA=y -CONFIG_MFD_ARIZONA_I2C=m -CONFIG_MFD_ARIZONA_SPI=m -CONFIG_MFD_CS47L24=y -CONFIG_MFD_WM5102=y -CONFIG_MFD_WM5110=y -CONFIG_MFD_WM8997=y -CONFIG_MFD_WM8998=y -CONFIG_MFD_WM8400=y -CONFIG_MFD_WM831X=y -CONFIG_MFD_WM831X_I2C=y -CONFIG_MFD_WM831X_SPI=y -CONFIG_MFD_WM8350=y -CONFIG_MFD_WM8350_I2C=y -CONFIG_MFD_WM8994=m -CONFIG_MFD_ROHM_BD718XX=m -CONFIG_MFD_ROHM_BD70528=m -CONFIG_MFD_ROHM_BD71828=m -CONFIG_MFD_STPMIC1=m -CONFIG_MFD_STMFX=m -CONFIG_MFD_WCD934X=m -CONFIG_RAVE_SP_CORE=m -# end of Multifunction device drivers - -CONFIG_REGULATOR=y -# CONFIG_REGULATOR_DEBUG is not set -CONFIG_REGULATOR_FIXED_VOLTAGE=m -CONFIG_REGULATOR_VIRTUAL_CONSUMER=m -CONFIG_REGULATOR_USERSPACE_CONSUMER=m -CONFIG_REGULATOR_88PG86X=m -CONFIG_REGULATOR_88PM800=m -CONFIG_REGULATOR_88PM8607=m -CONFIG_REGULATOR_ACT8865=m -CONFIG_REGULATOR_ACT8945A=m -CONFIG_REGULATOR_AD5398=m -CONFIG_REGULATOR_AAT2870=m -CONFIG_REGULATOR_AB3100=m -CONFIG_REGULATOR_ARIZONA_LDO1=m -CONFIG_REGULATOR_ARIZONA_MICSUPP=m -CONFIG_REGULATOR_AS3711=m -CONFIG_REGULATOR_AS3722=m -CONFIG_REGULATOR_AXP20X=m -CONFIG_REGULATOR_BCM590XX=m -CONFIG_REGULATOR_BD70528=m -CONFIG_REGULATOR_BD71828=m -CONFIG_REGULATOR_BD718XX=m -CONFIG_REGULATOR_BD9571MWV=m -CONFIG_REGULATOR_CPCAP=m -CONFIG_REGULATOR_DA903X=m -CONFIG_REGULATOR_DA9052=m -CONFIG_REGULATOR_DA9055=m -CONFIG_REGULATOR_DA9062=m -CONFIG_REGULATOR_DA9063=m -CONFIG_REGULATOR_DA9210=m -CONFIG_REGULATOR_DA9211=m -CONFIG_REGULATOR_FAN53555=m -CONFIG_REGULATOR_GPIO=m -CONFIG_REGULATOR_HI6421=m -CONFIG_REGULATOR_HI6421V530=m -CONFIG_REGULATOR_ISL9305=m -CONFIG_REGULATOR_ISL6271A=m -CONFIG_REGULATOR_LM363X=m -CONFIG_REGULATOR_LOCHNAGAR=m -CONFIG_REGULATOR_LP3971=m -CONFIG_REGULATOR_LP3972=m -CONFIG_REGULATOR_LP872X=m -CONFIG_REGULATOR_LP873X=m -CONFIG_REGULATOR_LP8755=m -CONFIG_REGULATOR_LP87565=m -CONFIG_REGULATOR_LP8788=m -CONFIG_REGULATOR_LTC3589=m -CONFIG_REGULATOR_LTC3676=m 
-CONFIG_REGULATOR_MAX14577=m -CONFIG_REGULATOR_MAX1586=m -CONFIG_REGULATOR_MAX77620=m -CONFIG_REGULATOR_MAX77650=m -CONFIG_REGULATOR_MAX8649=m -CONFIG_REGULATOR_MAX8660=m -CONFIG_REGULATOR_MAX8907=m -CONFIG_REGULATOR_MAX8925=m -CONFIG_REGULATOR_MAX8952=m -CONFIG_REGULATOR_MAX8973=m -CONFIG_REGULATOR_MAX8997=m -CONFIG_REGULATOR_MAX8998=m -CONFIG_REGULATOR_MAX77686=m -CONFIG_REGULATOR_MAX77693=m -CONFIG_REGULATOR_MAX77802=m -CONFIG_REGULATOR_MC13XXX_CORE=m -CONFIG_REGULATOR_MC13783=m -CONFIG_REGULATOR_MC13892=m -CONFIG_REGULATOR_MCP16502=m -CONFIG_REGULATOR_MP5416=m -CONFIG_REGULATOR_MP8859=m -CONFIG_REGULATOR_MP886X=m -CONFIG_REGULATOR_MPQ7920=m -CONFIG_REGULATOR_MT6311=m -CONFIG_REGULATOR_MT6323=m -CONFIG_REGULATOR_MT6397=m -CONFIG_REGULATOR_PALMAS=m -CONFIG_REGULATOR_PCAP=m -CONFIG_REGULATOR_PCF50633=m -CONFIG_REGULATOR_PFUZE100=m -CONFIG_REGULATOR_PV88060=m -CONFIG_REGULATOR_PV88080=m -CONFIG_REGULATOR_PV88090=m -CONFIG_REGULATOR_PWM=m -CONFIG_REGULATOR_QCOM_SPMI=m -CONFIG_REGULATOR_RC5T583=m -CONFIG_REGULATOR_RK808=m -CONFIG_REGULATOR_RN5T618=m -CONFIG_REGULATOR_ROHM=m -CONFIG_REGULATOR_RT5033=m -CONFIG_REGULATOR_S2MPA01=m -CONFIG_REGULATOR_S2MPS11=m -CONFIG_REGULATOR_S5M8767=m -CONFIG_REGULATOR_SKY81452=m -CONFIG_REGULATOR_SLG51000=m -CONFIG_REGULATOR_STPMIC1=m -CONFIG_REGULATOR_SY8106A=m -CONFIG_REGULATOR_SY8824X=m -CONFIG_REGULATOR_TPS51632=m -CONFIG_REGULATOR_TPS6105X=m -CONFIG_REGULATOR_TPS62360=m -CONFIG_REGULATOR_TPS65023=m -CONFIG_REGULATOR_TPS6507X=m -CONFIG_REGULATOR_TPS65086=m -CONFIG_REGULATOR_TPS65090=m -CONFIG_REGULATOR_TPS65132=m -CONFIG_REGULATOR_TPS65217=m -CONFIG_REGULATOR_TPS65218=m -CONFIG_REGULATOR_TPS6524X=m -CONFIG_REGULATOR_TPS6586X=m -CONFIG_REGULATOR_TPS65910=m -CONFIG_REGULATOR_TPS65912=m -CONFIG_REGULATOR_TPS80031=m -CONFIG_REGULATOR_TWL4030=m -CONFIG_REGULATOR_VCTRL=m -CONFIG_REGULATOR_WM831X=m -CONFIG_REGULATOR_WM8350=m -CONFIG_REGULATOR_WM8400=m -CONFIG_REGULATOR_WM8994=m -CONFIG_CEC_CORE=m -CONFIG_CEC_NOTIFIER=y -CONFIG_CEC_PIN=y -CONFIG_RC_CORE=m -CONFIG_RC_MAP=m -CONFIG_LIRC=y -CONFIG_RC_DECODERS=y -CONFIG_IR_NEC_DECODER=m -CONFIG_IR_RC5_DECODER=m -CONFIG_IR_RC6_DECODER=m -CONFIG_IR_JVC_DECODER=m -CONFIG_IR_SONY_DECODER=m -CONFIG_IR_SANYO_DECODER=m -CONFIG_IR_SHARP_DECODER=m -CONFIG_IR_MCE_KBD_DECODER=m -CONFIG_IR_XMP_DECODER=m -CONFIG_IR_IMON_DECODER=m -CONFIG_IR_RCMM_DECODER=m -CONFIG_RC_DEVICES=y -CONFIG_RC_ATI_REMOTE=m -CONFIG_IR_ENE=m -CONFIG_IR_HIX5HD2=m -CONFIG_IR_IMON=m -CONFIG_IR_IMON_RAW=m -CONFIG_IR_MCEUSB=m -CONFIG_IR_ITE_CIR=m -CONFIG_IR_FINTEK=m -CONFIG_IR_NUVOTON=m -CONFIG_IR_REDRAT3=m -CONFIG_IR_SPI=m -CONFIG_IR_STREAMZAP=m -CONFIG_IR_WINBOND_CIR=m -CONFIG_IR_IGORPLUGUSB=m -CONFIG_IR_IGUANA=m -CONFIG_IR_TTUSBIR=m -CONFIG_RC_LOOPBACK=m -CONFIG_IR_GPIO_CIR=m -CONFIG_IR_GPIO_TX=m -CONFIG_IR_PWM_TX=m -CONFIG_IR_SERIAL=m -CONFIG_IR_SERIAL_TRANSMITTER=y -CONFIG_IR_SIR=m -CONFIG_RC_XBOX_DVD=m -CONFIG_MEDIA_SUPPORT=m - -# -# Multimedia core support -# -CONFIG_MEDIA_CAMERA_SUPPORT=y -CONFIG_MEDIA_ANALOG_TV_SUPPORT=y -CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y -CONFIG_MEDIA_RADIO_SUPPORT=y -CONFIG_MEDIA_SDR_SUPPORT=y -CONFIG_MEDIA_CEC_SUPPORT=y -CONFIG_MEDIA_CEC_RC=y -# CONFIG_CEC_PIN_ERROR_INJ is not set -CONFIG_MEDIA_CONTROLLER=y -CONFIG_MEDIA_CONTROLLER_DVB=y -# CONFIG_MEDIA_CONTROLLER_REQUEST_API is not set -CONFIG_VIDEO_DEV=m -CONFIG_VIDEO_V4L2_SUBDEV_API=y -CONFIG_VIDEO_V4L2=m -CONFIG_VIDEO_V4L2_I2C=y -# CONFIG_VIDEO_ADV_DEBUG is not set -# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set -CONFIG_VIDEO_TUNER=m -CONFIG_V4L2_MEM2MEM_DEV=m 
-CONFIG_V4L2_FLASH_LED_CLASS=m -CONFIG_V4L2_FWNODE=m -CONFIG_VIDEOBUF_GEN=m -CONFIG_VIDEOBUF_DMA_SG=m -CONFIG_VIDEOBUF_VMALLOC=m -CONFIG_DVB_CORE=m -CONFIG_DVB_MMAP=y -CONFIG_DVB_NET=y -CONFIG_TTPCI_EEPROM=m -CONFIG_DVB_MAX_ADAPTERS=16 -# CONFIG_DVB_DYNAMIC_MINORS is not set -# CONFIG_DVB_DEMUX_SECTION_LOSS_LOG is not set -# CONFIG_DVB_ULE_DEBUG is not set - -# -# Media drivers -# -CONFIG_MEDIA_USB_SUPPORT=y - -# -# Webcam devices -# -CONFIG_USB_VIDEO_CLASS=m -CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y -CONFIG_USB_GSPCA=m -CONFIG_USB_M5602=m -CONFIG_USB_STV06XX=m -CONFIG_USB_GL860=m -CONFIG_USB_GSPCA_BENQ=m -CONFIG_USB_GSPCA_CONEX=m -CONFIG_USB_GSPCA_CPIA1=m -CONFIG_USB_GSPCA_DTCS033=m -CONFIG_USB_GSPCA_ETOMS=m -CONFIG_USB_GSPCA_FINEPIX=m -CONFIG_USB_GSPCA_JEILINJ=m -CONFIG_USB_GSPCA_JL2005BCD=m -CONFIG_USB_GSPCA_KINECT=m -CONFIG_USB_GSPCA_KONICA=m -CONFIG_USB_GSPCA_MARS=m -CONFIG_USB_GSPCA_MR97310A=m -CONFIG_USB_GSPCA_NW80X=m -CONFIG_USB_GSPCA_OV519=m -CONFIG_USB_GSPCA_OV534=m -CONFIG_USB_GSPCA_OV534_9=m -CONFIG_USB_GSPCA_PAC207=m -CONFIG_USB_GSPCA_PAC7302=m -CONFIG_USB_GSPCA_PAC7311=m -CONFIG_USB_GSPCA_SE401=m -CONFIG_USB_GSPCA_SN9C2028=m -CONFIG_USB_GSPCA_SN9C20X=m -CONFIG_USB_GSPCA_SONIXB=m -CONFIG_USB_GSPCA_SONIXJ=m -CONFIG_USB_GSPCA_SPCA500=m -CONFIG_USB_GSPCA_SPCA501=m -CONFIG_USB_GSPCA_SPCA505=m -CONFIG_USB_GSPCA_SPCA506=m -CONFIG_USB_GSPCA_SPCA508=m -CONFIG_USB_GSPCA_SPCA561=m -CONFIG_USB_GSPCA_SPCA1528=m -CONFIG_USB_GSPCA_SQ905=m -CONFIG_USB_GSPCA_SQ905C=m -CONFIG_USB_GSPCA_SQ930X=m -CONFIG_USB_GSPCA_STK014=m -CONFIG_USB_GSPCA_STK1135=m -CONFIG_USB_GSPCA_STV0680=m -CONFIG_USB_GSPCA_SUNPLUS=m -CONFIG_USB_GSPCA_T613=m -CONFIG_USB_GSPCA_TOPRO=m -CONFIG_USB_GSPCA_TOUPTEK=m -CONFIG_USB_GSPCA_TV8532=m -CONFIG_USB_GSPCA_VC032X=m -CONFIG_USB_GSPCA_VICAM=m -CONFIG_USB_GSPCA_XIRLINK_CIT=m -CONFIG_USB_GSPCA_ZC3XX=m -CONFIG_USB_PWC=m -# CONFIG_USB_PWC_DEBUG is not set -CONFIG_USB_PWC_INPUT_EVDEV=y -CONFIG_VIDEO_CPIA2=m -CONFIG_USB_ZR364XX=m -CONFIG_USB_STKWEBCAM=m -CONFIG_USB_S2255=m -CONFIG_VIDEO_USBTV=m - -# -# Analog TV USB devices -# -CONFIG_VIDEO_PVRUSB2=m -CONFIG_VIDEO_PVRUSB2_SYSFS=y -CONFIG_VIDEO_PVRUSB2_DVB=y -# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set -CONFIG_VIDEO_HDPVR=m -CONFIG_VIDEO_STK1160_COMMON=m -CONFIG_VIDEO_STK1160=m -CONFIG_VIDEO_GO7007=m -CONFIG_VIDEO_GO7007_USB=m -CONFIG_VIDEO_GO7007_LOADER=m -CONFIG_VIDEO_GO7007_USB_S2250_BOARD=m - -# -# Analog/digital TV USB devices -# -CONFIG_VIDEO_AU0828=m -CONFIG_VIDEO_AU0828_V4L2=y -CONFIG_VIDEO_AU0828_RC=y -CONFIG_VIDEO_CX231XX=m -CONFIG_VIDEO_CX231XX_RC=y -CONFIG_VIDEO_CX231XX_ALSA=m -CONFIG_VIDEO_CX231XX_DVB=m -CONFIG_VIDEO_TM6000=m -CONFIG_VIDEO_TM6000_ALSA=m -CONFIG_VIDEO_TM6000_DVB=m - -# -# Digital TV USB devices -# -CONFIG_DVB_USB=m -# CONFIG_DVB_USB_DEBUG is not set -CONFIG_DVB_USB_DIB3000MC=m -CONFIG_DVB_USB_A800=m -CONFIG_DVB_USB_DIBUSB_MB=m -CONFIG_DVB_USB_DIBUSB_MB_FAULTY=y -CONFIG_DVB_USB_DIBUSB_MC=m -CONFIG_DVB_USB_DIB0700=m -CONFIG_DVB_USB_UMT_010=m -CONFIG_DVB_USB_CXUSB=m -CONFIG_DVB_USB_CXUSB_ANALOG=y -CONFIG_DVB_USB_M920X=m -CONFIG_DVB_USB_DIGITV=m -CONFIG_DVB_USB_VP7045=m -CONFIG_DVB_USB_VP702X=m -CONFIG_DVB_USB_GP8PSK=m -CONFIG_DVB_USB_NOVA_T_USB2=m -CONFIG_DVB_USB_TTUSB2=m -CONFIG_DVB_USB_DTT200U=m -CONFIG_DVB_USB_OPERA1=m -CONFIG_DVB_USB_AF9005=m -CONFIG_DVB_USB_AF9005_REMOTE=m -CONFIG_DVB_USB_PCTV452E=m -CONFIG_DVB_USB_DW2102=m -CONFIG_DVB_USB_CINERGY_T2=m -CONFIG_DVB_USB_DTV5100=m -CONFIG_DVB_USB_AZ6027=m -CONFIG_DVB_USB_TECHNISAT_USB2=m -CONFIG_DVB_USB_V2=m -CONFIG_DVB_USB_AF9015=m -CONFIG_DVB_USB_AF9035=m 
-CONFIG_DVB_USB_ANYSEE=m -CONFIG_DVB_USB_AU6610=m -CONFIG_DVB_USB_AZ6007=m -CONFIG_DVB_USB_CE6230=m -CONFIG_DVB_USB_EC168=m -CONFIG_DVB_USB_GL861=m -CONFIG_DVB_USB_LME2510=m -CONFIG_DVB_USB_MXL111SF=m -CONFIG_DVB_USB_RTL28XXU=m -CONFIG_DVB_USB_DVBSKY=m -CONFIG_DVB_USB_ZD1301=m -CONFIG_DVB_TTUSB_BUDGET=m -CONFIG_DVB_TTUSB_DEC=m -CONFIG_SMS_USB_DRV=m -CONFIG_DVB_B2C2_FLEXCOP_USB=m -# CONFIG_DVB_B2C2_FLEXCOP_USB_DEBUG is not set -CONFIG_DVB_AS102=m - -# -# Webcam, TV (analog/digital) USB devices -# -CONFIG_VIDEO_EM28XX=m -CONFIG_VIDEO_EM28XX_V4L2=m -CONFIG_VIDEO_EM28XX_ALSA=m -CONFIG_VIDEO_EM28XX_DVB=m -CONFIG_VIDEO_EM28XX_RC=m - -# -# Software defined radio USB devices -# -CONFIG_USB_AIRSPY=m -CONFIG_USB_HACKRF=m -CONFIG_USB_MSI2500=m - -# -# USB HDMI CEC adapters -# -CONFIG_USB_PULSE8_CEC=m -CONFIG_USB_RAINSHADOW_CEC=m -CONFIG_MEDIA_PCI_SUPPORT=y - -# -# Media capture support -# -CONFIG_VIDEO_MEYE=m -CONFIG_VIDEO_SOLO6X10=m -CONFIG_VIDEO_TW5864=m -CONFIG_VIDEO_TW68=m -CONFIG_VIDEO_TW686X=m - -# -# Media capture/analog TV support -# -CONFIG_VIDEO_IVTV=m -# CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS is not set -CONFIG_VIDEO_IVTV_ALSA=m -CONFIG_VIDEO_FB_IVTV=m -# CONFIG_VIDEO_FB_IVTV_FORCE_PAT is not set -CONFIG_VIDEO_HEXIUM_GEMINI=m -CONFIG_VIDEO_HEXIUM_ORION=m -CONFIG_VIDEO_MXB=m -CONFIG_VIDEO_DT3155=m - -# -# Media capture/analog/hybrid TV support -# -CONFIG_VIDEO_CX18=m -CONFIG_VIDEO_CX18_ALSA=m -CONFIG_VIDEO_CX23885=m -CONFIG_MEDIA_ALTERA_CI=m -CONFIG_VIDEO_CX25821=m -CONFIG_VIDEO_CX25821_ALSA=m -CONFIG_VIDEO_CX88=m -CONFIG_VIDEO_CX88_ALSA=m -CONFIG_VIDEO_CX88_BLACKBIRD=m -CONFIG_VIDEO_CX88_DVB=m -CONFIG_VIDEO_CX88_ENABLE_VP3054=y -CONFIG_VIDEO_CX88_VP3054=m -CONFIG_VIDEO_CX88_MPEG=m -CONFIG_VIDEO_BT848=m -CONFIG_DVB_BT8XX=m -CONFIG_VIDEO_SAA7134=m -CONFIG_VIDEO_SAA7134_ALSA=m -CONFIG_VIDEO_SAA7134_RC=y -CONFIG_VIDEO_SAA7134_DVB=m -CONFIG_VIDEO_SAA7134_GO7007=m -CONFIG_VIDEO_SAA7164=m - -# -# Media digital TV PCI Adapters -# -CONFIG_DVB_AV7110_IR=y -CONFIG_DVB_AV7110=m -CONFIG_DVB_AV7110_OSD=y -CONFIG_DVB_BUDGET_CORE=m -CONFIG_DVB_BUDGET=m -CONFIG_DVB_BUDGET_CI=m -CONFIG_DVB_BUDGET_AV=m -CONFIG_DVB_BUDGET_PATCH=m -CONFIG_DVB_B2C2_FLEXCOP_PCI=m -# CONFIG_DVB_B2C2_FLEXCOP_PCI_DEBUG is not set -CONFIG_DVB_PLUTO2=m -CONFIG_DVB_DM1105=m -CONFIG_DVB_PT1=m -CONFIG_DVB_PT3=m -CONFIG_MANTIS_CORE=m -CONFIG_DVB_MANTIS=m -CONFIG_DVB_HOPPER=m -CONFIG_DVB_NGENE=m -CONFIG_DVB_DDBRIDGE=m -# CONFIG_DVB_DDBRIDGE_MSIENABLE is not set -CONFIG_DVB_SMIPCIE=m -CONFIG_DVB_NETUP_UNIDVB=m -CONFIG_VIDEO_IPU3_CIO2=m -CONFIG_V4L_PLATFORM_DRIVERS=y -CONFIG_VIDEO_CAFE_CCIC=m -CONFIG_VIDEO_CADENCE=y -CONFIG_VIDEO_CADENCE_CSI2RX=m -CONFIG_VIDEO_CADENCE_CSI2TX=m -CONFIG_VIDEO_ASPEED=m -CONFIG_VIDEO_MUX=m -CONFIG_VIDEO_XILINX=m -CONFIG_VIDEO_XILINX_TPG=m -CONFIG_VIDEO_XILINX_VTC=m -CONFIG_V4L_MEM2MEM_DRIVERS=y -CONFIG_VIDEO_MEM2MEM_DEINTERLACE=m -CONFIG_VIDEO_SH_VEU=m -CONFIG_V4L_TEST_DRIVERS=y -CONFIG_VIDEO_VIMC=m -CONFIG_VIDEO_VIVID=m -CONFIG_VIDEO_VIVID_CEC=y -CONFIG_VIDEO_VIVID_MAX_DEVS=64 -CONFIG_VIDEO_VIM2M=m -CONFIG_VIDEO_VICODEC=m -CONFIG_DVB_PLATFORM_DRIVERS=y -CONFIG_CEC_PLATFORM_DRIVERS=y -CONFIG_VIDEO_CROS_EC_CEC=m -CONFIG_CEC_GPIO=m -CONFIG_VIDEO_SECO_CEC=m -CONFIG_VIDEO_SECO_RC=y -CONFIG_SDR_PLATFORM_DRIVERS=y - -# -# Supported MMC/SDIO adapters -# -CONFIG_SMS_SDIO_DRV=m -CONFIG_RADIO_ADAPTERS=y -CONFIG_RADIO_TEA575X=m -CONFIG_RADIO_SI470X=m -CONFIG_USB_SI470X=m -CONFIG_I2C_SI470X=m -CONFIG_RADIO_SI4713=m -CONFIG_USB_SI4713=m -CONFIG_PLATFORM_SI4713=m -CONFIG_I2C_SI4713=m -CONFIG_RADIO_SI476X=m -CONFIG_USB_MR800=m 
-CONFIG_USB_DSBR=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_SHARK=m -CONFIG_RADIO_SHARK2=m -CONFIG_USB_KEENE=m -CONFIG_USB_RAREMONO=m -CONFIG_USB_MA901=m -CONFIG_RADIO_TEA5764=m -CONFIG_RADIO_SAA7706H=m -CONFIG_RADIO_TEF6862=m -CONFIG_RADIO_WL1273=m - -# -# Texas Instruments WL128x FM driver (ST based) -# -CONFIG_RADIO_WL128X=m -# end of Texas Instruments WL128x FM driver (ST based) - -# -# Supported FireWire (IEEE 1394) Adapters -# -CONFIG_DVB_FIREDTV=m -CONFIG_DVB_FIREDTV_INPUT=y -CONFIG_MEDIA_COMMON_OPTIONS=y - -# -# common driver options -# -CONFIG_VIDEO_CX2341X=m -CONFIG_VIDEO_TVEEPROM=m -CONFIG_CYPRESS_FIRMWARE=m -CONFIG_VIDEOBUF2_CORE=m -CONFIG_VIDEOBUF2_V4L2=m -CONFIG_VIDEOBUF2_MEMOPS=m -CONFIG_VIDEOBUF2_DMA_CONTIG=m -CONFIG_VIDEOBUF2_VMALLOC=m -CONFIG_VIDEOBUF2_DMA_SG=m -CONFIG_VIDEOBUF2_DVB=m -CONFIG_DVB_B2C2_FLEXCOP=m -CONFIG_VIDEO_SAA7146=m -CONFIG_VIDEO_SAA7146_VV=m -CONFIG_SMS_SIANO_MDTV=m -CONFIG_SMS_SIANO_RC=y -# CONFIG_SMS_SIANO_DEBUGFS is not set -CONFIG_VIDEO_V4L2_TPG=m - -# -# Media ancillary drivers (tuners, sensors, i2c, spi, frontends) -# -CONFIG_MEDIA_SUBDRV_AUTOSELECT=y -CONFIG_MEDIA_ATTACH=y -CONFIG_VIDEO_IR_I2C=m - -# -# I2C Encoders, decoders, sensors and other helper chips -# - -# -# Audio decoders, processors and mixers -# -CONFIG_VIDEO_TVAUDIO=m -CONFIG_VIDEO_TDA7432=m -CONFIG_VIDEO_TDA9840=m -CONFIG_VIDEO_TDA1997X=m -CONFIG_VIDEO_TEA6415C=m -CONFIG_VIDEO_TEA6420=m -CONFIG_VIDEO_MSP3400=m -CONFIG_VIDEO_CS3308=m -CONFIG_VIDEO_CS5345=m -CONFIG_VIDEO_CS53L32A=m -CONFIG_VIDEO_TLV320AIC23B=m -CONFIG_VIDEO_UDA1342=m -CONFIG_VIDEO_WM8775=m -CONFIG_VIDEO_WM8739=m -CONFIG_VIDEO_VP27SMPX=m -CONFIG_VIDEO_SONY_BTF_MPX=m - -# -# RDS decoders -# -CONFIG_VIDEO_SAA6588=m - -# -# Video decoders -# -CONFIG_VIDEO_ADV7180=m -CONFIG_VIDEO_ADV7183=m -CONFIG_VIDEO_ADV748X=m -CONFIG_VIDEO_ADV7604=m -CONFIG_VIDEO_ADV7604_CEC=y -CONFIG_VIDEO_ADV7842=m -CONFIG_VIDEO_ADV7842_CEC=y -CONFIG_VIDEO_BT819=m -CONFIG_VIDEO_BT856=m -CONFIG_VIDEO_BT866=m -CONFIG_VIDEO_KS0127=m -CONFIG_VIDEO_ML86V7667=m -CONFIG_VIDEO_SAA7110=m -CONFIG_VIDEO_SAA711X=m -CONFIG_VIDEO_TC358743=m -CONFIG_VIDEO_TC358743_CEC=y -CONFIG_VIDEO_TVP514X=m -CONFIG_VIDEO_TVP5150=m -CONFIG_VIDEO_TVP7002=m -CONFIG_VIDEO_TW2804=m -CONFIG_VIDEO_TW9903=m -CONFIG_VIDEO_TW9906=m -CONFIG_VIDEO_TW9910=m -CONFIG_VIDEO_VPX3220=m - -# -# Video and audio decoders -# -CONFIG_VIDEO_SAA717X=m -CONFIG_VIDEO_CX25840=m - -# -# Video encoders -# -CONFIG_VIDEO_SAA7127=m -CONFIG_VIDEO_SAA7185=m -CONFIG_VIDEO_ADV7170=m -CONFIG_VIDEO_ADV7175=m -CONFIG_VIDEO_ADV7343=m -CONFIG_VIDEO_ADV7393=m -CONFIG_VIDEO_AD9389B=m -CONFIG_VIDEO_AK881X=m -CONFIG_VIDEO_THS8200=m - -# -# Camera sensor devices -# -CONFIG_VIDEO_APTINA_PLL=m -CONFIG_VIDEO_SMIAPP_PLL=m -CONFIG_VIDEO_HI556=m -CONFIG_VIDEO_IMX214=m -CONFIG_VIDEO_IMX219=m -CONFIG_VIDEO_IMX258=m -CONFIG_VIDEO_IMX274=m -CONFIG_VIDEO_IMX290=m -CONFIG_VIDEO_IMX319=m -CONFIG_VIDEO_IMX355=m -CONFIG_VIDEO_OV2640=m -CONFIG_VIDEO_OV2659=m -CONFIG_VIDEO_OV2680=m -CONFIG_VIDEO_OV2685=m -CONFIG_VIDEO_OV5640=m -CONFIG_VIDEO_OV5645=m -CONFIG_VIDEO_OV5647=m -CONFIG_VIDEO_OV6650=m -CONFIG_VIDEO_OV5670=m -CONFIG_VIDEO_OV5675=m -CONFIG_VIDEO_OV5695=m -CONFIG_VIDEO_OV7251=m -CONFIG_VIDEO_OV772X=m -CONFIG_VIDEO_OV7640=m -CONFIG_VIDEO_OV7670=m -CONFIG_VIDEO_OV7740=m -CONFIG_VIDEO_OV8856=m -CONFIG_VIDEO_OV9640=m -CONFIG_VIDEO_OV9650=m -CONFIG_VIDEO_OV13858=m -CONFIG_VIDEO_VS6624=m -CONFIG_VIDEO_MT9M001=m -CONFIG_VIDEO_MT9M032=m -CONFIG_VIDEO_MT9M111=m -CONFIG_VIDEO_MT9P031=m -CONFIG_VIDEO_MT9T001=m -CONFIG_VIDEO_MT9T112=m 
-CONFIG_VIDEO_MT9V011=m -CONFIG_VIDEO_MT9V032=m -CONFIG_VIDEO_MT9V111=m -CONFIG_VIDEO_SR030PC30=m -CONFIG_VIDEO_NOON010PC30=m -CONFIG_VIDEO_M5MOLS=m -CONFIG_VIDEO_RJ54N1=m -CONFIG_VIDEO_S5K6AA=m -CONFIG_VIDEO_S5K6A3=m -CONFIG_VIDEO_S5K4ECGX=m -CONFIG_VIDEO_S5K5BAF=m -CONFIG_VIDEO_SMIAPP=m -CONFIG_VIDEO_ET8EK8=m -CONFIG_VIDEO_S5C73M3=m - -# -# Lens drivers -# -CONFIG_VIDEO_AD5820=m -CONFIG_VIDEO_AK7375=m -CONFIG_VIDEO_DW9714=m -CONFIG_VIDEO_DW9807_VCM=m - -# -# Flash devices -# -CONFIG_VIDEO_ADP1653=m -CONFIG_VIDEO_LM3560=m -CONFIG_VIDEO_LM3646=m - -# -# Video improvement chips -# -CONFIG_VIDEO_UPD64031A=m -CONFIG_VIDEO_UPD64083=m - -# -# Audio/Video compression chips -# -CONFIG_VIDEO_SAA6752HS=m - -# -# SDR tuner chips -# -CONFIG_SDR_MAX2175=m - -# -# Miscellaneous helper chips -# -CONFIG_VIDEO_THS7303=m -CONFIG_VIDEO_M52790=m -CONFIG_VIDEO_I2C=m -CONFIG_VIDEO_ST_MIPID02=m -# end of I2C Encoders, decoders, sensors and other helper chips - -# -# SPI helper chips -# -CONFIG_VIDEO_GS1662=m -# end of SPI helper chips - -# -# Media SPI Adapters -# -CONFIG_CXD2880_SPI_DRV=m -# end of Media SPI Adapters - -CONFIG_MEDIA_TUNER=m - -# -# Customize TV tuners -# -CONFIG_MEDIA_TUNER_SIMPLE=m -CONFIG_MEDIA_TUNER_TDA18250=m -CONFIG_MEDIA_TUNER_TDA8290=m -CONFIG_MEDIA_TUNER_TDA827X=m -CONFIG_MEDIA_TUNER_TDA18271=m -CONFIG_MEDIA_TUNER_TDA9887=m -CONFIG_MEDIA_TUNER_TEA5761=m -CONFIG_MEDIA_TUNER_TEA5767=m -CONFIG_MEDIA_TUNER_MSI001=m -CONFIG_MEDIA_TUNER_MT20XX=m -CONFIG_MEDIA_TUNER_MT2060=m -CONFIG_MEDIA_TUNER_MT2063=m -CONFIG_MEDIA_TUNER_MT2266=m -CONFIG_MEDIA_TUNER_MT2131=m -CONFIG_MEDIA_TUNER_QT1010=m -CONFIG_MEDIA_TUNER_XC2028=m -CONFIG_MEDIA_TUNER_XC5000=m -CONFIG_MEDIA_TUNER_XC4000=m -CONFIG_MEDIA_TUNER_MXL5005S=m -CONFIG_MEDIA_TUNER_MXL5007T=m -CONFIG_MEDIA_TUNER_MC44S803=m -CONFIG_MEDIA_TUNER_MAX2165=m -CONFIG_MEDIA_TUNER_TDA18218=m -CONFIG_MEDIA_TUNER_FC0011=m -CONFIG_MEDIA_TUNER_FC0012=m -CONFIG_MEDIA_TUNER_FC0013=m -CONFIG_MEDIA_TUNER_TDA18212=m -CONFIG_MEDIA_TUNER_E4000=m -CONFIG_MEDIA_TUNER_FC2580=m -CONFIG_MEDIA_TUNER_M88RS6000T=m -CONFIG_MEDIA_TUNER_TUA9001=m -CONFIG_MEDIA_TUNER_SI2157=m -CONFIG_MEDIA_TUNER_IT913X=m -CONFIG_MEDIA_TUNER_R820T=m -CONFIG_MEDIA_TUNER_MXL301RF=m -CONFIG_MEDIA_TUNER_QM1D1C0042=m -CONFIG_MEDIA_TUNER_QM1D1B0004=m -# end of Customize TV tuners - -# -# Customise DVB Frontends -# - -# -# Multistandard (satellite) frontends -# -CONFIG_DVB_STB0899=m -CONFIG_DVB_STB6100=m -CONFIG_DVB_STV090x=m -CONFIG_DVB_STV0910=m -CONFIG_DVB_STV6110x=m -CONFIG_DVB_STV6111=m -CONFIG_DVB_MXL5XX=m -CONFIG_DVB_M88DS3103=m - -# -# Multistandard (cable + terrestrial) frontends -# -CONFIG_DVB_DRXK=m -CONFIG_DVB_TDA18271C2DD=m -CONFIG_DVB_SI2165=m -CONFIG_DVB_MN88472=m -CONFIG_DVB_MN88473=m - -# -# DVB-S (satellite) frontends -# -CONFIG_DVB_CX24110=m -CONFIG_DVB_CX24123=m -CONFIG_DVB_MT312=m -CONFIG_DVB_ZL10036=m -CONFIG_DVB_ZL10039=m -CONFIG_DVB_S5H1420=m -CONFIG_DVB_STV0288=m -CONFIG_DVB_STB6000=m -CONFIG_DVB_STV0299=m -CONFIG_DVB_STV6110=m -CONFIG_DVB_STV0900=m -CONFIG_DVB_TDA8083=m -CONFIG_DVB_TDA10086=m -CONFIG_DVB_TDA8261=m -CONFIG_DVB_VES1X93=m -CONFIG_DVB_TUNER_ITD1000=m -CONFIG_DVB_TUNER_CX24113=m -CONFIG_DVB_TDA826X=m -CONFIG_DVB_TUA6100=m -CONFIG_DVB_CX24116=m -CONFIG_DVB_CX24117=m -CONFIG_DVB_CX24120=m -CONFIG_DVB_SI21XX=m -CONFIG_DVB_TS2020=m -CONFIG_DVB_DS3000=m -CONFIG_DVB_MB86A16=m -CONFIG_DVB_TDA10071=m - -# -# DVB-T (terrestrial) frontends -# -CONFIG_DVB_SP8870=m -CONFIG_DVB_SP887X=m -CONFIG_DVB_CX22700=m -CONFIG_DVB_CX22702=m -CONFIG_DVB_S5H1432=m -CONFIG_DVB_DRXD=m 
-CONFIG_DVB_L64781=m -CONFIG_DVB_TDA1004X=m -CONFIG_DVB_NXT6000=m -CONFIG_DVB_MT352=m -CONFIG_DVB_ZL10353=m -CONFIG_DVB_DIB3000MB=m -CONFIG_DVB_DIB3000MC=m -CONFIG_DVB_DIB7000M=m -CONFIG_DVB_DIB7000P=m -CONFIG_DVB_DIB9000=m -CONFIG_DVB_TDA10048=m -CONFIG_DVB_AF9013=m -CONFIG_DVB_EC100=m -CONFIG_DVB_STV0367=m -CONFIG_DVB_CXD2820R=m -CONFIG_DVB_CXD2841ER=m -CONFIG_DVB_RTL2830=m -CONFIG_DVB_RTL2832=m -CONFIG_DVB_RTL2832_SDR=m -CONFIG_DVB_SI2168=m -CONFIG_DVB_AS102_FE=m -CONFIG_DVB_ZD1301_DEMOD=m -CONFIG_DVB_GP8PSK_FE=m -CONFIG_DVB_CXD2880=m - -# -# DVB-C (cable) frontends -# -CONFIG_DVB_VES1820=m -CONFIG_DVB_TDA10021=m -CONFIG_DVB_TDA10023=m -CONFIG_DVB_STV0297=m - -# -# ATSC (North American/Korean Terrestrial/Cable DTV) frontends -# -CONFIG_DVB_NXT200X=m -CONFIG_DVB_OR51211=m -CONFIG_DVB_OR51132=m -CONFIG_DVB_BCM3510=m -CONFIG_DVB_LGDT330X=m -CONFIG_DVB_LGDT3305=m -CONFIG_DVB_LGDT3306A=m -CONFIG_DVB_LG2160=m -CONFIG_DVB_S5H1409=m -CONFIG_DVB_AU8522=m -CONFIG_DVB_AU8522_DTV=m -CONFIG_DVB_AU8522_V4L=m -CONFIG_DVB_S5H1411=m - -# -# ISDB-T (terrestrial) frontends -# -CONFIG_DVB_S921=m -CONFIG_DVB_DIB8000=m -CONFIG_DVB_MB86A20S=m - -# -# ISDB-S (satellite) & ISDB-T (terrestrial) frontends -# -CONFIG_DVB_TC90522=m -CONFIG_DVB_MN88443X=m - -# -# Digital terrestrial only tuners/PLL -# -CONFIG_DVB_PLL=m -CONFIG_DVB_TUNER_DIB0070=m -CONFIG_DVB_TUNER_DIB0090=m - -# -# SEC control devices for DVB-S -# -CONFIG_DVB_DRX39XYJ=m -CONFIG_DVB_LNBH25=m -CONFIG_DVB_LNBH29=m -CONFIG_DVB_LNBP21=m -CONFIG_DVB_LNBP22=m -CONFIG_DVB_ISL6405=m -CONFIG_DVB_ISL6421=m -CONFIG_DVB_ISL6423=m -CONFIG_DVB_A8293=m -CONFIG_DVB_LGS8GL5=m -CONFIG_DVB_LGS8GXX=m -CONFIG_DVB_ATBM8830=m -CONFIG_DVB_TDA665x=m -CONFIG_DVB_IX2505V=m -CONFIG_DVB_M88RS2000=m -CONFIG_DVB_AF9033=m -CONFIG_DVB_HORUS3A=m -CONFIG_DVB_ASCOT2E=m -CONFIG_DVB_HELENE=m - -# -# Common Interface (EN50221) controller drivers -# -CONFIG_DVB_CXD2099=m -CONFIG_DVB_SP2=m - -# -# Tools to develop new frontends -# -CONFIG_DVB_DUMMY_FE=m -# end of Customise DVB Frontends - -# -# Graphics support -# -CONFIG_AGP=m -CONFIG_AGP_AMD64=m -CONFIG_AGP_INTEL=m -CONFIG_AGP_SIS=m -CONFIG_AGP_VIA=m -CONFIG_INTEL_GTT=m -CONFIG_VGA_ARB=y -CONFIG_VGA_ARB_MAX_GPUS=10 -CONFIG_VGA_SWITCHEROO=y -CONFIG_DRM=m -CONFIG_DRM_MIPI_DBI=m -CONFIG_DRM_MIPI_DSI=y -CONFIG_DRM_DP_AUX_CHARDEV=y -# CONFIG_DRM_DEBUG_SELFTEST is not set -CONFIG_DRM_KMS_HELPER=m -CONFIG_DRM_KMS_FB_HELPER=y -# CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS is not set -CONFIG_DRM_FBDEV_EMULATION=y -CONFIG_DRM_FBDEV_OVERALLOC=100 -# CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM is not set -CONFIG_DRM_LOAD_EDID_FIRMWARE=y -CONFIG_DRM_DP_CEC=y -CONFIG_DRM_TTM=m -CONFIG_DRM_TTM_DMA_PAGE_POOL=y -CONFIG_DRM_VRAM_HELPER=m -CONFIG_DRM_TTM_HELPER=m -CONFIG_DRM_GEM_CMA_HELPER=y -CONFIG_DRM_KMS_CMA_HELPER=y -CONFIG_DRM_GEM_SHMEM_HELPER=y -CONFIG_DRM_SCHED=m - -# -# I2C encoder or helper chips -# -CONFIG_DRM_I2C_CH7006=m -CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_I2C_NXP_TDA998X=m -CONFIG_DRM_I2C_NXP_TDA9950=m -# end of I2C encoder or helper chips - -# -# ARM devices -# -CONFIG_DRM_KOMEDA=m -# end of ARM devices - -CONFIG_DRM_RADEON=m -CONFIG_DRM_RADEON_USERPTR=y -CONFIG_DRM_AMDGPU=m -CONFIG_DRM_AMDGPU_SI=y -CONFIG_DRM_AMDGPU_CIK=y -CONFIG_DRM_AMDGPU_USERPTR=y -# CONFIG_DRM_AMDGPU_GART_DEBUGFS is not set - -# -# ACP (Audio CoProcessor) Configuration -# -CONFIG_DRM_AMD_ACP=y -# end of ACP (Audio CoProcessor) Configuration - -# -# Display Engine Configuration -# -CONFIG_DRM_AMD_DC=y -CONFIG_DRM_AMD_DC_DCN=y -CONFIG_DRM_AMD_DC_HDCP=y -# CONFIG_DEBUG_KERNEL_DC is not set 
-# end of Display Engine Configuration - -CONFIG_HSA_AMD=y -CONFIG_DRM_NOUVEAU=m -# CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT is not set -CONFIG_NOUVEAU_DEBUG=5 -CONFIG_NOUVEAU_DEBUG_DEFAULT=3 -# CONFIG_NOUVEAU_DEBUG_MMU is not set -CONFIG_DRM_NOUVEAU_BACKLIGHT=y -CONFIG_DRM_NOUVEAU_SVM=y -CONFIG_DRM_I915=m -CONFIG_DRM_I915_FORCE_PROBE="*" -CONFIG_DRM_I915_CAPTURE_ERROR=y -CONFIG_DRM_I915_COMPRESS_ERROR=y -CONFIG_DRM_I915_USERPTR=y -CONFIG_DRM_I915_GVT=y -CONFIG_DRM_I915_GVT_KVMGT=m - -# -# drm/i915 Debugging -# -# CONFIG_DRM_I915_WERROR is not set -# CONFIG_DRM_I915_DEBUG is not set -# CONFIG_DRM_I915_DEBUG_MMIO is not set -# CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS is not set -# CONFIG_DRM_I915_SW_FENCE_CHECK_DAG is not set -# CONFIG_DRM_I915_DEBUG_GUC is not set -# CONFIG_DRM_I915_SELFTEST is not set -# CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS is not set -# CONFIG_DRM_I915_DEBUG_VBLANK_EVADE is not set -# CONFIG_DRM_I915_DEBUG_RUNTIME_PM is not set -# end of drm/i915 Debugging - -# -# drm/i915 Profile Guided Optimisation -# -CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND=250 -CONFIG_DRM_I915_HEARTBEAT_INTERVAL=2500 -CONFIG_DRM_I915_PREEMPT_TIMEOUT=640 -CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT=8000 -CONFIG_DRM_I915_STOP_TIMEOUT=100 -CONFIG_DRM_I915_TIMESLICE_DURATION=1 -# end of drm/i915 Profile Guided Optimisation - -CONFIG_DRM_VGEM=m -CONFIG_DRM_VKMS=m -CONFIG_DRM_VMWGFX=m -CONFIG_DRM_VMWGFX_FBCON=y -CONFIG_DRM_GMA500=m -CONFIG_DRM_GMA600=y -CONFIG_DRM_GMA3600=y -CONFIG_DRM_UDL=m -CONFIG_DRM_AST=m -CONFIG_DRM_MGAG200=m -CONFIG_DRM_CIRRUS_QEMU=m -CONFIG_DRM_RCAR_DW_HDMI=m -CONFIG_DRM_RCAR_LVDS=m -CONFIG_DRM_QXL=m -CONFIG_DRM_BOCHS=m -CONFIG_DRM_VIRTIO_GPU=m -CONFIG_DRM_PANEL=y - -# -# Display Panels -# -CONFIG_DRM_PANEL_ARM_VERSATILE=m -CONFIG_DRM_PANEL_BOE_HIMAX8279D=m -CONFIG_DRM_PANEL_BOE_TV101WUM_NL6=m -CONFIG_DRM_PANEL_LVDS=m -CONFIG_DRM_PANEL_SIMPLE=m -CONFIG_DRM_PANEL_ELIDA_KD35T133=m -CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02=m -CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m -CONFIG_DRM_PANEL_ILITEK_IL9322=m -CONFIG_DRM_PANEL_ILITEK_ILI9881C=m -CONFIG_DRM_PANEL_INNOLUX_P079ZCA=m -CONFIG_DRM_PANEL_JDI_LT070ME05000=m -CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04=m -CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829=m -CONFIG_DRM_PANEL_SAMSUNG_LD9040=m -CONFIG_DRM_PANEL_LG_LB035Q02=m -CONFIG_DRM_PANEL_LG_LG4573=m -CONFIG_DRM_PANEL_NEC_NL8048HL11=m -CONFIG_DRM_PANEL_NOVATEK_NT35510=m -CONFIG_DRM_PANEL_NOVATEK_NT39016=m -CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO=m -CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m -CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS=m -CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m -CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m -CONFIG_DRM_PANEL_RAYDIUM_RM67191=m -CONFIG_DRM_PANEL_RAYDIUM_RM68200=m -CONFIG_DRM_PANEL_ROCKTECH_JH057N00900=m -CONFIG_DRM_PANEL_RONBO_RB070D30=m -CONFIG_DRM_PANEL_SAMSUNG_S6D16D0=m -CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2=m -CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03=m -CONFIG_DRM_PANEL_SAMSUNG_S6E63M0=m -CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01=m -CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0=m -CONFIG_DRM_PANEL_SEIKO_43WVF1G=m -CONFIG_DRM_PANEL_SHARP_LQ101R1SX01=m -CONFIG_DRM_PANEL_SHARP_LS037V7DW01=m -CONFIG_DRM_PANEL_SHARP_LS043T1LE01=m -CONFIG_DRM_PANEL_SITRONIX_ST7701=m -CONFIG_DRM_PANEL_SITRONIX_ST7789V=m -CONFIG_DRM_PANEL_SONY_ACX424AKP=m -CONFIG_DRM_PANEL_SONY_ACX565AKM=m -CONFIG_DRM_PANEL_TPO_TD028TTEC1=m -CONFIG_DRM_PANEL_TPO_TD043MTEA1=m -CONFIG_DRM_PANEL_TPO_TPG110=m -CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA=m -CONFIG_DRM_PANEL_XINPENG_XPP055C272=m -# end of Display Panels - -CONFIG_DRM_BRIDGE=y -CONFIG_DRM_PANEL_BRIDGE=y - -# -# 
Display Interface Bridges -# -CONFIG_DRM_CDNS_DSI=m -CONFIG_DRM_DISPLAY_CONNECTOR=m -CONFIG_DRM_LVDS_CODEC=m -CONFIG_DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW=m -CONFIG_DRM_NXP_PTN3460=m -CONFIG_DRM_PARADE_PS8622=m -CONFIG_DRM_PARADE_PS8640=m -CONFIG_DRM_SIL_SII8620=m -CONFIG_DRM_SII902X=m -CONFIG_DRM_SII9234=m -CONFIG_DRM_SIMPLE_BRIDGE=m -CONFIG_DRM_THINE_THC63LVD1024=m -CONFIG_DRM_TOSHIBA_TC358764=m -CONFIG_DRM_TOSHIBA_TC358767=m -CONFIG_DRM_TOSHIBA_TC358768=m -CONFIG_DRM_TI_TFP410=m -CONFIG_DRM_TI_SN65DSI86=m -CONFIG_DRM_TI_TPD12S015=m -CONFIG_DRM_ANALOGIX_ANX6345=m -CONFIG_DRM_ANALOGIX_ANX78XX=m -CONFIG_DRM_ANALOGIX_DP=m -CONFIG_DRM_I2C_ADV7511=m -CONFIG_DRM_I2C_ADV7511_AUDIO=y -CONFIG_DRM_I2C_ADV7511_CEC=y -CONFIG_DRM_DW_HDMI=m -CONFIG_DRM_DW_HDMI_AHB_AUDIO=m -CONFIG_DRM_DW_HDMI_I2S_AUDIO=m -CONFIG_DRM_DW_HDMI_CEC=m -# end of Display Interface Bridges - -# CONFIG_DRM_ETNAVIV is not set -CONFIG_DRM_ARCPGU=m -CONFIG_DRM_MXS=y -CONFIG_DRM_MXSFB=m -CONFIG_DRM_GM12U320=m -CONFIG_TINYDRM_HX8357D=m -CONFIG_TINYDRM_ILI9225=m -CONFIG_TINYDRM_ILI9341=m -CONFIG_TINYDRM_ILI9486=m -CONFIG_TINYDRM_MI0283QT=m -CONFIG_TINYDRM_REPAPER=m -CONFIG_TINYDRM_ST7586=m -CONFIG_TINYDRM_ST7735R=m -CONFIG_DRM_XEN=y -CONFIG_DRM_XEN_FRONTEND=m -CONFIG_DRM_VBOXVIDEO=m -# CONFIG_DRM_LEGACY is not set -CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y - -# -# Frame buffer Devices -# -CONFIG_FB_CMDLINE=y -CONFIG_FB_NOTIFY=y -CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y -CONFIG_FB_BOOT_VESA_SUPPORT=y -CONFIG_FB_CFB_FILLRECT=y -CONFIG_FB_CFB_COPYAREA=y -CONFIG_FB_CFB_IMAGEBLIT=y -CONFIG_FB_SYS_FILLRECT=m -CONFIG_FB_SYS_COPYAREA=m -CONFIG_FB_SYS_IMAGEBLIT=m -# CONFIG_FB_FOREIGN_ENDIAN is not set -CONFIG_FB_SYS_FOPS=m -CONFIG_FB_DEFERRED_IO=y -CONFIG_FB_BACKLIGHT=m -CONFIG_FB_MODE_HELPERS=y -CONFIG_FB_TILEBLITTING=y - -# -# Frame buffer hardware drivers -# -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ARC is not set -# CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_VGA16 is not set -# CONFIG_FB_UVESA is not set -CONFIG_FB_VESA=y -CONFIG_FB_EFI=y -# CONFIG_FB_N411 is not set -# CONFIG_FB_HGA is not set -# CONFIG_FB_OPENCORES is not set -# CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set -# CONFIG_FB_I740 is not set -# CONFIG_FB_LE80578 is not set -# CONFIG_FB_INTEL is not set -# CONFIG_FB_MATROX is not set -# CONFIG_FB_RADEON is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_S3 is not set -# CONFIG_FB_SAVAGE is not set -# CONFIG_FB_SIS is not set -# CONFIG_FB_VIA is not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VOODOO1 is not set -# CONFIG_FB_VT8623 is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_ARK is not set -# CONFIG_FB_PM3 is not set -# CONFIG_FB_CARMINE is not set -# CONFIG_FB_SM501 is not set -# CONFIG_FB_SMSCUFX is not set -# CONFIG_FB_UDL is not set -# CONFIG_FB_IBM_GXT4500 is not set -# CONFIG_FB_VIRTUAL is not set -CONFIG_XEN_FBDEV_FRONTEND=m -# CONFIG_FB_METRONOME is not set -# CONFIG_FB_MB862XX is not set -CONFIG_FB_HYPERV=m -CONFIG_FB_SIMPLE=y -# CONFIG_FB_SSD1307 is not set -# CONFIG_FB_SM712 is not set -# end of Frame buffer Devices - -# -# Backlight & LCD device support -# -CONFIG_LCD_CLASS_DEVICE=m -CONFIG_LCD_L4F00242T03=m -CONFIG_LCD_LMS283GF05=m -CONFIG_LCD_LTV350QV=m -CONFIG_LCD_ILI922X=m -CONFIG_LCD_ILI9320=m -CONFIG_LCD_TDO24M=m -CONFIG_LCD_VGG2432A4=m -CONFIG_LCD_PLATFORM=m 
-CONFIG_LCD_AMS369FG06=m -CONFIG_LCD_LMS501KF03=m -CONFIG_LCD_HX8357=m -CONFIG_LCD_OTM3225A=m -CONFIG_BACKLIGHT_CLASS_DEVICE=y -CONFIG_BACKLIGHT_GENERIC=m -CONFIG_BACKLIGHT_LM3533=m -CONFIG_BACKLIGHT_PWM=m -CONFIG_BACKLIGHT_DA903X=m -CONFIG_BACKLIGHT_DA9052=m -CONFIG_BACKLIGHT_MAX8925=m -CONFIG_BACKLIGHT_APPLE=m -CONFIG_BACKLIGHT_QCOM_WLED=m -CONFIG_BACKLIGHT_SAHARA=m -CONFIG_BACKLIGHT_WM831X=m -CONFIG_BACKLIGHT_ADP5520=m -CONFIG_BACKLIGHT_ADP8860=m -CONFIG_BACKLIGHT_ADP8870=m -CONFIG_BACKLIGHT_88PM860X=m -CONFIG_BACKLIGHT_PCF50633=m -CONFIG_BACKLIGHT_AAT2870=m -CONFIG_BACKLIGHT_LM3630A=m -CONFIG_BACKLIGHT_LM3639=m -CONFIG_BACKLIGHT_LP855X=m -CONFIG_BACKLIGHT_LP8788=m -CONFIG_BACKLIGHT_PANDORA=m -CONFIG_BACKLIGHT_SKY81452=m -CONFIG_BACKLIGHT_TPS65217=m -CONFIG_BACKLIGHT_AS3711=m -CONFIG_BACKLIGHT_GPIO=m -CONFIG_BACKLIGHT_LV5207LP=m -CONFIG_BACKLIGHT_BD6107=m -CONFIG_BACKLIGHT_ARCXCNN=m -CONFIG_BACKLIGHT_RAVE_SP=m -CONFIG_BACKLIGHT_LED=m -# end of Backlight & LCD device support - -CONFIG_VIDEOMODE_HELPERS=y -CONFIG_HDMI=y - -# -# Console display driver support -# -CONFIG_VGA_CONSOLE=y -CONFIG_VGACON_SOFT_SCROLLBACK=y -CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 -# CONFIG_VGACON_SOFT_SCROLLBACK_PERSISTENT_ENABLE_BY_DEFAULT is not set -CONFIG_DUMMY_CONSOLE=y -CONFIG_DUMMY_CONSOLE_COLUMNS=80 -CONFIG_DUMMY_CONSOLE_ROWS=25 -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y -CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER=y -# end of Console display driver support - -# CONFIG_LOGO is not set -# end of Graphics support - -CONFIG_SOUND=m -CONFIG_SOUND_OSS_CORE=y -# CONFIG_SOUND_OSS_CORE_PRECLAIM is not set -CONFIG_SND=m -CONFIG_SND_TIMER=m -CONFIG_SND_PCM=m -CONFIG_SND_PCM_ELD=y -CONFIG_SND_PCM_IEC958=y -CONFIG_SND_DMAENGINE_PCM=m -CONFIG_SND_HWDEP=m -CONFIG_SND_SEQ_DEVICE=m -CONFIG_SND_RAWMIDI=m -CONFIG_SND_COMPRESS_OFFLOAD=m -CONFIG_SND_JACK=y -CONFIG_SND_JACK_INPUT_DEV=y -CONFIG_SND_OSSEMUL=y -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_PCM_OSS_PLUGINS=y -CONFIG_SND_PCM_TIMER=y -CONFIG_SND_HRTIMER=m -CONFIG_SND_DYNAMIC_MINORS=y -CONFIG_SND_MAX_CARDS=32 -# CONFIG_SND_SUPPORT_OLD_API is not set -CONFIG_SND_PROC_FS=y -CONFIG_SND_VERBOSE_PROCFS=y -CONFIG_SND_VERBOSE_PRINTK=y -CONFIG_SND_DEBUG=y -# CONFIG_SND_DEBUG_VERBOSE is not set -# CONFIG_SND_PCM_XRUN_DEBUG is not set -# CONFIG_SND_CTL_VALIDATION is not set -CONFIG_SND_VMASTER=y -CONFIG_SND_DMA_SGBUF=y -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_SEQUENCER_OSS=m -CONFIG_SND_SEQ_HRTIMER_DEFAULT=y -CONFIG_SND_SEQ_MIDI_EVENT=m -CONFIG_SND_SEQ_MIDI=m -CONFIG_SND_SEQ_MIDI_EMUL=m -CONFIG_SND_SEQ_VIRMIDI=m -CONFIG_SND_MPU401_UART=m -CONFIG_SND_OPL3_LIB=m -CONFIG_SND_OPL3_LIB_SEQ=m -CONFIG_SND_VX_LIB=m -CONFIG_SND_AC97_CODEC=m -CONFIG_SND_DRIVERS=y -# CONFIG_SND_PCSP is not set -CONFIG_SND_DUMMY=m -CONFIG_SND_ALOOP=m -CONFIG_SND_VIRMIDI=m -CONFIG_SND_MTPAV=m -CONFIG_SND_MTS64=m -CONFIG_SND_SERIAL_U16550=m -CONFIG_SND_MPU401=m -CONFIG_SND_PORTMAN2X4=m -CONFIG_SND_AC97_POWER_SAVE=y -CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 -CONFIG_SND_SB_COMMON=m -CONFIG_SND_PCI=y -CONFIG_SND_AD1889=m -CONFIG_SND_ALS300=m -CONFIG_SND_ALS4000=m -CONFIG_SND_ALI5451=m -CONFIG_SND_ASIHPI=m -CONFIG_SND_ATIIXP=m -CONFIG_SND_ATIIXP_MODEM=m -CONFIG_SND_AU8810=m -CONFIG_SND_AU8820=m -CONFIG_SND_AU8830=m -CONFIG_SND_AW2=m -CONFIG_SND_AZT3328=m -CONFIG_SND_BT87X=m -# CONFIG_SND_BT87X_OVERCLOCK is not set -CONFIG_SND_CA0106=m -CONFIG_SND_CMIPCI=m -CONFIG_SND_OXYGEN_LIB=m -CONFIG_SND_OXYGEN=m 
-CONFIG_SND_CS4281=m -CONFIG_SND_CS46XX=m -CONFIG_SND_CS46XX_NEW_DSP=y -CONFIG_SND_CTXFI=m -CONFIG_SND_DARLA20=m -CONFIG_SND_GINA20=m -CONFIG_SND_LAYLA20=m -CONFIG_SND_DARLA24=m -CONFIG_SND_GINA24=m -CONFIG_SND_LAYLA24=m -CONFIG_SND_MONA=m -CONFIG_SND_MIA=m -CONFIG_SND_ECHO3G=m -CONFIG_SND_INDIGO=m -CONFIG_SND_INDIGOIO=m -CONFIG_SND_INDIGODJ=m -CONFIG_SND_INDIGOIOX=m -CONFIG_SND_INDIGODJX=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_EMU10K1_SEQ=m -CONFIG_SND_EMU10K1X=m -CONFIG_SND_ENS1370=m -CONFIG_SND_ENS1371=m -CONFIG_SND_ES1938=m -CONFIG_SND_ES1968=m -CONFIG_SND_ES1968_INPUT=y -CONFIG_SND_ES1968_RADIO=y -CONFIG_SND_FM801=m -CONFIG_SND_FM801_TEA575X_BOOL=y -CONFIG_SND_HDSP=m -CONFIG_SND_HDSPM=m -CONFIG_SND_ICE1712=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_INTEL8X0M=m -CONFIG_SND_KORG1212=m -CONFIG_SND_LOLA=m -CONFIG_SND_LX6464ES=m -CONFIG_SND_MAESTRO3=m -CONFIG_SND_MAESTRO3_INPUT=y -CONFIG_SND_MIXART=m -CONFIG_SND_NM256=m -CONFIG_SND_PCXHR=m -CONFIG_SND_RIPTIDE=m -CONFIG_SND_RME32=m -CONFIG_SND_RME96=m -CONFIG_SND_RME9652=m -CONFIG_SND_SONICVIBES=m -CONFIG_SND_TRIDENT=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VIA82XX_MODEM=m -CONFIG_SND_VIRTUOSO=m -CONFIG_SND_VX222=m -CONFIG_SND_YMFPCI=m - -# -# HD-Audio -# -CONFIG_SND_HDA=m -CONFIG_SND_HDA_INTEL=m -CONFIG_SND_HDA_HWDEP=y -CONFIG_SND_HDA_RECONFIG=y -CONFIG_SND_HDA_INPUT_BEEP=y -CONFIG_SND_HDA_INPUT_BEEP_MODE=1 -CONFIG_SND_HDA_PATCH_LOADER=y -CONFIG_SND_HDA_CODEC_REALTEK=m -CONFIG_SND_HDA_CODEC_ANALOG=m -CONFIG_SND_HDA_CODEC_SIGMATEL=m -CONFIG_SND_HDA_CODEC_VIA=m -CONFIG_SND_HDA_CODEC_HDMI=m -CONFIG_SND_HDA_CODEC_CIRRUS=m -CONFIG_SND_HDA_CODEC_CONEXANT=m -CONFIG_SND_HDA_CODEC_CA0110=m -CONFIG_SND_HDA_CODEC_CA0132=m -CONFIG_SND_HDA_CODEC_CA0132_DSP=y -CONFIG_SND_HDA_CODEC_CMEDIA=m -CONFIG_SND_HDA_CODEC_SI3054=m -CONFIG_SND_HDA_GENERIC=m -CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 -# end of HD-Audio - -CONFIG_SND_HDA_CORE=m -CONFIG_SND_HDA_DSP_LOADER=y -CONFIG_SND_HDA_COMPONENT=y -CONFIG_SND_HDA_I915=y -CONFIG_SND_HDA_EXT_CORE=m -CONFIG_SND_HDA_PREALLOC_SIZE=0 -CONFIG_SND_INTEL_NHLT=y -CONFIG_SND_INTEL_DSP_CONFIG=m -CONFIG_SND_SPI=y -CONFIG_SND_USB=y -CONFIG_SND_USB_AUDIO=m -CONFIG_SND_USB_AUDIO_USE_MEDIA_CONTROLLER=y -CONFIG_SND_USB_UA101=m -CONFIG_SND_USB_USX2Y=m -CONFIG_SND_USB_CAIAQ=m -CONFIG_SND_USB_CAIAQ_INPUT=y -CONFIG_SND_USB_US122L=m -CONFIG_SND_USB_6FIRE=m -CONFIG_SND_USB_HIFACE=m -CONFIG_SND_BCD2000=m -CONFIG_SND_USB_LINE6=m -CONFIG_SND_USB_POD=m -CONFIG_SND_USB_PODHD=m -CONFIG_SND_USB_TONEPORT=m -CONFIG_SND_USB_VARIAX=m -CONFIG_SND_FIREWIRE=y -CONFIG_SND_FIREWIRE_LIB=m -CONFIG_SND_DICE=m -CONFIG_SND_OXFW=m -CONFIG_SND_ISIGHT=m -CONFIG_SND_FIREWORKS=m -CONFIG_SND_BEBOB=m -CONFIG_SND_FIREWIRE_DIGI00X=m -CONFIG_SND_FIREWIRE_TASCAM=m -CONFIG_SND_FIREWIRE_MOTU=m -CONFIG_SND_FIREFACE=m -CONFIG_SND_PCMCIA=y -CONFIG_SND_VXPOCKET=m -CONFIG_SND_PDAUDIOCF=m -CONFIG_SND_SOC=m -CONFIG_SND_SOC_AC97_BUS=y -CONFIG_SND_SOC_GENERIC_DMAENGINE_PCM=y -CONFIG_SND_SOC_COMPRESS=y -CONFIG_SND_SOC_TOPOLOGY=y -CONFIG_SND_SOC_ACPI=m -CONFIG_SND_SOC_AMD_ACP=m -CONFIG_SND_SOC_AMD_CZ_DA7219MX98357_MACH=m -CONFIG_SND_SOC_AMD_CZ_RT5645_MACH=m -CONFIG_SND_SOC_AMD_ACP3x=m -CONFIG_SND_SOC_AMD_RV_RT5682_MACH=m -CONFIG_SND_ATMEL_SOC=m -CONFIG_SND_SOC_MIKROE_PROTO=m -CONFIG_SND_BCM63XX_I2S_WHISTLER=m -CONFIG_SND_DESIGNWARE_I2S=m -CONFIG_SND_DESIGNWARE_PCM=y - -# -# SoC Audio for Freescale CPUs -# - -# -# Common SoC Audio options for Freescale CPUs: -# -# CONFIG_SND_SOC_FSL_ASRC is not set -# CONFIG_SND_SOC_FSL_SAI is not set -# CONFIG_SND_SOC_FSL_AUDMIX is not set -# 
CONFIG_SND_SOC_FSL_SSI is not set -# CONFIG_SND_SOC_FSL_SPDIF is not set -# CONFIG_SND_SOC_FSL_ESAI is not set -# CONFIG_SND_SOC_FSL_MICFIL is not set -# CONFIG_SND_SOC_IMX_AUDMUX is not set -# end of SoC Audio for Freescale CPUs - -CONFIG_SND_I2S_HI6210_I2S=m -CONFIG_SND_SOC_IMG=y -CONFIG_SND_SOC_IMG_I2S_IN=m -CONFIG_SND_SOC_IMG_I2S_OUT=m -CONFIG_SND_SOC_IMG_PARALLEL_OUT=m -CONFIG_SND_SOC_IMG_SPDIF_IN=m -CONFIG_SND_SOC_IMG_SPDIF_OUT=m -CONFIG_SND_SOC_IMG_PISTACHIO_INTERNAL_DAC=m -CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y -CONFIG_SND_SST_IPC=m -CONFIG_SND_SST_IPC_PCI=m -CONFIG_SND_SST_IPC_ACPI=m -CONFIG_SND_SOC_INTEL_SST_ACPI=m -CONFIG_SND_SOC_INTEL_SST=m -CONFIG_SND_SOC_INTEL_SST_FIRMWARE=m -CONFIG_SND_SOC_INTEL_HASWELL=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_PCI=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_ACPI=m -CONFIG_SND_SOC_INTEL_SKYLAKE=m -CONFIG_SND_SOC_INTEL_SKL=m -CONFIG_SND_SOC_INTEL_APL=m -CONFIG_SND_SOC_INTEL_KBL=m -CONFIG_SND_SOC_INTEL_GLK=m -CONFIG_SND_SOC_INTEL_CNL=m -CONFIG_SND_SOC_INTEL_CFL=m -CONFIG_SND_SOC_INTEL_CML_H=m -CONFIG_SND_SOC_INTEL_CML_LP=m -CONFIG_SND_SOC_INTEL_SKYLAKE_FAMILY=m -CONFIG_SND_SOC_INTEL_SKYLAKE_SSP_CLK=m -# CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC is not set -CONFIG_SND_SOC_INTEL_SKYLAKE_COMMON=m -CONFIG_SND_SOC_ACPI_INTEL_MATCH=m -CONFIG_SND_SOC_INTEL_MACH=y -# CONFIG_SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES is not set -CONFIG_SND_SOC_INTEL_HASWELL_MACH=m -CONFIG_SND_SOC_INTEL_BDW_RT5650_MACH=m -CONFIG_SND_SOC_INTEL_BDW_RT5677_MACH=m -CONFIG_SND_SOC_INTEL_BROADWELL_MACH=m -CONFIG_SND_SOC_INTEL_BYTCR_RT5640_MACH=m -CONFIG_SND_SOC_INTEL_BYTCR_RT5651_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_RT5672_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_RT5645_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_NAU8824_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_CX2072X_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_DA7213_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_ES8316_MACH=m -# CONFIG_SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH is not set -CONFIG_SND_SOC_INTEL_SKL_RT286_MACH=m -CONFIG_SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH=m -CONFIG_SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_DA7219_MAX98357A_GENERIC=m -CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_COMMON=m -CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_BXT_RT298_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5660_MACH=m -CONFIG_SND_SOC_INTEL_GLK_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_GLK_RT5682_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_SKL_HDA_DSP_GENERIC_MACH=m -CONFIG_SND_SOC_INTEL_SOF_RT5682_MACH=m -CONFIG_SND_SOC_INTEL_SOF_PCM512x_MACH=m -CONFIG_SND_SOC_INTEL_CML_LP_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m -CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m -CONFIG_SND_SOC_MTK_BTCVSD=m -CONFIG_SND_SOC_SOF_TOPLEVEL=y -CONFIG_SND_SOC_SOF_PCI=m -CONFIG_SND_SOC_SOF_ACPI=m -CONFIG_SND_SOC_SOF_OF=m -# CONFIG_SND_SOC_SOF_DEBUG_PROBES is not set -# CONFIG_SND_SOC_SOF_DEVELOPER_SUPPORT is not set -CONFIG_SND_SOC_SOF=m -CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE=y -CONFIG_SND_SOC_SOF_INTEL_TOPLEVEL=y -CONFIG_SND_SOC_SOF_INTEL_ACPI=m -CONFIG_SND_SOC_SOF_INTEL_PCI=m -CONFIG_SND_SOC_SOF_INTEL_HIFI_EP_IPC=m -CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP=m -CONFIG_SND_SOC_SOF_INTEL_COMMON=m -CONFIG_SND_SOC_SOF_MERRIFIELD_SUPPORT=y 
-CONFIG_SND_SOC_SOF_MERRIFIELD=m -CONFIG_SND_SOC_SOF_APOLLOLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_APOLLOLAKE=m -CONFIG_SND_SOC_SOF_GEMINILAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_GEMINILAKE=m -CONFIG_SND_SOC_SOF_CANNONLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_CANNONLAKE=m -CONFIG_SND_SOC_SOF_COFFEELAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_COFFEELAKE=m -CONFIG_SND_SOC_SOF_ICELAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_ICELAKE=m -CONFIG_SND_SOC_SOF_COMETLAKE_LP=m -CONFIG_SND_SOC_SOF_COMETLAKE_LP_SUPPORT=y -CONFIG_SND_SOC_SOF_COMETLAKE_H=m -CONFIG_SND_SOC_SOF_COMETLAKE_H_SUPPORT=y -CONFIG_SND_SOC_SOF_TIGERLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_TIGERLAKE=m -CONFIG_SND_SOC_SOF_ELKHARTLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_ELKHARTLAKE=m -CONFIG_SND_SOC_SOF_JASPERLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_JASPERLAKE=m -CONFIG_SND_SOC_SOF_HDA_COMMON=m -CONFIG_SND_SOC_SOF_HDA_LINK=y -CONFIG_SND_SOC_SOF_HDA_AUDIO_CODEC=y -# CONFIG_SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1 is not set -CONFIG_SND_SOC_SOF_HDA_LINK_BASELINE=m -CONFIG_SND_SOC_SOF_HDA=m -CONFIG_SND_SOC_SOF_XTENSA=m - -# -# STMicroelectronics STM32 SOC audio support -# -# end of STMicroelectronics STM32 SOC audio support - -CONFIG_SND_SOC_XILINX_I2S=m -CONFIG_SND_SOC_XILINX_AUDIO_FORMATTER=m -CONFIG_SND_SOC_XILINX_SPDIF=m -CONFIG_SND_SOC_XTFPGA_I2S=m -CONFIG_ZX_TDM=m -CONFIG_SND_SOC_I2C_AND_SPI=m - -# -# CODEC drivers -# -CONFIG_SND_SOC_AC97_CODEC=m -CONFIG_SND_SOC_ADAU_UTILS=m -CONFIG_SND_SOC_ADAU1701=m -CONFIG_SND_SOC_ADAU17X1=m -CONFIG_SND_SOC_ADAU1761=m -CONFIG_SND_SOC_ADAU1761_I2C=m -CONFIG_SND_SOC_ADAU1761_SPI=m -CONFIG_SND_SOC_ADAU7002=m -CONFIG_SND_SOC_ADAU7118=m -CONFIG_SND_SOC_ADAU7118_HW=m -CONFIG_SND_SOC_ADAU7118_I2C=m -CONFIG_SND_SOC_AK4104=m -CONFIG_SND_SOC_AK4118=m -CONFIG_SND_SOC_AK4458=m -CONFIG_SND_SOC_AK4554=m -CONFIG_SND_SOC_AK4613=m -CONFIG_SND_SOC_AK4642=m -CONFIG_SND_SOC_AK5386=m -CONFIG_SND_SOC_AK5558=m -CONFIG_SND_SOC_ALC5623=m -CONFIG_SND_SOC_BD28623=m -# CONFIG_SND_SOC_BT_SCO is not set -CONFIG_SND_SOC_CPCAP=m -CONFIG_SND_SOC_CROS_EC_CODEC=m -CONFIG_SND_SOC_CS35L32=m -CONFIG_SND_SOC_CS35L33=m -CONFIG_SND_SOC_CS35L34=m -CONFIG_SND_SOC_CS35L35=m -CONFIG_SND_SOC_CS35L36=m -CONFIG_SND_SOC_CS42L42=m -CONFIG_SND_SOC_CS42L51=m -CONFIG_SND_SOC_CS42L51_I2C=m -CONFIG_SND_SOC_CS42L52=m -CONFIG_SND_SOC_CS42L56=m -CONFIG_SND_SOC_CS42L73=m -CONFIG_SND_SOC_CS4265=m -CONFIG_SND_SOC_CS4270=m -CONFIG_SND_SOC_CS4271=m -CONFIG_SND_SOC_CS4271_I2C=m -CONFIG_SND_SOC_CS4271_SPI=m -CONFIG_SND_SOC_CS42XX8=m -CONFIG_SND_SOC_CS42XX8_I2C=m -CONFIG_SND_SOC_CS43130=m -CONFIG_SND_SOC_CS4341=m -CONFIG_SND_SOC_CS4349=m -CONFIG_SND_SOC_CS53L30=m -CONFIG_SND_SOC_CX2072X=m -CONFIG_SND_SOC_DA7213=m -CONFIG_SND_SOC_DA7219=m -CONFIG_SND_SOC_DMIC=m -CONFIG_SND_SOC_HDMI_CODEC=m -CONFIG_SND_SOC_ES7134=m -CONFIG_SND_SOC_ES7241=m -CONFIG_SND_SOC_ES8316=m -CONFIG_SND_SOC_ES8328=m -CONFIG_SND_SOC_ES8328_I2C=m -CONFIG_SND_SOC_ES8328_SPI=m -CONFIG_SND_SOC_GTM601=m -CONFIG_SND_SOC_HDAC_HDMI=m -CONFIG_SND_SOC_HDAC_HDA=m -CONFIG_SND_SOC_INNO_RK3036=m -CONFIG_SND_SOC_LOCHNAGAR_SC=m -CONFIG_SND_SOC_MAX98088=m -CONFIG_SND_SOC_MAX98090=m -CONFIG_SND_SOC_MAX98357A=m -CONFIG_SND_SOC_MAX98504=m -CONFIG_SND_SOC_MAX9867=m -CONFIG_SND_SOC_MAX98927=m -CONFIG_SND_SOC_MAX98373=m -CONFIG_SND_SOC_MAX9860=m -CONFIG_SND_SOC_MSM8916_WCD_ANALOG=m -CONFIG_SND_SOC_MSM8916_WCD_DIGITAL=m -CONFIG_SND_SOC_PCM1681=m -CONFIG_SND_SOC_PCM1789=m -CONFIG_SND_SOC_PCM1789_I2C=m -CONFIG_SND_SOC_PCM179X=m -CONFIG_SND_SOC_PCM179X_I2C=m -CONFIG_SND_SOC_PCM179X_SPI=m -CONFIG_SND_SOC_PCM186X=m -CONFIG_SND_SOC_PCM186X_I2C=m -CONFIG_SND_SOC_PCM186X_SPI=m 
-CONFIG_SND_SOC_PCM3060=m -CONFIG_SND_SOC_PCM3060_I2C=m -CONFIG_SND_SOC_PCM3060_SPI=m -CONFIG_SND_SOC_PCM3168A=m -CONFIG_SND_SOC_PCM3168A_I2C=m -CONFIG_SND_SOC_PCM3168A_SPI=m -CONFIG_SND_SOC_PCM512x=m -CONFIG_SND_SOC_PCM512x_I2C=m -CONFIG_SND_SOC_PCM512x_SPI=m -CONFIG_SND_SOC_RK3328=m -CONFIG_SND_SOC_RL6231=m -CONFIG_SND_SOC_RL6347A=m -CONFIG_SND_SOC_RT286=m -CONFIG_SND_SOC_RT298=m -CONFIG_SND_SOC_RT1011=m -CONFIG_SND_SOC_RT1015=m -CONFIG_SND_SOC_RT1308_SDW=m -CONFIG_SND_SOC_RT5514=m -CONFIG_SND_SOC_RT5514_SPI=m -CONFIG_SND_SOC_RT5616=m -CONFIG_SND_SOC_RT5631=m -CONFIG_SND_SOC_RT5640=m -CONFIG_SND_SOC_RT5645=m -CONFIG_SND_SOC_RT5651=m -CONFIG_SND_SOC_RT5660=m -CONFIG_SND_SOC_RT5663=m -CONFIG_SND_SOC_RT5670=m -CONFIG_SND_SOC_RT5677=m -CONFIG_SND_SOC_RT5677_SPI=m -CONFIG_SND_SOC_RT5682=m -CONFIG_SND_SOC_RT5682_SDW=m -CONFIG_SND_SOC_RT700=m -CONFIG_SND_SOC_RT700_SDW=m -CONFIG_SND_SOC_RT711=m -CONFIG_SND_SOC_RT711_SDW=m -CONFIG_SND_SOC_RT715=m -CONFIG_SND_SOC_RT715_SDW=m -CONFIG_SND_SOC_SGTL5000=m -CONFIG_SND_SOC_SI476X=m -CONFIG_SND_SOC_SIGMADSP=m -CONFIG_SND_SOC_SIGMADSP_I2C=m -CONFIG_SND_SOC_SIGMADSP_REGMAP=m -CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m -CONFIG_SND_SOC_SIRF_AUDIO_CODEC=m -CONFIG_SND_SOC_SPDIF=m -CONFIG_SND_SOC_SSM2305=m -CONFIG_SND_SOC_SSM2602=m -CONFIG_SND_SOC_SSM2602_SPI=m -CONFIG_SND_SOC_SSM2602_I2C=m -CONFIG_SND_SOC_SSM4567=m -CONFIG_SND_SOC_STA32X=m -CONFIG_SND_SOC_STA350=m -CONFIG_SND_SOC_STI_SAS=m -CONFIG_SND_SOC_TAS2552=m -CONFIG_SND_SOC_TAS2562=m -CONFIG_SND_SOC_TAS2770=m -CONFIG_SND_SOC_TAS5086=m -CONFIG_SND_SOC_TAS571X=m -CONFIG_SND_SOC_TAS5720=m -CONFIG_SND_SOC_TAS6424=m -CONFIG_SND_SOC_TDA7419=m -CONFIG_SND_SOC_TFA9879=m -CONFIG_SND_SOC_TLV320AIC23=m -CONFIG_SND_SOC_TLV320AIC23_I2C=m -CONFIG_SND_SOC_TLV320AIC23_SPI=m -CONFIG_SND_SOC_TLV320AIC31XX=m -CONFIG_SND_SOC_TLV320AIC32X4=m -CONFIG_SND_SOC_TLV320AIC32X4_I2C=m -CONFIG_SND_SOC_TLV320AIC32X4_SPI=m -CONFIG_SND_SOC_TLV320AIC3X=m -CONFIG_SND_SOC_TLV320ADCX140=m -CONFIG_SND_SOC_TS3A227E=m -CONFIG_SND_SOC_TSCS42XX=m -CONFIG_SND_SOC_TSCS454=m -CONFIG_SND_SOC_UDA1334=m -CONFIG_SND_SOC_WCD9335=m -CONFIG_SND_SOC_WCD934X=m -CONFIG_SND_SOC_WM8510=m -CONFIG_SND_SOC_WM8523=m -CONFIG_SND_SOC_WM8524=m -CONFIG_SND_SOC_WM8580=m -CONFIG_SND_SOC_WM8711=m -CONFIG_SND_SOC_WM8728=m -CONFIG_SND_SOC_WM8731=m -CONFIG_SND_SOC_WM8737=m -CONFIG_SND_SOC_WM8741=m -CONFIG_SND_SOC_WM8750=m -CONFIG_SND_SOC_WM8753=m -CONFIG_SND_SOC_WM8770=m -CONFIG_SND_SOC_WM8776=m -CONFIG_SND_SOC_WM8782=m -CONFIG_SND_SOC_WM8804=m -CONFIG_SND_SOC_WM8804_I2C=m -CONFIG_SND_SOC_WM8804_SPI=m -CONFIG_SND_SOC_WM8903=m -CONFIG_SND_SOC_WM8904=m -CONFIG_SND_SOC_WM8960=m -CONFIG_SND_SOC_WM8962=m -CONFIG_SND_SOC_WM8974=m -CONFIG_SND_SOC_WM8978=m -CONFIG_SND_SOC_WM8985=m -CONFIG_SND_SOC_WSA881X=m -CONFIG_SND_SOC_ZX_AUD96P22=m -CONFIG_SND_SOC_MAX9759=m -CONFIG_SND_SOC_MT6351=m -CONFIG_SND_SOC_MT6358=m -CONFIG_SND_SOC_MT6660=m -CONFIG_SND_SOC_NAU8540=m -CONFIG_SND_SOC_NAU8810=m -CONFIG_SND_SOC_NAU8822=m -CONFIG_SND_SOC_NAU8824=m -CONFIG_SND_SOC_NAU8825=m -CONFIG_SND_SOC_TPA6130A2=m -# end of CODEC drivers - -CONFIG_SND_SIMPLE_CARD_UTILS=m -CONFIG_SND_SIMPLE_CARD=m -CONFIG_SND_AUDIO_GRAPH_CARD=m -CONFIG_SND_X86=y -CONFIG_HDMI_LPE_AUDIO=m -CONFIG_SND_SYNTH_EMUX=m -CONFIG_SND_XEN_FRONTEND=m -CONFIG_AC97_BUS=m - -# -# HID support -# -CONFIG_HID=m -CONFIG_HID_BATTERY_STRENGTH=y -CONFIG_HIDRAW=y -CONFIG_UHID=m -CONFIG_HID_GENERIC=m - -# -# Special HID drivers -# -CONFIG_HID_A4TECH=m -CONFIG_HID_ACCUTOUCH=m -CONFIG_HID_ACRUX=m -CONFIG_HID_ACRUX_FF=y -CONFIG_HID_APPLE=m 
-CONFIG_HID_APPLEIR=m -CONFIG_HID_ASUS=m -CONFIG_HID_AUREAL=m -CONFIG_HID_BELKIN=m -CONFIG_HID_BETOP_FF=m -CONFIG_HID_BIGBEN_FF=m -CONFIG_HID_CHERRY=m -CONFIG_HID_CHICONY=m -CONFIG_HID_CORSAIR=m -CONFIG_HID_COUGAR=m -CONFIG_HID_MACALLY=m -CONFIG_HID_PRODIKEYS=m -CONFIG_HID_CMEDIA=m -CONFIG_HID_CP2112=m -CONFIG_HID_CREATIVE_SB0540=m -CONFIG_HID_CYPRESS=m -CONFIG_HID_DRAGONRISE=m -CONFIG_DRAGONRISE_FF=y -CONFIG_HID_EMS_FF=m -CONFIG_HID_ELAN=m -CONFIG_HID_ELECOM=m -CONFIG_HID_ELO=m -CONFIG_HID_EZKEY=m -CONFIG_HID_GEMBIRD=m -CONFIG_HID_GFRM=m -CONFIG_HID_GLORIOUS=m -CONFIG_HID_HOLTEK=m -CONFIG_HOLTEK_FF=y -CONFIG_HID_GOOGLE_HAMMER=m -CONFIG_HID_GT683R=m -CONFIG_HID_KEYTOUCH=m -CONFIG_HID_KYE=m -CONFIG_HID_UCLOGIC=m -CONFIG_HID_WALTOP=m -CONFIG_HID_VIEWSONIC=m -CONFIG_HID_GYRATION=m -CONFIG_HID_ICADE=m -CONFIG_HID_ITE=m -CONFIG_HID_JABRA=m -CONFIG_HID_TWINHAN=m -CONFIG_HID_KENSINGTON=m -CONFIG_HID_LCPOWER=m -CONFIG_HID_LED=m -CONFIG_HID_LENOVO=m -CONFIG_HID_LOGITECH=m -CONFIG_HID_LOGITECH_DJ=m -CONFIG_HID_LOGITECH_HIDPP=m -CONFIG_LOGITECH_FF=y -CONFIG_LOGIRUMBLEPAD2_FF=y -CONFIG_LOGIG940_FF=y -CONFIG_LOGIWHEELS_FF=y -CONFIG_HID_MAGICMOUSE=m -CONFIG_HID_MALTRON=m -CONFIG_HID_MAYFLASH=m -CONFIG_HID_REDRAGON=m -CONFIG_HID_MICROSOFT=m -CONFIG_HID_MONTEREY=m -CONFIG_HID_MULTITOUCH=m -CONFIG_HID_NTI=m -CONFIG_HID_NTRIG=m -CONFIG_HID_ORTEK=m -CONFIG_HID_PANTHERLORD=m -CONFIG_PANTHERLORD_FF=y -CONFIG_HID_PENMOUNT=m -CONFIG_HID_PETALYNX=m -CONFIG_HID_PICOLCD=m -CONFIG_HID_PICOLCD_FB=y -CONFIG_HID_PICOLCD_BACKLIGHT=y -CONFIG_HID_PICOLCD_LCD=y -CONFIG_HID_PICOLCD_LEDS=y -CONFIG_HID_PICOLCD_CIR=y -CONFIG_HID_PLANTRONICS=m -CONFIG_HID_PRIMAX=m -CONFIG_HID_RETRODE=m -CONFIG_HID_ROCCAT=m -CONFIG_HID_SAITEK=m -CONFIG_HID_SAMSUNG=m -CONFIG_HID_SONY=m -CONFIG_SONY_FF=y -CONFIG_HID_SPEEDLINK=m -CONFIG_HID_STEAM=m -CONFIG_HID_STEELSERIES=m -CONFIG_HID_SUNPLUS=m -CONFIG_HID_RMI=m -CONFIG_HID_GREENASIA=m -CONFIG_GREENASIA_FF=y -CONFIG_HID_HYPERV_MOUSE=m -CONFIG_HID_SMARTJOYPLUS=m -CONFIG_SMARTJOYPLUS_FF=y -CONFIG_HID_TIVO=m -CONFIG_HID_TOPSEED=m -CONFIG_HID_THINGM=m -CONFIG_HID_THRUSTMASTER=m -CONFIG_THRUSTMASTER_FF=y -CONFIG_HID_UDRAW_PS3=m -CONFIG_HID_U2FZERO=m -CONFIG_HID_WACOM=m -CONFIG_HID_WIIMOTE=m -CONFIG_HID_XINMO=m -CONFIG_HID_ZEROPLUS=m -CONFIG_ZEROPLUS_FF=y -CONFIG_HID_ZYDACRON=m -CONFIG_HID_SENSOR_HUB=m -# CONFIG_HID_SENSOR_CUSTOM_SENSOR is not set -CONFIG_HID_ALPS=m -CONFIG_HID_MCP2221=m -# end of Special HID drivers - -# -# USB HID support -# -CONFIG_USB_HID=m -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y - -# -# USB HID Boot Protocol drivers -# -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -# end of USB HID Boot Protocol drivers -# end of USB HID support - -# -# I2C HID support -# -CONFIG_I2C_HID=m -# end of I2C HID support - -# -# Intel ISH HID support -# -CONFIG_INTEL_ISH_HID=m -CONFIG_INTEL_ISH_FIRMWARE_DOWNLOADER=m -# end of Intel ISH HID support -# end of HID support - -CONFIG_USB_OHCI_LITTLE_ENDIAN=y -CONFIG_USB_SUPPORT=y -CONFIG_USB_COMMON=y -CONFIG_USB_LED_TRIG=y -CONFIG_USB_ULPI_BUS=m -CONFIG_USB_CONN_GPIO=m -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB=y -CONFIG_USB_PCI=y -CONFIG_USB_ANNOUNCE_NEW_DEVICES=y - -# -# Miscellaneous USB options -# -CONFIG_USB_DEFAULT_PERSIST=y -CONFIG_USB_DYNAMIC_MINORS=y -# CONFIG_USB_OTG is not set -# CONFIG_USB_OTG_WHITELIST is not set -# CONFIG_USB_OTG_BLACKLIST_HUB is not set -CONFIG_USB_LEDS_TRIGGER_USBPORT=m -CONFIG_USB_AUTOSUSPEND_DELAY=2 -CONFIG_USB_MON=m - -# -# USB Host Controller Drivers -# -CONFIG_USB_C67X00_HCD=m -CONFIG_USB_XHCI_HCD=m -# 
CONFIG_USB_XHCI_DBGCAP is not set -CONFIG_USB_XHCI_PCI=m -CONFIG_USB_XHCI_PLATFORM=m -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_EHCI_TT_NEWSCHED=y -CONFIG_USB_EHCI_PCI=m -CONFIG_USB_EHCI_FSL=m -CONFIG_USB_EHCI_HCD_PLATFORM=m -CONFIG_USB_OXU210HP_HCD=m -CONFIG_USB_ISP116X_HCD=m -CONFIG_USB_FOTG210_HCD=m -CONFIG_USB_MAX3421_HCD=m -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_OHCI_HCD_PCI=m -# CONFIG_USB_OHCI_HCD_SSB is not set -CONFIG_USB_OHCI_HCD_PLATFORM=m -CONFIG_USB_UHCI_HCD=m -CONFIG_USB_U132_HCD=m -CONFIG_USB_SL811_HCD=m -# CONFIG_USB_SL811_HCD_ISO is not set -CONFIG_USB_SL811_CS=m -CONFIG_USB_R8A66597_HCD=m -CONFIG_USB_HCD_BCMA=m -CONFIG_USB_HCD_SSB=m -# CONFIG_USB_HCD_TEST_MODE is not set - -# -# USB Device Class drivers -# -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_WDM=m -CONFIG_USB_TMC=m - -# -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may -# - -# -# also be needed; see USB_STORAGE Help for more info -# -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_REALTEK=m -CONFIG_REALTEK_AUTOPM=y -CONFIG_USB_STORAGE_DATAFAB=m -CONFIG_USB_STORAGE_FREECOM=m -CONFIG_USB_STORAGE_ISD200=m -CONFIG_USB_STORAGE_USBAT=m -CONFIG_USB_STORAGE_SDDR09=m -CONFIG_USB_STORAGE_SDDR55=m -CONFIG_USB_STORAGE_JUMPSHOT=m -CONFIG_USB_STORAGE_ALAUDA=m -CONFIG_USB_STORAGE_ONETOUCH=m -CONFIG_USB_STORAGE_KARMA=m -CONFIG_USB_STORAGE_CYPRESS_ATACB=m -CONFIG_USB_STORAGE_ENE_UB6250=m -CONFIG_USB_UAS=m - -# -# USB Imaging devices -# -CONFIG_USB_MDC800=m -CONFIG_USB_MICROTEK=m -CONFIG_USBIP_CORE=m -CONFIG_USBIP_VHCI_HCD=m -CONFIG_USBIP_VHCI_HC_PORTS=8 -CONFIG_USBIP_VHCI_NR_HCS=1 -CONFIG_USBIP_HOST=m -CONFIG_USBIP_VUDC=m -# CONFIG_USBIP_DEBUG is not set -CONFIG_USB_CDNS3=m -CONFIG_USB_CDNS3_GADGET=y -CONFIG_USB_CDNS3_HOST=y -CONFIG_USB_CDNS3_PCI_WRAP=m -CONFIG_USB_MUSB_HDRC=m -# CONFIG_USB_MUSB_HOST is not set -# CONFIG_USB_MUSB_GADGET is not set -CONFIG_USB_MUSB_DUAL_ROLE=y - -# -# Platform Glue Layer -# - -# -# MUSB DMA mode -# -# CONFIG_MUSB_PIO_ONLY is not set -CONFIG_USB_DWC3=m -CONFIG_USB_DWC3_ULPI=y -# CONFIG_USB_DWC3_HOST is not set -# CONFIG_USB_DWC3_GADGET is not set -CONFIG_USB_DWC3_DUAL_ROLE=y - -# -# Platform Glue Driver Support -# -CONFIG_USB_DWC3_PCI=m -CONFIG_USB_DWC3_HAPS=m -CONFIG_USB_DWC3_OF_SIMPLE=m -CONFIG_USB_DWC2=m -# CONFIG_USB_DWC2_HOST is not set - -# -# Gadget/Dual-role mode requires USB Gadget support to be enabled -# -# CONFIG_USB_DWC2_PERIPHERAL is not set -CONFIG_USB_DWC2_DUAL_ROLE=y -CONFIG_USB_DWC2_PCI=m -# CONFIG_USB_DWC2_DEBUG is not set -# CONFIG_USB_DWC2_TRACK_MISSED_SOFS is not set -CONFIG_USB_CHIPIDEA=m -CONFIG_USB_CHIPIDEA_OF=m -CONFIG_USB_CHIPIDEA_PCI=m -CONFIG_USB_CHIPIDEA_UDC=y -CONFIG_USB_CHIPIDEA_HOST=y -CONFIG_USB_ISP1760=m -CONFIG_USB_ISP1760_HCD=y -CONFIG_USB_ISP1761_UDC=y -# CONFIG_USB_ISP1760_HOST_ROLE is not set -# CONFIG_USB_ISP1760_GADGET_ROLE is not set -CONFIG_USB_ISP1760_DUAL_ROLE=y - -# -# USB port drivers -# -CONFIG_USB_USS720=m -CONFIG_USB_SERIAL=y -CONFIG_USB_SERIAL_CONSOLE=y -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_SIMPLE=m -CONFIG_USB_SERIAL_AIRCABLE=m -CONFIG_USB_SERIAL_ARK3116=m -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_CH341=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_CP210X=m -CONFIG_USB_SERIAL_CYPRESS_M8=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_F81232=m 
-CONFIG_USB_SERIAL_F8153X=m -CONFIG_USB_SERIAL_GARMIN=m -CONFIG_USB_SERIAL_IPW=m -CONFIG_USB_SERIAL_IUU=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_METRO=m -CONFIG_USB_SERIAL_MOS7720=m -CONFIG_USB_SERIAL_MOS7715_PARPORT=y -CONFIG_USB_SERIAL_MOS7840=m -CONFIG_USB_SERIAL_MXUPORT=m -CONFIG_USB_SERIAL_NAVMAN=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_OTI6858=m -CONFIG_USB_SERIAL_QCAUX=m -CONFIG_USB_SERIAL_QUALCOMM=m -CONFIG_USB_SERIAL_SPCP8X5=m -CONFIG_USB_SERIAL_SAFE=m -# CONFIG_USB_SERIAL_SAFE_PADDED is not set -CONFIG_USB_SERIAL_SIERRAWIRELESS=m -CONFIG_USB_SERIAL_SYMBOL=m -CONFIG_USB_SERIAL_TI=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_WWAN=m -CONFIG_USB_SERIAL_OPTION=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_SERIAL_OPTICON=m -CONFIG_USB_SERIAL_XSENS_MT=m -CONFIG_USB_SERIAL_WISHBONE=m -CONFIG_USB_SERIAL_SSU100=m -CONFIG_USB_SERIAL_QT2=m -CONFIG_USB_SERIAL_UPD78F0730=m -CONFIG_USB_SERIAL_DEBUG=m - -# -# USB Miscellaneous drivers -# -CONFIG_USB_EMI62=m -CONFIG_USB_EMI26=m -CONFIG_USB_ADUTUX=m -CONFIG_USB_SEVSEG=m -CONFIG_USB_LEGOTOWER=m -CONFIG_USB_LCD=m -CONFIG_USB_CYPRESS_CY7C63=m -CONFIG_USB_CYTHERM=m -CONFIG_USB_IDMOUSE=m -CONFIG_USB_FTDI_ELAN=m -CONFIG_USB_APPLEDISPLAY=m -CONFIG_APPLE_MFI_FASTCHARGE=m -CONFIG_USB_SISUSBVGA=m -CONFIG_USB_SISUSBVGA_CON=y -CONFIG_USB_LD=m -CONFIG_USB_TRANCEVIBRATOR=m -CONFIG_USB_IOWARRIOR=m -CONFIG_USB_TEST=m -CONFIG_USB_EHSET_TEST_FIXTURE=m -CONFIG_USB_ISIGHTFW=m -CONFIG_USB_YUREX=m -CONFIG_USB_EZUSB_FX2=m -CONFIG_USB_HUB_USB251XB=m -CONFIG_USB_HSIC_USB3503=m -CONFIG_USB_HSIC_USB4604=m -CONFIG_USB_LINK_LAYER_TEST=m -CONFIG_USB_CHAOSKEY=m -CONFIG_USB_ATM=m -CONFIG_USB_SPEEDTOUCH=m -CONFIG_USB_CXACRU=m -CONFIG_USB_UEAGLEATM=m -CONFIG_USB_XUSBATM=m - -# -# USB Physical Layer drivers -# -CONFIG_USB_PHY=y -CONFIG_NOP_USB_XCEIV=m -CONFIG_USB_GPIO_VBUS=m -CONFIG_TAHVO_USB=m -# CONFIG_TAHVO_USB_HOST_BY_DEFAULT is not set -CONFIG_USB_ISP1301=m -# end of USB Physical Layer drivers - -CONFIG_USB_GADGET=m -# CONFIG_USB_GADGET_DEBUG is not set -# CONFIG_USB_GADGET_DEBUG_FILES is not set -# CONFIG_USB_GADGET_DEBUG_FS is not set -CONFIG_USB_GADGET_VBUS_DRAW=2 -CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 -CONFIG_U_SERIAL_CONSOLE=y - -# -# USB Peripheral Controller -# -CONFIG_USB_FOTG210_UDC=m -CONFIG_USB_GR_UDC=m -CONFIG_USB_R8A66597=m -CONFIG_USB_PXA27X=m -CONFIG_USB_MV_UDC=m -CONFIG_USB_MV_U3D=m -CONFIG_USB_SNP_CORE=m -CONFIG_USB_SNP_UDC_PLAT=m -CONFIG_USB_M66592=m -CONFIG_USB_BDC_UDC=m - -# -# Platform Support -# -CONFIG_USB_BDC_PCI=m -CONFIG_USB_AMD5536UDC=m -CONFIG_USB_NET2272=m -CONFIG_USB_NET2272_DMA=y -CONFIG_USB_NET2280=m -CONFIG_USB_GOKU=m -CONFIG_USB_EG20T=m -CONFIG_USB_GADGET_XILINX=m -CONFIG_USB_MAX3420_UDC=m -CONFIG_USB_DUMMY_HCD=m -# end of USB Peripheral Controller - -CONFIG_USB_LIBCOMPOSITE=m -CONFIG_USB_F_ACM=m -CONFIG_USB_F_SS_LB=m -CONFIG_USB_U_SERIAL=m -CONFIG_USB_U_ETHER=m -CONFIG_USB_U_AUDIO=m -CONFIG_USB_F_SERIAL=m -CONFIG_USB_F_OBEX=m -CONFIG_USB_F_NCM=m -CONFIG_USB_F_ECM=m -CONFIG_USB_F_PHONET=m -CONFIG_USB_F_EEM=m -CONFIG_USB_F_SUBSET=m -CONFIG_USB_F_RNDIS=m -CONFIG_USB_F_MASS_STORAGE=m -CONFIG_USB_F_FS=m -CONFIG_USB_F_UAC1=m -CONFIG_USB_F_UAC1_LEGACY=m -CONFIG_USB_F_UAC2=m -CONFIG_USB_F_UVC=m -CONFIG_USB_F_MIDI=m -CONFIG_USB_F_HID=m -CONFIG_USB_F_PRINTER=m -CONFIG_USB_F_TCM=m -CONFIG_USB_CONFIGFS=m -CONFIG_USB_CONFIGFS_SERIAL=y -CONFIG_USB_CONFIGFS_ACM=y 
-CONFIG_USB_CONFIGFS_OBEX=y -CONFIG_USB_CONFIGFS_NCM=y -CONFIG_USB_CONFIGFS_ECM=y -CONFIG_USB_CONFIGFS_ECM_SUBSET=y -CONFIG_USB_CONFIGFS_RNDIS=y -CONFIG_USB_CONFIGFS_EEM=y -CONFIG_USB_CONFIGFS_PHONET=y -CONFIG_USB_CONFIGFS_MASS_STORAGE=y -CONFIG_USB_CONFIGFS_F_LB_SS=y -CONFIG_USB_CONFIGFS_F_FS=y -CONFIG_USB_CONFIGFS_F_UAC1=y -CONFIG_USB_CONFIGFS_F_UAC1_LEGACY=y -CONFIG_USB_CONFIGFS_F_UAC2=y -CONFIG_USB_CONFIGFS_F_MIDI=y -CONFIG_USB_CONFIGFS_F_HID=y -CONFIG_USB_CONFIGFS_F_UVC=y -CONFIG_USB_CONFIGFS_F_PRINTER=y -CONFIG_USB_CONFIGFS_F_TCM=y - -# -# USB Gadget precomposed configurations -# -CONFIG_USB_ZERO=m -CONFIG_USB_AUDIO=m -# CONFIG_GADGET_UAC1 is not set -CONFIG_USB_ETH=m -CONFIG_USB_ETH_RNDIS=y -CONFIG_USB_ETH_EEM=y -CONFIG_USB_G_NCM=m -CONFIG_USB_GADGETFS=m -CONFIG_USB_FUNCTIONFS=m -CONFIG_USB_FUNCTIONFS_ETH=y -CONFIG_USB_FUNCTIONFS_RNDIS=y -CONFIG_USB_FUNCTIONFS_GENERIC=y -CONFIG_USB_MASS_STORAGE=m -CONFIG_USB_GADGET_TARGET=m -CONFIG_USB_G_SERIAL=m -CONFIG_USB_MIDI_GADGET=m -CONFIG_USB_G_PRINTER=m -CONFIG_USB_CDC_COMPOSITE=m -CONFIG_USB_G_NOKIA=m -CONFIG_USB_G_ACM_MS=m -CONFIG_USB_G_MULTI=m -CONFIG_USB_G_MULTI_RNDIS=y -CONFIG_USB_G_MULTI_CDC=y -CONFIG_USB_G_HID=m -CONFIG_USB_G_DBGP=m -# CONFIG_USB_G_DBGP_PRINTK is not set -CONFIG_USB_G_DBGP_SERIAL=y -CONFIG_USB_G_WEBCAM=m -CONFIG_USB_RAW_GADGET=m -# end of USB Gadget precomposed configurations - -CONFIG_TYPEC=m -CONFIG_TYPEC_TCPM=m -CONFIG_TYPEC_TCPCI=m -CONFIG_TYPEC_RT1711H=m -CONFIG_TYPEC_FUSB302=m -CONFIG_TYPEC_WCOVE=m -CONFIG_TYPEC_UCSI=m -CONFIG_UCSI_CCG=m -CONFIG_UCSI_ACPI=m -CONFIG_TYPEC_HD3SS3220=m -CONFIG_TYPEC_TPS6598X=m - -# -# USB Type-C Multiplexer/DeMultiplexer Switch support -# -CONFIG_TYPEC_MUX_PI3USB30532=m -CONFIG_TYPEC_MUX_INTEL_PMC=m -# end of USB Type-C Multiplexer/DeMultiplexer Switch support - -# -# USB Type-C Alternate Mode drivers -# -CONFIG_TYPEC_DP_ALTMODE=m -CONFIG_TYPEC_NVIDIA_ALTMODE=m -# end of USB Type-C Alternate Mode drivers - -CONFIG_USB_ROLE_SWITCH=m -CONFIG_USB_ROLES_INTEL_XHCI=m -CONFIG_MMC=m -CONFIG_PWRSEQ_EMMC=m -CONFIG_PWRSEQ_SD8787=m -CONFIG_PWRSEQ_SIMPLE=m -CONFIG_MMC_BLOCK=m -CONFIG_MMC_BLOCK_MINORS=8 -CONFIG_SDIO_UART=m -CONFIG_MMC_TEST=m - -# -# MMC/SD/SDIO Host Controller Drivers -# -# CONFIG_MMC_DEBUG is not set -CONFIG_MMC_SDHCI=m -CONFIG_MMC_SDHCI_IO_ACCESSORS=y -CONFIG_MMC_SDHCI_PCI=m -CONFIG_MMC_RICOH_MMC=y -CONFIG_MMC_SDHCI_ACPI=m -CONFIG_MMC_SDHCI_PLTFM=m -CONFIG_MMC_SDHCI_OF_ARASAN=m -CONFIG_MMC_SDHCI_OF_ASPEED=m -CONFIG_MMC_SDHCI_OF_AT91=m -CONFIG_MMC_SDHCI_OF_DWCMSHC=m -CONFIG_MMC_SDHCI_CADENCE=m -CONFIG_MMC_SDHCI_F_SDH30=m -CONFIG_MMC_SDHCI_MILBEAUT=m -CONFIG_MMC_WBSD=m -CONFIG_MMC_ALCOR=m -CONFIG_MMC_TIFM_SD=m -CONFIG_MMC_SPI=m -CONFIG_MMC_SDRICOH_CS=m -CONFIG_MMC_CB710=m -CONFIG_MMC_VIA_SDMMC=m -CONFIG_MMC_VUB300=m -CONFIG_MMC_USHC=m -CONFIG_MMC_USDHI6ROL0=m -CONFIG_MMC_REALTEK_PCI=m -CONFIG_MMC_REALTEK_USB=m -CONFIG_MMC_CQHCI=m -CONFIG_MMC_HSQ=m -CONFIG_MMC_TOSHIBA_PCI=m -CONFIG_MMC_MTK=m -CONFIG_MMC_SDHCI_XENON=m -CONFIG_MMC_SDHCI_OMAP=m -CONFIG_MMC_SDHCI_AM654=m -CONFIG_MMC_SDHCI_EXTERNAL_DMA=y -CONFIG_MEMSTICK=m -# CONFIG_MEMSTICK_DEBUG is not set - -# -# MemoryStick drivers -# -# CONFIG_MEMSTICK_UNSAFE_RESUME is not set -CONFIG_MSPRO_BLOCK=m -CONFIG_MS_BLOCK=m - -# -# MemoryStick Host Controller Drivers -# -CONFIG_MEMSTICK_TIFM_MS=m -CONFIG_MEMSTICK_JMICRON_38X=m -CONFIG_MEMSTICK_R592=m -CONFIG_MEMSTICK_REALTEK_PCI=m -CONFIG_MEMSTICK_REALTEK_USB=m -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_CLASS_FLASH=m -CONFIG_LEDS_BRIGHTNESS_HW_CHANGED=y - -# -# LED drivers 
-# -CONFIG_LEDS_88PM860X=m -CONFIG_LEDS_AAT1290=m -CONFIG_LEDS_AN30259A=m -CONFIG_LEDS_APU=m -CONFIG_LEDS_AS3645A=m -CONFIG_LEDS_BCM6328=m -CONFIG_LEDS_BCM6358=m -CONFIG_LEDS_CPCAP=m -CONFIG_LEDS_CR0014114=m -CONFIG_LEDS_EL15203000=m -CONFIG_LEDS_LM3530=m -CONFIG_LEDS_LM3532=m -CONFIG_LEDS_LM3533=m -CONFIG_LEDS_LM3642=m -CONFIG_LEDS_LM3692X=m -CONFIG_LEDS_LM3601X=m -CONFIG_LEDS_MT6323=m -CONFIG_LEDS_PCA9532=m -CONFIG_LEDS_PCA9532_GPIO=y -CONFIG_LEDS_GPIO=m -CONFIG_LEDS_LP3944=m -CONFIG_LEDS_LP3952=m -# CONFIG_LEDS_LP5521 is not set -# CONFIG_LEDS_LP5523 is not set -# CONFIG_LEDS_LP5562 is not set -# CONFIG_LEDS_LP8501 is not set -CONFIG_LEDS_LP8788=m -CONFIG_LEDS_LP8860=m -CONFIG_LEDS_CLEVO_MAIL=m -CONFIG_LEDS_PCA955X=m -CONFIG_LEDS_PCA955X_GPIO=y -CONFIG_LEDS_PCA963X=m -CONFIG_LEDS_WM831X_STATUS=m -CONFIG_LEDS_WM8350=m -CONFIG_LEDS_DA903X=m -CONFIG_LEDS_DA9052=m -CONFIG_LEDS_DAC124S085=m -CONFIG_LEDS_PWM=m -CONFIG_LEDS_REGULATOR=m -CONFIG_LEDS_BD2802=m -CONFIG_LEDS_INTEL_SS4200=m -CONFIG_LEDS_LT3593=m -CONFIG_LEDS_ADP5520=m -CONFIG_LEDS_MC13783=m -CONFIG_LEDS_TCA6507=m -CONFIG_LEDS_TLC591XX=m -CONFIG_LEDS_MAX77650=m -CONFIG_LEDS_MAX77693=m -CONFIG_LEDS_MAX8997=m -CONFIG_LEDS_LM355x=m -CONFIG_LEDS_MENF21BMC=m -CONFIG_LEDS_KTD2692=m -CONFIG_LEDS_IS31FL319X=m -CONFIG_LEDS_IS31FL32XX=m - -# -# LED driver for blink(1) USB RGB LED is under Special HID drivers (HID_THINGM) -# -CONFIG_LEDS_BLINKM=m -CONFIG_LEDS_SYSCON=y -CONFIG_LEDS_MLXCPLD=m -CONFIG_LEDS_MLXREG=m -CONFIG_LEDS_USER=m -CONFIG_LEDS_NIC78BX=m -CONFIG_LEDS_SPI_BYTE=m -CONFIG_LEDS_TI_LMU_COMMON=m -CONFIG_LEDS_LM3697=m -CONFIG_LEDS_LM36274=m -CONFIG_LEDS_TPS6105X=m - -# -# LED Triggers -# -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=m -CONFIG_LEDS_TRIGGER_ONESHOT=m -CONFIG_LEDS_TRIGGER_DISK=y -CONFIG_LEDS_TRIGGER_MTD=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=m -CONFIG_LEDS_TRIGGER_BACKLIGHT=m -CONFIG_LEDS_TRIGGER_CPU=y -CONFIG_LEDS_TRIGGER_ACTIVITY=m -CONFIG_LEDS_TRIGGER_GPIO=m -CONFIG_LEDS_TRIGGER_DEFAULT_ON=m - -# -# iptables trigger is under Netfilter config (LED target) -# -CONFIG_LEDS_TRIGGER_TRANSIENT=m -CONFIG_LEDS_TRIGGER_CAMERA=m -CONFIG_LEDS_TRIGGER_PANIC=y -CONFIG_LEDS_TRIGGER_NETDEV=m -CONFIG_LEDS_TRIGGER_PATTERN=m -CONFIG_LEDS_TRIGGER_AUDIO=m -CONFIG_ACCESSIBILITY=y -CONFIG_A11Y_BRAILLE_CONSOLE=y -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_MAD=m -CONFIG_INFINIBAND_USER_ACCESS=m -# CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI is not set -CONFIG_INFINIBAND_USER_MEM=y -CONFIG_INFINIBAND_ON_DEMAND_PAGING=y -CONFIG_INFINIBAND_ADDR_TRANS=y -CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y -CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_MTHCA_DEBUG=y -CONFIG_INFINIBAND_QIB=m -CONFIG_INFINIBAND_QIB_DCA=y -CONFIG_INFINIBAND_CXGB4=m -CONFIG_INFINIBAND_EFA=m -CONFIG_INFINIBAND_I40IW=m -CONFIG_MLX4_INFINIBAND=m -CONFIG_MLX5_INFINIBAND=m -CONFIG_INFINIBAND_OCRDMA=m -CONFIG_INFINIBAND_VMWARE_PVRDMA=m -CONFIG_INFINIBAND_USNIC=m -CONFIG_INFINIBAND_BNXT_RE=m -CONFIG_INFINIBAND_HFI1=m -# CONFIG_HFI1_DEBUG_SDMA_ORDER is not set -# CONFIG_SDMA_VERBOSITY is not set -CONFIG_INFINIBAND_QEDR=m -CONFIG_INFINIBAND_RDMAVT=m -CONFIG_RDMA_RXE=m -CONFIG_RDMA_SIW=m -CONFIG_INFINIBAND_IPOIB=m -CONFIG_INFINIBAND_IPOIB_CM=y -CONFIG_INFINIBAND_IPOIB_DEBUG=y -# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set -CONFIG_INFINIBAND_SRP=m -CONFIG_INFINIBAND_SRPT=m -CONFIG_INFINIBAND_ISER=m -CONFIG_INFINIBAND_ISERT=m -CONFIG_INFINIBAND_OPA_VNIC=m -CONFIG_EDAC_ATOMIC_SCRUB=y -CONFIG_EDAC_SUPPORT=y -CONFIG_EDAC=y -CONFIG_EDAC_LEGACY_SYSFS=y -# CONFIG_EDAC_DEBUG is not set 
-CONFIG_EDAC_DECODE_MCE=m -CONFIG_EDAC_GHES=y -CONFIG_EDAC_AMD64=m -# CONFIG_EDAC_AMD64_ERROR_INJECTION is not set -CONFIG_EDAC_E752X=m -CONFIG_EDAC_I82975X=m -CONFIG_EDAC_I3000=m -CONFIG_EDAC_I3200=m -CONFIG_EDAC_IE31200=m -CONFIG_EDAC_X38=m -CONFIG_EDAC_I5400=m -CONFIG_EDAC_I7CORE=m -CONFIG_EDAC_I5000=m -CONFIG_EDAC_I5100=m -CONFIG_EDAC_I7300=m -CONFIG_EDAC_SBRIDGE=m -CONFIG_EDAC_SKX=m -CONFIG_EDAC_I10NM=m -CONFIG_EDAC_PND2=m -CONFIG_RTC_LIB=y -CONFIG_RTC_MC146818_LIB=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_HCTOSYS=y -CONFIG_RTC_HCTOSYS_DEVICE="rtc0" -CONFIG_RTC_SYSTOHC=y -CONFIG_RTC_SYSTOHC_DEVICE="rtc0" -# CONFIG_RTC_DEBUG is not set -CONFIG_RTC_NVMEM=y - -# -# RTC interfaces -# -CONFIG_RTC_INTF_SYSFS=y -CONFIG_RTC_INTF_PROC=y -CONFIG_RTC_INTF_DEV=y -CONFIG_RTC_INTF_DEV_UIE_EMUL=y -# CONFIG_RTC_DRV_TEST is not set - -# -# I2C RTC drivers -# -CONFIG_RTC_DRV_88PM860X=m -CONFIG_RTC_DRV_88PM80X=m -CONFIG_RTC_DRV_ABB5ZES3=m -CONFIG_RTC_DRV_ABEOZ9=m -CONFIG_RTC_DRV_ABX80X=m -CONFIG_RTC_DRV_AS3722=m -CONFIG_RTC_DRV_DS1307=m -CONFIG_RTC_DRV_DS1307_CENTURY=y -CONFIG_RTC_DRV_DS1374=m -CONFIG_RTC_DRV_DS1374_WDT=y -CONFIG_RTC_DRV_DS1672=m -CONFIG_RTC_DRV_HYM8563=m -CONFIG_RTC_DRV_LP8788=m -CONFIG_RTC_DRV_MAX6900=m -CONFIG_RTC_DRV_MAX8907=m -CONFIG_RTC_DRV_MAX8925=m -CONFIG_RTC_DRV_MAX8998=m -CONFIG_RTC_DRV_MAX8997=m -CONFIG_RTC_DRV_MAX77686=m -CONFIG_RTC_DRV_RK808=m -CONFIG_RTC_DRV_RS5C372=m -CONFIG_RTC_DRV_ISL1208=m -CONFIG_RTC_DRV_ISL12022=m -CONFIG_RTC_DRV_ISL12026=m -CONFIG_RTC_DRV_X1205=m -CONFIG_RTC_DRV_PCF8523=m -CONFIG_RTC_DRV_PCF85063=m -CONFIG_RTC_DRV_PCF85363=m -CONFIG_RTC_DRV_PCF8563=m -CONFIG_RTC_DRV_PCF8583=m -CONFIG_RTC_DRV_M41T80=m -CONFIG_RTC_DRV_M41T80_WDT=y -CONFIG_RTC_DRV_BD70528=m -CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_TWL4030=m -CONFIG_RTC_DRV_PALMAS=m -CONFIG_RTC_DRV_TPS6586X=m -CONFIG_RTC_DRV_TPS65910=m -CONFIG_RTC_DRV_TPS80031=m -CONFIG_RTC_DRV_RC5T583=m -CONFIG_RTC_DRV_RC5T619=m -CONFIG_RTC_DRV_S35390A=m -CONFIG_RTC_DRV_FM3130=m -CONFIG_RTC_DRV_RX8010=m -CONFIG_RTC_DRV_RX8581=m -CONFIG_RTC_DRV_RX8025=m -CONFIG_RTC_DRV_EM3027=m -CONFIG_RTC_DRV_RV3028=m -CONFIG_RTC_DRV_RV8803=m -CONFIG_RTC_DRV_S5M=m -CONFIG_RTC_DRV_SD3078=m - -# -# SPI RTC drivers -# -CONFIG_RTC_DRV_M41T93=m -CONFIG_RTC_DRV_M41T94=m -CONFIG_RTC_DRV_DS1302=m -CONFIG_RTC_DRV_DS1305=m -CONFIG_RTC_DRV_DS1343=m -CONFIG_RTC_DRV_DS1347=m -CONFIG_RTC_DRV_DS1390=m -CONFIG_RTC_DRV_MAX6916=m -CONFIG_RTC_DRV_R9701=m -CONFIG_RTC_DRV_RX4581=m -CONFIG_RTC_DRV_RX6110=m -CONFIG_RTC_DRV_RS5C348=m -CONFIG_RTC_DRV_MAX6902=m -CONFIG_RTC_DRV_PCF2123=m -CONFIG_RTC_DRV_MCP795=m -CONFIG_RTC_I2C_AND_SPI=y - -# -# SPI and I2C RTC drivers -# -CONFIG_RTC_DRV_DS3232=m -CONFIG_RTC_DRV_DS3232_HWMON=y -CONFIG_RTC_DRV_PCF2127=m -CONFIG_RTC_DRV_RV3029C2=m -CONFIG_RTC_DRV_RV3029_HWMON=y - -# -# Platform RTC drivers -# -CONFIG_RTC_DRV_CMOS=y -CONFIG_RTC_DRV_DS1286=m -CONFIG_RTC_DRV_DS1511=m -CONFIG_RTC_DRV_DS1553=m -CONFIG_RTC_DRV_DS1685_FAMILY=m -CONFIG_RTC_DRV_DS1685=y -# CONFIG_RTC_DRV_DS1689 is not set -# CONFIG_RTC_DRV_DS17285 is not set -# CONFIG_RTC_DRV_DS17485 is not set -# CONFIG_RTC_DRV_DS17885 is not set -CONFIG_RTC_DRV_DS1742=m -CONFIG_RTC_DRV_DS2404=m -CONFIG_RTC_DRV_DA9052=m -CONFIG_RTC_DRV_DA9055=m -CONFIG_RTC_DRV_DA9063=m -CONFIG_RTC_DRV_STK17TA8=m -CONFIG_RTC_DRV_M48T86=m -CONFIG_RTC_DRV_M48T35=m -CONFIG_RTC_DRV_M48T59=m -CONFIG_RTC_DRV_MSM6242=m -CONFIG_RTC_DRV_BQ4802=m -CONFIG_RTC_DRV_RP5C01=m -CONFIG_RTC_DRV_V3020=m -CONFIG_RTC_DRV_WM831X=m -CONFIG_RTC_DRV_WM8350=m -CONFIG_RTC_DRV_PCF50633=m -CONFIG_RTC_DRV_AB3100=m 
-CONFIG_RTC_DRV_ZYNQMP=m -CONFIG_RTC_DRV_CROS_EC=m - -# -# on-CPU RTC drivers -# -CONFIG_RTC_DRV_CADENCE=m -CONFIG_RTC_DRV_FTRTC010=m -CONFIG_RTC_DRV_PCAP=m -CONFIG_RTC_DRV_MC13XXX=m -CONFIG_RTC_DRV_MT6397=m -CONFIG_RTC_DRV_R7301=m -CONFIG_RTC_DRV_CPCAP=m - -# -# HID Sensor RTC drivers -# -CONFIG_RTC_DRV_HID_SENSOR_TIME=m -CONFIG_RTC_DRV_WILCO_EC=m -CONFIG_DMADEVICES=y -# CONFIG_DMADEVICES_DEBUG is not set - -# -# DMA Devices -# -CONFIG_DMA_ENGINE=y -CONFIG_DMA_VIRTUAL_CHANNELS=y -CONFIG_DMA_ACPI=y -CONFIG_DMA_OF=y -CONFIG_ALTERA_MSGDMA=m -CONFIG_DW_AXI_DMAC=m -CONFIG_FSL_EDMA=m -CONFIG_INTEL_IDMA64=m -CONFIG_INTEL_IDXD=m -CONFIG_INTEL_IOATDMA=m -CONFIG_INTEL_MIC_X100_DMA=m -CONFIG_PLX_DMA=m -CONFIG_QCOM_HIDMA_MGMT=m -CONFIG_QCOM_HIDMA=m -CONFIG_DW_DMAC_CORE=y -CONFIG_DW_DMAC=y -CONFIG_DW_DMAC_PCI=y -CONFIG_DW_EDMA=m -CONFIG_DW_EDMA_PCIE=m -CONFIG_HSU_DMA=y -CONFIG_SF_PDMA=m - -# -# DMA Clients -# -CONFIG_ASYNC_TX_DMA=y -# CONFIG_DMATEST is not set -CONFIG_DMA_ENGINE_RAID=y - -# -# DMABUF options -# -CONFIG_SYNC_FILE=y -# CONFIG_SW_SYNC is not set -CONFIG_UDMABUF=y -# CONFIG_DMABUF_MOVE_NOTIFY is not set -# CONFIG_DMABUF_SELFTESTS is not set -CONFIG_DMABUF_HEAPS=y -CONFIG_DMABUF_HEAPS_SYSTEM=y -# end of DMABUF options - -CONFIG_DCA=m -CONFIG_AUXDISPLAY=y -CONFIG_HD44780=m -CONFIG_KS0108=m -CONFIG_KS0108_PORT=0x378 -CONFIG_KS0108_DELAY=2 -CONFIG_CFAG12864B=m -CONFIG_CFAG12864B_RATE=20 -CONFIG_IMG_ASCII_LCD=m -CONFIG_HT16K33=m -CONFIG_PARPORT_PANEL=m -CONFIG_PANEL_PARPORT=0 -CONFIG_PANEL_PROFILE=5 -# CONFIG_PANEL_CHANGE_MESSAGE is not set -# CONFIG_CHARLCD_BL_OFF is not set -# CONFIG_CHARLCD_BL_ON is not set -CONFIG_CHARLCD_BL_FLASH=y -CONFIG_PANEL=m -CONFIG_CHARLCD=m -CONFIG_UIO=m -CONFIG_UIO_CIF=m -CONFIG_UIO_PDRV_GENIRQ=m -CONFIG_UIO_DMEM_GENIRQ=m -CONFIG_UIO_AEC=m -CONFIG_UIO_SERCOS3=m -CONFIG_UIO_PCI_GENERIC=m -CONFIG_UIO_NETX=m -CONFIG_UIO_PRUSS=m -CONFIG_UIO_MF624=m -CONFIG_UIO_HV_GENERIC=m -CONFIG_VFIO_IOMMU_TYPE1=m -CONFIG_VFIO_VIRQFD=m -CONFIG_VFIO=m -# CONFIG_VFIO_NOIOMMU is not set -CONFIG_VFIO_PCI=m -CONFIG_VFIO_PCI_VGA=y -CONFIG_VFIO_PCI_MMAP=y -CONFIG_VFIO_PCI_INTX=y -CONFIG_VFIO_PCI_IGD=y -CONFIG_VFIO_MDEV=m -CONFIG_VFIO_MDEV_DEVICE=m -CONFIG_IRQ_BYPASS_MANAGER=m -CONFIG_VIRT_DRIVERS=y -CONFIG_VBOXGUEST=m -CONFIG_VIRTIO=y -CONFIG_VIRTIO_MENU=y -CONFIG_VIRTIO_PCI=m -CONFIG_VIRTIO_PCI_LEGACY=y -CONFIG_VIRTIO_VDPA=m -CONFIG_VIRTIO_PMEM=m -CONFIG_VIRTIO_BALLOON=m -CONFIG_VIRTIO_INPUT=m -CONFIG_VIRTIO_MMIO=m -CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_VDPA=m -CONFIG_VDPA_SIM=m -CONFIG_IFCVF=m -CONFIG_VHOST_IOTLB=m -CONFIG_VHOST_RING=m -CONFIG_VHOST_DPN=y -CONFIG_VHOST=m -CONFIG_VHOST_MENU=y -CONFIG_VHOST_NET=m -CONFIG_VHOST_SCSI=m -CONFIG_VHOST_VSOCK=m -CONFIG_VHOST_VDPA=m -# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set - -# -# Microsoft Hyper-V guest support -# -CONFIG_HYPERV=m -CONFIG_HYPERV_TIMER=y -CONFIG_HYPERV_UTILS=m -CONFIG_HYPERV_BALLOON=m -# end of Microsoft Hyper-V guest support - -# -# Xen driver support -# -CONFIG_XEN_BALLOON=y -CONFIG_XEN_BALLOON_MEMORY_HOTPLUG=y -CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT=512 -CONFIG_XEN_SCRUB_PAGES_DEFAULT=y -CONFIG_XEN_DEV_EVTCHN=m -CONFIG_XEN_BACKEND=y -CONFIG_XENFS=m -CONFIG_XEN_COMPAT_XENFS=y -CONFIG_XEN_SYS_HYPERVISOR=y -CONFIG_XEN_XENBUS_FRONTEND=y -CONFIG_XEN_GNTDEV=m -CONFIG_XEN_GNTDEV_DMABUF=y -CONFIG_XEN_GRANT_DEV_ALLOC=m -CONFIG_XEN_GRANT_DMA_ALLOC=y -CONFIG_SWIOTLB_XEN=y -CONFIG_XEN_PCIDEV_BACKEND=m -CONFIG_XEN_PVCALLS_FRONTEND=m -CONFIG_XEN_PVCALLS_BACKEND=y -CONFIG_XEN_SCSI_BACKEND=m -CONFIG_XEN_PRIVCMD=m 
-CONFIG_XEN_ACPI_PROCESSOR=m -CONFIG_XEN_MCE_LOG=y -CONFIG_XEN_HAVE_PVMMU=y -CONFIG_XEN_EFI=y -CONFIG_XEN_AUTO_XLATE=y -CONFIG_XEN_ACPI=y -CONFIG_XEN_SYMS=y -CONFIG_XEN_HAVE_VPMU=y -CONFIG_XEN_FRONT_PGDIR_SHBUF=m -# end of Xen driver support - -# CONFIG_GREYBUS is not set -CONFIG_STAGING=y -CONFIG_PRISM2_USB=m -CONFIG_COMEDI=m -# CONFIG_COMEDI_DEBUG is not set -CONFIG_COMEDI_DEFAULT_BUF_SIZE_KB=2048 -CONFIG_COMEDI_DEFAULT_BUF_MAXSIZE_KB=20480 -CONFIG_COMEDI_MISC_DRIVERS=y -CONFIG_COMEDI_BOND=m -CONFIG_COMEDI_TEST=m -CONFIG_COMEDI_PARPORT=m -# CONFIG_COMEDI_ISA_DRIVERS is not set -CONFIG_COMEDI_PCI_DRIVERS=m -CONFIG_COMEDI_8255_PCI=m -CONFIG_COMEDI_ADDI_WATCHDOG=m -CONFIG_COMEDI_ADDI_APCI_1032=m -CONFIG_COMEDI_ADDI_APCI_1500=m -CONFIG_COMEDI_ADDI_APCI_1516=m -CONFIG_COMEDI_ADDI_APCI_1564=m -CONFIG_COMEDI_ADDI_APCI_16XX=m -CONFIG_COMEDI_ADDI_APCI_2032=m -CONFIG_COMEDI_ADDI_APCI_2200=m -CONFIG_COMEDI_ADDI_APCI_3120=m -CONFIG_COMEDI_ADDI_APCI_3501=m -CONFIG_COMEDI_ADDI_APCI_3XXX=m -CONFIG_COMEDI_ADL_PCI6208=m -CONFIG_COMEDI_ADL_PCI7X3X=m -CONFIG_COMEDI_ADL_PCI8164=m -CONFIG_COMEDI_ADL_PCI9111=m -CONFIG_COMEDI_ADL_PCI9118=m -CONFIG_COMEDI_ADV_PCI1710=m -CONFIG_COMEDI_ADV_PCI1720=m -CONFIG_COMEDI_ADV_PCI1723=m -CONFIG_COMEDI_ADV_PCI1724=m -CONFIG_COMEDI_ADV_PCI1760=m -CONFIG_COMEDI_ADV_PCI_DIO=m -CONFIG_COMEDI_AMPLC_DIO200_PCI=m -CONFIG_COMEDI_AMPLC_PC236_PCI=m -CONFIG_COMEDI_AMPLC_PC263_PCI=m -CONFIG_COMEDI_AMPLC_PCI224=m -CONFIG_COMEDI_AMPLC_PCI230=m -CONFIG_COMEDI_CONTEC_PCI_DIO=m -CONFIG_COMEDI_DAS08_PCI=m -CONFIG_COMEDI_DT3000=m -CONFIG_COMEDI_DYNA_PCI10XX=m -CONFIG_COMEDI_GSC_HPDI=m -CONFIG_COMEDI_MF6X4=m -CONFIG_COMEDI_ICP_MULTI=m -CONFIG_COMEDI_DAQBOARD2000=m -CONFIG_COMEDI_JR3_PCI=m -CONFIG_COMEDI_KE_COUNTER=m -CONFIG_COMEDI_CB_PCIDAS64=m -CONFIG_COMEDI_CB_PCIDAS=m -CONFIG_COMEDI_CB_PCIDDA=m -CONFIG_COMEDI_CB_PCIMDAS=m -CONFIG_COMEDI_CB_PCIMDDA=m -CONFIG_COMEDI_ME4000=m -CONFIG_COMEDI_ME_DAQ=m -CONFIG_COMEDI_NI_6527=m -CONFIG_COMEDI_NI_65XX=m -CONFIG_COMEDI_NI_660X=m -CONFIG_COMEDI_NI_670X=m -CONFIG_COMEDI_NI_LABPC_PCI=m -CONFIG_COMEDI_NI_PCIDIO=m -CONFIG_COMEDI_NI_PCIMIO=m -CONFIG_COMEDI_RTD520=m -CONFIG_COMEDI_S626=m -CONFIG_COMEDI_MITE=m -CONFIG_COMEDI_NI_TIOCMD=m -CONFIG_COMEDI_PCMCIA_DRIVERS=m -CONFIG_COMEDI_CB_DAS16_CS=m -CONFIG_COMEDI_DAS08_CS=m -CONFIG_COMEDI_NI_DAQ_700_CS=m -CONFIG_COMEDI_NI_DAQ_DIO24_CS=m -CONFIG_COMEDI_NI_LABPC_CS=m -CONFIG_COMEDI_NI_MIO_CS=m -CONFIG_COMEDI_QUATECH_DAQP_CS=m -CONFIG_COMEDI_USB_DRIVERS=m -CONFIG_COMEDI_DT9812=m -CONFIG_COMEDI_NI_USB6501=m -CONFIG_COMEDI_USBDUX=m -CONFIG_COMEDI_USBDUXFAST=m -CONFIG_COMEDI_USBDUXSIGMA=m -CONFIG_COMEDI_VMK80XX=m -CONFIG_COMEDI_8254=m -CONFIG_COMEDI_8255=m -CONFIG_COMEDI_8255_SA=m -CONFIG_COMEDI_KCOMEDILIB=m -CONFIG_COMEDI_AMPLC_DIO200=m -CONFIG_COMEDI_AMPLC_PC236=m -CONFIG_COMEDI_DAS08=m -CONFIG_COMEDI_NI_LABPC=m -CONFIG_COMEDI_NI_TIO=m -CONFIG_COMEDI_NI_ROUTING=m -CONFIG_RTL8192U=m -CONFIG_RTLLIB=m -CONFIG_RTLLIB_CRYPTO_CCMP=m -CONFIG_RTLLIB_CRYPTO_TKIP=m -CONFIG_RTLLIB_CRYPTO_WEP=m -CONFIG_RTL8192E=m -CONFIG_RTL8723BS=m -CONFIG_R8712U=m -CONFIG_R8188EU=m -CONFIG_88EU_AP_MODE=y -CONFIG_RTS5208=m -CONFIG_VT6655=m -CONFIG_VT6656=m - -# -# IIO staging drivers -# - -# -# Accelerometers -# -CONFIG_ADIS16203=m -CONFIG_ADIS16240=m -# end of Accelerometers - -# -# Analog to digital converters -# -CONFIG_AD7816=m -CONFIG_AD7280=m -# end of Analog to digital converters - -# -# Analog digital bi-direction converters -# -CONFIG_ADT7316=m -CONFIG_ADT7316_SPI=m -CONFIG_ADT7316_I2C=m -# end of Analog digital bi-direction 
converters - -# -# Capacitance to digital converters -# -CONFIG_AD7150=m -CONFIG_AD7746=m -# end of Capacitance to digital converters - -# -# Direct Digital Synthesis -# -CONFIG_AD9832=m -CONFIG_AD9834=m -# end of Direct Digital Synthesis - -# -# Network Analyzer, Impedance Converters -# -CONFIG_AD5933=m -# end of Network Analyzer, Impedance Converters - -# -# Active energy metering IC -# -CONFIG_ADE7854=m -CONFIG_ADE7854_I2C=m -CONFIG_ADE7854_SPI=m -# end of Active energy metering IC - -# -# Resolver to digital converters -# -CONFIG_AD2S1210=m -# end of Resolver to digital converters -# end of IIO staging drivers - -# CONFIG_FB_SM750 is not set - -# -# Speakup console speech -# -CONFIG_SPEAKUP=m -CONFIG_SPEAKUP_SYNTH_ACNTSA=m -CONFIG_SPEAKUP_SYNTH_APOLLO=m -CONFIG_SPEAKUP_SYNTH_AUDPTR=m -CONFIG_SPEAKUP_SYNTH_BNS=m -CONFIG_SPEAKUP_SYNTH_DECTLK=m -CONFIG_SPEAKUP_SYNTH_DECEXT=m -CONFIG_SPEAKUP_SYNTH_LTLK=m -CONFIG_SPEAKUP_SYNTH_SOFT=m -CONFIG_SPEAKUP_SYNTH_SPKOUT=m -CONFIG_SPEAKUP_SYNTH_TXPRT=m -CONFIG_SPEAKUP_SYNTH_DUMMY=m -# end of Speakup console speech - -CONFIG_STAGING_MEDIA=y -CONFIG_VIDEO_IPU3_IMGU=m - -# -# soc_camera sensor drivers -# -CONFIG_VIDEO_USBVISION=m - -# -# Android -# -# end of Android - -CONFIG_STAGING_BOARD=y -CONFIG_LTE_GDM724X=m -CONFIG_FIREWIRE_SERIAL=m -CONFIG_FWTTY_MAX_TOTAL_PORTS=64 -CONFIG_FWTTY_MAX_CARD_PORTS=32 -CONFIG_GS_FPGABOOT=m -CONFIG_UNISYSSPAR=y -CONFIG_UNISYS_VISORNIC=m -CONFIG_UNISYS_VISORINPUT=m -CONFIG_UNISYS_VISORHBA=m -CONFIG_COMMON_CLK_XLNX_CLKWZRD=m -# CONFIG_FB_TFT is not set -CONFIG_WILC1000=m -CONFIG_WILC1000_SDIO=m -CONFIG_WILC1000_SPI=m -# CONFIG_WILC1000_HW_OOB_INTR is not set -CONFIG_MOST_COMPONENTS=m -CONFIG_MOST_CDEV=m -CONFIG_MOST_NET=m -CONFIG_MOST_SOUND=m -CONFIG_MOST_VIDEO=m -CONFIG_MOST_DIM2=m -CONFIG_MOST_I2C=m -CONFIG_MOST_USB=m -CONFIG_KS7010=m -CONFIG_PI433=m - -# -# Gasket devices -# -CONFIG_STAGING_GASKET_FRAMEWORK=m -CONFIG_STAGING_APEX_DRIVER=m -# end of Gasket devices - -CONFIG_XIL_AXIS_FIFO=m -CONFIG_FIELDBUS_DEV=m -CONFIG_HMS_ANYBUSS_BUS=m -CONFIG_ARCX_ANYBUS_CONTROLLER=m -CONFIG_HMS_PROFINET=m -CONFIG_KPC2000=y -CONFIG_KPC2000_CORE=m -CONFIG_KPC2000_SPI=m -CONFIG_KPC2000_I2C=m -CONFIG_KPC2000_DMA=m -CONFIG_QLGE=m -CONFIG_WFX=m -CONFIG_X86_PLATFORM_DEVICES=y -CONFIG_ACPI_WMI=m -CONFIG_WMI_BMOF=m -CONFIG_ALIENWARE_WMI=m -CONFIG_HUAWEI_WMI=m -CONFIG_INTEL_WMI_THUNDERBOLT=m -CONFIG_MXM_WMI=m -CONFIG_PEAQ_WMI=m -CONFIG_XIAOMI_WMI=m -CONFIG_ACERHDF=m -CONFIG_ACER_WIRELESS=m -CONFIG_ACER_WMI=m -CONFIG_APPLE_GMUX=m -CONFIG_ASUS_LAPTOP=m -CONFIG_ASUS_WIRELESS=m -CONFIG_ASUS_WMI=m -CONFIG_ASUS_NB_WMI=m -CONFIG_EEEPC_LAPTOP=m -CONFIG_EEEPC_WMI=m -CONFIG_DCDBAS=m -CONFIG_DELL_SMBIOS=m -CONFIG_DELL_SMBIOS_WMI=y -CONFIG_DELL_SMBIOS_SMM=y -CONFIG_DELL_LAPTOP=m -CONFIG_DELL_RBTN=m -# CONFIG_DELL_RBU is not set -CONFIG_DELL_SMO8800=m -CONFIG_DELL_WMI=m -CONFIG_DELL_WMI_DESCRIPTOR=m -CONFIG_DELL_WMI_AIO=m -CONFIG_DELL_WMI_LED=m -CONFIG_AMILO_RFKILL=m -CONFIG_FUJITSU_LAPTOP=m -CONFIG_FUJITSU_TABLET=m -CONFIG_GPD_POCKET_FAN=m -CONFIG_HP_ACCEL=m -CONFIG_HP_WIRELESS=m -CONFIG_HP_WMI=m -CONFIG_IBM_RTL=m -CONFIG_IDEAPAD_LAPTOP=m -CONFIG_SENSORS_HDAPS=m -CONFIG_THINKPAD_ACPI=m -CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y -# CONFIG_THINKPAD_ACPI_DEBUGFACILITIES is not set -# CONFIG_THINKPAD_ACPI_DEBUG is not set -# CONFIG_THINKPAD_ACPI_UNSAFE_LEDS is not set -CONFIG_THINKPAD_ACPI_VIDEO=y -CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y -CONFIG_INTEL_ATOMISP2_PM=m -CONFIG_INTEL_CHT_INT33FE=m -CONFIG_INTEL_HID_EVENT=m -CONFIG_INTEL_INT0002_VGPIO=m 
-CONFIG_INTEL_MENLOW=m -CONFIG_INTEL_OAKTRAIL=m -CONFIG_INTEL_VBTN=m -CONFIG_SURFACE3_WMI=m -CONFIG_SURFACE_3_BUTTON=m -CONFIG_SURFACE_3_POWER_OPREGION=m -CONFIG_SURFACE_PRO3_BUTTON=m -CONFIG_MSI_LAPTOP=m -CONFIG_MSI_WMI=m -CONFIG_PCENGINES_APU2=m -CONFIG_SAMSUNG_LAPTOP=m -CONFIG_SAMSUNG_Q10=m -CONFIG_ACPI_TOSHIBA=m -CONFIG_TOSHIBA_BT_RFKILL=m -CONFIG_TOSHIBA_HAPS=m -CONFIG_TOSHIBA_WMI=m -CONFIG_ACPI_CMPC=m -CONFIG_COMPAL_LAPTOP=m -CONFIG_LG_LAPTOP=m -CONFIG_PANASONIC_LAPTOP=m -CONFIG_SONY_LAPTOP=m -CONFIG_SONYPI_COMPAT=y -CONFIG_SYSTEM76_ACPI=m -CONFIG_TOPSTAR_LAPTOP=m -CONFIG_I2C_MULTI_INSTANTIATE=m -CONFIG_MLX_PLATFORM=m -CONFIG_TOUCHSCREEN_DMI=y -CONFIG_INTEL_IPS=m -CONFIG_INTEL_RST=m -CONFIG_INTEL_SMARTCONNECT=m - -# -# Intel Speed Select Technology interface support -# -CONFIG_INTEL_SPEED_SELECT_INTERFACE=m -# end of Intel Speed Select Technology interface support - -CONFIG_INTEL_TURBO_MAX_3=y -CONFIG_INTEL_UNCORE_FREQ_CONTROL=m -CONFIG_INTEL_BXTWC_PMIC_TMU=m -CONFIG_INTEL_CHTDC_TI_PWRBTN=m -CONFIG_INTEL_PMC_CORE=y -CONFIG_INTEL_PMC_IPC=m -CONFIG_INTEL_PUNIT_IPC=m -CONFIG_INTEL_TELEMETRY=m -CONFIG_PMC_ATOM=y -CONFIG_MFD_CROS_EC=m -CONFIG_CHROME_PLATFORMS=y -CONFIG_CHROMEOS_LAPTOP=m -CONFIG_CHROMEOS_PSTORE=m -CONFIG_CHROMEOS_TBMC=m -CONFIG_CROS_EC=m -CONFIG_CROS_EC_I2C=m -CONFIG_CROS_EC_RPMSG=m -CONFIG_CROS_EC_ISHTP=m -CONFIG_CROS_EC_SPI=m -CONFIG_CROS_EC_LPC=m -CONFIG_CROS_EC_PROTO=y -CONFIG_CROS_KBD_LED_BACKLIGHT=m -CONFIG_CROS_EC_CHARDEV=m -CONFIG_CROS_EC_LIGHTBAR=m -CONFIG_CROS_EC_VBC=m -# CONFIG_CROS_EC_DEBUGFS is not set -CONFIG_CROS_EC_SENSORHUB=m -CONFIG_CROS_EC_SYSFS=m -CONFIG_CROS_EC_TYPEC=m -CONFIG_CROS_USBPD_LOGGER=m -CONFIG_CROS_USBPD_NOTIFY=m -CONFIG_WILCO_EC=m -# CONFIG_WILCO_EC_DEBUGFS is not set -CONFIG_WILCO_EC_EVENTS=m -CONFIG_WILCO_EC_TELEMETRY=m -CONFIG_MELLANOX_PLATFORM=y -CONFIG_MLXREG_HOTPLUG=m -CONFIG_MLXREG_IO=m -CONFIG_CLKDEV_LOOKUP=y -CONFIG_HAVE_CLK_PREPARE=y -CONFIG_COMMON_CLK=y - -# -# Common Clock Framework -# -CONFIG_COMMON_CLK_WM831X=m -CONFIG_CLK_HSDK=y -CONFIG_COMMON_CLK_MAX77686=m -CONFIG_COMMON_CLK_MAX9485=m -CONFIG_COMMON_CLK_RK808=m -CONFIG_COMMON_CLK_SI5341=m -CONFIG_COMMON_CLK_SI5351=m -CONFIG_COMMON_CLK_SI514=m -CONFIG_COMMON_CLK_SI544=m -CONFIG_COMMON_CLK_SI570=m -CONFIG_COMMON_CLK_CDCE706=m -CONFIG_COMMON_CLK_CDCE925=m -CONFIG_COMMON_CLK_CS2000_CP=m -CONFIG_COMMON_CLK_S2MPS11=m -CONFIG_CLK_TWL6040=m -CONFIG_COMMON_CLK_LOCHNAGAR=m -CONFIG_COMMON_CLK_PALMAS=m -CONFIG_COMMON_CLK_PWM=m -CONFIG_COMMON_CLK_VC5=m -CONFIG_COMMON_CLK_BD718XX=m -CONFIG_COMMON_CLK_FIXED_MMIO=y -# end of Common Clock Framework - -CONFIG_HWSPINLOCK=y - -# -# Clock Source drivers -# -CONFIG_TIMER_OF=y -CONFIG_TIMER_PROBE=y -CONFIG_CLKEVT_I8253=y -CONFIG_I8253_LOCK=y -CONFIG_CLKBLD_I8253=y -CONFIG_CLKSRC_MMIO=y -CONFIG_MICROCHIP_PIT64B=y -# end of Clock Source drivers - -CONFIG_MAILBOX=y -CONFIG_PLATFORM_MHU=m -CONFIG_PCC=y -CONFIG_ALTERA_MBOX=m -CONFIG_MAILBOX_TEST=m -CONFIG_IOMMU_IOVA=y -CONFIG_IOASID=y -CONFIG_IOMMU_API=y -CONFIG_IOMMU_SUPPORT=y - -# -# Generic IOMMU Pagetable Support -# -# end of Generic IOMMU Pagetable Support - -# CONFIG_IOMMU_DEBUGFS is not set -# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -CONFIG_OF_IOMMU=y -CONFIG_IOMMU_DMA=y -CONFIG_AMD_IOMMU=y -CONFIG_AMD_IOMMU_V2=y -CONFIG_DMAR_TABLE=y -CONFIG_INTEL_IOMMU=y -CONFIG_INTEL_IOMMU_SVM=y -# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set -CONFIG_INTEL_IOMMU_FLOPPY_WA=y -# CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON is not set -CONFIG_IRQ_REMAP=y -CONFIG_HYPERV_IOMMU=y - -# -# Remoteproc drivers -# 
-CONFIG_REMOTEPROC=y -# end of Remoteproc drivers - -# -# Rpmsg drivers -# -CONFIG_RPMSG=m -CONFIG_RPMSG_CHAR=m -CONFIG_RPMSG_QCOM_GLINK_NATIVE=m -CONFIG_RPMSG_QCOM_GLINK_RPM=m -CONFIG_RPMSG_VIRTIO=m -# end of Rpmsg drivers - -CONFIG_SOUNDWIRE=m - -# -# SoundWire Devices -# -CONFIG_SOUNDWIRE_CADENCE=m -CONFIG_SOUNDWIRE_INTEL=m -CONFIG_SOUNDWIRE_QCOM=m - -# -# SOC (System On Chip) specific Drivers -# - -# -# Amlogic SoC drivers -# -# end of Amlogic SoC drivers - -# -# Aspeed SoC drivers -# -# end of Aspeed SoC drivers - -# -# Broadcom SoC drivers -# -# end of Broadcom SoC drivers - -# -# NXP/Freescale QorIQ SoC drivers -# -# end of NXP/Freescale QorIQ SoC drivers - -# -# i.MX SoC drivers -# -# end of i.MX SoC drivers - -# -# Qualcomm SoC drivers -# -# end of Qualcomm SoC drivers - -CONFIG_SOC_TI=y - -# -# Xilinx SoC drivers -# -CONFIG_XILINX_VCU=m -# end of Xilinx SoC drivers -# end of SOC (System On Chip) specific Drivers - -CONFIG_PM_DEVFREQ=y - -# -# DEVFREQ Governors -# -CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=m -CONFIG_DEVFREQ_GOV_PERFORMANCE=m -CONFIG_DEVFREQ_GOV_POWERSAVE=m -CONFIG_DEVFREQ_GOV_USERSPACE=m -CONFIG_DEVFREQ_GOV_PASSIVE=m - -# -# DEVFREQ Drivers -# -CONFIG_PM_DEVFREQ_EVENT=y -CONFIG_EXTCON=y - -# -# Extcon Device Drivers -# -CONFIG_EXTCON_ADC_JACK=m -CONFIG_EXTCON_ARIZONA=m -CONFIG_EXTCON_AXP288=m -CONFIG_EXTCON_FSA9480=m -CONFIG_EXTCON_GPIO=m -CONFIG_EXTCON_INTEL_INT3496=m -CONFIG_EXTCON_INTEL_CHT_WC=m -CONFIG_EXTCON_MAX14577=m -CONFIG_EXTCON_MAX3355=m -CONFIG_EXTCON_MAX77693=m -CONFIG_EXTCON_MAX77843=m -CONFIG_EXTCON_MAX8997=m -CONFIG_EXTCON_PALMAS=m -CONFIG_EXTCON_PTN5150=m -CONFIG_EXTCON_RT8973A=m -CONFIG_EXTCON_SM5502=m -CONFIG_EXTCON_USB_GPIO=m -CONFIG_EXTCON_USBC_CROS_EC=m -CONFIG_MEMORY=y -CONFIG_IIO=m -CONFIG_IIO_BUFFER=y -CONFIG_IIO_BUFFER_CB=m -CONFIG_IIO_BUFFER_HW_CONSUMER=m -CONFIG_IIO_KFIFO_BUF=m -CONFIG_IIO_TRIGGERED_BUFFER=m -CONFIG_IIO_CONFIGFS=m -CONFIG_IIO_TRIGGER=y -CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 -CONFIG_IIO_SW_DEVICE=m -CONFIG_IIO_SW_TRIGGER=m -CONFIG_IIO_TRIGGERED_EVENT=m - -# -# Accelerometers -# -CONFIG_ADIS16201=m -CONFIG_ADIS16209=m -CONFIG_ADXL372=m -CONFIG_ADXL372_SPI=m -CONFIG_ADXL372_I2C=m -CONFIG_BMA180=m -CONFIG_BMA220=m -CONFIG_BMA400=m -CONFIG_BMA400_I2C=m -CONFIG_BMC150_ACCEL=m -CONFIG_BMC150_ACCEL_I2C=m -CONFIG_BMC150_ACCEL_SPI=m -CONFIG_DA280=m -CONFIG_DA311=m -CONFIG_DMARD06=m -CONFIG_DMARD09=m -CONFIG_DMARD10=m -CONFIG_HID_SENSOR_ACCEL_3D=m -CONFIG_IIO_CROS_EC_ACCEL_LEGACY=m -CONFIG_IIO_ST_ACCEL_3AXIS=m -CONFIG_IIO_ST_ACCEL_I2C_3AXIS=m -CONFIG_IIO_ST_ACCEL_SPI_3AXIS=m -CONFIG_KXSD9=m -CONFIG_KXSD9_SPI=m -CONFIG_KXSD9_I2C=m -CONFIG_KXCJK1013=m -CONFIG_MC3230=m -CONFIG_MMA7455=m -CONFIG_MMA7455_I2C=m -CONFIG_MMA7455_SPI=m -CONFIG_MMA7660=m -CONFIG_MMA8452=m -CONFIG_MMA9551_CORE=m -CONFIG_MMA9551=m -CONFIG_MMA9553=m -CONFIG_MXC4005=m -CONFIG_MXC6255=m -CONFIG_SCA3000=m -CONFIG_STK8312=m -CONFIG_STK8BA50=m -# end of Accelerometers - -# -# Analog to digital converters -# -CONFIG_AD_SIGMA_DELTA=m -CONFIG_AD7091R5=m -CONFIG_AD7124=m -CONFIG_AD7192=m -CONFIG_AD7266=m -CONFIG_AD7291=m -CONFIG_AD7292=m -CONFIG_AD7298=m -CONFIG_AD7476=m -CONFIG_AD7606=m -CONFIG_AD7606_IFACE_PARALLEL=m -CONFIG_AD7606_IFACE_SPI=m -CONFIG_AD7766=m -CONFIG_AD7768_1=m -CONFIG_AD7780=m -CONFIG_AD7791=m -CONFIG_AD7793=m -CONFIG_AD7887=m -CONFIG_AD7923=m -CONFIG_AD7949=m -CONFIG_AD799X=m -CONFIG_AXP20X_ADC=m -CONFIG_AXP288_ADC=m -CONFIG_CC10001_ADC=m -CONFIG_CPCAP_ADC=m -CONFIG_DA9150_GPADC=m -CONFIG_DLN2_ADC=m -CONFIG_ENVELOPE_DETECTOR=m -CONFIG_HI8435=m 
-CONFIG_HX711=m -CONFIG_INA2XX_ADC=m -CONFIG_LP8788_ADC=m -CONFIG_LTC2471=m -CONFIG_LTC2485=m -CONFIG_LTC2496=m -CONFIG_LTC2497=m -CONFIG_MAX1027=m -CONFIG_MAX11100=m -CONFIG_MAX1118=m -CONFIG_MAX1363=m -CONFIG_MAX9611=m -CONFIG_MCP320X=m -CONFIG_MCP3422=m -CONFIG_MCP3911=m -CONFIG_MEN_Z188_ADC=m -CONFIG_NAU7802=m -CONFIG_PALMAS_GPADC=m -CONFIG_QCOM_VADC_COMMON=m -CONFIG_QCOM_SPMI_IADC=m -CONFIG_QCOM_SPMI_VADC=m -CONFIG_QCOM_SPMI_ADC5=m -CONFIG_RN5T618_ADC=m -CONFIG_SD_ADC_MODULATOR=m -CONFIG_STMPE_ADC=m -CONFIG_TI_ADC081C=m -CONFIG_TI_ADC0832=m -CONFIG_TI_ADC084S021=m -CONFIG_TI_ADC12138=m -CONFIG_TI_ADC108S102=m -CONFIG_TI_ADC128S052=m -CONFIG_TI_ADC161S626=m -CONFIG_TI_ADS1015=m -CONFIG_TI_ADS7950=m -CONFIG_TI_ADS8344=m -CONFIG_TI_ADS8688=m -CONFIG_TI_ADS124S08=m -CONFIG_TI_AM335X_ADC=m -CONFIG_TI_TLC4541=m -CONFIG_TWL4030_MADC=m -CONFIG_TWL6030_GPADC=m -CONFIG_VF610_ADC=m -CONFIG_VIPERBOARD_ADC=m -CONFIG_XILINX_XADC=m -# end of Analog to digital converters - -# -# Analog Front Ends -# -CONFIG_IIO_RESCALE=m -# end of Analog Front Ends - -# -# Amplifiers -# -CONFIG_AD8366=m -CONFIG_HMC425=m -# end of Amplifiers - -# -# Chemical Sensors -# -CONFIG_ATLAS_PH_SENSOR=m -CONFIG_BME680=m -CONFIG_BME680_I2C=m -CONFIG_BME680_SPI=m -CONFIG_CCS811=m -CONFIG_IAQCORE=m -CONFIG_PMS7003=m -CONFIG_SENSIRION_SGP30=m -CONFIG_SPS30=m -CONFIG_VZ89X=m -# end of Chemical Sensors - -CONFIG_IIO_CROS_EC_SENSORS_CORE=m -CONFIG_IIO_CROS_EC_SENSORS=m -CONFIG_IIO_CROS_EC_SENSORS_LID_ANGLE=m - -# -# Hid Sensor IIO Common -# -CONFIG_HID_SENSOR_IIO_COMMON=m -CONFIG_HID_SENSOR_IIO_TRIGGER=m -# end of Hid Sensor IIO Common - -CONFIG_IIO_MS_SENSORS_I2C=m - -# -# SSP Sensor Common -# -CONFIG_IIO_SSP_SENSORS_COMMONS=m -CONFIG_IIO_SSP_SENSORHUB=m -# end of SSP Sensor Common - -CONFIG_IIO_ST_SENSORS_I2C=m -CONFIG_IIO_ST_SENSORS_SPI=m -CONFIG_IIO_ST_SENSORS_CORE=m - -# -# Digital to analog converters -# -CONFIG_AD5064=m -CONFIG_AD5360=m -CONFIG_AD5380=m -CONFIG_AD5421=m -CONFIG_AD5446=m -CONFIG_AD5449=m -CONFIG_AD5592R_BASE=m -CONFIG_AD5592R=m -CONFIG_AD5593R=m -CONFIG_AD5504=m -CONFIG_AD5624R_SPI=m -CONFIG_AD5686=m -CONFIG_AD5686_SPI=m -CONFIG_AD5696_I2C=m -CONFIG_AD5755=m -CONFIG_AD5758=m -CONFIG_AD5761=m -CONFIG_AD5764=m -CONFIG_AD5770R=m -CONFIG_AD5791=m -CONFIG_AD7303=m -CONFIG_AD8801=m -CONFIG_DPOT_DAC=m -CONFIG_DS4424=m -CONFIG_LTC1660=m -CONFIG_LTC2632=m -CONFIG_M62332=m -CONFIG_MAX517=m -CONFIG_MAX5821=m -CONFIG_MCP4725=m -CONFIG_MCP4922=m -CONFIG_TI_DAC082S085=m -CONFIG_TI_DAC5571=m -CONFIG_TI_DAC7311=m -CONFIG_TI_DAC7612=m -CONFIG_VF610_DAC=m -# end of Digital to analog converters - -# -# IIO dummy driver -# -# CONFIG_IIO_SIMPLE_DUMMY is not set -# end of IIO dummy driver - -# -# Frequency Synthesizers DDS/PLL -# - -# -# Clock Generator/Distribution -# -CONFIG_AD9523=m -# end of Clock Generator/Distribution - -# -# Phase-Locked Loop (PLL) frequency synthesizers -# -CONFIG_ADF4350=m -CONFIG_ADF4371=m -# end of Phase-Locked Loop (PLL) frequency synthesizers -# end of Frequency Synthesizers DDS/PLL - -# -# Digital gyroscope sensors -# -CONFIG_ADIS16080=m -CONFIG_ADIS16130=m -CONFIG_ADIS16136=m -CONFIG_ADIS16260=m -CONFIG_ADXRS450=m -CONFIG_BMG160=m -CONFIG_BMG160_I2C=m -CONFIG_BMG160_SPI=m -CONFIG_FXAS21002C=m -CONFIG_FXAS21002C_I2C=m -CONFIG_FXAS21002C_SPI=m -CONFIG_HID_SENSOR_GYRO_3D=m -CONFIG_MPU3050=m -CONFIG_MPU3050_I2C=m -CONFIG_IIO_ST_GYRO_3AXIS=m -CONFIG_IIO_ST_GYRO_I2C_3AXIS=m -CONFIG_IIO_ST_GYRO_SPI_3AXIS=m -CONFIG_ITG3200=m -# end of Digital gyroscope sensors - -# -# Health Sensors -# - -# -# Heart Rate 
Monitors -# -CONFIG_AFE4403=m -CONFIG_AFE4404=m -CONFIG_MAX30100=m -CONFIG_MAX30102=m -# end of Heart Rate Monitors -# end of Health Sensors - -# -# Humidity sensors -# -CONFIG_AM2315=m -CONFIG_DHT11=m -CONFIG_HDC100X=m -CONFIG_HID_SENSOR_HUMIDITY=m -CONFIG_HTS221=m -CONFIG_HTS221_I2C=m -CONFIG_HTS221_SPI=m -CONFIG_HTU21=m -CONFIG_SI7005=m -CONFIG_SI7020=m -# end of Humidity sensors - -# -# Inertial measurement units -# -CONFIG_ADIS16400=m -CONFIG_ADIS16460=m -CONFIG_ADIS16480=m -CONFIG_BMI160=m -CONFIG_BMI160_I2C=m -CONFIG_BMI160_SPI=m -CONFIG_FXOS8700=m -CONFIG_FXOS8700_I2C=m -CONFIG_FXOS8700_SPI=m -CONFIG_KMX61=m -CONFIG_INV_MPU6050_IIO=m -CONFIG_INV_MPU6050_I2C=m -CONFIG_INV_MPU6050_SPI=m -CONFIG_IIO_ST_LSM6DSX=m -CONFIG_IIO_ST_LSM6DSX_I2C=m -CONFIG_IIO_ST_LSM6DSX_SPI=m -CONFIG_IIO_ST_LSM6DSX_I3C=m -# end of Inertial measurement units - -CONFIG_IIO_ADIS_LIB=m -CONFIG_IIO_ADIS_LIB_BUFFER=y - -# -# Light sensors -# -CONFIG_ACPI_ALS=m -CONFIG_ADJD_S311=m -CONFIG_ADUX1020=m -CONFIG_AL3010=m -CONFIG_AL3320A=m -CONFIG_APDS9300=m -CONFIG_APDS9960=m -CONFIG_BH1750=m -CONFIG_BH1780=m -CONFIG_CM32181=m -CONFIG_CM3232=m -CONFIG_CM3323=m -CONFIG_CM3605=m -CONFIG_CM36651=m -CONFIG_IIO_CROS_EC_LIGHT_PROX=m -CONFIG_GP2AP002=m -CONFIG_GP2AP020A00F=m -CONFIG_IQS621_ALS=m -CONFIG_SENSORS_ISL29018=m -CONFIG_SENSORS_ISL29028=m -CONFIG_ISL29125=m -CONFIG_HID_SENSOR_ALS=m -CONFIG_HID_SENSOR_PROX=m -CONFIG_JSA1212=m -CONFIG_RPR0521=m -CONFIG_SENSORS_LM3533=m -CONFIG_LTR501=m -CONFIG_LV0104CS=m -CONFIG_MAX44000=m -CONFIG_MAX44009=m -CONFIG_NOA1305=m -CONFIG_OPT3001=m -CONFIG_PA12203001=m -CONFIG_SI1133=m -CONFIG_SI1145=m -CONFIG_STK3310=m -CONFIG_ST_UVIS25=m -CONFIG_ST_UVIS25_I2C=m -CONFIG_ST_UVIS25_SPI=m -CONFIG_TCS3414=m -CONFIG_TCS3472=m -CONFIG_SENSORS_TSL2563=m -CONFIG_TSL2583=m -CONFIG_TSL2772=m -CONFIG_TSL4531=m -CONFIG_US5182D=m -CONFIG_VCNL4000=m -CONFIG_VCNL4035=m -CONFIG_VEML6030=m -CONFIG_VEML6070=m -CONFIG_VL6180=m -CONFIG_ZOPT2201=m -# end of Light sensors - -# -# Magnetometer sensors -# -CONFIG_AK8974=m -CONFIG_AK8975=m -CONFIG_AK09911=m -CONFIG_BMC150_MAGN=m -CONFIG_BMC150_MAGN_I2C=m -CONFIG_BMC150_MAGN_SPI=m -CONFIG_MAG3110=m -CONFIG_HID_SENSOR_MAGNETOMETER_3D=m -CONFIG_MMC35240=m -CONFIG_IIO_ST_MAGN_3AXIS=m -CONFIG_IIO_ST_MAGN_I2C_3AXIS=m -CONFIG_IIO_ST_MAGN_SPI_3AXIS=m -CONFIG_SENSORS_HMC5843=m -CONFIG_SENSORS_HMC5843_I2C=m -CONFIG_SENSORS_HMC5843_SPI=m -CONFIG_SENSORS_RM3100=m -CONFIG_SENSORS_RM3100_I2C=m -CONFIG_SENSORS_RM3100_SPI=m -# end of Magnetometer sensors - -# -# Multiplexers -# -CONFIG_IIO_MUX=m -# end of Multiplexers - -# -# Inclinometer sensors -# -CONFIG_HID_SENSOR_INCLINOMETER_3D=m -CONFIG_HID_SENSOR_DEVICE_ROTATION=m -# end of Inclinometer sensors - -# -# Triggers - standalone -# -CONFIG_IIO_HRTIMER_TRIGGER=m -CONFIG_IIO_INTERRUPT_TRIGGER=m -CONFIG_IIO_TIGHTLOOP_TRIGGER=m -CONFIG_IIO_SYSFS_TRIGGER=m -# end of Triggers - standalone - -# -# Linear and angular position sensors -# -CONFIG_IQS624_POS=m -# end of Linear and angular position sensors - -# -# Digital potentiometers -# -CONFIG_AD5272=m -CONFIG_DS1803=m -CONFIG_MAX5432=m -CONFIG_MAX5481=m -CONFIG_MAX5487=m -CONFIG_MCP4018=m -CONFIG_MCP4131=m -CONFIG_MCP4531=m -CONFIG_MCP41010=m -CONFIG_TPL0102=m -# end of Digital potentiometers - -# -# Digital potentiostats -# -CONFIG_LMP91000=m -# end of Digital potentiostats - -# -# Pressure sensors -# -CONFIG_ABP060MG=m -CONFIG_BMP280=m -CONFIG_BMP280_I2C=m -CONFIG_BMP280_SPI=m -CONFIG_IIO_CROS_EC_BARO=m -CONFIG_DLHL60D=m -CONFIG_DPS310=m -CONFIG_HID_SENSOR_PRESS=m -CONFIG_HP03=m 
-CONFIG_ICP10100=m -CONFIG_MPL115=m -CONFIG_MPL115_I2C=m -CONFIG_MPL115_SPI=m -CONFIG_MPL3115=m -CONFIG_MS5611=m -CONFIG_MS5611_I2C=m -CONFIG_MS5611_SPI=m -CONFIG_MS5637=m -CONFIG_IIO_ST_PRESS=m -CONFIG_IIO_ST_PRESS_I2C=m -CONFIG_IIO_ST_PRESS_SPI=m -CONFIG_T5403=m -CONFIG_HP206C=m -CONFIG_ZPA2326=m -CONFIG_ZPA2326_I2C=m -CONFIG_ZPA2326_SPI=m -# end of Pressure sensors - -# -# Lightning sensors -# -CONFIG_AS3935=m -# end of Lightning sensors - -# -# Proximity and distance sensors -# -CONFIG_ISL29501=m -CONFIG_LIDAR_LITE_V2=m -CONFIG_MB1232=m -CONFIG_PING=m -CONFIG_RFD77402=m -CONFIG_SRF04=m -CONFIG_SX9500=m -CONFIG_SRF08=m -CONFIG_VL53L0X_I2C=m -# end of Proximity and distance sensors - -# -# Resolver to digital converters -# -CONFIG_AD2S90=m -CONFIG_AD2S1200=m -# end of Resolver to digital converters - -# -# Temperature sensors -# -CONFIG_IQS620AT_TEMP=m -CONFIG_LTC2983=m -CONFIG_MAXIM_THERMOCOUPLE=m -CONFIG_HID_SENSOR_TEMP=m -CONFIG_MLX90614=m -CONFIG_MLX90632=m -CONFIG_TMP006=m -CONFIG_TMP007=m -CONFIG_TSYS01=m -CONFIG_TSYS02D=m -CONFIG_MAX31856=m -# end of Temperature sensors - -CONFIG_NTB=m -CONFIG_NTB_MSI=y -CONFIG_NTB_AMD=m -CONFIG_NTB_IDT=m -CONFIG_NTB_INTEL=m -CONFIG_NTB_SWITCHTEC=m -# CONFIG_NTB_PINGPONG is not set -# CONFIG_NTB_TOOL is not set -# CONFIG_NTB_PERF is not set -# CONFIG_NTB_MSI_TEST is not set -CONFIG_NTB_TRANSPORT=m -CONFIG_VME_BUS=y - -# -# VME Bridge Drivers -# -CONFIG_VME_CA91CX42=m -CONFIG_VME_TSI148=m -# CONFIG_VME_FAKE is not set - -# -# VME Board Drivers -# -CONFIG_VMIVME_7805=m - -# -# VME Device Drivers -# -CONFIG_VME_USER=m -CONFIG_PWM=y -CONFIG_PWM_SYSFS=y -# CONFIG_PWM_DEBUG is not set -CONFIG_PWM_ATMEL_HLCDC_PWM=m -CONFIG_PWM_CRC=y -CONFIG_PWM_CROS_EC=m -CONFIG_PWM_FSL_FTM=m -CONFIG_PWM_LP3943=m -CONFIG_PWM_LPSS=m -CONFIG_PWM_LPSS_PCI=m -CONFIG_PWM_LPSS_PLATFORM=m -CONFIG_PWM_PCA9685=m -CONFIG_PWM_STMPE=y -CONFIG_PWM_TWL=m -CONFIG_PWM_TWL_LED=m - -# -# IRQ chip support -# -CONFIG_IRQCHIP=y -CONFIG_AL_FIC=y -CONFIG_MADERA_IRQ=m -# end of IRQ chip support - -CONFIG_IPACK_BUS=m -CONFIG_BOARD_TPCI200=m -CONFIG_SERIAL_IPOCTAL=m -CONFIG_RESET_CONTROLLER=y -CONFIG_RESET_BRCMSTB_RESCAL=y -CONFIG_RESET_INTEL_GW=y -CONFIG_RESET_TI_SYSCON=m - -# -# PHY Subsystem -# -CONFIG_GENERIC_PHY=y -CONFIG_GENERIC_PHY_MIPI_DPHY=y -CONFIG_BCM_KONA_USB2_PHY=m -CONFIG_PHY_CADENCE_TORRENT=m -CONFIG_PHY_CADENCE_DPHY=m -CONFIG_PHY_CADENCE_SIERRA=m -CONFIG_PHY_FSL_IMX8MQ_USB=m -CONFIG_PHY_MIXEL_MIPI_DPHY=m -CONFIG_PHY_PXA_28NM_HSIC=m -CONFIG_PHY_PXA_28NM_USB2=m -CONFIG_PHY_CPCAP_USB=m -CONFIG_PHY_MAPPHONE_MDM6600=m -CONFIG_PHY_OCELOT_SERDES=m -CONFIG_PHY_QCOM_USB_HS=m -CONFIG_PHY_QCOM_USB_HSIC=m -CONFIG_PHY_SAMSUNG_USB2=m -CONFIG_PHY_TUSB1210=m -CONFIG_PHY_INTEL_EMMC=m -# end of PHY Subsystem - -CONFIG_POWERCAP=y -CONFIG_INTEL_RAPL_CORE=m -CONFIG_INTEL_RAPL=m -CONFIG_IDLE_INJECT=y -CONFIG_MCB=m -CONFIG_MCB_PCI=m -CONFIG_MCB_LPC=m - -# -# Performance monitor support -# -# end of Performance monitor support - -CONFIG_RAS=y -CONFIG_RAS_CEC=y -# CONFIG_RAS_CEC_DEBUG is not set -CONFIG_USB4=m - -# -# Android -# -# CONFIG_ANDROID is not set -# end of Android - -CONFIG_LIBNVDIMM=y -CONFIG_BLK_DEV_PMEM=m -CONFIG_ND_BLK=m -CONFIG_ND_CLAIM=y -CONFIG_ND_BTT=m -CONFIG_BTT=y -CONFIG_ND_PFN=m -CONFIG_NVDIMM_PFN=y -CONFIG_NVDIMM_DAX=y -CONFIG_OF_PMEM=m -CONFIG_DAX_DRIVER=y -CONFIG_DAX=y -CONFIG_DEV_DAX=m -CONFIG_DEV_DAX_PMEM=m -CONFIG_DEV_DAX_HMEM=m -CONFIG_DEV_DAX_KMEM=m -CONFIG_DEV_DAX_PMEM_COMPAT=m -CONFIG_NVMEM=y -CONFIG_NVMEM_SYSFS=y -CONFIG_NVMEM_SPMI_SDAM=m -CONFIG_RAVE_SP_EEPROM=m - -# -# 
HW tracing support -# -CONFIG_STM=m -CONFIG_STM_PROTO_BASIC=m -CONFIG_STM_PROTO_SYS_T=m -# CONFIG_STM_DUMMY is not set -CONFIG_STM_SOURCE_CONSOLE=m -CONFIG_STM_SOURCE_HEARTBEAT=m -CONFIG_STM_SOURCE_FTRACE=m -CONFIG_INTEL_TH=m -CONFIG_INTEL_TH_PCI=m -CONFIG_INTEL_TH_ACPI=m -CONFIG_INTEL_TH_GTH=m -CONFIG_INTEL_TH_STH=m -CONFIG_INTEL_TH_MSU=m -CONFIG_INTEL_TH_PTI=m -# CONFIG_INTEL_TH_DEBUG is not set -# end of HW tracing support - -CONFIG_FPGA=m -CONFIG_ALTERA_PR_IP_CORE=m -CONFIG_ALTERA_PR_IP_CORE_PLAT=m -CONFIG_FPGA_MGR_ALTERA_PS_SPI=m -CONFIG_FPGA_MGR_ALTERA_CVP=m -CONFIG_FPGA_MGR_XILINX_SPI=m -CONFIG_FPGA_MGR_ICE40_SPI=m -CONFIG_FPGA_MGR_MACHXO2_SPI=m -CONFIG_FPGA_BRIDGE=m -CONFIG_ALTERA_FREEZE_BRIDGE=m -CONFIG_XILINX_PR_DECOUPLER=m -CONFIG_FPGA_REGION=m -CONFIG_OF_FPGA_REGION=m -CONFIG_FPGA_DFL=m -CONFIG_FPGA_DFL_FME=m -CONFIG_FPGA_DFL_FME_MGR=m -CONFIG_FPGA_DFL_FME_BRIDGE=m -CONFIG_FPGA_DFL_FME_REGION=m -CONFIG_FPGA_DFL_AFU=m -CONFIG_FPGA_DFL_PCI=m -CONFIG_FSI=m -CONFIG_FSI_NEW_DEV_NODE=y -CONFIG_FSI_MASTER_GPIO=m -CONFIG_FSI_MASTER_HUB=m -CONFIG_FSI_MASTER_ASPEED=m -CONFIG_FSI_SCOM=m -CONFIG_FSI_SBEFIFO=m -CONFIG_FSI_OCC=m -CONFIG_TEE=m - -# -# TEE drivers -# -CONFIG_AMDTEE=m -# end of TEE drivers - -CONFIG_MULTIPLEXER=m - -# -# Multiplexer drivers -# -CONFIG_MUX_ADG792A=m -CONFIG_MUX_ADGS1408=m -CONFIG_MUX_GPIO=m -CONFIG_MUX_MMIO=m -# end of Multiplexer drivers - -CONFIG_PM_OPP=y -CONFIG_UNISYS_VISORBUS=m -CONFIG_SIOX=m -CONFIG_SIOX_BUS_GPIO=m -CONFIG_SLIMBUS=m -CONFIG_SLIM_QCOM_CTRL=m -CONFIG_INTERCONNECT=m -CONFIG_COUNTER=m -CONFIG_FTM_QUADDEC=m -CONFIG_MOST=m -# end of Device Drivers - -# -# File systems -# -CONFIG_DCACHE_WORD_ACCESS=y -CONFIG_VALIDATE_FS_PARSER=y -CONFIG_FS_IOMAP=y -# CONFIG_EXT2_FS is not set -# CONFIG_EXT3_FS is not set -CONFIG_EXT4_FS=m -CONFIG_EXT4_USE_FOR_EXT2=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -# CONFIG_EXT4_DEBUG is not set -CONFIG_JBD2=m -# CONFIG_JBD2_DEBUG is not set -CONFIG_FS_MBCACHE=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -# CONFIG_JFS_DEBUG is not set -CONFIG_JFS_STATISTICS=y -CONFIG_XFS_FS=m -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_XFS_RT=y -CONFIG_XFS_ONLINE_SCRUB=y -CONFIG_XFS_ONLINE_REPAIR=y -# CONFIG_XFS_WARN is not set -# CONFIG_XFS_DEBUG is not set -CONFIG_GFS2_FS=m -CONFIG_GFS2_FS_LOCKING_DLM=y -CONFIG_OCFS2_FS=m -CONFIG_OCFS2_FS_O2CB=m -CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m -CONFIG_OCFS2_FS_STATS=y -CONFIG_OCFS2_DEBUG_MASKLOG=y -# CONFIG_OCFS2_DEBUG_FS is not set -CONFIG_BTRFS_FS=m -CONFIG_BTRFS_FS_POSIX_ACL=y -# CONFIG_BTRFS_FS_CHECK_INTEGRITY is not set -# CONFIG_BTRFS_FS_RUN_SANITY_TESTS is not set -# CONFIG_BTRFS_DEBUG is not set -# CONFIG_BTRFS_ASSERT is not set -# CONFIG_BTRFS_FS_REF_VERIFY is not set -CONFIG_NILFS2_FS=m -CONFIG_F2FS_FS=m -CONFIG_F2FS_STAT_FS=y -CONFIG_F2FS_FS_XATTR=y -CONFIG_F2FS_FS_POSIX_ACL=y -CONFIG_F2FS_FS_SECURITY=y -CONFIG_F2FS_CHECK_FS=y -# CONFIG_F2FS_IO_TRACE is not set -# CONFIG_F2FS_FAULT_INJECTION is not set -CONFIG_F2FS_FS_COMPRESSION=y -CONFIG_F2FS_FS_LZO=y -CONFIG_F2FS_FS_LZ4=y -CONFIG_F2FS_FS_ZSTD=y -CONFIG_ZONEFS_FS=m -CONFIG_FS_DAX=y -CONFIG_FS_DAX_PMD=y -CONFIG_FS_POSIX_ACL=y -CONFIG_EXPORTFS=y -CONFIG_EXPORTFS_BLOCK_OPS=y -CONFIG_FILE_LOCKING=y -# CONFIG_MANDATORY_FILE_LOCKING is not set -CONFIG_FS_ENCRYPTION=y -CONFIG_FS_ENCRYPTION_ALGS=m -CONFIG_FS_VERITY=y -# 
CONFIG_FS_VERITY_DEBUG is not set -CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y -CONFIG_FSNOTIFY=y -CONFIG_DNOTIFY=y -CONFIG_INOTIFY_USER=y -CONFIG_FANOTIFY=y -CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y -CONFIG_QUOTA=y -CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set -# CONFIG_QUOTA_DEBUG is not set -CONFIG_QUOTA_TREE=m -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_QUOTACTL=y -CONFIG_QUOTACTL_COMPAT=y -CONFIG_AUTOFS4_FS=y -CONFIG_AUTOFS_FS=y -CONFIG_FUSE_FS=m -CONFIG_CUSE=m -CONFIG_VIRTIO_FS=m -CONFIG_OVERLAY_FS=m -CONFIG_OVERLAY_FS_REDIRECT_DIR=y -# CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW is not set -CONFIG_OVERLAY_FS_INDEX=y -CONFIG_OVERLAY_FS_XINO_AUTO=y -CONFIG_OVERLAY_FS_METACOPY=y - -# -# Caches -# -CONFIG_FSCACHE=m -CONFIG_FSCACHE_STATS=y -CONFIG_FSCACHE_HISTOGRAM=y -# CONFIG_FSCACHE_DEBUG is not set -# CONFIG_FSCACHE_OBJECT_LIST is not set -CONFIG_CACHEFILES=m -# CONFIG_CACHEFILES_DEBUG is not set -# CONFIG_CACHEFILES_HISTOGRAM is not set -# end of Caches - -# -# CD-ROM/DVD Filesystems -# -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -# end of CD-ROM/DVD Filesystems - -# -# DOS/FAT/EXFAT/NT Filesystems -# -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_CODEPAGE=437 -CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" -CONFIG_FAT_DEFAULT_UTF8=y -CONFIG_EXFAT_FS=m -CONFIG_EXFAT_DEFAULT_IOCHARSET="utf8" -CONFIG_NTFS_FS=m -# CONFIG_NTFS_DEBUG is not set -CONFIG_NTFS_RW=y -# end of DOS/FAT/EXFAT/NT Filesystems - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -# CONFIG_PROC_KCORE is not set -# CONFIG_PROC_VMCORE is not set -CONFIG_PROC_SYSCTL=y -CONFIG_PROC_PAGE_MONITOR=y -CONFIG_PROC_CHILDREN=y -CONFIG_PROC_PID_ARCH_STATUS=y -CONFIG_PROC_CPU_RESCTRL=y -CONFIG_KERNFS=y -CONFIG_SYSFS=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_TMPFS_XATTR=y -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -CONFIG_MEMFD_CREATE=y -CONFIG_ARCH_HAS_GIGANTIC_PAGE=y -CONFIG_CONFIGFS_FS=y -CONFIG_EFIVAR_FS=y -# end of Pseudo filesystems - -CONFIG_MISC_FILESYSTEMS=y -CONFIG_ORANGEFS_FS=m -# CONFIG_ADFS_FS is not set -CONFIG_AFFS_FS=m -CONFIG_ECRYPT_FS=m -# CONFIG_ECRYPT_FS_MESSAGING is not set -CONFIG_HFS_FS=m -CONFIG_HFSPLUS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -CONFIG_JFFS2_FS=m -CONFIG_JFFS2_FS_DEBUG=0 -CONFIG_JFFS2_FS_WRITEBUFFER=y -# CONFIG_JFFS2_FS_WBUF_VERIFY is not set -CONFIG_JFFS2_SUMMARY=y -CONFIG_JFFS2_FS_XATTR=y -CONFIG_JFFS2_FS_POSIX_ACL=y -CONFIG_JFFS2_FS_SECURITY=y -# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set -CONFIG_JFFS2_ZLIB=y -CONFIG_JFFS2_RTIME=y -CONFIG_UBIFS_FS=m -# CONFIG_UBIFS_FS_ADVANCED_COMPR is not set -CONFIG_UBIFS_FS_LZO=y -CONFIG_UBIFS_FS_ZLIB=y -CONFIG_UBIFS_FS_ZSTD=y -CONFIG_UBIFS_ATIME_SUPPORT=y -CONFIG_UBIFS_FS_XATTR=y -CONFIG_UBIFS_FS_SECURITY=y -CONFIG_UBIFS_FS_AUTHENTICATION=y -CONFIG_CRAMFS=m -CONFIG_CRAMFS_BLOCKDEV=y -CONFIG_CRAMFS_MTD=y -CONFIG_SQUASHFS=m -# CONFIG_SQUASHFS_FILE_CACHE is not set -CONFIG_SQUASHFS_FILE_DIRECT=y -# CONFIG_SQUASHFS_DECOMP_SINGLE is not set -CONFIG_SQUASHFS_DECOMP_MULTI=y -# CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set -CONFIG_SQUASHFS_XATTR=y -CONFIG_SQUASHFS_ZLIB=y -CONFIG_SQUASHFS_LZ4=y -CONFIG_SQUASHFS_LZO=y -CONFIG_SQUASHFS_XZ=y -CONFIG_SQUASHFS_ZSTD=y -# CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set -# CONFIG_SQUASHFS_EMBEDDED is not set -CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 -# CONFIG_VXFS_FS is not set -CONFIG_MINIX_FS=m -CONFIG_OMFS_FS=m -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# 
CONFIG_QNX6FS_FS is not set -CONFIG_ROMFS_FS=m -CONFIG_ROMFS_BACKED_BY_BLOCK=y -# CONFIG_ROMFS_BACKED_BY_MTD is not set -# CONFIG_ROMFS_BACKED_BY_BOTH is not set -CONFIG_ROMFS_ON_BLOCK=y -CONFIG_PSTORE=y -CONFIG_PSTORE_DEFLATE_COMPRESS=m -CONFIG_PSTORE_LZO_COMPRESS=m -CONFIG_PSTORE_LZ4_COMPRESS=m -CONFIG_PSTORE_LZ4HC_COMPRESS=m -# CONFIG_PSTORE_842_COMPRESS is not set -CONFIG_PSTORE_ZSTD_COMPRESS=y -CONFIG_PSTORE_COMPRESS=y -# CONFIG_PSTORE_DEFLATE_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZO_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZ4_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZ4HC_COMPRESS_DEFAULT is not set -CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y -CONFIG_PSTORE_COMPRESS_DEFAULT="zstd" -# CONFIG_PSTORE_CONSOLE is not set -# CONFIG_PSTORE_PMSG is not set -# CONFIG_PSTORE_FTRACE is not set -CONFIG_PSTORE_RAM=y -# CONFIG_SYSV_FS is not set -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set -# CONFIG_UFS_DEBUG is not set -CONFIG_EROFS_FS=m -# CONFIG_EROFS_FS_DEBUG is not set -CONFIG_EROFS_FS_XATTR=y -CONFIG_EROFS_FS_POSIX_ACL=y -CONFIG_EROFS_FS_SECURITY=y -CONFIG_EROFS_FS_ZIP=y -CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT=2 -CONFIG_VBOXSF_FS=m -CONFIG_NETWORK_FILESYSTEMS=y -CONFIG_NFS_FS=m -CONFIG_NFS_V2=m -CONFIG_NFS_V3=m -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=m -CONFIG_NFS_SWAP=y -CONFIG_NFS_V4_1=y -CONFIG_NFS_V4_2=y -CONFIG_PNFS_FILE_LAYOUT=m -CONFIG_PNFS_BLOCK=m -CONFIG_PNFS_FLEXFILE_LAYOUT=m -CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" -CONFIG_NFS_V4_1_MIGRATION=y -CONFIG_NFS_V4_SECURITY_LABEL=y -CONFIG_NFS_FSCACHE=y -# CONFIG_NFS_USE_LEGACY_DNS is not set -CONFIG_NFS_USE_KERNEL_DNS=y -# CONFIG_NFS_DISABLE_UDP_SUPPORT is not set -CONFIG_NFSD=m -CONFIG_NFSD_V2_ACL=y -CONFIG_NFSD_V3=y -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V4=y -CONFIG_NFSD_PNFS=y -CONFIG_NFSD_BLOCKLAYOUT=y -CONFIG_NFSD_SCSILAYOUT=y -# CONFIG_NFSD_FLEXFILELAYOUT is not set -CONFIG_NFSD_V4_SECURITY_LABEL=y -CONFIG_GRACE_PERIOD=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_NFS_ACL_SUPPORT=m -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=m -CONFIG_SUNRPC_GSS=m -CONFIG_SUNRPC_BACKCHANNEL=y -CONFIG_SUNRPC_SWAP=y -CONFIG_RPCSEC_GSS_KRB5=m -CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES=y -CONFIG_SUNRPC_DEBUG=y -CONFIG_SUNRPC_XPRT_RDMA=m -CONFIG_CEPH_FS=m -CONFIG_CEPH_FSCACHE=y -CONFIG_CEPH_FS_POSIX_ACL=y -CONFIG_CEPH_FS_SECURITY_LABEL=y -CONFIG_CIFS=m -# CONFIG_CIFS_STATS2 is not set -# CONFIG_CIFS_ALLOW_INSECURE_LEGACY is not set -CONFIG_CIFS_UPCALL=y -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_DEBUG=y -# CONFIG_CIFS_DEBUG2 is not set -# CONFIG_CIFS_DEBUG_DUMP_KEYS is not set -CONFIG_CIFS_DFS_UPCALL=y -# CONFIG_CIFS_SMB_DIRECT is not set -CONFIG_CIFS_FSCACHE=y -CONFIG_CODA_FS=m -CONFIG_AFS_FS=m -# CONFIG_AFS_DEBUG is not set -CONFIG_AFS_FSCACHE=y -# CONFIG_AFS_DEBUG_CURSOR is not set -CONFIG_9P_FS=m -CONFIG_9P_FSCACHE=y -CONFIG_9P_FS_POSIX_ACL=y -CONFIG_9P_FS_SECURITY=y -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ASCII=m 
-CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_MAC_ROMAN=m -CONFIG_NLS_MAC_CELTIC=m -CONFIG_NLS_MAC_CENTEURO=m -CONFIG_NLS_MAC_CROATIAN=m -CONFIG_NLS_MAC_CYRILLIC=m -CONFIG_NLS_MAC_GAELIC=m -CONFIG_NLS_MAC_GREEK=m -CONFIG_NLS_MAC_ICELAND=m -CONFIG_NLS_MAC_INUIT=m -CONFIG_NLS_MAC_ROMANIAN=m -CONFIG_NLS_MAC_TURKISH=m -CONFIG_NLS_UTF8=m -CONFIG_DLM=m -# CONFIG_DLM_DEBUG is not set -CONFIG_UNICODE=y -# CONFIG_UNICODE_NORMALIZATION_SELFTEST is not set -CONFIG_IO_WQ=y -# end of File systems - -# -# Security options -# -CONFIG_KEYS=y -CONFIG_KEYS_REQUEST_CACHE=y -CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_BIG_KEYS=y -CONFIG_TRUSTED_KEYS=m -CONFIG_ENCRYPTED_KEYS=m -CONFIG_KEY_DH_OPERATIONS=y -CONFIG_SECURITY_DMESG_RESTRICT=y -CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y -CONFIG_SECURITY_TIOCSTI_RESTRICT=y -CONFIG_SECURITY=y -CONFIG_SECURITYFS=y -CONFIG_SECURITY_NETWORK=y -CONFIG_PAGE_TABLE_ISOLATION=y -CONFIG_SECURITY_INFINIBAND=y -CONFIG_SECURITY_NETWORK_XFRM=y -CONFIG_SECURITY_PATH=y -# CONFIG_INTEL_TXT is not set -CONFIG_LSM_MMAP_MIN_ADDR=65536 -CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y -CONFIG_HARDENED_USERCOPY=y -# CONFIG_HARDENED_USERCOPY_FALLBACK is not set -# CONFIG_HARDENED_USERCOPY_PAGESPAN is not set -CONFIG_FORTIFY_SOURCE=y -# CONFIG_FORTIFY_SOURCE_STRICT_STRING is not set -# CONFIG_STATIC_USERMODEHELPER is not set -CONFIG_SECURITY_SELINUX=y -CONFIG_SECURITY_SELINUX_BOOTPARAM=y -# CONFIG_SECURITY_SELINUX_DISABLE is not set -CONFIG_SECURITY_SELINUX_DEVELOP=y -CONFIG_SECURITY_SELINUX_AVC_STATS=y -CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9 -CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256 -CONFIG_SECURITY_SMACK=y -CONFIG_SECURITY_SMACK_BRINGUP=y -CONFIG_SECURITY_SMACK_NETFILTER=y -CONFIG_SECURITY_SMACK_APPEND_SIGNALS=y -CONFIG_SECURITY_TOMOYO=y -CONFIG_SECURITY_TOMOYO_MAX_ACCEPT_ENTRY=2048 -CONFIG_SECURITY_TOMOYO_MAX_AUDIT_LOG=1024 -# CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER is not set -CONFIG_SECURITY_TOMOYO_POLICY_LOADER="/sbin/tomoyo-init" -CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER="/sbin/init" -# CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING is not set -CONFIG_SECURITY_APPARMOR=y -CONFIG_SECURITY_APPARMOR_HASH=y -CONFIG_SECURITY_APPARMOR_HASH_DEFAULT=y -# CONFIG_SECURITY_APPARMOR_DEBUG is not set -# CONFIG_SECURITY_LOADPIN is not set -CONFIG_SECURITY_YAMA=y -CONFIG_SECURITY_SAFESETID=y -CONFIG_SECURITY_LOCKDOWN_LSM=y -# CONFIG_SECURITY_LOCKDOWN_LSM_EARLY is not set -CONFIG_LOCK_DOWN_KERNEL_FORCE_NONE=y -# CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY is not set -# CONFIG_LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY is not set -# CONFIG_INTEGRITY is not set -# CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT is not set -# CONFIG_DEFAULT_SECURITY_SELINUX is not set -# CONFIG_DEFAULT_SECURITY_SMACK is not set -# CONFIG_DEFAULT_SECURITY_TOMOYO is not set -# CONFIG_DEFAULT_SECURITY_APPARMOR is not set -CONFIG_DEFAULT_SECURITY_DAC=y -CONFIG_LSM="lockdown,yama" - -# -# Kernel hardening options -# -CONFIG_GCC_PLUGIN_STRUCTLEAK=y - -# -# Memory initialization -# -# CONFIG_INIT_STACK_NONE is not set -# CONFIG_GCC_PLUGIN_STRUCTLEAK_USER is not set -# CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF is not set -CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL=y -# CONFIG_GCC_PLUGIN_STRUCTLEAK_VERBOSE is not set -CONFIG_GCC_PLUGIN_STACKLEAK=y -CONFIG_STACKLEAK_TRACK_MIN_SIZE=100 -# 
CONFIG_STACKLEAK_METRICS is not set -# CONFIG_STACKLEAK_RUNTIME_DISABLE is not set -CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y -CONFIG_INIT_ON_FREE_DEFAULT_ON=y -CONFIG_PAGE_SANITIZE_VERIFY=y -CONFIG_SLAB_SANITIZE_VERIFY=y -# end of Memory initialization -# end of Kernel hardening options -# end of Security options - -CONFIG_XOR_BLOCKS=m -CONFIG_ASYNC_CORE=m -CONFIG_ASYNC_MEMCPY=m -CONFIG_ASYNC_XOR=m -CONFIG_ASYNC_PQ=m -CONFIG_ASYNC_RAID6_RECOV=m -CONFIG_CRYPTO=y - -# -# Crypto core or helper -# -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_ALGAPI2=y -CONFIG_CRYPTO_AEAD=y -CONFIG_CRYPTO_AEAD2=y -CONFIG_CRYPTO_SKCIPHER=y -CONFIG_CRYPTO_SKCIPHER2=y -CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_HASH2=y -CONFIG_CRYPTO_RNG=y -CONFIG_CRYPTO_RNG2=y -CONFIG_CRYPTO_RNG_DEFAULT=y -CONFIG_CRYPTO_AKCIPHER2=y -CONFIG_CRYPTO_AKCIPHER=y -CONFIG_CRYPTO_KPP2=y -CONFIG_CRYPTO_KPP=y -CONFIG_CRYPTO_ACOMP2=y -CONFIG_CRYPTO_MANAGER=y -CONFIG_CRYPTO_MANAGER2=y -CONFIG_CRYPTO_USER=m -CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y -CONFIG_CRYPTO_GF128MUL=y -CONFIG_CRYPTO_NULL=y -CONFIG_CRYPTO_NULL2=y -CONFIG_CRYPTO_PCRYPT=m -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_AUTHENC=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_SIMD=m -CONFIG_CRYPTO_GLUE_HELPER_X86=m -CONFIG_CRYPTO_ENGINE=m - -# -# Public-key cryptography -# -CONFIG_CRYPTO_RSA=y -CONFIG_CRYPTO_DH=y -CONFIG_CRYPTO_ECC=m -CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m -CONFIG_CRYPTO_CURVE25519_X86=m - -# -# Authenticated Encryption with Associated Data -# -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=y -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128_AESNI_SSE2=m -CONFIG_CRYPTO_SEQIV=y -CONFIG_CRYPTO_ECHAINIV=m - -# -# Block modes -# -CONFIG_CRYPTO_CBC=m -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_CTR=y -CONFIG_CRYPTO_CTS=m -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_NHPOLY1305=m -CONFIG_CRYPTO_NHPOLY1305_SSE2=m -CONFIG_CRYPTO_NHPOLY1305_AVX2=m -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_ESSIV=m - -# -# Hash modes -# -CONFIG_CRYPTO_CMAC=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m - -# -# Digest -# -CONFIG_CRYPTO_CRC32C=m -CONFIG_CRYPTO_CRC32C_INTEL=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRC32_PCLMUL=m -CONFIG_CRYPTO_XXHASH=m -CONFIG_CRYPTO_BLAKE2B=m -CONFIG_CRYPTO_BLAKE2S=m -CONFIG_CRYPTO_BLAKE2S_X86=m -CONFIG_CRYPTO_CRCT10DIF=y -CONFIG_CRYPTO_CRCT10DIF_PCLMUL=m -CONFIG_CRYPTO_GHASH=y -CONFIG_CRYPTO_POLY1305=m -CONFIG_CRYPTO_POLY1305_X86_64=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA1_SSSE3=m -CONFIG_CRYPTO_SHA256_SSSE3=m -CONFIG_CRYPTO_SHA512_SSSE3=m -CONFIG_CRYPTO_SHA256=y -CONFIG_CRYPTO_SHA512=y -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL=m - -# -# Ciphers -# -CONFIG_CRYPTO_AES=y -CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_AES_NI_INTEL=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_BLOWFISH_COMMON=m -CONFIG_CRYPTO_BLOWFISH_X86_64=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_CAMELLIA_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64=m -CONFIG_CRYPTO_CAST_COMMON=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST5_AVX_X86_64=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_CAST6_AVX_X86_64=m 
-CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_DES3_EDE_X86_64=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_CHACHA20=m -CONFIG_CRYPTO_CHACHA20_X86_64=m -CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SERPENT_SSE2_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX2_X86_64=m -CONFIG_CRYPTO_SM4=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_TWOFISH_COMMON=m -CONFIG_CRYPTO_TWOFISH_X86_64=m -CONFIG_CRYPTO_TWOFISH_X86_64_3WAY=m -CONFIG_CRYPTO_TWOFISH_AVX_X86_64=m - -# -# Compression -# -CONFIG_CRYPTO_DEFLATE=m -CONFIG_CRYPTO_LZO=m -CONFIG_CRYPTO_842=m -CONFIG_CRYPTO_LZ4=y -CONFIG_CRYPTO_LZ4HC=m -CONFIG_CRYPTO_ZSTD=y - -# -# Random Number Generation -# -CONFIG_CRYPTO_ANSI_CPRNG=m -CONFIG_CRYPTO_DRBG_MENU=y -CONFIG_CRYPTO_DRBG_HMAC=y -CONFIG_CRYPTO_DRBG_HASH=y -CONFIG_CRYPTO_DRBG_CTR=y -CONFIG_CRYPTO_DRBG=y -CONFIG_CRYPTO_JITTERENTROPY=y -CONFIG_CRYPTO_USER_API=m -CONFIG_CRYPTO_USER_API_HASH=m -CONFIG_CRYPTO_USER_API_SKCIPHER=m -CONFIG_CRYPTO_USER_API_RNG=m -CONFIG_CRYPTO_USER_API_AEAD=m -# CONFIG_CRYPTO_STATS is not set -CONFIG_CRYPTO_HASH_INFO=y - -# -# Crypto library routines -# -CONFIG_CRYPTO_LIB_AES=y -CONFIG_CRYPTO_LIB_ARC4=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=m -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA=m -CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m -CONFIG_CRYPTO_LIB_CHACHA=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_DES=m -CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11 -CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m -CONFIG_CRYPTO_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRYPTO_LIB_SHA256=y -CONFIG_CRYPTO_HW=y -CONFIG_CRYPTO_DEV_PADLOCK=m -CONFIG_CRYPTO_DEV_PADLOCK_AES=m -CONFIG_CRYPTO_DEV_PADLOCK_SHA=m -CONFIG_CRYPTO_DEV_ATMEL_I2C=m -CONFIG_CRYPTO_DEV_ATMEL_ECC=m -CONFIG_CRYPTO_DEV_ATMEL_SHA204A=m -CONFIG_CRYPTO_DEV_CCP=y -CONFIG_CRYPTO_DEV_CCP_DD=m -CONFIG_CRYPTO_DEV_SP_CCP=y -CONFIG_CRYPTO_DEV_CCP_CRYPTO=m -CONFIG_CRYPTO_DEV_SP_PSP=y -# CONFIG_CRYPTO_DEV_CCP_DEBUGFS is not set -CONFIG_CRYPTO_DEV_QAT=m -CONFIG_CRYPTO_DEV_QAT_DH895xCC=m -CONFIG_CRYPTO_DEV_QAT_C3XXX=m -CONFIG_CRYPTO_DEV_QAT_C62X=m -CONFIG_CRYPTO_DEV_QAT_DH895xCCVF=m -CONFIG_CRYPTO_DEV_QAT_C3XXXVF=m -CONFIG_CRYPTO_DEV_QAT_C62XVF=m -CONFIG_CRYPTO_DEV_NITROX=m -CONFIG_CRYPTO_DEV_NITROX_CNN55XX=m -CONFIG_CRYPTO_DEV_CHELSIO=m -CONFIG_CHELSIO_IPSEC_INLINE=y -CONFIG_CHELSIO_TLS_DEVICE=y -CONFIG_CRYPTO_DEV_VIRTIO=m -CONFIG_CRYPTO_DEV_SAFEXCEL=m -CONFIG_CRYPTO_DEV_CCREE=m -CONFIG_CRYPTO_DEV_AMLOGIC_GXL=m -# CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG is not set -CONFIG_ASYMMETRIC_KEY_TYPE=y -CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y -CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m -CONFIG_X509_CERTIFICATE_PARSER=y -CONFIG_PKCS8_PRIVATE_KEY_PARSER=m -CONFIG_TPM_KEY_PARSER=m -CONFIG_PKCS7_MESSAGE_PARSER=y -# CONFIG_PKCS7_TEST_KEY is not set -CONFIG_SIGNED_PE_FILE_VERIFICATION=y - -# -# Certificates for signature checking -# -CONFIG_MODULE_SIG_KEY="certs/signing_key.pem" -CONFIG_SYSTEM_TRUSTED_KEYRING=y -CONFIG_SYSTEM_TRUSTED_KEYS="" -# CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set -CONFIG_SECONDARY_TRUSTED_KEYRING=y -CONFIG_SYSTEM_BLACKLIST_KEYRING=y -CONFIG_SYSTEM_BLACKLIST_HASH_LIST="" -# end of Certificates for signature checking - -CONFIG_BINARY_PRINTF=y - -# -# Library routines -# -CONFIG_RAID6_PQ=m -CONFIG_RAID6_PQ_BENCHMARK=y -CONFIG_PACKING=y -CONFIG_BITREVERSE=y 
-CONFIG_GENERIC_STRNCPY_FROM_USER=y -CONFIG_GENERIC_STRNLEN_USER=y -CONFIG_GENERIC_NET_UTILS=y -CONFIG_GENERIC_FIND_FIRST_BIT=y -CONFIG_CORDIC=m -CONFIG_RATIONAL=y -CONFIG_GENERIC_PCI_IOMAP=y -CONFIG_GENERIC_IOMAP=y -CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y -CONFIG_ARCH_HAS_FAST_MULTIPLIER=y -CONFIG_CRC_CCITT=y -CONFIG_CRC16=m -CONFIG_CRC_T10DIF=y -CONFIG_CRC_ITU_T=m -CONFIG_CRC32=y -# CONFIG_CRC32_SELFTEST is not set -CONFIG_CRC32_SLICEBY8=y -# CONFIG_CRC32_SLICEBY4 is not set -# CONFIG_CRC32_SARWATE is not set -# CONFIG_CRC32_BIT is not set -CONFIG_CRC64=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_LIBCRC32C=m -CONFIG_CRC8=m -CONFIG_XXHASH=y -# CONFIG_RANDOM32_SELFTEST is not set -CONFIG_842_COMPRESS=m -CONFIG_842_DECOMPRESS=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_LZO_COMPRESS=y -CONFIG_LZO_DECOMPRESS=y -CONFIG_LZ4_COMPRESS=y -CONFIG_LZ4HC_COMPRESS=m -CONFIG_LZ4_DECOMPRESS=y -CONFIG_ZSTD_COMPRESS=y -CONFIG_ZSTD_DECOMPRESS=y -CONFIG_XZ_DEC=y -CONFIG_XZ_DEC_X86=y -CONFIG_XZ_DEC_POWERPC=y -CONFIG_XZ_DEC_IA64=y -CONFIG_XZ_DEC_ARM=y -CONFIG_XZ_DEC_ARMTHUMB=y -CONFIG_XZ_DEC_SPARC=y -CONFIG_XZ_DEC_BCJ=y -# CONFIG_XZ_DEC_TEST is not set -CONFIG_DECOMPRESS_GZIP=y -CONFIG_DECOMPRESS_BZIP2=y -CONFIG_DECOMPRESS_LZMA=y -CONFIG_DECOMPRESS_XZ=y -CONFIG_DECOMPRESS_LZO=y -CONFIG_DECOMPRESS_LZ4=y -CONFIG_GENERIC_ALLOCATOR=y -CONFIG_REED_SOLOMON=y -CONFIG_REED_SOLOMON_ENC8=y -CONFIG_REED_SOLOMON_DEC8=y -CONFIG_REED_SOLOMON_DEC16=y -CONFIG_BCH=m -CONFIG_TEXTSEARCH=y -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m -CONFIG_BTREE=y -CONFIG_INTERVAL_TREE=y -CONFIG_XARRAY_MULTI=y -CONFIG_ASSOCIATIVE_ARRAY=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT_MAP=y -CONFIG_HAS_DMA=y -CONFIG_NEED_SG_DMA_LENGTH=y -CONFIG_NEED_DMA_MAP_STATE=y -CONFIG_ARCH_DMA_ADDR_T_64BIT=y -CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED=y -CONFIG_DMA_VIRT_OPS=y -CONFIG_SWIOTLB=y -# CONFIG_DMA_API_DEBUG is not set -CONFIG_SGL_ALLOC=y -CONFIG_IOMMU_HELPER=y -CONFIG_CHECK_SIGNATURE=y -CONFIG_CPU_RMAP=y -CONFIG_DQL=y -CONFIG_GLOB=y -# CONFIG_GLOB_SELFTEST is not set -CONFIG_NLATTR=y -CONFIG_LRU_CACHE=m -CONFIG_CLZ_TAB=y -CONFIG_IRQ_POLL=y -CONFIG_MPILIB=y -CONFIG_DIMLIB=y -CONFIG_LIBFDT=y -CONFIG_OID_REGISTRY=y -CONFIG_UCS2_STRING=y -CONFIG_HAVE_GENERIC_VDSO=y -CONFIG_GENERIC_GETTIMEOFDAY=y -CONFIG_GENERIC_VDSO_TIME_NS=y -CONFIG_FONT_SUPPORT=y -CONFIG_FONTS=y -# CONFIG_FONT_8x8 is not set -CONFIG_FONT_8x16=y -# CONFIG_FONT_6x11 is not set -# CONFIG_FONT_7x14 is not set -# CONFIG_FONT_PEARL_8x8 is not set -# CONFIG_FONT_ACORN_8x8 is not set -# CONFIG_FONT_MINI_4x6 is not set -# CONFIG_FONT_6x10 is not set -# CONFIG_FONT_10x18 is not set -# CONFIG_FONT_SUN8x16 is not set -# CONFIG_FONT_SUN12x22 is not set -CONFIG_FONT_TER16x32=y -CONFIG_SG_POOL=y -CONFIG_ARCH_HAS_PMEM_API=y -CONFIG_MEMREGION=y -CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y -CONFIG_ARCH_HAS_UACCESS_MCSAFE=y -CONFIG_ARCH_STACKWALK=y -CONFIG_SBITMAP=y -CONFIG_PARMAN=m -CONFIG_OBJAGG=m -# CONFIG_STRING_SELFTEST is not set -# end of Library routines - -# -# Kernel hacking -# - -# -# printk and dmesg options -# -CONFIG_PRINTK_TIME=y -# CONFIG_PRINTK_CALLER is not set -CONFIG_CONSOLE_LOGLEVEL_DEFAULT=4 -CONFIG_CONSOLE_LOGLEVEL_QUIET=1 -CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 -# CONFIG_BOOT_PRINTK_DELAY is not set -CONFIG_DYNAMIC_DEBUG=y -CONFIG_SYMBOLIC_ERRNAME=y -CONFIG_DEBUG_BUGVERBOSE=y -# end of printk and dmesg options - -# -# Compile-time checks and compiler options -# -# CONFIG_DEBUG_INFO is not set -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_FRAME_WARN=2048 
-CONFIG_STRIP_ASM_SYMS=y -# CONFIG_READABLE_ASM is not set -# CONFIG_HEADERS_INSTALL is not set -# CONFIG_DEBUG_SECTION_MISMATCH is not set -CONFIG_SECTION_MISMATCH_WARN_ONLY=y -# CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE is not set -CONFIG_STACK_VALIDATION=y -# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set -# end of Compile-time checks and compiler options - -# -# Generic Kernel Debugging Instruments -# -CONFIG_MAGIC_SYSRQ=y -CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x0 -CONFIG_MAGIC_SYSRQ_SERIAL=y -CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE="" -CONFIG_DEBUG_FS=y -CONFIG_HAVE_ARCH_KGDB=y -# CONFIG_KGDB is not set -CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y -# CONFIG_UBSAN is not set -# end of Generic Kernel Debugging Instruments - -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_MISC=y - -# -# Memory Debugging -# -# CONFIG_PAGE_EXTENSION is not set -# CONFIG_DEBUG_PAGEALLOC is not set -# CONFIG_PAGE_OWNER is not set -# CONFIG_PAGE_POISONING is not set -# CONFIG_DEBUG_PAGE_REF is not set -# CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_GENERIC_PTDUMP=y -CONFIG_PTDUMP_CORE=y -# CONFIG_PTDUMP_DEBUGFS is not set -# CONFIG_DEBUG_OBJECTS is not set -# CONFIG_SLUB_DEBUG_ON is not set -# CONFIG_SLUB_STATS is not set -CONFIG_HAVE_DEBUG_KMEMLEAK=y -# CONFIG_DEBUG_KMEMLEAK is not set -# CONFIG_DEBUG_STACK_USAGE is not set -CONFIG_SCHED_STACK_END_CHECK=y -# CONFIG_DEBUG_VM is not set -CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y -# CONFIG_DEBUG_VIRTUAL is not set -CONFIG_DEBUG_MEMORY_INIT=y -# CONFIG_DEBUG_PER_CPU_MAPS is not set -CONFIG_HAVE_ARCH_KASAN=y -CONFIG_HAVE_ARCH_KASAN_VMALLOC=y -CONFIG_CC_HAS_KASAN_GENERIC=y -# CONFIG_KASAN is not set -CONFIG_KASAN_STACK=1 -# end of Memory Debugging - -# CONFIG_DEBUG_SHIRQ is not set - -# -# Debug Oops, Lockups and Hangs -# -CONFIG_PANIC_ON_OOPS=y -CONFIG_PANIC_ON_OOPS_VALUE=1 -CONFIG_PANIC_TIMEOUT=0 -CONFIG_LOCKUP_DETECTOR=y -CONFIG_SOFTLOCKUP_DETECTOR=y -# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 -CONFIG_HARDLOCKUP_DETECTOR_PERF=y -CONFIG_HARDLOCKUP_CHECK_TIMESTAMP=y -CONFIG_HARDLOCKUP_DETECTOR=y -# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set -CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 -CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 -# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set -CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 -# CONFIG_WQ_WATCHDOG is not set -# CONFIG_TEST_LOCKUP is not set -# end of Debug Oops, Lockups and Hangs - -# -# Scheduler Debugging -# -CONFIG_SCHED_DEBUG=y -CONFIG_SCHED_INFO=y -CONFIG_SCHEDSTATS=y -# end of Scheduler Debugging - -# CONFIG_DEBUG_TIMEKEEPING is not set -CONFIG_DEBUG_PREEMPT=y - -# -# Lock Debugging (spinlocks, mutexes, etc...) -# -CONFIG_LOCK_DEBUGGING_SUPPORT=y -# CONFIG_PROVE_LOCKING is not set -# CONFIG_LOCK_STAT is not set -# CONFIG_DEBUG_RT_MUTEXES is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_MUTEXES is not set -# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set -# CONFIG_DEBUG_RWSEMS is not set -# CONFIG_DEBUG_LOCK_ALLOC is not set -# CONFIG_DEBUG_ATOMIC_SLEEP is not set -# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set -# CONFIG_LOCK_TORTURE_TEST is not set -# CONFIG_WW_MUTEX_SELFTEST is not set -# end of Lock Debugging (spinlocks, mutexes, etc...) 
- -CONFIG_STACKTRACE=y -# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set -# CONFIG_DEBUG_KOBJECT is not set - -# -# Debug kernel data structures -# -CONFIG_DEBUG_LIST=y -# CONFIG_DEBUG_PLIST is not set -CONFIG_DEBUG_SG=y -CONFIG_DEBUG_NOTIFIERS=y -CONFIG_BUG_ON_DATA_CORRUPTION=y -# end of Debug kernel data structures - -CONFIG_DEBUG_CREDENTIALS=y - -# -# RCU Debugging -# -# CONFIG_RCU_PERF_TEST is not set -# CONFIG_RCU_TORTURE_TEST is not set -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -# CONFIG_RCU_TRACE is not set -# CONFIG_RCU_EQS_DEBUG is not set -# end of RCU Debugging - -# CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set -# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set -# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set -CONFIG_LATENCYTOP=y -CONFIG_USER_STACKTRACE_SUPPORT=y -CONFIG_NOP_TRACER=y -CONFIG_HAVE_FUNCTION_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y -CONFIG_HAVE_DYNAMIC_FTRACE=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y -CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y -CONFIG_HAVE_SYSCALL_TRACEPOINTS=y -CONFIG_HAVE_FENTRY=y -CONFIG_HAVE_C_RECORDMCOUNT=y -CONFIG_TRACER_MAX_TRACE=y -CONFIG_TRACE_CLOCK=y -CONFIG_RING_BUFFER=y -CONFIG_EVENT_TRACING=y -CONFIG_CONTEXT_SWITCH_TRACER=y -CONFIG_RING_BUFFER_ALLOW_SWAP=y -CONFIG_TRACING=y -CONFIG_GENERIC_TRACER=y -CONFIG_TRACING_SUPPORT=y -CONFIG_FTRACE=y -# CONFIG_BOOTTIME_TRACING is not set -CONFIG_FUNCTION_TRACER=y -CONFIG_FUNCTION_GRAPH_TRACER=y -CONFIG_DYNAMIC_FTRACE=y -CONFIG_DYNAMIC_FTRACE_WITH_REGS=y -CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y -CONFIG_FUNCTION_PROFILER=y -CONFIG_STACK_TRACER=y -# CONFIG_PREEMPTIRQ_EVENTS is not set -# CONFIG_IRQSOFF_TRACER is not set -# CONFIG_PREEMPT_TRACER is not set -CONFIG_SCHED_TRACER=y -CONFIG_HWLAT_TRACER=y -CONFIG_MMIOTRACE=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_TRACER_SNAPSHOT=y -# CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP is not set -CONFIG_BRANCH_PROFILE_NONE=y -# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_KPROBE_EVENTS=y -# CONFIG_KPROBE_EVENTS_ON_NOTRACE is not set -CONFIG_UPROBE_EVENTS=y -CONFIG_BPF_EVENTS=y -CONFIG_DYNAMIC_EVENTS=y -CONFIG_PROBE_EVENTS=y -# CONFIG_BPF_KPROBE_OVERRIDE is not set -CONFIG_FTRACE_MCOUNT_RECORD=y -# CONFIG_HIST_TRIGGERS is not set -# CONFIG_TRACE_EVENT_INJECT is not set -# CONFIG_TRACEPOINT_BENCHMARK is not set -# CONFIG_RING_BUFFER_BENCHMARK is not set -# CONFIG_TRACE_EVAL_MAP_FILE is not set -# CONFIG_FTRACE_STARTUP_TEST is not set -# CONFIG_RING_BUFFER_STARTUP_TEST is not set -# CONFIG_MMIOTRACE_TEST is not set -# CONFIG_PREEMPTIRQ_DELAY_TEST is not set -# CONFIG_KPROBE_EVENT_GEN_TEST is not set -# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set -# CONFIG_SAMPLES is not set -CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y -# CONFIG_STRICT_DEVMEM is not set - -# -# x86 Debugging -# -CONFIG_TRACE_IRQFLAGS_SUPPORT=y -# CONFIG_X86_VERBOSE_BOOTUP is not set -CONFIG_EARLY_PRINTK=y -# CONFIG_EARLY_PRINTK_DBGP is not set -# CONFIG_EARLY_PRINTK_USB_XDBC is not set -# CONFIG_EFI_PGT_DUMP is not set -CONFIG_DEBUG_WX=y -CONFIG_DOUBLEFAULT=y -# CONFIG_DEBUG_TLBFLUSH is not set -# CONFIG_IOMMU_DEBUG is not set -CONFIG_HAVE_MMIOTRACE_SUPPORT=y -# CONFIG_X86_DECODER_SELFTEST is not set -CONFIG_IO_DELAY_0X80=y -# CONFIG_IO_DELAY_0XED is not set -# CONFIG_IO_DELAY_UDELAY is not set -# CONFIG_IO_DELAY_NONE is not set -CONFIG_DEBUG_BOOT_PARAMS=y -# CONFIG_CPA_DEBUG is not set -# CONFIG_DEBUG_ENTRY is not set -# CONFIG_DEBUG_NMI_SELFTEST is not set -# CONFIG_X86_DEBUG_FPU is not set -# CONFIG_PUNIT_ATOM_DEBUG is not set -CONFIG_UNWINDER_ORC=y -# 
CONFIG_UNWINDER_FRAME_POINTER is not set -# CONFIG_UNWINDER_GUESS is not set -# end of x86 Debugging - -# -# Kernel Testing and Coverage -# -# CONFIG_KUNIT is not set -# CONFIG_NOTIFIER_ERROR_INJECTION is not set -CONFIG_FUNCTION_ERROR_INJECTION=y -# CONFIG_FAULT_INJECTION is not set -CONFIG_ARCH_HAS_KCOV=y -CONFIG_CC_HAS_SANCOV_TRACE_PC=y -# CONFIG_KCOV is not set -CONFIG_RUNTIME_TESTING_MENU=y -CONFIG_LKDTM=m -# CONFIG_TEST_LIST_SORT is not set -# CONFIG_TEST_MIN_HEAP is not set -# CONFIG_TEST_SORT is not set -# CONFIG_KPROBES_SANITY_TEST is not set -# CONFIG_BACKTRACE_SELF_TEST is not set -# CONFIG_RBTREE_TEST is not set -# CONFIG_REED_SOLOMON_TEST is not set -# CONFIG_INTERVAL_TREE_TEST is not set -# CONFIG_PERCPU_TEST is not set -# CONFIG_ATOMIC64_SELFTEST is not set -# CONFIG_ASYNC_RAID6_TEST is not set -# CONFIG_TEST_HEXDUMP is not set -# CONFIG_TEST_STRING_HELPERS is not set -# CONFIG_TEST_STRSCPY is not set -# CONFIG_TEST_KSTRTOX is not set -# CONFIG_TEST_PRINTF is not set -# CONFIG_TEST_BITMAP is not set -# CONFIG_TEST_BITFIELD is not set -# CONFIG_TEST_UUID is not set -# CONFIG_TEST_XARRAY is not set -# CONFIG_TEST_OVERFLOW is not set -# CONFIG_TEST_RHASHTABLE is not set -# CONFIG_TEST_HASH is not set -# CONFIG_TEST_IDA is not set -# CONFIG_TEST_PARMAN is not set -# CONFIG_TEST_LKM is not set -# CONFIG_TEST_VMALLOC is not set -# CONFIG_TEST_USER_COPY is not set -# CONFIG_TEST_BPF is not set -# CONFIG_TEST_BLACKHOLE_DEV is not set -# CONFIG_FIND_BIT_BENCHMARK is not set -# CONFIG_TEST_FIRMWARE is not set -# CONFIG_TEST_SYSCTL is not set -# CONFIG_TEST_UDELAY is not set -# CONFIG_TEST_STATIC_KEYS is not set -# CONFIG_TEST_KMOD is not set -# CONFIG_TEST_MEMCAT_P is not set -# CONFIG_TEST_OBJAGG is not set -# CONFIG_TEST_STACKINIT is not set -# CONFIG_TEST_MEMINIT is not set -# CONFIG_MEMTEST is not set -# CONFIG_HYPERV_TESTING is not set -# end of Kernel Testing and Coverage -# end of Kernel hacking diff --git a/linux57-tkg/linux57-tkg-config/generic-desktop-profile.cfg b/linux57-tkg/linux57-tkg-config/generic-desktop-profile.cfg deleted file mode 100644 index 3750e64..0000000 --- a/linux57-tkg/linux57-tkg-config/generic-desktop-profile.cfg +++ /dev/null @@ -1,55 +0,0 @@ -# linux57-TkG config file -# Generic Desktop - - -#### MISC OPTIONS #### - -# External config file to use - If the given file exists in path, it will override default config (customization.cfg) - Default is ~/.config/frogminer/linux50-tkg.cfg -_EXT_CONFIG_PATH=~/.config/frogminer/linux57-tkg.cfg - -#### KERNEL OPTIONS #### - -# Name of the default config file to use from the linux???-tkg-config folder. Arch default is "config.x86_64". 
-_configfile="config.x86_64" - -# Disable some non-module debugging - See PKGBUILD for the list -_debugdisable="false" - -# LEAVE AN EMPTY VALUE TO BE PROMPTED ABOUT FOLLOWING OPTIONS AT BUILD TIME - -# Set to "true" to disable FUNCTION_TRACER/GRAPH_TRACER, lowering overhead but limiting debugging and analyzing of kernel functions - Kernel default is "false" -_ftracedisable="false" - -# Set to "true" to disable NUMA, lowering overhead, but breaking CUDA/NvEnc on Nvidia equipped systems - Kernel default is "false" -_numadisable="false" - -# Set to "true" to use explicit preemption points to lower latency at the cost of a small throughput loss - Can give a nice perf boost in VMs - Kernel default is "false" -_voluntary_preempt="false" - -# A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience (ZENIFY) - Default is "true" -_zenify="true" - -# compiler optimization level - 1. Optimize for performance (-O2); 2. Optimize harder (-O3); 3. Optimize for size (-Os) - Kernel default is "2" -_compileroptlevel="1" - -# Trust the CPU manufacturer to initialize Linux's CRNG (RANDOM_TRUST_CPU) - Kernel default is "false" -_random_trust_cpu="false" - -# CPU scheduler runqueue sharing - No sharing (RQ_NONE), SMT (hyperthread) siblings (RQ_SMT), Multicore siblings (RQ_MC), Symmetric Multi-Processing (RQ_SMP), NUMA (RQ_ALL) -# Valid values are "none", "smt", "mc", "mc-llc"(for zen), "smp", "all" - Kernel default is "mc" -_runqueue_sharing="mc" - -# Timer frequency - "500", "750" or "1000" - More options available in kernel config prompt when left empty depending on selected cpusched - Kernel default is "750" -_timer_freq="500" - - -#### USER PATCHES #### - -# You can use your own patches by putting them in the same folder as the PKGBUILD and giving them the .mypatch extension. -# You can also revert patches by putting them in the same folder as the PKGBUILD and giving them the .myrevert extension. - -# Also, userpatches variable below must be set to true for the above to work. -_user_patches="true" - -# Apply all user patches without confirmation - !!! NOT RECOMMENDED !!! -_user_patches_no_confirm="false" diff --git a/linux57-tkg/linux57-tkg-config/prepare b/linux57-tkg/linux57-tkg-config/prepare deleted file mode 100644 index 1350f34..0000000 --- a/linux57-tkg/linux57-tkg-config/prepare +++ /dev/null @@ -1,983 +0,0 @@ -#!/bin/bash - -_basever=57 -_basekernel=5.7 -_sub=19 - -_tkg_initscript() { - - cp "$_where"/linux"$_basever"-tkg-patches/* "$_where" # copy patches inside the PKGBUILD's dir to preserve makepkg sourcing and md5sum checking - cp "$_where"/linux"$_basever"-tkg-config/* "$_where" # copy config files and hooks inside the PKGBUILD's dir to preserve makepkg sourcing and md5sum checking - - # Load external configuration file if present. Available variable values will overwrite customization.cfg ones. - if [ -e "$_EXT_CONFIG_PATH" ]; then - source "$_EXT_CONFIG_PATH" && msg2 "External configuration file $_EXT_CONFIG_PATH will be used to override customization.cfg values." && msg2 "" - fi - - if [ -z "$_OPTIPROFILE" ] && [ ! -e "$_where"/cpuschedset ]; then - # Prompt about optimized configurations. Available variable values will overwrite customization.cfg/external config ones. - plain "Do you want to use a predefined optimized profile?" 
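For reference, the _EXT_CONFIG_PATH sourcing above means every prompt in this script can be pre-answered from a per-user file. A minimal sketch of such an override file (the variable names come from customization.cfg; the values shown are examples only, not project defaults):

    # ~/.config/frogminer/linux57-tkg.cfg - sourced by _tkg_initscript, overrides customization.cfg
    _cpusched="pds"                  # skip the interactive scheduler prompt
    _ftracedisable="true"            # drop FUNCTION_TRACER/GRAPH_TRACER for lower overhead
    _timer_freq="1000"               # 1000 Hz tick
    _user_patches_no_confirm="true"  # apply .mypatch files without asking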
- read -rp "`echo $' > 1.Custom\n 2.Ryzen Desktop (Performance)\n 3.Other Desktop (Performance)\nchoice[1-3?]: '`" _OPTIPROFILE; - fi - if [ "$_OPTIPROFILE" = "2" ]; then - source "$_where"/ryzen-desktop-profile.cfg && msg2 "Ryzen Desktop (Performance) profile will be used." && msg2 "" - elif [ "$_OPTIPROFILE" = "3" ]; then - source "$_where"/generic-desktop-profile.cfg && msg2 "Generic Desktop (Performance) profile will be used." && msg2 "" - fi - - # source cpuschedset early if present - if [ -e "$_where"/cpuschedset ]; then - source "$_where"/cpuschedset - fi - - # CPU SCHED selector - if [ -z "$_cpusched" ] && [ ! -e "$_where"/cpuschedset ]; then - plain "What CPU sched variant do you want to build/install?" - read -rp "`echo $' > 1.PDS\n 2.MuQSS\n 3.BMQ\n 4.CFS\nchoice[1-4?]: '`" CONDITION; - if [ "$CONDITION" = "2" ]; then - echo "_cpusched=\"MuQSS\"" > "$_where"/cpuschedset - elif [ "$CONDITION" = "3" ]; then - echo "_cpusched=\"bmq\"" > "$_where"/cpuschedset - elif [ "$CONDITION" = "4" ]; then - echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - else - echo "_cpusched=\"pds\"" > "$_where"/cpuschedset - fi - if [ -n "$_custom_pkgbase" ]; then - echo "_custom_pkgbase=\"${_custom_pkgbase}\"" >> "$_where"/cpuschedset - fi - elif [ "$_cpusched" = "muqss" ] || [ "$_cpusched" = "MuQSS" ]; then - echo "_cpusched=\"MuQSS\"" > "$_where"/cpuschedset - elif [ "$_cpusched" = "pds" ]; then - echo "_cpusched=\"pds\"" > "$_where"/cpuschedset - elif [ "$_cpusched" = "bmq" ]; then - echo "_cpusched=\"bmq\"" > "$_where"/cpuschedset - else - if [ "$_nofallback" != "true" ]; then - warning "Something is wrong with your cpusched selection. Do you want to fallback to CFS (default)?" - read -rp "`echo $' > N/y : '`" _fallback; - fi - if [[ "$_fallback" =~ [yY] ]] || [ "$_nofallback" = "true" ]; then - echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - else - error "Exiting..." - exit 1 - fi - fi - - source "$_where"/cpuschedset -} - -user_patcher() { - # To patch the user because all your base are belong to us - local _patches=("$_where"/*."${_userpatch_ext}revert") - if [ ${#_patches[@]} -ge 2 ] || [ -e "${_patches}" ]; then - if [ "$_user_patches_no_confirm" != "true" ]; then - msg2 "Found ${#_patches[@]} 'to revert' userpatches for ${_userpatch_target}:" - printf '%s\n' "${_patches[@]}" - read -rp "Do you want to install it/them? - Be careful with that ;)"$'\n> N/y : ' _CONDITION; - fi - if [[ "$_CONDITION" =~ [yY] ]] || [ "$_user_patches_no_confirm" = "true" ]; then - for _f in "${_patches[@]}"; do - if [ -e "${_f}" ]; then - msg2 "######################################################" - msg2 "" - msg2 "Reverting your own ${_userpatch_target} patch ${_f}" - msg2 "" - msg2 "######################################################" - patch -Np1 -R < "${_f}" - echo "Reverted your own patch ${_f}" >> "$_where"/last_build_config.log - fi - done - fi - fi - - _patches=("$_where"/*."${_userpatch_ext}patch") - if [ ${#_patches[@]} -ge 2 ] || [ -e "${_patches}" ]; then - if [ "$_user_patches_no_confirm" != "true" ]; then - msg2 "Found ${#_patches[@]} userpatches for ${_userpatch_target}:" - printf '%s\n' "${_patches[@]}" - read -rp "Do you want to install it/them? 
- Be careful with that ;)"$'\n> N/y : ' _CONDITION; - fi - if [[ "$_CONDITION" =~ [yY] ]] || [ "$_user_patches_no_confirm" = "true" ]; then - for _f in "${_patches[@]}"; do - if [ -e "${_f}" ]; then - msg2 "######################################################" - msg2 "" - msg2 "Applying your own ${_userpatch_target} patch ${_f}" - msg2 "" - msg2 "######################################################" - patch -Np1 < "${_f}" - echo "Applied your own patch ${_f}" >> "$_where"/last_build_config.log - fi - done - fi - fi -} - -_tkg_srcprep() { - - if [ "${_distro}" = "Arch" ]; then - msg2 "Setting version..." - scripts/setlocalversion --save-scmversion - echo "-$pkgrel-tkg-${_cpusched}" > localversion.10-pkgrel - echo "" > localversion.20-pkgname - - # add upstream patch - msg2 "Patching from $_basekernel to $pkgver" - patch -p1 -i "$srcdir"/patch-"${pkgver}" - - # ARCH Patches - if [ "${_configfile}" = "config_hardened.x86_64" ] && [ "${_cpusched}" = "cfs" ]; then - msg2 "Using linux hardened patchset" - patch -Np1 -i "$srcdir"/0012-linux-hardened.patch - else - patch -Np1 -i "$srcdir"/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch - fi - fi - - # graysky's cpu opts - https://github.com/graysky2/kernel_gcc_patch - msg2 "Applying graysky's cpu opts patch" - if [ "${_distro}" = "Arch" ]; then - patch -Np1 -i "$srcdir"/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v5.7%2B.patch - else - patch -Np1 -i "$srcdir"/enable_additional_cpu_optimizations_for_gcc_v10.1+_kernel_v5.7+.patch - fi - - # TkG - msg2 "Applying clear linux patches" - patch -Np1 -i "$srcdir"/0002-clear-patches.patch - - msg2 "Applying glitched base patch" - patch -Np1 -i "$srcdir"/0003-glitched-base.patch - - if [ -z $_misc_adds ]; then - plain "Enable misc additions ? May contain temporary fixes pending upstream or changes that can break on non-Arch. 
" - read -rp "`echo $' > [Y]/n : '`" _interactive_misc_adds; - if [ "$_interactive_misc_adds" != "n" ] && [ "$_interactive_misc_adds" != "N" ]; then - _misc_adds="true" - fi - fi - - if [ "$_misc_adds" = "true" ]; then - msg2 "Applying misc additions patch" - patch -Np1 -i "$srcdir"/0012-misc-additions.patch - fi - - if [ "${_cpusched}" = "MuQSS" ]; then - # MuQSS - msg2 "Applying MuQSS base patch" - patch -Np1 -i "$srcdir"/0004-5.7-ck1.patch - - if [ "${_aggressive_ondemand}" = "true" ]; then - msg2 "Applying MuQSS agressive ondemand governor patch" - patch -Np1 -i "$srcdir"/0004-glitched-ondemand-muqss.patch - fi - - msg2 "Applying Glitched MuQSS patch" - patch -Np1 -i "$srcdir"/0004-glitched-muqss.patch - - elif [ "${_cpusched}" = "pds" ]; then - # PDS-mq - msg2 "Applying PDS base patch" - patch -Np1 -i "$srcdir"/0005-v5.7_undead-pds099o.patch - - if [ "${_aggressive_ondemand}" = "true" ]; then - msg2 "Applying PDS agressive ondemand governor patch" - patch -Np1 -i "$srcdir"/0005-glitched-ondemand-pds.patch - fi - - msg2 "Applying Glitched PDS patch" - patch -Np1 -i "$srcdir"/0005-glitched-pds.patch - - elif [ "${_cpusched}" = "bmq" ]; then - # Project C / BMQ - msg2 "Applying Project C / BMQ base patch" - - patch -Np1 -i "$srcdir"/0009-prjc_v5.7-r3.patch - - if [ "${_aggressive_ondemand}" = "true" ]; then - msg2 "Applying BMQ agressive ondemand governor patch" - patch -Np1 -i "$srcdir"/0009-glitched-ondemand-bmq.patch - fi - - msg2 "Applying Glitched BMQ patch" - patch -Np1 -i "$srcdir"/0009-glitched-bmq.patch - - elif [ "${_cpusched}" = "cfs" ]; then - msg2 "Applying Glitched CFS patch" - patch -Np1 -i "$srcdir"/0003-glitched-cfs.patch - fi - - if [ "${_distro}" = "Arch" ]; then - if [ -z "${_configfile}" ]; then - _configfile="config.x86_64" - fi - - cat "${srcdir}/${_configfile}" > ./.config - fi - - - # Set some -tkg defaults - echo "# CONFIG_DYNAMIC_FAULT is not set" >> ./.config - sed -i -e 's/CONFIG_DEFAULT_FQ_CODEL=y/# CONFIG_DEFAULT_FQ_CODEL is not set/' ./.config - echo "CONFIG_DEFAULT_CAKE=y" >> ./.config - echo "CONFIG_NR_TTY_DEVICES=63" >> ./.config - echo "# CONFIG_NTP_PPS is not set" >> ./.config - sed -i -e 's/CONFIG_CRYPTO_LZ4=m/CONFIG_CRYPTO_LZ4=y/' ./.config - sed -i -e 's/CONFIG_CRYPTO_LZ4HC=m/CONFIG_CRYPTO_LZ4HC=y/' ./.config - sed -i -e 's/CONFIG_LZ4_COMPRESS=m/CONFIG_LZ4_COMPRESS=y/' ./.config - sed -i -e 's/CONFIG_LZ4HC_COMPRESS=m/CONFIG_LZ4HC_COMPRESS=y/' ./.config - sed -i -e 's/CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO=y/# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set/' ./.config - sed -i -e 's/# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4 is not set/CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4=y/' ./.config - sed -i -e 's/CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lzo"/CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lz4"/' ./.config - #sed -i -e 's/CONFIG_RCU_BOOST_DELAY=500/CONFIG_RCU_BOOST_DELAY=0/' ./.config - sed -i -e 's/# CONFIG_CMDLINE_BOOL is not set/CONFIG_CMDLINE_BOOL=y/' ./.config - echo "CONFIG_CMDLINE=\"${_custom_commandline}\"" >> ./.config - echo "# CONFIG_CMDLINE_OVERRIDE is not set" >> ./.config - echo "# CONFIG_X86_P6_NOP is not set" >> ./.config - if [ "$_noccache" != "true" ]; then - if { [ "$_distro" = "Arch" ] && pacman -Qq ccache &> /dev/null; } || { [ "$_distro" = "Ubuntu" ] && dpkg -l ccache > /dev/null; }; then - sed -i -e 's/CONFIG_GCC_PLUGINS=y/# CONFIG_GCC_PLUGINS is not set/' ./.config - fi - fi - # Skip dbg package creation on non-Arch - if [ "$_distro" != "Arch" ]; then - sed -i -e 's/CONFIG_DEBUG_INFO.*/CONFIG_DEBUG_INFO=n/' ./.config - fi - - if [ "$_font_autoselect" != 
"false" ]; then - sed -i -e 's/CONFIG_FONT_TER16x32=y/# CONFIG_FONT_TER16x32 is not set\nCONFIG_FONT_AUTOSELECT=y/' ./.config - fi - - # Inject cpuopts options - echo "# CONFIG_MK8SSE3 is not set" >> ./.config - echo "# CONFIG_MK10 is not set" >> ./.config - echo "# CONFIG_MBARCELONA is not set" >> ./.config - echo "# CONFIG_MBOBCAT is not set" >> ./.config - echo "# CONFIG_MJAGUAR is not set" >> ./.config - echo "# CONFIG_MBULLDOZER is not set" >> ./.config - echo "# CONFIG_MPILEDRIVER is not set" >> ./.config - echo "# CONFIG_MSTEAMROLLER is not set" >> ./.config - echo "# CONFIG_MEXCAVATOR is not set" >> ./.config - echo "# CONFIG_MZEN is not set" >> ./.config - echo "# CONFIG_MZEN2 is not set" >> ./.config - echo "# CONFIG_MATOM is not set" >> ./.config - echo "# CONFIG_MNEHALEM is not set" >> ./.config - echo "# CONFIG_MWESTMERE is not set" >> ./.config - echo "# CONFIG_MSILVERMONT is not set" >> ./.config - echo "# CONFIG_MSANDYBRIDGE is not set" >> ./.config - echo "# CONFIG_MIVYBRIDGE is not set" >> ./.config - echo "# CONFIG_MHASWELL is not set" >> ./.config - echo "# CONFIG_MBROADWELL is not set" >> ./.config - echo "# CONFIG_MSKYLAKE is not set" >> ./.config - echo "# CONFIG_MSKYLAKEX is not set" >> ./.config - echo "# CONFIG_MCANNONLAKE is not set" >> ./.config - echo "# CONFIG_MICELAKE is not set" >> ./.config - echo "# CONFIG_MGOLDMONT is not set" >> ./.config - echo "# CONFIG_MGOLDMONTPLUS is not set" >> ./.config - echo "# CONFIG_MCASCADELAKE is not set" >> ./.config - echo "# CONFIG_MCOOPERLAKE is not set" >> ./.config - echo "# CONFIG_MTIGERLAKE is not set" >> ./.config - - # Disable some debugging - if [ "${_debugdisable}" = "true" ]; then - sed -i -e 's/CONFIG_SLUB_DEBUG=y/# CONFIG_SLUB_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_DEBUG=y/# CONFIG_PM_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_ADVANCED_DEBUG=y/# CONFIG_PM_ADVANCED_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_SLEEP_DEBUG=y/# CONFIG_PM_SLEEP_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_ACPI_DEBUG=y/# CONFIG_ACPI_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_SCHED_DEBUG=y/# CONFIG_SCHED_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_LATENCYTOP=y/# CONFIG_LATENCYTOP is not set/' ./.config - sed -i -e 's/CONFIG_DEBUG_PREEMPT=y/# CONFIG_DEBUG_PREEMPT is not set/' ./.config - fi - - if [ "${_cpusched}" = "MuQSS" ]; then - # MuQSS default config - echo "CONFIG_SCHED_MUQSS=y" >> ./.config - elif [ "${_cpusched}" = "pds" ]; then - # PDS default config - echo "CONFIG_SCHED_PDS=y" >> ./.config - elif [ "${_cpusched}" = "bmq" ]; then - # BMQ default config - echo "CONFIG_SCHED_ALT=y" >> ./.config - fi - - if [ "${_cpusched}" = "MuQSS" ] || [ "${_cpusched}" = "pds" ] || [ "${_cpusched}" = "bmq" ]; then - # Disable CFS - sed -i -e 's/CONFIG_FAIR_GROUP_SCHED=y/# CONFIG_FAIR_GROUP_SCHED is not set/' ./.config - sed -i -e 's/CONFIG_CFS_BANDWIDTH=y/# CONFIG_CFS_BANDWIDTH is not set/' ./.config - # sched yield type - if [ -n "$_sched_yield_type" ]; then - CONDITION0="$_sched_yield_type" - else - plain "" - plain "CPU sched_yield_type - Choose what sort of yield sched_yield will perform." - plain "" - plain "For PDS and MuQSS:" - plain "0: No yield." - plain "1: Yield only to better priority/deadline tasks." - plain "2: Expire timeslice and recalculate deadline." - plain "" - plain "For BMQ (experimental) - No recommended value yet, so try for yourself x) :" - plain "0: No yield." - plain "1: Deboost and requeue task. (default)" - plain "2: Set rq skip task." 
- if [ "${_cpusched}" = "MuQSS" ]; then - read -rp "`echo $'\n 0. Supposedly best option for gaming performance - could lead to stability issues on some (AMD) platforms when combined with MuQSS\n > 1. Default and recommended option for MuQSS - could lead to stability issues on some (Intel) platforms\n 2. Can be a good option with low rr_interval on MuQSS\n [0-2?]: '`" CONDITION0; - else - read -rp "`echo $'\n > 0. Recommended option for gaming on PDS - "tkg" default\n 1. Default, but can lead to stability issues on some platforms\n 2. Can be a good option with low rr_interval on MuQSS\n [0-2?]: '`" CONDITION0; - fi - fi - if [ "$CONDITION0" = "0" ]; then - if [ "${_cpusched}" = "bmq" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/alt_core.c - else - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/"${_cpusched}".c - fi - elif [ "$CONDITION0" = "1" ]; then - msg2 "Using default CPU sched yield type (1)" - elif [ "$CONDITION0" = "2" ]; then - if [ "${_cpusched}" = "bmq" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 2;/' ./kernel/sched/alt_core.c - else - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 2;/' ./kernel/sched/"${_cpusched}".c - fi - else - if [ "${_cpusched}" = "MuQSS" ]; then - msg2 "Using default CPU sched yield type (1)" - elif [ "${_cpusched}" = "bmq" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/alt_core.c - else - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/"${_cpusched}".c - fi - fi - fi - - # Round Robin interval - if [ "${_cpusched}" = "MuQSS" ] || [ "${_cpusched}" = "pds" ] || [ "${_cpusched}" = "bmq" ]; then - if [ -n "$_rr_interval" ]; then - CONDITION1="$_rr_interval" - else - plain "" - plain "Round Robin interval is the longest duration two tasks with the same nice level will" - plain "be delayed for. When CPU time is requested by a task, it receives a time slice equal" - plain "to the rr_interval in addition to a virtual deadline. When using yield_type 2, a low" - plain "value can help offset the disadvantages of rescheduling a process that has yielded." 
- plain "" - plain "MuQSS default: 6ms" - plain "PDS default: 4ms" - plain "BMQ default: 2ms" - read -rp "`echo $'\n > 0.Keep defaults\n 1.2ms\n 2.4ms\n 3.6ms\n 4.8ms\n [0-4?]: '`" CONDITION1; - fi - if [ "$CONDITION1" = "1" ]; then - msg2 "Using 2ms rr_interval" - _rrvalue="2" - elif [ "$CONDITION1" = "2" ]; then - msg2 "Using 4ms rr_interval" - _rrvalue="4" - elif [ "$CONDITION1" = "3" ]; then - msg2 "Using 6ms rr_interval" - _rrvalue="6" - elif [ "$CONDITION1" = "4" ]; then - msg2 "Using 8ms rr_interval" - _rrvalue="8" - else - msg2 "Using default rr_interval" - _rrvalue="default" - fi - if [ "$_rrvalue" != "default" ]; then - if [ "${_cpusched}" = "MuQSS" ]; then - sed -i -e "s/int rr_interval __read_mostly = 6;/int rr_interval __read_mostly = ${_rrvalue};/" ./kernel/sched/"${_cpusched}".c - elif [ "${_cpusched}" = "pds" ]; then - sed -i -e "s/#define SCHED_DEFAULT_RR (4)/#define SCHED_DEFAULT_RR (${_rrvalue})/" ./kernel/sched/"${_cpusched}".c - elif [ "${_cpusched}" = "bmq" ]; then - sed -i -e "s/u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000);/u64 sched_timeslice_ns __read_mostly = (${_rrvalue} * 1000 * 1000);/" ./kernel/sched/alt_core.c - fi - else - if [ "${_cpusched}" = "bmq" ]; then - sed -i -e "s/u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000);/u64 sched_timeslice_ns __read_mostly = (2 * 1000 * 1000);/" ./kernel/sched/alt_core.c - fi - fi - fi - - # zenify - if [ "$_zenify" = "true" ]; then - echo "CONFIG_ZENIFY=y" >> ./.config - elif [ "$_zenify" = "false" ]; then - echo "# CONFIG_ZENIFY is not set" >> ./.config - fi - - # compiler optimization level - if [ "$_compileroptlevel" = "1" ]; then - echo "# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 is not set" >> ./.config - elif [ "$_compileroptlevel" = "2" ]; then - sed -i -e 's/CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y/# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set/' ./.config - echo "CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y" >> ./.config - elif [ "$_compileroptlevel" = "3" ]; then - sed -i -e 's/CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y/# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set/' ./.config - sed -i -e 's/# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set/CONFIG_CC_OPTIMIZE_FOR_SIZE=y/' ./.config - echo "# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 is not set" >> ./.config - fi - - # cpu opt - if [ -n "$_processor_opt" ] && [ "$_processor_opt" != "native" ]; then - echo "# CONFIG_MNATIVE is not set" >> ./.config - fi - - if [ -n "$_processor_opt" ] && [ "$_processor_opt" != "generic" ]; then - sed -i -e 's/CONFIG_GENERIC_CPU=y/# CONFIG_GENERIC_CPU is not set/' ./.config - fi - - if [ "$_processor_opt" = "native" ]; then - echo "CONFIG_MNATIVE=y" >> ./.config - elif [ "$_processor_opt" = "k8" ]; then - sed -i -e 's/# CONFIG_MK8 is not set/CONFIG_MK8=y/' ./.config - elif [ "$_processor_opt" = "k8sse3" ]; then - sed -i -e 's/# CONFIG_MK8SSE3 is not set/CONFIG_MK8SSE3=y/' ./.config - elif [ "$_processor_opt" = "k10" ]; then - sed -i -e 's/# CONFIG_MK10 is not set/CONFIG_MK10=y/' ./.config - elif [ "$_processor_opt" = "barcelona" ]; then - sed -i -e 's/# CONFIG_MBARCELONA is not set/CONFIG_MBARCELONA=y/' ./.config - elif [ "$_processor_opt" = "bobcat" ]; then - sed -i -e 's/# CONFIG_MBOBCAT is not set/CONFIG_MBOBCAT=y/' ./.config - elif [ "$_processor_opt" = "jaguar" ]; then - sed -i -e 's/# CONFIG_MJAGUAR is not set/CONFIG_MJAGUAR=y/' ./.config - elif [ "$_processor_opt" = "bulldozer" ]; then - sed -i -e 's/# CONFIG_MBULLDOZER is not set/CONFIG_MBULLDOZER=y/' ./.config - elif [ "$_processor_opt" = "piledriver" ]; then - sed -i -e 's/# 
CONFIG_MPILEDRIVER is not set/CONFIG_MPILEDRIVER=y/' ./.config - elif [ "$_processor_opt" = "steamroller" ]; then - sed -i -e 's/# CONFIG_MSTEAMROLLER is not set/CONFIG_MSTEAMROLLER=y/' ./.config - elif [ "$_processor_opt" = "excavator" ]; then - sed -i -e 's/# CONFIG_MEXCAVATOR is not set/CONFIG_MEXCAVATOR=y/' ./.config - elif [ "$_processor_opt" = "zen" ]; then - sed -i -e 's/# CONFIG_MZEN is not set/CONFIG_MZEN=y/' ./.config - elif [ "$_processor_opt" = "zen2" ]; then - sed -i -e 's/# CONFIG_MZEN2 is not set/CONFIG_MZEN2=y/' ./.config - elif [ "$_processor_opt" = "mpsc" ]; then - sed -i -e 's/# CONFIG_MPSC is not set/CONFIG_MPSC=y/' ./.config - elif [ "$_processor_opt" = "atom" ]; then - sed -i -e 's/# CONFIG_MATOM is not set/CONFIG_MATOM=y/' ./.config - elif [ "$_processor_opt" = "core2" ]; then - sed -i -e 's/# CONFIG_MCORE2 is not set/CONFIG_MCORE2=y/' ./.config - elif [ "$_processor_opt" = "nehalem" ]; then - sed -i -e 's/# CONFIG_MNEHALEM is not set/CONFIG_MNEHALEM=y/' ./.config - elif [ "$_processor_opt" = "westmere" ]; then - sed -i -e 's/# CONFIG_MWESTMERE is not set/CONFIG_MWESTMERE=y/' ./.config - elif [ "$_processor_opt" = "silvermont" ]; then - sed -i -e 's/# CONFIG_MSILVERMONT is not set/CONFIG_MSILVERMONT=y/' ./.config - elif [ "$_processor_opt" = "sandybridge" ]; then - sed -i -e 's/# CONFIG_MSANDYBRIDGE is not set/CONFIG_MSANDYBRIDGE=y/' ./.config - elif [ "$_processor_opt" = "ivybridge" ]; then - sed -i -e 's/# CONFIG_MIVYBRIDGE is not set/CONFIG_MIVYBRIDGE=y/' ./.config - elif [ "$_processor_opt" = "haswell" ]; then - sed -i -e 's/# CONFIG_MHASWELL is not set/CONFIG_MHASWELL=y/' ./.config - elif [ "$_processor_opt" = "broadwell" ]; then - sed -i -e 's/# CONFIG_MBROADWELL is not set/CONFIG_MBROADWELL=y/' ./.config - elif [ "$_processor_opt" = "skylake" ]; then - sed -i -e 's/# CONFIG_MSKYLAKE is not set/CONFIG_MSKYLAKE=y/' ./.config - elif [ "$_processor_opt" = "skylakex" ]; then - sed -i -e 's/# CONFIG_MSKYLAKEX is not set/CONFIG_MSKYLAKEX=y/' ./.config - elif [ "$_processor_opt" = "cannonlake" ]; then - sed -i -e 's/# CONFIG_MCANNONLAKE is not set/CONFIG_MCANNONLAKE=y/' ./.config - elif [ "$_processor_opt" = "icelake" ]; then - sed -i -e 's/# CONFIG_MICELAKE is not set/CONFIG_MICELAKE=y/' ./.config - elif [ "$_processor_opt" = "goldmont" ]; then - sed -i -e 's/# CONFIG_MGOLDMONT is not set/CONFIG_MGOLDMONT=y/' ./.config - elif [ "$_processor_opt" = "goldmontplus" ]; then - sed -i -e 's/# CONFIG_MGOLDMONTPLUS is not set/CONFIG_MGOLDMONTPLUS=y/' ./.config - elif [ "$_processor_opt" = "cascadelake" ]; then - sed -i -e 's/# CONFIG_MCASCADELAKE is not set/CONFIG_MCASCADELAKE=y/' ./.config - elif [ "$_processor_opt" = "cooperlake" ]; then - sed -i -e 's/# CONFIG_MCOOPERLAKE is not set/CONFIG_MCOOPERLAKE=y/' ./.config - elif [ "$_processor_opt" = "tigerlake" ]; then - sed -i -e 's/# CONFIG_MTIGERLAKE is not set/CONFIG_MTIGERLAKE=y/' ./.config - fi - - # irq threading - if [ "$_irq_threading" = "true" ]; then - echo "CONFIG_FORCE_IRQ_THREADING=y" >> ./.config - elif [ "$_irq_threading" = "false" ]; then - echo "# CONFIG_FORCE_IRQ_THREADING is not set" >> ./.config - fi - - # smt nice - if [ "$_smt_nice" = "true" ]; then - echo "CONFIG_SMT_NICE=y" >> ./.config - elif [ "$_smt_nice" = "false" ]; then - echo "# CONFIG_SMT_NICE is not set" >> ./.config - fi - - # random trust cpu - if [ "$_random_trust_cpu" = "true" ]; then - sed -i -e 's/# CONFIG_RANDOM_TRUST_CPU is not set/CONFIG_RANDOM_TRUST_CPU=y/' ./.config - fi - - # rq sharing - if [ "$_runqueue_sharing" = "none" ]; then - 
echo -e "CONFIG_RQ_NONE=y\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ -z "$_runqueue_sharing" ] || [ "$_runqueue_sharing" = "smt" ]; then - echo -e "# CONFIG_RQ_NONE is not set\nCONFIG_RQ_SMT=y\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" = "mc" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\nCONFIG_RQ_MC=y\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" = "smp" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\nCONFIG_RQ_SMP=y\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" = "all" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\nCONFIG_RQ_ALL=y" >> ./.config - elif [ "$_runqueue_sharing" = "mc-llc" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\nCONFIG_RQ_MC_LLC=y\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - fi - - # timer freq - if [ -n "$_timer_freq" ] && [ "$_timer_freq" != "300" ]; then - sed -i -e 's/CONFIG_HZ_300=y/# CONFIG_HZ_300 is not set/' ./.config - sed -i -e 's/CONFIG_HZ_300_NODEF=y/# CONFIG_HZ_300_NODEF is not set/' ./.config - if [ "$_timer_freq" = "1000" ]; then - sed -i -e 's/# CONFIG_HZ_1000 is not set/CONFIG_HZ_1000=y/' ./.config - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=1000/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "CONFIG_HZ_1000_NODEF=y" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" = "750" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=750/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "CONFIG_HZ_750=y" >> ./.config - echo "CONFIG_HZ_750_NODEF=y" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" = "500" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=500/' ./.config - echo "CONFIG_HZ_500=y" >> ./.config - echo "CONFIG_HZ_500_NODEF=y" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" = "100" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=100/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - echo "CONFIG_HZ_100=y" >> ./.config - echo 
"CONFIG_HZ_100_NODEF=y" >> ./.config - fi - elif [ "${_cpusched}" = "MuQSS" ] && [ -z "$_timer_freq" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=100/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - echo "CONFIG_HZ_100=y" >> ./.config - echo "CONFIG_HZ_100_NODEF=y" >> ./.config - else - sed -i -e 's/CONFIG_HZ_300=y/# CONFIG_HZ_300 is not set/' ./.config - sed -i -e 's/CONFIG_HZ_300_NODEF=y/# CONFIG_HZ_300_NODEF is not set/' ./.config - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=500/' ./.config - echo "CONFIG_HZ_500=y" >> ./.config - echo "CONFIG_HZ_500_NODEF=y" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - fi - - # default cpu gov - if [ "$_default_cpu_gov" = "performance" ]; then - sed -i -e 's/CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y/# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set/' ./.config - sed -i -e 's/# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set/CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y/' ./.config - elif [ "$_default_cpu_gov" = "ondemand" ]; then - sed -i -e 's/CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y/# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set/' ./.config - sed -i -e 's/# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set/CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y/' ./.config - fi - - # ACPI_CPUFREQ disablement - if [ "$_disable_acpi_cpufreq" = "true" ]; then - sed -i -e 's/CONFIG_X86_ACPI_CPUFREQ=m/# CONFIG_X86_ACPI_CPUFREQ is not set/' ./.config - fi - - # ftrace - if [ -z "$_ftracedisable" ]; then - plain "" - plain "Disable FUNCTION_TRACER/GRAPH_TRACER? Lowers overhead but limits debugging" - plain "and analyzing of kernel functions." - read -rp "`echo $' > N/y : '`" CONDITION2; - fi - if [[ "$CONDITION2" =~ [yY] ]] || [ "$_ftracedisable" = "true" ]; then - sed -i -e 's/CONFIG_FUNCTION_TRACER=y/# CONFIG_FUNCTION_TRACER is not set/' ./.config - sed -i -e 's/CONFIG_FUNCTION_GRAPH_TRACER=y/# CONFIG_FUNCTION_GRAPH_TRACER is not set/' ./.config - fi - - # disable numa - if [ -z "$_numadisable" ]; then - plain "" - plain "Disable NUMA? Lowers overhead, but breaks CUDA/NvEnc on Nvidia if disabled." - plain "https://bbs.archlinux.org/viewtopic.php?id=239174" - read -rp "`echo $' > N/y : '`" CONDITION3; - fi - if [[ "$CONDITION3" =~ [yY] ]] || [ "$_numadisable" = "true" ]; then - # disable NUMA since 99.9% of users do not have multiple CPUs but do have multiple cores in one CPU - sed -i -e 's/CONFIG_NUMA=y/# CONFIG_NUMA is not set/' \ - -i -e '/CONFIG_AMD_NUMA=y/d' \ - -i -e '/CONFIG_X86_64_ACPI_NUMA=y/d' \ - -i -e '/CONFIG_NODES_SPAN_OTHER_NODES=y/d' \ - -i -e '/# CONFIG_NUMA_EMU is not set/d' \ - -i -e '/CONFIG_NODES_SHIFT=6/d' \ - -i -e '/CONFIG_NEED_MULTIPLE_NODES=y/d' \ - -i -e '/CONFIG_USE_PERCPU_NUMA_NODE_ID=y/d' \ - -i -e '/CONFIG_ACPI_NUMA=y/d' ./.config - fi - - # tickless - if [ -z "$_tickless" ]; then - plain "" - plain "Use CattaRappa mode (Tickless/Dynticks) ?" - plain "Can give higher performances in many cases but lower consistency on some hardware." - plain "Just tickless idle can perform better with some platforms (mostly AMD) or CPU schedulers (mostly MuQSS)." 
- if [ "${_cpusched}" = "MuQSS" ]; then - read -rp "`echo $'\n 0.No, use periodic ticks\n 1.Yes, full tickless baby!\n > 2.Just tickless idle plz\n [0-2?]: '`" CONDITION4; - else - read -rp "`echo $'\n 0.No, use periodic ticks\n > 1.Yes, full tickless baby!\n 2.Just tickless idle plz\n [0-2?]: '`" CONDITION4; - fi - fi - if [ "$CONDITION4" = "0" ] || [ "$_tickless" = "0" ]; then - echo "# CONFIG_NO_HZ_FULL_NODEF is not set" >> ./.config - sed -i -e 's/# CONFIG_HZ_PERIODIC is not set/CONFIG_HZ_PERIODIC=y/' ./.config - sed -i -e 's/CONFIG_NO_HZ_IDLE=y/# CONFIG_NO_HZ_IDLE is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_FULL=y/# CONFIG_NO_HZ_FULL is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ=y/# CONFIG_NO_HZ is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_COMMON=y/# CONFIG_NO_HZ_COMMON is not set/' ./.config - elif [ "$CONDITION4" = "2" ] || [ "$_tickless" = "2" ]; then - echo "# CONFIG_NO_HZ_FULL_NODEF is not set" >> ./.config - sed -i -e 's/CONFIG_HZ_PERIODIC=y/# CONFIG_HZ_PERIODIC is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_IDLE is not set/CONFIG_NO_HZ_IDLE=y/' ./.config - sed -i -e 's/CONFIG_NO_HZ_FULL=y/# CONFIG_NO_HZ_FULL is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ is not set/CONFIG_NO_HZ=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_COMMON is not set/CONFIG_NO_HZ_COMMON=y/' ./.config - else - if [ "${_cpusched}" = "MuQSS" ]; then - echo "# CONFIG_NO_HZ_FULL_NODEF is not set" >> ./.config - sed -i -e 's/CONFIG_HZ_PERIODIC=y/# CONFIG_HZ_PERIODIC is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_IDLE is not set/CONFIG_NO_HZ_IDLE=y/' ./.config - sed -i -e 's/CONFIG_NO_HZ_FULL=y/# CONFIG_NO_HZ_FULL is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ is not set/CONFIG_NO_HZ=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_COMMON is not set/CONFIG_NO_HZ_COMMON=y/' ./.config - else - echo "CONFIG_NO_HZ_FULL_NODEF=y" >> ./.config - sed -i -e 's/CONFIG_HZ_PERIODIC=y/# CONFIG_HZ_PERIODIC is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_IDLE=y/# CONFIG_NO_HZ_IDLE is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_FULL is not set/CONFIG_NO_HZ_FULL=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ is not set/CONFIG_NO_HZ=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_COMMON is not set/CONFIG_NO_HZ_COMMON=y/' ./.config - echo "CONFIG_CONTEXT_TRACKING=y" >> ./.config - echo "# CONFIG_CONTEXT_TRACKING_FORCE is not set" >> ./.config - fi - fi - - # voluntary preempt - if [ -z "$_voluntary_preempt" ]; then - plain "" - plain "Use explicit preemption points?" - plain "It can improve latency on PDS (at the cost of throughput)" - plain "and improve throughput on other schedulers (at the cost of latency)" - read -rp "`echo $' > N/y : '`" CONDITION5; - fi - if [[ "$CONDITION5" =~ [yY] ]] || [ "$_voluntary_preempt" = "true" ]; then - sed -i -e 's/CONFIG_PREEMPT=y/# CONFIG_PREEMPT is not set/' ./.config - sed -i -e 's/CONFIG_PREEMPT_LL=y/# CONFIG_PREEMPT_LL is not set/' ./.config - sed -i -e 's/# CONFIG_PREEMPT_VOLUNTARY is not set/CONFIG_PREEMPT_VOLUNTARY=y/' ./.config - fi - - # Open Firmware support - if [ -z "$_OFenable" ]; then - plain "" - plain "Enable Device Tree and Open Firmware support?" - read -rp "`echo $' > N/y : '`" CONDITION6; - fi - if [[ "$CONDITION6" =~ [yY] ]] || [ "$_OFenable" = "true" ]; then - sed -i -e 's/# CONFIG_OF is not set/CONFIG_OF=y/' ./.config - fi - - # acs override - if [ -z "$_acs_override" ]; then - plain "" - plain "Use ACS override patch?" 
- plain "https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29" - read -rp "`echo $' > N/y : '`" CONDITION7; - fi - if [[ "$CONDITION7" =~ [yY] ]] || [ "$_acs_override" = "true" ]; then - msg2 "Patching ACS override" - patch -Np1 -i "$srcdir"/0006-add-acs-overrides_iommu.patch - fi - - # bcachefs - if [ -z "$_bcachefs" ]; then - plain "" - plain "Add Bcache filesystem support? You'll have to install bcachefs-tools-git from AUR for utilities." - plain "https://bcachefs.org/" - read -rp "`echo $' > N/y : '`" CONDITION8; - fi - if [[ "$CONDITION8" =~ [yY] ]] || [ "$_bcachefs" = "true" ]; then - msg2 "Patching Bcache filesystem support override" - patch -Np1 -i "$srcdir"/0008-5.7-bcachefs.patch - - echo "CONFIG_BCACHEFS_FS=m" >> ./.config - echo "CONFIG_BCACHEFS_QUOTA=y" >> ./.config - echo "CONFIG_BCACHEFS_POSIX_ACL=y" >> ./.config - echo "# CONFIG_BCACHEFS_DEBUG is not set" >> ./.config - echo "# CONFIG_BCACHEFS_TESTS is not set" >> ./.config - echo "# CONFIG_DEBUG_CLOSURES is not set" >> ./.config - fi - - # fsync support - if [ -z "$_fsync" ]; then - plain "" - plain "Enable support for fsync, an experimental replacement for esync in Valve Proton 4.11+" - plain "https://steamcommunity.com/games/221410/announcements/detail/2957094910196249305" - read -rp "`echo $' > N/y : '`" CONDITION9; - fi - if [[ "$CONDITION9" =~ [yY] ]] || [ "$_fsync" = "true" ]; then - msg2 "Patching Fsync support" - patch -Np1 -i "$srcdir"/0007-v5.7-fsync.patch - fi - - # ZFS fix - if [ -z "$_zfsfix" ]; then - plain "" - plain "Add back missing symbol for AES-NI/AVX support on ZFS" - plain "https://github.com/NixOS/nixpkgs/blob/master/pkgs/os-specific/linux/kernel/export_kernel_fpu_functions_5_3.patch" - read -rp "`echo $' > N/y : '`" CONDITION11; - fi - if [[ "$CONDITION11" =~ [yY] ]] || [ "$_zfsfix" = "true" ]; then - msg2 "Patching missing symbol for AES-NI/AVX support on ZFS" - patch -Np1 -i "$srcdir"/0011-ZFS-fix.patch - fi - - # Community patches - if [ -n "$_community_patches" ]; then - if [ ! -d "$_where/../../community-patches" ]; then - cd "$_where/../.." && git clone https://github.com/Frogging-Family/community-patches.git && cd "${srcdir}/linux-${_basekernel}" - fi - _community_patches=($_community_patches) - for _p in ${_community_patches[@]}; do - ln -s "$_where"/../../community-patches/linux"$_basever"-tkg/$_p "$_where"/ - done - fi - - # userpatches - if [ "$_user_patches" = "true" ]; then - _userpatch_target="linux-${_basekernel}" - _userpatch_ext="my" - user_patcher - fi - - # Community patches removal - for _p in ${_community_patches[@]}; do - rm -f "$_where"/$_p - done - - if [ "$_distro" = "Arch" ]; then - # don't run depmod on 'make install'. We'll do this ourselves in packaging - sed -i '2iexit 0' scripts/depmod.sh - - # get kernel version - make prepare - fi - - # modprobed-db - if [ -z "$_modprobeddb" ]; then - plain "" - plain "Use modprobed db to clean config from unneeded modules?" - plain "Speeds up compilation considerably. Requires root." - plain "https://wiki.archlinux.org/index.php/Modprobed-db" - plain "!!!! Make sure to have a well populated db !!!!" 
- read -rp "`echo $' > N/y : '`" CONDITIONMPDB; - fi - if [[ "$CONDITIONMPDB" =~ [yY] ]] || [ "$_modprobeddb" = "true" ]; then - sudo modprobed-db recall - yes "" | make localmodconfig - fi - - if [ true = "$_config_fragments" ]; then - local fragments=() - mapfile -d '' -t fragments < <(find "$_where"/ -type f -name "*.myfrag" -print0) - - if [ true = "$_config_fragments_no_confirm" ]; then - printf 'Using config fragment %s\n' "${fragments[@]#$_where/}" - else - for i in "${!fragments[@]}"; do - while true; do - read -r -p 'Found config fragment '"${fragments[$i]#$_where/}"', apply it? [y/N] ' CONDITIONMPDB - CONDITIONMPDB="$(printf '%s' "$CONDITIONMPDB" | tr '[:upper:]' '[:lower:]')" - case "$CONDITIONMPDB" in - y|yes) - break;; - n|no|'') - unset fragments[$i] - break;; - *) - echo 'Please answer with yes or no' - esac - done - done - fi - - if [ 0 -lt "${#fragments[@]}" ]; then - scripts/kconfig/merge_config.sh -m .config "${fragments[@]}" - fi - fi - - # menuconfig / nconfig - if [ -z "$_menunconfig" ]; then - plain "" - plain "*Optional* For advanced users - Do you want to use make menuconfig or nconfig" - plain "to configure the kernel before building it?" - plain "If you do, make sure your terminal is currently" - plain "at least 19 lines by 80 columns large or you'll get an error :D" - read -rp "`echo $' > 0. nope\n 1. menuconfig\n 2. nconfig\n 3. xconfig\n choice[0-3?]: '`" CONDITIONMNC; - _menunconfig="$CONDITIONMNC" - fi - if [ 1 = "$_menunconfig" ]; then - cp .config .config.orig - make menuconfig - elif [ 2 = "$_menunconfig" ]; then - cp .config .config.orig - make nconfig - elif [ 3 = "$_menunconfig" ]; then - cp .config .config.orig - make xconfig - else - # rewrite configuration - yes "" | make config >/dev/null - fi - if [ 1 = "$_menunconfig" ] || [ 2 = "$_menunconfig" ] || [ 3 = "$_menunconfig" ]; then - if [ -z "${_diffconfig}" ]; then - while true; do - read -r -p 'Generate a config fragment from your changes? [y/N] ' CONDITIONF - CONDITIONF="$(printf '%s' "$CONDITIONF" | tr '[:upper:]' '[:lower:]')" - case "$CONDITIONF" in - y|yes) - _diffconfig=true - break;; - n|no|'') - _diffconfig=false - break;; - *) - echo 'Please answer with yes or no' - esac - done - fi - if [ true = "$_diffconfig" ]; then - if [ -z "$_diffconfig_name" ]; then - IFS= read -r -p 'Filename for the config fragment [leave empty to not generate fragment]: ' _diffconfig_name - fi - if [ -z "$_diffconfig_name" ]; then - echo 'No file name given, not generating config fragment.' 
- else ( - prev_pwd="${PWD:-$(pwd)}" - cd "$_where" - "${prev_pwd}/scripts/diffconfig" -m "${prev_pwd}/.config.orig" "${prev_pwd}/.config" > "$_diffconfig_name" - ) fi - fi - rm .config.orig - fi - - if [ "$_distro" = "Arch" ]; then - make -s kernelrelease > version - msg2 "Prepared %s version %s" "$pkgbase" "$( -From: Serge Hallyn -Date: Fri, 31 May 2013 19:12:12 +0100 -Subject: [PATCH] add sysctl to disallow unprivileged CLONE_NEWUSER by default - -Signed-off-by: Serge Hallyn -[bwh: Remove unneeded binary sysctl bits] -Signed-off-by: Daniel Micay ---- - kernel/fork.c | 15 +++++++++++++++ - kernel/sysctl.c | 12 ++++++++++++ - kernel/user_namespace.c | 3 +++ - 3 files changed, 30 insertions(+) - -diff --git a/kernel/fork.c b/kernel/fork.c -index 07cc743698d3668e..4011d68a8ff9305c 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -102,6 +102,11 @@ - - #define CREATE_TRACE_POINTS - #include -+#ifdef CONFIG_USER_NS -+extern int unprivileged_userns_clone; -+#else -+#define unprivileged_userns_clone 0 -+#endif - - /* - * Minimum number of threads to boot the kernel -@@ -1555,6 +1560,10 @@ static __latent_entropy struct task_struct *copy_process( - if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) - return ERR_PTR(-EINVAL); - -+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) -+ if (!capable(CAP_SYS_ADMIN)) -+ return ERR_PTR(-EPERM); -+ - /* - * Thread groups must share signals as well, and detached threads - * can only be started up within the thread group. -@@ -2348,6 +2357,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) - if (unshare_flags & CLONE_NEWNS) - unshare_flags |= CLONE_FS; - -+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { -+ err = -EPERM; -+ if (!capable(CAP_SYS_ADMIN)) -+ goto bad_unshare_out; -+ } -+ - err = check_unshare_flags(unshare_flags); - if (err) - goto bad_unshare_out; -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index b86520ed3fb60fbf..f7dab3760839f1a1 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -105,6 +105,9 @@ extern int core_uses_pid; - extern char core_pattern[]; - extern unsigned int core_pipe_limit; - #endif -+#ifdef CONFIG_USER_NS -+extern int unprivileged_userns_clone; -+#endif - extern int pid_max; - extern int pid_max_min, pid_max_max; - extern int percpu_pagelist_fraction; -@@ -513,6 +516,15 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_USER_NS -+ { -+ .procname = "unprivileged_userns_clone", -+ .data = &unprivileged_userns_clone, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+#endif - #ifdef CONFIG_PROC_SYSCTL - { - .procname = "tainted", -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index c490f1e4313b998a..dd03bd39d7bf194d 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -24,6 +24,9 @@ - #include - #include - -+/* sysctl */ -+int unprivileged_userns_clone; -+ - static struct kmem_cache *user_ns_cachep __read_mostly; - static DEFINE_MUTEX(userns_state_mutex); - --- -2.15.1 - -From b5202296055dd333db4425120d3f93ef4e6a0573 Mon Sep 17 00:00:00 2001 -From: "Jan Alexander Steffens (heftig)" -Date: Thu, 7 Dec 2017 13:50:48 +0100 -Subject: ZEN: Add CONFIG for unprivileged_userns_clone - -This way our default behavior continues to match the vanilla kernel. 
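The sysctl added by the first patch above, with its default wired up by the Kconfig hunk that follows, behaves at runtime like any other sysctl on a kernel carrying these patches (the drop-in file name below is illustrative):

    sysctl kernel.unprivileged_userns_clone                 # 1 when built with USER_NS_UNPRIVILEGED=y, 0 otherwise
    sudo sysctl -w kernel.unprivileged_userns_clone=0       # tighten until the next reboot
    echo "kernel.unprivileged_userns_clone = 0" | sudo tee /etc/sysctl.d/99-userns.conf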
---- - init/Kconfig | 16 ++++++++++++++++ - kernel/user_namespace.c | 4 ++++ - 2 files changed, 20 insertions(+) - -diff --git a/init/Kconfig b/init/Kconfig -index 4592bf7997c0..f3df02990aff 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1004,6 +1004,22 @@ config USER_NS - - If unsure, say N. - -+config USER_NS_UNPRIVILEGED -+ bool "Allow unprivileged users to create namespaces" -+ default y -+ depends on USER_NS -+ help -+ When disabled, unprivileged users will not be able to create -+ new namespaces. Allowing users to create their own namespaces -+ has been part of several recent local privilege escalation -+ exploits, so if you need user namespaces but are -+ paranoid^Wsecurity-conscious you want to disable this. -+ -+ This setting can be overridden at runtime via the -+ kernel.unprivileged_userns_clone sysctl. -+ -+ If unsure, say Y. -+ - config PID_NS - bool "PID Namespaces" - default y -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 6b9dbc257e34..107b17f0d528 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -27,7 +27,11 @@ - #include - - /* sysctl */ -+#ifdef CONFIG_USER_NS_UNPRIVILEGED -+int unprivileged_userns_clone = 1; -+#else - int unprivileged_userns_clone; -+#endif - - static struct kmem_cache *user_ns_cachep __read_mostly; - static DEFINE_MUTEX(userns_state_mutex); diff --git a/linux57-tkg/linux57-tkg-patches/0002-clear-patches.patch b/linux57-tkg/linux57-tkg-patches/0002-clear-patches.patch deleted file mode 100644 index a7c9d4a..0000000 --- a/linux57-tkg/linux57-tkg-patches/0002-clear-patches.patch +++ /dev/null @@ -1,354 +0,0 @@ -From 2ac70785613ef4c6b16414986bb18bd7b60d2a13 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Mon, 14 Mar 2016 11:10:58 -0600 -Subject: [PATCH] pci pme wakeups - -Reduce wakeups for PME checks, which are a workaround for miswired -boards (sadly, too many of them) in laptops. 
---- - drivers/pci/pci.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c -index c25acace7d91..0ddebdad9f5b 100644 ---- a/drivers/pci/pci.c -+++ b/drivers/pci/pci.c -@@ -61,7 +61,7 @@ struct pci_pme_device { - struct pci_dev *dev; - }; - --#define PME_TIMEOUT 1000 /* How long between PME checks */ -+#define PME_TIMEOUT 4000 /* How long between PME checks */ - - static void pci_dev_d3_sleep(struct pci_dev *dev) - { --- -2.20.1 - -From 7e7e36c67aa71d6a1ec5676d99d37c1fea389ceb Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Sat, 19 Mar 2016 21:32:19 -0400 -Subject: [PATCH] intel_idle: tweak cpuidle cstates - -Increase target_residency in cpuidle cstate - -Tune intel_idle to be a bit less agressive; -Clear linux is cleaner in hygiene (wakupes) than the average linux, -so we can afford changing these in a way that increases -performance while keeping power efficiency ---- - drivers/idle/intel_idle.c | 44 +++++++++++++++++++-------------------- - 1 file changed, 22 insertions(+), 22 deletions(-) - -diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c -index 8b5d85c91e9d..5e2d813a048d 100644 ---- a/drivers/idle/intel_idle.c -+++ b/drivers/idle/intel_idle.c -@@ -466,7 +466,7 @@ static struct cpuidle_state hsw_cstates[] = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -474,7 +474,7 @@ static struct cpuidle_state hsw_cstates[] = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 33, -- .target_residency = 100, -+ .target_residency = 900, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -482,7 +482,7 @@ static struct cpuidle_state hsw_cstates[] = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 133, -- .target_residency = 400, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -490,7 +490,7 @@ static struct cpuidle_state hsw_cstates[] = { - .desc = "MWAIT 0x32", - .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 166, -- .target_residency = 500, -+ .target_residency = 1500, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -498,7 +498,7 @@ static struct cpuidle_state hsw_cstates[] = { - .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 300, -- .target_residency = 900, -+ .target_residency = 2000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -506,7 +506,7 @@ static struct cpuidle_state hsw_cstates[] = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 600, -- .target_residency = 1800, -+ .target_residency = 5000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -514,7 +514,7 @@ static struct cpuidle_state hsw_cstates[] = { - .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 2600, -- .target_residency = 7700, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -534,7 +534,7 @@ static struct cpuidle_state bdw_cstates[] = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -542,7 
+542,7 @@ static struct cpuidle_state bdw_cstates[] = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 40, -- .target_residency = 100, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -550,7 +550,7 @@ static struct cpuidle_state bdw_cstates[] = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 133, -- .target_residency = 400, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -558,7 +558,7 @@ static struct cpuidle_state bdw_cstates[] = { - .desc = "MWAIT 0x32", - .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 166, -- .target_residency = 500, -+ .target_residency = 2000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -566,7 +566,7 @@ static struct cpuidle_state bdw_cstates[] = { - .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 300, -- .target_residency = 900, -+ .target_residency = 4000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -574,7 +574,7 @@ static struct cpuidle_state bdw_cstates[] = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 600, -- .target_residency = 1800, -+ .target_residency = 7000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -582,7 +582,7 @@ static struct cpuidle_state bdw_cstates[] = { - .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 2600, -- .target_residency = 7700, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -603,7 +603,7 @@ static struct cpuidle_state skl_cstates[] = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -611,7 +611,7 @@ static struct cpuidle_state skl_cstates[] = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 70, -- .target_residency = 100, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -619,7 +619,7 @@ static struct cpuidle_state skl_cstates[] = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 85, -- .target_residency = 200, -+ .target_residency = 600, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -627,7 +627,7 @@ static struct cpuidle_state skl_cstates[] = { - .desc = "MWAIT 0x33", - .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 124, -- .target_residency = 800, -+ .target_residency = 3000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -635,7 +635,7 @@ static struct cpuidle_state skl_cstates[] = { - .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 200, -- .target_residency = 800, -+ .target_residency = 3200, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -643,7 +643,7 @@ static struct cpuidle_state skl_cstates[] = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 480, -- .target_residency = 5000, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -651,7 +651,7 @@ static struct cpuidle_state skl_cstates[] = { 
- .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 890, -- .target_residency = 5000, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -672,7 +672,7 @@ static struct cpuidle_state skx_cstates[] = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 300, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { --- -2.20.1 - -From b8211d4f79dd88dfc2d4bd52be46103ea0b70e3e Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Fri, 6 Jan 2017 15:34:09 +0000 -Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little - bigger than default - ---- - net/ipv4/tcp.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index cf3c5095c10e..b30d51837b2d 100644 ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -3897,8 +3897,8 @@ void __init tcp_init(void) - tcp_init_mem(); - /* Set per-socket limits to no more than 1/128 the pressure threshold */ - limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); -- max_wshare = min(4UL*1024*1024, limit); -- max_rshare = min(6UL*1024*1024, limit); -+ max_wshare = min(16UL*1024*1024, limit); -+ max_rshare = min(16UL*1024*1024, limit); - - init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; - init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; --- -2.20.1 - -From 050223869257b87e22636158a80da38d877248ed Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Sun, 18 Feb 2018 23:35:41 +0000 -Subject: [PATCH] locking: rwsem: spin faster - -tweak rwsem owner spinning a bit ---- - kernel/locking/rwsem.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c -index eef04551eae7..1ec5ab4c8ff7 100644 ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -720,6 +720,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) - struct task_struct *new, *owner; - unsigned long flags, new_flags; - enum owner_state state; -+ int i = 0; - - owner = rwsem_owner_flags(sem, &flags); - state = rwsem_owner_state(owner, flags, nonspinnable); -@@ -753,7 +754,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) - break; - } - -- cpu_relax(); -+ if (i++ > 1000) -+ cpu_relax(); - } - rcu_read_unlock(); - -From b836ea320114643d4354b43acb6ec8bb06ada487 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Thu, 2 Jun 2016 23:36:32 -0500 -Subject: [PATCH] drivers: Initialize ata before graphics - -ATA init is the long pole in the boot process, and its asynchronous. 
-move the graphics init after it so that ata and graphics initialize -in parallel ---- - drivers/Makefile | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/drivers/Makefile b/drivers/Makefile -index aaef17cc6512..d08f3a394929 100644 ---- a/drivers/Makefile -+++ b/drivers/Makefile -@@ -58,15 +58,8 @@ obj-y += char/ - # iommu/ comes before gpu as gpu are using iommu controllers - obj-y += iommu/ - --# gpu/ comes after char for AGP vs DRM startup and after iommu --obj-y += gpu/ -- - obj-$(CONFIG_CONNECTOR) += connector/ - --# i810fb and intelfb depend on char/agp/ --obj-$(CONFIG_FB_I810) += video/fbdev/i810/ --obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ -- - obj-$(CONFIG_PARPORT) += parport/ - obj-$(CONFIG_NVM) += lightnvm/ - obj-y += base/ block/ misc/ mfd/ nfc/ -@@ -79,6 +72,14 @@ obj-$(CONFIG_IDE) += ide/ - obj-y += scsi/ - obj-y += nvme/ - obj-$(CONFIG_ATA) += ata/ -+ -+# gpu/ comes after char for AGP vs DRM startup and after iommu -+obj-y += gpu/ -+ -+# i810fb and intelfb depend on char/agp/ -+obj-$(CONFIG_FB_I810) += video/fbdev/i810/ -+obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ -+ - obj-$(CONFIG_TARGET_CORE) += target/ - obj-$(CONFIG_MTD) += mtd/ - obj-$(CONFIG_SPI) += spi/ diff --git a/linux57-tkg/linux57-tkg-patches/0003-glitched-base.patch b/linux57-tkg/linux57-tkg-patches/0003-glitched-base.patch deleted file mode 100644 index 0cd2ef0..0000000 --- a/linux57-tkg/linux57-tkg-patches/0003-glitched-base.patch +++ /dev/null @@ -1,545 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - -diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h -index 87f1fc9..b3be470 100755 ---- a/scripts/mkcompile_h -+++ b/scripts/mkcompile_h -@@ -50,8 +50,8 @@ else - fi - - UTS_VERSION="#$VERSION" --CONFIG_FLAGS="" --if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi -+CONFIG_FLAGS="TKG" -+if [ -n "$SMP" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS SMP"; fi - if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi - UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP" - -diff --git a/fs/dcache.c b/fs/dcache.c -index 2acfc69878f5..3f1131431e06 100644 ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -69,7 +69,7 @@ - * If no ancestor relationship: - * arbitrary, since it's serialized on rename_lock - */ --int sysctl_vfs_cache_pressure __read_mostly = 100; -+int sysctl_vfs_cache_pressure __read_mostly = 50; - EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); - - __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 211890edf37e..37121563407d 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -41,7 +41,7 @@ const_debug unsigned int sysctl_sched_features = - * Number of tasks to iterate in a single balance run. - * Limited because this is done with IRQs disabled. - */ --const_debug unsigned int sysctl_sched_nr_migrate = 32; -+const_debug unsigned int sysctl_sched_nr_migrate = 128; - - /* - * period over which we average the RT time consumption, measured -@@ -61,9 +61,9 @@ __read_mostly int scheduler_running; - - /* - * part of the period that we allow rt tasks to run in us. -- * default: 0.95s -+ * XanMod default: 0.98s - */ --int sysctl_sched_rt_runtime = 950000; -+int sysctl_sched_rt_runtime = 980000; - - /* - * __task_rq_lock - lock the rq @p resides on. 
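
The three defaults changed in the hunks above (vfs_cache_pressure, sched_nr_migrate and sched_rt_runtime) are also exposed as runtime sysctls, so the effect of this part of the patch can be inspected on a live system without rebuilding. A minimal sketch, assuming the standard /proc/sys locations (sched_nr_migrate in particular may be absent on some configurations):

/* Illustrative sketch only: prints the live values of the tunables whose
 * compiled-in defaults are changed in the hunks above. The /proc paths are
 * the standard sysctl locations; sched_nr_migrate may not be exposed on
 * every kernel configuration. */
#include <stdio.h>

static void show(const char *path)
{
        char buf[64];
        FILE *f = fopen(path, "r");

        if (f && fgets(buf, sizeof(buf), f))
                printf("%-40s %s", path, buf);
        else
                printf("%-40s (not available)\n", path);
        if (f)
                fclose(f);
}

int main(void)
{
        show("/proc/sys/vm/vfs_cache_pressure");      /* 100 -> 50 above */
        show("/proc/sys/kernel/sched_rt_runtime_us"); /* 950000 -> 980000 above */
        show("/proc/sys/kernel/sched_nr_migrate");    /* 32 -> 128 above */
        return 0;
}

On a kernel carrying these hunks the values should read back as 50, 980000 and 128; on a stock kernel, 100, 950000 and 32.
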
-diff --git a/scripts/setlocalversion b/scripts/setlocalversion -index 71f39410691b..288f9679e883 100755 ---- a/scripts/setlocalversion -+++ b/scripts/setlocalversion -@@ -54,7 +54,7 @@ scm_version() - # If only the short version is requested, don't bother - # running further git commands - if $short; then -- echo "+" -+ # echo "+" - return - fi - # If we are past a tagged commit (like - -From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001 -From: Etienne Juvigny -Date: Mon, 3 Sep 2018 17:36:25 +0200 -Subject: Zenify & stuff - - -diff --git a/init/Kconfig b/init/Kconfig -index b4daad2bac23..c1e59dc04209 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1244,7 +1244,6 @@ config CC_OPTIMIZE_FOR_PERFORMANCE - - config CC_OPTIMIZE_FOR_PERFORMANCE_O3 - bool "Optimize more for performance (-O3)" -- depends on ARC - help - Choosing this option will pass "-O3" to your compiler to optimize - the kernel yet more for performance. -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 4f32c4062fb6..c0bf039e1b40 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -721,6 +721,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, - struct sockaddr _sockaddr; - struct sockaddr_in _sockaddr_in; - struct sockaddr_in6 _sockaddr_in6; -+ struct sockaddr_ib _sockaddr_ib; - } sgid_addr, dgid_addr; - int ret; - -diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index 79226ca8f80f..2a30060e7e1d 100644 ---- a/include/linux/blkdev.h -+++ b/include/linux/blkdev.h -@@ -47,7 +47,11 @@ struct blk_queue_stats; - struct blk_stat_callback; - - #define BLKDEV_MIN_RQ 4 -+#ifdef CONFIG_ZENIFY -+#define BLKDEV_MAX_RQ 512 -+#else - #define BLKDEV_MAX_RQ 128 /* Default maximum */ -+#endif - - /* Must be consistent with blk_mq_poll_stats_bkt() */ - #define BLK_MQ_POLL_STATS_BKTS 16 -diff --git a/init/Kconfig b/init/Kconfig -index 041f3a022122..5ed70eb1ad3a 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -45,6 +45,38 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config ZENIFY -+ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience" -+ default y -+ help -+ Tunes the kernel for responsiveness at the cost of throughput and power usage. 
-+ -+ --- Virtual Memory Subsystem --------------------------- -+ -+ Mem dirty before bg writeback..: 10 % -> 20 % -+ Mem dirty before sync writeback: 20 % -> 50 % -+ -+ --- Block Layer ---------------------------------------- -+ -+ Queue depth...............: 128 -> 512 -+ Default MQ scheduler......: mq-deadline -> bfq -+ -+ --- CFS CPU Scheduler ---------------------------------- -+ -+ Scheduling latency.............: 6 -> 3 ms -+ Minimal granularity............: 0.75 -> 0.3 ms -+ Wakeup granularity.............: 1 -> 0.5 ms -+ CPU migration cost.............: 0.5 -> 0.25 ms -+ Bandwidth slice size...........: 5 -> 3 ms -+ Ondemand fine upscaling limit..: 95 % -> 85 % -+ -+ --- MuQSS CPU Scheduler -------------------------------- -+ -+ Scheduling interval............: 6 -> 3 ms -+ ISO task max realtime use......: 70 % -> 25 % -+ Ondemand coarse upscaling limit: 80 % -> 45 % -+ Ondemand fine upscaling limit..: 95 % -> 45 % -+ - config BROKEN - bool - -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 2f0a0be4d344..bada807c7e59 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -37,8 +37,13 @@ - * - * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_latency = 3000000ULL; -+static unsigned int normalized_sysctl_sched_latency = 3000000ULL; -+#else - unsigned int sysctl_sched_latency = 6000000ULL; - static unsigned int normalized_sysctl_sched_latency = 6000000ULL; -+#endif - - /* - * The initial- and re-scaling of tunables is configurable -@@ -58,13 +63,22 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L - * - * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_min_granularity = 300000ULL; -+static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL; -+#else - unsigned int sysctl_sched_min_granularity = 750000ULL; - static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; -+#endif - - /* - * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity - */ -+#ifdef CONFIG_ZENIFY -+static unsigned int sched_nr_latency = 10; -+#else - static unsigned int sched_nr_latency = 8; -+#endif - - /* - * After fork, child runs first. 
If set to 0 (default) then -@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; - * - * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_wakeup_granularity = 500000UL; -+static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL; -+ -+const_debug unsigned int sysctl_sched_migration_cost = 50000UL; -+#else - unsigned int sysctl_sched_wakeup_granularity = 1000000UL; - static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; - - const_debug unsigned int sysctl_sched_migration_cost = 500000UL; -+#endif - - #ifdef CONFIG_SMP - /* -@@ -107,8 +128,12 @@ int __weak arch_asym_cpu_priority(int cpu) - * - * (default: 5 msec, units: microseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; -+#else - unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; - #endif -+#endif - - /* - * The margin used when comparing utilization with CPU capacity: -diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index 337c6afb3345..9315e358f292 100644 ---- a/mm/page-writeback.c -+++ b/mm/page-writeback.c -@@ -71,7 +71,11 @@ static long ratelimit_pages = 32; - /* - * Start background writeback (via writeback threads) at this percentage - */ -+#ifdef CONFIG_ZENIFY -+int dirty_background_ratio = 20; -+#else - int dirty_background_ratio = 10; -+#endif - - /* - * dirty_background_bytes starts at 0 (disabled) so that it is a function of -@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable; - /* - * The generator of dirty data starts writeback at this percentage - */ -+#ifdef CONFIG_ZENIFY -+int vm_dirty_ratio = 50; -+#else - int vm_dirty_ratio = 20; -+#endif - - /* - * vm_dirty_bytes starts at 0 (disabled) so that it is a function of -diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig -index 80dad301361d..42b7fa7d01f8 100644 ---- a/net/ipv4/Kconfig -+++ b/net/ipv4/Kconfig -@@ -702,6 +702,9 @@ choice - config DEFAULT_VEGAS - bool "Vegas" if TCP_CONG_VEGAS=y - -+ config DEFAULT_YEAH -+ bool "YeAH" if TCP_CONG_YEAH=y -+ - config DEFAULT_VENO - bool "Veno" if TCP_CONG_VENO=y - -@@ -735,6 +738,7 @@ config DEFAULT_TCP_CONG - default "htcp" if DEFAULT_HTCP - default "hybla" if DEFAULT_HYBLA - default "vegas" if DEFAULT_VEGAS -+ default "yeah" if DEFAULT_YEAH - default "westwood" if DEFAULT_WESTWOOD - default "veno" if DEFAULT_VENO - default "reno" if DEFAULT_RENO - -From: Nick Desaulniers -Date: Mon, 24 Dec 2018 13:37:41 +0200 -Subject: include/linux/compiler*.h: define asm_volatile_goto - -asm_volatile_goto should also be defined for other compilers that -support asm goto. - -Fixes commit 815f0dd ("include/linux/compiler*.h: make compiler-*.h -mutually exclusive"). - -Signed-off-by: Nick Desaulniers -Signed-off-by: Miguel Ojeda - -diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h -index ba814f1..e77eeb0 100644 ---- a/include/linux/compiler_types.h -+++ b/include/linux/compiler_types.h -@@ -188,6 +188,10 @@ struct ftrace_likely_data { - #define asm_volatile_goto(x...) asm goto(x) - #endif - -+#ifndef asm_volatile_goto -+#define asm_volatile_goto(x...) asm goto(x) -+#endif -+ - /* Are two types/vars the same type (ignoring qualifiers)? 
*/ - #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) - -From: Andy Lavr -Date: Mon, 24 Dec 2018 14:57:47 +0200 -Subject: avl: Use [defer+madvise] as default khugepaged defrag strategy - -For some reason, the default strategy to respond to THP fault fallbacks -is still just madvise, meaning stall if the program wants transparent -hugepages, but don't trigger a background reclaim / compaction if THP -begins to fail allocations. This creates a snowball affect where we -still use the THP code paths, but we almost always fail once a system -has been active and busy for a while. - -The option "defer" was created for interactive systems where THP can -still improve performance. If we have to fallback to a regular page due -to an allocation failure or anything else, we will trigger a background -reclaim and compaction so future THP attempts succeed and previous -attempts eventually have their smaller pages combined without stalling -running applications. - -We still want madvise to stall applications that explicitely want THP, -so defer+madvise _does_ make a ton of sense. Make it the default for -interactive systems, especially if the kernel maintainer left -transparent hugepages on "always". - -Reasoning and details in the original patch: -https://lwn.net/Articles/711248/ - -Signed-off-by: Andy Lavr - -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index e84a10b..21d62b7 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly = - #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE - (1< -Date: Mon, 25 Nov 2019 15:13:06 -0300 -Subject: [PATCH] elevator: set default scheduler to bfq for blk-mq - -Signed-off-by: Alexandre Frade ---- - block/elevator.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/block/elevator.c b/block/elevator.c -index 076ba7308e65..81f89095aa77 100644 ---- a/block/elevator.c -+++ b/block/elevator.c -@@ -623,15 +623,15 @@ static inline bool elv_support_iosched(struct request_queue *q) - } - - /* -- * For single queue devices, default to using mq-deadline. If we have multiple -- * queues or mq-deadline is not available, default to "none". -+ * For single queue devices, default to using bfq. If we have multiple -+ * queues or bfq is not available, default to "none". 
- */ - static struct elevator_type *elevator_get_default(struct request_queue *q) - { - if (q->nr_hw_queues != 1) - return NULL; - -- return elevator_get(q, "mq-deadline", false); -+ return elevator_get(q, "bfq", false); - } - - /* -From c3ec05777c46e19a8a26d0fc4ca0c0db8a19de97 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Fri, 10 May 2019 16:45:59 -0300 -Subject: [PATCH] block: set rq_affinity = 2 for full multithreading I/O - requests - -Signed-off-by: Alexandre Frade ---- - include/linux/blkdev.h | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index f3ea78b0c91c..4dbacc6b073b 100644 ---- a/include/linux/blkdev.h -+++ b/include/linux/blkdev.h -@@ -621,7 +621,8 @@ struct request_queue { - #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ - - #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ -- (1 << QUEUE_FLAG_SAME_COMP)) -+ (1 << QUEUE_FLAG_SAME_COMP) | \ -+ (1 << QUEUE_FLAG_SAME_FORCE)) - - void blk_queue_flag_set(unsigned int flag, struct request_queue *q); - void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); -From 8171d33d0b84a953649863538fdbe4c26c035e4f Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Fri, 10 May 2019 14:32:50 -0300 -Subject: [PATCH] mm: set 2 megabytes for address_space-level file read-ahead - pages size - -Signed-off-by: Alexandre Frade ---- - include/linux/mm.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/linux/mm.h b/include/linux/mm.h -index a2adf95b3f9c..e804d9f7583a 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -2416,7 +2416,7 @@ int __must_check write_one_page(struct page *page); - void task_dirty_inc(struct task_struct *tsk); - - /* readahead.c */ --#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) -+#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE) - - int force_page_cache_readahead(struct address_space *mapping, struct file *filp, - pgoff_t offset, unsigned long nr_to_read); -From de7119e3db9fdb4c704355854a02a7e9fad931d4 Mon Sep 17 00:00:00 2001 -From: Steven Barrett -Date: Wed, 15 Jan 2020 20:43:56 -0600 -Subject: [PATCH] ZEN: intel-pstate: Implement "enable" parameter - -If intel-pstate is compiled into the kernel, it will preempt the loading -of acpi-cpufreq so you can take advantage of hardware p-states without -any friction. - -However, intel-pstate is not completely superior to cpufreq's ondemand -for one reason. There's no concept of an up_threshold property. - -In ondemand, up_threshold essentially reduces the maximum utilization to -compare against, allowing you to hit max frequencies and turbo boost -from a much lower core utilization. - -With intel-pstate, you have the concept of minimum and maximum -performance, but no tunable that lets you define, maximum frequency -means 50% core utilization. For just this oversight, there's reasons -you may want ondemand. - -Lets support setting "enable" in kernel boot parameters. This lets -kernel maintainers include "intel_pstate=disable" statically in the -static boot parameters, but let users of the kernel override this -selection. 
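
As a usage sketch for the parameter described above (nothing here is taken from the patch itself beyond the parameter names): a kernel maintainer could build with intel_pstate=disable in the built-in command line (CONFIG_CMDLINE), a user appends intel_pstate=enable from the bootloader, and which driver actually ended up in charge can be confirmed through the generic cpufreq sysfs interface, which typically reports either intel_pstate or acpi-cpufreq:

/* Illustrative check of which cpufreq scaling driver is active for cpu0.
 * The sysfs path is the generic cpufreq interface, not something added by
 * this patch. */
#include <stdio.h>

int main(void)
{
        char drv[64];
        FILE *f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_driver", "r");

        if (f && fgets(drv, sizeof(drv), f))
                printf("cpu0 scaling driver: %s", drv);
        else
                printf("cpu0 scaling driver: unavailable\n");
        if (f)
                fclose(f);
        return 0;
}
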
---- - Documentation/admin-guide/kernel-parameters.txt | 3 +++ - drivers/cpufreq/intel_pstate.c | 2 ++ - 2 files changed, 5 insertions(+) - -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index ade4e6ec23e03..0b613370d28d8 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -1765,6 +1765,9 @@ - disable - Do not enable intel_pstate as the default - scaling driver for the supported processors -+ enable -+ Enable intel_pstate in-case "disable" was passed -+ previously in the kernel boot parameters - passive - Use intel_pstate as a scaling driver, but configure it - to work with generic cpufreq governors (instead of -diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index d2fa3e9ccd97c..bd10cb02fc0ff 100644 ---- a/drivers/cpufreq/intel_pstate.c -+++ b/drivers/cpufreq/intel_pstate.c -@@ -2826,6 +2826,8 @@ static int __init intel_pstate_setup(char *str) - pr_info("HWP disabled\n"); - no_hwp = 1; - } -+ if (!strcmp(str, "enable")) -+ no_load = 0; - if (!strcmp(str, "force")) - force_load = 1; - if (!strcmp(str, "hwp_only")) diff --git a/linux57-tkg/linux57-tkg-patches/0003-glitched-cfs.patch b/linux57-tkg/linux57-tkg-patches/0003-glitched-cfs.patch deleted file mode 100644 index 06b7f02..0000000 --- a/linux57-tkg/linux57-tkg-patches/0003-glitched-cfs.patch +++ /dev/null @@ -1,72 +0,0 @@ -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. 
-+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - diff --git a/linux57-tkg/linux57-tkg-patches/0004-5.7-ck1.patch b/linux57-tkg/linux57-tkg-patches/0004-5.7-ck1.patch deleted file mode 100644 index ee1d1c8..0000000 --- a/linux57-tkg/linux57-tkg-patches/0004-5.7-ck1.patch +++ /dev/null @@ -1,13147 +0,0 @@ -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 7bc83f3d9bdf..2f9e8cdf5fec 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -4429,6 +4429,14 @@ - Memory area to be used by remote processor image, - managed by CMA. - -+ rqshare= [X86] Select the MuQSS scheduler runqueue sharing type. -+ Format: -+ smt -- Share SMT (hyperthread) sibling runqueues -+ mc -- Share MC (multicore) sibling runqueues -+ smp -- Share SMP runqueues -+ none -- So not share any runqueues -+ Default value is mc -+ - rw [KNL] Mount root device read-write on boot - - S [KNL] Run init in single mode -diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index 0d427fd10941..5b3406a3d76f 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -344,6 +344,16 @@ Controls whether the panic kmsg data should be reported to Hyper-V. - = ========================================================= - - -+iso_cpu: (MuQSS CPU scheduler only) -+=================================== -+ -+This sets the percentage cpu that the unprivileged SCHED_ISO tasks can -+run effectively at realtime priority, averaged over a rolling five -+seconds over the -whole- system, meaning all cpus. -+ -+Set to 70 (percent) by default. -+ -+ - kexec_load_disabled - =================== - -@@ -922,6 +932,20 @@ ROM/Flash boot loader. Maybe to tell it what to do after - rebooting. ??? - - -+rr_interval: (MuQSS CPU scheduler only) -+======================================= -+ -+This is the smallest duration that any cpu process scheduling unit -+will run for. Increasing this value can increase throughput of cpu -+bound tasks substantially but at the expense of increased latencies -+overall. Conversely decreasing it will decrease average and maximum -+latencies but at the expense of throughput. This value is in -+milliseconds and the default value chosen depends on the number of -+cpus available at scheduler initialisation with a minimum of 6. -+ -+Valid values are from 1-1000. -+ -+ - sched_energy_aware - ================== - -@@ -1230,3 +1254,13 @@ is 10 seconds. - - The softlockup threshold is (``2 * watchdog_thresh``). Setting this - tunable to zero will disable lockup detection altogether. -+ -+ -+yield_type: (MuQSS CPU scheduler only) -+====================================== -+ -+This determines what type of yield calls to sched_yield will perform. -+ -+ 0: No yield. -+ 1: Yield only to better priority/deadline tasks. (default) -+ 2: Expire timeslice and recalculate deadline. -diff --git a/Documentation/scheduler/sched-BFS.txt b/Documentation/scheduler/sched-BFS.txt -new file mode 100644 -index 000000000000..c0282002a079 ---- /dev/null -+++ b/Documentation/scheduler/sched-BFS.txt -@@ -0,0 +1,351 @@ -+BFS - The Brain Fuck Scheduler by Con Kolivas. -+ -+Goals. 
-+ -+The goal of the Brain Fuck Scheduler, referred to as BFS from here on, is to -+completely do away with the complex designs of the past for the cpu process -+scheduler and instead implement one that is very simple in basic design. -+The main focus of BFS is to achieve excellent desktop interactivity and -+responsiveness without heuristics and tuning knobs that are difficult to -+understand, impossible to model and predict the effect of, and when tuned to -+one workload cause massive detriment to another. -+ -+ -+Design summary. -+ -+BFS is best described as a single runqueue, O(n) lookup, earliest effective -+virtual deadline first design, loosely based on EEVDF (earliest eligible virtual -+deadline first) and my previous Staircase Deadline scheduler. Each component -+shall be described in order to understand the significance of, and reasoning for -+it. The codebase when the first stable version was released was approximately -+9000 lines less code than the existing mainline linux kernel scheduler (in -+2.6.31). This does not even take into account the removal of documentation and -+the cgroups code that is not used. -+ -+Design reasoning. -+ -+The single runqueue refers to the queued but not running processes for the -+entire system, regardless of the number of CPUs. The reason for going back to -+a single runqueue design is that once multiple runqueues are introduced, -+per-CPU or otherwise, there will be complex interactions as each runqueue will -+be responsible for the scheduling latency and fairness of the tasks only on its -+own runqueue, and to achieve fairness and low latency across multiple CPUs, any -+advantage in throughput of having CPU local tasks causes other disadvantages. -+This is due to requiring a very complex balancing system to at best achieve some -+semblance of fairness across CPUs and can only maintain relatively low latency -+for tasks bound to the same CPUs, not across them. To increase said fairness -+and latency across CPUs, the advantage of local runqueue locking, which makes -+for better scalability, is lost due to having to grab multiple locks. -+ -+A significant feature of BFS is that all accounting is done purely based on CPU -+used and nowhere is sleep time used in any way to determine entitlement or -+interactivity. Interactivity "estimators" that use some kind of sleep/run -+algorithm are doomed to fail to detect all interactive tasks, and to falsely tag -+tasks that aren't interactive as being so. The reason for this is that it is -+close to impossible to determine that when a task is sleeping, whether it is -+doing it voluntarily, as in a userspace application waiting for input in the -+form of a mouse click or otherwise, or involuntarily, because it is waiting for -+another thread, process, I/O, kernel activity or whatever. Thus, such an -+estimator will introduce corner cases, and more heuristics will be required to -+cope with those corner cases, introducing more corner cases and failed -+interactivity detection and so on. Interactivity in BFS is built into the design -+by virtue of the fact that tasks that are waking up have not used up their quota -+of CPU time, and have earlier effective deadlines, thereby making it very likely -+they will preempt any CPU bound task of equivalent nice level. See below for -+more information on the virtual deadline mechanism. 
Even if they do not preempt -+a running task, because the rr interval is guaranteed to have a bound upper -+limit on how long a task will wait for, it will be scheduled within a timeframe -+that will not cause visible interface jitter. -+ -+ -+Design details. -+ -+Task insertion. -+ -+BFS inserts tasks into each relevant queue as an O(1) insertion into a double -+linked list. On insertion, *every* running queue is checked to see if the newly -+queued task can run on any idle queue, or preempt the lowest running task on the -+system. This is how the cross-CPU scheduling of BFS achieves significantly lower -+latency per extra CPU the system has. In this case the lookup is, in the worst -+case scenario, O(n) where n is the number of CPUs on the system. -+ -+Data protection. -+ -+BFS has one single lock protecting the process local data of every task in the -+global queue. Thus every insertion, removal and modification of task data in the -+global runqueue needs to grab the global lock. However, once a task is taken by -+a CPU, the CPU has its own local data copy of the running process' accounting -+information which only that CPU accesses and modifies (such as during a -+timer tick) thus allowing the accounting data to be updated lockless. Once a -+CPU has taken a task to run, it removes it from the global queue. Thus the -+global queue only ever has, at most, -+ -+ (number of tasks requesting cpu time) - (number of logical CPUs) + 1 -+ -+tasks in the global queue. This value is relevant for the time taken to look up -+tasks during scheduling. This will increase if many tasks with CPU affinity set -+in their policy to limit which CPUs they're allowed to run on if they outnumber -+the number of CPUs. The +1 is because when rescheduling a task, the CPU's -+currently running task is put back on the queue. Lookup will be described after -+the virtual deadline mechanism is explained. -+ -+Virtual deadline. -+ -+The key to achieving low latency, scheduling fairness, and "nice level" -+distribution in BFS is entirely in the virtual deadline mechanism. The one -+tunable in BFS is the rr_interval, or "round robin interval". This is the -+maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy) -+tasks of the same nice level will be running for, or looking at it the other -+way around, the longest duration two tasks of the same nice level will be -+delayed for. When a task requests cpu time, it is given a quota (time_slice) -+equal to the rr_interval and a virtual deadline. The virtual deadline is -+offset from the current time in jiffies by this equation: -+ -+ jiffies + (prio_ratio * rr_interval) -+ -+The prio_ratio is determined as a ratio compared to the baseline of nice -20 -+and increases by 10% per nice level. The deadline is a virtual one only in that -+no guarantee is placed that a task will actually be scheduled by this time, but -+it is used to compare which task should go next. There are three components to -+how a task is next chosen. First is time_slice expiration. If a task runs out -+of its time_slice, it is descheduled, the time_slice is refilled, and the -+deadline reset to that formula above. Second is sleep, where a task no longer -+is requesting CPU for whatever reason. The time_slice and deadline are _not_ -+adjusted in this case and are just carried over for when the task is next -+scheduled. 
Third is preemption, and that is when a newly waking task is deemed -+higher priority than a currently running task on any cpu by virtue of the fact -+that it has an earlier virtual deadline than the currently running task. The -+earlier deadline is the key to which task is next chosen for the first and -+second cases. Once a task is descheduled, it is put back on the queue, and an -+O(n) lookup of all queued-but-not-running tasks is done to determine which has -+the earliest deadline and that task is chosen to receive CPU next. -+ -+The CPU proportion of different nice tasks works out to be approximately the -+ -+ (prio_ratio difference)^2 -+ -+The reason it is squared is that a task's deadline does not change while it is -+running unless it runs out of time_slice. Thus, even if the time actually -+passes the deadline of another task that is queued, it will not get CPU time -+unless the current running task deschedules, and the time "base" (jiffies) is -+constantly moving. -+ -+Task lookup. -+ -+BFS has 103 priority queues. 100 of these are dedicated to the static priority -+of realtime tasks, and the remaining 3 are, in order of best to worst priority, -+SCHED_ISO (isochronous), SCHED_NORMAL, and SCHED_IDLEPRIO (idle priority -+scheduling). When a task of these priorities is queued, a bitmap of running -+priorities is set showing which of these priorities has tasks waiting for CPU -+time. When a CPU is made to reschedule, the lookup for the next task to get -+CPU time is performed in the following way: -+ -+First the bitmap is checked to see what static priority tasks are queued. If -+any realtime priorities are found, the corresponding queue is checked and the -+first task listed there is taken (provided CPU affinity is suitable) and lookup -+is complete. If the priority corresponds to a SCHED_ISO task, they are also -+taken in FIFO order (as they behave like SCHED_RR). If the priority corresponds -+to either SCHED_NORMAL or SCHED_IDLEPRIO, then the lookup becomes O(n). At this -+stage, every task in the runlist that corresponds to that priority is checked -+to see which has the earliest set deadline, and (provided it has suitable CPU -+affinity) it is taken off the runqueue and given the CPU. If a task has an -+expired deadline, it is taken and the rest of the lookup aborted (as they are -+chosen in FIFO order). -+ -+Thus, the lookup is O(n) in the worst case only, where n is as described -+earlier, as tasks may be chosen before the whole task list is looked over. -+ -+ -+Scalability. -+ -+The major limitations of BFS will be that of scalability, as the separate -+runqueue designs will have less lock contention as the number of CPUs rises. -+However they do not scale linearly even with separate runqueues as multiple -+runqueues will need to be locked concurrently on such designs to be able to -+achieve fair CPU balancing, to try and achieve some sort of nice-level fairness -+across CPUs, and to achieve low enough latency for tasks on a busy CPU when -+other CPUs would be more suited. BFS has the advantage that it requires no -+balancing algorithm whatsoever, as balancing occurs by proxy simply because -+all CPUs draw off the global runqueue, in priority and deadline order. Despite -+the fact that scalability is _not_ the prime concern of BFS, it both shows very -+good scalability to smaller numbers of CPUs and is likely a more scalable design -+at these numbers of CPUs. 
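
Before moving on, the virtual deadline arithmetic described earlier can be made concrete with a few illustrative numbers. The sketch below is not taken from the BFS source; it only applies the rules as this document states them, with two assumptions called out in the comments: the 10% per nice level is treated as multiplicative from a baseline of 1.0 at nice -20, and "(prio_ratio difference)^2" is read as the square of the ratio between the two tasks' prio_ratio values. rr_interval is taken at its default of 6ms.

/* deadline_sketch.c - illustrative numbers for the virtual deadline formula
 * above: deadline = now + prio_ratio * rr_interval.
 *
 * Assumptions of this sketch (not taken from the BFS code):
 *  - the "10% per nice level" prio_ratio growth is multiplicative,
 *    starting from 1.0 at nice -20;
 *  - "(prio_ratio difference)^2" means the squared ratio of the two
 *    tasks' prio_ratio values.
 */
#include <math.h>
#include <stdio.h>

static double prio_ratio(int nice)
{
        return pow(1.1, nice + 20);     /* baseline is nice -20 */
}

int main(void)
{
        const double rr_interval_ms = 6.0;      /* default rr_interval */
        const int nices[] = { -20, 0, 19 };

        for (unsigned int i = 0; i < sizeof(nices) / sizeof(nices[0]); i++) {
                double r = prio_ratio(nices[i]);

                printf("nice %3d: prio_ratio %7.2f, deadline offset %8.1f ms\n",
                       nices[i], r, r * rr_interval_ms);
        }

        printf("approx. CPU share of nice 0 vs nice 19: %.0f : 1\n",
               pow(prio_ratio(19) / prio_ratio(0), 2.0));
        return 0;
}

Build with cc deadline_sketch.c -lm; the point is simply that deadline offsets, and with them relative CPU share, spread out rapidly across nice levels under this formula.
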
-+ -+It also has some very low overhead scalability features built into the design -+when it has been deemed their overhead is so marginal that they're worth adding. -+The first is the local copy of the running process' data to the CPU it's running -+on to allow that data to be updated lockless where possible. Then there is -+deference paid to the last CPU a task was running on, by trying that CPU first -+when looking for an idle CPU to use the next time it's scheduled. Finally there -+is the notion of cache locality beyond the last running CPU. The sched_domains -+information is used to determine the relative virtual "cache distance" that -+other CPUs have from the last CPU a task was running on. CPUs with shared -+caches, such as SMT siblings, or multicore CPUs with shared caches, are treated -+as cache local. CPUs without shared caches are treated as not cache local, and -+CPUs on different NUMA nodes are treated as very distant. This "relative cache -+distance" is used by modifying the virtual deadline value when doing lookups. -+Effectively, the deadline is unaltered between "cache local" CPUs, doubled for -+"cache distant" CPUs, and quadrupled for "very distant" CPUs. The reasoning -+behind the doubling of deadlines is as follows. The real cost of migrating a -+task from one CPU to another is entirely dependant on the cache footprint of -+the task, how cache intensive the task is, how long it's been running on that -+CPU to take up the bulk of its cache, how big the CPU cache is, how fast and -+how layered the CPU cache is, how fast a context switch is... and so on. In -+other words, it's close to random in the real world where we do more than just -+one sole workload. The only thing we can be sure of is that it's not free. So -+BFS uses the principle that an idle CPU is a wasted CPU and utilising idle CPUs -+is more important than cache locality, and cache locality only plays a part -+after that. Doubling the effective deadline is based on the premise that the -+"cache local" CPUs will tend to work on the same tasks up to double the number -+of cache local CPUs, and once the workload is beyond that amount, it is likely -+that none of the tasks are cache warm anywhere anyway. The quadrupling for NUMA -+is a value I pulled out of my arse. -+ -+When choosing an idle CPU for a waking task, the cache locality is determined -+according to where the task last ran and then idle CPUs are ranked from best -+to worst to choose the most suitable idle CPU based on cache locality, NUMA -+node locality and hyperthread sibling business. They are chosen in the -+following preference (if idle): -+ -+* Same core, idle or busy cache, idle threads -+* Other core, same cache, idle or busy cache, idle threads. -+* Same node, other CPU, idle cache, idle threads. -+* Same node, other CPU, busy cache, idle threads. -+* Same core, busy threads. -+* Other core, same cache, busy threads. -+* Same node, other CPU, busy threads. -+* Other node, other CPU, idle cache, idle threads. -+* Other node, other CPU, busy cache, idle threads. -+* Other node, other CPU, busy threads. -+ -+This shows the SMT or "hyperthread" awareness in the design as well which will -+choose a real idle core first before a logical SMT sibling which already has -+tasks on the physical CPU. -+ -+Early benchmarking of BFS suggested scalability dropped off at the 16 CPU mark. 
-+However this benchmarking was performed on an earlier design that was far less -+scalable than the current one so it's hard to know how scalable it is in terms -+of both CPUs (due to the global runqueue) and heavily loaded machines (due to -+O(n) lookup) at this stage. Note that in terms of scalability, the number of -+_logical_ CPUs matters, not the number of _physical_ CPUs. Thus, a dual (2x) -+quad core (4X) hyperthreaded (2X) machine is effectively a 16X. Newer benchmark -+results are very promising indeed, without needing to tweak any knobs, features -+or options. Benchmark contributions are most welcome. -+ -+ -+Features -+ -+As the initial prime target audience for BFS was the average desktop user, it -+was designed to not need tweaking, tuning or have features set to obtain benefit -+from it. Thus the number of knobs and features has been kept to an absolute -+minimum and should not require extra user input for the vast majority of cases. -+There are precisely 2 tunables, and 2 extra scheduling policies. The rr_interval -+and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO policies. In addition -+to this, BFS also uses sub-tick accounting. What BFS does _not_ now feature is -+support for CGROUPS. The average user should neither need to know what these -+are, nor should they need to be using them to have good desktop behaviour. -+ -+rr_interval -+ -+There is only one "scheduler" tunable, the round robin interval. This can be -+accessed in -+ -+ /proc/sys/kernel/rr_interval -+ -+The value is in milliseconds, and the default value is set to 6 on a -+uniprocessor machine, and automatically set to a progressively higher value on -+multiprocessor machines. The reasoning behind increasing the value on more CPUs -+is that the effective latency is decreased by virtue of there being more CPUs on -+BFS (for reasons explained above), and increasing the value allows for less -+cache contention and more throughput. Valid values are from 1 to 1000 -+Decreasing the value will decrease latencies at the cost of decreasing -+throughput, while increasing it will improve throughput, but at the cost of -+worsening latencies. The accuracy of the rr interval is limited by HZ resolution -+of the kernel configuration. Thus, the worst case latencies are usually slightly -+higher than this actual value. The default value of 6 is not an arbitrary one. -+It is based on the fact that humans can detect jitter at approximately 7ms, so -+aiming for much lower latencies is pointless under most circumstances. It is -+worth noting this fact when comparing the latency performance of BFS to other -+schedulers. Worst case latencies being higher than 7ms are far worse than -+average latencies not being in the microsecond range. -+ -+Isochronous scheduling. -+ -+Isochronous scheduling is a unique scheduling policy designed to provide -+near-real-time performance to unprivileged (ie non-root) users without the -+ability to starve the machine indefinitely. Isochronous tasks (which means -+"same time") are set using, for example, the schedtool application like so: -+ -+ schedtool -I -e amarok -+ -+This will start the audio application "amarok" as SCHED_ISO. How SCHED_ISO works -+is that it has a priority level between true realtime tasks and SCHED_NORMAL -+which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie, -+if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval -+rate). 
However if ISO tasks run for more than a tunable finite amount of time, -+they are then demoted back to SCHED_NORMAL scheduling. This finite amount of -+time is the percentage of _total CPU_ available across the machine, configurable -+as a percentage in the following "resource handling" tunable (as opposed to a -+scheduler tunable): -+ -+ /proc/sys/kernel/iso_cpu -+ -+and is set to 70% by default. It is calculated over a rolling 5 second average -+Because it is the total CPU available, it means that on a multi CPU machine, it -+is possible to have an ISO task running as realtime scheduling indefinitely on -+just one CPU, as the other CPUs will be available. Setting this to 100 is the -+equivalent of giving all users SCHED_RR access and setting it to 0 removes the -+ability to run any pseudo-realtime tasks. -+ -+A feature of BFS is that it detects when an application tries to obtain a -+realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the -+appropriate privileges to use those policies. When it detects this, it will -+give the task SCHED_ISO policy instead. Thus it is transparent to the user. -+Because some applications constantly set their policy as well as their nice -+level, there is potential for them to undo the override specified by the user -+on the command line of setting the policy to SCHED_ISO. To counter this, once -+a task has been set to SCHED_ISO policy, it needs superuser privileges to set -+it back to SCHED_NORMAL. This will ensure the task remains ISO and all child -+processes and threads will also inherit the ISO policy. -+ -+Idleprio scheduling. -+ -+Idleprio scheduling is a scheduling policy designed to give out CPU to a task -+_only_ when the CPU would be otherwise idle. The idea behind this is to allow -+ultra low priority tasks to be run in the background that have virtually no -+effect on the foreground tasks. This is ideally suited to distributed computing -+clients (like setiathome, folding, mprime etc) but can also be used to start -+a video encode or so on without any slowdown of other tasks. To avoid this -+policy from grabbing shared resources and holding them indefinitely, if it -+detects a state where the task is waiting on I/O, the machine is about to -+suspend to ram and so on, it will transiently schedule them as SCHED_NORMAL. As -+per the Isochronous task management, once a task has been scheduled as IDLEPRIO, -+it cannot be put back to SCHED_NORMAL without superuser privileges. Tasks can -+be set to start as SCHED_IDLEPRIO with the schedtool command like so: -+ -+ schedtool -D -e ./mprime -+ -+Subtick accounting. -+ -+It is surprisingly difficult to get accurate CPU accounting, and in many cases, -+the accounting is done by simply determining what is happening at the precise -+moment a timer tick fires off. This becomes increasingly inaccurate as the -+timer tick frequency (HZ) is lowered. It is possible to create an application -+which uses almost 100% CPU, yet by being descheduled at the right time, records -+zero CPU usage. While the main problem with this is that there are possible -+security implications, it is also difficult to determine how much CPU a task -+really does use. BFS tries to use the sub-tick accounting from the TSC clock, -+where possible, to determine real CPU usage. This is not entirely reliable, but -+is far more likely to produce accurate CPU usage data than the existing designs -+and will not show tasks as consuming no CPU usage when they actually are. 
Thus, -+the amount of CPU reported as being used by BFS will more accurately represent -+how much CPU the task itself is using (as is shown for example by the 'time' -+application), so the reported values may be quite different to other schedulers. -+Values reported as the 'load' are more prone to problems with this design, but -+per process values are closer to real usage. When comparing throughput of BFS -+to other designs, it is important to compare the actual completed work in terms -+of total wall clock time taken and total work done, rather than the reported -+"cpu usage". -+ -+ -+Con Kolivas Fri Aug 27 2010 -diff --git a/Documentation/scheduler/sched-MuQSS.txt b/Documentation/scheduler/sched-MuQSS.txt -new file mode 100644 -index 000000000000..ae28b85c9995 ---- /dev/null -+++ b/Documentation/scheduler/sched-MuQSS.txt -@@ -0,0 +1,373 @@ -+MuQSS - The Multiple Queue Skiplist Scheduler by Con Kolivas. -+ -+MuQSS is a per-cpu runqueue variant of the original BFS scheduler with -+one 8 level skiplist per runqueue, and fine grained locking for much more -+scalability. -+ -+ -+Goals. -+ -+The goal of the Multiple Queue Skiplist Scheduler, referred to as MuQSS from -+here on (pronounced mux) is to completely do away with the complex designs of -+the past for the cpu process scheduler and instead implement one that is very -+simple in basic design. The main focus of MuQSS is to achieve excellent desktop -+interactivity and responsiveness without heuristics and tuning knobs that are -+difficult to understand, impossible to model and predict the effect of, and when -+tuned to one workload cause massive detriment to another, while still being -+scalable to many CPUs and processes. -+ -+ -+Design summary. -+ -+MuQSS is best described as per-cpu multiple runqueue, O(log n) insertion, O(1) -+lookup, earliest effective virtual deadline first tickless design, loosely based -+on EEVDF (earliest eligible virtual deadline first) and my previous Staircase -+Deadline scheduler, and evolved from the single runqueue O(n) BFS scheduler. -+Each component shall be described in order to understand the significance of, -+and reasoning for it. -+ -+ -+Design reasoning. -+ -+In BFS, the use of a single runqueue across all CPUs meant that each CPU would -+need to scan the entire runqueue looking for the process with the earliest -+deadline and schedule that next, regardless of which CPU it originally came -+from. This made BFS deterministic with respect to latency and provided -+guaranteed latencies dependent on number of processes and CPUs. The single -+runqueue, however, meant that all CPUs would compete for the single lock -+protecting it, which would lead to increasing lock contention as the number of -+CPUs rose and appeared to limit scalability of common workloads beyond 16 -+logical CPUs. Additionally, the O(n) lookup of the runqueue list obviously -+increased overhead proportionate to the number of queued proecesses and led to -+cache thrashing while iterating over the linked list. -+ -+MuQSS is an evolution of BFS, designed to maintain the same scheduling -+decision mechanism and be virtually deterministic without relying on the -+constrained design of the single runqueue by splitting out the single runqueue -+to be per-CPU and use skiplists instead of linked lists. 
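
A rough structural picture of what "one 8 level skiplist per runqueue, and fine grained locking" implies is sketched below. It is purely illustrative: the field names and the user-space mutex are invented for this sketch and do not match the MuQSS sources. The point is only that every CPU owns its own queue and lock, and that the best candidate task is always the first level-0 entry, which is what makes lookup O(1).

/* Purely illustrative layout - names are invented here and this is not the
 * MuQSS code. Each CPU owns a runqueue protected by its own lock (possibly
 * shared with sibling CPUs, cf. the rqshare options), and queued tasks are
 * linked into a fixed 8-level, doubly linked skip list ordered by priority
 * and virtual deadline. */
#include <stddef.h>
#include <pthread.h>

#define SKIPLIST_LEVELS 8

struct skiplist_node {
        struct skiplist_node *next[SKIPLIST_LEVELS];
        struct skiplist_node *prev[SKIPLIST_LEVELS];
        unsigned long long key;         /* static priority, then virtual deadline */
        void *task;                     /* the queued task this node represents */
};

struct cpu_runqueue {
        pthread_mutex_t lock;           /* fine grained: one lock per runqueue */
        struct skiplist_node head;      /* sentinel node */
        unsigned int nr_queued;
};

/* O(1) lookup: the best candidate is always the first level-0 entry.
 * Insertion (not shown) walks the levels top-down, which is where the
 * O(log n) insertion bound quoted above comes from. */
static void *runqueue_peek_best(struct cpu_runqueue *rq)
{
        struct skiplist_node *first = rq->head.next[0];

        return (!first || first == &rq->head) ? NULL : first->task;
}
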
-+ -+The original reason for going back to a single runqueue design for BFS was that -+once multiple runqueues are introduced, per-CPU or otherwise, there will be -+complex interactions as each runqueue will be responsible for the scheduling -+latency and fairness of the tasks only on its own runqueue, and to achieve -+fairness and low latency across multiple CPUs, any advantage in throughput of -+having CPU local tasks causes other disadvantages. This is due to requiring a -+very complex balancing system to at best achieve some semblance of fairness -+across CPUs and can only maintain relatively low latency for tasks bound to the -+same CPUs, not across them. To increase said fairness and latency across CPUs, -+the advantage of local runqueue locking, which makes for better scalability, is -+lost due to having to grab multiple locks. -+ -+MuQSS works around the problems inherent in multiple runqueue designs by -+making its skip lists priority ordered and through novel use of lockless -+examination of each other runqueue it can decide if it should take the earliest -+deadline task from another runqueue for latency reasons, or for CPU balancing -+reasons. It still does not have a balancing system, choosing to allow the -+next task scheduling decision and task wakeup CPU choice to allow balancing to -+happen by virtue of its choices. -+ -+As a further evolution of the design, MuQSS normally configures sharing of -+runqueues in a logical fashion for when CPU resources are shared for improved -+latency and throughput. By default it shares runqueues and locks between -+multicore siblings. Optionally it can be configured to run with sharing of -+SMT siblings only, all SMP packages or no sharing at all. Additionally it can -+be selected at boot time. -+ -+ -+Design details. -+ -+Custom skip list implementation: -+ -+To avoid the overhead of building up and tearing down skip list structures, -+the variant used by MuQSS has a number of optimisations making it specific for -+its use case in the scheduler. It uses static arrays of 8 'levels' instead of -+building up and tearing down structures dynamically. This makes each runqueue -+only scale O(log N) up to 64k tasks. However as there is one runqueue per CPU -+it means that it scales O(log N) up to 64k x number of logical CPUs which is -+far beyond the realistic task limits each CPU could handle. By being 8 levels -+it also makes the array exactly one cacheline in size. Additionally, each -+skip list node is bidirectional making insertion and removal amortised O(1), -+being O(k) where k is 1-8. Uniquely, we are only ever interested in the very -+first entry in each list at all times with MuQSS, so there is never a need to -+do a search and thus look up is always O(1). In interactive mode, the queues -+will be searched beyond their first entry if the first task is not suitable -+for affinity or SMT nice reasons. -+ -+Task insertion: -+ -+MuQSS inserts tasks into a per CPU runqueue as an O(log N) insertion into -+a custom skip list as described above (based on the original design by William -+Pugh). Insertion is ordered in such a way that there is never a need to do a -+search by ordering tasks according to static priority primarily, and then -+virtual deadline at the time of insertion. -+ -+Niffies: -+ -+Niffies are a monotonic forward moving timer not unlike the "jiffies" but are -+of nanosecond resolution. 
Niffies are calculated per-runqueue from the high -+resolution TSC timers, and in order to maintain fairness are synchronised -+between CPUs whenever both runqueues are locked concurrently. -+ -+Virtual deadline: -+ -+The key to achieving low latency, scheduling fairness, and "nice level" -+distribution in MuQSS is entirely in the virtual deadline mechanism. The one -+tunable in MuQSS is the rr_interval, or "round robin interval". This is the -+maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy) -+tasks of the same nice level will be running for, or looking at it the other -+way around, the longest duration two tasks of the same nice level will be -+delayed for. When a task requests cpu time, it is given a quota (time_slice) -+equal to the rr_interval and a virtual deadline. The virtual deadline is -+offset from the current time in niffies by this equation: -+ -+ niffies + (prio_ratio * rr_interval) -+ -+The prio_ratio is determined as a ratio compared to the baseline of nice -20 -+and increases by 10% per nice level. The deadline is a virtual one only in that -+no guarantee is placed that a task will actually be scheduled by this time, but -+it is used to compare which task should go next. There are three components to -+how a task is next chosen. First is time_slice expiration. If a task runs out -+of its time_slice, it is descheduled, the time_slice is refilled, and the -+deadline reset to that formula above. Second is sleep, where a task no longer -+is requesting CPU for whatever reason. The time_slice and deadline are _not_ -+adjusted in this case and are just carried over for when the task is next -+scheduled. Third is preemption, and that is when a newly waking task is deemed -+higher priority than a currently running task on any cpu by virtue of the fact -+that it has an earlier virtual deadline than the currently running task. The -+earlier deadline is the key to which task is next chosen for the first and -+second cases. -+ -+The CPU proportion of different nice tasks works out to be approximately the -+ -+ (prio_ratio difference)^2 -+ -+The reason it is squared is that a task's deadline does not change while it is -+running unless it runs out of time_slice. Thus, even if the time actually -+passes the deadline of another task that is queued, it will not get CPU time -+unless the current running task deschedules, and the time "base" (niffies) is -+constantly moving. -+ -+Task lookup: -+ -+As tasks are already pre-ordered according to anticipated scheduling order in -+the skip lists, lookup for the next suitable task per-runqueue is always a -+matter of simply selecting the first task in the 0th level skip list entry. -+In order to maintain optimal latency and fairness across CPUs, MuQSS does a -+novel examination of every other runqueue in cache locality order, choosing the -+best task across all runqueues. This provides near-determinism of how long any -+task across the entire system may wait before receiving CPU time. The other -+runqueues are first examine lockless and then trylocked to minimise the -+potential lock contention if they are likely to have a suitable better task. -+Each other runqueue lock is only held for as long as it takes to examine the -+entry for suitability. In "interactive" mode, the default setting, MuQSS will -+look for the best deadline task across all CPUs, while in !interactive mode, -+it will only select a better deadline task from another CPU if it is more -+heavily laden than the current one. 
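
The "examine locklessly, then trylock" step can be illustrated with a small user-space analogy. This is not the kernel implementation and the names are invented for this sketch; it only shows the shape of the idea: peek at each peer queue's best deadline without taking its lock, trylock only the queues that look promising, re-check under the lock, and never block on a contended lock.

/* User-space analogy of the "examine locklessly, then trylock" pattern
 * described above; it is not the kernel code and the names are invented.
 * best_deadline is read unlocked as a hint and re-checked under the lock
 * before anything is actually taken. */
#include <pthread.h>
#include <stdint.h>
#include <stddef.h>

struct peer_queue {
        pthread_mutex_t lock;
        volatile uint64_t best_deadline;        /* hint; may be stale unlocked */
        void *(*steal_best)(struct peer_queue *q);
};

void *pick_from_peers(struct peer_queue *peers, int nr_peers, uint64_t my_best)
{
        void *task = NULL;

        for (int i = 0; i < nr_peers && !task; i++) {
                struct peer_queue *q = &peers[i];

                /* 1. lockless peek: skip queues that cannot beat our own best */
                if (q->best_deadline >= my_best)
                        continue;

                /* 2. trylock only the promising queue; never spin on contention */
                if (pthread_mutex_trylock(&q->lock) != 0)
                        continue;

                /* 3. re-check under the lock, take the task, drop the lock fast */
                if (q->best_deadline < my_best)
                        task = q->steal_best(q);
                pthread_mutex_unlock(&q->lock);
        }
        return task;
}
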
-+ -+Lookup is therefore O(k) where k is number of CPUs. -+ -+ -+Latency. -+ -+Through the use of virtual deadlines to govern the scheduling order of normal -+tasks, queue-to-activation latency per runqueue is guaranteed to be bound by -+the rr_interval tunable which is set to 6ms by default. This means that the -+longest a CPU bound task will wait for more CPU is proportional to the number -+of running tasks and in the common case of 0-2 running tasks per CPU, will be -+under the 7ms threshold for human perception of jitter. Additionally, as newly -+woken tasks will have an early deadline from their previous runtime, the very -+tasks that are usually latency sensitive will have the shortest interval for -+activation, usually preempting any existing CPU bound tasks. -+ -+Tickless expiry: -+ -+A feature of MuQSS is that it is not tied to the resolution of the chosen tick -+rate in Hz, instead depending entirely on the high resolution timers where -+possible for sub-millisecond accuracy on timeouts regarless of the underlying -+tick rate. This allows MuQSS to be run with the low overhead of low Hz rates -+such as 100 by default, benefiting from the improved throughput and lower -+power usage it provides. Another advantage of this approach is that in -+combination with the Full No HZ option, which disables ticks on running task -+CPUs instead of just idle CPUs, the tick can be disabled at all times -+regardless of how many tasks are running instead of being limited to just one -+running task. Note that this option is NOT recommended for regular desktop -+users. -+ -+ -+Scalability and balancing. -+ -+Unlike traditional approaches where balancing is a combination of CPU selection -+at task wakeup and intermittent balancing based on a vast array of rules set -+according to architecture, busyness calculations and special case management, -+MuQSS indirectly balances on the fly at task wakeup and next task selection. -+During initialisation, MuQSS creates a cache coherency ordered list of CPUs for -+each logical CPU and uses this to aid task/CPU selection when CPUs are busy. -+Additionally it selects any idle CPUs, if they are available, at any time over -+busy CPUs according to the following preference: -+ -+ * Same thread, idle or busy cache, idle or busy threads -+ * Other core, same cache, idle or busy cache, idle threads. -+ * Same node, other CPU, idle cache, idle threads. -+ * Same node, other CPU, busy cache, idle threads. -+ * Other core, same cache, busy threads. -+ * Same node, other CPU, busy threads. -+ * Other node, other CPU, idle cache, idle threads. -+ * Other node, other CPU, busy cache, idle threads. -+ * Other node, other CPU, busy threads. -+ -+Mux is therefore SMT, MC and Numa aware without the need for extra -+intermittent balancing to maintain CPUs busy and make the most of cache -+coherency. -+ -+ -+Features -+ -+As the initial prime target audience for MuQSS was the average desktop user, it -+was designed to not need tweaking, tuning or have features set to obtain benefit -+from it. Thus the number of knobs and features has been kept to an absolute -+minimum and should not require extra user input for the vast majority of cases. -+There are 3 optional tunables, and 2 extra scheduling policies. The rr_interval, -+interactive, and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO -+policies. In addition to this, MuQSS also uses sub-tick accounting. What MuQSS -+does _not_ now feature is support for CGROUPS. 
The average user should neither -+need to know what these are, nor should they need to be using them to have good -+desktop behaviour. However since some applications refuse to work without -+cgroups, one can enable them with MuQSS as a stub and the filesystem will be -+created which will allow the applications to work. -+ -+rr_interval: -+ -+ /proc/sys/kernel/rr_interval -+ -+The value is in milliseconds, and the default value is set to 6. Valid values -+are from 1 to 1000. Decreasing the value will decrease latencies at the cost of -+decreasing throughput, while increasing it will improve throughput, but at the -+cost of worsening latencies. It is based on the fact that humans can detect -+jitter at approximately 7ms, so aiming for much lower latencies is pointless -+under most circumstances. It is worth noting this fact when comparing the -+latency performance of MuQSS to other schedulers. Worst case latencies being -+higher than 7ms are far worse than average latencies not being in the -+microsecond range. -+ -+interactive: -+ -+ /proc/sys/kernel/interactive -+ -+The value is a simple boolean of 1 for on and 0 for off and is set to on by -+default. Disabling this will disable the near-determinism of MuQSS when -+selecting the next task by not examining all CPUs for the earliest deadline -+task, or which CPU to wake to, instead prioritising CPU balancing for improved -+throughput. Latency will still be bound by rr_interval, but on a per-CPU basis -+instead of across the whole system. -+ -+Runqueue sharing. -+ -+By default MuQSS chooses to share runqueue resources (specifically the skip -+list and locking) between multicore siblings. It is configurable at build time -+to select between None, SMT, MC and SMP, corresponding to no sharing, sharing -+only between simultaneous multithreading siblings, multicore siblings, or -+symmetric multiprocessing physical packages. Additionally it can be set at -+boot time with the use of the rqshare parameter. The reason for configurability -+is that some architectures have CPUs with many multicore siblings (>= 16) -+where it may be detrimental to throughput to share runqueues and another -+sharing option may be desirable. Additionally, more sharing than usual can -+improve latency on a system-wide level at the expense of throughput if desired. -+ -+The options are: -+none, smt, mc, smp -+ -+eg: -+ rqshare=mc -+ -+Isochronous scheduling: -+ -+Isochronous scheduling is a unique scheduling policy designed to provide -+near-real-time performance to unprivileged (ie non-root) users without the -+ability to starve the machine indefinitely. Isochronous tasks (which means -+"same time") are set using, for example, the schedtool application like so: -+ -+ schedtool -I -e amarok -+ -+This will start the audio application "amarok" as SCHED_ISO. How SCHED_ISO works -+is that it has a priority level between true realtime tasks and SCHED_NORMAL -+which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie, -+if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval -+rate). However if ISO tasks run for more than a tunable finite amount of time, -+they are then demoted back to SCHED_NORMAL scheduling. This finite amount of -+time is the percentage of CPU available per CPU, configurable as a percentage in -+the following "resource handling" tunable (as opposed to a scheduler tunable): -+ -+iso_cpu: -+ -+ /proc/sys/kernel/iso_cpu -+ -+and is set to 70% by default.
It is calculated over a rolling 5 second average. -+Because it is the total CPU available, it means that on a multi CPU machine, it -+is possible to have an ISO task running as realtime scheduling indefinitely on -+just one CPU, as the other CPUs will be available. Setting this to 100 is the -+equivalent of giving all users SCHED_RR access and setting it to 0 removes the -+ability to run any pseudo-realtime tasks. -+ -+A feature of MuQSS is that it detects when an application tries to obtain a -+realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the -+appropriate privileges to use those policies. When it detects this, it will -+give the task SCHED_ISO policy instead. Thus it is transparent to the user. -+ -+ -+Idleprio scheduling: -+ -+Idleprio scheduling is a scheduling policy designed to give out CPU to a task -+_only_ when the CPU would be otherwise idle. The idea behind this is to allow -+ultra low priority tasks to be run in the background that have virtually no -+effect on the foreground tasks. This is ideally suited to distributed computing -+clients (like setiathome, folding, mprime etc) but can also be used to start a -+video encode or so on without any slowdown of other tasks. To prevent this policy -+from grabbing shared resources and holding them indefinitely, if it detects a -+state where the task is waiting on I/O, the machine is about to suspend to ram -+and so on, it will transiently schedule them as SCHED_NORMAL. Once a task has -+been scheduled as IDLEPRIO, it cannot be put back to SCHED_NORMAL without -+superuser privileges since it is effectively a lower scheduling policy. Tasks -+can be set to start as SCHED_IDLEPRIO with the schedtool command like so: -+ -+schedtool -D -e ./mprime -+ -+Subtick accounting: -+ -+It is surprisingly difficult to get accurate CPU accounting, and in many cases, -+the accounting is done by simply determining what is happening at the precise -+moment a timer tick fires off. This becomes increasingly inaccurate as the timer -+tick frequency (HZ) is lowered. It is possible to create an application which -+uses almost 100% CPU, yet by being descheduled at the right time, records zero -+CPU usage. While the main problem with this is that there are possible security -+implications, it is also difficult to determine how much CPU a task really does -+use. MuQSS uses sub-tick accounting from the TSC clock to determine real CPU -+usage. Thus, the amount of CPU reported as being used by MuQSS will more -+accurately represent how much CPU the task itself is using (as is shown for -+example by the 'time' application), so the reported values may be quite -+different to other schedulers. When comparing throughput of MuQSS to other -+designs, it is important to compare the actual completed work in terms of total -+wall clock time taken and total work done, rather than the reported "cpu usage". -+ -+Symmetric MultiThreading (SMT) aware nice: -+ -+SMT, a.k.a. hyperthreading, is a very common feature on modern CPUs. While the -+logical CPU count rises by adding thread units to each CPU core, allowing more -+than one task to be run simultaneously on the same core, the disadvantage of it -+is that the CPU power is shared between the tasks, not summating to the power -+of two CPUs. The practical upshot of this is that two tasks running on -+separate threads of the same core run significantly slower than if they had one -+core each to run on.
While smart CPU selection allows each task to have a core -+to itself whenever available (as is done on MuQSS), it cannot offset the -+slowdown that occurs when the cores are all loaded and only a thread is left. -+Most of the time this is harmless as the CPU is effectively overloaded at this -+point and the extra thread is of benefit. However when running a niced task in -+the presence of an un-niced task (say nice 19 v nice 0), the nice task gets -+precisely the same amount of CPU power as the unniced one. MuQSS has an -+optional configuration feature known as SMT-NICE which selectively idles the -+secondary niced thread for a period proportional to the nice difference, -+allowing CPU distribution according to nice level to be maintained, at the -+expense of a small amount of extra overhead. If this is configured in on a -+machine without SMT threads, the overhead is minimal. -+ -+ -+Con Kolivas Sat, 29th October 2016 -diff --git a/Makefile b/Makefile -index b668725a2a62..73a4381d3ea9 100644 ---- a/Makefile -+++ b/Makefile -@@ -15,6 +15,10 @@ NAME = Kleptomaniac Octopus - PHONY := _all - _all: - -+CKVERSION = -ck1 -+CKNAME = MuQSS Powered -+EXTRAVERSION := $(EXTRAVERSION)$(CKVERSION) -+ - # We are using a recursive build, so we need to do a little thinking - # to get the ordering right. - # -diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig -index ef179033a7c2..14b576a531ad 100644 ---- a/arch/alpha/Kconfig -+++ b/arch/alpha/Kconfig -@@ -665,6 +665,8 @@ config HZ - default 1200 if HZ_1200 - default 1024 - -+source "kernel/Kconfig.MuQSS" -+ - config SRM_ENV - tristate "SRM environment through procfs" - depends on PROC_FS -diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig -index a12656ec0072..b46b6ddc7636 100644 ---- a/arch/arc/configs/tb10x_defconfig -+++ b/arch/arc/configs/tb10x_defconfig -@@ -29,7 +29,7 @@ CONFIG_ARC_PLAT_TB10X=y - CONFIG_ARC_CACHE_LINE_SHIFT=5 - CONFIG_HZ=250 - CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk" --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - # CONFIG_COMPACTION is not set - CONFIG_NET=y - CONFIG_PACKET=y -diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig -index c77c93c485a0..c16a89549ff2 100644 ---- a/arch/arm/Kconfig -+++ b/arch/arm/Kconfig -@@ -1237,6 +1237,8 @@ config SCHED_SMT - MultiThreading at a cost of slightly increased overhead in some - places. If unsure say N here. 
- -+source "kernel/Kconfig.MuQSS" -+ - config HAVE_ARM_SCU - bool - help -diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig -index 8e7a3ed2a4df..8a1ec6d2c3fb 100644 ---- a/arch/arm/configs/bcm2835_defconfig -+++ b/arch/arm/configs/bcm2835_defconfig -@@ -29,7 +29,7 @@ CONFIG_MODULE_UNLOAD=y - CONFIG_ARCH_MULTI_V6=y - CONFIG_ARCH_BCM=y - CONFIG_ARCH_BCM2835=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_AEABI=y - CONFIG_KSM=y - CONFIG_CLEANCACHE=y -diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig -index 5a20d12d62bd..fb76e6ff18a0 100644 ---- a/arch/arm/configs/imx_v6_v7_defconfig -+++ b/arch/arm/configs/imx_v6_v7_defconfig -@@ -45,6 +45,7 @@ CONFIG_PCI_MSI=y - CONFIG_PCI_IMX6=y - CONFIG_SMP=y - CONFIG_ARM_PSCI=y -+CONFIG_PREEMPT=y - CONFIG_HIGHMEM=y - CONFIG_FORCE_MAX_ZONEORDER=14 - CONFIG_CMDLINE="noinitrd console=ttymxc0,115200" -diff --git a/arch/arm/configs/mps2_defconfig b/arch/arm/configs/mps2_defconfig -index 1d923dbb9928..9c1931f1fafd 100644 ---- a/arch/arm/configs/mps2_defconfig -+++ b/arch/arm/configs/mps2_defconfig -@@ -18,7 +18,7 @@ CONFIG_ARCH_MPS2=y - CONFIG_SET_MEM_PARAM=y - CONFIG_DRAM_BASE=0x21000000 - CONFIG_DRAM_SIZE=0x1000000 --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - # CONFIG_ATAGS is not set - CONFIG_ZBOOT_ROM_TEXT=0x0 - CONFIG_ZBOOT_ROM_BSS=0x0 -diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig -index a9c6f32a9b1c..870866aaa39d 100644 ---- a/arch/arm/configs/mxs_defconfig -+++ b/arch/arm/configs/mxs_defconfig -@@ -1,7 +1,7 @@ - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT_VOLUNTARY=n - CONFIG_TASKSTATS=y - CONFIG_TASK_DELAY_ACCT=y - CONFIG_TASK_XACCT=y -@@ -25,6 +25,13 @@ CONFIG_MODULE_UNLOAD=y - CONFIG_MODULE_FORCE_UNLOAD=y - CONFIG_MODVERSIONS=y - CONFIG_BLK_DEV_INTEGRITY=y -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+# CONFIG_ARCH_MULTI_V7 is not set -+CONFIG_ARCH_MXS=y -+# CONFIG_ARM_THUMB is not set -+CONFIG_PREEMPT=y -+CONFIG_AEABI=y - CONFIG_NET=y - CONFIG_PACKET=y - CONFIG_UNIX=y -diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig -index 5d513f461957..7cb8456280be 100644 ---- a/arch/arm64/Kconfig -+++ b/arch/arm64/Kconfig -@@ -942,6 +942,8 @@ config SCHED_SMT - MultiThreading at a cost of slightly increased overhead in some - places. If unsure say N here. 
- -+source "kernel/Kconfig.MuQSS" -+ - config NR_CPUS - int "Maximum number of CPUs (2-4096)" - range 2 4096 -diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig -index 6466e83067b4..776d8783fc2a 100644 ---- a/arch/mips/configs/fuloong2e_defconfig -+++ b/arch/mips/configs/fuloong2e_defconfig -@@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y -diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig -index 9085f4d6c698..fb23111d45f6 100644 ---- a/arch/mips/configs/gpr_defconfig -+++ b/arch/mips/configs/gpr_defconfig -@@ -1,8 +1,8 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_BSD_PROCESS_ACCT_V3=y - CONFIG_RELAY=y -diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig -index 21a1168ae301..529a1b1007cf 100644 ---- a/arch/mips/configs/ip22_defconfig -+++ b/arch/mips/configs/ip22_defconfig -@@ -1,7 +1,7 @@ - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y - CONFIG_LOG_BUF_SHIFT=14 -diff --git a/arch/mips/configs/ip28_defconfig b/arch/mips/configs/ip28_defconfig -index 0921ef38e9fb..6da05cef46f8 100644 ---- a/arch/mips/configs/ip28_defconfig -+++ b/arch/mips/configs/ip28_defconfig -@@ -1,5 +1,5 @@ - CONFIG_SYSVIPC=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y - CONFIG_LOG_BUF_SHIFT=14 -diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig -index 8c223035921f..a3bf87450343 100644 ---- a/arch/mips/configs/jazz_defconfig -+++ b/arch/mips/configs/jazz_defconfig -@@ -1,8 +1,8 @@ -+CONFIG_PREEMPT=y - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_LOG_BUF_SHIFT=14 - CONFIG_RELAY=y -diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig -index 914af125a7fa..76a64290373f 100644 ---- a/arch/mips/configs/mtx1_defconfig -+++ b/arch/mips/configs/mtx1_defconfig -@@ -1,8 +1,8 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_AUDIT=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_BSD_PROCESS_ACCT_V3=y - CONFIG_RELAY=y -diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig -index 4ecb157e56d4..ea7309283b01 100644 ---- a/arch/mips/configs/nlm_xlr_defconfig -+++ b/arch/mips/configs/nlm_xlr_defconfig -@@ -1,10 +1,10 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_AUDIT=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_BSD_PROCESS_ACCT_V3=y - CONFIG_TASKSTATS=y -diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig -index 63fe2da1b37f..7f08ee237345 100644 ---- a/arch/mips/configs/pic32mzda_defconfig -+++ b/arch/mips/configs/pic32mzda_defconfig -@@ -1,7 +1,7 @@ -+CONFIG_PREEMPT=y - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y - 
CONFIG_LOG_BUF_SHIFT=14 -diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig -index 24e07180c57d..38582e8f71c4 100644 ---- a/arch/mips/configs/pistachio_defconfig -+++ b/arch/mips/configs/pistachio_defconfig -@@ -1,9 +1,9 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_DEFAULT_HOSTNAME="localhost" - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_IKCONFIG=m - CONFIG_IKCONFIG_PROC=y - CONFIG_LOG_BUF_SHIFT=18 -diff --git a/arch/mips/configs/pnx8335_stb225_defconfig b/arch/mips/configs/pnx8335_stb225_defconfig -index d06db6b87959..fb2cd3234d95 100644 ---- a/arch/mips/configs/pnx8335_stb225_defconfig -+++ b/arch/mips/configs/pnx8335_stb225_defconfig -@@ -1,9 +1,9 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - # CONFIG_SWAP is not set - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_LOG_BUF_SHIFT=14 - CONFIG_EXPERT=y - CONFIG_SLAB=y -diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig -index 30d7c3db884e..9e68acfa0d0e 100644 ---- a/arch/mips/configs/rm200_defconfig -+++ b/arch/mips/configs/rm200_defconfig -@@ -1,6 +1,6 @@ -+CONFIG_PREEMPT=y - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y -diff --git a/arch/parisc/configs/712_defconfig b/arch/parisc/configs/712_defconfig -new file mode 100644 -index 000000000000..578524f80cc4 ---- /dev/null -+++ b/arch/parisc/configs/712_defconfig -@@ -0,0 +1,181 @@ -+# CONFIG_LOCALVERSION_AUTO is not set -+CONFIG_SYSVIPC=y -+CONFIG_POSIX_MQUEUE=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=16 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_KALLSYMS_ALL=y -+CONFIG_SLAB=y -+CONFIG_PROFILING=y -+CONFIG_OPROFILE=m -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+CONFIG_MODULE_FORCE_UNLOAD=y -+CONFIG_PA7100LC=y -+CONFIG_PREEMPT=y -+CONFIG_GSC_LASI=y -+# CONFIG_PDC_CHASSIS is not set -+CONFIG_BINFMT_MISC=m -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_XFRM_USER=m -+CONFIG_NET_KEY=m -+CONFIG_INET=y -+CONFIG_IP_MULTICAST=y -+CONFIG_IP_PNP=y -+CONFIG_IP_PNP_DHCP=y -+CONFIG_IP_PNP_BOOTP=y -+CONFIG_INET_AH=m -+CONFIG_INET_ESP=m -+CONFIG_INET_DIAG=m -+# CONFIG_IPV6 is not set -+CONFIG_NETFILTER=y -+CONFIG_LLC2=m -+CONFIG_NET_PKTGEN=m -+CONFIG_DEVTMPFS=y -+CONFIG_DEVTMPFS_MOUNT=y -+# CONFIG_STANDALONE is not set -+# CONFIG_PREVENT_FIRMWARE_BUILD is not set -+CONFIG_PARPORT=y -+CONFIG_PARPORT_PC=m -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_CRYPTOLOOP=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=6144 -+CONFIG_ATA_OVER_ETH=m -+CONFIG_SCSI=y -+CONFIG_BLK_DEV_SD=y -+CONFIG_CHR_DEV_ST=y -+CONFIG_BLK_DEV_SR=y -+CONFIG_CHR_DEV_SG=y -+CONFIG_SCSI_ISCSI_ATTRS=m -+CONFIG_SCSI_LASI700=y -+CONFIG_SCSI_DEBUG=m -+CONFIG_MD=y -+CONFIG_BLK_DEV_MD=m -+CONFIG_MD_LINEAR=m -+CONFIG_MD_RAID0=m -+CONFIG_MD_RAID1=m -+CONFIG_NETDEVICES=y -+CONFIG_BONDING=m -+CONFIG_DUMMY=m -+CONFIG_TUN=m -+CONFIG_LASI_82596=y -+CONFIG_PPP=m -+CONFIG_PPP_BSDCOMP=m -+CONFIG_PPP_DEFLATE=m -+CONFIG_PPP_MPPE=m -+CONFIG_PPPOE=m -+CONFIG_PPP_ASYNC=m -+CONFIG_PPP_SYNC_TTY=m -+# CONFIG_KEYBOARD_HIL_OLD is not set -+CONFIG_MOUSE_SERIAL=m -+CONFIG_LEGACY_PTY_COUNT=64 -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_SERIAL_8250_NR_UARTS=17 -+CONFIG_SERIAL_8250_EXTENDED=y -+CONFIG_SERIAL_8250_MANY_PORTS=y -+CONFIG_SERIAL_8250_SHARE_IRQ=y -+# CONFIG_SERIAL_MUX is not set 
-+CONFIG_PDC_CONSOLE=y -+CONFIG_PRINTER=m -+CONFIG_PPDEV=m -+# CONFIG_HW_RANDOM is not set -+CONFIG_RAW_DRIVER=y -+# CONFIG_HWMON is not set -+CONFIG_FB=y -+CONFIG_FB_MODE_HELPERS=y -+CONFIG_FB_TILEBLITTING=y -+CONFIG_DUMMY_CONSOLE_COLUMNS=128 -+CONFIG_DUMMY_CONSOLE_ROWS=48 -+CONFIG_FRAMEBUFFER_CONSOLE=y -+CONFIG_LOGO=y -+# CONFIG_LOGO_LINUX_MONO is not set -+# CONFIG_LOGO_LINUX_VGA16 is not set -+# CONFIG_LOGO_LINUX_CLUT224 is not set -+CONFIG_SOUND=y -+CONFIG_SND=y -+CONFIG_SND_SEQUENCER=y -+CONFIG_SND_HARMONY=y -+CONFIG_EXT2_FS=y -+CONFIG_EXT3_FS=y -+CONFIG_JFS_FS=m -+CONFIG_XFS_FS=m -+CONFIG_AUTOFS4_FS=y -+CONFIG_ISO9660_FS=y -+CONFIG_JOLIET=y -+CONFIG_UDF_FS=m -+CONFIG_MSDOS_FS=m -+CONFIG_VFAT_FS=m -+CONFIG_PROC_KCORE=y -+CONFIG_TMPFS=y -+CONFIG_UFS_FS=m -+CONFIG_NFS_FS=y -+CONFIG_NFS_V4=y -+CONFIG_ROOT_NFS=y -+CONFIG_NFSD=m -+CONFIG_NFSD_V4=y -+CONFIG_CIFS=m -+CONFIG_NLS_CODEPAGE_437=m -+CONFIG_NLS_CODEPAGE_737=m -+CONFIG_NLS_CODEPAGE_775=m -+CONFIG_NLS_CODEPAGE_850=m -+CONFIG_NLS_CODEPAGE_852=m -+CONFIG_NLS_CODEPAGE_855=m -+CONFIG_NLS_CODEPAGE_857=m -+CONFIG_NLS_CODEPAGE_860=m -+CONFIG_NLS_CODEPAGE_861=m -+CONFIG_NLS_CODEPAGE_862=m -+CONFIG_NLS_CODEPAGE_863=m -+CONFIG_NLS_CODEPAGE_864=m -+CONFIG_NLS_CODEPAGE_865=m -+CONFIG_NLS_CODEPAGE_866=m -+CONFIG_NLS_CODEPAGE_869=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_CODEPAGE_950=m -+CONFIG_NLS_CODEPAGE_932=m -+CONFIG_NLS_CODEPAGE_949=m -+CONFIG_NLS_CODEPAGE_874=m -+CONFIG_NLS_ISO8859_8=m -+CONFIG_NLS_CODEPAGE_1250=m -+CONFIG_NLS_CODEPAGE_1251=m -+CONFIG_NLS_ASCII=m -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_ISO8859_2=m -+CONFIG_NLS_ISO8859_3=m -+CONFIG_NLS_ISO8859_4=m -+CONFIG_NLS_ISO8859_5=m -+CONFIG_NLS_ISO8859_6=m -+CONFIG_NLS_ISO8859_7=m -+CONFIG_NLS_ISO8859_9=m -+CONFIG_NLS_ISO8859_13=m -+CONFIG_NLS_ISO8859_14=m -+CONFIG_NLS_ISO8859_15=m -+CONFIG_NLS_KOI8_R=m -+CONFIG_NLS_KOI8_U=m -+CONFIG_NLS_UTF8=m -+CONFIG_DEBUG_FS=y -+CONFIG_MAGIC_SYSRQ=y -+CONFIG_DEBUG_KERNEL=y -+CONFIG_DEBUG_MUTEXES=y -+CONFIG_CRYPTO_TEST=m -+CONFIG_CRYPTO_HMAC=y -+CONFIG_CRYPTO_MICHAEL_MIC=m -+CONFIG_CRYPTO_SHA512=m -+CONFIG_CRYPTO_TGR192=m -+CONFIG_CRYPTO_WP512=m -+CONFIG_CRYPTO_ANUBIS=m -+CONFIG_CRYPTO_BLOWFISH=m -+CONFIG_CRYPTO_CAST6=m -+CONFIG_CRYPTO_KHAZAD=m -+CONFIG_CRYPTO_SERPENT=m -+CONFIG_CRYPTO_TEA=m -+CONFIG_CRYPTO_TWOFISH=m -+CONFIG_CRYPTO_DEFLATE=m -+# CONFIG_CRYPTO_HW is not set -+CONFIG_FONTS=y -+CONFIG_FONT_8x8=y -+CONFIG_FONT_8x16=y -diff --git a/arch/parisc/configs/c3000_defconfig b/arch/parisc/configs/c3000_defconfig -new file mode 100644 -index 000000000000..d1bdfad94048 ---- /dev/null -+++ b/arch/parisc/configs/c3000_defconfig -@@ -0,0 +1,151 @@ -+# CONFIG_LOCALVERSION_AUTO is not set -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=16 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_EXPERT=y -+CONFIG_KALLSYMS_ALL=y -+CONFIG_SLAB=y -+CONFIG_PROFILING=y -+CONFIG_OPROFILE=m -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+CONFIG_MODULE_FORCE_UNLOAD=y -+CONFIG_PA8X00=y -+CONFIG_PREEMPT=y -+# CONFIG_GSC is not set -+CONFIG_PCI=y -+CONFIG_PCI_LBA=y -+# CONFIG_PDC_CHASSIS is not set -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_XFRM_USER=m -+CONFIG_NET_KEY=m -+CONFIG_INET=y -+CONFIG_IP_MULTICAST=y -+CONFIG_IP_PNP=y -+CONFIG_IP_PNP_BOOTP=y -+# CONFIG_INET_DIAG is not set -+CONFIG_INET6_IPCOMP=m -+CONFIG_IPV6_TUNNEL=m -+CONFIG_NETFILTER=y -+CONFIG_NET_PKTGEN=m -+CONFIG_DEVTMPFS=y -+CONFIG_DEVTMPFS_MOUNT=y -+# CONFIG_STANDALONE is not set -+# CONFIG_PREVENT_FIRMWARE_BUILD is not set -+CONFIG_BLK_DEV_UMEM=m 
-+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_CRYPTOLOOP=m -+CONFIG_IDE=y -+CONFIG_BLK_DEV_IDECD=y -+CONFIG_BLK_DEV_NS87415=y -+CONFIG_SCSI=y -+CONFIG_BLK_DEV_SD=y -+CONFIG_CHR_DEV_ST=y -+CONFIG_BLK_DEV_SR=y -+CONFIG_CHR_DEV_SG=y -+CONFIG_SCSI_ISCSI_ATTRS=m -+CONFIG_SCSI_SYM53C8XX_2=y -+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 -+CONFIG_SCSI_DEBUG=m -+CONFIG_MD=y -+CONFIG_BLK_DEV_MD=y -+CONFIG_MD_LINEAR=y -+CONFIG_MD_RAID0=y -+CONFIG_MD_RAID1=y -+CONFIG_BLK_DEV_DM=m -+CONFIG_DM_CRYPT=m -+CONFIG_DM_SNAPSHOT=m -+CONFIG_DM_MIRROR=m -+CONFIG_DM_ZERO=m -+CONFIG_DM_MULTIPATH=m -+CONFIG_FUSION=y -+CONFIG_FUSION_SPI=m -+CONFIG_FUSION_CTL=m -+CONFIG_NETDEVICES=y -+CONFIG_BONDING=m -+CONFIG_DUMMY=m -+CONFIG_TUN=m -+CONFIG_ACENIC=m -+CONFIG_TIGON3=m -+CONFIG_NET_TULIP=y -+CONFIG_DE2104X=m -+CONFIG_TULIP=y -+CONFIG_TULIP_MMIO=y -+CONFIG_E100=m -+CONFIG_E1000=m -+CONFIG_PPP=m -+CONFIG_PPP_BSDCOMP=m -+CONFIG_PPP_DEFLATE=m -+CONFIG_PPPOE=m -+CONFIG_PPP_ASYNC=m -+CONFIG_PPP_SYNC_TTY=m -+# CONFIG_KEYBOARD_ATKBD is not set -+# CONFIG_MOUSE_PS2 is not set -+CONFIG_SERIO=m -+CONFIG_SERIO_LIBPS2=m -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_SERIAL_8250_NR_UARTS=13 -+CONFIG_SERIAL_8250_EXTENDED=y -+CONFIG_SERIAL_8250_MANY_PORTS=y -+CONFIG_SERIAL_8250_SHARE_IRQ=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_RAW_DRIVER=y -+# CONFIG_HWMON is not set -+CONFIG_FB=y -+CONFIG_FRAMEBUFFER_CONSOLE=y -+CONFIG_LOGO=y -+# CONFIG_LOGO_LINUX_MONO is not set -+# CONFIG_LOGO_LINUX_VGA16 is not set -+# CONFIG_LOGO_LINUX_CLUT224 is not set -+CONFIG_SOUND=y -+CONFIG_SND=y -+CONFIG_SND_SEQUENCER=y -+CONFIG_SND_AD1889=y -+CONFIG_USB_HIDDEV=y -+CONFIG_USB=y -+CONFIG_USB_OHCI_HCD=y -+CONFIG_USB_PRINTER=m -+CONFIG_USB_STORAGE=m -+CONFIG_USB_STORAGE_USBAT=m -+CONFIG_USB_STORAGE_SDDR09=m -+CONFIG_USB_STORAGE_SDDR55=m -+CONFIG_USB_STORAGE_JUMPSHOT=m -+CONFIG_USB_MDC800=m -+CONFIG_USB_MICROTEK=m -+CONFIG_USB_LEGOTOWER=m -+CONFIG_EXT2_FS=y -+CONFIG_EXT3_FS=y -+CONFIG_XFS_FS=m -+CONFIG_AUTOFS4_FS=y -+CONFIG_ISO9660_FS=y -+CONFIG_JOLIET=y -+CONFIG_MSDOS_FS=m -+CONFIG_VFAT_FS=m -+CONFIG_PROC_KCORE=y -+CONFIG_TMPFS=y -+CONFIG_NFS_FS=y -+CONFIG_ROOT_NFS=y -+CONFIG_NFSD=y -+CONFIG_NFSD_V3=y -+CONFIG_NLS_CODEPAGE_437=m -+CONFIG_NLS_CODEPAGE_850=m -+CONFIG_NLS_ASCII=m -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_ISO8859_15=m -+CONFIG_NLS_UTF8=m -+CONFIG_DEBUG_FS=y -+CONFIG_HEADERS_INSTALL=y -+CONFIG_HEADERS_CHECK=y -+CONFIG_MAGIC_SYSRQ=y -+CONFIG_DEBUG_MUTEXES=y -+# CONFIG_DEBUG_BUGVERBOSE is not set -+CONFIG_CRYPTO_TEST=m -+CONFIG_CRYPTO_MD5=m -+CONFIG_CRYPTO_BLOWFISH=m -+CONFIG_CRYPTO_DES=m -+# CONFIG_CRYPTO_HW is not set -diff --git a/arch/parisc/configs/defconfig b/arch/parisc/configs/defconfig -new file mode 100644 -index 000000000000..0d976614934c ---- /dev/null -+++ b/arch/parisc/configs/defconfig -@@ -0,0 +1,206 @@ -+# CONFIG_LOCALVERSION_AUTO is not set -+CONFIG_SYSVIPC=y -+CONFIG_POSIX_MQUEUE=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=16 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_KALLSYMS_ALL=y -+CONFIG_SLAB=y -+CONFIG_PROFILING=y -+CONFIG_OPROFILE=m -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+CONFIG_MODULE_FORCE_UNLOAD=y -+# CONFIG_BLK_DEV_BSG is not set -+CONFIG_PA7100LC=y -+CONFIG_PREEMPT=y -+CONFIG_IOMMU_CCIO=y -+CONFIG_GSC_LASI=y -+CONFIG_GSC_WAX=y -+CONFIG_EISA=y -+CONFIG_PCI=y -+CONFIG_GSC_DINO=y -+CONFIG_PCI_LBA=y -+CONFIG_PCCARD=y -+CONFIG_YENTA=y -+CONFIG_PD6729=y -+CONFIG_I82092=y -+CONFIG_BINFMT_MISC=m -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_XFRM_USER=m -+CONFIG_NET_KEY=m 
-+CONFIG_INET=y -+CONFIG_IP_MULTICAST=y -+CONFIG_IP_PNP=y -+CONFIG_IP_PNP_DHCP=y -+CONFIG_IP_PNP_BOOTP=y -+CONFIG_INET_AH=m -+CONFIG_INET_ESP=m -+CONFIG_INET_DIAG=m -+CONFIG_INET6_AH=y -+CONFIG_INET6_ESP=y -+CONFIG_INET6_IPCOMP=y -+CONFIG_LLC2=m -+CONFIG_DEVTMPFS=y -+CONFIG_DEVTMPFS_MOUNT=y -+# CONFIG_STANDALONE is not set -+# CONFIG_PREVENT_FIRMWARE_BUILD is not set -+CONFIG_PARPORT=y -+CONFIG_PARPORT_PC=m -+CONFIG_PARPORT_PC_PCMCIA=m -+CONFIG_PARPORT_1284=y -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_CRYPTOLOOP=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=6144 -+CONFIG_IDE=y -+CONFIG_BLK_DEV_IDECS=y -+CONFIG_BLK_DEV_IDECD=y -+CONFIG_BLK_DEV_GENERIC=y -+CONFIG_BLK_DEV_NS87415=y -+CONFIG_SCSI=y -+CONFIG_BLK_DEV_SD=y -+CONFIG_CHR_DEV_ST=y -+CONFIG_BLK_DEV_SR=y -+CONFIG_CHR_DEV_SG=y -+CONFIG_SCSI_LASI700=y -+CONFIG_SCSI_SYM53C8XX_2=y -+CONFIG_SCSI_ZALON=y -+CONFIG_MD=y -+CONFIG_BLK_DEV_MD=y -+CONFIG_MD_LINEAR=y -+CONFIG_MD_RAID0=y -+CONFIG_MD_RAID1=y -+CONFIG_MD_RAID10=y -+CONFIG_BLK_DEV_DM=y -+CONFIG_NETDEVICES=y -+CONFIG_BONDING=m -+CONFIG_DUMMY=m -+CONFIG_TUN=m -+CONFIG_ACENIC=y -+CONFIG_TIGON3=y -+CONFIG_NET_TULIP=y -+CONFIG_TULIP=y -+CONFIG_LASI_82596=y -+CONFIG_PPP=m -+CONFIG_PPP_BSDCOMP=m -+CONFIG_PPP_DEFLATE=m -+CONFIG_PPPOE=m -+CONFIG_PPP_ASYNC=m -+CONFIG_PPP_SYNC_TTY=m -+# CONFIG_KEYBOARD_HIL_OLD is not set -+CONFIG_MOUSE_SERIAL=y -+CONFIG_LEGACY_PTY_COUNT=64 -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_SERIAL_8250_CS=y -+CONFIG_SERIAL_8250_NR_UARTS=17 -+CONFIG_SERIAL_8250_EXTENDED=y -+CONFIG_SERIAL_8250_MANY_PORTS=y -+CONFIG_SERIAL_8250_SHARE_IRQ=y -+CONFIG_PRINTER=m -+CONFIG_PPDEV=m -+# CONFIG_HW_RANDOM is not set -+# CONFIG_HWMON is not set -+CONFIG_FB=y -+CONFIG_FB_MODE_HELPERS=y -+CONFIG_FB_TILEBLITTING=y -+CONFIG_DUMMY_CONSOLE_COLUMNS=128 -+CONFIG_DUMMY_CONSOLE_ROWS=48 -+CONFIG_FRAMEBUFFER_CONSOLE=y -+CONFIG_LOGO=y -+# CONFIG_LOGO_LINUX_MONO is not set -+# CONFIG_LOGO_LINUX_VGA16 is not set -+# CONFIG_LOGO_LINUX_CLUT224 is not set -+CONFIG_SOUND=y -+CONFIG_SND=y -+CONFIG_SND_DYNAMIC_MINORS=y -+CONFIG_SND_SEQUENCER=y -+CONFIG_SND_AD1889=y -+CONFIG_SND_HARMONY=y -+CONFIG_HID_GYRATION=y -+CONFIG_HID_NTRIG=y -+CONFIG_HID_PANTHERLORD=y -+CONFIG_HID_PETALYNX=y -+CONFIG_HID_SAMSUNG=y -+CONFIG_HID_SUNPLUS=y -+CONFIG_HID_TOPSEED=y -+CONFIG_USB=y -+CONFIG_USB_MON=y -+CONFIG_USB_OHCI_HCD=y -+CONFIG_USB_UHCI_HCD=y -+CONFIG_EXT2_FS=y -+CONFIG_EXT3_FS=y -+CONFIG_ISO9660_FS=y -+CONFIG_JOLIET=y -+CONFIG_VFAT_FS=y -+CONFIG_PROC_KCORE=y -+CONFIG_TMPFS=y -+CONFIG_NFS_FS=y -+CONFIG_ROOT_NFS=y -+CONFIG_NFSD=y -+CONFIG_NFSD_V4=y -+CONFIG_CIFS=m -+CONFIG_NLS_CODEPAGE_437=y -+CONFIG_NLS_CODEPAGE_737=m -+CONFIG_NLS_CODEPAGE_775=m -+CONFIG_NLS_CODEPAGE_850=m -+CONFIG_NLS_CODEPAGE_852=m -+CONFIG_NLS_CODEPAGE_855=m -+CONFIG_NLS_CODEPAGE_857=m -+CONFIG_NLS_CODEPAGE_860=m -+CONFIG_NLS_CODEPAGE_861=m -+CONFIG_NLS_CODEPAGE_862=m -+CONFIG_NLS_CODEPAGE_863=m -+CONFIG_NLS_CODEPAGE_864=m -+CONFIG_NLS_CODEPAGE_865=m -+CONFIG_NLS_CODEPAGE_866=m -+CONFIG_NLS_CODEPAGE_869=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_CODEPAGE_950=m -+CONFIG_NLS_CODEPAGE_932=m -+CONFIG_NLS_CODEPAGE_949=m -+CONFIG_NLS_CODEPAGE_874=m -+CONFIG_NLS_ISO8859_8=m -+CONFIG_NLS_CODEPAGE_1250=y -+CONFIG_NLS_CODEPAGE_1251=m -+CONFIG_NLS_ASCII=m -+CONFIG_NLS_ISO8859_1=y -+CONFIG_NLS_ISO8859_2=m -+CONFIG_NLS_ISO8859_3=m -+CONFIG_NLS_ISO8859_4=m -+CONFIG_NLS_ISO8859_5=m -+CONFIG_NLS_ISO8859_6=m -+CONFIG_NLS_ISO8859_7=m -+CONFIG_NLS_ISO8859_9=m -+CONFIG_NLS_ISO8859_13=m -+CONFIG_NLS_ISO8859_14=m -+CONFIG_NLS_ISO8859_15=m 
-+CONFIG_NLS_KOI8_R=m -+CONFIG_NLS_KOI8_U=m -+CONFIG_NLS_UTF8=y -+CONFIG_DEBUG_FS=y -+CONFIG_HEADERS_INSTALL=y -+CONFIG_HEADERS_CHECK=y -+CONFIG_MAGIC_SYSRQ=y -+CONFIG_DEBUG_KERNEL=y -+CONFIG_DEBUG_MUTEXES=y -+CONFIG_KEYS=y -+CONFIG_CRYPTO_TEST=m -+CONFIG_CRYPTO_MICHAEL_MIC=m -+CONFIG_CRYPTO_SHA512=m -+CONFIG_CRYPTO_TGR192=m -+CONFIG_CRYPTO_WP512=m -+CONFIG_CRYPTO_ANUBIS=m -+CONFIG_CRYPTO_BLOWFISH=m -+CONFIG_CRYPTO_CAST6=m -+CONFIG_CRYPTO_KHAZAD=m -+CONFIG_CRYPTO_SERPENT=m -+CONFIG_CRYPTO_TEA=m -+CONFIG_CRYPTO_TWOFISH=m -+# CONFIG_CRYPTO_HW is not set -+CONFIG_LIBCRC32C=m -+CONFIG_FONTS=y -diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig -index b29d7cb38368..3af947541fdc 100644 ---- a/arch/powerpc/Kconfig -+++ b/arch/powerpc/Kconfig -@@ -879,6 +879,8 @@ config SCHED_SMT - when dealing with POWER5 cpus at a cost of slightly increased - overhead in some places. If unsure say N here. - -+source "kernel/Kconfig.MuQSS" -+ - config PPC_DENORMALISATION - bool "PowerPC denormalisation exception handling" - depends on PPC_BOOK3S_64 -diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig -index feb5d47d8d1e..6ce1ce306381 100644 ---- a/arch/powerpc/configs/ppc6xx_defconfig -+++ b/arch/powerpc/configs/ppc6xx_defconfig -@@ -74,7 +74,7 @@ CONFIG_QE_GPIO=y - CONFIG_MCU_MPC8349EMITX=y - CONFIG_HIGHMEM=y - CONFIG_HZ_1000=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_BINFMT_MISC=y - CONFIG_HIBERNATION=y - CONFIG_PM_DEBUG=y -diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c -index f18d5067cd0f..fe489fc01c73 100644 ---- a/arch/powerpc/platforms/cell/spufs/sched.c -+++ b/arch/powerpc/platforms/cell/spufs/sched.c -@@ -51,11 +51,6 @@ static struct task_struct *spusched_task; - static struct timer_list spusched_timer; - static struct timer_list spuloadavg_timer; - --/* -- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). -- */ --#define NORMAL_PRIO 120 -- - /* - * Frequency of the spu scheduler tick. By default we do one SPU scheduler - * tick for every 10 CPU scheduler ticks. 
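A note on the schedule_timeout() to schedule_*_hrtimeout() conversions in the driver hunks further below: a tick-based sleep is rounded up to whole jiffies, so at the low HZ rates the MuQSS documentation above favours, a short sleep can be many times longer than requested, which is the gap the hrtimer-backed variants are meant to close. The following standalone C sketch only illustrates that rounding; msecs_to_jiffies() is modelled here as a plain round-up division rather than the kernel's actual helper.

#include <stdio.h>

/* Model of msecs_to_jiffies(): round the request up to the next whole tick. */
static unsigned long msecs_to_jiffies_model(unsigned long ms, unsigned long hz)
{
    return (ms * hz + 999) / 1000;
}

int main(void)
{
    const unsigned long hzs[] = { 100, 250, 1000 };
    const unsigned long ms_req = 1;  /* e.g. schedule_timeout(msecs_to_jiffies(1)) */

    for (unsigned int i = 0; i < sizeof(hzs) / sizeof(hzs[0]); i++) {
        unsigned long hz = hzs[i];
        unsigned long ticks = msecs_to_jiffies_model(ms_req, hz);

        /* Report how long the tick-granular sleep actually lasts at minimum. */
        printf("HZ=%-4lu: %lums request becomes %lu jiffy(ies) = %lums minimum\n",
               hz, ms_req, ticks, ticks * 1000 / hz);
    }
    return 0;
}

At HZ=100 a 1ms request becomes a full 10ms tick, while an hrtimer-based sleep can target roughly the requested 1ms regardless of the configured HZ.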
-diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig -index 9a527f978106..5895f2cc726e 100644 ---- a/arch/sh/configs/se7712_defconfig -+++ b/arch/sh/configs/se7712_defconfig -@@ -23,7 +23,7 @@ CONFIG_FLATMEM_MANUAL=y - CONFIG_SH_SOLUTION_ENGINE=y - CONFIG_SH_PCLK_FREQ=66666666 - CONFIG_HEARTBEAT=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_CMDLINE_OVERWRITE=y - CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda1" - CONFIG_NET=y -diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig -index 3b0e1eb6e874..e296a2cd9903 100644 ---- a/arch/sh/configs/se7721_defconfig -+++ b/arch/sh/configs/se7721_defconfig -@@ -23,7 +23,7 @@ CONFIG_FLATMEM_MANUAL=y - CONFIG_SH_7721_SOLUTION_ENGINE=y - CONFIG_SH_PCLK_FREQ=33333333 - CONFIG_HEARTBEAT=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_CMDLINE_OVERWRITE=y - CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda2" - CONFIG_NET=y -diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig -index 4ec961ace688..a03a1ad670a0 100644 ---- a/arch/sh/configs/titan_defconfig -+++ b/arch/sh/configs/titan_defconfig -@@ -20,7 +20,7 @@ CONFIG_SH_TITAN=y - CONFIG_SH_PCLK_FREQ=30000000 - CONFIG_SH_DMA=y - CONFIG_SH_DMA_API=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_CMDLINE_OVERWRITE=y - CONFIG_CMDLINE="console=ttySC1,38400N81 root=/dev/nfs ip=:::::eth1:autoconf rw" - CONFIG_PCI=y -diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig -index bde4d21a8ac8..c054ec82d91b 100644 ---- a/arch/sparc/configs/sparc64_defconfig -+++ b/arch/sparc/configs/sparc64_defconfig -@@ -22,7 +22,7 @@ CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y - CONFIG_NUMA=y - CONFIG_DEFAULT_MMAP_MIN_ADDR=8192 --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_SUN_LDOMS=y - CONFIG_PCI=y - CONFIG_PCI_MSI=y -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 2d3f963fd6f1..4df276a5781b 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1006,6 +1006,22 @@ config NR_CPUS - config SCHED_SMT - def_bool y if SMP - -+config SMT_NICE -+ bool "SMT (Hyperthreading) aware nice priority and policy support" -+ depends on SCHED_MUQSS && SCHED_SMT -+ default y -+ ---help--- -+ Enabling Hyperthreading on Intel CPUs decreases the effectiveness -+ of the use of 'nice' levels and different scheduling policies -+ (e.g. realtime) due to sharing of CPU power between hyperthreads. -+ SMT nice support makes each logical CPU aware of what is running on -+ its hyperthread siblings, maintaining appropriate distribution of -+ CPU according to nice levels and scheduling policies at the expense -+ of slightly increased overhead. -+ -+ If unsure say Y here. -+ -+ - config SCHED_MC - def_bool y - prompt "Multi-core scheduler support" -@@ -1036,6 +1052,8 @@ config SCHED_MC_PRIO - - If unsure say Y here. - -+source "kernel/Kconfig.MuQSS" -+ - config UP_LATE_INIT - def_bool y - depends on !SMP && X86_LOCAL_APIC -@@ -1423,7 +1441,7 @@ config HIGHMEM64G - endchoice - - choice -- prompt "Memory split" if EXPERT -+ prompt "Memory split" - default VMSPLIT_3G - depends on X86_32 - ---help--- -@@ -1443,17 +1461,17 @@ choice - option alone! 
- - config VMSPLIT_3G -- bool "3G/1G user/kernel split" -+ bool "Default 896MB lowmem (3G/1G user/kernel split)" - config VMSPLIT_3G_OPT - depends on !X86_PAE -- bool "3G/1G user/kernel split (for full 1G low memory)" -+ bool "1GB lowmem (3G/1G user/kernel split)" - config VMSPLIT_2G -- bool "2G/2G user/kernel split" -+ bool "2GB lowmem (2G/2G user/kernel split)" - config VMSPLIT_2G_OPT - depends on !X86_PAE -- bool "2G/2G user/kernel split (for full 2G low memory)" -+ bool "2GB lowmem (2G/2G user/kernel split)" - config VMSPLIT_1G -- bool "1G/3G user/kernel split" -+ bool "3GB lowmem (1G/3G user/kernel split)" - endchoice - - config PAGE_OFFSET -diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig -index 550904591e94..b5e80947326e 100644 ---- a/arch/x86/configs/i386_defconfig -+++ b/arch/x86/configs/i386_defconfig -@@ -29,7 +29,7 @@ CONFIG_SMP=y - CONFIG_X86_GENERIC=y - CONFIG_HPET_TIMER=y - CONFIG_SCHED_SMT=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y - CONFIG_X86_MCE=y - CONFIG_X86_REBOOTFIXUPS=y -diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig -index 614961009075..05802ec44d19 100644 ---- a/arch/x86/configs/x86_64_defconfig -+++ b/arch/x86/configs/x86_64_defconfig -@@ -27,7 +27,7 @@ CONFIG_MODULE_FORCE_UNLOAD=y - CONFIG_SMP=y - CONFIG_NR_CPUS=64 - CONFIG_SCHED_SMT=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y - CONFIG_X86_MCE=y - CONFIG_MICROCODE=y -diff --git a/drivers/block/swim.c b/drivers/block/swim.c -index 4c297f69171d..5bc4f1be2617 100644 ---- a/drivers/block/swim.c -+++ b/drivers/block/swim.c -@@ -328,7 +328,7 @@ static inline void swim_motor(struct swim __iomem *base, - if (swim_readbit(base, MOTOR_ON)) - break; - current->state = TASK_INTERRUPTIBLE; -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - } else if (action == OFF) { - swim_action(base, MOTOR_OFF); -@@ -347,7 +347,7 @@ static inline void swim_eject(struct swim __iomem *base) - if (!swim_readbit(base, DISK_IN)) - break; - current->state = TASK_INTERRUPTIBLE; -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - swim_select(base, RELAX); - } -@@ -371,7 +371,7 @@ static inline int swim_step(struct swim __iomem *base) - for (wait = 0; wait < HZ; wait++) { - - current->state = TASK_INTERRUPTIBLE; -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - - swim_select(base, RELAX); - if (!swim_readbit(base, STEP)) -diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c -index c48d8f086382..8a6e399936c7 100644 ---- a/drivers/char/ipmi/ipmi_msghandler.c -+++ b/drivers/char/ipmi/ipmi_msghandler.c -@@ -3543,7 +3543,7 @@ static void cleanup_smi_msgs(struct ipmi_smi *intf) - /* Current message first, to preserve order */ - while (intf->curr_msg && !list_empty(&intf->waiting_rcv_msgs)) { - /* Wait for the message to clear out. */ -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - - /* No need for locks, the interface is down. */ -diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c -index 2704470e021d..49504b7f3aa9 100644 ---- a/drivers/char/ipmi/ipmi_ssif.c -+++ b/drivers/char/ipmi/ipmi_ssif.c -@@ -1295,7 +1295,7 @@ static void shutdown_ssif(void *send_info) - - /* make sure the driver is not looking for flags any more. 
*/ - while (ssif_info->ssif_state != SSIF_NORMAL) -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - - ssif_info->stopping = true; - del_timer_sync(&ssif_info->watch_timer); -diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c -index 6941689085ed..ec5a24e95401 100644 ---- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c -+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c -@@ -235,7 +235,7 @@ static int vmw_fifo_wait_noirq(struct vmw_private *dev_priv, - DRM_ERROR("SVGA device lockup.\n"); - break; - } -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - if (interruptible && signal_pending(current)) { - ret = -ERESTARTSYS; - break; -diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c -index 75f3efee21a4..09b1932ce85b 100644 ---- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c -+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c -@@ -203,7 +203,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, - break; - } - if (lazy) -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - else if ((++count & 0x0F) == 0) { - /** - * FIXME: Use schedule_hr_timeout here for -diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c -index 267eac00a3fb..352af68c6cd7 100644 ---- a/drivers/hwmon/fam15h_power.c -+++ b/drivers/hwmon/fam15h_power.c -@@ -225,7 +225,7 @@ static ssize_t power1_average_show(struct device *dev, - prev_ptsc[cu] = data->cpu_sw_pwr_ptsc[cu]; - } - -- leftover = schedule_timeout_interruptible(msecs_to_jiffies(data->power_period)); -+ leftover = schedule_msec_hrtimeout_interruptible((data->power_period)); - if (leftover) - return 0; - -diff --git a/drivers/iio/light/tsl2563.c b/drivers/iio/light/tsl2563.c -index d8c40a83097d..8332baf4961c 100644 ---- a/drivers/iio/light/tsl2563.c -+++ b/drivers/iio/light/tsl2563.c -@@ -269,11 +269,7 @@ static void tsl2563_wait_adc(struct tsl2563_chip *chip) - default: - delay = 402; - } -- /* -- * TODO: Make sure that we wait at least required delay but why we -- * have to extend it one tick more? 
-- */ -- schedule_timeout_interruptible(msecs_to_jiffies(delay) + 2); -+ schedule_msec_hrtimeout_interruptible(delay + 1); - } - - static int tsl2563_adjust_gainlevel(struct tsl2563_chip *chip, u16 adc) -diff --git a/drivers/media/i2c/msp3400-driver.c b/drivers/media/i2c/msp3400-driver.c -index 39530d43590e..a7caf2eb5771 100644 ---- a/drivers/media/i2c/msp3400-driver.c -+++ b/drivers/media/i2c/msp3400-driver.c -@@ -170,7 +170,7 @@ static int msp_read(struct i2c_client *client, int dev, int addr) - break; - dev_warn(&client->dev, "I/O error #%d (read 0x%02x/0x%02x)\n", err, - dev, addr); -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - } - if (err == 3) { - dev_warn(&client->dev, "resetting chip, sound will go off.\n"); -@@ -211,7 +211,7 @@ static int msp_write(struct i2c_client *client, int dev, int addr, int val) - break; - dev_warn(&client->dev, "I/O error #%d (write 0x%02x/0x%02x)\n", err, - dev, addr); -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - } - if (err == 3) { - dev_warn(&client->dev, "resetting chip, sound will go off.\n"); -diff --git a/drivers/media/pci/cx18/cx18-gpio.c b/drivers/media/pci/cx18/cx18-gpio.c -index cf7cfda94107..f63e17489547 100644 ---- a/drivers/media/pci/cx18/cx18-gpio.c -+++ b/drivers/media/pci/cx18/cx18-gpio.c -@@ -81,11 +81,11 @@ static void gpio_reset_seq(struct cx18 *cx, u32 active_lo, u32 active_hi, - - /* Assert */ - gpio_update(cx, mask, ~active_lo); -- schedule_timeout_uninterruptible(msecs_to_jiffies(assert_msecs)); -+ schedule_msec_hrtimeout_uninterruptible((assert_msecs)); - - /* Deassert */ - gpio_update(cx, mask, ~active_hi); -- schedule_timeout_uninterruptible(msecs_to_jiffies(recovery_msecs)); -+ schedule_msec_hrtimeout_uninterruptible((recovery_msecs)); - } - - /* -diff --git a/drivers/media/pci/ivtv/ivtv-gpio.c b/drivers/media/pci/ivtv/ivtv-gpio.c -index 856e7ab7f33e..766a26251337 100644 ---- a/drivers/media/pci/ivtv/ivtv-gpio.c -+++ b/drivers/media/pci/ivtv/ivtv-gpio.c -@@ -105,7 +105,7 @@ void ivtv_reset_ir_gpio(struct ivtv *itv) - curout = (curout & ~0xF) | 1; - write_reg(curout, IVTV_REG_GPIO_OUT); - /* We could use something else for smaller time */ -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible((1)); - curout |= 2; - write_reg(curout, IVTV_REG_GPIO_OUT); - curdir &= ~0x80; -@@ -125,11 +125,11 @@ int ivtv_reset_tuner_gpio(void *dev, int component, int cmd, int value) - curout = read_reg(IVTV_REG_GPIO_OUT); - curout &= ~(1 << itv->card->xceive_pin); - write_reg(curout, IVTV_REG_GPIO_OUT); -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible((1)); - - curout |= 1 << itv->card->xceive_pin; - write_reg(curout, IVTV_REG_GPIO_OUT); -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible((1)); - return 0; - } - -diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.c b/drivers/media/pci/ivtv/ivtv-ioctl.c -index 137853944e46..76830892f373 100644 ---- a/drivers/media/pci/ivtv/ivtv-ioctl.c -+++ b/drivers/media/pci/ivtv/ivtv-ioctl.c -@@ -1137,7 +1137,7 @@ void ivtv_s_std_dec(struct ivtv *itv, v4l2_std_id std) - TASK_UNINTERRUPTIBLE); - if ((read_reg(IVTV_REG_DEC_LINE_FIELD) >> 16) < 100) - break; -- schedule_timeout(msecs_to_jiffies(25)); -+ schedule_msec_hrtimeout((25)); - } - finish_wait(&itv->vsync_waitq, &wait); - mutex_lock(&itv->serialize_lock); -diff --git a/drivers/media/pci/ivtv/ivtv-streams.c 
b/drivers/media/pci/ivtv/ivtv-streams.c -index f04ee84bab5f..c4469b4b8f99 100644 ---- a/drivers/media/pci/ivtv/ivtv-streams.c -+++ b/drivers/media/pci/ivtv/ivtv-streams.c -@@ -849,7 +849,7 @@ int ivtv_stop_v4l2_encode_stream(struct ivtv_stream *s, int gop_end) - while (!test_bit(IVTV_F_I_EOS, &itv->i_flags) && - time_before(jiffies, - then + msecs_to_jiffies(2000))) { -- schedule_timeout(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout((10)); - } - - /* To convert jiffies to ms, we must multiply by 1000 -diff --git a/drivers/media/radio/radio-mr800.c b/drivers/media/radio/radio-mr800.c -index cb0437b4c331..163fffc0e1d4 100644 ---- a/drivers/media/radio/radio-mr800.c -+++ b/drivers/media/radio/radio-mr800.c -@@ -366,7 +366,7 @@ static int vidioc_s_hw_freq_seek(struct file *file, void *priv, - retval = -ENODATA; - break; - } -- if (schedule_timeout_interruptible(msecs_to_jiffies(10))) { -+ if (schedule_msec_hrtimeout_interruptible((10))) { - retval = -ERESTARTSYS; - break; - } -diff --git a/drivers/media/radio/radio-tea5777.c b/drivers/media/radio/radio-tea5777.c -index fb9de7bbcd19..e53cf45e7f3f 100644 ---- a/drivers/media/radio/radio-tea5777.c -+++ b/drivers/media/radio/radio-tea5777.c -@@ -235,7 +235,7 @@ static int radio_tea5777_update_read_reg(struct radio_tea5777 *tea, int wait) - } - - if (wait) { -- if (schedule_timeout_interruptible(msecs_to_jiffies(wait))) -+ if (schedule_msec_hrtimeout_interruptible((wait))) - return -ERESTARTSYS; - } - -diff --git a/drivers/media/radio/tea575x.c b/drivers/media/radio/tea575x.c -index b0303cf00387..0925b5065147 100644 ---- a/drivers/media/radio/tea575x.c -+++ b/drivers/media/radio/tea575x.c -@@ -401,7 +401,7 @@ int snd_tea575x_s_hw_freq_seek(struct file *file, struct snd_tea575x *tea, - for (;;) { - if (time_after(jiffies, timeout)) - break; -- if (schedule_timeout_interruptible(msecs_to_jiffies(10))) { -+ if (schedule_msec_hrtimeout_interruptible((10))) { - /* some signal arrived, stop search */ - tea->val &= ~TEA575X_BIT_SEARCH; - snd_tea575x_set_freq(tea); -diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c -index b690796d24d4..448b13da62b4 100644 ---- a/drivers/mfd/ucb1x00-core.c -+++ b/drivers/mfd/ucb1x00-core.c -@@ -250,7 +250,7 @@ unsigned int ucb1x00_adc_read(struct ucb1x00 *ucb, int adc_channel, int sync) - break; - /* yield to other processes */ - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - - return UCB_ADC_DAT(val); -diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c -index 8e6607fc8a67..b9ab770bbdb5 100644 ---- a/drivers/misc/sgi-xp/xpc_channel.c -+++ b/drivers/misc/sgi-xp/xpc_channel.c -@@ -834,7 +834,7 @@ xpc_allocate_msg_wait(struct xpc_channel *ch) - - atomic_inc(&ch->n_on_msg_allocate_wq); - prepare_to_wait(&ch->msg_allocate_wq, &wait, TASK_INTERRUPTIBLE); -- ret = schedule_timeout(1); -+ ret = schedule_min_hrtimeout(); - finish_wait(&ch->msg_allocate_wq, &wait); - atomic_dec(&ch->n_on_msg_allocate_wq); - -diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c -index bbb2575d4728..637757144221 100644 ---- a/drivers/net/caif/caif_hsi.c -+++ b/drivers/net/caif/caif_hsi.c -@@ -939,7 +939,7 @@ static void cfhsi_wake_down(struct work_struct *work) - break; - - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - retry--; - } - -diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c -index d2539c95adb6..0c2f31a03ce9 100644 ---- 
a/drivers/net/can/usb/peak_usb/pcan_usb.c -+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c -@@ -242,7 +242,7 @@ static int pcan_usb_write_mode(struct peak_usb_device *dev, u8 onoff) - } else { - /* the PCAN-USB needs time to init */ - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(PCAN_USB_STARTUP_TIMEOUT)); -+ schedule_msec_hrtimeout((PCAN_USB_STARTUP_TIMEOUT)); - } - - return err; -diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c -index eccbf4cd7149..03d285f022b0 100644 ---- a/drivers/net/usb/lan78xx.c -+++ b/drivers/net/usb/lan78xx.c -@@ -2670,7 +2670,7 @@ static void lan78xx_terminate_urbs(struct lan78xx_net *dev) - while (!skb_queue_empty(&dev->rxq) && - !skb_queue_empty(&dev->txq) && - !skb_queue_empty(&dev->done)) { -- schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS)); -+ schedule_msec_hrtimeout((UNLINK_TIMEOUT_MS)); - set_current_state(TASK_UNINTERRUPTIBLE); - netif_dbg(dev, ifdown, dev->net, - "waited for %d urb completions\n", temp); -diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c -index 5ec97def3513..9e2bf55bbccd 100644 ---- a/drivers/net/usb/usbnet.c -+++ b/drivers/net/usb/usbnet.c -@@ -767,7 +767,7 @@ static void wait_skb_queue_empty(struct sk_buff_head *q) - spin_lock_irqsave(&q->lock, flags); - while (!skb_queue_empty(q)) { - spin_unlock_irqrestore(&q->lock, flags); -- schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS)); -+ schedule_msec_hrtimeout((UNLINK_TIMEOUT_MS)); - set_current_state(TASK_UNINTERRUPTIBLE); - spin_lock_irqsave(&q->lock, flags); - } -diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c -index 97ea6e2035e6..1c693729bbd3 100644 ---- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c -+++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c -@@ -816,7 +816,7 @@ static int ipw2100_hw_send_command(struct ipw2100_priv *priv, - * doesn't seem to have as many firmware restart cycles... - * - * As a test, we're sticking in a 1/100s delay here */ -- schedule_timeout_uninterruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_uninterruptible((10)); - - return 0; - -@@ -1267,7 +1267,7 @@ static int ipw2100_start_adapter(struct ipw2100_priv *priv) - IPW_DEBUG_FW("Waiting for f/w initialization to complete...\n"); - i = 5000; - do { -- schedule_timeout_uninterruptible(msecs_to_jiffies(40)); -+ schedule_msec_hrtimeout_uninterruptible((40)); - /* Todo... wait for sync command ... */ - - read_register(priv->net_dev, IPW_REG_INTA, &inta); -diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c -index 90fb73575495..c94048b048a5 100644 ---- a/drivers/parport/ieee1284.c -+++ b/drivers/parport/ieee1284.c -@@ -208,7 +208,7 @@ int parport_wait_peripheral(struct parport *port, - /* parport_wait_event didn't time out, but the - * peripheral wasn't actually ready either. - * Wait for another 10ms. */ -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - } - } - -diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c -index 5d41dda6da4e..34705f6b423f 100644 ---- a/drivers/parport/ieee1284_ops.c -+++ b/drivers/parport/ieee1284_ops.c -@@ -537,7 +537,7 @@ size_t parport_ieee1284_ecp_read_data (struct parport *port, - /* Yield the port for a while. 
*/ - if (count && dev->port->irq != PARPORT_IRQ_NONE) { - parport_release (dev); -- schedule_timeout_interruptible(msecs_to_jiffies(40)); -+ schedule_msec_hrtimeout_interruptible((40)); - parport_claim_or_block (dev); - } - else -diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c -index bffe548187ee..c2918ee3e100 100644 ---- a/drivers/platform/x86/intel_ips.c -+++ b/drivers/platform/x86/intel_ips.c -@@ -798,7 +798,7 @@ static int ips_adjust(void *data) - ips_gpu_lower(ips); - - sleep: -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_ADJUST_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_ADJUST_PERIOD)); - } while (!kthread_should_stop()); - - dev_dbg(ips->dev, "ips-adjust thread stopped\n"); -@@ -974,7 +974,7 @@ static int ips_monitor(void *data) - seqno_timestamp = get_jiffies_64(); - - old_cpu_power = thm_readl(THM_CEC); -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_SAMPLE_PERIOD)); - - /* Collect an initial average */ - for (i = 0; i < IPS_SAMPLE_COUNT; i++) { -@@ -1001,7 +1001,7 @@ static int ips_monitor(void *data) - mchp_samples[i] = mchp; - } - -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_SAMPLE_PERIOD)); - if (kthread_should_stop()) - break; - } -@@ -1028,7 +1028,7 @@ static int ips_monitor(void *data) - * us to reduce the sample frequency if the CPU and GPU are idle. - */ - old_cpu_power = thm_readl(THM_CEC); -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_SAMPLE_PERIOD)); - last_sample_period = IPS_SAMPLE_PERIOD; - - timer_setup(&ips->timer, monitor_timeout, TIMER_DEFERRABLE); -diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c -index 2018614f258f..fc19b312c345 100644 ---- a/drivers/rtc/rtc-wm8350.c -+++ b/drivers/rtc/rtc-wm8350.c -@@ -114,7 +114,7 @@ static int wm8350_rtc_settime(struct device *dev, struct rtc_time *tm) - /* Wait until confirmation of stopping */ - do { - rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); -- schedule_timeout_uninterruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_uninterruptible((1)); - } while (--retries && !(rtc_ctrl & WM8350_RTC_STS)); - - if (!retries) { -@@ -197,7 +197,7 @@ static int wm8350_rtc_stop_alarm(struct wm8350 *wm8350) - /* Wait until confirmation of stopping */ - do { - rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); -- schedule_timeout_uninterruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_uninterruptible((1)); - } while (retries-- && !(rtc_ctrl & WM8350_RTC_ALMSTS)); - - if (!(rtc_ctrl & WM8350_RTC_ALMSTS)) -@@ -220,7 +220,7 @@ static int wm8350_rtc_start_alarm(struct wm8350 *wm8350) - /* Wait until confirmation */ - do { - rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); -- schedule_timeout_uninterruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_uninterruptible((1)); - } while (retries-- && rtc_ctrl & WM8350_RTC_ALMSTS); - - if (rtc_ctrl & WM8350_RTC_ALMSTS) -diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c -index b60795893994..d2d05691dbd2 100644 ---- a/drivers/scsi/fnic/fnic_scsi.c -+++ b/drivers/scsi/fnic/fnic_scsi.c -@@ -216,7 +216,7 @@ int fnic_fw_reset_handler(struct fnic *fnic) - - /* wait for io cmpl */ - while (atomic_read(&fnic->in_flight)) -- schedule_timeout(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout((1)); - - spin_lock_irqsave(&fnic->wq_copy_lock[0], 
flags); - -@@ -2277,7 +2277,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, - } - } - -- schedule_timeout(msecs_to_jiffies(2 * fnic->config.ed_tov)); -+ schedule_msec_hrtimeout((2 * fnic->config.ed_tov)); - - /* walk again to check, if IOs are still pending in fw */ - if (fnic_is_abts_pending(fnic, lr_sc)) -diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c -index ad62fb3f3a54..a84d4c99d7d7 100644 ---- a/drivers/scsi/lpfc/lpfc_scsi.c -+++ b/drivers/scsi/lpfc/lpfc_scsi.c -@@ -5191,7 +5191,7 @@ lpfc_reset_flush_io_context(struct lpfc_vport *vport, uint16_t tgt_id, - tgt_id, lun_id, context); - later = msecs_to_jiffies(2 * vport->cfg_devloss_tmo * 1000) + jiffies; - while (time_after(later, jiffies) && cnt) { -- schedule_timeout_uninterruptible(msecs_to_jiffies(20)); -+ schedule_msec_hrtimeout_uninterruptible((20)); - cnt = lpfc_sli_sum_iocb(vport, tgt_id, lun_id, context); - } - if (cnt) { -diff --git a/drivers/scsi/snic/snic_scsi.c b/drivers/scsi/snic/snic_scsi.c -index b3650c989ed4..7ed1fb285754 100644 ---- a/drivers/scsi/snic/snic_scsi.c -+++ b/drivers/scsi/snic/snic_scsi.c -@@ -2353,7 +2353,7 @@ snic_reset(struct Scsi_Host *shost, struct scsi_cmnd *sc) - - /* Wait for all the IOs that are entered in Qcmd */ - while (atomic_read(&snic->ios_inflight)) -- schedule_timeout(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout((1)); - - ret = snic_issue_hba_reset(snic, sc); - if (ret) { -diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c -index d99f4065b96d..15f870d4e95f 100644 ---- a/drivers/staging/comedi/drivers/ni_mio_common.c -+++ b/drivers/staging/comedi/drivers/ni_mio_common.c -@@ -4748,7 +4748,7 @@ static int cs5529_wait_for_idle(struct comedi_device *dev) - if ((status & NI67XX_CAL_STATUS_BUSY) == 0) - break; - set_current_state(TASK_INTERRUPTIBLE); -- if (schedule_timeout(1)) -+ if (schedule_min_hrtimeout()) - return -EIO; - } - if (i == timeout) { -diff --git a/drivers/staging/rts5208/rtsx.c b/drivers/staging/rts5208/rtsx.c -index be0053c795b7..cc2e18c733e1 100644 ---- a/drivers/staging/rts5208/rtsx.c -+++ b/drivers/staging/rts5208/rtsx.c -@@ -490,7 +490,7 @@ static int rtsx_polling_thread(void *__dev) - - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(POLLING_INTERVAL)); -+ schedule_msec_hrtimeout((POLLING_INTERVAL)); - - /* lock the device pointers */ - mutex_lock(&dev->dev_mutex); -diff --git a/drivers/staging/speakup/speakup_acntpc.c b/drivers/staging/speakup/speakup_acntpc.c -index c94328a5bd4a..6e7d4671aa69 100644 ---- a/drivers/staging/speakup/speakup_acntpc.c -+++ b/drivers/staging/speakup/speakup_acntpc.c -@@ -198,7 +198,7 @@ static void do_catch_up(struct spk_synth *synth) - full_time_val = full_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth_full()) { -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout((full_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -@@ -226,7 +226,7 @@ static void do_catch_up(struct spk_synth *synth) - jiffy_delta_val = jiffy_delta->u.n.value; - delay_time_val = delay_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - jiff_max = jiffies + jiffy_delta_val; - } - } -diff --git a/drivers/staging/speakup/speakup_apollo.c b/drivers/staging/speakup/speakup_apollo.c -index 0877b4044c28..627102d048c1 100644 ---- 
a/drivers/staging/speakup/speakup_apollo.c -+++ b/drivers/staging/speakup/speakup_apollo.c -@@ -165,7 +165,7 @@ static void do_catch_up(struct spk_synth *synth) - if (!synth->io_ops->synth_out(synth, ch)) { - synth->io_ops->tiocmset(0, UART_MCR_RTS); - synth->io_ops->tiocmset(UART_MCR_RTS, 0); -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout(full_time_val); - continue; - } - if (time_after_eq(jiffies, jiff_max) && (ch == SPACE)) { -diff --git a/drivers/staging/speakup/speakup_decext.c b/drivers/staging/speakup/speakup_decext.c -index ddbb7e97d118..f9502addc765 100644 ---- a/drivers/staging/speakup/speakup_decext.c -+++ b/drivers/staging/speakup/speakup_decext.c -@@ -176,7 +176,7 @@ static void do_catch_up(struct spk_synth *synth) - if (ch == '\n') - ch = 0x0D; - if (synth_full() || !synth->io_ops->synth_out(synth, ch)) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - continue; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/staging/speakup/speakup_decpc.c b/drivers/staging/speakup/speakup_decpc.c -index 798c42dfa16c..d85b41db67a3 100644 ---- a/drivers/staging/speakup/speakup_decpc.c -+++ b/drivers/staging/speakup/speakup_decpc.c -@@ -394,7 +394,7 @@ static void do_catch_up(struct spk_synth *synth) - if (ch == '\n') - ch = 0x0D; - if (dt_sendchar(ch)) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout((delay_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/staging/speakup/speakup_dectlk.c b/drivers/staging/speakup/speakup_dectlk.c -index dccb4ea29d37..8ecead307d04 100644 ---- a/drivers/staging/speakup/speakup_dectlk.c -+++ b/drivers/staging/speakup/speakup_dectlk.c -@@ -244,7 +244,7 @@ static void do_catch_up(struct spk_synth *synth) - if (ch == '\n') - ch = 0x0D; - if (synth_full_val || !synth->io_ops->synth_out(synth, ch)) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - continue; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/staging/speakup/speakup_dtlk.c b/drivers/staging/speakup/speakup_dtlk.c -index dbebed0eeeec..6d83c13ca4a6 100644 ---- a/drivers/staging/speakup/speakup_dtlk.c -+++ b/drivers/staging/speakup/speakup_dtlk.c -@@ -211,7 +211,7 @@ static void do_catch_up(struct spk_synth *synth) - delay_time_val = delay_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth_full()) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout((delay_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -@@ -227,7 +227,7 @@ static void do_catch_up(struct spk_synth *synth) - delay_time_val = delay_time->u.n.value; - jiffy_delta_val = jiffy_delta->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout((delay_time_val)); - jiff_max = jiffies + jiffy_delta_val; - } - } -diff --git a/drivers/staging/speakup/speakup_keypc.c b/drivers/staging/speakup/speakup_keypc.c -index 414827e888fc..cb31c9176daa 100644 ---- a/drivers/staging/speakup/speakup_keypc.c -+++ b/drivers/staging/speakup/speakup_keypc.c -@@ -199,7 +199,7 @@ static void do_catch_up(struct spk_synth *synth) - full_time_val = full_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth_full()) { -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout((full_time_val)); - continue; - } - 
set_current_state(TASK_RUNNING); -@@ -232,7 +232,7 @@ static void do_catch_up(struct spk_synth *synth) - jiffy_delta_val = jiffy_delta->u.n.value; - delay_time_val = delay_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - jiff_max = jiffies + jiffy_delta_val; - } - } -diff --git a/drivers/staging/speakup/synth.c b/drivers/staging/speakup/synth.c -index 3568bfb89912..0a80b3b098b2 100644 ---- a/drivers/staging/speakup/synth.c -+++ b/drivers/staging/speakup/synth.c -@@ -93,12 +93,8 @@ static void _spk_do_catch_up(struct spk_synth *synth, int unicode) - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (ch == '\n') - ch = synth->procspeech; -- if (unicode) -- ret = synth->io_ops->synth_out_unicode(synth, ch); -- else -- ret = synth->io_ops->synth_out(synth, ch); -- if (!ret) { -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ if (!synth->io_ops->synth_out(synth, ch)) { -+ schedule_msec_hrtimeout(full_time_val); - continue; - } - if (time_after_eq(jiffies, jiff_max) && (ch == SPACE)) { -@@ -108,11 +104,9 @@ static void _spk_do_catch_up(struct spk_synth *synth, int unicode) - full_time_val = full_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth->io_ops->synth_out(synth, synth->procspeech)) -- schedule_timeout( -- msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - else -- schedule_timeout( -- msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout(full_time_val); - jiff_max = jiffies + jiffy_delta_val; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/staging/unisys/visornic/visornic_main.c b/drivers/staging/unisys/visornic/visornic_main.c -index 0433536930a9..d8726f28843f 100644 ---- a/drivers/staging/unisys/visornic/visornic_main.c -+++ b/drivers/staging/unisys/visornic/visornic_main.c -@@ -549,7 +549,7 @@ static int visornic_disable_with_timeout(struct net_device *netdev, - } - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(&devdata->priv_lock, flags); -- wait += schedule_timeout(msecs_to_jiffies(10)); -+ wait += schedule_msec_hrtimeout((10)); - spin_lock_irqsave(&devdata->priv_lock, flags); - } - -@@ -560,7 +560,7 @@ static int visornic_disable_with_timeout(struct net_device *netdev, - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(&devdata->priv_lock, flags); -- schedule_timeout(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout((10)); - spin_lock_irqsave(&devdata->priv_lock, flags); - if (atomic_read(&devdata->usage)) - break; -@@ -714,7 +714,7 @@ static int visornic_enable_with_timeout(struct net_device *netdev, - } - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(&devdata->priv_lock, flags); -- wait += schedule_timeout(msecs_to_jiffies(10)); -+ wait += schedule_msec_hrtimeout((10)); - spin_lock_irqsave(&devdata->priv_lock, flags); - } - -diff --git a/drivers/video/fbdev/omap/hwa742.c b/drivers/video/fbdev/omap/hwa742.c -index cfe63932f825..71c00ef772a3 100644 ---- a/drivers/video/fbdev/omap/hwa742.c -+++ b/drivers/video/fbdev/omap/hwa742.c -@@ -913,7 +913,7 @@ static void hwa742_resume(void) - if (hwa742_read_reg(HWA742_PLL_DIV_REG) & (1 << 7)) - break; - set_current_state(TASK_UNINTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(5)); -+ schedule_msec_hrtimeout((5)); - } - hwa742_set_update_mode(hwa742.update_mode_before_suspend); - } -diff --git a/drivers/video/fbdev/pxafb.c 
b/drivers/video/fbdev/pxafb.c -index 00b96a78676e..37fc1c2d4cb9 100644 ---- a/drivers/video/fbdev/pxafb.c -+++ b/drivers/video/fbdev/pxafb.c -@@ -1287,7 +1287,7 @@ static int pxafb_smart_thread(void *arg) - mutex_unlock(&fbi->ctrlr_lock); - - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(30)); -+ schedule_msec_hrtimeout((30)); - } - - pr_debug("%s(): task ending\n", __func__); -diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c -index 6009e0e939b5..43868e6a85dc 100644 ---- a/fs/btrfs/inode-map.c -+++ b/fs/btrfs/inode-map.c -@@ -91,7 +91,7 @@ static int caching_kthread(void *data) - btrfs_release_path(path); - root->ino_cache_progress = last; - up_read(&fs_info->commit_root_sem); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - goto again; - } else - continue; -diff --git a/fs/proc/base.c b/fs/proc/base.c -index eb2255e95f62..62b8cedbccb6 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -479,7 +479,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, - seq_puts(m, "0 0 0\n"); - else - seq_printf(m, "%llu %llu %lu\n", -- (unsigned long long)task->se.sum_exec_runtime, -+ (unsigned long long)tsk_seruntime(task), - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); - -diff --git a/include/linux/freezer.h b/include/linux/freezer.h -index 21f5aa0b217f..ee9b46394fdf 100644 ---- a/include/linux/freezer.h -+++ b/include/linux/freezer.h -@@ -297,6 +297,7 @@ static inline void set_freezable(void) {} - #define wait_event_freezekillable_unsafe(wq, condition) \ - wait_event_killable(wq, condition) - -+#define pm_freezing (false) - #endif /* !CONFIG_FREEZER */ - - #endif /* FREEZER_H_INCLUDED */ -diff --git a/include/linux/init_task.h b/include/linux/init_task.h -index 2c620d7ac432..73417df5daa2 100644 ---- a/include/linux/init_task.h -+++ b/include/linux/init_task.h -@@ -36,7 +36,11 @@ extern struct cred init_cred; - #define INIT_PREV_CPUTIME(x) - #endif - -+#ifdef CONFIG_SCHED_MUQSS -+#define INIT_TASK_COMM "MuQSS" -+#else - #define INIT_TASK_COMM "swapper" -+#endif - - /* Attach to the init_task data structure for proper alignment */ - #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK -diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h -index e9bfe6972aed..16ba1c7e5bde 100644 ---- a/include/linux/ioprio.h -+++ b/include/linux/ioprio.h -@@ -53,6 +53,8 @@ enum { - */ - static inline int task_nice_ioprio(struct task_struct *task) - { -+ if (iso_task(task)) -+ return 0; - return (task_nice(task) + 20) / 5; - } - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 4418f5cb8324..71e3063c06b3 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -31,6 +31,9 @@ - #include - #include - #include -+#ifdef CONFIG_SCHED_MUQSS -+#include -+#endif - - /* task_struct member predeclarations (sorted alphabetically): */ - struct audit_context; -@@ -214,13 +217,40 @@ struct task_group; - - extern void scheduler_tick(void); - --#define MAX_SCHEDULE_TIMEOUT LONG_MAX -- -+#define MAX_SCHEDULE_TIMEOUT LONG_MAX - extern long schedule_timeout(long timeout); - extern long schedule_timeout_interruptible(long timeout); - extern long schedule_timeout_killable(long timeout); - extern long schedule_timeout_uninterruptible(long timeout); - extern long schedule_timeout_idle(long timeout); -+ -+#ifdef CONFIG_HIGH_RES_TIMERS -+extern long schedule_msec_hrtimeout(long timeout); -+extern long schedule_min_hrtimeout(void); -+extern long schedule_msec_hrtimeout_interruptible(long timeout); -+extern long 
schedule_msec_hrtimeout_uninterruptible(long timeout); -+#else -+static inline long schedule_msec_hrtimeout(long timeout) -+{ -+ return schedule_timeout(msecs_to_jiffies(timeout)); -+} -+ -+static inline long schedule_min_hrtimeout(void) -+{ -+ return schedule_timeout(1); -+} -+ -+static inline long schedule_msec_hrtimeout_interruptible(long timeout) -+{ -+ return schedule_timeout_interruptible(msecs_to_jiffies(timeout)); -+} -+ -+static inline long schedule_msec_hrtimeout_uninterruptible(long timeout) -+{ -+ return schedule_timeout_uninterruptible(msecs_to_jiffies(timeout)); -+} -+#endif -+ - asmlinkage void schedule(void); - extern void schedule_preempt_disabled(void); - asmlinkage void preempt_schedule_irq(void); -@@ -652,9 +682,11 @@ struct task_struct { - unsigned int flags; - unsigned int ptrace; - -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_MUQSS) -+ int on_cpu; -+#endif - #ifdef CONFIG_SMP - struct llist_node wake_entry; -- int on_cpu; - #ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ - unsigned int cpu; -@@ -679,10 +711,25 @@ struct task_struct { - int static_prio; - int normal_prio; - unsigned int rt_priority; -+#ifdef CONFIG_SCHED_MUQSS -+ int time_slice; -+ u64 deadline; -+ skiplist_node node; /* Skip list node */ -+ u64 last_ran; -+ u64 sched_time; /* sched_clock time spent running */ -+#ifdef CONFIG_SMT_NICE -+ int smt_bias; /* Policy/nice level bias across smt siblings */ -+#endif -+#ifdef CONFIG_HOTPLUG_CPU -+ bool zerobound; /* Bound to CPU0 for hotplug */ -+#endif -+ unsigned long rt_timeout; -+#else /* CONFIG_SCHED_MUQSS */ - - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; -+#endif - #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; - #endif -@@ -850,6 +897,10 @@ struct task_struct { - #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME - u64 utimescaled; - u64 stimescaled; -+#endif -+#ifdef CONFIG_SCHED_MUQSS -+ /* Unbanked cpu time */ -+ unsigned long utime_ns, stime_ns; - #endif - u64 gtime; - struct prev_cputime prev_cputime; -@@ -1306,6 +1357,40 @@ struct task_struct { - */ - }; - -+#ifdef CONFIG_SCHED_MUQSS -+#define tsk_seruntime(t) ((t)->sched_time) -+#define tsk_rttimeout(t) ((t)->rt_timeout) -+ -+static inline void tsk_cpus_current(struct task_struct *p) -+{ -+} -+ -+void print_scheduler_version(void); -+ -+static inline bool iso_task(struct task_struct *p) -+{ -+ return (p->policy == SCHED_ISO); -+} -+#else /* CFS */ -+#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) -+#define tsk_rttimeout(t) ((t)->rt.timeout) -+ -+static inline void tsk_cpus_current(struct task_struct *p) -+{ -+ p->nr_cpus_allowed = current->nr_cpus_allowed; -+} -+ -+static inline void print_scheduler_version(void) -+{ -+ printk(KERN_INFO "CFS CPU scheduler.\n"); -+} -+ -+static inline bool iso_task(struct task_struct *p) -+{ -+ return false; -+} -+#endif /* CONFIG_SCHED_MUQSS */ -+ - static inline struct pid *task_pid(struct task_struct *task) - { - return task->thread_pid; -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 1aff00b65f3c..73d6319a856a 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -28,7 +28,16 @@ static inline bool dl_time_before(u64 a, u64 b) - #ifdef CONFIG_SMP - - struct root_domain; -+#ifdef CONFIG_SCHED_MUQSS -+static inline void dl_clear_root_domain(struct root_domain *rd) -+{ -+} -+static inline void dl_add_task_root_domain(struct task_struct *p) -+{ -+} -+#else /* CONFIG_SCHED_MUQSS */ - extern void dl_add_task_root_domain(struct 
task_struct *p); - extern void dl_clear_root_domain(struct root_domain *rd); -+#endif /* CONFIG_SCHED_MUQSS */ - - #endif /* CONFIG_SMP */ -diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h -index 6d67e9a5af6b..101fe470aa8f 100644 ---- a/include/linux/sched/nohz.h -+++ b/include/linux/sched/nohz.h -@@ -13,7 +13,7 @@ extern int get_nohz_timer_target(void); - static inline void nohz_balance_enter_idle(int cpu) { } - #endif - --#ifdef CONFIG_NO_HZ_COMMON -+#if defined(CONFIG_NO_HZ_COMMON) && !defined(CONFIG_SCHED_MUQSS) - void calc_load_nohz_start(void); - void calc_load_nohz_remote(struct rq *rq); - void calc_load_nohz_stop(void); -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 7d64feafc408..43c9d9e50c09 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -20,8 +20,20 @@ - */ - - #define MAX_USER_RT_PRIO 100 -+ -+#ifdef CONFIG_SCHED_MUQSS -+/* Note different MAX_RT_PRIO */ -+#define MAX_RT_PRIO (MAX_USER_RT_PRIO + 1) -+ -+#define ISO_PRIO (MAX_RT_PRIO) -+#define NORMAL_PRIO (MAX_RT_PRIO + 1) -+#define IDLE_PRIO (MAX_RT_PRIO + 2) -+#define PRIO_LIMIT ((IDLE_PRIO) + 1) -+#else /* CONFIG_SCHED_MUQSS */ - #define MAX_RT_PRIO MAX_USER_RT_PRIO - -+#endif /* CONFIG_SCHED_MUQSS */ -+ - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..010b2244e0b6 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return true; -+#ifndef CONFIG_SCHED_MUQSS - if (policy == SCHED_DEADLINE) - return true; -+#endif - return false; - } - -diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h -index 38359071236a..e2ebedb6512c 100644 ---- a/include/linux/sched/task.h -+++ b/include/linux/sched/task.h -@@ -106,7 +106,7 @@ extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); - extern void free_task(struct task_struct *tsk); - - /* sched_exec is called by processes performing an exec */ --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_MUQSS) - extern void sched_exec(void); - #else - #define sched_exec() {} -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -new file mode 100644 -index 000000000000..d4be84ba273b ---- /dev/null -+++ b/include/linux/skip_list.h -@@ -0,0 +1,33 @@ -+#ifndef _LINUX_SKIP_LISTS_H -+#define _LINUX_SKIP_LISTS_H -+typedef u64 keyType; -+typedef void *valueType; -+ -+typedef struct nodeStructure skiplist_node; -+ -+struct nodeStructure { -+ int level; /* Levels in this structure */ -+ keyType key; -+ valueType value; -+ skiplist_node *next[8]; -+ skiplist_node *prev[8]; -+}; -+ -+typedef struct listStructure { -+ int entries; -+ int level; /* Maximum level of the list -+ (1 more than the number of levels in the list) */ -+ skiplist_node *header; /* pointer to header */ -+} skiplist; -+ -+void skiplist_init(skiplist_node *slnode); -+skiplist *new_skiplist(skiplist_node *slnode); -+void free_skiplist(skiplist *l); -+void skiplist_node_init(skiplist_node *node); -+void skiplist_insert(skiplist *l, skiplist_node *node, keyType key, valueType value, unsigned int randseed); -+void skiplist_delete(skiplist *l, skiplist_node *node); -+ -+static inline bool skiplist_node_empty(skiplist_node *node) { -+ return (!node->next[0]); -+} -+#endif /* _LINUX_SKIP_LISTS_H */ -diff --git 
a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h -index 3bac0a8ceab2..f48c5c5da651 100644 ---- a/include/uapi/linux/sched.h -+++ b/include/uapi/linux/sched.h -@@ -115,9 +115,16 @@ struct clone_args { - #define SCHED_FIFO 1 - #define SCHED_RR 2 - #define SCHED_BATCH 3 --/* SCHED_ISO: reserved but not implemented yet */ -+/* SCHED_ISO: Implemented on MuQSS only */ - #define SCHED_IDLE 5 -+#ifdef CONFIG_SCHED_MUQSS -+#define SCHED_ISO 4 -+#define SCHED_IDLEPRIO SCHED_IDLE -+#define SCHED_MAX (SCHED_IDLEPRIO) -+#define SCHED_RANGE(policy) ((policy) <= SCHED_MAX) -+#else /* CONFIG_SCHED_MUQSS */ - #define SCHED_DEADLINE 6 -+#endif /* CONFIG_SCHED_MUQSS */ - - /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ - #define SCHED_RESET_ON_FORK 0x40000000 -diff --git a/init/Kconfig b/init/Kconfig -index 74a5ac65644f..44bba84664f3 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -61,6 +61,18 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config SCHED_MUQSS -+ bool "MuQSS cpu scheduler" -+ select HIGH_RES_TIMERS -+ ---help--- -+ The Multiple Queue Skiplist Scheduler for excellent interactivity and -+ responsiveness on the desktop and highly scalable deterministic -+ low latency on any hardware. -+ -+ Say Y here. -+ default y -+ -+ - config BROKEN - bool - -@@ -440,7 +452,7 @@ config HAVE_SCHED_AVG_IRQ - - config SCHED_THERMAL_PRESSURE - bool "Enable periodic averaging of thermal pressure" -- depends on SMP -+ depends on SMP && !SCHED_MUQSS - - config BSD_PROCESS_ACCT - bool "BSD Process Accounting" -@@ -777,6 +789,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION -+ depends on !SCHED_MUQSS - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -876,9 +889,13 @@ menuconfig CGROUP_SCHED - help - This feature lets CPU scheduler recognize task groups and control CPU - bandwidth allocation to such task groups. It uses cgroups to group -- tasks. -+ tasks. In combination with MuQSS this is purely a STUB to create the -+ files associated with the CPU controller cgroup but most of the -+ controls do nothing. This is useful for working in environments and -+ with applications that will only work if this control group is -+ present. - --if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_MUQSS - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED -@@ -1007,6 +1024,7 @@ config CGROUP_DEVICE - - config CGROUP_CPUACCT - bool "Simple CPU accounting controller" -+ depends on !SCHED_MUQSS - help - Provides a simple controller for monitoring the - total CPU consumed by the tasks in a cgroup. 
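The include/uapi/linux/sched.h hunk above turns the formerly reserved policy value 4 into SCHED_ISO on CONFIG_SCHED_MUQSS kernels, and the linux/ioprio.h hunk earlier maps SCHED_ISO tasks to the highest best-effort I/O priority. A minimal userspace sketch of requesting the policy, assuming a MuQSS kernel (the file name iso_test.c is illustrative, and the constant is defined locally because glibc headers do not export SCHED_ISO):

/* iso_test.c - hypothetical demo of requesting SCHED_ISO on a MuQSS kernel.
 * SCHED_ISO (4) comes from the uapi hunk above; glibc does not export it.
 * Kernels without CONFIG_SCHED_MUQSS reject the policy with EINVAL.
 */
#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

#ifndef SCHED_ISO
#define SCHED_ISO 4	/* value taken from the uapi/linux/sched.h hunk above */
#endif

int main(void)
{
	struct sched_param sp = { .sched_priority = 0 };	/* non-RT policies use priority 0 */

	if (sched_setscheduler(0, SCHED_ISO, &sp) == -1) {
		fprintf(stderr, "SCHED_ISO unavailable: %s\n", strerror(errno));
		return 1;
	}
	printf("current policy: %d (expected %d)\n", sched_getscheduler(0), SCHED_ISO);
	return 0;
}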
-@@ -1134,6 +1152,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -+ depends on !SCHED_MUQSS - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -diff --git a/init/init_task.c b/init/init_task.c -index bd403ed3e418..5df65b2578eb 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -67,9 +67,17 @@ struct task_struct init_task - .stack = init_stack, - .usage = REFCOUNT_INIT(2), - .flags = PF_KTHREAD, -+#ifdef CONFIG_SCHED_MUQSS -+ .prio = NORMAL_PRIO, -+ .static_prio = MAX_PRIO - 20, -+ .normal_prio = NORMAL_PRIO, -+ .deadline = 0, -+ .time_slice = 1000000, -+#else - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, - .normal_prio = MAX_PRIO - 20, -+#endif - .policy = SCHED_NORMAL, - .cpus_ptr = &init_task.cpus_mask, - .cpus_mask = CPU_MASK_ALL, -@@ -79,6 +87,7 @@ struct task_struct init_task - .restart_block = { - .fn = do_no_restart_syscall, - }, -+#ifndef CONFIG_SCHED_MUQSS - .se = { - .group_node = LIST_HEAD_INIT(init_task.se.group_node), - }, -@@ -86,6 +95,7 @@ struct task_struct init_task - .run_list = LIST_HEAD_INIT(init_task.rt.run_list), - .time_slice = RR_TIMESLICE, - }, -+#endif - .tasks = LIST_HEAD_INIT(init_task.tasks), - #ifdef CONFIG_SMP - .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), -diff --git a/init/main.c b/init/main.c -index 03371976d387..63243a24de9b 100644 ---- a/init/main.c -+++ b/init/main.c -@@ -1411,6 +1411,8 @@ static int __ref kernel_init(void *unused) - - rcu_end_inkernel_boot(); - -+ print_scheduler_version(); -+ - if (ramdisk_execute_command) { - ret = run_init_process(ramdisk_execute_command); - if (!ret) -diff --git a/kernel/Kconfig.MuQSS b/kernel/Kconfig.MuQSS -new file mode 100644 -index 000000000000..a6a58781ef91 ---- /dev/null -+++ b/kernel/Kconfig.MuQSS -@@ -0,0 +1,105 @@ -+choice -+ prompt "CPU scheduler runqueue sharing" -+ default RQ_MC if SCHED_MUQSS -+ default RQ_NONE -+ -+config RQ_NONE -+ bool "No sharing" -+ help -+ This is the default behaviour where the CPU scheduler has one runqueue -+ per CPU, whether it is a physical or logical CPU (hyperthread). -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=none -+ -+ If unsure, say N. -+ -+config RQ_SMT -+ bool "SMT (hyperthread) siblings" -+ depends on SCHED_SMT && SCHED_MUQSS -+ -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by SMT (hyperthread) siblings. As these logical cores share -+ one physical core, sharing the runqueue resource can lead to decreased -+ overhead, lower latency and higher throughput. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=smt -+ -+ If unsure, say N. -+ -+config RQ_MC -+ bool "Multicore siblings" -+ depends on SCHED_MC && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by multicore siblings in addition to any SMT siblings. -+ As these physical cores share caches, sharing the runqueue resource -+ will lead to lower latency, but its effects on overhead and throughput -+ are less predictable. As a general rule, 6 or fewer cores will likely -+ benefit from this, while larger CPUs will only derive a latency -+ benefit. If your workloads are primarily single threaded, this will -+ possibly worsen throughput. If you are only concerned about latency -+ then enable this regardless of how many cores you have. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=mc -+ -+ If unsure, say Y. 
-+ -+config RQ_MC_LLC -+ bool "Multicore siblings (LLC)" -+ depends on SCHED_MC && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will behave similarly as -+ with "Multicore siblings". -+ This option takes LLC cache into account when scheduling tasks. -+ Option may benefit CPUs with multiple LLC caches, such as Ryzen -+ and Xeon CPUs. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=llc -+ -+ If unsure, say N. -+ -+config RQ_SMP -+ bool "Symmetric Multi-Processing" -+ depends on SMP && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by all physical CPUs unless they are on separate NUMA nodes. -+ As physical CPUs usually do not share resources, sharing the runqueue -+ will normally worsen throughput but improve latency. If you only -+ care about latency enable this. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=smp -+ -+ If unsure, say N. -+ -+config RQ_ALL -+ bool "NUMA" -+ depends on SMP && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ regardless of the architecture configuration, including across NUMA -+ nodes. This can substantially decrease throughput in NUMA -+ configurations, but light NUMA designs will not be dramatically -+ affected. This option should only be chosen if latency is the prime -+ concern. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=all -+ -+ If unsure, say N. -+endchoice -+ -+config SHARERQ -+ int -+ default 0 if RQ_NONE -+ default 1 if RQ_SMT -+ default 2 if RQ_MC -+ default 3 if RQ_MC_LLC -+ default 4 if RQ_SMP -+ default 5 if RQ_ALL -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 38ef6d06888e..89ed751ac4e4 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -5,7 +5,8 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_100 if SCHED_MUQSS -+ default HZ_250_NODEF if !SCHED_MUQSS - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -20,11 +21,18 @@ choice - config HZ_100 - bool "100 HZ" - help -+ 100 Hz is a suitable choice in combination with MuQSS which does -+ not rely on ticks for rescheduling interrupts, and is not Hz limited -+ for timeouts and sleeps from both the kernel and userspace. -+ This allows us to benefit from the lower overhead and higher -+ throughput of fewer timer ticks. -+ -+ Non-MuQSS kernels: - 100 Hz is a typical choice for servers, SMP and NUMA systems - with lots of processors that may show reduced performance if - too many timer interrupts are occurring. - -- config HZ_250 -+ config HZ_250_NODEF - bool "250 HZ" - help - 250 Hz is a good compromise choice allowing server performance -@@ -32,7 +40,10 @@ choice - on SMP and NUMA systems. If you are going to be using NTSC video - or multimedia, selected 300Hz instead. - -- config HZ_300 -+ 250 Hz is the default choice for the mainline scheduler but not -+ advantageous in combination with MuQSS. -+ -+ config HZ_300_NODEF - bool "300 HZ" - help - 300 Hz is a good compromise choice allowing server performance -@@ -40,7 +51,7 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. 
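The HZ_100 help text above leans on the schedule_msec_hrtimeout*() conversions from the first half of this patch: SCHED_MUQSS selects HIGH_RES_TIMERS in the init/Kconfig hunk, so short sleeps need not be rounded up to whole jiffies. A rough userspace sketch of the arithmetic (hz_rounding.c is an illustrative name; it uses the common-case round-up of msecs_to_jiffies() and assumes the fallback definitions shown in the include/linux/sched.h hunk above):

/* hz_rounding.c - editorial sketch: minimum sleep produced by the jiffy-based
 * fallback for a 1 ms request, using the common-case rounding of
 * msecs_to_jiffies(), i.e. (m + 1000/HZ - 1) / (1000/HZ), compared with the
 * millisecond-resolution hrtimeout call added by this patch.
 */
#include <stdio.h>

static long min_sleep_ms(long request_ms, long hz)
{
	long ms_per_jiffy = 1000 / hz;	/* assumes 1000 % HZ == 0, true for 100/250/1000 */
	long jiffies = (request_ms + ms_per_jiffy - 1) / ms_per_jiffy;
	return jiffies * ms_per_jiffy;
}

int main(void)
{
	const long hz_values[] = { 100, 250, 1000 };

	for (unsigned i = 0; i < sizeof(hz_values) / sizeof(hz_values[0]); i++)
		printf("HZ=%4ld: schedule_timeout(msecs_to_jiffies(1)) >= %ld ms, "
		       "schedule_msec_hrtimeout(1) ~ 1 ms with HIGH_RES_TIMERS\n",
		       hz_values[i], min_sleep_ms(1, hz_values[i]));
	return 0;
}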
- -- config HZ_1000 -+ config HZ_1000_NODEF - bool "1000 HZ" - help - 1000 Hz is the preferred choice for desktop systems and other -@@ -51,9 +62,9 @@ endchoice - config HZ - int - default 100 if HZ_100 -- default 250 if HZ_250 -- default 300 if HZ_300 -- default 1000 if HZ_1000 -+ default 250 if HZ_250_NODEF -+ default 300 if HZ_300_NODEF -+ default 1000 if HZ_1000_NODEF - - config SCHED_HRTICK - def_bool HIGH_RES_TIMERS -diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt -index bf82259cff96..d9438eb6f91c 100644 ---- a/kernel/Kconfig.preempt -+++ b/kernel/Kconfig.preempt -@@ -2,7 +2,7 @@ - - choice - prompt "Preemption Model" -- default PREEMPT_NONE -+ default PREEMPT - - config PREEMPT_NONE - bool "No Forced Preemption (Server)" -@@ -18,7 +18,7 @@ config PREEMPT_NONE - latencies. - - config PREEMPT_VOLUNTARY -- bool "Voluntary Kernel Preemption (Desktop)" -+ bool "Voluntary Kernel Preemption (Nothing)" - depends on !ARCH_NO_PREEMPT - help - This option reduces the latency of the kernel by adding more -@@ -33,7 +33,8 @@ config PREEMPT_VOLUNTARY - applications to run more 'smoothly' even when the system is - under load. - -- Select this if you are building a kernel for a desktop system. -+ Select this for no system in particular (choose Preemptible -+ instead on a desktop if you know what's good for you). - - config PREEMPT - bool "Preemptible Kernel (Low-Latency Desktop)" -diff --git a/kernel/Makefile b/kernel/Makefile -index 4cb4130ced32..b11afae9eea8 100644 ---- a/kernel/Makefile -+++ b/kernel/Makefile -@@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o \ - extable.o params.o \ - kthread.o sys_ni.o nsproxy.o \ - notifier.o ksysfs.o cred.o reboot.o \ -- async.o range.o smpboot.o ucount.o -+ async.o range.o smpboot.o ucount.o skip_list.o - - obj-$(CONFIG_MODULES) += kmod.o - obj-$(CONFIG_MULTIUSER) += groups.o -diff --git a/kernel/delayacct.c b/kernel/delayacct.c -index 27725754ac99..769d773c7182 100644 ---- a/kernel/delayacct.c -+++ b/kernel/delayacct.c -@@ -106,7 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) - */ - t1 = tsk->sched_info.pcount; - t2 = tsk->sched_info.run_delay; -- t3 = tsk->se.sum_exec_runtime; -+ t3 = tsk_seruntime(tsk); - - d->cpu_count += t1; - -diff --git a/kernel/exit.c b/kernel/exit.c -index ce2a75bc0ade..f0f864bc1ab9 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -122,7 +122,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->curr_target = next_thread(tsk); - } - -- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, -+ add_device_randomness((const void*) &tsk_seruntime(tsk), - sizeof(unsigned long long)); - - /* -@@ -143,7 +143,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->inblock += task_io_get_inblock(tsk); - sig->oublock += task_io_get_oublock(tsk); - task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; -+ sig->sum_sched_runtime += tsk_seruntime(tsk); - sig->nr_threads--; - __unhash_process(tsk, group_dead); - write_sequnlock(&sig->stats_lock); -diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig -index 20d501af4f2e..f92cabe495bd 100644 ---- a/kernel/irq/Kconfig -+++ b/kernel/irq/Kconfig -@@ -115,6 +115,23 @@ config GENERIC_IRQ_RESERVATION_MODE - config IRQ_FORCED_THREADING - bool - -+config FORCE_IRQ_THREADING -+ bool "Make IRQ threading compulsory" -+ depends on IRQ_FORCED_THREADING -+ default n -+ ---help--- -+ -+ Make IRQ threading mandatory for any IRQ handlers that support it -+ instead of being optional and 
requiring the threadirqs kernel -+ parameter. Instead they can be optionally disabled with the -+ nothreadirqs kernel parameter. -+ -+ Enabling this may make some architectures not boot with runqueue -+ sharing and MuQSS. -+ -+ Enable if you are building for a desktop or low latency system, -+ otherwise say N. -+ - config SPARSE_IRQ - bool "Support sparse irq numbering" if MAY_HAVE_SPARSE_IRQ - ---help--- -diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c -index 453a8a0f4804..2f14a31d8efd 100644 ---- a/kernel/irq/manage.c -+++ b/kernel/irq/manage.c -@@ -25,9 +25,20 @@ - #include "internals.h" - - #if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT) -+#ifdef CONFIG_FORCE_IRQ_THREADING -+__read_mostly bool force_irqthreads = true; -+#else - __read_mostly bool force_irqthreads; -+#endif - EXPORT_SYMBOL_GPL(force_irqthreads); - -+static int __init setup_noforced_irqthreads(char *arg) -+{ -+ force_irqthreads = false; -+ return 0; -+} -+early_param("nothreadirqs", setup_noforced_irqthreads); -+ - static int __init setup_forced_irqthreads(char *arg) - { - force_irqthreads = true; -diff --git a/kernel/kthread.c b/kernel/kthread.c -index bfbfa481be3a..f5942fb29ba8 100644 ---- a/kernel/kthread.c -+++ b/kernel/kthread.c -@@ -446,6 +446,34 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) - } - EXPORT_SYMBOL(kthread_bind); - -+#if defined(CONFIG_SCHED_MUQSS) && defined(CONFIG_SMP) -+extern void __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); -+ -+/* -+ * new_kthread_bind is a special variant of __kthread_bind_mask. -+ * For new threads to work on muqss we want to call do_set_cpus_allowed -+ * without the task_cpu being set and the task rescheduled until they're -+ * rescheduled on their own so we call __do_set_cpus_allowed directly which -+ * only changes the cpumask. This is particularly important for smpboot threads -+ * to work. -+ */ -+static void new_kthread_bind(struct task_struct *p, unsigned int cpu) -+{ -+ unsigned long flags; -+ -+ if (WARN_ON(!wait_task_inactive(p, TASK_UNINTERRUPTIBLE))) -+ return; -+ -+ /* It's safe because the task is inactive. */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ __do_set_cpus_allowed(p, cpumask_of(cpu)); -+ p->flags |= PF_NO_SETAFFINITY; -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+#else -+#define new_kthread_bind(p, cpu) kthread_bind(p, cpu) -+#endif -+ - /** - * kthread_create_on_cpu - Create a cpu bound kthread - * @threadfn: the function to run until signal_pending(current). -@@ -467,7 +495,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), - cpu); - if (IS_ERR(p)) - return p; -- kthread_bind(p, cpu); -+ new_kthread_bind(p, cpu); - /* CPU hotplug need to bind once again when unparking the thread. */ - set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags); - to_kthread(p)->cpu = cpu; -diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c -index f6310f848f34..825f9b8e228f 100644 ---- a/kernel/livepatch/transition.c -+++ b/kernel/livepatch/transition.c -@@ -282,7 +282,7 @@ static bool klp_try_switch_task(struct task_struct *task) - { - static char err_buf[STACK_ERR_BUF_SIZE]; - struct rq *rq; -- struct rq_flags flags; -+ struct rq_flags rf; - int ret; - bool success = false; - -@@ -304,7 +304,7 @@ static bool klp_try_switch_task(struct task_struct *task) - * functions. If all goes well, switch the task to the target patch - * state. 
- */ -- rq = task_rq_lock(task, &flags); -+ rq = task_rq_lock(task, &rf); - - if (task_running(rq, task) && task != current) { - snprintf(err_buf, STACK_ERR_BUF_SIZE, -@@ -323,7 +323,7 @@ static bool klp_try_switch_task(struct task_struct *task) - task->patch_state = klp_target_state; - - done: -- task_rq_unlock(rq, task, &flags); -+ task_rq_unlock(rq, task, &rf); - - /* - * Due to console deadlock issues, pr_debug() can't be used while -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 21fb5a5662b5..a04ffebc6b7a 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -16,15 +16,23 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) - CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer - endif - -+ifdef CONFIG_SCHED_MUQSS -+obj-y += MuQSS.o clock.o cputime.o -+obj-y += idle.o -+obj-y += wait.o wait_bit.o swait.o completion.o -+ -+obj-$(CONFIG_SMP) += topology.o -+else - obj-y += core.o loadavg.o clock.o cputime.o - obj-y += idle.o fair.o rt.o deadline.o - obj-y += wait.o wait_bit.o swait.o completion.o - - obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o --obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o - obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o -+endif -+obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o - obj-$(CONFIG_MEMBARRIER) += membarrier.o -diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c -new file mode 100644 -index 000000000000..18a9b4a23e44 ---- /dev/null -+++ b/kernel/sched/MuQSS.c -@@ -0,0 +1,7624 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * kernel/sched/MuQSS.c, was kernel/sched.c -+ * -+ * Kernel scheduler and related syscalls -+ * -+ * Copyright (C) 1991-2002 Linus Torvalds -+ * -+ * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and -+ * make semaphores SMP safe -+ * 1998-11-19 Implemented schedule_timeout() and related stuff -+ * by Andrea Arcangeli -+ * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar: -+ * hybrid priority-list and round-robin design with -+ * an array-switch method of distributing timeslices -+ * and per-CPU runqueues. Cleanups and useful suggestions -+ * by Davide Libenzi, preemptible kernel bits by Robert Love. -+ * 2003-09-03 Interactivity tuning by Con Kolivas. -+ * 2004-04-02 Scheduler domains code by Nick Piggin -+ * 2007-04-15 Work begun on replacing all interactivity tuning with a -+ * fair scheduling design by Con Kolivas. -+ * 2007-05-05 Load balancing (smp-nice) and other improvements -+ * by Peter Williams -+ * 2007-05-06 Interactivity improvements to CFS by Mike Galbraith -+ * 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri -+ * 2007-11-29 RT balancing improvements by Steven Rostedt, Gregory Haskins, -+ * Thomas Gleixner, Mike Kravetz -+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes -+ * a whole lot of those previous things. -+ * 2016-10-01 Multiple Queue Skiplist Scheduler scalable evolution of BFS -+ * scheduler by Con Kolivas. 
-+ * 2019-08-31 LLC bits by Eduards Bezverhijs -+ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "../workqueue_internal.h" -+#include "../../fs/io-wq.h" -+#include "../smpboot.h" -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+#include "MuQSS.h" -+ -+#define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO) -+#define rt_task(p) rt_prio((p)->prio) -+#define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) -+#define is_rt_policy(policy) ((policy) == SCHED_FIFO || \ -+ (policy) == SCHED_RR) -+#define has_rt_policy(p) unlikely(is_rt_policy((p)->policy)) -+ -+#define is_idle_policy(policy) ((policy) == SCHED_IDLEPRIO) -+#define idleprio_task(p) unlikely(is_idle_policy((p)->policy)) -+#define task_running_idle(p) unlikely((p)->prio == IDLE_PRIO) -+ -+#define is_iso_policy(policy) ((policy) == SCHED_ISO) -+#define iso_task(p) unlikely(is_iso_policy((p)->policy)) -+#define task_running_iso(p) unlikely((p)->prio == ISO_PRIO) -+ -+#define rq_idle(rq) ((rq)->rq_prio == PRIO_LIMIT) -+ -+#define ISO_PERIOD (5 * HZ) -+ -+#define STOP_PRIO (MAX_RT_PRIO - 1) -+ -+/* -+ * Some helpers for converting to/from various scales. Use shifts to get -+ * approximate multiples of ten for less overhead. -+ */ -+#define APPROX_NS_PS (1073741824) /* Approximate ns per second */ -+#define JIFFIES_TO_NS(TIME) ((TIME) * (APPROX_NS_PS / HZ)) -+#define JIFFY_NS (APPROX_NS_PS / HZ) -+#define JIFFY_US (1048576 / HZ) -+#define NS_TO_JIFFIES(TIME) ((TIME) / JIFFY_NS) -+#define HALF_JIFFY_NS (APPROX_NS_PS / HZ / 2) -+#define HALF_JIFFY_US (1048576 / HZ / 2) -+#define MS_TO_NS(TIME) ((TIME) << 20) -+#define MS_TO_US(TIME) ((TIME) << 10) -+#define NS_TO_MS(TIME) ((TIME) >> 20) -+#define NS_TO_US(TIME) ((TIME) >> 10) -+#define US_TO_NS(TIME) ((TIME) << 10) -+#define TICK_APPROX_NS ((APPROX_NS_PS+HZ/2)/HZ) -+ -+#define RESCHED_US (100) /* Reschedule if less than this many μs left */ -+ -+void print_scheduler_version(void) -+{ -+ printk(KERN_INFO "MuQSS CPU scheduler v0.202 by Con Kolivas.\n"); -+} -+ -+/* Define RQ share levels */ -+#define RQSHARE_NONE 0 -+#define RQSHARE_SMT 1 -+#define RQSHARE_MC 2 -+#define RQSHARE_MC_LLC 3 -+#define RQSHARE_SMP 4 -+#define RQSHARE_ALL 5 -+ -+/* Define locality levels */ -+#define LOCALITY_SAME 0 -+#define LOCALITY_SMT 1 -+#define LOCALITY_MC_LLC 2 -+#define LOCALITY_MC 3 -+#define LOCALITY_SMP 4 -+#define LOCALITY_DISTANT 5 -+ -+/* -+ * This determines what level of runqueue sharing will be done and is -+ * configurable at boot time with the bootparam rqshare = -+ */ -+static int rqshare __read_mostly = CONFIG_SHARERQ; /* Default RQSHARE_MC */ -+ -+static int __init set_rqshare(char *str) -+{ -+ if (!strncmp(str, "none", 4)) { -+ rqshare = RQSHARE_NONE; -+ return 0; -+ } -+ if (!strncmp(str, "smt", 3)) { -+ rqshare = RQSHARE_SMT; -+ return 0; -+ } -+ if (!strncmp(str, "mc", 2)) { -+ rqshare = RQSHARE_MC; -+ return 0; -+ } -+ if (!strncmp(str, "llc", 3)) { -+ rqshare = RQSHARE_MC_LLC; -+ return 0; -+ } -+ if (!strncmp(str, "smp", 3)) { -+ rqshare = RQSHARE_SMP; -+ return 0; -+ } -+ if (!strncmp(str, "all", 3)) { -+ rqshare = RQSHARE_ALL; -+ return 0; -+ } -+ return 1; -+} -+__setup("rqshare=", set_rqshare); -+ -+/* -+ * This is the time all tasks within the same priority round robin. -+ * Value is in ms and set to a minimum of 6ms. 
-+ * Tunable via /proc interface. -+ */ -+int rr_interval __read_mostly = 6; -+ -+/* -+ * Tunable to choose whether to prioritise latency or throughput, simple -+ * binary yes or no -+ */ -+int sched_interactive __read_mostly = 1; -+ -+/* -+ * sched_iso_cpu - sysctl which determines the cpu percentage SCHED_ISO tasks -+ * are allowed to run five seconds as real time tasks. This is the total over -+ * all online cpus. -+ */ -+int sched_iso_cpu __read_mostly = 70; -+ -+/* -+ * sched_yield_type - Choose what sort of yield sched_yield will perform. -+ * 0: No yield. -+ * 1: Yield only to better priority/deadline tasks. (default) -+ * 2: Expire timeslice and recalculate deadline. -+ */ -+int sched_yield_type __read_mostly = 1; -+ -+/* -+ * The relative length of deadline for each priority(nice) level. -+ */ -+static int prio_ratios[NICE_WIDTH] __read_mostly; -+ -+ -+/* -+ * The quota handed out to tasks of all priority levels when refilling their -+ * time_slice. -+ */ -+static inline int timeslice(void) -+{ -+ return MS_TO_US(rr_interval); -+} -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+ -+#ifdef CONFIG_SMP -+/* -+ * Total number of runqueues. Equals number of CPUs when there is no runqueue -+ * sharing but is usually less with SMT/MC sharing of runqueues. -+ */ -+static int total_runqueues __read_mostly = 1; -+ -+static cpumask_t cpu_idle_map ____cacheline_aligned_in_smp; -+ -+struct rq *cpu_rq(int cpu) -+{ -+ return &per_cpu(runqueues, (cpu)); -+} -+#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -+ -+/* -+ * For asym packing, by default the lower numbered cpu has higher priority. -+ */ -+int __weak arch_asym_cpu_priority(int cpu) -+{ -+ return -cpu; -+} -+ -+int __weak arch_sd_sibling_asym_packing(void) -+{ -+ return 0*SD_ASYM_PACKING; -+} -+ -+#ifdef CONFIG_SCHED_SMT -+DEFINE_STATIC_KEY_FALSE(sched_smt_present); -+EXPORT_SYMBOL_GPL(sched_smt_present); -+#endif -+ -+#else -+struct rq *uprq; -+#endif /* CONFIG_SMP */ -+ -+#include "stats.h" -+ -+/* -+ * All common locking functions performed on rq->lock. rq->clock is local to -+ * the CPU accessing it so it can be modified just with interrupts disabled -+ * when we're not updating niffies. -+ * Looking up task_rq must be done under rq->lock to be safe. -+ */ -+ -+/* -+ * RQ-clock updating methods: -+ */ -+ -+#ifdef HAVE_SCHED_AVG_IRQ -+static void update_irq_load_avg(struct rq *rq, long delta); -+#else -+static inline void update_irq_load_avg(struct rq *rq, long delta) {} -+#endif -+ -+static void update_rq_clock_task(struct rq *rq, s64 delta) -+{ -+/* -+ * In theory, the compile should just see 0 here, and optimize out the call -+ * to sched_rt_avg_update. But I don't trust it... -+ */ -+ s64 __maybe_unused steal = 0, irq_delta = 0; -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; -+ -+ /* -+ * Since irq_time is only updated on {soft,}irq_exit, we might run into -+ * this case when a previous update_rq_clock() happened inside a -+ * {soft,}irq region. -+ * -+ * When this happens, we stop ->clock_task and only update the -+ * prev_irq_time stamp to account for the part that fit, so that a next -+ * update will consume the rest. This ensures ->clock_task is -+ * monotonic. -+ * -+ * It does however cause some slight miss-attribution of {soft,}irq -+ * time, a more accurate solution would be to update the irq_time using -+ * the current rq->clock timestamp, except that would require using -+ * atomic ops. 
-+ */ -+ if (irq_delta > delta) -+ irq_delta = delta; -+ -+ rq->prev_irq_time += irq_delta; -+ delta -= irq_delta; -+#endif -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ if (static_key_false((¶virt_steal_rq_enabled))) { -+ steal = paravirt_steal_clock(cpu_of(rq)); -+ steal -= rq->prev_steal_time_rq; -+ -+ if (unlikely(steal > delta)) -+ steal = delta; -+ -+ rq->prev_steal_time_rq += steal; -+ delta -= steal; -+ } -+#endif -+ rq->clock_task += delta; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ if (irq_delta + steal) -+ update_irq_load_avg(rq, irq_delta + steal); -+#endif -+} -+ -+static inline void update_rq_clock(struct rq *rq) -+{ -+ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; -+ -+ if (unlikely(delta < 0)) -+ return; -+ rq->clock += delta; -+ update_rq_clock_task(rq, delta); -+} -+ -+/* -+ * Niffies are a globally increasing nanosecond counter. They're only used by -+ * update_load_avg and time_slice_expired, however deadlines are based on them -+ * across CPUs. Update them whenever we will call one of those functions, and -+ * synchronise them across CPUs whenever we hold both runqueue locks. -+ */ -+static inline void update_clocks(struct rq *rq) -+{ -+ s64 ndiff, minndiff; -+ long jdiff; -+ -+ update_rq_clock(rq); -+ ndiff = rq->clock - rq->old_clock; -+ rq->old_clock = rq->clock; -+ jdiff = jiffies - rq->last_jiffy; -+ -+ /* Subtract any niffies added by balancing with other rqs */ -+ ndiff -= rq->niffies - rq->last_niffy; -+ minndiff = JIFFIES_TO_NS(jdiff) - rq->niffies + rq->last_jiffy_niffies; -+ if (minndiff < 0) -+ minndiff = 0; -+ ndiff = max(ndiff, minndiff); -+ rq->niffies += ndiff; -+ rq->last_niffy = rq->niffies; -+ if (jdiff) { -+ rq->last_jiffy += jdiff; -+ rq->last_jiffy_niffies = rq->niffies; -+ } -+} -+ -+/* -+ * Any time we have two runqueues locked we use that as an opportunity to -+ * synchronise niffies to the highest value as idle ticks may have artificially -+ * kept niffies low on one CPU and the truth can only be later. -+ */ -+static inline void synchronise_niffies(struct rq *rq1, struct rq *rq2) -+{ -+ if (rq1->niffies > rq2->niffies) -+ rq2->niffies = rq1->niffies; -+ else -+ rq1->niffies = rq2->niffies; -+} -+ -+/* -+ * double_rq_lock - safely lock two runqueues -+ * -+ * Note this does not disable interrupts like task_rq_lock, -+ * you need to do so manually before calling. -+ */ -+ -+/* For when we know rq1 != rq2 */ -+static inline void __double_rq_lock(struct rq *rq1, struct rq *rq2) -+ __acquires(rq1->lock) -+ __acquires(rq2->lock) -+{ -+ if (rq1 < rq2) { -+ raw_spin_lock(rq1->lock); -+ raw_spin_lock_nested(rq2->lock, SINGLE_DEPTH_NESTING); -+ } else { -+ raw_spin_lock(rq2->lock); -+ raw_spin_lock_nested(rq1->lock, SINGLE_DEPTH_NESTING); -+ } -+} -+ -+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) -+ __acquires(rq1->lock) -+ __acquires(rq2->lock) -+{ -+ BUG_ON(!irqs_disabled()); -+ if (rq1->lock == rq2->lock) { -+ raw_spin_lock(rq1->lock); -+ __acquire(rq2->lock); /* Fake it out ;) */ -+ } else -+ __double_rq_lock(rq1, rq2); -+ synchronise_niffies(rq1, rq2); -+} -+ -+/* -+ * double_rq_unlock - safely unlock two runqueues -+ * -+ * Note this does not restore interrupts like task_rq_unlock, -+ * you need to do so manually after calling. 
-+ */ -+static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) -+ __releases(rq1->lock) -+ __releases(rq2->lock) -+{ -+ raw_spin_unlock(rq1->lock); -+ if (rq1->lock != rq2->lock) -+ raw_spin_unlock(rq2->lock); -+ else -+ __release(rq2->lock); -+} -+ -+static inline void lock_all_rqs(void) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ for_each_possible_cpu(cpu) { -+ struct rq *rq = cpu_rq(cpu); -+ -+ do_raw_spin_lock(rq->lock); -+ } -+} -+ -+static inline void unlock_all_rqs(void) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) { -+ struct rq *rq = cpu_rq(cpu); -+ -+ do_raw_spin_unlock(rq->lock); -+ } -+ preempt_enable(); -+} -+ -+/* Specially nest trylock an rq */ -+static inline bool trylock_rq(struct rq *this_rq, struct rq *rq) -+{ -+ if (unlikely(!do_raw_spin_trylock(rq->lock))) -+ return false; -+ spin_acquire(&rq->lock->dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ synchronise_niffies(this_rq, rq); -+ return true; -+} -+ -+/* Unlock a specially nested trylocked rq */ -+static inline void unlock_rq(struct rq *rq) -+{ -+ spin_release(&rq->lock->dep_map, _RET_IP_); -+ do_raw_spin_unlock(rq->lock); -+} -+ -+/* -+ * cmpxchg based fetch_or, macro so it works for different integer types -+ */ -+#define fetch_or(ptr, mask) \ -+ ({ \ -+ typeof(ptr) _ptr = (ptr); \ -+ typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _old, _val = *_ptr; \ -+ \ -+ for (;;) { \ -+ _old = cmpxchg(_ptr, _val, _val | _mask); \ -+ if (_old == _val) \ -+ break; \ -+ _val = _old; \ -+ } \ -+ _old; \ -+}) -+ -+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) -+/* -+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * this avoids any races wrt polling state changes and thereby avoids -+ * spurious IPIs. -+ */ -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+} -+ -+/* -+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. -+ * -+ * If this returns true, then the idle task promises to call -+ * sched_ttwu_pending() and reschedule soon. -+ */ -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); -+ -+ for (;;) { -+ if (!(val & _TIF_POLLING_NRFLAG)) -+ return false; -+ if (val & _TIF_NEED_RESCHED) -+ return true; -+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); -+ if (old == val) -+ break; -+ val = old; -+ } -+ return true; -+} -+ -+#else -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ return false; -+} -+#endif -+#endif -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. -+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. 
-+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. -+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); -+ /* Task can safely be re-inserted now */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ -+static inline void smp_sched_reschedule(int cpu) -+{ -+ if (likely(cpu_online(cpu))) -+ smp_send_reschedule(cpu); -+} -+ -+/* -+ * resched_task - mark a task 'to be rescheduled now'. -+ * -+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. -+ */ -+void resched_task(struct task_struct *p) -+{ -+ int cpu; -+#ifdef CONFIG_LOCKDEP -+ /* Kernel threads call this when creating workqueues while still -+ * inactive from __kthread_bind_mask, holding only the pi_lock */ -+ if (!(p->flags & PF_KTHREAD)) { -+ struct rq *rq = task_rq(p); -+ -+ lockdep_assert_held(rq->lock); -+ } -+#endif -+ if (test_tsk_need_resched(p)) -+ return; -+ -+ cpu = task_cpu(p); -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(p)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+/* -+ * A task that is not running or queued will not have a node set. -+ * A task that is queued but not running will have a node set. -+ * A task that is currently running will have ->on_cpu set but no node set. 
-+ */ -+static inline bool task_queued(struct task_struct *p) -+{ -+ return !skiplist_node_empty(&p->node); -+} -+ -+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags); -+static inline void resched_if_idle(struct rq *rq); -+ -+static inline bool deadline_before(u64 deadline, u64 time) -+{ -+ return (deadline < time); -+} -+ -+/* -+ * Deadline is "now" in niffies + (offset by priority). Setting the deadline -+ * is the key to everything. It distributes cpu fairly amongst tasks of the -+ * same nice value, it proportions cpu according to nice level, it means the -+ * task that last woke up the longest ago has the earliest deadline, thus -+ * ensuring that interactive tasks get low latency on wake up. The CPU -+ * proportion works out to the square of the virtual deadline difference, so -+ * this equation will give nice 19 3% CPU compared to nice 0. -+ */ -+static inline u64 prio_deadline_diff(int user_prio) -+{ -+ return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128)); -+} -+ -+static inline u64 task_deadline_diff(struct task_struct *p) -+{ -+ return prio_deadline_diff(TASK_USER_PRIO(p)); -+} -+ -+static inline u64 static_deadline_diff(int static_prio) -+{ -+ return prio_deadline_diff(USER_PRIO(static_prio)); -+} -+ -+static inline int longest_deadline_diff(void) -+{ -+ return prio_deadline_diff(39); -+} -+ -+static inline int ms_longest_deadline_diff(void) -+{ -+ return NS_TO_MS(longest_deadline_diff()); -+} -+ -+static inline bool rq_local(struct rq *rq); -+ -+#ifndef SCHED_CAPACITY_SCALE -+#define SCHED_CAPACITY_SCALE 1024 -+#endif -+ -+static inline int rq_load(struct rq *rq) -+{ -+ return rq->nr_running; -+} -+ -+/* -+ * Update the load average for feeding into cpu frequency governors. Use a -+ * rough estimate of a rolling average with ~ time constant of 32ms. -+ * 80/128 ~ 0.63. * 80 / 32768 / 128 == * 5 / 262144 -+ * Make sure a call to update_clocks has been made before calling this to get -+ * an updated rq->niffies. -+ */ -+static void update_load_avg(struct rq *rq, unsigned int flags) -+{ -+ long us_interval, load; -+ -+ us_interval = NS_TO_US(rq->niffies - rq->load_update); -+ if (unlikely(us_interval <= 0)) -+ return; -+ -+ load = rq->load_avg - (rq->load_avg * us_interval * 5 / 262144); -+ if (unlikely(load < 0)) -+ load = 0; -+ load += rq_load(rq) * SCHED_CAPACITY_SCALE * us_interval * 5 / 262144; -+ rq->load_avg = load; -+ -+ rq->load_update = rq->niffies; -+ update_irq_load_avg(rq, 0); -+ if (likely(rq_local(rq))) -+ cpufreq_trigger(rq, flags); -+} -+ -+#ifdef HAVE_SCHED_AVG_IRQ -+/* -+ * IRQ variant of update_load_avg below. delta is actually time in nanoseconds -+ * here so we scale curload to how long it's been since the last update. -+ */ -+static void update_irq_load_avg(struct rq *rq, long delta) -+{ -+ long us_interval, load; -+ -+ us_interval = NS_TO_US(rq->niffies - rq->irq_load_update); -+ if (unlikely(us_interval <= 0)) -+ return; -+ -+ load = rq->irq_load_avg - (rq->irq_load_avg * us_interval * 5 / 262144); -+ if (unlikely(load < 0)) -+ load = 0; -+ load += NS_TO_US(delta) * SCHED_CAPACITY_SCALE * 5 / 262144; -+ rq->irq_load_avg = load; -+ -+ rq->irq_load_update = rq->niffies; -+} -+#endif -+ -+/* -+ * Removing from the runqueue. Enter with rq locked. Deleting a task -+ * from the skip list is done via the stored node reference in the task struct -+ * and does not require a full look up. Thus it occurs in O(k) time where k -+ * is the "level" of the list the task was stored at - usually < 4, max 8. 
-+ */ -+static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ skiplist_delete(rq->sl, &p->node); -+ rq->best_key = rq->node->next[0]->key; -+ update_clocks(rq); -+ -+ if (!(flags & DEQUEUE_SAVE)) { -+ sched_info_dequeued(rq, p); -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); -+ } -+ rq->nr_running--; -+ if (rt_task(p)) -+ rq->rt_nr_running--; -+ update_load_avg(rq, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_RCU -+static bool rcu_read_critical(struct task_struct *p) -+{ -+ return p->rcu_read_unlock_special.b.blocked; -+} -+#else /* CONFIG_PREEMPT_RCU */ -+#define rcu_read_critical(p) (false) -+#endif /* CONFIG_PREEMPT_RCU */ -+ -+/* -+ * To determine if it's safe for a task of SCHED_IDLEPRIO to actually run as -+ * an idle task, we ensure none of the following conditions are met. -+ */ -+static bool idleprio_suitable(struct task_struct *p) -+{ -+ return (!(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING)) && -+ !signal_pending(p) && !rcu_read_critical(p) && !freezing(p)); -+} -+ -+/* -+ * To determine if a task of SCHED_ISO can run in pseudo-realtime, we check -+ * that the iso_refractory flag is not set. -+ */ -+static inline bool isoprio_suitable(struct rq *rq) -+{ -+ return !rq->iso_refractory; -+} -+ -+/* -+ * Adding to the runqueue. Enter with rq locked. -+ */ -+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ unsigned int randseed, cflags = 0; -+ u64 sl_id; -+ -+ if (!rt_task(p)) { -+ /* Check it hasn't gotten rt from PI */ -+ if ((idleprio_task(p) && idleprio_suitable(p)) || -+ (iso_task(p) && isoprio_suitable(rq))) -+ p->prio = p->normal_prio; -+ else -+ p->prio = NORMAL_PRIO; -+ } else -+ rq->rt_nr_running++; -+ /* -+ * The sl_id key passed to the skiplist generates a sorted list. -+ * Realtime and sched iso tasks run FIFO so they only need be sorted -+ * according to priority. The skiplist will put tasks of the same -+ * key inserted later in FIFO order. Tasks of sched normal, batch -+ * and idleprio are sorted according to their deadlines. Idleprio -+ * tasks are offset by an impossibly large deadline value ensuring -+ * they get sorted into last positions, but still according to their -+ * own deadlines. This creates a "landscape" of skiplists running -+ * from priority 0 realtime in first place to the lowest priority -+ * idleprio tasks last. Skiplist insertion is an O(log n) process. -+ */ -+ if (p->prio <= ISO_PRIO) { -+ sl_id = p->prio; -+ } else { -+ sl_id = p->deadline; -+ if (idleprio_task(p)) { -+ if (p->prio == IDLE_PRIO) -+ sl_id |= 0xF000000000000000; -+ else -+ sl_id += longest_deadline_diff(); -+ } -+ } -+ /* -+ * Some architectures don't have better than microsecond resolution -+ * so mask out ~microseconds as the random seed for skiplist insertion. -+ */ -+ update_clocks(rq); -+ if (!(flags & ENQUEUE_RESTORE)) { -+ sched_info_queued(rq, p); -+ psi_enqueue(p, flags & ENQUEUE_WAKEUP); -+ } -+ -+ randseed = (rq->niffies >> 10) & 0xFFFFFFFF; -+ skiplist_insert(rq->sl, &p->node, sl_id, p, randseed); -+ rq->best_key = rq->node->next[0]->key; -+ if (p->in_iowait) -+ cflags |= SCHED_CPUFREQ_IOWAIT; -+ rq->nr_running++; -+ update_load_avg(rq, cflags); -+} -+ -+/* -+ * Returns the relative length of deadline all compared to the shortest -+ * deadline which is that of nice -20. -+ */ -+static inline int task_prio_ratio(struct task_struct *p) -+{ -+ return prio_ratios[TASK_USER_PRIO(p)]; -+} -+ -+/* -+ * task_timeslice - all tasks of all priorities get the exact same timeslice -+ * length. 
CPU distribution is handled by giving different deadlines to -+ * tasks of different priorities. Use 128 as the base value for fast shifts. -+ */ -+static inline int task_timeslice(struct task_struct *p) -+{ -+ return (rr_interval * task_prio_ratio(p) / 128); -+} -+ -+#ifdef CONFIG_SMP -+/* Entered with rq locked */ -+static inline void resched_if_idle(struct rq *rq) -+{ -+ if (rq_idle(rq)) -+ resched_task(rq->curr); -+} -+ -+static inline bool rq_local(struct rq *rq) -+{ -+ return (rq->cpu == smp_processor_id()); -+} -+#ifdef CONFIG_SMT_NICE -+static const cpumask_t *thread_cpumask(int cpu); -+ -+/* Find the best real time priority running on any SMT siblings of cpu and if -+ * none are running, the static priority of the best deadline task running. -+ * The lookups to the other runqueues is done lockless as the occasional wrong -+ * value would be harmless. */ -+static int best_smt_bias(struct rq *this_rq) -+{ -+ int other_cpu, best_bias = 0; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct rq *rq = cpu_rq(other_cpu); -+ -+ if (rq_idle(rq)) -+ continue; -+ if (unlikely(!rq->online)) -+ continue; -+ if (!rq->rq_mm) -+ continue; -+ if (likely(rq->rq_smt_bias > best_bias)) -+ best_bias = rq->rq_smt_bias; -+ } -+ return best_bias; -+} -+ -+static int task_prio_bias(struct task_struct *p) -+{ -+ if (rt_task(p)) -+ return 1 << 30; -+ else if (task_running_iso(p)) -+ return 1 << 29; -+ else if (task_running_idle(p)) -+ return 0; -+ return MAX_PRIO - p->static_prio; -+} -+ -+static bool smt_always_schedule(struct task_struct __maybe_unused *p, struct rq __maybe_unused *this_rq) -+{ -+ return true; -+} -+ -+static bool (*smt_schedule)(struct task_struct *p, struct rq *this_rq) = &smt_always_schedule; -+ -+/* We've already decided p can run on CPU, now test if it shouldn't for SMT -+ * nice reasons. */ -+static bool smt_should_schedule(struct task_struct *p, struct rq *this_rq) -+{ -+ int best_bias, task_bias; -+ -+ /* Kernel threads always run */ -+ if (unlikely(!p->mm)) -+ return true; -+ if (rt_task(p)) -+ return true; -+ if (!idleprio_suitable(p)) -+ return true; -+ best_bias = best_smt_bias(this_rq); -+ /* The smt siblings are all idle or running IDLEPRIO */ -+ if (best_bias < 1) -+ return true; -+ task_bias = task_prio_bias(p); -+ if (task_bias < 1) -+ return false; -+ if (task_bias >= best_bias) -+ return true; -+ /* Dither 25% cpu of normal tasks regardless of nice difference */ -+ if (best_bias % 4 == 1) -+ return true; -+ /* Sorry, you lose */ -+ return false; -+} -+#else /* CONFIG_SMT_NICE */ -+#define smt_schedule(p, this_rq) (true) -+#endif /* CONFIG_SMT_NICE */ -+ -+static inline void atomic_set_cpu(int cpu, cpumask_t *cpumask) -+{ -+ set_bit(cpu, (volatile unsigned long *)cpumask); -+} -+ -+/* -+ * The cpu_idle_map stores a bitmap of all the CPUs currently idle to -+ * allow easy lookup of whether any suitable idle CPUs are available. -+ * It's cheaper to maintain a binary yes/no if there are any idle CPUs on the -+ * idle_cpus variable than to do a full bitmask check when we are busy. The -+ * bits are set atomically but read locklessly as occasional false positive / -+ * negative is harmless. 
-+ */ -+static inline void set_cpuidle_map(int cpu) -+{ -+ if (likely(cpu_online(cpu))) -+ atomic_set_cpu(cpu, &cpu_idle_map); -+} -+ -+static inline void atomic_clear_cpu(int cpu, cpumask_t *cpumask) -+{ -+ clear_bit(cpu, (volatile unsigned long *)cpumask); -+} -+ -+static inline void clear_cpuidle_map(int cpu) -+{ -+ atomic_clear_cpu(cpu, &cpu_idle_map); -+} -+ -+static bool suitable_idle_cpus(struct task_struct *p) -+{ -+ return (cpumask_intersects(p->cpus_ptr, &cpu_idle_map)); -+} -+ -+/* -+ * Resched current on rq. We don't know if rq is local to this CPU nor if it -+ * is locked so we do not use an intermediate variable for the task to avoid -+ * having it dereferenced. -+ */ -+static void resched_curr(struct rq *rq) -+{ -+ int cpu; -+ -+ if (test_tsk_need_resched(rq->curr)) -+ return; -+ -+ rq->preempt = rq->curr; -+ cpu = rq->cpu; -+ -+ /* We're doing this without holding the rq lock if it's not task_rq */ -+ -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(rq->curr); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(rq->curr)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+#define CPUIDLE_DIFF_THREAD (1) -+#define CPUIDLE_DIFF_CORE_LLC (2) -+#define CPUIDLE_DIFF_CORE (4) -+#define CPUIDLE_CACHE_BUSY (8) -+#define CPUIDLE_DIFF_CPU (16) -+#define CPUIDLE_THREAD_BUSY (32) -+#define CPUIDLE_DIFF_NODE (64) -+ -+/* -+ * The best idle CPU is chosen according to the CPUIDLE ranking above where the -+ * lowest value would give the most suitable CPU to schedule p onto next. The -+ * order works out to be the following: -+ * -+ * Same thread, idle or busy cache, idle or busy threads -+ * Other core, same cache, idle or busy cache, idle threads. -+ * Same node, other CPU, idle cache, idle threads. -+ * Same node, other CPU, busy cache, idle threads. -+ * Other core, same cache, busy threads. -+ * Same node, other CPU, busy threads. -+ * Other node, other CPU, idle cache, idle threads. -+ * Other node, other CPU, busy cache, idle threads. -+ * Other node, other CPU, busy threads. 
-+ */ -+static int best_mask_cpu(int best_cpu, struct rq *rq, cpumask_t *tmpmask) -+{ -+ int best_ranking = CPUIDLE_DIFF_NODE | CPUIDLE_THREAD_BUSY | -+ CPUIDLE_DIFF_CPU | CPUIDLE_CACHE_BUSY | CPUIDLE_DIFF_CORE | -+ CPUIDLE_DIFF_CORE_LLC | CPUIDLE_DIFF_THREAD; -+ int cpu_tmp; -+ -+ if (cpumask_test_cpu(best_cpu, tmpmask)) -+ goto out; -+ -+ for_each_cpu(cpu_tmp, tmpmask) { -+ int ranking, locality; -+ struct rq *tmp_rq; -+ -+ ranking = 0; -+ tmp_rq = cpu_rq(cpu_tmp); -+ -+ locality = rq->cpu_locality[cpu_tmp]; -+#ifdef CONFIG_NUMA -+ if (locality > LOCALITY_SMP) -+ ranking |= CPUIDLE_DIFF_NODE; -+ else -+#endif -+ if (locality > LOCALITY_MC) -+ ranking |= CPUIDLE_DIFF_CPU; -+#ifdef CONFIG_SCHED_MC -+ else if (locality == LOCALITY_MC_LLC) -+ ranking |= CPUIDLE_DIFF_CORE_LLC; -+ else if (locality == LOCALITY_MC) -+ ranking |= CPUIDLE_DIFF_CORE; -+ if (!(tmp_rq->cache_idle(tmp_rq))) -+ ranking |= CPUIDLE_CACHE_BUSY; -+#endif -+#ifdef CONFIG_SCHED_SMT -+ if (locality == LOCALITY_SMT) -+ ranking |= CPUIDLE_DIFF_THREAD; -+#endif -+ if (ranking < best_ranking -+#ifdef CONFIG_SCHED_SMT -+ || (ranking == best_ranking && (tmp_rq->siblings_idle(tmp_rq))) -+#endif -+ ) { -+ best_cpu = cpu_tmp; -+ best_ranking = ranking; -+ } -+ } -+out: -+ return best_cpu; -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ struct rq *this_rq = cpu_rq(this_cpu); -+ -+ return (this_rq->cpu_locality[that_cpu] < LOCALITY_SMP); -+} -+ -+/* As per resched_curr but only will resched idle task */ -+static inline void resched_idle(struct rq *rq) -+{ -+ if (test_tsk_need_resched(rq->idle)) -+ return; -+ -+ rq->preempt = rq->idle; -+ -+ set_tsk_need_resched(rq->idle); -+ -+ if (rq_local(rq)) { -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ smp_sched_reschedule(rq->cpu); -+} -+ -+DEFINE_PER_CPU(cpumask_t, idlemask); -+ -+static struct rq *resched_best_idle(struct task_struct *p, int cpu) -+{ -+ cpumask_t *tmpmask = &(per_cpu(idlemask, cpu)); -+ struct rq *rq; -+ int best_cpu; -+ -+ cpumask_and(tmpmask, p->cpus_ptr, &cpu_idle_map); -+ best_cpu = best_mask_cpu(cpu, task_rq(p), tmpmask); -+ rq = cpu_rq(best_cpu); -+ if (!smt_schedule(p, rq)) -+ return NULL; -+ rq->preempt = p; -+ resched_idle(rq); -+ return rq; -+} -+ -+static inline void resched_suitable_idle(struct task_struct *p) -+{ -+ if (suitable_idle_cpus(p)) -+ resched_best_idle(p, task_cpu(p)); -+} -+ -+static inline struct rq *rq_order(struct rq *rq, int cpu) -+{ -+ return rq->rq_order[cpu]; -+} -+#else /* CONFIG_SMP */ -+static inline void set_cpuidle_map(int cpu) -+{ -+} -+ -+static inline void clear_cpuidle_map(int cpu) -+{ -+} -+ -+static inline bool suitable_idle_cpus(struct task_struct *p) -+{ -+ return uprq->curr == uprq->idle; -+} -+ -+static inline void resched_suitable_idle(struct task_struct *p) -+{ -+} -+ -+static inline void resched_curr(struct rq *rq) -+{ -+ resched_task(rq->curr); -+} -+ -+static inline void resched_if_idle(struct rq *rq) -+{ -+} -+ -+static inline bool rq_local(struct rq *rq) -+{ -+ return true; -+} -+ -+static inline struct rq *rq_order(struct rq *rq, int cpu) -+{ -+ return rq; -+} -+ -+static inline bool smt_schedule(struct task_struct *p, struct rq *rq) -+{ -+ return true; -+} -+#endif /* CONFIG_SMP */ -+ -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ if (idleprio_task(p)) -+ return IDLE_PRIO; -+ if (iso_task(p)) -+ return ISO_PRIO; -+ return NORMAL_PRIO; -+} -+ -+/* -+ * Calculate the current priority, i.e. 
the priority -+ * taken into account by the scheduler. This value might -+ * be boosted by RT tasks as it will be RT if the task got -+ * RT-boosted. If not then it returns p->normal_prio. -+ */ -+static int effective_prio(struct task_struct *p) -+{ -+ p->normal_prio = normal_prio(p); -+ /* -+ * If we are RT tasks or we were boosted to RT priority, -+ * keep the priority unchanged. Otherwise, update priority -+ * to the normal priority: -+ */ -+ if (!rt_prio(p->prio)) -+ return p->normal_prio; -+ return p->prio; -+} -+ -+/* -+ * activate_task - move a task to the runqueue. Enter with rq locked. -+ */ -+static void activate_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ resched_if_idle(rq); -+ -+ /* -+ * Sleep time is in units of nanosecs, so shift by 20 to get a -+ * milliseconds-range estimation of the amount of time that the task -+ * spent sleeping: -+ */ -+ if (unlikely(prof_on == SLEEP_PROFILING)) { -+ if (p->state == TASK_UNINTERRUPTIBLE) -+ profile_hits(SLEEP_PROFILING, (void *)get_wchan(p), -+ (rq->niffies - p->last_ran) >> 20); -+ } -+ -+ p->prio = effective_prio(p); -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible--; -+ -+ enqueue_task(rq, p, flags); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+} -+ -+/* -+ * deactivate_task - If it's running, it's not on the runqueue and we can just -+ * decrement the nr_running. Enter with rq locked. -+ */ -+static inline void deactivate_task(struct task_struct *p, struct rq *rq) -+{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible++; -+ -+ p->on_rq = 0; -+ sched_info_dequeued(rq, p); -+ /* deactivate_task is always DEQUEUE_SLEEP in muqss */ -+ psi_dequeue(p, DEQUEUE_SLEEP); -+} -+ -+#ifdef CONFIG_SMP -+void set_task_cpu(struct task_struct *p, unsigned int new_cpu) -+{ -+ struct rq *rq; -+ -+ if (task_cpu(p) == new_cpu) -+ return; -+ -+ /* Do NOT call set_task_cpu on a currently queued task as we will not -+ * be reliably holding the rq lock after changing CPU. */ -+ BUG_ON(task_queued(p)); -+ rq = task_rq(p); -+ -+#ifdef CONFIG_LOCKDEP -+ /* -+ * The caller should hold either p->pi_lock or rq->lock, when changing -+ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. -+ * -+ * Furthermore, all task_rq users should acquire both locks, see -+ * task_rq_lock(). -+ */ -+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || -+ lockdep_is_held(rq->lock))); -+#endif -+ -+ trace_sched_migrate_task(p, new_cpu); -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ -+ /* -+ * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be -+ * successfully executed on another CPU. We must ensure that updates of -+ * per-task data have been completed by this moment. -+ */ -+ smp_wmb(); -+ -+ p->wake_cpu = new_cpu; -+ -+ if (task_running(rq, p)) { -+ /* -+ * We should only be calling this on a running task if we're -+ * holding rq lock. -+ */ -+ lockdep_assert_held(rq->lock); -+ -+ /* -+ * We can't change the task_thread_info CPU on a running task -+ * as p will still be protected by the rq lock of the CPU it -+ * is still running on so we only set the wake_cpu for it to be -+ * lazily updated once off the CPU. -+ */ -+ return; -+ } -+ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ WRITE_ONCE(p->cpu, new_cpu); -+#else -+ WRITE_ONCE(task_thread_info(p)->cpu, new_cpu); -+#endif -+ /* We're no longer protecting p after this point since we're holding -+ * the wrong runqueue lock. 
*/ -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Move a task off the runqueue and take it to a cpu for it will -+ * become the running task. -+ */ -+static inline void take_task(struct rq *rq, int cpu, struct task_struct *p) -+{ -+ struct rq *p_rq = task_rq(p); -+ -+ dequeue_task(p_rq, p, DEQUEUE_SAVE); -+ if (p_rq != rq) { -+ sched_info_dequeued(p_rq, p); -+ sched_info_queued(rq, p); -+ } -+ set_task_cpu(p, cpu); -+} -+ -+/* -+ * Returns a descheduling task to the runqueue unless it is being -+ * deactivated. -+ */ -+static inline void return_task(struct task_struct *p, struct rq *rq, -+ int cpu, bool deactivate) -+{ -+ if (deactivate) -+ deactivate_task(p, rq); -+ else { -+#ifdef CONFIG_SMP -+ /* -+ * set_task_cpu was called on the running task that doesn't -+ * want to deactivate so it has to be enqueued to a different -+ * CPU and we need its lock. Tag it to be moved with as the -+ * lock is dropped in finish_lock_switch. -+ */ -+ if (unlikely(p->wake_cpu != cpu)) -+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); -+ else -+#endif -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ } -+} -+ -+/* Enter with rq lock held. We know p is on the local cpu */ -+static inline void __set_tsk_resched(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ -+/** -+ * task_curr - is this task currently executing on a CPU? -+ * @p: the task in question. -+ * -+ * Return: 1 if the task is currently executing. 0 otherwise. -+ */ -+inline int task_curr(const struct task_struct *p) -+{ -+ return cpu_curr(task_cpu(p)) == p; -+} -+ -+#ifdef CONFIG_SMP -+/* -+ * wait_task_inactive - wait for a thread to unschedule. -+ * -+ * If @match_state is nonzero, it's the @p->state value just checked and -+ * not expected to change. If it changes, i.e. @p might have woken up, -+ * then return zero. When we succeed in waiting for @p to be off its CPU, -+ * we return a positive number (its total switch count). If a second call -+ * a short while later returns the same number, the caller can be sure that -+ * @p has remained unscheduled the whole time. -+ * -+ * The caller must ensure that the task *will* unschedule sometime soon, -+ * else this function might spin for a *long* time. This function can't -+ * be called with interrupts off, or it may introduce deadlock with -+ * smp_call_function() if an IPI is sent by the same process we are -+ * waiting to become inactive. -+ */ -+unsigned long wait_task_inactive(struct task_struct *p, long match_state) -+{ -+ int running, queued; -+ struct rq_flags rf; -+ unsigned long ncsw; -+ struct rq *rq; -+ -+ for (;;) { -+ rq = task_rq(p); -+ -+ /* -+ * If the task is actively running on another CPU -+ * still, just relax and busy-wait without holding -+ * any locks. -+ * -+ * NOTE! Since we don't hold any locks, it's not -+ * even sure that "rq" stays as the right runqueue! -+ * But we don't care, since this will return false -+ * if the runqueue has changed and p is actually now -+ * running somewhere else! -+ */ -+ while (task_running(rq, p)) { -+ if (match_state && unlikely(p->state != match_state)) -+ return 0; -+ cpu_relax(); -+ } -+ -+ /* -+ * Ok, time to look more closely! We need the rq -+ * lock now, to be *sure*. If we're wrong, we'll -+ * just go back and repeat. 
-+ */ -+ rq = task_rq_lock(p, &rf); -+ trace_sched_wait_task(p); -+ running = task_running(rq, p); -+ queued = task_on_rq_queued(p); -+ ncsw = 0; -+ if (!match_state || p->state == match_state) -+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ -+ task_rq_unlock(rq, p, &rf); -+ -+ /* -+ * If it changed from the expected state, bail out now. -+ */ -+ if (unlikely(!ncsw)) -+ break; -+ -+ /* -+ * Was it really running after all now that we -+ * checked with the proper locks actually held? -+ * -+ * Oops. Go back and try again.. -+ */ -+ if (unlikely(running)) { -+ cpu_relax(); -+ continue; -+ } -+ -+ /* -+ * It's not enough that it's not actively running, -+ * it must be off the runqueue _entirely_, and not -+ * preempted! -+ * -+ * So if it was still runnable (but just not actively -+ * running right now), it's preempted, and we should -+ * yield - it could be a while. -+ */ -+ if (unlikely(queued)) { -+ ktime_t to = NSEC_PER_SEC / HZ; -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ continue; -+ } -+ -+ /* -+ * Ahh, all good. It wasn't running, and it wasn't -+ * runnable, which means that it will never become -+ * running in the future either. We're all done! -+ */ -+ break; -+ } -+ -+ return ncsw; -+} -+ -+/*** -+ * kick_process - kick a running thread to enter/exit the kernel -+ * @p: the to-be-kicked thread -+ * -+ * Cause a process which is running on another CPU to enter -+ * kernel-mode, without any delay. (to get signals handled.) -+ * -+ * NOTE: this function doesn't have to take the runqueue lock, -+ * because all it wants to ensure is that the remote task enters -+ * the kernel. If the IPI races and the task has been migrated -+ * to another CPU then no harm is done and the purpose has been -+ * achieved as well. -+ */ -+void kick_process(struct task_struct *p) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ cpu = task_cpu(p); -+ if ((cpu != smp_processor_id()) && task_curr(p)) -+ smp_sched_reschedule(cpu); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(kick_process); -+#endif -+ -+/* -+ * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the -+ * basis of earlier deadlines. SCHED_IDLEPRIO don't preempt anything else or -+ * between themselves, they cooperatively multitask. An idle rq scores as -+ * prio PRIO_LIMIT so it is always preempted. -+ */ -+static inline bool -+can_preempt(struct task_struct *p, int prio, u64 deadline) -+{ -+ /* Better static priority RT task or better policy preemption */ -+ if (p->prio < prio) -+ return true; -+ if (p->prio > prio) -+ return false; -+ if (p->policy == SCHED_BATCH) -+ return false; -+ /* SCHED_NORMAL and ISO will preempt based on deadline */ -+ if (!deadline_before(p->deadline, deadline)) -+ return false; -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+ -+/* -+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see -+ * __set_cpus_allowed_ptr(). -+ */ -+static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -+{ -+ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ -+ if (is_per_cpu_kthread(p)) -+ return cpu_online(cpu); -+ -+ return cpu_active(cpu); -+} -+ -+/* -+ * Check to see if p can run on cpu, and if not, whether there are any online -+ * CPUs it can run on instead. This only happens with the hotplug threads that -+ * bring up the CPUs. 
-+ */ -+static inline bool sched_other_cpu(struct task_struct *p, int cpu) -+{ -+ if (likely(cpumask_test_cpu(cpu, p->cpus_ptr))) -+ return false; -+ if (p->nr_cpus_allowed == 1) { -+ cpumask_t valid_mask; -+ -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_online_mask); -+ if (unlikely(cpumask_empty(&valid_mask))) -+ return false; -+ } -+ return true; -+} -+ -+static inline bool needs_other_cpu(struct task_struct *p, int cpu) -+{ -+ if (cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ return true; -+} -+ -+#define cpu_online_map (*(cpumask_t *)cpu_online_mask) -+ -+static void try_preempt(struct task_struct *p, struct rq *this_rq) -+{ -+ int i, this_entries = rq_load(this_rq); -+ cpumask_t tmp; -+ -+ if (suitable_idle_cpus(p) && resched_best_idle(p, task_cpu(p))) -+ return; -+ -+ /* IDLEPRIO tasks never preempt anything but idle */ -+ if (p->policy == SCHED_IDLEPRIO) -+ return; -+ -+ cpumask_and(&tmp, &cpu_online_map, p->cpus_ptr); -+ -+ for (i = 0; i < num_online_cpus(); i++) { -+ struct rq *rq = this_rq->cpu_order[i]; -+ -+ if (!cpumask_test_cpu(rq->cpu, &tmp)) -+ continue; -+ -+ if (!sched_interactive && rq != this_rq && rq_load(rq) <= this_entries) -+ continue; -+ if (smt_schedule(p, rq) && can_preempt(p, rq->rq_prio, rq->rq_deadline)) { -+ /* We set rq->preempting lockless, it's a hint only */ -+ rq->preempting = p; -+ resched_curr(rq); -+ return; -+ } -+ } -+} -+ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check); -+#else /* CONFIG_SMP */ -+static inline bool needs_other_cpu(struct task_struct *p, int cpu) -+{ -+ return false; -+} -+ -+static void try_preempt(struct task_struct *p, struct rq *this_rq) -+{ -+ if (p->policy == SCHED_IDLEPRIO) -+ return; -+ if (can_preempt(p, uprq->rq_prio, uprq->rq_deadline)) -+ resched_curr(uprq); -+} -+ -+static inline int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+#endif /* CONFIG_SMP */ -+ -+static void -+ttwu_stat(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq; -+ -+ if (!schedstat_enabled()) -+ return; -+ -+ rq = this_rq(); -+ -+#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) { -+ __schedstat_inc(rq->ttwu_local); -+ } else { -+ struct sched_domain *sd; -+ -+ rcu_read_lock(); -+ for_each_domain(rq->cpu, sd) { -+ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { -+ __schedstat_inc(sd->ttwu_wake_remote); -+ break; -+ } -+ } -+ rcu_read_unlock(); -+ } -+ -+#endif /* CONFIG_SMP */ -+ -+ __schedstat_inc(rq->ttwu_count); -+} -+ -+/* -+ * Mark the task runnable and perform wakeup-preemption. -+ */ -+static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ /* -+ * Sync wakeups (i.e. those types of wakeups where the waker -+ * has indicated that it will leave the CPU in short order) -+ * don't trigger a preemption if there are no idle cpus, -+ * instead waiting for current to deschedule. 
-+ */ -+ if (wake_flags & WF_SYNC) -+ resched_suitable_idle(p); -+ else -+ try_preempt(p, rq); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+} -+ -+static void -+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ int en_flags = ENQUEUE_WAKEUP; -+ -+ lockdep_assert_held(rq->lock); -+ -+#ifdef CONFIG_SMP -+ if (p->sched_contributes_to_load) -+ rq->nr_uninterruptible--; -+ -+ if (wake_flags & WF_MIGRATED) -+ en_flags |= ENQUEUE_MIGRATED; -+#endif -+ -+ activate_task(rq, p, en_flags); -+ ttwu_do_wakeup(rq, p, wake_flags); -+} -+ -+/* -+ * Called in case the task @p isn't fully descheduled from its runqueue, -+ * in this case we must do a remote wakeup. Its a 'light' wakeup though, -+ * since all we need to do is flip p->state to TASK_RUNNING, since -+ * the task is still ->on_rq. -+ */ -+static int ttwu_remote(struct task_struct *p, int wake_flags) -+{ -+ struct rq *rq; -+ int ret = 0; -+ -+ rq = __task_rq_lock(p, NULL); -+ if (likely(task_on_rq_queued(p))) { -+ ttwu_do_wakeup(rq, p, wake_flags); -+ ret = 1; -+ } -+ __task_rq_unlock(rq, NULL); -+ -+ return ret; -+} -+ -+#ifdef CONFIG_SMP -+void sched_ttwu_pending(void) -+{ -+ struct rq *rq = this_rq(); -+ struct llist_node *llist = llist_del_all(&rq->wake_list); -+ struct task_struct *p, *t; -+ struct rq_flags rf; -+ -+ if (!llist) -+ return; -+ -+ rq_lock_irqsave(rq, &rf); -+ -+ llist_for_each_entry_safe(p, t, llist, wake_entry) -+ ttwu_do_activate(rq, p, 0); -+ -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+void scheduler_ipi(void) -+{ -+ /* -+ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting -+ * TIF_NEED_RESCHED remotely (for the first time) will also send -+ * this IPI. -+ */ -+ preempt_fold_need_resched(); -+ -+ if (llist_empty(&this_rq()->wake_list) && (!idle_cpu(smp_processor_id()) || need_resched())) -+ return; -+ -+ /* -+ * Not all reschedule IPI handlers call irq_enter/irq_exit, since -+ * traditionally all their work was done from the interrupt return -+ * path. Now that we actually do some work, we need to make sure -+ * we do call them. -+ * -+ * Some archs already do call them, luckily irq_enter/exit nest -+ * properly. -+ * -+ * Arguably we should visit all archs and update all handlers, -+ * however a fair share of IPIs are still resched only so this would -+ * somewhat pessimize the simple resched case. 
-+ */ -+ irq_enter(); -+ sched_ttwu_pending(); -+ irq_exit(); -+} -+ -+static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { -+ if (!set_nr_if_polling(rq->idle)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+ } -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ rq_lock_irqsave(rq, &rf); -+ if (likely(is_idle_task(rq->curr))) -+ smp_sched_reschedule(cpu); -+ /* Else cpu is not in idle, do nothing here */ -+ rq_unlock_irqrestore(rq, &rf); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+ -+static int valid_task_cpu(struct task_struct *p) -+{ -+ cpumask_t valid_mask; -+ -+ if (p->flags & PF_KTHREAD) -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_all_mask); -+ else -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_active_mask); -+ -+ if (unlikely(!cpumask_weight(&valid_mask))) { -+ /* We shouldn't be hitting this any more */ -+ printk(KERN_WARNING "SCHED: No cpumask for %s/%d weight %d\n", p->comm, -+ p->pid, cpumask_weight(p->cpus_ptr)); -+ return cpumask_any(p->cpus_ptr); -+ } -+ return cpumask_any(&valid_mask); -+} -+ -+/* -+ * For a task that's just being woken up we have a valuable balancing -+ * opportunity so choose the nearest cache most lightly loaded runqueue. -+ * Entered with rq locked and returns with the chosen runqueue locked. -+ */ -+static inline int select_best_cpu(struct task_struct *p) -+{ -+ unsigned int idlest = ~0U; -+ struct rq *rq = NULL; -+ int i; -+ -+ if (suitable_idle_cpus(p)) { -+ int cpu = task_cpu(p); -+ -+ if (unlikely(needs_other_cpu(p, cpu))) -+ cpu = valid_task_cpu(p); -+ rq = resched_best_idle(p, cpu); -+ if (likely(rq)) -+ return rq->cpu; -+ } -+ -+ for (i = 0; i < num_online_cpus(); i++) { -+ struct rq *other_rq = task_rq(p)->cpu_order[i]; -+ int entries; -+ -+ if (!other_rq->online) -+ continue; -+ if (needs_other_cpu(p, other_rq->cpu)) -+ continue; -+ entries = rq_load(other_rq); -+ if (entries >= idlest) -+ continue; -+ idlest = entries; -+ rq = other_rq; -+ } -+ if (unlikely(!rq)) -+ return task_cpu(p); -+ return rq->cpu; -+} -+#else /* CONFIG_SMP */ -+static int valid_task_cpu(struct task_struct *p) -+{ -+ return 0; -+} -+ -+static inline int select_best_cpu(struct task_struct *p) -+{ -+ return 0; -+} -+ -+static struct rq *resched_best_idle(struct task_struct *p, int cpu) -+{ -+ return NULL; -+} -+#endif /* CONFIG_SMP */ -+ -+static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+#if defined(CONFIG_SMP) -+ if (!cpus_share_cache(smp_processor_id(), cpu)) { -+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ -+ ttwu_queue_remote(p, cpu, wake_flags); -+ return; -+ } -+#endif -+ rq_lock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ rq_unlock(rq); -+} -+ -+/*** -+ * try_to_wake_up - wake up a thread -+ * @p: the thread to be awakened -+ * @state: the mask of task states that can be woken -+ * @wake_flags: wake modifier flags (WF_*) -+ * -+ * Put it on the run-queue if it's not already there. 
The "current" -+ * thread is always on the run-queue (except when the actual -+ * re-schedule is in progress), and as such you're allowed to do -+ * the simpler "current->state = TASK_RUNNING" to mark yourself -+ * runnable without the overhead of this. -+ * -+ * Return: %true if @p was woken up, %false if it was already running. -+ * or @state didn't match @p's state. -+ */ -+static int -+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) -+{ -+ unsigned long flags; -+ int cpu, success = 0; -+ -+ preempt_disable(); -+ if (p == current) { -+ /* -+ * We're waking current, this means 'p->on_rq' and 'task_cpu(p) -+ * == smp_processor_id()'. Together this means we can special -+ * case the whole 'p->on_rq && ttwu_remote()' case below -+ * without taking any locks. -+ * -+ * In particular: -+ * - we rely on Program-Order guarantees for all the ordering, -+ * - we're serialized against set_special_state() by virtue of -+ * it disabling IRQs (this allows not taking ->pi_lock). -+ */ -+ if (!(p->state & state)) -+ goto out; -+ -+ success = 1; -+ cpu = task_cpu(p); -+ trace_sched_waking(p); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+ goto out; -+ } -+ -+ /* -+ * If we are going to wake up a thread waiting for CONDITION we -+ * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with mb() in -+ * set_current_state() the waiting thread does. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ smp_mb__after_spinlock(); -+ if (!(p->state & state)) -+ goto unlock; -+ -+ trace_sched_waking(p); -+ -+ /* We're going to change ->state: */ -+ success = 1; -+ cpu = task_cpu(p); -+ -+ /* -+ * Ensure we load p->on_rq _after_ p->state, otherwise it would -+ * be possible to, falsely, observe p->on_rq == 0 and get stuck -+ * in smp_cond_load_acquire() below. -+ * -+ * sched_ttwu_pending() try_to_wake_up() -+ * STORE p->on_rq = 1 LOAD p->state -+ * UNLOCK rq->lock -+ * -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * UNLOCK rq->lock -+ * -+ * [task p] -+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ if (p->on_rq && ttwu_remote(p, wake_flags)) -+ goto unlock; -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -+ * possible to, falsely, observe p->on_cpu == 0. -+ * -+ * One must be running (->on_cpu == 1) in order to remove oneself -+ * from the runqueue. -+ * -+ * __schedule() (switch to task 'p') try_to_wake_up() -+ * STORE p->on_cpu = 1 LOAD p->on_rq -+ * UNLOCK rq->lock -+ * -+ * __schedule() (put 'p' to sleep) -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * STORE p->on_rq = 0 LOAD p->on_cpu -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until its done referencing the task. -+ * -+ * Pairs with the smp_store_release() in finish_task(). -+ * -+ * This ensures that tasks getting woken will be fully ordered against -+ * their previous state and preserve Program Order. 
-+ */ -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ p->sched_contributes_to_load = !!task_contributes_to_load(p); -+ p->state = TASK_WAKING; -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+ cpu = select_best_cpu(p); -+ if (task_cpu(p) != cpu) { -+ wake_flags |= WF_MIGRATED; -+ psi_ttwu_dequeue(p); -+ set_task_cpu(p, cpu); -+ } -+ -+#else /* CONFIG_SMP */ -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+#endif /* CONFIG_SMP */ -+ -+ ttwu_queue(p, cpu, wake_flags); -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+out: -+ if (success) -+ ttwu_stat(p, cpu, wake_flags); -+ preempt_enable(); -+ -+ return success; -+} -+ -+/** -+ * wake_up_process - Wake up a specific process -+ * @p: The process to be woken up. -+ * -+ * Attempt to wake up the nominated process and move it to the set of runnable -+ * processes. -+ * -+ * Return: 1 if the process was woken up, 0 if it was already running. -+ * -+ * This function executes a full memory barrier before accessing the task state. -+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+int wake_up_state(struct task_struct *p, unsigned int state) -+{ -+ return try_to_wake_up(p, state, 0); -+} -+ -+static void time_slice_expired(struct task_struct *p, struct rq *rq); -+ -+/* -+ * Perform scheduler related setup for a newly forked process p. -+ * p is forked by current. -+ */ -+int sched_fork(unsigned long __maybe_unused clone_flags, struct task_struct *p) -+{ -+ unsigned long flags; -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ INIT_HLIST_HEAD(&p->preempt_notifiers); -+#endif -+ -+#ifdef CONFIG_COMPACTION -+ p->capture_control = NULL; -+#endif -+ -+ /* -+ * We mark the process as NEW here. This guarantees that -+ * nobody will actually run it, and a signal or other external -+ * event cannot wake it up and insert it on the runqueue either. -+ */ -+ p->state = TASK_NEW; -+ -+ /* -+ * The process state is set to the same value of the process executing -+ * do_fork() code. That is running. This guarantees that nobody will -+ * actually run it, and a signal or other external event cannot wake -+ * it up and insert it on the runqueue either. -+ */ -+ -+ /* Should be reset in fork.c but done here for ease of MuQSS patching */ -+ p->on_cpu = -+ p->on_rq = -+ p->utime = -+ p->stime = -+ p->sched_time = -+ p->stime_ns = -+ p->utime_ns = 0; -+ skiplist_node_init(&p->node); -+ -+ /* -+ * Revert to default priority/policy on fork if requested. -+ */ -+ if (unlikely(p->sched_reset_on_fork)) { -+ if (p->policy == SCHED_FIFO || p->policy == SCHED_RR || p-> policy == SCHED_ISO) { -+ p->policy = SCHED_NORMAL; -+ p->normal_prio = normal_prio(p); -+ } -+ -+ if (PRIO_TO_NICE(p->static_prio) < 0) { -+ p->static_prio = NICE_TO_PRIO(0); -+ p->normal_prio = p->static_prio; -+ } -+ -+ /* -+ * We don't need the reset flag anymore after the fork. It has -+ * fulfilled its duty: -+ */ -+ p->sched_reset_on_fork = 0; -+ } -+ -+ /* -+ * Silence PROVE_RCU. 
-+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ set_task_cpu(p, smp_processor_id()); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ return 0; -+} -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+static bool __initdata __sched_schedstats = false; -+ -+static void set_schedstats(bool enabled) -+{ -+ if (enabled) -+ static_branch_enable(&sched_schedstats); -+ else -+ static_branch_disable(&sched_schedstats); -+} -+ -+void force_schedstat_enabled(void) -+{ -+ if (!schedstat_enabled()) { -+ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); -+ static_branch_enable(&sched_schedstats); -+ } -+} -+ -+static int __init setup_schedstats(char *str) -+{ -+ int ret = 0; -+ if (!str) -+ goto out; -+ -+ /* -+ * This code is called before jump labels have been set up, so we can't -+ * change the static branch directly just yet. Instead set a temporary -+ * variable so init_schedstats() can do it later. -+ */ -+ if (!strcmp(str, "enable")) { -+ __sched_schedstats = true; -+ ret = 1; -+ } else if (!strcmp(str, "disable")) { -+ __sched_schedstats = false; -+ ret = 1; -+ } -+out: -+ if (!ret) -+ pr_warn("Unable to parse schedstats=\n"); -+ -+ return ret; -+} -+__setup("schedstats=", setup_schedstats); -+ -+static void __init init_schedstats(void) -+{ -+ set_schedstats(__sched_schedstats); -+} -+ -+#ifdef CONFIG_PROC_SYSCTL -+int sysctl_schedstats(struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) -+{ -+ struct ctl_table t; -+ int err; -+ int state = static_branch_likely(&sched_schedstats); -+ -+ if (write && !capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ t = *table; -+ t.data = &state; -+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); -+ if (err < 0) -+ return err; -+ if (write) -+ set_schedstats(state); -+ return err; -+} -+#endif /* CONFIG_PROC_SYSCTL */ -+#else /* !CONFIG_SCHEDSTATS */ -+static inline void init_schedstats(void) {} -+#endif /* CONFIG_SCHEDSTATS */ -+ -+static void update_cpu_clock_switch(struct rq *rq, struct task_struct *p); -+ -+static void account_task_cpu(struct rq *rq, struct task_struct *p) -+{ -+ update_clocks(rq); -+ /* This isn't really a context switch but accounting is the same */ -+ update_cpu_clock_switch(rq, p); -+ p->last_ran = rq->niffies; -+} -+ -+bool sched_smp_initialized __read_mostly; -+ -+static inline int hrexpiry_enabled(struct rq *rq) -+{ -+ if (unlikely(!cpu_active(cpu_of(rq)) || !sched_smp_initialized)) -+ return 0; -+ return hrtimer_is_hres_active(&rq->hrexpiry_timer); -+} -+ -+/* -+ * Use HR-timers to deliver accurate preemption points. -+ */ -+static inline void hrexpiry_clear(struct rq *rq) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ if (hrtimer_active(&rq->hrexpiry_timer)) -+ hrtimer_cancel(&rq->hrexpiry_timer); -+} -+ -+/* -+ * High-resolution time_slice expiry. -+ * Runs from hardirq context with interrupts disabled. -+ */ -+static enum hrtimer_restart hrexpiry(struct hrtimer *timer) -+{ -+ struct rq *rq = container_of(timer, struct rq, hrexpiry_timer); -+ struct task_struct *p; -+ -+ /* This can happen during CPU hotplug / resume */ -+ if (unlikely(cpu_of(rq) != smp_processor_id())) -+ goto out; -+ -+ /* -+ * We're doing this without the runqueue lock but this should always -+ * be run on the local CPU. 
Time slice should run out in __schedule -+ * but we set it to zero here in case niffies is slightly less. -+ */ -+ p = rq->curr; -+ p->time_slice = 0; -+ __set_tsk_resched(p); -+out: -+ return HRTIMER_NORESTART; -+} -+ -+/* -+ * Called to set the hrexpiry timer state. -+ * -+ * called with irqs disabled from the local CPU only -+ */ -+static void hrexpiry_start(struct rq *rq, u64 delay) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ -+ hrtimer_start(&rq->hrexpiry_timer, ns_to_ktime(delay), -+ HRTIMER_MODE_REL_PINNED); -+} -+ -+static void init_rq_hrexpiry(struct rq *rq) -+{ -+ hrtimer_init(&rq->hrexpiry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ rq->hrexpiry_timer.function = hrexpiry; -+} -+ -+static inline int rq_dither(struct rq *rq) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return HALF_JIFFY_US; -+ return 0; -+} -+ -+/* -+ * wake_up_new_task - wake up a newly created task for the first time. -+ * -+ * This function will do some initial scheduler statistics housekeeping -+ * that must be done for every newly created context, then puts the task -+ * on the runqueue and wakes it. -+ */ -+void wake_up_new_task(struct task_struct *p) -+{ -+ struct task_struct *parent, *rq_curr; -+ struct rq *rq, *new_rq; -+ unsigned long flags; -+ -+ parent = p->parent; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ p->state = TASK_RUNNING; -+ /* Task_rq can't change yet on a new task */ -+ new_rq = rq = task_rq(p); -+ if (unlikely(needs_other_cpu(p, task_cpu(p)))) { -+ set_task_cpu(p, valid_task_cpu(p)); -+ new_rq = task_rq(p); -+ } -+ -+ double_rq_lock(rq, new_rq); -+ rq_curr = rq->curr; -+ -+ /* -+ * Make sure we do not leak PI boosting priority to the child. -+ */ -+ p->prio = rq_curr->normal_prio; -+ -+ trace_sched_wakeup_new(p); -+ -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. If it's negative, it won't -+ * matter since that's the same as being 0. rq->rq_deadline is only -+ * modified within schedule() so it is always equal to -+ * current->deadline. -+ */ -+ account_task_cpu(rq, rq_curr); -+ p->last_ran = rq_curr->last_ran; -+ if (likely(rq_curr->policy != SCHED_FIFO)) { -+ rq_curr->time_slice /= 2; -+ if (rq_curr->time_slice < RESCHED_US) { -+ /* -+ * Forking task has run out of timeslice. Reschedule it and -+ * start its child with a new time slice and deadline. The -+ * child will end up running first because its deadline will -+ * be slightly earlier. -+ */ -+ __set_tsk_resched(rq_curr); -+ time_slice_expired(p, new_rq); -+ if (suitable_idle_cpus(p)) -+ resched_best_idle(p, task_cpu(p)); -+ else if (unlikely(rq != new_rq)) -+ try_preempt(p, new_rq); -+ } else { -+ p->time_slice = rq_curr->time_slice; -+ if (rq_curr == parent && rq == new_rq && !suitable_idle_cpus(p)) { -+ /* -+ * The VM isn't cloned, so we're in a good position to -+ * do child-runs-first in anticipation of an exec. This -+ * usually avoids a lot of COW overhead. -+ */ -+ __set_tsk_resched(rq_curr); -+ } else { -+ /* -+ * Adjust the hrexpiry since rq_curr will keep -+ * running and its timeslice has been shortened. 
-+ */ -+ hrexpiry_start(rq, US_TO_NS(rq_curr->time_slice)); -+ try_preempt(p, new_rq); -+ } -+ } -+ } else { -+ time_slice_expired(p, new_rq); -+ try_preempt(p, new_rq); -+ } -+ activate_task(new_rq, p, 0); -+ double_rq_unlock(rq, new_rq); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ -+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); -+ -+void preempt_notifier_inc(void) -+{ -+ static_branch_inc(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_inc); -+ -+void preempt_notifier_dec(void) -+{ -+ static_branch_dec(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_dec); -+ -+/** -+ * preempt_notifier_register - tell me when current is being preempted & rescheduled -+ * @notifier: notifier struct to register -+ */ -+void preempt_notifier_register(struct preempt_notifier *notifier) -+{ -+ if (!static_branch_unlikely(&preempt_notifier_key)) -+ WARN(1, "registering preempt_notifier while notifiers disabled\n"); -+ -+ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_register); -+ -+/** -+ * preempt_notifier_unregister - no longer interested in preemption notifications -+ * @notifier: notifier struct to unregister -+ * -+ * This is *not* safe to call from within a preemption notifier. -+ */ -+void preempt_notifier_unregister(struct preempt_notifier *notifier) -+{ -+ hlist_del(¬ifier->link); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_unregister); -+ -+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_in(notifier, raw_smp_processor_id()); -+} -+ -+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_in_preempt_notifiers(curr); -+} -+ -+static void -+__fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_out(notifier, next); -+} -+ -+static __always_inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_out_preempt_notifiers(curr, next); -+} -+ -+#else /* !CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+} -+ -+static inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+} -+ -+#endif /* CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void prepare_task(struct task_struct *next) -+{ -+ /* -+ * Claim the task as running, we do this before switching to it -+ * such that any running task will have this set. -+ */ -+ next->on_cpu = 1; -+} -+ -+static inline void finish_task(struct task_struct *prev) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->on_cpu is cleared, the task can be moved to a different CPU. -+ * We must ensure this doesn't happen until the switch is completely -+ * finished. -+ * -+ * In particular, the load of prev->state in finish_task_switch() must -+ * happen before this. -+ * -+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). 
-+ */ -+ smp_store_release(&prev->on_cpu, 0); -+#endif -+} -+ -+static inline void -+prepare_lock_switch(struct rq *rq, struct task_struct *next) -+{ -+ /* -+ * Since the runqueue lock will be released by the next -+ * task (which is an invalid locking op but in the case -+ * of the scheduler it's an obvious special-case), so we -+ * do an early lockdep release here: -+ */ -+ spin_release(&rq->lock->dep_map, _THIS_IP_); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* this is a valid case when another task releases the spinlock */ -+ rq->lock->owner = next; -+#endif -+} -+ -+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) -+{ -+ /* -+ * If we are tracking spinlock dependencies then we have to -+ * fix up the runqueue lock - which gets 'carried over' from -+ * prev into current: -+ */ -+ spin_acquire(&rq->lock->dep_map, 0, 0, _THIS_IP_); -+ -+#ifdef CONFIG_SMP -+ /* -+ * If prev was marked as migrating to another CPU in return_task, drop -+ * the local runqueue lock but leave interrupts disabled and grab the -+ * remote lock we're migrating it to before enabling them. -+ */ -+ if (unlikely(task_on_rq_migrating(prev))) { -+ sched_info_dequeued(rq, prev); -+ /* -+ * We move the ownership of prev to the new cpu now. ttwu can't -+ * activate prev to the wrong cpu since it has to grab this -+ * runqueue in ttwu_remote. -+ */ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ prev->cpu = prev->wake_cpu; -+#else -+ task_thread_info(prev)->cpu = prev->wake_cpu; -+#endif -+ raw_spin_unlock(rq->lock); -+ -+ raw_spin_lock(&prev->pi_lock); -+ rq = __task_rq_lock(prev, NULL); -+ /* Check that someone else hasn't already queued prev */ -+ if (likely(!task_queued(prev))) { -+ enqueue_task(rq, prev, 0); -+ prev->on_rq = TASK_ON_RQ_QUEUED; -+ /* Wake up the CPU if it's not already running */ -+ resched_if_idle(rq); -+ } -+ raw_spin_unlock(&prev->pi_lock); -+ } -+#endif -+ rq_unlock(rq); -+ local_irq_enable(); -+} -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+#ifndef finish_arch_switch -+# define finish_arch_switch(prev) do { } while (0) -+#endif -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+/** -+ * prepare_task_switch - prepare to switch tasks -+ * @rq: the runqueue preparing to switch -+ * @next: the task we are going to switch to. -+ * -+ * This is called with the rq lock held and interrupts off. It must -+ * be paired with a subsequent finish_task_switch after the context -+ * switch. -+ * -+ * prepare_task_switch sets up locking and calls architecture specific -+ * hooks. -+ */ -+static inline void -+prepare_task_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ kcov_prepare_switch(prev); -+ sched_info_switch(rq, prev, next); -+ perf_event_task_sched_out(prev, next); -+ rseq_preempt(prev); -+ fire_sched_out_preempt_notifiers(prev, next); -+ prepare_task(next); -+ prepare_arch_switch(next); -+} -+ -+/** -+ * finish_task_switch - clean up after a task-switch -+ * @rq: runqueue associated with task-switch -+ * @prev: the thread we just switched away from. -+ * -+ * finish_task_switch must be called after the context switch, paired -+ * with a prepare_task_switch call before the context switch. -+ * finish_task_switch will reconcile locking set up by prepare_task_switch, -+ * and do any other architecture-specific cleanup actions. -+ * -+ * Note that we may have delayed dropping an mm in context_switch(). 
If -+ * so, we finish that here outside of the runqueue lock. (Doing it -+ * with the lock held can cause deadlocks; see schedule() for -+ * details.) -+ * -+ * The context switch have flipped the stack from under us and restored the -+ * local variables which were saved when this task called schedule() in the -+ * past. prev == current is still correct but we need to recalculate this_rq -+ * because prev may have moved to another CPU. -+ */ -+static void finish_task_switch(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq = this_rq(); -+ struct mm_struct *mm = rq->prev_mm; -+ long prev_state; -+ -+ /* -+ * The previous task will have left us with a preempt_count of 2 -+ * because it left us after: -+ * -+ * schedule() -+ * preempt_disable(); // 1 -+ * __schedule() -+ * raw_spin_lock_irq(rq->lock) // 2 -+ * -+ * Also, see FORK_PREEMPT_COUNT. -+ */ -+ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, -+ "corrupted preempt_count: %s/%d/0x%x\n", -+ current->comm, current->pid, preempt_count())) -+ preempt_count_set(FORK_PREEMPT_COUNT); -+ -+ rq->prev_mm = NULL; -+ -+ /* -+ * A task struct has one reference for the use as "current". -+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls -+ * schedule one last time. The schedule call will never return, and -+ * the scheduled task must drop that reference. -+ * -+ * We must observe prev->state before clearing prev->on_cpu (in -+ * finish_task), otherwise a concurrent wakeup can get prev -+ * running on another CPU and we could rave with its RUNNING -> DEAD -+ * transition, resulting in a double drop. -+ */ -+ prev_state = prev->state; -+ vtime_task_switch(prev); -+ perf_event_task_sched_in(prev, current); -+ finish_task(prev); -+ finish_lock_switch(rq, prev); -+ finish_arch_post_lock_switch(); -+ kcov_finish_switch(current); -+ -+ fire_sched_in_preempt_notifiers(current); -+ /* -+ * When switching through a kernel thread, the loop in -+ * membarrier_{private,global}_expedited() may have observed that -+ * kernel thread and not issued an IPI. It is therefore possible to -+ * schedule between user->kernel->user threads without passing though -+ * switch_mm(). Membarrier requires a barrier after storing to -+ * rq->curr, before returning to userspace, so provide them here: -+ * -+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly -+ * provided by mmdrop(), -+ * - a sync_core for SYNC_CORE. -+ */ -+ if (mm) { -+ membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop(mm); -+ } -+ if (unlikely(prev_state == TASK_DEAD)) { -+ /* -+ * Remove function-return probe instances associated with this -+ * task and put them back on the free list. -+ */ -+ kprobe_flush_task(prev); -+ -+ /* Task is done with its stack. */ -+ put_task_stack(prev); -+ -+ put_task_struct_rcu_user(prev); -+ } -+} -+ -+/** -+ * schedule_tail - first thing a freshly forked thread must call. -+ * @prev: the thread we just switched away from. -+ */ -+asmlinkage __visible void schedule_tail(struct task_struct *prev) -+{ -+ /* -+ * New tasks start with FORK_PREEMPT_COUNT, see there and -+ * finish_task_switch() for details. -+ * -+ * finish_task_switch() will drop rq->lock() and lower preempt_count -+ * and the preempt_enable() will end up enabling preemption (on -+ * PREEMPT_COUNT kernels). 
-+ */ -+ -+ finish_task_switch(prev); -+ preempt_enable(); -+ -+ if (current->set_child_tid) -+ put_user(task_pid_vnr(current), current->set_child_tid); -+ -+ calculate_sigpending(); -+} -+ -+/* -+ * context_switch - switch to the new MM and the new thread's register state. -+ */ -+static __always_inline void -+context_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ prepare_task_switch(rq, prev, next); -+ -+ /* -+ * For paravirt, this is coupled with an exit in switch_to to -+ * combine the page table reload and the switch backend into -+ * one hypercall. -+ */ -+ arch_start_context_switch(prev); -+ -+ /* -+ * kernel -> kernel lazy + transfer active -+ * user -> kernel lazy + mmgrab() active -+ * -+ * kernel -> user switch + mmdrop() active -+ * user -> user switch -+ */ -+ if (!next->mm) { // to kernel -+ enter_lazy_tlb(prev->active_mm, next); -+ -+ next->active_mm = prev->active_mm; -+ if (prev->mm) // from user -+ mmgrab(prev->active_mm); -+ else -+ prev->active_mm = NULL; -+ } else { // to user -+ membarrier_switch_mm(rq, prev->active_mm, next->mm); -+ /* -+ * sys_membarrier() requires an smp_mb() between setting -+ * rq->curr / membarrier_switch_mm() and returning to userspace. -+ * -+ * The below provides this either through switch_mm(), or in -+ * case 'prev->active_mm == next->mm' through -+ * finish_task_switch()'s mmdrop(). -+ */ -+ switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ -+ if (!prev->mm) { // from kernel -+ /* will mmdrop() in finish_task_switch(). */ -+ rq->prev_mm = prev->active_mm; -+ prev->active_mm = NULL; -+ } -+ } -+ prepare_lock_switch(rq, next); -+ -+ /* Here we just switch the register state and the stack. */ -+ switch_to(prev, next, prev); -+ barrier(); -+ -+ finish_task_switch(prev); -+} -+ -+/* -+ * nr_running, nr_uninterruptible and nr_context_switches: -+ * -+ * externally visible scheduler statistics: current number of runnable -+ * threads, total number of context switches performed since bootup. -+ */ -+unsigned long nr_running(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_running; -+ -+ return sum; -+} -+ -+static unsigned long nr_uninterruptible(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_uninterruptible; -+ -+ return sum; -+} -+ -+/* -+ * Check if only the current task is running on the CPU. -+ * -+ * Caution: this function does not check that the caller has disabled -+ * preemption, thus the result might have a time-of-check-to-time-of-use -+ * race. The caller is responsible to use it correctly, for example: -+ * -+ * - from a non-preemptible section (of course) -+ * -+ * - from a thread that is bound to a single CPU -+ * -+ * - in a loop with very short iterations (e.g. a polling loop) -+ */ -+bool single_task_running(void) -+{ -+ if (rq_load(raw_rq()) == 1) -+ return true; -+ else -+ return false; -+} -+EXPORT_SYMBOL(single_task_running); -+ -+unsigned long long nr_context_switches(void) -+{ -+ int cpu; -+ unsigned long long sum = 0; -+ -+ for_each_possible_cpu(cpu) -+ sum += cpu_rq(cpu)->nr_switches; -+ -+ return sum; -+} -+ -+/* -+ * Consumers of these two interfaces, like for example the cpufreq menu -+ * governor are using nonsensical data. Boosting frequency for a CPU that has -+ * IO-wait which might not even end up running the task when it does become -+ * runnable. 
-+ */ -+ -+unsigned long nr_iowait_cpu(int cpu) -+{ -+ return atomic_read(&cpu_rq(cpu)->nr_iowait); -+} -+ -+/* -+ * IO-wait accounting, and how its mostly bollocks (on SMP). -+ * -+ * The idea behind IO-wait account is to account the idle time that we could -+ * have spend running if it were not for IO. That is, if we were to improve the -+ * storage performance, we'd have a proportional reduction in IO-wait time. -+ * -+ * This all works nicely on UP, where, when a task blocks on IO, we account -+ * idle time as IO-wait, because if the storage were faster, it could've been -+ * running and we'd not be idle. -+ * -+ * This has been extended to SMP, by doing the same for each CPU. This however -+ * is broken. -+ * -+ * Imagine for instance the case where two tasks block on one CPU, only the one -+ * CPU will have IO-wait accounted, while the other has regular idle. Even -+ * though, if the storage were faster, both could've ran at the same time, -+ * utilising both CPUs. -+ * -+ * This means, that when looking globally, the current IO-wait accounting on -+ * SMP is a lower bound, by reason of under accounting. -+ * -+ * Worse, since the numbers are provided per CPU, they are sometimes -+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly -+ * associated with any one particular CPU, it can wake to another CPU than it -+ * blocked on. This means the per CPU IO-wait number is meaningless. -+ * -+ * Task CPU affinities can make all that even more 'interesting'. -+ */ -+ -+unsigned long nr_iowait(void) -+{ -+ unsigned long cpu, sum = 0; -+ -+ for_each_possible_cpu(cpu) -+ sum += nr_iowait_cpu(cpu); -+ -+ return sum; -+} -+ -+unsigned long nr_active(void) -+{ -+ return nr_running() + nr_uninterruptible(); -+} -+ -+/* Variables and functions for calc_load */ -+static unsigned long calc_load_update; -+unsigned long avenrun[3]; -+EXPORT_SYMBOL(avenrun); -+ -+/** -+ * get_avenrun - get the load average array -+ * @loads: pointer to dest load array -+ * @offset: offset to add -+ * @shift: shift count to shift the result left -+ * -+ * These values are estimates at best, so no need for locking. -+ */ -+void get_avenrun(unsigned long *loads, unsigned long offset, int shift) -+{ -+ loads[0] = (avenrun[0] + offset) << shift; -+ loads[1] = (avenrun[1] + offset) << shift; -+ loads[2] = (avenrun[2] + offset) << shift; -+} -+ -+/* -+ * calc_load - update the avenrun load estimates every LOAD_FREQ seconds. -+ */ -+void calc_global_load(unsigned long ticks) -+{ -+ long active; -+ -+ if (time_before(jiffies, READ_ONCE(calc_load_update))) -+ return; -+ active = nr_active() * FIXED_1; -+ -+ avenrun[0] = calc_load(avenrun[0], EXP_1, active); -+ avenrun[1] = calc_load(avenrun[1], EXP_5, active); -+ avenrun[2] = calc_load(avenrun[2], EXP_15, active); -+ -+ calc_load_update = jiffies + LOAD_FREQ; -+} -+ -+/** -+ * fixed_power_int - compute: x^n, in O(log n) time -+ * -+ * @x: base of the power -+ * @frac_bits: fractional bits of @x -+ * @n: power to raise @x to. -+ * -+ * By exploiting the relation between the definition of the natural power -+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and -+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i, -+ * (where: n_i \elem {0, 1}, the binary vector representing n), -+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is -+ * of course trivially computable in O(log_2 n), the length of our binary -+ * vector. 
-+ */ -+static unsigned long -+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) -+{ -+ unsigned long result = 1UL << frac_bits; -+ -+ if (n) { -+ for (;;) { -+ if (n & 1) { -+ result *= x; -+ result += 1UL << (frac_bits - 1); -+ result >>= frac_bits; -+ } -+ n >>= 1; -+ if (!n) -+ break; -+ x *= x; -+ x += 1UL << (frac_bits - 1); -+ x >>= frac_bits; -+ } -+ } -+ -+ return result; -+} -+ -+/* -+ * a1 = a0 * e + a * (1 - e) -+ * -+ * a2 = a1 * e + a * (1 - e) -+ * = (a0 * e + a * (1 - e)) * e + a * (1 - e) -+ * = a0 * e^2 + a * (1 - e) * (1 + e) -+ * -+ * a3 = a2 * e + a * (1 - e) -+ * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e) -+ * = a0 * e^3 + a * (1 - e) * (1 + e + e^2) -+ * -+ * ... -+ * -+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1] -+ * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e) -+ * = a0 * e^n + a * (1 - e^n) -+ * -+ * [1] application of the geometric series: -+ * -+ * n 1 - x^(n+1) -+ * S_n := \Sum x^i = ------------- -+ * i=0 1 - x -+ */ -+unsigned long -+calc_load_n(unsigned long load, unsigned long exp, -+ unsigned long active, unsigned int n) -+{ -+ return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); -+} -+ -+DEFINE_PER_CPU(struct kernel_stat, kstat); -+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -+ -+EXPORT_PER_CPU_SYMBOL(kstat); -+EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -+ -+#ifdef CONFIG_PARAVIRT -+static inline u64 steal_ticks(u64 steal) -+{ -+ if (unlikely(steal > NSEC_PER_SEC)) -+ return div_u64(steal, TICK_NSEC); -+ -+ return __iter_div_u64_rem(steal, TICK_NSEC, &steal); -+} -+#endif -+ -+#ifndef nsecs_to_cputime -+# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) -+#endif -+ -+/* -+ * On each tick, add the number of nanoseconds to the unbanked variables and -+ * once one tick's worth has accumulated, account it allowing for accurate -+ * sub-tick accounting and totals. Use the TICK_APPROX_NS to match the way we -+ * deduct nanoseconds. 
-+ */ -+static void pc_idle_time(struct rq *rq, struct task_struct *idle, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ if (atomic_read(&rq->nr_iowait) > 0) { -+ rq->iowait_ns += ns; -+ if (rq->iowait_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->iowait_ns); -+ cpustat[CPUTIME_IOWAIT] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->iowait_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->idle_ns += ns; -+ if (rq->idle_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->idle_ns); -+ cpustat[CPUTIME_IDLE] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->idle_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(idle); -+} -+ -+static void pc_system_time(struct rq *rq, struct task_struct *p, -+ int hardirq_offset, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ p->stime_ns += ns; -+ if (p->stime_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(p->stime_ns); -+ p->stime_ns %= JIFFY_NS; -+ p->stime += (__force u64)TICK_APPROX_NS * ticks; -+ account_group_system_time(p, TICK_APPROX_NS * ticks); -+ } -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ if (hardirq_count() - hardirq_offset) { -+ rq->irq_ns += ns; -+ if (rq->irq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->irq_ns); -+ cpustat[CPUTIME_IRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->irq_ns %= JIFFY_NS; -+ } -+ } else if (in_serving_softirq()) { -+ rq->softirq_ns += ns; -+ if (rq->softirq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->softirq_ns); -+ cpustat[CPUTIME_SOFTIRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->softirq_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->system_ns += ns; -+ if (rq->system_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->system_ns); -+ cpustat[CPUTIME_SYSTEM] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->system_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(p); -+} -+ -+static void pc_user_time(struct rq *rq, struct task_struct *p, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ p->utime_ns += ns; -+ if (p->utime_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(p->utime_ns); -+ p->utime_ns %= JIFFY_NS; -+ p->utime += (__force u64)TICK_APPROX_NS * ticks; -+ account_group_user_time(p, TICK_APPROX_NS * ticks); -+ } -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ if (this_cpu_ksoftirqd() == p) { -+ /* -+ * ksoftirqd time do not get accounted in cpu_softirq_time. -+ * So, we have to handle it separately here. -+ */ -+ rq->softirq_ns += ns; -+ if (rq->softirq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->softirq_ns); -+ cpustat[CPUTIME_SOFTIRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->softirq_ns %= JIFFY_NS; -+ } -+ } -+ -+ if (task_nice(p) > 0 || idleprio_task(p)) { -+ rq->nice_ns += ns; -+ if (rq->nice_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->nice_ns); -+ cpustat[CPUTIME_NICE] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->nice_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->user_ns += ns; -+ if (rq->user_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->user_ns); -+ cpustat[CPUTIME_USER] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->user_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(p); -+} -+ -+/* -+ * This is called on clock ticks. -+ * Bank in p->sched_time the ns elapsed since the last tick or switch. -+ * CPU scheduler quota accounting is also performed here in microseconds. 
-+ */ -+static void update_cpu_clock_tick(struct rq *rq, struct task_struct *p) -+{ -+ s64 account_ns = rq->niffies - p->last_ran; -+ struct task_struct *idle = rq->idle; -+ -+ /* Accurate tick timekeeping */ -+ if (user_mode(get_irq_regs())) -+ pc_user_time(rq, p, account_ns); -+ else if (p != idle || (irq_count() != HARDIRQ_OFFSET)) { -+ pc_system_time(rq, p, HARDIRQ_OFFSET, account_ns); -+ } else -+ pc_idle_time(rq, idle, account_ns); -+ -+ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ -+ if (p->policy != SCHED_FIFO && p != idle) -+ p->time_slice -= NS_TO_US(account_ns); -+ -+ p->last_ran = rq->niffies; -+} -+ -+/* -+ * This is called on context switches. -+ * Bank in p->sched_time the ns elapsed since the last tick or switch. -+ * CPU scheduler quota accounting is also performed here in microseconds. -+ */ -+static void update_cpu_clock_switch(struct rq *rq, struct task_struct *p) -+{ -+ s64 account_ns = rq->niffies - p->last_ran; -+ struct task_struct *idle = rq->idle; -+ -+ /* Accurate subtick timekeeping */ -+ if (p != idle) -+ pc_user_time(rq, p, account_ns); -+ else -+ pc_idle_time(rq, idle, account_ns); -+ -+ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ -+ if (p->policy != SCHED_FIFO && p != idle) -+ p->time_slice -= NS_TO_US(account_ns); -+} -+ -+/* -+ * Return any ns on the sched_clock that have not yet been accounted in -+ * @p in case that task is currently running. -+ * -+ * Called with task_rq_lock(p) held. -+ */ -+static inline u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) -+{ -+ u64 ns = 0; -+ -+ /* -+ * Must be ->curr _and_ ->on_rq. If dequeued, we would -+ * project cycles that may never be accounted to this -+ * thread, breaking clock_gettime(). -+ */ -+ if (p == rq->curr && task_on_rq_queued(p)) { -+ update_clocks(rq); -+ ns = rq->niffies - p->last_ran; -+ } -+ -+ return ns; -+} -+ -+/* -+ * Return accounted runtime for the task. -+ * Return separately the current's pending runtime that have not been -+ * accounted yet. -+ */ -+unsigned long long task_sched_runtime(struct task_struct *p) -+{ -+ struct rq_flags rf; -+ struct rq *rq; -+ u64 ns; -+ -+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) -+ /* -+ * 64-bit doesn't need locks to atomically read a 64-bit value. -+ * So we have a optimisation chance when the task's delta_exec is 0. -+ * Reading ->on_cpu is racy, but this is ok. -+ * -+ * If we race with it leaving CPU, we'll take a lock. So we're correct. -+ * If we race with it entering CPU, unaccounted time is 0. This is -+ * indistinguishable from the read occurring a few cycles earlier. -+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has -+ * been accounted, so we're correct here as well. -+ */ -+ if (!p->on_cpu || !task_on_rq_queued(p)) -+ return tsk_seruntime(p); -+#endif -+ -+ rq = task_rq_lock(p, &rf); -+ ns = p->sched_time + do_task_delta_exec(p, rq); -+ task_rq_unlock(rq, p, &rf); -+ -+ return ns; -+} -+ -+/* -+ * Functions to test for when SCHED_ISO tasks have used their allocated -+ * quota as real time scheduling and convert them back to SCHED_NORMAL. All -+ * data is modified only by the local runqueue during scheduler_tick with -+ * interrupts disabled. -+ */ -+ -+/* -+ * Test if SCHED_ISO tasks have run longer than their alloted period as RT -+ * tasks and set the refractory flag if necessary. There is 10% hysteresis -+ * for unsetting the flag. 115/128 is ~90/100 as a fast shift instead of a -+ * slow division. 
-+ */ -+static inline void iso_tick(struct rq *rq) -+{ -+ rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD; -+ rq->iso_ticks += 100; -+ if (rq->iso_ticks > ISO_PERIOD * sched_iso_cpu) { -+ rq->iso_refractory = true; -+ if (unlikely(rq->iso_ticks > ISO_PERIOD * 100)) -+ rq->iso_ticks = ISO_PERIOD * 100; -+ } -+} -+ -+/* No SCHED_ISO task was running so decrease rq->iso_ticks */ -+static inline void no_iso_tick(struct rq *rq, int ticks) -+{ -+ if (rq->iso_ticks > 0 || rq->iso_refractory) { -+ rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - ticks) / ISO_PERIOD; -+ if (rq->iso_ticks < ISO_PERIOD * (sched_iso_cpu * 115 / 128)) { -+ rq->iso_refractory = false; -+ if (unlikely(rq->iso_ticks < 0)) -+ rq->iso_ticks = 0; -+ } -+ } -+} -+ -+/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static void task_running_tick(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ /* -+ * If a SCHED_ISO task is running we increment the iso_ticks. In -+ * order to prevent SCHED_ISO tasks from causing starvation in the -+ * presence of true RT tasks we account those as iso_ticks as well. -+ */ -+ if (rt_task(p) || task_running_iso(p)) -+ iso_tick(rq); -+ else -+ no_iso_tick(rq, 1); -+ -+ /* SCHED_FIFO tasks never run out of timeslice. */ -+ if (p->policy == SCHED_FIFO) -+ return; -+ -+ if (iso_task(p)) { -+ if (task_running_iso(p)) { -+ if (rq->iso_refractory) { -+ /* -+ * SCHED_ISO task is running as RT and limit -+ * has been hit. Force it to reschedule as -+ * SCHED_NORMAL by zeroing its time_slice -+ */ -+ p->time_slice = 0; -+ } -+ } else if (!rq->iso_refractory) { -+ /* Can now run again ISO. Reschedule to pick up prio */ -+ goto out_resched; -+ } -+ } -+ -+ /* -+ * Tasks that were scheduled in the first half of a tick are not -+ * allowed to run into the 2nd half of the next tick if they will -+ * run out of time slice in the interim. Otherwise, if they have -+ * less than RESCHED_US μs of time slice left they will be rescheduled. -+ * Dither is used as a backup for when hrexpiry is disabled or high res -+ * timers not configured in. -+ */ -+ if (p->time_slice - rq->dither >= RESCHED_US) -+ return; -+out_resched: -+ rq_lock(rq); -+ __set_tsk_resched(p); -+ rq_unlock(rq); -+} -+ -+static inline void task_tick(struct rq *rq) -+{ -+ if (!rq_idle(rq)) -+ task_running_tick(rq); -+ else if (rq->last_jiffy > rq->last_scheduler_tick) -+ no_iso_tick(rq, rq->last_jiffy - rq->last_scheduler_tick); -+} -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * We can stop the timer tick any time highres timers are active since -+ * we rely entirely on highres timeouts for task expiry rescheduling. -+ */ -+static void sched_stop_tick(struct rq *rq, int cpu) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ if (!tick_nohz_full_enabled()) -+ return; -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+ -+static inline void sched_start_tick(struct rq *rq, int cpu) -+{ -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+ -+struct tick_work { -+ int cpu; -+ atomic_t state; -+ struct delayed_work work; -+}; -+/* Values for ->state, see diagram below. 
*/ -+#define TICK_SCHED_REMOTE_OFFLINE 0 -+#define TICK_SCHED_REMOTE_OFFLINING 1 -+#define TICK_SCHED_REMOTE_RUNNING 2 -+ -+/* -+ * State diagram for ->state: -+ * -+ * -+ * TICK_SCHED_REMOTE_OFFLINE -+ * | ^ -+ * | | -+ * | | sched_tick_remote() -+ * | | -+ * | | -+ * +--TICK_SCHED_REMOTE_OFFLINING -+ * | ^ -+ * | | -+ * sched_tick_start() | | sched_tick_stop() -+ * | | -+ * V | -+ * TICK_SCHED_REMOTE_RUNNING -+ * -+ * -+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() -+ * and sched_tick_start() are happy to leave the state in RUNNING. -+ */ -+ -+static struct tick_work __percpu *tick_work_cpu; -+ -+static void sched_tick_remote(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct tick_work *twork = container_of(dwork, struct tick_work, work); -+ int cpu = twork->cpu; -+ struct rq *rq = cpu_rq(cpu); -+ struct task_struct *curr; -+ u64 delta; -+ int os; -+ -+ /* -+ * Handle the tick only if it appears the remote CPU is running in full -+ * dynticks mode. The check is racy by nature, but missing a tick or -+ * having one too much is no big deal because the scheduler tick updates -+ * statistics and checks timeslices in a time-independent way, regardless -+ * of when exactly it is running. -+ */ -+ if (!tick_nohz_tick_stopped_cpu(cpu)) -+ goto out_requeue; -+ -+ rq_lock_irq(rq); -+ if (cpu_is_offline(cpu)) -+ goto out_unlock; -+ -+ curr = rq->curr; -+ update_rq_clock(rq); -+ -+ if (!is_idle_task(curr)) { -+ /* -+ * Make sure the next tick runs within a reasonable -+ * amount of time. -+ */ -+ delta = rq_clock_task(rq) - curr->last_ran; -+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); -+ } -+ task_tick(rq); -+ -+out_unlock: -+ rq_unlock_irq(rq, NULL); -+ -+out_requeue: -+ -+ /* -+ * Run the remote tick once per second (1Hz). This arbitrary -+ * frequency is large enough to avoid overload but short enough -+ * to keep scheduler internal stats reasonably up to date. But -+ * first update state to reflect hotplug activity if required. -+ */ -+ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); -+ if (os == TICK_SCHED_REMOTE_RUNNING) -+ queue_delayed_work(system_unbound_wq, dwork, HZ); -+} -+ -+static void sched_tick_start(int cpu) -+{ -+ struct tick_work *twork; -+ int os; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); -+ if (os == TICK_SCHED_REMOTE_OFFLINE) { -+ twork->cpu = cpu; -+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); -+ queue_delayed_work(system_unbound_wq, &twork->work, HZ); -+ } -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void sched_tick_stop(int cpu) -+{ -+ struct tick_work *twork; -+ int os; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ /* There cannot be competing actions, but don't rely on stop-machine. */ -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_OFFLINING); -+ WARN_ON_ONCE(os != TICK_SCHED_REMOTE_RUNNING); -+ /* Don't cancel, as this would mess up the state machine. 
*/ -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+int __init sched_tick_offload_init(void) -+{ -+ tick_work_cpu = alloc_percpu(struct tick_work); -+ BUG_ON(!tick_work_cpu); -+ return 0; -+} -+ -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_stop_tick(struct rq *rq, int cpu) {} -+static inline void sched_start_tick(struct rq *rq, int cpu) {} -+static inline void sched_tick_start(int cpu) { } -+static inline void sched_tick_stop(int cpu) { } -+#endif -+ -+DEFINE_PER_CPU(unsigned long, thermal_pressure); -+ -+void arch_set_thermal_pressure(struct cpumask *cpus, -+ unsigned long th_pressure) -+{ -+ int cpu; -+ -+ for_each_cpu(cpu, cpus) -+ WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); -+} -+/* -+ * This function gets called by the timer code, with HZ frequency. -+ * We call it with interrupts disabled. -+ */ -+void scheduler_tick(void) -+{ -+ int cpu __maybe_unused = smp_processor_id(); -+ struct rq *rq = cpu_rq(cpu); -+ -+ arch_scale_freq_tick(); -+ sched_clock_tick(); -+ update_clocks(rq); -+ update_load_avg(rq, 0); -+ update_cpu_clock_tick(rq, rq->curr); -+ task_tick(rq); -+ rq->last_scheduler_tick = rq->last_jiffy; -+ rq->last_tick = rq->clock; -+ psi_task_tick(rq); -+ perf_event_task_tick(); -+ sched_stop_tick(rq, cpu); -+} -+ -+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ -+ defined(CONFIG_TRACE_PREEMPT_TOGGLE)) -+/* -+ * If the value passed in is equal to the current preempt count -+ * then we just disabled preemption. Start timing the latency. -+ */ -+static inline void preempt_latency_start(int val) -+{ -+ if (preempt_count() == val) { -+ unsigned long ip = get_lock_parent_ip(); -+#ifdef CONFIG_DEBUG_PREEMPT -+ current->preempt_disable_ip = ip; -+#endif -+ trace_preempt_off(CALLER_ADDR0, ip); -+ } -+} -+ -+void preempt_count_add(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) -+ return; -+#endif -+ __preempt_count_add(val); -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Spinlock count overflowing soon? -+ */ -+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= -+ PREEMPT_MASK - 10); -+#endif -+ preempt_latency_start(val); -+} -+EXPORT_SYMBOL(preempt_count_add); -+NOKPROBE_SYMBOL(preempt_count_add); -+ -+/* -+ * If the value passed in equals to the current preempt count -+ * then we just enabled preemption. Stop timing the latency. -+ */ -+static inline void preempt_latency_stop(int val) -+{ -+ if (preempt_count() == val) -+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); -+} -+ -+void preempt_count_sub(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) -+ return; -+ /* -+ * Is the spinlock portion underflowing? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && -+ !(preempt_count() & PREEMPT_MASK))) -+ return; -+#endif -+ -+ preempt_latency_stop(val); -+ __preempt_count_sub(val); -+} -+EXPORT_SYMBOL(preempt_count_sub); -+NOKPROBE_SYMBOL(preempt_count_sub); -+ -+#else -+static inline void preempt_latency_start(int val) { } -+static inline void preempt_latency_stop(int val) { } -+#endif -+ -+static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ return p->preempt_disable_ip; -+#else -+ return 0; -+#endif -+} -+ -+/* -+ * The time_slice is only refilled when it is empty and that is when we set a -+ * new deadline. Make sure update_clocks has been called recently to update -+ * rq->niffies. 
-+ */ -+static void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ p->time_slice = timeslice(); -+ p->deadline = rq->niffies + task_deadline_diff(p); -+#ifdef CONFIG_SMT_NICE -+ if (!p->mm) -+ p->smt_bias = 0; -+ else if (rt_task(p)) -+ p->smt_bias = 1 << 30; -+ else if (task_running_iso(p)) -+ p->smt_bias = 1 << 29; -+ else if (idleprio_task(p)) { -+ if (task_running_idle(p)) -+ p->smt_bias = 0; -+ else -+ p->smt_bias = 1; -+ } else if (--p->smt_bias < 1) -+ p->smt_bias = MAX_PRIO - p->static_prio; -+#endif -+} -+ -+/* -+ * Timeslices below RESCHED_US are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. SCHED_BATCH tasks -+ * have been flagged be not latency sensitive and likely to be fully CPU -+ * bound so every time they're rescheduled they have their time_slice -+ * refilled, but get a new later deadline to have little effect on -+ * SCHED_NORMAL tasks. -+ -+ */ -+static inline void check_deadline(struct task_struct *p, struct rq *rq) -+{ -+ if (p->time_slice < RESCHED_US || batch_task(p)) -+ time_slice_expired(p, rq); -+} -+ -+/* -+ * Task selection with skiplists is a simple matter of picking off the first -+ * task in the sorted list, an O(1) operation. The lookup is amortised O(1) -+ * being bound to the number of processors. -+ * -+ * Runqueues are selectively locked based on their unlocked data and then -+ * unlocked if not needed. At most 3 locks will be held at any time and are -+ * released as soon as they're no longer needed. All balancing between CPUs -+ * is thus done here in an extremely simple first come best fit manner. -+ * -+ * This iterates over runqueues in cache locality order. In interactive mode -+ * it iterates over all CPUs and finds the task with the best key/deadline. -+ * In non-interactive mode it will only take a task if it's from the current -+ * runqueue or a runqueue with more tasks than the current one with a better -+ * key/deadline. -+ */ -+#ifdef CONFIG_SMP -+static inline struct task_struct -+*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle) -+{ -+ struct rq *locked = NULL, *chosen = NULL; -+ struct task_struct *edt = idle; -+ int i, best_entries = 0; -+ u64 best_key = ~0ULL; -+ -+ for (i = 0; i < total_runqueues; i++) { -+ struct rq *other_rq = rq_order(rq, i); -+ skiplist_node *next; -+ int entries; -+ -+ entries = other_rq->sl->entries; -+ /* -+ * Check for queued entres lockless first. The local runqueue -+ * is locked so entries will always be accurate. -+ */ -+ if (!sched_interactive) { -+ /* -+ * Don't reschedule balance across nodes unless the CPU -+ * is idle. -+ */ -+ if (edt != idle && rq->cpu_locality[other_rq->cpu] > LOCALITY_SMP) -+ break; -+ if (entries <= best_entries) -+ continue; -+ } else if (!entries) -+ continue; -+ -+ /* if (i) implies other_rq != rq */ -+ if (i) { -+ /* Check for best id queued lockless first */ -+ if (other_rq->best_key >= best_key) -+ continue; -+ -+ if (unlikely(!trylock_rq(rq, other_rq))) -+ continue; -+ -+ /* Need to reevaluate entries after locking */ -+ entries = other_rq->sl->entries; -+ if (unlikely(!entries)) { -+ unlock_rq(other_rq); -+ continue; -+ } -+ } -+ -+ next = other_rq->node; -+ /* -+ * In interactive mode we check beyond the best entry on other -+ * runqueues if we can't get the best for smt or affinity -+ * reasons. 
-+ */ -+ while ((next = next->next[0]) != other_rq->node) { -+ struct task_struct *p; -+ u64 key = next->key; -+ -+ /* Reevaluate key after locking */ -+ if (key >= best_key) -+ break; -+ -+ p = next->value; -+ if (!smt_schedule(p, rq)) { -+ if (i && !sched_interactive) -+ break; -+ continue; -+ } -+ -+ if (sched_other_cpu(p, cpu)) { -+ if (sched_interactive || !i) -+ continue; -+ break; -+ } -+ /* Make sure affinity is ok */ -+ if (i) { -+ /* From this point on p is the best so far */ -+ if (locked) -+ unlock_rq(locked); -+ chosen = locked = other_rq; -+ } -+ best_entries = entries; -+ best_key = key; -+ edt = p; -+ break; -+ } -+ /* rq->preempting is a hint only as the state may have changed -+ * since it was set with the resched call but if we have met -+ * the condition we can break out here. */ -+ if (edt == rq->preempting) -+ break; -+ if (i && other_rq != chosen) -+ unlock_rq(other_rq); -+ } -+ -+ if (likely(edt != idle)) -+ take_task(rq, cpu, edt); -+ -+ if (locked) -+ unlock_rq(locked); -+ -+ rq->preempting = NULL; -+ -+ return edt; -+} -+#else /* CONFIG_SMP */ -+static inline struct task_struct -+*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle) -+{ -+ struct task_struct *edt; -+ -+ if (unlikely(!rq->sl->entries)) -+ return idle; -+ edt = rq->node->next[0]->value; -+ take_task(rq, cpu, edt); -+ return edt; -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Print scheduling while atomic bug: -+ */ -+static noinline void __schedule_bug(struct task_struct *prev) -+{ -+ /* Save this before calling printk(), since that will clobber it */ -+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ if (oops_in_progress) -+ return; -+ -+ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", -+ prev->comm, prev->pid, preempt_count()); -+ -+ debug_show_held_locks(prev); -+ print_modules(); -+ if (irqs_disabled()) -+ print_irqtrace_events(prev); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && in_atomic_preempt_off()) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+ -+/* -+ * Various schedule()-time debugging checks and statistics: -+ */ -+static inline void schedule_debug(struct task_struct *prev, bool preempt) -+{ -+#ifdef CONFIG_SCHED_STACK_END_CHECK -+ if (task_stack_end_corrupted(prev)) -+ panic("corrupted stack end detected inside scheduler\n"); -+#endif -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && prev->state && prev->non_block_count) { -+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", -+ prev->comm, prev->pid, prev->non_block_count); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+ } -+#endif -+ -+ if (unlikely(in_atomic_preempt_off())) { -+ __schedule_bug(prev); -+ preempt_count_set(PREEMPT_DISABLED); -+ } -+ rcu_sleep_check(); -+ -+ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -+ -+ schedstat_inc(this_rq()->sched_count); -+} -+ -+/* -+ * The currently running task's information is all stored in rq local data -+ * which is only modified by the local CPU. 
-+ */ -+static inline void set_rq_task(struct rq *rq, struct task_struct *p) -+{ -+ if (p == rq->idle || p->policy == SCHED_FIFO) -+ hrexpiry_clear(rq); -+ else -+ hrexpiry_start(rq, US_TO_NS(p->time_slice)); -+ if (rq->clock - rq->last_tick > HALF_JIFFY_NS) -+ rq->dither = 0; -+ else -+ rq->dither = rq_dither(rq); -+ -+ rq->rq_deadline = p->deadline; -+ rq->rq_prio = p->prio; -+#ifdef CONFIG_SMT_NICE -+ rq->rq_mm = p->mm; -+ rq->rq_smt_bias = p->smt_bias; -+#endif -+} -+ -+#ifdef CONFIG_SMT_NICE -+static void check_no_siblings(struct rq __maybe_unused *this_rq) {} -+static void wake_no_siblings(struct rq __maybe_unused *this_rq) {} -+static void (*check_siblings)(struct rq *this_rq) = &check_no_siblings; -+static void (*wake_siblings)(struct rq *this_rq) = &wake_no_siblings; -+ -+/* Iterate over smt siblings when we've scheduled a process on cpu and decide -+ * whether they should continue running or be descheduled. */ -+static void check_smt_siblings(struct rq *this_rq) -+{ -+ int other_cpu; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct task_struct *p; -+ struct rq *rq; -+ -+ rq = cpu_rq(other_cpu); -+ if (rq_idle(rq)) -+ continue; -+ p = rq->curr; -+ if (!smt_schedule(p, this_rq)) -+ resched_curr(rq); -+ } -+} -+ -+static void wake_smt_siblings(struct rq *this_rq) -+{ -+ int other_cpu; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct rq *rq; -+ -+ rq = cpu_rq(other_cpu); -+ if (rq_idle(rq)) -+ resched_idle(rq); -+ } -+} -+#else -+static void check_siblings(struct rq __maybe_unused *this_rq) {} -+static void wake_siblings(struct rq __maybe_unused *this_rq) {} -+#endif -+ -+/* -+ * schedule() is the main scheduler function. -+ * -+ * The main means of driving the scheduler and thus entering this function are: -+ * -+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. -+ * -+ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return -+ * paths. For example, see arch/x86/entry_64.S. -+ * -+ * To drive preemption between tasks, the scheduler sets the flag in timer -+ * interrupt handler scheduler_tick(). -+ * -+ * 3. Wakeups don't really cause entry into schedule(). They add a -+ * task to the run-queue and that's it. -+ * -+ * Now, if the new task added to the run-queue preempts the current -+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets -+ * called on the nearest possible occasion: -+ * -+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): -+ * -+ * - in syscall or exception context, at the next outmost -+ * preempt_enable(). (this might be as soon as the wake_up()'s -+ * spin_unlock()!) -+ * -+ * - in IRQ context, return from interrupt-handler to -+ * preemptible context -+ * -+ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) -+ * then at the next: -+ * -+ * - cond_resched() call -+ * - explicit schedule() call -+ * - return from syscall or exception to user-space -+ * - return from interrupt-handler to user-space -+ * -+ * WARNING: must be called with preemption disabled! 
-+ */ -+static void __sched notrace __schedule(bool preempt) -+{ -+ struct task_struct *prev, *next, *idle; -+ unsigned long *switch_count; -+ bool deactivate = false; -+ struct rq *rq; -+ u64 niffies; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ rq = cpu_rq(cpu); -+ prev = rq->curr; -+ idle = rq->idle; -+ -+ schedule_debug(prev, preempt); -+ -+ local_irq_disable(); -+ rcu_note_context_switch(preempt); -+ -+ /* -+ * Make sure that signal_pending_state()->signal_pending() below -+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(). -+ * -+ * The membarrier system call requires a full memory barrier -+ * after coming from user-space, before storing to rq->curr. -+ */ -+ rq_lock(rq); -+ smp_mb__after_spinlock(); -+#ifdef CONFIG_SMP -+ if (rq->preempt) { -+ /* -+ * Make sure resched_curr hasn't triggered a preemption -+ * locklessly on a task that has since scheduled away. Spurious -+ * wakeup of idle is okay though. -+ */ -+ if (unlikely(preempt && prev != idle && !test_tsk_need_resched(prev))) { -+ rq->preempt = NULL; -+ clear_preempt_need_resched(); -+ rq_unlock_irq(rq, NULL); -+ return; -+ } -+ rq->preempt = NULL; -+ } -+#endif -+ -+ switch_count = &prev->nivcsw; -+ if (!preempt && prev->state) { -+ if (signal_pending_state(prev->state, prev)) { -+ prev->state = TASK_RUNNING; -+ } else { -+ deactivate = true; -+ -+ if (prev->in_iowait) { -+ atomic_inc(&rq->nr_iowait); -+ delayacct_blkio_start(); -+ } -+ } -+ switch_count = &prev->nvcsw; -+ } -+ -+ /* -+ * Store the niffy value here for use by the next task's last_ran -+ * below to avoid losing niffies due to update_clocks being called -+ * again after this point. -+ */ -+ update_clocks(rq); -+ niffies = rq->niffies; -+ update_cpu_clock_switch(rq, prev); -+ -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ -+ if (idle != prev) { -+ check_deadline(prev, rq); -+ return_task(prev, rq, cpu, deactivate); -+ } -+ -+ next = earliest_deadline_task(rq, cpu, idle); -+ if (likely(next->prio != PRIO_LIMIT)) -+ clear_cpuidle_map(cpu); -+ else { -+ set_cpuidle_map(cpu); -+ update_load_avg(rq, 0); -+ } -+ -+ set_rq_task(rq, next); -+ next->last_ran = niffies; -+ -+ if (likely(prev != next)) { -+ /* -+ * Don't reschedule an idle task or deactivated tasks -+ */ -+ if (prev == idle) { -+ rq->nr_running++; -+ if (rt_task(next)) -+ rq->rt_nr_running++; -+ } else if (!deactivate) -+ resched_suitable_idle(prev); -+ if (unlikely(next == idle)) { -+ rq->nr_running--; -+ if (rt_task(prev)) -+ rq->rt_nr_running--; -+ wake_siblings(rq); -+ } else -+ check_siblings(rq); -+ rq->nr_switches++; -+ /* -+ * RCU users of rcu_dereference(rq->curr) may not see -+ * changes to task_struct made by pick_next_task(). -+ */ -+ RCU_INIT_POINTER(rq->curr, next); -+ /* -+ * The membarrier system call requires each architecture -+ * to have a full memory barrier after updating -+ * rq->curr, before returning to user-space. -+ * -+ * Here are the schemes providing that barrier on the -+ * various architectures: -+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. -+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
-+ * - finish_lock_switch() for weakly-ordered -+ * architectures where spin_unlock is a full barrier, -+ * - switch_to() for arm64 (weakly-ordered, spin_unlock -+ * is a RELEASE barrier), -+ */ -+ ++*switch_count; -+ -+ psi_sched_switch(prev, next, !task_on_rq_queued(prev)); -+ -+ trace_sched_switch(preempt, prev, next); -+ context_switch(rq, prev, next); /* unlocks the rq */ -+ } else { -+ check_siblings(rq); -+ rq_unlock(rq); -+ local_irq_enable(); -+ } -+} -+ -+void __noreturn do_task_dead(void) -+{ -+ /* Causes final put_task_struct in finish_task_switch(). */ -+ set_special_state(TASK_DEAD); -+ -+ /* Tell freezer to ignore us: */ -+ current->flags |= PF_NOFREEZE; -+ __schedule(false); -+ BUG(); -+ -+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -+ for (;;) -+ cpu_relax(); -+} -+ -+static inline void sched_submit_work(struct task_struct *tsk) -+{ -+ if (!tsk->state) -+ return; -+ -+ /* -+ * If a worker went to sleep, notify and ask workqueue whether -+ * it wants to wake up a task to maintain concurrency. -+ * As this function is called inside the schedule() context, -+ * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker and because wq_worker_sleeping() -+ * requires it. -+ */ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ preempt_disable(); -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_sleeping(tsk); -+ else -+ io_wq_worker_sleeping(tsk); -+ preempt_enable_no_resched(); -+ } -+ -+ if (tsk_is_pi_blocked(tsk)) -+ return; -+ -+ /* -+ * If we are going to sleep and we have plugged IO queued, -+ * make sure to submit it to avoid deadlocks. -+ */ -+ if (blk_needs_flush_plug(tsk)) -+ blk_schedule_flush_plug(tsk); -+} -+ -+static inline void sched_update_worker(struct task_struct *tsk) -+{ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_running(tsk); -+ else -+ io_wq_worker_running(tsk); -+ } -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ struct task_struct *tsk = current; -+ -+ sched_submit_work(tsk); -+ do { -+ preempt_disable(); -+ __schedule(false); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ sched_update_worker(tsk); -+} -+ -+EXPORT_SYMBOL(schedule); -+ -+/* -+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted -+ * state (have scheduled out non-voluntarily) by making sure that all -+ * tasks have either left the run queue or have gone into user space. -+ * As idle tasks do not do either, they must not ever be preempted -+ * (schedule out non-voluntarily). -+ * -+ * schedule_idle() is similar to schedule_preempt_disable() except that it -+ * never enables preemption because it does not call sched_submit_work(). -+ */ -+void __sched schedule_idle(void) -+{ -+ /* -+ * As this skips calling sched_submit_work(), which the idle task does -+ * regardless because that function is a nop when the task is in a -+ * TASK_RUNNING state, make sure this isn't used someplace that the -+ * current task can be in any other state. Note, idle is always in the -+ * TASK_RUNNING state. -+ */ -+ WARN_ON_ONCE(current->state); -+ do { -+ __schedule(false); -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_CONTEXT_TRACKING -+asmlinkage __visible void __sched schedule_user(void) -+{ -+ /* -+ * If we come here after a random call to set_need_resched(), -+ * or we have been woken up remotely but the IPI has not yet arrived, -+ * we haven't yet exited the RCU idle mode. 
Do it here manually until -+ * we find a better solution. -+ * -+ * NB: There are buggy callers of this function. Ideally we -+ * should warn if prev_state != IN_USER, but that will trigger -+ * too frequently to make sense yet. -+ */ -+ enum ctx_state prev_state = exception_enter(); -+ schedule(); -+ exception_exit(prev_state); -+} -+#endif -+ -+/** -+ * schedule_preempt_disabled - called with preemption disabled -+ * -+ * Returns with preemption disabled. Note: preempt_count must be 1 -+ */ -+void __sched schedule_preempt_disabled(void) -+{ -+ sched_preempt_enable_no_resched(); -+ schedule(); -+ preempt_disable(); -+} -+ -+static void __sched notrace preempt_schedule_common(void) -+{ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ __schedule(true); -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ -+ /* -+ * Check again in case we missed a preemption opportunity -+ * between schedule and now. -+ */ -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_PREEMPTION -+/* -+ * This is the entry point to schedule() from in-kernel preemption -+ * off of preempt_enable. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule(void) -+{ -+ /* -+ * If there is a non-zero preempt_count or interrupts are disabled, -+ * we do not want to preempt the current task. Just return.. -+ */ -+ if (likely(!preemptible())) -+ return; -+ -+ preempt_schedule_common(); -+} -+NOKPROBE_SYMBOL(preempt_schedule); -+EXPORT_SYMBOL(preempt_schedule); -+ -+/** -+ * preempt_schedule_notrace - preempt_schedule called by tracing -+ * -+ * The tracing infrastructure uses preempt_enable_notrace to prevent -+ * recursion and tracing preempt enabling caused by the tracing -+ * infrastructure itself. But as tracing can happen in areas coming -+ * from userspace or just about to enter userspace, a preempt enable -+ * can occur before user_exit() is called. This will cause the scheduler -+ * to be called when the system is still in usermode. -+ * -+ * To prevent this, the preempt_enable_notrace will use this function -+ * instead of preempt_schedule() to exit user context if needed before -+ * calling the scheduler. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) -+{ -+ enum ctx_state prev_ctx; -+ -+ if (likely(!preemptible())) -+ return; -+ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. 
-+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ /* -+ * Needs preempt disabled in case user_exit() is traced -+ * and the tracer calls preempt_enable_notrace() causing -+ * an infinite recursion. -+ */ -+ prev_ctx = exception_enter(); -+ __schedule(true); -+ exception_exit(prev_ctx); -+ -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ } while (need_resched()); -+} -+EXPORT_SYMBOL_GPL(preempt_schedule_notrace); -+ -+#endif /* CONFIG_PREEMPTION */ -+ -+/* -+ * This is the entry point to schedule() from kernel preemption -+ * off of irq context. -+ * Note, that this is called and return with irqs disabled. This will -+ * protect us against recursive calling from irq. -+ */ -+asmlinkage __visible void __sched preempt_schedule_irq(void) -+{ -+ enum ctx_state prev_state; -+ -+ /* Catch callers which need to be fixed */ -+ BUG_ON(preempt_count() || !irqs_disabled()); -+ -+ prev_state = exception_enter(); -+ -+ do { -+ preempt_disable(); -+ local_irq_enable(); -+ __schedule(true); -+ local_irq_disable(); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ -+ exception_exit(prev_state); -+} -+ -+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, -+ void *key) -+{ -+ return try_to_wake_up(curr->private, mode, wake_flags); -+} -+EXPORT_SYMBOL(default_wake_function); -+ -+#ifdef CONFIG_RT_MUTEXES -+ -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ -+/* -+ * rt_mutex_setprio - set the current priority of a task -+ * @p: task to boost -+ * @pi_task: donor task -+ * -+ * This function changes the 'effective' priority of a task. It does -+ * not touch ->normal_prio like __setscheduler(). -+ * -+ * Used by the rt_mutex code to implement priority inheritance -+ * logic. Call site only calls if the priority of the task changed. -+ */ -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) -+{ -+ int prio, oldprio; -+ struct rq *rq; -+ -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. -+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio) -+ return; -+ -+ rq = __task_rq_lock(p, NULL); -+ update_rq_clock(rq); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio) -+ goto out_unlock; -+ -+ /* -+ * Idle task boosting is a nono in general. There is one -+ * exception, when PREEMPT_RT and NOHZ is active: -+ * -+ * The idle task calls get_next_timer_interrupt() and holds -+ * the timer wheel base->lock on the CPU and another CPU wants -+ * to access the timer (probably to cancel it). 
We can safely -+ * ignore the boosting request, as the idle CPU runs this code -+ * with interrupts disabled and will complete the lock -+ * protected section without being interrupted. So there is no -+ * real need to boost. -+ */ -+ if (unlikely(p == rq->idle)) { -+ WARN_ON(p != rq->curr); -+ WARN_ON(p->pi_blocked_on); -+ goto out_unlock; -+ } -+ -+ trace_sched_pi_setprio(p, pi_task); -+ oldprio = p->prio; -+ p->prio = prio; -+ if (task_running(rq, p)){ -+ if (prio > oldprio) -+ resched_task(p); -+ } else if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (prio < oldprio) -+ try_preempt(p, rq); -+ } -+out_unlock: -+ __task_rq_unlock(rq, NULL); -+} -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} -+#endif -+ -+/* -+ * Adjust the deadline for when the priority is to change, before it's -+ * changed. -+ */ -+static inline void adjust_deadline(struct task_struct *p, int new_prio) -+{ -+ p->deadline += static_deadline_diff(new_prio) - task_deadline_diff(p); -+} -+ -+void set_user_nice(struct task_struct *p, long nice) -+{ -+ int new_static, old_static; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) -+ return; -+ new_static = NICE_TO_PRIO(nice); -+ /* -+ * We have to be careful, if called from sys_setpriority(), -+ * the task might be in the middle of scheduling on another CPU. -+ */ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ /* -+ * The RT priorities are set via sched_setscheduler(), but we still -+ * allow the 'normal' nice value to be set - but as expected -+ * it wont have any effect on scheduling until the task is -+ * not SCHED_NORMAL/SCHED_BATCH: -+ */ -+ if (has_rt_policy(p)) { -+ p->static_prio = new_static; -+ goto out_unlock; -+ } -+ -+ adjust_deadline(p, new_static); -+ old_static = p->static_prio; -+ p->static_prio = new_static; -+ p->prio = effective_prio(p); -+ -+ if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (new_static < old_static) -+ try_preempt(p, rq); -+ } else if (task_running(rq, p)) { -+ set_rq_task(rq, p); -+ if (old_static < new_static) -+ resched_task(p); -+ } -+out_unlock: -+ task_rq_unlock(rq, p, &rf); -+} -+EXPORT_SYMBOL(set_user_nice); -+ -+/* -+ * can_nice - check if a task can reduce its nice value -+ * @p: task -+ * @nice: nice value -+ */ -+int can_nice(const struct task_struct *p, const int nice) -+{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); -+} -+ -+#ifdef __ARCH_WANT_SYS_NICE -+ -+/* -+ * sys_nice - change the priority of the current process. -+ * @increment: priority increment -+ * -+ * sys_setpriority is a more generic, but much slower function that -+ * does similar things. -+ */ -+SYSCALL_DEFINE1(nice, int, increment) -+{ -+ long nice, retval; -+ -+ /* -+ * Setpriority might change our priority at the same moment. -+ * We don't have to worry. Conceptually one call occurs first -+ * and we have a single winner. 
-+ */ -+ -+ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); -+ nice = task_nice(current) + increment; -+ -+ nice = clamp_val(nice, MIN_NICE, MAX_NICE); -+ if (increment < 0 && !can_nice(current, nice)) -+ return -EPERM; -+ -+ retval = security_task_setnice(current, nice); -+ if (retval) -+ return retval; -+ -+ set_user_nice(current, nice); -+ return 0; -+} -+ -+#endif -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0 (SCHED_ISO) up to 82 (nice +19 SCHED_IDLEPRIO). -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ int delta, prio = p->prio - MAX_RT_PRIO; -+ -+ /* rt tasks and iso tasks */ -+ if (prio <= 0) -+ goto out; -+ -+ /* Convert to ms to avoid overflows */ -+ delta = NS_TO_MS(p->deadline - task_rq(p)->niffies); -+ if (unlikely(delta < 0)) -+ delta = 0; -+ delta = delta * 40 / ms_longest_deadline_diff(); -+ if (delta <= 80) -+ prio += delta; -+ if (idleprio_task(p)) -+ prio += 40; -+out: -+ return prio; -+} -+ -+/** -+ * idle_cpu - is a given CPU idle currently? -+ * @cpu: the processor in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int idle_cpu(int cpu) -+{ -+ return cpu_curr(cpu) == cpu_rq(cpu)->idle; -+} -+ -+/** -+ * available_idle_cpu - is a given CPU idle for enqueuing work. -+ * @cpu: the CPU in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int available_idle_cpu(int cpu) -+{ -+ if (!idle_cpu(cpu)) -+ return 0; -+ -+ if (vcpu_is_preempted(cpu)) -+ return 0; -+ -+ return 1; -+} -+ -+/** -+ * idle_task - return the idle task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * Return: The idle task for the CPU @cpu. -+ */ -+struct task_struct *idle_task(int cpu) -+{ -+ return cpu_rq(cpu)->idle; -+} -+ -+/** -+ * find_process_by_pid - find a process with a matching PID value. -+ * @pid: the pid in question. -+ * -+ * The task of @pid, if found. %NULL otherwise. -+ */ -+static inline struct task_struct *find_process_by_pid(pid_t pid) -+{ -+ return pid ? find_task_by_vpid(pid) : current; -+} -+ -+/* Actually do priority change: must hold rq lock. */ -+static void __setscheduler(struct task_struct *p, struct rq *rq, int policy, -+ int prio, const struct sched_attr *attr, -+ bool keep_boost) -+{ -+ int oldrtprio, oldprio; -+ -+ /* -+ * If params can't change scheduling class changes aren't allowed -+ * either. -+ */ -+ if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS) -+ return; -+ -+ p->policy = policy; -+ oldrtprio = p->rt_priority; -+ p->rt_priority = prio; -+ p->normal_prio = normal_prio(p); -+ oldprio = p->prio; -+ /* -+ * Keep a potential priority boosting if called from -+ * sched_setscheduler(). 
-+ */ -+ p->prio = normal_prio(p); -+ if (keep_boost) -+ p->prio = rt_effective_prio(p, p->prio); -+ -+ if (task_running(rq, p)) { -+ set_rq_task(rq, p); -+ resched_task(p); -+ } else if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (p->prio < oldprio || p->rt_priority > oldrtprio) -+ try_preempt(p, rq); -+ } -+} -+ -+/* -+ * Check the target process has a UID that matches the current process's -+ */ -+static bool check_same_owner(struct task_struct *p) -+{ -+ const struct cred *cred = current_cred(), *pcred; -+ bool match; -+ -+ rcu_read_lock(); -+ pcred = __task_cred(p); -+ match = (uid_eq(cred->euid, pcred->euid) || -+ uid_eq(cred->euid, pcred->uid)); -+ rcu_read_unlock(); -+ return match; -+} -+ -+static int __sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, -+ bool user, bool pi) -+{ -+ int retval, policy = attr->sched_policy, oldpolicy = -1, priority = attr->sched_priority; -+ unsigned long rlim_rtprio = 0; -+ struct rq_flags rf; -+ int reset_on_fork; -+ struct rq *rq; -+ -+ /* The pi code expects interrupts enabled */ -+ BUG_ON(pi && in_interrupt()); -+ -+ if (is_rt_policy(policy) && !capable(CAP_SYS_NICE)) { -+ unsigned long lflags; -+ -+ if (!lock_task_sighand(p, &lflags)) -+ return -ESRCH; -+ rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO); -+ unlock_task_sighand(p, &lflags); -+ if (rlim_rtprio) -+ goto recheck; -+ /* -+ * If the caller requested an RT policy without having the -+ * necessary rights, we downgrade the policy to SCHED_ISO. -+ * We also set the parameter to zero to pass the checks. -+ */ -+ policy = SCHED_ISO; -+ priority = 0; -+ } -+recheck: -+ /* Double check policy once rq lock held */ -+ if (policy < 0) { -+ reset_on_fork = p->sched_reset_on_fork; -+ policy = oldpolicy = p->policy; -+ } else { -+ reset_on_fork = !!(policy & SCHED_RESET_ON_FORK); -+ policy &= ~SCHED_RESET_ON_FORK; -+ -+ if (!SCHED_RANGE(policy)) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV)) -+ return -EINVAL; -+ -+ /* -+ * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * SCHED_BATCH is 0. 
-+ */ -+ if (priority < 0 || -+ (p->mm && priority > MAX_USER_RT_PRIO - 1) || -+ (!p->mm && priority > MAX_RT_PRIO - 1)) -+ return -EINVAL; -+ if (is_rt_policy(policy) != (priority != 0)) -+ return -EINVAL; -+ -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (is_rt_policy(policy)) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (priority > p->rt_priority && -+ priority > rlim_rtprio) -+ return -EPERM; -+ } else { -+ switch (p->policy) { -+ /* -+ * Can only downgrade policies but not back to -+ * SCHED_NORMAL -+ */ -+ case SCHED_ISO: -+ if (policy == SCHED_ISO) -+ goto out; -+ if (policy != SCHED_NORMAL) -+ return -EPERM; -+ break; -+ case SCHED_BATCH: -+ if (policy == SCHED_BATCH) -+ goto out; -+ if (policy != SCHED_IDLEPRIO) -+ return -EPERM; -+ break; -+ case SCHED_IDLEPRIO: -+ if (policy == SCHED_IDLEPRIO) -+ goto out; -+ return -EPERM; -+ default: -+ break; -+ } -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag: */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ -+ if (user) { -+ retval = security_task_setscheduler(p); -+ if (retval) -+ return retval; -+ } -+ -+ if (pi) -+ cpuset_read_lock(); -+ -+ /* -+ * Make sure no PI-waiters arrive (or leave) while we are -+ * changing the priority of the task: -+ * -+ * To be able to change p->policy safely, the runqueue lock must be -+ * held. -+ */ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ /* -+ * Changing the policy of the stop threads its a very bad idea: -+ */ -+ if (p == rq->stop) { -+ retval = -EINVAL; -+ goto unlock; -+ } -+ -+ /* -+ * If not changing anything there's no need to proceed further: -+ */ -+ if (unlikely(policy == p->policy && (!is_rt_policy(policy) || -+ priority == p->rt_priority))) { -+ retval = 0; -+ goto unlock; -+ } -+ -+ /* Re-check policy now with rq lock held */ -+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { -+ policy = oldpolicy = -1; -+ task_rq_unlock(rq, p, &rf); -+ if (pi) -+ cpuset_read_unlock(); -+ goto recheck; -+ } -+ p->sched_reset_on_fork = reset_on_fork; -+ -+ __setscheduler(p, rq, policy, priority, attr, pi); -+ -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ task_rq_unlock(rq, p, &rf); -+ -+ if (pi) { -+ cpuset_read_unlock(); -+ rt_mutex_adjust_pi(p); -+ } -+ preempt_enable(); -+out: -+ return 0; -+ -+unlock: -+ task_rq_unlock(rq, p, &rf); -+ if (pi) -+ cpuset_read_unlock(); -+ return retval; -+} -+ -+static int _sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param, bool check) -+{ -+ struct sched_attr attr = { -+ .sched_policy = policy, -+ .sched_priority = param->sched_priority, -+ .sched_nice = PRIO_TO_NICE(p->static_prio), -+ }; -+ -+ return __sched_setscheduler(p, &attr, check, true); -+} -+/** -+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * -+ * NOTE that the task may be already dead. 
-+ */ -+int sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, true); -+} -+ -+EXPORT_SYMBOL_GPL(sched_setscheduler); -+ -+int sched_setattr(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, true, true); -+} -+EXPORT_SYMBOL_GPL(sched_setattr); -+ -+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, false, true); -+} -+ -+/** -+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Just like sched_setscheduler, only don't bother checking if the -+ * current context has permission. For example, this is needed in -+ * stop_machine(): we create temporary high priority worker threads, -+ * but our caller might not have that capability. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+int sched_setscheduler_nocheck(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, false); -+} -+EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); -+ -+static int -+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) -+{ -+ struct sched_param lparam; -+ struct task_struct *p; -+ int retval; -+ -+ if (!param || pid < 0) -+ return -EINVAL; -+ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) -+ return -EFAULT; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setscheduler(p, policy, &lparam); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/* -+ * Mimics kernel/events/core.c perf_copy_attr(). -+ */ -+static int sched_copy_attr(struct sched_attr __user *uattr, -+ struct sched_attr *attr) -+{ -+ u32 size; -+ int ret; -+ -+ /* Zero the full structure, so that a short copy will be nice: */ -+ memset(attr, 0, sizeof(*attr)); -+ -+ ret = get_user(size, &uattr->size); -+ if (ret) -+ return ret; -+ -+ /* ABI compatibility quirk: */ -+ if (!size) -+ size = SCHED_ATTR_SIZE_VER0; -+ -+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) -+ goto err_size; -+ -+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); -+ if (ret) { -+ if (ret == -E2BIG) -+ goto err_size; -+ return ret; -+ } -+ -+ /* -+ * XXX: Do we want to be lenient like existing syscalls; or do we want -+ * to be strict and return an error on out-of-bounds values? -+ */ -+ attr->sched_nice = clamp(attr->sched_nice, -20, 19); -+ -+ /* sched/core.c uses zero here but we already know ret is zero */ -+ return 0; -+ -+err_size: -+ put_user(sizeof(*attr), &uattr->size); -+ return -E2BIG; -+} -+ -+/* -+ * sched_setparam() passes in -1 for its policy, to let the functions -+ * it calls know not to change it. -+ */ -+#define SETPARAM_POLICY -1 -+ -+/** -+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority -+ * @pid: the pid in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. 
-+ */ -+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) -+{ -+ if (policy < 0) -+ return -EINVAL; -+ -+ return do_sched_setscheduler(pid, policy, param); -+} -+ -+/** -+ * sys_sched_setparam - set/change the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); -+} -+ -+/** -+ * sys_sched_setattr - same as above, but with extended sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ */ -+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, flags) -+{ -+ struct sched_attr attr; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || flags) -+ return -EINVAL; -+ -+ retval = sched_copy_attr(uattr, &attr); -+ if (retval) -+ return retval; -+ -+ if ((int)attr.sched_policy < 0) -+ return -EINVAL; -+ if (attr.sched_flags & SCHED_FLAG_KEEP_POLICY) -+ attr.sched_policy = SETPARAM_POLICY; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setattr(p, &attr); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread -+ * @pid: the pid in question. -+ * -+ * Return: On success, the policy of the thread. Otherwise, a negative error -+ * code. -+ */ -+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) -+{ -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (pid < 0) -+ goto out_nounlock; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (p) { -+ retval = security_task_getscheduler(p); -+ if (!retval) -+ retval = p->policy; -+ } -+ rcu_read_unlock(); -+ -+out_nounlock: -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the RT priority. -+ * -+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error -+ * code. -+ */ -+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ struct sched_param lp = { .sched_priority = 0 }; -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (!param || pid < 0) -+ goto out_nounlock; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ if (has_rt_policy(p)) -+ lp.sched_priority = p->rt_priority; -+ rcu_read_unlock(); -+ -+ /* -+ * This one might sleep, we cannot do it with a spinlock held ... -+ */ -+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; -+ -+out_nounlock: -+ return retval; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/* -+ * Copy the kernel size attribute structure (which might be larger -+ * than what user-space knows about) to user-space. -+ * -+ * Note that all cases are valid: user-space buffer can be larger or -+ * smaller than the kernel-space buffer. The usual case is that both -+ * have the same size. 
-+ */ -+static int -+sched_attr_copy_to_user(struct sched_attr __user *uattr, -+ struct sched_attr *kattr, -+ unsigned int usize) -+{ -+ unsigned int ksize = sizeof(*kattr); -+ -+ if (!access_ok(uattr, usize)) -+ return -EFAULT; -+ -+ /* -+ * sched_getattr() ABI forwards and backwards compatibility: -+ * -+ * If usize == ksize then we just copy everything to user-space and all is good. -+ * -+ * If usize < ksize then we only copy as much as user-space has space for, -+ * this keeps ABI compatibility as well. We skip the rest. -+ * -+ * If usize > ksize then user-space is using a newer version of the ABI, -+ * which part the kernel doesn't know about. Just ignore it - tooling can -+ * detect the kernel's knowledge of attributes from the attr->size value -+ * which is set to ksize in this case. -+ */ -+ kattr->size = min(usize, ksize); -+ -+ if (copy_to_user(uattr, kattr, kattr->size)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ * @usize: sizeof(attr) for fwd/bwd comp. -+ * @flags: for future extension. -+ */ -+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, usize, unsigned int, flags) -+{ -+ struct sched_attr kattr = { }; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || usize > PAGE_SIZE || -+ usize < SCHED_ATTR_SIZE_VER0 || flags) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ kattr.sched_policy = p->policy; -+ if (rt_task(p)) -+ kattr.sched_priority = p->rt_priority; -+ else -+ kattr.sched_nice = task_nice(p); -+ -+ rcu_read_unlock(); -+ -+ return sched_attr_copy_to_user(uattr, &kattr, usize); -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ -+ cpumask_var_t cpus_allowed, new_mask; -+ struct task_struct *p; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ p = find_process_by_pid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ return -ESRCH; -+ } -+ -+ /* Prevent p going away */ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->flags & PF_NO_SETAFFINITY) { -+ retval = -EINVAL; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ retval = -EPERM; -+ if (!check_same_owner(p)) { -+ rcu_read_lock(); -+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { -+ rcu_read_unlock(); -+ goto out_unlock; -+ } -+ rcu_read_unlock(); -+ } -+ -+ retval = security_task_setscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ cpuset_cpus_allowed(p, cpus_allowed); -+ cpumask_and(new_mask, in_mask, cpus_allowed); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ -+ if (!retval) { -+ cpuset_cpus_allowed(p, cpus_allowed); -+ if (!cpumask_subset(new_mask, cpus_allowed)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. 
Just reset the cpus_allowed to the -+ * cpuset's cpus_allowed -+ */ -+ cpumask_copy(new_mask, cpus_allowed); -+ goto again; -+ } -+ } -+out_unlock: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_allowed); -+out_put_task: -+ put_task_struct(p); -+ return retval; -+} -+ -+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, -+ cpumask_t *new_mask) -+{ -+ if (len < cpumask_size()) -+ cpumask_clear(new_mask); -+ else if (len > cpumask_size()) -+ len = cpumask_size(); -+ -+ return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; -+} -+ -+ -+/** -+ * sys_sched_setaffinity - set the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to the new CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ cpumask_var_t new_mask; -+ int retval; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); -+ if (retval == 0) -+ retval = sched_setaffinity(pid, new_mask); -+ free_cpumask_var(new_mask); -+ return retval; -+} -+ -+long sched_getaffinity(pid_t pid, cpumask_t *mask) -+{ -+ struct task_struct *p; -+ unsigned long flags; -+ int retval; -+ -+ get_online_cpus(); -+ rcu_read_lock(); -+ -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+out_unlock: -+ rcu_read_unlock(); -+ put_online_cpus(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getaffinity - get the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to hold the current CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ int ret; -+ cpumask_var_t mask; -+ -+ if ((len * BITS_PER_BYTE) < nr_cpu_ids) -+ return -EINVAL; -+ if (len & (sizeof(unsigned long)-1)) -+ return -EINVAL; -+ -+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ ret = sched_getaffinity(pid, mask); -+ if (ret == 0) { -+ unsigned int retlen = min(len, cpumask_size()); -+ -+ if (copy_to_user(user_mask_ptr, mask, retlen)) -+ ret = -EFAULT; -+ else -+ ret = retlen; -+ } -+ free_cpumask_var(mask); -+ -+ return ret; -+} -+ -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. It does this by -+ * scheduling away the current task. If it still has the earliest deadline -+ * it will be scheduled again as the next task. -+ * -+ * Return: 0. 
-+ */ -+static void do_sched_yield(void) -+{ -+ struct rq *rq; -+ -+ if (!sched_yield_type) -+ return; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ rq_lock(rq); -+ -+ if (sched_yield_type > 1) -+ time_slice_expired(current, rq); -+ schedstat_inc(rq->yld_count); -+ -+ /* -+ * Since we are going to call schedule() anyway, there's -+ * no need to preempt or enable interrupts: -+ */ -+ preempt_disable(); -+ rq_unlock(rq); -+ sched_preempt_enable_no_resched(); -+ -+ schedule(); -+} -+ -+SYSCALL_DEFINE0(sched_yield) -+{ -+ do_sched_yield(); -+ return 0; -+} -+ -+#ifndef CONFIG_PREEMPTION -+int __sched _cond_resched(void) -+{ -+ if (should_resched(0)) { -+ preempt_schedule_common(); -+ return 1; -+ } -+ rcu_all_qs(); -+ return 0; -+} -+EXPORT_SYMBOL(_cond_resched); -+#endif -+ -+/* -+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, -+ * call schedule, and on return reacquire the lock. -+ * -+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level -+ * operations here to prevent schedule() from being called twice (once via -+ * spin_unlock(), once by hand). -+ */ -+int __cond_resched_lock(spinlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held(lock); -+ -+ if (spin_needbreak(lock) || resched) { -+ spin_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ spin_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_lock); -+ -+/** -+ * yield - yield the current processor to other threads. -+ * -+ * Do not ever use this function, there's a 99% chance you're doing it wrong. -+ * -+ * The scheduler is at all times free to pick the calling task as the most -+ * eligible task to run, if removing the yield() call from your code breaks -+ * it, its already broken. -+ * -+ * Typical broken usage is: -+ * -+ * while (!event) -+ * yield(); -+ * -+ * where one assumes that yield() will let 'the other' process run that will -+ * make event true. If the current task is a SCHED_FIFO task that will never -+ * happen. Never use yield() as a progress guarantee!! -+ * -+ * If you want to use yield() to wait for something, use wait_event(). -+ * If you want to use yield() to be 'nice' for others, use cond_resched(). -+ * If you still want to use yield(), do not! -+ */ -+void __sched yield(void) -+{ -+ set_current_state(TASK_RUNNING); -+ do_sched_yield(); -+} -+EXPORT_SYMBOL(yield); -+ -+/** -+ * yield_to - yield the current processor to another thread in -+ * your thread group, or accelerate that thread toward the -+ * processor it's on. -+ * @p: target task -+ * @preempt: whether task preemption is allowed or not -+ * -+ * It's the caller's job to ensure that the target task struct -+ * can't go away on us before we can do any checks. -+ * -+ * Return: -+ * true (>0) if we indeed boosted the target task. -+ * false (0) if we failed to boost the target. -+ * -ESRCH if there's no task to yield to. -+ */ -+int __sched yield_to(struct task_struct *p, bool preempt) -+{ -+ struct task_struct *rq_p; -+ struct rq *rq, *p_rq; -+ unsigned long flags; -+ int yielded = 0; -+ -+ local_irq_save(flags); -+ rq = this_rq(); -+ -+again: -+ p_rq = task_rq(p); -+ /* -+ * If we're the only runnable task on the rq and target rq also -+ * has only one task, there's absolutely no point in yielding. 
-+ */ -+ if (task_running(p_rq, p) || p->state) { -+ yielded = -ESRCH; -+ goto out_irq; -+ } -+ -+ double_rq_lock(rq, p_rq); -+ if (unlikely(task_rq(p) != p_rq)) { -+ double_rq_unlock(rq, p_rq); -+ goto again; -+ } -+ -+ yielded = 1; -+ schedstat_inc(rq->yld_count); -+ rq_p = rq->curr; -+ if (p->deadline > rq_p->deadline) -+ p->deadline = rq_p->deadline; -+ p->time_slice += rq_p->time_slice; -+ if (p->time_slice > timeslice()) -+ p->time_slice = timeslice(); -+ time_slice_expired(rq_p, rq); -+ if (preempt && rq != p_rq) -+ resched_task(p_rq->curr); -+ double_rq_unlock(rq, p_rq); -+out_irq: -+ local_irq_restore(flags); -+ -+ if (yielded > 0) -+ schedule(); -+ return yielded; -+} -+EXPORT_SYMBOL_GPL(yield_to); -+ -+int io_schedule_prepare(void) -+{ -+ int old_iowait = current->in_iowait; -+ -+ current->in_iowait = 1; -+ blk_schedule_flush_plug(current); -+ -+ return old_iowait; -+} -+ -+void io_schedule_finish(int token) -+{ -+ current->in_iowait = token; -+} -+ -+/* -+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so -+ * that process accounting knows that this is a task in IO wait state. -+ * -+ * But don't do that if it is a deliberate, throttling IO wait (this task -+ * has set its backing_dev_info: the queue against which it should throttle) -+ */ -+ -+long __sched io_schedule_timeout(long timeout) -+{ -+ int token; -+ long ret; -+ -+ token = io_schedule_prepare(); -+ ret = schedule_timeout(timeout); -+ io_schedule_finish(token); -+ -+ return ret; -+} -+EXPORT_SYMBOL(io_schedule_timeout); -+ -+void __sched io_schedule(void) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ schedule(); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(io_schedule); -+ -+/** -+ * sys_sched_get_priority_max - return maximum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the maximum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_max, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = MAX_USER_RT_PRIO-1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLEPRIO: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * sys_sched_get_priority_min - return minimum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the minimum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_min, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLEPRIO: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) -+{ -+ struct task_struct *p; -+ unsigned int time_slice; -+ struct rq_flags rf; -+ struct rq *rq; -+ int retval; -+ -+ if (pid < 0) -+ return -EINVAL; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ rq = task_rq_lock(p, &rf); -+ time_slice = p->policy == SCHED_FIFO ? 
0 : MS_TO_NS(task_timeslice(p)); -+ task_rq_unlock(rq, p, &rf); -+ -+ rcu_read_unlock(); -+ *t = ns_to_timespec64(time_slice); -+ return 0; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/** -+ * sys_sched_rr_get_interval - return the default timeslice of a process. -+ * @pid: pid of the process. -+ * @interval: userspace pointer to the timeslice value. -+ * -+ * this syscall writes the default timeslice value of a given process -+ * into the user-space timespec buffer. A value of '0' means infinity. -+ * -+ * Return: On success, 0 and the timeslice is in @interval. Otherwise, -+ * an error code. -+ */ -+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, -+ struct __kernel_timespec __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_timespec64(&t, interval); -+ -+ return retval; -+} -+ -+#ifdef CONFIG_COMPAT_32BIT_TIME -+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, -+ struct old_timespec32 __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_old_timespec32(&t, interval); -+ return retval; -+} -+#endif -+ -+void sched_show_task(struct task_struct *p) -+{ -+ unsigned long free = 0; -+ int ppid; -+ -+ if (!try_get_task_stack(p)) -+ return; -+ -+ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); -+ -+ if (p->state == TASK_RUNNING) -+ printk(KERN_CONT " running task "); -+#ifdef CONFIG_DEBUG_STACK_USAGE -+ free = stack_not_used(p); -+#endif -+ ppid = 0; -+ rcu_read_lock(); -+ if (pid_alive(p)) -+ ppid = task_pid_nr(rcu_dereference(p->real_parent)); -+ rcu_read_unlock(); -+ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, -+ task_pid_nr(p), ppid, -+ (unsigned long)task_thread_info(p)->flags); -+ -+ print_worker_info(KERN_INFO, p); -+ show_stack(p, NULL); -+ put_task_stack(p); -+} -+EXPORT_SYMBOL_GPL(sched_show_task); -+ -+static inline bool -+state_filter_match(unsigned long state_filter, struct task_struct *p) -+{ -+ /* no filter, everything matches */ -+ if (!state_filter) -+ return true; -+ -+ /* filter, but doesn't match */ -+ if (!(p->state & state_filter)) -+ return false; -+ -+ /* -+ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows -+ * TASK_KILLABLE). -+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) -+ return false; -+ -+ return true; -+} -+ -+void show_state_filter(unsigned long state_filter) -+{ -+ struct task_struct *g, *p; -+ -+#if BITS_PER_LONG == 32 -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#else -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#endif -+ rcu_read_lock(); -+ for_each_process_thread(g, p) { -+ /* -+ * reset the NMI-timeout, listing all files on a slow -+ * console might take a lot of time: -+ * Also, reset softlockup watchdogs on all CPUs, because -+ * another CPU might be blocked waiting for us to process -+ * an IPI. 
-+ */ -+ touch_nmi_watchdog(); -+ touch_all_softlockup_watchdogs(); -+ if (state_filter_match(state_filter, p)) -+ sched_show_task(p); -+ } -+ -+ rcu_read_unlock(); -+ /* -+ * Only show locks if all tasks are dumped: -+ */ -+ if (!state_filter) -+ debug_show_all_locks(); -+} -+ -+void dump_cpu_task(int cpu) -+{ -+ pr_info("Task dump for CPU %d:\n", cpu); -+ sched_show_task(cpu_curr(cpu)); -+} -+ -+#ifdef CONFIG_SMP -+void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ p->nr_cpus_allowed = cpumask_weight(new_mask); -+} -+ -+void __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ struct rq *rq = task_rq(p); -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ -+ if (task_queued(p)) { -+ /* -+ * Because __kthread_bind() calls this on blocked tasks without -+ * holding rq->lock. -+ */ -+ lockdep_assert_held(rq->lock); -+ } -+} -+ -+/* -+ * Calling do_set_cpus_allowed from outside the scheduler code should not be -+ * called on a running or queued task. We should be holding pi_lock. -+ */ -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ __do_set_cpus_allowed(p, new_mask); -+ if (needs_other_cpu(p, task_cpu(p))) { -+ struct rq *rq; -+ -+ rq = __task_rq_lock(p, NULL); -+ set_task_cpu(p, valid_task_cpu(p)); -+ resched_task(p); -+ __task_rq_unlock(rq, NULL); -+ } -+} -+#endif -+ -+/** -+ * init_idle - set up an idle thread for a given CPU -+ * @idle: task in question -+ * @cpu: cpu the idle task belongs to -+ * -+ * NOTE: this function does not set the idle thread's NEED_RESCHED -+ * flag, to make booting more robust. -+ */ -+void init_idle(struct task_struct *idle, int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&idle->pi_lock, flags); -+ raw_spin_lock(rq->lock); -+ idle->last_ran = rq->niffies; -+ time_slice_expired(idle, rq); -+ idle->state = TASK_RUNNING; -+ /* Setting prio to illegal value shouldn't matter when never queued */ -+ idle->prio = PRIO_LIMIT; -+ idle->flags |= PF_IDLE; -+ -+ kasan_unpoison_task_stack(idle); -+ -+#ifdef CONFIG_SMP -+ /* -+ * It's possible that init_idle() gets called multiple times on a task, -+ * in that case do_set_cpus_allowed() will not do the right thing. -+ * -+ * And since this is boot we can forgo the serialisation. -+ */ -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); -+#ifdef CONFIG_SMT_NICE -+ idle->smt_bias = 0; -+#endif -+#endif -+ set_rq_task(rq, idle); -+ -+ /* Silence PROVE_RCU */ -+ rcu_read_lock(); -+ set_task_cpu(idle, cpu); -+ rcu_read_unlock(); -+ -+ rq->idle = idle; -+ rcu_assign_pointer(rq->curr, idle); -+ idle->on_rq = TASK_ON_RQ_QUEUED; -+ raw_spin_unlock(rq->lock); -+ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); -+ -+ /* Set the preempt count _outside_ the spinlocks! 
*/ -+ init_idle_preempt_count(idle, cpu); -+ -+ ftrace_graph_init_idle_task(idle, cpu); -+ vtime_init_idle(idle, cpu); -+#ifdef CONFIG_SMP -+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -+#endif -+} -+ -+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, -+ const struct cpumask __maybe_unused *trial) -+{ -+ return 1; -+} -+ -+int task_can_attach(struct task_struct *p, -+ const struct cpumask *cs_cpus_allowed) -+{ -+ int ret = 0; -+ -+ /* -+ * Kthreads which disallow setaffinity shouldn't be moved -+ * to a new cpuset; we don't want to change their CPU -+ * affinity and isolating such threads by their set of -+ * allowed nodes is unnecessary. Thus, cpusets are not -+ * applicable for such threads. This prevents checking for -+ * success of set_cpus_allowed_ptr() on all attached tasks -+ * before cpus_mask may be changed. -+ */ -+ if (p->flags & PF_NO_SETAFFINITY) -+ ret = -EINVAL; -+ -+ return ret; -+} -+ -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+ rq_lock_irqsave(rq, &rf); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(rq); -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+#ifdef CONFIG_SMP -+#ifdef CONFIG_NO_HZ_COMMON -+void select_nohz_load_balancer(int stop_tick) -+{ -+} -+ -+void set_cpu_sd_state_idle(void) {} -+void nohz_balance_enter_idle(int cpu) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. -+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). -+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(), default_cpu = -1; -+ struct sched_domain *sd; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { -+ if (!idle_cpu(cpu)) -+ return cpu; -+ default_cpu = cpu; -+ } -+ -+ rcu_read_lock(); -+ for_each_domain(cpu, sd) { -+ for_each_cpu_and(i, sched_domain_span(sd), -+ housekeeping_cpumask(HK_FLAG_TIMER)) { -+ if (cpu == i) -+ continue; -+ -+ if (!idle_cpu(i)) { -+ cpu = i; -+ goto unlock; -+ } -+ } -+ } -+ -+ if (default_cpu == -1) -+ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+ cpu = default_cpu; -+unlock: -+ rcu_read_unlock(); -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. -+ */ -+void wake_up_idle_cpu(int cpu) -+{ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ if (set_nr_and_not_polling(cpu_rq(cpu)->idle)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+static bool wake_up_full_nohz_cpu(int cpu) -+{ -+ /* -+ * We just need the target to call irq_exit() and re-evaluate -+ * the next tick. The nohz full kick at least implies that. -+ * If needed we can still optimize that later with an -+ * empty IRQ. -+ */ -+ if (cpu_is_offline(cpu)) -+ return true; /* Don't try to wake offline CPUs. 
*/ -+ if (tick_nohz_full_cpu(cpu)) { -+ if (cpu != smp_processor_id() || -+ tick_nohz_tick_stopped()) -+ tick_nohz_full_kick_cpu(cpu); -+ return true; -+ } -+ -+ return false; -+} -+ -+/* -+ * Wake up the specified CPU. If the CPU is going offline, it is the -+ * caller's responsibility to deal with the lost wakeup, for example, -+ * by hooking into the CPU_DEAD notifier like timers and hrtimers do. -+ */ -+void wake_up_nohz_cpu(int cpu) -+{ -+ if (!wake_up_full_nohz_cpu(cpu)) -+ wake_up_idle_cpu(cpu); -+} -+#endif /* CONFIG_NO_HZ_COMMON */ -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ bool queued = false, running_wrong = false, kthread; -+ unsigned int dest_cpu; -+ struct rq_flags rf; -+ struct rq *rq; -+ int ret = 0; -+ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ kthread = !!(p->flags & PF_KTHREAD); -+ if (kthread) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. -+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (cpumask_equal(p->cpus_ptr, new_mask)) -+ goto out; -+ -+ /* -+ * Picking a ~random cpu helps in cases where we are changing affinity -+ * for groups of tasks (ie. cpuset), so that load balancing is not -+ * immediately required to distribute the tasks within their new mask. -+ */ -+ dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ queued = task_queued(p); -+ __do_set_cpus_allowed(p, new_mask); -+ -+ if (kthread) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. -+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(rq, p)) { -+ /* Task is running on the wrong cpu now, reschedule it. */ -+ if (rq == this_rq()) { -+ set_task_cpu(p, dest_cpu); -+ set_tsk_need_resched(p); -+ running_wrong = true; -+ } else -+ resched_task(p); -+ } else { -+ if (queued) { -+ /* -+ * Switch runqueue locks after dequeueing the task -+ * here while still holding the pi_lock to be holding -+ * the correct lock for enqueueing. 
-+ */ -+ dequeue_task(rq, p, 0); -+ rq_unlock(rq); -+ -+ rq = cpu_rq(dest_cpu); -+ rq_lock(rq); -+ } -+ set_task_cpu(p, dest_cpu); -+ if (queued) -+ enqueue_task(rq, p, 0); -+ } -+ if (queued) -+ try_preempt(p, rq); -+ if (running_wrong) -+ preempt_disable(); -+out: -+ task_rq_unlock(rq, p, &rf); -+ -+ if (running_wrong) { -+ __schedule(true); -+ preempt_enable(); -+ } -+ -+ return ret; -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+#ifdef CONFIG_HOTPLUG_CPU -+/* -+ * Run through task list and find tasks affined to the dead cpu, then remove -+ * that cpu from the list, enable cpu0 and set the zerobound flag. Must hold -+ * cpu 0 and src_cpu's runqueue locks. We should be holding both rq lock and -+ * pi_lock to change cpus_mask but it's not going to matter here. -+ */ -+static void bind_zero(int src_cpu) -+{ -+ struct task_struct *p, *t; -+ struct rq *rq0; -+ int bound = 0; -+ -+ if (src_cpu == 0) -+ return; -+ -+ rq0 = cpu_rq(0); -+ -+ do_each_thread(t, p) { -+ if (cpumask_test_cpu(src_cpu, p->cpus_ptr)) { -+ bool local = (task_cpu(p) == src_cpu); -+ struct rq *rq = task_rq(p); -+ -+ /* task_running is the cpu stopper thread */ -+ if (local && task_running(rq, p)) -+ continue; -+ atomic_clear_cpu(src_cpu, &p->cpus_mask); -+ atomic_set_cpu(0, &p->cpus_mask); -+ p->zerobound = true; -+ bound++; -+ if (local) { -+ bool queued = task_queued(p); -+ -+ if (queued) -+ dequeue_task(rq, p, 0); -+ set_task_cpu(p, 0); -+ if (queued) -+ enqueue_task(rq0, p, 0); -+ } -+ } -+ } while_each_thread(t, p); -+ -+ if (bound) { -+ printk(KERN_INFO "MuQSS removed affinity for %d processes to cpu %d\n", -+ bound, src_cpu); -+ } -+} -+ -+/* Find processes with the zerobound flag and reenable their affinity for the -+ * CPU coming alive. */ -+static void unbind_zero(int src_cpu) -+{ -+ int unbound = 0, zerobound = 0; -+ struct task_struct *p, *t; -+ -+ if (src_cpu == 0) -+ return; -+ -+ do_each_thread(t, p) { -+ if (!p->mm) -+ p->zerobound = false; -+ if (p->zerobound) { -+ unbound++; -+ cpumask_set_cpu(src_cpu, &p->cpus_mask); -+ /* Once every CPU affinity has been re-enabled, remove -+ * the zerobound flag */ -+ if (cpumask_subset(cpu_possible_mask, p->cpus_ptr)) { -+ p->zerobound = false; -+ zerobound++; -+ } -+ } -+ } while_each_thread(t, p); -+ -+ if (unbound) { -+ printk(KERN_INFO "MuQSS added affinity for %d processes to cpu %d\n", -+ unbound, src_cpu); -+ } -+ if (zerobound) { -+ printk(KERN_INFO "MuQSS released forced binding to cpu0 for %d processes\n", -+ zerobound); -+ } -+} -+ -+/* -+ * Ensure that the idle task is using init_mm right before its cpu goes -+ * offline. -+ */ -+void idle_task_exit(void) -+{ -+ struct mm_struct *mm = current->active_mm; -+ -+ BUG_ON(cpu_online(smp_processor_id())); -+ -+ if (mm != &init_mm) { -+ switch_mm(mm, &init_mm, current); -+ current->active_mm = &init_mm; -+ finish_arch_post_lock_switch(); -+ } -+ mmdrop(mm); -+} -+#else /* CONFIG_HOTPLUG_CPU */ -+static void unbind_zero(int src_cpu) {} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. 
-+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. -+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+ -+static struct ctl_table sd_ctl_dir[] = { -+ { -+ .procname = "sched_domain", -+ .mode = 0555, -+ }, -+ {} -+}; -+ -+static struct ctl_table sd_ctl_root[] = { -+ { -+ .procname = "kernel", -+ .mode = 0555, -+ .child = sd_ctl_dir, -+ }, -+ {} -+}; -+ -+static struct ctl_table *sd_alloc_ctl_entry(int n) -+{ -+ struct ctl_table *entry = -+ kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL); -+ -+ return entry; -+} -+ -+static void sd_free_ctl_entry(struct ctl_table **tablep) -+{ -+ struct ctl_table *entry; -+ -+ /* -+ * In the intermediate directories, both the child directory and -+ * procname are dynamically allocated and could fail but the mode -+ * will always be set. In the lowest directory the names are -+ * static strings and all have proc handlers. -+ */ -+ for (entry = *tablep; entry->mode; entry++) { -+ if (entry->child) -+ sd_free_ctl_entry(&entry->child); -+ if (entry->proc_handler == NULL) -+ kfree(entry->procname); -+ } -+ -+ kfree(*tablep); -+ *tablep = NULL; -+} -+ -+static void -+set_table_entry(struct ctl_table *entry, -+ const char *procname, void *data, int maxlen, -+ umode_t mode, proc_handler *proc_handler) -+{ -+ entry->procname = procname; -+ entry->data = data; -+ entry->maxlen = maxlen; -+ entry->mode = mode; -+ entry->proc_handler = proc_handler; -+} -+ -+static struct ctl_table * -+sd_alloc_ctl_domain_table(struct sched_domain *sd) -+{ -+ struct ctl_table *table = sd_alloc_ctl_entry(9); -+ -+ if (table == NULL) -+ return NULL; -+ -+ set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring); -+ /* &table[8] is terminator */ -+ -+ return table; -+} -+ -+static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu) -+{ -+ struct ctl_table *entry, *table; -+ struct sched_domain *sd; -+ int domain_num = 0, i; -+ char buf[32]; -+ -+ for_each_domain(cpu, sd) -+ domain_num++; -+ entry = table = sd_alloc_ctl_entry(domain_num + 1); -+ if (table == NULL) -+ return NULL; -+ -+ i = 0; -+ for_each_domain(cpu, sd) { -+ snprintf(buf, 32, "domain%d", i); -+ entry->procname = kstrdup(buf, GFP_KERNEL); -+ entry->mode = 0555; -+ entry->child = sd_alloc_ctl_domain_table(sd); -+ entry++; -+ i++; -+ } -+ return table; -+} -+ -+static cpumask_var_t sd_sysctl_cpus; -+static struct ctl_table_header 
*sd_sysctl_header; -+ -+void register_sched_domain_sysctl(void) -+{ -+ static struct ctl_table *cpu_entries; -+ static struct ctl_table **cpu_idx; -+ char buf[32]; -+ int i; -+ -+ if (!cpu_entries) { -+ cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1); -+ if (!cpu_entries) -+ return; -+ -+ WARN_ON(sd_ctl_dir[0].child); -+ sd_ctl_dir[0].child = cpu_entries; -+ } -+ -+ if (!cpu_idx) { -+ struct ctl_table *e = cpu_entries; -+ -+ cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL); -+ if (!cpu_idx) -+ return; -+ -+ /* deal with sparse possible map */ -+ for_each_possible_cpu(i) { -+ cpu_idx[i] = e; -+ e++; -+ } -+ } -+ -+ if (!cpumask_available(sd_sysctl_cpus)) { -+ if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL)) -+ return; -+ -+ /* init to possible to not have holes in @cpu_entries */ -+ cpumask_copy(sd_sysctl_cpus, cpu_possible_mask); -+ } -+ -+ for_each_cpu(i, sd_sysctl_cpus) { -+ struct ctl_table *e = cpu_idx[i]; -+ -+ if (e->child) -+ sd_free_ctl_entry(&e->child); -+ -+ if (!e->procname) { -+ snprintf(buf, 32, "cpu%d", i); -+ e->procname = kstrdup(buf, GFP_KERNEL); -+ } -+ e->mode = 0555; -+ e->child = sd_alloc_ctl_cpu_table(i); -+ -+ __cpumask_clear_cpu(i, sd_sysctl_cpus); -+ } -+ -+ WARN_ON(sd_sysctl_header); -+ sd_sysctl_header = register_sysctl_table(sd_ctl_root); -+} -+ -+void dirty_sched_domain_sysctl(int cpu) -+{ -+ if (cpumask_available(sd_sysctl_cpus)) -+ __cpumask_set_cpu(cpu, sd_sysctl_cpus); -+} -+ -+/* may be called multiple times per register */ -+void unregister_sched_domain_sysctl(void) -+{ -+ unregister_sysctl_table(sd_sysctl_header); -+ sd_sysctl_header = NULL; -+} -+#endif /* CONFIG_SYSCTL */ -+ -+void set_rq_online(struct rq *rq) -+{ -+ if (!rq->online) { -+ cpumask_set_cpu(cpu_of(rq), rq->rd->online); -+ rq->online = true; -+ } -+} -+ -+void set_rq_offline(struct rq *rq) -+{ -+ if (rq->online) { -+ int cpu = cpu_of(rq); -+ -+ cpumask_clear_cpu(cpu, rq->rd->online); -+ rq->online = false; -+ clear_cpuidle_map(cpu); -+ } -+} -+ -+/* -+ * used to mark begin/end of suspend/resume: -+ */ -+static int num_cpus_frozen; -+ -+/* -+ * Update cpusets according to cpu_active mask. If cpusets are -+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper -+ * around partition_sched_domains(). -+ * -+ * If we come here as part of a suspend/resume, don't touch cpusets because we -+ * want to restore it back to its original state upon resume anyway. -+ */ -+static void cpuset_cpu_active(void) -+{ -+ if (cpuhp_tasks_frozen) { -+ /* -+ * num_cpus_frozen tracks how many CPUs are involved in suspend -+ * resume sequence. As long as this is not the last online -+ * operation in the resume sequence, just build a single sched -+ * domain, ignoring cpusets. -+ */ -+ partition_sched_domains(1, NULL, NULL); -+ if (--num_cpus_frozen) -+ return; -+ /* -+ * This is the last CPU online operation. So fall through and -+ * restore the original sched domains by considering the -+ * cpuset configurations. -+ */ -+ cpuset_force_rebuild(); -+ } -+ -+ cpuset_update_active_cpus(); -+} -+ -+static int cpuset_cpu_inactive(unsigned int cpu) -+{ -+ if (!cpuhp_tasks_frozen) { -+ cpuset_update_active_cpus(); -+ } else { -+ num_cpus_frozen++; -+ partition_sched_domains(1, NULL, NULL); -+ } -+ return 0; -+} -+ -+int sched_cpu_activate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going up, increment the number of cores with SMT present. 
-+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_inc_cpuslocked(&sched_smt_present); -+#endif -+ set_cpu_active(cpu, true); -+ -+ if (sched_smp_initialized) { -+ sched_domains_numa_masks_set(cpu); -+ cpuset_cpu_active(); -+ } -+ -+ /* -+ * Put the rq online, if not already. This happens: -+ * -+ * 1) In the early boot process, because we build the real domains -+ * after all CPUs have been brought up. -+ * -+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the -+ * domains. -+ */ -+ rq_lock_irqsave(rq, &rf); -+ if (rq->rd) { -+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); -+ set_rq_online(rq); -+ } -+ unbind_zero(cpu); -+ rq_unlock_irqrestore(rq, &rf); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ int ret; -+ -+ set_cpu_active(cpu, false); -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. -+ */ -+ synchronize_rcu(); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going down, decrement the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_dec_cpuslocked(&sched_smt_present); -+#endif -+ -+ if (!sched_smp_initialized) -+ return 0; -+ -+ ret = cpuset_cpu_inactive(cpu); -+ if (ret) { -+ set_cpu_active(cpu, true); -+ return ret; -+ } -+ sched_domains_numa_masks_clear(cpu); -+ return 0; -+} -+ -+int sched_cpu_starting(unsigned int cpu) -+{ -+ sched_tick_start(cpu); -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+int sched_cpu_dying(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ /* Handle pending wakeups and then migrate everything off */ -+ sched_ttwu_pending(); -+ sched_tick_stop(cpu); -+ -+ local_irq_save(flags); -+ double_rq_lock(rq, cpu_rq(0)); -+ if (rq->rd) { -+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); -+ set_rq_offline(rq); -+ } -+ bind_zero(cpu); -+ double_rq_unlock(rq, cpu_rq(0)); -+ sched_start_tick(rq, cpu); -+ hrexpiry_clear(rq); -+ local_irq_restore(flags); -+ -+ return 0; -+} -+#endif -+ -+#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC) -+/* -+ * Cheaper version of the below functions in case support for SMT and MC is -+ * compiled in but CPUs have no siblings. 
-+ */ -+static bool sole_cpu_idle(struct rq *rq) -+{ -+ return rq_idle(rq); -+} -+#endif -+#ifdef CONFIG_SCHED_SMT -+static const cpumask_t *thread_cpumask(int cpu) -+{ -+ return topology_sibling_cpumask(cpu); -+} -+/* All this CPU's SMT siblings are idle */ -+static bool siblings_cpu_idle(struct rq *rq) -+{ -+ return cpumask_subset(&rq->thread_mask, &cpu_idle_map); -+} -+#endif -+#ifdef CONFIG_SCHED_MC -+static const cpumask_t *core_cpumask(int cpu) -+{ -+ return topology_core_cpumask(cpu); -+} -+/* All this CPU's shared cache siblings are idle */ -+static bool cache_cpu_idle(struct rq *rq) -+{ -+ return cpumask_subset(&rq->core_mask, &cpu_idle_map); -+} -+/* MC siblings CPU mask which share the same LLC */ -+static const cpumask_t *llc_core_cpumask(int cpu) -+{ -+#ifdef CONFIG_X86 -+ return per_cpu(cpu_llc_shared_map, cpu); -+#else -+ return topology_core_cpumask(cpu); -+#endif -+} -+#endif -+ -+enum sched_domain_level { -+ SD_LV_NONE = 0, -+ SD_LV_SIBLING, -+ SD_LV_MC, -+ SD_LV_BOOK, -+ SD_LV_CPU, -+ SD_LV_NODE, -+ SD_LV_ALLNODES, -+ SD_LV_MAX -+}; -+ -+/* -+ * Set up the relative cache distance of each online cpu from each -+ * other in a simple array for quick lookup. Locality is determined -+ * by the closest sched_domain that CPUs are separated by. CPUs with -+ * shared cache in SMT and MC are treated as local. Separate CPUs -+ * (within the same package or physically) within the same node are -+ * treated as not local. CPUs not even in the same domain (different -+ * nodes) are treated as very distant. -+ */ -+static void __init select_leaders(void) -+{ -+ struct rq *rq, *other_rq, *leader; -+ struct sched_domain *sd; -+ int cpu, other_cpu; -+#ifdef CONFIG_SCHED_SMT -+ bool smt_threads = false; -+#endif -+ -+ for (cpu = 0; cpu < num_online_cpus(); cpu++) { -+ rq = cpu_rq(cpu); -+ leader = NULL; -+ /* First check if this cpu is in the same node */ -+ for_each_domain(cpu, sd) { -+ if (sd->level > SD_LV_MC) -+ continue; -+ if (rqshare != RQSHARE_ALL) -+ leader = NULL; -+ /* Set locality to local node if not already found lower */ -+ for_each_cpu(other_cpu, sched_domain_span(sd)) { -+ if (rqshare >= RQSHARE_SMP) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the smp_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ if (!other_rq->smp_leader) -+ other_rq->smp_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_SMP) -+ rq->cpu_locality[other_cpu] = LOCALITY_SMP; -+ } -+ } -+ -+ /* -+ * Each runqueue has its own function in case it doesn't have -+ * siblings of its own allowing mixed topologies. 
-+ */ -+#ifdef CONFIG_SCHED_MC -+ leader = NULL; -+ if (cpumask_weight(core_cpumask(cpu)) > 1) { -+ cpumask_copy(&rq->core_mask, llc_core_cpumask(cpu)); -+ cpumask_clear_cpu(cpu, &rq->core_mask); -+ for_each_cpu(other_cpu, core_cpumask(cpu)) { -+ if (rqshare == RQSHARE_MC || -+ (rqshare == RQSHARE_MC_LLC && cpumask_test_cpu(other_cpu, llc_core_cpumask(cpu)))) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the mc_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ if (!other_rq->mc_leader) -+ other_rq->mc_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_MC) { -+ /* this is to get LLC into play even in case LLC sharing is not used */ -+ if (cpumask_test_cpu(other_cpu, llc_core_cpumask(cpu))) -+ rq->cpu_locality[other_cpu] = LOCALITY_MC_LLC; -+ else -+ rq->cpu_locality[other_cpu] = LOCALITY_MC; -+ } -+ } -+ rq->cache_idle = cache_cpu_idle; -+ } -+#endif -+#ifdef CONFIG_SCHED_SMT -+ leader = NULL; -+ if (cpumask_weight(thread_cpumask(cpu)) > 1) { -+ cpumask_copy(&rq->thread_mask, thread_cpumask(cpu)); -+ cpumask_clear_cpu(cpu, &rq->thread_mask); -+ for_each_cpu(other_cpu, thread_cpumask(cpu)) { -+ if (rqshare == RQSHARE_SMT) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the smt_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ if (!other_rq->smt_leader) -+ other_rq->smt_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_SMT) -+ rq->cpu_locality[other_cpu] = LOCALITY_SMT; -+ } -+ rq->siblings_idle = siblings_cpu_idle; -+ smt_threads = true; -+ } -+#endif -+ } -+ -+#ifdef CONFIG_SMT_NICE -+ if (smt_threads) { -+ check_siblings = &check_smt_siblings; -+ wake_siblings = &wake_smt_siblings; -+ smt_schedule = &smt_should_schedule; -+ } -+#endif -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for_each_online_cpu(other_cpu) { -+ printk(KERN_DEBUG "MuQSS locality CPU %d to %d: %d\n", cpu, other_cpu, rq->cpu_locality[other_cpu]); -+ } -+ } -+} -+ -+/* FIXME freeing locked spinlock */ -+static void __init share_and_free_rq(struct rq *leader, struct rq *rq) -+{ -+ WARN_ON(rq->nr_running > 0); -+ -+ kfree(rq->node); -+ kfree(rq->sl); -+ kfree(rq->lock); -+ rq->node = leader->node; -+ rq->sl = leader->sl; -+ rq->lock = leader->lock; -+ rq->is_leader = false; -+ barrier(); -+ /* To make up for not unlocking the freed runlock */ -+ preempt_enable(); -+} -+ -+static void __init share_rqs(void) -+{ -+ struct rq *rq, *leader; -+ int cpu; -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ leader = rq->smp_leader; -+ -+ rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing SMP runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ share_and_free_rq(leader, rq); -+ } else -+ rq_unlock(rq); -+ } -+ -+#ifdef CONFIG_SCHED_MC -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ leader = rq->mc_leader; -+ -+ rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing MC runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ share_and_free_rq(leader, rq); -+ } else -+ rq_unlock(rq); -+ } -+#endif /* CONFIG_SCHED_MC */ -+ -+#ifdef CONFIG_SCHED_SMT -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ leader = rq->smt_leader; -+ -+ rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing SMT runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ share_and_free_rq(leader, rq); -+ } else -+ rq_unlock(rq); -+ } -+#endif /* CONFIG_SCHED_SMT */ -+} -+ -+static void __init setup_rq_orders(void) -+{ -+ int *selected_cpus, *ordered_cpus; -+ struct rq *rq, 
*other_rq; -+ int cpu, other_cpu, i; -+ -+ selected_cpus = kmalloc(sizeof(int) * NR_CPUS, GFP_ATOMIC); -+ ordered_cpus = kmalloc(sizeof(int) * NR_CPUS, GFP_ATOMIC); -+ -+ total_runqueues = 0; -+ for_each_online_cpu(cpu) { -+ int locality, total_rqs = 0, total_cpus = 0; -+ -+ rq = cpu_rq(cpu); -+ if (rq->is_leader) -+ total_runqueues++; -+ -+ for (locality = LOCALITY_SAME; locality <= LOCALITY_DISTANT; locality++) { -+ int selected_cpu_cnt, selected_cpu_idx, test_cpu_idx, cpu_idx, best_locality, test_cpu; -+ int ordered_cpus_idx; -+ -+ ordered_cpus_idx = -1; -+ selected_cpu_cnt = 0; -+ -+ for_each_online_cpu(test_cpu) { -+ if (cpu < num_online_cpus() / 2) -+ other_cpu = cpu + test_cpu; -+ else -+ other_cpu = cpu - test_cpu; -+ if (other_cpu < 0) -+ other_cpu += num_online_cpus(); -+ else -+ other_cpu %= num_online_cpus(); -+ /* gather CPUs of the same locality */ -+ if (rq->cpu_locality[other_cpu] == locality) { -+ selected_cpus[selected_cpu_cnt] = other_cpu; -+ selected_cpu_cnt++; -+ } -+ } -+ -+ /* reserve first CPU as starting point */ -+ if (selected_cpu_cnt > 0) { -+ ordered_cpus_idx++; -+ ordered_cpus[ordered_cpus_idx] = selected_cpus[ordered_cpus_idx]; -+ selected_cpus[ordered_cpus_idx] = -1; -+ } -+ -+ /* take each CPU and sort it within the same locality based on each inter-CPU localities */ -+ for(test_cpu_idx = 1; test_cpu_idx < selected_cpu_cnt; test_cpu_idx++) { -+ /* starting point with worst locality and current CPU */ -+ best_locality = LOCALITY_DISTANT; -+ selected_cpu_idx = test_cpu_idx; -+ -+ /* try to find the best locality within group */ -+ for(cpu_idx = 1; cpu_idx < selected_cpu_cnt; cpu_idx++) { -+ /* if CPU has not been used and locality is better */ -+ if (selected_cpus[cpu_idx] > -1) { -+ other_rq = cpu_rq(ordered_cpus[ordered_cpus_idx]); -+ if (best_locality > other_rq->cpu_locality[selected_cpus[cpu_idx]]) { -+ /* assign best locality and best CPU idx in array */ -+ best_locality = other_rq->cpu_locality[selected_cpus[cpu_idx]]; -+ selected_cpu_idx = cpu_idx; -+ } -+ } -+ } -+ -+ /* add our next best CPU to ordered list */ -+ ordered_cpus_idx++; -+ ordered_cpus[ordered_cpus_idx] = selected_cpus[selected_cpu_idx]; -+ /* mark this CPU as used */ -+ selected_cpus[selected_cpu_idx] = -1; -+ } -+ -+ /* set up RQ and CPU orders */ -+ for (test_cpu = 0; test_cpu <= ordered_cpus_idx; test_cpu++) { -+ other_rq = cpu_rq(ordered_cpus[test_cpu]); -+ /* set up cpu orders */ -+ rq->cpu_order[total_cpus++] = other_rq; -+ if (other_rq->is_leader) { -+ /* set up RQ orders */ -+ rq->rq_order[total_rqs++] = other_rq; -+ } -+ } -+ } -+ } -+ -+ kfree(selected_cpus); -+ kfree(ordered_cpus); -+ -+#ifdef CONFIG_X86 -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for (i = 0; i < total_runqueues; i++) { -+ printk(KERN_DEBUG "MuQSS CPU %d llc %d RQ order %d RQ %d llc %d\n", cpu, per_cpu(cpu_llc_id, cpu), i, -+ rq->rq_order[i]->cpu, per_cpu(cpu_llc_id, rq->rq_order[i]->cpu)); -+ } -+ } -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for (i = 0; i < num_online_cpus(); i++) { -+ printk(KERN_DEBUG "MuQSS CPU %d llc %d CPU order %d RQ %d llc %d\n", cpu, per_cpu(cpu_llc_id, cpu), i, -+ rq->cpu_order[i]->cpu, per_cpu(cpu_llc_id, rq->cpu_order[i]->cpu)); -+ } -+ } -+#endif -+} -+ -+void __init sched_init_smp(void) -+{ -+ sched_init_numa(); -+ -+ /* -+ * There's no userspace yet to cause hotplug operations; hence all the -+ * cpu masks are stable and all blatant races in the below code cannot -+ * happen. 
-+ */ -+ mutex_lock(&sched_domains_mutex); -+ sched_init_domains(cpu_active_mask); -+ mutex_unlock(&sched_domains_mutex); -+ -+ /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -+ BUG(); -+ -+ local_irq_disable(); -+ mutex_lock(&sched_domains_mutex); -+ lock_all_rqs(); -+ -+ printk(KERN_INFO "MuQSS possible/present/online CPUs: %d/%d/%d\n", -+ num_possible_cpus(), num_present_cpus(), num_online_cpus()); -+ -+ select_leaders(); -+ -+ unlock_all_rqs(); -+ mutex_unlock(&sched_domains_mutex); -+ -+ share_rqs(); -+ -+ local_irq_enable(); -+ -+ setup_rq_orders(); -+ -+ switch (rqshare) { -+ case RQSHARE_ALL: -+ /* This should only ever read 1 */ -+ printk(KERN_INFO "MuQSS runqueue share type ALL total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_SMP: -+ printk(KERN_INFO "MuQSS runqueue share type SMP total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_MC: -+ printk(KERN_INFO "MuQSS runqueue share type MC total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_MC_LLC: -+ printk(KERN_INFO "MuQSS runqueue share type LLC total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_SMT: -+ printk(KERN_INFO "MuQSS runqueue share type SMT total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_NONE: -+ printk(KERN_INFO "MuQSS runqueue share type NONE total runqueues: %d\n", -+ total_runqueues); -+ break; -+ } -+ -+ sched_smp_initialized = true; -+} -+#else -+void __init sched_init_smp(void) -+{ -+ sched_smp_initialized = true; -+} -+#endif /* CONFIG_SMP */ -+ -+int in_sched_functions(unsigned long addr) -+{ -+ return in_lock_functions(addr) || -+ (addr >= (unsigned long)__sched_text_start -+ && addr < (unsigned long)__sched_text_end); -+} -+ -+#ifdef CONFIG_CGROUP_SCHED -+/* task group related information */ -+struct task_group { -+ struct cgroup_subsys_state css; -+ -+ struct rcu_head rcu; -+ struct list_head list; -+ -+ struct task_group *parent; -+ struct list_head siblings; -+ struct list_head children; -+}; -+ -+/* -+ * Default task group. -+ * Every task in system belongs to this group at bootup. 
-+ */ -+struct task_group root_task_group; -+LIST_HEAD(task_groups); -+ -+/* Cacheline aligned slab cache for task_group */ -+static struct kmem_cache *task_group_cache __read_mostly; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void __init sched_init(void) -+{ -+#ifdef CONFIG_SMP -+ int cpu_ids; -+#endif -+ int i; -+ struct rq *rq; -+ -+ wait_bit_init(); -+ -+ prio_ratios[0] = 128; -+ for (i = 1 ; i < NICE_WIDTH ; i++) -+ prio_ratios[i] = prio_ratios[i - 1] * 11 / 10; -+ -+ skiplist_node_init(&init_task.node); -+ -+#ifdef CONFIG_SMP -+ init_defrootdomain(); -+ cpumask_clear(&cpu_idle_map); -+#else -+ uprq = &per_cpu(runqueues, 0); -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+ task_group_cache = KMEM_CACHE(task_group, 0); -+ -+ list_add(&root_task_group.list, &task_groups); -+ INIT_LIST_HEAD(&root_task_group.children); -+ INIT_LIST_HEAD(&root_task_group.siblings); -+#endif /* CONFIG_CGROUP_SCHED */ -+ for_each_possible_cpu(i) { -+ rq = cpu_rq(i); -+ rq->node = kmalloc(sizeof(skiplist_node), GFP_ATOMIC); -+ skiplist_init(rq->node); -+ rq->sl = new_skiplist(rq->node); -+ rq->lock = kmalloc(sizeof(raw_spinlock_t), GFP_ATOMIC); -+ raw_spin_lock_init(rq->lock); -+ rq->nr_running = 0; -+ rq->nr_uninterruptible = 0; -+ rq->nr_switches = 0; -+ rq->clock = rq->old_clock = rq->last_niffy = rq->niffies = 0; -+ rq->last_jiffy = jiffies; -+ rq->user_ns = rq->nice_ns = rq->softirq_ns = rq->system_ns = -+ rq->iowait_ns = rq->idle_ns = 0; -+ rq->dither = 0; -+ set_rq_task(rq, &init_task); -+ rq->iso_ticks = 0; -+ rq->iso_refractory = false; -+#ifdef CONFIG_SMP -+ rq->is_leader = true; -+ rq->smp_leader = NULL; -+#ifdef CONFIG_SCHED_MC -+ rq->mc_leader = NULL; -+#endif -+#ifdef CONFIG_SCHED_SMT -+ rq->smt_leader = NULL; -+#endif -+ rq->sd = NULL; -+ rq->rd = NULL; -+ rq->online = false; -+ rq->cpu = i; -+ rq_attach_root(rq, &def_root_domain); -+#endif -+ init_rq_hrexpiry(rq); -+ atomic_set(&rq->nr_iowait, 0); -+ } -+ -+#ifdef CONFIG_SMP -+ cpu_ids = i; -+ /* -+ * Set the base locality for cpu cache distance calculation to -+ * "distant" (3). Make sure the distance from a CPU to itself is 0. -+ */ -+ for_each_possible_cpu(i) { -+ int j; -+ -+ rq = cpu_rq(i); -+#ifdef CONFIG_SCHED_SMT -+ rq->siblings_idle = sole_cpu_idle; -+#endif -+#ifdef CONFIG_SCHED_MC -+ rq->cache_idle = sole_cpu_idle; -+#endif -+ rq->cpu_locality = kmalloc(cpu_ids * sizeof(int *), GFP_ATOMIC); -+ for_each_possible_cpu(j) { -+ if (i == j) -+ rq->cpu_locality[j] = LOCALITY_SAME; -+ else -+ rq->cpu_locality[j] = LOCALITY_DISTANT; -+ } -+ rq->rq_order = kmalloc(cpu_ids * sizeof(struct rq *), GFP_ATOMIC); -+ rq->cpu_order = kmalloc(cpu_ids * sizeof(struct rq *), GFP_ATOMIC); -+ rq->rq_order[0] = rq->cpu_order[0] = rq; -+ for (j = 1; j < cpu_ids; j++) -+ rq->rq_order[j] = rq->cpu_order[j] = cpu_rq(j); -+ } -+#endif -+ -+ /* -+ * The boot idle thread does lazy MMU switching as well: -+ */ -+ mmgrab(&init_mm); -+ enter_lazy_tlb(&init_mm, current); -+ -+ /* -+ * Make us the idle thread. Technically, schedule() should not be -+ * called from this thread, however somewhere below it might be, -+ * but because we are the idle thread, we just pick up running again -+ * when this runqueue becomes "idle". 
-+ */ -+ init_idle(current, smp_processor_id()); -+ -+#ifdef CONFIG_SMP -+ idle_thread_set_boot_cpu(); -+#endif /* SMP */ -+ -+ init_schedstats(); -+ -+ psi_init(); -+} -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+static inline int preempt_count_equals(int preempt_offset) -+{ -+ int nested = preempt_count() + rcu_preempt_depth(); -+ -+ return (nested == preempt_offset); -+} -+ -+void __might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* -+ * Blocking primitives will set (and therefore destroy) current->state, -+ * since we will exit with TASK_RUNNING make sure we enter with it, -+ * otherwise we will destroy state. -+ */ -+ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, -+ "do not call blocking ops when !TASK_RUNNING; " -+ "state=%lx set at [<%p>] %pS\n", -+ current->state, -+ (void *)current->task_state_change, -+ (void *)current->task_state_change); -+ -+ ___might_sleep(file, line, preempt_offset); -+} -+EXPORT_SYMBOL(__might_sleep); -+ -+void __cant_sleep(const char *file, int line, int preempt_offset) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > preempt_offset) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); -+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_sleep); -+ -+void ___might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* Ratelimiting timestamp: */ -+ static unsigned long prev_jiffy; -+ -+ unsigned long preempt_disable_ip; -+ -+ /* WARN_ON_ONCE() by default, no rate limit required: */ -+ rcu_sleep_check(); -+ -+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ !is_idle_task(current) && !current->non_block_count) || -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ /* Save this before calling printk(), since that will clobber it: */ -+ preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ printk(KERN_ERR -+ "BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ printk(KERN_ERR -+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); -+ -+ if (task_stack_end_corrupted(current)) -+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ -+ debug_show_held_locks(current); -+ if (irqs_disabled()) -+ print_irqtrace_events(current); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && !preempt_count_equals(preempt_offset)) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL(___might_sleep); -+#endif -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+static inline void normalise_rt_tasks(void) -+{ -+ struct sched_attr attr = {}; -+ struct task_struct *g, *p; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ read_lock(&tasklist_lock); -+ for_each_process_thread(g, p) { -+ /* -+ * Only normalize user tasks: -+ */ -+ if (p->flags & 
PF_KTHREAD) -+ continue; -+ -+ if (!rt_task(p) && !iso_task(p)) -+ continue; -+ -+ rq = task_rq_lock(p, &rf); -+ __setscheduler(p, rq, SCHED_NORMAL, 0, &attr, false); -+ task_rq_unlock(rq, p, &rf); -+ } -+ read_unlock(&tasklist_lock); -+} -+ -+void normalize_rt_tasks(void) -+{ -+ normalise_rt_tasks(); -+} -+#endif /* CONFIG_MAGIC_SYSRQ */ -+ -+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) -+/* -+ * These functions are only useful for the IA64 MCA handling, or kdb. -+ * -+ * They can only be called when the whole system has been -+ * stopped - every CPU needs to be quiescent, and no scheduling -+ * activity can take place. Using them for anything else would -+ * be a serious bug, and as a result, they aren't even visible -+ * under any other configuration. -+ */ -+ -+/** -+ * curr_task - return the current task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ * -+ * Return: The current task for @cpu. -+ */ -+struct task_struct *curr_task(int cpu) -+{ -+ return cpu_curr(cpu); -+} -+ -+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ -+ -+#ifdef CONFIG_IA64 -+/** -+ * ia64_set_curr_task - set the current task for a given CPU. -+ * @cpu: the processor in question. -+ * @p: the task pointer to set. -+ * -+ * Description: This function must only be used when non-maskable interrupts -+ * are serviced on a separate stack. It allows the architecture to switch the -+ * notion of the current task on a CPU in a non-blocking manner. This function -+ * must be called with all CPU's synchronised, and interrupts disabled, the -+ * and caller must save the original value of the current task (see -+ * curr_task() above) and restore that value before reenabling interrupts and -+ * re-starting the system. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ */ -+void ia64_set_curr_task(int cpu, struct task_struct *p) -+{ -+ cpu_curr(cpu) = p; -+} -+ -+#endif -+ -+void init_idle_bootup_task(struct task_struct *idle) -+{} -+ -+#ifdef CONFIG_SCHED_DEBUG -+__read_mostly bool sched_debug_enabled; -+ -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{ -+ seq_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+static void sched_free_group(struct task_group *tg) -+{ -+ kmem_cache_free(task_group_cache, tg); -+} -+ -+/* allocate runqueue etc for a new task group */ -+struct task_group *sched_create_group(struct task_group *parent) -+{ -+ struct task_group *tg; -+ -+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); -+ if (!tg) -+ return ERR_PTR(-ENOMEM); -+ -+ return tg; -+} -+ -+void sched_online_group(struct task_group *tg, struct task_group *parent) -+{ -+} -+ -+/* rcu callback to free various structures associated with a task group */ -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ /* Now it should be safe to free those cfs_rqs */ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+void sched_destroy_group(struct task_group *tg) -+{ -+ /* Wait for possible concurrent references to cfs_rqs complete */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ -+void sched_offline_group(struct task_group *tg) -+{ -+} -+ -+static inline struct task_group *css_tg(struct cgroup_subsys_state *css) -+{ -+ return css ? 
container_of(css, struct task_group, css) : NULL; -+} -+ -+static struct cgroup_subsys_state * -+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -+{ -+ struct task_group *parent = css_tg(parent_css); -+ struct task_group *tg; -+ -+ if (!parent) { -+ /* This is early initialization for the top cgroup */ -+ return &root_task_group.css; -+ } -+ -+ tg = sched_create_group(parent); -+ if (IS_ERR(tg)) -+ return ERR_PTR(-ENOMEM); -+ return &tg->css; -+} -+ -+/* Expose task group only after completing cgroup initialization */ -+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ struct task_group *parent = css_tg(css->parent); -+ -+ if (parent) -+ sched_online_group(tg, parent); -+ return 0; -+} -+ -+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ sched_offline_group(tg); -+} -+ -+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ /* -+ * Relies on the RCU grace period between css_released() and this. -+ */ -+ sched_free_group(tg); -+} -+ -+static void cpu_cgroup_fork(struct task_struct *task) -+{ -+} -+ -+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) -+{ -+ return 0; -+} -+ -+static void cpu_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+ -+static struct cftype cpu_legacy_files[] = { -+ { } /* Terminate */ -+}; -+ -+static struct cftype cpu_files[] = { -+ { } /* terminate */ -+}; -+ -+static int cpu_extra_stat_show(struct seq_file *sf, -+ struct cgroup_subsys_state *css) -+{ -+ return 0; -+} -+ -+struct cgroup_subsys cpu_cgrp_subsys = { -+ .css_alloc = cpu_cgroup_css_alloc, -+ .css_online = cpu_cgroup_css_online, -+ .css_released = cpu_cgroup_css_released, -+ .css_free = cpu_cgroup_css_free, -+ .css_extra_stat_show = cpu_extra_stat_show, -+ .fork = cpu_cgroup_fork, -+ .can_attach = cpu_cgroup_can_attach, -+ .attach = cpu_cgroup_attach, -+ .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, -+ .early_init = true, -+ .threaded = true, -+}; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+#undef CREATE_TRACE_POINTS -diff --git a/kernel/sched/MuQSS.h b/kernel/sched/MuQSS.h -new file mode 100644 -index 000000000000..b34f2797e44f ---- /dev/null -+++ b/kernel/sched/MuQSS.h -@@ -0,0 +1,1056 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef MUQSS_SCHED_H -+#define MUQSS_SCHED_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_PARAVIRT -+#include -+#endif -+ -+#include "cpupri.h" -+ -+#ifdef CONFIG_SCHED_DEBUG -+# define SCHED_WARN_ON(x) WARN_ONCE(x, #x) -+#else -+# define SCHED_WARN_ON(x) ((void)(x)) -+#endif -+ -+/* -+ * wake flags -+ */ -+#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -+#define WF_FORK 0x02 /* child wakeup after fork */ -+#define WF_MIGRATED 0x04 /* internal use, task got migrated */ -+ -+/* task_struct::on_rq states: */ -+#define TASK_ON_RQ_QUEUED 1 -+#define TASK_ON_RQ_MIGRATING 2 -+ -+struct rq; -+ -+#ifdef CONFIG_SMP -+ -+static inline bool sched_asym_prefer(int a, int b) -+{ -+ 
return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b); -+} -+ -+struct perf_domain { -+ struct em_perf_domain *em_pd; -+ struct perf_domain *next; -+ struct rcu_head rcu; -+}; -+ -+/* Scheduling group status flags */ -+#define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */ -+#define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */ -+ -+/* -+ * We add the notion of a root-domain which will be used to define per-domain -+ * variables. Each exclusive cpuset essentially defines an island domain by -+ * fully partitioning the member cpus from any other cpuset. Whenever a new -+ * exclusive cpuset is created, we also create and attach a new root-domain -+ * object. -+ * -+ */ -+struct root_domain { -+ atomic_t refcount; -+ atomic_t rto_count; -+ struct rcu_head rcu; -+ cpumask_var_t span; -+ cpumask_var_t online; -+ -+ /* -+ * Indicate pullable load on at least one CPU, e.g: -+ * - More than one runnable task -+ * - Running task is misfit -+ */ -+ int overload; -+ -+ /* Indicate one or more cpus over-utilized (tipping point) */ -+ int overutilized; -+ -+ /* -+ * The bit corresponding to a CPU gets set here if such CPU has more -+ * than one runnable -deadline task (as it is below for RT tasks). -+ */ -+ cpumask_var_t dlo_mask; -+ atomic_t dlo_count; -+ /* Replace unused CFS structures with void */ -+ //struct dl_bw dl_bw; -+ //struct cpudl cpudl; -+ void *dl_bw; -+ void *cpudl; -+ -+ /* -+ * The "RT overload" flag: it gets set if a CPU has more than -+ * one runnable RT task. -+ */ -+ cpumask_var_t rto_mask; -+ //struct cpupri cpupri; -+ void *cpupri; -+ -+ unsigned long max_cpu_capacity; -+ -+ /* -+ * NULL-terminated list of performance domains intersecting with the -+ * CPUs of the rd. Protected by RCU. -+ */ -+ struct perf_domain *pd; -+}; -+ -+extern void init_defrootdomain(void); -+extern int sched_init_domains(const struct cpumask *cpu_map); -+extern void rq_attach_root(struct rq *rq, struct root_domain *rd); -+ -+static inline void cpupri_cleanup(void __maybe_unused *cpupri) -+{ -+} -+ -+static inline void cpudl_cleanup(void __maybe_unused *cpudl) -+{ -+} -+ -+static inline void init_dl_bw(void __maybe_unused *dl_bw) -+{ -+} -+ -+static inline int cpudl_init(void __maybe_unused *dl_bw) -+{ -+ return 0; -+} -+ -+static inline int cpupri_init(void __maybe_unused *cpupri) -+{ -+ return 0; -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * This is the main, per-CPU runqueue data structure. -+ * This data should only be modified by the local cpu. -+ */ -+struct rq { -+ raw_spinlock_t *lock; -+ raw_spinlock_t *orig_lock; -+ -+ struct task_struct __rcu *curr; -+ struct task_struct *idle; -+ struct task_struct *stop; -+ struct mm_struct *prev_mm; -+ -+ unsigned int nr_running; -+ /* -+ * This is part of a global counter where only the total sum -+ * over all CPUs matters. A task can increase this counter on -+ * one CPU and if it got migrated afterwards it may decrease -+ * it on another CPU. 
Always updated under the runqueue lock: -+ */ -+ unsigned long nr_uninterruptible; -+ u64 nr_switches; -+ -+ /* Stored data about rq->curr to work outside rq lock */ -+ u64 rq_deadline; -+ int rq_prio; -+ -+ /* Best queued id for use outside lock */ -+ u64 best_key; -+ -+ unsigned long last_scheduler_tick; /* Last jiffy this RQ ticked */ -+ unsigned long last_jiffy; /* Last jiffy this RQ updated rq clock */ -+ u64 niffies; /* Last time this RQ updated rq clock */ -+ u64 last_niffy; /* Last niffies as updated by local clock */ -+ u64 last_jiffy_niffies; /* Niffies @ last_jiffy */ -+ -+ u64 load_update; /* When we last updated load */ -+ unsigned long load_avg; /* Rolling load average */ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ u64 irq_load_update; /* When we last updated IRQ load */ -+ unsigned long irq_load_avg; /* Rolling IRQ load average */ -+#endif -+#ifdef CONFIG_SMT_NICE -+ struct mm_struct *rq_mm; -+ int rq_smt_bias; /* Policy/nice level bias across smt siblings */ -+#endif -+ /* Accurate timekeeping data */ -+ unsigned long user_ns, nice_ns, irq_ns, softirq_ns, system_ns, -+ iowait_ns, idle_ns; -+ atomic_t nr_iowait; -+ -+#ifdef CONFIG_MEMBARRIER -+ int membarrier_state; -+#endif -+ -+ skiplist_node *node; -+ skiplist *sl; -+#ifdef CONFIG_SMP -+ struct task_struct *preempt; /* Preempt triggered on this task */ -+ struct task_struct *preempting; /* Hint only, what task is preempting */ -+ -+ int cpu; /* cpu of this runqueue */ -+ bool online; -+ -+ struct root_domain *rd; -+ struct sched_domain *sd; -+ -+ unsigned long cpu_capacity_orig; -+ -+ int *cpu_locality; /* CPU relative cache distance */ -+ struct rq **rq_order; /* Shared RQs ordered by relative cache distance */ -+ struct rq **cpu_order; /* RQs of discrete CPUs ordered by distance */ -+ -+ bool is_leader; -+ struct rq *smp_leader; /* First physical CPU per node */ -+#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+ struct sched_avg avg_thermal; -+#endif /* CONFIG_SCHED_THERMAL_PRESSURE */ -+#ifdef CONFIG_SCHED_SMT -+ struct rq *smt_leader; /* First logical CPU in SMT siblings */ -+ cpumask_t thread_mask; -+ bool (*siblings_idle)(struct rq *rq); -+ /* See if all smt siblings are idle */ -+#endif /* CONFIG_SCHED_SMT */ -+#ifdef CONFIG_SCHED_MC -+ struct rq *mc_leader; /* First logical CPU in MC siblings */ -+ cpumask_t core_mask; -+ bool (*cache_idle)(struct rq *rq); -+ /* See if all cache siblings are idle */ -+#endif /* CONFIG_SCHED_MC */ -+#endif /* CONFIG_SMP */ -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ u64 prev_irq_time; -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+#ifdef CONFIG_PARAVIRT -+ u64 prev_steal_time; -+#endif /* CONFIG_PARAVIRT */ -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ u64 prev_steal_time_rq; -+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ -+ -+ u64 clock, old_clock, last_tick; -+ /* Ensure that all clocks are in the same cache line */ -+ u64 clock_task ____cacheline_aligned; -+ int dither; -+ -+ int iso_ticks; -+ bool iso_refractory; -+ -+#ifdef CONFIG_HIGH_RES_TIMERS -+ struct hrtimer hrexpiry_timer; -+#endif -+ -+ int rt_nr_running; /* Number real time tasks running */ -+#ifdef CONFIG_SCHEDSTATS -+ -+ /* latency stats */ -+ struct sched_info rq_sched_info; -+ unsigned long long rq_cpu_time; -+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? 
*/ -+ -+ /* sys_sched_yield() stats */ -+ unsigned int yld_count; -+ -+ /* schedule() stats */ -+ unsigned int sched_switch; -+ unsigned int sched_count; -+ unsigned int sched_goidle; -+ -+ /* try_to_wake_up() stats */ -+ unsigned int ttwu_count; -+ unsigned int ttwu_local; -+#endif /* CONFIG_SCHEDSTATS */ -+ -+#ifdef CONFIG_SMP -+ struct llist_head wake_list; -+#endif -+ -+#ifdef CONFIG_CPU_IDLE -+ /* Must be inspected within a rcu lock section */ -+ struct cpuidle_state *idle_state; -+#endif -+}; -+ -+static inline u64 __rq_clock_broken(struct rq *rq) -+{ -+ return READ_ONCE(rq->clock); -+} -+ -+static inline u64 rq_clock(struct rq *rq) -+{ -+ lockdep_assert_held(rq->lock); -+ -+ return rq->clock; -+} -+ -+static inline u64 rq_clock_task(struct rq *rq) -+{ -+ lockdep_assert_held(rq->lock); -+ -+ return rq->clock_task; -+} -+ -+/** -+ * By default the decay is the default pelt decay period. -+ * The decay shift can change the decay period in -+ * multiples of 32. -+ * Decay shift Decay period(ms) -+ * 0 32 -+ * 1 64 -+ * 2 128 -+ * 3 256 -+ * 4 512 -+ */ -+extern int sched_thermal_decay_shift; -+ -+static inline u64 rq_clock_thermal(struct rq *rq) -+{ -+ return rq_clock_task(rq) >> sched_thermal_decay_shift; -+} -+ -+struct rq_flags { -+ unsigned long flags; -+}; -+ -+#ifdef CONFIG_SMP -+struct rq *cpu_rq(int cpu); -+#endif -+ -+#ifndef CONFIG_SMP -+extern struct rq *uprq; -+#define cpu_rq(cpu) (uprq) -+#define this_rq() (uprq) -+#define raw_rq() (uprq) -+#define task_rq(p) (uprq) -+#define cpu_curr(cpu) ((uprq)->curr) -+#else /* CONFIG_SMP */ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+#define this_rq() this_cpu_ptr(&runqueues) -+#define raw_rq() raw_cpu_ptr(&runqueues) -+#define task_rq(p) cpu_rq(task_cpu(p)) -+#endif /* CONFIG_SMP */ -+ -+static inline int task_current(struct rq *rq, struct task_struct *p) -+{ -+ return rq->curr == p; -+} -+ -+static inline int task_running(struct rq *rq, struct task_struct *p) -+{ -+#ifdef CONFIG_SMP -+ return p->on_cpu; -+#else -+ return task_current(rq, p); -+#endif -+} -+ -+static inline int task_on_rq_queued(struct task_struct *p) -+{ -+ return p->on_rq == TASK_ON_RQ_QUEUED; -+} -+ -+static inline int task_on_rq_migrating(struct task_struct *p) -+{ -+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+} -+ -+static inline void rq_lock(struct rq *rq) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock(rq->lock); -+} -+ -+static inline void rq_unlock(struct rq *rq) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(rq->lock); -+} -+ -+static inline void rq_lock_irq(struct rq *rq) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irq(rq->lock); -+} -+ -+static inline void rq_unlock_irq(struct rq *rq, struct rq_flags __always_unused *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irq(rq->lock); -+} -+ -+static inline void rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irqsave(rq->lock, rf->flags); -+} -+ -+static inline void rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irqrestore(rq->lock, rf->flags); -+} -+ -+static inline struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ while (42) { -+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); -+ rq = task_rq(p); -+ raw_spin_lock(rq->lock); -+ if (likely(rq == task_rq(p))) -+ break; -+ raw_spin_unlock(rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+ } -+ return rq; -+} -+ -+static 
inline void task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) -+ __releases(rq->lock) -+ __releases(p->pi_lock) -+{ -+ rq_unlock(rq); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+} -+ -+static inline struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags __always_unused *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ while (42) { -+ rq = task_rq(p); -+ raw_spin_lock(rq->lock); -+ if (likely(rq == task_rq(p))) -+ break; -+ raw_spin_unlock(rq->lock); -+ } -+ return rq; -+} -+ -+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags __always_unused *rf) -+{ -+ rq_unlock(rq); -+} -+ -+static inline struct rq * -+this_rq_lock_irq(struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ rq_lock(rq); -+ return rq; -+} -+ -+/* -+ * {de,en}queue flags: Most not used on MuQSS. -+ * -+ * DEQUEUE_SLEEP - task is no longer runnable -+ * ENQUEUE_WAKEUP - task just became runnable -+ * -+ * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks -+ * are in a known state which allows modification. Such pairs -+ * should preserve as much state as possible. -+ * -+ * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location -+ * in the runqueue. -+ * -+ * ENQUEUE_HEAD - place at front of runqueue (tail if not specified) -+ * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline) -+ * ENQUEUE_MIGRATED - the task was migrated during wakeup -+ * -+ */ -+ -+#define DEQUEUE_SLEEP 0x01 -+#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */ -+ -+#define ENQUEUE_WAKEUP 0x01 -+#define ENQUEUE_RESTORE 0x02 -+ -+#ifdef CONFIG_SMP -+#define ENQUEUE_MIGRATED 0x40 -+#else -+#define ENQUEUE_MIGRATED 0x00 -+#endif -+ -+#ifdef CONFIG_NUMA -+enum numa_topology_type { -+ NUMA_DIRECT, -+ NUMA_GLUELESS_MESH, -+ NUMA_BACKPLANE, -+}; -+extern enum numa_topology_type sched_numa_topology_type; -+extern int sched_max_numa_distance; -+extern bool find_numa_distance(int distance); -+extern void sched_init_numa(void); -+extern void sched_domains_numa_masks_set(unsigned int cpu); -+extern void sched_domains_numa_masks_clear(unsigned int cpu); -+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); -+#else -+static inline void sched_init_numa(void) { } -+static inline void sched_domains_numa_masks_set(unsigned int cpu) { } -+static inline void sched_domains_numa_masks_clear(unsigned int cpu) { } -+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return nr_cpu_ids; -+} -+#endif -+ -+extern struct mutex sched_domains_mutex; -+extern struct static_key_false sched_schedstats; -+ -+#define rcu_dereference_check_sched_domain(p) \ -+ rcu_dereference_check((p), \ -+ lockdep_is_held(&sched_domains_mutex)) -+ -+#ifdef CONFIG_SMP -+ -+/* -+ * The domain tree (rq->sd) is protected by RCU's quiescent state transition. -+ * See destroy_sched_domains: call_rcu for details. -+ * -+ * The domain tree of any CPU may only be accessed from within -+ * preempt-disabled sections. -+ */ -+#define for_each_domain(cpu, __sd) \ -+ for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \ -+ __sd; __sd = __sd->parent) -+ -+/** -+ * highest_flag_domain - Return highest sched_domain containing flag. -+ * @cpu: The cpu whose highest level of sched domain is to -+ * be returned. -+ * @flag: The flag to check for the highest sched_domain -+ * for the given cpu. 
-+ * -+ * Returns the highest sched_domain of a cpu which contains the given flag. -+ */ -+static inline struct sched_domain *highest_flag_domain(int cpu, int flag) -+{ -+ struct sched_domain *sd, *hsd = NULL; -+ -+ for_each_domain(cpu, sd) { -+ if (!(sd->flags & flag)) -+ break; -+ hsd = sd; -+ } -+ -+ return hsd; -+} -+ -+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) -+{ -+ struct sched_domain *sd; -+ -+ for_each_domain(cpu, sd) { -+ if (sd->flags & flag) -+ break; -+ } -+ -+ return sd; -+} -+ -+DECLARE_PER_CPU(struct sched_domain *, sd_llc); -+DECLARE_PER_CPU(int, sd_llc_size); -+DECLARE_PER_CPU(int, sd_llc_id); -+DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); -+DECLARE_PER_CPU(struct sched_domain *, sd_numa); -+DECLARE_PER_CPU(struct sched_domain *, sd_asym_packing); -+DECLARE_PER_CPU(struct sched_domain *, sd_asym_cpucapacity); -+ -+struct sched_group_capacity { -+ atomic_t ref; -+ /* -+ * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity -+ * for a single CPU. -+ */ -+ unsigned long capacity; -+ unsigned long min_capacity; /* Min per-CPU capacity in group */ -+ unsigned long max_capacity; /* Max per-CPU capacity in group */ -+ unsigned long next_update; -+ int imbalance; /* XXX unrelated to capacity but shared group state */ -+ -+#ifdef CONFIG_SCHED_DEBUG -+ int id; -+#endif -+ -+ unsigned long cpumask[0]; /* balance mask */ -+}; -+ -+struct sched_group { -+ struct sched_group *next; /* Must be a circular list */ -+ atomic_t ref; -+ -+ unsigned int group_weight; -+ struct sched_group_capacity *sgc; -+ int asym_prefer_cpu; /* cpu of highest priority in group */ -+ -+ /* -+ * The CPUs this group covers. -+ * -+ * NOTE: this field is variable length. (Allocated dynamically -+ * by attaching extra space to the end of the structure, -+ * depending on how many CPUs the kernel has booted up with) -+ */ -+ unsigned long cpumask[0]; -+}; -+ -+static inline struct cpumask *sched_group_span(struct sched_group *sg) -+{ -+ return to_cpumask(sg->cpumask); -+} -+ -+/* -+ * See build_balance_mask(). -+ */ -+static inline struct cpumask *group_balance_mask(struct sched_group *sg) -+{ -+ return to_cpumask(sg->sgc->cpumask); -+} -+ -+/** -+ * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. -+ * @group: The group whose first cpu is to be returned. 
-+ */ -+static inline unsigned int group_first_cpu(struct sched_group *group) -+{ -+ return cpumask_first(sched_group_span(group)); -+} -+ -+ -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+void register_sched_domain_sysctl(void); -+void dirty_sched_domain_sysctl(int cpu); -+void unregister_sched_domain_sysctl(void); -+#else -+static inline void register_sched_domain_sysctl(void) -+{ -+} -+static inline void dirty_sched_domain_sysctl(int cpu) -+{ -+} -+static inline void unregister_sched_domain_sysctl(void) -+{ -+} -+#endif -+ -+extern void sched_ttwu_pending(void); -+extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask); -+extern void set_rq_online (struct rq *rq); -+extern void set_rq_offline(struct rq *rq); -+extern bool sched_smp_initialized; -+ -+static inline void update_group_capacity(struct sched_domain *sd, int cpu) -+{ -+} -+ -+static inline void trigger_load_balance(struct rq *rq) -+{ -+} -+ -+#define sched_feat(x) 0 -+ -+#else /* CONFIG_SMP */ -+ -+static inline void sched_ttwu_pending(void) { } -+ -+#endif /* CONFIG_SMP */ -+ -+#ifdef CONFIG_CPU_IDLE -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+ rq->idle_state = idle_state; -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ SCHED_WARN_ON(!rcu_read_lock_held()); -+ return rq->idle_state; -+} -+#else -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ return NULL; -+} -+#endif -+ -+#ifdef CONFIG_SCHED_DEBUG -+extern bool sched_debug_enabled; -+#endif -+ -+extern void schedule_idle(void); -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+struct irqtime { -+ u64 total; -+ u64 tick_delta; -+ u64 irq_start_time; -+ struct u64_stats_sync sync; -+}; -+ -+DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -+ -+/* -+ * Returns the irqtime minus the softirq time computed by ksoftirqd. -+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime -+ * and never move forward. 
-+ */ -+static inline u64 irq_time_read(int cpu) -+{ -+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); -+ unsigned int seq; -+ u64 total; -+ -+ do { -+ seq = __u64_stats_fetch_begin(&irqtime->sync); -+ total = irqtime->total; -+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); -+ -+ return total; -+} -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+ -+static inline bool sched_stop_runnable(struct rq *rq) -+{ -+ return rq->stop && task_on_rq_queued(rq->stop); -+} -+ -+#ifdef CONFIG_SMP -+static inline int cpu_of(struct rq *rq) -+{ -+ return rq->cpu; -+} -+#else /* CONFIG_SMP */ -+static inline int cpu_of(struct rq *rq) -+{ -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_CPU_FREQ -+DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); -+ -+static inline void cpufreq_trigger(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, -+ cpu_of(rq))); -+ -+ if (data) -+ data->func(data, rq->niffies, flags); -+} -+#else -+static inline void cpufreq_trigger(struct rq *rq, unsigned int flag) -+{ -+} -+#endif /* CONFIG_CPU_FREQ */ -+ -+static __always_inline -+unsigned int uclamp_rq_util_with(struct rq __maybe_unused *rq, unsigned int util, -+ struct task_struct __maybe_unused *p) -+{ -+ return util; -+} -+ -+static inline bool uclamp_is_used(void) -+{ -+ return false; -+} -+ -+#ifndef arch_scale_freq_tick -+static __always_inline -+void arch_scale_freq_tick(void) -+{ -+} -+#endif -+ -+#ifdef arch_scale_freq_capacity -+#ifndef arch_scale_freq_invariant -+#define arch_scale_freq_invariant() (true) -+#endif -+#else /* arch_scale_freq_capacity */ -+#define arch_scale_freq_invariant() (false) -+#endif -+ -+#ifdef CONFIG_64BIT -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ return tsk_seruntime(t); -+} -+#else -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ struct rq_flags rf; -+ u64 ns; -+ struct rq *rq; -+ -+ rq = task_rq_lock(t, &rf); -+ ns = tsk_seruntime(t); -+ task_rq_unlock(rq, t, &rf); -+ -+ return ns; -+} -+#endif -+ -+#ifndef arch_scale_freq_capacity -+static __always_inline -+unsigned long arch_scale_freq_capacity(int cpu) -+{ -+ return SCHED_CAPACITY_SCALE; -+} -+#endif -+ -+#ifdef CONFIG_NO_HZ_FULL -+extern bool sched_can_stop_tick(struct rq *rq); -+extern int __init sched_tick_offload_init(void); -+ -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out of -+ * nohz mode if necessary. 
-+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu; -+ -+ if (!tick_nohz_full_enabled()) -+ return; -+ -+ cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (sched_can_stop_tick(rq)) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else -+static inline int sched_tick_offload_init(void) { return 0; } -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+#define SCHED_FLAG_SUGOV 0x10000000 -+ -+static inline bool rt_rq_is_runnable(struct rq *rt_rq) -+{ -+ return rt_rq->rt_nr_running; -+} -+ -+/** -+ * enum schedutil_type - CPU utilization type -+ * @FREQUENCY_UTIL: Utilization used to select frequency -+ * @ENERGY_UTIL: Utilization used during energy calculation -+ * -+ * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time -+ * need to be aggregated differently depending on the usage made of them. This -+ * enum is used within schedutil_freq_util() to differentiate the types of -+ * utilization expected by the callers, and adjust the aggregation accordingly. -+ */ -+enum schedutil_type { -+ FREQUENCY_UTIL, -+ ENERGY_UTIL, -+}; -+ -+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL -+ -+unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, -+ unsigned long max, enum schedutil_type type, -+ struct task_struct *p); -+ -+static inline unsigned long cpu_bw_dl(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline unsigned long cpu_util_dl(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline unsigned long cpu_util_cfs(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->load_avg); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+static inline unsigned long cpu_util_rt(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->rt_nr_running); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+static inline unsigned long cpu_util_irq(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->irq_load_avg); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+static inline -+unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) -+{ -+ util *= (max - irq); -+ util /= max; -+ -+ return util; -+ -+} -+#else -+static inline unsigned long cpu_util_irq(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline -+unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) -+{ -+ return util; -+} -+#endif -+#endif -+ -+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -+#define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus))) -+ -+DECLARE_STATIC_KEY_FALSE(sched_energy_present); -+ -+static inline bool sched_energy_enabled(void) -+{ -+ return static_branch_unlikely(&sched_energy_present); -+} -+ -+#else /* ! (CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */ -+ -+#define perf_domain_span(pd) NULL -+static inline bool sched_energy_enabled(void) { return false; } -+ -+#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */ -+ -+#ifdef CONFIG_MEMBARRIER -+/* -+ * The scheduler provides memory barriers required by membarrier between: -+ * - prior user-space memory accesses and store to rq->membarrier_state, -+ * - store to rq->membarrier_state and following user-space memory accesses. -+ * In the same way it provides those guarantees around store to rq->curr. 
-+ */ -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+ int membarrier_state; -+ -+ if (prev_mm == next_mm) -+ return; -+ -+ membarrier_state = atomic_read(&next_mm->membarrier_state); -+ if (READ_ONCE(rq->membarrier_state) == membarrier_state) -+ return; -+ -+ WRITE_ONCE(rq->membarrier_state, membarrier_state); -+} -+#else -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+} -+#endif -+ -+#ifdef CONFIG_SMP -+static inline bool is_per_cpu_kthread(struct task_struct *p) -+{ -+ if (!(p->flags & PF_KTHREAD)) -+ return false; -+ -+ if (p->nr_cpus_allowed != 1) -+ return false; -+ -+ return true; -+} -+#endif -+ -+void swake_up_all_locked(struct swait_queue_head *q); -+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+/* pelt.h compat CONFIG_SCHED_THERMAL_PRESSURE impossible with MUQSS */ -+static inline int -+update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) -+{ -+ return 0; -+} -+ -+static inline u64 thermal_load_avg(struct rq *rq) -+{ -+ return 0; -+} -+ -+#endif /* MUQSS_SCHED_H */ -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 7fbaee24c824..15d274af9b1c 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -183,6 +183,12 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - -+#ifdef CONFIG_SCHED_MUQSS -+#define rt_rq_runnable(rq_rt) rt_rq_is_runnable(rq) -+#else -+#define rt_rq_runnable(rq_rt) rt_rq_is_runnable(&rq->rt) -+#endif -+ - /* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. 
-@@ -211,7 +217,7 @@ unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, - struct rq *rq = cpu_rq(cpu); - - if (!uclamp_is_used() && -- type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) { -+ type == FREQUENCY_UTIL && rt_rq_runnable(rq)) { - return max; - } - -@@ -656,7 +662,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - struct task_struct *thread; - struct sched_attr attr = { - .size = sizeof(struct sched_attr), -+#ifdef CONFIG_SCHED_MUQSS -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, -+#endif - .sched_flags = SCHED_FLAG_SUGOV, - .sched_nice = 0, - .sched_priority = 0, -diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h -index efbb492bb94c..f0288c32ab17 100644 ---- a/kernel/sched/cpupri.h -+++ b/kernel/sched/cpupri.h -@@ -17,6 +17,7 @@ struct cpupri { - int *cpu_to_pri; - }; - -+#ifndef CONFIG_SCHED_MUQSS - #ifdef CONFIG_SMP - int cpupri_find(struct cpupri *cp, struct task_struct *p, - struct cpumask *lowest_mask); -@@ -27,3 +28,4 @@ void cpupri_set(struct cpupri *cp, int cpu, int pri); - int cpupri_init(struct cpupri *cp); - void cpupri_cleanup(struct cpupri *cp); - #endif -+#endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index ff9435dee1df..d7bd67204d65 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -266,26 +266,6 @@ static inline u64 account_other_time(u64 max) - return accounted; - } - --#ifdef CONFIG_64BIT --static inline u64 read_sum_exec_runtime(struct task_struct *t) --{ -- return t->se.sum_exec_runtime; --} --#else --static u64 read_sum_exec_runtime(struct task_struct *t) --{ -- u64 ns; -- struct rq_flags rf; -- struct rq *rq; -- -- rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -- task_rq_unlock(rq, t, &rf); -- -- return ns; --} --#endif -- - /* - * Accumulate raw cputime values of dead tasks (sig->[us]time) and live - * tasks (sum on group iteration) belonging to @tsk's group. -@@ -658,7 +638,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - task_cputime(p, &cputime.utime, &cputime.stime); -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index b743bf38f08f..c769795d726b 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -361,6 +361,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_MUQSS - /* - * idle-task scheduling class. 
- */ -@@ -481,3 +482,4 @@ const struct sched_class idle_sched_class = { - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif /* CONFIG_SCHED_MUQSS */ -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index db3a57675ccf..1f11cefe8d20 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2,6 +2,19 @@ - /* - * Scheduler internal types and methods: - */ -+#ifdef CONFIG_SCHED_MUQSS -+#include "MuQSS.h" -+ -+/* Begin compatibility wrappers for MuQSS/CFS differences */ -+#define rq_rt_nr_running(rq) ((rq)->rt_nr_running) -+#define rq_h_nr_running(rq) ((rq)->nr_running) -+ -+#else /* CONFIG_SCHED_MUQSS */ -+ -+#define rq_rt_nr_running(rq) ((rq)->rt.rt_nr_running) -+#define rq_h_nr_running(rq) ((rq)->cfs.h_nr_running) -+ -+ - #include - - #include -@@ -2546,3 +2559,25 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) - - void swake_up_all_locked(struct swait_queue_head *q); - void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+/* MuQSS compatibility functions */ -+#ifdef CONFIG_64BIT -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ return t->se.sum_exec_runtime; -+} -+#else -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ u64 ns; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ rq = task_rq_lock(t, &rf); -+ ns = t->se.sum_exec_runtime; -+ task_rq_unlock(rq, t, &rf); -+ -+ return ns; -+} -+#endif -+#endif /* CONFIG_SCHED_MUQSS */ -diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 8344757bba6e..d819af35a770 100644 ---- a/kernel/sched/topology.c -+++ b/kernel/sched/topology.c -@@ -450,7 +450,11 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) - struct root_domain *old_rd = NULL; - unsigned long flags; - -+#ifdef CONFIG_SCHED_MUQSS -+ raw_spin_lock_irqsave(rq->lock, flags); -+#else - raw_spin_lock_irqsave(&rq->lock, flags); -+#endif - - if (rq->rd) { - old_rd = rq->rd; -@@ -476,7 +480,11 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) - if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) - set_rq_online(rq); - -+#ifdef CONFIG_SCHED_MUQSS -+ raw_spin_unlock_irqrestore(rq->lock, flags); -+#else - raw_spin_unlock_irqrestore(&rq->lock, flags); -+#endif - - if (old_rd) - call_rcu(&old_rd->rcu, free_rootdomain); -diff --git a/kernel/skip_list.c b/kernel/skip_list.c -new file mode 100644 -index 000000000000..bf5c6e97e139 ---- /dev/null -+++ b/kernel/skip_list.c -@@ -0,0 +1,148 @@ -+/* -+ Copyright (C) 2011,2016 Con Kolivas. -+ -+ Code based on example originally by William Pugh. -+ -+Skip Lists are a probabilistic alternative to balanced trees, as -+described in the June 1990 issue of CACM and were invented by -+William Pugh in 1987. -+ -+A couple of comments about this implementation: -+The routine randomLevel has been hard-coded to generate random -+levels using p=0.25. It can be easily changed. -+ -+The insertion routine has been implemented so as to use the -+dirty hack described in the CACM paper: if a random level is -+generated that is more than the current maximum level, the -+current maximum level plus one is used instead. -+ -+Levels start at zero and go up to MaxLevel (which is equal to -+MaxNumberOfLevels-1). -+ -+The routines defined in this file are: -+ -+init: defines slnode -+ -+new_skiplist: returns a new, empty list -+ -+randomLevel: Returns a random level based on a u64 random seed passed to it. -+In MuQSS, the "niffy" time is used for this purpose. 
-+ -+insert(l,key, value): inserts the binding (key, value) into l. This operation -+occurs in O(log n) time. -+ -+delnode(slnode, l, node): deletes any binding of key from the l based on the -+actual node value. This operation occurs in O(k) time where k is the -+number of levels of the node in question (max 8). The original delete -+function occurred in O(log n) time and involved a search. -+ -+MuQSS Notes: In this implementation of skiplists, there are bidirectional -+next/prev pointers and the insert function returns a pointer to the actual -+node the value is stored. The key here is chosen by the scheduler so as to -+sort tasks according to the priority list requirements and is no longer used -+by the scheduler after insertion. The scheduler lookup, however, occurs in -+O(1) time because it is always the first item in the level 0 linked list. -+Since the task struct stores a copy of the node pointer upon skiplist_insert, -+it can also remove it much faster than the original implementation with the -+aid of prev<->next pointer manipulation and no searching. -+ -+*/ -+ -+#include -+#include -+ -+#define MaxNumberOfLevels 8 -+#define MaxLevel (MaxNumberOfLevels - 1) -+ -+void skiplist_init(skiplist_node *slnode) -+{ -+ int i; -+ -+ slnode->key = 0xFFFFFFFFFFFFFFFF; -+ slnode->level = 0; -+ slnode->value = NULL; -+ for (i = 0; i < MaxNumberOfLevels; i++) -+ slnode->next[i] = slnode->prev[i] = slnode; -+} -+ -+skiplist *new_skiplist(skiplist_node *slnode) -+{ -+ skiplist *l = kzalloc(sizeof(skiplist), GFP_ATOMIC); -+ -+ BUG_ON(!l); -+ l->header = slnode; -+ return l; -+} -+ -+void free_skiplist(skiplist *l) -+{ -+ skiplist_node *p, *q; -+ -+ p = l->header; -+ do { -+ q = p->next[0]; -+ p->next[0]->prev[0] = q->prev[0]; -+ skiplist_node_init(p); -+ p = q; -+ } while (p != l->header); -+ kfree(l); -+} -+ -+void skiplist_node_init(skiplist_node *node) -+{ -+ memset(node, 0, sizeof(skiplist_node)); -+} -+ -+static inline unsigned int randomLevel(const long unsigned int randseed) -+{ -+ return find_first_bit(&randseed, MaxLevel) / 2; -+} -+ -+void skiplist_insert(skiplist *l, skiplist_node *node, keyType key, valueType value, unsigned int randseed) -+{ -+ skiplist_node *update[MaxNumberOfLevels]; -+ skiplist_node *p, *q; -+ int k = l->level; -+ -+ p = l->header; -+ do { -+ while (q = p->next[k], q->key <= key) -+ p = q; -+ update[k] = p; -+ } while (--k >= 0); -+ -+ ++l->entries; -+ k = randomLevel(randseed); -+ if (k > l->level) { -+ k = ++l->level; -+ update[k] = l->header; -+ } -+ -+ node->level = k; -+ node->key = key; -+ node->value = value; -+ do { -+ p = update[k]; -+ node->next[k] = p->next[k]; -+ p->next[k] = node; -+ node->prev[k] = p; -+ node->next[k]->prev[k] = node; -+ } while (--k >= 0); -+} -+ -+void skiplist_delete(skiplist *l, skiplist_node *node) -+{ -+ int k, m = node->level; -+ -+ for (k = 0; k <= m; k++) { -+ node->prev[k]->next[k] = node->next[k]; -+ node->next[k]->prev[k] = node->prev[k]; -+ } -+ skiplist_node_init(node); -+ if (m == l->level) { -+ while (l->header->next[m] == l->header && l->header->prev[m] == l->header && m > 0) -+ m--; -+ l->level = m; -+ } -+ l->entries--; -+} -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index 8a176d8727a3..808473f947ee 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -130,9 +130,19 @@ static int __maybe_unused four = 4; - static unsigned long zero_ul; - static unsigned long one_ul = 1; - static unsigned long long_max = LONG_MAX; --static int one_hundred = 100; --static int one_thousand = 1000; --#ifdef CONFIG_PRINTK 
-+static int __read_mostly one_hundred = 100; -+static int __read_mostly one_thousand = 1000; -+static int zero = 0; -+static int one = 1; -+#ifdef CONFIG_SCHED_MUQSS -+extern int rr_interval; -+extern int sched_interactive; -+extern int sched_iso_cpu; -+extern int sched_yield_type; -+#endif -+extern int hrtimer_granularity_us; -+extern int hrtimeout_min_us; -+#if defined(CONFIG_PRINTK) || defined(CONFIG_SCHED_MUQSS) - static int ten_thousand = 10000; - #endif - #ifdef CONFIG_PERF_EVENTS -@@ -288,7 +298,7 @@ static struct ctl_table sysctl_base_table[] = { - { } - }; - --#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_MUQSS) - static int min_sched_granularity_ns = 100000; /* 100 usecs */ - static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_wakeup_granularity_ns; /* 0 usecs */ -@@ -305,6 +315,7 @@ static int max_extfrag_threshold = 1000; - #endif - - static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_MUQSS - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, -@@ -486,6 +497,7 @@ static struct ctl_table kern_table[] = { - .extra2 = SYSCTL_ONE, - }, - #endif -+#endif /* !CONFIG_SCHED_MUQSS */ - #ifdef CONFIG_PROVE_LOCKING - { - .procname = "prove_locking", -@@ -1049,6 +1061,62 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_SCHED_MUQSS -+ { -+ .procname = "rr_interval", -+ .data = &rr_interval, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &one, -+ .extra2 = &one_thousand, -+ }, -+ { -+ .procname = "interactive", -+ .data = &sched_interactive, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &one, -+ }, -+ { -+ .procname = "iso_cpu", -+ .data = &sched_iso_cpu, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &one_hundred, -+ }, -+ { -+ .procname = "yield_type", -+ .data = &sched_yield_type, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &two, -+ }, -+#endif -+ { -+ .procname = "hrtimer_granularity_us", -+ .data = &hrtimer_granularity_us, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &one, -+ .extra2 = &ten_thousand, -+ }, -+ { -+ .procname = "hrtimeout_min_us", -+ .data = &hrtimeout_min_us, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &one, -+ .extra2 = &ten_thousand, -+ }, - #if defined(CONFIG_S390) && defined(CONFIG_SMP) - { - .procname = "spin_retry", -diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig -index fcc42353f125..2960cace6719 100644 ---- a/kernel/time/Kconfig -+++ b/kernel/time/Kconfig -@@ -66,6 +66,9 @@ config NO_HZ_COMMON - depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - select TICK_ONESHOT - -+config NO_HZ_FULL -+ bool -+ - choice - prompt "Timer tick handling" - default NO_HZ_IDLE if NO_HZ -@@ -87,8 +90,9 @@ config NO_HZ_IDLE - - Most of the time you want to say Y here. - --config NO_HZ_FULL -+config NO_HZ_FULL_NODEF - bool "Full dynticks system (tickless)" -+ select NO_HZ_FULL - # NO_HZ_COMMON dependency - depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - # We need at least one periodic CPU for timekeeping -@@ -114,6 +118,8 @@ config NO_HZ_FULL - transitions: syscalls, exceptions and interrupts. 
Even when it's - dynamically off. - -+ Not recommended for desktops,laptops, or mobile devices. -+ - Say N. - - endchoice -@@ -123,7 +129,7 @@ config CONTEXT_TRACKING - - config CONTEXT_TRACKING_FORCE - bool "Force context tracking" -- depends on CONTEXT_TRACKING -+ depends on CONTEXT_TRACKING && !SCHED_MUQSS - default y if !NO_HZ_FULL - help - The major pre-requirement for full dynticks to work is to -diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c -index f5490222e134..544c58c29267 100644 ---- a/kernel/time/clockevents.c -+++ b/kernel/time/clockevents.c -@@ -190,8 +190,9 @@ int clockevents_tick_resume(struct clock_event_device *dev) - - #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST - --/* Limit min_delta to a jiffie */ --#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) -+int __read_mostly hrtimer_granularity_us = 100; -+/* Limit min_delta to 100us */ -+#define MIN_DELTA_LIMIT (hrtimer_granularity_us * NSEC_PER_USEC) - - /** - * clockevents_increase_min_delta - raise minimum delta of a clock event device -diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index d89da1c7e005..e4f5b4c483a0 100644 ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -2216,3 +2216,113 @@ int __sched schedule_hrtimeout(ktime_t *expires, - return schedule_hrtimeout_range(expires, 0, mode); - } - EXPORT_SYMBOL_GPL(schedule_hrtimeout); -+ -+/* -+ * As per schedule_hrtimeout but taskes a millisecond value and returns how -+ * many milliseconds are left. -+ */ -+long __sched schedule_msec_hrtimeout(long timeout) -+{ -+ struct hrtimer_sleeper t; -+ int delta, jiffs; -+ ktime_t expires; -+ -+ if (!timeout) { -+ __set_current_state(TASK_RUNNING); -+ return 0; -+ } -+ -+ jiffs = msecs_to_jiffies(timeout); -+ /* -+ * If regular timer resolution is adequate or hrtimer resolution is not -+ * (yet) better than Hz, as would occur during startup, use regular -+ * timers. -+ */ -+ if (jiffs > 4 || hrtimer_resolution >= NSEC_PER_SEC / HZ || pm_freezing) -+ return schedule_timeout(jiffs); -+ -+ delta = (timeout % 1000) * NSEC_PER_MSEC; -+ expires = ktime_set(0, delta); -+ -+ hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ hrtimer_set_expires_range_ns(&t.timer, expires, delta); -+ -+ hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL); -+ -+ if (likely(t.task)) -+ schedule(); -+ -+ hrtimer_cancel(&t.timer); -+ destroy_hrtimer_on_stack(&t.timer); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ expires = hrtimer_expires_remaining(&t.timer); -+ timeout = ktime_to_ms(expires); -+ return timeout < 0 ? 
0 : timeout; -+} -+ -+EXPORT_SYMBOL(schedule_msec_hrtimeout); -+ -+#define USECS_PER_SEC 1000000 -+extern int hrtimer_granularity_us; -+ -+static inline long schedule_usec_hrtimeout(long timeout) -+{ -+ struct hrtimer_sleeper t; -+ ktime_t expires; -+ int delta; -+ -+ if (!timeout) { -+ __set_current_state(TASK_RUNNING); -+ return 0; -+ } -+ -+ if (hrtimer_resolution >= NSEC_PER_SEC / HZ) -+ return schedule_timeout(usecs_to_jiffies(timeout)); -+ -+ if (timeout < hrtimer_granularity_us) -+ timeout = hrtimer_granularity_us; -+ delta = (timeout % USECS_PER_SEC) * NSEC_PER_USEC; -+ expires = ktime_set(0, delta); -+ -+ hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ hrtimer_set_expires_range_ns(&t.timer, expires, delta); -+ -+ hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL); -+ -+ if (likely(t.task)) -+ schedule(); -+ -+ hrtimer_cancel(&t.timer); -+ destroy_hrtimer_on_stack(&t.timer); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ expires = hrtimer_expires_remaining(&t.timer); -+ timeout = ktime_to_us(expires); -+ return timeout < 0 ? 0 : timeout; -+} -+ -+int __read_mostly hrtimeout_min_us = 500; -+ -+long __sched schedule_min_hrtimeout(void) -+{ -+ return usecs_to_jiffies(schedule_usec_hrtimeout(hrtimeout_min_us)); -+} -+ -+EXPORT_SYMBOL(schedule_min_hrtimeout); -+ -+long __sched schedule_msec_hrtimeout_interruptible(long timeout) -+{ -+ __set_current_state(TASK_INTERRUPTIBLE); -+ return schedule_msec_hrtimeout(timeout); -+} -+EXPORT_SYMBOL(schedule_msec_hrtimeout_interruptible); -+ -+long __sched schedule_msec_hrtimeout_uninterruptible(long timeout) -+{ -+ __set_current_state(TASK_UNINTERRUPTIBLE); -+ return schedule_msec_hrtimeout(timeout); -+} -+EXPORT_SYMBOL(schedule_msec_hrtimeout_uninterruptible); -diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c -index 2fd3b3fa68bf..1202d7fe5d8e 100644 ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -236,7 +236,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) - u64 stime, utime; - - task_cputime(p, &utime, &stime); -- store_samples(samples, stime, utime, p->se.sum_exec_runtime); -+ store_samples(samples, stime, utime, tsk_seruntime(p)); - } - - static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, -@@ -855,7 +855,7 @@ static void check_thread_timers(struct task_struct *tsk, - soft = task_rlimit(tsk, RLIMIT_RTTIME); - if (soft != RLIM_INFINITY) { - /* Task RT timeout is accounted in jiffies. RTTIME is usec */ -- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); -+ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); - unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - - /* At the hard limit, send SIGKILL. No further action. 
*/ -diff --git a/kernel/time/timer.c b/kernel/time/timer.c -index a5221abb4594..9a9287cb2a37 100644 ---- a/kernel/time/timer.c -+++ b/kernel/time/timer.c -@@ -43,6 +43,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -1568,7 +1569,7 @@ static unsigned long __next_timer_interrupt(struct timer_base *base) - * Check, if the next hrtimer event is before the next timer wheel - * event: - */ --static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) -+static u64 cmp_next_hrtimer_event(struct timer_base *base, u64 basem, u64 expires) - { - u64 nextevt = hrtimer_get_next_event(); - -@@ -1586,6 +1587,9 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) - if (nextevt <= basem) - return basem; - -+ if (nextevt < expires && nextevt - basem <= TICK_NSEC) -+ base->is_idle = false; -+ - /* - * Round up to the next jiffie. High resolution timers are - * off, so the hrtimers are expired in the tick and we need to -@@ -1655,7 +1659,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) - } - raw_spin_unlock(&base->lock); - -- return cmp_next_hrtimer_event(basem, expires); -+ return cmp_next_hrtimer_event(base, basem, expires); - } - - /** -@@ -1892,6 +1896,18 @@ signed long __sched schedule_timeout(signed long timeout) - - expire = timeout + jiffies; - -+#ifdef CONFIG_HIGH_RES_TIMERS -+ if (timeout == 1 && hrtimer_resolution < NSEC_PER_SEC / HZ) { -+ /* -+ * Special case 1 as being a request for the minimum timeout -+ * and use highres timers to timeout after 1ms to workaround -+ * the granularity of low Hz tick timers. -+ */ -+ if (!schedule_min_hrtimeout()) -+ return 0; -+ goto out_timeout; -+ } -+#endif - timer.task = current; - timer_setup_on_stack(&timer.timer, process_timeout, 0); - __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING); -@@ -1900,10 +1916,10 @@ signed long __sched schedule_timeout(signed long timeout) - - /* Remove the timer from the object tracker */ - destroy_timer_on_stack(&timer.timer); -- -+out_timeout: - timeout = expire - jiffies; - -- out: -+out: - return timeout < 0 ? 0 : timeout; - } - EXPORT_SYMBOL(schedule_timeout); -@@ -2045,7 +2061,19 @@ void __init init_timers(void) - */ - void msleep(unsigned int msecs) - { -- unsigned long timeout = msecs_to_jiffies(msecs) + 1; -+ int jiffs = msecs_to_jiffies(msecs); -+ unsigned long timeout; -+ -+ /* -+ * Use high resolution timers where the resolution of tick based -+ * timers is inadequate. 
-+ */ -+ if (jiffs < 5 && hrtimer_resolution < NSEC_PER_SEC / HZ && !pm_freezing) { -+ while (msecs) -+ msecs = schedule_msec_hrtimeout_uninterruptible(msecs); -+ return; -+ } -+ timeout = jiffs + 1; - - while (timeout) - timeout = schedule_timeout_uninterruptible(timeout); -@@ -2059,7 +2087,15 @@ EXPORT_SYMBOL(msleep); - */ - unsigned long msleep_interruptible(unsigned int msecs) - { -- unsigned long timeout = msecs_to_jiffies(msecs) + 1; -+ int jiffs = msecs_to_jiffies(msecs); -+ unsigned long timeout; -+ -+ if (jiffs < 5 && hrtimer_resolution < NSEC_PER_SEC / HZ && !pm_freezing) { -+ while (msecs && !signal_pending(current)) -+ msecs = schedule_msec_hrtimeout_interruptible(msecs); -+ return msecs; -+ } -+ timeout = jiffs + 1; - - while (timeout && !signal_pending(current)) - timeout = schedule_timeout_interruptible(timeout); -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index b5e3496cf803..68930e7f4d28 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) - { - /* Make this a -deadline thread */ - static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_MUQSS -+ /* No deadline on MuQSS, use RR */ -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, - .sched_runtime = 100000ULL, - .sched_deadline = 10000000ULL, - .sched_period = 10000000ULL -+#endif - }; - struct wakeup_test_data *x = data; - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index a37c87b5aee2..7b1d19e17af9 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -163,7 +163,7 @@ struct scan_control { - /* - * From 0 .. 100. Higher means more swappy. - */ --int vm_swappiness = 60; -+int vm_swappiness = 33; - /* - * The total number of pages which are beyond the high watermark within all - * zones. -diff --git a/net/core/pktgen.c b/net/core/pktgen.c -index 08e2811b5274..955fcfdd3c3c 100644 ---- a/net/core/pktgen.c -+++ b/net/core/pktgen.c -@@ -1894,7 +1894,7 @@ static void pktgen_mark_device(const struct pktgen_net *pn, const char *ifname) - mutex_unlock(&pktgen_thread_lock); - pr_debug("%s: waiting for %s to disappear....\n", - __func__, ifname); -- schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try)); -+ schedule_msec_hrtimeout_interruptible((msec_per_try)); - mutex_lock(&pktgen_thread_lock); - - if (++i >= max_tries) { -diff --git a/sound/pci/maestro3.c b/sound/pci/maestro3.c -index 40232a278b1a..d87fae1113aa 100644 ---- a/sound/pci/maestro3.c -+++ b/sound/pci/maestro3.c -@@ -1995,7 +1995,7 @@ static void snd_m3_ac97_reset(struct snd_m3 *chip) - outw(0, io + GPIO_DATA); - outw(dir | GPO_PRIMARY_AC97, io + GPIO_DIRECTION); - -- schedule_timeout_uninterruptible(msecs_to_jiffies(delay1)); -+ schedule_msec_hrtimeout_uninterruptible((delay1)); - - outw(GPO_PRIMARY_AC97, io + GPIO_DATA); - udelay(5); -@@ -2003,7 +2003,7 @@ static void snd_m3_ac97_reset(struct snd_m3 *chip) - outw(IO_SRAM_ENABLE | SERIAL_AC_LINK_ENABLE, io + RING_BUS_CTRL_A); - outw(~0, io + GPIO_MASK); - -- schedule_timeout_uninterruptible(msecs_to_jiffies(delay2)); -+ schedule_msec_hrtimeout_uninterruptible((delay2)); - - if (! 
snd_m3_try_read_vendor(chip)) - break; -diff --git a/sound/soc/codecs/rt5631.c b/sound/soc/codecs/rt5631.c -index f70b9f7e68bb..77b65398ca07 100644 ---- a/sound/soc/codecs/rt5631.c -+++ b/sound/soc/codecs/rt5631.c -@@ -415,7 +415,7 @@ static void onebit_depop_mute_stage(struct snd_soc_component *component, int ena - hp_zc = snd_soc_component_read32(component, RT5631_INT_ST_IRQ_CTRL_2); - snd_soc_component_write(component, RT5631_INT_ST_IRQ_CTRL_2, hp_zc & 0xf7ff); - if (enable) { -- schedule_timeout_uninterruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_uninterruptible((10)); - /* config one-bit depop parameter */ - rt5631_write_index(component, RT5631_SPK_INTL_CTRL, 0x307f); - snd_soc_component_update_bits(component, RT5631_HP_OUT_VOL, -@@ -525,7 +525,7 @@ static void depop_seq_mute_stage(struct snd_soc_component *component, int enable - hp_zc = snd_soc_component_read32(component, RT5631_INT_ST_IRQ_CTRL_2); - snd_soc_component_write(component, RT5631_INT_ST_IRQ_CTRL_2, hp_zc & 0xf7ff); - if (enable) { -- schedule_timeout_uninterruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_uninterruptible((10)); - - /* config depop sequence parameter */ - rt5631_write_index(component, RT5631_SPK_INTL_CTRL, 0x302f); -diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c -index fe99584c917f..f1344d532a13 100644 ---- a/sound/soc/codecs/wm8350.c -+++ b/sound/soc/codecs/wm8350.c -@@ -233,10 +233,10 @@ static void wm8350_pga_work(struct work_struct *work) - out2->ramp == WM8350_RAMP_UP) { - /* delay is longer over 0dB as increases are larger */ - if (i >= WM8350_OUTn_0dB) -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (2)); - else -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (1)); - } else - udelay(50); /* doesn't matter if we delay longer */ -@@ -1120,7 +1120,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - (platform->dis_out4 << 6)); - - /* wait for discharge */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform-> - cap_discharge_msecs)); - -@@ -1136,7 +1136,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - WM8350_VBUFEN); - - /* wait for vmid */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform-> - vmid_charge_msecs)); - -@@ -1187,7 +1187,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, pm1); - - /* wait */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform-> - vmid_discharge_msecs)); - -@@ -1205,7 +1205,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - pm1 | WM8350_OUTPUT_DRAIN_EN); - - /* wait */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform->drain_msecs)); - - pm1 &= ~WM8350_BIASEN; -diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c -index 271235a69c01..3ec90e1b1eb4 100644 ---- a/sound/soc/codecs/wm8900.c -+++ b/sound/soc/codecs/wm8900.c -@@ -1109,7 +1109,7 @@ static int wm8900_set_bias_level(struct snd_soc_component *component, - /* Need to let things settle before stopping the clock - * to ensure that restart works, see "Stopping the - * master clock" in the datasheet. 
*/ -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible(1); - snd_soc_component_write(component, WM8900_REG_POWER2, - WM8900_REG_POWER2_SYSCLK_ENA); - break; -diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c -index 6497c1ea6228..08fefeca9d82 100644 ---- a/sound/soc/codecs/wm9713.c -+++ b/sound/soc/codecs/wm9713.c -@@ -199,7 +199,7 @@ static int wm9713_voice_shutdown(struct snd_soc_dapm_widget *w, - - /* Gracefully shut down the voice interface. */ - snd_soc_component_update_bits(component, AC97_HANDSET_RATE, 0x0f00, 0x0200); -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible(1); - snd_soc_component_update_bits(component, AC97_HANDSET_RATE, 0x0f00, 0x0f00); - snd_soc_component_update_bits(component, AC97_EXTENDED_MID, 0x1000, 0x1000); - -@@ -868,7 +868,7 @@ static int wm9713_set_pll(struct snd_soc_component *component, - wm9713->pll_in = freq_in; - - /* wait 10ms AC97 link frames for the link to stabilise */ -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - return 0; - } - -diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c -index e2632841b321..7a445c1a2167 100644 ---- a/sound/soc/soc-dapm.c -+++ b/sound/soc/soc-dapm.c -@@ -154,7 +154,7 @@ static void dapm_assert_locked(struct snd_soc_dapm_context *dapm) - static void pop_wait(u32 pop_time) - { - if (pop_time) -- schedule_timeout_uninterruptible(msecs_to_jiffies(pop_time)); -+ schedule_msec_hrtimeout_uninterruptible((pop_time)); - } - - __printf(3, 4) -diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c -index fdbdfb7bce92..fa8e8faf3eb3 100644 ---- a/sound/usb/line6/pcm.c -+++ b/sound/usb/line6/pcm.c -@@ -127,7 +127,7 @@ static void line6_wait_clear_audio_urbs(struct snd_line6_pcm *line6pcm, - if (!alive) - break; - set_current_state(TASK_UNINTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } while (--timeout > 0); - if (alive) - dev_err(line6pcm->line6->ifcdev, -diff --git a/kernel/cpu.c b/kernel/cpu.c -index 244d305443773..90b77028233b0 100644 ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -1565,7 +1565,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { - [CPUHP_BRINGUP_CPU] = { - .name = "cpu:bringup", - .startup.single = bringup_cpu, -+#ifdef CONFIG_SCHED_MUQSS -+ .teardown.single = NULL, -+#else - .teardown.single = finish_cpu, -+#endif - .cant_stop = true, - }, - /* Final state before CPU kills itself */ diff --git a/linux57-tkg/linux57-tkg-patches/0004-glitched-muqss.patch b/linux57-tkg/linux57-tkg-patches/0004-glitched-muqss.patch deleted file mode 100644 index 2c4837e..0000000 --- a/linux57-tkg/linux57-tkg-patches/0004-glitched-muqss.patch +++ /dev/null @@ -1,78 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - MuQSS - -diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c -index 84a1d08d68551..57c3036a68952 100644 ---- a/kernel/sched/MuQSS.c -+++ b/kernel/sched/MuQSS.c -@@ -163,7 +167,11 @@ int sched_interactive __read_mostly = 1; - * are allowed to run five seconds as real time tasks. This is the total over - * all online cpus. - */ -+#ifdef CONFIG_ZENIFY -+int sched_iso_cpu __read_mostly = 25; -+#else - int sched_iso_cpu __read_mostly = 70; -+#endif - - /* - * sched_yield_type - Choose what sort of yield sched_yield will perform. 
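
The hunk above lowers the MuQSS ISO CPU budget under CONFIG_ZENIFY from the default 70% to 25%. SCHED_ISO is the unprivileged soft-realtime policy that BFS/MuQSS/PDS implement (policy number 4, per the uapi hunk further down in this series): a task running under it is treated close to realtime until ISO tasks collectively exceed sched_iso_cpu percent of total CPU time, after which they fall back to normal scheduling. The userspace sketch below is an illustration only, not part of the removed patch; on a kernel without one of these schedulers the call simply fails.

    /*
     * Minimal sketch, not from the removed patch: request SCHED_ISO for the
     * current process. Mainline reserves the policy number but rejects it,
     * so expect EINVAL unless a BFS/MuQSS/PDS kernel is running.
     */
    #define _GNU_SOURCE
    #include <errno.h>
    #include <sched.h>
    #include <stdio.h>
    #include <string.h>

    #ifndef SCHED_ISO
    #define SCHED_ISO 4   /* matches the uapi define added later in this series */
    #endif

    int main(void)
    {
        struct sched_param sp = { .sched_priority = 0 }; /* ISO takes no rt priority */

        if (sched_setscheduler(0, SCHED_ISO, &sp) == -1) {
            fprintf(stderr, "SCHED_ISO unavailable: %s\n", strerror(errno));
            return 1;
        }
        printf("now running with policy %d (SCHED_ISO)\n", sched_getscheduler(0));
        return 0;
    }

With the ZENIFY value above, ISO tasks are throttled back to normal scheduling once they consume roughly a quarter of CPU time across all online CPUs, rather than 70%.
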
- -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -5,7 +5,7 @@ - choice - prompt "Timer frequency" - default HZ_100 if SCHED_MUQSS -- default HZ_250_NODEF if !SCHED_MUQSS -+ default HZ_500_NODEF if !SCHED_MUQSS - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -50,6 +50,20 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500_NODEF -+ bool "500 HZ" -+ help -+ 500 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ -+ config HZ_750_NODEF -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ - config HZ_1000_NODEF - bool "1000 HZ" - help -@@ -63,6 +70,8 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250_NODEF - default 300 if HZ_300_NODEF -+ default 500 if HZ_500_NODEF -+ default 750 if HZ_750_NODEF - default 1000 if HZ_1000_NODEF - - config SCHED_HRTICK - -diff --git a/Makefile b/Makefile -index d4d36c61940b..4a9dfe471f1f 100644 ---- a/Makefile -+++ b/Makefile -@@ -15,7 +15,6 @@ NAME = Kleptomaniac Octopus - - CKVERSION = -ck1 - CKNAME = MuQSS Powered --EXTRAVERSION := $(EXTRAVERSION)$(CKVERSION) - - # We are using a recursive build, so we need to do a little thinking - # to get the ordering right. diff --git a/linux57-tkg/linux57-tkg-patches/0004-glitched-ondemand-muqss.patch b/linux57-tkg/linux57-tkg-patches/0004-glitched-ondemand-muqss.patch deleted file mode 100644 index 02933e4..0000000 --- a/linux57-tkg/linux57-tkg-patches/0004-glitched-ondemand-muqss.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 6b423eebfd5d..61e3271675d6 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -21,10 +21,10 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) --#define DEF_SAMPLING_DOWN_FACTOR (1) -+#define DEF_FREQUENCY_UP_THRESHOLD (45) -+#define DEF_SAMPLING_DOWN_FACTOR (5) - #define MAX_SAMPLING_DOWN_FACTOR (100000) --#define MICRO_FREQUENCY_UP_THRESHOLD (95) -+#define MICRO_FREQUENCY_UP_THRESHOLD (45) - #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) - #define MIN_FREQUENCY_UP_THRESHOLD (1) - #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/linux57-tkg/linux57-tkg-patches/0005-glitched-ondemand-pds.patch b/linux57-tkg/linux57-tkg-patches/0005-glitched-ondemand-pds.patch deleted file mode 100644 index c1929e8..0000000 --- a/linux57-tkg/linux57-tkg-patches/0005-glitched-ondemand-pds.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 6b423eebfd5d..61e3271675d6 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -21,10 +21,10 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (63) --#define DEF_SAMPLING_DOWN_FACTOR (1) -+#define DEF_FREQUENCY_UP_THRESHOLD (55) -+#define DEF_SAMPLING_DOWN_FACTOR (5) - #define MAX_SAMPLING_DOWN_FACTOR (100000) --#define MICRO_FREQUENCY_UP_THRESHOLD (95) -+#define MICRO_FREQUENCY_UP_THRESHOLD (63) - #define 
MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) - #define MIN_FREQUENCY_UP_THRESHOLD (1) - #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/linux57-tkg/linux57-tkg-patches/0005-glitched-pds.patch b/linux57-tkg/linux57-tkg-patches/0005-glitched-pds.patch deleted file mode 100644 index 23271f5..0000000 --- a/linux57-tkg/linux57-tkg-patches/0005-glitched-pds.patch +++ /dev/null @@ -1,166 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - PDS - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9270a4370d54..30d01e647417 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -159,7 +159,7 @@ struct scan_control { - /* - * From 0 .. 100. Higher means more swappy. - */ --int vm_swappiness = 60; -+int vm_swappiness = 20; - /* - * The total number of pages which are beyond the high watermark within all - * zones. - -diff --git a/init/Kconfig b/init/Kconfig -index 11fd9b502d06..e9bc34d3019b 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -715,6 +715,7 @@ menu "Scheduler features" - config UCLAMP_TASK - bool "Enable utilization clamping for RT/FAIR tasks" - depends on CPU_FREQ_GOV_SCHEDUTIL -+ depends on !SCHED_PDS - help - This feature enables the scheduler to track the clamped utilization - of each CPU based on RUNNABLE tasks scheduled on that CPU. -@@ -948,7 +948,6 @@ config CGROUP_DEVICE - - config CGROUP_CPUACCT - bool "Simple CPU accounting controller" -- depends on !SCHED_PDS - help - Provides a simple controller for monitoring the - total CPU consumed by the tasks in a cgroup. 
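
Both ondemand hunks above work the same way: up_threshold is the busy percentage at which the governor jumps to a higher frequency, so dropping it from 80 (MuQSS variant) or 63 (PDS variant) to 45/55 makes the CPU ramp up on much lighter load, and raising sampling_down_factor from 1 to 5 makes the governor linger at the top frequency roughly five times longer before it re-evaluates scaling down. When ondemand is the active governor these knobs are normally exposed in sysfs; the reader-side sketch below assumes the usual path and is not part of the removed patches.

    /*
     * Hedged sketch: print the two tunables retuned above. Assumes the
     * ondemand governor is active and exposes its global tunables at the
     * usual /sys/devices/system/cpu/cpufreq/ondemand/ location.
     */
    #include <stdio.h>

    static void show(const char *name)
    {
        char path[128], buf[64];
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/devices/system/cpu/cpufreq/ondemand/%s", name);
        f = fopen(path, "r");
        if (!f) {
            perror(path);
            return;
        }
        if (fgets(buf, sizeof(buf), f))
            printf("%s = %s", name, buf);
        fclose(f);
    }

    int main(void)
    {
        show("up_threshold");         /* % load that triggers an upscale */
        show("sampling_down_factor"); /* multiplier on the sampling period at max freq */
        return 0;
    }
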
-diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index b23231bae996..cab4e5c5b38e 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -24,13 +24,13 @@ obj-y += fair.o rt.o deadline.o - obj-$(CONFIG_SMP) += cpudeadline.o topology.o stop_task.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o --obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o - endif - obj-y += loadavg.o clock.o cputime.o - obj-y += idle.o - obj-y += wait.o wait_bit.o swait.o completion.o - obj-$(CONFIG_SMP) += cpupri.o pelt.o - obj-$(CONFIG_SCHEDSTATS) += stats.o -+obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o - obj-$(CONFIG_MEMBARRIER) += membarrier.o - -diff --git a/kernel/sched/pds.c b/kernel/sched/pds.c -index 9281ad164..f09a609cf 100644 ---- a/kernel/sched/pds.c -+++ b/kernel/sched/pds.c -@@ -81,6 +81,18 @@ enum { - NR_CPU_AFFINITY_CHK_LEVEL - }; - -+/* -+ * This allows printing both to /proc/sched_debug and -+ * to the console -+ */ -+#define SEQ_printf(m, x...) \ -+ do { \ -+ if (m) \ -+ seq_printf(m, x); \ -+ else \ -+ pr_cont(x); \ -+ } while (0) -+ - static inline void print_scheduler_version(void) - { - printk(KERN_INFO "pds: PDS-mq CPU Scheduler 0.99o by Alfred Chen.\n"); -@@ -6353,7 +6365,10 @@ void ia64_set_curr_task(int cpu, struct task_struct *p) - #ifdef CONFIG_SCHED_DEBUG - void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, - struct seq_file *m) --{} -+{ -+ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} - - void proc_sched_set_task(struct task_struct *p) - {} diff --git a/linux57-tkg/linux57-tkg-patches/0005-v5.7_undead-pds099o.patch b/linux57-tkg/linux57-tkg-patches/0005-v5.7_undead-pds099o.patch deleted file mode 100644 index 59c8d8d..0000000 --- a/linux57-tkg/linux57-tkg-patches/0005-v5.7_undead-pds099o.patch +++ /dev/null @@ -1,8400 +0,0 @@ -From 68f1a9541ef3185b1021e8e54d2712c7039418d7 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Tue, 2 Jun 2020 18:55:09 +0200 -Subject: PDS 099o, 5.7 rebase (release/v2) - - -diff --git a/Documentation/scheduler/sched-PDS-mq.txt b/Documentation/scheduler/sched-PDS-mq.txt -new file mode 100644 -index 000000000000..709e86f6487e ---- /dev/null -+++ b/Documentation/scheduler/sched-PDS-mq.txt -@@ -0,0 +1,56 @@ -+ Priority and Deadline based Skiplist multiple queue Scheduler -+ ------------------------------------------------------------- -+ -+CONTENT -+======== -+ -+ 0. Development -+ 1. Overview -+ 1.1 Design goal -+ 1.2 Design summary -+ 2. Design Detail -+ 2.1 Skip list implementation -+ 2.2 Task preempt -+ 2.3 Task policy, priority and deadline -+ 2.4 Task selection -+ 2.5 Run queue balance -+ 2.6 Task migration -+ -+ -+0. Development -+============== -+ -+Priority and Deadline based Skiplist multiple queue scheduler, referred to as -+PDS from here on, is developed upon the enhancement patchset VRQ(Variable Run -+Queue) for BFS(Brain Fuck Scheduler by Con Kolivas). PDS inherits the existing -+design from VRQ and inspired by the introduction of skiplist data structure -+to the scheduler by Con Kolivas. However, PDS is different from MuQSS(Multiple -+Queue Skiplist Scheduler, the successor after BFS) in many ways. -+ -+1. Overview -+=========== -+ -+1.1 Design goal -+--------------- -+ -+PDS is designed to make the cpu process scheduler code to be simple, but while -+efficiency and scalable. 
Be Simple, the scheduler code will be easy to be read -+and the behavious of scheduler will be easy to predict. Be efficiency, the -+scheduler shall be well balance the thoughput performance and task interactivity -+at the same time for different properties the tasks behave. Be scalable, the -+performance of the scheduler should be in good shape with the glowing of -+workload or with the growing of the cpu numbers. -+ -+1.2 Design summary -+------------------ -+ -+PDS is described as a multiple run queues cpu scheduler. Each cpu has its own -+run queue. A heavry customized skiplist is used as the backend data structure -+of the cpu run queue. Tasks in run queue is sorted by priority then virtual -+deadline(simplfy to just deadline from here on). In PDS, balance action among -+run queues are kept as less as possible to reduce the migration cost. Cpumask -+data structure is widely used in cpu affinity checking and cpu preemption/ -+selection to make PDS scalable with increasing cpu number. -+ -+ -+To be continued... -diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c -index f18d5067cd0f..fe489fc01c73 100644 ---- a/arch/powerpc/platforms/cell/spufs/sched.c -+++ b/arch/powerpc/platforms/cell/spufs/sched.c -@@ -51,11 +51,6 @@ static struct task_struct *spusched_task; - static struct timer_list spusched_timer; - static struct timer_list spuloadavg_timer; - --/* -- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). -- */ --#define NORMAL_PRIO 120 -- - /* - * Frequency of the spu scheduler tick. By default we do one SPU scheduler - * tick for every 10 CPU scheduler ticks. -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 2d3f963fd6f1..5f41ead019b1 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1006,6 +1006,22 @@ config NR_CPUS - config SCHED_SMT - def_bool y if SMP - -+config SMT_NICE -+ bool "SMT (Hyperthreading) aware nice priority and policy support" -+ depends on SCHED_PDS && SCHED_SMT -+ default y -+ ---help--- -+ Enabling Hyperthreading on Intel CPUs decreases the effectiveness -+ of the use of 'nice' levels and different scheduling policies -+ (e.g. realtime) due to sharing of CPU power between hyperthreads. -+ SMT nice support makes each logical CPU aware of what is running on -+ its hyperthread siblings, maintaining appropriate distribution of -+ CPU according to nice levels and scheduling policies at the expense -+ of slightly increased overhead. -+ -+ If unsure say Y here. 
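
The design text above says queued tasks are ordered by priority first and virtual deadline second. Further down, the same patch adds a u64 "priodl" field to task_struct, described as "8bits prio and 56bits deadline for quick processing": packing both keys into one integer lets the ordering check be a single unsigned compare. The sketch below only illustrates that idea; the exact bit layout PDS uses is not reproduced in this excerpt and should be treated as an assumption.

    /*
     * Illustrative only, the precise packing lives in pds.c and is not shown
     * here. The point: (prio, deadline) collapses to one u64 whose natural
     * ordering is "higher priority (lower number) first, then earlier deadline".
     */
    #include <stdint.h>

    static inline uint64_t make_priodl(uint8_t prio, uint64_t deadline)
    {
        /* assumed layout: priority in the top 8 bits, deadline below it */
        return ((uint64_t)prio << 56) | (deadline & ((1ULL << 56) - 1));
    }

    /* a single compare decides which task should run earlier */
    static inline int runs_before(uint64_t a_priodl, uint64_t b_priodl)
    {
        return a_priodl < b_priodl;
    }
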
-+ -+ - config SCHED_MC - def_bool y - prompt "Multi-core scheduler support" -diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c -index 737ff3b9c2c0..b5bc5a1b6de7 100644 ---- a/drivers/cpufreq/cpufreq_conservative.c -+++ b/drivers/cpufreq/cpufreq_conservative.c -@@ -28,8 +28,8 @@ struct cs_dbs_tuners { - }; - - /* Conservative governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) --#define DEF_FREQUENCY_DOWN_THRESHOLD (20) -+#define DEF_FREQUENCY_UP_THRESHOLD (63) -+#define DEF_FREQUENCY_DOWN_THRESHOLD (26) - #define DEF_FREQUENCY_STEP (5) - #define DEF_SAMPLING_DOWN_FACTOR (1) - #define MAX_SAMPLING_DOWN_FACTOR (10) -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 82a4d37ddecb..1130e0f5db72 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -18,7 +18,7 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) -+#define DEF_FREQUENCY_UP_THRESHOLD (63) - #define DEF_SAMPLING_DOWN_FACTOR (1) - #define MAX_SAMPLING_DOWN_FACTOR (100000) - #define MICRO_FREQUENCY_UP_THRESHOLD (95) -@@ -127,7 +127,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) - } - - /* -- * Every sampling_rate, we check, if current idle time is less than 20% -+ * Every sampling_rate, we check, if current idle time is less than 37% - * (default), then we try to increase frequency. Else, we adjust the frequency - * proportional to load. - */ -diff --git a/fs/proc/base.c b/fs/proc/base.c -index eb2255e95f62..62b8cedbccb6 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -479,7 +479,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, - seq_puts(m, "0 0 0\n"); - else - seq_printf(m, "%llu %llu %lu\n", -- (unsigned long long)task->se.sum_exec_runtime, -+ (unsigned long long)tsk_seruntime(task), - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); - -diff --git a/include/linux/init_task.h b/include/linux/init_task.h -index 2c620d7ac432..1a7987c40c80 100644 ---- a/include/linux/init_task.h -+++ b/include/linux/init_task.h -@@ -36,7 +36,11 @@ extern struct cred init_cred; - #define INIT_PREV_CPUTIME(x) - #endif - -+#ifdef CONFIG_SCHED_PDS -+#define INIT_TASK_COMM "PDS" -+#else - #define INIT_TASK_COMM "swapper" -+#endif /* !CONFIG_SCHED_PDS */ - - /* Attach to the init_task data structure for proper alignment */ - #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK -diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h -index fed6ba96c527..f03a5ee419a1 100644 ---- a/include/linux/jiffies.h -+++ b/include/linux/jiffies.h -@@ -169,7 +169,7 @@ static inline u64 get_jiffies_64(void) - * Have the 32 bit jiffies value wrap 5 minutes after boot - * so jiffies wrap bugs show up earlier. 
- */ --#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) -+#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ)) - - /* - * Change timeval to jiffies, trying to avoid the -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 4418f5cb8324..2b51afac5b06 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include - - /* task_struct member predeclarations (sorted alphabetically): */ - struct audit_context; -@@ -652,9 +653,13 @@ struct task_struct { - unsigned int flags; - unsigned int ptrace; - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_PDS) - struct llist_node wake_entry; -+#endif -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_PDS) - int on_cpu; -+#endif -+#ifdef CONFIG_SMP - #ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ - unsigned int cpu; -@@ -663,6 +668,7 @@ struct task_struct { - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; - -+#ifndef CONFIG_SCHED_PDS - /* - * recent_used_cpu is initially set as the last CPU used by a task - * that wakes affine another task. Waker/wakee relationships can -@@ -671,6 +677,7 @@ struct task_struct { - * used CPU that may be idle. - */ - int recent_used_cpu; -+#endif /* CONFIG_SCHED_PDS */ - int wake_cpu; - #endif - int on_rq; -@@ -680,13 +687,27 @@ struct task_struct { - int normal_prio; - unsigned int rt_priority; - -+#ifdef CONFIG_SCHED_PDS -+ int time_slice; -+ u64 deadline; -+ /* skip list level */ -+ int sl_level; -+ /* skip list node */ -+ struct skiplist_node sl_node; -+ /* 8bits prio and 56bits deadline for quick processing */ -+ u64 priodl; -+ u64 last_ran; -+ /* sched_clock time spent running */ -+ u64 sched_time; -+#else /* CONFIG_SCHED_PDS */ - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; -+ struct sched_dl_entity dl; -+#endif - #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; - #endif -- struct sched_dl_entity dl; - - #ifdef CONFIG_UCLAMP_TASK - /* Clamp values requested for a scheduling entity */ -@@ -1306,6 +1327,29 @@ struct task_struct { - */ - }; - -+#ifdef CONFIG_SCHED_PDS -+void cpu_scaling(int cpu); -+void cpu_nonscaling(int cpu); -+#define tsk_seruntime(t) ((t)->sched_time) -+/* replace the uncertian rt_timeout with 0UL */ -+#define tsk_rttimeout(t) (0UL) -+ -+#define task_running_idle(p) ((p)->prio == IDLE_PRIO) -+#else /* CFS */ -+extern int runqueue_is_locked(int cpu); -+static inline void cpu_scaling(int cpu) -+{ -+} -+ -+static inline void cpu_nonscaling(int cpu) -+{ -+} -+#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) -+#define tsk_rttimeout(t) ((t)->rt.timeout) -+ -+#define iso_task(p) (false) -+#endif /* CONFIG_SCHED_PDS */ -+ - static inline struct pid *task_pid(struct task_struct *task) - { - return task->thread_pid; -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 1aff00b65f3c..a5e5fc2c9170 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -1,5 +1,22 @@ - /* SPDX-License-Identifier: GPL-2.0 */ - -+#ifdef CONFIG_SCHED_PDS -+ -+#define __tsk_deadline(p) ((p)->deadline) -+ -+static inline int dl_prio(int prio) -+{ -+ return 1; -+} -+ -+static inline int dl_task(struct task_struct *p) -+{ -+ return 1; -+} -+#else -+ -+#define __tsk_deadline(p) ((p)->dl.deadline) -+ - /* - * SCHED_DEADLINE tasks has negative priorities, reflecting - * the fact that any of them has higher prio than RT and -@@ -19,6 +36,7 @@ static inline 
int dl_task(struct task_struct *p) - { - return dl_prio(p->prio); - } -+#endif /* CONFIG_SCHED_PDS */ - - static inline bool dl_time_before(u64 a, u64 b) - { -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 7d64feafc408..fba04bb91492 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -20,7 +20,18 @@ - */ - - #define MAX_USER_RT_PRIO 100 -+ -+#ifdef CONFIG_SCHED_PDS -+#define ISO_PRIO (MAX_USER_RT_PRIO) -+ -+#define MAX_RT_PRIO ((MAX_USER_RT_PRIO) + 1) -+ -+#define NORMAL_PRIO (MAX_RT_PRIO) -+#define IDLE_PRIO ((MAX_RT_PRIO) + 1) -+#define PRIO_LIMIT ((IDLE_PRIO) + 1) -+#else /* !CONFIG_SCHED_PDS */ - #define MAX_RT_PRIO MAX_USER_RT_PRIO -+#endif /* CONFIG_SCHED_PDS */ - - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..a96012e6f15e 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return true; -+#ifndef CONFIG_SCHED_PDS - if (policy == SCHED_DEADLINE) - return true; -+#endif - return false; - } - -diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h -index 38359071236a..90328ccd527f 100644 ---- a/include/linux/sched/task.h -+++ b/include/linux/sched/task.h -@@ -106,7 +106,7 @@ extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); - extern void free_task(struct task_struct *tsk); - - /* sched_exec is called by processes performing an exec */ --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_PDS) - extern void sched_exec(void); - #else - #define sched_exec() {} -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -new file mode 100644 -index 000000000000..713fedd8034f ---- /dev/null -+++ b/include/linux/skip_list.h -@@ -0,0 +1,177 @@ -+/* -+ Copyright (C) 2016 Alfred Chen. -+ -+ Code based on Con Kolivas's skip list implementation for BFS, and -+ which is based on example originally by William Pugh. -+ -+Skip Lists are a probabilistic alternative to balanced trees, as -+described in the June 1990 issue of CACM and were invented by -+William Pugh in 1987. -+ -+A couple of comments about this implementation: -+ -+This file only provides a infrastructure of skip list. -+ -+skiplist_node is embedded into container data structure, to get rid the -+dependency of kmalloc/kfree operation in scheduler code. -+ -+A customized search function should be defined using DEFINE_SKIPLIST_INSERT -+macro and be used for skip list insert operation. -+ -+Random Level is also not defined in this file, instead, it should be customized -+implemented and set to node->level then pass to the customized skiplist_insert -+function. -+ -+Levels start at zero and go up to (NUM_SKIPLIST_LEVEL -1) -+ -+NUM_SKIPLIST_LEVEL in this implementation is 8 instead of origin 16, -+considering that there will be 256 entries to enable the top level when using -+random level p=0.5, and that number is more than enough for a run queue usage -+in a scheduler usage. And it also help to reduce the memory usage of the -+embedded skip list node in task_struct to about 50%. -+ -+The insertion routine has been implemented so as to use the -+dirty hack described in the CACM paper: if a random level is -+generated that is more than the current maximum level, the -+current maximum level plus one is used instead. 
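
For readers unfamiliar with skip lists, the "dirty hack" paragraph above amounts to very little code: draw a level with a coin flip per step (p = 0.5), and if the draw exceeds the list's current maximum level, use the current maximum plus one. The sketch below is a plain userspace illustration of that rule, not the routine PDS itself uses (the header notes that random-level generation is left to the caller, which sets node->level before insertion).

    /*
     * Illustration of the capped random-level rule described above.
     * NUM_SKIPLIST_LEVEL mirrors the value in the header (8 levels, 0..7).
     */
    #include <stdlib.h>

    #define NUM_SKIPLIST_LEVEL 8

    static int pick_level(unsigned int *seed, int current_max_level)
    {
        int level = 0;

        /* each additional level has probability 1/2 */
        while (level < NUM_SKIPLIST_LEVEL - 1 && (rand_r(seed) & 1))
            level++;

        /* the CACM "dirty hack": never jump more than one level above the
         * list's current maximum */
        if (level > current_max_level)
            level = current_max_level + 1;

        return level;
    }
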
-+ -+BFS Notes: In this implementation of skiplists, there are bidirectional -+next/prev pointers and the insert function returns a pointer to the actual -+node the value is stored. The key here is chosen by the scheduler so as to -+sort tasks according to the priority list requirements and is no longer used -+by the scheduler after insertion. The scheduler lookup, however, occurs in -+O(1) time because it is always the first item in the level 0 linked list. -+Since the task struct stores a copy of the node pointer upon skiplist_insert, -+it can also remove it much faster than the original implementation with the -+aid of prev<->next pointer manipulation and no searching. -+*/ -+#ifndef _LINUX_SKIP_LIST_H -+#define _LINUX_SKIP_LIST_H -+ -+#include -+ -+#define NUM_SKIPLIST_LEVEL (8) -+ -+struct skiplist_node { -+ int level; /* Levels in this node */ -+ struct skiplist_node *next[NUM_SKIPLIST_LEVEL]; -+ struct skiplist_node *prev[NUM_SKIPLIST_LEVEL]; -+}; -+ -+#define SKIPLIST_NODE_INIT(name) { 0,\ -+ {&name, &name, &name, &name,\ -+ &name, &name, &name, &name},\ -+ {&name, &name, &name, &name,\ -+ &name, &name, &name, &name},\ -+ } -+ -+static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) -+{ -+ /* only level 0 ->next matters in skiplist_empty()*/ -+ WRITE_ONCE(node->next[0], node); -+} -+ -+/** -+ * FULL_INIT_SKIPLIST_NODE -- fully init a skiplist_node, expecially for header -+ * @node: the skip list node to be inited. -+ */ -+static inline void FULL_INIT_SKIPLIST_NODE(struct skiplist_node *node) -+{ -+ int i; -+ -+ node->level = 0; -+ for (i = 0; i < NUM_SKIPLIST_LEVEL; i++) { -+ WRITE_ONCE(node->next[i], node); -+ node->prev[i] = node; -+ } -+} -+ -+/** -+ * skiplist_empty - test whether a skip list is empty -+ * @head: the skip list to test. -+ */ -+static inline int skiplist_empty(const struct skiplist_node *head) -+{ -+ return READ_ONCE(head->next[0]) == head; -+} -+ -+/** -+ * skiplist_entry - get the struct for this entry -+ * @ptr: the &struct skiplist_node pointer. -+ * @type: the type of the struct this is embedded in. -+ * @member: the name of the skiplist_node within the struct. -+ */ -+#define skiplist_entry(ptr, type, member) \ -+ container_of(ptr, type, member) -+ -+/** -+ * DEFINE_SKIPLIST_INSERT_FUNC -- macro to define a customized skip list insert -+ * function, which takes two parameters, first one is the header node of the -+ * skip list, second one is the skip list node to be inserted -+ * @func_name: the customized skip list insert function name -+ * @search_func: the search function to be used, which takes two parameters, -+ * 1st one is the itrator of skiplist_node in the list, the 2nd is the skip list -+ * node to be inserted, the function should return true if search should be -+ * continued, otherwise return false. 
-+ * Returns 1 if @node is inserted as the first item of skip list at level zero, -+ * otherwise 0 -+ */ -+#define DEFINE_SKIPLIST_INSERT_FUNC(func_name, search_func)\ -+static inline int func_name(struct skiplist_node *head, struct skiplist_node *node)\ -+{\ -+ struct skiplist_node *update[NUM_SKIPLIST_LEVEL];\ -+ struct skiplist_node *p, *q;\ -+ int k = head->level;\ -+\ -+ p = head;\ -+ do {\ -+ while (q = p->next[k], q != head && search_func(q, node))\ -+ p = q;\ -+ update[k] = p;\ -+ } while (--k >= 0);\ -+\ -+ k = node->level;\ -+ if (unlikely(k > head->level)) {\ -+ node->level = k = ++head->level;\ -+ update[k] = head;\ -+ }\ -+\ -+ do {\ -+ p = update[k];\ -+ q = p->next[k];\ -+ node->next[k] = q;\ -+ p->next[k] = node;\ -+ node->prev[k] = p;\ -+ q->prev[k] = node;\ -+ } while (--k >= 0);\ -+\ -+ return (p == head);\ -+} -+ -+/** -+ * skiplist_del_init -- delete skip list node from a skip list and reset it's -+ * init state -+ * @head: the header node of the skip list to be deleted from. -+ * @node: the skip list node to be deleted, the caller need to ensure @node is -+ * in skip list which @head represent. -+ * Returns 1 if @node is the first item of skip level at level zero, otherwise 0 -+ */ -+static inline int -+skiplist_del_init(struct skiplist_node *head, struct skiplist_node *node) -+{ -+ int l, m = node->level; -+ -+ for (l = 0; l <= m; l++) { -+ node->prev[l]->next[l] = node->next[l]; -+ node->next[l]->prev[l] = node->prev[l]; -+ } -+ if (m == head->level && m > 0) { -+ while (head->next[m] == head && m > 0) -+ m--; -+ head->level = m; -+ } -+ INIT_SKIPLIST_NODE(node); -+ -+ return (node->prev[0] == head); -+} -+#endif /* _LINUX_SKIP_LIST_H */ -diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h -index 3bac0a8ceab2..d6d384ddb57d 100644 ---- a/include/uapi/linux/sched.h -+++ b/include/uapi/linux/sched.h -@@ -115,7 +115,10 @@ struct clone_args { - #define SCHED_FIFO 1 - #define SCHED_RR 2 - #define SCHED_BATCH 3 --/* SCHED_ISO: reserved but not implemented yet */ -+/* SCHED_ISO: Implemented in BFS/MuQSSPDS only */ -+ -+#define SCHED_ISO 4 -+ - #define SCHED_IDLE 5 - #define SCHED_DEADLINE 6 - -diff --git a/init/Kconfig b/init/Kconfig -index 74a5ac65644f..e4fd406b58dd 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -61,6 +61,21 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config SCHED_PDS -+ bool "PDS-mq cpu scheduler" -+ help -+ The Priority and Deadline based Skip list multiple queue CPU -+ Scheduler for excellent interactivity and responsiveness on the -+ desktop and solid scalability on normal hardware and commodity -+ servers. -+ -+ Currently incompatible with the Group CPU scheduler, and RCU TORTURE -+ TEST so these options are disabled. -+ -+ Say Y here. -+ default y -+ -+ - config BROKEN - bool - -@@ -777,6 +792,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION -+ depends on !SCHED_PDS - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -878,7 +894,7 @@ menuconfig CGROUP_SCHED - bandwidth allocation to such task groups. It uses cgroups to group - tasks. 
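
Putting the pieces of skip_list.h together: a container embeds a skiplist_node, a small search callback defines the sort order, DEFINE_SKIPLIST_INSERT_FUNC generates the insert routine, and skiplist_del_init removes a node again. The toy sketch below shows that wiring under the assumption that the header above is available as <linux/skip_list.h>; it is not code from the patch. PDS does the equivalent with task_struct::sl_node keyed on priority and deadline.

    /*
     * Toy usage sketch of the API above. Assumes the new header is in the
     * include path as <linux/skip_list.h>. Not part of the removed patch.
     */
    #include <linux/kernel.h>
    #include <linux/types.h>
    #include <linux/skip_list.h>

    struct toy_item {
        u64 deadline;
        struct skiplist_node sl_node;
    };

    /* keep searching while the iterator sorts at or before the new node */
    static inline bool toy_before(struct skiplist_node *it,
                                  struct skiplist_node *node)
    {
        return skiplist_entry(it, struct toy_item, sl_node)->deadline <=
               skiplist_entry(node, struct toy_item, sl_node)->deadline;
    }

    DEFINE_SKIPLIST_INSERT_FUNC(toy_skiplist_insert, toy_before)

    static struct skiplist_node toy_head;

    static void toy_example(void)
    {
        struct toy_item a = { .deadline = 42 };
        int was_first;

        FULL_INIT_SKIPLIST_NODE(&toy_head);   /* header node: init all levels */
        INIT_SKIPLIST_NODE(&a.sl_node);
        a.sl_node.level = 0;                  /* level chosen by the caller */

        was_first = toy_skiplist_insert(&toy_head, &a.sl_node);
        /* was_first == 1: node is now first at level 0, i.e. next to be picked */
        (void)was_first;

        skiplist_del_init(&toy_head, &a.sl_node);
    }
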
- --if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_PDS - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED -@@ -1007,6 +1023,7 @@ config CGROUP_DEVICE - - config CGROUP_CPUACCT - bool "Simple CPU accounting controller" -+ depends on !SCHED_PDS - help - Provides a simple controller for monitoring the - total CPU consumed by the tasks in a cgroup. -@@ -1134,6 +1151,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -+ depends on !SCHED_PDS - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -diff --git a/init/init_task.c b/init/init_task.c -index bd403ed3e418..162d3deddd45 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -59,6 +59,126 @@ struct task_struct init_task - __init_task_data - #endif - = { -+#ifdef CONFIG_SCHED_PDS -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ .thread_info = INIT_THREAD_INFO(init_task), -+ .stack_refcount = ATOMIC_INIT(1), -+#endif -+ .state = 0, -+ .stack = init_stack, -+ .usage = ATOMIC_INIT(2), -+ .flags = PF_KTHREAD, -+ .prio = NORMAL_PRIO, -+ .static_prio = MAX_PRIO - 20, -+ .normal_prio = NORMAL_PRIO, -+ .deadline = 0, /* PDS only */ -+ .policy = SCHED_NORMAL, -+ .cpus_ptr = &init_task.cpus_mask, -+ .cpus_mask = CPU_MASK_ALL, -+ .nr_cpus_allowed= NR_CPUS, -+ .mm = NULL, -+ .active_mm = &init_mm, -+ .restart_block = { -+ .fn = do_no_restart_syscall, -+ }, -+ .sl_level = 0, /* PDS only */ -+ .sl_node = SKIPLIST_NODE_INIT(init_task.sl_node), /* PDS only */ -+ .time_slice = HZ, /* PDS only */ -+ .tasks = LIST_HEAD_INIT(init_task.tasks), -+#ifdef CONFIG_SMP -+ .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), -+#endif -+#ifdef CONFIG_CGROUP_SCHED -+ .sched_task_group = &root_task_group, -+#endif -+ .ptraced = LIST_HEAD_INIT(init_task.ptraced), -+ .ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry), -+ .real_parent = &init_task, -+ .parent = &init_task, -+ .children = LIST_HEAD_INIT(init_task.children), -+ .sibling = LIST_HEAD_INIT(init_task.sibling), -+ .group_leader = &init_task, -+ RCU_POINTER_INITIALIZER(real_cred, &init_cred), -+ RCU_POINTER_INITIALIZER(cred, &init_cred), -+ .comm = INIT_TASK_COMM, -+ .thread = INIT_THREAD, -+ .fs = &init_fs, -+ .files = &init_files, -+ .signal = &init_signals, -+ .sighand = &init_sighand, -+ .nsproxy = &init_nsproxy, -+ .pending = { -+ .list = LIST_HEAD_INIT(init_task.pending.list), -+ .signal = {{0}} -+ }, -+ .blocked = {{0}}, -+ .alloc_lock = __SPIN_LOCK_UNLOCKED(init_task.alloc_lock), -+ .journal_info = NULL, -+ INIT_CPU_TIMERS(init_task) -+ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), -+ .timer_slack_ns = 50000, /* 50 usec default slack */ -+ .thread_pid = &init_struct_pid, -+ .thread_group = LIST_HEAD_INIT(init_task.thread_group), -+ .thread_node = LIST_HEAD_INIT(init_signals.thread_head), -+#ifdef CONFIG_AUDITSYSCALL -+ .loginuid = INVALID_UID, -+ .sessionid = AUDIT_SID_UNSET, -+#endif -+#ifdef CONFIG_PERF_EVENTS -+ .perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex), -+ .perf_event_list = LIST_HEAD_INIT(init_task.perf_event_list), -+#endif -+#ifdef CONFIG_PREEMPT_RCU -+ .rcu_read_lock_nesting = 0, -+ .rcu_read_unlock_special.s = 0, -+ .rcu_node_entry = LIST_HEAD_INIT(init_task.rcu_node_entry), -+ .rcu_blocked_node = NULL, -+#endif -+#ifdef CONFIG_TASKS_RCU -+ .rcu_tasks_holdout = false, -+ .rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list), -+ .rcu_tasks_idle_cpu = -1, -+#endif -+#ifdef CONFIG_CPUSETS -+ .mems_allowed_seq = 
SEQCNT_ZERO(init_task.mems_allowed_seq), -+#endif -+#ifdef CONFIG_RT_MUTEXES -+ .pi_waiters = RB_ROOT_CACHED, -+ .pi_top_task = NULL, -+#endif -+ INIT_PREV_CPUTIME(init_task) -+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -+ .vtime.seqcount = SEQCNT_ZERO(init_task.vtime_seqcount), -+ .vtime.starttime = 0, -+ .vtime.state = VTIME_SYS, -+#endif -+#ifdef CONFIG_NUMA_BALANCING -+ .numa_preferred_nid = -1, -+ .numa_group = NULL, -+ .numa_faults = NULL, -+#endif -+#ifdef CONFIG_KASAN -+ .kasan_depth = 1, -+#endif -+#ifdef CONFIG_TRACE_IRQFLAGS -+ .softirqs_enabled = 1, -+#endif -+#ifdef CONFIG_LOCKDEP -+ .lockdep_recursion = 0, -+#endif -+#ifdef CONFIG_FUNCTION_GRAPH_TRACER -+ .ret_stack = NULL, -+#endif -+#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPT) -+ .trace_recursion = 0, -+#endif -+#ifdef CONFIG_LIVEPATCH -+ .patch_state = KLP_UNDEFINED, -+#endif -+#ifdef CONFIG_SECURITY -+ .security = NULL, -+#endif -+#else /* CONFIG_SCHED_PDS */ - #ifdef CONFIG_THREAD_INFO_IN_TASK - .thread_info = INIT_THREAD_INFO(init_task), - .stack_refcount = REFCOUNT_INIT(1), -@@ -182,6 +302,7 @@ struct task_struct init_task - #ifdef CONFIG_SECURITY - .security = NULL, - #endif -+#endif /* CONFIG_SCHED_PDS */ - }; - EXPORT_SYMBOL(init_task); - -diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index 729d3a5c772e..10a7c52b90d5 100644 ---- a/kernel/cgroup/cpuset.c -+++ b/kernel/cgroup/cpuset.c -@@ -636,7 +636,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) - return ret; - } - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_PDS) - /* - * Helper routine for generate_sched_domains(). - * Do cpusets a, b have overlapping effective cpus_allowed masks? -@@ -1009,7 +1009,7 @@ static void rebuild_sched_domains_locked(void) - /* Have scheduler rebuild the domains */ - partition_and_rebuild_sched_domains(ndoms, doms, attr); - } --#else /* !CONFIG_SMP */ -+#else /* !CONFIG_SMP || CONFIG_SCHED_PDS */ - static void rebuild_sched_domains_locked(void) - { - } -diff --git a/kernel/delayacct.c b/kernel/delayacct.c -index 27725754ac99..769d773c7182 100644 ---- a/kernel/delayacct.c -+++ b/kernel/delayacct.c -@@ -106,7 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) - */ - t1 = tsk->sched_info.pcount; - t2 = tsk->sched_info.run_delay; -- t3 = tsk->se.sum_exec_runtime; -+ t3 = tsk_seruntime(tsk); - - d->cpu_count += t1; - -diff --git a/kernel/exit.c b/kernel/exit.c -index ce2a75bc0ade..f0f864bc1ab9 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -122,7 +122,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->curr_target = next_thread(tsk); - } - -- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, -+ add_device_randomness((const void*) &tsk_seruntime(tsk), - sizeof(unsigned long long)); - - /* -@@ -143,7 +143,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->inblock += task_io_get_inblock(tsk); - sig->oublock += task_io_get_oublock(tsk); - task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; -+ sig->sum_sched_runtime += tsk_seruntime(tsk); - sig->nr_threads--; - __unhash_process(tsk, group_dead); - write_sequnlock(&sig->stats_lock); -diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c -index f6310f848f34..b5de980c7d4e 100644 ---- a/kernel/livepatch/transition.c -+++ b/kernel/livepatch/transition.c -@@ -306,7 +306,11 @@ static bool klp_try_switch_task(struct task_struct *task) - */ - rq = task_rq_lock(task, &flags); - -+#ifdef 
CONFIG_SCHED_PDS -+ if (task_running(task) && task != current) { -+#else - if (task_running(rq, task) && task != current) { -+#endif - snprintf(err_buf, STACK_ERR_BUF_SIZE, - "%s: %s:%d is running\n", __func__, task->comm, - task->pid); -diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index c9f090d64f00..063d15a1ab8b 100644 ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -229,7 +229,7 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, - * Only use with rt_mutex_waiter_{less,equal}() - */ - #define task_to_waiter(p) \ -- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } -+ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = __tsk_deadline(p) } - - static inline int - rt_mutex_waiter_less(struct rt_mutex_waiter *left, -@@ -680,7 +680,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, - * the values of the node being removed. - */ - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - rt_mutex_enqueue(lock, waiter); - -@@ -953,7 +953,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, - waiter->task = task; - waiter->lock = lock; - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - /* Get the top priority waiter on the lock */ - if (rt_mutex_has_waiters(lock)) -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 21fb5a5662b5..8ebe4e33fb5f 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -16,15 +16,21 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) - CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer - endif - --obj-y += core.o loadavg.o clock.o cputime.o --obj-y += idle.o fair.o rt.o deadline.o --obj-y += wait.o wait_bit.o swait.o completion.o -- --obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o -+ifdef CONFIG_SCHED_PDS -+obj-y += pds.o -+else -+obj-y += core.o -+obj-y += fair.o rt.o deadline.o -+obj-$(CONFIG_SMP) += cpudeadline.o topology.o stop_task.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o --obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o - obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o -+endif -+obj-y += loadavg.o clock.o cputime.o -+obj-y += idle.o -+obj-y += wait.o wait_bit.o swait.o completion.o -+obj-$(CONFIG_SMP) += cpupri.o pelt.o -+obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o - obj-$(CONFIG_MEMBARRIER) += membarrier.o -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 7fbaee24c824..28377ad56248 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -183,6 +183,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - -+#ifndef CONFIG_SCHED_PDS - /* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. -@@ -300,6 +301,13 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) - - return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); - } -+#else /* CONFIG_SCHED_PDS */ -+static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) -+{ -+ sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); -+ return sg_cpu->max; -+} -+#endif - - /** - * sugov_iowait_reset() - Reset the IO boost status of a CPU. 
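
The schedutil hunk above replaces the utilization aggregation with a stub that returns the CPU's full capacity whenever PDS is selected, since PDS has no per-CPU utilization signal to feed the governor. Plugging util == capacity into mainline's map_util_freq(), which computes (freq + freq/4) * util / cap, means the governor always requests about 1.25x the reference frequency and is then clamped to the policy maximum, so under PDS schedutil ends up behaving much like the performance governor. A small worked example follows; it is an illustration, not a trace of the real code path, and the capacity and frequency values are made up.

    /*
     * Worked illustration of the stub's effect. map_util_freq() mirrors the
     * mainline helper from <linux/sched/cpufreq.h>; the numbers are examples.
     */
    #include <stdio.h>

    static unsigned long map_util_freq(unsigned long util,
                                       unsigned long freq, unsigned long cap)
    {
        return (freq + (freq >> 2)) * util / cap;
    }

    int main(void)
    {
        unsigned long cap   = 1024;      /* arch_scale_cpu_capacity() */
        unsigned long f_max = 3600000;   /* policy max, in kHz (hypothetical) */

        /* PDS stub: sugov_get_util() returns cap, so util == cap always */
        unsigned long req = map_util_freq(cap, f_max, cap);

        printf("requested %lu kHz, clamped to %lu kHz\n", req, f_max);
        return 0;
    }
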
-@@ -443,7 +451,9 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } - */ - static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) - { -+#ifndef CONFIG_SCHED_PDS - if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) -+#endif - sg_policy->limits_changed = true; - } - -@@ -686,6 +696,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - } - - ret = sched_setattr_nocheck(thread, &attr); -+ - if (ret) { - kthread_stop(thread); - pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); -@@ -916,6 +927,7 @@ static int __init sugov_register(void) - core_initcall(sugov_register); - - #ifdef CONFIG_ENERGY_MODEL -+#ifndef CONFIG_SCHED_PDS - extern bool sched_energy_update; - extern struct mutex sched_energy_mutex; - -@@ -946,4 +958,10 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - } - - } -+#else /* CONFIG_SCHED_PDS */ -+void sched_cpufreq_governor_change(struct cpufreq_policy *policy, -+ struct cpufreq_governor *old_gov) -+{ -+} -+#endif - #endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index ff9435dee1df..1377ea3d1b76 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -122,7 +122,12 @@ void account_user_time(struct task_struct *p, u64 cputime) - p->utime += cputime; - account_group_user_time(p, cputime); - -+#ifdef CONFIG_SCHED_PDS -+ index = (task_nice(p) > 0 || task_running_idle(p)) ? CPUTIME_NICE : -+ CPUTIME_USER; -+#else - index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; -+#endif - - /* Add user time to cpustat. */ - task_group_account_field(p, index, cputime); -@@ -146,7 +151,11 @@ void account_guest_time(struct task_struct *p, u64 cputime) - p->gtime += cputime; - - /* Add guest time to cpustat. */ -+#ifdef CONFIG_SCHED_PDS -+ if (task_nice(p) > 0 || task_running_idle(p)) { -+#else - if (task_nice(p) > 0) { -+#endif - cpustat[CPUTIME_NICE] += cputime; - cpustat[CPUTIME_GUEST_NICE] += cputime; - } else { -@@ -269,7 +278,7 @@ static inline u64 account_other_time(u64 max) - #ifdef CONFIG_64BIT - static inline u64 read_sum_exec_runtime(struct task_struct *t) - { -- return t->se.sum_exec_runtime; -+ return tsk_seruntime(t); - } - #else - static u64 read_sum_exec_runtime(struct task_struct *t) -@@ -279,7 +288,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) - struct rq *rq; - - rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -+ ns = tsk_seruntime(t); - task_rq_unlock(rq, t, &rf); - - return ns; -@@ -658,7 +667,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - task_cputime(p, &cputime.utime, &cputime.stime); -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index b743bf38f08f..16e5754af1cf 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -361,6 +361,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_PDS - /* - * idle-task scheduling class. 
- */ -@@ -481,3 +482,4 @@ const struct sched_class idle_sched_class = { - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif -diff --git a/kernel/sched/pds.c b/kernel/sched/pds.c -new file mode 100644 -index 000000000000..02d7d5a67c77 ---- /dev/null -+++ b/kernel/sched/pds.c -@@ -0,0 +1,6554 @@ -+/* -+ * kernel/sched/pds.c, was kernel/sched.c -+ * -+ * PDS-mq Core kernel scheduler code and related syscalls -+ * -+ * Copyright (C) 1991-2002 Linus Torvalds -+ * -+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes -+ * a whole lot of those previous things. -+ * 2017-09-06 Priority and Deadline based Skip list multiple queue kernel -+ * scheduler by Alfred Chen. -+ */ -+#include "pds_sched.h" -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+ -+#include "../workqueue_internal.h" -+#include "../../fs/io-wq.h" -+#include "../smpboot.h" -+ -+#include "pelt.h" -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+ -+#define rt_prio(prio) ((prio) < MAX_RT_PRIO) -+#define rt_task(p) rt_prio((p)->prio) -+#define rt_policy(policy) ((policy) == SCHED_FIFO || \ -+ (policy) == SCHED_RR || \ -+ (policy) == SCHED_ISO) -+#define task_has_rt_policy(p) (rt_policy((p)->policy)) -+ -+#define idle_policy(policy) ((policy) == SCHED_IDLE) -+#define idleprio_task(p) unlikely(idle_policy((p)->policy)) -+ -+#define STOP_PRIO (MAX_RT_PRIO - 1) -+ -+/* -+ * Some helpers for converting to/from various scales. Use shifts to get -+ * approximate multiples of ten for less overhead. -+ */ -+#define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) -+#define JIFFY_NS (1000000000 / HZ) -+#define HALF_JIFFY_NS (1000000000 / HZ / 2) -+#define HALF_JIFFY_US (1000000 / HZ / 2) -+#define MS_TO_NS(TIME) ((TIME) << 20) -+#define MS_TO_US(TIME) ((TIME) << 10) -+#define NS_TO_MS(TIME) ((TIME) >> 20) -+#define NS_TO_US(TIME) ((TIME) >> 10) -+#define US_TO_NS(TIME) ((TIME) << 10) -+ -+#define RESCHED_US (100) /* Reschedule if less than this many μs left */ -+ -+enum { -+ BASE_CPU_AFFINITY_CHK_LEVEL = 1, -+#ifdef CONFIG_SCHED_SMT -+ SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+#ifdef CONFIG_SCHED_MC -+ MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+ NR_CPU_AFFINITY_CHK_LEVEL -+}; -+ -+static inline void print_scheduler_version(void) -+{ -+ printk(KERN_INFO "pds: PDS-mq CPU Scheduler 0.99o by Alfred Chen and kept alive artificially by Tk-Glitch.\n"); -+} -+ -+/* -+ * This is the time all tasks within the same priority round robin. -+ * Value is in ms and set to a minimum of 6ms. Scales with number of cpus. -+ * Tunable via /proc interface. 
-+ */ -+#define SCHED_DEFAULT_RR (4) -+int rr_interval __read_mostly = SCHED_DEFAULT_RR; -+ -+static int __init rr_interval_set(char *str) -+{ -+ u32 rr; -+ -+ pr_info("rr_interval: "); -+ if (kstrtouint(str, 0, &rr)) { -+ pr_cont("using default of %u, unable to parse %s\n", -+ rr_interval, str); -+ return 1; -+ } -+ -+ rr_interval = rr; -+ pr_cont("%d\n", rr_interval); -+ -+ return 1; -+} -+__setup("rr_interval=", rr_interval_set); -+ -+ -+static const u64 sched_prio2deadline[NICE_WIDTH] = { -+/* -20 */ 6291456, 6920601, 7612661, 8373927, 9211319, -+/* -15 */ 10132450, 11145695, 12260264, 13486290, 14834919, -+/* -10 */ 16318410, 17950251, 19745276, 21719803, 23891783, -+/* -5 */ 26280961, 28909057, 31799962, 34979958, 38477953, -+/* 0 */ 42325748, 46558322, 51214154, 56335569, 61969125, -+/* 5 */ 68166037, 74982640, 82480904, 90728994, 99801893, -+/* 10 */ 109782082, 120760290, 132836319, 146119950, 160731945, -+/* 15 */ 176805139, 194485652, 213934217, 235327638, 258860401 -+}; -+ -+/** -+ * sched_yield_type - Choose what sort of yield sched_yield will perform. -+ * 0: No yield. -+ * 1: Yield only to better priority/deadline tasks. (default) -+ * 2: Expire timeslice and recalculate deadline. -+ */ -+int sched_yield_type __read_mostly = 1; -+ -+/* -+ * The quota handed out to tasks of all priority levels when refilling their -+ * time_slice. -+ */ -+static inline int timeslice(void) -+{ -+ return MS_TO_US(rr_interval); -+} -+ -+#ifdef CONFIG_SMP -+enum { -+SCHED_RQ_EMPTY = 0, -+SCHED_RQ_IDLE, -+SCHED_RQ_NORMAL_0, -+SCHED_RQ_NORMAL_1, -+SCHED_RQ_NORMAL_2, -+SCHED_RQ_NORMAL_3, -+SCHED_RQ_NORMAL_4, -+SCHED_RQ_NORMAL_5, -+SCHED_RQ_NORMAL_6, -+SCHED_RQ_NORMAL_7, -+SCHED_RQ_ISO, -+SCHED_RQ_RT, -+NR_SCHED_RQ_QUEUED_LEVEL -+}; -+ -+static cpumask_t sched_rq_queued_masks[NR_SCHED_RQ_QUEUED_LEVEL] -+____cacheline_aligned_in_smp; -+ -+static DECLARE_BITMAP(sched_rq_queued_masks_bitmap, NR_SCHED_RQ_QUEUED_LEVEL) -+____cacheline_aligned_in_smp; -+ -+static cpumask_t sched_rq_pending_masks[NR_SCHED_RQ_QUEUED_LEVEL] -+____cacheline_aligned_in_smp; -+ -+static DECLARE_BITMAP(sched_rq_pending_masks_bitmap, NR_SCHED_RQ_QUEUED_LEVEL) -+____cacheline_aligned_in_smp; -+ -+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_chk_masks); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_start_mask); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_chk_end_masks); -+ -+#ifdef CONFIG_SCHED_SMT -+DEFINE_PER_CPU(int, sched_sibling_cpu); -+DEFINE_STATIC_KEY_FALSE(sched_smt_present); -+EXPORT_SYMBOL_GPL(sched_smt_present); -+ -+static cpumask_t sched_cpu_sg_idle_mask ____cacheline_aligned_in_smp; -+ -+#ifdef CONFIG_SMT_NICE -+/* -+ * Preemptible sibling group mask -+ * Which all sibling cpus are running at PRIO_LIMIT or IDLE_PRIO -+ */ -+static cpumask_t sched_cpu_psg_mask ____cacheline_aligned_in_smp; -+/* -+ * SMT supressed mask -+ * When a cpu is running task with NORMAL/ISO/RT policy, its sibling cpu -+ * will be supressed to run IDLE priority task. -+ */ -+static cpumask_t sched_smt_supressed_mask ____cacheline_aligned_in_smp; -+#endif /* CONFIG_SMT_NICE */ -+#endif -+ -+static int sched_rq_prio[NR_CPUS] ____cacheline_aligned; -+ -+/* -+ * Keep a unique ID per domain (we use the first CPUs number in the cpumask of -+ * the domain), this allows us to quickly tell if two cpus are in the same cache -+ * domain, see cpus_share_cache(). 
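Each entry of the sched_prio2deadline[] table above is roughly 1.1x the previous one, so every nice level adds about 10% to a task's deadline offset. A quick stand-alone check of that spacing, using values copied from the table (plain user-space C, illustration only):

#include <stdio.h>

int main(void)
{
        /* first two and last entries copied from sched_prio2deadline[] */
        const double nice_m20 = 6291456.0;
        const double nice_m19 = 6920601.0;
        const double nice_p19 = 258860401.0;

        printf("per-nice step ratio : %.3f\n", nice_m19 / nice_m20); /* ~1.100 */
        printf("nice -20 .. +19 span: %.1fx\n", nice_p19 / nice_m20); /* ~41x  */
        return 0;
}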
-+ */ -+DEFINE_PER_CPU(int, sd_llc_id); -+ -+int __weak arch_sd_sibling_asym_packing(void) -+{ -+ return 0*SD_ASYM_PACKING; -+} -+#else -+struct rq *uprq; -+#endif /* CONFIG_SMP */ -+ -+static DEFINE_MUTEX(sched_hotcpu_mutex); -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+/* -+ * Context: p->pi_lock -+ */ -+static inline struct rq -+*__task_access_lock(struct task_struct *p, raw_spinlock_t **plock) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock(&rq->lock); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ *plock = NULL; -+ return rq; -+ } -+ } -+} -+ -+static inline void -+__task_access_unlock(struct task_struct *p, raw_spinlock_t *lock) -+{ -+ if (NULL != lock) -+ raw_spin_unlock(lock); -+} -+ -+static inline struct rq -+*task_access_lock_irqsave(struct task_struct *p, raw_spinlock_t **plock, -+ unsigned long *flags) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock_irqsave(&rq->lock, *flags); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&rq->lock, *flags); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ raw_spin_lock_irqsave(&p->pi_lock, *flags); -+ if (likely(!p->on_cpu && !p->on_rq && -+ rq == task_rq(p))) { -+ *plock = &p->pi_lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags); -+ } -+ } -+} -+ -+static inline void -+task_access_unlock_irqrestore(struct task_struct *p, raw_spinlock_t *lock, -+ unsigned long *flags) -+{ -+ raw_spin_unlock_irqrestore(lock, *flags); -+} -+ -+/* -+ * __task_rq_lock - lock the rq @p resides on. -+ */ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ for (;;) { -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) -+ return rq; -+ raw_spin_unlock(&rq->lock); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+/* -+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. -+ */ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ for (;;) { -+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ /* -+ * move_queued_task() task_rq_lock() -+ * -+ * ACQUIRE (rq->lock) -+ * [S] ->on_rq = MIGRATING [L] rq = task_rq() -+ * WMB (__set_task_cpu()) ACQUIRE (rq->lock); -+ * [S] ->cpu = new_cpu [L] task_rq() -+ * [L] ->on_rq -+ * RELEASE (rq->lock) -+ * -+ * If we observe the old CPU in task_rq_lock(), the acquire of -+ * the old rq->lock will fully serialize against the stores. 
-+ * -+ * If we observe the new CPU in task_rq_lock(), the address -+ * dependency headed by '[L] rq = task_rq()' and the acquire -+ * will pair with the WMB to ensure we then also see migrating. -+ */ -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+/* -+ * RQ-clock updating methods: -+ */ -+ -+static void update_rq_clock_task(struct rq *rq, s64 delta) -+{ -+/* -+ * In theory, the compile should just see 0 here, and optimize out the call -+ * to sched_rt_avg_update. But I don't trust it... -+ */ -+ s64 __maybe_unused steal = 0, irq_delta = 0; -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; -+ -+ /* -+ * Since irq_time is only updated on {soft,}irq_exit, we might run into -+ * this case when a previous update_rq_clock() happened inside a -+ * {soft,}irq region. -+ * -+ * When this happens, we stop ->clock_task and only update the -+ * prev_irq_time stamp to account for the part that fit, so that a next -+ * update will consume the rest. This ensures ->clock_task is -+ * monotonic. -+ * -+ * It does however cause some slight miss-attribution of {soft,}irq -+ * time, a more accurate solution would be to update the irq_time using -+ * the current rq->clock timestamp, except that would require using -+ * atomic ops. -+ */ -+ if (irq_delta > delta) -+ irq_delta = delta; -+ -+ rq->prev_irq_time += irq_delta; -+ delta -= irq_delta; -+#endif -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ if (static_key_false((¶virt_steal_rq_enabled))) { -+ steal = paravirt_steal_clock(cpu_of(rq)); -+ steal -= rq->prev_steal_time_rq; -+ -+ if (unlikely(steal > delta)) -+ steal = delta; -+ -+ rq->prev_steal_time_rq += steal; -+ -+ delta -= steal; -+ } -+#endif -+ -+ rq->clock_task += delta; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ if ((irq_delta + steal)) -+ update_irq_load_avg(rq, irq_delta + steal); -+#endif -+} -+ -+static inline void update_rq_clock(struct rq *rq) -+{ -+ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; -+ -+ if (unlikely(delta <= 0)) -+ return; -+ rq->clock += delta; -+ update_rq_clock_task(rq, delta); -+} -+ -+static inline void update_task_priodl(struct task_struct *p) -+{ -+ p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); -+} -+ -+/* -+ * Deadline is "now" in niffies + (offset by priority). Setting the deadline -+ * is the key to everything. It distributes CPU fairly amongst tasks of the -+ * same nice value, it proportions CPU according to nice level, it means the -+ * task that last woke up the longest ago has the earliest deadline, thus -+ * ensuring that interactive tasks get low latency on wake up. The CPU -+ * proportion works out to the square of the virtual deadline difference, so -+ * this equation will give nice 19 3% CPU compared to nice 0. -+ */ -+static inline u64 task_deadline_diff(const struct task_struct *p) -+{ -+ return sched_prio2deadline[TASK_USER_PRIO(p)]; -+} -+ -+static inline u64 static_deadline_diff(int static_prio) -+{ -+ return sched_prio2deadline[USER_PRIO(static_prio)]; -+} -+ -+/* -+ * The time_slice is only refilled when it is empty and that is when we set a -+ * new deadline for non-rt tasks. 
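update_task_priodl() above packs the 8-bit priority into the top byte of a u64 and the deadline (shifted right by 8) into the lower 56 bits, so a single integer comparison orders tasks by priority first and, within a priority, by earliest deadline. A minimal user-space sketch of that packing (the helper name and sample values are invented for illustration):

#include <stdio.h>
#include <stdint.h>

/* same packing as update_task_priodl() above */
static uint64_t pack_priodl(uint8_t prio, uint64_t deadline)
{
        return ((uint64_t)prio << 56) | (deadline >> 8);
}

int main(void)
{
        /* a numerically lower prio wins regardless of deadline ... */
        printf("%d\n", pack_priodl(100, 1000000) < pack_priodl(120, 10));  /* 1 */
        /* ... and equal prio falls back to the earlier deadline */
        printf("%d\n", pack_priodl(120, 500) < pack_priodl(120, 900));     /* 1 */
        return 0;
}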
-+ */ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ p->time_slice = timeslice(); -+ if (p->prio >= NORMAL_PRIO) -+ p->deadline = rq->clock + task_deadline_diff(p); -+ -+ update_task_priodl(p); -+} -+ -+static inline struct task_struct *rq_first_queued_task(struct rq *rq) -+{ -+ struct skiplist_node *node = rq->sl_header.next[0]; -+ -+ if (node == &rq->sl_header) -+ return rq->idle; -+ -+ return skiplist_entry(node, struct task_struct, sl_node); -+} -+ -+static inline struct task_struct *rq_second_queued_task(struct rq *rq) -+{ -+ struct skiplist_node *node = rq->sl_header.next[0]->next[0]; -+ -+ if (node == &rq->sl_header) -+ return rq->idle; -+ -+ return skiplist_entry(node, struct task_struct, sl_node); -+} -+ -+static inline int is_second_in_rq(struct task_struct *p, struct rq *rq) -+{ -+ return (p->sl_node.prev[0]->prev[0] == &rq->sl_header); -+} -+ -+static const int task_dl_hash_tbl[] = { -+/* 0 4 8 12 */ -+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -+/* 16 20 24 28 */ -+ 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 7 -+}; -+ -+static inline int -+task_deadline_level(const struct task_struct *p, const struct rq *rq) -+{ -+ u64 delta = (rq->clock + sched_prio2deadline[39] - p->deadline) >> 23; -+ -+ delta = min((size_t)delta, ARRAY_SIZE(task_dl_hash_tbl) - 1); -+ return task_dl_hash_tbl[delta]; -+} -+ -+/* -+ * cmpxchg based fetch_or, macro so it works for different integer types -+ */ -+#define fetch_or(ptr, mask) \ -+ ({ \ -+ typeof(ptr) _ptr = (ptr); \ -+ typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _old, _val = *_ptr; \ -+ \ -+ for (;;) { \ -+ _old = cmpxchg(_ptr, _val, _val | _mask); \ -+ if (_old == _val) \ -+ break; \ -+ _val = _old; \ -+ } \ -+ _old; \ -+}) -+ -+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) -+/* -+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * this avoids any races wrt polling state changes and thereby avoids -+ * spurious IPIs. -+ */ -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+} -+ -+/* -+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. -+ * -+ * If this returns true, then the idle task promises to call -+ * sched_ttwu_pending() and reschedule soon. 
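fetch_or() above is a cmpxchg loop that atomically ORs a mask in and returns the *previous* value; set_nr_and_not_polling() uses the returned old flags to decide whether the target CPU actually needs an IPI. A rough user-space model with C11 atomics (the TIF_* bit values here are stand-ins, not the real kernel flags):

#include <stdatomic.h>
#include <stdio.h>

#define TIF_NEED_RESCHED  (1u << 0)   /* stand-in bit values */
#define TIF_POLLING       (1u << 1)

int main(void)
{
        atomic_uint flags = TIF_POLLING;   /* remote CPU is polling for work */

        /* like fetch_or(): set NEED_RESCHED, get the previous flags back */
        unsigned int old = atomic_fetch_or(&flags, TIF_NEED_RESCHED);

        /* set_nr_and_not_polling() only sends an IPI if POLLING was clear */
        int need_ipi = !(old & TIF_POLLING);
        printf("need_ipi=%d flags=%#x\n", need_ipi,
               (unsigned int)atomic_load(&flags));
        return 0;
}

A polling idle CPU notices the freshly set NEED_RESCHED bit on its own, which is why the IPI can be skipped in that case.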
-+ */ -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); -+ -+ for (;;) { -+ if (!(val & _TIF_POLLING_NRFLAG)) -+ return false; -+ if (val & _TIF_NEED_RESCHED) -+ return true; -+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); -+ if (old == val) -+ break; -+ val = old; -+ } -+ return true; -+} -+ -+#else -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ return false; -+} -+#endif -+#endif -+ -+#ifdef CONFIG_SMP -+#ifdef CONFIG_SMT_NICE -+static void resched_cpu_if_curr_is(int cpu, int priority) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ rcu_read_lock(); -+ -+ if (rcu_dereference(rq->curr)->prio != priority) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ if (!do_raw_spin_trylock(&rq->lock)) -+ goto out; -+ spin_acquire(&rq->lock.dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ -+ if (priority == rq->curr->prio) -+ smp_send_reschedule(cpu); -+ /* Else CPU is not idle, do nothing here */ -+ -+ spin_release(&rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&rq->lock); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+#endif /* CONFIG_SMT_NICE */ -+ -+static inline bool -+__update_cpumasks_bitmap(int cpu, unsigned long *plevel, unsigned long level, -+ cpumask_t cpumasks[], unsigned long bitmap[]) -+{ -+ if (*plevel == level) -+ return false; -+ -+ cpumask_clear_cpu(cpu, cpumasks + *plevel); -+ if (cpumask_empty(cpumasks + *plevel)) -+ clear_bit(*plevel, bitmap); -+ cpumask_set_cpu(cpu, cpumasks + level); -+ set_bit(level, bitmap); -+ -+ *plevel = level; -+ -+ return true; -+} -+ -+static inline int -+task_running_policy_level(const struct task_struct *p, const struct rq *rq) -+{ -+ int prio = p->prio; -+ -+ if (NORMAL_PRIO == prio) -+ return SCHED_RQ_NORMAL_0 + task_deadline_level(p, rq); -+ -+ if (ISO_PRIO == prio) -+ return SCHED_RQ_ISO; -+ if (prio < MAX_RT_PRIO) -+ return SCHED_RQ_RT; -+ return PRIO_LIMIT - prio; -+} -+ -+static inline void update_sched_rq_queued_masks_normal(struct rq *rq) -+{ -+ struct task_struct *p = rq_first_queued_task(rq); -+ -+ if (p->prio != NORMAL_PRIO) -+ return; -+ -+ __update_cpumasks_bitmap(cpu_of(rq), &rq->queued_level, -+ task_running_policy_level(p, rq), -+ &sched_rq_queued_masks[0], -+ &sched_rq_queued_masks_bitmap[0]); -+} -+ -+#ifdef CONFIG_SMT_NICE -+static inline void update_sched_cpu_psg_mask(const int cpu) -+{ -+ cpumask_t tmp; -+ -+ cpumask_or(&tmp, &sched_rq_queued_masks[SCHED_RQ_EMPTY], -+ &sched_rq_queued_masks[SCHED_RQ_IDLE]); -+ cpumask_and(&tmp, &tmp, cpu_smt_mask(cpu)); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_cpu_psg_mask, &sched_cpu_psg_mask, -+ cpu_smt_mask(cpu)); -+ else -+ cpumask_andnot(&sched_cpu_psg_mask, &sched_cpu_psg_mask, -+ cpu_smt_mask(cpu)); -+} -+#endif -+ -+static inline void update_sched_rq_queued_masks(struct rq *rq) -+{ -+ int cpu = cpu_of(rq); -+ struct task_struct *p = rq_first_queued_task(rq); -+ unsigned long level; -+#ifdef CONFIG_SCHED_SMT -+ unsigned long last_level = rq->queued_level; -+#endif -+ -+ level = task_running_policy_level(p, rq); -+ sched_rq_prio[cpu] = p->prio; -+ -+ if (!__update_cpumasks_bitmap(cpu, &rq->queued_level, level, -+ &sched_rq_queued_masks[0], -+ &sched_rq_queued_masks_bitmap[0])) -+ return; -+ -+#ifdef CONFIG_SCHED_SMT -+ if (cpu == 
per_cpu(sched_sibling_cpu, cpu)) -+ return; -+ -+ if (SCHED_RQ_EMPTY == last_level) { -+ cpumask_andnot(&sched_cpu_sg_idle_mask, &sched_cpu_sg_idle_mask, -+ cpu_smt_mask(cpu)); -+ } else if (SCHED_RQ_EMPTY == level) { -+ cpumask_t tmp; -+ -+ cpumask_and(&tmp, cpu_smt_mask(cpu), -+ &sched_rq_queued_masks[SCHED_RQ_EMPTY]); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_cpu_sg_idle_mask, cpu_smt_mask(cpu), -+ &sched_cpu_sg_idle_mask); -+ } -+ -+#ifdef CONFIG_SMT_NICE -+ if (level <= SCHED_RQ_IDLE && last_level > SCHED_RQ_IDLE) { -+ cpumask_clear_cpu(per_cpu(sched_sibling_cpu, cpu), -+ &sched_smt_supressed_mask); -+ update_sched_cpu_psg_mask(cpu); -+ resched_cpu_if_curr_is(per_cpu(sched_sibling_cpu, cpu), PRIO_LIMIT); -+ } else if (last_level <= SCHED_RQ_IDLE && level > SCHED_RQ_IDLE) { -+ cpumask_set_cpu(per_cpu(sched_sibling_cpu, cpu), -+ &sched_smt_supressed_mask); -+ update_sched_cpu_psg_mask(cpu); -+ resched_cpu_if_curr_is(per_cpu(sched_sibling_cpu, cpu), IDLE_PRIO); -+ } -+#endif /* CONFIG_SMT_NICE */ -+#endif -+} -+ -+static inline void update_sched_rq_pending_masks(struct rq *rq) -+{ -+ unsigned long level; -+ struct task_struct *p = rq_second_queued_task(rq); -+ -+ level = task_running_policy_level(p, rq); -+ -+ __update_cpumasks_bitmap(cpu_of(rq), &rq->pending_level, level, -+ &sched_rq_pending_masks[0], -+ &sched_rq_pending_masks_bitmap[0]); -+} -+ -+#else /* CONFIG_SMP */ -+static inline void update_sched_rq_queued_masks(struct rq *rq) {} -+static inline void update_sched_rq_queued_masks_normal(struct rq *rq) {} -+static inline void update_sched_rq_pending_masks(struct rq *rq) {} -+#endif -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out -+ * of nohz mode if necessary. -+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu; -+ -+ if (!tick_nohz_full_enabled()) -+ return; -+ -+ cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (rq->nr_running < 2) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+/* -+ * Removing from the runqueue. Deleting a task from the skip list is done -+ * via the stored node reference in the task struct and does not require a full -+ * look up. Thus it occurs in O(k) time where k is the "level" of the list the -+ * task was stored at - usually < 4, max 16. -+ * -+ * Context: rq->lock -+ */ -+static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "pds: dequeue task reside on cpu%d from cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ if (skiplist_del_init(&rq->sl_header, &p->sl_node)) { -+ update_sched_rq_queued_masks(rq); -+ update_sched_rq_pending_masks(rq); -+ } else if (is_second_in_rq(p, rq)) -+ update_sched_rq_pending_masks(rq); -+ rq->nr_running--; -+ -+ sched_update_tick_dependency(rq); -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); -+ -+ sched_info_dequeued(rq, p); -+} -+ -+/* -+ * To determine if it's safe for a task of SCHED_IDLE to actually run as -+ * an idle task, we ensure none of the following conditions are met. 
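__update_cpumasks_bitmap() earlier in this file keeps one cpumask per run-queue level plus a bitmap of the levels that are currently non-empty, which is what lets task placement scan levels cheaply. A toy user-space model of that bookkeeping with plain bit masks (all sizes and names are invented for illustration):

#include <stdio.h>

#define NR_LEVELS 4

/* one mask of CPUs per level, plus a bitmap of non-empty levels */
static unsigned int level_mask[NR_LEVELS];
static unsigned int level_bitmap;

static int move_cpu(int cpu, unsigned int *plevel, unsigned int level)
{
        if (*plevel == level)
                return 0;                        /* nothing to update */

        level_mask[*plevel] &= ~(1u << cpu);     /* leave the old level */
        if (!level_mask[*plevel])
                level_bitmap &= ~(1u << *plevel);
        level_mask[level] |= 1u << cpu;          /* join the new level */
        level_bitmap |= 1u << level;

        *plevel = level;
        return 1;
}

int main(void)
{
        unsigned int cpu3_level = 0;

        level_mask[0] = 1u << 3;                 /* cpu3 starts at level 0 */
        level_bitmap  = 1u << 0;

        move_cpu(3, &cpu3_level, 2);             /* cpu3: level 0 -> 2 */
        printf("bitmap=%#x level2=%#x\n", level_bitmap, level_mask[2]);
        return 0;
}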
-+ */ -+static inline bool idleprio_suitable(struct task_struct *p) -+{ -+ return (!freezing(p) && !signal_pending(p) && -+ !(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING))); -+} -+ -+/* -+ * pds_skiplist_random_level -- Returns a pseudo-random level number for skip -+ * list node which is used in PDS run queue. -+ * -+ * In current implementation, based on testing, the first 8 bits in microseconds -+ * of niffies are suitable for random level population. -+ * find_first_bit() is used to satisfy p = 0.5 between each levels, and there -+ * should be platform hardware supported instruction(known as ctz/clz) to speed -+ * up this function. -+ * The skiplist level for a task is populated when task is created and doesn't -+ * change in task's life time. When task is being inserted into run queue, this -+ * skiplist level is set to task's sl_node->level, the skiplist insert function -+ * may change it based on current level of the skip lsit. -+ */ -+static inline int pds_skiplist_random_level(const struct task_struct *p) -+{ -+ long unsigned int randseed; -+ -+ /* -+ * 1. Some architectures don't have better than microsecond resolution -+ * so mask out ~microseconds as a factor of the random seed for skiplist -+ * insertion. -+ * 2. Use address of task structure pointer as another factor of the -+ * random seed for task burst forking scenario. -+ */ -+ randseed = (task_rq(p)->clock ^ (long unsigned int)p) >> 10; -+ -+ return find_first_bit(&randseed, NUM_SKIPLIST_LEVEL - 1); -+} -+ -+/** -+ * pds_skiplist_task_search -- search function used in PDS run queue skip list -+ * node insert operation. -+ * @it: iterator pointer to the node in the skip list -+ * @node: pointer to the skiplist_node to be inserted -+ * -+ * Returns true if key of @it is less or equal to key value of @node, otherwise -+ * false. -+ */ -+static inline bool -+pds_skiplist_task_search(struct skiplist_node *it, struct skiplist_node *node) -+{ -+ return (skiplist_entry(it, struct task_struct, sl_node)->priodl <= -+ skiplist_entry(node, struct task_struct, sl_node)->priodl); -+} -+ -+/* -+ * Define the skip list insert function for PDS -+ */ -+DEFINE_SKIPLIST_INSERT_FUNC(pds_skiplist_insert, pds_skiplist_task_search); -+ -+/* -+ * Adding task to the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "pds: enqueue task reside on cpu%d to cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ p->sl_node.level = p->sl_level; -+ if (pds_skiplist_insert(&rq->sl_header, &p->sl_node)) { -+ update_sched_rq_queued_masks(rq); -+ update_sched_rq_pending_masks(rq); -+ } else if (is_second_in_rq(p, rq)) -+ update_sched_rq_pending_masks(rq); -+ rq->nr_running++; -+ -+ sched_update_tick_dependency(rq); -+ -+ sched_info_queued(rq, p); -+ psi_enqueue(p, flags); -+ -+ /* -+ * If in_iowait is set, the code below may not trigger any cpufreq -+ * utilization updates, so do it here explicitly with the IOWAIT flag -+ * passed. 
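pds_skiplist_random_level() above feeds a clock-derived seed to find_first_bit(), which returns level k with probability 2^-(k+1) — the p = 0.5 geometric distribution a skip list wants. A small user-space simulation of that distribution (NUM_LEVELS and the RNG are stand-ins for the kernel's values):

#include <stdio.h>
#include <stdlib.h>

#define NUM_LEVELS 8    /* stand-in for NUM_SKIPLIST_LEVEL */

/* user-space model of find_first_bit(&randseed, NUM_LEVELS - 1) */
static int random_level(unsigned long seed)
{
        for (int bit = 0; bit < NUM_LEVELS - 1; bit++)
                if (seed & (1UL << bit))
                        return bit;
        return NUM_LEVELS - 1;
}

int main(void)
{
        long hist[NUM_LEVELS] = { 0 };

        srand(1);
        for (int i = 0; i < 1000000; i++)
                hist[random_level((unsigned long)rand())]++;

        /* each level should be roughly half as common as the one below it */
        for (int lvl = 0; lvl < 4; lvl++)
                printf("level %d: %ld\n", lvl, hist[lvl]);
        return 0;
}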
-+ */ -+ if (p->in_iowait) -+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq) -+{ -+ bool b_first, b_second; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "pds: cpu[%d] requeue task reside on cpu%d\n", -+ cpu_of(rq), task_cpu(p)); -+ -+ b_first = skiplist_del_init(&rq->sl_header, &p->sl_node); -+ b_second = is_second_in_rq(p, rq); -+ -+ p->sl_node.level = p->sl_level; -+ if (pds_skiplist_insert(&rq->sl_header, &p->sl_node) || b_first) { -+ update_sched_rq_queued_masks(rq); -+ update_sched_rq_pending_masks(rq); -+ } else if (is_second_in_rq(p, rq) || b_second) -+ update_sched_rq_pending_masks(rq); -+} -+ -+/* -+ * resched_curr - mark rq's current task 'to be rescheduled now'. -+ * -+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. -+ */ -+void resched_curr(struct rq *rq) -+{ -+ struct task_struct *curr = rq->curr; -+ int cpu; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ if (test_tsk_need_resched(curr)) -+ return; -+ -+ cpu = cpu_of(rq); -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(curr); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(curr)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) -+{ -+ struct task_struct *curr = rq->curr; -+ -+ if (curr->prio == PRIO_LIMIT) -+ resched_curr(rq); -+ -+ if (task_running_idle(p)) -+ return; -+ -+ if (p->priodl < curr->priodl) -+ resched_curr(rq); -+} -+ -+#ifdef CONFIG_SCHED_HRTICK -+/* -+ * Use HR-timers to deliver accurate preemption points. -+ */ -+ -+static void hrtick_clear(struct rq *rq) -+{ -+ if (hrtimer_active(&rq->hrtick_timer)) -+ hrtimer_cancel(&rq->hrtick_timer); -+} -+ -+/* -+ * High-resolution timer tick. -+ * Runs from hardirq context with interrupts disabled. -+ */ -+static enum hrtimer_restart hrtick(struct hrtimer *timer) -+{ -+ struct rq *rq = container_of(timer, struct rq, hrtick_timer); -+ struct task_struct *p; -+ -+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); -+ -+ raw_spin_lock(&rq->lock); -+ p = rq->curr; -+ p->time_slice = 0; -+ resched_curr(rq); -+ raw_spin_unlock(&rq->lock); -+ -+ return HRTIMER_NORESTART; -+} -+ -+/* -+ * Use hrtick when: -+ * - enabled by features -+ * - hrtimer is actually high res -+ */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ /** -+ * PDS doesn't support sched_feat yet -+ if (!sched_feat(HRTICK)) -+ return 0; -+ */ -+ if (!cpu_active(cpu_of(rq))) -+ return 0; -+ return hrtimer_is_hres_active(&rq->hrtick_timer); -+} -+ -+#ifdef CONFIG_SMP -+ -+static void __hrtick_restart(struct rq *rq) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ -+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); -+} -+ -+/* -+ * called from hardirq (IPI) context -+ */ -+static void __hrtick_start(void *arg) -+{ -+ struct rq *rq = arg; -+ -+ raw_spin_lock(&rq->lock); -+ __hrtick_restart(rq); -+ raw_spin_unlock(&rq->lock); -+} -+ -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ ktime_t time; -+ s64 delta; -+ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense and can cause timer DoS. 
-+ */ -+ delta = max_t(s64, delay, 10000LL); -+ time = ktime_add_ns(timer->base->get_time(), delta); -+ -+ hrtimer_set_expires(timer, time); -+ -+ if (rq == this_rq()) -+ __hrtick_restart(rq); -+ else -+ smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); -+} -+ -+#else -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense. Rely on vruntime for fairness. -+ */ -+ delay = max_t(u64, delay, 10000LL); -+ hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), -+ HRTIMER_MODE_REL_PINNED_HARD); -+} -+#endif /* CONFIG_SMP */ -+ -+static void hrtick_rq_init(struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ rq->hrtick_csd.flags = 0; -+ rq->hrtick_csd.func = __hrtick_start; -+ rq->hrtick_csd.info = rq; -+#endif -+ -+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); -+ rq->hrtick_timer.function = hrtick; -+} -+ -+static inline int rq_dither(struct rq *rq) -+{ -+ if ((rq->clock - rq->last_tick > HALF_JIFFY_NS) || hrtick_enabled(rq)) -+ return 0; -+ -+ return HALF_JIFFY_NS; -+} -+ -+#else /* CONFIG_SCHED_HRTICK */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline void hrtick_clear(struct rq *rq) -+{ -+} -+ -+static inline void hrtick_rq_init(struct rq *rq) -+{ -+} -+ -+static inline int rq_dither(struct rq *rq) -+{ -+ return (rq->clock - rq->last_tick > HALF_JIFFY_NS)? 0:HALF_JIFFY_NS; -+} -+#endif /* CONFIG_SCHED_HRTICK */ -+ -+static inline int normal_prio(struct task_struct *p) -+{ -+ static const int policy_to_prio[] = { -+ NORMAL_PRIO, /* SCHED_NORMAL */ -+ 0, /* SCHED_FIFO */ -+ 0, /* SCHED_RR */ -+ IDLE_PRIO, /* SCHED_BATCH */ -+ ISO_PRIO, /* SCHED_ISO */ -+ IDLE_PRIO /* SCHED_IDLE */ -+ }; -+ -+ if (task_has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ return policy_to_prio[p->policy]; -+} -+ -+/* -+ * Calculate the current priority, i.e. the priority -+ * taken into account by the scheduler. This value might -+ * be boosted by RT tasks as it will be RT if the task got -+ * RT-boosted. If not then it returns p->normal_prio. -+ */ -+static int effective_prio(struct task_struct *p) -+{ -+ p->normal_prio = normal_prio(p); -+ /* -+ * If we are RT tasks or we were boosted to RT priority, -+ * keep the priority unchanged. Otherwise, update priority -+ * to the normal priority: -+ */ -+ if (!rt_prio(p->prio)) -+ return p->normal_prio; -+ return p->prio; -+} -+ -+/* -+ * activate_task - move a task to the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static void activate_task(struct task_struct *p, struct rq *rq) -+{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible--; -+ enqueue_task(p, rq, ENQUEUE_WAKEUP); -+ p->on_rq = 1; -+ cpufreq_update_this_cpu(rq, 0); -+} -+ -+/* -+ * deactivate_task - remove a task from the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static inline void deactivate_task(struct task_struct *p, struct rq *rq) -+{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible++; -+ dequeue_task(p, rq, DEQUEUE_SLEEP); -+ p->on_rq = 0; -+ cpufreq_update_this_cpu(rq, 0); -+} -+ -+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->cpu is set up to a new value, task_access_lock(p, ...) can be -+ * successfully executed on another CPU. We must ensure that updates of -+ * per-task data have been completed by this moment. 
-+ */ -+ smp_wmb(); -+ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ WRITE_ONCE(p->cpu, cpu); -+#else -+ WRITE_ONCE(task_thread_info(p)->cpu, cpu); -+#endif -+#endif -+} -+ -+#ifdef CONFIG_SMP -+void set_task_cpu(struct task_struct *p, unsigned int new_cpu) -+{ -+#ifdef CONFIG_SCHED_DEBUG -+ /* -+ * We should never call set_task_cpu() on a blocked task, -+ * ttwu() will sort out the placement. -+ */ -+ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && -+ !p->on_rq); -+#ifdef CONFIG_LOCKDEP -+ /* -+ * The caller should hold either p->pi_lock or rq->lock, when changing -+ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. -+ * -+ * sched_move_task() holds both and thus holding either pins the cgroup, -+ * see task_group(). -+ */ -+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || -+ lockdep_is_held(&task_rq(p)->lock))); -+#endif -+ /* -+ * Clearly, migrating tasks to offline CPUs is a fairly daft thing. -+ */ -+ WARN_ON_ONCE(!cpu_online(new_cpu)); -+#endif -+ if (task_cpu(p) == new_cpu) -+ return; -+ trace_sched_migrate_task(p, new_cpu); -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ -+ __set_task_cpu(p, new_cpu); -+} -+ -+static inline bool is_per_cpu_kthread(struct task_struct *p) -+{ -+ return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); -+} -+ -+/* -+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see -+ * __set_cpus_allowed_ptr() and select_fallback_rq(). -+ */ -+static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -+{ -+ if (!cpumask_test_cpu(cpu, &p->cpus_mask)) -+ return false; -+ -+ if (is_per_cpu_kthread(p)) -+ return cpu_online(cpu); -+ -+ return cpu_active(cpu); -+} -+ -+/* -+ * This is how migration works: -+ * -+ * 1) we invoke migration_cpu_stop() on the target CPU using -+ * stop_one_cpu(). -+ * 2) stopper starts to run (implicitly forcing the migrated thread -+ * off the CPU) -+ * 3) it checks whether the migrated task is still in the wrong runqueue. -+ * 4) if it's in the wrong runqueue then the migration thread removes -+ * it and puts it into the right queue. -+ * 5) stopper completes and stop_one_cpu() returns and the migration -+ * is done. -+ */ -+ -+/* -+ * move_queued_task - move a queued task to new rq. -+ * -+ * Returns (locked) new rq. Old rq's lock is released. -+ */ -+static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int -+ new_cpu) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ p->on_rq = TASK_ON_RQ_MIGRATING; -+ dequeue_task(p, rq, 0); -+ set_task_cpu(p, new_cpu); -+ raw_spin_unlock(&rq->lock); -+ -+ rq = cpu_rq(new_cpu); -+ -+ raw_spin_lock(&rq->lock); -+ BUG_ON(task_cpu(p) != new_cpu); -+ enqueue_task(p, rq, 0); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ check_preempt_curr(rq, p); -+ -+ return rq; -+} -+ -+struct migration_arg { -+ struct task_struct *task; -+ int dest_cpu; -+}; -+ -+/* -+ * Move (not current) task off this CPU, onto the destination CPU. We're doing -+ * this because either it can't run here any more (set_cpus_allowed() -+ * away from this CPU, or CPU going down), or because we're -+ * attempting to rebalance this task on exec (sched_exec). -+ * -+ * So we race with normal scheduler movements, but that's OK, as long -+ * as the task is no longer on this CPU. -+ */ -+static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int -+ dest_cpu) -+{ -+ /* Affinity changed (again). 
*/ -+ if (!is_cpu_allowed(p, dest_cpu)) -+ return rq; -+ -+ update_rq_clock(rq); -+ return move_queued_task(rq, p, dest_cpu); -+} -+ -+/* -+ * migration_cpu_stop - this will be executed by a highprio stopper thread -+ * and performs thread migration by bumping thread off CPU then -+ * 'pushing' onto another runqueue. -+ */ -+static int migration_cpu_stop(void *data) -+{ -+ struct migration_arg *arg = data; -+ struct task_struct *p = arg->task; -+ struct rq *rq = this_rq(); -+ -+ /* -+ * The original target CPU might have gone down and we might -+ * be on another CPU but it doesn't matter. -+ */ -+ local_irq_disable(); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ /* -+ * If task_rq(p) != rq, it cannot be migrated here, because we're -+ * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because -+ * we're holding p->pi_lock. -+ */ -+ if (task_rq(p) == rq) -+ if (task_on_rq_queued(p)) -+ rq = __migrate_task(rq, p, arg->dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_enable(); -+ return 0; -+} -+ -+static inline void -+set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ p->nr_cpus_allowed = cpumask_weight(new_mask); -+} -+ -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ set_cpus_allowed_common(p, new_mask); -+} -+#endif -+ -+/* Enter with rq lock held. We know p is on the local CPU */ -+static inline void __set_tsk_resched(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ -+/** -+ * task_curr - is this task currently executing on a CPU? -+ * @p: the task in question. -+ * -+ * Return: 1 if the task is currently executing. 0 otherwise. -+ */ -+inline int task_curr(const struct task_struct *p) -+{ -+ return cpu_curr(task_cpu(p)) == p; -+} -+ -+#ifdef CONFIG_SMP -+/* -+ * wait_task_inactive - wait for a thread to unschedule. -+ * -+ * If @match_state is nonzero, it's the @p->state value just checked and -+ * not expected to change. If it changes, i.e. @p might have woken up, -+ * then return zero. When we succeed in waiting for @p to be off its CPU, -+ * we return a positive number (its total switch count). If a second call -+ * a short while later returns the same number, the caller can be sure that -+ * @p has remained unscheduled the whole time. -+ * -+ * The caller must ensure that the task *will* unschedule sometime soon, -+ * else this function might spin for a *long* time. This function can't -+ * be called with interrupts off, or it may introduce deadlock with -+ * smp_call_function() if an IPI is sent by the same process we are -+ * waiting to become inactive. -+ */ -+unsigned long wait_task_inactive(struct task_struct *p, long match_state) -+{ -+ unsigned long flags; -+ bool running, on_rq; -+ unsigned long ncsw; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ for (;;) { -+ rq = task_rq(p); -+ -+ /* -+ * If the task is actively running on another CPU -+ * still, just relax and busy-wait without holding -+ * any locks. -+ * -+ * NOTE! Since we don't hold any locks, it's not -+ * even sure that "rq" stays as the right runqueue! -+ * But we don't care, since this will return false -+ * if the runqueue has changed and p is actually now -+ * running somewhere else! -+ */ -+ while (task_running(p) && p == rq->curr) { -+ if (match_state && unlikely(p->state != match_state)) -+ return 0; -+ cpu_relax(); -+ } -+ -+ /* -+ * Ok, time to look more closely! 
We need the rq -+ * lock now, to be *sure*. If we're wrong, we'll -+ * just go back and repeat. -+ */ -+ task_access_lock_irqsave(p, &lock, &flags); -+ trace_sched_wait_task(p); -+ running = task_running(p); -+ on_rq = p->on_rq; -+ ncsw = 0; -+ if (!match_state || p->state == match_state) -+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ /* -+ * If it changed from the expected state, bail out now. -+ */ -+ if (unlikely(!ncsw)) -+ break; -+ -+ /* -+ * Was it really running after all now that we -+ * checked with the proper locks actually held? -+ * -+ * Oops. Go back and try again.. -+ */ -+ if (unlikely(running)) { -+ cpu_relax(); -+ continue; -+ } -+ -+ /* -+ * It's not enough that it's not actively running, -+ * it must be off the runqueue _entirely_, and not -+ * preempted! -+ * -+ * So if it was still runnable (but just not actively -+ * running right now), it's preempted, and we should -+ * yield - it could be a while. -+ */ -+ if (unlikely(on_rq)) { -+ ktime_t to = NSEC_PER_SEC / HZ; -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ continue; -+ } -+ -+ /* -+ * Ahh, all good. It wasn't running, and it wasn't -+ * runnable, which means that it will never become -+ * running in the future either. We're all done! -+ */ -+ break; -+ } -+ -+ return ncsw; -+} -+ -+/*** -+ * kick_process - kick a running thread to enter/exit the kernel -+ * @p: the to-be-kicked thread -+ * -+ * Cause a process which is running on another CPU to enter -+ * kernel-mode, without any delay. (to get signals handled.) -+ * -+ * NOTE: this function doesn't have to take the runqueue lock, -+ * because all it wants to ensure is that the remote task enters -+ * the kernel. If the IPI races and the task has been migrated -+ * to another CPU then no harm is done and the purpose has been -+ * achieved as well. -+ */ -+void kick_process(struct task_struct *p) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ cpu = task_cpu(p); -+ if ((cpu != smp_processor_id()) && task_curr(p)) -+ smp_send_reschedule(cpu); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(kick_process); -+ -+/* -+ * ->cpus_mask is protected by both rq->lock and p->pi_lock -+ * -+ * A few notes on cpu_active vs cpu_online: -+ * -+ * - cpu_active must be a subset of cpu_online -+ * -+ * - on CPU-up we allow per-CPU kthreads on the online && !active CPU, -+ * see __set_cpus_allowed_ptr(). At this point the newly online -+ * CPU isn't yet part of the sched domains, and balancing will not -+ * see it. -+ * -+ * - on cpu-down we clear cpu_active() to mask the sched domains and -+ * avoid the load balancer to place new tasks on the to be removed -+ * CPU. Existing tasks will remain running there and will be taken -+ * off. -+ * -+ * This means that fallback selection must not select !active CPUs. -+ * And can assume that any active CPU must be online. Conversely -+ * select_task_rq() below may allow selection of !active CPUs in order -+ * to satisfy the above rules. -+ */ -+static int select_fallback_rq(int cpu, struct task_struct *p) -+{ -+ int nid = cpu_to_node(cpu); -+ const struct cpumask *nodemask = NULL; -+ enum { cpuset, possible, fail } state = cpuset; -+ int dest_cpu; -+ -+ /* -+ * If the node that the CPU is on has been offlined, cpu_to_node() -+ * will return -1. There is no CPU on the node, and we should -+ * select the CPU on the other node. -+ */ -+ if (nid != -1) { -+ nodemask = cpumask_of_node(nid); -+ -+ /* Look for allowed, online CPU in same node. 
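wait_task_inactive() above returns p->nvcsw with the sign bit forced on, so the result is nonzero even for a task that has never context-switched, while two snapshots still compare equal only if the switch count itself is unchanged. A tiny user-space check of that encoding (illustration only):

#include <stdio.h>
#include <limits.h>

int main(void)
{
        unsigned long nvcsw = 0;                 /* task has never switched */
        unsigned long ncsw  = nvcsw | LONG_MIN;  /* MSB set -> always nonzero */

        printf("ncsw=%#lx, nonzero=%d\n", ncsw, ncsw != 0);

        /* a later snapshot matches only if the switch count is unchanged */
        printf("still unscheduled=%d\n", ncsw == ((nvcsw + 1) | LONG_MIN));
        return 0;
}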
*/ -+ for_each_cpu(dest_cpu, nodemask) { -+ if (!cpu_active(dest_cpu)) -+ continue; -+ if (cpumask_test_cpu(dest_cpu, &p->cpus_mask)) -+ return dest_cpu; -+ } -+ } -+ -+ for (;;) { -+ /* Any allowed, online CPU? */ -+ for_each_cpu(dest_cpu, &p->cpus_mask) { -+ if (!is_cpu_allowed(p, dest_cpu)) -+ continue; -+ goto out; -+ } -+ -+ /* No more Mr. Nice Guy. */ -+ switch (state) { -+ case cpuset: -+ if (IS_ENABLED(CONFIG_CPUSETS)) { -+ cpuset_cpus_allowed_fallback(p); -+ state = possible; -+ break; -+ } -+ /* Fall-through */ -+ case possible: -+ do_set_cpus_allowed(p, cpu_possible_mask); -+ state = fail; -+ break; -+ -+ case fail: -+ BUG(); -+ break; -+ } -+ } -+ -+out: -+ if (state != cpuset) { -+ /* -+ * Don't tell them about moving exiting tasks or -+ * kernel threads (both mm NULL), since they never -+ * leave kernel. -+ */ -+ if (p->mm && printk_ratelimit()) { -+ printk_deferred("process %d (%s) no longer affine to cpu%d\n", -+ task_pid_nr(p), p->comm, cpu); -+ } -+ } -+ -+ return dest_cpu; -+} -+ -+static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) -+{ -+ cpumask_t *mask; -+ -+ if (cpumask_test_cpu(cpu, cpumask)) -+ return cpu; -+ -+ mask = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) -+ mask++; -+ -+ return cpu; -+} -+ -+/* -+ * task_preemptible_rq - return the rq which the given task can preempt on -+ * @p: task wants to preempt CPU -+ * @only_preempt_low_policy: indicate only preempt rq running low policy than @p -+ */ -+static inline int -+task_preemptible_rq_idle(struct task_struct *p, cpumask_t *chk_mask) -+{ -+ cpumask_t tmp; -+ -+#ifdef CONFIG_SCHED_SMT -+ if (cpumask_and(&tmp, chk_mask, &sched_cpu_sg_idle_mask)) -+ return best_mask_cpu(task_cpu(p), &tmp); -+#endif -+ -+#ifdef CONFIG_SMT_NICE -+ /* Only ttwu on cpu which is not smt supressed */ -+ if (cpumask_andnot(&tmp, chk_mask, &sched_smt_supressed_mask)) { -+ cpumask_t t; -+ if (cpumask_and(&t, &tmp, &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ return best_mask_cpu(task_cpu(p), &t); -+ return best_mask_cpu(task_cpu(p), &tmp); -+ } -+#endif -+ -+ if (cpumask_and(&tmp, chk_mask, &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ return best_mask_cpu(task_cpu(p), &tmp); -+ return best_mask_cpu(task_cpu(p), chk_mask); -+} -+ -+static inline int -+task_preemptible_rq(struct task_struct *p, cpumask_t *chk_mask, -+ int preempt_level) -+{ -+ cpumask_t tmp; -+ int level; -+ -+#ifdef CONFIG_SCHED_SMT -+#ifdef CONFIG_SMT_NICE -+ if (cpumask_and(&tmp, chk_mask, &sched_cpu_psg_mask)) -+ return best_mask_cpu(task_cpu(p), &tmp); -+#else -+ if (cpumask_and(&tmp, chk_mask, &sched_cpu_sg_idle_mask)) -+ return best_mask_cpu(task_cpu(p), &tmp); -+#endif -+#endif -+ -+ level = find_first_bit(sched_rq_queued_masks_bitmap, -+ NR_SCHED_RQ_QUEUED_LEVEL); -+ -+ while (level < preempt_level) { -+ if (cpumask_and(&tmp, chk_mask, &sched_rq_queued_masks[level])) -+ return best_mask_cpu(task_cpu(p), &tmp); -+ -+ level = find_next_bit(sched_rq_queued_masks_bitmap, -+ NR_SCHED_RQ_QUEUED_LEVEL, -+ level + 1); -+ } -+ -+ if (unlikely(SCHED_RQ_RT == level && -+ level == preempt_level && -+ cpumask_and(&tmp, chk_mask, -+ &sched_rq_queued_masks[SCHED_RQ_RT]))) { -+ unsigned int cpu; -+ -+ for_each_cpu (cpu, &tmp) -+ if (p->prio < sched_rq_prio[cpu]) -+ return cpu; -+ } -+ -+ return best_mask_cpu(task_cpu(p), chk_mask); -+} -+ -+static inline int select_task_rq(struct task_struct *p) -+{ -+ cpumask_t chk_mask; -+ -+ if (unlikely(!cpumask_and(&chk_mask, &p->cpus_mask, cpu_online_mask))) 
-+ return select_fallback_rq(task_cpu(p), p); -+ -+ /* Check IDLE tasks suitable to run normal priority */ -+ if (idleprio_task(p)) { -+ if (idleprio_suitable(p)) { -+ p->prio = p->normal_prio; -+ update_task_priodl(p); -+ return task_preemptible_rq_idle(p, &chk_mask); -+ } -+ p->prio = NORMAL_PRIO; -+ update_task_priodl(p); -+ } -+ -+ return task_preemptible_rq(p, &chk_mask, -+ task_running_policy_level(p, this_rq())); -+} -+#else /* CONFIG_SMP */ -+static inline int select_task_rq(struct task_struct *p) -+{ -+ return 0; -+} -+#endif /* CONFIG_SMP */ -+ -+static void -+ttwu_stat(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq; -+ -+ if (!schedstat_enabled()) -+ return; -+ -+ rq= this_rq(); -+ -+#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) -+ __schedstat_inc(rq->ttwu_local); -+ else { -+ /** PDS ToDo: -+ * How to do ttwu_wake_remote -+ */ -+ } -+#endif /* CONFIG_SMP */ -+ -+ __schedstat_inc(rq->ttwu_count); -+} -+ -+/* -+ * Mark the task runnable and perform wakeup-preemption. -+ */ -+static inline void -+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+} -+ -+static inline void -+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+#ifdef CONFIG_SMP -+ if (p->sched_contributes_to_load) -+ rq->nr_uninterruptible--; -+#endif -+ -+ activate_task(p, rq); -+ ttwu_do_wakeup(rq, p, 0); -+} -+ -+static int ttwu_remote(struct task_struct *p, int wake_flags) -+{ -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ rq = __task_access_lock(p, &lock); -+ if (task_on_rq_queued(p)) { -+ ttwu_do_wakeup(rq, p, wake_flags); -+ ret = 1; -+ } -+ __task_access_unlock(p, lock); -+ -+ return ret; -+} -+ -+/* -+ * Notes on Program-Order guarantees on SMP systems. -+ * -+ * MIGRATION -+ * -+ * The basic program-order guarantee on SMP systems is that when a task [t] -+ * migrates, all its activity on its old CPU [c0] happens-before any subsequent -+ * execution on its new CPU [c1]. -+ * -+ * For migration (of runnable tasks) this is provided by the following means: -+ * -+ * A) UNLOCK of the rq(c0)->lock scheduling out task t -+ * B) migration for t is required to synchronize *both* rq(c0)->lock and -+ * rq(c1)->lock (if not at the same time, then in that order). -+ * C) LOCK of the rq(c1)->lock scheduling in task -+ * -+ * Transitivity guarantees that B happens after A and C after B. -+ * Note: we only require RCpc transitivity. -+ * Note: the CPU doing B need not be c0 or c1 -+ * -+ * Example: -+ * -+ * CPU0 CPU1 CPU2 -+ * -+ * LOCK rq(0)->lock -+ * sched-out X -+ * sched-in Y -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(0)->lock // orders against CPU0 -+ * dequeue X -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(1)->lock -+ * enqueue X -+ * UNLOCK rq(1)->lock -+ * -+ * LOCK rq(1)->lock // orders against CPU2 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(1)->lock -+ * -+ * -+ * BLOCKING -- aka. SLEEP + WAKEUP -+ * -+ * For blocking we (obviously) need to provide the same guarantee as for -+ * migration. However the means are completely different as there is no lock -+ * chain to provide order. 
Instead we do: -+ * -+ * 1) smp_store_release(X->on_cpu, 0) -+ * 2) smp_cond_load_acquire(!X->on_cpu) -+ * -+ * Example: -+ * -+ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule) -+ * -+ * LOCK rq(0)->lock LOCK X->pi_lock -+ * dequeue X -+ * sched-out X -+ * smp_store_release(X->on_cpu, 0); -+ * -+ * smp_cond_load_acquire(&X->on_cpu, !VAL); -+ * X->state = WAKING -+ * set_task_cpu(X,2) -+ * -+ * LOCK rq(2)->lock -+ * enqueue X -+ * X->state = RUNNING -+ * UNLOCK rq(2)->lock -+ * -+ * LOCK rq(2)->lock // orders against CPU1 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(2)->lock -+ * -+ * UNLOCK X->pi_lock -+ * UNLOCK rq(0)->lock -+ * -+ * -+ * However; for wakeups there is a second guarantee we must provide, namely we -+ * must observe the state that lead to our wakeup. That is, not only must our -+ * task observe its own prior state, it must also observe the stores prior to -+ * its wakeup. -+ * -+ * This means that any means of doing remote wakeups must order the CPU doing -+ * the wakeup against the CPU the task is going to end up running on. This, -+ * however, is already required for the regular Program-Order guarantee above, -+ * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire). -+ * -+ */ -+ -+/*** -+ * try_to_wake_up - wake up a thread -+ * @p: the thread to be awakened -+ * @state: the mask of task states that can be woken -+ * @wake_flags: wake modifier flags (WF_*) -+ * -+ * Put it on the run-queue if it's not already there. The "current" -+ * thread is always on the run-queue (except when the actual -+ * re-schedule is in progress), and as such you're allowed to do -+ * the simpler "current->state = TASK_RUNNING" to mark yourself -+ * runnable without the overhead of this. -+ * -+ * Return: %true if @p was woken up, %false if it was already running. -+ * or @state didn't match @p's state. -+ */ -+static int try_to_wake_up(struct task_struct *p, unsigned int state, -+ int wake_flags) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ int cpu, success = 0; -+ -+ /* -+ * If we are going to wake up a thread waiting for CONDITION we -+ * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with mb() in -+ * set_current_state() the waiting thread does. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ smp_mb__after_spinlock(); -+ if (!(p->state & state)) -+ goto out; -+ -+ trace_sched_waking(p); -+ -+ /* We're going to change ->state: */ -+ success = 1; -+ cpu = task_cpu(p); -+ -+ /* -+ * Ensure we load p->on_rq _after_ p->state, otherwise it would -+ * be possible to, falsely, observe p->on_rq == 0 and get stuck -+ * in smp_cond_load_acquire() below. -+ * -+ * sched_ttwu_pending() try_to_wake_up() -+ * STORE p->on_rq = 1 LOAD p->state -+ * UNLOCK rq->lock -+ * -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * UNLOCK rq->lock -+ * -+ * [task p] -+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ if (p->on_rq && ttwu_remote(p, wake_flags)) -+ goto stat; -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -+ * possible to, falsely, observe p->on_cpu == 0. -+ * -+ * One must be running (->on_cpu == 1) in order to remove oneself -+ * from the runqueue. 
-+ * -+ * __schedule() (switch to task 'p') try_to_wake_up() -+ * STORE p->on_cpu = 1 LOAD p->on_rq -+ * UNLOCK rq->lock -+ * -+ * __schedule() (put 'p' to sleep) -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * STORE p->on_rq = 0 LOAD p->on_cpu -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until its done referencing the task. -+ * -+ * Pairs with the smp_store_release() in finish_task(). -+ * -+ * This ensures that tasks getting woken will be fully ordered against -+ * their previous state and preserve Program Order. -+ */ -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ p->sched_contributes_to_load = !!task_contributes_to_load(p); -+ p->state = TASK_WAKING; -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+ if (SCHED_ISO == p->policy && ISO_PRIO != p->prio) { -+ p->prio = ISO_PRIO; -+ p->deadline = 0UL; -+ update_task_priodl(p); -+ } -+ -+ cpu = select_task_rq(p); -+ -+ if (cpu != task_cpu(p)) { -+ wake_flags |= WF_MIGRATED; -+ psi_ttwu_dequeue(p); -+ set_task_cpu(p, cpu); -+ } -+#else /* CONFIG_SMP */ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+#endif -+ -+ rq = cpu_rq(cpu); -+ raw_spin_lock(&rq->lock); -+ -+ update_rq_clock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ check_preempt_curr(rq, p); -+ -+ raw_spin_unlock(&rq->lock); -+ -+stat: -+ ttwu_stat(p, cpu, wake_flags); -+out: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ return success; -+} -+ -+/** -+ * wake_up_process - Wake up a specific process -+ * @p: The process to be woken up. -+ * -+ * Attempt to wake up the nominated process and move it to the set of runnable -+ * processes. -+ * -+ * Return: 1 if the process was woken up, 0 if it was already running. -+ * -+ * This function executes a full memory barrier before accessing the task state. -+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+int wake_up_state(struct task_struct *p, unsigned int state) -+{ -+ return try_to_wake_up(p, state, 0); -+} -+ -+/* -+ * Perform scheduler related setup for a newly forked process p. -+ * p is forked by current. -+ */ -+int sched_fork(unsigned long __maybe_unused clone_flags, struct task_struct *p) -+{ -+ unsigned long flags; -+ int cpu = get_cpu(); -+ struct rq *rq = this_rq(); -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ INIT_HLIST_HEAD(&p->preempt_notifiers); -+#endif -+ /* Should be reset in fork.c but done here for ease of PDS patching */ -+ p->on_cpu = -+ p->on_rq = -+ p->utime = -+ p->stime = -+ p->sched_time = 0; -+ -+ p->sl_level = pds_skiplist_random_level(p); -+ INIT_SKIPLIST_NODE(&p->sl_node); -+ -+#ifdef CONFIG_COMPACTION -+ p->capture_control = NULL; -+#endif -+ -+ /* -+ * We mark the process as NEW here. This guarantees that -+ * nobody will actually run it, and a signal or other external -+ * event cannot wake it up and insert it on the runqueue either. -+ */ -+ p->state = TASK_NEW; -+ -+ /* -+ * Make sure we do not leak PI boosting priority to the child. -+ */ -+ p->prio = current->normal_prio; -+ -+ /* -+ * Revert to default priority/policy on fork if requested. 
-+ */ -+ if (unlikely(p->sched_reset_on_fork)) { -+ if (task_has_rt_policy(p)) { -+ p->policy = SCHED_NORMAL; -+ p->static_prio = NICE_TO_PRIO(0); -+ p->rt_priority = 0; -+ } else if (PRIO_TO_NICE(p->static_prio) < 0) -+ p->static_prio = NICE_TO_PRIO(0); -+ -+ p->prio = p->normal_prio = normal_prio(p); -+ -+ /* -+ * We don't need the reset flag anymore after the fork. It has -+ * fulfilled its duty: -+ */ -+ p->sched_reset_on_fork = 0; -+ } -+ -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. -+ */ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ rq->curr->time_slice /= 2; -+ p->time_slice = rq->curr->time_slice; -+#ifdef CONFIG_SCHED_HRTICK -+ hrtick_start(rq, US_TO_NS(rq->curr->time_slice)); -+#endif -+ -+ if (p->time_slice < RESCHED_US) { -+ update_rq_clock(rq); -+ time_slice_expired(p, rq); -+ resched_curr(rq); -+ } else -+ update_task_priodl(p); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ /* -+ * The child is not yet in the pid-hash so no cgroup attach races, -+ * and the cgroup is pinned to this child due to cgroup_fork() -+ * is ran before sched_fork(). -+ * -+ * Silence PROVE_RCU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ /* -+ * We're setting the CPU for the first time, we don't migrate, -+ * so use __set_task_cpu(). -+ */ -+ __set_task_cpu(p, cpu); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ put_cpu(); -+ return 0; -+} -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+static bool __initdata __sched_schedstats = false; -+ -+static void set_schedstats(bool enabled) -+{ -+ if (enabled) -+ static_branch_enable(&sched_schedstats); -+ else -+ static_branch_disable(&sched_schedstats); -+} -+ -+void force_schedstat_enabled(void) -+{ -+ if (!schedstat_enabled()) { -+ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); -+ static_branch_enable(&sched_schedstats); -+ } -+} -+ -+static int __init setup_schedstats(char *str) -+{ -+ int ret = 0; -+ if (!str) -+ goto out; -+ -+ /* -+ * This code is called before jump labels have been set up, so we can't -+ * change the static branch directly just yet. Instead set a temporary -+ * variable so init_schedstats() can do it later. 
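sched_fork() above halves the parent's remaining time slice and hands the other half to the child, so the total amount of pending timeslice in the system stays constant; if the child's share drops below RESCHED_US, its slice is expired and refilled instead. A stand-alone sketch of that arithmetic (plain user-space C, the starting value is hypothetical):

#include <stdio.h>

#define RESCHED_US 100    /* same threshold as in the patch */

int main(void)
{
        int parent_us = 3000;     /* parent's remaining slice, hypothetical */

        /* sched_fork(): parent and child split the remaining slice evenly */
        parent_us /= 2;
        int child_us = parent_us;
        printf("parent=%d child=%d total=%d\n",
               parent_us, child_us, parent_us + child_us);

        /* an almost-drained slice would instead be expired and refilled */
        printf("child needs refill: %d\n", child_us < RESCHED_US);
        return 0;
}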
-+ */ -+ if (!strcmp(str, "enable")) { -+ __sched_schedstats = true; -+ ret = 1; -+ } else if (!strcmp(str, "disable")) { -+ __sched_schedstats = false; -+ ret = 1; -+ } -+out: -+ if (!ret) -+ pr_warn("Unable to parse schedstats=\n"); -+ -+ return ret; -+} -+__setup("schedstats=", setup_schedstats); -+ -+static void __init init_schedstats(void) -+{ -+ set_schedstats(__sched_schedstats); -+} -+ -+#ifdef CONFIG_PROC_SYSCTL -+int sysctl_schedstats(struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) -+{ -+ struct ctl_table t; -+ int err; -+ int state = static_branch_likely(&sched_schedstats); -+ -+ if (write && !capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ t = *table; -+ t.data = &state; -+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); -+ if (err < 0) -+ return err; -+ if (write) -+ set_schedstats(state); -+ return err; -+} -+#endif /* CONFIG_PROC_SYSCTL */ -+#else /* !CONFIG_SCHEDSTATS */ -+static inline void init_schedstats(void) {} -+#endif /* CONFIG_SCHEDSTATS */ -+ -+/* -+ * wake_up_new_task - wake up a newly created task for the first time. -+ * -+ * This function will do some initial scheduler statistics housekeeping -+ * that must be done for every newly created context, then puts the task -+ * on the runqueue and wakes it. -+ */ -+void wake_up_new_task(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ p->state = TASK_RUNNING; -+ -+ rq = cpu_rq(select_task_rq(p)); -+#ifdef CONFIG_SMP -+ /* -+ * Fork balancing, do it here and not earlier because: -+ * - cpus_mask can change in the fork path -+ * - any previously selected CPU might disappear through hotplug -+ * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, -+ * as we're not fully set-up yet. -+ */ -+ __set_task_cpu(p, cpu_of(rq)); -+#endif -+ -+ raw_spin_lock(&rq->lock); -+ -+ update_rq_clock(rq); -+ activate_task(p, rq); -+ trace_sched_wakeup_new(p); -+ check_preempt_curr(rq, p); -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ -+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); -+ -+void preempt_notifier_inc(void) -+{ -+ static_branch_inc(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_inc); -+ -+void preempt_notifier_dec(void) -+{ -+ static_branch_dec(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_dec); -+ -+/** -+ * preempt_notifier_register - tell me when current is being preempted & rescheduled -+ * @notifier: notifier struct to register -+ */ -+void preempt_notifier_register(struct preempt_notifier *notifier) -+{ -+ if (!static_branch_unlikely(&preempt_notifier_key)) -+ WARN(1, "registering preempt_notifier while notifiers disabled\n"); -+ -+ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_register); -+ -+/** -+ * preempt_notifier_unregister - no longer interested in preemption notifications -+ * @notifier: notifier struct to unregister -+ * -+ * This is *not* safe to call from within a preemption notifier. 
-+ */ -+void preempt_notifier_unregister(struct preempt_notifier *notifier) -+{ -+ hlist_del(¬ifier->link); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_unregister); -+ -+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_in(notifier, raw_smp_processor_id()); -+} -+ -+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_in_preempt_notifiers(curr); -+} -+ -+static void -+__fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_out(notifier, next); -+} -+ -+static __always_inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_out_preempt_notifiers(curr, next); -+} -+ -+#else /* !CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+} -+ -+static inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+} -+ -+#endif /* CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void prepare_task(struct task_struct *next) -+{ -+ /* -+ * Claim the task as running, we do this before switching to it -+ * such that any running task will have this set. -+ */ -+ next->on_cpu = 1; -+} -+ -+static inline void finish_task(struct task_struct *prev) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->on_cpu is cleared, the task can be moved to a different CPU. -+ * We must ensure this doesn't happen until the switch is completely -+ * finished. -+ * -+ * In particular, the load of prev->state in finish_task_switch() must -+ * happen before this. -+ * -+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). -+ */ -+ smp_store_release(&prev->on_cpu, 0); -+#else -+ prev->on_cpu = 0; -+#endif -+} -+ -+static inline void -+prepare_lock_switch(struct rq *rq, struct task_struct *next) -+{ -+ /* -+ * Since the runqueue lock will be released by the next -+ * task (which is an invalid locking op but in the case -+ * of the scheduler it's an obvious special-case), so we -+ * do an early lockdep release here: -+ */ -+ spin_release(&rq->lock.dep_map, _THIS_IP_); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* this is a valid case when another task releases the spinlock */ -+ rq->lock.owner = next; -+#endif -+} -+ -+static inline void finish_lock_switch(struct rq *rq) -+{ -+ /* -+ * If we are tracking spinlock dependencies then we have to -+ * fix up the runqueue lock - which gets 'carried over' from -+ * prev into current: -+ */ -+ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+/** -+ * prepare_task_switch - prepare to switch tasks -+ * @rq: the runqueue preparing to switch -+ * @next: the task we are going to switch to. -+ * -+ * This is called with the rq lock held and interrupts off. It must -+ * be paired with a subsequent finish_task_switch after the context -+ * switch. -+ * -+ * prepare_task_switch sets up locking and calls architecture specific -+ * hooks. 
-+ */ -+static inline void -+prepare_task_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ kcov_prepare_switch(prev); -+ sched_info_switch(rq, prev, next); -+ perf_event_task_sched_out(prev, next); -+ rseq_preempt(prev); -+ fire_sched_out_preempt_notifiers(prev, next); -+ prepare_task(next); -+ prepare_arch_switch(next); -+} -+ -+/** -+ * finish_task_switch - clean up after a task-switch -+ * @rq: runqueue associated with task-switch -+ * @prev: the thread we just switched away from. -+ * -+ * finish_task_switch must be called after the context switch, paired -+ * with a prepare_task_switch call before the context switch. -+ * finish_task_switch will reconcile locking set up by prepare_task_switch, -+ * and do any other architecture-specific cleanup actions. -+ * -+ * Note that we may have delayed dropping an mm in context_switch(). If -+ * so, we finish that here outside of the runqueue lock. (Doing it -+ * with the lock held can cause deadlocks; see schedule() for -+ * details.) -+ * -+ * The context switch have flipped the stack from under us and restored the -+ * local variables which were saved when this task called schedule() in the -+ * past. prev == current is still correct but we need to recalculate this_rq -+ * because prev may have moved to another CPU. -+ */ -+static struct rq *finish_task_switch(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq = this_rq(); -+ struct mm_struct *mm = rq->prev_mm; -+ long prev_state; -+ -+ /* -+ * The previous task will have left us with a preempt_count of 2 -+ * because it left us after: -+ * -+ * schedule() -+ * preempt_disable(); // 1 -+ * __schedule() -+ * raw_spin_lock_irq(&rq->lock) // 2 -+ * -+ * Also, see FORK_PREEMPT_COUNT. -+ */ -+ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, -+ "corrupted preempt_count: %s/%d/0x%x\n", -+ current->comm, current->pid, preempt_count())) -+ preempt_count_set(FORK_PREEMPT_COUNT); -+ -+ rq->prev_mm = NULL; -+ -+ /* -+ * A task struct has one reference for the use as "current". -+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls -+ * schedule one last time. The schedule call will never return, and -+ * the scheduled task must drop that reference. -+ * -+ * We must observe prev->state before clearing prev->on_cpu (in -+ * finish_task), otherwise a concurrent wakeup can get prev -+ * running on another CPU and we could rave with its RUNNING -> DEAD -+ * transition, resulting in a double drop. -+ */ -+ prev_state = prev->state; -+ vtime_task_switch(prev); -+ perf_event_task_sched_in(prev, current); -+ finish_task(prev); -+ finish_lock_switch(rq); -+ finish_arch_post_lock_switch(); -+ kcov_finish_switch(current); -+ -+ fire_sched_in_preempt_notifiers(current); -+ /* -+ * When switching through a kernel thread, the loop in -+ * membarrier_{private,global}_expedited() may have observed that -+ * kernel thread and not issued an IPI. It is therefore possible to -+ * schedule between user->kernel->user threads without passing though -+ * switch_mm(). Membarrier requires a barrier after storing to -+ * rq->curr, before returning to userspace, so provide them here: -+ * -+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly -+ * provided by mmdrop(), -+ * - a sync_core for SYNC_CORE. 
-+ */ -+ if (mm) { -+ membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop(mm); -+ } -+ if (unlikely(prev_state == TASK_DEAD)) { -+ /* -+ * Remove function-return probe instances associated with this -+ * task and put them back on the free list. -+ */ -+ kprobe_flush_task(prev); -+ -+ /* Task is done with its stack. */ -+ put_task_stack(prev); -+ -+ put_task_struct_rcu_user(prev); -+ } -+ -+ tick_nohz_task_switch(); -+ return rq; -+} -+ -+/** -+ * schedule_tail - first thing a freshly forked thread must call. -+ * @prev: the thread we just switched away from. -+ */ -+asmlinkage __visible void schedule_tail(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq; -+ -+ /* -+ * New tasks start with FORK_PREEMPT_COUNT, see there and -+ * finish_task_switch() for details. -+ * -+ * finish_task_switch() will drop rq->lock() and lower preempt_count -+ * and the preempt_enable() will end up enabling preemption (on -+ * PREEMPT_COUNT kernels). -+ */ -+ -+ rq = finish_task_switch(prev); -+ preempt_enable(); -+ -+ if (current->set_child_tid) -+ put_user(task_pid_vnr(current), current->set_child_tid); -+ -+ calculate_sigpending(); -+} -+ -+/* -+ * context_switch - switch to the new MM and the new thread's register state. -+ */ -+static __always_inline struct rq * -+context_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ prepare_task_switch(rq, prev, next); -+ -+ /* -+ * For paravirt, this is coupled with an exit in switch_to to -+ * combine the page table reload and the switch backend into -+ * one hypercall. -+ */ -+ arch_start_context_switch(prev); -+ -+ /* -+ * kernel -> kernel lazy + transfer active -+ * user -> kernel lazy + mmgrab() active -+ * -+ * kernel -> user switch + mmdrop() active -+ * user -> user switch -+ */ -+ if (!next->mm) { // to kernel -+ enter_lazy_tlb(prev->active_mm, next); -+ -+ next->active_mm = prev->active_mm; -+ if (prev->mm) // from user -+ mmgrab(prev->active_mm); -+ else -+ prev->active_mm = NULL; -+ } else { // to user -+ membarrier_switch_mm(rq, prev->active_mm, next->mm); -+ /* -+ * sys_membarrier() requires an smp_mb() between setting -+ * rq->curr / membarrier_switch_mm() and returning to userspace. -+ * -+ * The below provides this either through switch_mm(), or in -+ * case 'prev->active_mm == next->mm' through -+ * finish_task_switch()'s mmdrop(). -+ */ -+ switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ -+ if (!prev->mm) { // from kernel -+ /* will mmdrop() in finish_task_switch(). */ -+ rq->prev_mm = prev->active_mm; -+ prev->active_mm = NULL; -+ } -+ } -+ -+ prepare_lock_switch(rq, next); -+ -+ /* Here we just switch the register state and the stack. */ -+ switch_to(prev, next, prev); -+ barrier(); -+ -+ return finish_task_switch(prev); -+} -+ -+/* -+ * nr_running, nr_uninterruptible and nr_context_switches: -+ * -+ * externally visible scheduler statistics: current number of runnable -+ * threads, total number of context switches performed since bootup. -+ */ -+unsigned long nr_running(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_running; -+ -+ return sum; -+} -+ -+/* -+ * Check if only the current task is running on the CPU. -+ * -+ * Caution: this function does not check that the caller has disabled -+ * preemption, thus the result might have a time-of-check-to-time-of-use -+ * race. 
The caller is responsible to use it correctly, for example: -+ * -+ * - from a non-preemptible section (of course) -+ * -+ * - from a thread that is bound to a single CPU -+ * -+ * - in a loop with very short iterations (e.g. a polling loop) -+ */ -+bool single_task_running(void) -+{ -+ return raw_rq()->nr_running == 1; -+} -+EXPORT_SYMBOL(single_task_running); -+ -+unsigned long long nr_context_switches(void) -+{ -+ int i; -+ unsigned long long sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += cpu_rq(i)->nr_switches; -+ -+ return sum; -+} -+ -+/* -+ * Consumers of these two interfaces, like for example the cpuidle menu -+ * governor, are using nonsensical data. Preferring shallow idle state selection -+ * for a CPU that has IO-wait which might not even end up running the task when -+ * it does become runnable. -+ */ -+ -+unsigned long nr_iowait_cpu(int cpu) -+{ -+ return atomic_read(&cpu_rq(cpu)->nr_iowait); -+} -+ -+/* -+ * IO-wait accounting, and how its mostly bollocks (on SMP). -+ * -+ * The idea behind IO-wait account is to account the idle time that we could -+ * have spend running if it were not for IO. That is, if we were to improve the -+ * storage performance, we'd have a proportional reduction in IO-wait time. -+ * -+ * This all works nicely on UP, where, when a task blocks on IO, we account -+ * idle time as IO-wait, because if the storage were faster, it could've been -+ * running and we'd not be idle. -+ * -+ * This has been extended to SMP, by doing the same for each CPU. This however -+ * is broken. -+ * -+ * Imagine for instance the case where two tasks block on one CPU, only the one -+ * CPU will have IO-wait accounted, while the other has regular idle. Even -+ * though, if the storage were faster, both could've ran at the same time, -+ * utilising both CPUs. -+ * -+ * This means, that when looking globally, the current IO-wait accounting on -+ * SMP is a lower bound, by reason of under accounting. -+ * -+ * Worse, since the numbers are provided per CPU, they are sometimes -+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly -+ * associated with any one particular CPU, it can wake to another CPU than it -+ * blocked on. This means the per CPU IO-wait number is meaningless. -+ * -+ * Task CPU affinities can make all that even more 'interesting'. -+ */ -+ -+unsigned long nr_iowait(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += nr_iowait_cpu(i); -+ -+ return sum; -+} -+ -+DEFINE_PER_CPU(struct kernel_stat, kstat); -+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -+ -+EXPORT_PER_CPU_SYMBOL(kstat); -+EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -+ -+static inline void pds_update_curr(struct rq *rq, struct task_struct *p) -+{ -+ s64 ns = rq->clock_task - p->last_ran; -+ -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ -+ p->time_slice -= NS_TO_US(ns); -+ p->last_ran = rq->clock_task; -+} -+ -+/* -+ * Return accounted runtime for the task. -+ * Return separately the current's pending runtime that have not been -+ * accounted yet. -+ */ -+unsigned long long task_sched_runtime(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ u64 ns; -+ -+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) -+ /* -+ * 64-bit doesn't need locks to atomically read a 64-bit value. -+ * So we have a optimization chance when the task's delta_exec is 0. -+ * Reading ->on_cpu is racy, but this is ok. 
-+ * -+ * If we race with it leaving CPU, we'll take a lock. So we're correct. -+ * If we race with it entering CPU, unaccounted time is 0. This is -+ * indistinguishable from the read occurring a few cycles earlier. -+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has -+ * been accounted, so we're correct here as well. -+ */ -+ if (!p->on_cpu || !task_on_rq_queued(p)) -+ return tsk_seruntime(p); -+#endif -+ -+ rq = task_access_lock_irqsave(p, &lock, &flags); -+ /* -+ * Must be ->curr _and_ ->on_rq. If dequeued, we would -+ * project cycles that may never be accounted to this -+ * thread, breaking clock_gettime(). -+ */ -+ if (p == rq->curr && task_on_rq_queued(p)) { -+ update_rq_clock(rq); -+ pds_update_curr(rq, p); -+ } -+ ns = tsk_seruntime(p); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ return ns; -+} -+ -+/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static inline void pds_scheduler_task_tick(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ if (is_idle_task(p)) -+ return; -+ -+ pds_update_curr(rq, p); -+ -+ cpufreq_update_util(rq, 0); -+ -+ /* -+ * Tasks that were scheduled in the first half of a tick are not -+ * allowed to run into the 2nd half of the next tick if they will -+ * run out of time slice in the interim. Otherwise, if they have -+ * less than RESCHED_US μs of time slice left they will be rescheduled. -+ */ -+ if (p->time_slice - rq->dither >= RESCHED_US) -+ return; -+ -+ /** -+ * p->time_slice < RESCHED_US. We will modify task_struct under -+ * rq lock as p is rq->curr -+ */ -+ __set_tsk_resched(p); -+} -+ -+#ifdef CONFIG_SMP -+ -+#ifdef CONFIG_SCHED_SMT -+static int active_load_balance_cpu_stop(void *data) -+{ -+ struct rq *rq = this_rq(); -+ struct task_struct *p = data; -+ int cpu; -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ rq->active_balance = 0; -+ /* -+ * _something_ may have changed the task, double check again -+ */ -+ if (task_on_rq_queued(p) && task_rq(p) == rq && -+ (cpu = cpumask_any_and(&p->cpus_mask, &sched_cpu_sg_idle_mask)) < nr_cpu_ids) -+ rq = __migrate_task(rq, p, cpu); -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_restore(flags); -+ -+ return 0; -+} -+ -+/* pds_sg_balance_trigger - trigger slibing group balance for @cpu */ -+static void pds_sg_balance_trigger(const int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ struct task_struct *curr; -+ -+ if (!raw_spin_trylock_irqsave(&rq->lock, flags)) -+ return; -+ curr = rq->curr; -+ if (!is_idle_task(curr) && -+ cpumask_intersects(&curr->cpus_mask, &sched_cpu_sg_idle_mask)) { -+ int active_balance = 0; -+ -+ if (likely(!rq->active_balance)) { -+ rq->active_balance = 1; -+ active_balance = 1; -+ } -+ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ if (likely(active_balance)) -+ stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop, -+ curr, &rq->active_balance_work); -+ } else -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+} -+ -+/* -+ * pds_sg_balance_check - slibing group balance check for run queue @rq -+ */ -+static inline void pds_sg_balance_check(const struct rq *rq) -+{ -+ cpumask_t chk; -+ int i; -+ -+ /* Only online cpu will do sg balance checking */ -+ if (unlikely(!rq->online)) -+ return; -+ -+ /* Only cpu in slibing idle group will do the checking */ -+ if (!cpumask_test_cpu(cpu_of(rq), &sched_cpu_sg_idle_mask)) -+ return; -+ -+ /* Find potential cpus which can migrate the 
currently running task */ -+ if (!cpumask_andnot(&chk, &sched_rq_pending_masks[SCHED_RQ_EMPTY], -+ &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ return; -+ -+ for_each_cpu(i, &chk) { -+ /* skip the cpu which has idle slibing cpu */ -+ if (cpumask_test_cpu(per_cpu(sched_sibling_cpu, i), -+ &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ continue; -+ pds_sg_balance_trigger(i); -+ } -+} -+DEFINE_PER_CPU(unsigned long, thermal_pressure); -+ -+void arch_set_thermal_pressure(struct cpumask *cpus, -+ unsigned long th_pressure) -+{ -+ int cpu; -+ -+ for_each_cpu(cpu, cpus) -+ WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); -+} -+#endif /* CONFIG_SCHED_SMT */ -+#endif /* CONFIG_SMP */ -+ -+/* -+ * This function gets called by the timer code, with HZ frequency. -+ * We call it with interrupts disabled. -+ */ -+void scheduler_tick(void) -+{ -+ int cpu __maybe_unused = smp_processor_id(); -+ struct rq *rq = cpu_rq(cpu); -+ -+ arch_scale_freq_tick(); -+ sched_clock_tick(); -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ pds_scheduler_task_tick(rq); -+ update_sched_rq_queued_masks_normal(rq); -+ calc_global_load_tick(rq); -+ psi_task_tick(rq); -+ -+ rq->last_tick = rq->clock; -+ raw_spin_unlock(&rq->lock); -+ -+ perf_event_task_tick(); -+} -+ -+#ifdef CONFIG_NO_HZ_FULL -+struct tick_work { -+ int cpu; -+ atomic_t state; -+ struct delayed_work work; -+}; -+/* Values for ->state, see diagram below. */ -+#define TICK_SCHED_REMOTE_OFFLINE 0 -+#define TICK_SCHED_REMOTE_OFFLINING 1 -+#define TICK_SCHED_REMOTE_RUNNING 2 -+ -+/* -+ * State diagram for ->state: -+ * -+ * -+ * TICK_SCHED_REMOTE_OFFLINE -+ * | ^ -+ * | | -+ * | | sched_tick_remote() -+ * | | -+ * | | -+ * +--TICK_SCHED_REMOTE_OFFLINING -+ * | ^ -+ * | | -+ * sched_tick_start() | | sched_tick_stop() -+ * | | -+ * V | -+ * TICK_SCHED_REMOTE_RUNNING -+ * -+ * -+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() -+ * and sched_tick_start() are happy to leave the state in RUNNING. -+ */ -+ -+static struct tick_work __percpu *tick_work_cpu; -+ -+static void sched_tick_remote(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct tick_work *twork = container_of(dwork, struct tick_work, work); -+ int cpu = twork->cpu; -+ struct rq *rq = cpu_rq(cpu); -+ struct task_struct *curr; -+ unsigned long flags; -+ u64 delta; -+ int os; -+ -+ /* -+ * Handle the tick only if it appears the remote CPU is running in full -+ * dynticks mode. The check is racy by nature, but missing a tick or -+ * having one too much is no big deal because the scheduler tick updates -+ * statistics and checks timeslices in a time-independent way, regardless -+ * of when exactly it is running. -+ */ -+ if (!tick_nohz_tick_stopped_cpu(cpu)) -+ goto out_requeue; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ curr = rq->curr; -+ if (cpu_is_offline(cpu)) -+ goto out_unlock; -+ -+ update_rq_clock(rq); -+ if (!is_idle_task(curr)) { -+ /* -+ * Make sure the next tick runs within a reasonable -+ * amount of time. -+ */ -+ delta = rq_clock_task(rq) - curr->last_ran; -+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); -+ } -+ pds_scheduler_task_tick(rq); -+ update_sched_rq_queued_masks_normal(rq); -+ calc_load_nohz_remote(rq); -+ -+out_unlock: -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+out_requeue: -+ /* -+ * Run the remote tick once per second (1Hz). This arbitrary -+ * frequency is large enough to avoid overload but short enough -+ * to keep scheduler internal stats reasonably up to date. 
But -+ * first update state to reflect hotplug activity if required. -+ */ -+ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); -+ if (os == TICK_SCHED_REMOTE_RUNNING) -+ queue_delayed_work(system_unbound_wq, dwork, HZ); -+} -+ -+static void sched_tick_start(int cpu) -+{ -+ int os; -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); -+ if (os == TICK_SCHED_REMOTE_OFFLINE) { -+ twork->cpu = cpu; -+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); -+ queue_delayed_work(system_unbound_wq, &twork->work, HZ); -+ } -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void sched_tick_stop(int cpu) -+{ -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ cancel_delayed_work_sync(&twork->work); -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+int __init sched_tick_offload_init(void) -+{ -+ tick_work_cpu = alloc_percpu(struct tick_work); -+ BUG_ON(!tick_work_cpu); -+ return 0; -+} -+ -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_tick_start(int cpu) { } -+static inline void sched_tick_stop(int cpu) { } -+#endif -+ -+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ -+ defined(CONFIG_PREEMPT_TRACER)) -+/* -+ * If the value passed in is equal to the current preempt count -+ * then we just disabled preemption. Start timing the latency. -+ */ -+static inline void preempt_latency_start(int val) -+{ -+ if (preempt_count() == val) { -+ unsigned long ip = get_lock_parent_ip(); -+#ifdef CONFIG_DEBUG_PREEMPT -+ current->preempt_disable_ip = ip; -+#endif -+ trace_preempt_off(CALLER_ADDR0, ip); -+ } -+} -+ -+void preempt_count_add(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) -+ return; -+#endif -+ __preempt_count_add(val); -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Spinlock count overflowing soon? -+ */ -+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= -+ PREEMPT_MASK - 10); -+#endif -+ preempt_latency_start(val); -+} -+EXPORT_SYMBOL(preempt_count_add); -+NOKPROBE_SYMBOL(preempt_count_add); -+ -+/* -+ * If the value passed in equals to the current preempt count -+ * then we just enabled preemption. Stop timing the latency. -+ */ -+static inline void preempt_latency_stop(int val) -+{ -+ if (preempt_count() == val) -+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); -+} -+ -+void preempt_count_sub(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) -+ return; -+ /* -+ * Is the spinlock portion underflowing? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && -+ !(preempt_count() & PREEMPT_MASK))) -+ return; -+#endif -+ -+ preempt_latency_stop(val); -+ __preempt_count_sub(val); -+} -+EXPORT_SYMBOL(preempt_count_sub); -+NOKPROBE_SYMBOL(preempt_count_sub); -+ -+#else -+static inline void preempt_latency_start(int val) { } -+static inline void preempt_latency_stop(int val) { } -+#endif -+ -+/* -+ * Timeslices below RESCHED_US are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. 
SCHED_BATCH tasks -+ * have been flagged be not latency sensitive and likely to be fully CPU -+ * bound so every time they're rescheduled they have their time_slice -+ * refilled, but get a new later deadline to have little effect on -+ * SCHED_NORMAL tasks. -+ -+ */ -+static inline void check_deadline(struct task_struct *p, struct rq *rq) -+{ -+ if (rq->idle == p) -+ return; -+ -+ pds_update_curr(rq, p); -+ -+ if (p->time_slice < RESCHED_US) { -+ time_slice_expired(p, rq); -+ if (SCHED_ISO == p->policy && ISO_PRIO == p->prio) { -+ p->prio = NORMAL_PRIO; -+ p->deadline = rq->clock + task_deadline_diff(p); -+ update_task_priodl(p); -+ } -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) -+ requeue_task(p, rq); -+ } -+} -+ -+#ifdef CONFIG_SMP -+ -+#define SCHED_RQ_NR_MIGRATION (32UL) -+/* -+ * Migrate pending tasks in @rq to @dest_cpu -+ * Will try to migrate mininal of half of @rq nr_running tasks and -+ * SCHED_RQ_NR_MIGRATION to @dest_cpu -+ */ -+static inline int -+migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, int filter_prio) -+{ -+ struct task_struct *p; -+ int dest_cpu = cpu_of(dest_rq); -+ int nr_migrated = 0; -+ int nr_tries = min((rq->nr_running + 1) / 2, SCHED_RQ_NR_MIGRATION); -+ struct skiplist_node *node = rq->sl_header.next[0]; -+ -+ while (nr_tries && node != &rq->sl_header) { -+ p = skiplist_entry(node, struct task_struct, sl_node); -+ node = node->next[0]; -+ -+ if (task_running(p)) -+ continue; -+ if (p->prio >= filter_prio) -+ break; -+ if (cpumask_test_cpu(dest_cpu, &p->cpus_mask)) { -+ dequeue_task(p, rq, 0); -+ set_task_cpu(p, dest_cpu); -+ enqueue_task(p, dest_rq, 0); -+ nr_migrated++; -+ } -+ nr_tries--; -+ /* make a jump */ -+ if (node == &rq->sl_header) -+ break; -+ node = node->next[0]; -+ } -+ -+ return nr_migrated; -+} -+ -+static inline int -+take_queued_task_cpumask(struct rq *rq, cpumask_t *chk_mask, int filter_prio) -+{ -+ int src_cpu; -+ -+ for_each_cpu(src_cpu, chk_mask) { -+ int nr_migrated; -+ struct rq *src_rq = cpu_rq(src_cpu); -+ -+ if (!do_raw_spin_trylock(&src_rq->lock)) { -+ if (PRIO_LIMIT == filter_prio) -+ continue; -+ return 0; -+ } -+ spin_acquire(&src_rq->lock.dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ -+ update_rq_clock(src_rq); -+ if ((nr_migrated = migrate_pending_tasks(src_rq, rq, filter_prio))) -+ cpufreq_update_this_cpu(rq, 0); -+ -+ spin_release(&src_rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ -+ if (nr_migrated || PRIO_LIMIT != filter_prio) -+ return nr_migrated; -+ } -+ return 0; -+} -+ -+static inline int take_other_rq_task(struct rq *rq, int cpu, int filter_prio) -+{ -+ struct cpumask *affinity_mask, *end; -+ struct cpumask chk; -+ -+ if (PRIO_LIMIT == filter_prio) { -+ cpumask_complement(&chk, &sched_rq_pending_masks[SCHED_RQ_EMPTY]); -+#ifdef CONFIG_SMT_NICE -+ { -+ /* also try to take IDLE priority tasks from smt supressed cpu */ -+ struct cpumask t; -+ if (cpumask_and(&t, &sched_smt_supressed_mask, -+ &sched_rq_queued_masks[SCHED_RQ_IDLE])) -+ cpumask_or(&chk, &chk, &t); -+ } -+#endif -+ } else if (NORMAL_PRIO == filter_prio) { -+ cpumask_or(&chk, &sched_rq_pending_masks[SCHED_RQ_RT], -+ &sched_rq_pending_masks[SCHED_RQ_ISO]); -+ } else if (IDLE_PRIO == filter_prio) { -+ cpumask_complement(&chk, &sched_rq_pending_masks[SCHED_RQ_EMPTY]); -+ cpumask_andnot(&chk, &chk, &sched_rq_pending_masks[SCHED_RQ_IDLE]); -+ } else -+ cpumask_copy(&chk, &sched_rq_pending_masks[SCHED_RQ_RT]); -+ -+ if (cpumask_empty(&chk)) -+ return 0; -+ -+ affinity_mask = per_cpu(sched_cpu_llc_start_mask, cpu); -+ 
end = per_cpu(sched_cpu_affinity_chk_end_masks, cpu); -+ do { -+ struct cpumask tmp; -+ -+ if (cpumask_and(&tmp, &chk, affinity_mask) && -+ take_queued_task_cpumask(rq, &tmp, filter_prio)) -+ return 1; -+ } while (++affinity_mask < end); -+ -+ return 0; -+} -+#endif -+ -+static inline struct task_struct * -+choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) -+{ -+ struct task_struct *next = rq_first_queued_task(rq); -+ -+#ifdef CONFIG_SMT_NICE -+ if (cpumask_test_cpu(cpu, &sched_smt_supressed_mask)) { -+ if (next->prio >= IDLE_PRIO) { -+ if (rq->online && -+ take_other_rq_task(rq, cpu, IDLE_PRIO)) -+ return rq_first_queued_task(rq); -+ return rq->idle; -+ } -+ } -+#endif -+ -+#ifdef CONFIG_SMP -+ if (likely(rq->online)) -+ if (take_other_rq_task(rq, cpu, next->prio)) { -+ resched_curr(rq); -+ return rq_first_queued_task(rq); -+ } -+#endif -+ return next; -+} -+ -+static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ return p->preempt_disable_ip; -+#else -+ return 0; -+#endif -+} -+ -+/* -+ * Print scheduling while atomic bug: -+ */ -+static noinline void __schedule_bug(struct task_struct *prev) -+{ -+ /* Save this before calling printk(), since that will clobber it */ -+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ if (oops_in_progress) -+ return; -+ -+ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", -+ prev->comm, prev->pid, preempt_count()); -+ -+ debug_show_held_locks(prev); -+ print_modules(); -+ if (irqs_disabled()) -+ print_irqtrace_events(prev); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && in_atomic_preempt_off()) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+ if (panic_on_warn) -+ panic("scheduling while atomic\n"); -+ -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+ -+/* -+ * Various schedule()-time debugging checks and statistics: -+ */ -+static inline void schedule_debug(struct task_struct *prev, bool preempt) -+{ -+#ifdef CONFIG_SCHED_STACK_END_CHECK -+ if (task_stack_end_corrupted(prev)) -+ panic("corrupted stack end detected inside scheduler\n"); -+#endif -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && prev->state && prev->non_block_count) { -+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", -+ prev->comm, prev->pid, prev->non_block_count); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+ } -+#endif -+ -+ if (unlikely(in_atomic_preempt_off())) { -+ __schedule_bug(prev); -+ preempt_count_set(PREEMPT_DISABLED); -+ } -+ rcu_sleep_check(); -+ -+ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -+ -+ schedstat_inc(this_rq()->sched_count); -+} -+ -+static inline void set_rq_task(struct rq *rq, struct task_struct *p) -+{ -+ p->last_ran = rq->clock_task; -+ -+#ifdef CONFIG_HIGH_RES_TIMERS -+ if (p != rq->idle) -+ hrtick_start(rq, US_TO_NS(p->time_slice)); -+#endif -+ /* update rq->dither */ -+ rq->dither = rq_dither(rq); -+} -+ -+/* -+ * schedule() is the main scheduler function. -+ * -+ * The main means of driving the scheduler and thus entering this function are: -+ * -+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. -+ * -+ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return -+ * paths. For example, see arch/x86/entry_64.S. -+ * -+ * To drive preemption between tasks, the scheduler sets the flag in timer -+ * interrupt handler scheduler_tick(). -+ * -+ * 3. 
Wakeups don't really cause entry into schedule(). They add a -+ * task to the run-queue and that's it. -+ * -+ * Now, if the new task added to the run-queue preempts the current -+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets -+ * called on the nearest possible occasion: -+ * -+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): -+ * -+ * - in syscall or exception context, at the next outmost -+ * preempt_enable(). (this might be as soon as the wake_up()'s -+ * spin_unlock()!) -+ * -+ * - in IRQ context, return from interrupt-handler to -+ * preemptible context -+ * -+ * - If the kernel is not preemptible (CONFIG_PREEMPT is not set) -+ * then at the next: -+ * -+ * - cond_resched() call -+ * - explicit schedule() call -+ * - return from syscall or exception to user-space -+ * - return from interrupt-handler to user-space -+ * -+ * WARNING: must be called with preemption disabled! -+ */ -+static void __sched notrace __schedule(bool preempt) -+{ -+ struct task_struct *prev, *next; -+ unsigned long *switch_count; -+ struct rq *rq; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ rq = cpu_rq(cpu); -+ prev = rq->curr; -+ -+ schedule_debug(prev, preempt); -+ -+ /* by passing sched_feat(HRTICK) checking which PDS doesn't support */ -+ hrtick_clear(rq); -+ -+ local_irq_disable(); -+ rcu_note_context_switch(preempt); -+ -+ /* -+ * Make sure that signal_pending_state()->signal_pending() below -+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(). -+ * -+ * The membarrier system call requires a full memory barrier -+ * after coming from user-space, before storing to rq->curr. -+ */ -+ raw_spin_lock(&rq->lock); -+ smp_mb__after_spinlock(); -+ -+ update_rq_clock(rq); -+ -+ switch_count = &prev->nivcsw; -+ if (!preempt && prev->state) { -+ if (signal_pending_state(prev->state, prev)) { -+ prev->state = TASK_RUNNING; -+ } else { -+ deactivate_task(prev, rq); -+ -+ if (prev->in_iowait) { -+ atomic_inc(&rq->nr_iowait); -+ delayacct_blkio_start(); -+ } -+ } -+ switch_count = &prev->nvcsw; -+ } -+ -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ -+ check_deadline(prev, rq); -+ -+ next = choose_next_task(rq, cpu, prev); -+ -+ set_rq_task(rq, next); -+ -+ if (prev != next) { -+ if (next->prio == PRIO_LIMIT) -+ schedstat_inc(rq->sched_goidle); -+ -+ /* -+ * RCU users of rcu_dereference(rq->curr) may not see -+ * changes to task_struct made by pick_next_task(). -+ */ -+ RCU_INIT_POINTER(rq->curr, next); -+ /* -+ * The membarrier system call requires each architecture -+ * to have a full memory barrier after updating -+ * rq->curr, before returning to user-space. -+ * -+ * Here are the schemes providing that barrier on the -+ * various architectures: -+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. -+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
-+ * - finish_lock_switch() for weakly-ordered -+ * architectures where spin_unlock is a full barrier, -+ * - switch_to() for arm64 (weakly-ordered, spin_unlock -+ * is a RELEASE barrier), -+ */ -+ ++*switch_count; -+ rq->nr_switches++; -+ -+ psi_sched_switch(prev, next, !task_on_rq_queued(prev)); -+ -+ trace_sched_switch(preempt, prev, next); -+ -+ /* Also unlocks the rq: */ -+ rq = context_switch(rq, prev, next); -+#ifdef CONFIG_SCHED_SMT -+ pds_sg_balance_check(rq); -+#endif -+ } else -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+void __noreturn do_task_dead(void) -+{ -+ /* Causes final put_task_struct in finish_task_switch(): */ -+ set_special_state(TASK_DEAD); -+ -+ /* Tell freezer to ignore us: */ -+ current->flags |= PF_NOFREEZE; -+ __schedule(false); -+ -+ BUG(); -+ -+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -+ for (;;) -+ cpu_relax(); -+} -+ -+static inline void sched_submit_work(struct task_struct *tsk) -+{ -+ if (!tsk->state || tsk_is_pi_blocked(tsk) || -+ signal_pending_state(tsk->state, tsk)) -+ return; -+ -+ /* -+ * If a worker went to sleep, notify and ask workqueue whether -+ * it wants to wake up a task to maintain concurrency. -+ * As this function is called inside the schedule() context, -+ * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker. -+ */ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ preempt_disable(); -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_sleeping(tsk); -+ else -+ io_wq_worker_sleeping(tsk); -+ preempt_enable_no_resched(); -+ } -+ -+ /* -+ * If we are going to sleep and we have plugged IO queued, -+ * make sure to submit it to avoid deadlocks. -+ */ -+ if (blk_needs_flush_plug(tsk)) -+ blk_schedule_flush_plug(tsk); -+} -+ -+static void sched_update_worker(struct task_struct *tsk) -+{ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_running(tsk); -+ else -+ io_wq_worker_running(tsk); -+ } -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ struct task_struct *tsk = current; -+ -+ sched_submit_work(tsk); -+ do { -+ preempt_disable(); -+ __schedule(false); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ sched_update_worker(tsk); -+} -+EXPORT_SYMBOL(schedule); -+ -+/* -+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted -+ * state (have scheduled out non-voluntarily) by making sure that all -+ * tasks have either left the run queue or have gone into user space. -+ * As idle tasks do not do either, they must not ever be preempted -+ * (schedule out non-voluntarily). -+ * -+ * schedule_idle() is similar to schedule_preempt_disable() except that it -+ * never enables preemption because it does not call sched_submit_work(). -+ */ -+void __sched schedule_idle(void) -+{ -+ /* -+ * As this skips calling sched_submit_work(), which the idle task does -+ * regardless because that function is a nop when the task is in a -+ * TASK_RUNNING state, make sure this isn't used someplace that the -+ * current task can be in any other state. Note, idle is always in the -+ * TASK_RUNNING state. 
-+ */ -+ WARN_ON_ONCE(current->state); -+ do { -+ __schedule(false); -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_CONTEXT_TRACKING -+asmlinkage __visible void __sched schedule_user(void) -+{ -+ /* -+ * If we come here after a random call to set_need_resched(), -+ * or we have been woken up remotely but the IPI has not yet arrived, -+ * we haven't yet exited the RCU idle mode. Do it here manually until -+ * we find a better solution. -+ * -+ * NB: There are buggy callers of this function. Ideally we -+ * should warn if prev_state != CONTEXT_USER, but that will trigger -+ * too frequently to make sense yet. -+ */ -+ enum ctx_state prev_state = exception_enter(); -+ schedule(); -+ exception_exit(prev_state); -+} -+#endif -+ -+/** -+ * schedule_preempt_disabled - called with preemption disabled -+ * -+ * Returns with preemption disabled. Note: preempt_count must be 1 -+ */ -+void __sched schedule_preempt_disabled(void) -+{ -+ sched_preempt_enable_no_resched(); -+ schedule(); -+ preempt_disable(); -+} -+ -+static void __sched notrace preempt_schedule_common(void) -+{ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ __schedule(true); -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ -+ /* -+ * Check again in case we missed a preemption opportunity -+ * between schedule and now. -+ */ -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_PREEMPTION -+/* -+ * This is the entry point to schedule() from in-kernel preemption -+ * off of preempt_enable. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule(void) -+{ -+ /* -+ * If there is a non-zero preempt_count or interrupts are disabled, -+ * we do not want to preempt the current task. Just return.. -+ */ -+ if (likely(!preemptible())) -+ return; -+ -+ preempt_schedule_common(); -+} -+NOKPROBE_SYMBOL(preempt_schedule); -+EXPORT_SYMBOL(preempt_schedule); -+ -+/** -+ * preempt_schedule_notrace - preempt_schedule called by tracing -+ * -+ * The tracing infrastructure uses preempt_enable_notrace to prevent -+ * recursion and tracing preempt enabling caused by the tracing -+ * infrastructure itself. But as tracing can happen in areas coming -+ * from userspace or just about to enter userspace, a preempt enable -+ * can occur before user_exit() is called. This will cause the scheduler -+ * to be called when the system is still in usermode. -+ * -+ * To prevent this, the preempt_enable_notrace will use this function -+ * instead of preempt_schedule() to exit user context if needed before -+ * calling the scheduler. 
-+ */ -+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) -+{ -+ enum ctx_state prev_ctx; -+ -+ if (likely(!preemptible())) -+ return; -+ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ /* -+ * Needs preempt disabled in case user_exit() is traced -+ * and the tracer calls preempt_enable_notrace() causing -+ * an infinite recursion. -+ */ -+ prev_ctx = exception_enter(); -+ __schedule(true); -+ exception_exit(prev_ctx); -+ -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ } while (need_resched()); -+} -+EXPORT_SYMBOL_GPL(preempt_schedule_notrace); -+ -+#endif /* CONFIG_PREEMPTION */ -+ -+/* -+ * This is the entry point to schedule() from kernel preemption -+ * off of irq context. -+ * Note, that this is called and return with irqs disabled. This will -+ * protect us against recursive calling from irq. -+ */ -+asmlinkage __visible void __sched preempt_schedule_irq(void) -+{ -+ enum ctx_state prev_state; -+ -+ /* Catch callers which need to be fixed */ -+ BUG_ON(preempt_count() || !irqs_disabled()); -+ -+ prev_state = exception_enter(); -+ -+ do { -+ preempt_disable(); -+ local_irq_enable(); -+ __schedule(true); -+ local_irq_disable(); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ -+ exception_exit(prev_state); -+} -+ -+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, -+ void *key) -+{ -+ return try_to_wake_up(curr->private, mode, wake_flags); -+} -+EXPORT_SYMBOL(default_wake_function); -+ -+static inline void -+check_task_changed(struct rq *rq, struct task_struct *p) -+{ -+ /* -+ * Trigger changes when task priority/deadline modified. -+ */ -+ if (task_on_rq_queued(p)) { -+ struct task_struct *first; -+ -+ requeue_task(p, rq); -+ -+ /* Resched if first queued task not running and not IDLE */ -+ if ((first = rq_first_queued_task(rq)) != rq->curr && -+ !task_running_idle(first)) -+ resched_curr(rq); -+ } -+} -+ -+#ifdef CONFIG_RT_MUTEXES -+ -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ -+/* -+ * rt_mutex_setprio - set the current priority of a task -+ * @p: task to boost -+ * @pi_task: donor task -+ * -+ * This function changes the 'effective' priority of a task. It does -+ * not touch ->normal_prio like __setscheduler(). -+ * -+ * Used by the rt_mutex code to implement priority inheritance -+ * logic. Call site only calls if the priority of the task changed. 
-+ */ -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) -+{ -+ int prio; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. -+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio) -+ return; -+ -+ rq = __task_access_lock(p, &lock); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio) -+ goto out_unlock; -+ -+ /* -+ * Idle task boosting is a nono in general. There is one -+ * exception, when PREEMPT_RT and NOHZ is active: -+ * -+ * The idle task calls get_next_timer_interrupt() and holds -+ * the timer wheel base->lock on the CPU and another CPU wants -+ * to access the timer (probably to cancel it). We can safely -+ * ignore the boosting request, as the idle CPU runs this code -+ * with interrupts disabled and will complete the lock -+ * protected section without being interrupted. So there is no -+ * real need to boost. -+ */ -+ if (unlikely(p == rq->idle)) { -+ WARN_ON(p != rq->curr); -+ WARN_ON(p->pi_blocked_on); -+ goto out_unlock; -+ } -+ -+ trace_sched_pi_setprio(p, pi_task); -+ p->prio = prio; -+ update_task_priodl(p); -+ -+ check_task_changed(rq, p); -+ -+out_unlock: -+ __task_access_unlock(p, lock); -+} -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} -+#endif -+ -+void set_user_nice(struct task_struct *p, long nice) -+{ -+ int new_static; -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) -+ return; -+ new_static = NICE_TO_PRIO(nice); -+ /* -+ * We have to be careful, if called from sys_setpriority(), -+ * the task might be in the middle of scheduling on another CPU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ /* rq lock may not held!! 
*/ -+ update_rq_clock(rq); -+ -+ p->static_prio = new_static; -+ /* -+ * The RT priorities are set via sched_setscheduler(), but we still -+ * allow the 'normal' nice value to be set - but as expected -+ * it wont have any effect on scheduling until the task is -+ * not SCHED_NORMAL/SCHED_BATCH: -+ */ -+ if (task_has_rt_policy(p)) -+ goto out_unlock; -+ -+ p->deadline -= task_deadline_diff(p); -+ p->deadline += static_deadline_diff(new_static); -+ p->prio = effective_prio(p); -+ update_task_priodl(p); -+ -+ check_task_changed(rq, p); -+out_unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+EXPORT_SYMBOL(set_user_nice); -+ -+/* -+ * can_nice - check if a task can reduce its nice value -+ * @p: task -+ * @nice: nice value -+ */ -+int can_nice(const struct task_struct *p, const int nice) -+{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); -+} -+ -+#ifdef __ARCH_WANT_SYS_NICE -+ -+/* -+ * sys_nice - change the priority of the current process. -+ * @increment: priority increment -+ * -+ * sys_setpriority is a more generic, but much slower function that -+ * does similar things. -+ */ -+SYSCALL_DEFINE1(nice, int, increment) -+{ -+ long nice, retval; -+ -+ /* -+ * Setpriority might change our priority at the same moment. -+ * We don't have to worry. Conceptually one call occurs first -+ * and we have a single winner. -+ */ -+ -+ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); -+ nice = task_nice(current) + increment; -+ -+ nice = clamp_val(nice, MIN_NICE, MAX_NICE); -+ if (increment < 0 && !can_nice(current, nice)) -+ return -EPERM; -+ -+ retval = security_task_setnice(current, nice); -+ if (retval) -+ return retval; -+ -+ set_user_nice(current, nice); -+ return 0; -+} -+ -+#endif -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ int level, prio = p->prio - MAX_RT_PRIO; -+ static const int level_to_nice_prio[] = {39, 33, 26, 20, 14, 7, 0, 0}; -+ -+ /* rt tasks */ -+ if (prio <= 0) -+ goto out; -+ -+ preempt_disable(); -+ level = task_deadline_level(p, this_rq()); -+ preempt_enable(); -+ prio += level_to_nice_prio[level]; -+ if (idleprio_task(p)) -+ prio += NICE_WIDTH; -+out: -+ return prio; -+} -+ -+/** -+ * idle_cpu - is a given CPU idle currently? -+ * @cpu: the processor in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int idle_cpu(int cpu) -+{ -+ return cpu_curr(cpu) == cpu_rq(cpu)->idle; -+} -+ -+/** -+ * idle_task - return the idle task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * Return: The idle task for the cpu @cpu. -+ */ -+struct task_struct *idle_task(int cpu) -+{ -+ return cpu_rq(cpu)->idle; -+} -+ -+/** -+ * find_process_by_pid - find a process with a matching PID value. -+ * @pid: the pid in question. -+ * -+ * The task of @pid, if found. %NULL otherwise. -+ */ -+static inline struct task_struct *find_process_by_pid(pid_t pid) -+{ -+ return pid ? 
find_task_by_vpid(pid) : current; -+} -+ -+#ifdef CONFIG_SMP -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. -+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. -+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ int dest_cpu; -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. -+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (cpumask_equal(&p->cpus_mask, new_mask)) -+ goto out; -+ -+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ do_set_cpus_allowed(p, new_mask); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. -+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(p) || p->state == TASK_WAKING) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ /* Need help from migration thread: drop lock and wait. */ -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -+ return 0; -+ } -+ if (task_on_rq_queued(p)) { -+ /* -+ * OK, since we're going to drop the lock immediately -+ * afterwards anyway. 
-+ */ -+ update_rq_clock(rq); -+ rq = move_queued_task(rq, p, dest_cpu); -+ lock = &rq->lock; -+ } -+ -+out: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ return ret; -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+#else -+static inline int -+__set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+#endif -+ -+static u64 task_init_deadline(const struct task_struct *p) -+{ -+ return task_rq(p)->clock + task_deadline_diff(p); -+} -+ -+u64 (* task_init_deadline_func_tbl[])(const struct task_struct *p) = { -+ task_init_deadline, /* SCHED_NORMAL */ -+ NULL, /* SCHED_FIFO */ -+ NULL, /* SCHED_RR */ -+ task_init_deadline, /* SCHED_BATCH */ -+ NULL, /* SCHED_ISO */ -+ task_init_deadline /* SCHED_IDLE */ -+}; -+ -+/* -+ * sched_setparam() passes in -1 for its policy, to let the functions -+ * it calls know not to change it. -+ */ -+#define SETPARAM_POLICY -1 -+ -+static void __setscheduler_params(struct task_struct *p, -+ const struct sched_attr *attr) -+{ -+ int old_policy = p->policy; -+ int policy = attr->sched_policy; -+ -+ if (policy == SETPARAM_POLICY) -+ policy = p->policy; -+ -+ p->policy = policy; -+ -+ /* -+ * allow normal nice value to be set, but will not have any -+ * effect on scheduling until the task not SCHED_NORMAL/ -+ * SCHED_BATCH -+ */ -+ p->static_prio = NICE_TO_PRIO(attr->sched_nice); -+ -+ /* -+ * __sched_setscheduler() ensures attr->sched_priority == 0 when -+ * !rt_policy. Always setting this ensures that things like -+ * getparam()/getattr() don't report silly values for !rt tasks. -+ */ -+ p->rt_priority = attr->sched_priority; -+ p->normal_prio = normal_prio(p); -+ -+ if (old_policy != policy) -+ p->deadline = (task_init_deadline_func_tbl[p->policy])? -+ task_init_deadline_func_tbl[p->policy](p):0ULL; -+} -+ -+/* Actually do priority change: must hold rq lock. */ -+static void __setscheduler(struct rq *rq, struct task_struct *p, -+ const struct sched_attr *attr, bool keep_boost) -+{ -+ __setscheduler_params(p, attr); -+ -+ /* -+ * Keep a potential priority boosting if called from -+ * sched_setscheduler(). 
-+ */ -+ p->prio = normal_prio(p); -+ if (keep_boost) -+ p->prio = rt_effective_prio(p, p->prio); -+ update_task_priodl(p); -+} -+ -+/* -+ * check the target process has a UID that matches the current process's -+ */ -+static bool check_same_owner(struct task_struct *p) -+{ -+ const struct cred *cred = current_cred(), *pcred; -+ bool match; -+ -+ rcu_read_lock(); -+ pcred = __task_cred(p); -+ match = (uid_eq(cred->euid, pcred->euid) || -+ uid_eq(cred->euid, pcred->uid)); -+ rcu_read_unlock(); -+ return match; -+} -+ -+static int -+__sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, bool user, bool pi) -+{ -+ const struct sched_attr dl_squash_attr = { -+ .size = sizeof(struct sched_attr), -+ .sched_policy = SCHED_FIFO, -+ .sched_nice = 0, -+ .sched_priority = 99, -+ }; -+ int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ int retval, oldpolicy = -1; -+ int policy = attr->sched_policy; -+ unsigned long flags; -+ struct rq *rq; -+ int reset_on_fork; -+ raw_spinlock_t *lock; -+ -+ /* The pi code expects interrupts enabled */ -+ BUG_ON(pi && in_interrupt()); -+ -+ /* -+ * PDS supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO -+ */ -+ if (unlikely(SCHED_DEADLINE == policy)) { -+ attr = &dl_squash_attr; -+ policy = attr->sched_policy; -+ newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ } -+recheck: -+ /* Double check policy once rq lock held */ -+ if (policy < 0) { -+ reset_on_fork = p->sched_reset_on_fork; -+ policy = oldpolicy = p->policy; -+ } else { -+ reset_on_fork = !!(attr->sched_flags & SCHED_RESET_ON_FORK); -+ -+ if (policy > SCHED_IDLE) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & ~(SCHED_FLAG_ALL)) -+ return -EINVAL; -+ -+ /* -+ * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * SCHED_BATCH and SCHED_IDLE is 0. -+ */ -+ if (attr->sched_priority < 0 || -+ (p->mm && attr->sched_priority > MAX_USER_RT_PRIO - 1) || -+ (!p->mm && attr->sched_priority > MAX_RT_PRIO - 1)) -+ return -EINVAL; -+ if ((SCHED_RR == policy || SCHED_FIFO == policy) != -+ (attr->sched_priority != 0)) -+ return -EINVAL; -+ -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (SCHED_FIFO == policy || SCHED_RR == policy) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (attr->sched_priority > p->rt_priority && -+ attr->sched_priority > rlim_rtprio) -+ return -EPERM; -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ -+ if (user) { -+ retval = security_task_setscheduler(p); -+ if (retval) -+ return retval; -+ } -+ -+ if (pi) -+ cpuset_read_lock(); -+ -+ /* -+ * Make sure no PI-waiters arrive (or leave) while we are -+ * changing the priority of the task: -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ /* -+ * To be able to change p->policy safely, task_access_lock() -+ * must be called. -+ * IF use task_access_lock() here: -+ * For the task p which is not running, reading rq->stop is -+ * racy but acceptable as ->stop doesn't change much. -+ * An enhancemnet can be made to read rq->stop saftly. 
-+ */ -+ rq = __task_access_lock(p, &lock); -+ -+ /* -+ * Changing the policy of the stop threads its a very bad idea -+ */ -+ if (p == rq->stop) { -+ retval = -EINVAL; -+ goto unlock; -+ } -+ -+ /* -+ * If not changing anything there's no need to proceed further: -+ */ -+ if (unlikely(policy == p->policy)) { -+ if (rt_policy(policy) && attr->sched_priority != p->rt_priority) -+ goto change; -+ if (!rt_policy(policy) && -+ NICE_TO_PRIO(attr->sched_nice) != p->static_prio) -+ goto change; -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ retval = 0; -+ goto unlock; -+ } -+change: -+ -+ /* Re-check policy now with rq lock held */ -+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { -+ policy = oldpolicy = -1; -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ goto recheck; -+ } -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ -+ if (pi) { -+ /* -+ * Take priority boosted tasks into account. If the new -+ * effective priority is unchanged, we just store the new -+ * normal parameters and do not touch the scheduler class and -+ * the runqueue. This will be done when the task deboost -+ * itself. -+ */ -+ if (rt_effective_prio(p, newprio) == p->prio) { -+ __setscheduler_params(p, attr); -+ retval = 0; -+ goto unlock; -+ } -+ } -+ -+ __setscheduler(rq, p, attr, pi); -+ -+ check_task_changed(rq, p); -+ -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ if (pi) { -+ cpuset_read_unlock(); -+ rt_mutex_adjust_pi(p); -+ } -+ -+ preempt_enable(); -+ -+ return 0; -+ -+unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ return retval; -+} -+ -+static int _sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param, bool check) -+{ -+ struct sched_attr attr = { -+ .sched_policy = policy, -+ .sched_priority = param->sched_priority, -+ .sched_nice = PRIO_TO_NICE(p->static_prio), -+ }; -+ -+ /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ -+ if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { -+ attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ policy &= ~SCHED_RESET_ON_FORK; -+ attr.sched_policy = policy; -+ } -+ -+ return __sched_setscheduler(p, &attr, check, true); -+} -+ -+/** -+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * -+ * NOTE that the task may be already dead. -+ */ -+int sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, true); -+} -+ -+EXPORT_SYMBOL_GPL(sched_setscheduler); -+ -+int sched_setattr(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, true, true); -+} -+EXPORT_SYMBOL_GPL(sched_setattr); -+ -+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, false, true); -+} -+ -+/** -+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. 
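/*
 * Illustrative userspace counterpart (not part of the PDS code above):
 * sched_setattr() is reached through the sched_setattr(2) syscall.  glibc
 * ships no wrapper, so callers typically declare struct sched_attr themselves
 * and go through syscall(2).  The layout below matches SCHED_ATTR_SIZE_VER0;
 * the demo_* names are made up, and SYS_sched_setattr is assumed to be
 * provided by the installed headers.
 */
#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

struct demo_sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;		/* SCHED_NORMAL/SCHED_BATCH */
	uint32_t sched_priority;	/* SCHED_FIFO/SCHED_RR */
	uint64_t sched_runtime;		/* SCHED_DEADLINE only */
	uint64_t sched_deadline;
	uint64_t sched_period;
};

static long demo_sched_setattr(pid_t pid, struct demo_sched_attr *attr,
			       unsigned int flags)
{
	return syscall(SYS_sched_setattr, pid, attr, flags);
}

int main(void)
{
	struct demo_sched_attr attr = {
		.size         = sizeof(attr),
		.sched_policy = SCHED_BATCH,
		.sched_nice   = 5,
	};

	if (demo_sched_setattr(0, &attr, 0))
		fprintf(stderr, "sched_setattr: %s\n", strerror(errno));
	else
		printf("running as SCHED_BATCH, nice 5\n");
	return 0;
}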
-+ * -+ * Just like sched_setscheduler, only don't bother checking if the -+ * current context has permission. For example, this is needed in -+ * stop_machine(): we create temporary high priority worker threads, -+ * but our caller might not have that capability. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+int sched_setscheduler_nocheck(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, false); -+} -+EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); -+ -+static int -+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) -+{ -+ struct sched_param lparam; -+ struct task_struct *p; -+ int retval; -+ -+ if (!param || pid < 0) -+ return -EINVAL; -+ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) -+ return -EFAULT; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setscheduler(p, policy, &lparam); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/* -+ * Mimics kernel/events/core.c perf_copy_attr(). -+ */ -+static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr) -+{ -+ u32 size; -+ int ret; -+ -+ /* Zero the full structure, so that a short copy will be nice: */ -+ memset(attr, 0, sizeof(*attr)); -+ -+ ret = get_user(size, &uattr->size); -+ if (ret) -+ return ret; -+ -+ /* ABI compatibility quirk: */ -+ if (!size) -+ size = SCHED_ATTR_SIZE_VER0; -+ -+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) -+ goto err_size; -+ -+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); -+ if (ret) { -+ if (ret == -E2BIG) -+ goto err_size; -+ return ret; -+ } -+ -+ /* -+ * XXX: Do we want to be lenient like existing syscalls; or do we want -+ * to be strict and return an error on out-of-bounds values? -+ */ -+ attr->sched_nice = clamp(attr->sched_nice, -20, 19); -+ -+ /* sched/core.c uses zero here but we already know ret is zero */ -+ return 0; -+ -+err_size: -+ put_user(sizeof(*attr), &uattr->size); -+ return -E2BIG; -+} -+ -+/** -+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority -+ * @pid: the pid in question. -+ * @policy: new policy. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * @param: structure containing the new RT priority. -+ */ -+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) -+{ -+ if (policy < 0) -+ return -EINVAL; -+ -+ return do_sched_setscheduler(pid, policy, param); -+} -+ -+/** -+ * sys_sched_setparam - set/change the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); -+} -+ -+/** -+ * sys_sched_setattr - same as above, but with extended sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. 
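/*
 * Illustrative sketch (not part of the PDS code above): sched_copy_attr()
 * relies on the "size field as ABI version" pattern of copy_struct_from_user().
 * A simplified userspace analogue, assuming the source buffer is already
 * accessible (no fault handling); the names are made up.
 */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* 0 on success, -1 if the caller's unknown trailing fields are non-zero. */
static int copy_struct_versioned(void *dst, size_t ksize,
				 const void *src, size_t usize)
{
	size_t n = usize < ksize ? usize : ksize;
	const unsigned char *tail = (const unsigned char *)src + ksize;

	memset(dst, 0, ksize);		/* a short copy leaves the rest zeroed */
	memcpy(dst, src, n);

	/*
	 * A newer caller may pass a larger struct, but only if everything the
	 * kernel does not understand is zero -- otherwise -E2BIG, as above.
	 */
	for (size_t i = 0; usize > ksize && i < usize - ksize; i++)
		if (tail[i])
			return -1;
	return 0;
}

struct demo_v1 { int a; };
struct demo_v2 { int a; int b; };

int main(void)
{
	struct demo_v2 user = { .a = 1, .b = 0 };	/* "newer" userspace struct */
	struct demo_v1 kernel;

	printf("ok=%d a=%d\n",
	       copy_struct_versioned(&kernel, sizeof(kernel), &user, sizeof(user)) == 0,
	       kernel.a);
	return 0;
}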
-+ */ -+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, flags) -+{ -+ struct sched_attr attr; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || flags) -+ return -EINVAL; -+ -+ retval = sched_copy_attr(uattr, &attr); -+ if (retval) -+ return retval; -+ -+ if ((int)attr.sched_policy < 0) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (p != NULL) -+ retval = sched_setattr(p, &attr); -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread -+ * @pid: the pid in question. -+ * -+ * Return: On success, the policy of the thread. Otherwise, a negative error -+ * code. -+ */ -+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) -+{ -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (pid < 0) -+ goto out_nounlock; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (p) { -+ retval = security_task_getscheduler(p); -+ if (!retval) -+ retval = p->policy; -+ } -+ rcu_read_unlock(); -+ -+out_nounlock: -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the RT priority. -+ * -+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error -+ * code. -+ */ -+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ struct sched_param lp = { .sched_priority = 0 }; -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (!param || pid < 0) -+ goto out_nounlock; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ if (task_has_rt_policy(p)) -+ lp.sched_priority = p->rt_priority; -+ rcu_read_unlock(); -+ -+ /* -+ * This one might sleep, we cannot do it with a spinlock held ... -+ */ -+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; -+ -+out_nounlock: -+ return retval; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/* -+ * Copy the kernel size attribute structure (which might be larger -+ * than what user-space knows about) to user-space. -+ * -+ * Note that all cases are valid: user-space buffer can be larger or -+ * smaller than the kernel-space buffer. The usual case is that both -+ * have the same size. -+ */ -+static int -+sched_attr_copy_to_user(struct sched_attr __user *uattr, -+ struct sched_attr *kattr, -+ unsigned int usize) -+{ -+ unsigned int ksize = sizeof(*kattr); -+ -+ if (!access_ok(uattr, usize)) -+ return -EFAULT; -+ -+ /* -+ * sched_getattr() ABI forwards and backwards compatibility: -+ * -+ * If usize == ksize then we just copy everything to user-space and all is good. -+ * -+ * If usize < ksize then we only copy as much as user-space has space for, -+ * this keeps ABI compatibility as well. We skip the rest. -+ * -+ * If usize > ksize then user-space is using a newer version of the ABI, -+ * which part the kernel doesn't know about. Just ignore it - tooling can -+ * detect the kernel's knowledge of attributes from the attr->size value -+ * which is set to ksize in this case. -+ */ -+ kattr->size = min(usize, ksize); -+ -+ if (copy_to_user(uattr, kattr, kattr->size)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr -+ * @pid: the pid in question. 
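/*
 * Illustrative userspace counterpart (not part of the PDS code above): the
 * read-side syscalls sys_sched_getscheduler() and sys_sched_getparam() are
 * reached through their POSIX wrappers.
 */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	struct sched_param sp;
	int policy = sched_getscheduler(0);	/* pid 0 == calling process */

	if (policy == -1 || sched_getparam(0, &sp) == -1) {
		perror("sched_get*");
		return 1;
	}
	printf("policy %d, rt_priority %d\n", policy, sp.sched_priority);
	return 0;
}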
-+ * @uattr: structure containing the extended parameters. -+ * @usize: sizeof(attr) for fwd/bwd comp. -+ * @flags: for future extension. -+ */ -+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, usize, unsigned int, flags) -+{ -+ struct sched_attr kattr = { }; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || usize > PAGE_SIZE || -+ usize < SCHED_ATTR_SIZE_VER0 || flags) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ kattr.sched_policy = p->policy; -+ if (rt_task(p)) -+ kattr.sched_priority = p->rt_priority; -+ else -+ kattr.sched_nice = task_nice(p); -+ -+#ifdef CONFIG_UCLAMP_TASK -+ kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; -+ kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; -+#endif -+ -+ rcu_read_unlock(); -+ -+ return sched_attr_copy_to_user(uattr, &kattr, usize); -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ -+ cpumask_var_t cpus_mask, new_mask; -+ struct task_struct *p; -+ int retval; -+ -+ get_online_cpus(); -+ rcu_read_lock(); -+ -+ p = find_process_by_pid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ put_online_cpus(); -+ return -ESRCH; -+ } -+ -+ /* Prevent p going away */ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->flags & PF_NO_SETAFFINITY) { -+ retval = -EINVAL; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&cpus_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ retval = -EPERM; -+ if (!check_same_owner(p)) { -+ rcu_read_lock(); -+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { -+ rcu_read_unlock(); -+ goto out_unlock; -+ } -+ rcu_read_unlock(); -+ } -+ -+ retval = security_task_setscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ cpuset_cpus_allowed(p, cpus_mask); -+ cpumask_and(new_mask, in_mask, cpus_mask); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ -+ if (!retval) { -+ cpuset_cpus_allowed(p, cpus_mask); -+ if (!cpumask_subset(new_mask, cpus_mask)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. Just reset the cpus_mask to the -+ * cpuset's cpus_mask -+ */ -+ cpumask_copy(new_mask, cpus_mask); -+ goto again; -+ } -+ } -+out_unlock: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_mask); -+out_put_task: -+ put_task_struct(p); -+ put_online_cpus(); -+ return retval; -+} -+ -+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, -+ struct cpumask *new_mask) -+{ -+ if (len < cpumask_size()) -+ cpumask_clear(new_mask); -+ else if (len > cpumask_size()) -+ len = cpumask_size(); -+ -+ return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; -+} -+ -+/** -+ * sys_sched_setaffinity - set the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to the new CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. 
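/*
 * Illustrative userspace counterpart (not part of the PDS code above): the
 * sched_setaffinity() path just shown is what the glibc wrapper of the same
 * name calls into.  Pinning the calling process to CPU 0:
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);

	if (sched_setaffinity(0, sizeof(set), &set) == -1) {	/* pid 0 == self */
		perror("sched_setaffinity");
		return 1;
	}
	printf("pinned to CPU 0\n");
	return 0;
}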
-+ */ -+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ cpumask_var_t new_mask; -+ int retval; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); -+ if (retval == 0) -+ retval = sched_setaffinity(pid, new_mask); -+ free_cpumask_var(new_mask); -+ return retval; -+} -+ -+long sched_getaffinity(pid_t pid, cpumask_t *mask) -+{ -+ struct task_struct *p; -+ raw_spinlock_t *lock; -+ unsigned long flags; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ task_access_lock_irqsave(p, &lock, &flags); -+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+out_unlock: -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getaffinity - get the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to hold the current CPU mask -+ * -+ * Return: size of CPU mask copied to user_mask_ptr on success. An -+ * error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ int ret; -+ cpumask_var_t mask; -+ -+ if ((len * BITS_PER_BYTE) < nr_cpu_ids) -+ return -EINVAL; -+ if (len & (sizeof(unsigned long)-1)) -+ return -EINVAL; -+ -+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ ret = sched_getaffinity(pid, mask); -+ if (ret == 0) { -+ unsigned int retlen = min_t(size_t, len, cpumask_size()); -+ -+ if (copy_to_user(user_mask_ptr, mask, retlen)) -+ ret = -EFAULT; -+ else -+ ret = retlen; -+ } -+ free_cpumask_var(mask); -+ -+ return ret; -+} -+ -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. It does this by -+ * scheduling away the current task. If it still has the earliest deadline -+ * it will be scheduled again as the next task. -+ * -+ * Return: 0. -+ */ -+static void do_sched_yield(void) -+{ -+ struct rq *rq; -+ struct rq_flags rf; -+ -+ if (!sched_yield_type) -+ return; -+ -+ rq = this_rq_lock_irq(&rf); -+ -+ if (sched_yield_type > 1) { -+ time_slice_expired(current, rq); -+ requeue_task(current, rq); -+ } -+ schedstat_inc(rq->yld_count); -+ -+ /* -+ * Since we are going to call schedule() anyway, there's -+ * no need to preempt or enable interrupts: -+ */ -+ preempt_disable(); -+ raw_spin_unlock(&rq->lock); -+ sched_preempt_enable_no_resched(); -+ -+ schedule(); -+} -+ -+SYSCALL_DEFINE0(sched_yield) -+{ -+ do_sched_yield(); -+ return 0; -+} -+ -+#ifndef CONFIG_PREEMPTION -+int __sched _cond_resched(void) -+{ -+ if (should_resched(0)) { -+ preempt_schedule_common(); -+ return 1; -+ } -+ rcu_all_qs(); -+ return 0; -+} -+EXPORT_SYMBOL(_cond_resched); -+#endif -+ -+/* -+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, -+ * call schedule, and on return reacquire the lock. -+ * -+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level -+ * operations here to prevent schedule() from being called twice (once via -+ * spin_unlock(), once by hand). 
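/*
 * Illustrative userspace counterpart (not part of the PDS code above):
 * sys_sched_getaffinity() returns the mask copied just above, and
 * sys_sched_yield() maps to sched_yield(3).  Note that with the PDS
 * sched_yield_type knob set to 0, do_sched_yield() returns without
 * requeueing anything.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	if (sched_getaffinity(0, sizeof(set), &set) == -1) {
		perror("sched_getaffinity");
		return 1;
	}
	printf("runnable on %d CPU(s)\n", CPU_COUNT(&set));

	sched_yield();
	return 0;
}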
-+ */ -+int __cond_resched_lock(spinlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held(lock); -+ -+ if (spin_needbreak(lock) || resched) { -+ spin_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ spin_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_lock); -+ -+/** -+ * yield - yield the current processor to other threads. -+ * -+ * Do not ever use this function, there's a 99% chance you're doing it wrong. -+ * -+ * The scheduler is at all times free to pick the calling task as the most -+ * eligible task to run, if removing the yield() call from your code breaks -+ * it, its already broken. -+ * -+ * Typical broken usage is: -+ * -+ * while (!event) -+ * yield(); -+ * -+ * where one assumes that yield() will let 'the other' process run that will -+ * make event true. If the current task is a SCHED_FIFO task that will never -+ * happen. Never use yield() as a progress guarantee!! -+ * -+ * If you want to use yield() to wait for something, use wait_event(). -+ * If you want to use yield() to be 'nice' for others, use cond_resched(). -+ * If you still want to use yield(), do not! -+ */ -+void __sched yield(void) -+{ -+ set_current_state(TASK_RUNNING); -+ do_sched_yield(); -+} -+EXPORT_SYMBOL(yield); -+ -+/** -+ * yield_to - yield the current processor to another thread in -+ * your thread group, or accelerate that thread toward the -+ * processor it's on. -+ * @p: target task -+ * @preempt: whether task preemption is allowed or not -+ * -+ * It's the caller's job to ensure that the target task struct -+ * can't go away on us before we can do any checks. -+ * -+ * In PDS, yield_to is not supported. -+ * -+ * Return: -+ * true (>0) if we indeed boosted the target task. -+ * false (0) if we failed to boost the target. -+ * -ESRCH if there's no task to yield to. -+ */ -+int __sched yield_to(struct task_struct *p, bool preempt) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(yield_to); -+ -+int io_schedule_prepare(void) -+{ -+ int old_iowait = current->in_iowait; -+ -+ current->in_iowait = 1; -+ blk_schedule_flush_plug(current); -+ -+ return old_iowait; -+} -+ -+void io_schedule_finish(int token) -+{ -+ current->in_iowait = token; -+} -+ -+/* -+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so -+ * that process accounting knows that this is a task in IO wait state. -+ * -+ * But don't do that if it is a deliberate, throttling IO wait (this task -+ * has set its backing_dev_info: the queue against which it should throttle) -+ */ -+ -+long __sched io_schedule_timeout(long timeout) -+{ -+ int token; -+ long ret; -+ -+ token = io_schedule_prepare(); -+ ret = schedule_timeout(timeout); -+ io_schedule_finish(token); -+ -+ return ret; -+} -+EXPORT_SYMBOL(io_schedule_timeout); -+ -+void io_schedule(void) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ schedule(); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(io_schedule); -+ -+/** -+ * sys_sched_get_priority_max - return maximum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the maximum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. 
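/*
 * Illustrative sketch (not part of the PDS code above): the yield() comment
 * warns against "while (!event) yield();" loops.  The userspace analogue of
 * the recommended wait_event() is a condition variable, where the waiter
 * sleeps until the flag really changes.  Build with -pthread.
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static bool event;

static void wait_for_event(void)
{
	pthread_mutex_lock(&lock);
	while (!event)			/* not: while (!event) sched_yield(); */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}

static void *worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	event = true;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	wait_for_event();
	return pthread_join(t, NULL);
}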
-+ */ -+SYSCALL_DEFINE1(sched_get_priority_max, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = MAX_USER_RT_PRIO-1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * sys_sched_get_priority_min - return minimum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the minimum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_min, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) -+{ -+ struct task_struct *p; -+ int retval; -+ -+ if (pid < 0) -+ return -EINVAL; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ rcu_read_unlock(); -+ -+ *t = ns_to_timespec64(MS_TO_NS(rr_interval)); -+ return 0; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/** -+ * sys_sched_rr_get_interval - return the default timeslice of a process. -+ * @pid: pid of the process. -+ * @interval: userspace pointer to the timeslice value. -+ * -+ * -+ * Return: On success, 0 and the timeslice is in @interval. Otherwise, -+ * an error code. -+ */ -+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, -+ struct __kernel_timespec __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_timespec64(&t, interval); -+ -+ return retval; -+} -+ -+#ifdef CONFIG_COMPAT_32BIT_TIME -+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, -+ struct old_timespec32 __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_old_timespec32(&t, interval); -+ return retval; -+} -+#endif -+ -+void sched_show_task(struct task_struct *p) -+{ -+ unsigned long free = 0; -+ int ppid; -+ -+ if (!try_get_task_stack(p)) -+ return; -+ -+ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); -+ -+ if (p->state == TASK_RUNNING) -+ printk(KERN_CONT " running task "); -+#ifdef CONFIG_DEBUG_STACK_USAGE -+ free = stack_not_used(p); -+#endif -+ ppid = 0; -+ rcu_read_lock(); -+ if (pid_alive(p)) -+ ppid = task_pid_nr(rcu_dereference(p->real_parent)); -+ rcu_read_unlock(); -+ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, -+ task_pid_nr(p), ppid, -+ (unsigned long)task_thread_info(p)->flags); -+ -+ print_worker_info(KERN_INFO, p); -+ show_stack(p, NULL); -+ put_task_stack(p); -+} -+EXPORT_SYMBOL_GPL(sched_show_task); -+ -+static inline bool -+state_filter_match(unsigned long state_filter, struct task_struct *p) -+{ -+ /* no filter, everything matches */ -+ if (!state_filter) -+ return true; -+ -+ /* filter, but doesn't match */ -+ if (!(p->state & state_filter)) -+ return false; -+ -+ /* -+ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows -+ * TASK_KILLABLE). 
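/*
 * Illustrative userspace counterpart (not part of the PDS code above): the
 * priority-range and timeslice syscalls can be probed directly.  In this PDS
 * code sched_rr_get_interval() reports the scheduler's global rr_interval for
 * any task that passes the permission check.
 */
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	printf("SCHED_FIFO priority range: %d..%d\n",
	       sched_get_priority_min(SCHED_FIFO),
	       sched_get_priority_max(SCHED_FIFO));

	if (sched_rr_get_interval(0, &ts) == 0)		/* pid 0 == self */
		printf("timeslice: %ld.%09ld s\n", (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}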
-+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) -+ return false; -+ -+ return true; -+} -+ -+ -+void show_state_filter(unsigned long state_filter) -+{ -+ struct task_struct *g, *p; -+ -+#if BITS_PER_LONG == 32 -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#else -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#endif -+ rcu_read_lock(); -+ for_each_process_thread(g, p) { -+ /* -+ * reset the NMI-timeout, listing all files on a slow -+ * console might take a lot of time: -+ * Also, reset softlockup watchdogs on all CPUs, because -+ * another CPU might be blocked waiting for us to process -+ * an IPI. -+ */ -+ touch_nmi_watchdog(); -+ touch_all_softlockup_watchdogs(); -+ if (state_filter_match(state_filter, p)) -+ sched_show_task(p); -+ } -+ -+#ifdef CONFIG_SCHED_DEBUG -+ /* PDS TODO: should support this -+ if (!state_filter) -+ sysrq_sched_debug_show(); -+ */ -+#endif -+ rcu_read_unlock(); -+ /* -+ * Only show locks if all tasks are dumped: -+ */ -+ if (!state_filter) -+ debug_show_all_locks(); -+} -+ -+void dump_cpu_task(int cpu) -+{ -+ pr_info("Task dump for CPU %d:\n", cpu); -+ sched_show_task(cpu_curr(cpu)); -+} -+ -+/** -+ * init_idle - set up an idle thread for a given CPU -+ * @idle: task in question -+ * @cpu: cpu the idle task belongs to -+ * -+ * NOTE: this function does not set the idle thread's NEED_RESCHED -+ * flag, to make booting more robust. -+ */ -+void init_idle(struct task_struct *idle, int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&idle->pi_lock, flags); -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ idle->last_ran = rq->clock_task; -+ idle->state = TASK_RUNNING; -+ idle->flags |= PF_IDLE; -+ /* Setting prio to illegal value shouldn't matter when never queued */ -+ idle->prio = PRIO_LIMIT; -+ idle->deadline = rq_clock(rq) + task_deadline_diff(idle); -+ update_task_priodl(idle); -+ -+ kasan_unpoison_task_stack(idle); -+ -+#ifdef CONFIG_SMP -+ /* -+ * It's possible that init_idle() gets called multiple times on a task, -+ * in that case do_set_cpus_allowed() will not do the right thing. -+ * -+ * And since this is boot we can forgo the serialisation. -+ */ -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); -+#endif -+ -+ /* Silence PROVE_RCU */ -+ rcu_read_lock(); -+ __set_task_cpu(idle, cpu); -+ rcu_read_unlock(); -+ -+ rq->idle = idle; -+ rcu_assign_pointer(rq->curr, idle); -+ idle->on_cpu = 1; -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); -+ -+ /* Set the preempt count _outside_ the spinlocks! */ -+ init_idle_preempt_count(idle, cpu); -+ -+ ftrace_graph_init_idle_task(idle, cpu); -+ vtime_init_idle(idle, cpu); -+#ifdef CONFIG_SMP -+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -+#endif -+} -+ -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(cpu_rq(cpu)); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+} -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. 
-+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. -+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. -+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); -+ /* task can safely be re-inserted now: */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ -+#ifdef CONFIG_SMP -+ -+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, -+ const struct cpumask __maybe_unused *trial) -+{ -+ return 1; -+} -+ -+int task_can_attach(struct task_struct *p, -+ const struct cpumask *cs_cpus_allowed) -+{ -+ int ret = 0; -+ -+ /* -+ * Kthreads which disallow setaffinity shouldn't be moved -+ * to a new cpuset; we don't want to change their CPU -+ * affinity and isolating such threads by their set of -+ * allowed nodes is unnecessary. Thus, cpusets are not -+ * applicable for such threads. This prevents checking for -+ * success of set_cpus_allowed_ptr() on all attached tasks -+ * before cpus_mask may be changed. -+ */ -+ if (p->flags & PF_NO_SETAFFINITY) -+ ret = -EINVAL; -+ -+ return ret; -+} -+ -+static bool sched_smp_initialized __read_mostly; -+ -+#ifdef CONFIG_NO_HZ_COMMON -+void nohz_balance_enter_idle(int cpu) -+{ -+} -+ -+void select_nohz_load_balancer(int stop_tick) -+{ -+} -+ -+void set_cpu_sd_state_idle(void) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. 
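/*
 * Illustrative sketch (not part of the PDS code above): __wake_q_add() claims
 * a task with a single cmpxchg on its wake_q node, so concurrent wakers queue
 * it at most once and wake_up_q() later re-arms it.  A stripped-down C11
 * userspace version of just that claim/re-arm step; the demo_* names are made up.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct demo_task {
	struct demo_task *_Atomic wake_next;	/* NULL == not queued */
};

#define DEMO_Q_TAIL ((struct demo_task *)0x01)

/* True only for the caller that wins the claim (cf. __wake_q_add()). */
static bool demo_wake_q_claim(struct demo_task *t)
{
	struct demo_task *expected = NULL;

	return atomic_compare_exchange_strong(&t->wake_next, &expected,
					      DEMO_Q_TAIL);
}

/* Re-arm after the wakeup has been delivered (cf. wake_up_q()). */
static void demo_wake_q_release(struct demo_task *t)
{
	atomic_store(&t->wake_next, (struct demo_task *)NULL);
}

int main(void)
{
	struct demo_task t = { .wake_next = NULL };
	bool first  = demo_wake_q_claim(&t);	/* true */
	bool second = demo_wake_q_claim(&t);	/* false: already queued */

	demo_wake_q_release(&t);
	return (first && !second) ? 0 : 1;
}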
-+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). -+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(), default_cpu = -1; -+ struct cpumask *mask; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { -+ if (!idle_cpu(cpu)) -+ return cpu; -+ default_cpu = cpu; -+ } -+ -+ for (mask = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ mask < per_cpu(sched_cpu_affinity_chk_end_masks, cpu); mask++) -+ for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) -+ if (!idle_cpu(i)) -+ return i; -+ -+ if (default_cpu == -1) -+ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+ cpu = default_cpu; -+ -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. -+ */ -+void wake_up_idle_cpu(int cpu) -+{ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ set_tsk_need_resched(cpu_rq(cpu)->idle); -+ smp_send_reschedule(cpu); -+} -+ -+void wake_up_nohz_cpu(int cpu) -+{ -+ wake_up_idle_cpu(cpu); -+} -+#endif /* CONFIG_NO_HZ_COMMON */ -+ -+#ifdef CONFIG_HOTPLUG_CPU -+/* -+ * Ensures that the idle task is using init_mm right before its CPU goes -+ * offline. -+ */ -+void idle_task_exit(void) -+{ -+ struct mm_struct *mm = current->active_mm; -+ -+ BUG_ON(current != this_rq()->idle); -+ -+ if (mm != &init_mm) { -+ switch_mm(mm, &init_mm, current); -+ finish_arch_post_lock_switch(); -+ } -+ -+ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ -+} -+ -+/* -+ * Migrate all tasks from the rq, sleeping tasks will be migrated by -+ * try_to_wake_up()->select_task_rq(). -+ * -+ * Called with rq->lock held even though we'er in stop_machine() and -+ * there's no concurrency possible, we hold the required locks anyway -+ * because of lock validation efforts. -+ */ -+static void migrate_tasks(struct rq *dead_rq) -+{ -+ struct rq *rq = dead_rq; -+ struct task_struct *p, *stop = rq->stop; -+ struct skiplist_node *node; -+ int count = 0; -+ -+ /* -+ * Fudge the rq selection such that the below task selection loop -+ * doesn't get stuck on the currently eligible stop task. -+ * -+ * We're currently inside stop_machine() and the rq is either stuck -+ * in the stop_machine_cpu_stop() loop, or we're executing this code, -+ * either way we should never end up calling schedule() until we're -+ * done here. -+ */ -+ rq->stop = NULL; -+ -+ node = &rq->sl_header; -+ while ((node = node->next[0]) != &rq->sl_header) { -+ int dest_cpu; -+ -+ p = skiplist_entry(node, struct task_struct, sl_node); -+ -+ /* skip the running task */ -+ if (task_running(p)) -+ continue; -+ -+ /* -+ * Rules for changing task_struct::cpus_mask are holding -+ * both pi_lock and rq->lock, such that holding either -+ * stabilizes the mask. -+ * -+ * Drop rq->lock is not quite as disastrous as it usually is -+ * because !cpu_active at this point, which means load-balance -+ * will not interfere. Also, stop-machine. 
-+ */ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ /* -+ * Since we're inside stop-machine, _nothing_ should have -+ * changed the task, WARN if weird stuff happened, because in -+ * that case the above rq->lock drop is a fail too. -+ */ -+ if (WARN_ON(task_rq(p) != rq || !task_on_rq_queued(p))) { -+ raw_spin_unlock(&p->pi_lock); -+ continue; -+ } -+ -+ count++; -+ /* Find suitable destination for @next, with force if needed. */ -+ dest_cpu = select_fallback_rq(dead_rq->cpu, p); -+ -+ rq = __migrate_task(rq, p, dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ rq = dead_rq; -+ raw_spin_lock(&rq->lock); -+ /* Check queued task all over from the header again */ -+ node = &rq->sl_header; -+ } -+ -+ rq->stop = stop; -+} -+ -+static void set_rq_offline(struct rq *rq) -+{ -+ if (rq->online) -+ rq->online = false; -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+static void set_rq_online(struct rq *rq) -+{ -+ if (!rq->online) -+ rq->online = true; -+} -+ -+#ifdef CONFIG_SCHED_DEBUG -+ -+static __read_mostly int sched_debug_enabled; -+ -+static int __init sched_debug_setup(char *str) -+{ -+ sched_debug_enabled = 1; -+ -+ return 0; -+} -+early_param("sched_debug", sched_debug_setup); -+ -+static inline bool sched_debug(void) -+{ -+ return sched_debug_enabled; -+} -+#else /* !CONFIG_SCHED_DEBUG */ -+static inline bool sched_debug(void) -+{ -+ return false; -+} -+#endif /* CONFIG_SCHED_DEBUG */ -+ -+#ifdef CONFIG_SMP -+void scheduler_ipi(void) -+{ -+ /* -+ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting -+ * TIF_NEED_RESCHED remotely (for the first time) will also send -+ * this IPI. -+ */ -+ preempt_fold_need_resched(); -+ -+ if (!idle_cpu(smp_processor_id()) || need_resched()) -+ return; -+ -+ irq_enter(); -+ irq_exit(); -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (is_idle_task(rq->curr)) -+ smp_send_reschedule(cpu); -+ /* Else CPU is not idle, do nothing here */ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Topology list, bottom-up. -+ */ -+static struct sched_domain_topology_level default_topology[] = { -+#ifdef CONFIG_SCHED_SMT -+ { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, -+#endif -+#ifdef CONFIG_SCHED_MC -+ { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, -+#endif -+ { cpu_cpu_mask, SD_INIT_NAME(DIE) }, -+ { NULL, }, -+}; -+ -+static struct sched_domain_topology_level *sched_domain_topology = -+ default_topology; -+ -+#define for_each_sd_topology(tl) \ -+ for (tl = sched_domain_topology; tl->mask; tl++) -+ -+void set_sched_topology(struct sched_domain_topology_level *tl) -+{ -+ if (WARN_ON_ONCE(sched_smp_initialized)) -+ return; -+ -+ sched_domain_topology = tl; -+} -+ -+/* -+ * Initializers for schedule domains -+ * Non-inlined to reduce accumulated stack pressure in build_sched_domains() -+ */ -+ -+int sched_domain_level_max; -+ -+/* -+ * Partition sched domains as specified by the 'ndoms_new' -+ * cpumasks in the array doms_new[] of cpumasks. 
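/*
 * Illustrative userspace counterpart (not part of the PDS code above): the
 * SMT/MC/DIE levels and the sd_llc_id sharing test mirror the topology the
 * kernel already exports under sysfs.  Reading the standard topology files
 * for cpu0 shows which CPUs end up grouped by the affinity masks built later.
 */
#include <stdio.h>

static void show(const char *path)
{
	char buf[256];
	FILE *f = fopen(path, "r");

	if (f) {
		if (fgets(buf, sizeof(buf), f))
			printf("%-60s %s", path, buf);
		fclose(f);
	}
}

int main(void)
{
	show("/sys/devices/system/cpu/cpu0/topology/thread_siblings_list");
	show("/sys/devices/system/cpu/cpu0/topology/core_siblings_list");
	return 0;
}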
This compares -+ * doms_new[] to the current sched domain partitioning, doms_cur[]. -+ * It destroys each deleted domain and builds each new domain. -+ * -+ * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. -+ * The masks don't intersect (don't overlap.) We should setup one -+ * sched domain for each mask. CPUs not in any of the cpumasks will -+ * not be load balanced. If the same cpumask appears both in the -+ * current 'doms_cur' domains and in the new 'doms_new', we can leave -+ * it as it is. -+ * -+ * The passed in 'doms_new' should be allocated using -+ * alloc_sched_domains. This routine takes ownership of it and will -+ * free_sched_domains it when done with it. If the caller failed the -+ * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, -+ * and partition_sched_domains() will fallback to the single partition -+ * 'fallback_doms', it also forces the domains to be rebuilt. -+ * -+ * If doms_new == NULL it will be replaced with cpu_online_mask. -+ * ndoms_new == 0 is a special case for destroying existing domains, -+ * and it will not create the default domain. -+ * -+ * Call with hotplug lock held -+ */ -+void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], -+ struct sched_domain_attr *dattr_new) -+{ -+ /** -+ * PDS doesn't depend on sched domains, but just keep this api -+ */ -+} -+ -+/* -+ * used to mark begin/end of suspend/resume: -+ */ -+static int num_cpus_frozen; -+ -+#ifdef CONFIG_NUMA -+int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; -+ -+/* -+ * sched_numa_find_closest() - given the NUMA topology, find the cpu -+ * closest to @cpu from @cpumask. -+ * cpumask: cpumask to find a cpu from -+ * cpu: cpu to be close to -+ * -+ * returns: cpu, or nr_cpu_ids when nothing found. -+ */ -+int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return best_mask_cpu(cpu, cpus); -+} -+#endif /* CONFIG_NUMA */ -+ -+/* -+ * Update cpusets according to cpu_active mask. If cpusets are -+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper -+ * around partition_sched_domains(). -+ * -+ * If we come here as part of a suspend/resume, don't touch cpusets because we -+ * want to restore it back to its original state upon resume anyway. -+ */ -+static void cpuset_cpu_active(void) -+{ -+ if (cpuhp_tasks_frozen) { -+ /* -+ * num_cpus_frozen tracks how many CPUs are involved in suspend -+ * resume sequence. As long as this is not the last online -+ * operation in the resume sequence, just build a single sched -+ * domain, ignoring cpusets. -+ */ -+ partition_sched_domains(1, NULL, NULL); -+ if (--num_cpus_frozen) -+ return; -+ /* -+ * This is the last CPU online operation. So fall through and -+ * restore the original sched domains by considering the -+ * cpuset configurations. -+ */ -+ cpuset_force_rebuild(); -+ } -+ -+ cpuset_update_active_cpus(); -+} -+ -+static int cpuset_cpu_inactive(unsigned int cpu) -+{ -+ if (!cpuhp_tasks_frozen) { -+ cpuset_update_active_cpus(); -+ } else { -+ num_cpus_frozen++; -+ partition_sched_domains(1, NULL, NULL); -+ } -+ return 0; -+} -+ -+int sched_cpu_activate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going up, increment the number of cores with SMT present. 
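/*
 * Illustrative userspace counterpart (not part of the PDS code above):
 * sched_cpu_activate() and its deactivate counterpart below are driven by CPU
 * hotplug.  The same transitions can be requested (as root) through the
 * standard sysfs "online" file; this only shows the trigger, not the
 * scheduler side.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int set_cpu_online(int cpu, int online)
{
	char path[64];
	int fd;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/online", cpu);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	if (write(fd, online ? "1" : "0", 1) != 1) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	if (set_cpu_online(1, 0) || set_cpu_online(1, 1))
		perror("cpu1 hotplug");
	return 0;
}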
-+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_inc_cpuslocked(&sched_smt_present); -+#endif -+ set_cpu_active(cpu, true); -+ -+ if (sched_smp_initialized) -+ cpuset_cpu_active(); -+ -+ /* -+ * Put the rq online, if not already. This happens: -+ * -+ * 1) In the early boot process, because we build the real domains -+ * after all cpus have been brought up. -+ * -+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the -+ * domains. -+ */ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_online(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ int ret; -+ -+ set_cpu_active(cpu, false); -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. -+ */ -+ synchronize_rcu(); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going down, decrement the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_dec_cpuslocked(&sched_smt_present); -+#endif -+ -+ if (!sched_smp_initialized) -+ return 0; -+ -+ ret = cpuset_cpu_inactive(cpu); -+ if (ret) { -+ set_cpu_active(cpu, true); -+ return ret; -+ } -+ return 0; -+} -+ -+static void sched_rq_cpu_starting(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ rq->calc_load_update = calc_load_update; -+} -+ -+int sched_cpu_starting(unsigned int cpu) -+{ -+ sched_rq_cpu_starting(cpu); -+ sched_tick_start(cpu); -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+int sched_cpu_dying(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ sched_tick_stop(cpu); -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_offline(rq); -+ migrate_tasks(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ hrtick_clear(rq); -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_SMP -+static void sched_init_topology_cpumask_early(void) -+{ -+ int cpu, level; -+ cpumask_t *tmp; -+ -+ for_each_possible_cpu(cpu) { -+ for (level = 0; level < NR_CPU_AFFINITY_CHK_LEVEL; level++) { -+ tmp = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[level]); -+ cpumask_copy(tmp, cpu_possible_mask); -+ cpumask_clear_cpu(cpu, tmp); -+ } -+ per_cpu(sched_cpu_llc_start_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ per_cpu(sched_cpu_affinity_chk_end_masks, cpu) = -+ &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[1]); -+ } -+} -+ -+static void sched_init_topology_cpumask(void) -+{ -+ int cpu; -+ cpumask_t *chk; -+ -+ for_each_online_cpu(cpu) { -+ chk = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ -+#ifdef CONFIG_SCHED_SMT -+ cpumask_setall(chk); -+ cpumask_clear_cpu(cpu, chk); -+ if (cpumask_and(chk, chk, topology_sibling_cpumask(cpu))) { -+ per_cpu(sched_sibling_cpu, cpu) = cpumask_first(chk); -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - smt 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ } -+#endif -+#ifdef CONFIG_SCHED_MC -+ cpumask_setall(chk); -+ cpumask_clear_cpu(cpu, chk); -+ if (cpumask_and(chk, chk, cpu_coregroup_mask(cpu))) { -+ per_cpu(sched_cpu_llc_start_mask, cpu) = chk; -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - coregroup 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ } -+ cpumask_complement(chk, cpu_coregroup_mask(cpu)); -+ -+ /** -+ * Set up sd_llc_id per CPU -+ */ -+ per_cpu(sd_llc_id, cpu) = -+ cpumask_first(cpu_coregroup_mask(cpu)); -+#else -+ per_cpu(sd_llc_id, cpu) = -+ 
cpumask_first(topology_core_cpumask(cpu)); -+ -+ per_cpu(sched_cpu_llc_start_mask, cpu) = chk; -+ -+ cpumask_setall(chk); -+ cpumask_clear_cpu(cpu, chk); -+#endif /* NOT CONFIG_SCHED_MC */ -+ if (cpumask_and(chk, chk, topology_core_cpumask(cpu))) -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - core 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ cpumask_complement(chk, topology_core_cpumask(cpu)); -+ -+ if (cpumask_and(chk, chk, cpu_online_mask)) -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - others 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ -+ per_cpu(sched_cpu_affinity_chk_end_masks, cpu) = chk; -+ } -+} -+#endif -+ -+void __init sched_init_smp(void) -+{ -+ /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -+ BUG(); -+ -+ cpumask_copy(&sched_rq_queued_masks[SCHED_RQ_EMPTY], cpu_online_mask); -+ -+ sched_init_topology_cpumask(); -+ -+ sched_smp_initialized = true; -+} -+#else -+void __init sched_init_smp(void) -+{ -+} -+#endif /* CONFIG_SMP */ -+ -+int in_sched_functions(unsigned long addr) -+{ -+ return in_lock_functions(addr) || -+ (addr >= (unsigned long)__sched_text_start -+ && addr < (unsigned long)__sched_text_end); -+} -+ -+#ifdef CONFIG_CGROUP_SCHED -+/* task group related information */ -+struct task_group { -+ struct cgroup_subsys_state css; -+ -+ struct rcu_head rcu; -+ struct list_head list; -+ -+ struct task_group *parent; -+ struct list_head siblings; -+ struct list_head children; -+}; -+ -+/* -+ * Default task group. -+ * Every task in system belongs to this group at bootup. -+ */ -+struct task_group root_task_group; -+LIST_HEAD(task_groups); -+ -+/* Cacheline aligned slab cache for task_group */ -+static struct kmem_cache *task_group_cache __read_mostly; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void __init sched_init(void) -+{ -+ int i; -+ struct rq *rq; -+ -+ print_scheduler_version(); -+ -+ wait_bit_init(); -+ -+#ifdef CONFIG_SMP -+ for (i = 0; i < NR_SCHED_RQ_QUEUED_LEVEL; i++) -+ cpumask_clear(&sched_rq_queued_masks[i]); -+ cpumask_setall(&sched_rq_queued_masks[SCHED_RQ_EMPTY]); -+ set_bit(SCHED_RQ_EMPTY, sched_rq_queued_masks_bitmap); -+ -+ cpumask_setall(&sched_rq_pending_masks[SCHED_RQ_EMPTY]); -+ set_bit(SCHED_RQ_EMPTY, sched_rq_pending_masks_bitmap); -+#else -+ uprq = &per_cpu(runqueues, 0); -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+ task_group_cache = KMEM_CACHE(task_group, 0); -+ -+ list_add(&root_task_group.list, &task_groups); -+ INIT_LIST_HEAD(&root_task_group.children); -+ INIT_LIST_HEAD(&root_task_group.siblings); -+#endif /* CONFIG_CGROUP_SCHED */ -+ for_each_possible_cpu(i) { -+ rq = cpu_rq(i); -+ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); -+ raw_spin_lock_init(&rq->lock); -+ rq->dither = 0; -+ rq->nr_running = rq->nr_uninterruptible = 0; -+ rq->calc_load_active = 0; -+ rq->calc_load_update = jiffies + LOAD_FREQ; -+#ifdef CONFIG_SMP -+ rq->online = false; -+ rq->cpu = i; -+ -+ rq->queued_level = SCHED_RQ_EMPTY; -+ rq->pending_level = SCHED_RQ_EMPTY; -+#ifdef CONFIG_SCHED_SMT -+ per_cpu(sched_sibling_cpu, i) = i; -+ rq->active_balance = 0; -+#endif -+#endif -+ rq->nr_switches = 0; -+ atomic_set(&rq->nr_iowait, 0); -+ hrtick_rq_init(rq); -+ } -+#ifdef CONFIG_SMP -+ /* Set rq->online for cpu 0 */ -+ cpu_rq(0)->online = true; -+#endif -+ -+ /* -+ * The boot idle thread does lazy MMU switching as well: -+ */ -+ mmgrab(&init_mm); -+ enter_lazy_tlb(&init_mm, current); -+ -+ /* -+ * Make us the idle thread. 
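/*
 * Illustrative sketch (not part of the PDS code above): each runqueue
 * initialised here carries an ordered skiplist headed by rq->sl_header, with
 * the earliest-deadline task kept at the front.  Ignoring the skiplist
 * levels, the ordering is just a keyed sorted insert, roughly like this
 * (demo_* names are made up):
 */
#include <stdint.h>
#include <stdio.h>

struct demo_node {
	uint64_t deadline;
	struct demo_node *next;
};

/* Insert so the list stays sorted by ascending deadline (FIFO among equals). */
static void demo_enqueue(struct demo_node *head, struct demo_node *n)
{
	struct demo_node *pos = head;

	while (pos->next && pos->next->deadline <= n->deadline)
		pos = pos->next;
	n->next = pos->next;
	pos->next = n;
}

int main(void)
{
	struct demo_node head = { 0, NULL };
	struct demo_node a = { 300, NULL }, b = { 100, NULL }, c = { 200, NULL };

	demo_enqueue(&head, &a);
	demo_enqueue(&head, &b);
	demo_enqueue(&head, &c);

	/* "Pick next" is simply the first node after the header. */
	for (struct demo_node *p = head.next; p; p = p->next)
		printf("deadline %llu\n", (unsigned long long)p->deadline);
	return 0;
}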
Technically, schedule() should not be -+ * called from this thread, however somewhere below it might be, -+ * but because we are the idle thread, we just pick up running again -+ * when this runqueue becomes "idle". -+ */ -+ init_idle(current, smp_processor_id()); -+ -+ calc_load_update = jiffies + LOAD_FREQ; -+ -+#ifdef CONFIG_SMP -+ idle_thread_set_boot_cpu(); -+ -+ sched_init_topology_cpumask_early(); -+#endif /* SMP */ -+ -+ init_schedstats(); -+ -+ psi_init(); -+} -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+static inline int preempt_count_equals(int preempt_offset) -+{ -+ int nested = preempt_count() + rcu_preempt_depth(); -+ -+ return (nested == preempt_offset); -+} -+ -+void __might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* -+ * Blocking primitives will set (and therefore destroy) current->state, -+ * since we will exit with TASK_RUNNING make sure we enter with it, -+ * otherwise we will destroy state. -+ */ -+ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, -+ "do not call blocking ops when !TASK_RUNNING; " -+ "state=%lx set at [<%p>] %pS\n", -+ current->state, -+ (void *)current->task_state_change, -+ (void *)current->task_state_change); -+ -+ ___might_sleep(file, line, preempt_offset); -+} -+EXPORT_SYMBOL(__might_sleep); -+ -+void ___might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* Ratelimiting timestamp: */ -+ static unsigned long prev_jiffy; -+ -+ unsigned long preempt_disable_ip; -+ -+ /* WARN_ON_ONCE() by default, no rate limit required: */ -+ rcu_sleep_check(); -+ -+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ !is_idle_task(current) && !current->non_block_count) || -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) -+ return; -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ /* Save this before calling printk(), since that will clobber it: */ -+ preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ printk(KERN_ERR -+ "BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ printk(KERN_ERR -+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); -+ -+ if (task_stack_end_corrupted(current)) -+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ -+ debug_show_held_locks(current); -+ if (irqs_disabled()) -+ print_irqtrace_events(current); -+#ifdef CONFIG_DEBUG_PREEMPT -+ if (!preempt_count_equals(preempt_offset)) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+#endif -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL(___might_sleep); -+ -+void __cant_sleep(const char *file, int line, int preempt_offset) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > preempt_offset) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); -+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_sleep); -+#endif -+ -+#ifdef 
CONFIG_MAGIC_SYSRQ -+void normalize_rt_tasks(void) -+{ -+ struct task_struct *g, *p; -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ }; -+ -+ read_lock(&tasklist_lock); -+ for_each_process_thread(g, p) { -+ /* -+ * Only normalize user tasks: -+ */ -+ if (p->flags & PF_KTHREAD) -+ continue; -+ -+ if (!rt_task(p)) { -+ /* -+ * Renice negative nice level userspace -+ * tasks back to 0: -+ */ -+ if (task_nice(p) < 0) -+ set_user_nice(p, 0); -+ continue; -+ } -+ -+ __sched_setscheduler(p, &attr, false, false); -+ } -+ read_unlock(&tasklist_lock); -+} -+#endif /* CONFIG_MAGIC_SYSRQ */ -+ -+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) -+/* -+ * These functions are only useful for the IA64 MCA handling, or kdb. -+ * -+ * They can only be called when the whole system has been -+ * stopped - every CPU needs to be quiescent, and no scheduling -+ * activity can take place. Using them for anything else would -+ * be a serious bug, and as a result, they aren't even visible -+ * under any other configuration. -+ */ -+ -+/** -+ * curr_task - return the current task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ * -+ * Return: The current task for @cpu. -+ */ -+struct task_struct *curr_task(int cpu) -+{ -+ return cpu_curr(cpu); -+} -+ -+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ -+ -+#ifdef CONFIG_IA64 -+/** -+ * ia64_set_curr_task - set the current task for a given CPU. -+ * @cpu: the processor in question. -+ * @p: the task pointer to set. -+ * -+ * Description: This function must only be used when non-maskable interrupts -+ * are serviced on a separate stack. It allows the architecture to switch the -+ * notion of the current task on a CPU in a non-blocking manner. This function -+ * must be called with all CPU's synchronised, and interrupts disabled, the -+ * and caller must save the original value of the current task (see -+ * curr_task() above) and restore that value before reenabling interrupts and -+ * re-starting the system. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ */ -+void ia64_set_curr_task(int cpu, struct task_struct *p) -+{ -+ cpu_curr(cpu) = p; -+} -+ -+#endif -+ -+#ifdef CONFIG_SCHED_DEBUG -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+static void sched_free_group(struct task_group *tg) -+{ -+ kmem_cache_free(task_group_cache, tg); -+} -+ -+/* allocate runqueue etc for a new task group */ -+struct task_group *sched_create_group(struct task_group *parent) -+{ -+ struct task_group *tg; -+ -+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); -+ if (!tg) -+ return ERR_PTR(-ENOMEM); -+ -+ return tg; -+} -+ -+void sched_online_group(struct task_group *tg, struct task_group *parent) -+{ -+} -+ -+/* rcu callback to free various structures associated with a task group */ -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ /* Now it should be safe to free those cfs_rqs */ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+void sched_destroy_group(struct task_group *tg) -+{ -+ /* Wait for possible concurrent references to cfs_rqs complete */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ -+void sched_offline_group(struct task_group *tg) -+{ -+} -+ -+static inline struct task_group *css_tg(struct cgroup_subsys_state *css) -+{ -+ return css ? 
container_of(css, struct task_group, css) : NULL; -+} -+ -+static struct cgroup_subsys_state * -+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -+{ -+ struct task_group *parent = css_tg(parent_css); -+ struct task_group *tg; -+ -+ if (!parent) { -+ /* This is early initialization for the top cgroup */ -+ return &root_task_group.css; -+ } -+ -+ tg = sched_create_group(parent); -+ if (IS_ERR(tg)) -+ return ERR_PTR(-ENOMEM); -+ return &tg->css; -+} -+ -+/* Expose task group only after completing cgroup initialization */ -+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ struct task_group *parent = css_tg(css->parent); -+ -+ if (parent) -+ sched_online_group(tg, parent); -+ return 0; -+} -+ -+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ sched_offline_group(tg); -+} -+ -+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ /* -+ * Relies on the RCU grace period between css_released() and this. -+ */ -+ sched_free_group(tg); -+} -+ -+static void cpu_cgroup_fork(struct task_struct *task) -+{ -+} -+ -+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) -+{ -+ return 0; -+} -+ -+static void cpu_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+ -+static struct cftype cpu_legacy_files[] = { -+ { } /* Terminate */ -+}; -+ -+static struct cftype cpu_files[] = { -+ { } /* terminate */ -+}; -+ -+static int cpu_extra_stat_show(struct seq_file *sf, -+ struct cgroup_subsys_state *css) -+{ -+ return 0; -+} -+ -+struct cgroup_subsys cpu_cgrp_subsys = { -+ .css_alloc = cpu_cgroup_css_alloc, -+ .css_online = cpu_cgroup_css_online, -+ .css_released = cpu_cgroup_css_released, -+ .css_free = cpu_cgroup_css_free, -+ .css_extra_stat_show = cpu_extra_stat_show, -+ .fork = cpu_cgroup_fork, -+ .can_attach = cpu_cgroup_can_attach, -+ .attach = cpu_cgroup_attach, -+ .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, -+ .early_init = true, -+ .threaded = true, -+}; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+#undef CREATE_TRACE_POINTS -diff --git a/kernel/sched/pds_sched.h b/kernel/sched/pds_sched.h -new file mode 100644 -index 000000000000..6c3361f06087 ---- /dev/null -+++ b/kernel/sched/pds_sched.h -@@ -0,0 +1,518 @@ -+#ifndef PDS_SCHED_H -+#define PDS_SCHED_H -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#ifdef CONFIG_PARAVIRT -+# include -+#endif -+ -+#include "cpupri.h" -+ -+/* task_struct::on_rq states: */ -+#define TASK_ON_RQ_QUEUED 1 -+#define TASK_ON_RQ_MIGRATING 2 -+ -+static inline int task_on_rq_queued(struct task_struct *p) -+{ -+ return p->on_rq == TASK_ON_RQ_QUEUED; -+} -+ -+static inline int task_on_rq_migrating(struct task_struct *p) -+{ -+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+} -+ -+/* -+ * wake flags -+ */ -+#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -+#define WF_FORK 0x02 /* child wakeup after fork */ -+#define WF_MIGRATED 0x04 /* internal use, task got migrated */ -+ -+/* -+ * This is the main, per-CPU runqueue data structure. 
-+ * This data should only be modified by the local cpu. -+ */ -+struct rq { -+ /* runqueue lock: */ -+ raw_spinlock_t lock; -+ -+ struct task_struct __rcu *curr; -+ struct task_struct *idle, *stop; -+ struct mm_struct *prev_mm; -+ -+ struct skiplist_node sl_header; -+ -+ /* switch count */ -+ u64 nr_switches; -+ -+ atomic_t nr_iowait; -+ -+#ifdef CONFIG_MEMBARRIER -+ int membarrier_state; -+#endif -+ -+#ifdef CONFIG_SMP -+ int cpu; /* cpu of this runqueue */ -+ bool online; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ struct sched_avg avg_irq; -+#endif -+#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+ struct sched_avg avg_thermal; -+#endif -+ -+ unsigned long queued_level; -+ unsigned long pending_level; -+ -+#ifdef CONFIG_SCHED_SMT -+ int active_balance; -+ struct cpu_stop_work active_balance_work; -+#endif -+#endif /* CONFIG_SMP */ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ u64 prev_irq_time; -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+#ifdef CONFIG_PARAVIRT -+ u64 prev_steal_time; -+#endif /* CONFIG_PARAVIRT */ -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ u64 prev_steal_time_rq; -+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ -+ -+ /* calc_load related fields */ -+ unsigned long calc_load_update; -+ long calc_load_active; -+ -+ u64 clock, last_tick; -+ u64 clock_task; -+ int dither; -+ -+ unsigned long nr_running; -+ unsigned long nr_uninterruptible; -+ -+#ifdef CONFIG_SCHED_HRTICK -+#ifdef CONFIG_SMP -+ call_single_data_t hrtick_csd; -+#endif -+ struct hrtimer hrtick_timer; -+#endif -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+ /* latency stats */ -+ struct sched_info rq_sched_info; -+ unsigned long long rq_cpu_time; -+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ -+ -+ /* sys_sched_yield() stats */ -+ unsigned int yld_count; -+ -+ /* schedule() stats */ -+ unsigned int sched_switch; -+ unsigned int sched_count; -+ unsigned int sched_goidle; -+ -+ /* try_to_wake_up() stats */ -+ unsigned int ttwu_count; -+ unsigned int ttwu_local; -+#endif /* CONFIG_SCHEDSTATS */ -+#ifdef CONFIG_CPU_IDLE -+ /* Must be inspected within a rcu lock section */ -+ struct cpuidle_state *idle_state; -+#endif -+}; -+ -+extern unsigned long calc_load_update; -+extern atomic_long_t calc_load_tasks; -+ -+extern void calc_global_load_tick(struct rq *this_rq); -+extern long calc_load_fold_active(struct rq *this_rq, long adjust); -+ -+#ifndef CONFIG_SMP -+extern struct rq *uprq; -+#define cpu_rq(cpu) (uprq) -+#define this_rq() (uprq) -+#define raw_rq() (uprq) -+#define task_rq(p) (uprq) -+#define cpu_curr(cpu) ((uprq)->curr) -+#else /* CONFIG_SMP */ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) -+#define this_rq() this_cpu_ptr(&runqueues) -+#define raw_rq() raw_cpu_ptr(&runqueues) -+#define task_rq(p) cpu_rq(task_cpu(p)) -+#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -+ -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+void register_sched_domain_sysctl(void); -+void unregister_sched_domain_sysctl(void); -+#else -+static inline void register_sched_domain_sysctl(void) -+{ -+} -+static inline void unregister_sched_domain_sysctl(void) -+{ -+} -+#endif -+ -+#endif /* CONFIG_SMP */ -+ -+#ifndef arch_scale_freq_tick -+static __always_inline -+void arch_scale_freq_tick(void) -+{ -+} -+#endif -+ -+#ifndef arch_scale_freq_capacity -+static __always_inline -+unsigned long arch_scale_freq_capacity(int cpu) -+{ -+ return SCHED_CAPACITY_SCALE; -+} -+#endif -+ -+static inline u64 __rq_clock_broken(struct rq *rq) -+{ -+ return READ_ONCE(rq->clock); -+} -+ -+static 
inline u64 rq_clock(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock; -+} -+ -+static inline u64 rq_clock_task(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock_task; -+} -+ -+/** -+ * By default the decay is the default pelt decay period. -+ * The decay shift can change the decay period in -+ * multiples of 32. -+ * Decay shift Decay period(ms) -+ * 0 32 -+ * 1 64 -+ * 2 128 -+ * 3 256 -+ * 4 512 -+ */ -+extern int sched_thermal_decay_shift; -+ -+static inline u64 rq_clock_thermal(struct rq *rq) -+{ -+ return rq_clock_task(rq) >> sched_thermal_decay_shift; -+} -+ -+/* -+ * {de,en}queue flags: -+ * -+ * DEQUEUE_SLEEP - task is no longer runnable -+ * ENQUEUE_WAKEUP - task just became runnable -+ * -+ */ -+ -+#define DEQUEUE_SLEEP 0x01 -+ -+#define ENQUEUE_WAKEUP 0x01 -+ -+ -+/* -+ * Below are scheduler API which using in other kernel code -+ * It use the dummy rq_flags -+ * ToDo : PDS need to support these APIs for compatibility with mainline -+ * scheduler code. -+ */ -+struct rq_flags { -+ unsigned long flags; -+}; -+ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock); -+ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock); -+ -+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(&rq->lock); -+} -+ -+static inline void -+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) -+ __releases(rq->lock) -+ __releases(p->pi_lock) -+{ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+} -+ -+static inline void -+rq_unlock_irq(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+static inline struct rq * -+this_rq_lock_irq(struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ -+ return rq; -+} -+ -+static inline int task_current(struct rq *rq, struct task_struct *p) -+{ -+ return rq->curr == p; -+} -+ -+static inline bool task_running(struct task_struct *p) -+{ -+ return p->on_cpu; -+} -+ -+extern struct static_key_false sched_schedstats; -+ -+static inline void sched_ttwu_pending(void) { } -+ -+#ifdef CONFIG_CPU_IDLE -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+ rq->idle_state = idle_state; -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ WARN_ON(!rcu_read_lock_held()); -+ return rq->idle_state; -+} -+#else -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ return NULL; -+} -+#endif -+ -+static inline int cpu_of(const struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ return rq->cpu; -+#else -+ return 0; -+#endif -+} -+ -+#include "stats.h" -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+struct irqtime { -+ u64 total; -+ u64 tick_delta; -+ u64 irq_start_time; -+ struct u64_stats_sync sync; -+}; -+ -+DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -+ -+/* -+ * Returns the irqtime minus the softirq time computed by ksoftirqd. 
-+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime -+ * and never move forward. -+ */ -+static inline u64 irq_time_read(int cpu) -+{ -+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); -+ unsigned int seq; -+ u64 total; -+ -+ do { -+ seq = __u64_stats_fetch_begin(&irqtime->sync); -+ total = irqtime->total; -+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); -+ -+ return total; -+} -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+ -+#ifdef CONFIG_CPU_FREQ -+DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); -+ -+/** -+ * cpufreq_update_util - Take a note about CPU utilization changes. -+ * @rq: Runqueue to carry out the update for. -+ * @flags: Update reason flags. -+ * -+ * This function is called by the scheduler on the CPU whose utilization is -+ * being updated. -+ * -+ * It can only be called from RCU-sched read-side critical sections. -+ * -+ * The way cpufreq is currently arranged requires it to evaluate the CPU -+ * performance state (frequency/voltage) on a regular basis to prevent it from -+ * being stuck in a completely inadequate performance level for too long. -+ * That is not guaranteed to happen if the updates are only triggered from CFS -+ * and DL, though, because they may not be coming in if only RT tasks are -+ * active all the time (or there are RT tasks only). -+ * -+ * As a workaround for that issue, this function is called periodically by the -+ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, -+ * but that really is a band-aid. Going forward it should be replaced with -+ * solutions targeted more specifically at RT tasks. -+ */ -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+ data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); -+ if (data) -+ data->func(data, rq_clock(rq), flags); -+} -+ -+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) -+{ -+ if (cpu_of(rq) == smp_processor_id()) -+ cpufreq_update_util(rq, flags); -+} -+#else -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} -+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {} -+#endif /* CONFIG_CPU_FREQ */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+extern int __init sched_tick_offload_init(void); -+#else -+static inline int sched_tick_offload_init(void) { return 0; } -+#endif -+ -+#ifdef arch_scale_freq_capacity -+#ifndef arch_scale_freq_invariant -+#define arch_scale_freq_invariant() (true) -+#endif -+#else /* arch_scale_freq_capacity */ -+#define arch_scale_freq_invariant() (false) -+#endif -+ -+extern void schedule_idle(void); -+ -+/* -+ * !! For sched_setattr_nocheck() (kernel) only !! -+ * -+ * This is actually gross. :( -+ * -+ * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE -+ * tasks, but still be able to sleep. We need this on platforms that cannot -+ * atomically change clock frequency. Remove once fast switching will be -+ * available on such platforms. -+ * -+ * SUGOV stands for SchedUtil GOVernor. -+ */ -+#define SCHED_FLAG_SUGOV 0x10000000 -+ -+#ifdef CONFIG_MEMBARRIER -+/* -+ * The scheduler provides memory barriers required by membarrier between: -+ * - prior user-space memory accesses and store to rq->membarrier_state, -+ * - store to rq->membarrier_state and following user-space memory accesses. -+ * In the same way it provides those guarantees around store to rq->curr. 
-+ */ -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+ int membarrier_state; -+ -+ if (prev_mm == next_mm) -+ return; -+ -+ membarrier_state = atomic_read(&next_mm->membarrier_state); -+ if (READ_ONCE(rq->membarrier_state) == membarrier_state) -+ return; -+ -+ WRITE_ONCE(rq->membarrier_state, membarrier_state); -+} -+#else -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+} -+#endif -+ -+#ifdef CONFIG_NUMA -+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); -+#else -+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return nr_cpu_ids; -+} -+#endif -+ -+void swake_up_all_locked(struct swait_queue_head *q); -+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+#endif /* PDS_SCHED_H */ -diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c -index b647d04d9c8b..05b6cfd91842 100644 ---- a/kernel/sched/pelt.c -+++ b/kernel/sched/pelt.c -@@ -250,6 +250,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load) - WRITE_ONCE(sa->util_avg, sa->util_sum / divider); - } - -+#ifndef CONFIG_SCHED_PDS - /* - * sched_entity: - * -@@ -367,6 +368,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - - return 0; - } -+#endif - - #ifdef CONFIG_SCHED_THERMAL_PRESSURE - /* -diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h -index eb034d9f024d..a074572f2976 100644 ---- a/kernel/sched/pelt.h -+++ b/kernel/sched/pelt.h -@@ -1,11 +1,13 @@ - #ifdef CONFIG_SMP - #include "sched-pelt.h" - -+#ifndef CONFIG_SCHED_PDS - int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); - int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); - int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); - int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); - int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); -+#endif - - #ifdef CONFIG_SCHED_THERMAL_PRESSURE - int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity); -@@ -37,6 +39,7 @@ update_irq_load_avg(struct rq *rq, u64 running) - } - #endif - -+#ifndef CONFIG_SCHED_PDS - /* - * When a task is dequeued, its estimated utilization should not be update if - * its util_avg has not been updated at least once. 
-@@ -157,9 +160,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) - return rq_clock_pelt(rq_of(cfs_rq)); - } - #endif -+#endif /* CONFIG_SCHED_PDS */ - - #else - -+#ifndef CONFIG_SCHED_PDS - static inline int - update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) - { -@@ -188,6 +193,7 @@ static inline u64 thermal_load_avg(struct rq *rq) - { - return 0; - } -+#endif - - static inline int - update_irq_load_avg(struct rq *rq, u64 running) -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index db3a57675ccf..5a8060bd2343 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2,6 +2,10 @@ - /* - * Scheduler internal types and methods: - */ -+#ifdef CONFIG_SCHED_PDS -+#include "pds_sched.h" -+#else -+ - #include - - #include -@@ -2546,3 +2550,5 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) - - void swake_up_all_locked(struct swait_queue_head *q); - void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+#endif /* !CONFIG_SCHED_PDS */ -diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c -index 750fb3c67eed..45bd43942575 100644 ---- a/kernel/sched/stats.c -+++ b/kernel/sched/stats.c -@@ -22,8 +22,10 @@ static int show_schedstat(struct seq_file *seq, void *v) - } else { - struct rq *rq; - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_PDS - struct sched_domain *sd; - int dcount = 0; -+#endif - #endif - cpu = (unsigned long)(v - 2); - rq = cpu_rq(cpu); -@@ -40,6 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - seq_printf(seq, "\n"); - - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_PDS - /* domain-specific stats */ - rcu_read_lock(); - for_each_domain(cpu, sd) { -@@ -68,6 +71,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - sd->ttwu_move_balance); - } - rcu_read_unlock(); -+#endif - #endif - } - return 0; -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index 8a176d8727a3..b9dde576b576 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -130,8 +130,12 @@ static int __maybe_unused four = 4; - static unsigned long zero_ul; - static unsigned long one_ul = 1; - static unsigned long long_max = LONG_MAX; --static int one_hundred = 100; --static int one_thousand = 1000; -+static int __read_mostly one_hundred = 100; -+static int __read_mostly one_thousand = 1000; -+#ifdef CONFIG_SCHED_PDS -+extern int rr_interval; -+extern int sched_yield_type; -+#endif - #ifdef CONFIG_PRINTK - static int ten_thousand = 10000; - #endif -@@ -288,7 +292,7 @@ static struct ctl_table sysctl_base_table[] = { - { } - }; - --#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_PDS) - static int min_sched_granularity_ns = 100000; /* 100 usecs */ - static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_wakeup_granularity_ns; /* 0 usecs */ -@@ -305,6 +309,7 @@ static int max_extfrag_threshold = 1000; - #endif - - static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_PDS - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, -@@ -486,6 +491,7 @@ static struct ctl_table kern_table[] = { - .extra2 = SYSCTL_ONE, - }, - #endif -+#endif /* !CONFIG_SCHED_PDS */ - #ifdef CONFIG_PROVE_LOCKING - { - .procname = "prove_locking", -@@ -1049,6 +1055,26 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_SCHED_PDS -+ { -+ .procname = "rr_interval", -+ .data = &rr_interval, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = 
SYSCTL_ONE, -+ .extra2 = &one_thousand, -+ }, -+ { -+ .procname = "yield_type", -+ .data = &sched_yield_type, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &two, -+ }, -+#endif - #if defined(CONFIG_S390) && defined(CONFIG_SMP) - { - .procname = "spin_retry", -diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c -index 2fd3b3fa68bf..6f3b08afdd4c 100644 ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -236,7 +236,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) - u64 stime, utime; - - task_cputime(p, &utime, &stime); -- store_samples(samples, stime, utime, p->se.sum_exec_runtime); -+ store_samples(samples, stime, utime, tsk_seruntime(p)); - } - - static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, -@@ -806,6 +806,7 @@ static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, - } - } - -+#ifndef CONFIG_SCHED_PDS - static inline void check_dl_overrun(struct task_struct *tsk) - { - if (tsk->dl.dl_overrun) { -@@ -813,6 +814,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) - __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); - } - } -+#endif - - static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) - { -@@ -840,8 +842,10 @@ static void check_thread_timers(struct task_struct *tsk, - u64 samples[CPUCLOCK_MAX]; - unsigned long soft; - -+#ifndef CONFIG_SCHED_PDS - if (dl_task(tsk)) - check_dl_overrun(tsk); -+#endif - - if (expiry_cache_is_inactive(pct)) - return; -@@ -855,7 +859,7 @@ static void check_thread_timers(struct task_struct *tsk, - soft = task_rlimit(tsk, RLIMIT_RTTIME); - if (soft != RLIM_INFINITY) { - /* Task RT timeout is accounted in jiffies. RTTIME is usec */ -- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); -+ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); - unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - - /* At the hard limit, send SIGKILL. No further action. 
*/ -@@ -1091,8 +1095,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) - return true; - } - -+#ifndef CONFIG_SCHED_PDS - if (dl_task(tsk) && tsk->dl.dl_overrun) - return true; -+#endif - - return false; - } -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index b5e3496cf803..0816db0b9c16 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) - { - /* Make this a -deadline thread */ - static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_PDS -+ /* No deadline on BFS, use RR */ -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, - .sched_runtime = 100000ULL, - .sched_deadline = 10000000ULL, - .sched_period = 10000000ULL -+#endif - }; - struct wakeup_test_data *x = data; - diff --git a/linux57-tkg/linux57-tkg-patches/0006-add-acs-overrides_iommu.patch b/linux57-tkg/linux57-tkg-patches/0006-add-acs-overrides_iommu.patch deleted file mode 100644 index d1303a5..0000000 --- a/linux57-tkg/linux57-tkg-patches/0006-add-acs-overrides_iommu.patch +++ /dev/null @@ -1,193 +0,0 @@ -From cdeab384f48dd9c88e2dff2e9ad8d57dca1a1b1c Mon Sep 17 00:00:00 2001 -From: Mark Weiman -Date: Sun, 12 Aug 2018 11:36:21 -0400 -Subject: [PATCH] pci: Enable overrides for missing ACS capabilities - -This an updated version of Alex Williamson's patch from: -https://lkml.org/lkml/2013/5/30/513 - -Original commit message follows: - -PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that -allows us to control whether transactions are allowed to be redirected -in various subnodes of a PCIe topology. For instance, if two -endpoints are below a root port or downsteam switch port, the -downstream port may optionally redirect transactions between the -devices, bypassing upstream devices. The same can happen internally -on multifunction devices. The transaction may never be visible to the -upstream devices. - -One upstream device that we particularly care about is the IOMMU. If -a redirection occurs in the topology below the IOMMU, then the IOMMU -cannot provide isolation between devices. This is why the PCIe spec -encourages topologies to include ACS support. Without it, we have to -assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation. - -Unfortunately, far too many topologies do not support ACS to make this -a steadfast requirement. Even the latest chipsets from Intel are only -sporadically supporting ACS. We have trouble getting interconnect -vendors to include the PCIe spec required PCIe capability, let alone -suggested features. - -Therefore, we need to add some flexibility. The pcie_acs_override= -boot option lets users opt-in specific devices or sets of devices to -assume ACS support. The "downstream" option assumes full ACS support -on root ports and downstream switch ports. The "multifunction" -option assumes the subset of ACS features available on multifunction -endpoints and upstream switch ports are supported. The "id:nnnn:nnnn" -option enables ACS support on devices matching the provided vendor -and device IDs, allowing more strategic ACS overrides. These options -may be combined in any order. A maximum of 16 id specific overrides -are available. It's suggested to use the most limited set of options -necessary to avoid completely disabling ACS across the topology. -Note to hardware vendors, we have facilities to permanently quirk -specific devices which enforce isolation but not provide an ACS -capability. 
Please contact me to have your devices added and save -your customers the hassle of this boot option. - -Signed-off-by: Mark Weiman ---- - .../admin-guide/kernel-parameters.txt | 9 ++ - drivers/pci/quirks.c | 101 ++++++++++++++++++ - 2 files changed, 110 insertions(+) - -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index aefd358a5ca3..173b3596fd9e 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -3190,6 +3190,15 @@ - nomsi [MSI] If the PCI_MSI kernel config parameter is - enabled, this kernel boot option can be used to - disable the use of MSI interrupts system-wide. -+ pcie_acs_override = -+ [PCIE] Override missing PCIe ACS support for: -+ downstream -+ All downstream ports - full ACS capabilities -+ multifunction -+ All multifunction devices - multifunction ACS subset -+ id:nnnn:nnnn -+ Specific device - full ACS capabilities -+ Specified as vid:did (vendor/device ID) in hex - noioapicquirk [APIC] Disable all boot interrupt quirks. - Safety option to keep boot IRQs enabled. This - should never be necessary. -diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index 4700d24e5d55..8f7a3d7fd9c1 100644 ---- a/drivers/pci/quirks.c -+++ b/drivers/pci/quirks.c -@@ -3372,6 +3372,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) - dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; - } - -+static bool acs_on_downstream; -+static bool acs_on_multifunction; -+ -+#define NUM_ACS_IDS 16 -+struct acs_on_id { -+ unsigned short vendor; -+ unsigned short device; -+}; -+static struct acs_on_id acs_on_ids[NUM_ACS_IDS]; -+static u8 max_acs_id; -+ -+static __init int pcie_acs_override_setup(char *p) -+{ -+ if (!p) -+ return -EINVAL; -+ -+ while (*p) { -+ if (!strncmp(p, "downstream", 10)) -+ acs_on_downstream = true; -+ if (!strncmp(p, "multifunction", 13)) -+ acs_on_multifunction = true; -+ if (!strncmp(p, "id:", 3)) { -+ char opt[5]; -+ int ret; -+ long val; -+ -+ if (max_acs_id >= NUM_ACS_IDS - 1) { -+ pr_warn("Out of PCIe ACS override slots (%d)\n", -+ NUM_ACS_IDS); -+ goto next; -+ } -+ -+ p += 3; -+ snprintf(opt, 5, "%s", p); -+ ret = kstrtol(opt, 16, &val); -+ if (ret) { -+ pr_warn("PCIe ACS ID parse error %d\n", ret); -+ goto next; -+ } -+ acs_on_ids[max_acs_id].vendor = val; -+ -+ p += strcspn(p, ":"); -+ if (*p != ':') { -+ pr_warn("PCIe ACS invalid ID\n"); -+ goto next; -+ } -+ -+ p++; -+ snprintf(opt, 5, "%s", p); -+ ret = kstrtol(opt, 16, &val); -+ if (ret) { -+ pr_warn("PCIe ACS ID parse error %d\n", ret); -+ goto next; -+ } -+ acs_on_ids[max_acs_id].device = val; -+ max_acs_id++; -+ } -+next: -+ p += strcspn(p, ","); -+ if (*p == ',') -+ p++; -+ } -+ -+ if (acs_on_downstream || acs_on_multifunction || max_acs_id) -+ pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n"); -+ -+ return 0; -+} -+early_param("pcie_acs_override", pcie_acs_override_setup); -+ -+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags) -+{ -+ int i; -+ -+ /* Never override ACS for legacy devices or devices with ACS caps */ -+ if (!pci_is_pcie(dev) || -+ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS)) -+ return -ENOTTY; -+ -+ for (i = 0; i < max_acs_id; i++) -+ if (acs_on_ids[i].vendor == dev->vendor && -+ acs_on_ids[i].device == dev->device) -+ return 1; -+ -+ switch (pci_pcie_type(dev)) { -+ case PCI_EXP_TYPE_DOWNSTREAM: -+ case PCI_EXP_TYPE_ROOT_PORT: -+ if (acs_on_downstream) -+ return 1; -+ break; -+ case 
PCI_EXP_TYPE_ENDPOINT: -+ case PCI_EXP_TYPE_UPSTREAM: -+ case PCI_EXP_TYPE_LEG_END: -+ case PCI_EXP_TYPE_RC_END: -+ if (acs_on_multifunction && dev->multifunction) -+ return 1; -+ } -+ -+ return -ENOTTY; -+} - /* - * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset. - * The device will throw a Link Down error on AER-capable systems and -@@ -4513,6 +4613,7 @@ static const struct pci_dev_acs_enabled { - { PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs }, - /* Zhaoxin Root/Downstream Ports */ - { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, -+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides }, - { 0 } - }; - - diff --git a/linux57-tkg/linux57-tkg-patches/0007-v5.7-fsync.patch b/linux57-tkg/linux57-tkg-patches/0007-v5.7-fsync.patch deleted file mode 100644 index 01c86d8..0000000 --- a/linux57-tkg/linux57-tkg-patches/0007-v5.7-fsync.patch +++ /dev/null @@ -1,908 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Mon, 20 Apr 2020 14:09:11 +0200 -Subject: Import Fsync v3 patchset - Squashed from https://gitlab.collabora.com/tonyk/linux/-/commits/futex-proton-v3 - -diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index a89eb0accd5e2ee527be1e3e11b1117ff5bf94b4..580001e89c6caed57dd8b3cb491d65dce846caff 100644 ---- a/include/uapi/linux/futex.h -+++ b/include/uapi/linux/futex.h -@@ -21,6 +21,7 @@ - #define FUTEX_WAKE_BITSET 10 - #define FUTEX_WAIT_REQUEUE_PI 11 - #define FUTEX_CMP_REQUEUE_PI 12 -+#define FUTEX_WAIT_MULTIPLE 13 - - #define FUTEX_PRIVATE_FLAG 128 - #define FUTEX_CLOCK_REALTIME 256 -@@ -40,6 +41,8 @@ - FUTEX_PRIVATE_FLAG) - #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ - FUTEX_PRIVATE_FLAG) -+#define FUTEX_WAIT_MULTIPLE_PRIVATE (FUTEX_WAIT_MULTIPLE | \ -+ FUTEX_PRIVATE_FLAG) - - /* - * Support for robust futexes: the kernel cleans up held futexes at -@@ -150,4 +153,21 @@ struct robust_list_head { - (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \ - | ((oparg & 0xfff) << 12) | (cmparg & 0xfff)) - -+/* -+ * Maximum number of multiple futexes to wait for -+ */ -+#define FUTEX_MULTIPLE_MAX_COUNT 128 -+ -+/** -+ * struct futex_wait_block - Block of futexes to be waited for -+ * @uaddr: User address of the futex -+ * @val: Futex value expected by userspace -+ * @bitset: Bitset for the optional bitmasked wakeup -+ */ -+struct futex_wait_block { -+ __u32 __user *uaddr; -+ __u32 val; -+ __u32 bitset; -+}; -+ - #endif /* _UAPI_LINUX_FUTEX_H */ -diff --git a/kernel/futex.c b/kernel/futex.c -index 0cf84c8664f207c574325b899ef2e57f01295a94..58cf9eb2b851b4858e29b5ef4114a29a92e676ba 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -215,6 +215,8 @@ struct futex_pi_state { - * @rt_waiter: rt_waiter storage for use with requeue_pi - * @requeue_pi_key: the requeue_pi target futex key - * @bitset: bitset for the optional bitmasked wakeup -+ * @uaddr: userspace address of futex -+ * @uval: expected futex's value - * - * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so - * we can wake only the relevant ones (hashed queues may be shared). 
-@@ -237,6 +239,8 @@ struct futex_q { - struct rt_mutex_waiter *rt_waiter; - union futex_key *requeue_pi_key; - u32 bitset; -+ u32 __user *uaddr; -+ u32 uval; - } __randomize_layout; - - static const struct futex_q futex_q_init = { -@@ -2420,6 +2424,29 @@ static int unqueue_me(struct futex_q *q) - return ret; - } - -+/** -+ * unqueue_multiple() - Remove several futexes from their futex_hash_bucket -+ * @q: The list of futexes to unqueue -+ * @count: Number of futexes in the list -+ * -+ * Helper to unqueue a list of futexes. This can't fail. -+ * -+ * Return: -+ * - >=0 - Index of the last futex that was awoken; -+ * - -1 - If no futex was awoken -+ */ -+static int unqueue_multiple(struct futex_q *q, int count) -+{ -+ int ret = -1; -+ int i; -+ -+ for (i = 0; i < count; i++) { -+ if (!unqueue_me(&q[i])) -+ ret = i; -+ } -+ return ret; -+} -+ - /* - * PI futexes can not be requeued and must remove themself from the - * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry -@@ -2783,6 +2810,211 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, - return ret; - } - -+/** -+ * futex_wait_multiple_setup() - Prepare to wait and enqueue multiple futexes -+ * @qs: The corresponding futex list -+ * @count: The size of the lists -+ * @flags: Futex flags (FLAGS_SHARED, etc.) -+ * @awaken: Index of the last awoken futex -+ * -+ * Prepare multiple futexes in a single step and enqueue them. This may fail if -+ * the futex list is invalid or if any futex was already awoken. On success the -+ * task is ready to interruptible sleep. -+ * -+ * Return: -+ * - 1 - One of the futexes was awaken by another thread -+ * - 0 - Success -+ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL -+ */ -+static int futex_wait_multiple_setup(struct futex_q *qs, int count, -+ unsigned int flags, int *awaken) -+{ -+ struct futex_hash_bucket *hb; -+ int ret, i; -+ u32 uval; -+ -+ /* -+ * Enqueuing multiple futexes is tricky, because we need to -+ * enqueue each futex in the list before dealing with the next -+ * one to avoid deadlocking on the hash bucket. But, before -+ * enqueuing, we need to make sure that current->state is -+ * TASK_INTERRUPTIBLE, so we don't absorb any awake events, which -+ * cannot be done before the get_futex_key of the next key, -+ * because it calls get_user_pages, which can sleep. Thus, we -+ * fetch the list of futexes keys in two steps, by first pinning -+ * all the memory keys in the futex key, and only then we read -+ * each key and queue the corresponding futex. -+ */ -+retry: -+ for (i = 0; i < count; i++) { -+ qs[i].key = FUTEX_KEY_INIT; -+ ret = get_futex_key(qs[i].uaddr, flags & FLAGS_SHARED, -+ &qs[i].key, FUTEX_READ); -+ if (unlikely(ret)) { -+ for (--i; i >= 0; i--) -+ put_futex_key(&qs[i].key); -+ return ret; -+ } -+ } -+ -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ for (i = 0; i < count; i++) { -+ struct futex_q *q = &qs[i]; -+ -+ hb = queue_lock(q); -+ -+ ret = get_futex_value_locked(&uval, q->uaddr); -+ if (ret) { -+ /* -+ * We need to try to handle the fault, which -+ * cannot be done without sleep, so we need to -+ * undo all the work already done, to make sure -+ * we don't miss any wake ups. Therefore, clean -+ * up, handle the fault and retry from the -+ * beginning. -+ */ -+ queue_unlock(hb); -+ -+ /* -+ * Keys 0..(i-1) are implicitly put -+ * on unqueue_multiple. 
-+ */ -+ put_futex_key(&q->key); -+ -+ *awaken = unqueue_multiple(qs, i); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ /* -+ * On a real fault, prioritize the error even if -+ * some other futex was awoken. Userspace gave -+ * us a bad address, -EFAULT them. -+ */ -+ ret = get_user(uval, q->uaddr); -+ if (ret) -+ return ret; -+ -+ /* -+ * Even if the page fault was handled, If -+ * something was already awaken, we can safely -+ * give up and succeed to give a hint for userspace to -+ * acquire the right futex faster. -+ */ -+ if (*awaken >= 0) -+ return 1; -+ -+ goto retry; -+ } -+ -+ if (uval != q->uval) { -+ queue_unlock(hb); -+ -+ put_futex_key(&qs[i].key); -+ -+ /* -+ * If something was already awaken, we can -+ * safely ignore the error and succeed. -+ */ -+ *awaken = unqueue_multiple(qs, i); -+ __set_current_state(TASK_RUNNING); -+ if (*awaken >= 0) -+ return 1; -+ -+ return -EWOULDBLOCK; -+ } -+ -+ /* -+ * The bucket lock can't be held while dealing with the -+ * next futex. Queue each futex at this moment so hb can -+ * be unlocked. -+ */ -+ queue_me(&qs[i], hb); -+ } -+ return 0; -+} -+ -+/** -+ * futex_wait_multiple() - Prepare to wait on and enqueue several futexes -+ * @qs: The list of futexes to wait on -+ * @op: Operation code from futex's syscall -+ * @count: The number of objects -+ * @abs_time: Timeout before giving up and returning to userspace -+ * -+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function -+ * sleeps on a group of futexes and returns on the first futex that -+ * triggered, or after the timeout has elapsed. -+ * -+ * Return: -+ * - >=0 - Hint to the futex that was awoken -+ * - <0 - On error -+ */ -+static int futex_wait_multiple(struct futex_q *qs, int op, -+ u32 count, ktime_t *abs_time) -+{ -+ struct hrtimer_sleeper timeout, *to; -+ int ret, flags = 0, hint = 0; -+ unsigned int i; -+ -+ if (!(op & FUTEX_PRIVATE_FLAG)) -+ flags |= FLAGS_SHARED; -+ -+ if (op & FUTEX_CLOCK_REALTIME) -+ flags |= FLAGS_CLOCKRT; -+ -+ to = futex_setup_timer(abs_time, &timeout, flags, 0); -+ while (1) { -+ ret = futex_wait_multiple_setup(qs, count, flags, &hint); -+ if (ret) { -+ if (ret > 0) { -+ /* A futex was awaken during setup */ -+ ret = hint; -+ } -+ break; -+ } -+ -+ if (to) -+ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); -+ -+ /* -+ * Avoid sleeping if another thread already tried to -+ * wake us. -+ */ -+ for (i = 0; i < count; i++) { -+ if (plist_node_empty(&qs[i].list)) -+ break; -+ } -+ -+ if (i == count && (!to || to->task)) -+ freezable_schedule(); -+ -+ ret = unqueue_multiple(qs, count); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ if (ret >= 0) -+ break; -+ if (to && !to->task) { -+ ret = -ETIMEDOUT; -+ break; -+ } else if (signal_pending(current)) { -+ ret = -ERESTARTSYS; -+ break; -+ } -+ /* -+ * The final case is a spurious wakeup, for -+ * which just retry. 
-+ */ -+ } -+ -+ if (to) { -+ hrtimer_cancel(&to->timer); -+ destroy_hrtimer_on_stack(&to->timer); -+ } -+ -+ return ret; -+} -+ - static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, - ktime_t *abs_time, u32 bitset) - { -@@ -3907,6 +4139,43 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - return -ENOSYS; - } - -+/** -+ * futex_read_wait_block - Read an array of futex_wait_block from userspace -+ * @uaddr: Userspace address of the block -+ * @count: Number of blocks to be read -+ * -+ * This function creates and allocate an array of futex_q (we zero it to -+ * initialize the fields) and then, for each futex_wait_block element from -+ * userspace, fill a futex_q element with proper values. -+ */ -+inline struct futex_q *futex_read_wait_block(u32 __user *uaddr, u32 count) -+{ -+ unsigned int i; -+ struct futex_q *qs; -+ struct futex_wait_block fwb; -+ struct futex_wait_block __user *entry = -+ (struct futex_wait_block __user *)uaddr; -+ -+ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) -+ return ERR_PTR(-EINVAL); -+ -+ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); -+ if (!qs) -+ return ERR_PTR(-ENOMEM); -+ -+ for (i = 0; i < count; i++) { -+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { -+ kfree(qs); -+ return ERR_PTR(-EFAULT); -+ } -+ -+ qs[i].uaddr = fwb.uaddr; -+ qs[i].uval = fwb.val; -+ qs[i].bitset = fwb.bitset; -+ } -+ -+ return qs; -+} - - SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, -@@ -3919,7 +4188,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || -- cmd == FUTEX_WAIT_REQUEUE_PI)) { -+ cmd == FUTEX_WAIT_REQUEUE_PI || -+ cmd == FUTEX_WAIT_MULTIPLE)) { - if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) - return -EFAULT; - if (get_timespec64(&ts, utime)) -@@ -3940,6 +4210,25 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) - val2 = (u32) (unsigned long) utime; - -+ if (cmd == FUTEX_WAIT_MULTIPLE) { -+ int ret; -+ struct futex_q *qs; -+ -+#ifdef CONFIG_X86_X32 -+ if (unlikely(in_x32_syscall())) -+ return -ENOSYS; -+#endif -+ qs = futex_read_wait_block(uaddr, val); -+ -+ if (IS_ERR(qs)) -+ return PTR_ERR(qs); -+ -+ ret = futex_wait_multiple(qs, op, val, tp); -+ kfree(qs); -+ -+ return ret; -+ } -+ - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); - } - -@@ -4102,6 +4391,57 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, - #endif /* CONFIG_COMPAT */ - - #ifdef CONFIG_COMPAT_32BIT_TIME -+/** -+ * struct compat_futex_wait_block - Block of futexes to be waited for -+ * @uaddr: User address of the futex (compatible pointer) -+ * @val: Futex value expected by userspace -+ * @bitset: Bitset for the optional bitmasked wakeup -+ */ -+struct compat_futex_wait_block { -+ compat_uptr_t uaddr; -+ __u32 val; -+ __u32 bitset; -+}; -+ -+/** -+ * compat_futex_read_wait_block - Read an array of futex_wait_block from -+ * userspace -+ * @uaddr: Userspace address of the block -+ * @count: Number of blocks to be read -+ * -+ * This function does the same as futex_read_wait_block(), except that it -+ * converts the pointer to the futex from the compat version to the regular one. 
-+ */ -+inline struct futex_q *compat_futex_read_wait_block(u32 __user *uaddr, -+ u32 count) -+{ -+ unsigned int i; -+ struct futex_q *qs; -+ struct compat_futex_wait_block fwb; -+ struct compat_futex_wait_block __user *entry = -+ (struct compat_futex_wait_block __user *)uaddr; -+ -+ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) -+ return ERR_PTR(-EINVAL); -+ -+ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); -+ if (!qs) -+ return ERR_PTR(-ENOMEM); -+ -+ for (i = 0; i < count; i++) { -+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { -+ kfree(qs); -+ return ERR_PTR(-EFAULT); -+ } -+ -+ qs[i].uaddr = compat_ptr(fwb.uaddr); -+ qs[i].uval = fwb.val; -+ qs[i].bitset = fwb.bitset; -+ } -+ -+ return qs; -+} -+ - SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - struct old_timespec32 __user *, utime, u32 __user *, uaddr2, - u32, val3) -@@ -4113,7 +4453,8 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || -- cmd == FUTEX_WAIT_REQUEUE_PI)) { -+ cmd == FUTEX_WAIT_REQUEUE_PI || -+ cmd == FUTEX_WAIT_MULTIPLE)) { - if (get_old_timespec32(&ts, utime)) - return -EFAULT; - if (!timespec64_valid(&ts)) -@@ -4128,6 +4469,19 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) - val2 = (int) (unsigned long) utime; - -+ if (cmd == FUTEX_WAIT_MULTIPLE) { -+ int ret; -+ struct futex_q *qs = compat_futex_read_wait_block(uaddr, val); -+ -+ if (IS_ERR(qs)) -+ return PTR_ERR(qs); -+ -+ ret = futex_wait_multiple(qs, op, val, tp); -+ kfree(qs); -+ -+ return ret; -+ } -+ - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); - } - #endif /* CONFIG_COMPAT_32BIT_TIME */ -diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c -index ee55e6d389a3f053194435342c4e471dc7cf8786..2a63e1c2cfb6407a5988233217cff2e52787bc66 100644 ---- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c -+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c -@@ -11,6 +11,7 @@ - * - * HISTORY - * 2009-Nov-6: Initial version by Darren Hart -+ * 2019-Dec-13: Add WAIT_MULTIPLE test by Krisman - * - *****************************************************************************/ - -@@ -41,6 +42,8 @@ int main(int argc, char *argv[]) - { - futex_t f1 = FUTEX_INITIALIZER; - struct timespec to; -+ time_t secs; -+ struct futex_wait_block fwb = {&f1, f1, 0}; - int res, ret = RET_PASS; - int c; - -@@ -65,7 +68,7 @@ int main(int argc, char *argv[]) - } - - ksft_print_header(); -- ksft_set_plan(1); -+ ksft_set_plan(2); - ksft_print_msg("%s: Block on a futex and wait for timeout\n", - basename(argv[0])); - ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); -@@ -79,8 +82,39 @@ int main(int argc, char *argv[]) - if (!res || errno != ETIMEDOUT) { - fail("futex_wait returned %d\n", ret < 0 ? 
errno : ret); - ret = RET_FAIL; -+ } else -+ ksft_test_result_pass("futex_wait timeout succeeds\n"); -+ -+ info("Calling futex_wait_multiple on f1: %u @ %p\n", f1, &f1); -+ -+ /* Setup absolute time */ -+ ret = clock_gettime(CLOCK_REALTIME, &to); -+ secs = (to.tv_nsec + timeout_ns) / 1000000000; -+ to.tv_nsec = ((int64_t)to.tv_nsec + timeout_ns) % 1000000000; -+ to.tv_sec += secs; -+ info("to.tv_sec = %ld\n", to.tv_sec); -+ info("to.tv_nsec = %ld\n", to.tv_nsec); -+ -+ res = futex_wait_multiple(&fwb, 1, &to, -+ FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME); -+ -+#ifdef __ILP32__ -+ if (res == -1 && errno == ENOSYS) { -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+ } else { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; - } -+#else -+ if (!res || errno != ETIMEDOUT) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; -+ } else -+ ksft_test_result_pass("futex_wait_multiple timeout succeeds\n"); -+#endif /* __ILP32__ */ - -- print_result(TEST_NAME, ret); -+ ksft_print_cnts(); - return ret; - } -diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h -index ddbcfc9b7bac4aebb5bac2f249e26ecfd948aa84..bb103bef4557012ef9a389ca74c868e4476a8a31 100644 ---- a/tools/testing/selftests/futex/include/futextest.h -+++ b/tools/testing/selftests/futex/include/futextest.h -@@ -38,6 +38,14 @@ typedef volatile u_int32_t futex_t; - #ifndef FUTEX_CMP_REQUEUE_PI - #define FUTEX_CMP_REQUEUE_PI 12 - #endif -+#ifndef FUTEX_WAIT_MULTIPLE -+#define FUTEX_WAIT_MULTIPLE 13 -+struct futex_wait_block { -+ futex_t *uaddr; -+ futex_t val; -+ __u32 bitset; -+}; -+#endif - #ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE - #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ - FUTEX_PRIVATE_FLAG) -@@ -80,6 +88,20 @@ futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags) - return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); - } - -+/** -+ * futex_wait_multiple() - block on several futexes with optional timeout -+ * @fwb: wait block user space address -+ * @count: number of entities at fwb -+ * @timeout: absolute timeout -+ */ -+static inline int -+futex_wait_multiple(struct futex_wait_block *fwb, int count, -+ struct timespec *timeout, int opflags) -+{ -+ return futex(fwb, FUTEX_WAIT_MULTIPLE, count, timeout, NULL, 0, -+ opflags); -+} -+ - /** - * futex_wake() - wake one or more tasks blocked on uaddr - * @nr_wake: wake up to this many tasks -diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c -index 0ae390ff816449c88d0bb655a26eb014382c2b4f..bcbac042992d447e0bc9ef5fefe94e875de310f2 100644 ---- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c -+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c -@@ -12,6 +12,7 @@ - * - * HISTORY - * 2009-Nov-14: Initial version by Gowrishankar -+ * 2019-Dec-13: Add WAIT_MULTIPLE test by Krisman - * - *****************************************************************************/ - -@@ -40,6 +41,7 @@ int main(int argc, char *argv[]) - { - struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; - futex_t f1 = FUTEX_INITIALIZER; -+ struct futex_wait_block fwb = {&f1, f1+1, 0}; - int res, ret = RET_PASS; - int c; - -@@ -61,7 +63,7 @@ int main(int argc, char *argv[]) - } - - ksft_print_header(); -- ksft_set_plan(1); -+ 
ksft_set_plan(2); - ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", - basename(argv[0])); - -@@ -71,8 +73,30 @@ int main(int argc, char *argv[]) - fail("futex_wait returned: %d %s\n", - res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; -+ } else -+ ksft_test_result_pass("futex_wait wouldblock succeeds\n"); -+ -+ info("Calling futex_wait_multiple on f1: %u @ %p with val=%u\n", -+ f1, &f1, f1+1); -+ res = futex_wait_multiple(&fwb, 1, NULL, FUTEX_PRIVATE_FLAG); -+ -+#ifdef __ILP32__ -+ if (res != -1 || errno != ENOSYS) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; -+ } else { -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+ } -+#else -+ if (!res || errno != EWOULDBLOCK) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; - } -+ ksft_test_result_pass("futex_wait_multiple wouldblock succeeds\n"); -+#endif /* __ILP32__ */ - -- print_result(TEST_NAME, ret); -+ ksft_print_cnts(); - return ret; - } -diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore -index a09f570619023750f558c84004aff166b4337d72..4660128a545edb04a17cc6bd9760931c1386122f 100644 ---- a/tools/testing/selftests/futex/functional/.gitignore -+++ b/tools/testing/selftests/futex/functional/.gitignore -@@ -5,3 +5,4 @@ futex_wait_private_mapped_file - futex_wait_timeout - futex_wait_uninitialized_heap - futex_wait_wouldblock -+futex_wait_multiple -diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile -index 30996306cabcfe89a47977643e529b122893bb7e..75f9fface11fa3c90c1bdb9a49b3ea51291afd58 100644 ---- a/tools/testing/selftests/futex/functional/Makefile -+++ b/tools/testing/selftests/futex/functional/Makefile -@@ -14,7 +14,8 @@ TEST_GEN_FILES := \ - futex_requeue_pi_signal_restart \ - futex_requeue_pi_mismatched_ops \ - futex_wait_uninitialized_heap \ -- futex_wait_private_mapped_file -+ futex_wait_private_mapped_file \ -+ futex_wait_multiple - - TEST_PROGS := run.sh - -diff --git a/tools/testing/selftests/futex/functional/futex_wait_multiple.c b/tools/testing/selftests/futex/functional/futex_wait_multiple.c -new file mode 100644 -index 0000000000000000000000000000000000000000..b48422e79f42edba1653bb0bd2a4c4fd98d2d48d ---- /dev/null -+++ b/tools/testing/selftests/futex/functional/futex_wait_multiple.c -@@ -0,0 +1,173 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/****************************************************************************** -+ * -+ * Copyright © Collabora, Ltd., 2019 -+ * -+ * DESCRIPTION -+ * Test basic semantics of FUTEX_WAIT_MULTIPLE -+ * -+ * AUTHOR -+ * Gabriel Krisman Bertazi -+ * -+ * HISTORY -+ * 2019-Dec-13: Initial version by Krisman -+ * -+ *****************************************************************************/ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "futextest.h" -+#include "logging.h" -+ -+#define TEST_NAME "futex-wait-multiple" -+#define timeout_ns 100000 -+#define MAX_COUNT 128 -+#define WAKE_WAIT_US 3000000 -+ -+int ret = RET_PASS; -+char *progname; -+futex_t f[MAX_COUNT] = {0}; -+struct futex_wait_block fwb[MAX_COUNT]; -+ -+void usage(char *prog) -+{ -+ printf("Usage: %s\n", prog); -+ printf(" -c Use color\n"); -+ printf(" -h Display this help message\n"); -+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", -+ VQUIET, 
VCRITICAL, VINFO); -+} -+ -+void test_count_overflow(void) -+{ -+ futex_t f = FUTEX_INITIALIZER; -+ struct futex_wait_block fwb[MAX_COUNT+1]; -+ int res, i; -+ -+ ksft_print_msg("%s: Test a too big number of futexes\n", progname); -+ -+ for (i = 0; i < MAX_COUNT+1; i++) { -+ fwb[i].uaddr = &f; -+ fwb[i].val = f; -+ fwb[i].bitset = 0; -+ } -+ -+ res = futex_wait_multiple(fwb, MAX_COUNT+1, NULL, FUTEX_PRIVATE_FLAG); -+ -+#ifdef __ILP32__ -+ if (res != -1 || errno != ENOSYS) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; -+ } else { -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+ } -+#else -+ if (res != -1 || errno != EINVAL) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; -+ } else { -+ ksft_test_result_pass("futex_wait_multiple count overflow succeed\n"); -+ } -+ -+#endif /* __ILP32__ */ -+} -+ -+void *waiterfn(void *arg) -+{ -+ int res; -+ -+ res = futex_wait_multiple(fwb, MAX_COUNT, NULL, FUTEX_PRIVATE_FLAG); -+ -+#ifdef __ILP32__ -+ if (res != -1 || errno != ENOSYS) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; -+ } else { -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+ } -+#else -+ if (res < 0) -+ ksft_print_msg("waiter failed %d\n", res); -+ -+ info("futex_wait_multiple: Got hint futex %d was freed\n", res); -+#endif /* __ILP32__ */ -+ -+ return NULL; -+} -+ -+void test_fwb_wakeup(void) -+{ -+ int res, i; -+ pthread_t waiter; -+ -+ ksft_print_msg("%s: Test wake up in a list of futex\n", progname); -+ -+ for (i = 0; i < MAX_COUNT; i++) { -+ fwb[i].uaddr = &f[i]; -+ fwb[i].val = f[i]; -+ fwb[i].bitset = 0xffffffff; -+ } -+ -+ res = pthread_create(&waiter, NULL, waiterfn, NULL); -+ if (res) { -+ ksft_test_result_fail("Creating waiting thread failed"); -+ ksft_exit_fail(); -+ } -+ -+ usleep(WAKE_WAIT_US); -+ res = futex_wake(&(f[MAX_COUNT-1]), 1, FUTEX_PRIVATE_FLAG); -+ if (res != 1) { -+ ksft_test_result_fail("Failed to wake thread res=%d\n", res); -+ ksft_exit_fail(); -+ } -+ -+ pthread_join(waiter, NULL); -+ ksft_test_result_pass("%s succeed\n", __func__); -+} -+ -+int main(int argc, char *argv[]) -+{ -+ int c; -+ -+ while ((c = getopt(argc, argv, "cht:v:")) != -1) { -+ switch (c) { -+ case 'c': -+ log_color(1); -+ break; -+ case 'h': -+ usage(basename(argv[0])); -+ exit(0); -+ case 'v': -+ log_verbosity(atoi(optarg)); -+ break; -+ default: -+ usage(basename(argv[0])); -+ exit(1); -+ } -+ } -+ -+ progname = basename(argv[0]); -+ -+ ksft_print_header(); -+ ksft_set_plan(2); -+ -+ test_count_overflow(); -+ -+#ifdef __ILP32__ -+ // if it's a 32x binary, there's no futex to wakeup -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+#else -+ test_fwb_wakeup(); -+#endif /* __ILP32__ */ -+ -+ ksft_print_cnts(); -+ return ret; -+} -diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh -index 1acb6ace1680e8f3d6b3ee2dc528c19ddfdb018e..a8be94f28ff78b4879d2d19bca5d9b0fcb26c1f8 100755 ---- a/tools/testing/selftests/futex/functional/run.sh -+++ b/tools/testing/selftests/futex/functional/run.sh -@@ -73,3 +73,6 @@ echo - echo - ./futex_wait_uninitialized_heap $COLOR - ./futex_wait_private_mapped_file $COLOR -+ -+echo -+./futex_wait_multiple $COLOR -diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index 
580001e89c6caed57dd8b3cb491d65dce846caff..a3e760886b8e7e74285fdcf2caaaa6f66ad16675 100644 ---- a/include/uapi/linux/futex.h -+++ b/include/uapi/linux/futex.h -@@ -21,7 +21,7 @@ - #define FUTEX_WAKE_BITSET 10 - #define FUTEX_WAIT_REQUEUE_PI 11 - #define FUTEX_CMP_REQUEUE_PI 12 --#define FUTEX_WAIT_MULTIPLE 13 -+#define FUTEX_WAIT_MULTIPLE 31 - - #define FUTEX_PRIVATE_FLAG 128 - #define FUTEX_CLOCK_REALTIME 256 -diff --git a/kernel/futex.c b/kernel/futex.c -index 58cf9eb2b851b4858e29b5ef4114a29a92e676ba..e0bb628a5e1988dcc9ae5442a4259edc229d578d 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -4198,7 +4198,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - return -EINVAL; - - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } -@@ -4399,6 +4399,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, - */ - struct compat_futex_wait_block { - compat_uptr_t uaddr; -+ __u32 pad; - __u32 val; - __u32 bitset; - }; -@@ -4461,7 +4462,7 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - return -EINVAL; - - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } diff --git a/linux57-tkg/linux57-tkg-patches/0008-5.7-bcachefs.patch b/linux57-tkg/linux57-tkg-patches/0008-5.7-bcachefs.patch deleted file mode 100644 index 4ca0a38..0000000 --- a/linux57-tkg/linux57-tkg-patches/0008-5.7-bcachefs.patch +++ /dev/null @@ -1,71085 +0,0 @@ -diff --git a/block/bio.c b/block/bio.c -index 21cbaa6a1c20..8d236b819612 100644 ---- a/block/bio.c -+++ b/block/bio.c -@@ -1049,6 +1049,7 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) - bio_set_flag(bio, BIO_NO_PAGE_REF); - return bio->bi_vcnt ? 0 : ret; - } -+EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages); - - static void submit_bio_wait_endio(struct bio *bio) - { -@@ -1243,6 +1244,7 @@ void bio_set_pages_dirty(struct bio *bio) - set_page_dirty_lock(bvec->bv_page); - } - } -+EXPORT_SYMBOL_GPL(bio_set_pages_dirty); - - /* - * bio_check_pages_dirty() will check that all the BIO's pages are still dirty. -@@ -1302,6 +1304,7 @@ void bio_check_pages_dirty(struct bio *bio) - spin_unlock_irqrestore(&bio_dirty_lock, flags); - schedule_work(&bio_dirty_work); - } -+EXPORT_SYMBOL_GPL(bio_check_pages_dirty); - - void update_io_ticks(struct hd_struct *part, unsigned long now, bool end) - { -diff --git a/block/blk-core.c b/block/blk-core.c -index 9bfaee050c82..60a1a2907abf 100644 ---- a/block/blk-core.c -+++ b/block/blk-core.c -@@ -210,18 +210,23 @@ int blk_status_to_errno(blk_status_t status) - } - EXPORT_SYMBOL_GPL(blk_status_to_errno); - --static void print_req_error(struct request *req, blk_status_t status, -- const char *caller) -+const char *blk_status_to_str(blk_status_t status) - { - int idx = (__force int)status; - - if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors))) -- return; -+ return "(invalid error)"; -+ return blk_errors[idx].name; -+} -+EXPORT_SYMBOL_GPL(blk_status_to_str); - -+static void print_req_error(struct request *req, blk_status_t status, -+ const char *caller) -+{ - printk_ratelimited(KERN_ERR - "%s: %s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x " - "phys_seg %u prio class %u\n", -- caller, blk_errors[idx].name, -+ caller, blk_status_to_str(status), - req->rq_disk ? 
req->rq_disk->disk_name : "?", - blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)), - req->cmd_flags & ~REQ_OP_MASK, -diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig -index 6dfa653d30db..6b256291b924 100644 ---- a/drivers/md/bcache/Kconfig -+++ b/drivers/md/bcache/Kconfig -@@ -3,6 +3,7 @@ - config BCACHE - tristate "Block device as cache" - select CRC64 -+ select CLOSURES - help - Allows a block device to be used as cache for other devices; uses - a btree for indexing and the layout is optimized for SSDs. -@@ -17,12 +18,3 @@ config BCACHE_DEBUG - - Enables extra debugging tools, allows expensive runtime checks to be - turned on. -- --config BCACHE_CLOSURES_DEBUG -- bool "Debug closures" -- depends on BCACHE -- select DEBUG_FS -- help -- Keeps all active closures in a linked list and provides a debugfs -- interface to list them, which makes it possible to see asynchronous -- operations that get stuck. -diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile -index fd714628da6a..0fb1b6009da3 100644 ---- a/drivers/md/bcache/Makefile -+++ b/drivers/md/bcache/Makefile -@@ -2,6 +2,6 @@ - - obj-$(CONFIG_BCACHE) += bcache.o - --bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\ -- io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ -+bcache-y := alloc.o bset.o btree.o debug.o extents.o io.o\ -+ journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ - util.o writeback.o -diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h -index 74a9849ea164..e03597696920 100644 ---- a/drivers/md/bcache/bcache.h -+++ b/drivers/md/bcache/bcache.h -@@ -180,6 +180,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -192,7 +193,6 @@ - - #include "bset.h" - #include "util.h" --#include "closure.h" - - struct bucket { - atomic_t pin; -diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c -deleted file mode 100644 -index 0164a1fe94a9..000000000000 ---- a/drivers/md/bcache/closure.c -+++ /dev/null -@@ -1,217 +0,0 @@ --// SPDX-License-Identifier: GPL-2.0 --/* -- * Asynchronous refcounty things -- * -- * Copyright 2010, 2011 Kent Overstreet -- * Copyright 2012 Google, Inc. 
-- */ -- --#include --#include --#include --#include -- --#include "closure.h" -- --static inline void closure_put_after_sub(struct closure *cl, int flags) --{ -- int r = flags & CLOSURE_REMAINING_MASK; -- -- BUG_ON(flags & CLOSURE_GUARD_MASK); -- BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR)); -- -- if (!r) { -- if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { -- atomic_set(&cl->remaining, -- CLOSURE_REMAINING_INITIALIZER); -- closure_queue(cl); -- } else { -- struct closure *parent = cl->parent; -- closure_fn *destructor = cl->fn; -- -- closure_debug_destroy(cl); -- -- if (destructor) -- destructor(cl); -- -- if (parent) -- closure_put(parent); -- } -- } --} -- --/* For clearing flags with the same atomic op as a put */ --void closure_sub(struct closure *cl, int v) --{ -- closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining)); --} -- --/* -- * closure_put - decrement a closure's refcount -- */ --void closure_put(struct closure *cl) --{ -- closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); --} -- --/* -- * closure_wake_up - wake up all closures on a wait list, without memory barrier -- */ --void __closure_wake_up(struct closure_waitlist *wait_list) --{ -- struct llist_node *list; -- struct closure *cl, *t; -- struct llist_node *reverse = NULL; -- -- list = llist_del_all(&wait_list->list); -- -- /* We first reverse the list to preserve FIFO ordering and fairness */ -- reverse = llist_reverse_order(list); -- -- /* Then do the wakeups */ -- llist_for_each_entry_safe(cl, t, reverse, list) { -- closure_set_waiting(cl, 0); -- closure_sub(cl, CLOSURE_WAITING + 1); -- } --} -- --/** -- * closure_wait - add a closure to a waitlist -- * @waitlist: will own a ref on @cl, which will be released when -- * closure_wake_up() is called on @waitlist. -- * @cl: closure pointer. 
-- * -- */ --bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) --{ -- if (atomic_read(&cl->remaining) & CLOSURE_WAITING) -- return false; -- -- closure_set_waiting(cl, _RET_IP_); -- atomic_add(CLOSURE_WAITING + 1, &cl->remaining); -- llist_add(&cl->list, &waitlist->list); -- -- return true; --} -- --struct closure_syncer { -- struct task_struct *task; -- int done; --}; -- --static void closure_sync_fn(struct closure *cl) --{ -- struct closure_syncer *s = cl->s; -- struct task_struct *p; -- -- rcu_read_lock(); -- p = READ_ONCE(s->task); -- s->done = 1; -- wake_up_process(p); -- rcu_read_unlock(); --} -- --void __sched __closure_sync(struct closure *cl) --{ -- struct closure_syncer s = { .task = current }; -- -- cl->s = &s; -- continue_at(cl, closure_sync_fn, NULL); -- -- while (1) { -- set_current_state(TASK_UNINTERRUPTIBLE); -- if (s.done) -- break; -- schedule(); -- } -- -- __set_current_state(TASK_RUNNING); --} -- --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- --static LIST_HEAD(closure_list); --static DEFINE_SPINLOCK(closure_list_lock); -- --void closure_debug_create(struct closure *cl) --{ -- unsigned long flags; -- -- BUG_ON(cl->magic == CLOSURE_MAGIC_ALIVE); -- cl->magic = CLOSURE_MAGIC_ALIVE; -- -- spin_lock_irqsave(&closure_list_lock, flags); -- list_add(&cl->all, &closure_list); -- spin_unlock_irqrestore(&closure_list_lock, flags); --} -- --void closure_debug_destroy(struct closure *cl) --{ -- unsigned long flags; -- -- BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE); -- cl->magic = CLOSURE_MAGIC_DEAD; -- -- spin_lock_irqsave(&closure_list_lock, flags); -- list_del(&cl->all); -- spin_unlock_irqrestore(&closure_list_lock, flags); --} -- --static struct dentry *closure_debug; -- --static int debug_seq_show(struct seq_file *f, void *data) --{ -- struct closure *cl; -- -- spin_lock_irq(&closure_list_lock); -- -- list_for_each_entry(cl, &closure_list, all) { -- int r = atomic_read(&cl->remaining); -- -- seq_printf(f, "%p: %pS -> %pS p %p r %i ", -- cl, (void *) cl->ip, cl->fn, cl->parent, -- r & CLOSURE_REMAINING_MASK); -- -- seq_printf(f, "%s%s\n", -- test_bit(WORK_STRUCT_PENDING_BIT, -- work_data_bits(&cl->work)) ? "Q" : "", -- r & CLOSURE_RUNNING ? "R" : ""); -- -- if (r & CLOSURE_WAITING) -- seq_printf(f, " W %pS\n", -- (void *) cl->waiting_on); -- -- seq_printf(f, "\n"); -- } -- -- spin_unlock_irq(&closure_list_lock); -- return 0; --} -- --static int debug_seq_open(struct inode *inode, struct file *file) --{ -- return single_open(file, debug_seq_show, NULL); --} -- --static const struct file_operations debug_ops = { -- .owner = THIS_MODULE, -- .open = debug_seq_open, -- .read = seq_read, -- .release = single_release --}; -- --void __init closure_debug_init(void) --{ -- if (!IS_ERR_OR_NULL(bcache_debug)) -- /* -- * it is unnecessary to check return value of -- * debugfs_create_file(), we should not care -- * about this. 
-- */ -- closure_debug = debugfs_create_file( -- "closures", 0400, bcache_debug, NULL, &debug_ops); --} --#endif -- --MODULE_AUTHOR("Kent Overstreet "); --MODULE_LICENSE("GPL"); -diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h -deleted file mode 100644 -index c88cdc4ae4ec..000000000000 ---- a/drivers/md/bcache/closure.h -+++ /dev/null -@@ -1,378 +0,0 @@ --/* SPDX-License-Identifier: GPL-2.0 */ --#ifndef _LINUX_CLOSURE_H --#define _LINUX_CLOSURE_H -- --#include --#include --#include --#include -- --/* -- * Closure is perhaps the most overused and abused term in computer science, but -- * since I've been unable to come up with anything better you're stuck with it -- * again. -- * -- * What are closures? -- * -- * They embed a refcount. The basic idea is they count "things that are in -- * progress" - in flight bios, some other thread that's doing something else - -- * anything you might want to wait on. -- * -- * The refcount may be manipulated with closure_get() and closure_put(). -- * closure_put() is where many of the interesting things happen, when it causes -- * the refcount to go to 0. -- * -- * Closures can be used to wait on things both synchronously and asynchronously, -- * and synchronous and asynchronous use can be mixed without restriction. To -- * wait synchronously, use closure_sync() - you will sleep until your closure's -- * refcount hits 1. -- * -- * To wait asynchronously, use -- * continue_at(cl, next_function, workqueue); -- * -- * passing it, as you might expect, the function to run when nothing is pending -- * and the workqueue to run that function out of. -- * -- * continue_at() also, critically, requires a 'return' immediately following the -- * location where this macro is referenced, to return to the calling function. -- * There's good reason for this. -- * -- * To use safely closures asynchronously, they must always have a refcount while -- * they are running owned by the thread that is running them. Otherwise, suppose -- * you submit some bios and wish to have a function run when they all complete: -- * -- * foo_endio(struct bio *bio) -- * { -- * closure_put(cl); -- * } -- * -- * closure_init(cl); -- * -- * do_stuff(); -- * closure_get(cl); -- * bio1->bi_endio = foo_endio; -- * bio_submit(bio1); -- * -- * do_more_stuff(); -- * closure_get(cl); -- * bio2->bi_endio = foo_endio; -- * bio_submit(bio2); -- * -- * continue_at(cl, complete_some_read, system_wq); -- * -- * If closure's refcount started at 0, complete_some_read() could run before the -- * second bio was submitted - which is almost always not what you want! More -- * importantly, it wouldn't be possible to say whether the original thread or -- * complete_some_read()'s thread owned the closure - and whatever state it was -- * associated with! -- * -- * So, closure_init() initializes a closure's refcount to 1 - and when a -- * closure_fn is run, the refcount will be reset to 1 first. -- * -- * Then, the rule is - if you got the refcount with closure_get(), release it -- * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount -- * on a closure because you called closure_init() or you were run out of a -- * closure - _always_ use continue_at(). Doing so consistently will help -- * eliminate an entire class of particularly pernicious races. -- * -- * Lastly, you might have a wait list dedicated to a specific event, and have no -- * need for specifying the condition - you just want to wait until someone runs -- * closure_wake_up() on the appropriate wait list. 
In that case, just use -- * closure_wait(). It will return either true or false, depending on whether the -- * closure was already on a wait list or not - a closure can only be on one wait -- * list at a time. -- * -- * Parents: -- * -- * closure_init() takes two arguments - it takes the closure to initialize, and -- * a (possibly null) parent. -- * -- * If parent is non null, the new closure will have a refcount for its lifetime; -- * a closure is considered to be "finished" when its refcount hits 0 and the -- * function to run is null. Hence -- * -- * continue_at(cl, NULL, NULL); -- * -- * returns up the (spaghetti) stack of closures, precisely like normal return -- * returns up the C stack. continue_at() with non null fn is better thought of -- * as doing a tail call. -- * -- * All this implies that a closure should typically be embedded in a particular -- * struct (which its refcount will normally control the lifetime of), and that -- * struct can very much be thought of as a stack frame. -- */ -- --struct closure; --struct closure_syncer; --typedef void (closure_fn) (struct closure *); --extern struct dentry *bcache_debug; -- --struct closure_waitlist { -- struct llist_head list; --}; -- --enum closure_state { -- /* -- * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by -- * the thread that owns the closure, and cleared by the thread that's -- * waking up the closure. -- * -- * The rest are for debugging and don't affect behaviour: -- * -- * CLOSURE_RUNNING: Set when a closure is running (i.e. by -- * closure_init() and when closure_put() runs then next function), and -- * must be cleared before remaining hits 0. Primarily to help guard -- * against incorrect usage and accidentally transferring references. -- * continue_at() and closure_return() clear it for you, if you're doing -- * something unusual you can use closure_set_dead() which also helps -- * annotate where references are being transferred. -- */ -- -- CLOSURE_BITS_START = (1U << 26), -- CLOSURE_DESTRUCTOR = (1U << 26), -- CLOSURE_WAITING = (1U << 28), -- CLOSURE_RUNNING = (1U << 30), --}; -- --#define CLOSURE_GUARD_MASK \ -- ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1) -- --#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) --#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) -- --struct closure { -- union { -- struct { -- struct workqueue_struct *wq; -- struct closure_syncer *s; -- struct llist_node list; -- closure_fn *fn; -- }; -- struct work_struct work; -- }; -- -- struct closure *parent; -- -- atomic_t remaining; -- --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG --#define CLOSURE_MAGIC_DEAD 0xc054dead --#define CLOSURE_MAGIC_ALIVE 0xc054a11e -- -- unsigned int magic; -- struct list_head all; -- unsigned long ip; -- unsigned long waiting_on; --#endif --}; -- --void closure_sub(struct closure *cl, int v); --void closure_put(struct closure *cl); --void __closure_wake_up(struct closure_waitlist *list); --bool closure_wait(struct closure_waitlist *list, struct closure *cl); --void __closure_sync(struct closure *cl); -- --/** -- * closure_sync - sleep until a closure a closure has nothing left to wait on -- * -- * Sleeps until the refcount hits 1 - the thread that's running the closure owns -- * the last refcount. 
-- */ --static inline void closure_sync(struct closure *cl) --{ -- if ((atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK) != 1) -- __closure_sync(cl); --} -- --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- --void closure_debug_init(void); --void closure_debug_create(struct closure *cl); --void closure_debug_destroy(struct closure *cl); -- --#else -- --static inline void closure_debug_init(void) {} --static inline void closure_debug_create(struct closure *cl) {} --static inline void closure_debug_destroy(struct closure *cl) {} -- --#endif -- --static inline void closure_set_ip(struct closure *cl) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- cl->ip = _THIS_IP_; --#endif --} -- --static inline void closure_set_ret_ip(struct closure *cl) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- cl->ip = _RET_IP_; --#endif --} -- --static inline void closure_set_waiting(struct closure *cl, unsigned long f) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- cl->waiting_on = f; --#endif --} -- --static inline void closure_set_stopped(struct closure *cl) --{ -- atomic_sub(CLOSURE_RUNNING, &cl->remaining); --} -- --static inline void set_closure_fn(struct closure *cl, closure_fn *fn, -- struct workqueue_struct *wq) --{ -- closure_set_ip(cl); -- cl->fn = fn; -- cl->wq = wq; -- /* between atomic_dec() in closure_put() */ -- smp_mb__before_atomic(); --} -- --static inline void closure_queue(struct closure *cl) --{ -- struct workqueue_struct *wq = cl->wq; -- /** -- * Changes made to closure, work_struct, or a couple of other structs -- * may cause work.func not pointing to the right location. -- */ -- BUILD_BUG_ON(offsetof(struct closure, fn) -- != offsetof(struct work_struct, func)); -- if (wq) { -- INIT_WORK(&cl->work, cl->work.func); -- BUG_ON(!queue_work(wq, &cl->work)); -- } else -- cl->fn(cl); --} -- --/** -- * closure_get - increment a closure's refcount -- */ --static inline void closure_get(struct closure *cl) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- BUG_ON((atomic_inc_return(&cl->remaining) & -- CLOSURE_REMAINING_MASK) <= 1); --#else -- atomic_inc(&cl->remaining); --#endif --} -- --/** -- * closure_init - Initialize a closure, setting the refcount to 1 -- * @cl: closure to initialize -- * @parent: parent of the new closure. cl will take a refcount on it for its -- * lifetime; may be NULL. -- */ --static inline void closure_init(struct closure *cl, struct closure *parent) --{ -- memset(cl, 0, sizeof(struct closure)); -- cl->parent = parent; -- if (parent) -- closure_get(parent); -- -- atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); -- -- closure_debug_create(cl); -- closure_set_ip(cl); --} -- --static inline void closure_init_stack(struct closure *cl) --{ -- memset(cl, 0, sizeof(struct closure)); -- atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); --} -- --/** -- * closure_wake_up - wake up all closures on a wait list, -- * with memory barrier -- */ --static inline void closure_wake_up(struct closure_waitlist *list) --{ -- /* Memory barrier for the wait list */ -- smp_mb(); -- __closure_wake_up(list); --} -- --/** -- * continue_at - jump to another function with barrier -- * -- * After @cl is no longer waiting on anything (i.e. all outstanding refs have -- * been dropped with closure_put()), it will resume execution at @fn running out -- * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). 
-- * -- * This is because after calling continue_at() you no longer have a ref on @cl, -- * and whatever @cl owns may be freed out from under you - a running closure fn -- * has a ref on its own closure which continue_at() drops. -- * -- * Note you are expected to immediately return after using this macro. -- */ --#define continue_at(_cl, _fn, _wq) \ --do { \ -- set_closure_fn(_cl, _fn, _wq); \ -- closure_sub(_cl, CLOSURE_RUNNING + 1); \ --} while (0) -- --/** -- * closure_return - finish execution of a closure -- * -- * This is used to indicate that @cl is finished: when all outstanding refs on -- * @cl have been dropped @cl's ref on its parent closure (as passed to -- * closure_init()) will be dropped, if one was specified - thus this can be -- * thought of as returning to the parent closure. -- */ --#define closure_return(_cl) continue_at((_cl), NULL, NULL) -- --/** -- * continue_at_nobarrier - jump to another function without barrier -- * -- * Causes @fn to be executed out of @cl, in @wq context (or called directly if -- * @wq is NULL). -- * -- * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, -- * thus it's not safe to touch anything protected by @cl after a -- * continue_at_nobarrier(). -- */ --#define continue_at_nobarrier(_cl, _fn, _wq) \ --do { \ -- set_closure_fn(_cl, _fn, _wq); \ -- closure_queue(_cl); \ --} while (0) -- --/** -- * closure_return_with_destructor - finish execution of a closure, -- * with destructor -- * -- * Works like closure_return(), except @destructor will be called when all -- * outstanding refs on @cl have been dropped; @destructor may be used to safely -- * free the memory occupied by @cl, and it is called with the ref on the parent -- * closure still held - so @destructor could safely return an item to a -- * freelist protected by @cl's parent. -- */ --#define closure_return_with_destructor(_cl, _destructor) \ --do { \ -- set_closure_fn(_cl, _destructor, NULL); \ -- closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ --} while (0) -- --/** -- * closure_call - execute @fn out of a new, uninitialized closure -- * -- * Typically used when running out of one closure, and we want to run @fn -- * asynchronously out of a new closure - @parent will then wait for @cl to -- * finish. 
-- */ --static inline void closure_call(struct closure *cl, closure_fn fn, -- struct workqueue_struct *wq, -- struct closure *parent) --{ -- closure_init(cl, parent); -- continue_at_nobarrier(cl, fn, wq); --} -- --#endif /* _LINUX_CLOSURE_H */ -diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index d98354fa28e3..9f3e769b5a67 100644 ---- a/drivers/md/bcache/super.c -+++ b/drivers/md/bcache/super.c -@@ -2696,7 +2696,6 @@ static int __init bcache_init(void) - goto err; - - bch_debug_init(); -- closure_debug_init(); - - bcache_is_reboot = false; - -diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h -index c029f7443190..59093f9f1793 100644 ---- a/drivers/md/bcache/util.h -+++ b/drivers/md/bcache/util.h -@@ -4,6 +4,7 @@ - #define _BCACHE_UTIL_H - - #include -+#include - #include - #include - #include -@@ -13,8 +14,6 @@ - #include - #include - --#include "closure.h" -- - #define PAGE_SECTORS (PAGE_SIZE / 512) - - struct closure; -diff --git a/fs/Kconfig b/fs/Kconfig -index f08fbbfafd9a..8502f8b7d8a7 100644 ---- a/fs/Kconfig -+++ b/fs/Kconfig -@@ -40,6 +40,7 @@ source "fs/ocfs2/Kconfig" - source "fs/btrfs/Kconfig" - source "fs/nilfs2/Kconfig" - source "fs/f2fs/Kconfig" -+source "fs/bcachefs/Kconfig" - source "fs/zonefs/Kconfig" - - config FS_DAX -diff --git a/fs/Makefile b/fs/Makefile -index 2ce5112b02c8..8e926e6bf48f 100644 ---- a/fs/Makefile -+++ b/fs/Makefile -@@ -130,6 +130,7 @@ obj-$(CONFIG_OCFS2_FS) += ocfs2/ - obj-$(CONFIG_BTRFS_FS) += btrfs/ - obj-$(CONFIG_GFS2_FS) += gfs2/ - obj-$(CONFIG_F2FS_FS) += f2fs/ -+obj-$(CONFIG_BCACHEFS_FS) += bcachefs/ - obj-$(CONFIG_CEPH_FS) += ceph/ - obj-$(CONFIG_PSTORE) += pstore/ - obj-$(CONFIG_EFIVAR_FS) += efivarfs/ -diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig -new file mode 100644 -index 000000000000..10abddae6a80 ---- /dev/null -+++ b/fs/bcachefs/Kconfig -@@ -0,0 +1,50 @@ -+ -+config BCACHEFS_FS -+ tristate "bcachefs filesystem support" -+ depends on BLOCK -+ select EXPORTFS -+ select CLOSURES -+ select LIBCRC32C -+ select CRC64 -+ select FS_POSIX_ACL -+ select LZ4_COMPRESS -+ select LZ4_DECOMPRESS -+ select ZLIB_DEFLATE -+ select ZLIB_INFLATE -+ select ZSTD_COMPRESS -+ select ZSTD_DECOMPRESS -+ select CRYPTO_SHA256 -+ select CRYPTO_CHACHA20 -+ select CRYPTO_POLY1305 -+ select KEYS -+ select SIXLOCKS -+ select RAID6_PQ -+ select XOR_BLOCKS -+ ---help--- -+ The bcachefs filesystem - a modern, copy on write filesystem, with -+ support for multiple devices, compression, checksumming, etc. -+ -+config BCACHEFS_QUOTA -+ bool "bcachefs quota support" -+ depends on BCACHEFS_FS -+ select QUOTACTL -+ -+config BCACHEFS_POSIX_ACL -+ bool "bcachefs POSIX ACL support" -+ depends on BCACHEFS_FS -+ select FS_POSIX_ACL -+ -+config BCACHEFS_DEBUG -+ bool "bcachefs debugging" -+ depends on BCACHEFS_FS -+ ---help--- -+ Enables many extra debugging checks and assertions. -+ -+ The resulting code will be significantly slower than normal; you -+ probably shouldn't select this option unless you're a developer. 
-+ -+config BCACHEFS_TESTS -+ bool "bcachefs unit and performance tests" -+ depends on BCACHEFS_FS -+ ---help--- -+ Include some unit and performance tests for the core btree code -diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile -new file mode 100644 -index 000000000000..d85ced62c0dd ---- /dev/null -+++ b/fs/bcachefs/Makefile -@@ -0,0 +1,59 @@ -+ -+obj-$(CONFIG_BCACHEFS_FS) += bcachefs.o -+ -+bcachefs-y := \ -+ acl.o \ -+ alloc_background.o \ -+ alloc_foreground.o \ -+ bkey.o \ -+ bkey_methods.o \ -+ bkey_sort.o \ -+ bset.o \ -+ btree_cache.o \ -+ btree_gc.o \ -+ btree_io.o \ -+ btree_iter.o \ -+ btree_key_cache.o \ -+ btree_update_interior.o \ -+ btree_update_leaf.o \ -+ buckets.o \ -+ chardev.o \ -+ checksum.o \ -+ clock.o \ -+ compress.o \ -+ debug.o \ -+ dirent.o \ -+ disk_groups.o \ -+ ec.o \ -+ error.o \ -+ extents.o \ -+ extent_update.o \ -+ fs.o \ -+ fs-common.o \ -+ fs-ioctl.o \ -+ fs-io.o \ -+ fsck.o \ -+ inode.o \ -+ io.o \ -+ journal.o \ -+ journal_io.o \ -+ journal_reclaim.o \ -+ journal_seq_blacklist.o \ -+ keylist.o \ -+ migrate.o \ -+ move.o \ -+ movinggc.o \ -+ opts.o \ -+ quota.o \ -+ rebalance.o \ -+ recovery.o \ -+ reflink.o \ -+ replicas.o \ -+ siphash.o \ -+ super.o \ -+ super-io.o \ -+ sysfs.o \ -+ tests.o \ -+ trace.o \ -+ util.o \ -+ xattr.o -diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c -new file mode 100644 -index 000000000000..76c98ddbf628 ---- /dev/null -+++ b/fs/bcachefs/acl.c -@@ -0,0 +1,388 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ -+#include "bcachefs.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "acl.h" -+#include "fs.h" -+#include "xattr.h" -+ -+static inline size_t bch2_acl_size(unsigned nr_short, unsigned nr_long) -+{ -+ return sizeof(bch_acl_header) + -+ sizeof(bch_acl_entry_short) * nr_short + -+ sizeof(bch_acl_entry) * nr_long; -+} -+ -+static inline int acl_to_xattr_type(int type) -+{ -+ switch (type) { -+ case ACL_TYPE_ACCESS: -+ return KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS; -+ case ACL_TYPE_DEFAULT: -+ return KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT; -+ default: -+ BUG(); -+ } -+} -+ -+/* -+ * Convert from filesystem to in-memory representation. 
-+ */ -+static struct posix_acl *bch2_acl_from_disk(const void *value, size_t size) -+{ -+ const void *p, *end = value + size; -+ struct posix_acl *acl; -+ struct posix_acl_entry *out; -+ unsigned count = 0; -+ -+ if (!value) -+ return NULL; -+ if (size < sizeof(bch_acl_header)) -+ goto invalid; -+ if (((bch_acl_header *)value)->a_version != -+ cpu_to_le32(BCH_ACL_VERSION)) -+ goto invalid; -+ -+ p = value + sizeof(bch_acl_header); -+ while (p < end) { -+ const bch_acl_entry *entry = p; -+ -+ if (p + sizeof(bch_acl_entry_short) > end) -+ goto invalid; -+ -+ switch (le16_to_cpu(entry->e_tag)) { -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ p += sizeof(bch_acl_entry_short); -+ break; -+ case ACL_USER: -+ case ACL_GROUP: -+ p += sizeof(bch_acl_entry); -+ break; -+ default: -+ goto invalid; -+ } -+ -+ count++; -+ } -+ -+ if (p > end) -+ goto invalid; -+ -+ if (!count) -+ return NULL; -+ -+ acl = posix_acl_alloc(count, GFP_KERNEL); -+ if (!acl) -+ return ERR_PTR(-ENOMEM); -+ -+ out = acl->a_entries; -+ -+ p = value + sizeof(bch_acl_header); -+ while (p < end) { -+ const bch_acl_entry *in = p; -+ -+ out->e_tag = le16_to_cpu(in->e_tag); -+ out->e_perm = le16_to_cpu(in->e_perm); -+ -+ switch (out->e_tag) { -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ p += sizeof(bch_acl_entry_short); -+ break; -+ case ACL_USER: -+ out->e_uid = make_kuid(&init_user_ns, -+ le32_to_cpu(in->e_id)); -+ p += sizeof(bch_acl_entry); -+ break; -+ case ACL_GROUP: -+ out->e_gid = make_kgid(&init_user_ns, -+ le32_to_cpu(in->e_id)); -+ p += sizeof(bch_acl_entry); -+ break; -+ } -+ -+ out++; -+ } -+ -+ BUG_ON(out != acl->a_entries + acl->a_count); -+ -+ return acl; -+invalid: -+ pr_err("invalid acl entry"); -+ return ERR_PTR(-EINVAL); -+} -+ -+#define acl_for_each_entry(acl, acl_e) \ -+ for (acl_e = acl->a_entries; \ -+ acl_e < acl->a_entries + acl->a_count; \ -+ acl_e++) -+ -+/* -+ * Convert from in-memory to filesystem representation. 
-+ */ -+static struct bkey_i_xattr * -+bch2_acl_to_xattr(struct btree_trans *trans, -+ const struct posix_acl *acl, -+ int type) -+{ -+ struct bkey_i_xattr *xattr; -+ bch_acl_header *acl_header; -+ const struct posix_acl_entry *acl_e; -+ void *outptr; -+ unsigned nr_short = 0, nr_long = 0, acl_len, u64s; -+ -+ acl_for_each_entry(acl, acl_e) { -+ switch (acl_e->e_tag) { -+ case ACL_USER: -+ case ACL_GROUP: -+ nr_long++; -+ break; -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ nr_short++; -+ break; -+ default: -+ return ERR_PTR(-EINVAL); -+ } -+ } -+ -+ acl_len = bch2_acl_size(nr_short, nr_long); -+ u64s = BKEY_U64s + xattr_val_u64s(0, acl_len); -+ -+ if (u64s > U8_MAX) -+ return ERR_PTR(-E2BIG); -+ -+ xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); -+ if (IS_ERR(xattr)) -+ return xattr; -+ -+ bkey_xattr_init(&xattr->k_i); -+ xattr->k.u64s = u64s; -+ xattr->v.x_type = acl_to_xattr_type(type); -+ xattr->v.x_name_len = 0, -+ xattr->v.x_val_len = cpu_to_le16(acl_len); -+ -+ acl_header = xattr_val(&xattr->v); -+ acl_header->a_version = cpu_to_le32(BCH_ACL_VERSION); -+ -+ outptr = (void *) acl_header + sizeof(*acl_header); -+ -+ acl_for_each_entry(acl, acl_e) { -+ bch_acl_entry *entry = outptr; -+ -+ entry->e_tag = cpu_to_le16(acl_e->e_tag); -+ entry->e_perm = cpu_to_le16(acl_e->e_perm); -+ switch (acl_e->e_tag) { -+ case ACL_USER: -+ entry->e_id = cpu_to_le32( -+ from_kuid(&init_user_ns, acl_e->e_uid)); -+ outptr += sizeof(bch_acl_entry); -+ break; -+ case ACL_GROUP: -+ entry->e_id = cpu_to_le32( -+ from_kgid(&init_user_ns, acl_e->e_gid)); -+ outptr += sizeof(bch_acl_entry); -+ break; -+ -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ outptr += sizeof(bch_acl_entry_short); -+ break; -+ } -+ } -+ -+ BUG_ON(outptr != xattr_val(&xattr->v) + acl_len); -+ -+ return xattr; -+} -+ -+struct posix_acl *bch2_get_acl(struct inode *vinode, int type) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c_xattr xattr; -+ struct posix_acl *acl = NULL; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, -+ &inode->ei_str_hash, inode->v.i_ino, -+ &X_SEARCH(acl_to_xattr_type(type), "", 0), -+ 0); -+ if (IS_ERR(iter)) { -+ if (PTR_ERR(iter) == -EINTR) -+ goto retry; -+ -+ if (PTR_ERR(iter) != -ENOENT) -+ acl = ERR_CAST(iter); -+ goto out; -+ } -+ -+ xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); -+ -+ acl = bch2_acl_from_disk(xattr_val(xattr.v), -+ le16_to_cpu(xattr.v->x_val_len)); -+ -+ if (!IS_ERR(acl)) -+ set_cached_acl(&inode->v, type, acl); -+out: -+ bch2_trans_exit(&trans); -+ return acl; -+} -+ -+int bch2_set_acl_trans(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode_u, -+ const struct bch_hash_info *hash_info, -+ struct posix_acl *acl, int type) -+{ -+ int ret; -+ -+ if (type == ACL_TYPE_DEFAULT && -+ !S_ISDIR(inode_u->bi_mode)) -+ return acl ? 
-EACCES : 0; -+ -+ if (acl) { -+ struct bkey_i_xattr *xattr = -+ bch2_acl_to_xattr(trans, acl, type); -+ if (IS_ERR(xattr)) -+ return PTR_ERR(xattr); -+ -+ ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info, -+ inode_u->bi_inum, &xattr->k_i, 0); -+ } else { -+ struct xattr_search_key search = -+ X_SEARCH(acl_to_xattr_type(type), "", 0); -+ -+ ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, hash_info, -+ inode_u->bi_inum, &search); -+ } -+ -+ return ret == -ENOENT ? 0 : ret; -+} -+ -+int bch2_set_acl(struct inode *vinode, struct posix_acl *_acl, int type) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *inode_iter; -+ struct bch_inode_unpacked inode_u; -+ struct posix_acl *acl; -+ umode_t mode; -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ acl = _acl; -+ -+ inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ if (ret) -+ goto btree_err; -+ -+ mode = inode_u.bi_mode; -+ -+ if (type == ACL_TYPE_ACCESS) { -+ ret = posix_acl_update_mode(&inode->v, &mode, &acl); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_set_acl_trans(&trans, &inode_u, -+ &inode->ei_str_hash, -+ acl, type); -+ if (ret) -+ goto btree_err; -+ -+ inode_u.bi_ctime = bch2_current_time(c); -+ inode_u.bi_mode = mode; -+ -+ ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: -+ bch2_trans_commit(&trans, NULL, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK); -+btree_err: -+ if (ret == -EINTR) -+ goto retry; -+ if (unlikely(ret)) -+ goto err; -+ -+ bch2_inode_update_after_write(c, inode, &inode_u, -+ ATTR_CTIME|ATTR_MODE); -+ -+ set_cached_acl(&inode->v, type, acl); -+err: -+ bch2_trans_exit(&trans); -+ mutex_unlock(&inode->ei_update_lock); -+ -+ return ret; -+} -+ -+int bch2_acl_chmod(struct btree_trans *trans, -+ struct bch_inode_info *inode, -+ umode_t mode, -+ struct posix_acl **new_acl) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c_xattr xattr; -+ struct bkey_i_xattr *new; -+ struct posix_acl *acl; -+ int ret = 0; -+ -+ iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, -+ &inode->ei_str_hash, inode->v.i_ino, -+ &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter) != -ENOENT ? 
PTR_ERR(iter) : 0; -+ -+ xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); -+ -+ acl = bch2_acl_from_disk(xattr_val(xattr.v), -+ le16_to_cpu(xattr.v->x_val_len)); -+ if (IS_ERR_OR_NULL(acl)) -+ return PTR_ERR(acl); -+ -+ ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode); -+ if (ret) -+ goto err; -+ -+ new = bch2_acl_to_xattr(trans, acl, ACL_TYPE_ACCESS); -+ if (IS_ERR(new)) { -+ ret = PTR_ERR(new); -+ goto err; -+ } -+ -+ new->k.p = iter->pos; -+ bch2_trans_update(trans, iter, &new->k_i, 0); -+ *new_acl = acl; -+ acl = NULL; -+err: -+ kfree(acl); -+ return ret; -+} -+ -+#endif /* CONFIG_BCACHEFS_POSIX_ACL */ -diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h -new file mode 100644 -index 000000000000..cb62d502a7ff ---- /dev/null -+++ b/fs/bcachefs/acl.h -@@ -0,0 +1,59 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ACL_H -+#define _BCACHEFS_ACL_H -+ -+struct bch_inode_unpacked; -+struct bch_hash_info; -+struct bch_inode_info; -+struct posix_acl; -+ -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ -+#define BCH_ACL_VERSION 0x0001 -+ -+typedef struct { -+ __le16 e_tag; -+ __le16 e_perm; -+ __le32 e_id; -+} bch_acl_entry; -+ -+typedef struct { -+ __le16 e_tag; -+ __le16 e_perm; -+} bch_acl_entry_short; -+ -+typedef struct { -+ __le32 a_version; -+} bch_acl_header; -+ -+struct posix_acl *bch2_get_acl(struct inode *, int); -+ -+int bch2_set_acl_trans(struct btree_trans *, -+ struct bch_inode_unpacked *, -+ const struct bch_hash_info *, -+ struct posix_acl *, int); -+int bch2_set_acl(struct inode *, struct posix_acl *, int); -+int bch2_acl_chmod(struct btree_trans *, struct bch_inode_info *, -+ umode_t, struct posix_acl **); -+ -+#else -+ -+static inline int bch2_set_acl_trans(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode_u, -+ const struct bch_hash_info *hash_info, -+ struct posix_acl *acl, int type) -+{ -+ return 0; -+} -+ -+static inline int bch2_acl_chmod(struct btree_trans *trans, -+ struct bch_inode_info *inode, -+ umode_t mode, -+ struct posix_acl **new_acl) -+{ -+ return 0; -+} -+ -+#endif /* CONFIG_BCACHEFS_POSIX_ACL */ -+ -+#endif /* _BCACHEFS_ACL_H */ -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -new file mode 100644 -index 000000000000..cb720ee04b86 ---- /dev/null -+++ b/fs/bcachefs/alloc_background.c -@@ -0,0 +1,1434 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_key_cache.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "clock.h" -+#include "debug.h" -+#include "ec.h" -+#include "error.h" -+#include "recovery.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static const char * const bch2_alloc_field_names[] = { -+#define x(name, bytes) #name, -+ BCH_ALLOC_FIELDS() -+#undef x -+ NULL -+}; -+ -+static void bch2_recalc_oldest_io(struct bch_fs *, struct bch_dev *, int); -+ -+/* Ratelimiting/PD controllers */ -+ -+static void pd_controllers_update(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(to_delayed_work(work), -+ struct bch_fs, -+ pd_controllers_update); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ for_each_member_device(ca, c, i) { -+ struct bch_dev_usage stats = bch2_dev_usage_read(c, ca); -+ -+ u64 free = bucket_to_sector(ca, -+ __dev_buckets_free(ca, stats)) << 9; -+ /* -+ * Bytes of internal fragmentation, which can be -+ * 
reclaimed by copy GC -+ */ -+ s64 fragmented = (bucket_to_sector(ca, -+ stats.buckets[BCH_DATA_USER] + -+ stats.buckets[BCH_DATA_CACHED]) - -+ (stats.sectors[BCH_DATA_USER] + -+ stats.sectors[BCH_DATA_CACHED])) << 9; -+ -+ fragmented = max(0LL, fragmented); -+ -+ bch2_pd_controller_update(&ca->copygc_pd, -+ free, fragmented, -1); -+ } -+ -+ schedule_delayed_work(&c->pd_controllers_update, -+ c->pd_controllers_update_seconds * HZ); -+} -+ -+/* Persistent alloc info: */ -+ -+static inline u64 get_alloc_field(const struct bch_alloc *a, -+ const void **p, unsigned field) -+{ -+ unsigned bytes = BCH_ALLOC_FIELD_BYTES[field]; -+ u64 v; -+ -+ if (!(a->fields & (1 << field))) -+ return 0; -+ -+ switch (bytes) { -+ case 1: -+ v = *((const u8 *) *p); -+ break; -+ case 2: -+ v = le16_to_cpup(*p); -+ break; -+ case 4: -+ v = le32_to_cpup(*p); -+ break; -+ case 8: -+ v = le64_to_cpup(*p); -+ break; -+ default: -+ BUG(); -+ } -+ -+ *p += bytes; -+ return v; -+} -+ -+static inline void put_alloc_field(struct bkey_i_alloc *a, void **p, -+ unsigned field, u64 v) -+{ -+ unsigned bytes = BCH_ALLOC_FIELD_BYTES[field]; -+ -+ if (!v) -+ return; -+ -+ a->v.fields |= 1 << field; -+ -+ switch (bytes) { -+ case 1: -+ *((u8 *) *p) = v; -+ break; -+ case 2: -+ *((__le16 *) *p) = cpu_to_le16(v); -+ break; -+ case 4: -+ *((__le32 *) *p) = cpu_to_le32(v); -+ break; -+ case 8: -+ *((__le64 *) *p) = cpu_to_le64(v); -+ break; -+ default: -+ BUG(); -+ } -+ -+ *p += bytes; -+} -+ -+struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) -+{ -+ struct bkey_alloc_unpacked ret = { .gen = 0 }; -+ -+ if (k.k->type == KEY_TYPE_alloc) { -+ const struct bch_alloc *a = bkey_s_c_to_alloc(k).v; -+ const void *d = a->data; -+ unsigned idx = 0; -+ -+ ret.gen = a->gen; -+ -+#define x(_name, _bits) ret._name = get_alloc_field(a, &d, idx++); -+ BCH_ALLOC_FIELDS() -+#undef x -+ } -+ return ret; -+} -+ -+void bch2_alloc_pack(struct bkey_i_alloc *dst, -+ const struct bkey_alloc_unpacked src) -+{ -+ unsigned idx = 0; -+ void *d = dst->v.data; -+ unsigned bytes; -+ -+ dst->v.fields = 0; -+ dst->v.gen = src.gen; -+ -+#define x(_name, _bits) put_alloc_field(dst, &d, idx++, src._name); -+ BCH_ALLOC_FIELDS() -+#undef x -+ -+ bytes = (void *) d - (void *) &dst->v; -+ set_bkey_val_bytes(&dst->k, bytes); -+ memset_u64s_tail(&dst->v, 0, bytes); -+} -+ -+static unsigned bch_alloc_val_u64s(const struct bch_alloc *a) -+{ -+ unsigned i, bytes = offsetof(struct bch_alloc, data); -+ -+ for (i = 0; i < ARRAY_SIZE(BCH_ALLOC_FIELD_BYTES); i++) -+ if (a->fields & (1 << i)) -+ bytes += BCH_ALLOC_FIELD_BYTES[i]; -+ -+ return DIV_ROUND_UP(bytes, sizeof(u64)); -+} -+ -+const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); -+ -+ if (k.k->p.inode >= c->sb.nr_devices || -+ !c->devs[k.k->p.inode]) -+ return "invalid device"; -+ -+ /* allow for unknown fields */ -+ if (bkey_val_u64s(a.k) < bch_alloc_val_u64s(a.v)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); -+ const void *d = a.v->data; -+ unsigned i; -+ -+ pr_buf(out, "gen %u", a.v->gen); -+ -+ for (i = 0; i < BCH_ALLOC_FIELD_NR; i++) -+ if (a.v->fields & (1 << i)) -+ pr_buf(out, " %s %llu", -+ bch2_alloc_field_names[i], -+ get_alloc_field(a.v, &d, i)); -+} -+ -+static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bkey_s_c k) -+{ -+ if (!level) 
-+ bch2_mark_key(c, k, 0, 0, NULL, 0, -+ BTREE_TRIGGER_ALLOC_READ| -+ BTREE_TRIGGER_NOATOMIC); -+ -+ return 0; -+} -+ -+int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ int ret = 0; -+ -+ ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_ALLOC, -+ NULL, bch2_alloc_read_fn); -+ if (ret) { -+ bch_err(c, "error reading alloc info: %i", ret); -+ return ret; -+ } -+ -+ percpu_down_write(&c->mark_lock); -+ bch2_dev_usage_from_buckets(c); -+ percpu_up_write(&c->mark_lock); -+ -+ mutex_lock(&c->bucket_clock[READ].lock); -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ bch2_recalc_oldest_io(c, ca, READ); -+ up_read(&ca->bucket_lock); -+ } -+ mutex_unlock(&c->bucket_clock[READ].lock); -+ -+ mutex_lock(&c->bucket_clock[WRITE].lock); -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ bch2_recalc_oldest_io(c, ca, WRITE); -+ up_read(&ca->bucket_lock); -+ } -+ mutex_unlock(&c->bucket_clock[WRITE].lock); -+ -+ return 0; -+} -+ -+enum alloc_write_ret { -+ ALLOC_WROTE, -+ ALLOC_NOWROTE, -+ ALLOC_END, -+}; -+ -+static int bch2_alloc_write_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c k; -+ struct bch_dev *ca; -+ struct bucket_array *ba; -+ struct bucket *g; -+ struct bucket_mark m; -+ struct bkey_alloc_unpacked old_u, new_u; -+ __BKEY_PADDED(k, 8) alloc_key; /* hack: */ -+ struct bkey_i_alloc *a; -+ int ret; -+retry: -+ bch2_trans_begin(trans); -+ -+ ret = bch2_btree_key_cache_flush(trans, -+ BTREE_ID_ALLOC, iter->pos); -+ if (ret) -+ goto err; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ old_u = bch2_alloc_unpack(k); -+ -+ if (iter->pos.inode >= c->sb.nr_devices || -+ !c->devs[iter->pos.inode]) -+ return ALLOC_END; -+ -+ percpu_down_read(&c->mark_lock); -+ ca = bch_dev_bkey_exists(c, iter->pos.inode); -+ ba = bucket_array(ca); -+ -+ if (iter->pos.offset >= ba->nbuckets) { -+ percpu_up_read(&c->mark_lock); -+ return ALLOC_END; -+ } -+ -+ g = &ba->b[iter->pos.offset]; -+ m = READ_ONCE(g->mark); -+ new_u = alloc_mem_to_key(g, m); -+ percpu_up_read(&c->mark_lock); -+ -+ if (!bkey_alloc_unpacked_cmp(old_u, new_u)) -+ return ALLOC_NOWROTE; -+ -+ a = bkey_alloc_init(&alloc_key.k); -+ a->k.p = iter->pos; -+ bch2_alloc_pack(a, new_u); -+ -+ bch2_trans_update(trans, iter, &a->k_i, -+ BTREE_TRIGGER_NORUN); -+ ret = bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ flags); -+err: -+ if (ret == -EINTR) -+ goto retry; -+ return ret; -+} -+ -+int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bch_dev *ca; -+ unsigned i; -+ int ret = 0; -+ -+ BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ -+ for_each_rw_member(ca, c, i) { -+ unsigned first_bucket; -+ -+ percpu_down_read(&c->mark_lock); -+ first_bucket = bucket_array(ca)->first_bucket; -+ percpu_up_read(&c->mark_lock); -+ -+ bch2_btree_iter_set_pos(iter, POS(i, first_bucket)); -+ -+ while (1) { -+ ret = bch2_alloc_write_key(&trans, iter, flags); -+ if (ret < 0 || ret == ALLOC_END) -+ break; -+ if (ret == ALLOC_WROTE) -+ *wrote = true; -+ bch2_btree_iter_next_slot(iter); -+ } -+ -+ if (ret < 0) { -+ percpu_ref_put(&ca->io_ref); -+ break; 
-+ } -+ } -+ -+ bch2_trans_exit(&trans); -+ -+ return ret < 0 ? ret : 0; -+} -+ -+/* Bucket IO clocks: */ -+ -+static void bch2_recalc_oldest_io(struct bch_fs *c, struct bch_dev *ca, int rw) -+{ -+ struct bucket_clock *clock = &c->bucket_clock[rw]; -+ struct bucket_array *buckets = bucket_array(ca); -+ struct bucket *g; -+ u16 max_last_io = 0; -+ unsigned i; -+ -+ lockdep_assert_held(&c->bucket_clock[rw].lock); -+ -+ /* Recalculate max_last_io for this device: */ -+ for_each_bucket(g, buckets) -+ max_last_io = max(max_last_io, bucket_last_io(c, g, rw)); -+ -+ ca->max_last_bucket_io[rw] = max_last_io; -+ -+ /* Recalculate global max_last_io: */ -+ max_last_io = 0; -+ -+ for_each_member_device(ca, c, i) -+ max_last_io = max(max_last_io, ca->max_last_bucket_io[rw]); -+ -+ clock->max_last_io = max_last_io; -+} -+ -+static void bch2_rescale_bucket_io_times(struct bch_fs *c, int rw) -+{ -+ struct bucket_clock *clock = &c->bucket_clock[rw]; -+ struct bucket_array *buckets; -+ struct bch_dev *ca; -+ struct bucket *g; -+ unsigned i; -+ -+ trace_rescale_prios(c); -+ -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) -+ g->io_time[rw] = clock->hand - -+ bucket_last_io(c, g, rw) / 2; -+ -+ bch2_recalc_oldest_io(c, ca, rw); -+ -+ up_read(&ca->bucket_lock); -+ } -+} -+ -+static inline u64 bucket_clock_freq(u64 capacity) -+{ -+ return max(capacity >> 10, 2028ULL); -+} -+ -+static void bch2_inc_clock_hand(struct io_timer *timer) -+{ -+ struct bucket_clock *clock = container_of(timer, -+ struct bucket_clock, rescale); -+ struct bch_fs *c = container_of(clock, -+ struct bch_fs, bucket_clock[clock->rw]); -+ struct bch_dev *ca; -+ u64 capacity; -+ unsigned i; -+ -+ mutex_lock(&clock->lock); -+ -+ /* if clock cannot be advanced more, rescale prio */ -+ if (clock->max_last_io >= U16_MAX - 2) -+ bch2_rescale_bucket_io_times(c, clock->rw); -+ -+ BUG_ON(clock->max_last_io >= U16_MAX - 2); -+ -+ for_each_member_device(ca, c, i) -+ ca->max_last_bucket_io[clock->rw]++; -+ clock->max_last_io++; -+ clock->hand++; -+ -+ mutex_unlock(&clock->lock); -+ -+ capacity = READ_ONCE(c->capacity); -+ -+ if (!capacity) -+ return; -+ -+ /* -+ * we only increment when 0.1% of the filesystem capacity has been read -+ * or written too, this determines if it's time -+ * -+ * XXX: we shouldn't really be going off of the capacity of devices in -+ * RW mode (that will be 0 when we're RO, yet we can still service -+ * reads) -+ */ -+ timer->expire += bucket_clock_freq(capacity); -+ -+ bch2_io_timer_add(&c->io_clock[clock->rw], timer); -+} -+ -+static void bch2_bucket_clock_init(struct bch_fs *c, int rw) -+{ -+ struct bucket_clock *clock = &c->bucket_clock[rw]; -+ -+ clock->hand = 1; -+ clock->rw = rw; -+ clock->rescale.fn = bch2_inc_clock_hand; -+ clock->rescale.expire = bucket_clock_freq(c->capacity); -+ mutex_init(&clock->lock); -+} -+ -+/* Background allocator thread: */ -+ -+/* -+ * Scans for buckets to be invalidated, invalidates them, rewrites prios/gens -+ * (marking them as invalidated on disk), then optionally issues discard -+ * commands to the newly free buckets, then puts them on the various freelists. -+ */ -+ -+#define BUCKET_GC_GEN_MAX 96U -+ -+/** -+ * wait_buckets_available - wait on reclaimable buckets -+ * -+ * If there aren't enough available buckets to fill up free_inc, wait until -+ * there are. 
-+ */ -+static int wait_buckets_available(struct bch_fs *c, struct bch_dev *ca) -+{ -+ unsigned long gc_count = c->gc_count; -+ u64 available; -+ int ret = 0; -+ -+ ca->allocator_state = ALLOCATOR_BLOCKED; -+ closure_wake_up(&c->freelist_wait); -+ -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ if (kthread_should_stop()) { -+ ret = 1; -+ break; -+ } -+ -+ if (gc_count != c->gc_count) -+ ca->inc_gen_really_needs_gc = 0; -+ -+ available = max_t(s64, 0, dev_buckets_available(c, ca) - -+ ca->inc_gen_really_needs_gc); -+ -+ if (available > fifo_free(&ca->free_inc) || -+ (available && !fifo_full(&ca->free[RESERVE_BTREE]))) -+ break; -+ -+ up_read(&c->gc_lock); -+ schedule(); -+ try_to_freeze(); -+ down_read(&c->gc_lock); -+ } -+ -+ __set_current_state(TASK_RUNNING); -+ ca->allocator_state = ALLOCATOR_RUNNING; -+ closure_wake_up(&c->freelist_wait); -+ -+ return ret; -+} -+ -+static bool bch2_can_invalidate_bucket(struct bch_dev *ca, -+ size_t bucket, -+ struct bucket_mark mark) -+{ -+ u8 gc_gen; -+ -+ if (!is_available_bucket(mark)) -+ return false; -+ -+ if (ca->buckets_nouse && -+ test_bit(bucket, ca->buckets_nouse)) -+ return false; -+ -+ gc_gen = bucket_gc_gen(ca, bucket); -+ -+ if (gc_gen >= BUCKET_GC_GEN_MAX / 2) -+ ca->inc_gen_needs_gc++; -+ -+ if (gc_gen >= BUCKET_GC_GEN_MAX) -+ ca->inc_gen_really_needs_gc++; -+ -+ return gc_gen < BUCKET_GC_GEN_MAX; -+} -+ -+/* -+ * Determines what order we're going to reuse buckets, smallest bucket_key() -+ * first. -+ * -+ * -+ * - We take into account the read prio of the bucket, which gives us an -+ * indication of how hot the data is -- we scale the prio so that the prio -+ * farthest from the clock is worth 1/8th of the closest. -+ * -+ * - The number of sectors of cached data in the bucket, which gives us an -+ * indication of the cost in cache misses this eviction will cause. -+ * -+ * - If hotness * sectors used compares equal, we pick the bucket with the -+ * smallest bucket_gc_gen() - since incrementing the same bucket's generation -+ * number repeatedly forces us to run mark and sweep gc to avoid generation -+ * number wraparound. 
-+ */ -+ -+static unsigned long bucket_sort_key(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, struct bucket_mark m) -+{ -+ unsigned last_io = bucket_last_io(c, bucket(ca, b), READ); -+ unsigned max_last_io = ca->max_last_bucket_io[READ]; -+ -+ /* -+ * Time since last read, scaled to [0, 8) where larger value indicates -+ * more recently read data: -+ */ -+ unsigned long hotness = (max_last_io - last_io) * 7 / max_last_io; -+ -+ /* How much we want to keep the data in this bucket: */ -+ unsigned long data_wantness = -+ (hotness + 1) * bucket_sectors_used(m); -+ -+ unsigned long needs_journal_commit = -+ bucket_needs_journal_commit(m, c->journal.last_seq_ondisk); -+ -+ return (data_wantness << 9) | -+ (needs_journal_commit << 8) | -+ (bucket_gc_gen(ca, b) / 16); -+} -+ -+static inline int bucket_alloc_cmp(alloc_heap *h, -+ struct alloc_heap_entry l, -+ struct alloc_heap_entry r) -+{ -+ return cmp_int(l.key, r.key) ?: -+ cmp_int(r.nr, l.nr) ?: -+ cmp_int(l.bucket, r.bucket); -+} -+ -+static inline int bucket_idx_cmp(const void *_l, const void *_r) -+{ -+ const struct alloc_heap_entry *l = _l, *r = _r; -+ -+ return cmp_int(l->bucket, r->bucket); -+} -+ -+static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bucket_array *buckets; -+ struct alloc_heap_entry e = { 0 }; -+ size_t b, i, nr = 0; -+ -+ ca->alloc_heap.used = 0; -+ -+ mutex_lock(&c->bucket_clock[READ].lock); -+ down_read(&ca->bucket_lock); -+ -+ buckets = bucket_array(ca); -+ -+ bch2_recalc_oldest_io(c, ca, READ); -+ -+ /* -+ * Find buckets with lowest read priority, by building a maxheap sorted -+ * by read priority and repeatedly replacing the maximum element until -+ * all buckets have been visited. -+ */ -+ for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++) { -+ struct bucket_mark m = READ_ONCE(buckets->b[b].mark); -+ unsigned long key = bucket_sort_key(c, ca, b, m); -+ -+ if (!bch2_can_invalidate_bucket(ca, b, m)) -+ continue; -+ -+ if (e.nr && e.bucket + e.nr == b && e.key == key) { -+ e.nr++; -+ } else { -+ if (e.nr) -+ heap_add_or_replace(&ca->alloc_heap, e, -+ -bucket_alloc_cmp, NULL); -+ -+ e = (struct alloc_heap_entry) { -+ .bucket = b, -+ .nr = 1, -+ .key = key, -+ }; -+ } -+ -+ cond_resched(); -+ } -+ -+ if (e.nr) -+ heap_add_or_replace(&ca->alloc_heap, e, -+ -bucket_alloc_cmp, NULL); -+ -+ for (i = 0; i < ca->alloc_heap.used; i++) -+ nr += ca->alloc_heap.data[i].nr; -+ -+ while (nr - ca->alloc_heap.data[0].nr >= ALLOC_SCAN_BATCH(ca)) { -+ nr -= ca->alloc_heap.data[0].nr; -+ heap_pop(&ca->alloc_heap, e, -bucket_alloc_cmp, NULL); -+ } -+ -+ up_read(&ca->bucket_lock); -+ mutex_unlock(&c->bucket_clock[READ].lock); -+} -+ -+static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bucket_array *buckets = bucket_array(ca); -+ struct bucket_mark m; -+ size_t b, start; -+ -+ if (ca->fifo_last_bucket < ca->mi.first_bucket || -+ ca->fifo_last_bucket >= ca->mi.nbuckets) -+ ca->fifo_last_bucket = ca->mi.first_bucket; -+ -+ start = ca->fifo_last_bucket; -+ -+ do { -+ ca->fifo_last_bucket++; -+ if (ca->fifo_last_bucket == ca->mi.nbuckets) -+ ca->fifo_last_bucket = ca->mi.first_bucket; -+ -+ b = ca->fifo_last_bucket; -+ m = READ_ONCE(buckets->b[b].mark); -+ -+ if (bch2_can_invalidate_bucket(ca, b, m)) { -+ struct alloc_heap_entry e = { .bucket = b, .nr = 1, }; -+ -+ heap_add(&ca->alloc_heap, e, bucket_alloc_cmp, NULL); -+ if (heap_full(&ca->alloc_heap)) -+ break; -+ } -+ -+ cond_resched(); -+ } while (ca->fifo_last_bucket != start); -+} -+ -+static 
void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bucket_array *buckets = bucket_array(ca); -+ struct bucket_mark m; -+ size_t checked, i; -+ -+ for (checked = 0; -+ checked < ca->mi.nbuckets / 2; -+ checked++) { -+ size_t b = bch2_rand_range(ca->mi.nbuckets - -+ ca->mi.first_bucket) + -+ ca->mi.first_bucket; -+ -+ m = READ_ONCE(buckets->b[b].mark); -+ -+ if (bch2_can_invalidate_bucket(ca, b, m)) { -+ struct alloc_heap_entry e = { .bucket = b, .nr = 1, }; -+ -+ heap_add(&ca->alloc_heap, e, bucket_alloc_cmp, NULL); -+ if (heap_full(&ca->alloc_heap)) -+ break; -+ } -+ -+ cond_resched(); -+ } -+ -+ sort(ca->alloc_heap.data, -+ ca->alloc_heap.used, -+ sizeof(ca->alloc_heap.data[0]), -+ bucket_idx_cmp, NULL); -+ -+ /* remove duplicates: */ -+ for (i = 0; i + 1 < ca->alloc_heap.used; i++) -+ if (ca->alloc_heap.data[i].bucket == -+ ca->alloc_heap.data[i + 1].bucket) -+ ca->alloc_heap.data[i].nr = 0; -+} -+ -+static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca) -+{ -+ size_t i, nr = 0; -+ -+ ca->inc_gen_needs_gc = 0; -+ -+ switch (ca->mi.replacement) { -+ case CACHE_REPLACEMENT_LRU: -+ find_reclaimable_buckets_lru(c, ca); -+ break; -+ case CACHE_REPLACEMENT_FIFO: -+ find_reclaimable_buckets_fifo(c, ca); -+ break; -+ case CACHE_REPLACEMENT_RANDOM: -+ find_reclaimable_buckets_random(c, ca); -+ break; -+ } -+ -+ heap_resort(&ca->alloc_heap, bucket_alloc_cmp, NULL); -+ -+ for (i = 0; i < ca->alloc_heap.used; i++) -+ nr += ca->alloc_heap.data[i].nr; -+ -+ return nr; -+} -+ -+static inline long next_alloc_bucket(struct bch_dev *ca) -+{ -+ struct alloc_heap_entry e, *top = ca->alloc_heap.data; -+ -+ while (ca->alloc_heap.used) { -+ if (top->nr) { -+ size_t b = top->bucket; -+ -+ top->bucket++; -+ top->nr--; -+ return b; -+ } -+ -+ heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL); -+ } -+ -+ return -1; -+} -+ -+/* -+ * returns sequence number of most recent journal entry that updated this -+ * bucket: -+ */ -+static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m) -+{ -+ if (m.journal_seq_valid) { -+ u64 journal_seq = atomic64_read(&c->journal.seq); -+ u64 bucket_seq = journal_seq; -+ -+ bucket_seq &= ~((u64) U16_MAX); -+ bucket_seq |= m.journal_seq; -+ -+ if (bucket_seq > journal_seq) -+ bucket_seq -= 1 << 16; -+ -+ return bucket_seq; -+ } else { -+ return 0; -+ } -+} -+ -+static int bch2_invalidate_one_bucket2(struct btree_trans *trans, -+ struct bch_dev *ca, -+ struct btree_iter *iter, -+ u64 *journal_seq, unsigned flags) -+{ -+#if 0 -+ __BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key; -+#else -+ /* hack: */ -+ __BKEY_PADDED(k, 8) alloc_key; -+#endif -+ struct bch_fs *c = trans->c; -+ struct bkey_i_alloc *a; -+ struct bkey_alloc_unpacked u; -+ struct bucket *g; -+ struct bucket_mark m; -+ bool invalidating_cached_data; -+ size_t b; -+ int ret = 0; -+ -+ BUG_ON(!ca->alloc_heap.used || -+ !ca->alloc_heap.data[0].nr); -+ b = ca->alloc_heap.data[0].bucket; -+ -+ /* first, put on free_inc and mark as owned by allocator: */ -+ percpu_down_read(&c->mark_lock); -+ spin_lock(&c->freelist_lock); -+ -+ verify_not_on_freelist(c, ca, b); -+ -+ BUG_ON(!fifo_push(&ca->free_inc, b)); -+ -+ g = bucket(ca, b); -+ m = READ_ONCE(g->mark); -+ -+ invalidating_cached_data = m.cached_sectors != 0; -+ -+ /* -+ * If we're not invalidating cached data, we only increment the bucket -+ * gen in memory here, the incremented gen will be updated in the btree -+ * by bch2_trans_mark_pointer(): -+ */ -+ -+ if (!invalidating_cached_data) -+ 
bch2_invalidate_bucket(c, ca, b, &m); -+ else -+ bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0); -+ -+ spin_unlock(&c->freelist_lock); -+ percpu_up_read(&c->mark_lock); -+ -+ if (!invalidating_cached_data) -+ goto out; -+ -+ /* -+ * If the read-only path is trying to shut down, we can't be generating -+ * new btree updates: -+ */ -+ if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags)) { -+ ret = 1; -+ goto out; -+ } -+ -+ BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); -+ -+ bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b)); -+retry: -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return ret; -+ -+ percpu_down_read(&c->mark_lock); -+ g = bucket(ca, iter->pos.offset); -+ m = READ_ONCE(g->mark); -+ u = alloc_mem_to_key(g, m); -+ -+ percpu_up_read(&c->mark_lock); -+ -+ invalidating_cached_data = u.cached_sectors != 0; -+ -+ u.gen++; -+ u.data_type = 0; -+ u.dirty_sectors = 0; -+ u.cached_sectors = 0; -+ u.read_time = c->bucket_clock[READ].hand; -+ u.write_time = c->bucket_clock[WRITE].hand; -+ -+ a = bkey_alloc_init(&alloc_key.k); -+ a->k.p = iter->pos; -+ bch2_alloc_pack(a, u); -+ -+ bch2_trans_update(trans, iter, &a->k_i, -+ BTREE_TRIGGER_BUCKET_INVALIDATE); -+ -+ /* -+ * XXX: -+ * when using deferred btree updates, we have journal reclaim doing -+ * btree updates and thus requiring the allocator to make forward -+ * progress, and here the allocator is requiring space in the journal - -+ * so we need a journal pre-reservation: -+ */ -+ ret = bch2_trans_commit(trans, NULL, -+ invalidating_cached_data ? journal_seq : NULL, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_USE_ALLOC_RESERVE| -+ flags); -+ if (ret == -EINTR) -+ goto retry; -+out: -+ if (!ret) { -+ /* remove from alloc_heap: */ -+ struct alloc_heap_entry e, *top = ca->alloc_heap.data; -+ -+ top->bucket++; -+ top->nr--; -+ -+ if (!top->nr) -+ heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL); -+ -+ /* -+ * Make sure we flush the last journal entry that updated this -+ * bucket (i.e. deleting the last reference) before writing to -+ * this bucket again: -+ */ -+ *journal_seq = max(*journal_seq, bucket_journal_seq(c, m)); -+ } else { -+ size_t b2; -+ -+ /* remove from free_inc: */ -+ percpu_down_read(&c->mark_lock); -+ spin_lock(&c->freelist_lock); -+ -+ bch2_mark_alloc_bucket(c, ca, b, false, -+ gc_pos_alloc(c, NULL), 0); -+ -+ BUG_ON(!fifo_pop_back(&ca->free_inc, b2)); -+ BUG_ON(b != b2); -+ -+ spin_unlock(&c->freelist_lock); -+ percpu_up_read(&c->mark_lock); -+ } -+ -+ return ret < 0 ? ret : 0; -+} -+ -+/* -+ * Pull buckets off ca->alloc_heap, invalidate them, move them to ca->free_inc: -+ */ -+static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ u64 journal_seq = 0; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, -+ POS(ca->dev_idx, 0), -+ BTREE_ITER_CACHED| -+ BTREE_ITER_CACHED_NOFILL| -+ BTREE_ITER_INTENT); -+ -+ /* Only use nowait if we've already invalidated at least one bucket: */ -+ while (!ret && -+ !fifo_full(&ca->free_inc) && -+ ca->alloc_heap.used) -+ ret = bch2_invalidate_one_bucket2(&trans, ca, iter, &journal_seq, -+ BTREE_INSERT_GC_LOCK_HELD| -+ (!fifo_empty(&ca->free_inc) -+ ? 
BTREE_INSERT_NOWAIT : 0)); -+ -+ bch2_trans_exit(&trans); -+ -+ /* If we used NOWAIT, don't return the error: */ -+ if (!fifo_empty(&ca->free_inc)) -+ ret = 0; -+ if (ret) { -+ bch_err(ca, "error invalidating buckets: %i", ret); -+ return ret; -+ } -+ -+ if (journal_seq) -+ ret = bch2_journal_flush_seq(&c->journal, journal_seq); -+ if (ret) { -+ bch_err(ca, "journal error: %i", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t bucket) -+{ -+ unsigned i; -+ int ret = 0; -+ -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ spin_lock(&c->freelist_lock); -+ for (i = 0; i < RESERVE_NR; i++) { -+ -+ /* -+ * Don't strand buckets on the copygc freelist until -+ * after recovery is finished: -+ */ -+ if (!test_bit(BCH_FS_STARTED, &c->flags) && -+ i == RESERVE_MOVINGGC) -+ continue; -+ -+ if (fifo_push(&ca->free[i], bucket)) { -+ fifo_pop(&ca->free_inc, bucket); -+ -+ closure_wake_up(&c->freelist_wait); -+ ca->allocator_state = ALLOCATOR_RUNNING; -+ -+ spin_unlock(&c->freelist_lock); -+ goto out; -+ } -+ } -+ -+ if (ca->allocator_state != ALLOCATOR_BLOCKED_FULL) { -+ ca->allocator_state = ALLOCATOR_BLOCKED_FULL; -+ closure_wake_up(&c->freelist_wait); -+ } -+ -+ spin_unlock(&c->freelist_lock); -+ -+ if ((current->flags & PF_KTHREAD) && -+ kthread_should_stop()) { -+ ret = 1; -+ break; -+ } -+ -+ schedule(); -+ try_to_freeze(); -+ } -+out: -+ __set_current_state(TASK_RUNNING); -+ return ret; -+} -+ -+/* -+ * Pulls buckets off free_inc, discards them (if enabled), then adds them to -+ * freelists, waiting until there's room if necessary: -+ */ -+static int discard_invalidated_buckets(struct bch_fs *c, struct bch_dev *ca) -+{ -+ while (!fifo_empty(&ca->free_inc)) { -+ size_t bucket = fifo_peek(&ca->free_inc); -+ -+ if (ca->mi.discard && -+ blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev))) -+ blkdev_issue_discard(ca->disk_sb.bdev, -+ bucket_to_sector(ca, bucket), -+ ca->mi.bucket_size, GFP_NOIO, 0); -+ -+ if (push_invalidated_bucket(c, ca, bucket)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/** -+ * bch_allocator_thread - move buckets from free_inc to reserves -+ * -+ * The free_inc FIFO is populated by find_reclaimable_buckets(), and -+ * the reserves are depleted by bucket allocation. When we run out -+ * of free_inc, try to invalidate some buckets and write out -+ * prios and gens. 
-+ */ -+static int bch2_allocator_thread(void *arg) -+{ -+ struct bch_dev *ca = arg; -+ struct bch_fs *c = ca->fs; -+ size_t nr; -+ int ret; -+ -+ set_freezable(); -+ ca->allocator_state = ALLOCATOR_RUNNING; -+ -+ while (1) { -+ cond_resched(); -+ if (kthread_should_stop()) -+ break; -+ -+ pr_debug("discarding %zu invalidated buckets", -+ fifo_used(&ca->free_inc)); -+ -+ ret = discard_invalidated_buckets(c, ca); -+ if (ret) -+ goto stop; -+ -+ down_read(&c->gc_lock); -+ -+ ret = bch2_invalidate_buckets(c, ca); -+ if (ret) { -+ up_read(&c->gc_lock); -+ goto stop; -+ } -+ -+ if (!fifo_empty(&ca->free_inc)) { -+ up_read(&c->gc_lock); -+ continue; -+ } -+ -+ pr_debug("free_inc now empty"); -+ -+ do { -+ /* -+ * Find some buckets that we can invalidate, either -+ * they're completely unused, or only contain clean data -+ * that's been written back to the backing device or -+ * another cache tier -+ */ -+ -+ pr_debug("scanning for reclaimable buckets"); -+ -+ nr = find_reclaimable_buckets(c, ca); -+ -+ pr_debug("found %zu buckets", nr); -+ -+ trace_alloc_batch(ca, nr, ca->alloc_heap.size); -+ -+ if ((ca->inc_gen_needs_gc >= ALLOC_SCAN_BATCH(ca) || -+ ca->inc_gen_really_needs_gc) && -+ c->gc_thread) { -+ atomic_inc(&c->kick_gc); -+ wake_up_process(c->gc_thread); -+ } -+ -+ /* -+ * If we found any buckets, we have to invalidate them -+ * before we scan for more - but if we didn't find very -+ * many we may want to wait on more buckets being -+ * available so we don't spin: -+ */ -+ if (!nr || -+ (nr < ALLOC_SCAN_BATCH(ca) && -+ !fifo_empty(&ca->free[RESERVE_NONE]))) { -+ ret = wait_buckets_available(c, ca); -+ if (ret) { -+ up_read(&c->gc_lock); -+ goto stop; -+ } -+ } -+ } while (!nr); -+ -+ up_read(&c->gc_lock); -+ -+ pr_debug("%zu buckets to invalidate", nr); -+ -+ /* -+ * alloc_heap is now full of newly-invalidated buckets: next, -+ * write out the new bucket gens: -+ */ -+ } -+ -+stop: -+ pr_debug("alloc thread stopping (ret %i)", ret); -+ ca->allocator_state = ALLOCATOR_STOPPED; -+ closure_wake_up(&c->freelist_wait); -+ return 0; -+} -+ -+/* Startup/shutdown (ro/rw): */ -+ -+void bch2_recalc_capacity(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ u64 capacity = 0, reserved_sectors = 0, gc_reserve; -+ unsigned bucket_size_max = 0; -+ unsigned long ra_pages = 0; -+ unsigned i, j; -+ -+ lockdep_assert_held(&c->state_lock); -+ -+ for_each_online_member(ca, c, i) { -+ struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_bdi; -+ -+ ra_pages += bdi->ra_pages; -+ } -+ -+ bch2_set_ra_pages(c, ra_pages); -+ -+ for_each_rw_member(ca, c, i) { -+ u64 dev_reserve = 0; -+ -+ /* -+ * We need to reserve buckets (from the number -+ * of currently available buckets) against -+ * foreground writes so that mainly copygc can -+ * make forward progress. -+ * -+ * We need enough to refill the various reserves -+ * from scratch - copygc will use its entire -+ * reserve all at once, then run against when -+ * its reserve is refilled (from the formerly -+ * available buckets). -+ * -+ * This reserve is just used when considering if -+ * allocations for foreground writes must wait - -+ * not -ENOSPC calculations. 
-+ */ -+ for (j = 0; j < RESERVE_NONE; j++) -+ dev_reserve += ca->free[j].size; -+ -+ dev_reserve += 1; /* btree write point */ -+ dev_reserve += 1; /* copygc write point */ -+ dev_reserve += 1; /* rebalance write point */ -+ -+ dev_reserve *= ca->mi.bucket_size; -+ -+ ca->copygc_threshold = dev_reserve; -+ -+ capacity += bucket_to_sector(ca, ca->mi.nbuckets - -+ ca->mi.first_bucket); -+ -+ reserved_sectors += dev_reserve * 2; -+ -+ bucket_size_max = max_t(unsigned, bucket_size_max, -+ ca->mi.bucket_size); -+ } -+ -+ gc_reserve = c->opts.gc_reserve_bytes -+ ? c->opts.gc_reserve_bytes >> 9 -+ : div64_u64(capacity * c->opts.gc_reserve_percent, 100); -+ -+ reserved_sectors = max(gc_reserve, reserved_sectors); -+ -+ reserved_sectors = min(reserved_sectors, capacity); -+ -+ c->capacity = capacity - reserved_sectors; -+ -+ c->bucket_size_max = bucket_size_max; -+ -+ if (c->capacity) { -+ bch2_io_timer_add(&c->io_clock[READ], -+ &c->bucket_clock[READ].rescale); -+ bch2_io_timer_add(&c->io_clock[WRITE], -+ &c->bucket_clock[WRITE].rescale); -+ } else { -+ bch2_io_timer_del(&c->io_clock[READ], -+ &c->bucket_clock[READ].rescale); -+ bch2_io_timer_del(&c->io_clock[WRITE], -+ &c->bucket_clock[WRITE].rescale); -+ } -+ -+ /* Wake up case someone was waiting for buckets */ -+ closure_wake_up(&c->freelist_wait); -+} -+ -+static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct open_bucket *ob; -+ bool ret = false; -+ -+ for (ob = c->open_buckets; -+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); -+ ob++) { -+ spin_lock(&ob->lock); -+ if (ob->valid && !ob->on_partial_list && -+ ob->ptr.dev == ca->dev_idx) -+ ret = true; -+ spin_unlock(&ob->lock); -+ } -+ -+ return ret; -+} -+ -+/* device goes ro: */ -+void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca) -+{ -+ unsigned i; -+ -+ BUG_ON(ca->alloc_thread); -+ -+ /* First, remove device from allocation groups: */ -+ -+ for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++) -+ clear_bit(ca->dev_idx, c->rw_devs[i].d); -+ -+ /* -+ * Capacity is calculated based off of devices in allocation groups: -+ */ -+ bch2_recalc_capacity(c); -+ -+ /* Next, close write points that point to this device... 
*/ -+ for (i = 0; i < ARRAY_SIZE(c->write_points); i++) -+ bch2_writepoint_stop(c, ca, &c->write_points[i]); -+ -+ bch2_writepoint_stop(c, ca, &ca->copygc_write_point); -+ bch2_writepoint_stop(c, ca, &c->rebalance_write_point); -+ bch2_writepoint_stop(c, ca, &c->btree_write_point); -+ -+ mutex_lock(&c->btree_reserve_cache_lock); -+ while (c->btree_reserve_cache_nr) { -+ struct btree_alloc *a = -+ &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; -+ -+ bch2_open_buckets_put(c, &a->ob); -+ } -+ mutex_unlock(&c->btree_reserve_cache_lock); -+ -+ while (1) { -+ struct open_bucket *ob; -+ -+ spin_lock(&c->freelist_lock); -+ if (!ca->open_buckets_partial_nr) { -+ spin_unlock(&c->freelist_lock); -+ break; -+ } -+ ob = c->open_buckets + -+ ca->open_buckets_partial[--ca->open_buckets_partial_nr]; -+ ob->on_partial_list = false; -+ spin_unlock(&c->freelist_lock); -+ -+ bch2_open_bucket_put(c, ob); -+ } -+ -+ bch2_ec_stop_dev(c, ca); -+ -+ /* -+ * Wake up threads that were blocked on allocation, so they can notice -+ * the device can no longer be removed and the capacity has changed: -+ */ -+ closure_wake_up(&c->freelist_wait); -+ -+ /* -+ * journal_res_get() can block waiting for free space in the journal - -+ * it needs to notice there may not be devices to allocate from anymore: -+ */ -+ wake_up(&c->journal.wait); -+ -+ /* Now wait for any in flight writes: */ -+ -+ closure_wait_event(&c->open_buckets_wait, -+ !bch2_dev_has_open_write_point(c, ca)); -+} -+ -+/* device goes rw: */ -+void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++) -+ if (ca->mi.data_allowed & (1 << i)) -+ set_bit(ca->dev_idx, c->rw_devs[i].d); -+} -+ -+void bch2_dev_allocator_quiesce(struct bch_fs *c, struct bch_dev *ca) -+{ -+ if (ca->alloc_thread) -+ closure_wait_event(&c->freelist_wait, -+ ca->allocator_state != ALLOCATOR_RUNNING); -+} -+ -+/* stop allocator thread: */ -+void bch2_dev_allocator_stop(struct bch_dev *ca) -+{ -+ struct task_struct *p; -+ -+ p = rcu_dereference_protected(ca->alloc_thread, 1); -+ ca->alloc_thread = NULL; -+ -+ /* -+ * We need an rcu barrier between setting ca->alloc_thread = NULL and -+ * the thread shutting down to avoid bch2_wake_allocator() racing: -+ * -+ * XXX: it would be better to have the rcu barrier be asynchronous -+ * instead of blocking us here -+ */ -+ synchronize_rcu(); -+ -+ if (p) { -+ kthread_stop(p); -+ put_task_struct(p); -+ } -+} -+ -+/* start allocator thread: */ -+int bch2_dev_allocator_start(struct bch_dev *ca) -+{ -+ struct task_struct *p; -+ -+ /* -+ * allocator thread already started? 
-+ */ -+ if (ca->alloc_thread) -+ return 0; -+ -+ p = kthread_create(bch2_allocator_thread, ca, -+ "bch_alloc[%s]", ca->name); -+ if (IS_ERR(p)) -+ return PTR_ERR(p); -+ -+ get_task_struct(p); -+ rcu_assign_pointer(ca->alloc_thread, p); -+ wake_up_process(p); -+ return 0; -+} -+ -+void bch2_fs_allocator_background_init(struct bch_fs *c) -+{ -+ spin_lock_init(&c->freelist_lock); -+ bch2_bucket_clock_init(c, READ); -+ bch2_bucket_clock_init(c, WRITE); -+ -+ c->pd_controllers_update_seconds = 5; -+ INIT_DELAYED_WORK(&c->pd_controllers_update, pd_controllers_update); -+} -diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h -new file mode 100644 -index 000000000000..f6b9f27f0713 ---- /dev/null -+++ b/fs/bcachefs/alloc_background.h -@@ -0,0 +1,97 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ALLOC_BACKGROUND_H -+#define _BCACHEFS_ALLOC_BACKGROUND_H -+ -+#include "bcachefs.h" -+#include "alloc_types.h" -+#include "debug.h" -+ -+struct bkey_alloc_unpacked { -+ u8 gen; -+#define x(_name, _bits) u##_bits _name; -+ BCH_ALLOC_FIELDS() -+#undef x -+}; -+ -+/* returns true if not equal */ -+static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l, -+ struct bkey_alloc_unpacked r) -+{ -+ return l.gen != r.gen -+#define x(_name, _bits) || l._name != r._name -+ BCH_ALLOC_FIELDS() -+#undef x -+ ; -+} -+ -+struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c); -+void bch2_alloc_pack(struct bkey_i_alloc *, -+ const struct bkey_alloc_unpacked); -+ -+static inline struct bkey_alloc_unpacked -+alloc_mem_to_key(struct bucket *g, struct bucket_mark m) -+{ -+ return (struct bkey_alloc_unpacked) { -+ .gen = m.gen, -+ .oldest_gen = g->oldest_gen, -+ .data_type = m.data_type, -+ .dirty_sectors = m.dirty_sectors, -+ .cached_sectors = m.cached_sectors, -+ .read_time = g->io_time[READ], -+ .write_time = g->io_time[WRITE], -+ }; -+} -+ -+#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9) -+ -+const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_alloc (struct bkey_ops) { \ -+ .key_invalid = bch2_alloc_invalid, \ -+ .val_to_text = bch2_alloc_to_text, \ -+} -+ -+struct journal_keys; -+int bch2_alloc_read(struct bch_fs *, struct journal_keys *); -+ -+static inline void bch2_wake_allocator(struct bch_dev *ca) -+{ -+ struct task_struct *p; -+ -+ rcu_read_lock(); -+ p = rcu_dereference(ca->alloc_thread); -+ if (p) -+ wake_up_process(p); -+ rcu_read_unlock(); -+} -+ -+static inline void verify_not_on_freelist(struct bch_fs *c, struct bch_dev *ca, -+ size_t bucket) -+{ -+ if (expensive_debug_checks(c)) { -+ size_t iter; -+ long i; -+ unsigned j; -+ -+ for (j = 0; j < RESERVE_NR; j++) -+ fifo_for_each_entry(i, &ca->free[j], iter) -+ BUG_ON(i == bucket); -+ fifo_for_each_entry(i, &ca->free_inc, iter) -+ BUG_ON(i == bucket); -+ } -+} -+ -+void bch2_recalc_capacity(struct bch_fs *); -+ -+void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *); -+void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *); -+ -+void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *); -+void bch2_dev_allocator_stop(struct bch_dev *); -+int bch2_dev_allocator_start(struct bch_dev *); -+ -+int bch2_alloc_write(struct bch_fs *, unsigned, bool *); -+void bch2_fs_allocator_background_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */ -diff --git a/fs/bcachefs/alloc_foreground.c 
b/fs/bcachefs/alloc_foreground.c -new file mode 100644 -index 000000000000..979aba30bc9d ---- /dev/null -+++ b/fs/bcachefs/alloc_foreground.c -@@ -0,0 +1,1044 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Primary bucket allocation code -+ * -+ * Copyright 2012 Google, Inc. -+ * -+ * Allocation in bcache is done in terms of buckets: -+ * -+ * Each bucket has associated an 8 bit gen; this gen corresponds to the gen in -+ * btree pointers - they must match for the pointer to be considered valid. -+ * -+ * Thus (assuming a bucket has no dirty data or metadata in it) we can reuse a -+ * bucket simply by incrementing its gen. -+ * -+ * The gens (along with the priorities; it's really the gens are important but -+ * the code is named as if it's the priorities) are written in an arbitrary list -+ * of buckets on disk, with a pointer to them in the journal header. -+ * -+ * When we invalidate a bucket, we have to write its new gen to disk and wait -+ * for that write to complete before we use it - otherwise after a crash we -+ * could have pointers that appeared to be good but pointed to data that had -+ * been overwritten. -+ * -+ * Since the gens and priorities are all stored contiguously on disk, we can -+ * batch this up: We fill up the free_inc list with freshly invalidated buckets, -+ * call prio_write(), and when prio_write() finishes we pull buckets off the -+ * free_inc list and optionally discard them. -+ * -+ * free_inc isn't the only freelist - if it was, we'd often have to sleep while -+ * priorities and gens were being written before we could allocate. c->free is a -+ * smaller freelist, and buckets on that list are always ready to be used. -+ * -+ * If we've got discards enabled, that happens when a bucket moves from the -+ * free_inc list to the free list. -+ * -+ * It's important to ensure that gens don't wrap around - with respect to -+ * either the oldest gen in the btree or the gen on disk. This is quite -+ * difficult to do in practice, but we explicitly guard against it anyways - if -+ * a bucket is in danger of wrapping around we simply skip invalidating it that -+ * time around, and we garbage collect or rewrite the priorities sooner than we -+ * would have otherwise. -+ * -+ * bch2_bucket_alloc() allocates a single bucket from a specific device. -+ * -+ * bch2_bucket_alloc_set() allocates one or more buckets from different devices -+ * in a given filesystem. -+ * -+ * invalidate_buckets() drives all the processes described above. It's called -+ * from bch2_bucket_alloc() and a few other places that need to make sure free -+ * buckets are ready. -+ * -+ * invalidate_buckets_(lru|fifo)() find buckets that are available to be -+ * invalidated, and then invalidate them and stick them on the free_inc list - -+ * in either lru or fifo order. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "clock.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "io.h" -+ -+#include -+#include -+#include -+#include -+ -+enum bucket_alloc_ret { -+ ALLOC_SUCCESS, -+ OPEN_BUCKETS_EMPTY, -+ FREELIST_EMPTY, /* Allocator thread not keeping up */ -+}; -+ -+/* -+ * Open buckets represent a bucket that's currently being allocated from. 
They -+ * serve two purposes: -+ * -+ * - They track buckets that have been partially allocated, allowing for -+ * sub-bucket sized allocations - they're used by the sector allocator below -+ * -+ * - They provide a reference to the buckets they own that mark and sweep GC -+ * can find, until the new allocation has a pointer to it inserted into the -+ * btree -+ * -+ * When allocating some space with the sector allocator, the allocation comes -+ * with a reference to an open bucket - the caller is required to put that -+ * reference _after_ doing the index update that makes its allocation reachable. -+ */ -+ -+void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ -+ if (ob->ec) { -+ bch2_ec_bucket_written(c, ob); -+ return; -+ } -+ -+ percpu_down_read(&c->mark_lock); -+ spin_lock(&ob->lock); -+ -+ bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr), -+ false, gc_pos_alloc(c, ob), 0); -+ ob->valid = false; -+ ob->type = 0; -+ -+ spin_unlock(&ob->lock); -+ percpu_up_read(&c->mark_lock); -+ -+ spin_lock(&c->freelist_lock); -+ ob->freelist = c->open_buckets_freelist; -+ c->open_buckets_freelist = ob - c->open_buckets; -+ c->open_buckets_nr_free++; -+ spin_unlock(&c->freelist_lock); -+ -+ closure_wake_up(&c->open_buckets_wait); -+} -+ -+void bch2_open_bucket_write_error(struct bch_fs *c, -+ struct open_buckets *obs, -+ unsigned dev) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, obs, ob, i) -+ if (ob->ptr.dev == dev && -+ ob->ec) -+ bch2_ec_bucket_cancel(c, ob); -+} -+ -+static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c) -+{ -+ struct open_bucket *ob; -+ -+ BUG_ON(!c->open_buckets_freelist || !c->open_buckets_nr_free); -+ -+ ob = c->open_buckets + c->open_buckets_freelist; -+ c->open_buckets_freelist = ob->freelist; -+ atomic_set(&ob->pin, 1); -+ ob->type = 0; -+ -+ c->open_buckets_nr_free--; -+ return ob; -+} -+ -+static void open_bucket_free_unused(struct bch_fs *c, -+ struct open_bucket *ob, -+ bool may_realloc) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ -+ BUG_ON(ca->open_buckets_partial_nr >= -+ ARRAY_SIZE(ca->open_buckets_partial)); -+ -+ if (ca->open_buckets_partial_nr < -+ ARRAY_SIZE(ca->open_buckets_partial) && -+ may_realloc) { -+ spin_lock(&c->freelist_lock); -+ ob->on_partial_list = true; -+ ca->open_buckets_partial[ca->open_buckets_partial_nr++] = -+ ob - c->open_buckets; -+ spin_unlock(&c->freelist_lock); -+ -+ closure_wake_up(&c->open_buckets_wait); -+ closure_wake_up(&c->freelist_wait); -+ } else { -+ bch2_open_bucket_put(c, ob); -+ } -+} -+ -+static void verify_not_stale(struct bch_fs *c, const struct open_buckets *obs) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, obs, ob, i) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ -+ BUG_ON(ptr_stale(ca, &ob->ptr)); -+ } -+#endif -+} -+ -+/* _only_ for allocating the journal on a new device: */ -+long bch2_bucket_alloc_new_fs(struct bch_dev *ca) -+{ -+ struct bucket_array *buckets; -+ ssize_t b; -+ -+ rcu_read_lock(); -+ buckets = bucket_array(ca); -+ -+ for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++) -+ if (is_available_bucket(buckets->b[b].mark)) -+ goto success; -+ b = -1; -+success: -+ rcu_read_unlock(); -+ return b; -+} -+ -+static inline unsigned open_buckets_reserved(enum alloc_reserve reserve) -+{ -+ switch (reserve) { -+ case RESERVE_ALLOC: -+ return 0; -+ case RESERVE_BTREE: -+ 
return OPEN_BUCKETS_COUNT / 4; -+ default: -+ return OPEN_BUCKETS_COUNT / 2; -+ } -+} -+ -+/** -+ * bch_bucket_alloc - allocate a single bucket from a specific device -+ * -+ * Returns index of bucket on success, 0 on failure -+ * */ -+struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, -+ enum alloc_reserve reserve, -+ bool may_alloc_partial, -+ struct closure *cl) -+{ -+ struct bucket_array *buckets; -+ struct open_bucket *ob; -+ long bucket = 0; -+ -+ spin_lock(&c->freelist_lock); -+ -+ if (may_alloc_partial && -+ ca->open_buckets_partial_nr) { -+ ob = c->open_buckets + -+ ca->open_buckets_partial[--ca->open_buckets_partial_nr]; -+ ob->on_partial_list = false; -+ spin_unlock(&c->freelist_lock); -+ return ob; -+ } -+ -+ if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) { -+ if (cl) -+ closure_wait(&c->open_buckets_wait, cl); -+ -+ if (!c->blocked_allocate_open_bucket) -+ c->blocked_allocate_open_bucket = local_clock(); -+ -+ spin_unlock(&c->freelist_lock); -+ trace_open_bucket_alloc_fail(ca, reserve); -+ return ERR_PTR(-OPEN_BUCKETS_EMPTY); -+ } -+ -+ if (likely(fifo_pop(&ca->free[RESERVE_NONE], bucket))) -+ goto out; -+ -+ switch (reserve) { -+ case RESERVE_ALLOC: -+ if (fifo_pop(&ca->free[RESERVE_BTREE], bucket)) -+ goto out; -+ break; -+ case RESERVE_BTREE: -+ if (fifo_used(&ca->free[RESERVE_BTREE]) * 2 >= -+ ca->free[RESERVE_BTREE].size && -+ fifo_pop(&ca->free[RESERVE_BTREE], bucket)) -+ goto out; -+ break; -+ case RESERVE_MOVINGGC: -+ if (fifo_pop(&ca->free[RESERVE_MOVINGGC], bucket)) -+ goto out; -+ break; -+ default: -+ break; -+ } -+ -+ if (cl) -+ closure_wait(&c->freelist_wait, cl); -+ -+ if (!c->blocked_allocate) -+ c->blocked_allocate = local_clock(); -+ -+ spin_unlock(&c->freelist_lock); -+ -+ trace_bucket_alloc_fail(ca, reserve); -+ return ERR_PTR(-FREELIST_EMPTY); -+out: -+ verify_not_on_freelist(c, ca, bucket); -+ -+ ob = bch2_open_bucket_alloc(c); -+ -+ spin_lock(&ob->lock); -+ buckets = bucket_array(ca); -+ -+ ob->valid = true; -+ ob->sectors_free = ca->mi.bucket_size; -+ ob->ptr = (struct bch_extent_ptr) { -+ .type = 1 << BCH_EXTENT_ENTRY_ptr, -+ .gen = buckets->b[bucket].mark.gen, -+ .offset = bucket_to_sector(ca, bucket), -+ .dev = ca->dev_idx, -+ }; -+ -+ bucket_io_clock_reset(c, ca, bucket, READ); -+ bucket_io_clock_reset(c, ca, bucket, WRITE); -+ spin_unlock(&ob->lock); -+ -+ if (c->blocked_allocate_open_bucket) { -+ bch2_time_stats_update( -+ &c->times[BCH_TIME_blocked_allocate_open_bucket], -+ c->blocked_allocate_open_bucket); -+ c->blocked_allocate_open_bucket = 0; -+ } -+ -+ if (c->blocked_allocate) { -+ bch2_time_stats_update( -+ &c->times[BCH_TIME_blocked_allocate], -+ c->blocked_allocate); -+ c->blocked_allocate = 0; -+ } -+ -+ spin_unlock(&c->freelist_lock); -+ -+ bch2_wake_allocator(ca); -+ -+ trace_bucket_alloc(ca, reserve); -+ return ob; -+} -+ -+static int __dev_stripe_cmp(struct dev_stripe_state *stripe, -+ unsigned l, unsigned r) -+{ -+ return ((stripe->next_alloc[l] > stripe->next_alloc[r]) - -+ (stripe->next_alloc[l] < stripe->next_alloc[r])); -+} -+ -+#define dev_stripe_cmp(l, r) __dev_stripe_cmp(stripe, l, r) -+ -+struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c, -+ struct dev_stripe_state *stripe, -+ struct bch_devs_mask *devs) -+{ -+ struct dev_alloc_list ret = { .nr = 0 }; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ for_each_member_device_rcu(ca, c, i, devs) -+ ret.devs[ret.nr++] = i; -+ -+ bubble_sort(ret.devs, ret.nr, dev_stripe_cmp); -+ return ret; -+} -+ -+void 
bch2_dev_stripe_increment(struct bch_fs *c, struct bch_dev *ca, -+ struct dev_stripe_state *stripe) -+{ -+ u64 *v = stripe->next_alloc + ca->dev_idx; -+ u64 free_space = dev_buckets_free(c, ca); -+ u64 free_space_inv = free_space -+ ? div64_u64(1ULL << 48, free_space) -+ : 1ULL << 48; -+ u64 scale = *v / 4; -+ -+ if (*v + free_space_inv >= *v) -+ *v += free_space_inv; -+ else -+ *v = U64_MAX; -+ -+ for (v = stripe->next_alloc; -+ v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++) -+ *v = *v < scale ? 0 : *v - scale; -+} -+ -+#define BUCKET_MAY_ALLOC_PARTIAL (1 << 0) -+#define BUCKET_ALLOC_USE_DURABILITY (1 << 1) -+ -+static void add_new_bucket(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct bch_devs_mask *devs_may_alloc, -+ unsigned *nr_effective, -+ bool *have_cache, -+ unsigned flags, -+ struct open_bucket *ob) -+{ -+ unsigned durability = -+ bch_dev_bkey_exists(c, ob->ptr.dev)->mi.durability; -+ -+ __clear_bit(ob->ptr.dev, devs_may_alloc->d); -+ *nr_effective += (flags & BUCKET_ALLOC_USE_DURABILITY) -+ ? durability : 1; -+ *have_cache |= !durability; -+ -+ ob_push(c, ptrs, ob); -+} -+ -+static int bch2_bucket_alloc_set(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct dev_stripe_state *stripe, -+ struct bch_devs_mask *devs_may_alloc, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ enum alloc_reserve reserve, -+ unsigned flags, -+ struct closure *cl) -+{ -+ struct dev_alloc_list devs_sorted = -+ bch2_dev_alloc_list(c, stripe, devs_may_alloc); -+ struct bch_dev *ca; -+ bool alloc_failure = false; -+ unsigned i; -+ -+ BUG_ON(*nr_effective >= nr_replicas); -+ -+ for (i = 0; i < devs_sorted.nr; i++) { -+ struct open_bucket *ob; -+ -+ ca = rcu_dereference(c->devs[devs_sorted.devs[i]]); -+ if (!ca) -+ continue; -+ -+ if (!ca->mi.durability && *have_cache) -+ continue; -+ -+ ob = bch2_bucket_alloc(c, ca, reserve, -+ flags & BUCKET_MAY_ALLOC_PARTIAL, cl); -+ if (IS_ERR(ob)) { -+ enum bucket_alloc_ret ret = -PTR_ERR(ob); -+ -+ WARN_ON(reserve == RESERVE_MOVINGGC && -+ ret != OPEN_BUCKETS_EMPTY); -+ -+ if (cl) -+ return -EAGAIN; -+ if (ret == OPEN_BUCKETS_EMPTY) -+ return -ENOSPC; -+ alloc_failure = true; -+ continue; -+ } -+ -+ add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_effective, have_cache, flags, ob); -+ -+ bch2_dev_stripe_increment(c, ca, stripe); -+ -+ if (*nr_effective >= nr_replicas) -+ return 0; -+ } -+ -+ return alloc_failure ? 
-ENOSPC : -EROFS; -+} -+ -+/* Allocate from stripes: */ -+ -+/* -+ * XXX: use a higher watermark for allocating open buckets here: -+ */ -+static int ec_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) -+{ -+ struct bch_devs_mask devs; -+ struct open_bucket *ob; -+ unsigned i, nr_have = 0, nr_data = -+ min_t(unsigned, h->nr_active_devs, -+ EC_STRIPE_MAX) - h->redundancy; -+ bool have_cache = true; -+ int ret = 0; -+ -+ BUG_ON(h->blocks.nr > nr_data); -+ BUG_ON(h->parity.nr > h->redundancy); -+ -+ devs = h->devs; -+ -+ open_bucket_for_each(c, &h->parity, ob, i) -+ __clear_bit(ob->ptr.dev, devs.d); -+ open_bucket_for_each(c, &h->blocks, ob, i) -+ __clear_bit(ob->ptr.dev, devs.d); -+ -+ percpu_down_read(&c->mark_lock); -+ rcu_read_lock(); -+ -+ if (h->parity.nr < h->redundancy) { -+ nr_have = h->parity.nr; -+ -+ ret = bch2_bucket_alloc_set(c, &h->parity, -+ &h->parity_stripe, -+ &devs, -+ h->redundancy, -+ &nr_have, -+ &have_cache, -+ RESERVE_NONE, -+ 0, -+ NULL); -+ if (ret) -+ goto err; -+ } -+ -+ if (h->blocks.nr < nr_data) { -+ nr_have = h->blocks.nr; -+ -+ ret = bch2_bucket_alloc_set(c, &h->blocks, -+ &h->block_stripe, -+ &devs, -+ nr_data, -+ &nr_have, -+ &have_cache, -+ RESERVE_NONE, -+ 0, -+ NULL); -+ if (ret) -+ goto err; -+ } -+ -+ rcu_read_unlock(); -+ percpu_up_read(&c->mark_lock); -+ -+ return bch2_ec_stripe_new_alloc(c, h); -+err: -+ rcu_read_unlock(); -+ percpu_up_read(&c->mark_lock); -+ return -1; -+} -+ -+/* -+ * if we can't allocate a new stripe because there are already too many -+ * partially filled stripes, force allocating from an existing stripe even when -+ * it's to a device we don't want: -+ */ -+ -+static void bucket_alloc_from_stripe(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct write_point *wp, -+ struct bch_devs_mask *devs_may_alloc, -+ u16 target, -+ unsigned erasure_code, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ unsigned flags) -+{ -+ struct dev_alloc_list devs_sorted; -+ struct ec_stripe_head *h; -+ struct open_bucket *ob; -+ struct bch_dev *ca; -+ unsigned i, ec_idx; -+ -+ if (!erasure_code) -+ return; -+ -+ if (nr_replicas < 2) -+ return; -+ -+ if (ec_open_bucket(c, ptrs)) -+ return; -+ -+ h = bch2_ec_stripe_head_get(c, target, erasure_code, nr_replicas - 1); -+ if (!h) -+ return; -+ -+ if (!h->s && ec_stripe_alloc(c, h)) -+ goto out_put_head; -+ -+ rcu_read_lock(); -+ devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); -+ rcu_read_unlock(); -+ -+ for (i = 0; i < devs_sorted.nr; i++) -+ open_bucket_for_each(c, &h->s->blocks, ob, ec_idx) -+ if (ob->ptr.dev == devs_sorted.devs[i] && -+ !test_and_set_bit(ec_idx, h->s->blocks_allocated)) -+ goto got_bucket; -+ goto out_put_head; -+got_bucket: -+ ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ -+ ob->ec_idx = ec_idx; -+ ob->ec = h->s; -+ -+ add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_effective, have_cache, flags, ob); -+ atomic_inc(&h->s->pin); -+out_put_head: -+ bch2_ec_stripe_head_put(h); -+} -+ -+/* Sector allocator */ -+ -+static void get_buckets_from_writepoint(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct write_point *wp, -+ struct bch_devs_mask *devs_may_alloc, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ unsigned flags, -+ bool need_ec) -+{ -+ struct open_buckets ptrs_skip = { .nr = 0 }; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ -+ if (*nr_effective < nr_replicas && -+ 
test_bit(ob->ptr.dev, devs_may_alloc->d) && -+ (ca->mi.durability || -+ (wp->type == BCH_DATA_USER && !*have_cache)) && -+ (ob->ec || !need_ec)) { -+ add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_effective, have_cache, -+ flags, ob); -+ } else { -+ ob_push(c, &ptrs_skip, ob); -+ } -+ } -+ wp->ptrs = ptrs_skip; -+} -+ -+static int open_bucket_add_buckets(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct write_point *wp, -+ struct bch_devs_list *devs_have, -+ u16 target, -+ unsigned erasure_code, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ enum alloc_reserve reserve, -+ unsigned flags, -+ struct closure *_cl) -+{ -+ struct bch_devs_mask devs; -+ struct open_bucket *ob; -+ struct closure *cl = NULL; -+ unsigned i; -+ int ret; -+ -+ rcu_read_lock(); -+ devs = target_rw_devs(c, wp->type, target); -+ rcu_read_unlock(); -+ -+ /* Don't allocate from devices we already have pointers to: */ -+ for (i = 0; i < devs_have->nr; i++) -+ __clear_bit(devs_have->devs[i], devs.d); -+ -+ open_bucket_for_each(c, ptrs, ob, i) -+ __clear_bit(ob->ptr.dev, devs.d); -+ -+ if (erasure_code) { -+ get_buckets_from_writepoint(c, ptrs, wp, &devs, -+ nr_replicas, nr_effective, -+ have_cache, flags, true); -+ if (*nr_effective >= nr_replicas) -+ return 0; -+ -+ bucket_alloc_from_stripe(c, ptrs, wp, &devs, -+ target, erasure_code, -+ nr_replicas, nr_effective, -+ have_cache, flags); -+ if (*nr_effective >= nr_replicas) -+ return 0; -+ } -+ -+ get_buckets_from_writepoint(c, ptrs, wp, &devs, -+ nr_replicas, nr_effective, -+ have_cache, flags, false); -+ if (*nr_effective >= nr_replicas) -+ return 0; -+ -+ percpu_down_read(&c->mark_lock); -+ rcu_read_lock(); -+ -+retry_blocking: -+ /* -+ * Try nonblocking first, so that if one device is full we'll try from -+ * other devices: -+ */ -+ ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs, -+ nr_replicas, nr_effective, have_cache, -+ reserve, flags, cl); -+ if (ret && ret != -EROFS && !cl && _cl) { -+ cl = _cl; -+ goto retry_blocking; -+ } -+ -+ rcu_read_unlock(); -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca, -+ struct open_buckets *obs) -+{ -+ struct open_buckets ptrs = { .nr = 0 }; -+ struct open_bucket *ob, *ob2; -+ unsigned i, j; -+ -+ open_bucket_for_each(c, obs, ob, i) { -+ bool drop = !ca || ob->ptr.dev == ca->dev_idx; -+ -+ if (!drop && ob->ec) { -+ mutex_lock(&ob->ec->lock); -+ open_bucket_for_each(c, &ob->ec->blocks, ob2, j) -+ drop |= ob2->ptr.dev == ca->dev_idx; -+ open_bucket_for_each(c, &ob->ec->parity, ob2, j) -+ drop |= ob2->ptr.dev == ca->dev_idx; -+ mutex_unlock(&ob->ec->lock); -+ } -+ -+ if (drop) -+ bch2_open_bucket_put(c, ob); -+ else -+ ob_push(c, &ptrs, ob); -+ } -+ -+ *obs = ptrs; -+} -+ -+void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca, -+ struct write_point *wp) -+{ -+ mutex_lock(&wp->lock); -+ bch2_open_buckets_stop_dev(c, ca, &wp->ptrs); -+ mutex_unlock(&wp->lock); -+} -+ -+static inline struct hlist_head *writepoint_hash(struct bch_fs *c, -+ unsigned long write_point) -+{ -+ unsigned hash = -+ hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash))); -+ -+ return &c->write_points_hash[hash]; -+} -+ -+static struct write_point *__writepoint_find(struct hlist_head *head, -+ unsigned long write_point) -+{ -+ struct write_point *wp; -+ -+ hlist_for_each_entry_rcu(wp, head, node) -+ if (wp->write_point == write_point) -+ return wp; -+ -+ return NULL; -+} -+ -+static inline bool too_many_writepoints(struct 
bch_fs *c, unsigned factor) -+{ -+ u64 stranded = c->write_points_nr * c->bucket_size_max; -+ u64 free = bch2_fs_usage_read_short(c).free; -+ -+ return stranded * factor > free; -+} -+ -+static bool try_increase_writepoints(struct bch_fs *c) -+{ -+ struct write_point *wp; -+ -+ if (c->write_points_nr == ARRAY_SIZE(c->write_points) || -+ too_many_writepoints(c, 32)) -+ return false; -+ -+ wp = c->write_points + c->write_points_nr++; -+ hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point)); -+ return true; -+} -+ -+static bool try_decrease_writepoints(struct bch_fs *c, -+ unsigned old_nr) -+{ -+ struct write_point *wp; -+ -+ mutex_lock(&c->write_points_hash_lock); -+ if (c->write_points_nr < old_nr) { -+ mutex_unlock(&c->write_points_hash_lock); -+ return true; -+ } -+ -+ if (c->write_points_nr == 1 || -+ !too_many_writepoints(c, 8)) { -+ mutex_unlock(&c->write_points_hash_lock); -+ return false; -+ } -+ -+ wp = c->write_points + --c->write_points_nr; -+ -+ hlist_del_rcu(&wp->node); -+ mutex_unlock(&c->write_points_hash_lock); -+ -+ bch2_writepoint_stop(c, NULL, wp); -+ return true; -+} -+ -+static struct write_point *writepoint_find(struct bch_fs *c, -+ unsigned long write_point) -+{ -+ struct write_point *wp, *oldest; -+ struct hlist_head *head; -+ -+ if (!(write_point & 1UL)) { -+ wp = (struct write_point *) write_point; -+ mutex_lock(&wp->lock); -+ return wp; -+ } -+ -+ head = writepoint_hash(c, write_point); -+restart_find: -+ wp = __writepoint_find(head, write_point); -+ if (wp) { -+lock_wp: -+ mutex_lock(&wp->lock); -+ if (wp->write_point == write_point) -+ goto out; -+ mutex_unlock(&wp->lock); -+ goto restart_find; -+ } -+restart_find_oldest: -+ oldest = NULL; -+ for (wp = c->write_points; -+ wp < c->write_points + c->write_points_nr; wp++) -+ if (!oldest || time_before64(wp->last_used, oldest->last_used)) -+ oldest = wp; -+ -+ mutex_lock(&oldest->lock); -+ mutex_lock(&c->write_points_hash_lock); -+ if (oldest >= c->write_points + c->write_points_nr || -+ try_increase_writepoints(c)) { -+ mutex_unlock(&c->write_points_hash_lock); -+ mutex_unlock(&oldest->lock); -+ goto restart_find_oldest; -+ } -+ -+ wp = __writepoint_find(head, write_point); -+ if (wp && wp != oldest) { -+ mutex_unlock(&c->write_points_hash_lock); -+ mutex_unlock(&oldest->lock); -+ goto lock_wp; -+ } -+ -+ wp = oldest; -+ hlist_del_rcu(&wp->node); -+ wp->write_point = write_point; -+ hlist_add_head_rcu(&wp->node, head); -+ mutex_unlock(&c->write_points_hash_lock); -+out: -+ wp->last_used = sched_clock(); -+ return wp; -+} -+ -+/* -+ * Get us an open_bucket we can allocate from, return with it locked: -+ */ -+struct write_point *bch2_alloc_sectors_start(struct bch_fs *c, -+ unsigned target, -+ unsigned erasure_code, -+ struct write_point_specifier write_point, -+ struct bch_devs_list *devs_have, -+ unsigned nr_replicas, -+ unsigned nr_replicas_required, -+ enum alloc_reserve reserve, -+ unsigned flags, -+ struct closure *cl) -+{ -+ struct write_point *wp; -+ struct open_bucket *ob; -+ struct open_buckets ptrs; -+ unsigned nr_effective, write_points_nr; -+ unsigned ob_flags = 0; -+ bool have_cache; -+ int ret, i; -+ -+ if (!(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) -+ ob_flags |= BUCKET_ALLOC_USE_DURABILITY; -+ -+ BUG_ON(!nr_replicas || !nr_replicas_required); -+retry: -+ ptrs.nr = 0; -+ nr_effective = 0; -+ write_points_nr = c->write_points_nr; -+ have_cache = false; -+ -+ wp = writepoint_find(c, write_point.v); -+ -+ if (wp->type == BCH_DATA_USER) -+ ob_flags |= BUCKET_MAY_ALLOC_PARTIAL; -+ -+ /* 
metadata may not allocate on cache devices: */ -+ if (wp->type != BCH_DATA_USER) -+ have_cache = true; -+ -+ if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) { -+ ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, -+ target, erasure_code, -+ nr_replicas, &nr_effective, -+ &have_cache, reserve, -+ ob_flags, cl); -+ } else { -+ ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, -+ target, erasure_code, -+ nr_replicas, &nr_effective, -+ &have_cache, reserve, -+ ob_flags, NULL); -+ if (!ret) -+ goto alloc_done; -+ -+ ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, -+ 0, erasure_code, -+ nr_replicas, &nr_effective, -+ &have_cache, reserve, -+ ob_flags, cl); -+ } -+alloc_done: -+ BUG_ON(!ret && nr_effective < nr_replicas); -+ -+ if (erasure_code && !ec_open_bucket(c, &ptrs)) -+ pr_debug("failed to get ec bucket: ret %u", ret); -+ -+ if (ret == -EROFS && -+ nr_effective >= nr_replicas_required) -+ ret = 0; -+ -+ if (ret) -+ goto err; -+ -+ /* Free buckets we didn't use: */ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ open_bucket_free_unused(c, ob, wp->type == BCH_DATA_USER); -+ -+ wp->ptrs = ptrs; -+ -+ wp->sectors_free = UINT_MAX; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ wp->sectors_free = min(wp->sectors_free, ob->sectors_free); -+ -+ BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX); -+ -+ verify_not_stale(c, &wp->ptrs); -+ -+ return wp; -+err: -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ if (ptrs.nr < ARRAY_SIZE(ptrs.v)) -+ ob_push(c, &ptrs, ob); -+ else -+ open_bucket_free_unused(c, ob, -+ wp->type == BCH_DATA_USER); -+ wp->ptrs = ptrs; -+ -+ mutex_unlock(&wp->lock); -+ -+ if (ret == -ENOSPC && -+ try_decrease_writepoints(c, write_points_nr)) -+ goto retry; -+ -+ return ERR_PTR(ret); -+} -+ -+/* -+ * Append pointers to the space we just allocated to @k, and mark @sectors space -+ * as allocated out of @ob -+ */ -+void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp, -+ struct bkey_i *k, unsigned sectors) -+ -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ BUG_ON(sectors > wp->sectors_free); -+ wp->sectors_free -= sectors; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ struct bch_extent_ptr tmp = ob->ptr; -+ -+ tmp.cached = !ca->mi.durability && -+ wp->type == BCH_DATA_USER; -+ -+ tmp.offset += ca->mi.bucket_size - ob->sectors_free; -+ bch2_bkey_append_ptr(k, tmp); -+ -+ BUG_ON(sectors > ob->sectors_free); -+ ob->sectors_free -= sectors; -+ } -+} -+ -+/* -+ * Append pointers to the space we just allocated to @k, and mark @sectors space -+ * as allocated out of @ob -+ */ -+void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp) -+{ -+ struct open_buckets ptrs = { .nr = 0 }, keep = { .nr = 0 }; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ ob_push(c, !ob->sectors_free ? 
&ptrs : &keep, ob); -+ wp->ptrs = keep; -+ -+ mutex_unlock(&wp->lock); -+ -+ bch2_open_buckets_put(c, &ptrs); -+} -+ -+void bch2_fs_allocator_foreground_init(struct bch_fs *c) -+{ -+ struct open_bucket *ob; -+ struct write_point *wp; -+ -+ mutex_init(&c->write_points_hash_lock); -+ c->write_points_nr = ARRAY_SIZE(c->write_points); -+ -+ /* open bucket 0 is a sentinal NULL: */ -+ spin_lock_init(&c->open_buckets[0].lock); -+ -+ for (ob = c->open_buckets + 1; -+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { -+ spin_lock_init(&ob->lock); -+ c->open_buckets_nr_free++; -+ -+ ob->freelist = c->open_buckets_freelist; -+ c->open_buckets_freelist = ob - c->open_buckets; -+ } -+ -+ writepoint_init(&c->btree_write_point, BCH_DATA_BTREE); -+ writepoint_init(&c->rebalance_write_point, BCH_DATA_USER); -+ -+ for (wp = c->write_points; -+ wp < c->write_points + c->write_points_nr; wp++) { -+ writepoint_init(wp, BCH_DATA_USER); -+ -+ wp->last_used = sched_clock(); -+ wp->write_point = (unsigned long) wp; -+ hlist_add_head_rcu(&wp->node, -+ writepoint_hash(c, wp->write_point)); -+ } -+} -diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h -new file mode 100644 -index 000000000000..687f973e4b3a ---- /dev/null -+++ b/fs/bcachefs/alloc_foreground.h -@@ -0,0 +1,133 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ALLOC_FOREGROUND_H -+#define _BCACHEFS_ALLOC_FOREGROUND_H -+ -+#include "bcachefs.h" -+#include "alloc_types.h" -+ -+#include -+ -+struct bkey; -+struct bch_dev; -+struct bch_fs; -+struct bch_devs_List; -+ -+struct dev_alloc_list { -+ unsigned nr; -+ u8 devs[BCH_SB_MEMBERS_MAX]; -+}; -+ -+struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *, -+ struct dev_stripe_state *, -+ struct bch_devs_mask *); -+void bch2_dev_stripe_increment(struct bch_fs *, struct bch_dev *, -+ struct dev_stripe_state *); -+ -+long bch2_bucket_alloc_new_fs(struct bch_dev *); -+ -+struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *, -+ enum alloc_reserve, bool, -+ struct closure *); -+ -+static inline void ob_push(struct bch_fs *c, struct open_buckets *obs, -+ struct open_bucket *ob) -+{ -+ BUG_ON(obs->nr >= ARRAY_SIZE(obs->v)); -+ -+ obs->v[obs->nr++] = ob - c->open_buckets; -+} -+ -+#define open_bucket_for_each(_c, _obs, _ob, _i) \ -+ for ((_i) = 0; \ -+ (_i) < (_obs)->nr && \ -+ ((_ob) = (_c)->open_buckets + (_obs)->v[_i], true); \ -+ (_i)++) -+ -+static inline struct open_bucket *ec_open_bucket(struct bch_fs *c, -+ struct open_buckets *obs) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, obs, ob, i) -+ if (ob->ec) -+ return ob; -+ -+ return NULL; -+} -+ -+void bch2_open_bucket_write_error(struct bch_fs *, -+ struct open_buckets *, unsigned); -+ -+void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *); -+ -+static inline void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) -+{ -+ if (atomic_dec_and_test(&ob->pin)) -+ __bch2_open_bucket_put(c, ob); -+} -+ -+static inline void bch2_open_buckets_put(struct bch_fs *c, -+ struct open_buckets *ptrs) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, ptrs, ob, i) -+ bch2_open_bucket_put(c, ob); -+ ptrs->nr = 0; -+} -+ -+static inline void bch2_open_bucket_get(struct bch_fs *c, -+ struct write_point *wp, -+ struct open_buckets *ptrs) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ ob->type = wp->type; -+ atomic_inc(&ob->pin); -+ ob_push(c, ptrs, ob); -+ } -+} -+ -+struct 
write_point *bch2_alloc_sectors_start(struct bch_fs *, -+ unsigned, unsigned, -+ struct write_point_specifier, -+ struct bch_devs_list *, -+ unsigned, unsigned, -+ enum alloc_reserve, -+ unsigned, -+ struct closure *); -+ -+void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *, -+ struct bkey_i *, unsigned); -+void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *); -+ -+void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *, -+ struct open_buckets *); -+ -+void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *, -+ struct write_point *); -+ -+static inline struct write_point_specifier writepoint_hashed(unsigned long v) -+{ -+ return (struct write_point_specifier) { .v = v | 1 }; -+} -+ -+static inline struct write_point_specifier writepoint_ptr(struct write_point *wp) -+{ -+ return (struct write_point_specifier) { .v = (unsigned long) wp }; -+} -+ -+static inline void writepoint_init(struct write_point *wp, -+ enum bch_data_type type) -+{ -+ mutex_init(&wp->lock); -+ wp->type = type; -+} -+ -+void bch2_fs_allocator_foreground_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */ -diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h -new file mode 100644 -index 000000000000..4f1465077994 ---- /dev/null -+++ b/fs/bcachefs/alloc_types.h -@@ -0,0 +1,112 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ALLOC_TYPES_H -+#define _BCACHEFS_ALLOC_TYPES_H -+ -+#include -+#include -+ -+#include "clock_types.h" -+#include "fifo.h" -+ -+struct ec_bucket_buf; -+ -+/* There's two of these clocks, one for reads and one for writes: */ -+struct bucket_clock { -+ /* -+ * "now" in (read/write) IO time - incremented whenever we do X amount -+ * of reads or writes. -+ * -+ * Goes with the bucket read/write prios: when we read or write to a -+ * bucket we reset the bucket's prio to the current hand; thus hand - -+ * prio = time since bucket was last read/written. -+ * -+ * The units are some amount (bytes/sectors) of data read/written, and -+ * the units can change on the fly if we need to rescale to fit -+ * everything in a u16 - your only guarantee is that the units are -+ * consistent. 
-+ */ -+ u16 hand; -+ u16 max_last_io; -+ -+ int rw; -+ -+ struct io_timer rescale; -+ struct mutex lock; -+}; -+ -+/* There is one reserve for each type of btree, one for prios and gens -+ * and one for moving GC */ -+enum alloc_reserve { -+ RESERVE_ALLOC = -1, -+ RESERVE_BTREE = 0, -+ RESERVE_MOVINGGC = 1, -+ RESERVE_NONE = 2, -+ RESERVE_NR = 3, -+}; -+ -+typedef FIFO(long) alloc_fifo; -+ -+#define OPEN_BUCKETS_COUNT 1024 -+ -+#define WRITE_POINT_HASH_NR 32 -+#define WRITE_POINT_MAX 32 -+ -+typedef u16 open_bucket_idx_t; -+ -+struct open_bucket { -+ spinlock_t lock; -+ atomic_t pin; -+ open_bucket_idx_t freelist; -+ -+ /* -+ * When an open bucket has an ec_stripe attached, this is the index of -+ * the block in the stripe this open_bucket corresponds to: -+ */ -+ u8 ec_idx; -+ u8 type; -+ unsigned valid:1; -+ unsigned on_partial_list:1; -+ unsigned sectors_free; -+ struct bch_extent_ptr ptr; -+ struct ec_stripe_new *ec; -+}; -+ -+#define OPEN_BUCKET_LIST_MAX 15 -+ -+struct open_buckets { -+ open_bucket_idx_t nr; -+ open_bucket_idx_t v[OPEN_BUCKET_LIST_MAX]; -+}; -+ -+struct dev_stripe_state { -+ u64 next_alloc[BCH_SB_MEMBERS_MAX]; -+}; -+ -+struct write_point { -+ struct hlist_node node; -+ struct mutex lock; -+ u64 last_used; -+ unsigned long write_point; -+ enum bch_data_type type; -+ bool is_ec; -+ -+ /* calculated based on how many pointers we're actually going to use: */ -+ unsigned sectors_free; -+ -+ struct open_buckets ptrs; -+ struct dev_stripe_state stripe; -+}; -+ -+struct write_point_specifier { -+ unsigned long v; -+}; -+ -+struct alloc_heap_entry { -+ size_t bucket; -+ size_t nr; -+ unsigned long key; -+}; -+ -+typedef HEAP(struct alloc_heap_entry) alloc_heap; -+ -+#endif /* _BCACHEFS_ALLOC_TYPES_H */ -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -new file mode 100644 -index 000000000000..893c89dbee60 ---- /dev/null -+++ b/fs/bcachefs/bcachefs.h -@@ -0,0 +1,878 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_H -+#define _BCACHEFS_H -+ -+/* -+ * SOME HIGH LEVEL CODE DOCUMENTATION: -+ * -+ * Bcache mostly works with cache sets, cache devices, and backing devices. -+ * -+ * Support for multiple cache devices hasn't quite been finished off yet, but -+ * it's about 95% plumbed through. A cache set and its cache devices is sort of -+ * like a md raid array and its component devices. Most of the code doesn't care -+ * about individual cache devices, the main abstraction is the cache set. -+ * -+ * Multiple cache devices is intended to give us the ability to mirror dirty -+ * cached data and metadata, without mirroring clean cached data. -+ * -+ * Backing devices are different, in that they have a lifetime independent of a -+ * cache set. When you register a newly formatted backing device it'll come up -+ * in passthrough mode, and then you can attach and detach a backing device from -+ * a cache set at runtime - while it's mounted and in use. Detaching implicitly -+ * invalidates any cached data for that backing device. -+ * -+ * A cache set can have multiple (many) backing devices attached to it. -+ * -+ * There's also flash only volumes - this is the reason for the distinction -+ * between struct cached_dev and struct bcache_device. A flash only volume -+ * works much like a bcache device that has a backing device, except the -+ * "cached" data is always dirty. The end result is that we get thin -+ * provisioning with very little additional code. 
-+ * -+ * Flash only volumes work but they're not production ready because the moving -+ * garbage collector needs more work. More on that later. -+ * -+ * BUCKETS/ALLOCATION: -+ * -+ * Bcache is primarily designed for caching, which means that in normal -+ * operation all of our available space will be allocated. Thus, we need an -+ * efficient way of deleting things from the cache so we can write new things to -+ * it. -+ * -+ * To do this, we first divide the cache device up into buckets. A bucket is the -+ * unit of allocation; they're typically around 1 mb - anywhere from 128k to 2M+ -+ * works efficiently. -+ * -+ * Each bucket has a 16 bit priority, and an 8 bit generation associated with -+ * it. The gens and priorities for all the buckets are stored contiguously and -+ * packed on disk (in a linked list of buckets - aside from the superblock, all -+ * of bcache's metadata is stored in buckets). -+ * -+ * The priority is used to implement an LRU. We reset a bucket's priority when -+ * we allocate it or on cache it, and every so often we decrement the priority -+ * of each bucket. It could be used to implement something more sophisticated, -+ * if anyone ever gets around to it. -+ * -+ * The generation is used for invalidating buckets. Each pointer also has an 8 -+ * bit generation embedded in it; for a pointer to be considered valid, its gen -+ * must match the gen of the bucket it points into. Thus, to reuse a bucket all -+ * we have to do is increment its gen (and write its new gen to disk; we batch -+ * this up). -+ * -+ * Bcache is entirely COW - we never write twice to a bucket, even buckets that -+ * contain metadata (including btree nodes). -+ * -+ * THE BTREE: -+ * -+ * Bcache is in large part design around the btree. -+ * -+ * At a high level, the btree is just an index of key -> ptr tuples. -+ * -+ * Keys represent extents, and thus have a size field. Keys also have a variable -+ * number of pointers attached to them (potentially zero, which is handy for -+ * invalidating the cache). -+ * -+ * The key itself is an inode:offset pair. The inode number corresponds to a -+ * backing device or a flash only volume. The offset is the ending offset of the -+ * extent within the inode - not the starting offset; this makes lookups -+ * slightly more convenient. -+ * -+ * Pointers contain the cache device id, the offset on that device, and an 8 bit -+ * generation number. More on the gen later. -+ * -+ * Index lookups are not fully abstracted - cache lookups in particular are -+ * still somewhat mixed in with the btree code, but things are headed in that -+ * direction. -+ * -+ * Updates are fairly well abstracted, though. There are two different ways of -+ * updating the btree; insert and replace. -+ * -+ * BTREE_INSERT will just take a list of keys and insert them into the btree - -+ * overwriting (possibly only partially) any extents they overlap with. This is -+ * used to update the index after a write. -+ * -+ * BTREE_REPLACE is really cmpxchg(); it inserts a key into the btree iff it is -+ * overwriting a key that matches another given key. This is used for inserting -+ * data into the cache after a cache miss, and for background writeback, and for -+ * the moving garbage collector. -+ * -+ * There is no "delete" operation; deleting things from the index is -+ * accomplished by either by invalidating pointers (by incrementing a bucket's -+ * gen) or by inserting a key with 0 pointers - which will overwrite anything -+ * previously present at that location in the index. 
-+ * -+ * This means that there are always stale/invalid keys in the btree. They're -+ * filtered out by the code that iterates through a btree node, and removed when -+ * a btree node is rewritten. -+ * -+ * BTREE NODES: -+ * -+ * Our unit of allocation is a bucket, and we we can't arbitrarily allocate and -+ * free smaller than a bucket - so, that's how big our btree nodes are. -+ * -+ * (If buckets are really big we'll only use part of the bucket for a btree node -+ * - no less than 1/4th - but a bucket still contains no more than a single -+ * btree node. I'd actually like to change this, but for now we rely on the -+ * bucket's gen for deleting btree nodes when we rewrite/split a node.) -+ * -+ * Anyways, btree nodes are big - big enough to be inefficient with a textbook -+ * btree implementation. -+ * -+ * The way this is solved is that btree nodes are internally log structured; we -+ * can append new keys to an existing btree node without rewriting it. This -+ * means each set of keys we write is sorted, but the node is not. -+ * -+ * We maintain this log structure in memory - keeping 1Mb of keys sorted would -+ * be expensive, and we have to distinguish between the keys we have written and -+ * the keys we haven't. So to do a lookup in a btree node, we have to search -+ * each sorted set. But we do merge written sets together lazily, so the cost of -+ * these extra searches is quite low (normally most of the keys in a btree node -+ * will be in one big set, and then there'll be one or two sets that are much -+ * smaller). -+ * -+ * This log structure makes bcache's btree more of a hybrid between a -+ * conventional btree and a compacting data structure, with some of the -+ * advantages of both. -+ * -+ * GARBAGE COLLECTION: -+ * -+ * We can't just invalidate any bucket - it might contain dirty data or -+ * metadata. If it once contained dirty data, other writes might overwrite it -+ * later, leaving no valid pointers into that bucket in the index. -+ * -+ * Thus, the primary purpose of garbage collection is to find buckets to reuse. -+ * It also counts how much valid data it each bucket currently contains, so that -+ * allocation can reuse buckets sooner when they've been mostly overwritten. -+ * -+ * It also does some things that are really internal to the btree -+ * implementation. If a btree node contains pointers that are stale by more than -+ * some threshold, it rewrites the btree node to avoid the bucket's generation -+ * wrapping around. It also merges adjacent btree nodes if they're empty enough. -+ * -+ * THE JOURNAL: -+ * -+ * Bcache's journal is not necessary for consistency; we always strictly -+ * order metadata writes so that the btree and everything else is consistent on -+ * disk in the event of an unclean shutdown, and in fact bcache had writeback -+ * caching (with recovery from unclean shutdown) before journalling was -+ * implemented. -+ * -+ * Rather, the journal is purely a performance optimization; we can't complete a -+ * write until we've updated the index on disk, otherwise the cache would be -+ * inconsistent in the event of an unclean shutdown. This means that without the -+ * journal, on random write workloads we constantly have to update all the leaf -+ * nodes in the btree, and those writes will be mostly empty (appending at most -+ * a few keys each) - highly inefficient in terms of amount of metadata writes, -+ * and it puts more strain on the various btree resorting/compacting code. 
-+ * -+ * The journal is just a log of keys we've inserted; on startup we just reinsert -+ * all the keys in the open journal entries. That means that when we're updating -+ * a node in the btree, we can wait until a 4k block of keys fills up before -+ * writing them out. -+ * -+ * For simplicity, we only journal updates to leaf nodes; updates to parent -+ * nodes are rare enough (since our leaf nodes are huge) that it wasn't worth -+ * the complexity to deal with journalling them (in particular, journal replay) -+ * - updates to non leaf nodes just happen synchronously (see btree_split()). -+ */ -+ -+#undef pr_fmt -+#define pr_fmt(fmt) "bcachefs: %s() " fmt "\n", __func__ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "bcachefs_format.h" -+#include "fifo.h" -+#include "opts.h" -+#include "util.h" -+ -+#include -+ -+#define bch2_fs_init_fault(name) \ -+ dynamic_fault("bcachefs:bch_fs_init:" name) -+#define bch2_meta_read_fault(name) \ -+ dynamic_fault("bcachefs:meta:read:" name) -+#define bch2_meta_write_fault(name) \ -+ dynamic_fault("bcachefs:meta:write:" name) -+ -+#ifdef __KERNEL__ -+#define bch2_fmt(_c, fmt) "bcachefs (%s): " fmt "\n", ((_c)->name) -+#else -+#define bch2_fmt(_c, fmt) fmt "\n" -+#endif -+ -+#define bch_info(c, fmt, ...) \ -+ printk(KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_notice(c, fmt, ...) \ -+ printk(KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_warn(c, fmt, ...) \ -+ printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_warn_ratelimited(c, fmt, ...) \ -+ printk_ratelimited(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_err(c, fmt, ...) \ -+ printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_err_ratelimited(c, fmt, ...) \ -+ printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) -+ -+#define bch_verbose(c, fmt, ...) \ -+do { \ -+ if ((c)->opts.verbose) \ -+ bch_info(c, fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+#define pr_verbose_init(opts, fmt, ...) 
\ -+do { \ -+ if (opt_get(opts, verbose)) \ -+ pr_info(fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+/* Parameters that are useful for debugging, but should always be compiled in: */ -+#define BCH_DEBUG_PARAMS_ALWAYS() \ -+ BCH_DEBUG_PARAM(key_merging_disabled, \ -+ "Disables merging of extents") \ -+ BCH_DEBUG_PARAM(btree_gc_always_rewrite, \ -+ "Causes mark and sweep to compact and rewrite every " \ -+ "btree node it traverses") \ -+ BCH_DEBUG_PARAM(btree_gc_rewrite_disabled, \ -+ "Disables rewriting of btree nodes during mark and sweep")\ -+ BCH_DEBUG_PARAM(btree_shrinker_disabled, \ -+ "Disables the shrinker callback for the btree node cache") -+ -+/* Parameters that should only be compiled in in debug mode: */ -+#define BCH_DEBUG_PARAMS_DEBUG() \ -+ BCH_DEBUG_PARAM(expensive_debug_checks, \ -+ "Enables various runtime debugging checks that " \ -+ "significantly affect performance") \ -+ BCH_DEBUG_PARAM(debug_check_iterators, \ -+ "Enables extra verification for btree iterators") \ -+ BCH_DEBUG_PARAM(debug_check_bkeys, \ -+ "Run bkey_debugcheck (primarily checking GC/allocation "\ -+ "information) when iterating over keys") \ -+ BCH_DEBUG_PARAM(verify_btree_ondisk, \ -+ "Reread btree nodes at various points to verify the " \ -+ "mergesort in the read path against modifications " \ -+ "done in memory") \ -+ BCH_DEBUG_PARAM(journal_seq_verify, \ -+ "Store the journal sequence number in the version " \ -+ "number of every btree key, and verify that btree " \ -+ "update ordering is preserved during recovery") \ -+ BCH_DEBUG_PARAM(inject_invalid_keys, \ -+ "Store the journal sequence number in the version " \ -+ "number of every btree key, and verify that btree " \ -+ "update ordering is preserved during recovery") \ -+ BCH_DEBUG_PARAM(test_alloc_startup, \ -+ "Force allocator startup to use the slowpath where it" \ -+ "can't find enough free buckets without invalidating" \ -+ "cached data") \ -+ BCH_DEBUG_PARAM(force_reconstruct_read, \ -+ "Force reads to use the reconstruct path, when reading" \ -+ "from erasure coded extents") \ -+ BCH_DEBUG_PARAM(test_restart_gc, \ -+ "Test restarting mark and sweep gc when bucket gens change") -+ -+#define BCH_DEBUG_PARAMS_ALL() BCH_DEBUG_PARAMS_ALWAYS() BCH_DEBUG_PARAMS_DEBUG() -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALL() -+#else -+#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALWAYS() -+#endif -+ -+#define BCH_TIME_STATS() \ -+ x(btree_node_mem_alloc) \ -+ x(btree_node_split) \ -+ x(btree_node_sort) \ -+ x(btree_node_read) \ -+ x(btree_gc) \ -+ x(btree_lock_contended_read) \ -+ x(btree_lock_contended_intent) \ -+ x(btree_lock_contended_write) \ -+ x(data_write) \ -+ x(data_read) \ -+ x(data_promote) \ -+ x(journal_write) \ -+ x(journal_delay) \ -+ x(journal_flush_seq) \ -+ x(blocked_journal) \ -+ x(blocked_allocate) \ -+ x(blocked_allocate_open_bucket) -+ -+enum bch_time_stats { -+#define x(name) BCH_TIME_##name, -+ BCH_TIME_STATS() -+#undef x -+ BCH_TIME_STAT_NR -+}; -+ -+#include "alloc_types.h" -+#include "btree_types.h" -+#include "buckets_types.h" -+#include "clock_types.h" -+#include "ec_types.h" -+#include "journal_types.h" -+#include "keylist_types.h" -+#include "quota_types.h" -+#include "rebalance_types.h" -+#include "replicas_types.h" -+#include "super_types.h" -+ -+/* Number of nodes btree coalesce will try to coalesce at once */ -+#define GC_MERGE_NODES 4U -+ -+/* Maximum number of nodes we might need to allocate atomically: */ -+#define BTREE_RESERVE_MAX (BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1)) -+ 
-+/* Size of the freelist we allocate btree nodes from: */ -+#define BTREE_NODE_RESERVE (BTREE_RESERVE_MAX * 4) -+ -+#define BTREE_NODE_OPEN_BUCKET_RESERVE (BTREE_RESERVE_MAX * BCH_REPLICAS_MAX) -+ -+struct btree; -+ -+enum gc_phase { -+ GC_PHASE_NOT_RUNNING, -+ GC_PHASE_START, -+ GC_PHASE_SB, -+ -+ GC_PHASE_BTREE_EC, -+ GC_PHASE_BTREE_EXTENTS, -+ GC_PHASE_BTREE_INODES, -+ GC_PHASE_BTREE_DIRENTS, -+ GC_PHASE_BTREE_XATTRS, -+ GC_PHASE_BTREE_ALLOC, -+ GC_PHASE_BTREE_QUOTAS, -+ GC_PHASE_BTREE_REFLINK, -+ -+ GC_PHASE_PENDING_DELETE, -+ GC_PHASE_ALLOC, -+}; -+ -+struct gc_pos { -+ enum gc_phase phase; -+ struct bpos pos; -+ unsigned level; -+}; -+ -+struct io_count { -+ u64 sectors[2][BCH_DATA_NR]; -+}; -+ -+struct bch_dev { -+ struct kobject kobj; -+ struct percpu_ref ref; -+ struct completion ref_completion; -+ struct percpu_ref io_ref; -+ struct completion io_ref_completion; -+ -+ struct bch_fs *fs; -+ -+ u8 dev_idx; -+ /* -+ * Cached version of this device's member info from superblock -+ * Committed by bch2_write_super() -> bch_fs_mi_update() -+ */ -+ struct bch_member_cpu mi; -+ uuid_le uuid; -+ char name[BDEVNAME_SIZE]; -+ -+ struct bch_sb_handle disk_sb; -+ struct bch_sb *sb_read_scratch; -+ int sb_write_error; -+ -+ struct bch_devs_mask self; -+ -+ /* biosets used in cloned bios for writing multiple replicas */ -+ struct bio_set replica_set; -+ -+ /* -+ * Buckets: -+ * Per-bucket arrays are protected by c->mark_lock, bucket_lock and -+ * gc_lock, for device resize - holding any is sufficient for access: -+ * Or rcu_read_lock(), but only for ptr_stale(): -+ */ -+ struct bucket_array __rcu *buckets[2]; -+ unsigned long *buckets_nouse; -+ struct rw_semaphore bucket_lock; -+ -+ struct bch_dev_usage __percpu *usage[2]; -+ -+ /* Allocator: */ -+ struct task_struct __rcu *alloc_thread; -+ -+ /* -+ * free: Buckets that are ready to be used -+ * -+ * free_inc: Incoming buckets - these are buckets that currently have -+ * cached data in them, and we can't reuse them until after we write -+ * their new gen to disk. 
After prio_write() finishes writing the new -+ * gens/prios, they'll be moved to the free list (and possibly discarded -+ * in the process) -+ */ -+ alloc_fifo free[RESERVE_NR]; -+ alloc_fifo free_inc; -+ -+ open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT]; -+ open_bucket_idx_t open_buckets_partial_nr; -+ -+ size_t fifo_last_bucket; -+ -+ /* last calculated minimum prio */ -+ u16 max_last_bucket_io[2]; -+ -+ size_t inc_gen_needs_gc; -+ size_t inc_gen_really_needs_gc; -+ -+ /* -+ * XXX: this should be an enum for allocator state, so as to include -+ * error state -+ */ -+ enum { -+ ALLOCATOR_STOPPED, -+ ALLOCATOR_RUNNING, -+ ALLOCATOR_BLOCKED, -+ ALLOCATOR_BLOCKED_FULL, -+ } allocator_state; -+ -+ alloc_heap alloc_heap; -+ -+ /* Copying GC: */ -+ struct task_struct *copygc_thread; -+ copygc_heap copygc_heap; -+ struct bch_pd_controller copygc_pd; -+ struct write_point copygc_write_point; -+ u64 copygc_threshold; -+ -+ atomic64_t rebalance_work; -+ -+ struct journal_device journal; -+ -+ struct work_struct io_error_work; -+ -+ /* The rest of this all shows up in sysfs */ -+ atomic64_t cur_latency[2]; -+ struct time_stats io_latency[2]; -+ -+#define CONGESTED_MAX 1024 -+ atomic_t congested; -+ u64 congested_last; -+ -+ struct io_count __percpu *io_done; -+}; -+ -+enum { -+ /* startup: */ -+ BCH_FS_ALLOC_READ_DONE, -+ BCH_FS_ALLOC_CLEAN, -+ BCH_FS_ALLOCATOR_RUNNING, -+ BCH_FS_ALLOCATOR_STOPPING, -+ BCH_FS_INITIAL_GC_DONE, -+ BCH_FS_BTREE_INTERIOR_REPLAY_DONE, -+ BCH_FS_FSCK_DONE, -+ BCH_FS_STARTED, -+ BCH_FS_RW, -+ -+ /* shutdown: */ -+ BCH_FS_STOPPING, -+ BCH_FS_EMERGENCY_RO, -+ BCH_FS_WRITE_DISABLE_COMPLETE, -+ -+ /* errors: */ -+ BCH_FS_ERROR, -+ BCH_FS_ERRORS_FIXED, -+ -+ /* misc: */ -+ BCH_FS_BDEV_MOUNTED, -+ BCH_FS_FIXED_GENS, -+ BCH_FS_ALLOC_WRITTEN, -+ BCH_FS_REBUILD_REPLICAS, -+ BCH_FS_HOLD_BTREE_WRITES, -+}; -+ -+struct btree_debug { -+ unsigned id; -+ struct dentry *btree; -+ struct dentry *btree_format; -+ struct dentry *failed; -+}; -+ -+struct bch_fs_pcpu { -+ u64 sectors_available; -+}; -+ -+struct journal_seq_blacklist_table { -+ size_t nr; -+ struct journal_seq_blacklist_table_entry { -+ u64 start; -+ u64 end; -+ bool dirty; -+ } entries[0]; -+}; -+ -+struct journal_keys { -+ struct journal_key { -+ enum btree_id btree_id:8; -+ unsigned level:8; -+ struct bkey_i *k; -+ u32 journal_seq; -+ u32 journal_offset; -+ } *d; -+ size_t nr; -+ u64 journal_seq_base; -+}; -+ -+struct bch_fs { -+ struct closure cl; -+ -+ struct list_head list; -+ struct kobject kobj; -+ struct kobject internal; -+ struct kobject opts_dir; -+ struct kobject time_stats; -+ unsigned long flags; -+ -+ int minor; -+ struct device *chardev; -+ struct super_block *vfs_sb; -+ char name[40]; -+ -+ /* ro/rw, add/remove/resize devices: */ -+ struct rw_semaphore state_lock; -+ -+ /* Counts outstanding writes, for clean transition to read-only */ -+ struct percpu_ref writes; -+ struct work_struct read_only_work; -+ -+ struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX]; -+ -+ struct bch_replicas_cpu replicas; -+ struct bch_replicas_cpu replicas_gc; -+ struct mutex replicas_gc_lock; -+ -+ struct journal_entry_res replicas_journal_res; -+ -+ struct bch_disk_groups_cpu __rcu *disk_groups; -+ -+ struct bch_opts opts; -+ -+ /* Updated by bch2_sb_update():*/ -+ struct { -+ uuid_le uuid; -+ uuid_le user_uuid; -+ -+ u16 version; -+ u16 encoded_extent_max; -+ -+ u8 nr_devices; -+ u8 clean; -+ -+ u8 encryption_type; -+ -+ u64 time_base_lo; -+ u32 time_base_hi; -+ u32 time_precision; -+ u64 features; -+ u64 compat; -+ } sb; 
-+ -+ struct bch_sb_handle disk_sb; -+ -+ unsigned short block_bits; /* ilog2(block_size) */ -+ -+ u16 btree_foreground_merge_threshold; -+ -+ struct closure sb_write; -+ struct mutex sb_lock; -+ -+ /* BTREE CACHE */ -+ struct bio_set btree_bio; -+ -+ struct btree_root btree_roots[BTREE_ID_NR]; -+ struct mutex btree_root_lock; -+ -+ struct btree_cache btree_cache; -+ -+ /* -+ * Cache of allocated btree nodes - if we allocate a btree node and -+ * don't use it, if we free it that space can't be reused until going -+ * _all_ the way through the allocator (which exposes us to a livelock -+ * when allocating btree reserves fail halfway through) - instead, we -+ * can stick them here: -+ */ -+ struct btree_alloc btree_reserve_cache[BTREE_NODE_RESERVE * 2]; -+ unsigned btree_reserve_cache_nr; -+ struct mutex btree_reserve_cache_lock; -+ -+ mempool_t btree_interior_update_pool; -+ struct list_head btree_interior_update_list; -+ struct list_head btree_interior_updates_unwritten; -+ struct mutex btree_interior_update_lock; -+ struct closure_waitlist btree_interior_update_wait; -+ -+ struct workqueue_struct *btree_interior_update_worker; -+ struct work_struct btree_interior_update_work; -+ -+ /* btree_iter.c: */ -+ struct mutex btree_trans_lock; -+ struct list_head btree_trans_list; -+ mempool_t btree_iters_pool; -+ -+ struct btree_key_cache btree_key_cache; -+ -+ struct workqueue_struct *wq; -+ /* copygc needs its own workqueue for index updates.. */ -+ struct workqueue_struct *copygc_wq; -+ struct workqueue_struct *journal_reclaim_wq; -+ -+ /* ALLOCATION */ -+ struct delayed_work pd_controllers_update; -+ unsigned pd_controllers_update_seconds; -+ -+ struct bch_devs_mask rw_devs[BCH_DATA_NR]; -+ -+ u64 capacity; /* sectors */ -+ -+ /* -+ * When capacity _decreases_ (due to a disk being removed), we -+ * increment capacity_gen - this invalidates outstanding reservations -+ * and forces them to be revalidated -+ */ -+ u32 capacity_gen; -+ unsigned bucket_size_max; -+ -+ atomic64_t sectors_available; -+ -+ struct bch_fs_pcpu __percpu *pcpu; -+ -+ struct percpu_rw_semaphore mark_lock; -+ -+ seqcount_t usage_lock; -+ struct bch_fs_usage *usage_base; -+ struct bch_fs_usage __percpu *usage[2]; -+ struct bch_fs_usage __percpu *usage_gc; -+ -+ /* single element mempool: */ -+ struct mutex usage_scratch_lock; -+ struct bch_fs_usage *usage_scratch; -+ -+ /* -+ * When we invalidate buckets, we use both the priority and the amount -+ * of good data to determine which buckets to reuse first - to weight -+ * those together consistently we keep track of the smallest nonzero -+ * priority of any bucket. 
-+ */ -+ struct bucket_clock bucket_clock[2]; -+ -+ struct io_clock io_clock[2]; -+ -+ /* JOURNAL SEQ BLACKLIST */ -+ struct journal_seq_blacklist_table * -+ journal_seq_blacklist_table; -+ struct work_struct journal_seq_blacklist_gc_work; -+ -+ /* ALLOCATOR */ -+ spinlock_t freelist_lock; -+ struct closure_waitlist freelist_wait; -+ u64 blocked_allocate; -+ u64 blocked_allocate_open_bucket; -+ open_bucket_idx_t open_buckets_freelist; -+ open_bucket_idx_t open_buckets_nr_free; -+ struct closure_waitlist open_buckets_wait; -+ struct open_bucket open_buckets[OPEN_BUCKETS_COUNT]; -+ -+ struct write_point btree_write_point; -+ struct write_point rebalance_write_point; -+ -+ struct write_point write_points[WRITE_POINT_MAX]; -+ struct hlist_head write_points_hash[WRITE_POINT_HASH_NR]; -+ struct mutex write_points_hash_lock; -+ unsigned write_points_nr; -+ -+ /* GARBAGE COLLECTION */ -+ struct task_struct *gc_thread; -+ atomic_t kick_gc; -+ unsigned long gc_count; -+ -+ /* -+ * Tracks GC's progress - everything in the range [ZERO_KEY..gc_cur_pos] -+ * has been marked by GC. -+ * -+ * gc_cur_phase is a superset of btree_ids (BTREE_ID_EXTENTS etc.) -+ * -+ * Protected by gc_pos_lock. Only written to by GC thread, so GC thread -+ * can read without a lock. -+ */ -+ seqcount_t gc_pos_lock; -+ struct gc_pos gc_pos; -+ -+ /* -+ * The allocation code needs gc_mark in struct bucket to be correct, but -+ * it's not while a gc is in progress. -+ */ -+ struct rw_semaphore gc_lock; -+ -+ /* IO PATH */ -+ struct semaphore io_in_flight; -+ struct bio_set bio_read; -+ struct bio_set bio_read_split; -+ struct bio_set bio_write; -+ struct mutex bio_bounce_pages_lock; -+ mempool_t bio_bounce_pages; -+ struct rhashtable promote_table; -+ -+ mempool_t compression_bounce[2]; -+ mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR]; -+ mempool_t decompress_workspace; -+ ZSTD_parameters zstd_params; -+ -+ struct crypto_shash *sha256; -+ struct crypto_sync_skcipher *chacha20; -+ struct crypto_shash *poly1305; -+ -+ atomic64_t key_version; -+ -+ mempool_t large_bkey_pool; -+ -+ /* REBALANCE */ -+ struct bch_fs_rebalance rebalance; -+ -+ /* STRIPES: */ -+ GENRADIX(struct stripe) stripes[2]; -+ struct mutex ec_stripe_create_lock; -+ -+ ec_stripes_heap ec_stripes_heap; -+ spinlock_t ec_stripes_heap_lock; -+ -+ /* ERASURE CODING */ -+ struct list_head ec_new_stripe_list; -+ struct mutex ec_new_stripe_lock; -+ u64 ec_stripe_hint; -+ -+ struct bio_set ec_bioset; -+ -+ struct work_struct ec_stripe_delete_work; -+ struct llist_head ec_stripe_delete_list; -+ -+ /* REFLINK */ -+ u64 reflink_hint; -+ -+ /* VFS IO PATH - fs-io.c */ -+ struct bio_set writepage_bioset; -+ struct bio_set dio_write_bioset; -+ struct bio_set dio_read_bioset; -+ -+ struct bio_list btree_write_error_list; -+ struct work_struct btree_write_error_work; -+ spinlock_t btree_write_error_lock; -+ -+ /* ERRORS */ -+ struct list_head fsck_errors; -+ struct mutex fsck_error_lock; -+ bool fsck_alloc_err; -+ -+ /* QUOTAS */ -+ struct bch_memquota_type quotas[QTYP_NR]; -+ -+ /* DEBUG JUNK */ -+ struct dentry *debug; -+ struct btree_debug btree_debug[BTREE_ID_NR]; -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct btree *verify_data; -+ struct btree_node *verify_ondisk; -+ struct mutex verify_lock; -+#endif -+ -+ u64 unused_inode_hint; -+ -+ /* -+ * A btree node on disk could have too many bsets for an iterator to fit -+ * on the stack - have to dynamically allocate them -+ */ -+ mempool_t fill_iter; -+ -+ mempool_t btree_bounce_pool; -+ -+ struct journal journal; -+ struct 
list_head journal_entries; -+ struct journal_keys journal_keys; -+ -+ u64 last_bucket_seq_cleanup; -+ -+ /* The rest of this all shows up in sysfs */ -+ atomic_long_t read_realloc_races; -+ atomic_long_t extent_migrate_done; -+ atomic_long_t extent_migrate_raced; -+ -+ unsigned btree_gc_periodic:1; -+ unsigned copy_gc_enabled:1; -+ bool promote_whole_extents; -+ -+#define BCH_DEBUG_PARAM(name, description) bool name; -+ BCH_DEBUG_PARAMS_ALL() -+#undef BCH_DEBUG_PARAM -+ -+ struct time_stats times[BCH_TIME_STAT_NR]; -+}; -+ -+static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages) -+{ -+#ifndef NO_BCACHEFS_FS -+ if (c->vfs_sb) -+ c->vfs_sb->s_bdi->ra_pages = ra_pages; -+#endif -+} -+ -+static inline unsigned bucket_bytes(const struct bch_dev *ca) -+{ -+ return ca->mi.bucket_size << 9; -+} -+ -+static inline unsigned block_bytes(const struct bch_fs *c) -+{ -+ return c->opts.block_size << 9; -+} -+ -+static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, u64 time) -+{ -+ return ns_to_timespec64(time * c->sb.time_precision + c->sb.time_base_lo); -+} -+ -+static inline s64 timespec_to_bch2_time(struct bch_fs *c, struct timespec64 ts) -+{ -+ s64 ns = timespec64_to_ns(&ts) - c->sb.time_base_lo; -+ -+ if (c->sb.time_precision == 1) -+ return ns; -+ -+ return div_s64(ns, c->sb.time_precision); -+} -+ -+static inline s64 bch2_current_time(struct bch_fs *c) -+{ -+ struct timespec64 now; -+ -+ ktime_get_coarse_real_ts64(&now); -+ return timespec_to_bch2_time(c, now); -+} -+ -+static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev) -+{ -+ return dev < c->sb.nr_devices && c->devs[dev]; -+} -+ -+#endif /* _BCACHEFS_H */ -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -new file mode 100644 -index 000000000000..f808e63a713d ---- /dev/null -+++ b/fs/bcachefs/bcachefs_format.h -@@ -0,0 +1,1666 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FORMAT_H -+#define _BCACHEFS_FORMAT_H -+ -+/* -+ * bcachefs on disk data structures -+ * -+ * OVERVIEW: -+ * -+ * There are three main types of on disk data structures in bcachefs (this is -+ * reduced from 5 in bcache) -+ * -+ * - superblock -+ * - journal -+ * - btree -+ * -+ * The btree is the primary structure; most metadata exists as keys in the -+ * various btrees. There are only a small number of btrees, they're not -+ * sharded - we have one btree for extents, another for inodes, et cetera. -+ * -+ * SUPERBLOCK: -+ * -+ * The superblock contains the location of the journal, the list of devices in -+ * the filesystem, and in general any metadata we need in order to decide -+ * whether we can start a filesystem or prior to reading the journal/btree -+ * roots. -+ * -+ * The superblock is extensible, and most of the contents of the superblock are -+ * in variable length, type tagged fields; see struct bch_sb_field. -+ * -+ * Backup superblocks do not reside in a fixed location; also, superblocks do -+ * not have a fixed size. To locate backup superblocks we have struct -+ * bch_sb_layout; we store a copy of this inside every superblock, and also -+ * before the first superblock. -+ * -+ * JOURNAL: -+ * -+ * The journal primarily records btree updates in the order they occurred; -+ * journal replay consists of just iterating over all the keys in the open -+ * journal entries and re-inserting them into the btrees. -+ * -+ * The journal also contains entry types for the btree roots, and blacklisted -+ * journal sequence numbers (see journal_seq_blacklist.c). 
-+ * -+ * BTREE: -+ * -+ * bcachefs btrees are copy on write b+ trees, where nodes are big (typically -+ * 128k-256k) and log structured. We use struct btree_node for writing the first -+ * entry in a given node (offset 0), and struct btree_node_entry for all -+ * subsequent writes. -+ * -+ * After the header, btree node entries contain a list of keys in sorted order. -+ * Values are stored inline with the keys; since values are variable length (and -+ * keys effectively are variable length too, due to packing) we can't do random -+ * access without building up additional in memory tables in the btree node read -+ * path. -+ * -+ * BTREE KEYS (struct bkey): -+ * -+ * The various btrees share a common format for the key - so as to avoid -+ * switching in fastpath lookup/comparison code - but define their own -+ * structures for the key values. -+ * -+ * The size of a key/value pair is stored as a u8 in units of u64s, so the max -+ * size is just under 2k. The common part also contains a type tag for the -+ * value, and a format field indicating whether the key is packed or not (and -+ * also meant to allow adding new key fields in the future, if desired). -+ * -+ * bkeys, when stored within a btree node, may also be packed. In that case, the -+ * bkey_format in that node is used to unpack it. Packed bkeys mean that we can -+ * be generous with field sizes in the common part of the key format (64 bit -+ * inode number, 64 bit offset, 96 bit version field, etc.) for negligible cost. -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define LE_BITMASK(_bits, name, type, field, offset, end) \ -+static const unsigned name##_OFFSET = offset; \ -+static const unsigned name##_BITS = (end - offset); \ -+static const __u##_bits name##_MAX = (1ULL << (end - offset)) - 1; \ -+ \ -+static inline __u64 name(const type *k) \ -+{ \ -+ return (__le##_bits##_to_cpu(k->field) >> offset) & \ -+ ~(~0ULL << (end - offset)); \ -+} \ -+ \ -+static inline void SET_##name(type *k, __u64 v) \ -+{ \ -+ __u##_bits new = __le##_bits##_to_cpu(k->field); \ -+ \ -+ new &= ~(~(~0ULL << (end - offset)) << offset); \ -+ new |= (v & ~(~0ULL << (end - offset))) << offset; \ -+ k->field = __cpu_to_le##_bits(new); \ -+} -+ -+#define LE16_BITMASK(n, t, f, o, e) LE_BITMASK(16, n, t, f, o, e) -+#define LE32_BITMASK(n, t, f, o, e) LE_BITMASK(32, n, t, f, o, e) -+#define LE64_BITMASK(n, t, f, o, e) LE_BITMASK(64, n, t, f, o, e) -+ -+struct bkey_format { -+ __u8 key_u64s; -+ __u8 nr_fields; -+ /* One unused slot for now: */ -+ __u8 bits_per_field[6]; -+ __le64 field_offset[6]; -+}; -+ -+/* Btree keys - all units are in sectors */ -+ -+struct bpos { -+ /* -+ * Word order matches machine byte order - btree code treats a bpos as a -+ * single large integer, for search/comparison purposes -+ * -+ * Note that wherever a bpos is embedded in another on disk data -+ * structure, it has to be byte swabbed when reading in metadata that -+ * wasn't written in native endian order: -+ */ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ __u32 snapshot; -+ __u64 offset; -+ __u64 inode; -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ __u64 inode; -+ __u64 offset; /* Points to end of extent - sectors */ -+ __u32 snapshot; -+#else -+#error edit for your odd byteorder. 
-+#endif -+} __attribute__((packed, aligned(4))); -+ -+#define KEY_INODE_MAX ((__u64)~0ULL) -+#define KEY_OFFSET_MAX ((__u64)~0ULL) -+#define KEY_SNAPSHOT_MAX ((__u32)~0U) -+#define KEY_SIZE_MAX ((__u32)~0U) -+ -+static inline struct bpos POS(__u64 inode, __u64 offset) -+{ -+ struct bpos ret; -+ -+ ret.inode = inode; -+ ret.offset = offset; -+ ret.snapshot = 0; -+ -+ return ret; -+} -+ -+#define POS_MIN POS(0, 0) -+#define POS_MAX POS(KEY_INODE_MAX, KEY_OFFSET_MAX) -+ -+/* Empty placeholder struct, for container_of() */ -+struct bch_val { -+ __u64 __nothing[0]; -+}; -+ -+struct bversion { -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ __u64 lo; -+ __u32 hi; -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ __u32 hi; -+ __u64 lo; -+#endif -+} __attribute__((packed, aligned(4))); -+ -+struct bkey { -+ /* Size of combined key and value, in u64s */ -+ __u8 u64s; -+ -+ /* Format of key (0 for format local to btree node) */ -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u8 format:7, -+ needs_whiteout:1; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u8 needs_whiteout:1, -+ format:7; -+#else -+#error edit for your odd byteorder. -+#endif -+ -+ /* Type of the value */ -+ __u8 type; -+ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ __u8 pad[1]; -+ -+ struct bversion version; -+ __u32 size; /* extent size, in sectors */ -+ struct bpos p; -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ struct bpos p; -+ __u32 size; /* extent size, in sectors */ -+ struct bversion version; -+ -+ __u8 pad[1]; -+#endif -+} __attribute__((packed, aligned(8))); -+ -+struct bkey_packed { -+ __u64 _data[0]; -+ -+ /* Size of combined key and value, in u64s */ -+ __u8 u64s; -+ -+ /* Format of key (0 for format local to btree node) */ -+ -+ /* -+ * XXX: next incompat on disk format change, switch format and -+ * needs_whiteout - bkey_packed() will be cheaper if format is the high -+ * bits of the bitfield -+ */ -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u8 format:7, -+ needs_whiteout:1; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u8 needs_whiteout:1, -+ format:7; -+#endif -+ -+ /* Type of the value */ -+ __u8 type; -+ __u8 key_start[0]; -+ -+ /* -+ * We copy bkeys with struct assignment in various places, and while -+ * that shouldn't be done with packed bkeys we can't disallow it in C, -+ * and it's legal to cast a bkey to a bkey_packed - so padding it out -+ * to the same size as struct bkey should hopefully be safest. 
-+ */ -+ __u8 pad[sizeof(struct bkey) - 3]; -+} __attribute__((packed, aligned(8))); -+ -+#define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64)) -+#define BKEY_U64s_MAX U8_MAX -+#define BKEY_VAL_U64s_MAX (BKEY_U64s_MAX - BKEY_U64s) -+ -+#define KEY_PACKED_BITS_START 24 -+ -+#define KEY_FORMAT_LOCAL_BTREE 0 -+#define KEY_FORMAT_CURRENT 1 -+ -+enum bch_bkey_fields { -+ BKEY_FIELD_INODE, -+ BKEY_FIELD_OFFSET, -+ BKEY_FIELD_SNAPSHOT, -+ BKEY_FIELD_SIZE, -+ BKEY_FIELD_VERSION_HI, -+ BKEY_FIELD_VERSION_LO, -+ BKEY_NR_FIELDS, -+}; -+ -+#define bkey_format_field(name, field) \ -+ [BKEY_FIELD_##name] = (sizeof(((struct bkey *) NULL)->field) * 8) -+ -+#define BKEY_FORMAT_CURRENT \ -+((struct bkey_format) { \ -+ .key_u64s = BKEY_U64s, \ -+ .nr_fields = BKEY_NR_FIELDS, \ -+ .bits_per_field = { \ -+ bkey_format_field(INODE, p.inode), \ -+ bkey_format_field(OFFSET, p.offset), \ -+ bkey_format_field(SNAPSHOT, p.snapshot), \ -+ bkey_format_field(SIZE, size), \ -+ bkey_format_field(VERSION_HI, version.hi), \ -+ bkey_format_field(VERSION_LO, version.lo), \ -+ }, \ -+}) -+ -+/* bkey with inline value */ -+struct bkey_i { -+ __u64 _data[0]; -+ -+ union { -+ struct { -+ /* Size of combined key and value, in u64s */ -+ __u8 u64s; -+ }; -+ struct { -+ struct bkey k; -+ struct bch_val v; -+ }; -+ }; -+}; -+ -+#define KEY(_inode, _offset, _size) \ -+((struct bkey) { \ -+ .u64s = BKEY_U64s, \ -+ .format = KEY_FORMAT_CURRENT, \ -+ .p = POS(_inode, _offset), \ -+ .size = _size, \ -+}) -+ -+static inline void bkey_init(struct bkey *k) -+{ -+ *k = KEY(0, 0, 0); -+} -+ -+#define bkey_bytes(_k) ((_k)->u64s * sizeof(__u64)) -+ -+#define __BKEY_PADDED(key, pad) \ -+ struct { struct bkey_i key; __u64 key ## _pad[pad]; } -+ -+/* -+ * - DELETED keys are used internally to mark keys that should be ignored but -+ * override keys in composition order. Their version number is ignored. -+ * -+ * - DISCARDED keys indicate that the data is all 0s because it has been -+ * discarded. DISCARDs may have a version; if the version is nonzero the key -+ * will be persistent, otherwise the key will be dropped whenever the btree -+ * node is rewritten (like DELETED keys). -+ * -+ * - ERROR: any read of the data returns a read error, as the data was lost due -+ * to a failing device. Like DISCARDED keys, they can be removed (overridden) -+ * by new writes or cluster-wide GC. Node repair can also overwrite them with -+ * the same or a more recent version number, but not with an older version -+ * number. -+ * -+ * - WHITEOUT: for hash table btrees -+*/ -+#define BCH_BKEY_TYPES() \ -+ x(deleted, 0) \ -+ x(discard, 1) \ -+ x(error, 2) \ -+ x(cookie, 3) \ -+ x(whiteout, 4) \ -+ x(btree_ptr, 5) \ -+ x(extent, 6) \ -+ x(reservation, 7) \ -+ x(inode, 8) \ -+ x(inode_generation, 9) \ -+ x(dirent, 10) \ -+ x(xattr, 11) \ -+ x(alloc, 12) \ -+ x(quota, 13) \ -+ x(stripe, 14) \ -+ x(reflink_p, 15) \ -+ x(reflink_v, 16) \ -+ x(inline_data, 17) \ -+ x(btree_ptr_v2, 18) -+ -+enum bch_bkey_type { -+#define x(name, nr) KEY_TYPE_##name = nr, -+ BCH_BKEY_TYPES() -+#undef x -+ KEY_TYPE_MAX, -+}; -+ -+struct bch_cookie { -+ struct bch_val v; -+ __le64 cookie; -+}; -+ -+/* Extents */ -+ -+/* -+ * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally -+ * preceded by checksum/compression information (bch_extent_crc32 or -+ * bch_extent_crc64). 
-+ * -+ * One major determining factor in the format of extents is how we handle and -+ * represent extents that have been partially overwritten and thus trimmed: -+ * -+ * If an extent is not checksummed or compressed, when the extent is trimmed we -+ * don't have to remember the extent we originally allocated and wrote: we can -+ * merely adjust ptr->offset to point to the start of the data that is currently -+ * live. The size field in struct bkey records the current (live) size of the -+ * extent, and is also used to mean "size of region on disk that we point to" in -+ * this case. -+ * -+ * Thus an extent that is not checksummed or compressed will consist only of a -+ * list of bch_extent_ptrs, with none of the fields in -+ * bch_extent_crc32/bch_extent_crc64. -+ * -+ * When an extent is checksummed or compressed, it's not possible to read only -+ * the data that is currently live: we have to read the entire extent that was -+ * originally written, and then return only the part of the extent that is -+ * currently live. -+ * -+ * Thus, in addition to the current size of the extent in struct bkey, we need -+ * to store the size of the originally allocated space - this is the -+ * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also, -+ * when the extent is trimmed, instead of modifying the offset field of the -+ * pointer, we keep a second smaller offset field - "offset into the original -+ * extent of the currently live region". -+ * -+ * The other major determining factor is replication and data migration: -+ * -+ * Each pointer may have its own bch_extent_crc32/64. When doing a replicated -+ * write, we will initially write all the replicas in the same format, with the -+ * same checksum type and compression format - however, when copygc runs later (or -+ * tiering/cache promotion, anything that moves data), it is not in general -+ * going to rewrite all the pointers at once - one of the replicas may be in a -+ * bucket on one device that has very little fragmentation while another lives -+ * in a bucket that has become heavily fragmented, and thus is being rewritten -+ * sooner than the rest. -+ * -+ * Thus it will only move a subset of the pointers (or in the case of -+ * tiering/cache promotion perhaps add a single pointer without dropping any -+ * current pointers), and if the extent has been partially overwritten it must -+ * write only the currently live portion (or copygc would not be able to reduce -+ * fragmentation!) - which necessitates a different bch_extent_crc format for -+ * the new pointer. -+ * -+ * But in the interests of space efficiency, we don't want to store one -+ * bch_extent_crc for each pointer if we don't have to. -+ * -+ * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and -+ * bch_extent_ptrs appended arbitrarily one after the other. We determine the -+ * type of a given entry with a scheme similar to utf8 (except we're encoding a -+ * type, not a size), encoding the type in the position of the first set bit: -+ * -+ * bch_extent_crc32 - 0b1 -+ * bch_extent_ptr - 0b10 -+ * bch_extent_crc64 - 0b100 -+ * -+ * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and -+ * bch_extent_crc64 is the least constrained). -+ * -+ * Then, each bch_extent_crc32/64 applies to the pointers that follow after it, -+ * until the next bch_extent_crc32/64. -+ * -+ * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer -+ * is neither checksummed nor compressed. 
-+ */ -+ -+/* 128 bits, sufficient for cryptographic MACs: */ -+struct bch_csum { -+ __le64 lo; -+ __le64 hi; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_EXTENT_ENTRY_TYPES() \ -+ x(ptr, 0) \ -+ x(crc32, 1) \ -+ x(crc64, 2) \ -+ x(crc128, 3) \ -+ x(stripe_ptr, 4) -+#define BCH_EXTENT_ENTRY_MAX 5 -+ -+enum bch_extent_entry_type { -+#define x(f, n) BCH_EXTENT_ENTRY_##f = n, -+ BCH_EXTENT_ENTRY_TYPES() -+#undef x -+}; -+ -+/* Compressed/uncompressed size are stored biased by 1: */ -+struct bch_extent_crc32 { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u32 type:2, -+ _compressed_size:7, -+ _uncompressed_size:7, -+ offset:7, -+ _unused:1, -+ csum_type:4, -+ compression_type:4; -+ __u32 csum; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u32 csum; -+ __u32 compression_type:4, -+ csum_type:4, -+ _unused:1, -+ offset:7, -+ _uncompressed_size:7, -+ _compressed_size:7, -+ type:2; -+#endif -+} __attribute__((packed, aligned(8))); -+ -+#define CRC32_SIZE_MAX (1U << 7) -+#define CRC32_NONCE_MAX 0 -+ -+struct bch_extent_crc64 { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:3, -+ _compressed_size:9, -+ _uncompressed_size:9, -+ offset:9, -+ nonce:10, -+ csum_type:4, -+ compression_type:4, -+ csum_hi:16; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 csum_hi:16, -+ compression_type:4, -+ csum_type:4, -+ nonce:10, -+ offset:9, -+ _uncompressed_size:9, -+ _compressed_size:9, -+ type:3; -+#endif -+ __u64 csum_lo; -+} __attribute__((packed, aligned(8))); -+ -+#define CRC64_SIZE_MAX (1U << 9) -+#define CRC64_NONCE_MAX ((1U << 10) - 1) -+ -+struct bch_extent_crc128 { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:4, -+ _compressed_size:13, -+ _uncompressed_size:13, -+ offset:13, -+ nonce:13, -+ csum_type:4, -+ compression_type:4; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 compression_type:4, -+ csum_type:4, -+ nonce:13, -+ offset:13, -+ _uncompressed_size:13, -+ _compressed_size:13, -+ type:4; -+#endif -+ struct bch_csum csum; -+} __attribute__((packed, aligned(8))); -+ -+#define CRC128_SIZE_MAX (1U << 13) -+#define CRC128_NONCE_MAX ((1U << 13) - 1) -+ -+/* -+ * @reservation - pointer hasn't been written to, just reserved -+ */ -+struct bch_extent_ptr { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:1, -+ cached:1, -+ unused:1, -+ reservation:1, -+ offset:44, /* 8 petabytes */ -+ dev:8, -+ gen:8; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 gen:8, -+ dev:8, -+ offset:44, -+ reservation:1, -+ unused:1, -+ cached:1, -+ type:1; -+#endif -+} __attribute__((packed, aligned(8))); -+ -+struct bch_extent_stripe_ptr { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:5, -+ block:8, -+ idx:51; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 idx:51, -+ block:8, -+ type:5; -+#endif -+}; -+ -+struct bch_extent_reservation { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:6, -+ unused:22, -+ replicas:4, -+ generation:32; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 generation:32, -+ replicas:4, -+ unused:22, -+ type:6; -+#endif -+}; -+ -+union bch_extent_entry { -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 -+ unsigned long type; -+#elif __BITS_PER_LONG == 32 -+ struct { -+ unsigned long pad; -+ unsigned long type; -+ }; -+#else -+#error edit for your odd byteorder. 
-+#endif -+ -+#define x(f, n) struct bch_extent_##f f; -+ BCH_EXTENT_ENTRY_TYPES() -+#undef x -+}; -+ -+struct bch_btree_ptr { -+ struct bch_val v; -+ -+ struct bch_extent_ptr start[0]; -+ __u64 _data[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_btree_ptr_v2 { -+ struct bch_val v; -+ -+ __u64 mem_ptr; -+ __le64 seq; -+ __le16 sectors_written; -+ /* In case we ever decide to do variable size btree nodes: */ -+ __le16 sectors; -+ struct bpos min_key; -+ struct bch_extent_ptr start[0]; -+ __u64 _data[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_extent { -+ struct bch_val v; -+ -+ union bch_extent_entry start[0]; -+ __u64 _data[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_reservation { -+ struct bch_val v; -+ -+ __le32 generation; -+ __u8 nr_replicas; -+ __u8 pad[3]; -+} __attribute__((packed, aligned(8))); -+ -+/* Maximum size (in u64s) a single pointer could be: */ -+#define BKEY_EXTENT_PTR_U64s_MAX\ -+ ((sizeof(struct bch_extent_crc128) + \ -+ sizeof(struct bch_extent_ptr)) / sizeof(u64)) -+ -+/* Maximum possible size of an entire extent value: */ -+#define BKEY_EXTENT_VAL_U64s_MAX \ -+ (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1)) -+ -+#define BKEY_PADDED(key) __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX) -+ -+/* * Maximum possible size of an entire extent, key + value: */ -+#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) -+ -+/* Btree pointers don't carry around checksums: */ -+#define BKEY_BTREE_PTR_VAL_U64s_MAX \ -+ ((sizeof(struct bch_btree_ptr_v2) + \ -+ sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(u64)) -+#define BKEY_BTREE_PTR_U64s_MAX \ -+ (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX) -+ -+/* Inodes */ -+ -+#define BLOCKDEV_INODE_MAX 4096 -+ -+#define BCACHEFS_ROOT_INO 4096 -+ -+struct bch_inode { -+ struct bch_val v; -+ -+ __le64 bi_hash_seed; -+ __le32 bi_flags; -+ __le16 bi_mode; -+ __u8 fields[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_inode_generation { -+ struct bch_val v; -+ -+ __le32 bi_generation; -+ __le32 pad; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_INODE_FIELDS() \ -+ x(bi_atime, 64) \ -+ x(bi_ctime, 64) \ -+ x(bi_mtime, 64) \ -+ x(bi_otime, 64) \ -+ x(bi_size, 64) \ -+ x(bi_sectors, 64) \ -+ x(bi_uid, 32) \ -+ x(bi_gid, 32) \ -+ x(bi_nlink, 32) \ -+ x(bi_generation, 32) \ -+ x(bi_dev, 32) \ -+ x(bi_data_checksum, 8) \ -+ x(bi_compression, 8) \ -+ x(bi_project, 32) \ -+ x(bi_background_compression, 8) \ -+ x(bi_data_replicas, 8) \ -+ x(bi_promote_target, 16) \ -+ x(bi_foreground_target, 16) \ -+ x(bi_background_target, 16) \ -+ x(bi_erasure_code, 16) \ -+ x(bi_fields_set, 16) -+ -+/* subset of BCH_INODE_FIELDS */ -+#define BCH_INODE_OPTS() \ -+ x(data_checksum, 8) \ -+ x(compression, 8) \ -+ x(project, 32) \ -+ x(background_compression, 8) \ -+ x(data_replicas, 8) \ -+ x(promote_target, 16) \ -+ x(foreground_target, 16) \ -+ x(background_target, 16) \ -+ x(erasure_code, 16) -+ -+enum inode_opt_id { -+#define x(name, ...) 
\ -+ Inode_opt_##name, -+ BCH_INODE_OPTS() -+#undef x -+ Inode_opt_nr, -+}; -+ -+enum { -+ /* -+ * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL -+ * flags) -+ */ -+ __BCH_INODE_SYNC = 0, -+ __BCH_INODE_IMMUTABLE = 1, -+ __BCH_INODE_APPEND = 2, -+ __BCH_INODE_NODUMP = 3, -+ __BCH_INODE_NOATIME = 4, -+ -+ __BCH_INODE_I_SIZE_DIRTY= 5, -+ __BCH_INODE_I_SECTORS_DIRTY= 6, -+ __BCH_INODE_UNLINKED = 7, -+ -+ /* bits 20+ reserved for packed fields below: */ -+}; -+ -+#define BCH_INODE_SYNC (1 << __BCH_INODE_SYNC) -+#define BCH_INODE_IMMUTABLE (1 << __BCH_INODE_IMMUTABLE) -+#define BCH_INODE_APPEND (1 << __BCH_INODE_APPEND) -+#define BCH_INODE_NODUMP (1 << __BCH_INODE_NODUMP) -+#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME) -+#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY) -+#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY) -+#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED) -+ -+LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); -+LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 32); -+ -+/* Dirents */ -+ -+/* -+ * Dirents (and xattrs) have to implement string lookups; since our b-tree -+ * doesn't support arbitrary length strings for the key, we instead index by a -+ * 64 bit hash (currently truncated sha1) of the string, stored in the offset -+ * field of the key - using linear probing to resolve hash collisions. This also -+ * provides us with the readdir cookie posix requires. -+ * -+ * Linear probing requires us to use whiteouts for deletions, in the event of a -+ * collision: -+ */ -+ -+struct bch_dirent { -+ struct bch_val v; -+ -+ /* Target inode number: */ -+ __le64 d_inum; -+ -+ /* -+ * Copy of mode bits 12-15 from the target inode - so userspace can get -+ * the filetype without having to do a stat() -+ */ -+ __u8 d_type; -+ -+ __u8 d_name[]; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_NAME_MAX (U8_MAX * sizeof(u64) - \ -+ sizeof(struct bkey) - \ -+ offsetof(struct bch_dirent, d_name)) -+ -+ -+/* Xattrs */ -+ -+#define KEY_TYPE_XATTR_INDEX_USER 0 -+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1 -+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2 -+#define KEY_TYPE_XATTR_INDEX_TRUSTED 3 -+#define KEY_TYPE_XATTR_INDEX_SECURITY 4 -+ -+struct bch_xattr { -+ struct bch_val v; -+ __u8 x_type; -+ __u8 x_name_len; -+ __le16 x_val_len; -+ __u8 x_name[]; -+} __attribute__((packed, aligned(8))); -+ -+/* Bucket/allocation information: */ -+ -+struct bch_alloc { -+ struct bch_val v; -+ __u8 fields; -+ __u8 gen; -+ __u8 data[]; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_ALLOC_FIELDS() \ -+ x(read_time, 16) \ -+ x(write_time, 16) \ -+ x(data_type, 8) \ -+ x(dirty_sectors, 16) \ -+ x(cached_sectors, 16) \ -+ x(oldest_gen, 8) -+ -+enum { -+#define x(name, bytes) BCH_ALLOC_FIELD_##name, -+ BCH_ALLOC_FIELDS() -+#undef x -+ BCH_ALLOC_FIELD_NR -+}; -+ -+static const unsigned BCH_ALLOC_FIELD_BYTES[] = { -+#define x(name, bits) [BCH_ALLOC_FIELD_##name] = bits / 8, -+ BCH_ALLOC_FIELDS() -+#undef x -+}; -+ -+#define x(name, bits) + (bits / 8) -+static const unsigned BKEY_ALLOC_VAL_U64s_MAX = -+ DIV_ROUND_UP(offsetof(struct bch_alloc, data) -+ BCH_ALLOC_FIELDS(), sizeof(u64)); -+#undef x -+ -+#define BKEY_ALLOC_U64s_MAX (BKEY_U64s + BKEY_ALLOC_VAL_U64s_MAX) -+ -+/* Quotas: */ -+ -+enum quota_types { -+ QTYP_USR = 0, -+ QTYP_GRP = 1, -+ QTYP_PRJ = 2, -+ QTYP_NR = 3, -+}; -+ -+enum quota_counters { -+ Q_SPC = 0, -+ Q_INO = 1, -+ Q_COUNTERS = 2, -+}; -+ -+struct bch_quota_counter { -+ __le64 
hardlimit; -+ __le64 softlimit; -+}; -+ -+struct bch_quota { -+ struct bch_val v; -+ struct bch_quota_counter c[Q_COUNTERS]; -+} __attribute__((packed, aligned(8))); -+ -+/* Erasure coding */ -+ -+struct bch_stripe { -+ struct bch_val v; -+ __le16 sectors; -+ __u8 algorithm; -+ __u8 nr_blocks; -+ __u8 nr_redundant; -+ -+ __u8 csum_granularity_bits; -+ __u8 csum_type; -+ __u8 pad; -+ -+ struct bch_extent_ptr ptrs[0]; -+} __attribute__((packed, aligned(8))); -+ -+/* Reflink: */ -+ -+struct bch_reflink_p { -+ struct bch_val v; -+ __le64 idx; -+ -+ __le32 reservation_generation; -+ __u8 nr_replicas; -+ __u8 pad[3]; -+}; -+ -+struct bch_reflink_v { -+ struct bch_val v; -+ __le64 refcount; -+ union bch_extent_entry start[0]; -+ __u64 _data[0]; -+}; -+ -+/* Inline data */ -+ -+struct bch_inline_data { -+ struct bch_val v; -+ u8 data[0]; -+}; -+ -+/* Optional/variable size superblock sections: */ -+ -+struct bch_sb_field { -+ __u64 _data[0]; -+ __le32 u64s; -+ __le32 type; -+}; -+ -+#define BCH_SB_FIELDS() \ -+ x(journal, 0) \ -+ x(members, 1) \ -+ x(crypt, 2) \ -+ x(replicas_v0, 3) \ -+ x(quota, 4) \ -+ x(disk_groups, 5) \ -+ x(clean, 6) \ -+ x(replicas, 7) \ -+ x(journal_seq_blacklist, 8) -+ -+enum bch_sb_field_type { -+#define x(f, nr) BCH_SB_FIELD_##f = nr, -+ BCH_SB_FIELDS() -+#undef x -+ BCH_SB_FIELD_NR -+}; -+ -+/* BCH_SB_FIELD_journal: */ -+ -+struct bch_sb_field_journal { -+ struct bch_sb_field field; -+ __le64 buckets[0]; -+}; -+ -+/* BCH_SB_FIELD_members: */ -+ -+#define BCH_MIN_NR_NBUCKETS (1 << 6) -+ -+struct bch_member { -+ uuid_le uuid; -+ __le64 nbuckets; /* device size */ -+ __le16 first_bucket; /* index of first bucket used */ -+ __le16 bucket_size; /* sectors */ -+ __le32 pad; -+ __le64 last_mount; /* time_t */ -+ -+ __le64 flags[2]; -+}; -+ -+LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags[0], 0, 4) -+/* 4-10 unused, was TIER, HAS_(META)DATA */ -+LE64_BITMASK(BCH_MEMBER_REPLACEMENT, struct bch_member, flags[0], 10, 14) -+LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags[0], 14, 15) -+LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags[0], 15, 20) -+LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags[0], 20, 28) -+LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags[0], 28, 30) -+ -+#define BCH_TIER_MAX 4U -+ -+#if 0 -+LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20); -+LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40); -+#endif -+ -+enum bch_member_state { -+ BCH_MEMBER_STATE_RW = 0, -+ BCH_MEMBER_STATE_RO = 1, -+ BCH_MEMBER_STATE_FAILED = 2, -+ BCH_MEMBER_STATE_SPARE = 3, -+ BCH_MEMBER_STATE_NR = 4, -+}; -+ -+enum cache_replacement { -+ CACHE_REPLACEMENT_LRU = 0, -+ CACHE_REPLACEMENT_FIFO = 1, -+ CACHE_REPLACEMENT_RANDOM = 2, -+ CACHE_REPLACEMENT_NR = 3, -+}; -+ -+struct bch_sb_field_members { -+ struct bch_sb_field field; -+ struct bch_member members[0]; -+}; -+ -+/* BCH_SB_FIELD_crypt: */ -+ -+struct nonce { -+ __le32 d[4]; -+}; -+ -+struct bch_key { -+ __le64 key[4]; -+}; -+ -+#define BCH_KEY_MAGIC \ -+ (((u64) 'b' << 0)|((u64) 'c' << 8)| \ -+ ((u64) 'h' << 16)|((u64) '*' << 24)| \ -+ ((u64) '*' << 32)|((u64) 'k' << 40)| \ -+ ((u64) 'e' << 48)|((u64) 'y' << 56)) -+ -+struct bch_encrypted_key { -+ __le64 magic; -+ struct bch_key key; -+}; -+ -+/* -+ * If this field is present in the superblock, it stores an encryption key which -+ * is used encrypt all other data/metadata. 
The key will normally be encrypted -+ * with the key userspace provides, but if encryption has been turned off we'll -+ * just store the master key unencrypted in the superblock so we can access the -+ * previously encrypted data. -+ */ -+struct bch_sb_field_crypt { -+ struct bch_sb_field field; -+ -+ __le64 flags; -+ __le64 kdf_flags; -+ struct bch_encrypted_key key; -+}; -+ -+LE64_BITMASK(BCH_CRYPT_KDF_TYPE, struct bch_sb_field_crypt, flags, 0, 4); -+ -+enum bch_kdf_types { -+ BCH_KDF_SCRYPT = 0, -+ BCH_KDF_NR = 1, -+}; -+ -+/* stored as base 2 log of scrypt params: */ -+LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16); -+LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32); -+LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48); -+ -+/* BCH_SB_FIELD_replicas: */ -+ -+enum bch_data_type { -+ BCH_DATA_NONE = 0, -+ BCH_DATA_SB = 1, -+ BCH_DATA_JOURNAL = 2, -+ BCH_DATA_BTREE = 3, -+ BCH_DATA_USER = 4, -+ BCH_DATA_CACHED = 5, -+ BCH_DATA_NR = 6, -+}; -+ -+struct bch_replicas_entry_v0 { -+ __u8 data_type; -+ __u8 nr_devs; -+ __u8 devs[0]; -+} __attribute__((packed)); -+ -+struct bch_sb_field_replicas_v0 { -+ struct bch_sb_field field; -+ struct bch_replicas_entry_v0 entries[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_replicas_entry { -+ __u8 data_type; -+ __u8 nr_devs; -+ __u8 nr_required; -+ __u8 devs[0]; -+} __attribute__((packed)); -+ -+#define replicas_entry_bytes(_i) \ -+ (offsetof(typeof(*(_i)), devs) + (_i)->nr_devs) -+ -+struct bch_sb_field_replicas { -+ struct bch_sb_field field; -+ struct bch_replicas_entry entries[0]; -+} __attribute__((packed, aligned(8))); -+ -+/* BCH_SB_FIELD_quota: */ -+ -+struct bch_sb_quota_counter { -+ __le32 timelimit; -+ __le32 warnlimit; -+}; -+ -+struct bch_sb_quota_type { -+ __le64 flags; -+ struct bch_sb_quota_counter c[Q_COUNTERS]; -+}; -+ -+struct bch_sb_field_quota { -+ struct bch_sb_field field; -+ struct bch_sb_quota_type q[QTYP_NR]; -+} __attribute__((packed, aligned(8))); -+ -+/* BCH_SB_FIELD_disk_groups: */ -+ -+#define BCH_SB_LABEL_SIZE 32 -+ -+struct bch_disk_group { -+ __u8 label[BCH_SB_LABEL_SIZE]; -+ __le64 flags[2]; -+} __attribute__((packed, aligned(8))); -+ -+LE64_BITMASK(BCH_GROUP_DELETED, struct bch_disk_group, flags[0], 0, 1) -+LE64_BITMASK(BCH_GROUP_DATA_ALLOWED, struct bch_disk_group, flags[0], 1, 6) -+LE64_BITMASK(BCH_GROUP_PARENT, struct bch_disk_group, flags[0], 6, 24) -+ -+struct bch_sb_field_disk_groups { -+ struct bch_sb_field field; -+ struct bch_disk_group entries[0]; -+} __attribute__((packed, aligned(8))); -+ -+/* -+ * On clean shutdown, store btree roots and current journal sequence number in -+ * the superblock: -+ */ -+struct jset_entry { -+ __le16 u64s; -+ __u8 btree_id; -+ __u8 level; -+ __u8 type; /* designates what this jset holds */ -+ __u8 pad[3]; -+ -+ union { -+ struct bkey_i start[0]; -+ __u64 _data[0]; -+ }; -+}; -+ -+struct bch_sb_field_clean { -+ struct bch_sb_field field; -+ -+ __le32 flags; -+ __le16 read_clock; -+ __le16 write_clock; -+ __le64 journal_seq; -+ -+ union { -+ struct jset_entry start[0]; -+ __u64 _data[0]; -+ }; -+}; -+ -+struct journal_seq_blacklist_entry { -+ __le64 start; -+ __le64 end; -+}; -+ -+struct bch_sb_field_journal_seq_blacklist { -+ struct bch_sb_field field; -+ -+ union { -+ struct journal_seq_blacklist_entry start[0]; -+ __u64 _data[0]; -+ }; -+}; -+ -+/* Superblock: */ -+ -+/* -+ * New versioning scheme: -+ * One common version number for all on disk data structures - superblock, btree 
-+ * nodes, journal entries -+ */ -+#define BCH_JSET_VERSION_OLD 2 -+#define BCH_BSET_VERSION_OLD 3 -+ -+enum bcachefs_metadata_version { -+ bcachefs_metadata_version_min = 9, -+ bcachefs_metadata_version_new_versioning = 10, -+ bcachefs_metadata_version_bkey_renumber = 10, -+ bcachefs_metadata_version_inode_btree_change = 11, -+ bcachefs_metadata_version_max = 12, -+}; -+ -+#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) -+ -+#define BCH_SB_SECTOR 8 -+#define BCH_SB_MEMBERS_MAX 64 /* XXX kill */ -+ -+struct bch_sb_layout { -+ uuid_le magic; /* bcachefs superblock UUID */ -+ __u8 layout_type; -+ __u8 sb_max_size_bits; /* base 2 of 512 byte sectors */ -+ __u8 nr_superblocks; -+ __u8 pad[5]; -+ __le64 sb_offset[61]; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_SB_LAYOUT_SECTOR 7 -+ -+/* -+ * @offset - sector where this sb was written -+ * @version - on disk format version -+ * @version_min - Oldest metadata version this filesystem contains; so we can -+ * safely drop compatibility code and refuse to mount filesystems -+ * we'd need it for -+ * @magic - identifies as a bcachefs superblock (BCACHE_MAGIC) -+ * @seq - incremented each time superblock is written -+ * @uuid - used for generating various magic numbers and identifying -+ * member devices, never changes -+ * @user_uuid - user visible UUID, may be changed -+ * @label - filesystem label -+ * @seq - identifies most recent superblock, incremented each time -+ * superblock is written -+ * @features - enabled incompatible features -+ */ -+struct bch_sb { -+ struct bch_csum csum; -+ __le16 version; -+ __le16 version_min; -+ __le16 pad[2]; -+ uuid_le magic; -+ uuid_le uuid; -+ uuid_le user_uuid; -+ __u8 label[BCH_SB_LABEL_SIZE]; -+ __le64 offset; -+ __le64 seq; -+ -+ __le16 block_size; -+ __u8 dev_idx; -+ __u8 nr_devices; -+ __le32 u64s; -+ -+ __le64 time_base_lo; -+ __le32 time_base_hi; -+ __le32 time_precision; -+ -+ __le64 flags[8]; -+ __le64 features[2]; -+ __le64 compat[2]; -+ -+ struct bch_sb_layout layout; -+ -+ union { -+ struct bch_sb_field start[0]; -+ __le64 _data[0]; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+/* -+ * Flags: -+ * BCH_SB_INITALIZED - set on first mount -+ * BCH_SB_CLEAN - did we shut down cleanly? Just a hint, doesn't affect -+ * behaviour of mount/recovery path: -+ * BCH_SB_INODE_32BIT - limit inode numbers to 32 bits -+ * BCH_SB_128_BIT_MACS - 128 bit macs instead of 80 -+ * BCH_SB_ENCRYPTION_TYPE - if nonzero encryption is enabled; overrides -+ * DATA/META_CSUM_TYPE. 
Also indicates encryption -+ * algorithm in use, if/when we get more than one -+ */ -+ -+LE16_BITMASK(BCH_SB_BLOCK_SIZE, struct bch_sb, block_size, 0, 16); -+ -+LE64_BITMASK(BCH_SB_INITIALIZED, struct bch_sb, flags[0], 0, 1); -+LE64_BITMASK(BCH_SB_CLEAN, struct bch_sb, flags[0], 1, 2); -+LE64_BITMASK(BCH_SB_CSUM_TYPE, struct bch_sb, flags[0], 2, 8); -+LE64_BITMASK(BCH_SB_ERROR_ACTION, struct bch_sb, flags[0], 8, 12); -+ -+LE64_BITMASK(BCH_SB_BTREE_NODE_SIZE, struct bch_sb, flags[0], 12, 28); -+ -+LE64_BITMASK(BCH_SB_GC_RESERVE, struct bch_sb, flags[0], 28, 33); -+LE64_BITMASK(BCH_SB_ROOT_RESERVE, struct bch_sb, flags[0], 33, 40); -+ -+LE64_BITMASK(BCH_SB_META_CSUM_TYPE, struct bch_sb, flags[0], 40, 44); -+LE64_BITMASK(BCH_SB_DATA_CSUM_TYPE, struct bch_sb, flags[0], 44, 48); -+ -+LE64_BITMASK(BCH_SB_META_REPLICAS_WANT, struct bch_sb, flags[0], 48, 52); -+LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT, struct bch_sb, flags[0], 52, 56); -+ -+LE64_BITMASK(BCH_SB_POSIX_ACL, struct bch_sb, flags[0], 56, 57); -+LE64_BITMASK(BCH_SB_USRQUOTA, struct bch_sb, flags[0], 57, 58); -+LE64_BITMASK(BCH_SB_GRPQUOTA, struct bch_sb, flags[0], 58, 59); -+LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60); -+ -+LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61); -+ -+LE64_BITMASK(BCH_SB_REFLINK, struct bch_sb, flags[0], 61, 62); -+ -+/* 61-64 unused */ -+ -+LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4); -+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE, struct bch_sb, flags[1], 4, 8); -+LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9); -+ -+LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10); -+LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14); -+ -+/* -+ * Max size of an extent that may require bouncing to read or write -+ * (checksummed, compressed): 64k -+ */ -+LE64_BITMASK(BCH_SB_ENCODED_EXTENT_MAX_BITS, -+ struct bch_sb, flags[1], 14, 20); -+ -+LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24); -+LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28); -+ -+LE64_BITMASK(BCH_SB_PROMOTE_TARGET, struct bch_sb, flags[1], 28, 40); -+LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52); -+LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64); -+ -+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE, -+ struct bch_sb, flags[2], 0, 4); -+LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64); -+ -+LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16); -+ -+/* -+ * Features: -+ * -+ * journal_seq_blacklist_v3: gates BCH_SB_FIELD_journal_seq_blacklist -+ * reflink: gates KEY_TYPE_reflink -+ * inline_data: gates KEY_TYPE_inline_data -+ * new_siphash: gates BCH_STR_HASH_SIPHASH -+ * new_extent_overwrite: gates BTREE_NODE_NEW_EXTENT_OVERWRITE -+ */ -+#define BCH_SB_FEATURES() \ -+ x(lz4, 0) \ -+ x(gzip, 1) \ -+ x(zstd, 2) \ -+ x(atomic_nlink, 3) \ -+ x(ec, 4) \ -+ x(journal_seq_blacklist_v3, 5) \ -+ x(reflink, 6) \ -+ x(new_siphash, 7) \ -+ x(inline_data, 8) \ -+ x(new_extent_overwrite, 9) \ -+ x(incompressible, 10) \ -+ x(btree_ptr_v2, 11) \ -+ x(extents_above_btree_updates, 12) \ -+ x(btree_updates_journalled, 13) -+ -+#define BCH_SB_FEATURES_ALL \ -+ ((1ULL << BCH_FEATURE_new_siphash)| \ -+ (1ULL << BCH_FEATURE_new_extent_overwrite)| \ -+ (1ULL << BCH_FEATURE_btree_ptr_v2)| \ -+ (1ULL << BCH_FEATURE_extents_above_btree_updates)) -+ -+enum bch_sb_feature { -+#define x(f, n) BCH_FEATURE_##f, -+ BCH_SB_FEATURES() -+#undef x -+ BCH_FEATURE_NR, -+}; 
-+ -+enum bch_sb_compat { -+ BCH_COMPAT_FEAT_ALLOC_INFO = 0, -+ BCH_COMPAT_FEAT_ALLOC_METADATA = 1, -+}; -+ -+/* options: */ -+ -+#define BCH_REPLICAS_MAX 4U -+ -+enum bch_error_actions { -+ BCH_ON_ERROR_CONTINUE = 0, -+ BCH_ON_ERROR_RO = 1, -+ BCH_ON_ERROR_PANIC = 2, -+ BCH_NR_ERROR_ACTIONS = 3, -+}; -+ -+enum bch_str_hash_type { -+ BCH_STR_HASH_CRC32C = 0, -+ BCH_STR_HASH_CRC64 = 1, -+ BCH_STR_HASH_SIPHASH_OLD = 2, -+ BCH_STR_HASH_SIPHASH = 3, -+ BCH_STR_HASH_NR = 4, -+}; -+ -+enum bch_str_hash_opts { -+ BCH_STR_HASH_OPT_CRC32C = 0, -+ BCH_STR_HASH_OPT_CRC64 = 1, -+ BCH_STR_HASH_OPT_SIPHASH = 2, -+ BCH_STR_HASH_OPT_NR = 3, -+}; -+ -+enum bch_csum_type { -+ BCH_CSUM_NONE = 0, -+ BCH_CSUM_CRC32C_NONZERO = 1, -+ BCH_CSUM_CRC64_NONZERO = 2, -+ BCH_CSUM_CHACHA20_POLY1305_80 = 3, -+ BCH_CSUM_CHACHA20_POLY1305_128 = 4, -+ BCH_CSUM_CRC32C = 5, -+ BCH_CSUM_CRC64 = 6, -+ BCH_CSUM_NR = 7, -+}; -+ -+static const unsigned bch_crc_bytes[] = { -+ [BCH_CSUM_NONE] = 0, -+ [BCH_CSUM_CRC32C_NONZERO] = 4, -+ [BCH_CSUM_CRC32C] = 4, -+ [BCH_CSUM_CRC64_NONZERO] = 8, -+ [BCH_CSUM_CRC64] = 8, -+ [BCH_CSUM_CHACHA20_POLY1305_80] = 10, -+ [BCH_CSUM_CHACHA20_POLY1305_128] = 16, -+}; -+ -+static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type) -+{ -+ switch (type) { -+ case BCH_CSUM_CHACHA20_POLY1305_80: -+ case BCH_CSUM_CHACHA20_POLY1305_128: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+enum bch_csum_opts { -+ BCH_CSUM_OPT_NONE = 0, -+ BCH_CSUM_OPT_CRC32C = 1, -+ BCH_CSUM_OPT_CRC64 = 2, -+ BCH_CSUM_OPT_NR = 3, -+}; -+ -+#define BCH_COMPRESSION_TYPES() \ -+ x(none, 0) \ -+ x(lz4_old, 1) \ -+ x(gzip, 2) \ -+ x(lz4, 3) \ -+ x(zstd, 4) \ -+ x(incompressible, 5) -+ -+enum bch_compression_type { -+#define x(t, n) BCH_COMPRESSION_TYPE_##t, -+ BCH_COMPRESSION_TYPES() -+#undef x -+ BCH_COMPRESSION_TYPE_NR -+}; -+ -+#define BCH_COMPRESSION_OPTS() \ -+ x(none, 0) \ -+ x(lz4, 1) \ -+ x(gzip, 2) \ -+ x(zstd, 3) -+ -+enum bch_compression_opts { -+#define x(t, n) BCH_COMPRESSION_OPT_##t, -+ BCH_COMPRESSION_OPTS() -+#undef x -+ BCH_COMPRESSION_OPT_NR -+}; -+ -+/* -+ * Magic numbers -+ * -+ * The various other data structures have their own magic numbers, which are -+ * xored with the first part of the cache set's UUID -+ */ -+ -+#define BCACHE_MAGIC \ -+ UUID_LE(0xf67385c6, 0x1a4e, 0xca45, \ -+ 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81) -+ -+#define BCACHEFS_STATFS_MAGIC 0xca451a4e -+ -+#define JSET_MAGIC __cpu_to_le64(0x245235c1a3625032ULL) -+#define BSET_MAGIC __cpu_to_le64(0x90135c78b99e07f5ULL) -+ -+static inline __le64 __bch2_sb_magic(struct bch_sb *sb) -+{ -+ __le64 ret; -+ memcpy(&ret, &sb->uuid, sizeof(ret)); -+ return ret; -+} -+ -+static inline __u64 __jset_magic(struct bch_sb *sb) -+{ -+ return __le64_to_cpu(__bch2_sb_magic(sb) ^ JSET_MAGIC); -+} -+ -+static inline __u64 __bset_magic(struct bch_sb *sb) -+{ -+ return __le64_to_cpu(__bch2_sb_magic(sb) ^ BSET_MAGIC); -+} -+ -+/* Journal */ -+ -+#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64)) -+ -+#define BCH_JSET_ENTRY_TYPES() \ -+ x(btree_keys, 0) \ -+ x(btree_root, 1) \ -+ x(prio_ptrs, 2) \ -+ x(blacklist, 3) \ -+ x(blacklist_v2, 4) \ -+ x(usage, 5) \ -+ x(data_usage, 6) -+ -+enum { -+#define x(f, nr) BCH_JSET_ENTRY_##f = nr, -+ BCH_JSET_ENTRY_TYPES() -+#undef x -+ BCH_JSET_ENTRY_NR -+}; -+ -+/* -+ * Journal sequence numbers can be blacklisted: bsets record the max sequence -+ * number of all the journal entries they contain updates for, so that on -+ * recovery we can ignore those bsets that contain index updates 
newer that what -+ * made it into the journal. -+ * -+ * This means that we can't reuse that journal_seq - we have to skip it, and -+ * then record that we skipped it so that the next time we crash and recover we -+ * don't think there was a missing journal entry. -+ */ -+struct jset_entry_blacklist { -+ struct jset_entry entry; -+ __le64 seq; -+}; -+ -+struct jset_entry_blacklist_v2 { -+ struct jset_entry entry; -+ __le64 start; -+ __le64 end; -+}; -+ -+enum { -+ FS_USAGE_RESERVED = 0, -+ FS_USAGE_INODES = 1, -+ FS_USAGE_KEY_VERSION = 2, -+ FS_USAGE_NR = 3 -+}; -+ -+struct jset_entry_usage { -+ struct jset_entry entry; -+ __le64 v; -+} __attribute__((packed)); -+ -+struct jset_entry_data_usage { -+ struct jset_entry entry; -+ __le64 v; -+ struct bch_replicas_entry r; -+} __attribute__((packed)); -+ -+/* -+ * On disk format for a journal entry: -+ * seq is monotonically increasing; every journal entry has its own unique -+ * sequence number. -+ * -+ * last_seq is the oldest journal entry that still has keys the btree hasn't -+ * flushed to disk yet. -+ * -+ * version is for on disk format changes. -+ */ -+struct jset { -+ struct bch_csum csum; -+ -+ __le64 magic; -+ __le64 seq; -+ __le32 version; -+ __le32 flags; -+ -+ __le32 u64s; /* size of d[] in u64s */ -+ -+ __u8 encrypted_start[0]; -+ -+ __le16 read_clock; -+ __le16 write_clock; -+ -+ /* Sequence number of oldest dirty journal entry */ -+ __le64 last_seq; -+ -+ -+ union { -+ struct jset_entry start[0]; -+ __u64 _data[0]; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4); -+LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5); -+ -+#define BCH_JOURNAL_BUCKETS_MIN 8 -+ -+/* Btree: */ -+ -+#define BCH_BTREE_IDS() \ -+ x(EXTENTS, 0, "extents") \ -+ x(INODES, 1, "inodes") \ -+ x(DIRENTS, 2, "dirents") \ -+ x(XATTRS, 3, "xattrs") \ -+ x(ALLOC, 4, "alloc") \ -+ x(QUOTAS, 5, "quotas") \ -+ x(EC, 6, "stripes") \ -+ x(REFLINK, 7, "reflink") -+ -+enum btree_id { -+#define x(kwd, val, name) BTREE_ID_##kwd = val, -+ BCH_BTREE_IDS() -+#undef x -+ BTREE_ID_NR -+}; -+ -+#define BTREE_MAX_DEPTH 4U -+ -+/* Btree nodes */ -+ -+/* -+ * Btree nodes -+ * -+ * On disk a btree node is a list/log of these; within each set the keys are -+ * sorted -+ */ -+struct bset { -+ __le64 seq; -+ -+ /* -+ * Highest journal entry this bset contains keys for. -+ * If on recovery we don't see that journal entry, this bset is ignored: -+ * this allows us to preserve the order of all index updates after a -+ * crash, since the journal records a total order of all index updates -+ * and anything that didn't make it to the journal doesn't get used. 
-+ */ -+ __le64 journal_seq; -+ -+ __le32 flags; -+ __le16 version; -+ __le16 u64s; /* count of d[] in u64s */ -+ -+ union { -+ struct bkey_packed start[0]; -+ __u64 _data[0]; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4); -+ -+LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 4, 5); -+LE32_BITMASK(BSET_SEPARATE_WHITEOUTS, -+ struct bset, flags, 5, 6); -+ -+struct btree_node { -+ struct bch_csum csum; -+ __le64 magic; -+ -+ /* this flags field is encrypted, unlike bset->flags: */ -+ __le64 flags; -+ -+ /* Closed interval: */ -+ struct bpos min_key; -+ struct bpos max_key; -+ struct bch_extent_ptr ptr; -+ struct bkey_format format; -+ -+ union { -+ struct bset keys; -+ struct { -+ __u8 pad[22]; -+ __le16 u64s; -+ __u64 _data[0]; -+ -+ }; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+LE64_BITMASK(BTREE_NODE_ID, struct btree_node, flags, 0, 4); -+LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8); -+LE64_BITMASK(BTREE_NODE_NEW_EXTENT_OVERWRITE, -+ struct btree_node, flags, 8, 9); -+/* 9-32 unused */ -+LE64_BITMASK(BTREE_NODE_SEQ, struct btree_node, flags, 32, 64); -+ -+struct btree_node_entry { -+ struct bch_csum csum; -+ -+ union { -+ struct bset keys; -+ struct { -+ __u8 pad[22]; -+ __le16 u64s; -+ __u64 _data[0]; -+ -+ }; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+#endif /* _BCACHEFS_FORMAT_H */ -diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h -new file mode 100644 -index 000000000000..d71157a3e073 ---- /dev/null -+++ b/fs/bcachefs/bcachefs_ioctl.h -@@ -0,0 +1,332 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_IOCTL_H -+#define _BCACHEFS_IOCTL_H -+ -+#include -+#include -+#include "bcachefs_format.h" -+ -+/* -+ * Flags common to multiple ioctls: -+ */ -+#define BCH_FORCE_IF_DATA_LOST (1 << 0) -+#define BCH_FORCE_IF_METADATA_LOST (1 << 1) -+#define BCH_FORCE_IF_DATA_DEGRADED (1 << 2) -+#define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3) -+ -+#define BCH_FORCE_IF_DEGRADED \ -+ (BCH_FORCE_IF_DATA_DEGRADED| \ -+ BCH_FORCE_IF_METADATA_DEGRADED) -+ -+/* -+ * If cleared, ioctl that refer to a device pass it as a pointer to a pathname -+ * (e.g. 
/dev/sda1); if set, the dev field is the device's index within the -+ * filesystem: -+ */ -+#define BCH_BY_INDEX (1 << 4) -+ -+/* -+ * For BCH_IOCTL_READ_SUPER: get superblock of a specific device, not filesystem -+ * wide superblock: -+ */ -+#define BCH_READ_DEV (1 << 5) -+ -+/* global control dev: */ -+ -+/* These are currently broken, and probably unnecessary: */ -+#if 0 -+#define BCH_IOCTL_ASSEMBLE _IOW(0xbc, 1, struct bch_ioctl_assemble) -+#define BCH_IOCTL_INCREMENTAL _IOW(0xbc, 2, struct bch_ioctl_incremental) -+ -+struct bch_ioctl_assemble { -+ __u32 flags; -+ __u32 nr_devs; -+ __u64 pad; -+ __u64 devs[]; -+}; -+ -+struct bch_ioctl_incremental { -+ __u32 flags; -+ __u64 pad; -+ __u64 dev; -+}; -+#endif -+ -+/* filesystem ioctls: */ -+ -+#define BCH_IOCTL_QUERY_UUID _IOR(0xbc, 1, struct bch_ioctl_query_uuid) -+ -+/* These only make sense when we also have incremental assembly */ -+#if 0 -+#define BCH_IOCTL_START _IOW(0xbc, 2, struct bch_ioctl_start) -+#define BCH_IOCTL_STOP _IO(0xbc, 3) -+#endif -+ -+#define BCH_IOCTL_DISK_ADD _IOW(0xbc, 4, struct bch_ioctl_disk) -+#define BCH_IOCTL_DISK_REMOVE _IOW(0xbc, 5, struct bch_ioctl_disk) -+#define BCH_IOCTL_DISK_ONLINE _IOW(0xbc, 6, struct bch_ioctl_disk) -+#define BCH_IOCTL_DISK_OFFLINE _IOW(0xbc, 7, struct bch_ioctl_disk) -+#define BCH_IOCTL_DISK_SET_STATE _IOW(0xbc, 8, struct bch_ioctl_disk_set_state) -+#define BCH_IOCTL_DATA _IOW(0xbc, 10, struct bch_ioctl_data) -+#define BCH_IOCTL_FS_USAGE _IOWR(0xbc, 11, struct bch_ioctl_fs_usage) -+#define BCH_IOCTL_DEV_USAGE _IOWR(0xbc, 11, struct bch_ioctl_dev_usage) -+#define BCH_IOCTL_READ_SUPER _IOW(0xbc, 12, struct bch_ioctl_read_super) -+#define BCH_IOCTL_DISK_GET_IDX _IOW(0xbc, 13, struct bch_ioctl_disk_get_idx) -+#define BCH_IOCTL_DISK_RESIZE _IOW(0xbc, 14, struct bch_ioctl_disk_resize) -+ -+/* ioctl below act on a particular file, not the filesystem as a whole: */ -+ -+#define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *) -+ -+/* -+ * BCH_IOCTL_QUERY_UUID: get filesystem UUID -+ * -+ * Returns user visible UUID, not internal UUID (which may not ever be changed); -+ * the filesystem's sysfs directory may be found under /sys/fs/bcachefs with -+ * this UUID. -+ */ -+struct bch_ioctl_query_uuid { -+ uuid_le uuid; -+}; -+ -+#if 0 -+struct bch_ioctl_start { -+ __u32 flags; -+ __u32 pad; -+}; -+#endif -+ -+/* -+ * BCH_IOCTL_DISK_ADD: add a new device to an existing filesystem -+ * -+ * The specified device must not be open or in use. On success, the new device -+ * will be an online member of the filesystem just like any other member. -+ * -+ * The device must first be prepared by userspace by formatting with a bcachefs -+ * superblock, which is only used for passing in superblock options/parameters -+ * for that device (in struct bch_member). The new device's superblock should -+ * not claim to be a member of any existing filesystem - UUIDs on it will be -+ * ignored. -+ */ -+ -+/* -+ * BCH_IOCTL_DISK_REMOVE: permanently remove a member device from a filesystem -+ * -+ * Any data present on @dev will be permanently deleted, and @dev will be -+ * removed from its slot in the filesystem's list of member devices. The device -+ * may be either offline or offline. -+ * -+ * Will fail removing @dev would leave us with insufficient read write devices -+ * or degraded/unavailable data, unless the approprate BCH_FORCE_IF_* flags are -+ * set. -+ */ -+ -+/* -+ * BCH_IOCTL_DISK_ONLINE: given a disk that is already a member of a filesystem -+ * but is not open (e.g. 
because we started in degraded mode), bring it online -+ * -+ * all existing data on @dev will be available once the device is online, -+ * exactly as if @dev was present when the filesystem was first mounted -+ */ -+ -+/* -+ * BCH_IOCTL_DISK_OFFLINE: offline a disk, causing the kernel to close that -+ * block device, without removing it from the filesystem (so it can be brought -+ * back online later) -+ * -+ * Data present on @dev will be unavailable while @dev is offline (unless -+ * replicated), but will still be intact and untouched if @dev is brought back -+ * online -+ * -+ * Will fail (similarly to BCH_IOCTL_DISK_SET_STATE) if offlining @dev would -+ * leave us with insufficient read write devices or degraded/unavailable data, -+ * unless the approprate BCH_FORCE_IF_* flags are set. -+ */ -+ -+struct bch_ioctl_disk { -+ __u32 flags; -+ __u32 pad; -+ __u64 dev; -+}; -+ -+/* -+ * BCH_IOCTL_DISK_SET_STATE: modify state of a member device of a filesystem -+ * -+ * @new_state - one of the bch_member_state states (rw, ro, failed, -+ * spare) -+ * -+ * Will refuse to change member state if we would then have insufficient devices -+ * to write to, or if it would result in degraded data (when @new_state is -+ * failed or spare) unless the appropriate BCH_FORCE_IF_* flags are set. -+ */ -+struct bch_ioctl_disk_set_state { -+ __u32 flags; -+ __u8 new_state; -+ __u8 pad[3]; -+ __u64 dev; -+}; -+ -+enum bch_data_ops { -+ BCH_DATA_OP_SCRUB = 0, -+ BCH_DATA_OP_REREPLICATE = 1, -+ BCH_DATA_OP_MIGRATE = 2, -+ BCH_DATA_OP_NR = 3, -+}; -+ -+/* -+ * BCH_IOCTL_DATA: operations that walk and manipulate filesystem data (e.g. -+ * scrub, rereplicate, migrate). -+ * -+ * This ioctl kicks off a job in the background, and returns a file descriptor. -+ * Reading from the file descriptor returns a struct bch_ioctl_data_event, -+ * indicating current progress, and closing the file descriptor will stop the -+ * job. The file descriptor is O_CLOEXEC. -+ */ -+struct bch_ioctl_data { -+ __u32 op; -+ __u32 flags; -+ -+ struct bpos start; -+ struct bpos end; -+ -+ union { -+ struct { -+ __u32 dev; -+ __u32 pad; -+ } migrate; -+ struct { -+ __u64 pad[8]; -+ }; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+enum bch_data_event { -+ BCH_DATA_EVENT_PROGRESS = 0, -+ /* XXX: add an event for reporting errors */ -+ BCH_DATA_EVENT_NR = 1, -+}; -+ -+struct bch_ioctl_data_progress { -+ __u8 data_type; -+ __u8 btree_id; -+ __u8 pad[2]; -+ struct bpos pos; -+ -+ __u64 sectors_done; -+ __u64 sectors_total; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_ioctl_data_event { -+ __u8 type; -+ __u8 pad[7]; -+ union { -+ struct bch_ioctl_data_progress p; -+ __u64 pad2[15]; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_replicas_usage { -+ __u64 sectors; -+ struct bch_replicas_entry r; -+} __attribute__((packed)); -+ -+static inline struct bch_replicas_usage * -+replicas_usage_next(struct bch_replicas_usage *u) -+{ -+ return (void *) u + replicas_entry_bytes(&u->r) + 8; -+} -+ -+/* -+ * BCH_IOCTL_FS_USAGE: query filesystem disk space usage -+ * -+ * Returns disk space usage broken out by data type, number of replicas, and -+ * by component device -+ * -+ * @replica_entries_bytes - size, in bytes, allocated for replica usage entries -+ * -+ * On success, @replica_entries_bytes will be changed to indicate the number of -+ * bytes actually used. 
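-+ *
-+ * A minimal sketch of the expected calling pattern (userspace; the fd and
-+ * the initial buffer size are illustrative only):
-+ *
-+ *	struct bch_ioctl_fs_usage *u = calloc(1, sizeof(*u) + 4096);
-+ *
-+ *	u->replica_entries_bytes = 4096;
-+ *	ret = ioctl(fs_fd, BCH_IOCTL_FS_USAGE, u);
-+ *
-+ * On success the returned entries are walked with replicas_usage_next(); if
-+ * the buffer was too small, the caller grows it and retries.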
-+ * -+ * Returns -ERANGE if @replica_entries_bytes was too small -+ */ -+struct bch_ioctl_fs_usage { -+ __u64 capacity; -+ __u64 used; -+ __u64 online_reserved; -+ __u64 persistent_reserved[BCH_REPLICAS_MAX]; -+ -+ __u32 replica_entries_bytes; -+ __u32 pad; -+ -+ struct bch_replicas_usage replicas[0]; -+}; -+ -+/* -+ * BCH_IOCTL_DEV_USAGE: query device disk space usage -+ * -+ * Returns disk space usage broken out by data type - both by buckets and -+ * sectors. -+ */ -+struct bch_ioctl_dev_usage { -+ __u64 dev; -+ __u32 flags; -+ __u8 state; -+ __u8 pad[7]; -+ -+ __u32 bucket_size; -+ __u64 nr_buckets; -+ __u64 available_buckets; -+ -+ __u64 buckets[BCH_DATA_NR]; -+ __u64 sectors[BCH_DATA_NR]; -+ -+ __u64 ec_buckets; -+ __u64 ec_sectors; -+}; -+ -+/* -+ * BCH_IOCTL_READ_SUPER: read filesystem superblock -+ * -+ * Equivalent to reading the superblock directly from the block device, except -+ * avoids racing with the kernel writing the superblock or having to figure out -+ * which block device to read -+ * -+ * @sb - buffer to read into -+ * @size - size of userspace allocated buffer -+ * @dev - device to read superblock for, if BCH_READ_DEV flag is -+ * specified -+ * -+ * Returns -ERANGE if buffer provided is too small -+ */ -+struct bch_ioctl_read_super { -+ __u32 flags; -+ __u32 pad; -+ __u64 dev; -+ __u64 size; -+ __u64 sb; -+}; -+ -+/* -+ * BCH_IOCTL_DISK_GET_IDX: give a path to a block device, query filesystem to -+ * determine if disk is a (online) member - if so, returns device's index -+ * -+ * Returns -ENOENT if not found -+ */ -+struct bch_ioctl_disk_get_idx { -+ __u64 dev; -+}; -+ -+/* -+ * BCH_IOCTL_DISK_RESIZE: resize filesystem on a device -+ * -+ * @dev - member to resize -+ * @nbuckets - new number of buckets -+ */ -+struct bch_ioctl_disk_resize { -+ __u32 flags; -+ __u32 pad; -+ __u64 dev; -+ __u64 nbuckets; -+}; -+ -+#endif /* _BCACHEFS_IOCTL_H */ -diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c -new file mode 100644 -index 000000000000..4d0c9129cd4a ---- /dev/null -+++ b/fs/bcachefs/bkey.c -@@ -0,0 +1,1154 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey.h" -+#include "bkey_methods.h" -+#include "bset.h" -+#include "util.h" -+ -+#undef EBUG_ON -+ -+#ifdef DEBUG_BKEYS -+#define EBUG_ON(cond) BUG_ON(cond) -+#else -+#define EBUG_ON(cond) -+#endif -+ -+const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT; -+ -+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *, -+ const struct bkey_packed *); -+ -+void bch2_to_binary(char *out, const u64 *p, unsigned nr_bits) -+{ -+ unsigned bit = high_bit_offset, done = 0; -+ -+ while (1) { -+ while (bit < 64) { -+ if (done && !(done % 8)) -+ *out++ = ' '; -+ *out++ = *p & (1ULL << (63 - bit)) ? 
'1' : '0'; -+ bit++; -+ done++; -+ if (done == nr_bits) { -+ *out++ = '\0'; -+ return; -+ } -+ } -+ -+ p = next_word(p); -+ bit = 0; -+ } -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+static void bch2_bkey_pack_verify(const struct bkey_packed *packed, -+ const struct bkey *unpacked, -+ const struct bkey_format *format) -+{ -+ struct bkey tmp; -+ -+ BUG_ON(bkeyp_val_u64s(format, packed) != -+ bkey_val_u64s(unpacked)); -+ -+ BUG_ON(packed->u64s < bkeyp_key_u64s(format, packed)); -+ -+ tmp = __bch2_bkey_unpack_key(format, packed); -+ -+ if (memcmp(&tmp, unpacked, sizeof(struct bkey))) { -+ char buf1[160], buf2[160]; -+ char buf3[160], buf4[160]; -+ -+ bch2_bkey_to_text(&PBUF(buf1), unpacked); -+ bch2_bkey_to_text(&PBUF(buf2), &tmp); -+ bch2_to_binary(buf3, (void *) unpacked, 80); -+ bch2_to_binary(buf4, high_word(format, packed), 80); -+ -+ panic("keys differ: format u64s %u fields %u %u %u %u %u\n%s\n%s\n%s\n%s\n", -+ format->key_u64s, -+ format->bits_per_field[0], -+ format->bits_per_field[1], -+ format->bits_per_field[2], -+ format->bits_per_field[3], -+ format->bits_per_field[4], -+ buf1, buf2, buf3, buf4); -+ } -+} -+ -+#else -+static inline void bch2_bkey_pack_verify(const struct bkey_packed *packed, -+ const struct bkey *unpacked, -+ const struct bkey_format *format) {} -+#endif -+ -+struct pack_state { -+ const struct bkey_format *format; -+ unsigned bits; /* bits remaining in current word */ -+ u64 w; /* current word */ -+ u64 *p; /* pointer to next word */ -+}; -+ -+__always_inline -+static struct pack_state pack_state_init(const struct bkey_format *format, -+ struct bkey_packed *k) -+{ -+ u64 *p = high_word(format, k); -+ -+ return (struct pack_state) { -+ .format = format, -+ .bits = 64 - high_bit_offset, -+ .w = 0, -+ .p = p, -+ }; -+} -+ -+__always_inline -+static void pack_state_finish(struct pack_state *state, -+ struct bkey_packed *k) -+{ -+ EBUG_ON(state->p < k->_data); -+ EBUG_ON(state->p >= k->_data + state->format->key_u64s); -+ -+ *state->p = state->w; -+} -+ -+struct unpack_state { -+ const struct bkey_format *format; -+ unsigned bits; /* bits remaining in current word */ -+ u64 w; /* current word */ -+ const u64 *p; /* pointer to next word */ -+}; -+ -+__always_inline -+static struct unpack_state unpack_state_init(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ const u64 *p = high_word(format, k); -+ -+ return (struct unpack_state) { -+ .format = format, -+ .bits = 64 - high_bit_offset, -+ .w = *p << high_bit_offset, -+ .p = p, -+ }; -+} -+ -+__always_inline -+static u64 get_inc_field(struct unpack_state *state, unsigned field) -+{ -+ unsigned bits = state->format->bits_per_field[field]; -+ u64 v = 0, offset = le64_to_cpu(state->format->field_offset[field]); -+ -+ if (bits >= state->bits) { -+ v = state->w >> (64 - bits); -+ bits -= state->bits; -+ -+ state->p = next_word(state->p); -+ state->w = *state->p; -+ state->bits = 64; -+ } -+ -+ /* avoid shift by 64 if bits is 0 - bits is never 64 here: */ -+ v |= (state->w >> 1) >> (63 - bits); -+ state->w <<= bits; -+ state->bits -= bits; -+ -+ return v + offset; -+} -+ -+__always_inline -+static bool set_inc_field(struct pack_state *state, unsigned field, u64 v) -+{ -+ unsigned bits = state->format->bits_per_field[field]; -+ u64 offset = le64_to_cpu(state->format->field_offset[field]); -+ -+ if (v < offset) -+ return false; -+ -+ v -= offset; -+ -+ if (fls64(v) > bits) -+ return false; -+ -+ if (bits > state->bits) { -+ bits -= state->bits; -+ /* avoid shift by 64 if bits is 0 - bits is never 64 here: 
*/ -+ state->w |= (v >> 1) >> (bits - 1); -+ -+ *state->p = state->w; -+ state->p = next_word(state->p); -+ state->w = 0; -+ state->bits = 64; -+ } -+ -+ state->bits -= bits; -+ state->w |= v << state->bits; -+ -+ return true; -+} -+ -+/* -+ * Note: does NOT set out->format (we don't know what it should be here!) -+ * -+ * Also: doesn't work on extents - it doesn't preserve the invariant that -+ * if k is packed bkey_start_pos(k) will successfully pack -+ */ -+static bool bch2_bkey_transform_key(const struct bkey_format *out_f, -+ struct bkey_packed *out, -+ const struct bkey_format *in_f, -+ const struct bkey_packed *in) -+{ -+ struct pack_state out_s = pack_state_init(out_f, out); -+ struct unpack_state in_s = unpack_state_init(in_f, in); -+ unsigned i; -+ -+ out->_data[0] = 0; -+ -+ for (i = 0; i < BKEY_NR_FIELDS; i++) -+ if (!set_inc_field(&out_s, i, get_inc_field(&in_s, i))) -+ return false; -+ -+ /* Can't happen because the val would be too big to unpack: */ -+ EBUG_ON(in->u64s - in_f->key_u64s + out_f->key_u64s > U8_MAX); -+ -+ pack_state_finish(&out_s, out); -+ out->u64s = out_f->key_u64s + in->u64s - in_f->key_u64s; -+ out->needs_whiteout = in->needs_whiteout; -+ out->type = in->type; -+ -+ return true; -+} -+ -+bool bch2_bkey_transform(const struct bkey_format *out_f, -+ struct bkey_packed *out, -+ const struct bkey_format *in_f, -+ const struct bkey_packed *in) -+{ -+ if (!bch2_bkey_transform_key(out_f, out, in_f, in)) -+ return false; -+ -+ memcpy_u64s((u64 *) out + out_f->key_u64s, -+ (u64 *) in + in_f->key_u64s, -+ (in->u64s - in_f->key_u64s)); -+ return true; -+} -+ -+#define bkey_fields() \ -+ x(BKEY_FIELD_INODE, p.inode) \ -+ x(BKEY_FIELD_OFFSET, p.offset) \ -+ x(BKEY_FIELD_SNAPSHOT, p.snapshot) \ -+ x(BKEY_FIELD_SIZE, size) \ -+ x(BKEY_FIELD_VERSION_HI, version.hi) \ -+ x(BKEY_FIELD_VERSION_LO, version.lo) -+ -+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *format, -+ const struct bkey_packed *in) -+{ -+ struct unpack_state state = unpack_state_init(format, in); -+ struct bkey out; -+ -+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); -+ EBUG_ON(in->u64s < format->key_u64s); -+ EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE); -+ EBUG_ON(in->u64s - format->key_u64s + BKEY_U64s > U8_MAX); -+ -+ out.u64s = BKEY_U64s + in->u64s - format->key_u64s; -+ out.format = KEY_FORMAT_CURRENT; -+ out.needs_whiteout = in->needs_whiteout; -+ out.type = in->type; -+ out.pad[0] = 0; -+ -+#define x(id, field) out.field = get_inc_field(&state, id); -+ bkey_fields() -+#undef x -+ -+ return out; -+} -+ -+#ifndef HAVE_BCACHEFS_COMPILED_UNPACK -+struct bpos __bkey_unpack_pos(const struct bkey_format *format, -+ const struct bkey_packed *in) -+{ -+ struct unpack_state state = unpack_state_init(format, in); -+ struct bpos out; -+ -+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); -+ EBUG_ON(in->u64s < format->key_u64s); -+ EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE); -+ -+ out.inode = get_inc_field(&state, BKEY_FIELD_INODE); -+ out.offset = get_inc_field(&state, BKEY_FIELD_OFFSET); -+ out.snapshot = get_inc_field(&state, BKEY_FIELD_SNAPSHOT); -+ -+ return out; -+} -+#endif -+ -+/** -+ * bch2_bkey_pack_key -- pack just the key, not the value -+ */ -+bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in, -+ const struct bkey_format *format) -+{ -+ struct pack_state state = pack_state_init(format, out); -+ -+ EBUG_ON((void *) in == (void *) out); -+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); -+ EBUG_ON(in->format != KEY_FORMAT_CURRENT); -+ -+ out->_data[0] = 0; -+ 
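-+	/* pack each key field in bkey_fields() order; fail if a value doesn't fit the format */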
-+#define x(id, field) if (!set_inc_field(&state, id, in->field)) return false; -+ bkey_fields() -+#undef x -+ -+ /* -+ * Extents - we have to guarantee that if an extent is packed, a trimmed -+ * version will also pack: -+ */ -+ if (bkey_start_offset(in) < -+ le64_to_cpu(format->field_offset[BKEY_FIELD_OFFSET])) -+ return false; -+ -+ pack_state_finish(&state, out); -+ out->u64s = format->key_u64s + in->u64s - BKEY_U64s; -+ out->format = KEY_FORMAT_LOCAL_BTREE; -+ out->needs_whiteout = in->needs_whiteout; -+ out->type = in->type; -+ -+ bch2_bkey_pack_verify(out, in, format); -+ return true; -+} -+ -+/** -+ * bch2_bkey_unpack -- unpack the key and the value -+ */ -+void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst, -+ const struct bkey_packed *src) -+{ -+ __bkey_unpack_key(b, &dst->k, src); -+ -+ memcpy_u64s(&dst->v, -+ bkeyp_val(&b->format, src), -+ bkeyp_val_u64s(&b->format, src)); -+} -+ -+/** -+ * bch2_bkey_pack -- pack the key and the value -+ */ -+bool bch2_bkey_pack(struct bkey_packed *out, const struct bkey_i *in, -+ const struct bkey_format *format) -+{ -+ struct bkey_packed tmp; -+ -+ if (!bch2_bkey_pack_key(&tmp, &in->k, format)) -+ return false; -+ -+ memmove_u64s((u64 *) out + format->key_u64s, -+ &in->v, -+ bkey_val_u64s(&in->k)); -+ memcpy_u64s(out, &tmp, format->key_u64s); -+ -+ return true; -+} -+ -+__always_inline -+static bool set_inc_field_lossy(struct pack_state *state, unsigned field, u64 v) -+{ -+ unsigned bits = state->format->bits_per_field[field]; -+ u64 offset = le64_to_cpu(state->format->field_offset[field]); -+ bool ret = true; -+ -+ EBUG_ON(v < offset); -+ v -= offset; -+ -+ if (fls64(v) > bits) { -+ v = ~(~0ULL << bits); -+ ret = false; -+ } -+ -+ if (bits > state->bits) { -+ bits -= state->bits; -+ state->w |= (v >> 1) >> (bits - 1); -+ -+ *state->p = state->w; -+ state->p = next_word(state->p); -+ state->w = 0; -+ state->bits = 64; -+ } -+ -+ state->bits -= bits; -+ state->w |= v << state->bits; -+ -+ return ret; -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+static bool bkey_packed_successor(struct bkey_packed *out, -+ const struct btree *b, -+ struct bkey_packed k) -+{ -+ const struct bkey_format *f = &b->format; -+ unsigned nr_key_bits = b->nr_key_bits; -+ unsigned first_bit, offset; -+ u64 *p; -+ -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f)); -+ -+ if (!nr_key_bits) -+ return false; -+ -+ *out = k; -+ -+ first_bit = high_bit_offset + nr_key_bits - 1; -+ p = nth_word(high_word(f, out), first_bit >> 6); -+ offset = 63 - (first_bit & 63); -+ -+ while (nr_key_bits) { -+ unsigned bits = min(64 - offset, nr_key_bits); -+ u64 mask = (~0ULL >> (64 - bits)) << offset; -+ -+ if ((*p & mask) != mask) { -+ *p += 1ULL << offset; -+ EBUG_ON(bkey_cmp_packed(b, out, &k) <= 0); -+ return true; -+ } -+ -+ *p &= ~mask; -+ p = prev_word(p); -+ nr_key_bits -= bits; -+ offset = 0; -+ } -+ -+ return false; -+} -+#endif -+ -+/* -+ * Returns a packed key that compares <= in -+ * -+ * This is used in bset_search_tree(), where we need a packed pos in order to be -+ * able to compare against the keys in the auxiliary search tree - and it's -+ * legal to use a packed pos that isn't equivalent to the original pos, -+ * _provided_ it compares <= to the original pos. 
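-+ *
-+ * (Searching from a slightly smaller pos is safe here: it can only make the
-+ * lookup start earlier within the node, never skip past the key sought.)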
-+ */ -+enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out, -+ struct bpos in, -+ const struct btree *b) -+{ -+ const struct bkey_format *f = &b->format; -+ struct pack_state state = pack_state_init(f, out); -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bpos orig = in; -+#endif -+ bool exact = true; -+ -+ out->_data[0] = 0; -+ -+ if (unlikely(in.snapshot < -+ le64_to_cpu(f->field_offset[BKEY_FIELD_SNAPSHOT]))) { -+ if (!in.offset-- && -+ !in.inode--) -+ return BKEY_PACK_POS_FAIL; -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (unlikely(in.offset < -+ le64_to_cpu(f->field_offset[BKEY_FIELD_OFFSET]))) { -+ if (!in.inode--) -+ return BKEY_PACK_POS_FAIL; -+ in.offset = KEY_OFFSET_MAX; -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (unlikely(in.inode < -+ le64_to_cpu(f->field_offset[BKEY_FIELD_INODE]))) -+ return BKEY_PACK_POS_FAIL; -+ -+ if (!set_inc_field_lossy(&state, BKEY_FIELD_INODE, in.inode)) { -+ in.offset = KEY_OFFSET_MAX; -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (!set_inc_field_lossy(&state, BKEY_FIELD_OFFSET, in.offset)) { -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (!set_inc_field_lossy(&state, BKEY_FIELD_SNAPSHOT, in.snapshot)) -+ exact = false; -+ -+ pack_state_finish(&state, out); -+ out->u64s = f->key_u64s; -+ out->format = KEY_FORMAT_LOCAL_BTREE; -+ out->type = KEY_TYPE_deleted; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ if (exact) { -+ BUG_ON(bkey_cmp_left_packed(b, out, &orig)); -+ } else { -+ struct bkey_packed successor; -+ -+ BUG_ON(bkey_cmp_left_packed(b, out, &orig) >= 0); -+ BUG_ON(bkey_packed_successor(&successor, b, *out) && -+ bkey_cmp_left_packed(b, &successor, &orig) < 0); -+ } -+#endif -+ -+ return exact ? BKEY_PACK_POS_EXACT : BKEY_PACK_POS_SMALLER; -+} -+ -+void bch2_bkey_format_init(struct bkey_format_state *s) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(s->field_min); i++) -+ s->field_min[i] = U64_MAX; -+ -+ for (i = 0; i < ARRAY_SIZE(s->field_max); i++) -+ s->field_max[i] = 0; -+ -+ /* Make sure we can store a size of 0: */ -+ s->field_min[BKEY_FIELD_SIZE] = 0; -+} -+ -+static void __bkey_format_add(struct bkey_format_state *s, -+ unsigned field, u64 v) -+{ -+ s->field_min[field] = min(s->field_min[field], v); -+ s->field_max[field] = max(s->field_max[field], v); -+} -+ -+/* -+ * Changes @format so that @k can be successfully packed with @format -+ */ -+void bch2_bkey_format_add_key(struct bkey_format_state *s, const struct bkey *k) -+{ -+#define x(id, field) __bkey_format_add(s, id, k->field); -+ bkey_fields() -+#undef x -+ __bkey_format_add(s, BKEY_FIELD_OFFSET, bkey_start_offset(k)); -+} -+ -+void bch2_bkey_format_add_pos(struct bkey_format_state *s, struct bpos p) -+{ -+ unsigned field = 0; -+ -+ __bkey_format_add(s, field++, p.inode); -+ __bkey_format_add(s, field++, p.offset); -+ __bkey_format_add(s, field++, p.snapshot); -+} -+ -+/* -+ * We don't want it to be possible for the packed format to represent fields -+ * bigger than a u64... that will cause confusion and issues (like with -+ * bkey_packed_successor()) -+ */ -+static void set_format_field(struct bkey_format *f, enum bch_bkey_fields i, -+ unsigned bits, u64 offset) -+{ -+ offset = bits == 64 ? 
0 : min(offset, U64_MAX - ((1ULL << bits) - 1)); -+ -+ f->bits_per_field[i] = bits; -+ f->field_offset[i] = cpu_to_le64(offset); -+} -+ -+struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s) -+{ -+ unsigned i, bits = KEY_PACKED_BITS_START; -+ struct bkey_format ret = { -+ .nr_fields = BKEY_NR_FIELDS, -+ }; -+ -+ for (i = 0; i < ARRAY_SIZE(s->field_min); i++) { -+ s->field_min[i] = min(s->field_min[i], s->field_max[i]); -+ -+ set_format_field(&ret, i, -+ fls64(s->field_max[i] - s->field_min[i]), -+ s->field_min[i]); -+ -+ bits += ret.bits_per_field[i]; -+ } -+ -+ /* allow for extent merging: */ -+ if (ret.bits_per_field[BKEY_FIELD_SIZE]) { -+ ret.bits_per_field[BKEY_FIELD_SIZE] += 4; -+ bits += 4; -+ } -+ -+ ret.key_u64s = DIV_ROUND_UP(bits, 64); -+ -+ /* if we have enough spare bits, round fields up to nearest byte */ -+ bits = ret.key_u64s * 64 - bits; -+ -+ for (i = 0; i < ARRAY_SIZE(ret.bits_per_field); i++) { -+ unsigned r = round_up(ret.bits_per_field[i], 8) - -+ ret.bits_per_field[i]; -+ -+ if (r <= bits) { -+ set_format_field(&ret, i, -+ ret.bits_per_field[i] + r, -+ le64_to_cpu(ret.field_offset[i])); -+ bits -= r; -+ } -+ } -+ -+ EBUG_ON(bch2_bkey_format_validate(&ret)); -+ return ret; -+} -+ -+const char *bch2_bkey_format_validate(struct bkey_format *f) -+{ -+ unsigned i, bits = KEY_PACKED_BITS_START; -+ -+ if (f->nr_fields != BKEY_NR_FIELDS) -+ return "incorrect number of fields"; -+ -+ for (i = 0; i < f->nr_fields; i++) { -+ u64 field_offset = le64_to_cpu(f->field_offset[i]); -+ -+ if (f->bits_per_field[i] > 64) -+ return "field too large"; -+ -+ if (field_offset && -+ (f->bits_per_field[i] == 64 || -+ (field_offset + ((1ULL << f->bits_per_field[i]) - 1) < -+ field_offset))) -+ return "offset + bits overflow"; -+ -+ bits += f->bits_per_field[i]; -+ } -+ -+ if (f->key_u64s != DIV_ROUND_UP(bits, 64)) -+ return "incorrect key_u64s"; -+ -+ return NULL; -+} -+ -+/* -+ * Most significant differing bit -+ * Bits are indexed from 0 - return is [0, nr_key_bits) -+ */ -+__pure -+unsigned bch2_bkey_greatest_differing_bit(const struct btree *b, -+ const struct bkey_packed *l_k, -+ const struct bkey_packed *r_k) -+{ -+ const u64 *l = high_word(&b->format, l_k); -+ const u64 *r = high_word(&b->format, r_k); -+ unsigned nr_key_bits = b->nr_key_bits; -+ unsigned word_bits = 64 - high_bit_offset; -+ u64 l_v, r_v; -+ -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format)); -+ -+ /* for big endian, skip past header */ -+ l_v = *l & (~0ULL >> high_bit_offset); -+ r_v = *r & (~0ULL >> high_bit_offset); -+ -+ while (nr_key_bits) { -+ if (nr_key_bits < word_bits) { -+ l_v >>= word_bits - nr_key_bits; -+ r_v >>= word_bits - nr_key_bits; -+ nr_key_bits = 0; -+ } else { -+ nr_key_bits -= word_bits; -+ } -+ -+ if (l_v != r_v) -+ return fls64(l_v ^ r_v) - 1 + nr_key_bits; -+ -+ l = next_word(l); -+ r = next_word(r); -+ -+ l_v = *l; -+ r_v = *r; -+ word_bits = 64; -+ } -+ -+ return 0; -+} -+ -+/* -+ * First set bit -+ * Bits are indexed from 0 - return is [0, nr_key_bits) -+ */ -+__pure -+unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k) -+{ -+ const u64 *p = high_word(&b->format, k); -+ unsigned nr_key_bits = b->nr_key_bits; -+ unsigned ret = 0, offset; -+ -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format)); -+ -+ offset = nr_key_bits; -+ while (offset > 64) { -+ p = next_word(p); -+ offset -= 64; -+ } -+ -+ offset = 64 - offset; -+ -+ while (nr_key_bits) { -+ unsigned bits = nr_key_bits + offset < 64 -+ ? 
nr_key_bits -+ : 64 - offset; -+ -+ u64 mask = (~0ULL >> (64 - bits)) << offset; -+ -+ if (*p & mask) -+ return ret + __ffs64(*p & mask) - offset; -+ -+ p = prev_word(p); -+ nr_key_bits -= bits; -+ ret += bits; -+ offset = 0; -+ } -+ -+ return 0; -+} -+ -+#ifdef CONFIG_X86_64 -+ -+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r, -+ unsigned nr_key_bits) -+{ -+ long d0, d1, d2, d3; -+ int cmp; -+ -+ /* we shouldn't need asm for this, but gcc is being retarded: */ -+ -+ asm(".intel_syntax noprefix;" -+ "xor eax, eax;" -+ "xor edx, edx;" -+ "1:;" -+ "mov r8, [rdi];" -+ "mov r9, [rsi];" -+ "sub ecx, 64;" -+ "jl 2f;" -+ -+ "cmp r8, r9;" -+ "jnz 3f;" -+ -+ "lea rdi, [rdi - 8];" -+ "lea rsi, [rsi - 8];" -+ "jmp 1b;" -+ -+ "2:;" -+ "not ecx;" -+ "shr r8, 1;" -+ "shr r9, 1;" -+ "shr r8, cl;" -+ "shr r9, cl;" -+ "cmp r8, r9;" -+ -+ "3:\n" -+ "seta al;" -+ "setb dl;" -+ "sub eax, edx;" -+ ".att_syntax prefix;" -+ : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp) -+ : "0" (l), "1" (r), "3" (nr_key_bits) -+ : "r8", "r9", "cc", "memory"); -+ -+ return cmp; -+} -+ -+#define I(_x) (*(out)++ = (_x)) -+#define I1(i0) I(i0) -+#define I2(i0, i1) (I1(i0), I(i1)) -+#define I3(i0, i1, i2) (I2(i0, i1), I(i2)) -+#define I4(i0, i1, i2, i3) (I3(i0, i1, i2), I(i3)) -+#define I5(i0, i1, i2, i3, i4) (I4(i0, i1, i2, i3), I(i4)) -+ -+static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out, -+ enum bch_bkey_fields field, -+ unsigned dst_offset, unsigned dst_size, -+ bool *eax_zeroed) -+{ -+ unsigned bits = format->bits_per_field[field]; -+ u64 offset = le64_to_cpu(format->field_offset[field]); -+ unsigned i, byte, bit_offset, align, shl, shr; -+ -+ if (!bits && !offset) { -+ if (!*eax_zeroed) { -+ /* xor eax, eax */ -+ I2(0x31, 0xc0); -+ } -+ -+ *eax_zeroed = true; -+ goto set_field; -+ } -+ -+ if (!bits) { -+ /* just return offset: */ -+ -+ switch (dst_size) { -+ case 8: -+ if (offset > S32_MAX) { -+ /* mov [rdi + dst_offset], offset */ -+ I3(0xc7, 0x47, dst_offset); -+ memcpy(out, &offset, 4); -+ out += 4; -+ -+ I3(0xc7, 0x47, dst_offset + 4); -+ memcpy(out, (void *) &offset + 4, 4); -+ out += 4; -+ } else { -+ /* mov [rdi + dst_offset], offset */ -+ /* sign extended */ -+ I4(0x48, 0xc7, 0x47, dst_offset); -+ memcpy(out, &offset, 4); -+ out += 4; -+ } -+ break; -+ case 4: -+ /* mov [rdi + dst_offset], offset */ -+ I3(0xc7, 0x47, dst_offset); -+ memcpy(out, &offset, 4); -+ out += 4; -+ break; -+ default: -+ BUG(); -+ } -+ -+ return out; -+ } -+ -+ bit_offset = format->key_u64s * 64; -+ for (i = 0; i <= field; i++) -+ bit_offset -= format->bits_per_field[i]; -+ -+ byte = bit_offset / 8; -+ bit_offset -= byte * 8; -+ -+ *eax_zeroed = false; -+ -+ if (bit_offset == 0 && bits == 8) { -+ /* movzx eax, BYTE PTR [rsi + imm8] */ -+ I4(0x0f, 0xb6, 0x46, byte); -+ } else if (bit_offset == 0 && bits == 16) { -+ /* movzx eax, WORD PTR [rsi + imm8] */ -+ I4(0x0f, 0xb7, 0x46, byte); -+ } else if (bit_offset + bits <= 32) { -+ align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3); -+ byte -= align; -+ bit_offset += align * 8; -+ -+ BUG_ON(bit_offset + bits > 32); -+ -+ /* mov eax, [rsi + imm8] */ -+ I3(0x8b, 0x46, byte); -+ -+ if (bit_offset) { -+ /* shr eax, imm8 */ -+ I3(0xc1, 0xe8, bit_offset); -+ } -+ -+ if (bit_offset + bits < 32) { -+ unsigned mask = ~0U >> (32 - bits); -+ -+ /* and eax, imm32 */ -+ I1(0x25); -+ memcpy(out, &mask, 4); -+ out += 4; -+ } -+ } else if (bit_offset + bits <= 64) { -+ align = min(8 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 7); -+ byte -= align; -+ 
bit_offset += align * 8; -+ -+ BUG_ON(bit_offset + bits > 64); -+ -+ /* mov rax, [rsi + imm8] */ -+ I4(0x48, 0x8b, 0x46, byte); -+ -+ shl = 64 - bit_offset - bits; -+ shr = bit_offset + shl; -+ -+ if (shl) { -+ /* shl rax, imm8 */ -+ I4(0x48, 0xc1, 0xe0, shl); -+ } -+ -+ if (shr) { -+ /* shr rax, imm8 */ -+ I4(0x48, 0xc1, 0xe8, shr); -+ } -+ } else { -+ align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3); -+ byte -= align; -+ bit_offset += align * 8; -+ -+ BUG_ON(bit_offset + bits > 96); -+ -+ /* mov rax, [rsi + byte] */ -+ I4(0x48, 0x8b, 0x46, byte); -+ -+ /* mov edx, [rsi + byte + 8] */ -+ I3(0x8b, 0x56, byte + 8); -+ -+ /* bits from next word: */ -+ shr = bit_offset + bits - 64; -+ BUG_ON(shr > bit_offset); -+ -+ /* shr rax, bit_offset */ -+ I4(0x48, 0xc1, 0xe8, shr); -+ -+ /* shl rdx, imm8 */ -+ I4(0x48, 0xc1, 0xe2, 64 - shr); -+ -+ /* or rax, rdx */ -+ I3(0x48, 0x09, 0xd0); -+ -+ shr = bit_offset - shr; -+ -+ if (shr) { -+ /* shr rax, imm8 */ -+ I4(0x48, 0xc1, 0xe8, shr); -+ } -+ } -+ -+ /* rax += offset: */ -+ if (offset > S32_MAX) { -+ /* mov rdx, imm64 */ -+ I2(0x48, 0xba); -+ memcpy(out, &offset, 8); -+ out += 8; -+ /* add %rdx, %rax */ -+ I3(0x48, 0x01, 0xd0); -+ } else if (offset + (~0ULL >> (64 - bits)) > U32_MAX) { -+ /* add rax, imm32 */ -+ I2(0x48, 0x05); -+ memcpy(out, &offset, 4); -+ out += 4; -+ } else if (offset) { -+ /* add eax, imm32 */ -+ I1(0x05); -+ memcpy(out, &offset, 4); -+ out += 4; -+ } -+set_field: -+ switch (dst_size) { -+ case 8: -+ /* mov [rdi + dst_offset], rax */ -+ I4(0x48, 0x89, 0x47, dst_offset); -+ break; -+ case 4: -+ /* mov [rdi + dst_offset], eax */ -+ I3(0x89, 0x47, dst_offset); -+ break; -+ default: -+ BUG(); -+ } -+ -+ return out; -+} -+ -+int bch2_compile_bkey_format(const struct bkey_format *format, void *_out) -+{ -+ bool eax_zeroed = false; -+ u8 *out = _out; -+ -+ /* -+ * rdi: dst - unpacked key -+ * rsi: src - packed key -+ */ -+ -+ /* k->u64s, k->format, k->type */ -+ -+ /* mov eax, [rsi] */ -+ I2(0x8b, 0x06); -+ -+ /* add eax, BKEY_U64s - format->key_u64s */ -+ I5(0x05, BKEY_U64s - format->key_u64s, KEY_FORMAT_CURRENT, 0, 0); -+ -+ /* and eax, imm32: mask out k->pad: */ -+ I5(0x25, 0xff, 0xff, 0xff, 0); -+ -+ /* mov [rdi], eax */ -+ I2(0x89, 0x07); -+ -+#define x(id, field) \ -+ out = compile_bkey_field(format, out, id, \ -+ offsetof(struct bkey, field), \ -+ sizeof(((struct bkey *) NULL)->field), \ -+ &eax_zeroed); -+ bkey_fields() -+#undef x -+ -+ /* retq */ -+ I1(0xc3); -+ -+ return (void *) out - _out; -+} -+ -+#else -+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r, -+ unsigned nr_key_bits) -+{ -+ u64 l_v, r_v; -+ -+ if (!nr_key_bits) -+ return 0; -+ -+ /* for big endian, skip past header */ -+ nr_key_bits += high_bit_offset; -+ l_v = *l & (~0ULL >> high_bit_offset); -+ r_v = *r & (~0ULL >> high_bit_offset); -+ -+ while (1) { -+ if (nr_key_bits < 64) { -+ l_v >>= 64 - nr_key_bits; -+ r_v >>= 64 - nr_key_bits; -+ nr_key_bits = 0; -+ } else { -+ nr_key_bits -= 64; -+ } -+ -+ if (!nr_key_bits || l_v != r_v) -+ break; -+ -+ l = next_word(l); -+ r = next_word(r); -+ -+ l_v = *l; -+ r_v = *r; -+ } -+ -+ return cmp_int(l_v, r_v); -+} -+#endif -+ -+__pure -+int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l, -+ const struct bkey_packed *r, -+ const struct btree *b) -+{ -+ const struct bkey_format *f = &b->format; -+ int ret; -+ -+ EBUG_ON(!bkey_packed(l) || !bkey_packed(r)); -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f)); -+ -+ ret = __bkey_cmp_bits(high_word(f, l), -+ high_word(f, r), -+ 
b->nr_key_bits); -+ -+ EBUG_ON(ret != bkey_cmp(bkey_unpack_pos(b, l), -+ bkey_unpack_pos(b, r))); -+ return ret; -+} -+ -+__pure __flatten -+int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bpos *r) -+{ -+ return bkey_cmp(bkey_unpack_pos_format_checked(b, l), *r); -+} -+ -+__pure __flatten -+int __bch2_bkey_cmp_packed(const struct bkey_packed *l, -+ const struct bkey_packed *r, -+ const struct btree *b) -+{ -+ struct bkey unpacked; -+ -+ if (likely(bkey_packed(l) && bkey_packed(r))) -+ return __bch2_bkey_cmp_packed_format_checked(l, r, b); -+ -+ if (bkey_packed(l)) { -+ __bkey_unpack_key_format_checked(b, &unpacked, l); -+ l = (void*) &unpacked; -+ } else if (bkey_packed(r)) { -+ __bkey_unpack_key_format_checked(b, &unpacked, r); -+ r = (void*) &unpacked; -+ } -+ -+ return bkey_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p); -+} -+ -+__pure __flatten -+int __bch2_bkey_cmp_left_packed(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bpos *r) -+{ -+ const struct bkey *l_unpacked; -+ -+ return unlikely(l_unpacked = packed_to_bkey_c(l)) -+ ? bkey_cmp(l_unpacked->p, *r) -+ : __bch2_bkey_cmp_left_packed_format_checked(b, l, r); -+} -+ -+void bch2_bpos_swab(struct bpos *p) -+{ -+ u8 *l = (u8 *) p; -+ u8 *h = ((u8 *) &p[1]) - 1; -+ -+ while (l < h) { -+ swap(*l, *h); -+ l++; -+ --h; -+ } -+} -+ -+void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k) -+{ -+ const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current; -+ u8 *l = k->key_start; -+ u8 *h = (u8 *) (k->_data + f->key_u64s) - 1; -+ -+ while (l < h) { -+ swap(*l, *h); -+ l++; -+ --h; -+ } -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_bkey_pack_test(void) -+{ -+ struct bkey t = KEY(4134ULL, 1250629070527416633ULL, 0); -+ struct bkey_packed p; -+ -+ struct bkey_format test_format = { -+ .key_u64s = 2, -+ .nr_fields = BKEY_NR_FIELDS, -+ .bits_per_field = { -+ 13, -+ 64, -+ }, -+ }; -+ -+ struct unpack_state in_s = -+ unpack_state_init(&bch2_bkey_format_current, (void *) &t); -+ struct pack_state out_s = pack_state_init(&test_format, &p); -+ unsigned i; -+ -+ for (i = 0; i < out_s.format->nr_fields; i++) { -+ u64 a, v = get_inc_field(&in_s, i); -+ -+ switch (i) { -+#define x(id, field) case id: a = t.field; break; -+ bkey_fields() -+#undef x -+ default: -+ BUG(); -+ } -+ -+ if (a != v) -+ panic("got %llu actual %llu i %u\n", v, a, i); -+ -+ if (!set_inc_field(&out_s, i, v)) -+ panic("failed at %u\n", i); -+ } -+ -+ BUG_ON(!bch2_bkey_pack_key(&p, &t, &test_format)); -+} -+#endif -diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h -new file mode 100644 -index 000000000000..cbcfbd26bc58 ---- /dev/null -+++ b/fs/bcachefs/bkey.h -@@ -0,0 +1,605 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BKEY_H -+#define _BCACHEFS_BKEY_H -+ -+#include -+#include "bcachefs_format.h" -+ -+#include "util.h" -+#include "vstructs.h" -+ -+#ifdef CONFIG_X86_64 -+#define HAVE_BCACHEFS_COMPILED_UNPACK 1 -+#endif -+ -+void bch2_to_binary(char *, const u64 *, unsigned); -+ -+/* bkey with split value, const */ -+struct bkey_s_c { -+ const struct bkey *k; -+ const struct bch_val *v; -+}; -+ -+/* bkey with split value */ -+struct bkey_s { -+ union { -+ struct { -+ struct bkey *k; -+ struct bch_val *v; -+ }; -+ struct bkey_s_c s_c; -+ }; -+}; -+ -+#define bkey_next(_k) vstruct_next(_k) -+ -+static inline struct bkey_packed *bkey_next_skip_noops(struct bkey_packed *k, -+ struct bkey_packed *end) -+{ -+ k = bkey_next(k); 
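-+	/* zero-u64s keys are padding/noops: step over them one u64 at a time */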
-+ -+ while (k != end && !k->u64s) -+ k = (void *) ((u64 *) k + 1); -+ return k; -+} -+ -+#define bkey_val_u64s(_k) ((_k)->u64s - BKEY_U64s) -+ -+static inline size_t bkey_val_bytes(const struct bkey *k) -+{ -+ return bkey_val_u64s(k) * sizeof(u64); -+} -+ -+static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s) -+{ -+ k->u64s = BKEY_U64s + val_u64s; -+} -+ -+static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes) -+{ -+ k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64)); -+} -+ -+#define bkey_val_end(_k) ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k))) -+ -+#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted) -+ -+#define bkey_whiteout(_k) \ -+ ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard) -+ -+#define bkey_packed_typecheck(_k) \ -+({ \ -+ BUILD_BUG_ON(!type_is(_k, struct bkey *) && \ -+ !type_is(_k, struct bkey_packed *)); \ -+ type_is(_k, struct bkey_packed *); \ -+}) -+ -+enum bkey_lr_packed { -+ BKEY_PACKED_BOTH, -+ BKEY_PACKED_RIGHT, -+ BKEY_PACKED_LEFT, -+ BKEY_PACKED_NONE, -+}; -+ -+#define bkey_lr_packed_typecheck(_l, _r) \ -+ (!bkey_packed_typecheck(_l) + ((!bkey_packed_typecheck(_r)) << 1)) -+ -+#define bkey_lr_packed(_l, _r) \ -+ ((_l)->format + ((_r)->format << 1)) -+ -+#define bkey_copy(_dst, _src) \ -+do { \ -+ BUILD_BUG_ON(!type_is(_dst, struct bkey_i *) && \ -+ !type_is(_dst, struct bkey_packed *)); \ -+ BUILD_BUG_ON(!type_is(_src, struct bkey_i *) && \ -+ !type_is(_src, struct bkey_packed *)); \ -+ EBUG_ON((u64 *) (_dst) > (u64 *) (_src) && \ -+ (u64 *) (_dst) < (u64 *) (_src) + \ -+ ((struct bkey *) (_src))->u64s); \ -+ \ -+ memcpy_u64s_small((_dst), (_src), \ -+ ((struct bkey *) (_src))->u64s); \ -+} while (0) -+ -+struct btree; -+ -+struct bkey_format_state { -+ u64 field_min[BKEY_NR_FIELDS]; -+ u64 field_max[BKEY_NR_FIELDS]; -+}; -+ -+void bch2_bkey_format_init(struct bkey_format_state *); -+void bch2_bkey_format_add_key(struct bkey_format_state *, const struct bkey *); -+void bch2_bkey_format_add_pos(struct bkey_format_state *, struct bpos); -+struct bkey_format bch2_bkey_format_done(struct bkey_format_state *); -+const char *bch2_bkey_format_validate(struct bkey_format *); -+ -+__pure -+unsigned bch2_bkey_greatest_differing_bit(const struct btree *, -+ const struct bkey_packed *, -+ const struct bkey_packed *); -+__pure -+unsigned bch2_bkey_ffs(const struct btree *, const struct bkey_packed *); -+ -+__pure -+int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *, -+ const struct bkey_packed *, -+ const struct btree *); -+ -+__pure -+int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *, -+ const struct bkey_packed *, -+ const struct bpos *); -+ -+__pure -+int __bch2_bkey_cmp_packed(const struct bkey_packed *, -+ const struct bkey_packed *, -+ const struct btree *); -+ -+__pure -+int __bch2_bkey_cmp_left_packed(const struct btree *, -+ const struct bkey_packed *, -+ const struct bpos *); -+ -+static inline __pure -+int bkey_cmp_left_packed(const struct btree *b, -+ const struct bkey_packed *l, const struct bpos *r) -+{ -+ return __bch2_bkey_cmp_left_packed(b, l, r); -+} -+ -+/* -+ * we prefer to pass bpos by ref, but it's often enough terribly convenient to -+ * pass it by by val... 
as much as I hate c++, const ref would be nice here: -+ */ -+__pure __flatten -+static inline int bkey_cmp_left_packed_byval(const struct btree *b, -+ const struct bkey_packed *l, -+ struct bpos r) -+{ -+ return bkey_cmp_left_packed(b, l, &r); -+} -+ -+/* -+ * If @_l or @_r are struct bkey * (not bkey_packed *), uses type information to -+ * skip dispatching on k->format: -+ */ -+#define bkey_cmp_packed(_b, _l, _r) \ -+({ \ -+ int _cmp; \ -+ \ -+ switch (bkey_lr_packed_typecheck(_l, _r)) { \ -+ case BKEY_PACKED_NONE: \ -+ _cmp = bkey_cmp(((struct bkey *) (_l))->p, \ -+ ((struct bkey *) (_r))->p); \ -+ break; \ -+ case BKEY_PACKED_LEFT: \ -+ _cmp = bkey_cmp_left_packed((_b), \ -+ (struct bkey_packed *) (_l), \ -+ &((struct bkey *) (_r))->p); \ -+ break; \ -+ case BKEY_PACKED_RIGHT: \ -+ _cmp = -bkey_cmp_left_packed((_b), \ -+ (struct bkey_packed *) (_r), \ -+ &((struct bkey *) (_l))->p); \ -+ break; \ -+ case BKEY_PACKED_BOTH: \ -+ _cmp = __bch2_bkey_cmp_packed((void *) (_l), \ -+ (void *) (_r), (_b)); \ -+ break; \ -+ } \ -+ _cmp; \ -+}) -+ -+#if 1 -+static __always_inline int bkey_cmp(struct bpos l, struct bpos r) -+{ -+ if (l.inode != r.inode) -+ return l.inode < r.inode ? -1 : 1; -+ if (l.offset != r.offset) -+ return l.offset < r.offset ? -1 : 1; -+ if (l.snapshot != r.snapshot) -+ return l.snapshot < r.snapshot ? -1 : 1; -+ return 0; -+} -+#else -+int bkey_cmp(struct bpos l, struct bpos r); -+#endif -+ -+static inline struct bpos bpos_min(struct bpos l, struct bpos r) -+{ -+ return bkey_cmp(l, r) < 0 ? l : r; -+} -+ -+void bch2_bpos_swab(struct bpos *); -+void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *); -+ -+static __always_inline int bversion_cmp(struct bversion l, struct bversion r) -+{ -+ return cmp_int(l.hi, r.hi) ?: -+ cmp_int(l.lo, r.lo); -+} -+ -+#define ZERO_VERSION ((struct bversion) { .hi = 0, .lo = 0 }) -+#define MAX_VERSION ((struct bversion) { .hi = ~0, .lo = ~0ULL }) -+ -+static __always_inline int bversion_zero(struct bversion v) -+{ -+ return !bversion_cmp(v, ZERO_VERSION); -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+/* statement expressions confusing unlikely()? */ -+#define bkey_packed(_k) \ -+ ({ EBUG_ON((_k)->format > KEY_FORMAT_CURRENT); \ -+ (_k)->format != KEY_FORMAT_CURRENT; }) -+#else -+#define bkey_packed(_k) ((_k)->format != KEY_FORMAT_CURRENT) -+#endif -+ -+/* -+ * It's safe to treat an unpacked bkey as a packed one, but not the reverse -+ */ -+static inline struct bkey_packed *bkey_to_packed(struct bkey_i *k) -+{ -+ return (struct bkey_packed *) k; -+} -+ -+static inline const struct bkey_packed *bkey_to_packed_c(const struct bkey_i *k) -+{ -+ return (const struct bkey_packed *) k; -+} -+ -+static inline struct bkey_i *packed_to_bkey(struct bkey_packed *k) -+{ -+ return bkey_packed(k) ? NULL : (struct bkey_i *) k; -+} -+ -+static inline const struct bkey *packed_to_bkey_c(const struct bkey_packed *k) -+{ -+ return bkey_packed(k) ? 
NULL : (const struct bkey *) k; -+} -+ -+static inline unsigned bkey_format_key_bits(const struct bkey_format *format) -+{ -+ return format->bits_per_field[BKEY_FIELD_INODE] + -+ format->bits_per_field[BKEY_FIELD_OFFSET] + -+ format->bits_per_field[BKEY_FIELD_SNAPSHOT]; -+} -+ -+static inline struct bpos bkey_successor(struct bpos p) -+{ -+ struct bpos ret = p; -+ -+ if (!++ret.offset) -+ BUG_ON(!++ret.inode); -+ -+ return ret; -+} -+ -+static inline struct bpos bkey_predecessor(struct bpos p) -+{ -+ struct bpos ret = p; -+ -+ if (!ret.offset--) -+ BUG_ON(!ret.inode--); -+ -+ return ret; -+} -+ -+static inline u64 bkey_start_offset(const struct bkey *k) -+{ -+ return k->p.offset - k->size; -+} -+ -+static inline struct bpos bkey_start_pos(const struct bkey *k) -+{ -+ return (struct bpos) { -+ .inode = k->p.inode, -+ .offset = bkey_start_offset(k), -+ .snapshot = k->p.snapshot, -+ }; -+} -+ -+/* Packed helpers */ -+ -+static inline unsigned bkeyp_key_u64s(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ unsigned ret = bkey_packed(k) ? format->key_u64s : BKEY_U64s; -+ -+ EBUG_ON(k->u64s < ret); -+ return ret; -+} -+ -+static inline unsigned bkeyp_key_bytes(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ return bkeyp_key_u64s(format, k) * sizeof(u64); -+} -+ -+static inline unsigned bkeyp_val_u64s(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ return k->u64s - bkeyp_key_u64s(format, k); -+} -+ -+static inline size_t bkeyp_val_bytes(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ return bkeyp_val_u64s(format, k) * sizeof(u64); -+} -+ -+static inline void set_bkeyp_val_u64s(const struct bkey_format *format, -+ struct bkey_packed *k, unsigned val_u64s) -+{ -+ k->u64s = bkeyp_key_u64s(format, k) + val_u64s; -+} -+ -+#define bkeyp_val(_format, _k) \ -+ ((struct bch_val *) ((_k)->_data + bkeyp_key_u64s(_format, _k))) -+ -+extern const struct bkey_format bch2_bkey_format_current; -+ -+bool bch2_bkey_transform(const struct bkey_format *, -+ struct bkey_packed *, -+ const struct bkey_format *, -+ const struct bkey_packed *); -+ -+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *, -+ const struct bkey_packed *); -+ -+#ifndef HAVE_BCACHEFS_COMPILED_UNPACK -+struct bpos __bkey_unpack_pos(const struct bkey_format *, -+ const struct bkey_packed *); -+#endif -+ -+bool bch2_bkey_pack_key(struct bkey_packed *, const struct bkey *, -+ const struct bkey_format *); -+ -+enum bkey_pack_pos_ret { -+ BKEY_PACK_POS_EXACT, -+ BKEY_PACK_POS_SMALLER, -+ BKEY_PACK_POS_FAIL, -+}; -+ -+enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *, struct bpos, -+ const struct btree *); -+ -+static inline bool bkey_pack_pos(struct bkey_packed *out, struct bpos in, -+ const struct btree *b) -+{ -+ return bch2_bkey_pack_pos_lossy(out, in, b) == BKEY_PACK_POS_EXACT; -+} -+ -+void bch2_bkey_unpack(const struct btree *, struct bkey_i *, -+ const struct bkey_packed *); -+bool bch2_bkey_pack(struct bkey_packed *, const struct bkey_i *, -+ const struct bkey_format *); -+ -+static inline u64 bkey_field_max(const struct bkey_format *f, -+ enum bch_bkey_fields nr) -+{ -+ return f->bits_per_field[nr] < 64 -+ ? 
(le64_to_cpu(f->field_offset[nr]) + -+ ~(~0ULL << f->bits_per_field[nr])) -+ : U64_MAX; -+} -+ -+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK -+ -+int bch2_compile_bkey_format(const struct bkey_format *, void *); -+ -+#else -+ -+static inline int bch2_compile_bkey_format(const struct bkey_format *format, -+ void *out) { return 0; } -+ -+#endif -+ -+static inline void bkey_reassemble(struct bkey_i *dst, -+ struct bkey_s_c src) -+{ -+ dst->k = *src.k; -+ memcpy_u64s_small(&dst->v, src.v, bkey_val_u64s(src.k)); -+} -+ -+#define bkey_s_null ((struct bkey_s) { .k = NULL }) -+#define bkey_s_c_null ((struct bkey_s_c) { .k = NULL }) -+ -+#define bkey_s_err(err) ((struct bkey_s) { .k = ERR_PTR(err) }) -+#define bkey_s_c_err(err) ((struct bkey_s_c) { .k = ERR_PTR(err) }) -+ -+static inline struct bkey_s bkey_to_s(struct bkey *k) -+{ -+ return (struct bkey_s) { .k = k, .v = NULL }; -+} -+ -+static inline struct bkey_s_c bkey_to_s_c(const struct bkey *k) -+{ -+ return (struct bkey_s_c) { .k = k, .v = NULL }; -+} -+ -+static inline struct bkey_s bkey_i_to_s(struct bkey_i *k) -+{ -+ return (struct bkey_s) { .k = &k->k, .v = &k->v }; -+} -+ -+static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k) -+{ -+ return (struct bkey_s_c) { .k = &k->k, .v = &k->v }; -+} -+ -+/* -+ * For a given type of value (e.g. struct bch_extent), generates the types for -+ * bkey + bch_extent - inline, split, split const - and also all the conversion -+ * functions, which also check that the value is of the correct type. -+ * -+ * We use anonymous unions for upcasting - e.g. converting from e.g. a -+ * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion -+ * functions. -+ */ -+#define BKEY_VAL_ACCESSORS(name) \ -+struct bkey_i_##name { \ -+ union { \ -+ struct bkey k; \ -+ struct bkey_i k_i; \ -+ }; \ -+ struct bch_##name v; \ -+}; \ -+ \ -+struct bkey_s_c_##name { \ -+ union { \ -+ struct { \ -+ const struct bkey *k; \ -+ const struct bch_##name *v; \ -+ }; \ -+ struct bkey_s_c s_c; \ -+ }; \ -+}; \ -+ \ -+struct bkey_s_##name { \ -+ union { \ -+ struct { \ -+ struct bkey *k; \ -+ struct bch_##name *v; \ -+ }; \ -+ struct bkey_s_c_##name c; \ -+ struct bkey_s s; \ -+ struct bkey_s_c s_c; \ -+ }; \ -+}; \ -+ \ -+static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \ -+{ \ -+ EBUG_ON(k->k.type != KEY_TYPE_##name); \ -+ return container_of(&k->k, struct bkey_i_##name, k); \ -+} \ -+ \ -+static inline const struct bkey_i_##name * \ -+bkey_i_to_##name##_c(const struct bkey_i *k) \ -+{ \ -+ EBUG_ON(k->k.type != KEY_TYPE_##name); \ -+ return container_of(&k->k, struct bkey_i_##name, k); \ -+} \ -+ \ -+static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \ -+{ \ -+ EBUG_ON(k.k->type != KEY_TYPE_##name); \ -+ return (struct bkey_s_##name) { \ -+ .k = k.k, \ -+ .v = container_of(k.v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\ -+{ \ -+ EBUG_ON(k.k->type != KEY_TYPE_##name); \ -+ return (struct bkey_s_c_##name) { \ -+ .k = k.k, \ -+ .v = container_of(k.v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_##name name##_i_to_s(struct bkey_i_##name *k)\ -+{ \ -+ return (struct bkey_s_##name) { \ -+ .k = &k->k, \ -+ .v = &k->v, \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_c_##name \ -+name##_i_to_s_c(const struct bkey_i_##name *k) \ -+{ \ -+ return (struct bkey_s_c_##name) { \ -+ .k = &k->k, \ -+ .v = &k->v, \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_##name 
bkey_i_to_s_##name(struct bkey_i *k) \ -+{ \ -+ EBUG_ON(k->k.type != KEY_TYPE_##name); \ -+ return (struct bkey_s_##name) { \ -+ .k = &k->k, \ -+ .v = container_of(&k->v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_c_##name \ -+bkey_i_to_s_c_##name(const struct bkey_i *k) \ -+{ \ -+ EBUG_ON(k->k.type != KEY_TYPE_##name); \ -+ return (struct bkey_s_c_##name) { \ -+ .k = &k->k, \ -+ .v = container_of(&k->v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\ -+{ \ -+ struct bkey_i_##name *k = \ -+ container_of(&_k->k, struct bkey_i_##name, k); \ -+ \ -+ bkey_init(&k->k); \ -+ memset(&k->v, 0, sizeof(k->v)); \ -+ k->k.type = KEY_TYPE_##name; \ -+ set_bkey_val_bytes(&k->k, sizeof(k->v)); \ -+ \ -+ return k; \ -+} -+ -+BKEY_VAL_ACCESSORS(cookie); -+BKEY_VAL_ACCESSORS(btree_ptr); -+BKEY_VAL_ACCESSORS(extent); -+BKEY_VAL_ACCESSORS(reservation); -+BKEY_VAL_ACCESSORS(inode); -+BKEY_VAL_ACCESSORS(inode_generation); -+BKEY_VAL_ACCESSORS(dirent); -+BKEY_VAL_ACCESSORS(xattr); -+BKEY_VAL_ACCESSORS(alloc); -+BKEY_VAL_ACCESSORS(quota); -+BKEY_VAL_ACCESSORS(stripe); -+BKEY_VAL_ACCESSORS(reflink_p); -+BKEY_VAL_ACCESSORS(reflink_v); -+BKEY_VAL_ACCESSORS(inline_data); -+BKEY_VAL_ACCESSORS(btree_ptr_v2); -+ -+/* byte order helpers */ -+ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ -+static inline unsigned high_word_offset(const struct bkey_format *f) -+{ -+ return f->key_u64s - 1; -+} -+ -+#define high_bit_offset 0 -+#define nth_word(p, n) ((p) - (n)) -+ -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ -+static inline unsigned high_word_offset(const struct bkey_format *f) -+{ -+ return 0; -+} -+ -+#define high_bit_offset KEY_PACKED_BITS_START -+#define nth_word(p, n) ((p) + (n)) -+ -+#else -+#error edit for your odd byteorder. 
-+#endif -+ -+#define high_word(f, k) ((k)->_data + high_word_offset(f)) -+#define next_word(p) nth_word(p, 1) -+#define prev_word(p) nth_word(p, -1) -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_bkey_pack_test(void); -+#else -+static inline void bch2_bkey_pack_test(void) {} -+#endif -+ -+#endif /* _BCACHEFS_BKEY_H */ -diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c -new file mode 100644 -index 000000000000..36e0c5152b47 ---- /dev/null -+++ b/fs/bcachefs/bkey_methods.c -@@ -0,0 +1,353 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_types.h" -+#include "alloc_background.h" -+#include "dirent.h" -+#include "ec.h" -+#include "error.h" -+#include "extents.h" -+#include "inode.h" -+#include "quota.h" -+#include "reflink.h" -+#include "xattr.h" -+ -+const char * const bch2_bkey_types[] = { -+#define x(name, nr) #name, -+ BCH_BKEY_TYPES() -+#undef x -+ NULL -+}; -+ -+static const char *deleted_key_invalid(const struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ return NULL; -+} -+ -+#define bch2_bkey_ops_deleted (struct bkey_ops) { \ -+ .key_invalid = deleted_key_invalid, \ -+} -+ -+#define bch2_bkey_ops_discard (struct bkey_ops) { \ -+ .key_invalid = deleted_key_invalid, \ -+} -+ -+static const char *empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (bkey_val_bytes(k.k)) -+ return "value size should be zero"; -+ -+ return NULL; -+} -+ -+#define bch2_bkey_ops_error (struct bkey_ops) { \ -+ .key_invalid = empty_val_key_invalid, \ -+} -+ -+static const char *key_type_cookie_invalid(const struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ if (bkey_val_bytes(k.k) != sizeof(struct bch_cookie)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+#define bch2_bkey_ops_cookie (struct bkey_ops) { \ -+ .key_invalid = key_type_cookie_invalid, \ -+} -+ -+#define bch2_bkey_ops_whiteout (struct bkey_ops) { \ -+ .key_invalid = empty_val_key_invalid, \ -+} -+ -+static const char *key_type_inline_data_invalid(const struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ return NULL; -+} -+ -+static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ pr_buf(out, "(%zu bytes)", bkey_val_bytes(k.k)); -+} -+ -+#define bch2_bkey_ops_inline_data (struct bkey_ops) { \ -+ .key_invalid = key_type_inline_data_invalid, \ -+ .val_to_text = key_type_inline_data_to_text, \ -+} -+ -+static const struct bkey_ops bch2_bkey_ops[] = { -+#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name, -+ BCH_BKEY_TYPES() -+#undef x -+}; -+ -+const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (k.k->type >= KEY_TYPE_MAX) -+ return "invalid type"; -+ -+ return bch2_bkey_ops[k.k->type].key_invalid(c, k); -+} -+ -+const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum btree_node_type type) -+{ -+ if (k.k->u64s < BKEY_U64s) -+ return "u64s too small"; -+ -+ if (type == BKEY_TYPE_BTREE && -+ bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) -+ return "value too big"; -+ -+ if (btree_node_type_is_extents(type)) { -+ if ((k.k->size == 0) != bkey_deleted(k.k)) -+ return "bad size field"; -+ -+ if (k.k->size > k.k->p.offset) -+ return "size greater than offset"; -+ } else { -+ if (k.k->size) -+ return "nonzero size field"; -+ } -+ -+ if (k.k->p.snapshot) -+ return "nonzero snapshot"; -+ -+ if (type != BKEY_TYPE_BTREE && -+ !bkey_cmp(k.k->p, POS_MAX)) -+ return "POS_MAX key"; -+ -+ return NULL; -+} -+ -+const char *bch2_bkey_invalid(struct 
bch_fs *c, struct bkey_s_c k, -+ enum btree_node_type type) -+{ -+ return __bch2_bkey_invalid(c, k, type) ?: -+ bch2_bkey_val_invalid(c, k); -+} -+ -+const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k) -+{ -+ if (bkey_cmp(k.k->p, b->data->min_key) < 0) -+ return "key before start of btree node"; -+ -+ if (bkey_cmp(k.k->p, b->data->max_key) > 0) -+ return "key past end of btree node"; -+ -+ return NULL; -+} -+ -+void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; -+ const char *invalid; -+ -+ BUG_ON(!k.k->u64s); -+ -+ invalid = bch2_bkey_invalid(c, k, btree_node_type(b)) ?: -+ bch2_bkey_in_btree_node(b, k); -+ if (invalid) { -+ char buf[160]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, k); -+ bch2_fs_inconsistent(c, "invalid bkey %s: %s", buf, invalid); -+ return; -+ } -+ -+ if (ops->key_debugcheck) -+ ops->key_debugcheck(c, k); -+} -+ -+void bch2_bpos_to_text(struct printbuf *out, struct bpos pos) -+{ -+ if (!bkey_cmp(pos, POS_MIN)) -+ pr_buf(out, "POS_MIN"); -+ else if (!bkey_cmp(pos, POS_MAX)) -+ pr_buf(out, "POS_MAX"); -+ else -+ pr_buf(out, "%llu:%llu", pos.inode, pos.offset); -+} -+ -+void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k) -+{ -+ if (k) { -+ pr_buf(out, "u64s %u type %s ", k->u64s, -+ bch2_bkey_types[k->type]); -+ -+ bch2_bpos_to_text(out, k->p); -+ -+ pr_buf(out, " snap %u len %u ver %llu", -+ k->p.snapshot, k->size, k->version.lo); -+ } else { -+ pr_buf(out, "(null)"); -+ } -+} -+ -+void bch2_val_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; -+ -+ if (likely(ops->val_to_text)) -+ ops->val_to_text(out, c, k); -+} -+ -+void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ bch2_bkey_to_text(out, k.k); -+ -+ if (k.k) { -+ pr_buf(out, ": "); -+ bch2_val_to_text(out, c, k); -+ } -+} -+ -+void bch2_bkey_swab_val(struct bkey_s k) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; -+ -+ if (ops->swab) -+ ops->swab(k); -+} -+ -+bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; -+ -+ return ops->key_normalize -+ ? 
ops->key_normalize(c, k) -+ : false; -+} -+ -+enum merge_result bch2_bkey_merge(struct bch_fs *c, -+ struct bkey_s l, struct bkey_s r) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[l.k->type]; -+ enum merge_result ret; -+ -+ if (key_merging_disabled(c) || -+ !ops->key_merge || -+ l.k->type != r.k->type || -+ bversion_cmp(l.k->version, r.k->version) || -+ bkey_cmp(l.k->p, bkey_start_pos(r.k))) -+ return BCH_MERGE_NOMERGE; -+ -+ ret = ops->key_merge(c, l, r); -+ -+ if (ret != BCH_MERGE_NOMERGE) -+ l.k->needs_whiteout |= r.k->needs_whiteout; -+ return ret; -+} -+ -+static const struct old_bkey_type { -+ u8 btree_node_type; -+ u8 old; -+ u8 new; -+} bkey_renumber_table[] = { -+ {BKEY_TYPE_BTREE, 128, KEY_TYPE_btree_ptr }, -+ {BKEY_TYPE_EXTENTS, 128, KEY_TYPE_extent }, -+ {BKEY_TYPE_EXTENTS, 129, KEY_TYPE_extent }, -+ {BKEY_TYPE_EXTENTS, 130, KEY_TYPE_reservation }, -+ {BKEY_TYPE_INODES, 128, KEY_TYPE_inode }, -+ {BKEY_TYPE_INODES, 130, KEY_TYPE_inode_generation }, -+ {BKEY_TYPE_DIRENTS, 128, KEY_TYPE_dirent }, -+ {BKEY_TYPE_DIRENTS, 129, KEY_TYPE_whiteout }, -+ {BKEY_TYPE_XATTRS, 128, KEY_TYPE_xattr }, -+ {BKEY_TYPE_XATTRS, 129, KEY_TYPE_whiteout }, -+ {BKEY_TYPE_ALLOC, 128, KEY_TYPE_alloc }, -+ {BKEY_TYPE_QUOTAS, 128, KEY_TYPE_quota }, -+}; -+ -+void bch2_bkey_renumber(enum btree_node_type btree_node_type, -+ struct bkey_packed *k, -+ int write) -+{ -+ const struct old_bkey_type *i; -+ -+ for (i = bkey_renumber_table; -+ i < bkey_renumber_table + ARRAY_SIZE(bkey_renumber_table); -+ i++) -+ if (btree_node_type == i->btree_node_type && -+ k->type == (write ? i->new : i->old)) { -+ k->type = write ? i->old : i->new; -+ break; -+ } -+} -+ -+void __bch2_bkey_compat(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, -+ struct bkey_format *f, -+ struct bkey_packed *k) -+{ -+ const struct bkey_ops *ops; -+ struct bkey uk; -+ struct bkey_s u; -+ int i; -+ -+ /* -+ * Do these operations in reverse order in the write path: -+ */ -+ -+ for (i = 0; i < 4; i++) -+ switch (!write ? 
i : 3 - i) { -+ case 0: -+ if (big_endian != CPU_BIG_ENDIAN) -+ bch2_bkey_swab_key(f, k); -+ break; -+ case 1: -+ if (version < bcachefs_metadata_version_bkey_renumber) -+ bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write); -+ break; -+ case 2: -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_id == BTREE_ID_INODES) { -+ if (!bkey_packed(k)) { -+ struct bkey_i *u = packed_to_bkey(k); -+ swap(u->k.p.inode, u->k.p.offset); -+ } else if (f->bits_per_field[BKEY_FIELD_INODE] && -+ f->bits_per_field[BKEY_FIELD_OFFSET]) { -+ struct bkey_format tmp = *f, *in = f, *out = &tmp; -+ -+ swap(tmp.bits_per_field[BKEY_FIELD_INODE], -+ tmp.bits_per_field[BKEY_FIELD_OFFSET]); -+ swap(tmp.field_offset[BKEY_FIELD_INODE], -+ tmp.field_offset[BKEY_FIELD_OFFSET]); -+ -+ if (!write) -+ swap(in, out); -+ -+ uk = __bch2_bkey_unpack_key(in, k); -+ swap(uk.p.inode, uk.p.offset); -+ BUG_ON(!bch2_bkey_pack_key(k, &uk, out)); -+ } -+ } -+ break; -+ case 3: -+ if (!bkey_packed(k)) { -+ u = bkey_i_to_s(packed_to_bkey(k)); -+ } else { -+ uk = __bch2_bkey_unpack_key(f, k); -+ u.k = &uk; -+ u.v = bkeyp_val(f, k); -+ } -+ -+ if (big_endian != CPU_BIG_ENDIAN) -+ bch2_bkey_swab_val(u); -+ -+ ops = &bch2_bkey_ops[k->type]; -+ -+ if (ops->compat) -+ ops->compat(btree_id, version, big_endian, write, u); -+ break; -+ default: -+ BUG(); -+ } -+} -diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h -new file mode 100644 -index 000000000000..0bca725ae3b8 ---- /dev/null -+++ b/fs/bcachefs/bkey_methods.h -@@ -0,0 +1,82 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BKEY_METHODS_H -+#define _BCACHEFS_BKEY_METHODS_H -+ -+#include "bkey.h" -+ -+struct bch_fs; -+struct btree; -+struct bkey; -+enum btree_node_type; -+ -+extern const char * const bch2_bkey_types[]; -+ -+enum merge_result { -+ BCH_MERGE_NOMERGE, -+ -+ /* -+ * The keys were mergeable, but would have overflowed size - so instead -+ * l was changed to the maximum size, and both keys were modified: -+ */ -+ BCH_MERGE_PARTIAL, -+ BCH_MERGE_MERGE, -+}; -+ -+struct bkey_ops { -+ /* Returns reason for being invalid if invalid, else NULL: */ -+ const char * (*key_invalid)(const struct bch_fs *, -+ struct bkey_s_c); -+ void (*key_debugcheck)(struct bch_fs *, struct bkey_s_c); -+ void (*val_to_text)(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ void (*swab)(struct bkey_s); -+ bool (*key_normalize)(struct bch_fs *, struct bkey_s); -+ enum merge_result (*key_merge)(struct bch_fs *, -+ struct bkey_s, struct bkey_s); -+ void (*compat)(enum btree_id id, unsigned version, -+ unsigned big_endian, int write, -+ struct bkey_s); -+}; -+ -+const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c); -+const char *__bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, -+ enum btree_node_type); -+const char *bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, -+ enum btree_node_type); -+const char *bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c); -+ -+void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); -+ -+void bch2_bpos_to_text(struct printbuf *, struct bpos); -+void bch2_bkey_to_text(struct printbuf *, const struct bkey *); -+void bch2_val_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+void bch2_bkey_swab_val(struct bkey_s); -+ -+bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s); -+ -+enum merge_result bch2_bkey_merge(struct bch_fs *, -+ struct bkey_s, struct 
bkey_s); -+ -+void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int); -+ -+void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned, -+ int, struct bkey_format *, struct bkey_packed *); -+ -+static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, -+ struct bkey_format *f, -+ struct bkey_packed *k) -+{ -+ if (version < bcachefs_metadata_version_current || -+ big_endian != CPU_BIG_ENDIAN) -+ __bch2_bkey_compat(level, btree_id, version, -+ big_endian, write, f, k); -+ -+} -+ -+#endif /* _BCACHEFS_BKEY_METHODS_H */ -diff --git a/fs/bcachefs/bkey_on_stack.h b/fs/bcachefs/bkey_on_stack.h -new file mode 100644 -index 000000000000..f607a0cb37ed ---- /dev/null -+++ b/fs/bcachefs/bkey_on_stack.h -@@ -0,0 +1,43 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BKEY_ON_STACK_H -+#define _BCACHEFS_BKEY_ON_STACK_H -+ -+#include "bcachefs.h" -+ -+struct bkey_on_stack { -+ struct bkey_i *k; -+ u64 onstack[12]; -+}; -+ -+static inline void bkey_on_stack_realloc(struct bkey_on_stack *s, -+ struct bch_fs *c, unsigned u64s) -+{ -+ if (s->k == (void *) s->onstack && -+ u64s > ARRAY_SIZE(s->onstack)) { -+ s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS); -+ memcpy(s->k, s->onstack, sizeof(s->onstack)); -+ } -+} -+ -+static inline void bkey_on_stack_reassemble(struct bkey_on_stack *s, -+ struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ bkey_on_stack_realloc(s, c, k.k->u64s); -+ bkey_reassemble(s->k, k); -+} -+ -+static inline void bkey_on_stack_init(struct bkey_on_stack *s) -+{ -+ s->k = (void *) s->onstack; -+} -+ -+static inline void bkey_on_stack_exit(struct bkey_on_stack *s, -+ struct bch_fs *c) -+{ -+ if (s->k != (void *) s->onstack) -+ mempool_free(s->k, &c->large_bkey_pool); -+ s->k = NULL; -+} -+ -+#endif /* _BCACHEFS_BKEY_ON_STACK_H */ -diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c -new file mode 100644 -index 000000000000..839e78d1dc35 ---- /dev/null -+++ b/fs/bcachefs/bkey_sort.c -@@ -0,0 +1,515 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "bkey_sort.h" -+#include "bset.h" -+#include "extents.h" -+ -+typedef int (*sort_cmp_fn)(struct btree *, -+ struct bkey_packed *, -+ struct bkey_packed *); -+ -+static inline bool sort_iter_end(struct sort_iter *iter) -+{ -+ return !iter->used; -+} -+ -+static inline void __sort_iter_sift(struct sort_iter *iter, -+ unsigned from, -+ sort_cmp_fn cmp) -+{ -+ unsigned i; -+ -+ for (i = from; -+ i + 1 < iter->used && -+ cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0; -+ i++) -+ swap(iter->data[i], iter->data[i + 1]); -+} -+ -+static inline void sort_iter_sift(struct sort_iter *iter, sort_cmp_fn cmp) -+{ -+ -+ __sort_iter_sift(iter, 0, cmp); -+} -+ -+static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp) -+{ -+ unsigned i = iter->used; -+ -+ while (i--) -+ __sort_iter_sift(iter, i, cmp); -+} -+ -+static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter) -+{ -+ return !sort_iter_end(iter) ? 
iter->data->k : NULL; -+} -+ -+static inline void __sort_iter_advance(struct sort_iter *iter, -+ unsigned idx, sort_cmp_fn cmp) -+{ -+ struct sort_iter_set *i = iter->data + idx; -+ -+ BUG_ON(idx >= iter->used); -+ -+ i->k = bkey_next_skip_noops(i->k, i->end); -+ -+ BUG_ON(i->k > i->end); -+ -+ if (i->k == i->end) -+ array_remove_item(iter->data, iter->used, idx); -+ else -+ __sort_iter_sift(iter, idx, cmp); -+} -+ -+static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp) -+{ -+ __sort_iter_advance(iter, 0, cmp); -+} -+ -+static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter, -+ sort_cmp_fn cmp) -+{ -+ struct bkey_packed *ret = sort_iter_peek(iter); -+ -+ if (ret) -+ sort_iter_advance(iter, cmp); -+ -+ return ret; -+} -+ -+/* -+ * If keys compare equal, compare by pointer order: -+ */ -+static inline int key_sort_fix_overlapping_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ return bkey_cmp_packed(b, l, r) ?: -+ cmp_int((unsigned long) l, (unsigned long) r); -+} -+ -+static inline bool should_drop_next_key(struct sort_iter *iter) -+{ -+ /* -+ * key_sort_cmp() ensures that when keys compare equal the older key -+ * comes first; so if l->k compares equal to r->k then l->k is older -+ * and should be dropped. -+ */ -+ return iter->used >= 2 && -+ !bkey_cmp_packed(iter->b, -+ iter->data[0].k, -+ iter->data[1].k); -+} -+ -+struct btree_nr_keys -+bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, -+ struct sort_iter *iter) -+{ -+ struct bkey_packed *out = dst->start; -+ struct bkey_packed *k; -+ struct btree_nr_keys nr; -+ -+ memset(&nr, 0, sizeof(nr)); -+ -+ sort_iter_sort(iter, key_sort_fix_overlapping_cmp); -+ -+ while ((k = sort_iter_peek(iter))) { -+ if (!bkey_whiteout(k) && -+ !should_drop_next_key(iter)) { -+ bkey_copy(out, k); -+ btree_keys_account_key_add(&nr, 0, out); -+ out = bkey_next(out); -+ } -+ -+ sort_iter_advance(iter, key_sort_fix_overlapping_cmp); -+ } -+ -+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data); -+ return nr; -+} -+ -+static void extent_sort_append(struct bch_fs *c, -+ struct bkey_format *f, -+ struct btree_nr_keys *nr, -+ struct bkey_packed **out, -+ struct bkey_s k) -+{ -+ if (!bkey_whiteout(k.k)) { -+ if (!bch2_bkey_pack_key(*out, k.k, f)) -+ memcpy_u64s_small(*out, k.k, BKEY_U64s); -+ -+ memcpy_u64s_small(bkeyp_val(f, *out), k.v, bkey_val_u64s(k.k)); -+ -+ btree_keys_account_key_add(nr, 0, *out); -+ *out = bkey_next(*out); -+ } -+} -+ -+/* Sort + repack in a new format: */ -+struct btree_nr_keys -+bch2_sort_repack(struct bset *dst, struct btree *src, -+ struct btree_node_iter *src_iter, -+ struct bkey_format *out_f, -+ bool filter_whiteouts) -+{ -+ struct bkey_format *in_f = &src->format; -+ struct bkey_packed *in, *out = vstruct_last(dst); -+ struct btree_nr_keys nr; -+ -+ memset(&nr, 0, sizeof(nr)); -+ -+ while ((in = bch2_btree_node_iter_next_all(src_iter, src))) { -+ if (filter_whiteouts && bkey_whiteout(in)) -+ continue; -+ -+ if (bch2_bkey_transform(out_f, out, bkey_packed(in) -+ ? 
in_f : &bch2_bkey_format_current, in)) -+ out->format = KEY_FORMAT_LOCAL_BTREE; -+ else -+ bch2_bkey_unpack(src, (void *) out, in); -+ -+ btree_keys_account_key_add(&nr, 0, out); -+ out = bkey_next(out); -+ } -+ -+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data); -+ return nr; -+} -+ -+/* Sort, repack, and call bch2_bkey_normalize() to drop stale pointers: */ -+struct btree_nr_keys -+bch2_sort_repack_merge(struct bch_fs *c, -+ struct bset *dst, struct btree *src, -+ struct btree_node_iter *iter, -+ struct bkey_format *out_f, -+ bool filter_whiteouts) -+{ -+ struct bkey_packed *out = vstruct_last(dst), *k_packed; -+ struct bkey_on_stack k; -+ struct btree_nr_keys nr; -+ -+ memset(&nr, 0, sizeof(nr)); -+ bkey_on_stack_init(&k); -+ -+ while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) { -+ if (filter_whiteouts && bkey_whiteout(k_packed)) -+ continue; -+ -+ /* -+ * NOTE: -+ * bch2_bkey_normalize may modify the key we pass it (dropping -+ * stale pointers) and we don't have a write lock on the src -+ * node; we have to make a copy of the entire key before calling -+ * normalize -+ */ -+ bkey_on_stack_realloc(&k, c, k_packed->u64s + BKEY_U64s); -+ bch2_bkey_unpack(src, k.k, k_packed); -+ -+ if (filter_whiteouts && -+ bch2_bkey_normalize(c, bkey_i_to_s(k.k))) -+ continue; -+ -+ extent_sort_append(c, out_f, &nr, &out, bkey_i_to_s(k.k)); -+ } -+ -+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data); -+ bkey_on_stack_exit(&k, c); -+ return nr; -+} -+ -+static inline int sort_keys_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ return bkey_cmp_packed(b, l, r) ?: -+ (int) bkey_deleted(r) - (int) bkey_deleted(l) ?: -+ (int) l->needs_whiteout - (int) r->needs_whiteout; -+} -+ -+unsigned bch2_sort_keys(struct bkey_packed *dst, -+ struct sort_iter *iter, -+ bool filter_whiteouts) -+{ -+ const struct bkey_format *f = &iter->b->format; -+ struct bkey_packed *in, *next, *out = dst; -+ -+ sort_iter_sort(iter, sort_keys_cmp); -+ -+ while ((in = sort_iter_next(iter, sort_keys_cmp))) { -+ bool needs_whiteout = false; -+ -+ if (bkey_whiteout(in) && -+ (filter_whiteouts || !in->needs_whiteout)) -+ continue; -+ -+ while ((next = sort_iter_peek(iter)) && -+ !bkey_cmp_packed(iter->b, in, next)) { -+ BUG_ON(in->needs_whiteout && -+ next->needs_whiteout); -+ needs_whiteout |= in->needs_whiteout; -+ in = sort_iter_next(iter, sort_keys_cmp); -+ } -+ -+ if (bkey_whiteout(in)) { -+ memcpy_u64s(out, in, bkeyp_key_u64s(f, in)); -+ set_bkeyp_val_u64s(f, out, 0); -+ } else { -+ bkey_copy(out, in); -+ } -+ out->needs_whiteout |= needs_whiteout; -+ out = bkey_next(out); -+ } -+ -+ return (u64 *) out - (u64 *) dst; -+} -+ -+/* Compat code for btree_node_old_extent_overwrite: */ -+ -+/* -+ * If keys compare equal, compare by pointer order: -+ * -+ * Necessary for sort_fix_overlapping() - if there are multiple keys that -+ * compare equal in different sets, we have to process them newest to oldest. 
-+ */ -+static inline int extent_sort_fix_overlapping_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ struct bkey ul = bkey_unpack_key(b, l); -+ struct bkey ur = bkey_unpack_key(b, r); -+ -+ return bkey_cmp(bkey_start_pos(&ul), -+ bkey_start_pos(&ur)) ?: -+ cmp_int((unsigned long) r, (unsigned long) l); -+} -+ -+/* -+ * The algorithm in extent_sort_fix_overlapping() relies on keys in the same -+ * bset being ordered by start offset - but 0 size whiteouts (which are always -+ * KEY_TYPE_deleted) break this ordering, so we need to skip over them: -+ */ -+static void extent_iter_advance(struct sort_iter *iter, unsigned idx) -+{ -+ struct sort_iter_set *i = iter->data + idx; -+ -+ do { -+ i->k = bkey_next_skip_noops(i->k, i->end); -+ } while (i->k != i->end && bkey_deleted(i->k)); -+ -+ if (i->k == i->end) -+ array_remove_item(iter->data, iter->used, idx); -+ else -+ __sort_iter_sift(iter, idx, extent_sort_fix_overlapping_cmp); -+} -+ -+struct btree_nr_keys -+bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, -+ struct sort_iter *iter) -+{ -+ struct btree *b = iter->b; -+ struct bkey_format *f = &b->format; -+ struct sort_iter_set *_l = iter->data, *_r = iter->data + 1; -+ struct bkey_packed *out = dst->start; -+ struct bkey l_unpacked, r_unpacked; -+ struct bkey_s l, r; -+ struct btree_nr_keys nr; -+ struct bkey_on_stack split; -+ unsigned i; -+ -+ memset(&nr, 0, sizeof(nr)); -+ bkey_on_stack_init(&split); -+ -+ sort_iter_sort(iter, extent_sort_fix_overlapping_cmp); -+ for (i = 0; i < iter->used;) { -+ if (bkey_deleted(iter->data[i].k)) -+ __sort_iter_advance(iter, i, -+ extent_sort_fix_overlapping_cmp); -+ else -+ i++; -+ } -+ -+ while (!sort_iter_end(iter)) { -+ l = __bkey_disassemble(b, _l->k, &l_unpacked); -+ -+ if (iter->used == 1) { -+ extent_sort_append(c, f, &nr, &out, l); -+ extent_iter_advance(iter, 0); -+ continue; -+ } -+ -+ r = __bkey_disassemble(b, _r->k, &r_unpacked); -+ -+ /* If current key and next key don't overlap, just append */ -+ if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) { -+ extent_sort_append(c, f, &nr, &out, l); -+ extent_iter_advance(iter, 0); -+ continue; -+ } -+ -+ /* Skip 0 size keys */ -+ if (!r.k->size) { -+ extent_iter_advance(iter, 1); -+ continue; -+ } -+ -+ /* -+ * overlap: keep the newer key and trim the older key so they -+ * don't overlap. comparing pointers tells us which one is -+ * newer, since the bsets are appended one after the other. 
-+ */ -+ -+ /* can't happen because of comparison func */ -+ BUG_ON(_l->k < _r->k && -+ !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k))); -+ -+ if (_l->k > _r->k) { -+ /* l wins, trim r */ -+ if (bkey_cmp(l.k->p, r.k->p) >= 0) { -+ extent_iter_advance(iter, 1); -+ } else { -+ bch2_cut_front_s(l.k->p, r); -+ extent_save(b, _r->k, r.k); -+ __sort_iter_sift(iter, 1, -+ extent_sort_fix_overlapping_cmp); -+ } -+ } else if (bkey_cmp(l.k->p, r.k->p) > 0) { -+ -+ /* -+ * r wins, but it overlaps in the middle of l - split l: -+ */ -+ bkey_on_stack_reassemble(&split, c, l.s_c); -+ bch2_cut_back(bkey_start_pos(r.k), split.k); -+ -+ bch2_cut_front_s(r.k->p, l); -+ extent_save(b, _l->k, l.k); -+ -+ __sort_iter_sift(iter, 0, -+ extent_sort_fix_overlapping_cmp); -+ -+ extent_sort_append(c, f, &nr, &out, -+ bkey_i_to_s(split.k)); -+ } else { -+ bch2_cut_back_s(bkey_start_pos(r.k), l); -+ extent_save(b, _l->k, l.k); -+ } -+ } -+ -+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data); -+ -+ bkey_on_stack_exit(&split, c); -+ return nr; -+} -+ -+static inline int sort_extents_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ return bkey_cmp_packed(b, l, r) ?: -+ (int) bkey_deleted(l) - (int) bkey_deleted(r); -+} -+ -+unsigned bch2_sort_extents(struct bkey_packed *dst, -+ struct sort_iter *iter, -+ bool filter_whiteouts) -+{ -+ struct bkey_packed *in, *out = dst; -+ -+ sort_iter_sort(iter, sort_extents_cmp); -+ -+ while ((in = sort_iter_next(iter, sort_extents_cmp))) { -+ if (bkey_deleted(in)) -+ continue; -+ -+ if (bkey_whiteout(in) && -+ (filter_whiteouts || !in->needs_whiteout)) -+ continue; -+ -+ bkey_copy(out, in); -+ out = bkey_next(out); -+ } -+ -+ return (u64 *) out - (u64 *) dst; -+} -+ -+static inline int sort_extent_whiteouts_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ struct bkey ul = bkey_unpack_key(b, l); -+ struct bkey ur = bkey_unpack_key(b, r); -+ -+ return bkey_cmp(bkey_start_pos(&ul), bkey_start_pos(&ur)); -+} -+ -+unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst, -+ struct sort_iter *iter) -+{ -+ const struct bkey_format *f = &iter->b->format; -+ struct bkey_packed *in, *out = dst; -+ struct bkey_i l, r; -+ bool prev = false, l_packed = false; -+ u64 max_packed_size = bkey_field_max(f, BKEY_FIELD_SIZE); -+ u64 max_packed_offset = bkey_field_max(f, BKEY_FIELD_OFFSET); -+ u64 new_size; -+ -+ max_packed_size = min_t(u64, max_packed_size, KEY_SIZE_MAX); -+ -+ sort_iter_sort(iter, sort_extent_whiteouts_cmp); -+ -+ while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) { -+ if (bkey_deleted(in)) -+ continue; -+ -+ EBUG_ON(bkeyp_val_u64s(f, in)); -+ EBUG_ON(in->type != KEY_TYPE_discard); -+ -+ r.k = bkey_unpack_key(iter->b, in); -+ -+ if (prev && -+ bkey_cmp(l.k.p, bkey_start_pos(&r.k)) >= 0) { -+ if (bkey_cmp(l.k.p, r.k.p) >= 0) -+ continue; -+ -+ new_size = l_packed -+ ? 
min(max_packed_size, max_packed_offset - -+ bkey_start_offset(&l.k)) -+ : KEY_SIZE_MAX; -+ -+ new_size = min(new_size, r.k.p.offset - -+ bkey_start_offset(&l.k)); -+ -+ BUG_ON(new_size < l.k.size); -+ -+ bch2_key_resize(&l.k, new_size); -+ -+ if (bkey_cmp(l.k.p, r.k.p) >= 0) -+ continue; -+ -+ bch2_cut_front(l.k.p, &r); -+ } -+ -+ if (prev) { -+ if (!bch2_bkey_pack(out, &l, f)) { -+ BUG_ON(l_packed); -+ bkey_copy(out, &l); -+ } -+ out = bkey_next(out); -+ } -+ -+ l = r; -+ prev = true; -+ l_packed = bkey_packed(in); -+ } -+ -+ if (prev) { -+ if (!bch2_bkey_pack(out, &l, f)) { -+ BUG_ON(l_packed); -+ bkey_copy(out, &l); -+ } -+ out = bkey_next(out); -+ } -+ -+ return (u64 *) out - (u64 *) dst; -+} -diff --git a/fs/bcachefs/bkey_sort.h b/fs/bcachefs/bkey_sort.h -new file mode 100644 -index 000000000000..458a051fdac5 ---- /dev/null -+++ b/fs/bcachefs/bkey_sort.h -@@ -0,0 +1,57 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BKEY_SORT_H -+#define _BCACHEFS_BKEY_SORT_H -+ -+struct sort_iter { -+ struct btree *b; -+ unsigned used; -+ unsigned size; -+ -+ struct sort_iter_set { -+ struct bkey_packed *k, *end; -+ } data[MAX_BSETS + 1]; -+}; -+ -+static inline void sort_iter_init(struct sort_iter *iter, struct btree *b) -+{ -+ iter->b = b; -+ iter->used = 0; -+ iter->size = ARRAY_SIZE(iter->data); -+} -+ -+static inline void sort_iter_add(struct sort_iter *iter, -+ struct bkey_packed *k, -+ struct bkey_packed *end) -+{ -+ BUG_ON(iter->used >= iter->size); -+ -+ if (k != end) -+ iter->data[iter->used++] = (struct sort_iter_set) { k, end }; -+} -+ -+struct btree_nr_keys -+bch2_key_sort_fix_overlapping(struct bch_fs *, struct bset *, -+ struct sort_iter *); -+struct btree_nr_keys -+bch2_extent_sort_fix_overlapping(struct bch_fs *, struct bset *, -+ struct sort_iter *); -+ -+struct btree_nr_keys -+bch2_sort_repack(struct bset *, struct btree *, -+ struct btree_node_iter *, -+ struct bkey_format *, bool); -+struct btree_nr_keys -+bch2_sort_repack_merge(struct bch_fs *, -+ struct bset *, struct btree *, -+ struct btree_node_iter *, -+ struct bkey_format *, bool); -+ -+unsigned bch2_sort_keys(struct bkey_packed *, -+ struct sort_iter *, bool); -+unsigned bch2_sort_extents(struct bkey_packed *, -+ struct sort_iter *, bool); -+ -+unsigned bch2_sort_extent_whiteouts(struct bkey_packed *, -+ struct sort_iter *); -+ -+#endif /* _BCACHEFS_BKEY_SORT_H */ -diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c -new file mode 100644 -index 000000000000..6fc91e6a35e8 ---- /dev/null -+++ b/fs/bcachefs/bset.c -@@ -0,0 +1,1803 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Code for working with individual keys, and sorted sets of keys with in a -+ * btree node -+ * -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "btree_cache.h" -+#include "bset.h" -+#include "eytzinger.h" -+#include "util.h" -+ -+#include -+#include -+#include -+#include -+ -+/* hack.. 
*/ -+#include "alloc_types.h" -+#include -+ -+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *, -+ struct btree *); -+ -+static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter) -+{ -+ unsigned n = ARRAY_SIZE(iter->data); -+ -+ while (n && __btree_node_iter_set_end(iter, n - 1)) -+ --n; -+ -+ return n; -+} -+ -+struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k) -+{ -+ unsigned offset = __btree_node_key_to_offset(b, k); -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) -+ if (offset <= t->end_offset) { -+ EBUG_ON(offset < btree_bkey_first_offset(t)); -+ return t; -+ } -+ -+ BUG(); -+} -+ -+/* -+ * There are never duplicate live keys in the btree - but including keys that -+ * have been flagged as deleted (and will be cleaned up later) we _will_ see -+ * duplicates. -+ * -+ * Thus the sort order is: usual key comparison first, but for keys that compare -+ * equal the deleted key(s) come first, and the (at most one) live version comes -+ * last. -+ * -+ * The main reason for this is insertion: to handle overwrites, we first iterate -+ * over keys that compare equal to our insert key, and then insert immediately -+ * prior to the first key greater than the key we're inserting - our insert -+ * position will be after all keys that compare equal to our insert key, which -+ * by the time we actually do the insert will all be deleted. -+ */ -+ -+void bch2_dump_bset(struct bch_fs *c, struct btree *b, -+ struct bset *i, unsigned set) -+{ -+ struct bkey_packed *_k, *_n; -+ struct bkey uk, n; -+ struct bkey_s_c k; -+ char buf[200]; -+ -+ if (!i->u64s) -+ return; -+ -+ for (_k = i->start; -+ _k < vstruct_last(i); -+ _k = _n) { -+ _n = bkey_next_skip_noops(_k, vstruct_last(i)); -+ -+ k = bkey_disassemble(b, _k, &uk); -+ if (c) -+ bch2_bkey_val_to_text(&PBUF(buf), c, k); -+ else -+ bch2_bkey_to_text(&PBUF(buf), k.k); -+ printk(KERN_ERR "block %u key %5zu: %s\n", set, -+ _k->_data - i->_data, buf); -+ -+ if (_n == vstruct_last(i)) -+ continue; -+ -+ n = bkey_unpack_key(b, _n); -+ -+ if (bkey_cmp(bkey_start_pos(&n), k.k->p) < 0) { -+ printk(KERN_ERR "Key skipped backwards\n"); -+ continue; -+ } -+ -+ if (!bkey_deleted(k.k) && -+ !bkey_cmp(n.p, k.k->p)) -+ printk(KERN_ERR "Duplicate keys\n"); -+ } -+} -+ -+void bch2_dump_btree_node(struct bch_fs *c, struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ console_lock(); -+ for_each_bset(b, t) -+ bch2_dump_bset(c, b, bset(b, t), t - b->set); -+ console_unlock(); -+} -+ -+void bch2_dump_btree_node_iter(struct btree *b, -+ struct btree_node_iter *iter) -+{ -+ struct btree_node_iter_set *set; -+ -+ printk(KERN_ERR "btree node iter with %u/%u sets:\n", -+ __btree_node_iter_used(iter), b->nsets); -+ -+ btree_node_iter_for_each(iter, set) { -+ struct bkey_packed *k = __btree_node_offset_to_key(b, set->k); -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ struct bkey uk = bkey_unpack_key(b, k); -+ char buf[100]; -+ -+ bch2_bkey_to_text(&PBUF(buf), &uk); -+ printk(KERN_ERR "set %zu key %u: %s\n", -+ t - b->set, set->k, buf); -+ } -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+void __bch2_verify_btree_nr_keys(struct btree *b) -+{ -+ struct bset_tree *t; -+ struct bkey_packed *k; -+ struct btree_nr_keys nr = { 0 }; -+ -+ for_each_bset(b, t) -+ bset_tree_for_each_key(b, t, k) -+ if (!bkey_whiteout(k)) -+ btree_keys_account_key_add(&nr, t - b->set, k); -+ -+ BUG_ON(memcmp(&nr, &b->nr, sizeof(nr))); -+} -+ -+static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter, -+ struct btree *b) -+{ -+ 
struct btree_node_iter iter = *_iter; -+ const struct bkey_packed *k, *n; -+ -+ k = bch2_btree_node_iter_peek_all(&iter, b); -+ __bch2_btree_node_iter_advance(&iter, b); -+ n = bch2_btree_node_iter_peek_all(&iter, b); -+ -+ bkey_unpack_key(b, k); -+ -+ if (n && -+ bkey_iter_cmp(b, k, n) > 0) { -+ struct btree_node_iter_set *set; -+ struct bkey ku = bkey_unpack_key(b, k); -+ struct bkey nu = bkey_unpack_key(b, n); -+ char buf1[80], buf2[80]; -+ -+ bch2_dump_btree_node(NULL, b); -+ bch2_bkey_to_text(&PBUF(buf1), &ku); -+ bch2_bkey_to_text(&PBUF(buf2), &nu); -+ printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n", -+ buf1, buf2); -+ printk(KERN_ERR "iter was:"); -+ -+ btree_node_iter_for_each(_iter, set) { -+ struct bkey_packed *k = __btree_node_offset_to_key(b, set->k); -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ printk(" [%zi %zi]", t - b->set, -+ k->_data - bset(b, t)->_data); -+ } -+ panic("\n"); -+ } -+} -+ -+void bch2_btree_node_iter_verify(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ struct btree_node_iter_set *set, *s2; -+ struct bkey_packed *k, *p; -+ struct bset_tree *t; -+ -+ if (bch2_btree_node_iter_end(iter)) -+ return; -+ -+ /* Verify no duplicates: */ -+ btree_node_iter_for_each(iter, set) -+ btree_node_iter_for_each(iter, s2) -+ BUG_ON(set != s2 && set->end == s2->end); -+ -+ /* Verify that set->end is correct: */ -+ btree_node_iter_for_each(iter, set) { -+ for_each_bset(b, t) -+ if (set->end == t->end_offset) -+ goto found; -+ BUG(); -+found: -+ BUG_ON(set->k < btree_bkey_first_offset(t) || -+ set->k >= t->end_offset); -+ } -+ -+ /* Verify iterator is sorted: */ -+ btree_node_iter_for_each(iter, set) -+ BUG_ON(set != iter->data && -+ btree_node_iter_cmp(b, set[-1], set[0]) > 0); -+ -+ k = bch2_btree_node_iter_peek_all(iter, b); -+ -+ for_each_bset(b, t) { -+ if (iter->data[0].end == t->end_offset) -+ continue; -+ -+ p = bch2_bkey_prev_all(b, t, -+ bch2_btree_node_iter_bset_pos(iter, b, t)); -+ -+ BUG_ON(p && bkey_iter_cmp(b, k, p) < 0); -+ } -+} -+ -+void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, -+ struct bkey_packed *insert, unsigned clobber_u64s) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, where); -+ struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where); -+ struct bkey_packed *next = (void *) (where->_data + clobber_u64s); -+#if 0 -+ BUG_ON(prev && -+ bkey_iter_cmp(b, prev, insert) > 0); -+#else -+ if (prev && -+ bkey_iter_cmp(b, prev, insert) > 0) { -+ struct bkey k1 = bkey_unpack_key(b, prev); -+ struct bkey k2 = bkey_unpack_key(b, insert); -+ char buf1[100]; -+ char buf2[100]; -+ -+ bch2_dump_btree_node(NULL, b); -+ bch2_bkey_to_text(&PBUF(buf1), &k1); -+ bch2_bkey_to_text(&PBUF(buf2), &k2); -+ -+ panic("prev > insert:\n" -+ "prev key %s\n" -+ "insert key %s\n", -+ buf1, buf2); -+ } -+#endif -+#if 0 -+ BUG_ON(next != btree_bkey_last(b, t) && -+ bkey_iter_cmp(b, insert, next) > 0); -+#else -+ if (next != btree_bkey_last(b, t) && -+ bkey_iter_cmp(b, insert, next) > 0) { -+ struct bkey k1 = bkey_unpack_key(b, insert); -+ struct bkey k2 = bkey_unpack_key(b, next); -+ char buf1[100]; -+ char buf2[100]; -+ -+ bch2_dump_btree_node(NULL, b); -+ bch2_bkey_to_text(&PBUF(buf1), &k1); -+ bch2_bkey_to_text(&PBUF(buf2), &k2); -+ -+ panic("insert > next:\n" -+ "insert key %s\n" -+ "next key %s\n", -+ buf1, buf2); -+ } -+#endif -+} -+ -+#else -+ -+static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter, -+ struct btree *b) {} -+ -+#endif -+ -+/* Auxiliary search trees */ -+ -+#define 
BFLOAT_FAILED_UNPACKED U8_MAX -+#define BFLOAT_FAILED U8_MAX -+ -+struct bkey_float { -+ u8 exponent; -+ u8 key_offset; -+ u16 mantissa; -+}; -+#define BKEY_MANTISSA_BITS 16 -+ -+static unsigned bkey_float_byte_offset(unsigned idx) -+{ -+ return idx * sizeof(struct bkey_float); -+} -+ -+struct ro_aux_tree { -+ struct bkey_float f[0]; -+}; -+ -+struct rw_aux_tree { -+ u16 offset; -+ struct bpos k; -+}; -+ -+/* -+ * BSET_CACHELINE was originally intended to match the hardware cacheline size - -+ * it used to be 64, but I realized the lookup code would touch slightly less -+ * memory if it was 128. -+ * -+ * It definites the number of bytes (in struct bset) per struct bkey_float in -+ * the auxiliar search tree - when we're done searching the bset_float tree we -+ * have this many bytes left that we do a linear search over. -+ * -+ * Since (after level 5) every level of the bset_tree is on a new cacheline, -+ * we're touching one fewer cacheline in the bset tree in exchange for one more -+ * cacheline in the linear search - but the linear search might stop before it -+ * gets to the second cacheline. -+ */ -+ -+#define BSET_CACHELINE 128 -+ -+/* Space required for the btree node keys */ -+static inline size_t btree_keys_bytes(struct btree *b) -+{ -+ return PAGE_SIZE << b->page_order; -+} -+ -+static inline size_t btree_keys_cachelines(struct btree *b) -+{ -+ return btree_keys_bytes(b) / BSET_CACHELINE; -+} -+ -+static inline size_t btree_aux_data_bytes(struct btree *b) -+{ -+ return btree_keys_cachelines(b) * 8; -+} -+ -+static inline size_t btree_aux_data_u64s(struct btree *b) -+{ -+ return btree_aux_data_bytes(b) / sizeof(u64); -+} -+ -+static unsigned bset_aux_tree_buf_end(const struct bset_tree *t) -+{ -+ BUG_ON(t->aux_data_offset == U16_MAX); -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_NO_AUX_TREE: -+ return t->aux_data_offset; -+ case BSET_RO_AUX_TREE: -+ return t->aux_data_offset + -+ DIV_ROUND_UP(t->size * sizeof(struct bkey_float) + -+ t->size * sizeof(u8), 8); -+ case BSET_RW_AUX_TREE: -+ return t->aux_data_offset + -+ DIV_ROUND_UP(sizeof(struct rw_aux_tree) * t->size, 8); -+ default: -+ BUG(); -+ } -+} -+ -+static unsigned bset_aux_tree_buf_start(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ return t == b->set -+ ? 
DIV_ROUND_UP(b->unpack_fn_len, 8) -+ : bset_aux_tree_buf_end(t - 1); -+} -+ -+static void *__aux_tree_base(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ return b->aux_data + t->aux_data_offset * 8; -+} -+ -+static struct ro_aux_tree *ro_aux_tree_base(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE); -+ -+ return __aux_tree_base(b, t); -+} -+ -+static u8 *ro_aux_tree_prev(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE); -+ -+ return __aux_tree_base(b, t) + bkey_float_byte_offset(t->size); -+} -+ -+static struct bkey_float *bkey_float(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned idx) -+{ -+ return ro_aux_tree_base(b, t)->f + idx; -+} -+ -+static void bset_aux_tree_verify(struct btree *b) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) { -+ if (t->aux_data_offset == U16_MAX) -+ continue; -+ -+ BUG_ON(t != b->set && -+ t[-1].aux_data_offset == U16_MAX); -+ -+ BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t)); -+ BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b)); -+ BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b)); -+ } -+#endif -+} -+ -+/* Memory allocation */ -+ -+void bch2_btree_keys_free(struct btree *b) -+{ -+ vfree(b->aux_data); -+ b->aux_data = NULL; -+} -+ -+#ifndef PAGE_KERNEL_EXEC -+# define PAGE_KERNEL_EXEC PAGE_KERNEL -+#endif -+ -+int bch2_btree_keys_alloc(struct btree *b, unsigned page_order, gfp_t gfp) -+{ -+ b->page_order = page_order; -+ b->aux_data = __vmalloc(btree_aux_data_bytes(b), gfp, -+ PAGE_KERNEL_EXEC); -+ if (!b->aux_data) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void bch2_btree_keys_init(struct btree *b, bool *expensive_debug_checks) -+{ -+ unsigned i; -+ -+ b->nsets = 0; -+ memset(&b->nr, 0, sizeof(b->nr)); -+#ifdef CONFIG_BCACHEFS_DEBUG -+ b->expensive_debug_checks = expensive_debug_checks; -+#endif -+ for (i = 0; i < MAX_BSETS; i++) -+ b->set[i].data_offset = U16_MAX; -+ -+ bch2_bset_set_no_aux_tree(b, b->set); -+} -+ -+/* Binary tree stuff for auxiliary search trees */ -+ -+/* -+ * Cacheline/offset <-> bkey pointer arithmetic: -+ * -+ * t->tree is a binary search tree in an array; each node corresponds to a key -+ * in one cacheline in t->set (BSET_CACHELINE bytes). -+ * -+ * This means we don't have to store the full index of the key that a node in -+ * the binary tree points to; eytzinger1_to_inorder() gives us the cacheline, and -+ * then bkey_float->m gives us the offset within that cacheline, in units of 8 -+ * bytes. -+ * -+ * cacheline_to_bkey() and friends abstract out all the pointer arithmetic to -+ * make this work. -+ * -+ * To construct the bfloat for an arbitrary key we need to know what the key -+ * immediately preceding it is: we have to check if the two keys differ in the -+ * bits we're going to store in bkey_float->mantissa. t->prev[j] stores the size -+ * of the previous key so we can walk backwards to it from t->tree[j]'s key. 
-+ */ -+ -+static inline void *bset_cacheline(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned cacheline) -+{ -+ return (void *) round_down((unsigned long) btree_bkey_first(b, t), -+ L1_CACHE_BYTES) + -+ cacheline * BSET_CACHELINE; -+} -+ -+static struct bkey_packed *cacheline_to_bkey(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned cacheline, -+ unsigned offset) -+{ -+ return bset_cacheline(b, t, cacheline) + offset * 8; -+} -+ -+static unsigned bkey_to_cacheline(const struct btree *b, -+ const struct bset_tree *t, -+ const struct bkey_packed *k) -+{ -+ return ((void *) k - bset_cacheline(b, t, 0)) / BSET_CACHELINE; -+} -+ -+static ssize_t __bkey_to_cacheline_offset(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned cacheline, -+ const struct bkey_packed *k) -+{ -+ return (u64 *) k - (u64 *) bset_cacheline(b, t, cacheline); -+} -+ -+static unsigned bkey_to_cacheline_offset(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned cacheline, -+ const struct bkey_packed *k) -+{ -+ size_t m = __bkey_to_cacheline_offset(b, t, cacheline, k); -+ -+ EBUG_ON(m > U8_MAX); -+ return m; -+} -+ -+static inline struct bkey_packed *tree_to_bkey(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned j) -+{ -+ return cacheline_to_bkey(b, t, -+ __eytzinger1_to_inorder(j, t->size, t->extra), -+ bkey_float(b, t, j)->key_offset); -+} -+ -+static struct bkey_packed *tree_to_prev_bkey(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned j) -+{ -+ unsigned prev_u64s = ro_aux_tree_prev(b, t)[j]; -+ -+ return (void *) (tree_to_bkey(b, t, j)->_data - prev_u64s); -+} -+ -+static struct rw_aux_tree *rw_aux_tree(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE); -+ -+ return __aux_tree_base(b, t); -+} -+ -+/* -+ * For the write set - the one we're currently inserting keys into - we don't -+ * maintain a full search tree, we just keep a simple lookup table in t->prev. -+ */ -+static struct bkey_packed *rw_aux_to_bkey(const struct btree *b, -+ struct bset_tree *t, -+ unsigned j) -+{ -+ return __btree_node_offset_to_key(b, rw_aux_tree(b, t)[j].offset); -+} -+ -+static void rw_aux_tree_set(const struct btree *b, struct bset_tree *t, -+ unsigned j, struct bkey_packed *k) -+{ -+ EBUG_ON(k >= btree_bkey_last(b, t)); -+ -+ rw_aux_tree(b, t)[j] = (struct rw_aux_tree) { -+ .offset = __btree_node_key_to_offset(b, k), -+ .k = bkey_unpack_pos(b, k), -+ }; -+} -+ -+static void bch2_bset_verify_rw_aux_tree(struct btree *b, -+ struct bset_tree *t) -+{ -+ struct bkey_packed *k = btree_bkey_first(b, t); -+ unsigned j = 0; -+ -+ if (!btree_keys_expensive_checks(b)) -+ return; -+ -+ BUG_ON(bset_has_ro_aux_tree(t)); -+ -+ if (!bset_has_rw_aux_tree(t)) -+ return; -+ -+ BUG_ON(t->size < 1); -+ BUG_ON(rw_aux_to_bkey(b, t, j) != k); -+ -+ goto start; -+ while (1) { -+ if (rw_aux_to_bkey(b, t, j) == k) { -+ BUG_ON(bkey_cmp(rw_aux_tree(b, t)[j].k, -+ bkey_unpack_pos(b, k))); -+start: -+ if (++j == t->size) -+ break; -+ -+ BUG_ON(rw_aux_tree(b, t)[j].offset <= -+ rw_aux_tree(b, t)[j - 1].offset); -+ } -+ -+ k = bkey_next_skip_noops(k, btree_bkey_last(b, t)); -+ BUG_ON(k >= btree_bkey_last(b, t)); -+ } -+} -+ -+/* returns idx of first entry >= offset: */ -+static unsigned rw_aux_tree_bsearch(struct btree *b, -+ struct bset_tree *t, -+ unsigned offset) -+{ -+ unsigned bset_offs = offset - btree_bkey_first_offset(t); -+ unsigned bset_u64s = t->end_offset - btree_bkey_first_offset(t); -+ unsigned idx = bset_u64s ? 
bset_offs * t->size / bset_u64s : 0; -+ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE); -+ EBUG_ON(!t->size); -+ EBUG_ON(idx > t->size); -+ -+ while (idx < t->size && -+ rw_aux_tree(b, t)[idx].offset < offset) -+ idx++; -+ -+ while (idx && -+ rw_aux_tree(b, t)[idx - 1].offset >= offset) -+ idx--; -+ -+ EBUG_ON(idx < t->size && -+ rw_aux_tree(b, t)[idx].offset < offset); -+ EBUG_ON(idx && rw_aux_tree(b, t)[idx - 1].offset >= offset); -+ EBUG_ON(idx + 1 < t->size && -+ rw_aux_tree(b, t)[idx].offset == -+ rw_aux_tree(b, t)[idx + 1].offset); -+ -+ return idx; -+} -+ -+static inline unsigned bkey_mantissa(const struct bkey_packed *k, -+ const struct bkey_float *f, -+ unsigned idx) -+{ -+ u64 v; -+ -+ EBUG_ON(!bkey_packed(k)); -+ -+ v = get_unaligned((u64 *) (((u8 *) k->_data) + (f->exponent >> 3))); -+ -+ /* -+ * In little endian, we're shifting off low bits (and then the bits we -+ * want are at the low end), in big endian we're shifting off high bits -+ * (and then the bits we want are at the high end, so we shift them -+ * back down): -+ */ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ v >>= f->exponent & 7; -+#else -+ v >>= 64 - (f->exponent & 7) - BKEY_MANTISSA_BITS; -+#endif -+ return (u16) v; -+} -+ -+static void make_bfloat(struct btree *b, struct bset_tree *t, -+ unsigned j, -+ struct bkey_packed *min_key, -+ struct bkey_packed *max_key) -+{ -+ struct bkey_float *f = bkey_float(b, t, j); -+ struct bkey_packed *m = tree_to_bkey(b, t, j); -+ struct bkey_packed *l, *r; -+ unsigned mantissa; -+ int shift, exponent, high_bit; -+ -+ if (is_power_of_2(j)) { -+ l = min_key; -+ -+ if (!l->u64s) { -+ if (!bkey_pack_pos(l, b->data->min_key, b)) { -+ struct bkey_i tmp; -+ -+ bkey_init(&tmp.k); -+ tmp.k.p = b->data->min_key; -+ bkey_copy(l, &tmp); -+ } -+ } -+ } else { -+ l = tree_to_prev_bkey(b, t, j >> ffs(j)); -+ -+ EBUG_ON(m < l); -+ } -+ -+ if (is_power_of_2(j + 1)) { -+ r = max_key; -+ -+ if (!r->u64s) { -+ if (!bkey_pack_pos(r, t->max_key, b)) { -+ struct bkey_i tmp; -+ -+ bkey_init(&tmp.k); -+ tmp.k.p = t->max_key; -+ bkey_copy(r, &tmp); -+ } -+ } -+ } else { -+ r = tree_to_bkey(b, t, j >> (ffz(j) + 1)); -+ -+ EBUG_ON(m > r); -+ } -+ -+ /* -+ * for failed bfloats, the lookup code falls back to comparing against -+ * the original key. 
-+ */ -+ -+ if (!bkey_packed(l) || !bkey_packed(r) || !bkey_packed(m) || -+ !b->nr_key_bits) { -+ f->exponent = BFLOAT_FAILED_UNPACKED; -+ return; -+ } -+ -+ /* -+ * The greatest differing bit of l and r is the first bit we must -+ * include in the bfloat mantissa we're creating in order to do -+ * comparisons - that bit always becomes the high bit of -+ * bfloat->mantissa, and thus the exponent we're calculating here is -+ * the position of what will become the low bit in bfloat->mantissa: -+ * -+ * Note that this may be negative - we may be running off the low end -+ * of the key: we handle this later: -+ */ -+ high_bit = max(bch2_bkey_greatest_differing_bit(b, l, r), -+ min_t(unsigned, BKEY_MANTISSA_BITS, b->nr_key_bits) - 1); -+ exponent = high_bit - (BKEY_MANTISSA_BITS - 1); -+ -+ /* -+ * Then we calculate the actual shift value, from the start of the key -+ * (k->_data), to get the key bits starting at exponent: -+ */ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ shift = (int) (b->format.key_u64s * 64 - b->nr_key_bits) + exponent; -+ -+ EBUG_ON(shift + BKEY_MANTISSA_BITS > b->format.key_u64s * 64); -+#else -+ shift = high_bit_offset + -+ b->nr_key_bits - -+ exponent - -+ BKEY_MANTISSA_BITS; -+ -+ EBUG_ON(shift < KEY_PACKED_BITS_START); -+#endif -+ EBUG_ON(shift < 0 || shift >= BFLOAT_FAILED); -+ -+ f->exponent = shift; -+ mantissa = bkey_mantissa(m, f, j); -+ -+ /* -+ * If we've got garbage bits, set them to all 1s - it's legal for the -+ * bfloat to compare larger than the original key, but not smaller: -+ */ -+ if (exponent < 0) -+ mantissa |= ~(~0U << -exponent); -+ -+ f->mantissa = mantissa; -+} -+ -+/* bytes remaining - only valid for last bset: */ -+static unsigned __bset_tree_capacity(struct btree *b, struct bset_tree *t) -+{ -+ bset_aux_tree_verify(b); -+ -+ return btree_aux_data_bytes(b) - t->aux_data_offset * sizeof(u64); -+} -+ -+static unsigned bset_ro_tree_capacity(struct btree *b, struct bset_tree *t) -+{ -+ return __bset_tree_capacity(b, t) / -+ (sizeof(struct bkey_float) + sizeof(u8)); -+} -+ -+static unsigned bset_rw_tree_capacity(struct btree *b, struct bset_tree *t) -+{ -+ return __bset_tree_capacity(b, t) / sizeof(struct rw_aux_tree); -+} -+ -+static void __build_rw_aux_tree(struct btree *b, struct bset_tree *t) -+{ -+ struct bkey_packed *k; -+ -+ t->size = 1; -+ t->extra = BSET_RW_AUX_TREE_VAL; -+ rw_aux_tree(b, t)[0].offset = -+ __btree_node_key_to_offset(b, btree_bkey_first(b, t)); -+ -+ bset_tree_for_each_key(b, t, k) { -+ if (t->size == bset_rw_tree_capacity(b, t)) -+ break; -+ -+ if ((void *) k - (void *) rw_aux_to_bkey(b, t, t->size - 1) > -+ L1_CACHE_BYTES) -+ rw_aux_tree_set(b, t, t->size++, k); -+ } -+} -+ -+static void __build_ro_aux_tree(struct btree *b, struct bset_tree *t) -+{ -+ struct bkey_packed *prev = NULL, *k = btree_bkey_first(b, t); -+ struct bkey_packed min_key, max_key; -+ unsigned j, cacheline = 1; -+ -+ /* signal to make_bfloat() that they're uninitialized: */ -+ min_key.u64s = max_key.u64s = 0; -+ -+ t->size = min(bkey_to_cacheline(b, t, btree_bkey_last(b, t)), -+ bset_ro_tree_capacity(b, t)); -+retry: -+ if (t->size < 2) { -+ t->size = 0; -+ t->extra = BSET_NO_AUX_TREE_VAL; -+ return; -+ } -+ -+ t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1; -+ -+ /* First we figure out where the first key in each cacheline is */ -+ eytzinger1_for_each(j, t->size) { -+ while (bkey_to_cacheline(b, t, k) < cacheline) -+ prev = k, k = bkey_next_skip_noops(k, btree_bkey_last(b, t)); -+ -+ if (k >= btree_bkey_last(b, t)) { -+ /* XXX: 
this path sucks */ -+ t->size--; -+ goto retry; -+ } -+ -+ ro_aux_tree_prev(b, t)[j] = prev->u64s; -+ bkey_float(b, t, j)->key_offset = -+ bkey_to_cacheline_offset(b, t, cacheline++, k); -+ -+ EBUG_ON(tree_to_prev_bkey(b, t, j) != prev); -+ EBUG_ON(tree_to_bkey(b, t, j) != k); -+ } -+ -+ while (k != btree_bkey_last(b, t)) -+ prev = k, k = bkey_next_skip_noops(k, btree_bkey_last(b, t)); -+ -+ t->max_key = bkey_unpack_pos(b, prev); -+ -+ /* Then we build the tree */ -+ eytzinger1_for_each(j, t->size) -+ make_bfloat(b, t, j, &min_key, &max_key); -+} -+ -+static void bset_alloc_tree(struct btree *b, struct bset_tree *t) -+{ -+ struct bset_tree *i; -+ -+ for (i = b->set; i != t; i++) -+ BUG_ON(bset_has_rw_aux_tree(i)); -+ -+ bch2_bset_set_no_aux_tree(b, t); -+ -+ /* round up to next cacheline: */ -+ t->aux_data_offset = round_up(bset_aux_tree_buf_start(b, t), -+ SMP_CACHE_BYTES / sizeof(u64)); -+ -+ bset_aux_tree_verify(b); -+} -+ -+void bch2_bset_build_aux_tree(struct btree *b, struct bset_tree *t, -+ bool writeable) -+{ -+ if (writeable -+ ? bset_has_rw_aux_tree(t) -+ : bset_has_ro_aux_tree(t)) -+ return; -+ -+ bset_alloc_tree(b, t); -+ -+ if (!__bset_tree_capacity(b, t)) -+ return; -+ -+ if (writeable) -+ __build_rw_aux_tree(b, t); -+ else -+ __build_ro_aux_tree(b, t); -+ -+ bset_aux_tree_verify(b); -+} -+ -+void bch2_bset_init_first(struct btree *b, struct bset *i) -+{ -+ struct bset_tree *t; -+ -+ BUG_ON(b->nsets); -+ -+ memset(i, 0, sizeof(*i)); -+ get_random_bytes(&i->seq, sizeof(i->seq)); -+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); -+ -+ t = &b->set[b->nsets++]; -+ set_btree_bset(b, t, i); -+} -+ -+void bch2_bset_init_next(struct bch_fs *c, struct btree *b, -+ struct btree_node_entry *bne) -+{ -+ struct bset *i = &bne->keys; -+ struct bset_tree *t; -+ -+ BUG_ON(bset_byte_offset(b, bne) >= btree_bytes(c)); -+ BUG_ON((void *) bne < (void *) btree_bkey_last(b, bset_tree_last(b))); -+ BUG_ON(b->nsets >= MAX_BSETS); -+ -+ memset(i, 0, sizeof(*i)); -+ i->seq = btree_bset_first(b)->seq; -+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); -+ -+ t = &b->set[b->nsets++]; -+ set_btree_bset(b, t, i); -+} -+ -+/* -+ * find _some_ key in the same bset as @k that precedes @k - not necessarily the -+ * immediate predecessor: -+ */ -+static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t, -+ struct bkey_packed *k) -+{ -+ struct bkey_packed *p; -+ unsigned offset; -+ int j; -+ -+ EBUG_ON(k < btree_bkey_first(b, t) || -+ k > btree_bkey_last(b, t)); -+ -+ if (k == btree_bkey_first(b, t)) -+ return NULL; -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_NO_AUX_TREE: -+ p = btree_bkey_first(b, t); -+ break; -+ case BSET_RO_AUX_TREE: -+ j = min_t(unsigned, t->size - 1, bkey_to_cacheline(b, t, k)); -+ -+ do { -+ p = j ? tree_to_bkey(b, t, -+ __inorder_to_eytzinger1(j--, -+ t->size, t->extra)) -+ : btree_bkey_first(b, t); -+ } while (p >= k); -+ break; -+ case BSET_RW_AUX_TREE: -+ offset = __btree_node_key_to_offset(b, k); -+ j = rw_aux_tree_bsearch(b, t, offset); -+ p = j ? 
rw_aux_to_bkey(b, t, j - 1) -+ : btree_bkey_first(b, t); -+ break; -+ } -+ -+ return p; -+} -+ -+struct bkey_packed *bch2_bkey_prev_filter(struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *k, -+ unsigned min_key_type) -+{ -+ struct bkey_packed *p, *i, *ret = NULL, *orig_k = k; -+ -+ while ((p = __bkey_prev(b, t, k)) && !ret) { -+ for (i = p; i != k; i = bkey_next_skip_noops(i, k)) -+ if (i->type >= min_key_type) -+ ret = i; -+ -+ k = p; -+ } -+ -+ if (btree_keys_expensive_checks(b)) { -+ BUG_ON(ret >= orig_k); -+ -+ for (i = ret -+ ? bkey_next_skip_noops(ret, orig_k) -+ : btree_bkey_first(b, t); -+ i != orig_k; -+ i = bkey_next_skip_noops(i, orig_k)) -+ BUG_ON(i->type >= min_key_type); -+ } -+ -+ return ret; -+} -+ -+/* Insert */ -+ -+static void rw_aux_tree_fix_invalidated_key(struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *k) -+{ -+ unsigned offset = __btree_node_key_to_offset(b, k); -+ unsigned j = rw_aux_tree_bsearch(b, t, offset); -+ -+ if (j < t->size && -+ rw_aux_tree(b, t)[j].offset == offset) -+ rw_aux_tree_set(b, t, j, k); -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+} -+ -+static void ro_aux_tree_fix_invalidated_key(struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *k) -+{ -+ struct bkey_packed min_key, max_key; -+ unsigned inorder, j; -+ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE); -+ -+ /* signal to make_bfloat() that they're uninitialized: */ -+ min_key.u64s = max_key.u64s = 0; -+ -+ if (bkey_next_skip_noops(k, btree_bkey_last(b, t)) == btree_bkey_last(b, t)) { -+ t->max_key = bkey_unpack_pos(b, k); -+ -+ for (j = 1; j < t->size; j = j * 2 + 1) -+ make_bfloat(b, t, j, &min_key, &max_key); -+ } -+ -+ inorder = bkey_to_cacheline(b, t, k); -+ -+ if (inorder && -+ inorder < t->size) { -+ j = __inorder_to_eytzinger1(inorder, t->size, t->extra); -+ -+ if (k == tree_to_bkey(b, t, j)) { -+ /* Fix the node this key corresponds to */ -+ make_bfloat(b, t, j, &min_key, &max_key); -+ -+ /* Children for which this key is the right boundary */ -+ for (j = eytzinger1_left_child(j); -+ j < t->size; -+ j = eytzinger1_right_child(j)) -+ make_bfloat(b, t, j, &min_key, &max_key); -+ } -+ } -+ -+ if (inorder + 1 < t->size) { -+ j = __inorder_to_eytzinger1(inorder + 1, t->size, t->extra); -+ -+ if (k == tree_to_prev_bkey(b, t, j)) { -+ make_bfloat(b, t, j, &min_key, &max_key); -+ -+ /* Children for which this key is the left boundary */ -+ for (j = eytzinger1_right_child(j); -+ j < t->size; -+ j = eytzinger1_left_child(j)) -+ make_bfloat(b, t, j, &min_key, &max_key); -+ } -+ } -+} -+ -+/** -+ * bch2_bset_fix_invalidated_key() - given an existing key @k that has been -+ * modified, fix any auxiliary search tree by remaking all the nodes in the -+ * auxiliary search tree that @k corresponds to -+ */ -+void bch2_bset_fix_invalidated_key(struct btree *b, struct bkey_packed *k) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_NO_AUX_TREE: -+ break; -+ case BSET_RO_AUX_TREE: -+ ro_aux_tree_fix_invalidated_key(b, t, k); -+ break; -+ case BSET_RW_AUX_TREE: -+ rw_aux_tree_fix_invalidated_key(b, t, k); -+ break; -+ } -+} -+ -+static void bch2_bset_fix_lookup_table(struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *_where, -+ unsigned clobber_u64s, -+ unsigned new_u64s) -+{ -+ int shift = new_u64s - clobber_u64s; -+ unsigned l, j, where = __btree_node_key_to_offset(b, _where); -+ -+ EBUG_ON(bset_has_ro_aux_tree(t)); -+ -+ if (!bset_has_rw_aux_tree(t)) -+ return; -+ -+ /* returns first 
entry >= where */ -+ l = rw_aux_tree_bsearch(b, t, where); -+ -+ if (!l) /* never delete first entry */ -+ l++; -+ else if (l < t->size && -+ where < t->end_offset && -+ rw_aux_tree(b, t)[l].offset == where) -+ rw_aux_tree_set(b, t, l++, _where); -+ -+ /* l now > where */ -+ -+ for (j = l; -+ j < t->size && -+ rw_aux_tree(b, t)[j].offset < where + clobber_u64s; -+ j++) -+ ; -+ -+ if (j < t->size && -+ rw_aux_tree(b, t)[j].offset + shift == -+ rw_aux_tree(b, t)[l - 1].offset) -+ j++; -+ -+ memmove(&rw_aux_tree(b, t)[l], -+ &rw_aux_tree(b, t)[j], -+ (void *) &rw_aux_tree(b, t)[t->size] - -+ (void *) &rw_aux_tree(b, t)[j]); -+ t->size -= j - l; -+ -+ for (j = l; j < t->size; j++) -+ rw_aux_tree(b, t)[j].offset += shift; -+ -+ EBUG_ON(l < t->size && -+ rw_aux_tree(b, t)[l].offset == -+ rw_aux_tree(b, t)[l - 1].offset); -+ -+ if (t->size < bset_rw_tree_capacity(b, t) && -+ (l < t->size -+ ? rw_aux_tree(b, t)[l].offset -+ : t->end_offset) - -+ rw_aux_tree(b, t)[l - 1].offset > -+ L1_CACHE_BYTES / sizeof(u64)) { -+ struct bkey_packed *start = rw_aux_to_bkey(b, t, l - 1); -+ struct bkey_packed *end = l < t->size -+ ? rw_aux_to_bkey(b, t, l) -+ : btree_bkey_last(b, t); -+ struct bkey_packed *k = start; -+ -+ while (1) { -+ k = bkey_next_skip_noops(k, end); -+ if (k == end) -+ break; -+ -+ if ((void *) k - (void *) start >= L1_CACHE_BYTES) { -+ memmove(&rw_aux_tree(b, t)[l + 1], -+ &rw_aux_tree(b, t)[l], -+ (void *) &rw_aux_tree(b, t)[t->size] - -+ (void *) &rw_aux_tree(b, t)[l]); -+ t->size++; -+ rw_aux_tree_set(b, t, l, k); -+ break; -+ } -+ } -+ } -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+ bset_aux_tree_verify(b); -+} -+ -+void bch2_bset_insert(struct btree *b, -+ struct btree_node_iter *iter, -+ struct bkey_packed *where, -+ struct bkey_i *insert, -+ unsigned clobber_u64s) -+{ -+ struct bkey_format *f = &b->format; -+ struct bset_tree *t = bset_tree_last(b); -+ struct bkey_packed packed, *src = bkey_to_packed(insert); -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+ bch2_verify_insert_pos(b, where, bkey_to_packed(insert), clobber_u64s); -+ -+ if (bch2_bkey_pack_key(&packed, &insert->k, f)) -+ src = &packed; -+ -+ if (!bkey_whiteout(&insert->k)) -+ btree_keys_account_key_add(&b->nr, t - b->set, src); -+ -+ if (src->u64s != clobber_u64s) { -+ u64 *src_p = where->_data + clobber_u64s; -+ u64 *dst_p = where->_data + src->u64s; -+ -+ EBUG_ON((int) le16_to_cpu(bset(b, t)->u64s) < -+ (int) clobber_u64s - src->u64s); -+ -+ memmove_u64s(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p); -+ le16_add_cpu(&bset(b, t)->u64s, src->u64s - clobber_u64s); -+ set_btree_bset_end(b, t); -+ } -+ -+ memcpy_u64s(where, src, -+ bkeyp_key_u64s(f, src)); -+ memcpy_u64s(bkeyp_val(f, where), &insert->v, -+ bkeyp_val_u64s(f, src)); -+ -+ if (src->u64s != clobber_u64s) -+ bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s); -+ -+ bch2_verify_btree_nr_keys(b); -+} -+ -+void bch2_bset_delete(struct btree *b, -+ struct bkey_packed *where, -+ unsigned clobber_u64s) -+{ -+ struct bset_tree *t = bset_tree_last(b); -+ u64 *src_p = where->_data + clobber_u64s; -+ u64 *dst_p = where->_data; -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+ -+ EBUG_ON(le16_to_cpu(bset(b, t)->u64s) < clobber_u64s); -+ -+ memmove_u64s_down(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p); -+ le16_add_cpu(&bset(b, t)->u64s, -clobber_u64s); -+ set_btree_bset_end(b, t); -+ -+ bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, 0); -+} -+ -+/* Lookup */ -+ -+__flatten -+static struct bkey_packed *bset_search_write_set(const struct btree *b, -+ 
struct bset_tree *t, -+ struct bpos *search, -+ const struct bkey_packed *packed_search) -+{ -+ unsigned l = 0, r = t->size; -+ -+ while (l + 1 != r) { -+ unsigned m = (l + r) >> 1; -+ -+ if (bkey_cmp(rw_aux_tree(b, t)[m].k, *search) < 0) -+ l = m; -+ else -+ r = m; -+ } -+ -+ return rw_aux_to_bkey(b, t, l); -+} -+ -+static inline void prefetch_four_cachelines(void *p) -+{ -+#ifdef CONFIG_X86_64 -+ asm(".intel_syntax noprefix;" -+ "prefetcht0 [%0 - 127 + 64 * 0];" -+ "prefetcht0 [%0 - 127 + 64 * 1];" -+ "prefetcht0 [%0 - 127 + 64 * 2];" -+ "prefetcht0 [%0 - 127 + 64 * 3];" -+ ".att_syntax prefix;" -+ : -+ : "r" (p + 127)); -+#else -+ prefetch(p + L1_CACHE_BYTES * 0); -+ prefetch(p + L1_CACHE_BYTES * 1); -+ prefetch(p + L1_CACHE_BYTES * 2); -+ prefetch(p + L1_CACHE_BYTES * 3); -+#endif -+} -+ -+static inline bool bkey_mantissa_bits_dropped(const struct btree *b, -+ const struct bkey_float *f, -+ unsigned idx) -+{ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ unsigned key_bits_start = b->format.key_u64s * 64 - b->nr_key_bits; -+ -+ return f->exponent > key_bits_start; -+#else -+ unsigned key_bits_end = high_bit_offset + b->nr_key_bits; -+ -+ return f->exponent + BKEY_MANTISSA_BITS < key_bits_end; -+#endif -+} -+ -+__flatten -+static struct bkey_packed *bset_search_tree(const struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search, -+ const struct bkey_packed *packed_search) -+{ -+ struct ro_aux_tree *base = ro_aux_tree_base(b, t); -+ struct bkey_float *f; -+ struct bkey_packed *k; -+ unsigned inorder, n = 1, l, r; -+ int cmp; -+ -+ do { -+ if (likely(n << 4 < t->size)) -+ prefetch(&base->f[n << 4]); -+ -+ f = &base->f[n]; -+ -+ if (!unlikely(packed_search)) -+ goto slowpath; -+ if (unlikely(f->exponent >= BFLOAT_FAILED)) -+ goto slowpath; -+ -+ l = f->mantissa; -+ r = bkey_mantissa(packed_search, f, n); -+ -+ if (unlikely(l == r) && bkey_mantissa_bits_dropped(b, f, n)) -+ goto slowpath; -+ -+ n = n * 2 + (l < r); -+ continue; -+slowpath: -+ k = tree_to_bkey(b, t, n); -+ cmp = bkey_cmp_p_or_unp(b, k, packed_search, search); -+ if (!cmp) -+ return k; -+ -+ n = n * 2 + (cmp < 0); -+ } while (n < t->size); -+ -+ inorder = __eytzinger1_to_inorder(n >> 1, t->size, t->extra); -+ -+ /* -+ * n would have been the node we recursed to - the low bit tells us if -+ * we recursed left or recursed right. -+ */ -+ if (likely(!(n & 1))) { -+ --inorder; -+ if (unlikely(!inorder)) -+ return btree_bkey_first(b, t); -+ -+ f = &base->f[eytzinger1_prev(n >> 1, t->size)]; -+ } -+ -+ return cacheline_to_bkey(b, t, inorder, f->key_offset); -+} -+ -+static __always_inline __flatten -+struct bkey_packed *__bch2_bset_search(struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search, -+ const struct bkey_packed *lossy_packed_search) -+{ -+ -+ /* -+ * First, we search for a cacheline, then lastly we do a linear search -+ * within that cacheline. -+ * -+ * To search for the cacheline, there's three different possibilities: -+ * * The set is too small to have a search tree, so we just do a linear -+ * search over the whole set. -+ * * The set is the one we're currently inserting into; keeping a full -+ * auxiliary search tree up to date would be too expensive, so we -+ * use a much simpler lookup table to do a binary search - -+ * bset_search_write_set(). 
-+ * * Or we use the auxiliary search tree we constructed earlier - -+ * bset_search_tree() -+ */ -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_NO_AUX_TREE: -+ return btree_bkey_first(b, t); -+ case BSET_RW_AUX_TREE: -+ return bset_search_write_set(b, t, search, lossy_packed_search); -+ case BSET_RO_AUX_TREE: -+ /* -+ * Each node in the auxiliary search tree covers a certain range -+ * of bits, and keys above and below the set it covers might -+ * differ outside those bits - so we have to special case the -+ * start and end - handle that here: -+ */ -+ -+ if (bkey_cmp(*search, t->max_key) > 0) -+ return btree_bkey_last(b, t); -+ -+ return bset_search_tree(b, t, search, lossy_packed_search); -+ default: -+ unreachable(); -+ } -+} -+ -+static __always_inline __flatten -+struct bkey_packed *bch2_bset_search_linear(struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search, -+ struct bkey_packed *packed_search, -+ const struct bkey_packed *lossy_packed_search, -+ struct bkey_packed *m) -+{ -+ if (lossy_packed_search) -+ while (m != btree_bkey_last(b, t) && -+ bkey_iter_cmp_p_or_unp(b, m, -+ lossy_packed_search, search) < 0) -+ m = bkey_next_skip_noops(m, btree_bkey_last(b, t)); -+ -+ if (!packed_search) -+ while (m != btree_bkey_last(b, t) && -+ bkey_iter_pos_cmp(b, m, search) < 0) -+ m = bkey_next_skip_noops(m, btree_bkey_last(b, t)); -+ -+ if (btree_keys_expensive_checks(b)) { -+ struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m); -+ -+ BUG_ON(prev && -+ bkey_iter_cmp_p_or_unp(b, prev, -+ packed_search, search) >= 0); -+ } -+ -+ return m; -+} -+ -+/* -+ * Returns the first key greater than or equal to @search -+ */ -+static __always_inline __flatten -+struct bkey_packed *bch2_bset_search(struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search, -+ struct bkey_packed *packed_search, -+ const struct bkey_packed *lossy_packed_search) -+{ -+ struct bkey_packed *m = __bch2_bset_search(b, t, search, -+ lossy_packed_search); -+ -+ return bch2_bset_search_linear(b, t, search, -+ packed_search, lossy_packed_search, m); -+} -+ -+/* Btree node iterator */ -+ -+static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter, -+ struct btree *b, -+ const struct bkey_packed *k, -+ const struct bkey_packed *end) -+{ -+ if (k != end) { -+ struct btree_node_iter_set *pos; -+ -+ btree_node_iter_for_each(iter, pos) -+ ; -+ -+ BUG_ON(pos >= iter->data + ARRAY_SIZE(iter->data)); -+ *pos = (struct btree_node_iter_set) { -+ __btree_node_key_to_offset(b, k), -+ __btree_node_key_to_offset(b, end) -+ }; -+ } -+} -+ -+void bch2_btree_node_iter_push(struct btree_node_iter *iter, -+ struct btree *b, -+ const struct bkey_packed *k, -+ const struct bkey_packed *end) -+{ -+ __bch2_btree_node_iter_push(iter, b, k, end); -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+noinline __flatten __attribute__((cold)) -+static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter, -+ struct btree *b, struct bpos *search) -+{ -+ struct bset_tree *t; -+ -+ trace_bkey_pack_pos_fail(search); -+ -+ for_each_bset(b, t) -+ __bch2_btree_node_iter_push(iter, b, -+ bch2_bset_search(b, t, search, NULL, NULL), -+ btree_bkey_last(b, t)); -+ -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+/** -+ * bch_btree_node_iter_init - initialize a btree node iterator, starting from a -+ * given position -+ * -+ * Main entry point to the lookup code for individual btree nodes: -+ * -+ * NOTE: -+ * -+ * When you don't filter out deleted keys, btree nodes _do_ contain duplicate -+ * keys. 
This doesn't matter for most code, but it does matter for lookups. -+ * -+ * Some adjacent keys with a string of equal keys: -+ * i j k k k k l m -+ * -+ * If you search for k, the lookup code isn't guaranteed to return you any -+ * specific k. The lookup code is conceptually doing a binary search and -+ * iterating backwards is very expensive so if the pivot happens to land at the -+ * last k that's what you'll get. -+ * -+ * This works out ok, but it's something to be aware of: -+ * -+ * - For non extents, we guarantee that the live key comes last - see -+ * btree_node_iter_cmp(), keys_out_of_order(). So the duplicates you don't -+ * see will only be deleted keys you don't care about. -+ * -+ * - For extents, deleted keys sort last (see the comment at the top of this -+ * file). But when you're searching for extents, you actually want the first -+ * key strictly greater than your search key - an extent that compares equal -+ * to the search key is going to have 0 sectors after the search key. -+ * -+ * But this does mean that we can't just search for -+ * bkey_successor(start_of_range) to get the first extent that overlaps with -+ * the range we want - if we're unlucky and there's an extent that ends -+ * exactly where we searched, then there could be a deleted key at the same -+ * position and we'd get that when we search instead of the preceding extent -+ * we needed. -+ * -+ * So we've got to search for start_of_range, then after the lookup iterate -+ * past any extents that compare equal to the position we searched for. -+ */ -+__flatten -+void bch2_btree_node_iter_init(struct btree_node_iter *iter, -+ struct btree *b, struct bpos *search) -+{ -+ struct bkey_packed p, *packed_search = NULL; -+ struct btree_node_iter_set *pos = iter->data; -+ struct bkey_packed *k[MAX_BSETS]; -+ unsigned i; -+ -+ EBUG_ON(bkey_cmp(*search, b->data->min_key) < 0); -+ bset_aux_tree_verify(b); -+ -+ memset(iter, 0, sizeof(*iter)); -+ -+ switch (bch2_bkey_pack_pos_lossy(&p, *search, b)) { -+ case BKEY_PACK_POS_EXACT: -+ packed_search = &p; -+ break; -+ case BKEY_PACK_POS_SMALLER: -+ packed_search = NULL; -+ break; -+ case BKEY_PACK_POS_FAIL: -+ btree_node_iter_init_pack_failed(iter, b, search); -+ return; -+ } -+ -+ for (i = 0; i < b->nsets; i++) { -+ k[i] = __bch2_bset_search(b, b->set + i, search, &p); -+ prefetch_four_cachelines(k[i]); -+ } -+ -+ for (i = 0; i < b->nsets; i++) { -+ struct bset_tree *t = b->set + i; -+ struct bkey_packed *end = btree_bkey_last(b, t); -+ -+ k[i] = bch2_bset_search_linear(b, t, search, -+ packed_search, &p, k[i]); -+ if (k[i] != end) -+ *pos++ = (struct btree_node_iter_set) { -+ __btree_node_key_to_offset(b, k[i]), -+ __btree_node_key_to_offset(b, end) -+ }; -+ } -+ -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+void bch2_btree_node_iter_init_from_start(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ memset(iter, 0, sizeof(*iter)); -+ -+ for_each_bset(b, t) -+ __bch2_btree_node_iter_push(iter, b, -+ btree_bkey_first(b, t), -+ btree_bkey_last(b, t)); -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *iter, -+ struct btree *b, -+ struct bset_tree *t) -+{ -+ struct btree_node_iter_set *set; -+ -+ btree_node_iter_for_each(iter, set) -+ if (set->end == t->end_offset) -+ return __btree_node_offset_to_key(b, set->k); -+ -+ return btree_bkey_last(b, t); -+} -+ -+static inline bool btree_node_iter_sort_two(struct btree_node_iter *iter, -+ struct btree *b, -+ unsigned first) 
-+{ -+ bool ret; -+ -+ if ((ret = (btree_node_iter_cmp(b, -+ iter->data[first], -+ iter->data[first + 1]) > 0))) -+ swap(iter->data[first], iter->data[first + 1]); -+ return ret; -+} -+ -+void bch2_btree_node_iter_sort(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ /* unrolled bubble sort: */ -+ -+ if (!__btree_node_iter_set_end(iter, 2)) { -+ btree_node_iter_sort_two(iter, b, 0); -+ btree_node_iter_sort_two(iter, b, 1); -+ } -+ -+ if (!__btree_node_iter_set_end(iter, 1)) -+ btree_node_iter_sort_two(iter, b, 0); -+} -+ -+void bch2_btree_node_iter_set_drop(struct btree_node_iter *iter, -+ struct btree_node_iter_set *set) -+{ -+ struct btree_node_iter_set *last = -+ iter->data + ARRAY_SIZE(iter->data) - 1; -+ -+ memmove(&set[0], &set[1], (void *) last - (void *) set); -+ *last = (struct btree_node_iter_set) { 0, 0 }; -+} -+ -+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ iter->data->k += __bch2_btree_node_iter_peek_all(iter, b)->u64s; -+ -+ EBUG_ON(iter->data->k > iter->data->end); -+ -+ while (!__btree_node_iter_set_end(iter, 0) && -+ !__bch2_btree_node_iter_peek_all(iter, b)->u64s) -+ iter->data->k++; -+ -+ if (unlikely(__btree_node_iter_set_end(iter, 0))) { -+ bch2_btree_node_iter_set_drop(iter, iter->data); -+ return; -+ } -+ -+ if (__btree_node_iter_set_end(iter, 1)) -+ return; -+ -+ if (!btree_node_iter_sort_two(iter, b, 0)) -+ return; -+ -+ if (__btree_node_iter_set_end(iter, 2)) -+ return; -+ -+ btree_node_iter_sort_two(iter, b, 1); -+} -+ -+void bch2_btree_node_iter_advance(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ if (btree_keys_expensive_checks(b)) { -+ bch2_btree_node_iter_verify(iter, b); -+ bch2_btree_node_iter_next_check(iter, b); -+ } -+ -+ __bch2_btree_node_iter_advance(iter, b); -+} -+ -+/* -+ * Expensive: -+ */ -+struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ struct bkey_packed *k, *prev = NULL; -+ struct btree_node_iter_set *set; -+ struct bset_tree *t; -+ unsigned end = 0; -+ -+ if (btree_keys_expensive_checks(b)) -+ bch2_btree_node_iter_verify(iter, b); -+ -+ for_each_bset(b, t) { -+ k = bch2_bkey_prev_all(b, t, -+ bch2_btree_node_iter_bset_pos(iter, b, t)); -+ if (k && -+ (!prev || bkey_iter_cmp(b, k, prev) > 0)) { -+ prev = k; -+ end = t->end_offset; -+ } -+ } -+ -+ if (!prev) -+ return NULL; -+ -+ /* -+ * We're manually memmoving instead of just calling sort() to ensure the -+ * prev we picked ends up in slot 0 - sort won't necessarily put it -+ * there because of duplicate deleted keys: -+ */ -+ btree_node_iter_for_each(iter, set) -+ if (set->end == end) -+ goto found; -+ -+ BUG_ON(set != &iter->data[__btree_node_iter_used(iter)]); -+found: -+ BUG_ON(set >= iter->data + ARRAY_SIZE(iter->data)); -+ -+ memmove(&iter->data[1], -+ &iter->data[0], -+ (void *) set - (void *) &iter->data[0]); -+ -+ iter->data[0].k = __btree_node_key_to_offset(b, prev); -+ iter->data[0].end = end; -+ -+ if (btree_keys_expensive_checks(b)) -+ bch2_btree_node_iter_verify(iter, b); -+ return prev; -+} -+ -+struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *iter, -+ struct btree *b, -+ unsigned min_key_type) -+{ -+ struct bkey_packed *prev; -+ -+ do { -+ prev = bch2_btree_node_iter_prev_all(iter, b); -+ } while (prev && prev->type < min_key_type); -+ -+ return prev; -+} -+ -+struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *iter, -+ struct btree *b, -+ struct bkey *u) -+{ -+ struct bkey_packed *k = 
bch2_btree_node_iter_peek(iter, b); -+ -+ return k ? bkey_disassemble(b, k, u) : bkey_s_c_null; -+} -+ -+/* Mergesort */ -+ -+void bch2_btree_keys_stats(struct btree *b, struct bset_stats *stats) -+{ -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) { -+ enum bset_aux_tree_type type = bset_aux_tree_type(t); -+ size_t j; -+ -+ stats->sets[type].nr++; -+ stats->sets[type].bytes += le16_to_cpu(bset(b, t)->u64s) * -+ sizeof(u64); -+ -+ if (bset_has_ro_aux_tree(t)) { -+ stats->floats += t->size - 1; -+ -+ for (j = 1; j < t->size; j++) -+ stats->failed += -+ bkey_float(b, t, j)->exponent == -+ BFLOAT_FAILED; -+ } -+ } -+} -+ -+void bch2_bfloat_to_text(struct printbuf *out, struct btree *b, -+ struct bkey_packed *k) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ struct bkey uk; -+ unsigned j, inorder; -+ -+ if (out->pos != out->end) -+ *out->pos = '\0'; -+ -+ if (!bset_has_ro_aux_tree(t)) -+ return; -+ -+ inorder = bkey_to_cacheline(b, t, k); -+ if (!inorder || inorder >= t->size) -+ return; -+ -+ j = __inorder_to_eytzinger1(inorder, t->size, t->extra); -+ if (k != tree_to_bkey(b, t, j)) -+ return; -+ -+ switch (bkey_float(b, t, j)->exponent) { -+ case BFLOAT_FAILED: -+ uk = bkey_unpack_key(b, k); -+ pr_buf(out, -+ " failed unpacked at depth %u\n" -+ "\t%llu:%llu\n", -+ ilog2(j), -+ uk.p.inode, uk.p.offset); -+ break; -+ } -+} -diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h -new file mode 100644 -index 000000000000..652ffed4adfb ---- /dev/null -+++ b/fs/bcachefs/bset.h -@@ -0,0 +1,631 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BSET_H -+#define _BCACHEFS_BSET_H -+ -+#include -+#include -+ -+#include "bcachefs_format.h" -+#include "bkey.h" -+#include "bkey_methods.h" -+#include "btree_types.h" -+#include "util.h" /* for time_stats */ -+#include "vstructs.h" -+ -+/* -+ * BKEYS: -+ * -+ * A bkey contains a key, a size field, a variable number of pointers, and some -+ * ancillary flag bits. -+ * -+ * We use two different functions for validating bkeys, bkey_invalid and -+ * bkey_deleted(). -+ * -+ * The one exception to the rule that ptr_invalid() filters out invalid keys is -+ * that it also filters out keys of size 0 - these are keys that have been -+ * completely overwritten. It'd be safe to delete these in memory while leaving -+ * them on disk, just unnecessary work - so we filter them out when resorting -+ * instead. -+ * -+ * We can't filter out stale keys when we're resorting, because garbage -+ * collection needs to find them to ensure bucket gens don't wrap around - -+ * unless we're rewriting the btree node those stale keys still exist on disk. -+ * -+ * We also implement functions here for removing some number of sectors from the -+ * front or the back of a bkey - this is mainly used for fixing overlapping -+ * extents, by removing the overlapping sectors from the older key. -+ * -+ * BSETS: -+ * -+ * A bset is an array of bkeys laid out contiguously in memory in sorted order, -+ * along with a header. A btree node is made up of a number of these, written at -+ * different times. -+ * -+ * There could be many of them on disk, but we never allow there to be more than -+ * 4 in memory - we lazily resort as needed. -+ * -+ * We implement code here for creating and maintaining auxiliary search trees -+ * (described below) for searching an individial bset, and on top of that we -+ * implement a btree iterator. 
-+ * -+ * BTREE ITERATOR: -+ * -+ * Most of the code in bcache doesn't care about an individual bset - it needs -+ * to search entire btree nodes and iterate over them in sorted order. -+ * -+ * The btree iterator code serves both functions; it iterates through the keys -+ * in a btree node in sorted order, starting from either keys after a specific -+ * point (if you pass it a search key) or the start of the btree node. -+ * -+ * AUXILIARY SEARCH TREES: -+ * -+ * Since keys are variable length, we can't use a binary search on a bset - we -+ * wouldn't be able to find the start of the next key. But binary searches are -+ * slow anyways, due to terrible cache behaviour; bcache originally used binary -+ * searches and that code topped out at under 50k lookups/second. -+ * -+ * So we need to construct some sort of lookup table. Since we only insert keys -+ * into the last (unwritten) set, most of the keys within a given btree node are -+ * usually in sets that are mostly constant. We use two different types of -+ * lookup tables to take advantage of this. -+ * -+ * Both lookup tables share in common that they don't index every key in the -+ * set; they index one key every BSET_CACHELINE bytes, and then a linear search -+ * is used for the rest. -+ * -+ * For sets that have been written to disk and are no longer being inserted -+ * into, we construct a binary search tree in an array - traversing a binary -+ * search tree in an array gives excellent locality of reference and is very -+ * fast, since both children of any node are adjacent to each other in memory -+ * (and their grandchildren, and great grandchildren...) - this means -+ * prefetching can be used to great effect. -+ * -+ * It's quite useful performance wise to keep these nodes small - not just -+ * because they're more likely to be in L2, but also because we can prefetch -+ * more nodes on a single cacheline and thus prefetch more iterations in advance -+ * when traversing this tree. -+ * -+ * Nodes in the auxiliary search tree must contain both a key to compare against -+ * (we don't want to fetch the key from the set, that would defeat the purpose), -+ * and a pointer to the key. We use a few tricks to compress both of these. -+ * -+ * To compress the pointer, we take advantage of the fact that one node in the -+ * search tree corresponds to precisely BSET_CACHELINE bytes in the set. We have -+ * a function (to_inorder()) that takes the index of a node in a binary tree and -+ * returns what its index would be in an inorder traversal, so we only have to -+ * store the low bits of the offset. -+ * -+ * The key is 84 bits (KEY_DEV + key->key, the offset on the device). To -+ * compress that, we take advantage of the fact that when we're traversing the -+ * search tree at every iteration we know that both our search key and the key -+ * we're looking for lie within some range - bounded by our previous -+ * comparisons. (We special case the start of a search so that this is true even -+ * at the root of the tree). -+ * -+ * So we know the key we're looking for is between a and b, and a and b don't -+ * differ higher than bit 50, we don't need to check anything higher than bit -+ * 50. -+ * -+ * We don't usually need the rest of the bits, either; we only need enough bits -+ * to partition the key range we're currently checking. Consider key n - the -+ * key our auxiliary search tree node corresponds to, and key p, the key -+ * immediately preceding n. 
The lowest bit we need to store in the auxiliary -+ * search tree is the highest bit that differs between n and p. -+ * -+ * Note that this could be bit 0 - we might sometimes need all 80 bits to do the -+ * comparison. But we'd really like our nodes in the auxiliary search tree to be -+ * of fixed size. -+ * -+ * The solution is to make them fixed size, and when we're constructing a node -+ * check if p and n differed in the bits we needed them to. If they don't we -+ * flag that node, and when doing lookups we fallback to comparing against the -+ * real key. As long as this doesn't happen to often (and it seems to reliably -+ * happen a bit less than 1% of the time), we win - even on failures, that key -+ * is then more likely to be in cache than if we were doing binary searches all -+ * the way, since we're touching so much less memory. -+ * -+ * The keys in the auxiliary search tree are stored in (software) floating -+ * point, with an exponent and a mantissa. The exponent needs to be big enough -+ * to address all the bits in the original key, but the number of bits in the -+ * mantissa is somewhat arbitrary; more bits just gets us fewer failures. -+ * -+ * We need 7 bits for the exponent and 3 bits for the key's offset (since keys -+ * are 8 byte aligned); using 22 bits for the mantissa means a node is 4 bytes. -+ * We need one node per 128 bytes in the btree node, which means the auxiliary -+ * search trees take up 3% as much memory as the btree itself. -+ * -+ * Constructing these auxiliary search trees is moderately expensive, and we -+ * don't want to be constantly rebuilding the search tree for the last set -+ * whenever we insert another key into it. For the unwritten set, we use a much -+ * simpler lookup table - it's just a flat array, so index i in the lookup table -+ * corresponds to the i range of BSET_CACHELINE bytes in the set. Indexing -+ * within each byte range works the same as with the auxiliary search trees. -+ * -+ * These are much easier to keep up to date when we insert a key - we do it -+ * somewhat lazily; when we shift a key up we usually just increment the pointer -+ * to it, only when it would overflow do we go to the trouble of finding the -+ * first key in that range of bytes again. 
-+ */ -+ -+extern bool bch2_expensive_debug_checks; -+ -+static inline bool btree_keys_expensive_checks(const struct btree *b) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ return bch2_expensive_debug_checks || *b->expensive_debug_checks; -+#else -+ return false; -+#endif -+} -+ -+enum bset_aux_tree_type { -+ BSET_NO_AUX_TREE, -+ BSET_RO_AUX_TREE, -+ BSET_RW_AUX_TREE, -+}; -+ -+#define BSET_TREE_NR_TYPES 3 -+ -+#define BSET_NO_AUX_TREE_VAL (U16_MAX) -+#define BSET_RW_AUX_TREE_VAL (U16_MAX - 1) -+ -+static inline enum bset_aux_tree_type bset_aux_tree_type(const struct bset_tree *t) -+{ -+ switch (t->extra) { -+ case BSET_NO_AUX_TREE_VAL: -+ EBUG_ON(t->size); -+ return BSET_NO_AUX_TREE; -+ case BSET_RW_AUX_TREE_VAL: -+ EBUG_ON(!t->size); -+ return BSET_RW_AUX_TREE; -+ default: -+ EBUG_ON(!t->size); -+ return BSET_RO_AUX_TREE; -+ } -+} -+ -+typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *); -+ -+static inline void -+__bkey_unpack_key_format_checked(const struct btree *b, -+ struct bkey *dst, -+ const struct bkey_packed *src) -+{ -+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK -+ { -+ compiled_unpack_fn unpack_fn = b->aux_data; -+ unpack_fn(dst, src); -+ -+ if (btree_keys_expensive_checks(b)) { -+ struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src); -+ -+ BUG_ON(memcmp(dst, &dst2, sizeof(*dst))); -+ } -+ } -+#else -+ *dst = __bch2_bkey_unpack_key(&b->format, src); -+#endif -+} -+ -+static inline struct bkey -+bkey_unpack_key_format_checked(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+ struct bkey dst; -+ -+ __bkey_unpack_key_format_checked(b, &dst, src); -+ return dst; -+} -+ -+static inline void __bkey_unpack_key(const struct btree *b, -+ struct bkey *dst, -+ const struct bkey_packed *src) -+{ -+ if (likely(bkey_packed(src))) -+ __bkey_unpack_key_format_checked(b, dst, src); -+ else -+ *dst = *packed_to_bkey_c(src); -+} -+ -+/** -+ * bkey_unpack_key -- unpack just the key, not the value -+ */ -+static inline struct bkey bkey_unpack_key(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+ return likely(bkey_packed(src)) -+ ? bkey_unpack_key_format_checked(b, src) -+ : *packed_to_bkey_c(src); -+} -+ -+static inline struct bpos -+bkey_unpack_pos_format_checked(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK -+ return bkey_unpack_key_format_checked(b, src).p; -+#else -+ return __bkey_unpack_pos(&b->format, src); -+#endif -+} -+ -+static inline struct bpos bkey_unpack_pos(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+ return likely(bkey_packed(src)) -+ ? 
bkey_unpack_pos_format_checked(b, src) -+ : packed_to_bkey_c(src)->p; -+} -+ -+/* Disassembled bkeys */ -+ -+static inline struct bkey_s_c bkey_disassemble(struct btree *b, -+ const struct bkey_packed *k, -+ struct bkey *u) -+{ -+ __bkey_unpack_key(b, u, k); -+ -+ return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), }; -+} -+ -+/* non const version: */ -+static inline struct bkey_s __bkey_disassemble(struct btree *b, -+ struct bkey_packed *k, -+ struct bkey *u) -+{ -+ __bkey_unpack_key(b, u, k); -+ -+ return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), }; -+} -+ -+#define for_each_bset(_b, _t) \ -+ for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++) -+ -+#define bset_tree_for_each_key(_b, _t, _k) \ -+ for (_k = btree_bkey_first(_b, _t); \ -+ _k != btree_bkey_last(_b, _t); \ -+ _k = bkey_next_skip_noops(_k, btree_bkey_last(_b, _t))) -+ -+static inline bool bset_has_ro_aux_tree(struct bset_tree *t) -+{ -+ return bset_aux_tree_type(t) == BSET_RO_AUX_TREE; -+} -+ -+static inline bool bset_has_rw_aux_tree(struct bset_tree *t) -+{ -+ return bset_aux_tree_type(t) == BSET_RW_AUX_TREE; -+} -+ -+static inline void bch2_bset_set_no_aux_tree(struct btree *b, -+ struct bset_tree *t) -+{ -+ BUG_ON(t < b->set); -+ -+ for (; t < b->set + ARRAY_SIZE(b->set); t++) { -+ t->size = 0; -+ t->extra = BSET_NO_AUX_TREE_VAL; -+ t->aux_data_offset = U16_MAX; -+ } -+} -+ -+static inline void btree_node_set_format(struct btree *b, -+ struct bkey_format f) -+{ -+ int len; -+ -+ b->format = f; -+ b->nr_key_bits = bkey_format_key_bits(&f); -+ -+ len = bch2_compile_bkey_format(&b->format, b->aux_data); -+ BUG_ON(len < 0 || len > U8_MAX); -+ -+ b->unpack_fn_len = len; -+ -+ bch2_bset_set_no_aux_tree(b, b->set); -+} -+ -+static inline struct bset *bset_next_set(struct btree *b, -+ unsigned block_bytes) -+{ -+ struct bset *i = btree_bset_last(b); -+ -+ EBUG_ON(!is_power_of_2(block_bytes)); -+ -+ return ((void *) i) + round_up(vstruct_bytes(i), block_bytes); -+} -+ -+void bch2_btree_keys_free(struct btree *); -+int bch2_btree_keys_alloc(struct btree *, unsigned, gfp_t); -+void bch2_btree_keys_init(struct btree *, bool *); -+ -+void bch2_bset_init_first(struct btree *, struct bset *); -+void bch2_bset_init_next(struct bch_fs *, struct btree *, -+ struct btree_node_entry *); -+void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool); -+void bch2_bset_fix_invalidated_key(struct btree *, struct bkey_packed *); -+ -+void bch2_bset_insert(struct btree *, struct btree_node_iter *, -+ struct bkey_packed *, struct bkey_i *, unsigned); -+void bch2_bset_delete(struct btree *, struct bkey_packed *, unsigned); -+ -+/* Bkey utility code */ -+ -+/* packed or unpacked */ -+static inline int bkey_cmp_p_or_unp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r_packed, -+ const struct bpos *r) -+{ -+ EBUG_ON(r_packed && !bkey_packed(r_packed)); -+ -+ if (unlikely(!bkey_packed(l))) -+ return bkey_cmp(packed_to_bkey_c(l)->p, *r); -+ -+ if (likely(r_packed)) -+ return __bch2_bkey_cmp_packed_format_checked(l, r_packed, b); -+ -+ return __bch2_bkey_cmp_left_packed_format_checked(b, l, r); -+} -+ -+struct bset_tree *bch2_bkey_to_bset(struct btree *, struct bkey_packed *); -+ -+struct bkey_packed *bch2_bkey_prev_filter(struct btree *, struct bset_tree *, -+ struct bkey_packed *, unsigned); -+ -+static inline struct bkey_packed * -+bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k) -+{ -+ return bch2_bkey_prev_filter(b, t, k, 0); -+} -+ -+static inline 
struct bkey_packed * -+bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k) -+{ -+ return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_discard + 1); -+} -+ -+enum bch_extent_overlap { -+ BCH_EXTENT_OVERLAP_ALL = 0, -+ BCH_EXTENT_OVERLAP_BACK = 1, -+ BCH_EXTENT_OVERLAP_FRONT = 2, -+ BCH_EXTENT_OVERLAP_MIDDLE = 3, -+}; -+ -+/* Returns how k overlaps with m */ -+static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k, -+ const struct bkey *m) -+{ -+ int cmp1 = bkey_cmp(k->p, m->p) < 0; -+ int cmp2 = bkey_cmp(bkey_start_pos(k), -+ bkey_start_pos(m)) > 0; -+ -+ return (cmp1 << 1) + cmp2; -+} -+ -+/* Btree key iteration */ -+ -+void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *, -+ const struct bkey_packed *, -+ const struct bkey_packed *); -+void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *, -+ struct bpos *); -+void bch2_btree_node_iter_init_from_start(struct btree_node_iter *, -+ struct btree *); -+struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *, -+ struct btree *, -+ struct bset_tree *); -+ -+void bch2_btree_node_iter_sort(struct btree_node_iter *, struct btree *); -+void bch2_btree_node_iter_set_drop(struct btree_node_iter *, -+ struct btree_node_iter_set *); -+void bch2_btree_node_iter_advance(struct btree_node_iter *, struct btree *); -+ -+#define btree_node_iter_for_each(_iter, _set) \ -+ for (_set = (_iter)->data; \ -+ _set < (_iter)->data + ARRAY_SIZE((_iter)->data) && \ -+ (_set)->k != (_set)->end; \ -+ _set++) -+ -+static inline bool __btree_node_iter_set_end(struct btree_node_iter *iter, -+ unsigned i) -+{ -+ return iter->data[i].k == iter->data[i].end; -+} -+ -+static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter) -+{ -+ return __btree_node_iter_set_end(iter, 0); -+} -+ -+/* -+ * When keys compare equal, deleted keys compare first: -+ * -+ * XXX: only need to compare pointers for keys that are both within a -+ * btree_node_iterator - we need to break ties for prev() to work correctly -+ */ -+static inline int bkey_iter_cmp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r) -+{ -+ return bkey_cmp_packed(b, l, r) -+ ?: (int) bkey_deleted(r) - (int) bkey_deleted(l) -+ ?: cmp_int(l, r); -+} -+ -+static inline int btree_node_iter_cmp(const struct btree *b, -+ struct btree_node_iter_set l, -+ struct btree_node_iter_set r) -+{ -+ return bkey_iter_cmp(b, -+ __btree_node_offset_to_key(b, l.k), -+ __btree_node_offset_to_key(b, r.k)); -+} -+ -+/* These assume r (the search key) is not a deleted key: */ -+static inline int bkey_iter_pos_cmp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bpos *r) -+{ -+ return bkey_cmp_left_packed(b, l, r) -+ ?: -((int) bkey_deleted(l)); -+} -+ -+static inline int bkey_iter_cmp_p_or_unp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r_packed, -+ const struct bpos *r) -+{ -+ return bkey_cmp_p_or_unp(b, l, r_packed, r) -+ ?: -((int) bkey_deleted(l)); -+} -+ -+static inline struct bkey_packed * -+__bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ return __btree_node_offset_to_key(b, iter->data->k); -+} -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_peek_filter(struct btree_node_iter *iter, -+ struct btree *b, -+ unsigned min_key_type) -+{ -+ while (!bch2_btree_node_iter_end(iter)) { -+ struct bkey_packed *k = __bch2_btree_node_iter_peek_all(iter, b); -+ -+ if (k->type >= min_key_type) -+ 
return k; -+ -+ bch2_btree_node_iter_advance(iter, b); -+ } -+ -+ return NULL; -+} -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ return bch2_btree_node_iter_peek_filter(iter, b, 0); -+} -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b) -+{ -+ return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_discard + 1); -+} -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_next_all(struct btree_node_iter *iter, struct btree *b) -+{ -+ struct bkey_packed *ret = bch2_btree_node_iter_peek_all(iter, b); -+ -+ if (ret) -+ bch2_btree_node_iter_advance(iter, b); -+ -+ return ret; -+} -+ -+struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *, -+ struct btree *); -+struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *, -+ struct btree *, unsigned); -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b) -+{ -+ return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_discard + 1); -+} -+ -+struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *, -+ struct btree *, -+ struct bkey *); -+ -+#define for_each_btree_node_key_unpack(b, k, iter, unpacked) \ -+ for (bch2_btree_node_iter_init_from_start((iter), (b)); \ -+ (k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\ -+ bch2_btree_node_iter_advance(iter, b)) -+ -+/* Accounting: */ -+ -+static inline void btree_keys_account_key(struct btree_nr_keys *n, -+ unsigned bset, -+ struct bkey_packed *k, -+ int sign) -+{ -+ n->live_u64s += k->u64s * sign; -+ n->bset_u64s[bset] += k->u64s * sign; -+ -+ if (bkey_packed(k)) -+ n->packed_keys += sign; -+ else -+ n->unpacked_keys += sign; -+} -+ -+static inline void btree_keys_account_val_delta(struct btree *b, -+ struct bkey_packed *k, -+ int delta) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ -+ b->nr.live_u64s += delta; -+ b->nr.bset_u64s[t - b->set] += delta; -+} -+ -+#define btree_keys_account_key_add(_nr, _bset_idx, _k) \ -+ btree_keys_account_key(_nr, _bset_idx, _k, 1) -+#define btree_keys_account_key_drop(_nr, _bset_idx, _k) \ -+ btree_keys_account_key(_nr, _bset_idx, _k, -1) -+ -+#define btree_account_key_add(_b, _k) \ -+ btree_keys_account_key(&(_b)->nr, \ -+ bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, 1) -+#define btree_account_key_drop(_b, _k) \ -+ btree_keys_account_key(&(_b)->nr, \ -+ bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, -1) -+ -+struct bset_stats { -+ struct { -+ size_t nr, bytes; -+ } sets[BSET_TREE_NR_TYPES]; -+ -+ size_t floats; -+ size_t failed; -+}; -+ -+void bch2_btree_keys_stats(struct btree *, struct bset_stats *); -+void bch2_bfloat_to_text(struct printbuf *, struct btree *, -+ struct bkey_packed *); -+ -+/* Debug stuff */ -+ -+void bch2_dump_bset(struct bch_fs *, struct btree *, struct bset *, unsigned); -+void bch2_dump_btree_node(struct bch_fs *, struct btree *); -+void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+void __bch2_verify_btree_nr_keys(struct btree *); -+void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *); -+void bch2_verify_insert_pos(struct btree *, struct bkey_packed *, -+ struct bkey_packed *, unsigned); -+ -+#else -+ -+static inline void __bch2_verify_btree_nr_keys(struct btree *b) {} -+static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter, -+ struct btree *b) {} 
-+static inline void bch2_verify_insert_pos(struct btree *b, -+ struct bkey_packed *where, -+ struct bkey_packed *insert, -+ unsigned clobber_u64s) {} -+#endif -+ -+static inline void bch2_verify_btree_nr_keys(struct btree *b) -+{ -+ if (btree_keys_expensive_checks(b)) -+ __bch2_verify_btree_nr_keys(b); -+} -+ -+#endif /* _BCACHEFS_BSET_H */ -diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c -new file mode 100644 -index 000000000000..d3addd3a8964 ---- /dev/null -+++ b/fs/bcachefs/btree_cache.c -@@ -0,0 +1,1054 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_locking.h" -+#include "debug.h" -+ -+#include -+#include -+#include -+ -+const char * const bch2_btree_ids[] = { -+#define x(kwd, val, name) name, -+ BCH_BTREE_IDS() -+#undef x -+ NULL -+}; -+ -+void bch2_recalc_btree_reserve(struct bch_fs *c) -+{ -+ unsigned i, reserve = 16; -+ -+ if (!c->btree_roots[0].b) -+ reserve += 8; -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (c->btree_roots[i].b) -+ reserve += min_t(unsigned, 1, -+ c->btree_roots[i].b->c.level) * 8; -+ -+ c->btree_cache.reserve = reserve; -+} -+ -+static inline unsigned btree_cache_can_free(struct btree_cache *bc) -+{ -+ return max_t(int, 0, bc->used - bc->reserve); -+} -+ -+static void __btree_node_data_free(struct bch_fs *c, struct btree *b) -+{ -+ EBUG_ON(btree_node_write_in_flight(b)); -+ -+ kvpfree(b->data, btree_bytes(c)); -+ b->data = NULL; -+ bch2_btree_keys_free(b); -+} -+ -+static void btree_node_data_free(struct bch_fs *c, struct btree *b) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ -+ __btree_node_data_free(c, b); -+ bc->used--; -+ list_move(&b->list, &bc->freed); -+} -+ -+static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, -+ const void *obj) -+{ -+ const struct btree *b = obj; -+ const u64 *v = arg->key; -+ -+ return b->hash_val == *v ? 0 : 1; -+} -+ -+static const struct rhashtable_params bch_btree_cache_params = { -+ .head_offset = offsetof(struct btree, hash), -+ .key_offset = offsetof(struct btree, hash_val), -+ .key_len = sizeof(u64), -+ .obj_cmpfn = bch2_btree_cache_cmp_fn, -+}; -+ -+static int __btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) -+{ -+ BUG_ON(b->data || b->aux_data); -+ -+ b->data = kvpmalloc(btree_bytes(c), gfp); -+ if (!b->data) -+ return -ENOMEM; -+ -+ if (bch2_btree_keys_alloc(b, btree_page_order(c), gfp)) { -+ kvpfree(b->data, btree_bytes(c)); -+ b->data = NULL; -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+static void btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ -+ if (!__btree_node_data_alloc(c, b, gfp)) { -+ bc->used++; -+ list_move(&b->list, &bc->freeable); -+ } else { -+ list_move(&b->list, &bc->freed); -+ } -+} -+ -+static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) -+{ -+ struct btree *b = kzalloc(sizeof(struct btree), gfp); -+ if (!b) -+ return NULL; -+ -+ bkey_btree_ptr_init(&b->key); -+ six_lock_init(&b->c.lock); -+ INIT_LIST_HEAD(&b->list); -+ INIT_LIST_HEAD(&b->write_blocked); -+ -+ btree_node_data_alloc(c, b, gfp); -+ return b->data ? 
b : NULL; -+} -+ -+/* Btree in memory cache - hash table */ -+ -+void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) -+{ -+ rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); -+ -+ /* Cause future lookups for this node to fail: */ -+ b->hash_val = 0; -+} -+ -+int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) -+{ -+ BUG_ON(b->hash_val); -+ b->hash_val = btree_ptr_hash_val(&b->key); -+ -+ return rhashtable_lookup_insert_fast(&bc->table, &b->hash, -+ bch_btree_cache_params); -+} -+ -+int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b, -+ unsigned level, enum btree_id id) -+{ -+ int ret; -+ -+ b->c.level = level; -+ b->c.btree_id = id; -+ -+ mutex_lock(&bc->lock); -+ ret = __bch2_btree_node_hash_insert(bc, b); -+ if (!ret) -+ list_add(&b->list, &bc->live); -+ mutex_unlock(&bc->lock); -+ -+ return ret; -+} -+ -+__flatten -+static inline struct btree *btree_cache_find(struct btree_cache *bc, -+ const struct bkey_i *k) -+{ -+ u64 v = btree_ptr_hash_val(k); -+ -+ return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params); -+} -+ -+/* -+ * this version is for btree nodes that have already been freed (we're not -+ * reaping a real btree node) -+ */ -+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ int ret = 0; -+ -+ lockdep_assert_held(&bc->lock); -+ -+ if (!six_trylock_intent(&b->c.lock)) -+ return -ENOMEM; -+ -+ if (!six_trylock_write(&b->c.lock)) -+ goto out_unlock_intent; -+ -+ if (btree_node_noevict(b)) -+ goto out_unlock; -+ -+ if (!btree_node_may_write(b)) -+ goto out_unlock; -+ -+ if (btree_node_dirty(b) && -+ test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) -+ goto out_unlock; -+ -+ if (btree_node_dirty(b) || -+ btree_node_write_in_flight(b) || -+ btree_node_read_in_flight(b)) { -+ if (!flush) -+ goto out_unlock; -+ -+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, -+ TASK_UNINTERRUPTIBLE); -+ -+ /* -+ * Using the underscore version because we don't want to compact -+ * bsets after the write, since this node is about to be evicted -+ * - unless btree verify mode is enabled, since it runs out of -+ * the post write cleanup: -+ */ -+ if (verify_btree_ondisk(c)) -+ bch2_btree_node_write(c, b, SIX_LOCK_intent); -+ else -+ __bch2_btree_node_write(c, b, SIX_LOCK_read); -+ -+ /* wait for any in flight btree write */ -+ btree_node_wait_on_io(b); -+ } -+out: -+ if (b->hash_val && !ret) -+ trace_btree_node_reap(c, b); -+ return ret; -+out_unlock: -+ six_unlock_write(&b->c.lock); -+out_unlock_intent: -+ six_unlock_intent(&b->c.lock); -+ ret = -ENOMEM; -+ goto out; -+} -+ -+static int btree_node_reclaim(struct bch_fs *c, struct btree *b) -+{ -+ return __btree_node_reclaim(c, b, false); -+} -+ -+static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b) -+{ -+ return __btree_node_reclaim(c, b, true); -+} -+ -+static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, -+ struct shrink_control *sc) -+{ -+ struct bch_fs *c = container_of(shrink, struct bch_fs, -+ btree_cache.shrink); -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b, *t; -+ unsigned long nr = sc->nr_to_scan; -+ unsigned long can_free; -+ unsigned long touched = 0; -+ unsigned long freed = 0; -+ unsigned i; -+ -+ if (btree_shrinker_disabled(c)) -+ return SHRINK_STOP; -+ -+ /* Return -1 if we can't do anything right now */ -+ if (sc->gfp_mask & __GFP_FS) -+ mutex_lock(&bc->lock); -+ else if (!mutex_trylock(&bc->lock)) -+ 
return -1; -+ -+ /* -+ * It's _really_ critical that we don't free too many btree nodes - we -+ * have to always leave ourselves a reserve. The reserve is how we -+ * guarantee that allocating memory for a new btree node can always -+ * succeed, so that inserting keys into the btree can always succeed and -+ * IO can always make forward progress: -+ */ -+ nr /= btree_pages(c); -+ can_free = btree_cache_can_free(bc); -+ nr = min_t(unsigned long, nr, can_free); -+ -+ i = 0; -+ list_for_each_entry_safe(b, t, &bc->freeable, list) { -+ touched++; -+ -+ if (freed >= nr) -+ break; -+ -+ if (++i > 3 && -+ !btree_node_reclaim(c, b)) { -+ btree_node_data_free(c, b); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ freed++; -+ } -+ } -+restart: -+ list_for_each_entry_safe(b, t, &bc->live, list) { -+ touched++; -+ -+ if (freed >= nr) { -+ /* Save position */ -+ if (&t->list != &bc->live) -+ list_move_tail(&bc->live, &t->list); -+ break; -+ } -+ -+ if (!btree_node_accessed(b) && -+ !btree_node_reclaim(c, b)) { -+ /* can't call bch2_btree_node_hash_remove under lock */ -+ freed++; -+ if (&t->list != &bc->live) -+ list_move_tail(&bc->live, &t->list); -+ -+ btree_node_data_free(c, b); -+ mutex_unlock(&bc->lock); -+ -+ bch2_btree_node_hash_remove(bc, b); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ -+ if (freed >= nr) -+ goto out; -+ -+ if (sc->gfp_mask & __GFP_FS) -+ mutex_lock(&bc->lock); -+ else if (!mutex_trylock(&bc->lock)) -+ goto out; -+ goto restart; -+ } else -+ clear_btree_node_accessed(b); -+ } -+ -+ mutex_unlock(&bc->lock); -+out: -+ return (unsigned long) freed * btree_pages(c); -+} -+ -+static unsigned long bch2_btree_cache_count(struct shrinker *shrink, -+ struct shrink_control *sc) -+{ -+ struct bch_fs *c = container_of(shrink, struct bch_fs, -+ btree_cache.shrink); -+ struct btree_cache *bc = &c->btree_cache; -+ -+ if (btree_shrinker_disabled(c)) -+ return 0; -+ -+ return btree_cache_can_free(bc) * btree_pages(c); -+} -+ -+void bch2_fs_btree_cache_exit(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ unsigned i; -+ -+ if (bc->shrink.list.next) -+ unregister_shrinker(&bc->shrink); -+ -+ mutex_lock(&bc->lock); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ if (c->verify_data) -+ list_move(&c->verify_data->list, &bc->live); -+ -+ kvpfree(c->verify_ondisk, btree_bytes(c)); -+#endif -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (c->btree_roots[i].b) -+ list_add(&c->btree_roots[i].b->list, &bc->live); -+ -+ list_splice(&bc->freeable, &bc->live); -+ -+ while (!list_empty(&bc->live)) { -+ b = list_first_entry(&bc->live, struct btree, list); -+ -+ BUG_ON(btree_node_read_in_flight(b) || -+ btree_node_write_in_flight(b)); -+ -+ if (btree_node_dirty(b)) -+ bch2_btree_complete_write(c, b, btree_current_write(b)); -+ clear_btree_node_dirty(b); -+ -+ btree_node_data_free(c, b); -+ } -+ -+ while (!list_empty(&bc->freed)) { -+ b = list_first_entry(&bc->freed, struct btree, list); -+ list_del(&b->list); -+ kfree(b); -+ } -+ -+ mutex_unlock(&bc->lock); -+ -+ if (bc->table_init_done) -+ rhashtable_destroy(&bc->table); -+} -+ -+int bch2_fs_btree_cache_init(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ unsigned i; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ ret = rhashtable_init(&bc->table, &bch_btree_cache_params); -+ if (ret) -+ goto out; -+ -+ bc->table_init_done = true; -+ -+ bch2_recalc_btree_reserve(c); -+ -+ for (i = 0; i < bc->reserve; i++) -+ if (!btree_node_mem_alloc(c, GFP_KERNEL)) { -+ 
ret = -ENOMEM; -+ goto out; -+ } -+ -+ list_splice_init(&bc->live, &bc->freeable); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ mutex_init(&c->verify_lock); -+ -+ c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL); -+ if (!c->verify_ondisk) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ c->verify_data = btree_node_mem_alloc(c, GFP_KERNEL); -+ if (!c->verify_data) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ list_del_init(&c->verify_data->list); -+#endif -+ -+ bc->shrink.count_objects = bch2_btree_cache_count; -+ bc->shrink.scan_objects = bch2_btree_cache_scan; -+ bc->shrink.seeks = 4; -+ bc->shrink.batch = btree_pages(c) * 2; -+ register_shrinker(&bc->shrink); -+out: -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -+ -+void bch2_fs_btree_cache_init_early(struct btree_cache *bc) -+{ -+ mutex_init(&bc->lock); -+ INIT_LIST_HEAD(&bc->live); -+ INIT_LIST_HEAD(&bc->freeable); -+ INIT_LIST_HEAD(&bc->freed); -+} -+ -+/* -+ * We can only have one thread cannibalizing other cached btree nodes at a time, -+ * or we'll deadlock. We use an open coded mutex to ensure that, which a -+ * cannibalize_bucket() will take. This means every time we unlock the root of -+ * the btree, we need to release this lock if we have it held. -+ */ -+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ -+ if (bc->alloc_lock == current) { -+ trace_btree_node_cannibalize_unlock(c); -+ bc->alloc_lock = NULL; -+ closure_wake_up(&bc->alloc_wait); -+ } -+} -+ -+int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct task_struct *old; -+ -+ old = cmpxchg(&bc->alloc_lock, NULL, current); -+ if (old == NULL || old == current) -+ goto success; -+ -+ if (!cl) { -+ trace_btree_node_cannibalize_lock_fail(c); -+ return -ENOMEM; -+ } -+ -+ closure_wait(&bc->alloc_wait, cl); -+ -+ /* Try again, after adding ourselves to waitlist */ -+ old = cmpxchg(&bc->alloc_lock, NULL, current); -+ if (old == NULL || old == current) { -+ /* We raced */ -+ closure_wake_up(&bc->alloc_wait); -+ goto success; -+ } -+ -+ trace_btree_node_cannibalize_lock_fail(c); -+ return -EAGAIN; -+ -+success: -+ trace_btree_node_cannibalize_lock(c); -+ return 0; -+} -+ -+static struct btree *btree_node_cannibalize(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ -+ list_for_each_entry_reverse(b, &bc->live, list) -+ if (!btree_node_reclaim(c, b)) -+ return b; -+ -+ while (1) { -+ list_for_each_entry_reverse(b, &bc->live, list) -+ if (!btree_node_write_and_reclaim(c, b)) -+ return b; -+ -+ /* -+ * Rare case: all nodes were intent-locked. -+ * Just busy-wait. -+ */ -+ WARN_ONCE(1, "btree cache cannibalize failed\n"); -+ cond_resched(); -+ } -+} -+ -+struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ u64 start_time = local_clock(); -+ unsigned flags; -+ -+ flags = memalloc_nofs_save(); -+ mutex_lock(&bc->lock); -+ -+ /* -+ * btree_free() doesn't free memory; it sticks the node on the end of -+ * the list. Check if there's any freed nodes there: -+ */ -+ list_for_each_entry(b, &bc->freeable, list) -+ if (!btree_node_reclaim(c, b)) -+ goto got_node; -+ -+ /* -+ * We never free struct btree itself, just the memory that holds the on -+ * disk node. 
Check the freed list before allocating a new one: -+ */ -+ list_for_each_entry(b, &bc->freed, list) -+ if (!btree_node_reclaim(c, b)) -+ goto got_node; -+ -+ b = NULL; -+got_node: -+ if (b) -+ list_del_init(&b->list); -+ mutex_unlock(&bc->lock); -+ -+ if (!b) { -+ b = kzalloc(sizeof(struct btree), GFP_KERNEL); -+ if (!b) -+ goto err; -+ -+ bkey_btree_ptr_init(&b->key); -+ six_lock_init(&b->c.lock); -+ INIT_LIST_HEAD(&b->list); -+ INIT_LIST_HEAD(&b->write_blocked); -+ -+ BUG_ON(!six_trylock_intent(&b->c.lock)); -+ BUG_ON(!six_trylock_write(&b->c.lock)); -+ } -+ -+ if (!b->data) { -+ if (__btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_KERNEL)) -+ goto err; -+ -+ mutex_lock(&bc->lock); -+ bc->used++; -+ mutex_unlock(&bc->lock); -+ } -+ -+ BUG_ON(btree_node_hashed(b)); -+ BUG_ON(btree_node_write_in_flight(b)); -+out: -+ b->flags = 0; -+ b->written = 0; -+ b->nsets = 0; -+ b->sib_u64s[0] = 0; -+ b->sib_u64s[1] = 0; -+ b->whiteout_u64s = 0; -+ bch2_btree_keys_init(b, &c->expensive_debug_checks); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc], -+ start_time); -+ -+ memalloc_nofs_restore(flags); -+ return b; -+err: -+ mutex_lock(&bc->lock); -+ -+ if (b) { -+ list_add(&b->list, &bc->freed); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ } -+ -+ /* Try to cannibalize another cached btree node: */ -+ if (bc->alloc_lock == current) { -+ b = btree_node_cannibalize(c); -+ list_del_init(&b->list); -+ mutex_unlock(&bc->lock); -+ -+ bch2_btree_node_hash_remove(bc, b); -+ -+ trace_btree_node_cannibalize(c); -+ goto out; -+ } -+ -+ mutex_unlock(&bc->lock); -+ memalloc_nofs_restore(flags); -+ return ERR_PTR(-ENOMEM); -+} -+ -+/* Slowpath, don't want it inlined into btree_iter_traverse() */ -+static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, -+ struct btree_iter *iter, -+ const struct bkey_i *k, -+ enum btree_id btree_id, -+ unsigned level, -+ enum six_lock_type lock_type, -+ bool sync) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ -+ BUG_ON(level + 1 >= BTREE_MAX_DEPTH); -+ /* -+ * Parent node must be locked, else we could read in a btree node that's -+ * been freed: -+ */ -+ if (iter && !bch2_btree_node_relock(iter, level + 1)) -+ return ERR_PTR(-EINTR); -+ -+ b = bch2_btree_node_mem_alloc(c); -+ if (IS_ERR(b)) -+ return b; -+ -+ bkey_copy(&b->key, k); -+ if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) { -+ /* raced with another fill: */ -+ -+ /* mark as unhashed... */ -+ b->hash_val = 0; -+ -+ mutex_lock(&bc->lock); -+ list_add(&b->list, &bc->freeable); -+ mutex_unlock(&bc->lock); -+ -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ return NULL; -+ } -+ -+ /* -+ * Unlock before doing IO: -+ * -+ * XXX: ideally should be dropping all btree node locks here -+ */ -+ if (iter && btree_node_read_locked(iter, level + 1)) -+ btree_node_unlock(iter, level + 1); -+ -+ bch2_btree_node_read(c, b, sync); -+ -+ six_unlock_write(&b->c.lock); -+ -+ if (!sync) { -+ six_unlock_intent(&b->c.lock); -+ return NULL; -+ } -+ -+ if (lock_type == SIX_LOCK_read) -+ six_lock_downgrade(&b->c.lock); -+ -+ return b; -+} -+ -+static int lock_node_check_fn(struct six_lock *lock, void *p) -+{ -+ struct btree *b = container_of(lock, struct btree, c.lock); -+ const struct bkey_i *k = p; -+ -+ return b->hash_val == btree_ptr_hash_val(k) ? 0 : -1; -+} -+ -+/** -+ * bch_btree_node_get - find a btree node in the cache and lock it, reading it -+ * in from disk if necessary. 
-+ * -+ * If IO is necessary and running under generic_make_request, returns -EAGAIN. -+ * -+ * The btree node will have either a read or a write lock held, depending on -+ * the @write parameter. -+ */ -+struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter, -+ const struct bkey_i *k, unsigned level, -+ enum six_lock_type lock_type) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ struct bset_tree *t; -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ -+ b = btree_node_mem_ptr(k); -+ if (b) -+ goto lock_node; -+retry: -+ b = btree_cache_find(bc, k); -+ if (unlikely(!b)) { -+ /* -+ * We must have the parent locked to call bch2_btree_node_fill(), -+ * else we could read in a btree node from disk that's been -+ * freed: -+ */ -+ b = bch2_btree_node_fill(c, iter, k, iter->btree_id, -+ level, lock_type, true); -+ -+ /* We raced and found the btree node in the cache */ -+ if (!b) -+ goto retry; -+ -+ if (IS_ERR(b)) -+ return b; -+ } else { -+lock_node: -+ /* -+ * There's a potential deadlock with splits and insertions into -+ * interior nodes we have to avoid: -+ * -+ * The other thread might be holding an intent lock on the node -+ * we want, and they want to update its parent node so they're -+ * going to upgrade their intent lock on the parent node to a -+ * write lock. -+ * -+ * But if we're holding a read lock on the parent, and we're -+ * trying to get the intent lock they're holding, we deadlock. -+ * -+ * So to avoid this we drop the read locks on parent nodes when -+ * we're starting to take intent locks - and handle the race. -+ * -+ * The race is that they might be about to free the node we -+ * want, and dropping our read lock on the parent node lets them -+ * update the parent marking the node we want as freed, and then -+ * free it: -+ * -+ * To guard against this, btree nodes are evicted from the cache -+ * when they're freed - and b->hash_val is zeroed out, which we -+ * check for after we lock the node. 
-+ * -+ * Then, bch2_btree_node_relock() on the parent will fail - because -+ * the parent was modified, when the pointer to the node we want -+ * was removed - and we'll bail out: -+ */ -+ if (btree_node_read_locked(iter, level + 1)) -+ btree_node_unlock(iter, level + 1); -+ -+ if (!btree_node_lock(b, k->k.p, level, iter, lock_type, -+ lock_node_check_fn, (void *) k)) { -+ if (b->hash_val != btree_ptr_hash_val(k)) -+ goto retry; -+ return ERR_PTR(-EINTR); -+ } -+ -+ if (unlikely(b->hash_val != btree_ptr_hash_val(k) || -+ b->c.level != level || -+ race_fault())) { -+ six_unlock_type(&b->c.lock, lock_type); -+ if (bch2_btree_node_relock(iter, level + 1)) -+ goto retry; -+ -+ trace_trans_restart_btree_node_reused(iter->trans->ip); -+ return ERR_PTR(-EINTR); -+ } -+ } -+ -+ /* XXX: waiting on IO with btree locks held: */ -+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, -+ TASK_UNINTERRUPTIBLE); -+ -+ prefetch(b->aux_data); -+ -+ for_each_bset(b, t) { -+ void *p = (u64 *) b->aux_data + t->aux_data_offset; -+ -+ prefetch(p + L1_CACHE_BYTES * 0); -+ prefetch(p + L1_CACHE_BYTES * 1); -+ prefetch(p + L1_CACHE_BYTES * 2); -+ } -+ -+ /* avoid atomic set bit if it's not needed: */ -+ if (!btree_node_accessed(b)) -+ set_btree_node_accessed(b); -+ -+ if (unlikely(btree_node_read_error(b))) { -+ six_unlock_type(&b->c.lock, lock_type); -+ return ERR_PTR(-EIO); -+ } -+ -+ EBUG_ON(b->c.btree_id != iter->btree_id || -+ BTREE_NODE_LEVEL(b->data) != level || -+ bkey_cmp(b->data->max_key, k->k.p)); -+ -+ return b; -+} -+ -+struct btree *bch2_btree_node_get_noiter(struct bch_fs *c, -+ const struct bkey_i *k, -+ enum btree_id btree_id, -+ unsigned level) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ struct bset_tree *t; -+ int ret; -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ -+ b = btree_node_mem_ptr(k); -+ if (b) -+ goto lock_node; -+retry: -+ b = btree_cache_find(bc, k); -+ if (unlikely(!b)) { -+ b = bch2_btree_node_fill(c, NULL, k, btree_id, -+ level, SIX_LOCK_read, true); -+ -+ /* We raced and found the btree node in the cache */ -+ if (!b) -+ goto retry; -+ -+ if (IS_ERR(b)) -+ return b; -+ } else { -+lock_node: -+ ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k); -+ if (ret) -+ goto retry; -+ -+ if (unlikely(b->hash_val != btree_ptr_hash_val(k) || -+ b->c.btree_id != btree_id || -+ b->c.level != level)) { -+ six_unlock_read(&b->c.lock); -+ goto retry; -+ } -+ } -+ -+ /* XXX: waiting on IO with btree locks held: */ -+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, -+ TASK_UNINTERRUPTIBLE); -+ -+ prefetch(b->aux_data); -+ -+ for_each_bset(b, t) { -+ void *p = (u64 *) b->aux_data + t->aux_data_offset; -+ -+ prefetch(p + L1_CACHE_BYTES * 0); -+ prefetch(p + L1_CACHE_BYTES * 1); -+ prefetch(p + L1_CACHE_BYTES * 2); -+ } -+ -+ /* avoid atomic set bit if it's not needed: */ -+ if (!btree_node_accessed(b)) -+ set_btree_node_accessed(b); -+ -+ if (unlikely(btree_node_read_error(b))) { -+ six_unlock_read(&b->c.lock); -+ return ERR_PTR(-EIO); -+ } -+ -+ EBUG_ON(b->c.btree_id != btree_id || -+ BTREE_NODE_LEVEL(b->data) != level || -+ bkey_cmp(b->data->max_key, k->k.p)); -+ -+ return b; -+} -+ -+struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, -+ struct btree_iter *iter, -+ struct btree *b, -+ enum btree_node_sibling sib) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree *parent; -+ struct btree_node_iter node_iter; -+ struct bkey_packed *k; -+ BKEY_PADDED(k) tmp; -+ struct btree *ret = NULL; -+ unsigned level = b->c.level; -+ -+ parent = 
btree_iter_node(iter, level + 1); -+ if (!parent) -+ return NULL; -+ -+ /* -+ * There's a corner case where a btree_iter might have a node locked -+ * that is just outside its current pos - when -+ * bch2_btree_iter_set_pos_same_leaf() gets to the end of the node. -+ * -+ * But the lock ordering checks in __bch2_btree_node_lock() go off of -+ * iter->pos, not the node's key: so if the iterator is marked as -+ * needing to be traversed, we risk deadlock if we don't bail out here: -+ */ -+ if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE) -+ return ERR_PTR(-EINTR); -+ -+ if (!bch2_btree_node_relock(iter, level + 1)) { -+ ret = ERR_PTR(-EINTR); -+ goto out; -+ } -+ -+ node_iter = iter->l[parent->c.level].iter; -+ -+ k = bch2_btree_node_iter_peek_all(&node_iter, parent); -+ BUG_ON(bkey_cmp_left_packed(parent, k, &b->key.k.p)); -+ -+ k = sib == btree_prev_sib -+ ? bch2_btree_node_iter_prev(&node_iter, parent) -+ : (bch2_btree_node_iter_advance(&node_iter, parent), -+ bch2_btree_node_iter_peek(&node_iter, parent)); -+ if (!k) -+ goto out; -+ -+ bch2_bkey_unpack(parent, &tmp.k, k); -+ -+ ret = bch2_btree_node_get(c, iter, &tmp.k, level, -+ SIX_LOCK_intent); -+ -+ if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) { -+ struct btree_iter *linked; -+ -+ if (!bch2_btree_node_relock(iter, level + 1)) -+ goto out; -+ -+ /* -+ * We might have got -EINTR because trylock failed, and we're -+ * holding other locks that would cause us to deadlock: -+ */ -+ trans_for_each_iter(trans, linked) -+ if (btree_iter_cmp(iter, linked) < 0) -+ __bch2_btree_iter_unlock(linked); -+ -+ if (sib == btree_prev_sib) -+ btree_node_unlock(iter, level); -+ -+ ret = bch2_btree_node_get(c, iter, &tmp.k, level, -+ SIX_LOCK_intent); -+ -+ /* -+ * before btree_iter_relock() calls btree_iter_verify_locks(): -+ */ -+ if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED) -+ btree_node_unlock(iter, level + 1); -+ -+ if (!bch2_btree_node_relock(iter, level)) { -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK); -+ -+ if (!IS_ERR(ret)) { -+ six_unlock_intent(&ret->c.lock); -+ ret = ERR_PTR(-EINTR); -+ } -+ } -+ -+ bch2_trans_relock(trans); -+ } -+out: -+ if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED) -+ btree_node_unlock(iter, level + 1); -+ -+ if (PTR_ERR_OR_ZERO(ret) == -EINTR) -+ bch2_btree_iter_upgrade(iter, level + 2); -+ -+ BUG_ON(!IS_ERR(ret) && !btree_node_locked(iter, level)); -+ -+ if (!IS_ERR_OR_NULL(ret)) { -+ struct btree *n1 = ret, *n2 = b; -+ -+ if (sib != btree_prev_sib) -+ swap(n1, n2); -+ -+ BUG_ON(bkey_cmp(bkey_successor(n1->key.k.p), -+ n2->data->min_key)); -+ } -+ -+ bch2_btree_trans_verify_locks(trans); -+ -+ return ret; -+} -+ -+void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, -+ const struct bkey_i *k, unsigned level) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ -+ BUG_ON(!btree_node_locked(iter, level + 1)); -+ BUG_ON(level >= BTREE_MAX_DEPTH); -+ -+ b = btree_cache_find(bc, k); -+ if (b) -+ return; -+ -+ bch2_btree_node_fill(c, iter, k, iter->btree_id, -+ level, SIX_LOCK_read, false); -+} -+ -+void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, -+ struct btree *b) -+{ -+ const struct bkey_format *f = &b->format; -+ struct bset_stats stats; -+ -+ memset(&stats, 0, sizeof(stats)); -+ -+ bch2_btree_keys_stats(b, &stats); -+ -+ pr_buf(out, -+ "l %u %llu:%llu - %llu:%llu:\n" -+ " ptrs: ", -+ b->c.level, -+ b->data->min_key.inode, -+ b->data->min_key.offset, -+ b->data->max_key.inode, -+ b->data->max_key.offset); -+ 
bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key)); -+ pr_buf(out, "\n" -+ " format: u64s %u fields %u %u %u %u %u\n" -+ " unpack fn len: %u\n" -+ " bytes used %zu/%zu (%zu%% full)\n" -+ " sib u64s: %u, %u (merge threshold %zu)\n" -+ " nr packed keys %u\n" -+ " nr unpacked keys %u\n" -+ " floats %zu\n" -+ " failed unpacked %zu\n", -+ f->key_u64s, -+ f->bits_per_field[0], -+ f->bits_per_field[1], -+ f->bits_per_field[2], -+ f->bits_per_field[3], -+ f->bits_per_field[4], -+ b->unpack_fn_len, -+ b->nr.live_u64s * sizeof(u64), -+ btree_bytes(c) - sizeof(struct btree_node), -+ b->nr.live_u64s * 100 / btree_max_u64s(c), -+ b->sib_u64s[0], -+ b->sib_u64s[1], -+ BTREE_FOREGROUND_MERGE_THRESHOLD(c), -+ b->nr.packed_keys, -+ b->nr.unpacked_keys, -+ stats.floats, -+ stats.failed); -+} -diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h -new file mode 100644 -index 000000000000..2160012c734f ---- /dev/null -+++ b/fs/bcachefs/btree_cache.h -@@ -0,0 +1,109 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_CACHE_H -+#define _BCACHEFS_BTREE_CACHE_H -+ -+#include "bcachefs.h" -+#include "btree_types.h" -+ -+struct btree_iter; -+ -+extern const char * const bch2_btree_ids[]; -+ -+void bch2_recalc_btree_reserve(struct bch_fs *); -+ -+void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *); -+int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *); -+int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *, -+ unsigned, enum btree_id); -+ -+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *); -+int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *); -+ -+struct btree *bch2_btree_node_mem_alloc(struct bch_fs *); -+ -+struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *, -+ const struct bkey_i *, unsigned, -+ enum six_lock_type); -+ -+struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *, -+ enum btree_id, unsigned); -+ -+struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *, -+ struct btree *, enum btree_node_sibling); -+ -+void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *, -+ const struct bkey_i *, unsigned); -+ -+void bch2_fs_btree_cache_exit(struct bch_fs *); -+int bch2_fs_btree_cache_init(struct bch_fs *); -+void bch2_fs_btree_cache_init_early(struct btree_cache *); -+ -+static inline u64 btree_ptr_hash_val(const struct bkey_i *k) -+{ -+ switch (k->k.type) { -+ case KEY_TYPE_btree_ptr: -+ return *((u64 *) bkey_i_to_btree_ptr_c(k)->v.start); -+ case KEY_TYPE_btree_ptr_v2: -+ return bkey_i_to_btree_ptr_v2_c(k)->v.seq; -+ default: -+ return 0; -+ } -+} -+ -+static inline struct btree *btree_node_mem_ptr(const struct bkey_i *k) -+{ -+ return k->k.type == KEY_TYPE_btree_ptr_v2 -+ ? (void *)(unsigned long)bkey_i_to_btree_ptr_v2_c(k)->v.mem_ptr -+ : NULL; -+} -+ -+/* is btree node in hash table? 
*/ -+static inline bool btree_node_hashed(struct btree *b) -+{ -+ return b->hash_val != 0; -+} -+ -+#define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \ -+ for ((_tbl) = rht_dereference_rcu((_c)->btree_cache.table.tbl, \ -+ &(_c)->btree_cache.table), \ -+ _iter = 0; _iter < (_tbl)->size; _iter++) \ -+ rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash) -+ -+static inline size_t btree_bytes(struct bch_fs *c) -+{ -+ return c->opts.btree_node_size << 9; -+} -+ -+static inline size_t btree_max_u64s(struct bch_fs *c) -+{ -+ return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64); -+} -+ -+static inline size_t btree_page_order(struct bch_fs *c) -+{ -+ return get_order(btree_bytes(c)); -+} -+ -+static inline size_t btree_pages(struct bch_fs *c) -+{ -+ return 1 << btree_page_order(c); -+} -+ -+static inline unsigned btree_blocks(struct bch_fs *c) -+{ -+ return c->opts.btree_node_size >> c->block_bits; -+} -+ -+#define BTREE_SPLIT_THRESHOLD(c) (btree_max_u64s(c) * 2 / 3) -+ -+#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3) -+#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \ -+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \ -+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) << 2)) -+ -+#define btree_node_root(_c, _b) ((_c)->btree_roots[(_b)->c.btree_id].b) -+ -+void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, -+ struct btree *); -+ -+#endif /* _BCACHEFS_BTREE_CACHE_H */ -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -new file mode 100644 -index 000000000000..8771ef1f07cc ---- /dev/null -+++ b/fs/bcachefs/btree_gc.c -@@ -0,0 +1,1388 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2010 Kent Overstreet -+ * Copyright (C) 2014 Datera Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "bkey_methods.h" -+#include "btree_locking.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "clock.h" -+#include "debug.h" -+#include "ec.h" -+#include "error.h" -+#include "extents.h" -+#include "journal.h" -+#include "keylist.h" -+#include "move.h" -+#include "recovery.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) -+{ -+ write_seqcount_begin(&c->gc_pos_lock); -+ c->gc_pos = new_pos; -+ write_seqcount_end(&c->gc_pos_lock); -+} -+ -+static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) -+{ -+ BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0); -+ __gc_pos_set(c, new_pos); -+} -+ -+static int bch2_gc_check_topology(struct bch_fs *c, -+ struct bkey_s_c k, -+ struct bpos *expected_start, -+ struct bpos expected_end, -+ bool is_last) -+{ -+ int ret = 0; -+ -+ if (k.k->type == KEY_TYPE_btree_ptr_v2) { -+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); -+ -+ if (fsck_err_on(bkey_cmp(*expected_start, bp.v->min_key), c, -+ "btree node with incorrect min_key: got %llu:%llu, should be %llu:%llu", -+ bp.v->min_key.inode, -+ bp.v->min_key.offset, -+ expected_start->inode, -+ expected_start->offset)) { -+ BUG(); -+ } -+ } -+ -+ *expected_start = bkey_cmp(k.k->p, POS_MAX) -+ ? 
bkey_successor(k.k->p) -+ : k.k->p; -+ -+ if (fsck_err_on(is_last && -+ bkey_cmp(k.k->p, expected_end), c, -+ "btree node with incorrect max_key: got %llu:%llu, should be %llu:%llu", -+ k.k->p.inode, -+ k.k->p.offset, -+ expected_end.inode, -+ expected_end.offset)) { -+ BUG(); -+ } -+fsck_err: -+ return ret; -+} -+ -+/* marking of btree keys/nodes: */ -+ -+static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k, -+ u8 *max_stale, bool initial) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ unsigned flags = -+ BTREE_TRIGGER_GC| -+ (initial ? BTREE_TRIGGER_NOATOMIC : 0); -+ int ret = 0; -+ -+ if (initial) { -+ BUG_ON(journal_seq_verify(c) && -+ k.k->version.lo > journal_cur_seq(&c->journal)); -+ -+ /* XXX change to fsck check */ -+ if (fsck_err_on(k.k->version.lo > atomic64_read(&c->key_version), c, -+ "key version number higher than recorded: %llu > %llu", -+ k.k->version.lo, -+ atomic64_read(&c->key_version))) -+ atomic64_set(&c->key_version, k.k->version.lo); -+ -+ if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) || -+ fsck_err_on(!bch2_bkey_replicas_marked(c, k, false), c, -+ "superblock not marked as containing replicas (type %u)", -+ k.k->type)) { -+ ret = bch2_mark_bkey_replicas(c, k); -+ if (ret) -+ return ret; -+ } -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bucket *g = PTR_BUCKET(ca, ptr, true); -+ struct bucket *g2 = PTR_BUCKET(ca, ptr, false); -+ -+ if (mustfix_fsck_err_on(!g->gen_valid, c, -+ "bucket %u:%zu data type %s ptr gen %u missing in alloc btree", -+ ptr->dev, PTR_BUCKET_NR(ca, ptr), -+ bch2_data_types[ptr_data_type(k.k, ptr)], -+ ptr->gen)) { -+ g2->_mark.gen = g->_mark.gen = ptr->gen; -+ g2->gen_valid = g->gen_valid = true; -+ } -+ -+ if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c, -+ "bucket %u:%zu data type %s ptr gen in the future: %u > %u", -+ ptr->dev, PTR_BUCKET_NR(ca, ptr), -+ bch2_data_types[ptr_data_type(k.k, ptr)], -+ ptr->gen, g->mark.gen)) { -+ g2->_mark.gen = g->_mark.gen = ptr->gen; -+ g2->gen_valid = g->gen_valid = true; -+ g2->_mark.data_type = 0; -+ g2->_mark.dirty_sectors = 0; -+ g2->_mark.cached_sectors = 0; -+ set_bit(BCH_FS_FIXED_GENS, &c->flags); -+ } -+ } -+ } -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bucket *g = PTR_BUCKET(ca, ptr, true); -+ -+ if (gen_after(g->oldest_gen, ptr->gen)) -+ g->oldest_gen = ptr->gen; -+ -+ *max_stale = max(*max_stale, ptr_stale(ca, ptr)); -+ } -+ -+ bch2_mark_key(c, k, 0, k.k->size, NULL, 0, flags); -+fsck_err: -+ return ret; -+} -+ -+static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, -+ bool initial) -+{ -+ struct bpos next_node_start = b->data->min_key; -+ struct btree_node_iter iter; -+ struct bkey unpacked; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ *max_stale = 0; -+ -+ if (!btree_node_type_needs_gc(btree_node_type(b))) -+ return 0; -+ -+ bch2_btree_node_iter_init_from_start(&iter, b); -+ -+ while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) { -+ bch2_bkey_debugcheck(c, b, k); -+ -+ ret = bch2_gc_mark_key(c, k, max_stale, initial); -+ if (ret) -+ break; -+ -+ bch2_btree_node_iter_advance(&iter, b); -+ -+ if (b->c.level) { -+ ret = bch2_gc_check_topology(c, k, -+ &next_node_start, -+ b->data->max_key, -+ bch2_btree_node_iter_end(&iter)); -+ if (ret) -+ break; -+ } -+ } -+ -+ return ret; -+} -+ -+static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, -+ bool 
initial, bool metadata_only) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ unsigned depth = metadata_only ? 1 -+ : expensive_debug_checks(c) ? 0 -+ : !btree_node_type_needs_gc(btree_id) ? 1 -+ : 0; -+ u8 max_stale = 0; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0)); -+ -+ __for_each_btree_node(&trans, iter, btree_id, POS_MIN, -+ 0, depth, BTREE_ITER_PREFETCH, b) { -+ bch2_verify_btree_nr_keys(b); -+ -+ gc_pos_set(c, gc_pos_btree_node(b)); -+ -+ ret = btree_gc_mark_node(c, b, &max_stale, initial); -+ if (ret) -+ break; -+ -+ if (!initial) { -+ if (max_stale > 64) -+ bch2_btree_node_rewrite(c, iter, -+ b->data->keys.seq, -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_NOWAIT| -+ BTREE_INSERT_GC_LOCK_HELD); -+ else if (!btree_gc_rewrite_disabled(c) && -+ (btree_gc_always_rewrite(c) || max_stale > 16)) -+ bch2_btree_node_rewrite(c, iter, -+ b->data->keys.seq, -+ BTREE_INSERT_NOWAIT| -+ BTREE_INSERT_GC_LOCK_HELD); -+ } -+ -+ bch2_trans_cond_resched(&trans); -+ } -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ return ret; -+ -+ mutex_lock(&c->btree_root_lock); -+ b = c->btree_roots[btree_id].b; -+ if (!btree_node_fake(b)) -+ ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key), -+ &max_stale, initial); -+ gc_pos_set(c, gc_pos_btree_root(b->c.btree_id)); -+ mutex_unlock(&c->btree_root_lock); -+ -+ return ret; -+} -+ -+static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, -+ struct journal_keys *journal_keys, -+ unsigned target_depth) -+{ -+ struct btree_and_journal_iter iter; -+ struct bkey_s_c k; -+ struct bpos next_node_start = b->data->min_key; -+ u8 max_stale = 0; -+ int ret = 0; -+ -+ bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b); -+ -+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { -+ bch2_bkey_debugcheck(c, b, k); -+ -+ BUG_ON(bkey_cmp(k.k->p, b->data->min_key) < 0); -+ BUG_ON(bkey_cmp(k.k->p, b->data->max_key) > 0); -+ -+ ret = bch2_gc_mark_key(c, k, &max_stale, true); -+ if (ret) -+ break; -+ -+ if (b->c.level) { -+ struct btree *child; -+ BKEY_PADDED(k) tmp; -+ -+ bkey_reassemble(&tmp.k, k); -+ k = bkey_i_to_s_c(&tmp.k); -+ -+ bch2_btree_and_journal_iter_advance(&iter); -+ -+ ret = bch2_gc_check_topology(c, k, -+ &next_node_start, -+ b->data->max_key, -+ !bch2_btree_and_journal_iter_peek(&iter).k); -+ if (ret) -+ break; -+ -+ if (b->c.level > target_depth) { -+ child = bch2_btree_node_get_noiter(c, &tmp.k, -+ b->c.btree_id, b->c.level - 1); -+ ret = PTR_ERR_OR_ZERO(child); -+ if (ret) -+ break; -+ -+ ret = bch2_gc_btree_init_recurse(c, child, -+ journal_keys, target_depth); -+ six_unlock_read(&child->c.lock); -+ -+ if (ret) -+ break; -+ } -+ } else { -+ bch2_btree_and_journal_iter_advance(&iter); -+ } -+ } -+ -+ return ret; -+} -+ -+static int bch2_gc_btree_init(struct bch_fs *c, -+ struct journal_keys *journal_keys, -+ enum btree_id btree_id, -+ bool metadata_only) -+{ -+ struct btree *b; -+ unsigned target_depth = metadata_only ? 1 -+ : expensive_debug_checks(c) ? 0 -+ : !btree_node_type_needs_gc(btree_id) ? 
1 -+ : 0; -+ u8 max_stale = 0; -+ int ret = 0; -+ -+ b = c->btree_roots[btree_id].b; -+ -+ if (btree_node_fake(b)) -+ return 0; -+ -+ six_lock_read(&b->c.lock, NULL, NULL); -+ if (fsck_err_on(bkey_cmp(b->data->min_key, POS_MIN), c, -+ "btree root with incorrect min_key: %llu:%llu", -+ b->data->min_key.inode, -+ b->data->min_key.offset)) { -+ BUG(); -+ } -+ -+ if (fsck_err_on(bkey_cmp(b->data->max_key, POS_MAX), c, -+ "btree root with incorrect min_key: %llu:%llu", -+ b->data->max_key.inode, -+ b->data->max_key.offset)) { -+ BUG(); -+ } -+ -+ if (b->c.level >= target_depth) -+ ret = bch2_gc_btree_init_recurse(c, b, -+ journal_keys, target_depth); -+ -+ if (!ret) -+ ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key), -+ &max_stale, true); -+fsck_err: -+ six_unlock_read(&b->c.lock); -+ -+ return ret; -+} -+ -+static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r) -+{ -+ return (int) btree_id_to_gc_phase(l) - -+ (int) btree_id_to_gc_phase(r); -+} -+ -+static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys, -+ bool initial, bool metadata_only) -+{ -+ enum btree_id ids[BTREE_ID_NR]; -+ unsigned i; -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ ids[i] = i; -+ bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ enum btree_id id = ids[i]; -+ int ret = initial -+ ? bch2_gc_btree_init(c, journal_keys, -+ id, metadata_only) -+ : bch2_gc_btree(c, id, initial, metadata_only); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca, -+ u64 start, u64 end, -+ enum bch_data_type type, -+ unsigned flags) -+{ -+ u64 b = sector_to_bucket(ca, start); -+ -+ do { -+ unsigned sectors = -+ min_t(u64, bucket_to_sector(ca, b + 1), end) - start; -+ -+ bch2_mark_metadata_bucket(c, ca, b, type, sectors, -+ gc_phase(GC_PHASE_SB), flags); -+ b++; -+ start += sectors; -+ } while (start < end); -+} -+ -+void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca, -+ unsigned flags) -+{ -+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; -+ unsigned i; -+ u64 b; -+ -+ /* -+ * This conditional is kind of gross, but we may be called from the -+ * device add path, before the new device has actually been added to the -+ * running filesystem: -+ */ -+ if (c) { -+ lockdep_assert_held(&c->sb_lock); -+ percpu_down_read(&c->mark_lock); -+ } -+ -+ for (i = 0; i < layout->nr_superblocks; i++) { -+ u64 offset = le64_to_cpu(layout->sb_offset[i]); -+ -+ if (offset == BCH_SB_SECTOR) -+ mark_metadata_sectors(c, ca, 0, BCH_SB_SECTOR, -+ BCH_DATA_SB, flags); -+ -+ mark_metadata_sectors(c, ca, offset, -+ offset + (1 << layout->sb_max_size_bits), -+ BCH_DATA_SB, flags); -+ } -+ -+ for (i = 0; i < ca->journal.nr; i++) { -+ b = ca->journal.buckets[i]; -+ bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_JOURNAL, -+ ca->mi.bucket_size, -+ gc_phase(GC_PHASE_SB), flags); -+ } -+ -+ if (c) -+ percpu_up_read(&c->mark_lock); -+} -+ -+static void bch2_mark_superblocks(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ mutex_lock(&c->sb_lock); -+ gc_pos_set(c, gc_phase(GC_PHASE_SB)); -+ -+ for_each_online_member(ca, c, i) -+ bch2_mark_dev_superblock(c, ca, BTREE_TRIGGER_GC); -+ mutex_unlock(&c->sb_lock); -+} -+ -+#if 0 -+/* Also see bch2_pending_btree_node_free_insert_done() */ -+static void bch2_mark_pending_btree_node_frees(struct bch_fs *c) -+{ -+ struct btree_update *as; -+ struct pending_btree_node_free *d; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ gc_pos_set(c, 
gc_phase(GC_PHASE_PENDING_DELETE)); -+ -+ for_each_pending_btree_node_free(c, as, d) -+ if (d->index_update_done) -+ bch2_mark_key(c, bkey_i_to_s_c(&d->key), -+ 0, 0, NULL, 0, -+ BTREE_TRIGGER_GC); -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+#endif -+ -+static void bch2_mark_allocator_buckets(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ struct open_bucket *ob; -+ size_t i, j, iter; -+ unsigned ci; -+ -+ percpu_down_read(&c->mark_lock); -+ -+ spin_lock(&c->freelist_lock); -+ gc_pos_set(c, gc_pos_alloc(c, NULL)); -+ -+ for_each_member_device(ca, c, ci) { -+ fifo_for_each_entry(i, &ca->free_inc, iter) -+ bch2_mark_alloc_bucket(c, ca, i, true, -+ gc_pos_alloc(c, NULL), -+ BTREE_TRIGGER_GC); -+ -+ -+ -+ for (j = 0; j < RESERVE_NR; j++) -+ fifo_for_each_entry(i, &ca->free[j], iter) -+ bch2_mark_alloc_bucket(c, ca, i, true, -+ gc_pos_alloc(c, NULL), -+ BTREE_TRIGGER_GC); -+ } -+ -+ spin_unlock(&c->freelist_lock); -+ -+ for (ob = c->open_buckets; -+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); -+ ob++) { -+ spin_lock(&ob->lock); -+ if (ob->valid) { -+ gc_pos_set(c, gc_pos_alloc(c, ob)); -+ ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr), true, -+ gc_pos_alloc(c, ob), -+ BTREE_TRIGGER_GC); -+ } -+ spin_unlock(&ob->lock); -+ } -+ -+ percpu_up_read(&c->mark_lock); -+} -+ -+static void bch2_gc_free(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ genradix_free(&c->stripes[1]); -+ -+ for_each_member_device(ca, c, i) { -+ kvpfree(rcu_dereference_protected(ca->buckets[1], 1), -+ sizeof(struct bucket_array) + -+ ca->mi.nbuckets * sizeof(struct bucket)); -+ ca->buckets[1] = NULL; -+ -+ free_percpu(ca->usage[1]); -+ ca->usage[1] = NULL; -+ } -+ -+ free_percpu(c->usage_gc); -+ c->usage_gc = NULL; -+} -+ -+static int bch2_gc_done(struct bch_fs *c, -+ bool initial, bool metadata_only) -+{ -+ struct bch_dev *ca; -+ bool verify = !metadata_only && -+ (!initial || -+ (c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))); -+ unsigned i; -+ int ret = 0; -+ -+#define copy_field(_f, _msg, ...) \ -+ if (dst->_f != src->_f) { \ -+ if (verify) \ -+ fsck_err(c, _msg ": got %llu, should be %llu" \ -+ , ##__VA_ARGS__, dst->_f, src->_f); \ -+ dst->_f = src->_f; \ -+ } -+#define copy_stripe_field(_f, _msg, ...) \ -+ if (dst->_f != src->_f) { \ -+ if (verify) \ -+ fsck_err(c, "stripe %zu has wrong "_msg \ -+ ": got %u, should be %u", \ -+ dst_iter.pos, ##__VA_ARGS__, \ -+ dst->_f, src->_f); \ -+ dst->_f = src->_f; \ -+ dst->dirty = true; \ -+ } -+#define copy_bucket_field(_f) \ -+ if (dst->b[b].mark._f != src->b[b].mark._f) { \ -+ if (verify) \ -+ fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f \ -+ ": got %u, should be %u", i, b, \ -+ dst->b[b].mark.gen, \ -+ bch2_data_types[dst->b[b].mark.data_type],\ -+ dst->b[b].mark._f, src->b[b].mark._f); \ -+ dst->b[b]._mark._f = src->b[b].mark._f; \ -+ } -+#define copy_dev_field(_f, _msg, ...) \ -+ copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__) -+#define copy_fs_field(_f, _msg, ...) 
\ -+ copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__) -+ -+ if (!metadata_only) { -+ struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0); -+ struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0); -+ struct stripe *dst, *src; -+ unsigned i; -+ -+ c->ec_stripes_heap.used = 0; -+ -+ while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) && -+ (src = genradix_iter_peek(&src_iter, &c->stripes[1]))) { -+ BUG_ON(src_iter.pos != dst_iter.pos); -+ -+ copy_stripe_field(alive, "alive"); -+ copy_stripe_field(sectors, "sectors"); -+ copy_stripe_field(algorithm, "algorithm"); -+ copy_stripe_field(nr_blocks, "nr_blocks"); -+ copy_stripe_field(nr_redundant, "nr_redundant"); -+ copy_stripe_field(blocks_nonempty, -+ "blocks_nonempty"); -+ -+ for (i = 0; i < ARRAY_SIZE(dst->block_sectors); i++) -+ copy_stripe_field(block_sectors[i], -+ "block_sectors[%u]", i); -+ -+ if (dst->alive) -+ bch2_stripes_heap_insert(c, dst, dst_iter.pos); -+ -+ genradix_iter_advance(&dst_iter, &c->stripes[0]); -+ genradix_iter_advance(&src_iter, &c->stripes[1]); -+ } -+ } -+ -+ for_each_member_device(ca, c, i) { -+ struct bucket_array *dst = __bucket_array(ca, 0); -+ struct bucket_array *src = __bucket_array(ca, 1); -+ size_t b; -+ -+ for (b = 0; b < src->nbuckets; b++) { -+ copy_bucket_field(gen); -+ copy_bucket_field(data_type); -+ copy_bucket_field(owned_by_allocator); -+ copy_bucket_field(stripe); -+ copy_bucket_field(dirty_sectors); -+ copy_bucket_field(cached_sectors); -+ -+ dst->b[b].oldest_gen = src->b[b].oldest_gen; -+ } -+ }; -+ -+ bch2_fs_usage_acc_to_base(c, 0); -+ bch2_fs_usage_acc_to_base(c, 1); -+ -+ bch2_dev_usage_from_buckets(c); -+ -+ { -+ unsigned nr = fs_usage_u64s(c); -+ struct bch_fs_usage *dst = c->usage_base; -+ struct bch_fs_usage *src = (void *) -+ bch2_acc_percpu_u64s((void *) c->usage_gc, nr); -+ -+ copy_fs_field(hidden, "hidden"); -+ copy_fs_field(btree, "btree"); -+ -+ if (!metadata_only) { -+ copy_fs_field(data, "data"); -+ copy_fs_field(cached, "cached"); -+ copy_fs_field(reserved, "reserved"); -+ copy_fs_field(nr_inodes,"nr_inodes"); -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) -+ copy_fs_field(persistent_reserved[i], -+ "persistent_reserved[%i]", i); -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ char buf[80]; -+ -+ if (metadata_only && -+ (e->data_type == BCH_DATA_USER || -+ e->data_type == BCH_DATA_CACHED)) -+ continue; -+ -+ bch2_replicas_entry_to_text(&PBUF(buf), e); -+ -+ copy_fs_field(replicas[i], "%s", buf); -+ } -+ } -+ -+#undef copy_fs_field -+#undef copy_dev_field -+#undef copy_bucket_field -+#undef copy_stripe_field -+#undef copy_field -+fsck_err: -+ return ret; -+} -+ -+static int bch2_gc_start(struct bch_fs *c, -+ bool metadata_only) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ int ret; -+ -+ BUG_ON(c->usage_gc); -+ -+ c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64), -+ sizeof(u64), GFP_KERNEL); -+ if (!c->usage_gc) { -+ bch_err(c, "error allocating c->usage_gc"); -+ return -ENOMEM; -+ } -+ -+ for_each_member_device(ca, c, i) { -+ BUG_ON(ca->buckets[1]); -+ BUG_ON(ca->usage[1]); -+ -+ ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) + -+ ca->mi.nbuckets * sizeof(struct bucket), -+ GFP_KERNEL|__GFP_ZERO); -+ if (!ca->buckets[1]) { -+ percpu_ref_put(&ca->ref); -+ bch_err(c, "error allocating ca->buckets[gc]"); -+ return -ENOMEM; -+ } -+ -+ ca->usage[1] = alloc_percpu(struct bch_dev_usage); -+ if (!ca->usage[1]) { -+ bch_err(c, "error allocating 
ca->usage[gc]"); -+ percpu_ref_put(&ca->ref); -+ return -ENOMEM; -+ } -+ } -+ -+ ret = bch2_ec_mem_alloc(c, true); -+ if (ret) { -+ bch_err(c, "error allocating ec gc mem"); -+ return ret; -+ } -+ -+ percpu_down_write(&c->mark_lock); -+ -+ /* -+ * indicate to stripe code that we need to allocate for the gc stripes -+ * radix tree, too -+ */ -+ gc_pos_set(c, gc_phase(GC_PHASE_START)); -+ -+ for_each_member_device(ca, c, i) { -+ struct bucket_array *dst = __bucket_array(ca, 1); -+ struct bucket_array *src = __bucket_array(ca, 0); -+ size_t b; -+ -+ dst->first_bucket = src->first_bucket; -+ dst->nbuckets = src->nbuckets; -+ -+ for (b = 0; b < src->nbuckets; b++) { -+ struct bucket *d = &dst->b[b]; -+ struct bucket *s = &src->b[b]; -+ -+ d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen; -+ d->gen_valid = s->gen_valid; -+ -+ if (metadata_only && -+ (s->mark.data_type == BCH_DATA_USER || -+ s->mark.data_type == BCH_DATA_CACHED)) { -+ d->_mark = s->mark; -+ d->_mark.owned_by_allocator = 0; -+ } -+ } -+ }; -+ -+ percpu_up_write(&c->mark_lock); -+ -+ return 0; -+} -+ -+/** -+ * bch2_gc - walk _all_ references to buckets, and recompute them: -+ * -+ * Order matters here: -+ * - Concurrent GC relies on the fact that we have a total ordering for -+ * everything that GC walks - see gc_will_visit_node(), -+ * gc_will_visit_root() -+ * -+ * - also, references move around in the course of index updates and -+ * various other crap: everything needs to agree on the ordering -+ * references are allowed to move around in - e.g., we're allowed to -+ * start with a reference owned by an open_bucket (the allocator) and -+ * move it to the btree, but not the reverse. -+ * -+ * This is necessary to ensure that gc doesn't miss references that -+ * move around - if references move backwards in the ordering GC -+ * uses, GC could skip past them -+ */ -+int bch2_gc(struct bch_fs *c, struct journal_keys *journal_keys, -+ bool initial, bool metadata_only) -+{ -+ struct bch_dev *ca; -+ u64 start_time = local_clock(); -+ unsigned i, iter = 0; -+ int ret; -+ -+ lockdep_assert_held(&c->state_lock); -+ trace_gc_start(c); -+ -+ down_write(&c->gc_lock); -+ -+ /* flush interior btree updates: */ -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+again: -+ ret = bch2_gc_start(c, metadata_only); -+ if (ret) -+ goto out; -+ -+ bch2_mark_superblocks(c); -+ -+ ret = bch2_gc_btrees(c, journal_keys, initial, metadata_only); -+ if (ret) -+ goto out; -+ -+#if 0 -+ bch2_mark_pending_btree_node_frees(c); -+#endif -+ bch2_mark_allocator_buckets(c); -+ -+ c->gc_count++; -+out: -+ if (!ret && -+ (test_bit(BCH_FS_FIXED_GENS, &c->flags) || -+ (!iter && test_restart_gc(c)))) { -+ /* -+ * XXX: make sure gens we fixed got saved -+ */ -+ if (iter++ <= 2) { -+ bch_info(c, "Fixed gens, restarting mark and sweep:"); -+ clear_bit(BCH_FS_FIXED_GENS, &c->flags); -+ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); -+ -+ percpu_down_write(&c->mark_lock); -+ bch2_gc_free(c); -+ percpu_up_write(&c->mark_lock); -+ /* flush fsck errors, reset counters */ -+ bch2_flush_fsck_errs(c); -+ -+ goto again; -+ } -+ -+ bch_info(c, "Unable to fix bucket gens, looping"); -+ ret = -EINVAL; -+ } -+ -+ if (!ret) { -+ bch2_journal_block(&c->journal); -+ -+ percpu_down_write(&c->mark_lock); -+ ret = bch2_gc_done(c, initial, metadata_only); -+ -+ bch2_journal_unblock(&c->journal); -+ } else { -+ percpu_down_write(&c->mark_lock); -+ } -+ -+ /* Indicates that gc is no longer in progress: */ -+ __gc_pos_set(c, 
gc_phase(GC_PHASE_NOT_RUNNING)); -+ -+ bch2_gc_free(c); -+ percpu_up_write(&c->mark_lock); -+ -+ up_write(&c->gc_lock); -+ -+ trace_gc_end(c); -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time); -+ -+ /* -+ * Wake up allocator in case it was waiting for buckets -+ * because of not being able to inc gens -+ */ -+ for_each_member_device(ca, c, i) -+ bch2_wake_allocator(ca); -+ -+ /* -+ * At startup, allocations can happen directly instead of via the -+ * allocator thread - issue wakeup in case they blocked on gc_lock: -+ */ -+ closure_wake_up(&c->freelist_wait); -+ return ret; -+} -+ -+/* -+ * For recalculating oldest gen, we only need to walk keys in leaf nodes; btree -+ * node pointers currently never have cached pointers that can become stale: -+ */ -+static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id id) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ percpu_down_read(&c->mark_lock); -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bucket *g = PTR_BUCKET(ca, ptr, false); -+ -+ if (gen_after(g->gc_gen, ptr->gen)) -+ g->gc_gen = ptr->gen; -+ -+ if (gen_after(g->mark.gen, ptr->gen) > 32) { -+ /* rewrite btree node */ -+ -+ } -+ } -+ percpu_up_read(&c->mark_lock); -+ } -+ -+ bch2_trans_exit(&trans); -+ return ret; -+} -+ -+int bch2_gc_gens(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ struct bucket_array *buckets; -+ struct bucket *g; -+ unsigned i; -+ int ret; -+ -+ /* -+ * Ideally we would be using state_lock and not gc_lock here, but that -+ * introduces a deadlock in the RO path - we currently take the state -+ * lock at the start of going RO, thus the gc thread may get stuck: -+ */ -+ down_read(&c->gc_lock); -+ -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) -+ g->gc_gen = g->mark.gen; -+ up_read(&ca->bucket_lock); -+ } -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (btree_node_type_needs_gc(i)) { -+ ret = bch2_gc_btree_gens(c, i); -+ if (ret) -+ goto err; -+ } -+ -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) -+ g->oldest_gen = g->gc_gen; -+ up_read(&ca->bucket_lock); -+ } -+err: -+ up_read(&c->gc_lock); -+ return ret; -+} -+ -+/* Btree coalescing */ -+ -+static void recalc_packed_keys(struct btree *b) -+{ -+ struct bset *i = btree_bset_first(b); -+ struct bkey_packed *k; -+ -+ memset(&b->nr, 0, sizeof(b->nr)); -+ -+ BUG_ON(b->nsets != 1); -+ -+ vstruct_for_each(i, k) -+ btree_keys_account_key_add(&b->nr, 0, k); -+} -+ -+static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter, -+ struct btree *old_nodes[GC_MERGE_NODES]) -+{ -+ struct btree *parent = btree_node_parent(iter, old_nodes[0]); -+ unsigned i, nr_old_nodes, nr_new_nodes, u64s = 0; -+ unsigned blocks = btree_blocks(c) * 2 / 3; -+ struct btree *new_nodes[GC_MERGE_NODES]; -+ struct btree_update *as; -+ struct keylist keylist; -+ struct bkey_format_state format_state; -+ struct bkey_format new_format; -+ -+ memset(new_nodes, 0, sizeof(new_nodes)); -+ bch2_keylist_init(&keylist, NULL); -+ -+ /* Count keys that are not deleted */ -+ for (i = 0; i < GC_MERGE_NODES && old_nodes[i]; i++) -+ u64s += 
old_nodes[i]->nr.live_u64s; -+ -+ nr_old_nodes = nr_new_nodes = i; -+ -+ /* Check if all keys in @old_nodes could fit in one fewer node */ -+ if (nr_old_nodes <= 1 || -+ __vstruct_blocks(struct btree_node, c->block_bits, -+ DIV_ROUND_UP(u64s, nr_old_nodes - 1)) > blocks) -+ return; -+ -+ /* Find a format that all keys in @old_nodes can pack into */ -+ bch2_bkey_format_init(&format_state); -+ -+ for (i = 0; i < nr_old_nodes; i++) -+ __bch2_btree_calc_format(&format_state, old_nodes[i]); -+ -+ new_format = bch2_bkey_format_done(&format_state); -+ -+ /* Check if repacking would make any nodes too big to fit */ -+ for (i = 0; i < nr_old_nodes; i++) -+ if (!bch2_btree_node_format_fits(c, old_nodes[i], &new_format)) { -+ trace_btree_gc_coalesce_fail(c, -+ BTREE_GC_COALESCE_FAIL_FORMAT_FITS); -+ return; -+ } -+ -+ if (bch2_keylist_realloc(&keylist, NULL, 0, -+ (BKEY_U64s + BKEY_EXTENT_U64s_MAX) * nr_old_nodes)) { -+ trace_btree_gc_coalesce_fail(c, -+ BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC); -+ return; -+ } -+ -+ as = bch2_btree_update_start(iter->trans, iter->btree_id, -+ btree_update_reserve_required(c, parent) + nr_old_nodes, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE, -+ NULL); -+ if (IS_ERR(as)) { -+ trace_btree_gc_coalesce_fail(c, -+ BTREE_GC_COALESCE_FAIL_RESERVE_GET); -+ bch2_keylist_free(&keylist, NULL); -+ return; -+ } -+ -+ trace_btree_gc_coalesce(c, old_nodes[0]); -+ -+ for (i = 0; i < nr_old_nodes; i++) -+ bch2_btree_interior_update_will_free_node(as, old_nodes[i]); -+ -+ /* Repack everything with @new_format and sort down to one bset */ -+ for (i = 0; i < nr_old_nodes; i++) -+ new_nodes[i] = -+ __bch2_btree_node_alloc_replacement(as, old_nodes[i], -+ new_format); -+ -+ /* -+ * Conceptually we concatenate the nodes together and slice them -+ * up at different boundaries. 
-+ */ -+ for (i = nr_new_nodes - 1; i > 0; --i) { -+ struct btree *n1 = new_nodes[i]; -+ struct btree *n2 = new_nodes[i - 1]; -+ -+ struct bset *s1 = btree_bset_first(n1); -+ struct bset *s2 = btree_bset_first(n2); -+ struct bkey_packed *k, *last = NULL; -+ -+ /* Calculate how many keys from @n2 we could fit inside @n1 */ -+ u64s = 0; -+ -+ for (k = s2->start; -+ k < vstruct_last(s2) && -+ vstruct_blocks_plus(n1->data, c->block_bits, -+ u64s + k->u64s) <= blocks; -+ k = bkey_next_skip_noops(k, vstruct_last(s2))) { -+ last = k; -+ u64s += k->u64s; -+ } -+ -+ if (u64s == le16_to_cpu(s2->u64s)) { -+ /* n2 fits entirely in n1 */ -+ n1->key.k.p = n1->data->max_key = n2->data->max_key; -+ -+ memcpy_u64s(vstruct_last(s1), -+ s2->start, -+ le16_to_cpu(s2->u64s)); -+ le16_add_cpu(&s1->u64s, le16_to_cpu(s2->u64s)); -+ -+ set_btree_bset_end(n1, n1->set); -+ -+ six_unlock_write(&n2->c.lock); -+ bch2_btree_node_free_never_inserted(c, n2); -+ six_unlock_intent(&n2->c.lock); -+ -+ memmove(new_nodes + i - 1, -+ new_nodes + i, -+ sizeof(new_nodes[0]) * (nr_new_nodes - i)); -+ new_nodes[--nr_new_nodes] = NULL; -+ } else if (u64s) { -+ /* move part of n2 into n1 */ -+ n1->key.k.p = n1->data->max_key = -+ bkey_unpack_pos(n1, last); -+ -+ n2->data->min_key = bkey_successor(n1->data->max_key); -+ -+ memcpy_u64s(vstruct_last(s1), -+ s2->start, u64s); -+ le16_add_cpu(&s1->u64s, u64s); -+ -+ memmove(s2->start, -+ vstruct_idx(s2, u64s), -+ (le16_to_cpu(s2->u64s) - u64s) * sizeof(u64)); -+ s2->u64s = cpu_to_le16(le16_to_cpu(s2->u64s) - u64s); -+ -+ set_btree_bset_end(n1, n1->set); -+ set_btree_bset_end(n2, n2->set); -+ } -+ } -+ -+ for (i = 0; i < nr_new_nodes; i++) { -+ struct btree *n = new_nodes[i]; -+ -+ recalc_packed_keys(n); -+ btree_node_reset_sib_u64s(n); -+ -+ bch2_btree_build_aux_trees(n); -+ -+ bch2_btree_update_add_new_node(as, n); -+ six_unlock_write(&n->c.lock); -+ -+ bch2_btree_node_write(c, n, SIX_LOCK_intent); -+ } -+ -+ /* -+ * The keys for the old nodes get deleted. We don't want to insert keys -+ * that compare equal to the keys for the new nodes we'll also be -+ * inserting - we can't because keys on a keylist must be strictly -+ * greater than the previous keys, and we also don't need to since the -+ * key for the new node will serve the same purpose (overwriting the key -+ * for the old node). 
-+ */ -+ for (i = 0; i < nr_old_nodes; i++) { -+ struct bkey_i delete; -+ unsigned j; -+ -+ for (j = 0; j < nr_new_nodes; j++) -+ if (!bkey_cmp(old_nodes[i]->key.k.p, -+ new_nodes[j]->key.k.p)) -+ goto next; -+ -+ bkey_init(&delete.k); -+ delete.k.p = old_nodes[i]->key.k.p; -+ bch2_keylist_add_in_order(&keylist, &delete); -+next: -+ i = i; -+ } -+ -+ /* -+ * Keys for the new nodes get inserted: bch2_btree_insert_keys() only -+ * does the lookup once and thus expects the keys to be in sorted order -+ * so we have to make sure the new keys are correctly ordered with -+ * respect to the deleted keys added in the previous loop -+ */ -+ for (i = 0; i < nr_new_nodes; i++) -+ bch2_keylist_add_in_order(&keylist, &new_nodes[i]->key); -+ -+ /* Insert the newly coalesced nodes */ -+ bch2_btree_insert_node(as, parent, iter, &keylist, 0); -+ -+ BUG_ON(!bch2_keylist_empty(&keylist)); -+ -+ BUG_ON(iter->l[old_nodes[0]->c.level].b != old_nodes[0]); -+ -+ bch2_btree_iter_node_replace(iter, new_nodes[0]); -+ -+ for (i = 0; i < nr_new_nodes; i++) -+ bch2_btree_update_get_open_buckets(as, new_nodes[i]); -+ -+ /* Free the old nodes and update our sliding window */ -+ for (i = 0; i < nr_old_nodes; i++) { -+ bch2_btree_node_free_inmem(c, old_nodes[i], iter); -+ -+ /* -+ * the index update might have triggered a split, in which case -+ * the nodes we coalesced - the new nodes we just created - -+ * might not be sibling nodes anymore - don't add them to the -+ * sliding window (except the first): -+ */ -+ if (!i) { -+ old_nodes[i] = new_nodes[i]; -+ } else { -+ old_nodes[i] = NULL; -+ } -+ } -+ -+ for (i = 0; i < nr_new_nodes; i++) -+ six_unlock_intent(&new_nodes[i]->c.lock); -+ -+ bch2_btree_update_done(as); -+ bch2_keylist_free(&keylist, NULL); -+} -+ -+static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ unsigned i; -+ -+ /* Sliding window of adjacent btree nodes */ -+ struct btree *merge[GC_MERGE_NODES]; -+ u32 lock_seq[GC_MERGE_NODES]; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ /* -+ * XXX: We don't have a good way of positively matching on sibling nodes -+ * that have the same parent - this code works by handling the cases -+ * where they might not have the same parent, and is thus fragile. Ugh. -+ * -+ * Perhaps redo this to use multiple linked iterators? 
-+ */ -+ memset(merge, 0, sizeof(merge)); -+ -+ __for_each_btree_node(&trans, iter, btree_id, POS_MIN, -+ BTREE_MAX_DEPTH, 0, -+ BTREE_ITER_PREFETCH, b) { -+ memmove(merge + 1, merge, -+ sizeof(merge) - sizeof(merge[0])); -+ memmove(lock_seq + 1, lock_seq, -+ sizeof(lock_seq) - sizeof(lock_seq[0])); -+ -+ merge[0] = b; -+ -+ for (i = 1; i < GC_MERGE_NODES; i++) { -+ if (!merge[i] || -+ !six_relock_intent(&merge[i]->c.lock, lock_seq[i])) -+ break; -+ -+ if (merge[i]->c.level != merge[0]->c.level) { -+ six_unlock_intent(&merge[i]->c.lock); -+ break; -+ } -+ } -+ memset(merge + i, 0, (GC_MERGE_NODES - i) * sizeof(merge[0])); -+ -+ bch2_coalesce_nodes(c, iter, merge); -+ -+ for (i = 1; i < GC_MERGE_NODES && merge[i]; i++) { -+ lock_seq[i] = merge[i]->c.lock.state.seq; -+ six_unlock_intent(&merge[i]->c.lock); -+ } -+ -+ lock_seq[0] = merge[0]->c.lock.state.seq; -+ -+ if (kthread && kthread_should_stop()) { -+ bch2_trans_exit(&trans); -+ return -ESHUTDOWN; -+ } -+ -+ bch2_trans_cond_resched(&trans); -+ -+ /* -+ * If the parent node wasn't relocked, it might have been split -+ * and the nodes in our sliding window might not have the same -+ * parent anymore - blow away the sliding window: -+ */ -+ if (btree_iter_node(iter, iter->level + 1) && -+ !btree_node_intent_locked(iter, iter->level + 1)) -+ memset(merge + 1, 0, -+ (GC_MERGE_NODES - 1) * sizeof(merge[0])); -+ } -+ return bch2_trans_exit(&trans); -+} -+ -+/** -+ * bch_coalesce - coalesce adjacent nodes with low occupancy -+ */ -+void bch2_coalesce(struct bch_fs *c) -+{ -+ enum btree_id id; -+ -+ down_read(&c->gc_lock); -+ trace_gc_coalesce_start(c); -+ -+ for (id = 0; id < BTREE_ID_NR; id++) { -+ int ret = c->btree_roots[id].b -+ ? bch2_coalesce_btree(c, id) -+ : 0; -+ -+ if (ret) { -+ if (ret != -ESHUTDOWN) -+ bch_err(c, "btree coalescing failed: %d", ret); -+ return; -+ } -+ } -+ -+ trace_gc_coalesce_end(c); -+ up_read(&c->gc_lock); -+} -+ -+static int bch2_gc_thread(void *arg) -+{ -+ struct bch_fs *c = arg; -+ struct io_clock *clock = &c->io_clock[WRITE]; -+ unsigned long last = atomic_long_read(&clock->now); -+ unsigned last_kick = atomic_read(&c->kick_gc); -+ int ret; -+ -+ set_freezable(); -+ -+ while (1) { -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ if (kthread_should_stop()) { -+ __set_current_state(TASK_RUNNING); -+ return 0; -+ } -+ -+ if (atomic_read(&c->kick_gc) != last_kick) -+ break; -+ -+ if (c->btree_gc_periodic) { -+ unsigned long next = last + c->capacity / 16; -+ -+ if (atomic_long_read(&clock->now) >= next) -+ break; -+ -+ bch2_io_clock_schedule_timeout(clock, next); -+ } else { -+ schedule(); -+ } -+ -+ try_to_freeze(); -+ } -+ __set_current_state(TASK_RUNNING); -+ -+ last = atomic_long_read(&clock->now); -+ last_kick = atomic_read(&c->kick_gc); -+ -+ /* -+ * Full gc is currently incompatible with btree key cache: -+ */ -+#if 0 -+ ret = bch2_gc(c, NULL, false, false); -+#else -+ ret = bch2_gc_gens(c); -+#endif -+ if (ret) -+ bch_err(c, "btree gc failed: %i", ret); -+ -+ debug_check_no_locks_held(); -+ } -+ -+ return 0; -+} -+ -+void bch2_gc_thread_stop(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ p = c->gc_thread; -+ c->gc_thread = NULL; -+ -+ if (p) { -+ kthread_stop(p); -+ put_task_struct(p); -+ } -+} -+ -+int bch2_gc_thread_start(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ BUG_ON(c->gc_thread); -+ -+ p = kthread_create(bch2_gc_thread, c, "bch_gc"); -+ if (IS_ERR(p)) -+ return PTR_ERR(p); -+ -+ get_task_struct(p); -+ c->gc_thread = p; -+ wake_up_process(p); -+ return 0; -+} -diff 
--git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h -new file mode 100644 -index 000000000000..3694a3df62a8 ---- /dev/null -+++ b/fs/bcachefs/btree_gc.h -@@ -0,0 +1,121 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_GC_H -+#define _BCACHEFS_BTREE_GC_H -+ -+#include "btree_types.h" -+ -+void bch2_coalesce(struct bch_fs *); -+ -+struct journal_keys; -+int bch2_gc(struct bch_fs *, struct journal_keys *, bool, bool); -+int bch2_gc_gens(struct bch_fs *); -+void bch2_gc_thread_stop(struct bch_fs *); -+int bch2_gc_thread_start(struct bch_fs *); -+void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned); -+ -+/* -+ * For concurrent mark and sweep (with other index updates), we define a total -+ * ordering of _all_ references GC walks: -+ * -+ * Note that some references will have the same GC position as others - e.g. -+ * everything within the same btree node; in those cases we're relying on -+ * whatever locking exists for where those references live, i.e. the write lock -+ * on a btree node. -+ * -+ * That locking is also required to ensure GC doesn't pass the updater in -+ * between the updater adding/removing the reference and updating the GC marks; -+ * without that, we would at best double count sometimes. -+ * -+ * That part is important - whenever calling bch2_mark_pointers(), a lock _must_ -+ * be held that prevents GC from passing the position the updater is at. -+ * -+ * (What about the start of gc, when we're clearing all the marks? GC clears the -+ * mark with the gc pos seqlock held, and bch_mark_bucket checks against the gc -+ * position inside its cmpxchg loop, so crap magically works). -+ */ -+ -+/* Position of (the start of) a gc phase: */ -+static inline struct gc_pos gc_phase(enum gc_phase phase) -+{ -+ return (struct gc_pos) { -+ .phase = phase, -+ .pos = POS_MIN, -+ .level = 0, -+ }; -+} -+ -+static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r) -+{ -+ if (l.phase != r.phase) -+ return l.phase < r.phase ? -1 : 1; -+ if (bkey_cmp(l.pos, r.pos)) -+ return bkey_cmp(l.pos, r.pos); -+ if (l.level != r.level) -+ return l.level < r.level ? -1 : 1; -+ return 0; -+} -+ -+static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id) -+{ -+ switch (id) { -+#define x(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n; -+ BCH_BTREE_IDS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+static inline struct gc_pos gc_pos_btree(enum btree_id id, -+ struct bpos pos, unsigned level) -+{ -+ return (struct gc_pos) { -+ .phase = btree_id_to_gc_phase(id), -+ .pos = pos, -+ .level = level, -+ }; -+} -+ -+/* -+ * GC position of the pointers within a btree node: note, _not_ for &b->key -+ * itself, that lives in the parent node: -+ */ -+static inline struct gc_pos gc_pos_btree_node(struct btree *b) -+{ -+ return gc_pos_btree(b->c.btree_id, b->key.k.p, b->c.level); -+} -+ -+/* -+ * GC position of the pointer to a btree root: we don't use -+ * gc_pos_pointer_to_btree_node() here to avoid a potential race with -+ * btree_split() increasing the tree depth - the new root will have level > the -+ * old root and thus have a greater gc position than the old root, but that -+ * would be incorrect since once gc has marked the root it's not coming back. -+ */ -+static inline struct gc_pos gc_pos_btree_root(enum btree_id id) -+{ -+ return gc_pos_btree(id, POS_MAX, BTREE_MAX_DEPTH); -+} -+ -+static inline struct gc_pos gc_pos_alloc(struct bch_fs *c, struct open_bucket *ob) -+{ -+ return (struct gc_pos) { -+ .phase = GC_PHASE_ALLOC, -+ .pos = POS(ob ? 
ob - c->open_buckets : 0, 0), -+ }; -+} -+ -+static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos) -+{ -+ unsigned seq; -+ bool ret; -+ -+ do { -+ seq = read_seqcount_begin(&c->gc_pos_lock); -+ ret = gc_pos_cmp(pos, c->gc_pos) <= 0; -+ } while (read_seqcount_retry(&c->gc_pos_lock, seq)); -+ -+ return ret; -+} -+ -+#endif /* _BCACHEFS_BTREE_GC_H */ -diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c -new file mode 100644 -index 000000000000..d2c28eb75bde ---- /dev/null -+++ b/fs/bcachefs/btree_io.c -@@ -0,0 +1,1868 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "bkey_sort.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "debug.h" -+#include "error.h" -+#include "extents.h" -+#include "io.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "super-io.h" -+ -+#include -+#include -+ -+static void verify_no_dups(struct btree *b, -+ struct bkey_packed *start, -+ struct bkey_packed *end, -+ bool extents) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bkey_packed *k, *p; -+ -+ if (start == end) -+ return; -+ -+ for (p = start, k = bkey_next_skip_noops(start, end); -+ k != end; -+ p = k, k = bkey_next_skip_noops(k, end)) { -+ struct bkey l = bkey_unpack_key(b, p); -+ struct bkey r = bkey_unpack_key(b, k); -+ -+ BUG_ON(extents -+ ? bkey_cmp(l.p, bkey_start_pos(&r)) > 0 -+ : bkey_cmp(l.p, bkey_start_pos(&r)) >= 0); -+ //BUG_ON(bkey_cmp_packed(&b->format, p, k) >= 0); -+ } -+#endif -+} -+ -+static void set_needs_whiteout(struct bset *i, int v) -+{ -+ struct bkey_packed *k; -+ -+ for (k = i->start; -+ k != vstruct_last(i); -+ k = bkey_next_skip_noops(k, vstruct_last(i))) -+ k->needs_whiteout = v; -+} -+ -+static void btree_bounce_free(struct bch_fs *c, unsigned order, -+ bool used_mempool, void *p) -+{ -+ if (used_mempool) -+ mempool_free(p, &c->btree_bounce_pool); -+ else -+ vpfree(p, PAGE_SIZE << order); -+} -+ -+static void *btree_bounce_alloc(struct bch_fs *c, unsigned order, -+ bool *used_mempool) -+{ -+ unsigned flags = memalloc_nofs_save(); -+ void *p; -+ -+ BUG_ON(order > btree_page_order(c)); -+ -+ *used_mempool = false; -+ p = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT, order); -+ if (!p) { -+ *used_mempool = true; -+ p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO); -+ } -+ memalloc_nofs_restore(flags); -+ return p; -+} -+ -+static void sort_bkey_ptrs(const struct btree *bt, -+ struct bkey_packed **ptrs, unsigned nr) -+{ -+ unsigned n = nr, a = nr / 2, b, c, d; -+ -+ if (!a) -+ return; -+ -+ /* Heap sort: see lib/sort.c: */ -+ while (1) { -+ if (a) -+ a--; -+ else if (--n) -+ swap(ptrs[0], ptrs[n]); -+ else -+ break; -+ -+ for (b = a; c = 2 * b + 1, (d = c + 1) < n;) -+ b = bkey_cmp_packed(bt, -+ ptrs[c], -+ ptrs[d]) >= 0 ? 
c : d; -+ if (d == n) -+ b = c; -+ -+ while (b != a && -+ bkey_cmp_packed(bt, -+ ptrs[a], -+ ptrs[b]) >= 0) -+ b = (b - 1) / 2; -+ c = b; -+ while (b != a) { -+ b = (b - 1) / 2; -+ swap(ptrs[b], ptrs[c]); -+ } -+ } -+} -+ -+static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) -+{ -+ struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k; -+ bool used_mempool = false; -+ unsigned order; -+ -+ if (!b->whiteout_u64s) -+ return; -+ -+ order = get_order(b->whiteout_u64s * sizeof(u64)); -+ -+ new_whiteouts = btree_bounce_alloc(c, order, &used_mempool); -+ -+ ptrs = ptrs_end = ((void *) new_whiteouts + (PAGE_SIZE << order)); -+ -+ for (k = unwritten_whiteouts_start(c, b); -+ k != unwritten_whiteouts_end(c, b); -+ k = bkey_next(k)) -+ *--ptrs = k; -+ -+ sort_bkey_ptrs(b, ptrs, ptrs_end - ptrs); -+ -+ k = new_whiteouts; -+ -+ while (ptrs != ptrs_end) { -+ bkey_copy(k, *ptrs); -+ k = bkey_next(k); -+ ptrs++; -+ } -+ -+ verify_no_dups(b, new_whiteouts, -+ (void *) ((u64 *) new_whiteouts + b->whiteout_u64s), -+ btree_node_old_extent_overwrite(b)); -+ -+ memcpy_u64s(unwritten_whiteouts_start(c, b), -+ new_whiteouts, b->whiteout_u64s); -+ -+ btree_bounce_free(c, order, used_mempool, new_whiteouts); -+} -+ -+static bool should_compact_bset(struct btree *b, struct bset_tree *t, -+ bool compacting, enum compact_mode mode) -+{ -+ if (!bset_dead_u64s(b, t)) -+ return false; -+ -+ switch (mode) { -+ case COMPACT_LAZY: -+ return should_compact_bset_lazy(b, t) || -+ (compacting && !bset_written(b, bset(b, t))); -+ case COMPACT_ALL: -+ return true; -+ default: -+ BUG(); -+ } -+} -+ -+static bool bch2_compact_extent_whiteouts(struct bch_fs *c, -+ struct btree *b, -+ enum compact_mode mode) -+{ -+ const struct bkey_format *f = &b->format; -+ struct bset_tree *t; -+ struct bkey_packed *whiteouts = NULL; -+ struct bkey_packed *u_start, *u_pos; -+ struct sort_iter sort_iter; -+ unsigned order, whiteout_u64s = 0, u64s; -+ bool used_mempool, compacting = false; -+ -+ BUG_ON(!btree_node_is_extents(b)); -+ -+ for_each_bset(b, t) -+ if (should_compact_bset(b, t, whiteout_u64s != 0, mode)) -+ whiteout_u64s += bset_dead_u64s(b, t); -+ -+ if (!whiteout_u64s) -+ return false; -+ -+ bch2_sort_whiteouts(c, b); -+ -+ sort_iter_init(&sort_iter, b); -+ -+ whiteout_u64s += b->whiteout_u64s; -+ order = get_order(whiteout_u64s * sizeof(u64)); -+ -+ whiteouts = btree_bounce_alloc(c, order, &used_mempool); -+ u_start = u_pos = whiteouts; -+ -+ memcpy_u64s(u_pos, unwritten_whiteouts_start(c, b), -+ b->whiteout_u64s); -+ u_pos = (void *) u_pos + b->whiteout_u64s * sizeof(u64); -+ -+ sort_iter_add(&sort_iter, u_start, u_pos); -+ -+ for_each_bset(b, t) { -+ struct bset *i = bset(b, t); -+ struct bkey_packed *k, *n, *out, *start, *end; -+ struct btree_node_entry *src = NULL, *dst = NULL; -+ -+ if (t != b->set && !bset_written(b, i)) { -+ src = container_of(i, struct btree_node_entry, keys); -+ dst = max(write_block(b), -+ (void *) btree_bkey_last(b, t - 1)); -+ } -+ -+ if (src != dst) -+ compacting = true; -+ -+ if (!should_compact_bset(b, t, compacting, mode)) { -+ if (src != dst) { -+ memmove(dst, src, sizeof(*src) + -+ le16_to_cpu(src->keys.u64s) * -+ sizeof(u64)); -+ i = &dst->keys; -+ set_btree_bset(b, t, i); -+ } -+ continue; -+ } -+ -+ compacting = true; -+ u_start = u_pos; -+ start = i->start; -+ end = vstruct_last(i); -+ -+ if (src != dst) { -+ memmove(dst, src, sizeof(*src)); -+ i = &dst->keys; -+ set_btree_bset(b, t, i); -+ } -+ -+ out = i->start; -+ -+ for (k = start; k != end; k = n) { -+ n = 
bkey_next_skip_noops(k, end); -+ -+ if (bkey_deleted(k)) -+ continue; -+ -+ BUG_ON(bkey_whiteout(k) && -+ k->needs_whiteout && -+ bkey_written(b, k)); -+ -+ if (bkey_whiteout(k) && !k->needs_whiteout) -+ continue; -+ -+ if (bkey_whiteout(k)) { -+ memcpy_u64s(u_pos, k, bkeyp_key_u64s(f, k)); -+ set_bkeyp_val_u64s(f, u_pos, 0); -+ u_pos = bkey_next(u_pos); -+ } else { -+ bkey_copy(out, k); -+ out = bkey_next(out); -+ } -+ } -+ -+ sort_iter_add(&sort_iter, u_start, u_pos); -+ -+ i->u64s = cpu_to_le16((u64 *) out - i->_data); -+ set_btree_bset_end(b, t); -+ bch2_bset_set_no_aux_tree(b, t); -+ } -+ -+ b->whiteout_u64s = (u64 *) u_pos - (u64 *) whiteouts; -+ -+ BUG_ON((void *) unwritten_whiteouts_start(c, b) < -+ (void *) btree_bkey_last(b, bset_tree_last(b))); -+ -+ u64s = bch2_sort_extent_whiteouts(unwritten_whiteouts_start(c, b), -+ &sort_iter); -+ -+ BUG_ON(u64s > b->whiteout_u64s); -+ BUG_ON(u_pos != whiteouts && !u64s); -+ -+ if (u64s != b->whiteout_u64s) { -+ void *src = unwritten_whiteouts_start(c, b); -+ -+ b->whiteout_u64s = u64s; -+ memmove_u64s_up(unwritten_whiteouts_start(c, b), src, u64s); -+ } -+ -+ verify_no_dups(b, -+ unwritten_whiteouts_start(c, b), -+ unwritten_whiteouts_end(c, b), -+ true); -+ -+ btree_bounce_free(c, order, used_mempool, whiteouts); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ bch_btree_keys_u64s_remaining(c, b); -+ bch2_verify_btree_nr_keys(b); -+ -+ return true; -+} -+ -+static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode) -+{ -+ struct bset_tree *t; -+ bool ret = false; -+ -+ for_each_bset(b, t) { -+ struct bset *i = bset(b, t); -+ struct bkey_packed *k, *n, *out, *start, *end; -+ struct btree_node_entry *src = NULL, *dst = NULL; -+ -+ if (t != b->set && !bset_written(b, i)) { -+ src = container_of(i, struct btree_node_entry, keys); -+ dst = max(write_block(b), -+ (void *) btree_bkey_last(b, t - 1)); -+ } -+ -+ if (src != dst) -+ ret = true; -+ -+ if (!should_compact_bset(b, t, ret, mode)) { -+ if (src != dst) { -+ memmove(dst, src, sizeof(*src) + -+ le16_to_cpu(src->keys.u64s) * -+ sizeof(u64)); -+ i = &dst->keys; -+ set_btree_bset(b, t, i); -+ } -+ continue; -+ } -+ -+ start = btree_bkey_first(b, t); -+ end = btree_bkey_last(b, t); -+ -+ if (src != dst) { -+ memmove(dst, src, sizeof(*src)); -+ i = &dst->keys; -+ set_btree_bset(b, t, i); -+ } -+ -+ out = i->start; -+ -+ for (k = start; k != end; k = n) { -+ n = bkey_next_skip_noops(k, end); -+ -+ if (!bkey_whiteout(k)) { -+ bkey_copy(out, k); -+ out = bkey_next(out); -+ } else { -+ BUG_ON(k->needs_whiteout); -+ } -+ } -+ -+ i->u64s = cpu_to_le16((u64 *) out - i->_data); -+ set_btree_bset_end(b, t); -+ bch2_bset_set_no_aux_tree(b, t); -+ ret = true; -+ } -+ -+ bch2_verify_btree_nr_keys(b); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ return ret; -+} -+ -+bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, -+ enum compact_mode mode) -+{ -+ return !btree_node_old_extent_overwrite(b) -+ ? 
bch2_drop_whiteouts(b, mode) -+ : bch2_compact_extent_whiteouts(c, b, mode); -+} -+ -+static void btree_node_sort(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter, -+ unsigned start_idx, -+ unsigned end_idx, -+ bool filter_whiteouts) -+{ -+ struct btree_node *out; -+ struct sort_iter sort_iter; -+ struct bset_tree *t; -+ struct bset *start_bset = bset(b, &b->set[start_idx]); -+ bool used_mempool = false; -+ u64 start_time, seq = 0; -+ unsigned i, u64s = 0, order, shift = end_idx - start_idx - 1; -+ bool sorting_entire_node = start_idx == 0 && -+ end_idx == b->nsets; -+ -+ sort_iter_init(&sort_iter, b); -+ -+ for (t = b->set + start_idx; -+ t < b->set + end_idx; -+ t++) { -+ u64s += le16_to_cpu(bset(b, t)->u64s); -+ sort_iter_add(&sort_iter, -+ btree_bkey_first(b, t), -+ btree_bkey_last(b, t)); -+ } -+ -+ order = sorting_entire_node -+ ? btree_page_order(c) -+ : get_order(__vstruct_bytes(struct btree_node, u64s)); -+ -+ out = btree_bounce_alloc(c, order, &used_mempool); -+ -+ start_time = local_clock(); -+ -+ if (btree_node_old_extent_overwrite(b)) -+ filter_whiteouts = bset_written(b, start_bset); -+ -+ u64s = (btree_node_old_extent_overwrite(b) -+ ? bch2_sort_extents -+ : bch2_sort_keys)(out->keys.start, -+ &sort_iter, -+ filter_whiteouts); -+ -+ out->keys.u64s = cpu_to_le16(u64s); -+ -+ BUG_ON(vstruct_end(&out->keys) > (void *) out + (PAGE_SIZE << order)); -+ -+ if (sorting_entire_node) -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort], -+ start_time); -+ -+ /* Make sure we preserve bset journal_seq: */ -+ for (t = b->set + start_idx; t < b->set + end_idx; t++) -+ seq = max(seq, le64_to_cpu(bset(b, t)->journal_seq)); -+ start_bset->journal_seq = cpu_to_le64(seq); -+ -+ if (sorting_entire_node) { -+ unsigned u64s = le16_to_cpu(out->keys.u64s); -+ -+ BUG_ON(order != btree_page_order(c)); -+ -+ /* -+ * Our temporary buffer is the same size as the btree node's -+ * buffer, we can just swap buffers instead of doing a big -+ * memcpy() -+ */ -+ *out = *b->data; -+ out->keys.u64s = cpu_to_le16(u64s); -+ swap(out, b->data); -+ set_btree_bset(b, b->set, &b->data->keys); -+ } else { -+ start_bset->u64s = out->keys.u64s; -+ memcpy_u64s(start_bset->start, -+ out->keys.start, -+ le16_to_cpu(out->keys.u64s)); -+ } -+ -+ for (i = start_idx + 1; i < end_idx; i++) -+ b->nr.bset_u64s[start_idx] += -+ b->nr.bset_u64s[i]; -+ -+ b->nsets -= shift; -+ -+ for (i = start_idx + 1; i < b->nsets; i++) { -+ b->nr.bset_u64s[i] = b->nr.bset_u64s[i + shift]; -+ b->set[i] = b->set[i + shift]; -+ } -+ -+ for (i = b->nsets; i < MAX_BSETS; i++) -+ b->nr.bset_u64s[i] = 0; -+ -+ set_btree_bset_end(b, &b->set[start_idx]); -+ bch2_bset_set_no_aux_tree(b, &b->set[start_idx]); -+ -+ btree_bounce_free(c, order, used_mempool, out); -+ -+ bch2_verify_btree_nr_keys(b); -+} -+ -+void bch2_btree_sort_into(struct bch_fs *c, -+ struct btree *dst, -+ struct btree *src) -+{ -+ struct btree_nr_keys nr; -+ struct btree_node_iter src_iter; -+ u64 start_time = local_clock(); -+ -+ BUG_ON(dst->nsets != 1); -+ -+ bch2_bset_set_no_aux_tree(dst, dst->set); -+ -+ bch2_btree_node_iter_init_from_start(&src_iter, src); -+ -+ if (btree_node_is_extents(src)) -+ nr = bch2_sort_repack_merge(c, btree_bset_first(dst), -+ src, &src_iter, -+ &dst->format, -+ true); -+ else -+ nr = bch2_sort_repack(btree_bset_first(dst), -+ src, &src_iter, -+ &dst->format, -+ true); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort], -+ start_time); -+ -+ set_btree_bset_end(dst, dst->set); -+ -+ dst->nr.live_u64s += nr.live_u64s; -+ 
dst->nr.bset_u64s[0] += nr.bset_u64s[0]; -+ dst->nr.packed_keys += nr.packed_keys; -+ dst->nr.unpacked_keys += nr.unpacked_keys; -+ -+ bch2_verify_btree_nr_keys(dst); -+} -+ -+#define SORT_CRIT (4096 / sizeof(u64)) -+ -+/* -+ * We're about to add another bset to the btree node, so if there's currently -+ * too many bsets - sort some of them together: -+ */ -+static bool btree_node_compact(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter) -+{ -+ unsigned unwritten_idx; -+ bool ret = false; -+ -+ for (unwritten_idx = 0; -+ unwritten_idx < b->nsets; -+ unwritten_idx++) -+ if (!bset_written(b, bset(b, &b->set[unwritten_idx]))) -+ break; -+ -+ if (b->nsets - unwritten_idx > 1) { -+ btree_node_sort(c, b, iter, unwritten_idx, -+ b->nsets, false); -+ ret = true; -+ } -+ -+ if (unwritten_idx > 1) { -+ btree_node_sort(c, b, iter, 0, unwritten_idx, false); -+ ret = true; -+ } -+ -+ return ret; -+} -+ -+void bch2_btree_build_aux_trees(struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) -+ bch2_bset_build_aux_tree(b, t, -+ !bset_written(b, bset(b, t)) && -+ t == bset_tree_last(b)); -+} -+ -+/* -+ * @bch_btree_init_next - initialize a new (unwritten) bset that can then be -+ * inserted into -+ * -+ * Safe to call if there already is an unwritten bset - will only add a new bset -+ * if @b doesn't already have one. -+ * -+ * Returns true if we sorted (i.e. invalidated iterators -+ */ -+void bch2_btree_init_next(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter) -+{ -+ struct btree_node_entry *bne; -+ bool did_sort; -+ -+ EBUG_ON(!(b->c.lock.state.seq & 1)); -+ EBUG_ON(iter && iter->l[b->c.level].b != b); -+ -+ did_sort = btree_node_compact(c, b, iter); -+ -+ bne = want_new_bset(c, b); -+ if (bne) -+ bch2_bset_init_next(c, b, bne); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ if (iter && did_sort) -+ bch2_btree_iter_reinit_node(iter, b); -+} -+ -+static struct nonce btree_nonce(struct bset *i, unsigned offset) -+{ -+ return (struct nonce) {{ -+ [0] = cpu_to_le32(offset), -+ [1] = ((__le32 *) &i->seq)[0], -+ [2] = ((__le32 *) &i->seq)[1], -+ [3] = ((__le32 *) &i->journal_seq)[0]^BCH_NONCE_BTREE, -+ }}; -+} -+ -+static void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset) -+{ -+ struct nonce nonce = btree_nonce(i, offset); -+ -+ if (!offset) { -+ struct btree_node *bn = container_of(i, struct btree_node, keys); -+ unsigned bytes = (void *) &bn->keys - (void *) &bn->flags; -+ -+ bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags, -+ bytes); -+ -+ nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE)); -+ } -+ -+ bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data, -+ vstruct_end(i) - (void *) i->_data); -+} -+ -+static void btree_err_msg(struct printbuf *out, struct bch_fs *c, -+ struct btree *b, struct bset *i, -+ unsigned offset, int write) -+{ -+ pr_buf(out, "error validating btree node %sat btree %u level %u/%u\n" -+ "pos ", -+ write ? "before write " : "", -+ b->c.btree_id, b->c.level, -+ c->btree_roots[b->c.btree_id].level); -+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); -+ -+ pr_buf(out, " node offset %u", b->written); -+ if (i) -+ pr_buf(out, " bset u64s %u", le16_to_cpu(i->u64s)); -+} -+ -+enum btree_err_type { -+ BTREE_ERR_FIXABLE, -+ BTREE_ERR_WANT_RETRY, -+ BTREE_ERR_MUST_RETRY, -+ BTREE_ERR_FATAL, -+}; -+ -+enum btree_validate_ret { -+ BTREE_RETRY_READ = 64, -+}; -+ -+#define btree_err(type, c, b, i, msg, ...) 
\ -+({ \ -+ __label__ out; \ -+ char _buf[300]; \ -+ struct printbuf out = PBUF(_buf); \ -+ \ -+ btree_err_msg(&out, c, b, i, b->written, write); \ -+ pr_buf(&out, ": " msg, ##__VA_ARGS__); \ -+ \ -+ if (type == BTREE_ERR_FIXABLE && \ -+ write == READ && \ -+ !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { \ -+ mustfix_fsck_err(c, "%s", _buf); \ -+ goto out; \ -+ } \ -+ \ -+ switch (write) { \ -+ case READ: \ -+ bch_err(c, "%s", _buf); \ -+ \ -+ switch (type) { \ -+ case BTREE_ERR_FIXABLE: \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ case BTREE_ERR_WANT_RETRY: \ -+ if (have_retry) { \ -+ ret = BTREE_RETRY_READ; \ -+ goto fsck_err; \ -+ } \ -+ break; \ -+ case BTREE_ERR_MUST_RETRY: \ -+ ret = BTREE_RETRY_READ; \ -+ goto fsck_err; \ -+ case BTREE_ERR_FATAL: \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ } \ -+ break; \ -+ case WRITE: \ -+ bch_err(c, "corrupt metadata before write: %s", _buf); \ -+ \ -+ if (bch2_fs_inconsistent(c)) { \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ } \ -+ break; \ -+ } \ -+out: \ -+ true; \ -+}) -+ -+#define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false) -+ -+static int validate_bset(struct bch_fs *c, struct btree *b, -+ struct bset *i, unsigned sectors, -+ int write, bool have_retry) -+{ -+ unsigned version = le16_to_cpu(i->version); -+ const char *err; -+ int ret = 0; -+ -+ btree_err_on((version != BCH_BSET_VERSION_OLD && -+ version < bcachefs_metadata_version_min) || -+ version >= bcachefs_metadata_version_max, -+ BTREE_ERR_FATAL, c, b, i, -+ "unsupported bset version"); -+ -+ if (btree_err_on(b->written + sectors > c->opts.btree_node_size, -+ BTREE_ERR_FIXABLE, c, b, i, -+ "bset past end of btree node")) { -+ i->u64s = 0; -+ return 0; -+ } -+ -+ btree_err_on(b->written && !i->u64s, -+ BTREE_ERR_FIXABLE, c, b, i, -+ "empty bset"); -+ -+ if (!b->written) { -+ struct btree_node *bn = -+ container_of(i, struct btree_node, keys); -+ /* These indicate that we read the wrong btree node: */ -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bch_btree_ptr_v2 *bp = -+ &bkey_i_to_btree_ptr_v2(&b->key)->v; -+ -+ /* XXX endianness */ -+ btree_err_on(bp->seq != bn->keys.seq, -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "incorrect sequence number (wrong btree node)"); -+ } -+ -+ btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id, -+ BTREE_ERR_MUST_RETRY, c, b, i, -+ "incorrect btree id"); -+ -+ btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level, -+ BTREE_ERR_MUST_RETRY, c, b, i, -+ "incorrect level"); -+ -+ if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) { -+ u64 *p = (u64 *) &bn->ptr; -+ -+ *p = swab64(*p); -+ } -+ -+ if (!write) -+ compat_btree_node(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, bn); -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bch_btree_ptr_v2 *bp = -+ &bkey_i_to_btree_ptr_v2(&b->key)->v; -+ -+ btree_err_on(bkey_cmp(b->data->min_key, bp->min_key), -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "incorrect min_key: got %llu:%llu should be %llu:%llu", -+ b->data->min_key.inode, -+ b->data->min_key.offset, -+ bp->min_key.inode, -+ bp->min_key.offset); -+ } -+ -+ btree_err_on(bkey_cmp(bn->max_key, b->key.k.p), -+ BTREE_ERR_MUST_RETRY, c, b, i, -+ "incorrect max key"); -+ -+ if (write) -+ compat_btree_node(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, bn); -+ -+ /* XXX: ideally we would be validating min_key too */ -+#if 0 -+ /* -+ * not correct anymore, due to btree node write error -+ * handling -+ * -+ * need to add bn->seq to btree keys and 
verify -+ * against that -+ */ -+ btree_err_on(!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key), -+ bn->ptr), -+ BTREE_ERR_FATAL, c, b, i, -+ "incorrect backpointer"); -+#endif -+ err = bch2_bkey_format_validate(&bn->format); -+ btree_err_on(err, -+ BTREE_ERR_FATAL, c, b, i, -+ "invalid bkey format: %s", err); -+ -+ compat_bformat(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, -+ &bn->format); -+ } -+fsck_err: -+ return ret; -+} -+ -+static int validate_bset_keys(struct bch_fs *c, struct btree *b, -+ struct bset *i, unsigned *whiteout_u64s, -+ int write, bool have_retry) -+{ -+ unsigned version = le16_to_cpu(i->version); -+ struct bkey_packed *k, *prev = NULL; -+ bool seen_non_whiteout = false; -+ int ret = 0; -+ -+ if (!BSET_SEPARATE_WHITEOUTS(i)) { -+ seen_non_whiteout = true; -+ *whiteout_u64s = 0; -+ } -+ -+ for (k = i->start; -+ k != vstruct_last(i);) { -+ struct bkey_s u; -+ struct bkey tmp; -+ const char *invalid; -+ -+ if (btree_err_on(bkey_next(k) > vstruct_last(i), -+ BTREE_ERR_FIXABLE, c, b, i, -+ "key extends past end of bset")) { -+ i->u64s = cpu_to_le16((u64 *) k - i->_data); -+ break; -+ } -+ -+ if (btree_err_on(k->format > KEY_FORMAT_CURRENT, -+ BTREE_ERR_FIXABLE, c, b, i, -+ "invalid bkey format %u", k->format)) { -+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); -+ memmove_u64s_down(k, bkey_next(k), -+ (u64 *) vstruct_end(i) - (u64 *) k); -+ continue; -+ } -+ -+ /* XXX: validate k->u64s */ -+ if (!write) -+ bch2_bkey_compat(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, -+ &b->format, k); -+ -+ u = __bkey_disassemble(b, k, &tmp); -+ -+ invalid = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b)) ?: -+ bch2_bkey_in_btree_node(b, u.s_c) ?: -+ (write ? bch2_bkey_val_invalid(c, u.s_c) : NULL); -+ if (invalid) { -+ char buf[160]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c); -+ btree_err(BTREE_ERR_FIXABLE, c, b, i, -+ "invalid bkey:\n%s\n%s", invalid, buf); -+ -+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); -+ memmove_u64s_down(k, bkey_next(k), -+ (u64 *) vstruct_end(i) - (u64 *) k); -+ continue; -+ } -+ -+ if (write) -+ bch2_bkey_compat(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, -+ &b->format, k); -+ -+ /* -+ * with the separate whiteouts thing (used for extents), the -+ * second set of keys actually can have whiteouts too, so we -+ * can't solely go off bkey_whiteout()... 
-+ */ -+ -+ if (!seen_non_whiteout && -+ (!bkey_whiteout(k) || -+ (prev && bkey_iter_cmp(b, prev, k) > 0))) { -+ *whiteout_u64s = k->_data - i->_data; -+ seen_non_whiteout = true; -+ } else if (prev && bkey_iter_cmp(b, prev, k) > 0) { -+ char buf1[80]; -+ char buf2[80]; -+ struct bkey up = bkey_unpack_key(b, prev); -+ -+ bch2_bkey_to_text(&PBUF(buf1), &up); -+ bch2_bkey_to_text(&PBUF(buf2), u.k); -+ -+ bch2_dump_bset(c, b, i, 0); -+ btree_err(BTREE_ERR_FATAL, c, b, i, -+ "keys out of order: %s > %s", -+ buf1, buf2); -+ /* XXX: repair this */ -+ } -+ -+ prev = k; -+ k = bkey_next_skip_noops(k, vstruct_last(i)); -+ } -+fsck_err: -+ return ret; -+} -+ -+int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry) -+{ -+ struct btree_node_entry *bne; -+ struct sort_iter *iter; -+ struct btree_node *sorted; -+ struct bkey_packed *k; -+ struct bch_extent_ptr *ptr; -+ struct bset *i; -+ bool used_mempool, blacklisted; -+ unsigned u64s; -+ int ret, retry_read = 0, write = READ; -+ -+ iter = mempool_alloc(&c->fill_iter, GFP_NOIO); -+ sort_iter_init(iter, b); -+ iter->size = (btree_blocks(c) + 1) * 2; -+ -+ if (bch2_meta_read_fault("btree")) -+ btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "dynamic fault"); -+ -+ btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c), -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "bad magic"); -+ -+ btree_err_on(!b->data->keys.seq, -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "bad btree header"); -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bch_btree_ptr_v2 *bp = -+ &bkey_i_to_btree_ptr_v2(&b->key)->v; -+ -+ btree_err_on(b->data->keys.seq != bp->seq, -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "got wrong btree node (seq %llx want %llx)", -+ b->data->keys.seq, bp->seq); -+ } -+ -+ while (b->written < c->opts.btree_node_size) { -+ unsigned sectors, whiteout_u64s = 0; -+ struct nonce nonce; -+ struct bch_csum csum; -+ bool first = !b->written; -+ -+ if (!b->written) { -+ i = &b->data->keys; -+ -+ btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), -+ BTREE_ERR_WANT_RETRY, c, b, i, -+ "unknown checksum type"); -+ -+ nonce = btree_nonce(i, b->written << 9); -+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); -+ -+ btree_err_on(bch2_crc_cmp(csum, b->data->csum), -+ BTREE_ERR_WANT_RETRY, c, b, i, -+ "invalid checksum"); -+ -+ bset_encrypt(c, i, b->written << 9); -+ -+ if (btree_node_is_extents(b) && -+ !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) { -+ set_btree_node_old_extent_overwrite(b); -+ set_btree_node_need_rewrite(b); -+ } -+ -+ sectors = vstruct_sectors(b->data, c->block_bits); -+ } else { -+ bne = write_block(b); -+ i = &bne->keys; -+ -+ if (i->seq != b->data->keys.seq) -+ break; -+ -+ btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), -+ BTREE_ERR_WANT_RETRY, c, b, i, -+ "unknown checksum type"); -+ -+ nonce = btree_nonce(i, b->written << 9); -+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); -+ -+ btree_err_on(bch2_crc_cmp(csum, bne->csum), -+ BTREE_ERR_WANT_RETRY, c, b, i, -+ "invalid checksum"); -+ -+ bset_encrypt(c, i, b->written << 9); -+ -+ sectors = vstruct_sectors(bne, c->block_bits); -+ } -+ -+ ret = validate_bset(c, b, i, sectors, -+ READ, have_retry); -+ if (ret) -+ goto fsck_err; -+ -+ if (!b->written) -+ btree_node_set_format(b, b->data->format); -+ -+ ret = validate_bset_keys(c, b, i, &whiteout_u64s, -+ READ, have_retry); -+ if (ret) -+ goto fsck_err; -+ -+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); -+ -+ b->written += sectors; -+ -+ blacklisted = bch2_journal_seq_is_blacklisted(c, -+ 
le64_to_cpu(i->journal_seq), -+ true); -+ -+ btree_err_on(blacklisted && first, -+ BTREE_ERR_FIXABLE, c, b, i, -+ "first btree node bset has blacklisted journal seq"); -+ if (blacklisted && !first) -+ continue; -+ -+ sort_iter_add(iter, i->start, -+ vstruct_idx(i, whiteout_u64s)); -+ -+ sort_iter_add(iter, -+ vstruct_idx(i, whiteout_u64s), -+ vstruct_last(i)); -+ } -+ -+ for (bne = write_block(b); -+ bset_byte_offset(b, bne) < btree_bytes(c); -+ bne = (void *) bne + block_bytes(c)) -+ btree_err_on(bne->keys.seq == b->data->keys.seq, -+ BTREE_ERR_WANT_RETRY, c, b, NULL, -+ "found bset signature after last bset"); -+ -+ sorted = btree_bounce_alloc(c, btree_page_order(c), &used_mempool); -+ sorted->keys.u64s = 0; -+ -+ set_btree_bset(b, b->set, &b->data->keys); -+ -+ b->nr = (btree_node_old_extent_overwrite(b) -+ ? bch2_extent_sort_fix_overlapping -+ : bch2_key_sort_fix_overlapping)(c, &sorted->keys, iter); -+ -+ u64s = le16_to_cpu(sorted->keys.u64s); -+ *sorted = *b->data; -+ sorted->keys.u64s = cpu_to_le16(u64s); -+ swap(sorted, b->data); -+ set_btree_bset(b, b->set, &b->data->keys); -+ b->nsets = 1; -+ -+ BUG_ON(b->nr.live_u64s != u64s); -+ -+ btree_bounce_free(c, btree_page_order(c), used_mempool, sorted); -+ -+ i = &b->data->keys; -+ for (k = i->start; k != vstruct_last(i);) { -+ struct bkey tmp; -+ struct bkey_s u = __bkey_disassemble(b, k, &tmp); -+ const char *invalid = bch2_bkey_val_invalid(c, u.s_c); -+ -+ if (invalid || -+ (inject_invalid_keys(c) && -+ !bversion_cmp(u.k->version, MAX_VERSION))) { -+ char buf[160]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c); -+ btree_err(BTREE_ERR_FIXABLE, c, b, i, -+ "invalid bkey %s: %s", buf, invalid); -+ -+ btree_keys_account_key_drop(&b->nr, 0, k); -+ -+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); -+ memmove_u64s_down(k, bkey_next(k), -+ (u64 *) vstruct_end(i) - (u64 *) k); -+ set_btree_bset_end(b, b->set); -+ continue; -+ } -+ -+ if (u.k->type == KEY_TYPE_btree_ptr_v2) { -+ struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u); -+ -+ bp.v->mem_ptr = 0; -+ } -+ -+ k = bkey_next_skip_noops(k, vstruct_last(i)); -+ } -+ -+ bch2_bset_build_aux_tree(b, b->set, false); -+ -+ set_needs_whiteout(btree_bset_first(b), true); -+ -+ btree_node_reset_sib_u64s(b); -+ -+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ if (ca->mi.state != BCH_MEMBER_STATE_RW) -+ set_btree_node_need_rewrite(b); -+ } -+out: -+ mempool_free(iter, &c->fill_iter); -+ return retry_read; -+fsck_err: -+ if (ret == BTREE_RETRY_READ) { -+ retry_read = 1; -+ } else { -+ bch2_inconsistent_error(c); -+ set_btree_node_read_error(b); -+ } -+ goto out; -+} -+ -+static void btree_node_read_work(struct work_struct *work) -+{ -+ struct btree_read_bio *rb = -+ container_of(work, struct btree_read_bio, work); -+ struct bch_fs *c = rb->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); -+ struct btree *b = rb->bio.bi_private; -+ struct bio *bio = &rb->bio; -+ struct bch_io_failures failed = { .nr = 0 }; -+ bool can_retry; -+ -+ goto start; -+ while (1) { -+ bch_info(c, "retrying read"); -+ ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); -+ rb->have_ioref = bch2_dev_get_ioref(ca, READ); -+ bio_reset(bio); -+ bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META; -+ bio->bi_iter.bi_sector = rb->pick.ptr.offset; -+ bio->bi_iter.bi_size = btree_bytes(c); -+ -+ if (rb->have_ioref) { -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ submit_bio_wait(bio); -+ } else { -+ bio->bi_status = BLK_STS_REMOVED; -+ 
} -+start: -+ bch2_dev_io_err_on(bio->bi_status, ca, "btree read: %s", -+ blk_status_to_str(bio->bi_status)); -+ if (rb->have_ioref) -+ percpu_ref_put(&ca->io_ref); -+ rb->have_ioref = false; -+ -+ bch2_mark_io_failure(&failed, &rb->pick); -+ -+ can_retry = bch2_bkey_pick_read_device(c, -+ bkey_i_to_s_c(&b->key), -+ &failed, &rb->pick) > 0; -+ -+ if (!bio->bi_status && -+ !bch2_btree_node_read_done(c, b, can_retry)) -+ break; -+ -+ if (!can_retry) { -+ set_btree_node_read_error(b); -+ break; -+ } -+ } -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], -+ rb->start_time); -+ bio_put(&rb->bio); -+ clear_btree_node_read_in_flight(b); -+ wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); -+} -+ -+static void btree_node_read_endio(struct bio *bio) -+{ -+ struct btree_read_bio *rb = -+ container_of(bio, struct btree_read_bio, bio); -+ struct bch_fs *c = rb->c; -+ -+ if (rb->have_ioref) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); -+ bch2_latency_acct(ca, rb->start_time, READ); -+ } -+ -+ queue_work(system_unbound_wq, &rb->work); -+} -+ -+void bch2_btree_node_read(struct bch_fs *c, struct btree *b, -+ bool sync) -+{ -+ struct extent_ptr_decoded pick; -+ struct btree_read_bio *rb; -+ struct bch_dev *ca; -+ struct bio *bio; -+ int ret; -+ -+ trace_btree_read(c, b); -+ -+ ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), -+ NULL, &pick); -+ if (bch2_fs_fatal_err_on(ret <= 0, c, -+ "btree node read error: no device to read from")) { -+ set_btree_node_read_error(b); -+ return; -+ } -+ -+ ca = bch_dev_bkey_exists(c, pick.ptr.dev); -+ -+ bio = bio_alloc_bioset(GFP_NOIO, buf_pages(b->data, -+ btree_bytes(c)), -+ &c->btree_bio); -+ rb = container_of(bio, struct btree_read_bio, bio); -+ rb->c = c; -+ rb->start_time = local_clock(); -+ rb->have_ioref = bch2_dev_get_ioref(ca, READ); -+ rb->pick = pick; -+ INIT_WORK(&rb->work, btree_node_read_work); -+ bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META; -+ bio->bi_iter.bi_sector = pick.ptr.offset; -+ bio->bi_end_io = btree_node_read_endio; -+ bio->bi_private = b; -+ bch2_bio_map(bio, b->data, btree_bytes(c)); -+ -+ set_btree_node_read_in_flight(b); -+ -+ if (rb->have_ioref) { -+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_BTREE], -+ bio_sectors(bio)); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ -+ if (sync) { -+ submit_bio_wait(bio); -+ -+ bio->bi_private = b; -+ btree_node_read_work(&rb->work); -+ } else { -+ submit_bio(bio); -+ } -+ } else { -+ bio->bi_status = BLK_STS_REMOVED; -+ -+ if (sync) -+ btree_node_read_work(&rb->work); -+ else -+ queue_work(system_unbound_wq, &rb->work); -+ -+ } -+} -+ -+int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, -+ const struct bkey_i *k, unsigned level) -+{ -+ struct closure cl; -+ struct btree *b; -+ int ret; -+ -+ closure_init_stack(&cl); -+ -+ do { -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); -+ closure_sync(&cl); -+ } while (ret); -+ -+ b = bch2_btree_node_mem_alloc(c); -+ bch2_btree_cache_cannibalize_unlock(c); -+ -+ BUG_ON(IS_ERR(b)); -+ -+ bkey_copy(&b->key, k); -+ BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id)); -+ -+ bch2_btree_node_read(c, b, true); -+ -+ if (btree_node_read_error(b)) { -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ mutex_lock(&c->btree_cache.lock); -+ list_move(&b->list, &c->btree_cache.freeable); -+ mutex_unlock(&c->btree_cache.lock); -+ -+ ret = -EIO; -+ goto err; -+ } -+ -+ bch2_btree_set_root_for_read(c, b); -+err: -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ -+ return ret; 
-+} -+ -+void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, -+ struct btree_write *w) -+{ -+ unsigned long old, new, v = READ_ONCE(b->will_make_reachable); -+ -+ do { -+ old = new = v; -+ if (!(old & 1)) -+ break; -+ -+ new &= ~1UL; -+ } while ((v = cmpxchg(&b->will_make_reachable, old, new)) != old); -+ -+ if (old & 1) -+ closure_put(&((struct btree_update *) new)->cl); -+ -+ bch2_journal_pin_drop(&c->journal, &w->journal); -+} -+ -+static void btree_node_write_done(struct bch_fs *c, struct btree *b) -+{ -+ struct btree_write *w = btree_prev_write(b); -+ -+ bch2_btree_complete_write(c, b, w); -+ btree_node_io_unlock(b); -+} -+ -+static void bch2_btree_node_write_error(struct bch_fs *c, -+ struct btree_write_bio *wbio) -+{ -+ struct btree *b = wbio->wbio.bio.bi_private; -+ __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; -+ struct bch_extent_ptr *ptr; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_node_iter(&trans, b->c.btree_id, b->key.k.p, -+ BTREE_MAX_DEPTH, b->c.level, 0); -+retry: -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ goto err; -+ -+ /* has node been freed? */ -+ if (iter->l[b->c.level].b != b) { -+ /* node has been freed: */ -+ BUG_ON(!btree_node_dying(b)); -+ goto out; -+ } -+ -+ BUG_ON(!btree_node_hashed(b)); -+ -+ bkey_copy(&tmp.k, &b->key); -+ -+ bch2_bkey_drop_ptrs(bkey_i_to_s(&tmp.k), ptr, -+ bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev)); -+ -+ if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&tmp.k))) -+ goto err; -+ -+ ret = bch2_btree_node_update_key(c, iter, b, &tmp.k); -+ if (ret == -EINTR) -+ goto retry; -+ if (ret) -+ goto err; -+out: -+ bch2_trans_exit(&trans); -+ bio_put(&wbio->wbio.bio); -+ btree_node_write_done(c, b); -+ return; -+err: -+ set_btree_node_noevict(b); -+ bch2_fs_fatal_error(c, "fatal error writing btree node"); -+ goto out; -+} -+ -+void bch2_btree_write_error_work(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(work, struct bch_fs, -+ btree_write_error_work); -+ struct bio *bio; -+ -+ while (1) { -+ spin_lock_irq(&c->btree_write_error_lock); -+ bio = bio_list_pop(&c->btree_write_error_list); -+ spin_unlock_irq(&c->btree_write_error_lock); -+ -+ if (!bio) -+ break; -+ -+ bch2_btree_node_write_error(c, -+ container_of(bio, struct btree_write_bio, wbio.bio)); -+ } -+} -+ -+static void btree_node_write_work(struct work_struct *work) -+{ -+ struct btree_write_bio *wbio = -+ container_of(work, struct btree_write_bio, work); -+ struct bch_fs *c = wbio->wbio.c; -+ struct btree *b = wbio->wbio.bio.bi_private; -+ -+ btree_bounce_free(c, -+ wbio->wbio.order, -+ wbio->wbio.used_mempool, -+ wbio->data); -+ -+ if (wbio->wbio.failed.nr) { -+ unsigned long flags; -+ -+ spin_lock_irqsave(&c->btree_write_error_lock, flags); -+ bio_list_add(&c->btree_write_error_list, &wbio->wbio.bio); -+ spin_unlock_irqrestore(&c->btree_write_error_lock, flags); -+ -+ queue_work(c->wq, &c->btree_write_error_work); -+ return; -+ } -+ -+ bio_put(&wbio->wbio.bio); -+ btree_node_write_done(c, b); -+} -+ -+static void btree_node_write_endio(struct bio *bio) -+{ -+ struct bch_write_bio *wbio = to_wbio(bio); -+ struct bch_write_bio *parent = wbio->split ? 
wbio->parent : NULL; -+ struct bch_write_bio *orig = parent ?: wbio; -+ struct bch_fs *c = wbio->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev); -+ unsigned long flags; -+ -+ if (wbio->have_ioref) -+ bch2_latency_acct(ca, wbio->submit_time, WRITE); -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write: %s", -+ blk_status_to_str(bio->bi_status)) || -+ bch2_meta_write_fault("btree")) { -+ spin_lock_irqsave(&c->btree_write_error_lock, flags); -+ bch2_dev_list_add_dev(&orig->failed, wbio->dev); -+ spin_unlock_irqrestore(&c->btree_write_error_lock, flags); -+ } -+ -+ if (wbio->have_ioref) -+ percpu_ref_put(&ca->io_ref); -+ -+ if (parent) { -+ bio_put(bio); -+ bio_endio(&parent->bio); -+ } else { -+ struct btree_write_bio *wb = -+ container_of(orig, struct btree_write_bio, wbio); -+ -+ INIT_WORK(&wb->work, btree_node_write_work); -+ queue_work(system_unbound_wq, &wb->work); -+ } -+} -+ -+static int validate_bset_for_write(struct bch_fs *c, struct btree *b, -+ struct bset *i, unsigned sectors) -+{ -+ unsigned whiteout_u64s = 0; -+ int ret; -+ -+ if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE)) -+ return -1; -+ -+ ret = validate_bset(c, b, i, sectors, WRITE, false) ?: -+ validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false); -+ if (ret) -+ bch2_inconsistent_error(c); -+ -+ return ret; -+} -+ -+void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, -+ enum six_lock_type lock_type_held) -+{ -+ struct btree_write_bio *wbio; -+ struct bset_tree *t; -+ struct bset *i; -+ struct btree_node *bn = NULL; -+ struct btree_node_entry *bne = NULL; -+ BKEY_PADDED(key) k; -+ struct bch_extent_ptr *ptr; -+ struct sort_iter sort_iter; -+ struct nonce nonce; -+ unsigned bytes_to_write, sectors_to_write, order, bytes, u64s; -+ u64 seq = 0; -+ bool used_mempool; -+ unsigned long old, new; -+ bool validate_before_checksum = false; -+ void *data; -+ -+ if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) -+ return; -+ -+ /* -+ * We may only have a read lock on the btree node - the dirty bit is our -+ * "lock" against racing with other threads that may be trying to start -+ * a write, we do a write iff we clear the dirty bit. Since setting the -+ * dirty bit requires a write lock, we can't race with other threads -+ * redirtying it: -+ */ -+ do { -+ old = new = READ_ONCE(b->flags); -+ -+ if (!(old & (1 << BTREE_NODE_dirty))) -+ return; -+ -+ if (!btree_node_may_write(b)) -+ return; -+ -+ if (old & (1 << BTREE_NODE_write_in_flight)) { -+ btree_node_wait_on_io(b); -+ continue; -+ } -+ -+ new &= ~(1 << BTREE_NODE_dirty); -+ new &= ~(1 << BTREE_NODE_need_write); -+ new |= (1 << BTREE_NODE_write_in_flight); -+ new |= (1 << BTREE_NODE_just_written); -+ new ^= (1 << BTREE_NODE_write_idx); -+ } while (cmpxchg_acquire(&b->flags, old, new) != old); -+ -+ BUG_ON(btree_node_fake(b)); -+ BUG_ON((b->will_make_reachable != 0) != !b->written); -+ -+ BUG_ON(b->written >= c->opts.btree_node_size); -+ BUG_ON(b->written & (c->opts.block_size - 1)); -+ BUG_ON(bset_written(b, btree_bset_last(b))); -+ BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c)); -+ BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format))); -+ -+ bch2_sort_whiteouts(c, b); -+ -+ sort_iter_init(&sort_iter, b); -+ -+ bytes = !b->written -+ ? 
sizeof(struct btree_node) -+ : sizeof(struct btree_node_entry); -+ -+ bytes += b->whiteout_u64s * sizeof(u64); -+ -+ for_each_bset(b, t) { -+ i = bset(b, t); -+ -+ if (bset_written(b, i)) -+ continue; -+ -+ bytes += le16_to_cpu(i->u64s) * sizeof(u64); -+ sort_iter_add(&sort_iter, -+ btree_bkey_first(b, t), -+ btree_bkey_last(b, t)); -+ seq = max(seq, le64_to_cpu(i->journal_seq)); -+ } -+ -+ order = get_order(bytes); -+ data = btree_bounce_alloc(c, order, &used_mempool); -+ -+ if (!b->written) { -+ bn = data; -+ *bn = *b->data; -+ i = &bn->keys; -+ } else { -+ bne = data; -+ bne->keys = b->data->keys; -+ i = &bne->keys; -+ } -+ -+ i->journal_seq = cpu_to_le64(seq); -+ i->u64s = 0; -+ -+ if (!btree_node_old_extent_overwrite(b)) { -+ sort_iter_add(&sort_iter, -+ unwritten_whiteouts_start(c, b), -+ unwritten_whiteouts_end(c, b)); -+ SET_BSET_SEPARATE_WHITEOUTS(i, false); -+ } else { -+ memcpy_u64s(i->start, -+ unwritten_whiteouts_start(c, b), -+ b->whiteout_u64s); -+ i->u64s = cpu_to_le16(b->whiteout_u64s); -+ SET_BSET_SEPARATE_WHITEOUTS(i, true); -+ } -+ -+ b->whiteout_u64s = 0; -+ -+ u64s = btree_node_old_extent_overwrite(b) -+ ? bch2_sort_extents(vstruct_last(i), &sort_iter, false) -+ : bch2_sort_keys(i->start, &sort_iter, false); -+ le16_add_cpu(&i->u64s, u64s); -+ -+ set_needs_whiteout(i, false); -+ -+ /* do we have data to write? */ -+ if (b->written && !i->u64s) -+ goto nowrite; -+ -+ bytes_to_write = vstruct_end(i) - data; -+ sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9; -+ -+ memset(data + bytes_to_write, 0, -+ (sectors_to_write << 9) - bytes_to_write); -+ -+ BUG_ON(b->written + sectors_to_write > c->opts.btree_node_size); -+ BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN); -+ BUG_ON(i->seq != b->data->keys.seq); -+ -+ i->version = c->sb.version < bcachefs_metadata_version_new_versioning -+ ? cpu_to_le16(BCH_BSET_VERSION_OLD) -+ : cpu_to_le16(c->sb.version); -+ SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c)); -+ -+ if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i))) -+ validate_before_checksum = true; -+ -+ /* validate_bset will be modifying: */ -+ if (le16_to_cpu(i->version) < bcachefs_metadata_version_max) -+ validate_before_checksum = true; -+ -+ /* if we're going to be encrypting, check metadata validity first: */ -+ if (validate_before_checksum && -+ validate_bset_for_write(c, b, i, sectors_to_write)) -+ goto err; -+ -+ bset_encrypt(c, i, b->written << 9); -+ -+ nonce = btree_nonce(i, b->written << 9); -+ -+ if (bn) -+ bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn); -+ else -+ bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); -+ -+ /* if we're not encrypting, check metadata after checksumming: */ -+ if (!validate_before_checksum && -+ validate_bset_for_write(c, b, i, sectors_to_write)) -+ goto err; -+ -+ /* -+ * We handle btree write errors by immediately halting the journal - -+ * after we've done that, we can't issue any subsequent btree writes -+ * because they might have pointers to new nodes that failed to write. 
-+ * -+ * Furthermore, there's no point in doing any more btree writes because -+ * with the journal stopped, we're never going to update the journal to -+ * reflect that those writes were done and the data flushed from the -+ * journal: -+ * -+ * Also on journal error, the pending write may have updates that were -+ * never journalled (interior nodes, see btree_update_nodes_written()) - -+ * it's critical that we don't do the write in that case otherwise we -+ * will have updates visible that weren't in the journal: -+ * -+ * Make sure to update b->written so bch2_btree_init_next() doesn't -+ * break: -+ */ -+ if (bch2_journal_error(&c->journal) || -+ c->opts.nochanges) -+ goto err; -+ -+ trace_btree_write(b, bytes_to_write, sectors_to_write); -+ -+ wbio = container_of(bio_alloc_bioset(GFP_NOIO, -+ buf_pages(data, sectors_to_write << 9), -+ &c->btree_bio), -+ struct btree_write_bio, wbio.bio); -+ wbio_init(&wbio->wbio.bio); -+ wbio->data = data; -+ wbio->wbio.order = order; -+ wbio->wbio.used_mempool = used_mempool; -+ wbio->wbio.bio.bi_opf = REQ_OP_WRITE|REQ_META; -+ wbio->wbio.bio.bi_end_io = btree_node_write_endio; -+ wbio->wbio.bio.bi_private = b; -+ -+ bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9); -+ -+ /* -+ * If we're appending to a leaf node, we don't technically need FUA - -+ * this write just needs to be persisted before the next journal write, -+ * which will be marked FLUSH|FUA. -+ * -+ * Similarly if we're writing a new btree root - the pointer is going to -+ * be in the next journal entry. -+ * -+ * But if we're writing a new btree node (that isn't a root) or -+ * appending to a non leaf btree node, we need either FUA or a flush -+ * when we write the parent with the new pointer. FUA is cheaper than a -+ * flush, and writes appending to leaf nodes aren't blocking anything so -+ * just make all btree node writes FUA to keep things sane. -+ */ -+ -+ bkey_copy(&k.key, &b->key); -+ -+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&k.key)), ptr) -+ ptr->offset += b->written; -+ -+ b->written += sectors_to_write; -+ -+ /* XXX: submitting IO with btree locks held: */ -+ bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_BTREE, &k.key); -+ return; -+err: -+ set_btree_node_noevict(b); -+ b->written += sectors_to_write; -+nowrite: -+ btree_bounce_free(c, order, used_mempool, data); -+ btree_node_write_done(c, b); -+} -+ -+/* -+ * Work that must be done with write lock held: -+ */ -+bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b) -+{ -+ bool invalidated_iter = false; -+ struct btree_node_entry *bne; -+ struct bset_tree *t; -+ -+ if (!btree_node_just_written(b)) -+ return false; -+ -+ BUG_ON(b->whiteout_u64s); -+ -+ clear_btree_node_just_written(b); -+ -+ /* -+ * Note: immediately after write, bset_written() doesn't work - the -+ * amount of data we had to write after compaction might have been -+ * smaller than the offset of the last bset. 
-+ * -+ * However, we know that all bsets have been written here, as long as -+ * we're still holding the write lock: -+ */ -+ -+ /* -+ * XXX: decide if we really want to unconditionally sort down to a -+ * single bset: -+ */ -+ if (b->nsets > 1) { -+ btree_node_sort(c, b, NULL, 0, b->nsets, true); -+ invalidated_iter = true; -+ } else { -+ invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL); -+ } -+ -+ for_each_bset(b, t) -+ set_needs_whiteout(bset(b, t), true); -+ -+ bch2_btree_verify(c, b); -+ -+ /* -+ * If later we don't unconditionally sort down to a single bset, we have -+ * to ensure this is still true: -+ */ -+ BUG_ON((void *) btree_bkey_last(b, bset_tree_last(b)) > write_block(b)); -+ -+ bne = want_new_bset(c, b); -+ if (bne) -+ bch2_bset_init_next(c, b, bne); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ return invalidated_iter; -+} -+ -+/* -+ * Use this one if the node is intent locked: -+ */ -+void bch2_btree_node_write(struct bch_fs *c, struct btree *b, -+ enum six_lock_type lock_type_held) -+{ -+ BUG_ON(lock_type_held == SIX_LOCK_write); -+ -+ if (lock_type_held == SIX_LOCK_intent || -+ six_lock_tryupgrade(&b->c.lock)) { -+ __bch2_btree_node_write(c, b, SIX_LOCK_intent); -+ -+ /* don't cycle lock unnecessarily: */ -+ if (btree_node_just_written(b) && -+ six_trylock_write(&b->c.lock)) { -+ bch2_btree_post_write_cleanup(c, b); -+ six_unlock_write(&b->c.lock); -+ } -+ -+ if (lock_type_held == SIX_LOCK_read) -+ six_lock_downgrade(&b->c.lock); -+ } else { -+ __bch2_btree_node_write(c, b, SIX_LOCK_read); -+ } -+} -+ -+static void __bch2_btree_flush_all(struct bch_fs *c, unsigned flag) -+{ -+ struct bucket_table *tbl; -+ struct rhash_head *pos; -+ struct btree *b; -+ unsigned i; -+restart: -+ rcu_read_lock(); -+ for_each_cached_btree(b, c, tbl, i, pos) -+ if (test_bit(flag, &b->flags)) { -+ rcu_read_unlock(); -+ wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE); -+ goto restart; -+ -+ } -+ rcu_read_unlock(); -+} -+ -+void bch2_btree_flush_all_reads(struct bch_fs *c) -+{ -+ __bch2_btree_flush_all(c, BTREE_NODE_read_in_flight); -+} -+ -+void bch2_btree_flush_all_writes(struct bch_fs *c) -+{ -+ __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight); -+} -+ -+void bch2_btree_verify_flushed(struct bch_fs *c) -+{ -+ struct bucket_table *tbl; -+ struct rhash_head *pos; -+ struct btree *b; -+ unsigned i; -+ -+ rcu_read_lock(); -+ for_each_cached_btree(b, c, tbl, i, pos) { -+ unsigned long flags = READ_ONCE(b->flags); -+ -+ BUG_ON((flags & (1 << BTREE_NODE_dirty)) || -+ (flags & (1 << BTREE_NODE_write_in_flight))); -+ } -+ rcu_read_unlock(); -+} -+ -+ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf) -+{ -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ struct bucket_table *tbl; -+ struct rhash_head *pos; -+ struct btree *b; -+ unsigned i; -+ -+ rcu_read_lock(); -+ for_each_cached_btree(b, c, tbl, i, pos) { -+ unsigned long flags = READ_ONCE(b->flags); -+ -+ if (!(flags & (1 << BTREE_NODE_dirty))) -+ continue; -+ -+ pr_buf(&out, "%p d %u n %u l %u w %u b %u r %u:%lu\n", -+ b, -+ (flags & (1 << BTREE_NODE_dirty)) != 0, -+ (flags & (1 << BTREE_NODE_need_write)) != 0, -+ b->c.level, -+ b->written, -+ !list_empty_careful(&b->write_blocked), -+ b->will_make_reachable != 0, -+ b->will_make_reachable & 1); -+ } -+ rcu_read_unlock(); -+ -+ return out.pos - buf; -+} -diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h -new file mode 100644 -index 000000000000..f3d7ec749b61 ---- /dev/null -+++ b/fs/bcachefs/btree_io.h -@@ -0,0 +1,190 @@ -+/* SPDX-License-Identifier: GPL-2.0 
*/ -+#ifndef _BCACHEFS_BTREE_IO_H -+#define _BCACHEFS_BTREE_IO_H -+ -+#include "bkey_methods.h" -+#include "bset.h" -+#include "btree_locking.h" -+#include "extents.h" -+#include "io_types.h" -+ -+struct bch_fs; -+struct btree_write; -+struct btree; -+struct btree_iter; -+ -+struct btree_read_bio { -+ struct bch_fs *c; -+ u64 start_time; -+ unsigned have_ioref:1; -+ struct extent_ptr_decoded pick; -+ struct work_struct work; -+ struct bio bio; -+}; -+ -+struct btree_write_bio { -+ void *data; -+ struct work_struct work; -+ struct bch_write_bio wbio; -+}; -+ -+static inline void btree_node_io_unlock(struct btree *b) -+{ -+ EBUG_ON(!btree_node_write_in_flight(b)); -+ clear_btree_node_write_in_flight(b); -+ wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); -+} -+ -+static inline void btree_node_io_lock(struct btree *b) -+{ -+ wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight, -+ TASK_UNINTERRUPTIBLE); -+} -+ -+static inline void btree_node_wait_on_io(struct btree *b) -+{ -+ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, -+ TASK_UNINTERRUPTIBLE); -+} -+ -+static inline bool btree_node_may_write(struct btree *b) -+{ -+ return list_empty_careful(&b->write_blocked) && -+ (!b->written || !b->will_make_reachable); -+} -+ -+enum compact_mode { -+ COMPACT_LAZY, -+ COMPACT_ALL, -+}; -+ -+bool bch2_compact_whiteouts(struct bch_fs *, struct btree *, -+ enum compact_mode); -+ -+static inline bool should_compact_bset_lazy(struct btree *b, -+ struct bset_tree *t) -+{ -+ unsigned total_u64s = bset_u64s(t); -+ unsigned dead_u64s = bset_dead_u64s(b, t); -+ -+ return dead_u64s > 64 && dead_u64s * 3 > total_u64s; -+} -+ -+static inline bool bch2_maybe_compact_whiteouts(struct bch_fs *c, struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) -+ if (should_compact_bset_lazy(b, t)) -+ return bch2_compact_whiteouts(c, b, COMPACT_LAZY); -+ -+ return false; -+} -+ -+void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *); -+ -+void bch2_btree_build_aux_trees(struct btree *); -+void bch2_btree_init_next(struct bch_fs *, struct btree *, -+ struct btree_iter *); -+ -+int bch2_btree_node_read_done(struct bch_fs *, struct btree *, bool); -+void bch2_btree_node_read(struct bch_fs *, struct btree *, bool); -+int bch2_btree_root_read(struct bch_fs *, enum btree_id, -+ const struct bkey_i *, unsigned); -+ -+void bch2_btree_complete_write(struct bch_fs *, struct btree *, -+ struct btree_write *); -+void bch2_btree_write_error_work(struct work_struct *); -+ -+void __bch2_btree_node_write(struct bch_fs *, struct btree *, -+ enum six_lock_type); -+bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *); -+ -+void bch2_btree_node_write(struct bch_fs *, struct btree *, -+ enum six_lock_type); -+ -+static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b, -+ enum six_lock_type lock_held) -+{ -+ while (b->written && -+ btree_node_need_write(b) && -+ btree_node_may_write(b)) { -+ if (!btree_node_write_in_flight(b)) { -+ bch2_btree_node_write(c, b, lock_held); -+ break; -+ } -+ -+ six_unlock_type(&b->c.lock, lock_held); -+ btree_node_wait_on_io(b); -+ btree_node_lock_type(c, b, lock_held); -+ } -+} -+ -+#define bch2_btree_node_write_cond(_c, _b, cond) \ -+do { \ -+ unsigned long old, new, v = READ_ONCE((_b)->flags); \ -+ \ -+ do { \ -+ old = new = v; \ -+ \ -+ if (!(old & (1 << BTREE_NODE_dirty)) || !(cond)) \ -+ break; \ -+ \ -+ new |= (1 << BTREE_NODE_need_write); \ -+ } while ((v = cmpxchg(&(_b)->flags, old, new)) != old); \ -+ \ -+ 
btree_node_write_if_need(_c, _b, SIX_LOCK_read); \ -+} while (0) -+ -+void bch2_btree_flush_all_reads(struct bch_fs *); -+void bch2_btree_flush_all_writes(struct bch_fs *); -+void bch2_btree_verify_flushed(struct bch_fs *); -+ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *, char *); -+ -+static inline void compat_bformat(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, struct bkey_format *f) -+{ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_id == BTREE_ID_INODES) { -+ swap(f->bits_per_field[BKEY_FIELD_INODE], -+ f->bits_per_field[BKEY_FIELD_OFFSET]); -+ swap(f->field_offset[BKEY_FIELD_INODE], -+ f->field_offset[BKEY_FIELD_OFFSET]); -+ } -+} -+ -+static inline void compat_bpos(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, struct bpos *p) -+{ -+ if (big_endian != CPU_BIG_ENDIAN) -+ bch2_bpos_swab(p); -+ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_id == BTREE_ID_INODES) -+ swap(p->inode, p->offset); -+} -+ -+static inline void compat_btree_node(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, -+ struct btree_node *bn) -+{ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_node_type_is_extents(btree_id) && -+ bkey_cmp(bn->min_key, POS_MIN) && -+ write) -+ bn->min_key = bkey_predecessor(bn->min_key); -+ -+ compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key); -+ compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key); -+ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_node_type_is_extents(btree_id) && -+ bkey_cmp(bn->min_key, POS_MIN) && -+ !write) -+ bn->min_key = bkey_successor(bn->min_key); -+} -+ -+#endif /* _BCACHEFS_BTREE_IO_H */ -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -new file mode 100644 -index 000000000000..6fab76c3220c ---- /dev/null -+++ b/fs/bcachefs/btree_iter.c -@@ -0,0 +1,2445 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_cache.h" -+#include "btree_iter.h" -+#include "btree_key_cache.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+#include "debug.h" -+#include "extents.h" -+#include "journal.h" -+ -+#include -+#include -+ -+static inline bool is_btree_node(struct btree_iter *iter, unsigned l) -+{ -+ return l < BTREE_MAX_DEPTH && -+ (unsigned long) iter->l[l].b >= 128; -+} -+ -+static inline struct bpos btree_iter_search_key(struct btree_iter *iter) -+{ -+ struct bpos pos = iter->pos; -+ -+ if ((iter->flags & BTREE_ITER_IS_EXTENTS) && -+ bkey_cmp(pos, POS_MAX)) -+ pos = bkey_successor(pos); -+ return pos; -+} -+ -+static inline bool btree_iter_pos_before_node(struct btree_iter *iter, -+ struct btree *b) -+{ -+ return bkey_cmp(btree_iter_search_key(iter), b->data->min_key) < 0; -+} -+ -+static inline bool btree_iter_pos_after_node(struct btree_iter *iter, -+ struct btree *b) -+{ -+ return bkey_cmp(b->key.k.p, btree_iter_search_key(iter)) < 0; -+} -+ -+static inline bool btree_iter_pos_in_node(struct btree_iter *iter, -+ struct btree *b) -+{ -+ return iter->btree_id == b->c.btree_id && -+ !btree_iter_pos_before_node(iter, b) && -+ !btree_iter_pos_after_node(iter, b); -+} -+ -+/* Btree node locking: */ -+ -+void bch2_btree_node_unlock_write(struct btree *b, struct btree_iter *iter) -+{ -+ bch2_btree_node_unlock_write_inlined(b, iter); -+} -+ -+void __bch2_btree_node_lock_write(struct btree *b, 
struct btree_iter *iter) -+{ -+ struct btree_iter *linked; -+ unsigned readers = 0; -+ -+ EBUG_ON(!btree_node_intent_locked(iter, b->c.level)); -+ -+ trans_for_each_iter(iter->trans, linked) -+ if (linked->l[b->c.level].b == b && -+ btree_node_read_locked(linked, b->c.level)) -+ readers++; -+ -+ /* -+ * Must drop our read locks before calling six_lock_write() - -+ * six_unlock() won't do wakeups until the reader count -+ * goes to 0, and it's safe because we have the node intent -+ * locked: -+ */ -+ atomic64_sub(__SIX_VAL(read_lock, readers), -+ &b->c.lock.state.counter); -+ btree_node_lock_type(iter->trans->c, b, SIX_LOCK_write); -+ atomic64_add(__SIX_VAL(read_lock, readers), -+ &b->c.lock.state.counter); -+} -+ -+bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level) -+{ -+ struct btree *b = btree_iter_node(iter, level); -+ int want = __btree_lock_want(iter, level); -+ -+ if (!is_btree_node(iter, level)) -+ return false; -+ -+ if (race_fault()) -+ return false; -+ -+ if (six_relock_type(&b->c.lock, want, iter->l[level].lock_seq) || -+ (btree_node_lock_seq_matches(iter, b, level) && -+ btree_node_lock_increment(iter->trans, b, level, want))) { -+ mark_btree_node_locked(iter, level, want); -+ return true; -+ } else { -+ return false; -+ } -+} -+ -+static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level) -+{ -+ struct btree *b = iter->l[level].b; -+ -+ EBUG_ON(btree_lock_want(iter, level) != BTREE_NODE_INTENT_LOCKED); -+ -+ if (!is_btree_node(iter, level)) -+ return false; -+ -+ if (btree_node_intent_locked(iter, level)) -+ return true; -+ -+ if (race_fault()) -+ return false; -+ -+ if (btree_node_locked(iter, level) -+ ? six_lock_tryupgrade(&b->c.lock) -+ : six_relock_type(&b->c.lock, SIX_LOCK_intent, iter->l[level].lock_seq)) -+ goto success; -+ -+ if (btree_node_lock_seq_matches(iter, b, level) && -+ btree_node_lock_increment(iter->trans, b, level, BTREE_NODE_INTENT_LOCKED)) { -+ btree_node_unlock(iter, level); -+ goto success; -+ } -+ -+ return false; -+success: -+ mark_btree_node_intent_locked(iter, level); -+ return true; -+} -+ -+static inline bool btree_iter_get_locks(struct btree_iter *iter, -+ bool upgrade, bool trace) -+{ -+ unsigned l = iter->level; -+ int fail_idx = -1; -+ -+ do { -+ if (!btree_iter_node(iter, l)) -+ break; -+ -+ if (!(upgrade -+ ? bch2_btree_node_upgrade(iter, l) -+ : bch2_btree_node_relock(iter, l))) { -+ if (trace) -+ (upgrade -+ ? trace_node_upgrade_fail -+ : trace_node_relock_fail)(l, iter->l[l].lock_seq, -+ is_btree_node(iter, l) -+ ? 0 -+ : (unsigned long) iter->l[l].b, -+ is_btree_node(iter, l) -+ ? iter->l[l].b->c.lock.state.seq -+ : 0); -+ -+ fail_idx = l; -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+ } -+ -+ l++; -+ } while (l < iter->locks_want); -+ -+ /* -+ * When we fail to get a lock, we have to ensure that any child nodes -+ * can't be relocked so bch2_btree_iter_traverse has to walk back up to -+ * the node that we failed to relock: -+ */ -+ while (fail_idx >= 0) { -+ btree_node_unlock(iter, fail_idx); -+ iter->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS; -+ --fail_idx; -+ } -+ -+ if (iter->uptodate == BTREE_ITER_NEED_RELOCK) -+ iter->uptodate = BTREE_ITER_NEED_PEEK; -+ -+ bch2_btree_trans_verify_locks(iter->trans); -+ -+ return iter->uptodate < BTREE_ITER_NEED_RELOCK; -+} -+ -+static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b, -+ enum btree_iter_type type) -+{ -+ return type != BTREE_ITER_CACHED -+ ? 
container_of(_b, struct btree, c)->key.k.p -+ : container_of(_b, struct bkey_cached, c)->key.pos; -+} -+ -+/* Slowpath: */ -+bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, -+ unsigned level, struct btree_iter *iter, -+ enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, -+ void *p) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree_iter *linked; -+ u64 start_time = local_clock(); -+ bool ret = true; -+ -+ /* Check if it's safe to block: */ -+ trans_for_each_iter(trans, linked) { -+ if (!linked->nodes_locked) -+ continue; -+ -+ /* -+ * Can't block taking an intent lock if we have _any_ nodes read -+ * locked: -+ * -+ * - Our read lock blocks another thread with an intent lock on -+ * the same node from getting a write lock, and thus from -+ * dropping its intent lock -+ * -+ * - And the other thread may have multiple nodes intent locked: -+ * both the node we want to intent lock, and the node we -+ * already have read locked - deadlock: -+ */ -+ if (type == SIX_LOCK_intent && -+ linked->nodes_locked != linked->nodes_intent_locked) { -+ if (!(trans->nounlock)) { -+ linked->locks_want = max_t(unsigned, -+ linked->locks_want, -+ __fls(linked->nodes_locked) + 1); -+ if (!btree_iter_get_locks(linked, true, false)) -+ ret = false; -+ } else { -+ ret = false; -+ } -+ } -+ -+ /* -+ * Interior nodes must be locked before their descendants: if -+ * another iterator has possible descendants locked of the node -+ * we're about to lock, it must have the ancestors locked too: -+ */ -+ if (linked->btree_id == iter->btree_id && -+ level > __fls(linked->nodes_locked)) { -+ if (!(trans->nounlock)) { -+ linked->locks_want = -+ max(level + 1, max_t(unsigned, -+ linked->locks_want, -+ iter->locks_want)); -+ if (!btree_iter_get_locks(linked, true, false)) -+ ret = false; -+ } else { -+ ret = false; -+ } -+ } -+ -+ /* Must lock btree nodes in key order: */ -+ if ((cmp_int(iter->btree_id, linked->btree_id) ?: -+ -cmp_int(btree_iter_type(iter), btree_iter_type(linked))) < 0) -+ ret = false; -+ -+ if (iter->btree_id == linked->btree_id && -+ btree_node_locked(linked, level) && -+ bkey_cmp(pos, btree_node_pos((void *) linked->l[level].b, -+ btree_iter_type(linked))) <= 0) -+ ret = false; -+ -+ /* -+ * Recheck if this is a node we already have locked - since one -+ * of the get_locks() calls might've successfully -+ * upgraded/relocked it: -+ */ -+ if (linked->l[level].b == b && -+ btree_node_locked_type(linked, level) >= type) { -+ six_lock_increment(&b->c.lock, type); -+ return true; -+ } -+ } -+ -+ if (unlikely(!ret)) { -+ trace_trans_restart_would_deadlock(iter->trans->ip); -+ return false; -+ } -+ -+ if (six_trylock_type(&b->c.lock, type)) -+ return true; -+ -+ if (six_lock_type(&b->c.lock, type, should_sleep_fn, p)) -+ return false; -+ -+ bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)], -+ start_time); -+ return true; -+} -+ -+/* Btree iterator locking: */ -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+static void bch2_btree_iter_verify_locks(struct btree_iter *iter) -+{ -+ unsigned l; -+ -+ if (!(iter->trans->iters_linked & (1ULL << iter->idx))) { -+ BUG_ON(iter->nodes_locked); -+ return; -+ } -+ -+ for (l = 0; is_btree_node(iter, l); l++) { -+ if (iter->uptodate >= BTREE_ITER_NEED_RELOCK && -+ !btree_node_locked(iter, l)) -+ continue; -+ -+ BUG_ON(btree_lock_want(iter, l) != -+ btree_node_locked_type(iter, l)); -+ } -+} -+ -+void bch2_btree_trans_verify_locks(struct btree_trans *trans) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter_all(trans, iter) 
-+ bch2_btree_iter_verify_locks(iter); -+} -+#else -+static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {} -+#endif -+ -+__flatten -+bool bch2_btree_iter_relock(struct btree_iter *iter, bool trace) -+{ -+ return btree_iter_get_locks(iter, false, trace); -+} -+ -+bool __bch2_btree_iter_upgrade(struct btree_iter *iter, -+ unsigned new_locks_want) -+{ -+ struct btree_iter *linked; -+ -+ EBUG_ON(iter->locks_want >= new_locks_want); -+ -+ iter->locks_want = new_locks_want; -+ -+ if (btree_iter_get_locks(iter, true, true)) -+ return true; -+ -+ /* -+ * Ancestor nodes must be locked before child nodes, so set locks_want -+ * on iterators that might lock ancestors before us to avoid getting -+ * -EINTR later: -+ */ -+ trans_for_each_iter(iter->trans, linked) -+ if (linked != iter && -+ linked->btree_id == iter->btree_id && -+ linked->locks_want < new_locks_want) { -+ linked->locks_want = new_locks_want; -+ btree_iter_get_locks(linked, true, false); -+ } -+ -+ return false; -+} -+ -+bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *iter, -+ unsigned new_locks_want) -+{ -+ unsigned l = iter->level; -+ -+ EBUG_ON(iter->locks_want >= new_locks_want); -+ -+ iter->locks_want = new_locks_want; -+ -+ do { -+ if (!btree_iter_node(iter, l)) -+ break; -+ -+ if (!bch2_btree_node_upgrade(iter, l)) { -+ iter->locks_want = l; -+ return false; -+ } -+ -+ l++; -+ } while (l < iter->locks_want); -+ -+ return true; -+} -+ -+void __bch2_btree_iter_downgrade(struct btree_iter *iter, -+ unsigned downgrade_to) -+{ -+ unsigned l, new_locks_want = downgrade_to ?: -+ (iter->flags & BTREE_ITER_INTENT ? 1 : 0); -+ -+ if (iter->locks_want < downgrade_to) { -+ iter->locks_want = new_locks_want; -+ -+ while (iter->nodes_locked && -+ (l = __fls(iter->nodes_locked)) >= iter->locks_want) { -+ if (l > iter->level) { -+ btree_node_unlock(iter, l); -+ } else { -+ if (btree_node_intent_locked(iter, l)) { -+ six_lock_downgrade(&iter->l[l].b->c.lock); -+ iter->nodes_intent_locked ^= 1 << l; -+ } -+ break; -+ } -+ } -+ } -+ -+ bch2_btree_trans_verify_locks(iter->trans); -+} -+ -+void bch2_trans_downgrade(struct btree_trans *trans) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) -+ bch2_btree_iter_downgrade(iter); -+} -+ -+/* Btree transaction locking: */ -+ -+bool bch2_trans_relock(struct btree_trans *trans) -+{ -+ struct btree_iter *iter; -+ bool ret = true; -+ -+ trans_for_each_iter(trans, iter) -+ if (iter->uptodate == BTREE_ITER_NEED_RELOCK) -+ ret &= bch2_btree_iter_relock(iter, true); -+ -+ return ret; -+} -+ -+void bch2_trans_unlock(struct btree_trans *trans) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) -+ __bch2_btree_iter_unlock(iter); -+} -+ -+/* Btree iterator: */ -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+static void bch2_btree_iter_verify_cached(struct btree_iter *iter) -+{ -+ struct bkey_cached *ck; -+ bool locked = btree_node_locked(iter, 0); -+ -+ if (!bch2_btree_node_relock(iter, 0)) -+ return; -+ -+ ck = (void *) iter->l[0].b; -+ BUG_ON(ck->key.btree_id != iter->btree_id || -+ bkey_cmp(ck->key.pos, iter->pos)); -+ -+ if (!locked) -+ btree_node_unlock(iter, 0); -+} -+ -+static void bch2_btree_iter_verify_level(struct btree_iter *iter, -+ unsigned level) -+{ -+ struct bpos pos = btree_iter_search_key(iter); -+ struct btree_iter_level *l = &iter->l[level]; -+ struct btree_node_iter tmp = l->iter; -+ bool locked = btree_node_locked(iter, level); -+ struct bkey_packed *p, *k; -+ char buf1[100], buf2[100]; -+ const char *msg; -+ -+ if 
(!debug_check_iterators(iter->trans->c)) -+ return; -+ -+ if (btree_iter_type(iter) == BTREE_ITER_CACHED) { -+ if (!level) -+ bch2_btree_iter_verify_cached(iter); -+ return; -+ } -+ -+ BUG_ON(iter->level < iter->min_depth); -+ -+ if (!btree_iter_node(iter, level)) -+ return; -+ -+ if (!bch2_btree_node_relock(iter, level)) -+ return; -+ -+ /* -+ * Ideally this invariant would always be true, and hopefully in the -+ * future it will be, but for now set_pos_same_leaf() breaks it: -+ */ -+ BUG_ON(iter->uptodate < BTREE_ITER_NEED_TRAVERSE && -+ !btree_iter_pos_in_node(iter, l->b)); -+ -+ /* -+ * node iterators don't use leaf node iterator: -+ */ -+ if (btree_iter_type(iter) == BTREE_ITER_NODES && -+ level <= iter->min_depth) -+ goto unlock; -+ -+ bch2_btree_node_iter_verify(&l->iter, l->b); -+ -+ /* -+ * For interior nodes, the iterator will have skipped past -+ * deleted keys: -+ * -+ * For extents, the iterator may have skipped past deleted keys (but not -+ * whiteouts) -+ */ -+ p = level || btree_node_type_is_extents(iter->btree_id) -+ ? bch2_btree_node_iter_prev_filter(&tmp, l->b, KEY_TYPE_discard) -+ : bch2_btree_node_iter_prev_all(&tmp, l->b); -+ k = bch2_btree_node_iter_peek_all(&l->iter, l->b); -+ -+ if (p && bkey_iter_pos_cmp(l->b, p, &pos) >= 0) { -+ msg = "before"; -+ goto err; -+ } -+ -+ if (k && bkey_iter_pos_cmp(l->b, k, &pos) < 0) { -+ msg = "after"; -+ goto err; -+ } -+unlock: -+ if (!locked) -+ btree_node_unlock(iter, level); -+ return; -+err: -+ strcpy(buf1, "(none)"); -+ strcpy(buf2, "(none)"); -+ -+ if (p) { -+ struct bkey uk = bkey_unpack_key(l->b, p); -+ bch2_bkey_to_text(&PBUF(buf1), &uk); -+ } -+ -+ if (k) { -+ struct bkey uk = bkey_unpack_key(l->b, k); -+ bch2_bkey_to_text(&PBUF(buf2), &uk); -+ } -+ -+ panic("iterator should be %s key at level %u:\n" -+ "iter pos %s %llu:%llu\n" -+ "prev key %s\n" -+ "cur key %s\n", -+ msg, level, -+ iter->flags & BTREE_ITER_IS_EXTENTS ? 
">" : "=>", -+ iter->pos.inode, iter->pos.offset, -+ buf1, buf2); -+} -+ -+static void bch2_btree_iter_verify(struct btree_iter *iter) -+{ -+ unsigned i; -+ -+ bch2_btree_trans_verify_locks(iter->trans); -+ -+ for (i = 0; i < BTREE_MAX_DEPTH; i++) -+ bch2_btree_iter_verify_level(iter, i); -+} -+ -+void bch2_btree_trans_verify_iters(struct btree_trans *trans, struct btree *b) -+{ -+ struct btree_iter *iter; -+ -+ if (!debug_check_iterators(trans->c)) -+ return; -+ -+ trans_for_each_iter_with_node(trans, b, iter) -+ bch2_btree_iter_verify_level(iter, b->c.level); -+} -+ -+#else -+ -+static inline void bch2_btree_iter_verify_level(struct btree_iter *iter, unsigned l) {} -+static inline void bch2_btree_iter_verify(struct btree_iter *iter) {} -+ -+#endif -+ -+static void btree_node_iter_set_set_pos(struct btree_node_iter *iter, -+ struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *k) -+{ -+ struct btree_node_iter_set *set; -+ -+ btree_node_iter_for_each(iter, set) -+ if (set->end == t->end_offset) { -+ set->k = __btree_node_key_to_offset(b, k); -+ bch2_btree_node_iter_sort(iter, b); -+ return; -+ } -+ -+ bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t)); -+} -+ -+static void __bch2_btree_iter_fix_key_modified(struct btree_iter *iter, -+ struct btree *b, -+ struct bkey_packed *where) -+{ -+ struct btree_iter_level *l = &iter->l[b->c.level]; -+ struct bpos pos = btree_iter_search_key(iter); -+ -+ if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b)) -+ return; -+ -+ if (bkey_iter_pos_cmp(l->b, where, &pos) < 0) -+ bch2_btree_node_iter_advance(&l->iter, l->b); -+ -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+} -+ -+void bch2_btree_iter_fix_key_modified(struct btree_iter *iter, -+ struct btree *b, -+ struct bkey_packed *where) -+{ -+ struct btree_iter *linked; -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) { -+ __bch2_btree_iter_fix_key_modified(linked, b, where); -+ bch2_btree_iter_verify_level(linked, b->c.level); -+ } -+} -+ -+static void __bch2_btree_node_iter_fix(struct btree_iter *iter, -+ struct btree *b, -+ struct btree_node_iter *node_iter, -+ struct bset_tree *t, -+ struct bkey_packed *where, -+ unsigned clobber_u64s, -+ unsigned new_u64s) -+{ -+ const struct bkey_packed *end = btree_bkey_last(b, t); -+ struct btree_node_iter_set *set; -+ unsigned offset = __btree_node_key_to_offset(b, where); -+ int shift = new_u64s - clobber_u64s; -+ unsigned old_end = t->end_offset - shift; -+ unsigned orig_iter_pos = node_iter->data[0].k; -+ bool iter_current_key_modified = -+ orig_iter_pos >= offset && -+ orig_iter_pos <= offset + clobber_u64s; -+ struct bpos iter_pos = btree_iter_search_key(iter); -+ -+ btree_node_iter_for_each(node_iter, set) -+ if (set->end == old_end) -+ goto found; -+ -+ /* didn't find the bset in the iterator - might have to readd it: */ -+ if (new_u64s && -+ bkey_iter_pos_cmp(b, where, &iter_pos) >= 0) { -+ bch2_btree_node_iter_push(node_iter, b, where, end); -+ goto fixup_done; -+ } else { -+ /* Iterator is after key that changed */ -+ return; -+ } -+found: -+ set->end = t->end_offset; -+ -+ /* Iterator hasn't gotten to the key that changed yet: */ -+ if (set->k < offset) -+ return; -+ -+ if (new_u64s && -+ bkey_iter_pos_cmp(b, where, &iter_pos) >= 0) { -+ set->k = offset; -+ } else if (set->k < offset + clobber_u64s) { -+ set->k = offset + new_u64s; -+ if (set->k == set->end) -+ bch2_btree_node_iter_set_drop(node_iter, set); -+ } else { -+ /* Iterator is after key that changed */ -+ set->k = (int) set->k + shift; -+ return; 
-+ } -+ -+ bch2_btree_node_iter_sort(node_iter, b); -+fixup_done: -+ if (node_iter->data[0].k != orig_iter_pos) -+ iter_current_key_modified = true; -+ -+ /* -+ * When a new key is added, and the node iterator now points to that -+ * key, the iterator might have skipped past deleted keys that should -+ * come after the key the iterator now points to. We have to rewind to -+ * before those deleted keys - otherwise -+ * bch2_btree_node_iter_prev_all() breaks: -+ */ -+ if (!bch2_btree_node_iter_end(node_iter) && -+ iter_current_key_modified && -+ (b->c.level || -+ btree_node_type_is_extents(iter->btree_id))) { -+ struct bset_tree *t; -+ struct bkey_packed *k, *k2, *p; -+ -+ k = bch2_btree_node_iter_peek_all(node_iter, b); -+ -+ for_each_bset(b, t) { -+ bool set_pos = false; -+ -+ if (node_iter->data[0].end == t->end_offset) -+ continue; -+ -+ k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t); -+ -+ while ((p = bch2_bkey_prev_all(b, t, k2)) && -+ bkey_iter_cmp(b, k, p) < 0) { -+ k2 = p; -+ set_pos = true; -+ } -+ -+ if (set_pos) -+ btree_node_iter_set_set_pos(node_iter, -+ b, t, k2); -+ } -+ } -+ -+ if (!b->c.level && -+ node_iter == &iter->l[0].iter && -+ iter_current_key_modified) -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+} -+ -+void bch2_btree_node_iter_fix(struct btree_iter *iter, -+ struct btree *b, -+ struct btree_node_iter *node_iter, -+ struct bkey_packed *where, -+ unsigned clobber_u64s, -+ unsigned new_u64s) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, where); -+ struct btree_iter *linked; -+ -+ if (node_iter != &iter->l[b->c.level].iter) { -+ __bch2_btree_node_iter_fix(iter, b, node_iter, t, -+ where, clobber_u64s, new_u64s); -+ -+ if (debug_check_iterators(iter->trans->c)) -+ bch2_btree_node_iter_verify(node_iter, b); -+ } -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) { -+ __bch2_btree_node_iter_fix(linked, b, -+ &linked->l[b->c.level].iter, t, -+ where, clobber_u64s, new_u64s); -+ bch2_btree_iter_verify_level(linked, b->c.level); -+ } -+} -+ -+static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter, -+ struct btree_iter_level *l, -+ struct bkey *u, -+ struct bkey_packed *k) -+{ -+ struct bkey_s_c ret; -+ -+ if (unlikely(!k)) { -+ /* -+ * signal to bch2_btree_iter_peek_slot() that we're currently at -+ * a hole -+ */ -+ u->type = KEY_TYPE_deleted; -+ return bkey_s_c_null; -+ } -+ -+ ret = bkey_disassemble(l->b, k, u); -+ -+ if (debug_check_bkeys(iter->trans->c)) -+ bch2_bkey_debugcheck(iter->trans->c, l->b, ret); -+ -+ return ret; -+} -+ -+/* peek_all() doesn't skip deleted keys */ -+static inline struct bkey_s_c __btree_iter_peek_all(struct btree_iter *iter, -+ struct btree_iter_level *l, -+ struct bkey *u) -+{ -+ return __btree_iter_unpack(iter, l, u, -+ bch2_btree_node_iter_peek_all(&l->iter, l->b)); -+} -+ -+static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter, -+ struct btree_iter_level *l) -+{ -+ return __btree_iter_unpack(iter, l, &iter->k, -+ bch2_btree_node_iter_peek(&l->iter, l->b)); -+} -+ -+static inline struct bkey_s_c __btree_iter_prev(struct btree_iter *iter, -+ struct btree_iter_level *l) -+{ -+ return __btree_iter_unpack(iter, l, &iter->k, -+ bch2_btree_node_iter_prev(&l->iter, l->b)); -+} -+ -+static inline bool btree_iter_advance_to_pos(struct btree_iter *iter, -+ struct btree_iter_level *l, -+ int max_advance) -+{ -+ struct bpos pos = btree_iter_search_key(iter); -+ struct bkey_packed *k; -+ int nr_advanced = 0; -+ -+ while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) && -+ 
bkey_iter_pos_cmp(l->b, k, &pos) < 0) { -+ if (max_advance > 0 && nr_advanced >= max_advance) -+ return false; -+ -+ bch2_btree_node_iter_advance(&l->iter, l->b); -+ nr_advanced++; -+ } -+ -+ return true; -+} -+ -+/* -+ * Verify that iterator for parent node points to child node: -+ */ -+static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b) -+{ -+ struct btree_iter_level *l; -+ unsigned plevel; -+ bool parent_locked; -+ struct bkey_packed *k; -+ -+ if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) -+ return; -+ -+ plevel = b->c.level + 1; -+ if (!btree_iter_node(iter, plevel)) -+ return; -+ -+ parent_locked = btree_node_locked(iter, plevel); -+ -+ if (!bch2_btree_node_relock(iter, plevel)) -+ return; -+ -+ l = &iter->l[plevel]; -+ k = bch2_btree_node_iter_peek_all(&l->iter, l->b); -+ if (!k || -+ bkey_deleted(k) || -+ bkey_cmp_left_packed(l->b, k, &b->key.k.p)) { -+ char buf[100]; -+ struct bkey uk = bkey_unpack_key(b, k); -+ -+ bch2_bkey_to_text(&PBUF(buf), &uk); -+ panic("parent iter doesn't point to new node:\n%s\n%llu:%llu\n", -+ buf, b->key.k.p.inode, b->key.k.p.offset); -+ } -+ -+ if (!parent_locked) -+ btree_node_unlock(iter, b->c.level + 1); -+} -+ -+static inline void __btree_iter_init(struct btree_iter *iter, -+ unsigned level) -+{ -+ struct bpos pos = btree_iter_search_key(iter); -+ struct btree_iter_level *l = &iter->l[level]; -+ -+ bch2_btree_node_iter_init(&l->iter, l->b, &pos); -+ -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+} -+ -+static inline void btree_iter_node_set(struct btree_iter *iter, -+ struct btree *b) -+{ -+ BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED); -+ -+ btree_iter_verify_new_node(iter, b); -+ -+ EBUG_ON(!btree_iter_pos_in_node(iter, b)); -+ EBUG_ON(b->c.lock.state.seq & 1); -+ -+ iter->l[b->c.level].lock_seq = b->c.lock.state.seq; -+ iter->l[b->c.level].b = b; -+ __btree_iter_init(iter, b->c.level); -+} -+ -+/* -+ * A btree node is being replaced - update the iterator to point to the new -+ * node: -+ */ -+void bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b) -+{ -+ enum btree_node_locked_type t; -+ struct btree_iter *linked; -+ -+ trans_for_each_iter(iter->trans, linked) -+ if (btree_iter_type(linked) != BTREE_ITER_CACHED && -+ btree_iter_pos_in_node(linked, b)) { -+ /* -+ * bch2_btree_iter_node_drop() has already been called - -+ * the old node we're replacing has already been -+ * unlocked and the pointer invalidated -+ */ -+ BUG_ON(btree_node_locked(linked, b->c.level)); -+ -+ t = btree_lock_want(linked, b->c.level); -+ if (t != BTREE_NODE_UNLOCKED) { -+ six_lock_increment(&b->c.lock, t); -+ mark_btree_node_locked(linked, b->c.level, t); -+ } -+ -+ btree_iter_node_set(linked, b); -+ } -+} -+ -+void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b) -+{ -+ struct btree_iter *linked; -+ unsigned level = b->c.level; -+ -+ trans_for_each_iter(iter->trans, linked) -+ if (linked->l[level].b == b) { -+ __btree_node_unlock(linked, level); -+ linked->l[level].b = BTREE_ITER_NO_NODE_DROP; -+ } -+} -+ -+/* -+ * A btree node has been modified in such a way as to invalidate iterators - fix -+ * them: -+ */ -+void bch2_btree_iter_reinit_node(struct btree_iter *iter, struct btree *b) -+{ -+ struct btree_iter *linked; -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) -+ __btree_iter_init(linked, b->c.level); -+} -+ -+static int lock_root_check_fn(struct six_lock *lock, void *p) -+{ -+ struct btree *b = container_of(lock, struct btree, c.lock); -+ struct btree **rootp = p; -+ -+ return b 
== *rootp ? 0 : -1; -+} -+ -+static inline int btree_iter_lock_root(struct btree_iter *iter, -+ unsigned depth_want) -+{ -+ struct bch_fs *c = iter->trans->c; -+ struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b; -+ enum six_lock_type lock_type; -+ unsigned i; -+ -+ EBUG_ON(iter->nodes_locked); -+ -+ while (1) { -+ b = READ_ONCE(*rootp); -+ iter->level = READ_ONCE(b->c.level); -+ -+ if (unlikely(iter->level < depth_want)) { -+ /* -+ * the root is at a lower depth than the depth we want: -+ * got to the end of the btree, or we're walking nodes -+ * greater than some depth and there are no nodes >= -+ * that depth -+ */ -+ iter->level = depth_want; -+ for (i = iter->level; i < BTREE_MAX_DEPTH; i++) -+ iter->l[i].b = NULL; -+ return 1; -+ } -+ -+ lock_type = __btree_lock_want(iter, iter->level); -+ if (unlikely(!btree_node_lock(b, POS_MAX, iter->level, -+ iter, lock_type, -+ lock_root_check_fn, rootp))) -+ return -EINTR; -+ -+ if (likely(b == READ_ONCE(*rootp) && -+ b->c.level == iter->level && -+ !race_fault())) { -+ for (i = 0; i < iter->level; i++) -+ iter->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT; -+ iter->l[iter->level].b = b; -+ for (i = iter->level + 1; i < BTREE_MAX_DEPTH; i++) -+ iter->l[i].b = NULL; -+ -+ mark_btree_node_locked(iter, iter->level, lock_type); -+ btree_iter_node_set(iter, b); -+ return 0; -+ } -+ -+ six_unlock_type(&b->c.lock, lock_type); -+ } -+} -+ -+noinline -+static void btree_iter_prefetch(struct btree_iter *iter) -+{ -+ struct bch_fs *c = iter->trans->c; -+ struct btree_iter_level *l = &iter->l[iter->level]; -+ struct btree_node_iter node_iter = l->iter; -+ struct bkey_packed *k; -+ BKEY_PADDED(k) tmp; -+ unsigned nr = test_bit(BCH_FS_STARTED, &c->flags) -+ ? (iter->level > 1 ? 0 : 2) -+ : (iter->level > 1 ? 1 : 16); -+ bool was_locked = btree_node_locked(iter, iter->level); -+ -+ while (nr) { -+ if (!bch2_btree_node_relock(iter, iter->level)) -+ return; -+ -+ bch2_btree_node_iter_advance(&node_iter, l->b); -+ k = bch2_btree_node_iter_peek(&node_iter, l->b); -+ if (!k) -+ break; -+ -+ bch2_bkey_unpack(l->b, &tmp.k, k); -+ bch2_btree_node_prefetch(c, iter, &tmp.k, iter->level - 1); -+ } -+ -+ if (!was_locked) -+ btree_node_unlock(iter, iter->level); -+} -+ -+static noinline void btree_node_mem_ptr_set(struct btree_iter *iter, -+ unsigned plevel, struct btree *b) -+{ -+ struct btree_iter_level *l = &iter->l[plevel]; -+ bool locked = btree_node_locked(iter, plevel); -+ struct bkey_packed *k; -+ struct bch_btree_ptr_v2 *bp; -+ -+ if (!bch2_btree_node_relock(iter, plevel)) -+ return; -+ -+ k = bch2_btree_node_iter_peek_all(&l->iter, l->b); -+ BUG_ON(k->type != KEY_TYPE_btree_ptr_v2); -+ -+ bp = (void *) bkeyp_val(&l->b->format, k); -+ bp->mem_ptr = (unsigned long)b; -+ -+ if (!locked) -+ btree_node_unlock(iter, plevel); -+} -+ -+static __always_inline int btree_iter_down(struct btree_iter *iter) -+{ -+ struct bch_fs *c = iter->trans->c; -+ struct btree_iter_level *l = &iter->l[iter->level]; -+ struct btree *b; -+ unsigned level = iter->level - 1; -+ enum six_lock_type lock_type = __btree_lock_want(iter, level); -+ BKEY_PADDED(k) tmp; -+ -+ EBUG_ON(!btree_node_locked(iter, iter->level)); -+ -+ bch2_bkey_unpack(l->b, &tmp.k, -+ bch2_btree_node_iter_peek(&l->iter, l->b)); -+ -+ b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type); -+ if (unlikely(IS_ERR(b))) -+ return PTR_ERR(b); -+ -+ mark_btree_node_locked(iter, level, lock_type); -+ btree_iter_node_set(iter, b); -+ -+ if (tmp.k.k.type == KEY_TYPE_btree_ptr_v2 && -+ unlikely(b != 
btree_node_mem_ptr(&tmp.k))) -+ btree_node_mem_ptr_set(iter, level + 1, b); -+ -+ if (iter->flags & BTREE_ITER_PREFETCH) -+ btree_iter_prefetch(iter); -+ -+ iter->level = level; -+ -+ return 0; -+} -+ -+static void btree_iter_up(struct btree_iter *iter) -+{ -+ btree_node_unlock(iter, iter->level++); -+} -+ -+static int btree_iter_traverse_one(struct btree_iter *); -+ -+static int __btree_iter_traverse_all(struct btree_trans *trans, int ret) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *iter; -+ u8 sorted[BTREE_ITER_MAX]; -+ unsigned i, nr_sorted = 0; -+ -+ if (trans->in_traverse_all) -+ return -EINTR; -+ -+ trans->in_traverse_all = true; -+retry_all: -+ nr_sorted = 0; -+ -+ trans_for_each_iter(trans, iter) -+ sorted[nr_sorted++] = iter->idx; -+ -+#define btree_iter_cmp_by_idx(_l, _r) \ -+ btree_iter_cmp(&trans->iters[_l], &trans->iters[_r]) -+ -+ bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx); -+#undef btree_iter_cmp_by_idx -+ bch2_trans_unlock(trans); -+ -+ if (unlikely(ret == -ENOMEM)) { -+ struct closure cl; -+ -+ closure_init_stack(&cl); -+ -+ do { -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); -+ closure_sync(&cl); -+ } while (ret); -+ } -+ -+ if (unlikely(ret == -EIO)) { -+ trans->error = true; -+ goto out; -+ } -+ -+ BUG_ON(ret && ret != -EINTR); -+ -+ /* Now, redo traversals in correct order: */ -+ for (i = 0; i < nr_sorted; i++) { -+ unsigned idx = sorted[i]; -+ -+ /* -+ * sucessfully traversing one iterator can cause another to be -+ * unlinked, in btree_key_cache_fill() -+ */ -+ if (!(trans->iters_linked & (1ULL << idx))) -+ continue; -+ -+ ret = btree_iter_traverse_one(&trans->iters[idx]); -+ if (ret) -+ goto retry_all; -+ } -+ -+ if (hweight64(trans->iters_live) > 1) -+ ret = -EINTR; -+ else -+ trans_for_each_iter(trans, iter) -+ if (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) { -+ ret = -EINTR; -+ break; -+ } -+out: -+ bch2_btree_cache_cannibalize_unlock(c); -+ -+ trans->in_traverse_all = false; -+ return ret; -+} -+ -+int bch2_btree_iter_traverse_all(struct btree_trans *trans) -+{ -+ return __btree_iter_traverse_all(trans, 0); -+} -+ -+static inline bool btree_iter_good_node(struct btree_iter *iter, -+ unsigned l, int check_pos) -+{ -+ if (!is_btree_node(iter, l) || -+ !bch2_btree_node_relock(iter, l)) -+ return false; -+ -+ if (check_pos <= 0 && btree_iter_pos_before_node(iter, iter->l[l].b)) -+ return false; -+ if (check_pos >= 0 && btree_iter_pos_after_node(iter, iter->l[l].b)) -+ return false; -+ return true; -+} -+ -+static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter, -+ int check_pos) -+{ -+ unsigned l = iter->level; -+ -+ while (btree_iter_node(iter, l) && -+ !btree_iter_good_node(iter, l, check_pos)) { -+ btree_node_unlock(iter, l); -+ iter->l[l].b = BTREE_ITER_NO_NODE_UP; -+ l++; -+ } -+ -+ return l; -+} -+ -+/* -+ * This is the main state machine for walking down the btree - walks down to a -+ * specified depth -+ * -+ * Returns 0 on success, -EIO on error (error reading in a btree node). -+ * -+ * On error, caller (peek_node()/peek_key()) must return NULL; the error is -+ * stashed in the iterator and returned from bch2_trans_exit(). 
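The comment above describes btree_iter_traverse_one, implemented just below: pop up to the deepest level that is still locked and still covers the search position, then walk back down one level at a time (btree_iter_down, or btree_iter_lock_root when nothing is held) until depth_want is reached, recording the node held at each level in iter->l[]. A toy model of that level-by-level descent over an implicit binary tree (a userspace sketch only, hypothetical names):

/*
 * Illustrative sketch only: descend a toy implicit binary tree one level at
 * a time, recording the node held at each level, like iter->l[level].b.
 */
#include <stdio.h>

#define TOY_MAX_DEPTH 8

struct toy_iter {
	unsigned level;			/* deepest level currently "locked" */
	unsigned node[TOY_MAX_DEPTH];	/* node index held at each level */
};

/*
 * Walk from the current level down to depth_want. Children of node n are
 * 2n + 1 and 2n + 2; one bit of the search key picks the child per level.
 */
static void toy_traverse(struct toy_iter *it, unsigned depth_want,
			 unsigned key, unsigned root_depth)
{
	if (it->level > root_depth) {		/* nothing held: start at the root */
		it->level = root_depth;
		it->node[root_depth] = 0;
	}

	while (it->level > depth_want) {
		unsigned n = it->node[it->level];
		unsigned bit = 1u << (it->level - 1);

		it->level--;
		it->node[it->level] = 2 * n + 1 + !!(key & bit);
	}
}

int main(void)
{
	struct toy_iter it = { .level = TOY_MAX_DEPTH };

	toy_traverse(&it, 0, 5 /* key bits 101 */, 3 /* root at level 3 */);
	printf("held nodes, root..leaf: %u %u %u %u\n",
	       it.node[3], it.node[2], it.node[1], it.node[0]);
	return 0;
}

The real descent chooses the child by searching the parent node's keys rather than by key bits, and it can fail and restart at any level, but the loop has the same shape.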
-+ */ -+static int btree_iter_traverse_one(struct btree_iter *iter) -+{ -+ unsigned depth_want = iter->level; -+ -+ /* -+ * if we need interior nodes locked, call btree_iter_relock() to make -+ * sure we walk back up enough that we lock them: -+ */ -+ if (iter->uptodate == BTREE_ITER_NEED_RELOCK || -+ iter->locks_want > 1) -+ bch2_btree_iter_relock(iter, false); -+ -+ if (btree_iter_type(iter) == BTREE_ITER_CACHED) -+ return bch2_btree_iter_traverse_cached(iter); -+ -+ if (iter->uptodate < BTREE_ITER_NEED_RELOCK) -+ return 0; -+ -+ if (unlikely(iter->level >= BTREE_MAX_DEPTH)) -+ return 0; -+ -+ /* -+ * XXX: correctly using BTREE_ITER_UPTODATE should make using check_pos -+ * here unnecessary -+ */ -+ iter->level = btree_iter_up_until_good_node(iter, 0); -+ -+ /* -+ * If we've got a btree node locked (i.e. we aren't about to relock the -+ * root) - advance its node iterator if necessary: -+ * -+ * XXX correctly using BTREE_ITER_UPTODATE should make this unnecessary -+ */ -+ if (is_btree_node(iter, iter->level)) { -+ BUG_ON(!btree_iter_pos_in_node(iter, iter->l[iter->level].b)); -+ -+ btree_iter_advance_to_pos(iter, &iter->l[iter->level], -1); -+ } -+ -+ /* -+ * Note: iter->nodes[iter->level] may be temporarily NULL here - that -+ * would indicate to other code that we got to the end of the btree, -+ * here it indicates that relocking the root failed - it's critical that -+ * btree_iter_lock_root() comes next and that it can't fail -+ */ -+ while (iter->level > depth_want) { -+ int ret = btree_iter_node(iter, iter->level) -+ ? btree_iter_down(iter) -+ : btree_iter_lock_root(iter, depth_want); -+ if (unlikely(ret)) { -+ if (ret == 1) -+ return 0; -+ -+ iter->level = depth_want; -+ -+ if (ret == -EIO) { -+ iter->flags |= BTREE_ITER_ERROR; -+ iter->l[iter->level].b = -+ BTREE_ITER_NO_NODE_ERROR; -+ } else { -+ iter->l[iter->level].b = -+ BTREE_ITER_NO_NODE_DOWN; -+ } -+ return ret; -+ } -+ } -+ -+ iter->uptodate = BTREE_ITER_NEED_PEEK; -+ -+ bch2_btree_iter_verify(iter); -+ return 0; -+} -+ -+int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) -+{ -+ struct btree_trans *trans = iter->trans; -+ int ret; -+ -+ ret = bch2_trans_cond_resched(trans) ?: -+ btree_iter_traverse_one(iter); -+ if (unlikely(ret)) -+ ret = __btree_iter_traverse_all(trans, ret); -+ -+ return ret; -+} -+ -+static inline void bch2_btree_iter_checks(struct btree_iter *iter) -+{ -+ enum btree_iter_type type = btree_iter_type(iter); -+ -+ EBUG_ON(iter->btree_id >= BTREE_ID_NR); -+ -+ BUG_ON((type == BTREE_ITER_KEYS || -+ type == BTREE_ITER_CACHED) && -+ (bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 || -+ bkey_cmp(iter->pos, iter->k.p) > 0)); -+ -+ bch2_btree_iter_verify_locks(iter); -+ bch2_btree_iter_verify_level(iter, iter->level); -+} -+ -+/* Iterate across nodes (leaf and interior nodes) */ -+ -+struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) -+{ -+ struct btree *b; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES); -+ bch2_btree_iter_checks(iter); -+ -+ if (iter->uptodate == BTREE_ITER_UPTODATE) -+ return iter->l[iter->level].b; -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return NULL; -+ -+ b = btree_iter_node(iter, iter->level); -+ if (!b) -+ return NULL; -+ -+ BUG_ON(bkey_cmp(b->key.k.p, iter->pos) < 0); -+ -+ iter->pos = b->key.k.p; -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ -+ bch2_btree_iter_verify(iter); -+ -+ return b; -+} -+ -+struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) -+{ -+ struct btree *b; -+ int ret; -+ -+ 
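__bch2_btree_iter_traverse above retries a failed single-iterator traversal through __btree_iter_traverse_all, which drops every lock, sorts all of the transaction's iterators into one global order with btree_iter_cmp, and re-traverses them in that order; taking locks in a single agreed order is what lets concurrent transactions converge after an -EINTR restart instead of deadlocking. A userspace sketch of that ordering rule with plain mutexes (hypothetical names, not the kernel's six locks):

/*
 * Illustrative sketch only: acquire a set of locks in one global order, the
 * way __btree_iter_traverse_all re-traverses iterators after sorting them.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_res {
	unsigned id;			/* stands in for (btree id, pos) */
	pthread_mutex_t lock;
};

static int cmp_by_id(const void *l, const void *r)
{
	const struct toy_res *a = *(const struct toy_res * const *) l;
	const struct toy_res *b = *(const struct toy_res * const *) r;

	return (a->id > b->id) - (a->id < b->id);
}

/* Lock every resource the transaction touches, always in sorted order. */
static void lock_all_sorted(struct toy_res **res, unsigned nr)
{
	unsigned i;

	qsort(res, nr, sizeof(*res), cmp_by_id);
	for (i = 0; i < nr; i++)
		pthread_mutex_lock(&res[i]->lock);
}

static void unlock_all(struct toy_res **res, unsigned nr)
{
	while (nr--)
		pthread_mutex_unlock(&res[nr]->lock);
}

int main(void)
{
	struct toy_res a = { 7, PTHREAD_MUTEX_INITIALIZER };
	struct toy_res b = { 3, PTHREAD_MUTEX_INITIALIZER };
	struct toy_res *want[] = { &a, &b };	/* requested in the "wrong" order */

	lock_all_sorted(want, 2);
	printf("locked %u then %u\n", want[0]->id, want[1]->id);	/* 3 then 7 */
	unlock_all(want, 2);
	return 0;
}

In the code above the "sort" is the bubble_sort over iterator indices and the "locks" are the btree node locks reacquired by re-traversing each iterator in turn.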
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES); -+ bch2_btree_iter_checks(iter); -+ -+ /* already got to end? */ -+ if (!btree_iter_node(iter, iter->level)) -+ return NULL; -+ -+ bch2_trans_cond_resched(iter->trans); -+ -+ btree_iter_up(iter); -+ -+ if (!bch2_btree_node_relock(iter, iter->level)) -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return NULL; -+ -+ /* got to end? */ -+ b = btree_iter_node(iter, iter->level); -+ if (!b) -+ return NULL; -+ -+ if (bkey_cmp(iter->pos, b->key.k.p) < 0) { -+ /* -+ * Haven't gotten to the end of the parent node: go back down to -+ * the next child node -+ */ -+ -+ /* -+ * We don't really want to be unlocking here except we can't -+ * directly tell btree_iter_traverse() "traverse to this level" -+ * except by setting iter->level, so we have to unlock so we -+ * don't screw up our lock invariants: -+ */ -+ if (btree_node_read_locked(iter, iter->level)) -+ btree_node_unlock(iter, iter->level); -+ -+ iter->pos = bkey_successor(iter->pos); -+ iter->level = iter->min_depth; -+ -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return NULL; -+ -+ b = iter->l[iter->level].b; -+ } -+ -+ iter->pos = b->key.k.p; -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ -+ bch2_btree_iter_verify(iter); -+ -+ return b; -+} -+ -+/* Iterate across keys (in leaf nodes only) */ -+ -+void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_pos) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ -+ EBUG_ON(iter->level != 0); -+ EBUG_ON(bkey_cmp(new_pos, iter->pos) < 0); -+ EBUG_ON(!btree_node_locked(iter, 0)); -+ EBUG_ON(bkey_cmp(new_pos, l->b->key.k.p) > 0); -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = new_pos; -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+ -+ btree_iter_advance_to_pos(iter, l, -1); -+ -+ /* -+ * XXX: -+ * keeping a node locked that's outside (even just outside) iter->pos -+ * breaks __bch2_btree_node_lock(). This seems to only affect -+ * bch2_btree_node_get_sibling so for now it's fixed there, but we -+ * should try to get rid of this corner case. -+ * -+ * (this behaviour is currently needed for BTREE_INSERT_NOUNLOCK) -+ */ -+ -+ if (bch2_btree_node_iter_end(&l->iter) && -+ btree_iter_pos_after_node(iter, l->b)) -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+} -+ -+static void btree_iter_pos_changed(struct btree_iter *iter, int cmp) -+{ -+ unsigned l = iter->level; -+ -+ if (!cmp) -+ goto out; -+ -+ if (unlikely(btree_iter_type(iter) == BTREE_ITER_CACHED)) { -+ btree_node_unlock(iter, 0); -+ iter->l[0].b = BTREE_ITER_NO_NODE_UP; -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+ return; -+ } -+ -+ l = btree_iter_up_until_good_node(iter, cmp); -+ -+ if (btree_iter_node(iter, l)) { -+ /* -+ * We might have to skip over many keys, or just a few: try -+ * advancing the node iterator, and if we have to skip over too -+ * many keys just reinit it (or if we're rewinding, since that -+ * is expensive). 
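As the comment above explains, and the code just below implements with a cap of eight keys, a small forward movement is handled by nudging the existing node iterator along, while anything larger (or any backward movement) falls back to a fresh seek, since a new binary search is cheaper than a long linear scan. A toy version of that advance-or-reseek decision over a sorted array (hypothetical helpers, not the bcachefs node iterator):

/*
 * Illustrative sketch only: advance an existing cursor a bounded number of
 * steps toward a new position, else reseek, mirroring the max_advance cutoff
 * used by btree_iter_advance_to_pos().
 */
#include <stdbool.h>
#include <stdio.h>

/* Advance *pos until a[*pos] >= target, giving up after max_advance steps so
 * a long jump doesn't degrade into a long linear scan. */
static bool advance_to(const int *a, unsigned nr, unsigned *pos,
		       int target, int max_advance)
{
	int advanced = 0;

	while (*pos < nr && a[*pos] < target) {
		if (max_advance > 0 && advanced >= max_advance)
			return false;
		(*pos)++;
		advanced++;
	}
	return true;
}

/* Fresh seek: standard lower-bound binary search. */
static unsigned lower_bound(const int *a, unsigned nr, int target)
{
	unsigned lo = 0, hi = nr;

	while (lo < hi) {
		unsigned mid = lo + (hi - lo) / 2;

		if (a[mid] < target)
			lo = mid + 1;
		else
			hi = mid;
	}
	return lo;
}

int main(void)
{
	int a[] = { 1, 3, 5, 7, 9, 11, 13, 15 };
	unsigned nr = sizeof(a) / sizeof(a[0]), pos = 1;

	if (!advance_to(a, nr, &pos, 7, 3))	/* small move: just nudge */
		pos = lower_bound(a, nr, 7);
	printf("after small move: index %u\n", pos);	/* 3 */

	if (!advance_to(a, nr, &pos, 15, 3))	/* big jump: give up and reseek */
		pos = lower_bound(a, nr, 15);
	printf("after big jump:   index %u\n", pos);	/* 7 */
	return 0;
}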
-+ */ -+ if (cmp < 0 || -+ !btree_iter_advance_to_pos(iter, &iter->l[l], 8)) -+ __btree_iter_init(iter, l); -+ -+ /* Don't leave it locked if we're not supposed to: */ -+ if (btree_lock_want(iter, l) == BTREE_NODE_UNLOCKED) -+ btree_node_unlock(iter, l); -+ } -+out: -+ if (l != iter->level) -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+ else -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+} -+ -+void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos, -+ bool strictly_greater) -+{ -+ struct bpos old = btree_iter_search_key(iter); -+ int cmp; -+ -+ iter->flags &= ~BTREE_ITER_IS_EXTENTS; -+ iter->flags |= strictly_greater ? BTREE_ITER_IS_EXTENTS : 0; -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = new_pos; -+ -+ cmp = bkey_cmp(btree_iter_search_key(iter), old); -+ -+ btree_iter_pos_changed(iter, cmp); -+} -+ -+void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) -+{ -+ int cmp = bkey_cmp(new_pos, iter->pos); -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = new_pos; -+ -+ btree_iter_pos_changed(iter, cmp); -+} -+ -+static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ bool ret; -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = l->b->key.k.p; -+ -+ ret = bkey_cmp(iter->pos, POS_MAX) != 0; -+ if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) -+ iter->k.p = iter->pos = bkey_successor(iter->pos); -+ -+ btree_iter_pos_changed(iter, 1); -+ return ret; -+} -+ -+static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ bool ret; -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = l->b->data->min_key; -+ iter->uptodate = BTREE_ITER_NEED_TRAVERSE; -+ -+ ret = bkey_cmp(iter->pos, POS_MIN) != 0; -+ if (ret) { -+ iter->k.p = iter->pos = bkey_predecessor(iter->pos); -+ -+ if (iter->flags & BTREE_ITER_IS_EXTENTS) -+ iter->k.p = iter->pos = bkey_predecessor(iter->pos); -+ } -+ -+ btree_iter_pos_changed(iter, -1); -+ return ret; -+} -+ -+/** -+ * btree_iter_peek_uptodate - given an iterator that is uptodate, return the key -+ * it currently points to -+ */ -+static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c ret = { .k = &iter->k }; -+ -+ if (!bkey_deleted(&iter->k)) { -+ struct bkey_packed *_k = -+ __bch2_btree_node_iter_peek_all(&l->iter, l->b); -+ -+ ret.v = bkeyp_val(&l->b->format, _k); -+ -+ if (debug_check_iterators(iter->trans->c)) { -+ struct bkey k = bkey_unpack_key(l->b, _k); -+ -+ BUG_ON(memcmp(&k, &iter->k, sizeof(k))); -+ } -+ -+ if (debug_check_bkeys(iter->trans->c)) -+ bch2_bkey_debugcheck(iter->trans->c, l->b, ret); -+ } -+ -+ return ret; -+} -+ -+/** -+ * bch2_btree_iter_peek: returns first key greater than or equal to iterator's -+ * current position -+ */ -+struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c k; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ if (iter->uptodate == BTREE_ITER_UPTODATE && -+ !bkey_deleted(&iter->k)) -+ return btree_iter_peek_uptodate(iter); -+ -+ while (1) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ k = __btree_iter_peek(iter, l); -+ if (likely(k.k)) -+ break; -+ -+ if (!btree_iter_set_pos_to_next_leaf(iter)) -+ return bkey_s_c_null; -+ } -+ -+ /* -+ * 
iter->pos should always be equal to the key we just -+ * returned - except extents can straddle iter->pos: -+ */ -+ if (!(iter->flags & BTREE_ITER_IS_EXTENTS) || -+ bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) -+ iter->pos = bkey_start_pos(k.k); -+ -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ -+ bch2_btree_iter_verify_level(iter, 0); -+ return k; -+} -+ -+/** -+ * bch2_btree_iter_next: returns first key greater than iterator's current -+ * position -+ */ -+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) -+{ -+ if (unlikely(!bkey_cmp(iter->k.p, POS_MAX))) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, -+ (iter->flags & BTREE_ITER_IS_EXTENTS) -+ ? iter->k.p -+ : bkey_successor(iter->k.p)); -+ -+ return bch2_btree_iter_peek(iter); -+} -+ -+static struct bkey_s_c __btree_trans_updates_peek(struct btree_iter *iter) -+{ -+ struct bpos pos = btree_iter_search_key(iter); -+ struct btree_trans *trans = iter->trans; -+ struct btree_insert_entry *i; -+ -+ trans_for_each_update2(trans, i) -+ if ((cmp_int(iter->btree_id, i->iter->btree_id) ?: -+ bkey_cmp(pos, i->k->k.p)) <= 0) -+ break; -+ -+ return i < trans->updates2 + trans->nr_updates2 && -+ iter->btree_id == i->iter->btree_id -+ ? bkey_i_to_s_c(i->k) -+ : bkey_s_c_null; -+} -+ -+static struct bkey_s_c __bch2_btree_iter_peek_with_updates(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c k = __btree_iter_peek(iter, l); -+ struct bkey_s_c u = __btree_trans_updates_peek(iter); -+ -+ if (k.k && (!u.k || bkey_cmp(k.k->p, u.k->p) < 0)) -+ return k; -+ if (u.k && bkey_cmp(u.k->p, l->b->key.k.p) <= 0) { -+ iter->k = *u.k; -+ return u; -+ } -+ return bkey_s_c_null; -+} -+ -+struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter) -+{ -+ struct bkey_s_c k; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ while (1) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ k = __bch2_btree_iter_peek_with_updates(iter); -+ -+ if (k.k && bkey_deleted(k.k)) { -+ bch2_btree_iter_set_pos(iter, -+ (iter->flags & BTREE_ITER_IS_EXTENTS) -+ ? iter->k.p -+ : bkey_successor(iter->k.p)); -+ continue; -+ } -+ -+ if (likely(k.k)) -+ break; -+ -+ if (!btree_iter_set_pos_to_next_leaf(iter)) -+ return bkey_s_c_null; -+ } -+ -+ /* -+ * iter->pos should always be equal to the key we just -+ * returned - except extents can straddle iter->pos: -+ */ -+ if (!(iter->flags & BTREE_ITER_IS_EXTENTS) || -+ bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) -+ iter->pos = bkey_start_pos(k.k); -+ -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ return k; -+} -+ -+struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter) -+{ -+ if (unlikely(!bkey_cmp(iter->k.p, POS_MAX))) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, -+ (iter->flags & BTREE_ITER_IS_EXTENTS) -+ ? 
iter->k.p -+ : bkey_successor(iter->k.p)); -+ -+ return bch2_btree_iter_peek_with_updates(iter); -+} -+ -+/** -+ * bch2_btree_iter_peek_prev: returns first key less than or equal to -+ * iterator's current position -+ */ -+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) -+{ -+ struct bpos pos = iter->pos; -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c k; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ if (iter->uptodate == BTREE_ITER_UPTODATE && -+ !bkey_deleted(&iter->k)) -+ return btree_iter_peek_uptodate(iter); -+ -+ while (1) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ k = __btree_iter_peek(iter, l); -+ if (!k.k || bkey_cmp(bkey_start_pos(k.k), pos) > 0) -+ k = __btree_iter_prev(iter, l); -+ -+ if (likely(k.k)) -+ break; -+ -+ if (!btree_iter_set_pos_to_prev_leaf(iter)) -+ return bkey_s_c_null; -+ } -+ -+ EBUG_ON(bkey_cmp(bkey_start_pos(k.k), pos) > 0); -+ iter->pos = bkey_start_pos(k.k); -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ return k; -+} -+ -+/** -+ * bch2_btree_iter_prev: returns first key less than iterator's current -+ * position -+ */ -+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter) -+{ -+ struct bpos pos = bkey_start_pos(&iter->k); -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ if (unlikely(!bkey_cmp(pos, POS_MIN))) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, bkey_predecessor(pos)); -+ -+ return bch2_btree_iter_peek_prev(iter); -+} -+ -+static inline struct bkey_s_c -+__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct btree_node_iter node_iter; -+ struct bkey_s_c k; -+ struct bkey n; -+ int ret; -+ -+ /* keys & holes can't span inode numbers: */ -+ if (iter->pos.offset == KEY_OFFSET_MAX) { -+ if (iter->pos.inode == KEY_INODE_MAX) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, bkey_successor(iter->pos)); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ } -+ -+ /* -+ * iterator is now at the correct position for inserting at iter->pos, -+ * but we need to keep iterating until we find the first non whiteout so -+ * we know how big a hole we have, if any: -+ */ -+ -+ node_iter = l->iter; -+ k = __btree_iter_unpack(iter, l, &iter->k, -+ bch2_btree_node_iter_peek(&node_iter, l->b)); -+ -+ if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) { -+ /* -+ * We're not setting iter->uptodate because the node iterator -+ * doesn't necessarily point at the key we're returning: -+ */ -+ -+ EBUG_ON(bkey_cmp(k.k->p, iter->pos) <= 0); -+ bch2_btree_iter_verify_level(iter, 0); -+ return k; -+ } -+ -+ /* hole */ -+ -+ if (!k.k) -+ k.k = &l->b->key.k; -+ -+ bkey_init(&n); -+ n.p = iter->pos; -+ bch2_key_resize(&n, -+ min_t(u64, KEY_SIZE_MAX, -+ (k.k->p.inode == n.p.inode -+ ? 
bkey_start_offset(k.k) -+ : KEY_OFFSET_MAX) - -+ n.p.offset)); -+ -+ EBUG_ON(!n.size); -+ -+ iter->k = n; -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ -+ bch2_btree_iter_verify_level(iter, 0); -+ return (struct bkey_s_c) { &iter->k, NULL }; -+} -+ -+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c k; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ if (iter->uptodate == BTREE_ITER_UPTODATE) -+ return btree_iter_peek_uptodate(iter); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ if (iter->flags & BTREE_ITER_IS_EXTENTS) -+ return __bch2_btree_iter_peek_slot_extents(iter); -+ -+ k = __btree_iter_peek_all(iter, l, &iter->k); -+ -+ EBUG_ON(k.k && bkey_deleted(k.k) && bkey_cmp(k.k->p, iter->pos) == 0); -+ -+ if (!k.k || bkey_cmp(iter->pos, k.k->p)) { -+ /* hole */ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos; -+ k = (struct bkey_s_c) { &iter->k, NULL }; -+ } -+ -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ bch2_btree_iter_verify_level(iter, 0); -+ return k; -+} -+ -+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter) -+{ -+ if (unlikely(!bkey_cmp(iter->k.p, POS_MAX))) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, -+ (iter->flags & BTREE_ITER_IS_EXTENTS) -+ ? iter->k.p -+ : bkey_successor(iter->k.p)); -+ -+ return bch2_btree_iter_peek_slot(iter); -+} -+ -+struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter) -+{ -+ struct bkey_cached *ck; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_CACHED); -+ bch2_btree_iter_checks(iter); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ ck = (void *) iter->l[0].b; -+ -+ EBUG_ON(iter->btree_id != ck->key.btree_id || -+ bkey_cmp(iter->pos, ck->key.pos)); -+ BUG_ON(!ck->valid); -+ -+ return bkey_i_to_s_c(ck->k); -+} -+ -+static inline void bch2_btree_iter_init(struct btree_trans *trans, -+ struct btree_iter *iter, enum btree_id btree_id, -+ struct bpos pos, unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ unsigned i; -+ -+ if (btree_node_type_is_extents(btree_id) && -+ !(flags & BTREE_ITER_NODES)) -+ flags |= BTREE_ITER_IS_EXTENTS; -+ -+ iter->trans = trans; -+ iter->pos = pos; -+ bkey_init(&iter->k); -+ iter->k.p = pos; -+ iter->flags = flags; -+ iter->uptodate = BTREE_ITER_NEED_TRAVERSE; -+ iter->btree_id = btree_id; -+ iter->level = 0; -+ iter->min_depth = 0; -+ iter->locks_want = flags & BTREE_ITER_INTENT ? 
1 : 0; -+ iter->nodes_locked = 0; -+ iter->nodes_intent_locked = 0; -+ for (i = 0; i < ARRAY_SIZE(iter->l); i++) -+ iter->l[i].b = BTREE_ITER_NO_NODE_INIT; -+ -+ prefetch(c->btree_roots[btree_id].b); -+} -+ -+/* new transactional stuff: */ -+ -+static inline void __bch2_trans_iter_free(struct btree_trans *trans, -+ unsigned idx) -+{ -+ __bch2_btree_iter_unlock(&trans->iters[idx]); -+ trans->iters_linked &= ~(1ULL << idx); -+ trans->iters_live &= ~(1ULL << idx); -+ trans->iters_touched &= ~(1ULL << idx); -+} -+ -+int bch2_trans_iter_put(struct btree_trans *trans, -+ struct btree_iter *iter) -+{ -+ int ret; -+ -+ if (IS_ERR_OR_NULL(iter)) -+ return 0; -+ -+ BUG_ON(trans->iters + iter->idx != iter); -+ -+ ret = btree_iter_err(iter); -+ -+ if (!(trans->iters_touched & (1ULL << iter->idx)) && -+ !(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT)) -+ __bch2_trans_iter_free(trans, iter->idx); -+ -+ trans->iters_live &= ~(1ULL << iter->idx); -+ return ret; -+} -+ -+int bch2_trans_iter_free(struct btree_trans *trans, -+ struct btree_iter *iter) -+{ -+ if (IS_ERR_OR_NULL(iter)) -+ return 0; -+ -+ trans->iters_touched &= ~(1ULL << iter->idx); -+ -+ return bch2_trans_iter_put(trans, iter); -+} -+ -+static int bch2_trans_realloc_iters(struct btree_trans *trans, -+ unsigned new_size) -+{ -+ void *p, *new_iters, *new_updates, *new_updates2; -+ size_t iters_bytes; -+ size_t updates_bytes; -+ -+ new_size = roundup_pow_of_two(new_size); -+ -+ BUG_ON(new_size > BTREE_ITER_MAX); -+ -+ if (new_size <= trans->size) -+ return 0; -+ -+ BUG_ON(trans->used_mempool); -+ -+ bch2_trans_unlock(trans); -+ -+ iters_bytes = sizeof(struct btree_iter) * new_size; -+ updates_bytes = sizeof(struct btree_insert_entry) * new_size; -+ -+ p = kmalloc(iters_bytes + -+ updates_bytes + -+ updates_bytes, GFP_NOFS); -+ if (p) -+ goto success; -+ -+ p = mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS); -+ new_size = BTREE_ITER_MAX; -+ -+ trans->used_mempool = true; -+success: -+ new_iters = p; p += iters_bytes; -+ new_updates = p; p += updates_bytes; -+ new_updates2 = p; p += updates_bytes; -+ -+ memcpy(new_iters, trans->iters, -+ sizeof(struct btree_iter) * trans->nr_iters); -+ memcpy(new_updates, trans->updates, -+ sizeof(struct btree_insert_entry) * trans->nr_updates); -+ memcpy(new_updates2, trans->updates2, -+ sizeof(struct btree_insert_entry) * trans->nr_updates2); -+ -+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) -+ memset(trans->iters, POISON_FREE, -+ sizeof(struct btree_iter) * trans->nr_iters + -+ sizeof(struct btree_insert_entry) * trans->nr_iters); -+ -+ if (trans->iters != trans->iters_onstack) -+ kfree(trans->iters); -+ -+ trans->iters = new_iters; -+ trans->updates = new_updates; -+ trans->updates2 = new_updates2; -+ trans->size = new_size; -+ -+ if (trans->iters_live) { -+ trace_trans_restart_iters_realloced(trans->ip, trans->size); -+ return -EINTR; -+ } -+ -+ return 0; -+} -+ -+static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans) -+{ -+ unsigned idx = __ffs64(~trans->iters_linked); -+ -+ if (idx < trans->nr_iters) -+ goto got_slot; -+ -+ if (trans->nr_iters == trans->size) { -+ int ret; -+ -+ if (trans->nr_iters >= BTREE_ITER_MAX) { -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) { -+ pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps", -+ bch2_btree_ids[iter->btree_id], -+ iter->pos.inode, -+ iter->pos.offset, -+ (trans->iters_live & (1ULL << iter->idx)) ? " live" : "", -+ (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "", -+ iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? 
" keep" : "", -+ (void *) iter->ip_allocated); -+ } -+ -+ panic("trans iter oveflow\n"); -+ } -+ -+ ret = bch2_trans_realloc_iters(trans, trans->size * 2); -+ if (ret) -+ return ERR_PTR(ret); -+ } -+ -+ idx = trans->nr_iters++; -+ BUG_ON(trans->nr_iters > trans->size); -+ -+ trans->iters[idx].idx = idx; -+got_slot: -+ BUG_ON(trans->iters_linked & (1ULL << idx)); -+ trans->iters_linked |= 1ULL << idx; -+ trans->iters[idx].flags = 0; -+ return &trans->iters[idx]; -+} -+ -+static inline void btree_iter_copy(struct btree_iter *dst, -+ struct btree_iter *src) -+{ -+ unsigned i, idx = dst->idx; -+ -+ *dst = *src; -+ dst->idx = idx; -+ dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; -+ -+ for (i = 0; i < BTREE_MAX_DEPTH; i++) -+ if (btree_node_locked(dst, i)) -+ six_lock_increment(&dst->l[i].b->c.lock, -+ __btree_lock_want(dst, i)); -+ -+ dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; -+ dst->flags &= ~BTREE_ITER_SET_POS_AFTER_COMMIT; -+} -+ -+static inline struct bpos bpos_diff(struct bpos l, struct bpos r) -+{ -+ if (bkey_cmp(l, r) > 0) -+ swap(l, r); -+ -+ return POS(r.inode - l.inode, r.offset - l.offset); -+} -+ -+static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans, -+ unsigned btree_id, struct bpos pos, -+ unsigned flags) -+{ -+ struct btree_iter *iter, *best = NULL; -+ -+ BUG_ON(trans->nr_iters > BTREE_ITER_MAX); -+ -+ trans_for_each_iter(trans, iter) { -+ if (btree_iter_type(iter) != (flags & BTREE_ITER_TYPE)) -+ continue; -+ -+ if (iter->btree_id != btree_id) -+ continue; -+ -+ if (best && -+ bkey_cmp(bpos_diff(best->pos, pos), -+ bpos_diff(iter->pos, pos)) < 0) -+ continue; -+ -+ best = iter; -+ } -+ -+ if (!best) { -+ iter = btree_trans_iter_alloc(trans); -+ if (IS_ERR(iter)) -+ return iter; -+ -+ bch2_btree_iter_init(trans, iter, btree_id, pos, flags); -+ } else if ((trans->iters_live & (1ULL << best->idx)) || -+ (best->flags & BTREE_ITER_KEEP_UNTIL_COMMIT)) { -+ iter = btree_trans_iter_alloc(trans); -+ if (IS_ERR(iter)) -+ return iter; -+ -+ btree_iter_copy(iter, best); -+ } else { -+ iter = best; -+ } -+ -+ iter->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; -+ iter->flags &= ~BTREE_ITER_USER_FLAGS; -+ iter->flags |= flags & BTREE_ITER_USER_FLAGS; -+ -+ if (iter->flags & BTREE_ITER_INTENT) -+ bch2_btree_iter_upgrade(iter, 1); -+ else -+ bch2_btree_iter_downgrade(iter); -+ -+ BUG_ON(iter->btree_id != btree_id); -+ BUG_ON((iter->flags ^ flags) & BTREE_ITER_TYPE); -+ BUG_ON(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT); -+ BUG_ON(iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT); -+ BUG_ON(trans->iters_live & (1ULL << iter->idx)); -+ -+ trans->iters_live |= 1ULL << iter->idx; -+ trans->iters_touched |= 1ULL << iter->idx; -+ -+ return iter; -+} -+ -+struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, -+ enum btree_id btree_id, -+ struct bpos pos, unsigned flags) -+{ -+ struct btree_iter *iter = -+ __btree_trans_get_iter(trans, btree_id, pos, flags); -+ -+ if (!IS_ERR(iter)) -+ __bch2_btree_iter_set_pos(iter, pos, -+ btree_node_type_is_extents(btree_id)); -+ return iter; -+} -+ -+struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans, -+ enum btree_id btree_id, -+ struct bpos pos, -+ unsigned locks_want, -+ unsigned depth, -+ unsigned flags) -+{ -+ struct btree_iter *iter = -+ __btree_trans_get_iter(trans, btree_id, pos, -+ flags|BTREE_ITER_NODES); -+ unsigned i; -+ -+ BUG_ON(IS_ERR(iter)); -+ BUG_ON(bkey_cmp(iter->pos, pos)); -+ -+ iter->locks_want = locks_want; -+ iter->level = depth; -+ iter->min_depth = depth; -+ -+ for (i = 0; i < 
ARRAY_SIZE(iter->l); i++) -+ iter->l[i].b = NULL; -+ iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT; -+ -+ return iter; -+} -+ -+struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans, -+ struct btree_iter *src) -+{ -+ struct btree_iter *iter; -+ -+ iter = btree_trans_iter_alloc(trans); -+ if (IS_ERR(iter)) -+ return iter; -+ -+ btree_iter_copy(iter, src); -+ -+ trans->iters_live |= 1ULL << iter->idx; -+ /* -+ * We don't need to preserve this iter since it's cheap to copy it -+ * again - this will cause trans_iter_put() to free it right away: -+ */ -+ trans->iters_touched &= ~(1ULL << iter->idx); -+ -+ return iter; -+} -+ -+static int bch2_trans_preload_mem(struct btree_trans *trans, size_t size) -+{ -+ if (size > trans->mem_bytes) { -+ size_t old_bytes = trans->mem_bytes; -+ size_t new_bytes = roundup_pow_of_two(size); -+ void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS); -+ -+ if (!new_mem) -+ return -ENOMEM; -+ -+ trans->mem = new_mem; -+ trans->mem_bytes = new_bytes; -+ -+ if (old_bytes) { -+ trace_trans_restart_mem_realloced(trans->ip, new_bytes); -+ return -EINTR; -+ } -+ } -+ -+ return 0; -+} -+ -+void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) -+{ -+ void *p; -+ int ret; -+ -+ ret = bch2_trans_preload_mem(trans, trans->mem_top + size); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ p = trans->mem + trans->mem_top; -+ trans->mem_top += size; -+ return p; -+} -+ -+inline void bch2_trans_unlink_iters(struct btree_trans *trans) -+{ -+ u64 iters = trans->iters_linked & -+ ~trans->iters_touched & -+ ~trans->iters_live; -+ -+ while (iters) { -+ unsigned idx = __ffs64(iters); -+ -+ iters &= ~(1ULL << idx); -+ __bch2_trans_iter_free(trans, idx); -+ } -+} -+ -+void bch2_trans_reset(struct btree_trans *trans, unsigned flags) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) -+ iter->flags &= ~(BTREE_ITER_KEEP_UNTIL_COMMIT| -+ BTREE_ITER_SET_POS_AFTER_COMMIT); -+ -+ bch2_trans_unlink_iters(trans); -+ -+ trans->iters_touched &= trans->iters_live; -+ -+ trans->need_reset = 0; -+ trans->nr_updates = 0; -+ trans->nr_updates2 = 0; -+ trans->mem_top = 0; -+ -+ trans->extra_journal_entries = NULL; -+ trans->extra_journal_entry_u64s = 0; -+ -+ if (trans->fs_usage_deltas) { -+ trans->fs_usage_deltas->used = 0; -+ memset(&trans->fs_usage_deltas->memset_start, 0, -+ (void *) &trans->fs_usage_deltas->memset_end - -+ (void *) &trans->fs_usage_deltas->memset_start); -+ } -+ -+ if (!(flags & TRANS_RESET_NOTRAVERSE)) -+ bch2_btree_iter_traverse_all(trans); -+} -+ -+void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, -+ unsigned expected_nr_iters, -+ size_t expected_mem_bytes) -+{ -+ memset(trans, 0, offsetof(struct btree_trans, iters_onstack)); -+ -+ /* -+ * reallocating iterators currently completely breaks -+ * bch2_trans_iter_put(): -+ */ -+ expected_nr_iters = BTREE_ITER_MAX; -+ -+ trans->c = c; -+ trans->ip = _RET_IP_; -+ trans->size = ARRAY_SIZE(trans->iters_onstack); -+ trans->iters = trans->iters_onstack; -+ trans->updates = trans->updates_onstack; -+ trans->updates2 = trans->updates2_onstack; -+ trans->fs_usage_deltas = NULL; -+ -+ if (expected_nr_iters > trans->size) -+ bch2_trans_realloc_iters(trans, expected_nr_iters); -+ -+ if (expected_mem_bytes) -+ bch2_trans_preload_mem(trans, expected_mem_bytes); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ trans->pid = current->pid; -+ mutex_lock(&c->btree_trans_lock); -+ list_add(&trans->list, &c->btree_trans_list); -+ mutex_unlock(&c->btree_trans_lock); -+#endif -+} -+ -+int 
bch2_trans_exit(struct btree_trans *trans) -+{ -+ bch2_trans_unlock(trans); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ mutex_lock(&trans->c->btree_trans_lock); -+ list_del(&trans->list); -+ mutex_unlock(&trans->c->btree_trans_lock); -+#endif -+ -+ bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres); -+ -+ kfree(trans->fs_usage_deltas); -+ kfree(trans->mem); -+ if (trans->used_mempool) -+ mempool_free(trans->iters, &trans->c->btree_iters_pool); -+ else if (trans->iters != trans->iters_onstack) -+ kfree(trans->iters); -+ trans->mem = (void *) 0x1; -+ trans->iters = (void *) 0x1; -+ -+ return trans->error ? -EIO : 0; -+} -+ -+static void bch2_btree_iter_node_to_text(struct printbuf *out, -+ struct btree_bkey_cached_common *_b, -+ enum btree_iter_type type) -+{ -+ pr_buf(out, " %px l=%u %s:", -+ _b, _b->level, bch2_btree_ids[_b->btree_id]); -+ bch2_bpos_to_text(out, btree_node_pos(_b, type)); -+} -+ -+void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct btree_trans *trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ unsigned l; -+ -+ mutex_lock(&c->btree_trans_lock); -+ list_for_each_entry(trans, &c->btree_trans_list, list) { -+ pr_buf(out, "%i %px %ps\n", trans->pid, trans, (void *) trans->ip); -+ -+ trans_for_each_iter(trans, iter) { -+ if (!iter->nodes_locked) -+ continue; -+ -+ pr_buf(out, " iter %u %s:", -+ iter->idx, -+ bch2_btree_ids[iter->btree_id]); -+ bch2_bpos_to_text(out, iter->pos); -+ pr_buf(out, "\n"); -+ -+ for (l = 0; l < BTREE_MAX_DEPTH; l++) { -+ if (btree_node_locked(iter, l)) { -+ pr_buf(out, " %s l=%u ", -+ btree_node_intent_locked(iter, l) ? "i" : "r", l); -+ bch2_btree_iter_node_to_text(out, -+ (void *) iter->l[l].b, -+ btree_iter_type(iter)); -+ pr_buf(out, "\n"); -+ } -+ } -+ } -+ -+ b = READ_ONCE(trans->locking); -+ if (b) { -+ pr_buf(out, " locking iter %u l=%u %s:", -+ trans->locking_iter_idx, -+ trans->locking_level, -+ bch2_btree_ids[trans->locking_btree_id]); -+ bch2_bpos_to_text(out, trans->locking_pos); -+ -+ -+ pr_buf(out, " node "); -+ bch2_btree_iter_node_to_text(out, -+ (void *) b, -+ btree_iter_type(&trans->iters[trans->locking_iter_idx])); -+ pr_buf(out, "\n"); -+ } -+ } -+ mutex_unlock(&c->btree_trans_lock); -+#endif -+} -+ -+void bch2_fs_btree_iter_exit(struct bch_fs *c) -+{ -+ mempool_exit(&c->btree_iters_pool); -+} -+ -+int bch2_fs_btree_iter_init(struct bch_fs *c) -+{ -+ unsigned nr = BTREE_ITER_MAX; -+ -+ INIT_LIST_HEAD(&c->btree_trans_list); -+ mutex_init(&c->btree_trans_lock); -+ -+ return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1, -+ sizeof(struct btree_iter) * nr + -+ sizeof(struct btree_insert_entry) * nr + -+ sizeof(struct btree_insert_entry) * nr); -+} -diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h -new file mode 100644 -index 000000000000..bd9ec3ec9a92 ---- /dev/null -+++ b/fs/bcachefs/btree_iter.h -@@ -0,0 +1,314 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_ITER_H -+#define _BCACHEFS_BTREE_ITER_H -+ -+#include "bset.h" -+#include "btree_types.h" -+ -+static inline void btree_iter_set_dirty(struct btree_iter *iter, -+ enum btree_iter_uptodate u) -+{ -+ iter->uptodate = max_t(unsigned, iter->uptodate, u); -+} -+ -+static inline struct btree *btree_iter_node(struct btree_iter *iter, -+ unsigned level) -+{ -+ return level < BTREE_MAX_DEPTH ? 
iter->l[level].b : NULL; -+} -+ -+static inline bool btree_node_lock_seq_matches(const struct btree_iter *iter, -+ const struct btree *b, unsigned level) -+{ -+ /* -+ * We don't compare the low bits of the lock sequence numbers because -+ * @iter might have taken a write lock on @b, and we don't want to skip -+ * the linked iterator if the sequence numbers were equal before taking -+ * that write lock. The lock sequence number is incremented by taking -+ * and releasing write locks and is even when unlocked: -+ */ -+ return iter->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1; -+} -+ -+static inline struct btree *btree_node_parent(struct btree_iter *iter, -+ struct btree *b) -+{ -+ return btree_iter_node(iter, b->c.level + 1); -+} -+ -+static inline bool btree_trans_has_multiple_iters(const struct btree_trans *trans) -+{ -+ return hweight64(trans->iters_linked) > 1; -+} -+ -+static inline int btree_iter_err(const struct btree_iter *iter) -+{ -+ return iter->flags & BTREE_ITER_ERROR ? -EIO : 0; -+} -+ -+/* Iterate over iters within a transaction: */ -+ -+#define trans_for_each_iter_all(_trans, _iter) \ -+ for (_iter = (_trans)->iters; \ -+ _iter < (_trans)->iters + (_trans)->nr_iters; \ -+ _iter++) -+ -+static inline struct btree_iter * -+__trans_next_iter(struct btree_trans *trans, unsigned idx) -+{ -+ EBUG_ON(idx < trans->nr_iters && trans->iters[idx].idx != idx); -+ -+ for (; idx < trans->nr_iters; idx++) -+ if (trans->iters_linked & (1ULL << idx)) -+ return &trans->iters[idx]; -+ -+ return NULL; -+} -+ -+#define trans_for_each_iter(_trans, _iter) \ -+ for (_iter = __trans_next_iter((_trans), 0); \ -+ (_iter); \ -+ _iter = __trans_next_iter((_trans), (_iter)->idx + 1)) -+ -+static inline bool __iter_has_node(const struct btree_iter *iter, -+ const struct btree *b) -+{ -+ return iter->l[b->c.level].b == b && -+ btree_node_lock_seq_matches(iter, b, b->c.level); -+} -+ -+static inline struct btree_iter * -+__trans_next_iter_with_node(struct btree_trans *trans, struct btree *b, -+ unsigned idx) -+{ -+ struct btree_iter *iter = __trans_next_iter(trans, idx); -+ -+ while (iter && !__iter_has_node(iter, b)) -+ iter = __trans_next_iter(trans, iter->idx + 1); -+ -+ return iter; -+} -+ -+#define trans_for_each_iter_with_node(_trans, _b, _iter) \ -+ for (_iter = __trans_next_iter_with_node((_trans), (_b), 0); \ -+ (_iter); \ -+ _iter = __trans_next_iter_with_node((_trans), (_b), \ -+ (_iter)->idx + 1)) -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_btree_trans_verify_iters(struct btree_trans *, struct btree *); -+void bch2_btree_trans_verify_locks(struct btree_trans *); -+#else -+static inline void bch2_btree_trans_verify_iters(struct btree_trans *trans, -+ struct btree *b) {} -+static inline void bch2_btree_trans_verify_locks(struct btree_trans *iter) {} -+#endif -+ -+void bch2_btree_iter_fix_key_modified(struct btree_iter *, struct btree *, -+ struct bkey_packed *); -+void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *, -+ struct btree_node_iter *, struct bkey_packed *, -+ unsigned, unsigned); -+ -+bool bch2_btree_iter_relock(struct btree_iter *, bool); -+bool bch2_trans_relock(struct btree_trans *); -+void bch2_trans_unlock(struct btree_trans *); -+ -+bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned); -+bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned); -+ -+static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter, -+ unsigned new_locks_want) -+{ -+ new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH); -+ -+ return iter->locks_want < 
new_locks_want -+ ? (!iter->trans->nounlock -+ ? __bch2_btree_iter_upgrade(iter, new_locks_want) -+ : __bch2_btree_iter_upgrade_nounlock(iter, new_locks_want)) -+ : iter->uptodate <= BTREE_ITER_NEED_PEEK; -+} -+ -+void __bch2_btree_iter_downgrade(struct btree_iter *, unsigned); -+ -+static inline void bch2_btree_iter_downgrade(struct btree_iter *iter) -+{ -+ if (iter->locks_want > (iter->flags & BTREE_ITER_INTENT) ? 1 : 0) -+ __bch2_btree_iter_downgrade(iter, 0); -+} -+ -+void bch2_trans_downgrade(struct btree_trans *); -+ -+void bch2_btree_iter_node_replace(struct btree_iter *, struct btree *); -+void bch2_btree_iter_node_drop(struct btree_iter *, struct btree *); -+ -+void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *); -+ -+int __must_check __bch2_btree_iter_traverse(struct btree_iter *); -+ -+static inline int __must_check -+bch2_btree_iter_traverse(struct btree_iter *iter) -+{ -+ return iter->uptodate >= BTREE_ITER_NEED_RELOCK -+ ? __bch2_btree_iter_traverse(iter) -+ : 0; -+} -+ -+int bch2_btree_iter_traverse_all(struct btree_trans *); -+ -+struct btree *bch2_btree_iter_peek_node(struct btree_iter *); -+struct btree *bch2_btree_iter_next_node(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *); -+ -+void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos); -+void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool); -+void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos); -+ -+static inline int btree_iter_cmp(const struct btree_iter *l, -+ const struct btree_iter *r) -+{ -+ return cmp_int(l->btree_id, r->btree_id) ?: -+ -cmp_int(btree_iter_type(l), btree_iter_type(r)) ?: -+ bkey_cmp(l->pos, r->pos); -+} -+ -+/* -+ * Unlocks before scheduling -+ * Note: does not revalidate iterator -+ */ -+static inline int bch2_trans_cond_resched(struct btree_trans *trans) -+{ -+ if (need_resched() || race_fault()) { -+ bch2_trans_unlock(trans); -+ schedule(); -+ return bch2_trans_relock(trans) ? 0 : -EINTR; -+ } else { -+ return 0; -+ } -+} -+ -+#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ -+ _locks_want, _depth, _flags, _b) \ -+ for (iter = bch2_trans_get_node_iter((_trans), (_btree_id), \ -+ _start, _locks_want, _depth, _flags), \ -+ _b = bch2_btree_iter_peek_node(_iter); \ -+ (_b); \ -+ (_b) = bch2_btree_iter_next_node(_iter)) -+ -+#define for_each_btree_node(_trans, _iter, _btree_id, _start, \ -+ _flags, _b) \ -+ __for_each_btree_node(_trans, _iter, _btree_id, _start, \ -+ 0, 0, _flags, _b) -+ -+static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, -+ unsigned flags) -+{ -+ if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_CACHED) -+ return bch2_btree_iter_peek_cached(iter); -+ else -+ return flags & BTREE_ITER_SLOTS -+ ? bch2_btree_iter_peek_slot(iter) -+ : bch2_btree_iter_peek(iter); -+} -+ -+static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter, -+ unsigned flags) -+{ -+ return flags & BTREE_ITER_SLOTS -+ ? 
bch2_btree_iter_next_slot(iter) -+ : bch2_btree_iter_next(iter); -+} -+ -+static inline int bkey_err(struct bkey_s_c k) -+{ -+ return PTR_ERR_OR_ZERO(k.k); -+} -+ -+#define for_each_btree_key(_trans, _iter, _btree_id, \ -+ _start, _flags, _k, _ret) \ -+ for ((_ret) = PTR_ERR_OR_ZERO((_iter) = \ -+ bch2_trans_get_iter((_trans), (_btree_id), \ -+ (_start), (_flags))) ?: \ -+ PTR_ERR_OR_ZERO(((_k) = \ -+ __bch2_btree_iter_peek(_iter, _flags)).k); \ -+ !_ret && (_k).k; \ -+ (_ret) = PTR_ERR_OR_ZERO(((_k) = \ -+ __bch2_btree_iter_next(_iter, _flags)).k)) -+ -+#define for_each_btree_key_continue(_iter, _flags, _k, _ret) \ -+ for ((_k) = __bch2_btree_iter_peek(_iter, _flags); \ -+ !((_ret) = bkey_err(_k)) && (_k).k; \ -+ (_k) = __bch2_btree_iter_next(_iter, _flags)) -+ -+/* new multiple iterator interface: */ -+ -+int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *); -+int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *); -+ -+void bch2_trans_unlink_iters(struct btree_trans *); -+ -+struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id, -+ struct bpos, unsigned); -+ -+static inline struct btree_iter * -+bch2_trans_get_iter(struct btree_trans *trans, enum btree_id btree_id, -+ struct bpos pos, unsigned flags) -+{ -+ struct btree_iter *iter = -+ __bch2_trans_get_iter(trans, btree_id, pos, flags); -+ -+ if (!IS_ERR(iter)) -+ iter->ip_allocated = _THIS_IP_; -+ return iter; -+} -+ -+struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *, -+ struct btree_iter *); -+static inline struct btree_iter * -+bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *src) -+{ -+ struct btree_iter *iter = -+ __bch2_trans_copy_iter(trans, src); -+ -+ if (!IS_ERR(iter)) -+ iter->ip_allocated = _THIS_IP_; -+ return iter; -+ -+} -+ -+struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *, -+ enum btree_id, struct bpos, -+ unsigned, unsigned, unsigned); -+ -+#define TRANS_RESET_NOTRAVERSE (1 << 0) -+ -+void bch2_trans_reset(struct btree_trans *, unsigned); -+ -+static inline void bch2_trans_begin(struct btree_trans *trans) -+{ -+ return bch2_trans_reset(trans, 0); -+} -+ -+void *bch2_trans_kmalloc(struct btree_trans *, size_t); -+void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t); -+int bch2_trans_exit(struct btree_trans *); -+ -+void bch2_btree_trans_to_text(struct printbuf *, struct bch_fs *); -+ -+void bch2_fs_btree_iter_exit(struct bch_fs *); -+int bch2_fs_btree_iter_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_BTREE_ITER_H */ -diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c -new file mode 100644 -index 000000000000..d73cc8ddadac ---- /dev/null -+++ b/fs/bcachefs/btree_key_cache.c -@@ -0,0 +1,519 @@ -+ -+#include "bcachefs.h" -+#include "btree_cache.h" -+#include "btree_iter.h" -+#include "btree_key_cache.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+#include "error.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+ -+#include -+ -+static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg, -+ const void *obj) -+{ -+ const struct bkey_cached *ck = obj; -+ const struct bkey_cached_key *key = arg->key; -+ -+ return cmp_int(ck->key.btree_id, key->btree_id) ?: -+ bkey_cmp(ck->key.pos, key->pos); -+} -+ -+static const struct rhashtable_params bch2_btree_key_cache_params = { -+ .head_offset = offsetof(struct bkey_cached, hash), -+ .key_offset = offsetof(struct bkey_cached, key), -+ .key_len = sizeof(struct bkey_cached_key), -+ .obj_cmpfn = 
bch2_btree_key_cache_cmp_fn, -+}; -+ -+__flatten -+static inline struct bkey_cached * -+btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos) -+{ -+ struct bkey_cached_key key = { -+ .btree_id = btree_id, -+ .pos = pos, -+ }; -+ -+ return rhashtable_lookup_fast(&c->btree_key_cache.table, &key, -+ bch2_btree_key_cache_params); -+} -+ -+static bool bkey_cached_lock_for_evict(struct bkey_cached *ck) -+{ -+ if (!six_trylock_intent(&ck->c.lock)) -+ return false; -+ -+ if (!six_trylock_write(&ck->c.lock)) { -+ six_unlock_intent(&ck->c.lock); -+ return false; -+ } -+ -+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ six_unlock_write(&ck->c.lock); -+ six_unlock_intent(&ck->c.lock); -+ return false; -+ } -+ -+ return true; -+} -+ -+static void bkey_cached_evict(struct btree_key_cache *c, -+ struct bkey_cached *ck) -+{ -+ BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash, -+ bch2_btree_key_cache_params)); -+ memset(&ck->key, ~0, sizeof(ck->key)); -+} -+ -+static void bkey_cached_free(struct btree_key_cache *c, -+ struct bkey_cached *ck) -+{ -+ list_move(&ck->list, &c->freed); -+ -+ kfree(ck->k); -+ ck->k = NULL; -+ ck->u64s = 0; -+ -+ six_unlock_write(&ck->c.lock); -+ six_unlock_intent(&ck->c.lock); -+} -+ -+static struct bkey_cached * -+bkey_cached_alloc(struct btree_key_cache *c) -+{ -+ struct bkey_cached *ck; -+ -+ list_for_each_entry(ck, &c->freed, list) -+ if (bkey_cached_lock_for_evict(ck)) -+ return ck; -+ -+ list_for_each_entry(ck, &c->clean, list) -+ if (bkey_cached_lock_for_evict(ck)) { -+ bkey_cached_evict(c, ck); -+ return ck; -+ } -+ -+ ck = kzalloc(sizeof(*ck), GFP_NOFS); -+ if (!ck) -+ return NULL; -+ -+ INIT_LIST_HEAD(&ck->list); -+ six_lock_init(&ck->c.lock); -+ BUG_ON(!six_trylock_intent(&ck->c.lock)); -+ BUG_ON(!six_trylock_write(&ck->c.lock)); -+ -+ return ck; -+} -+ -+static struct bkey_cached * -+btree_key_cache_create(struct btree_key_cache *c, -+ enum btree_id btree_id, -+ struct bpos pos) -+{ -+ struct bkey_cached *ck; -+ -+ ck = bkey_cached_alloc(c); -+ if (!ck) -+ return ERR_PTR(-ENOMEM); -+ -+ ck->c.level = 0; -+ ck->c.btree_id = btree_id; -+ ck->key.btree_id = btree_id; -+ ck->key.pos = pos; -+ ck->valid = false; -+ -+ BUG_ON(ck->flags); -+ -+ if (rhashtable_lookup_insert_fast(&c->table, -+ &ck->hash, -+ bch2_btree_key_cache_params)) { -+ /* We raced with another fill: */ -+ bkey_cached_free(c, ck); -+ return NULL; -+ } -+ -+ list_move(&ck->list, &c->clean); -+ six_unlock_write(&ck->c.lock); -+ -+ return ck; -+} -+ -+static int btree_key_cache_fill(struct btree_trans *trans, -+ struct btree_iter *ck_iter, -+ struct bkey_cached *ck) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ unsigned new_u64s = 0; -+ struct bkey_i *new_k = NULL; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, ck->key.btree_id, -+ ck->key.pos, BTREE_ITER_SLOTS); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) { -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+ } -+ -+ if (!bch2_btree_node_relock(ck_iter, 0)) { -+ bch2_trans_iter_put(trans, iter); -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ return -EINTR; -+ } -+ -+ if (k.k->u64s > ck->u64s) { -+ new_u64s = roundup_pow_of_two(k.k->u64s); -+ new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS); -+ if (!new_k) { -+ bch2_trans_iter_put(trans, iter); -+ return -ENOMEM; -+ } -+ } -+ -+ bch2_btree_node_lock_write(ck_iter->l[0].b, ck_iter); -+ if (new_k) { -+ kfree(ck->k); -+ ck->u64s = new_u64s; -+ ck->k = new_k; -+ } -+ -+ 
bkey_reassemble(ck->k, k); -+ ck->valid = true; -+ bch2_btree_node_unlock_write(ck_iter->l[0].b, ck_iter); -+ -+ /* We're not likely to need this iterator again: */ -+ bch2_trans_iter_free(trans, iter); -+ -+ return 0; -+} -+ -+static int bkey_cached_check_fn(struct six_lock *lock, void *p) -+{ -+ struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock); -+ const struct btree_iter *iter = p; -+ -+ return ck->key.btree_id == iter->btree_id && -+ !bkey_cmp(ck->key.pos, iter->pos) ? 0 : -1; -+} -+ -+int bch2_btree_iter_traverse_cached(struct btree_iter *iter) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct bch_fs *c = trans->c; -+ struct bkey_cached *ck; -+ int ret = 0; -+ -+ BUG_ON(iter->level); -+ -+ if (btree_node_locked(iter, 0)) { -+ ck = (void *) iter->l[0].b; -+ goto fill; -+ } -+retry: -+ ck = btree_key_cache_find(c, iter->btree_id, iter->pos); -+ if (!ck) { -+ if (iter->flags & BTREE_ITER_CACHED_NOCREATE) { -+ iter->l[0].b = NULL; -+ return 0; -+ } -+ -+ mutex_lock(&c->btree_key_cache.lock); -+ ck = btree_key_cache_create(&c->btree_key_cache, -+ iter->btree_id, iter->pos); -+ mutex_unlock(&c->btree_key_cache.lock); -+ -+ ret = PTR_ERR_OR_ZERO(ck); -+ if (ret) -+ goto err; -+ if (!ck) -+ goto retry; -+ -+ mark_btree_node_locked(iter, 0, SIX_LOCK_intent); -+ iter->locks_want = 1; -+ } else { -+ enum six_lock_type lock_want = __btree_lock_want(iter, 0); -+ -+ if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want, -+ bkey_cached_check_fn, iter)) { -+ if (ck->key.btree_id != iter->btree_id || -+ bkey_cmp(ck->key.pos, iter->pos)) { -+ goto retry; -+ } -+ -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ ret = -EINTR; -+ goto err; -+ } -+ -+ if (ck->key.btree_id != iter->btree_id || -+ bkey_cmp(ck->key.pos, iter->pos)) { -+ six_unlock_type(&ck->c.lock, lock_want); -+ goto retry; -+ } -+ -+ mark_btree_node_locked(iter, 0, lock_want); -+ } -+ -+ iter->l[0].lock_seq = ck->c.lock.state.seq; -+ iter->l[0].b = (void *) ck; -+fill: -+ if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) { -+ if (!btree_node_intent_locked(iter, 0)) -+ bch2_btree_iter_upgrade(iter, 1); -+ if (!btree_node_intent_locked(iter, 0)) { -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ ret = -EINTR; -+ goto err; -+ } -+ -+ ret = btree_key_cache_fill(trans, iter, ck); -+ if (ret) -+ goto err; -+ } -+ -+ iter->uptodate = BTREE_ITER_NEED_PEEK; -+ bch2_btree_iter_downgrade(iter); -+ return ret; -+err: -+ if (ret != -EINTR) { -+ btree_node_unlock(iter, 0); -+ iter->flags |= BTREE_ITER_ERROR; -+ iter->l[0].b = BTREE_ITER_NO_NODE_ERROR; -+ } -+ return ret; -+} -+ -+static int btree_key_cache_flush_pos(struct btree_trans *trans, -+ struct bkey_cached_key key, -+ u64 journal_seq, -+ bool evict) -+{ -+ struct bch_fs *c = trans->c; -+ struct journal *j = &c->journal; -+ struct btree_iter *c_iter = NULL, *b_iter = NULL; -+ struct bkey_cached *ck; -+ int ret; -+ -+ b_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos, -+ BTREE_ITER_SLOTS| -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(b_iter); -+ if (ret) -+ goto out; -+ -+ c_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos, -+ BTREE_ITER_CACHED| -+ BTREE_ITER_CACHED_NOFILL| -+ BTREE_ITER_CACHED_NOCREATE| -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(c_iter); -+ if (ret) -+ goto out; -+retry: -+ ret = bch2_btree_iter_traverse(c_iter); -+ if (ret) -+ goto err; -+ -+ ck = (void *) c_iter->l[0].b; -+ if (!ck || -+ (journal_seq && ck->journal.seq != journal_seq)) -+ goto out; -+ -+ if (!test_bit(BKEY_CACHED_DIRTY, 
&ck->flags)) { -+ if (!evict) -+ goto out; -+ goto evict; -+ } -+ -+ ret = bch2_btree_iter_traverse(b_iter) ?: -+ bch2_trans_update(trans, b_iter, ck->k, BTREE_TRIGGER_NORUN) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_USE_ALLOC_RESERVE| -+ BTREE_INSERT_JOURNAL_RESERVED| -+ BTREE_INSERT_JOURNAL_RECLAIM); -+err: -+ if (ret == -EINTR) -+ goto retry; -+ -+ BUG_ON(ret && !bch2_journal_error(j)); -+ -+ if (ret) -+ goto out; -+ -+ bch2_journal_pin_drop(j, &ck->journal); -+ bch2_journal_preres_put(j, &ck->res); -+ clear_bit(BKEY_CACHED_DIRTY, &ck->flags); -+ -+ if (!evict) { -+ mutex_lock(&c->btree_key_cache.lock); -+ list_move_tail(&ck->list, &c->btree_key_cache.clean); -+ mutex_unlock(&c->btree_key_cache.lock); -+ } else { -+evict: -+ BUG_ON(!btree_node_intent_locked(c_iter, 0)); -+ -+ mark_btree_node_unlocked(c_iter, 0); -+ c_iter->l[0].b = NULL; -+ -+ six_lock_write(&ck->c.lock, NULL, NULL); -+ -+ mutex_lock(&c->btree_key_cache.lock); -+ bkey_cached_evict(&c->btree_key_cache, ck); -+ bkey_cached_free(&c->btree_key_cache, ck); -+ mutex_unlock(&c->btree_key_cache.lock); -+ } -+out: -+ bch2_trans_iter_put(trans, b_iter); -+ bch2_trans_iter_put(trans, c_iter); -+ return ret; -+} -+ -+static void btree_key_cache_journal_flush(struct journal *j, -+ struct journal_entry_pin *pin, -+ u64 seq) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bkey_cached *ck = -+ container_of(pin, struct bkey_cached, journal); -+ struct bkey_cached_key key; -+ struct btree_trans trans; -+ -+ six_lock_read(&ck->c.lock, NULL, NULL); -+ key = READ_ONCE(ck->key); -+ -+ if (ck->journal.seq != seq || -+ !test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ six_unlock_read(&ck->c.lock); -+ return; -+ } -+ six_unlock_read(&ck->c.lock); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ btree_key_cache_flush_pos(&trans, key, seq, false); -+ bch2_trans_exit(&trans); -+} -+ -+/* -+ * Flush and evict a key from the key cache: -+ */ -+int bch2_btree_key_cache_flush(struct btree_trans *trans, -+ enum btree_id id, struct bpos pos) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_cached_key key = { id, pos }; -+ -+ /* Fastpath - assume it won't be found: */ -+ if (!btree_key_cache_find(c, id, pos)) -+ return 0; -+ -+ return btree_key_cache_flush_pos(trans, key, 0, true); -+} -+ -+bool bch2_btree_insert_key_cached(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_cached *ck = (void *) iter->l[0].b; -+ -+ BUG_ON(insert->u64s > ck->u64s); -+ -+ if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) { -+ int difference; -+ -+ BUG_ON(jset_u64s(insert->u64s) > trans->journal_preres.u64s); -+ -+ difference = jset_u64s(insert->u64s) - ck->res.u64s; -+ if (difference > 0) { -+ trans->journal_preres.u64s -= difference; -+ ck->res.u64s += difference; -+ } -+ } -+ -+ bkey_copy(ck->k, insert); -+ ck->valid = true; -+ -+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ mutex_lock(&c->btree_key_cache.lock); -+ list_del_init(&ck->list); -+ -+ set_bit(BKEY_CACHED_DIRTY, &ck->flags); -+ mutex_unlock(&c->btree_key_cache.lock); -+ } -+ -+ bch2_journal_pin_update(&c->journal, trans->journal_res.seq, -+ &ck->journal, btree_key_cache_journal_flush); -+ return true; -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_btree_key_cache_verify_clean(struct btree_trans *trans, -+ enum btree_id id, struct bpos pos) -+{ -+ 
BUG_ON(btree_key_cache_find(trans->c, id, pos)); -+} -+#endif -+ -+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *c) -+{ -+ struct bkey_cached *ck, *n; -+ -+ mutex_lock(&c->lock); -+ list_for_each_entry_safe(ck, n, &c->clean, list) { -+ kfree(ck->k); -+ kfree(ck); -+ } -+ list_for_each_entry_safe(ck, n, &c->freed, list) -+ kfree(ck); -+ mutex_unlock(&c->lock); -+ -+ rhashtable_destroy(&c->table); -+} -+ -+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c) -+{ -+ mutex_init(&c->lock); -+ INIT_LIST_HEAD(&c->freed); -+ INIT_LIST_HEAD(&c->clean); -+} -+ -+int bch2_fs_btree_key_cache_init(struct btree_key_cache *c) -+{ -+ return rhashtable_init(&c->table, &bch2_btree_key_cache_params); -+} -+ -+void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c) -+{ -+ struct bucket_table *tbl; -+ struct bkey_cached *ck; -+ struct rhash_head *pos; -+ size_t i; -+ -+ mutex_lock(&c->lock); -+ tbl = rht_dereference_rcu(c->table.tbl, &c->table); -+ -+ for (i = 0; i < tbl->size; i++) { -+ rht_for_each_entry_rcu(ck, pos, tbl, i, hash) { -+ pr_buf(out, "%s:", -+ bch2_btree_ids[ck->key.btree_id]); -+ bch2_bpos_to_text(out, ck->key.pos); -+ -+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) -+ pr_buf(out, " journal seq %llu", ck->journal.seq); -+ pr_buf(out, "\n"); -+ } -+ } -+ mutex_unlock(&c->lock); -+} -diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h -new file mode 100644 -index 000000000000..b1756c6c622c ---- /dev/null -+++ b/fs/bcachefs/btree_key_cache.h -@@ -0,0 +1,25 @@ -+#ifndef _BCACHEFS_BTREE_KEY_CACHE_H -+#define _BCACHEFS_BTREE_KEY_CACHE_H -+ -+int bch2_btree_iter_traverse_cached(struct btree_iter *); -+ -+bool bch2_btree_insert_key_cached(struct btree_trans *, -+ struct btree_iter *, struct bkey_i *); -+int bch2_btree_key_cache_flush(struct btree_trans *, -+ enum btree_id, struct bpos); -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_btree_key_cache_verify_clean(struct btree_trans *, -+ enum btree_id, struct bpos); -+#else -+static inline void -+bch2_btree_key_cache_verify_clean(struct btree_trans *trans, -+ enum btree_id id, struct bpos pos) {} -+#endif -+ -+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *); -+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *); -+int bch2_fs_btree_key_cache_init(struct btree_key_cache *); -+ -+void bch2_btree_key_cache_to_text(struct printbuf *, struct btree_key_cache *); -+ -+#endif /* _BCACHEFS_BTREE_KEY_CACHE_H */ -diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h -new file mode 100644 -index 000000000000..81fbf3e18647 ---- /dev/null -+++ b/fs/bcachefs/btree_locking.h -@@ -0,0 +1,257 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_LOCKING_H -+#define _BCACHEFS_BTREE_LOCKING_H -+ -+/* -+ * Only for internal btree use: -+ * -+ * The btree iterator tracks what locks it wants to take, and what locks it -+ * currently has - here we have wrappers for locking/unlocking btree nodes and -+ * updating the iterator state -+ */ -+ -+#include -+ -+#include "btree_iter.h" -+ -+/* matches six lock types */ -+enum btree_node_locked_type { -+ BTREE_NODE_UNLOCKED = -1, -+ BTREE_NODE_READ_LOCKED = SIX_LOCK_read, -+ BTREE_NODE_INTENT_LOCKED = SIX_LOCK_intent, -+}; -+ -+static inline int btree_node_locked_type(struct btree_iter *iter, -+ unsigned level) -+{ -+ /* -+ * We're relying on the fact that if nodes_intent_locked is set -+ * nodes_locked must be set as well, so that we can compute without -+ * branches: -+ */ -+ return BTREE_NODE_UNLOCKED + 
-+ ((iter->nodes_locked >> level) & 1) + -+ ((iter->nodes_intent_locked >> level) & 1); -+} -+ -+static inline bool btree_node_intent_locked(struct btree_iter *iter, -+ unsigned level) -+{ -+ return btree_node_locked_type(iter, level) == BTREE_NODE_INTENT_LOCKED; -+} -+ -+static inline bool btree_node_read_locked(struct btree_iter *iter, -+ unsigned level) -+{ -+ return btree_node_locked_type(iter, level) == BTREE_NODE_READ_LOCKED; -+} -+ -+static inline bool btree_node_locked(struct btree_iter *iter, unsigned level) -+{ -+ return iter->nodes_locked & (1 << level); -+} -+ -+static inline void mark_btree_node_unlocked(struct btree_iter *iter, -+ unsigned level) -+{ -+ iter->nodes_locked &= ~(1 << level); -+ iter->nodes_intent_locked &= ~(1 << level); -+} -+ -+static inline void mark_btree_node_locked(struct btree_iter *iter, -+ unsigned level, -+ enum six_lock_type type) -+{ -+ /* relying on this to avoid a branch */ -+ BUILD_BUG_ON(SIX_LOCK_read != 0); -+ BUILD_BUG_ON(SIX_LOCK_intent != 1); -+ -+ iter->nodes_locked |= 1 << level; -+ iter->nodes_intent_locked |= type << level; -+} -+ -+static inline void mark_btree_node_intent_locked(struct btree_iter *iter, -+ unsigned level) -+{ -+ mark_btree_node_locked(iter, level, SIX_LOCK_intent); -+} -+ -+static inline enum six_lock_type __btree_lock_want(struct btree_iter *iter, int level) -+{ -+ return level < iter->locks_want -+ ? SIX_LOCK_intent -+ : SIX_LOCK_read; -+} -+ -+static inline enum btree_node_locked_type -+btree_lock_want(struct btree_iter *iter, int level) -+{ -+ if (level < iter->level) -+ return BTREE_NODE_UNLOCKED; -+ if (level < iter->locks_want) -+ return BTREE_NODE_INTENT_LOCKED; -+ if (level == iter->level) -+ return BTREE_NODE_READ_LOCKED; -+ return BTREE_NODE_UNLOCKED; -+} -+ -+static inline void __btree_node_unlock(struct btree_iter *iter, unsigned level) -+{ -+ int lock_type = btree_node_locked_type(iter, level); -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ -+ if (lock_type != BTREE_NODE_UNLOCKED) -+ six_unlock_type(&iter->l[level].b->c.lock, lock_type); -+ mark_btree_node_unlocked(iter, level); -+} -+ -+static inline void btree_node_unlock(struct btree_iter *iter, unsigned level) -+{ -+ EBUG_ON(!level && iter->trans->nounlock); -+ -+ __btree_node_unlock(iter, level); -+} -+ -+static inline void __bch2_btree_iter_unlock(struct btree_iter *iter) -+{ -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK); -+ -+ while (iter->nodes_locked) -+ btree_node_unlock(iter, __ffs(iter->nodes_locked)); -+} -+ -+static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type) -+{ -+ switch (type) { -+ case SIX_LOCK_read: -+ return BCH_TIME_btree_lock_contended_read; -+ case SIX_LOCK_intent: -+ return BCH_TIME_btree_lock_contended_intent; -+ case SIX_LOCK_write: -+ return BCH_TIME_btree_lock_contended_write; -+ default: -+ BUG(); -+ } -+} -+ -+/* -+ * wrapper around six locks that just traces lock contended time -+ */ -+static inline void __btree_node_lock_type(struct bch_fs *c, struct btree *b, -+ enum six_lock_type type) -+{ -+ u64 start_time = local_clock(); -+ -+ six_lock_type(&b->c.lock, type, NULL, NULL); -+ bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time); -+} -+ -+static inline void btree_node_lock_type(struct bch_fs *c, struct btree *b, -+ enum six_lock_type type) -+{ -+ if (!six_trylock_type(&b->c.lock, type)) -+ __btree_node_lock_type(c, b, type); -+} -+ -+/* -+ * Lock a btree node if we already have it locked on one of our linked -+ * iterators: -+ */ -+static inline bool 
btree_node_lock_increment(struct btree_trans *trans, -+ struct btree *b, unsigned level, -+ enum btree_node_locked_type want) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) -+ if (iter->l[level].b == b && -+ btree_node_locked_type(iter, level) >= want) { -+ six_lock_increment(&b->c.lock, want); -+ return true; -+ } -+ -+ return false; -+} -+ -+bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned, -+ struct btree_iter *, enum six_lock_type, -+ six_lock_should_sleep_fn, void *); -+ -+static inline bool btree_node_lock(struct btree *b, -+ struct bpos pos, unsigned level, -+ struct btree_iter *iter, -+ enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p) -+{ -+ struct btree_trans *trans = iter->trans; -+ bool ret; -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx))); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ trans->locking = b; -+ trans->locking_iter_idx = iter->idx; -+ trans->locking_pos = pos; -+ trans->locking_btree_id = iter->btree_id; -+ trans->locking_level = level; -+#endif -+ ret = likely(six_trylock_type(&b->c.lock, type)) || -+ btree_node_lock_increment(trans, b, level, type) || -+ __bch2_btree_node_lock(b, pos, level, iter, type, -+ should_sleep_fn, p); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ trans->locking = NULL; -+#endif -+ return ret; -+} -+ -+bool __bch2_btree_node_relock(struct btree_iter *, unsigned); -+ -+static inline bool bch2_btree_node_relock(struct btree_iter *iter, -+ unsigned level) -+{ -+ EBUG_ON(btree_node_locked(iter, level) && -+ btree_node_locked_type(iter, level) != -+ __btree_lock_want(iter, level)); -+ -+ return likely(btree_node_locked(iter, level)) || -+ __bch2_btree_node_relock(iter, level); -+} -+ -+/* -+ * Updates the saved lock sequence number, so that bch2_btree_node_relock() will -+ * succeed: -+ */ -+static inline void -+bch2_btree_node_unlock_write_inlined(struct btree *b, struct btree_iter *iter) -+{ -+ struct btree_iter *linked; -+ -+ EBUG_ON(iter->l[b->c.level].b != b); -+ EBUG_ON(iter->l[b->c.level].lock_seq + 1 != b->c.lock.state.seq); -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) -+ linked->l[b->c.level].lock_seq += 2; -+ -+ six_unlock_write(&b->c.lock); -+} -+ -+void bch2_btree_node_unlock_write(struct btree *, struct btree_iter *); -+ -+void __bch2_btree_node_lock_write(struct btree *, struct btree_iter *); -+ -+static inline void bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter) -+{ -+ EBUG_ON(iter->l[b->c.level].b != b); -+ EBUG_ON(iter->l[b->c.level].lock_seq != b->c.lock.state.seq); -+ -+ if (unlikely(!six_trylock_write(&b->c.lock))) -+ __bch2_btree_node_lock_write(b, iter); -+} -+ -+#endif /* _BCACHEFS_BTREE_LOCKING_H */ -+ -+ -diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h -new file mode 100644 -index 000000000000..98611b1da1ed ---- /dev/null -+++ b/fs/bcachefs/btree_types.h -@@ -0,0 +1,666 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_TYPES_H -+#define _BCACHEFS_BTREE_TYPES_H -+ -+#include -+#include -+#include -+ -+#include "bkey_methods.h" -+#include "buckets_types.h" -+#include "journal_types.h" -+ -+struct open_bucket; -+struct btree_update; -+struct btree_trans; -+ -+#define MAX_BSETS 3U -+ -+struct btree_nr_keys { -+ -+ /* -+ * Amount of live metadata (i.e. 
size of node after a compaction) in -+ * units of u64s -+ */ -+ u16 live_u64s; -+ u16 bset_u64s[MAX_BSETS]; -+ -+ /* live keys only: */ -+ u16 packed_keys; -+ u16 unpacked_keys; -+}; -+ -+struct bset_tree { -+ /* -+ * We construct a binary tree in an array as if the array -+ * started at 1, so that things line up on the same cachelines -+ * better: see comments in bset.c at cacheline_to_bkey() for -+ * details -+ */ -+ -+ /* size of the binary tree and prev array */ -+ u16 size; -+ -+ /* function of size - precalculated for to_inorder() */ -+ u16 extra; -+ -+ u16 data_offset; -+ u16 aux_data_offset; -+ u16 end_offset; -+ -+ struct bpos max_key; -+}; -+ -+struct btree_write { -+ struct journal_entry_pin journal; -+}; -+ -+struct btree_alloc { -+ struct open_buckets ob; -+ BKEY_PADDED(k); -+}; -+ -+struct btree_bkey_cached_common { -+ struct six_lock lock; -+ u8 level; -+ u8 btree_id; -+}; -+ -+struct btree { -+ struct btree_bkey_cached_common c; -+ -+ struct rhash_head hash; -+ u64 hash_val; -+ -+ unsigned long flags; -+ u16 written; -+ u8 nsets; -+ u8 nr_key_bits; -+ -+ struct bkey_format format; -+ -+ struct btree_node *data; -+ void *aux_data; -+ -+ /* -+ * Sets of sorted keys - the real btree node - plus a binary search tree -+ * -+ * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point -+ * to the memory we have allocated for this btree node. Additionally, -+ * set[0]->data points to the entire btree node as it exists on disk. -+ */ -+ struct bset_tree set[MAX_BSETS]; -+ -+ struct btree_nr_keys nr; -+ u16 sib_u64s[2]; -+ u16 whiteout_u64s; -+ u8 page_order; -+ u8 unpack_fn_len; -+ -+ /* -+ * XXX: add a delete sequence number, so when bch2_btree_node_relock() -+ * fails because the lock sequence number has changed - i.e. the -+ * contents were modified - we can still relock the node if it's still -+ * the one we want, without redoing the traversal -+ */ -+ -+ /* -+ * For asynchronous splits/interior node updates: -+ * When we do a split, we allocate new child nodes and update the parent -+ * node to point to them: we update the parent in memory immediately, -+ * but then we must wait until the children have been written out before -+ * the update to the parent can be written - this is a list of the -+ * btree_updates that are blocking this node from being -+ * written: -+ */ -+ struct list_head write_blocked; -+ -+ /* -+ * Also for asynchronous splits/interior node updates: -+ * If a btree node isn't reachable yet, we don't want to kick off -+ * another write - because that write also won't yet be reachable and -+ * marking it as completed before it's reachable would be incorrect: -+ */ -+ unsigned long will_make_reachable; -+ -+ struct open_buckets ob; -+ -+ /* lru list */ -+ struct list_head list; -+ -+ struct btree_write writes[2]; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ bool *expensive_debug_checks; -+#endif -+ -+ /* Key/pointer for this btree node */ -+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX); -+}; -+ -+struct btree_cache { -+ struct rhashtable table; -+ bool table_init_done; -+ /* -+ * We never free a struct btree, except on shutdown - we just put it on -+ * the btree_cache_freed list and reuse it later. This simplifies the -+ * code, and it doesn't cost us much memory as the memory usage is -+ * dominated by buffers that hold the actual btree node data and those -+ * can be freed - and the number of struct btrees allocated is -+ * effectively bounded. 
-+ * -+ * btree_cache_freeable effectively is a small cache - we use it because -+ * high order page allocations can be rather expensive, and it's quite -+ * common to delete and allocate btree nodes in quick succession. It -+ * should never grow past ~2-3 nodes in practice. -+ */ -+ struct mutex lock; -+ struct list_head live; -+ struct list_head freeable; -+ struct list_head freed; -+ -+ /* Number of elements in live + freeable lists */ -+ unsigned used; -+ unsigned reserve; -+ struct shrinker shrink; -+ -+ /* -+ * If we need to allocate memory for a new btree node and that -+ * allocation fails, we can cannibalize another node in the btree cache -+ * to satisfy the allocation - lock to guarantee only one thread does -+ * this at a time: -+ */ -+ struct task_struct *alloc_lock; -+ struct closure_waitlist alloc_wait; -+}; -+ -+struct btree_node_iter { -+ struct btree_node_iter_set { -+ u16 k, end; -+ } data[MAX_BSETS]; -+}; -+ -+enum btree_iter_type { -+ BTREE_ITER_KEYS, -+ BTREE_ITER_NODES, -+ BTREE_ITER_CACHED, -+}; -+ -+#define BTREE_ITER_TYPE ((1 << 2) - 1) -+ -+/* -+ * Iterate over all possible positions, synthesizing deleted keys for holes: -+ */ -+#define BTREE_ITER_SLOTS (1 << 2) -+/* -+ * Indicates that intent locks should be taken on leaf nodes, because we expect -+ * to be doing updates: -+ */ -+#define BTREE_ITER_INTENT (1 << 3) -+/* -+ * Causes the btree iterator code to prefetch additional btree nodes from disk: -+ */ -+#define BTREE_ITER_PREFETCH (1 << 4) -+/* -+ * Indicates that this iterator should not be reused until transaction commit, -+ * either because a pending update references it or because the update depends -+ * on that particular key being locked (e.g. by the str_hash code, for hash -+ * table consistency) -+ */ -+#define BTREE_ITER_KEEP_UNTIL_COMMIT (1 << 5) -+/* -+ * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for -+ * @pos or the first key strictly greater than @pos -+ */ -+#define BTREE_ITER_IS_EXTENTS (1 << 6) -+#define BTREE_ITER_ERROR (1 << 7) -+#define BTREE_ITER_SET_POS_AFTER_COMMIT (1 << 8) -+#define BTREE_ITER_CACHED_NOFILL (1 << 9) -+#define BTREE_ITER_CACHED_NOCREATE (1 << 10) -+ -+#define BTREE_ITER_USER_FLAGS \ -+ (BTREE_ITER_SLOTS \ -+ |BTREE_ITER_INTENT \ -+ |BTREE_ITER_PREFETCH \ -+ |BTREE_ITER_CACHED_NOFILL \ -+ |BTREE_ITER_CACHED_NOCREATE) -+ -+enum btree_iter_uptodate { -+ BTREE_ITER_UPTODATE = 0, -+ BTREE_ITER_NEED_PEEK = 1, -+ BTREE_ITER_NEED_RELOCK = 2, -+ BTREE_ITER_NEED_TRAVERSE = 3, -+}; -+ -+#define BTREE_ITER_NO_NODE_GET_LOCKS ((struct btree *) 1) -+#define BTREE_ITER_NO_NODE_DROP ((struct btree *) 2) -+#define BTREE_ITER_NO_NODE_LOCK_ROOT ((struct btree *) 3) -+#define BTREE_ITER_NO_NODE_UP ((struct btree *) 4) -+#define BTREE_ITER_NO_NODE_DOWN ((struct btree *) 5) -+#define BTREE_ITER_NO_NODE_INIT ((struct btree *) 6) -+#define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7) -+ -+/* -+ * @pos - iterator's current position -+ * @level - current btree depth -+ * @locks_want - btree level below which we start taking intent locks -+ * @nodes_locked - bitmask indicating which nodes in @nodes are locked -+ * @nodes_intent_locked - bitmask indicating which locks are intent locks -+ */ -+struct btree_iter { -+ struct btree_trans *trans; -+ struct bpos pos; -+ struct bpos pos_after_commit; -+ -+ u16 flags; -+ u8 idx; -+ -+ enum btree_id btree_id:4; -+ enum btree_iter_uptodate uptodate:4; -+ unsigned level:4, -+ min_depth:4, -+ locks_want:4, -+ nodes_locked:4, -+ nodes_intent_locked:4; -+ -+ struct btree_iter_level 
{ -+ struct btree *b; -+ struct btree_node_iter iter; -+ u32 lock_seq; -+ } l[BTREE_MAX_DEPTH]; -+ -+ /* -+ * Current unpacked key - so that bch2_btree_iter_next()/ -+ * bch2_btree_iter_next_slot() can correctly advance pos. -+ */ -+ struct bkey k; -+ unsigned long ip_allocated; -+}; -+ -+static inline enum btree_iter_type -+btree_iter_type(const struct btree_iter *iter) -+{ -+ return iter->flags & BTREE_ITER_TYPE; -+} -+ -+static inline struct btree_iter_level *iter_l(struct btree_iter *iter) -+{ -+ return iter->l + iter->level; -+} -+ -+struct btree_key_cache { -+ struct mutex lock; -+ struct rhashtable table; -+ struct list_head freed; -+ struct list_head clean; -+}; -+ -+struct bkey_cached_key { -+ u32 btree_id; -+ struct bpos pos; -+} __attribute__((packed, aligned(4))); -+ -+#define BKEY_CACHED_DIRTY 0 -+ -+struct bkey_cached { -+ struct btree_bkey_cached_common c; -+ -+ unsigned long flags; -+ u8 u64s; -+ bool valid; -+ struct bkey_cached_key key; -+ -+ struct rhash_head hash; -+ struct list_head list; -+ -+ struct journal_preres res; -+ struct journal_entry_pin journal; -+ -+ struct bkey_i *k; -+}; -+ -+struct btree_insert_entry { -+ unsigned trigger_flags; -+ unsigned trans_triggers_run:1; -+ struct bkey_i *k; -+ struct btree_iter *iter; -+}; -+ -+#ifndef CONFIG_LOCKDEP -+#define BTREE_ITER_MAX 64 -+#else -+#define BTREE_ITER_MAX 32 -+#endif -+ -+struct btree_trans { -+ struct bch_fs *c; -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct list_head list; -+ struct btree *locking; -+ unsigned locking_iter_idx; -+ struct bpos locking_pos; -+ u8 locking_btree_id; -+ u8 locking_level; -+ pid_t pid; -+#endif -+ unsigned long ip; -+ -+ u64 iters_linked; -+ u64 iters_live; -+ u64 iters_touched; -+ -+ u8 nr_iters; -+ u8 nr_updates; -+ u8 nr_updates2; -+ u8 size; -+ unsigned used_mempool:1; -+ unsigned error:1; -+ unsigned nounlock:1; -+ unsigned need_reset:1; -+ unsigned in_traverse_all:1; -+ -+ unsigned mem_top; -+ unsigned mem_bytes; -+ void *mem; -+ -+ struct btree_iter *iters; -+ struct btree_insert_entry *updates; -+ struct btree_insert_entry *updates2; -+ -+ /* update path: */ -+ struct jset_entry *extra_journal_entries; -+ unsigned extra_journal_entry_u64s; -+ struct journal_entry_pin *journal_pin; -+ -+ struct journal_res journal_res; -+ struct journal_preres journal_preres; -+ u64 *journal_seq; -+ struct disk_reservation *disk_res; -+ unsigned flags; -+ unsigned journal_u64s; -+ unsigned journal_preres_u64s; -+ struct replicas_delta_list *fs_usage_deltas; -+ -+ struct btree_iter iters_onstack[2]; -+ struct btree_insert_entry updates_onstack[2]; -+ struct btree_insert_entry updates2_onstack[2]; -+}; -+ -+#define BTREE_FLAG(flag) \ -+static inline bool btree_node_ ## flag(struct btree *b) \ -+{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \ -+ \ -+static inline void set_btree_node_ ## flag(struct btree *b) \ -+{ set_bit(BTREE_NODE_ ## flag, &b->flags); } \ -+ \ -+static inline void clear_btree_node_ ## flag(struct btree *b) \ -+{ clear_bit(BTREE_NODE_ ## flag, &b->flags); } -+ -+enum btree_flags { -+ BTREE_NODE_read_in_flight, -+ BTREE_NODE_read_error, -+ BTREE_NODE_dirty, -+ BTREE_NODE_need_write, -+ BTREE_NODE_noevict, -+ BTREE_NODE_write_idx, -+ BTREE_NODE_accessed, -+ BTREE_NODE_write_in_flight, -+ BTREE_NODE_just_written, -+ BTREE_NODE_dying, -+ BTREE_NODE_fake, -+ BTREE_NODE_old_extent_overwrite, -+ BTREE_NODE_need_rewrite, -+}; -+ -+BTREE_FLAG(read_in_flight); -+BTREE_FLAG(read_error); -+BTREE_FLAG(dirty); -+BTREE_FLAG(need_write); -+BTREE_FLAG(noevict); -+BTREE_FLAG(write_idx); 
-+BTREE_FLAG(accessed); -+BTREE_FLAG(write_in_flight); -+BTREE_FLAG(just_written); -+BTREE_FLAG(dying); -+BTREE_FLAG(fake); -+BTREE_FLAG(old_extent_overwrite); -+BTREE_FLAG(need_rewrite); -+ -+static inline struct btree_write *btree_current_write(struct btree *b) -+{ -+ return b->writes + btree_node_write_idx(b); -+} -+ -+static inline struct btree_write *btree_prev_write(struct btree *b) -+{ -+ return b->writes + (btree_node_write_idx(b) ^ 1); -+} -+ -+static inline struct bset_tree *bset_tree_last(struct btree *b) -+{ -+ EBUG_ON(!b->nsets); -+ return b->set + b->nsets - 1; -+} -+ -+static inline void * -+__btree_node_offset_to_ptr(const struct btree *b, u16 offset) -+{ -+ return (void *) ((u64 *) b->data + 1 + offset); -+} -+ -+static inline u16 -+__btree_node_ptr_to_offset(const struct btree *b, const void *p) -+{ -+ u16 ret = (u64 *) p - 1 - (u64 *) b->data; -+ -+ EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p); -+ return ret; -+} -+ -+static inline struct bset *bset(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ return __btree_node_offset_to_ptr(b, t->data_offset); -+} -+ -+static inline void set_btree_bset_end(struct btree *b, struct bset_tree *t) -+{ -+ t->end_offset = -+ __btree_node_ptr_to_offset(b, vstruct_last(bset(b, t))); -+} -+ -+static inline void set_btree_bset(struct btree *b, struct bset_tree *t, -+ const struct bset *i) -+{ -+ t->data_offset = __btree_node_ptr_to_offset(b, i); -+ set_btree_bset_end(b, t); -+} -+ -+static inline struct bset *btree_bset_first(struct btree *b) -+{ -+ return bset(b, b->set); -+} -+ -+static inline struct bset *btree_bset_last(struct btree *b) -+{ -+ return bset(b, bset_tree_last(b)); -+} -+ -+static inline u16 -+__btree_node_key_to_offset(const struct btree *b, const struct bkey_packed *k) -+{ -+ return __btree_node_ptr_to_offset(b, k); -+} -+ -+static inline struct bkey_packed * -+__btree_node_offset_to_key(const struct btree *b, u16 k) -+{ -+ return __btree_node_offset_to_ptr(b, k); -+} -+ -+static inline unsigned btree_bkey_first_offset(const struct bset_tree *t) -+{ -+ return t->data_offset + offsetof(struct bset, _data) / sizeof(u64); -+} -+ -+#define btree_bkey_first(_b, _t) \ -+({ \ -+ EBUG_ON(bset(_b, _t)->start != \ -+ __btree_node_offset_to_key(_b, btree_bkey_first_offset(_t)));\ -+ \ -+ bset(_b, _t)->start; \ -+}) -+ -+#define btree_bkey_last(_b, _t) \ -+({ \ -+ EBUG_ON(__btree_node_offset_to_key(_b, (_t)->end_offset) != \ -+ vstruct_last(bset(_b, _t))); \ -+ \ -+ __btree_node_offset_to_key(_b, (_t)->end_offset); \ -+}) -+ -+static inline unsigned bset_u64s(struct bset_tree *t) -+{ -+ return t->end_offset - t->data_offset - -+ sizeof(struct bset) / sizeof(u64); -+} -+ -+static inline unsigned bset_dead_u64s(struct btree *b, struct bset_tree *t) -+{ -+ return bset_u64s(t) - b->nr.bset_u64s[t - b->set]; -+} -+ -+static inline unsigned bset_byte_offset(struct btree *b, void *i) -+{ -+ return i - (void *) b->data; -+} -+ -+enum btree_node_type { -+#define x(kwd, val, name) BKEY_TYPE_##kwd = val, -+ BCH_BTREE_IDS() -+#undef x -+ BKEY_TYPE_BTREE, -+}; -+ -+/* Type of a key in btree @id at level @level: */ -+static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id) -+{ -+ return level ? 
BKEY_TYPE_BTREE : (enum btree_node_type) id; -+} -+ -+/* Type of keys @b contains: */ -+static inline enum btree_node_type btree_node_type(struct btree *b) -+{ -+ return __btree_node_type(b->c.level, b->c.btree_id); -+} -+ -+static inline bool btree_node_type_is_extents(enum btree_node_type type) -+{ -+ switch (type) { -+ case BKEY_TYPE_EXTENTS: -+ case BKEY_TYPE_REFLINK: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline bool btree_node_is_extents(struct btree *b) -+{ -+ return btree_node_type_is_extents(btree_node_type(b)); -+} -+ -+static inline enum btree_node_type btree_iter_key_type(struct btree_iter *iter) -+{ -+ return __btree_node_type(iter->level, iter->btree_id); -+} -+ -+static inline bool btree_iter_is_extents(struct btree_iter *iter) -+{ -+ return btree_node_type_is_extents(btree_iter_key_type(iter)); -+} -+ -+#define BTREE_NODE_TYPE_HAS_TRIGGERS \ -+ ((1U << BKEY_TYPE_EXTENTS)| \ -+ (1U << BKEY_TYPE_ALLOC)| \ -+ (1U << BKEY_TYPE_INODES)| \ -+ (1U << BKEY_TYPE_REFLINK)| \ -+ (1U << BKEY_TYPE_EC)| \ -+ (1U << BKEY_TYPE_BTREE)) -+ -+#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ -+ ((1U << BKEY_TYPE_EXTENTS)| \ -+ (1U << BKEY_TYPE_INODES)| \ -+ (1U << BKEY_TYPE_REFLINK)) -+ -+enum btree_trigger_flags { -+ __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */ -+ __BTREE_TRIGGER_NOOVERWRITES, /* Don't run triggers on overwrites */ -+ -+ __BTREE_TRIGGER_INSERT, -+ __BTREE_TRIGGER_OVERWRITE, -+ __BTREE_TRIGGER_OVERWRITE_SPLIT, -+ -+ __BTREE_TRIGGER_GC, -+ __BTREE_TRIGGER_BUCKET_INVALIDATE, -+ __BTREE_TRIGGER_ALLOC_READ, -+ __BTREE_TRIGGER_NOATOMIC, -+}; -+ -+#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN) -+#define BTREE_TRIGGER_NOOVERWRITES (1U << __BTREE_TRIGGER_NOOVERWRITES) -+ -+#define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT) -+#define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE) -+#define BTREE_TRIGGER_OVERWRITE_SPLIT (1U << __BTREE_TRIGGER_OVERWRITE_SPLIT) -+ -+#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC) -+#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) -+#define BTREE_TRIGGER_ALLOC_READ (1U << __BTREE_TRIGGER_ALLOC_READ) -+#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC) -+ -+static inline bool btree_node_type_needs_gc(enum btree_node_type type) -+{ -+ return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type); -+} -+ -+struct btree_root { -+ struct btree *b; -+ -+ /* On disk root - see async splits: */ -+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX); -+ u8 level; -+ u8 alive; -+ s8 error; -+}; -+ -+/* -+ * Optional hook that will be called just prior to a btree node update, when -+ * we're holding the write lock and we know what key is about to be overwritten: -+ */ -+ -+enum btree_insert_ret { -+ BTREE_INSERT_OK, -+ /* leaf node needs to be split */ -+ BTREE_INSERT_BTREE_NODE_FULL, -+ BTREE_INSERT_ENOSPC, -+ BTREE_INSERT_NEED_MARK_REPLICAS, -+ BTREE_INSERT_NEED_JOURNAL_RES, -+}; -+ -+enum btree_gc_coalesce_fail_reason { -+ BTREE_GC_COALESCE_FAIL_RESERVE_GET, -+ BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC, -+ BTREE_GC_COALESCE_FAIL_FORMAT_FITS, -+}; -+ -+enum btree_node_sibling { -+ btree_prev_sib, -+ btree_next_sib, -+}; -+ -+typedef struct btree_nr_keys (*sort_fix_overlapping_fn)(struct bset *, -+ struct btree *, -+ struct btree_node_iter *); -+ -+#endif /* _BCACHEFS_BTREE_TYPES_H */ -diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h -new file mode 100644 -index 000000000000..e0b1bde37484 ---- /dev/null -+++ b/fs/bcachefs/btree_update.h -@@ -0,0 +1,144 
@@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_UPDATE_H -+#define _BCACHEFS_BTREE_UPDATE_H -+ -+#include "btree_iter.h" -+#include "journal.h" -+ -+struct bch_fs; -+struct btree; -+ -+void bch2_btree_node_lock_for_insert(struct bch_fs *, struct btree *, -+ struct btree_iter *); -+bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *, -+ struct btree_node_iter *, struct bkey_i *); -+void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64); -+ -+enum btree_insert_flags { -+ __BTREE_INSERT_NOUNLOCK, -+ __BTREE_INSERT_NOFAIL, -+ __BTREE_INSERT_NOCHECK_RW, -+ __BTREE_INSERT_LAZY_RW, -+ __BTREE_INSERT_USE_RESERVE, -+ __BTREE_INSERT_USE_ALLOC_RESERVE, -+ __BTREE_INSERT_JOURNAL_REPLAY, -+ __BTREE_INSERT_JOURNAL_RESERVED, -+ __BTREE_INSERT_JOURNAL_RECLAIM, -+ __BTREE_INSERT_NOWAIT, -+ __BTREE_INSERT_GC_LOCK_HELD, -+ __BCH_HASH_SET_MUST_CREATE, -+ __BCH_HASH_SET_MUST_REPLACE, -+}; -+ -+/* -+ * Don't drop locks _after_ successfully updating btree: -+ */ -+#define BTREE_INSERT_NOUNLOCK (1 << __BTREE_INSERT_NOUNLOCK) -+ -+/* Don't check for -ENOSPC: */ -+#define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL) -+ -+#define BTREE_INSERT_NOCHECK_RW (1 << __BTREE_INSERT_NOCHECK_RW) -+#define BTREE_INSERT_LAZY_RW (1 << __BTREE_INSERT_LAZY_RW) -+ -+/* for copygc, or when merging btree nodes */ -+#define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE) -+#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE) -+ -+/* Insert is for journal replay - don't get journal reservations: */ -+#define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY) -+ -+/* Indicates that we have pre-reserved space in the journal: */ -+#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED) -+ -+/* Insert is being called from journal reclaim path: */ -+#define BTREE_INSERT_JOURNAL_RECLAIM (1 << __BTREE_INSERT_JOURNAL_RECLAIM) -+ -+/* Don't block on allocation failure (for new btree nodes: */ -+#define BTREE_INSERT_NOWAIT (1 << __BTREE_INSERT_NOWAIT) -+#define BTREE_INSERT_GC_LOCK_HELD (1 << __BTREE_INSERT_GC_LOCK_HELD) -+ -+#define BCH_HASH_SET_MUST_CREATE (1 << __BCH_HASH_SET_MUST_CREATE) -+#define BCH_HASH_SET_MUST_REPLACE (1 << __BCH_HASH_SET_MUST_REPLACE) -+ -+int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned); -+ -+int __bch2_btree_insert(struct btree_trans *, enum btree_id, struct bkey_i *); -+int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, -+ struct disk_reservation *, u64 *, int flags); -+ -+int bch2_btree_delete_at_range(struct btree_trans *, struct btree_iter *, -+ struct bpos, u64 *); -+int bch2_btree_delete_range(struct bch_fs *, enum btree_id, -+ struct bpos, struct bpos, u64 *); -+ -+int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *, -+ __le64, unsigned); -+int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *, -+ struct btree *, struct bkey_i *); -+ -+int bch2_trans_update(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *, enum btree_trigger_flags); -+int __bch2_trans_commit(struct btree_trans *); -+ -+/** -+ * bch2_trans_commit - insert keys at given iterator positions -+ * -+ * This is main entry point for btree updates. -+ * -+ * Return values: -+ * -EINTR: locking changed, this function should be called again. 
-+ * -EROFS: filesystem read only -+ * -EIO: journal or btree node IO error -+ */ -+static inline int bch2_trans_commit(struct btree_trans *trans, -+ struct disk_reservation *disk_res, -+ u64 *journal_seq, -+ unsigned flags) -+{ -+ trans->disk_res = disk_res; -+ trans->journal_seq = journal_seq; -+ trans->flags = flags; -+ -+ return __bch2_trans_commit(trans); -+} -+ -+#define __bch2_trans_do(_trans, _disk_res, _journal_seq, _flags, _do) \ -+({ \ -+ int _ret; \ -+ \ -+ while (1) { \ -+ _ret = (_do) ?: bch2_trans_commit(_trans, (_disk_res), \ -+ (_journal_seq), (_flags)); \ -+ if (_ret != -EINTR) \ -+ break; \ -+ bch2_trans_reset(_trans, 0); \ -+ } \ -+ \ -+ _ret; \ -+}) -+ -+#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \ -+({ \ -+ struct btree_trans trans; \ -+ int _ret, _ret2; \ -+ \ -+ bch2_trans_init(&trans, (_c), 0, 0); \ -+ _ret = __bch2_trans_do(&trans, _disk_res, _journal_seq, _flags, \ -+ _do); \ -+ _ret2 = bch2_trans_exit(&trans); \ -+ \ -+ _ret ?: _ret2; \ -+}) -+ -+#define trans_for_each_update(_trans, _i) \ -+ for ((_i) = (_trans)->updates; \ -+ (_i) < (_trans)->updates + (_trans)->nr_updates; \ -+ (_i)++) -+ -+#define trans_for_each_update2(_trans, _i) \ -+ for ((_i) = (_trans)->updates2; \ -+ (_i) < (_trans)->updates2 + (_trans)->nr_updates2; \ -+ (_i)++) -+ -+#endif /* _BCACHEFS_BTREE_UPDATE_H */ -diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -new file mode 100644 -index 000000000000..b41916f93c9b ---- /dev/null -+++ b/fs/bcachefs/btree_update_interior.c -@@ -0,0 +1,2076 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_methods.h" -+#include "btree_cache.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_locking.h" -+#include "buckets.h" -+#include "extents.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+#include "keylist.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+#include -+#include -+ -+/* Debug code: */ -+ -+/* -+ * Verify that child nodes correctly span parent node's range: -+ */ -+static void btree_node_interior_verify(struct btree *b) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bpos next_node = b->data->min_key; -+ struct btree_node_iter iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_btree_ptr_v2 bp; -+ struct bkey unpacked; -+ -+ BUG_ON(!b->c.level); -+ -+ bch2_btree_node_iter_init_from_start(&iter, b); -+ -+ while (1) { -+ k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked); -+ if (k.k->type != KEY_TYPE_btree_ptr_v2) -+ break; -+ bp = bkey_s_c_to_btree_ptr_v2(k); -+ -+ BUG_ON(bkey_cmp(next_node, bp.v->min_key)); -+ -+ bch2_btree_node_iter_advance(&iter, b); -+ -+ if (bch2_btree_node_iter_end(&iter)) { -+ BUG_ON(bkey_cmp(k.k->p, b->key.k.p)); -+ break; -+ } -+ -+ next_node = bkey_successor(k.k->p); -+ } -+#endif -+} -+ -+/* Calculate ideal packed bkey format for new btree nodes: */ -+ -+void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b) -+{ -+ struct bkey_packed *k; -+ struct bset_tree *t; -+ struct bkey uk; -+ -+ bch2_bkey_format_add_pos(s, b->data->min_key); -+ -+ for_each_bset(b, t) -+ bset_tree_for_each_key(b, t, k) -+ if (!bkey_whiteout(k)) { -+ uk = bkey_unpack_key(b, k); -+ bch2_bkey_format_add_key(s, &uk); -+ } -+} -+ -+static struct bkey_format bch2_btree_calc_format(struct btree *b) -+{ -+ struct bkey_format_state s; -+ -+ bch2_bkey_format_init(&s); -+ 
__bch2_btree_calc_format(&s, b); -+ -+ return bch2_bkey_format_done(&s); -+} -+ -+static size_t btree_node_u64s_with_format(struct btree *b, -+ struct bkey_format *new_f) -+{ -+ struct bkey_format *old_f = &b->format; -+ -+ /* stupid integer promotion rules */ -+ ssize_t delta = -+ (((int) new_f->key_u64s - old_f->key_u64s) * -+ (int) b->nr.packed_keys) + -+ (((int) new_f->key_u64s - BKEY_U64s) * -+ (int) b->nr.unpacked_keys); -+ -+ BUG_ON(delta + b->nr.live_u64s < 0); -+ -+ return b->nr.live_u64s + delta; -+} -+ -+/** -+ * btree_node_format_fits - check if we could rewrite node with a new format -+ * -+ * This assumes all keys can pack with the new format -- it just checks if -+ * the re-packed keys would fit inside the node itself. -+ */ -+bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b, -+ struct bkey_format *new_f) -+{ -+ size_t u64s = btree_node_u64s_with_format(b, new_f); -+ -+ return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c); -+} -+ -+/* Btree node freeing/allocation: */ -+ -+static void __btree_node_free(struct bch_fs *c, struct btree *b) -+{ -+ trace_btree_node_free(c, b); -+ -+ BUG_ON(btree_node_dirty(b)); -+ BUG_ON(btree_node_need_write(b)); -+ BUG_ON(b == btree_node_root(c, b)); -+ BUG_ON(b->ob.nr); -+ BUG_ON(!list_empty(&b->write_blocked)); -+ BUG_ON(b->will_make_reachable); -+ -+ clear_btree_node_noevict(b); -+ -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ six_lock_wakeup_all(&b->c.lock); -+ -+ mutex_lock(&c->btree_cache.lock); -+ list_move(&b->list, &c->btree_cache.freeable); -+ mutex_unlock(&c->btree_cache.lock); -+} -+ -+void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b) -+{ -+ struct open_buckets ob = b->ob; -+ -+ b->ob.nr = 0; -+ -+ clear_btree_node_dirty(b); -+ -+ btree_node_lock_type(c, b, SIX_LOCK_write); -+ __btree_node_free(c, b); -+ six_unlock_write(&b->c.lock); -+ -+ bch2_open_buckets_put(c, &ob); -+} -+ -+void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter) -+{ -+ struct btree_iter *linked; -+ -+ trans_for_each_iter(iter->trans, linked) -+ BUG_ON(linked->l[b->c.level].b == b); -+ -+ six_lock_write(&b->c.lock, NULL, NULL); -+ __btree_node_free(c, b); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+} -+ -+static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, -+ struct disk_reservation *res, -+ struct closure *cl, -+ unsigned flags) -+{ -+ struct write_point *wp; -+ struct btree *b; -+ BKEY_PADDED(k) tmp; -+ struct open_buckets ob = { .nr = 0 }; -+ struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; -+ unsigned nr_reserve; -+ enum alloc_reserve alloc_reserve; -+ -+ if (flags & BTREE_INSERT_USE_ALLOC_RESERVE) { -+ nr_reserve = 0; -+ alloc_reserve = RESERVE_ALLOC; -+ } else if (flags & BTREE_INSERT_USE_RESERVE) { -+ nr_reserve = BTREE_NODE_RESERVE / 2; -+ alloc_reserve = RESERVE_BTREE; -+ } else { -+ nr_reserve = BTREE_NODE_RESERVE; -+ alloc_reserve = RESERVE_NONE; -+ } -+ -+ mutex_lock(&c->btree_reserve_cache_lock); -+ if (c->btree_reserve_cache_nr > nr_reserve) { -+ struct btree_alloc *a = -+ &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; -+ -+ ob = a->ob; -+ bkey_copy(&tmp.k, &a->k); -+ mutex_unlock(&c->btree_reserve_cache_lock); -+ goto mem_alloc; -+ } -+ mutex_unlock(&c->btree_reserve_cache_lock); -+ -+retry: -+ wp = bch2_alloc_sectors_start(c, c->opts.foreground_target, 0, -+ writepoint_ptr(&c->btree_write_point), -+ &devs_have, -+ res->nr_replicas, -+ c->opts.metadata_replicas_required, -+ 
alloc_reserve, 0, cl); -+ if (IS_ERR(wp)) -+ return ERR_CAST(wp); -+ -+ if (wp->sectors_free < c->opts.btree_node_size) { -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ if (ob->sectors_free < c->opts.btree_node_size) -+ ob->sectors_free = 0; -+ -+ bch2_alloc_sectors_done(c, wp); -+ goto retry; -+ } -+ -+ if (c->sb.features & (1ULL << BCH_FEATURE_btree_ptr_v2)) -+ bkey_btree_ptr_v2_init(&tmp.k); -+ else -+ bkey_btree_ptr_init(&tmp.k); -+ -+ bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size); -+ -+ bch2_open_bucket_get(c, wp, &ob); -+ bch2_alloc_sectors_done(c, wp); -+mem_alloc: -+ b = bch2_btree_node_mem_alloc(c); -+ -+ /* we hold cannibalize_lock: */ -+ BUG_ON(IS_ERR(b)); -+ BUG_ON(b->ob.nr); -+ -+ bkey_copy(&b->key, &tmp.k); -+ b->ob = ob; -+ -+ return b; -+} -+ -+static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned level) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *b; -+ int ret; -+ -+ BUG_ON(level >= BTREE_MAX_DEPTH); -+ BUG_ON(!as->nr_prealloc_nodes); -+ -+ b = as->prealloc_nodes[--as->nr_prealloc_nodes]; -+ -+ set_btree_node_accessed(b); -+ set_btree_node_dirty(b); -+ set_btree_node_need_write(b); -+ -+ bch2_bset_init_first(b, &b->data->keys); -+ b->c.level = level; -+ b->c.btree_id = as->btree_id; -+ -+ memset(&b->nr, 0, sizeof(b->nr)); -+ b->data->magic = cpu_to_le64(bset_magic(c)); -+ b->data->flags = 0; -+ SET_BTREE_NODE_ID(b->data, as->btree_id); -+ SET_BTREE_NODE_LEVEL(b->data, level); -+ b->data->ptr = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)).start->ptr; -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key); -+ -+ bp->v.mem_ptr = 0; -+ bp->v.seq = b->data->keys.seq; -+ bp->v.sectors_written = 0; -+ bp->v.sectors = cpu_to_le16(c->opts.btree_node_size); -+ } -+ -+ if (c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite)) -+ SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true); -+ -+ if (btree_node_is_extents(b) && -+ !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) { -+ set_btree_node_old_extent_overwrite(b); -+ set_btree_node_need_rewrite(b); -+ } -+ -+ bch2_btree_build_aux_trees(b); -+ -+ ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id); -+ BUG_ON(ret); -+ -+ trace_btree_node_alloc(c, b); -+ return b; -+} -+ -+static void btree_set_min(struct btree *b, struct bpos pos) -+{ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) -+ bkey_i_to_btree_ptr_v2(&b->key)->v.min_key = pos; -+ b->data->min_key = pos; -+} -+ -+static void btree_set_max(struct btree *b, struct bpos pos) -+{ -+ b->key.k.p = pos; -+ b->data->max_key = pos; -+} -+ -+struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as, -+ struct btree *b, -+ struct bkey_format format) -+{ -+ struct btree *n; -+ -+ n = bch2_btree_node_alloc(as, b->c.level); -+ -+ SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1); -+ -+ btree_set_min(n, b->data->min_key); -+ btree_set_max(n, b->data->max_key); -+ -+ n->data->format = format; -+ btree_node_set_format(n, format); -+ -+ bch2_btree_sort_into(as->c, n, b); -+ -+ btree_node_reset_sib_u64s(n); -+ -+ n->key.k.p = b->key.k.p; -+ return n; -+} -+ -+static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as, -+ struct btree *b) -+{ -+ struct bkey_format new_f = bch2_btree_calc_format(b); -+ -+ /* -+ * The keys might expand with the new format - if they wouldn't fit in -+ * the btree node anymore, use the old format for now: -+ */ -+ if 
(!bch2_btree_node_format_fits(as->c, b, &new_f)) -+ new_f = b->format; -+ -+ return __bch2_btree_node_alloc_replacement(as, b, new_f); -+} -+ -+static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level) -+{ -+ struct btree *b = bch2_btree_node_alloc(as, level); -+ -+ btree_set_min(b, POS_MIN); -+ btree_set_max(b, POS_MAX); -+ b->data->format = bch2_btree_calc_format(b); -+ -+ btree_node_set_format(b, b->data->format); -+ bch2_btree_build_aux_trees(b); -+ -+ bch2_btree_update_add_new_node(as, b); -+ six_unlock_write(&b->c.lock); -+ -+ return b; -+} -+ -+static void bch2_btree_reserve_put(struct btree_update *as) -+{ -+ struct bch_fs *c = as->c; -+ -+ mutex_lock(&c->btree_reserve_cache_lock); -+ -+ while (as->nr_prealloc_nodes) { -+ struct btree *b = as->prealloc_nodes[--as->nr_prealloc_nodes]; -+ -+ six_unlock_write(&b->c.lock); -+ -+ if (c->btree_reserve_cache_nr < -+ ARRAY_SIZE(c->btree_reserve_cache)) { -+ struct btree_alloc *a = -+ &c->btree_reserve_cache[c->btree_reserve_cache_nr++]; -+ -+ a->ob = b->ob; -+ b->ob.nr = 0; -+ bkey_copy(&a->k, &b->key); -+ } else { -+ bch2_open_buckets_put(c, &b->ob); -+ } -+ -+ btree_node_lock_type(c, b, SIX_LOCK_write); -+ __btree_node_free(c, b); -+ six_unlock_write(&b->c.lock); -+ -+ six_unlock_intent(&b->c.lock); -+ } -+ -+ mutex_unlock(&c->btree_reserve_cache_lock); -+} -+ -+static int bch2_btree_reserve_get(struct btree_update *as, unsigned nr_nodes, -+ unsigned flags, struct closure *cl) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *b; -+ int ret; -+ -+ BUG_ON(nr_nodes > BTREE_RESERVE_MAX); -+ -+ /* -+ * Protects reaping from the btree node cache and using the btree node -+ * open bucket reserve: -+ */ -+ ret = bch2_btree_cache_cannibalize_lock(c, cl); -+ if (ret) -+ return ret; -+ -+ while (as->nr_prealloc_nodes < nr_nodes) { -+ b = __bch2_btree_node_alloc(c, &as->disk_res, -+ flags & BTREE_INSERT_NOWAIT -+ ? 
NULL : cl, flags); -+ if (IS_ERR(b)) { -+ ret = PTR_ERR(b); -+ goto err_free; -+ } -+ -+ ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); -+ if (ret) -+ goto err_free; -+ -+ as->prealloc_nodes[as->nr_prealloc_nodes++] = b; -+ } -+ -+ bch2_btree_cache_cannibalize_unlock(c); -+ return 0; -+err_free: -+ bch2_btree_cache_cannibalize_unlock(c); -+ trace_btree_reserve_get_fail(c, nr_nodes, cl); -+ return ret; -+} -+ -+/* Asynchronous interior node update machinery */ -+ -+static void bch2_btree_update_free(struct btree_update *as) -+{ -+ struct bch_fs *c = as->c; -+ -+ bch2_journal_preres_put(&c->journal, &as->journal_preres); -+ -+ bch2_journal_pin_drop(&c->journal, &as->journal); -+ bch2_journal_pin_flush(&c->journal, &as->journal); -+ bch2_disk_reservation_put(c, &as->disk_res); -+ bch2_btree_reserve_put(as); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_del(&as->unwritten_list); -+ list_del(&as->list); -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ closure_debug_destroy(&as->cl); -+ mempool_free(as, &c->btree_interior_update_pool); -+ -+ closure_wake_up(&c->btree_interior_update_wait); -+} -+ -+static void btree_update_will_delete_key(struct btree_update *as, -+ struct bkey_i *k) -+{ -+ BUG_ON(bch2_keylist_u64s(&as->old_keys) + k->k.u64s > -+ ARRAY_SIZE(as->_old_keys)); -+ bch2_keylist_add(&as->old_keys, k); -+} -+ -+static void btree_update_will_add_key(struct btree_update *as, -+ struct bkey_i *k) -+{ -+ BUG_ON(bch2_keylist_u64s(&as->new_keys) + k->k.u64s > -+ ARRAY_SIZE(as->_new_keys)); -+ bch2_keylist_add(&as->new_keys, k); -+} -+ -+/* -+ * The transactional part of an interior btree node update, where we journal the -+ * update we did to the interior node and update alloc info: -+ */ -+static int btree_update_nodes_written_trans(struct btree_trans *trans, -+ struct btree_update *as) -+{ -+ struct bkey_i *k; -+ int ret; -+ -+ trans->extra_journal_entries = (void *) &as->journal_entries[0]; -+ trans->extra_journal_entry_u64s = as->journal_u64s; -+ trans->journal_pin = &as->journal; -+ -+ for_each_keylist_key(&as->new_keys, k) { -+ ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k), -+ 0, 0, BTREE_TRIGGER_INSERT); -+ if (ret) -+ return ret; -+ } -+ -+ for_each_keylist_key(&as->old_keys, k) { -+ ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k), -+ 0, 0, BTREE_TRIGGER_OVERWRITE); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static void btree_update_nodes_written(struct btree_update *as) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *b = as->b; -+ u64 journal_seq = 0; -+ unsigned i; -+ int ret; -+ -+ /* -+ * We did an update to a parent node where the pointers we added pointed -+ * to child nodes that weren't written yet: now, the child nodes have -+ * been written so we can write out the update to the interior node. -+ */ -+ -+ /* -+ * We can't call into journal reclaim here: we'd block on the journal -+ * reclaim lock, but we may need to release the open buckets we have -+ * pinned in order for other btree updates to make forward progress, and -+ * journal reclaim does btree updates when flushing bkey_cached entries, -+ * which may require allocations as well. 
-+ */ -+ ret = bch2_trans_do(c, &as->disk_res, &journal_seq, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_USE_ALLOC_RESERVE| -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_JOURNAL_RECLAIM| -+ BTREE_INSERT_JOURNAL_RESERVED, -+ btree_update_nodes_written_trans(&trans, as)); -+ BUG_ON(ret && !bch2_journal_error(&c->journal)); -+ -+ if (b) { -+ /* -+ * @b is the node we did the final insert into: -+ * -+ * On failure to get a journal reservation, we still have to -+ * unblock the write and allow most of the write path to happen -+ * so that shutdown works, but the i->journal_seq mechanism -+ * won't work to prevent the btree write from being visible (we -+ * didn't get a journal sequence number) - instead -+ * __bch2_btree_node_write() doesn't do the actual write if -+ * we're in journal error state: -+ */ -+ -+ btree_node_lock_type(c, b, SIX_LOCK_intent); -+ btree_node_lock_type(c, b, SIX_LOCK_write); -+ mutex_lock(&c->btree_interior_update_lock); -+ -+ list_del(&as->write_blocked_list); -+ -+ if (!ret && as->b == b) { -+ struct bset *i = btree_bset_last(b); -+ -+ BUG_ON(!b->c.level); -+ BUG_ON(!btree_node_dirty(b)); -+ -+ i->journal_seq = cpu_to_le64( -+ max(journal_seq, -+ le64_to_cpu(i->journal_seq))); -+ -+ bch2_btree_add_journal_pin(c, b, journal_seq); -+ } -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+ six_unlock_write(&b->c.lock); -+ -+ btree_node_write_if_need(c, b, SIX_LOCK_intent); -+ six_unlock_intent(&b->c.lock); -+ } -+ -+ bch2_journal_pin_drop(&c->journal, &as->journal); -+ -+ bch2_journal_preres_put(&c->journal, &as->journal_preres); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ for (i = 0; i < as->nr_new_nodes; i++) { -+ b = as->new_nodes[i]; -+ -+ BUG_ON(b->will_make_reachable != (unsigned long) as); -+ b->will_make_reachable = 0; -+ } -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ for (i = 0; i < as->nr_new_nodes; i++) { -+ b = as->new_nodes[i]; -+ -+ btree_node_lock_type(c, b, SIX_LOCK_read); -+ btree_node_write_if_need(c, b, SIX_LOCK_read); -+ six_unlock_read(&b->c.lock); -+ } -+ -+ for (i = 0; i < as->nr_open_buckets; i++) -+ bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]); -+ -+ bch2_btree_update_free(as); -+} -+ -+static void btree_interior_update_work(struct work_struct *work) -+{ -+ struct bch_fs *c = -+ container_of(work, struct bch_fs, btree_interior_update_work); -+ struct btree_update *as; -+ -+ while (1) { -+ mutex_lock(&c->btree_interior_update_lock); -+ as = list_first_entry_or_null(&c->btree_interior_updates_unwritten, -+ struct btree_update, unwritten_list); -+ if (as && !as->nodes_written) -+ as = NULL; -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ if (!as) -+ break; -+ -+ btree_update_nodes_written(as); -+ } -+} -+ -+static void btree_update_set_nodes_written(struct closure *cl) -+{ -+ struct btree_update *as = container_of(cl, struct btree_update, cl); -+ struct bch_fs *c = as->c; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ as->nodes_written = true; -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work); -+} -+ -+/* -+ * We're updating @b with pointers to nodes that haven't finished writing yet: -+ * block @b from being written until @as completes -+ */ -+static void btree_update_updated_node(struct btree_update *as, struct btree *b) -+{ -+ struct bch_fs *c = as->c; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); -+ -+ 
BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); -+ BUG_ON(!btree_node_dirty(b)); -+ -+ as->mode = BTREE_INTERIOR_UPDATING_NODE; -+ as->b = b; -+ list_add(&as->write_blocked_list, &b->write_blocked); -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+ -+static void btree_update_reparent(struct btree_update *as, -+ struct btree_update *child) -+{ -+ struct bch_fs *c = as->c; -+ -+ lockdep_assert_held(&c->btree_interior_update_lock); -+ -+ child->b = NULL; -+ child->mode = BTREE_INTERIOR_UPDATING_AS; -+ -+ /* -+ * When we write a new btree root, we have to drop our journal pin -+ * _before_ the new nodes are technically reachable; see -+ * btree_update_nodes_written(). -+ * -+ * This goes for journal pins that are recursively blocked on us - so, -+ * just transfer the journal pin to the new interior update so -+ * btree_update_nodes_written() can drop it. -+ */ -+ bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, NULL); -+ bch2_journal_pin_drop(&c->journal, &child->journal); -+} -+ -+static void btree_update_updated_root(struct btree_update *as, struct btree *b) -+{ -+ struct bkey_i *insert = &b->key; -+ struct bch_fs *c = as->c; -+ -+ BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); -+ -+ BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) > -+ ARRAY_SIZE(as->journal_entries)); -+ -+ as->journal_u64s += -+ journal_entry_set((void *) &as->journal_entries[as->journal_u64s], -+ BCH_JSET_ENTRY_btree_root, -+ b->c.btree_id, b->c.level, -+ insert, insert->k.u64s); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); -+ -+ as->mode = BTREE_INTERIOR_UPDATING_ROOT; -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+ -+/* -+ * bch2_btree_update_add_new_node: -+ * -+ * This causes @as to wait on @b to be written, before it gets to -+ * bch2_btree_update_nodes_written -+ * -+ * Additionally, it sets b->will_make_reachable to prevent any additional writes -+ * to @b from happening besides the first until @b is reachable on disk -+ * -+ * And it adds @b to the list of @as's new nodes, so that we can update sector -+ * counts in bch2_btree_update_nodes_written: -+ */ -+void bch2_btree_update_add_new_node(struct btree_update *as, struct btree *b) -+{ -+ struct bch_fs *c = as->c; -+ -+ closure_get(&as->cl); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ BUG_ON(as->nr_new_nodes >= ARRAY_SIZE(as->new_nodes)); -+ BUG_ON(b->will_make_reachable); -+ -+ as->new_nodes[as->nr_new_nodes++] = b; -+ b->will_make_reachable = 1UL|(unsigned long) as; -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ btree_update_will_add_key(as, &b->key); -+} -+ -+/* -+ * returns true if @b was a new node -+ */ -+static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b) -+{ -+ struct btree_update *as; -+ unsigned long v; -+ unsigned i; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ /* -+ * When b->will_make_reachable != 0, it owns a ref on as->cl that's -+ * dropped when it gets written by bch2_btree_complete_write - the -+ * xchg() is for synchronization with bch2_btree_complete_write: -+ */ -+ v = xchg(&b->will_make_reachable, 0); -+ as = (struct btree_update *) (v & ~1UL); -+ -+ if (!as) { -+ mutex_unlock(&c->btree_interior_update_lock); -+ return; -+ } -+ -+ for (i = 0; i < as->nr_new_nodes; i++) -+ if (as->new_nodes[i] == b) -+ goto found; -+ -+ BUG(); -+found: -+ array_remove_item(as->new_nodes, as->nr_new_nodes, i); -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ if (v & 1) -+ 
closure_put(&as->cl); -+} -+ -+void bch2_btree_update_get_open_buckets(struct btree_update *as, struct btree *b) -+{ -+ while (b->ob.nr) -+ as->open_buckets[as->nr_open_buckets++] = -+ b->ob.v[--b->ob.nr]; -+} -+ -+/* -+ * @b is being split/rewritten: it may have pointers to not-yet-written btree -+ * nodes and thus outstanding btree_updates - redirect @b's -+ * btree_updates to point to this btree_update: -+ */ -+void bch2_btree_interior_update_will_free_node(struct btree_update *as, -+ struct btree *b) -+{ -+ struct bch_fs *c = as->c; -+ struct btree_update *p, *n; -+ struct btree_write *w; -+ -+ set_btree_node_dying(b); -+ -+ if (btree_node_fake(b)) -+ return; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ -+ /* -+ * Does this node have any btree_update operations preventing -+ * it from being written? -+ * -+ * If so, redirect them to point to this btree_update: we can -+ * write out our new nodes, but we won't make them visible until those -+ * operations complete -+ */ -+ list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) { -+ list_del_init(&p->write_blocked_list); -+ btree_update_reparent(as, p); -+ -+ /* -+ * for flush_held_btree_writes() waiting on updates to flush or -+ * nodes to be writeable: -+ */ -+ closure_wake_up(&c->btree_interior_update_wait); -+ } -+ -+ clear_btree_node_dirty(b); -+ clear_btree_node_need_write(b); -+ -+ /* -+ * Does this node have unwritten data that has a pin on the journal? -+ * -+ * If so, transfer that pin to the btree_update operation - -+ * note that if we're freeing multiple nodes, we only need to keep the -+ * oldest pin of any of the nodes we're freeing. We'll release the pin -+ * when the new nodes are persistent and reachable on disk: -+ */ -+ w = btree_current_write(b); -+ bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL); -+ bch2_journal_pin_drop(&c->journal, &w->journal); -+ -+ w = btree_prev_write(b); -+ bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL); -+ bch2_journal_pin_drop(&c->journal, &w->journal); -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ /* -+ * Is this a node that isn't reachable on disk yet? -+ * -+ * Nodes that aren't reachable yet have writes blocked until they're -+ * reachable - now that we've cancelled any pending writes and moved -+ * things waiting on that write to wait on this update, we can drop this -+ * node from the list of nodes that the other update is making -+ * reachable, prior to freeing it: -+ */ -+ btree_update_drop_new_node(c, b); -+ -+ btree_update_will_delete_key(as, &b->key); -+} -+ -+void bch2_btree_update_done(struct btree_update *as) -+{ -+ BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE); -+ -+ bch2_btree_reserve_put(as); -+ -+ continue_at(&as->cl, btree_update_set_nodes_written, system_freezable_wq); -+} -+ -+struct btree_update * -+bch2_btree_update_start(struct btree_trans *trans, enum btree_id id, -+ unsigned nr_nodes, unsigned flags, -+ struct closure *cl) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_update *as; -+ int disk_res_flags = (flags & BTREE_INSERT_NOFAIL) -+ ? BCH_DISK_RESERVATION_NOFAIL : 0; -+ int journal_flags = (flags & BTREE_INSERT_JOURNAL_RESERVED) -+ ? 
JOURNAL_RES_GET_RECLAIM : 0; -+ int ret = 0; -+ -+ /* -+ * This check isn't necessary for correctness - it's just to potentially -+ * prevent us from doing a lot of work that'll end up being wasted: -+ */ -+ ret = bch2_journal_error(&c->journal); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO); -+ memset(as, 0, sizeof(*as)); -+ closure_init(&as->cl, NULL); -+ as->c = c; -+ as->mode = BTREE_INTERIOR_NO_UPDATE; -+ as->btree_id = id; -+ INIT_LIST_HEAD(&as->list); -+ INIT_LIST_HEAD(&as->unwritten_list); -+ INIT_LIST_HEAD(&as->write_blocked_list); -+ bch2_keylist_init(&as->old_keys, as->_old_keys); -+ bch2_keylist_init(&as->new_keys, as->_new_keys); -+ bch2_keylist_init(&as->parent_keys, as->inline_keys); -+ -+ ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, -+ BTREE_UPDATE_JOURNAL_RES, -+ journal_flags|JOURNAL_RES_GET_NONBLOCK); -+ if (ret == -EAGAIN) { -+ if (flags & BTREE_INSERT_NOUNLOCK) -+ return ERR_PTR(-EINTR); -+ -+ bch2_trans_unlock(trans); -+ -+ ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, -+ BTREE_UPDATE_JOURNAL_RES, -+ journal_flags); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ if (!bch2_trans_relock(trans)) { -+ ret = -EINTR; -+ goto err; -+ } -+ } -+ -+ ret = bch2_disk_reservation_get(c, &as->disk_res, -+ nr_nodes * c->opts.btree_node_size, -+ c->opts.metadata_replicas, -+ disk_res_flags); -+ if (ret) -+ goto err; -+ -+ ret = bch2_btree_reserve_get(as, nr_nodes, flags, cl); -+ if (ret) -+ goto err; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_add_tail(&as->list, &c->btree_interior_update_list); -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ return as; -+err: -+ bch2_btree_update_free(as); -+ return ERR_PTR(ret); -+} -+ -+/* Btree root updates: */ -+ -+static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b) -+{ -+ /* Root nodes cannot be reaped */ -+ mutex_lock(&c->btree_cache.lock); -+ list_del_init(&b->list); -+ mutex_unlock(&c->btree_cache.lock); -+ -+ mutex_lock(&c->btree_root_lock); -+ BUG_ON(btree_node_root(c, b) && -+ (b->c.level < btree_node_root(c, b)->c.level || -+ !btree_node_dying(btree_node_root(c, b)))); -+ -+ btree_node_root(c, b) = b; -+ mutex_unlock(&c->btree_root_lock); -+ -+ bch2_recalc_btree_reserve(c); -+} -+ -+/** -+ * bch_btree_set_root - update the root in memory and on disk -+ * -+ * To ensure forward progress, the current task must not be holding any -+ * btree node write locks. However, you must hold an intent lock on the -+ * old root. -+ * -+ * Note: This allocates a journal entry but doesn't add any keys to -+ * it. All the btree roots are part of every journal write, so there -+ * is nothing new to be done. This just guarantees that there is a -+ * journal write. -+ */ -+static void bch2_btree_set_root(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *old; -+ -+ trace_btree_set_root(c, b); -+ BUG_ON(!b->written && -+ !test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)); -+ -+ old = btree_node_root(c, b); -+ -+ /* -+ * Ensure no one is using the old root while we switch to the -+ * new root: -+ */ -+ bch2_btree_node_lock_write(old, iter); -+ -+ bch2_btree_set_root_inmem(c, b); -+ -+ btree_update_updated_root(as, b); -+ -+ /* -+ * Unlock old root after new root is visible: -+ * -+ * The new root isn't persistent, but that's ok: we still have -+ * an intent lock on the new root, and any updates that would -+ * depend on the new root would have to update the new root. 
-+ */ -+ bch2_btree_node_unlock_write(old, iter); -+} -+ -+/* Interior node updates: */ -+ -+static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, -+ struct bkey_i *insert, -+ struct btree_node_iter *node_iter) -+{ -+ struct bkey_packed *k; -+ -+ BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) > -+ ARRAY_SIZE(as->journal_entries)); -+ -+ as->journal_u64s += -+ journal_entry_set((void *) &as->journal_entries[as->journal_u64s], -+ BCH_JSET_ENTRY_btree_keys, -+ b->c.btree_id, b->c.level, -+ insert, insert->k.u64s); -+ -+ while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) && -+ bkey_iter_pos_cmp(b, k, &insert->k.p) < 0) -+ bch2_btree_node_iter_advance(node_iter, b); -+ -+ bch2_btree_bset_insert_key(iter, b, node_iter, insert); -+ set_btree_node_dirty(b); -+ set_btree_node_need_write(b); -+} -+ -+/* -+ * Move keys from n1 (original replacement node, now lower node) to n2 (higher -+ * node) -+ */ -+static struct btree *__btree_split_node(struct btree_update *as, -+ struct btree *n1, -+ struct btree_iter *iter) -+{ -+ size_t nr_packed = 0, nr_unpacked = 0; -+ struct btree *n2; -+ struct bset *set1, *set2; -+ struct bkey_packed *k, *prev = NULL; -+ -+ n2 = bch2_btree_node_alloc(as, n1->c.level); -+ bch2_btree_update_add_new_node(as, n2); -+ -+ n2->data->max_key = n1->data->max_key; -+ n2->data->format = n1->format; -+ SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data)); -+ n2->key.k.p = n1->key.k.p; -+ -+ btree_node_set_format(n2, n2->data->format); -+ -+ set1 = btree_bset_first(n1); -+ set2 = btree_bset_first(n2); -+ -+ /* -+ * Has to be a linear search because we don't have an auxiliary -+ * search tree yet -+ */ -+ k = set1->start; -+ while (1) { -+ struct bkey_packed *n = bkey_next_skip_noops(k, vstruct_last(set1)); -+ -+ if (n == vstruct_last(set1)) -+ break; -+ if (k->_data - set1->_data >= (le16_to_cpu(set1->u64s) * 3) / 5) -+ break; -+ -+ if (bkey_packed(k)) -+ nr_packed++; -+ else -+ nr_unpacked++; -+ -+ prev = k; -+ k = n; -+ } -+ -+ BUG_ON(!prev); -+ -+ btree_set_max(n1, bkey_unpack_pos(n1, prev)); -+ btree_set_min(n2, bkey_successor(n1->key.k.p)); -+ -+ set2->u64s = cpu_to_le16((u64 *) vstruct_end(set1) - (u64 *) k); -+ set1->u64s = cpu_to_le16(le16_to_cpu(set1->u64s) - le16_to_cpu(set2->u64s)); -+ -+ set_btree_bset_end(n1, n1->set); -+ set_btree_bset_end(n2, n2->set); -+ -+ n2->nr.live_u64s = le16_to_cpu(set2->u64s); -+ n2->nr.bset_u64s[0] = le16_to_cpu(set2->u64s); -+ n2->nr.packed_keys = n1->nr.packed_keys - nr_packed; -+ n2->nr.unpacked_keys = n1->nr.unpacked_keys - nr_unpacked; -+ -+ n1->nr.live_u64s = le16_to_cpu(set1->u64s); -+ n1->nr.bset_u64s[0] = le16_to_cpu(set1->u64s); -+ n1->nr.packed_keys = nr_packed; -+ n1->nr.unpacked_keys = nr_unpacked; -+ -+ BUG_ON(!set1->u64s); -+ BUG_ON(!set2->u64s); -+ -+ memcpy_u64s(set2->start, -+ vstruct_end(set1), -+ le16_to_cpu(set2->u64s)); -+ -+ btree_node_reset_sib_u64s(n1); -+ btree_node_reset_sib_u64s(n2); -+ -+ bch2_verify_btree_nr_keys(n1); -+ bch2_verify_btree_nr_keys(n2); -+ -+ if (n1->c.level) { -+ btree_node_interior_verify(n1); -+ btree_node_interior_verify(n2); -+ } -+ -+ return n2; -+} -+ -+/* -+ * For updates to interior nodes, we've got to do the insert before we split -+ * because the stuff we're inserting has to be inserted atomically. Post split, -+ * the keys might have to go in different nodes and the split would no longer be -+ * atomic. 
-+ * -+ * Worse, if the insert is from btree node coalescing, if we do the insert after -+ * we do the split (and pick the pivot) - the pivot we pick might be between -+ * nodes that were coalesced, and thus in the middle of a child node post -+ * coalescing: -+ */ -+static void btree_split_insert_keys(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, -+ struct keylist *keys) -+{ -+ struct btree_node_iter node_iter; -+ struct bkey_i *k = bch2_keylist_front(keys); -+ struct bkey_packed *src, *dst, *n; -+ struct bset *i; -+ -+ BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE); -+ -+ bch2_btree_node_iter_init(&node_iter, b, &k->k.p); -+ -+ while (!bch2_keylist_empty(keys)) { -+ k = bch2_keylist_front(keys); -+ -+ bch2_insert_fixup_btree_ptr(as, b, iter, k, &node_iter); -+ bch2_keylist_pop_front(keys); -+ } -+ -+ /* -+ * We can't tolerate whiteouts here - with whiteouts there can be -+ * duplicate keys, and it would be rather bad if we picked a duplicate -+ * for the pivot: -+ */ -+ i = btree_bset_first(b); -+ src = dst = i->start; -+ while (src != vstruct_last(i)) { -+ n = bkey_next_skip_noops(src, vstruct_last(i)); -+ if (!bkey_deleted(src)) { -+ memmove_u64s_down(dst, src, src->u64s); -+ dst = bkey_next(dst); -+ } -+ src = n; -+ } -+ -+ i->u64s = cpu_to_le16((u64 *) dst - i->_data); -+ set_btree_bset_end(b, b->set); -+ -+ BUG_ON(b->nsets != 1 || -+ b->nr.live_u64s != le16_to_cpu(btree_bset_first(b)->u64s)); -+ -+ btree_node_interior_verify(b); -+} -+ -+static void btree_split(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, struct keylist *keys, -+ unsigned flags) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *parent = btree_node_parent(iter, b); -+ struct btree *n1, *n2 = NULL, *n3 = NULL; -+ u64 start_time = local_clock(); -+ -+ BUG_ON(!parent && (b != btree_node_root(c, b))); -+ BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level)); -+ -+ bch2_btree_interior_update_will_free_node(as, b); -+ -+ n1 = bch2_btree_node_alloc_replacement(as, b); -+ bch2_btree_update_add_new_node(as, n1); -+ -+ if (keys) -+ btree_split_insert_keys(as, n1, iter, keys); -+ -+ if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) { -+ trace_btree_split(c, b); -+ -+ n2 = __btree_split_node(as, n1, iter); -+ -+ bch2_btree_build_aux_trees(n2); -+ bch2_btree_build_aux_trees(n1); -+ six_unlock_write(&n2->c.lock); -+ six_unlock_write(&n1->c.lock); -+ -+ bch2_btree_node_write(c, n2, SIX_LOCK_intent); -+ -+ /* -+ * Note that on recursive parent_keys == keys, so we -+ * can't start adding new keys to parent_keys before emptying it -+ * out (which we did with btree_split_insert_keys() above) -+ */ -+ bch2_keylist_add(&as->parent_keys, &n1->key); -+ bch2_keylist_add(&as->parent_keys, &n2->key); -+ -+ if (!parent) { -+ /* Depth increases, make a new root */ -+ n3 = __btree_root_alloc(as, b->c.level + 1); -+ -+ n3->sib_u64s[0] = U16_MAX; -+ n3->sib_u64s[1] = U16_MAX; -+ -+ btree_split_insert_keys(as, n3, iter, &as->parent_keys); -+ -+ bch2_btree_node_write(c, n3, SIX_LOCK_intent); -+ } -+ } else { -+ trace_btree_compact(c, b); -+ -+ bch2_btree_build_aux_trees(n1); -+ six_unlock_write(&n1->c.lock); -+ -+ if (parent) -+ bch2_keylist_add(&as->parent_keys, &n1->key); -+ } -+ -+ bch2_btree_node_write(c, n1, SIX_LOCK_intent); -+ -+ /* New nodes all written, now make them visible: */ -+ -+ if (parent) { -+ /* Split a non root node */ -+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); -+ } else if (n3) { -+ bch2_btree_set_root(as, n3, iter); -+ } else { -+ 
/* Root filled up but didn't need to be split */ -+ bch2_btree_set_root(as, n1, iter); -+ } -+ -+ bch2_btree_update_get_open_buckets(as, n1); -+ if (n2) -+ bch2_btree_update_get_open_buckets(as, n2); -+ if (n3) -+ bch2_btree_update_get_open_buckets(as, n3); -+ -+ /* Successful split, update the iterator to point to the new nodes: */ -+ -+ six_lock_increment(&b->c.lock, SIX_LOCK_intent); -+ bch2_btree_iter_node_drop(iter, b); -+ if (n3) -+ bch2_btree_iter_node_replace(iter, n3); -+ if (n2) -+ bch2_btree_iter_node_replace(iter, n2); -+ bch2_btree_iter_node_replace(iter, n1); -+ -+ /* -+ * The old node must be freed (in memory) _before_ unlocking the new -+ * nodes - else another thread could re-acquire a read lock on the old -+ * node after another thread has locked and updated the new node, thus -+ * seeing stale data: -+ */ -+ bch2_btree_node_free_inmem(c, b, iter); -+ -+ if (n3) -+ six_unlock_intent(&n3->c.lock); -+ if (n2) -+ six_unlock_intent(&n2->c.lock); -+ six_unlock_intent(&n1->c.lock); -+ -+ bch2_btree_trans_verify_locks(iter->trans); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split], -+ start_time); -+} -+ -+static void -+bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, struct keylist *keys) -+{ -+ struct btree_iter *linked; -+ struct btree_node_iter node_iter; -+ struct bkey_i *insert = bch2_keylist_front(keys); -+ struct bkey_packed *k; -+ -+ /* Don't screw up @iter's position: */ -+ node_iter = iter->l[b->c.level].iter; -+ -+ /* -+ * btree_split(), btree_gc_coalesce() will insert keys before -+ * the iterator's current position - they know the keys go in -+ * the node the iterator points to: -+ */ -+ while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) && -+ (bkey_cmp_packed(b, k, &insert->k) >= 0)) -+ ; -+ -+ for_each_keylist_key(keys, insert) -+ bch2_insert_fixup_btree_ptr(as, b, iter, insert, &node_iter); -+ -+ btree_update_updated_node(as, b); -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) -+ bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); -+ -+ bch2_btree_trans_verify_iters(iter->trans, b); -+} -+ -+/** -+ * bch_btree_insert_node - insert bkeys into a given btree node -+ * -+ * @iter: btree iterator -+ * @keys: list of keys to insert -+ * @hook: insert callback -+ * @persistent: if not null, @persistent will wait on journal write -+ * -+ * Inserts as many keys as it can into a given btree node, splitting it if full. -+ * If a split occurred, this function will return early. This can only happen -+ * for leaf nodes -- inserts into interior nodes have to be atomic. 
-+ */ -+void bch2_btree_insert_node(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, struct keylist *keys, -+ unsigned flags) -+{ -+ struct bch_fs *c = as->c; -+ int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s); -+ int old_live_u64s = b->nr.live_u64s; -+ int live_u64s_added, u64s_added; -+ -+ BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level)); -+ BUG_ON(!b->c.level); -+ BUG_ON(!as || as->b); -+ bch2_verify_keylist_sorted(keys); -+ -+ if (as->must_rewrite) -+ goto split; -+ -+ bch2_btree_node_lock_for_insert(c, b, iter); -+ -+ if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) { -+ bch2_btree_node_unlock_write(b, iter); -+ goto split; -+ } -+ -+ bch2_btree_insert_keys_interior(as, b, iter, keys); -+ -+ live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; -+ u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s; -+ -+ if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added); -+ if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added); -+ -+ if (u64s_added > live_u64s_added && -+ bch2_maybe_compact_whiteouts(c, b)) -+ bch2_btree_iter_reinit_node(iter, b); -+ -+ bch2_btree_node_unlock_write(b, iter); -+ -+ btree_node_interior_verify(b); -+ -+ /* -+ * when called from the btree_split path the new nodes aren't added to -+ * the btree iterator yet, so the merge path's unlock/wait/relock dance -+ * won't work: -+ */ -+ bch2_foreground_maybe_merge(c, iter, b->c.level, -+ flags|BTREE_INSERT_NOUNLOCK); -+ return; -+split: -+ btree_split(as, b, iter, keys, flags); -+} -+ -+int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, -+ unsigned flags) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree *b = iter_l(iter)->b; -+ struct btree_update *as; -+ struct closure cl; -+ int ret = 0; -+ struct btree_insert_entry *i; -+ -+ /* -+ * We already have a disk reservation and open buckets pinned; this -+ * allocation must not block: -+ */ -+ trans_for_each_update(trans, i) -+ if (btree_node_type_needs_gc(i->iter->btree_id)) -+ flags |= BTREE_INSERT_USE_RESERVE; -+ -+ closure_init_stack(&cl); -+ -+ /* Hack, because gc and splitting nodes doesn't mix yet: */ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD) && -+ !down_read_trylock(&c->gc_lock)) { -+ if (flags & BTREE_INSERT_NOUNLOCK) { -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ return -EINTR; -+ } -+ -+ bch2_trans_unlock(trans); -+ down_read(&c->gc_lock); -+ -+ if (!bch2_trans_relock(trans)) -+ ret = -EINTR; -+ } -+ -+ /* -+ * XXX: figure out how far we might need to split, -+ * instead of locking/reserving all the way to the root: -+ */ -+ if (!bch2_btree_iter_upgrade(iter, U8_MAX)) { -+ trace_trans_restart_iter_upgrade(trans->ip); -+ ret = -EINTR; -+ goto out; -+ } -+ -+ as = bch2_btree_update_start(trans, iter->btree_id, -+ btree_update_reserve_required(c, b), flags, -+ !(flags & BTREE_INSERT_NOUNLOCK) ? 
&cl : NULL); -+ if (IS_ERR(as)) { -+ ret = PTR_ERR(as); -+ if (ret == -EAGAIN) { -+ BUG_ON(flags & BTREE_INSERT_NOUNLOCK); -+ bch2_trans_unlock(trans); -+ ret = -EINTR; -+ -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ } -+ goto out; -+ } -+ -+ btree_split(as, b, iter, NULL, flags); -+ bch2_btree_update_done(as); -+ -+ /* -+ * We haven't successfully inserted yet, so don't downgrade all the way -+ * back to read locks; -+ */ -+ __bch2_btree_iter_downgrade(iter, 1); -+out: -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) -+ up_read(&c->gc_lock); -+ closure_sync(&cl); -+ return ret; -+} -+ -+void __bch2_foreground_maybe_merge(struct bch_fs *c, -+ struct btree_iter *iter, -+ unsigned level, -+ unsigned flags, -+ enum btree_node_sibling sib) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree_update *as; -+ struct bkey_format_state new_s; -+ struct bkey_format new_f; -+ struct bkey_i delete; -+ struct btree *b, *m, *n, *prev, *next, *parent; -+ struct closure cl; -+ size_t sib_u64s; -+ int ret = 0; -+ -+ BUG_ON(!btree_node_locked(iter, level)); -+ -+ closure_init_stack(&cl); -+retry: -+ BUG_ON(!btree_node_locked(iter, level)); -+ -+ b = iter->l[level].b; -+ -+ parent = btree_node_parent(iter, b); -+ if (!parent) -+ goto out; -+ -+ if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) -+ goto out; -+ -+ /* XXX: can't be holding read locks */ -+ m = bch2_btree_node_get_sibling(c, iter, b, sib); -+ if (IS_ERR(m)) { -+ ret = PTR_ERR(m); -+ goto err; -+ } -+ -+ /* NULL means no sibling: */ -+ if (!m) { -+ b->sib_u64s[sib] = U16_MAX; -+ goto out; -+ } -+ -+ if (sib == btree_prev_sib) { -+ prev = m; -+ next = b; -+ } else { -+ prev = b; -+ next = m; -+ } -+ -+ bch2_bkey_format_init(&new_s); -+ __bch2_btree_calc_format(&new_s, b); -+ __bch2_btree_calc_format(&new_s, m); -+ new_f = bch2_bkey_format_done(&new_s); -+ -+ sib_u64s = btree_node_u64s_with_format(b, &new_f) + -+ btree_node_u64s_with_format(m, &new_f); -+ -+ if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) { -+ sib_u64s -= BTREE_FOREGROUND_MERGE_HYSTERESIS(c); -+ sib_u64s /= 2; -+ sib_u64s += BTREE_FOREGROUND_MERGE_HYSTERESIS(c); -+ } -+ -+ sib_u64s = min(sib_u64s, btree_max_u64s(c)); -+ b->sib_u64s[sib] = sib_u64s; -+ -+ if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) { -+ six_unlock_intent(&m->c.lock); -+ goto out; -+ } -+ -+ /* We're changing btree topology, doesn't mix with gc: */ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD) && -+ !down_read_trylock(&c->gc_lock)) -+ goto err_cycle_gc_lock; -+ -+ if (!bch2_btree_iter_upgrade(iter, U8_MAX)) { -+ ret = -EINTR; -+ goto err_unlock; -+ } -+ -+ as = bch2_btree_update_start(trans, iter->btree_id, -+ btree_update_reserve_required(c, parent) + 1, -+ flags| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE, -+ !(flags & BTREE_INSERT_NOUNLOCK) ? 
&cl : NULL); -+ if (IS_ERR(as)) { -+ ret = PTR_ERR(as); -+ goto err_unlock; -+ } -+ -+ trace_btree_merge(c, b); -+ -+ bch2_btree_interior_update_will_free_node(as, b); -+ bch2_btree_interior_update_will_free_node(as, m); -+ -+ n = bch2_btree_node_alloc(as, b->c.level); -+ bch2_btree_update_add_new_node(as, n); -+ -+ btree_set_min(n, prev->data->min_key); -+ btree_set_max(n, next->data->max_key); -+ n->data->format = new_f; -+ -+ btree_node_set_format(n, new_f); -+ -+ bch2_btree_sort_into(c, n, prev); -+ bch2_btree_sort_into(c, n, next); -+ -+ bch2_btree_build_aux_trees(n); -+ six_unlock_write(&n->c.lock); -+ -+ bkey_init(&delete.k); -+ delete.k.p = prev->key.k.p; -+ bch2_keylist_add(&as->parent_keys, &delete); -+ bch2_keylist_add(&as->parent_keys, &n->key); -+ -+ bch2_btree_node_write(c, n, SIX_LOCK_intent); -+ -+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); -+ -+ bch2_btree_update_get_open_buckets(as, n); -+ -+ six_lock_increment(&b->c.lock, SIX_LOCK_intent); -+ bch2_btree_iter_node_drop(iter, b); -+ bch2_btree_iter_node_drop(iter, m); -+ -+ bch2_btree_iter_node_replace(iter, n); -+ -+ bch2_btree_trans_verify_iters(trans, n); -+ -+ bch2_btree_node_free_inmem(c, b, iter); -+ bch2_btree_node_free_inmem(c, m, iter); -+ -+ six_unlock_intent(&n->c.lock); -+ -+ bch2_btree_update_done(as); -+ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) -+ up_read(&c->gc_lock); -+out: -+ bch2_btree_trans_verify_locks(trans); -+ -+ /* -+ * Don't downgrade locks here: we're called after successful insert, -+ * and the caller will downgrade locks after a successful insert -+ * anyways (in case e.g. a split was required first) -+ * -+ * And we're also called when inserting into interior nodes in the -+ * split path, and downgrading to read locks in there is potentially -+ * confusing: -+ */ -+ closure_sync(&cl); -+ return; -+ -+err_cycle_gc_lock: -+ six_unlock_intent(&m->c.lock); -+ -+ if (flags & BTREE_INSERT_NOUNLOCK) -+ goto out; -+ -+ bch2_trans_unlock(trans); -+ -+ down_read(&c->gc_lock); -+ up_read(&c->gc_lock); -+ ret = -EINTR; -+ goto err; -+ -+err_unlock: -+ six_unlock_intent(&m->c.lock); -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) -+ up_read(&c->gc_lock); -+err: -+ BUG_ON(ret == -EAGAIN && (flags & BTREE_INSERT_NOUNLOCK)); -+ -+ if ((ret == -EAGAIN || ret == -EINTR) && -+ !(flags & BTREE_INSERT_NOUNLOCK)) { -+ bch2_trans_unlock(trans); -+ closure_sync(&cl); -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ goto out; -+ -+ goto retry; -+ } -+ -+ goto out; -+} -+ -+static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, -+ struct btree *b, unsigned flags, -+ struct closure *cl) -+{ -+ struct btree *n, *parent = btree_node_parent(iter, b); -+ struct btree_update *as; -+ -+ as = bch2_btree_update_start(iter->trans, iter->btree_id, -+ (parent -+ ? 
btree_update_reserve_required(c, parent) -+ : 0) + 1, -+ flags, cl); -+ if (IS_ERR(as)) { -+ trace_btree_gc_rewrite_node_fail(c, b); -+ return PTR_ERR(as); -+ } -+ -+ bch2_btree_interior_update_will_free_node(as, b); -+ -+ n = bch2_btree_node_alloc_replacement(as, b); -+ bch2_btree_update_add_new_node(as, n); -+ -+ bch2_btree_build_aux_trees(n); -+ six_unlock_write(&n->c.lock); -+ -+ trace_btree_gc_rewrite_node(c, b); -+ -+ bch2_btree_node_write(c, n, SIX_LOCK_intent); -+ -+ if (parent) { -+ bch2_keylist_add(&as->parent_keys, &n->key); -+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); -+ } else { -+ bch2_btree_set_root(as, n, iter); -+ } -+ -+ bch2_btree_update_get_open_buckets(as, n); -+ -+ six_lock_increment(&b->c.lock, SIX_LOCK_intent); -+ bch2_btree_iter_node_drop(iter, b); -+ bch2_btree_iter_node_replace(iter, n); -+ bch2_btree_node_free_inmem(c, b, iter); -+ six_unlock_intent(&n->c.lock); -+ -+ bch2_btree_update_done(as); -+ return 0; -+} -+ -+/** -+ * bch_btree_node_rewrite - Rewrite/move a btree node -+ * -+ * Returns 0 on success, -EINTR or -EAGAIN on failure (i.e. -+ * btree_check_reserve() has to wait) -+ */ -+int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, -+ __le64 seq, unsigned flags) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct closure cl; -+ struct btree *b; -+ int ret; -+ -+ flags |= BTREE_INSERT_NOFAIL; -+ -+ closure_init_stack(&cl); -+ -+ bch2_btree_iter_upgrade(iter, U8_MAX); -+ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) { -+ if (!down_read_trylock(&c->gc_lock)) { -+ bch2_trans_unlock(trans); -+ down_read(&c->gc_lock); -+ } -+ } -+ -+ while (1) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ break; -+ -+ b = bch2_btree_iter_peek_node(iter); -+ if (!b || b->data->keys.seq != seq) -+ break; -+ -+ ret = __btree_node_rewrite(c, iter, b, flags, &cl); -+ if (ret != -EAGAIN && -+ ret != -EINTR) -+ break; -+ -+ bch2_trans_unlock(trans); -+ closure_sync(&cl); -+ } -+ -+ bch2_btree_iter_downgrade(iter); -+ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) -+ up_read(&c->gc_lock); -+ -+ closure_sync(&cl); -+ return ret; -+} -+ -+static void __bch2_btree_node_update_key(struct bch_fs *c, -+ struct btree_update *as, -+ struct btree_iter *iter, -+ struct btree *b, struct btree *new_hash, -+ struct bkey_i *new_key) -+{ -+ struct btree *parent; -+ int ret; -+ -+ btree_update_will_delete_key(as, &b->key); -+ btree_update_will_add_key(as, new_key); -+ -+ parent = btree_node_parent(iter, b); -+ if (parent) { -+ if (new_hash) { -+ bkey_copy(&new_hash->key, new_key); -+ ret = bch2_btree_node_hash_insert(&c->btree_cache, -+ new_hash, b->c.level, b->c.btree_id); -+ BUG_ON(ret); -+ } -+ -+ bch2_keylist_add(&as->parent_keys, new_key); -+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, 0); -+ -+ if (new_hash) { -+ mutex_lock(&c->btree_cache.lock); -+ bch2_btree_node_hash_remove(&c->btree_cache, new_hash); -+ -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ bkey_copy(&b->key, new_key); -+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); -+ BUG_ON(ret); -+ mutex_unlock(&c->btree_cache.lock); -+ } else { -+ bkey_copy(&b->key, new_key); -+ } -+ } else { -+ BUG_ON(btree_node_root(c, b) != b); -+ -+ bch2_btree_node_lock_write(b, iter); -+ bkey_copy(&b->key, new_key); -+ -+ if (btree_ptr_hash_val(&b->key) != b->hash_val) { -+ mutex_lock(&c->btree_cache.lock); -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); -+ BUG_ON(ret); -+ 
mutex_unlock(&c->btree_cache.lock); -+ } -+ -+ btree_update_updated_root(as, b); -+ bch2_btree_node_unlock_write(b, iter); -+ } -+ -+ bch2_btree_update_done(as); -+} -+ -+int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, -+ struct btree *b, -+ struct bkey_i *new_key) -+{ -+ struct btree *parent = btree_node_parent(iter, b); -+ struct btree_update *as = NULL; -+ struct btree *new_hash = NULL; -+ struct closure cl; -+ int ret; -+ -+ closure_init_stack(&cl); -+ -+ if (!bch2_btree_iter_upgrade(iter, U8_MAX)) -+ return -EINTR; -+ -+ if (!down_read_trylock(&c->gc_lock)) { -+ bch2_trans_unlock(iter->trans); -+ down_read(&c->gc_lock); -+ -+ if (!bch2_trans_relock(iter->trans)) { -+ ret = -EINTR; -+ goto err; -+ } -+ } -+ -+ /* -+ * check btree_ptr_hash_val() after @b is locked by -+ * btree_iter_traverse(): -+ */ -+ if (btree_ptr_hash_val(new_key) != b->hash_val) { -+ /* bch2_btree_reserve_get will unlock */ -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); -+ if (ret) { -+ bch2_trans_unlock(iter->trans); -+ up_read(&c->gc_lock); -+ closure_sync(&cl); -+ down_read(&c->gc_lock); -+ -+ if (!bch2_trans_relock(iter->trans)) { -+ ret = -EINTR; -+ goto err; -+ } -+ } -+ -+ new_hash = bch2_btree_node_mem_alloc(c); -+ } -+ -+ as = bch2_btree_update_start(iter->trans, iter->btree_id, -+ parent ? btree_update_reserve_required(c, parent) : 0, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_USE_ALLOC_RESERVE, -+ &cl); -+ -+ if (IS_ERR(as)) { -+ ret = PTR_ERR(as); -+ if (ret == -EAGAIN) -+ ret = -EINTR; -+ -+ if (ret != -EINTR) -+ goto err; -+ -+ bch2_trans_unlock(iter->trans); -+ up_read(&c->gc_lock); -+ closure_sync(&cl); -+ down_read(&c->gc_lock); -+ -+ if (!bch2_trans_relock(iter->trans)) -+ goto err; -+ } -+ -+ ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(new_key)); -+ if (ret) -+ goto err_free_update; -+ -+ __bch2_btree_node_update_key(c, as, iter, b, new_hash, new_key); -+ -+ bch2_btree_iter_downgrade(iter); -+err: -+ if (new_hash) { -+ mutex_lock(&c->btree_cache.lock); -+ list_move(&new_hash->list, &c->btree_cache.freeable); -+ mutex_unlock(&c->btree_cache.lock); -+ -+ six_unlock_write(&new_hash->c.lock); -+ six_unlock_intent(&new_hash->c.lock); -+ } -+ up_read(&c->gc_lock); -+ closure_sync(&cl); -+ return ret; -+err_free_update: -+ bch2_btree_update_free(as); -+ goto err; -+} -+ -+/* Init code: */ -+ -+/* -+ * Only for filesystem bringup, when first reading the btree roots or allocating -+ * btree roots when initializing a new filesystem: -+ */ -+void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b) -+{ -+ BUG_ON(btree_node_root(c, b)); -+ -+ bch2_btree_set_root_inmem(c, b); -+} -+ -+void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id) -+{ -+ struct closure cl; -+ struct btree *b; -+ int ret; -+ -+ closure_init_stack(&cl); -+ -+ do { -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); -+ closure_sync(&cl); -+ } while (ret); -+ -+ b = bch2_btree_node_mem_alloc(c); -+ bch2_btree_cache_cannibalize_unlock(c); -+ -+ set_btree_node_fake(b); -+ set_btree_node_need_rewrite(b); -+ b->c.level = 0; -+ b->c.btree_id = id; -+ -+ bkey_btree_ptr_init(&b->key); -+ b->key.k.p = POS_MAX; -+ *((u64 *) bkey_i_to_btree_ptr(&b->key)->v.start) = U64_MAX - id; -+ -+ bch2_bset_init_first(b, &b->data->keys); -+ bch2_btree_build_aux_trees(b); -+ -+ b->data->flags = 0; -+ btree_set_min(b, POS_MIN); -+ btree_set_max(b, POS_MAX); -+ b->data->format = bch2_btree_calc_format(b); -+ btree_node_set_format(b, b->data->format); -+ -+ ret = 
bch2_btree_node_hash_insert(&c->btree_cache, b, -+ b->c.level, b->c.btree_id); -+ BUG_ON(ret); -+ -+ bch2_btree_set_root_inmem(c, b); -+ -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+} -+ -+ssize_t bch2_btree_updates_print(struct bch_fs *c, char *buf) -+{ -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ struct btree_update *as; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_for_each_entry(as, &c->btree_interior_update_list, list) -+ pr_buf(&out, "%p m %u w %u r %u j %llu\n", -+ as, -+ as->mode, -+ as->nodes_written, -+ atomic_read(&as->cl.remaining) & CLOSURE_REMAINING_MASK, -+ as->journal.seq); -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ return out.pos - buf; -+} -+ -+size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *c) -+{ -+ size_t ret = 0; -+ struct list_head *i; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_for_each(i, &c->btree_interior_update_list) -+ ret++; -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ return ret; -+} -+ -+void bch2_journal_entries_to_btree_roots(struct bch_fs *c, struct jset *jset) -+{ -+ struct btree_root *r; -+ struct jset_entry *entry; -+ -+ mutex_lock(&c->btree_root_lock); -+ -+ vstruct_for_each(jset, entry) -+ if (entry->type == BCH_JSET_ENTRY_btree_root) { -+ r = &c->btree_roots[entry->btree_id]; -+ r->level = entry->level; -+ r->alive = true; -+ bkey_copy(&r->key, &entry->start[0]); -+ } -+ -+ mutex_unlock(&c->btree_root_lock); -+} -+ -+struct jset_entry * -+bch2_btree_roots_to_journal_entries(struct bch_fs *c, -+ struct jset_entry *start, -+ struct jset_entry *end) -+{ -+ struct jset_entry *entry; -+ unsigned long have = 0; -+ unsigned i; -+ -+ for (entry = start; entry < end; entry = vstruct_next(entry)) -+ if (entry->type == BCH_JSET_ENTRY_btree_root) -+ __set_bit(entry->btree_id, &have); -+ -+ mutex_lock(&c->btree_root_lock); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (c->btree_roots[i].alive && !test_bit(i, &have)) { -+ journal_entry_set(end, -+ BCH_JSET_ENTRY_btree_root, -+ i, c->btree_roots[i].level, -+ &c->btree_roots[i].key, -+ c->btree_roots[i].key.u64s); -+ end = vstruct_next(end); -+ } -+ -+ mutex_unlock(&c->btree_root_lock); -+ -+ return end; -+} -+ -+void bch2_fs_btree_interior_update_exit(struct bch_fs *c) -+{ -+ if (c->btree_interior_update_worker) -+ destroy_workqueue(c->btree_interior_update_worker); -+ mempool_exit(&c->btree_interior_update_pool); -+} -+ -+int bch2_fs_btree_interior_update_init(struct bch_fs *c) -+{ -+ mutex_init(&c->btree_reserve_cache_lock); -+ INIT_LIST_HEAD(&c->btree_interior_update_list); -+ INIT_LIST_HEAD(&c->btree_interior_updates_unwritten); -+ mutex_init(&c->btree_interior_update_lock); -+ INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work); -+ -+ c->btree_interior_update_worker = -+ alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 1); -+ if (!c->btree_interior_update_worker) -+ return -ENOMEM; -+ -+ return mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1, -+ sizeof(struct btree_update)); -+} -diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h -new file mode 100644 -index 000000000000..4a5b9dcfbdd0 ---- /dev/null -+++ b/fs/bcachefs/btree_update_interior.h -@@ -0,0 +1,331 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H -+#define _BCACHEFS_BTREE_UPDATE_INTERIOR_H -+ -+#include "btree_cache.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+ -+void __bch2_btree_calc_format(struct bkey_format_state *, struct 
btree *); -+bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *, -+ struct bkey_format *); -+ -+#define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES) -+ -+#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1)) -+ -+/* -+ * Tracks an in progress split/rewrite of a btree node and the update to the -+ * parent node: -+ * -+ * When we split/rewrite a node, we do all the updates in memory without -+ * waiting for any writes to complete - we allocate the new node(s) and update -+ * the parent node, possibly recursively up to the root. -+ * -+ * The end result is that we have one or more new nodes being written - -+ * possibly several, if there were multiple splits - and then a write (updating -+ * an interior node) which will make all these new nodes visible. -+ * -+ * Additionally, as we split/rewrite nodes we free the old nodes - but the old -+ * nodes can't be freed (their space on disk can't be reclaimed) until the -+ * update to the interior node that makes the new node visible completes - -+ * until then, the old nodes are still reachable on disk. -+ * -+ */ -+struct btree_update { -+ struct closure cl; -+ struct bch_fs *c; -+ -+ struct list_head list; -+ struct list_head unwritten_list; -+ -+ /* What kind of update are we doing? */ -+ enum { -+ BTREE_INTERIOR_NO_UPDATE, -+ BTREE_INTERIOR_UPDATING_NODE, -+ BTREE_INTERIOR_UPDATING_ROOT, -+ BTREE_INTERIOR_UPDATING_AS, -+ } mode; -+ -+ unsigned must_rewrite:1; -+ unsigned nodes_written:1; -+ -+ enum btree_id btree_id; -+ -+ struct disk_reservation disk_res; -+ struct journal_preres journal_preres; -+ -+ /* -+ * BTREE_INTERIOR_UPDATING_NODE: -+ * The update that made the new nodes visible was a regular update to an -+ * existing interior node - @b. 
We can't write out the update to @b -+ * until the new nodes we created are finished writing, so we block @b -+ * from writing by putting this btree_interior update on the -+ * @b->write_blocked list with @write_blocked_list: -+ */ -+ struct btree *b; -+ struct list_head write_blocked_list; -+ -+ /* -+ * We may be freeing nodes that were dirty, and thus had journal entries -+ * pinned: we need to transfer the oldest of those pins to the -+ * btree_update operation, and release it when the new node(s) -+ * are all persistent and reachable: -+ */ -+ struct journal_entry_pin journal; -+ -+ /* Preallocated nodes we reserve when we start the update: */ -+ struct btree *prealloc_nodes[BTREE_UPDATE_NODES_MAX]; -+ unsigned nr_prealloc_nodes; -+ -+ /* Nodes being freed: */ -+ struct keylist old_keys; -+ u64 _old_keys[BTREE_UPDATE_NODES_MAX * -+ BKEY_BTREE_PTR_VAL_U64s_MAX]; -+ -+ /* Nodes being added: */ -+ struct keylist new_keys; -+ u64 _new_keys[BTREE_UPDATE_NODES_MAX * -+ BKEY_BTREE_PTR_VAL_U64s_MAX]; -+ -+ /* New nodes, that will be made reachable by this update: */ -+ struct btree *new_nodes[BTREE_UPDATE_NODES_MAX]; -+ unsigned nr_new_nodes; -+ -+ open_bucket_idx_t open_buckets[BTREE_UPDATE_NODES_MAX * -+ BCH_REPLICAS_MAX]; -+ open_bucket_idx_t nr_open_buckets; -+ -+ unsigned journal_u64s; -+ u64 journal_entries[BTREE_UPDATE_JOURNAL_RES]; -+ -+ /* Only here to reduce stack usage on recursive splits: */ -+ struct keylist parent_keys; -+ /* -+ * Enough room for btree_split's keys without realloc - btree node -+ * pointers never have crc/compression info, so we only need to acount -+ * for the pointers for three keys -+ */ -+ u64 inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3]; -+}; -+ -+void bch2_btree_node_free_inmem(struct bch_fs *, struct btree *, -+ struct btree_iter *); -+void bch2_btree_node_free_never_inserted(struct bch_fs *, struct btree *); -+ -+void bch2_btree_update_get_open_buckets(struct btree_update *, struct btree *); -+ -+struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *, -+ struct btree *, -+ struct bkey_format); -+ -+void bch2_btree_update_done(struct btree_update *); -+struct btree_update * -+bch2_btree_update_start(struct btree_trans *, enum btree_id, unsigned, -+ unsigned, struct closure *); -+ -+void bch2_btree_interior_update_will_free_node(struct btree_update *, -+ struct btree *); -+void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); -+ -+void bch2_btree_insert_node(struct btree_update *, struct btree *, -+ struct btree_iter *, struct keylist *, -+ unsigned); -+int bch2_btree_split_leaf(struct bch_fs *, struct btree_iter *, unsigned); -+ -+void __bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *, -+ unsigned, unsigned, enum btree_node_sibling); -+ -+static inline void bch2_foreground_maybe_merge_sibling(struct bch_fs *c, -+ struct btree_iter *iter, -+ unsigned level, unsigned flags, -+ enum btree_node_sibling sib) -+{ -+ struct btree *b; -+ -+ if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE) -+ return; -+ -+ if (!bch2_btree_node_relock(iter, level)) -+ return; -+ -+ b = iter->l[level].b; -+ if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold) -+ return; -+ -+ __bch2_foreground_maybe_merge(c, iter, level, flags, sib); -+} -+ -+static inline void bch2_foreground_maybe_merge(struct bch_fs *c, -+ struct btree_iter *iter, -+ unsigned level, -+ unsigned flags) -+{ -+ bch2_foreground_maybe_merge_sibling(c, iter, level, flags, -+ btree_prev_sib); -+ bch2_foreground_maybe_merge_sibling(c, iter, level, flags, -+ 
btree_next_sib); -+} -+ -+void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *); -+void bch2_btree_root_alloc(struct bch_fs *, enum btree_id); -+ -+static inline unsigned btree_update_reserve_required(struct bch_fs *c, -+ struct btree *b) -+{ -+ unsigned depth = btree_node_root(c, b)->c.level + 1; -+ -+ /* -+ * Number of nodes we might have to allocate in a worst case btree -+ * split operation - we split all the way up to the root, then allocate -+ * a new root, unless we're already at max depth: -+ */ -+ if (depth < BTREE_MAX_DEPTH) -+ return (depth - b->c.level) * 2 + 1; -+ else -+ return (depth - b->c.level) * 2 - 1; -+} -+ -+static inline void btree_node_reset_sib_u64s(struct btree *b) -+{ -+ b->sib_u64s[0] = b->nr.live_u64s; -+ b->sib_u64s[1] = b->nr.live_u64s; -+} -+ -+static inline void *btree_data_end(struct bch_fs *c, struct btree *b) -+{ -+ return (void *) b->data + btree_bytes(c); -+} -+ -+static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c, -+ struct btree *b) -+{ -+ return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s); -+} -+ -+static inline struct bkey_packed *unwritten_whiteouts_end(struct bch_fs *c, -+ struct btree *b) -+{ -+ return btree_data_end(c, b); -+} -+ -+static inline void *write_block(struct btree *b) -+{ -+ return (void *) b->data + (b->written << 9); -+} -+ -+static inline bool __btree_addr_written(struct btree *b, void *p) -+{ -+ return p < write_block(b); -+} -+ -+static inline bool bset_written(struct btree *b, struct bset *i) -+{ -+ return __btree_addr_written(b, i); -+} -+ -+static inline bool bkey_written(struct btree *b, struct bkey_packed *k) -+{ -+ return __btree_addr_written(b, k); -+} -+ -+static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c, -+ struct btree *b, -+ void *end) -+{ -+ ssize_t used = bset_byte_offset(b, end) / sizeof(u64) + -+ b->whiteout_u64s; -+ ssize_t total = c->opts.btree_node_size << 6; -+ -+ return total - used; -+} -+ -+static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c, -+ struct btree *b) -+{ -+ ssize_t remaining = __bch_btree_u64s_remaining(c, b, -+ btree_bkey_last(b, bset_tree_last(b))); -+ -+ BUG_ON(remaining < 0); -+ -+ if (bset_written(b, btree_bset_last(b))) -+ return 0; -+ -+ return remaining; -+} -+ -+static inline unsigned btree_write_set_buffer(struct btree *b) -+{ -+ /* -+ * Could buffer up larger amounts of keys for btrees with larger keys, -+ * pending benchmarking: -+ */ -+ return 4 << 10; -+} -+ -+static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, -+ struct btree *b) -+{ -+ struct bset_tree *t = bset_tree_last(b); -+ struct btree_node_entry *bne = max(write_block(b), -+ (void *) btree_bkey_last(b, bset_tree_last(b))); -+ ssize_t remaining_space = -+ __bch_btree_u64s_remaining(c, b, &bne->keys.start[0]); -+ -+ if (unlikely(bset_written(b, bset(b, t)))) { -+ if (remaining_space > (ssize_t) (block_bytes(c) >> 3)) -+ return bne; -+ } else { -+ if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) && -+ remaining_space > (ssize_t) (btree_write_set_buffer(b) >> 3)) -+ return bne; -+ } -+ -+ return NULL; -+} -+ -+static inline void push_whiteout(struct bch_fs *c, struct btree *b, -+ struct bpos pos) -+{ -+ struct bkey_packed k; -+ -+ BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s); -+ -+ if (!bkey_pack_pos(&k, pos, b)) { -+ struct bkey *u = (void *) &k; -+ -+ bkey_init(u); -+ u->p = pos; -+ } -+ -+ k.needs_whiteout = true; -+ -+ b->whiteout_u64s += k.u64s; -+ bkey_copy(unwritten_whiteouts_start(c, 
b), &k); -+} -+ -+/* -+ * write lock must be held on @b (else the dirty bset that we were going to -+ * insert into could be written out from under us) -+ */ -+static inline bool bch2_btree_node_insert_fits(struct bch_fs *c, -+ struct btree *b, unsigned u64s) -+{ -+ if (unlikely(btree_node_fake(b))) -+ return false; -+ -+ return u64s <= bch_btree_keys_u64s_remaining(c, b); -+} -+ -+ssize_t bch2_btree_updates_print(struct bch_fs *, char *); -+ -+size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *); -+ -+void bch2_journal_entries_to_btree_roots(struct bch_fs *, struct jset *); -+struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *, -+ struct jset_entry *, struct jset_entry *); -+ -+void bch2_fs_btree_interior_update_exit(struct bch_fs *); -+int bch2_fs_btree_interior_update_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */ -diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c -new file mode 100644 -index 000000000000..cf4105e83eda ---- /dev/null -+++ b/fs/bcachefs/btree_update_leaf.c -@@ -0,0 +1,1174 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_gc.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_key_cache.h" -+#include "btree_locking.h" -+#include "buckets.h" -+#include "debug.h" -+#include "error.h" -+#include "extent_update.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+#include "keylist.h" -+#include "replicas.h" -+ -+#include -+#include -+#include -+ -+static inline bool same_leaf_as_prev(struct btree_trans *trans, -+ struct btree_insert_entry *i) -+{ -+ return i != trans->updates2 && -+ iter_l(i[0].iter)->b == iter_l(i[-1].iter)->b; -+} -+ -+inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter) -+{ -+ bch2_btree_node_lock_write(b, iter); -+ -+ if (btree_iter_type(iter) == BTREE_ITER_CACHED) -+ return; -+ -+ if (unlikely(btree_node_just_written(b)) && -+ bch2_btree_post_write_cleanup(c, b)) -+ bch2_btree_iter_reinit_node(iter, b); -+ -+ /* -+ * If the last bset has been written, or if it's gotten too big - start -+ * a new bset to insert into: -+ */ -+ if (want_new_bset(c, b)) -+ bch2_btree_init_next(c, b, iter); -+} -+ -+/* Inserting into a given leaf node (last stage of insert): */ -+ -+/* Handle overwrites and do insert, for non extents: */ -+bool bch2_btree_bset_insert_key(struct btree_iter *iter, -+ struct btree *b, -+ struct btree_node_iter *node_iter, -+ struct bkey_i *insert) -+{ -+ struct bkey_packed *k; -+ unsigned clobber_u64s = 0, new_u64s = 0; -+ -+ EBUG_ON(btree_node_just_written(b)); -+ EBUG_ON(bset_written(b, btree_bset_last(b))); -+ EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k)); -+ EBUG_ON(bkey_cmp(b->data->min_key, POS_MIN) && -+ bkey_cmp(bkey_start_pos(&insert->k), -+ bkey_predecessor(b->data->min_key)) < 0); -+ EBUG_ON(bkey_cmp(insert->k.p, b->data->min_key) < 0); -+ EBUG_ON(bkey_cmp(insert->k.p, b->data->max_key) > 0); -+ EBUG_ON(insert->k.u64s > -+ bch_btree_keys_u64s_remaining(iter->trans->c, b)); -+ EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); -+ -+ k = bch2_btree_node_iter_peek_all(node_iter, b); -+ if (k && bkey_cmp_packed(b, k, &insert->k)) -+ k = NULL; -+ -+ /* @k is the key being overwritten/deleted, if any: */ -+ EBUG_ON(k && bkey_whiteout(k)); -+ -+ /* Deleting, but not found? 
nothing to do: */ -+ if (bkey_whiteout(&insert->k) && !k) -+ return false; -+ -+ if (bkey_whiteout(&insert->k)) { -+ /* Deleting: */ -+ btree_account_key_drop(b, k); -+ k->type = KEY_TYPE_deleted; -+ -+ if (k->needs_whiteout) -+ push_whiteout(iter->trans->c, b, insert->k.p); -+ k->needs_whiteout = false; -+ -+ if (k >= btree_bset_last(b)->start) { -+ clobber_u64s = k->u64s; -+ bch2_bset_delete(b, k, clobber_u64s); -+ goto fix_iter; -+ } else { -+ bch2_btree_iter_fix_key_modified(iter, b, k); -+ } -+ -+ return true; -+ } -+ -+ if (k) { -+ /* Overwriting: */ -+ btree_account_key_drop(b, k); -+ k->type = KEY_TYPE_deleted; -+ -+ insert->k.needs_whiteout = k->needs_whiteout; -+ k->needs_whiteout = false; -+ -+ if (k >= btree_bset_last(b)->start) { -+ clobber_u64s = k->u64s; -+ goto overwrite; -+ } else { -+ bch2_btree_iter_fix_key_modified(iter, b, k); -+ } -+ } -+ -+ k = bch2_btree_node_iter_bset_pos(node_iter, b, bset_tree_last(b)); -+overwrite: -+ bch2_bset_insert(b, node_iter, k, insert, clobber_u64s); -+ new_u64s = k->u64s; -+fix_iter: -+ if (clobber_u64s != new_u64s) -+ bch2_btree_node_iter_fix(iter, b, node_iter, k, -+ clobber_u64s, new_u64s); -+ return true; -+} -+ -+static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, -+ unsigned i, u64 seq) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct btree_write *w = container_of(pin, struct btree_write, journal); -+ struct btree *b = container_of(w, struct btree, writes[i]); -+ -+ btree_node_lock_type(c, b, SIX_LOCK_read); -+ bch2_btree_node_write_cond(c, b, -+ (btree_current_write(b) == w && w->journal.seq == seq)); -+ six_unlock_read(&b->c.lock); -+} -+ -+static void btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq) -+{ -+ return __btree_node_flush(j, pin, 0, seq); -+} -+ -+static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq) -+{ -+ return __btree_node_flush(j, pin, 1, seq); -+} -+ -+inline void bch2_btree_add_journal_pin(struct bch_fs *c, -+ struct btree *b, u64 seq) -+{ -+ struct btree_write *w = btree_current_write(b); -+ -+ bch2_journal_pin_add(&c->journal, seq, &w->journal, -+ btree_node_write_idx(b) == 0 -+ ? 
btree_node_flush0 -+ : btree_node_flush1); -+} -+ -+/** -+ * btree_insert_key - insert a key one key into a leaf node -+ */ -+static bool btree_insert_key_leaf(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree *b = iter_l(iter)->b; -+ struct bset_tree *t = bset_tree_last(b); -+ struct bset *i = bset(b, t); -+ int old_u64s = bset_u64s(t); -+ int old_live_u64s = b->nr.live_u64s; -+ int live_u64s_added, u64s_added; -+ -+ EBUG_ON(!iter->level && -+ !test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags)); -+ -+ if (unlikely(!bch2_btree_bset_insert_key(iter, b, -+ &iter_l(iter)->iter, insert))) -+ return false; -+ -+ i->journal_seq = cpu_to_le64(max(trans->journal_res.seq, -+ le64_to_cpu(i->journal_seq))); -+ -+ bch2_btree_add_journal_pin(c, b, trans->journal_res.seq); -+ -+ if (unlikely(!btree_node_dirty(b))) -+ set_btree_node_dirty(b); -+ -+ live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; -+ u64s_added = (int) bset_u64s(t) - old_u64s; -+ -+ if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added); -+ if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added); -+ -+ if (u64s_added > live_u64s_added && -+ bch2_maybe_compact_whiteouts(c, b)) -+ bch2_btree_iter_reinit_node(iter, b); -+ -+ trace_btree_insert_key(c, b, insert); -+ return true; -+} -+ -+/* Cached btree updates: */ -+ -+/* Normal update interface: */ -+ -+static inline void btree_insert_entry_checks(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct bch_fs *c = trans->c; -+ -+ BUG_ON(bkey_cmp(insert->k.p, iter->pos)); -+ BUG_ON(debug_check_bkeys(c) && -+ bch2_bkey_invalid(c, bkey_i_to_s_c(insert), -+ __btree_node_type(iter->level, iter->btree_id))); -+} -+ -+static noinline int -+bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s) -+{ -+ struct bch_fs *c = trans->c; -+ int ret; -+ -+ bch2_trans_unlock(trans); -+ -+ ret = bch2_journal_preres_get(&c->journal, -+ &trans->journal_preres, u64s, 0); -+ if (ret) -+ return ret; -+ -+ if (!bch2_trans_relock(trans)) { -+ trace_trans_restart_journal_preres_get(trans->ip); -+ return -EINTR; -+ } -+ -+ return 0; -+} -+ -+static inline int bch2_trans_journal_res_get(struct btree_trans *trans, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ int ret; -+ -+ if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED) -+ flags |= JOURNAL_RES_GET_RESERVED; -+ -+ ret = bch2_journal_res_get(&c->journal, &trans->journal_res, -+ trans->journal_u64s, flags); -+ -+ return ret == -EAGAIN ? 
BTREE_INSERT_NEED_JOURNAL_RES : ret; -+} -+ -+static enum btree_insert_ret -+btree_key_can_insert(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert, -+ unsigned u64s) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree *b = iter_l(iter)->b; -+ -+ if (unlikely(btree_node_need_rewrite(b)) || -+ unlikely(u64s > bch_btree_keys_u64s_remaining(c, b))) -+ return BTREE_INSERT_BTREE_NODE_FULL; -+ -+ return BTREE_INSERT_OK; -+} -+ -+static enum btree_insert_ret -+btree_key_can_insert_cached(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert, -+ unsigned u64s) -+{ -+ struct bkey_cached *ck = (void *) iter->l[0].b; -+ unsigned new_u64s; -+ struct bkey_i *new_k; -+ -+ BUG_ON(iter->level); -+ -+ if (u64s <= ck->u64s) -+ return BTREE_INSERT_OK; -+ -+ new_u64s = roundup_pow_of_two(u64s); -+ new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS); -+ if (!new_k) -+ return -ENOMEM; -+ -+ ck->u64s = new_u64s; -+ ck->k = new_k; -+ return BTREE_INSERT_OK; -+} -+ -+static inline void do_btree_insert_one(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct bch_fs *c = trans->c; -+ struct journal *j = &c->journal; -+ bool did_work; -+ -+ EBUG_ON(trans->journal_res.ref != -+ !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)); -+ -+ insert->k.needs_whiteout = false; -+ -+ did_work = (btree_iter_type(iter) != BTREE_ITER_CACHED) -+ ? btree_insert_key_leaf(trans, iter, insert) -+ : bch2_btree_insert_key_cached(trans, iter, insert); -+ if (!did_work) -+ return; -+ -+ if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) { -+ bch2_journal_add_keys(j, &trans->journal_res, -+ iter->btree_id, insert); -+ -+ bch2_journal_set_has_inode(j, &trans->journal_res, -+ insert->k.p.inode); -+ -+ if (trans->journal_seq) -+ *trans->journal_seq = trans->journal_res.seq; -+ } -+} -+ -+static inline bool iter_has_trans_triggers(struct btree_iter *iter) -+{ -+ return BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << iter->btree_id); -+} -+ -+static inline bool iter_has_nontrans_triggers(struct btree_iter *iter) -+{ -+ return (BTREE_NODE_TYPE_HAS_TRIGGERS & -+ ~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS) & -+ (1U << iter->btree_id); -+} -+ -+static noinline void bch2_btree_iter_unlock_noinline(struct btree_iter *iter) -+{ -+ __bch2_btree_iter_unlock(iter); -+} -+ -+static noinline void bch2_trans_mark_gc(struct btree_trans *trans) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_insert_entry *i; -+ -+ trans_for_each_update(trans, i) { -+ /* -+ * XXX: synchronization of cached update triggers with gc -+ */ -+ BUG_ON(btree_iter_type(i->iter) == BTREE_ITER_CACHED); -+ -+ if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b))) -+ bch2_mark_update(trans, i->iter, i->k, NULL, -+ i->trigger_flags|BTREE_TRIGGER_GC); -+ } -+} -+ -+static inline int -+bch2_trans_commit_write_locked(struct btree_trans *trans, -+ struct btree_insert_entry **stopped_at) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_fs_usage *fs_usage = NULL; -+ struct btree_insert_entry *i; -+ unsigned u64s = 0; -+ bool marking = false; -+ int ret; -+ -+ if (race_fault()) { -+ trace_trans_restart_fault_inject(trans->ip); -+ return -EINTR; -+ } -+ -+ /* -+ * Check if the insert will fit in the leaf node with the write lock -+ * held, otherwise another thread could write the node changing the -+ * amount of space available: -+ */ -+ -+ prefetch(&trans->c->journal.flags); -+ -+ trans_for_each_update2(trans, i) { -+ /* Multiple inserts might go to same leaf: */ -+ if (!same_leaf_as_prev(trans, 
i)) -+ u64s = 0; -+ -+ u64s += i->k->k.u64s; -+ ret = btree_iter_type(i->iter) != BTREE_ITER_CACHED -+ ? btree_key_can_insert(trans, i->iter, i->k, u64s) -+ : btree_key_can_insert_cached(trans, i->iter, i->k, u64s); -+ if (ret) { -+ *stopped_at = i; -+ return ret; -+ } -+ -+ if (btree_node_type_needs_gc(i->iter->btree_id)) -+ marking = true; -+ } -+ -+ if (marking) { -+ percpu_down_read(&c->mark_lock); -+ fs_usage = bch2_fs_usage_scratch_get(c); -+ } -+ -+ /* -+ * Don't get journal reservation until after we know insert will -+ * succeed: -+ */ -+ if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) { -+ ret = bch2_trans_journal_res_get(trans, -+ JOURNAL_RES_GET_NONBLOCK); -+ if (ret) -+ goto err; -+ } else { -+ trans->journal_res.seq = c->journal.replay_journal_seq; -+ } -+ -+ if (unlikely(trans->extra_journal_entry_u64s)) { -+ memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res), -+ trans->extra_journal_entries, -+ trans->extra_journal_entry_u64s); -+ -+ trans->journal_res.offset += trans->extra_journal_entry_u64s; -+ trans->journal_res.u64s -= trans->extra_journal_entry_u64s; -+ } -+ -+ /* -+ * Not allowed to fail after we've gotten our journal reservation - we -+ * have to use it: -+ */ -+ -+ if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) { -+ if (journal_seq_verify(c)) -+ trans_for_each_update2(trans, i) -+ i->k->k.version.lo = trans->journal_res.seq; -+ else if (inject_invalid_keys(c)) -+ trans_for_each_update2(trans, i) -+ i->k->k.version = MAX_VERSION; -+ } -+ -+ /* Must be called under mark_lock: */ -+ if (marking && trans->fs_usage_deltas && -+ bch2_replicas_delta_list_apply(c, fs_usage, -+ trans->fs_usage_deltas)) { -+ ret = BTREE_INSERT_NEED_MARK_REPLICAS; -+ goto err; -+ } -+ -+ trans_for_each_update(trans, i) -+ if (iter_has_nontrans_triggers(i->iter)) -+ bch2_mark_update(trans, i->iter, i->k, -+ fs_usage, i->trigger_flags); -+ -+ if (marking) -+ bch2_trans_fs_usage_apply(trans, fs_usage); -+ -+ if (unlikely(c->gc_pos.phase)) -+ bch2_trans_mark_gc(trans); -+ -+ trans_for_each_update2(trans, i) -+ do_btree_insert_one(trans, i->iter, i->k); -+err: -+ if (marking) { -+ bch2_fs_usage_scratch_put(c, fs_usage); -+ percpu_up_read(&c->mark_lock); -+ } -+ -+ return ret; -+} -+ -+/* -+ * Get journal reservation, take write locks, and attempt to do btree update(s): -+ */ -+static inline int do_bch2_trans_commit(struct btree_trans *trans, -+ struct btree_insert_entry **stopped_at) -+{ -+ struct btree_insert_entry *i; -+ struct btree_iter *iter; -+ int ret; -+ -+ trans_for_each_update2(trans, i) -+ BUG_ON(!btree_node_intent_locked(i->iter, i->iter->level)); -+ -+ ret = bch2_journal_preres_get(&trans->c->journal, -+ &trans->journal_preres, trans->journal_preres_u64s, -+ JOURNAL_RES_GET_NONBLOCK| -+ ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) -+ ? 
JOURNAL_RES_GET_RECLAIM : 0)); -+ if (unlikely(ret == -EAGAIN)) -+ ret = bch2_trans_journal_preres_get_cold(trans, -+ trans->journal_preres_u64s); -+ if (unlikely(ret)) -+ return ret; -+ -+ /* -+ * Can't be holding any read locks when we go to take write locks: -+ * -+ * note - this must be done after bch2_trans_journal_preres_get_cold() -+ * or anything else that might call bch2_trans_relock(), since that -+ * would just retake the read locks: -+ */ -+ trans_for_each_iter(trans, iter) { -+ if (iter->nodes_locked != iter->nodes_intent_locked) { -+ EBUG_ON(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT); -+ EBUG_ON(trans->iters_live & (1ULL << iter->idx)); -+ bch2_btree_iter_unlock_noinline(iter); -+ } -+ } -+ -+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) -+ trans_for_each_update2(trans, i) -+ btree_insert_entry_checks(trans, i->iter, i->k); -+ bch2_btree_trans_verify_locks(trans); -+ -+ trans_for_each_update2(trans, i) -+ if (!same_leaf_as_prev(trans, i)) -+ bch2_btree_node_lock_for_insert(trans->c, -+ iter_l(i->iter)->b, i->iter); -+ -+ ret = bch2_trans_commit_write_locked(trans, stopped_at); -+ -+ trans_for_each_update2(trans, i) -+ if (!same_leaf_as_prev(trans, i)) -+ bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b, -+ i->iter); -+ -+ if (!ret && trans->journal_pin) -+ bch2_journal_pin_add(&trans->c->journal, trans->journal_res.seq, -+ trans->journal_pin, NULL); -+ -+ /* -+ * Drop journal reservation after dropping write locks, since dropping -+ * the journal reservation may kick off a journal write: -+ */ -+ bch2_journal_res_put(&trans->c->journal, &trans->journal_res); -+ -+ if (unlikely(ret)) -+ return ret; -+ -+ if (trans->flags & BTREE_INSERT_NOUNLOCK) -+ trans->nounlock = true; -+ -+ trans_for_each_update2(trans, i) -+ if (btree_iter_type(i->iter) != BTREE_ITER_CACHED && -+ !same_leaf_as_prev(trans, i)) -+ bch2_foreground_maybe_merge(trans->c, i->iter, -+ 0, trans->flags); -+ -+ trans->nounlock = false; -+ -+ bch2_trans_downgrade(trans); -+ -+ return 0; -+} -+ -+static noinline -+int bch2_trans_commit_error(struct btree_trans *trans, -+ struct btree_insert_entry *i, -+ int ret) -+{ -+ struct bch_fs *c = trans->c; -+ unsigned flags = trans->flags; -+ -+ /* -+ * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree -+ * update; if we haven't done anything yet it doesn't apply -+ */ -+ flags &= ~BTREE_INSERT_NOUNLOCK; -+ -+ switch (ret) { -+ case BTREE_INSERT_BTREE_NODE_FULL: -+ ret = bch2_btree_split_leaf(c, i->iter, flags); -+ -+ /* -+ * if the split succeeded without dropping locks the insert will -+ * still be atomic (what the caller peeked() and is overwriting -+ * won't have changed) -+ */ -+#if 0 -+ /* -+ * XXX: -+ * split -> btree node merging (of parent node) might still drop -+ * locks when we're not passing it BTREE_INSERT_NOUNLOCK -+ * -+ * we don't want to pass BTREE_INSERT_NOUNLOCK to split as that -+ * will inhibit merging - but we don't have a reliable way yet -+ * (do we?) 
of checking if we dropped locks in this path -+ */ -+ if (!ret) -+ goto retry; -+#endif -+ -+ /* -+ * don't care if we got ENOSPC because we told split it -+ * couldn't block: -+ */ -+ if (!ret || -+ ret == -EINTR || -+ (flags & BTREE_INSERT_NOUNLOCK)) { -+ trace_trans_restart_btree_node_split(trans->ip); -+ ret = -EINTR; -+ } -+ break; -+ case BTREE_INSERT_ENOSPC: -+ ret = -ENOSPC; -+ break; -+ case BTREE_INSERT_NEED_MARK_REPLICAS: -+ bch2_trans_unlock(trans); -+ -+ trans_for_each_update(trans, i) { -+ ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k)); -+ if (ret) -+ return ret; -+ } -+ -+ if (bch2_trans_relock(trans)) -+ return 0; -+ -+ trace_trans_restart_mark_replicas(trans->ip); -+ ret = -EINTR; -+ break; -+ case BTREE_INSERT_NEED_JOURNAL_RES: -+ bch2_trans_unlock(trans); -+ -+ ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK); -+ if (ret) -+ return ret; -+ -+ if (bch2_trans_relock(trans)) -+ return 0; -+ -+ trace_trans_restart_journal_res_get(trans->ip); -+ ret = -EINTR; -+ break; -+ default: -+ BUG_ON(ret >= 0); -+ break; -+ } -+ -+ if (ret == -EINTR) { -+ int ret2 = bch2_btree_iter_traverse_all(trans); -+ -+ if (ret2) { -+ trace_trans_restart_traverse(trans->ip); -+ return ret2; -+ } -+ -+ trace_trans_restart_atomic(trans->ip); -+ } -+ -+ return ret; -+} -+ -+static noinline int -+bch2_trans_commit_get_rw_cold(struct btree_trans *trans) -+{ -+ struct bch_fs *c = trans->c; -+ int ret; -+ -+ if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW))) -+ return -EROFS; -+ -+ bch2_trans_unlock(trans); -+ -+ ret = bch2_fs_read_write_early(c); -+ if (ret) -+ return ret; -+ -+ percpu_ref_get(&c->writes); -+ return 0; -+} -+ -+static void bch2_trans_update2(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct btree_insert_entry *i, n = (struct btree_insert_entry) { -+ .iter = iter, .k = insert -+ }; -+ -+ btree_insert_entry_checks(trans, n.iter, n.k); -+ -+ BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); -+ -+ EBUG_ON(trans->nr_updates2 >= trans->nr_iters); -+ -+ iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ -+ trans_for_each_update2(trans, i) { -+ if (btree_iter_cmp(n.iter, i->iter) == 0) { -+ *i = n; -+ return; -+ } -+ -+ if (btree_iter_cmp(n.iter, i->iter) <= 0) -+ break; -+ } -+ -+ array_insert_item(trans->updates2, trans->nr_updates2, -+ i - trans->updates2, n); -+} -+ -+static int extent_update_to_keys(struct btree_trans *trans, -+ struct btree_iter *orig_iter, -+ struct bkey_i *insert) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ ret = bch2_extent_can_insert(trans, orig_iter, insert); -+ if (ret) -+ return ret; -+ -+ if (bkey_deleted(&insert->k)) -+ return 0; -+ -+ iter = bch2_trans_copy_iter(trans, orig_iter); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ iter->flags |= BTREE_ITER_INTENT; -+ __bch2_btree_iter_set_pos(iter, insert->k.p, false); -+ bch2_trans_update2(trans, iter, insert); -+ bch2_trans_iter_put(trans, iter); -+ return 0; -+} -+ -+static int extent_handle_overwrites(struct btree_trans *trans, -+ enum btree_id btree_id, -+ struct bpos start, struct bpos end) -+{ -+ struct btree_iter *iter = NULL, *update_iter; -+ struct bkey_i *update; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ iter = bch2_trans_get_iter(trans, btree_id, start, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(iter); -+ if (ret) -+ return ret; -+ -+ k = bch2_btree_iter_peek_with_updates(iter); -+ -+ while (k.k && !(ret = bkey_err(k))) { -+ if (bkey_cmp(end, bkey_start_pos(k.k)) <= 0) -+ break; -+ -+ if (bkey_cmp(bkey_start_pos(k.k), start) < 0) 
{ -+ update_iter = bch2_trans_copy_iter(trans, iter); -+ if ((ret = PTR_ERR_OR_ZERO(update_iter))) -+ goto err; -+ -+ update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ if ((ret = PTR_ERR_OR_ZERO(update))) -+ goto err; -+ -+ bkey_reassemble(update, k); -+ bch2_cut_back(start, update); -+ -+ __bch2_btree_iter_set_pos(update_iter, update->k.p, false); -+ bch2_trans_update2(trans, update_iter, update); -+ bch2_trans_iter_put(trans, update_iter); -+ } -+ -+ if (bkey_cmp(k.k->p, end) > 0) { -+ update_iter = bch2_trans_copy_iter(trans, iter); -+ if ((ret = PTR_ERR_OR_ZERO(update_iter))) -+ goto err; -+ -+ update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ if ((ret = PTR_ERR_OR_ZERO(update))) -+ goto err; -+ -+ bkey_reassemble(update, k); -+ bch2_cut_front(end, update); -+ -+ __bch2_btree_iter_set_pos(update_iter, update->k.p, false); -+ bch2_trans_update2(trans, update_iter, update); -+ bch2_trans_iter_put(trans, update_iter); -+ } else { -+ update_iter = bch2_trans_copy_iter(trans, iter); -+ if ((ret = PTR_ERR_OR_ZERO(update_iter))) -+ goto err; -+ -+ update = bch2_trans_kmalloc(trans, sizeof(struct bkey)); -+ if ((ret = PTR_ERR_OR_ZERO(update))) -+ goto err; -+ -+ update->k = *k.k; -+ set_bkey_val_u64s(&update->k, 0); -+ update->k.type = KEY_TYPE_deleted; -+ update->k.size = 0; -+ -+ __bch2_btree_iter_set_pos(update_iter, update->k.p, false); -+ bch2_trans_update2(trans, update_iter, update); -+ bch2_trans_iter_put(trans, update_iter); -+ } -+ -+ k = bch2_btree_iter_next_with_updates(iter); -+ } -+err: -+ if (!IS_ERR_OR_NULL(iter)) -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+int __bch2_trans_commit(struct btree_trans *trans) -+{ -+ struct btree_insert_entry *i = NULL; -+ struct btree_iter *iter; -+ bool trans_trigger_run; -+ unsigned u64s; -+ int ret = 0; -+ -+ BUG_ON(trans->need_reset); -+ -+ if (!trans->nr_updates) -+ goto out_noupdates; -+ -+ if (trans->flags & BTREE_INSERT_GC_LOCK_HELD) -+ lockdep_assert_held(&trans->c->gc_lock); -+ -+ memset(&trans->journal_preres, 0, sizeof(trans->journal_preres)); -+ -+ trans->journal_u64s = trans->extra_journal_entry_u64s; -+ trans->journal_preres_u64s = 0; -+ -+ if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) && -+ unlikely(!percpu_ref_tryget(&trans->c->writes))) { -+ ret = bch2_trans_commit_get_rw_cold(trans); -+ if (ret) -+ return ret; -+ } -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ trans_for_each_update(trans, i) -+ if (btree_iter_type(i->iter) != BTREE_ITER_CACHED && -+ !(i->trigger_flags & BTREE_TRIGGER_NORUN)) -+ bch2_btree_key_cache_verify_clean(trans, -+ i->iter->btree_id, i->iter->pos); -+#endif -+ -+ /* -+ * Running triggers will append more updates to the list of updates as -+ * we're walking it: -+ */ -+ do { -+ trans_trigger_run = false; -+ -+ trans_for_each_update(trans, i) { -+ if (unlikely(i->iter->uptodate > BTREE_ITER_NEED_PEEK && -+ (ret = bch2_btree_iter_traverse(i->iter)))) { -+ trace_trans_restart_traverse(trans->ip); -+ goto out; -+ } -+ -+ /* -+ * We're not using bch2_btree_iter_upgrade here because -+ * we know trans->nounlock can't be set: -+ */ -+ if (unlikely(i->iter->locks_want < 1 && -+ !__bch2_btree_iter_upgrade(i->iter, 1))) { -+ trace_trans_restart_upgrade(trans->ip); -+ ret = -EINTR; -+ goto out; -+ } -+ -+ if (iter_has_trans_triggers(i->iter) && -+ !i->trans_triggers_run) { -+ i->trans_triggers_run = true; -+ trans_trigger_run = true; -+ -+ ret = bch2_trans_mark_update(trans, i->iter, i->k, -+ i->trigger_flags); -+ if (unlikely(ret)) { -+ if (ret == -EINTR) -+ trace_trans_restart_mark(trans->ip); 
-+ goto out; -+ } -+ } -+ } -+ } while (trans_trigger_run); -+ -+ /* Turn extents updates into keys: */ -+ trans_for_each_update(trans, i) -+ if (i->iter->flags & BTREE_ITER_IS_EXTENTS) { -+ struct bpos start = bkey_start_pos(&i->k->k); -+ -+ while (i + 1 < trans->updates + trans->nr_updates && -+ i[0].iter->btree_id == i[1].iter->btree_id && -+ !bkey_cmp(i[0].k->k.p, bkey_start_pos(&i[1].k->k))) -+ i++; -+ -+ ret = extent_handle_overwrites(trans, i->iter->btree_id, -+ start, i->k->k.p); -+ if (ret) -+ goto out; -+ } -+ -+ trans_for_each_update(trans, i) { -+ if (i->iter->flags & BTREE_ITER_IS_EXTENTS) { -+ ret = extent_update_to_keys(trans, i->iter, i->k); -+ if (ret) -+ goto out; -+ } else { -+ bch2_trans_update2(trans, i->iter, i->k); -+ } -+ } -+ -+ trans_for_each_update2(trans, i) { -+ BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK); -+ BUG_ON(i->iter->locks_want < 1); -+ -+ u64s = jset_u64s(i->k->k.u64s); -+ if (btree_iter_type(i->iter) == BTREE_ITER_CACHED && -+ likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) -+ trans->journal_preres_u64s += u64s; -+ trans->journal_u64s += u64s; -+ } -+retry: -+ memset(&trans->journal_res, 0, sizeof(trans->journal_res)); -+ -+ ret = do_bch2_trans_commit(trans, &i); -+ -+ /* make sure we didn't drop or screw up locks: */ -+ bch2_btree_trans_verify_locks(trans); -+ -+ if (ret) -+ goto err; -+ -+ trans_for_each_iter(trans, iter) -+ if ((trans->iters_live & (1ULL << iter->idx)) && -+ (iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT)) { -+ if (trans->flags & BTREE_INSERT_NOUNLOCK) -+ bch2_btree_iter_set_pos_same_leaf(iter, iter->pos_after_commit); -+ else -+ bch2_btree_iter_set_pos(iter, iter->pos_after_commit); -+ } -+out: -+ bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres); -+ -+ if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW))) -+ percpu_ref_put(&trans->c->writes); -+out_noupdates: -+ bch2_trans_reset(trans, !ret ? TRANS_RESET_NOTRAVERSE : 0); -+ -+ return ret; -+err: -+ ret = bch2_trans_commit_error(trans, i, ret); -+ if (ret) -+ goto out; -+ -+ goto retry; -+} -+ -+int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, -+ struct bkey_i *k, enum btree_trigger_flags flags) -+{ -+ struct btree_insert_entry *i, n = (struct btree_insert_entry) { -+ .trigger_flags = flags, .iter = iter, .k = k -+ }; -+ -+ EBUG_ON(bkey_cmp(iter->pos, -+ (iter->flags & BTREE_ITER_IS_EXTENTS) -+ ? 
bkey_start_pos(&k->k) -+ : k->k.p)); -+ -+ iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ -+ if (btree_node_type_is_extents(iter->btree_id)) { -+ iter->pos_after_commit = k->k.p; -+ iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT; -+ } -+ -+ /* -+ * Pending updates are kept sorted: first, find position of new update: -+ */ -+ trans_for_each_update(trans, i) -+ if (btree_iter_cmp(iter, i->iter) <= 0) -+ break; -+ -+ /* -+ * Now delete/trim any updates the new update overwrites: -+ */ -+ if (i > trans->updates && -+ i[-1].iter->btree_id == iter->btree_id && -+ bkey_cmp(iter->pos, i[-1].k->k.p) < 0) -+ bch2_cut_back(n.iter->pos, i[-1].k); -+ -+ while (i < trans->updates + trans->nr_updates && -+ iter->btree_id == i->iter->btree_id && -+ bkey_cmp(n.k->k.p, i->k->k.p) >= 0) -+ array_remove_item(trans->updates, trans->nr_updates, -+ i - trans->updates); -+ -+ if (i < trans->updates + trans->nr_updates && -+ iter->btree_id == i->iter->btree_id && -+ bkey_cmp(n.k->k.p, i->iter->pos) > 0) { -+ /* -+ * When we have an extent that overwrites the start of another -+ * update, trimming that extent will mean the iterator's -+ * position has to change since the iterator position has to -+ * match the extent's start pos - but we don't want to change -+ * the iterator pos if some other code is using it, so we may -+ * need to clone it: -+ */ -+ if (trans->iters_live & (1ULL << i->iter->idx)) { -+ i->iter = bch2_trans_copy_iter(trans, i->iter); -+ if (IS_ERR(i->iter)) { -+ trans->need_reset = true; -+ return PTR_ERR(i->iter); -+ } -+ -+ i->iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ bch2_trans_iter_put(trans, i->iter); -+ } -+ -+ bch2_cut_front(n.k->k.p, i->k); -+ bch2_btree_iter_set_pos(i->iter, n.k->k.p); -+ } -+ -+ EBUG_ON(trans->nr_updates >= trans->nr_iters); -+ -+ array_insert_item(trans->updates, trans->nr_updates, -+ i - trans->updates, n); -+ return 0; -+} -+ -+int __bch2_btree_insert(struct btree_trans *trans, -+ enum btree_id id, struct bkey_i *k) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, id, bkey_start_pos(&k->k), -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ ret = bch2_btree_iter_traverse(iter) ?: -+ bch2_trans_update(trans, iter, k, 0); -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+/** -+ * bch2_btree_insert - insert keys into the extent btree -+ * @c: pointer to struct bch_fs -+ * @id: btree to insert into -+ * @insert_keys: list of keys to insert -+ * @hook: insert callback -+ */ -+int bch2_btree_insert(struct bch_fs *c, enum btree_id id, -+ struct bkey_i *k, -+ struct disk_reservation *disk_res, -+ u64 *journal_seq, int flags) -+{ -+ return bch2_trans_do(c, disk_res, journal_seq, flags, -+ __bch2_btree_insert(&trans, id, k)); -+} -+ -+int bch2_btree_delete_at_range(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bpos end, -+ u64 *journal_seq) -+{ -+ struct bkey_s_c k; -+ int ret = 0; -+retry: -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret = bkey_err(k)) && -+ bkey_cmp(iter->pos, end) < 0) { -+ struct bkey_i delete; -+ -+ bch2_trans_begin(trans); -+ -+ bkey_init(&delete.k); -+ -+ /* -+ * For extents, iter.pos won't necessarily be the same as -+ * bkey_start_pos(k.k) (for non extents they always will be the -+ * same). It's important that we delete starting from iter.pos -+ * because the range we want to delete could start in the middle -+ * of k. -+ * -+ * (bch2_btree_iter_peek() does guarantee that iter.pos >= -+ * bkey_start_pos(k.k)). 
-+ */ -+ delete.k.p = iter->pos; -+ -+ if (btree_node_type_is_extents(iter->btree_id)) { -+ unsigned max_sectors = -+ KEY_SIZE_MAX & (~0 << trans->c->block_bits); -+ -+ /* create the biggest key we can */ -+ bch2_key_resize(&delete.k, max_sectors); -+ bch2_cut_back(end, &delete); -+ -+ ret = bch2_extent_trim_atomic(&delete, iter); -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_update(trans, iter, &delete, 0); -+ ret = bch2_trans_commit(trans, NULL, journal_seq, -+ BTREE_INSERT_NOFAIL); -+ if (ret) -+ break; -+ -+ bch2_trans_cond_resched(trans); -+ } -+ -+ if (ret == -EINTR) { -+ ret = 0; -+ goto retry; -+ } -+ -+ return ret; -+ -+} -+ -+int bch2_btree_delete_at(struct btree_trans *trans, -+ struct btree_iter *iter, unsigned flags) -+{ -+ struct bkey_i k; -+ -+ bkey_init(&k.k); -+ k.k.p = iter->pos; -+ -+ bch2_trans_update(trans, iter, &k, 0); -+ return bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE|flags); -+} -+ -+/* -+ * bch_btree_delete_range - delete everything within a given range -+ * -+ * Range is a half open interval - [start, end) -+ */ -+int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, -+ struct bpos start, struct bpos end, -+ u64 *journal_seq) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ int ret = 0; -+ -+ /* -+ * XXX: whether we need mem/more iters depends on whether this btree id -+ * has triggers -+ */ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512); -+ -+ iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT); -+ -+ ret = bch2_btree_delete_at_range(&trans, iter, end, journal_seq); -+ ret = bch2_trans_exit(&trans) ?: ret; -+ -+ BUG_ON(ret == -EINTR); -+ return ret; -+} -diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -new file mode 100644 -index 000000000000..0ec194b93c71 ---- /dev/null -+++ b/fs/bcachefs/buckets.c -@@ -0,0 +1,2126 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Code for manipulating bucket marks for garbage collection. -+ * -+ * Copyright 2014 Datera, Inc. -+ * -+ * Bucket states: -+ * - free bucket: mark == 0 -+ * The bucket contains no data and will not be read -+ * -+ * - allocator bucket: owned_by_allocator == 1 -+ * The bucket is on a free list, or it is an open bucket -+ * -+ * - cached bucket: owned_by_allocator == 0 && -+ * dirty_sectors == 0 && -+ * cached_sectors > 0 -+ * The bucket contains data but may be safely discarded as there are -+ * enough replicas of the data on other cache devices, or it has been -+ * written back to the backing device -+ * -+ * - dirty bucket: owned_by_allocator == 0 && -+ * dirty_sectors > 0 -+ * The bucket contains data that we must not discard (either only copy, -+ * or one of the 'main copies' for data requiring multiple replicas) -+ * -+ * - metadata bucket: owned_by_allocator == 0 && is_metadata == 1 -+ * This is a btree node, journal or gen/prio bucket -+ * -+ * Lifecycle: -+ * -+ * bucket invalidated => bucket on freelist => open bucket => -+ * [dirty bucket =>] cached bucket => bucket invalidated => ... -+ * -+ * Note that cache promotion can skip the dirty bucket step, as data -+ * is copied from a deeper tier to a shallower tier, onto a cached -+ * bucket. -+ * Note also that a cached bucket can spontaneously become dirty -- -+ * see below. -+ * -+ * Only a traversal of the key space can determine whether a bucket is -+ * truly dirty or cached. 
-+ * -+ * Transitions: -+ * -+ * - free => allocator: bucket was invalidated -+ * - cached => allocator: bucket was invalidated -+ * -+ * - allocator => dirty: open bucket was filled up -+ * - allocator => cached: open bucket was filled up -+ * - allocator => metadata: metadata was allocated -+ * -+ * - dirty => cached: dirty sectors were copied to a deeper tier -+ * - dirty => free: dirty sectors were overwritten or moved (copy gc) -+ * - cached => free: cached sectors were overwritten -+ * -+ * - metadata => free: metadata was freed -+ * -+ * Oddities: -+ * - cached => dirty: a device was removed so formerly replicated data -+ * is no longer sufficiently replicated -+ * - free => cached: cannot happen -+ * - free => dirty: cannot happen -+ * - free => metadata: cannot happen -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "bset.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "ec.h" -+#include "error.h" -+#include "movinggc.h" -+#include "replicas.h" -+ -+#include -+#include -+ -+/* -+ * Clear journal_seq_valid for buckets for which it's not needed, to prevent -+ * wraparound: -+ */ -+void bch2_bucket_seq_cleanup(struct bch_fs *c) -+{ -+ u64 journal_seq = atomic64_read(&c->journal.seq); -+ u16 last_seq_ondisk = c->journal.last_seq_ondisk; -+ struct bch_dev *ca; -+ struct bucket_array *buckets; -+ struct bucket *g; -+ struct bucket_mark m; -+ unsigned i; -+ -+ if (journal_seq - c->last_bucket_seq_cleanup < -+ (1U << (BUCKET_JOURNAL_SEQ_BITS - 2))) -+ return; -+ -+ c->last_bucket_seq_cleanup = journal_seq; -+ -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) { -+ bucket_cmpxchg(g, m, ({ -+ if (!m.journal_seq_valid || -+ bucket_needs_journal_commit(m, last_seq_ondisk)) -+ break; -+ -+ m.journal_seq_valid = 0; -+ })); -+ } -+ up_read(&ca->bucket_lock); -+ } -+} -+ -+void bch2_fs_usage_initialize(struct bch_fs *c) -+{ -+ struct bch_fs_usage *usage; -+ unsigned i; -+ -+ percpu_down_write(&c->mark_lock); -+ usage = c->usage_base; -+ -+ bch2_fs_usage_acc_to_base(c, 0); -+ bch2_fs_usage_acc_to_base(c, 1); -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) -+ usage->reserved += usage->persistent_reserved[i]; -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ switch (e->data_type) { -+ case BCH_DATA_BTREE: -+ usage->btree += usage->replicas[i]; -+ break; -+ case BCH_DATA_USER: -+ usage->data += usage->replicas[i]; -+ break; -+ case BCH_DATA_CACHED: -+ usage->cached += usage->replicas[i]; -+ break; -+ } -+ } -+ -+ percpu_up_write(&c->mark_lock); -+} -+ -+void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage) -+{ -+ if (fs_usage == c->usage_scratch) -+ mutex_unlock(&c->usage_scratch_lock); -+ else -+ kfree(fs_usage); -+} -+ -+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c) -+{ -+ struct bch_fs_usage *ret; -+ unsigned bytes = fs_usage_u64s(c) * sizeof(u64); -+ -+ ret = kzalloc(bytes, GFP_NOWAIT|__GFP_NOWARN); -+ if (ret) -+ return ret; -+ -+ if (mutex_trylock(&c->usage_scratch_lock)) -+ goto out_pool; -+ -+ ret = kzalloc(bytes, GFP_NOFS); -+ if (ret) -+ return ret; -+ -+ mutex_lock(&c->usage_scratch_lock); -+out_pool: -+ ret = c->usage_scratch; -+ memset(ret, 0, bytes); -+ return ret; -+} -+ -+struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bch_dev_usage ret; -+ -+ memset(&ret, 0, sizeof(ret)); -+ 
acc_u64s_percpu((u64 *) &ret, -+ (u64 __percpu *) ca->usage[0], -+ sizeof(ret) / sizeof(u64)); -+ -+ return ret; -+} -+ -+static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, -+ unsigned journal_seq, -+ bool gc) -+{ -+ return this_cpu_ptr(gc -+ ? c->usage_gc -+ : c->usage[journal_seq & 1]); -+} -+ -+u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v) -+{ -+ ssize_t offset = v - (u64 *) c->usage_base; -+ unsigned seq; -+ u64 ret; -+ -+ BUG_ON(offset < 0 || offset >= fs_usage_u64s(c)); -+ percpu_rwsem_assert_held(&c->mark_lock); -+ -+ do { -+ seq = read_seqcount_begin(&c->usage_lock); -+ ret = *v + -+ percpu_u64_get((u64 __percpu *) c->usage[0] + offset) + -+ percpu_u64_get((u64 __percpu *) c->usage[1] + offset); -+ } while (read_seqcount_retry(&c->usage_lock, seq)); -+ -+ return ret; -+} -+ -+struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c) -+{ -+ struct bch_fs_usage *ret; -+ unsigned seq, v, u64s = fs_usage_u64s(c); -+retry: -+ ret = kmalloc(u64s * sizeof(u64), GFP_NOFS); -+ if (unlikely(!ret)) -+ return NULL; -+ -+ percpu_down_read(&c->mark_lock); -+ -+ v = fs_usage_u64s(c); -+ if (unlikely(u64s != v)) { -+ u64s = v; -+ percpu_up_read(&c->mark_lock); -+ kfree(ret); -+ goto retry; -+ } -+ -+ do { -+ seq = read_seqcount_begin(&c->usage_lock); -+ memcpy(ret, c->usage_base, u64s * sizeof(u64)); -+ acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[0], u64s); -+ acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[1], u64s); -+ } while (read_seqcount_retry(&c->usage_lock, seq)); -+ -+ return ret; -+} -+ -+void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx) -+{ -+ unsigned u64s = fs_usage_u64s(c); -+ -+ BUG_ON(idx >= 2); -+ -+ write_seqcount_begin(&c->usage_lock); -+ -+ acc_u64s_percpu((u64 *) c->usage_base, -+ (u64 __percpu *) c->usage[idx], u64s); -+ percpu_memset(c->usage[idx], 0, u64s * sizeof(u64)); -+ -+ write_seqcount_end(&c->usage_lock); -+} -+ -+void bch2_fs_usage_to_text(struct printbuf *out, -+ struct bch_fs *c, -+ struct bch_fs_usage *fs_usage) -+{ -+ unsigned i; -+ -+ pr_buf(out, "capacity:\t\t\t%llu\n", c->capacity); -+ -+ pr_buf(out, "hidden:\t\t\t\t%llu\n", -+ fs_usage->hidden); -+ pr_buf(out, "data:\t\t\t\t%llu\n", -+ fs_usage->data); -+ pr_buf(out, "cached:\t\t\t\t%llu\n", -+ fs_usage->cached); -+ pr_buf(out, "reserved:\t\t\t%llu\n", -+ fs_usage->reserved); -+ pr_buf(out, "nr_inodes:\t\t\t%llu\n", -+ fs_usage->nr_inodes); -+ pr_buf(out, "online reserved:\t\t%llu\n", -+ fs_usage->online_reserved); -+ -+ for (i = 0; -+ i < ARRAY_SIZE(fs_usage->persistent_reserved); -+ i++) { -+ pr_buf(out, "%u replicas:\n", i + 1); -+ pr_buf(out, "\treserved:\t\t%llu\n", -+ fs_usage->persistent_reserved[i]); -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ pr_buf(out, "\t"); -+ bch2_replicas_entry_to_text(out, e); -+ pr_buf(out, ":\t%llu\n", fs_usage->replicas[i]); -+ } -+} -+ -+#define RESERVE_FACTOR 6 -+ -+static u64 reserve_factor(u64 r) -+{ -+ return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR); -+} -+ -+static u64 avail_factor(u64 r) -+{ -+ return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1); -+} -+ -+u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage) -+{ -+ return min(fs_usage->hidden + -+ fs_usage->btree + -+ fs_usage->data + -+ reserve_factor(fs_usage->reserved + -+ fs_usage->online_reserved), -+ c->capacity); -+} -+ -+static struct bch_fs_usage_short -+__bch2_fs_usage_read_short(struct bch_fs *c) -+{ -+ struct 
bch_fs_usage_short ret; -+ u64 data, reserved; -+ -+ ret.capacity = c->capacity - -+ bch2_fs_usage_read_one(c, &c->usage_base->hidden); -+ -+ data = bch2_fs_usage_read_one(c, &c->usage_base->data) + -+ bch2_fs_usage_read_one(c, &c->usage_base->btree); -+ reserved = bch2_fs_usage_read_one(c, &c->usage_base->reserved) + -+ bch2_fs_usage_read_one(c, &c->usage_base->online_reserved); -+ -+ ret.used = min(ret.capacity, data + reserve_factor(reserved)); -+ ret.free = ret.capacity - ret.used; -+ -+ ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->nr_inodes); -+ -+ return ret; -+} -+ -+struct bch_fs_usage_short -+bch2_fs_usage_read_short(struct bch_fs *c) -+{ -+ struct bch_fs_usage_short ret; -+ -+ percpu_down_read(&c->mark_lock); -+ ret = __bch2_fs_usage_read_short(c); -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+static inline int is_unavailable_bucket(struct bucket_mark m) -+{ -+ return !is_available_bucket(m); -+} -+ -+static inline int is_fragmented_bucket(struct bucket_mark m, -+ struct bch_dev *ca) -+{ -+ if (!m.owned_by_allocator && -+ m.data_type == BCH_DATA_USER && -+ bucket_sectors_used(m)) -+ return max_t(int, 0, (int) ca->mi.bucket_size - -+ bucket_sectors_used(m)); -+ return 0; -+} -+ -+static inline int bucket_stripe_sectors(struct bucket_mark m) -+{ -+ return m.stripe ? m.dirty_sectors : 0; -+} -+ -+static inline enum bch_data_type bucket_type(struct bucket_mark m) -+{ -+ return m.cached_sectors && !m.dirty_sectors -+ ? BCH_DATA_CACHED -+ : m.data_type; -+} -+ -+static bool bucket_became_unavailable(struct bucket_mark old, -+ struct bucket_mark new) -+{ -+ return is_available_bucket(old) && -+ !is_available_bucket(new); -+} -+ -+int bch2_fs_usage_apply(struct bch_fs *c, -+ struct bch_fs_usage *fs_usage, -+ struct disk_reservation *disk_res, -+ unsigned journal_seq) -+{ -+ s64 added = fs_usage->data + fs_usage->reserved; -+ s64 should_not_have_added; -+ int ret = 0; -+ -+ percpu_rwsem_assert_held(&c->mark_lock); -+ -+ /* -+ * Not allowed to reduce sectors_available except by getting a -+ * reservation: -+ */ -+ should_not_have_added = added - (s64) (disk_res ? 
disk_res->sectors : 0); -+ if (WARN_ONCE(should_not_have_added > 0, -+ "disk usage increased by %lli without a reservation", -+ should_not_have_added)) { -+ atomic64_sub(should_not_have_added, &c->sectors_available); -+ added -= should_not_have_added; -+ ret = -1; -+ } -+ -+ if (added > 0) { -+ disk_res->sectors -= added; -+ fs_usage->online_reserved -= added; -+ } -+ -+ preempt_disable(); -+ acc_u64s((u64 *) fs_usage_ptr(c, journal_seq, false), -+ (u64 *) fs_usage, fs_usage_u64s(c)); -+ preempt_enable(); -+ -+ return ret; -+} -+ -+static inline void account_bucket(struct bch_fs_usage *fs_usage, -+ struct bch_dev_usage *dev_usage, -+ enum bch_data_type type, -+ int nr, s64 size) -+{ -+ if (type == BCH_DATA_SB || type == BCH_DATA_JOURNAL) -+ fs_usage->hidden += size; -+ -+ dev_usage->buckets[type] += nr; -+} -+ -+static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, -+ struct bch_fs_usage *fs_usage, -+ struct bucket_mark old, struct bucket_mark new, -+ bool gc) -+{ -+ struct bch_dev_usage *u; -+ -+ percpu_rwsem_assert_held(&c->mark_lock); -+ -+ preempt_disable(); -+ u = this_cpu_ptr(ca->usage[gc]); -+ -+ if (bucket_type(old)) -+ account_bucket(fs_usage, u, bucket_type(old), -+ -1, -ca->mi.bucket_size); -+ -+ if (bucket_type(new)) -+ account_bucket(fs_usage, u, bucket_type(new), -+ 1, ca->mi.bucket_size); -+ -+ u->buckets_alloc += -+ (int) new.owned_by_allocator - (int) old.owned_by_allocator; -+ u->buckets_unavailable += -+ is_unavailable_bucket(new) - is_unavailable_bucket(old); -+ -+ u->buckets_ec += (int) new.stripe - (int) old.stripe; -+ u->sectors_ec += bucket_stripe_sectors(new) - -+ bucket_stripe_sectors(old); -+ -+ u->sectors[old.data_type] -= old.dirty_sectors; -+ u->sectors[new.data_type] += new.dirty_sectors; -+ u->sectors[BCH_DATA_CACHED] += -+ (int) new.cached_sectors - (int) old.cached_sectors; -+ u->sectors_fragmented += -+ is_fragmented_bucket(new, ca) - is_fragmented_bucket(old, ca); -+ preempt_enable(); -+ -+ if (!is_available_bucket(old) && is_available_bucket(new)) -+ bch2_wake_allocator(ca); -+} -+ -+void bch2_dev_usage_from_buckets(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ struct bucket_mark old = { .v.counter = 0 }; -+ struct bucket_array *buckets; -+ struct bucket *g; -+ unsigned i; -+ int cpu; -+ -+ c->usage_base->hidden = 0; -+ -+ for_each_member_device(ca, c, i) { -+ for_each_possible_cpu(cpu) -+ memset(per_cpu_ptr(ca->usage[0], cpu), 0, -+ sizeof(*ca->usage[0])); -+ -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) -+ bch2_dev_usage_update(c, ca, c->usage_base, -+ old, g->mark, false); -+ } -+} -+ -+static inline int update_replicas(struct bch_fs *c, -+ struct bch_fs_usage *fs_usage, -+ struct bch_replicas_entry *r, -+ s64 sectors) -+{ -+ int idx = bch2_replicas_entry_idx(c, r); -+ -+ if (idx < 0) -+ return -1; -+ -+ if (!fs_usage) -+ return 0; -+ -+ switch (r->data_type) { -+ case BCH_DATA_BTREE: -+ fs_usage->btree += sectors; -+ break; -+ case BCH_DATA_USER: -+ fs_usage->data += sectors; -+ break; -+ case BCH_DATA_CACHED: -+ fs_usage->cached += sectors; -+ break; -+ } -+ fs_usage->replicas[idx] += sectors; -+ return 0; -+} -+ -+static inline void update_cached_sectors(struct bch_fs *c, -+ struct bch_fs_usage *fs_usage, -+ unsigned dev, s64 sectors) -+{ -+ struct bch_replicas_padded r; -+ -+ bch2_replicas_entry_cached(&r.e, dev); -+ -+ update_replicas(c, fs_usage, &r.e, sectors); -+} -+ -+static struct replicas_delta_list * -+replicas_deltas_realloc(struct btree_trans *trans, unsigned more) -+{ -+ struct replicas_delta_list 
*d = trans->fs_usage_deltas; -+ unsigned new_size = d ? (d->size + more) * 2 : 128; -+ -+ if (!d || d->used + more > d->size) { -+ d = krealloc(d, sizeof(*d) + new_size, GFP_NOIO|__GFP_ZERO); -+ BUG_ON(!d); -+ -+ d->size = new_size; -+ trans->fs_usage_deltas = d; -+ } -+ return d; -+} -+ -+static inline void update_replicas_list(struct btree_trans *trans, -+ struct bch_replicas_entry *r, -+ s64 sectors) -+{ -+ struct replicas_delta_list *d; -+ struct replicas_delta *n; -+ unsigned b; -+ -+ if (!sectors) -+ return; -+ -+ b = replicas_entry_bytes(r) + 8; -+ d = replicas_deltas_realloc(trans, b); -+ -+ n = (void *) d->d + d->used; -+ n->delta = sectors; -+ memcpy(&n->r, r, replicas_entry_bytes(r)); -+ d->used += b; -+} -+ -+static inline void update_cached_sectors_list(struct btree_trans *trans, -+ unsigned dev, s64 sectors) -+{ -+ struct bch_replicas_padded r; -+ -+ bch2_replicas_entry_cached(&r.e, dev); -+ -+ update_replicas_list(trans, &r.e, sectors); -+} -+ -+static inline struct replicas_delta * -+replicas_delta_next(struct replicas_delta *d) -+{ -+ return (void *) d + replicas_entry_bytes(&d->r) + 8; -+} -+ -+int bch2_replicas_delta_list_apply(struct bch_fs *c, -+ struct bch_fs_usage *fs_usage, -+ struct replicas_delta_list *r) -+{ -+ struct replicas_delta *d = r->d; -+ struct replicas_delta *top = (void *) r->d + r->used; -+ unsigned i; -+ -+ for (d = r->d; d != top; d = replicas_delta_next(d)) -+ if (update_replicas(c, fs_usage, &d->r, d->delta)) { -+ top = d; -+ goto unwind; -+ } -+ -+ if (!fs_usage) -+ return 0; -+ -+ fs_usage->nr_inodes += r->nr_inodes; -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) { -+ fs_usage->reserved += r->persistent_reserved[i]; -+ fs_usage->persistent_reserved[i] += r->persistent_reserved[i]; -+ } -+ -+ return 0; -+unwind: -+ for (d = r->d; d != top; d = replicas_delta_next(d)) -+ update_replicas(c, fs_usage, &d->r, -d->delta); -+ return -1; -+} -+ -+#define do_mark_fn(fn, c, pos, flags, ...) 
\ -+({ \ -+ int gc, ret = 0; \ -+ \ -+ percpu_rwsem_assert_held(&c->mark_lock); \ -+ \ -+ for (gc = 0; gc < 2 && !ret; gc++) \ -+ if (!gc == !(flags & BTREE_TRIGGER_GC) || \ -+ (gc && gc_visited(c, pos))) \ -+ ret = fn(c, __VA_ARGS__, gc); \ -+ ret; \ -+}) -+ -+static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, struct bucket_mark *ret, -+ bool gc) -+{ -+ struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc); -+ struct bucket *g = __bucket(ca, b, gc); -+ struct bucket_mark old, new; -+ -+ old = bucket_cmpxchg(g, new, ({ -+ BUG_ON(!is_available_bucket(new)); -+ -+ new.owned_by_allocator = true; -+ new.data_type = 0; -+ new.cached_sectors = 0; -+ new.dirty_sectors = 0; -+ new.gen++; -+ })); -+ -+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); -+ -+ if (old.cached_sectors) -+ update_cached_sectors(c, fs_usage, ca->dev_idx, -+ -((s64) old.cached_sectors)); -+ -+ if (!gc) -+ *ret = old; -+ return 0; -+} -+ -+void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, struct bucket_mark *old) -+{ -+ do_mark_fn(__bch2_invalidate_bucket, c, gc_phase(GC_PHASE_START), 0, -+ ca, b, old); -+ -+ if (!old->owned_by_allocator && old->cached_sectors) -+ trace_invalidate(ca, bucket_to_sector(ca, b), -+ old->cached_sectors); -+} -+ -+static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, bool owned_by_allocator, -+ bool gc) -+{ -+ struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc); -+ struct bucket *g = __bucket(ca, b, gc); -+ struct bucket_mark old, new; -+ -+ old = bucket_cmpxchg(g, new, ({ -+ new.owned_by_allocator = owned_by_allocator; -+ })); -+ -+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); -+ -+ BUG_ON(!gc && -+ !owned_by_allocator && !old.owned_by_allocator); -+ -+ return 0; -+} -+ -+void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, bool owned_by_allocator, -+ struct gc_pos pos, unsigned flags) -+{ -+ preempt_disable(); -+ -+ do_mark_fn(__bch2_mark_alloc_bucket, c, pos, flags, -+ ca, b, owned_by_allocator); -+ -+ preempt_enable(); -+} -+ -+static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, unsigned flags) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ struct bkey_alloc_unpacked u; -+ struct bch_dev *ca; -+ struct bucket *g; -+ struct bucket_mark old, m; -+ -+ /* -+ * alloc btree is read in by bch2_alloc_read, not gc: -+ */ -+ if ((flags & BTREE_TRIGGER_GC) && -+ !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) -+ return 0; -+ -+ ca = bch_dev_bkey_exists(c, k.k->p.inode); -+ -+ if (k.k->p.offset >= ca->mi.nbuckets) -+ return 0; -+ -+ g = __bucket(ca, k.k->p.offset, gc); -+ u = bch2_alloc_unpack(k); -+ -+ old = bucket_cmpxchg(g, m, ({ -+ m.gen = u.gen; -+ m.data_type = u.data_type; -+ m.dirty_sectors = u.dirty_sectors; -+ m.cached_sectors = u.cached_sectors; -+ -+ if (journal_seq) { -+ m.journal_seq_valid = 1; -+ m.journal_seq = journal_seq; -+ } -+ })); -+ -+ if (!(flags & BTREE_TRIGGER_ALLOC_READ)) -+ bch2_dev_usage_update(c, ca, fs_usage, old, m, gc); -+ -+ g->io_time[READ] = u.read_time; -+ g->io_time[WRITE] = u.write_time; -+ g->oldest_gen = u.oldest_gen; -+ g->gen_valid = 1; -+ -+ /* -+ * need to know if we're getting called from the invalidate path or -+ * not: -+ */ -+ -+ if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) && -+ old.cached_sectors) { -+ update_cached_sectors(c, fs_usage, ca->dev_idx, -+ -old.cached_sectors); -+ trace_invalidate(ca, bucket_to_sector(ca, k.k->p.offset), -+ 
old.cached_sectors); -+ } -+ -+ return 0; -+} -+ -+#define checked_add(a, b) \ -+({ \ -+ unsigned _res = (unsigned) (a) + (b); \ -+ bool overflow = _res > U16_MAX; \ -+ if (overflow) \ -+ _res = U16_MAX; \ -+ (a) = _res; \ -+ overflow; \ -+}) -+ -+static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, enum bch_data_type data_type, -+ unsigned sectors, bool gc) -+{ -+ struct bucket *g = __bucket(ca, b, gc); -+ struct bucket_mark old, new; -+ bool overflow; -+ -+ BUG_ON(data_type != BCH_DATA_SB && -+ data_type != BCH_DATA_JOURNAL); -+ -+ old = bucket_cmpxchg(g, new, ({ -+ new.data_type = data_type; -+ overflow = checked_add(new.dirty_sectors, sectors); -+ })); -+ -+ bch2_fs_inconsistent_on(old.data_type && -+ old.data_type != data_type, c, -+ "different types of data in same bucket: %s, %s", -+ bch2_data_types[old.data_type], -+ bch2_data_types[data_type]); -+ -+ bch2_fs_inconsistent_on(overflow, c, -+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > U16_MAX", -+ ca->dev_idx, b, new.gen, -+ bch2_data_types[old.data_type ?: data_type], -+ old.dirty_sectors, sectors); -+ -+ if (c) -+ bch2_dev_usage_update(c, ca, fs_usage_ptr(c, 0, gc), -+ old, new, gc); -+ -+ return 0; -+} -+ -+void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, enum bch_data_type type, -+ unsigned sectors, struct gc_pos pos, -+ unsigned flags) -+{ -+ BUG_ON(type != BCH_DATA_SB && -+ type != BCH_DATA_JOURNAL); -+ -+ preempt_disable(); -+ -+ if (likely(c)) { -+ do_mark_fn(__bch2_mark_metadata_bucket, c, pos, flags, -+ ca, b, type, sectors); -+ } else { -+ __bch2_mark_metadata_bucket(c, ca, b, type, sectors, 0); -+ } -+ -+ preempt_enable(); -+} -+ -+static s64 disk_sectors_scaled(unsigned n, unsigned d, unsigned sectors) -+{ -+ return DIV_ROUND_UP(sectors * n, d); -+} -+ -+static s64 __ptr_disk_sectors_delta(unsigned old_size, -+ unsigned offset, s64 delta, -+ unsigned flags, -+ unsigned n, unsigned d) -+{ -+ BUG_ON(!n || !d); -+ -+ if (flags & BTREE_TRIGGER_OVERWRITE_SPLIT) { -+ BUG_ON(offset + -delta > old_size); -+ -+ return -disk_sectors_scaled(n, d, old_size) + -+ disk_sectors_scaled(n, d, offset) + -+ disk_sectors_scaled(n, d, old_size - offset + delta); -+ } else if (flags & BTREE_TRIGGER_OVERWRITE) { -+ BUG_ON(offset + -delta > old_size); -+ -+ return -disk_sectors_scaled(n, d, old_size) + -+ disk_sectors_scaled(n, d, old_size + delta); -+ } else { -+ return disk_sectors_scaled(n, d, delta); -+ } -+} -+ -+static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p, -+ unsigned offset, s64 delta, -+ unsigned flags) -+{ -+ return __ptr_disk_sectors_delta(p.crc.live_size, -+ offset, delta, flags, -+ p.crc.compressed_size, -+ p.crc.uncompressed_size); -+} -+ -+static void bucket_set_stripe(struct bch_fs *c, -+ const struct bch_stripe *v, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, -+ unsigned flags) -+{ -+ bool enabled = !(flags & BTREE_TRIGGER_OVERWRITE); -+ bool gc = flags & BTREE_TRIGGER_GC; -+ unsigned i; -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ const struct bch_extent_ptr *ptr = v->ptrs + i; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bucket *g = PTR_BUCKET(ca, ptr, gc); -+ struct bucket_mark new, old; -+ -+ old = bucket_cmpxchg(g, new, ({ -+ new.stripe = enabled; -+ if (journal_seq) { -+ new.journal_seq_valid = 1; -+ new.journal_seq = journal_seq; -+ } -+ })); -+ -+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); -+ -+ /* -+ * XXX write repair code for these, flag stripe as possibly bad -+ 
*/ -+ if (old.gen != ptr->gen) -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "stripe with stale pointer"); -+#if 0 -+ /* -+ * We'd like to check for these, but these checks don't work -+ * yet: -+ */ -+ if (old.stripe && enabled) -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "multiple stripes using same bucket"); -+ -+ if (!old.stripe && !enabled) -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "deleting stripe but bucket not marked as stripe bucket"); -+#endif -+ } -+} -+ -+static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k, -+ struct extent_ptr_decoded p, -+ s64 sectors, enum bch_data_type ptr_data_type, -+ u8 bucket_gen, u8 *bucket_data_type, -+ u16 *dirty_sectors, u16 *cached_sectors) -+{ -+ u16 *dst_sectors = !p.ptr.cached -+ ? dirty_sectors -+ : cached_sectors; -+ u16 orig_sectors = *dst_sectors; -+ char buf[200]; -+ -+ if (gen_after(p.ptr.gen, bucket_gen)) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), -+ bucket_gen, -+ bch2_data_types[*bucket_data_type ?: ptr_data_type], -+ p.ptr.gen, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ if (gen_cmp(bucket_gen, p.ptr.gen) >= 96U) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), -+ bucket_gen, -+ bch2_data_types[*bucket_data_type ?: ptr_data_type], -+ p.ptr.gen, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ if (bucket_gen != p.ptr.gen && !p.ptr.cached) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u data type %s: stale dirty ptr (gen %u)\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), -+ bucket_gen, -+ bch2_data_types[*bucket_data_type ?: ptr_data_type], -+ p.ptr.gen, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ if (bucket_gen != p.ptr.gen) -+ return 1; -+ -+ if (*bucket_data_type && *bucket_data_type != ptr_data_type) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), -+ bucket_gen, -+ bch2_data_types[*bucket_data_type], -+ bch2_data_types[ptr_data_type], -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ if (checked_add(*dst_sectors, sectors)) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), -+ bucket_gen, -+ bch2_data_types[*bucket_data_type ?: ptr_data_type], -+ orig_sectors, sectors, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ *bucket_data_type = *dirty_sectors || *cached_sectors -+ ? 
ptr_data_type : 0; -+ return 0; -+} -+ -+static int bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c k, -+ struct extent_ptr_decoded p, -+ s64 sectors, enum bch_data_type data_type, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, unsigned flags) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ struct bucket_mark old, new; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ struct bucket *g = PTR_BUCKET(ca, &p.ptr, gc); -+ u8 bucket_data_type; -+ u64 v; -+ int ret; -+ -+ v = atomic64_read(&g->_mark.v); -+ do { -+ new.v.counter = old.v.counter = v; -+ bucket_data_type = new.data_type; -+ -+ ret = __mark_pointer(c, k, p, sectors, data_type, new.gen, -+ &bucket_data_type, -+ &new.dirty_sectors, -+ &new.cached_sectors); -+ if (ret) -+ return ret; -+ -+ new.data_type = bucket_data_type; -+ -+ if (journal_seq) { -+ new.journal_seq_valid = 1; -+ new.journal_seq = journal_seq; -+ } -+ -+ if (flags & BTREE_TRIGGER_NOATOMIC) { -+ g->_mark = new; -+ break; -+ } -+ } while ((v = atomic64_cmpxchg(&g->_mark.v, -+ old.v.counter, -+ new.v.counter)) != old.v.counter); -+ -+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); -+ -+ BUG_ON(!gc && bucket_became_unavailable(old, new)); -+ -+ return 0; -+} -+ -+static int bch2_mark_stripe_ptr(struct bch_fs *c, -+ struct bch_extent_stripe_ptr p, -+ enum bch_data_type data_type, -+ struct bch_fs_usage *fs_usage, -+ s64 sectors, unsigned flags, -+ struct bch_replicas_padded *r, -+ unsigned *nr_data, -+ unsigned *nr_parity) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ struct stripe *m; -+ unsigned old, new; -+ int blocks_nonempty_delta; -+ -+ m = genradix_ptr(&c->stripes[gc], p.idx); -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ -+ if (!m || !m->alive) { -+ spin_unlock(&c->ec_stripes_heap_lock); -+ bch_err_ratelimited(c, "pointer to nonexistent stripe %llu", -+ (u64) p.idx); -+ return -EIO; -+ } -+ -+ BUG_ON(m->r.e.data_type != data_type); -+ -+ *nr_data = m->nr_blocks - m->nr_redundant; -+ *nr_parity = m->nr_redundant; -+ *r = m->r; -+ -+ old = m->block_sectors[p.block]; -+ m->block_sectors[p.block] += sectors; -+ new = m->block_sectors[p.block]; -+ -+ blocks_nonempty_delta = (int) !!new - (int) !!old; -+ if (blocks_nonempty_delta) { -+ m->blocks_nonempty += blocks_nonempty_delta; -+ -+ if (!gc) -+ bch2_stripes_heap_update(c, m, p.idx); -+ } -+ -+ m->dirty = true; -+ -+ spin_unlock(&c->ec_stripes_heap_lock); -+ -+ return 0; -+} -+ -+static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, -+ unsigned offset, s64 sectors, -+ enum bch_data_type data_type, -+ struct bch_fs_usage *fs_usage, -+ unsigned journal_seq, unsigned flags) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ struct bch_replicas_padded r; -+ s64 dirty_sectors = 0; -+ bool stale; -+ int ret; -+ -+ r.e.data_type = data_type; -+ r.e.nr_devs = 0; -+ r.e.nr_required = 1; -+ -+ BUG_ON(!sectors); -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ s64 disk_sectors = data_type == BCH_DATA_BTREE -+ ? 
sectors -+ : ptr_disk_sectors_delta(p, offset, sectors, flags); -+ -+ ret = bch2_mark_pointer(c, k, p, disk_sectors, data_type, -+ fs_usage, journal_seq, flags); -+ if (ret < 0) -+ return ret; -+ -+ stale = ret > 0; -+ -+ if (p.ptr.cached) { -+ if (!stale) -+ update_cached_sectors(c, fs_usage, p.ptr.dev, -+ disk_sectors); -+ } else if (!p.has_ec) { -+ dirty_sectors += disk_sectors; -+ r.e.devs[r.e.nr_devs++] = p.ptr.dev; -+ } else { -+ struct bch_replicas_padded ec_r; -+ unsigned nr_data, nr_parity; -+ s64 parity_sectors; -+ -+ ret = bch2_mark_stripe_ptr(c, p.ec, data_type, -+ fs_usage, disk_sectors, flags, -+ &ec_r, &nr_data, &nr_parity); -+ if (ret) -+ return ret; -+ -+ parity_sectors = -+ __ptr_disk_sectors_delta(p.crc.live_size, -+ offset, sectors, flags, -+ p.crc.compressed_size * nr_parity, -+ p.crc.uncompressed_size * nr_data); -+ -+ update_replicas(c, fs_usage, &ec_r.e, -+ disk_sectors + parity_sectors); -+ -+ /* -+ * There may be other dirty pointers in this extent, but -+ * if so they're not required for mounting if we have an -+ * erasure coded pointer in this extent: -+ */ -+ r.e.nr_required = 0; -+ } -+ } -+ -+ if (r.e.nr_devs) -+ update_replicas(c, fs_usage, &r.e, dirty_sectors); -+ -+ return 0; -+} -+ -+static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, unsigned flags) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); -+ size_t idx = s.k->p.offset; -+ struct stripe *m = genradix_ptr(&c->stripes[gc], idx); -+ unsigned i; -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ -+ if (!m || ((flags & BTREE_TRIGGER_OVERWRITE) && !m->alive)) { -+ spin_unlock(&c->ec_stripes_heap_lock); -+ bch_err_ratelimited(c, "error marking nonexistent stripe %zu", -+ idx); -+ return -1; -+ } -+ -+ if (!(flags & BTREE_TRIGGER_OVERWRITE)) { -+ m->sectors = le16_to_cpu(s.v->sectors); -+ m->algorithm = s.v->algorithm; -+ m->nr_blocks = s.v->nr_blocks; -+ m->nr_redundant = s.v->nr_redundant; -+ -+ bch2_bkey_to_replicas(&m->r.e, k); -+ -+ /* -+ * XXX: account for stripes somehow here -+ */ -+#if 0 -+ update_replicas(c, fs_usage, &m->r.e, stripe_sectors); -+#endif -+ -+ /* gc recalculates these fields: */ -+ if (!(flags & BTREE_TRIGGER_GC)) { -+ for (i = 0; i < s.v->nr_blocks; i++) { -+ m->block_sectors[i] = -+ stripe_blockcount_get(s.v, i); -+ m->blocks_nonempty += !!m->block_sectors[i]; -+ } -+ } -+ -+ if (!gc) -+ bch2_stripes_heap_update(c, m, idx); -+ m->alive = true; -+ } else { -+ if (!gc) -+ bch2_stripes_heap_del(c, m, idx); -+ memset(m, 0, sizeof(*m)); -+ } -+ -+ spin_unlock(&c->ec_stripes_heap_lock); -+ -+ bucket_set_stripe(c, s.v, fs_usage, 0, flags); -+ return 0; -+} -+ -+static int bch2_mark_key_locked(struct bch_fs *c, -+ struct bkey_s_c k, -+ unsigned offset, s64 sectors, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, unsigned flags) -+{ -+ int ret = 0; -+ -+ preempt_disable(); -+ -+ if (!fs_usage || (flags & BTREE_TRIGGER_GC)) -+ fs_usage = fs_usage_ptr(c, journal_seq, -+ flags & BTREE_TRIGGER_GC); -+ -+ switch (k.k->type) { -+ case KEY_TYPE_alloc: -+ ret = bch2_mark_alloc(c, k, fs_usage, journal_seq, flags); -+ break; -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ sectors = !(flags & BTREE_TRIGGER_OVERWRITE) -+ ? 
c->opts.btree_node_size -+ : -c->opts.btree_node_size; -+ -+ ret = bch2_mark_extent(c, k, offset, sectors, BCH_DATA_BTREE, -+ fs_usage, journal_seq, flags); -+ break; -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ ret = bch2_mark_extent(c, k, offset, sectors, BCH_DATA_USER, -+ fs_usage, journal_seq, flags); -+ break; -+ case KEY_TYPE_stripe: -+ ret = bch2_mark_stripe(c, k, fs_usage, journal_seq, flags); -+ break; -+ case KEY_TYPE_inode: -+ if (!(flags & BTREE_TRIGGER_OVERWRITE)) -+ fs_usage->nr_inodes++; -+ else -+ fs_usage->nr_inodes--; -+ break; -+ case KEY_TYPE_reservation: { -+ unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; -+ -+ sectors *= replicas; -+ replicas = clamp_t(unsigned, replicas, 1, -+ ARRAY_SIZE(fs_usage->persistent_reserved)); -+ -+ fs_usage->reserved += sectors; -+ fs_usage->persistent_reserved[replicas - 1] += sectors; -+ break; -+ } -+ } -+ -+ preempt_enable(); -+ -+ return ret; -+} -+ -+int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, -+ unsigned offset, s64 sectors, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, unsigned flags) -+{ -+ int ret; -+ -+ percpu_down_read(&c->mark_lock); -+ ret = bch2_mark_key_locked(c, k, offset, sectors, -+ fs_usage, journal_seq, flags); -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+inline int bch2_mark_overwrite(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c old, -+ struct bkey_i *new, -+ struct bch_fs_usage *fs_usage, -+ unsigned flags, -+ bool is_extents) -+{ -+ struct bch_fs *c = trans->c; -+ unsigned offset = 0; -+ s64 sectors = -((s64) old.k->size); -+ -+ flags |= BTREE_TRIGGER_OVERWRITE; -+ -+ if (is_extents -+ ? bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0 -+ : bkey_cmp(new->k.p, old.k->p)) -+ return 0; -+ -+ if (is_extents) { -+ switch (bch2_extent_overlap(&new->k, old.k)) { -+ case BCH_EXTENT_OVERLAP_ALL: -+ offset = 0; -+ sectors = -((s64) old.k->size); -+ break; -+ case BCH_EXTENT_OVERLAP_BACK: -+ offset = bkey_start_offset(&new->k) - -+ bkey_start_offset(old.k); -+ sectors = bkey_start_offset(&new->k) - -+ old.k->p.offset; -+ break; -+ case BCH_EXTENT_OVERLAP_FRONT: -+ offset = 0; -+ sectors = bkey_start_offset(old.k) - -+ new->k.p.offset; -+ break; -+ case BCH_EXTENT_OVERLAP_MIDDLE: -+ offset = bkey_start_offset(&new->k) - -+ bkey_start_offset(old.k); -+ sectors = -((s64) new->k.size); -+ flags |= BTREE_TRIGGER_OVERWRITE_SPLIT; -+ break; -+ } -+ -+ BUG_ON(sectors >= 0); -+ } -+ -+ return bch2_mark_key_locked(c, old, offset, sectors, fs_usage, -+ trans->journal_res.seq, flags) ?: 1; -+} -+ -+int bch2_mark_update(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert, -+ struct bch_fs_usage *fs_usage, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree *b = iter_l(iter)->b; -+ struct btree_node_iter node_iter = iter_l(iter)->iter; -+ struct bkey_packed *_k; -+ int ret = 0; -+ -+ if (unlikely(flags & BTREE_TRIGGER_NORUN)) -+ return 0; -+ -+ if (!btree_node_type_needs_gc(iter->btree_id)) -+ return 0; -+ -+ bch2_mark_key_locked(c, bkey_i_to_s_c(insert), -+ 0, insert->k.size, -+ fs_usage, trans->journal_res.seq, -+ BTREE_TRIGGER_INSERT|flags); -+ -+ if (unlikely(flags & BTREE_TRIGGER_NOOVERWRITES)) -+ return 0; -+ -+ /* -+ * For non extents, we only mark the new key, not the key being -+ * overwritten - unless we're actually deleting: -+ */ -+ if ((iter->btree_id == BTREE_ID_ALLOC || -+ iter->btree_id == BTREE_ID_EC) && -+ !bkey_deleted(&insert->k)) -+ return 0; -+ -+ while ((_k = 
bch2_btree_node_iter_peek(&node_iter, b))) { -+ struct bkey unpacked; -+ struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked); -+ -+ ret = bch2_mark_overwrite(trans, iter, k, insert, -+ fs_usage, flags, -+ btree_node_type_is_extents(iter->btree_id)); -+ if (ret <= 0) -+ break; -+ -+ bch2_btree_node_iter_advance(&node_iter, b); -+ } -+ -+ return ret; -+} -+ -+void bch2_trans_fs_usage_apply(struct btree_trans *trans, -+ struct bch_fs_usage *fs_usage) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_insert_entry *i; -+ static int warned_disk_usage = 0; -+ u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; -+ char buf[200]; -+ -+ if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res, -+ trans->journal_res.seq) || -+ warned_disk_usage || -+ xchg(&warned_disk_usage, 1)) -+ return; -+ -+ bch_err(c, "disk usage increased more than %llu sectors reserved", -+ disk_res_sectors); -+ -+ trans_for_each_update(trans, i) { -+ pr_err("while inserting"); -+ bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k)); -+ pr_err("%s", buf); -+ pr_err("overlapping with"); -+ -+ if (btree_iter_type(i->iter) != BTREE_ITER_CACHED) { -+ struct btree *b = iter_l(i->iter)->b; -+ struct btree_node_iter node_iter = iter_l(i->iter)->iter; -+ struct bkey_packed *_k; -+ -+ while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) { -+ struct bkey unpacked; -+ struct bkey_s_c k; -+ -+ pr_info("_k %px format %u", _k, _k->format); -+ k = bkey_disassemble(b, _k, &unpacked); -+ -+ if (btree_node_is_extents(b) -+ ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0 -+ : bkey_cmp(i->k->k.p, k.k->p)) -+ break; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, k); -+ pr_err("%s", buf); -+ -+ bch2_btree_node_iter_advance(&node_iter, b); -+ } -+ } else { -+ struct bkey_cached *ck = (void *) i->iter->l[0].b; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(ck->k)); -+ pr_err("%s", buf); -+ } -+ } -+} -+ -+/* trans_mark: */ -+ -+static struct btree_iter *trans_get_update(struct btree_trans *trans, -+ enum btree_id btree_id, struct bpos pos, -+ struct bkey_s_c *k) -+{ -+ struct btree_insert_entry *i; -+ -+ trans_for_each_update(trans, i) -+ if (i->iter->btree_id == btree_id && -+ (btree_node_type_is_extents(btree_id) -+ ? bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 && -+ bkey_cmp(pos, i->k->k.p) < 0 -+ : !bkey_cmp(pos, i->iter->pos))) { -+ *k = bkey_i_to_s_c(i->k); -+ return i->iter; -+ } -+ -+ return NULL; -+} -+ -+static int trans_get_key(struct btree_trans *trans, -+ enum btree_id btree_id, struct bpos pos, -+ struct btree_iter **iter, -+ struct bkey_s_c *k) -+{ -+ unsigned flags = btree_id != BTREE_ID_ALLOC -+ ? 
BTREE_ITER_SLOTS -+ : BTREE_ITER_CACHED; -+ int ret; -+ -+ *iter = trans_get_update(trans, btree_id, pos, k); -+ if (*iter) -+ return 1; -+ -+ *iter = bch2_trans_get_iter(trans, btree_id, pos, -+ flags|BTREE_ITER_INTENT); -+ if (IS_ERR(*iter)) -+ return PTR_ERR(*iter); -+ -+ *k = __bch2_btree_iter_peek(*iter, flags); -+ ret = bkey_err(*k); -+ if (ret) -+ bch2_trans_iter_put(trans, *iter); -+ return ret; -+} -+ -+static int bch2_trans_mark_pointer(struct btree_trans *trans, -+ struct bkey_s_c k, struct extent_ptr_decoded p, -+ s64 sectors, enum bch_data_type data_type) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ struct bpos pos = POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr)); -+ struct btree_iter *iter; -+ struct bkey_s_c k_a; -+ struct bkey_alloc_unpacked u; -+ struct bkey_i_alloc *a; -+ struct bucket *g; -+ int ret; -+ -+ iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k_a); -+ if (iter) { -+ u = bch2_alloc_unpack(k_a); -+ } else { -+ iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, pos, -+ BTREE_ITER_CACHED| -+ BTREE_ITER_CACHED_NOFILL| -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ goto out; -+ -+ percpu_down_read(&c->mark_lock); -+ g = bucket(ca, pos.offset); -+ u = alloc_mem_to_key(g, READ_ONCE(g->mark)); -+ percpu_up_read(&c->mark_lock); -+ } -+ -+ ret = __mark_pointer(c, k, p, sectors, data_type, u.gen, &u.data_type, -+ &u.dirty_sectors, &u.cached_sectors); -+ if (ret) -+ goto out; -+ -+ a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8); -+ ret = PTR_ERR_OR_ZERO(a); -+ if (ret) -+ goto out; -+ -+ bkey_alloc_init(&a->k_i); -+ a->k.p = pos; -+ bch2_alloc_pack(a, u); -+ bch2_trans_update(trans, iter, &a->k_i, 0); -+out: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, -+ struct bch_extent_stripe_ptr p, -+ s64 sectors, enum bch_data_type data_type, -+ struct bch_replicas_padded *r, -+ unsigned *nr_data, -+ unsigned *nr_parity) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_i_stripe *s; -+ int ret = 0; -+ -+ ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx), &iter, &k); -+ if (ret < 0) -+ return ret; -+ -+ if (k.k->type != KEY_TYPE_stripe) { -+ bch2_fs_inconsistent(c, -+ "pointer to nonexistent stripe %llu", -+ (u64) p.idx); -+ ret = -EIO; -+ goto out; -+ } -+ -+ s = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ ret = PTR_ERR_OR_ZERO(s); -+ if (ret) -+ goto out; -+ -+ bkey_reassemble(&s->k_i, k); -+ -+ stripe_blockcount_set(&s->v, p.block, -+ stripe_blockcount_get(&s->v, p.block) + -+ sectors); -+ -+ *nr_data = s->v.nr_blocks - s->v.nr_redundant; -+ *nr_parity = s->v.nr_redundant; -+ bch2_bkey_to_replicas(&r->e, bkey_i_to_s_c(&s->k_i)); -+ bch2_trans_update(trans, iter, &s->k_i, 0); -+out: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static int bch2_trans_mark_extent(struct btree_trans *trans, -+ struct bkey_s_c k, unsigned offset, -+ s64 sectors, unsigned flags, -+ enum bch_data_type data_type) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ struct bch_replicas_padded r; -+ s64 dirty_sectors = 0; -+ bool stale; -+ int ret; -+ -+ r.e.data_type = data_type; -+ r.e.nr_devs = 0; -+ r.e.nr_required = 1; -+ -+ BUG_ON(!sectors); -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ s64 disk_sectors = data_type == 
BCH_DATA_BTREE -+ ? sectors -+ : ptr_disk_sectors_delta(p, offset, sectors, flags); -+ -+ ret = bch2_trans_mark_pointer(trans, k, p, disk_sectors, -+ data_type); -+ if (ret < 0) -+ return ret; -+ -+ stale = ret > 0; -+ -+ if (p.ptr.cached) { -+ if (!stale) -+ update_cached_sectors_list(trans, p.ptr.dev, -+ disk_sectors); -+ } else if (!p.has_ec) { -+ dirty_sectors += disk_sectors; -+ r.e.devs[r.e.nr_devs++] = p.ptr.dev; -+ } else { -+ struct bch_replicas_padded ec_r; -+ unsigned nr_data, nr_parity; -+ s64 parity_sectors; -+ -+ ret = bch2_trans_mark_stripe_ptr(trans, p.ec, -+ disk_sectors, data_type, -+ &ec_r, &nr_data, &nr_parity); -+ if (ret) -+ return ret; -+ -+ parity_sectors = -+ __ptr_disk_sectors_delta(p.crc.live_size, -+ offset, sectors, flags, -+ p.crc.compressed_size * nr_parity, -+ p.crc.uncompressed_size * nr_data); -+ -+ update_replicas_list(trans, &ec_r.e, -+ disk_sectors + parity_sectors); -+ -+ r.e.nr_required = 0; -+ } -+ } -+ -+ if (r.e.nr_devs) -+ update_replicas_list(trans, &r.e, dirty_sectors); -+ -+ return 0; -+} -+ -+static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, -+ struct bkey_s_c_reflink_p p, -+ u64 idx, unsigned sectors, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_i_reflink_v *r_v; -+ s64 ret; -+ -+ ret = trans_get_key(trans, BTREE_ID_REFLINK, -+ POS(0, idx), &iter, &k); -+ if (ret < 0) -+ return ret; -+ -+ if (k.k->type != KEY_TYPE_reflink_v) { -+ bch2_fs_inconsistent(c, -+ "%llu:%llu len %u points to nonexistent indirect extent %llu", -+ p.k->p.inode, p.k->p.offset, p.k->size, idx); -+ ret = -EIO; -+ goto err; -+ } -+ -+ if ((flags & BTREE_TRIGGER_OVERWRITE) && -+ (bkey_start_offset(k.k) < idx || -+ k.k->p.offset > idx + sectors)) -+ goto out; -+ -+ sectors = k.k->p.offset - idx; -+ -+ r_v = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ ret = PTR_ERR_OR_ZERO(r_v); -+ if (ret) -+ goto err; -+ -+ bkey_reassemble(&r_v->k_i, k); -+ -+ le64_add_cpu(&r_v->v.refcount, -+ !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1); -+ -+ if (!r_v->v.refcount) { -+ r_v->k.type = KEY_TYPE_deleted; -+ set_bkey_val_u64s(&r_v->k, 0); -+ } -+ -+ bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k)); -+ BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); -+ -+ bch2_trans_update(trans, iter, &r_v->k_i, 0); -+out: -+ ret = sectors; -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static int bch2_trans_mark_reflink_p(struct btree_trans *trans, -+ struct bkey_s_c_reflink_p p, unsigned offset, -+ s64 sectors, unsigned flags) -+{ -+ u64 idx = le64_to_cpu(p.v->idx) + offset; -+ s64 ret = 0; -+ -+ sectors = abs(sectors); -+ BUG_ON(offset + sectors > p.k->size); -+ -+ while (sectors) { -+ ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors, flags); -+ if (ret < 0) -+ break; -+ -+ idx += ret; -+ sectors = max_t(s64, 0LL, sectors - ret); -+ ret = 0; -+ } -+ -+ return ret; -+} -+ -+int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k, -+ unsigned offset, s64 sectors, unsigned flags) -+{ -+ struct replicas_delta_list *d; -+ struct bch_fs *c = trans->c; -+ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ sectors = !(flags & BTREE_TRIGGER_OVERWRITE) -+ ? 
c->opts.btree_node_size -+ : -c->opts.btree_node_size; -+ -+ return bch2_trans_mark_extent(trans, k, offset, sectors, -+ flags, BCH_DATA_BTREE); -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ return bch2_trans_mark_extent(trans, k, offset, sectors, -+ flags, BCH_DATA_USER); -+ case KEY_TYPE_inode: -+ d = replicas_deltas_realloc(trans, 0); -+ -+ if (!(flags & BTREE_TRIGGER_OVERWRITE)) -+ d->nr_inodes++; -+ else -+ d->nr_inodes--; -+ return 0; -+ case KEY_TYPE_reservation: { -+ unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; -+ -+ d = replicas_deltas_realloc(trans, 0); -+ -+ sectors *= replicas; -+ replicas = clamp_t(unsigned, replicas, 1, -+ ARRAY_SIZE(d->persistent_reserved)); -+ -+ d->persistent_reserved[replicas - 1] += sectors; -+ return 0; -+ } -+ case KEY_TYPE_reflink_p: -+ return bch2_trans_mark_reflink_p(trans, -+ bkey_s_c_to_reflink_p(k), -+ offset, sectors, flags); -+ default: -+ return 0; -+ } -+} -+ -+int bch2_trans_mark_update(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert, -+ unsigned flags) -+{ -+ struct btree *b = iter_l(iter)->b; -+ struct btree_node_iter node_iter = iter_l(iter)->iter; -+ struct bkey_packed *_k; -+ int ret; -+ -+ if (unlikely(flags & BTREE_TRIGGER_NORUN)) -+ return 0; -+ -+ if (!btree_node_type_needs_gc(iter->btree_id)) -+ return 0; -+ -+ ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(insert), -+ 0, insert->k.size, BTREE_TRIGGER_INSERT); -+ if (ret) -+ return ret; -+ -+ if (unlikely(flags & BTREE_TRIGGER_NOOVERWRITES)) -+ return 0; -+ -+ if (btree_iter_type(iter) == BTREE_ITER_CACHED) { -+ struct bkey_cached *ck = (void *) iter->l[0].b; -+ -+ return bch2_trans_mark_key(trans, bkey_i_to_s_c(ck->k), -+ 0, 0, BTREE_TRIGGER_OVERWRITE); -+ } -+ -+ while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) { -+ struct bkey unpacked; -+ struct bkey_s_c k; -+ unsigned offset = 0; -+ s64 sectors = 0; -+ unsigned flags = BTREE_TRIGGER_OVERWRITE; -+ -+ k = bkey_disassemble(b, _k, &unpacked); -+ -+ if (btree_node_is_extents(b) -+ ? 
bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0 -+ : bkey_cmp(insert->k.p, k.k->p)) -+ break; -+ -+ if (btree_node_is_extents(b)) { -+ switch (bch2_extent_overlap(&insert->k, k.k)) { -+ case BCH_EXTENT_OVERLAP_ALL: -+ offset = 0; -+ sectors = -((s64) k.k->size); -+ break; -+ case BCH_EXTENT_OVERLAP_BACK: -+ offset = bkey_start_offset(&insert->k) - -+ bkey_start_offset(k.k); -+ sectors = bkey_start_offset(&insert->k) - -+ k.k->p.offset; -+ break; -+ case BCH_EXTENT_OVERLAP_FRONT: -+ offset = 0; -+ sectors = bkey_start_offset(k.k) - -+ insert->k.p.offset; -+ break; -+ case BCH_EXTENT_OVERLAP_MIDDLE: -+ offset = bkey_start_offset(&insert->k) - -+ bkey_start_offset(k.k); -+ sectors = -((s64) insert->k.size); -+ flags |= BTREE_TRIGGER_OVERWRITE_SPLIT; -+ break; -+ } -+ -+ BUG_ON(sectors >= 0); -+ } -+ -+ ret = bch2_trans_mark_key(trans, k, offset, sectors, flags); -+ if (ret) -+ return ret; -+ -+ bch2_btree_node_iter_advance(&node_iter, b); -+ } -+ -+ return 0; -+} -+ -+/* Disk reservations: */ -+ -+static u64 bch2_recalc_sectors_available(struct bch_fs *c) -+{ -+ percpu_u64_set(&c->pcpu->sectors_available, 0); -+ -+ return avail_factor(__bch2_fs_usage_read_short(c).free); -+} -+ -+void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res) -+{ -+ percpu_down_read(&c->mark_lock); -+ this_cpu_sub(c->usage[0]->online_reserved, -+ res->sectors); -+ percpu_up_read(&c->mark_lock); -+ -+ res->sectors = 0; -+} -+ -+#define SECTORS_CACHE 1024 -+ -+int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, -+ unsigned sectors, int flags) -+{ -+ struct bch_fs_pcpu *pcpu; -+ u64 old, v, get; -+ s64 sectors_available; -+ int ret; -+ -+ percpu_down_read(&c->mark_lock); -+ preempt_disable(); -+ pcpu = this_cpu_ptr(c->pcpu); -+ -+ if (sectors <= pcpu->sectors_available) -+ goto out; -+ -+ v = atomic64_read(&c->sectors_available); -+ do { -+ old = v; -+ get = min((u64) sectors + SECTORS_CACHE, old); -+ -+ if (get < sectors) { -+ preempt_enable(); -+ percpu_up_read(&c->mark_lock); -+ goto recalculate; -+ } -+ } while ((v = atomic64_cmpxchg(&c->sectors_available, -+ old, old - get)) != old); -+ -+ pcpu->sectors_available += get; -+ -+out: -+ pcpu->sectors_available -= sectors; -+ this_cpu_add(c->usage[0]->online_reserved, sectors); -+ res->sectors += sectors; -+ -+ preempt_enable(); -+ percpu_up_read(&c->mark_lock); -+ return 0; -+ -+recalculate: -+ percpu_down_write(&c->mark_lock); -+ -+ sectors_available = bch2_recalc_sectors_available(c); -+ -+ if (sectors <= sectors_available || -+ (flags & BCH_DISK_RESERVATION_NOFAIL)) { -+ atomic64_set(&c->sectors_available, -+ max_t(s64, 0, sectors_available - sectors)); -+ this_cpu_add(c->usage[0]->online_reserved, sectors); -+ res->sectors += sectors; -+ ret = 0; -+ } else { -+ atomic64_set(&c->sectors_available, sectors_available); -+ ret = -ENOSPC; -+ } -+ -+ percpu_up_write(&c->mark_lock); -+ -+ return ret; -+} -+ -+/* Startup/shutdown: */ -+ -+static void buckets_free_rcu(struct rcu_head *rcu) -+{ -+ struct bucket_array *buckets = -+ container_of(rcu, struct bucket_array, rcu); -+ -+ kvpfree(buckets, -+ sizeof(struct bucket_array) + -+ buckets->nbuckets * sizeof(struct bucket)); -+} -+ -+int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) -+{ -+ struct bucket_array *buckets = NULL, *old_buckets = NULL; -+ unsigned long *buckets_nouse = NULL; -+ alloc_fifo free[RESERVE_NR]; -+ alloc_fifo free_inc; -+ alloc_heap alloc_heap; -+ copygc_heap copygc_heap; -+ -+ size_t btree_reserve = 
DIV_ROUND_UP(BTREE_NODE_RESERVE, -+ ca->mi.bucket_size / c->opts.btree_node_size); -+ /* XXX: these should be tunable */ -+ size_t reserve_none = max_t(size_t, 1, nbuckets >> 9); -+ size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 7); -+ size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12), -+ btree_reserve * 2); -+ bool resize = ca->buckets[0] != NULL, -+ start_copygc = ca->copygc_thread != NULL; -+ int ret = -ENOMEM; -+ unsigned i; -+ -+ memset(&free, 0, sizeof(free)); -+ memset(&free_inc, 0, sizeof(free_inc)); -+ memset(&alloc_heap, 0, sizeof(alloc_heap)); -+ memset(©gc_heap, 0, sizeof(copygc_heap)); -+ -+ if (!(buckets = kvpmalloc(sizeof(struct bucket_array) + -+ nbuckets * sizeof(struct bucket), -+ GFP_KERNEL|__GFP_ZERO)) || -+ !(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) * -+ sizeof(unsigned long), -+ GFP_KERNEL|__GFP_ZERO)) || -+ !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) || -+ !init_fifo(&free[RESERVE_MOVINGGC], -+ copygc_reserve, GFP_KERNEL) || -+ !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) || -+ !init_fifo(&free_inc, free_inc_nr, GFP_KERNEL) || -+ !init_heap(&alloc_heap, ALLOC_SCAN_BATCH(ca) << 1, GFP_KERNEL) || -+ !init_heap(©gc_heap, copygc_reserve, GFP_KERNEL)) -+ goto err; -+ -+ buckets->first_bucket = ca->mi.first_bucket; -+ buckets->nbuckets = nbuckets; -+ -+ bch2_copygc_stop(ca); -+ -+ if (resize) { -+ down_write(&c->gc_lock); -+ down_write(&ca->bucket_lock); -+ percpu_down_write(&c->mark_lock); -+ } -+ -+ old_buckets = bucket_array(ca); -+ -+ if (resize) { -+ size_t n = min(buckets->nbuckets, old_buckets->nbuckets); -+ -+ memcpy(buckets->b, -+ old_buckets->b, -+ n * sizeof(struct bucket)); -+ memcpy(buckets_nouse, -+ ca->buckets_nouse, -+ BITS_TO_LONGS(n) * sizeof(unsigned long)); -+ } -+ -+ rcu_assign_pointer(ca->buckets[0], buckets); -+ buckets = old_buckets; -+ -+ swap(ca->buckets_nouse, buckets_nouse); -+ -+ if (resize) { -+ percpu_up_write(&c->mark_lock); -+ up_write(&c->gc_lock); -+ } -+ -+ spin_lock(&c->freelist_lock); -+ for (i = 0; i < RESERVE_NR; i++) { -+ fifo_move(&free[i], &ca->free[i]); -+ swap(ca->free[i], free[i]); -+ } -+ fifo_move(&free_inc, &ca->free_inc); -+ swap(ca->free_inc, free_inc); -+ spin_unlock(&c->freelist_lock); -+ -+ /* with gc lock held, alloc_heap can't be in use: */ -+ swap(ca->alloc_heap, alloc_heap); -+ -+ /* and we shut down copygc: */ -+ swap(ca->copygc_heap, copygc_heap); -+ -+ nbuckets = ca->mi.nbuckets; -+ -+ if (resize) -+ up_write(&ca->bucket_lock); -+ -+ if (start_copygc && -+ bch2_copygc_start(c, ca)) -+ bch_err(ca, "error restarting copygc thread"); -+ -+ ret = 0; -+err: -+ free_heap(©gc_heap); -+ free_heap(&alloc_heap); -+ free_fifo(&free_inc); -+ for (i = 0; i < RESERVE_NR; i++) -+ free_fifo(&free[i]); -+ kvpfree(buckets_nouse, -+ BITS_TO_LONGS(nbuckets) * sizeof(unsigned long)); -+ if (buckets) -+ call_rcu(&old_buckets->rcu, buckets_free_rcu); -+ -+ return ret; -+} -+ -+void bch2_dev_buckets_free(struct bch_dev *ca) -+{ -+ unsigned i; -+ -+ free_heap(&ca->copygc_heap); -+ free_heap(&ca->alloc_heap); -+ free_fifo(&ca->free_inc); -+ for (i = 0; i < RESERVE_NR; i++) -+ free_fifo(&ca->free[i]); -+ kvpfree(ca->buckets_nouse, -+ BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long)); -+ kvpfree(rcu_dereference_protected(ca->buckets[0], 1), -+ sizeof(struct bucket_array) + -+ ca->mi.nbuckets * sizeof(struct bucket)); -+ -+ free_percpu(ca->usage[0]); -+} -+ -+int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) -+{ -+ if (!(ca->usage[0] = alloc_percpu(struct 
bch_dev_usage))) -+ return -ENOMEM; -+ -+ return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);; -+} -diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h -new file mode 100644 -index 000000000000..97265fe90e96 ---- /dev/null -+++ b/fs/bcachefs/buckets.h -@@ -0,0 +1,327 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Code for manipulating bucket marks for garbage collection. -+ * -+ * Copyright 2014 Datera, Inc. -+ */ -+ -+#ifndef _BUCKETS_H -+#define _BUCKETS_H -+ -+#include "buckets_types.h" -+#include "super.h" -+ -+#define for_each_bucket(_b, _buckets) \ -+ for (_b = (_buckets)->b + (_buckets)->first_bucket; \ -+ _b < (_buckets)->b + (_buckets)->nbuckets; _b++) -+ -+#define bucket_cmpxchg(g, new, expr) \ -+({ \ -+ struct bucket *_g = g; \ -+ u64 _v = atomic64_read(&(g)->_mark.v); \ -+ struct bucket_mark _old; \ -+ \ -+ do { \ -+ (new).v.counter = _old.v.counter = _v; \ -+ expr; \ -+ } while ((_v = atomic64_cmpxchg(&(_g)->_mark.v, \ -+ _old.v.counter, \ -+ (new).v.counter)) != _old.v.counter);\ -+ _old; \ -+}) -+ -+static inline struct bucket_array *__bucket_array(struct bch_dev *ca, -+ bool gc) -+{ -+ return rcu_dereference_check(ca->buckets[gc], -+ !ca->fs || -+ percpu_rwsem_is_held(&ca->fs->mark_lock) || -+ lockdep_is_held(&ca->fs->gc_lock) || -+ lockdep_is_held(&ca->bucket_lock)); -+} -+ -+static inline struct bucket_array *bucket_array(struct bch_dev *ca) -+{ -+ return __bucket_array(ca, false); -+} -+ -+static inline struct bucket *__bucket(struct bch_dev *ca, size_t b, bool gc) -+{ -+ struct bucket_array *buckets = __bucket_array(ca, gc); -+ -+ BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets); -+ return buckets->b + b; -+} -+ -+static inline struct bucket *bucket(struct bch_dev *ca, size_t b) -+{ -+ return __bucket(ca, b, false); -+} -+ -+static inline void bucket_io_clock_reset(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, int rw) -+{ -+ bucket(ca, b)->io_time[rw] = c->bucket_clock[rw].hand; -+} -+ -+static inline u16 bucket_last_io(struct bch_fs *c, struct bucket *g, int rw) -+{ -+ return c->bucket_clock[rw].hand - g->io_time[rw]; -+} -+ -+/* -+ * bucket_gc_gen() returns the difference between the bucket's current gen and -+ * the oldest gen of any pointer into that bucket in the btree. -+ */ -+ -+static inline u8 bucket_gc_gen(struct bch_dev *ca, size_t b) -+{ -+ struct bucket *g = bucket(ca, b); -+ -+ return g->mark.gen - g->oldest_gen; -+} -+ -+static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr) -+{ -+ return sector_to_bucket(ca, ptr->offset); -+} -+ -+static inline struct bucket *PTR_BUCKET(struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr, -+ bool gc) -+{ -+ return __bucket(ca, PTR_BUCKET_NR(ca, ptr), gc); -+} -+ -+static inline enum bch_data_type ptr_data_type(const struct bkey *k, -+ const struct bch_extent_ptr *ptr) -+{ -+ if (k->type == KEY_TYPE_btree_ptr || -+ k->type == KEY_TYPE_btree_ptr_v2) -+ return BCH_DATA_BTREE; -+ -+ return ptr->cached ? BCH_DATA_CACHED : BCH_DATA_USER; -+} -+ -+static inline struct bucket_mark ptr_bucket_mark(struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr) -+{ -+ struct bucket_mark m; -+ -+ rcu_read_lock(); -+ m = READ_ONCE(PTR_BUCKET(ca, ptr, 0)->mark); -+ rcu_read_unlock(); -+ -+ return m; -+} -+ -+static inline int gen_cmp(u8 a, u8 b) -+{ -+ return (s8) (a - b); -+} -+ -+static inline int gen_after(u8 a, u8 b) -+{ -+ int r = gen_cmp(a, b); -+ -+ return r > 0 ? 
r : 0; -+} -+ -+/** -+ * ptr_stale() - check if a pointer points into a bucket that has been -+ * invalidated. -+ */ -+static inline u8 ptr_stale(struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr) -+{ -+ return gen_after(ptr_bucket_mark(ca, ptr).gen, ptr->gen); -+} -+ -+static inline s64 __ptr_disk_sectors(struct extent_ptr_decoded p, -+ unsigned live_size) -+{ -+ return live_size && p.crc.compression_type -+ ? max(1U, DIV_ROUND_UP(live_size * p.crc.compressed_size, -+ p.crc.uncompressed_size)) -+ : live_size; -+} -+ -+static inline s64 ptr_disk_sectors(struct extent_ptr_decoded p) -+{ -+ return __ptr_disk_sectors(p, p.crc.live_size); -+} -+ -+/* bucket gc marks */ -+ -+static inline unsigned bucket_sectors_used(struct bucket_mark mark) -+{ -+ return mark.dirty_sectors + mark.cached_sectors; -+} -+ -+static inline bool bucket_unused(struct bucket_mark mark) -+{ -+ return !mark.owned_by_allocator && -+ !mark.data_type && -+ !bucket_sectors_used(mark); -+} -+ -+static inline bool is_available_bucket(struct bucket_mark mark) -+{ -+ return (!mark.owned_by_allocator && -+ !mark.dirty_sectors && -+ !mark.stripe); -+} -+ -+static inline bool bucket_needs_journal_commit(struct bucket_mark m, -+ u16 last_seq_ondisk) -+{ -+ return m.journal_seq_valid && -+ ((s16) m.journal_seq - (s16) last_seq_ondisk > 0); -+} -+ -+/* Device usage: */ -+ -+struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *); -+ -+void bch2_dev_usage_from_buckets(struct bch_fs *); -+ -+static inline u64 __dev_buckets_available(struct bch_dev *ca, -+ struct bch_dev_usage stats) -+{ -+ u64 total = ca->mi.nbuckets - ca->mi.first_bucket; -+ -+ if (WARN_ONCE(stats.buckets_unavailable > total, -+ "buckets_unavailable overflow (%llu > %llu)\n", -+ stats.buckets_unavailable, total)) -+ return 0; -+ -+ return total - stats.buckets_unavailable; -+} -+ -+/* -+ * Number of reclaimable buckets - only for use by the allocator thread: -+ */ -+static inline u64 dev_buckets_available(struct bch_fs *c, struct bch_dev *ca) -+{ -+ return __dev_buckets_available(ca, bch2_dev_usage_read(c, ca)); -+} -+ -+static inline u64 __dev_buckets_free(struct bch_dev *ca, -+ struct bch_dev_usage stats) -+{ -+ return __dev_buckets_available(ca, stats) + -+ fifo_used(&ca->free[RESERVE_NONE]) + -+ fifo_used(&ca->free_inc); -+} -+ -+static inline u64 dev_buckets_free(struct bch_fs *c, struct bch_dev *ca) -+{ -+ return __dev_buckets_free(ca, bch2_dev_usage_read(c, ca)); -+} -+ -+/* Filesystem usage: */ -+ -+static inline unsigned fs_usage_u64s(struct bch_fs *c) -+{ -+ -+ return sizeof(struct bch_fs_usage) / sizeof(u64) + -+ READ_ONCE(c->replicas.nr); -+} -+ -+void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *); -+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *); -+ -+u64 bch2_fs_usage_read_one(struct bch_fs *, u64 *); -+ -+struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *); -+ -+void bch2_fs_usage_acc_to_base(struct bch_fs *, unsigned); -+ -+void bch2_fs_usage_to_text(struct printbuf *, -+ struct bch_fs *, struct bch_fs_usage *); -+ -+u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage *); -+ -+struct bch_fs_usage_short -+bch2_fs_usage_read_short(struct bch_fs *); -+ -+/* key/bucket marking: */ -+ -+void bch2_bucket_seq_cleanup(struct bch_fs *); -+void bch2_fs_usage_initialize(struct bch_fs *); -+ -+void bch2_invalidate_bucket(struct bch_fs *, struct bch_dev *, -+ size_t, struct bucket_mark *); -+void bch2_mark_alloc_bucket(struct bch_fs *, struct bch_dev *, -+ size_t, bool, struct gc_pos, 
unsigned); -+void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, -+ size_t, enum bch_data_type, unsigned, -+ struct gc_pos, unsigned); -+ -+int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned, s64, -+ struct bch_fs_usage *, u64, unsigned); -+int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *, -+ struct disk_reservation *, unsigned); -+ -+int bch2_mark_overwrite(struct btree_trans *, struct btree_iter *, -+ struct bkey_s_c, struct bkey_i *, -+ struct bch_fs_usage *, unsigned, bool); -+int bch2_mark_update(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *, struct bch_fs_usage *, unsigned); -+ -+int bch2_replicas_delta_list_apply(struct bch_fs *, -+ struct bch_fs_usage *, -+ struct replicas_delta_list *); -+int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, -+ unsigned, s64, unsigned); -+int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter, -+ struct bkey_i *insert, unsigned); -+void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *); -+ -+/* disk reservations: */ -+ -+void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *); -+ -+static inline void bch2_disk_reservation_put(struct bch_fs *c, -+ struct disk_reservation *res) -+{ -+ if (res->sectors) -+ __bch2_disk_reservation_put(c, res); -+} -+ -+#define BCH_DISK_RESERVATION_NOFAIL (1 << 0) -+ -+int bch2_disk_reservation_add(struct bch_fs *, -+ struct disk_reservation *, -+ unsigned, int); -+ -+static inline struct disk_reservation -+bch2_disk_reservation_init(struct bch_fs *c, unsigned nr_replicas) -+{ -+ return (struct disk_reservation) { -+ .sectors = 0, -+#if 0 -+ /* not used yet: */ -+ .gen = c->capacity_gen, -+#endif -+ .nr_replicas = nr_replicas, -+ }; -+} -+ -+static inline int bch2_disk_reservation_get(struct bch_fs *c, -+ struct disk_reservation *res, -+ unsigned sectors, -+ unsigned nr_replicas, -+ int flags) -+{ -+ *res = bch2_disk_reservation_init(c, nr_replicas); -+ -+ return bch2_disk_reservation_add(c, res, sectors * nr_replicas, flags); -+} -+ -+int bch2_dev_buckets_resize(struct bch_fs *, struct bch_dev *, u64); -+void bch2_dev_buckets_free(struct bch_dev *); -+int bch2_dev_buckets_alloc(struct bch_fs *, struct bch_dev *); -+ -+#endif /* _BUCKETS_H */ -diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h -new file mode 100644 -index 000000000000..53f22726893d ---- /dev/null -+++ b/fs/bcachefs/buckets_types.h -@@ -0,0 +1,133 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BUCKETS_TYPES_H -+#define _BUCKETS_TYPES_H -+ -+#include "bcachefs_format.h" -+#include "util.h" -+ -+#define BUCKET_JOURNAL_SEQ_BITS 16 -+ -+struct bucket_mark { -+ union { -+ atomic64_t v; -+ -+ struct { -+ u8 gen; -+ u8 data_type:3, -+ owned_by_allocator:1, -+ journal_seq_valid:1, -+ stripe:1; -+ u16 dirty_sectors; -+ u16 cached_sectors; -+ -+ /* -+ * low bits of journal sequence number when this bucket was most -+ * recently modified: if journal_seq_valid is set, this bucket can't be -+ * reused until the journal sequence number written to disk is >= the -+ * bucket's journal sequence number: -+ */ -+ u16 journal_seq; -+ }; -+ }; -+}; -+ -+struct bucket { -+ union { -+ struct bucket_mark _mark; -+ const struct bucket_mark mark; -+ }; -+ -+ u16 io_time[2]; -+ u8 oldest_gen; -+ u8 gc_gen; -+ unsigned gen_valid:1; -+}; -+ -+struct bucket_array { -+ struct rcu_head rcu; -+ u16 first_bucket; -+ size_t nbuckets; -+ struct bucket b[]; -+}; -+ -+struct bch_dev_usage { -+ u64 buckets[BCH_DATA_NR]; -+ u64 buckets_alloc; 
-+ u64 buckets_unavailable; -+ -+ /* _compressed_ sectors: */ -+ u64 sectors[BCH_DATA_NR]; -+ u64 sectors_fragmented; -+ -+ u64 buckets_ec; -+ u64 sectors_ec; -+}; -+ -+struct bch_fs_usage { -+ /* all fields are in units of 512 byte sectors: */ -+ -+ u64 online_reserved; -+ -+ /* fields after online_reserved are cleared/recalculated by gc: */ -+ u64 gc_start[0]; -+ -+ u64 hidden; -+ u64 btree; -+ u64 data; -+ u64 cached; -+ u64 reserved; -+ u64 nr_inodes; -+ -+ /* XXX: add stats for compression ratio */ -+#if 0 -+ u64 uncompressed; -+ u64 compressed; -+#endif -+ -+ /* broken out: */ -+ -+ u64 persistent_reserved[BCH_REPLICAS_MAX]; -+ u64 replicas[]; -+}; -+ -+struct bch_fs_usage_short { -+ u64 capacity; -+ u64 used; -+ u64 free; -+ u64 nr_inodes; -+}; -+ -+struct replicas_delta { -+ s64 delta; -+ struct bch_replicas_entry r; -+} __packed; -+ -+struct replicas_delta_list { -+ unsigned size; -+ unsigned used; -+ -+ struct {} memset_start; -+ u64 nr_inodes; -+ u64 persistent_reserved[BCH_REPLICAS_MAX]; -+ struct {} memset_end; -+ struct replicas_delta d[0]; -+}; -+ -+/* -+ * A reservation for space on disk: -+ */ -+struct disk_reservation { -+ u64 sectors; -+ u32 gen; -+ unsigned nr_replicas; -+}; -+ -+struct copygc_heap_entry { -+ u8 gen; -+ u32 sectors; -+ u64 offset; -+}; -+ -+typedef HEAP(struct copygc_heap_entry) copygc_heap; -+ -+#endif /* _BUCKETS_TYPES_H */ -diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c -new file mode 100644 -index 000000000000..3af521947502 ---- /dev/null -+++ b/fs/bcachefs/chardev.c -@@ -0,0 +1,704 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_CHARDEV -+ -+#include "bcachefs.h" -+#include "bcachefs_ioctl.h" -+#include "buckets.h" -+#include "chardev.h" -+#include "move.h" -+#include "replicas.h" -+#include "super.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* returns with ref on ca->ref */ -+static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev, -+ unsigned flags) -+{ -+ struct bch_dev *ca; -+ -+ if (flags & BCH_BY_INDEX) { -+ if (dev >= c->sb.nr_devices) -+ return ERR_PTR(-EINVAL); -+ -+ rcu_read_lock(); -+ ca = rcu_dereference(c->devs[dev]); -+ if (ca) -+ percpu_ref_get(&ca->ref); -+ rcu_read_unlock(); -+ -+ if (!ca) -+ return ERR_PTR(-EINVAL); -+ } else { -+ char *path; -+ -+ path = strndup_user((const char __user *) -+ (unsigned long) dev, PATH_MAX); -+ if (IS_ERR(path)) -+ return ERR_CAST(path); -+ -+ ca = bch2_dev_lookup(c, path); -+ kfree(path); -+ } -+ -+ return ca; -+} -+ -+#if 0 -+static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg) -+{ -+ struct bch_ioctl_assemble arg; -+ struct bch_fs *c; -+ u64 *user_devs = NULL; -+ char **devs = NULL; -+ unsigned i; -+ int ret = -EFAULT; -+ -+ if (copy_from_user(&arg, user_arg, sizeof(arg))) -+ return -EFAULT; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ user_devs = kmalloc_array(arg.nr_devs, sizeof(u64), GFP_KERNEL); -+ if (!user_devs) -+ return -ENOMEM; -+ -+ devs = kcalloc(arg.nr_devs, sizeof(char *), GFP_KERNEL); -+ -+ if (copy_from_user(user_devs, user_arg->devs, -+ sizeof(u64) * arg.nr_devs)) -+ goto err; -+ -+ for (i = 0; i < arg.nr_devs; i++) { -+ devs[i] = strndup_user((const char __user *)(unsigned long) -+ user_devs[i], -+ PATH_MAX); -+ if (!devs[i]) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ } -+ -+ c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty()); -+ ret = PTR_ERR_OR_ZERO(c); -+ if (!ret) -+ closure_put(&c->cl); 
-+err: -+ if (devs) -+ for (i = 0; i < arg.nr_devs; i++) -+ kfree(devs[i]); -+ kfree(devs); -+ return ret; -+} -+ -+static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg) -+{ -+ struct bch_ioctl_incremental arg; -+ const char *err; -+ char *path; -+ -+ if (copy_from_user(&arg, user_arg, sizeof(arg))) -+ return -EFAULT; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); -+ if (!path) -+ return -ENOMEM; -+ -+ err = bch2_fs_open_incremental(path); -+ kfree(path); -+ -+ if (err) { -+ pr_err("Could not register bcachefs devices: %s", err); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+#endif -+ -+static long bch2_global_ioctl(unsigned cmd, void __user *arg) -+{ -+ switch (cmd) { -+#if 0 -+ case BCH_IOCTL_ASSEMBLE: -+ return bch2_ioctl_assemble(arg); -+ case BCH_IOCTL_INCREMENTAL: -+ return bch2_ioctl_incremental(arg); -+#endif -+ default: -+ return -ENOTTY; -+ } -+} -+ -+static long bch2_ioctl_query_uuid(struct bch_fs *c, -+ struct bch_ioctl_query_uuid __user *user_arg) -+{ -+ return copy_to_user(&user_arg->uuid, -+ &c->sb.user_uuid, -+ sizeof(c->sb.user_uuid)); -+} -+ -+#if 0 -+static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg) -+{ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ return bch2_fs_start(c); -+} -+ -+static long bch2_ioctl_stop(struct bch_fs *c) -+{ -+ bch2_fs_stop(c); -+ return 0; -+} -+#endif -+ -+static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ char *path; -+ int ret; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); -+ if (!path) -+ return -ENOMEM; -+ -+ ret = bch2_dev_add(c, path); -+ kfree(path); -+ -+ return ret; -+} -+ -+static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ struct bch_dev *ca; -+ -+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| -+ BCH_FORCE_IF_METADATA_LOST| -+ BCH_FORCE_IF_DEGRADED| -+ BCH_BY_INDEX)) || -+ arg.pad) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ return bch2_dev_remove(c, ca, arg.flags); -+} -+ -+static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ char *path; -+ int ret; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); -+ if (!path) -+ return -ENOMEM; -+ -+ ret = bch2_dev_online(c, path); -+ kfree(path); -+ return ret; -+} -+ -+static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| -+ BCH_FORCE_IF_METADATA_LOST| -+ BCH_FORCE_IF_DEGRADED| -+ BCH_BY_INDEX)) || -+ arg.pad) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ ret = bch2_dev_offline(c, ca, arg.flags); -+ percpu_ref_put(&ca->ref); -+ return ret; -+} -+ -+static long bch2_ioctl_disk_set_state(struct bch_fs *c, -+ struct bch_ioctl_disk_set_state arg) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| -+ BCH_FORCE_IF_METADATA_LOST| -+ BCH_FORCE_IF_DEGRADED| -+ BCH_BY_INDEX)) || -+ arg.pad[0] || arg.pad[1] || arg.pad[2]) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ ret = bch2_dev_set_state(c, ca, arg.new_state, 
arg.flags); -+ -+ percpu_ref_put(&ca->ref); -+ return ret; -+} -+ -+struct bch_data_ctx { -+ struct bch_fs *c; -+ struct bch_ioctl_data arg; -+ struct bch_move_stats stats; -+ -+ int ret; -+ -+ struct task_struct *thread; -+}; -+ -+static int bch2_data_thread(void *arg) -+{ -+ struct bch_data_ctx *ctx = arg; -+ -+ ctx->ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg); -+ -+ ctx->stats.data_type = U8_MAX; -+ return 0; -+} -+ -+static int bch2_data_job_release(struct inode *inode, struct file *file) -+{ -+ struct bch_data_ctx *ctx = file->private_data; -+ -+ kthread_stop(ctx->thread); -+ put_task_struct(ctx->thread); -+ kfree(ctx); -+ return 0; -+} -+ -+static ssize_t bch2_data_job_read(struct file *file, char __user *buf, -+ size_t len, loff_t *ppos) -+{ -+ struct bch_data_ctx *ctx = file->private_data; -+ struct bch_fs *c = ctx->c; -+ struct bch_ioctl_data_event e = { -+ .type = BCH_DATA_EVENT_PROGRESS, -+ .p.data_type = ctx->stats.data_type, -+ .p.btree_id = ctx->stats.btree_id, -+ .p.pos = ctx->stats.pos, -+ .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen), -+ .p.sectors_total = bch2_fs_usage_read_short(c).used, -+ }; -+ -+ if (len < sizeof(e)) -+ return -EINVAL; -+ -+ return copy_to_user(buf, &e, sizeof(e)) ?: sizeof(e); -+} -+ -+static const struct file_operations bcachefs_data_ops = { -+ .release = bch2_data_job_release, -+ .read = bch2_data_job_read, -+ .llseek = no_llseek, -+}; -+ -+static long bch2_ioctl_data(struct bch_fs *c, -+ struct bch_ioctl_data arg) -+{ -+ struct bch_data_ctx *ctx = NULL; -+ struct file *file = NULL; -+ unsigned flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK; -+ int ret, fd = -1; -+ -+ if (arg.op >= BCH_DATA_OP_NR || arg.flags) -+ return -EINVAL; -+ -+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); -+ if (!ctx) -+ return -ENOMEM; -+ -+ ctx->c = c; -+ ctx->arg = arg; -+ -+ ctx->thread = kthread_create(bch2_data_thread, ctx, "[bcachefs]"); -+ if (IS_ERR(ctx->thread)) { -+ ret = PTR_ERR(ctx->thread); -+ goto err; -+ } -+ -+ ret = get_unused_fd_flags(flags); -+ if (ret < 0) -+ goto err; -+ fd = ret; -+ -+ file = anon_inode_getfile("[bcachefs]", &bcachefs_data_ops, ctx, flags); -+ if (IS_ERR(file)) { -+ ret = PTR_ERR(file); -+ goto err; -+ } -+ -+ fd_install(fd, file); -+ -+ get_task_struct(ctx->thread); -+ wake_up_process(ctx->thread); -+ -+ return fd; -+err: -+ if (fd >= 0) -+ put_unused_fd(fd); -+ if (!IS_ERR_OR_NULL(ctx->thread)) -+ kthread_stop(ctx->thread); -+ kfree(ctx); -+ return ret; -+} -+ -+static long bch2_ioctl_fs_usage(struct bch_fs *c, -+ struct bch_ioctl_fs_usage __user *user_arg) -+{ -+ struct bch_ioctl_fs_usage *arg = NULL; -+ struct bch_replicas_usage *dst_e, *dst_end; -+ struct bch_fs_usage *src; -+ u32 replica_entries_bytes; -+ unsigned i; -+ int ret = 0; -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EINVAL; -+ -+ if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes)) -+ return -EFAULT; -+ -+ arg = kzalloc(sizeof(*arg) + replica_entries_bytes, GFP_KERNEL); -+ if (!arg) -+ return -ENOMEM; -+ -+ src = bch2_fs_usage_read(c); -+ if (!src) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ arg->capacity = c->capacity; -+ arg->used = bch2_fs_sectors_used(c, src); -+ arg->online_reserved = src->online_reserved; -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) -+ arg->persistent_reserved[i] = src->persistent_reserved[i]; -+ -+ dst_e = arg->replicas; -+ dst_end = (void *) arg->replicas + replica_entries_bytes; -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *src_e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ 
if (replicas_usage_next(dst_e) > dst_end) { -+ ret = -ERANGE; -+ break; -+ } -+ -+ dst_e->sectors = src->replicas[i]; -+ dst_e->r = *src_e; -+ -+ /* recheck after setting nr_devs: */ -+ if (replicas_usage_next(dst_e) > dst_end) { -+ ret = -ERANGE; -+ break; -+ } -+ -+ memcpy(dst_e->r.devs, src_e->devs, src_e->nr_devs); -+ -+ dst_e = replicas_usage_next(dst_e); -+ } -+ -+ arg->replica_entries_bytes = (void *) dst_e - (void *) arg->replicas; -+ -+ percpu_up_read(&c->mark_lock); -+ kfree(src); -+ -+ if (!ret) -+ ret = copy_to_user(user_arg, arg, -+ sizeof(*arg) + arg->replica_entries_bytes); -+err: -+ kfree(arg); -+ return ret; -+} -+ -+static long bch2_ioctl_dev_usage(struct bch_fs *c, -+ struct bch_ioctl_dev_usage __user *user_arg) -+{ -+ struct bch_ioctl_dev_usage arg; -+ struct bch_dev_usage src; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EINVAL; -+ -+ if (copy_from_user(&arg, user_arg, sizeof(arg))) -+ return -EFAULT; -+ -+ if ((arg.flags & ~BCH_BY_INDEX) || -+ arg.pad[0] || -+ arg.pad[1] || -+ arg.pad[2]) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ src = bch2_dev_usage_read(c, ca); -+ -+ arg.state = ca->mi.state; -+ arg.bucket_size = ca->mi.bucket_size; -+ arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; -+ arg.available_buckets = arg.nr_buckets - src.buckets_unavailable; -+ arg.ec_buckets = src.buckets_ec; -+ arg.ec_sectors = src.sectors_ec; -+ -+ for (i = 0; i < BCH_DATA_NR; i++) { -+ arg.buckets[i] = src.buckets[i]; -+ arg.sectors[i] = src.sectors[i]; -+ } -+ -+ percpu_ref_put(&ca->ref); -+ -+ return copy_to_user(user_arg, &arg, sizeof(arg)); -+} -+ -+static long bch2_ioctl_read_super(struct bch_fs *c, -+ struct bch_ioctl_read_super arg) -+{ -+ struct bch_dev *ca = NULL; -+ struct bch_sb *sb; -+ int ret = 0; -+ -+ if ((arg.flags & ~(BCH_BY_INDEX|BCH_READ_DEV)) || -+ arg.pad) -+ return -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ -+ if (arg.flags & BCH_READ_DEV) { -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ -+ if (IS_ERR(ca)) { -+ ret = PTR_ERR(ca); -+ goto err; -+ } -+ -+ sb = ca->disk_sb.sb; -+ } else { -+ sb = c->disk_sb.sb; -+ } -+ -+ if (vstruct_bytes(sb) > arg.size) { -+ ret = -ERANGE; -+ goto err; -+ } -+ -+ ret = copy_to_user((void __user *)(unsigned long)arg.sb, -+ sb, vstruct_bytes(sb)); -+err: -+ if (ca) -+ percpu_ref_put(&ca->ref); -+ mutex_unlock(&c->sb_lock); -+ return ret; -+} -+ -+static long bch2_ioctl_disk_get_idx(struct bch_fs *c, -+ struct bch_ioctl_disk_get_idx arg) -+{ -+ dev_t dev = huge_decode_dev(arg.dev); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ for_each_online_member(ca, c, i) -+ if (ca->disk_sb.bdev->bd_dev == dev) { -+ percpu_ref_put(&ca->io_ref); -+ return i; -+ } -+ -+ return -ENOENT; -+} -+ -+static long bch2_ioctl_disk_resize(struct bch_fs *c, -+ struct bch_ioctl_disk_resize arg) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ if ((arg.flags & ~BCH_BY_INDEX) || -+ arg.pad) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ ret = bch2_dev_resize(c, ca, arg.nbuckets); -+ -+ percpu_ref_put(&ca->ref); -+ return ret; -+} -+ -+#define BCH_IOCTL(_name, _argtype) \ -+do { \ -+ _argtype i; \ -+ \ -+ if (copy_from_user(&i, arg, sizeof(i))) \ -+ return -EFAULT; \ -+ return bch2_ioctl_##_name(c, i); \ -+} while (0) -+ -+long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) -+{ -+ /* ioctls that don't require admin cap: */ -+ switch (cmd) 
{ -+ case BCH_IOCTL_QUERY_UUID: -+ return bch2_ioctl_query_uuid(c, arg); -+ case BCH_IOCTL_FS_USAGE: -+ return bch2_ioctl_fs_usage(c, arg); -+ case BCH_IOCTL_DEV_USAGE: -+ return bch2_ioctl_dev_usage(c, arg); -+ } -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ switch (cmd) { -+#if 0 -+ case BCH_IOCTL_START: -+ BCH_IOCTL(start, struct bch_ioctl_start); -+ case BCH_IOCTL_STOP: -+ return bch2_ioctl_stop(c); -+#endif -+ case BCH_IOCTL_READ_SUPER: -+ BCH_IOCTL(read_super, struct bch_ioctl_read_super); -+ case BCH_IOCTL_DISK_GET_IDX: -+ BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx); -+ } -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EINVAL; -+ -+ /* ioctls that do require admin cap: */ -+ switch (cmd) { -+ case BCH_IOCTL_DISK_ADD: -+ BCH_IOCTL(disk_add, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_REMOVE: -+ BCH_IOCTL(disk_remove, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_ONLINE: -+ BCH_IOCTL(disk_online, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_OFFLINE: -+ BCH_IOCTL(disk_offline, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_SET_STATE: -+ BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state); -+ case BCH_IOCTL_DATA: -+ BCH_IOCTL(data, struct bch_ioctl_data); -+ case BCH_IOCTL_DISK_RESIZE: -+ BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize); -+ -+ default: -+ return -ENOTTY; -+ } -+} -+ -+static DEFINE_IDR(bch_chardev_minor); -+ -+static long bch2_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v) -+{ -+ unsigned minor = iminor(file_inode(filp)); -+ struct bch_fs *c = minor < U8_MAX ? idr_find(&bch_chardev_minor, minor) : NULL; -+ void __user *arg = (void __user *) v; -+ -+ return c -+ ? bch2_fs_ioctl(c, cmd, arg) -+ : bch2_global_ioctl(cmd, arg); -+} -+ -+static const struct file_operations bch_chardev_fops = { -+ .owner = THIS_MODULE, -+ .unlocked_ioctl = bch2_chardev_ioctl, -+ .open = nonseekable_open, -+}; -+ -+static int bch_chardev_major; -+static struct class *bch_chardev_class; -+static struct device *bch_chardev; -+ -+void bch2_fs_chardev_exit(struct bch_fs *c) -+{ -+ if (!IS_ERR_OR_NULL(c->chardev)) -+ device_unregister(c->chardev); -+ if (c->minor >= 0) -+ idr_remove(&bch_chardev_minor, c->minor); -+} -+ -+int bch2_fs_chardev_init(struct bch_fs *c) -+{ -+ c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL); -+ if (c->minor < 0) -+ return c->minor; -+ -+ c->chardev = device_create(bch_chardev_class, NULL, -+ MKDEV(bch_chardev_major, c->minor), c, -+ "bcachefs%u-ctl", c->minor); -+ if (IS_ERR(c->chardev)) -+ return PTR_ERR(c->chardev); -+ -+ return 0; -+} -+ -+void bch2_chardev_exit(void) -+{ -+ if (!IS_ERR_OR_NULL(bch_chardev_class)) -+ device_destroy(bch_chardev_class, -+ MKDEV(bch_chardev_major, U8_MAX)); -+ if (!IS_ERR_OR_NULL(bch_chardev_class)) -+ class_destroy(bch_chardev_class); -+ if (bch_chardev_major > 0) -+ unregister_chrdev(bch_chardev_major, "bcachefs"); -+} -+ -+int __init bch2_chardev_init(void) -+{ -+ bch_chardev_major = register_chrdev(0, "bcachefs-ctl", &bch_chardev_fops); -+ if (bch_chardev_major < 0) -+ return bch_chardev_major; -+ -+ bch_chardev_class = class_create(THIS_MODULE, "bcachefs"); -+ if (IS_ERR(bch_chardev_class)) -+ return PTR_ERR(bch_chardev_class); -+ -+ bch_chardev = device_create(bch_chardev_class, NULL, -+ MKDEV(bch_chardev_major, U8_MAX), -+ NULL, "bcachefs-ctl"); -+ if (IS_ERR(bch_chardev)) -+ return PTR_ERR(bch_chardev); -+ -+ return 0; -+} -+ -+#endif /* NO_BCACHEFS_CHARDEV */ -diff --git a/fs/bcachefs/chardev.h b/fs/bcachefs/chardev.h -new file mode 100644 -index 
000000000000..3a4890d39ff9 ---- /dev/null -+++ b/fs/bcachefs/chardev.h -@@ -0,0 +1,31 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CHARDEV_H -+#define _BCACHEFS_CHARDEV_H -+ -+#ifndef NO_BCACHEFS_FS -+ -+long bch2_fs_ioctl(struct bch_fs *, unsigned, void __user *); -+ -+void bch2_fs_chardev_exit(struct bch_fs *); -+int bch2_fs_chardev_init(struct bch_fs *); -+ -+void bch2_chardev_exit(void); -+int __init bch2_chardev_init(void); -+ -+#else -+ -+static inline long bch2_fs_ioctl(struct bch_fs *c, -+ unsigned cmd, void __user * arg) -+{ -+ return -ENOSYS; -+} -+ -+static inline void bch2_fs_chardev_exit(struct bch_fs *c) {} -+static inline int bch2_fs_chardev_init(struct bch_fs *c) { return 0; } -+ -+static inline void bch2_chardev_exit(void) {} -+static inline int __init bch2_chardev_init(void) { return 0; } -+ -+#endif /* NO_BCACHEFS_FS */ -+ -+#endif /* _BCACHEFS_CHARDEV_H */ -diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c -new file mode 100644 -index 000000000000..3d88719ba86c ---- /dev/null -+++ b/fs/bcachefs/checksum.c -@@ -0,0 +1,618 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "checksum.h" -+#include "super.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static u64 bch2_checksum_init(unsigned type) -+{ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ return 0; -+ case BCH_CSUM_CRC32C_NONZERO: -+ return U32_MAX; -+ case BCH_CSUM_CRC64_NONZERO: -+ return U64_MAX; -+ case BCH_CSUM_CRC32C: -+ return 0; -+ case BCH_CSUM_CRC64: -+ return 0; -+ default: -+ BUG(); -+ } -+} -+ -+static u64 bch2_checksum_final(unsigned type, u64 crc) -+{ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ return 0; -+ case BCH_CSUM_CRC32C_NONZERO: -+ return crc ^ U32_MAX; -+ case BCH_CSUM_CRC64_NONZERO: -+ return crc ^ U64_MAX; -+ case BCH_CSUM_CRC32C: -+ return crc; -+ case BCH_CSUM_CRC64: -+ return crc; -+ default: -+ BUG(); -+ } -+} -+ -+static u64 bch2_checksum_update(unsigned type, u64 crc, const void *data, size_t len) -+{ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ return 0; -+ case BCH_CSUM_CRC32C_NONZERO: -+ case BCH_CSUM_CRC32C: -+ return crc32c(crc, data, len); -+ case BCH_CSUM_CRC64_NONZERO: -+ case BCH_CSUM_CRC64: -+ return crc64_be(crc, data, len); -+ default: -+ BUG(); -+ } -+} -+ -+static inline void do_encrypt_sg(struct crypto_sync_skcipher *tfm, -+ struct nonce nonce, -+ struct scatterlist *sg, size_t len) -+{ -+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); -+ int ret; -+ -+ skcipher_request_set_sync_tfm(req, tfm); -+ skcipher_request_set_crypt(req, sg, sg, len, nonce.d); -+ -+ ret = crypto_skcipher_encrypt(req); -+ BUG_ON(ret); -+} -+ -+static inline void do_encrypt(struct crypto_sync_skcipher *tfm, -+ struct nonce nonce, -+ void *buf, size_t len) -+{ -+ struct scatterlist sg; -+ -+ sg_init_one(&sg, buf, len); -+ do_encrypt_sg(tfm, nonce, &sg, len); -+} -+ -+int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, -+ void *buf, size_t len) -+{ -+ struct crypto_sync_skcipher *chacha20 = -+ crypto_alloc_sync_skcipher("chacha20", 0, 0); -+ int ret; -+ -+ if (!chacha20) { -+ pr_err("error requesting chacha20 module: %li", PTR_ERR(chacha20)); -+ return PTR_ERR(chacha20); -+ } -+ -+ ret = crypto_skcipher_setkey(&chacha20->base, -+ (void *) key, sizeof(*key)); -+ if (ret) { -+ pr_err("crypto_skcipher_setkey() error: %i", ret); -+ goto err; -+ } -+ -+ do_encrypt(chacha20, nonce, buf, len); -+err: -+ 
crypto_free_sync_skcipher(chacha20); -+ return ret; -+} -+ -+static void gen_poly_key(struct bch_fs *c, struct shash_desc *desc, -+ struct nonce nonce) -+{ -+ u8 key[POLY1305_KEY_SIZE]; -+ -+ nonce.d[3] ^= BCH_NONCE_POLY; -+ -+ memset(key, 0, sizeof(key)); -+ do_encrypt(c->chacha20, nonce, key, sizeof(key)); -+ -+ desc->tfm = c->poly1305; -+ crypto_shash_init(desc); -+ crypto_shash_update(desc, key, sizeof(key)); -+} -+ -+struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type, -+ struct nonce nonce, const void *data, size_t len) -+{ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ case BCH_CSUM_CRC32C_NONZERO: -+ case BCH_CSUM_CRC64_NONZERO: -+ case BCH_CSUM_CRC32C: -+ case BCH_CSUM_CRC64: { -+ u64 crc = bch2_checksum_init(type); -+ -+ crc = bch2_checksum_update(type, crc, data, len); -+ crc = bch2_checksum_final(type, crc); -+ -+ return (struct bch_csum) { .lo = cpu_to_le64(crc) }; -+ } -+ -+ case BCH_CSUM_CHACHA20_POLY1305_80: -+ case BCH_CSUM_CHACHA20_POLY1305_128: { -+ SHASH_DESC_ON_STACK(desc, c->poly1305); -+ u8 digest[POLY1305_DIGEST_SIZE]; -+ struct bch_csum ret = { 0 }; -+ -+ gen_poly_key(c, desc, nonce); -+ -+ crypto_shash_update(desc, data, len); -+ crypto_shash_final(desc, digest); -+ -+ memcpy(&ret, digest, bch_crc_bytes[type]); -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_encrypt(struct bch_fs *c, unsigned type, -+ struct nonce nonce, void *data, size_t len) -+{ -+ if (!bch2_csum_type_is_encryption(type)) -+ return; -+ -+ do_encrypt(c->chacha20, nonce, data, len); -+} -+ -+static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type, -+ struct nonce nonce, struct bio *bio, -+ struct bvec_iter *iter) -+{ -+ struct bio_vec bv; -+ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ return (struct bch_csum) { 0 }; -+ case BCH_CSUM_CRC32C_NONZERO: -+ case BCH_CSUM_CRC64_NONZERO: -+ case BCH_CSUM_CRC32C: -+ case BCH_CSUM_CRC64: { -+ u64 crc = bch2_checksum_init(type); -+ -+#ifdef CONFIG_HIGHMEM -+ __bio_for_each_segment(bv, bio, *iter, *iter) { -+ void *p = kmap_atomic(bv.bv_page) + bv.bv_offset; -+ crc = bch2_checksum_update(type, -+ crc, p, bv.bv_len); -+ kunmap_atomic(p); -+ } -+#else -+ __bio_for_each_bvec(bv, bio, *iter, *iter) -+ crc = bch2_checksum_update(type, crc, -+ page_address(bv.bv_page) + bv.bv_offset, -+ bv.bv_len); -+#endif -+ crc = bch2_checksum_final(type, crc); -+ return (struct bch_csum) { .lo = cpu_to_le64(crc) }; -+ } -+ -+ case BCH_CSUM_CHACHA20_POLY1305_80: -+ case BCH_CSUM_CHACHA20_POLY1305_128: { -+ SHASH_DESC_ON_STACK(desc, c->poly1305); -+ u8 digest[POLY1305_DIGEST_SIZE]; -+ struct bch_csum ret = { 0 }; -+ -+ gen_poly_key(c, desc, nonce); -+ -+#ifdef CONFIG_HIGHMEM -+ __bio_for_each_segment(bv, bio, *iter, *iter) { -+ void *p = kmap_atomic(bv.bv_page) + bv.bv_offset; -+ -+ crypto_shash_update(desc, p, bv.bv_len); -+ kunmap_atomic(p); -+ } -+#else -+ __bio_for_each_bvec(bv, bio, *iter, *iter) -+ crypto_shash_update(desc, -+ page_address(bv.bv_page) + bv.bv_offset, -+ bv.bv_len); -+#endif -+ crypto_shash_final(desc, digest); -+ -+ memcpy(&ret, digest, bch_crc_bytes[type]); -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+struct bch_csum bch2_checksum_bio(struct bch_fs *c, unsigned type, -+ struct nonce nonce, struct bio *bio) -+{ -+ struct bvec_iter iter = bio->bi_iter; -+ -+ return __bch2_checksum_bio(c, type, nonce, bio, &iter); -+} -+ -+void bch2_encrypt_bio(struct bch_fs *c, unsigned type, -+ struct nonce nonce, struct bio *bio) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ struct scatterlist sgl[16], *sg 
= sgl; -+ size_t bytes = 0; -+ -+ if (!bch2_csum_type_is_encryption(type)) -+ return; -+ -+ sg_init_table(sgl, ARRAY_SIZE(sgl)); -+ -+ bio_for_each_segment(bv, bio, iter) { -+ if (sg == sgl + ARRAY_SIZE(sgl)) { -+ sg_mark_end(sg - 1); -+ do_encrypt_sg(c->chacha20, nonce, sgl, bytes); -+ -+ nonce = nonce_add(nonce, bytes); -+ bytes = 0; -+ -+ sg_init_table(sgl, ARRAY_SIZE(sgl)); -+ sg = sgl; -+ } -+ -+ sg_set_page(sg++, bv.bv_page, bv.bv_len, bv.bv_offset); -+ bytes += bv.bv_len; -+ } -+ -+ sg_mark_end(sg - 1); -+ do_encrypt_sg(c->chacha20, nonce, sgl, bytes); -+} -+ -+struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a, -+ struct bch_csum b, size_t b_len) -+{ -+ BUG_ON(!bch2_checksum_mergeable(type)); -+ -+ while (b_len) { -+ unsigned b = min_t(unsigned, b_len, PAGE_SIZE); -+ -+ a.lo = bch2_checksum_update(type, a.lo, -+ page_address(ZERO_PAGE(0)), b); -+ b_len -= b; -+ } -+ -+ a.lo ^= b.lo; -+ a.hi ^= b.hi; -+ return a; -+} -+ -+int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, -+ struct bversion version, -+ struct bch_extent_crc_unpacked crc_old, -+ struct bch_extent_crc_unpacked *crc_a, -+ struct bch_extent_crc_unpacked *crc_b, -+ unsigned len_a, unsigned len_b, -+ unsigned new_csum_type) -+{ -+ struct bvec_iter iter = bio->bi_iter; -+ struct nonce nonce = extent_nonce(version, crc_old); -+ struct bch_csum merged = { 0 }; -+ struct crc_split { -+ struct bch_extent_crc_unpacked *crc; -+ unsigned len; -+ unsigned csum_type; -+ struct bch_csum csum; -+ } splits[3] = { -+ { crc_a, len_a, new_csum_type }, -+ { crc_b, len_b, new_csum_type }, -+ { NULL, bio_sectors(bio) - len_a - len_b, new_csum_type }, -+ }, *i; -+ bool mergeable = crc_old.csum_type == new_csum_type && -+ bch2_checksum_mergeable(new_csum_type); -+ unsigned crc_nonce = crc_old.nonce; -+ -+ BUG_ON(len_a + len_b > bio_sectors(bio)); -+ BUG_ON(crc_old.uncompressed_size != bio_sectors(bio)); -+ BUG_ON(crc_is_compressed(crc_old)); -+ BUG_ON(bch2_csum_type_is_encryption(crc_old.csum_type) != -+ bch2_csum_type_is_encryption(new_csum_type)); -+ -+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) { -+ iter.bi_size = i->len << 9; -+ if (mergeable || i->crc) -+ i->csum = __bch2_checksum_bio(c, i->csum_type, -+ nonce, bio, &iter); -+ else -+ bio_advance_iter(bio, &iter, i->len << 9); -+ nonce = nonce_add(nonce, i->len << 9); -+ } -+ -+ if (mergeable) -+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) -+ merged = bch2_checksum_merge(new_csum_type, merged, -+ i->csum, i->len << 9); -+ else -+ merged = bch2_checksum_bio(c, crc_old.csum_type, -+ extent_nonce(version, crc_old), bio); -+ -+ if (bch2_crc_cmp(merged, crc_old.csum)) -+ return -EIO; -+ -+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) { -+ if (i->crc) -+ *i->crc = (struct bch_extent_crc_unpacked) { -+ .csum_type = i->csum_type, -+ .compression_type = crc_old.compression_type, -+ .compressed_size = i->len, -+ .uncompressed_size = i->len, -+ .offset = 0, -+ .live_size = i->len, -+ .nonce = crc_nonce, -+ .csum = i->csum, -+ }; -+ -+ if (bch2_csum_type_is_encryption(new_csum_type)) -+ crc_nonce += i->len; -+ } -+ -+ return 0; -+} -+ -+#ifdef __KERNEL__ -+int bch2_request_key(struct bch_sb *sb, struct bch_key *key) -+{ -+ char key_description[60]; -+ struct key *keyring_key; -+ const struct user_key_payload *ukp; -+ int ret; -+ -+ snprintf(key_description, sizeof(key_description), -+ "bcachefs:%pUb", &sb->user_uuid); -+ -+ keyring_key = request_key(&key_type_logon, key_description, NULL); -+ if (IS_ERR(keyring_key)) -+ return 
PTR_ERR(keyring_key); -+ -+ down_read(&keyring_key->sem); -+ ukp = dereference_key_locked(keyring_key); -+ if (ukp->datalen == sizeof(*key)) { -+ memcpy(key, ukp->data, ukp->datalen); -+ ret = 0; -+ } else { -+ ret = -EINVAL; -+ } -+ up_read(&keyring_key->sem); -+ key_put(keyring_key); -+ -+ return ret; -+} -+#else -+#include -+#include -+ -+int bch2_request_key(struct bch_sb *sb, struct bch_key *key) -+{ -+ key_serial_t key_id; -+ char key_description[60]; -+ char uuid[40]; -+ -+ uuid_unparse_lower(sb->user_uuid.b, uuid); -+ sprintf(key_description, "bcachefs:%s", uuid); -+ -+ key_id = request_key("user", key_description, NULL, -+ KEY_SPEC_USER_KEYRING); -+ if (key_id < 0) -+ return -errno; -+ -+ if (keyctl_read(key_id, (void *) key, sizeof(*key)) != sizeof(*key)) -+ return -1; -+ -+ return 0; -+} -+#endif -+ -+int bch2_decrypt_sb_key(struct bch_fs *c, -+ struct bch_sb_field_crypt *crypt, -+ struct bch_key *key) -+{ -+ struct bch_encrypted_key sb_key = crypt->key; -+ struct bch_key user_key; -+ int ret = 0; -+ -+ /* is key encrypted? */ -+ if (!bch2_key_is_encrypted(&sb_key)) -+ goto out; -+ -+ ret = bch2_request_key(c->disk_sb.sb, &user_key); -+ if (ret) { -+ bch_err(c, "error requesting encryption key: %i", ret); -+ goto err; -+ } -+ -+ /* decrypt real key: */ -+ ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c), -+ &sb_key, sizeof(sb_key)); -+ if (ret) -+ goto err; -+ -+ if (bch2_key_is_encrypted(&sb_key)) { -+ bch_err(c, "incorrect encryption key"); -+ ret = -EINVAL; -+ goto err; -+ } -+out: -+ *key = sb_key.key; -+err: -+ memzero_explicit(&sb_key, sizeof(sb_key)); -+ memzero_explicit(&user_key, sizeof(user_key)); -+ return ret; -+} -+ -+static int bch2_alloc_ciphers(struct bch_fs *c) -+{ -+ if (!c->chacha20) -+ c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0); -+ if (IS_ERR(c->chacha20)) { -+ bch_err(c, "error requesting chacha20 module: %li", -+ PTR_ERR(c->chacha20)); -+ return PTR_ERR(c->chacha20); -+ } -+ -+ if (!c->poly1305) -+ c->poly1305 = crypto_alloc_shash("poly1305", 0, 0); -+ if (IS_ERR(c->poly1305)) { -+ bch_err(c, "error requesting poly1305 module: %li", -+ PTR_ERR(c->poly1305)); -+ return PTR_ERR(c->poly1305); -+ } -+ -+ return 0; -+} -+ -+int bch2_disable_encryption(struct bch_fs *c) -+{ -+ struct bch_sb_field_crypt *crypt; -+ struct bch_key key; -+ int ret = -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ -+ crypt = bch2_sb_get_crypt(c->disk_sb.sb); -+ if (!crypt) -+ goto out; -+ -+ /* is key encrypted? */ -+ ret = 0; -+ if (bch2_key_is_encrypted(&crypt->key)) -+ goto out; -+ -+ ret = bch2_decrypt_sb_key(c, crypt, &key); -+ if (ret) -+ goto out; -+ -+ crypt->key.magic = BCH_KEY_MAGIC; -+ crypt->key.key = key; -+ -+ SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 0); -+ bch2_write_super(c); -+out: -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_enable_encryption(struct bch_fs *c, bool keyed) -+{ -+ struct bch_encrypted_key key; -+ struct bch_key user_key; -+ struct bch_sb_field_crypt *crypt; -+ int ret = -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ -+ /* Do we already have an encryption key? 
*/ -+ if (bch2_sb_get_crypt(c->disk_sb.sb)) -+ goto err; -+ -+ ret = bch2_alloc_ciphers(c); -+ if (ret) -+ goto err; -+ -+ key.magic = BCH_KEY_MAGIC; -+ get_random_bytes(&key.key, sizeof(key.key)); -+ -+ if (keyed) { -+ ret = bch2_request_key(c->disk_sb.sb, &user_key); -+ if (ret) { -+ bch_err(c, "error requesting encryption key: %i", ret); -+ goto err; -+ } -+ -+ ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c), -+ &key, sizeof(key)); -+ if (ret) -+ goto err; -+ } -+ -+ ret = crypto_skcipher_setkey(&c->chacha20->base, -+ (void *) &key.key, sizeof(key.key)); -+ if (ret) -+ goto err; -+ -+ crypt = bch2_sb_resize_crypt(&c->disk_sb, sizeof(*crypt) / sizeof(u64)); -+ if (!crypt) { -+ ret = -ENOMEM; /* XXX this technically could be -ENOSPC */ -+ goto err; -+ } -+ -+ crypt->key = key; -+ -+ /* write superblock */ -+ SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 1); -+ bch2_write_super(c); -+err: -+ mutex_unlock(&c->sb_lock); -+ memzero_explicit(&user_key, sizeof(user_key)); -+ memzero_explicit(&key, sizeof(key)); -+ return ret; -+} -+ -+void bch2_fs_encryption_exit(struct bch_fs *c) -+{ -+ if (!IS_ERR_OR_NULL(c->poly1305)) -+ crypto_free_shash(c->poly1305); -+ if (!IS_ERR_OR_NULL(c->chacha20)) -+ crypto_free_sync_skcipher(c->chacha20); -+ if (!IS_ERR_OR_NULL(c->sha256)) -+ crypto_free_shash(c->sha256); -+} -+ -+int bch2_fs_encryption_init(struct bch_fs *c) -+{ -+ struct bch_sb_field_crypt *crypt; -+ struct bch_key key; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ c->sha256 = crypto_alloc_shash("sha256", 0, 0); -+ if (IS_ERR(c->sha256)) { -+ bch_err(c, "error requesting sha256 module"); -+ ret = PTR_ERR(c->sha256); -+ goto out; -+ } -+ -+ crypt = bch2_sb_get_crypt(c->disk_sb.sb); -+ if (!crypt) -+ goto out; -+ -+ ret = bch2_alloc_ciphers(c); -+ if (ret) -+ goto out; -+ -+ ret = bch2_decrypt_sb_key(c, crypt, &key); -+ if (ret) -+ goto out; -+ -+ ret = crypto_skcipher_setkey(&c->chacha20->base, -+ (void *) &key.key, sizeof(key.key)); -+ if (ret) -+ goto out; -+out: -+ memzero_explicit(&key, sizeof(key)); -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h -new file mode 100644 -index 000000000000..24dee8039d57 ---- /dev/null -+++ b/fs/bcachefs/checksum.h -@@ -0,0 +1,202 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CHECKSUM_H -+#define _BCACHEFS_CHECKSUM_H -+ -+#include "bcachefs.h" -+#include "extents_types.h" -+#include "super-io.h" -+ -+#include -+#include -+ -+static inline bool bch2_checksum_mergeable(unsigned type) -+{ -+ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ case BCH_CSUM_CRC32C: -+ case BCH_CSUM_CRC64: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+struct bch_csum bch2_checksum_merge(unsigned, struct bch_csum, -+ struct bch_csum, size_t); -+ -+#define BCH_NONCE_EXTENT cpu_to_le32(1 << 28) -+#define BCH_NONCE_BTREE cpu_to_le32(2 << 28) -+#define BCH_NONCE_JOURNAL cpu_to_le32(3 << 28) -+#define BCH_NONCE_PRIO cpu_to_le32(4 << 28) -+#define BCH_NONCE_POLY cpu_to_le32(1 << 31) -+ -+struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce, -+ const void *, size_t); -+ -+/* -+ * This is used for various on disk data structures - bch_sb, prio_set, bset, -+ * jset: The checksum is _always_ the first field of these structs -+ */ -+#define csum_vstruct(_c, _type, _nonce, _i) \ -+({ \ -+ const void *start = ((const void *) (_i)) + sizeof((_i)->csum); \ -+ const void *end = vstruct_end(_i); \ -+ \ -+ bch2_checksum(_c, _type, _nonce, start, end - start); \ -+}) 
-+ -+int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t); -+int bch2_request_key(struct bch_sb *, struct bch_key *); -+ -+void bch2_encrypt(struct bch_fs *, unsigned, struct nonce, -+ void *data, size_t); -+ -+struct bch_csum bch2_checksum_bio(struct bch_fs *, unsigned, -+ struct nonce, struct bio *); -+ -+int bch2_rechecksum_bio(struct bch_fs *, struct bio *, struct bversion, -+ struct bch_extent_crc_unpacked, -+ struct bch_extent_crc_unpacked *, -+ struct bch_extent_crc_unpacked *, -+ unsigned, unsigned, unsigned); -+ -+void bch2_encrypt_bio(struct bch_fs *, unsigned, -+ struct nonce, struct bio *); -+ -+int bch2_decrypt_sb_key(struct bch_fs *, struct bch_sb_field_crypt *, -+ struct bch_key *); -+ -+int bch2_disable_encryption(struct bch_fs *); -+int bch2_enable_encryption(struct bch_fs *, bool); -+ -+void bch2_fs_encryption_exit(struct bch_fs *); -+int bch2_fs_encryption_init(struct bch_fs *); -+ -+static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opts type, -+ bool data) -+{ -+ switch (type) { -+ case BCH_CSUM_OPT_NONE: -+ return BCH_CSUM_NONE; -+ case BCH_CSUM_OPT_CRC32C: -+ return data ? BCH_CSUM_CRC32C : BCH_CSUM_CRC32C_NONZERO; -+ case BCH_CSUM_OPT_CRC64: -+ return data ? BCH_CSUM_CRC64 : BCH_CSUM_CRC64_NONZERO; -+ default: -+ BUG(); -+ } -+} -+ -+static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c, -+ unsigned opt) -+{ -+ if (c->sb.encryption_type) -+ return c->opts.wide_macs -+ ? BCH_CSUM_CHACHA20_POLY1305_128 -+ : BCH_CSUM_CHACHA20_POLY1305_80; -+ -+ return bch2_csum_opt_to_type(opt, true); -+} -+ -+static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c) -+{ -+ if (c->sb.encryption_type) -+ return BCH_CSUM_CHACHA20_POLY1305_128; -+ -+ return bch2_csum_opt_to_type(c->opts.metadata_checksum, false); -+} -+ -+static const unsigned bch2_compression_opt_to_type[] = { -+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t, -+ BCH_COMPRESSION_OPTS() -+#undef x -+}; -+ -+static inline bool bch2_checksum_type_valid(const struct bch_fs *c, -+ unsigned type) -+{ -+ if (type >= BCH_CSUM_NR) -+ return false; -+ -+ if (bch2_csum_type_is_encryption(type) && !c->chacha20) -+ return false; -+ -+ return true; -+} -+ -+/* returns true if not equal */ -+static inline bool bch2_crc_cmp(struct bch_csum l, struct bch_csum r) -+{ -+ /* -+ * XXX: need some way of preventing the compiler from optimizing this -+ * into a form that isn't constant time.. -+ */ -+ return ((l.lo ^ r.lo) | (l.hi ^ r.hi)) != 0; -+} -+ -+/* for skipping ahead and encrypting/decrypting at an offset: */ -+static inline struct nonce nonce_add(struct nonce nonce, unsigned offset) -+{ -+ EBUG_ON(offset & (CHACHA_BLOCK_SIZE - 1)); -+ -+ le32_add_cpu(&nonce.d[0], offset / CHACHA_BLOCK_SIZE); -+ return nonce; -+} -+ -+static inline struct nonce null_nonce(void) -+{ -+ struct nonce ret; -+ -+ memset(&ret, 0, sizeof(ret)); -+ return ret; -+} -+ -+static inline struct nonce extent_nonce(struct bversion version, -+ struct bch_extent_crc_unpacked crc) -+{ -+ unsigned compression_type = crc_is_compressed(crc) -+ ? crc.compression_type -+ : 0; -+ unsigned size = compression_type ? 
crc.uncompressed_size : 0; -+ struct nonce nonce = (struct nonce) {{ -+ [0] = cpu_to_le32(size << 22), -+ [1] = cpu_to_le32(version.lo), -+ [2] = cpu_to_le32(version.lo >> 32), -+ [3] = cpu_to_le32(version.hi| -+ (compression_type << 24))^BCH_NONCE_EXTENT, -+ }}; -+ -+ return nonce_add(nonce, crc.nonce << 9); -+} -+ -+static inline bool bch2_key_is_encrypted(struct bch_encrypted_key *key) -+{ -+ return le64_to_cpu(key->magic) != BCH_KEY_MAGIC; -+} -+ -+static inline struct nonce __bch2_sb_key_nonce(struct bch_sb *sb) -+{ -+ __le64 magic = __bch2_sb_magic(sb); -+ -+ return (struct nonce) {{ -+ [0] = 0, -+ [1] = 0, -+ [2] = ((__le32 *) &magic)[0], -+ [3] = ((__le32 *) &magic)[1], -+ }}; -+} -+ -+static inline struct nonce bch2_sb_key_nonce(struct bch_fs *c) -+{ -+ __le64 magic = bch2_sb_magic(c); -+ -+ return (struct nonce) {{ -+ [0] = 0, -+ [1] = 0, -+ [2] = ((__le32 *) &magic)[0], -+ [3] = ((__le32 *) &magic)[1], -+ }}; -+} -+ -+#endif /* _BCACHEFS_CHECKSUM_H */ -diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c -new file mode 100644 -index 000000000000..a9f5d5696622 ---- /dev/null -+++ b/fs/bcachefs/clock.c -@@ -0,0 +1,194 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "clock.h" -+ -+#include -+#include -+#include -+ -+static inline long io_timer_cmp(io_timer_heap *h, -+ struct io_timer *l, -+ struct io_timer *r) -+{ -+ return l->expire - r->expire; -+} -+ -+void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer) -+{ -+ size_t i; -+ -+ spin_lock(&clock->timer_lock); -+ -+ if (time_after_eq((unsigned long) atomic_long_read(&clock->now), -+ timer->expire)) { -+ spin_unlock(&clock->timer_lock); -+ timer->fn(timer); -+ return; -+ } -+ -+ for (i = 0; i < clock->timers.used; i++) -+ if (clock->timers.data[i] == timer) -+ goto out; -+ -+ BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL)); -+out: -+ spin_unlock(&clock->timer_lock); -+} -+ -+void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer) -+{ -+ size_t i; -+ -+ spin_lock(&clock->timer_lock); -+ -+ for (i = 0; i < clock->timers.used; i++) -+ if (clock->timers.data[i] == timer) { -+ heap_del(&clock->timers, i, io_timer_cmp, NULL); -+ break; -+ } -+ -+ spin_unlock(&clock->timer_lock); -+} -+ -+struct io_clock_wait { -+ struct io_timer io_timer; -+ struct timer_list cpu_timer; -+ struct task_struct *task; -+ int expired; -+}; -+ -+static void io_clock_wait_fn(struct io_timer *timer) -+{ -+ struct io_clock_wait *wait = container_of(timer, -+ struct io_clock_wait, io_timer); -+ -+ wait->expired = 1; -+ wake_up_process(wait->task); -+} -+ -+static void io_clock_cpu_timeout(struct timer_list *timer) -+{ -+ struct io_clock_wait *wait = container_of(timer, -+ struct io_clock_wait, cpu_timer); -+ -+ wait->expired = 1; -+ wake_up_process(wait->task); -+} -+ -+void bch2_io_clock_schedule_timeout(struct io_clock *clock, unsigned long until) -+{ -+ struct io_clock_wait wait; -+ -+ /* XXX: calculate sleep time rigorously */ -+ wait.io_timer.expire = until; -+ wait.io_timer.fn = io_clock_wait_fn; -+ wait.task = current; -+ wait.expired = 0; -+ bch2_io_timer_add(clock, &wait.io_timer); -+ -+ schedule(); -+ -+ bch2_io_timer_del(clock, &wait.io_timer); -+} -+ -+void bch2_kthread_io_clock_wait(struct io_clock *clock, -+ unsigned long io_until, -+ unsigned long cpu_timeout) -+{ -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ struct io_clock_wait wait; -+ -+ wait.io_timer.expire = io_until; -+ wait.io_timer.fn = io_clock_wait_fn; -+ wait.task = current; -+ wait.expired = 0; -+ 
bch2_io_timer_add(clock, &wait.io_timer); -+ -+ timer_setup_on_stack(&wait.cpu_timer, io_clock_cpu_timeout, 0); -+ -+ if (cpu_timeout != MAX_SCHEDULE_TIMEOUT) -+ mod_timer(&wait.cpu_timer, cpu_timeout + jiffies); -+ -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ if (kthread && kthread_should_stop()) -+ break; -+ -+ if (wait.expired) -+ break; -+ -+ schedule(); -+ try_to_freeze(); -+ } -+ -+ __set_current_state(TASK_RUNNING); -+ del_singleshot_timer_sync(&wait.cpu_timer); -+ destroy_timer_on_stack(&wait.cpu_timer); -+ bch2_io_timer_del(clock, &wait.io_timer); -+} -+ -+static struct io_timer *get_expired_timer(struct io_clock *clock, -+ unsigned long now) -+{ -+ struct io_timer *ret = NULL; -+ -+ spin_lock(&clock->timer_lock); -+ -+ if (clock->timers.used && -+ time_after_eq(now, clock->timers.data[0]->expire)) -+ heap_pop(&clock->timers, ret, io_timer_cmp, NULL); -+ -+ spin_unlock(&clock->timer_lock); -+ -+ return ret; -+} -+ -+void __bch2_increment_clock(struct io_clock *clock, unsigned sectors) -+{ -+ struct io_timer *timer; -+ unsigned long now = atomic_long_add_return(sectors, &clock->now); -+ -+ while ((timer = get_expired_timer(clock, now))) -+ timer->fn(timer); -+} -+ -+ssize_t bch2_io_timers_show(struct io_clock *clock, char *buf) -+{ -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ unsigned long now; -+ unsigned i; -+ -+ spin_lock(&clock->timer_lock); -+ now = atomic_long_read(&clock->now); -+ -+ for (i = 0; i < clock->timers.used; i++) -+ pr_buf(&out, "%ps:\t%li\n", -+ clock->timers.data[i]->fn, -+ clock->timers.data[i]->expire - now); -+ spin_unlock(&clock->timer_lock); -+ -+ return out.pos - buf; -+} -+ -+void bch2_io_clock_exit(struct io_clock *clock) -+{ -+ free_heap(&clock->timers); -+ free_percpu(clock->pcpu_buf); -+} -+ -+int bch2_io_clock_init(struct io_clock *clock) -+{ -+ atomic_long_set(&clock->now, 0); -+ spin_lock_init(&clock->timer_lock); -+ -+ clock->max_slop = IO_CLOCK_PCPU_SECTORS * num_possible_cpus(); -+ -+ clock->pcpu_buf = alloc_percpu(*clock->pcpu_buf); -+ if (!clock->pcpu_buf) -+ return -ENOMEM; -+ -+ if (!init_heap(&clock->timers, NR_IO_TIMERS, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ return 0; -+} -diff --git a/fs/bcachefs/clock.h b/fs/bcachefs/clock.h -new file mode 100644 -index 000000000000..da50afe206cc ---- /dev/null -+++ b/fs/bcachefs/clock.h -@@ -0,0 +1,38 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CLOCK_H -+#define _BCACHEFS_CLOCK_H -+ -+void bch2_io_timer_add(struct io_clock *, struct io_timer *); -+void bch2_io_timer_del(struct io_clock *, struct io_timer *); -+void bch2_kthread_io_clock_wait(struct io_clock *, unsigned long, -+ unsigned long); -+ -+void __bch2_increment_clock(struct io_clock *, unsigned); -+ -+static inline void bch2_increment_clock(struct bch_fs *c, unsigned sectors, -+ int rw) -+{ -+ struct io_clock *clock = &c->io_clock[rw]; -+ -+ if (unlikely(this_cpu_add_return(*clock->pcpu_buf, sectors) >= -+ IO_CLOCK_PCPU_SECTORS)) -+ __bch2_increment_clock(clock, this_cpu_xchg(*clock->pcpu_buf, 0)); -+} -+ -+void bch2_io_clock_schedule_timeout(struct io_clock *, unsigned long); -+ -+#define bch2_kthread_wait_event_ioclock_timeout(condition, clock, timeout)\ -+({ \ -+ long __ret = timeout; \ -+ might_sleep(); \ -+ if (!___wait_cond_timeout(condition)) \ -+ __ret = __wait_event_timeout(wq, condition, timeout); \ -+ __ret; \ -+}) -+ -+ssize_t bch2_io_timers_show(struct io_clock *, char *); -+ -+void bch2_io_clock_exit(struct io_clock *); -+int bch2_io_clock_init(struct io_clock *); -+ -+#endif /* 
_BCACHEFS_CLOCK_H */ -diff --git a/fs/bcachefs/clock_types.h b/fs/bcachefs/clock_types.h -new file mode 100644 -index 000000000000..92c740a47565 ---- /dev/null -+++ b/fs/bcachefs/clock_types.h -@@ -0,0 +1,37 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CLOCK_TYPES_H -+#define _BCACHEFS_CLOCK_TYPES_H -+ -+#include "util.h" -+ -+#define NR_IO_TIMERS (BCH_SB_MEMBERS_MAX * 3) -+ -+/* -+ * Clocks/timers in units of sectors of IO: -+ * -+ * Note - they use percpu batching, so they're only approximate. -+ */ -+ -+struct io_timer; -+typedef void (*io_timer_fn)(struct io_timer *); -+ -+struct io_timer { -+ io_timer_fn fn; -+ unsigned long expire; -+}; -+ -+/* Amount to buffer up on a percpu counter */ -+#define IO_CLOCK_PCPU_SECTORS 128 -+ -+typedef HEAP(struct io_timer *) io_timer_heap; -+ -+struct io_clock { -+ atomic_long_t now; -+ u16 __percpu *pcpu_buf; -+ unsigned max_slop; -+ -+ spinlock_t timer_lock; -+ io_timer_heap timers; -+}; -+ -+#endif /* _BCACHEFS_CLOCK_TYPES_H */ -diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c -new file mode 100644 -index 000000000000..3d75527d2d81 ---- /dev/null -+++ b/fs/bcachefs/compress.c -@@ -0,0 +1,633 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "checksum.h" -+#include "compress.h" -+#include "extents.h" -+#include "io.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+ -+/* Bounce buffer: */ -+struct bbuf { -+ void *b; -+ enum { -+ BB_NONE, -+ BB_VMAP, -+ BB_KMALLOC, -+ BB_MEMPOOL, -+ } type; -+ int rw; -+}; -+ -+static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw) -+{ -+ void *b; -+ -+ BUG_ON(size > c->sb.encoded_extent_max << 9); -+ -+ b = kmalloc(size, GFP_NOIO|__GFP_NOWARN); -+ if (b) -+ return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw }; -+ -+ b = mempool_alloc(&c->compression_bounce[rw], GFP_NOIO); -+ if (b) -+ return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw }; -+ -+ BUG(); -+} -+ -+static bool bio_phys_contig(struct bio *bio, struct bvec_iter start) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ void *expected_start = NULL; -+ -+ __bio_for_each_bvec(bv, bio, iter, start) { -+ if (expected_start && -+ expected_start != page_address(bv.bv_page) + bv.bv_offset) -+ return false; -+ -+ expected_start = page_address(bv.bv_page) + -+ bv.bv_offset + bv.bv_len; -+ } -+ -+ return true; -+} -+ -+static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio, -+ struct bvec_iter start, int rw) -+{ -+ struct bbuf ret; -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ unsigned nr_pages = 0, flags; -+ struct page *stack_pages[16]; -+ struct page **pages = NULL; -+ void *data; -+ -+ BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max); -+ -+ if (!IS_ENABLED(CONFIG_HIGHMEM) && -+ bio_phys_contig(bio, start)) -+ return (struct bbuf) { -+ .b = page_address(bio_iter_page(bio, start)) + -+ bio_iter_offset(bio, start), -+ .type = BB_NONE, .rw = rw -+ }; -+ -+ /* check if we can map the pages contiguously: */ -+ __bio_for_each_segment(bv, bio, iter, start) { -+ if (iter.bi_size != start.bi_size && -+ bv.bv_offset) -+ goto bounce; -+ -+ if (bv.bv_len < iter.bi_size && -+ bv.bv_offset + bv.bv_len < PAGE_SIZE) -+ goto bounce; -+ -+ nr_pages++; -+ } -+ -+ BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages); -+ -+ pages = nr_pages > ARRAY_SIZE(stack_pages) -+ ? 
kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOIO) -+ : stack_pages; -+ if (!pages) -+ goto bounce; -+ -+ nr_pages = 0; -+ __bio_for_each_segment(bv, bio, iter, start) -+ pages[nr_pages++] = bv.bv_page; -+ -+ flags = memalloc_nofs_save(); -+ data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); -+ memalloc_nofs_restore(flags); -+ -+ if (pages != stack_pages) -+ kfree(pages); -+ -+ if (data) -+ return (struct bbuf) { -+ .b = data + bio_iter_offset(bio, start), -+ .type = BB_VMAP, .rw = rw -+ }; -+bounce: -+ ret = __bounce_alloc(c, start.bi_size, rw); -+ -+ if (rw == READ) -+ memcpy_from_bio(ret.b, bio, start); -+ -+ return ret; -+} -+ -+static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw) -+{ -+ return __bio_map_or_bounce(c, bio, bio->bi_iter, rw); -+} -+ -+static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf) -+{ -+ switch (buf.type) { -+ case BB_NONE: -+ break; -+ case BB_VMAP: -+ vunmap((void *) ((unsigned long) buf.b & PAGE_MASK)); -+ break; -+ case BB_KMALLOC: -+ kfree(buf.b); -+ break; -+ case BB_MEMPOOL: -+ mempool_free(buf.b, &c->compression_bounce[buf.rw]); -+ break; -+ } -+} -+ -+static inline void zlib_set_workspace(z_stream *strm, void *workspace) -+{ -+#ifdef __KERNEL__ -+ strm->workspace = workspace; -+#endif -+} -+ -+static int __bio_uncompress(struct bch_fs *c, struct bio *src, -+ void *dst_data, struct bch_extent_crc_unpacked crc) -+{ -+ struct bbuf src_data = { NULL }; -+ size_t src_len = src->bi_iter.bi_size; -+ size_t dst_len = crc.uncompressed_size << 9; -+ void *workspace; -+ int ret; -+ -+ src_data = bio_map_or_bounce(c, src, READ); -+ -+ switch (crc.compression_type) { -+ case BCH_COMPRESSION_TYPE_lz4_old: -+ case BCH_COMPRESSION_TYPE_lz4: -+ ret = LZ4_decompress_safe_partial(src_data.b, dst_data, -+ src_len, dst_len, dst_len); -+ if (ret != dst_len) -+ goto err; -+ break; -+ case BCH_COMPRESSION_TYPE_gzip: { -+ z_stream strm = { -+ .next_in = src_data.b, -+ .avail_in = src_len, -+ .next_out = dst_data, -+ .avail_out = dst_len, -+ }; -+ -+ workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO); -+ -+ zlib_set_workspace(&strm, workspace); -+ zlib_inflateInit2(&strm, -MAX_WBITS); -+ ret = zlib_inflate(&strm, Z_FINISH); -+ -+ mempool_free(workspace, &c->decompress_workspace); -+ -+ if (ret != Z_STREAM_END) -+ goto err; -+ break; -+ } -+ case BCH_COMPRESSION_TYPE_zstd: { -+ ZSTD_DCtx *ctx; -+ size_t real_src_len = le32_to_cpup(src_data.b); -+ -+ if (real_src_len > src_len - 4) -+ goto err; -+ -+ workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO); -+ ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound()); -+ -+ ret = ZSTD_decompressDCtx(ctx, -+ dst_data, dst_len, -+ src_data.b + 4, real_src_len); -+ -+ mempool_free(workspace, &c->decompress_workspace); -+ -+ if (ret != dst_len) -+ goto err; -+ break; -+ } -+ default: -+ BUG(); -+ } -+ ret = 0; -+out: -+ bio_unmap_or_unbounce(c, src_data); -+ return ret; -+err: -+ ret = -EIO; -+ goto out; -+} -+ -+int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio, -+ struct bch_extent_crc_unpacked *crc) -+{ -+ struct bbuf data = { NULL }; -+ size_t dst_len = crc->uncompressed_size << 9; -+ -+ /* bio must own its pages: */ -+ BUG_ON(!bio->bi_vcnt); -+ BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs); -+ -+ if (crc->uncompressed_size > c->sb.encoded_extent_max || -+ crc->compressed_size > c->sb.encoded_extent_max) { -+ bch_err(c, "error rewriting existing data: extent too big"); -+ return -EIO; -+ } -+ -+ data = __bounce_alloc(c, dst_len, 
WRITE); -+ -+ if (__bio_uncompress(c, bio, data.b, *crc)) { -+ bch_err(c, "error rewriting existing data: decompression error"); -+ bio_unmap_or_unbounce(c, data); -+ return -EIO; -+ } -+ -+ /* -+ * XXX: don't have a good way to assert that the bio was allocated with -+ * enough space, we depend on bch2_move_extent doing the right thing -+ */ -+ bio->bi_iter.bi_size = crc->live_size << 9; -+ -+ memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9)); -+ -+ crc->csum_type = 0; -+ crc->compression_type = 0; -+ crc->compressed_size = crc->live_size; -+ crc->uncompressed_size = crc->live_size; -+ crc->offset = 0; -+ crc->csum = (struct bch_csum) { 0, 0 }; -+ -+ bio_unmap_or_unbounce(c, data); -+ return 0; -+} -+ -+int bch2_bio_uncompress(struct bch_fs *c, struct bio *src, -+ struct bio *dst, struct bvec_iter dst_iter, -+ struct bch_extent_crc_unpacked crc) -+{ -+ struct bbuf dst_data = { NULL }; -+ size_t dst_len = crc.uncompressed_size << 9; -+ int ret = -ENOMEM; -+ -+ if (crc.uncompressed_size > c->sb.encoded_extent_max || -+ crc.compressed_size > c->sb.encoded_extent_max) -+ return -EIO; -+ -+ dst_data = dst_len == dst_iter.bi_size -+ ? __bio_map_or_bounce(c, dst, dst_iter, WRITE) -+ : __bounce_alloc(c, dst_len, WRITE); -+ -+ ret = __bio_uncompress(c, src, dst_data.b, crc); -+ if (ret) -+ goto err; -+ -+ if (dst_data.type != BB_NONE && -+ dst_data.type != BB_VMAP) -+ memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9)); -+err: -+ bio_unmap_or_unbounce(c, dst_data); -+ return ret; -+} -+ -+static int attempt_compress(struct bch_fs *c, -+ void *workspace, -+ void *dst, size_t dst_len, -+ void *src, size_t src_len, -+ enum bch_compression_type compression_type) -+{ -+ switch (compression_type) { -+ case BCH_COMPRESSION_TYPE_lz4: { -+ int len = src_len; -+ int ret = LZ4_compress_destSize( -+ src, dst, -+ &len, dst_len, -+ workspace); -+ -+ if (len < src_len) -+ return -len; -+ -+ return ret; -+ } -+ case BCH_COMPRESSION_TYPE_gzip: { -+ z_stream strm = { -+ .next_in = src, -+ .avail_in = src_len, -+ .next_out = dst, -+ .avail_out = dst_len, -+ }; -+ -+ zlib_set_workspace(&strm, workspace); -+ zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION, -+ Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, -+ Z_DEFAULT_STRATEGY); -+ -+ if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END) -+ return 0; -+ -+ if (zlib_deflateEnd(&strm) != Z_OK) -+ return 0; -+ -+ return strm.total_out; -+ } -+ case BCH_COMPRESSION_TYPE_zstd: { -+ ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace, -+ ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams)); -+ -+ size_t len = ZSTD_compressCCtx(ctx, -+ dst + 4, dst_len - 4, -+ src, src_len, -+ c->zstd_params); -+ if (ZSTD_isError(len)) -+ return 0; -+ -+ *((__le32 *) dst) = cpu_to_le32(len); -+ return len + 4; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+static unsigned __bio_compress(struct bch_fs *c, -+ struct bio *dst, size_t *dst_len, -+ struct bio *src, size_t *src_len, -+ enum bch_compression_type compression_type) -+{ -+ struct bbuf src_data = { NULL }, dst_data = { NULL }; -+ void *workspace; -+ unsigned pad; -+ int ret = 0; -+ -+ BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR); -+ BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type])); -+ -+ /* If it's only one block, don't bother trying to compress: */ -+ if (bio_sectors(src) <= c->opts.block_size) -+ return 0; -+ -+ dst_data = bio_map_or_bounce(c, dst, WRITE); -+ src_data = bio_map_or_bounce(c, src, READ); -+ -+ workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOIO); -+ -+ *src_len = 
src->bi_iter.bi_size; -+ *dst_len = dst->bi_iter.bi_size; -+ -+ /* -+ * XXX: this algorithm sucks when the compression code doesn't tell us -+ * how much would fit, like LZ4 does: -+ */ -+ while (1) { -+ if (*src_len <= block_bytes(c)) { -+ ret = -1; -+ break; -+ } -+ -+ ret = attempt_compress(c, workspace, -+ dst_data.b, *dst_len, -+ src_data.b, *src_len, -+ compression_type); -+ if (ret > 0) { -+ *dst_len = ret; -+ ret = 0; -+ break; -+ } -+ -+ /* Didn't fit: should we retry with a smaller amount? */ -+ if (*src_len <= *dst_len) { -+ ret = -1; -+ break; -+ } -+ -+ /* -+ * If ret is negative, it's a hint as to how much data would fit -+ */ -+ BUG_ON(-ret >= *src_len); -+ -+ if (ret < 0) -+ *src_len = -ret; -+ else -+ *src_len -= (*src_len - *dst_len) / 2; -+ *src_len = round_down(*src_len, block_bytes(c)); -+ } -+ -+ mempool_free(workspace, &c->compress_workspace[compression_type]); -+ -+ if (ret) -+ goto err; -+ -+ /* Didn't get smaller: */ -+ if (round_up(*dst_len, block_bytes(c)) >= *src_len) -+ goto err; -+ -+ pad = round_up(*dst_len, block_bytes(c)) - *dst_len; -+ -+ memset(dst_data.b + *dst_len, 0, pad); -+ *dst_len += pad; -+ -+ if (dst_data.type != BB_NONE && -+ dst_data.type != BB_VMAP) -+ memcpy_to_bio(dst, dst->bi_iter, dst_data.b); -+ -+ BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size); -+ BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size); -+ BUG_ON(*dst_len & (block_bytes(c) - 1)); -+ BUG_ON(*src_len & (block_bytes(c) - 1)); -+out: -+ bio_unmap_or_unbounce(c, src_data); -+ bio_unmap_or_unbounce(c, dst_data); -+ return compression_type; -+err: -+ compression_type = BCH_COMPRESSION_TYPE_incompressible; -+ goto out; -+} -+ -+unsigned bch2_bio_compress(struct bch_fs *c, -+ struct bio *dst, size_t *dst_len, -+ struct bio *src, size_t *src_len, -+ unsigned compression_type) -+{ -+ unsigned orig_dst = dst->bi_iter.bi_size; -+ unsigned orig_src = src->bi_iter.bi_size; -+ -+ /* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */ -+ src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size, -+ c->sb.encoded_extent_max << 9); -+ /* Don't generate a bigger output than input: */ -+ dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size); -+ -+ if (compression_type == BCH_COMPRESSION_TYPE_lz4_old) -+ compression_type = BCH_COMPRESSION_TYPE_lz4; -+ -+ compression_type = -+ __bio_compress(c, dst, dst_len, src, src_len, compression_type); -+ -+ dst->bi_iter.bi_size = orig_dst; -+ src->bi_iter.bi_size = orig_src; -+ return compression_type; -+} -+ -+static int __bch2_fs_compress_init(struct bch_fs *, u64); -+ -+#define BCH_FEATURE_none 0 -+ -+static const unsigned bch2_compression_opt_to_feature[] = { -+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t, -+ BCH_COMPRESSION_OPTS() -+#undef x -+}; -+ -+#undef BCH_FEATURE_none -+ -+static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f) -+{ -+ int ret = 0; -+ -+ if ((c->sb.features & f) == f) -+ return 0; -+ -+ mutex_lock(&c->sb_lock); -+ -+ if ((c->sb.features & f) == f) { -+ mutex_unlock(&c->sb_lock); -+ return 0; -+ } -+ -+ ret = __bch2_fs_compress_init(c, c->sb.features|f); -+ if (ret) { -+ mutex_unlock(&c->sb_lock); -+ return ret; -+ } -+ -+ c->disk_sb.sb->features[0] |= cpu_to_le64(f); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+int bch2_check_set_has_compressed_data(struct bch_fs *c, -+ unsigned compression_type) -+{ -+ BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature)); -+ -+ return compression_type -+ ? 
__bch2_check_set_has_compressed_data(c, -+ 1ULL << bch2_compression_opt_to_feature[compression_type]) -+ : 0; -+} -+ -+void bch2_fs_compress_exit(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ mempool_exit(&c->decompress_workspace); -+ for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++) -+ mempool_exit(&c->compress_workspace[i]); -+ mempool_exit(&c->compression_bounce[WRITE]); -+ mempool_exit(&c->compression_bounce[READ]); -+} -+ -+static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) -+{ -+ size_t max_extent = c->sb.encoded_extent_max << 9; -+ size_t decompress_workspace_size = 0; -+ bool decompress_workspace_needed; -+ ZSTD_parameters params = ZSTD_getParams(0, max_extent, 0); -+ struct { -+ unsigned feature; -+ unsigned type; -+ size_t compress_workspace; -+ size_t decompress_workspace; -+ } compression_types[] = { -+ { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, LZ4_MEM_COMPRESS, 0 }, -+ { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip, -+ zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), -+ zlib_inflate_workspacesize(), }, -+ { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd, -+ ZSTD_CCtxWorkspaceBound(params.cParams), -+ ZSTD_DCtxWorkspaceBound() }, -+ }, *i; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ c->zstd_params = params; -+ -+ for (i = compression_types; -+ i < compression_types + ARRAY_SIZE(compression_types); -+ i++) -+ if (features & (1 << i->feature)) -+ goto have_compressed; -+ -+ goto out; -+have_compressed: -+ -+ if (!mempool_initialized(&c->compression_bounce[READ])) { -+ ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[READ], -+ 1, max_extent); -+ if (ret) -+ goto out; -+ } -+ -+ if (!mempool_initialized(&c->compression_bounce[WRITE])) { -+ ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE], -+ 1, max_extent); -+ if (ret) -+ goto out; -+ } -+ -+ for (i = compression_types; -+ i < compression_types + ARRAY_SIZE(compression_types); -+ i++) { -+ decompress_workspace_size = -+ max(decompress_workspace_size, i->decompress_workspace); -+ -+ if (!(features & (1 << i->feature))) -+ continue; -+ -+ if (i->decompress_workspace) -+ decompress_workspace_needed = true; -+ -+ if (mempool_initialized(&c->compress_workspace[i->type])) -+ continue; -+ -+ ret = mempool_init_kvpmalloc_pool( -+ &c->compress_workspace[i->type], -+ 1, i->compress_workspace); -+ if (ret) -+ goto out; -+ } -+ -+ if (!mempool_initialized(&c->decompress_workspace)) { -+ ret = mempool_init_kvpmalloc_pool( -+ &c->decompress_workspace, -+ 1, decompress_workspace_size); -+ if (ret) -+ goto out; -+ } -+out: -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -+ -+int bch2_fs_compress_init(struct bch_fs *c) -+{ -+ u64 f = c->sb.features; -+ -+ if (c->opts.compression) -+ f |= 1ULL << bch2_compression_opt_to_feature[c->opts.compression]; -+ -+ if (c->opts.background_compression) -+ f |= 1ULL << bch2_compression_opt_to_feature[c->opts.background_compression]; -+ -+ return __bch2_fs_compress_init(c, f); -+ -+} -diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h -new file mode 100644 -index 000000000000..4bab1f61b3b5 ---- /dev/null -+++ b/fs/bcachefs/compress.h -@@ -0,0 +1,18 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_COMPRESS_H -+#define _BCACHEFS_COMPRESS_H -+ -+#include "extents_types.h" -+ -+int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *, -+ struct bch_extent_crc_unpacked *); -+int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *, -+ struct bvec_iter, struct bch_extent_crc_unpacked); -+unsigned 
bch2_bio_compress(struct bch_fs *, struct bio *, size_t *, -+ struct bio *, size_t *, unsigned); -+ -+int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned); -+void bch2_fs_compress_exit(struct bch_fs *); -+int bch2_fs_compress_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_COMPRESS_H */ -diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c -new file mode 100644 -index 000000000000..aa10591a3b1a ---- /dev/null -+++ b/fs/bcachefs/debug.c -@@ -0,0 +1,432 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Assorted bcachefs debug code -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "debug.h" -+#include "error.h" -+#include "extents.h" -+#include "fsck.h" -+#include "inode.h" -+#include "io.h" -+#include "super.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+static struct dentry *bch_debug; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+void __bch2_btree_verify(struct bch_fs *c, struct btree *b) -+{ -+ struct btree *v = c->verify_data; -+ struct btree_node *n_ondisk, *n_sorted, *n_inmemory; -+ struct bset *sorted, *inmemory; -+ struct extent_ptr_decoded pick; -+ struct bch_dev *ca; -+ struct bio *bio; -+ -+ if (c->opts.nochanges) -+ return; -+ -+ btree_node_io_lock(b); -+ mutex_lock(&c->verify_lock); -+ -+ n_ondisk = c->verify_ondisk; -+ n_sorted = c->verify_data->data; -+ n_inmemory = b->data; -+ -+ bkey_copy(&v->key, &b->key); -+ v->written = 0; -+ v->c.level = b->c.level; -+ v->c.btree_id = b->c.btree_id; -+ bch2_btree_keys_init(v, &c->expensive_debug_checks); -+ -+ if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), -+ NULL, &pick) <= 0) -+ return; -+ -+ ca = bch_dev_bkey_exists(c, pick.ptr.dev); -+ if (!bch2_dev_get_ioref(ca, READ)) -+ return; -+ -+ bio = bio_alloc_bioset(GFP_NOIO, -+ buf_pages(n_sorted, btree_bytes(c)), -+ &c->btree_bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_opf = REQ_OP_READ|REQ_META; -+ bio->bi_iter.bi_sector = pick.ptr.offset; -+ bch2_bio_map(bio, n_sorted, btree_bytes(c)); -+ -+ submit_bio_wait(bio); -+ -+ bio_put(bio); -+ percpu_ref_put(&ca->io_ref); -+ -+ memcpy(n_ondisk, n_sorted, btree_bytes(c)); -+ -+ if (bch2_btree_node_read_done(c, v, false)) -+ goto out; -+ -+ n_sorted = c->verify_data->data; -+ sorted = &n_sorted->keys; -+ inmemory = &n_inmemory->keys; -+ -+ if (inmemory->u64s != sorted->u64s || -+ memcmp(inmemory->start, -+ sorted->start, -+ vstruct_end(inmemory) - (void *) inmemory->start)) { -+ unsigned offset = 0, sectors; -+ struct bset *i; -+ unsigned j; -+ -+ console_lock(); -+ -+ printk(KERN_ERR "*** in memory:\n"); -+ bch2_dump_bset(c, b, inmemory, 0); -+ -+ printk(KERN_ERR "*** read back in:\n"); -+ bch2_dump_bset(c, v, sorted, 0); -+ -+ while (offset < b->written) { -+ if (!offset ) { -+ i = &n_ondisk->keys; -+ sectors = vstruct_blocks(n_ondisk, c->block_bits) << -+ c->block_bits; -+ } else { -+ struct btree_node_entry *bne = -+ (void *) n_ondisk + (offset << 9); -+ i = &bne->keys; -+ -+ sectors = vstruct_blocks(bne, c->block_bits) << -+ c->block_bits; -+ } -+ -+ printk(KERN_ERR "*** on disk block %u:\n", offset); -+ bch2_dump_bset(c, b, i, offset); -+ -+ offset += sectors; -+ } -+ -+ printk(KERN_ERR "*** block %u/%u not written\n", -+ offset >> c->block_bits, btree_blocks(c)); -+ -+ for (j = 0; j < le16_to_cpu(inmemory->u64s); j++) -+ if (inmemory->_data[j] != sorted->_data[j]) 
-+ break; -+ -+ printk(KERN_ERR "b->written %u\n", b->written); -+ -+ console_unlock(); -+ panic("verify failed at %u\n", j); -+ } -+out: -+ mutex_unlock(&c->verify_lock); -+ btree_node_io_unlock(b); -+} -+ -+#endif -+ -+#ifdef CONFIG_DEBUG_FS -+ -+/* XXX: bch_fs refcounting */ -+ -+struct dump_iter { -+ struct bpos from; -+ struct bch_fs *c; -+ enum btree_id id; -+ -+ char buf[PAGE_SIZE]; -+ size_t bytes; /* what's currently in buf */ -+ -+ char __user *ubuf; /* destination user buffer */ -+ size_t size; /* size of requested read */ -+ ssize_t ret; /* bytes read so far */ -+}; -+ -+static int flush_buf(struct dump_iter *i) -+{ -+ if (i->bytes) { -+ size_t bytes = min(i->bytes, i->size); -+ int err = copy_to_user(i->ubuf, i->buf, bytes); -+ -+ if (err) -+ return err; -+ -+ i->ret += bytes; -+ i->ubuf += bytes; -+ i->size -= bytes; -+ i->bytes -= bytes; -+ memmove(i->buf, i->buf + bytes, i->bytes); -+ } -+ -+ return 0; -+} -+ -+static int bch2_dump_open(struct inode *inode, struct file *file) -+{ -+ struct btree_debug *bd = inode->i_private; -+ struct dump_iter *i; -+ -+ i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL); -+ if (!i) -+ return -ENOMEM; -+ -+ file->private_data = i; -+ i->from = POS_MIN; -+ i->c = container_of(bd, struct bch_fs, btree_debug[bd->id]); -+ i->id = bd->id; -+ -+ return 0; -+} -+ -+static int bch2_dump_release(struct inode *inode, struct file *file) -+{ -+ kfree(file->private_data); -+ return 0; -+} -+ -+static ssize_t bch2_read_btree(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int err; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ err = flush_buf(i); -+ if (err) -+ return err; -+ -+ if (!i->size) -+ return i->ret; -+ -+ bch2_trans_init(&trans, i->c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); -+ k = bch2_btree_iter_peek(iter); -+ -+ while (k.k && !(err = bkey_err(k))) { -+ bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k); -+ i->bytes = strlen(i->buf); -+ BUG_ON(i->bytes >= PAGE_SIZE); -+ i->buf[i->bytes] = '\n'; -+ i->bytes++; -+ -+ k = bch2_btree_iter_next(iter); -+ i->from = iter->pos; -+ -+ err = flush_buf(i); -+ if (err) -+ break; -+ -+ if (!i->size) -+ break; -+ } -+ bch2_trans_exit(&trans); -+ -+ return err < 0 ? err : i->ret; -+} -+ -+static const struct file_operations btree_debug_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_read_btree, -+}; -+ -+static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ int err; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ err = flush_buf(i); -+ if (err) -+ return err; -+ -+ if (!i->size || !bkey_cmp(POS_MAX, i->from)) -+ return i->ret; -+ -+ bch2_trans_init(&trans, i->c, 0, 0); -+ -+ for_each_btree_node(&trans, iter, i->id, i->from, 0, b) { -+ bch2_btree_node_to_text(&PBUF(i->buf), i->c, b); -+ i->bytes = strlen(i->buf); -+ err = flush_buf(i); -+ if (err) -+ break; -+ -+ /* -+ * can't easily correctly restart a btree node traversal across -+ * all nodes, meh -+ */ -+ i->from = bkey_cmp(POS_MAX, b->key.k.p) -+ ? bkey_successor(b->key.k.p) -+ : b->key.k.p; -+ -+ if (!i->size) -+ break; -+ } -+ bch2_trans_exit(&trans); -+ -+ return err < 0 ? 
err : i->ret; -+} -+ -+static const struct file_operations btree_format_debug_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_read_btree_formats, -+}; -+ -+static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct btree *prev_node = NULL; -+ int err; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ err = flush_buf(i); -+ if (err) -+ return err; -+ -+ if (!i->size) -+ return i->ret; -+ -+ bch2_trans_init(&trans, i->c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(err = bkey_err(k))) { -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_packed *_k = -+ bch2_btree_node_iter_peek(&l->iter, l->b); -+ -+ if (l->b != prev_node) { -+ bch2_btree_node_to_text(&PBUF(i->buf), i->c, l->b); -+ i->bytes = strlen(i->buf); -+ err = flush_buf(i); -+ if (err) -+ break; -+ } -+ prev_node = l->b; -+ -+ bch2_bfloat_to_text(&PBUF(i->buf), l->b, _k); -+ i->bytes = strlen(i->buf); -+ err = flush_buf(i); -+ if (err) -+ break; -+ -+ bch2_btree_iter_next(iter); -+ i->from = iter->pos; -+ -+ err = flush_buf(i); -+ if (err) -+ break; -+ -+ if (!i->size) -+ break; -+ } -+ bch2_trans_exit(&trans); -+ -+ return err < 0 ? err : i->ret; -+} -+ -+static const struct file_operations bfloat_failed_debug_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_read_bfloat_failed, -+}; -+ -+void bch2_fs_debug_exit(struct bch_fs *c) -+{ -+ if (!IS_ERR_OR_NULL(c->debug)) -+ debugfs_remove_recursive(c->debug); -+} -+ -+void bch2_fs_debug_init(struct bch_fs *c) -+{ -+ struct btree_debug *bd; -+ char name[100]; -+ -+ if (IS_ERR_OR_NULL(bch_debug)) -+ return; -+ -+ snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b); -+ c->debug = debugfs_create_dir(name, bch_debug); -+ if (IS_ERR_OR_NULL(c->debug)) -+ return; -+ -+ for (bd = c->btree_debug; -+ bd < c->btree_debug + ARRAY_SIZE(c->btree_debug); -+ bd++) { -+ bd->id = bd - c->btree_debug; -+ bd->btree = debugfs_create_file(bch2_btree_ids[bd->id], -+ 0400, c->debug, bd, -+ &btree_debug_ops); -+ -+ snprintf(name, sizeof(name), "%s-formats", -+ bch2_btree_ids[bd->id]); -+ -+ bd->btree_format = debugfs_create_file(name, 0400, c->debug, bd, -+ &btree_format_debug_ops); -+ -+ snprintf(name, sizeof(name), "%s-bfloat-failed", -+ bch2_btree_ids[bd->id]); -+ -+ bd->failed = debugfs_create_file(name, 0400, c->debug, bd, -+ &bfloat_failed_debug_ops); -+ } -+} -+ -+#endif -+ -+void bch2_debug_exit(void) -+{ -+ if (!IS_ERR_OR_NULL(bch_debug)) -+ debugfs_remove_recursive(bch_debug); -+} -+ -+int __init bch2_debug_init(void) -+{ -+ int ret = 0; -+ -+ bch_debug = debugfs_create_dir("bcachefs", NULL); -+ return ret; -+} -diff --git a/fs/bcachefs/debug.h b/fs/bcachefs/debug.h -new file mode 100644 -index 000000000000..56c2d1ab5f63 ---- /dev/null -+++ b/fs/bcachefs/debug.h -@@ -0,0 +1,63 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_DEBUG_H -+#define _BCACHEFS_DEBUG_H -+ -+#include "bcachefs.h" -+ -+struct bio; -+struct btree; -+struct bch_fs; -+ -+#define BCH_DEBUG_PARAM(name, description) extern bool bch2_##name; -+BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ static inline bool name(struct bch_fs *c) \ -+ { return bch2_##name 
|| c->name; } -+BCH_DEBUG_PARAMS_ALWAYS() -+#undef BCH_DEBUG_PARAM -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ static inline bool name(struct bch_fs *c) \ -+ { return bch2_##name || c->name; } -+BCH_DEBUG_PARAMS_DEBUG() -+#undef BCH_DEBUG_PARAM -+ -+void __bch2_btree_verify(struct bch_fs *, struct btree *); -+ -+#define bypass_torture_test(d) ((d)->bypass_torture_test) -+ -+#else /* DEBUG */ -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ static inline bool name(struct bch_fs *c) { return false; } -+BCH_DEBUG_PARAMS_DEBUG() -+#undef BCH_DEBUG_PARAM -+ -+static inline void __bch2_btree_verify(struct bch_fs *c, struct btree *b) {} -+ -+#define bypass_torture_test(d) 0 -+ -+#endif -+ -+static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b) -+{ -+ if (verify_btree_ondisk(c)) -+ __bch2_btree_verify(c, b); -+} -+ -+#ifdef CONFIG_DEBUG_FS -+void bch2_fs_debug_exit(struct bch_fs *); -+void bch2_fs_debug_init(struct bch_fs *); -+#else -+static inline void bch2_fs_debug_exit(struct bch_fs *c) {} -+static inline void bch2_fs_debug_init(struct bch_fs *c) {} -+#endif -+ -+void bch2_debug_exit(void); -+int bch2_debug_init(void); -+ -+#endif /* _BCACHEFS_DEBUG_H */ -diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c -new file mode 100644 -index 000000000000..f34bfda8ab0d ---- /dev/null -+++ b/fs/bcachefs/dirent.c -@@ -0,0 +1,385 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_update.h" -+#include "extents.h" -+#include "dirent.h" -+#include "fs.h" -+#include "keylist.h" -+#include "str_hash.h" -+ -+#include -+ -+unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) -+{ -+ unsigned len = bkey_val_bytes(d.k) - -+ offsetof(struct bch_dirent, d_name); -+ -+ return strnlen(d.v->d_name, len); -+} -+ -+static u64 bch2_dirent_hash(const struct bch_hash_info *info, -+ const struct qstr *name) -+{ -+ struct bch_str_hash_ctx ctx; -+ -+ bch2_str_hash_init(&ctx, info); -+ bch2_str_hash_update(&ctx, info, name->name, name->len); -+ -+ /* [0,2) reserved for dots */ -+ return max_t(u64, bch2_str_hash_end(&ctx, info), 2); -+} -+ -+static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key) -+{ -+ return bch2_dirent_hash(info, key); -+} -+ -+static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) -+{ -+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); -+ struct qstr name = QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d)); -+ -+ return bch2_dirent_hash(info, &name); -+} -+ -+static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r) -+{ -+ struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); -+ int len = bch2_dirent_name_bytes(l); -+ const struct qstr *r = _r; -+ -+ return len - r->len ?: memcmp(l.v->d_name, r->name, len); -+} -+ -+static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) -+{ -+ struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); -+ struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r); -+ int l_len = bch2_dirent_name_bytes(l); -+ int r_len = bch2_dirent_name_bytes(r); -+ -+ return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len); -+} -+ -+const struct bch_hash_desc bch2_dirent_hash_desc = { -+ .btree_id = BTREE_ID_DIRENTS, -+ .key_type = KEY_TYPE_dirent, -+ .hash_key = dirent_hash_key, -+ .hash_bkey = dirent_hash_bkey, -+ .cmp_key = dirent_cmp_key, -+ .cmp_bkey = dirent_cmp_bkey, -+}; -+ -+const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_dirent d = 
bkey_s_c_to_dirent(k); -+ unsigned len; -+ -+ if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent)) -+ return "value too small"; -+ -+ len = bch2_dirent_name_bytes(d); -+ if (!len) -+ return "empty name"; -+ -+ /* -+ * older versions of bcachefs were buggy and creating dirent -+ * keys that were bigger than necessary: -+ */ -+ if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7)) -+ return "value too big"; -+ -+ if (len > BCH_NAME_MAX) -+ return "dirent name too big"; -+ -+ return NULL; -+} -+ -+void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); -+ -+ bch_scnmemcpy(out, d.v->d_name, -+ bch2_dirent_name_bytes(d)); -+ pr_buf(out, " -> %llu type %u", d.v->d_inum, d.v->d_type); -+} -+ -+static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, -+ u8 type, const struct qstr *name, u64 dst) -+{ -+ struct bkey_i_dirent *dirent; -+ unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len); -+ -+ if (name->len > BCH_NAME_MAX) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ BUG_ON(u64s > U8_MAX); -+ -+ dirent = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); -+ if (IS_ERR(dirent)) -+ return dirent; -+ -+ bkey_dirent_init(&dirent->k_i); -+ dirent->k.u64s = u64s; -+ dirent->v.d_inum = cpu_to_le64(dst); -+ dirent->v.d_type = type; -+ -+ memcpy(dirent->v.d_name, name->name, name->len); -+ memset(dirent->v.d_name + name->len, 0, -+ bkey_val_bytes(&dirent->k) - -+ offsetof(struct bch_dirent, d_name) - -+ name->len); -+ -+ EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len); -+ -+ return dirent; -+} -+ -+int bch2_dirent_create(struct btree_trans *trans, -+ u64 dir_inum, const struct bch_hash_info *hash_info, -+ u8 type, const struct qstr *name, u64 dst_inum, -+ int flags) -+{ -+ struct bkey_i_dirent *dirent; -+ int ret; -+ -+ dirent = dirent_create_key(trans, type, name, dst_inum); -+ ret = PTR_ERR_OR_ZERO(dirent); -+ if (ret) -+ return ret; -+ -+ return bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info, -+ dir_inum, &dirent->k_i, flags); -+} -+ -+static void dirent_copy_target(struct bkey_i_dirent *dst, -+ struct bkey_s_c_dirent src) -+{ -+ dst->v.d_inum = src.v->d_inum; -+ dst->v.d_type = src.v->d_type; -+} -+ -+int bch2_dirent_rename(struct btree_trans *trans, -+ u64 src_dir, struct bch_hash_info *src_hash, -+ u64 dst_dir, struct bch_hash_info *dst_hash, -+ const struct qstr *src_name, u64 *src_inum, -+ const struct qstr *dst_name, u64 *dst_inum, -+ enum bch_rename_mode mode) -+{ -+ struct btree_iter *src_iter = NULL, *dst_iter = NULL; -+ struct bkey_s_c old_src, old_dst; -+ struct bkey_i_dirent *new_src = NULL, *new_dst = NULL; -+ struct bpos dst_pos = -+ POS(dst_dir, bch2_dirent_hash(dst_hash, dst_name)); -+ int ret = 0; -+ -+ *src_inum = *dst_inum = 0; -+ -+ /* -+ * Lookup dst: -+ * -+ * Note that in BCH_RENAME mode, we're _not_ checking if -+ * the target already exists - we're relying on the VFS -+ * to do that check for us for correctness: -+ */ -+ dst_iter = mode == BCH_RENAME -+ ? 
bch2_hash_hole(trans, bch2_dirent_hash_desc, -+ dst_hash, dst_dir, dst_name) -+ : bch2_hash_lookup(trans, bch2_dirent_hash_desc, -+ dst_hash, dst_dir, dst_name, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dst_iter); -+ if (ret) -+ goto out; -+ -+ old_dst = bch2_btree_iter_peek_slot(dst_iter); -+ -+ if (mode != BCH_RENAME) -+ *dst_inum = le64_to_cpu(bkey_s_c_to_dirent(old_dst).v->d_inum); -+ -+ /* Lookup src: */ -+ src_iter = bch2_hash_lookup(trans, bch2_dirent_hash_desc, -+ src_hash, src_dir, src_name, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(src_iter); -+ if (ret) -+ goto out; -+ -+ old_src = bch2_btree_iter_peek_slot(src_iter); -+ *src_inum = le64_to_cpu(bkey_s_c_to_dirent(old_src).v->d_inum); -+ -+ /* Create new dst key: */ -+ new_dst = dirent_create_key(trans, 0, dst_name, 0); -+ ret = PTR_ERR_OR_ZERO(new_dst); -+ if (ret) -+ goto out; -+ -+ dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src)); -+ new_dst->k.p = dst_iter->pos; -+ -+ /* Create new src key: */ -+ if (mode == BCH_RENAME_EXCHANGE) { -+ new_src = dirent_create_key(trans, 0, src_name, 0); -+ ret = PTR_ERR_OR_ZERO(new_src); -+ if (ret) -+ goto out; -+ -+ dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst)); -+ new_src->k.p = src_iter->pos; -+ } else { -+ new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); -+ ret = PTR_ERR_OR_ZERO(new_src); -+ if (ret) -+ goto out; -+ -+ bkey_init(&new_src->k); -+ new_src->k.p = src_iter->pos; -+ -+ if (bkey_cmp(dst_pos, src_iter->pos) <= 0 && -+ bkey_cmp(src_iter->pos, dst_iter->pos) < 0) { -+ /* -+ * We have a hash collision for the new dst key, -+ * and new_src - the key we're deleting - is between -+ * new_dst's hashed slot and the slot we're going to be -+ * inserting it into - oops. This will break the hash -+ * table if we don't deal with it: -+ */ -+ if (mode == BCH_RENAME) { -+ /* -+ * If we're not overwriting, we can just insert -+ * new_dst at the src position: -+ */ -+ new_dst->k.p = src_iter->pos; -+ bch2_trans_update(trans, src_iter, -+ &new_dst->k_i, 0); -+ goto out; -+ } else { -+ /* If we're overwriting, we can't insert new_dst -+ * at a different slot because it has to -+ * overwrite old_dst - just make sure to use a -+ * whiteout when deleting src: -+ */ -+ new_src->k.type = KEY_TYPE_whiteout; -+ } -+ } else { -+ /* Check if we need a whiteout to delete src: */ -+ ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc, -+ src_hash, src_iter); -+ if (ret < 0) -+ goto out; -+ -+ if (ret) -+ new_src->k.type = KEY_TYPE_whiteout; -+ } -+ } -+ -+ bch2_trans_update(trans, src_iter, &new_src->k_i, 0); -+ bch2_trans_update(trans, dst_iter, &new_dst->k_i, 0); -+out: -+ bch2_trans_iter_put(trans, src_iter); -+ bch2_trans_iter_put(trans, dst_iter); -+ return ret; -+} -+ -+int bch2_dirent_delete_at(struct btree_trans *trans, -+ const struct bch_hash_info *hash_info, -+ struct btree_iter *iter) -+{ -+ return bch2_hash_delete_at(trans, bch2_dirent_hash_desc, -+ hash_info, iter); -+} -+ -+struct btree_iter * -+__bch2_dirent_lookup_trans(struct btree_trans *trans, u64 dir_inum, -+ const struct bch_hash_info *hash_info, -+ const struct qstr *name, unsigned flags) -+{ -+ return bch2_hash_lookup(trans, bch2_dirent_hash_desc, -+ hash_info, dir_inum, name, flags); -+} -+ -+u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum, -+ const struct bch_hash_info *hash_info, -+ const struct qstr *name) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 inum = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = 
__bch2_dirent_lookup_trans(&trans, dir_inum, -+ hash_info, name, 0); -+ if (IS_ERR(iter)) { -+ BUG_ON(PTR_ERR(iter) == -EINTR); -+ goto out; -+ } -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); -+out: -+ bch2_trans_exit(&trans); -+ return inum; -+} -+ -+int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_DIRENTS, -+ POS(dir_inum, 0), 0, k, ret) { -+ if (k.k->p.inode > dir_inum) -+ break; -+ -+ if (k.k->type == KEY_TYPE_dirent) { -+ ret = -ENOTEMPTY; -+ break; -+ } -+ } -+ bch2_trans_iter_put(trans, iter); -+ -+ return ret; -+} -+ -+int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_dirent dirent; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, -+ POS(inum, ctx->pos), 0, k, ret) { -+ if (k.k->p.inode > inum) -+ break; -+ -+ if (k.k->type != KEY_TYPE_dirent) -+ continue; -+ -+ dirent = bkey_s_c_to_dirent(k); -+ -+ /* -+ * XXX: dir_emit() can fault and block, while we're holding -+ * locks -+ */ -+ ctx->pos = dirent.k->p.offset; -+ if (!dir_emit(ctx, dirent.v->d_name, -+ bch2_dirent_name_bytes(dirent), -+ le64_to_cpu(dirent.v->d_inum), -+ dirent.v->d_type)) -+ break; -+ ctx->pos = dirent.k->p.offset + 1; -+ } -+ ret = bch2_trans_exit(&trans) ?: ret; -+ -+ return ret; -+} -diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h -new file mode 100644 -index 000000000000..34769371dd13 ---- /dev/null -+++ b/fs/bcachefs/dirent.h -@@ -0,0 +1,63 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_DIRENT_H -+#define _BCACHEFS_DIRENT_H -+ -+#include "str_hash.h" -+ -+extern const struct bch_hash_desc bch2_dirent_hash_desc; -+ -+const char *bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_dirent (struct bkey_ops) { \ -+ .key_invalid = bch2_dirent_invalid, \ -+ .val_to_text = bch2_dirent_to_text, \ -+} -+ -+struct qstr; -+struct file; -+struct dir_context; -+struct bch_fs; -+struct bch_hash_info; -+struct bch_inode_info; -+ -+unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent); -+ -+static inline unsigned dirent_val_u64s(unsigned len) -+{ -+ return DIV_ROUND_UP(offsetof(struct bch_dirent, d_name) + len, -+ sizeof(u64)); -+} -+ -+int bch2_dirent_create(struct btree_trans *, u64, -+ const struct bch_hash_info *, u8, -+ const struct qstr *, u64, int); -+ -+int bch2_dirent_delete_at(struct btree_trans *, -+ const struct bch_hash_info *, -+ struct btree_iter *); -+ -+enum bch_rename_mode { -+ BCH_RENAME, -+ BCH_RENAME_OVERWRITE, -+ BCH_RENAME_EXCHANGE, -+}; -+ -+int bch2_dirent_rename(struct btree_trans *, -+ u64, struct bch_hash_info *, -+ u64, struct bch_hash_info *, -+ const struct qstr *, u64 *, -+ const struct qstr *, u64 *, -+ enum bch_rename_mode); -+ -+struct btree_iter * -+__bch2_dirent_lookup_trans(struct btree_trans *, u64, -+ const struct bch_hash_info *, -+ const struct qstr *, unsigned); -+u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *, -+ const struct qstr *); -+ -+int bch2_empty_dir_trans(struct btree_trans *, u64); -+int bch2_readdir(struct bch_fs *, u64, struct dir_context *); -+ -+#endif /* _BCACHEFS_DIRENT_H */ -diff --git a/fs/bcachefs/disk_groups.c 
b/fs/bcachefs/disk_groups.c -new file mode 100644 -index 000000000000..4a4ec8f46108 ---- /dev/null -+++ b/fs/bcachefs/disk_groups.c -@@ -0,0 +1,481 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "disk_groups.h" -+#include "super-io.h" -+ -+#include -+ -+static int group_cmp(const void *_l, const void *_r) -+{ -+ const struct bch_disk_group *l = _l; -+ const struct bch_disk_group *r = _r; -+ -+ return ((BCH_GROUP_DELETED(l) > BCH_GROUP_DELETED(r)) - -+ (BCH_GROUP_DELETED(l) < BCH_GROUP_DELETED(r))) ?: -+ ((BCH_GROUP_PARENT(l) > BCH_GROUP_PARENT(r)) - -+ (BCH_GROUP_PARENT(l) < BCH_GROUP_PARENT(r))) ?: -+ strncmp(l->label, r->label, sizeof(l->label)); -+} -+ -+static const char *bch2_sb_disk_groups_validate(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ field_to_type(f, disk_groups); -+ struct bch_disk_group *g, *sorted = NULL; -+ struct bch_sb_field_members *mi; -+ struct bch_member *m; -+ unsigned i, nr_groups, len; -+ const char *err = NULL; -+ -+ mi = bch2_sb_get_members(sb); -+ groups = bch2_sb_get_disk_groups(sb); -+ nr_groups = disk_groups_nr(groups); -+ -+ for (m = mi->members; -+ m < mi->members + sb->nr_devices; -+ m++) { -+ unsigned g; -+ -+ if (!BCH_MEMBER_GROUP(m)) -+ continue; -+ -+ g = BCH_MEMBER_GROUP(m) - 1; -+ -+ if (g >= nr_groups || -+ BCH_GROUP_DELETED(&groups->entries[g])) -+ return "disk has invalid group"; -+ } -+ -+ if (!nr_groups) -+ return NULL; -+ -+ for (g = groups->entries; -+ g < groups->entries + nr_groups; -+ g++) { -+ if (BCH_GROUP_DELETED(g)) -+ continue; -+ -+ len = strnlen(g->label, sizeof(g->label)); -+ if (!len) { -+ err = "group with empty label"; -+ goto err; -+ } -+ } -+ -+ sorted = kmalloc_array(nr_groups, sizeof(*sorted), GFP_KERNEL); -+ if (!sorted) -+ return "cannot allocate memory"; -+ -+ memcpy(sorted, groups->entries, nr_groups * sizeof(*sorted)); -+ sort(sorted, nr_groups, sizeof(*sorted), group_cmp, NULL); -+ -+ for (i = 0; i + 1 < nr_groups; i++) -+ if (!BCH_GROUP_DELETED(sorted + i) && -+ !group_cmp(sorted + i, sorted + i + 1)) { -+ err = "duplicate groups"; -+ goto err; -+ } -+ -+ err = NULL; -+err: -+ kfree(sorted); -+ return err; -+} -+ -+static void bch2_sb_disk_groups_to_text(struct printbuf *out, -+ struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ field_to_type(f, disk_groups); -+ struct bch_disk_group *g; -+ unsigned nr_groups = disk_groups_nr(groups); -+ -+ for (g = groups->entries; -+ g < groups->entries + nr_groups; -+ g++) { -+ if (g != groups->entries) -+ pr_buf(out, " "); -+ -+ if (BCH_GROUP_DELETED(g)) -+ pr_buf(out, "[deleted]"); -+ else -+ pr_buf(out, "[parent %llu name %s]", -+ BCH_GROUP_PARENT(g), g->label); -+ } -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_disk_groups = { -+ .validate = bch2_sb_disk_groups_validate, -+ .to_text = bch2_sb_disk_groups_to_text -+}; -+ -+int bch2_sb_disk_groups_to_cpu(struct bch_fs *c) -+{ -+ struct bch_sb_field_members *mi; -+ struct bch_sb_field_disk_groups *groups; -+ struct bch_disk_groups_cpu *cpu_g, *old_g; -+ unsigned i, g, nr_groups; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ groups = bch2_sb_get_disk_groups(c->disk_sb.sb); -+ nr_groups = disk_groups_nr(groups); -+ -+ if (!groups) -+ return 0; -+ -+ cpu_g = kzalloc(sizeof(*cpu_g) + -+ sizeof(cpu_g->entries[0]) * nr_groups, GFP_KERNEL); -+ if (!cpu_g) -+ return -ENOMEM; -+ -+ cpu_g->nr = nr_groups; -+ -+ for (i = 0; i < nr_groups; i++) { -+ struct 
bch_disk_group *src = &groups->entries[i]; -+ struct bch_disk_group_cpu *dst = &cpu_g->entries[i]; -+ -+ dst->deleted = BCH_GROUP_DELETED(src); -+ dst->parent = BCH_GROUP_PARENT(src); -+ } -+ -+ for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { -+ struct bch_member *m = mi->members + i; -+ struct bch_disk_group_cpu *dst = -+ &cpu_g->entries[BCH_MEMBER_GROUP(m)]; -+ -+ if (!bch2_member_exists(m)) -+ continue; -+ -+ g = BCH_MEMBER_GROUP(m); -+ while (g) { -+ dst = &cpu_g->entries[g - 1]; -+ __set_bit(i, dst->devs.d); -+ g = dst->parent; -+ } -+ } -+ -+ old_g = rcu_dereference_protected(c->disk_groups, -+ lockdep_is_held(&c->sb_lock)); -+ rcu_assign_pointer(c->disk_groups, cpu_g); -+ if (old_g) -+ kfree_rcu(old_g, rcu); -+ -+ return 0; -+} -+ -+const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *c, unsigned target) -+{ -+ struct target t = target_decode(target); -+ -+ switch (t.type) { -+ case TARGET_NULL: -+ return NULL; -+ case TARGET_DEV: { -+ struct bch_dev *ca = t.dev < c->sb.nr_devices -+ ? rcu_dereference(c->devs[t.dev]) -+ : NULL; -+ return ca ? &ca->self : NULL; -+ } -+ case TARGET_GROUP: { -+ struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups); -+ -+ return t.group < g->nr && !g->entries[t.group].deleted -+ ? &g->entries[t.group].devs -+ : NULL; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target) -+{ -+ struct target t = target_decode(target); -+ -+ switch (t.type) { -+ case TARGET_NULL: -+ return false; -+ case TARGET_DEV: -+ return dev == t.dev; -+ case TARGET_GROUP: { -+ struct bch_disk_groups_cpu *g; -+ const struct bch_devs_mask *m; -+ bool ret; -+ -+ rcu_read_lock(); -+ g = rcu_dereference(c->disk_groups); -+ m = t.group < g->nr && !g->entries[t.group].deleted -+ ? &g->entries[t.group].devs -+ : NULL; -+ -+ ret = m ? 
test_bit(dev, m->d) : false; -+ rcu_read_unlock(); -+ -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+static int __bch2_disk_group_find(struct bch_sb_field_disk_groups *groups, -+ unsigned parent, -+ const char *name, unsigned namelen) -+{ -+ unsigned i, nr_groups = disk_groups_nr(groups); -+ -+ if (!namelen || namelen > BCH_SB_LABEL_SIZE) -+ return -EINVAL; -+ -+ for (i = 0; i < nr_groups; i++) { -+ struct bch_disk_group *g = groups->entries + i; -+ -+ if (BCH_GROUP_DELETED(g)) -+ continue; -+ -+ if (!BCH_GROUP_DELETED(g) && -+ BCH_GROUP_PARENT(g) == parent && -+ strnlen(g->label, sizeof(g->label)) == namelen && -+ !memcmp(name, g->label, namelen)) -+ return i; -+ } -+ -+ return -1; -+} -+ -+static int __bch2_disk_group_add(struct bch_sb_handle *sb, unsigned parent, -+ const char *name, unsigned namelen) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ bch2_sb_get_disk_groups(sb->sb); -+ unsigned i, nr_groups = disk_groups_nr(groups); -+ struct bch_disk_group *g; -+ -+ if (!namelen || namelen > BCH_SB_LABEL_SIZE) -+ return -EINVAL; -+ -+ for (i = 0; -+ i < nr_groups && !BCH_GROUP_DELETED(&groups->entries[i]); -+ i++) -+ ; -+ -+ if (i == nr_groups) { -+ unsigned u64s = -+ (sizeof(struct bch_sb_field_disk_groups) + -+ sizeof(struct bch_disk_group) * (nr_groups + 1)) / -+ sizeof(u64); -+ -+ groups = bch2_sb_resize_disk_groups(sb, u64s); -+ if (!groups) -+ return -ENOSPC; -+ -+ nr_groups = disk_groups_nr(groups); -+ } -+ -+ BUG_ON(i >= nr_groups); -+ -+ g = &groups->entries[i]; -+ -+ memcpy(g->label, name, namelen); -+ if (namelen < sizeof(g->label)) -+ g->label[namelen] = '\0'; -+ SET_BCH_GROUP_DELETED(g, 0); -+ SET_BCH_GROUP_PARENT(g, parent); -+ SET_BCH_GROUP_DATA_ALLOWED(g, ~0); -+ -+ return i; -+} -+ -+int bch2_disk_path_find(struct bch_sb_handle *sb, const char *name) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ bch2_sb_get_disk_groups(sb->sb); -+ int v = -1; -+ -+ do { -+ const char *next = strchrnul(name, '.'); -+ unsigned len = next - name; -+ -+ if (*next == '.') -+ next++; -+ -+ v = __bch2_disk_group_find(groups, v + 1, name, len); -+ name = next; -+ } while (*name && v >= 0); -+ -+ return v; -+} -+ -+int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name) -+{ -+ struct bch_sb_field_disk_groups *groups; -+ unsigned parent = 0; -+ int v = -1; -+ -+ do { -+ const char *next = strchrnul(name, '.'); -+ unsigned len = next - name; -+ -+ if (*next == '.') -+ next++; -+ -+ groups = bch2_sb_get_disk_groups(sb->sb); -+ -+ v = __bch2_disk_group_find(groups, parent, name, len); -+ if (v < 0) -+ v = __bch2_disk_group_add(sb, parent, name, len); -+ if (v < 0) -+ return v; -+ -+ parent = v + 1; -+ name = next; -+ } while (*name && v >= 0); -+ -+ return v; -+} -+ -+void bch2_disk_path_to_text(struct printbuf *out, -+ struct bch_sb_handle *sb, -+ unsigned v) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ bch2_sb_get_disk_groups(sb->sb); -+ struct bch_disk_group *g; -+ unsigned nr = 0; -+ u16 path[32]; -+ -+ while (1) { -+ if (nr == ARRAY_SIZE(path)) -+ goto inval; -+ -+ if (v >= disk_groups_nr(groups)) -+ goto inval; -+ -+ g = groups->entries + v; -+ -+ if (BCH_GROUP_DELETED(g)) -+ goto inval; -+ -+ path[nr++] = v; -+ -+ if (!BCH_GROUP_PARENT(g)) -+ break; -+ -+ v = BCH_GROUP_PARENT(g) - 1; -+ } -+ -+ while (nr) { -+ v = path[--nr]; -+ g = groups->entries + v; -+ -+ bch_scnmemcpy(out, g->label, -+ strnlen(g->label, sizeof(g->label))); -+ -+ if (nr) -+ pr_buf(out, "."); -+ } -+ return; -+inval: -+ pr_buf(out, "invalid group %u", v); -+} -+ -+int 
bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) -+{ -+ struct bch_member *mi; -+ int v = -1; -+ -+ mutex_lock(&c->sb_lock); -+ -+ if (!strlen(name) || !strcmp(name, "none")) -+ goto write_sb; -+ -+ v = bch2_disk_path_find_or_create(&c->disk_sb, name); -+ if (v < 0) { -+ mutex_unlock(&c->sb_lock); -+ return v; -+ } -+ -+write_sb: -+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; -+ SET_BCH_MEMBER_GROUP(mi, v + 1); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+int bch2_opt_target_parse(struct bch_fs *c, const char *buf, u64 *v) -+{ -+ struct bch_dev *ca; -+ int g; -+ -+ if (!strlen(buf) || !strcmp(buf, "none")) { -+ *v = 0; -+ return 0; -+ } -+ -+ /* Is it a device? */ -+ ca = bch2_dev_lookup(c, buf); -+ if (!IS_ERR(ca)) { -+ *v = dev_to_target(ca->dev_idx); -+ percpu_ref_put(&ca->ref); -+ return 0; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ g = bch2_disk_path_find(&c->disk_sb, buf); -+ mutex_unlock(&c->sb_lock); -+ -+ if (g >= 0) { -+ *v = group_to_target(g); -+ return 0; -+ } -+ -+ return -EINVAL; -+} -+ -+void bch2_opt_target_to_text(struct printbuf *out, struct bch_fs *c, u64 v) -+{ -+ struct target t = target_decode(v); -+ -+ switch (t.type) { -+ case TARGET_NULL: -+ pr_buf(out, "none"); -+ break; -+ case TARGET_DEV: { -+ struct bch_dev *ca; -+ -+ rcu_read_lock(); -+ ca = t.dev < c->sb.nr_devices -+ ? rcu_dereference(c->devs[t.dev]) -+ : NULL; -+ -+ if (ca && percpu_ref_tryget(&ca->io_ref)) { -+ char b[BDEVNAME_SIZE]; -+ -+ pr_buf(out, "/dev/%s", -+ bdevname(ca->disk_sb.bdev, b)); -+ percpu_ref_put(&ca->io_ref); -+ } else if (ca) { -+ pr_buf(out, "offline device %u", t.dev); -+ } else { -+ pr_buf(out, "invalid device %u", t.dev); -+ } -+ -+ rcu_read_unlock(); -+ break; -+ } -+ case TARGET_GROUP: -+ mutex_lock(&c->sb_lock); -+ bch2_disk_path_to_text(out, &c->disk_sb, t.group); -+ mutex_unlock(&c->sb_lock); -+ break; -+ default: -+ BUG(); -+ } -+} -diff --git a/fs/bcachefs/disk_groups.h b/fs/bcachefs/disk_groups.h -new file mode 100644 -index 000000000000..c8e0c37a5e1a ---- /dev/null -+++ b/fs/bcachefs/disk_groups.h -@@ -0,0 +1,88 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_DISK_GROUPS_H -+#define _BCACHEFS_DISK_GROUPS_H -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_disk_groups; -+ -+static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups) -+{ -+ return groups -+ ? 
(vstruct_end(&groups->field) - -+ (void *) &groups->entries[0]) / sizeof(struct bch_disk_group) -+ : 0; -+} -+ -+struct target { -+ enum { -+ TARGET_NULL, -+ TARGET_DEV, -+ TARGET_GROUP, -+ } type; -+ union { -+ unsigned dev; -+ unsigned group; -+ }; -+}; -+ -+#define TARGET_DEV_START 1 -+#define TARGET_GROUP_START (256 + TARGET_DEV_START) -+ -+static inline u16 dev_to_target(unsigned dev) -+{ -+ return TARGET_DEV_START + dev; -+} -+ -+static inline u16 group_to_target(unsigned group) -+{ -+ return TARGET_GROUP_START + group; -+} -+ -+static inline struct target target_decode(unsigned target) -+{ -+ if (target >= TARGET_GROUP_START) -+ return (struct target) { -+ .type = TARGET_GROUP, -+ .group = target - TARGET_GROUP_START -+ }; -+ -+ if (target >= TARGET_DEV_START) -+ return (struct target) { -+ .type = TARGET_DEV, -+ .group = target - TARGET_DEV_START -+ }; -+ -+ return (struct target) { .type = TARGET_NULL }; -+} -+ -+const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *, unsigned); -+ -+static inline struct bch_devs_mask target_rw_devs(struct bch_fs *c, -+ enum bch_data_type data_type, -+ u16 target) -+{ -+ struct bch_devs_mask devs = c->rw_devs[data_type]; -+ const struct bch_devs_mask *t = bch2_target_to_mask(c, target); -+ -+ if (t) -+ bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX); -+ return devs; -+} -+ -+bool bch2_dev_in_target(struct bch_fs *, unsigned, unsigned); -+ -+int bch2_disk_path_find(struct bch_sb_handle *, const char *); -+int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *); -+void bch2_disk_path_to_text(struct printbuf *, struct bch_sb_handle *, -+ unsigned); -+ -+int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *); -+void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, u64); -+ -+int bch2_sb_disk_groups_to_cpu(struct bch_fs *); -+ -+int bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *); -+ -+const char *bch2_sb_validate_disk_groups(struct bch_sb *, -+ struct bch_sb_field *); -+ -+#endif /* _BCACHEFS_DISK_GROUPS_H */ -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -new file mode 100644 -index 000000000000..8c7e9cb74888 ---- /dev/null -+++ b/fs/bcachefs/ec.c -@@ -0,0 +1,1368 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+/* erasure coding */ -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_on_stack.h" -+#include "bset.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "io.h" -+#include "keylist.h" -+#include "recovery.h" -+#include "super-io.h" -+#include "util.h" -+ -+#include -+ -+#ifdef __KERNEL__ -+ -+#include -+#include -+ -+static void raid5_recov(unsigned disks, unsigned failed_idx, -+ size_t size, void **data) -+{ -+ unsigned i = 2, nr; -+ -+ BUG_ON(failed_idx >= disks); -+ -+ swap(data[0], data[failed_idx]); -+ memcpy(data[0], data[1], size); -+ -+ while (i < disks) { -+ nr = min_t(unsigned, disks - i, MAX_XOR_BLOCKS); -+ xor_blocks(nr, size, data[0], data + i); -+ i += nr; -+ } -+ -+ swap(data[0], data[failed_idx]); -+} -+ -+static void raid_gen(int nd, int np, size_t size, void **v) -+{ -+ if (np >= 1) -+ raid5_recov(nd + np, nd, size, v); -+ if (np >= 2) -+ raid6_call.gen_syndrome(nd + np, size, v); -+ BUG_ON(np > 2); -+} -+ -+static void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v) -+{ -+ switch (nr) { -+ case 0: -+ break; -+ case 1: -+ if (ir[0] < nd + 1) -+ raid5_recov(nd + 1, ir[0], size, v); -+ else -+ 
raid6_call.gen_syndrome(nd + np, size, v); -+ break; -+ case 2: -+ if (ir[1] < nd) { -+ /* data+data failure. */ -+ raid6_2data_recov(nd + np, size, ir[0], ir[1], v); -+ } else if (ir[0] < nd) { -+ /* data + p/q failure */ -+ -+ if (ir[1] == nd) /* data + p failure */ -+ raid6_datap_recov(nd + np, size, ir[0], v); -+ else { /* data + q failure */ -+ raid5_recov(nd + 1, ir[0], size, v); -+ raid6_call.gen_syndrome(nd + np, size, v); -+ } -+ } else { -+ raid_gen(nd, np, size, v); -+ } -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+#else -+ -+#include -+ -+#endif -+ -+struct ec_bio { -+ struct bch_dev *ca; -+ struct ec_stripe_buf *buf; -+ size_t idx; -+ struct bio bio; -+}; -+ -+/* Stripes btree keys: */ -+ -+const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; -+ -+ if (k.k->p.inode) -+ return "invalid stripe key"; -+ -+ if (bkey_val_bytes(k.k) < sizeof(*s)) -+ return "incorrect value size"; -+ -+ if (bkey_val_bytes(k.k) < sizeof(*s) || -+ bkey_val_u64s(k.k) < stripe_val_u64s(s)) -+ return "incorrect value size"; -+ -+ return bch2_bkey_ptrs_invalid(c, k); -+} -+ -+void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; -+ unsigned i; -+ -+ pr_buf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u", -+ s->algorithm, -+ le16_to_cpu(s->sectors), -+ s->nr_blocks - s->nr_redundant, -+ s->nr_redundant, -+ s->csum_type, -+ 1U << s->csum_granularity_bits); -+ -+ for (i = 0; i < s->nr_blocks; i++) -+ pr_buf(out, " %u:%llu:%u", s->ptrs[i].dev, -+ (u64) s->ptrs[i].offset, -+ stripe_blockcount_get(s, i)); -+} -+ -+static int ptr_matches_stripe(struct bch_fs *c, -+ struct bch_stripe *v, -+ const struct bch_extent_ptr *ptr) -+{ -+ unsigned i; -+ -+ for (i = 0; i < v->nr_blocks - v->nr_redundant; i++) { -+ const struct bch_extent_ptr *ptr2 = v->ptrs + i; -+ -+ if (ptr->dev == ptr2->dev && -+ ptr->gen == ptr2->gen && -+ ptr->offset >= ptr2->offset && -+ ptr->offset < ptr2->offset + le16_to_cpu(v->sectors)) -+ return i; -+ } -+ -+ return -1; -+} -+ -+static int extent_matches_stripe(struct bch_fs *c, -+ struct bch_stripe *v, -+ struct bkey_s_c k) -+{ -+ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: { -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ const struct bch_extent_ptr *ptr; -+ int idx; -+ -+ extent_for_each_ptr(e, ptr) { -+ idx = ptr_matches_stripe(c, v, ptr); -+ if (idx >= 0) -+ return idx; -+ } -+ break; -+ } -+ } -+ -+ return -1; -+} -+ -+static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: { -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ const union bch_extent_entry *entry; -+ -+ extent_for_each_entry(e, entry) -+ if (extent_entry_type(entry) == -+ BCH_EXTENT_ENTRY_stripe_ptr && -+ entry->stripe_ptr.idx == idx) -+ return true; -+ -+ break; -+ } -+ } -+ -+ return false; -+} -+ -+static void ec_stripe_key_init(struct bch_fs *c, -+ struct bkey_i_stripe *s, -+ struct open_buckets *blocks, -+ struct open_buckets *parity, -+ unsigned stripe_size) -+{ -+ struct open_bucket *ob; -+ unsigned i, u64s; -+ -+ bkey_stripe_init(&s->k_i); -+ s->v.sectors = cpu_to_le16(stripe_size); -+ s->v.algorithm = 0; -+ s->v.nr_blocks = parity->nr + blocks->nr; -+ s->v.nr_redundant = parity->nr; -+ s->v.csum_granularity_bits = ilog2(c->sb.encoded_extent_max); -+ s->v.csum_type = BCH_CSUM_CRC32C; -+ s->v.pad = 0; -+ -+ open_bucket_for_each(c, blocks, ob, i) -+ s->v.ptrs[i] 
= ob->ptr; -+ -+ open_bucket_for_each(c, parity, ob, i) -+ s->v.ptrs[blocks->nr + i] = ob->ptr; -+ -+ while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) { -+ BUG_ON(1 << s->v.csum_granularity_bits >= -+ le16_to_cpu(s->v.sectors) || -+ s->v.csum_granularity_bits == U8_MAX); -+ s->v.csum_granularity_bits++; -+ } -+ -+ set_bkey_val_u64s(&s->k, u64s); -+} -+ -+/* Checksumming: */ -+ -+static void ec_generate_checksums(struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned csum_granularity = 1 << v->csum_granularity_bits; -+ unsigned csums_per_device = stripe_csums_per_device(v); -+ unsigned csum_bytes = bch_crc_bytes[v->csum_type]; -+ unsigned i, j; -+ -+ if (!csum_bytes) -+ return; -+ -+ BUG_ON(buf->offset); -+ BUG_ON(buf->size != le16_to_cpu(v->sectors)); -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ for (j = 0; j < csums_per_device; j++) { -+ unsigned offset = j << v->csum_granularity_bits; -+ unsigned len = min(csum_granularity, buf->size - offset); -+ -+ struct bch_csum csum = -+ bch2_checksum(NULL, v->csum_type, -+ null_nonce(), -+ buf->data[i] + (offset << 9), -+ len << 9); -+ -+ memcpy(stripe_csum(v, i, j), &csum, csum_bytes); -+ } -+ } -+} -+ -+static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned csum_granularity = 1 << v->csum_granularity_bits; -+ unsigned csum_bytes = bch_crc_bytes[v->csum_type]; -+ unsigned i; -+ -+ if (!csum_bytes) -+ return; -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ unsigned offset = buf->offset; -+ unsigned end = buf->offset + buf->size; -+ -+ if (!test_bit(i, buf->valid)) -+ continue; -+ -+ while (offset < end) { -+ unsigned j = offset >> v->csum_granularity_bits; -+ unsigned len = min(csum_granularity, end - offset); -+ struct bch_csum csum; -+ -+ BUG_ON(offset & (csum_granularity - 1)); -+ BUG_ON(offset + len != le16_to_cpu(v->sectors) && -+ ((offset + len) & (csum_granularity - 1))); -+ -+ csum = bch2_checksum(NULL, v->csum_type, -+ null_nonce(), -+ buf->data[i] + ((offset - buf->offset) << 9), -+ len << 9); -+ -+ if (memcmp(stripe_csum(v, i, j), &csum, csum_bytes)) { -+ __bcache_io_error(c, -+ "checksum error while doing reconstruct read (%u:%u)", -+ i, j); -+ clear_bit(i, buf->valid); -+ break; -+ } -+ -+ offset += len; -+ } -+ } -+} -+ -+/* Erasure coding: */ -+ -+static void ec_generate_ec(struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned nr_data = v->nr_blocks - v->nr_redundant; -+ unsigned bytes = le16_to_cpu(v->sectors) << 9; -+ -+ raid_gen(nr_data, v->nr_redundant, bytes, buf->data); -+} -+ -+static unsigned __ec_nr_failed(struct ec_stripe_buf *buf, unsigned nr) -+{ -+ return nr - bitmap_weight(buf->valid, nr); -+} -+ -+static unsigned ec_nr_failed(struct ec_stripe_buf *buf) -+{ -+ return __ec_nr_failed(buf, buf->key.v.nr_blocks); -+} -+ -+static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned i, failed[EC_STRIPE_MAX], nr_failed = 0; -+ unsigned nr_data = v->nr_blocks - v->nr_redundant; -+ unsigned bytes = buf->size << 9; -+ -+ if (ec_nr_failed(buf) > v->nr_redundant) { -+ __bcache_io_error(c, -+ "error doing reconstruct read: unable to read enough blocks"); -+ return -1; -+ } -+ -+ for (i = 0; i < nr_data; i++) -+ if (!test_bit(i, buf->valid)) -+ failed[nr_failed++] = i; -+ -+ raid_rec(nr_failed, failed, nr_data, v->nr_redundant, bytes, buf->data); -+ return 0; -+} -+ -+/* IO: */ -+ -+static void ec_block_endio(struct bio *bio) -+{ 
-+ struct ec_bio *ec_bio = container_of(bio, struct ec_bio, bio); -+ struct bch_dev *ca = ec_bio->ca; -+ struct closure *cl = bio->bi_private; -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s: %s", -+ bio_data_dir(bio) ? "write" : "read", -+ blk_status_to_str(bio->bi_status))) -+ clear_bit(ec_bio->idx, ec_bio->buf->valid); -+ -+ bio_put(&ec_bio->bio); -+ percpu_ref_put(&ca->io_ref); -+ closure_put(cl); -+} -+ -+static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, -+ unsigned rw, unsigned idx, struct closure *cl) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned offset = 0, bytes = buf->size << 9; -+ struct bch_extent_ptr *ptr = &v->ptrs[idx]; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ if (!bch2_dev_get_ioref(ca, rw)) { -+ clear_bit(idx, buf->valid); -+ return; -+ } -+ -+ while (offset < bytes) { -+ unsigned nr_iovecs = min_t(size_t, BIO_MAX_PAGES, -+ DIV_ROUND_UP(bytes, PAGE_SIZE)); -+ unsigned b = min_t(size_t, bytes - offset, -+ nr_iovecs << PAGE_SHIFT); -+ struct ec_bio *ec_bio; -+ -+ ec_bio = container_of(bio_alloc_bioset(GFP_KERNEL, nr_iovecs, -+ &c->ec_bioset), -+ struct ec_bio, bio); -+ -+ ec_bio->ca = ca; -+ ec_bio->buf = buf; -+ ec_bio->idx = idx; -+ -+ bio_set_dev(&ec_bio->bio, ca->disk_sb.bdev); -+ bio_set_op_attrs(&ec_bio->bio, rw, 0); -+ -+ ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9); -+ ec_bio->bio.bi_end_io = ec_block_endio; -+ ec_bio->bio.bi_private = cl; -+ -+ bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b); -+ -+ closure_get(cl); -+ percpu_ref_get(&ca->io_ref); -+ -+ submit_bio(&ec_bio->bio); -+ -+ offset += b; -+ } -+ -+ percpu_ref_put(&ca->io_ref); -+} -+ -+/* recovery read path: */ -+int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct ec_stripe_buf *buf; -+ struct closure cl; -+ struct bkey_s_c k; -+ struct bch_stripe *v; -+ unsigned stripe_idx; -+ unsigned offset, end; -+ unsigned i, nr_data, csum_granularity; -+ int ret = 0, idx; -+ -+ closure_init_stack(&cl); -+ -+ BUG_ON(!rbio->pick.has_ec); -+ -+ stripe_idx = rbio->pick.ec.idx; -+ -+ buf = kzalloc(sizeof(*buf), GFP_NOIO); -+ if (!buf) -+ return -ENOMEM; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, -+ POS(0, stripe_idx), -+ BTREE_ITER_SLOTS); -+ k = bch2_btree_iter_peek_slot(iter); -+ if (bkey_err(k) || k.k->type != KEY_TYPE_stripe) { -+ __bcache_io_error(c, -+ "error doing reconstruct read: stripe not found"); -+ kfree(buf); -+ return bch2_trans_exit(&trans) ?: -EIO; -+ } -+ -+ bkey_reassemble(&buf->key.k_i, k); -+ bch2_trans_exit(&trans); -+ -+ v = &buf->key.v; -+ -+ nr_data = v->nr_blocks - v->nr_redundant; -+ -+ idx = ptr_matches_stripe(c, v, &rbio->pick.ptr); -+ BUG_ON(idx < 0); -+ -+ csum_granularity = 1U << v->csum_granularity_bits; -+ -+ offset = rbio->bio.bi_iter.bi_sector - v->ptrs[idx].offset; -+ end = offset + bio_sectors(&rbio->bio); -+ -+ BUG_ON(end > le16_to_cpu(v->sectors)); -+ -+ buf->offset = round_down(offset, csum_granularity); -+ buf->size = min_t(unsigned, le16_to_cpu(v->sectors), -+ round_up(end, csum_granularity)) - buf->offset; -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ buf->data[i] = kmalloc(buf->size << 9, GFP_NOIO); -+ if (!buf->data[i]) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ } -+ -+ memset(buf->valid, 0xFF, sizeof(buf->valid)); -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ struct bch_extent_ptr *ptr = v->ptrs + i; -+ struct bch_dev *ca = 
bch_dev_bkey_exists(c, ptr->dev); -+ -+ if (ptr_stale(ca, ptr)) { -+ __bcache_io_error(c, -+ "error doing reconstruct read: stale pointer"); -+ clear_bit(i, buf->valid); -+ continue; -+ } -+ -+ ec_block_io(c, buf, REQ_OP_READ, i, &cl); -+ } -+ -+ closure_sync(&cl); -+ -+ if (ec_nr_failed(buf) > v->nr_redundant) { -+ __bcache_io_error(c, -+ "error doing reconstruct read: unable to read enough blocks"); -+ ret = -EIO; -+ goto err; -+ } -+ -+ ec_validate_checksums(c, buf); -+ -+ ret = ec_do_recov(c, buf); -+ if (ret) -+ goto err; -+ -+ memcpy_to_bio(&rbio->bio, rbio->bio.bi_iter, -+ buf->data[idx] + ((offset - buf->offset) << 9)); -+err: -+ for (i = 0; i < v->nr_blocks; i++) -+ kfree(buf->data[i]); -+ kfree(buf); -+ return ret; -+} -+ -+/* stripe bucket accounting: */ -+ -+static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) -+{ -+ ec_stripes_heap n, *h = &c->ec_stripes_heap; -+ -+ if (idx >= h->size) { -+ if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), gfp)) -+ return -ENOMEM; -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ if (n.size > h->size) { -+ memcpy(n.data, h->data, h->used * sizeof(h->data[0])); -+ n.used = h->used; -+ swap(*h, n); -+ } -+ spin_unlock(&c->ec_stripes_heap_lock); -+ -+ free_heap(&n); -+ } -+ -+ if (!genradix_ptr_alloc(&c->stripes[0], idx, gfp)) -+ return -ENOMEM; -+ -+ if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING && -+ !genradix_ptr_alloc(&c->stripes[1], idx, gfp)) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+static int ec_stripe_mem_alloc(struct bch_fs *c, -+ struct btree_iter *iter) -+{ -+ size_t idx = iter->pos.offset; -+ int ret = 0; -+ -+ if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT|__GFP_NOWARN)) -+ return ret; -+ -+ bch2_trans_unlock(iter->trans); -+ ret = -EINTR; -+ -+ if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL)) -+ return ret; -+ -+ return -ENOMEM; -+} -+ -+static ssize_t stripe_idx_to_delete(struct bch_fs *c) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ -+ return h->used && h->data[0].blocks_nonempty == 0 -+ ? 
h->data[0].idx : -1; -+} -+ -+static inline int ec_stripes_heap_cmp(ec_stripes_heap *h, -+ struct ec_stripe_heap_entry l, -+ struct ec_stripe_heap_entry r) -+{ -+ return ((l.blocks_nonempty > r.blocks_nonempty) - -+ (l.blocks_nonempty < r.blocks_nonempty)); -+} -+ -+static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h, -+ size_t i) -+{ -+ struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap); -+ -+ genradix_ptr(&c->stripes[0], h->data[i].idx)->heap_idx = i; -+} -+ -+static void heap_verify_backpointer(struct bch_fs *c, size_t idx) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ struct stripe *m = genradix_ptr(&c->stripes[0], idx); -+ -+ BUG_ON(!m->alive); -+ BUG_ON(m->heap_idx >= h->used); -+ BUG_ON(h->data[m->heap_idx].idx != idx); -+} -+ -+void bch2_stripes_heap_update(struct bch_fs *c, -+ struct stripe *m, size_t idx) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ size_t i; -+ -+ if (m->alive) { -+ heap_verify_backpointer(c, idx); -+ -+ h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty; -+ -+ i = m->heap_idx; -+ heap_sift_up(h, i, ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+ heap_sift_down(h, i, ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+ -+ heap_verify_backpointer(c, idx); -+ } else { -+ bch2_stripes_heap_insert(c, m, idx); -+ } -+ -+ if (stripe_idx_to_delete(c) >= 0 && -+ !percpu_ref_is_dying(&c->writes)) -+ schedule_work(&c->ec_stripe_delete_work); -+} -+ -+void bch2_stripes_heap_del(struct bch_fs *c, -+ struct stripe *m, size_t idx) -+{ -+ heap_verify_backpointer(c, idx); -+ -+ m->alive = false; -+ heap_del(&c->ec_stripes_heap, m->heap_idx, -+ ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+} -+ -+void bch2_stripes_heap_insert(struct bch_fs *c, -+ struct stripe *m, size_t idx) -+{ -+ BUG_ON(heap_full(&c->ec_stripes_heap)); -+ -+ heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) { -+ .idx = idx, -+ .blocks_nonempty = m->blocks_nonempty, -+ }), -+ ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+ m->alive = true; -+ -+ heap_verify_backpointer(c, idx); -+} -+ -+/* stripe deletion */ -+ -+static int ec_stripe_delete(struct bch_fs *c, size_t idx) -+{ -+ return bch2_btree_delete_range(c, BTREE_ID_EC, -+ POS(0, idx), -+ POS(0, idx + 1), -+ NULL); -+} -+ -+static void ec_stripe_delete_work(struct work_struct *work) -+{ -+ struct bch_fs *c = -+ container_of(work, struct bch_fs, ec_stripe_delete_work); -+ ssize_t idx; -+ -+ down_read(&c->gc_lock); -+ mutex_lock(&c->ec_stripe_create_lock); -+ -+ while (1) { -+ spin_lock(&c->ec_stripes_heap_lock); -+ idx = stripe_idx_to_delete(c); -+ spin_unlock(&c->ec_stripes_heap_lock); -+ -+ if (idx < 0) -+ break; -+ -+ if (ec_stripe_delete(c, idx)) -+ break; -+ } -+ -+ mutex_unlock(&c->ec_stripe_create_lock); -+ up_read(&c->gc_lock); -+} -+ -+/* stripe creation: */ -+ -+static int ec_stripe_bkey_insert(struct bch_fs *c, -+ struct bkey_i_stripe *stripe) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bpos start_pos = POS(0, c->ec_stripe_hint); -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EC, start_pos, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) { -+ if (start_pos.offset) { -+ start_pos = POS_MIN; -+ bch2_btree_iter_set_pos(iter, start_pos); -+ continue; -+ } -+ -+ ret = -ENOSPC; -+ break; -+ } -+ -+ if (bkey_deleted(k.k)) -+ goto found_slot; -+ } -+ -+ goto 
err; -+found_slot: -+ start_pos = iter->pos; -+ -+ ret = ec_stripe_mem_alloc(c, iter); -+ if (ret) -+ goto err; -+ -+ stripe->k.p = iter->pos; -+ -+ bch2_trans_update(&trans, iter, &stripe->k_i, 0); -+ -+ ret = bch2_trans_commit(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+err: -+ bch2_trans_iter_put(&trans, iter); -+ -+ if (ret == -EINTR) -+ goto retry; -+ -+ c->ec_stripe_hint = ret ? start_pos.offset : start_pos.offset + 1; -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+static void extent_stripe_ptr_add(struct bkey_s_extent e, -+ struct ec_stripe_buf *s, -+ struct bch_extent_ptr *ptr, -+ unsigned block) -+{ -+ struct bch_extent_stripe_ptr *dst = (void *) ptr; -+ union bch_extent_entry *end = extent_entry_last(e); -+ -+ memmove_u64s_up(dst + 1, dst, (u64 *) end - (u64 *) dst); -+ e.k->u64s += sizeof(*dst) / sizeof(u64); -+ -+ *dst = (struct bch_extent_stripe_ptr) { -+ .type = 1 << BCH_EXTENT_ENTRY_stripe_ptr, -+ .block = block, -+ .idx = s->key.k.p.offset, -+ }; -+} -+ -+static int ec_stripe_update_ptrs(struct bch_fs *c, -+ struct ec_stripe_buf *s, -+ struct bkey *pos) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_extent e; -+ struct bkey_on_stack sk; -+ int ret = 0, dev, idx; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ bkey_start_pos(pos), -+ BTREE_ITER_INTENT); -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret = bkey_err(k)) && -+ bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) { -+ struct bch_extent_ptr *ptr, *ec_ptr = NULL; -+ -+ if (extent_has_stripe_ptr(k, s->key.k.p.offset)) { -+ bch2_btree_iter_next(iter); -+ continue; -+ } -+ -+ idx = extent_matches_stripe(c, &s->key.v, k); -+ if (idx < 0) { -+ bch2_btree_iter_next(iter); -+ continue; -+ } -+ -+ dev = s->key.v.ptrs[idx].dev; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ e = bkey_i_to_s_extent(sk.k); -+ -+ extent_for_each_ptr(e, ptr) { -+ if (ptr->dev == dev) -+ ec_ptr = ptr; -+ else -+ ptr->cached = true; -+ } -+ -+ extent_stripe_ptr_add(e, s, ec_ptr, idx); -+ -+ bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); -+ bch2_trans_update(&trans, iter, sk.k, 0); -+ -+ ret = bch2_trans_commit(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE); -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ -+ return ret; -+} -+ -+/* -+ * data buckets of new stripe all written: create the stripe -+ */ -+static void ec_stripe_create(struct ec_stripe_new *s) -+{ -+ struct bch_fs *c = s->c; -+ struct open_bucket *ob; -+ struct bkey_i *k; -+ struct bch_stripe *v = &s->stripe.key.v; -+ unsigned i, nr_data = v->nr_blocks - v->nr_redundant; -+ struct closure cl; -+ int ret; -+ -+ BUG_ON(s->h->s == s); -+ -+ closure_init_stack(&cl); -+ -+ if (s->err) { -+ bch_err(c, "error creating stripe: error writing data buckets"); -+ goto err; -+ } -+ -+ if (!percpu_ref_tryget(&c->writes)) -+ goto err; -+ -+ BUG_ON(bitmap_weight(s->blocks_allocated, -+ s->blocks.nr) != s->blocks.nr); -+ -+ ec_generate_ec(&s->stripe); -+ -+ ec_generate_checksums(&s->stripe); -+ -+ /* write p/q: */ -+ for (i = nr_data; i < v->nr_blocks; i++) -+ ec_block_io(c, &s->stripe, REQ_OP_WRITE, i, &cl); -+ -+ closure_sync(&cl); -+ -+ for (i = nr_data; i < v->nr_blocks; i++) -+ if (!test_bit(i, s->stripe.valid)) { -+ bch_err(c, "error creating stripe: error writing redundancy buckets"); -+ goto err_put_writes; -+ } -+ -+ 
mutex_lock(&c->ec_stripe_create_lock); -+ -+ ret = ec_stripe_bkey_insert(c, &s->stripe.key); -+ if (ret) { -+ bch_err(c, "error creating stripe: error creating stripe key"); -+ goto err_unlock; -+ } -+ -+ for_each_keylist_key(&s->keys, k) { -+ ret = ec_stripe_update_ptrs(c, &s->stripe, &k->k); -+ if (ret) -+ break; -+ } -+ -+err_unlock: -+ mutex_unlock(&c->ec_stripe_create_lock); -+err_put_writes: -+ percpu_ref_put(&c->writes); -+err: -+ open_bucket_for_each(c, &s->blocks, ob, i) { -+ ob->ec = NULL; -+ __bch2_open_bucket_put(c, ob); -+ } -+ -+ bch2_open_buckets_put(c, &s->parity); -+ -+ bch2_keylist_free(&s->keys, s->inline_keys); -+ -+ mutex_lock(&s->h->lock); -+ list_del(&s->list); -+ mutex_unlock(&s->h->lock); -+ -+ for (i = 0; i < s->stripe.key.v.nr_blocks; i++) -+ kvpfree(s->stripe.data[i], s->stripe.size << 9); -+ kfree(s); -+} -+ -+static struct ec_stripe_new *ec_stripe_set_pending(struct ec_stripe_head *h) -+{ -+ struct ec_stripe_new *s = h->s; -+ -+ list_add(&s->list, &h->stripes); -+ h->s = NULL; -+ -+ return s; -+} -+ -+static void ec_stripe_new_put(struct ec_stripe_new *s) -+{ -+ BUG_ON(atomic_read(&s->pin) <= 0); -+ if (atomic_dec_and_test(&s->pin)) -+ ec_stripe_create(s); -+} -+ -+/* have a full bucket - hand it off to be erasure coded: */ -+void bch2_ec_bucket_written(struct bch_fs *c, struct open_bucket *ob) -+{ -+ struct ec_stripe_new *s = ob->ec; -+ -+ if (ob->sectors_free) -+ s->err = -1; -+ -+ ec_stripe_new_put(s); -+} -+ -+void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob) -+{ -+ struct ec_stripe_new *s = ob->ec; -+ -+ s->err = -EIO; -+} -+ -+void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp) -+{ -+ struct open_bucket *ob = ec_open_bucket(c, &wp->ptrs); -+ struct bch_dev *ca; -+ unsigned offset; -+ -+ if (!ob) -+ return NULL; -+ -+ ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ offset = ca->mi.bucket_size - ob->sectors_free; -+ -+ return ob->ec->stripe.data[ob->ec_idx] + (offset << 9); -+} -+ -+void bch2_ec_add_backpointer(struct bch_fs *c, struct write_point *wp, -+ struct bpos pos, unsigned sectors) -+{ -+ struct open_bucket *ob = ec_open_bucket(c, &wp->ptrs); -+ struct ec_stripe_new *ec; -+ -+ if (!ob) -+ return; -+ -+ ec = ob->ec; -+ mutex_lock(&ec->lock); -+ -+ if (bch2_keylist_realloc(&ec->keys, ec->inline_keys, -+ ARRAY_SIZE(ec->inline_keys), -+ BKEY_U64s)) { -+ BUG(); -+ } -+ -+ bkey_init(&ec->keys.top->k); -+ ec->keys.top->k.p = pos; -+ bch2_key_resize(&ec->keys.top->k, sectors); -+ bch2_keylist_push(&ec->keys); -+ -+ mutex_unlock(&ec->lock); -+} -+ -+static int unsigned_cmp(const void *_l, const void *_r) -+{ -+ unsigned l = *((const unsigned *) _l); -+ unsigned r = *((const unsigned *) _r); -+ -+ return cmp_int(l, r); -+} -+ -+/* pick most common bucket size: */ -+static unsigned pick_blocksize(struct bch_fs *c, -+ struct bch_devs_mask *devs) -+{ -+ struct bch_dev *ca; -+ unsigned i, nr = 0, sizes[BCH_SB_MEMBERS_MAX]; -+ struct { -+ unsigned nr, size; -+ } cur = { 0, 0 }, best = { 0, 0 }; -+ -+ for_each_member_device_rcu(ca, c, i, devs) -+ sizes[nr++] = ca->mi.bucket_size; -+ -+ sort(sizes, nr, sizeof(unsigned), unsigned_cmp, NULL); -+ -+ for (i = 0; i < nr; i++) { -+ if (sizes[i] != cur.size) { -+ if (cur.nr > best.nr) -+ best = cur; -+ -+ cur.nr = 0; -+ cur.size = sizes[i]; -+ } -+ -+ cur.nr++; -+ } -+ -+ if (cur.nr > best.nr) -+ best = cur; -+ -+ return best.size; -+} -+ -+int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h) -+{ -+ struct ec_stripe_new *s; -+ unsigned i; -+ -+ BUG_ON(h->parity.nr != 
h->redundancy); -+ BUG_ON(!h->blocks.nr); -+ BUG_ON(h->parity.nr + h->blocks.nr > EC_STRIPE_MAX); -+ lockdep_assert_held(&h->lock); -+ -+ s = kzalloc(sizeof(*s), GFP_KERNEL); -+ if (!s) -+ return -ENOMEM; -+ -+ mutex_init(&s->lock); -+ atomic_set(&s->pin, 1); -+ s->c = c; -+ s->h = h; -+ s->blocks = h->blocks; -+ s->parity = h->parity; -+ -+ memset(&h->blocks, 0, sizeof(h->blocks)); -+ memset(&h->parity, 0, sizeof(h->parity)); -+ -+ bch2_keylist_init(&s->keys, s->inline_keys); -+ -+ s->stripe.offset = 0; -+ s->stripe.size = h->blocksize; -+ memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid)); -+ -+ ec_stripe_key_init(c, &s->stripe.key, -+ &s->blocks, &s->parity, -+ h->blocksize); -+ -+ for (i = 0; i < s->stripe.key.v.nr_blocks; i++) { -+ s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL); -+ if (!s->stripe.data[i]) -+ goto err; -+ } -+ -+ h->s = s; -+ -+ return 0; -+err: -+ for (i = 0; i < s->stripe.key.v.nr_blocks; i++) -+ kvpfree(s->stripe.data[i], s->stripe.size << 9); -+ kfree(s); -+ return -ENOMEM; -+} -+ -+static struct ec_stripe_head * -+ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target, -+ unsigned algo, unsigned redundancy) -+{ -+ struct ec_stripe_head *h; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ h = kzalloc(sizeof(*h), GFP_KERNEL); -+ if (!h) -+ return NULL; -+ -+ mutex_init(&h->lock); -+ mutex_lock(&h->lock); -+ INIT_LIST_HEAD(&h->stripes); -+ -+ h->target = target; -+ h->algo = algo; -+ h->redundancy = redundancy; -+ -+ rcu_read_lock(); -+ h->devs = target_rw_devs(c, BCH_DATA_USER, target); -+ -+ for_each_member_device_rcu(ca, c, i, &h->devs) -+ if (!ca->mi.durability) -+ __clear_bit(i, h->devs.d); -+ -+ h->blocksize = pick_blocksize(c, &h->devs); -+ -+ for_each_member_device_rcu(ca, c, i, &h->devs) -+ if (ca->mi.bucket_size == h->blocksize) -+ h->nr_active_devs++; -+ -+ rcu_read_unlock(); -+ list_add(&h->list, &c->ec_new_stripe_list); -+ return h; -+} -+ -+void bch2_ec_stripe_head_put(struct ec_stripe_head *h) -+{ -+ struct ec_stripe_new *s = NULL; -+ -+ if (h->s && -+ bitmap_weight(h->s->blocks_allocated, -+ h->s->blocks.nr) == h->s->blocks.nr) -+ s = ec_stripe_set_pending(h); -+ -+ mutex_unlock(&h->lock); -+ -+ if (s) -+ ec_stripe_new_put(s); -+} -+ -+struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, -+ unsigned target, -+ unsigned algo, -+ unsigned redundancy) -+{ -+ struct ec_stripe_head *h; -+ -+ if (!redundancy) -+ return NULL; -+ -+ mutex_lock(&c->ec_new_stripe_lock); -+ list_for_each_entry(h, &c->ec_new_stripe_list, list) -+ if (h->target == target && -+ h->algo == algo && -+ h->redundancy == redundancy) { -+ mutex_lock(&h->lock); -+ goto found; -+ } -+ -+ h = ec_new_stripe_head_alloc(c, target, algo, redundancy); -+found: -+ mutex_unlock(&c->ec_new_stripe_lock); -+ return h; -+} -+ -+void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct ec_stripe_head *h; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ mutex_lock(&c->ec_new_stripe_lock); -+ list_for_each_entry(h, &c->ec_new_stripe_list, list) { -+ struct ec_stripe_new *s = NULL; -+ -+ mutex_lock(&h->lock); -+ bch2_open_buckets_stop_dev(c, ca, &h->blocks); -+ bch2_open_buckets_stop_dev(c, ca, &h->parity); -+ -+ if (!h->s) -+ goto unlock; -+ -+ open_bucket_for_each(c, &h->s->blocks, ob, i) -+ if (ob->ptr.dev == ca->dev_idx) -+ goto found; -+ open_bucket_for_each(c, &h->s->parity, ob, i) -+ if (ob->ptr.dev == ca->dev_idx) -+ goto found; -+ goto unlock; -+found: -+ h->s->err = -1; -+ s = ec_stripe_set_pending(h); -+unlock: -+ mutex_unlock(&h->lock); -+ -+ if 
(s) -+ ec_stripe_new_put(s); -+ } -+ mutex_unlock(&c->ec_new_stripe_lock); -+} -+ -+static int __bch2_stripe_write_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct stripe *m, -+ size_t idx, -+ struct bkey_i_stripe *new_key) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c k; -+ unsigned i; -+ int ret; -+ -+ bch2_btree_iter_set_pos(iter, POS(0, idx)); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ return ret; -+ -+ if (k.k->type != KEY_TYPE_stripe) -+ return -EIO; -+ -+ bkey_reassemble(&new_key->k_i, k); -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ -+ for (i = 0; i < new_key->v.nr_blocks; i++) -+ stripe_blockcount_set(&new_key->v, i, -+ m->block_sectors[i]); -+ m->dirty = false; -+ -+ spin_unlock(&c->ec_stripes_heap_lock); -+ -+ bch2_trans_update(trans, iter, &new_key->k_i, 0); -+ return 0; -+} -+ -+int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct genradix_iter giter; -+ struct bkey_i_stripe *new_key; -+ struct stripe *m; -+ int ret = 0; -+ -+ new_key = kmalloc(255 * sizeof(u64), GFP_KERNEL); -+ BUG_ON(!new_key); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ -+ genradix_for_each(&c->stripes[0], giter, m) { -+ if (!m->dirty) -+ continue; -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL|flags, -+ __bch2_stripe_write_key(&trans, iter, m, -+ giter.pos, new_key)); -+ -+ if (ret) -+ break; -+ -+ *wrote = true; -+ } -+ -+ bch2_trans_exit(&trans); -+ -+ kfree(new_key); -+ -+ return ret; -+} -+ -+static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bkey_s_c k) -+{ -+ int ret = 0; -+ -+ if (k.k->type == KEY_TYPE_stripe) -+ ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?: -+ bch2_mark_key(c, k, 0, 0, NULL, 0, -+ BTREE_TRIGGER_ALLOC_READ| -+ BTREE_TRIGGER_NOATOMIC); -+ -+ return ret; -+} -+ -+int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys) -+{ -+ int ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_EC, -+ NULL, bch2_stripes_read_fn); -+ if (ret) -+ bch_err(c, "error reading stripes: %i", ret); -+ -+ return ret; -+} -+ -+int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ size_t i, idx = 0; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, U64_MAX), 0); -+ -+ k = bch2_btree_iter_prev(iter); -+ if (!IS_ERR_OR_NULL(k.k)) -+ idx = k.k->p.offset + 1; -+ ret = bch2_trans_exit(&trans); -+ if (ret) -+ return ret; -+ -+ if (!idx) -+ return 0; -+ -+ if (!gc && -+ !init_heap(&c->ec_stripes_heap, roundup_pow_of_two(idx), -+ GFP_KERNEL)) -+ return -ENOMEM; -+#if 0 -+ ret = genradix_prealloc(&c->stripes[gc], idx, GFP_KERNEL); -+#else -+ for (i = 0; i < idx; i++) -+ if (!genradix_ptr_alloc(&c->stripes[gc], i, GFP_KERNEL)) -+ return -ENOMEM; -+#endif -+ return 0; -+} -+ -+void bch2_fs_ec_exit(struct bch_fs *c) -+{ -+ struct ec_stripe_head *h; -+ -+ while (1) { -+ mutex_lock(&c->ec_new_stripe_lock); -+ h = list_first_entry_or_null(&c->ec_new_stripe_list, -+ struct ec_stripe_head, list); -+ if (h) -+ list_del(&h->list); -+ mutex_unlock(&c->ec_new_stripe_lock); -+ if (!h) -+ break; -+ -+ BUG_ON(h->s); -+ BUG_ON(!list_empty(&h->stripes)); -+ kfree(h); -+ } -+ -+ free_heap(&c->ec_stripes_heap); -+ 
genradix_free(&c->stripes[0]); -+ bioset_exit(&c->ec_bioset); -+} -+ -+int bch2_fs_ec_init(struct bch_fs *c) -+{ -+ INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work); -+ -+ return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio), -+ BIOSET_NEED_BVECS); -+} -diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h -new file mode 100644 -index 000000000000..4dfaac034886 ---- /dev/null -+++ b/fs/bcachefs/ec.h -@@ -0,0 +1,163 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EC_H -+#define _BCACHEFS_EC_H -+ -+#include "ec_types.h" -+#include "keylist_types.h" -+ -+const char *bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+#define bch2_bkey_ops_stripe (struct bkey_ops) { \ -+ .key_invalid = bch2_stripe_invalid, \ -+ .val_to_text = bch2_stripe_to_text, \ -+ .swab = bch2_ptr_swab, \ -+} -+ -+static inline unsigned stripe_csums_per_device(const struct bch_stripe *s) -+{ -+ return DIV_ROUND_UP(le16_to_cpu(s->sectors), -+ 1 << s->csum_granularity_bits); -+} -+ -+static inline unsigned stripe_csum_offset(const struct bch_stripe *s, -+ unsigned dev, unsigned csum_idx) -+{ -+ unsigned csum_bytes = bch_crc_bytes[s->csum_type]; -+ -+ return sizeof(struct bch_stripe) + -+ sizeof(struct bch_extent_ptr) * s->nr_blocks + -+ (dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes; -+} -+ -+static inline unsigned stripe_blockcount_offset(const struct bch_stripe *s, -+ unsigned idx) -+{ -+ return stripe_csum_offset(s, s->nr_blocks, 0) + -+ sizeof(u16) * idx; -+} -+ -+static inline unsigned stripe_blockcount_get(const struct bch_stripe *s, -+ unsigned idx) -+{ -+ return le16_to_cpup((void *) s + stripe_blockcount_offset(s, idx)); -+} -+ -+static inline void stripe_blockcount_set(struct bch_stripe *s, -+ unsigned idx, unsigned v) -+{ -+ __le16 *p = (void *) s + stripe_blockcount_offset(s, idx); -+ -+ *p = cpu_to_le16(v); -+} -+ -+static inline unsigned stripe_val_u64s(const struct bch_stripe *s) -+{ -+ return DIV_ROUND_UP(stripe_blockcount_offset(s, s->nr_blocks), -+ sizeof(u64)); -+} -+ -+static inline void *stripe_csum(struct bch_stripe *s, -+ unsigned dev, unsigned csum_idx) -+{ -+ return (void *) s + stripe_csum_offset(s, dev, csum_idx); -+} -+ -+struct bch_read_bio; -+ -+struct ec_stripe_buf { -+ /* might not be buffering the entire stripe: */ -+ unsigned offset; -+ unsigned size; -+ unsigned long valid[BITS_TO_LONGS(EC_STRIPE_MAX)]; -+ -+ void *data[EC_STRIPE_MAX]; -+ -+ union { -+ struct bkey_i_stripe key; -+ u64 pad[255]; -+ }; -+}; -+ -+struct ec_stripe_head; -+ -+struct ec_stripe_new { -+ struct bch_fs *c; -+ struct ec_stripe_head *h; -+ struct mutex lock; -+ struct list_head list; -+ -+ /* counts in flight writes, stripe is created when pin == 0 */ -+ atomic_t pin; -+ -+ int err; -+ -+ unsigned long blocks_allocated[BITS_TO_LONGS(EC_STRIPE_MAX)]; -+ -+ struct open_buckets blocks; -+ struct open_buckets parity; -+ -+ struct keylist keys; -+ u64 inline_keys[BKEY_U64s * 8]; -+ -+ struct ec_stripe_buf stripe; -+}; -+ -+struct ec_stripe_head { -+ struct list_head list; -+ struct mutex lock; -+ -+ struct list_head stripes; -+ -+ unsigned target; -+ unsigned algo; -+ unsigned redundancy; -+ -+ struct bch_devs_mask devs; -+ unsigned nr_active_devs; -+ -+ unsigned blocksize; -+ -+ struct dev_stripe_state block_stripe; -+ struct dev_stripe_state parity_stripe; -+ -+ struct open_buckets blocks; -+ struct open_buckets parity; -+ -+ struct ec_stripe_new *s; -+}; -+ -+int 
bch2_ec_read_extent(struct bch_fs *, struct bch_read_bio *); -+ -+void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *); -+void bch2_ec_add_backpointer(struct bch_fs *, struct write_point *, -+ struct bpos, unsigned); -+ -+void bch2_ec_bucket_written(struct bch_fs *, struct open_bucket *); -+void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *); -+ -+int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *); -+ -+void bch2_ec_stripe_head_put(struct ec_stripe_head *); -+struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *, unsigned, -+ unsigned, unsigned); -+ -+void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t); -+void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t); -+void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t); -+ -+void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); -+ -+void bch2_ec_flush_new_stripes(struct bch_fs *); -+ -+struct journal_keys; -+int bch2_stripes_read(struct bch_fs *, struct journal_keys *); -+int bch2_stripes_write(struct bch_fs *, unsigned, bool *); -+ -+int bch2_ec_mem_alloc(struct bch_fs *, bool); -+ -+void bch2_fs_ec_exit(struct bch_fs *); -+int bch2_fs_ec_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_EC_H */ -diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h -new file mode 100644 -index 000000000000..5c3f77c8aac7 ---- /dev/null -+++ b/fs/bcachefs/ec_types.h -@@ -0,0 +1,38 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EC_TYPES_H -+#define _BCACHEFS_EC_TYPES_H -+ -+#include -+ -+#define EC_STRIPE_MAX 16 -+ -+struct bch_replicas_padded { -+ struct bch_replicas_entry e; -+ u8 pad[EC_STRIPE_MAX]; -+}; -+ -+struct stripe { -+ size_t heap_idx; -+ -+ u16 sectors; -+ u8 algorithm; -+ -+ u8 nr_blocks; -+ u8 nr_redundant; -+ -+ unsigned alive:1; -+ unsigned dirty:1; -+ u8 blocks_nonempty; -+ u16 block_sectors[EC_STRIPE_MAX]; -+ -+ struct bch_replicas_padded r; -+}; -+ -+struct ec_stripe_heap_entry { -+ size_t idx; -+ unsigned blocks_nonempty; -+}; -+ -+typedef HEAP(struct ec_stripe_heap_entry) ec_stripes_heap; -+ -+#endif /* _BCACHEFS_EC_TYPES_H */ -diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -new file mode 100644 -index 000000000000..cd46706fb6f5 ---- /dev/null -+++ b/fs/bcachefs/error.c -@@ -0,0 +1,172 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "error.h" -+#include "io.h" -+#include "super.h" -+ -+#define FSCK_ERR_RATELIMIT_NR 10 -+ -+bool bch2_inconsistent_error(struct bch_fs *c) -+{ -+ set_bit(BCH_FS_ERROR, &c->flags); -+ -+ switch (c->opts.errors) { -+ case BCH_ON_ERROR_CONTINUE: -+ return false; -+ case BCH_ON_ERROR_RO: -+ if (bch2_fs_emergency_read_only(c)) -+ bch_err(c, "emergency read only"); -+ return true; -+ case BCH_ON_ERROR_PANIC: -+ panic(bch2_fmt(c, "panic after error")); -+ return true; -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_fatal_error(struct bch_fs *c) -+{ -+ if (bch2_fs_emergency_read_only(c)) -+ bch_err(c, "emergency read only"); -+} -+ -+void bch2_io_error_work(struct work_struct *work) -+{ -+ struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); -+ struct bch_fs *c = ca->fs; -+ bool dev; -+ -+ down_write(&c->state_lock); -+ dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO, -+ BCH_FORCE_IF_DEGRADED); -+ if (dev -+ ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, -+ BCH_FORCE_IF_DEGRADED) -+ : bch2_fs_emergency_read_only(c)) -+ bch_err(ca, -+ "too many IO errors, setting %s RO", -+ dev ? 
"device" : "filesystem"); -+ up_write(&c->state_lock); -+} -+ -+void bch2_io_error(struct bch_dev *ca) -+{ -+ //queue_work(system_long_wq, &ca->io_error_work); -+} -+ -+#ifdef __KERNEL__ -+#define ask_yn() false -+#else -+#include "tools-util.h" -+#endif -+ -+enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags, -+ const char *fmt, ...) -+{ -+ struct fsck_err_state *s = NULL; -+ va_list args; -+ bool fix = false, print = true, suppressing = false; -+ char _buf[sizeof(s->buf)], *buf = _buf; -+ -+ if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) { -+ va_start(args, fmt); -+ vprintk(fmt, args); -+ va_end(args); -+ -+ return bch2_inconsistent_error(c) -+ ? FSCK_ERR_EXIT -+ : FSCK_ERR_FIX; -+ } -+ -+ mutex_lock(&c->fsck_error_lock); -+ -+ list_for_each_entry(s, &c->fsck_errors, list) -+ if (s->fmt == fmt) -+ goto found; -+ -+ s = kzalloc(sizeof(*s), GFP_NOFS); -+ if (!s) { -+ if (!c->fsck_alloc_err) -+ bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); -+ c->fsck_alloc_err = true; -+ buf = _buf; -+ goto print; -+ } -+ -+ INIT_LIST_HEAD(&s->list); -+ s->fmt = fmt; -+found: -+ list_move(&s->list, &c->fsck_errors); -+ s->nr++; -+ if (c->opts.ratelimit_errors && -+ s->nr >= FSCK_ERR_RATELIMIT_NR) { -+ if (s->nr == FSCK_ERR_RATELIMIT_NR) -+ suppressing = true; -+ else -+ print = false; -+ } -+ buf = s->buf; -+print: -+ va_start(args, fmt); -+ vscnprintf(buf, sizeof(_buf), fmt, args); -+ va_end(args); -+ -+ if (c->opts.fix_errors == FSCK_OPT_EXIT) { -+ bch_err(c, "%s, exiting", buf); -+ } else if (flags & FSCK_CAN_FIX) { -+ if (c->opts.fix_errors == FSCK_OPT_ASK) { -+ printk(KERN_ERR "%s: fix?", buf); -+ fix = ask_yn(); -+ } else if (c->opts.fix_errors == FSCK_OPT_YES || -+ (c->opts.nochanges && -+ !(flags & FSCK_CAN_IGNORE))) { -+ if (print) -+ bch_err(c, "%s, fixing", buf); -+ fix = true; -+ } else { -+ if (print) -+ bch_err(c, "%s, not fixing", buf); -+ fix = false; -+ } -+ } else if (flags & FSCK_NEED_FSCK) { -+ if (print) -+ bch_err(c, "%s (run fsck to correct)", buf); -+ } else { -+ if (print) -+ bch_err(c, "%s (repair unimplemented)", buf); -+ } -+ -+ if (suppressing) -+ bch_err(c, "Ratelimiting new instances of previous error"); -+ -+ mutex_unlock(&c->fsck_error_lock); -+ -+ if (fix) { -+ set_bit(BCH_FS_ERRORS_FIXED, &c->flags); -+ return FSCK_ERR_FIX; -+ } else { -+ set_bit(BCH_FS_ERROR, &c->flags); -+ return c->opts.fix_errors == FSCK_OPT_EXIT || -+ !(flags & FSCK_CAN_IGNORE) -+ ? FSCK_ERR_EXIT -+ : FSCK_ERR_IGNORE; -+ } -+} -+ -+void bch2_flush_fsck_errs(struct bch_fs *c) -+{ -+ struct fsck_err_state *s, *n; -+ -+ mutex_lock(&c->fsck_error_lock); -+ -+ list_for_each_entry_safe(s, n, &c->fsck_errors, list) { -+ if (s->ratelimited) -+ bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->buf); -+ -+ list_del(&s->list); -+ kfree(s); -+ } -+ -+ mutex_unlock(&c->fsck_error_lock); -+} -diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h -new file mode 100644 -index 000000000000..94b53312fbbd ---- /dev/null -+++ b/fs/bcachefs/error.h -@@ -0,0 +1,211 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ERROR_H -+#define _BCACHEFS_ERROR_H -+ -+#include -+#include -+ -+struct bch_dev; -+struct bch_fs; -+struct work_struct; -+ -+/* -+ * XXX: separate out errors that indicate on disk data is inconsistent, and flag -+ * superblock as such -+ */ -+ -+/* Error messages: */ -+ -+/* -+ * Inconsistency errors: The on disk data is inconsistent. 
If these occur during -+ * initial recovery, they don't indicate a bug in the running code - we walk all -+ * the metadata before modifying anything. If they occur at runtime, they -+ * indicate either a bug in the running code or (less likely) data is being -+ * silently corrupted under us. -+ * -+ * XXX: audit all inconsistent errors and make sure they're all recoverable, in -+ * BCH_ON_ERROR_CONTINUE mode -+ */ -+ -+bool bch2_inconsistent_error(struct bch_fs *); -+ -+#define bch2_fs_inconsistent(c, ...) \ -+({ \ -+ bch_err(c, __VA_ARGS__); \ -+ bch2_inconsistent_error(c); \ -+}) -+ -+#define bch2_fs_inconsistent_on(cond, c, ...) \ -+({ \ -+ int _ret = !!(cond); \ -+ \ -+ if (_ret) \ -+ bch2_fs_inconsistent(c, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* -+ * Later we might want to mark only the particular device inconsistent, not the -+ * entire filesystem: -+ */ -+ -+#define bch2_dev_inconsistent(ca, ...) \ -+do { \ -+ bch_err(ca, __VA_ARGS__); \ -+ bch2_inconsistent_error((ca)->fs); \ -+} while (0) -+ -+#define bch2_dev_inconsistent_on(cond, ca, ...) \ -+({ \ -+ int _ret = !!(cond); \ -+ \ -+ if (_ret) \ -+ bch2_dev_inconsistent(ca, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* -+ * Fsck errors: inconsistency errors we detect at mount time, and should ideally -+ * be able to repair: -+ */ -+ -+enum { -+ BCH_FSCK_OK = 0, -+ BCH_FSCK_ERRORS_NOT_FIXED = 1, -+ BCH_FSCK_REPAIR_UNIMPLEMENTED = 2, -+ BCH_FSCK_REPAIR_IMPOSSIBLE = 3, -+ BCH_FSCK_UNKNOWN_VERSION = 4, -+}; -+ -+enum fsck_err_opts { -+ FSCK_OPT_EXIT, -+ FSCK_OPT_YES, -+ FSCK_OPT_NO, -+ FSCK_OPT_ASK, -+}; -+ -+enum fsck_err_ret { -+ FSCK_ERR_IGNORE = 0, -+ FSCK_ERR_FIX = 1, -+ FSCK_ERR_EXIT = 2, -+}; -+ -+struct fsck_err_state { -+ struct list_head list; -+ const char *fmt; -+ u64 nr; -+ bool ratelimited; -+ char buf[512]; -+}; -+ -+#define FSCK_CAN_FIX (1 << 0) -+#define FSCK_CAN_IGNORE (1 << 1) -+#define FSCK_NEED_FSCK (1 << 2) -+ -+__printf(3, 4) __cold -+enum fsck_err_ret bch2_fsck_err(struct bch_fs *, -+ unsigned, const char *, ...); -+void bch2_flush_fsck_errs(struct bch_fs *); -+ -+#define __fsck_err(c, _flags, msg, ...) \ -+({ \ -+ int _fix = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__);\ -+ \ -+ if (_fix == FSCK_ERR_EXIT) { \ -+ bch_err(c, "Unable to continue, halting"); \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ } \ -+ \ -+ _fix; \ -+}) -+ -+/* These macros return true if error should be fixed: */ -+ -+/* XXX: mark in superblock that filesystem contains errors, if we ignore: */ -+ -+#define __fsck_err_on(cond, c, _flags, ...) \ -+ ((cond) ? __fsck_err(c, _flags, ##__VA_ARGS__) : false) -+ -+#define need_fsck_err_on(cond, c, ...) \ -+ __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) -+ -+#define need_fsck_err(c, ...) \ -+ __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) -+ -+#define mustfix_fsck_err(c, ...) \ -+ __fsck_err(c, FSCK_CAN_FIX, ##__VA_ARGS__) -+ -+#define mustfix_fsck_err_on(cond, c, ...) \ -+ __fsck_err_on(cond, c, FSCK_CAN_FIX, ##__VA_ARGS__) -+ -+#define fsck_err(c, ...) \ -+ __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) -+ -+#define fsck_err_on(cond, c, ...) \ -+ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) -+ -+/* -+ * Fatal errors: these don't indicate a bug, but we can't continue running in RW -+ * mode - pretty much just due to metadata IO errors: -+ */ -+ -+void bch2_fatal_error(struct bch_fs *); -+ -+#define bch2_fs_fatal_error(c, ...) 
\ -+do { \ -+ bch_err(c, __VA_ARGS__); \ -+ bch2_fatal_error(c); \ -+} while (0) -+ -+#define bch2_fs_fatal_err_on(cond, c, ...) \ -+({ \ -+ int _ret = !!(cond); \ -+ \ -+ if (_ret) \ -+ bch2_fs_fatal_error(c, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* -+ * IO errors: either recoverable metadata IO (because we have replicas), or data -+ * IO - we need to log it and print out a message, but we don't (necessarily) -+ * want to shut down the fs: -+ */ -+ -+void bch2_io_error_work(struct work_struct *); -+ -+/* Does the error handling without logging a message */ -+void bch2_io_error(struct bch_dev *); -+ -+/* Logs message and handles the error: */ -+#define bch2_dev_io_error(ca, fmt, ...) \ -+do { \ -+ printk_ratelimited(KERN_ERR bch2_fmt((ca)->fs, \ -+ "IO error on %s for " fmt), \ -+ (ca)->name, ##__VA_ARGS__); \ -+ bch2_io_error(ca); \ -+} while (0) -+ -+#define bch2_dev_io_err_on(cond, ca, ...) \ -+({ \ -+ bool _ret = (cond); \ -+ \ -+ if (_ret) \ -+ bch2_dev_io_error(ca, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* kill? */ -+ -+#define __bcache_io_error(c, fmt, ...) \ -+ printk_ratelimited(KERN_ERR bch2_fmt(c, \ -+ "IO error: " fmt), ##__VA_ARGS__) -+ -+#define bcache_io_error(c, bio, fmt, ...) \ -+do { \ -+ __bcache_io_error(c, fmt, ##__VA_ARGS__); \ -+ (bio)->bi_status = BLK_STS_IOERR; \ -+} while (0) -+ -+#endif /* _BCACHEFS_ERROR_H */ -diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c -new file mode 100644 -index 000000000000..fd011df3cb99 ---- /dev/null -+++ b/fs/bcachefs/extent_update.c -@@ -0,0 +1,229 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "debug.h" -+#include "extents.h" -+#include "extent_update.h" -+ -+/* -+ * This counts the number of iterators to the alloc & ec btrees we'll need -+ * inserting/removing this extent: -+ */ -+static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ unsigned ret = 0; -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ switch (__extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ ret++; -+ } -+ } -+ -+ return ret; -+} -+ -+static int count_iters_for_insert(struct btree_trans *trans, -+ struct bkey_s_c k, -+ unsigned offset, -+ struct bpos *end, -+ unsigned *nr_iters, -+ unsigned max_iters) -+{ -+ int ret = 0, ret2 = 0; -+ -+ if (*nr_iters >= max_iters) { -+ *end = bpos_min(*end, k.k->p); -+ ret = 1; -+ } -+ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ *nr_iters += bch2_bkey_nr_alloc_ptrs(k); -+ -+ if (*nr_iters >= max_iters) { -+ *end = bpos_min(*end, k.k->p); -+ ret = 1; -+ } -+ -+ break; -+ case KEY_TYPE_reflink_p: { -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ u64 idx = le64_to_cpu(p.v->idx); -+ unsigned sectors = bpos_min(*end, p.k->p).offset - -+ bkey_start_offset(p.k); -+ struct btree_iter *iter; -+ struct bkey_s_c r_k; -+ -+ for_each_btree_key(trans, iter, -+ BTREE_ID_REFLINK, POS(0, idx + offset), -+ BTREE_ITER_SLOTS, r_k, ret2) { -+ if (bkey_cmp(bkey_start_pos(r_k.k), -+ POS(0, idx + sectors)) >= 0) -+ break; -+ -+ /* extent_update_to_keys(), for the reflink_v update */ -+ *nr_iters += 1; -+ -+ *nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k); -+ -+ if (*nr_iters >= max_iters) { -+ struct bpos pos = bkey_start_pos(k.k); -+ pos.offset += min_t(u64, k.k->size, -+ r_k.k->p.offset - idx); 
-+ -+ *end = bpos_min(*end, pos); -+ ret = 1; -+ break; -+ } -+ } -+ -+ bch2_trans_iter_put(trans, iter); -+ break; -+ } -+ } -+ -+ return ret2 ?: ret; -+} -+ -+#define EXTENT_ITERS_MAX (BTREE_ITER_MAX / 3) -+ -+int bch2_extent_atomic_end(struct btree_iter *iter, -+ struct bkey_i *insert, -+ struct bpos *end) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree *b; -+ struct btree_node_iter node_iter; -+ struct bkey_packed *_k; -+ unsigned nr_iters = 0; -+ int ret; -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return ret; -+ -+ b = iter->l[0].b; -+ node_iter = iter->l[0].iter; -+ -+ BUG_ON(bkey_cmp(b->data->min_key, POS_MIN) && -+ bkey_cmp(bkey_start_pos(&insert->k), -+ bkey_predecessor(b->data->min_key)) < 0); -+ -+ *end = bpos_min(insert->k.p, b->key.k.p); -+ -+ /* extent_update_to_keys(): */ -+ nr_iters += 1; -+ -+ ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end, -+ &nr_iters, EXTENT_ITERS_MAX / 2); -+ if (ret < 0) -+ return ret; -+ -+ while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) { -+ struct bkey unpacked; -+ struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked); -+ unsigned offset = 0; -+ -+ if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0) -+ break; -+ -+ if (bkey_cmp(bkey_start_pos(&insert->k), -+ bkey_start_pos(k.k)) > 0) -+ offset = bkey_start_offset(&insert->k) - -+ bkey_start_offset(k.k); -+ -+ /* extent_handle_overwrites(): */ -+ switch (bch2_extent_overlap(&insert->k, k.k)) { -+ case BCH_EXTENT_OVERLAP_ALL: -+ case BCH_EXTENT_OVERLAP_FRONT: -+ nr_iters += 1; -+ break; -+ case BCH_EXTENT_OVERLAP_BACK: -+ case BCH_EXTENT_OVERLAP_MIDDLE: -+ nr_iters += 2; -+ break; -+ } -+ -+ ret = count_iters_for_insert(trans, k, offset, end, -+ &nr_iters, EXTENT_ITERS_MAX); -+ if (ret) -+ break; -+ -+ bch2_btree_node_iter_advance(&node_iter, b); -+ } -+ -+ return ret < 0 ? ret : 0; -+} -+ -+int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter) -+{ -+ struct bpos end; -+ int ret; -+ -+ ret = bch2_extent_atomic_end(iter, k, &end); -+ if (ret) -+ return ret; -+ -+ bch2_cut_back(end, k); -+ return 0; -+} -+ -+int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter) -+{ -+ struct bpos end; -+ int ret; -+ -+ ret = bch2_extent_atomic_end(iter, k, &end); -+ if (ret) -+ return ret; -+ -+ return !bkey_cmp(end, k->k.p); -+} -+ -+enum btree_insert_ret -+bch2_extent_can_insert(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct btree_node_iter node_iter = l->iter; -+ struct bkey_packed *_k; -+ struct bkey_s_c k; -+ struct bkey unpacked; -+ int sectors; -+ -+ _k = bch2_btree_node_iter_peek(&node_iter, l->b); -+ if (!_k) -+ return BTREE_INSERT_OK; -+ -+ k = bkey_disassemble(l->b, _k, &unpacked); -+ -+ /* Check if we're splitting a compressed extent: */ -+ -+ if (bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k)) > 0 && -+ bkey_cmp(insert->k.p, k.k->p) < 0 && -+ (sectors = bch2_bkey_sectors_compressed(k))) { -+ int flags = trans->flags & BTREE_INSERT_NOFAIL -+ ? 
BCH_DISK_RESERVATION_NOFAIL : 0; -+ -+ switch (bch2_disk_reservation_add(trans->c, trans->disk_res, -+ sectors, flags)) { -+ case 0: -+ break; -+ case -ENOSPC: -+ return BTREE_INSERT_ENOSPC; -+ default: -+ BUG(); -+ } -+ } -+ -+ return BTREE_INSERT_OK; -+} -diff --git a/fs/bcachefs/extent_update.h b/fs/bcachefs/extent_update.h -new file mode 100644 -index 000000000000..38dc084627d2 ---- /dev/null -+++ b/fs/bcachefs/extent_update.h -@@ -0,0 +1,16 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EXTENT_UPDATE_H -+#define _BCACHEFS_EXTENT_UPDATE_H -+ -+#include "bcachefs.h" -+ -+int bch2_extent_atomic_end(struct btree_iter *, struct bkey_i *, -+ struct bpos *); -+int bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *); -+int bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *); -+ -+enum btree_insert_ret -+bch2_extent_can_insert(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *); -+ -+#endif /* _BCACHEFS_EXTENT_UPDATE_H */ -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -new file mode 100644 -index 000000000000..251d4af773a5 ---- /dev/null -+++ b/fs/bcachefs/extents.c -@@ -0,0 +1,1268 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2010 Kent Overstreet -+ * -+ * Code for managing the extent btree and dynamically updating the writeback -+ * dirty sector count. -+ */ -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_gc.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "error.h" -+#include "extents.h" -+#include "inode.h" -+#include "journal.h" -+#include "replicas.h" -+#include "super.h" -+#include "super-io.h" -+#include "util.h" -+ -+#include -+ -+static unsigned bch2_crc_field_size_max[] = { -+ [BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX, -+ [BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX, -+ [BCH_EXTENT_ENTRY_crc128] = CRC128_SIZE_MAX, -+}; -+ -+static void bch2_extent_crc_pack(union bch_extent_crc *, -+ struct bch_extent_crc_unpacked, -+ enum bch_extent_entry_type); -+ -+static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f, -+ unsigned dev) -+{ -+ struct bch_dev_io_failures *i; -+ -+ for (i = f->devs; i < f->devs + f->nr; i++) -+ if (i->dev == dev) -+ return i; -+ -+ return NULL; -+} -+ -+void bch2_mark_io_failure(struct bch_io_failures *failed, -+ struct extent_ptr_decoded *p) -+{ -+ struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev); -+ -+ if (!f) { -+ BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs)); -+ -+ f = &failed->devs[failed->nr++]; -+ f->dev = p->ptr.dev; -+ f->idx = p->idx; -+ f->nr_failed = 1; -+ f->nr_retries = 0; -+ } else if (p->idx != f->idx) { -+ f->idx = p->idx; -+ f->nr_failed = 1; -+ f->nr_retries = 0; -+ } else { -+ f->nr_failed++; -+ } -+} -+ -+/* -+ * returns true if p1 is better than p2: -+ */ -+static inline bool ptr_better(struct bch_fs *c, -+ const struct extent_ptr_decoded p1, -+ const struct extent_ptr_decoded p2) -+{ -+ if (likely(!p1.idx && !p2.idx)) { -+ struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev); -+ struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev); -+ -+ u64 l1 = atomic64_read(&dev1->cur_latency[READ]); -+ u64 l2 = atomic64_read(&dev2->cur_latency[READ]); -+ -+ /* Pick at random, biased in favor of the faster device: */ -+ -+ return bch2_rand_range(l1 + l2) > l1; -+ } -+ -+ if (force_reconstruct_read(c)) -+ return p1.idx > p2.idx; -+ -+ return p1.idx < p2.idx; -+} -+ -+/* -+ * This picks a non-stale 
pointer, preferably from a device other than @avoid. -+ * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to -+ * other devices, it will still pick a pointer from avoid. -+ */ -+int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, -+ struct bch_io_failures *failed, -+ struct extent_ptr_decoded *pick) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ struct bch_dev_io_failures *f; -+ struct bch_dev *ca; -+ int ret = 0; -+ -+ if (k.k->type == KEY_TYPE_error) -+ return -EIO; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ -+ /* -+ * If there are any dirty pointers it's an error if we can't -+ * read: -+ */ -+ if (!ret && !p.ptr.cached) -+ ret = -EIO; -+ -+ if (p.ptr.cached && ptr_stale(ca, &p.ptr)) -+ continue; -+ -+ f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL; -+ if (f) -+ p.idx = f->nr_failed < f->nr_retries -+ ? f->idx -+ : f->idx + 1; -+ -+ if (!p.idx && -+ !bch2_dev_is_readable(ca)) -+ p.idx++; -+ -+ if (force_reconstruct_read(c) && -+ !p.idx && p.has_ec) -+ p.idx++; -+ -+ if (p.idx >= (unsigned) p.has_ec + 1) -+ continue; -+ -+ if (ret > 0 && !ptr_better(c, p, *pick)) -+ continue; -+ -+ *pick = p; -+ ret = 1; -+ } -+ -+ return ret; -+} -+ -+/* KEY_TYPE_btree_ptr: */ -+ -+const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) -+ return "value too big"; -+ -+ return bch2_bkey_ptrs_invalid(c, k); -+} -+ -+void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ const char *err; -+ char buf[160]; -+ struct bucket_mark mark; -+ struct bch_dev *ca; -+ -+ if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) -+ return; -+ -+ if (!percpu_down_read_trylock(&c->mark_lock)) -+ return; -+ -+ bch2_fs_inconsistent_on(!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && -+ !bch2_bkey_replicas_marked_locked(c, k, false), c, -+ "btree key bad (replicas not marked in superblock):\n%s", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ mark = ptr_bucket_mark(ca, ptr); -+ -+ err = "stale"; -+ if (gen_after(mark.gen, ptr->gen)) -+ goto err; -+ -+ err = "inconsistent"; -+ if (mark.data_type != BCH_DATA_BTREE || -+ mark.dirty_sectors < c->opts.btree_node_size) -+ goto err; -+ } -+out: -+ percpu_up_read(&c->mark_lock); -+ return; -+err: -+ bch2_fs_inconsistent(c, "%s btree pointer %s: bucket %zi gen %i mark %08x", -+ err, (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf), -+ PTR_BUCKET_NR(ca, ptr), -+ mark.gen, (unsigned) mark.v.counter); -+ goto out; -+} -+ -+void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); -+ -+ pr_buf(out, "seq %llx sectors %u written %u min_key ", -+ le64_to_cpu(bp.v->seq), -+ le16_to_cpu(bp.v->sectors), -+ le16_to_cpu(bp.v->sectors_written)); -+ -+ bch2_bpos_to_text(out, bp.v->min_key); -+ pr_buf(out, " "); -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version, -+ unsigned big_endian, int write, -+ struct bkey_s k) -+{ -+ struct 
bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(k); -+ -+ compat_bpos(0, btree_id, version, big_endian, write, &bp.v->min_key); -+ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_node_type_is_extents(btree_id) && -+ bkey_cmp(bp.v->min_key, POS_MIN)) -+ bp.v->min_key = write -+ ? bkey_predecessor(bp.v->min_key) -+ : bkey_successor(bp.v->min_key); -+} -+ -+/* KEY_TYPE_extent: */ -+ -+const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ return bch2_bkey_ptrs_invalid(c, k); -+} -+ -+void bch2_extent_debugcheck(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ char buf[160]; -+ -+ if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) || -+ !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) -+ return; -+ -+ if (!percpu_down_read_trylock(&c->mark_lock)) -+ return; -+ -+ bch2_fs_inconsistent_on(!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && -+ !bch2_bkey_replicas_marked_locked(c, e.s_c, false), c, -+ "extent key bad (replicas not marked in superblock):\n%s", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c), buf)); -+ -+ extent_for_each_ptr_decode(e, p, entry) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ struct bucket_mark mark = ptr_bucket_mark(ca, &p.ptr); -+ unsigned stale = gen_after(mark.gen, p.ptr.gen); -+ unsigned disk_sectors = ptr_disk_sectors(p); -+ unsigned mark_sectors = p.ptr.cached -+ ? mark.cached_sectors -+ : mark.dirty_sectors; -+ -+ bch2_fs_inconsistent_on(stale && !p.ptr.cached, c, -+ "stale dirty pointer (ptr gen %u bucket %u", -+ p.ptr.gen, mark.gen); -+ -+ bch2_fs_inconsistent_on(stale > 96, c, -+ "key too stale: %i", stale); -+ -+ bch2_fs_inconsistent_on(!stale && -+ (mark.data_type != BCH_DATA_USER || -+ mark_sectors < disk_sectors), c, -+ "extent pointer not marked: %s:\n" -+ "type %u sectors %u < %u", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c), buf), -+ mark.data_type, -+ mark_sectors, disk_sectors); -+ } -+ -+ percpu_up_read(&c->mark_lock); -+} -+ -+void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+enum merge_result bch2_extent_merge(struct bch_fs *c, -+ struct bkey_s _l, struct bkey_s _r) -+{ -+ struct bkey_s_extent l = bkey_s_to_extent(_l); -+ struct bkey_s_extent r = bkey_s_to_extent(_r); -+ union bch_extent_entry *en_l = l.v->start; -+ union bch_extent_entry *en_r = r.v->start; -+ struct bch_extent_crc_unpacked crc_l, crc_r; -+ -+ if (bkey_val_u64s(l.k) != bkey_val_u64s(r.k)) -+ return BCH_MERGE_NOMERGE; -+ -+ crc_l = bch2_extent_crc_unpack(l.k, NULL); -+ -+ extent_for_each_entry(l, en_l) { -+ en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data); -+ -+ if (extent_entry_type(en_l) != extent_entry_type(en_r)) -+ return BCH_MERGE_NOMERGE; -+ -+ switch (extent_entry_type(en_l)) { -+ case BCH_EXTENT_ENTRY_ptr: { -+ const struct bch_extent_ptr *lp = &en_l->ptr; -+ const struct bch_extent_ptr *rp = &en_r->ptr; -+ struct bch_dev *ca; -+ -+ if (lp->offset + crc_l.compressed_size != rp->offset || -+ lp->dev != rp->dev || -+ lp->gen != rp->gen) -+ return BCH_MERGE_NOMERGE; -+ -+ /* We don't allow extents to straddle buckets: */ -+ ca = bch_dev_bkey_exists(c, lp->dev); -+ -+ if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp)) -+ return BCH_MERGE_NOMERGE; -+ -+ break; -+ } -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ if (en_l->stripe_ptr.block != en_r->stripe_ptr.block || -+ en_l->stripe_ptr.idx != 
en_r->stripe_ptr.idx) -+ return BCH_MERGE_NOMERGE; -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ case BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: -+ crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l)); -+ crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r)); -+ -+ if (crc_l.csum_type != crc_r.csum_type || -+ crc_l.compression_type != crc_r.compression_type || -+ crc_l.nonce != crc_r.nonce) -+ return BCH_MERGE_NOMERGE; -+ -+ if (crc_l.offset + crc_l.live_size != crc_l.compressed_size || -+ crc_r.offset) -+ return BCH_MERGE_NOMERGE; -+ -+ if (!bch2_checksum_mergeable(crc_l.csum_type)) -+ return BCH_MERGE_NOMERGE; -+ -+ if (crc_is_compressed(crc_l)) -+ return BCH_MERGE_NOMERGE; -+ -+ if (crc_l.csum_type && -+ crc_l.uncompressed_size + -+ crc_r.uncompressed_size > c->sb.encoded_extent_max) -+ return BCH_MERGE_NOMERGE; -+ -+ if (crc_l.uncompressed_size + crc_r.uncompressed_size > -+ bch2_crc_field_size_max[extent_entry_type(en_l)]) -+ return BCH_MERGE_NOMERGE; -+ -+ break; -+ default: -+ return BCH_MERGE_NOMERGE; -+ } -+ } -+ -+ extent_for_each_entry(l, en_l) { -+ struct bch_extent_crc_unpacked crc_l, crc_r; -+ -+ en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data); -+ -+ if (!extent_entry_is_crc(en_l)) -+ continue; -+ -+ crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l)); -+ crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r)); -+ -+ crc_l.csum = bch2_checksum_merge(crc_l.csum_type, -+ crc_l.csum, -+ crc_r.csum, -+ crc_r.uncompressed_size << 9); -+ -+ crc_l.uncompressed_size += crc_r.uncompressed_size; -+ crc_l.compressed_size += crc_r.compressed_size; -+ -+ bch2_extent_crc_pack(entry_to_crc(en_l), crc_l, -+ extent_entry_type(en_l)); -+ } -+ -+ bch2_key_resize(l.k, l.k->size + r.k->size); -+ -+ return BCH_MERGE_MERGE; -+} -+ -+/* KEY_TYPE_reservation: */ -+ -+const char *bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); -+ -+ if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation)) -+ return "incorrect value size"; -+ -+ if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) -+ return "invalid nr_replicas"; -+ -+ return NULL; -+} -+ -+void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); -+ -+ pr_buf(out, "generation %u replicas %u", -+ le32_to_cpu(r.v->generation), -+ r.v->nr_replicas); -+} -+ -+enum merge_result bch2_reservation_merge(struct bch_fs *c, -+ struct bkey_s _l, struct bkey_s _r) -+{ -+ struct bkey_s_reservation l = bkey_s_to_reservation(_l); -+ struct bkey_s_reservation r = bkey_s_to_reservation(_r); -+ -+ if (l.v->generation != r.v->generation || -+ l.v->nr_replicas != r.v->nr_replicas) -+ return BCH_MERGE_NOMERGE; -+ -+ if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) { -+ bch2_key_resize(l.k, KEY_SIZE_MAX); -+ bch2_cut_front_s(l.k->p, r.s); -+ return BCH_MERGE_PARTIAL; -+ } -+ -+ bch2_key_resize(l.k, l.k->size + r.k->size); -+ -+ return BCH_MERGE_MERGE; -+} -+ -+/* Extent checksum entries: */ -+ -+/* returns true if not equal */ -+static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l, -+ struct bch_extent_crc_unpacked r) -+{ -+ return (l.csum_type != r.csum_type || -+ l.compression_type != r.compression_type || -+ l.compressed_size != r.compressed_size || -+ l.uncompressed_size != r.uncompressed_size || -+ l.offset != r.offset || -+ l.live_size != r.live_size || -+ l.nonce != r.nonce || -+ bch2_crc_cmp(l.csum, r.csum)); -+} -+ -+static 
inline bool can_narrow_crc(struct bch_extent_crc_unpacked u, -+ struct bch_extent_crc_unpacked n) -+{ -+ return !crc_is_compressed(u) && -+ u.csum_type && -+ u.uncompressed_size > u.live_size && -+ bch2_csum_type_is_encryption(u.csum_type) == -+ bch2_csum_type_is_encryption(n.csum_type); -+} -+ -+bool bch2_can_narrow_extent_crcs(struct bkey_s_c k, -+ struct bch_extent_crc_unpacked n) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ struct bch_extent_crc_unpacked crc; -+ const union bch_extent_entry *i; -+ -+ if (!n.csum_type) -+ return false; -+ -+ bkey_for_each_crc(k.k, ptrs, crc, i) -+ if (can_narrow_crc(crc, n)) -+ return true; -+ -+ return false; -+} -+ -+/* -+ * We're writing another replica for this extent, so while we've got the data in -+ * memory we'll be computing a new checksum for the currently live data. -+ * -+ * If there are other replicas we aren't moving, and they are checksummed but -+ * not compressed, we can modify them to point to only the data that is -+ * currently live (so that readers won't have to bounce) while we've got the -+ * checksum we need: -+ */ -+bool bch2_bkey_narrow_crcs(struct bkey_i *k, struct bch_extent_crc_unpacked n) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ struct bch_extent_crc_unpacked u; -+ struct extent_ptr_decoded p; -+ union bch_extent_entry *i; -+ bool ret = false; -+ -+ /* Find a checksum entry that covers only live data: */ -+ if (!n.csum_type) { -+ bkey_for_each_crc(&k->k, ptrs, u, i) -+ if (!crc_is_compressed(u) && -+ u.csum_type && -+ u.live_size == u.uncompressed_size) { -+ n = u; -+ goto found; -+ } -+ return false; -+ } -+found: -+ BUG_ON(crc_is_compressed(n)); -+ BUG_ON(n.offset); -+ BUG_ON(n.live_size != k->k.size); -+ -+restart_narrow_pointers: -+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ -+ bkey_for_each_ptr_decode(&k->k, ptrs, p, i) -+ if (can_narrow_crc(p.crc, n)) { -+ bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr); -+ p.ptr.offset += p.crc.offset; -+ p.crc = n; -+ bch2_extent_ptr_decoded_append(k, &p); -+ ret = true; -+ goto restart_narrow_pointers; -+ } -+ -+ return ret; -+} -+ -+static void bch2_extent_crc_pack(union bch_extent_crc *dst, -+ struct bch_extent_crc_unpacked src, -+ enum bch_extent_entry_type type) -+{ -+#define set_common_fields(_dst, _src) \ -+ _dst.type = 1 << type; \ -+ _dst.csum_type = _src.csum_type, \ -+ _dst.compression_type = _src.compression_type, \ -+ _dst._compressed_size = _src.compressed_size - 1, \ -+ _dst._uncompressed_size = _src.uncompressed_size - 1, \ -+ _dst.offset = _src.offset -+ -+ switch (type) { -+ case BCH_EXTENT_ENTRY_crc32: -+ set_common_fields(dst->crc32, src); -+ dst->crc32.csum = *((__le32 *) &src.csum.lo); -+ break; -+ case BCH_EXTENT_ENTRY_crc64: -+ set_common_fields(dst->crc64, src); -+ dst->crc64.nonce = src.nonce; -+ dst->crc64.csum_lo = src.csum.lo; -+ dst->crc64.csum_hi = *((__le16 *) &src.csum.hi); -+ break; -+ case BCH_EXTENT_ENTRY_crc128: -+ set_common_fields(dst->crc128, src); -+ dst->crc128.nonce = src.nonce; -+ dst->crc128.csum = src.csum; -+ break; -+ default: -+ BUG(); -+ } -+#undef set_common_fields -+} -+ -+void bch2_extent_crc_append(struct bkey_i *k, -+ struct bch_extent_crc_unpacked new) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ union bch_extent_crc *crc = (void *) ptrs.end; -+ enum bch_extent_entry_type type; -+ -+ if (bch_crc_bytes[new.csum_type] <= 4 && -+ new.uncompressed_size <= CRC32_SIZE_MAX && -+ new.nonce <= CRC32_NONCE_MAX) -+ type = BCH_EXTENT_ENTRY_crc32; -+ else if (bch_crc_bytes[new.csum_type] 
<= 10 && -+ new.uncompressed_size <= CRC64_SIZE_MAX && -+ new.nonce <= CRC64_NONCE_MAX) -+ type = BCH_EXTENT_ENTRY_crc64; -+ else if (bch_crc_bytes[new.csum_type] <= 16 && -+ new.uncompressed_size <= CRC128_SIZE_MAX && -+ new.nonce <= CRC128_NONCE_MAX) -+ type = BCH_EXTENT_ENTRY_crc128; -+ else -+ BUG(); -+ -+ bch2_extent_crc_pack(crc, new, type); -+ -+ k->k.u64s += extent_entry_u64s(ptrs.end); -+ -+ EBUG_ON(bkey_val_u64s(&k->k) > BKEY_EXTENT_VAL_U64s_MAX); -+} -+ -+/* Generic code for keys with pointers: */ -+ -+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k) -+{ -+ return bch2_bkey_devs(k).nr; -+} -+ -+unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c k) -+{ -+ return k.k->type == KEY_TYPE_reservation -+ ? bkey_s_c_to_reservation(k).v->nr_replicas -+ : bch2_bkey_dirty_devs(k).nr; -+} -+ -+unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c k) -+{ -+ unsigned ret = 0; -+ -+ if (k.k->type == KEY_TYPE_reservation) { -+ ret = bkey_s_c_to_reservation(k).v->nr_replicas; -+ } else { -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ ret += !p.ptr.cached && !crc_is_compressed(p.crc); -+ } -+ -+ return ret; -+} -+ -+unsigned bch2_bkey_sectors_compressed(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned ret = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && crc_is_compressed(p.crc)) -+ ret += p.crc.compressed_size; -+ -+ return ret; -+} -+ -+bool bch2_bkey_is_incompressible(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct bch_extent_crc_unpacked crc; -+ -+ bkey_for_each_crc(k.k, ptrs, crc, entry) -+ if (crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) -+ return true; -+ return false; -+} -+ -+bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, -+ unsigned nr_replicas) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bpos end = pos; -+ struct bkey_s_c k; -+ bool ret = true; -+ int err; -+ -+ end.offset += size; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos, -+ BTREE_ITER_SLOTS, k, err) { -+ if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) -+ break; -+ -+ if (nr_replicas > bch2_bkey_nr_ptrs_fully_allocated(k)) { -+ ret = false; -+ break; -+ } -+ } -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+static unsigned bch2_extent_ptr_durability(struct bch_fs *c, -+ struct extent_ptr_decoded p) -+{ -+ unsigned durability = 0; -+ struct bch_dev *ca; -+ -+ if (p.ptr.cached) -+ return 0; -+ -+ ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ -+ if (ca->mi.state != BCH_MEMBER_STATE_FAILED) -+ durability = max_t(unsigned, durability, ca->mi.durability); -+ -+ if (p.has_ec) { -+ struct stripe *s = -+ genradix_ptr(&c->stripes[0], p.ec.idx); -+ -+ if (WARN_ON(!s)) -+ goto out; -+ -+ durability = max_t(unsigned, durability, s->nr_redundant); -+ } -+out: -+ return durability; -+} -+ -+unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned durability = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ durability += bch2_extent_ptr_durability(c, p); -+ -+ return durability; -+} -+ -+void 
bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k, -+ unsigned target, -+ unsigned nr_desired_replicas) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ int extra = bch2_bkey_durability(c, k.s_c) - nr_desired_replicas; -+ -+ if (target && extra > 0) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ int n = bch2_extent_ptr_durability(c, p); -+ -+ if (n && n <= extra && -+ !bch2_dev_in_target(c, p.ptr.dev, target)) { -+ entry->ptr.cached = true; -+ extra -= n; -+ } -+ } -+ -+ if (extra > 0) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ int n = bch2_extent_ptr_durability(c, p); -+ -+ if (n && n <= extra) { -+ entry->ptr.cached = true; -+ extra -= n; -+ } -+ } -+} -+ -+void bch2_bkey_append_ptr(struct bkey_i *k, -+ struct bch_extent_ptr ptr) -+{ -+ EBUG_ON(bch2_bkey_has_device(bkey_i_to_s_c(k), ptr.dev)); -+ -+ switch (k->k.type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ case KEY_TYPE_extent: -+ EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX); -+ -+ ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; -+ -+ memcpy((void *) &k->v + bkey_val_bytes(&k->k), -+ &ptr, -+ sizeof(ptr)); -+ k->u64s++; -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static inline void __extent_entry_insert(struct bkey_i *k, -+ union bch_extent_entry *dst, -+ union bch_extent_entry *new) -+{ -+ union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k)); -+ -+ memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new), -+ dst, (u64 *) end - (u64 *) dst); -+ k->k.u64s += extent_entry_u64s(new); -+ memcpy(dst, new, extent_entry_bytes(new)); -+} -+ -+void bch2_extent_ptr_decoded_append(struct bkey_i *k, -+ struct extent_ptr_decoded *p) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ struct bch_extent_crc_unpacked crc = -+ bch2_extent_crc_unpack(&k->k, NULL); -+ union bch_extent_entry *pos; -+ -+ if (!bch2_crc_unpacked_cmp(crc, p->crc)) { -+ pos = ptrs.start; -+ goto found; -+ } -+ -+ bkey_for_each_crc(&k->k, ptrs, crc, pos) -+ if (!bch2_crc_unpacked_cmp(crc, p->crc)) { -+ pos = extent_entry_next(pos); -+ goto found; -+ } -+ -+ bch2_extent_crc_append(k, p->crc); -+ pos = bkey_val_end(bkey_i_to_s(k)); -+found: -+ p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; -+ __extent_entry_insert(k, pos, to_entry(&p->ptr)); -+ -+ if (p->has_ec) { -+ p->ec.type = 1 << BCH_EXTENT_ENTRY_stripe_ptr; -+ __extent_entry_insert(k, pos, to_entry(&p->ec)); -+ } -+} -+ -+static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs, -+ union bch_extent_entry *entry) -+{ -+ union bch_extent_entry *i = ptrs.start; -+ -+ if (i == entry) -+ return NULL; -+ -+ while (extent_entry_next(i) != entry) -+ i = extent_entry_next(i); -+ return i; -+} -+ -+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k, -+ struct bch_extent_ptr *ptr) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *dst, *src, *prev; -+ bool drop_crc = true; -+ -+ EBUG_ON(ptr < &ptrs.start->ptr || -+ ptr >= &ptrs.end->ptr); -+ EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr); -+ -+ src = extent_entry_next(to_entry(ptr)); -+ if (src != ptrs.end && -+ !extent_entry_is_crc(src)) -+ drop_crc = false; -+ -+ dst = to_entry(ptr); -+ while ((prev = extent_entry_prev(ptrs, dst))) { -+ if (extent_entry_is_ptr(prev)) -+ break; -+ -+ if (extent_entry_is_crc(prev)) { -+ if (drop_crc) -+ dst = prev; -+ break; -+ } -+ -+ dst = prev; -+ } -+ -+ memmove_u64s_down(dst, src, -+ (u64 *) ptrs.end - (u64 *) src); -+ k.k->u64s -= (u64 *) src - (u64 *) 
dst; -+ -+ return dst; -+} -+ -+void bch2_bkey_drop_device(struct bkey_s k, unsigned dev) -+{ -+ struct bch_extent_ptr *ptr; -+ -+ bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev); -+} -+ -+const struct bch_extent_ptr * -+bch2_bkey_has_device(struct bkey_s_c k, unsigned dev) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(ptrs, ptr) -+ if (ptr->dev == dev) -+ return ptr; -+ -+ return NULL; -+} -+ -+bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(ptrs, ptr) -+ if (bch2_dev_in_target(c, ptr->dev, target) && -+ (!ptr->cached || -+ !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr))) -+ return true; -+ -+ return false; -+} -+ -+bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k, -+ struct bch_extent_ptr m, u64 offset) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (p.ptr.dev == m.dev && -+ p.ptr.gen == m.gen && -+ (s64) p.ptr.offset + p.crc.offset - bkey_start_offset(k.k) == -+ (s64) m.offset - offset) -+ return true; -+ -+ return false; -+} -+ -+/* -+ * bch_extent_normalize - clean up an extent, dropping stale pointers etc. -+ * -+ * Returns true if @k should be dropped entirely -+ * -+ * For existing keys, only called when btree nodes are being rewritten, not when -+ * they're merely being compacted/resorted in memory. -+ */ -+bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) -+{ -+ struct bch_extent_ptr *ptr; -+ -+ bch2_bkey_drop_ptrs(k, ptr, -+ ptr->cached && -+ ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)); -+ -+ /* will only happen if all pointers were cached: */ -+ if (!bch2_bkey_nr_ptrs(k.s_c)) -+ k.k->type = KEY_TYPE_discard; -+ -+ return bkey_whiteout(k.k); -+} -+ -+void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct bch_extent_crc_unpacked crc; -+ const struct bch_extent_ptr *ptr; -+ const struct bch_extent_stripe_ptr *ec; -+ struct bch_dev *ca; -+ bool first = true; -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ if (!first) -+ pr_buf(out, " "); -+ -+ switch (__extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ ptr = entry_to_ptr(entry); -+ ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] -+ ? bch_dev_bkey_exists(c, ptr->dev) -+ : NULL; -+ -+ pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev, -+ (u64) ptr->offset, ptr->gen, -+ ptr->cached ? " cached" : "", -+ ca && ptr_stale(ca, ptr) -+ ? 
" stale" : ""); -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ case BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: -+ crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); -+ -+ pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u", -+ crc.compressed_size, -+ crc.uncompressed_size, -+ crc.offset, crc.nonce, -+ crc.csum_type, -+ crc.compression_type); -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ ec = &entry->stripe_ptr; -+ -+ pr_buf(out, "ec: idx %llu block %u", -+ (u64) ec->idx, ec->block); -+ break; -+ default: -+ pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); -+ return; -+ } -+ -+ first = false; -+ } -+} -+ -+static const char *extent_ptr_invalid(const struct bch_fs *c, -+ struct bkey_s_c k, -+ const struct bch_extent_ptr *ptr, -+ unsigned size_ondisk, -+ bool metadata) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr2; -+ struct bch_dev *ca; -+ -+ if (!bch2_dev_exists2(c, ptr->dev)) -+ return "pointer to invalid device"; -+ -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ if (!ca) -+ return "pointer to invalid device"; -+ -+ bkey_for_each_ptr(ptrs, ptr2) -+ if (ptr != ptr2 && ptr->dev == ptr2->dev) -+ return "multiple pointers to same device"; -+ -+ if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets)) -+ return "offset past end of device"; -+ -+ if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) -+ return "offset before first bucket"; -+ -+ if (bucket_remainder(ca, ptr->offset) + -+ size_ondisk > ca->mi.bucket_size) -+ return "spans multiple buckets"; -+ -+ return NULL; -+} -+ -+const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct bch_extent_crc_unpacked crc; -+ unsigned size_ondisk = k.k->size; -+ const char *reason; -+ unsigned nonce = UINT_MAX; -+ -+ if (k.k->type == KEY_TYPE_btree_ptr) -+ size_ondisk = c->opts.btree_node_size; -+ if (k.k->type == KEY_TYPE_btree_ptr_v2) -+ size_ondisk = le16_to_cpu(bkey_s_c_to_btree_ptr_v2(k).v->sectors); -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) -+ return "invalid extent entry type"; -+ -+ if (k.k->type == KEY_TYPE_btree_ptr && -+ !extent_entry_is_ptr(entry)) -+ return "has non ptr field"; -+ -+ switch (extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ reason = extent_ptr_invalid(c, k, &entry->ptr, -+ size_ondisk, false); -+ if (reason) -+ return reason; -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ case BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: -+ crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); -+ -+ if (crc.offset + crc.live_size > -+ crc.uncompressed_size) -+ return "checksum offset + key size > uncompressed size"; -+ -+ size_ondisk = crc.compressed_size; -+ -+ if (!bch2_checksum_type_valid(c, crc.csum_type)) -+ return "invalid checksum type"; -+ -+ if (crc.compression_type >= BCH_COMPRESSION_TYPE_NR) -+ return "invalid compression type"; -+ -+ if (bch2_csum_type_is_encryption(crc.csum_type)) { -+ if (nonce == UINT_MAX) -+ nonce = crc.offset + crc.nonce; -+ else if (nonce != crc.offset + crc.nonce) -+ return "incorrect nonce"; -+ } -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ break; -+ } -+ } -+ -+ return NULL; -+} -+ -+void bch2_ptr_swab(struct bkey_s k) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *entry; -+ u64 *d; -+ -+ for (d = (u64 *) ptrs.start; -+ d != (u64 *) 
ptrs.end; -+ d++) -+ *d = swab64(*d); -+ -+ for (entry = ptrs.start; -+ entry < ptrs.end; -+ entry = extent_entry_next(entry)) { -+ switch (extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ entry->crc32.csum = swab32(entry->crc32.csum); -+ break; -+ case BCH_EXTENT_ENTRY_crc64: -+ entry->crc64.csum_hi = swab16(entry->crc64.csum_hi); -+ entry->crc64.csum_lo = swab64(entry->crc64.csum_lo); -+ break; -+ case BCH_EXTENT_ENTRY_crc128: -+ entry->crc128.csum.hi = (__force __le64) -+ swab64((__force u64) entry->crc128.csum.hi); -+ entry->crc128.csum.lo = (__force __le64) -+ swab64((__force u64) entry->crc128.csum.lo); -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ break; -+ } -+ } -+} -+ -+/* Generic extent code: */ -+ -+int bch2_cut_front_s(struct bpos where, struct bkey_s k) -+{ -+ unsigned new_val_u64s = bkey_val_u64s(k.k); -+ int val_u64s_delta; -+ u64 sub; -+ -+ if (bkey_cmp(where, bkey_start_pos(k.k)) <= 0) -+ return 0; -+ -+ EBUG_ON(bkey_cmp(where, k.k->p) > 0); -+ -+ sub = where.offset - bkey_start_offset(k.k); -+ -+ k.k->size -= sub; -+ -+ if (!k.k->size) { -+ k.k->type = KEY_TYPE_deleted; -+ new_val_u64s = 0; -+ } -+ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: { -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *entry; -+ bool seen_crc = false; -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ switch (extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ if (!seen_crc) -+ entry->ptr.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ entry->crc32.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_crc64: -+ entry->crc64.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_crc128: -+ entry->crc128.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ break; -+ } -+ -+ if (extent_entry_is_crc(entry)) -+ seen_crc = true; -+ } -+ -+ break; -+ } -+ case KEY_TYPE_reflink_p: { -+ struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k); -+ -+ le64_add_cpu(&p.v->idx, sub); -+ break; -+ } -+ case KEY_TYPE_inline_data: { -+ struct bkey_s_inline_data d = bkey_s_to_inline_data(k); -+ -+ sub = min_t(u64, sub << 9, bkey_val_bytes(d.k)); -+ -+ memmove(d.v->data, -+ d.v->data + sub, -+ bkey_val_bytes(d.k) - sub); -+ -+ new_val_u64s -= sub >> 3; -+ break; -+ } -+ } -+ -+ val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s; -+ BUG_ON(val_u64s_delta < 0); -+ -+ set_bkey_val_u64s(k.k, new_val_u64s); -+ memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64)); -+ return -val_u64s_delta; -+} -+ -+int bch2_cut_back_s(struct bpos where, struct bkey_s k) -+{ -+ unsigned new_val_u64s = bkey_val_u64s(k.k); -+ int val_u64s_delta; -+ u64 len = 0; -+ -+ if (bkey_cmp(where, k.k->p) >= 0) -+ return 0; -+ -+ EBUG_ON(bkey_cmp(where, bkey_start_pos(k.k)) < 0); -+ -+ len = where.offset - bkey_start_offset(k.k); -+ -+ k.k->p = where; -+ k.k->size = len; -+ -+ if (!len) { -+ k.k->type = KEY_TYPE_deleted; -+ new_val_u64s = 0; -+ } -+ -+ switch (k.k->type) { -+ case KEY_TYPE_inline_data: -+ new_val_u64s = min(new_val_u64s, k.k->size << 6); -+ break; -+ } -+ -+ val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s; -+ BUG_ON(val_u64s_delta < 0); -+ -+ set_bkey_val_u64s(k.k, new_val_u64s); -+ memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64)); -+ return -val_u64s_delta; -+} -diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h -new file mode 100644 -index 000000000000..29b15365d19c ---- /dev/null -+++ b/fs/bcachefs/extents.h -@@ -0,0 +1,603 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef 
_BCACHEFS_EXTENTS_H -+#define _BCACHEFS_EXTENTS_H -+ -+#include "bcachefs.h" -+#include "bkey.h" -+#include "extents_types.h" -+ -+struct bch_fs; -+struct btree_trans; -+ -+/* extent entries: */ -+ -+#define extent_entry_last(_e) \ -+ ((typeof(&(_e).v->start[0])) bkey_val_end(_e)) -+ -+#define entry_to_ptr(_entry) \ -+({ \ -+ EBUG_ON((_entry) && !extent_entry_is_ptr(_entry)); \ -+ \ -+ __builtin_choose_expr( \ -+ type_is_exact(_entry, const union bch_extent_entry *), \ -+ (const struct bch_extent_ptr *) (_entry), \ -+ (struct bch_extent_ptr *) (_entry)); \ -+}) -+ -+/* downcast, preserves const */ -+#define to_entry(_entry) \ -+({ \ -+ BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \ -+ !type_is(_entry, struct bch_extent_ptr *) && \ -+ !type_is(_entry, struct bch_extent_stripe_ptr *)); \ -+ \ -+ __builtin_choose_expr( \ -+ (type_is_exact(_entry, const union bch_extent_crc *) || \ -+ type_is_exact(_entry, const struct bch_extent_ptr *) ||\ -+ type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\ -+ (const union bch_extent_entry *) (_entry), \ -+ (union bch_extent_entry *) (_entry)); \ -+}) -+ -+#define extent_entry_next(_entry) \ -+ ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry))) -+ -+static inline unsigned -+__extent_entry_type(const union bch_extent_entry *e) -+{ -+ return e->type ? __ffs(e->type) : BCH_EXTENT_ENTRY_MAX; -+} -+ -+static inline enum bch_extent_entry_type -+extent_entry_type(const union bch_extent_entry *e) -+{ -+ int ret = __ffs(e->type); -+ -+ EBUG_ON(ret < 0 || ret >= BCH_EXTENT_ENTRY_MAX); -+ -+ return ret; -+} -+ -+static inline size_t extent_entry_bytes(const union bch_extent_entry *entry) -+{ -+ switch (extent_entry_type(entry)) { -+#define x(f, n) \ -+ case BCH_EXTENT_ENTRY_##f: \ -+ return sizeof(struct bch_extent_##f); -+ BCH_EXTENT_ENTRY_TYPES() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+static inline size_t extent_entry_u64s(const union bch_extent_entry *entry) -+{ -+ return extent_entry_bytes(entry) / sizeof(u64); -+} -+ -+static inline bool extent_entry_is_ptr(const union bch_extent_entry *e) -+{ -+ switch (extent_entry_type(e)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline bool extent_entry_is_crc(const union bch_extent_entry *e) -+{ -+ switch (extent_entry_type(e)) { -+ case BCH_EXTENT_ENTRY_crc32: -+ case BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+union bch_extent_crc { -+ u8 type; -+ struct bch_extent_crc32 crc32; -+ struct bch_extent_crc64 crc64; -+ struct bch_extent_crc128 crc128; -+}; -+ -+#define __entry_to_crc(_entry) \ -+ __builtin_choose_expr( \ -+ type_is_exact(_entry, const union bch_extent_entry *), \ -+ (const union bch_extent_crc *) (_entry), \ -+ (union bch_extent_crc *) (_entry)) -+ -+#define entry_to_crc(_entry) \ -+({ \ -+ EBUG_ON((_entry) && !extent_entry_is_crc(_entry)); \ -+ \ -+ __entry_to_crc(_entry); \ -+}) -+ -+static inline struct bch_extent_crc_unpacked -+bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) -+{ -+#define common_fields(_crc) \ -+ .csum_type = _crc.csum_type, \ -+ .compression_type = _crc.compression_type, \ -+ .compressed_size = _crc._compressed_size + 1, \ -+ .uncompressed_size = _crc._uncompressed_size + 1, \ -+ .offset = _crc.offset, \ -+ .live_size = k->size -+ -+ if (!crc) -+ return (struct bch_extent_crc_unpacked) { -+ .compressed_size = k->size, -+ .uncompressed_size = k->size, -+ .live_size = k->size, -+ }; -+ 
-+ switch (extent_entry_type(to_entry(crc))) { -+ case BCH_EXTENT_ENTRY_crc32: { -+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) { -+ common_fields(crc->crc32), -+ }; -+ -+ *((__le32 *) &ret.csum.lo) = crc->crc32.csum; -+ -+ memcpy(&ret.csum.lo, &crc->crc32.csum, -+ sizeof(crc->crc32.csum)); -+ -+ return ret; -+ } -+ case BCH_EXTENT_ENTRY_crc64: { -+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) { -+ common_fields(crc->crc64), -+ .nonce = crc->crc64.nonce, -+ .csum.lo = (__force __le64) crc->crc64.csum_lo, -+ }; -+ -+ *((__le16 *) &ret.csum.hi) = crc->crc64.csum_hi; -+ -+ return ret; -+ } -+ case BCH_EXTENT_ENTRY_crc128: { -+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) { -+ common_fields(crc->crc128), -+ .nonce = crc->crc128.nonce, -+ .csum = crc->crc128.csum, -+ }; -+ -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+#undef common_fields -+} -+ -+static inline bool crc_is_compressed(struct bch_extent_crc_unpacked crc) -+{ -+ return (crc.compression_type != BCH_COMPRESSION_TYPE_none && -+ crc.compression_type != BCH_COMPRESSION_TYPE_incompressible); -+} -+ -+/* bkey_ptrs: generically over any key type that has ptrs */ -+ -+struct bkey_ptrs_c { -+ const union bch_extent_entry *start; -+ const union bch_extent_entry *end; -+}; -+ -+struct bkey_ptrs { -+ union bch_extent_entry *start; -+ union bch_extent_entry *end; -+}; -+ -+static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: { -+ struct bkey_s_c_btree_ptr e = bkey_s_c_to_btree_ptr(k); -+ return (struct bkey_ptrs_c) { -+ to_entry(&e.v->start[0]), -+ to_entry(extent_entry_last(e)) -+ }; -+ } -+ case KEY_TYPE_extent: { -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ return (struct bkey_ptrs_c) { -+ e.v->start, -+ extent_entry_last(e) -+ }; -+ } -+ case KEY_TYPE_stripe: { -+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); -+ return (struct bkey_ptrs_c) { -+ to_entry(&s.v->ptrs[0]), -+ to_entry(&s.v->ptrs[s.v->nr_blocks]), -+ }; -+ } -+ case KEY_TYPE_reflink_v: { -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -+ -+ return (struct bkey_ptrs_c) { -+ r.v->start, -+ bkey_val_end(r), -+ }; -+ } -+ case KEY_TYPE_btree_ptr_v2: { -+ struct bkey_s_c_btree_ptr_v2 e = bkey_s_c_to_btree_ptr_v2(k); -+ return (struct bkey_ptrs_c) { -+ to_entry(&e.v->start[0]), -+ to_entry(extent_entry_last(e)) -+ }; -+ } -+ default: -+ return (struct bkey_ptrs_c) { NULL, NULL }; -+ } -+} -+ -+static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k) -+{ -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k.s_c); -+ -+ return (struct bkey_ptrs) { -+ (void *) p.start, -+ (void *) p.end -+ }; -+} -+ -+#define __bkey_extent_entry_for_each_from(_start, _end, _entry) \ -+ for ((_entry) = (_start); \ -+ (_entry) < (_end); \ -+ (_entry) = extent_entry_next(_entry)) -+ -+#define __bkey_ptr_next(_ptr, _end) \ -+({ \ -+ typeof(_end) _entry; \ -+ \ -+ __bkey_extent_entry_for_each_from(to_entry(_ptr), _end, _entry) \ -+ if (extent_entry_is_ptr(_entry)) \ -+ break; \ -+ \ -+ _entry < (_end) ? 
entry_to_ptr(_entry) : NULL; \ -+}) -+ -+#define bkey_extent_entry_for_each_from(_p, _entry, _start) \ -+ __bkey_extent_entry_for_each_from(_start, (_p).end, _entry) -+ -+#define bkey_extent_entry_for_each(_p, _entry) \ -+ bkey_extent_entry_for_each_from(_p, _entry, _p.start) -+ -+#define __bkey_for_each_ptr(_start, _end, _ptr) \ -+ for ((_ptr) = (_start); \ -+ ((_ptr) = __bkey_ptr_next(_ptr, _end)); \ -+ (_ptr)++) -+ -+#define bkey_ptr_next(_p, _ptr) \ -+ __bkey_ptr_next(_ptr, (_p).end) -+ -+#define bkey_for_each_ptr(_p, _ptr) \ -+ __bkey_for_each_ptr(&(_p).start->ptr, (_p).end, _ptr) -+ -+#define __bkey_ptr_next_decode(_k, _end, _ptr, _entry) \ -+({ \ -+ __label__ out; \ -+ \ -+ (_ptr).idx = 0; \ -+ (_ptr).has_ec = false; \ -+ \ -+ __bkey_extent_entry_for_each_from(_entry, _end, _entry) \ -+ switch (extent_entry_type(_entry)) { \ -+ case BCH_EXTENT_ENTRY_ptr: \ -+ (_ptr).ptr = _entry->ptr; \ -+ goto out; \ -+ case BCH_EXTENT_ENTRY_crc32: \ -+ case BCH_EXTENT_ENTRY_crc64: \ -+ case BCH_EXTENT_ENTRY_crc128: \ -+ (_ptr).crc = bch2_extent_crc_unpack(_k, \ -+ entry_to_crc(_entry)); \ -+ break; \ -+ case BCH_EXTENT_ENTRY_stripe_ptr: \ -+ (_ptr).ec = _entry->stripe_ptr; \ -+ (_ptr).has_ec = true; \ -+ break; \ -+ } \ -+out: \ -+ _entry < (_end); \ -+}) -+ -+#define __bkey_for_each_ptr_decode(_k, _start, _end, _ptr, _entry) \ -+ for ((_ptr).crc = bch2_extent_crc_unpack(_k, NULL), \ -+ (_entry) = _start; \ -+ __bkey_ptr_next_decode(_k, _end, _ptr, _entry); \ -+ (_entry) = extent_entry_next(_entry)) -+ -+#define bkey_for_each_ptr_decode(_k, _p, _ptr, _entry) \ -+ __bkey_for_each_ptr_decode(_k, (_p).start, (_p).end, \ -+ _ptr, _entry) -+ -+#define bkey_crc_next(_k, _start, _end, _crc, _iter) \ -+({ \ -+ __bkey_extent_entry_for_each_from(_iter, _end, _iter) \ -+ if (extent_entry_is_crc(_iter)) { \ -+ (_crc) = bch2_extent_crc_unpack(_k, \ -+ entry_to_crc(_iter)); \ -+ break; \ -+ } \ -+ \ -+ (_iter) < (_end); \ -+}) -+ -+#define __bkey_for_each_crc(_k, _start, _end, _crc, _iter) \ -+ for ((_crc) = bch2_extent_crc_unpack(_k, NULL), \ -+ (_iter) = (_start); \ -+ bkey_crc_next(_k, _start, _end, _crc, _iter); \ -+ (_iter) = extent_entry_next(_iter)) -+ -+#define bkey_for_each_crc(_k, _p, _crc, _iter) \ -+ __bkey_for_each_crc(_k, (_p).start, (_p).end, _crc, _iter) -+ -+/* Iterate over pointers in KEY_TYPE_extent: */ -+ -+#define extent_for_each_entry_from(_e, _entry, _start) \ -+ __bkey_extent_entry_for_each_from(_start, \ -+ extent_entry_last(_e),_entry) -+ -+#define extent_for_each_entry(_e, _entry) \ -+ extent_for_each_entry_from(_e, _entry, (_e).v->start) -+ -+#define extent_ptr_next(_e, _ptr) \ -+ __bkey_ptr_next(_ptr, extent_entry_last(_e)) -+ -+#define extent_for_each_ptr(_e, _ptr) \ -+ __bkey_for_each_ptr(&(_e).v->start->ptr, extent_entry_last(_e), _ptr) -+ -+#define extent_for_each_ptr_decode(_e, _ptr, _entry) \ -+ __bkey_for_each_ptr_decode((_e).k, (_e).v->start, \ -+ extent_entry_last(_e), _ptr, _entry) -+ -+/* utility code common to all keys with pointers: */ -+ -+void bch2_mark_io_failure(struct bch_io_failures *, -+ struct extent_ptr_decoded *); -+int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, -+ struct bch_io_failures *, -+ struct extent_ptr_decoded *); -+ -+/* KEY_TYPE_btree_ptr: */ -+ -+const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_btree_ptr_debugcheck(struct bch_fs *, struct bkey_s_c); -+void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+void bch2_btree_ptr_v2_to_text(struct printbuf *, 
struct bch_fs *, -+ struct bkey_s_c); -+void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, -+ int, struct bkey_s); -+ -+#define bch2_bkey_ops_btree_ptr (struct bkey_ops) { \ -+ .key_invalid = bch2_btree_ptr_invalid, \ -+ .key_debugcheck = bch2_btree_ptr_debugcheck, \ -+ .val_to_text = bch2_btree_ptr_to_text, \ -+ .swab = bch2_ptr_swab, \ -+} -+ -+#define bch2_bkey_ops_btree_ptr_v2 (struct bkey_ops) { \ -+ .key_invalid = bch2_btree_ptr_invalid, \ -+ .key_debugcheck = bch2_btree_ptr_debugcheck, \ -+ .val_to_text = bch2_btree_ptr_v2_to_text, \ -+ .swab = bch2_ptr_swab, \ -+ .compat = bch2_btree_ptr_v2_compat, \ -+} -+ -+/* KEY_TYPE_extent: */ -+ -+const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_extent_debugcheck(struct bch_fs *, struct bkey_s_c); -+void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+enum merge_result bch2_extent_merge(struct bch_fs *, -+ struct bkey_s, struct bkey_s); -+ -+#define bch2_bkey_ops_extent (struct bkey_ops) { \ -+ .key_invalid = bch2_extent_invalid, \ -+ .key_debugcheck = bch2_extent_debugcheck, \ -+ .val_to_text = bch2_extent_to_text, \ -+ .swab = bch2_ptr_swab, \ -+ .key_normalize = bch2_extent_normalize, \ -+ .key_merge = bch2_extent_merge, \ -+} -+ -+/* KEY_TYPE_reservation: */ -+ -+const char *bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+enum merge_result bch2_reservation_merge(struct bch_fs *, -+ struct bkey_s, struct bkey_s); -+ -+#define bch2_bkey_ops_reservation (struct bkey_ops) { \ -+ .key_invalid = bch2_reservation_invalid, \ -+ .val_to_text = bch2_reservation_to_text, \ -+ .key_merge = bch2_reservation_merge, \ -+} -+ -+/* Extent checksum entries: */ -+ -+bool bch2_can_narrow_extent_crcs(struct bkey_s_c, -+ struct bch_extent_crc_unpacked); -+bool bch2_bkey_narrow_crcs(struct bkey_i *, struct bch_extent_crc_unpacked); -+void bch2_extent_crc_append(struct bkey_i *, -+ struct bch_extent_crc_unpacked); -+ -+/* Generic code for keys with pointers: */ -+ -+static inline bool bkey_extent_is_direct_data(const struct bkey *k) -+{ -+ switch (k->type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline bool bkey_extent_is_data(const struct bkey *k) -+{ -+ return bkey_extent_is_direct_data(k) || -+ k->type == KEY_TYPE_inline_data || -+ k->type == KEY_TYPE_reflink_p; -+} -+ -+/* -+ * Should extent be counted under inode->i_sectors? 
-+ */ -+static inline bool bkey_extent_is_allocation(const struct bkey *k) -+{ -+ switch (k->type) { -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reservation: -+ case KEY_TYPE_reflink_p: -+ case KEY_TYPE_reflink_v: -+ case KEY_TYPE_inline_data: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k) -+{ -+ struct bch_devs_list ret = (struct bch_devs_list) { 0 }; -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(p, ptr) -+ ret.devs[ret.nr++] = ptr->dev; -+ -+ return ret; -+} -+ -+static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k) -+{ -+ struct bch_devs_list ret = (struct bch_devs_list) { 0 }; -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(p, ptr) -+ if (!ptr->cached) -+ ret.devs[ret.nr++] = ptr->dev; -+ -+ return ret; -+} -+ -+static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) -+{ -+ struct bch_devs_list ret = (struct bch_devs_list) { 0 }; -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(p, ptr) -+ if (ptr->cached) -+ ret.devs[ret.nr++] = ptr->dev; -+ -+ return ret; -+} -+ -+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c); -+unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c); -+unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c); -+bool bch2_bkey_is_incompressible(struct bkey_s_c); -+unsigned bch2_bkey_sectors_compressed(struct bkey_s_c); -+bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned); -+unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c); -+ -+void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s, -+ unsigned, unsigned); -+ -+void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr); -+void bch2_extent_ptr_decoded_append(struct bkey_i *, -+ struct extent_ptr_decoded *); -+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s, -+ struct bch_extent_ptr *); -+ -+#define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \ -+do { \ -+ struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \ -+ \ -+ _ptr = &_ptrs.start->ptr; \ -+ \ -+ while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \ -+ if (_cond) { \ -+ _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \ -+ _ptrs = bch2_bkey_ptrs(_k); \ -+ continue; \ -+ } \ -+ \ -+ (_ptr)++; \ -+ } \ -+} while (0) -+ -+void bch2_bkey_drop_device(struct bkey_s, unsigned); -+const struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s_c, unsigned); -+bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned); -+ -+bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c, -+ struct bch_extent_ptr, u64); -+ -+bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); -+void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+const char *bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c); -+ -+void bch2_ptr_swab(struct bkey_s); -+ -+/* Generic extent code: */ -+ -+int bch2_cut_front_s(struct bpos, struct bkey_s); -+int bch2_cut_back_s(struct bpos, struct bkey_s); -+ -+static inline void bch2_cut_front(struct bpos where, struct bkey_i *k) -+{ -+ bch2_cut_front_s(where, bkey_i_to_s(k)); -+} -+ -+static inline void bch2_cut_back(struct bpos where, struct bkey_i *k) -+{ -+ bch2_cut_back_s(where, bkey_i_to_s(k)); -+} -+ -+/** -+ * bch_key_resize - adjust size of @k -+ * -+ * bkey_start_offset(k) will be preserved, modifies where the extent ends -+ */ -+static inline void 
bch2_key_resize(struct bkey *k, unsigned new_size) -+{ -+ k->p.offset -= k->size; -+ k->p.offset += new_size; -+ k->size = new_size; -+} -+ -+/* -+ * In extent_sort_fix_overlapping(), insert_fixup_extent(), -+ * extent_merge_inline() - we're modifying keys in place that are packed. To do -+ * that we have to unpack the key, modify the unpacked key - then this -+ * copies/repacks the unpacked to the original as necessary. -+ */ -+static inline void extent_save(struct btree *b, struct bkey_packed *dst, -+ struct bkey *src) -+{ -+ struct bkey_format *f = &b->format; -+ struct bkey_i *dst_unpacked; -+ -+ if ((dst_unpacked = packed_to_bkey(dst))) -+ dst_unpacked->k = *src; -+ else -+ BUG_ON(!bch2_bkey_pack_key(dst, src, f)); -+} -+ -+#endif /* _BCACHEFS_EXTENTS_H */ -diff --git a/fs/bcachefs/extents_types.h b/fs/bcachefs/extents_types.h -new file mode 100644 -index 000000000000..43d6c341ecca ---- /dev/null -+++ b/fs/bcachefs/extents_types.h -@@ -0,0 +1,40 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EXTENTS_TYPES_H -+#define _BCACHEFS_EXTENTS_TYPES_H -+ -+#include "bcachefs_format.h" -+ -+struct bch_extent_crc_unpacked { -+ u32 compressed_size; -+ u32 uncompressed_size; -+ u32 live_size; -+ -+ u8 csum_type; -+ u8 compression_type; -+ -+ u16 offset; -+ -+ u16 nonce; -+ -+ struct bch_csum csum; -+}; -+ -+struct extent_ptr_decoded { -+ unsigned idx; -+ bool has_ec; -+ struct bch_extent_crc_unpacked crc; -+ struct bch_extent_ptr ptr; -+ struct bch_extent_stripe_ptr ec; -+}; -+ -+struct bch_io_failures { -+ u8 nr; -+ struct bch_dev_io_failures { -+ u8 dev; -+ u8 idx; -+ u8 nr_failed; -+ u8 nr_retries; -+ } devs[BCH_REPLICAS_MAX]; -+}; -+ -+#endif /* _BCACHEFS_EXTENTS_TYPES_H */ -diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h -new file mode 100644 -index 000000000000..26d5cad7e6a5 ---- /dev/null -+++ b/fs/bcachefs/eytzinger.h -@@ -0,0 +1,285 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _EYTZINGER_H -+#define _EYTZINGER_H -+ -+#include -+#include -+ -+#include "util.h" -+ -+/* -+ * Traversal for trees in eytzinger layout - a full binary tree layed out in an -+ * array -+ */ -+ -+/* -+ * One based indexing version: -+ * -+ * With one based indexing each level of the tree starts at a power of two - -+ * good for cacheline alignment: -+ * -+ * Size parameter is treated as if we were using 0 based indexing, however: -+ * valid nodes, and inorder indices, are in the range [1..size) - that is, there -+ * are actually size - 1 elements -+ */ -+ -+static inline unsigned eytzinger1_child(unsigned i, unsigned child) -+{ -+ EBUG_ON(child > 1); -+ -+ return (i << 1) + child; -+} -+ -+static inline unsigned eytzinger1_left_child(unsigned i) -+{ -+ return eytzinger1_child(i, 0); -+} -+ -+static inline unsigned eytzinger1_right_child(unsigned i) -+{ -+ return eytzinger1_child(i, 1); -+} -+ -+static inline unsigned eytzinger1_first(unsigned size) -+{ -+ return rounddown_pow_of_two(size - 1); -+} -+ -+static inline unsigned eytzinger1_last(unsigned size) -+{ -+ return rounddown_pow_of_two(size) - 1; -+} -+ -+/* -+ * eytzinger1_next() and eytzinger1_prev() have the nice properties that -+ * -+ * eytzinger1_next(0) == eytzinger1_first()) -+ * eytzinger1_prev(0) == eytzinger1_last()) -+ * -+ * eytzinger1_prev(eytzinger1_first()) == 0 -+ * eytzinger1_next(eytzinger1_last()) == 0 -+ */ -+ -+static inline unsigned eytzinger1_next(unsigned i, unsigned size) -+{ -+ EBUG_ON(i >= size); -+ -+ if (eytzinger1_right_child(i) < size) { -+ i = eytzinger1_right_child(i); -+ -+ i <<= 
__fls(size) - __fls(i); -+ i >>= i >= size; -+ } else { -+ i >>= ffz(i) + 1; -+ } -+ -+ return i; -+} -+ -+static inline unsigned eytzinger1_prev(unsigned i, unsigned size) -+{ -+ EBUG_ON(i >= size); -+ -+ if (eytzinger1_left_child(i) < size) { -+ i = eytzinger1_left_child(i) + 1; -+ -+ i <<= __fls(size) - __fls(i); -+ i -= 1; -+ i >>= i >= size; -+ } else { -+ i >>= __ffs(i) + 1; -+ } -+ -+ return i; -+} -+ -+static inline unsigned eytzinger1_extra(unsigned size) -+{ -+ return (size - rounddown_pow_of_two(size - 1)) << 1; -+} -+ -+static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ unsigned b = __fls(i); -+ unsigned shift = __fls(size - 1) - b; -+ int s; -+ -+ EBUG_ON(!i || i >= size); -+ -+ i ^= 1U << b; -+ i <<= 1; -+ i |= 1; -+ i <<= shift; -+ -+ /* -+ * sign bit trick: -+ * -+ * if (i > extra) -+ * i -= (i - extra) >> 1; -+ */ -+ s = extra - i; -+ i += (s >> 1) & (s >> 31); -+ -+ return i; -+} -+ -+static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ unsigned shift; -+ int s; -+ -+ EBUG_ON(!i || i >= size); -+ -+ /* -+ * sign bit trick: -+ * -+ * if (i > extra) -+ * i += i - extra; -+ */ -+ s = extra - i; -+ i -= s & (s >> 31); -+ -+ shift = __ffs(i); -+ -+ i >>= shift + 1; -+ i |= 1U << (__fls(size - 1) - shift); -+ -+ return i; -+} -+ -+static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size) -+{ -+ return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size)); -+} -+ -+static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size) -+{ -+ return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size)); -+} -+ -+#define eytzinger1_for_each(_i, _size) \ -+ for ((_i) = eytzinger1_first((_size)); \ -+ (_i) != 0; \ -+ (_i) = eytzinger1_next((_i), (_size))) -+ -+/* Zero based indexing version: */ -+ -+static inline unsigned eytzinger0_child(unsigned i, unsigned child) -+{ -+ EBUG_ON(child > 1); -+ -+ return (i << 1) + 1 + child; -+} -+ -+static inline unsigned eytzinger0_left_child(unsigned i) -+{ -+ return eytzinger0_child(i, 0); -+} -+ -+static inline unsigned eytzinger0_right_child(unsigned i) -+{ -+ return eytzinger0_child(i, 1); -+} -+ -+static inline unsigned eytzinger0_first(unsigned size) -+{ -+ return eytzinger1_first(size + 1) - 1; -+} -+ -+static inline unsigned eytzinger0_last(unsigned size) -+{ -+ return eytzinger1_last(size + 1) - 1; -+} -+ -+static inline unsigned eytzinger0_next(unsigned i, unsigned size) -+{ -+ return eytzinger1_next(i + 1, size + 1) - 1; -+} -+ -+static inline unsigned eytzinger0_prev(unsigned i, unsigned size) -+{ -+ return eytzinger1_prev(i + 1, size + 1) - 1; -+} -+ -+static inline unsigned eytzinger0_extra(unsigned size) -+{ -+ return eytzinger1_extra(size + 1); -+} -+ -+static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ return __eytzinger1_to_inorder(i + 1, size + 1, extra) - 1; -+} -+ -+static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ return __inorder_to_eytzinger1(i + 1, size + 1, extra) - 1; -+} -+ -+static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size) -+{ -+ return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size)); -+} -+ -+static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) -+{ -+ return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size)); -+} -+ -+#define eytzinger0_for_each(_i, _size) \ -+ for ((_i) = eytzinger0_first((_size)); \ -+ (_i) != -1; \ -+ (_i) = 
eytzinger0_next((_i), (_size))) -+ -+typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size); -+ -+/* return greatest node <= @search, or -1 if not found */ -+static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, -+ eytzinger_cmp_fn cmp, const void *search) -+{ -+ unsigned i, n = 0; -+ -+ if (!nr) -+ return -1; -+ -+ do { -+ i = n; -+ n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0); -+ } while (n < nr); -+ -+ if (n & 1) { -+ /* @i was greater than @search, return previous node: */ -+ -+ if (i == eytzinger0_first(nr)) -+ return -1; -+ -+ return eytzinger0_prev(i, nr); -+ } else { -+ return i; -+ } -+} -+ -+#define eytzinger0_find(base, nr, size, _cmp, search) \ -+({ \ -+ void *_base = (base); \ -+ void *_search = (search); \ -+ size_t _nr = (nr); \ -+ size_t _size = (size); \ -+ size_t _i = 0; \ -+ int _res; \ -+ \ -+ while (_i < _nr && \ -+ (_res = _cmp(_search, _base + _i * _size, _size))) \ -+ _i = eytzinger0_child(_i, _res > 0); \ -+ _i; \ -+}) -+ -+void eytzinger0_sort(void *, size_t, size_t, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t)); -+ -+#endif /* _EYTZINGER_H */ -diff --git a/fs/bcachefs/fifo.h b/fs/bcachefs/fifo.h -new file mode 100644 -index 000000000000..cdb272708a4b ---- /dev/null -+++ b/fs/bcachefs/fifo.h -@@ -0,0 +1,127 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FIFO_H -+#define _BCACHEFS_FIFO_H -+ -+#include "util.h" -+ -+#define FIFO(type) \ -+struct { \ -+ size_t front, back, size, mask; \ -+ type *data; \ -+} -+ -+#define DECLARE_FIFO(type, name) FIFO(type) name -+ -+#define fifo_buf_size(fifo) \ -+ ((fifo)->size \ -+ ? roundup_pow_of_two((fifo)->size) * sizeof((fifo)->data[0]) \ -+ : 0) -+ -+#define init_fifo(fifo, _size, _gfp) \ -+({ \ -+ (fifo)->front = (fifo)->back = 0; \ -+ (fifo)->size = (_size); \ -+ (fifo)->mask = (fifo)->size \ -+ ? roundup_pow_of_two((fifo)->size) - 1 \ -+ : 0; \ -+ (fifo)->data = kvpmalloc(fifo_buf_size(fifo), (_gfp)); \ -+}) -+ -+#define free_fifo(fifo) \ -+do { \ -+ kvpfree((fifo)->data, fifo_buf_size(fifo)); \ -+ (fifo)->data = NULL; \ -+} while (0) -+ -+#define fifo_swap(l, r) \ -+do { \ -+ swap((l)->front, (r)->front); \ -+ swap((l)->back, (r)->back); \ -+ swap((l)->size, (r)->size); \ -+ swap((l)->mask, (r)->mask); \ -+ swap((l)->data, (r)->data); \ -+} while (0) -+ -+#define fifo_move(dest, src) \ -+do { \ -+ typeof(*((dest)->data)) _t; \ -+ while (!fifo_full(dest) && \ -+ fifo_pop(src, _t)) \ -+ fifo_push(dest, _t); \ -+} while (0) -+ -+#define fifo_used(fifo) (((fifo)->back - (fifo)->front)) -+#define fifo_free(fifo) ((fifo)->size - fifo_used(fifo)) -+ -+#define fifo_empty(fifo) ((fifo)->front == (fifo)->back) -+#define fifo_full(fifo) (fifo_used(fifo) == (fifo)->size) -+ -+#define fifo_peek_front(fifo) ((fifo)->data[(fifo)->front & (fifo)->mask]) -+#define fifo_peek_back(fifo) ((fifo)->data[((fifo)->back - 1) & (fifo)->mask]) -+ -+#define fifo_entry_idx_abs(fifo, p) \ -+ ((((p) >= &fifo_peek_front(fifo) \ -+ ? (fifo)->front : (fifo)->back) & ~(fifo)->mask) + \ -+ (((p) - (fifo)->data))) -+ -+#define fifo_entry_idx(fifo, p) (((p) - &fifo_peek_front(fifo)) & (fifo)->mask) -+#define fifo_idx_entry(fifo, i) (fifo)->data[((fifo)->front + (i)) & (fifo)->mask] -+ -+#define fifo_push_back_ref(f) \ -+ (fifo_full((f)) ? NULL : &(f)->data[(f)->back++ & (f)->mask]) -+ -+#define fifo_push_front_ref(f) \ -+ (fifo_full((f)) ? 
NULL : &(f)->data[--(f)->front & (f)->mask]) -+ -+#define fifo_push_back(fifo, new) \ -+({ \ -+ typeof((fifo)->data) _r = fifo_push_back_ref(fifo); \ -+ if (_r) \ -+ *_r = (new); \ -+ _r != NULL; \ -+}) -+ -+#define fifo_push_front(fifo, new) \ -+({ \ -+ typeof((fifo)->data) _r = fifo_push_front_ref(fifo); \ -+ if (_r) \ -+ *_r = (new); \ -+ _r != NULL; \ -+}) -+ -+#define fifo_pop_front(fifo, i) \ -+({ \ -+ bool _r = !fifo_empty((fifo)); \ -+ if (_r) \ -+ (i) = (fifo)->data[(fifo)->front++ & (fifo)->mask]; \ -+ _r; \ -+}) -+ -+#define fifo_pop_back(fifo, i) \ -+({ \ -+ bool _r = !fifo_empty((fifo)); \ -+ if (_r) \ -+ (i) = (fifo)->data[--(fifo)->back & (fifo)->mask]; \ -+ _r; \ -+}) -+ -+#define fifo_push_ref(fifo) fifo_push_back_ref(fifo) -+#define fifo_push(fifo, i) fifo_push_back(fifo, (i)) -+#define fifo_pop(fifo, i) fifo_pop_front(fifo, (i)) -+#define fifo_peek(fifo) fifo_peek_front(fifo) -+ -+#define fifo_for_each_entry(_entry, _fifo, _iter) \ -+ for (typecheck(typeof((_fifo)->front), _iter), \ -+ (_iter) = (_fifo)->front; \ -+ ((_iter != (_fifo)->back) && \ -+ (_entry = (_fifo)->data[(_iter) & (_fifo)->mask], true)); \ -+ (_iter)++) -+ -+#define fifo_for_each_entry_ptr(_ptr, _fifo, _iter) \ -+ for (typecheck(typeof((_fifo)->front), _iter), \ -+ (_iter) = (_fifo)->front; \ -+ ((_iter != (_fifo)->back) && \ -+ (_ptr = &(_fifo)->data[(_iter) & (_fifo)->mask], true)); \ -+ (_iter)++) -+ -+#endif /* _BCACHEFS_FIFO_H */ -diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c -new file mode 100644 -index 000000000000..878419d40992 ---- /dev/null -+++ b/fs/bcachefs/fs-common.c -@@ -0,0 +1,317 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "acl.h" -+#include "btree_update.h" -+#include "dirent.h" -+#include "fs-common.h" -+#include "inode.h" -+#include "xattr.h" -+ -+#include -+ -+int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, -+ struct bch_inode_unpacked *dir_u, -+ struct bch_inode_unpacked *new_inode, -+ const struct qstr *name, -+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev, -+ struct posix_acl *default_acl, -+ struct posix_acl *acl) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *dir_iter = NULL; -+ struct bch_hash_info hash = bch2_hash_info_init(c, new_inode); -+ u64 now = bch2_current_time(trans->c); -+ int ret; -+ -+ dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dir_iter); -+ if (ret) -+ goto err; -+ -+ bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); -+ -+ if (!name) -+ new_inode->bi_flags |= BCH_INODE_UNLINKED; -+ -+ ret = bch2_inode_create(trans, new_inode, -+ BLOCKDEV_INODE_MAX, 0, -+ &c->unused_inode_hint); -+ if (ret) -+ goto err; -+ -+ if (default_acl) { -+ ret = bch2_set_acl_trans(trans, new_inode, &hash, -+ default_acl, ACL_TYPE_DEFAULT); -+ if (ret) -+ goto err; -+ } -+ -+ if (acl) { -+ ret = bch2_set_acl_trans(trans, new_inode, &hash, -+ acl, ACL_TYPE_ACCESS); -+ if (ret) -+ goto err; -+ } -+ -+ if (name) { -+ struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u); -+ dir_u->bi_mtime = dir_u->bi_ctime = now; -+ -+ if (S_ISDIR(new_inode->bi_mode)) -+ dir_u->bi_nlink++; -+ -+ ret = bch2_inode_write(trans, dir_iter, dir_u); -+ if (ret) -+ goto err; -+ -+ ret = bch2_dirent_create(trans, dir_inum, &dir_hash, -+ mode_to_type(new_inode->bi_mode), -+ name, new_inode->bi_inum, -+ BCH_HASH_SET_MUST_CREATE); -+ if (ret) -+ goto err; -+ } -+err: -+ bch2_trans_iter_put(trans, dir_iter); -+ return ret; -+} -+ -+int bch2_link_trans(struct 
btree_trans *trans, u64 dir_inum, -+ u64 inum, struct bch_inode_unpacked *dir_u, -+ struct bch_inode_unpacked *inode_u, const struct qstr *name) -+{ -+ struct btree_iter *dir_iter = NULL, *inode_iter = NULL; -+ struct bch_hash_info dir_hash; -+ u64 now = bch2_current_time(trans->c); -+ int ret; -+ -+ inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ if (ret) -+ goto err; -+ -+ inode_u->bi_ctime = now; -+ bch2_inode_nlink_inc(inode_u); -+ -+ dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, 0); -+ ret = PTR_ERR_OR_ZERO(dir_iter); -+ if (ret) -+ goto err; -+ -+ dir_u->bi_mtime = dir_u->bi_ctime = now; -+ -+ dir_hash = bch2_hash_info_init(trans->c, dir_u); -+ -+ ret = bch2_dirent_create(trans, dir_inum, &dir_hash, -+ mode_to_type(inode_u->bi_mode), -+ name, inum, BCH_HASH_SET_MUST_CREATE) ?: -+ bch2_inode_write(trans, dir_iter, dir_u) ?: -+ bch2_inode_write(trans, inode_iter, inode_u); -+err: -+ bch2_trans_iter_put(trans, dir_iter); -+ bch2_trans_iter_put(trans, inode_iter); -+ return ret; -+} -+ -+int bch2_unlink_trans(struct btree_trans *trans, -+ u64 dir_inum, struct bch_inode_unpacked *dir_u, -+ struct bch_inode_unpacked *inode_u, -+ const struct qstr *name) -+{ -+ struct btree_iter *dir_iter = NULL, *dirent_iter = NULL, -+ *inode_iter = NULL; -+ struct bch_hash_info dir_hash; -+ u64 inum, now = bch2_current_time(trans->c); -+ struct bkey_s_c k; -+ int ret; -+ -+ dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dir_iter); -+ if (ret) -+ goto err; -+ -+ dir_hash = bch2_hash_info_init(trans->c, dir_u); -+ -+ dirent_iter = __bch2_dirent_lookup_trans(trans, dir_inum, &dir_hash, -+ name, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dirent_iter); -+ if (ret) -+ goto err; -+ -+ k = bch2_btree_iter_peek_slot(dirent_iter); -+ inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); -+ -+ inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ if (ret) -+ goto err; -+ -+ dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now; -+ dir_u->bi_nlink -= S_ISDIR(inode_u->bi_mode); -+ bch2_inode_nlink_dec(inode_u); -+ -+ ret = (S_ISDIR(inode_u->bi_mode) -+ ? 
bch2_empty_dir_trans(trans, inum) -+ : 0) ?: -+ bch2_dirent_delete_at(trans, &dir_hash, dirent_iter) ?: -+ bch2_inode_write(trans, dir_iter, dir_u) ?: -+ bch2_inode_write(trans, inode_iter, inode_u); -+err: -+ bch2_trans_iter_put(trans, inode_iter); -+ bch2_trans_iter_put(trans, dirent_iter); -+ bch2_trans_iter_put(trans, dir_iter); -+ return ret; -+} -+ -+bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, -+ struct bch_inode_unpacked *src_u) -+{ -+ u64 src, dst; -+ unsigned id; -+ bool ret = false; -+ -+ for (id = 0; id < Inode_opt_nr; id++) { -+ if (dst_u->bi_fields_set & (1 << id)) -+ continue; -+ -+ src = bch2_inode_opt_get(src_u, id); -+ dst = bch2_inode_opt_get(dst_u, id); -+ -+ if (src == dst) -+ continue; -+ -+ bch2_inode_opt_set(dst_u, id, src); -+ ret = true; -+ } -+ -+ return ret; -+} -+ -+int bch2_rename_trans(struct btree_trans *trans, -+ u64 src_dir, struct bch_inode_unpacked *src_dir_u, -+ u64 dst_dir, struct bch_inode_unpacked *dst_dir_u, -+ struct bch_inode_unpacked *src_inode_u, -+ struct bch_inode_unpacked *dst_inode_u, -+ const struct qstr *src_name, -+ const struct qstr *dst_name, -+ enum bch_rename_mode mode) -+{ -+ struct btree_iter *src_dir_iter = NULL, *dst_dir_iter = NULL; -+ struct btree_iter *src_inode_iter = NULL, *dst_inode_iter = NULL; -+ struct bch_hash_info src_hash, dst_hash; -+ u64 src_inode, dst_inode, now = bch2_current_time(trans->c); -+ int ret; -+ -+ src_dir_iter = bch2_inode_peek(trans, src_dir_u, src_dir, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(src_dir_iter); -+ if (ret) -+ goto err; -+ -+ src_hash = bch2_hash_info_init(trans->c, src_dir_u); -+ -+ if (dst_dir != src_dir) { -+ dst_dir_iter = bch2_inode_peek(trans, dst_dir_u, dst_dir, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dst_dir_iter); -+ if (ret) -+ goto err; -+ -+ dst_hash = bch2_hash_info_init(trans->c, dst_dir_u); -+ } else { -+ dst_dir_u = src_dir_u; -+ dst_hash = src_hash; -+ } -+ -+ ret = bch2_dirent_rename(trans, -+ src_dir, &src_hash, -+ dst_dir, &dst_hash, -+ src_name, &src_inode, -+ dst_name, &dst_inode, -+ mode); -+ if (ret) -+ goto err; -+ -+ src_inode_iter = bch2_inode_peek(trans, src_inode_u, src_inode, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(src_inode_iter); -+ if (ret) -+ goto err; -+ -+ if (dst_inode) { -+ dst_inode_iter = bch2_inode_peek(trans, dst_inode_u, dst_inode, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dst_inode_iter); -+ if (ret) -+ goto err; -+ } -+ -+ if (mode == BCH_RENAME_OVERWRITE) { -+ if (S_ISDIR(src_inode_u->bi_mode) != -+ S_ISDIR(dst_inode_u->bi_mode)) { -+ ret = -ENOTDIR; -+ goto err; -+ } -+ -+ if (S_ISDIR(dst_inode_u->bi_mode) && -+ bch2_empty_dir_trans(trans, dst_inode)) { -+ ret = -ENOTEMPTY; -+ goto err; -+ } -+ } -+ -+ if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) && -+ S_ISDIR(src_inode_u->bi_mode)) { -+ ret = -EXDEV; -+ goto err; -+ } -+ -+ if (mode == BCH_RENAME_EXCHANGE && -+ bch2_reinherit_attrs(dst_inode_u, src_dir_u) && -+ S_ISDIR(dst_inode_u->bi_mode)) { -+ ret = -EXDEV; -+ goto err; -+ } -+ -+ if (S_ISDIR(src_inode_u->bi_mode)) { -+ src_dir_u->bi_nlink--; -+ dst_dir_u->bi_nlink++; -+ } -+ -+ if (dst_inode && S_ISDIR(dst_inode_u->bi_mode)) { -+ dst_dir_u->bi_nlink--; -+ src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE; -+ } -+ -+ if (mode == BCH_RENAME_OVERWRITE) -+ bch2_inode_nlink_dec(dst_inode_u); -+ -+ src_dir_u->bi_mtime = now; -+ src_dir_u->bi_ctime = now; -+ -+ if (src_dir != dst_dir) { -+ dst_dir_u->bi_mtime = now; -+ dst_dir_u->bi_ctime = now; -+ } -+ -+ src_inode_u->bi_ctime = now; -+ -+ if 
(dst_inode) -+ dst_inode_u->bi_ctime = now; -+ -+ ret = bch2_inode_write(trans, src_dir_iter, src_dir_u) ?: -+ (src_dir != dst_dir -+ ? bch2_inode_write(trans, dst_dir_iter, dst_dir_u) -+ : 0 ) ?: -+ bch2_inode_write(trans, src_inode_iter, src_inode_u) ?: -+ (dst_inode -+ ? bch2_inode_write(trans, dst_inode_iter, dst_inode_u) -+ : 0 ); -+err: -+ bch2_trans_iter_put(trans, dst_inode_iter); -+ bch2_trans_iter_put(trans, src_inode_iter); -+ bch2_trans_iter_put(trans, dst_dir_iter); -+ bch2_trans_iter_put(trans, src_dir_iter); -+ return ret; -+} -diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h -new file mode 100644 -index 000000000000..2273b7961c9b ---- /dev/null -+++ b/fs/bcachefs/fs-common.h -@@ -0,0 +1,37 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_COMMON_H -+#define _BCACHEFS_FS_COMMON_H -+ -+struct posix_acl; -+ -+int bch2_create_trans(struct btree_trans *, u64, -+ struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *, -+ uid_t, gid_t, umode_t, dev_t, -+ struct posix_acl *, -+ struct posix_acl *); -+ -+int bch2_link_trans(struct btree_trans *, u64, -+ u64, struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *); -+ -+int bch2_unlink_trans(struct btree_trans *, -+ u64, struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *); -+ -+int bch2_rename_trans(struct btree_trans *, -+ u64, struct bch_inode_unpacked *, -+ u64, struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *, -+ const struct qstr *, -+ enum bch_rename_mode); -+ -+bool bch2_reinherit_attrs(struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *); -+ -+#endif /* _BCACHEFS_FS_COMMON_H */ -diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c -new file mode 100644 -index 000000000000..ec78e7b52375 ---- /dev/null -+++ b/fs/bcachefs/fs-io.c -@@ -0,0 +1,3132 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_FS -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "clock.h" -+#include "error.h" -+#include "extents.h" -+#include "extent_update.h" -+#include "fs.h" -+#include "fs-io.h" -+#include "fsck.h" -+#include "inode.h" -+#include "journal.h" -+#include "io.h" -+#include "keylist.h" -+#include "quota.h" -+#include "reflink.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+struct quota_res { -+ u64 sectors; -+}; -+ -+struct bch_writepage_io { -+ struct closure cl; -+ struct bch_inode_info *inode; -+ -+ /* must be last: */ -+ struct bch_write_op op; -+}; -+ -+struct dio_write { -+ struct completion done; -+ struct kiocb *req; -+ struct mm_struct *mm; -+ unsigned loop:1, -+ sync:1, -+ free_iov:1; -+ struct quota_res quota_res; -+ u64 written; -+ -+ struct iov_iter iter; -+ struct iovec inline_vecs[2]; -+ -+ /* must be last: */ -+ struct bch_write_op op; -+}; -+ -+struct dio_read { -+ struct closure cl; -+ struct kiocb *req; -+ long ret; -+ struct bch_read_bio rbio; -+}; -+ -+/* pagecache_block must be held */ -+static int write_invalidate_inode_pages_range(struct address_space *mapping, -+ loff_t start, loff_t end) -+{ -+ int ret; -+ -+ /* -+ * XXX: the way this is currently implemented, we can spin if a process -+ * is continually redirtying a specific page -+ */ -+ do { -+ if (!mapping->nrpages && -+ !mapping->nrexceptional) -+ return 0; -+ 
-+ ret = filemap_write_and_wait_range(mapping, start, end); -+ if (ret) -+ break; -+ -+ if (!mapping->nrpages) -+ return 0; -+ -+ ret = invalidate_inode_pages2_range(mapping, -+ start >> PAGE_SHIFT, -+ end >> PAGE_SHIFT); -+ } while (ret == -EBUSY); -+ -+ return ret; -+} -+ -+/* quotas */ -+ -+#ifdef CONFIG_BCACHEFS_QUOTA -+ -+static void bch2_quota_reservation_put(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res) -+{ -+ if (!res->sectors) -+ return; -+ -+ mutex_lock(&inode->ei_quota_lock); -+ BUG_ON(res->sectors > inode->ei_quota_reserved); -+ -+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, -+ -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC); -+ inode->ei_quota_reserved -= res->sectors; -+ mutex_unlock(&inode->ei_quota_lock); -+ -+ res->sectors = 0; -+} -+ -+static int bch2_quota_reservation_add(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res, -+ unsigned sectors, -+ bool check_enospc) -+{ -+ int ret; -+ -+ mutex_lock(&inode->ei_quota_lock); -+ ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, -+ check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); -+ if (likely(!ret)) { -+ inode->ei_quota_reserved += sectors; -+ res->sectors += sectors; -+ } -+ mutex_unlock(&inode->ei_quota_lock); -+ -+ return ret; -+} -+ -+#else -+ -+static void bch2_quota_reservation_put(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res) -+{ -+} -+ -+static int bch2_quota_reservation_add(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res, -+ unsigned sectors, -+ bool check_enospc) -+{ -+ return 0; -+} -+ -+#endif -+ -+/* i_size updates: */ -+ -+struct inode_new_size { -+ loff_t new_size; -+ u64 now; -+ unsigned fields; -+}; -+ -+static int inode_set_size(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct inode_new_size *s = p; -+ -+ bi->bi_size = s->new_size; -+ if (s->fields & ATTR_ATIME) -+ bi->bi_atime = s->now; -+ if (s->fields & ATTR_MTIME) -+ bi->bi_mtime = s->now; -+ if (s->fields & ATTR_CTIME) -+ bi->bi_ctime = s->now; -+ -+ return 0; -+} -+ -+int __must_check bch2_write_inode_size(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ loff_t new_size, unsigned fields) -+{ -+ struct inode_new_size s = { -+ .new_size = new_size, -+ .now = bch2_current_time(c), -+ .fields = fields, -+ }; -+ -+ return bch2_write_inode(c, inode, inode_set_size, &s, fields); -+} -+ -+static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, -+ struct quota_res *quota_res, s64 sectors) -+{ -+ if (!sectors) -+ return; -+ -+ mutex_lock(&inode->ei_quota_lock); -+#ifdef CONFIG_BCACHEFS_QUOTA -+ if (quota_res && sectors > 0) { -+ BUG_ON(sectors > quota_res->sectors); -+ BUG_ON(sectors > inode->ei_quota_reserved); -+ -+ quota_res->sectors -= sectors; -+ inode->ei_quota_reserved -= sectors; -+ } else { -+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN); -+ } -+#endif -+ inode->v.i_blocks += sectors; -+ mutex_unlock(&inode->ei_quota_lock); -+} -+ -+/* page state: */ -+ -+/* stored in page->private: */ -+ -+struct bch_page_sector { -+ /* Uncompressed, fully allocated replicas: */ -+ unsigned nr_replicas:3; -+ -+ /* Owns PAGE_SECTORS * replicas_reserved sized reservation: */ -+ unsigned replicas_reserved:3; -+ -+ /* i_sectors: */ -+ enum { -+ SECTOR_UNALLOCATED, -+ SECTOR_RESERVED, -+ SECTOR_DIRTY, -+ SECTOR_ALLOCATED, -+ } state:2; -+}; -+ -+struct bch_page_state { -+ spinlock_t lock; -+ atomic_t write_count; -+ struct bch_page_sector 
s[PAGE_SECTORS]; -+}; -+ -+static inline struct bch_page_state *__bch2_page_state(struct page *page) -+{ -+ return page_has_private(page) -+ ? (struct bch_page_state *) page_private(page) -+ : NULL; -+} -+ -+static inline struct bch_page_state *bch2_page_state(struct page *page) -+{ -+ EBUG_ON(!PageLocked(page)); -+ -+ return __bch2_page_state(page); -+} -+ -+/* for newly allocated pages: */ -+static void __bch2_page_state_release(struct page *page) -+{ -+ struct bch_page_state *s = __bch2_page_state(page); -+ -+ if (!s) -+ return; -+ -+ ClearPagePrivate(page); -+ set_page_private(page, 0); -+ put_page(page); -+ kfree(s); -+} -+ -+static void bch2_page_state_release(struct page *page) -+{ -+ struct bch_page_state *s = bch2_page_state(page); -+ -+ if (!s) -+ return; -+ -+ ClearPagePrivate(page); -+ set_page_private(page, 0); -+ put_page(page); -+ kfree(s); -+} -+ -+/* for newly allocated pages: */ -+static struct bch_page_state *__bch2_page_state_create(struct page *page, -+ gfp_t gfp) -+{ -+ struct bch_page_state *s; -+ -+ s = kzalloc(sizeof(*s), GFP_NOFS|gfp); -+ if (!s) -+ return NULL; -+ -+ spin_lock_init(&s->lock); -+ /* -+ * migrate_page_move_mapping() assumes that pages with private data -+ * have their count elevated by 1. -+ */ -+ get_page(page); -+ set_page_private(page, (unsigned long) s); -+ SetPagePrivate(page); -+ return s; -+} -+ -+static struct bch_page_state *bch2_page_state_create(struct page *page, -+ gfp_t gfp) -+{ -+ return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp); -+} -+ -+static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode) -+{ -+ /* XXX: this should not be open coded */ -+ return inode->ei_inode.bi_data_replicas -+ ? inode->ei_inode.bi_data_replicas - 1 -+ : c->opts.data_replicas; -+} -+ -+static inline unsigned sectors_to_reserve(struct bch_page_sector *s, -+ unsigned nr_replicas) -+{ -+ return max(0, (int) nr_replicas - -+ s->nr_replicas - -+ s->replicas_reserved); -+} -+ -+static int bch2_get_page_disk_reservation(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct page *page, bool check_enospc) -+{ -+ struct bch_page_state *s = bch2_page_state_create(page, 0); -+ unsigned nr_replicas = inode_nr_replicas(c, inode); -+ struct disk_reservation disk_res = { 0 }; -+ unsigned i, disk_res_sectors = 0; -+ int ret; -+ -+ if (!s) -+ return -ENOMEM; -+ -+ for (i = 0; i < ARRAY_SIZE(s->s); i++) -+ disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas); -+ -+ if (!disk_res_sectors) -+ return 0; -+ -+ ret = bch2_disk_reservation_get(c, &disk_res, -+ disk_res_sectors, 1, -+ !check_enospc -+ ? 
BCH_DISK_RESERVATION_NOFAIL -+ : 0); -+ if (unlikely(ret)) -+ return ret; -+ -+ for (i = 0; i < ARRAY_SIZE(s->s); i++) -+ s->s[i].replicas_reserved += -+ sectors_to_reserve(&s->s[i], nr_replicas); -+ -+ return 0; -+} -+ -+struct bch2_page_reservation { -+ struct disk_reservation disk; -+ struct quota_res quota; -+}; -+ -+static void bch2_page_reservation_init(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch2_page_reservation *res) -+{ -+ memset(res, 0, sizeof(*res)); -+ -+ res->disk.nr_replicas = inode_nr_replicas(c, inode); -+} -+ -+static void bch2_page_reservation_put(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch2_page_reservation *res) -+{ -+ bch2_disk_reservation_put(c, &res->disk); -+ bch2_quota_reservation_put(c, inode, &res->quota); -+} -+ -+static int bch2_page_reservation_get(struct bch_fs *c, -+ struct bch_inode_info *inode, struct page *page, -+ struct bch2_page_reservation *res, -+ unsigned offset, unsigned len, bool check_enospc) -+{ -+ struct bch_page_state *s = bch2_page_state_create(page, 0); -+ unsigned i, disk_sectors = 0, quota_sectors = 0; -+ int ret; -+ -+ if (!s) -+ return -ENOMEM; -+ -+ for (i = round_down(offset, block_bytes(c)) >> 9; -+ i < round_up(offset + len, block_bytes(c)) >> 9; -+ i++) { -+ disk_sectors += sectors_to_reserve(&s->s[i], -+ res->disk.nr_replicas); -+ quota_sectors += s->s[i].state == SECTOR_UNALLOCATED; -+ } -+ -+ if (disk_sectors) { -+ ret = bch2_disk_reservation_add(c, &res->disk, -+ disk_sectors, -+ !check_enospc -+ ? BCH_DISK_RESERVATION_NOFAIL -+ : 0); -+ if (unlikely(ret)) -+ return ret; -+ } -+ -+ if (quota_sectors) { -+ ret = bch2_quota_reservation_add(c, inode, &res->quota, -+ quota_sectors, -+ check_enospc); -+ if (unlikely(ret)) { -+ struct disk_reservation tmp = { -+ .sectors = disk_sectors -+ }; -+ -+ bch2_disk_reservation_put(c, &tmp); -+ res->disk.sectors -= disk_sectors; -+ return ret; -+ } -+ } -+ -+ return 0; -+} -+ -+static void bch2_clear_page_bits(struct page *page) -+{ -+ struct bch_inode_info *inode = to_bch_ei(page->mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_page_state *s = bch2_page_state(page); -+ struct disk_reservation disk_res = { 0 }; -+ int i, dirty_sectors = 0; -+ -+ if (!s) -+ return; -+ -+ EBUG_ON(!PageLocked(page)); -+ EBUG_ON(PageWriteback(page)); -+ -+ for (i = 0; i < ARRAY_SIZE(s->s); i++) { -+ disk_res.sectors += s->s[i].replicas_reserved; -+ s->s[i].replicas_reserved = 0; -+ -+ if (s->s[i].state == SECTOR_DIRTY) { -+ dirty_sectors++; -+ s->s[i].state = SECTOR_UNALLOCATED; -+ } -+ } -+ -+ bch2_disk_reservation_put(c, &disk_res); -+ -+ if (dirty_sectors) -+ i_sectors_acct(c, inode, NULL, -dirty_sectors); -+ -+ bch2_page_state_release(page); -+} -+ -+static void bch2_set_page_dirty(struct bch_fs *c, -+ struct bch_inode_info *inode, struct page *page, -+ struct bch2_page_reservation *res, -+ unsigned offset, unsigned len) -+{ -+ struct bch_page_state *s = bch2_page_state(page); -+ unsigned i, dirty_sectors = 0; -+ -+ WARN_ON((u64) page_offset(page) + offset + len > -+ round_up((u64) i_size_read(&inode->v), block_bytes(c))); -+ -+ spin_lock(&s->lock); -+ -+ for (i = round_down(offset, block_bytes(c)) >> 9; -+ i < round_up(offset + len, block_bytes(c)) >> 9; -+ i++) { -+ unsigned sectors = sectors_to_reserve(&s->s[i], -+ res->disk.nr_replicas); -+ -+ /* -+ * This can happen if we race with the error path in -+ * bch2_writepage_io_done(): -+ */ -+ sectors = min_t(unsigned, sectors, res->disk.sectors); -+ -+ s->s[i].replicas_reserved += 
sectors; -+ res->disk.sectors -= sectors; -+ -+ if (s->s[i].state == SECTOR_UNALLOCATED) -+ dirty_sectors++; -+ -+ s->s[i].state = max_t(unsigned, s->s[i].state, SECTOR_DIRTY); -+ } -+ -+ spin_unlock(&s->lock); -+ -+ if (dirty_sectors) -+ i_sectors_acct(c, inode, &res->quota, dirty_sectors); -+ -+ if (!PageDirty(page)) -+ __set_page_dirty_nobuffers(page); -+} -+ -+vm_fault_t bch2_page_fault(struct vm_fault *vmf) -+{ -+ struct file *file = vmf->vma->vm_file; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ int ret; -+ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ ret = filemap_fault(vmf); -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ -+ return ret; -+} -+ -+vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) -+{ -+ struct page *page = vmf->page; -+ struct file *file = vmf->vma->vm_file; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct address_space *mapping = file->f_mapping; -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch2_page_reservation res; -+ unsigned len; -+ loff_t isize; -+ int ret = VM_FAULT_LOCKED; -+ -+ bch2_page_reservation_init(c, inode, &res); -+ -+ sb_start_pagefault(inode->v.i_sb); -+ file_update_time(file); -+ -+ /* -+ * Not strictly necessary, but helps avoid dio writes livelocking in -+ * write_invalidate_inode_pages_range() - can drop this if/when we get -+ * a write_invalidate_inode_pages_range() that works without dropping -+ * page lock before invalidating page -+ */ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ -+ lock_page(page); -+ isize = i_size_read(&inode->v); -+ -+ if (page->mapping != mapping || page_offset(page) >= isize) { -+ unlock_page(page); -+ ret = VM_FAULT_NOPAGE; -+ goto out; -+ } -+ -+ len = min_t(loff_t, PAGE_SIZE, isize - page_offset(page)); -+ -+ if (bch2_page_reservation_get(c, inode, page, &res, 0, len, true)) { -+ unlock_page(page); -+ ret = VM_FAULT_SIGBUS; -+ goto out; -+ } -+ -+ bch2_set_page_dirty(c, inode, page, &res, 0, len); -+ bch2_page_reservation_put(c, inode, &res); -+ -+ wait_for_stable_page(page); -+out: -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ sb_end_pagefault(inode->v.i_sb); -+ -+ return ret; -+} -+ -+void bch2_invalidatepage(struct page *page, unsigned int offset, -+ unsigned int length) -+{ -+ if (offset || length < PAGE_SIZE) -+ return; -+ -+ bch2_clear_page_bits(page); -+} -+ -+int bch2_releasepage(struct page *page, gfp_t gfp_mask) -+{ -+ if (PageDirty(page)) -+ return 0; -+ -+ bch2_clear_page_bits(page); -+ return 1; -+} -+ -+#ifdef CONFIG_MIGRATION -+int bch2_migrate_page(struct address_space *mapping, struct page *newpage, -+ struct page *page, enum migrate_mode mode) -+{ -+ int ret; -+ -+ EBUG_ON(!PageLocked(page)); -+ EBUG_ON(!PageLocked(newpage)); -+ -+ ret = migrate_page_move_mapping(mapping, newpage, page, 0); -+ if (ret != MIGRATEPAGE_SUCCESS) -+ return ret; -+ -+ if (PagePrivate(page)) { -+ ClearPagePrivate(page); -+ get_page(newpage); -+ set_page_private(newpage, page_private(page)); -+ set_page_private(page, 0); -+ put_page(page); -+ SetPagePrivate(newpage); -+ } -+ -+ if (mode != MIGRATE_SYNC_NO_COPY) -+ migrate_page_copy(newpage, page); -+ else -+ migrate_page_states(newpage, page); -+ return MIGRATEPAGE_SUCCESS; -+} -+#endif -+ -+/* readpage(s): */ -+ -+static void bch2_readpages_end_io(struct bio *bio) -+{ -+ struct bvec_iter_all iter; -+ struct bio_vec *bv; -+ -+ bio_for_each_segment_all(bv, bio, iter) { -+ struct page *page = bv->bv_page; -+ -+ if (!bio->bi_status) { -+ SetPageUptodate(page); -+ } else { -+ 
ClearPageUptodate(page); -+ SetPageError(page); -+ } -+ unlock_page(page); -+ } -+ -+ bio_put(bio); -+} -+ -+static inline void page_state_init_for_read(struct page *page) -+{ -+ SetPagePrivate(page); -+ page->private = 0; -+} -+ -+struct readpages_iter { -+ struct address_space *mapping; -+ struct page **pages; -+ unsigned nr_pages; -+ unsigned nr_added; -+ unsigned idx; -+ pgoff_t offset; -+}; -+ -+static int readpages_iter_init(struct readpages_iter *iter, -+ struct address_space *mapping, -+ struct list_head *pages, unsigned nr_pages) -+{ -+ memset(iter, 0, sizeof(*iter)); -+ -+ iter->mapping = mapping; -+ iter->offset = list_last_entry(pages, struct page, lru)->index; -+ -+ iter->pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS); -+ if (!iter->pages) -+ return -ENOMEM; -+ -+ while (!list_empty(pages)) { -+ struct page *page = list_last_entry(pages, struct page, lru); -+ -+ __bch2_page_state_create(page, __GFP_NOFAIL); -+ -+ iter->pages[iter->nr_pages++] = page; -+ list_del(&page->lru); -+ } -+ -+ return 0; -+} -+ -+static inline struct page *readpage_iter_next(struct readpages_iter *iter) -+{ -+ struct page *page; -+ unsigned i; -+ int ret; -+ -+ BUG_ON(iter->idx > iter->nr_added); -+ BUG_ON(iter->nr_added > iter->nr_pages); -+ -+ if (iter->idx < iter->nr_added) -+ goto out; -+ -+ while (1) { -+ if (iter->idx == iter->nr_pages) -+ return NULL; -+ -+ ret = add_to_page_cache_lru_vec(iter->mapping, -+ iter->pages + iter->nr_added, -+ iter->nr_pages - iter->nr_added, -+ iter->offset + iter->nr_added, -+ GFP_NOFS); -+ if (ret > 0) -+ break; -+ -+ page = iter->pages[iter->nr_added]; -+ iter->idx++; -+ iter->nr_added++; -+ -+ __bch2_page_state_release(page); -+ put_page(page); -+ } -+ -+ iter->nr_added += ret; -+ -+ for (i = iter->idx; i < iter->nr_added; i++) -+ put_page(iter->pages[i]); -+out: -+ EBUG_ON(iter->pages[iter->idx]->index != iter->offset + iter->idx); -+ -+ return iter->pages[iter->idx]; -+} -+ -+static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k) -+{ -+ struct bvec_iter iter; -+ struct bio_vec bv; -+ unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v -+ ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k); -+ unsigned state = k.k->type == KEY_TYPE_reservation -+ ? 
SECTOR_RESERVED -+ : SECTOR_ALLOCATED; -+ -+ bio_for_each_segment(bv, bio, iter) { -+ struct bch_page_state *s = bch2_page_state(bv.bv_page); -+ unsigned i; -+ -+ for (i = bv.bv_offset >> 9; -+ i < (bv.bv_offset + bv.bv_len) >> 9; -+ i++) { -+ s->s[i].nr_replicas = nr_ptrs; -+ s->s[i].state = state; -+ } -+ } -+} -+ -+static bool extent_partial_reads_expensive(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ struct bch_extent_crc_unpacked crc; -+ const union bch_extent_entry *i; -+ -+ bkey_for_each_crc(k.k, ptrs, crc, i) -+ if (crc.csum_type || crc.compression_type) -+ return true; -+ return false; -+} -+ -+static void readpage_bio_extend(struct readpages_iter *iter, -+ struct bio *bio, -+ unsigned sectors_this_extent, -+ bool get_more) -+{ -+ while (bio_sectors(bio) < sectors_this_extent && -+ bio->bi_vcnt < bio->bi_max_vecs) { -+ pgoff_t page_offset = bio_end_sector(bio) >> PAGE_SECTOR_SHIFT; -+ struct page *page = readpage_iter_next(iter); -+ int ret; -+ -+ if (page) { -+ if (iter->offset + iter->idx != page_offset) -+ break; -+ -+ iter->idx++; -+ } else { -+ if (!get_more) -+ break; -+ -+ page = xa_load(&iter->mapping->i_pages, page_offset); -+ if (page && !xa_is_value(page)) -+ break; -+ -+ page = __page_cache_alloc(readahead_gfp_mask(iter->mapping)); -+ if (!page) -+ break; -+ -+ if (!__bch2_page_state_create(page, 0)) { -+ put_page(page); -+ break; -+ } -+ -+ ret = add_to_page_cache_lru(page, iter->mapping, -+ page_offset, GFP_NOFS); -+ if (ret) { -+ __bch2_page_state_release(page); -+ put_page(page); -+ break; -+ } -+ -+ put_page(page); -+ } -+ -+ BUG_ON(!bio_add_page(bio, page, PAGE_SIZE, 0)); -+ } -+} -+ -+static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, -+ struct bch_read_bio *rbio, u64 inum, -+ struct readpages_iter *readpages_iter) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_on_stack sk; -+ int flags = BCH_READ_RETRY_IF_STALE| -+ BCH_READ_MAY_PROMOTE; -+ int ret = 0; -+ -+ rbio->c = c; -+ rbio->start_time = local_clock(); -+ -+ bkey_on_stack_init(&sk); -+retry: -+ while (1) { -+ struct bkey_s_c k; -+ unsigned bytes, sectors, offset_into_extent; -+ -+ bch2_btree_iter_set_pos(iter, -+ POS(inum, rbio->bio.bi_iter.bi_sector)); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ break; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ -+ offset_into_extent = iter->pos.offset - -+ bkey_start_offset(k.k); -+ sectors = k.k->size - offset_into_extent; -+ -+ ret = bch2_read_indirect_extent(trans, -+ &offset_into_extent, &sk); -+ if (ret) -+ break; -+ -+ sectors = min(sectors, k.k->size - offset_into_extent); -+ -+ bch2_trans_unlock(trans); -+ -+ if (readpages_iter) -+ readpage_bio_extend(readpages_iter, &rbio->bio, sectors, -+ extent_partial_reads_expensive(k)); -+ -+ bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; -+ swap(rbio->bio.bi_iter.bi_size, bytes); -+ -+ if (rbio->bio.bi_iter.bi_size == bytes) -+ flags |= BCH_READ_LAST_FRAGMENT; -+ -+ if (bkey_extent_is_allocation(k.k)) -+ bch2_add_page_sectors(&rbio->bio, k); -+ -+ bch2_read_extent(c, rbio, k, offset_into_extent, flags); -+ -+ if (flags & BCH_READ_LAST_FRAGMENT) -+ break; -+ -+ swap(rbio->bio.bi_iter.bi_size, bytes); -+ bio_advance(&rbio->bio, bytes); -+ } -+ -+ if (ret == -EINTR) -+ goto retry; -+ -+ if (ret) { -+ bcache_io_error(c, &rbio->bio, "btree IO error %i", ret); -+ bio_endio(&rbio->bio); -+ } -+ -+ bkey_on_stack_exit(&sk, c); -+} -+ -+int bch2_readpages(struct file *file, struct address_space *mapping, 
-+ struct list_head *pages, unsigned nr_pages) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_io_opts opts = io_opts(c, &inode->ei_inode); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct page *page; -+ struct readpages_iter readpages_iter; -+ int ret; -+ -+ ret = readpages_iter_init(&readpages_iter, mapping, pages, nr_pages); -+ BUG_ON(ret); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, -+ BTREE_ITER_SLOTS); -+ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ -+ while ((page = readpage_iter_next(&readpages_iter))) { -+ pgoff_t index = readpages_iter.offset + readpages_iter.idx; -+ unsigned n = min_t(unsigned, -+ readpages_iter.nr_pages - -+ readpages_iter.idx, -+ BIO_MAX_PAGES); -+ struct bch_read_bio *rbio = -+ rbio_init(bio_alloc_bioset(GFP_NOFS, n, &c->bio_read), -+ opts); -+ -+ readpages_iter.idx++; -+ -+ bio_set_op_attrs(&rbio->bio, REQ_OP_READ, 0); -+ rbio->bio.bi_iter.bi_sector = (sector_t) index << PAGE_SECTOR_SHIFT; -+ rbio->bio.bi_end_io = bch2_readpages_end_io; -+ BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0)); -+ -+ bchfs_read(&trans, iter, rbio, inode->v.i_ino, -+ &readpages_iter); -+ } -+ -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ -+ bch2_trans_exit(&trans); -+ kfree(readpages_iter.pages); -+ -+ return 0; -+} -+ -+static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, -+ u64 inum, struct page *page) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ -+ bch2_page_state_create(page, __GFP_NOFAIL); -+ -+ bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC); -+ rbio->bio.bi_iter.bi_sector = -+ (sector_t) page->index << PAGE_SECTOR_SHIFT; -+ BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0)); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, -+ BTREE_ITER_SLOTS); -+ -+ bchfs_read(&trans, iter, rbio, inum, NULL); -+ -+ bch2_trans_exit(&trans); -+} -+ -+int bch2_readpage(struct file *file, struct page *page) -+{ -+ struct bch_inode_info *inode = to_bch_ei(page->mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_io_opts opts = io_opts(c, &inode->ei_inode); -+ struct bch_read_bio *rbio; -+ -+ rbio = rbio_init(bio_alloc_bioset(GFP_NOFS, 1, &c->bio_read), opts); -+ rbio->bio.bi_end_io = bch2_readpages_end_io; -+ -+ __bchfs_readpage(c, rbio, inode->v.i_ino, page); -+ return 0; -+} -+ -+static void bch2_read_single_page_end_io(struct bio *bio) -+{ -+ complete(bio->bi_private); -+} -+ -+static int bch2_read_single_page(struct page *page, -+ struct address_space *mapping) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_read_bio *rbio; -+ int ret; -+ DECLARE_COMPLETION_ONSTACK(done); -+ -+ rbio = rbio_init(bio_alloc_bioset(GFP_NOFS, 1, &c->bio_read), -+ io_opts(c, &inode->ei_inode)); -+ rbio->bio.bi_private = &done; -+ rbio->bio.bi_end_io = bch2_read_single_page_end_io; -+ -+ __bchfs_readpage(c, rbio, inode->v.i_ino, page); -+ wait_for_completion(&done); -+ -+ ret = blk_status_to_errno(rbio->bio.bi_status); -+ bio_put(&rbio->bio); -+ -+ if (ret < 0) -+ return ret; -+ -+ SetPageUptodate(page); -+ return 0; -+} -+ -+/* writepages: */ -+ -+struct bch_writepage_state { -+ struct bch_writepage_io *io; -+ struct bch_io_opts opts; -+}; -+ -+static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs *c, 
-+ struct bch_inode_info *inode) -+{ -+ return (struct bch_writepage_state) { -+ .opts = io_opts(c, &inode->ei_inode) -+ }; -+} -+ -+static void bch2_writepage_io_free(struct closure *cl) -+{ -+ struct bch_writepage_io *io = container_of(cl, -+ struct bch_writepage_io, cl); -+ -+ bio_put(&io->op.wbio.bio); -+} -+ -+static void bch2_writepage_io_done(struct closure *cl) -+{ -+ struct bch_writepage_io *io = container_of(cl, -+ struct bch_writepage_io, cl); -+ struct bch_fs *c = io->op.c; -+ struct bio *bio = &io->op.wbio.bio; -+ struct bvec_iter_all iter; -+ struct bio_vec *bvec; -+ unsigned i; -+ -+ if (io->op.error) { -+ bio_for_each_segment_all(bvec, bio, iter) { -+ struct bch_page_state *s; -+ -+ SetPageError(bvec->bv_page); -+ mapping_set_error(bvec->bv_page->mapping, -EIO); -+ -+ s = __bch2_page_state(bvec->bv_page); -+ spin_lock(&s->lock); -+ for (i = 0; i < PAGE_SECTORS; i++) -+ s->s[i].nr_replicas = 0; -+ spin_unlock(&s->lock); -+ } -+ } -+ -+ if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) { -+ bio_for_each_segment_all(bvec, bio, iter) { -+ struct bch_page_state *s; -+ -+ s = __bch2_page_state(bvec->bv_page); -+ spin_lock(&s->lock); -+ for (i = 0; i < PAGE_SECTORS; i++) -+ s->s[i].nr_replicas = 0; -+ spin_unlock(&s->lock); -+ } -+ } -+ -+ /* -+ * racing with fallocate can cause us to add fewer sectors than -+ * expected - but we shouldn't add more sectors than expected: -+ */ -+ BUG_ON(io->op.i_sectors_delta > 0); -+ -+ /* -+ * (error (due to going RO) halfway through a page can screw that up -+ * slightly) -+ * XXX wtf? -+ BUG_ON(io->op.op.i_sectors_delta >= PAGE_SECTORS); -+ */ -+ -+ /* -+ * PageWriteback is effectively our ref on the inode - fixup i_blocks -+ * before calling end_page_writeback: -+ */ -+ i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta); -+ -+ bio_for_each_segment_all(bvec, bio, iter) { -+ struct bch_page_state *s = __bch2_page_state(bvec->bv_page); -+ -+ if (atomic_dec_and_test(&s->write_count)) -+ end_page_writeback(bvec->bv_page); -+ } -+ -+ closure_return_with_destructor(&io->cl, bch2_writepage_io_free); -+} -+ -+static void bch2_writepage_do_io(struct bch_writepage_state *w) -+{ -+ struct bch_writepage_io *io = w->io; -+ -+ w->io = NULL; -+ closure_call(&io->op.cl, bch2_write, NULL, &io->cl); -+ continue_at(&io->cl, bch2_writepage_io_done, NULL); -+} -+ -+/* -+ * Get a bch_writepage_io and add @page to it - appending to an existing one if -+ * possible, else allocating a new one: -+ */ -+static void bch2_writepage_io_alloc(struct bch_fs *c, -+ struct writeback_control *wbc, -+ struct bch_writepage_state *w, -+ struct bch_inode_info *inode, -+ u64 sector, -+ unsigned nr_replicas) -+{ -+ struct bch_write_op *op; -+ -+ w->io = container_of(bio_alloc_bioset(GFP_NOFS, -+ BIO_MAX_PAGES, -+ &c->writepage_bioset), -+ struct bch_writepage_io, op.wbio.bio); -+ -+ closure_init(&w->io->cl, NULL); -+ w->io->inode = inode; -+ -+ op = &w->io->op; -+ bch2_write_op_init(op, c, w->opts); -+ op->target = w->opts.foreground_target; -+ op_journal_seq_set(op, &inode->ei_journal_seq); -+ op->nr_replicas = nr_replicas; -+ op->res.nr_replicas = nr_replicas; -+ op->write_point = writepoint_hashed(inode->ei_last_dirtied); -+ op->pos = POS(inode->v.i_ino, sector); -+ op->wbio.bio.bi_iter.bi_sector = sector; -+ op->wbio.bio.bi_opf = wbc_to_write_flags(wbc); -+} -+ -+static int __bch2_writepage(struct page *page, -+ struct writeback_control *wbc, -+ void *data) -+{ -+ struct bch_inode_info *inode = to_bch_ei(page->mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; 
-+ struct bch_writepage_state *w = data; -+ struct bch_page_state *s, orig; -+ unsigned i, offset, nr_replicas_this_write = U32_MAX; -+ loff_t i_size = i_size_read(&inode->v); -+ pgoff_t end_index = i_size >> PAGE_SHIFT; -+ int ret; -+ -+ EBUG_ON(!PageUptodate(page)); -+ -+ /* Is the page fully inside i_size? */ -+ if (page->index < end_index) -+ goto do_io; -+ -+ /* Is the page fully outside i_size? (truncate in progress) */ -+ offset = i_size & (PAGE_SIZE - 1); -+ if (page->index > end_index || !offset) { -+ unlock_page(page); -+ return 0; -+ } -+ -+ /* -+ * The page straddles i_size. It must be zeroed out on each and every -+ * writepage invocation because it may be mmapped. "A file is mapped -+ * in multiples of the page size. For a file that is not a multiple of -+ * the page size, the remaining memory is zeroed when mapped, and -+ * writes to that region are not written out to the file." -+ */ -+ zero_user_segment(page, offset, PAGE_SIZE); -+do_io: -+ s = bch2_page_state_create(page, __GFP_NOFAIL); -+ -+ ret = bch2_get_page_disk_reservation(c, inode, page, true); -+ if (ret) { -+ SetPageError(page); -+ mapping_set_error(page->mapping, ret); -+ unlock_page(page); -+ return 0; -+ } -+ -+ /* Before unlocking the page, get copy of reservations: */ -+ orig = *s; -+ -+ for (i = 0; i < PAGE_SECTORS; i++) { -+ if (s->s[i].state < SECTOR_DIRTY) -+ continue; -+ -+ nr_replicas_this_write = -+ min_t(unsigned, nr_replicas_this_write, -+ s->s[i].nr_replicas + -+ s->s[i].replicas_reserved); -+ } -+ -+ for (i = 0; i < PAGE_SECTORS; i++) { -+ if (s->s[i].state < SECTOR_DIRTY) -+ continue; -+ -+ s->s[i].nr_replicas = w->opts.compression -+ ? 0 : nr_replicas_this_write; -+ -+ s->s[i].replicas_reserved = 0; -+ s->s[i].state = SECTOR_ALLOCATED; -+ } -+ -+ BUG_ON(atomic_read(&s->write_count)); -+ atomic_set(&s->write_count, 1); -+ -+ BUG_ON(PageWriteback(page)); -+ set_page_writeback(page); -+ -+ unlock_page(page); -+ -+ offset = 0; -+ while (1) { -+ unsigned sectors = 1, dirty_sectors = 0, reserved_sectors = 0; -+ u64 sector; -+ -+ while (offset < PAGE_SECTORS && -+ orig.s[offset].state < SECTOR_DIRTY) -+ offset++; -+ -+ if (offset == PAGE_SECTORS) -+ break; -+ -+ sector = ((u64) page->index << PAGE_SECTOR_SHIFT) + offset; -+ -+ while (offset + sectors < PAGE_SECTORS && -+ orig.s[offset + sectors].state >= SECTOR_DIRTY) -+ sectors++; -+ -+ for (i = offset; i < offset + sectors; i++) { -+ reserved_sectors += orig.s[i].replicas_reserved; -+ dirty_sectors += orig.s[i].state == SECTOR_DIRTY; -+ } -+ -+ if (w->io && -+ (w->io->op.res.nr_replicas != nr_replicas_this_write || -+ bio_full(&w->io->op.wbio.bio, PAGE_SIZE) || -+ w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >= -+ (BIO_MAX_PAGES * PAGE_SIZE) || -+ bio_end_sector(&w->io->op.wbio.bio) != sector)) -+ bch2_writepage_do_io(w); -+ -+ if (!w->io) -+ bch2_writepage_io_alloc(c, wbc, w, inode, sector, -+ nr_replicas_this_write); -+ -+ atomic_inc(&s->write_count); -+ -+ BUG_ON(inode != w->io->inode); -+ BUG_ON(!bio_add_page(&w->io->op.wbio.bio, page, -+ sectors << 9, offset << 9)); -+ -+ /* Check for writing past i_size: */ -+ WARN_ON((bio_end_sector(&w->io->op.wbio.bio) << 9) > -+ round_up(i_size, block_bytes(c))); -+ -+ w->io->op.res.sectors += reserved_sectors; -+ w->io->op.i_sectors_delta -= dirty_sectors; -+ w->io->op.new_i_size = i_size; -+ -+ offset += sectors; -+ } -+ -+ if (atomic_dec_and_test(&s->write_count)) -+ end_page_writeback(page); -+ -+ return 0; -+} -+ -+int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc) 
-+{ -+ struct bch_fs *c = mapping->host->i_sb->s_fs_info; -+ struct bch_writepage_state w = -+ bch_writepage_state_init(c, to_bch_ei(mapping->host)); -+ struct blk_plug plug; -+ int ret; -+ -+ blk_start_plug(&plug); -+ ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w); -+ if (w.io) -+ bch2_writepage_do_io(&w); -+ blk_finish_plug(&plug); -+ return ret; -+} -+ -+int bch2_writepage(struct page *page, struct writeback_control *wbc) -+{ -+ struct bch_fs *c = page->mapping->host->i_sb->s_fs_info; -+ struct bch_writepage_state w = -+ bch_writepage_state_init(c, to_bch_ei(page->mapping->host)); -+ int ret; -+ -+ ret = __bch2_writepage(page, wbc, &w); -+ if (w.io) -+ bch2_writepage_do_io(&w); -+ -+ return ret; -+} -+ -+/* buffered writes: */ -+ -+int bch2_write_begin(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned flags, -+ struct page **pagep, void **fsdata) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch2_page_reservation *res; -+ pgoff_t index = pos >> PAGE_SHIFT; -+ unsigned offset = pos & (PAGE_SIZE - 1); -+ struct page *page; -+ int ret = -ENOMEM; -+ -+ res = kmalloc(sizeof(*res), GFP_KERNEL); -+ if (!res) -+ return -ENOMEM; -+ -+ bch2_page_reservation_init(c, inode, res); -+ *fsdata = res; -+ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ -+ page = grab_cache_page_write_begin(mapping, index, flags); -+ if (!page) -+ goto err_unlock; -+ -+ if (PageUptodate(page)) -+ goto out; -+ -+ /* If we're writing entire page, don't need to read it in first: */ -+ if (len == PAGE_SIZE) -+ goto out; -+ -+ if (!offset && pos + len >= inode->v.i_size) { -+ zero_user_segment(page, len, PAGE_SIZE); -+ flush_dcache_page(page); -+ goto out; -+ } -+ -+ if (index > inode->v.i_size >> PAGE_SHIFT) { -+ zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE); -+ flush_dcache_page(page); -+ goto out; -+ } -+readpage: -+ ret = bch2_read_single_page(page, mapping); -+ if (ret) -+ goto err; -+out: -+ ret = bch2_page_reservation_get(c, inode, page, res, -+ offset, len, true); -+ if (ret) { -+ if (!PageUptodate(page)) { -+ /* -+ * If the page hasn't been read in, we won't know if we -+ * actually need a reservation - we don't actually need -+ * to read here, we just need to check if the page is -+ * fully backed by uncompressed data: -+ */ -+ goto readpage; -+ } -+ -+ goto err; -+ } -+ -+ *pagep = page; -+ return 0; -+err: -+ unlock_page(page); -+ put_page(page); -+ *pagep = NULL; -+err_unlock: -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ kfree(res); -+ *fsdata = NULL; -+ return ret; -+} -+ -+int bch2_write_end(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned copied, -+ struct page *page, void *fsdata) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch2_page_reservation *res = fsdata; -+ unsigned offset = pos & (PAGE_SIZE - 1); -+ -+ lockdep_assert_held(&inode->v.i_rwsem); -+ -+ if (unlikely(copied < len && !PageUptodate(page))) { -+ /* -+ * The page needs to be read in, but that would destroy -+ * our partial write - simplest thing is to just force -+ * userspace to redo the write: -+ */ -+ zero_user(page, 0, PAGE_SIZE); -+ flush_dcache_page(page); -+ copied = 0; -+ } -+ -+ spin_lock(&inode->v.i_lock); -+ if (pos + copied > inode->v.i_size) -+ i_size_write(&inode->v, pos + copied); -+ spin_unlock(&inode->v.i_lock); -+ -+ if (copied) { -+ if 
(!PageUptodate(page)) -+ SetPageUptodate(page); -+ -+ bch2_set_page_dirty(c, inode, page, res, offset, copied); -+ -+ inode->ei_last_dirtied = (unsigned long) current; -+ } -+ -+ unlock_page(page); -+ put_page(page); -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ -+ bch2_page_reservation_put(c, inode, res); -+ kfree(res); -+ -+ return copied; -+} -+ -+#define WRITE_BATCH_PAGES 32 -+ -+static int __bch2_buffered_write(struct bch_inode_info *inode, -+ struct address_space *mapping, -+ struct iov_iter *iter, -+ loff_t pos, unsigned len) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct page *pages[WRITE_BATCH_PAGES]; -+ struct bch2_page_reservation res; -+ unsigned long index = pos >> PAGE_SHIFT; -+ unsigned offset = pos & (PAGE_SIZE - 1); -+ unsigned nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); -+ unsigned i, reserved = 0, set_dirty = 0; -+ unsigned copied = 0, nr_pages_copied = 0; -+ int ret = 0; -+ -+ BUG_ON(!len); -+ BUG_ON(nr_pages > ARRAY_SIZE(pages)); -+ -+ bch2_page_reservation_init(c, inode, &res); -+ -+ for (i = 0; i < nr_pages; i++) { -+ pages[i] = grab_cache_page_write_begin(mapping, index + i, 0); -+ if (!pages[i]) { -+ nr_pages = i; -+ if (!i) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ len = min_t(unsigned, len, -+ nr_pages * PAGE_SIZE - offset); -+ break; -+ } -+ } -+ -+ if (offset && !PageUptodate(pages[0])) { -+ ret = bch2_read_single_page(pages[0], mapping); -+ if (ret) -+ goto out; -+ } -+ -+ if ((pos + len) & (PAGE_SIZE - 1) && -+ !PageUptodate(pages[nr_pages - 1])) { -+ if ((index + nr_pages - 1) << PAGE_SHIFT >= inode->v.i_size) { -+ zero_user(pages[nr_pages - 1], 0, PAGE_SIZE); -+ } else { -+ ret = bch2_read_single_page(pages[nr_pages - 1], mapping); -+ if (ret) -+ goto out; -+ } -+ } -+ -+ while (reserved < len) { -+ struct page *page = pages[(offset + reserved) >> PAGE_SHIFT]; -+ unsigned pg_offset = (offset + reserved) & (PAGE_SIZE - 1); -+ unsigned pg_len = min_t(unsigned, len - reserved, -+ PAGE_SIZE - pg_offset); -+retry_reservation: -+ ret = bch2_page_reservation_get(c, inode, page, &res, -+ pg_offset, pg_len, true); -+ -+ if (ret && !PageUptodate(page)) { -+ ret = bch2_read_single_page(page, mapping); -+ if (!ret) -+ goto retry_reservation; -+ } -+ -+ if (ret) -+ goto out; -+ -+ reserved += pg_len; -+ } -+ -+ if (mapping_writably_mapped(mapping)) -+ for (i = 0; i < nr_pages; i++) -+ flush_dcache_page(pages[i]); -+ -+ while (copied < len) { -+ struct page *page = pages[(offset + copied) >> PAGE_SHIFT]; -+ unsigned pg_offset = (offset + copied) & (PAGE_SIZE - 1); -+ unsigned pg_len = min_t(unsigned, len - copied, -+ PAGE_SIZE - pg_offset); -+ unsigned pg_copied = iov_iter_copy_from_user_atomic(page, -+ iter, pg_offset, pg_len); -+ -+ if (!pg_copied) -+ break; -+ -+ flush_dcache_page(page); -+ iov_iter_advance(iter, pg_copied); -+ copied += pg_copied; -+ } -+ -+ if (!copied) -+ goto out; -+ -+ if (copied < len && -+ ((offset + copied) & (PAGE_SIZE - 1))) { -+ struct page *page = pages[(offset + copied) >> PAGE_SHIFT]; -+ -+ if (!PageUptodate(page)) { -+ zero_user(page, 0, PAGE_SIZE); -+ copied -= (offset + copied) & (PAGE_SIZE - 1); -+ } -+ } -+ -+ spin_lock(&inode->v.i_lock); -+ if (pos + copied > inode->v.i_size) -+ i_size_write(&inode->v, pos + copied); -+ spin_unlock(&inode->v.i_lock); -+ -+ while (set_dirty < copied) { -+ struct page *page = pages[(offset + set_dirty) >> PAGE_SHIFT]; -+ unsigned pg_offset = (offset + set_dirty) & (PAGE_SIZE - 1); -+ unsigned pg_len = min_t(unsigned, copied - set_dirty, -+ PAGE_SIZE - pg_offset); -+ -+ 
if (!PageUptodate(page)) -+ SetPageUptodate(page); -+ -+ bch2_set_page_dirty(c, inode, page, &res, pg_offset, pg_len); -+ unlock_page(page); -+ put_page(page); -+ -+ set_dirty += pg_len; -+ } -+ -+ nr_pages_copied = DIV_ROUND_UP(offset + copied, PAGE_SIZE); -+ inode->ei_last_dirtied = (unsigned long) current; -+out: -+ for (i = nr_pages_copied; i < nr_pages; i++) { -+ unlock_page(pages[i]); -+ put_page(pages[i]); -+ } -+ -+ bch2_page_reservation_put(c, inode, &res); -+ -+ return copied ?: ret; -+} -+ -+static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter) -+{ -+ struct file *file = iocb->ki_filp; -+ struct address_space *mapping = file->f_mapping; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ loff_t pos = iocb->ki_pos; -+ ssize_t written = 0; -+ int ret = 0; -+ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ -+ do { -+ unsigned offset = pos & (PAGE_SIZE - 1); -+ unsigned bytes = min_t(unsigned long, iov_iter_count(iter), -+ PAGE_SIZE * WRITE_BATCH_PAGES - offset); -+again: -+ /* -+ * Bring in the user page that we will copy from _first_. -+ * Otherwise there's a nasty deadlock on copying from the -+ * same page as we're writing to, without it being marked -+ * up-to-date. -+ * -+ * Not only is this an optimisation, but it is also required -+ * to check that the address is actually valid, when atomic -+ * usercopies are used, below. -+ */ -+ if (unlikely(iov_iter_fault_in_readable(iter, bytes))) { -+ bytes = min_t(unsigned long, iov_iter_count(iter), -+ PAGE_SIZE - offset); -+ -+ if (unlikely(iov_iter_fault_in_readable(iter, bytes))) { -+ ret = -EFAULT; -+ break; -+ } -+ } -+ -+ if (unlikely(fatal_signal_pending(current))) { -+ ret = -EINTR; -+ break; -+ } -+ -+ ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes); -+ if (unlikely(ret < 0)) -+ break; -+ -+ cond_resched(); -+ -+ if (unlikely(ret == 0)) { -+ /* -+ * If we were unable to copy any data at all, we must -+ * fall back to a single segment length write. -+ * -+ * If we didn't fallback here, we could livelock -+ * because not all segments in the iov can be copied at -+ * once without a pagefault. -+ */ -+ bytes = min_t(unsigned long, PAGE_SIZE - offset, -+ iov_iter_single_seg_count(iter)); -+ goto again; -+ } -+ pos += ret; -+ written += ret; -+ -+ balance_dirty_pages_ratelimited(mapping); -+ } while (iov_iter_count(iter)); -+ -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ -+ return written ? 
written : ret; -+} -+ -+/* O_DIRECT reads */ -+ -+static void bch2_dio_read_complete(struct closure *cl) -+{ -+ struct dio_read *dio = container_of(cl, struct dio_read, cl); -+ -+ dio->req->ki_complete(dio->req, dio->ret, 0); -+ bio_check_pages_dirty(&dio->rbio.bio); /* transfers ownership */ -+} -+ -+static void bch2_direct_IO_read_endio(struct bio *bio) -+{ -+ struct dio_read *dio = bio->bi_private; -+ -+ if (bio->bi_status) -+ dio->ret = blk_status_to_errno(bio->bi_status); -+ -+ closure_put(&dio->cl); -+} -+ -+static void bch2_direct_IO_read_split_endio(struct bio *bio) -+{ -+ bch2_direct_IO_read_endio(bio); -+ bio_check_pages_dirty(bio); /* transfers ownership */ -+} -+ -+static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) -+{ -+ struct file *file = req->ki_filp; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_io_opts opts = io_opts(c, &inode->ei_inode); -+ struct dio_read *dio; -+ struct bio *bio; -+ loff_t offset = req->ki_pos; -+ bool sync = is_sync_kiocb(req); -+ size_t shorten; -+ ssize_t ret; -+ -+ if ((offset|iter->count) & (block_bytes(c) - 1)) -+ return -EINVAL; -+ -+ ret = min_t(loff_t, iter->count, -+ max_t(loff_t, 0, i_size_read(&inode->v) - offset)); -+ -+ if (!ret) -+ return ret; -+ -+ shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c)); -+ iter->count -= shorten; -+ -+ bio = bio_alloc_bioset(GFP_KERNEL, -+ iov_iter_npages(iter, BIO_MAX_PAGES), -+ &c->dio_read_bioset); -+ -+ bio->bi_end_io = bch2_direct_IO_read_endio; -+ -+ dio = container_of(bio, struct dio_read, rbio.bio); -+ closure_init(&dio->cl, NULL); -+ -+ /* -+ * this is a _really_ horrible hack just to avoid an atomic sub at the -+ * end: -+ */ -+ if (!sync) { -+ set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL); -+ atomic_set(&dio->cl.remaining, -+ CLOSURE_REMAINING_INITIALIZER - -+ CLOSURE_RUNNING + -+ CLOSURE_DESTRUCTOR); -+ } else { -+ atomic_set(&dio->cl.remaining, -+ CLOSURE_REMAINING_INITIALIZER + 1); -+ } -+ -+ dio->req = req; -+ dio->ret = ret; -+ -+ goto start; -+ while (iter->count) { -+ bio = bio_alloc_bioset(GFP_KERNEL, -+ iov_iter_npages(iter, BIO_MAX_PAGES), -+ &c->bio_read); -+ bio->bi_end_io = bch2_direct_IO_read_split_endio; -+start: -+ bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC); -+ bio->bi_iter.bi_sector = offset >> 9; -+ bio->bi_private = dio; -+ -+ ret = bio_iov_iter_get_pages(bio, iter); -+ if (ret < 0) { -+ /* XXX: fault inject this path */ -+ bio->bi_status = BLK_STS_RESOURCE; -+ bio_endio(bio); -+ break; -+ } -+ -+ offset += bio->bi_iter.bi_size; -+ bio_set_pages_dirty(bio); -+ -+ if (iter->count) -+ closure_get(&dio->cl); -+ -+ bch2_read(c, rbio_init(bio, opts), inode->v.i_ino); -+ } -+ -+ iter->count += shorten; -+ -+ if (sync) { -+ closure_sync(&dio->cl); -+ closure_debug_destroy(&dio->cl); -+ ret = dio->ret; -+ bio_check_pages_dirty(&dio->rbio.bio); /* transfers ownership */ -+ return ret; -+ } else { -+ return -EIOCBQUEUED; -+ } -+} -+ -+ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) -+{ -+ struct file *file = iocb->ki_filp; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct address_space *mapping = file->f_mapping; -+ size_t count = iov_iter_count(iter); -+ ssize_t ret; -+ -+ if (!count) -+ return 0; /* skip atime */ -+ -+ if (iocb->ki_flags & IOCB_DIRECT) { -+ struct blk_plug plug; -+ -+ ret = filemap_write_and_wait_range(mapping, -+ iocb->ki_pos, -+ iocb->ki_pos + count - 1); -+ if (ret < 0) -+ return ret; -+ -+ file_accessed(file); -+ -+ 
blk_start_plug(&plug); -+ ret = bch2_direct_IO_read(iocb, iter); -+ blk_finish_plug(&plug); -+ -+ if (ret >= 0) -+ iocb->ki_pos += ret; -+ } else { -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ ret = generic_file_read_iter(iocb, iter); -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ } -+ -+ return ret; -+} -+ -+/* O_DIRECT writes */ -+ -+static void bch2_dio_write_loop_async(struct bch_write_op *); -+ -+static long bch2_dio_write_loop(struct dio_write *dio) -+{ -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ struct kiocb *req = dio->req; -+ struct address_space *mapping = req->ki_filp->f_mapping; -+ struct bch_inode_info *inode = file_bch_inode(req->ki_filp); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bio *bio = &dio->op.wbio.bio; -+ struct bvec_iter_all iter; -+ struct bio_vec *bv; -+ unsigned unaligned; -+ bool sync = dio->sync; -+ long ret; -+ -+ if (dio->loop) -+ goto loop; -+ -+ while (1) { -+ if (kthread) -+ use_mm(dio->mm); -+ BUG_ON(current->faults_disabled_mapping); -+ current->faults_disabled_mapping = mapping; -+ -+ ret = bio_iov_iter_get_pages(bio, &dio->iter); -+ -+ current->faults_disabled_mapping = NULL; -+ if (kthread) -+ unuse_mm(dio->mm); -+ -+ if (unlikely(ret < 0)) -+ goto err; -+ -+ unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1); -+ bio->bi_iter.bi_size -= unaligned; -+ iov_iter_revert(&dio->iter, unaligned); -+ -+ if (!bio->bi_iter.bi_size) { -+ /* -+ * bio_iov_iter_get_pages was only able to get < -+ * blocksize worth of pages: -+ */ -+ bio_for_each_segment_all(bv, bio, iter) -+ put_page(bv->bv_page); -+ ret = -EFAULT; -+ goto err; -+ } -+ -+ bch2_write_op_init(&dio->op, c, io_opts(c, &inode->ei_inode)); -+ dio->op.end_io = bch2_dio_write_loop_async; -+ dio->op.target = dio->op.opts.foreground_target; -+ op_journal_seq_set(&dio->op, &inode->ei_journal_seq); -+ dio->op.write_point = writepoint_hashed((unsigned long) current); -+ dio->op.nr_replicas = dio->op.opts.data_replicas; -+ dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9); -+ -+ if ((req->ki_flags & IOCB_DSYNC) && -+ !c->opts.journal_flush_disabled) -+ dio->op.flags |= BCH_WRITE_FLUSH; -+ -+ ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio), -+ dio->op.opts.data_replicas, 0); -+ if (unlikely(ret) && -+ !bch2_check_range_allocated(c, dio->op.pos, -+ bio_sectors(bio), dio->op.opts.data_replicas)) -+ goto err; -+ -+ task_io_account_write(bio->bi_iter.bi_size); -+ -+ if (!dio->sync && !dio->loop && dio->iter.count) { -+ struct iovec *iov = dio->inline_vecs; -+ -+ if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { -+ iov = kmalloc(dio->iter.nr_segs * sizeof(*iov), -+ GFP_KERNEL); -+ if (unlikely(!iov)) { -+ dio->sync = sync = true; -+ goto do_io; -+ } -+ -+ dio->free_iov = true; -+ } -+ -+ memcpy(iov, dio->iter.iov, dio->iter.nr_segs * sizeof(*iov)); -+ dio->iter.iov = iov; -+ } -+do_io: -+ dio->loop = true; -+ closure_call(&dio->op.cl, bch2_write, NULL, NULL); -+ -+ if (sync) -+ wait_for_completion(&dio->done); -+ else -+ return -EIOCBQUEUED; -+loop: -+ i_sectors_acct(c, inode, &dio->quota_res, -+ dio->op.i_sectors_delta); -+ req->ki_pos += (u64) dio->op.written << 9; -+ dio->written += dio->op.written; -+ -+ spin_lock(&inode->v.i_lock); -+ if (req->ki_pos > inode->v.i_size) -+ i_size_write(&inode->v, req->ki_pos); -+ spin_unlock(&inode->v.i_lock); -+ -+ bio_for_each_segment_all(bv, bio, iter) -+ put_page(bv->bv_page); -+ if (!dio->iter.count || dio->op.error) -+ break; -+ -+ bio_reset(bio); -+ reinit_completion(&dio->done); -+ } -+ 
-+ ret = dio->op.error ?: ((long) dio->written << 9); -+err: -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ bch2_quota_reservation_put(c, inode, &dio->quota_res); -+ -+ if (dio->free_iov) -+ kfree(dio->iter.iov); -+ -+ bio_put(bio); -+ -+ /* inode->i_dio_count is our ref on inode and thus bch_fs */ -+ inode_dio_end(&inode->v); -+ -+ if (!sync) { -+ req->ki_complete(req, ret, 0); -+ ret = -EIOCBQUEUED; -+ } -+ return ret; -+} -+ -+static void bch2_dio_write_loop_async(struct bch_write_op *op) -+{ -+ struct dio_write *dio = container_of(op, struct dio_write, op); -+ -+ if (dio->sync) -+ complete(&dio->done); -+ else -+ bch2_dio_write_loop(dio); -+} -+ -+static noinline -+ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) -+{ -+ struct file *file = req->ki_filp; -+ struct address_space *mapping = file->f_mapping; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct dio_write *dio; -+ struct bio *bio; -+ bool locked = true, extending; -+ ssize_t ret; -+ -+ prefetch(&c->opts); -+ prefetch((void *) &c->opts + 64); -+ prefetch(&inode->ei_inode); -+ prefetch((void *) &inode->ei_inode + 64); -+ -+ inode_lock(&inode->v); -+ -+ ret = generic_write_checks(req, iter); -+ if (unlikely(ret <= 0)) -+ goto err; -+ -+ ret = file_remove_privs(file); -+ if (unlikely(ret)) -+ goto err; -+ -+ ret = file_update_time(file); -+ if (unlikely(ret)) -+ goto err; -+ -+ if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) -+ goto err; -+ -+ inode_dio_begin(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ extending = req->ki_pos + iter->count > inode->v.i_size; -+ if (!extending) { -+ inode_unlock(&inode->v); -+ locked = false; -+ } -+ -+ bio = bio_alloc_bioset(GFP_KERNEL, -+ iov_iter_npages(iter, BIO_MAX_PAGES), -+ &c->dio_write_bioset); -+ dio = container_of(bio, struct dio_write, op.wbio.bio); -+ init_completion(&dio->done); -+ dio->req = req; -+ dio->mm = current->mm; -+ dio->loop = false; -+ dio->sync = is_sync_kiocb(req) || extending; -+ dio->free_iov = false; -+ dio->quota_res.sectors = 0; -+ dio->written = 0; -+ dio->iter = *iter; -+ -+ ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, -+ iter->count >> 9, true); -+ if (unlikely(ret)) -+ goto err_put_bio; -+ -+ ret = write_invalidate_inode_pages_range(mapping, -+ req->ki_pos, -+ req->ki_pos + iter->count - 1); -+ if (unlikely(ret)) -+ goto err_put_bio; -+ -+ ret = bch2_dio_write_loop(dio); -+err: -+ if (locked) -+ inode_unlock(&inode->v); -+ return ret; -+err_put_bio: -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ bch2_quota_reservation_put(c, inode, &dio->quota_res); -+ bio_put(bio); -+ inode_dio_end(&inode->v); -+ goto err; -+} -+ -+ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) -+{ -+ struct file *file = iocb->ki_filp; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ ssize_t ret; -+ -+ if (iocb->ki_flags & IOCB_DIRECT) -+ return bch2_direct_write(iocb, from); -+ -+ /* We can write back this queue in page reclaim */ -+ current->backing_dev_info = inode_to_bdi(&inode->v); -+ inode_lock(&inode->v); -+ -+ ret = generic_write_checks(iocb, from); -+ if (ret <= 0) -+ goto unlock; -+ -+ ret = file_remove_privs(file); -+ if (ret) -+ goto unlock; -+ -+ ret = file_update_time(file); -+ if (ret) -+ goto unlock; -+ -+ ret = bch2_buffered_write(iocb, from); -+ if (likely(ret > 0)) -+ iocb->ki_pos += ret; -+unlock: -+ inode_unlock(&inode->v); -+ current->backing_dev_info = NULL; -+ -+ if (ret > 0) 
-+ ret = generic_write_sync(iocb, ret); -+ -+ return ret; -+} -+ -+/* fsync: */ -+ -+int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ int ret, ret2; -+ -+ ret = file_write_and_wait_range(file, start, end); -+ if (ret) -+ return ret; -+ -+ if (datasync && !(inode->v.i_state & I_DIRTY_DATASYNC)) -+ goto out; -+ -+ ret = sync_inode_metadata(&inode->v, 1); -+ if (ret) -+ return ret; -+out: -+ if (!c->opts.journal_flush_disabled) -+ ret = bch2_journal_flush_seq(&c->journal, -+ inode->ei_journal_seq); -+ ret2 = file_check_and_advance_wb_err(file); -+ -+ return ret ?: ret2; -+} -+ -+/* truncate: */ -+ -+static inline int range_has_data(struct bch_fs *c, -+ struct bpos start, -+ struct bpos end) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, start, 0, k, ret) { -+ if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) -+ break; -+ -+ if (bkey_extent_is_data(k.k)) { -+ ret = 1; -+ break; -+ } -+ } -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+static int __bch2_truncate_page(struct bch_inode_info *inode, -+ pgoff_t index, loff_t start, loff_t end) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct address_space *mapping = inode->v.i_mapping; -+ struct bch_page_state *s; -+ unsigned start_offset = start & (PAGE_SIZE - 1); -+ unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1; -+ unsigned i; -+ struct page *page; -+ int ret = 0; -+ -+ /* Page boundary? Nothing to do */ -+ if (!((index == start >> PAGE_SHIFT && start_offset) || -+ (index == end >> PAGE_SHIFT && end_offset != PAGE_SIZE))) -+ return 0; -+ -+ /* Above i_size? */ -+ if (index << PAGE_SHIFT >= inode->v.i_size) -+ return 0; -+ -+ page = find_lock_page(mapping, index); -+ if (!page) { -+ /* -+ * XXX: we're doing two index lookups when we end up reading the -+ * page -+ */ -+ ret = range_has_data(c, -+ POS(inode->v.i_ino, index << PAGE_SECTOR_SHIFT), -+ POS(inode->v.i_ino, (index + 1) << PAGE_SECTOR_SHIFT)); -+ if (ret <= 0) -+ return ret; -+ -+ page = find_or_create_page(mapping, index, GFP_KERNEL); -+ if (unlikely(!page)) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ } -+ -+ s = bch2_page_state_create(page, 0); -+ if (!s) { -+ ret = -ENOMEM; -+ goto unlock; -+ } -+ -+ if (!PageUptodate(page)) { -+ ret = bch2_read_single_page(page, mapping); -+ if (ret) -+ goto unlock; -+ } -+ -+ if (index != start >> PAGE_SHIFT) -+ start_offset = 0; -+ if (index != end >> PAGE_SHIFT) -+ end_offset = PAGE_SIZE; -+ -+ for (i = round_up(start_offset, block_bytes(c)) >> 9; -+ i < round_down(end_offset, block_bytes(c)) >> 9; -+ i++) { -+ s->s[i].nr_replicas = 0; -+ s->s[i].state = SECTOR_UNALLOCATED; -+ } -+ -+ zero_user_segment(page, start_offset, end_offset); -+ -+ /* -+ * Bit of a hack - we don't want truncate to fail due to -ENOSPC. -+ * -+ * XXX: because we aren't currently tracking whether the page has actual -+ * data in it (vs. just 0s, or only partially written) this wrong. ick. 
-+ */ -+ ret = bch2_get_page_disk_reservation(c, inode, page, false); -+ BUG_ON(ret); -+ -+ __set_page_dirty_nobuffers(page); -+unlock: -+ unlock_page(page); -+ put_page(page); -+out: -+ return ret; -+} -+ -+static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from) -+{ -+ return __bch2_truncate_page(inode, from >> PAGE_SHIFT, -+ from, round_up(from, PAGE_SIZE)); -+} -+ -+static int bch2_extend(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *inode_u, -+ struct iattr *iattr) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct address_space *mapping = inode->v.i_mapping; -+ int ret; -+ -+ /* -+ * sync appends: -+ * -+ * this has to be done _before_ extending i_size: -+ */ -+ ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX); -+ if (ret) -+ return ret; -+ -+ truncate_setsize(&inode->v, iattr->ia_size); -+ setattr_copy(&inode->v, iattr); -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode_size(c, inode, inode->v.i_size, -+ ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ -+ return ret; -+} -+ -+static int bch2_truncate_finish_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; -+ bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); -+ return 0; -+} -+ -+static int bch2_truncate_start_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, void *p) -+{ -+ u64 *new_i_size = p; -+ -+ bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY; -+ bi->bi_size = *new_i_size; -+ return 0; -+} -+ -+int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct address_space *mapping = inode->v.i_mapping; -+ struct bch_inode_unpacked inode_u; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ u64 new_i_size = iattr->ia_size; -+ s64 i_sectors_delta = 0; -+ int ret = 0; -+ -+ inode_dio_wait(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ /* -+ * fetch current on disk i_size: inode is locked, i_size can only -+ * increase underneath us: -+ */ -+ bch2_trans_init(&trans, c, 0, 0); -+ iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, 0); -+ ret = PTR_ERR_OR_ZERO(iter); -+ bch2_trans_exit(&trans); -+ -+ if (ret) -+ goto err; -+ -+ /* -+ * check this before next assertion; on filesystem error our normal -+ * invariants are a bit broken (truncate has to truncate the page cache -+ * before the inode). -+ */ -+ ret = bch2_journal_error(&c->journal); -+ if (ret) -+ goto err; -+ -+ BUG_ON(inode->v.i_size < inode_u.bi_size); -+ -+ if (iattr->ia_size > inode->v.i_size) { -+ ret = bch2_extend(inode, &inode_u, iattr); -+ goto err; -+ } -+ -+ ret = bch2_truncate_page(inode, iattr->ia_size); -+ if (unlikely(ret)) -+ goto err; -+ -+ /* -+ * When extending, we're going to write the new i_size to disk -+ * immediately so we need to flush anything above the current on disk -+ * i_size first: -+ * -+ * Also, when extending we need to flush the page that i_size currently -+ * straddles - if it's mapped to userspace, we need to ensure that -+ * userspace has to redirty it and call .mkwrite -> set_page_dirty -+ * again to allocate the part of the page that was extended. 
-+ */ -+ if (iattr->ia_size > inode_u.bi_size) -+ ret = filemap_write_and_wait_range(mapping, -+ inode_u.bi_size, -+ iattr->ia_size - 1); -+ else if (iattr->ia_size & (PAGE_SIZE - 1)) -+ ret = filemap_write_and_wait_range(mapping, -+ round_down(iattr->ia_size, PAGE_SIZE), -+ iattr->ia_size - 1); -+ if (ret) -+ goto err; -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode(c, inode, bch2_truncate_start_fn, -+ &new_i_size, 0); -+ mutex_unlock(&inode->ei_update_lock); -+ -+ if (unlikely(ret)) -+ goto err; -+ -+ truncate_setsize(&inode->v, iattr->ia_size); -+ -+ ret = bch2_fpunch(c, inode->v.i_ino, -+ round_up(iattr->ia_size, block_bytes(c)) >> 9, -+ U64_MAX, &inode->ei_journal_seq, &i_sectors_delta); -+ i_sectors_acct(c, inode, NULL, i_sectors_delta); -+ -+ if (unlikely(ret)) -+ goto err; -+ -+ setattr_copy(&inode->v, iattr); -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, -+ ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+err: -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ return ret; -+} -+ -+/* fallocate: */ -+ -+static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ u64 discard_start = round_up(offset, block_bytes(c)) >> 9; -+ u64 discard_end = round_down(offset + len, block_bytes(c)) >> 9; -+ int ret = 0; -+ -+ inode_lock(&inode->v); -+ inode_dio_wait(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ ret = __bch2_truncate_page(inode, -+ offset >> PAGE_SHIFT, -+ offset, offset + len); -+ if (unlikely(ret)) -+ goto err; -+ -+ if (offset >> PAGE_SHIFT != -+ (offset + len) >> PAGE_SHIFT) { -+ ret = __bch2_truncate_page(inode, -+ (offset + len) >> PAGE_SHIFT, -+ offset, offset + len); -+ if (unlikely(ret)) -+ goto err; -+ } -+ -+ truncate_pagecache_range(&inode->v, offset, offset + len - 1); -+ -+ if (discard_start < discard_end) { -+ s64 i_sectors_delta = 0; -+ -+ ret = bch2_fpunch(c, inode->v.i_ino, -+ discard_start, discard_end, -+ &inode->ei_journal_seq, -+ &i_sectors_delta); -+ i_sectors_acct(c, inode, NULL, i_sectors_delta); -+ } -+err: -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ inode_unlock(&inode->v); -+ -+ return ret; -+} -+ -+static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, -+ loff_t offset, loff_t len, -+ bool insert) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct address_space *mapping = inode->v.i_mapping; -+ struct bkey_on_stack copy; -+ struct btree_trans trans; -+ struct btree_iter *src, *dst; -+ loff_t shift, new_size; -+ u64 src_start; -+ int ret; -+ -+ if ((offset | len) & (block_bytes(c) - 1)) -+ return -EINVAL; -+ -+ bkey_on_stack_init(©); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); -+ -+ /* -+ * We need i_mutex to keep the page cache consistent with the extents -+ * btree, and the btree consistent with i_size - we don't need outside -+ * locking for the extents btree itself, because we're using linked -+ * iterators -+ */ -+ inode_lock(&inode->v); -+ inode_dio_wait(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ if (insert) { -+ ret = -EFBIG; -+ if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len) -+ goto err; -+ -+ ret = -EINVAL; -+ if (offset >= inode->v.i_size) -+ goto err; -+ -+ src_start = U64_MAX; -+ shift = len; -+ } else { -+ ret = -EINVAL; -+ if (offset + len >= inode->v.i_size) -+ goto err; -+ -+ src_start = offset + len; -+ shift = -len; -+ } -+ -+ new_size = 
inode->v.i_size + shift; -+ -+ ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); -+ if (ret) -+ goto err; -+ -+ if (insert) { -+ i_size_write(&inode->v, new_size); -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode_size(c, inode, new_size, -+ ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ } else { -+ s64 i_sectors_delta = 0; -+ -+ ret = bch2_fpunch(c, inode->v.i_ino, -+ offset >> 9, (offset + len) >> 9, -+ &inode->ei_journal_seq, -+ &i_sectors_delta); -+ i_sectors_acct(c, inode, NULL, i_sectors_delta); -+ -+ if (ret) -+ goto err; -+ } -+ -+ src = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(inode->v.i_ino, src_start >> 9), -+ BTREE_ITER_INTENT); -+ BUG_ON(IS_ERR_OR_NULL(src)); -+ -+ dst = bch2_trans_copy_iter(&trans, src); -+ BUG_ON(IS_ERR_OR_NULL(dst)); -+ -+ while (1) { -+ struct disk_reservation disk_res = -+ bch2_disk_reservation_init(c, 0); -+ struct bkey_i delete; -+ struct bkey_s_c k; -+ struct bpos next_pos; -+ struct bpos move_pos = POS(inode->v.i_ino, offset >> 9); -+ struct bpos atomic_end; -+ unsigned trigger_flags = 0; -+ -+ k = insert -+ ? bch2_btree_iter_peek_prev(src) -+ : bch2_btree_iter_peek(src); -+ if ((ret = bkey_err(k))) -+ goto bkey_err; -+ -+ if (!k.k || k.k->p.inode != inode->v.i_ino) -+ break; -+ -+ BUG_ON(bkey_cmp(src->pos, bkey_start_pos(k.k))); -+ -+ if (insert && -+ bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0) -+ break; -+reassemble: -+ bkey_on_stack_reassemble(©, c, k); -+ -+ if (insert && -+ bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) -+ bch2_cut_front(move_pos, copy.k); -+ -+ copy.k->k.p.offset += shift >> 9; -+ bch2_btree_iter_set_pos(dst, bkey_start_pos(©.k->k)); -+ -+ ret = bch2_extent_atomic_end(dst, copy.k, &atomic_end); -+ if (ret) -+ goto bkey_err; -+ -+ if (bkey_cmp(atomic_end, copy.k->k.p)) { -+ if (insert) { -+ move_pos = atomic_end; -+ move_pos.offset -= shift >> 9; -+ goto reassemble; -+ } else { -+ bch2_cut_back(atomic_end, copy.k); -+ } -+ } -+ -+ bkey_init(&delete.k); -+ delete.k.p = copy.k->k.p; -+ delete.k.size = copy.k->k.size; -+ delete.k.p.offset -= shift >> 9; -+ -+ next_pos = insert ? 
bkey_start_pos(&delete.k) : delete.k.p; -+ -+ if (copy.k->k.size == k.k->size) { -+ /* -+ * If we're moving the entire extent, we can skip -+ * running triggers: -+ */ -+ trigger_flags |= BTREE_TRIGGER_NORUN; -+ } else { -+ /* We might end up splitting compressed extents: */ -+ unsigned nr_ptrs = -+ bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k)); -+ -+ ret = bch2_disk_reservation_get(c, &disk_res, -+ copy.k->k.size, nr_ptrs, -+ BCH_DISK_RESERVATION_NOFAIL); -+ BUG_ON(ret); -+ } -+ -+ bch2_btree_iter_set_pos(src, bkey_start_pos(&delete.k)); -+ -+ ret = bch2_trans_update(&trans, src, &delete, trigger_flags) ?: -+ bch2_trans_update(&trans, dst, copy.k, trigger_flags) ?: -+ bch2_trans_commit(&trans, &disk_res, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOFAIL); -+ bch2_disk_reservation_put(c, &disk_res); -+bkey_err: -+ if (!ret) -+ bch2_btree_iter_set_pos(src, next_pos); -+ -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ goto err; -+ -+ bch2_trans_cond_resched(&trans); -+ } -+ bch2_trans_unlock(&trans); -+ -+ if (!insert) { -+ i_size_write(&inode->v, new_size); -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode_size(c, inode, new_size, -+ ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ } -+err: -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(©, c); -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ inode_unlock(&inode->v); -+ return ret; -+} -+ -+static long bchfs_fallocate(struct bch_inode_info *inode, int mode, -+ loff_t offset, loff_t len) -+{ -+ struct address_space *mapping = inode->v.i_mapping; -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bpos end_pos; -+ loff_t end = offset + len; -+ loff_t block_start = round_down(offset, block_bytes(c)); -+ loff_t block_end = round_up(end, block_bytes(c)); -+ unsigned sectors; -+ unsigned replicas = io_opts(c, &inode->ei_inode).data_replicas; -+ int ret; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ inode_lock(&inode->v); -+ inode_dio_wait(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) { -+ ret = inode_newsize_ok(&inode->v, end); -+ if (ret) -+ goto err; -+ } -+ -+ if (mode & FALLOC_FL_ZERO_RANGE) { -+ ret = __bch2_truncate_page(inode, -+ offset >> PAGE_SHIFT, -+ offset, end); -+ -+ if (!ret && -+ offset >> PAGE_SHIFT != end >> PAGE_SHIFT) -+ ret = __bch2_truncate_page(inode, -+ end >> PAGE_SHIFT, -+ offset, end); -+ -+ if (unlikely(ret)) -+ goto err; -+ -+ truncate_pagecache_range(&inode->v, offset, end - 1); -+ } -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(inode->v.i_ino, block_start >> 9), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ end_pos = POS(inode->v.i_ino, block_end >> 9); -+ -+ while (bkey_cmp(iter->pos, end_pos) < 0) { -+ s64 i_sectors_delta = 0; -+ struct disk_reservation disk_res = { 0 }; -+ struct quota_res quota_res = { 0 }; -+ struct bkey_i_reservation reservation; -+ struct bkey_s_c k; -+ -+ bch2_trans_begin(&trans); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ if ((ret = bkey_err(k))) -+ goto bkey_err; -+ -+ /* already reserved */ -+ if (k.k->type == KEY_TYPE_reservation && -+ bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) { -+ bch2_btree_iter_next_slot(iter); -+ continue; -+ } -+ -+ if (bkey_extent_is_data(k.k) && -+ !(mode & FALLOC_FL_ZERO_RANGE)) { -+ bch2_btree_iter_next_slot(iter); -+ continue; -+ } -+ -+ bkey_reservation_init(&reservation.k_i); -+ reservation.k.type = 
KEY_TYPE_reservation; -+ reservation.k.p = k.k->p; -+ reservation.k.size = k.k->size; -+ -+ bch2_cut_front(iter->pos, &reservation.k_i); -+ bch2_cut_back(end_pos, &reservation.k_i); -+ -+ sectors = reservation.k.size; -+ reservation.v.nr_replicas = bch2_bkey_nr_ptrs_allocated(k); -+ -+ if (!bkey_extent_is_allocation(k.k)) { -+ ret = bch2_quota_reservation_add(c, inode, -+ "a_res, -+ sectors, true); -+ if (unlikely(ret)) -+ goto bkey_err; -+ } -+ -+ if (reservation.v.nr_replicas < replicas || -+ bch2_bkey_sectors_compressed(k)) { -+ ret = bch2_disk_reservation_get(c, &disk_res, sectors, -+ replicas, 0); -+ if (unlikely(ret)) -+ goto bkey_err; -+ -+ reservation.v.nr_replicas = disk_res.nr_replicas; -+ } -+ -+ ret = bch2_extent_update(&trans, iter, &reservation.k_i, -+ &disk_res, &inode->ei_journal_seq, -+ 0, &i_sectors_delta); -+ i_sectors_acct(c, inode, "a_res, i_sectors_delta); -+bkey_err: -+ bch2_quota_reservation_put(c, inode, "a_res); -+ bch2_disk_reservation_put(c, &disk_res); -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ goto err; -+ } -+ -+ /* -+ * Do we need to extend the file? -+ * -+ * If we zeroed up to the end of the file, we dropped whatever writes -+ * were going to write out the current i_size, so we have to extend -+ * manually even if FL_KEEP_SIZE was set: -+ */ -+ if (end >= inode->v.i_size && -+ (!(mode & FALLOC_FL_KEEP_SIZE) || -+ (mode & FALLOC_FL_ZERO_RANGE))) { -+ struct btree_iter *inode_iter; -+ struct bch_inode_unpacked inode_u; -+ -+ do { -+ bch2_trans_begin(&trans); -+ inode_iter = bch2_inode_peek(&trans, &inode_u, -+ inode->v.i_ino, 0); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ } while (ret == -EINTR); -+ -+ bch2_trans_unlock(&trans); -+ -+ if (ret) -+ goto err; -+ -+ /* -+ * Sync existing appends before extending i_size, -+ * as in bch2_extend(): -+ */ -+ ret = filemap_write_and_wait_range(mapping, -+ inode_u.bi_size, S64_MAX); -+ if (ret) -+ goto err; -+ -+ if (mode & FALLOC_FL_KEEP_SIZE) -+ end = inode->v.i_size; -+ else -+ i_size_write(&inode->v, end); -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode_size(c, inode, end, 0); -+ mutex_unlock(&inode->ei_update_lock); -+ } -+err: -+ bch2_trans_exit(&trans); -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ inode_unlock(&inode->v); -+ return ret; -+} -+ -+long bch2_fallocate_dispatch(struct file *file, int mode, -+ loff_t offset, loff_t len) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ long ret; -+ -+ if (!percpu_ref_tryget(&c->writes)) -+ return -EROFS; -+ -+ if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE))) -+ ret = bchfs_fallocate(inode, mode, offset, len); -+ else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE)) -+ ret = bchfs_fpunch(inode, offset, len); -+ else if (mode == FALLOC_FL_INSERT_RANGE) -+ ret = bchfs_fcollapse_finsert(inode, offset, len, true); -+ else if (mode == FALLOC_FL_COLLAPSE_RANGE) -+ ret = bchfs_fcollapse_finsert(inode, offset, len, false); -+ else -+ ret = -EOPNOTSUPP; -+ -+ percpu_ref_put(&c->writes); -+ -+ return ret; -+} -+ -+static void mark_range_unallocated(struct bch_inode_info *inode, -+ loff_t start, loff_t end) -+{ -+ pgoff_t index = start >> PAGE_SHIFT; -+ pgoff_t end_index = (end - 1) >> PAGE_SHIFT; -+ struct pagevec pvec; -+ -+ pagevec_init(&pvec); -+ -+ do { -+ unsigned nr_pages, i, j; -+ -+ nr_pages = pagevec_lookup_range(&pvec, inode->v.i_mapping, -+ &index, end_index); -+ if (nr_pages == 0) -+ break; -+ -+ for (i = 0; i < nr_pages; i++) { -+ struct page *page = 
pvec.pages[i]; -+ struct bch_page_state *s; -+ -+ lock_page(page); -+ s = bch2_page_state(page); -+ -+ if (s) { -+ spin_lock(&s->lock); -+ for (j = 0; j < PAGE_SECTORS; j++) -+ s->s[j].nr_replicas = 0; -+ spin_unlock(&s->lock); -+ } -+ -+ unlock_page(page); -+ } -+ pagevec_release(&pvec); -+ } while (index <= end_index); -+} -+ -+loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, -+ struct file *file_dst, loff_t pos_dst, -+ loff_t len, unsigned remap_flags) -+{ -+ struct bch_inode_info *src = file_bch_inode(file_src); -+ struct bch_inode_info *dst = file_bch_inode(file_dst); -+ struct bch_fs *c = src->v.i_sb->s_fs_info; -+ s64 i_sectors_delta = 0; -+ u64 aligned_len; -+ loff_t ret = 0; -+ -+ if (!c->opts.reflink) -+ return -EOPNOTSUPP; -+ -+ if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY)) -+ return -EINVAL; -+ -+ if (remap_flags & REMAP_FILE_DEDUP) -+ return -EOPNOTSUPP; -+ -+ if ((pos_src & (block_bytes(c) - 1)) || -+ (pos_dst & (block_bytes(c) - 1))) -+ return -EINVAL; -+ -+ if (src == dst && -+ abs(pos_src - pos_dst) < len) -+ return -EINVAL; -+ -+ bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); -+ -+ file_update_time(file_dst); -+ -+ inode_dio_wait(&src->v); -+ inode_dio_wait(&dst->v); -+ -+ ret = generic_remap_file_range_prep(file_src, pos_src, -+ file_dst, pos_dst, -+ &len, remap_flags); -+ if (ret < 0 || len == 0) -+ goto err; -+ -+ aligned_len = round_up((u64) len, block_bytes(c)); -+ -+ ret = write_invalidate_inode_pages_range(dst->v.i_mapping, -+ pos_dst, pos_dst + len - 1); -+ if (ret) -+ goto err; -+ -+ mark_range_unallocated(src, pos_src, pos_src + aligned_len); -+ -+ ret = bch2_remap_range(c, -+ POS(dst->v.i_ino, pos_dst >> 9), -+ POS(src->v.i_ino, pos_src >> 9), -+ aligned_len >> 9, -+ &dst->ei_journal_seq, -+ pos_dst + len, &i_sectors_delta); -+ if (ret < 0) -+ goto err; -+ -+ /* -+ * due to alignment, we might have remapped slightly more than requsted -+ */ -+ ret = min((u64) ret << 9, (u64) len); -+ -+ /* XXX get a quota reservation */ -+ i_sectors_acct(c, dst, NULL, i_sectors_delta); -+ -+ spin_lock(&dst->v.i_lock); -+ if (pos_dst + ret > dst->v.i_size) -+ i_size_write(&dst->v, pos_dst + ret); -+ spin_unlock(&dst->v.i_lock); -+err: -+ bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); -+ -+ return ret; -+} -+ -+/* fseek: */ -+ -+static int page_data_offset(struct page *page, unsigned offset) -+{ -+ struct bch_page_state *s = bch2_page_state(page); -+ unsigned i; -+ -+ if (s) -+ for (i = offset >> 9; i < PAGE_SECTORS; i++) -+ if (s->s[i].state >= SECTOR_DIRTY) -+ return i << 9; -+ -+ return -1; -+} -+ -+static loff_t bch2_seek_pagecache_data(struct inode *vinode, -+ loff_t start_offset, -+ loff_t end_offset) -+{ -+ struct address_space *mapping = vinode->i_mapping; -+ struct page *page; -+ pgoff_t start_index = start_offset >> PAGE_SHIFT; -+ pgoff_t end_index = end_offset >> PAGE_SHIFT; -+ pgoff_t index = start_index; -+ loff_t ret; -+ int offset; -+ -+ while (index <= end_index) { -+ if (find_get_pages_range(mapping, &index, end_index, 1, &page)) { -+ lock_page(page); -+ -+ offset = page_data_offset(page, -+ page->index == start_index -+ ? 
start_offset & (PAGE_SIZE - 1) -+ : 0); -+ if (offset >= 0) { -+ ret = clamp(((loff_t) page->index << PAGE_SHIFT) + -+ offset, -+ start_offset, end_offset); -+ unlock_page(page); -+ put_page(page); -+ return ret; -+ } -+ -+ unlock_page(page); -+ put_page(page); -+ } else { -+ break; -+ } -+ } -+ -+ return end_offset; -+} -+ -+static loff_t bch2_seek_data(struct file *file, u64 offset) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 isize, next_data = MAX_LFS_FILESIZE; -+ int ret; -+ -+ isize = i_size_read(&inode->v); -+ if (offset >= isize) -+ return -ENXIO; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, -+ POS(inode->v.i_ino, offset >> 9), 0, k, ret) { -+ if (k.k->p.inode != inode->v.i_ino) { -+ break; -+ } else if (bkey_extent_is_data(k.k)) { -+ next_data = max(offset, bkey_start_offset(k.k) << 9); -+ break; -+ } else if (k.k->p.offset >> 9 > isize) -+ break; -+ } -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ return ret; -+ -+ if (next_data > offset) -+ next_data = bch2_seek_pagecache_data(&inode->v, -+ offset, next_data); -+ -+ if (next_data >= isize) -+ return -ENXIO; -+ -+ return vfs_setpos(file, next_data, MAX_LFS_FILESIZE); -+} -+ -+static int __page_hole_offset(struct page *page, unsigned offset) -+{ -+ struct bch_page_state *s = bch2_page_state(page); -+ unsigned i; -+ -+ if (!s) -+ return 0; -+ -+ for (i = offset >> 9; i < PAGE_SECTORS; i++) -+ if (s->s[i].state < SECTOR_DIRTY) -+ return i << 9; -+ -+ return -1; -+} -+ -+static loff_t page_hole_offset(struct address_space *mapping, loff_t offset) -+{ -+ pgoff_t index = offset >> PAGE_SHIFT; -+ struct page *page; -+ int pg_offset; -+ loff_t ret = -1; -+ -+ page = find_lock_entry(mapping, index); -+ if (!page || xa_is_value(page)) -+ return offset; -+ -+ pg_offset = __page_hole_offset(page, offset & (PAGE_SIZE - 1)); -+ if (pg_offset >= 0) -+ ret = ((loff_t) index << PAGE_SHIFT) + pg_offset; -+ -+ unlock_page(page); -+ -+ return ret; -+} -+ -+static loff_t bch2_seek_pagecache_hole(struct inode *vinode, -+ loff_t start_offset, -+ loff_t end_offset) -+{ -+ struct address_space *mapping = vinode->i_mapping; -+ loff_t offset = start_offset, hole; -+ -+ while (offset < end_offset) { -+ hole = page_hole_offset(mapping, offset); -+ if (hole >= 0 && hole <= end_offset) -+ return max(start_offset, hole); -+ -+ offset += PAGE_SIZE; -+ offset &= PAGE_MASK; -+ } -+ -+ return end_offset; -+} -+ -+static loff_t bch2_seek_hole(struct file *file, u64 offset) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 isize, next_hole = MAX_LFS_FILESIZE; -+ int ret; -+ -+ isize = i_size_read(&inode->v); -+ if (offset >= isize) -+ return -ENXIO; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, -+ POS(inode->v.i_ino, offset >> 9), -+ BTREE_ITER_SLOTS, k, ret) { -+ if (k.k->p.inode != inode->v.i_ino) { -+ next_hole = bch2_seek_pagecache_hole(&inode->v, -+ offset, MAX_LFS_FILESIZE); -+ break; -+ } else if (!bkey_extent_is_data(k.k)) { -+ next_hole = bch2_seek_pagecache_hole(&inode->v, -+ max(offset, bkey_start_offset(k.k) << 9), -+ k.k->p.offset << 9); -+ -+ if (next_hole < k.k->p.offset << 9) -+ break; -+ } else { -+ offset = max(offset, bkey_start_offset(k.k) << 9); 
-+ } -+ } -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ return ret; -+ -+ if (next_hole > isize) -+ next_hole = isize; -+ -+ return vfs_setpos(file, next_hole, MAX_LFS_FILESIZE); -+} -+ -+loff_t bch2_llseek(struct file *file, loff_t offset, int whence) -+{ -+ switch (whence) { -+ case SEEK_SET: -+ case SEEK_CUR: -+ case SEEK_END: -+ return generic_file_llseek(file, offset, whence); -+ case SEEK_DATA: -+ return bch2_seek_data(file, offset); -+ case SEEK_HOLE: -+ return bch2_seek_hole(file, offset); -+ } -+ -+ return -EINVAL; -+} -+ -+void bch2_fs_fsio_exit(struct bch_fs *c) -+{ -+ bioset_exit(&c->dio_write_bioset); -+ bioset_exit(&c->dio_read_bioset); -+ bioset_exit(&c->writepage_bioset); -+} -+ -+int bch2_fs_fsio_init(struct bch_fs *c) -+{ -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ if (bioset_init(&c->writepage_bioset, -+ 4, offsetof(struct bch_writepage_io, op.wbio.bio), -+ BIOSET_NEED_BVECS) || -+ bioset_init(&c->dio_read_bioset, -+ 4, offsetof(struct dio_read, rbio.bio), -+ BIOSET_NEED_BVECS) || -+ bioset_init(&c->dio_write_bioset, -+ 4, offsetof(struct dio_write, op.wbio.bio), -+ BIOSET_NEED_BVECS)) -+ ret = -ENOMEM; -+ -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -+ -+#endif /* NO_BCACHEFS_FS */ -diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h -new file mode 100644 -index 000000000000..7063556d289b ---- /dev/null -+++ b/fs/bcachefs/fs-io.h -@@ -0,0 +1,57 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_IO_H -+#define _BCACHEFS_FS_IO_H -+ -+#ifndef NO_BCACHEFS_FS -+ -+#include "buckets.h" -+#include "io_types.h" -+ -+#include -+ -+struct quota_res; -+ -+int __must_check bch2_write_inode_size(struct bch_fs *, -+ struct bch_inode_info *, -+ loff_t, unsigned); -+ -+int bch2_writepage(struct page *, struct writeback_control *); -+int bch2_readpage(struct file *, struct page *); -+ -+int bch2_writepages(struct address_space *, struct writeback_control *); -+int bch2_readpages(struct file *, struct address_space *, -+ struct list_head *, unsigned); -+ -+int bch2_write_begin(struct file *, struct address_space *, loff_t, -+ unsigned, unsigned, struct page **, void **); -+int bch2_write_end(struct file *, struct address_space *, loff_t, -+ unsigned, unsigned, struct page *, void *); -+ -+ssize_t bch2_read_iter(struct kiocb *, struct iov_iter *); -+ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *); -+ -+int bch2_fsync(struct file *, loff_t, loff_t, int); -+ -+int bch2_truncate(struct bch_inode_info *, struct iattr *); -+long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t); -+ -+loff_t bch2_remap_file_range(struct file *, loff_t, struct file *, -+ loff_t, loff_t, unsigned); -+ -+loff_t bch2_llseek(struct file *, loff_t, int); -+ -+vm_fault_t bch2_page_fault(struct vm_fault *); -+vm_fault_t bch2_page_mkwrite(struct vm_fault *); -+void bch2_invalidatepage(struct page *, unsigned int, unsigned int); -+int bch2_releasepage(struct page *, gfp_t); -+int bch2_migrate_page(struct address_space *, struct page *, -+ struct page *, enum migrate_mode); -+ -+void bch2_fs_fsio_exit(struct bch_fs *); -+int bch2_fs_fsio_init(struct bch_fs *); -+#else -+static inline void bch2_fs_fsio_exit(struct bch_fs *c) {} -+static inline int bch2_fs_fsio_init(struct bch_fs *c) { return 0; } -+#endif -+ -+#endif /* _BCACHEFS_FS_IO_H */ -diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c -new file mode 100644 -index 000000000000..031e6d931171 ---- /dev/null -+++ b/fs/bcachefs/fs-ioctl.c -@@ -0,0 +1,308 @@ -+// 
SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_FS -+ -+#include "bcachefs.h" -+#include "chardev.h" -+#include "dirent.h" -+#include "fs.h" -+#include "fs-common.h" -+#include "fs-ioctl.h" -+#include "quota.h" -+ -+#include -+#include -+ -+#define FS_IOC_GOINGDOWN _IOR('X', 125, __u32) -+ -+struct flags_set { -+ unsigned mask; -+ unsigned flags; -+ -+ unsigned projid; -+}; -+ -+static int bch2_inode_flags_set(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ /* -+ * We're relying on btree locking here for exclusion with other ioctl -+ * calls - use the flags in the btree (@bi), not inode->i_flags: -+ */ -+ struct flags_set *s = p; -+ unsigned newflags = s->flags; -+ unsigned oldflags = bi->bi_flags & s->mask; -+ -+ if (((newflags ^ oldflags) & (BCH_INODE_APPEND|BCH_INODE_IMMUTABLE)) && -+ !capable(CAP_LINUX_IMMUTABLE)) -+ return -EPERM; -+ -+ if (!S_ISREG(bi->bi_mode) && -+ !S_ISDIR(bi->bi_mode) && -+ (newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags) -+ return -EINVAL; -+ -+ bi->bi_flags &= ~s->mask; -+ bi->bi_flags |= newflags; -+ -+ bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); -+ return 0; -+} -+ -+static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg) -+{ -+ unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags); -+ -+ return put_user(flags, arg); -+} -+ -+static int bch2_ioc_setflags(struct bch_fs *c, -+ struct file *file, -+ struct bch_inode_info *inode, -+ void __user *arg) -+{ -+ struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) }; -+ unsigned uflags; -+ int ret; -+ -+ if (get_user(uflags, (int __user *) arg)) -+ return -EFAULT; -+ -+ s.flags = map_flags_rev(bch_flags_to_uflags, uflags); -+ if (uflags) -+ return -EOPNOTSUPP; -+ -+ ret = mnt_want_write_file(file); -+ if (ret) -+ return ret; -+ -+ inode_lock(&inode->v); -+ if (!inode_owner_or_capable(&inode->v)) { -+ ret = -EACCES; -+ goto setflags_out; -+ } -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s, -+ ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ -+setflags_out: -+ inode_unlock(&inode->v); -+ mnt_drop_write_file(file); -+ return ret; -+} -+ -+static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode, -+ struct fsxattr __user *arg) -+{ -+ struct fsxattr fa = { 0 }; -+ -+ fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags); -+ fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ]; -+ -+ return copy_to_user(arg, &fa, sizeof(fa)); -+} -+ -+static int fssetxattr_inode_update_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct flags_set *s = p; -+ -+ if (s->projid != bi->bi_project) { -+ bi->bi_fields_set |= 1U << Inode_opt_project; -+ bi->bi_project = s->projid; -+ } -+ -+ return bch2_inode_flags_set(inode, bi, p); -+} -+ -+static int bch2_ioc_fssetxattr(struct bch_fs *c, -+ struct file *file, -+ struct bch_inode_info *inode, -+ struct fsxattr __user *arg) -+{ -+ struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) }; -+ struct fsxattr fa; -+ int ret; -+ -+ if (copy_from_user(&fa, arg, sizeof(fa))) -+ return -EFAULT; -+ -+ s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags); -+ if (fa.fsx_xflags) -+ return -EOPNOTSUPP; -+ -+ if (fa.fsx_projid >= U32_MAX) -+ return -EINVAL; -+ -+ s.projid = fa.fsx_projid + 1; -+ -+ ret = mnt_want_write_file(file); -+ if (ret) -+ return ret; -+ -+ inode_lock(&inode->v); -+ if 
(!inode_owner_or_capable(&inode->v)) { -+ ret = -EACCES; -+ goto err; -+ } -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_set_projid(c, inode, s.projid); -+ if (ret) -+ goto err_unlock; -+ -+ ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, -+ ATTR_CTIME); -+err_unlock: -+ mutex_unlock(&inode->ei_update_lock); -+err: -+ inode_unlock(&inode->v); -+ mnt_drop_write_file(file); -+ return ret; -+} -+ -+static int bch2_reinherit_attrs_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_inode_info *dir = p; -+ -+ return !bch2_reinherit_attrs(bi, &dir->ei_inode); -+} -+ -+static int bch2_ioc_reinherit_attrs(struct bch_fs *c, -+ struct file *file, -+ struct bch_inode_info *src, -+ const char __user *name) -+{ -+ struct bch_inode_info *dst; -+ struct inode *vinode = NULL; -+ char *kname = NULL; -+ struct qstr qstr; -+ int ret = 0; -+ u64 inum; -+ -+ kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL); -+ if (!kname) -+ return -ENOMEM; -+ -+ ret = strncpy_from_user(kname, name, BCH_NAME_MAX); -+ if (unlikely(ret < 0)) -+ goto err1; -+ -+ qstr.len = ret; -+ qstr.name = kname; -+ -+ ret = -ENOENT; -+ inum = bch2_dirent_lookup(c, src->v.i_ino, -+ &src->ei_str_hash, -+ &qstr); -+ if (!inum) -+ goto err1; -+ -+ vinode = bch2_vfs_inode_get(c, inum); -+ ret = PTR_ERR_OR_ZERO(vinode); -+ if (ret) -+ goto err1; -+ -+ dst = to_bch_ei(vinode); -+ -+ ret = mnt_want_write_file(file); -+ if (ret) -+ goto err2; -+ -+ bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst); -+ -+ if (inode_attr_changing(src, dst, Inode_opt_project)) { -+ ret = bch2_fs_quota_transfer(c, dst, -+ src->ei_qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err3; -+ } -+ -+ ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0); -+err3: -+ bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst); -+ -+ /* return true if we did work */ -+ if (ret >= 0) -+ ret = !ret; -+ -+ mnt_drop_write_file(file); -+err2: -+ iput(vinode); -+err1: -+ kfree(kname); -+ -+ return ret; -+} -+ -+long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct super_block *sb = inode->v.i_sb; -+ struct bch_fs *c = sb->s_fs_info; -+ -+ switch (cmd) { -+ case FS_IOC_GETFLAGS: -+ return bch2_ioc_getflags(inode, (int __user *) arg); -+ -+ case FS_IOC_SETFLAGS: -+ return bch2_ioc_setflags(c, file, inode, (int __user *) arg); -+ -+ case FS_IOC_FSGETXATTR: -+ return bch2_ioc_fsgetxattr(inode, (void __user *) arg); -+ case FS_IOC_FSSETXATTR: -+ return bch2_ioc_fssetxattr(c, file, inode, -+ (void __user *) arg); -+ -+ case BCHFS_IOC_REINHERIT_ATTRS: -+ return bch2_ioc_reinherit_attrs(c, file, inode, -+ (void __user *) arg); -+ -+ case FS_IOC_GETVERSION: -+ return -ENOTTY; -+ case FS_IOC_SETVERSION: -+ return -ENOTTY; -+ -+ case FS_IOC_GOINGDOWN: -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ down_write(&sb->s_umount); -+ sb->s_flags |= SB_RDONLY; -+ if (bch2_fs_emergency_read_only(c)) -+ bch_err(c, "emergency read only due to ioctl"); -+ up_write(&sb->s_umount); -+ return 0; -+ -+ default: -+ return bch2_fs_ioctl(c, cmd, (void __user *) arg); -+ } -+} -+ -+#ifdef CONFIG_COMPAT -+long bch2_compat_fs_ioctl(struct file *file, unsigned cmd, unsigned long arg) -+{ -+ /* These are just misnamed, they actually get/put from/to user an int */ -+ switch (cmd) { -+ case FS_IOC_GETFLAGS: -+ cmd = FS_IOC_GETFLAGS; -+ break; -+ case FS_IOC32_SETFLAGS: -+ cmd = FS_IOC_SETFLAGS; -+ break; -+ default: -+ return -ENOIOCTLCMD; 
-+ } -+ return bch2_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); -+} -+#endif -+ -+#endif /* NO_BCACHEFS_FS */ -diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h -new file mode 100644 -index 000000000000..f201980ef2c3 ---- /dev/null -+++ b/fs/bcachefs/fs-ioctl.h -@@ -0,0 +1,81 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_IOCTL_H -+#define _BCACHEFS_FS_IOCTL_H -+ -+/* Inode flags: */ -+ -+/* bcachefs inode flags -> vfs inode flags: */ -+static const unsigned bch_flags_to_vfs[] = { -+ [__BCH_INODE_SYNC] = S_SYNC, -+ [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE, -+ [__BCH_INODE_APPEND] = S_APPEND, -+ [__BCH_INODE_NOATIME] = S_NOATIME, -+}; -+ -+/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ -+static const unsigned bch_flags_to_uflags[] = { -+ [__BCH_INODE_SYNC] = FS_SYNC_FL, -+ [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL, -+ [__BCH_INODE_APPEND] = FS_APPEND_FL, -+ [__BCH_INODE_NODUMP] = FS_NODUMP_FL, -+ [__BCH_INODE_NOATIME] = FS_NOATIME_FL, -+}; -+ -+/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ -+static const unsigned bch_flags_to_xflags[] = { -+ [__BCH_INODE_SYNC] = FS_XFLAG_SYNC, -+ [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE, -+ [__BCH_INODE_APPEND] = FS_XFLAG_APPEND, -+ [__BCH_INODE_NODUMP] = FS_XFLAG_NODUMP, -+ [__BCH_INODE_NOATIME] = FS_XFLAG_NOATIME, -+ //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT; -+}; -+ -+#define set_flags(_map, _in, _out) \ -+do { \ -+ unsigned _i; \ -+ \ -+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ -+ if ((_in) & (1 << _i)) \ -+ (_out) |= _map[_i]; \ -+ else \ -+ (_out) &= ~_map[_i]; \ -+} while (0) -+ -+#define map_flags(_map, _in) \ -+({ \ -+ unsigned _out = 0; \ -+ \ -+ set_flags(_map, _in, _out); \ -+ _out; \ -+}) -+ -+#define map_flags_rev(_map, _in) \ -+({ \ -+ unsigned _i, _out = 0; \ -+ \ -+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ -+ if ((_in) & _map[_i]) { \ -+ (_out) |= 1 << _i; \ -+ (_in) &= ~_map[_i]; \ -+ } \ -+ (_out); \ -+}) -+ -+#define map_defined(_map) \ -+({ \ -+ unsigned _in = ~0; \ -+ \ -+ map_flags_rev(_map, _in); \ -+}) -+ -+/* Set VFS inode flags from bcachefs inode: */ -+static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) -+{ -+ set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); -+} -+ -+long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long); -+long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long); -+ -+#endif /* _BCACHEFS_FS_IOCTL_H */ -diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -new file mode 100644 -index 000000000000..a47923d67f7a ---- /dev/null -+++ b/fs/bcachefs/fs.c -@@ -0,0 +1,1605 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_FS -+ -+#include "bcachefs.h" -+#include "acl.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "chardev.h" -+#include "dirent.h" -+#include "extents.h" -+#include "fs.h" -+#include "fs-common.h" -+#include "fs-io.h" -+#include "fs-ioctl.h" -+#include "fsck.h" -+#include "inode.h" -+#include "io.h" -+#include "journal.h" -+#include "keylist.h" -+#include "quota.h" -+#include "super.h" -+#include "xattr.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static struct kmem_cache *bch2_inode_cache; -+ -+static void bch2_vfs_inode_init(struct bch_fs *, -+ struct bch_inode_info *, -+ struct bch_inode_unpacked *); -+ -+static void journal_seq_copy(struct bch_inode_info *dst, -+ u64 journal_seq) -+{ -+ u64 old, v = READ_ONCE(dst->ei_journal_seq); -+ -+ do { -+ old = v; -+ -+ if (old >= 
journal_seq) -+ break; -+ } while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old); -+} -+ -+static void __pagecache_lock_put(struct pagecache_lock *lock, long i) -+{ -+ BUG_ON(atomic_long_read(&lock->v) == 0); -+ -+ if (atomic_long_sub_return_release(i, &lock->v) == 0) -+ wake_up_all(&lock->wait); -+} -+ -+static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i) -+{ -+ long v = atomic_long_read(&lock->v), old; -+ -+ do { -+ old = v; -+ -+ if (i > 0 ? v < 0 : v > 0) -+ return false; -+ } while ((v = atomic_long_cmpxchg_acquire(&lock->v, -+ old, old + i)) != old); -+ return true; -+} -+ -+static void __pagecache_lock_get(struct pagecache_lock *lock, long i) -+{ -+ wait_event(lock->wait, __pagecache_lock_tryget(lock, i)); -+} -+ -+void bch2_pagecache_add_put(struct pagecache_lock *lock) -+{ -+ __pagecache_lock_put(lock, 1); -+} -+ -+void bch2_pagecache_add_get(struct pagecache_lock *lock) -+{ -+ __pagecache_lock_get(lock, 1); -+} -+ -+void bch2_pagecache_block_put(struct pagecache_lock *lock) -+{ -+ __pagecache_lock_put(lock, -1); -+} -+ -+void bch2_pagecache_block_get(struct pagecache_lock *lock) -+{ -+ __pagecache_lock_get(lock, -1); -+} -+ -+void bch2_inode_update_after_write(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ unsigned fields) -+{ -+ set_nlink(&inode->v, bch2_inode_nlink_get(bi)); -+ i_uid_write(&inode->v, bi->bi_uid); -+ i_gid_write(&inode->v, bi->bi_gid); -+ inode->v.i_mode = bi->bi_mode; -+ -+ if (fields & ATTR_ATIME) -+ inode->v.i_atime = bch2_time_to_timespec(c, bi->bi_atime); -+ if (fields & ATTR_MTIME) -+ inode->v.i_mtime = bch2_time_to_timespec(c, bi->bi_mtime); -+ if (fields & ATTR_CTIME) -+ inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime); -+ -+ inode->ei_inode = *bi; -+ -+ bch2_inode_flags_to_vfs(inode); -+} -+ -+int __must_check bch2_write_inode(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ inode_set_fn set, -+ void *p, unsigned fields) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bch_inode_unpacked inode_u; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(iter) ?: -+ (set ? set(inode, &inode_u, p) : 0) ?: -+ bch2_inode_write(&trans, iter, &inode_u) ?: -+ bch2_trans_commit(&trans, NULL, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOFAIL); -+ -+ /* -+ * the btree node lock protects inode->ei_inode, not ei_update_lock; -+ * this is important for inode updates via bchfs_write_index_update -+ */ -+ if (!ret) -+ bch2_inode_update_after_write(c, inode, &inode_u, fields); -+ -+ bch2_trans_iter_put(&trans, iter); -+ -+ if (ret == -EINTR) -+ goto retry; -+ -+ bch2_trans_exit(&trans); -+ return ret < 0 ? 
ret : 0; -+} -+ -+int bch2_fs_quota_transfer(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch_qid new_qid, -+ unsigned qtypes, -+ enum quota_acct_mode mode) -+{ -+ unsigned i; -+ int ret; -+ -+ qtypes &= enabled_qtypes(c); -+ -+ for (i = 0; i < QTYP_NR; i++) -+ if (new_qid.q[i] == inode->ei_qid.q[i]) -+ qtypes &= ~(1U << i); -+ -+ if (!qtypes) -+ return 0; -+ -+ mutex_lock(&inode->ei_quota_lock); -+ -+ ret = bch2_quota_transfer(c, qtypes, new_qid, -+ inode->ei_qid, -+ inode->v.i_blocks + -+ inode->ei_quota_reserved, -+ mode); -+ if (!ret) -+ for (i = 0; i < QTYP_NR; i++) -+ if (qtypes & (1 << i)) -+ inode->ei_qid.q[i] = new_qid.q[i]; -+ -+ mutex_unlock(&inode->ei_quota_lock); -+ -+ return ret; -+} -+ -+struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) -+{ -+ struct bch_inode_unpacked inode_u; -+ struct bch_inode_info *inode; -+ int ret; -+ -+ inode = to_bch_ei(iget_locked(c->vfs_sb, inum)); -+ if (unlikely(!inode)) -+ return ERR_PTR(-ENOMEM); -+ if (!(inode->v.i_state & I_NEW)) -+ return &inode->v; -+ -+ ret = bch2_inode_find_by_inum(c, inum, &inode_u); -+ if (ret) { -+ iget_failed(&inode->v); -+ return ERR_PTR(ret); -+ } -+ -+ bch2_vfs_inode_init(c, inode, &inode_u); -+ -+ inode->ei_journal_seq = bch2_inode_journal_seq(&c->journal, inum); -+ -+ unlock_new_inode(&inode->v); -+ -+ return &inode->v; -+} -+ -+static struct bch_inode_info * -+__bch2_create(struct bch_inode_info *dir, struct dentry *dentry, -+ umode_t mode, dev_t rdev, bool tmpfile) -+{ -+ struct bch_fs *c = dir->v.i_sb->s_fs_info; -+ struct user_namespace *ns = dir->v.i_sb->s_user_ns; -+ struct btree_trans trans; -+ struct bch_inode_unpacked dir_u; -+ struct bch_inode_info *inode, *old; -+ struct bch_inode_unpacked inode_u; -+ struct posix_acl *default_acl = NULL, *acl = NULL; -+ u64 journal_seq = 0; -+ int ret; -+ -+ /* -+ * preallocate acls + vfs inode before btree transaction, so that -+ * nothing can fail after the transaction succeeds: -+ */ -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl); -+ if (ret) -+ return ERR_PTR(ret); -+#endif -+ inode = to_bch_ei(new_inode(c->vfs_sb)); -+ if (unlikely(!inode)) { -+ inode = ERR_PTR(-ENOMEM); -+ goto err; -+ } -+ -+ bch2_inode_init_early(c, &inode_u); -+ -+ if (!tmpfile) -+ mutex_lock(&dir->ei_update_lock); -+ -+ bch2_trans_init(&trans, c, 8, 1024); -+retry: -+ bch2_trans_begin(&trans); -+ -+ ret = bch2_create_trans(&trans, dir->v.i_ino, &dir_u, &inode_u, -+ !tmpfile ? 
&dentry->d_name : NULL, -+ from_kuid(ns, current_fsuid()), -+ from_kgid(ns, current_fsgid()), -+ mode, rdev, -+ default_acl, acl) ?: -+ bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (unlikely(ret)) -+ goto err_before_quota; -+ -+ ret = bch2_trans_commit(&trans, NULL, &journal_seq, -+ BTREE_INSERT_NOUNLOCK); -+ if (unlikely(ret)) { -+ bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, -+ KEY_TYPE_QUOTA_WARN); -+err_before_quota: -+ if (ret == -EINTR) -+ goto retry; -+ goto err_trans; -+ } -+ -+ if (!tmpfile) { -+ bch2_inode_update_after_write(c, dir, &dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ journal_seq_copy(dir, journal_seq); -+ mutex_unlock(&dir->ei_update_lock); -+ } -+ -+ bch2_vfs_inode_init(c, inode, &inode_u); -+ journal_seq_copy(inode, journal_seq); -+ -+ set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); -+ set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); -+ -+ /* -+ * we must insert the new inode into the inode cache before calling -+ * bch2_trans_exit() and dropping locks, else we could race with another -+ * thread pulling the inode in and modifying it: -+ */ -+ -+ old = to_bch_ei(insert_inode_locked2(&inode->v)); -+ if (unlikely(old)) { -+ /* -+ * We raced, another process pulled the new inode into cache -+ * before us: -+ */ -+ journal_seq_copy(old, journal_seq); -+ make_bad_inode(&inode->v); -+ iput(&inode->v); -+ -+ inode = old; -+ } else { -+ /* -+ * we really don't want insert_inode_locked2() to be setting -+ * I_NEW... -+ */ -+ unlock_new_inode(&inode->v); -+ } -+ -+ bch2_trans_exit(&trans); -+err: -+ posix_acl_release(default_acl); -+ posix_acl_release(acl); -+ return inode; -+err_trans: -+ if (!tmpfile) -+ mutex_unlock(&dir->ei_update_lock); -+ -+ bch2_trans_exit(&trans); -+ make_bad_inode(&inode->v); -+ iput(&inode->v); -+ inode = ERR_PTR(ret); -+ goto err; -+} -+ -+/* methods */ -+ -+static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, -+ unsigned int flags) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir); -+ struct inode *vinode = NULL; -+ u64 inum; -+ -+ inum = bch2_dirent_lookup(c, dir->v.i_ino, -+ &dir->ei_str_hash, -+ &dentry->d_name); -+ -+ if (inum) -+ vinode = bch2_vfs_inode_get(c, inum); -+ -+ return d_splice_alias(vinode, dentry); -+} -+ -+static int bch2_mknod(struct inode *vdir, struct dentry *dentry, -+ umode_t mode, dev_t rdev) -+{ -+ struct bch_inode_info *inode = -+ __bch2_create(to_bch_ei(vdir), dentry, mode, rdev, false); -+ -+ if (IS_ERR(inode)) -+ return PTR_ERR(inode); -+ -+ d_instantiate(dentry, &inode->v); -+ return 0; -+} -+ -+static int bch2_create(struct inode *vdir, struct dentry *dentry, -+ umode_t mode, bool excl) -+{ -+ return bch2_mknod(vdir, dentry, mode|S_IFREG, 0); -+} -+ -+static int __bch2_link(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch_inode_info *dir, -+ struct dentry *dentry) -+{ -+ struct btree_trans trans; -+ struct bch_inode_unpacked dir_u, inode_u; -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ bch2_trans_init(&trans, c, 4, 1024); -+ -+ do { -+ bch2_trans_begin(&trans); -+ ret = bch2_link_trans(&trans, -+ dir->v.i_ino, -+ inode->v.i_ino, &dir_u, &inode_u, -+ &dentry->d_name) ?: -+ bch2_trans_commit(&trans, NULL, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK); -+ } while (ret == -EINTR); -+ -+ if (likely(!ret)) { -+ BUG_ON(inode_u.bi_inum != inode->v.i_ino); -+ -+ journal_seq_copy(inode, dir->ei_journal_seq); -+ bch2_inode_update_after_write(c, dir, &dir_u, -+ 
ATTR_MTIME|ATTR_CTIME); -+ bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME); -+ } -+ -+ bch2_trans_exit(&trans); -+ mutex_unlock(&inode->ei_update_lock); -+ return ret; -+} -+ -+static int bch2_link(struct dentry *old_dentry, struct inode *vdir, -+ struct dentry *dentry) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir); -+ struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode); -+ int ret; -+ -+ lockdep_assert_held(&inode->v.i_rwsem); -+ -+ ret = __bch2_link(c, inode, dir, dentry); -+ if (unlikely(ret)) -+ return ret; -+ -+ ihold(&inode->v); -+ d_instantiate(dentry, &inode->v); -+ return 0; -+} -+ -+static int bch2_unlink(struct inode *vdir, struct dentry *dentry) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir); -+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); -+ struct bch_inode_unpacked dir_u, inode_u; -+ struct btree_trans trans; -+ int ret; -+ -+ bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); -+ bch2_trans_init(&trans, c, 4, 1024); -+ -+ do { -+ bch2_trans_begin(&trans); -+ -+ ret = bch2_unlink_trans(&trans, -+ dir->v.i_ino, &dir_u, -+ &inode_u, &dentry->d_name) ?: -+ bch2_trans_commit(&trans, NULL, -+ &dir->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOFAIL); -+ } while (ret == -EINTR); -+ -+ if (likely(!ret)) { -+ BUG_ON(inode_u.bi_inum != inode->v.i_ino); -+ -+ journal_seq_copy(inode, dir->ei_journal_seq); -+ bch2_inode_update_after_write(c, dir, &dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ bch2_inode_update_after_write(c, inode, &inode_u, -+ ATTR_MTIME); -+ } -+ -+ bch2_trans_exit(&trans); -+ bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); -+ -+ return ret; -+} -+ -+static int bch2_symlink(struct inode *vdir, struct dentry *dentry, -+ const char *symname) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir), *inode; -+ int ret; -+ -+ inode = __bch2_create(dir, dentry, S_IFLNK|S_IRWXUGO, 0, true); -+ if (unlikely(IS_ERR(inode))) -+ return PTR_ERR(inode); -+ -+ inode_lock(&inode->v); -+ ret = page_symlink(&inode->v, symname, strlen(symname) + 1); -+ inode_unlock(&inode->v); -+ -+ if (unlikely(ret)) -+ goto err; -+ -+ ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX); -+ if (unlikely(ret)) -+ goto err; -+ -+ journal_seq_copy(dir, inode->ei_journal_seq); -+ -+ ret = __bch2_link(c, inode, dir, dentry); -+ if (unlikely(ret)) -+ goto err; -+ -+ d_instantiate(dentry, &inode->v); -+ return 0; -+err: -+ iput(&inode->v); -+ return ret; -+} -+ -+static int bch2_mkdir(struct inode *vdir, struct dentry *dentry, umode_t mode) -+{ -+ return bch2_mknod(vdir, dentry, mode|S_IFDIR, 0); -+} -+ -+static int bch2_rename2(struct inode *src_vdir, struct dentry *src_dentry, -+ struct inode *dst_vdir, struct dentry *dst_dentry, -+ unsigned flags) -+{ -+ struct bch_fs *c = src_vdir->i_sb->s_fs_info; -+ struct bch_inode_info *src_dir = to_bch_ei(src_vdir); -+ struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir); -+ struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode); -+ struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode); -+ struct bch_inode_unpacked dst_dir_u, src_dir_u; -+ struct bch_inode_unpacked src_inode_u, dst_inode_u; -+ struct btree_trans trans; -+ enum bch_rename_mode mode = flags & RENAME_EXCHANGE -+ ? BCH_RENAME_EXCHANGE -+ : dst_dentry->d_inode -+ ? 
BCH_RENAME_OVERWRITE : BCH_RENAME; -+ u64 journal_seq = 0; -+ int ret; -+ -+ if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE)) -+ return -EINVAL; -+ -+ if (mode == BCH_RENAME_OVERWRITE) { -+ ret = filemap_write_and_wait_range(src_inode->v.i_mapping, -+ 0, LLONG_MAX); -+ if (ret) -+ return ret; -+ } -+ -+ bch2_trans_init(&trans, c, 8, 2048); -+ -+ bch2_lock_inodes(INODE_UPDATE_LOCK, -+ src_dir, -+ dst_dir, -+ src_inode, -+ dst_inode); -+ -+ if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) { -+ ret = bch2_fs_quota_transfer(c, src_inode, -+ dst_dir->ei_qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err; -+ } -+ -+ if (mode == BCH_RENAME_EXCHANGE && -+ inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) { -+ ret = bch2_fs_quota_transfer(c, dst_inode, -+ src_dir->ei_qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err; -+ } -+ -+retry: -+ bch2_trans_begin(&trans); -+ ret = bch2_rename_trans(&trans, -+ src_dir->v.i_ino, &src_dir_u, -+ dst_dir->v.i_ino, &dst_dir_u, -+ &src_inode_u, -+ &dst_inode_u, -+ &src_dentry->d_name, -+ &dst_dentry->d_name, -+ mode) ?: -+ bch2_trans_commit(&trans, NULL, -+ &journal_seq, -+ BTREE_INSERT_NOUNLOCK); -+ if (ret == -EINTR) -+ goto retry; -+ if (unlikely(ret)) -+ goto err; -+ -+ BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum); -+ BUG_ON(dst_inode && -+ dst_inode->v.i_ino != dst_inode_u.bi_inum); -+ -+ bch2_inode_update_after_write(c, src_dir, &src_dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ journal_seq_copy(src_dir, journal_seq); -+ -+ if (src_dir != dst_dir) { -+ bch2_inode_update_after_write(c, dst_dir, &dst_dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ journal_seq_copy(dst_dir, journal_seq); -+ } -+ -+ bch2_inode_update_after_write(c, src_inode, &src_inode_u, -+ ATTR_CTIME); -+ journal_seq_copy(src_inode, journal_seq); -+ -+ if (dst_inode) { -+ bch2_inode_update_after_write(c, dst_inode, &dst_inode_u, -+ ATTR_CTIME); -+ journal_seq_copy(dst_inode, journal_seq); -+ } -+err: -+ bch2_trans_exit(&trans); -+ -+ bch2_fs_quota_transfer(c, src_inode, -+ bch_qid(&src_inode->ei_inode), -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_NOCHECK); -+ if (dst_inode) -+ bch2_fs_quota_transfer(c, dst_inode, -+ bch_qid(&dst_inode->ei_inode), -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_NOCHECK); -+ -+ bch2_unlock_inodes(INODE_UPDATE_LOCK, -+ src_dir, -+ dst_dir, -+ src_inode, -+ dst_inode); -+ -+ return ret; -+} -+ -+void bch2_setattr_copy(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ struct iattr *attr) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ unsigned int ia_valid = attr->ia_valid; -+ -+ if (ia_valid & ATTR_UID) -+ bi->bi_uid = from_kuid(c->vfs_sb->s_user_ns, attr->ia_uid); -+ if (ia_valid & ATTR_GID) -+ bi->bi_gid = from_kgid(c->vfs_sb->s_user_ns, attr->ia_gid); -+ -+ if (ia_valid & ATTR_ATIME) -+ bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime); -+ if (ia_valid & ATTR_MTIME) -+ bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime); -+ if (ia_valid & ATTR_CTIME) -+ bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime); -+ -+ if (ia_valid & ATTR_MODE) { -+ umode_t mode = attr->ia_mode; -+ kgid_t gid = ia_valid & ATTR_GID -+ ? 
attr->ia_gid -+ : inode->v.i_gid; -+ -+ if (!in_group_p(gid) && -+ !capable_wrt_inode_uidgid(&inode->v, CAP_FSETID)) -+ mode &= ~S_ISGID; -+ bi->bi_mode = mode; -+ } -+} -+ -+static int bch2_setattr_nonsize(struct bch_inode_info *inode, -+ struct iattr *attr) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_qid qid; -+ struct btree_trans trans; -+ struct btree_iter *inode_iter; -+ struct bch_inode_unpacked inode_u; -+ struct posix_acl *acl = NULL; -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ -+ qid = inode->ei_qid; -+ -+ if (attr->ia_valid & ATTR_UID) -+ qid.q[QTYP_USR] = from_kuid(&init_user_ns, attr->ia_uid); -+ -+ if (attr->ia_valid & ATTR_GID) -+ qid.q[QTYP_GRP] = from_kgid(&init_user_ns, attr->ia_gid); -+ -+ ret = bch2_fs_quota_transfer(c, inode, qid, ~0, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ kfree(acl); -+ acl = NULL; -+ -+ inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ if (ret) -+ goto btree_err; -+ -+ bch2_setattr_copy(inode, &inode_u, attr); -+ -+ if (attr->ia_valid & ATTR_MODE) { -+ ret = bch2_acl_chmod(&trans, inode, inode_u.bi_mode, &acl); -+ if (ret) -+ goto btree_err; -+ } -+ -+ ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: -+ bch2_trans_commit(&trans, NULL, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOFAIL); -+btree_err: -+ if (ret == -EINTR) -+ goto retry; -+ if (unlikely(ret)) -+ goto err_trans; -+ -+ bch2_inode_update_after_write(c, inode, &inode_u, attr->ia_valid); -+ -+ if (acl) -+ set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); -+err_trans: -+ bch2_trans_exit(&trans); -+err: -+ mutex_unlock(&inode->ei_update_lock); -+ -+ return ret; -+} -+ -+static int bch2_getattr(const struct path *path, struct kstat *stat, -+ u32 request_mask, unsigned query_flags) -+{ -+ struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry)); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ stat->dev = inode->v.i_sb->s_dev; -+ stat->ino = inode->v.i_ino; -+ stat->mode = inode->v.i_mode; -+ stat->nlink = inode->v.i_nlink; -+ stat->uid = inode->v.i_uid; -+ stat->gid = inode->v.i_gid; -+ stat->rdev = inode->v.i_rdev; -+ stat->size = i_size_read(&inode->v); -+ stat->atime = inode->v.i_atime; -+ stat->mtime = inode->v.i_mtime; -+ stat->ctime = inode->v.i_ctime; -+ stat->blksize = block_bytes(c); -+ stat->blocks = inode->v.i_blocks; -+ -+ if (request_mask & STATX_BTIME) { -+ stat->result_mask |= STATX_BTIME; -+ stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime); -+ } -+ -+ if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE) -+ stat->attributes |= STATX_ATTR_IMMUTABLE; -+ stat->attributes_mask |= STATX_ATTR_IMMUTABLE; -+ -+ if (inode->ei_inode.bi_flags & BCH_INODE_APPEND) -+ stat->attributes |= STATX_ATTR_APPEND; -+ stat->attributes_mask |= STATX_ATTR_APPEND; -+ -+ if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP) -+ stat->attributes |= STATX_ATTR_NODUMP; -+ stat->attributes_mask |= STATX_ATTR_NODUMP; -+ -+ return 0; -+} -+ -+static int bch2_setattr(struct dentry *dentry, struct iattr *iattr) -+{ -+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); -+ int ret; -+ -+ lockdep_assert_held(&inode->v.i_rwsem); -+ -+ ret = setattr_prepare(dentry, iattr); -+ if (ret) -+ return ret; -+ -+ return iattr->ia_valid & ATTR_SIZE -+ ? 
bch2_truncate(inode, iattr) -+ : bch2_setattr_nonsize(inode, iattr); -+} -+ -+static int bch2_tmpfile(struct inode *vdir, struct dentry *dentry, umode_t mode) -+{ -+ struct bch_inode_info *inode = -+ __bch2_create(to_bch_ei(vdir), dentry, mode, 0, true); -+ -+ if (IS_ERR(inode)) -+ return PTR_ERR(inode); -+ -+ d_mark_tmpfile(dentry, &inode->v); -+ d_instantiate(dentry, &inode->v); -+ return 0; -+} -+ -+static int bch2_fill_extent(struct bch_fs *c, -+ struct fiemap_extent_info *info, -+ struct bkey_s_c k, unsigned flags) -+{ -+ if (bkey_extent_is_data(k.k)) { -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ int ret; -+ -+ if (k.k->type == KEY_TYPE_reflink_v) -+ flags |= FIEMAP_EXTENT_SHARED; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ int flags2 = 0; -+ u64 offset = p.ptr.offset; -+ -+ if (p.crc.compression_type) -+ flags2 |= FIEMAP_EXTENT_ENCODED; -+ else -+ offset += p.crc.offset; -+ -+ if ((offset & (c->opts.block_size - 1)) || -+ (k.k->size & (c->opts.block_size - 1))) -+ flags2 |= FIEMAP_EXTENT_NOT_ALIGNED; -+ -+ ret = fiemap_fill_next_extent(info, -+ bkey_start_offset(k.k) << 9, -+ offset << 9, -+ k.k->size << 9, flags|flags2); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+ } else if (k.k->type == KEY_TYPE_reservation) { -+ return fiemap_fill_next_extent(info, -+ bkey_start_offset(k.k) << 9, -+ 0, k.k->size << 9, -+ flags| -+ FIEMAP_EXTENT_DELALLOC| -+ FIEMAP_EXTENT_UNWRITTEN); -+ } else { -+ BUG(); -+ } -+} -+ -+static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, -+ u64 start, u64 len) -+{ -+ struct bch_fs *c = vinode->i_sb->s_fs_info; -+ struct bch_inode_info *ei = to_bch_ei(vinode); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_on_stack cur, prev; -+ struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); -+ unsigned offset_into_extent, sectors; -+ bool have_extent = false; -+ int ret = 0; -+ -+ if (start + len < start) -+ return -EINVAL; -+ -+ bkey_on_stack_init(&cur); -+ bkey_on_stack_init(&prev); -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(ei->v.i_ino, start >> 9), 0); -+retry: -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret = bkey_err(k)) && -+ bkey_cmp(iter->pos, end) < 0) { -+ if (!bkey_extent_is_data(k.k) && -+ k.k->type != KEY_TYPE_reservation) { -+ bch2_btree_iter_next(iter); -+ continue; -+ } -+ -+ bkey_on_stack_realloc(&cur, c, k.k->u64s); -+ bkey_on_stack_realloc(&prev, c, k.k->u64s); -+ bkey_reassemble(cur.k, k); -+ k = bkey_i_to_s_c(cur.k); -+ -+ offset_into_extent = iter->pos.offset - -+ bkey_start_offset(k.k); -+ sectors = k.k->size - offset_into_extent; -+ -+ ret = bch2_read_indirect_extent(&trans, -+ &offset_into_extent, &cur); -+ if (ret) -+ break; -+ -+ sectors = min(sectors, k.k->size - offset_into_extent); -+ -+ if (offset_into_extent) -+ bch2_cut_front(POS(k.k->p.inode, -+ bkey_start_offset(k.k) + -+ offset_into_extent), -+ cur.k); -+ bch2_key_resize(&cur.k->k, sectors); -+ cur.k->k.p = iter->pos; -+ cur.k->k.p.offset += cur.k->k.size; -+ -+ if (have_extent) { -+ ret = bch2_fill_extent(c, info, -+ bkey_i_to_s_c(prev.k), 0); -+ if (ret) -+ break; -+ } -+ -+ bkey_copy(prev.k, cur.k); -+ have_extent = true; -+ -+ if (k.k->type == KEY_TYPE_reflink_v) -+ bch2_btree_iter_set_pos(iter, k.k->p); -+ else -+ bch2_btree_iter_next(iter); -+ } -+ -+ if (ret == -EINTR) -+ goto retry; -+ -+ if (!ret && have_extent) -+ ret = bch2_fill_extent(c, 
info, bkey_i_to_s_c(prev.k), -+ FIEMAP_EXTENT_LAST); -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ bkey_on_stack_exit(&cur, c); -+ bkey_on_stack_exit(&prev, c); -+ return ret < 0 ? ret : 0; -+} -+ -+static const struct vm_operations_struct bch_vm_ops = { -+ .fault = bch2_page_fault, -+ .map_pages = filemap_map_pages, -+ .page_mkwrite = bch2_page_mkwrite, -+}; -+ -+static int bch2_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ file_accessed(file); -+ -+ vma->vm_ops = &bch_vm_ops; -+ return 0; -+} -+ -+/* Directories: */ -+ -+static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence) -+{ -+ return generic_file_llseek_size(file, offset, whence, -+ S64_MAX, S64_MAX); -+} -+ -+static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ if (!dir_emit_dots(file, ctx)) -+ return 0; -+ -+ return bch2_readdir(c, inode->v.i_ino, ctx); -+} -+ -+static const struct file_operations bch_file_operations = { -+ .llseek = bch2_llseek, -+ .read_iter = bch2_read_iter, -+ .write_iter = bch2_write_iter, -+ .mmap = bch2_mmap, -+ .open = generic_file_open, -+ .fsync = bch2_fsync, -+ .splice_read = generic_file_splice_read, -+ /* -+ * Broken, on v5.3: -+ .splice_write = iter_file_splice_write, -+ */ -+ .fallocate = bch2_fallocate_dispatch, -+ .unlocked_ioctl = bch2_fs_file_ioctl, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = bch2_compat_fs_ioctl, -+#endif -+ .remap_file_range = bch2_remap_file_range, -+}; -+ -+static const struct inode_operations bch_file_inode_operations = { -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .fiemap = bch2_fiemap, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct inode_operations bch_dir_inode_operations = { -+ .lookup = bch2_lookup, -+ .create = bch2_create, -+ .link = bch2_link, -+ .unlink = bch2_unlink, -+ .symlink = bch2_symlink, -+ .mkdir = bch2_mkdir, -+ .rmdir = bch2_unlink, -+ .mknod = bch2_mknod, -+ .rename = bch2_rename2, -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .tmpfile = bch2_tmpfile, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct file_operations bch_dir_file_operations = { -+ .llseek = bch2_dir_llseek, -+ .read = generic_read_dir, -+ .iterate_shared = bch2_vfs_readdir, -+ .fsync = bch2_fsync, -+ .unlocked_ioctl = bch2_fs_file_ioctl, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = bch2_compat_fs_ioctl, -+#endif -+}; -+ -+static const struct inode_operations bch_symlink_inode_operations = { -+ .get_link = page_get_link, -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct inode_operations bch_special_inode_operations = { -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct address_space_operations bch_address_space_operations = { -+ .writepage = bch2_writepage, -+ .readpage = bch2_readpage, -+ .writepages = bch2_writepages, -+ .readpages = bch2_readpages, -+ .set_page_dirty = __set_page_dirty_nobuffers, -+ .write_begin = bch2_write_begin, -+ 
.write_end = bch2_write_end, -+ .invalidatepage = bch2_invalidatepage, -+ .releasepage = bch2_releasepage, -+ .direct_IO = noop_direct_IO, -+#ifdef CONFIG_MIGRATION -+ .migratepage = bch2_migrate_page, -+#endif -+ .error_remove_page = generic_error_remove_page, -+}; -+ -+static struct inode *bch2_nfs_get_inode(struct super_block *sb, -+ u64 ino, u32 generation) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct inode *vinode; -+ -+ if (ino < BCACHEFS_ROOT_INO) -+ return ERR_PTR(-ESTALE); -+ -+ vinode = bch2_vfs_inode_get(c, ino); -+ if (IS_ERR(vinode)) -+ return ERR_CAST(vinode); -+ if (generation && vinode->i_generation != generation) { -+ /* we didn't find the right inode.. */ -+ iput(vinode); -+ return ERR_PTR(-ESTALE); -+ } -+ return vinode; -+} -+ -+static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *fid, -+ int fh_len, int fh_type) -+{ -+ return generic_fh_to_dentry(sb, fid, fh_len, fh_type, -+ bch2_nfs_get_inode); -+} -+ -+static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *fid, -+ int fh_len, int fh_type) -+{ -+ return generic_fh_to_parent(sb, fid, fh_len, fh_type, -+ bch2_nfs_get_inode); -+} -+ -+static const struct export_operations bch_export_ops = { -+ .fh_to_dentry = bch2_fh_to_dentry, -+ .fh_to_parent = bch2_fh_to_parent, -+ //.get_parent = bch2_get_parent, -+}; -+ -+static void bch2_vfs_inode_init(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi) -+{ -+ bch2_inode_update_after_write(c, inode, bi, ~0); -+ -+ inode->v.i_blocks = bi->bi_sectors; -+ inode->v.i_ino = bi->bi_inum; -+ inode->v.i_rdev = bi->bi_dev; -+ inode->v.i_generation = bi->bi_generation; -+ inode->v.i_size = bi->bi_size; -+ -+ inode->ei_journal_seq = 0; -+ inode->ei_quota_reserved = 0; -+ inode->ei_str_hash = bch2_hash_info_init(c, bi); -+ inode->ei_qid = bch_qid(bi); -+ -+ inode->v.i_mapping->a_ops = &bch_address_space_operations; -+ -+ switch (inode->v.i_mode & S_IFMT) { -+ case S_IFREG: -+ inode->v.i_op = &bch_file_inode_operations; -+ inode->v.i_fop = &bch_file_operations; -+ break; -+ case S_IFDIR: -+ inode->v.i_op = &bch_dir_inode_operations; -+ inode->v.i_fop = &bch_dir_file_operations; -+ break; -+ case S_IFLNK: -+ inode_nohighmem(&inode->v); -+ inode->v.i_op = &bch_symlink_inode_operations; -+ break; -+ default: -+ init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev); -+ inode->v.i_op = &bch_special_inode_operations; -+ break; -+ } -+} -+ -+static struct inode *bch2_alloc_inode(struct super_block *sb) -+{ -+ struct bch_inode_info *inode; -+ -+ inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS); -+ if (!inode) -+ return NULL; -+ -+ inode_init_once(&inode->v); -+ mutex_init(&inode->ei_update_lock); -+ pagecache_lock_init(&inode->ei_pagecache_lock); -+ mutex_init(&inode->ei_quota_lock); -+ inode->ei_journal_seq = 0; -+ -+ return &inode->v; -+} -+ -+static void bch2_i_callback(struct rcu_head *head) -+{ -+ struct inode *vinode = container_of(head, struct inode, i_rcu); -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ -+ kmem_cache_free(bch2_inode_cache, inode); -+} -+ -+static void bch2_destroy_inode(struct inode *vinode) -+{ -+ call_rcu(&vinode->i_rcu, bch2_i_callback); -+} -+ -+static int inode_update_times_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ bi->bi_atime = timespec_to_bch2_time(c, inode->v.i_atime); -+ bi->bi_mtime = timespec_to_bch2_time(c, inode->v.i_mtime); -+ bi->bi_ctime = timespec_to_bch2_time(c, 
inode->v.i_ctime); -+ -+ return 0; -+} -+ -+static int bch2_vfs_write_inode(struct inode *vinode, -+ struct writeback_control *wbc) -+{ -+ struct bch_fs *c = vinode->i_sb->s_fs_info; -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, -+ ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ -+ return ret; -+} -+ -+static void bch2_evict_inode(struct inode *vinode) -+{ -+ struct bch_fs *c = vinode->i_sb->s_fs_info; -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ -+ truncate_inode_pages_final(&inode->v.i_data); -+ -+ clear_inode(&inode->v); -+ -+ BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved); -+ -+ if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { -+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks), -+ KEY_TYPE_QUOTA_WARN); -+ bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, -+ KEY_TYPE_QUOTA_WARN); -+ bch2_inode_rm(c, inode->v.i_ino); -+ } -+} -+ -+static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) -+{ -+ struct super_block *sb = dentry->d_sb; -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); -+ unsigned shift = sb->s_blocksize_bits - 9; -+ u64 fsid; -+ -+ buf->f_type = BCACHEFS_STATFS_MAGIC; -+ buf->f_bsize = sb->s_blocksize; -+ buf->f_blocks = usage.capacity >> shift; -+ buf->f_bfree = (usage.capacity - usage.used) >> shift; -+ buf->f_bavail = buf->f_bfree; -+ buf->f_files = usage.nr_inodes; -+ buf->f_ffree = U64_MAX; -+ -+ fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^ -+ le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64)); -+ buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; -+ buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; -+ buf->f_namelen = BCH_NAME_MAX; -+ -+ return 0; -+} -+ -+static int bch2_sync_fs(struct super_block *sb, int wait) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ if (c->opts.journal_flush_disabled) -+ return 0; -+ -+ if (!wait) { -+ bch2_journal_flush_async(&c->journal, NULL); -+ return 0; -+ } -+ -+ return bch2_journal_flush(&c->journal); -+} -+ -+static struct bch_fs *bch2_path_to_fs(const char *dev) -+{ -+ struct bch_fs *c; -+ struct block_device *bdev = lookup_bdev(dev); -+ -+ if (IS_ERR(bdev)) -+ return ERR_CAST(bdev); -+ -+ c = bch2_bdev_to_fs(bdev); -+ bdput(bdev); -+ return c ?: ERR_PTR(-ENOENT); -+} -+ -+static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * const *devs, -+ unsigned nr_devs, struct bch_opts opts) -+{ -+ struct bch_fs *c, *c1, *c2; -+ size_t i; -+ -+ if (!nr_devs) -+ return ERR_PTR(-EINVAL); -+ -+ c = bch2_fs_open(devs, nr_devs, opts); -+ -+ if (IS_ERR(c) && PTR_ERR(c) == -EBUSY) { -+ /* -+ * Already open? 
-+ * Look up each block device, make sure they all belong to a -+ * filesystem and they all belong to the _same_ filesystem -+ */ -+ -+ c1 = bch2_path_to_fs(devs[0]); -+ if (IS_ERR(c1)) -+ return c; -+ -+ for (i = 1; i < nr_devs; i++) { -+ c2 = bch2_path_to_fs(devs[i]); -+ if (!IS_ERR(c2)) -+ closure_put(&c2->cl); -+ -+ if (c1 != c2) { -+ closure_put(&c1->cl); -+ return c; -+ } -+ } -+ -+ c = c1; -+ } -+ -+ if (IS_ERR(c)) -+ return c; -+ -+ down_write(&c->state_lock); -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) { -+ up_write(&c->state_lock); -+ closure_put(&c->cl); -+ pr_err("err mounting %s: incomplete filesystem", dev_name); -+ return ERR_PTR(-EINVAL); -+ } -+ -+ up_write(&c->state_lock); -+ -+ set_bit(BCH_FS_BDEV_MOUNTED, &c->flags); -+ return c; -+} -+ -+static struct bch_fs *bch2_open_as_blockdevs(const char *_dev_name, -+ struct bch_opts opts) -+{ -+ char *dev_name = NULL, **devs = NULL, *s; -+ struct bch_fs *c = ERR_PTR(-ENOMEM); -+ size_t i, nr_devs = 0; -+ -+ dev_name = kstrdup(_dev_name, GFP_KERNEL); -+ if (!dev_name) -+ goto err; -+ -+ for (s = dev_name; s; s = strchr(s + 1, ':')) -+ nr_devs++; -+ -+ devs = kcalloc(nr_devs, sizeof(const char *), GFP_KERNEL); -+ if (!devs) -+ goto err; -+ -+ for (i = 0, s = dev_name; -+ s; -+ (s = strchr(s, ':')) && (*s++ = '\0')) -+ devs[i++] = s; -+ -+ c = __bch2_open_as_blockdevs(_dev_name, devs, nr_devs, opts); -+err: -+ kfree(devs); -+ kfree(dev_name); -+ return c; -+} -+ -+static int bch2_remount(struct super_block *sb, int *flags, char *data) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_opts opts = bch2_opts_empty(); -+ int ret; -+ -+ opt_set(opts, read_only, (*flags & SB_RDONLY) != 0); -+ -+ ret = bch2_parse_mount_opts(&opts, data); -+ if (ret) -+ return ret; -+ -+ if (opts.read_only != c->opts.read_only) { -+ down_write(&c->state_lock); -+ -+ if (opts.read_only) { -+ bch2_fs_read_only(c); -+ -+ sb->s_flags |= SB_RDONLY; -+ } else { -+ ret = bch2_fs_read_write(c); -+ if (ret) { -+ bch_err(c, "error going rw: %i", ret); -+ up_write(&c->state_lock); -+ return -EINVAL; -+ } -+ -+ sb->s_flags &= ~SB_RDONLY; -+ } -+ -+ c->opts.read_only = opts.read_only; -+ -+ up_write(&c->state_lock); -+ } -+ -+ if (opts.errors >= 0) -+ c->opts.errors = opts.errors; -+ -+ return ret; -+} -+ -+static int bch2_show_options(struct seq_file *seq, struct dentry *root) -+{ -+ struct bch_fs *c = root->d_sb->s_fs_info; -+ enum bch_opt_id i; -+ char buf[512]; -+ -+ for (i = 0; i < bch2_opts_nr; i++) { -+ const struct bch_option *opt = &bch2_opt_table[i]; -+ u64 v = bch2_opt_get_by_id(&c->opts, i); -+ -+ if (!(opt->mode & OPT_MOUNT)) -+ continue; -+ -+ if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) -+ continue; -+ -+ bch2_opt_to_text(&PBUF(buf), c, opt, v, -+ OPT_SHOW_MOUNT_STYLE); -+ seq_putc(seq, ','); -+ seq_puts(seq, buf); -+ } -+ -+ return 0; -+ -+} -+ -+static const struct super_operations bch_super_operations = { -+ .alloc_inode = bch2_alloc_inode, -+ .destroy_inode = bch2_destroy_inode, -+ .write_inode = bch2_vfs_write_inode, -+ .evict_inode = bch2_evict_inode, -+ .sync_fs = bch2_sync_fs, -+ .statfs = bch2_statfs, -+ .show_options = bch2_show_options, -+ .remount_fs = bch2_remount, -+#if 0 -+ .put_super = bch2_put_super, -+ .freeze_fs = bch2_freeze, -+ .unfreeze_fs = bch2_unfreeze, -+#endif -+}; -+ -+static int bch2_test_super(struct super_block *s, void *data) -+{ -+ return s->s_fs_info == data; -+} -+ -+static int bch2_set_super(struct super_block *s, void *data) -+{ -+ s->s_fs_info = data; -+ return 0; -+} -+ -+static struct dentry 
*bch2_mount(struct file_system_type *fs_type, -+ int flags, const char *dev_name, void *data) -+{ -+ struct bch_fs *c; -+ struct bch_dev *ca; -+ struct super_block *sb; -+ struct inode *vinode; -+ struct bch_opts opts = bch2_opts_empty(); -+ unsigned i; -+ int ret; -+ -+ opt_set(opts, read_only, (flags & SB_RDONLY) != 0); -+ -+ ret = bch2_parse_mount_opts(&opts, data); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ c = bch2_open_as_blockdevs(dev_name, opts); -+ if (IS_ERR(c)) -+ return ERR_CAST(c); -+ -+ sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|SB_NOSEC, c); -+ if (IS_ERR(sb)) { -+ closure_put(&c->cl); -+ return ERR_CAST(sb); -+ } -+ -+ BUG_ON(sb->s_fs_info != c); -+ -+ if (sb->s_root) { -+ closure_put(&c->cl); -+ -+ if ((flags ^ sb->s_flags) & SB_RDONLY) { -+ ret = -EBUSY; -+ goto err_put_super; -+ } -+ goto out; -+ } -+ -+ sb->s_blocksize = block_bytes(c); -+ sb->s_blocksize_bits = ilog2(block_bytes(c)); -+ sb->s_maxbytes = MAX_LFS_FILESIZE; -+ sb->s_op = &bch_super_operations; -+ sb->s_export_op = &bch_export_ops; -+#ifdef CONFIG_BCACHEFS_QUOTA -+ sb->s_qcop = &bch2_quotactl_operations; -+ sb->s_quota_types = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ; -+#endif -+ sb->s_xattr = bch2_xattr_handlers; -+ sb->s_magic = BCACHEFS_STATFS_MAGIC; -+ sb->s_time_gran = c->sb.time_precision; -+ c->vfs_sb = sb; -+ strlcpy(sb->s_id, c->name, sizeof(sb->s_id)); -+ -+ ret = super_setup_bdi(sb); -+ if (ret) -+ goto err_put_super; -+ -+ sb->s_bdi->congested_fn = bch2_congested; -+ sb->s_bdi->congested_data = c; -+ sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; -+ -+ for_each_online_member(ca, c, i) { -+ struct block_device *bdev = ca->disk_sb.bdev; -+ -+ /* XXX: create an anonymous device for multi device filesystems */ -+ sb->s_bdev = bdev; -+ sb->s_dev = bdev->bd_dev; -+ percpu_ref_put(&ca->io_ref); -+ break; -+ } -+ -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ if (c->opts.acl) -+ sb->s_flags |= SB_POSIXACL; -+#endif -+ -+ vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_INO); -+ if (IS_ERR(vinode)) { -+ bch_err(c, "error mounting: error getting root inode %i", -+ (int) PTR_ERR(vinode)); -+ ret = PTR_ERR(vinode); -+ goto err_put_super; -+ } -+ -+ sb->s_root = d_make_root(vinode); -+ if (!sb->s_root) { -+ bch_err(c, "error mounting: error allocating root dentry"); -+ ret = -ENOMEM; -+ goto err_put_super; -+ } -+ -+ sb->s_flags |= SB_ACTIVE; -+out: -+ return dget(sb->s_root); -+ -+err_put_super: -+ deactivate_locked_super(sb); -+ return ERR_PTR(ret); -+} -+ -+static void bch2_kill_sb(struct super_block *sb) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ generic_shutdown_super(sb); -+ -+ if (test_bit(BCH_FS_BDEV_MOUNTED, &c->flags)) -+ bch2_fs_stop(c); -+ else -+ closure_put(&c->cl); -+} -+ -+static struct file_system_type bcache_fs_type = { -+ .owner = THIS_MODULE, -+ .name = "bcachefs", -+ .mount = bch2_mount, -+ .kill_sb = bch2_kill_sb, -+ .fs_flags = FS_REQUIRES_DEV, -+}; -+ -+MODULE_ALIAS_FS("bcachefs"); -+ -+void bch2_vfs_exit(void) -+{ -+ unregister_filesystem(&bcache_fs_type); -+ if (bch2_inode_cache) -+ kmem_cache_destroy(bch2_inode_cache); -+} -+ -+int __init bch2_vfs_init(void) -+{ -+ int ret = -ENOMEM; -+ -+ bch2_inode_cache = KMEM_CACHE(bch_inode_info, 0); -+ if (!bch2_inode_cache) -+ goto err; -+ -+ ret = register_filesystem(&bcache_fs_type); -+ if (ret) -+ goto err; -+ -+ return 0; -+err: -+ bch2_vfs_exit(); -+ return ret; -+} -+ -+#endif /* NO_BCACHEFS_FS */ -diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h -new file mode 100644 -index 000000000000..eda903a45325 ---- /dev/null -+++ 
b/fs/bcachefs/fs.h -@@ -0,0 +1,174 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_H -+#define _BCACHEFS_FS_H -+ -+#include "inode.h" -+#include "opts.h" -+#include "str_hash.h" -+#include "quota_types.h" -+ -+#include -+#include -+ -+/* -+ * Two-state lock - can be taken for add or block - both states are shared, -+ * like read side of rwsem, but conflict with other state: -+ */ -+struct pagecache_lock { -+ atomic_long_t v; -+ wait_queue_head_t wait; -+}; -+ -+static inline void pagecache_lock_init(struct pagecache_lock *lock) -+{ -+ atomic_long_set(&lock->v, 0); -+ init_waitqueue_head(&lock->wait); -+} -+ -+void bch2_pagecache_add_put(struct pagecache_lock *); -+void bch2_pagecache_add_get(struct pagecache_lock *); -+void bch2_pagecache_block_put(struct pagecache_lock *); -+void bch2_pagecache_block_get(struct pagecache_lock *); -+ -+struct bch_inode_info { -+ struct inode v; -+ -+ struct mutex ei_update_lock; -+ u64 ei_journal_seq; -+ u64 ei_quota_reserved; -+ unsigned long ei_last_dirtied; -+ -+ struct pagecache_lock ei_pagecache_lock; -+ -+ struct mutex ei_quota_lock; -+ struct bch_qid ei_qid; -+ -+ struct bch_hash_info ei_str_hash; -+ -+ /* copy of inode in btree: */ -+ struct bch_inode_unpacked ei_inode; -+}; -+ -+#define to_bch_ei(_inode) \ -+ container_of_or_null(_inode, struct bch_inode_info, v) -+ -+static inline int ptrcmp(void *l, void *r) -+{ -+ return cmp_int(l, r); -+} -+ -+enum bch_inode_lock_op { -+ INODE_LOCK = (1U << 0), -+ INODE_PAGECACHE_BLOCK = (1U << 1), -+ INODE_UPDATE_LOCK = (1U << 2), -+}; -+ -+#define bch2_lock_inodes(_locks, ...) \ -+do { \ -+ struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \ -+ unsigned i; \ -+ \ -+ bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \ -+ \ -+ for (i = 1; i < ARRAY_SIZE(a); i++) \ -+ if (a[i] != a[i - 1]) { \ -+ if ((_locks) & INODE_LOCK) \ -+ down_write_nested(&a[i]->v.i_rwsem, i); \ -+ if ((_locks) & INODE_PAGECACHE_BLOCK) \ -+ bch2_pagecache_block_get(&a[i]->ei_pagecache_lock);\ -+ if ((_locks) & INODE_UPDATE_LOCK) \ -+ mutex_lock_nested(&a[i]->ei_update_lock, i);\ -+ } \ -+} while (0) -+ -+#define bch2_unlock_inodes(_locks, ...) 
\ -+do { \ -+ struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \ -+ unsigned i; \ -+ \ -+ bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \ -+ \ -+ for (i = 1; i < ARRAY_SIZE(a); i++) \ -+ if (a[i] != a[i - 1]) { \ -+ if ((_locks) & INODE_LOCK) \ -+ up_write(&a[i]->v.i_rwsem); \ -+ if ((_locks) & INODE_PAGECACHE_BLOCK) \ -+ bch2_pagecache_block_put(&a[i]->ei_pagecache_lock);\ -+ if ((_locks) & INODE_UPDATE_LOCK) \ -+ mutex_unlock(&a[i]->ei_update_lock); \ -+ } \ -+} while (0) -+ -+static inline struct bch_inode_info *file_bch_inode(struct file *file) -+{ -+ return to_bch_ei(file_inode(file)); -+} -+ -+static inline bool inode_attr_changing(struct bch_inode_info *dir, -+ struct bch_inode_info *inode, -+ enum inode_opt_id id) -+{ -+ return !(inode->ei_inode.bi_fields_set & (1 << id)) && -+ bch2_inode_opt_get(&dir->ei_inode, id) != -+ bch2_inode_opt_get(&inode->ei_inode, id); -+} -+ -+static inline bool inode_attrs_changing(struct bch_inode_info *dir, -+ struct bch_inode_info *inode) -+{ -+ unsigned id; -+ -+ for (id = 0; id < Inode_opt_nr; id++) -+ if (inode_attr_changing(dir, inode, id)) -+ return true; -+ -+ return false; -+} -+ -+struct bch_inode_unpacked; -+ -+#ifndef NO_BCACHEFS_FS -+ -+int bch2_fs_quota_transfer(struct bch_fs *, -+ struct bch_inode_info *, -+ struct bch_qid, -+ unsigned, -+ enum quota_acct_mode); -+ -+static inline int bch2_set_projid(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ u32 projid) -+{ -+ struct bch_qid qid = inode->ei_qid; -+ -+ qid.q[QTYP_PRJ] = projid; -+ -+ return bch2_fs_quota_transfer(c, inode, qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+} -+ -+struct inode *bch2_vfs_inode_get(struct bch_fs *, u64); -+ -+/* returns 0 if we want to do the update, or error is passed up */ -+typedef int (*inode_set_fn)(struct bch_inode_info *, -+ struct bch_inode_unpacked *, void *); -+ -+void bch2_inode_update_after_write(struct bch_fs *, -+ struct bch_inode_info *, -+ struct bch_inode_unpacked *, -+ unsigned); -+int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, -+ inode_set_fn, void *, unsigned); -+ -+void bch2_vfs_exit(void); -+int bch2_vfs_init(void); -+ -+#else -+ -+static inline void bch2_vfs_exit(void) {} -+static inline int bch2_vfs_init(void) { return 0; } -+ -+#endif /* NO_BCACHEFS_FS */ -+ -+#endif /* _BCACHEFS_FS_H */ -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -new file mode 100644 -index 000000000000..c6ca5968a2e0 ---- /dev/null -+++ b/fs/bcachefs/fsck.c -@@ -0,0 +1,1498 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "dirent.h" -+#include "error.h" -+#include "fs-common.h" -+#include "fsck.h" -+#include "inode.h" -+#include "keylist.h" -+#include "super.h" -+#include "xattr.h" -+ -+#include /* struct qstr */ -+#include -+ -+#define QSTR(n) { { { .len = strlen(n) } }, .name = n } -+ -+static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 sectors = 0; -+ int ret; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_EXTENTS, -+ POS(inum, 0), 0, k, ret) { -+ if (k.k->p.inode != inum) -+ break; -+ -+ if (bkey_extent_is_allocation(k.k)) -+ sectors += k.k->size; -+ } -+ -+ bch2_trans_iter_free(trans, iter); -+ -+ return ret ?: sectors; -+} -+ -+static int __remove_dirent(struct btree_trans *trans, -+ struct bkey_s_c_dirent dirent) -+{ -+ struct bch_fs *c = trans->c; -+ struct qstr name; -+ struct bch_inode_unpacked dir_inode; -+ struct bch_hash_info 
dir_hash_info; -+ u64 dir_inum = dirent.k->p.inode; -+ int ret; -+ char *buf; -+ -+ name.len = bch2_dirent_name_bytes(dirent); -+ buf = bch2_trans_kmalloc(trans, name.len + 1); -+ if (IS_ERR(buf)) -+ return PTR_ERR(buf); -+ -+ memcpy(buf, dirent.v->d_name, name.len); -+ buf[name.len] = '\0'; -+ name.name = buf; -+ -+ ret = bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode); -+ if (ret && ret != -EINTR) -+ bch_err(c, "remove_dirent: err %i looking up directory inode", ret); -+ if (ret) -+ return ret; -+ -+ dir_hash_info = bch2_hash_info_init(c, &dir_inode); -+ -+ ret = bch2_hash_delete(trans, bch2_dirent_hash_desc, -+ &dir_hash_info, dir_inum, &name); -+ if (ret && ret != -EINTR) -+ bch_err(c, "remove_dirent: err %i deleting dirent", ret); -+ if (ret) -+ return ret; -+ -+ return 0; -+} -+ -+static int remove_dirent(struct btree_trans *trans, -+ struct bkey_s_c_dirent dirent) -+{ -+ return __bch2_trans_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ __remove_dirent(trans, dirent)); -+} -+ -+static int reattach_inode(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode, -+ u64 inum) -+{ -+ struct bch_inode_unpacked dir_u, inode_u; -+ char name_buf[20]; -+ struct qstr name; -+ int ret; -+ -+ snprintf(name_buf, sizeof(name_buf), "%llu", inum); -+ name = (struct qstr) QSTR(name_buf); -+ -+ ret = bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_LAZY_RW, -+ bch2_link_trans(&trans, lostfound_inode->bi_inum, -+ inum, &dir_u, &inode_u, &name)); -+ if (ret) -+ bch_err(c, "error %i reattaching inode %llu", ret, inum); -+ -+ return ret; -+} -+ -+struct inode_walker { -+ bool first_this_inode; -+ bool have_inode; -+ u64 cur_inum; -+ struct bch_inode_unpacked inode; -+}; -+ -+static struct inode_walker inode_walker_init(void) -+{ -+ return (struct inode_walker) { -+ .cur_inum = -1, -+ .have_inode = false, -+ }; -+} -+ -+static int walk_inode(struct btree_trans *trans, -+ struct inode_walker *w, u64 inum) -+{ -+ if (inum != w->cur_inum) { -+ int ret = bch2_inode_find_by_inum_trans(trans, inum, -+ &w->inode); -+ -+ if (ret && ret != -ENOENT) -+ return ret; -+ -+ w->have_inode = !ret; -+ w->cur_inum = inum; -+ w->first_this_inode = true; -+ } else { -+ w->first_this_inode = false; -+ } -+ -+ return 0; -+} -+ -+struct hash_check { -+ struct bch_hash_info info; -+ -+ /* start of current chain of hash collisions: */ -+ struct btree_iter *chain; -+ -+ /* next offset in current chain of hash collisions: */ -+ u64 chain_end; -+}; -+ -+static void hash_check_init(struct hash_check *h) -+{ -+ h->chain = NULL; -+ h->chain_end = 0; -+} -+ -+static void hash_stop_chain(struct btree_trans *trans, -+ struct hash_check *h) -+{ -+ if (h->chain) -+ bch2_trans_iter_free(trans, h->chain); -+ h->chain = NULL; -+} -+ -+static void hash_check_set_inode(struct btree_trans *trans, -+ struct hash_check *h, -+ const struct bch_inode_unpacked *bi) -+{ -+ h->info = bch2_hash_info_init(trans->c, bi); -+ hash_stop_chain(trans, h); -+} -+ -+static int hash_redo_key(const struct bch_hash_desc desc, -+ struct btree_trans *trans, struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k, -+ u64 hashed) -+{ -+ struct bkey_i delete; -+ struct bkey_i *tmp; -+ -+ tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ if (IS_ERR(tmp)) -+ return PTR_ERR(tmp); -+ -+ bkey_reassemble(tmp, k); -+ -+ bkey_init(&delete.k); -+ delete.k.p = k_iter->pos; -+ bch2_trans_update(trans, k_iter, &delete, 0); -+ -+ return bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode, -+ tmp, BCH_HASH_SET_MUST_CREATE); 
-+} -+ -+static int fsck_hash_delete_at(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ struct bch_hash_info *info, -+ struct btree_iter *iter) -+{ -+ int ret; -+retry: -+ ret = bch2_hash_delete_at(trans, desc, info, iter) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW); -+ if (ret == -EINTR) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (!ret) -+ goto retry; -+ } -+ -+ return ret; -+} -+ -+static int hash_check_duplicates(struct btree_trans *trans, -+ const struct bch_hash_desc desc, struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *iter; -+ struct bkey_s_c k2; -+ char buf[200]; -+ int ret = 0; -+ -+ if (!bkey_cmp(h->chain->pos, k_iter->pos)) -+ return 0; -+ -+ iter = bch2_trans_copy_iter(trans, h->chain); -+ BUG_ON(IS_ERR(iter)); -+ -+ for_each_btree_key_continue(iter, 0, k2, ret) { -+ if (bkey_cmp(k2.k->p, k.k->p) >= 0) -+ break; -+ -+ if (fsck_err_on(k2.k->type == desc.key_type && -+ !desc.cmp_bkey(k, k2), c, -+ "duplicate hash table keys:\n%s", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ ret = fsck_hash_delete_at(trans, desc, &h->info, k_iter); -+ if (ret) -+ return ret; -+ ret = 1; -+ break; -+ } -+ } -+fsck_err: -+ bch2_trans_iter_free(trans, iter); -+ return ret; -+} -+ -+static void hash_set_chain_start(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k) -+{ -+ bool hole = (k.k->type != KEY_TYPE_whiteout && -+ k.k->type != desc.key_type); -+ -+ if (hole || k.k->p.offset > h->chain_end + 1) -+ hash_stop_chain(trans, h); -+ -+ if (!hole) { -+ if (!h->chain) { -+ h->chain = bch2_trans_copy_iter(trans, k_iter); -+ BUG_ON(IS_ERR(h->chain)); -+ } -+ -+ h->chain_end = k.k->p.offset; -+ } -+} -+ -+static bool key_has_correct_hash(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k) -+{ -+ u64 hash; -+ -+ hash_set_chain_start(trans, desc, h, k_iter, k); -+ -+ if (k.k->type != desc.key_type) -+ return true; -+ -+ hash = desc.hash_bkey(&h->info, k); -+ -+ return hash >= h->chain->pos.offset && -+ hash <= k.k->p.offset; -+} -+ -+static int hash_check_key(struct btree_trans *trans, -+ const struct bch_hash_desc desc, struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ char buf[200]; -+ u64 hashed; -+ int ret = 0; -+ -+ hash_set_chain_start(trans, desc, h, k_iter, k); -+ -+ if (k.k->type != desc.key_type) -+ return 0; -+ -+ hashed = desc.hash_bkey(&h->info, k); -+ -+ if (fsck_err_on(hashed < h->chain->pos.offset || -+ hashed > k.k->p.offset, c, -+ "hash table key at wrong offset: btree %u, %llu, " -+ "hashed to %llu chain starts at %llu\n%s", -+ desc.btree_id, k.k->p.offset, -+ hashed, h->chain->pos.offset, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) { -+ ret = __bch2_trans_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, -+ hash_redo_key(desc, trans, h, k_iter, k, hashed)); -+ if (ret) { -+ bch_err(c, "hash_redo_key err %i", ret); -+ return ret; -+ } -+ return 1; -+ } -+ -+ ret = hash_check_duplicates(trans, desc, h, k_iter, k); -+fsck_err: -+ return ret; -+} -+ -+static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h, -+ struct btree_iter *iter, struct bkey_s_c *k) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_i_dirent *d = NULL; -+ int ret = -EINVAL; -+ char 
buf[200]; -+ unsigned len; -+ u64 hash; -+ -+ if (key_has_correct_hash(trans, bch2_dirent_hash_desc, h, iter, *k)) -+ return 0; -+ -+ len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k)); -+ BUG_ON(!len); -+ -+ memcpy(buf, bkey_s_c_to_dirent(*k).v->d_name, len); -+ buf[len] = '\0'; -+ -+ d = kmalloc(bkey_bytes(k->k), GFP_KERNEL); -+ if (!d) { -+ bch_err(c, "memory allocation failure"); -+ return -ENOMEM; -+ } -+ -+ bkey_reassemble(&d->k_i, *k); -+ -+ do { -+ --len; -+ if (!len) -+ goto err_redo; -+ -+ d->k.u64s = BKEY_U64s + dirent_val_u64s(len); -+ -+ BUG_ON(bkey_val_bytes(&d->k) < -+ offsetof(struct bch_dirent, d_name) + len); -+ -+ memset(d->v.d_name + len, 0, -+ bkey_val_bytes(&d->k) - -+ offsetof(struct bch_dirent, d_name) - len); -+ -+ hash = bch2_dirent_hash_desc.hash_bkey(&h->info, -+ bkey_i_to_s_c(&d->k_i)); -+ } while (hash < h->chain->pos.offset || -+ hash > k->k->p.offset); -+ -+ if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)", -+ buf, strlen(buf), d->v.d_name, len)) { -+ ret = __bch2_trans_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ (bch2_trans_update(trans, iter, &d->k_i, 0), 0)); -+ if (ret) -+ goto err; -+ -+ *k = bch2_btree_iter_peek(iter); -+ -+ BUG_ON(k->k->type != KEY_TYPE_dirent); -+ } -+err: -+fsck_err: -+ kfree(d); -+ return ret; -+err_redo: -+ hash = bch2_dirent_hash_desc.hash_bkey(&h->info, *k); -+ -+ if (fsck_err(c, "cannot fix dirent by removing trailing garbage %s (%zu)\n" -+ "hash table key at wrong offset: btree %u, offset %llu, " -+ "hashed to %llu chain starts at %llu\n%s", -+ buf, strlen(buf), BTREE_ID_DIRENTS, -+ k->k->p.offset, hash, h->chain->pos.offset, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ *k), buf))) { -+ ret = __bch2_trans_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, -+ hash_redo_key(bch2_dirent_hash_desc, trans, -+ h, iter, *k, hash)); -+ if (ret) -+ bch_err(c, "hash_redo_key err %i", ret); -+ else -+ ret = 1; -+ } -+ -+ goto err; -+} -+ -+static int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size) -+{ -+ return bch2_btree_delete_range(c, BTREE_ID_EXTENTS, -+ POS(inode_nr, round_up(new_size, block_bytes(c)) >> 9), -+ POS(inode_nr + 1, 0), NULL); -+} -+ -+static int bch2_fix_overlapping_extent(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, struct bpos cut_at) -+{ -+ struct btree_iter *u_iter; -+ struct bkey_i *u; -+ int ret; -+ -+ u = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ return ret; -+ -+ bkey_reassemble(u, k); -+ bch2_cut_front(cut_at, u); -+ -+ u_iter = bch2_trans_copy_iter(trans, iter); -+ ret = PTR_ERR_OR_ZERO(u_iter); -+ if (ret) -+ return ret; -+ -+ /* -+ * We don't want to go through the -+ * extent_handle_overwrites path: -+ */ -+ __bch2_btree_iter_set_pos(u_iter, u->k.p, false); -+ -+ /* -+ * XXX: this is going to leave disk space -+ * accounting slightly wrong -+ */ -+ ret = bch2_trans_update(trans, u_iter, u, 0); -+ bch2_trans_iter_put(trans, u_iter); -+ return ret; -+} -+ -+/* -+ * Walk extents: verify that extents have a corresponding S_ISREG inode, and -+ * that i_size an i_sectors are consistent -+ */ -+noinline_for_stack -+static int check_extents(struct bch_fs *c) -+{ -+ struct inode_walker w = inode_walker_init(); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_on_stack prev; -+ u64 i_sectors; -+ int ret = 0; -+ -+ bkey_on_stack_init(&prev); -+ prev.k->k = KEY(0, 0, 0); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 
0); -+ -+ bch_verbose(c, "checking extents"); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(BCACHEFS_ROOT_INO, 0), -+ BTREE_ITER_INTENT); -+retry: -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) { -+ char buf1[200]; -+ char buf2[200]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k)); -+ bch2_bkey_val_to_text(&PBUF(buf2), c, k); -+ -+ if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) { -+ ret = __bch2_trans_do(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ bch2_fix_overlapping_extent(&trans, -+ iter, k, prev.k->k.p)); -+ if (ret) -+ goto err; -+ } -+ } -+ bkey_on_stack_reassemble(&prev, c, k); -+ -+ ret = walk_inode(&trans, &w, k.k->p.inode); -+ if (ret) -+ break; -+ -+ if (fsck_err_on(!w.have_inode, c, -+ "extent type %u for missing inode %llu", -+ k.k->type, k.k->p.inode) || -+ fsck_err_on(w.have_inode && -+ !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c, -+ "extent type %u for non regular file, inode %llu mode %o", -+ k.k->type, k.k->p.inode, w.inode.bi_mode)) { -+ bch2_trans_unlock(&trans); -+ -+ ret = bch2_inode_truncate(c, k.k->p.inode, 0); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (fsck_err_on(w.first_this_inode && -+ w.have_inode && -+ !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) && -+ w.inode.bi_sectors != -+ (i_sectors = bch2_count_inode_sectors(&trans, w.cur_inum)), -+ c, "inode %llu has incorrect i_sectors: got %llu, should be %llu", -+ w.inode.bi_inum, -+ w.inode.bi_sectors, i_sectors)) { -+ struct bkey_inode_buf p; -+ -+ w.inode.bi_sectors = i_sectors; -+ -+ bch2_trans_unlock(&trans); -+ -+ bch2_inode_pack(&p, &w.inode); -+ -+ ret = bch2_btree_insert(c, BTREE_ID_INODES, -+ &p.inode.k_i, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW); -+ if (ret) { -+ bch_err(c, "error in fsck: error %i updating inode", ret); -+ goto err; -+ } -+ -+ /* revalidate iterator: */ -+ k = bch2_btree_iter_peek(iter); -+ } -+ -+ if (fsck_err_on(w.have_inode && -+ !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && -+ k.k->type != KEY_TYPE_reservation && -+ k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c, -+ "extent type %u offset %llu past end of inode %llu, i_size %llu", -+ k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { -+ bch2_trans_unlock(&trans); -+ -+ ret = bch2_inode_truncate(c, k.k->p.inode, -+ w.inode.bi_size); -+ if (ret) -+ goto err; -+ continue; -+ } -+ } -+err: -+fsck_err: -+ if (ret == -EINTR) -+ goto retry; -+ bkey_on_stack_exit(&prev, c); -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+/* -+ * Walk dirents: verify that they all have a corresponding S_ISDIR inode, -+ * validate d_type -+ */ -+noinline_for_stack -+static int check_dirents(struct bch_fs *c) -+{ -+ struct inode_walker w = inode_walker_init(); -+ struct hash_check h; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ unsigned name_len; -+ char buf[200]; -+ int ret = 0; -+ -+ bch_verbose(c, "checking dirents"); -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ hash_check_init(&h); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, -+ POS(BCACHEFS_ROOT_INO, 0), 0); -+retry: -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ struct bkey_s_c_dirent d; -+ struct bch_inode_unpacked target; -+ bool have_target; -+ u64 d_inum; -+ -+ ret = walk_inode(&trans, &w, k.k->p.inode); -+ if (ret) -+ break; -+ -+ if (fsck_err_on(!w.have_inode, c, -+ "dirent in nonexisting directory:\n%s", -+ 
(bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf)) || -+ fsck_err_on(!S_ISDIR(w.inode.bi_mode), c, -+ "dirent in non directory inode type %u:\n%s", -+ mode_to_type(w.inode.bi_mode), -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (w.first_this_inode && w.have_inode) -+ hash_check_set_inode(&trans, &h, &w.inode); -+ -+ ret = check_dirent_hash(&trans, &h, iter, &k); -+ if (ret > 0) { -+ ret = 0; -+ continue; -+ } -+ if (ret) -+ goto fsck_err; -+ -+ if (ret) -+ goto fsck_err; -+ -+ if (k.k->type != KEY_TYPE_dirent) -+ continue; -+ -+ d = bkey_s_c_to_dirent(k); -+ d_inum = le64_to_cpu(d.v->d_inum); -+ -+ name_len = bch2_dirent_name_bytes(d); -+ -+ if (fsck_err_on(!name_len, c, "empty dirent") || -+ fsck_err_on(name_len == 1 && -+ !memcmp(d.v->d_name, ".", 1), c, -+ ". dirent") || -+ fsck_err_on(name_len == 2 && -+ !memcmp(d.v->d_name, "..", 2), c, -+ ".. dirent") || -+ fsck_err_on(name_len == 2 && -+ !memcmp(d.v->d_name, "..", 2), c, -+ ".. dirent") || -+ fsck_err_on(memchr(d.v->d_name, '/', name_len), c, -+ "dirent name has invalid chars")) { -+ ret = remove_dirent(&trans, d); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (fsck_err_on(d_inum == d.k->p.inode, c, -+ "dirent points to own directory:\n%s", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ ret = remove_dirent(&trans, d); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ ret = bch2_inode_find_by_inum_trans(&trans, d_inum, &target); -+ if (ret && ret != -ENOENT) -+ break; -+ -+ have_target = !ret; -+ ret = 0; -+ -+ if (fsck_err_on(!have_target, c, -+ "dirent points to missing inode:\n%s", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ ret = remove_dirent(&trans, d); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (fsck_err_on(have_target && -+ d.v->d_type != -+ mode_to_type(target.bi_mode), c, -+ "incorrect d_type: should be %u:\n%s", -+ mode_to_type(target.bi_mode), -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ struct bkey_i_dirent *n; -+ -+ n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); -+ if (!n) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ bkey_reassemble(&n->k_i, d.s_c); -+ n->v.d_type = mode_to_type(target.bi_mode); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ (bch2_trans_update(&trans, iter, &n->k_i, 0), 0)); -+ kfree(n); -+ if (ret) -+ goto err; -+ -+ } -+ } -+ -+ hash_stop_chain(&trans, &h); -+err: -+fsck_err: -+ if (ret == -EINTR) -+ goto retry; -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+/* -+ * Walk xattrs: verify that they all have a corresponding inode -+ */ -+noinline_for_stack -+static int check_xattrs(struct bch_fs *c) -+{ -+ struct inode_walker w = inode_walker_init(); -+ struct hash_check h; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch_verbose(c, "checking xattrs"); -+ -+ hash_check_init(&h); -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, -+ POS(BCACHEFS_ROOT_INO, 0), 0); -+retry: -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ ret = walk_inode(&trans, &w, k.k->p.inode); -+ if (ret) -+ break; -+ -+ if (fsck_err_on(!w.have_inode, c, -+ "xattr for missing inode %llu", -+ k.k->p.inode)) { -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (w.first_this_inode && w.have_inode) -+ hash_check_set_inode(&trans, &h, &w.inode); -+ -+ ret = 
hash_check_key(&trans, bch2_xattr_hash_desc, -+ &h, iter, k); -+ if (ret) -+ goto fsck_err; -+ } -+err: -+fsck_err: -+ if (ret == -EINTR) -+ goto retry; -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+/* Get root directory, create if it doesn't exist: */ -+static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode) -+{ -+ struct bkey_inode_buf packed; -+ int ret; -+ -+ bch_verbose(c, "checking root directory"); -+ -+ ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode); -+ if (ret && ret != -ENOENT) -+ return ret; -+ -+ if (fsck_err_on(ret, c, "root directory missing")) -+ goto create_root; -+ -+ if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c, -+ "root inode not a directory")) -+ goto create_root; -+ -+ return 0; -+fsck_err: -+ return ret; -+create_root: -+ bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|0755, -+ 0, NULL); -+ root_inode->bi_inum = BCACHEFS_ROOT_INO; -+ -+ bch2_inode_pack(&packed, root_inode); -+ -+ return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i, -+ NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW); -+} -+ -+/* Get lost+found, create if it doesn't exist: */ -+static int check_lostfound(struct bch_fs *c, -+ struct bch_inode_unpacked *root_inode, -+ struct bch_inode_unpacked *lostfound_inode) -+{ -+ struct qstr lostfound = QSTR("lost+found"); -+ struct bch_hash_info root_hash_info = -+ bch2_hash_info_init(c, root_inode); -+ u64 inum; -+ int ret; -+ -+ bch_verbose(c, "checking lost+found"); -+ -+ inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info, -+ &lostfound); -+ if (!inum) { -+ bch_notice(c, "creating lost+found"); -+ goto create_lostfound; -+ } -+ -+ ret = bch2_inode_find_by_inum(c, inum, lostfound_inode); -+ if (ret && ret != -ENOENT) -+ return ret; -+ -+ if (fsck_err_on(ret, c, "lost+found missing")) -+ goto create_lostfound; -+ -+ if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c, -+ "lost+found inode not a directory")) -+ goto create_lostfound; -+ -+ return 0; -+fsck_err: -+ return ret; -+create_lostfound: -+ bch2_inode_init_early(c, lostfound_inode); -+ -+ ret = bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ bch2_create_trans(&trans, -+ BCACHEFS_ROOT_INO, root_inode, -+ lostfound_inode, &lostfound, -+ 0, 0, S_IFDIR|0700, 0, NULL, NULL)); -+ if (ret) -+ bch_err(c, "error creating lost+found: %i", ret); -+ -+ return ret; -+} -+ -+struct inode_bitmap { -+ unsigned long *bits; -+ size_t size; -+}; -+ -+static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr) -+{ -+ return nr < b->size ? 
test_bit(nr, b->bits) : false; -+} -+ -+static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr) -+{ -+ if (nr >= b->size) { -+ size_t new_size = max_t(size_t, max_t(size_t, -+ PAGE_SIZE * 8, -+ b->size * 2), -+ nr + 1); -+ void *n; -+ -+ new_size = roundup_pow_of_two(new_size); -+ n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO); -+ if (!n) { -+ return -ENOMEM; -+ } -+ -+ b->bits = n; -+ b->size = new_size; -+ } -+ -+ __set_bit(nr, b->bits); -+ return 0; -+} -+ -+struct pathbuf { -+ size_t nr; -+ size_t size; -+ -+ struct pathbuf_entry { -+ u64 inum; -+ u64 offset; -+ } *entries; -+}; -+ -+static int path_down(struct pathbuf *p, u64 inum) -+{ -+ if (p->nr == p->size) { -+ size_t new_size = max_t(size_t, 256UL, p->size * 2); -+ void *n = krealloc(p->entries, -+ new_size * sizeof(p->entries[0]), -+ GFP_KERNEL); -+ if (!n) -+ return -ENOMEM; -+ -+ p->entries = n; -+ p->size = new_size; -+ }; -+ -+ p->entries[p->nr++] = (struct pathbuf_entry) { -+ .inum = inum, -+ .offset = 0, -+ }; -+ return 0; -+} -+ -+noinline_for_stack -+static int check_directory_structure(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode) -+{ -+ struct inode_bitmap dirs_done = { NULL, 0 }; -+ struct pathbuf path = { 0, 0, NULL }; -+ struct pathbuf_entry *e; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_dirent dirent; -+ bool had_unreachable; -+ u64 d_inum; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ bch_verbose(c, "checking directory structure"); -+ -+ /* DFS: */ -+restart_dfs: -+ had_unreachable = false; -+ -+ ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO); -+ if (ret) { -+ bch_err(c, "memory allocation failure in inode_bitmap_set()"); -+ goto err; -+ } -+ -+ ret = path_down(&path, BCACHEFS_ROOT_INO); -+ if (ret) -+ goto err; -+ -+ while (path.nr) { -+next: -+ e = &path.entries[path.nr - 1]; -+ -+ if (e->offset == U64_MAX) -+ goto up; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, -+ POS(e->inum, e->offset + 1), 0, k, ret) { -+ if (k.k->p.inode != e->inum) -+ break; -+ -+ e->offset = k.k->p.offset; -+ -+ if (k.k->type != KEY_TYPE_dirent) -+ continue; -+ -+ dirent = bkey_s_c_to_dirent(k); -+ -+ if (dirent.v->d_type != DT_DIR) -+ continue; -+ -+ d_inum = le64_to_cpu(dirent.v->d_inum); -+ -+ if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c, -+ "directory %llu has multiple hardlinks", -+ d_inum)) { -+ ret = remove_dirent(&trans, dirent); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ ret = inode_bitmap_set(&dirs_done, d_inum); -+ if (ret) { -+ bch_err(c, "memory allocation failure in inode_bitmap_set()"); -+ goto err; -+ } -+ -+ ret = path_down(&path, d_inum); -+ if (ret) { -+ goto err; -+ } -+ -+ ret = bch2_trans_iter_free(&trans, iter); -+ if (ret) { -+ bch_err(c, "btree error %i in fsck", ret); -+ goto err; -+ } -+ goto next; -+ } -+ ret = bch2_trans_iter_free(&trans, iter) ?: ret; -+ if (ret) { -+ bch_err(c, "btree error %i in fsck", ret); -+ goto err; -+ } -+up: -+ path.nr--; -+ } -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS_MIN, 0); -+retry: -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ if (k.k->type != KEY_TYPE_inode) -+ continue; -+ -+ if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode))) -+ continue; -+ -+ ret = bch2_empty_dir_trans(&trans, k.k->p.inode); -+ if (ret == -EINTR) -+ goto retry; -+ if (!ret) -+ continue; -+ -+ if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.offset), c, -+ "unreachable directory found (inum %llu)", -+ 
k.k->p.offset)) { -+ bch2_trans_unlock(&trans); -+ -+ ret = reattach_inode(c, lostfound_inode, k.k->p.offset); -+ if (ret) { -+ goto err; -+ } -+ -+ had_unreachable = true; -+ } -+ } -+ bch2_trans_iter_free(&trans, iter); -+ if (ret) -+ goto err; -+ -+ if (had_unreachable) { -+ bch_info(c, "reattached unreachable directories, restarting pass to check for loops"); -+ kfree(dirs_done.bits); -+ kfree(path.entries); -+ memset(&dirs_done, 0, sizeof(dirs_done)); -+ memset(&path, 0, sizeof(path)); -+ goto restart_dfs; -+ } -+err: -+fsck_err: -+ ret = bch2_trans_exit(&trans) ?: ret; -+ kfree(dirs_done.bits); -+ kfree(path.entries); -+ return ret; -+} -+ -+struct nlink { -+ u32 count; -+ u32 dir_count; -+}; -+ -+typedef GENRADIX(struct nlink) nlink_table; -+ -+static void inc_link(struct bch_fs *c, nlink_table *links, -+ u64 range_start, u64 *range_end, -+ u64 inum, bool dir) -+{ -+ struct nlink *link; -+ -+ if (inum < range_start || inum >= *range_end) -+ return; -+ -+ link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL); -+ if (!link) { -+ bch_verbose(c, "allocation failed during fsck - will need another pass"); -+ *range_end = inum; -+ return; -+ } -+ -+ if (dir) -+ link->dir_count++; -+ else -+ link->count++; -+} -+ -+noinline_for_stack -+static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, -+ u64 range_start, u64 *range_end) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_dirent d; -+ u64 d_inum; -+ int ret; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k, ret) { -+ switch (k.k->type) { -+ case KEY_TYPE_dirent: -+ d = bkey_s_c_to_dirent(k); -+ d_inum = le64_to_cpu(d.v->d_inum); -+ -+ if (d.v->d_type == DT_DIR) -+ inc_link(c, links, range_start, range_end, -+ d.k->p.inode, true); -+ -+ inc_link(c, links, range_start, range_end, -+ d_inum, false); -+ -+ break; -+ } -+ -+ bch2_trans_cond_resched(&trans); -+ } -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ bch_err(c, "error in fsck: btree error %i while walking dirents", ret); -+ -+ return ret; -+} -+ -+static int check_inode_nlink(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode, -+ struct bch_inode_unpacked *u, -+ struct nlink *link, -+ bool *do_update) -+{ -+ u32 i_nlink = bch2_inode_nlink_get(u); -+ u32 real_i_nlink = -+ link->count * nlink_bias(u->bi_mode) + -+ link->dir_count; -+ int ret = 0; -+ -+ /* -+ * These should have been caught/fixed by earlier passes, we don't -+ * repair them here: -+ */ -+ if (S_ISDIR(u->bi_mode) && link->count > 1) { -+ need_fsck_err(c, "directory %llu with multiple hardlinks: %u", -+ u->bi_inum, link->count); -+ return 0; -+ } -+ -+ if (S_ISDIR(u->bi_mode) && !link->count) { -+ need_fsck_err(c, "unreachable directory found (inum %llu)", -+ u->bi_inum); -+ return 0; -+ } -+ -+ if (!S_ISDIR(u->bi_mode) && link->dir_count) { -+ need_fsck_err(c, "non directory with subdirectories (inum %llu)", -+ u->bi_inum); -+ return 0; -+ } -+ -+ if (!link->count && -+ !(u->bi_flags & BCH_INODE_UNLINKED) && -+ (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) { -+ if (fsck_err(c, "unreachable inode %llu not marked as unlinked (type %u)", -+ u->bi_inum, mode_to_type(u->bi_mode)) == -+ FSCK_ERR_IGNORE) -+ return 0; -+ -+ ret = reattach_inode(c, lostfound_inode, u->bi_inum); -+ if (ret) -+ return ret; -+ -+ link->count = 1; -+ real_i_nlink = nlink_bias(u->bi_mode) + 
link->dir_count; -+ goto set_i_nlink; -+ } -+ -+ if (i_nlink < link->count) { -+ if (fsck_err(c, "inode %llu i_link too small (%u < %u, type %i)", -+ u->bi_inum, i_nlink, link->count, -+ mode_to_type(u->bi_mode)) == FSCK_ERR_IGNORE) -+ return 0; -+ goto set_i_nlink; -+ } -+ -+ if (i_nlink != real_i_nlink && -+ c->sb.clean) { -+ if (fsck_err(c, "filesystem marked clean, " -+ "but inode %llu has wrong i_nlink " -+ "(type %u i_nlink %u, should be %u)", -+ u->bi_inum, mode_to_type(u->bi_mode), -+ i_nlink, real_i_nlink) == FSCK_ERR_IGNORE) -+ return 0; -+ goto set_i_nlink; -+ } -+ -+ if (i_nlink != real_i_nlink && -+ (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) { -+ if (fsck_err(c, "inode %llu has wrong i_nlink " -+ "(type %u i_nlink %u, should be %u)", -+ u->bi_inum, mode_to_type(u->bi_mode), -+ i_nlink, real_i_nlink) == FSCK_ERR_IGNORE) -+ return 0; -+ goto set_i_nlink; -+ } -+ -+ if (real_i_nlink && i_nlink != real_i_nlink) -+ bch_verbose(c, "setting inode %llu nlink from %u to %u", -+ u->bi_inum, i_nlink, real_i_nlink); -+set_i_nlink: -+ if (i_nlink != real_i_nlink) { -+ bch2_inode_nlink_set(u, real_i_nlink); -+ *do_update = true; -+ } -+fsck_err: -+ return ret; -+} -+ -+static int check_inode(struct btree_trans *trans, -+ struct bch_inode_unpacked *lostfound_inode, -+ struct btree_iter *iter, -+ struct bkey_s_c_inode inode, -+ struct nlink *link) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_inode_unpacked u; -+ bool do_update = false; -+ int ret = 0; -+ -+ ret = bch2_inode_unpack(inode, &u); -+ -+ bch2_trans_unlock(trans); -+ -+ if (bch2_fs_inconsistent_on(ret, c, -+ "error unpacking inode %llu in fsck", -+ inode.k->p.inode)) -+ return ret; -+ -+ if (link) { -+ ret = check_inode_nlink(c, lostfound_inode, &u, link, -+ &do_update); -+ if (ret) -+ return ret; -+ } -+ -+ if (u.bi_flags & BCH_INODE_UNLINKED && -+ (!c->sb.clean || -+ fsck_err(c, "filesystem marked clean, but inode %llu unlinked", -+ u.bi_inum))) { -+ bch_verbose(c, "deleting inode %llu", u.bi_inum); -+ -+ ret = bch2_inode_rm(c, u.bi_inum); -+ if (ret) -+ bch_err(c, "error in fsck: error %i while deleting inode", ret); -+ return ret; -+ } -+ -+ if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY && -+ (!c->sb.clean || -+ fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty", -+ u.bi_inum))) { -+ bch_verbose(c, "truncating inode %llu", u.bi_inum); -+ -+ /* -+ * XXX: need to truncate partial blocks too here - or ideally -+ * just switch units to bytes and that issue goes away -+ */ -+ -+ ret = bch2_inode_truncate(c, u.bi_inum, u.bi_size); -+ if (ret) { -+ bch_err(c, "error in fsck: error %i truncating inode", ret); -+ return ret; -+ } -+ -+ /* -+ * We truncated without our normal sector accounting hook, just -+ * make sure we recalculate it: -+ */ -+ u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY; -+ -+ u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; -+ do_update = true; -+ } -+ -+ if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY && -+ (!c->sb.clean || -+ fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty", -+ u.bi_inum))) { -+ s64 sectors; -+ -+ bch_verbose(c, "recounting sectors for inode %llu", -+ u.bi_inum); -+ -+ sectors = bch2_count_inode_sectors(trans, u.bi_inum); -+ if (sectors < 0) { -+ bch_err(c, "error in fsck: error %i recounting inode sectors", -+ (int) sectors); -+ return sectors; -+ } -+ -+ u.bi_sectors = sectors; -+ u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY; -+ do_update = true; -+ } -+ -+ if (do_update) { -+ struct bkey_inode_buf p; -+ -+ bch2_inode_pack(&p, &u); -+ -+ ret = __bch2_trans_do(trans, 
NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ (bch2_trans_update(trans, iter, &p.inode.k_i, 0), 0)); -+ if (ret) -+ bch_err(c, "error in fsck: error %i " -+ "updating inode", ret); -+ } -+fsck_err: -+ return ret; -+} -+ -+noinline_for_stack -+static int bch2_gc_walk_inodes(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode, -+ nlink_table *links, -+ u64 range_start, u64 range_end) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct nlink *link, zero_links = { 0, 0 }; -+ struct genradix_iter nlinks_iter; -+ int ret = 0, ret2 = 0; -+ u64 nlinks_pos; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, -+ POS(0, range_start), 0); -+ nlinks_iter = genradix_iter_init(links, 0); -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret2 = bkey_err(k))) { -+peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); -+ -+ if (!link && (!k.k || iter->pos.offset >= range_end)) -+ break; -+ -+ nlinks_pos = range_start + nlinks_iter.pos; -+ if (iter->pos.offset > nlinks_pos) { -+ /* Should have been caught by dirents pass: */ -+ need_fsck_err_on(link && link->count, c, -+ "missing inode %llu (nlink %u)", -+ nlinks_pos, link->count); -+ genradix_iter_advance(&nlinks_iter, links); -+ goto peek_nlinks; -+ } -+ -+ if (iter->pos.offset < nlinks_pos || !link) -+ link = &zero_links; -+ -+ if (k.k && k.k->type == KEY_TYPE_inode) { -+ ret = check_inode(&trans, lostfound_inode, iter, -+ bkey_s_c_to_inode(k), link); -+ BUG_ON(ret == -EINTR); -+ if (ret) -+ break; -+ } else { -+ /* Should have been caught by dirents pass: */ -+ need_fsck_err_on(link->count, c, -+ "missing inode %llu (nlink %u)", -+ nlinks_pos, link->count); -+ } -+ -+ if (nlinks_pos == iter->pos.offset) -+ genradix_iter_advance(&nlinks_iter, links); -+ -+ bch2_btree_iter_next(iter); -+ bch2_trans_cond_resched(&trans); -+ } -+fsck_err: -+ bch2_trans_exit(&trans); -+ -+ if (ret2) -+ bch_err(c, "error in fsck: btree error %i while walking inodes", ret2); -+ -+ return ret ?: ret2; -+} -+ -+noinline_for_stack -+static int check_inode_nlinks(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode) -+{ -+ nlink_table links; -+ u64 this_iter_range_start, next_iter_range_start = 0; -+ int ret = 0; -+ -+ bch_verbose(c, "checking inode nlinks"); -+ -+ genradix_init(&links); -+ -+ do { -+ this_iter_range_start = next_iter_range_start; -+ next_iter_range_start = U64_MAX; -+ -+ ret = bch2_gc_walk_dirents(c, &links, -+ this_iter_range_start, -+ &next_iter_range_start); -+ if (ret) -+ break; -+ -+ ret = bch2_gc_walk_inodes(c, lostfound_inode, &links, -+ this_iter_range_start, -+ next_iter_range_start); -+ if (ret) -+ break; -+ -+ genradix_free(&links); -+ } while (next_iter_range_start != U64_MAX); -+ -+ genradix_free(&links); -+ -+ return ret; -+} -+ -+/* -+ * Checks for inconsistencies that shouldn't happen, unless we have a bug. 
-+ * Doesn't fix them yet, mainly because they haven't yet been observed: -+ */ -+int bch2_fsck_full(struct bch_fs *c) -+{ -+ struct bch_inode_unpacked root_inode, lostfound_inode; -+ -+ return check_extents(c) ?: -+ check_dirents(c) ?: -+ check_xattrs(c) ?: -+ check_root(c, &root_inode) ?: -+ check_lostfound(c, &root_inode, &lostfound_inode) ?: -+ check_directory_structure(c, &lostfound_inode) ?: -+ check_inode_nlinks(c, &lostfound_inode); -+} -+ -+int bch2_fsck_inode_nlink(struct bch_fs *c) -+{ -+ struct bch_inode_unpacked root_inode, lostfound_inode; -+ -+ return check_root(c, &root_inode) ?: -+ check_lostfound(c, &root_inode, &lostfound_inode) ?: -+ check_inode_nlinks(c, &lostfound_inode); -+} -+ -+int bch2_fsck_walk_inodes_only(struct bch_fs *c) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_inode inode; -+ int ret; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k, ret) { -+ if (k.k->type != KEY_TYPE_inode) -+ continue; -+ -+ inode = bkey_s_c_to_inode(k); -+ -+ if (inode.v->bi_flags & -+ (BCH_INODE_I_SIZE_DIRTY| -+ BCH_INODE_I_SECTORS_DIRTY| -+ BCH_INODE_UNLINKED)) { -+ ret = check_inode(&trans, NULL, iter, inode, NULL); -+ BUG_ON(ret == -EINTR); -+ if (ret) -+ break; -+ } -+ } -+ BUG_ON(ret == -EINTR); -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h -new file mode 100644 -index 000000000000..9e4af02bde1e ---- /dev/null -+++ b/fs/bcachefs/fsck.h -@@ -0,0 +1,9 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FSCK_H -+#define _BCACHEFS_FSCK_H -+ -+int bch2_fsck_full(struct bch_fs *); -+int bch2_fsck_inode_nlink(struct bch_fs *); -+int bch2_fsck_walk_inodes_only(struct bch_fs *); -+ -+#endif /* _BCACHEFS_FSCK_H */ -diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c -new file mode 100644 -index 000000000000..7d20f082ad45 ---- /dev/null -+++ b/fs/bcachefs/inode.c -@@ -0,0 +1,554 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_update.h" -+#include "error.h" -+#include "extents.h" -+#include "inode.h" -+#include "str_hash.h" -+ -+#include -+ -+#include -+ -+const char * const bch2_inode_opts[] = { -+#define x(name, ...) #name, -+ BCH_INODE_OPTS() -+#undef x -+ NULL, -+}; -+ -+static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 }; -+static const u8 bits_table[8] = { -+ 1 * 8 - 1, -+ 2 * 8 - 2, -+ 3 * 8 - 3, -+ 4 * 8 - 4, -+ 6 * 8 - 5, -+ 8 * 8 - 6, -+ 10 * 8 - 7, -+ 13 * 8 - 8, -+}; -+ -+static int inode_encode_field(u8 *out, u8 *end, u64 hi, u64 lo) -+{ -+ __be64 in[2] = { cpu_to_be64(hi), cpu_to_be64(lo), }; -+ unsigned shift, bytes, bits = likely(!hi) -+ ? 
fls64(lo) -+ : fls64(hi) + 64; -+ -+ for (shift = 1; shift <= 8; shift++) -+ if (bits < bits_table[shift - 1]) -+ goto got_shift; -+ -+ BUG(); -+got_shift: -+ bytes = byte_table[shift - 1]; -+ -+ BUG_ON(out + bytes > end); -+ -+ memcpy(out, (u8 *) in + 16 - bytes, bytes); -+ *out |= (1 << 8) >> shift; -+ -+ return bytes; -+} -+ -+static int inode_decode_field(const u8 *in, const u8 *end, -+ u64 out[2], unsigned *out_bits) -+{ -+ __be64 be[2] = { 0, 0 }; -+ unsigned bytes, shift; -+ u8 *p; -+ -+ if (in >= end) -+ return -1; -+ -+ if (!*in) -+ return -1; -+ -+ /* -+ * position of highest set bit indicates number of bytes: -+ * shift = number of bits to remove in high byte: -+ */ -+ shift = 8 - __fls(*in); /* 1 <= shift <= 8 */ -+ bytes = byte_table[shift - 1]; -+ -+ if (in + bytes > end) -+ return -1; -+ -+ p = (u8 *) be + 16 - bytes; -+ memcpy(p, in, bytes); -+ *p ^= (1 << 8) >> shift; -+ -+ out[0] = be64_to_cpu(be[0]); -+ out[1] = be64_to_cpu(be[1]); -+ *out_bits = out[0] ? 64 + fls64(out[0]) : fls64(out[1]); -+ -+ return bytes; -+} -+ -+void bch2_inode_pack(struct bkey_inode_buf *packed, -+ const struct bch_inode_unpacked *inode) -+{ -+ u8 *out = packed->inode.v.fields; -+ u8 *end = (void *) &packed[1]; -+ u8 *last_nonzero_field = out; -+ unsigned nr_fields = 0, last_nonzero_fieldnr = 0; -+ unsigned bytes; -+ -+ bkey_inode_init(&packed->inode.k_i); -+ packed->inode.k.p.offset = inode->bi_inum; -+ packed->inode.v.bi_hash_seed = inode->bi_hash_seed; -+ packed->inode.v.bi_flags = cpu_to_le32(inode->bi_flags); -+ packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode); -+ -+#define x(_name, _bits) \ -+ out += inode_encode_field(out, end, 0, inode->_name); \ -+ nr_fields++; \ -+ \ -+ if (inode->_name) { \ -+ last_nonzero_field = out; \ -+ last_nonzero_fieldnr = nr_fields; \ -+ } -+ -+ BCH_INODE_FIELDS() -+#undef x -+ -+ out = last_nonzero_field; -+ nr_fields = last_nonzero_fieldnr; -+ -+ bytes = out - (u8 *) &packed->inode.v; -+ set_bkey_val_bytes(&packed->inode.k, bytes); -+ memset_u64s_tail(&packed->inode.v, 0, bytes); -+ -+ SET_INODE_NR_FIELDS(&packed->inode.v, nr_fields); -+ -+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { -+ struct bch_inode_unpacked unpacked; -+ -+ int ret = bch2_inode_unpack(inode_i_to_s_c(&packed->inode), -+ &unpacked); -+ BUG_ON(ret); -+ BUG_ON(unpacked.bi_inum != inode->bi_inum); -+ BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed); -+ BUG_ON(unpacked.bi_mode != inode->bi_mode); -+ -+#define x(_name, _bits) BUG_ON(unpacked._name != inode->_name); -+ BCH_INODE_FIELDS() -+#undef x -+ } -+} -+ -+int bch2_inode_unpack(struct bkey_s_c_inode inode, -+ struct bch_inode_unpacked *unpacked) -+{ -+ const u8 *in = inode.v->fields; -+ const u8 *end = (void *) inode.v + bkey_val_bytes(inode.k); -+ u64 field[2]; -+ unsigned fieldnr = 0, field_bits; -+ int ret; -+ -+ unpacked->bi_inum = inode.k->p.offset; -+ unpacked->bi_hash_seed = inode.v->bi_hash_seed; -+ unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags); -+ unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode); -+ -+#define x(_name, _bits) \ -+ if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \ -+ memset(&unpacked->_name, 0, \ -+ sizeof(*unpacked) - \ -+ offsetof(struct bch_inode_unpacked, _name)); \ -+ return 0; \ -+ } \ -+ \ -+ ret = inode_decode_field(in, end, field, &field_bits); \ -+ if (ret < 0) \ -+ return ret; \ -+ \ -+ if (field_bits > sizeof(unpacked->_name) * 8) \ -+ return -1; \ -+ \ -+ unpacked->_name = field[1]; \ -+ in += ret; -+ -+ BCH_INODE_FIELDS() -+#undef x -+ -+ /* XXX: signal if there were more fields than expected? 
*/ -+ -+ return 0; -+} -+ -+struct btree_iter *bch2_inode_peek(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode, -+ u64 inum, unsigned flags) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, inum), -+ BTREE_ITER_SLOTS|flags); -+ if (IS_ERR(iter)) -+ return iter; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ ret = k.k->type == KEY_TYPE_inode ? 0 : -EIO; -+ if (ret) -+ goto err; -+ -+ ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode); -+ if (ret) -+ goto err; -+ -+ return iter; -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ERR_PTR(ret); -+} -+ -+int bch2_inode_write(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bch_inode_unpacked *inode) -+{ -+ struct bkey_inode_buf *inode_p; -+ -+ inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); -+ if (IS_ERR(inode_p)) -+ return PTR_ERR(inode_p); -+ -+ bch2_inode_pack(inode_p, inode); -+ bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); -+ return 0; -+} -+ -+const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); -+ struct bch_inode_unpacked unpacked; -+ -+ if (k.k->p.inode) -+ return "nonzero k.p.inode"; -+ -+ if (bkey_val_bytes(k.k) < sizeof(struct bch_inode)) -+ return "incorrect value size"; -+ -+ if (k.k->p.offset < BLOCKDEV_INODE_MAX) -+ return "fs inode in blockdev range"; -+ -+ if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) -+ return "invalid str hash type"; -+ -+ if (bch2_inode_unpack(inode, &unpacked)) -+ return "invalid variable length fields"; -+ -+ if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) -+ return "invalid data checksum type"; -+ -+ if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) -+ return "invalid data checksum type"; -+ -+ if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && -+ unpacked.bi_nlink != 0) -+ return "flagged as unlinked but bi_nlink != 0"; -+ -+ return NULL; -+} -+ -+void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); -+ struct bch_inode_unpacked unpacked; -+ -+ if (bch2_inode_unpack(inode, &unpacked)) { -+ pr_buf(out, "(unpack error)"); -+ return; -+ } -+ -+#define x(_name, _bits) \ -+ pr_buf(out, #_name ": %llu ", (u64) unpacked._name); -+ BCH_INODE_FIELDS() -+#undef x -+} -+ -+const char *bch2_inode_generation_invalid(const struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ if (k.k->p.inode) -+ return "nonzero k.p.inode"; -+ -+ if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_inode_generation gen = bkey_s_c_to_inode_generation(k); -+ -+ pr_buf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation)); -+} -+ -+void bch2_inode_init_early(struct bch_fs *c, -+ struct bch_inode_unpacked *inode_u) -+{ -+ enum bch_str_hash_type str_hash = -+ bch2_str_hash_opt_to_type(c, c->opts.str_hash); -+ -+ memset(inode_u, 0, sizeof(*inode_u)); -+ -+ /* ick */ -+ inode_u->bi_flags |= str_hash << INODE_STR_HASH_OFFSET; -+ get_random_bytes(&inode_u->bi_hash_seed, -+ sizeof(inode_u->bi_hash_seed)); -+} -+ -+void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now, -+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev, -+ struct bch_inode_unpacked *parent) -+{ -+ inode_u->bi_mode = 
mode; -+ inode_u->bi_uid = uid; -+ inode_u->bi_gid = gid; -+ inode_u->bi_dev = rdev; -+ inode_u->bi_atime = now; -+ inode_u->bi_mtime = now; -+ inode_u->bi_ctime = now; -+ inode_u->bi_otime = now; -+ -+ if (parent && parent->bi_mode & S_ISGID) { -+ inode_u->bi_gid = parent->bi_gid; -+ if (S_ISDIR(mode)) -+ inode_u->bi_mode |= S_ISGID; -+ } -+ -+ if (parent) { -+#define x(_name, ...) inode_u->bi_##_name = parent->bi_##_name; -+ BCH_INODE_OPTS() -+#undef x -+ } -+} -+ -+void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, -+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev, -+ struct bch_inode_unpacked *parent) -+{ -+ bch2_inode_init_early(c, inode_u); -+ bch2_inode_init_late(inode_u, bch2_current_time(c), -+ uid, gid, mode, rdev, parent); -+} -+ -+static inline u32 bkey_generation(struct bkey_s_c k) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_inode: -+ BUG(); -+ case KEY_TYPE_inode_generation: -+ return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation); -+ default: -+ return 0; -+ } -+} -+ -+int bch2_inode_create(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode_u, -+ u64 min, u64 max, u64 *hint) -+{ -+ struct bkey_inode_buf *inode_p; -+ struct btree_iter *iter = NULL; -+ struct bkey_s_c k; -+ u64 start; -+ int ret; -+ -+ if (!max) -+ max = ULLONG_MAX; -+ -+ if (trans->c->opts.inodes_32bit) -+ max = min_t(u64, max, U32_MAX); -+ -+ start = READ_ONCE(*hint); -+ -+ if (start >= max || start < min) -+ start = min; -+ -+ inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); -+ if (IS_ERR(inode_p)) -+ return PTR_ERR(inode_p); -+again: -+ for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(0, start), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (bkey_cmp(iter->pos, POS(0, max)) > 0) -+ break; -+ -+ if (k.k->type != KEY_TYPE_inode) -+ goto found_slot; -+ } -+ -+ bch2_trans_iter_put(trans, iter); -+ -+ if (ret) -+ return ret; -+ -+ if (start != min) { -+ /* Retry from start */ -+ start = min; -+ goto again; -+ } -+ -+ return -ENOSPC; -+found_slot: -+ *hint = k.k->p.offset; -+ inode_u->bi_inum = k.k->p.offset; -+ inode_u->bi_generation = bkey_generation(k); -+ -+ bch2_inode_pack(inode_p, inode_u); -+ bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); -+ bch2_trans_iter_put(trans, iter); -+ return 0; -+} -+ -+int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_i_inode_generation delete; -+ struct bpos start = POS(inode_nr, 0); -+ struct bpos end = POS(inode_nr + 1, 0); -+ int ret; -+ -+ /* -+ * If this was a directory, there shouldn't be any real dirents left - -+ * but there could be whiteouts (from hash collisions) that we should -+ * delete: -+ * -+ * XXX: the dirent could ideally would delete whiteouts when they're no -+ * longer needed -+ */ -+ ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS, -+ start, end, NULL) ?: -+ bch2_btree_delete_range(c, BTREE_ID_XATTRS, -+ start, end, NULL) ?: -+ bch2_btree_delete_range(c, BTREE_ID_DIRENTS, -+ start, end, NULL); -+ if (ret) -+ return ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ do { -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); -+ u32 bi_generation = 0; -+ -+ ret = bkey_err(k); -+ if (ret) -+ break; -+ -+ bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c, -+ "inode %llu not found when deleting", -+ inode_nr); -+ -+ switch (k.k->type) { -+ case KEY_TYPE_inode: { -+ struct 
bch_inode_unpacked inode_u; -+ -+ if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u)) -+ bi_generation = inode_u.bi_generation + 1; -+ break; -+ } -+ case KEY_TYPE_inode_generation: { -+ struct bkey_s_c_inode_generation g = -+ bkey_s_c_to_inode_generation(k); -+ bi_generation = le32_to_cpu(g.v->bi_generation); -+ break; -+ } -+ } -+ -+ if (!bi_generation) { -+ bkey_init(&delete.k); -+ delete.k.p.offset = inode_nr; -+ } else { -+ bkey_inode_generation_init(&delete.k_i); -+ delete.k.p.offset = inode_nr; -+ delete.v.bi_generation = cpu_to_le32(bi_generation); -+ } -+ -+ bch2_trans_update(&trans, iter, &delete.k_i, 0); -+ -+ ret = bch2_trans_commit(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+ } while (ret == -EINTR); -+ -+ bch2_trans_exit(&trans); -+ return ret; -+} -+ -+int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr, -+ struct bch_inode_unpacked *inode) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, -+ POS(0, inode_nr), BTREE_ITER_SLOTS); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ ret = k.k->type == KEY_TYPE_inode -+ ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode) -+ : -ENOENT; -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, -+ struct bch_inode_unpacked *inode) -+{ -+ return bch2_trans_do(c, NULL, NULL, 0, -+ bch2_inode_find_by_inum_trans(&trans, inode_nr, inode)); -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_inode_pack_test(void) -+{ -+ struct bch_inode_unpacked *u, test_inodes[] = { -+ { -+ .bi_atime = U64_MAX, -+ .bi_ctime = U64_MAX, -+ .bi_mtime = U64_MAX, -+ .bi_otime = U64_MAX, -+ .bi_size = U64_MAX, -+ .bi_sectors = U64_MAX, -+ .bi_uid = U32_MAX, -+ .bi_gid = U32_MAX, -+ .bi_nlink = U32_MAX, -+ .bi_generation = U32_MAX, -+ .bi_dev = U32_MAX, -+ }, -+ }; -+ -+ for (u = test_inodes; -+ u < test_inodes + ARRAY_SIZE(test_inodes); -+ u++) { -+ struct bkey_inode_buf p; -+ -+ bch2_inode_pack(&p, u); -+ } -+} -+#endif -diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h -new file mode 100644 -index 000000000000..bb759a46dc41 ---- /dev/null -+++ b/fs/bcachefs/inode.h -@@ -0,0 +1,177 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_INODE_H -+#define _BCACHEFS_INODE_H -+ -+#include "opts.h" -+ -+extern const char * const bch2_inode_opts[]; -+ -+const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_inode (struct bkey_ops) { \ -+ .key_invalid = bch2_inode_invalid, \ -+ .val_to_text = bch2_inode_to_text, \ -+} -+ -+const char *bch2_inode_generation_invalid(const struct bch_fs *, -+ struct bkey_s_c); -+void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+#define bch2_bkey_ops_inode_generation (struct bkey_ops) { \ -+ .key_invalid = bch2_inode_generation_invalid, \ -+ .val_to_text = bch2_inode_generation_to_text, \ -+} -+ -+struct bch_inode_unpacked { -+ u64 bi_inum; -+ __le64 bi_hash_seed; -+ u32 bi_flags; -+ u16 bi_mode; -+ -+#define x(_name, _bits) u##_bits _name; -+ BCH_INODE_FIELDS() -+#undef x -+}; -+ -+struct bkey_inode_buf { -+ struct bkey_i_inode inode; -+ -+#define x(_name, _bits) + 8 + _bits / 8 -+ u8 _pad[0 + BCH_INODE_FIELDS()]; -+#undef x -+} __attribute__((packed, aligned(8))); -+ -+void bch2_inode_pack(struct 
bkey_inode_buf *, const struct bch_inode_unpacked *); -+int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *); -+ -+struct btree_iter *bch2_inode_peek(struct btree_trans *, -+ struct bch_inode_unpacked *, u64, unsigned); -+int bch2_inode_write(struct btree_trans *, struct btree_iter *, -+ struct bch_inode_unpacked *); -+ -+void bch2_inode_init_early(struct bch_fs *, -+ struct bch_inode_unpacked *); -+void bch2_inode_init_late(struct bch_inode_unpacked *, u64, -+ uid_t, gid_t, umode_t, dev_t, -+ struct bch_inode_unpacked *); -+void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *, -+ uid_t, gid_t, umode_t, dev_t, -+ struct bch_inode_unpacked *); -+ -+int bch2_inode_create(struct btree_trans *, -+ struct bch_inode_unpacked *, -+ u64, u64, u64 *); -+ -+int bch2_inode_rm(struct bch_fs *, u64); -+ -+int bch2_inode_find_by_inum_trans(struct btree_trans *, u64, -+ struct bch_inode_unpacked *); -+int bch2_inode_find_by_inum(struct bch_fs *, u64, struct bch_inode_unpacked *); -+ -+static inline struct bch_io_opts bch2_inode_opts_get(struct bch_inode_unpacked *inode) -+{ -+ struct bch_io_opts ret = { 0 }; -+ -+#define x(_name, _bits) \ -+ if (inode->bi_##_name) \ -+ opt_set(ret, _name, inode->bi_##_name - 1); -+ BCH_INODE_OPTS() -+#undef x -+ return ret; -+} -+ -+static inline void bch2_inode_opt_set(struct bch_inode_unpacked *inode, -+ enum inode_opt_id id, u64 v) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Inode_opt_##_name: \ -+ inode->bi_##_name = v; \ -+ break; -+ BCH_INODE_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+static inline u64 bch2_inode_opt_get(struct bch_inode_unpacked *inode, -+ enum inode_opt_id id) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Inode_opt_##_name: \ -+ return inode->bi_##_name; -+ BCH_INODE_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+static inline struct bch_io_opts -+io_opts(struct bch_fs *c, struct bch_inode_unpacked *inode) -+{ -+ struct bch_io_opts opts = bch2_opts_to_inode_opts(c->opts); -+ -+ bch2_io_opts_apply(&opts, bch2_inode_opts_get(inode)); -+ return opts; -+} -+ -+static inline u8 mode_to_type(umode_t mode) -+{ -+ return (mode >> 12) & 15; -+} -+ -+/* i_nlink: */ -+ -+static inline unsigned nlink_bias(umode_t mode) -+{ -+ return S_ISDIR(mode) ? 2 : 1; -+} -+ -+static inline void bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) -+{ -+ if (bi->bi_flags & BCH_INODE_UNLINKED) -+ bi->bi_flags &= ~BCH_INODE_UNLINKED; -+ else -+ bi->bi_nlink++; -+} -+ -+static inline void bch2_inode_nlink_dec(struct bch_inode_unpacked *bi) -+{ -+ BUG_ON(bi->bi_flags & BCH_INODE_UNLINKED); -+ if (bi->bi_nlink) -+ bi->bi_nlink--; -+ else -+ bi->bi_flags |= BCH_INODE_UNLINKED; -+} -+ -+static inline unsigned bch2_inode_nlink_get(struct bch_inode_unpacked *bi) -+{ -+ return bi->bi_flags & BCH_INODE_UNLINKED -+ ? 
0 -+ : bi->bi_nlink + nlink_bias(bi->bi_mode); -+} -+ -+static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi, -+ unsigned nlink) -+{ -+ if (nlink) { -+ bi->bi_nlink = nlink - nlink_bias(bi->bi_mode); -+ bi->bi_flags &= ~BCH_INODE_UNLINKED; -+ } else { -+ bi->bi_nlink = 0; -+ bi->bi_flags |= BCH_INODE_UNLINKED; -+ } -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_inode_pack_test(void); -+#else -+static inline void bch2_inode_pack_test(void) {} -+#endif -+ -+#endif /* _BCACHEFS_INODE_H */ -diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c -new file mode 100644 -index 000000000000..8d608c900525 ---- /dev/null -+++ b/fs/bcachefs/io.c -@@ -0,0 +1,2355 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Some low level IO code, and hacks for various block layer limitations -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_on_stack.h" -+#include "bset.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "compress.h" -+#include "clock.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "extent_update.h" -+#include "inode.h" -+#include "io.h" -+#include "journal.h" -+#include "keylist.h" -+#include "move.h" -+#include "rebalance.h" -+#include "super.h" -+#include "super-io.h" -+ -+#include -+#include -+ -+#include -+ -+static bool bch2_target_congested(struct bch_fs *c, u16 target) -+{ -+ const struct bch_devs_mask *devs; -+ unsigned d, nr = 0, total = 0; -+ u64 now = local_clock(), last; -+ s64 congested; -+ struct bch_dev *ca; -+ -+ if (!target) -+ return false; -+ -+ rcu_read_lock(); -+ devs = bch2_target_to_mask(c, target); -+ for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) { -+ ca = rcu_dereference(c->devs[d]); -+ if (!ca) -+ continue; -+ -+ congested = atomic_read(&ca->congested); -+ last = READ_ONCE(ca->congested_last); -+ if (time_after64(now, last)) -+ congested -= (now - last) >> 12; -+ -+ total += max(congested, 0LL); -+ nr++; -+ } -+ rcu_read_unlock(); -+ -+ return bch2_rand_range(nr * CONGESTED_MAX) < total; -+} -+ -+static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency, -+ u64 now, int rw) -+{ -+ u64 latency_capable = -+ ca->io_latency[rw].quantiles.entries[QUANTILE_IDX(1)].m; -+ /* ideally we'd be taking into account the device's variance here: */ -+ u64 latency_threshold = latency_capable << (rw == READ ? 2 : 3); -+ s64 latency_over = io_latency - latency_threshold; -+ -+ if (latency_threshold && latency_over > 0) { -+ /* -+ * bump up congested by approximately latency_over * 4 / -+ * latency_threshold - we don't need much accuracy here so don't -+ * bother with the divide: -+ */ -+ if (atomic_read(&ca->congested) < CONGESTED_MAX) -+ atomic_add(latency_over >> -+ max_t(int, ilog2(latency_threshold) - 2, 0), -+ &ca->congested); -+ -+ ca->congested_last = now; -+ } else if (atomic_read(&ca->congested) > 0) { -+ atomic_dec(&ca->congested); -+ } -+} -+ -+void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) -+{ -+ atomic64_t *latency = &ca->cur_latency[rw]; -+ u64 now = local_clock(); -+ u64 io_latency = time_after64(now, submit_time) -+ ? 
now - submit_time -+ : 0; -+ u64 old, new, v = atomic64_read(latency); -+ -+ do { -+ old = v; -+ -+ /* -+ * If the io latency was reasonably close to the current -+ * latency, skip doing the update and atomic operation - most of -+ * the time: -+ */ -+ if (abs((int) (old - io_latency)) < (old >> 1) && -+ now & ~(~0 << 5)) -+ break; -+ -+ new = ewma_add(old, io_latency, 5); -+ } while ((v = atomic64_cmpxchg(latency, old, new)) != old); -+ -+ bch2_congested_acct(ca, io_latency, now, rw); -+ -+ __bch2_time_stats_update(&ca->io_latency[rw], submit_time, now); -+} -+ -+/* Allocate, free from mempool: */ -+ -+void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio) -+{ -+ struct bvec_iter_all iter; -+ struct bio_vec *bv; -+ -+ bio_for_each_segment_all(bv, bio, iter) -+ if (bv->bv_page != ZERO_PAGE(0)) -+ mempool_free(bv->bv_page, &c->bio_bounce_pages); -+ bio->bi_vcnt = 0; -+} -+ -+static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool) -+{ -+ struct page *page; -+ -+ if (likely(!*using_mempool)) { -+ page = alloc_page(GFP_NOIO); -+ if (unlikely(!page)) { -+ mutex_lock(&c->bio_bounce_pages_lock); -+ *using_mempool = true; -+ goto pool_alloc; -+ -+ } -+ } else { -+pool_alloc: -+ page = mempool_alloc(&c->bio_bounce_pages, GFP_NOIO); -+ } -+ -+ return page; -+} -+ -+void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio, -+ size_t size) -+{ -+ bool using_mempool = false; -+ -+ while (size) { -+ struct page *page = __bio_alloc_page_pool(c, &using_mempool); -+ unsigned len = min(PAGE_SIZE, size); -+ -+ BUG_ON(!bio_add_page(bio, page, len, 0)); -+ size -= len; -+ } -+ -+ if (using_mempool) -+ mutex_unlock(&c->bio_bounce_pages_lock); -+} -+ -+/* Extent update path: */ -+ -+static int sum_sector_overwrites(struct btree_trans *trans, -+ struct btree_iter *extent_iter, -+ struct bkey_i *new, -+ bool may_allocate, -+ bool *maybe_extending, -+ s64 *delta) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c old; -+ int ret = 0; -+ -+ *maybe_extending = true; -+ *delta = 0; -+ -+ iter = bch2_trans_copy_iter(trans, extent_iter); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) { -+ if (!may_allocate && -+ bch2_bkey_nr_ptrs_fully_allocated(old) < -+ bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new))) { -+ ret = -ENOSPC; -+ break; -+ } -+ -+ *delta += (min(new->k.p.offset, -+ old.k->p.offset) - -+ max(bkey_start_offset(&new->k), -+ bkey_start_offset(old.k))) * -+ (bkey_extent_is_allocation(&new->k) - -+ bkey_extent_is_allocation(old.k)); -+ -+ if (bkey_cmp(old.k->p, new->k.p) >= 0) { -+ /* -+ * Check if there's already data above where we're -+ * going to be writing to - this means we're definitely -+ * not extending the file: -+ * -+ * Note that it's not sufficient to check if there's -+ * data up to the sector offset we're going to be -+ * writing to, because i_size could be up to one block -+ * less: -+ */ -+ if (!bkey_cmp(old.k->p, new->k.p)) -+ old = bch2_btree_iter_next(iter); -+ -+ if (old.k && !bkey_err(old) && -+ old.k->p.inode == extent_iter->pos.inode && -+ bkey_extent_is_data(old.k)) -+ *maybe_extending = false; -+ -+ break; -+ } -+ } -+ -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+int bch2_extent_update(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *k, -+ struct disk_reservation *disk_res, -+ u64 *journal_seq, -+ u64 new_i_size, -+ s64 *i_sectors_delta) -+{ -+ /* this must live until after bch2_trans_commit(): */ -+ struct bkey_inode_buf inode_p; -+ bool 
extending = false; -+ s64 delta = 0; -+ int ret; -+ -+ ret = bch2_extent_trim_atomic(k, iter); -+ if (ret) -+ return ret; -+ -+ ret = sum_sector_overwrites(trans, iter, k, -+ disk_res && disk_res->sectors != 0, -+ &extending, &delta); -+ if (ret) -+ return ret; -+ -+ new_i_size = extending -+ ? min(k->k.p.offset << 9, new_i_size) -+ : 0; -+ -+ if (delta || new_i_size) { -+ struct btree_iter *inode_iter; -+ struct bch_inode_unpacked inode_u; -+ -+ inode_iter = bch2_inode_peek(trans, &inode_u, -+ k->k.p.inode, BTREE_ITER_INTENT); -+ if (IS_ERR(inode_iter)) -+ return PTR_ERR(inode_iter); -+ -+ /* -+ * XXX: -+ * writeback can race a bit with truncate, because truncate -+ * first updates the inode then truncates the pagecache. This is -+ * ugly, but lets us preserve the invariant that the in memory -+ * i_size is always >= the on disk i_size. -+ * -+ BUG_ON(new_i_size > inode_u.bi_size && -+ (inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY)); -+ */ -+ BUG_ON(new_i_size > inode_u.bi_size && !extending); -+ -+ if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && -+ new_i_size > inode_u.bi_size) -+ inode_u.bi_size = new_i_size; -+ else -+ new_i_size = 0; -+ -+ inode_u.bi_sectors += delta; -+ -+ if (delta || new_i_size) { -+ bch2_inode_pack(&inode_p, &inode_u); -+ bch2_trans_update(trans, inode_iter, -+ &inode_p.inode.k_i, 0); -+ } -+ -+ bch2_trans_iter_put(trans, inode_iter); -+ } -+ -+ bch2_trans_update(trans, iter, k, 0); -+ -+ ret = bch2_trans_commit(trans, disk_res, journal_seq, -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE); -+ if (!ret && i_sectors_delta) -+ *i_sectors_delta += delta; -+ -+ return ret; -+} -+ -+int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, -+ struct bpos end, u64 *journal_seq, -+ s64 *i_sectors_delta) -+{ -+ struct bch_fs *c = trans->c; -+ unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); -+ struct bkey_s_c k; -+ int ret = 0, ret2 = 0; -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ bkey_cmp(iter->pos, end) < 0) { -+ struct disk_reservation disk_res = -+ bch2_disk_reservation_init(c, 0); -+ struct bkey_i delete; -+ -+ bch2_trans_begin(trans); -+ -+ ret = bkey_err(k); -+ if (ret) -+ goto btree_err; -+ -+ bkey_init(&delete.k); -+ delete.k.p = iter->pos; -+ -+ /* create the biggest key we can */ -+ bch2_key_resize(&delete.k, max_sectors); -+ bch2_cut_back(end, &delete); -+ -+ ret = bch2_extent_update(trans, iter, &delete, -+ &disk_res, journal_seq, -+ 0, i_sectors_delta); -+ bch2_disk_reservation_put(c, &disk_res); -+btree_err: -+ if (ret == -EINTR) { -+ ret2 = ret; -+ ret = 0; -+ } -+ if (ret) -+ break; -+ } -+ -+ if (bkey_cmp(iter->pos, end) > 0) { -+ bch2_btree_iter_set_pos(iter, end); -+ ret = bch2_btree_iter_traverse(iter); -+ } -+ -+ return ret ?: ret2; -+} -+ -+int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end, -+ u64 *journal_seq, s64 *i_sectors_delta) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(inum, start), -+ BTREE_ITER_INTENT); -+ -+ ret = bch2_fpunch_at(&trans, iter, POS(inum, end), -+ journal_seq, i_sectors_delta); -+ bch2_trans_exit(&trans); -+ -+ if (ret == -EINTR) -+ ret = 0; -+ -+ return ret; -+} -+ -+int bch2_write_index_default(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct bkey_on_stack sk; -+ struct keylist *keys = &op->insert_keys; -+ struct bkey_i *k = bch2_keylist_front(keys); -+ struct btree_trans 
trans; -+ struct btree_iter *iter; -+ int ret; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ bkey_start_pos(&k->k), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ -+ do { -+ bch2_trans_begin(&trans); -+ -+ k = bch2_keylist_front(keys); -+ -+ bkey_on_stack_realloc(&sk, c, k->k.u64s); -+ bkey_copy(sk.k, k); -+ bch2_cut_front(iter->pos, sk.k); -+ -+ ret = bch2_extent_update(&trans, iter, sk.k, -+ &op->res, op_journal_seq(op), -+ op->new_i_size, &op->i_sectors_delta); -+ if (ret == -EINTR) -+ continue; -+ if (ret) -+ break; -+ -+ if (bkey_cmp(iter->pos, k->k.p) >= 0) -+ bch2_keylist_pop_front(keys); -+ } while (!bch2_keylist_empty(keys)); -+ -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ -+ return ret; -+} -+ -+/* Writes */ -+ -+void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, -+ enum bch_data_type type, -+ const struct bkey_i *k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); -+ const struct bch_extent_ptr *ptr; -+ struct bch_write_bio *n; -+ struct bch_dev *ca; -+ -+ BUG_ON(c->opts.nochanges); -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX || -+ !c->devs[ptr->dev]); -+ -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ if (to_entry(ptr + 1) < ptrs.end) { -+ n = to_wbio(bio_clone_fast(&wbio->bio, GFP_NOIO, -+ &ca->replica_set)); -+ -+ n->bio.bi_end_io = wbio->bio.bi_end_io; -+ n->bio.bi_private = wbio->bio.bi_private; -+ n->parent = wbio; -+ n->split = true; -+ n->bounce = false; -+ n->put_bio = true; -+ n->bio.bi_opf = wbio->bio.bi_opf; -+ bio_inc_remaining(&wbio->bio); -+ } else { -+ n = wbio; -+ n->split = false; -+ } -+ -+ n->c = c; -+ n->dev = ptr->dev; -+ n->have_ioref = bch2_dev_get_ioref(ca, WRITE); -+ n->submit_time = local_clock(); -+ n->bio.bi_iter.bi_sector = ptr->offset; -+ -+ if (!journal_flushes_device(ca)) -+ n->bio.bi_opf |= REQ_FUA; -+ -+ if (likely(n->have_ioref)) { -+ this_cpu_add(ca->io_done->sectors[WRITE][type], -+ bio_sectors(&n->bio)); -+ -+ bio_set_dev(&n->bio, ca->disk_sb.bdev); -+ submit_bio(&n->bio); -+ } else { -+ n->bio.bi_status = BLK_STS_REMOVED; -+ bio_endio(&n->bio); -+ } -+ } -+} -+ -+static void __bch2_write(struct closure *); -+ -+static void bch2_write_done(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bch_fs *c = op->c; -+ -+ if (!op->error && (op->flags & BCH_WRITE_FLUSH)) -+ op->error = bch2_journal_error(&c->journal); -+ -+ bch2_disk_reservation_put(c, &op->res); -+ percpu_ref_put(&c->writes); -+ bch2_keylist_free(&op->insert_keys, op->inline_keys); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time); -+ -+ if (!(op->flags & BCH_WRITE_FROM_INTERNAL)) -+ up(&c->io_in_flight); -+ -+ if (op->end_io) { -+ EBUG_ON(cl->parent); -+ closure_debug_destroy(cl); -+ op->end_io(op); -+ } else { -+ closure_return(cl); -+ } -+} -+ -+/** -+ * bch_write_index - after a write, update index to point to new data -+ */ -+static void __bch2_write_index(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct keylist *keys = &op->insert_keys; -+ struct bch_extent_ptr *ptr; -+ struct bkey_i *src, *dst = keys->keys, *n, *k; -+ unsigned dev; -+ int ret; -+ -+ for (src = keys->keys; src != keys->top; src = n) { -+ n = bkey_next(src); -+ -+ if (bkey_extent_is_direct_data(&src->k)) { -+ bch2_bkey_drop_ptrs(bkey_i_to_s(src), ptr, -+ test_bit(ptr->dev, op->failed.d)); -+ -+ if 
(!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src))) { -+ ret = -EIO; -+ goto err; -+ } -+ } -+ -+ if (dst != src) -+ memmove_u64s_down(dst, src, src->u64s); -+ dst = bkey_next(dst); -+ } -+ -+ keys->top = dst; -+ -+ /* -+ * probably not the ideal place to hook this in, but I don't -+ * particularly want to plumb io_opts all the way through the btree -+ * update stack right now -+ */ -+ for_each_keylist_key(keys, k) { -+ bch2_rebalance_add_key(c, bkey_i_to_s_c(k), &op->opts); -+ -+ if (bch2_bkey_is_incompressible(bkey_i_to_s_c(k))) -+ bch2_check_set_feature(op->c, BCH_FEATURE_incompressible); -+ -+ } -+ -+ if (!bch2_keylist_empty(keys)) { -+ u64 sectors_start = keylist_sectors(keys); -+ int ret = op->index_update_fn(op); -+ -+ BUG_ON(ret == -EINTR); -+ BUG_ON(keylist_sectors(keys) && !ret); -+ -+ op->written += sectors_start - keylist_sectors(keys); -+ -+ if (ret) { -+ __bcache_io_error(c, "btree IO error %i", ret); -+ op->error = ret; -+ } -+ } -+out: -+ /* If some a bucket wasn't written, we can't erasure code it: */ -+ for_each_set_bit(dev, op->failed.d, BCH_SB_MEMBERS_MAX) -+ bch2_open_bucket_write_error(c, &op->open_buckets, dev); -+ -+ bch2_open_buckets_put(c, &op->open_buckets); -+ return; -+err: -+ keys->top = keys->keys; -+ op->error = ret; -+ goto out; -+} -+ -+static void bch2_write_index(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bch_fs *c = op->c; -+ -+ __bch2_write_index(op); -+ -+ if (!(op->flags & BCH_WRITE_DONE)) { -+ continue_at(cl, __bch2_write, index_update_wq(op)); -+ } else if (!op->error && (op->flags & BCH_WRITE_FLUSH)) { -+ bch2_journal_flush_seq_async(&c->journal, -+ *op_journal_seq(op), -+ cl); -+ continue_at(cl, bch2_write_done, index_update_wq(op)); -+ } else { -+ continue_at_nobarrier(cl, bch2_write_done, NULL); -+ } -+} -+ -+static void bch2_write_endio(struct bio *bio) -+{ -+ struct closure *cl = bio->bi_private; -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bch_write_bio *wbio = to_wbio(bio); -+ struct bch_write_bio *parent = wbio->split ? 
wbio->parent : NULL; -+ struct bch_fs *c = wbio->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev); -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "data write: %s", -+ blk_status_to_str(bio->bi_status))) -+ set_bit(wbio->dev, op->failed.d); -+ -+ if (wbio->have_ioref) { -+ bch2_latency_acct(ca, wbio->submit_time, WRITE); -+ percpu_ref_put(&ca->io_ref); -+ } -+ -+ if (wbio->bounce) -+ bch2_bio_free_pages_pool(c, bio); -+ -+ if (wbio->put_bio) -+ bio_put(bio); -+ -+ if (parent) -+ bio_endio(&parent->bio); -+ else if (!(op->flags & BCH_WRITE_SKIP_CLOSURE_PUT)) -+ closure_put(cl); -+ else -+ continue_at_nobarrier(cl, bch2_write_index, index_update_wq(op)); -+} -+ -+static void init_append_extent(struct bch_write_op *op, -+ struct write_point *wp, -+ struct bversion version, -+ struct bch_extent_crc_unpacked crc) -+{ -+ struct bch_fs *c = op->c; -+ struct bkey_i_extent *e; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ BUG_ON(crc.compressed_size > wp->sectors_free); -+ wp->sectors_free -= crc.compressed_size; -+ op->pos.offset += crc.uncompressed_size; -+ -+ e = bkey_extent_init(op->insert_keys.top); -+ e->k.p = op->pos; -+ e->k.size = crc.uncompressed_size; -+ e->k.version = version; -+ -+ if (crc.csum_type || -+ crc.compression_type || -+ crc.nonce) -+ bch2_extent_crc_append(&e->k_i, crc); -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ union bch_extent_entry *end = -+ bkey_val_end(bkey_i_to_s(&e->k_i)); -+ -+ end->ptr = ob->ptr; -+ end->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; -+ end->ptr.cached = !ca->mi.durability || -+ (op->flags & BCH_WRITE_CACHED) != 0; -+ end->ptr.offset += ca->mi.bucket_size - ob->sectors_free; -+ -+ e->k.u64s++; -+ -+ BUG_ON(crc.compressed_size > ob->sectors_free); -+ ob->sectors_free -= crc.compressed_size; -+ } -+ -+ bch2_keylist_push(&op->insert_keys); -+} -+ -+static struct bio *bch2_write_bio_alloc(struct bch_fs *c, -+ struct write_point *wp, -+ struct bio *src, -+ bool *page_alloc_failed, -+ void *buf) -+{ -+ struct bch_write_bio *wbio; -+ struct bio *bio; -+ unsigned output_available = -+ min(wp->sectors_free << 9, src->bi_iter.bi_size); -+ unsigned pages = DIV_ROUND_UP(output_available + -+ (buf -+ ? 
((unsigned long) buf & (PAGE_SIZE - 1)) -+ : 0), PAGE_SIZE); -+ -+ bio = bio_alloc_bioset(GFP_NOIO, pages, &c->bio_write); -+ wbio = wbio_init(bio); -+ wbio->put_bio = true; -+ /* copy WRITE_SYNC flag */ -+ wbio->bio.bi_opf = src->bi_opf; -+ -+ if (buf) { -+ bch2_bio_map(bio, buf, output_available); -+ return bio; -+ } -+ -+ wbio->bounce = true; -+ -+ /* -+ * We can't use mempool for more than c->sb.encoded_extent_max -+ * worth of pages, but we'd like to allocate more if we can: -+ */ -+ bch2_bio_alloc_pages_pool(c, bio, -+ min_t(unsigned, output_available, -+ c->sb.encoded_extent_max << 9)); -+ -+ if (bio->bi_iter.bi_size < output_available) -+ *page_alloc_failed = -+ bch2_bio_alloc_pages(bio, -+ output_available - -+ bio->bi_iter.bi_size, -+ GFP_NOFS) != 0; -+ -+ return bio; -+} -+ -+static int bch2_write_rechecksum(struct bch_fs *c, -+ struct bch_write_op *op, -+ unsigned new_csum_type) -+{ -+ struct bio *bio = &op->wbio.bio; -+ struct bch_extent_crc_unpacked new_crc; -+ int ret; -+ -+ /* bch2_rechecksum_bio() can't encrypt or decrypt data: */ -+ -+ if (bch2_csum_type_is_encryption(op->crc.csum_type) != -+ bch2_csum_type_is_encryption(new_csum_type)) -+ new_csum_type = op->crc.csum_type; -+ -+ ret = bch2_rechecksum_bio(c, bio, op->version, op->crc, -+ NULL, &new_crc, -+ op->crc.offset, op->crc.live_size, -+ new_csum_type); -+ if (ret) -+ return ret; -+ -+ bio_advance(bio, op->crc.offset << 9); -+ bio->bi_iter.bi_size = op->crc.live_size << 9; -+ op->crc = new_crc; -+ return 0; -+} -+ -+static int bch2_write_decrypt(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct nonce nonce = extent_nonce(op->version, op->crc); -+ struct bch_csum csum; -+ -+ if (!bch2_csum_type_is_encryption(op->crc.csum_type)) -+ return 0; -+ -+ /* -+ * If we need to decrypt data in the write path, we'll no longer be able -+ * to verify the existing checksum (poly1305 mac, in this case) after -+ * it's decrypted - this is the last point we'll be able to reverify the -+ * checksum: -+ */ -+ csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); -+ if (bch2_crc_cmp(op->crc.csum, csum)) -+ return -EIO; -+ -+ bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); -+ op->crc.csum_type = 0; -+ op->crc.csum = (struct bch_csum) { 0, 0 }; -+ return 0; -+} -+ -+static enum prep_encoded_ret { -+ PREP_ENCODED_OK, -+ PREP_ENCODED_ERR, -+ PREP_ENCODED_CHECKSUM_ERR, -+ PREP_ENCODED_DO_WRITE, -+} bch2_write_prep_encoded_data(struct bch_write_op *op, struct write_point *wp) -+{ -+ struct bch_fs *c = op->c; -+ struct bio *bio = &op->wbio.bio; -+ -+ if (!(op->flags & BCH_WRITE_DATA_ENCODED)) -+ return PREP_ENCODED_OK; -+ -+ BUG_ON(bio_sectors(bio) != op->crc.compressed_size); -+ -+ /* Can we just write the entire extent as is? 
*/ -+ if (op->crc.uncompressed_size == op->crc.live_size && -+ op->crc.compressed_size <= wp->sectors_free && -+ (op->crc.compression_type == op->compression_type || -+ op->incompressible)) { -+ if (!crc_is_compressed(op->crc) && -+ op->csum_type != op->crc.csum_type && -+ bch2_write_rechecksum(c, op, op->csum_type)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ return PREP_ENCODED_DO_WRITE; -+ } -+ -+ /* -+ * If the data is compressed and we couldn't write the entire extent as -+ * is, we have to decompress it: -+ */ -+ if (crc_is_compressed(op->crc)) { -+ struct bch_csum csum; -+ -+ if (bch2_write_decrypt(op)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ /* Last point we can still verify checksum: */ -+ csum = bch2_checksum_bio(c, op->crc.csum_type, -+ extent_nonce(op->version, op->crc), -+ bio); -+ if (bch2_crc_cmp(op->crc.csum, csum)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ if (bch2_bio_uncompress_inplace(c, bio, &op->crc)) -+ return PREP_ENCODED_ERR; -+ } -+ -+ /* -+ * No longer have compressed data after this point - data might be -+ * encrypted: -+ */ -+ -+ /* -+ * If the data is checksummed and we're only writing a subset, -+ * rechecksum and adjust bio to point to currently live data: -+ */ -+ if ((op->crc.live_size != op->crc.uncompressed_size || -+ op->crc.csum_type != op->csum_type) && -+ bch2_write_rechecksum(c, op, op->csum_type)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ /* -+ * If we want to compress the data, it has to be decrypted: -+ */ -+ if ((op->compression_type || -+ bch2_csum_type_is_encryption(op->crc.csum_type) != -+ bch2_csum_type_is_encryption(op->csum_type)) && -+ bch2_write_decrypt(op)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ return PREP_ENCODED_OK; -+} -+ -+static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, -+ struct bio **_dst) -+{ -+ struct bch_fs *c = op->c; -+ struct bio *src = &op->wbio.bio, *dst = src; -+ struct bvec_iter saved_iter; -+ void *ec_buf; -+ struct bpos ec_pos = op->pos; -+ unsigned total_output = 0, total_input = 0; -+ bool bounce = false; -+ bool page_alloc_failed = false; -+ int ret, more = 0; -+ -+ BUG_ON(!bio_sectors(src)); -+ -+ ec_buf = bch2_writepoint_ec_buf(c, wp); -+ -+ switch (bch2_write_prep_encoded_data(op, wp)) { -+ case PREP_ENCODED_OK: -+ break; -+ case PREP_ENCODED_ERR: -+ ret = -EIO; -+ goto err; -+ case PREP_ENCODED_CHECKSUM_ERR: -+ BUG(); -+ goto csum_err; -+ case PREP_ENCODED_DO_WRITE: -+ /* XXX look for bug here */ -+ if (ec_buf) { -+ dst = bch2_write_bio_alloc(c, wp, src, -+ &page_alloc_failed, -+ ec_buf); -+ bio_copy_data(dst, src); -+ bounce = true; -+ } -+ init_append_extent(op, wp, op->version, op->crc); -+ goto do_write; -+ } -+ -+ if (ec_buf || -+ op->compression_type || -+ (op->csum_type && -+ !(op->flags & BCH_WRITE_PAGES_STABLE)) || -+ (bch2_csum_type_is_encryption(op->csum_type) && -+ !(op->flags & BCH_WRITE_PAGES_OWNED))) { -+ dst = bch2_write_bio_alloc(c, wp, src, -+ &page_alloc_failed, -+ ec_buf); -+ bounce = true; -+ } -+ -+ saved_iter = dst->bi_iter; -+ -+ do { -+ struct bch_extent_crc_unpacked crc = -+ (struct bch_extent_crc_unpacked) { 0 }; -+ struct bversion version = op->version; -+ size_t dst_len, src_len; -+ -+ if (page_alloc_failed && -+ bio_sectors(dst) < wp->sectors_free && -+ bio_sectors(dst) < c->sb.encoded_extent_max) -+ break; -+ -+ BUG_ON(op->compression_type && -+ (op->flags & BCH_WRITE_DATA_ENCODED) && -+ bch2_csum_type_is_encryption(op->crc.csum_type)); -+ BUG_ON(op->compression_type && !bounce); -+ -+ crc.compression_type = op->incompressible -+ ? 
BCH_COMPRESSION_TYPE_incompressible -+ : op->compression_type -+ ? bch2_bio_compress(c, dst, &dst_len, src, &src_len, -+ op->compression_type) -+ : 0; -+ if (!crc_is_compressed(crc)) { -+ dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size); -+ dst_len = min_t(unsigned, dst_len, wp->sectors_free << 9); -+ -+ if (op->csum_type) -+ dst_len = min_t(unsigned, dst_len, -+ c->sb.encoded_extent_max << 9); -+ -+ if (bounce) { -+ swap(dst->bi_iter.bi_size, dst_len); -+ bio_copy_data(dst, src); -+ swap(dst->bi_iter.bi_size, dst_len); -+ } -+ -+ src_len = dst_len; -+ } -+ -+ BUG_ON(!src_len || !dst_len); -+ -+ if (bch2_csum_type_is_encryption(op->csum_type)) { -+ if (bversion_zero(version)) { -+ version.lo = atomic64_inc_return(&c->key_version); -+ } else { -+ crc.nonce = op->nonce; -+ op->nonce += src_len >> 9; -+ } -+ } -+ -+ if ((op->flags & BCH_WRITE_DATA_ENCODED) && -+ !crc_is_compressed(crc) && -+ bch2_csum_type_is_encryption(op->crc.csum_type) == -+ bch2_csum_type_is_encryption(op->csum_type)) { -+ /* -+ * Note: when we're using rechecksum(), we need to be -+ * checksumming @src because it has all the data our -+ * existing checksum covers - if we bounced (because we -+ * were trying to compress), @dst will only have the -+ * part of the data the new checksum will cover. -+ * -+ * But normally we want to be checksumming post bounce, -+ * because part of the reason for bouncing is so the -+ * data can't be modified (by userspace) while it's in -+ * flight. -+ */ -+ if (bch2_rechecksum_bio(c, src, version, op->crc, -+ &crc, &op->crc, -+ src_len >> 9, -+ bio_sectors(src) - (src_len >> 9), -+ op->csum_type)) -+ goto csum_err; -+ } else { -+ if ((op->flags & BCH_WRITE_DATA_ENCODED) && -+ bch2_rechecksum_bio(c, src, version, op->crc, -+ NULL, &op->crc, -+ src_len >> 9, -+ bio_sectors(src) - (src_len >> 9), -+ op->crc.csum_type)) -+ goto csum_err; -+ -+ crc.compressed_size = dst_len >> 9; -+ crc.uncompressed_size = src_len >> 9; -+ crc.live_size = src_len >> 9; -+ -+ swap(dst->bi_iter.bi_size, dst_len); -+ bch2_encrypt_bio(c, op->csum_type, -+ extent_nonce(version, crc), dst); -+ crc.csum = bch2_checksum_bio(c, op->csum_type, -+ extent_nonce(version, crc), dst); -+ crc.csum_type = op->csum_type; -+ swap(dst->bi_iter.bi_size, dst_len); -+ } -+ -+ init_append_extent(op, wp, version, crc); -+ -+ if (dst != src) -+ bio_advance(dst, dst_len); -+ bio_advance(src, src_len); -+ total_output += dst_len; -+ total_input += src_len; -+ } while (dst->bi_iter.bi_size && -+ src->bi_iter.bi_size && -+ wp->sectors_free && -+ !bch2_keylist_realloc(&op->insert_keys, -+ op->inline_keys, -+ ARRAY_SIZE(op->inline_keys), -+ BKEY_EXTENT_U64s_MAX)); -+ -+ more = src->bi_iter.bi_size != 0; -+ -+ dst->bi_iter = saved_iter; -+ -+ if (dst == src && more) { -+ BUG_ON(total_output != total_input); -+ -+ dst = bio_split(src, total_input >> 9, -+ GFP_NOIO, &c->bio_write); -+ wbio_init(dst)->put_bio = true; -+ /* copy WRITE_SYNC flag */ -+ dst->bi_opf = src->bi_opf; -+ } -+ -+ dst->bi_iter.bi_size = total_output; -+do_write: -+ /* might have done a realloc... 
*/ -+ bch2_ec_add_backpointer(c, wp, ec_pos, total_input >> 9); -+ -+ *_dst = dst; -+ return more; -+csum_err: -+ bch_err(c, "error verifying existing checksum while " -+ "rewriting existing data (memory corruption?)"); -+ ret = -EIO; -+err: -+ if (to_wbio(dst)->bounce) -+ bch2_bio_free_pages_pool(c, dst); -+ if (to_wbio(dst)->put_bio) -+ bio_put(dst); -+ -+ return ret; -+} -+ -+static void __bch2_write(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bch_fs *c = op->c; -+ struct write_point *wp; -+ struct bio *bio; -+ bool skip_put = true; -+ int ret; -+again: -+ memset(&op->failed, 0, sizeof(op->failed)); -+ -+ do { -+ struct bkey_i *key_to_write; -+ unsigned key_to_write_offset = op->insert_keys.top_p - -+ op->insert_keys.keys_p; -+ -+ /* +1 for possible cache device: */ -+ if (op->open_buckets.nr + op->nr_replicas + 1 > -+ ARRAY_SIZE(op->open_buckets.v)) -+ goto flush_io; -+ -+ if (bch2_keylist_realloc(&op->insert_keys, -+ op->inline_keys, -+ ARRAY_SIZE(op->inline_keys), -+ BKEY_EXTENT_U64s_MAX)) -+ goto flush_io; -+ -+ if ((op->flags & BCH_WRITE_FROM_INTERNAL) && -+ percpu_ref_is_dying(&c->writes)) { -+ ret = -EROFS; -+ goto err; -+ } -+ -+ wp = bch2_alloc_sectors_start(c, -+ op->target, -+ op->opts.erasure_code, -+ op->write_point, -+ &op->devs_have, -+ op->nr_replicas, -+ op->nr_replicas_required, -+ op->alloc_reserve, -+ op->flags, -+ (op->flags & BCH_WRITE_ALLOC_NOWAIT) ? NULL : cl); -+ EBUG_ON(!wp); -+ -+ if (unlikely(IS_ERR(wp))) { -+ if (unlikely(PTR_ERR(wp) != -EAGAIN)) { -+ ret = PTR_ERR(wp); -+ goto err; -+ } -+ -+ goto flush_io; -+ } -+ -+ bch2_open_bucket_get(c, wp, &op->open_buckets); -+ ret = bch2_write_extent(op, wp, &bio); -+ bch2_alloc_sectors_done(c, wp); -+ -+ if (ret < 0) -+ goto err; -+ -+ if (ret) { -+ skip_put = false; -+ } else { -+ /* -+ * for the skip_put optimization this has to be set -+ * before we submit the bio: -+ */ -+ op->flags |= BCH_WRITE_DONE; -+ } -+ -+ bio->bi_end_io = bch2_write_endio; -+ bio->bi_private = &op->cl; -+ bio->bi_opf |= REQ_OP_WRITE; -+ -+ if (!skip_put) -+ closure_get(bio->bi_private); -+ else -+ op->flags |= BCH_WRITE_SKIP_CLOSURE_PUT; -+ -+ key_to_write = (void *) (op->insert_keys.keys_p + -+ key_to_write_offset); -+ -+ bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_USER, -+ key_to_write); -+ } while (ret); -+ -+ if (!skip_put) -+ continue_at(cl, bch2_write_index, index_update_wq(op)); -+ return; -+err: -+ op->error = ret; -+ op->flags |= BCH_WRITE_DONE; -+ -+ continue_at(cl, bch2_write_index, index_update_wq(op)); -+ return; -+flush_io: -+ /* -+ * If the write can't all be submitted at once, we generally want to -+ * block synchronously as that signals backpressure to the caller. 
-+ * -+ * However, if we're running out of a workqueue, we can't block here -+ * because we'll be blocking other work items from completing: -+ */ -+ if (current->flags & PF_WQ_WORKER) { -+ continue_at(cl, bch2_write_index, index_update_wq(op)); -+ return; -+ } -+ -+ closure_sync(cl); -+ -+ if (!bch2_keylist_empty(&op->insert_keys)) { -+ __bch2_write_index(op); -+ -+ if (op->error) { -+ op->flags |= BCH_WRITE_DONE; -+ continue_at_nobarrier(cl, bch2_write_done, NULL); -+ return; -+ } -+ } -+ -+ goto again; -+} -+ -+static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len) -+{ -+ struct closure *cl = &op->cl; -+ struct bio *bio = &op->wbio.bio; -+ struct bvec_iter iter; -+ struct bkey_i_inline_data *id; -+ unsigned sectors; -+ int ret; -+ -+ bch2_check_set_feature(op->c, BCH_FEATURE_inline_data); -+ -+ ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys, -+ ARRAY_SIZE(op->inline_keys), -+ BKEY_U64s + DIV_ROUND_UP(data_len, 8)); -+ if (ret) { -+ op->error = ret; -+ goto err; -+ } -+ -+ sectors = bio_sectors(bio); -+ op->pos.offset += sectors; -+ -+ id = bkey_inline_data_init(op->insert_keys.top); -+ id->k.p = op->pos; -+ id->k.version = op->version; -+ id->k.size = sectors; -+ -+ iter = bio->bi_iter; -+ iter.bi_size = data_len; -+ memcpy_from_bio(id->v.data, bio, iter); -+ -+ while (data_len & 7) -+ id->v.data[data_len++] = '\0'; -+ set_bkey_val_bytes(&id->k, data_len); -+ bch2_keylist_push(&op->insert_keys); -+ -+ op->flags |= BCH_WRITE_WROTE_DATA_INLINE; -+ op->flags |= BCH_WRITE_DONE; -+ -+ continue_at_nobarrier(cl, bch2_write_index, NULL); -+ return; -+err: -+ bch2_write_done(&op->cl); -+} -+ -+/** -+ * bch_write - handle a write to a cache device or flash only volume -+ * -+ * This is the starting point for any data to end up in a cache device; it could -+ * be from a normal write, or a writeback write, or a write to a flash only -+ * volume - it's also used by the moving garbage collector to compact data in -+ * mostly empty buckets. -+ * -+ * It first writes the data to the cache, creating a list of keys to be inserted -+ * (if the data won't fit in a single open bucket, there will be multiple keys); -+ * after the data is written it calls bch_journal, and after the keys have been -+ * added to the next journal write they're inserted into the btree. -+ * -+ * If op->discard is true, instead of inserting the data it invalidates the -+ * region of the cache represented by op->bio and op->inode. 
-+ */ -+void bch2_write(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bio *bio = &op->wbio.bio; -+ struct bch_fs *c = op->c; -+ unsigned data_len; -+ -+ BUG_ON(!op->nr_replicas); -+ BUG_ON(!op->write_point.v); -+ BUG_ON(!bkey_cmp(op->pos, POS_MAX)); -+ -+ op->start_time = local_clock(); -+ bch2_keylist_init(&op->insert_keys, op->inline_keys); -+ wbio_init(bio)->put_bio = false; -+ -+ if (bio_sectors(bio) & (c->opts.block_size - 1)) { -+ __bcache_io_error(c, "misaligned write"); -+ op->error = -EIO; -+ goto err; -+ } -+ -+ if (c->opts.nochanges || -+ !percpu_ref_tryget(&c->writes)) { -+ if (!(op->flags & BCH_WRITE_FROM_INTERNAL)) -+ __bcache_io_error(c, "read only"); -+ op->error = -EROFS; -+ goto err; -+ } -+ -+ /* -+ * Can't ratelimit copygc - we'd deadlock: -+ */ -+ if (!(op->flags & BCH_WRITE_FROM_INTERNAL)) -+ down(&c->io_in_flight); -+ -+ bch2_increment_clock(c, bio_sectors(bio), WRITE); -+ -+ data_len = min_t(u64, bio->bi_iter.bi_size, -+ op->new_i_size - (op->pos.offset << 9)); -+ -+ if (c->opts.inline_data && -+ data_len <= min(block_bytes(c) / 2, 1024U)) { -+ bch2_write_data_inline(op, data_len); -+ return; -+ } -+ -+ continue_at_nobarrier(cl, __bch2_write, NULL); -+ return; -+err: -+ bch2_disk_reservation_put(c, &op->res); -+ -+ if (op->end_io) { -+ EBUG_ON(cl->parent); -+ closure_debug_destroy(cl); -+ op->end_io(op); -+ } else { -+ closure_return(cl); -+ } -+} -+ -+/* Cache promotion on read */ -+ -+struct promote_op { -+ struct closure cl; -+ struct rcu_head rcu; -+ u64 start_time; -+ -+ struct rhash_head hash; -+ struct bpos pos; -+ -+ struct migrate_write write; -+ struct bio_vec bi_inline_vecs[0]; /* must be last */ -+}; -+ -+static const struct rhashtable_params bch_promote_params = { -+ .head_offset = offsetof(struct promote_op, hash), -+ .key_offset = offsetof(struct promote_op, pos), -+ .key_len = sizeof(struct bpos), -+}; -+ -+static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k, -+ struct bpos pos, -+ struct bch_io_opts opts, -+ unsigned flags) -+{ -+ if (!(flags & BCH_READ_MAY_PROMOTE)) -+ return false; -+ -+ if (!opts.promote_target) -+ return false; -+ -+ if (bch2_bkey_has_target(c, k, opts.promote_target)) -+ return false; -+ -+ if (bch2_target_congested(c, opts.promote_target)) { -+ /* XXX trace this */ -+ return false; -+ } -+ -+ if (rhashtable_lookup_fast(&c->promote_table, &pos, -+ bch_promote_params)) -+ return false; -+ -+ return true; -+} -+ -+static void promote_free(struct bch_fs *c, struct promote_op *op) -+{ -+ int ret; -+ -+ ret = rhashtable_remove_fast(&c->promote_table, &op->hash, -+ bch_promote_params); -+ BUG_ON(ret); -+ percpu_ref_put(&c->writes); -+ kfree_rcu(op, rcu); -+} -+ -+static void promote_done(struct closure *cl) -+{ -+ struct promote_op *op = -+ container_of(cl, struct promote_op, cl); -+ struct bch_fs *c = op->write.op.c; -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_data_promote], -+ op->start_time); -+ -+ bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio); -+ promote_free(c, op); -+} -+ -+static void promote_start(struct promote_op *op, struct bch_read_bio *rbio) -+{ -+ struct bch_fs *c = rbio->c; -+ struct closure *cl = &op->cl; -+ struct bio *bio = &op->write.op.wbio.bio; -+ -+ trace_promote(&rbio->bio); -+ -+ /* we now own pages: */ -+ BUG_ON(!rbio->bounce); -+ BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs); -+ -+ memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec, -+ sizeof(struct bio_vec) * rbio->bio.bi_vcnt); -+ swap(bio->bi_vcnt, rbio->bio.bi_vcnt); -+ -+ 
bch2_migrate_read_done(&op->write, rbio); -+ -+ closure_init(cl, NULL); -+ closure_call(&op->write.op.cl, bch2_write, c->wq, cl); -+ closure_return_with_destructor(cl, promote_done); -+} -+ -+static struct promote_op *__promote_alloc(struct bch_fs *c, -+ enum btree_id btree_id, -+ struct bkey_s_c k, -+ struct bpos pos, -+ struct extent_ptr_decoded *pick, -+ struct bch_io_opts opts, -+ unsigned sectors, -+ struct bch_read_bio **rbio) -+{ -+ struct promote_op *op = NULL; -+ struct bio *bio; -+ unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); -+ int ret; -+ -+ if (!percpu_ref_tryget(&c->writes)) -+ return NULL; -+ -+ op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO); -+ if (!op) -+ goto err; -+ -+ op->start_time = local_clock(); -+ op->pos = pos; -+ -+ /* -+ * We don't use the mempool here because extents that aren't -+ * checksummed or compressed can be too big for the mempool: -+ */ -+ *rbio = kzalloc(sizeof(struct bch_read_bio) + -+ sizeof(struct bio_vec) * pages, -+ GFP_NOIO); -+ if (!*rbio) -+ goto err; -+ -+ rbio_init(&(*rbio)->bio, opts); -+ bio_init(&(*rbio)->bio, (*rbio)->bio.bi_inline_vecs, pages); -+ -+ if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, -+ GFP_NOIO)) -+ goto err; -+ -+ (*rbio)->bounce = true; -+ (*rbio)->split = true; -+ (*rbio)->kmalloc = true; -+ -+ if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash, -+ bch_promote_params)) -+ goto err; -+ -+ bio = &op->write.op.wbio.bio; -+ bio_init(bio, bio->bi_inline_vecs, pages); -+ -+ ret = bch2_migrate_write_init(c, &op->write, -+ writepoint_hashed((unsigned long) current), -+ opts, -+ DATA_PROMOTE, -+ (struct data_opts) { -+ .target = opts.promote_target -+ }, -+ btree_id, k); -+ BUG_ON(ret); -+ -+ return op; -+err: -+ if (*rbio) -+ bio_free_pages(&(*rbio)->bio); -+ kfree(*rbio); -+ *rbio = NULL; -+ kfree(op); -+ percpu_ref_put(&c->writes); -+ return NULL; -+} -+ -+noinline -+static struct promote_op *promote_alloc(struct bch_fs *c, -+ struct bvec_iter iter, -+ struct bkey_s_c k, -+ struct extent_ptr_decoded *pick, -+ struct bch_io_opts opts, -+ unsigned flags, -+ struct bch_read_bio **rbio, -+ bool *bounce, -+ bool *read_full) -+{ -+ bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents); -+ /* data might have to be decompressed in the write path: */ -+ unsigned sectors = promote_full -+ ? max(pick->crc.compressed_size, pick->crc.live_size) -+ : bvec_iter_sectors(iter); -+ struct bpos pos = promote_full -+ ? bkey_start_pos(k.k) -+ : POS(k.k->p.inode, iter.bi_sector); -+ struct promote_op *promote; -+ -+ if (!should_promote(c, k, pos, opts, flags)) -+ return NULL; -+ -+ promote = __promote_alloc(c, -+ k.k->type == KEY_TYPE_reflink_v -+ ? BTREE_ID_REFLINK -+ : BTREE_ID_EXTENTS, -+ k, pos, pick, opts, sectors, rbio); -+ if (!promote) -+ return NULL; -+ -+ *bounce = true; -+ *read_full = promote_full; -+ return promote; -+} -+ -+/* Read */ -+ -+#define READ_RETRY_AVOID 1 -+#define READ_RETRY 2 -+#define READ_ERR 3 -+ -+enum rbio_context { -+ RBIO_CONTEXT_NULL, -+ RBIO_CONTEXT_HIGHPRI, -+ RBIO_CONTEXT_UNBOUND, -+}; -+ -+static inline struct bch_read_bio * -+bch2_rbio_parent(struct bch_read_bio *rbio) -+{ -+ return rbio->split ? 
rbio->parent : rbio; -+} -+ -+__always_inline -+static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn, -+ enum rbio_context context, -+ struct workqueue_struct *wq) -+{ -+ if (context <= rbio->context) { -+ fn(&rbio->work); -+ } else { -+ rbio->work.func = fn; -+ rbio->context = context; -+ queue_work(wq, &rbio->work); -+ } -+} -+ -+static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) -+{ -+ BUG_ON(rbio->bounce && !rbio->split); -+ -+ if (rbio->promote) -+ promote_free(rbio->c, rbio->promote); -+ rbio->promote = NULL; -+ -+ if (rbio->bounce) -+ bch2_bio_free_pages_pool(rbio->c, &rbio->bio); -+ -+ if (rbio->split) { -+ struct bch_read_bio *parent = rbio->parent; -+ -+ if (rbio->kmalloc) -+ kfree(rbio); -+ else -+ bio_put(&rbio->bio); -+ -+ rbio = parent; -+ } -+ -+ return rbio; -+} -+ -+/* -+ * Only called on a top level bch_read_bio to complete an entire read request, -+ * not a split: -+ */ -+static void bch2_rbio_done(struct bch_read_bio *rbio) -+{ -+ if (rbio->start_time) -+ bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read], -+ rbio->start_time); -+ bio_endio(&rbio->bio); -+} -+ -+static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio, -+ struct bvec_iter bvec_iter, u64 inode, -+ struct bch_io_failures *failed, -+ unsigned flags) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_on_stack sk; -+ struct bkey_s_c k; -+ int ret; -+ -+ flags &= ~BCH_READ_LAST_FRAGMENT; -+ flags |= BCH_READ_MUST_CLONE; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ rbio->pos, BTREE_ITER_SLOTS); -+retry: -+ rbio->bio.bi_status = 0; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ if (bkey_err(k)) -+ goto err; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ bch2_trans_unlock(&trans); -+ -+ if (!bch2_bkey_matches_ptr(c, k, -+ rbio->pick.ptr, -+ rbio->pos.offset - -+ rbio->pick.crc.offset)) { -+ /* extent we wanted to read no longer exists: */ -+ rbio->hole = true; -+ goto out; -+ } -+ -+ ret = __bch2_read_extent(c, rbio, bvec_iter, k, 0, failed, flags); -+ if (ret == READ_RETRY) -+ goto retry; -+ if (ret) -+ goto err; -+out: -+ bch2_rbio_done(rbio); -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ return; -+err: -+ rbio->bio.bi_status = BLK_STS_IOERR; -+ goto out; -+} -+ -+static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, -+ struct bvec_iter bvec_iter, u64 inode, -+ struct bch_io_failures *failed, unsigned flags) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_on_stack sk; -+ struct bkey_s_c k; -+ int ret; -+ -+ flags &= ~BCH_READ_LAST_FRAGMENT; -+ flags |= BCH_READ_MUST_CLONE; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, -+ POS(inode, bvec_iter.bi_sector), -+ BTREE_ITER_SLOTS, k, ret) { -+ unsigned bytes, sectors, offset_into_extent; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ -+ offset_into_extent = iter->pos.offset - -+ bkey_start_offset(k.k); -+ sectors = k.k->size - offset_into_extent; -+ -+ ret = bch2_read_indirect_extent(&trans, -+ &offset_into_extent, &sk); -+ if (ret) -+ break; -+ -+ sectors = min(sectors, k.k->size - offset_into_extent); -+ -+ bch2_trans_unlock(&trans); -+ -+ bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; -+ swap(bvec_iter.bi_size, bytes); -+ -+ ret = 
__bch2_read_extent(c, rbio, bvec_iter, k, -+ offset_into_extent, failed, flags); -+ switch (ret) { -+ case READ_RETRY: -+ goto retry; -+ case READ_ERR: -+ goto err; -+ }; -+ -+ if (bytes == bvec_iter.bi_size) -+ goto out; -+ -+ swap(bvec_iter.bi_size, bytes); -+ bio_advance_iter(&rbio->bio, &bvec_iter, bytes); -+ } -+ -+ if (ret == -EINTR) -+ goto retry; -+ /* -+ * If we get here, it better have been because there was an error -+ * reading a btree node -+ */ -+ BUG_ON(!ret); -+ __bcache_io_error(c, "btree IO error: %i", ret); -+err: -+ rbio->bio.bi_status = BLK_STS_IOERR; -+out: -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ bch2_rbio_done(rbio); -+} -+ -+static void bch2_rbio_retry(struct work_struct *work) -+{ -+ struct bch_read_bio *rbio = -+ container_of(work, struct bch_read_bio, work); -+ struct bch_fs *c = rbio->c; -+ struct bvec_iter iter = rbio->bvec_iter; -+ unsigned flags = rbio->flags; -+ u64 inode = rbio->pos.inode; -+ struct bch_io_failures failed = { .nr = 0 }; -+ -+ trace_read_retry(&rbio->bio); -+ -+ if (rbio->retry == READ_RETRY_AVOID) -+ bch2_mark_io_failure(&failed, &rbio->pick); -+ -+ rbio->bio.bi_status = 0; -+ -+ rbio = bch2_rbio_free(rbio); -+ -+ flags |= BCH_READ_IN_RETRY; -+ flags &= ~BCH_READ_MAY_PROMOTE; -+ -+ if (flags & BCH_READ_NODECODE) -+ bch2_read_retry_nodecode(c, rbio, iter, inode, &failed, flags); -+ else -+ bch2_read_retry(c, rbio, iter, inode, &failed, flags); -+} -+ -+static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, -+ blk_status_t error) -+{ -+ rbio->retry = retry; -+ -+ if (rbio->flags & BCH_READ_IN_RETRY) -+ return; -+ -+ if (retry == READ_ERR) { -+ rbio = bch2_rbio_free(rbio); -+ -+ rbio->bio.bi_status = error; -+ bch2_rbio_done(rbio); -+ } else { -+ bch2_rbio_punt(rbio, bch2_rbio_retry, -+ RBIO_CONTEXT_UNBOUND, system_unbound_wq); -+ } -+} -+ -+static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, -+ struct bch_read_bio *rbio) -+{ -+ struct bch_fs *c = rbio->c; -+ u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset; -+ struct bch_extent_crc_unpacked new_crc; -+ struct btree_iter *iter = NULL; -+ struct bkey_i *new; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ if (crc_is_compressed(rbio->pick.crc)) -+ return 0; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_EXTENTS, rbio->pos, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ if ((ret = PTR_ERR_OR_ZERO(iter))) -+ goto out; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ if ((ret = bkey_err(k))) -+ goto out; -+ -+ /* -+ * going to be temporarily appending another checksum entry: -+ */ -+ new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + -+ BKEY_EXTENT_U64s_MAX * 8); -+ if ((ret = PTR_ERR_OR_ZERO(new))) -+ goto out; -+ -+ bkey_reassemble(new, k); -+ k = bkey_i_to_s_c(new); -+ -+ if (bversion_cmp(k.k->version, rbio->version) || -+ !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) -+ goto out; -+ -+ /* Extent was merged? 
*/ -+ if (bkey_start_offset(k.k) < data_offset || -+ k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size) -+ goto out; -+ -+ if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version, -+ rbio->pick.crc, NULL, &new_crc, -+ bkey_start_offset(k.k) - data_offset, k.k->size, -+ rbio->pick.crc.csum_type)) { -+ bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)"); -+ ret = 0; -+ goto out; -+ } -+ -+ if (!bch2_bkey_narrow_crcs(new, new_crc)) -+ goto out; -+ -+ bch2_trans_update(trans, iter, new, 0); -+out: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) -+{ -+ bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL, -+ __bch2_rbio_narrow_crcs(&trans, rbio)); -+} -+ -+/* Inner part that may run in process context */ -+static void __bch2_read_endio(struct work_struct *work) -+{ -+ struct bch_read_bio *rbio = -+ container_of(work, struct bch_read_bio, work); -+ struct bch_fs *c = rbio->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev); -+ struct bio *src = &rbio->bio; -+ struct bio *dst = &bch2_rbio_parent(rbio)->bio; -+ struct bvec_iter dst_iter = rbio->bvec_iter; -+ struct bch_extent_crc_unpacked crc = rbio->pick.crc; -+ struct nonce nonce = extent_nonce(rbio->version, crc); -+ struct bch_csum csum; -+ -+ /* Reset iterator for checksumming and copying bounced data: */ -+ if (rbio->bounce) { -+ src->bi_iter.bi_size = crc.compressed_size << 9; -+ src->bi_iter.bi_idx = 0; -+ src->bi_iter.bi_bvec_done = 0; -+ } else { -+ src->bi_iter = rbio->bvec_iter; -+ } -+ -+ csum = bch2_checksum_bio(c, crc.csum_type, nonce, src); -+ if (bch2_crc_cmp(csum, rbio->pick.crc.csum)) -+ goto csum_err; -+ -+ if (unlikely(rbio->narrow_crcs)) -+ bch2_rbio_narrow_crcs(rbio); -+ -+ if (rbio->flags & BCH_READ_NODECODE) -+ goto nodecode; -+ -+ /* Adjust crc to point to subset of data we want: */ -+ crc.offset += rbio->offset_into_extent; -+ crc.live_size = bvec_iter_sectors(rbio->bvec_iter); -+ -+ if (crc_is_compressed(crc)) { -+ bch2_encrypt_bio(c, crc.csum_type, nonce, src); -+ if (bch2_bio_uncompress(c, src, dst, dst_iter, crc)) -+ goto decompression_err; -+ } else { -+ /* don't need to decrypt the entire bio: */ -+ nonce = nonce_add(nonce, crc.offset << 9); -+ bio_advance(src, crc.offset << 9); -+ -+ BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size); -+ src->bi_iter.bi_size = dst_iter.bi_size; -+ -+ bch2_encrypt_bio(c, crc.csum_type, nonce, src); -+ -+ if (rbio->bounce) { -+ struct bvec_iter src_iter = src->bi_iter; -+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter); -+ } -+ } -+ -+ if (rbio->promote) { -+ /* -+ * Re encrypt data we decrypted, so it's consistent with -+ * rbio->crc: -+ */ -+ bch2_encrypt_bio(c, crc.csum_type, nonce, src); -+ promote_start(rbio->promote, rbio); -+ rbio->promote = NULL; -+ } -+nodecode: -+ if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) { -+ rbio = bch2_rbio_free(rbio); -+ bch2_rbio_done(rbio); -+ } -+ return; -+csum_err: -+ /* -+ * Checksum error: if the bio wasn't bounced, we may have been -+ * reading into buffers owned by userspace (that userspace can -+ * scribble over) - retry the read, bouncing it this time: -+ */ -+ if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) { -+ rbio->flags |= BCH_READ_MUST_BOUNCE; -+ bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR); -+ return; -+ } -+ -+ bch2_dev_io_error(ca, -+ "data checksum error, inode %llu offset %llu: expected %0llx:%0llx got %0llx:%0llx (type %u)", -+ rbio->pos.inode, (u64) 
rbio->bvec_iter.bi_sector, -+ rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, -+ csum.hi, csum.lo, crc.csum_type); -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); -+ return; -+decompression_err: -+ __bcache_io_error(c, "decompression error, inode %llu offset %llu", -+ rbio->pos.inode, -+ (u64) rbio->bvec_iter.bi_sector); -+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); -+ return; -+} -+ -+static void bch2_read_endio(struct bio *bio) -+{ -+ struct bch_read_bio *rbio = -+ container_of(bio, struct bch_read_bio, bio); -+ struct bch_fs *c = rbio->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev); -+ struct workqueue_struct *wq = NULL; -+ enum rbio_context context = RBIO_CONTEXT_NULL; -+ -+ if (rbio->have_ioref) { -+ bch2_latency_acct(ca, rbio->submit_time, READ); -+ percpu_ref_put(&ca->io_ref); -+ } -+ -+ if (!rbio->split) -+ rbio->bio.bi_end_io = rbio->end_io; -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "data read; %s", -+ blk_status_to_str(bio->bi_status))) { -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); -+ return; -+ } -+ -+ if (rbio->pick.ptr.cached && -+ (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) || -+ ptr_stale(ca, &rbio->pick.ptr))) { -+ atomic_long_inc(&c->read_realloc_races); -+ -+ if (rbio->flags & BCH_READ_RETRY_IF_STALE) -+ bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN); -+ else -+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN); -+ return; -+ } -+ -+ if (rbio->narrow_crcs || -+ crc_is_compressed(rbio->pick.crc) || -+ bch2_csum_type_is_encryption(rbio->pick.crc.csum_type)) -+ context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq; -+ else if (rbio->pick.crc.csum_type) -+ context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq; -+ -+ bch2_rbio_punt(rbio, __bch2_read_endio, context, wq); -+} -+ -+int __bch2_read_indirect_extent(struct btree_trans *trans, -+ unsigned *offset_into_extent, -+ struct bkey_on_stack *orig_k) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 reflink_offset; -+ int ret; -+ -+ reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) + -+ *offset_into_extent; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_REFLINK, -+ POS(0, reflink_offset), -+ BTREE_ITER_SLOTS); -+ ret = PTR_ERR_OR_ZERO(iter); -+ if (ret) -+ return ret; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (k.k->type != KEY_TYPE_reflink_v) { -+ __bcache_io_error(trans->c, -+ "pointer to nonexistent indirect extent"); -+ ret = -EIO; -+ goto err; -+ } -+ -+ *offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); -+ bkey_on_stack_reassemble(orig_k, trans->c, k); -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig, -+ struct bvec_iter iter, struct bkey_s_c k, -+ unsigned offset_into_extent, -+ struct bch_io_failures *failed, unsigned flags) -+{ -+ struct extent_ptr_decoded pick; -+ struct bch_read_bio *rbio = NULL; -+ struct bch_dev *ca; -+ struct promote_op *promote = NULL; -+ bool bounce = false, read_full = false, narrow_crcs = false; -+ struct bpos pos = bkey_start_pos(k.k); -+ int pick_ret; -+ -+ if (k.k->type == KEY_TYPE_inline_data) { -+ struct bkey_s_c_inline_data d = bkey_s_c_to_inline_data(k); -+ unsigned bytes = min_t(unsigned, iter.bi_size, -+ bkey_val_bytes(d.k)); -+ -+ swap(iter.bi_size, bytes); -+ memcpy_to_bio(&orig->bio, iter, d.v->data); -+ swap(iter.bi_size, bytes); -+ bio_advance_iter(&orig->bio, &iter, bytes); -+ zero_fill_bio_iter(&orig->bio, iter); -+ 
goto out_read_done; -+ } -+ -+ pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick); -+ -+ /* hole or reservation - just zero fill: */ -+ if (!pick_ret) -+ goto hole; -+ -+ if (pick_ret < 0) { -+ __bcache_io_error(c, "no device to read from"); -+ goto err; -+ } -+ -+ if (pick_ret > 0) -+ ca = bch_dev_bkey_exists(c, pick.ptr.dev); -+ -+ if (flags & BCH_READ_NODECODE) { -+ /* -+ * can happen if we retry, and the extent we were going to read -+ * has been merged in the meantime: -+ */ -+ if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS) -+ goto hole; -+ -+ iter.bi_size = pick.crc.compressed_size << 9; -+ goto get_bio; -+ } -+ -+ if (!(flags & BCH_READ_LAST_FRAGMENT) || -+ bio_flagged(&orig->bio, BIO_CHAIN)) -+ flags |= BCH_READ_MUST_CLONE; -+ -+ narrow_crcs = !(flags & BCH_READ_IN_RETRY) && -+ bch2_can_narrow_extent_crcs(k, pick.crc); -+ -+ if (narrow_crcs && (flags & BCH_READ_USER_MAPPED)) -+ flags |= BCH_READ_MUST_BOUNCE; -+ -+ EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size); -+ -+ if (crc_is_compressed(pick.crc) || -+ (pick.crc.csum_type != BCH_CSUM_NONE && -+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || -+ (bch2_csum_type_is_encryption(pick.crc.csum_type) && -+ (flags & BCH_READ_USER_MAPPED)) || -+ (flags & BCH_READ_MUST_BOUNCE)))) { -+ read_full = true; -+ bounce = true; -+ } -+ -+ if (orig->opts.promote_target) -+ promote = promote_alloc(c, iter, k, &pick, orig->opts, flags, -+ &rbio, &bounce, &read_full); -+ -+ if (!read_full) { -+ EBUG_ON(crc_is_compressed(pick.crc)); -+ EBUG_ON(pick.crc.csum_type && -+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || -+ bvec_iter_sectors(iter) != pick.crc.live_size || -+ pick.crc.offset || -+ offset_into_extent)); -+ -+ pos.offset += offset_into_extent; -+ pick.ptr.offset += pick.crc.offset + -+ offset_into_extent; -+ offset_into_extent = 0; -+ pick.crc.compressed_size = bvec_iter_sectors(iter); -+ pick.crc.uncompressed_size = bvec_iter_sectors(iter); -+ pick.crc.offset = 0; -+ pick.crc.live_size = bvec_iter_sectors(iter); -+ offset_into_extent = 0; -+ } -+get_bio: -+ if (rbio) { -+ /* -+ * promote already allocated bounce rbio: -+ * promote needs to allocate a bio big enough for uncompressing -+ * data in the write path, but we're not going to use it all -+ * here: -+ */ -+ EBUG_ON(rbio->bio.bi_iter.bi_size < -+ pick.crc.compressed_size << 9); -+ rbio->bio.bi_iter.bi_size = -+ pick.crc.compressed_size << 9; -+ } else if (bounce) { -+ unsigned sectors = pick.crc.compressed_size; -+ -+ rbio = rbio_init(bio_alloc_bioset(GFP_NOIO, -+ DIV_ROUND_UP(sectors, PAGE_SECTORS), -+ &c->bio_read_split), -+ orig->opts); -+ -+ bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9); -+ rbio->bounce = true; -+ rbio->split = true; -+ } else if (flags & BCH_READ_MUST_CLONE) { -+ /* -+ * Have to clone if there were any splits, due to error -+ * reporting issues (if a split errored, and retrying didn't -+ * work, when it reports the error to its parent (us) we don't -+ * know if the error was from our bio, and we should retry, or -+ * from the whole bio, in which case we don't want to retry and -+ * lose the error) -+ */ -+ rbio = rbio_init(bio_clone_fast(&orig->bio, GFP_NOIO, -+ &c->bio_read_split), -+ orig->opts); -+ rbio->bio.bi_iter = iter; -+ rbio->split = true; -+ } else { -+ rbio = orig; -+ rbio->bio.bi_iter = iter; -+ EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN)); -+ } -+ -+ EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size); -+ -+ rbio->c = c; -+ rbio->submit_time = local_clock(); -+ if 
(rbio->split) -+ rbio->parent = orig; -+ else -+ rbio->end_io = orig->bio.bi_end_io; -+ rbio->bvec_iter = iter; -+ rbio->offset_into_extent= offset_into_extent; -+ rbio->flags = flags; -+ rbio->have_ioref = pick_ret > 0 && bch2_dev_get_ioref(ca, READ); -+ rbio->narrow_crcs = narrow_crcs; -+ rbio->hole = 0; -+ rbio->retry = 0; -+ rbio->context = 0; -+ /* XXX: only initialize this if needed */ -+ rbio->devs_have = bch2_bkey_devs(k); -+ rbio->pick = pick; -+ rbio->pos = pos; -+ rbio->version = k.k->version; -+ rbio->promote = promote; -+ INIT_WORK(&rbio->work, NULL); -+ -+ rbio->bio.bi_opf = orig->bio.bi_opf; -+ rbio->bio.bi_iter.bi_sector = pick.ptr.offset; -+ rbio->bio.bi_end_io = bch2_read_endio; -+ -+ if (rbio->bounce) -+ trace_read_bounce(&rbio->bio); -+ -+ bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); -+ -+ rcu_read_lock(); -+ bucket_io_clock_reset(c, ca, PTR_BUCKET_NR(ca, &pick.ptr), READ); -+ rcu_read_unlock(); -+ -+ if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) { -+ bio_inc_remaining(&orig->bio); -+ trace_read_split(&orig->bio); -+ } -+ -+ if (!rbio->pick.idx) { -+ if (!rbio->have_ioref) { -+ __bcache_io_error(c, "no device to read from"); -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); -+ goto out; -+ } -+ -+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_USER], -+ bio_sectors(&rbio->bio)); -+ bio_set_dev(&rbio->bio, ca->disk_sb.bdev); -+ -+ if (likely(!(flags & BCH_READ_IN_RETRY))) -+ submit_bio(&rbio->bio); -+ else -+ submit_bio_wait(&rbio->bio); -+ } else { -+ /* Attempting reconstruct read: */ -+ if (bch2_ec_read_extent(c, rbio)) { -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); -+ goto out; -+ } -+ -+ if (likely(!(flags & BCH_READ_IN_RETRY))) -+ bio_endio(&rbio->bio); -+ } -+out: -+ if (likely(!(flags & BCH_READ_IN_RETRY))) { -+ return 0; -+ } else { -+ int ret; -+ -+ rbio->context = RBIO_CONTEXT_UNBOUND; -+ bch2_read_endio(&rbio->bio); -+ -+ ret = rbio->retry; -+ rbio = bch2_rbio_free(rbio); -+ -+ if (ret == READ_RETRY_AVOID) { -+ bch2_mark_io_failure(failed, &pick); -+ ret = READ_RETRY; -+ } -+ -+ return ret; -+ } -+ -+err: -+ if (flags & BCH_READ_IN_RETRY) -+ return READ_ERR; -+ -+ orig->bio.bi_status = BLK_STS_IOERR; -+ goto out_read_done; -+ -+hole: -+ /* -+ * won't normally happen in the BCH_READ_NODECODE -+ * (bch2_move_extent()) path, but if we retry and the extent we wanted -+ * to read no longer exists we have to signal that: -+ */ -+ if (flags & BCH_READ_NODECODE) -+ orig->hole = true; -+ -+ zero_fill_bio_iter(&orig->bio, iter); -+out_read_done: -+ if (flags & BCH_READ_LAST_FRAGMENT) -+ bch2_rbio_done(orig); -+ return 0; -+} -+ -+void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_on_stack sk; -+ struct bkey_s_c k; -+ unsigned flags = BCH_READ_RETRY_IF_STALE| -+ BCH_READ_MAY_PROMOTE| -+ BCH_READ_USER_MAPPED; -+ int ret; -+ -+ BUG_ON(rbio->_state); -+ BUG_ON(flags & BCH_READ_NODECODE); -+ BUG_ON(flags & BCH_READ_IN_RETRY); -+ -+ rbio->c = c; -+ rbio->start_time = local_clock(); -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(inode, rbio->bio.bi_iter.bi_sector), -+ BTREE_ITER_SLOTS); -+ while (1) { -+ unsigned bytes, sectors, offset_into_extent; -+ -+ bch2_btree_iter_set_pos(iter, -+ POS(inode, rbio->bio.bi_iter.bi_sector)); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto 
err; -+ -+ offset_into_extent = iter->pos.offset - -+ bkey_start_offset(k.k); -+ sectors = k.k->size - offset_into_extent; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ -+ ret = bch2_read_indirect_extent(&trans, -+ &offset_into_extent, &sk); -+ if (ret) -+ goto err; -+ -+ /* -+ * With indirect extents, the amount of data to read is the min -+ * of the original extent and the indirect extent: -+ */ -+ sectors = min(sectors, k.k->size - offset_into_extent); -+ -+ /* -+ * Unlock the iterator while the btree node's lock is still in -+ * cache, before doing the IO: -+ */ -+ bch2_trans_unlock(&trans); -+ -+ bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; -+ swap(rbio->bio.bi_iter.bi_size, bytes); -+ -+ if (rbio->bio.bi_iter.bi_size == bytes) -+ flags |= BCH_READ_LAST_FRAGMENT; -+ -+ bch2_read_extent(c, rbio, k, offset_into_extent, flags); -+ -+ if (flags & BCH_READ_LAST_FRAGMENT) -+ break; -+ -+ swap(rbio->bio.bi_iter.bi_size, bytes); -+ bio_advance(&rbio->bio, bytes); -+ } -+out: -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ return; -+err: -+ if (ret == -EINTR) -+ goto retry; -+ -+ bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret); -+ bch2_rbio_done(rbio); -+ goto out; -+} -+ -+void bch2_fs_io_exit(struct bch_fs *c) -+{ -+ if (c->promote_table.tbl) -+ rhashtable_destroy(&c->promote_table); -+ mempool_exit(&c->bio_bounce_pages); -+ bioset_exit(&c->bio_write); -+ bioset_exit(&c->bio_read_split); -+ bioset_exit(&c->bio_read); -+} -+ -+int bch2_fs_io_init(struct bch_fs *c) -+{ -+ if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio), -+ BIOSET_NEED_BVECS) || -+ bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio), -+ BIOSET_NEED_BVECS) || -+ bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio), -+ BIOSET_NEED_BVECS) || -+ mempool_init_page_pool(&c->bio_bounce_pages, -+ max_t(unsigned, -+ c->opts.btree_node_size, -+ c->sb.encoded_extent_max) / -+ PAGE_SECTORS, 0) || -+ rhashtable_init(&c->promote_table, &bch_promote_params)) -+ return -ENOMEM; -+ -+ return 0; -+} -diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h -new file mode 100644 -index 000000000000..0ad293bd6295 ---- /dev/null -+++ b/fs/bcachefs/io.h -@@ -0,0 +1,167 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_IO_H -+#define _BCACHEFS_IO_H -+ -+#include "checksum.h" -+#include "bkey_on_stack.h" -+#include "io_types.h" -+ -+#define to_wbio(_bio) \ -+ container_of((_bio), struct bch_write_bio, bio) -+ -+#define to_rbio(_bio) \ -+ container_of((_bio), struct bch_read_bio, bio) -+ -+void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *); -+void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t); -+ -+void bch2_latency_acct(struct bch_dev *, u64, int); -+ -+void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, -+ enum bch_data_type, const struct bkey_i *); -+ -+#define BLK_STS_REMOVED ((__force blk_status_t)128) -+ -+enum bch_write_flags { -+ BCH_WRITE_ALLOC_NOWAIT = (1 << 0), -+ BCH_WRITE_CACHED = (1 << 1), -+ BCH_WRITE_FLUSH = (1 << 2), -+ BCH_WRITE_DATA_ENCODED = (1 << 3), -+ BCH_WRITE_PAGES_STABLE = (1 << 4), -+ BCH_WRITE_PAGES_OWNED = (1 << 5), -+ BCH_WRITE_ONLY_SPECIFIED_DEVS = (1 << 6), -+ BCH_WRITE_WROTE_DATA_INLINE = (1 << 7), -+ BCH_WRITE_FROM_INTERNAL = (1 << 8), -+ -+ /* Internal: */ -+ BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 9), -+ BCH_WRITE_SKIP_CLOSURE_PUT = (1 << 10), -+ BCH_WRITE_DONE = (1 << 11), -+}; -+ -+static inline u64 *op_journal_seq(struct bch_write_op *op) -+{ -+ return 
(op->flags & BCH_WRITE_JOURNAL_SEQ_PTR) -+ ? op->journal_seq_p : &op->journal_seq; -+} -+ -+static inline void op_journal_seq_set(struct bch_write_op *op, u64 *journal_seq) -+{ -+ op->journal_seq_p = journal_seq; -+ op->flags |= BCH_WRITE_JOURNAL_SEQ_PTR; -+} -+ -+static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op) -+{ -+ return op->alloc_reserve == RESERVE_MOVINGGC -+ ? op->c->copygc_wq -+ : op->c->wq; -+} -+ -+int bch2_extent_update(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *, struct disk_reservation *, -+ u64 *, u64, s64 *); -+int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, -+ struct bpos, u64 *, s64 *); -+int bch2_fpunch(struct bch_fs *c, u64, u64, u64, u64 *, s64 *); -+ -+int bch2_write_index_default(struct bch_write_op *); -+ -+static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, -+ struct bch_io_opts opts) -+{ -+ op->c = c; -+ op->end_io = NULL; -+ op->flags = 0; -+ op->written = 0; -+ op->error = 0; -+ op->csum_type = bch2_data_checksum_type(c, opts.data_checksum); -+ op->compression_type = bch2_compression_opt_to_type[opts.compression]; -+ op->nr_replicas = 0; -+ op->nr_replicas_required = c->opts.data_replicas_required; -+ op->alloc_reserve = RESERVE_NONE; -+ op->incompressible = 0; -+ op->open_buckets.nr = 0; -+ op->devs_have.nr = 0; -+ op->target = 0; -+ op->opts = opts; -+ op->pos = POS_MAX; -+ op->version = ZERO_VERSION; -+ op->write_point = (struct write_point_specifier) { 0 }; -+ op->res = (struct disk_reservation) { 0 }; -+ op->journal_seq = 0; -+ op->new_i_size = U64_MAX; -+ op->i_sectors_delta = 0; -+ op->index_update_fn = bch2_write_index_default; -+} -+ -+void bch2_write(struct closure *); -+ -+static inline struct bch_write_bio *wbio_init(struct bio *bio) -+{ -+ struct bch_write_bio *wbio = to_wbio(bio); -+ -+ memset(wbio, 0, offsetof(struct bch_write_bio, bio)); -+ return wbio; -+} -+ -+struct bch_devs_mask; -+struct cache_promote_op; -+struct extent_ptr_decoded; -+ -+int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, -+ struct bkey_on_stack *); -+ -+static inline int bch2_read_indirect_extent(struct btree_trans *trans, -+ unsigned *offset_into_extent, -+ struct bkey_on_stack *k) -+{ -+ return k->k->k.type == KEY_TYPE_reflink_p -+ ? 
__bch2_read_indirect_extent(trans, offset_into_extent, k) -+ : 0; -+} -+ -+enum bch_read_flags { -+ BCH_READ_RETRY_IF_STALE = 1 << 0, -+ BCH_READ_MAY_PROMOTE = 1 << 1, -+ BCH_READ_USER_MAPPED = 1 << 2, -+ BCH_READ_NODECODE = 1 << 3, -+ BCH_READ_LAST_FRAGMENT = 1 << 4, -+ -+ /* internal: */ -+ BCH_READ_MUST_BOUNCE = 1 << 5, -+ BCH_READ_MUST_CLONE = 1 << 6, -+ BCH_READ_IN_RETRY = 1 << 7, -+}; -+ -+int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *, -+ struct bvec_iter, struct bkey_s_c, unsigned, -+ struct bch_io_failures *, unsigned); -+ -+static inline void bch2_read_extent(struct bch_fs *c, -+ struct bch_read_bio *rbio, -+ struct bkey_s_c k, -+ unsigned offset_into_extent, -+ unsigned flags) -+{ -+ __bch2_read_extent(c, rbio, rbio->bio.bi_iter, k, -+ offset_into_extent, NULL, flags); -+} -+ -+void bch2_read(struct bch_fs *, struct bch_read_bio *, u64); -+ -+static inline struct bch_read_bio *rbio_init(struct bio *bio, -+ struct bch_io_opts opts) -+{ -+ struct bch_read_bio *rbio = to_rbio(bio); -+ -+ rbio->_state = 0; -+ rbio->promote = NULL; -+ rbio->opts = opts; -+ return rbio; -+} -+ -+void bch2_fs_io_exit(struct bch_fs *); -+int bch2_fs_io_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_IO_H */ -diff --git a/fs/bcachefs/io_types.h b/fs/bcachefs/io_types.h -new file mode 100644 -index 000000000000..684e4c9a5d98 ---- /dev/null -+++ b/fs/bcachefs/io_types.h -@@ -0,0 +1,149 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_IO_TYPES_H -+#define _BCACHEFS_IO_TYPES_H -+ -+#include "alloc_types.h" -+#include "btree_types.h" -+#include "buckets_types.h" -+#include "extents_types.h" -+#include "keylist_types.h" -+#include "opts.h" -+#include "super_types.h" -+ -+#include -+#include -+ -+struct bch_read_bio { -+ struct bch_fs *c; -+ u64 start_time; -+ u64 submit_time; -+ -+ /* -+ * Reads will often have to be split, and if the extent being read from -+ * was checksummed or compressed we'll also have to allocate bounce -+ * buffers and copy the data back into the original bio. -+ * -+ * If we didn't have to split, we have to save and restore the original -+ * bi_end_io - @split below indicates which: -+ */ -+ union { -+ struct bch_read_bio *parent; -+ bio_end_io_t *end_io; -+ }; -+ -+ /* -+ * Saved copy of bio->bi_iter, from submission time - allows us to -+ * resubmit on IO error, and also to copy data back to the original bio -+ * when we're bouncing: -+ */ -+ struct bvec_iter bvec_iter; -+ -+ unsigned offset_into_extent; -+ -+ u16 flags; -+ union { -+ struct { -+ u16 bounce:1, -+ split:1, -+ kmalloc:1, -+ have_ioref:1, -+ narrow_crcs:1, -+ hole:1, -+ retry:2, -+ context:2; -+ }; -+ u16 _state; -+ }; -+ -+ struct bch_devs_list devs_have; -+ -+ struct extent_ptr_decoded pick; -+ /* start pos of data we read (may not be pos of data we want) */ -+ struct bpos pos; -+ struct bversion version; -+ -+ struct promote_op *promote; -+ -+ struct bch_io_opts opts; -+ -+ struct work_struct work; -+ -+ struct bio bio; -+}; -+ -+struct bch_write_bio { -+ struct bch_fs *c; -+ struct bch_write_bio *parent; -+ -+ u64 submit_time; -+ -+ struct bch_devs_list failed; -+ u8 order; -+ u8 dev; -+ -+ unsigned split:1, -+ bounce:1, -+ put_bio:1, -+ have_ioref:1, -+ used_mempool:1; -+ -+ struct bio bio; -+}; -+ -+struct bch_write_op { -+ struct closure cl; -+ struct bch_fs *c; -+ void (*end_io)(struct bch_write_op *); -+ u64 start_time; -+ -+ unsigned written; /* sectors */ -+ u16 flags; -+ s16 error; /* dio write path expects it to hold -ERESTARTSYS... 
*/ -+ -+ unsigned csum_type:4; -+ unsigned compression_type:4; -+ unsigned nr_replicas:4; -+ unsigned nr_replicas_required:4; -+ unsigned alloc_reserve:3; -+ unsigned incompressible:1; -+ -+ struct bch_devs_list devs_have; -+ u16 target; -+ u16 nonce; -+ struct bch_io_opts opts; -+ -+ struct bpos pos; -+ struct bversion version; -+ -+ /* For BCH_WRITE_DATA_ENCODED: */ -+ struct bch_extent_crc_unpacked crc; -+ -+ struct write_point_specifier write_point; -+ -+ struct disk_reservation res; -+ -+ struct open_buckets open_buckets; -+ -+ /* -+ * If caller wants to flush but hasn't passed us a journal_seq ptr, we -+ * still need to stash the journal_seq somewhere: -+ */ -+ union { -+ u64 *journal_seq_p; -+ u64 journal_seq; -+ }; -+ u64 new_i_size; -+ s64 i_sectors_delta; -+ -+ int (*index_update_fn)(struct bch_write_op *); -+ -+ struct bch_devs_mask failed; -+ -+ struct keylist insert_keys; -+ u64 inline_keys[BKEY_EXTENT_U64s_MAX * 2]; -+ -+ /* Must be last: */ -+ struct bch_write_bio wbio; -+}; -+ -+#endif /* _BCACHEFS_IO_TYPES_H */ -diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c -new file mode 100644 -index 000000000000..b4f7b61ba9ac ---- /dev/null -+++ b/fs/bcachefs/journal.c -@@ -0,0 +1,1254 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * bcachefs journalling code, for btree insertions -+ * -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_methods.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "journal.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "super-io.h" -+ -+#include -+ -+static bool __journal_entry_is_open(union journal_res_state state) -+{ -+ return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL; -+} -+ -+static bool journal_entry_is_open(struct journal *j) -+{ -+ return __journal_entry_is_open(j->reservations); -+} -+ -+static void journal_pin_new_entry(struct journal *j, int count) -+{ -+ struct journal_entry_pin_list *p; -+ -+ /* -+ * The fifo_push() needs to happen at the same time as j->seq is -+ * incremented for journal_last_seq() to be calculated correctly -+ */ -+ atomic64_inc(&j->seq); -+ p = fifo_push_ref(&j->pin); -+ -+ INIT_LIST_HEAD(&p->list); -+ INIT_LIST_HEAD(&p->flushed); -+ atomic_set(&p->count, count); -+ p->devs.nr = 0; -+} -+ -+static void bch2_journal_buf_init(struct journal *j) -+{ -+ struct journal_buf *buf = journal_cur_buf(j); -+ -+ memset(buf->has_inode, 0, sizeof(buf->has_inode)); -+ -+ memset(buf->data, 0, sizeof(*buf->data)); -+ buf->data->seq = cpu_to_le64(journal_cur_seq(j)); -+ buf->data->u64s = 0; -+} -+ -+void bch2_journal_halt(struct journal *j) -+{ -+ union journal_res_state old, new; -+ u64 v = atomic64_read(&j->reservations.counter); -+ -+ do { -+ old.v = new.v = v; -+ if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) -+ return; -+ -+ new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL; -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ journal_wake(j); -+ closure_wake_up(&journal_cur_buf(j)->wait); -+} -+ -+/* journal entry close/open: */ -+ -+void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set) -+{ -+ if (!need_write_just_set && -+ test_bit(JOURNAL_NEED_WRITE, &j->flags)) -+ bch2_time_stats_update(j->delay_time, -+ j->need_write_time); -+ -+ clear_bit(JOURNAL_NEED_WRITE, &j->flags); -+ -+ closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL); -+} -+ -+/* -+ * Returns true if journal entry is now closed: -+ */ 
-+static bool __journal_entry_close(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_buf *buf = journal_cur_buf(j); -+ union journal_res_state old, new; -+ u64 v = atomic64_read(&j->reservations.counter); -+ bool set_need_write = false; -+ unsigned sectors; -+ -+ lockdep_assert_held(&j->lock); -+ -+ do { -+ old.v = new.v = v; -+ if (old.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL) -+ return true; -+ -+ if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) { -+ /* this entry will never be written: */ -+ closure_wake_up(&buf->wait); -+ return true; -+ } -+ -+ if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) { -+ set_bit(JOURNAL_NEED_WRITE, &j->flags); -+ j->need_write_time = local_clock(); -+ set_need_write = true; -+ } -+ -+ if (new.prev_buf_unwritten) -+ return false; -+ -+ new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL; -+ new.idx++; -+ new.prev_buf_unwritten = 1; -+ -+ BUG_ON(journal_state_count(new, new.idx)); -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ buf->data->u64s = cpu_to_le32(old.cur_entry_offset); -+ -+ sectors = vstruct_blocks_plus(buf->data, c->block_bits, -+ buf->u64s_reserved) << c->block_bits; -+ BUG_ON(sectors > buf->sectors); -+ buf->sectors = sectors; -+ -+ bkey_extent_init(&buf->key); -+ -+ /* -+ * We have to set last_seq here, _before_ opening a new journal entry: -+ * -+ * A threads may replace an old pin with a new pin on their current -+ * journal reservation - the expectation being that the journal will -+ * contain either what the old pin protected or what the new pin -+ * protects. -+ * -+ * After the old pin is dropped journal_last_seq() won't include the old -+ * pin, so we can only write the updated last_seq on the entry that -+ * contains whatever the new pin protects. -+ * -+ * Restated, we can _not_ update last_seq for a given entry if there -+ * could be a newer entry open with reservations/pins that have been -+ * taken against it. 
-+ * -+ * Hence, we want update/set last_seq on the current journal entry right -+ * before we open a new one: -+ */ -+ buf->data->last_seq = cpu_to_le64(journal_last_seq(j)); -+ -+ if (journal_entry_empty(buf->data)) -+ clear_bit(JOURNAL_NOT_EMPTY, &j->flags); -+ else -+ set_bit(JOURNAL_NOT_EMPTY, &j->flags); -+ -+ journal_pin_new_entry(j, 1); -+ -+ bch2_journal_buf_init(j); -+ -+ cancel_delayed_work(&j->write_work); -+ -+ bch2_journal_space_available(j); -+ -+ bch2_journal_buf_put(j, old.idx, set_need_write); -+ return true; -+} -+ -+static bool journal_entry_close(struct journal *j) -+{ -+ bool ret; -+ -+ spin_lock(&j->lock); -+ ret = __journal_entry_close(j); -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+/* -+ * should _only_ called from journal_res_get() - when we actually want a -+ * journal reservation - journal entry is open means journal is dirty: -+ * -+ * returns: -+ * 0: success -+ * -ENOSPC: journal currently full, must invoke reclaim -+ * -EAGAIN: journal blocked, must wait -+ * -EROFS: insufficient rw devices or journal error -+ */ -+static int journal_entry_open(struct journal *j) -+{ -+ struct journal_buf *buf = journal_cur_buf(j); -+ union journal_res_state old, new; -+ int u64s; -+ u64 v; -+ -+ lockdep_assert_held(&j->lock); -+ BUG_ON(journal_entry_is_open(j)); -+ -+ if (j->blocked) -+ return -EAGAIN; -+ -+ if (j->cur_entry_error) -+ return j->cur_entry_error; -+ -+ BUG_ON(!j->cur_entry_sectors); -+ -+ buf->u64s_reserved = j->entry_u64s_reserved; -+ buf->disk_sectors = j->cur_entry_sectors; -+ buf->sectors = min(buf->disk_sectors, buf->buf_size >> 9); -+ -+ u64s = (int) (buf->sectors << 9) / sizeof(u64) - -+ journal_entry_overhead(j); -+ u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1); -+ -+ if (u64s <= le32_to_cpu(buf->data->u64s)) -+ return -ENOSPC; -+ -+ /* -+ * Must be set before marking the journal entry as open: -+ */ -+ j->cur_entry_u64s = u64s; -+ -+ v = atomic64_read(&j->reservations.counter); -+ do { -+ old.v = new.v = v; -+ -+ if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) -+ return -EROFS; -+ -+ /* Handle any already added entries */ -+ new.cur_entry_offset = le32_to_cpu(buf->data->u64s); -+ -+ EBUG_ON(journal_state_count(new, new.idx)); -+ journal_state_inc(&new); -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ if (j->res_get_blocked_start) -+ bch2_time_stats_update(j->blocked_time, -+ j->res_get_blocked_start); -+ j->res_get_blocked_start = 0; -+ -+ mod_delayed_work(system_freezable_wq, -+ &j->write_work, -+ msecs_to_jiffies(j->write_delay_ms)); -+ journal_wake(j); -+ return 0; -+} -+ -+static bool journal_quiesced(struct journal *j) -+{ -+ union journal_res_state state = READ_ONCE(j->reservations); -+ bool ret = !state.prev_buf_unwritten && !__journal_entry_is_open(state); -+ -+ if (!ret) -+ journal_entry_close(j); -+ return ret; -+} -+ -+static void journal_quiesce(struct journal *j) -+{ -+ wait_event(j->wait, journal_quiesced(j)); -+} -+ -+static void journal_write_work(struct work_struct *work) -+{ -+ struct journal *j = container_of(work, struct journal, write_work.work); -+ -+ journal_entry_close(j); -+} -+ -+/* -+ * Given an inode number, if that inode number has data in the journal that -+ * hasn't yet been flushed, return the journal sequence number that needs to be -+ * flushed: -+ */ -+u64 bch2_inode_journal_seq(struct journal *j, u64 inode) -+{ -+ size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8)); -+ u64 seq = 0; -+ -+ if (!test_bit(h, j->buf[0].has_inode) && 
-+ !test_bit(h, j->buf[1].has_inode)) -+ return 0; -+ -+ spin_lock(&j->lock); -+ if (test_bit(h, journal_cur_buf(j)->has_inode)) -+ seq = journal_cur_seq(j); -+ else if (test_bit(h, journal_prev_buf(j)->has_inode)) -+ seq = journal_cur_seq(j) - 1; -+ spin_unlock(&j->lock); -+ -+ return seq; -+} -+ -+static int __journal_res_get(struct journal *j, struct journal_res *res, -+ unsigned flags) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_buf *buf; -+ bool can_discard; -+ int ret; -+retry: -+ if (journal_res_get_fast(j, res, flags)) -+ return 0; -+ -+ if (bch2_journal_error(j)) -+ return -EROFS; -+ -+ spin_lock(&j->lock); -+ -+ /* -+ * Recheck after taking the lock, so we don't race with another thread -+ * that just did journal_entry_open() and call journal_entry_close() -+ * unnecessarily -+ */ -+ if (journal_res_get_fast(j, res, flags)) { -+ spin_unlock(&j->lock); -+ return 0; -+ } -+ -+ if (!(flags & JOURNAL_RES_GET_RESERVED) && -+ !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) { -+ /* -+ * Don't want to close current journal entry, just need to -+ * invoke reclaim: -+ */ -+ ret = -ENOSPC; -+ goto unlock; -+ } -+ -+ /* -+ * If we couldn't get a reservation because the current buf filled up, -+ * and we had room for a bigger entry on disk, signal that we want to -+ * realloc the journal bufs: -+ */ -+ buf = journal_cur_buf(j); -+ if (journal_entry_is_open(j) && -+ buf->buf_size >> 9 < buf->disk_sectors && -+ buf->buf_size < JOURNAL_ENTRY_SIZE_MAX) -+ j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1); -+ -+ if (journal_entry_is_open(j) && -+ !__journal_entry_close(j)) { -+ /* -+ * We failed to get a reservation on the current open journal -+ * entry because it's full, and we can't close it because -+ * there's still a previous one in flight: -+ */ -+ trace_journal_entry_full(c); -+ ret = -EAGAIN; -+ } else { -+ ret = journal_entry_open(j); -+ } -+unlock: -+ if ((ret == -EAGAIN || ret == -ENOSPC) && -+ !j->res_get_blocked_start) -+ j->res_get_blocked_start = local_clock() ?: 1; -+ -+ can_discard = j->can_discard; -+ spin_unlock(&j->lock); -+ -+ if (!ret) -+ goto retry; -+ -+ if (ret == -ENOSPC) { -+ WARN_ONCE(!can_discard && (flags & JOURNAL_RES_GET_RESERVED), -+ "JOURNAL_RES_GET_RESERVED set but journal full"); -+ -+ /* -+ * Journal is full - can't rely on reclaim from work item due to -+ * freezing: -+ */ -+ trace_journal_full(c); -+ -+ if (!(flags & JOURNAL_RES_GET_NONBLOCK)) { -+ if (can_discard) { -+ bch2_journal_do_discards(j); -+ goto retry; -+ } -+ -+ if (mutex_trylock(&j->reclaim_lock)) { -+ bch2_journal_reclaim(j); -+ mutex_unlock(&j->reclaim_lock); -+ } -+ } -+ -+ ret = -EAGAIN; -+ } -+ -+ return ret; -+} -+ -+/* -+ * Essentially the entry function to the journaling code. When bcachefs is doing -+ * a btree insert, it calls this function to get the current journal write. -+ * Journal write is the structure used set up journal writes. The calling -+ * function will then add its keys to the structure, queuing them for the next -+ * write. -+ * -+ * To ensure forward progress, the current task must not be holding any -+ * btree node write locks. 
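The comment above spells out the reservation contract: get a reservation, fill it with keys, drop the ref, and optionally wait for that sequence number to reach disk. As a minimal caller-side sketch, assuming only the helpers declared later in journal.h within this same patch (the wrapper name example_journal_insert is hypothetical, not part of the original code):

static int example_journal_insert(struct journal *j, enum btree_id id,
                                  const struct bkey_i *k)
{
        struct journal_res res = { 0 };
        int ret;

        /* ask for enough space for the key plus its jset_entry header */
        ret = bch2_journal_res_get(j, &res, jset_u64s(k->k.u64s), 0);
        if (ret)
                return ret;

        /* copy the key into the currently open journal entry */
        bch2_journal_add_keys(j, &res, id, k);

        /* drop our ref so the entry can be closed and written out */
        bch2_journal_res_put(j, &res);

        /* for a synchronous update, wait for that sequence number on disk */
        return bch2_journal_flush_seq(j, res.seq);
}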
-+ */ -+int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, -+ unsigned flags) -+{ -+ int ret; -+ -+ closure_wait_event(&j->async_wait, -+ (ret = __journal_res_get(j, res, flags)) != -EAGAIN || -+ (flags & JOURNAL_RES_GET_NONBLOCK)); -+ return ret; -+} -+ -+/* journal_preres: */ -+ -+static bool journal_preres_available(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags); -+ -+ if (!ret) -+ bch2_journal_reclaim_work(&j->reclaim_work.work); -+ -+ return ret; -+} -+ -+int __bch2_journal_preres_get(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ int ret; -+ -+ closure_wait_event(&j->preres_wait, -+ (ret = bch2_journal_error(j)) || -+ journal_preres_available(j, res, new_u64s, flags)); -+ return ret; -+} -+ -+/* journal_entry_res: */ -+ -+void bch2_journal_entry_res_resize(struct journal *j, -+ struct journal_entry_res *res, -+ unsigned new_u64s) -+{ -+ union journal_res_state state; -+ int d = new_u64s - res->u64s; -+ -+ spin_lock(&j->lock); -+ -+ j->entry_u64s_reserved += d; -+ if (d <= 0) -+ goto out; -+ -+ j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d); -+ smp_mb(); -+ state = READ_ONCE(j->reservations); -+ -+ if (state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL && -+ state.cur_entry_offset > j->cur_entry_u64s) { -+ j->cur_entry_u64s += d; -+ /* -+ * Not enough room in current journal entry, have to flush it: -+ */ -+ __journal_entry_close(j); -+ } else { -+ journal_cur_buf(j)->u64s_reserved += d; -+ } -+out: -+ spin_unlock(&j->lock); -+ res->u64s += d; -+} -+ -+/* journal flushing: */ -+ -+u64 bch2_journal_last_unwritten_seq(struct journal *j) -+{ -+ u64 seq; -+ -+ spin_lock(&j->lock); -+ seq = journal_cur_seq(j); -+ if (j->reservations.prev_buf_unwritten) -+ seq--; -+ spin_unlock(&j->lock); -+ -+ return seq; -+} -+ -+/** -+ * bch2_journal_open_seq_async - try to open a new journal entry if @seq isn't -+ * open yet, or wait if we cannot -+ * -+ * used by the btree interior update machinery, when it needs to write a new -+ * btree root - every journal entry contains the roots of all the btrees, so it -+ * doesn't need to bother with getting a journal reservation -+ */ -+int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ int ret; -+ -+ spin_lock(&j->lock); -+ -+ /* -+ * Can't try to open more than one sequence number ahead: -+ */ -+ BUG_ON(journal_cur_seq(j) < seq && !journal_entry_is_open(j)); -+ -+ if (journal_cur_seq(j) > seq || -+ journal_entry_is_open(j)) { -+ spin_unlock(&j->lock); -+ return 0; -+ } -+ -+ if (journal_cur_seq(j) < seq && -+ !__journal_entry_close(j)) { -+ /* haven't finished writing out the previous one: */ -+ trace_journal_entry_full(c); -+ ret = -EAGAIN; -+ } else { -+ BUG_ON(journal_cur_seq(j) != seq); -+ -+ ret = journal_entry_open(j); -+ } -+ -+ if ((ret == -EAGAIN || ret == -ENOSPC) && -+ !j->res_get_blocked_start) -+ j->res_get_blocked_start = local_clock() ?: 1; -+ -+ if (ret == -EAGAIN || ret == -ENOSPC) -+ closure_wait(&j->async_wait, cl); -+ -+ spin_unlock(&j->lock); -+ -+ if (ret == -ENOSPC) { -+ trace_journal_full(c); -+ bch2_journal_reclaim_work(&j->reclaim_work.work); -+ ret = -EAGAIN; -+ } -+ -+ return ret; -+} -+ -+static int journal_seq_error(struct journal *j, u64 seq) -+{ -+ union journal_res_state state = READ_ONCE(j->reservations); -+ -+ if (seq == 
journal_cur_seq(j)) -+ return bch2_journal_error(j); -+ -+ if (seq + 1 == journal_cur_seq(j) && -+ !state.prev_buf_unwritten && -+ seq > j->seq_ondisk) -+ return -EIO; -+ -+ return 0; -+} -+ -+static inline struct journal_buf * -+journal_seq_to_buf(struct journal *j, u64 seq) -+{ -+ /* seq should be for a journal entry that has been opened: */ -+ BUG_ON(seq > journal_cur_seq(j)); -+ BUG_ON(seq == journal_cur_seq(j) && -+ j->reservations.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL); -+ -+ if (seq == journal_cur_seq(j)) -+ return journal_cur_buf(j); -+ if (seq + 1 == journal_cur_seq(j) && -+ j->reservations.prev_buf_unwritten) -+ return journal_prev_buf(j); -+ return NULL; -+} -+ -+/** -+ * bch2_journal_wait_on_seq - wait for a journal entry to be written -+ * -+ * does _not_ cause @seq to be written immediately - if there is no other -+ * activity to cause the relevant journal entry to be filled up or flushed it -+ * can wait for an arbitrary amount of time (up to @j->write_delay_ms, which is -+ * configurable). -+ */ -+void bch2_journal_wait_on_seq(struct journal *j, u64 seq, -+ struct closure *parent) -+{ -+ struct journal_buf *buf; -+ -+ spin_lock(&j->lock); -+ -+ if ((buf = journal_seq_to_buf(j, seq))) { -+ if (!closure_wait(&buf->wait, parent)) -+ BUG(); -+ -+ if (seq == journal_cur_seq(j)) { -+ smp_mb(); -+ if (bch2_journal_error(j)) -+ closure_wake_up(&buf->wait); -+ } -+ } -+ -+ spin_unlock(&j->lock); -+} -+ -+/** -+ * bch2_journal_flush_seq_async - wait for a journal entry to be written -+ * -+ * like bch2_journal_wait_on_seq, except that it triggers a write immediately if -+ * necessary -+ */ -+void bch2_journal_flush_seq_async(struct journal *j, u64 seq, -+ struct closure *parent) -+{ -+ struct journal_buf *buf; -+ -+ spin_lock(&j->lock); -+ -+ if (parent && -+ (buf = journal_seq_to_buf(j, seq))) -+ if (!closure_wait(&buf->wait, parent)) -+ BUG(); -+ -+ if (seq == journal_cur_seq(j)) -+ __journal_entry_close(j); -+ spin_unlock(&j->lock); -+} -+ -+static int journal_seq_flushed(struct journal *j, u64 seq) -+{ -+ int ret; -+ -+ spin_lock(&j->lock); -+ ret = seq <= j->seq_ondisk ? 1 : journal_seq_error(j, seq); -+ -+ if (seq == journal_cur_seq(j)) -+ __journal_entry_close(j); -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+int bch2_journal_flush_seq(struct journal *j, u64 seq) -+{ -+ u64 start_time = local_clock(); -+ int ret, ret2; -+ -+ ret = wait_event_killable(j->wait, (ret2 = journal_seq_flushed(j, seq))); -+ -+ bch2_time_stats_update(j->flush_seq_time, start_time); -+ -+ return ret ?: ret2 < 0 ? 
ret2 : 0; -+} -+ -+/** -+ * bch2_journal_meta_async - force a journal entry to be written -+ */ -+void bch2_journal_meta_async(struct journal *j, struct closure *parent) -+{ -+ struct journal_res res; -+ -+ memset(&res, 0, sizeof(res)); -+ -+ bch2_journal_res_get(j, &res, jset_u64s(0), 0); -+ bch2_journal_res_put(j, &res); -+ -+ bch2_journal_flush_seq_async(j, res.seq, parent); -+} -+ -+int bch2_journal_meta(struct journal *j) -+{ -+ struct journal_res res; -+ int ret; -+ -+ memset(&res, 0, sizeof(res)); -+ -+ ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); -+ if (ret) -+ return ret; -+ -+ bch2_journal_res_put(j, &res); -+ -+ return bch2_journal_flush_seq(j, res.seq); -+} -+ -+/* -+ * bch2_journal_flush_async - if there is an open journal entry, or a journal -+ * still being written, write it and wait for the write to complete -+ */ -+void bch2_journal_flush_async(struct journal *j, struct closure *parent) -+{ -+ u64 seq, journal_seq; -+ -+ spin_lock(&j->lock); -+ journal_seq = journal_cur_seq(j); -+ -+ if (journal_entry_is_open(j)) { -+ seq = journal_seq; -+ } else if (journal_seq) { -+ seq = journal_seq - 1; -+ } else { -+ spin_unlock(&j->lock); -+ return; -+ } -+ spin_unlock(&j->lock); -+ -+ bch2_journal_flush_seq_async(j, seq, parent); -+} -+ -+int bch2_journal_flush(struct journal *j) -+{ -+ u64 seq, journal_seq; -+ -+ spin_lock(&j->lock); -+ journal_seq = journal_cur_seq(j); -+ -+ if (journal_entry_is_open(j)) { -+ seq = journal_seq; -+ } else if (journal_seq) { -+ seq = journal_seq - 1; -+ } else { -+ spin_unlock(&j->lock); -+ return 0; -+ } -+ spin_unlock(&j->lock); -+ -+ return bch2_journal_flush_seq(j, seq); -+} -+ -+/* block/unlock the journal: */ -+ -+void bch2_journal_unblock(struct journal *j) -+{ -+ spin_lock(&j->lock); -+ j->blocked--; -+ spin_unlock(&j->lock); -+ -+ journal_wake(j); -+} -+ -+void bch2_journal_block(struct journal *j) -+{ -+ spin_lock(&j->lock); -+ j->blocked++; -+ spin_unlock(&j->lock); -+ -+ journal_quiesce(j); -+} -+ -+/* allocate journal on a device: */ -+ -+static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, -+ bool new_fs, struct closure *cl) -+{ -+ struct bch_fs *c = ca->fs; -+ struct journal_device *ja = &ca->journal; -+ struct bch_sb_field_journal *journal_buckets; -+ u64 *new_bucket_seq = NULL, *new_buckets = NULL; -+ int ret = 0; -+ -+ /* don't handle reducing nr of buckets yet: */ -+ if (nr <= ja->nr) -+ return 0; -+ -+ ret = -ENOMEM; -+ new_buckets = kzalloc(nr * sizeof(u64), GFP_KERNEL); -+ new_bucket_seq = kzalloc(nr * sizeof(u64), GFP_KERNEL); -+ if (!new_buckets || !new_bucket_seq) -+ goto err; -+ -+ journal_buckets = bch2_sb_resize_journal(&ca->disk_sb, -+ nr + sizeof(*journal_buckets) / sizeof(u64)); -+ if (!journal_buckets) -+ goto err; -+ -+ /* -+ * We may be called from the device add path, before the new device has -+ * actually been added to the running filesystem: -+ */ -+ if (c) -+ spin_lock(&c->journal.lock); -+ -+ memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64)); -+ memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64)); -+ swap(new_buckets, ja->buckets); -+ swap(new_bucket_seq, ja->bucket_seq); -+ -+ if (c) -+ spin_unlock(&c->journal.lock); -+ -+ while (ja->nr < nr) { -+ struct open_bucket *ob = NULL; -+ unsigned pos; -+ long bucket; -+ -+ if (new_fs) { -+ bucket = bch2_bucket_alloc_new_fs(ca); -+ if (bucket < 0) { -+ ret = -ENOSPC; -+ goto err; -+ } -+ } else { -+ ob = bch2_bucket_alloc(c, ca, RESERVE_ALLOC, -+ false, cl); -+ if (IS_ERR(ob)) { -+ ret = cl ? 
-EAGAIN : -ENOSPC; -+ goto err; -+ } -+ -+ bucket = sector_to_bucket(ca, ob->ptr.offset); -+ } -+ -+ if (c) { -+ percpu_down_read(&c->mark_lock); -+ spin_lock(&c->journal.lock); -+ } -+ -+ pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0; -+ __array_insert_item(ja->buckets, ja->nr, pos); -+ __array_insert_item(ja->bucket_seq, ja->nr, pos); -+ __array_insert_item(journal_buckets->buckets, ja->nr, pos); -+ ja->nr++; -+ -+ ja->buckets[pos] = bucket; -+ ja->bucket_seq[pos] = 0; -+ journal_buckets->buckets[pos] = cpu_to_le64(bucket); -+ -+ if (pos <= ja->discard_idx) -+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr; -+ if (pos <= ja->dirty_idx_ondisk) -+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr; -+ if (pos <= ja->dirty_idx) -+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr; -+ if (pos <= ja->cur_idx) -+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr; -+ -+ bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL, -+ ca->mi.bucket_size, -+ gc_phase(GC_PHASE_SB), -+ 0); -+ -+ if (c) { -+ spin_unlock(&c->journal.lock); -+ percpu_up_read(&c->mark_lock); -+ } -+ -+ if (!new_fs) -+ bch2_open_bucket_put(c, ob); -+ } -+ -+ ret = 0; -+err: -+ kfree(new_bucket_seq); -+ kfree(new_buckets); -+ -+ return ret; -+} -+ -+/* -+ * Allocate more journal space at runtime - not currently making use if it, but -+ * the code works: -+ */ -+int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, -+ unsigned nr) -+{ -+ struct journal_device *ja = &ca->journal; -+ struct closure cl; -+ unsigned current_nr; -+ int ret; -+ -+ closure_init_stack(&cl); -+ -+ do { -+ struct disk_reservation disk_res = { 0, 0 }; -+ -+ closure_sync(&cl); -+ -+ mutex_lock(&c->sb_lock); -+ current_nr = ja->nr; -+ -+ /* -+ * note: journal buckets aren't really counted as _sectors_ used yet, so -+ * we don't need the disk reservation to avoid the BUG_ON() in buckets.c -+ * when space used goes up without a reservation - but we do need the -+ * reservation to ensure we'll actually be able to allocate: -+ */ -+ -+ if (bch2_disk_reservation_get(c, &disk_res, -+ bucket_to_sector(ca, nr - ja->nr), 1, 0)) { -+ mutex_unlock(&c->sb_lock); -+ return -ENOSPC; -+ } -+ -+ ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl); -+ -+ bch2_disk_reservation_put(c, &disk_res); -+ -+ if (ja->nr != current_nr) -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ } while (ret == -EAGAIN); -+ -+ return ret; -+} -+ -+int bch2_dev_journal_alloc(struct bch_dev *ca) -+{ -+ unsigned nr; -+ -+ if (dynamic_fault("bcachefs:add:journal_alloc")) -+ return -ENOMEM; -+ -+ /* -+ * clamp journal size to 1024 buckets or 512MB (in sectors), whichever -+ * is smaller: -+ */ -+ nr = clamp_t(unsigned, ca->mi.nbuckets >> 8, -+ BCH_JOURNAL_BUCKETS_MIN, -+ min(1 << 10, -+ (1 << 20) / ca->mi.bucket_size)); -+ -+ return __bch2_set_nr_journal_buckets(ca, nr, true, NULL); -+} -+ -+/* startup/shutdown: */ -+ -+static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx) -+{ -+ union journal_res_state state; -+ struct journal_buf *w; -+ bool ret; -+ -+ spin_lock(&j->lock); -+ state = READ_ONCE(j->reservations); -+ w = j->buf + !state.idx; -+ -+ ret = state.prev_buf_unwritten && -+ bch2_bkey_has_device(bkey_i_to_s_c(&w->key), dev_idx); -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca) -+{ -+ wait_event(j->wait, !bch2_journal_writing_to_device(j, ca->dev_idx)); -+} -+ -+void bch2_fs_journal_stop(struct journal *j) -+{ -+ bch2_journal_flush_all_pins(j); -+ -+ wait_event(j->wait, 
journal_entry_close(j)); -+ -+ /* do we need to write another journal entry? */ -+ if (test_bit(JOURNAL_NOT_EMPTY, &j->flags)) -+ bch2_journal_meta(j); -+ -+ journal_quiesce(j); -+ -+ BUG_ON(!bch2_journal_error(j) && -+ test_bit(JOURNAL_NOT_EMPTY, &j->flags)); -+ -+ cancel_delayed_work_sync(&j->write_work); -+ cancel_delayed_work_sync(&j->reclaim_work); -+} -+ -+int bch2_fs_journal_start(struct journal *j, u64 cur_seq, -+ struct list_head *journal_entries) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_entry_pin_list *p; -+ struct journal_replay *i; -+ u64 last_seq = cur_seq, nr, seq; -+ -+ if (!list_empty(journal_entries)) -+ last_seq = le64_to_cpu(list_last_entry(journal_entries, -+ struct journal_replay, list)->j.last_seq); -+ -+ nr = cur_seq - last_seq; -+ -+ if (nr + 1 > j->pin.size) { -+ free_fifo(&j->pin); -+ init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL); -+ if (!j->pin.data) { -+ bch_err(c, "error reallocating journal fifo (%llu open entries)", nr); -+ return -ENOMEM; -+ } -+ } -+ -+ j->replay_journal_seq = last_seq; -+ j->replay_journal_seq_end = cur_seq; -+ j->last_seq_ondisk = last_seq; -+ j->pin.front = last_seq; -+ j->pin.back = cur_seq; -+ atomic64_set(&j->seq, cur_seq - 1); -+ -+ fifo_for_each_entry_ptr(p, &j->pin, seq) { -+ INIT_LIST_HEAD(&p->list); -+ INIT_LIST_HEAD(&p->flushed); -+ atomic_set(&p->count, 1); -+ p->devs.nr = 0; -+ } -+ -+ list_for_each_entry(i, journal_entries, list) { -+ seq = le64_to_cpu(i->j.seq); -+ BUG_ON(seq >= cur_seq); -+ -+ if (seq < last_seq) -+ continue; -+ -+ journal_seq_pin(j, seq)->devs = i->devs; -+ } -+ -+ spin_lock(&j->lock); -+ -+ set_bit(JOURNAL_STARTED, &j->flags); -+ -+ journal_pin_new_entry(j, 1); -+ bch2_journal_buf_init(j); -+ -+ c->last_bucket_seq_cleanup = journal_cur_seq(j); -+ -+ bch2_journal_space_available(j); -+ spin_unlock(&j->lock); -+ -+ return 0; -+} -+ -+/* init/exit: */ -+ -+void bch2_dev_journal_exit(struct bch_dev *ca) -+{ -+ kfree(ca->journal.bio); -+ kfree(ca->journal.buckets); -+ kfree(ca->journal.bucket_seq); -+ -+ ca->journal.bio = NULL; -+ ca->journal.buckets = NULL; -+ ca->journal.bucket_seq = NULL; -+} -+ -+int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) -+{ -+ struct journal_device *ja = &ca->journal; -+ struct bch_sb_field_journal *journal_buckets = -+ bch2_sb_get_journal(sb); -+ unsigned i; -+ -+ ja->nr = bch2_nr_journal_buckets(journal_buckets); -+ -+ ja->bucket_seq = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); -+ if (!ja->bucket_seq) -+ return -ENOMEM; -+ -+ ca->journal.bio = bio_kmalloc(GFP_KERNEL, -+ DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE)); -+ if (!ca->journal.bio) -+ return -ENOMEM; -+ -+ ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); -+ if (!ja->buckets) -+ return -ENOMEM; -+ -+ for (i = 0; i < ja->nr; i++) -+ ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]); -+ -+ return 0; -+} -+ -+void bch2_fs_journal_exit(struct journal *j) -+{ -+ kvpfree(j->buf[1].data, j->buf[1].buf_size); -+ kvpfree(j->buf[0].data, j->buf[0].buf_size); -+ free_fifo(&j->pin); -+} -+ -+int bch2_fs_journal_init(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ static struct lock_class_key res_key; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ spin_lock_init(&j->lock); -+ spin_lock_init(&j->err_lock); -+ init_waitqueue_head(&j->wait); -+ INIT_DELAYED_WORK(&j->write_work, journal_write_work); -+ INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work); -+ 
init_waitqueue_head(&j->pin_flush_wait); -+ mutex_init(&j->reclaim_lock); -+ mutex_init(&j->discard_lock); -+ -+ lockdep_init_map(&j->res_map, "journal res", &res_key, 0); -+ -+ j->buf[0].buf_size = JOURNAL_ENTRY_SIZE_MIN; -+ j->buf[1].buf_size = JOURNAL_ENTRY_SIZE_MIN; -+ j->write_delay_ms = 1000; -+ j->reclaim_delay_ms = 100; -+ -+ /* Btree roots: */ -+ j->entry_u64s_reserved += -+ BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX); -+ -+ atomic64_set(&j->reservations.counter, -+ ((union journal_res_state) -+ { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); -+ -+ if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) || -+ !(j->buf[0].data = kvpmalloc(j->buf[0].buf_size, GFP_KERNEL)) || -+ !(j->buf[1].data = kvpmalloc(j->buf[1].buf_size, GFP_KERNEL))) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ j->pin.front = j->pin.back = 1; -+out: -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -+ -+/* debug: */ -+ -+ssize_t bch2_journal_print_debug(struct journal *j, char *buf) -+{ -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ union journal_res_state s; -+ struct bch_dev *ca; -+ unsigned iter; -+ -+ rcu_read_lock(); -+ spin_lock(&j->lock); -+ s = READ_ONCE(j->reservations); -+ -+ pr_buf(&out, -+ "active journal entries:\t%llu\n" -+ "seq:\t\t\t%llu\n" -+ "last_seq:\t\t%llu\n" -+ "last_seq_ondisk:\t%llu\n" -+ "prereserved:\t\t%u/%u\n" -+ "current entry sectors:\t%u\n" -+ "current entry:\t\t", -+ fifo_used(&j->pin), -+ journal_cur_seq(j), -+ journal_last_seq(j), -+ j->last_seq_ondisk, -+ j->prereserved.reserved, -+ j->prereserved.remaining, -+ j->cur_entry_sectors); -+ -+ switch (s.cur_entry_offset) { -+ case JOURNAL_ENTRY_ERROR_VAL: -+ pr_buf(&out, "error\n"); -+ break; -+ case JOURNAL_ENTRY_CLOSED_VAL: -+ pr_buf(&out, "closed\n"); -+ break; -+ default: -+ pr_buf(&out, "%u/%u\n", -+ s.cur_entry_offset, -+ j->cur_entry_u64s); -+ break; -+ } -+ -+ pr_buf(&out, -+ "current entry refs:\t%u\n" -+ "prev entry unwritten:\t", -+ journal_state_count(s, s.idx)); -+ -+ if (s.prev_buf_unwritten) -+ pr_buf(&out, "yes, ref %u sectors %u\n", -+ journal_state_count(s, !s.idx), -+ journal_prev_buf(j)->sectors); -+ else -+ pr_buf(&out, "no\n"); -+ -+ pr_buf(&out, -+ "need write:\t\t%i\n" -+ "replay done:\t\t%i\n", -+ test_bit(JOURNAL_NEED_WRITE, &j->flags), -+ test_bit(JOURNAL_REPLAY_DONE, &j->flags)); -+ -+ for_each_member_device_rcu(ca, c, iter, -+ &c->rw_devs[BCH_DATA_JOURNAL]) { -+ struct journal_device *ja = &ca->journal; -+ -+ if (!ja->nr) -+ continue; -+ -+ pr_buf(&out, -+ "dev %u:\n" -+ "\tnr\t\t%u\n" -+ "\tavailable\t%u:%u\n" -+ "\tdiscard_idx\t\t%u\n" -+ "\tdirty_idx_ondisk\t%u (seq %llu)\n" -+ "\tdirty_idx\t\t%u (seq %llu)\n" -+ "\tcur_idx\t\t%u (seq %llu)\n", -+ iter, ja->nr, -+ bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), -+ ja->sectors_free, -+ ja->discard_idx, -+ ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk], -+ ja->dirty_idx, ja->bucket_seq[ja->dirty_idx], -+ ja->cur_idx, ja->bucket_seq[ja->cur_idx]); -+ } -+ -+ spin_unlock(&j->lock); -+ rcu_read_unlock(); -+ -+ return out.pos - buf; -+} -+ -+ssize_t bch2_journal_print_pins(struct journal *j, char *buf) -+{ -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ struct journal_entry_pin_list *pin_list; -+ struct journal_entry_pin *pin; -+ u64 i; -+ -+ spin_lock(&j->lock); -+ fifo_for_each_entry_ptr(pin_list, &j->pin, i) { -+ pr_buf(&out, "%llu: count %u\n", -+ i, atomic_read(&pin_list->count)); -+ -+ list_for_each_entry(pin, 
&pin_list->list, list) -+ pr_buf(&out, "\t%px %ps\n", -+ pin, pin->flush); -+ -+ if (!list_empty(&pin_list->flushed)) -+ pr_buf(&out, "flushed:\n"); -+ -+ list_for_each_entry(pin, &pin_list->flushed, list) -+ pr_buf(&out, "\t%px %ps\n", -+ pin, pin->flush); -+ } -+ spin_unlock(&j->lock); -+ -+ return out.pos - buf; -+} -diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h -new file mode 100644 -index 000000000000..30de6d96188e ---- /dev/null -+++ b/fs/bcachefs/journal.h -@@ -0,0 +1,519 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_H -+#define _BCACHEFS_JOURNAL_H -+ -+/* -+ * THE JOURNAL: -+ * -+ * The primary purpose of the journal is to log updates (insertions) to the -+ * b-tree, to avoid having to do synchronous updates to the b-tree on disk. -+ * -+ * Without the journal, the b-tree is always internally consistent on -+ * disk - and in fact, in the earliest incarnations bcache didn't have a journal -+ * but did handle unclean shutdowns by doing all index updates synchronously -+ * (with coalescing). -+ * -+ * Updates to interior nodes still happen synchronously and without the journal -+ * (for simplicity) - this may change eventually but updates to interior nodes -+ * are rare enough it's not a huge priority. -+ * -+ * This means the journal is relatively separate from the b-tree; it consists of -+ * just a list of keys and journal replay consists of just redoing those -+ * insertions in same order that they appear in the journal. -+ * -+ * PERSISTENCE: -+ * -+ * For synchronous updates (where we're waiting on the index update to hit -+ * disk), the journal entry will be written out immediately (or as soon as -+ * possible, if the write for the previous journal entry was still in flight). -+ * -+ * Synchronous updates are specified by passing a closure (@flush_cl) to -+ * bch2_btree_insert() or bch_btree_insert_node(), which then pass that parameter -+ * down to the journalling code. That closure will will wait on the journal -+ * write to complete (via closure_wait()). -+ * -+ * If the index update wasn't synchronous, the journal entry will be -+ * written out after 10 ms have elapsed, by default (the delay_ms field -+ * in struct journal). -+ * -+ * JOURNAL ENTRIES: -+ * -+ * A journal entry is variable size (struct jset), it's got a fixed length -+ * header and then a variable number of struct jset_entry entries. -+ * -+ * Journal entries are identified by monotonically increasing 64 bit sequence -+ * numbers - jset->seq; other places in the code refer to this sequence number. -+ * -+ * A jset_entry entry contains one or more bkeys (which is what gets inserted -+ * into the b-tree). We need a container to indicate which b-tree the key is -+ * for; also, the roots of the various b-trees are stored in jset_entry entries -+ * (one for each b-tree) - this lets us add new b-tree types without changing -+ * the on disk format. -+ * -+ * We also keep some things in the journal header that are logically part of the -+ * superblock - all the things that are frequently updated. This is for future -+ * bcache on raw flash support; the superblock (which will become another -+ * journal) can't be moved or wear leveled, so it contains just enough -+ * information to find the main journal, and the superblock only has to be -+ * rewritten when we want to move/wear level the main journal. 
-+ * -+ * JOURNAL LAYOUT ON DISK: -+ * -+ * The journal is written to a ringbuffer of buckets (which is kept in the -+ * superblock); the individual buckets are not necessarily contiguous on disk -+ * which means that journal entries are not allowed to span buckets, but also -+ * that we can resize the journal at runtime if desired (unimplemented). -+ * -+ * The journal buckets exist in the same pool as all the other buckets that are -+ * managed by the allocator and garbage collection - garbage collection marks -+ * the journal buckets as metadata buckets. -+ * -+ * OPEN/DIRTY JOURNAL ENTRIES: -+ * -+ * Open/dirty journal entries are journal entries that contain b-tree updates -+ * that have not yet been written out to the b-tree on disk. We have to track -+ * which journal entries are dirty, and we also have to avoid wrapping around -+ * the journal and overwriting old but still dirty journal entries with new -+ * journal entries. -+ * -+ * On disk, this is represented with the "last_seq" field of struct jset; -+ * last_seq is the first sequence number that journal replay has to replay. -+ * -+ * To avoid overwriting dirty journal entries on disk, we keep a mapping (in -+ * journal_device->seq) of for each journal bucket, the highest sequence number -+ * any journal entry it contains. Then, by comparing that against last_seq we -+ * can determine whether that journal bucket contains dirty journal entries or -+ * not. -+ * -+ * To track which journal entries are dirty, we maintain a fifo of refcounts -+ * (where each entry corresponds to a specific sequence number) - when a ref -+ * goes to 0, that journal entry is no longer dirty. -+ * -+ * Journalling of index updates is done at the same time as the b-tree itself is -+ * being modified (see btree_insert_key()); when we add the key to the journal -+ * the pending b-tree write takes a ref on the journal entry the key was added -+ * to. If a pending b-tree write would need to take refs on multiple dirty -+ * journal entries, it only keeps the ref on the oldest one (since a newer -+ * journal entry will still be replayed if an older entry was dirty). -+ * -+ * JOURNAL FILLING UP: -+ * -+ * There are two ways the journal could fill up; either we could run out of -+ * space to write to, or we could have too many open journal entries and run out -+ * of room in the fifo of refcounts. Since those refcounts are decremented -+ * without any locking we can't safely resize that fifo, so we handle it the -+ * same way. -+ * -+ * If the journal fills up, we start flushing dirty btree nodes until we can -+ * allocate space for a journal write again - preferentially flushing btree -+ * nodes that are pinning the oldest journal entries first. 
-+ */ -+ -+#include -+ -+#include "journal_types.h" -+ -+struct bch_fs; -+ -+static inline void journal_wake(struct journal *j) -+{ -+ wake_up(&j->wait); -+ closure_wake_up(&j->async_wait); -+ closure_wake_up(&j->preres_wait); -+} -+ -+static inline struct journal_buf *journal_cur_buf(struct journal *j) -+{ -+ return j->buf + j->reservations.idx; -+} -+ -+static inline struct journal_buf *journal_prev_buf(struct journal *j) -+{ -+ return j->buf + !j->reservations.idx; -+} -+ -+/* Sequence number of oldest dirty journal entry */ -+ -+static inline u64 journal_last_seq(struct journal *j) -+{ -+ return j->pin.front; -+} -+ -+static inline u64 journal_cur_seq(struct journal *j) -+{ -+ BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq)); -+ -+ return j->pin.back - 1; -+} -+ -+u64 bch2_inode_journal_seq(struct journal *, u64); -+ -+static inline int journal_state_count(union journal_res_state s, int idx) -+{ -+ return idx == 0 ? s.buf0_count : s.buf1_count; -+} -+ -+static inline void journal_state_inc(union journal_res_state *s) -+{ -+ s->buf0_count += s->idx == 0; -+ s->buf1_count += s->idx == 1; -+} -+ -+static inline void bch2_journal_set_has_inode(struct journal *j, -+ struct journal_res *res, -+ u64 inum) -+{ -+ struct journal_buf *buf = &j->buf[res->idx]; -+ unsigned long bit = hash_64(inum, ilog2(sizeof(buf->has_inode) * 8)); -+ -+ /* avoid atomic op if possible */ -+ if (unlikely(!test_bit(bit, buf->has_inode))) -+ set_bit(bit, buf->has_inode); -+} -+ -+/* -+ * Amount of space that will be taken up by some keys in the journal (i.e. -+ * including the jset header) -+ */ -+static inline unsigned jset_u64s(unsigned u64s) -+{ -+ return u64s + sizeof(struct jset_entry) / sizeof(u64); -+} -+ -+static inline int journal_entry_overhead(struct journal *j) -+{ -+ return sizeof(struct jset) / sizeof(u64) + j->entry_u64s_reserved; -+} -+ -+static inline struct jset_entry * -+bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s) -+{ -+ struct jset *jset = buf->data; -+ struct jset_entry *entry = vstruct_idx(jset, le32_to_cpu(jset->u64s)); -+ -+ memset(entry, 0, sizeof(*entry)); -+ entry->u64s = cpu_to_le16(u64s); -+ -+ le32_add_cpu(&jset->u64s, jset_u64s(u64s)); -+ -+ return entry; -+} -+ -+static inline struct jset_entry * -+journal_res_entry(struct journal *j, struct journal_res *res) -+{ -+ return vstruct_idx(j->buf[res->idx].data, res->offset); -+} -+ -+static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type, -+ enum btree_id id, unsigned level, -+ const void *data, unsigned u64s) -+{ -+ memset(entry, 0, sizeof(*entry)); -+ entry->u64s = cpu_to_le16(u64s); -+ entry->type = type; -+ entry->btree_id = id; -+ entry->level = level; -+ memcpy_u64s_small(entry->_data, data, u64s); -+ -+ return jset_u64s(u64s); -+} -+ -+static inline void bch2_journal_add_entry(struct journal *j, struct journal_res *res, -+ unsigned type, enum btree_id id, -+ unsigned level, -+ const void *data, unsigned u64s) -+{ -+ unsigned actual = journal_entry_set(journal_res_entry(j, res), -+ type, id, level, data, u64s); -+ -+ EBUG_ON(!res->ref); -+ EBUG_ON(actual > res->u64s); -+ -+ res->offset += actual; -+ res->u64s -= actual; -+} -+ -+static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *res, -+ enum btree_id id, const struct bkey_i *k) -+{ -+ bch2_journal_add_entry(j, res, BCH_JSET_ENTRY_btree_keys, -+ id, 0, k, k->k.u64s); -+} -+ -+static inline bool journal_entry_empty(struct jset *j) -+{ -+ struct jset_entry *i; -+ -+ if (j->seq != j->last_seq) -+ 
return false; -+ -+ vstruct_for_each(j, i) -+ if (i->type == BCH_JSET_ENTRY_btree_keys && i->u64s) -+ return false; -+ return true; -+} -+ -+void __bch2_journal_buf_put(struct journal *, bool); -+ -+static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, -+ bool need_write_just_set) -+{ -+ union journal_res_state s; -+ -+ s.v = atomic64_sub_return(((union journal_res_state) { -+ .buf0_count = idx == 0, -+ .buf1_count = idx == 1, -+ }).v, &j->reservations.counter); -+ if (!journal_state_count(s, idx)) { -+ EBUG_ON(s.idx == idx || !s.prev_buf_unwritten); -+ __bch2_journal_buf_put(j, need_write_just_set); -+ } -+} -+ -+/* -+ * This function releases the journal write structure so other threads can -+ * then proceed to add their keys as well. -+ */ -+static inline void bch2_journal_res_put(struct journal *j, -+ struct journal_res *res) -+{ -+ if (!res->ref) -+ return; -+ -+ lock_release(&j->res_map, _THIS_IP_); -+ -+ while (res->u64s) -+ bch2_journal_add_entry(j, res, -+ BCH_JSET_ENTRY_btree_keys, -+ 0, 0, NULL, 0); -+ -+ bch2_journal_buf_put(j, res->idx, false); -+ -+ res->ref = 0; -+} -+ -+int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *, -+ unsigned); -+ -+#define JOURNAL_RES_GET_NONBLOCK (1 << 0) -+#define JOURNAL_RES_GET_CHECK (1 << 1) -+#define JOURNAL_RES_GET_RESERVED (1 << 2) -+#define JOURNAL_RES_GET_RECLAIM (1 << 3) -+ -+static inline int journal_res_get_fast(struct journal *j, -+ struct journal_res *res, -+ unsigned flags) -+{ -+ union journal_res_state old, new; -+ u64 v = atomic64_read(&j->reservations.counter); -+ -+ do { -+ old.v = new.v = v; -+ -+ /* -+ * Check if there is still room in the current journal -+ * entry: -+ */ -+ if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s) -+ return 0; -+ -+ EBUG_ON(!journal_state_count(new, new.idx)); -+ -+ if (!(flags & JOURNAL_RES_GET_RESERVED) && -+ !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) -+ return 0; -+ -+ if (flags & JOURNAL_RES_GET_CHECK) -+ return 1; -+ -+ new.cur_entry_offset += res->u64s; -+ journal_state_inc(&new); -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ res->ref = true; -+ res->idx = old.idx; -+ res->offset = old.cur_entry_offset; -+ res->seq = le64_to_cpu(j->buf[old.idx].data->seq); -+ return 1; -+} -+ -+static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res, -+ unsigned u64s, unsigned flags) -+{ -+ int ret; -+ -+ EBUG_ON(res->ref); -+ EBUG_ON(!test_bit(JOURNAL_STARTED, &j->flags)); -+ -+ res->u64s = u64s; -+ -+ if (journal_res_get_fast(j, res, flags)) -+ goto out; -+ -+ ret = bch2_journal_res_get_slowpath(j, res, flags); -+ if (ret) -+ return ret; -+out: -+ if (!(flags & JOURNAL_RES_GET_CHECK)) { -+ lock_acquire_shared(&j->res_map, 0, -+ (flags & JOURNAL_RES_GET_NONBLOCK) != 0, -+ NULL, _THIS_IP_); -+ EBUG_ON(!res->ref); -+ } -+ return 0; -+} -+ -+/* journal_preres: */ -+ -+static inline bool journal_check_may_get_unreserved(struct journal *j) -+{ -+ union journal_preres_state s = READ_ONCE(j->prereserved); -+ bool ret = s.reserved <= s.remaining && -+ fifo_free(&j->pin) > 8; -+ -+ lockdep_assert_held(&j->lock); -+ -+ if (ret != test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) { -+ if (ret) { -+ set_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags); -+ journal_wake(j); -+ } else { -+ clear_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags); -+ } -+ } -+ return ret; -+} -+ -+static inline void bch2_journal_preres_put(struct journal *j, -+ struct journal_preres *res) -+{ -+ union journal_preres_state s = { 
.reserved = res->u64s }; -+ -+ if (!res->u64s) -+ return; -+ -+ s.v = atomic64_sub_return(s.v, &j->prereserved.counter); -+ res->u64s = 0; -+ closure_wake_up(&j->preres_wait); -+ -+ if (s.reserved <= s.remaining && -+ !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) { -+ spin_lock(&j->lock); -+ journal_check_may_get_unreserved(j); -+ spin_unlock(&j->lock); -+ } -+} -+ -+int __bch2_journal_preres_get(struct journal *, -+ struct journal_preres *, unsigned, unsigned); -+ -+static inline int bch2_journal_preres_get_fast(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ int d = new_u64s - res->u64s; -+ union journal_preres_state old, new; -+ u64 v = atomic64_read(&j->prereserved.counter); -+ -+ do { -+ old.v = new.v = v; -+ -+ new.reserved += d; -+ -+ /* -+ * If we're being called from the journal reclaim path, we have -+ * to unconditionally give out the pre-reservation, there's -+ * nothing else sensible we can do - otherwise we'd recurse back -+ * into the reclaim path and deadlock: -+ */ -+ -+ if (!(flags & JOURNAL_RES_GET_RECLAIM) && -+ new.reserved > new.remaining) -+ return 0; -+ } while ((v = atomic64_cmpxchg(&j->prereserved.counter, -+ old.v, new.v)) != old.v); -+ -+ res->u64s += d; -+ return 1; -+} -+ -+static inline int bch2_journal_preres_get(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ if (new_u64s <= res->u64s) -+ return 0; -+ -+ if (bch2_journal_preres_get_fast(j, res, new_u64s, flags)) -+ return 0; -+ -+ if (flags & JOURNAL_RES_GET_NONBLOCK) -+ return -EAGAIN; -+ -+ return __bch2_journal_preres_get(j, res, new_u64s, flags); -+} -+ -+/* journal_entry_res: */ -+ -+void bch2_journal_entry_res_resize(struct journal *, -+ struct journal_entry_res *, -+ unsigned); -+ -+u64 bch2_journal_last_unwritten_seq(struct journal *); -+int bch2_journal_open_seq_async(struct journal *, u64, struct closure *); -+ -+void bch2_journal_wait_on_seq(struct journal *, u64, struct closure *); -+void bch2_journal_flush_seq_async(struct journal *, u64, struct closure *); -+void bch2_journal_flush_async(struct journal *, struct closure *); -+void bch2_journal_meta_async(struct journal *, struct closure *); -+ -+int bch2_journal_flush_seq(struct journal *, u64); -+int bch2_journal_flush(struct journal *); -+int bch2_journal_meta(struct journal *); -+ -+void bch2_journal_halt(struct journal *); -+ -+static inline int bch2_journal_error(struct journal *j) -+{ -+ return j->reservations.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL -+ ? 
-EIO : 0; -+} -+ -+struct bch_dev; -+ -+static inline bool journal_flushes_device(struct bch_dev *ca) -+{ -+ return true; -+} -+ -+static inline void bch2_journal_set_replay_done(struct journal *j) -+{ -+ BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags)); -+ set_bit(JOURNAL_REPLAY_DONE, &j->flags); -+} -+ -+void bch2_journal_unblock(struct journal *); -+void bch2_journal_block(struct journal *); -+ -+ssize_t bch2_journal_print_debug(struct journal *, char *); -+ssize_t bch2_journal_print_pins(struct journal *, char *); -+ -+int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, -+ unsigned nr); -+int bch2_dev_journal_alloc(struct bch_dev *); -+ -+void bch2_dev_journal_stop(struct journal *, struct bch_dev *); -+ -+void bch2_fs_journal_stop(struct journal *); -+int bch2_fs_journal_start(struct journal *, u64, struct list_head *); -+ -+void bch2_dev_journal_exit(struct bch_dev *); -+int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *); -+void bch2_fs_journal_exit(struct journal *); -+int bch2_fs_journal_init(struct journal *); -+ -+#endif /* _BCACHEFS_JOURNAL_H */ -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -new file mode 100644 -index 000000000000..c298c2b7721d ---- /dev/null -+++ b/fs/bcachefs/journal_io.c -@@ -0,0 +1,1150 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "btree_io.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "error.h" -+#include "journal.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "replicas.h" -+ -+#include -+ -+struct journal_list { -+ struct closure cl; -+ struct mutex lock; -+ struct list_head *head; -+ int ret; -+}; -+ -+#define JOURNAL_ENTRY_ADD_OK 0 -+#define JOURNAL_ENTRY_ADD_OUT_OF_RANGE 5 -+ -+/* -+ * Given a journal entry we just read, add it to the list of journal entries to -+ * be replayed: -+ */ -+static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, -+ struct journal_list *jlist, struct jset *j) -+{ -+ struct journal_replay *i, *pos; -+ struct list_head *where; -+ size_t bytes = vstruct_bytes(j); -+ __le64 last_seq; -+ int ret; -+ -+ last_seq = !list_empty(jlist->head) -+ ? list_last_entry(jlist->head, struct journal_replay, -+ list)->j.last_seq -+ : 0; -+ -+ if (!c->opts.read_entire_journal) { -+ /* Is this entry older than the range we need? */ -+ if (le64_to_cpu(j->seq) < le64_to_cpu(last_seq)) { -+ ret = JOURNAL_ENTRY_ADD_OUT_OF_RANGE; -+ goto out; -+ } -+ -+ /* Drop entries we don't need anymore */ -+ list_for_each_entry_safe(i, pos, jlist->head, list) { -+ if (le64_to_cpu(i->j.seq) >= le64_to_cpu(j->last_seq)) -+ break; -+ list_del(&i->list); -+ kvpfree(i, offsetof(struct journal_replay, j) + -+ vstruct_bytes(&i->j)); -+ } -+ } -+ -+ list_for_each_entry_reverse(i, jlist->head, list) { -+ /* Duplicate? 
*/ -+ if (le64_to_cpu(j->seq) == le64_to_cpu(i->j.seq)) { -+ fsck_err_on(bytes != vstruct_bytes(&i->j) || -+ memcmp(j, &i->j, bytes), c, -+ "found duplicate but non identical journal entries (seq %llu)", -+ le64_to_cpu(j->seq)); -+ goto found; -+ } -+ -+ if (le64_to_cpu(j->seq) > le64_to_cpu(i->j.seq)) { -+ where = &i->list; -+ goto add; -+ } -+ } -+ -+ where = jlist->head; -+add: -+ i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL); -+ if (!i) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ list_add(&i->list, where); -+ i->devs.nr = 0; -+ memcpy(&i->j, j, bytes); -+found: -+ if (!bch2_dev_list_has_dev(i->devs, ca->dev_idx)) -+ bch2_dev_list_add_dev(&i->devs, ca->dev_idx); -+ else -+ fsck_err_on(1, c, "duplicate journal entries on same device"); -+ ret = JOURNAL_ENTRY_ADD_OK; -+out: -+fsck_err: -+ return ret; -+} -+ -+static struct nonce journal_nonce(const struct jset *jset) -+{ -+ return (struct nonce) {{ -+ [0] = 0, -+ [1] = ((__le32 *) &jset->seq)[0], -+ [2] = ((__le32 *) &jset->seq)[1], -+ [3] = BCH_NONCE_JOURNAL, -+ }}; -+} -+ -+/* this fills in a range with empty jset_entries: */ -+static void journal_entry_null_range(void *start, void *end) -+{ -+ struct jset_entry *entry; -+ -+ for (entry = start; entry != end; entry = vstruct_next(entry)) -+ memset(entry, 0, sizeof(*entry)); -+} -+ -+#define JOURNAL_ENTRY_REREAD 5 -+#define JOURNAL_ENTRY_NONE 6 -+#define JOURNAL_ENTRY_BAD 7 -+ -+#define journal_entry_err(c, msg, ...) \ -+({ \ -+ switch (write) { \ -+ case READ: \ -+ mustfix_fsck_err(c, msg, ##__VA_ARGS__); \ -+ break; \ -+ case WRITE: \ -+ bch_err(c, "corrupt metadata before write:\n" \ -+ msg, ##__VA_ARGS__); \ -+ if (bch2_fs_inconsistent(c)) { \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ } \ -+ break; \ -+ } \ -+ true; \ -+}) -+ -+#define journal_entry_err_on(cond, c, msg, ...) \ -+ ((cond) ? 
journal_entry_err(c, msg, ##__VA_ARGS__) : false) -+ -+static int journal_validate_key(struct bch_fs *c, struct jset *jset, -+ struct jset_entry *entry, -+ unsigned level, enum btree_id btree_id, -+ struct bkey_i *k, -+ const char *type, int write) -+{ -+ void *next = vstruct_next(entry); -+ const char *invalid; -+ unsigned version = le32_to_cpu(jset->version); -+ int ret = 0; -+ -+ if (journal_entry_err_on(!k->k.u64s, c, -+ "invalid %s in journal: k->u64s 0", type)) { -+ entry->u64s = cpu_to_le16((u64 *) k - entry->_data); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ if (journal_entry_err_on((void *) bkey_next(k) > -+ (void *) vstruct_next(entry), c, -+ "invalid %s in journal: extends past end of journal entry", -+ type)) { -+ entry->u64s = cpu_to_le16((u64 *) k - entry->_data); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, c, -+ "invalid %s in journal: bad format %u", -+ type, k->k.format)) { -+ le16_add_cpu(&entry->u64s, -k->k.u64s); -+ memmove(k, bkey_next(k), next - (void *) bkey_next(k)); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ if (!write) -+ bch2_bkey_compat(level, btree_id, version, -+ JSET_BIG_ENDIAN(jset), write, -+ NULL, bkey_to_packed(k)); -+ -+ invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k), -+ __btree_node_type(level, btree_id)); -+ if (invalid) { -+ char buf[160]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k)); -+ mustfix_fsck_err(c, "invalid %s in journal: %s\n%s", -+ type, invalid, buf); -+ -+ le16_add_cpu(&entry->u64s, -k->k.u64s); -+ memmove(k, bkey_next(k), next - (void *) bkey_next(k)); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ if (write) -+ bch2_bkey_compat(level, btree_id, version, -+ JSET_BIG_ENDIAN(jset), write, -+ NULL, bkey_to_packed(k)); -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_btree_keys(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct bkey_i *k; -+ -+ vstruct_for_each(entry, k) { -+ int ret = journal_validate_key(c, jset, entry, -+ entry->level, -+ entry->btree_id, -+ k, "key", write); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int journal_entry_validate_btree_root(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct bkey_i *k = entry->start; -+ int ret = 0; -+ -+ if (journal_entry_err_on(!entry->u64s || -+ le16_to_cpu(entry->u64s) != k->k.u64s, c, -+ "invalid btree root journal entry: wrong number of keys")) { -+ void *next = vstruct_next(entry); -+ /* -+ * we don't want to null out this jset_entry, -+ * just the contents, so that later we can tell -+ * we were _supposed_ to have a btree root -+ */ -+ entry->u64s = 0; -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ return journal_validate_key(c, jset, entry, 1, entry->btree_id, k, -+ "btree root", write); -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_prio_ptrs(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ /* obsolete, don't care: */ -+ return 0; -+} -+ -+static int journal_entry_validate_blacklist(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ int ret = 0; -+ -+ if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1, c, -+ "invalid journal seq blacklist entry: bad size")) { -+ journal_entry_null_range(entry, 
vstruct_next(entry)); -+ } -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_blacklist_v2(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct jset_entry_blacklist_v2 *bl_entry; -+ int ret = 0; -+ -+ if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2, c, -+ "invalid journal seq blacklist entry: bad size")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ goto out; -+ } -+ -+ bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry); -+ -+ if (journal_entry_err_on(le64_to_cpu(bl_entry->start) > -+ le64_to_cpu(bl_entry->end), c, -+ "invalid journal seq blacklist entry: start > end")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ } -+out: -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_usage(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); -+ int ret = 0; -+ -+ if (journal_entry_err_on(bytes < sizeof(*u), -+ c, -+ "invalid journal entry usage: bad size")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ return ret; -+ } -+ -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_data_usage(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct jset_entry_data_usage *u = -+ container_of(entry, struct jset_entry_data_usage, entry); -+ unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); -+ int ret = 0; -+ -+ if (journal_entry_err_on(bytes < sizeof(*u) || -+ bytes < sizeof(*u) + u->r.nr_devs, -+ c, -+ "invalid journal entry usage: bad size")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ return ret; -+ } -+ -+fsck_err: -+ return ret; -+} -+ -+struct jset_entry_ops { -+ int (*validate)(struct bch_fs *, struct jset *, -+ struct jset_entry *, int); -+}; -+ -+static const struct jset_entry_ops bch2_jset_entry_ops[] = { -+#define x(f, nr) \ -+ [BCH_JSET_ENTRY_##f] = (struct jset_entry_ops) { \ -+ .validate = journal_entry_validate_##f, \ -+ }, -+ BCH_JSET_ENTRY_TYPES() -+#undef x -+}; -+ -+static int journal_entry_validate(struct bch_fs *c, struct jset *jset, -+ struct jset_entry *entry, int write) -+{ -+ return entry->type < BCH_JSET_ENTRY_NR -+ ? 
bch2_jset_entry_ops[entry->type].validate(c, jset, -+ entry, write) -+ : 0; -+} -+ -+static int jset_validate_entries(struct bch_fs *c, struct jset *jset, -+ int write) -+{ -+ struct jset_entry *entry; -+ int ret = 0; -+ -+ vstruct_for_each(jset, entry) { -+ if (journal_entry_err_on(vstruct_next(entry) > -+ vstruct_last(jset), c, -+ "journal entry extends past end of jset")) { -+ jset->u64s = cpu_to_le32((u64 *) entry - jset->_data); -+ break; -+ } -+ -+ ret = journal_entry_validate(c, jset, entry, write); -+ if (ret) -+ break; -+ } -+fsck_err: -+ return ret; -+} -+ -+static int jset_validate(struct bch_fs *c, -+ struct jset *jset, u64 sector, -+ unsigned bucket_sectors_left, -+ unsigned sectors_read, -+ int write) -+{ -+ size_t bytes = vstruct_bytes(jset); -+ struct bch_csum csum; -+ unsigned version; -+ int ret = 0; -+ -+ if (le64_to_cpu(jset->magic) != jset_magic(c)) -+ return JOURNAL_ENTRY_NONE; -+ -+ version = le32_to_cpu(jset->version); -+ if ((version != BCH_JSET_VERSION_OLD && -+ version < bcachefs_metadata_version_min) || -+ version >= bcachefs_metadata_version_max) { -+ bch_err(c, "unknown journal entry version %u", jset->version); -+ return BCH_FSCK_UNKNOWN_VERSION; -+ } -+ -+ if (journal_entry_err_on(bytes > bucket_sectors_left << 9, c, -+ "journal entry too big (%zu bytes), sector %lluu", -+ bytes, sector)) { -+ /* XXX: note we might have missing journal entries */ -+ return JOURNAL_ENTRY_BAD; -+ } -+ -+ if (bytes > sectors_read << 9) -+ return JOURNAL_ENTRY_REREAD; -+ -+ if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c, -+ "journal entry with unknown csum type %llu sector %lluu", -+ JSET_CSUM_TYPE(jset), sector)) -+ return JOURNAL_ENTRY_BAD; -+ -+ csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset); -+ if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum), c, -+ "journal checksum bad, sector %llu", sector)) { -+ /* XXX: retry IO, when we start retrying checksum errors */ -+ /* XXX: note we might have missing journal entries */ -+ return JOURNAL_ENTRY_BAD; -+ } -+ -+ bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), -+ jset->encrypted_start, -+ vstruct_end(jset) - (void *) jset->encrypted_start); -+ -+ if (journal_entry_err_on(le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c, -+ "invalid journal entry: last_seq > seq")) -+ jset->last_seq = jset->seq; -+ -+ return 0; -+fsck_err: -+ return ret; -+} -+ -+struct journal_read_buf { -+ void *data; -+ size_t size; -+}; -+ -+static int journal_read_buf_realloc(struct journal_read_buf *b, -+ size_t new_size) -+{ -+ void *n; -+ -+ /* the bios are sized for this many pages, max: */ -+ if (new_size > JOURNAL_ENTRY_SIZE_MAX) -+ return -ENOMEM; -+ -+ new_size = roundup_pow_of_two(new_size); -+ n = kvpmalloc(new_size, GFP_KERNEL); -+ if (!n) -+ return -ENOMEM; -+ -+ kvpfree(b->data, b->size); -+ b->data = n; -+ b->size = new_size; -+ return 0; -+} -+ -+static int journal_read_bucket(struct bch_dev *ca, -+ struct journal_read_buf *buf, -+ struct journal_list *jlist, -+ unsigned bucket) -+{ -+ struct bch_fs *c = ca->fs; -+ struct journal_device *ja = &ca->journal; -+ struct jset *j = NULL; -+ unsigned sectors, sectors_read = 0; -+ u64 offset = bucket_to_sector(ca, ja->buckets[bucket]), -+ end = offset + ca->mi.bucket_size; -+ bool saw_bad = false; -+ int ret = 0; -+ -+ pr_debug("reading %u", bucket); -+ -+ while (offset < end) { -+ if (!sectors_read) { -+ struct bio *bio; -+reread: -+ sectors_read = min_t(unsigned, -+ end - offset, buf->size >> 9); -+ -+ bio = bio_kmalloc(GFP_KERNEL, 
-+ buf_pages(buf->data, -+ sectors_read << 9)); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_iter.bi_sector = offset; -+ bio_set_op_attrs(bio, REQ_OP_READ, 0); -+ bch2_bio_map(bio, buf->data, sectors_read << 9); -+ -+ ret = submit_bio_wait(bio); -+ bio_put(bio); -+ -+ if (bch2_dev_io_err_on(ret, ca, -+ "journal read from sector %llu", -+ offset) || -+ bch2_meta_read_fault("journal")) -+ return -EIO; -+ -+ j = buf->data; -+ } -+ -+ ret = jset_validate(c, j, offset, -+ end - offset, sectors_read, -+ READ); -+ switch (ret) { -+ case BCH_FSCK_OK: -+ break; -+ case JOURNAL_ENTRY_REREAD: -+ if (vstruct_bytes(j) > buf->size) { -+ ret = journal_read_buf_realloc(buf, -+ vstruct_bytes(j)); -+ if (ret) -+ return ret; -+ } -+ goto reread; -+ case JOURNAL_ENTRY_NONE: -+ if (!saw_bad) -+ return 0; -+ sectors = c->opts.block_size; -+ goto next_block; -+ case JOURNAL_ENTRY_BAD: -+ saw_bad = true; -+ sectors = c->opts.block_size; -+ goto next_block; -+ default: -+ return ret; -+ } -+ -+ /* -+ * This happens sometimes if we don't have discards on - -+ * when we've partially overwritten a bucket with new -+ * journal entries. We don't need the rest of the -+ * bucket: -+ */ -+ if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket]) -+ return 0; -+ -+ ja->bucket_seq[bucket] = le64_to_cpu(j->seq); -+ -+ mutex_lock(&jlist->lock); -+ ret = journal_entry_add(c, ca, jlist, j); -+ mutex_unlock(&jlist->lock); -+ -+ switch (ret) { -+ case JOURNAL_ENTRY_ADD_OK: -+ break; -+ case JOURNAL_ENTRY_ADD_OUT_OF_RANGE: -+ break; -+ default: -+ return ret; -+ } -+ -+ sectors = vstruct_sectors(j, c->block_bits); -+next_block: -+ pr_debug("next"); -+ offset += sectors; -+ sectors_read -= sectors; -+ j = ((void *) j) + (sectors << 9); -+ } -+ -+ return 0; -+} -+ -+static void bch2_journal_read_device(struct closure *cl) -+{ -+ struct journal_device *ja = -+ container_of(cl, struct journal_device, read); -+ struct bch_dev *ca = container_of(ja, struct bch_dev, journal); -+ struct journal_list *jlist = -+ container_of(cl->parent, struct journal_list, cl); -+ struct journal_read_buf buf = { NULL, 0 }; -+ u64 min_seq = U64_MAX; -+ unsigned i; -+ int ret; -+ -+ if (!ja->nr) -+ goto out; -+ -+ ret = journal_read_buf_realloc(&buf, PAGE_SIZE); -+ if (ret) -+ goto err; -+ -+ pr_debug("%u journal buckets", ja->nr); -+ -+ for (i = 0; i < ja->nr; i++) { -+ ret = journal_read_bucket(ca, &buf, jlist, i); -+ if (ret) -+ goto err; -+ } -+ -+ /* Find the journal bucket with the highest sequence number: */ -+ for (i = 0; i < ja->nr; i++) { -+ if (ja->bucket_seq[i] > ja->bucket_seq[ja->cur_idx]) -+ ja->cur_idx = i; -+ -+ min_seq = min(ja->bucket_seq[i], min_seq); -+ } -+ -+ /* -+ * If there's duplicate journal entries in multiple buckets (which -+ * definitely isn't supposed to happen, but...) 
- make sure to start -+ * cur_idx at the last of those buckets, so we don't deadlock trying to -+ * allocate -+ */ -+ while (ja->bucket_seq[ja->cur_idx] > min_seq && -+ ja->bucket_seq[ja->cur_idx] > -+ ja->bucket_seq[(ja->cur_idx + 1) % ja->nr]) -+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr; -+ -+ ja->sectors_free = 0; -+ -+ /* -+ * Set dirty_idx to indicate the entire journal is full and needs to be -+ * reclaimed - journal reclaim will immediately reclaim whatever isn't -+ * pinned when it first runs: -+ */ -+ ja->discard_idx = ja->dirty_idx_ondisk = -+ ja->dirty_idx = (ja->cur_idx + 1) % ja->nr; -+out: -+ kvpfree(buf.data, buf.size); -+ percpu_ref_put(&ca->io_ref); -+ closure_return(cl); -+ return; -+err: -+ mutex_lock(&jlist->lock); -+ jlist->ret = ret; -+ mutex_unlock(&jlist->lock); -+ goto out; -+} -+ -+int bch2_journal_read(struct bch_fs *c, struct list_head *list) -+{ -+ struct journal_list jlist; -+ struct journal_replay *i; -+ struct bch_dev *ca; -+ unsigned iter; -+ size_t keys = 0, entries = 0; -+ bool degraded = false; -+ int ret = 0; -+ -+ closure_init_stack(&jlist.cl); -+ mutex_init(&jlist.lock); -+ jlist.head = list; -+ jlist.ret = 0; -+ -+ for_each_member_device(ca, c, iter) { -+ if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && -+ !(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_JOURNAL))) -+ continue; -+ -+ if ((ca->mi.state == BCH_MEMBER_STATE_RW || -+ ca->mi.state == BCH_MEMBER_STATE_RO) && -+ percpu_ref_tryget(&ca->io_ref)) -+ closure_call(&ca->journal.read, -+ bch2_journal_read_device, -+ system_unbound_wq, -+ &jlist.cl); -+ else -+ degraded = true; -+ } -+ -+ closure_sync(&jlist.cl); -+ -+ if (jlist.ret) -+ return jlist.ret; -+ -+ list_for_each_entry(i, list, list) { -+ struct jset_entry *entry; -+ struct bkey_i *k, *_n; -+ struct bch_replicas_padded replicas; -+ char buf[80]; -+ -+ ret = jset_validate_entries(c, &i->j, READ); -+ if (ret) -+ goto fsck_err; -+ -+ /* -+ * If we're mounting in degraded mode - if we didn't read all -+ * the devices - this is wrong: -+ */ -+ -+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs); -+ -+ if (!degraded && -+ (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) || -+ fsck_err_on(!bch2_replicas_marked(c, &replicas.e, false), c, -+ "superblock not marked as containing replicas %s", -+ (bch2_replicas_entry_to_text(&PBUF(buf), -+ &replicas.e), buf)))) { -+ ret = bch2_mark_replicas(c, &replicas.e); -+ if (ret) -+ return ret; -+ } -+ -+ for_each_jset_key(k, _n, entry, &i->j) -+ keys++; -+ entries++; -+ } -+ -+ if (!list_empty(list)) { -+ i = list_last_entry(list, struct journal_replay, list); -+ -+ bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu", -+ keys, entries, le64_to_cpu(i->j.seq)); -+ } -+fsck_err: -+ return ret; -+} -+ -+/* journal write: */ -+ -+static void __journal_write_alloc(struct journal *j, -+ struct journal_buf *w, -+ struct dev_alloc_list *devs_sorted, -+ unsigned sectors, -+ unsigned *replicas, -+ unsigned replicas_want) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_device *ja; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ if (*replicas >= replicas_want) -+ return; -+ -+ for (i = 0; i < devs_sorted->nr; i++) { -+ ca = rcu_dereference(c->devs[devs_sorted->devs[i]]); -+ if (!ca) -+ continue; -+ -+ ja = &ca->journal; -+ -+ /* -+ * Check that we can use this device, and aren't already using -+ * it: -+ */ -+ if (!ca->mi.durability || -+ ca->mi.state != BCH_MEMBER_STATE_RW || -+ !ja->nr || -+ bch2_bkey_has_device(bkey_i_to_s_c(&w->key), -+ ca->dev_idx) || -+ 
sectors > ja->sectors_free) -+ continue; -+ -+ bch2_dev_stripe_increment(c, ca, &j->wp.stripe); -+ -+ bch2_bkey_append_ptr(&w->key, -+ (struct bch_extent_ptr) { -+ .offset = bucket_to_sector(ca, -+ ja->buckets[ja->cur_idx]) + -+ ca->mi.bucket_size - -+ ja->sectors_free, -+ .dev = ca->dev_idx, -+ }); -+ -+ ja->sectors_free -= sectors; -+ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq); -+ -+ *replicas += ca->mi.durability; -+ -+ if (*replicas >= replicas_want) -+ break; -+ } -+} -+ -+/** -+ * journal_next_bucket - move on to the next journal bucket if possible -+ */ -+static int journal_write_alloc(struct journal *j, struct journal_buf *w, -+ unsigned sectors) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_device *ja; -+ struct bch_dev *ca; -+ struct dev_alloc_list devs_sorted; -+ unsigned i, replicas = 0, replicas_want = -+ READ_ONCE(c->opts.metadata_replicas); -+ -+ rcu_read_lock(); -+ -+ devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, -+ &c->rw_devs[BCH_DATA_JOURNAL]); -+ -+ __journal_write_alloc(j, w, &devs_sorted, -+ sectors, &replicas, replicas_want); -+ -+ if (replicas >= replicas_want) -+ goto done; -+ -+ for (i = 0; i < devs_sorted.nr; i++) { -+ ca = rcu_dereference(c->devs[devs_sorted.devs[i]]); -+ if (!ca) -+ continue; -+ -+ ja = &ca->journal; -+ -+ if (sectors > ja->sectors_free && -+ sectors <= ca->mi.bucket_size && -+ bch2_journal_dev_buckets_available(j, ja, -+ journal_space_discarded)) { -+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr; -+ ja->sectors_free = ca->mi.bucket_size; -+ -+ /* -+ * ja->bucket_seq[ja->cur_idx] must always have -+ * something sensible: -+ */ -+ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq); -+ } -+ } -+ -+ __journal_write_alloc(j, w, &devs_sorted, -+ sectors, &replicas, replicas_want); -+done: -+ rcu_read_unlock(); -+ -+ return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS; -+} -+ -+static void journal_write_compact(struct jset *jset) -+{ -+ struct jset_entry *i, *next, *prev = NULL; -+ -+ /* -+ * Simple compaction, dropping empty jset_entries (from journal -+ * reservations that weren't fully used) and merging jset_entries that -+ * can be. -+ * -+ * If we wanted to be really fancy here, we could sort all the keys in -+ * the jset and drop keys that were overwritten - probably not worth it: -+ */ -+ vstruct_for_each_safe(jset, i, next) { -+ unsigned u64s = le16_to_cpu(i->u64s); -+ -+ /* Empty entry: */ -+ if (!u64s) -+ continue; -+ -+ /* Can we merge with previous entry? */ -+ if (prev && -+ i->btree_id == prev->btree_id && -+ i->level == prev->level && -+ i->type == prev->type && -+ i->type == BCH_JSET_ENTRY_btree_keys && -+ le16_to_cpu(prev->u64s) + u64s <= U16_MAX) { -+ memmove_u64s_down(vstruct_next(prev), -+ i->_data, -+ u64s); -+ le16_add_cpu(&prev->u64s, u64s); -+ continue; -+ } -+ -+ /* Couldn't merge, move i into new position (after prev): */ -+ prev = prev ? vstruct_next(prev) : jset->start; -+ if (i != prev) -+ memmove_u64s_down(prev, i, jset_u64s(u64s)); -+ } -+ -+ prev = prev ? 
vstruct_next(prev) : jset->start; -+ jset->u64s = cpu_to_le32((u64 *) prev - jset->_data); -+} -+ -+static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) -+{ -+ /* we aren't holding j->lock: */ -+ unsigned new_size = READ_ONCE(j->buf_size_want); -+ void *new_buf; -+ -+ if (buf->buf_size >= new_size) -+ return; -+ -+ new_buf = kvpmalloc(new_size, GFP_NOIO|__GFP_NOWARN); -+ if (!new_buf) -+ return; -+ -+ memcpy(new_buf, buf->data, buf->buf_size); -+ kvpfree(buf->data, buf->buf_size); -+ buf->data = new_buf; -+ buf->buf_size = new_size; -+} -+ -+static void journal_write_done(struct closure *cl) -+{ -+ struct journal *j = container_of(cl, struct journal, io); -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_buf *w = journal_prev_buf(j); -+ struct bch_devs_list devs = -+ bch2_bkey_devs(bkey_i_to_s_c(&w->key)); -+ struct bch_replicas_padded replicas; -+ u64 seq = le64_to_cpu(w->data->seq); -+ u64 last_seq = le64_to_cpu(w->data->last_seq); -+ -+ bch2_time_stats_update(j->write_time, j->write_start_time); -+ -+ if (!devs.nr) { -+ bch_err(c, "unable to write journal to sufficient devices"); -+ goto err; -+ } -+ -+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, devs); -+ -+ if (bch2_mark_replicas(c, &replicas.e)) -+ goto err; -+ -+ spin_lock(&j->lock); -+ if (seq >= j->pin.front) -+ journal_seq_pin(j, seq)->devs = devs; -+ -+ j->seq_ondisk = seq; -+ j->last_seq_ondisk = last_seq; -+ bch2_journal_space_available(j); -+ -+ /* -+ * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard -+ * more buckets: -+ * -+ * Must come before signaling write completion, for -+ * bch2_fs_journal_stop(): -+ */ -+ mod_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0); -+out: -+ /* also must come before signalling write completion: */ -+ closure_debug_destroy(cl); -+ -+ BUG_ON(!j->reservations.prev_buf_unwritten); -+ atomic64_sub(((union journal_res_state) { .prev_buf_unwritten = 1 }).v, -+ &j->reservations.counter); -+ -+ closure_wake_up(&w->wait); -+ journal_wake(j); -+ -+ if (test_bit(JOURNAL_NEED_WRITE, &j->flags)) -+ mod_delayed_work(system_freezable_wq, &j->write_work, 0); -+ spin_unlock(&j->lock); -+ return; -+err: -+ bch2_fatal_error(c); -+ spin_lock(&j->lock); -+ goto out; -+} -+ -+static void journal_write_endio(struct bio *bio) -+{ -+ struct bch_dev *ca = bio->bi_private; -+ struct journal *j = &ca->fs->journal; -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write: %s", -+ blk_status_to_str(bio->bi_status)) || -+ bch2_meta_write_fault("journal")) { -+ struct journal_buf *w = journal_prev_buf(j); -+ unsigned long flags; -+ -+ spin_lock_irqsave(&j->err_lock, flags); -+ bch2_bkey_drop_device(bkey_i_to_s(&w->key), ca->dev_idx); -+ spin_unlock_irqrestore(&j->err_lock, flags); -+ } -+ -+ closure_put(&j->io); -+ percpu_ref_put(&ca->io_ref); -+} -+ -+void bch2_journal_write(struct closure *cl) -+{ -+ struct journal *j = container_of(cl, struct journal, io); -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ struct journal_buf *w = journal_prev_buf(j); -+ struct jset_entry *start, *end; -+ struct jset *jset; -+ struct bio *bio; -+ struct bch_extent_ptr *ptr; -+ bool validate_before_checksum = false; -+ unsigned i, sectors, bytes, u64s; -+ int ret; -+ -+ bch2_journal_pin_put(j, le64_to_cpu(w->data->seq)); -+ -+ journal_buf_realloc(j, w); -+ jset = w->data; -+ -+ j->write_start_time = local_clock(); -+ -+ /* -+ * New btree roots are set by journalling them; when the journal entry -+ * 
gets written we have to propagate them to c->btree_roots -+ * -+ * But, every journal entry we write has to contain all the btree roots -+ * (at least for now); so after we copy btree roots to c->btree_roots we -+ * have to get any missing btree roots and add them to this journal -+ * entry: -+ */ -+ -+ bch2_journal_entries_to_btree_roots(c, jset); -+ -+ start = end = vstruct_last(jset); -+ -+ end = bch2_btree_roots_to_journal_entries(c, jset->start, end); -+ -+ end = bch2_journal_super_entries_add_common(c, end, -+ le64_to_cpu(jset->seq)); -+ u64s = (u64 *) end - (u64 *) start; -+ BUG_ON(u64s > j->entry_u64s_reserved); -+ -+ le32_add_cpu(&jset->u64s, u64s); -+ BUG_ON(vstruct_sectors(jset, c->block_bits) > w->sectors); -+ -+ journal_write_compact(jset); -+ -+ jset->read_clock = cpu_to_le16(c->bucket_clock[READ].hand); -+ jset->write_clock = cpu_to_le16(c->bucket_clock[WRITE].hand); -+ jset->magic = cpu_to_le64(jset_magic(c)); -+ -+ jset->version = c->sb.version < bcachefs_metadata_version_new_versioning -+ ? cpu_to_le32(BCH_JSET_VERSION_OLD) -+ : cpu_to_le32(c->sb.version); -+ -+ SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN); -+ SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c)); -+ -+ if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset))) -+ validate_before_checksum = true; -+ -+ if (le32_to_cpu(jset->version) < bcachefs_metadata_version_max) -+ validate_before_checksum = true; -+ -+ if (validate_before_checksum && -+ jset_validate_entries(c, jset, WRITE)) -+ goto err; -+ -+ bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), -+ jset->encrypted_start, -+ vstruct_end(jset) - (void *) jset->encrypted_start); -+ -+ jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), -+ journal_nonce(jset), jset); -+ -+ if (!validate_before_checksum && -+ jset_validate_entries(c, jset, WRITE)) -+ goto err; -+ -+ sectors = vstruct_sectors(jset, c->block_bits); -+ BUG_ON(sectors > w->sectors); -+ -+ bytes = vstruct_bytes(jset); -+ memset((void *) jset + bytes, 0, (sectors << 9) - bytes); -+ -+retry_alloc: -+ spin_lock(&j->lock); -+ ret = journal_write_alloc(j, w, sectors); -+ -+ if (ret && j->can_discard) { -+ spin_unlock(&j->lock); -+ bch2_journal_do_discards(j); -+ goto retry_alloc; -+ } -+ -+ /* -+ * write is allocated, no longer need to account for it in -+ * bch2_journal_space_available(): -+ */ -+ w->sectors = 0; -+ -+ /* -+ * journal entry has been compacted and allocated, recalculate space -+ * available: -+ */ -+ bch2_journal_space_available(j); -+ spin_unlock(&j->lock); -+ -+ if (ret) { -+ bch_err(c, "Unable to allocate journal write"); -+ bch2_fatal_error(c); -+ continue_at(cl, journal_write_done, system_highpri_wq); -+ return; -+ } -+ -+ /* -+ * XXX: we really should just disable the entire journal in nochanges -+ * mode -+ */ -+ if (c->opts.nochanges) -+ goto no_io; -+ -+ extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) { -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ if (!percpu_ref_tryget(&ca->io_ref)) { -+ /* XXX: fix this */ -+ bch_err(c, "missing device for journal write\n"); -+ continue; -+ } -+ -+ this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_JOURNAL], -+ sectors); -+ -+ bio = ca->journal.bio; -+ bio_reset(bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_iter.bi_sector = ptr->offset; -+ bio->bi_end_io = journal_write_endio; -+ bio->bi_private = ca; -+ bio_set_op_attrs(bio, REQ_OP_WRITE, -+ REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA); -+ bch2_bio_map(bio, jset, sectors << 9); -+ -+ trace_journal_write(bio); -+ closure_bio_submit(bio, cl); -+ -+ 
ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(jset->seq); -+ } -+ -+ for_each_rw_member(ca, c, i) -+ if (journal_flushes_device(ca) && -+ !bch2_bkey_has_device(bkey_i_to_s_c(&w->key), i)) { -+ percpu_ref_get(&ca->io_ref); -+ -+ bio = ca->journal.bio; -+ bio_reset(bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_opf = REQ_OP_FLUSH; -+ bio->bi_end_io = journal_write_endio; -+ bio->bi_private = ca; -+ closure_bio_submit(bio, cl); -+ } -+ -+no_io: -+ bch2_bucket_seq_cleanup(c); -+ -+ continue_at(cl, journal_write_done, system_highpri_wq); -+ return; -+err: -+ bch2_inconsistent_error(c); -+ continue_at(cl, journal_write_done, system_highpri_wq); -+} -diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h -new file mode 100644 -index 000000000000..72e575f360af ---- /dev/null -+++ b/fs/bcachefs/journal_io.h -@@ -0,0 +1,42 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_IO_H -+#define _BCACHEFS_JOURNAL_IO_H -+ -+/* -+ * Only used for holding the journal entries we read in btree_journal_read() -+ * during cache_registration -+ */ -+struct journal_replay { -+ struct list_head list; -+ struct bch_devs_list devs; -+ /* must be last: */ -+ struct jset j; -+}; -+ -+static inline struct jset_entry *__jset_entry_type_next(struct jset *jset, -+ struct jset_entry *entry, unsigned type) -+{ -+ while (entry < vstruct_last(jset)) { -+ if (entry->type == type) -+ return entry; -+ -+ entry = vstruct_next(entry); -+ } -+ -+ return NULL; -+} -+ -+#define for_each_jset_entry_type(entry, jset, type) \ -+ for (entry = (jset)->start; \ -+ (entry = __jset_entry_type_next(jset, entry, type)); \ -+ entry = vstruct_next(entry)) -+ -+#define for_each_jset_key(k, _n, entry, jset) \ -+ for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys) \ -+ vstruct_for_each_safe(entry, k, _n) -+ -+int bch2_journal_read(struct bch_fs *, struct list_head *); -+ -+void bch2_journal_write(struct closure *); -+ -+#endif /* _BCACHEFS_JOURNAL_IO_H */ -diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c -new file mode 100644 -index 000000000000..4811ab9f879e ---- /dev/null -+++ b/fs/bcachefs/journal_reclaim.c -@@ -0,0 +1,644 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "journal.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "replicas.h" -+#include "super.h" -+ -+/* Free space calculations: */ -+ -+static unsigned journal_space_from(struct journal_device *ja, -+ enum journal_space_from from) -+{ -+ switch (from) { -+ case journal_space_discarded: -+ return ja->discard_idx; -+ case journal_space_clean_ondisk: -+ return ja->dirty_idx_ondisk; -+ case journal_space_clean: -+ return ja->dirty_idx; -+ default: -+ BUG(); -+ } -+} -+ -+unsigned bch2_journal_dev_buckets_available(struct journal *j, -+ struct journal_device *ja, -+ enum journal_space_from from) -+{ -+ unsigned available = (journal_space_from(ja, from) - -+ ja->cur_idx - 1 + ja->nr) % ja->nr; -+ -+ /* -+ * Don't use the last bucket unless writing the new last_seq -+ * will make another bucket available: -+ */ -+ if (available && ja->dirty_idx_ondisk == ja->dirty_idx) -+ --available; -+ -+ return available; -+} -+ -+static void journal_set_remaining(struct journal *j, unsigned u64s_remaining) -+{ -+ union journal_preres_state old, new; -+ u64 v = atomic64_read(&j->prereserved.counter); -+ -+ do { -+ old.v = new.v = v; -+ new.remaining = u64s_remaining; -+ } while ((v = atomic64_cmpxchg(&j->prereserved.counter, -+ old.v, new.v)) != old.v); -+} -+ 
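The journal_set_remaining() helper above updates a packed 64-bit pre-reservation word without taking a lock. A minimal standalone sketch of that read-modify-cmpxchg pattern, using hypothetical demo_* names and C11 atomics instead of the kernel's atomic64_* API (not part of the patch):

#include <stdatomic.h>
#include <stdint.h>

/* two 32-bit counters packed into one atomically-updated 64-bit word */
union demo_preres_state {
        uint64_t v;
        struct {
                uint32_t reserved;
                uint32_t remaining;
        };
};

static void demo_set_remaining(_Atomic uint64_t *counter, uint32_t u64s_remaining)
{
        union demo_preres_state new;
        uint64_t v = atomic_load(counter);

        do {
                new.v = v;                      /* start from the current packed value */
                new.remaining = u64s_remaining; /* change only the field we care about */
                /* on failure, v is reloaded with the current value and we retry */
        } while (!atomic_compare_exchange_weak(counter, &v, new.v));
}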
-+static struct journal_space { -+ unsigned next_entry; -+ unsigned remaining; -+} __journal_space_available(struct journal *j, unsigned nr_devs_want, -+ enum journal_space_from from) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ unsigned sectors_next_entry = UINT_MAX; -+ unsigned sectors_total = UINT_MAX; -+ unsigned i, nr_devs = 0; -+ unsigned unwritten_sectors = j->reservations.prev_buf_unwritten -+ ? journal_prev_buf(j)->sectors -+ : 0; -+ -+ rcu_read_lock(); -+ for_each_member_device_rcu(ca, c, i, -+ &c->rw_devs[BCH_DATA_JOURNAL]) { -+ struct journal_device *ja = &ca->journal; -+ unsigned buckets_this_device, sectors_this_device; -+ -+ if (!ja->nr) -+ continue; -+ -+ buckets_this_device = bch2_journal_dev_buckets_available(j, ja, from); -+ sectors_this_device = ja->sectors_free; -+ -+ /* -+ * We that we don't allocate the space for a journal entry -+ * until we write it out - thus, account for it here: -+ */ -+ if (unwritten_sectors >= sectors_this_device) { -+ if (!buckets_this_device) -+ continue; -+ -+ buckets_this_device--; -+ sectors_this_device = ca->mi.bucket_size; -+ } -+ -+ sectors_this_device -= unwritten_sectors; -+ -+ if (sectors_this_device < ca->mi.bucket_size && -+ buckets_this_device) { -+ buckets_this_device--; -+ sectors_this_device = ca->mi.bucket_size; -+ } -+ -+ if (!sectors_this_device) -+ continue; -+ -+ sectors_next_entry = min(sectors_next_entry, -+ sectors_this_device); -+ -+ sectors_total = min(sectors_total, -+ buckets_this_device * ca->mi.bucket_size + -+ sectors_this_device); -+ -+ nr_devs++; -+ } -+ rcu_read_unlock(); -+ -+ if (nr_devs < nr_devs_want) -+ return (struct journal_space) { 0, 0 }; -+ -+ return (struct journal_space) { -+ .next_entry = sectors_next_entry, -+ .remaining = max_t(int, 0, sectors_total - sectors_next_entry), -+ }; -+} -+ -+void bch2_journal_space_available(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ struct journal_space discarded, clean_ondisk, clean; -+ unsigned overhead, u64s_remaining = 0; -+ unsigned max_entry_size = min(j->buf[0].buf_size >> 9, -+ j->buf[1].buf_size >> 9); -+ unsigned i, nr_online = 0, nr_devs_want; -+ bool can_discard = false; -+ int ret = 0; -+ -+ lockdep_assert_held(&j->lock); -+ -+ rcu_read_lock(); -+ for_each_member_device_rcu(ca, c, i, -+ &c->rw_devs[BCH_DATA_JOURNAL]) { -+ struct journal_device *ja = &ca->journal; -+ -+ if (!ja->nr) -+ continue; -+ -+ while (ja->dirty_idx != ja->cur_idx && -+ ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j)) -+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr; -+ -+ while (ja->dirty_idx_ondisk != ja->dirty_idx && -+ ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk) -+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr; -+ -+ if (ja->discard_idx != ja->dirty_idx_ondisk) -+ can_discard = true; -+ -+ max_entry_size = min_t(unsigned, max_entry_size, ca->mi.bucket_size); -+ nr_online++; -+ } -+ rcu_read_unlock(); -+ -+ j->can_discard = can_discard; -+ -+ if (nr_online < c->opts.metadata_replicas_required) { -+ ret = -EROFS; -+ goto out; -+ } -+ -+ if (!fifo_free(&j->pin)) { -+ ret = -ENOSPC; -+ goto out; -+ } -+ -+ nr_devs_want = min_t(unsigned, nr_online, c->opts.metadata_replicas); -+ -+ discarded = __journal_space_available(j, nr_devs_want, journal_space_discarded); -+ clean_ondisk = __journal_space_available(j, nr_devs_want, journal_space_clean_ondisk); -+ clean = __journal_space_available(j, nr_devs_want, journal_space_clean); -+ -+ if 
(!discarded.next_entry) -+ ret = -ENOSPC; -+ -+ overhead = DIV_ROUND_UP(clean.remaining, max_entry_size) * -+ journal_entry_overhead(j); -+ u64s_remaining = clean.remaining << 6; -+ u64s_remaining = max_t(int, 0, u64s_remaining - overhead); -+ u64s_remaining /= 4; -+out: -+ j->cur_entry_sectors = !ret ? discarded.next_entry : 0; -+ j->cur_entry_error = ret; -+ journal_set_remaining(j, u64s_remaining); -+ journal_check_may_get_unreserved(j); -+ -+ if (!ret) -+ journal_wake(j); -+} -+ -+/* Discards - last part of journal reclaim: */ -+ -+static bool should_discard_bucket(struct journal *j, struct journal_device *ja) -+{ -+ bool ret; -+ -+ spin_lock(&j->lock); -+ ret = ja->discard_idx != ja->dirty_idx_ondisk; -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+/* -+ * Advance ja->discard_idx as long as it points to buckets that are no longer -+ * dirty, issuing discards if necessary: -+ */ -+void bch2_journal_do_discards(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ unsigned iter; -+ -+ mutex_lock(&j->discard_lock); -+ -+ for_each_rw_member(ca, c, iter) { -+ struct journal_device *ja = &ca->journal; -+ -+ while (should_discard_bucket(j, ja)) { -+ if (ca->mi.discard && -+ blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev))) -+ blkdev_issue_discard(ca->disk_sb.bdev, -+ bucket_to_sector(ca, -+ ja->buckets[ja->discard_idx]), -+ ca->mi.bucket_size, GFP_NOIO, 0); -+ -+ spin_lock(&j->lock); -+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr; -+ -+ bch2_journal_space_available(j); -+ spin_unlock(&j->lock); -+ } -+ } -+ -+ mutex_unlock(&j->discard_lock); -+} -+ -+/* -+ * Journal entry pinning - machinery for holding a reference on a given journal -+ * entry, holding it open to ensure it gets replayed during recovery: -+ */ -+ -+static void bch2_journal_reclaim_fast(struct journal *j) -+{ -+ struct journal_entry_pin_list temp; -+ bool popped = false; -+ -+ lockdep_assert_held(&j->lock); -+ -+ /* -+ * Unpin journal entries whose reference counts reached zero, meaning -+ * all btree nodes got written out -+ */ -+ while (!fifo_empty(&j->pin) && -+ !atomic_read(&fifo_peek_front(&j->pin).count)) { -+ BUG_ON(!list_empty(&fifo_peek_front(&j->pin).list)); -+ BUG_ON(!fifo_pop(&j->pin, temp)); -+ popped = true; -+ } -+ -+ if (popped) -+ bch2_journal_space_available(j); -+} -+ -+void bch2_journal_pin_put(struct journal *j, u64 seq) -+{ -+ struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq); -+ -+ if (atomic_dec_and_test(&pin_list->count)) { -+ spin_lock(&j->lock); -+ bch2_journal_reclaim_fast(j); -+ spin_unlock(&j->lock); -+ } -+} -+ -+static inline void __journal_pin_drop(struct journal *j, -+ struct journal_entry_pin *pin) -+{ -+ struct journal_entry_pin_list *pin_list; -+ -+ if (!journal_pin_active(pin)) -+ return; -+ -+ pin_list = journal_seq_pin(j, pin->seq); -+ pin->seq = 0; -+ list_del_init(&pin->list); -+ -+ /* -+ * Unpinning a journal entry make make journal_next_bucket() succeed, if -+ * writing a new last_seq will now make another bucket available: -+ */ -+ if (atomic_dec_and_test(&pin_list->count) && -+ pin_list == &fifo_peek_front(&j->pin)) -+ bch2_journal_reclaim_fast(j); -+ else if (fifo_used(&j->pin) == 1 && -+ atomic_read(&pin_list->count) == 1) -+ journal_wake(j); -+} -+ -+void bch2_journal_pin_drop(struct journal *j, -+ struct journal_entry_pin *pin) -+{ -+ spin_lock(&j->lock); -+ __journal_pin_drop(j, pin); -+ spin_unlock(&j->lock); -+} -+ -+static void bch2_journal_pin_add_locked(struct journal *j, u64 seq, -+ 
struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq); -+ -+ __journal_pin_drop(j, pin); -+ -+ BUG_ON(!atomic_read(&pin_list->count) && seq == journal_last_seq(j)); -+ -+ atomic_inc(&pin_list->count); -+ pin->seq = seq; -+ pin->flush = flush_fn; -+ -+ list_add(&pin->list, flush_fn ? &pin_list->list : &pin_list->flushed); -+} -+ -+void __bch2_journal_pin_add(struct journal *j, u64 seq, -+ struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ spin_lock(&j->lock); -+ bch2_journal_pin_add_locked(j, seq, pin, flush_fn); -+ spin_unlock(&j->lock); -+ -+ /* -+ * If the journal is currently full, we might want to call flush_fn -+ * immediately: -+ */ -+ journal_wake(j); -+} -+ -+void bch2_journal_pin_update(struct journal *j, u64 seq, -+ struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ if (journal_pin_active(pin) && pin->seq < seq) -+ return; -+ -+ spin_lock(&j->lock); -+ -+ if (pin->seq != seq) { -+ bch2_journal_pin_add_locked(j, seq, pin, flush_fn); -+ } else { -+ struct journal_entry_pin_list *pin_list = -+ journal_seq_pin(j, seq); -+ -+ /* -+ * If the pin is already pinning the right sequence number, it -+ * still might've already been flushed: -+ */ -+ list_move(&pin->list, &pin_list->list); -+ } -+ -+ spin_unlock(&j->lock); -+ -+ /* -+ * If the journal is currently full, we might want to call flush_fn -+ * immediately: -+ */ -+ journal_wake(j); -+} -+ -+void bch2_journal_pin_copy(struct journal *j, -+ struct journal_entry_pin *dst, -+ struct journal_entry_pin *src, -+ journal_pin_flush_fn flush_fn) -+{ -+ spin_lock(&j->lock); -+ -+ if (journal_pin_active(src) && -+ (!journal_pin_active(dst) || src->seq < dst->seq)) -+ bch2_journal_pin_add_locked(j, src->seq, dst, flush_fn); -+ -+ spin_unlock(&j->lock); -+} -+ -+/** -+ * bch2_journal_pin_flush: ensure journal pin callback is no longer running -+ */ -+void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin) -+{ -+ BUG_ON(journal_pin_active(pin)); -+ -+ wait_event(j->pin_flush_wait, j->flush_in_progress != pin); -+} -+ -+/* -+ * Journal reclaim: flush references to open journal entries to reclaim space in -+ * the journal -+ * -+ * May be done by the journal code in the background as needed to free up space -+ * for more journal entries, or as part of doing a clean shutdown, or to migrate -+ * data off of a specific device: -+ */ -+ -+static struct journal_entry_pin * -+journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq) -+{ -+ struct journal_entry_pin_list *pin_list; -+ struct journal_entry_pin *ret = NULL; -+ -+ if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags)) -+ return NULL; -+ -+ spin_lock(&j->lock); -+ -+ fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) -+ if (*seq > max_seq || -+ (ret = list_first_entry_or_null(&pin_list->list, -+ struct journal_entry_pin, list))) -+ break; -+ -+ if (ret) { -+ list_move(&ret->list, &pin_list->flushed); -+ BUG_ON(j->flush_in_progress); -+ j->flush_in_progress = ret; -+ j->last_flushed = jiffies; -+ } -+ -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+/* returns true if we did work */ -+static bool journal_flush_pins(struct journal *j, u64 seq_to_flush, -+ unsigned min_nr) -+{ -+ struct journal_entry_pin *pin; -+ bool ret = false; -+ u64 seq; -+ -+ lockdep_assert_held(&j->reclaim_lock); -+ -+ while ((pin = journal_get_next_pin(j, min_nr -+ ? 
U64_MAX : seq_to_flush, &seq))) { -+ if (min_nr) -+ min_nr--; -+ -+ pin->flush(j, pin, seq); -+ -+ BUG_ON(j->flush_in_progress != pin); -+ j->flush_in_progress = NULL; -+ wake_up(&j->pin_flush_wait); -+ ret = true; -+ } -+ -+ return ret; -+} -+ -+/** -+ * bch2_journal_reclaim - free up journal buckets -+ * -+ * Background journal reclaim writes out btree nodes. It should be run -+ * early enough so that we never completely run out of journal buckets. -+ * -+ * High watermarks for triggering background reclaim: -+ * - FIFO has fewer than 512 entries left -+ * - fewer than 25% journal buckets free -+ * -+ * Background reclaim runs until low watermarks are reached: -+ * - FIFO has more than 1024 entries left -+ * - more than 50% journal buckets free -+ * -+ * As long as a reclaim can complete in the time it takes to fill up -+ * 512 journal entries or 25% of all journal buckets, then -+ * journal_next_bucket() should not stall. -+ */ -+void bch2_journal_reclaim(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ unsigned iter, min_nr = 0; -+ u64 seq_to_flush = 0; -+ -+ lockdep_assert_held(&j->reclaim_lock); -+ -+ bch2_journal_do_discards(j); -+ -+ spin_lock(&j->lock); -+ -+ for_each_rw_member(ca, c, iter) { -+ struct journal_device *ja = &ca->journal; -+ unsigned nr_buckets, bucket_to_flush; -+ -+ if (!ja->nr) -+ continue; -+ -+ /* Try to keep the journal at most half full: */ -+ nr_buckets = ja->nr / 2; -+ -+ /* And include pre-reservations: */ -+ nr_buckets += DIV_ROUND_UP(j->prereserved.reserved, -+ (ca->mi.bucket_size << 6) - -+ journal_entry_overhead(j)); -+ -+ nr_buckets = min(nr_buckets, ja->nr); -+ -+ bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr; -+ seq_to_flush = max(seq_to_flush, -+ ja->bucket_seq[bucket_to_flush]); -+ } -+ -+ /* Also flush if the pin fifo is more than half full */ -+ seq_to_flush = max_t(s64, seq_to_flush, -+ (s64) journal_cur_seq(j) - -+ (j->pin.size >> 1)); -+ spin_unlock(&j->lock); -+ -+ /* -+ * If it's been longer than j->reclaim_delay_ms since we last flushed, -+ * make sure to flush at least one journal pin: -+ */ -+ if (time_after(jiffies, j->last_flushed + -+ msecs_to_jiffies(j->reclaim_delay_ms))) -+ min_nr = 1; -+ -+ if (j->prereserved.reserved * 2 > j->prereserved.remaining) { -+ seq_to_flush = max(seq_to_flush, journal_last_seq(j)); -+ min_nr = 1; -+ } -+ -+ journal_flush_pins(j, seq_to_flush, min_nr); -+ -+ if (!bch2_journal_error(j)) -+ queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, -+ msecs_to_jiffies(j->reclaim_delay_ms)); -+} -+ -+void bch2_journal_reclaim_work(struct work_struct *work) -+{ -+ struct journal *j = container_of(to_delayed_work(work), -+ struct journal, reclaim_work); -+ -+ mutex_lock(&j->reclaim_lock); -+ bch2_journal_reclaim(j); -+ mutex_unlock(&j->reclaim_lock); -+} -+ -+static int journal_flush_done(struct journal *j, u64 seq_to_flush, -+ bool *did_work) -+{ -+ int ret; -+ -+ ret = bch2_journal_error(j); -+ if (ret) -+ return ret; -+ -+ mutex_lock(&j->reclaim_lock); -+ -+ *did_work = journal_flush_pins(j, seq_to_flush, 0); -+ -+ spin_lock(&j->lock); -+ /* -+ * If journal replay hasn't completed, the unreplayed journal entries -+ * hold refs on their corresponding sequence numbers -+ */ -+ ret = !test_bit(JOURNAL_REPLAY_DONE, &j->flags) || -+ journal_last_seq(j) > seq_to_flush || -+ (fifo_used(&j->pin) == 1 && -+ atomic_read(&fifo_peek_front(&j->pin).count) == 1); -+ -+ spin_unlock(&j->lock); -+ mutex_unlock(&j->reclaim_lock); -+ -+ return ret; -+} -+ 
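bch2_journal_reclaim() above picks, per device, the oldest sequence number that must be flushed so that at most roughly half of the journal bucket ring stays dirty. A small standalone sketch of that index arithmetic (hypothetical demo_* names; the pre-reservation term and the pin-FIFO bound are omitted):

#include <stdint.h>

static uint64_t demo_seq_to_flush(const uint64_t *bucket_seq,
                                  unsigned nr, unsigned cur_idx)
{
        unsigned nr_buckets = nr / 2;                       /* keep <= half the ring dirty */
        unsigned bucket_to_flush = (cur_idx + nr_buckets) % nr;

        /* every pin at or before this sequence number gets flushed */
        return bucket_seq[bucket_to_flush];
}

/* e.g. nr = 8 buckets, write head at cur_idx = 6:
 * bucket_to_flush = (6 + 4) % 8 = 2, so reclaim flushes up to bucket_seq[2]. */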
-+bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush) -+{ -+ bool did_work = false; -+ -+ if (!test_bit(JOURNAL_STARTED, &j->flags)) -+ return false; -+ -+ closure_wait_event(&j->async_wait, -+ journal_flush_done(j, seq_to_flush, &did_work)); -+ -+ return did_work; -+} -+ -+int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_entry_pin_list *p; -+ u64 iter, seq = 0; -+ int ret = 0; -+ -+ spin_lock(&j->lock); -+ fifo_for_each_entry_ptr(p, &j->pin, iter) -+ if (dev_idx >= 0 -+ ? bch2_dev_list_has_dev(p->devs, dev_idx) -+ : p->devs.nr < c->opts.metadata_replicas) -+ seq = iter; -+ spin_unlock(&j->lock); -+ -+ bch2_journal_flush_pins(j, seq); -+ -+ ret = bch2_journal_error(j); -+ if (ret) -+ return ret; -+ -+ mutex_lock(&c->replicas_gc_lock); -+ bch2_replicas_gc_start(c, 1 << BCH_DATA_JOURNAL); -+ -+ seq = 0; -+ -+ spin_lock(&j->lock); -+ while (!ret && seq < j->pin.back) { -+ struct bch_replicas_padded replicas; -+ -+ seq = max(seq, journal_last_seq(j)); -+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, -+ journal_seq_pin(j, seq)->devs); -+ seq++; -+ -+ spin_unlock(&j->lock); -+ ret = bch2_mark_replicas(c, &replicas.e); -+ spin_lock(&j->lock); -+ } -+ spin_unlock(&j->lock); -+ -+ ret = bch2_replicas_gc_end(c, ret); -+ mutex_unlock(&c->replicas_gc_lock); -+ -+ return ret; -+} -diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h -new file mode 100644 -index 000000000000..8128907a7623 ---- /dev/null -+++ b/fs/bcachefs/journal_reclaim.h -@@ -0,0 +1,69 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_RECLAIM_H -+#define _BCACHEFS_JOURNAL_RECLAIM_H -+ -+#define JOURNAL_PIN (32 * 1024) -+ -+enum journal_space_from { -+ journal_space_discarded, -+ journal_space_clean_ondisk, -+ journal_space_clean, -+}; -+ -+unsigned bch2_journal_dev_buckets_available(struct journal *, -+ struct journal_device *, -+ enum journal_space_from); -+void bch2_journal_space_available(struct journal *); -+ -+static inline bool journal_pin_active(struct journal_entry_pin *pin) -+{ -+ return pin->seq != 0; -+} -+ -+static inline struct journal_entry_pin_list * -+journal_seq_pin(struct journal *j, u64 seq) -+{ -+ EBUG_ON(seq < j->pin.front || seq >= j->pin.back); -+ -+ return &j->pin.data[seq & j->pin.mask]; -+} -+ -+void bch2_journal_pin_put(struct journal *, u64); -+void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *); -+ -+void __bch2_journal_pin_add(struct journal *, u64, struct journal_entry_pin *, -+ journal_pin_flush_fn); -+ -+static inline void bch2_journal_pin_add(struct journal *j, u64 seq, -+ struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ if (unlikely(!journal_pin_active(pin) || pin->seq > seq)) -+ __bch2_journal_pin_add(j, seq, pin, flush_fn); -+} -+ -+void bch2_journal_pin_update(struct journal *, u64, -+ struct journal_entry_pin *, -+ journal_pin_flush_fn); -+ -+void bch2_journal_pin_copy(struct journal *, -+ struct journal_entry_pin *, -+ struct journal_entry_pin *, -+ journal_pin_flush_fn); -+ -+void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *); -+ -+void bch2_journal_do_discards(struct journal *); -+void bch2_journal_reclaim(struct journal *); -+void bch2_journal_reclaim_work(struct work_struct *); -+ -+bool bch2_journal_flush_pins(struct journal *, u64); -+ -+static inline bool bch2_journal_flush_all_pins(struct journal *j) -+{ -+ return bch2_journal_flush_pins(j, U64_MAX); -+} 
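The pin machinery declared in this header boils down to a FIFO of reference counts, one per open journal entry; bch2_journal_reclaim_fast() earlier in journal_reclaim.c pops fully released entries off the front, which is what lets journal_last_seq() advance. A standalone sketch of that idea with hypothetical demo_* names (a simplification, not the patch's implementation):

#include <stdatomic.h>
#include <stdbool.h>

struct demo_pin_list { atomic_int count; };       /* one per open journal entry */

struct demo_pin_fifo {
        struct demo_pin_list *data;
        unsigned front, back, mask;               /* power-of-two sized ring */
};

/* pop fully-released entries off the front; returns true if anything was freed,
 * in which case the caller recalculates available journal space */
static bool demo_reclaim_fast(struct demo_pin_fifo *fifo)
{
        bool popped = false;

        while (fifo->front != fifo->back &&
               atomic_load(&fifo->data[fifo->front & fifo->mask].count) == 0) {
                fifo->front++;
                popped = true;
        }
        return popped;
}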
-+ -+int bch2_journal_flush_device_pins(struct journal *, int); -+ -+#endif /* _BCACHEFS_JOURNAL_RECLAIM_H */ -diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c -new file mode 100644 -index 000000000000..a21de0088753 ---- /dev/null -+++ b/fs/bcachefs/journal_seq_blacklist.c -@@ -0,0 +1,318 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_iter.h" -+#include "eytzinger.h" -+#include "journal_seq_blacklist.h" -+#include "super-io.h" -+ -+/* -+ * journal_seq_blacklist machinery: -+ * -+ * To guarantee order of btree updates after a crash, we need to detect when a -+ * btree node entry (bset) is newer than the newest journal entry that was -+ * successfully written, and ignore it - effectively ignoring any btree updates -+ * that didn't make it into the journal. -+ * -+ * If we didn't do this, we might have two btree nodes, a and b, both with -+ * updates that weren't written to the journal yet: if b was updated after a, -+ * but b was flushed and not a - oops; on recovery we'll find that the updates -+ * to b happened, but not the updates to a that happened before it. -+ * -+ * Ignoring bsets that are newer than the newest journal entry is always safe, -+ * because everything they contain will also have been journalled - and must -+ * still be present in the journal on disk until a journal entry has been -+ * written _after_ that bset was written. -+ * -+ * To accomplish this, bsets record the newest journal sequence number they -+ * contain updates for; then, on startup, the btree code queries the journal -+ * code to ask "Is this sequence number newer than the newest journal entry? If -+ * so, ignore it." -+ * -+ * When this happens, we must blacklist that journal sequence number: the -+ * journal must not write any entries with that sequence number, and it must -+ * record that it was blacklisted so that a) on recovery we don't think we have -+ * missing journal entries and b) so that the btree code continues to ignore -+ * that bset, until that btree node is rewritten. -+ */ -+ -+static unsigned -+blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl) -+{ -+ return bl -+ ? 
((vstruct_end(&bl->field) - (void *) &bl->start[0]) / -+ sizeof(struct journal_seq_blacklist_entry)) -+ : 0; -+} -+ -+static unsigned sb_blacklist_u64s(unsigned nr) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl; -+ -+ return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64); -+} -+ -+static struct bch_sb_field_journal_seq_blacklist * -+blacklist_entry_try_merge(struct bch_fs *c, -+ struct bch_sb_field_journal_seq_blacklist *bl, -+ unsigned i) -+{ -+ unsigned nr = blacklist_nr_entries(bl); -+ -+ if (le64_to_cpu(bl->start[i].end) >= -+ le64_to_cpu(bl->start[i + 1].start)) { -+ bl->start[i].end = bl->start[i + 1].end; -+ --nr; -+ memmove(&bl->start[i], -+ &bl->start[i + 1], -+ sizeof(bl->start[0]) * (nr - i)); -+ -+ bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, -+ sb_blacklist_u64s(nr)); -+ BUG_ON(!bl); -+ } -+ -+ return bl; -+} -+ -+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl; -+ unsigned i, nr; -+ int ret = 0; -+ -+ mutex_lock(&c->sb_lock); -+ bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); -+ nr = blacklist_nr_entries(bl); -+ -+ if (bl) { -+ for (i = 0; i < nr; i++) { -+ struct journal_seq_blacklist_entry *e = -+ bl->start + i; -+ -+ if (start == le64_to_cpu(e->start) && -+ end == le64_to_cpu(e->end)) -+ goto out; -+ -+ if (start <= le64_to_cpu(e->start) && -+ end >= le64_to_cpu(e->end)) { -+ e->start = cpu_to_le64(start); -+ e->end = cpu_to_le64(end); -+ -+ if (i + 1 < nr) -+ bl = blacklist_entry_try_merge(c, -+ bl, i); -+ if (i) -+ bl = blacklist_entry_try_merge(c, -+ bl, i - 1); -+ goto out_write_sb; -+ } -+ } -+ } -+ -+ bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, -+ sb_blacklist_u64s(nr + 1)); -+ if (!bl) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ bl->start[nr].start = cpu_to_le64(start); -+ bl->start[nr].end = cpu_to_le64(end); -+out_write_sb: -+ c->disk_sb.sb->features[0] |= -+ 1ULL << BCH_FEATURE_journal_seq_blacklist_v3; -+ -+ ret = bch2_write_super(c); -+out: -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+static int journal_seq_blacklist_table_cmp(const void *_l, -+ const void *_r, size_t size) -+{ -+ const struct journal_seq_blacklist_table_entry *l = _l; -+ const struct journal_seq_blacklist_table_entry *r = _r; -+ -+ return cmp_int(l->start, r->start); -+} -+ -+bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq, -+ bool dirty) -+{ -+ struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table; -+ struct journal_seq_blacklist_table_entry search = { .start = seq }; -+ int idx; -+ -+ if (!t) -+ return false; -+ -+ idx = eytzinger0_find_le(t->entries, t->nr, -+ sizeof(t->entries[0]), -+ journal_seq_blacklist_table_cmp, -+ &search); -+ if (idx < 0) -+ return false; -+ -+ BUG_ON(t->entries[idx].start > seq); -+ -+ if (seq >= t->entries[idx].end) -+ return false; -+ -+ if (dirty) -+ t->entries[idx].dirty = true; -+ return true; -+} -+ -+int bch2_blacklist_table_initialize(struct bch_fs *c) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl = -+ bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); -+ struct journal_seq_blacklist_table *t; -+ unsigned i, nr = blacklist_nr_entries(bl); -+ -+ BUG_ON(c->journal_seq_blacklist_table); -+ -+ if (!bl) -+ return 0; -+ -+ t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr, -+ GFP_KERNEL); -+ if (!t) -+ return -ENOMEM; -+ -+ t->nr = nr; -+ -+ for (i = 0; i < nr; i++) { -+ t->entries[i].start = le64_to_cpu(bl->start[i].start); -+ t->entries[i].end = le64_to_cpu(bl->start[i].end); -+ } -+ -+ 
eytzinger0_sort(t->entries, -+ t->nr, -+ sizeof(t->entries[0]), -+ journal_seq_blacklist_table_cmp, -+ NULL); -+ -+ c->journal_seq_blacklist_table = t; -+ return 0; -+} -+ -+static const char * -+bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl = -+ field_to_type(f, journal_seq_blacklist); -+ struct journal_seq_blacklist_entry *i; -+ unsigned nr = blacklist_nr_entries(bl); -+ -+ for (i = bl->start; i < bl->start + nr; i++) { -+ if (le64_to_cpu(i->start) >= -+ le64_to_cpu(i->end)) -+ return "entry start >= end"; -+ -+ if (i + 1 < bl->start + nr && -+ le64_to_cpu(i[0].end) > -+ le64_to_cpu(i[1].start)) -+ return "entries out of order"; -+ } -+ -+ return NULL; -+} -+ -+static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out, -+ struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl = -+ field_to_type(f, journal_seq_blacklist); -+ struct journal_seq_blacklist_entry *i; -+ unsigned nr = blacklist_nr_entries(bl); -+ -+ for (i = bl->start; i < bl->start + nr; i++) { -+ if (i != bl->start) -+ pr_buf(out, " "); -+ -+ pr_buf(out, "%llu-%llu", -+ le64_to_cpu(i->start), -+ le64_to_cpu(i->end)); -+ } -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = { -+ .validate = bch2_sb_journal_seq_blacklist_validate, -+ .to_text = bch2_sb_journal_seq_blacklist_to_text -+}; -+ -+void bch2_blacklist_entries_gc(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(work, struct bch_fs, -+ journal_seq_blacklist_gc_work); -+ struct journal_seq_blacklist_table *t; -+ struct bch_sb_field_journal_seq_blacklist *bl; -+ struct journal_seq_blacklist_entry *src, *dst; -+ struct btree_trans trans; -+ unsigned i, nr, new_nr; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ struct btree_iter *iter; -+ struct btree *b; -+ -+ for_each_btree_node(&trans, iter, i, POS_MIN, -+ BTREE_ITER_PREFETCH, b) -+ if (test_bit(BCH_FS_STOPPING, &c->flags)) { -+ bch2_trans_exit(&trans); -+ return; -+ } -+ bch2_trans_iter_free(&trans, iter); -+ } -+ -+ ret = bch2_trans_exit(&trans); -+ if (ret) -+ return; -+ -+ mutex_lock(&c->sb_lock); -+ bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); -+ if (!bl) -+ goto out; -+ -+ nr = blacklist_nr_entries(bl); -+ dst = bl->start; -+ -+ t = c->journal_seq_blacklist_table; -+ BUG_ON(nr != t->nr); -+ -+ for (src = bl->start, i = eytzinger0_first(t->nr); -+ src < bl->start + nr; -+ src++, i = eytzinger0_next(i, nr)) { -+ BUG_ON(t->entries[i].start != le64_to_cpu(src->start)); -+ BUG_ON(t->entries[i].end != le64_to_cpu(src->end)); -+ -+ if (t->entries[i].dirty) -+ *dst++ = *src; -+ } -+ -+ new_nr = dst - bl->start; -+ -+ bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr); -+ -+ if (new_nr != nr) { -+ bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, -+ new_nr ? 
sb_blacklist_u64s(new_nr) : 0); -+ BUG_ON(new_nr && !bl); -+ -+ if (!new_nr) -+ c->disk_sb.sb->features[0] &= -+ ~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3); -+ -+ bch2_write_super(c); -+ } -+out: -+ mutex_unlock(&c->sb_lock); -+} -diff --git a/fs/bcachefs/journal_seq_blacklist.h b/fs/bcachefs/journal_seq_blacklist.h -new file mode 100644 -index 000000000000..03f4b97247fd ---- /dev/null -+++ b/fs/bcachefs/journal_seq_blacklist.h -@@ -0,0 +1,13 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H -+#define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H -+ -+bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool); -+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64); -+int bch2_blacklist_table_initialize(struct bch_fs *); -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist; -+ -+void bch2_blacklist_entries_gc(struct work_struct *); -+ -+#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */ -diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h -new file mode 100644 -index 000000000000..154b51b891d3 ---- /dev/null -+++ b/fs/bcachefs/journal_types.h -@@ -0,0 +1,277 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_TYPES_H -+#define _BCACHEFS_JOURNAL_TYPES_H -+ -+#include -+#include -+ -+#include "alloc_types.h" -+#include "super_types.h" -+#include "fifo.h" -+ -+struct journal_res; -+ -+/* -+ * We put two of these in struct journal; we used them for writes to the -+ * journal that are being staged or in flight. -+ */ -+struct journal_buf { -+ struct jset *data; -+ -+ BKEY_PADDED(key); -+ -+ struct closure_waitlist wait; -+ -+ unsigned buf_size; /* size in bytes of @data */ -+ unsigned sectors; /* maximum size for current entry */ -+ unsigned disk_sectors; /* maximum size entry could have been, if -+ buf_size was bigger */ -+ unsigned u64s_reserved; -+ /* bloom filter: */ -+ unsigned long has_inode[1024 / sizeof(unsigned long)]; -+}; -+ -+/* -+ * Something that makes a journal entry dirty - i.e. 
a btree node that has to be -+ * flushed: -+ */ -+ -+struct journal_entry_pin_list { -+ struct list_head list; -+ struct list_head flushed; -+ atomic_t count; -+ struct bch_devs_list devs; -+}; -+ -+struct journal; -+struct journal_entry_pin; -+typedef void (*journal_pin_flush_fn)(struct journal *j, -+ struct journal_entry_pin *, u64); -+ -+struct journal_entry_pin { -+ struct list_head list; -+ journal_pin_flush_fn flush; -+ u64 seq; -+}; -+ -+struct journal_res { -+ bool ref; -+ u8 idx; -+ u16 u64s; -+ u32 offset; -+ u64 seq; -+}; -+ -+/* -+ * For reserving space in the journal prior to getting a reservation on a -+ * particular journal entry: -+ */ -+struct journal_preres { -+ unsigned u64s; -+}; -+ -+union journal_res_state { -+ struct { -+ atomic64_t counter; -+ }; -+ -+ struct { -+ u64 v; -+ }; -+ -+ struct { -+ u64 cur_entry_offset:20, -+ idx:1, -+ prev_buf_unwritten:1, -+ buf0_count:21, -+ buf1_count:21; -+ }; -+}; -+ -+union journal_preres_state { -+ struct { -+ atomic64_t counter; -+ }; -+ -+ struct { -+ u64 v; -+ }; -+ -+ struct { -+ u32 reserved; -+ u32 remaining; -+ }; -+}; -+ -+/* bytes: */ -+#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */ -+#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */ -+ -+/* -+ * We stash some journal state as sentinal values in cur_entry_offset: -+ * note - cur_entry_offset is in units of u64s -+ */ -+#define JOURNAL_ENTRY_OFFSET_MAX ((1U << 20) - 1) -+ -+#define JOURNAL_ENTRY_CLOSED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 1) -+#define JOURNAL_ENTRY_ERROR_VAL (JOURNAL_ENTRY_OFFSET_MAX) -+ -+/* -+ * JOURNAL_NEED_WRITE - current (pending) journal entry should be written ASAP, -+ * either because something's waiting on the write to complete or because it's -+ * been dirty too long and the timer's expired. -+ */ -+ -+enum { -+ JOURNAL_REPLAY_DONE, -+ JOURNAL_STARTED, -+ JOURNAL_RECLAIM_STARTED, -+ JOURNAL_NEED_WRITE, -+ JOURNAL_NOT_EMPTY, -+ JOURNAL_MAY_GET_UNRESERVED, -+}; -+ -+/* Embedded in struct bch_fs */ -+struct journal { -+ /* Fastpath stuff up front: */ -+ -+ unsigned long flags; -+ -+ union journal_res_state reservations; -+ -+ /* Max size of current journal entry */ -+ unsigned cur_entry_u64s; -+ unsigned cur_entry_sectors; -+ -+ /* -+ * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if -+ * insufficient devices: -+ */ -+ int cur_entry_error; -+ -+ union journal_preres_state prereserved; -+ -+ /* Reserved space in journal entry to be used just prior to write */ -+ unsigned entry_u64s_reserved; -+ -+ unsigned buf_size_want; -+ -+ /* -+ * Two journal entries -- one is currently open for new entries, the -+ * other is possibly being written out. -+ */ -+ struct journal_buf buf[2]; -+ -+ spinlock_t lock; -+ -+ /* if nonzero, we may not open a new journal entry: */ -+ unsigned blocked; -+ -+ /* Used when waiting because the journal was full */ -+ wait_queue_head_t wait; -+ struct closure_waitlist async_wait; -+ struct closure_waitlist preres_wait; -+ -+ struct closure io; -+ struct delayed_work write_work; -+ -+ /* Sequence number of most recent journal entry (last entry in @pin) */ -+ atomic64_t seq; -+ -+ /* seq, last_seq from the most recent journal entry successfully written */ -+ u64 seq_ondisk; -+ u64 last_seq_ondisk; -+ -+ /* -+ * FIFO of journal entries whose btree updates have not yet been -+ * written out. -+ * -+ * Each entry is a reference count. The position in the FIFO is the -+ * entry's sequence number relative to @seq. -+ * -+ * The journal entry itself holds a reference count, put when the -+ * journal entry is written out. 
Each btree node modified by the journal -+ * entry also holds a reference count, put when the btree node is -+ * written. -+ * -+ * When a reference count reaches zero, the journal entry is no longer -+ * needed. When all journal entries in the oldest journal bucket are no -+ * longer needed, the bucket can be discarded and reused. -+ */ -+ struct { -+ u64 front, back, size, mask; -+ struct journal_entry_pin_list *data; -+ } pin; -+ -+ u64 replay_journal_seq; -+ u64 replay_journal_seq_end; -+ -+ struct write_point wp; -+ spinlock_t err_lock; -+ -+ struct delayed_work reclaim_work; -+ struct mutex reclaim_lock; -+ unsigned long last_flushed; -+ struct journal_entry_pin *flush_in_progress; -+ wait_queue_head_t pin_flush_wait; -+ -+ /* protects advancing ja->discard_idx: */ -+ struct mutex discard_lock; -+ bool can_discard; -+ -+ unsigned write_delay_ms; -+ unsigned reclaim_delay_ms; -+ -+ u64 res_get_blocked_start; -+ u64 need_write_time; -+ u64 write_start_time; -+ -+ struct time_stats *write_time; -+ struct time_stats *delay_time; -+ struct time_stats *blocked_time; -+ struct time_stats *flush_seq_time; -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map res_map; -+#endif -+}; -+ -+/* -+ * Embedded in struct bch_dev. First three fields refer to the array of journal -+ * buckets, in bch_sb. -+ */ -+struct journal_device { -+ /* -+ * For each journal bucket, contains the max sequence number of the -+ * journal writes it contains - so we know when a bucket can be reused. -+ */ -+ u64 *bucket_seq; -+ -+ unsigned sectors_free; -+ -+ /* -+ * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx: -+ */ -+ unsigned discard_idx; /* Next bucket to discard */ -+ unsigned dirty_idx_ondisk; -+ unsigned dirty_idx; -+ unsigned cur_idx; /* Journal bucket we're currently writing to */ -+ unsigned nr; -+ -+ u64 *buckets; -+ -+ /* Bio for journal reads/writes to this device */ -+ struct bio *bio; -+ -+ /* for bch_journal_read_device */ -+ struct closure read; -+}; -+ -+/* -+ * journal_entry_res - reserve space in every journal entry: -+ */ -+struct journal_entry_res { -+ unsigned u64s; -+}; -+ -+#endif /* _BCACHEFS_JOURNAL_TYPES_H */ -diff --git a/fs/bcachefs/keylist.c b/fs/bcachefs/keylist.c -new file mode 100644 -index 000000000000..864dfaa67b7a ---- /dev/null -+++ b/fs/bcachefs/keylist.c -@@ -0,0 +1,67 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "keylist.h" -+ -+int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s, -+ size_t nr_inline_u64s, size_t new_u64s) -+{ -+ size_t oldsize = bch2_keylist_u64s(l); -+ size_t newsize = oldsize + new_u64s; -+ u64 *old_buf = l->keys_p == inline_u64s ? 
NULL : l->keys_p; -+ u64 *new_keys; -+ -+ newsize = roundup_pow_of_two(newsize); -+ -+ if (newsize <= nr_inline_u64s || -+ (old_buf && roundup_pow_of_two(oldsize) == newsize)) -+ return 0; -+ -+ new_keys = krealloc(old_buf, sizeof(u64) * newsize, GFP_NOIO); -+ if (!new_keys) -+ return -ENOMEM; -+ -+ if (!old_buf) -+ memcpy_u64s(new_keys, inline_u64s, oldsize); -+ -+ l->keys_p = new_keys; -+ l->top_p = new_keys + oldsize; -+ -+ return 0; -+} -+ -+void bch2_keylist_add_in_order(struct keylist *l, struct bkey_i *insert) -+{ -+ struct bkey_i *where; -+ -+ for_each_keylist_key(l, where) -+ if (bkey_cmp(insert->k.p, where->k.p) < 0) -+ break; -+ -+ memmove_u64s_up((u64 *) where + insert->k.u64s, -+ where, -+ ((u64 *) l->top) - ((u64 *) where)); -+ -+ l->top_p += insert->k.u64s; -+ bkey_copy(where, insert); -+} -+ -+void bch2_keylist_pop_front(struct keylist *l) -+{ -+ l->top_p -= bch2_keylist_front(l)->k.u64s; -+ -+ memmove_u64s_down(l->keys, -+ bkey_next(l->keys), -+ bch2_keylist_u64s(l)); -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_verify_keylist_sorted(struct keylist *l) -+{ -+ struct bkey_i *k; -+ -+ for_each_keylist_key(l, k) -+ BUG_ON(bkey_next(k) != l->top && -+ bkey_cmp(k->k.p, bkey_next(k)->k.p) >= 0); -+} -+#endif -diff --git a/fs/bcachefs/keylist.h b/fs/bcachefs/keylist.h -new file mode 100644 -index 000000000000..195799bb20bc ---- /dev/null -+++ b/fs/bcachefs/keylist.h -@@ -0,0 +1,76 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_KEYLIST_H -+#define _BCACHEFS_KEYLIST_H -+ -+#include "keylist_types.h" -+ -+int bch2_keylist_realloc(struct keylist *, u64 *, size_t, size_t); -+void bch2_keylist_add_in_order(struct keylist *, struct bkey_i *); -+void bch2_keylist_pop_front(struct keylist *); -+ -+static inline void bch2_keylist_init(struct keylist *l, u64 *inline_keys) -+{ -+ l->top_p = l->keys_p = inline_keys; -+} -+ -+static inline void bch2_keylist_free(struct keylist *l, u64 *inline_keys) -+{ -+ if (l->keys_p != inline_keys) -+ kfree(l->keys_p); -+ bch2_keylist_init(l, inline_keys); -+} -+ -+static inline void bch2_keylist_push(struct keylist *l) -+{ -+ l->top = bkey_next(l->top); -+} -+ -+static inline void bch2_keylist_add(struct keylist *l, const struct bkey_i *k) -+{ -+ bkey_copy(l->top, k); -+ bch2_keylist_push(l); -+} -+ -+static inline bool bch2_keylist_empty(struct keylist *l) -+{ -+ return l->top == l->keys; -+} -+ -+static inline size_t bch2_keylist_u64s(struct keylist *l) -+{ -+ return l->top_p - l->keys_p; -+} -+ -+static inline size_t bch2_keylist_bytes(struct keylist *l) -+{ -+ return bch2_keylist_u64s(l) * sizeof(u64); -+} -+ -+static inline struct bkey_i *bch2_keylist_front(struct keylist *l) -+{ -+ return l->keys; -+} -+ -+#define for_each_keylist_key(_keylist, _k) \ -+ for (_k = (_keylist)->keys; \ -+ _k != (_keylist)->top; \ -+ _k = bkey_next(_k)) -+ -+static inline u64 keylist_sectors(struct keylist *keys) -+{ -+ struct bkey_i *k; -+ u64 ret = 0; -+ -+ for_each_keylist_key(keys, k) -+ ret += k->k.size; -+ -+ return ret; -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_verify_keylist_sorted(struct keylist *); -+#else -+static inline void bch2_verify_keylist_sorted(struct keylist *l) {} -+#endif -+ -+#endif /* _BCACHEFS_KEYLIST_H */ -diff --git a/fs/bcachefs/keylist_types.h b/fs/bcachefs/keylist_types.h -new file mode 100644 -index 000000000000..4b3ff7d8a875 ---- /dev/null -+++ b/fs/bcachefs/keylist_types.h -@@ -0,0 +1,16 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_KEYLIST_TYPES_H -+#define _BCACHEFS_KEYLIST_TYPES_H -+ 
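bch2_keylist_realloc() above grows a flat buffer of keys from caller-provided inline storage to a heap allocation rounded up to a power of two, so repeated appends stay cheap. A standalone userspace sketch of that growth strategy (hypothetical demo_* names, plain realloc() instead of krealloc()):

#include <stdlib.h>
#include <string.h>
#include <stdint.h>

struct demo_keylist {
        uint64_t *keys_p;   /* start of the key buffer */
        uint64_t *top_p;    /* one past the last live u64 */
};

static size_t round_up_pow2(size_t n)
{
        size_t r = 1;
        while (r < n)
                r <<= 1;
        return r;
}

static int demo_keylist_realloc(struct demo_keylist *l, uint64_t *inline_u64s,
                                size_t nr_inline_u64s, size_t new_u64s)
{
        size_t oldsize = (size_t)(l->top_p - l->keys_p);
        size_t newsize = round_up_pow2(oldsize + new_u64s);
        uint64_t *old_buf = l->keys_p == inline_u64s ? NULL : l->keys_p;
        uint64_t *new_keys;

        /* still fits in the inline buffer, or the heap buffer already has room */
        if (newsize <= nr_inline_u64s ||
            (old_buf && round_up_pow2(oldsize) == newsize))
                return 0;

        new_keys = realloc(old_buf, newsize * sizeof(uint64_t));
        if (!new_keys)
                return -1;

        if (!old_buf)   /* first spill from inline to heap: copy existing keys */
                memcpy(new_keys, inline_u64s, oldsize * sizeof(uint64_t));

        l->keys_p = new_keys;
        l->top_p  = new_keys + oldsize;
        return 0;
}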
-+struct keylist { -+ union { -+ struct bkey_i *keys; -+ u64 *keys_p; -+ }; -+ union { -+ struct bkey_i *top; -+ u64 *top_p; -+ }; -+}; -+ -+#endif /* _BCACHEFS_KEYLIST_TYPES_H */ -diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c -new file mode 100644 -index 000000000000..96c8690adc5b ---- /dev/null -+++ b/fs/bcachefs/migrate.c -@@ -0,0 +1,170 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Code for moving data off a device. -+ */ -+ -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "extents.h" -+#include "io.h" -+#include "journal.h" -+#include "keylist.h" -+#include "migrate.h" -+#include "move.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k, -+ unsigned dev_idx, int flags, bool metadata) -+{ -+ unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas; -+ unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST; -+ unsigned degraded = metadata ? BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED; -+ unsigned nr_good; -+ -+ bch2_bkey_drop_device(k, dev_idx); -+ -+ nr_good = bch2_bkey_durability(c, k.s_c); -+ if ((!nr_good && !(flags & lost)) || -+ (nr_good < replicas && !(flags & degraded))) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags, -+ enum btree_id btree_id) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_on_stack sk; -+ int ret = 0; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, -+ BTREE_ITER_PREFETCH); -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret = bkey_err(k))) { -+ if (!bch2_bkey_has_device(k, dev_idx)) { -+ bch2_btree_iter_next(iter); -+ continue; -+ } -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ -+ ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k), -+ dev_idx, flags, false); -+ if (ret) -+ break; -+ -+ /* -+ * If the new extent no longer has any pointers, bch2_extent_normalize() -+ * will do the appropriate thing with it (turning it into a -+ * KEY_TYPE_error key, or just a discard if it was a cached extent) -+ */ -+ bch2_extent_normalize(c, bkey_i_to_s(sk.k)); -+ -+ bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); -+ -+ bch2_trans_update(&trans, iter, sk.k, 0); -+ -+ ret = bch2_trans_commit(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+ -+ /* -+ * don't want to leave ret == -EINTR, since if we raced and -+ * something else overwrote the key we could spuriously return -+ * -EINTR below: -+ */ -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ break; -+ } -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ bkey_on_stack_exit(&sk, c); -+ -+ BUG_ON(ret == -EINTR); -+ -+ return ret; -+} -+ -+static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) -+{ -+ return __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_EXTENTS) ?: -+ __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_REFLINK); -+} -+ -+static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct closure cl; -+ struct btree *b; -+ unsigned id; -+ int ret; -+ -+ /* don't handle this yet: */ -+ if (flags & BCH_FORCE_IF_METADATA_LOST) -+ return -EINVAL; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ closure_init_stack(&cl); -+ -+ for (id = 0; id < 
BTREE_ID_NR; id++) { -+ for_each_btree_node(&trans, iter, id, POS_MIN, -+ BTREE_ITER_PREFETCH, b) { -+ __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; -+retry: -+ if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key), -+ dev_idx)) -+ continue; -+ -+ bkey_copy(&tmp.k, &b->key); -+ -+ ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.k), -+ dev_idx, flags, true); -+ if (ret) { -+ bch_err(c, "Cannot drop device without losing data"); -+ goto err; -+ } -+ -+ ret = bch2_btree_node_update_key(c, iter, b, &tmp.k); -+ if (ret == -EINTR) { -+ b = bch2_btree_iter_peek_node(iter); -+ goto retry; -+ } -+ if (ret) { -+ bch_err(c, "Error updating btree node key: %i", ret); -+ goto err; -+ } -+ } -+ bch2_trans_iter_free(&trans, iter); -+ } -+ -+ /* flush relevant btree updates */ -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+ -+ ret = 0; -+err: -+ ret = bch2_trans_exit(&trans) ?: ret; -+ -+ BUG_ON(ret == -EINTR); -+ -+ return ret; -+} -+ -+int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags) -+{ -+ return bch2_dev_usrdata_drop(c, dev_idx, flags) ?: -+ bch2_dev_metadata_drop(c, dev_idx, flags); -+} -diff --git a/fs/bcachefs/migrate.h b/fs/bcachefs/migrate.h -new file mode 100644 -index 000000000000..027efaa0d575 ---- /dev/null -+++ b/fs/bcachefs/migrate.h -@@ -0,0 +1,7 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MIGRATE_H -+#define _BCACHEFS_MIGRATE_H -+ -+int bch2_dev_data_drop(struct bch_fs *, unsigned, int); -+ -+#endif /* _BCACHEFS_MIGRATE_H */ -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -new file mode 100644 -index 000000000000..b42350f9e9fb ---- /dev/null -+++ b/fs/bcachefs/move.c -@@ -0,0 +1,815 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_on_stack.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "disk_groups.h" -+#include "inode.h" -+#include "io.h" -+#include "journal_reclaim.h" -+#include "move.h" -+#include "replicas.h" -+#include "super-io.h" -+#include "keylist.h" -+ -+#include -+#include -+ -+#include -+ -+#define SECTORS_IN_FLIGHT_PER_DEVICE 2048 -+ -+struct moving_io { -+ struct list_head list; -+ struct closure cl; -+ bool read_completed; -+ -+ unsigned read_sectors; -+ unsigned write_sectors; -+ -+ struct bch_read_bio rbio; -+ -+ struct migrate_write write; -+ /* Must be last since it is variable size */ -+ struct bio_vec bi_inline_vecs[0]; -+}; -+ -+struct moving_context { -+ /* Closure for waiting on all reads and writes to complete */ -+ struct closure cl; -+ -+ struct bch_move_stats *stats; -+ -+ struct list_head reads; -+ -+ /* in flight sectors: */ -+ atomic_t read_sectors; -+ atomic_t write_sectors; -+ -+ wait_queue_head_t wait; -+}; -+ -+static int bch2_migrate_index_update(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct migrate_write *m = -+ container_of(op, struct migrate_write, op); -+ struct keylist *keys = &op->insert_keys; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, m->btree_id, -+ bkey_start_pos(&bch2_keylist_front(keys)->k), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ -+ while (1) { -+ struct bkey_s_c k; -+ struct bkey_i *insert; -+ struct bkey_i_extent *new; -+ BKEY_PADDED(k) _new, _insert; -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ bool did_work = false; -+ int 
nr; -+ -+ bch2_trans_reset(&trans, 0); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) { -+ if (ret == -EINTR) -+ continue; -+ break; -+ } -+ -+ new = bkey_i_to_extent(bch2_keylist_front(keys)); -+ -+ if (bversion_cmp(k.k->version, new->k.version) || -+ !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset)) -+ goto nomatch; -+ -+ if (m->data_cmd == DATA_REWRITE && -+ !bch2_bkey_has_device(k, m->data_opts.rewrite_dev)) -+ goto nomatch; -+ -+ bkey_reassemble(&_insert.k, k); -+ insert = &_insert.k; -+ -+ bkey_copy(&_new.k, bch2_keylist_front(keys)); -+ new = bkey_i_to_extent(&_new.k); -+ bch2_cut_front(iter->pos, &new->k_i); -+ -+ bch2_cut_front(iter->pos, insert); -+ bch2_cut_back(new->k.p, insert); -+ bch2_cut_back(insert->k.p, &new->k_i); -+ -+ if (m->data_cmd == DATA_REWRITE) -+ bch2_bkey_drop_device(bkey_i_to_s(insert), -+ m->data_opts.rewrite_dev); -+ -+ extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) { -+ if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) { -+ /* -+ * raced with another move op? extent already -+ * has a pointer to the device we just wrote -+ * data to -+ */ -+ continue; -+ } -+ -+ bch2_extent_ptr_decoded_append(insert, &p); -+ did_work = true; -+ } -+ -+ if (!did_work) -+ goto nomatch; -+ -+ bch2_bkey_narrow_crcs(insert, -+ (struct bch_extent_crc_unpacked) { 0 }); -+ bch2_extent_normalize(c, bkey_i_to_s(insert)); -+ bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert), -+ op->opts.background_target, -+ op->opts.data_replicas); -+ -+ /* -+ * If we're not fully overwriting @k, and it's compressed, we -+ * need a reservation for all the pointers in @insert -+ */ -+ nr = bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(insert)) - -+ m->nr_ptrs_reserved; -+ -+ if (insert->k.size < k.k->size && -+ bch2_bkey_sectors_compressed(k) && -+ nr > 0) { -+ ret = bch2_disk_reservation_add(c, &op->res, -+ keylist_sectors(keys) * nr, 0); -+ if (ret) -+ goto out; -+ -+ m->nr_ptrs_reserved += nr; -+ goto next; -+ } -+ -+ bch2_trans_update(&trans, iter, insert, 0); -+ -+ ret = bch2_trans_commit(&trans, &op->res, -+ op_journal_seq(op), -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ m->data_opts.btree_insert_flags); -+ if (!ret) -+ atomic_long_inc(&c->extent_migrate_done); -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ break; -+next: -+ while (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) >= 0) { -+ bch2_keylist_pop_front(keys); -+ if (bch2_keylist_empty(keys)) -+ goto out; -+ } -+ continue; -+nomatch: -+ if (m->ctxt) { -+ BUG_ON(k.k->p.offset <= iter->pos.offset); -+ atomic64_inc(&m->ctxt->stats->keys_raced); -+ atomic64_add(k.k->p.offset - iter->pos.offset, -+ &m->ctxt->stats->sectors_raced); -+ } -+ atomic_long_inc(&c->extent_migrate_raced); -+ trace_move_race(&new->k); -+ bch2_btree_iter_next_slot(iter); -+ goto next; -+ } -+out: -+ bch2_trans_exit(&trans); -+ BUG_ON(ret == -EINTR); -+ return ret; -+} -+ -+void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio) -+{ -+ /* write bio must own pages: */ -+ BUG_ON(!m->op.wbio.bio.bi_vcnt); -+ -+ m->ptr = rbio->pick.ptr; -+ m->offset = rbio->pos.offset - rbio->pick.crc.offset; -+ m->op.devs_have = rbio->devs_have; -+ m->op.pos = rbio->pos; -+ m->op.version = rbio->version; -+ m->op.crc = rbio->pick.crc; -+ m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9; -+ -+ if (bch2_csum_type_is_encryption(m->op.crc.csum_type)) { -+ m->op.nonce = m->op.crc.nonce + m->op.crc.offset; -+ m->op.csum_type = m->op.crc.csum_type; -+ } -+ -+ if (m->data_cmd == DATA_REWRITE) 
-+ bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev); -+} -+ -+int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, -+ struct write_point_specifier wp, -+ struct bch_io_opts io_opts, -+ enum data_cmd data_cmd, -+ struct data_opts data_opts, -+ enum btree_id btree_id, -+ struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ int ret; -+ -+ m->btree_id = btree_id; -+ m->data_cmd = data_cmd; -+ m->data_opts = data_opts; -+ m->nr_ptrs_reserved = 0; -+ -+ bch2_write_op_init(&m->op, c, io_opts); -+ -+ if (!bch2_bkey_is_incompressible(k)) -+ m->op.compression_type = -+ bch2_compression_opt_to_type[io_opts.background_compression ?: -+ io_opts.compression]; -+ else -+ m->op.incompressible = true; -+ -+ m->op.target = data_opts.target, -+ m->op.write_point = wp; -+ -+ if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) -+ m->op.alloc_reserve = RESERVE_MOVINGGC; -+ -+ m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS| -+ BCH_WRITE_PAGES_STABLE| -+ BCH_WRITE_PAGES_OWNED| -+ BCH_WRITE_DATA_ENCODED| -+ BCH_WRITE_FROM_INTERNAL; -+ -+ m->op.nr_replicas = 1; -+ m->op.nr_replicas_required = 1; -+ m->op.index_update_fn = bch2_migrate_index_update; -+ -+ switch (data_cmd) { -+ case DATA_ADD_REPLICAS: { -+ /* -+ * DATA_ADD_REPLICAS is used for moving data to a different -+ * device in the background, and due to compression the new copy -+ * might take up more space than the old copy: -+ */ -+#if 0 -+ int nr = (int) io_opts.data_replicas - -+ bch2_bkey_nr_ptrs_allocated(k); -+#endif -+ int nr = (int) io_opts.data_replicas; -+ -+ if (nr > 0) { -+ m->op.nr_replicas = m->nr_ptrs_reserved = nr; -+ -+ ret = bch2_disk_reservation_get(c, &m->op.res, -+ k.k->size, m->op.nr_replicas, 0); -+ if (ret) -+ return ret; -+ } -+ break; -+ } -+ case DATA_REWRITE: { -+ unsigned compressed_sectors = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && -+ crc_is_compressed(p.crc) && -+ bch2_dev_in_target(c, p.ptr.dev, data_opts.target)) -+ compressed_sectors += p.crc.compressed_size; -+ -+ if (compressed_sectors) { -+ ret = bch2_disk_reservation_add(c, &m->op.res, -+ compressed_sectors, -+ BCH_DISK_RESERVATION_NOFAIL); -+ if (ret) -+ return ret; -+ } -+ break; -+ } -+ case DATA_PROMOTE: -+ m->op.flags |= BCH_WRITE_ALLOC_NOWAIT; -+ m->op.flags |= BCH_WRITE_CACHED; -+ break; -+ default: -+ BUG(); -+ } -+ -+ return 0; -+} -+ -+static void move_free(struct closure *cl) -+{ -+ struct moving_io *io = container_of(cl, struct moving_io, cl); -+ struct moving_context *ctxt = io->write.ctxt; -+ struct bvec_iter_all iter; -+ struct bio_vec *bv; -+ -+ bch2_disk_reservation_put(io->write.op.c, &io->write.op.res); -+ -+ bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter) -+ if (bv->bv_page) -+ __free_page(bv->bv_page); -+ -+ wake_up(&ctxt->wait); -+ -+ kfree(io); -+} -+ -+static void move_write_done(struct closure *cl) -+{ -+ struct moving_io *io = container_of(cl, struct moving_io, cl); -+ -+ atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors); -+ closure_return_with_destructor(cl, move_free); -+} -+ -+static void move_write(struct closure *cl) -+{ -+ struct moving_io *io = container_of(cl, struct moving_io, cl); -+ -+ if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) { -+ closure_return_with_destructor(cl, move_free); -+ return; -+ } -+ -+ bch2_migrate_read_done(&io->write, &io->rbio); -+ -+ atomic_add(io->write_sectors, &io->write.ctxt->write_sectors); -+ 
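/*
 * The read has completed at this point: the buffer is handed to the write
 * path below as a closure. When bch2_write() finishes, move_write_done()
 * drops the in-flight write_sectors count and move_free() releases the bio
 * pages and the disk reservation.
 */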
closure_call(&io->write.op.cl, bch2_write, NULL, cl); -+ continue_at(cl, move_write_done, NULL); -+} -+ -+static inline struct moving_io *next_pending_write(struct moving_context *ctxt) -+{ -+ struct moving_io *io = -+ list_first_entry_or_null(&ctxt->reads, struct moving_io, list); -+ -+ return io && io->read_completed ? io : NULL; -+} -+ -+static void move_read_endio(struct bio *bio) -+{ -+ struct moving_io *io = container_of(bio, struct moving_io, rbio.bio); -+ struct moving_context *ctxt = io->write.ctxt; -+ -+ atomic_sub(io->read_sectors, &ctxt->read_sectors); -+ io->read_completed = true; -+ -+ if (next_pending_write(ctxt)) -+ wake_up(&ctxt->wait); -+ -+ closure_put(&ctxt->cl); -+} -+ -+static void do_pending_writes(struct moving_context *ctxt) -+{ -+ struct moving_io *io; -+ -+ while ((io = next_pending_write(ctxt))) { -+ list_del(&io->list); -+ closure_call(&io->cl, move_write, NULL, &ctxt->cl); -+ } -+} -+ -+#define move_ctxt_wait_event(_ctxt, _cond) \ -+do { \ -+ do_pending_writes(_ctxt); \ -+ \ -+ if (_cond) \ -+ break; \ -+ __wait_event((_ctxt)->wait, \ -+ next_pending_write(_ctxt) || (_cond)); \ -+} while (1) -+ -+static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt) -+{ -+ unsigned sectors_pending = atomic_read(&ctxt->write_sectors); -+ -+ move_ctxt_wait_event(ctxt, -+ !atomic_read(&ctxt->write_sectors) || -+ atomic_read(&ctxt->write_sectors) != sectors_pending); -+} -+ -+static int bch2_move_extent(struct bch_fs *c, -+ struct moving_context *ctxt, -+ struct write_point_specifier wp, -+ struct bch_io_opts io_opts, -+ enum btree_id btree_id, -+ struct bkey_s_c k, -+ enum data_cmd data_cmd, -+ struct data_opts data_opts) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ struct moving_io *io; -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned sectors = k.k->size, pages; -+ int ret = -ENOMEM; -+ -+ move_ctxt_wait_event(ctxt, -+ atomic_read(&ctxt->write_sectors) < -+ SECTORS_IN_FLIGHT_PER_DEVICE); -+ -+ move_ctxt_wait_event(ctxt, -+ atomic_read(&ctxt->read_sectors) < -+ SECTORS_IN_FLIGHT_PER_DEVICE); -+ -+ /* write path might have to decompress data: */ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ sectors = max_t(unsigned, sectors, p.crc.uncompressed_size); -+ -+ pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); -+ io = kzalloc(sizeof(struct moving_io) + -+ sizeof(struct bio_vec) * pages, GFP_KERNEL); -+ if (!io) -+ goto err; -+ -+ io->write.ctxt = ctxt; -+ io->read_sectors = k.k->size; -+ io->write_sectors = k.k->size; -+ -+ bio_init(&io->write.op.wbio.bio, io->bi_inline_vecs, pages); -+ bio_set_prio(&io->write.op.wbio.bio, -+ IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); -+ -+ if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9, -+ GFP_KERNEL)) -+ goto err_free; -+ -+ io->rbio.c = c; -+ io->rbio.opts = io_opts; -+ bio_init(&io->rbio.bio, io->bi_inline_vecs, pages); -+ io->rbio.bio.bi_vcnt = pages; -+ bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); -+ io->rbio.bio.bi_iter.bi_size = sectors << 9; -+ -+ bio_set_op_attrs(&io->rbio.bio, REQ_OP_READ, 0); -+ io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); -+ io->rbio.bio.bi_end_io = move_read_endio; -+ -+ ret = bch2_migrate_write_init(c, &io->write, wp, io_opts, -+ data_cmd, data_opts, btree_id, k); -+ if (ret) -+ goto err_free_pages; -+ -+ atomic64_inc(&ctxt->stats->keys_moved); -+ atomic64_add(k.k->size, &ctxt->stats->sectors_moved); -+ -+ trace_move_extent(k.k); -+ -+ atomic_add(io->read_sectors, &ctxt->read_sectors); -+ 
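/*
 * The read is now accounted as in flight; the io is queued on ctxt->reads
 * below, move_read_endio() marks it read_completed, and do_pending_writes()
 * then issues the corresponding writes in submission order.
 */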
list_add_tail(&io->list, &ctxt->reads); -+ -+ /* -+ * dropped by move_read_endio() - guards against use after free of -+ * ctxt when doing wakeup -+ */ -+ closure_get(&ctxt->cl); -+ bch2_read_extent(c, &io->rbio, k, 0, -+ BCH_READ_NODECODE| -+ BCH_READ_LAST_FRAGMENT); -+ return 0; -+err_free_pages: -+ bio_free_pages(&io->write.op.wbio.bio); -+err_free: -+ kfree(io); -+err: -+ trace_move_alloc_fail(k.k); -+ return ret; -+} -+ -+static int __bch2_move_data(struct bch_fs *c, -+ struct moving_context *ctxt, -+ struct bch_ratelimit *rate, -+ struct write_point_specifier wp, -+ struct bpos start, -+ struct bpos end, -+ move_pred_fn pred, void *arg, -+ struct bch_move_stats *stats, -+ enum btree_id btree_id) -+{ -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); -+ struct bkey_on_stack sk; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct data_opts data_opts; -+ enum data_cmd data_cmd; -+ u64 delay, cur_inum = U64_MAX; -+ int ret = 0, ret2; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ stats->data_type = BCH_DATA_USER; -+ stats->btree_id = btree_id; -+ stats->pos = POS_MIN; -+ -+ iter = bch2_trans_get_iter(&trans, btree_id, start, -+ BTREE_ITER_PREFETCH); -+ -+ if (rate) -+ bch2_ratelimit_reset(rate); -+ -+ while (1) { -+ do { -+ delay = rate ? bch2_ratelimit_delay(rate) : 0; -+ -+ if (delay) { -+ bch2_trans_unlock(&trans); -+ set_current_state(TASK_INTERRUPTIBLE); -+ } -+ -+ if (kthread && (ret = kthread_should_stop())) { -+ __set_current_state(TASK_RUNNING); -+ goto out; -+ } -+ -+ if (delay) -+ schedule_timeout(delay); -+ -+ if (unlikely(freezing(current))) { -+ bch2_trans_unlock(&trans); -+ move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); -+ try_to_freeze(); -+ } -+ } while (delay); -+peek: -+ k = bch2_btree_iter_peek(iter); -+ -+ stats->pos = iter->pos; -+ -+ if (!k.k) -+ break; -+ ret = bkey_err(k); -+ if (ret) -+ break; -+ if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) -+ break; -+ -+ if (!bkey_extent_is_direct_data(k.k)) -+ goto next_nondata; -+ -+ if (btree_id == BTREE_ID_EXTENTS && -+ cur_inum != k.k->p.inode) { -+ struct bch_inode_unpacked inode; -+ -+ /* don't hold btree locks while looking up inode: */ -+ bch2_trans_unlock(&trans); -+ -+ io_opts = bch2_opts_to_inode_opts(c->opts); -+ if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode)) -+ bch2_io_opts_apply(&io_opts, bch2_inode_opts_get(&inode)); -+ cur_inum = k.k->p.inode; -+ goto peek; -+ } -+ -+ switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) { -+ case DATA_SKIP: -+ goto next; -+ case DATA_SCRUB: -+ BUG(); -+ case DATA_ADD_REPLICAS: -+ case DATA_REWRITE: -+ case DATA_PROMOTE: -+ break; -+ default: -+ BUG(); -+ } -+ -+ /* unlock before doing IO: */ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ bch2_trans_unlock(&trans); -+ -+ ret2 = bch2_move_extent(c, ctxt, wp, io_opts, btree_id, k, -+ data_cmd, data_opts); -+ if (ret2) { -+ if (ret2 == -ENOMEM) { -+ /* memory allocation failure, wait for some IO to finish */ -+ bch2_move_ctxt_wait_for_io(ctxt); -+ continue; -+ } -+ -+ /* XXX signal failure */ -+ goto next; -+ } -+ -+ if (rate) -+ bch2_ratelimit_increment(rate, k.k->size); -+next: -+ atomic64_add(k.k->size * bch2_bkey_nr_ptrs_allocated(k), -+ &stats->sectors_seen); -+next_nondata: -+ bch2_btree_iter_next(iter); -+ bch2_trans_cond_resched(&trans); -+ } -+out: -+ ret = bch2_trans_exit(&trans) ?: ret; -+ bkey_on_stack_exit(&sk, c); -+ -+ return ret; -+} -+ 
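The move and data-job paths above and below chain their steps with the GNU "a ?: b" extension (the conditional operator with the middle operand omitted): the expression evaluates to a when a is non-zero, and only otherwise evaluates and returns b, which is how the first error encountered is kept while follow-up or cleanup calls still run. A minimal, self-contained user-space sketch of that idiom, not taken from the patch, with hypothetical step_ok()/step_fail() helpers (builds with gcc or clang):

#include <stdio.h>

static int step_ok(void)   { return 0; }   /* stands in for a call that succeeds */
static int step_fail(void) { return -5; }  /* stands in for a call that errors   */

int main(void)
{
	/* the second step only runs if the first returned 0;
	 * otherwise the first error is propagated */
	int ret = step_fail() ?: step_ok();
	printf("chained: %d\n", ret);      /* prints -5; step_ok() was not called */

	/* the "x() ?: ret" form always runs x() and keeps an earlier
	 * error when x() itself succeeds */
	ret = -7;                          /* pretend an earlier step failed */
	ret = step_ok() ?: ret;
	printf("kept: %d\n", ret);         /* prints -7 */

	return 0;
}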
-+int bch2_move_data(struct bch_fs *c, -+ struct bch_ratelimit *rate, -+ struct write_point_specifier wp, -+ struct bpos start, -+ struct bpos end, -+ move_pred_fn pred, void *arg, -+ struct bch_move_stats *stats) -+{ -+ struct moving_context ctxt = { .stats = stats }; -+ int ret; -+ -+ closure_init_stack(&ctxt.cl); -+ INIT_LIST_HEAD(&ctxt.reads); -+ init_waitqueue_head(&ctxt.wait); -+ -+ stats->data_type = BCH_DATA_USER; -+ -+ ret = __bch2_move_data(c, &ctxt, rate, wp, start, end, -+ pred, arg, stats, BTREE_ID_EXTENTS) ?: -+ __bch2_move_data(c, &ctxt, rate, wp, start, end, -+ pred, arg, stats, BTREE_ID_REFLINK); -+ -+ move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); -+ closure_sync(&ctxt.cl); -+ -+ EBUG_ON(atomic_read(&ctxt.write_sectors)); -+ -+ trace_move_data(c, -+ atomic64_read(&stats->sectors_moved), -+ atomic64_read(&stats->keys_moved)); -+ -+ return ret; -+} -+ -+static int bch2_move_btree(struct bch_fs *c, -+ move_pred_fn pred, -+ void *arg, -+ struct bch_move_stats *stats) -+{ -+ struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ unsigned id; -+ struct data_opts data_opts; -+ enum data_cmd cmd; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ stats->data_type = BCH_DATA_BTREE; -+ -+ for (id = 0; id < BTREE_ID_NR; id++) { -+ stats->btree_id = id; -+ -+ for_each_btree_node(&trans, iter, id, POS_MIN, -+ BTREE_ITER_PREFETCH, b) { -+ stats->pos = iter->pos; -+ -+ switch ((cmd = pred(c, arg, -+ bkey_i_to_s_c(&b->key), -+ &io_opts, &data_opts))) { -+ case DATA_SKIP: -+ goto next; -+ case DATA_SCRUB: -+ BUG(); -+ case DATA_ADD_REPLICAS: -+ case DATA_REWRITE: -+ break; -+ default: -+ BUG(); -+ } -+ -+ ret = bch2_btree_node_rewrite(c, iter, -+ b->data->keys.seq, 0) ?: ret; -+next: -+ bch2_trans_cond_resched(&trans); -+ } -+ -+ ret = bch2_trans_iter_free(&trans, iter) ?: ret; -+ } -+ -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+#if 0 -+static enum data_cmd scrub_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ return DATA_SCRUB; -+} -+#endif -+ -+static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ unsigned nr_good = bch2_bkey_durability(c, k); -+ unsigned replicas = 0; -+ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: -+ replicas = c->opts.metadata_replicas; -+ break; -+ case KEY_TYPE_extent: -+ replicas = io_opts->data_replicas; -+ break; -+ } -+ -+ if (!nr_good || nr_good >= replicas) -+ return DATA_SKIP; -+ -+ data_opts->target = 0; -+ data_opts->btree_insert_flags = 0; -+ return DATA_ADD_REPLICAS; -+} -+ -+static enum data_cmd migrate_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ struct bch_ioctl_data *op = arg; -+ -+ if (!bch2_bkey_has_device(k, op->migrate.dev)) -+ return DATA_SKIP; -+ -+ data_opts->target = 0; -+ data_opts->btree_insert_flags = 0; -+ data_opts->rewrite_dev = op->migrate.dev; -+ return DATA_REWRITE; -+} -+ -+int bch2_data_job(struct bch_fs *c, -+ struct bch_move_stats *stats, -+ struct bch_ioctl_data op) -+{ -+ int ret = 0; -+ -+ switch (op.op) { -+ case BCH_DATA_OP_REREPLICATE: -+ stats->data_type = BCH_DATA_JOURNAL; -+ ret = bch2_journal_flush_device_pins(&c->journal, -1); -+ -+ ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret; -+ -+ 
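/*
 * Wait below until no interior btree node updates are still pending, then
 * garbage-collect the replicas table and rereplicate user data.
 */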
closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+ -+ ret = bch2_replicas_gc2(c) ?: ret; -+ -+ ret = bch2_move_data(c, NULL, -+ writepoint_hashed((unsigned long) current), -+ op.start, -+ op.end, -+ rereplicate_pred, c, stats) ?: ret; -+ ret = bch2_replicas_gc2(c) ?: ret; -+ break; -+ case BCH_DATA_OP_MIGRATE: -+ if (op.migrate.dev >= c->sb.nr_devices) -+ return -EINVAL; -+ -+ stats->data_type = BCH_DATA_JOURNAL; -+ ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); -+ -+ ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret; -+ ret = bch2_replicas_gc2(c) ?: ret; -+ -+ ret = bch2_move_data(c, NULL, -+ writepoint_hashed((unsigned long) current), -+ op.start, -+ op.end, -+ migrate_pred, &op, stats) ?: ret; -+ ret = bch2_replicas_gc2(c) ?: ret; -+ break; -+ default: -+ ret = -EINVAL; -+ } -+ -+ return ret; -+} -diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h -new file mode 100644 -index 000000000000..0acd1720d4f8 ---- /dev/null -+++ b/fs/bcachefs/move.h -@@ -0,0 +1,64 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MOVE_H -+#define _BCACHEFS_MOVE_H -+ -+#include "btree_iter.h" -+#include "buckets.h" -+#include "io_types.h" -+#include "move_types.h" -+ -+struct bch_read_bio; -+struct moving_context; -+ -+enum data_cmd { -+ DATA_SKIP, -+ DATA_SCRUB, -+ DATA_ADD_REPLICAS, -+ DATA_REWRITE, -+ DATA_PROMOTE, -+}; -+ -+struct data_opts { -+ u16 target; -+ unsigned rewrite_dev; -+ int btree_insert_flags; -+}; -+ -+struct migrate_write { -+ enum btree_id btree_id; -+ enum data_cmd data_cmd; -+ struct data_opts data_opts; -+ -+ unsigned nr_ptrs_reserved; -+ -+ struct moving_context *ctxt; -+ -+ /* what we read: */ -+ struct bch_extent_ptr ptr; -+ u64 offset; -+ -+ struct bch_write_op op; -+}; -+ -+void bch2_migrate_read_done(struct migrate_write *, struct bch_read_bio *); -+int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *, -+ struct write_point_specifier, -+ struct bch_io_opts, -+ enum data_cmd, struct data_opts, -+ enum btree_id, struct bkey_s_c); -+ -+typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *, -+ struct bkey_s_c, -+ struct bch_io_opts *, struct data_opts *); -+ -+int bch2_move_data(struct bch_fs *, struct bch_ratelimit *, -+ struct write_point_specifier, -+ struct bpos, struct bpos, -+ move_pred_fn, void *, -+ struct bch_move_stats *); -+ -+int bch2_data_job(struct bch_fs *, -+ struct bch_move_stats *, -+ struct bch_ioctl_data); -+ -+#endif /* _BCACHEFS_MOVE_H */ -diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h -new file mode 100644 -index 000000000000..fc0de165af9f ---- /dev/null -+++ b/fs/bcachefs/move_types.h -@@ -0,0 +1,17 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MOVE_TYPES_H -+#define _BCACHEFS_MOVE_TYPES_H -+ -+struct bch_move_stats { -+ enum bch_data_type data_type; -+ enum btree_id btree_id; -+ struct bpos pos; -+ -+ atomic64_t keys_moved; -+ atomic64_t keys_raced; -+ atomic64_t sectors_moved; -+ atomic64_t sectors_seen; -+ atomic64_t sectors_raced; -+}; -+ -+#endif /* _BCACHEFS_MOVE_TYPES_H */ -diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c -new file mode 100644 -index 000000000000..0a87cd7405dd ---- /dev/null -+++ b/fs/bcachefs/movinggc.c -@@ -0,0 +1,322 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Moving/copying garbage collector -+ * -+ * Copyright 2012 Google, Inc. 
-+ */ -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "clock.h" -+#include "disk_groups.h" -+#include "extents.h" -+#include "eytzinger.h" -+#include "io.h" -+#include "keylist.h" -+#include "move.h" -+#include "movinggc.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * We can't use the entire copygc reserve in one iteration of copygc: we may -+ * need the buckets we're freeing up to go back into the copygc reserve to make -+ * forward progress, but if the copygc reserve is full they'll be available for -+ * any allocation - and it's possible that in a given iteration, we free up most -+ * of the buckets we're going to free before we allocate most of the buckets -+ * we're going to allocate. -+ * -+ * If we only use half of the reserve per iteration, then in steady state we'll -+ * always have room in the reserve for the buckets we're going to need in the -+ * next iteration: -+ */ -+#define COPYGC_BUCKETS_PER_ITER(ca) \ -+ ((ca)->free[RESERVE_MOVINGGC].size / 2) -+ -+/* -+ * Max sectors to move per iteration: Have to take into account internal -+ * fragmentation from the multiple write points for each generation: -+ */ -+#define COPYGC_SECTORS_PER_ITER(ca) \ -+ ((ca)->mi.bucket_size * COPYGC_BUCKETS_PER_ITER(ca)) -+ -+static inline int sectors_used_cmp(copygc_heap *heap, -+ struct copygc_heap_entry l, -+ struct copygc_heap_entry r) -+{ -+ return cmp_int(l.sectors, r.sectors); -+} -+ -+static int bucket_offset_cmp(const void *_l, const void *_r, size_t size) -+{ -+ const struct copygc_heap_entry *l = _l; -+ const struct copygc_heap_entry *r = _r; -+ -+ return cmp_int(l->offset, r->offset); -+} -+ -+static bool __copygc_pred(struct bch_dev *ca, -+ struct bkey_s_c k) -+{ -+ copygc_heap *h = &ca->copygc_heap; -+ const struct bch_extent_ptr *ptr = -+ bch2_bkey_has_device(k, ca->dev_idx); -+ -+ if (ptr) { -+ struct copygc_heap_entry search = { .offset = ptr->offset }; -+ -+ ssize_t i = eytzinger0_find_le(h->data, h->used, -+ sizeof(h->data[0]), -+ bucket_offset_cmp, &search); -+#if 0 -+ /* eytzinger search verify code: */ -+ ssize_t j = -1, k; -+ -+ for (k = 0; k < h->used; k++) -+ if (h->data[k].offset <= ptr->offset && -+ (j < 0 || h->data[k].offset > h->data[j].offset)) -+ j = k; -+ -+ BUG_ON(i != j); -+#endif -+ return (i >= 0 && -+ ptr->offset < h->data[i].offset + ca->mi.bucket_size && -+ ptr->gen == h->data[i].gen); -+ } -+ -+ return false; -+} -+ -+static enum data_cmd copygc_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ struct bch_dev *ca = arg; -+ -+ if (!__copygc_pred(ca, k)) -+ return DATA_SKIP; -+ -+ data_opts->target = dev_to_target(ca->dev_idx); -+ data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE; -+ data_opts->rewrite_dev = ca->dev_idx; -+ return DATA_REWRITE; -+} -+ -+static bool have_copygc_reserve(struct bch_dev *ca) -+{ -+ bool ret; -+ -+ spin_lock(&ca->fs->freelist_lock); -+ ret = fifo_full(&ca->free[RESERVE_MOVINGGC]) || -+ ca->allocator_state != ALLOCATOR_RUNNING; -+ spin_unlock(&ca->fs->freelist_lock); -+ -+ return ret; -+} -+ -+static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca) -+{ -+ copygc_heap *h = &ca->copygc_heap; -+ struct copygc_heap_entry e, *i; -+ struct bucket_array *buckets; -+ struct bch_move_stats move_stats; -+ u64 sectors_to_move = 0, sectors_not_moved = 0; -+ u64 buckets_to_move, 
buckets_not_moved = 0; -+ size_t b; -+ int ret; -+ -+ memset(&move_stats, 0, sizeof(move_stats)); -+ closure_wait_event(&c->freelist_wait, have_copygc_reserve(ca)); -+ -+ /* -+ * Find buckets with lowest sector counts, skipping completely -+ * empty buckets, by building a maxheap sorted by sector count, -+ * and repeatedly replacing the maximum element until all -+ * buckets have been visited. -+ */ -+ h->used = 0; -+ -+ /* -+ * We need bucket marks to be up to date - gc can't be recalculating -+ * them: -+ */ -+ down_read(&c->gc_lock); -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for (b = buckets->first_bucket; b < buckets->nbuckets; b++) { -+ struct bucket_mark m = READ_ONCE(buckets->b[b].mark); -+ struct copygc_heap_entry e; -+ -+ if (m.owned_by_allocator || -+ m.data_type != BCH_DATA_USER || -+ !bucket_sectors_used(m) || -+ bucket_sectors_used(m) >= ca->mi.bucket_size) -+ continue; -+ -+ e = (struct copygc_heap_entry) { -+ .gen = m.gen, -+ .sectors = bucket_sectors_used(m), -+ .offset = bucket_to_sector(ca, b), -+ }; -+ heap_add_or_replace(h, e, -sectors_used_cmp, NULL); -+ } -+ up_read(&ca->bucket_lock); -+ up_read(&c->gc_lock); -+ -+ for (i = h->data; i < h->data + h->used; i++) -+ sectors_to_move += i->sectors; -+ -+ while (sectors_to_move > COPYGC_SECTORS_PER_ITER(ca)) { -+ BUG_ON(!heap_pop(h, e, -sectors_used_cmp, NULL)); -+ sectors_to_move -= e.sectors; -+ } -+ -+ buckets_to_move = h->used; -+ -+ if (!buckets_to_move) -+ return; -+ -+ eytzinger0_sort(h->data, h->used, -+ sizeof(h->data[0]), -+ bucket_offset_cmp, NULL); -+ -+ ret = bch2_move_data(c, &ca->copygc_pd.rate, -+ writepoint_ptr(&ca->copygc_write_point), -+ POS_MIN, POS_MAX, -+ copygc_pred, ca, -+ &move_stats); -+ -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ for (i = h->data; i < h->data + h->used; i++) { -+ size_t b = sector_to_bucket(ca, i->offset); -+ struct bucket_mark m = READ_ONCE(buckets->b[b].mark); -+ -+ if (i->gen == m.gen && bucket_sectors_used(m)) { -+ sectors_not_moved += bucket_sectors_used(m); -+ buckets_not_moved++; -+ } -+ } -+ up_read(&ca->bucket_lock); -+ -+ if (sectors_not_moved && !ret) -+ bch_warn_ratelimited(c, -+ "copygc finished but %llu/%llu sectors, %llu/%llu buckets not moved (move stats: moved %llu sectors, raced %llu keys, %llu sectors)", -+ sectors_not_moved, sectors_to_move, -+ buckets_not_moved, buckets_to_move, -+ atomic64_read(&move_stats.sectors_moved), -+ atomic64_read(&move_stats.keys_raced), -+ atomic64_read(&move_stats.sectors_raced)); -+ -+ trace_copygc(ca, -+ atomic64_read(&move_stats.sectors_moved), sectors_not_moved, -+ buckets_to_move, buckets_not_moved); -+} -+ -+/* -+ * Copygc runs when the amount of fragmented data is above some arbitrary -+ * threshold: -+ * -+ * The threshold at the limit - when the device is full - is the amount of space -+ * we reserved in bch2_recalc_capacity; we can't have more than that amount of -+ * disk space stranded due to fragmentation and store everything we have -+ * promised to store. -+ * -+ * But we don't want to be running copygc unnecessarily when the device still -+ * has plenty of free space - rather, we want copygc to smoothly run every so -+ * often and continually reduce the amount of fragmented space as the device -+ * fills up. So, we increase the threshold by half the current free space. 
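 *
 * (Worked example, added for illustration: with a copygc threshold of
 * roughly 8 GB and about 100 GB currently free, fragmented data may grow
 * to about 8 + 100/2 = 58 GB before copygc is expected to start running.)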
-+ */ -+unsigned long bch2_copygc_wait_amount(struct bch_dev *ca) -+{ -+ struct bch_fs *c = ca->fs; -+ struct bch_dev_usage usage = bch2_dev_usage_read(c, ca); -+ u64 fragmented_allowed = ca->copygc_threshold + -+ ((__dev_buckets_available(ca, usage) * ca->mi.bucket_size) >> 1); -+ -+ return max_t(s64, 0, fragmented_allowed - usage.sectors_fragmented); -+} -+ -+static int bch2_copygc_thread(void *arg) -+{ -+ struct bch_dev *ca = arg; -+ struct bch_fs *c = ca->fs; -+ struct io_clock *clock = &c->io_clock[WRITE]; -+ unsigned long last, wait; -+ -+ set_freezable(); -+ -+ while (!kthread_should_stop()) { -+ if (kthread_wait_freezable(c->copy_gc_enabled)) -+ break; -+ -+ last = atomic_long_read(&clock->now); -+ wait = bch2_copygc_wait_amount(ca); -+ -+ if (wait > clock->max_slop) { -+ bch2_kthread_io_clock_wait(clock, last + wait, -+ MAX_SCHEDULE_TIMEOUT); -+ continue; -+ } -+ -+ bch2_copygc(c, ca); -+ } -+ -+ return 0; -+} -+ -+void bch2_copygc_stop(struct bch_dev *ca) -+{ -+ ca->copygc_pd.rate.rate = UINT_MAX; -+ bch2_ratelimit_reset(&ca->copygc_pd.rate); -+ -+ if (ca->copygc_thread) { -+ kthread_stop(ca->copygc_thread); -+ put_task_struct(ca->copygc_thread); -+ } -+ ca->copygc_thread = NULL; -+} -+ -+int bch2_copygc_start(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct task_struct *t; -+ -+ if (ca->copygc_thread) -+ return 0; -+ -+ if (c->opts.nochanges) -+ return 0; -+ -+ if (bch2_fs_init_fault("copygc_start")) -+ return -ENOMEM; -+ -+ t = kthread_create(bch2_copygc_thread, ca, -+ "bch_copygc[%s]", ca->name); -+ if (IS_ERR(t)) -+ return PTR_ERR(t); -+ -+ get_task_struct(t); -+ -+ ca->copygc_thread = t; -+ wake_up_process(ca->copygc_thread); -+ -+ return 0; -+} -+ -+void bch2_dev_copygc_init(struct bch_dev *ca) -+{ -+ bch2_pd_controller_init(&ca->copygc_pd); -+ ca->copygc_pd.d_term = 0; -+} -diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h -new file mode 100644 -index 000000000000..dcd479632cf1 ---- /dev/null -+++ b/fs/bcachefs/movinggc.h -@@ -0,0 +1,9 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MOVINGGC_H -+#define _BCACHEFS_MOVINGGC_H -+ -+void bch2_copygc_stop(struct bch_dev *); -+int bch2_copygc_start(struct bch_fs *, struct bch_dev *); -+void bch2_dev_copygc_init(struct bch_dev *); -+ -+#endif /* _BCACHEFS_MOVINGGC_H */ -diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c -new file mode 100644 -index 000000000000..94d6c044a27d ---- /dev/null -+++ b/fs/bcachefs/opts.c -@@ -0,0 +1,440 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include -+ -+#include "bcachefs.h" -+#include "compress.h" -+#include "disk_groups.h" -+#include "opts.h" -+#include "super-io.h" -+#include "util.h" -+ -+const char * const bch2_error_actions[] = { -+ "continue", -+ "remount-ro", -+ "panic", -+ NULL -+}; -+ -+const char * const bch2_sb_features[] = { -+#define x(f, n) #f, -+ BCH_SB_FEATURES() -+#undef x -+ NULL -+}; -+ -+const char * const bch2_csum_opts[] = { -+ "none", -+ "crc32c", -+ "crc64", -+ NULL -+}; -+ -+const char * const bch2_compression_opts[] = { -+#define x(t, n) #t, -+ BCH_COMPRESSION_OPTS() -+#undef x -+ NULL -+}; -+ -+const char * const bch2_str_hash_types[] = { -+ "crc32c", -+ "crc64", -+ "siphash", -+ NULL -+}; -+ -+const char * const bch2_data_types[] = { -+ "none", -+ "sb", -+ "journal", -+ "btree", -+ "data", -+ "cached", -+ NULL -+}; -+ -+const char * const bch2_cache_replacement_policies[] = { -+ "lru", -+ "fifo", -+ "random", -+ NULL -+}; -+ -+/* Default is -1; we skip past it for struct cached_dev's cache mode */ -+const char * const 
bch2_cache_modes[] = { -+ "default", -+ "writethrough", -+ "writeback", -+ "writearound", -+ "none", -+ NULL -+}; -+ -+const char * const bch2_dev_state[] = { -+ "readwrite", -+ "readonly", -+ "failed", -+ "spare", -+ NULL -+}; -+ -+void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src) -+{ -+#define x(_name, ...) \ -+ if (opt_defined(src, _name)) \ -+ opt_set(*dst, _name, src._name); -+ -+ BCH_OPTS() -+#undef x -+} -+ -+bool bch2_opt_defined_by_id(const struct bch_opts *opts, enum bch_opt_id id) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Opt_##_name: \ -+ return opt_defined(*opts, _name); -+ BCH_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+u64 bch2_opt_get_by_id(const struct bch_opts *opts, enum bch_opt_id id) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Opt_##_name: \ -+ return opts->_name; -+ BCH_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_opt_set_by_id(struct bch_opts *opts, enum bch_opt_id id, u64 v) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Opt_##_name: \ -+ opt_set(*opts, _name, v); \ -+ break; -+ BCH_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+/* -+ * Initial options from superblock - here we don't want any options undefined, -+ * any options the superblock doesn't specify are set to 0: -+ */ -+struct bch_opts bch2_opts_from_sb(struct bch_sb *sb) -+{ -+ struct bch_opts opts = bch2_opts_empty(); -+ -+#define x(_name, _bits, _mode, _type, _sb_opt, ...) \ -+ if (_sb_opt != NO_SB_OPT) \ -+ opt_set(opts, _name, _sb_opt(sb)); -+ BCH_OPTS() -+#undef x -+ -+ return opts; -+} -+ -+const struct bch_option bch2_opt_table[] = { -+#define OPT_BOOL() .type = BCH_OPT_BOOL -+#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, .min = _min, .max = _max -+#define OPT_SECTORS(_min, _max) .type = BCH_OPT_SECTORS, .min = _min, .max = _max -+#define OPT_STR(_choices) .type = BCH_OPT_STR, .choices = _choices -+#define OPT_FN(_fn) .type = BCH_OPT_FN, \ -+ .parse = _fn##_parse, \ -+ .to_text = _fn##_to_text -+ -+#define x(_name, _bits, _mode, _type, _sb_opt, _default, _hint, _help) \ -+ [Opt_##_name] = { \ -+ .attr = { \ -+ .name = #_name, \ -+ .mode = (_mode) & OPT_RUNTIME ? 
0644 : 0444, \ -+ }, \ -+ .mode = _mode, \ -+ .hint = _hint, \ -+ .help = _help, \ -+ .set_sb = SET_##_sb_opt, \ -+ _type \ -+ }, -+ -+ BCH_OPTS() -+#undef x -+}; -+ -+int bch2_opt_lookup(const char *name) -+{ -+ const struct bch_option *i; -+ -+ for (i = bch2_opt_table; -+ i < bch2_opt_table + ARRAY_SIZE(bch2_opt_table); -+ i++) -+ if (!strcmp(name, i->attr.name)) -+ return i - bch2_opt_table; -+ -+ return -1; -+} -+ -+struct synonym { -+ const char *s1, *s2; -+}; -+ -+static const struct synonym bch_opt_synonyms[] = { -+ { "quota", "usrquota" }, -+}; -+ -+static int bch2_mount_opt_lookup(const char *name) -+{ -+ const struct synonym *i; -+ -+ for (i = bch_opt_synonyms; -+ i < bch_opt_synonyms + ARRAY_SIZE(bch_opt_synonyms); -+ i++) -+ if (!strcmp(name, i->s1)) -+ name = i->s2; -+ -+ return bch2_opt_lookup(name); -+} -+ -+int bch2_opt_parse(struct bch_fs *c, const struct bch_option *opt, -+ const char *val, u64 *res) -+{ -+ ssize_t ret; -+ -+ switch (opt->type) { -+ case BCH_OPT_BOOL: -+ ret = kstrtou64(val, 10, res); -+ if (ret < 0) -+ return ret; -+ -+ if (*res > 1) -+ return -ERANGE; -+ break; -+ case BCH_OPT_UINT: -+ ret = kstrtou64(val, 10, res); -+ if (ret < 0) -+ return ret; -+ -+ if (*res < opt->min || *res >= opt->max) -+ return -ERANGE; -+ break; -+ case BCH_OPT_SECTORS: -+ ret = bch2_strtou64_h(val, res); -+ if (ret < 0) -+ return ret; -+ -+ if (*res & 511) -+ return -EINVAL; -+ -+ *res >>= 9; -+ -+ if (*res < opt->min || *res >= opt->max) -+ return -ERANGE; -+ break; -+ case BCH_OPT_STR: -+ ret = match_string(opt->choices, -1, val); -+ if (ret < 0) -+ return ret; -+ -+ *res = ret; -+ break; -+ case BCH_OPT_FN: -+ if (!c) -+ return -EINVAL; -+ -+ return opt->parse(c, val, res); -+ } -+ -+ return 0; -+} -+ -+void bch2_opt_to_text(struct printbuf *out, struct bch_fs *c, -+ const struct bch_option *opt, u64 v, -+ unsigned flags) -+{ -+ if (flags & OPT_SHOW_MOUNT_STYLE) { -+ if (opt->type == BCH_OPT_BOOL) { -+ pr_buf(out, "%s%s", -+ v ? 
"" : "no", -+ opt->attr.name); -+ return; -+ } -+ -+ pr_buf(out, "%s=", opt->attr.name); -+ } -+ -+ switch (opt->type) { -+ case BCH_OPT_BOOL: -+ case BCH_OPT_UINT: -+ pr_buf(out, "%lli", v); -+ break; -+ case BCH_OPT_SECTORS: -+ bch2_hprint(out, v); -+ break; -+ case BCH_OPT_STR: -+ if (flags & OPT_SHOW_FULL_LIST) -+ bch2_string_opt_to_text(out, opt->choices, v); -+ else -+ pr_buf(out, opt->choices[v]); -+ break; -+ case BCH_OPT_FN: -+ opt->to_text(out, c, v); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v) -+{ -+ int ret = 0; -+ -+ switch (id) { -+ case Opt_compression: -+ case Opt_background_compression: -+ ret = bch2_check_set_has_compressed_data(c, v); -+ break; -+ case Opt_erasure_code: -+ if (v) -+ bch2_check_set_feature(c, BCH_FEATURE_ec); -+ break; -+ } -+ -+ return ret; -+} -+ -+int bch2_opts_check_may_set(struct bch_fs *c) -+{ -+ unsigned i; -+ int ret; -+ -+ for (i = 0; i < bch2_opts_nr; i++) { -+ ret = bch2_opt_check_may_set(c, i, -+ bch2_opt_get_by_id(&c->opts, i)); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+int bch2_parse_mount_opts(struct bch_opts *opts, char *options) -+{ -+ char *opt, *name, *val; -+ int ret, id; -+ u64 v; -+ -+ while ((opt = strsep(&options, ",")) != NULL) { -+ name = strsep(&opt, "="); -+ val = opt; -+ -+ if (val) { -+ id = bch2_mount_opt_lookup(name); -+ if (id < 0) -+ goto bad_opt; -+ -+ ret = bch2_opt_parse(NULL, &bch2_opt_table[id], val, &v); -+ if (ret < 0) -+ goto bad_val; -+ } else { -+ id = bch2_mount_opt_lookup(name); -+ v = 1; -+ -+ if (id < 0 && -+ !strncmp("no", name, 2)) { -+ id = bch2_mount_opt_lookup(name + 2); -+ v = 0; -+ } -+ -+ if (id < 0) -+ goto bad_opt; -+ -+ if (bch2_opt_table[id].type != BCH_OPT_BOOL) -+ goto no_val; -+ } -+ -+ if (!(bch2_opt_table[id].mode & OPT_MOUNT)) -+ goto bad_opt; -+ -+ if (id == Opt_acl && -+ !IS_ENABLED(CONFIG_BCACHEFS_POSIX_ACL)) -+ goto bad_opt; -+ -+ if ((id == Opt_usrquota || -+ id == Opt_grpquota) && -+ !IS_ENABLED(CONFIG_BCACHEFS_QUOTA)) -+ goto bad_opt; -+ -+ bch2_opt_set_by_id(opts, id, v); -+ } -+ -+ return 0; -+bad_opt: -+ pr_err("Bad mount option %s", name); -+ return -1; -+bad_val: -+ pr_err("Invalid value %s for mount option %s", val, name); -+ return -1; -+no_val: -+ pr_err("Mount option %s requires a value", name); -+ return -1; -+} -+ -+/* io opts: */ -+ -+struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts src) -+{ -+ struct bch_io_opts ret = { 0 }; -+#define x(_name, _bits) \ -+ if (opt_defined(src, _name)) \ -+ opt_set(ret, _name, src._name); -+ BCH_INODE_OPTS() -+#undef x -+ return ret; -+} -+ -+struct bch_opts bch2_inode_opts_to_opts(struct bch_io_opts src) -+{ -+ struct bch_opts ret = { 0 }; -+#define x(_name, _bits) \ -+ if (opt_defined(src, _name)) \ -+ opt_set(ret, _name, src._name); -+ BCH_INODE_OPTS() -+#undef x -+ return ret; -+} -+ -+void bch2_io_opts_apply(struct bch_io_opts *dst, struct bch_io_opts src) -+{ -+#define x(_name, _bits) \ -+ if (opt_defined(src, _name)) \ -+ opt_set(*dst, _name, src._name); -+ BCH_INODE_OPTS() -+#undef x -+} -+ -+bool bch2_opt_is_inode_opt(enum bch_opt_id id) -+{ -+ static const enum bch_opt_id inode_opt_list[] = { -+#define x(_name, _bits) Opt_##_name, -+ BCH_INODE_OPTS() -+#undef x -+ }; -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(inode_opt_list); i++) -+ if (inode_opt_list[i] == id) -+ return true; -+ -+ return false; -+} -diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -new file mode 100644 -index 000000000000..3b051e7a8f1d ---- /dev/null -+++ 
b/fs/bcachefs/opts.h -@@ -0,0 +1,435 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_OPTS_H -+#define _BCACHEFS_OPTS_H -+ -+#include -+#include -+#include -+#include -+#include "bcachefs_format.h" -+ -+extern const char * const bch2_error_actions[]; -+extern const char * const bch2_sb_features[]; -+extern const char * const bch2_csum_opts[]; -+extern const char * const bch2_compression_opts[]; -+extern const char * const bch2_str_hash_types[]; -+extern const char * const bch2_data_types[]; -+extern const char * const bch2_cache_replacement_policies[]; -+extern const char * const bch2_cache_modes[]; -+extern const char * const bch2_dev_state[]; -+ -+/* -+ * Mount options; we also store defaults in the superblock. -+ * -+ * Also exposed via sysfs: if an option is writeable, and it's also stored in -+ * the superblock, changing it via sysfs (currently? might change this) also -+ * updates the superblock. -+ * -+ * We store options as signed integers, where -1 means undefined. This means we -+ * can pass the mount options to bch2_fs_alloc() as a whole struct, and then only -+ * apply the options from that struct that are defined. -+ */ -+ -+/* dummy option, for options that aren't stored in the superblock */ -+LE64_BITMASK(NO_SB_OPT, struct bch_sb, flags[0], 0, 0); -+ -+/* When can be set: */ -+enum opt_mode { -+ OPT_FORMAT = (1 << 0), -+ OPT_MOUNT = (1 << 1), -+ OPT_RUNTIME = (1 << 2), -+ OPT_INODE = (1 << 3), -+ OPT_DEVICE = (1 << 4), -+}; -+ -+enum opt_type { -+ BCH_OPT_BOOL, -+ BCH_OPT_UINT, -+ BCH_OPT_SECTORS, -+ BCH_OPT_STR, -+ BCH_OPT_FN, -+}; -+ -+/** -+ * x(name, shortopt, type, in mem type, mode, sb_opt) -+ * -+ * @name - name of mount option, sysfs attribute, and struct bch_opts -+ * member -+ * -+ * @mode - when opt may be set -+ * -+ * @sb_option - name of corresponding superblock option -+ * -+ * @type - one of OPT_BOOL, OPT_UINT, OPT_STR -+ */ -+ -+/* -+ * XXX: add fields for -+ * - default value -+ * - helptext -+ */ -+ -+#ifdef __KERNEL__ -+#define RATELIMIT_ERRORS true -+#else -+#define RATELIMIT_ERRORS false -+#endif -+ -+#define BCH_OPTS() \ -+ x(block_size, u16, \ -+ OPT_FORMAT, \ -+ OPT_SECTORS(1, 128), \ -+ BCH_SB_BLOCK_SIZE, 8, \ -+ "size", NULL) \ -+ x(btree_node_size, u16, \ -+ OPT_FORMAT, \ -+ OPT_SECTORS(1, 128), \ -+ BCH_SB_BTREE_NODE_SIZE, 512, \ -+ "size", "Btree node size, default 256k") \ -+ x(errors, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_STR(bch2_error_actions), \ -+ BCH_SB_ERROR_ACTION, BCH_ON_ERROR_RO, \ -+ NULL, "Action to take on filesystem error") \ -+ x(metadata_replicas, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_META_REPLICAS_WANT, 1, \ -+ "#", "Number of metadata replicas") \ -+ x(data_replicas, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_DATA_REPLICAS_WANT, 1, \ -+ "#", "Number of data replicas") \ -+ x(metadata_replicas_required, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_META_REPLICAS_REQ, 1, \ -+ "#", NULL) \ -+ x(data_replicas_required, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_DATA_REPLICAS_REQ, 1, \ -+ "#", NULL) \ -+ x(metadata_checksum, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_STR(bch2_csum_opts), \ -+ BCH_SB_META_CSUM_TYPE, BCH_CSUM_OPT_CRC32C, \ -+ NULL, NULL) \ -+ x(data_checksum, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_STR(bch2_csum_opts), \ -+ BCH_SB_DATA_CSUM_TYPE, BCH_CSUM_OPT_CRC32C, \ -+ NULL, 
NULL) \ -+ x(compression, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_STR(bch2_compression_opts), \ -+ BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_none, \ -+ NULL, NULL) \ -+ x(background_compression, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_STR(bch2_compression_opts), \ -+ BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_none, \ -+ NULL, NULL) \ -+ x(str_hash, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_STR(bch2_str_hash_types), \ -+ BCH_SB_STR_HASH_TYPE, BCH_STR_HASH_OPT_SIPHASH, \ -+ NULL, "Hash function for directory entries and xattrs")\ -+ x(foreground_target, u16, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_FN(bch2_opt_target), \ -+ BCH_SB_FOREGROUND_TARGET, 0, \ -+ "(target)", "Device or disk group for foreground writes") \ -+ x(background_target, u16, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_FN(bch2_opt_target), \ -+ BCH_SB_BACKGROUND_TARGET, 0, \ -+ "(target)", "Device or disk group to move data to in the background")\ -+ x(promote_target, u16, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_FN(bch2_opt_target), \ -+ BCH_SB_PROMOTE_TARGET, 0, \ -+ "(target)", "Device or disk group to promote data to on read")\ -+ x(erasure_code, u16, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_BOOL(), \ -+ BCH_SB_ERASURE_CODE, false, \ -+ NULL, "Enable erasure coding (DO NOT USE YET)") \ -+ x(inodes_32bit, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_INODE_32BIT, false, \ -+ NULL, "Constrain inode numbers to 32 bits") \ -+ x(gc_reserve_percent, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_UINT(5, 21), \ -+ BCH_SB_GC_RESERVE, 8, \ -+ "%", "Percentage of disk space to reserve for copygc")\ -+ x(gc_reserve_bytes, u64, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_SECTORS(0, U64_MAX), \ -+ BCH_SB_GC_RESERVE_BYTES, 0, \ -+ "%", "Amount of disk space to reserve for copygc\n" \ -+ "Takes precedence over gc_reserve_percent if set")\ -+ x(root_reserve_percent, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_UINT(0, 100), \ -+ BCH_SB_ROOT_RESERVE, 0, \ -+ "%", "Percentage of disk space to reserve for superuser")\ -+ x(wide_macs, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_128_BIT_MACS, false, \ -+ NULL, "Store full 128 bits of cryptographic MACs, instead of 80")\ -+ x(inline_data, u8, \ -+ OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Enable inline data extents") \ -+ x(acl, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_POSIX_ACL, true, \ -+ NULL, "Enable POSIX acls") \ -+ x(usrquota, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_USRQUOTA, false, \ -+ NULL, "Enable user quotas") \ -+ x(grpquota, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_GRPQUOTA, false, \ -+ NULL, "Enable group quotas") \ -+ x(prjquota, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_PRJQUOTA, false, \ -+ NULL, "Enable project quotas") \ -+ x(reflink, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_REFLINK, true, \ -+ NULL, "Enable reflink support") \ -+ x(degraded, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Allow mounting in degraded mode") \ -+ x(discard, u8, \ -+ OPT_MOUNT|OPT_DEVICE, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Enable discard/TRIM support") \ -+ x(verbose, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Extra debugging information during mount/recovery")\ -+ x(journal_flush_disabled, u8, \ 
-+ OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Disable journal flush on sync/fsync\n" \ -+ "If enabled, writes can be lost, but only since the\n"\ -+ "last journal write (default 1 second)") \ -+ x(fsck, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Run fsck on mount") \ -+ x(fix_errors, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Fix errors during fsck without asking") \ -+ x(ratelimit_errors, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, RATELIMIT_ERRORS, \ -+ NULL, "Ratelimit error messages during fsck") \ -+ x(nochanges, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Super read only mode - no writes at all will be issued,\n"\ -+ "even if we have to replay the journal") \ -+ x(norecovery, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Don't replay the journal") \ -+ x(keep_journal, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Don't free journal entries/keys after startup")\ -+ x(read_entire_journal, u8, \ -+ 0, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Read all journal entries, not just dirty ones")\ -+ x(noexcl, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Don't open device in exclusive mode") \ -+ x(sb, u64, \ -+ OPT_MOUNT, \ -+ OPT_UINT(0, S64_MAX), \ -+ NO_SB_OPT, BCH_SB_SECTOR, \ -+ "offset", "Sector offset of superblock") \ -+ x(read_only, u8, \ -+ 0, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, NULL) \ -+ x(nostart, u8, \ -+ 0, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Don\'t start filesystem, only open devices") \ -+ x(reconstruct_alloc, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Reconstruct alloc btree") \ -+ x(version_upgrade, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Set superblock to latest version,\n" \ -+ "allowing any new features to be used") \ -+ x(project, u8, \ -+ OPT_INODE, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, NULL) \ -+ x(fs_size, u64, \ -+ OPT_DEVICE, \ -+ OPT_SECTORS(0, S64_MAX), \ -+ NO_SB_OPT, 0, \ -+ "size", "Size of filesystem on device") \ -+ x(bucket, u32, \ -+ OPT_DEVICE, \ -+ OPT_SECTORS(0, S64_MAX), \ -+ NO_SB_OPT, 0, \ -+ "size", "Size of filesystem on device") \ -+ x(durability, u8, \ -+ OPT_DEVICE, \ -+ OPT_UINT(0, BCH_REPLICAS_MAX), \ -+ NO_SB_OPT, 1, \ -+ "n", "Data written to this device will be considered\n"\ -+ "to have already been replicated n times") -+ -+struct bch_opts { -+#define x(_name, _bits, ...) unsigned _name##_defined:1; -+ BCH_OPTS() -+#undef x -+ -+#define x(_name, _bits, ...) _bits _name; -+ BCH_OPTS() -+#undef x -+}; -+ -+static const struct bch_opts bch2_opts_default = { -+#define x(_name, _bits, _mode, _type, _sb_opt, _default, ...) \ -+ ._name##_defined = true, \ -+ ._name = _default, \ -+ -+ BCH_OPTS() -+#undef x -+}; -+ -+#define opt_defined(_opts, _name) ((_opts)._name##_defined) -+ -+#define opt_get(_opts, _name) \ -+ (opt_defined(_opts, _name) ? (_opts)._name : bch2_opts_default._name) -+ -+#define opt_set(_opts, _name, _v) \ -+do { \ -+ (_opts)._name##_defined = true; \ -+ (_opts)._name = _v; \ -+} while (0) -+ -+static inline struct bch_opts bch2_opts_empty(void) -+{ -+ return (struct bch_opts) { 0 }; -+} -+ -+void bch2_opts_apply(struct bch_opts *, struct bch_opts); -+ -+enum bch_opt_id { -+#define x(_name, ...) 
Opt_##_name, -+ BCH_OPTS() -+#undef x -+ bch2_opts_nr -+}; -+ -+struct bch_fs; -+struct printbuf; -+ -+struct bch_option { -+ struct attribute attr; -+ void (*set_sb)(struct bch_sb *, u64); -+ enum opt_mode mode; -+ enum opt_type type; -+ -+ union { -+ struct { -+ u64 min, max; -+ }; -+ struct { -+ const char * const *choices; -+ }; -+ struct { -+ int (*parse)(struct bch_fs *, const char *, u64 *); -+ void (*to_text)(struct printbuf *, struct bch_fs *, u64); -+ }; -+ }; -+ -+ const char *hint; -+ const char *help; -+ -+}; -+ -+extern const struct bch_option bch2_opt_table[]; -+ -+bool bch2_opt_defined_by_id(const struct bch_opts *, enum bch_opt_id); -+u64 bch2_opt_get_by_id(const struct bch_opts *, enum bch_opt_id); -+void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64); -+ -+struct bch_opts bch2_opts_from_sb(struct bch_sb *); -+ -+int bch2_opt_lookup(const char *); -+int bch2_opt_parse(struct bch_fs *, const struct bch_option *, const char *, u64 *); -+ -+#define OPT_SHOW_FULL_LIST (1 << 0) -+#define OPT_SHOW_MOUNT_STYLE (1 << 1) -+ -+void bch2_opt_to_text(struct printbuf *, struct bch_fs *, -+ const struct bch_option *, u64, unsigned); -+ -+int bch2_opt_check_may_set(struct bch_fs *, int, u64); -+int bch2_opts_check_may_set(struct bch_fs *); -+int bch2_parse_mount_opts(struct bch_opts *, char *); -+ -+/* inode opts: */ -+ -+struct bch_io_opts { -+#define x(_name, _bits) unsigned _name##_defined:1; -+ BCH_INODE_OPTS() -+#undef x -+ -+#define x(_name, _bits) u##_bits _name; -+ BCH_INODE_OPTS() -+#undef x -+}; -+ -+struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts); -+struct bch_opts bch2_inode_opts_to_opts(struct bch_io_opts); -+void bch2_io_opts_apply(struct bch_io_opts *, struct bch_io_opts); -+bool bch2_opt_is_inode_opt(enum bch_opt_id); -+ -+#endif /* _BCACHEFS_OPTS_H */ -diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c -new file mode 100644 -index 000000000000..d3032a46e7f3 ---- /dev/null -+++ b/fs/bcachefs/quota.c -@@ -0,0 +1,783 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "btree_update.h" -+#include "inode.h" -+#include "quota.h" -+#include "super-io.h" -+ -+static const char *bch2_sb_validate_quota(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_quota *q = field_to_type(f, quota); -+ -+ if (vstruct_bytes(&q->field) != sizeof(*q)) -+ return "invalid field quota: wrong size"; -+ -+ return NULL; -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_quota = { -+ .validate = bch2_sb_validate_quota, -+}; -+ -+const char *bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (k.k->p.inode >= QTYP_NR) -+ return "invalid quota type"; -+ -+ if (bkey_val_bytes(k.k) != sizeof(struct bch_quota)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+static const char * const bch2_quota_counters[] = { -+ "space", -+ "inodes", -+}; -+ -+void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_quota dq = bkey_s_c_to_quota(k); -+ unsigned i; -+ -+ for (i = 0; i < Q_COUNTERS; i++) -+ pr_buf(out, "%s hardlimit %llu softlimit %llu", -+ bch2_quota_counters[i], -+ le64_to_cpu(dq.v->c[i].hardlimit), -+ le64_to_cpu(dq.v->c[i].softlimit)); -+} -+ -+#ifdef CONFIG_BCACHEFS_QUOTA -+ -+#include -+#include -+#include -+ -+static inline unsigned __next_qtype(unsigned i, unsigned qtypes) -+{ -+ qtypes >>= i; -+ return qtypes ? 
i + __ffs(qtypes) : QTYP_NR; -+} -+ -+#define for_each_set_qtype(_c, _i, _q, _qtypes) \ -+ for (_i = 0; \ -+ (_i = __next_qtype(_i, _qtypes), \ -+ _q = &(_c)->quotas[_i], \ -+ _i < QTYP_NR); \ -+ _i++) -+ -+static bool ignore_hardlimit(struct bch_memquota_type *q) -+{ -+ if (capable(CAP_SYS_RESOURCE)) -+ return true; -+#if 0 -+ struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; -+ -+ return capable(CAP_SYS_RESOURCE) && -+ (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || -+ !(info->dqi_flags & DQF_ROOT_SQUASH)); -+#endif -+ return false; -+} -+ -+enum quota_msg { -+ SOFTWARN, /* Softlimit reached */ -+ SOFTLONGWARN, /* Grace time expired */ -+ HARDWARN, /* Hardlimit reached */ -+ -+ HARDBELOW, /* Usage got below inode hardlimit */ -+ SOFTBELOW, /* Usage got below inode softlimit */ -+}; -+ -+static int quota_nl[][Q_COUNTERS] = { -+ [HARDWARN][Q_SPC] = QUOTA_NL_BHARDWARN, -+ [SOFTLONGWARN][Q_SPC] = QUOTA_NL_BSOFTLONGWARN, -+ [SOFTWARN][Q_SPC] = QUOTA_NL_BSOFTWARN, -+ [HARDBELOW][Q_SPC] = QUOTA_NL_BHARDBELOW, -+ [SOFTBELOW][Q_SPC] = QUOTA_NL_BSOFTBELOW, -+ -+ [HARDWARN][Q_INO] = QUOTA_NL_IHARDWARN, -+ [SOFTLONGWARN][Q_INO] = QUOTA_NL_ISOFTLONGWARN, -+ [SOFTWARN][Q_INO] = QUOTA_NL_ISOFTWARN, -+ [HARDBELOW][Q_INO] = QUOTA_NL_IHARDBELOW, -+ [SOFTBELOW][Q_INO] = QUOTA_NL_ISOFTBELOW, -+}; -+ -+struct quota_msgs { -+ u8 nr; -+ struct { -+ u8 qtype; -+ u8 msg; -+ } m[QTYP_NR * Q_COUNTERS]; -+}; -+ -+static void prepare_msg(unsigned qtype, -+ enum quota_counters counter, -+ struct quota_msgs *msgs, -+ enum quota_msg msg_type) -+{ -+ BUG_ON(msgs->nr >= ARRAY_SIZE(msgs->m)); -+ -+ msgs->m[msgs->nr].qtype = qtype; -+ msgs->m[msgs->nr].msg = quota_nl[msg_type][counter]; -+ msgs->nr++; -+} -+ -+static void prepare_warning(struct memquota_counter *qc, -+ unsigned qtype, -+ enum quota_counters counter, -+ struct quota_msgs *msgs, -+ enum quota_msg msg_type) -+{ -+ if (qc->warning_issued & (1 << msg_type)) -+ return; -+ -+ prepare_msg(qtype, counter, msgs, msg_type); -+} -+ -+static void flush_warnings(struct bch_qid qid, -+ struct super_block *sb, -+ struct quota_msgs *msgs) -+{ -+ unsigned i; -+ -+ for (i = 0; i < msgs->nr; i++) -+ quota_send_warning(make_kqid(&init_user_ns, msgs->m[i].qtype, qid.q[i]), -+ sb->s_dev, msgs->m[i].msg); -+} -+ -+static int bch2_quota_check_limit(struct bch_fs *c, -+ unsigned qtype, -+ struct bch_memquota *mq, -+ struct quota_msgs *msgs, -+ enum quota_counters counter, -+ s64 v, -+ enum quota_acct_mode mode) -+{ -+ struct bch_memquota_type *q = &c->quotas[qtype]; -+ struct memquota_counter *qc = &mq->c[counter]; -+ u64 n = qc->v + v; -+ -+ BUG_ON((s64) n < 0); -+ -+ if (mode == KEY_TYPE_QUOTA_NOCHECK) -+ return 0; -+ -+ if (v <= 0) { -+ if (n < qc->hardlimit && -+ (qc->warning_issued & (1 << HARDWARN))) { -+ qc->warning_issued &= ~(1 << HARDWARN); -+ prepare_msg(qtype, counter, msgs, HARDBELOW); -+ } -+ -+ if (n < qc->softlimit && -+ (qc->warning_issued & (1 << SOFTWARN))) { -+ qc->warning_issued &= ~(1 << SOFTWARN); -+ prepare_msg(qtype, counter, msgs, SOFTBELOW); -+ } -+ -+ qc->warning_issued = 0; -+ return 0; -+ } -+ -+ if (qc->hardlimit && -+ qc->hardlimit < n && -+ !ignore_hardlimit(q)) { -+ if (mode == KEY_TYPE_QUOTA_PREALLOC) -+ return -EDQUOT; -+ -+ prepare_warning(qc, qtype, counter, msgs, HARDWARN); -+ } -+ -+ if (qc->softlimit && -+ qc->softlimit < n && -+ qc->timer && -+ ktime_get_real_seconds() >= qc->timer && -+ !ignore_hardlimit(q)) { -+ if (mode == KEY_TYPE_QUOTA_PREALLOC) -+ return -EDQUOT; -+ -+ prepare_warning(qc, qtype, counter, 
msgs, SOFTLONGWARN); -+ } -+ -+ if (qc->softlimit && -+ qc->softlimit < n && -+ qc->timer == 0) { -+ if (mode == KEY_TYPE_QUOTA_PREALLOC) -+ return -EDQUOT; -+ -+ prepare_warning(qc, qtype, counter, msgs, SOFTWARN); -+ -+ /* XXX is this the right one? */ -+ qc->timer = ktime_get_real_seconds() + -+ q->limits[counter].warnlimit; -+ } -+ -+ return 0; -+} -+ -+int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid, -+ enum quota_counters counter, s64 v, -+ enum quota_acct_mode mode) -+{ -+ unsigned qtypes = enabled_qtypes(c); -+ struct bch_memquota_type *q; -+ struct bch_memquota *mq[QTYP_NR]; -+ struct quota_msgs msgs; -+ unsigned i; -+ int ret = 0; -+ -+ memset(&msgs, 0, sizeof(msgs)); -+ -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_lock_nested(&q->lock, i); -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ mq[i] = genradix_ptr_alloc(&q->table, qid.q[i], GFP_NOFS); -+ if (!mq[i]) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ ret = bch2_quota_check_limit(c, i, mq[i], &msgs, counter, v, mode); -+ if (ret) -+ goto err; -+ } -+ -+ for_each_set_qtype(c, i, q, qtypes) -+ mq[i]->c[counter].v += v; -+err: -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_unlock(&q->lock); -+ -+ flush_warnings(qid, c->vfs_sb, &msgs); -+ -+ return ret; -+} -+ -+static void __bch2_quota_transfer(struct bch_memquota *src_q, -+ struct bch_memquota *dst_q, -+ enum quota_counters counter, s64 v) -+{ -+ BUG_ON(v > src_q->c[counter].v); -+ BUG_ON(v + dst_q->c[counter].v < v); -+ -+ src_q->c[counter].v -= v; -+ dst_q->c[counter].v += v; -+} -+ -+int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes, -+ struct bch_qid dst, -+ struct bch_qid src, u64 space, -+ enum quota_acct_mode mode) -+{ -+ struct bch_memquota_type *q; -+ struct bch_memquota *src_q[3], *dst_q[3]; -+ struct quota_msgs msgs; -+ unsigned i; -+ int ret = 0; -+ -+ qtypes &= enabled_qtypes(c); -+ -+ memset(&msgs, 0, sizeof(msgs)); -+ -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_lock_nested(&q->lock, i); -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ src_q[i] = genradix_ptr_alloc(&q->table, src.q[i], GFP_NOFS); -+ dst_q[i] = genradix_ptr_alloc(&q->table, dst.q[i], GFP_NOFS); -+ -+ if (!src_q[i] || !dst_q[i]) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_SPC, -+ dst_q[i]->c[Q_SPC].v + space, -+ mode); -+ if (ret) -+ goto err; -+ -+ ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_INO, -+ dst_q[i]->c[Q_INO].v + 1, -+ mode); -+ if (ret) -+ goto err; -+ } -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ __bch2_quota_transfer(src_q[i], dst_q[i], Q_SPC, space); -+ __bch2_quota_transfer(src_q[i], dst_q[i], Q_INO, 1); -+ } -+ -+err: -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_unlock(&q->lock); -+ -+ flush_warnings(dst, c->vfs_sb, &msgs); -+ -+ return ret; -+} -+ -+static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_quota dq; -+ struct bch_memquota_type *q; -+ struct bch_memquota *mq; -+ unsigned i; -+ -+ BUG_ON(k.k->p.inode >= QTYP_NR); -+ -+ switch (k.k->type) { -+ case KEY_TYPE_quota: -+ dq = bkey_s_c_to_quota(k); -+ q = &c->quotas[k.k->p.inode]; -+ -+ mutex_lock(&q->lock); -+ mq = genradix_ptr_alloc(&q->table, k.k->p.offset, GFP_KERNEL); -+ if (!mq) { -+ mutex_unlock(&q->lock); -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i < Q_COUNTERS; i++) { -+ mq->c[i].hardlimit = le64_to_cpu(dq.v->c[i].hardlimit); -+ mq->c[i].softlimit = le64_to_cpu(dq.v->c[i].softlimit); -+ } -+ -+ mutex_unlock(&q->lock); -+ } -+ -+ return 0; -+} -+ -+static int bch2_quota_init_type(struct bch_fs *c, 
enum quota_types type) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_QUOTAS, POS(type, 0), -+ BTREE_ITER_PREFETCH, k, ret) { -+ if (k.k->p.inode != type) -+ break; -+ -+ ret = __bch2_quota_set(c, k); -+ if (ret) -+ break; -+ } -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+void bch2_fs_quota_exit(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(c->quotas); i++) -+ genradix_free(&c->quotas[i].table); -+} -+ -+void bch2_fs_quota_init(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(c->quotas); i++) -+ mutex_init(&c->quotas[i].lock); -+} -+ -+static void bch2_sb_quota_read(struct bch_fs *c) -+{ -+ struct bch_sb_field_quota *sb_quota; -+ unsigned i, j; -+ -+ sb_quota = bch2_sb_get_quota(c->disk_sb.sb); -+ if (!sb_quota) -+ return; -+ -+ for (i = 0; i < QTYP_NR; i++) { -+ struct bch_memquota_type *q = &c->quotas[i]; -+ -+ for (j = 0; j < Q_COUNTERS; j++) { -+ q->limits[j].timelimit = -+ le32_to_cpu(sb_quota->q[i].c[j].timelimit); -+ q->limits[j].warnlimit = -+ le32_to_cpu(sb_quota->q[i].c[j].warnlimit); -+ } -+ } -+} -+ -+int bch2_fs_quota_read(struct bch_fs *c) -+{ -+ unsigned i, qtypes = enabled_qtypes(c); -+ struct bch_memquota_type *q; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bch_inode_unpacked u; -+ struct bkey_s_c k; -+ int ret; -+ -+ mutex_lock(&c->sb_lock); -+ bch2_sb_quota_read(c); -+ mutex_unlock(&c->sb_lock); -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ ret = bch2_quota_init_type(c, i); -+ if (ret) -+ return ret; -+ } -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, -+ BTREE_ITER_PREFETCH, k, ret) { -+ switch (k.k->type) { -+ case KEY_TYPE_inode: -+ ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u); -+ if (ret) -+ return ret; -+ -+ bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors, -+ KEY_TYPE_QUOTA_NOCHECK); -+ bch2_quota_acct(c, bch_qid(&u), Q_INO, 1, -+ KEY_TYPE_QUOTA_NOCHECK); -+ } -+ } -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+/* Enable/disable/delete quotas for an entire filesystem: */ -+ -+static int bch2_quota_enable(struct super_block *sb, unsigned uflags) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ /* Accounting must be enabled at mount time: */ -+ if (uflags & (FS_QUOTA_UDQ_ACCT|FS_QUOTA_GDQ_ACCT|FS_QUOTA_PDQ_ACCT)) -+ return -EINVAL; -+ -+ /* Can't enable enforcement without accounting: */ -+ if ((uflags & FS_QUOTA_UDQ_ENFD) && !c->opts.usrquota) -+ return -EINVAL; -+ -+ if ((uflags & FS_QUOTA_GDQ_ENFD) && !c->opts.grpquota) -+ return -EINVAL; -+ -+ if (uflags & FS_QUOTA_PDQ_ENFD && !c->opts.prjquota) -+ return -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ if (uflags & FS_QUOTA_UDQ_ENFD) -+ SET_BCH_SB_USRQUOTA(c->disk_sb.sb, true); -+ -+ if (uflags & FS_QUOTA_GDQ_ENFD) -+ SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, true); -+ -+ if (uflags & FS_QUOTA_PDQ_ENFD) -+ SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, true); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+static int bch2_quota_disable(struct super_block *sb, unsigned uflags) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ mutex_lock(&c->sb_lock); -+ if (uflags & FS_QUOTA_UDQ_ENFD) -+ SET_BCH_SB_USRQUOTA(c->disk_sb.sb, false); -+ -+ if (uflags & FS_QUOTA_GDQ_ENFD) -+ SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, false); -+ -+ if 
(uflags & FS_QUOTA_PDQ_ENFD) -+ SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, false); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+static int bch2_quota_remove(struct super_block *sb, unsigned uflags) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ int ret; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ if (uflags & FS_USER_QUOTA) { -+ if (c->opts.usrquota) -+ return -EINVAL; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, -+ POS(QTYP_USR, 0), -+ POS(QTYP_USR + 1, 0), -+ NULL); -+ if (ret) -+ return ret; -+ } -+ -+ if (uflags & FS_GROUP_QUOTA) { -+ if (c->opts.grpquota) -+ return -EINVAL; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, -+ POS(QTYP_GRP, 0), -+ POS(QTYP_GRP + 1, 0), -+ NULL); -+ if (ret) -+ return ret; -+ } -+ -+ if (uflags & FS_PROJ_QUOTA) { -+ if (c->opts.prjquota) -+ return -EINVAL; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, -+ POS(QTYP_PRJ, 0), -+ POS(QTYP_PRJ + 1, 0), -+ NULL); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Return quota status information, such as enforcements, quota file inode -+ * numbers etc. -+ */ -+static int bch2_quota_get_state(struct super_block *sb, struct qc_state *state) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ unsigned qtypes = enabled_qtypes(c); -+ unsigned i; -+ -+ memset(state, 0, sizeof(*state)); -+ -+ for (i = 0; i < QTYP_NR; i++) { -+ state->s_state[i].flags |= QCI_SYSFILE; -+ -+ if (!(qtypes & (1 << i))) -+ continue; -+ -+ state->s_state[i].flags |= QCI_ACCT_ENABLED; -+ -+ state->s_state[i].spc_timelimit = c->quotas[i].limits[Q_SPC].timelimit; -+ state->s_state[i].spc_warnlimit = c->quotas[i].limits[Q_SPC].warnlimit; -+ -+ state->s_state[i].ino_timelimit = c->quotas[i].limits[Q_INO].timelimit; -+ state->s_state[i].ino_warnlimit = c->quotas[i].limits[Q_INO].warnlimit; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Adjust quota timers & warnings -+ */ -+static int bch2_quota_set_info(struct super_block *sb, int type, -+ struct qc_info *info) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_sb_field_quota *sb_quota; -+ struct bch_memquota_type *q; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ if (type >= QTYP_NR) -+ return -EINVAL; -+ -+ if (!((1 << type) & enabled_qtypes(c))) -+ return -ESRCH; -+ -+ if (info->i_fieldmask & -+ ~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS)) -+ return -EINVAL; -+ -+ q = &c->quotas[type]; -+ -+ mutex_lock(&c->sb_lock); -+ sb_quota = bch2_sb_get_quota(c->disk_sb.sb); -+ if (!sb_quota) { -+ sb_quota = bch2_sb_resize_quota(&c->disk_sb, -+ sizeof(*sb_quota) / sizeof(u64)); -+ if (!sb_quota) -+ return -ENOSPC; -+ } -+ -+ if (info->i_fieldmask & QC_SPC_TIMER) -+ sb_quota->q[type].c[Q_SPC].timelimit = -+ cpu_to_le32(info->i_spc_timelimit); -+ -+ if (info->i_fieldmask & QC_SPC_WARNS) -+ sb_quota->q[type].c[Q_SPC].warnlimit = -+ cpu_to_le32(info->i_spc_warnlimit); -+ -+ if (info->i_fieldmask & QC_INO_TIMER) -+ sb_quota->q[type].c[Q_INO].timelimit = -+ cpu_to_le32(info->i_ino_timelimit); -+ -+ if (info->i_fieldmask & QC_INO_WARNS) -+ sb_quota->q[type].c[Q_INO].warnlimit = -+ cpu_to_le32(info->i_ino_warnlimit); -+ -+ bch2_sb_quota_read(c); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+/* Get/set individual quotas: */ -+ -+static void __bch2_quota_get(struct qc_dqblk *dst, struct bch_memquota *src) -+{ -+ dst->d_space = src->c[Q_SPC].v << 9; -+ dst->d_spc_hardlimit = src->c[Q_SPC].hardlimit << 9; -+ dst->d_spc_softlimit = src->c[Q_SPC].softlimit << 9; -+ dst->d_spc_timer = 
src->c[Q_SPC].timer; -+ dst->d_spc_warns = src->c[Q_SPC].warns; -+ -+ dst->d_ino_count = src->c[Q_INO].v; -+ dst->d_ino_hardlimit = src->c[Q_INO].hardlimit; -+ dst->d_ino_softlimit = src->c[Q_INO].softlimit; -+ dst->d_ino_timer = src->c[Q_INO].timer; -+ dst->d_ino_warns = src->c[Q_INO].warns; -+} -+ -+static int bch2_get_quota(struct super_block *sb, struct kqid kqid, -+ struct qc_dqblk *qdq) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_memquota_type *q = &c->quotas[kqid.type]; -+ qid_t qid = from_kqid(&init_user_ns, kqid); -+ struct bch_memquota *mq; -+ -+ memset(qdq, 0, sizeof(*qdq)); -+ -+ mutex_lock(&q->lock); -+ mq = genradix_ptr(&q->table, qid); -+ if (mq) -+ __bch2_quota_get(qdq, mq); -+ mutex_unlock(&q->lock); -+ -+ return 0; -+} -+ -+static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid, -+ struct qc_dqblk *qdq) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_memquota_type *q = &c->quotas[kqid->type]; -+ qid_t qid = from_kqid(&init_user_ns, *kqid); -+ struct genradix_iter iter; -+ struct bch_memquota *mq; -+ int ret = 0; -+ -+ mutex_lock(&q->lock); -+ -+ genradix_for_each_from(&q->table, iter, mq, qid) -+ if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) { -+ __bch2_quota_get(qdq, mq); -+ *kqid = make_kqid(current_user_ns(), kqid->type, iter.pos); -+ goto found; -+ } -+ -+ ret = -ENOENT; -+found: -+ mutex_unlock(&q->lock); -+ return ret; -+} -+ -+static int bch2_set_quota_trans(struct btree_trans *trans, -+ struct bkey_i_quota *new_quota, -+ struct qc_dqblk *qdq) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_QUOTAS, new_quota->k.p, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ k = bch2_btree_iter_peek_slot(iter); -+ -+ ret = bkey_err(k); -+ if (unlikely(ret)) -+ return ret; -+ -+ if (k.k->type == KEY_TYPE_quota) -+ new_quota->v = *bkey_s_c_to_quota(k).v; -+ -+ if (qdq->d_fieldmask & QC_SPC_SOFT) -+ new_quota->v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9); -+ if (qdq->d_fieldmask & QC_SPC_HARD) -+ new_quota->v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9); -+ -+ if (qdq->d_fieldmask & QC_INO_SOFT) -+ new_quota->v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit); -+ if (qdq->d_fieldmask & QC_INO_HARD) -+ new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit); -+ -+ return bch2_trans_update(trans, iter, &new_quota->k_i, 0); -+} -+ -+static int bch2_set_quota(struct super_block *sb, struct kqid qid, -+ struct qc_dqblk *qdq) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct btree_trans trans; -+ struct bkey_i_quota new_quota; -+ int ret; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ bkey_quota_init(&new_quota.k_i); -+ new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOUNLOCK, -+ bch2_set_quota_trans(&trans, &new_quota, qdq)) ?: -+ __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i)); -+ -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+const struct quotactl_ops bch2_quotactl_operations = { -+ .quota_enable = bch2_quota_enable, -+ .quota_disable = bch2_quota_disable, -+ .rm_xquota = bch2_quota_remove, -+ -+ .get_state = bch2_quota_get_state, -+ .set_info = bch2_quota_set_info, -+ -+ .get_dqblk = bch2_get_quota, -+ .get_nextdqblk = bch2_get_next_quota, -+ .set_dqblk = bch2_set_quota, -+}; -+ -+#endif /* CONFIG_BCACHEFS_QUOTA */ -diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h -new file mode 100644 -index 
000000000000..51e4f9713ef0 ---- /dev/null -+++ b/fs/bcachefs/quota.h -@@ -0,0 +1,71 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_QUOTA_H -+#define _BCACHEFS_QUOTA_H -+ -+#include "inode.h" -+#include "quota_types.h" -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_quota; -+ -+const char *bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_quota (struct bkey_ops) { \ -+ .key_invalid = bch2_quota_invalid, \ -+ .val_to_text = bch2_quota_to_text, \ -+} -+ -+static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u) -+{ -+ return (struct bch_qid) { -+ .q[QTYP_USR] = u->bi_uid, -+ .q[QTYP_GRP] = u->bi_gid, -+ .q[QTYP_PRJ] = u->bi_project ? u->bi_project - 1 : 0, -+ }; -+} -+ -+static inline unsigned enabled_qtypes(struct bch_fs *c) -+{ -+ return ((c->opts.usrquota << QTYP_USR)| -+ (c->opts.grpquota << QTYP_GRP)| -+ (c->opts.prjquota << QTYP_PRJ)); -+} -+ -+#ifdef CONFIG_BCACHEFS_QUOTA -+ -+int bch2_quota_acct(struct bch_fs *, struct bch_qid, enum quota_counters, -+ s64, enum quota_acct_mode); -+ -+int bch2_quota_transfer(struct bch_fs *, unsigned, struct bch_qid, -+ struct bch_qid, u64, enum quota_acct_mode); -+ -+void bch2_fs_quota_exit(struct bch_fs *); -+void bch2_fs_quota_init(struct bch_fs *); -+int bch2_fs_quota_read(struct bch_fs *); -+ -+extern const struct quotactl_ops bch2_quotactl_operations; -+ -+#else -+ -+static inline int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid, -+ enum quota_counters counter, s64 v, -+ enum quota_acct_mode mode) -+{ -+ return 0; -+} -+ -+static inline int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes, -+ struct bch_qid dst, -+ struct bch_qid src, u64 space, -+ enum quota_acct_mode mode) -+{ -+ return 0; -+} -+ -+static inline void bch2_fs_quota_exit(struct bch_fs *c) {} -+static inline void bch2_fs_quota_init(struct bch_fs *c) {} -+static inline int bch2_fs_quota_read(struct bch_fs *c) { return 0; } -+ -+#endif -+ -+#endif /* _BCACHEFS_QUOTA_H */ -diff --git a/fs/bcachefs/quota_types.h b/fs/bcachefs/quota_types.h -new file mode 100644 -index 000000000000..6a136083d389 ---- /dev/null -+++ b/fs/bcachefs/quota_types.h -@@ -0,0 +1,43 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_QUOTA_TYPES_H -+#define _BCACHEFS_QUOTA_TYPES_H -+ -+#include -+ -+struct bch_qid { -+ u32 q[QTYP_NR]; -+}; -+ -+enum quota_acct_mode { -+ KEY_TYPE_QUOTA_PREALLOC, -+ KEY_TYPE_QUOTA_WARN, -+ KEY_TYPE_QUOTA_NOCHECK, -+}; -+ -+struct memquota_counter { -+ u64 v; -+ u64 hardlimit; -+ u64 softlimit; -+ s64 timer; -+ int warns; -+ int warning_issued; -+}; -+ -+struct bch_memquota { -+ struct memquota_counter c[Q_COUNTERS]; -+}; -+ -+typedef GENRADIX(struct bch_memquota) bch_memquota_table; -+ -+struct quota_limit { -+ u32 timelimit; -+ u32 warnlimit; -+}; -+ -+struct bch_memquota_type { -+ struct quota_limit limits[Q_COUNTERS]; -+ bch_memquota_table table; -+ struct mutex lock; -+}; -+ -+#endif /* _BCACHEFS_QUOTA_TYPES_H */ -diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -new file mode 100644 -index 000000000000..e15a2b1dc5d0 ---- /dev/null -+++ b/fs/bcachefs/rebalance.c -@@ -0,0 +1,334 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "btree_iter.h" -+#include "buckets.h" -+#include "clock.h" -+#include "disk_groups.h" -+#include "extents.h" -+#include "io.h" -+#include "move.h" -+#include "rebalance.h" -+#include "super-io.h" -+ 
-+#include -+#include -+#include -+#include -+ -+/* -+ * Check if an extent should be moved: -+ * returns -1 if it should not be moved, or -+ * device of pointer that should be moved, if known, or INT_MAX if unknown -+ */ -+static int __bch2_rebalance_pred(struct bch_fs *c, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ if (io_opts->background_compression && -+ !bch2_bkey_is_incompressible(k)) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && -+ p.crc.compression_type != -+ bch2_compression_opt_to_type[io_opts->background_compression]) -+ return p.ptr.dev; -+ -+ if (io_opts->background_target) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && -+ !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target)) -+ return p.ptr.dev; -+ -+ return -1; -+} -+ -+void bch2_rebalance_add_key(struct bch_fs *c, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts) -+{ -+ atomic64_t *counter; -+ int dev; -+ -+ dev = __bch2_rebalance_pred(c, k, io_opts); -+ if (dev < 0) -+ return; -+ -+ counter = dev < INT_MAX -+ ? &bch_dev_bkey_exists(c, dev)->rebalance_work -+ : &c->rebalance.work_unknown_dev; -+ -+ if (atomic64_add_return(k.k->size, counter) == k.k->size) -+ rebalance_wakeup(c); -+} -+ -+static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ if (__bch2_rebalance_pred(c, k, io_opts) >= 0) { -+ data_opts->target = io_opts->background_target; -+ data_opts->btree_insert_flags = 0; -+ return DATA_ADD_REPLICAS; -+ } else { -+ return DATA_SKIP; -+ } -+} -+ -+void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors) -+{ -+ if (atomic64_add_return(sectors, &c->rebalance.work_unknown_dev) == -+ sectors) -+ rebalance_wakeup(c); -+} -+ -+struct rebalance_work { -+ int dev_most_full_idx; -+ unsigned dev_most_full_percent; -+ u64 dev_most_full_work; -+ u64 dev_most_full_capacity; -+ u64 total_work; -+}; -+ -+static void rebalance_work_accumulate(struct rebalance_work *w, -+ u64 dev_work, u64 unknown_dev, u64 capacity, int idx) -+{ -+ unsigned percent_full; -+ u64 work = dev_work + unknown_dev; -+ -+ if (work < dev_work || work < unknown_dev) -+ work = U64_MAX; -+ work = min(work, capacity); -+ -+ percent_full = div64_u64(work * 100, capacity); -+ -+ if (percent_full >= w->dev_most_full_percent) { -+ w->dev_most_full_idx = idx; -+ w->dev_most_full_percent = percent_full; -+ w->dev_most_full_work = work; -+ w->dev_most_full_capacity = capacity; -+ } -+ -+ if (w->total_work + dev_work >= w->total_work && -+ w->total_work + dev_work >= dev_work) -+ w->total_work += dev_work; -+} -+ -+static struct rebalance_work rebalance_work(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ struct rebalance_work ret = { .dev_most_full_idx = -1 }; -+ u64 unknown_dev = atomic64_read(&c->rebalance.work_unknown_dev); -+ unsigned i; -+ -+ for_each_online_member(ca, c, i) -+ rebalance_work_accumulate(&ret, -+ atomic64_read(&ca->rebalance_work), -+ unknown_dev, -+ bucket_to_sector(ca, ca->mi.nbuckets - -+ ca->mi.first_bucket), -+ i); -+ -+ rebalance_work_accumulate(&ret, -+ unknown_dev, 0, c->capacity, -1); -+ -+ return ret; -+} -+ -+static void rebalance_work_reset(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ for_each_online_member(ca, c, i) -+ atomic64_set(&ca->rebalance_work, 0); -+ -+ atomic64_set(&c->rebalance.work_unknown_dev, 0); 
-+} -+ -+static unsigned long curr_cputime(void) -+{ -+ u64 utime, stime; -+ -+ task_cputime_adjusted(current, &utime, &stime); -+ return nsecs_to_jiffies(utime + stime); -+} -+ -+static int bch2_rebalance_thread(void *arg) -+{ -+ struct bch_fs *c = arg; -+ struct bch_fs_rebalance *r = &c->rebalance; -+ struct io_clock *clock = &c->io_clock[WRITE]; -+ struct rebalance_work w, p; -+ unsigned long start, prev_start; -+ unsigned long prev_run_time, prev_run_cputime; -+ unsigned long cputime, prev_cputime; -+ unsigned long io_start; -+ long throttle; -+ -+ set_freezable(); -+ -+ io_start = atomic_long_read(&clock->now); -+ p = rebalance_work(c); -+ prev_start = jiffies; -+ prev_cputime = curr_cputime(); -+ -+ while (!kthread_wait_freezable(r->enabled)) { -+ cond_resched(); -+ -+ start = jiffies; -+ cputime = curr_cputime(); -+ -+ prev_run_time = start - prev_start; -+ prev_run_cputime = cputime - prev_cputime; -+ -+ w = rebalance_work(c); -+ BUG_ON(!w.dev_most_full_capacity); -+ -+ if (!w.total_work) { -+ r->state = REBALANCE_WAITING; -+ kthread_wait_freezable(rebalance_work(c).total_work); -+ continue; -+ } -+ -+ /* -+ * If there isn't much work to do, throttle cpu usage: -+ */ -+ throttle = prev_run_cputime * 100 / -+ max(1U, w.dev_most_full_percent) - -+ prev_run_time; -+ -+ if (w.dev_most_full_percent < 20 && throttle > 0) { -+ r->throttled_until_iotime = io_start + -+ div_u64(w.dev_most_full_capacity * -+ (20 - w.dev_most_full_percent), -+ 50); -+ -+ if (atomic_long_read(&clock->now) + clock->max_slop < -+ r->throttled_until_iotime) { -+ r->throttled_until_cputime = start + throttle; -+ r->state = REBALANCE_THROTTLED; -+ -+ bch2_kthread_io_clock_wait(clock, -+ r->throttled_until_iotime, -+ throttle); -+ continue; -+ } -+ } -+ -+ /* minimum 1 mb/sec: */ -+ r->pd.rate.rate = -+ max_t(u64, 1 << 11, -+ r->pd.rate.rate * -+ max(p.dev_most_full_percent, 1U) / -+ max(w.dev_most_full_percent, 1U)); -+ -+ io_start = atomic_long_read(&clock->now); -+ p = w; -+ prev_start = start; -+ prev_cputime = cputime; -+ -+ r->state = REBALANCE_RUNNING; -+ memset(&r->move_stats, 0, sizeof(r->move_stats)); -+ rebalance_work_reset(c); -+ -+ bch2_move_data(c, -+ /* ratelimiting disabled for now */ -+ NULL, /* &r->pd.rate, */ -+ writepoint_ptr(&c->rebalance_write_point), -+ POS_MIN, POS_MAX, -+ rebalance_pred, NULL, -+ &r->move_stats); -+ } -+ -+ return 0; -+} -+ -+ssize_t bch2_rebalance_work_show(struct bch_fs *c, char *buf) -+{ -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ struct bch_fs_rebalance *r = &c->rebalance; -+ struct rebalance_work w = rebalance_work(c); -+ char h1[21], h2[21]; -+ -+ bch2_hprint(&PBUF(h1), w.dev_most_full_work << 9); -+ bch2_hprint(&PBUF(h2), w.dev_most_full_capacity << 9); -+ pr_buf(&out, "fullest_dev (%i):\t%s/%s\n", -+ w.dev_most_full_idx, h1, h2); -+ -+ bch2_hprint(&PBUF(h1), w.total_work << 9); -+ bch2_hprint(&PBUF(h2), c->capacity << 9); -+ pr_buf(&out, "total work:\t\t%s/%s\n", h1, h2); -+ -+ pr_buf(&out, "rate:\t\t\t%u\n", r->pd.rate.rate); -+ -+ switch (r->state) { -+ case REBALANCE_WAITING: -+ pr_buf(&out, "waiting\n"); -+ break; -+ case REBALANCE_THROTTLED: -+ bch2_hprint(&PBUF(h1), -+ (r->throttled_until_iotime - -+ atomic_long_read(&c->io_clock[WRITE].now)) << 9); -+ pr_buf(&out, "throttled for %lu sec or %s io\n", -+ (r->throttled_until_cputime - jiffies) / HZ, -+ h1); -+ break; -+ case REBALANCE_RUNNING: -+ pr_buf(&out, "running\n"); -+ pr_buf(&out, "pos %llu:%llu\n", -+ r->move_stats.pos.inode, -+ r->move_stats.pos.offset); -+ break; -+ } -+ -+ return out.pos - buf; 
-+} -+ -+void bch2_rebalance_stop(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ c->rebalance.pd.rate.rate = UINT_MAX; -+ bch2_ratelimit_reset(&c->rebalance.pd.rate); -+ -+ p = rcu_dereference_protected(c->rebalance.thread, 1); -+ c->rebalance.thread = NULL; -+ -+ if (p) { -+ /* for sychronizing with rebalance_wakeup() */ -+ synchronize_rcu(); -+ -+ kthread_stop(p); -+ put_task_struct(p); -+ } -+} -+ -+int bch2_rebalance_start(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ if (c->opts.nochanges) -+ return 0; -+ -+ p = kthread_create(bch2_rebalance_thread, c, "bch_rebalance"); -+ if (IS_ERR(p)) -+ return PTR_ERR(p); -+ -+ get_task_struct(p); -+ rcu_assign_pointer(c->rebalance.thread, p); -+ wake_up_process(p); -+ return 0; -+} -+ -+void bch2_fs_rebalance_init(struct bch_fs *c) -+{ -+ bch2_pd_controller_init(&c->rebalance.pd); -+ -+ atomic64_set(&c->rebalance.work_unknown_dev, S64_MAX); -+} -diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h -new file mode 100644 -index 000000000000..99e2a1fb6084 ---- /dev/null -+++ b/fs/bcachefs/rebalance.h -@@ -0,0 +1,28 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REBALANCE_H -+#define _BCACHEFS_REBALANCE_H -+ -+#include "rebalance_types.h" -+ -+static inline void rebalance_wakeup(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ rcu_read_lock(); -+ p = rcu_dereference(c->rebalance.thread); -+ if (p) -+ wake_up_process(p); -+ rcu_read_unlock(); -+} -+ -+void bch2_rebalance_add_key(struct bch_fs *, struct bkey_s_c, -+ struct bch_io_opts *); -+void bch2_rebalance_add_work(struct bch_fs *, u64); -+ -+ssize_t bch2_rebalance_work_show(struct bch_fs *, char *); -+ -+void bch2_rebalance_stop(struct bch_fs *); -+int bch2_rebalance_start(struct bch_fs *); -+void bch2_fs_rebalance_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_REBALANCE_H */ -diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h -new file mode 100644 -index 000000000000..192c6be20ced ---- /dev/null -+++ b/fs/bcachefs/rebalance_types.h -@@ -0,0 +1,27 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REBALANCE_TYPES_H -+#define _BCACHEFS_REBALANCE_TYPES_H -+ -+#include "move_types.h" -+ -+enum rebalance_state { -+ REBALANCE_WAITING, -+ REBALANCE_THROTTLED, -+ REBALANCE_RUNNING, -+}; -+ -+struct bch_fs_rebalance { -+ struct task_struct __rcu *thread; -+ struct bch_pd_controller pd; -+ -+ atomic64_t work_unknown_dev; -+ -+ enum rebalance_state state; -+ unsigned long throttled_until_iotime; -+ unsigned long throttled_until_cputime; -+ struct bch_move_stats move_stats; -+ -+ unsigned enabled:1; -+}; -+ -+#endif /* _BCACHEFS_REBALANCE_TYPES_H */ -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -new file mode 100644 -index 000000000000..41b864dcdc39 ---- /dev/null -+++ b/fs/bcachefs/recovery.c -@@ -0,0 +1,1317 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "buckets.h" -+#include "dirent.h" -+#include "ec.h" -+#include "error.h" -+#include "fs-common.h" -+#include "fsck.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "quota.h" -+#include "recovery.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+#include -+#include -+ -+#define QSTR(n) { { { .len = strlen(n) } }, .name = n } -+ -+/* iterate over keys read from the journal: */ -+ -+static struct journal_key 
*journal_key_search(struct journal_keys *journal_keys, -+ enum btree_id id, unsigned level, -+ struct bpos pos) -+{ -+ size_t l = 0, r = journal_keys->nr, m; -+ -+ while (l < r) { -+ m = l + ((r - l) >> 1); -+ if ((cmp_int(id, journal_keys->d[m].btree_id) ?: -+ cmp_int(level, journal_keys->d[m].level) ?: -+ bkey_cmp(pos, journal_keys->d[m].k->k.p)) > 0) -+ l = m + 1; -+ else -+ r = m; -+ } -+ -+ BUG_ON(l < journal_keys->nr && -+ (cmp_int(id, journal_keys->d[l].btree_id) ?: -+ cmp_int(level, journal_keys->d[l].level) ?: -+ bkey_cmp(pos, journal_keys->d[l].k->k.p)) > 0); -+ -+ BUG_ON(l && -+ (cmp_int(id, journal_keys->d[l - 1].btree_id) ?: -+ cmp_int(level, journal_keys->d[l - 1].level) ?: -+ bkey_cmp(pos, journal_keys->d[l - 1].k->k.p)) <= 0); -+ -+ return l < journal_keys->nr ? journal_keys->d + l : NULL; -+} -+ -+static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter) -+{ -+ if (iter->k && -+ iter->k < iter->keys->d + iter->keys->nr && -+ iter->k->btree_id == iter->btree_id && -+ iter->k->level == iter->level) -+ return iter->k->k; -+ -+ iter->k = NULL; -+ return NULL; -+} -+ -+static void bch2_journal_iter_advance(struct journal_iter *iter) -+{ -+ if (iter->k) -+ iter->k++; -+} -+ -+static void bch2_journal_iter_init(struct journal_iter *iter, -+ struct journal_keys *journal_keys, -+ enum btree_id id, unsigned level, -+ struct bpos pos) -+{ -+ iter->btree_id = id; -+ iter->level = level; -+ iter->keys = journal_keys; -+ iter->k = journal_key_search(journal_keys, id, level, pos); -+} -+ -+static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter) -+{ -+ return iter->btree -+ ? bch2_btree_iter_peek(iter->btree) -+ : bch2_btree_node_iter_peek_unpack(&iter->node_iter, -+ iter->b, &iter->unpacked); -+} -+ -+static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter) -+{ -+ if (iter->btree) -+ bch2_btree_iter_next(iter->btree); -+ else -+ bch2_btree_node_iter_advance(&iter->node_iter, iter->b); -+} -+ -+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter) -+{ -+ switch (iter->last) { -+ case none: -+ break; -+ case btree: -+ bch2_journal_iter_advance_btree(iter); -+ break; -+ case journal: -+ bch2_journal_iter_advance(&iter->journal); -+ break; -+ } -+ -+ iter->last = none; -+} -+ -+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter) -+{ -+ struct bkey_s_c ret; -+ -+ while (1) { -+ struct bkey_s_c btree_k = -+ bch2_journal_iter_peek_btree(iter); -+ struct bkey_s_c journal_k = -+ bkey_i_to_s_c(bch2_journal_iter_peek(&iter->journal)); -+ -+ if (btree_k.k && journal_k.k) { -+ int cmp = bkey_cmp(btree_k.k->p, journal_k.k->p); -+ -+ if (!cmp) -+ bch2_journal_iter_advance_btree(iter); -+ -+ iter->last = cmp < 0 ? btree : journal; -+ } else if (btree_k.k) { -+ iter->last = btree; -+ } else if (journal_k.k) { -+ iter->last = journal; -+ } else { -+ iter->last = none; -+ return bkey_s_c_null; -+ } -+ -+ ret = iter->last == journal ? 
journal_k : btree_k; -+ -+ if (iter->b && -+ bkey_cmp(ret.k->p, iter->b->data->max_key) > 0) { -+ iter->journal.k = NULL; -+ iter->last = none; -+ return bkey_s_c_null; -+ } -+ -+ if (!bkey_deleted(ret.k)) -+ break; -+ -+ bch2_btree_and_journal_iter_advance(iter); -+ } -+ -+ return ret; -+} -+ -+struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *iter) -+{ -+ bch2_btree_and_journal_iter_advance(iter); -+ -+ return bch2_btree_and_journal_iter_peek(iter); -+} -+ -+void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *iter, -+ struct btree_trans *trans, -+ struct journal_keys *journal_keys, -+ enum btree_id id, struct bpos pos) -+{ -+ memset(iter, 0, sizeof(*iter)); -+ -+ iter->btree = bch2_trans_get_iter(trans, id, pos, 0); -+ bch2_journal_iter_init(&iter->journal, journal_keys, id, 0, pos); -+} -+ -+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter, -+ struct journal_keys *journal_keys, -+ struct btree *b) -+{ -+ memset(iter, 0, sizeof(*iter)); -+ -+ iter->b = b; -+ bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b); -+ bch2_journal_iter_init(&iter->journal, journal_keys, -+ b->c.btree_id, b->c.level, b->data->min_key); -+} -+ -+/* Walk btree, overlaying keys from the journal: */ -+ -+static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b, -+ struct journal_keys *journal_keys, -+ enum btree_id btree_id, -+ btree_walk_node_fn node_fn, -+ btree_walk_key_fn key_fn) -+{ -+ struct btree_and_journal_iter iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b); -+ -+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { -+ ret = key_fn(c, btree_id, b->c.level, k); -+ if (ret) -+ break; -+ -+ if (b->c.level) { -+ struct btree *child; -+ BKEY_PADDED(k) tmp; -+ -+ bkey_reassemble(&tmp.k, k); -+ k = bkey_i_to_s_c(&tmp.k); -+ -+ bch2_btree_and_journal_iter_advance(&iter); -+ -+ if (b->c.level > 0) { -+ child = bch2_btree_node_get_noiter(c, &tmp.k, -+ b->c.btree_id, b->c.level - 1); -+ ret = PTR_ERR_OR_ZERO(child); -+ if (ret) -+ break; -+ -+ ret = (node_fn ? node_fn(c, b) : 0) ?: -+ bch2_btree_and_journal_walk_recurse(c, child, -+ journal_keys, btree_id, node_fn, key_fn); -+ six_unlock_read(&child->c.lock); -+ -+ if (ret) -+ break; -+ } -+ } else { -+ bch2_btree_and_journal_iter_advance(&iter); -+ } -+ } -+ -+ return ret; -+} -+ -+int bch2_btree_and_journal_walk(struct bch_fs *c, struct journal_keys *journal_keys, -+ enum btree_id btree_id, -+ btree_walk_node_fn node_fn, -+ btree_walk_key_fn key_fn) -+{ -+ struct btree *b = c->btree_roots[btree_id].b; -+ int ret = 0; -+ -+ if (btree_node_fake(b)) -+ return 0; -+ -+ six_lock_read(&b->c.lock, NULL, NULL); -+ ret = (node_fn ? 
node_fn(c, b) : 0) ?: -+ bch2_btree_and_journal_walk_recurse(c, b, journal_keys, btree_id, -+ node_fn, key_fn) ?: -+ key_fn(c, btree_id, b->c.level + 1, bkey_i_to_s_c(&b->key)); -+ six_unlock_read(&b->c.lock); -+ -+ return ret; -+} -+ -+/* sort and dedup all keys in the journal: */ -+ -+void bch2_journal_entries_free(struct list_head *list) -+{ -+ -+ while (!list_empty(list)) { -+ struct journal_replay *i = -+ list_first_entry(list, struct journal_replay, list); -+ list_del(&i->list); -+ kvpfree(i, offsetof(struct journal_replay, j) + -+ vstruct_bytes(&i->j)); -+ } -+} -+ -+/* -+ * When keys compare equal, oldest compares first: -+ */ -+static int journal_sort_key_cmp(const void *_l, const void *_r) -+{ -+ const struct journal_key *l = _l; -+ const struct journal_key *r = _r; -+ -+ return cmp_int(l->btree_id, r->btree_id) ?: -+ cmp_int(l->level, r->level) ?: -+ bkey_cmp(l->k->k.p, r->k->k.p) ?: -+ cmp_int(l->journal_seq, r->journal_seq) ?: -+ cmp_int(l->journal_offset, r->journal_offset); -+} -+ -+void bch2_journal_keys_free(struct journal_keys *keys) -+{ -+ kvfree(keys->d); -+ keys->d = NULL; -+ keys->nr = 0; -+} -+ -+static struct journal_keys journal_keys_sort(struct list_head *journal_entries) -+{ -+ struct journal_replay *p; -+ struct jset_entry *entry; -+ struct bkey_i *k, *_n; -+ struct journal_keys keys = { NULL }; -+ struct journal_key *src, *dst; -+ size_t nr_keys = 0; -+ -+ if (list_empty(journal_entries)) -+ return keys; -+ -+ keys.journal_seq_base = -+ le64_to_cpu(list_last_entry(journal_entries, -+ struct journal_replay, list)->j.last_seq); -+ -+ list_for_each_entry(p, journal_entries, list) { -+ if (le64_to_cpu(p->j.seq) < keys.journal_seq_base) -+ continue; -+ -+ for_each_jset_key(k, _n, entry, &p->j) -+ nr_keys++; -+ } -+ -+ -+ keys.d = kvmalloc(sizeof(keys.d[0]) * nr_keys, GFP_KERNEL); -+ if (!keys.d) -+ goto err; -+ -+ list_for_each_entry(p, journal_entries, list) { -+ if (le64_to_cpu(p->j.seq) < keys.journal_seq_base) -+ continue; -+ -+ for_each_jset_key(k, _n, entry, &p->j) -+ keys.d[keys.nr++] = (struct journal_key) { -+ .btree_id = entry->btree_id, -+ .level = entry->level, -+ .k = k, -+ .journal_seq = le64_to_cpu(p->j.seq) - -+ keys.journal_seq_base, -+ .journal_offset = k->_data - p->j._data, -+ }; -+ } -+ -+ sort(keys.d, keys.nr, sizeof(keys.d[0]), journal_sort_key_cmp, NULL); -+ -+ src = dst = keys.d; -+ while (src < keys.d + keys.nr) { -+ while (src + 1 < keys.d + keys.nr && -+ src[0].btree_id == src[1].btree_id && -+ src[0].level == src[1].level && -+ !bkey_cmp(src[0].k->k.p, src[1].k->k.p)) -+ src++; -+ -+ *dst++ = *src++; -+ } -+ -+ keys.nr = dst - keys.d; -+err: -+ return keys; -+} -+ -+/* journal replay: */ -+ -+static void replay_now_at(struct journal *j, u64 seq) -+{ -+ BUG_ON(seq < j->replay_journal_seq); -+ BUG_ON(seq > j->replay_journal_seq_end); -+ -+ while (j->replay_journal_seq < seq) -+ bch2_journal_pin_put(j, j->replay_journal_seq++); -+} -+ -+static int bch2_extent_replay_key(struct bch_fs *c, enum btree_id btree_id, -+ struct bkey_i *k) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter, *split_iter; -+ /* -+ * We might cause compressed extents to be split, so we need to pass in -+ * a disk_reservation: -+ */ -+ struct disk_reservation disk_res = -+ bch2_disk_reservation_init(c, 0); -+ struct bkey_i *split; -+ struct bpos atomic_end; -+ /* -+ * Some extents aren't equivalent - w.r.t. 
what the triggers do -+ * - if they're split: -+ */ -+ bool remark_if_split = bch2_bkey_sectors_compressed(bkey_i_to_s_c(k)) || -+ k->k.type == KEY_TYPE_reflink_p; -+ bool remark = false; -+ int ret; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ iter = bch2_trans_get_iter(&trans, btree_id, -+ bkey_start_pos(&k->k), -+ BTREE_ITER_INTENT); -+ -+ do { -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ goto err; -+ -+ atomic_end = bpos_min(k->k.p, iter->l[0].b->key.k.p); -+ -+ split = bch2_trans_kmalloc(&trans, bkey_bytes(&k->k)); -+ ret = PTR_ERR_OR_ZERO(split); -+ if (ret) -+ goto err; -+ -+ if (!remark && -+ remark_if_split && -+ bkey_cmp(atomic_end, k->k.p) < 0) { -+ ret = bch2_disk_reservation_add(c, &disk_res, -+ k->k.size * -+ bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(k)), -+ BCH_DISK_RESERVATION_NOFAIL); -+ BUG_ON(ret); -+ -+ remark = true; -+ } -+ -+ bkey_copy(split, k); -+ bch2_cut_front(iter->pos, split); -+ bch2_cut_back(atomic_end, split); -+ -+ split_iter = bch2_trans_copy_iter(&trans, iter); -+ ret = PTR_ERR_OR_ZERO(split_iter); -+ if (ret) -+ goto err; -+ -+ /* -+ * It's important that we don't go through the -+ * extent_handle_overwrites() and extent_update_to_keys() path -+ * here: journal replay is supposed to treat extents like -+ * regular keys -+ */ -+ __bch2_btree_iter_set_pos(split_iter, split->k.p, false); -+ bch2_trans_update(&trans, split_iter, split, !remark -+ ? BTREE_TRIGGER_NORUN -+ : BTREE_TRIGGER_NOOVERWRITES); -+ -+ bch2_btree_iter_set_pos(iter, split->k.p); -+ } while (bkey_cmp(iter->pos, k->k.p) < 0); -+ -+ if (remark) { -+ ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k), -+ 0, -((s64) k->k.size), -+ BTREE_TRIGGER_OVERWRITE); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_trans_commit(&trans, &disk_res, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_JOURNAL_REPLAY); -+err: -+ if (ret == -EINTR) -+ goto retry; -+ -+ bch2_disk_reservation_put(c, &disk_res); -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+static int __bch2_journal_replay_key(struct btree_trans *trans, -+ enum btree_id id, unsigned level, -+ struct bkey_i *k) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ iter = bch2_trans_get_node_iter(trans, id, k->k.p, -+ BTREE_MAX_DEPTH, level, -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ /* -+ * iter->flags & BTREE_ITER_IS_EXTENTS triggers the update path to run -+ * extent_handle_overwrites() and extent_update_to_keys() - but we don't -+ * want that here, journal replay is supposed to treat extents like -+ * regular keys: -+ */ -+ __bch2_btree_iter_set_pos(iter, k->k.p, false); -+ -+ ret = bch2_btree_iter_traverse(iter) ?: -+ bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN); -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static int bch2_journal_replay_key(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bkey_i *k) -+{ -+ return bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_JOURNAL_REPLAY, -+ __bch2_journal_replay_key(&trans, id, level, k)); -+} -+ -+static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, k->k.p, -+ BTREE_ITER_CACHED| -+ BTREE_ITER_CACHED_NOFILL| -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(iter) ?: -+ bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN); -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ 
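(A minimal standalone sketch, not part of the patch: journal_keys_sort() above orders replay keys so that entries for the same position compare oldest-first, then dedups each run keeping only the newest entry. The demo_* types below are hypothetical and plain qsort() stands in for the kernel sort helper; it only illustrates that sort-then-dedup idiom.)

/*
 * Illustration only (hypothetical types, not patch content): sort keys so
 * equal positions compare oldest-first, then keep the last -- i.e. newest --
 * entry of each run of equal keys, as journal_keys_sort() does above.
 */
#include <stdlib.h>

struct demo_key {
	unsigned		btree_id;
	unsigned		level;
	unsigned long long	pos;
	unsigned long long	seq;	/* journal sequence: higher == newer */
};

static int demo_key_cmp(const void *_l, const void *_r)
{
	const struct demo_key *l = _l, *r = _r;

	if (l->btree_id != r->btree_id)
		return l->btree_id < r->btree_id ? -1 : 1;
	if (l->level != r->level)
		return l->level < r->level ? -1 : 1;
	if (l->pos != r->pos)
		return l->pos < r->pos ? -1 : 1;
	/* equal keys: oldest first, so the newest lands last in each run */
	return l->seq < r->seq ? -1 : l->seq > r->seq ? 1 : 0;
}

static size_t demo_sort_dedup(struct demo_key *d, size_t nr)
{
	struct demo_key *src = d, *dst = d;

	qsort(d, nr, sizeof(*d), demo_key_cmp);

	while (src < d + nr) {
		/* advance to the newest entry of this run of equal keys */
		while (src + 1 < d + nr &&
		       src[0].btree_id == src[1].btree_id &&
		       src[0].level    == src[1].level &&
		       src[0].pos      == src[1].pos)
			src++;

		*dst++ = *src++;
	}

	return dst - d;	/* deduplicated key count */
}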
-+static int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k) -+{ -+ return bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_JOURNAL_REPLAY, -+ __bch2_alloc_replay_key(&trans, k)); -+} -+ -+static int journal_sort_seq_cmp(const void *_l, const void *_r) -+{ -+ const struct journal_key *l = _l; -+ const struct journal_key *r = _r; -+ -+ return cmp_int(r->level, l->level) ?: -+ cmp_int(l->journal_seq, r->journal_seq) ?: -+ cmp_int(l->btree_id, r->btree_id) ?: -+ bkey_cmp(l->k->k.p, r->k->k.p); -+} -+ -+static int bch2_journal_replay(struct bch_fs *c, -+ struct journal_keys keys) -+{ -+ struct journal *j = &c->journal; -+ struct journal_key *i; -+ u64 seq; -+ int ret; -+ -+ sort(keys.d, keys.nr, sizeof(keys.d[0]), journal_sort_seq_cmp, NULL); -+ -+ if (keys.nr) -+ replay_now_at(j, keys.journal_seq_base); -+ -+ seq = j->replay_journal_seq; -+ -+ /* -+ * First replay updates to the alloc btree - these will only update the -+ * btree key cache: -+ */ -+ for_each_journal_key(keys, i) { -+ cond_resched(); -+ -+ if (!i->level && i->btree_id == BTREE_ID_ALLOC) { -+ j->replay_journal_seq = keys.journal_seq_base + i->journal_seq; -+ ret = bch2_alloc_replay_key(c, i->k); -+ if (ret) -+ goto err; -+ } -+ } -+ -+ /* -+ * Next replay updates to interior btree nodes: -+ */ -+ for_each_journal_key(keys, i) { -+ cond_resched(); -+ -+ if (i->level) { -+ j->replay_journal_seq = keys.journal_seq_base + i->journal_seq; -+ ret = bch2_journal_replay_key(c, i->btree_id, i->level, i->k); -+ if (ret) -+ goto err; -+ } -+ } -+ -+ /* -+ * Now that the btree is in a consistent state, we can start journal -+ * reclaim (which will be flushing entries from the btree key cache back -+ * to the btree: -+ */ -+ set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags); -+ set_bit(JOURNAL_RECLAIM_STARTED, &j->flags); -+ -+ j->replay_journal_seq = seq; -+ -+ /* -+ * Now replay leaf node updates: -+ */ -+ for_each_journal_key(keys, i) { -+ cond_resched(); -+ -+ if (i->level || i->btree_id == BTREE_ID_ALLOC) -+ continue; -+ -+ replay_now_at(j, keys.journal_seq_base + i->journal_seq); -+ -+ ret = i->k->k.size -+ ? bch2_extent_replay_key(c, i->btree_id, i->k) -+ : bch2_journal_replay_key(c, i->btree_id, i->level, i->k); -+ if (ret) -+ goto err; -+ } -+ -+ replay_now_at(j, j->replay_journal_seq_end); -+ j->replay_journal_seq = 0; -+ -+ bch2_journal_set_replay_done(j); -+ bch2_journal_flush_all_pins(j); -+ return bch2_journal_error(j); -+err: -+ bch_err(c, "journal replay: error %d while replaying key", ret); -+ return ret; -+} -+ -+static bool journal_empty(struct list_head *journal) -+{ -+ return list_empty(journal) || -+ journal_entry_empty(&list_last_entry(journal, -+ struct journal_replay, list)->j); -+} -+ -+static int -+verify_journal_entries_not_blacklisted_or_missing(struct bch_fs *c, -+ struct list_head *journal) -+{ -+ struct journal_replay *i = -+ list_last_entry(journal, struct journal_replay, list); -+ u64 start_seq = le64_to_cpu(i->j.last_seq); -+ u64 end_seq = le64_to_cpu(i->j.seq); -+ u64 seq = start_seq; -+ int ret = 0; -+ -+ list_for_each_entry(i, journal, list) { -+ if (le64_to_cpu(i->j.seq) < start_seq) -+ continue; -+ -+ fsck_err_on(seq != le64_to_cpu(i->j.seq), c, -+ "journal entries %llu-%llu missing! 
(replaying %llu-%llu)", -+ seq, le64_to_cpu(i->j.seq) - 1, -+ start_seq, end_seq); -+ -+ seq = le64_to_cpu(i->j.seq); -+ -+ fsck_err_on(bch2_journal_seq_is_blacklisted(c, seq, false), c, -+ "found blacklisted journal entry %llu", seq); -+ -+ do { -+ seq++; -+ } while (bch2_journal_seq_is_blacklisted(c, seq, false)); -+ } -+fsck_err: -+ return ret; -+} -+ -+/* journal replay early: */ -+ -+static int journal_replay_entry_early(struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ int ret = 0; -+ -+ switch (entry->type) { -+ case BCH_JSET_ENTRY_btree_root: { -+ struct btree_root *r; -+ -+ if (entry->btree_id >= BTREE_ID_NR) { -+ bch_err(c, "filesystem has unknown btree type %u", -+ entry->btree_id); -+ return -EINVAL; -+ } -+ -+ r = &c->btree_roots[entry->btree_id]; -+ -+ if (entry->u64s) { -+ r->level = entry->level; -+ bkey_copy(&r->key, &entry->start[0]); -+ r->error = 0; -+ } else { -+ r->error = -EIO; -+ } -+ r->alive = true; -+ break; -+ } -+ case BCH_JSET_ENTRY_usage: { -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ -+ switch (entry->btree_id) { -+ case FS_USAGE_RESERVED: -+ if (entry->level < BCH_REPLICAS_MAX) -+ c->usage_base->persistent_reserved[entry->level] = -+ le64_to_cpu(u->v); -+ break; -+ case FS_USAGE_INODES: -+ c->usage_base->nr_inodes = le64_to_cpu(u->v); -+ break; -+ case FS_USAGE_KEY_VERSION: -+ atomic64_set(&c->key_version, -+ le64_to_cpu(u->v)); -+ break; -+ } -+ -+ break; -+ } -+ case BCH_JSET_ENTRY_data_usage: { -+ struct jset_entry_data_usage *u = -+ container_of(entry, struct jset_entry_data_usage, entry); -+ ret = bch2_replicas_set_usage(c, &u->r, -+ le64_to_cpu(u->v)); -+ break; -+ } -+ case BCH_JSET_ENTRY_blacklist: { -+ struct jset_entry_blacklist *bl_entry = -+ container_of(entry, struct jset_entry_blacklist, entry); -+ -+ ret = bch2_journal_seq_blacklist_add(c, -+ le64_to_cpu(bl_entry->seq), -+ le64_to_cpu(bl_entry->seq) + 1); -+ break; -+ } -+ case BCH_JSET_ENTRY_blacklist_v2: { -+ struct jset_entry_blacklist_v2 *bl_entry = -+ container_of(entry, struct jset_entry_blacklist_v2, entry); -+ -+ ret = bch2_journal_seq_blacklist_add(c, -+ le64_to_cpu(bl_entry->start), -+ le64_to_cpu(bl_entry->end) + 1); -+ break; -+ } -+ } -+ -+ return ret; -+} -+ -+static int journal_replay_early(struct bch_fs *c, -+ struct bch_sb_field_clean *clean, -+ struct list_head *journal) -+{ -+ struct jset_entry *entry; -+ int ret; -+ -+ if (clean) { -+ c->bucket_clock[READ].hand = le16_to_cpu(clean->read_clock); -+ c->bucket_clock[WRITE].hand = le16_to_cpu(clean->write_clock); -+ -+ for (entry = clean->start; -+ entry != vstruct_end(&clean->field); -+ entry = vstruct_next(entry)) { -+ ret = journal_replay_entry_early(c, entry); -+ if (ret) -+ return ret; -+ } -+ } else { -+ struct journal_replay *i = -+ list_last_entry(journal, struct journal_replay, list); -+ -+ c->bucket_clock[READ].hand = le16_to_cpu(i->j.read_clock); -+ c->bucket_clock[WRITE].hand = le16_to_cpu(i->j.write_clock); -+ -+ list_for_each_entry(i, journal, list) -+ vstruct_for_each(&i->j, entry) { -+ ret = journal_replay_entry_early(c, entry); -+ if (ret) -+ return ret; -+ } -+ } -+ -+ bch2_fs_usage_initialize(c); -+ -+ return 0; -+} -+ -+/* sb clean section: */ -+ -+static struct bkey_i *btree_root_find(struct bch_fs *c, -+ struct bch_sb_field_clean *clean, -+ struct jset *j, -+ enum btree_id id, unsigned *level) -+{ -+ struct bkey_i *k; -+ struct jset_entry *entry, *start, *end; -+ -+ if (clean) { -+ start = clean->start; -+ end = vstruct_end(&clean->field); -+ } else { -+ 
start = j->start; -+ end = vstruct_last(j); -+ } -+ -+ for (entry = start; entry < end; entry = vstruct_next(entry)) -+ if (entry->type == BCH_JSET_ENTRY_btree_root && -+ entry->btree_id == id) -+ goto found; -+ -+ return NULL; -+found: -+ if (!entry->u64s) -+ return ERR_PTR(-EINVAL); -+ -+ k = entry->start; -+ *level = entry->level; -+ return k; -+} -+ -+static int verify_superblock_clean(struct bch_fs *c, -+ struct bch_sb_field_clean **cleanp, -+ struct jset *j) -+{ -+ unsigned i; -+ struct bch_sb_field_clean *clean = *cleanp; -+ int ret = 0; -+ -+ if (!c->sb.clean || !j) -+ return 0; -+ -+ if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, -+ "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown", -+ le64_to_cpu(clean->journal_seq), -+ le64_to_cpu(j->seq))) { -+ kfree(clean); -+ *cleanp = NULL; -+ return 0; -+ } -+ -+ mustfix_fsck_err_on(j->read_clock != clean->read_clock, c, -+ "superblock read clock doesn't match journal after clean shutdown"); -+ mustfix_fsck_err_on(j->write_clock != clean->write_clock, c, -+ "superblock read clock doesn't match journal after clean shutdown"); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ char buf1[200], buf2[200]; -+ struct bkey_i *k1, *k2; -+ unsigned l1 = 0, l2 = 0; -+ -+ k1 = btree_root_find(c, clean, NULL, i, &l1); -+ k2 = btree_root_find(c, NULL, j, i, &l2); -+ -+ if (!k1 && !k2) -+ continue; -+ -+ mustfix_fsck_err_on(!k1 || !k2 || -+ IS_ERR(k1) || -+ IS_ERR(k2) || -+ k1->k.u64s != k2->k.u64s || -+ memcmp(k1, k2, bkey_bytes(k1)) || -+ l1 != l2, c, -+ "superblock btree root %u doesn't match journal after clean shutdown\n" -+ "sb: l=%u %s\n" -+ "journal: l=%u %s\n", i, -+ l1, (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(k1)), buf1), -+ l2, (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(k2)), buf2)); -+ } -+fsck_err: -+ return ret; -+} -+ -+static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c) -+{ -+ struct bch_sb_field_clean *clean, *sb_clean; -+ int ret; -+ -+ mutex_lock(&c->sb_lock); -+ sb_clean = bch2_sb_get_clean(c->disk_sb.sb); -+ -+ if (fsck_err_on(!sb_clean, c, -+ "superblock marked clean but clean section not present")) { -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ c->sb.clean = false; -+ mutex_unlock(&c->sb_lock); -+ return NULL; -+ } -+ -+ clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field), -+ GFP_KERNEL); -+ if (!clean) { -+ mutex_unlock(&c->sb_lock); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ if (le16_to_cpu(c->disk_sb.sb->version) < -+ bcachefs_metadata_version_bkey_renumber) -+ bch2_sb_clean_renumber(clean, READ); -+ -+ mutex_unlock(&c->sb_lock); -+ -+ return clean; -+fsck_err: -+ mutex_unlock(&c->sb_lock); -+ return ERR_PTR(ret); -+} -+ -+static int read_btree_roots(struct bch_fs *c) -+{ -+ unsigned i; -+ int ret = 0; -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ struct btree_root *r = &c->btree_roots[i]; -+ -+ if (!r->alive) -+ continue; -+ -+ if (i == BTREE_ID_ALLOC && -+ c->opts.reconstruct_alloc) { -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); -+ continue; -+ } -+ -+ -+ if (r->error) { -+ __fsck_err(c, i == BTREE_ID_ALLOC -+ ? FSCK_CAN_IGNORE : 0, -+ "invalid btree root %s", -+ bch2_btree_ids[i]); -+ if (i == BTREE_ID_ALLOC) -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); -+ } -+ -+ ret = bch2_btree_root_read(c, i, &r->key, r->level); -+ if (ret) { -+ __fsck_err(c, i == BTREE_ID_ALLOC -+ ? 
FSCK_CAN_IGNORE : 0, -+ "error reading btree root %s", -+ bch2_btree_ids[i]); -+ if (i == BTREE_ID_ALLOC) -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); -+ } -+ } -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (!c->btree_roots[i].b) -+ bch2_btree_root_alloc(c, i); -+fsck_err: -+ return ret; -+} -+ -+int bch2_fs_recovery(struct bch_fs *c) -+{ -+ const char *err = "cannot allocate memory"; -+ struct bch_sb_field_clean *clean = NULL; -+ u64 journal_seq; -+ bool wrote = false, write_sb = false; -+ int ret; -+ -+ if (c->sb.clean) -+ clean = read_superblock_clean(c); -+ ret = PTR_ERR_OR_ZERO(clean); -+ if (ret) -+ goto err; -+ -+ if (c->sb.clean) -+ bch_info(c, "recovering from clean shutdown, journal seq %llu", -+ le64_to_cpu(clean->journal_seq)); -+ -+ if (!c->replicas.entries) { -+ bch_info(c, "building replicas info"); -+ set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags); -+ } -+ -+ if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) { -+ struct jset *j; -+ -+ ret = bch2_journal_read(c, &c->journal_entries); -+ if (ret) -+ goto err; -+ -+ if (mustfix_fsck_err_on(c->sb.clean && !journal_empty(&c->journal_entries), c, -+ "filesystem marked clean but journal not empty")) { -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ c->sb.clean = false; -+ } -+ -+ if (!c->sb.clean && list_empty(&c->journal_entries)) { -+ bch_err(c, "no journal entries found"); -+ ret = BCH_FSCK_REPAIR_IMPOSSIBLE; -+ goto err; -+ } -+ -+ c->journal_keys = journal_keys_sort(&c->journal_entries); -+ if (!c->journal_keys.d) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ j = &list_last_entry(&c->journal_entries, -+ struct journal_replay, list)->j; -+ -+ ret = verify_superblock_clean(c, &clean, j); -+ if (ret) -+ goto err; -+ -+ journal_seq = le64_to_cpu(j->seq) + 1; -+ } else { -+ journal_seq = le64_to_cpu(clean->journal_seq) + 1; -+ } -+ -+ if (!c->sb.clean && -+ !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) { -+ bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ ret = journal_replay_early(c, clean, &c->journal_entries); -+ if (ret) -+ goto err; -+ -+ if (!c->sb.clean) { -+ ret = bch2_journal_seq_blacklist_add(c, -+ journal_seq, -+ journal_seq + 4); -+ if (ret) { -+ bch_err(c, "error creating new journal seq blacklist entry"); -+ goto err; -+ } -+ -+ journal_seq += 4; -+ } -+ -+ ret = bch2_blacklist_table_initialize(c); -+ -+ if (!list_empty(&c->journal_entries)) { -+ ret = verify_journal_entries_not_blacklisted_or_missing(c, -+ &c->journal_entries); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_fs_journal_start(&c->journal, journal_seq, -+ &c->journal_entries); -+ if (ret) -+ goto err; -+ -+ ret = read_btree_roots(c); -+ if (ret) -+ goto err; -+ -+ bch_verbose(c, "starting alloc read"); -+ err = "error reading allocation information"; -+ ret = bch2_alloc_read(c, &c->journal_keys); -+ if (ret) -+ goto err; -+ bch_verbose(c, "alloc read done"); -+ -+ bch_verbose(c, "starting stripes_read"); -+ err = "error reading stripes"; -+ ret = bch2_stripes_read(c, &c->journal_keys); -+ if (ret) -+ goto err; -+ bch_verbose(c, "stripes_read done"); -+ -+ set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); -+ -+ if ((c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) && -+ !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA))) { -+ /* -+ * interior btree node updates aren't consistent with the -+ * journal; after an unclean shutdown we have to walk all -+ * 
pointers to metadata: -+ */ -+ bch_info(c, "starting metadata mark and sweep"); -+ err = "error in mark and sweep"; -+ ret = bch2_gc(c, &c->journal_keys, true, true); -+ if (ret) -+ goto err; -+ bch_verbose(c, "mark and sweep done"); -+ } -+ -+ if (c->opts.fsck || -+ !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) || -+ test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) { -+ bch_info(c, "starting mark and sweep"); -+ err = "error in mark and sweep"; -+ ret = bch2_gc(c, &c->journal_keys, true, false); -+ if (ret) -+ goto err; -+ bch_verbose(c, "mark and sweep done"); -+ } -+ -+ clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags); -+ set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); -+ -+ /* -+ * Skip past versions that might have possibly been used (as nonces), -+ * but hadn't had their pointers written: -+ */ -+ if (c->sb.encryption_type && !c->sb.clean) -+ atomic64_add(1 << 16, &c->key_version); -+ -+ if (c->opts.norecovery) -+ goto out; -+ -+ bch_verbose(c, "starting journal replay"); -+ err = "journal replay failed"; -+ ret = bch2_journal_replay(c, c->journal_keys); -+ if (ret) -+ goto err; -+ bch_verbose(c, "journal replay done"); -+ -+ if (!c->opts.nochanges) { -+ /* -+ * note that even when filesystem was clean there might be work -+ * to do here, if we ran gc (because of fsck) which recalculated -+ * oldest_gen: -+ */ -+ bch_verbose(c, "writing allocation info"); -+ err = "error writing out alloc info"; -+ ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW, &wrote) ?: -+ bch2_alloc_write(c, BTREE_INSERT_LAZY_RW, &wrote); -+ if (ret) { -+ bch_err(c, "error writing alloc info"); -+ goto err; -+ } -+ bch_verbose(c, "alloc write done"); -+ -+ set_bit(BCH_FS_ALLOC_WRITTEN, &c->flags); -+ } -+ -+ if (!c->sb.clean) { -+ if (!(c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) { -+ bch_info(c, "checking inode link counts"); -+ err = "error in recovery"; -+ ret = bch2_fsck_inode_nlink(c); -+ if (ret) -+ goto err; -+ bch_verbose(c, "check inodes done"); -+ -+ } else { -+ bch_verbose(c, "checking for deleted inodes"); -+ err = "error in recovery"; -+ ret = bch2_fsck_walk_inodes_only(c); -+ if (ret) -+ goto err; -+ bch_verbose(c, "check inodes done"); -+ } -+ } -+ -+ if (c->opts.fsck) { -+ bch_info(c, "starting fsck"); -+ err = "error in fsck"; -+ ret = bch2_fsck_full(c); -+ if (ret) -+ goto err; -+ bch_verbose(c, "fsck done"); -+ } -+ -+ if (enabled_qtypes(c)) { -+ bch_verbose(c, "reading quotas"); -+ ret = bch2_fs_quota_read(c); -+ if (ret) -+ goto err; -+ bch_verbose(c, "quotas done"); -+ } -+ -+ mutex_lock(&c->sb_lock); -+ if (c->opts.version_upgrade) { -+ if (c->sb.version < bcachefs_metadata_version_new_versioning) -+ c->disk_sb.sb->version_min = -+ le16_to_cpu(bcachefs_metadata_version_min); -+ c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current); -+ c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL; -+ write_sb = true; -+ } -+ -+ if (!test_bit(BCH_FS_ERROR, &c->flags)) { -+ c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO; -+ write_sb = true; -+ } -+ -+ if (c->opts.fsck && -+ !test_bit(BCH_FS_ERROR, &c->flags)) { -+ c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink; -+ SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0); -+ write_sb = true; -+ } -+ -+ if (write_sb) -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ if (c->journal_seq_blacklist_table && -+ c->journal_seq_blacklist_table->nr > 128) -+ queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work); -+out: -+ ret = 0; -+err: -+fsck_err: -+ set_bit(BCH_FS_FSCK_DONE, &c->flags); -+ 
bch2_flush_fsck_errs(c); -+ -+ if (!c->opts.keep_journal) { -+ bch2_journal_keys_free(&c->journal_keys); -+ bch2_journal_entries_free(&c->journal_entries); -+ } -+ kfree(clean); -+ if (ret) -+ bch_err(c, "Error in recovery: %s (%i)", err, ret); -+ else -+ bch_verbose(c, "ret %i", ret); -+ return ret; -+} -+ -+int bch2_fs_initialize(struct bch_fs *c) -+{ -+ struct bch_inode_unpacked root_inode, lostfound_inode; -+ struct bkey_inode_buf packed_inode; -+ struct qstr lostfound = QSTR("lost+found"); -+ const char *err = "cannot allocate memory"; -+ struct bch_dev *ca; -+ LIST_HEAD(journal); -+ unsigned i; -+ int ret; -+ -+ bch_notice(c, "initializing new filesystem"); -+ -+ mutex_lock(&c->sb_lock); -+ for_each_online_member(ca, c, i) -+ bch2_mark_dev_superblock(c, ca, 0); -+ mutex_unlock(&c->sb_lock); -+ -+ mutex_lock(&c->sb_lock); -+ c->disk_sb.sb->version = c->disk_sb.sb->version_min = -+ le16_to_cpu(bcachefs_metadata_version_current); -+ c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink; -+ c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL; -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); -+ set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ bch2_btree_root_alloc(c, i); -+ -+ set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags); -+ set_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags); -+ -+ err = "unable to allocate journal buckets"; -+ for_each_online_member(ca, c, i) { -+ ret = bch2_dev_journal_alloc(ca); -+ if (ret) { -+ percpu_ref_put(&ca->io_ref); -+ goto err; -+ } -+ } -+ -+ /* -+ * journal_res_get() will crash if called before this has -+ * set up the journal.pin FIFO and journal.cur pointer: -+ */ -+ bch2_fs_journal_start(&c->journal, 1, &journal); -+ bch2_journal_set_replay_done(&c->journal); -+ -+ bch2_inode_init(c, &root_inode, 0, 0, -+ S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL); -+ root_inode.bi_inum = BCACHEFS_ROOT_INO; -+ bch2_inode_pack(&packed_inode, &root_inode); -+ -+ err = "error creating root directory"; -+ ret = bch2_btree_insert(c, BTREE_ID_INODES, -+ &packed_inode.inode.k_i, -+ NULL, NULL, BTREE_INSERT_LAZY_RW); -+ if (ret) -+ goto err; -+ -+ bch2_inode_init_early(c, &lostfound_inode); -+ -+ err = "error creating lost+found"; -+ ret = bch2_trans_do(c, NULL, NULL, 0, -+ bch2_create_trans(&trans, BCACHEFS_ROOT_INO, -+ &root_inode, &lostfound_inode, -+ &lostfound, -+ 0, 0, S_IFDIR|0700, 0, -+ NULL, NULL)); -+ if (ret) -+ goto err; -+ -+ if (enabled_qtypes(c)) { -+ ret = bch2_fs_quota_read(c); -+ if (ret) -+ goto err; -+ } -+ -+ err = "error writing first journal entry"; -+ ret = bch2_journal_meta(&c->journal); -+ if (ret) -+ goto err; -+ -+ mutex_lock(&c->sb_lock); -+ SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+err: -+ pr_err("Error initializing new filesystem: %s (%i)", err, ret); -+ return ret; -+} -diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h -new file mode 100644 -index 000000000000..a66827c9addf ---- /dev/null -+++ b/fs/bcachefs/recovery.h -@@ -0,0 +1,60 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_RECOVERY_H -+#define _BCACHEFS_RECOVERY_H -+ -+#define for_each_journal_key(keys, i) \ -+ for (i = (keys).d; i < (keys).d + (keys).nr; (i)++) -+ -+struct journal_iter { -+ enum btree_id btree_id; -+ unsigned level; -+ struct journal_keys *keys; -+ struct journal_key *k; -+}; -+ -+/* -+ * Iterate over keys in the 
btree, with keys from the journal overlaid on top: -+ */ -+ -+struct btree_and_journal_iter { -+ struct btree_iter *btree; -+ -+ struct btree *b; -+ struct btree_node_iter node_iter; -+ struct bkey unpacked; -+ -+ struct journal_iter journal; -+ -+ enum last_key_returned { -+ none, -+ btree, -+ journal, -+ } last; -+}; -+ -+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *); -+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *); -+struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *); -+ -+void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *, -+ struct btree_trans *, -+ struct journal_keys *, -+ enum btree_id, struct bpos); -+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *, -+ struct journal_keys *, -+ struct btree *); -+ -+typedef int (*btree_walk_node_fn)(struct bch_fs *c, struct btree *b); -+typedef int (*btree_walk_key_fn)(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bkey_s_c k); -+ -+int bch2_btree_and_journal_walk(struct bch_fs *, struct journal_keys *, enum btree_id, -+ btree_walk_node_fn, btree_walk_key_fn); -+ -+void bch2_journal_keys_free(struct journal_keys *); -+void bch2_journal_entries_free(struct list_head *); -+ -+int bch2_fs_recovery(struct bch_fs *); -+int bch2_fs_initialize(struct bch_fs *); -+ -+#endif /* _BCACHEFS_RECOVERY_H */ -diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c -new file mode 100644 -index 000000000000..3c473f1380a6 ---- /dev/null -+++ b/fs/bcachefs/reflink.c -@@ -0,0 +1,303 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "extents.h" -+#include "inode.h" -+#include "io.h" -+#include "reflink.h" -+ -+#include -+ -+/* reflink pointers */ -+ -+const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ -+ if (bkey_val_bytes(p.k) != sizeof(*p.v)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ -+ pr_buf(out, "idx %llu", le64_to_cpu(p.v->idx)); -+} -+ -+enum merge_result bch2_reflink_p_merge(struct bch_fs *c, -+ struct bkey_s _l, struct bkey_s _r) -+{ -+ struct bkey_s_reflink_p l = bkey_s_to_reflink_p(_l); -+ struct bkey_s_reflink_p r = bkey_s_to_reflink_p(_r); -+ -+ if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx)) -+ return BCH_MERGE_NOMERGE; -+ -+ if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) { -+ bch2_key_resize(l.k, KEY_SIZE_MAX); -+ bch2_cut_front_s(l.k->p, _r); -+ return BCH_MERGE_PARTIAL; -+ } -+ -+ bch2_key_resize(l.k, l.k->size + r.k->size); -+ -+ return BCH_MERGE_MERGE; -+} -+ -+/* indirect extents */ -+ -+const char *bch2_reflink_v_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -+ -+ if (bkey_val_bytes(r.k) < sizeof(*r.v)) -+ return "incorrect value size"; -+ -+ return bch2_bkey_ptrs_invalid(c, k); -+} -+ -+void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -+ -+ pr_buf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount)); -+ -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+static int bch2_make_extent_indirect(struct btree_trans *trans, -+ struct btree_iter *extent_iter, -+ struct 
bkey_i_extent *e) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *reflink_iter; -+ struct bkey_s_c k; -+ struct bkey_i_reflink_v *r_v; -+ struct bkey_i_reflink_p *r_p; -+ int ret; -+ -+ for_each_btree_key(trans, reflink_iter, BTREE_ID_REFLINK, -+ POS(0, c->reflink_hint), -+ BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) { -+ if (reflink_iter->pos.inode) { -+ bch2_btree_iter_set_pos(reflink_iter, POS_MIN); -+ continue; -+ } -+ -+ if (bkey_deleted(k.k) && e->k.size <= k.k->size) -+ break; -+ } -+ -+ if (ret) -+ goto err; -+ -+ /* rewind iter to start of hole, if necessary: */ -+ bch2_btree_iter_set_pos(reflink_iter, bkey_start_pos(k.k)); -+ -+ r_v = bch2_trans_kmalloc(trans, sizeof(*r_v) + bkey_val_bytes(&e->k)); -+ ret = PTR_ERR_OR_ZERO(r_v); -+ if (ret) -+ goto err; -+ -+ bkey_reflink_v_init(&r_v->k_i); -+ r_v->k.p = reflink_iter->pos; -+ bch2_key_resize(&r_v->k, e->k.size); -+ r_v->k.version = e->k.version; -+ -+ set_bkey_val_u64s(&r_v->k, bkey_val_u64s(&r_v->k) + -+ bkey_val_u64s(&e->k)); -+ r_v->v.refcount = 0; -+ memcpy(r_v->v.start, e->v.start, bkey_val_bytes(&e->k)); -+ -+ bch2_trans_update(trans, reflink_iter, &r_v->k_i, 0); -+ -+ r_p = bch2_trans_kmalloc(trans, sizeof(*r_p)); -+ if (IS_ERR(r_p)) -+ return PTR_ERR(r_p); -+ -+ e->k.type = KEY_TYPE_reflink_p; -+ r_p = bkey_i_to_reflink_p(&e->k_i); -+ set_bkey_val_bytes(&r_p->k, sizeof(r_p->v)); -+ r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k)); -+ -+ bch2_trans_update(trans, extent_iter, &r_p->k_i, 0); -+err: -+ if (!IS_ERR(reflink_iter)) -+ c->reflink_hint = reflink_iter->pos.offset; -+ bch2_trans_iter_put(trans, reflink_iter); -+ -+ return ret; -+} -+ -+static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) -+{ -+ struct bkey_s_c k = bch2_btree_iter_peek(iter); -+ int ret; -+ -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ if (bkey_cmp(iter->pos, end) >= 0) -+ return bkey_s_c_null; -+ -+ if (k.k->type == KEY_TYPE_extent || -+ k.k->type == KEY_TYPE_reflink_p) -+ break; -+ } -+ -+ return k; -+} -+ -+s64 bch2_remap_range(struct bch_fs *c, -+ struct bpos dst_start, struct bpos src_start, -+ u64 remap_sectors, u64 *journal_seq, -+ u64 new_i_size, s64 *i_sectors_delta) -+{ -+ struct btree_trans trans; -+ struct btree_iter *dst_iter, *src_iter; -+ struct bkey_s_c src_k; -+ BKEY_PADDED(k) new_dst; -+ struct bkey_on_stack new_src; -+ struct bpos dst_end = dst_start, src_end = src_start; -+ struct bpos dst_want, src_want; -+ u64 src_done, dst_done; -+ int ret = 0, ret2 = 0; -+ -+ if (!c->opts.reflink) -+ return -EOPNOTSUPP; -+ -+ if (!percpu_ref_tryget(&c->writes)) -+ return -EROFS; -+ -+ bch2_check_set_feature(c, BCH_FEATURE_reflink); -+ -+ dst_end.offset += remap_sectors; -+ src_end.offset += remap_sectors; -+ -+ bkey_on_stack_init(&new_src); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096); -+ -+ src_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start, -+ BTREE_ITER_INTENT); -+ dst_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, dst_start, -+ BTREE_ITER_INTENT); -+ -+ while (1) { -+ bch2_trans_begin(&trans); -+ -+ trans.mem_top = 0; -+ -+ if (fatal_signal_pending(current)) { -+ ret = -EINTR; -+ goto err; -+ } -+ -+ src_k = get_next_src(src_iter, src_end); -+ ret = bkey_err(src_k); -+ if (ret) -+ goto btree_err; -+ -+ src_done = bpos_min(src_iter->pos, src_end).offset - -+ src_start.offset; -+ dst_want = POS(dst_start.inode, dst_start.offset + src_done); -+ -+ if (bkey_cmp(dst_iter->pos, dst_want) < 0) { -+ ret = bch2_fpunch_at(&trans, dst_iter, dst_want, -+ journal_seq, 
i_sectors_delta); -+ if (ret) -+ goto btree_err; -+ continue; -+ } -+ -+ BUG_ON(bkey_cmp(dst_iter->pos, dst_want)); -+ -+ if (!bkey_cmp(dst_iter->pos, dst_end)) -+ break; -+ -+ if (src_k.k->type == KEY_TYPE_extent) { -+ bkey_on_stack_reassemble(&new_src, c, src_k); -+ src_k = bkey_i_to_s_c(new_src.k); -+ -+ bch2_cut_front(src_iter->pos, new_src.k); -+ bch2_cut_back(src_end, new_src.k); -+ -+ ret = bch2_make_extent_indirect(&trans, src_iter, -+ bkey_i_to_extent(new_src.k)); -+ if (ret) -+ goto btree_err; -+ -+ BUG_ON(src_k.k->type != KEY_TYPE_reflink_p); -+ } -+ -+ if (src_k.k->type == KEY_TYPE_reflink_p) { -+ struct bkey_s_c_reflink_p src_p = -+ bkey_s_c_to_reflink_p(src_k); -+ struct bkey_i_reflink_p *dst_p = -+ bkey_reflink_p_init(&new_dst.k); -+ -+ u64 offset = le64_to_cpu(src_p.v->idx) + -+ (src_iter->pos.offset - -+ bkey_start_offset(src_k.k)); -+ -+ dst_p->v.idx = cpu_to_le64(offset); -+ } else { -+ BUG(); -+ } -+ -+ new_dst.k.k.p = dst_iter->pos; -+ bch2_key_resize(&new_dst.k.k, -+ min(src_k.k->p.offset - src_iter->pos.offset, -+ dst_end.offset - dst_iter->pos.offset)); -+ -+ ret = bch2_extent_update(&trans, dst_iter, &new_dst.k, -+ NULL, journal_seq, -+ new_i_size, i_sectors_delta); -+ if (ret) -+ goto btree_err; -+ -+ dst_done = dst_iter->pos.offset - dst_start.offset; -+ src_want = POS(src_start.inode, src_start.offset + dst_done); -+ bch2_btree_iter_set_pos(src_iter, src_want); -+btree_err: -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ goto err; -+ } -+ -+ BUG_ON(bkey_cmp(dst_iter->pos, dst_end)); -+err: -+ BUG_ON(bkey_cmp(dst_iter->pos, dst_end) > 0); -+ -+ dst_done = dst_iter->pos.offset - dst_start.offset; -+ new_i_size = min(dst_iter->pos.offset << 9, new_i_size); -+ -+ bch2_trans_begin(&trans); -+ -+ do { -+ struct bch_inode_unpacked inode_u; -+ struct btree_iter *inode_iter; -+ -+ inode_iter = bch2_inode_peek(&trans, &inode_u, -+ dst_start.inode, BTREE_ITER_INTENT); -+ ret2 = PTR_ERR_OR_ZERO(inode_iter); -+ -+ if (!ret2 && -+ inode_u.bi_size < new_i_size) { -+ inode_u.bi_size = new_i_size; -+ ret2 = bch2_inode_write(&trans, inode_iter, &inode_u) ?: -+ bch2_trans_commit(&trans, NULL, journal_seq, 0); -+ } -+ } while (ret2 == -EINTR); -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ bkey_on_stack_exit(&new_src, c); -+ -+ percpu_ref_put(&c->writes); -+ -+ return dst_done ?: ret ?: ret2; -+} -diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h -new file mode 100644 -index 000000000000..5445c1cf0797 ---- /dev/null -+++ b/fs/bcachefs/reflink.h -@@ -0,0 +1,31 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REFLINK_H -+#define _BCACHEFS_REFLINK_H -+ -+const char *bch2_reflink_p_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+enum merge_result bch2_reflink_p_merge(struct bch_fs *, -+ struct bkey_s, struct bkey_s); -+ -+#define bch2_bkey_ops_reflink_p (struct bkey_ops) { \ -+ .key_invalid = bch2_reflink_p_invalid, \ -+ .val_to_text = bch2_reflink_p_to_text, \ -+ .key_merge = bch2_reflink_p_merge, \ -+} -+ -+const char *bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+ -+#define bch2_bkey_ops_reflink_v (struct bkey_ops) { \ -+ .key_invalid = bch2_reflink_v_invalid, \ -+ .val_to_text = bch2_reflink_v_to_text, \ -+ .swab = bch2_ptr_swab, \ -+} -+ -+s64 bch2_remap_range(struct bch_fs *, struct bpos, struct bpos, -+ u64, u64 *, u64, s64 *); -+ -+#endif /* 
_BCACHEFS_REFLINK_H */ -diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c -new file mode 100644 -index 000000000000..67a7128fd9af ---- /dev/null -+++ b/fs/bcachefs/replicas.c -@@ -0,0 +1,1084 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "buckets.h" -+#include "journal.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, -+ struct bch_replicas_cpu *); -+ -+/* Replicas tracking - in memory: */ -+ -+static inline int u8_cmp(u8 l, u8 r) -+{ -+ return cmp_int(l, r); -+} -+ -+static void verify_replicas_entry(struct bch_replicas_entry *e) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ unsigned i; -+ -+ BUG_ON(e->data_type >= BCH_DATA_NR); -+ BUG_ON(!e->nr_devs); -+ BUG_ON(e->nr_required > 1 && -+ e->nr_required >= e->nr_devs); -+ -+ for (i = 0; i + 1 < e->nr_devs; i++) -+ BUG_ON(e->devs[i] >= e->devs[i + 1]); -+#endif -+} -+ -+static void replicas_entry_sort(struct bch_replicas_entry *e) -+{ -+ bubble_sort(e->devs, e->nr_devs, u8_cmp); -+} -+ -+static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) -+{ -+ eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL); -+} -+ -+void bch2_replicas_entry_to_text(struct printbuf *out, -+ struct bch_replicas_entry *e) -+{ -+ unsigned i; -+ -+ pr_buf(out, "%s: %u/%u [", -+ bch2_data_types[e->data_type], -+ e->nr_required, -+ e->nr_devs); -+ -+ for (i = 0; i < e->nr_devs; i++) -+ pr_buf(out, i ? " %u" : "%u", e->devs[i]); -+ pr_buf(out, "]"); -+} -+ -+void bch2_cpu_replicas_to_text(struct printbuf *out, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_replicas_entry *e; -+ bool first = true; -+ -+ for_each_cpu_replicas_entry(r, e) { -+ if (!first) -+ pr_buf(out, " "); -+ first = false; -+ -+ bch2_replicas_entry_to_text(out, e); -+ } -+} -+ -+static void extent_to_replicas(struct bkey_s_c k, -+ struct bch_replicas_entry *r) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ r->nr_required = 1; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ if (p.ptr.cached) -+ continue; -+ -+ if (!p.has_ec) -+ r->devs[r->nr_devs++] = p.ptr.dev; -+ else -+ r->nr_required = 0; -+ } -+} -+ -+static void stripe_to_replicas(struct bkey_s_c k, -+ struct bch_replicas_entry *r) -+{ -+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); -+ const struct bch_extent_ptr *ptr; -+ -+ r->nr_required = s.v->nr_blocks - s.v->nr_redundant; -+ -+ for (ptr = s.v->ptrs; -+ ptr < s.v->ptrs + s.v->nr_blocks; -+ ptr++) -+ r->devs[r->nr_devs++] = ptr->dev; -+} -+ -+void bch2_bkey_to_replicas(struct bch_replicas_entry *e, -+ struct bkey_s_c k) -+{ -+ e->nr_devs = 0; -+ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ e->data_type = BCH_DATA_BTREE; -+ extent_to_replicas(k, e); -+ break; -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ e->data_type = BCH_DATA_USER; -+ extent_to_replicas(k, e); -+ break; -+ case KEY_TYPE_stripe: -+ e->data_type = BCH_DATA_USER; -+ stripe_to_replicas(k, e); -+ break; -+ } -+ -+ replicas_entry_sort(e); -+} -+ -+void bch2_devlist_to_replicas(struct bch_replicas_entry *e, -+ enum bch_data_type data_type, -+ struct bch_devs_list devs) -+{ -+ unsigned i; -+ -+ BUG_ON(!data_type || -+ data_type == BCH_DATA_SB || -+ data_type >= BCH_DATA_NR); -+ -+ e->data_type = data_type; -+ e->nr_devs = 0; -+ e->nr_required = 1; -+ -+ for (i = 0; i < devs.nr; i++) -+ e->devs[e->nr_devs++] = devs.devs[i]; -+ -+ replicas_entry_sort(e); -+} -+ -+static 
struct bch_replicas_cpu -+cpu_replicas_add_entry(struct bch_replicas_cpu *old, -+ struct bch_replicas_entry *new_entry) -+{ -+ unsigned i; -+ struct bch_replicas_cpu new = { -+ .nr = old->nr + 1, -+ .entry_size = max_t(unsigned, old->entry_size, -+ replicas_entry_bytes(new_entry)), -+ }; -+ -+ BUG_ON(!new_entry->data_type); -+ verify_replicas_entry(new_entry); -+ -+ new.entries = kcalloc(new.nr, new.entry_size, GFP_NOIO); -+ if (!new.entries) -+ return new; -+ -+ for (i = 0; i < old->nr; i++) -+ memcpy(cpu_replicas_entry(&new, i), -+ cpu_replicas_entry(old, i), -+ old->entry_size); -+ -+ memcpy(cpu_replicas_entry(&new, old->nr), -+ new_entry, -+ replicas_entry_bytes(new_entry)); -+ -+ bch2_cpu_replicas_sort(&new); -+ return new; -+} -+ -+static inline int __replicas_entry_idx(struct bch_replicas_cpu *r, -+ struct bch_replicas_entry *search) -+{ -+ int idx, entry_size = replicas_entry_bytes(search); -+ -+ if (unlikely(entry_size > r->entry_size)) -+ return -1; -+ -+ verify_replicas_entry(search); -+ -+#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size) -+ idx = eytzinger0_find(r->entries, r->nr, r->entry_size, -+ entry_cmp, search); -+#undef entry_cmp -+ -+ return idx < r->nr ? idx : -1; -+} -+ -+int bch2_replicas_entry_idx(struct bch_fs *c, -+ struct bch_replicas_entry *search) -+{ -+ replicas_entry_sort(search); -+ -+ return __replicas_entry_idx(&c->replicas, search); -+} -+ -+static bool __replicas_has_entry(struct bch_replicas_cpu *r, -+ struct bch_replicas_entry *search) -+{ -+ return __replicas_entry_idx(r, search) >= 0; -+} -+ -+static bool bch2_replicas_marked_locked(struct bch_fs *c, -+ struct bch_replicas_entry *search, -+ bool check_gc_replicas) -+{ -+ if (!search->nr_devs) -+ return true; -+ -+ verify_replicas_entry(search); -+ -+ return __replicas_has_entry(&c->replicas, search) && -+ (!check_gc_replicas || -+ likely((!c->replicas_gc.entries)) || -+ __replicas_has_entry(&c->replicas_gc, search)); -+} -+ -+bool bch2_replicas_marked(struct bch_fs *c, -+ struct bch_replicas_entry *search, -+ bool check_gc_replicas) -+{ -+ bool marked; -+ -+ percpu_down_read(&c->mark_lock); -+ marked = bch2_replicas_marked_locked(c, search, check_gc_replicas); -+ percpu_up_read(&c->mark_lock); -+ -+ return marked; -+} -+ -+static void __replicas_table_update(struct bch_fs_usage *dst, -+ struct bch_replicas_cpu *dst_r, -+ struct bch_fs_usage *src, -+ struct bch_replicas_cpu *src_r) -+{ -+ int src_idx, dst_idx; -+ -+ *dst = *src; -+ -+ for (src_idx = 0; src_idx < src_r->nr; src_idx++) { -+ if (!src->replicas[src_idx]) -+ continue; -+ -+ dst_idx = __replicas_entry_idx(dst_r, -+ cpu_replicas_entry(src_r, src_idx)); -+ BUG_ON(dst_idx < 0); -+ -+ dst->replicas[dst_idx] = src->replicas[src_idx]; -+ } -+} -+ -+static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p, -+ struct bch_replicas_cpu *dst_r, -+ struct bch_fs_usage __percpu *src_p, -+ struct bch_replicas_cpu *src_r) -+{ -+ unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr; -+ struct bch_fs_usage *dst, *src = (void *) -+ bch2_acc_percpu_u64s((void *) src_p, src_nr); -+ -+ preempt_disable(); -+ dst = this_cpu_ptr(dst_p); -+ preempt_enable(); -+ -+ __replicas_table_update(dst, dst_r, src, src_r); -+} -+ -+/* -+ * Resize filesystem accounting: -+ */ -+static int replicas_table_update(struct bch_fs *c, -+ struct bch_replicas_cpu *new_r) -+{ -+ struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL }; -+ struct bch_fs_usage *new_scratch = NULL; -+ struct bch_fs_usage __percpu *new_gc = NULL; -+ struct 
bch_fs_usage *new_base = NULL; -+ unsigned bytes = sizeof(struct bch_fs_usage) + -+ sizeof(u64) * new_r->nr; -+ int ret = -ENOMEM; -+ -+ if (!(new_base = kzalloc(bytes, GFP_NOIO)) || -+ !(new_usage[0] = __alloc_percpu_gfp(bytes, sizeof(u64), -+ GFP_NOIO)) || -+ !(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64), -+ GFP_NOIO)) || -+ !(new_scratch = kmalloc(bytes, GFP_NOIO)) || -+ (c->usage_gc && -+ !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO)))) { -+ bch_err(c, "error updating replicas table: memory allocation failure"); -+ goto err; -+ } -+ -+ if (c->usage_base) -+ __replicas_table_update(new_base, new_r, -+ c->usage_base, &c->replicas); -+ if (c->usage[0]) -+ __replicas_table_update_pcpu(new_usage[0], new_r, -+ c->usage[0], &c->replicas); -+ if (c->usage[1]) -+ __replicas_table_update_pcpu(new_usage[1], new_r, -+ c->usage[1], &c->replicas); -+ if (c->usage_gc) -+ __replicas_table_update_pcpu(new_gc, new_r, -+ c->usage_gc, &c->replicas); -+ -+ swap(c->usage_base, new_base); -+ swap(c->usage[0], new_usage[0]); -+ swap(c->usage[1], new_usage[1]); -+ swap(c->usage_scratch, new_scratch); -+ swap(c->usage_gc, new_gc); -+ swap(c->replicas, *new_r); -+ ret = 0; -+err: -+ free_percpu(new_gc); -+ kfree(new_scratch); -+ free_percpu(new_usage[1]); -+ free_percpu(new_usage[0]); -+ kfree(new_base); -+ return ret; -+} -+ -+static unsigned reserve_journal_replicas(struct bch_fs *c, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_replicas_entry *e; -+ unsigned journal_res_u64s = 0; -+ -+ /* nr_inodes: */ -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)); -+ -+ /* key_version: */ -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)); -+ -+ /* persistent_reserved: */ -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) * -+ BCH_REPLICAS_MAX; -+ -+ for_each_cpu_replicas_entry(r, e) -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) + -+ e->nr_devs, sizeof(u64)); -+ return journal_res_u64s; -+} -+ -+noinline -+static int bch2_mark_replicas_slowpath(struct bch_fs *c, -+ struct bch_replicas_entry *new_entry) -+{ -+ struct bch_replicas_cpu new_r, new_gc; -+ int ret = 0; -+ -+ verify_replicas_entry(new_entry); -+ -+ memset(&new_r, 0, sizeof(new_r)); -+ memset(&new_gc, 0, sizeof(new_gc)); -+ -+ mutex_lock(&c->sb_lock); -+ -+ if (c->replicas_gc.entries && -+ !__replicas_has_entry(&c->replicas_gc, new_entry)) { -+ new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry); -+ if (!new_gc.entries) -+ goto err; -+ } -+ -+ if (!__replicas_has_entry(&c->replicas, new_entry)) { -+ new_r = cpu_replicas_add_entry(&c->replicas, new_entry); -+ if (!new_r.entries) -+ goto err; -+ -+ ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r); -+ if (ret) -+ goto err; -+ -+ bch2_journal_entry_res_resize(&c->journal, -+ &c->replicas_journal_res, -+ reserve_journal_replicas(c, &new_r)); -+ } -+ -+ if (!new_r.entries && -+ !new_gc.entries) -+ goto out; -+ -+ /* allocations done, now commit: */ -+ -+ if (new_r.entries) -+ bch2_write_super(c); -+ -+ /* don't update in memory replicas until changes are persistent */ -+ percpu_down_write(&c->mark_lock); -+ if (new_r.entries) -+ ret = replicas_table_update(c, &new_r); -+ if (new_gc.entries) -+ swap(new_gc, c->replicas_gc); -+ percpu_up_write(&c->mark_lock); -+out: -+ mutex_unlock(&c->sb_lock); -+ -+ kfree(new_r.entries); -+ kfree(new_gc.entries); -+ -+ return ret; -+err: -+ bch_err(c, "error adding replicas entry: memory allocation failure"); -+ ret = 
-ENOMEM; -+ goto out; -+} -+ -+int bch2_mark_replicas(struct bch_fs *c, -+ struct bch_replicas_entry *r) -+{ -+ return likely(bch2_replicas_marked(c, r, true)) -+ ? 0 -+ : bch2_mark_replicas_slowpath(c, r); -+} -+ -+bool bch2_bkey_replicas_marked_locked(struct bch_fs *c, -+ struct bkey_s_c k, -+ bool check_gc_replicas) -+{ -+ struct bch_replicas_padded search; -+ struct bch_devs_list cached = bch2_bkey_cached_devs(k); -+ unsigned i; -+ -+ for (i = 0; i < cached.nr; i++) { -+ bch2_replicas_entry_cached(&search.e, cached.devs[i]); -+ -+ if (!bch2_replicas_marked_locked(c, &search.e, -+ check_gc_replicas)) -+ return false; -+ } -+ -+ bch2_bkey_to_replicas(&search.e, k); -+ -+ return bch2_replicas_marked_locked(c, &search.e, check_gc_replicas); -+} -+ -+bool bch2_bkey_replicas_marked(struct bch_fs *c, -+ struct bkey_s_c k, -+ bool check_gc_replicas) -+{ -+ bool marked; -+ -+ percpu_down_read(&c->mark_lock); -+ marked = bch2_bkey_replicas_marked_locked(c, k, check_gc_replicas); -+ percpu_up_read(&c->mark_lock); -+ -+ return marked; -+} -+ -+int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bch_replicas_padded search; -+ struct bch_devs_list cached = bch2_bkey_cached_devs(k); -+ unsigned i; -+ int ret; -+ -+ for (i = 0; i < cached.nr; i++) { -+ bch2_replicas_entry_cached(&search.e, cached.devs[i]); -+ -+ ret = bch2_mark_replicas(c, &search.e); -+ if (ret) -+ return ret; -+ } -+ -+ bch2_bkey_to_replicas(&search.e, k); -+ -+ return bch2_mark_replicas(c, &search.e); -+} -+ -+int bch2_replicas_gc_end(struct bch_fs *c, int ret) -+{ -+ unsigned i; -+ -+ lockdep_assert_held(&c->replicas_gc_lock); -+ -+ mutex_lock(&c->sb_lock); -+ percpu_down_write(&c->mark_lock); -+ -+ /* -+ * this is kind of crappy; the replicas gc mechanism needs to be ripped -+ * out -+ */ -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ struct bch_replicas_cpu n; -+ -+ if (!__replicas_has_entry(&c->replicas_gc, e) && -+ (c->usage_base->replicas[i] || -+ percpu_u64_get(&c->usage[0]->replicas[i]) || -+ percpu_u64_get(&c->usage[1]->replicas[i]))) { -+ n = cpu_replicas_add_entry(&c->replicas_gc, e); -+ if (!n.entries) { -+ ret = -ENOSPC; -+ goto err; -+ } -+ -+ swap(n, c->replicas_gc); -+ kfree(n.entries); -+ } -+ } -+ -+ if (bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc)) { -+ ret = -ENOSPC; -+ goto err; -+ } -+ -+ ret = replicas_table_update(c, &c->replicas_gc); -+err: -+ kfree(c->replicas_gc.entries); -+ c->replicas_gc.entries = NULL; -+ -+ percpu_up_write(&c->mark_lock); -+ -+ if (!ret) -+ bch2_write_super(c); -+ -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) -+{ -+ struct bch_replicas_entry *e; -+ unsigned i = 0; -+ -+ lockdep_assert_held(&c->replicas_gc_lock); -+ -+ mutex_lock(&c->sb_lock); -+ BUG_ON(c->replicas_gc.entries); -+ -+ c->replicas_gc.nr = 0; -+ c->replicas_gc.entry_size = 0; -+ -+ for_each_cpu_replicas_entry(&c->replicas, e) -+ if (!((1 << e->data_type) & typemask)) { -+ c->replicas_gc.nr++; -+ c->replicas_gc.entry_size = -+ max_t(unsigned, c->replicas_gc.entry_size, -+ replicas_entry_bytes(e)); -+ } -+ -+ c->replicas_gc.entries = kcalloc(c->replicas_gc.nr, -+ c->replicas_gc.entry_size, -+ GFP_NOIO); -+ if (!c->replicas_gc.entries) { -+ mutex_unlock(&c->sb_lock); -+ bch_err(c, "error allocating c->replicas_gc"); -+ return -ENOMEM; -+ } -+ -+ for_each_cpu_replicas_entry(&c->replicas, e) -+ if (!((1 << e->data_type) & typemask)) -+ 
memcpy(cpu_replicas_entry(&c->replicas_gc, i++), -+ e, c->replicas_gc.entry_size); -+ -+ bch2_cpu_replicas_sort(&c->replicas_gc); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+int bch2_replicas_gc2(struct bch_fs *c) -+{ -+ struct bch_replicas_cpu new = { 0 }; -+ unsigned i, nr; -+ int ret = 0; -+ -+ bch2_journal_meta(&c->journal); -+retry: -+ nr = READ_ONCE(c->replicas.nr); -+ new.entry_size = READ_ONCE(c->replicas.entry_size); -+ new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL); -+ if (!new.entries) { -+ bch_err(c, "error allocating c->replicas_gc"); -+ return -ENOMEM; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ percpu_down_write(&c->mark_lock); -+ -+ if (nr != c->replicas.nr || -+ new.entry_size != c->replicas.entry_size) { -+ percpu_up_write(&c->mark_lock); -+ mutex_unlock(&c->sb_lock); -+ kfree(new.entries); -+ goto retry; -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ if (e->data_type == BCH_DATA_JOURNAL || -+ c->usage_base->replicas[i] || -+ percpu_u64_get(&c->usage[0]->replicas[i]) || -+ percpu_u64_get(&c->usage[1]->replicas[i])) -+ memcpy(cpu_replicas_entry(&new, new.nr++), -+ e, new.entry_size); -+ } -+ -+ bch2_cpu_replicas_sort(&new); -+ -+ if (bch2_cpu_replicas_to_sb_replicas(c, &new)) { -+ ret = -ENOSPC; -+ goto err; -+ } -+ -+ ret = replicas_table_update(c, &new); -+err: -+ kfree(new.entries); -+ -+ percpu_up_write(&c->mark_lock); -+ -+ if (!ret) -+ bch2_write_super(c); -+ -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_replicas_set_usage(struct bch_fs *c, -+ struct bch_replicas_entry *r, -+ u64 sectors) -+{ -+ int ret, idx = bch2_replicas_entry_idx(c, r); -+ -+ if (idx < 0) { -+ struct bch_replicas_cpu n; -+ -+ n = cpu_replicas_add_entry(&c->replicas, r); -+ if (!n.entries) -+ return -ENOMEM; -+ -+ ret = replicas_table_update(c, &n); -+ if (ret) -+ return ret; -+ -+ kfree(n.entries); -+ -+ idx = bch2_replicas_entry_idx(c, r); -+ BUG_ON(ret < 0); -+ } -+ -+ c->usage_base->replicas[idx] = sectors; -+ -+ return 0; -+} -+ -+/* Replicas tracking - superblock: */ -+ -+static int -+__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r, -+ struct bch_replicas_cpu *cpu_r) -+{ -+ struct bch_replicas_entry *e, *dst; -+ unsigned nr = 0, entry_size = 0, idx = 0; -+ -+ for_each_replicas_entry(sb_r, e) { -+ entry_size = max_t(unsigned, entry_size, -+ replicas_entry_bytes(e)); -+ nr++; -+ } -+ -+ cpu_r->entries = kcalloc(nr, entry_size, GFP_NOIO); -+ if (!cpu_r->entries) -+ return -ENOMEM; -+ -+ cpu_r->nr = nr; -+ cpu_r->entry_size = entry_size; -+ -+ for_each_replicas_entry(sb_r, e) { -+ dst = cpu_replicas_entry(cpu_r, idx++); -+ memcpy(dst, e, replicas_entry_bytes(e)); -+ replicas_entry_sort(dst); -+ } -+ -+ return 0; -+} -+ -+static int -+__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r, -+ struct bch_replicas_cpu *cpu_r) -+{ -+ struct bch_replicas_entry_v0 *e; -+ unsigned nr = 0, entry_size = 0, idx = 0; -+ -+ for_each_replicas_entry(sb_r, e) { -+ entry_size = max_t(unsigned, entry_size, -+ replicas_entry_bytes(e)); -+ nr++; -+ } -+ -+ entry_size += sizeof(struct bch_replicas_entry) - -+ sizeof(struct bch_replicas_entry_v0); -+ -+ cpu_r->entries = kcalloc(nr, entry_size, GFP_NOIO); -+ if (!cpu_r->entries) -+ return -ENOMEM; -+ -+ cpu_r->nr = nr; -+ cpu_r->entry_size = entry_size; -+ -+ for_each_replicas_entry(sb_r, e) { -+ struct bch_replicas_entry *dst = -+ cpu_replicas_entry(cpu_r, idx++); -+ -+ dst->data_type = e->data_type; -+ 
dst->nr_devs = e->nr_devs; -+ dst->nr_required = 1; -+ memcpy(dst->devs, e->devs, e->nr_devs); -+ replicas_entry_sort(dst); -+ } -+ -+ return 0; -+} -+ -+int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) -+{ -+ struct bch_sb_field_replicas *sb_v1; -+ struct bch_sb_field_replicas_v0 *sb_v0; -+ struct bch_replicas_cpu new_r = { 0, 0, NULL }; -+ int ret = 0; -+ -+ if ((sb_v1 = bch2_sb_get_replicas(c->disk_sb.sb))) -+ ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r); -+ else if ((sb_v0 = bch2_sb_get_replicas_v0(c->disk_sb.sb))) -+ ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r); -+ -+ if (ret) -+ return -ENOMEM; -+ -+ bch2_cpu_replicas_sort(&new_r); -+ -+ percpu_down_write(&c->mark_lock); -+ -+ ret = replicas_table_update(c, &new_r); -+ percpu_up_write(&c->mark_lock); -+ -+ kfree(new_r.entries); -+ -+ return 0; -+} -+ -+static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_sb_field_replicas_v0 *sb_r; -+ struct bch_replicas_entry_v0 *dst; -+ struct bch_replicas_entry *src; -+ size_t bytes; -+ -+ bytes = sizeof(struct bch_sb_field_replicas); -+ -+ for_each_cpu_replicas_entry(r, src) -+ bytes += replicas_entry_bytes(src) - 1; -+ -+ sb_r = bch2_sb_resize_replicas_v0(&c->disk_sb, -+ DIV_ROUND_UP(bytes, sizeof(u64))); -+ if (!sb_r) -+ return -ENOSPC; -+ -+ bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas); -+ sb_r = bch2_sb_get_replicas_v0(c->disk_sb.sb); -+ -+ memset(&sb_r->entries, 0, -+ vstruct_end(&sb_r->field) - -+ (void *) &sb_r->entries); -+ -+ dst = sb_r->entries; -+ for_each_cpu_replicas_entry(r, src) { -+ dst->data_type = src->data_type; -+ dst->nr_devs = src->nr_devs; -+ memcpy(dst->devs, src->devs, src->nr_devs); -+ -+ dst = replicas_entry_next(dst); -+ -+ BUG_ON((void *) dst > vstruct_end(&sb_r->field)); -+ } -+ -+ return 0; -+} -+ -+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_sb_field_replicas *sb_r; -+ struct bch_replicas_entry *dst, *src; -+ bool need_v1 = false; -+ size_t bytes; -+ -+ bytes = sizeof(struct bch_sb_field_replicas); -+ -+ for_each_cpu_replicas_entry(r, src) { -+ bytes += replicas_entry_bytes(src); -+ if (src->nr_required != 1) -+ need_v1 = true; -+ } -+ -+ if (!need_v1) -+ return bch2_cpu_replicas_to_sb_replicas_v0(c, r); -+ -+ sb_r = bch2_sb_resize_replicas(&c->disk_sb, -+ DIV_ROUND_UP(bytes, sizeof(u64))); -+ if (!sb_r) -+ return -ENOSPC; -+ -+ bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0); -+ sb_r = bch2_sb_get_replicas(c->disk_sb.sb); -+ -+ memset(&sb_r->entries, 0, -+ vstruct_end(&sb_r->field) - -+ (void *) &sb_r->entries); -+ -+ dst = sb_r->entries; -+ for_each_cpu_replicas_entry(r, src) { -+ memcpy(dst, src, replicas_entry_bytes(src)); -+ -+ dst = replicas_entry_next(dst); -+ -+ BUG_ON((void *) dst > vstruct_end(&sb_r->field)); -+ } -+ -+ return 0; -+} -+ -+static const char *check_dup_replicas_entries(struct bch_replicas_cpu *cpu_r) -+{ -+ unsigned i; -+ -+ sort_cmp_size(cpu_r->entries, -+ cpu_r->nr, -+ cpu_r->entry_size, -+ memcmp, NULL); -+ -+ for (i = 0; i + 1 < cpu_r->nr; i++) { -+ struct bch_replicas_entry *l = -+ cpu_replicas_entry(cpu_r, i); -+ struct bch_replicas_entry *r = -+ cpu_replicas_entry(cpu_r, i + 1); -+ -+ BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0); -+ -+ if (!memcmp(l, r, cpu_r->entry_size)) -+ return "duplicate replicas entry"; -+ } -+ -+ return NULL; -+} -+ -+static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f) -+{ -+ struct 
bch_sb_field_replicas *sb_r = field_to_type(f, replicas); -+ struct bch_sb_field_members *mi = bch2_sb_get_members(sb); -+ struct bch_replicas_cpu cpu_r = { .entries = NULL }; -+ struct bch_replicas_entry *e; -+ const char *err; -+ unsigned i; -+ -+ for_each_replicas_entry(sb_r, e) { -+ err = "invalid replicas entry: invalid data type"; -+ if (e->data_type >= BCH_DATA_NR) -+ goto err; -+ -+ err = "invalid replicas entry: no devices"; -+ if (!e->nr_devs) -+ goto err; -+ -+ err = "invalid replicas entry: bad nr_required"; -+ if (e->nr_required > 1 && -+ e->nr_required >= e->nr_devs) -+ goto err; -+ -+ err = "invalid replicas entry: invalid device"; -+ for (i = 0; i < e->nr_devs; i++) -+ if (!bch2_dev_exists(sb, mi, e->devs[i])) -+ goto err; -+ } -+ -+ err = "cannot allocate memory"; -+ if (__bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r)) -+ goto err; -+ -+ err = check_dup_replicas_entries(&cpu_r); -+err: -+ kfree(cpu_r.entries); -+ return err; -+} -+ -+static void bch2_sb_replicas_to_text(struct printbuf *out, -+ struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_replicas *r = field_to_type(f, replicas); -+ struct bch_replicas_entry *e; -+ bool first = true; -+ -+ for_each_replicas_entry(r, e) { -+ if (!first) -+ pr_buf(out, " "); -+ first = false; -+ -+ bch2_replicas_entry_to_text(out, e); -+ } -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_replicas = { -+ .validate = bch2_sb_validate_replicas, -+ .to_text = bch2_sb_replicas_to_text, -+}; -+ -+static const char *bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f) -+{ -+ struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0); -+ struct bch_sb_field_members *mi = bch2_sb_get_members(sb); -+ struct bch_replicas_cpu cpu_r = { .entries = NULL }; -+ struct bch_replicas_entry_v0 *e; -+ const char *err; -+ unsigned i; -+ -+ for_each_replicas_entry_v0(sb_r, e) { -+ err = "invalid replicas entry: invalid data type"; -+ if (e->data_type >= BCH_DATA_NR) -+ goto err; -+ -+ err = "invalid replicas entry: no devices"; -+ if (!e->nr_devs) -+ goto err; -+ -+ err = "invalid replicas entry: invalid device"; -+ for (i = 0; i < e->nr_devs; i++) -+ if (!bch2_dev_exists(sb, mi, e->devs[i])) -+ goto err; -+ } -+ -+ err = "cannot allocate memory"; -+ if (__bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r)) -+ goto err; -+ -+ err = check_dup_replicas_entries(&cpu_r); -+err: -+ kfree(cpu_r.entries); -+ return err; -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = { -+ .validate = bch2_sb_validate_replicas_v0, -+}; -+ -+/* Query replicas: */ -+ -+struct replicas_status __bch2_replicas_status(struct bch_fs *c, -+ struct bch_devs_mask online_devs) -+{ -+ struct bch_sb_field_members *mi; -+ struct bch_replicas_entry *e; -+ unsigned i, nr_online, nr_offline; -+ struct replicas_status ret; -+ -+ memset(&ret, 0, sizeof(ret)); -+ -+ for (i = 0; i < ARRAY_SIZE(ret.replicas); i++) -+ ret.replicas[i].redundancy = INT_MAX; -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ -+ percpu_down_read(&c->mark_lock); -+ -+ for_each_cpu_replicas_entry(&c->replicas, e) { -+ if (e->data_type >= ARRAY_SIZE(ret.replicas)) -+ panic("e %p data_type %u\n", e, e->data_type); -+ -+ nr_online = nr_offline = 0; -+ -+ for (i = 0; i < e->nr_devs; i++) { -+ BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi, -+ e->devs[i])); -+ -+ if (test_bit(e->devs[i], online_devs.d)) -+ nr_online++; -+ else -+ nr_offline++; -+ } -+ -+ ret.replicas[e->data_type].redundancy = -+ min(ret.replicas[e->data_type].redundancy, -+ (int) nr_online - 
(int) e->nr_required); -+ -+ ret.replicas[e->data_type].nr_offline = -+ max(ret.replicas[e->data_type].nr_offline, -+ nr_offline); -+ } -+ -+ percpu_up_read(&c->mark_lock); -+ -+ for (i = 0; i < ARRAY_SIZE(ret.replicas); i++) -+ if (ret.replicas[i].redundancy == INT_MAX) -+ ret.replicas[i].redundancy = 0; -+ -+ return ret; -+} -+ -+struct replicas_status bch2_replicas_status(struct bch_fs *c) -+{ -+ return __bch2_replicas_status(c, bch2_online_devs(c)); -+} -+ -+static bool have_enough_devs(struct replicas_status s, -+ enum bch_data_type type, -+ bool force_if_degraded, -+ bool force_if_lost) -+{ -+ return (!s.replicas[type].nr_offline || force_if_degraded) && -+ (s.replicas[type].redundancy >= 0 || force_if_lost); -+} -+ -+bool bch2_have_enough_devs(struct replicas_status s, unsigned flags) -+{ -+ return (have_enough_devs(s, BCH_DATA_JOURNAL, -+ flags & BCH_FORCE_IF_METADATA_DEGRADED, -+ flags & BCH_FORCE_IF_METADATA_LOST) && -+ have_enough_devs(s, BCH_DATA_BTREE, -+ flags & BCH_FORCE_IF_METADATA_DEGRADED, -+ flags & BCH_FORCE_IF_METADATA_LOST) && -+ have_enough_devs(s, BCH_DATA_USER, -+ flags & BCH_FORCE_IF_DATA_DEGRADED, -+ flags & BCH_FORCE_IF_DATA_LOST)); -+} -+ -+int bch2_replicas_online(struct bch_fs *c, bool meta) -+{ -+ struct replicas_status s = bch2_replicas_status(c); -+ -+ return (meta -+ ? min(s.replicas[BCH_DATA_JOURNAL].redundancy, -+ s.replicas[BCH_DATA_BTREE].redundancy) -+ : s.replicas[BCH_DATA_USER].redundancy) + 1; -+} -+ -+unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bch_replicas_entry *e; -+ unsigned i, ret = 0; -+ -+ percpu_down_read(&c->mark_lock); -+ -+ for_each_cpu_replicas_entry(&c->replicas, e) -+ for (i = 0; i < e->nr_devs; i++) -+ if (e->devs[i] == ca->dev_idx) -+ ret |= 1 << e->data_type; -+ -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+int bch2_fs_replicas_init(struct bch_fs *c) -+{ -+ c->journal.entry_u64s_reserved += -+ reserve_journal_replicas(c, &c->replicas); -+ -+ return replicas_table_update(c, &c->replicas); -+} -diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h -new file mode 100644 -index 000000000000..8527d82841bb ---- /dev/null -+++ b/fs/bcachefs/replicas.h -@@ -0,0 +1,95 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REPLICAS_H -+#define _BCACHEFS_REPLICAS_H -+ -+#include "eytzinger.h" -+#include "replicas_types.h" -+ -+void bch2_replicas_entry_to_text(struct printbuf *, -+ struct bch_replicas_entry *); -+void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *); -+ -+static inline struct bch_replicas_entry * -+cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i) -+{ -+ return (void *) r->entries + r->entry_size * i; -+} -+ -+int bch2_replicas_entry_idx(struct bch_fs *, -+ struct bch_replicas_entry *); -+ -+void bch2_devlist_to_replicas(struct bch_replicas_entry *, -+ enum bch_data_type, -+ struct bch_devs_list); -+bool bch2_replicas_marked(struct bch_fs *, -+ struct bch_replicas_entry *, bool); -+int bch2_mark_replicas(struct bch_fs *, -+ struct bch_replicas_entry *); -+ -+bool bch2_bkey_replicas_marked_locked(struct bch_fs *, -+ struct bkey_s_c, bool); -+void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c); -+bool bch2_bkey_replicas_marked(struct bch_fs *, -+ struct bkey_s_c, bool); -+int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c); -+ -+static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e, -+ unsigned dev) -+{ -+ e->data_type = BCH_DATA_CACHED; -+ e->nr_devs = 1; -+ e->nr_required = 1; 
-+ e->devs[0] = dev; -+} -+ -+struct replicas_status { -+ struct { -+ int redundancy; -+ unsigned nr_offline; -+ } replicas[BCH_DATA_NR]; -+}; -+ -+struct replicas_status __bch2_replicas_status(struct bch_fs *, -+ struct bch_devs_mask); -+struct replicas_status bch2_replicas_status(struct bch_fs *); -+bool bch2_have_enough_devs(struct replicas_status, unsigned); -+ -+int bch2_replicas_online(struct bch_fs *, bool); -+unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *); -+ -+int bch2_replicas_gc_end(struct bch_fs *, int); -+int bch2_replicas_gc_start(struct bch_fs *, unsigned); -+int bch2_replicas_gc2(struct bch_fs *); -+ -+int bch2_replicas_set_usage(struct bch_fs *, -+ struct bch_replicas_entry *, -+ u64); -+ -+#define for_each_cpu_replicas_entry(_r, _i) \ -+ for (_i = (_r)->entries; \ -+ (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\ -+ _i = (void *) (_i) + (_r)->entry_size) -+ -+/* iterate over superblock replicas - used by userspace tools: */ -+ -+#define replicas_entry_next(_i) \ -+ ((typeof(_i)) ((void *) (_i) + replicas_entry_bytes(_i))) -+ -+#define for_each_replicas_entry(_r, _i) \ -+ for (_i = (_r)->entries; \ -+ (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\ -+ (_i) = replicas_entry_next(_i)) -+ -+#define for_each_replicas_entry_v0(_r, _i) \ -+ for (_i = (_r)->entries; \ -+ (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\ -+ (_i) = replicas_entry_next(_i)) -+ -+int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *); -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas; -+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0; -+ -+int bch2_fs_replicas_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_REPLICAS_H */ -diff --git a/fs/bcachefs/replicas_types.h b/fs/bcachefs/replicas_types.h -new file mode 100644 -index 000000000000..0535b1d3760e ---- /dev/null -+++ b/fs/bcachefs/replicas_types.h -@@ -0,0 +1,10 @@ -+#ifndef _BCACHEFS_REPLICAS_TYPES_H -+#define _BCACHEFS_REPLICAS_TYPES_H -+ -+struct bch_replicas_cpu { -+ unsigned nr; -+ unsigned entry_size; -+ struct bch_replicas_entry *entries; -+}; -+ -+#endif /* _BCACHEFS_REPLICAS_TYPES_H */ -diff --git a/fs/bcachefs/siphash.c b/fs/bcachefs/siphash.c -new file mode 100644 -index 000000000000..c062edb3fbc2 ---- /dev/null -+++ b/fs/bcachefs/siphash.c -@@ -0,0 +1,173 @@ -+// SPDX-License-Identifier: BSD-3-Clause -+/* $OpenBSD: siphash.c,v 1.3 2015/02/20 11:51:03 tedu Exp $ */ -+ -+/*- -+ * Copyright (c) 2013 Andre Oppermann -+ * All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. The name of the author may not be used to endorse or promote -+ * products derived from this software without specific prior written -+ * permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ */ -+ -+/* -+ * SipHash is a family of PRFs SipHash-c-d where the integer parameters c and d -+ * are the number of compression rounds and the number of finalization rounds. -+ * A compression round is identical to a finalization round and this round -+ * function is called SipRound. Given a 128-bit key k and a (possibly empty) -+ * byte string m, SipHash-c-d returns a 64-bit value SipHash-c-d(k; m). -+ * -+ * Implemented from the paper "SipHash: a fast short-input PRF", 2012.09.18, -+ * by Jean-Philippe Aumasson and Daniel J. Bernstein, -+ * Permanent Document ID b9a943a805fbfc6fde808af9fc0ecdfa -+ * https://131002.net/siphash/siphash.pdf -+ * https://131002.net/siphash/ -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#include "siphash.h" -+ -+static void SipHash_Rounds(SIPHASH_CTX *ctx, int rounds) -+{ -+ while (rounds--) { -+ ctx->v[0] += ctx->v[1]; -+ ctx->v[2] += ctx->v[3]; -+ ctx->v[1] = rol64(ctx->v[1], 13); -+ ctx->v[3] = rol64(ctx->v[3], 16); -+ -+ ctx->v[1] ^= ctx->v[0]; -+ ctx->v[3] ^= ctx->v[2]; -+ ctx->v[0] = rol64(ctx->v[0], 32); -+ -+ ctx->v[2] += ctx->v[1]; -+ ctx->v[0] += ctx->v[3]; -+ ctx->v[1] = rol64(ctx->v[1], 17); -+ ctx->v[3] = rol64(ctx->v[3], 21); -+ -+ ctx->v[1] ^= ctx->v[2]; -+ ctx->v[3] ^= ctx->v[0]; -+ ctx->v[2] = rol64(ctx->v[2], 32); -+ } -+} -+ -+static void SipHash_CRounds(SIPHASH_CTX *ctx, const void *ptr, int rounds) -+{ -+ u64 m = get_unaligned_le64(ptr); -+ -+ ctx->v[3] ^= m; -+ SipHash_Rounds(ctx, rounds); -+ ctx->v[0] ^= m; -+} -+ -+void SipHash_Init(SIPHASH_CTX *ctx, const SIPHASH_KEY *key) -+{ -+ u64 k0, k1; -+ -+ k0 = le64_to_cpu(key->k0); -+ k1 = le64_to_cpu(key->k1); -+ -+ ctx->v[0] = 0x736f6d6570736575ULL ^ k0; -+ ctx->v[1] = 0x646f72616e646f6dULL ^ k1; -+ ctx->v[2] = 0x6c7967656e657261ULL ^ k0; -+ ctx->v[3] = 0x7465646279746573ULL ^ k1; -+ -+ memset(ctx->buf, 0, sizeof(ctx->buf)); -+ ctx->bytes = 0; -+} -+ -+void SipHash_Update(SIPHASH_CTX *ctx, int rc, int rf, -+ const void *src, size_t len) -+{ -+ const u8 *ptr = src; -+ size_t left, used; -+ -+ if (len == 0) -+ return; -+ -+ used = ctx->bytes % sizeof(ctx->buf); -+ ctx->bytes += len; -+ -+ if (used > 0) { -+ left = sizeof(ctx->buf) - used; -+ -+ if (len >= left) { -+ memcpy(&ctx->buf[used], ptr, left); -+ SipHash_CRounds(ctx, ctx->buf, rc); -+ len -= left; -+ ptr += left; -+ } else { -+ memcpy(&ctx->buf[used], ptr, len); -+ return; -+ } -+ } -+ -+ while (len >= sizeof(ctx->buf)) { -+ SipHash_CRounds(ctx, ptr, rc); -+ len -= sizeof(ctx->buf); -+ ptr += sizeof(ctx->buf); -+ } -+ -+ if (len > 0) -+ memcpy(&ctx->buf[used], ptr, len); -+} -+ -+void SipHash_Final(void *dst, SIPHASH_CTX *ctx, int rc, int rf) -+{ -+ u64 r; -+ -+ r = SipHash_End(ctx, rc, rf); -+ -+ *((__le64 *) dst) = cpu_to_le64(r); -+} -+ -+u64 SipHash_End(SIPHASH_CTX *ctx, int rc, int rf) -+{ -+ u64 r; -+ size_t left, used; -+ -+ used = ctx->bytes % sizeof(ctx->buf); -+ left = sizeof(ctx->buf) - used; -+ memset(&ctx->buf[used], 0, left - 1); -+ ctx->buf[7] = ctx->bytes; -+ -+ 
SipHash_CRounds(ctx, ctx->buf, rc); -+ ctx->v[2] ^= 0xff; -+ SipHash_Rounds(ctx, rf); -+ -+ r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]); -+ memset(ctx, 0, sizeof(*ctx)); -+ return (r); -+} -+ -+u64 SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len) -+{ -+ SIPHASH_CTX ctx; -+ -+ SipHash_Init(&ctx, key); -+ SipHash_Update(&ctx, rc, rf, src, len); -+ return SipHash_End(&ctx, rc, rf); -+} -diff --git a/fs/bcachefs/siphash.h b/fs/bcachefs/siphash.h -new file mode 100644 -index 000000000000..3dfaf34a43b2 ---- /dev/null -+++ b/fs/bcachefs/siphash.h -@@ -0,0 +1,87 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* $OpenBSD: siphash.h,v 1.5 2015/02/20 11:51:03 tedu Exp $ */ -+/*- -+ * Copyright (c) 2013 Andre Oppermann -+ * All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. The name of the author may not be used to endorse or promote -+ * products derived from this software without specific prior written -+ * permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * $FreeBSD$ -+ */ -+ -+/* -+ * SipHash is a family of pseudorandom functions (a.k.a. keyed hash functions) -+ * optimized for speed on short messages returning a 64bit hash/digest value. 
-+ * -+ * The number of rounds is defined during the initialization: -+ * SipHash24_Init() for the fast and resonable strong version -+ * SipHash48_Init() for the strong version (half as fast) -+ * -+ * struct SIPHASH_CTX ctx; -+ * SipHash24_Init(&ctx); -+ * SipHash_SetKey(&ctx, "16bytes long key"); -+ * SipHash_Update(&ctx, pointer_to_string, length_of_string); -+ * SipHash_Final(output, &ctx); -+ */ -+ -+#ifndef _SIPHASH_H_ -+#define _SIPHASH_H_ -+ -+#include -+ -+#define SIPHASH_BLOCK_LENGTH 8 -+#define SIPHASH_KEY_LENGTH 16 -+#define SIPHASH_DIGEST_LENGTH 8 -+ -+typedef struct _SIPHASH_CTX { -+ u64 v[4]; -+ u8 buf[SIPHASH_BLOCK_LENGTH]; -+ u32 bytes; -+} SIPHASH_CTX; -+ -+typedef struct { -+ __le64 k0; -+ __le64 k1; -+} SIPHASH_KEY; -+ -+void SipHash_Init(SIPHASH_CTX *, const SIPHASH_KEY *); -+void SipHash_Update(SIPHASH_CTX *, int, int, const void *, size_t); -+u64 SipHash_End(SIPHASH_CTX *, int, int); -+void SipHash_Final(void *, SIPHASH_CTX *, int, int); -+u64 SipHash(const SIPHASH_KEY *, int, int, const void *, size_t); -+ -+#define SipHash24_Init(_c, _k) SipHash_Init((_c), (_k)) -+#define SipHash24_Update(_c, _p, _l) SipHash_Update((_c), 2, 4, (_p), (_l)) -+#define SipHash24_End(_d) SipHash_End((_d), 2, 4) -+#define SipHash24_Final(_d, _c) SipHash_Final((_d), (_c), 2, 4) -+#define SipHash24(_k, _p, _l) SipHash((_k), 2, 4, (_p), (_l)) -+ -+#define SipHash48_Init(_c, _k) SipHash_Init((_c), (_k)) -+#define SipHash48_Update(_c, _p, _l) SipHash_Update((_c), 4, 8, (_p), (_l)) -+#define SipHash48_End(_d) SipHash_End((_d), 4, 8) -+#define SipHash48_Final(_d, _c) SipHash_Final((_d), (_c), 4, 8) -+#define SipHash48(_k, _p, _l) SipHash((_k), 4, 8, (_p), (_l)) -+ -+#endif /* _SIPHASH_H_ */ -diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h -new file mode 100644 -index 000000000000..dea9b7252b88 ---- /dev/null -+++ b/fs/bcachefs/str_hash.h -@@ -0,0 +1,336 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_STR_HASH_H -+#define _BCACHEFS_STR_HASH_H -+ -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "checksum.h" -+#include "error.h" -+#include "inode.h" -+#include "siphash.h" -+#include "super.h" -+ -+#include -+#include -+#include -+ -+static inline enum bch_str_hash_type -+bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) -+{ -+ switch (opt) { -+ case BCH_STR_HASH_OPT_CRC32C: -+ return BCH_STR_HASH_CRC32C; -+ case BCH_STR_HASH_OPT_CRC64: -+ return BCH_STR_HASH_CRC64; -+ case BCH_STR_HASH_OPT_SIPHASH: -+ return c->sb.features & (1ULL << BCH_FEATURE_new_siphash) -+ ? 
BCH_STR_HASH_SIPHASH -+ : BCH_STR_HASH_SIPHASH_OLD; -+ default: -+ BUG(); -+ } -+} -+ -+struct bch_hash_info { -+ u8 type; -+ union { -+ __le64 crc_key; -+ SIPHASH_KEY siphash_key; -+ }; -+}; -+ -+static inline struct bch_hash_info -+bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) -+{ -+ /* XXX ick */ -+ struct bch_hash_info info = { -+ .type = (bi->bi_flags >> INODE_STR_HASH_OFFSET) & -+ ~(~0U << INODE_STR_HASH_BITS), -+ .crc_key = bi->bi_hash_seed, -+ }; -+ -+ if (unlikely(info.type == BCH_STR_HASH_SIPHASH_OLD)) { -+ SHASH_DESC_ON_STACK(desc, c->sha256); -+ u8 digest[SHA256_DIGEST_SIZE]; -+ -+ desc->tfm = c->sha256; -+ -+ crypto_shash_digest(desc, (void *) &bi->bi_hash_seed, -+ sizeof(bi->bi_hash_seed), digest); -+ memcpy(&info.siphash_key, digest, sizeof(info.siphash_key)); -+ } -+ -+ return info; -+} -+ -+struct bch_str_hash_ctx { -+ union { -+ u32 crc32c; -+ u64 crc64; -+ SIPHASH_CTX siphash; -+ }; -+}; -+ -+static inline void bch2_str_hash_init(struct bch_str_hash_ctx *ctx, -+ const struct bch_hash_info *info) -+{ -+ switch (info->type) { -+ case BCH_STR_HASH_CRC32C: -+ ctx->crc32c = crc32c(~0, &info->crc_key, sizeof(info->crc_key)); -+ break; -+ case BCH_STR_HASH_CRC64: -+ ctx->crc64 = crc64_be(~0, &info->crc_key, sizeof(info->crc_key)); -+ break; -+ case BCH_STR_HASH_SIPHASH_OLD: -+ case BCH_STR_HASH_SIPHASH: -+ SipHash24_Init(&ctx->siphash, &info->siphash_key); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static inline void bch2_str_hash_update(struct bch_str_hash_ctx *ctx, -+ const struct bch_hash_info *info, -+ const void *data, size_t len) -+{ -+ switch (info->type) { -+ case BCH_STR_HASH_CRC32C: -+ ctx->crc32c = crc32c(ctx->crc32c, data, len); -+ break; -+ case BCH_STR_HASH_CRC64: -+ ctx->crc64 = crc64_be(ctx->crc64, data, len); -+ break; -+ case BCH_STR_HASH_SIPHASH_OLD: -+ case BCH_STR_HASH_SIPHASH: -+ SipHash24_Update(&ctx->siphash, data, len); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx, -+ const struct bch_hash_info *info) -+{ -+ switch (info->type) { -+ case BCH_STR_HASH_CRC32C: -+ return ctx->crc32c; -+ case BCH_STR_HASH_CRC64: -+ return ctx->crc64 >> 1; -+ case BCH_STR_HASH_SIPHASH_OLD: -+ case BCH_STR_HASH_SIPHASH: -+ return SipHash24_End(&ctx->siphash) >> 1; -+ default: -+ BUG(); -+ } -+} -+ -+struct bch_hash_desc { -+ enum btree_id btree_id; -+ u8 key_type; -+ -+ u64 (*hash_key)(const struct bch_hash_info *, const void *); -+ u64 (*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c); -+ bool (*cmp_key)(struct bkey_s_c, const void *); -+ bool (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c); -+}; -+ -+static __always_inline struct btree_iter * -+bch2_hash_lookup(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ u64 inode, const void *key, -+ unsigned flags) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ for_each_btree_key(trans, iter, desc.btree_id, -+ POS(inode, desc.hash_key(info, key)), -+ BTREE_ITER_SLOTS|flags, k, ret) { -+ if (iter->pos.inode != inode) -+ break; -+ -+ if (k.k->type == desc.key_type) { -+ if (!desc.cmp_key(k, key)) -+ return iter; -+ } else if (k.k->type == KEY_TYPE_whiteout) { -+ ; -+ } else { -+ /* hole, not found */ -+ break; -+ } -+ } -+ bch2_trans_iter_put(trans, iter); -+ -+ return ERR_PTR(ret ?: -ENOENT); -+} -+ -+static __always_inline struct btree_iter * -+bch2_hash_hole(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info 
*info, -+ u64 inode, const void *key) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ for_each_btree_key(trans, iter, desc.btree_id, -+ POS(inode, desc.hash_key(info, key)), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (iter->pos.inode != inode) -+ break; -+ -+ if (k.k->type != desc.key_type) -+ return iter; -+ } -+ -+ iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ bch2_trans_iter_put(trans, iter); -+ -+ return ERR_PTR(ret ?: -ENOSPC); -+} -+ -+static __always_inline -+int bch2_hash_needs_whiteout(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ struct btree_iter *start) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ iter = bch2_trans_copy_iter(trans, start); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ bch2_btree_iter_next_slot(iter); -+ -+ for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) { -+ if (k.k->type != desc.key_type && -+ k.k->type != KEY_TYPE_whiteout) -+ break; -+ -+ if (k.k->type == desc.key_type && -+ desc.hash_bkey(info, k) <= start->pos.offset) { -+ iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ ret = 1; -+ break; -+ } -+ } -+ -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static __always_inline -+int bch2_hash_set(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ u64 inode, struct bkey_i *insert, int flags) -+{ -+ struct btree_iter *iter, *slot = NULL; -+ struct bkey_s_c k; -+ bool found = false; -+ int ret; -+ -+ for_each_btree_key(trans, iter, desc.btree_id, -+ POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (iter->pos.inode != inode) -+ break; -+ -+ if (k.k->type == desc.key_type) { -+ if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert))) -+ goto found; -+ -+ /* hash collision: */ -+ continue; -+ } -+ -+ if (!slot && -+ !(flags & BCH_HASH_SET_MUST_REPLACE)) { -+ slot = bch2_trans_copy_iter(trans, iter); -+ if (IS_ERR(slot)) -+ return PTR_ERR(slot); -+ } -+ -+ if (k.k->type != KEY_TYPE_whiteout) -+ goto not_found; -+ } -+ -+ if (!ret) -+ ret = -ENOSPC; -+out: -+ bch2_trans_iter_put(trans, slot); -+ bch2_trans_iter_put(trans, iter); -+ -+ return ret; -+found: -+ found = true; -+not_found: -+ -+ if (!found && (flags & BCH_HASH_SET_MUST_REPLACE)) { -+ ret = -ENOENT; -+ } else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) { -+ ret = -EEXIST; -+ } else { -+ if (!found && slot) -+ swap(iter, slot); -+ -+ insert->k.p = iter->pos; -+ bch2_trans_update(trans, iter, insert, 0); -+ } -+ -+ goto out; -+} -+ -+static __always_inline -+int bch2_hash_delete_at(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ struct btree_iter *iter) -+{ -+ struct bkey_i *delete; -+ int ret; -+ -+ ret = bch2_hash_needs_whiteout(trans, desc, info, iter); -+ if (ret < 0) -+ return ret; -+ -+ delete = bch2_trans_kmalloc(trans, sizeof(*delete)); -+ if (IS_ERR(delete)) -+ return PTR_ERR(delete); -+ -+ bkey_init(&delete->k); -+ delete->k.p = iter->pos; -+ delete->k.type = ret ? 
KEY_TYPE_whiteout : KEY_TYPE_deleted; -+ -+ bch2_trans_update(trans, iter, delete, 0); -+ return 0; -+} -+ -+static __always_inline -+int bch2_hash_delete(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ u64 inode, const void *key) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ iter = bch2_hash_lookup(trans, desc, info, inode, key, -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ ret = bch2_hash_delete_at(trans, desc, info, iter); -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+#endif /* _BCACHEFS_STR_HASH_H */ -diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c -new file mode 100644 -index 000000000000..9a221d3e1652 ---- /dev/null -+++ b/fs/bcachefs/super-io.c -@@ -0,0 +1,1158 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "io.h" -+#include "journal.h" -+#include "journal_seq_blacklist.h" -+#include "replicas.h" -+#include "quota.h" -+#include "super-io.h" -+#include "super.h" -+#include "vstructs.h" -+ -+#include -+#include -+ -+const char * const bch2_sb_fields[] = { -+#define x(name, nr) #name, -+ BCH_SB_FIELDS() -+#undef x -+ NULL -+}; -+ -+static const char *bch2_sb_field_validate(struct bch_sb *, -+ struct bch_sb_field *); -+ -+struct bch_sb_field *bch2_sb_field_get(struct bch_sb *sb, -+ enum bch_sb_field_type type) -+{ -+ struct bch_sb_field *f; -+ -+ /* XXX: need locking around superblock to access optional fields */ -+ -+ vstruct_for_each(sb, f) -+ if (le32_to_cpu(f->type) == type) -+ return f; -+ return NULL; -+} -+ -+static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb, -+ struct bch_sb_field *f, -+ unsigned u64s) -+{ -+ unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0; -+ unsigned sb_u64s = le32_to_cpu(sb->sb->u64s) + u64s - old_u64s; -+ -+ BUG_ON(get_order(__vstruct_bytes(struct bch_sb, sb_u64s)) > -+ sb->page_order); -+ -+ if (!f && !u64s) { -+ /* nothing to do: */ -+ } else if (!f) { -+ f = vstruct_last(sb->sb); -+ memset(f, 0, sizeof(u64) * u64s); -+ f->u64s = cpu_to_le32(u64s); -+ f->type = 0; -+ } else { -+ void *src, *dst; -+ -+ src = vstruct_end(f); -+ -+ if (u64s) { -+ f->u64s = cpu_to_le32(u64s); -+ dst = vstruct_end(f); -+ } else { -+ dst = f; -+ } -+ -+ memmove(dst, src, vstruct_end(sb->sb) - src); -+ -+ if (dst > src) -+ memset(src, 0, dst - src); -+ } -+ -+ sb->sb->u64s = cpu_to_le32(sb_u64s); -+ -+ return u64s ? 
f : NULL; -+} -+ -+void bch2_sb_field_delete(struct bch_sb_handle *sb, -+ enum bch_sb_field_type type) -+{ -+ struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type); -+ -+ if (f) -+ __bch2_sb_field_resize(sb, f, 0); -+} -+ -+/* Superblock realloc/free: */ -+ -+void bch2_free_super(struct bch_sb_handle *sb) -+{ -+ if (sb->bio) -+ bio_put(sb->bio); -+ if (!IS_ERR_OR_NULL(sb->bdev)) -+ blkdev_put(sb->bdev, sb->mode); -+ -+ free_pages((unsigned long) sb->sb, sb->page_order); -+ memset(sb, 0, sizeof(*sb)); -+} -+ -+int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) -+{ -+ size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s); -+ unsigned order = get_order(new_bytes); -+ struct bch_sb *new_sb; -+ struct bio *bio; -+ -+ if (sb->sb && sb->page_order >= order) -+ return 0; -+ -+ if (sb->have_layout) { -+ u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits; -+ -+ if (new_bytes > max_bytes) { -+ char buf[BDEVNAME_SIZE]; -+ -+ pr_err("%s: superblock too big: want %zu but have %llu", -+ bdevname(sb->bdev, buf), new_bytes, max_bytes); -+ return -ENOSPC; -+ } -+ } -+ -+ if (sb->page_order >= order && sb->sb) -+ return 0; -+ -+ if (dynamic_fault("bcachefs:add:super_realloc")) -+ return -ENOMEM; -+ -+ if (sb->have_bio) { -+ bio = bio_kmalloc(GFP_KERNEL, 1 << order); -+ if (!bio) -+ return -ENOMEM; -+ -+ if (sb->bio) -+ bio_put(sb->bio); -+ sb->bio = bio; -+ } -+ -+ new_sb = (void *) __get_free_pages(GFP_NOFS|__GFP_ZERO, order); -+ if (!new_sb) -+ return -ENOMEM; -+ -+ if (sb->sb) -+ memcpy(new_sb, sb->sb, PAGE_SIZE << sb->page_order); -+ -+ free_pages((unsigned long) sb->sb, sb->page_order); -+ sb->sb = new_sb; -+ -+ sb->page_order = order; -+ -+ return 0; -+} -+ -+struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb, -+ enum bch_sb_field_type type, -+ unsigned u64s) -+{ -+ struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type); -+ ssize_t old_u64s = f ? 
le32_to_cpu(f->u64s) : 0; -+ ssize_t d = -old_u64s + u64s; -+ -+ if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) -+ return NULL; -+ -+ if (sb->fs_sb) { -+ struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ /* XXX: we're not checking that offline device have enough space */ -+ -+ for_each_online_member(ca, c, i) { -+ struct bch_sb_handle *sb = &ca->disk_sb; -+ -+ if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) { -+ percpu_ref_put(&ca->ref); -+ return NULL; -+ } -+ } -+ } -+ -+ f = bch2_sb_field_get(sb->sb, type); -+ f = __bch2_sb_field_resize(sb, f, u64s); -+ if (f) -+ f->type = cpu_to_le32(type); -+ return f; -+} -+ -+/* Superblock validate: */ -+ -+static inline void __bch2_sb_layout_size_assert(void) -+{ -+ BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512); -+} -+ -+static const char *validate_sb_layout(struct bch_sb_layout *layout) -+{ -+ u64 offset, prev_offset, max_sectors; -+ unsigned i; -+ -+ if (uuid_le_cmp(layout->magic, BCACHE_MAGIC)) -+ return "Not a bcachefs superblock layout"; -+ -+ if (layout->layout_type != 0) -+ return "Invalid superblock layout type"; -+ -+ if (!layout->nr_superblocks) -+ return "Invalid superblock layout: no superblocks"; -+ -+ if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) -+ return "Invalid superblock layout: too many superblocks"; -+ -+ max_sectors = 1 << layout->sb_max_size_bits; -+ -+ prev_offset = le64_to_cpu(layout->sb_offset[0]); -+ -+ for (i = 1; i < layout->nr_superblocks; i++) { -+ offset = le64_to_cpu(layout->sb_offset[i]); -+ -+ if (offset < prev_offset + max_sectors) -+ return "Invalid superblock layout: superblocks overlap"; -+ prev_offset = offset; -+ } -+ -+ return NULL; -+} -+ -+const char *bch2_sb_validate(struct bch_sb_handle *disk_sb) -+{ -+ struct bch_sb *sb = disk_sb->sb; -+ struct bch_sb_field *f; -+ struct bch_sb_field_members *mi; -+ const char *err; -+ u32 version, version_min; -+ u16 block_size; -+ -+ version = le16_to_cpu(sb->version); -+ version_min = version >= bcachefs_metadata_version_new_versioning -+ ? 
le16_to_cpu(sb->version_min) -+ : version; -+ -+ if (version >= bcachefs_metadata_version_max || -+ version_min < bcachefs_metadata_version_min) -+ return "Unsupported superblock version"; -+ -+ if (version_min > version) -+ return "Bad minimum version"; -+ -+ if (sb->features[1] || -+ (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) -+ return "Filesystem has incompatible features"; -+ -+ block_size = le16_to_cpu(sb->block_size); -+ -+ if (!is_power_of_2(block_size) || -+ block_size > PAGE_SECTORS) -+ return "Bad block size"; -+ -+ if (bch2_is_zero(sb->user_uuid.b, sizeof(uuid_le))) -+ return "Bad user UUID"; -+ -+ if (bch2_is_zero(sb->uuid.b, sizeof(uuid_le))) -+ return "Bad internal UUID"; -+ -+ if (!sb->nr_devices || -+ sb->nr_devices <= sb->dev_idx || -+ sb->nr_devices > BCH_SB_MEMBERS_MAX) -+ return "Bad number of member devices"; -+ -+ if (!BCH_SB_META_REPLICAS_WANT(sb) || -+ BCH_SB_META_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX) -+ return "Invalid number of metadata replicas"; -+ -+ if (!BCH_SB_META_REPLICAS_REQ(sb) || -+ BCH_SB_META_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX) -+ return "Invalid number of metadata replicas"; -+ -+ if (!BCH_SB_DATA_REPLICAS_WANT(sb) || -+ BCH_SB_DATA_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX) -+ return "Invalid number of data replicas"; -+ -+ if (!BCH_SB_DATA_REPLICAS_REQ(sb) || -+ BCH_SB_DATA_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX) -+ return "Invalid number of data replicas"; -+ -+ if (BCH_SB_META_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR) -+ return "Invalid metadata checksum type"; -+ -+ if (BCH_SB_DATA_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR) -+ return "Invalid metadata checksum type"; -+ -+ if (BCH_SB_COMPRESSION_TYPE(sb) >= BCH_COMPRESSION_OPT_NR) -+ return "Invalid compression type"; -+ -+ if (!BCH_SB_BTREE_NODE_SIZE(sb)) -+ return "Btree node size not set"; -+ -+ if (!is_power_of_2(BCH_SB_BTREE_NODE_SIZE(sb))) -+ return "Btree node size not a power of two"; -+ -+ if (BCH_SB_GC_RESERVE(sb) < 5) -+ return "gc reserve percentage too small"; -+ -+ if (!sb->time_precision || -+ le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) -+ return "invalid time precision"; -+ -+ /* validate layout */ -+ err = validate_sb_layout(&sb->layout); -+ if (err) -+ return err; -+ -+ vstruct_for_each(sb, f) { -+ if (!f->u64s) -+ return "Invalid superblock: invalid optional field"; -+ -+ if (vstruct_next(f) > vstruct_last(sb)) -+ return "Invalid superblock: invalid optional field"; -+ } -+ -+ /* members must be validated first: */ -+ mi = bch2_sb_get_members(sb); -+ if (!mi) -+ return "Invalid superblock: member info area missing"; -+ -+ err = bch2_sb_field_validate(sb, &mi->field); -+ if (err) -+ return err; -+ -+ vstruct_for_each(sb, f) { -+ if (le32_to_cpu(f->type) == BCH_SB_FIELD_members) -+ continue; -+ -+ err = bch2_sb_field_validate(sb, f); -+ if (err) -+ return err; -+ } -+ -+ return NULL; -+} -+ -+/* device open: */ -+ -+static void bch2_sb_update(struct bch_fs *c) -+{ -+ struct bch_sb *src = c->disk_sb.sb; -+ struct bch_sb_field_members *mi = bch2_sb_get_members(src); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ c->sb.uuid = src->uuid; -+ c->sb.user_uuid = src->user_uuid; -+ c->sb.version = le16_to_cpu(src->version); -+ c->sb.nr_devices = src->nr_devices; -+ c->sb.clean = BCH_SB_CLEAN(src); -+ c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src); -+ c->sb.encoded_extent_max= 1 << BCH_SB_ENCODED_EXTENT_MAX_BITS(src); -+ c->sb.time_base_lo = le64_to_cpu(src->time_base_lo); -+ c->sb.time_base_hi = le32_to_cpu(src->time_base_hi); -+ 
c->sb.time_precision = le32_to_cpu(src->time_precision); -+ c->sb.features = le64_to_cpu(src->features[0]); -+ c->sb.compat = le64_to_cpu(src->compat[0]); -+ -+ for_each_member_device(ca, c, i) -+ ca->mi = bch2_mi_to_cpu(mi->members + i); -+} -+ -+/* doesn't copy member info */ -+static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) -+{ -+ struct bch_sb_field *src_f, *dst_f; -+ struct bch_sb *dst = dst_handle->sb; -+ unsigned i; -+ -+ dst->version = src->version; -+ dst->version_min = src->version_min; -+ dst->seq = src->seq; -+ dst->uuid = src->uuid; -+ dst->user_uuid = src->user_uuid; -+ memcpy(dst->label, src->label, sizeof(dst->label)); -+ -+ dst->block_size = src->block_size; -+ dst->nr_devices = src->nr_devices; -+ -+ dst->time_base_lo = src->time_base_lo; -+ dst->time_base_hi = src->time_base_hi; -+ dst->time_precision = src->time_precision; -+ -+ memcpy(dst->flags, src->flags, sizeof(dst->flags)); -+ memcpy(dst->features, src->features, sizeof(dst->features)); -+ memcpy(dst->compat, src->compat, sizeof(dst->compat)); -+ -+ for (i = 0; i < BCH_SB_FIELD_NR; i++) { -+ if (i == BCH_SB_FIELD_journal) -+ continue; -+ -+ src_f = bch2_sb_field_get(src, i); -+ dst_f = bch2_sb_field_get(dst, i); -+ dst_f = __bch2_sb_field_resize(dst_handle, dst_f, -+ src_f ? le32_to_cpu(src_f->u64s) : 0); -+ -+ if (src_f) -+ memcpy(dst_f, src_f, vstruct_bytes(src_f)); -+ } -+} -+ -+int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src) -+{ -+ struct bch_sb_field_journal *journal_buckets = -+ bch2_sb_get_journal(src); -+ unsigned journal_u64s = journal_buckets -+ ? le32_to_cpu(journal_buckets->field.u64s) -+ : 0; -+ int ret; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ ret = bch2_sb_realloc(&c->disk_sb, -+ le32_to_cpu(src->u64s) - journal_u64s); -+ if (ret) -+ return ret; -+ -+ __copy_super(&c->disk_sb, src); -+ -+ ret = bch2_sb_replicas_to_cpu_replicas(c); -+ if (ret) -+ return ret; -+ -+ ret = bch2_sb_disk_groups_to_cpu(c); -+ if (ret) -+ return ret; -+ -+ bch2_sb_update(c); -+ return 0; -+} -+ -+int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bch_sb *src = c->disk_sb.sb, *dst = ca->disk_sb.sb; -+ struct bch_sb_field_journal *journal_buckets = -+ bch2_sb_get_journal(dst); -+ unsigned journal_u64s = journal_buckets -+ ? 
le32_to_cpu(journal_buckets->field.u64s) -+ : 0; -+ unsigned u64s = le32_to_cpu(src->u64s) + journal_u64s; -+ int ret; -+ -+ ret = bch2_sb_realloc(&ca->disk_sb, u64s); -+ if (ret) -+ return ret; -+ -+ __copy_super(&ca->disk_sb, src); -+ return 0; -+} -+ -+/* read superblock: */ -+ -+static const char *read_one_super(struct bch_sb_handle *sb, u64 offset) -+{ -+ struct bch_csum csum; -+ size_t bytes; -+reread: -+ bio_reset(sb->bio); -+ bio_set_dev(sb->bio, sb->bdev); -+ sb->bio->bi_iter.bi_sector = offset; -+ bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META); -+ bch2_bio_map(sb->bio, sb->sb, PAGE_SIZE << sb->page_order); -+ -+ if (submit_bio_wait(sb->bio)) -+ return "IO error"; -+ -+ if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC)) -+ return "Not a bcachefs superblock"; -+ -+ if (le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_min || -+ le16_to_cpu(sb->sb->version) >= bcachefs_metadata_version_max) -+ return "Unsupported superblock version"; -+ -+ bytes = vstruct_bytes(sb->sb); -+ -+ if (bytes > 512 << sb->sb->layout.sb_max_size_bits) -+ return "Bad superblock: too big"; -+ -+ if (get_order(bytes) > sb->page_order) { -+ if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s))) -+ return "cannot allocate memory"; -+ goto reread; -+ } -+ -+ if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) -+ return "unknown csum type"; -+ -+ /* XXX: verify MACs */ -+ csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb), -+ null_nonce(), sb->sb); -+ -+ if (bch2_crc_cmp(csum, sb->sb->csum)) -+ return "bad checksum reading superblock"; -+ -+ sb->seq = le64_to_cpu(sb->sb->seq); -+ -+ return NULL; -+} -+ -+int bch2_read_super(const char *path, struct bch_opts *opts, -+ struct bch_sb_handle *sb) -+{ -+ u64 offset = opt_get(*opts, sb); -+ struct bch_sb_layout layout; -+ const char *err; -+ __le64 *i; -+ int ret; -+ -+ pr_verbose_init(*opts, ""); -+ -+ memset(sb, 0, sizeof(*sb)); -+ sb->mode = FMODE_READ; -+ sb->have_bio = true; -+ -+ if (!opt_get(*opts, noexcl)) -+ sb->mode |= FMODE_EXCL; -+ -+ if (!opt_get(*opts, nochanges)) -+ sb->mode |= FMODE_WRITE; -+ -+ sb->bdev = blkdev_get_by_path(path, sb->mode, sb); -+ if (IS_ERR(sb->bdev) && -+ PTR_ERR(sb->bdev) == -EACCES && -+ opt_get(*opts, read_only)) { -+ sb->mode &= ~FMODE_WRITE; -+ -+ sb->bdev = blkdev_get_by_path(path, sb->mode, sb); -+ if (!IS_ERR(sb->bdev)) -+ opt_set(*opts, nochanges, true); -+ } -+ -+ if (IS_ERR(sb->bdev)) { -+ ret = PTR_ERR(sb->bdev); -+ goto out; -+ } -+ -+ err = "cannot allocate memory"; -+ ret = bch2_sb_realloc(sb, 0); -+ if (ret) -+ goto err; -+ -+ ret = -EFAULT; -+ err = "dynamic fault"; -+ if (bch2_fs_init_fault("read_super")) -+ goto err; -+ -+ ret = -EINVAL; -+ err = read_one_super(sb, offset); -+ if (!err) -+ goto got_super; -+ -+ if (opt_defined(*opts, sb)) -+ goto err; -+ -+ pr_err("error reading default superblock: %s", err); -+ -+ /* -+ * Error reading primary superblock - read location of backup -+ * superblocks: -+ */ -+ bio_reset(sb->bio); -+ bio_set_dev(sb->bio, sb->bdev); -+ sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR; -+ bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META); -+ /* -+ * use sb buffer to read layout, since sb buffer is page aligned but -+ * layout won't be: -+ */ -+ bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout)); -+ -+ err = "IO error"; -+ if (submit_bio_wait(sb->bio)) -+ goto err; -+ -+ memcpy(&layout, sb->sb, sizeof(layout)); -+ err = validate_sb_layout(&layout); -+ if (err) -+ goto err; -+ -+ for (i = layout.sb_offset; -+ i < layout.sb_offset + layout.nr_superblocks; i++) { -+ 
offset = le64_to_cpu(*i); -+ -+ if (offset == opt_get(*opts, sb)) -+ continue; -+ -+ err = read_one_super(sb, offset); -+ if (!err) -+ goto got_super; -+ } -+ -+ ret = -EINVAL; -+ goto err; -+ -+got_super: -+ err = "Superblock block size smaller than device block size"; -+ ret = -EINVAL; -+ if (le16_to_cpu(sb->sb->block_size) << 9 < -+ bdev_logical_block_size(sb->bdev)) -+ goto err; -+ -+ if (sb->mode & FMODE_WRITE) -+ bdev_get_queue(sb->bdev)->backing_dev_info->capabilities -+ |= BDI_CAP_STABLE_WRITES; -+ ret = 0; -+ sb->have_layout = true; -+out: -+ pr_verbose_init(*opts, "ret %i", ret); -+ return ret; -+err: -+ bch2_free_super(sb); -+ pr_err("error reading superblock: %s", err); -+ goto out; -+} -+ -+/* write superblock: */ -+ -+static void write_super_endio(struct bio *bio) -+{ -+ struct bch_dev *ca = bio->bi_private; -+ -+ /* XXX: return errors directly */ -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write: %s", -+ blk_status_to_str(bio->bi_status))) -+ ca->sb_write_error = 1; -+ -+ closure_put(&ca->fs->sb_write); -+ percpu_ref_put(&ca->io_ref); -+} -+ -+static void read_back_super(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bch_sb *sb = ca->disk_sb.sb; -+ struct bio *bio = ca->disk_sb.bio; -+ -+ bio_reset(bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]); -+ bio->bi_end_io = write_super_endio; -+ bio->bi_private = ca; -+ bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC|REQ_META); -+ bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE); -+ -+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_SB], -+ bio_sectors(bio)); -+ -+ percpu_ref_get(&ca->io_ref); -+ closure_bio_submit(bio, &c->sb_write); -+} -+ -+static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) -+{ -+ struct bch_sb *sb = ca->disk_sb.sb; -+ struct bio *bio = ca->disk_sb.bio; -+ -+ sb->offset = sb->layout.sb_offset[idx]; -+ -+ SET_BCH_SB_CSUM_TYPE(sb, c->opts.metadata_checksum); -+ sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb), -+ null_nonce(), sb); -+ -+ bio_reset(bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_iter.bi_sector = le64_to_cpu(sb->offset); -+ bio->bi_end_io = write_super_endio; -+ bio->bi_private = ca; -+ bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META); -+ bch2_bio_map(bio, sb, -+ roundup((size_t) vstruct_bytes(sb), -+ bdev_logical_block_size(ca->disk_sb.bdev))); -+ -+ this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_SB], -+ bio_sectors(bio)); -+ -+ percpu_ref_get(&ca->io_ref); -+ closure_bio_submit(bio, &c->sb_write); -+} -+ -+int bch2_write_super(struct bch_fs *c) -+{ -+ struct closure *cl = &c->sb_write; -+ struct bch_dev *ca; -+ unsigned i, sb = 0, nr_wrote; -+ const char *err; -+ struct bch_devs_mask sb_written; -+ bool wrote, can_mount_without_written, can_mount_with_written; -+ int ret = 0; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ closure_init_stack(cl); -+ memset(&sb_written, 0, sizeof(sb_written)); -+ -+ le64_add_cpu(&c->disk_sb.sb->seq, 1); -+ -+ if (test_bit(BCH_FS_ERROR, &c->flags)) -+ SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1); -+ -+ for_each_online_member(ca, c, i) -+ bch2_sb_from_fs(c, ca); -+ -+ for_each_online_member(ca, c, i) { -+ err = bch2_sb_validate(&ca->disk_sb); -+ if (err) { -+ bch2_fs_inconsistent(c, "sb invalid before write: %s", err); -+ ret = -1; -+ goto out; -+ } -+ } -+ -+ if (c->opts.nochanges) -+ goto out; -+ -+ for_each_online_member(ca, c, i) { -+ __set_bit(ca->dev_idx, sb_written.d); -+ ca->sb_write_error = 0; -+ } -+ -+ for_each_online_member(ca, c, i) -+ 
read_back_super(c, ca); -+ closure_sync(cl); -+ -+ for_each_online_member(ca, c, i) { -+ if (!ca->sb_write_error && -+ ca->disk_sb.seq != -+ le64_to_cpu(ca->sb_read_scratch->seq)) { -+ bch2_fs_fatal_error(c, -+ "Superblock modified by another process"); -+ percpu_ref_put(&ca->io_ref); -+ ret = -EROFS; -+ goto out; -+ } -+ } -+ -+ do { -+ wrote = false; -+ for_each_online_member(ca, c, i) -+ if (!ca->sb_write_error && -+ sb < ca->disk_sb.sb->layout.nr_superblocks) { -+ write_one_super(c, ca, sb); -+ wrote = true; -+ } -+ closure_sync(cl); -+ sb++; -+ } while (wrote); -+ -+ for_each_online_member(ca, c, i) { -+ if (ca->sb_write_error) -+ __clear_bit(ca->dev_idx, sb_written.d); -+ else -+ ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq); -+ } -+ -+ nr_wrote = dev_mask_nr(&sb_written); -+ -+ can_mount_with_written = -+ bch2_have_enough_devs(__bch2_replicas_status(c, sb_written), -+ BCH_FORCE_IF_DEGRADED); -+ -+ for (i = 0; i < ARRAY_SIZE(sb_written.d); i++) -+ sb_written.d[i] = ~sb_written.d[i]; -+ -+ can_mount_without_written = -+ bch2_have_enough_devs(__bch2_replicas_status(c, sb_written), -+ BCH_FORCE_IF_DEGRADED); -+ -+ /* -+ * If we would be able to mount _without_ the devices we successfully -+ * wrote superblocks to, we weren't able to write to enough devices: -+ * -+ * Exception: if we can mount without the successes because we haven't -+ * written anything (new filesystem), we continue if we'd be able to -+ * mount with the devices we did successfully write to: -+ */ -+ if (bch2_fs_fatal_err_on(!nr_wrote || -+ (can_mount_without_written && -+ !can_mount_with_written), c, -+ "Unable to write superblock to sufficient devices")) -+ ret = -1; -+out: -+ /* Make new options visible after they're persistent: */ -+ bch2_sb_update(c); -+ return ret; -+} -+ -+void __bch2_check_set_feature(struct bch_fs *c, unsigned feat) -+{ -+ mutex_lock(&c->sb_lock); -+ if (!(c->sb.features & (1ULL << feat))) { -+ c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat); -+ -+ bch2_write_super(c); -+ } -+ mutex_unlock(&c->sb_lock); -+} -+ -+/* BCH_SB_FIELD_journal: */ -+ -+static int u64_cmp(const void *_l, const void *_r) -+{ -+ u64 l = *((const u64 *) _l), r = *((const u64 *) _r); -+ -+ return l < r ? -1 : l > r ? 
1 : 0; -+} -+ -+static const char *bch2_sb_validate_journal(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_journal *journal = field_to_type(f, journal); -+ struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx; -+ const char *err; -+ unsigned nr; -+ unsigned i; -+ u64 *b; -+ -+ journal = bch2_sb_get_journal(sb); -+ if (!journal) -+ return NULL; -+ -+ nr = bch2_nr_journal_buckets(journal); -+ if (!nr) -+ return NULL; -+ -+ b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL); -+ if (!b) -+ return "cannot allocate memory"; -+ -+ for (i = 0; i < nr; i++) -+ b[i] = le64_to_cpu(journal->buckets[i]); -+ -+ sort(b, nr, sizeof(u64), u64_cmp, NULL); -+ -+ err = "journal bucket at sector 0"; -+ if (!b[0]) -+ goto err; -+ -+ err = "journal bucket before first bucket"; -+ if (m && b[0] < le16_to_cpu(m->first_bucket)) -+ goto err; -+ -+ err = "journal bucket past end of device"; -+ if (m && b[nr - 1] >= le64_to_cpu(m->nbuckets)) -+ goto err; -+ -+ err = "duplicate journal buckets"; -+ for (i = 0; i + 1 < nr; i++) -+ if (b[i] == b[i + 1]) -+ goto err; -+ -+ err = NULL; -+err: -+ kfree(b); -+ return err; -+} -+ -+static const struct bch_sb_field_ops bch_sb_field_ops_journal = { -+ .validate = bch2_sb_validate_journal, -+}; -+ -+/* BCH_SB_FIELD_members: */ -+ -+static const char *bch2_sb_validate_members(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_members *mi = field_to_type(f, members); -+ struct bch_member *m; -+ -+ if ((void *) (mi->members + sb->nr_devices) > -+ vstruct_end(&mi->field)) -+ return "Invalid superblock: bad member info"; -+ -+ for (m = mi->members; -+ m < mi->members + sb->nr_devices; -+ m++) { -+ if (!bch2_member_exists(m)) -+ continue; -+ -+ if (le64_to_cpu(m->nbuckets) > LONG_MAX) -+ return "Too many buckets"; -+ -+ if (le64_to_cpu(m->nbuckets) - -+ le16_to_cpu(m->first_bucket) < BCH_MIN_NR_NBUCKETS) -+ return "Not enough buckets"; -+ -+ if (le16_to_cpu(m->bucket_size) < -+ le16_to_cpu(sb->block_size)) -+ return "bucket size smaller than block size"; -+ -+ if (le16_to_cpu(m->bucket_size) < -+ BCH_SB_BTREE_NODE_SIZE(sb)) -+ return "bucket size smaller than btree node size"; -+ } -+ -+ return NULL; -+} -+ -+static const struct bch_sb_field_ops bch_sb_field_ops_members = { -+ .validate = bch2_sb_validate_members, -+}; -+ -+/* BCH_SB_FIELD_crypt: */ -+ -+static const char *bch2_sb_validate_crypt(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_crypt *crypt = field_to_type(f, crypt); -+ -+ if (vstruct_bytes(&crypt->field) != sizeof(*crypt)) -+ return "invalid field crypt: wrong size"; -+ -+ if (BCH_CRYPT_KDF_TYPE(crypt)) -+ return "invalid field crypt: bad kdf type"; -+ -+ return NULL; -+} -+ -+static const struct bch_sb_field_ops bch_sb_field_ops_crypt = { -+ .validate = bch2_sb_validate_crypt, -+}; -+ -+/* BCH_SB_FIELD_clean: */ -+ -+void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write) -+{ -+ struct jset_entry *entry; -+ -+ for (entry = clean->start; -+ entry < (struct jset_entry *) vstruct_end(&clean->field); -+ entry = vstruct_next(entry)) -+ bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write); -+} -+ -+int bch2_fs_mark_dirty(struct bch_fs *c) -+{ -+ int ret; -+ -+ /* -+ * Unconditionally write superblock, to verify it hasn't changed before -+ * we go rw: -+ */ -+ -+ mutex_lock(&c->sb_lock); -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite; -+ c->disk_sb.sb->features[0] |= 1ULL << 
BCH_FEATURE_extents_above_btree_updates; -+ c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_updates_journalled; -+ ret = bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+static void -+entry_init_u64s(struct jset_entry *entry, unsigned u64s) -+{ -+ memset(entry, 0, u64s * sizeof(u64)); -+ -+ /* -+ * The u64s field counts from the start of data, ignoring the shared -+ * fields. -+ */ -+ entry->u64s = u64s - 1; -+} -+ -+static void -+entry_init_size(struct jset_entry *entry, size_t size) -+{ -+ unsigned u64s = DIV_ROUND_UP(size, sizeof(u64)); -+ entry_init_u64s(entry, u64s); -+} -+ -+struct jset_entry * -+bch2_journal_super_entries_add_common(struct bch_fs *c, -+ struct jset_entry *entry, -+ u64 journal_seq) -+{ -+ unsigned i; -+ -+ percpu_down_write(&c->mark_lock); -+ -+ if (!journal_seq) { -+ bch2_fs_usage_acc_to_base(c, 0); -+ bch2_fs_usage_acc_to_base(c, 1); -+ } else { -+ bch2_fs_usage_acc_to_base(c, journal_seq & 1); -+ } -+ -+ { -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ -+ entry_init_size(entry, sizeof(*u)); -+ u->entry.type = BCH_JSET_ENTRY_usage; -+ u->entry.btree_id = FS_USAGE_INODES; -+ u->v = cpu_to_le64(c->usage_base->nr_inodes); -+ -+ entry = vstruct_next(entry); -+ } -+ -+ { -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ -+ entry_init_size(entry, sizeof(*u)); -+ u->entry.type = BCH_JSET_ENTRY_usage; -+ u->entry.btree_id = FS_USAGE_KEY_VERSION; -+ u->v = cpu_to_le64(atomic64_read(&c->key_version)); -+ -+ entry = vstruct_next(entry); -+ } -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) { -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ -+ entry_init_size(entry, sizeof(*u)); -+ u->entry.type = BCH_JSET_ENTRY_usage; -+ u->entry.btree_id = FS_USAGE_RESERVED; -+ u->entry.level = i; -+ u->v = cpu_to_le64(c->usage_base->persistent_reserved[i]); -+ -+ entry = vstruct_next(entry); -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ struct jset_entry_data_usage *u = -+ container_of(entry, struct jset_entry_data_usage, entry); -+ -+ entry_init_size(entry, sizeof(*u) + e->nr_devs); -+ u->entry.type = BCH_JSET_ENTRY_data_usage; -+ u->v = cpu_to_le64(c->usage_base->replicas[i]); -+ memcpy(&u->r, e, replicas_entry_bytes(e)); -+ -+ entry = vstruct_next(entry); -+ } -+ -+ percpu_up_write(&c->mark_lock); -+ -+ return entry; -+} -+ -+void bch2_fs_mark_clean(struct bch_fs *c) -+{ -+ struct bch_sb_field_clean *sb_clean; -+ struct jset_entry *entry; -+ unsigned u64s; -+ -+ mutex_lock(&c->sb_lock); -+ if (BCH_SB_CLEAN(c->disk_sb.sb)) -+ goto out; -+ -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, true); -+ -+ c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO; -+ c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA; -+ c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_extents_above_btree_updates); -+ c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_btree_updates_journalled); -+ -+ u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved; -+ -+ sb_clean = bch2_sb_resize_clean(&c->disk_sb, u64s); -+ if (!sb_clean) { -+ bch_err(c, "error resizing superblock while setting filesystem clean"); -+ goto out; -+ } -+ -+ sb_clean->flags = 0; -+ sb_clean->read_clock = cpu_to_le16(c->bucket_clock[READ].hand); -+ sb_clean->write_clock = cpu_to_le16(c->bucket_clock[WRITE].hand); -+ sb_clean->journal_seq = 
cpu_to_le64(journal_cur_seq(&c->journal) - 1); -+ -+ /* Trying to catch outstanding bug: */ -+ BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX); -+ -+ entry = sb_clean->start; -+ entry = bch2_journal_super_entries_add_common(c, entry, 0); -+ entry = bch2_btree_roots_to_journal_entries(c, entry, entry); -+ BUG_ON((void *) entry > vstruct_end(&sb_clean->field)); -+ -+ memset(entry, 0, -+ vstruct_end(&sb_clean->field) - (void *) entry); -+ -+ if (le16_to_cpu(c->disk_sb.sb->version) < -+ bcachefs_metadata_version_bkey_renumber) -+ bch2_sb_clean_renumber(sb_clean, WRITE); -+ -+ bch2_write_super(c); -+out: -+ mutex_unlock(&c->sb_lock); -+} -+ -+static const char *bch2_sb_validate_clean(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_clean *clean = field_to_type(f, clean); -+ -+ if (vstruct_bytes(&clean->field) < sizeof(*clean)) -+ return "invalid field crypt: wrong size"; -+ -+ return NULL; -+} -+ -+static const struct bch_sb_field_ops bch_sb_field_ops_clean = { -+ .validate = bch2_sb_validate_clean, -+}; -+ -+static const struct bch_sb_field_ops *bch2_sb_field_ops[] = { -+#define x(f, nr) \ -+ [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f, -+ BCH_SB_FIELDS() -+#undef x -+}; -+ -+static const char *bch2_sb_field_validate(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ unsigned type = le32_to_cpu(f->type); -+ -+ return type < BCH_SB_FIELD_NR -+ ? bch2_sb_field_ops[type]->validate(sb, f) -+ : NULL; -+} -+ -+void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ unsigned type = le32_to_cpu(f->type); -+ const struct bch_sb_field_ops *ops = type < BCH_SB_FIELD_NR -+ ? bch2_sb_field_ops[type] : NULL; -+ -+ if (ops) -+ pr_buf(out, "%s", bch2_sb_fields[type]); -+ else -+ pr_buf(out, "(unknown field %u)", type); -+ -+ pr_buf(out, " (size %llu):", vstruct_bytes(f)); -+ -+ if (ops && ops->to_text) -+ bch2_sb_field_ops[type]->to_text(out, sb, f); -+} -diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h -new file mode 100644 -index 000000000000..7a068158efca ---- /dev/null -+++ b/fs/bcachefs/super-io.h -@@ -0,0 +1,137 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SUPER_IO_H -+#define _BCACHEFS_SUPER_IO_H -+ -+#include "extents.h" -+#include "eytzinger.h" -+#include "super_types.h" -+#include "super.h" -+ -+#include -+ -+struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type); -+struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *, -+ enum bch_sb_field_type, unsigned); -+void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type); -+ -+#define field_to_type(_f, _name) \ -+ container_of_or_null(_f, struct bch_sb_field_##_name, field) -+ -+#define x(_name, _nr) \ -+static inline struct bch_sb_field_##_name * \ -+bch2_sb_get_##_name(struct bch_sb *sb) \ -+{ \ -+ return field_to_type(bch2_sb_field_get(sb, \ -+ BCH_SB_FIELD_##_name), _name); \ -+} \ -+ \ -+static inline struct bch_sb_field_##_name * \ -+bch2_sb_resize_##_name(struct bch_sb_handle *sb, unsigned u64s) \ -+{ \ -+ return field_to_type(bch2_sb_field_resize(sb, \ -+ BCH_SB_FIELD_##_name, u64s), _name); \ -+} -+ -+BCH_SB_FIELDS() -+#undef x -+ -+extern const char * const bch2_sb_fields[]; -+ -+struct bch_sb_field_ops { -+ const char * (*validate)(struct bch_sb *, struct bch_sb_field *); -+ void (*to_text)(struct printbuf *, struct bch_sb *, -+ struct bch_sb_field *); -+}; -+ -+static inline __le64 bch2_sb_magic(struct bch_fs *c) -+{ -+ __le64 ret; -+ memcpy(&ret, &c->sb.uuid, sizeof(ret)); -+ return 
ret; -+} -+ -+static inline __u64 jset_magic(struct bch_fs *c) -+{ -+ return __le64_to_cpu(bch2_sb_magic(c) ^ JSET_MAGIC); -+} -+ -+static inline __u64 bset_magic(struct bch_fs *c) -+{ -+ return __le64_to_cpu(bch2_sb_magic(c) ^ BSET_MAGIC); -+} -+ -+int bch2_sb_to_fs(struct bch_fs *, struct bch_sb *); -+int bch2_sb_from_fs(struct bch_fs *, struct bch_dev *); -+ -+void bch2_free_super(struct bch_sb_handle *); -+int bch2_sb_realloc(struct bch_sb_handle *, unsigned); -+ -+const char *bch2_sb_validate(struct bch_sb_handle *); -+ -+int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *); -+int bch2_write_super(struct bch_fs *); -+void __bch2_check_set_feature(struct bch_fs *, unsigned); -+ -+static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) -+{ -+ if (!(c->sb.features & (1ULL << feat))) -+ __bch2_check_set_feature(c, feat); -+} -+ -+/* BCH_SB_FIELD_journal: */ -+ -+static inline unsigned bch2_nr_journal_buckets(struct bch_sb_field_journal *j) -+{ -+ return j -+ ? (__le64 *) vstruct_end(&j->field) - j->buckets -+ : 0; -+} -+ -+/* BCH_SB_FIELD_members: */ -+ -+static inline bool bch2_member_exists(struct bch_member *m) -+{ -+ return !bch2_is_zero(m->uuid.b, sizeof(uuid_le)); -+} -+ -+static inline bool bch2_dev_exists(struct bch_sb *sb, -+ struct bch_sb_field_members *mi, -+ unsigned dev) -+{ -+ return dev < sb->nr_devices && -+ bch2_member_exists(&mi->members[dev]); -+} -+ -+static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) -+{ -+ return (struct bch_member_cpu) { -+ .nbuckets = le64_to_cpu(mi->nbuckets), -+ .first_bucket = le16_to_cpu(mi->first_bucket), -+ .bucket_size = le16_to_cpu(mi->bucket_size), -+ .group = BCH_MEMBER_GROUP(mi), -+ .state = BCH_MEMBER_STATE(mi), -+ .replacement = BCH_MEMBER_REPLACEMENT(mi), -+ .discard = BCH_MEMBER_DISCARD(mi), -+ .data_allowed = BCH_MEMBER_DATA_ALLOWED(mi), -+ .durability = BCH_MEMBER_DURABILITY(mi) -+ ? BCH_MEMBER_DURABILITY(mi) - 1 -+ : 1, -+ .valid = !bch2_is_zero(mi->uuid.b, sizeof(uuid_le)), -+ }; -+} -+ -+/* BCH_SB_FIELD_clean: */ -+ -+struct jset_entry * -+bch2_journal_super_entries_add_common(struct bch_fs *, -+ struct jset_entry *, u64); -+ -+void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int); -+ -+int bch2_fs_mark_dirty(struct bch_fs *); -+void bch2_fs_mark_clean(struct bch_fs *); -+ -+void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, -+ struct bch_sb_field *); -+ -+#endif /* _BCACHEFS_SUPER_IO_H */ -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -new file mode 100644 -index 000000000000..0cdf285e4ffd ---- /dev/null -+++ b/fs/bcachefs/super.c -@@ -0,0 +1,2046 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * bcachefs setup/teardown code, and some metadata io - read a superblock and -+ * figure out what to do with it. -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. 
-+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "bkey_sort.h" -+#include "btree_cache.h" -+#include "btree_gc.h" -+#include "btree_key_cache.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "chardev.h" -+#include "checksum.h" -+#include "clock.h" -+#include "compress.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "fs.h" -+#include "fs-io.h" -+#include "fsck.h" -+#include "inode.h" -+#include "io.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "move.h" -+#include "migrate.h" -+#include "movinggc.h" -+#include "quota.h" -+#include "rebalance.h" -+#include "recovery.h" -+#include "replicas.h" -+#include "super.h" -+#include "super-io.h" -+#include "sysfs.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Kent Overstreet "); -+ -+#define KTYPE(type) \ -+struct kobj_type type ## _ktype = { \ -+ .release = type ## _release, \ -+ .sysfs_ops = &type ## _sysfs_ops, \ -+ .default_attrs = type ## _files \ -+} -+ -+static void bch2_fs_release(struct kobject *); -+static void bch2_dev_release(struct kobject *); -+ -+static void bch2_fs_internal_release(struct kobject *k) -+{ -+} -+ -+static void bch2_fs_opts_dir_release(struct kobject *k) -+{ -+} -+ -+static void bch2_fs_time_stats_release(struct kobject *k) -+{ -+} -+ -+static KTYPE(bch2_fs); -+static KTYPE(bch2_fs_internal); -+static KTYPE(bch2_fs_opts_dir); -+static KTYPE(bch2_fs_time_stats); -+static KTYPE(bch2_dev); -+ -+static struct kset *bcachefs_kset; -+static LIST_HEAD(bch_fs_list); -+static DEFINE_MUTEX(bch_fs_list_lock); -+ -+static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait); -+ -+static void bch2_dev_free(struct bch_dev *); -+static int bch2_dev_alloc(struct bch_fs *, unsigned); -+static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *); -+static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *); -+ -+struct bch_fs *bch2_bdev_to_fs(struct block_device *bdev) -+{ -+ struct bch_fs *c; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ mutex_lock(&bch_fs_list_lock); -+ rcu_read_lock(); -+ -+ list_for_each_entry(c, &bch_fs_list, list) -+ for_each_member_device_rcu(ca, c, i, NULL) -+ if (ca->disk_sb.bdev == bdev) { -+ closure_get(&c->cl); -+ goto found; -+ } -+ c = NULL; -+found: -+ rcu_read_unlock(); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ return c; -+} -+ -+static struct bch_fs *__bch2_uuid_to_fs(uuid_le uuid) -+{ -+ struct bch_fs *c; -+ -+ lockdep_assert_held(&bch_fs_list_lock); -+ -+ list_for_each_entry(c, &bch_fs_list, list) -+ if (!memcmp(&c->disk_sb.sb->uuid, &uuid, sizeof(uuid_le))) -+ return c; -+ -+ return NULL; -+} -+ -+struct bch_fs *bch2_uuid_to_fs(uuid_le uuid) -+{ -+ struct bch_fs *c; -+ -+ mutex_lock(&bch_fs_list_lock); -+ c = __bch2_uuid_to_fs(uuid); -+ if (c) -+ closure_get(&c->cl); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ return c; -+} -+ -+int bch2_congested(void *data, int bdi_bits) -+{ -+ struct bch_fs *c = data; -+ struct backing_dev_info *bdi; -+ struct bch_dev *ca; -+ unsigned i; -+ int ret = 0; -+ -+ rcu_read_lock(); -+ if (bdi_bits & (1 << WB_sync_congested)) { -+ /* Reads - check all devices: */ -+ for_each_readable_member(ca, c, i) { -+ bdi = ca->disk_sb.bdev->bd_bdi; -+ -+ if (bdi_congested(bdi, bdi_bits)) { -+ ret = 1; -+ break; -+ } -+ } -+ } else { -+ 
unsigned target = READ_ONCE(c->opts.foreground_target); -+ const struct bch_devs_mask *devs = target -+ ? bch2_target_to_mask(c, target) -+ : &c->rw_devs[BCH_DATA_USER]; -+ -+ for_each_member_device_rcu(ca, c, i, devs) { -+ bdi = ca->disk_sb.bdev->bd_bdi; -+ -+ if (bdi_congested(bdi, bdi_bits)) { -+ ret = 1; -+ break; -+ } -+ } -+ } -+ rcu_read_unlock(); -+ -+ return ret; -+} -+ -+/* Filesystem RO/RW: */ -+ -+/* -+ * For startup/shutdown of RW stuff, the dependencies are: -+ * -+ * - foreground writes depend on copygc and rebalance (to free up space) -+ * -+ * - copygc and rebalance depend on mark and sweep gc (they actually probably -+ * don't because they either reserve ahead of time or don't block if -+ * allocations fail, but allocations can require mark and sweep gc to run -+ * because of generation number wraparound) -+ * -+ * - all of the above depends on the allocator threads -+ * -+ * - allocator depends on the journal (when it rewrites prios and gens) -+ */ -+ -+static void __bch2_fs_read_only(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ bool wrote = false; -+ unsigned i, clean_passes = 0; -+ int ret; -+ -+ bch2_rebalance_stop(c); -+ -+ for_each_member_device(ca, c, i) -+ bch2_copygc_stop(ca); -+ -+ bch2_gc_thread_stop(c); -+ -+ /* -+ * Flush journal before stopping allocators, because flushing journal -+ * blacklist entries involves allocating new btree nodes: -+ */ -+ bch2_journal_flush_all_pins(&c->journal); -+ -+ /* -+ * If the allocator threads didn't all start up, the btree updates to -+ * write out alloc info aren't going to work: -+ */ -+ if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags)) -+ goto nowrote_alloc; -+ -+ bch_verbose(c, "writing alloc info"); -+ /* -+ * This should normally just be writing the bucket read/write clocks: -+ */ -+ ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?: -+ bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote); -+ bch_verbose(c, "writing alloc info complete"); -+ -+ if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) -+ bch2_fs_inconsistent(c, "error writing out alloc info %i", ret); -+ -+ if (ret) -+ goto nowrote_alloc; -+ -+ bch_verbose(c, "flushing journal and stopping allocators"); -+ -+ bch2_journal_flush_all_pins(&c->journal); -+ set_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags); -+ -+ do { -+ clean_passes++; -+ -+ if (bch2_journal_flush_all_pins(&c->journal)) -+ clean_passes = 0; -+ -+ /* -+ * In flight interior btree updates will generate more journal -+ * updates and btree updates (alloc btree): -+ */ -+ if (bch2_btree_interior_updates_nr_pending(c)) { -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+ clean_passes = 0; -+ } -+ flush_work(&c->btree_interior_update_work); -+ -+ if (bch2_journal_flush_all_pins(&c->journal)) -+ clean_passes = 0; -+ } while (clean_passes < 2); -+ bch_verbose(c, "flushing journal and stopping allocators complete"); -+ -+ set_bit(BCH_FS_ALLOC_CLEAN, &c->flags); -+nowrote_alloc: -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+ flush_work(&c->btree_interior_update_work); -+ -+ for_each_member_device(ca, c, i) -+ bch2_dev_allocator_stop(ca); -+ -+ clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); -+ clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags); -+ -+ bch2_fs_journal_stop(&c->journal); -+ -+ /* -+ * the journal kicks off btree writes via reclaim - wait for in flight -+ * writes after stopping journal: -+ */ -+ if (test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) -+ 
bch2_btree_flush_all_writes(c); -+ else -+ bch2_btree_verify_flushed(c); -+ -+ /* -+ * After stopping journal: -+ */ -+ for_each_member_device(ca, c, i) -+ bch2_dev_allocator_remove(c, ca); -+} -+ -+static void bch2_writes_disabled(struct percpu_ref *writes) -+{ -+ struct bch_fs *c = container_of(writes, struct bch_fs, writes); -+ -+ set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); -+ wake_up(&bch_read_only_wait); -+} -+ -+void bch2_fs_read_only(struct bch_fs *c) -+{ -+ if (!test_bit(BCH_FS_RW, &c->flags)) { -+ cancel_delayed_work_sync(&c->journal.reclaim_work); -+ return; -+ } -+ -+ BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); -+ -+ /* -+ * Block new foreground-end write operations from starting - any new -+ * writes will return -EROFS: -+ * -+ * (This is really blocking new _allocations_, writes to previously -+ * allocated space can still happen until stopping the allocator in -+ * bch2_dev_allocator_stop()). -+ */ -+ percpu_ref_kill(&c->writes); -+ -+ cancel_work_sync(&c->ec_stripe_delete_work); -+ cancel_delayed_work(&c->pd_controllers_update); -+ -+ /* -+ * If we're not doing an emergency shutdown, we want to wait on -+ * outstanding writes to complete so they don't see spurious errors due -+ * to shutting down the allocator: -+ * -+ * If we are doing an emergency shutdown outstanding writes may -+ * hang until we shutdown the allocator so we don't want to wait -+ * on outstanding writes before shutting everything down - but -+ * we do need to wait on them before returning and signalling -+ * that going RO is complete: -+ */ -+ wait_event(bch_read_only_wait, -+ test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) || -+ test_bit(BCH_FS_EMERGENCY_RO, &c->flags)); -+ -+ __bch2_fs_read_only(c); -+ -+ wait_event(bch_read_only_wait, -+ test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); -+ -+ clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); -+ -+ if (!bch2_journal_error(&c->journal) && -+ !test_bit(BCH_FS_ERROR, &c->flags) && -+ !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) && -+ test_bit(BCH_FS_STARTED, &c->flags) && -+ test_bit(BCH_FS_ALLOC_CLEAN, &c->flags) && -+ !c->opts.norecovery) { -+ bch_verbose(c, "marking filesystem clean"); -+ bch2_fs_mark_clean(c); -+ } -+ -+ clear_bit(BCH_FS_RW, &c->flags); -+} -+ -+static void bch2_fs_read_only_work(struct work_struct *work) -+{ -+ struct bch_fs *c = -+ container_of(work, struct bch_fs, read_only_work); -+ -+ down_write(&c->state_lock); -+ bch2_fs_read_only(c); -+ up_write(&c->state_lock); -+} -+ -+static void bch2_fs_read_only_async(struct bch_fs *c) -+{ -+ queue_work(system_long_wq, &c->read_only_work); -+} -+ -+bool bch2_fs_emergency_read_only(struct bch_fs *c) -+{ -+ bool ret = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags); -+ -+ bch2_fs_read_only_async(c); -+ bch2_journal_halt(&c->journal); -+ -+ wake_up(&bch_read_only_wait); -+ return ret; -+} -+ -+static int bch2_fs_read_write_late(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ int ret; -+ -+ ret = bch2_gc_thread_start(c); -+ if (ret) { -+ bch_err(c, "error starting gc thread"); -+ return ret; -+ } -+ -+ for_each_rw_member(ca, c, i) { -+ ret = bch2_copygc_start(c, ca); -+ if (ret) { -+ bch_err(c, "error starting copygc threads"); -+ percpu_ref_put(&ca->io_ref); -+ return ret; -+ } -+ } -+ -+ ret = bch2_rebalance_start(c); -+ if (ret) { -+ bch_err(c, "error starting rebalance thread"); -+ return ret; -+ } -+ -+ schedule_delayed_work(&c->pd_controllers_update, 5 * HZ); -+ -+ schedule_work(&c->ec_stripe_delete_work); -+ -+ return 0; -+} -+ -+static int 
__bch2_fs_read_write(struct bch_fs *c, bool early) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ int ret; -+ -+ if (test_bit(BCH_FS_RW, &c->flags)) -+ return 0; -+ -+ /* -+ * nochanges is used for fsck -n mode - we have to allow going rw -+ * during recovery for that to work: -+ */ -+ if (c->opts.norecovery || -+ (c->opts.nochanges && -+ (!early || c->opts.read_only))) -+ return -EROFS; -+ -+ ret = bch2_fs_mark_dirty(c); -+ if (ret) -+ goto err; -+ -+ clear_bit(BCH_FS_ALLOC_CLEAN, &c->flags); -+ -+ for_each_rw_member(ca, c, i) -+ bch2_dev_allocator_add(c, ca); -+ bch2_recalc_capacity(c); -+ -+ for_each_rw_member(ca, c, i) { -+ ret = bch2_dev_allocator_start(ca); -+ if (ret) { -+ bch_err(c, "error starting allocator threads"); -+ percpu_ref_put(&ca->io_ref); -+ goto err; -+ } -+ } -+ -+ set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); -+ -+ if (!early) { -+ ret = bch2_fs_read_write_late(c); -+ if (ret) -+ goto err; -+ } -+ -+ percpu_ref_reinit(&c->writes); -+ set_bit(BCH_FS_RW, &c->flags); -+ -+ queue_delayed_work(c->journal_reclaim_wq, -+ &c->journal.reclaim_work, 0); -+ return 0; -+err: -+ __bch2_fs_read_only(c); -+ return ret; -+} -+ -+int bch2_fs_read_write(struct bch_fs *c) -+{ -+ return __bch2_fs_read_write(c, false); -+} -+ -+int bch2_fs_read_write_early(struct bch_fs *c) -+{ -+ lockdep_assert_held(&c->state_lock); -+ -+ return __bch2_fs_read_write(c, true); -+} -+ -+/* Filesystem startup/shutdown: */ -+ -+static void bch2_fs_free(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ for (i = 0; i < BCH_TIME_STAT_NR; i++) -+ bch2_time_stats_exit(&c->times[i]); -+ -+ bch2_fs_quota_exit(c); -+ bch2_fs_fsio_exit(c); -+ bch2_fs_ec_exit(c); -+ bch2_fs_encryption_exit(c); -+ bch2_fs_io_exit(c); -+ bch2_fs_btree_interior_update_exit(c); -+ bch2_fs_btree_iter_exit(c); -+ bch2_fs_btree_key_cache_exit(&c->btree_key_cache); -+ bch2_fs_btree_cache_exit(c); -+ bch2_fs_journal_exit(&c->journal); -+ bch2_io_clock_exit(&c->io_clock[WRITE]); -+ bch2_io_clock_exit(&c->io_clock[READ]); -+ bch2_fs_compress_exit(c); -+ bch2_journal_keys_free(&c->journal_keys); -+ bch2_journal_entries_free(&c->journal_entries); -+ percpu_free_rwsem(&c->mark_lock); -+ kfree(c->usage_scratch); -+ free_percpu(c->usage[1]); -+ free_percpu(c->usage[0]); -+ kfree(c->usage_base); -+ free_percpu(c->pcpu); -+ mempool_exit(&c->large_bkey_pool); -+ mempool_exit(&c->btree_bounce_pool); -+ bioset_exit(&c->btree_bio); -+ mempool_exit(&c->fill_iter); -+ percpu_ref_exit(&c->writes); -+ kfree(c->replicas.entries); -+ kfree(c->replicas_gc.entries); -+ kfree(rcu_dereference_protected(c->disk_groups, 1)); -+ kfree(c->journal_seq_blacklist_table); -+ -+ if (c->journal_reclaim_wq) -+ destroy_workqueue(c->journal_reclaim_wq); -+ if (c->copygc_wq) -+ destroy_workqueue(c->copygc_wq); -+ if (c->wq) -+ destroy_workqueue(c->wq); -+ -+ free_pages((unsigned long) c->disk_sb.sb, -+ c->disk_sb.page_order); -+ kvpfree(c, sizeof(*c)); -+ module_put(THIS_MODULE); -+} -+ -+static void bch2_fs_release(struct kobject *kobj) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); -+ -+ bch2_fs_free(c); -+} -+ -+void bch2_fs_stop(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ bch_verbose(c, "shutting down"); -+ -+ set_bit(BCH_FS_STOPPING, &c->flags); -+ -+ cancel_work_sync(&c->journal_seq_blacklist_gc_work); -+ -+ down_write(&c->state_lock); -+ bch2_fs_read_only(c); -+ up_write(&c->state_lock); -+ -+ for_each_member_device(ca, c, i) -+ if (ca->kobj.state_in_sysfs && -+ ca->disk_sb.bdev) -+ 
sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, -+ "bcachefs"); -+ -+ if (c->kobj.state_in_sysfs) -+ kobject_del(&c->kobj); -+ -+ bch2_fs_debug_exit(c); -+ bch2_fs_chardev_exit(c); -+ -+ kobject_put(&c->time_stats); -+ kobject_put(&c->opts_dir); -+ kobject_put(&c->internal); -+ -+ mutex_lock(&bch_fs_list_lock); -+ list_del(&c->list); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ closure_sync(&c->cl); -+ closure_debug_destroy(&c->cl); -+ -+ /* btree prefetch might have kicked off reads in the background: */ -+ bch2_btree_flush_all_reads(c); -+ -+ for_each_member_device(ca, c, i) -+ cancel_work_sync(&ca->io_error_work); -+ -+ cancel_work_sync(&c->btree_write_error_work); -+ cancel_delayed_work_sync(&c->pd_controllers_update); -+ cancel_work_sync(&c->read_only_work); -+ -+ for (i = 0; i < c->sb.nr_devices; i++) -+ if (c->devs[i]) -+ bch2_dev_free(rcu_dereference_protected(c->devs[i], 1)); -+ -+ bch_verbose(c, "shutdown complete"); -+ -+ kobject_put(&c->kobj); -+} -+ -+static const char *bch2_fs_online(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ const char *err = NULL; -+ unsigned i; -+ int ret; -+ -+ lockdep_assert_held(&bch_fs_list_lock); -+ -+ if (!list_empty(&c->list)) -+ return NULL; -+ -+ if (__bch2_uuid_to_fs(c->sb.uuid)) -+ return "filesystem UUID already open"; -+ -+ ret = bch2_fs_chardev_init(c); -+ if (ret) -+ return "error creating character device"; -+ -+ bch2_fs_debug_init(c); -+ -+ if (kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) || -+ kobject_add(&c->internal, &c->kobj, "internal") || -+ kobject_add(&c->opts_dir, &c->kobj, "options") || -+ kobject_add(&c->time_stats, &c->kobj, "time_stats") || -+ bch2_opts_create_sysfs_files(&c->opts_dir)) -+ return "error creating sysfs objects"; -+ -+ down_write(&c->state_lock); -+ -+ err = "error creating sysfs objects"; -+ __for_each_member_device(ca, c, i, NULL) -+ if (bch2_dev_sysfs_online(c, ca)) -+ goto err; -+ -+ list_add(&c->list, &bch_fs_list); -+ err = NULL; -+err: -+ up_write(&c->state_lock); -+ return err; -+} -+ -+static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) -+{ -+ struct bch_sb_field_members *mi; -+ struct bch_fs *c; -+ unsigned i, iter_size; -+ const char *err; -+ -+ pr_verbose_init(opts, ""); -+ -+ c = kvpmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO); -+ if (!c) -+ goto out; -+ -+ __module_get(THIS_MODULE); -+ -+ c->minor = -1; -+ c->disk_sb.fs_sb = true; -+ -+ init_rwsem(&c->state_lock); -+ mutex_init(&c->sb_lock); -+ mutex_init(&c->replicas_gc_lock); -+ mutex_init(&c->btree_root_lock); -+ INIT_WORK(&c->read_only_work, bch2_fs_read_only_work); -+ -+ init_rwsem(&c->gc_lock); -+ -+ for (i = 0; i < BCH_TIME_STAT_NR; i++) -+ bch2_time_stats_init(&c->times[i]); -+ -+ bch2_fs_btree_key_cache_init_early(&c->btree_key_cache); -+ bch2_fs_allocator_background_init(c); -+ bch2_fs_allocator_foreground_init(c); -+ bch2_fs_rebalance_init(c); -+ bch2_fs_quota_init(c); -+ -+ INIT_LIST_HEAD(&c->list); -+ -+ mutex_init(&c->usage_scratch_lock); -+ -+ mutex_init(&c->bio_bounce_pages_lock); -+ -+ bio_list_init(&c->btree_write_error_list); -+ spin_lock_init(&c->btree_write_error_lock); -+ INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work); -+ -+ INIT_WORK(&c->journal_seq_blacklist_gc_work, -+ bch2_blacklist_entries_gc); -+ -+ INIT_LIST_HEAD(&c->journal_entries); -+ -+ INIT_LIST_HEAD(&c->fsck_errors); -+ mutex_init(&c->fsck_error_lock); -+ -+ INIT_LIST_HEAD(&c->ec_new_stripe_list); -+ mutex_init(&c->ec_new_stripe_lock); -+ mutex_init(&c->ec_stripe_create_lock); -+ 
spin_lock_init(&c->ec_stripes_heap_lock); -+ -+ seqcount_init(&c->gc_pos_lock); -+ -+ seqcount_init(&c->usage_lock); -+ -+ sema_init(&c->io_in_flight, 64); -+ -+ c->copy_gc_enabled = 1; -+ c->rebalance.enabled = 1; -+ c->promote_whole_extents = true; -+ -+ c->journal.write_time = &c->times[BCH_TIME_journal_write]; -+ c->journal.delay_time = &c->times[BCH_TIME_journal_delay]; -+ c->journal.blocked_time = &c->times[BCH_TIME_blocked_journal]; -+ c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq]; -+ -+ bch2_fs_btree_cache_init_early(&c->btree_cache); -+ -+ if (percpu_init_rwsem(&c->mark_lock)) -+ goto err; -+ -+ mutex_lock(&c->sb_lock); -+ -+ if (bch2_sb_to_fs(c, sb)) { -+ mutex_unlock(&c->sb_lock); -+ goto err; -+ } -+ -+ mutex_unlock(&c->sb_lock); -+ -+ scnprintf(c->name, sizeof(c->name), "%pU", &c->sb.user_uuid); -+ -+ c->opts = bch2_opts_default; -+ bch2_opts_apply(&c->opts, bch2_opts_from_sb(sb)); -+ bch2_opts_apply(&c->opts, opts); -+ -+ c->block_bits = ilog2(c->opts.block_size); -+ c->btree_foreground_merge_threshold = BTREE_FOREGROUND_MERGE_THRESHOLD(c); -+ -+ if (bch2_fs_init_fault("fs_alloc")) -+ goto err; -+ -+ iter_size = sizeof(struct sort_iter) + -+ (btree_blocks(c) + 1) * 2 * -+ sizeof(struct sort_iter_set); -+ -+ if (!(c->wq = alloc_workqueue("bcachefs", -+ WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || -+ !(c->copygc_wq = alloc_workqueue("bcache_copygc", -+ WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || -+ !(c->journal_reclaim_wq = alloc_workqueue("bcache_journal", -+ WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || -+ percpu_ref_init(&c->writes, bch2_writes_disabled, -+ PERCPU_REF_INIT_DEAD, GFP_KERNEL) || -+ mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) || -+ bioset_init(&c->btree_bio, 1, -+ max(offsetof(struct btree_read_bio, bio), -+ offsetof(struct btree_write_bio, wbio.bio)), -+ BIOSET_NEED_BVECS) || -+ !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) || -+ mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, -+ btree_bytes(c)) || -+ mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || -+ bch2_io_clock_init(&c->io_clock[READ]) || -+ bch2_io_clock_init(&c->io_clock[WRITE]) || -+ bch2_fs_journal_init(&c->journal) || -+ bch2_fs_replicas_init(c) || -+ bch2_fs_btree_cache_init(c) || -+ bch2_fs_btree_key_cache_init(&c->btree_key_cache) || -+ bch2_fs_btree_iter_init(c) || -+ bch2_fs_btree_interior_update_init(c) || -+ bch2_fs_io_init(c) || -+ bch2_fs_encryption_init(c) || -+ bch2_fs_compress_init(c) || -+ bch2_fs_ec_init(c) || -+ bch2_fs_fsio_init(c)) -+ goto err; -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ for (i = 0; i < c->sb.nr_devices; i++) -+ if (bch2_dev_exists(c->disk_sb.sb, mi, i) && -+ bch2_dev_alloc(c, i)) -+ goto err; -+ -+ /* -+ * Now that all allocations have succeeded, init various refcounty -+ * things that let us shutdown: -+ */ -+ closure_init(&c->cl, NULL); -+ -+ c->kobj.kset = bcachefs_kset; -+ kobject_init(&c->kobj, &bch2_fs_ktype); -+ kobject_init(&c->internal, &bch2_fs_internal_ktype); -+ kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype); -+ kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype); -+ -+ mutex_lock(&bch_fs_list_lock); -+ err = bch2_fs_online(c); -+ mutex_unlock(&bch_fs_list_lock); -+ if (err) { -+ bch_err(c, "bch2_fs_online() error: %s", err); -+ goto err; -+ } -+out: -+ pr_verbose_init(opts, "ret %i", c ? 
0 : -ENOMEM); -+ return c; -+err: -+ bch2_fs_free(c); -+ c = NULL; -+ goto out; -+} -+ -+noinline_for_stack -+static void print_mount_opts(struct bch_fs *c) -+{ -+ enum bch_opt_id i; -+ char buf[512]; -+ struct printbuf p = PBUF(buf); -+ bool first = true; -+ -+ strcpy(buf, "(null)"); -+ -+ if (c->opts.read_only) { -+ pr_buf(&p, "ro"); -+ first = false; -+ } -+ -+ for (i = 0; i < bch2_opts_nr; i++) { -+ const struct bch_option *opt = &bch2_opt_table[i]; -+ u64 v = bch2_opt_get_by_id(&c->opts, i); -+ -+ if (!(opt->mode & OPT_MOUNT)) -+ continue; -+ -+ if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) -+ continue; -+ -+ if (!first) -+ pr_buf(&p, ","); -+ first = false; -+ bch2_opt_to_text(&p, c, opt, v, OPT_SHOW_MOUNT_STYLE); -+ } -+ -+ bch_info(c, "mounted with opts: %s", buf); -+} -+ -+int bch2_fs_start(struct bch_fs *c) -+{ -+ const char *err = "cannot allocate memory"; -+ struct bch_sb_field_members *mi; -+ struct bch_dev *ca; -+ time64_t now = ktime_get_real_seconds(); -+ unsigned i; -+ int ret = -EINVAL; -+ -+ down_write(&c->state_lock); -+ -+ BUG_ON(test_bit(BCH_FS_STARTED, &c->flags)); -+ -+ mutex_lock(&c->sb_lock); -+ -+ for_each_online_member(ca, c, i) -+ bch2_sb_from_fs(c, ca); -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ for_each_online_member(ca, c, i) -+ mi->members[ca->dev_idx].last_mount = cpu_to_le64(now); -+ -+ mutex_unlock(&c->sb_lock); -+ -+ for_each_rw_member(ca, c, i) -+ bch2_dev_allocator_add(c, ca); -+ bch2_recalc_capacity(c); -+ -+ ret = BCH_SB_INITIALIZED(c->disk_sb.sb) -+ ? bch2_fs_recovery(c) -+ : bch2_fs_initialize(c); -+ if (ret) -+ goto err; -+ -+ ret = bch2_opts_check_may_set(c); -+ if (ret) -+ goto err; -+ -+ err = "dynamic fault"; -+ ret = -EINVAL; -+ if (bch2_fs_init_fault("fs_start")) -+ goto err; -+ -+ set_bit(BCH_FS_STARTED, &c->flags); -+ -+ if (c->opts.read_only || c->opts.nochanges) { -+ bch2_fs_read_only(c); -+ } else { -+ err = "error going read write"; -+ ret = !test_bit(BCH_FS_RW, &c->flags) -+ ? 
bch2_fs_read_write(c) -+ : bch2_fs_read_write_late(c); -+ if (ret) -+ goto err; -+ } -+ -+ print_mount_opts(c); -+ ret = 0; -+out: -+ up_write(&c->state_lock); -+ return ret; -+err: -+ switch (ret) { -+ case BCH_FSCK_ERRORS_NOT_FIXED: -+ bch_err(c, "filesystem contains errors: please report this to the developers"); -+ pr_cont("mount with -o fix_errors to repair\n"); -+ err = "fsck error"; -+ break; -+ case BCH_FSCK_REPAIR_UNIMPLEMENTED: -+ bch_err(c, "filesystem contains errors: please report this to the developers"); -+ pr_cont("repair unimplemented: inform the developers so that it can be added\n"); -+ err = "fsck error"; -+ break; -+ case BCH_FSCK_REPAIR_IMPOSSIBLE: -+ bch_err(c, "filesystem contains errors, but repair impossible"); -+ err = "fsck error"; -+ break; -+ case BCH_FSCK_UNKNOWN_VERSION: -+ err = "unknown metadata version";; -+ break; -+ case -ENOMEM: -+ err = "cannot allocate memory"; -+ break; -+ case -EIO: -+ err = "IO error"; -+ break; -+ } -+ -+ if (ret >= 0) -+ ret = -EIO; -+ goto out; -+} -+ -+static const char *bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) -+{ -+ struct bch_sb_field_members *sb_mi; -+ -+ sb_mi = bch2_sb_get_members(sb); -+ if (!sb_mi) -+ return "Invalid superblock: member info area missing"; -+ -+ if (le16_to_cpu(sb->block_size) != c->opts.block_size) -+ return "mismatched block size"; -+ -+ if (le16_to_cpu(sb_mi->members[sb->dev_idx].bucket_size) < -+ BCH_SB_BTREE_NODE_SIZE(c->disk_sb.sb)) -+ return "new cache bucket size is too small"; -+ -+ return NULL; -+} -+ -+static const char *bch2_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb) -+{ -+ struct bch_sb *newest = -+ le64_to_cpu(fs->seq) > le64_to_cpu(sb->seq) ? fs : sb; -+ struct bch_sb_field_members *mi = bch2_sb_get_members(newest); -+ -+ if (uuid_le_cmp(fs->uuid, sb->uuid)) -+ return "device not a member of filesystem"; -+ -+ if (!bch2_dev_exists(newest, mi, sb->dev_idx)) -+ return "device has been removed"; -+ -+ if (fs->block_size != sb->block_size) -+ return "mismatched block size"; -+ -+ return NULL; -+} -+ -+/* Device startup/shutdown: */ -+ -+static void bch2_dev_release(struct kobject *kobj) -+{ -+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); -+ -+ kfree(ca); -+} -+ -+static void bch2_dev_free(struct bch_dev *ca) -+{ -+ cancel_work_sync(&ca->io_error_work); -+ -+ if (ca->kobj.state_in_sysfs && -+ ca->disk_sb.bdev) -+ sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, -+ "bcachefs"); -+ -+ if (ca->kobj.state_in_sysfs) -+ kobject_del(&ca->kobj); -+ -+ bch2_free_super(&ca->disk_sb); -+ bch2_dev_journal_exit(ca); -+ -+ free_percpu(ca->io_done); -+ bioset_exit(&ca->replica_set); -+ bch2_dev_buckets_free(ca); -+ free_page((unsigned long) ca->sb_read_scratch); -+ -+ bch2_time_stats_exit(&ca->io_latency[WRITE]); -+ bch2_time_stats_exit(&ca->io_latency[READ]); -+ -+ percpu_ref_exit(&ca->io_ref); -+ percpu_ref_exit(&ca->ref); -+ kobject_put(&ca->kobj); -+} -+ -+static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca) -+{ -+ -+ lockdep_assert_held(&c->state_lock); -+ -+ if (percpu_ref_is_zero(&ca->io_ref)) -+ return; -+ -+ __bch2_dev_read_only(c, ca); -+ -+ reinit_completion(&ca->io_ref_completion); -+ percpu_ref_kill(&ca->io_ref); -+ wait_for_completion(&ca->io_ref_completion); -+ -+ if (ca->kobj.state_in_sysfs) { -+ struct kobject *block = -+ &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj; -+ -+ sysfs_remove_link(block, "bcachefs"); -+ sysfs_remove_link(&ca->kobj, "block"); -+ } -+ -+ bch2_free_super(&ca->disk_sb); -+ bch2_dev_journal_exit(ca); 
-+} -+ -+static void bch2_dev_ref_complete(struct percpu_ref *ref) -+{ -+ struct bch_dev *ca = container_of(ref, struct bch_dev, ref); -+ -+ complete(&ca->ref_completion); -+} -+ -+static void bch2_dev_io_ref_complete(struct percpu_ref *ref) -+{ -+ struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref); -+ -+ complete(&ca->io_ref_completion); -+} -+ -+static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca) -+{ -+ int ret; -+ -+ if (!c->kobj.state_in_sysfs) -+ return 0; -+ -+ if (!ca->kobj.state_in_sysfs) { -+ ret = kobject_add(&ca->kobj, &c->kobj, -+ "dev-%u", ca->dev_idx); -+ if (ret) -+ return ret; -+ } -+ -+ if (ca->disk_sb.bdev) { -+ struct kobject *block = -+ &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj; -+ -+ ret = sysfs_create_link(block, &ca->kobj, "bcachefs"); -+ if (ret) -+ return ret; -+ ret = sysfs_create_link(&ca->kobj, block, "block"); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, -+ struct bch_member *member) -+{ -+ struct bch_dev *ca; -+ -+ ca = kzalloc(sizeof(*ca), GFP_KERNEL); -+ if (!ca) -+ return NULL; -+ -+ kobject_init(&ca->kobj, &bch2_dev_ktype); -+ init_completion(&ca->ref_completion); -+ init_completion(&ca->io_ref_completion); -+ -+ init_rwsem(&ca->bucket_lock); -+ -+ writepoint_init(&ca->copygc_write_point, BCH_DATA_USER); -+ -+ bch2_dev_copygc_init(ca); -+ -+ INIT_WORK(&ca->io_error_work, bch2_io_error_work); -+ -+ bch2_time_stats_init(&ca->io_latency[READ]); -+ bch2_time_stats_init(&ca->io_latency[WRITE]); -+ -+ ca->mi = bch2_mi_to_cpu(member); -+ ca->uuid = member->uuid; -+ -+ if (opt_defined(c->opts, discard)) -+ ca->mi.discard = opt_get(c->opts, discard); -+ -+ if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete, -+ 0, GFP_KERNEL) || -+ percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete, -+ PERCPU_REF_INIT_DEAD, GFP_KERNEL) || -+ !(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) || -+ bch2_dev_buckets_alloc(c, ca) || -+ bioset_init(&ca->replica_set, 4, -+ offsetof(struct bch_write_bio, bio), 0) || -+ !(ca->io_done = alloc_percpu(*ca->io_done))) -+ goto err; -+ -+ return ca; -+err: -+ bch2_dev_free(ca); -+ return NULL; -+} -+ -+static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca, -+ unsigned dev_idx) -+{ -+ ca->dev_idx = dev_idx; -+ __set_bit(ca->dev_idx, ca->self.d); -+ scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx); -+ -+ ca->fs = c; -+ rcu_assign_pointer(c->devs[ca->dev_idx], ca); -+ -+ if (bch2_dev_sysfs_online(c, ca)) -+ pr_warn("error creating sysfs objects"); -+} -+ -+static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) -+{ -+ struct bch_member *member = -+ bch2_sb_get_members(c->disk_sb.sb)->members + dev_idx; -+ struct bch_dev *ca = NULL; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ if (bch2_fs_init_fault("dev_alloc")) -+ goto err; -+ -+ ca = __bch2_dev_alloc(c, member); -+ if (!ca) -+ goto err; -+ -+ bch2_dev_attach(c, ca, dev_idx); -+out: -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+err: -+ if (ca) -+ bch2_dev_free(ca); -+ ret = -ENOMEM; -+ goto out; -+} -+ -+static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) -+{ -+ unsigned ret; -+ -+ if (bch2_dev_is_online(ca)) { -+ bch_err(ca, "already have device online in slot %u", -+ sb->sb->dev_idx); -+ return -EINVAL; -+ } -+ -+ if (get_capacity(sb->bdev->bd_disk) < -+ ca->mi.bucket_size * ca->mi.nbuckets) { -+ bch_err(ca, "cannot online: device too small"); -+ return -EINVAL; -+ } -+ -+ 
BUG_ON(!percpu_ref_is_zero(&ca->io_ref)); -+ -+ if (get_capacity(sb->bdev->bd_disk) < -+ ca->mi.bucket_size * ca->mi.nbuckets) { -+ bch_err(ca, "device too small"); -+ return -EINVAL; -+ } -+ -+ ret = bch2_dev_journal_init(ca, sb->sb); -+ if (ret) -+ return ret; -+ -+ /* Commit: */ -+ ca->disk_sb = *sb; -+ if (sb->mode & FMODE_EXCL) -+ ca->disk_sb.bdev->bd_holder = ca; -+ memset(sb, 0, sizeof(*sb)); -+ -+ percpu_ref_reinit(&ca->io_ref); -+ -+ return 0; -+} -+ -+static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ lockdep_assert_held(&c->state_lock); -+ -+ if (le64_to_cpu(sb->sb->seq) > -+ le64_to_cpu(c->disk_sb.sb->seq)) -+ bch2_sb_to_fs(c, sb->sb); -+ -+ BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices || -+ !c->devs[sb->sb->dev_idx]); -+ -+ ca = bch_dev_locked(c, sb->sb->dev_idx); -+ -+ ret = __bch2_dev_attach_bdev(ca, sb); -+ if (ret) -+ return ret; -+ -+ if (test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags) && -+ !percpu_u64_get(&ca->usage[0]->buckets[BCH_DATA_SB])) { -+ mutex_lock(&c->sb_lock); -+ bch2_mark_dev_superblock(ca->fs, ca, 0); -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ bch2_dev_sysfs_online(c, ca); -+ -+ if (c->sb.nr_devices == 1) -+ bdevname(ca->disk_sb.bdev, c->name); -+ bdevname(ca->disk_sb.bdev, ca->name); -+ -+ rebalance_wakeup(c); -+ return 0; -+} -+ -+/* Device management: */ -+ -+/* -+ * Note: this function is also used by the error paths - when a particular -+ * device sees an error, we call it to determine whether we can just set the -+ * device RO, or - if this function returns false - we'll set the whole -+ * filesystem RO: -+ * -+ * XXX: maybe we should be more explicit about whether we're changing state -+ * because we got an error or what have you? -+ */ -+bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, -+ enum bch_member_state new_state, int flags) -+{ -+ struct bch_devs_mask new_online_devs; -+ struct replicas_status s; -+ struct bch_dev *ca2; -+ int i, nr_rw = 0, required; -+ -+ lockdep_assert_held(&c->state_lock); -+ -+ switch (new_state) { -+ case BCH_MEMBER_STATE_RW: -+ return true; -+ case BCH_MEMBER_STATE_RO: -+ if (ca->mi.state != BCH_MEMBER_STATE_RW) -+ return true; -+ -+ /* do we have enough devices to write to? */ -+ for_each_member_device(ca2, c, i) -+ if (ca2 != ca) -+ nr_rw += ca2->mi.state == BCH_MEMBER_STATE_RW; -+ -+ required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED) -+ ? c->opts.metadata_replicas -+ : c->opts.metadata_replicas_required, -+ !(flags & BCH_FORCE_IF_DATA_DEGRADED) -+ ? c->opts.data_replicas -+ : c->opts.data_replicas_required); -+ -+ return nr_rw >= required; -+ case BCH_MEMBER_STATE_FAILED: -+ case BCH_MEMBER_STATE_SPARE: -+ if (ca->mi.state != BCH_MEMBER_STATE_RW && -+ ca->mi.state != BCH_MEMBER_STATE_RO) -+ return true; -+ -+ /* do we have enough devices to read from? */ -+ new_online_devs = bch2_online_devs(c); -+ __clear_bit(ca->dev_idx, new_online_devs.d); -+ -+ s = __bch2_replicas_status(c, new_online_devs); -+ -+ return bch2_have_enough_devs(s, flags); -+ default: -+ BUG(); -+ } -+} -+ -+static bool bch2_fs_may_start(struct bch_fs *c) -+{ -+ struct replicas_status s; -+ struct bch_sb_field_members *mi; -+ struct bch_dev *ca; -+ unsigned i, flags = c->opts.degraded -+ ? 
BCH_FORCE_IF_DEGRADED -+ : 0; -+ -+ if (!c->opts.degraded) { -+ mutex_lock(&c->sb_lock); -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ -+ for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { -+ if (!bch2_dev_exists(c->disk_sb.sb, mi, i)) -+ continue; -+ -+ ca = bch_dev_locked(c, i); -+ -+ if (!bch2_dev_is_online(ca) && -+ (ca->mi.state == BCH_MEMBER_STATE_RW || -+ ca->mi.state == BCH_MEMBER_STATE_RO)) { -+ mutex_unlock(&c->sb_lock); -+ return false; -+ } -+ } -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ s = bch2_replicas_status(c); -+ -+ return bch2_have_enough_devs(s, flags); -+} -+ -+static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) -+{ -+ bch2_copygc_stop(ca); -+ -+ /* -+ * The allocator thread itself allocates btree nodes, so stop it first: -+ */ -+ bch2_dev_allocator_stop(ca); -+ bch2_dev_allocator_remove(c, ca); -+ bch2_dev_journal_stop(&c->journal, ca); -+} -+ -+static const char *__bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) -+{ -+ lockdep_assert_held(&c->state_lock); -+ -+ BUG_ON(ca->mi.state != BCH_MEMBER_STATE_RW); -+ -+ bch2_dev_allocator_add(c, ca); -+ bch2_recalc_capacity(c); -+ -+ if (bch2_dev_allocator_start(ca)) -+ return "error starting allocator thread"; -+ -+ if (bch2_copygc_start(c, ca)) -+ return "error starting copygc thread"; -+ -+ return NULL; -+} -+ -+int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, -+ enum bch_member_state new_state, int flags) -+{ -+ struct bch_sb_field_members *mi; -+ int ret = 0; -+ -+ if (ca->mi.state == new_state) -+ return 0; -+ -+ if (!bch2_dev_state_allowed(c, ca, new_state, flags)) -+ return -EINVAL; -+ -+ if (new_state != BCH_MEMBER_STATE_RW) -+ __bch2_dev_read_only(c, ca); -+ -+ bch_notice(ca, "%s", bch2_dev_state[new_state]); -+ -+ mutex_lock(&c->sb_lock); -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx], new_state); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ if (new_state == BCH_MEMBER_STATE_RW && -+ __bch2_dev_read_write(c, ca)) -+ ret = -ENOMEM; -+ -+ rebalance_wakeup(c); -+ -+ return ret; -+} -+ -+int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, -+ enum bch_member_state new_state, int flags) -+{ -+ int ret; -+ -+ down_write(&c->state_lock); -+ ret = __bch2_dev_set_state(c, ca, new_state, flags); -+ up_write(&c->state_lock); -+ -+ return ret; -+} -+ -+/* Device add/removal: */ -+ -+int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct btree_trans trans; -+ size_t i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < ca->mi.nbuckets; i++) { -+ ret = bch2_btree_key_cache_flush(&trans, -+ BTREE_ID_ALLOC, POS(ca->dev_idx, i)); -+ if (ret) -+ break; -+ } -+ bch2_trans_exit(&trans); -+ -+ if (ret) -+ return ret; -+ -+ return bch2_btree_delete_range(c, BTREE_ID_ALLOC, -+ POS(ca->dev_idx, 0), -+ POS(ca->dev_idx + 1, 0), -+ NULL); -+} -+ -+int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) -+{ -+ struct bch_sb_field_members *mi; -+ unsigned dev_idx = ca->dev_idx, data; -+ int ret = -EINVAL; -+ -+ down_write(&c->state_lock); -+ -+ /* -+ * We consume a reference to ca->ref, regardless of whether we succeed -+ * or fail: -+ */ -+ percpu_ref_put(&ca->ref); -+ -+ if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) { -+ bch_err(ca, "Cannot remove without losing data"); -+ goto err; -+ } -+ -+ __bch2_dev_read_only(c, ca); -+ -+ ret = bch2_dev_data_drop(c, ca->dev_idx, flags); -+ if (ret) { -+ bch_err(ca, "Remove failed: error %i dropping data", ret); -+ 
goto err; -+ } -+ -+ ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx); -+ if (ret) { -+ bch_err(ca, "Remove failed: error %i flushing journal", ret); -+ goto err; -+ } -+ -+ ret = bch2_dev_remove_alloc(c, ca); -+ if (ret) { -+ bch_err(ca, "Remove failed, error deleting alloc info"); -+ goto err; -+ } -+ -+ /* -+ * must flush all existing journal entries, they might have -+ * (overwritten) keys that point to the device we're removing: -+ */ -+ bch2_journal_flush_all_pins(&c->journal); -+ /* -+ * hack to ensure bch2_replicas_gc2() clears out entries to this device -+ */ -+ bch2_journal_meta(&c->journal); -+ ret = bch2_journal_error(&c->journal); -+ if (ret) { -+ bch_err(ca, "Remove failed, journal error"); -+ goto err; -+ } -+ -+ ret = bch2_replicas_gc2(c); -+ if (ret) { -+ bch_err(ca, "Remove failed: error %i from replicas gc", ret); -+ goto err; -+ } -+ -+ data = bch2_dev_has_data(c, ca); -+ if (data) { -+ char data_has_str[100]; -+ -+ bch2_flags_to_text(&PBUF(data_has_str), -+ bch2_data_types, data); -+ bch_err(ca, "Remove failed, still has data (%s)", data_has_str); -+ ret = -EBUSY; -+ goto err; -+ } -+ -+ __bch2_dev_offline(c, ca); -+ -+ mutex_lock(&c->sb_lock); -+ rcu_assign_pointer(c->devs[ca->dev_idx], NULL); -+ mutex_unlock(&c->sb_lock); -+ -+ percpu_ref_kill(&ca->ref); -+ wait_for_completion(&ca->ref_completion); -+ -+ bch2_dev_free(ca); -+ -+ /* -+ * Free this device's slot in the bch_member array - all pointers to -+ * this device must be gone: -+ */ -+ mutex_lock(&c->sb_lock); -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ memset(&mi->members[dev_idx].uuid, 0, sizeof(mi->members[dev_idx].uuid)); -+ -+ bch2_write_super(c); -+ -+ mutex_unlock(&c->sb_lock); -+ up_write(&c->state_lock); -+ return 0; -+err: -+ if (ca->mi.state == BCH_MEMBER_STATE_RW && -+ !percpu_ref_is_zero(&ca->io_ref)) -+ __bch2_dev_read_write(c, ca); -+ up_write(&c->state_lock); -+ return ret; -+} -+ -+static void dev_usage_clear(struct bch_dev *ca) -+{ -+ struct bucket_array *buckets; -+ -+ percpu_memset(ca->usage[0], 0, sizeof(*ca->usage[0])); -+ -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ memset(buckets->b, 0, sizeof(buckets->b[0]) * buckets->nbuckets); -+ up_read(&ca->bucket_lock); -+} -+ -+/* Add new device to running filesystem: */ -+int bch2_dev_add(struct bch_fs *c, const char *path) -+{ -+ struct bch_opts opts = bch2_opts_empty(); -+ struct bch_sb_handle sb; -+ const char *err; -+ struct bch_dev *ca = NULL; -+ struct bch_sb_field_members *mi; -+ struct bch_member dev_mi; -+ unsigned dev_idx, nr_devices, u64s; -+ int ret; -+ -+ ret = bch2_read_super(path, &opts, &sb); -+ if (ret) -+ return ret; -+ -+ err = bch2_sb_validate(&sb); -+ if (err) -+ return -EINVAL; -+ -+ dev_mi = bch2_sb_get_members(sb.sb)->members[sb.sb->dev_idx]; -+ -+ err = bch2_dev_may_add(sb.sb, c); -+ if (err) -+ return -EINVAL; -+ -+ ca = __bch2_dev_alloc(c, &dev_mi); -+ if (!ca) { -+ bch2_free_super(&sb); -+ return -ENOMEM; -+ } -+ -+ ret = __bch2_dev_attach_bdev(ca, &sb); -+ if (ret) { -+ bch2_dev_free(ca); -+ return ret; -+ } -+ -+ /* -+ * We want to allocate journal on the new device before adding the new -+ * device to the filesystem because allocating after we attach requires -+ * spinning up the allocator thread, and the allocator thread requires -+ * doing btree writes, which if the existing devices are RO isn't going -+ * to work -+ * -+ * So we have to mark where the superblocks are, but marking allocated -+ * data normally updates the filesystem usage too, so we have to mark, -+ * 
allocate the journal, reset all the marks, then remark after we -+ * attach... -+ */ -+ bch2_mark_dev_superblock(ca->fs, ca, 0); -+ -+ err = "journal alloc failed"; -+ ret = bch2_dev_journal_alloc(ca); -+ if (ret) -+ goto err; -+ -+ dev_usage_clear(ca); -+ -+ down_write(&c->state_lock); -+ mutex_lock(&c->sb_lock); -+ -+ err = "insufficient space in new superblock"; -+ ret = bch2_sb_from_fs(c, ca); -+ if (ret) -+ goto err_unlock; -+ -+ mi = bch2_sb_get_members(ca->disk_sb.sb); -+ -+ if (!bch2_sb_resize_members(&ca->disk_sb, -+ le32_to_cpu(mi->field.u64s) + -+ sizeof(dev_mi) / sizeof(u64))) { -+ ret = -ENOSPC; -+ goto err_unlock; -+ } -+ -+ if (dynamic_fault("bcachefs:add:no_slot")) -+ goto no_slot; -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++) -+ if (!bch2_dev_exists(c->disk_sb.sb, mi, dev_idx)) -+ goto have_slot; -+no_slot: -+ err = "no slots available in superblock"; -+ ret = -ENOSPC; -+ goto err_unlock; -+ -+have_slot: -+ nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices); -+ u64s = (sizeof(struct bch_sb_field_members) + -+ sizeof(struct bch_member) * nr_devices) / sizeof(u64); -+ -+ err = "no space in superblock for member info"; -+ ret = -ENOSPC; -+ -+ mi = bch2_sb_resize_members(&c->disk_sb, u64s); -+ if (!mi) -+ goto err_unlock; -+ -+ /* success: */ -+ -+ mi->members[dev_idx] = dev_mi; -+ mi->members[dev_idx].last_mount = cpu_to_le64(ktime_get_real_seconds()); -+ c->disk_sb.sb->nr_devices = nr_devices; -+ -+ ca->disk_sb.sb->dev_idx = dev_idx; -+ bch2_dev_attach(c, ca, dev_idx); -+ -+ bch2_mark_dev_superblock(c, ca, 0); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ if (ca->mi.state == BCH_MEMBER_STATE_RW) { -+ err = __bch2_dev_read_write(c, ca); -+ if (err) -+ goto err_late; -+ } -+ -+ up_write(&c->state_lock); -+ return 0; -+ -+err_unlock: -+ mutex_unlock(&c->sb_lock); -+ up_write(&c->state_lock); -+err: -+ if (ca) -+ bch2_dev_free(ca); -+ bch2_free_super(&sb); -+ bch_err(c, "Unable to add device: %s", err); -+ return ret; -+err_late: -+ bch_err(c, "Error going rw after adding device: %s", err); -+ return -EINVAL; -+} -+ -+/* Hot add existing device to running filesystem: */ -+int bch2_dev_online(struct bch_fs *c, const char *path) -+{ -+ struct bch_opts opts = bch2_opts_empty(); -+ struct bch_sb_handle sb = { NULL }; -+ struct bch_sb_field_members *mi; -+ struct bch_dev *ca; -+ unsigned dev_idx; -+ const char *err; -+ int ret; -+ -+ down_write(&c->state_lock); -+ -+ ret = bch2_read_super(path, &opts, &sb); -+ if (ret) { -+ up_write(&c->state_lock); -+ return ret; -+ } -+ -+ dev_idx = sb.sb->dev_idx; -+ -+ err = bch2_dev_in_fs(c->disk_sb.sb, sb.sb); -+ if (err) -+ goto err; -+ -+ if (bch2_dev_attach_bdev(c, &sb)) { -+ err = "bch2_dev_attach_bdev() error"; -+ goto err; -+ } -+ -+ ca = bch_dev_locked(c, dev_idx); -+ if (ca->mi.state == BCH_MEMBER_STATE_RW) { -+ err = __bch2_dev_read_write(c, ca); -+ if (err) -+ goto err; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ -+ mi->members[ca->dev_idx].last_mount = -+ cpu_to_le64(ktime_get_real_seconds()); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ up_write(&c->state_lock); -+ return 0; -+err: -+ up_write(&c->state_lock); -+ bch2_free_super(&sb); -+ bch_err(c, "error bringing %s online: %s", path, err); -+ return -EINVAL; -+} -+ -+int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) -+{ -+ down_write(&c->state_lock); -+ -+ if (!bch2_dev_is_online(ca)) { -+ bch_err(ca, "Already 
offline"); -+ up_write(&c->state_lock); -+ return 0; -+ } -+ -+ if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) { -+ bch_err(ca, "Cannot offline required disk"); -+ up_write(&c->state_lock); -+ return -EINVAL; -+ } -+ -+ __bch2_dev_offline(c, ca); -+ -+ up_write(&c->state_lock); -+ return 0; -+} -+ -+int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) -+{ -+ struct bch_member *mi; -+ int ret = 0; -+ -+ down_write(&c->state_lock); -+ -+ if (nbuckets < ca->mi.nbuckets) { -+ bch_err(ca, "Cannot shrink yet"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ if (bch2_dev_is_online(ca) && -+ get_capacity(ca->disk_sb.bdev->bd_disk) < -+ ca->mi.bucket_size * nbuckets) { -+ bch_err(ca, "New size larger than device"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ ret = bch2_dev_buckets_resize(c, ca, nbuckets); -+ if (ret) { -+ bch_err(ca, "Resize error: %i", ret); -+ goto err; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; -+ mi->nbuckets = cpu_to_le64(nbuckets); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ bch2_recalc_capacity(c); -+err: -+ up_write(&c->state_lock); -+ return ret; -+} -+ -+/* return with ref on ca->ref: */ -+struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path) -+{ -+ -+ struct block_device *bdev = lookup_bdev(path); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ if (IS_ERR(bdev)) -+ return ERR_CAST(bdev); -+ -+ for_each_member_device(ca, c, i) -+ if (ca->disk_sb.bdev == bdev) -+ goto found; -+ -+ ca = ERR_PTR(-ENOENT); -+found: -+ bdput(bdev); -+ return ca; -+} -+ -+/* Filesystem open: */ -+ -+struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, -+ struct bch_opts opts) -+{ -+ struct bch_sb_handle *sb = NULL; -+ struct bch_fs *c = NULL; -+ unsigned i, best_sb = 0; -+ const char *err; -+ int ret = -ENOMEM; -+ -+ pr_verbose_init(opts, ""); -+ -+ if (!nr_devices) { -+ c = ERR_PTR(-EINVAL); -+ goto out2; -+ } -+ -+ if (!try_module_get(THIS_MODULE)) { -+ c = ERR_PTR(-ENODEV); -+ goto out2; -+ } -+ -+ sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL); -+ if (!sb) -+ goto err; -+ -+ for (i = 0; i < nr_devices; i++) { -+ ret = bch2_read_super(devices[i], &opts, &sb[i]); -+ if (ret) -+ goto err; -+ -+ err = bch2_sb_validate(&sb[i]); -+ if (err) -+ goto err_print; -+ } -+ -+ for (i = 1; i < nr_devices; i++) -+ if (le64_to_cpu(sb[i].sb->seq) > -+ le64_to_cpu(sb[best_sb].sb->seq)) -+ best_sb = i; -+ -+ for (i = 0; i < nr_devices; i++) { -+ err = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb); -+ if (err) -+ goto err_print; -+ } -+ -+ ret = -ENOMEM; -+ c = bch2_fs_alloc(sb[best_sb].sb, opts); -+ if (!c) -+ goto err; -+ -+ err = "bch2_dev_online() error"; -+ down_write(&c->state_lock); -+ for (i = 0; i < nr_devices; i++) -+ if (bch2_dev_attach_bdev(c, &sb[i])) { -+ up_write(&c->state_lock); -+ goto err_print; -+ } -+ up_write(&c->state_lock); -+ -+ err = "insufficient devices"; -+ if (!bch2_fs_may_start(c)) -+ goto err_print; -+ -+ if (!c->opts.nostart) { -+ ret = bch2_fs_start(c); -+ if (ret) -+ goto err; -+ } -+out: -+ kfree(sb); -+ module_put(THIS_MODULE); -+out2: -+ pr_verbose_init(opts, "ret %i", PTR_ERR_OR_ZERO(c)); -+ return c; -+err_print: -+ pr_err("bch_fs_open err opening %s: %s", -+ devices[0], err); -+ ret = -EINVAL; -+err: -+ if (c) -+ bch2_fs_stop(c); -+ for (i = 0; i < nr_devices; i++) -+ bch2_free_super(&sb[i]); -+ c = ERR_PTR(ret); -+ goto out; -+} -+ -+static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb, -+ struct bch_opts opts) -+{ 
-+ const char *err; -+ struct bch_fs *c; -+ bool allocated_fs = false; -+ int ret; -+ -+ err = bch2_sb_validate(sb); -+ if (err) -+ return err; -+ -+ mutex_lock(&bch_fs_list_lock); -+ c = __bch2_uuid_to_fs(sb->sb->uuid); -+ if (c) { -+ closure_get(&c->cl); -+ -+ err = bch2_dev_in_fs(c->disk_sb.sb, sb->sb); -+ if (err) -+ goto err; -+ } else { -+ c = bch2_fs_alloc(sb->sb, opts); -+ err = "cannot allocate memory"; -+ if (!c) -+ goto err; -+ -+ allocated_fs = true; -+ } -+ -+ err = "bch2_dev_online() error"; -+ -+ mutex_lock(&c->sb_lock); -+ if (bch2_dev_attach_bdev(c, sb)) { -+ mutex_unlock(&c->sb_lock); -+ goto err; -+ } -+ mutex_unlock(&c->sb_lock); -+ -+ if (!c->opts.nostart && bch2_fs_may_start(c)) { -+ err = "error starting filesystem"; -+ ret = bch2_fs_start(c); -+ if (ret) -+ goto err; -+ } -+ -+ closure_put(&c->cl); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ return NULL; -+err: -+ mutex_unlock(&bch_fs_list_lock); -+ -+ if (allocated_fs) -+ bch2_fs_stop(c); -+ else if (c) -+ closure_put(&c->cl); -+ -+ return err; -+} -+ -+const char *bch2_fs_open_incremental(const char *path) -+{ -+ struct bch_sb_handle sb; -+ struct bch_opts opts = bch2_opts_empty(); -+ const char *err; -+ -+ if (bch2_read_super(path, &opts, &sb)) -+ return "error reading superblock"; -+ -+ err = __bch2_fs_open_incremental(&sb, opts); -+ bch2_free_super(&sb); -+ -+ return err; -+} -+ -+/* Global interfaces/init */ -+ -+static void bcachefs_exit(void) -+{ -+ bch2_debug_exit(); -+ bch2_vfs_exit(); -+ bch2_chardev_exit(); -+ if (bcachefs_kset) -+ kset_unregister(bcachefs_kset); -+} -+ -+static int __init bcachefs_init(void) -+{ -+ bch2_bkey_pack_test(); -+ bch2_inode_pack_test(); -+ -+ if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) || -+ bch2_chardev_init() || -+ bch2_vfs_init() || -+ bch2_debug_init()) -+ goto err; -+ -+ return 0; -+err: -+ bcachefs_exit(); -+ return -ENOMEM; -+} -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ bool bch2_##name; \ -+ module_param_named(name, bch2_##name, bool, 0644); \ -+ MODULE_PARM_DESC(name, description); -+BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+module_exit(bcachefs_exit); -+module_init(bcachefs_init); -diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h -new file mode 100644 -index 000000000000..4aa5dd7917cf ---- /dev/null -+++ b/fs/bcachefs/super.h -@@ -0,0 +1,231 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SUPER_H -+#define _BCACHEFS_SUPER_H -+ -+#include "extents.h" -+ -+#include "bcachefs_ioctl.h" -+ -+#include -+ -+static inline size_t sector_to_bucket(const struct bch_dev *ca, sector_t s) -+{ -+ return div_u64(s, ca->mi.bucket_size); -+} -+ -+static inline sector_t bucket_to_sector(const struct bch_dev *ca, size_t b) -+{ -+ return ((sector_t) b) * ca->mi.bucket_size; -+} -+ -+static inline sector_t bucket_remainder(const struct bch_dev *ca, sector_t s) -+{ -+ u32 remainder; -+ -+ div_u64_rem(s, ca->mi.bucket_size, &remainder); -+ return remainder; -+} -+ -+static inline bool bch2_dev_is_online(struct bch_dev *ca) -+{ -+ return !percpu_ref_is_zero(&ca->io_ref); -+} -+ -+static inline bool bch2_dev_is_readable(struct bch_dev *ca) -+{ -+ return bch2_dev_is_online(ca) && -+ ca->mi.state != BCH_MEMBER_STATE_FAILED; -+} -+ -+static inline bool bch2_dev_get_ioref(struct bch_dev *ca, int rw) -+{ -+ if (!percpu_ref_tryget(&ca->io_ref)) -+ return false; -+ -+ if (ca->mi.state == BCH_MEMBER_STATE_RW || -+ (ca->mi.state == BCH_MEMBER_STATE_RO && rw == READ)) -+ return true; -+ -+ percpu_ref_put(&ca->io_ref); -+ return false; -+} 
-+ -+static inline unsigned dev_mask_nr(const struct bch_devs_mask *devs) -+{ -+ return bitmap_weight(devs->d, BCH_SB_MEMBERS_MAX); -+} -+ -+static inline bool bch2_dev_list_has_dev(struct bch_devs_list devs, -+ unsigned dev) -+{ -+ unsigned i; -+ -+ for (i = 0; i < devs.nr; i++) -+ if (devs.devs[i] == dev) -+ return true; -+ -+ return false; -+} -+ -+static inline void bch2_dev_list_drop_dev(struct bch_devs_list *devs, -+ unsigned dev) -+{ -+ unsigned i; -+ -+ for (i = 0; i < devs->nr; i++) -+ if (devs->devs[i] == dev) { -+ array_remove_item(devs->devs, devs->nr, i); -+ return; -+ } -+} -+ -+static inline void bch2_dev_list_add_dev(struct bch_devs_list *devs, -+ unsigned dev) -+{ -+ BUG_ON(bch2_dev_list_has_dev(*devs, dev)); -+ BUG_ON(devs->nr >= BCH_REPLICAS_MAX); -+ devs->devs[devs->nr++] = dev; -+} -+ -+static inline struct bch_devs_list bch2_dev_list_single(unsigned dev) -+{ -+ return (struct bch_devs_list) { .nr = 1, .devs[0] = dev }; -+} -+ -+static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter, -+ const struct bch_devs_mask *mask) -+{ -+ struct bch_dev *ca = NULL; -+ -+ while ((*iter = mask -+ ? find_next_bit(mask->d, c->sb.nr_devices, *iter) -+ : *iter) < c->sb.nr_devices && -+ !(ca = rcu_dereference_check(c->devs[*iter], -+ lockdep_is_held(&c->state_lock)))) -+ (*iter)++; -+ -+ return ca; -+} -+ -+#define __for_each_member_device(ca, c, iter, mask) \ -+ for ((iter) = 0; ((ca) = __bch2_next_dev((c), &(iter), mask)); (iter)++) -+ -+#define for_each_member_device_rcu(ca, c, iter, mask) \ -+ __for_each_member_device(ca, c, iter, mask) -+ -+static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, unsigned *iter) -+{ -+ struct bch_dev *ca; -+ -+ rcu_read_lock(); -+ if ((ca = __bch2_next_dev(c, iter, NULL))) -+ percpu_ref_get(&ca->ref); -+ rcu_read_unlock(); -+ -+ return ca; -+} -+ -+/* -+ * If you break early, you must drop your ref on the current device -+ */ -+#define for_each_member_device(ca, c, iter) \ -+ for ((iter) = 0; \ -+ (ca = bch2_get_next_dev(c, &(iter))); \ -+ percpu_ref_put(&ca->ref), (iter)++) -+ -+static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c, -+ unsigned *iter, -+ int state_mask) -+{ -+ struct bch_dev *ca; -+ -+ rcu_read_lock(); -+ while ((ca = __bch2_next_dev(c, iter, NULL)) && -+ (!((1 << ca->mi.state) & state_mask) || -+ !percpu_ref_tryget(&ca->io_ref))) -+ (*iter)++; -+ rcu_read_unlock(); -+ -+ return ca; -+} -+ -+#define __for_each_online_member(ca, c, iter, state_mask) \ -+ for ((iter) = 0; \ -+ (ca = bch2_get_next_online_dev(c, &(iter), state_mask)); \ -+ percpu_ref_put(&ca->io_ref), (iter)++) -+ -+#define for_each_online_member(ca, c, iter) \ -+ __for_each_online_member(ca, c, iter, ~0) -+ -+#define for_each_rw_member(ca, c, iter) \ -+ __for_each_online_member(ca, c, iter, 1 << BCH_MEMBER_STATE_RW) -+ -+#define for_each_readable_member(ca, c, iter) \ -+ __for_each_online_member(ca, c, iter, \ -+ (1 << BCH_MEMBER_STATE_RW)|(1 << BCH_MEMBER_STATE_RO)) -+ -+/* -+ * If a key exists that references a device, the device won't be going away and -+ * we can omit rcu_read_lock(): -+ */ -+static inline struct bch_dev *bch_dev_bkey_exists(const struct bch_fs *c, unsigned idx) -+{ -+ EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]); -+ -+ return rcu_dereference_check(c->devs[idx], 1); -+} -+ -+static inline struct bch_dev *bch_dev_locked(struct bch_fs *c, unsigned idx) -+{ -+ EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]); -+ -+ return rcu_dereference_protected(c->devs[idx], -+ lockdep_is_held(&c->sb_lock) 
|| -+ lockdep_is_held(&c->state_lock)); -+} -+ -+/* XXX kill, move to struct bch_fs */ -+static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c) -+{ -+ struct bch_devs_mask devs; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ memset(&devs, 0, sizeof(devs)); -+ for_each_online_member(ca, c, i) -+ __set_bit(ca->dev_idx, devs.d); -+ return devs; -+} -+ -+struct bch_fs *bch2_bdev_to_fs(struct block_device *); -+struct bch_fs *bch2_uuid_to_fs(uuid_le); -+int bch2_congested(void *, int); -+ -+bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *, -+ enum bch_member_state, int); -+int __bch2_dev_set_state(struct bch_fs *, struct bch_dev *, -+ enum bch_member_state, int); -+int bch2_dev_set_state(struct bch_fs *, struct bch_dev *, -+ enum bch_member_state, int); -+ -+int bch2_dev_fail(struct bch_dev *, int); -+int bch2_dev_remove(struct bch_fs *, struct bch_dev *, int); -+int bch2_dev_add(struct bch_fs *, const char *); -+int bch2_dev_online(struct bch_fs *, const char *); -+int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int); -+int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64); -+struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); -+ -+bool bch2_fs_emergency_read_only(struct bch_fs *); -+void bch2_fs_read_only(struct bch_fs *); -+ -+int bch2_fs_read_write(struct bch_fs *); -+int bch2_fs_read_write_early(struct bch_fs *); -+ -+void bch2_fs_stop(struct bch_fs *); -+ -+int bch2_fs_start(struct bch_fs *); -+struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts); -+const char *bch2_fs_open_incremental(const char *path); -+ -+#endif /* _BCACHEFS_SUPER_H */ -diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h -new file mode 100644 -index 000000000000..20406ebd6f5b ---- /dev/null -+++ b/fs/bcachefs/super_types.h -@@ -0,0 +1,51 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SUPER_TYPES_H -+#define _BCACHEFS_SUPER_TYPES_H -+ -+struct bch_sb_handle { -+ struct bch_sb *sb; -+ struct block_device *bdev; -+ struct bio *bio; -+ unsigned page_order; -+ fmode_t mode; -+ unsigned have_layout:1; -+ unsigned have_bio:1; -+ unsigned fs_sb:1; -+ u64 seq; -+}; -+ -+struct bch_devs_mask { -+ unsigned long d[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)]; -+}; -+ -+struct bch_devs_list { -+ u8 nr; -+ u8 devs[BCH_REPLICAS_MAX + 1]; -+}; -+ -+struct bch_member_cpu { -+ u64 nbuckets; /* device size */ -+ u16 first_bucket; /* index of first bucket used */ -+ u16 bucket_size; /* sectors */ -+ u16 group; -+ u8 state; -+ u8 replacement; -+ u8 discard; -+ u8 data_allowed; -+ u8 durability; -+ u8 valid; -+}; -+ -+struct bch_disk_group_cpu { -+ bool deleted; -+ u16 parent; -+ struct bch_devs_mask devs; -+}; -+ -+struct bch_disk_groups_cpu { -+ struct rcu_head rcu; -+ unsigned nr; -+ struct bch_disk_group_cpu entries[]; -+}; -+ -+#endif /* _BCACHEFS_SUPER_TYPES_H */ -diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c -new file mode 100644 -index 000000000000..c169d282a1f9 ---- /dev/null -+++ b/fs/bcachefs/sysfs.c -@@ -0,0 +1,1091 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * bcache sysfs interfaces -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. 
-+ */ -+ -+#ifndef NO_BCACHEFS_SYSFS -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "sysfs.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_key_cache.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "clock.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "inode.h" -+#include "journal.h" -+#include "keylist.h" -+#include "move.h" -+#include "opts.h" -+#include "rebalance.h" -+#include "replicas.h" -+#include "super-io.h" -+#include "tests.h" -+ -+#include -+#include -+#include -+ -+#include "util.h" -+ -+#define SYSFS_OPS(type) \ -+struct sysfs_ops type ## _sysfs_ops = { \ -+ .show = type ## _show, \ -+ .store = type ## _store \ -+} -+ -+#define SHOW(fn) \ -+static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ -+ char *buf) \ -+ -+#define STORE(fn) \ -+static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\ -+ const char *buf, size_t size) \ -+ -+#define __sysfs_attribute(_name, _mode) \ -+ static struct attribute sysfs_##_name = \ -+ { .name = #_name, .mode = _mode } -+ -+#define write_attribute(n) __sysfs_attribute(n, S_IWUSR) -+#define read_attribute(n) __sysfs_attribute(n, S_IRUGO) -+#define rw_attribute(n) __sysfs_attribute(n, S_IRUGO|S_IWUSR) -+ -+#define sysfs_printf(file, fmt, ...) \ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return scnprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__);\ -+} while (0) -+ -+#define sysfs_print(file, var) \ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return snprint(buf, PAGE_SIZE, var); \ -+} while (0) -+ -+#define sysfs_hprint(file, val) \ -+do { \ -+ if (attr == &sysfs_ ## file) { \ -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); \ -+ bch2_hprint(&out, val); \ -+ pr_buf(&out, "\n"); \ -+ return out.pos - buf; \ -+ } \ -+} while (0) -+ -+#define var_printf(_var, fmt) sysfs_printf(_var, fmt, var(_var)) -+#define var_print(_var) sysfs_print(_var, var(_var)) -+#define var_hprint(_var) sysfs_hprint(_var, var(_var)) -+ -+#define sysfs_strtoul(file, var) \ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return strtoul_safe(buf, var) ?: (ssize_t) size; \ -+} while (0) -+ -+#define sysfs_strtoul_clamp(file, var, min, max) \ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return strtoul_safe_clamp(buf, var, min, max) \ -+ ?: (ssize_t) size; \ -+} while (0) -+ -+#define strtoul_or_return(cp) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (_r) \ -+ return _r; \ -+ _v; \ -+}) -+ -+#define strtoul_restrict_or_return(cp, min, max) \ -+({ \ -+ unsigned long __v = 0; \ -+ int _r = strtoul_safe_restrict(cp, __v, min, max); \ -+ if (_r) \ -+ return _r; \ -+ __v; \ -+}) -+ -+#define strtoi_h_or_return(cp) \ -+({ \ -+ u64 _v; \ -+ int _r = strtoi_h(cp, &_v); \ -+ if (_r) \ -+ return _r; \ -+ _v; \ -+}) -+ -+#define sysfs_hatoi(file, var) \ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return strtoi_h(buf, &var) ?: (ssize_t) size; \ -+} while (0) -+ -+write_attribute(trigger_journal_flush); -+write_attribute(trigger_btree_coalesce); -+write_attribute(trigger_gc); -+write_attribute(prune_cache); -+rw_attribute(btree_gc_periodic); -+ -+read_attribute(uuid); -+read_attribute(minor); -+read_attribute(bucket_size); -+read_attribute(block_size); -+read_attribute(btree_node_size); -+read_attribute(first_bucket); -+read_attribute(nbuckets); -+read_attribute(durability); -+read_attribute(iodone); -+ -+read_attribute(io_latency_read); 
-+read_attribute(io_latency_write); -+read_attribute(io_latency_stats_read); -+read_attribute(io_latency_stats_write); -+read_attribute(congested); -+ -+read_attribute(bucket_quantiles_last_read); -+read_attribute(bucket_quantiles_last_write); -+read_attribute(bucket_quantiles_fragmentation); -+read_attribute(bucket_quantiles_oldest_gen); -+ -+read_attribute(reserve_stats); -+read_attribute(btree_cache_size); -+read_attribute(compression_stats); -+read_attribute(journal_debug); -+read_attribute(journal_pins); -+read_attribute(btree_updates); -+read_attribute(dirty_btree_nodes); -+read_attribute(btree_key_cache); -+read_attribute(btree_transactions); -+ -+read_attribute(internal_uuid); -+ -+read_attribute(has_data); -+read_attribute(alloc_debug); -+write_attribute(wake_allocator); -+ -+read_attribute(read_realloc_races); -+read_attribute(extent_migrate_done); -+read_attribute(extent_migrate_raced); -+ -+rw_attribute(journal_write_delay_ms); -+rw_attribute(journal_reclaim_delay_ms); -+ -+rw_attribute(discard); -+rw_attribute(cache_replacement_policy); -+rw_attribute(label); -+ -+rw_attribute(copy_gc_enabled); -+sysfs_pd_controller_attribute(copy_gc); -+ -+rw_attribute(rebalance_enabled); -+sysfs_pd_controller_attribute(rebalance); -+read_attribute(rebalance_work); -+rw_attribute(promote_whole_extents); -+ -+read_attribute(new_stripes); -+ -+rw_attribute(pd_controllers_update_seconds); -+ -+read_attribute(meta_replicas_have); -+read_attribute(data_replicas_have); -+ -+read_attribute(io_timers_read); -+read_attribute(io_timers_write); -+ -+#ifdef CONFIG_BCACHEFS_TESTS -+write_attribute(perf_test); -+#endif /* CONFIG_BCACHEFS_TESTS */ -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ rw_attribute(name); -+ -+ BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+#define x(_name) \ -+ static struct attribute sysfs_time_stat_##_name = \ -+ { .name = #_name, .mode = S_IRUGO }; -+ BCH_TIME_STATS() -+#undef x -+ -+static struct attribute sysfs_state_rw = { -+ .name = "state", -+ .mode = S_IRUGO -+}; -+ -+static size_t bch2_btree_cache_size(struct bch_fs *c) -+{ -+ size_t ret = 0; -+ struct btree *b; -+ -+ mutex_lock(&c->btree_cache.lock); -+ list_for_each_entry(b, &c->btree_cache.live, list) -+ ret += btree_bytes(c); -+ -+ mutex_unlock(&c->btree_cache.lock); -+ return ret; -+} -+ -+static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf) -+{ -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c); -+ -+ if (!fs_usage) -+ return -ENOMEM; -+ -+ bch2_fs_usage_to_text(&out, c, fs_usage); -+ -+ percpu_up_read(&c->mark_lock); -+ -+ kfree(fs_usage); -+ -+ return out.pos - buf; -+} -+ -+static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0, -+ nr_compressed_extents = 0, -+ compressed_sectors_compressed = 0, -+ compressed_sectors_uncompressed = 0; -+ int ret; -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EPERM; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k, ret) -+ if (k.k->type == KEY_TYPE_extent) { -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ extent_for_each_ptr_decode(e, p, entry) { -+ if (!crc_is_compressed(p.crc)) { -+ nr_uncompressed_extents++; -+ uncompressed_sectors += e.k->size; -+ } else { -+ nr_compressed_extents++; -+ 
compressed_sectors_compressed += -+ p.crc.compressed_size; -+ compressed_sectors_uncompressed += -+ p.crc.uncompressed_size; -+ } -+ -+ /* only looking at the first ptr */ -+ break; -+ } -+ } -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ return ret; -+ -+ return scnprintf(buf, PAGE_SIZE, -+ "uncompressed data:\n" -+ " nr extents: %llu\n" -+ " size (bytes): %llu\n" -+ "compressed data:\n" -+ " nr extents: %llu\n" -+ " compressed size (bytes): %llu\n" -+ " uncompressed size (bytes): %llu\n", -+ nr_uncompressed_extents, -+ uncompressed_sectors << 9, -+ nr_compressed_extents, -+ compressed_sectors_compressed << 9, -+ compressed_sectors_uncompressed << 9); -+} -+ -+static ssize_t bch2_new_stripes(struct bch_fs *c, char *buf) -+{ -+ char *out = buf, *end = buf + PAGE_SIZE; -+ struct ec_stripe_head *h; -+ struct ec_stripe_new *s; -+ -+ mutex_lock(&c->ec_new_stripe_lock); -+ list_for_each_entry(h, &c->ec_new_stripe_list, list) { -+ out += scnprintf(out, end - out, -+ "target %u algo %u redundancy %u:\n", -+ h->target, h->algo, h->redundancy); -+ -+ if (h->s) -+ out += scnprintf(out, end - out, -+ "\tpending: blocks %u allocated %u\n", -+ h->s->blocks.nr, -+ bitmap_weight(h->s->blocks_allocated, -+ h->s->blocks.nr)); -+ -+ mutex_lock(&h->lock); -+ list_for_each_entry(s, &h->stripes, list) -+ out += scnprintf(out, end - out, -+ "\tin flight: blocks %u allocated %u pin %u\n", -+ s->blocks.nr, -+ bitmap_weight(s->blocks_allocated, -+ s->blocks.nr), -+ atomic_read(&s->pin)); -+ mutex_unlock(&h->lock); -+ -+ } -+ mutex_unlock(&c->ec_new_stripe_lock); -+ -+ return out - buf; -+} -+ -+SHOW(bch2_fs) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); -+ -+ sysfs_print(minor, c->minor); -+ sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b); -+ -+ sysfs_print(journal_write_delay_ms, c->journal.write_delay_ms); -+ sysfs_print(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms); -+ -+ sysfs_print(block_size, block_bytes(c)); -+ sysfs_print(btree_node_size, btree_bytes(c)); -+ sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c)); -+ -+ sysfs_print(read_realloc_races, -+ atomic_long_read(&c->read_realloc_races)); -+ sysfs_print(extent_migrate_done, -+ atomic_long_read(&c->extent_migrate_done)); -+ sysfs_print(extent_migrate_raced, -+ atomic_long_read(&c->extent_migrate_raced)); -+ -+ sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic); -+ -+ sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); -+ -+ sysfs_print(pd_controllers_update_seconds, -+ c->pd_controllers_update_seconds); -+ -+ sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled); -+ sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */ -+ -+ if (attr == &sysfs_rebalance_work) -+ return bch2_rebalance_work_show(c, buf); -+ -+ sysfs_print(promote_whole_extents, c->promote_whole_extents); -+ -+ sysfs_printf(meta_replicas_have, "%i", bch2_replicas_online(c, true)); -+ sysfs_printf(data_replicas_have, "%i", bch2_replicas_online(c, false)); -+ -+ /* Debugging: */ -+ -+ if (attr == &sysfs_alloc_debug) -+ return show_fs_alloc_debug(c, buf); -+ -+ if (attr == &sysfs_journal_debug) -+ return bch2_journal_print_debug(&c->journal, buf); -+ -+ if (attr == &sysfs_journal_pins) -+ return bch2_journal_print_pins(&c->journal, buf); -+ -+ if (attr == &sysfs_btree_updates) -+ return bch2_btree_updates_print(c, buf); -+ -+ if (attr == &sysfs_dirty_btree_nodes) -+ return bch2_dirty_btree_nodes_print(c, buf); -+ -+ if (attr == &sysfs_btree_key_cache) { -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ -+ 
bch2_btree_key_cache_to_text(&out, &c->btree_key_cache); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_btree_transactions) { -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ -+ bch2_btree_trans_to_text(&out, c); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_compression_stats) -+ return bch2_compression_stats(c, buf); -+ -+ if (attr == &sysfs_new_stripes) -+ return bch2_new_stripes(c, buf); -+ -+ if (attr == &sysfs_io_timers_read) -+ return bch2_io_timers_show(&c->io_clock[READ], buf); -+ if (attr == &sysfs_io_timers_write) -+ return bch2_io_timers_show(&c->io_clock[WRITE], buf); -+ -+#define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name); -+ BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+ return 0; -+} -+ -+STORE(bch2_fs) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); -+ -+ sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms); -+ sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms); -+ -+ if (attr == &sysfs_btree_gc_periodic) { -+ ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic) -+ ?: (ssize_t) size; -+ -+ wake_up_process(c->gc_thread); -+ return ret; -+ } -+ -+ if (attr == &sysfs_copy_gc_enabled) { -+ struct bch_dev *ca; -+ unsigned i; -+ ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled) -+ ?: (ssize_t) size; -+ -+ for_each_member_device(ca, c, i) -+ if (ca->copygc_thread) -+ wake_up_process(ca->copygc_thread); -+ return ret; -+ } -+ -+ if (attr == &sysfs_rebalance_enabled) { -+ ssize_t ret = strtoul_safe(buf, c->rebalance.enabled) -+ ?: (ssize_t) size; -+ -+ rebalance_wakeup(c); -+ return ret; -+ } -+ -+ sysfs_strtoul(pd_controllers_update_seconds, -+ c->pd_controllers_update_seconds); -+ sysfs_pd_controller_store(rebalance, &c->rebalance.pd); -+ -+ sysfs_strtoul(promote_whole_extents, c->promote_whole_extents); -+ -+ /* Debugging: */ -+ -+#define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name); -+ BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EPERM; -+ -+ /* Debugging: */ -+ -+ if (attr == &sysfs_trigger_journal_flush) -+ bch2_journal_meta_async(&c->journal, NULL); -+ -+ if (attr == &sysfs_trigger_btree_coalesce) -+ bch2_coalesce(c); -+ -+ if (attr == &sysfs_trigger_gc) { -+ /* -+ * Full gc is currently incompatible with btree key cache: -+ */ -+#if 0 -+ down_read(&c->state_lock); -+ bch2_gc(c, NULL, false, false); -+ up_read(&c->state_lock); -+#else -+ bch2_gc_gens(c); -+#endif -+ } -+ -+ if (attr == &sysfs_prune_cache) { -+ struct shrink_control sc; -+ -+ sc.gfp_mask = GFP_KERNEL; -+ sc.nr_to_scan = strtoul_or_return(buf); -+ c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc); -+ } -+ -+#ifdef CONFIG_BCACHEFS_TESTS -+ if (attr == &sysfs_perf_test) { -+ char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp; -+ char *test = strsep(&p, " \t\n"); -+ char *nr_str = strsep(&p, " \t\n"); -+ char *threads_str = strsep(&p, " \t\n"); -+ unsigned threads; -+ u64 nr; -+ int ret = -EINVAL; -+ -+ if (threads_str && -+ !(ret = kstrtouint(threads_str, 10, &threads)) && -+ !(ret = bch2_strtoull_h(nr_str, &nr))) -+ bch2_btree_perf_test(c, test, nr, threads); -+ else -+ size = ret; -+ kfree(tmp); -+ } -+#endif -+ return size; -+} -+SYSFS_OPS(bch2_fs); -+ -+struct attribute *bch2_fs_files[] = { -+ &sysfs_minor, -+ &sysfs_block_size, -+ &sysfs_btree_node_size, -+ &sysfs_btree_cache_size, -+ -+ &sysfs_meta_replicas_have, -+ &sysfs_data_replicas_have, -+ -+ &sysfs_journal_write_delay_ms, -+ &sysfs_journal_reclaim_delay_ms, -+ -+ 
&sysfs_promote_whole_extents, -+ -+ &sysfs_compression_stats, -+ -+#ifdef CONFIG_BCACHEFS_TESTS -+ &sysfs_perf_test, -+#endif -+ NULL -+}; -+ -+/* internal dir - just a wrapper */ -+ -+SHOW(bch2_fs_internal) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, internal); -+ return bch2_fs_show(&c->kobj, attr, buf); -+} -+ -+STORE(bch2_fs_internal) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, internal); -+ return bch2_fs_store(&c->kobj, attr, buf, size); -+} -+SYSFS_OPS(bch2_fs_internal); -+ -+struct attribute *bch2_fs_internal_files[] = { -+ &sysfs_alloc_debug, -+ &sysfs_journal_debug, -+ &sysfs_journal_pins, -+ &sysfs_btree_updates, -+ &sysfs_dirty_btree_nodes, -+ &sysfs_btree_key_cache, -+ &sysfs_btree_transactions, -+ -+ &sysfs_read_realloc_races, -+ &sysfs_extent_migrate_done, -+ &sysfs_extent_migrate_raced, -+ -+ &sysfs_trigger_journal_flush, -+ &sysfs_trigger_btree_coalesce, -+ &sysfs_trigger_gc, -+ &sysfs_prune_cache, -+ -+ &sysfs_copy_gc_enabled, -+ -+ &sysfs_rebalance_enabled, -+ &sysfs_rebalance_work, -+ sysfs_pd_controller_files(rebalance), -+ -+ &sysfs_new_stripes, -+ -+ &sysfs_io_timers_read, -+ &sysfs_io_timers_write, -+ -+ &sysfs_internal_uuid, -+ -+#define BCH_DEBUG_PARAM(name, description) &sysfs_##name, -+ BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+ NULL -+}; -+ -+/* options */ -+ -+SHOW(bch2_fs_opts_dir) -+{ -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); -+ const struct bch_option *opt = container_of(attr, struct bch_option, attr); -+ int id = opt - bch2_opt_table; -+ u64 v = bch2_opt_get_by_id(&c->opts, id); -+ -+ bch2_opt_to_text(&out, c, opt, v, OPT_SHOW_FULL_LIST); -+ pr_buf(&out, "\n"); -+ -+ return out.pos - buf; -+} -+ -+STORE(bch2_fs_opts_dir) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); -+ const struct bch_option *opt = container_of(attr, struct bch_option, attr); -+ int ret, id = opt - bch2_opt_table; -+ char *tmp; -+ u64 v; -+ -+ tmp = kstrdup(buf, GFP_KERNEL); -+ if (!tmp) -+ return -ENOMEM; -+ -+ ret = bch2_opt_parse(c, opt, strim(tmp), &v); -+ kfree(tmp); -+ -+ if (ret < 0) -+ return ret; -+ -+ ret = bch2_opt_check_may_set(c, id, v); -+ if (ret < 0) -+ return ret; -+ -+ if (opt->set_sb != SET_NO_SB_OPT) { -+ mutex_lock(&c->sb_lock); -+ opt->set_sb(c->disk_sb.sb, v); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ bch2_opt_set_by_id(&c->opts, id, v); -+ -+ if ((id == Opt_background_target || -+ id == Opt_background_compression) && v) { -+ bch2_rebalance_add_work(c, S64_MAX); -+ rebalance_wakeup(c); -+ } -+ -+ return size; -+} -+SYSFS_OPS(bch2_fs_opts_dir); -+ -+struct attribute *bch2_fs_opts_dir_files[] = { NULL }; -+ -+int bch2_opts_create_sysfs_files(struct kobject *kobj) -+{ -+ const struct bch_option *i; -+ int ret; -+ -+ for (i = bch2_opt_table; -+ i < bch2_opt_table + bch2_opts_nr; -+ i++) { -+ if (!(i->mode & (OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME))) -+ continue; -+ -+ ret = sysfs_create_file(kobj, &i->attr); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/* time stats */ -+ -+SHOW(bch2_fs_time_stats) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats); -+ -+#define x(name) \ -+ if (attr == &sysfs_time_stat_##name) \ -+ return bch2_time_stats_print(&c->times[BCH_TIME_##name],\ -+ buf, PAGE_SIZE); -+ BCH_TIME_STATS() -+#undef x -+ -+ return 0; -+} -+ -+STORE(bch2_fs_time_stats) -+{ -+ return size; -+} -+SYSFS_OPS(bch2_fs_time_stats); -+ -+struct attribute *bch2_fs_time_stats_files[] = { 
-+#define x(name) \ -+ &sysfs_time_stat_##name, -+ BCH_TIME_STATS() -+#undef x -+ NULL -+}; -+ -+typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *, -+ size_t, void *); -+ -+static unsigned bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, void *private) -+{ -+ int rw = (private ? 1 : 0); -+ -+ return bucket_last_io(c, bucket(ca, b), rw); -+} -+ -+static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, void *private) -+{ -+ struct bucket *g = bucket(ca, b); -+ return bucket_sectors_used(g->mark); -+} -+ -+static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, void *private) -+{ -+ return bucket_gc_gen(ca, b); -+} -+ -+static int unsigned_cmp(const void *_l, const void *_r) -+{ -+ const unsigned *l = _l; -+ const unsigned *r = _r; -+ -+ return cmp_int(*l, *r); -+} -+ -+static ssize_t show_quantiles(struct bch_fs *c, struct bch_dev *ca, -+ char *buf, bucket_map_fn *fn, void *private) -+{ -+ size_t i, n; -+ /* Compute 31 quantiles */ -+ unsigned q[31], *p; -+ ssize_t ret = 0; -+ -+ down_read(&ca->bucket_lock); -+ n = ca->mi.nbuckets; -+ -+ p = vzalloc(n * sizeof(unsigned)); -+ if (!p) { -+ up_read(&ca->bucket_lock); -+ return -ENOMEM; -+ } -+ -+ for (i = ca->mi.first_bucket; i < n; i++) -+ p[i] = fn(c, ca, i, private); -+ -+ sort(p, n, sizeof(unsigned), unsigned_cmp, NULL); -+ up_read(&ca->bucket_lock); -+ -+ while (n && -+ !p[n - 1]) -+ --n; -+ -+ for (i = 0; i < ARRAY_SIZE(q); i++) -+ q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)]; -+ -+ vfree(p); -+ -+ for (i = 0; i < ARRAY_SIZE(q); i++) -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, -+ "%u ", q[i]); -+ buf[ret - 1] = '\n'; -+ -+ return ret; -+} -+ -+static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf) -+{ -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ enum alloc_reserve i; -+ -+ spin_lock(&ca->fs->freelist_lock); -+ -+ pr_buf(&out, "free_inc:\t%zu\t%zu\n", -+ fifo_used(&ca->free_inc), -+ ca->free_inc.size); -+ -+ for (i = 0; i < RESERVE_NR; i++) -+ pr_buf(&out, "free[%u]:\t%zu\t%zu\n", i, -+ fifo_used(&ca->free[i]), -+ ca->free[i].size); -+ -+ spin_unlock(&ca->fs->freelist_lock); -+ -+ return out.pos - buf; -+} -+ -+static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf) -+{ -+ struct bch_fs *c = ca->fs; -+ struct bch_dev_usage stats = bch2_dev_usage_read(c, ca); -+ unsigned i, nr[BCH_DATA_NR]; -+ -+ memset(nr, 0, sizeof(nr)); -+ -+ for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++) -+ nr[c->open_buckets[i].type]++; -+ -+ return scnprintf(buf, PAGE_SIZE, -+ "free_inc: %zu/%zu\n" -+ "free[RESERVE_BTREE]: %zu/%zu\n" -+ "free[RESERVE_MOVINGGC]: %zu/%zu\n" -+ "free[RESERVE_NONE]: %zu/%zu\n" -+ "buckets:\n" -+ " capacity: %llu\n" -+ " alloc: %llu\n" -+ " sb: %llu\n" -+ " journal: %llu\n" -+ " meta: %llu\n" -+ " user: %llu\n" -+ " cached: %llu\n" -+ " erasure coded: %llu\n" -+ " available: %lli\n" -+ "sectors:\n" -+ " sb: %llu\n" -+ " journal: %llu\n" -+ " meta: %llu\n" -+ " user: %llu\n" -+ " cached: %llu\n" -+ " erasure coded: %llu\n" -+ " fragmented: %llu\n" -+ " copygc threshold: %llu\n" -+ "freelist_wait: %s\n" -+ "open buckets: %u/%u (reserved %u)\n" -+ "open_buckets_wait: %s\n" -+ "open_buckets_btree: %u\n" -+ "open_buckets_user: %u\n" -+ "btree reserve cache: %u\n", -+ fifo_used(&ca->free_inc), ca->free_inc.size, -+ fifo_used(&ca->free[RESERVE_BTREE]), ca->free[RESERVE_BTREE].size, -+ fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size, -+ fifo_used(&ca->free[RESERVE_NONE]), 
ca->free[RESERVE_NONE].size, -+ ca->mi.nbuckets - ca->mi.first_bucket, -+ stats.buckets_alloc, -+ stats.buckets[BCH_DATA_SB], -+ stats.buckets[BCH_DATA_JOURNAL], -+ stats.buckets[BCH_DATA_BTREE], -+ stats.buckets[BCH_DATA_USER], -+ stats.buckets[BCH_DATA_CACHED], -+ stats.buckets_ec, -+ ca->mi.nbuckets - ca->mi.first_bucket - stats.buckets_unavailable, -+ stats.sectors[BCH_DATA_SB], -+ stats.sectors[BCH_DATA_JOURNAL], -+ stats.sectors[BCH_DATA_BTREE], -+ stats.sectors[BCH_DATA_USER], -+ stats.sectors[BCH_DATA_CACHED], -+ stats.sectors_ec, -+ stats.sectors_fragmented, -+ ca->copygc_threshold, -+ c->freelist_wait.list.first ? "waiting" : "empty", -+ c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, -+ BTREE_NODE_OPEN_BUCKET_RESERVE, -+ c->open_buckets_wait.list.first ? "waiting" : "empty", -+ nr[BCH_DATA_BTREE], -+ nr[BCH_DATA_USER], -+ c->btree_reserve_cache_nr); -+} -+ -+static const char * const bch2_rw[] = { -+ "read", -+ "write", -+ NULL -+}; -+ -+static ssize_t show_dev_iodone(struct bch_dev *ca, char *buf) -+{ -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ int rw, i; -+ -+ for (rw = 0; rw < 2; rw++) { -+ pr_buf(&out, "%s:\n", bch2_rw[rw]); -+ -+ for (i = 1; i < BCH_DATA_NR; i++) -+ pr_buf(&out, "%-12s:%12llu\n", -+ bch2_data_types[i], -+ percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9); -+ } -+ -+ return out.pos - buf; -+} -+ -+SHOW(bch2_dev) -+{ -+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); -+ struct bch_fs *c = ca->fs; -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ -+ sysfs_printf(uuid, "%pU\n", ca->uuid.b); -+ -+ sysfs_print(bucket_size, bucket_bytes(ca)); -+ sysfs_print(block_size, block_bytes(c)); -+ sysfs_print(first_bucket, ca->mi.first_bucket); -+ sysfs_print(nbuckets, ca->mi.nbuckets); -+ sysfs_print(durability, ca->mi.durability); -+ sysfs_print(discard, ca->mi.discard); -+ -+ if (attr == &sysfs_label) { -+ if (ca->mi.group) { -+ mutex_lock(&c->sb_lock); -+ bch2_disk_path_to_text(&out, &c->disk_sb, -+ ca->mi.group - 1); -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ pr_buf(&out, "\n"); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_has_data) { -+ bch2_flags_to_text(&out, bch2_data_types, -+ bch2_dev_has_data(c, ca)); -+ pr_buf(&out, "\n"); -+ return out.pos - buf; -+ } -+ -+ sysfs_pd_controller_show(copy_gc, &ca->copygc_pd); -+ -+ if (attr == &sysfs_cache_replacement_policy) { -+ bch2_string_opt_to_text(&out, -+ bch2_cache_replacement_policies, -+ ca->mi.replacement); -+ pr_buf(&out, "\n"); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_state_rw) { -+ bch2_string_opt_to_text(&out, bch2_dev_state, -+ ca->mi.state); -+ pr_buf(&out, "\n"); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_iodone) -+ return show_dev_iodone(ca, buf); -+ -+ sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ])); -+ sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE])); -+ -+ if (attr == &sysfs_io_latency_stats_read) -+ return bch2_time_stats_print(&ca->io_latency[READ], buf, PAGE_SIZE); -+ if (attr == &sysfs_io_latency_stats_write) -+ return bch2_time_stats_print(&ca->io_latency[WRITE], buf, PAGE_SIZE); -+ -+ sysfs_printf(congested, "%u%%", -+ clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX) -+ * 100 / CONGESTED_MAX); -+ -+ if (attr == &sysfs_bucket_quantiles_last_read) -+ return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 0); -+ if (attr == &sysfs_bucket_quantiles_last_write) -+ return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 1); -+ if (attr == &sysfs_bucket_quantiles_fragmentation) -+ return 
show_quantiles(c, ca, buf, bucket_sectors_used_fn, NULL); -+ if (attr == &sysfs_bucket_quantiles_oldest_gen) -+ return show_quantiles(c, ca, buf, bucket_oldest_gen_fn, NULL); -+ -+ if (attr == &sysfs_reserve_stats) -+ return show_reserve_stats(ca, buf); -+ if (attr == &sysfs_alloc_debug) -+ return show_dev_alloc_debug(ca, buf); -+ -+ return 0; -+} -+ -+STORE(bch2_dev) -+{ -+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); -+ struct bch_fs *c = ca->fs; -+ struct bch_member *mi; -+ -+ sysfs_pd_controller_store(copy_gc, &ca->copygc_pd); -+ -+ if (attr == &sysfs_discard) { -+ bool v = strtoul_or_return(buf); -+ -+ mutex_lock(&c->sb_lock); -+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; -+ -+ if (v != BCH_MEMBER_DISCARD(mi)) { -+ SET_BCH_MEMBER_DISCARD(mi, v); -+ bch2_write_super(c); -+ } -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ if (attr == &sysfs_cache_replacement_policy) { -+ ssize_t v = __sysfs_match_string(bch2_cache_replacement_policies, -1, buf); -+ -+ if (v < 0) -+ return v; -+ -+ mutex_lock(&c->sb_lock); -+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; -+ -+ if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) { -+ SET_BCH_MEMBER_REPLACEMENT(mi, v); -+ bch2_write_super(c); -+ } -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ if (attr == &sysfs_label) { -+ char *tmp; -+ int ret; -+ -+ tmp = kstrdup(buf, GFP_KERNEL); -+ if (!tmp) -+ return -ENOMEM; -+ -+ ret = bch2_dev_group_set(c, ca, strim(tmp)); -+ kfree(tmp); -+ if (ret) -+ return ret; -+ } -+ -+ if (attr == &sysfs_wake_allocator) -+ bch2_wake_allocator(ca); -+ -+ return size; -+} -+SYSFS_OPS(bch2_dev); -+ -+struct attribute *bch2_dev_files[] = { -+ &sysfs_uuid, -+ &sysfs_bucket_size, -+ &sysfs_block_size, -+ &sysfs_first_bucket, -+ &sysfs_nbuckets, -+ &sysfs_durability, -+ -+ /* settings: */ -+ &sysfs_discard, -+ &sysfs_cache_replacement_policy, -+ &sysfs_state_rw, -+ &sysfs_label, -+ -+ &sysfs_has_data, -+ &sysfs_iodone, -+ -+ &sysfs_io_latency_read, -+ &sysfs_io_latency_write, -+ &sysfs_io_latency_stats_read, -+ &sysfs_io_latency_stats_write, -+ &sysfs_congested, -+ -+ /* alloc info - other stats: */ -+ &sysfs_bucket_quantiles_last_read, -+ &sysfs_bucket_quantiles_last_write, -+ &sysfs_bucket_quantiles_fragmentation, -+ &sysfs_bucket_quantiles_oldest_gen, -+ -+ &sysfs_reserve_stats, -+ -+ /* debug: */ -+ &sysfs_alloc_debug, -+ &sysfs_wake_allocator, -+ -+ sysfs_pd_controller_files(copy_gc), -+ NULL -+}; -+ -+#endif /* _BCACHEFS_SYSFS_H_ */ -diff --git a/fs/bcachefs/sysfs.h b/fs/bcachefs/sysfs.h -new file mode 100644 -index 000000000000..525fd05d91f7 ---- /dev/null -+++ b/fs/bcachefs/sysfs.h -@@ -0,0 +1,44 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SYSFS_H_ -+#define _BCACHEFS_SYSFS_H_ -+ -+#include -+ -+#ifndef NO_BCACHEFS_SYSFS -+ -+struct attribute; -+struct sysfs_ops; -+ -+extern struct attribute *bch2_fs_files[]; -+extern struct attribute *bch2_fs_internal_files[]; -+extern struct attribute *bch2_fs_opts_dir_files[]; -+extern struct attribute *bch2_fs_time_stats_files[]; -+extern struct attribute *bch2_dev_files[]; -+ -+extern struct sysfs_ops bch2_fs_sysfs_ops; -+extern struct sysfs_ops bch2_fs_internal_sysfs_ops; -+extern struct sysfs_ops bch2_fs_opts_dir_sysfs_ops; -+extern struct sysfs_ops bch2_fs_time_stats_sysfs_ops; -+extern struct sysfs_ops bch2_dev_sysfs_ops; -+ -+int bch2_opts_create_sysfs_files(struct kobject *); -+ -+#else -+ -+static struct attribute *bch2_fs_files[] = {}; -+static struct attribute *bch2_fs_internal_files[] = {}; -+static struct attribute 
*bch2_fs_opts_dir_files[] = {}; -+static struct attribute *bch2_fs_time_stats_files[] = {}; -+static struct attribute *bch2_dev_files[] = {}; -+ -+static const struct sysfs_ops bch2_fs_sysfs_ops; -+static const struct sysfs_ops bch2_fs_internal_sysfs_ops; -+static const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops; -+static const struct sysfs_ops bch2_fs_time_stats_sysfs_ops; -+static const struct sysfs_ops bch2_dev_sysfs_ops; -+ -+static inline int bch2_opts_create_sysfs_files(struct kobject *kobj) { return 0; } -+ -+#endif /* NO_BCACHEFS_SYSFS */ -+ -+#endif /* _BCACHEFS_SYSFS_H_ */ -diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c -new file mode 100644 -index 000000000000..4dcace650416 ---- /dev/null -+++ b/fs/bcachefs/tests.c -@@ -0,0 +1,725 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifdef CONFIG_BCACHEFS_TESTS -+ -+#include "bcachefs.h" -+#include "btree_update.h" -+#include "journal_reclaim.h" -+#include "tests.h" -+ -+#include "linux/kthread.h" -+#include "linux/random.h" -+ -+static void delete_test_keys(struct bch_fs *c) -+{ -+ int ret; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS, -+ POS(0, 0), POS(0, U64_MAX), -+ NULL); -+ BUG_ON(ret); -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_XATTRS, -+ POS(0, 0), POS(0, U64_MAX), -+ NULL); -+ BUG_ON(ret); -+} -+ -+/* unit tests */ -+ -+static void test_delete(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_i_cookie k; -+ int ret; -+ -+ bkey_cookie_init(&k.k_i); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, k.k.p, -+ BTREE_ITER_INTENT); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ BUG_ON(ret); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_trans_update(&trans, iter, &k.k_i, 0)); -+ BUG_ON(ret); -+ -+ pr_info("deleting once"); -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ BUG_ON(ret); -+ -+ pr_info("deleting twice"); -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ BUG_ON(ret); -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_delete_written(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_i_cookie k; -+ int ret; -+ -+ bkey_cookie_init(&k.k_i); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, k.k.p, -+ BTREE_ITER_INTENT); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ BUG_ON(ret); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_trans_update(&trans, iter, &k.k_i, 0)); -+ BUG_ON(ret); -+ -+ bch2_journal_flush_all_pins(&c->journal); -+ -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ BUG_ON(ret); -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_iterate(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test keys"); -+ -+ for (i = 0; i < nr; i++) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = i; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_XATTRS, &k.k_i, -+ NULL, NULL, 0); -+ BUG_ON(ret); -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, -+ POS_MIN, 0, k, ret) { -+ if (k.k->p.inode) -+ break; -+ -+ BUG_ON(k.k->p.offset != i++); -+ } -+ -+ BUG_ON(i != nr); -+ -+ pr_info("iterating backwards"); -+ -+ while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) -+ BUG_ON(k.k->p.offset != --i); -+ -+ BUG_ON(i); -+ -+ 
bch2_trans_exit(&trans); -+} -+ -+static void test_iterate_extents(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test extents"); -+ -+ for (i = 0; i < nr; i += 8) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = i + 8; -+ k.k.size = 8; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i, -+ NULL, NULL, 0); -+ BUG_ON(ret); -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, -+ POS_MIN, 0, k, ret) { -+ BUG_ON(bkey_start_offset(k.k) != i); -+ i = k.k->p.offset; -+ } -+ -+ BUG_ON(i != nr); -+ -+ pr_info("iterating backwards"); -+ -+ while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) { -+ BUG_ON(k.k->p.offset != i); -+ i = bkey_start_offset(k.k); -+ } -+ -+ BUG_ON(i); -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_iterate_slots(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test keys"); -+ -+ for (i = 0; i < nr; i++) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = i * 2; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_XATTRS, &k.k_i, -+ NULL, NULL, 0); -+ BUG_ON(ret); -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, -+ 0, k, ret) { -+ if (k.k->p.inode) -+ break; -+ -+ BUG_ON(k.k->p.offset != i); -+ i += 2; -+ } -+ bch2_trans_iter_free(&trans, iter); -+ -+ BUG_ON(i != nr * 2); -+ -+ pr_info("iterating forwards by slots"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, -+ BTREE_ITER_SLOTS, k, ret) { -+ BUG_ON(k.k->p.offset != i); -+ BUG_ON(bkey_deleted(k.k) != (i & 1)); -+ -+ i++; -+ if (i == nr * 2) -+ break; -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test keys"); -+ -+ for (i = 0; i < nr; i += 16) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = i + 16; -+ k.k.size = 8; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i, -+ NULL, NULL, 0); -+ BUG_ON(ret); -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, -+ 0, k, ret) { -+ BUG_ON(bkey_start_offset(k.k) != i + 8); -+ BUG_ON(k.k->size != 8); -+ i += 16; -+ } -+ bch2_trans_iter_free(&trans, iter); -+ -+ BUG_ON(i != nr); -+ -+ pr_info("iterating forwards by slots"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, -+ BTREE_ITER_SLOTS, k, ret) { -+ BUG_ON(bkey_deleted(k.k) != !(i % 16)); -+ -+ BUG_ON(bkey_start_offset(k.k) != i); -+ BUG_ON(k.k->size != 8); -+ i = k.k->p.offset; -+ -+ if (i == nr) -+ break; -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+/* -+ * XXX: we really want to make sure we've got a btree with depth > 0 for these -+ * tests -+ */ -+static void test_peek_end(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, 
BTREE_ID_XATTRS, POS_MIN, 0); -+ -+ k = bch2_btree_iter_peek(iter); -+ BUG_ON(k.k); -+ -+ k = bch2_btree_iter_peek(iter); -+ BUG_ON(k.k); -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_peek_end_extents(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, 0); -+ -+ k = bch2_btree_iter_peek(iter); -+ BUG_ON(k.k); -+ -+ k = bch2_btree_iter_peek(iter); -+ BUG_ON(k.k); -+ -+ bch2_trans_exit(&trans); -+} -+ -+/* extent unit tests */ -+ -+u64 test_version; -+ -+static void insert_test_extent(struct bch_fs *c, -+ u64 start, u64 end) -+{ -+ struct bkey_i_cookie k; -+ int ret; -+ -+ //pr_info("inserting %llu-%llu v %llu", start, end, test_version); -+ -+ bkey_cookie_init(&k.k_i); -+ k.k_i.k.p.offset = end; -+ k.k_i.k.size = end - start; -+ k.k_i.k.version.lo = test_version++; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i, -+ NULL, NULL, 0); -+ BUG_ON(ret); -+} -+ -+static void __test_extent_overwrite(struct bch_fs *c, -+ u64 e1_start, u64 e1_end, -+ u64 e2_start, u64 e2_end) -+{ -+ insert_test_extent(c, e1_start, e1_end); -+ insert_test_extent(c, e2_start, e2_end); -+ -+ delete_test_keys(c); -+} -+ -+static void test_extent_overwrite_front(struct bch_fs *c, u64 nr) -+{ -+ __test_extent_overwrite(c, 0, 64, 0, 32); -+ __test_extent_overwrite(c, 8, 64, 0, 32); -+} -+ -+static void test_extent_overwrite_back(struct bch_fs *c, u64 nr) -+{ -+ __test_extent_overwrite(c, 0, 64, 32, 64); -+ __test_extent_overwrite(c, 0, 64, 32, 72); -+} -+ -+static void test_extent_overwrite_middle(struct bch_fs *c, u64 nr) -+{ -+ __test_extent_overwrite(c, 0, 64, 32, 40); -+} -+ -+static void test_extent_overwrite_all(struct bch_fs *c, u64 nr) -+{ -+ __test_extent_overwrite(c, 32, 64, 0, 64); -+ __test_extent_overwrite(c, 32, 64, 0, 128); -+ __test_extent_overwrite(c, 32, 64, 32, 64); -+ __test_extent_overwrite(c, 32, 64, 32, 128); -+} -+ -+/* perf tests */ -+ -+static u64 test_rand(void) -+{ -+ u64 v; -+#if 0 -+ v = prandom_u32(); -+#else -+ prandom_bytes(&v, sizeof(v)); -+#endif -+ return v; -+} -+ -+static void rand_insert(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct bkey_i_cookie k; -+ int ret; -+ u64 i; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < nr; i++) { -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = test_rand(); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ __bch2_btree_insert(&trans, BTREE_ID_XATTRS, &k.k_i)); -+ -+ BUG_ON(ret); -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void rand_lookup(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < nr; i++) { -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, -+ POS(0, test_rand()), 0); -+ -+ k = bch2_btree_iter_peek(iter); -+ bch2_trans_iter_free(&trans, iter); -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void rand_mixed(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ u64 i; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < nr; i++) { -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, -+ POS(0, test_rand()), 0); -+ -+ k = bch2_btree_iter_peek(iter); -+ -+ if (!(i & 3) && k.k) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p = iter->pos; -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 
0, -+ bch2_trans_update(&trans, iter, &k.k_i, 0)); -+ -+ BUG_ON(ret); -+ } -+ -+ bch2_trans_iter_free(&trans, iter); -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static int __do_delete(struct btree_trans *trans, struct bpos pos) -+{ -+ struct btree_iter *iter; -+ struct bkey_i delete; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_XATTRS, pos, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(iter); -+ if (ret) -+ goto err; -+ -+ k = bch2_btree_iter_peek(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ bkey_init(&delete.k); -+ delete.k.p = k.k->p; -+ -+ bch2_trans_update(trans, iter, &delete, 0); -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static void rand_delete(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ int ret; -+ u64 i; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < nr; i++) { -+ struct bpos pos = POS(0, test_rand()); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ __do_delete(&trans, pos)); -+ BUG_ON(ret); -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void seq_insert(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_i_cookie insert; -+ int ret; -+ u64 i = 0; -+ -+ bkey_cookie_init(&insert.k_i); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ insert.k.p = iter->pos; -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_trans_update(&trans, iter, &insert.k_i, 0)); -+ -+ BUG_ON(ret); -+ -+ if (++i == nr) -+ break; -+ } -+ bch2_trans_exit(&trans); -+} -+ -+static void seq_lookup(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, 0, k, ret) -+ ; -+ bch2_trans_exit(&trans); -+} -+ -+static void seq_overwrite(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, -+ BTREE_ITER_INTENT, k, ret) { -+ struct bkey_i_cookie u; -+ -+ bkey_reassemble(&u.k_i, k); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_trans_update(&trans, iter, &u.k_i, 0)); -+ -+ BUG_ON(ret); -+ } -+ bch2_trans_exit(&trans); -+} -+ -+static void seq_delete(struct bch_fs *c, u64 nr) -+{ -+ int ret; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_XATTRS, -+ POS(0, 0), POS(0, U64_MAX), -+ NULL); -+ BUG_ON(ret); -+} -+ -+typedef void (*perf_test_fn)(struct bch_fs *, u64); -+ -+struct test_job { -+ struct bch_fs *c; -+ u64 nr; -+ unsigned nr_threads; -+ perf_test_fn fn; -+ -+ atomic_t ready; -+ wait_queue_head_t ready_wait; -+ -+ atomic_t done; -+ struct completion done_completion; -+ -+ u64 start; -+ u64 finish; -+}; -+ -+static int btree_perf_test_thread(void *data) -+{ -+ struct test_job *j = data; -+ -+ if (atomic_dec_and_test(&j->ready)) { -+ wake_up(&j->ready_wait); -+ j->start = sched_clock(); -+ } else { -+ wait_event(j->ready_wait, !atomic_read(&j->ready)); -+ } -+ -+ j->fn(j->c, j->nr / j->nr_threads); -+ -+ if (atomic_dec_and_test(&j->done)) { -+ j->finish = sched_clock(); -+ complete(&j->done_completion); -+ } -+ -+ return 0; -+} -+ -+void bch2_btree_perf_test(struct bch_fs *c, const char *testname, -+ u64 nr, unsigned nr_threads) -+{ -+ struct test_job j = { .c = c, 
.nr = nr, .nr_threads = nr_threads }; -+ char name_buf[20], nr_buf[20], per_sec_buf[20]; -+ unsigned i; -+ u64 time; -+ -+ atomic_set(&j.ready, nr_threads); -+ init_waitqueue_head(&j.ready_wait); -+ -+ atomic_set(&j.done, nr_threads); -+ init_completion(&j.done_completion); -+ -+#define perf_test(_test) \ -+ if (!strcmp(testname, #_test)) j.fn = _test -+ -+ perf_test(rand_insert); -+ perf_test(rand_lookup); -+ perf_test(rand_mixed); -+ perf_test(rand_delete); -+ -+ perf_test(seq_insert); -+ perf_test(seq_lookup); -+ perf_test(seq_overwrite); -+ perf_test(seq_delete); -+ -+ /* a unit test, not a perf test: */ -+ perf_test(test_delete); -+ perf_test(test_delete_written); -+ perf_test(test_iterate); -+ perf_test(test_iterate_extents); -+ perf_test(test_iterate_slots); -+ perf_test(test_iterate_slots_extents); -+ perf_test(test_peek_end); -+ perf_test(test_peek_end_extents); -+ -+ perf_test(test_extent_overwrite_front); -+ perf_test(test_extent_overwrite_back); -+ perf_test(test_extent_overwrite_middle); -+ perf_test(test_extent_overwrite_all); -+ -+ if (!j.fn) { -+ pr_err("unknown test %s", testname); -+ return; -+ } -+ -+ //pr_info("running test %s:", testname); -+ -+ if (nr_threads == 1) -+ btree_perf_test_thread(&j); -+ else -+ for (i = 0; i < nr_threads; i++) -+ kthread_run(btree_perf_test_thread, &j, -+ "bcachefs perf test[%u]", i); -+ -+ while (wait_for_completion_interruptible(&j.done_completion)) -+ ; -+ -+ time = j.finish - j.start; -+ -+ scnprintf(name_buf, sizeof(name_buf), "%s:", testname); -+ bch2_hprint(&PBUF(nr_buf), nr); -+ bch2_hprint(&PBUF(per_sec_buf), nr * NSEC_PER_SEC / time); -+ printk(KERN_INFO "%-12s %s with %u threads in %5llu sec, %5llu nsec per iter, %5s per sec\n", -+ name_buf, nr_buf, nr_threads, -+ time / NSEC_PER_SEC, -+ time * nr_threads / nr, -+ per_sec_buf); -+} -+ -+#endif /* CONFIG_BCACHEFS_TESTS */ -diff --git a/fs/bcachefs/tests.h b/fs/bcachefs/tests.h -new file mode 100644 -index 000000000000..551d0764225e ---- /dev/null -+++ b/fs/bcachefs/tests.h -@@ -0,0 +1,15 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_TEST_H -+#define _BCACHEFS_TEST_H -+ -+struct bch_fs; -+ -+#ifdef CONFIG_BCACHEFS_TESTS -+ -+void bch2_btree_perf_test(struct bch_fs *, const char *, u64, unsigned); -+ -+#else -+ -+#endif /* CONFIG_BCACHEFS_TESTS */ -+ -+#endif /* _BCACHEFS_TEST_H */ -diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c -new file mode 100644 -index 000000000000..59e8dfa3d245 ---- /dev/null -+++ b/fs/bcachefs/trace.c -@@ -0,0 +1,12 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "alloc_types.h" -+#include "buckets.h" -+#include "btree_types.h" -+#include "keylist.h" -+ -+#include -+#include "keylist.h" -+ -+#define CREATE_TRACE_POINTS -+#include -diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c -new file mode 100644 -index 000000000000..e69d03d1109f ---- /dev/null -+++ b/fs/bcachefs/util.c -@@ -0,0 +1,910 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * random utiility code, for bcache but in theory not specific to bcache -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. 
-+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "eytzinger.h" -+#include "util.h" -+ -+static const char si_units[] = "?kMGTPEZY"; -+ -+static int __bch2_strtoh(const char *cp, u64 *res, -+ u64 t_max, bool t_signed) -+{ -+ bool positive = *cp != '-'; -+ unsigned u; -+ u64 v = 0; -+ -+ if (*cp == '+' || *cp == '-') -+ cp++; -+ -+ if (!isdigit(*cp)) -+ return -EINVAL; -+ -+ do { -+ if (v > U64_MAX / 10) -+ return -ERANGE; -+ v *= 10; -+ if (v > U64_MAX - (*cp - '0')) -+ return -ERANGE; -+ v += *cp - '0'; -+ cp++; -+ } while (isdigit(*cp)); -+ -+ for (u = 1; u < strlen(si_units); u++) -+ if (*cp == si_units[u]) { -+ cp++; -+ goto got_unit; -+ } -+ u = 0; -+got_unit: -+ if (*cp == '\n') -+ cp++; -+ if (*cp) -+ return -EINVAL; -+ -+ if (fls64(v) + u * 10 > 64) -+ return -ERANGE; -+ -+ v <<= u * 10; -+ -+ if (positive) { -+ if (v > t_max) -+ return -ERANGE; -+ } else { -+ if (v && !t_signed) -+ return -ERANGE; -+ -+ if (v > t_max + 1) -+ return -ERANGE; -+ v = -v; -+ } -+ -+ *res = v; -+ return 0; -+} -+ -+#define STRTO_H(name, type) \ -+int bch2_ ## name ## _h(const char *cp, type *res) \ -+{ \ -+ u64 v; \ -+ int ret = __bch2_strtoh(cp, &v, ANYSINT_MAX(type), \ -+ ANYSINT_MAX(type) != ((type) ~0ULL)); \ -+ *res = v; \ -+ return ret; \ -+} -+ -+STRTO_H(strtoint, int) -+STRTO_H(strtouint, unsigned int) -+STRTO_H(strtoll, long long) -+STRTO_H(strtoull, unsigned long long) -+STRTO_H(strtou64, u64) -+ -+void bch2_hprint(struct printbuf *buf, s64 v) -+{ -+ int u, t = 0; -+ -+ for (u = 0; v >= 1024 || v <= -1024; u++) { -+ t = v & ~(~0U << 10); -+ v >>= 10; -+ } -+ -+ pr_buf(buf, "%lli", v); -+ -+ /* -+ * 103 is magic: t is in the range [-1023, 1023] and we want -+ * to turn it into [-9, 9] -+ */ -+ if (u && v < 100 && v > -100) -+ pr_buf(buf, ".%i", t / 103); -+ if (u) -+ pr_buf(buf, "%c", si_units[u]); -+} -+ -+void bch2_string_opt_to_text(struct printbuf *out, -+ const char * const list[], -+ size_t selected) -+{ -+ size_t i; -+ -+ for (i = 0; list[i]; i++) -+ pr_buf(out, i == selected ? "[%s] " : "%s ", list[i]); -+} -+ -+void bch2_flags_to_text(struct printbuf *out, -+ const char * const list[], u64 flags) -+{ -+ unsigned bit, nr = 0; -+ bool first = true; -+ -+ if (out->pos != out->end) -+ *out->pos = '\0'; -+ -+ while (list[nr]) -+ nr++; -+ -+ while (flags && (bit = __ffs(flags)) < nr) { -+ if (!first) -+ pr_buf(out, ","); -+ first = false; -+ pr_buf(out, "%s", list[bit]); -+ flags ^= 1 << bit; -+ } -+} -+ -+u64 bch2_read_flag_list(char *opt, const char * const list[]) -+{ -+ u64 ret = 0; -+ char *p, *s, *d = kstrndup(opt, PAGE_SIZE - 1, GFP_KERNEL); -+ -+ if (!d) -+ return -ENOMEM; -+ -+ s = strim(d); -+ -+ while ((p = strsep(&s, ","))) { -+ int flag = match_string(list, -1, p); -+ if (flag < 0) { -+ ret = -1; -+ break; -+ } -+ -+ ret |= 1 << flag; -+ } -+ -+ kfree(d); -+ -+ return ret; -+} -+ -+bool bch2_is_zero(const void *_p, size_t n) -+{ -+ const char *p = _p; -+ size_t i; -+ -+ for (i = 0; i < n; i++) -+ if (p[i]) -+ return false; -+ return true; -+} -+ -+static void bch2_quantiles_update(struct quantiles *q, u64 v) -+{ -+ unsigned i = 0; -+ -+ while (i < ARRAY_SIZE(q->entries)) { -+ struct quantile_entry *e = q->entries + i; -+ -+ if (unlikely(!e->step)) { -+ e->m = v; -+ e->step = max_t(unsigned, v / 2, 1024); -+ } else if (e->m > v) { -+ e->m = e->m >= e->step -+ ? 
e->m - e->step -+ : 0; -+ } else if (e->m < v) { -+ e->m = e->m + e->step > e->m -+ ? e->m + e->step -+ : U32_MAX; -+ } -+ -+ if ((e->m > v ? e->m - v : v - e->m) < e->step) -+ e->step = max_t(unsigned, e->step / 2, 1); -+ -+ if (v >= e->m) -+ break; -+ -+ i = eytzinger0_child(i, v > e->m); -+ } -+} -+ -+/* time stats: */ -+ -+static void bch2_time_stats_update_one(struct time_stats *stats, -+ u64 start, u64 end) -+{ -+ u64 duration, freq; -+ -+ duration = time_after64(end, start) -+ ? end - start : 0; -+ freq = time_after64(end, stats->last_event) -+ ? end - stats->last_event : 0; -+ -+ stats->count++; -+ -+ stats->average_duration = stats->average_duration -+ ? ewma_add(stats->average_duration, duration, 6) -+ : duration; -+ -+ stats->average_frequency = stats->average_frequency -+ ? ewma_add(stats->average_frequency, freq, 6) -+ : freq; -+ -+ stats->max_duration = max(stats->max_duration, duration); -+ -+ stats->last_event = end; -+ -+ bch2_quantiles_update(&stats->quantiles, duration); -+} -+ -+void __bch2_time_stats_update(struct time_stats *stats, u64 start, u64 end) -+{ -+ unsigned long flags; -+ -+ if (!stats->buffer) { -+ spin_lock_irqsave(&stats->lock, flags); -+ bch2_time_stats_update_one(stats, start, end); -+ -+ if (stats->average_frequency < 32 && -+ stats->count > 1024) -+ stats->buffer = -+ alloc_percpu_gfp(struct time_stat_buffer, -+ GFP_ATOMIC); -+ spin_unlock_irqrestore(&stats->lock, flags); -+ } else { -+ struct time_stat_buffer_entry *i; -+ struct time_stat_buffer *b; -+ -+ preempt_disable(); -+ b = this_cpu_ptr(stats->buffer); -+ -+ BUG_ON(b->nr >= ARRAY_SIZE(b->entries)); -+ b->entries[b->nr++] = (struct time_stat_buffer_entry) { -+ .start = start, -+ .end = end -+ }; -+ -+ if (b->nr == ARRAY_SIZE(b->entries)) { -+ spin_lock_irqsave(&stats->lock, flags); -+ for (i = b->entries; -+ i < b->entries + ARRAY_SIZE(b->entries); -+ i++) -+ bch2_time_stats_update_one(stats, i->start, i->end); -+ spin_unlock_irqrestore(&stats->lock, flags); -+ -+ b->nr = 0; -+ } -+ -+ preempt_enable(); -+ } -+} -+ -+static const struct time_unit { -+ const char *name; -+ u32 nsecs; -+} time_units[] = { -+ { "ns", 1 }, -+ { "us", NSEC_PER_USEC }, -+ { "ms", NSEC_PER_MSEC }, -+ { "sec", NSEC_PER_SEC }, -+}; -+ -+static const struct time_unit *pick_time_units(u64 ns) -+{ -+ const struct time_unit *u; -+ -+ for (u = time_units; -+ u + 1 < time_units + ARRAY_SIZE(time_units) && -+ ns >= u[1].nsecs << 1; -+ u++) -+ ; -+ -+ return u; -+} -+ -+static void pr_time_units(struct printbuf *out, u64 ns) -+{ -+ const struct time_unit *u = pick_time_units(ns); -+ -+ pr_buf(out, "%llu %s", div_u64(ns, u->nsecs), u->name); -+} -+ -+size_t bch2_time_stats_print(struct time_stats *stats, char *buf, size_t len) -+{ -+ struct printbuf out = _PBUF(buf, len); -+ const struct time_unit *u; -+ u64 freq = READ_ONCE(stats->average_frequency); -+ u64 q, last_q = 0; -+ int i; -+ -+ pr_buf(&out, "count:\t\t%llu\n", -+ stats->count); -+ pr_buf(&out, "rate:\t\t%llu/sec\n", -+ freq ? 
div64_u64(NSEC_PER_SEC, freq) : 0); -+ -+ pr_buf(&out, "frequency:\t"); -+ pr_time_units(&out, freq); -+ -+ pr_buf(&out, "\navg duration:\t"); -+ pr_time_units(&out, stats->average_duration); -+ -+ pr_buf(&out, "\nmax duration:\t"); -+ pr_time_units(&out, stats->max_duration); -+ -+ i = eytzinger0_first(NR_QUANTILES); -+ u = pick_time_units(stats->quantiles.entries[i].m); -+ -+ pr_buf(&out, "\nquantiles (%s):\t", u->name); -+ eytzinger0_for_each(i, NR_QUANTILES) { -+ bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1; -+ -+ q = max(stats->quantiles.entries[i].m, last_q); -+ pr_buf(&out, "%llu%s", -+ div_u64(q, u->nsecs), -+ is_last ? "\n" : " "); -+ last_q = q; -+ } -+ -+ return out.pos - buf; -+} -+ -+void bch2_time_stats_exit(struct time_stats *stats) -+{ -+ free_percpu(stats->buffer); -+} -+ -+void bch2_time_stats_init(struct time_stats *stats) -+{ -+ memset(stats, 0, sizeof(*stats)); -+ spin_lock_init(&stats->lock); -+} -+ -+/* ratelimit: */ -+ -+/** -+ * bch2_ratelimit_delay() - return how long to delay until the next time to do -+ * some work -+ * -+ * @d - the struct bch_ratelimit to update -+ * -+ * Returns the amount of time to delay by, in jiffies -+ */ -+u64 bch2_ratelimit_delay(struct bch_ratelimit *d) -+{ -+ u64 now = local_clock(); -+ -+ return time_after64(d->next, now) -+ ? nsecs_to_jiffies(d->next - now) -+ : 0; -+} -+ -+/** -+ * bch2_ratelimit_increment() - increment @d by the amount of work done -+ * -+ * @d - the struct bch_ratelimit to update -+ * @done - the amount of work done, in arbitrary units -+ */ -+void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done) -+{ -+ u64 now = local_clock(); -+ -+ d->next += div_u64(done * NSEC_PER_SEC, d->rate); -+ -+ if (time_before64(now + NSEC_PER_SEC, d->next)) -+ d->next = now + NSEC_PER_SEC; -+ -+ if (time_after64(now - NSEC_PER_SEC * 2, d->next)) -+ d->next = now - NSEC_PER_SEC * 2; -+} -+ -+/* pd controller: */ -+ -+/* -+ * Updates pd_controller. Attempts to scale inputed values to units per second. -+ * @target: desired value -+ * @actual: current value -+ * -+ * @sign: 1 or -1; 1 if increasing the rate makes actual go up, -1 if increasing -+ * it makes actual go down. 
-+ */ -+void bch2_pd_controller_update(struct bch_pd_controller *pd, -+ s64 target, s64 actual, int sign) -+{ -+ s64 proportional, derivative, change; -+ -+ unsigned long seconds_since_update = (jiffies - pd->last_update) / HZ; -+ -+ if (seconds_since_update == 0) -+ return; -+ -+ pd->last_update = jiffies; -+ -+ proportional = actual - target; -+ proportional *= seconds_since_update; -+ proportional = div_s64(proportional, pd->p_term_inverse); -+ -+ derivative = actual - pd->last_actual; -+ derivative = div_s64(derivative, seconds_since_update); -+ derivative = ewma_add(pd->smoothed_derivative, derivative, -+ (pd->d_term / seconds_since_update) ?: 1); -+ derivative = derivative * pd->d_term; -+ derivative = div_s64(derivative, pd->p_term_inverse); -+ -+ change = proportional + derivative; -+ -+ /* Don't increase rate if not keeping up */ -+ if (change > 0 && -+ pd->backpressure && -+ time_after64(local_clock(), -+ pd->rate.next + NSEC_PER_MSEC)) -+ change = 0; -+ -+ change *= (sign * -1); -+ -+ pd->rate.rate = clamp_t(s64, (s64) pd->rate.rate + change, -+ 1, UINT_MAX); -+ -+ pd->last_actual = actual; -+ pd->last_derivative = derivative; -+ pd->last_proportional = proportional; -+ pd->last_change = change; -+ pd->last_target = target; -+} -+ -+void bch2_pd_controller_init(struct bch_pd_controller *pd) -+{ -+ pd->rate.rate = 1024; -+ pd->last_update = jiffies; -+ pd->p_term_inverse = 6000; -+ pd->d_term = 30; -+ pd->d_smooth = pd->d_term; -+ pd->backpressure = 1; -+} -+ -+size_t bch2_pd_controller_print_debug(struct bch_pd_controller *pd, char *buf) -+{ -+ /* 2^64 - 1 is 20 digits, plus null byte */ -+ char rate[21]; -+ char actual[21]; -+ char target[21]; -+ char proportional[21]; -+ char derivative[21]; -+ char change[21]; -+ s64 next_io; -+ -+ bch2_hprint(&PBUF(rate), pd->rate.rate); -+ bch2_hprint(&PBUF(actual), pd->last_actual); -+ bch2_hprint(&PBUF(target), pd->last_target); -+ bch2_hprint(&PBUF(proportional), pd->last_proportional); -+ bch2_hprint(&PBUF(derivative), pd->last_derivative); -+ bch2_hprint(&PBUF(change), pd->last_change); -+ -+ next_io = div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC); -+ -+ return sprintf(buf, -+ "rate:\t\t%s/sec\n" -+ "target:\t\t%s\n" -+ "actual:\t\t%s\n" -+ "proportional:\t%s\n" -+ "derivative:\t%s\n" -+ "change:\t\t%s/sec\n" -+ "next io:\t%llims\n", -+ rate, target, actual, proportional, -+ derivative, change, next_io); -+} -+ -+/* misc: */ -+ -+void bch2_bio_map(struct bio *bio, void *base, size_t size) -+{ -+ while (size) { -+ struct page *page = is_vmalloc_addr(base) -+ ? 
vmalloc_to_page(base) -+ : virt_to_page(base); -+ unsigned offset = offset_in_page(base); -+ unsigned len = min_t(size_t, PAGE_SIZE - offset, size); -+ -+ BUG_ON(!bio_add_page(bio, page, len, offset)); -+ size -= len; -+ base += len; -+ } -+} -+ -+int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) -+{ -+ while (size) { -+ struct page *page = alloc_page(gfp_mask); -+ unsigned len = min(PAGE_SIZE, size); -+ -+ if (!page) -+ return -ENOMEM; -+ -+ BUG_ON(!bio_add_page(bio, page, len, 0)); -+ size -= len; -+ } -+ -+ return 0; -+} -+ -+size_t bch2_rand_range(size_t max) -+{ -+ size_t rand; -+ -+ if (!max) -+ return 0; -+ -+ do { -+ rand = get_random_long(); -+ rand &= roundup_pow_of_two(max) - 1; -+ } while (rand >= max); -+ -+ return rand; -+} -+ -+void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ -+ __bio_for_each_segment(bv, dst, iter, dst_iter) { -+ void *dstp = kmap_atomic(bv.bv_page); -+ memcpy(dstp + bv.bv_offset, src, bv.bv_len); -+ kunmap_atomic(dstp); -+ -+ src += bv.bv_len; -+ } -+} -+ -+void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ -+ __bio_for_each_segment(bv, src, iter, src_iter) { -+ void *srcp = kmap_atomic(bv.bv_page); -+ memcpy(dst, srcp + bv.bv_offset, bv.bv_len); -+ kunmap_atomic(srcp); -+ -+ dst += bv.bv_len; -+ } -+} -+ -+void bch_scnmemcpy(struct printbuf *out, -+ const char *src, size_t len) -+{ -+ size_t n = printbuf_remaining(out); -+ -+ if (n) { -+ n = min(n - 1, len); -+ memcpy(out->pos, src, n); -+ out->pos += n; -+ *out->pos = '\0'; -+ } -+} -+ -+#include "eytzinger.h" -+ -+static int alignment_ok(const void *base, size_t align) -+{ -+ return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || -+ ((unsigned long)base & (align - 1)) == 0; -+} -+ -+static void u32_swap(void *a, void *b, size_t size) -+{ -+ u32 t = *(u32 *)a; -+ *(u32 *)a = *(u32 *)b; -+ *(u32 *)b = t; -+} -+ -+static void u64_swap(void *a, void *b, size_t size) -+{ -+ u64 t = *(u64 *)a; -+ *(u64 *)a = *(u64 *)b; -+ *(u64 *)b = t; -+} -+ -+static void generic_swap(void *a, void *b, size_t size) -+{ -+ char t; -+ -+ do { -+ t = *(char *)a; -+ *(char *)a++ = *(char *)b; -+ *(char *)b++ = t; -+ } while (--size > 0); -+} -+ -+static inline int do_cmp(void *base, size_t n, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ size_t l, size_t r) -+{ -+ return cmp_func(base + inorder_to_eytzinger0(l, n) * size, -+ base + inorder_to_eytzinger0(r, n) * size, -+ size); -+} -+ -+static inline void do_swap(void *base, size_t n, size_t size, -+ void (*swap_func)(void *, void *, size_t), -+ size_t l, size_t r) -+{ -+ swap_func(base + inorder_to_eytzinger0(l, n) * size, -+ base + inorder_to_eytzinger0(r, n) * size, -+ size); -+} -+ -+void eytzinger0_sort(void *base, size_t n, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t)) -+{ -+ int i, c, r; -+ -+ if (!swap_func) { -+ if (size == 4 && alignment_ok(base, 4)) -+ swap_func = u32_swap; -+ else if (size == 8 && alignment_ok(base, 8)) -+ swap_func = u64_swap; -+ else -+ swap_func = generic_swap; -+ } -+ -+ /* heapify */ -+ for (i = n / 2 - 1; i >= 0; --i) { -+ for (r = i; r * 2 + 1 < n; r = c) { -+ c = r * 2 + 1; -+ -+ if (c + 1 < n && -+ do_cmp(base, n, size, cmp_func, c, c + 1) < 0) -+ c++; -+ -+ if (do_cmp(base, n, size, cmp_func, r, c) >= 0) -+ break; -+ -+ do_swap(base, n, size, swap_func, r, 
c); -+ } -+ } -+ -+ /* sort */ -+ for (i = n - 1; i > 0; --i) { -+ do_swap(base, n, size, swap_func, 0, i); -+ -+ for (r = 0; r * 2 + 1 < i; r = c) { -+ c = r * 2 + 1; -+ -+ if (c + 1 < i && -+ do_cmp(base, n, size, cmp_func, c, c + 1) < 0) -+ c++; -+ -+ if (do_cmp(base, n, size, cmp_func, r, c) >= 0) -+ break; -+ -+ do_swap(base, n, size, swap_func, r, c); -+ } -+ } -+} -+ -+void sort_cmp_size(void *base, size_t num, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t size)) -+{ -+ /* pre-scale counters for performance */ -+ int i = (num/2 - 1) * size, n = num * size, c, r; -+ -+ if (!swap_func) { -+ if (size == 4 && alignment_ok(base, 4)) -+ swap_func = u32_swap; -+ else if (size == 8 && alignment_ok(base, 8)) -+ swap_func = u64_swap; -+ else -+ swap_func = generic_swap; -+ } -+ -+ /* heapify */ -+ for ( ; i >= 0; i -= size) { -+ for (r = i; r * 2 + size < n; r = c) { -+ c = r * 2 + size; -+ if (c < n - size && -+ cmp_func(base + c, base + c + size, size) < 0) -+ c += size; -+ if (cmp_func(base + r, base + c, size) >= 0) -+ break; -+ swap_func(base + r, base + c, size); -+ } -+ } -+ -+ /* sort */ -+ for (i = n - size; i > 0; i -= size) { -+ swap_func(base, base + i, size); -+ for (r = 0; r * 2 + size < i; r = c) { -+ c = r * 2 + size; -+ if (c < i - size && -+ cmp_func(base + c, base + c + size, size) < 0) -+ c += size; -+ if (cmp_func(base + r, base + c, size) >= 0) -+ break; -+ swap_func(base + r, base + c, size); -+ } -+ } -+} -+ -+static void mempool_free_vp(void *element, void *pool_data) -+{ -+ size_t size = (size_t) pool_data; -+ -+ vpfree(element, size); -+} -+ -+static void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data) -+{ -+ size_t size = (size_t) pool_data; -+ -+ return vpmalloc(size, gfp_mask); -+} -+ -+int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size) -+{ -+ return size < PAGE_SIZE -+ ? 
mempool_init_kmalloc_pool(pool, min_nr, size) -+ : mempool_init(pool, min_nr, mempool_alloc_vp, -+ mempool_free_vp, (void *) size); -+} -+ -+#if 0 -+void eytzinger1_test(void) -+{ -+ unsigned inorder, eytz, size; -+ -+ pr_info("1 based eytzinger test:"); -+ -+ for (size = 2; -+ size < 65536; -+ size++) { -+ unsigned extra = eytzinger1_extra(size); -+ -+ if (!(size % 4096)) -+ pr_info("tree size %u", size); -+ -+ BUG_ON(eytzinger1_prev(0, size) != eytzinger1_last(size)); -+ BUG_ON(eytzinger1_next(0, size) != eytzinger1_first(size)); -+ -+ BUG_ON(eytzinger1_prev(eytzinger1_first(size), size) != 0); -+ BUG_ON(eytzinger1_next(eytzinger1_last(size), size) != 0); -+ -+ inorder = 1; -+ eytzinger1_for_each(eytz, size) { -+ BUG_ON(__inorder_to_eytzinger1(inorder, size, extra) != eytz); -+ BUG_ON(__eytzinger1_to_inorder(eytz, size, extra) != inorder); -+ BUG_ON(eytz != eytzinger1_last(size) && -+ eytzinger1_prev(eytzinger1_next(eytz, size), size) != eytz); -+ -+ inorder++; -+ } -+ } -+} -+ -+void eytzinger0_test(void) -+{ -+ -+ unsigned inorder, eytz, size; -+ -+ pr_info("0 based eytzinger test:"); -+ -+ for (size = 1; -+ size < 65536; -+ size++) { -+ unsigned extra = eytzinger0_extra(size); -+ -+ if (!(size % 4096)) -+ pr_info("tree size %u", size); -+ -+ BUG_ON(eytzinger0_prev(-1, size) != eytzinger0_last(size)); -+ BUG_ON(eytzinger0_next(-1, size) != eytzinger0_first(size)); -+ -+ BUG_ON(eytzinger0_prev(eytzinger0_first(size), size) != -1); -+ BUG_ON(eytzinger0_next(eytzinger0_last(size), size) != -1); -+ -+ inorder = 0; -+ eytzinger0_for_each(eytz, size) { -+ BUG_ON(__inorder_to_eytzinger0(inorder, size, extra) != eytz); -+ BUG_ON(__eytzinger0_to_inorder(eytz, size, extra) != inorder); -+ BUG_ON(eytz != eytzinger0_last(size) && -+ eytzinger0_prev(eytzinger0_next(eytz, size), size) != eytz); -+ -+ inorder++; -+ } -+ } -+} -+ -+static inline int cmp_u16(const void *_l, const void *_r, size_t size) -+{ -+ const u16 *l = _l, *r = _r; -+ -+ return (*l > *r) - (*r - *l); -+} -+ -+static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search) -+{ -+ int i, c1 = -1, c2 = -1; -+ ssize_t r; -+ -+ r = eytzinger0_find_le(test_array, nr, -+ sizeof(test_array[0]), -+ cmp_u16, &search); -+ if (r >= 0) -+ c1 = test_array[r]; -+ -+ for (i = 0; i < nr; i++) -+ if (test_array[i] <= search && test_array[i] > c2) -+ c2 = test_array[i]; -+ -+ if (c1 != c2) { -+ eytzinger0_for_each(i, nr) -+ pr_info("[%3u] = %12u", i, test_array[i]); -+ pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i", -+ i, r, c1, c2); -+ } -+} -+ -+void eytzinger0_find_test(void) -+{ -+ unsigned i, nr, allocated = 1 << 12; -+ u16 *test_array = kmalloc_array(allocated, sizeof(test_array[0]), GFP_KERNEL); -+ -+ for (nr = 1; nr < allocated; nr++) { -+ pr_info("testing %u elems", nr); -+ -+ get_random_bytes(test_array, nr * sizeof(test_array[0])); -+ eytzinger0_sort(test_array, nr, sizeof(test_array[0]), cmp_u16, NULL); -+ -+ /* verify array is sorted correctly: */ -+ eytzinger0_for_each(i, nr) -+ BUG_ON(i != eytzinger0_last(nr) && -+ test_array[i] > test_array[eytzinger0_next(i, nr)]); -+ -+ for (i = 0; i < U16_MAX; i += 1 << 12) -+ eytzinger0_find_test_val(test_array, nr, i); -+ -+ for (i = 0; i < nr; i++) { -+ eytzinger0_find_test_val(test_array, nr, test_array[i] - 1); -+ eytzinger0_find_test_val(test_array, nr, test_array[i]); -+ eytzinger0_find_test_val(test_array, nr, test_array[i] + 1); -+ } -+ } -+ -+ kfree(test_array); -+} -+#endif -+ -+/* -+ * Accumulate percpu counters onto one cpu's copy - only valid when access -+ * 
against any percpu counter is guarded against -+ */ -+u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) -+{ -+ u64 *ret; -+ int cpu; -+ -+ preempt_disable(); -+ ret = this_cpu_ptr(p); -+ preempt_enable(); -+ -+ for_each_possible_cpu(cpu) { -+ u64 *i = per_cpu_ptr(p, cpu); -+ -+ if (i != ret) { -+ acc_u64s(ret, i, nr); -+ memset(i, 0, nr * sizeof(u64)); -+ } -+ } -+ -+ return ret; -+} -diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h -new file mode 100644 -index 000000000000..0128daba5970 ---- /dev/null -+++ b/fs/bcachefs/util.h -@@ -0,0 +1,761 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_UTIL_H -+#define _BCACHEFS_UTIL_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define PAGE_SECTOR_SHIFT (PAGE_SHIFT - 9) -+#define PAGE_SECTORS (1UL << PAGE_SECTOR_SHIFT) -+ -+struct closure; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+#define EBUG_ON(cond) BUG_ON(cond) -+#define atomic_dec_bug(v) BUG_ON(atomic_dec_return(v) < 0) -+#define atomic_inc_bug(v, i) BUG_ON(atomic_inc_return(v) <= i) -+#define atomic_sub_bug(i, v) BUG_ON(atomic_sub_return(i, v) < 0) -+#define atomic_add_bug(i, v) BUG_ON(atomic_add_return(i, v) < 0) -+#define atomic_long_dec_bug(v) BUG_ON(atomic_long_dec_return(v) < 0) -+#define atomic_long_sub_bug(i, v) BUG_ON(atomic_long_sub_return(i, v) < 0) -+#define atomic64_dec_bug(v) BUG_ON(atomic64_dec_return(v) < 0) -+#define atomic64_inc_bug(v, i) BUG_ON(atomic64_inc_return(v) <= i) -+#define atomic64_sub_bug(i, v) BUG_ON(atomic64_sub_return(i, v) < 0) -+#define atomic64_add_bug(i, v) BUG_ON(atomic64_add_return(i, v) < 0) -+ -+#define memcpy(dst, src, len) \ -+({ \ -+ void *_dst = (dst); \ -+ const void *_src = (src); \ -+ size_t _len = (len); \ -+ \ -+ BUG_ON(!((void *) (_dst) >= (void *) (_src) + (_len) || \ -+ (void *) (_dst) + (_len) <= (void *) (_src))); \ -+ memcpy(_dst, _src, _len); \ -+}) -+ -+#else /* DEBUG */ -+ -+#define EBUG_ON(cond) -+#define atomic_dec_bug(v) atomic_dec(v) -+#define atomic_inc_bug(v, i) atomic_inc(v) -+#define atomic_sub_bug(i, v) atomic_sub(i, v) -+#define atomic_add_bug(i, v) atomic_add(i, v) -+#define atomic_long_dec_bug(v) atomic_long_dec(v) -+#define atomic_long_sub_bug(i, v) atomic_long_sub(i, v) -+#define atomic64_dec_bug(v) atomic64_dec(v) -+#define atomic64_inc_bug(v, i) atomic64_inc(v) -+#define atomic64_sub_bug(i, v) atomic64_sub(i, v) -+#define atomic64_add_bug(i, v) atomic64_add(i, v) -+ -+#endif -+ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+#define CPU_BIG_ENDIAN 0 -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+#define CPU_BIG_ENDIAN 1 -+#endif -+ -+/* type hackery */ -+ -+#define type_is_exact(_val, _type) \ -+ __builtin_types_compatible_p(typeof(_val), _type) -+ -+#define type_is(_val, _type) \ -+ (__builtin_types_compatible_p(typeof(_val), _type) || \ -+ __builtin_types_compatible_p(typeof(_val), const _type)) -+ -+/* Userspace doesn't align allocations as nicely as the kernel allocators: */ -+static inline size_t buf_pages(void *p, size_t len) -+{ -+ return DIV_ROUND_UP(len + -+ ((unsigned long) p & (PAGE_SIZE - 1)), -+ PAGE_SIZE); -+} -+ -+static inline void vpfree(void *p, size_t size) -+{ -+ if (is_vmalloc_addr(p)) -+ vfree(p); -+ else -+ free_pages((unsigned long) p, get_order(size)); -+} -+ -+static inline void *vpmalloc(size_t size, gfp_t gfp_mask) -+{ -+ return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN, -+ get_order(size)) ?: -+ __vmalloc(size, gfp_mask, PAGE_KERNEL); 
-+} -+ -+static inline void kvpfree(void *p, size_t size) -+{ -+ if (size < PAGE_SIZE) -+ kfree(p); -+ else -+ vpfree(p, size); -+} -+ -+static inline void *kvpmalloc(size_t size, gfp_t gfp_mask) -+{ -+ return size < PAGE_SIZE -+ ? kmalloc(size, gfp_mask) -+ : vpmalloc(size, gfp_mask); -+} -+ -+int mempool_init_kvpmalloc_pool(mempool_t *, int, size_t); -+ -+#define HEAP(type) \ -+struct { \ -+ size_t size, used; \ -+ type *data; \ -+} -+ -+#define DECLARE_HEAP(type, name) HEAP(type) name -+ -+#define init_heap(heap, _size, gfp) \ -+({ \ -+ (heap)->used = 0; \ -+ (heap)->size = (_size); \ -+ (heap)->data = kvpmalloc((heap)->size * sizeof((heap)->data[0]),\ -+ (gfp)); \ -+}) -+ -+#define free_heap(heap) \ -+do { \ -+ kvpfree((heap)->data, (heap)->size * sizeof((heap)->data[0])); \ -+ (heap)->data = NULL; \ -+} while (0) -+ -+#define heap_set_backpointer(h, i, _fn) \ -+do { \ -+ void (*fn)(typeof(h), size_t) = _fn; \ -+ if (fn) \ -+ fn(h, i); \ -+} while (0) -+ -+#define heap_swap(h, i, j, set_backpointer) \ -+do { \ -+ swap((h)->data[i], (h)->data[j]); \ -+ heap_set_backpointer(h, i, set_backpointer); \ -+ heap_set_backpointer(h, j, set_backpointer); \ -+} while (0) -+ -+#define heap_peek(h) \ -+({ \ -+ EBUG_ON(!(h)->used); \ -+ (h)->data[0]; \ -+}) -+ -+#define heap_full(h) ((h)->used == (h)->size) -+ -+#define heap_sift_down(h, i, cmp, set_backpointer) \ -+do { \ -+ size_t _c, _j = i; \ -+ \ -+ for (; _j * 2 + 1 < (h)->used; _j = _c) { \ -+ _c = _j * 2 + 1; \ -+ if (_c + 1 < (h)->used && \ -+ cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0) \ -+ _c++; \ -+ \ -+ if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \ -+ break; \ -+ heap_swap(h, _c, _j, set_backpointer); \ -+ } \ -+} while (0) -+ -+#define heap_sift_up(h, i, cmp, set_backpointer) \ -+do { \ -+ while (i) { \ -+ size_t p = (i - 1) / 2; \ -+ if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \ -+ break; \ -+ heap_swap(h, i, p, set_backpointer); \ -+ i = p; \ -+ } \ -+} while (0) -+ -+#define __heap_add(h, d, cmp, set_backpointer) \ -+({ \ -+ size_t _i = (h)->used++; \ -+ (h)->data[_i] = d; \ -+ heap_set_backpointer(h, _i, set_backpointer); \ -+ \ -+ heap_sift_up(h, _i, cmp, set_backpointer); \ -+ _i; \ -+}) -+ -+#define heap_add(h, d, cmp, set_backpointer) \ -+({ \ -+ bool _r = !heap_full(h); \ -+ if (_r) \ -+ __heap_add(h, d, cmp, set_backpointer); \ -+ _r; \ -+}) -+ -+#define heap_add_or_replace(h, new, cmp, set_backpointer) \ -+do { \ -+ if (!heap_add(h, new, cmp, set_backpointer) && \ -+ cmp(h, new, heap_peek(h)) >= 0) { \ -+ (h)->data[0] = new; \ -+ heap_set_backpointer(h, 0, set_backpointer); \ -+ heap_sift_down(h, 0, cmp, set_backpointer); \ -+ } \ -+} while (0) -+ -+#define heap_del(h, i, cmp, set_backpointer) \ -+do { \ -+ size_t _i = (i); \ -+ \ -+ BUG_ON(_i >= (h)->used); \ -+ (h)->used--; \ -+ heap_swap(h, _i, (h)->used, set_backpointer); \ -+ heap_sift_up(h, _i, cmp, set_backpointer); \ -+ heap_sift_down(h, _i, cmp, set_backpointer); \ -+} while (0) -+ -+#define heap_pop(h, d, cmp, set_backpointer) \ -+({ \ -+ bool _r = (h)->used; \ -+ if (_r) { \ -+ (d) = (h)->data[0]; \ -+ heap_del(h, 0, cmp, set_backpointer); \ -+ } \ -+ _r; \ -+}) -+ -+#define heap_resort(heap, cmp, set_backpointer) \ -+do { \ -+ ssize_t _i; \ -+ for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \ -+ heap_sift_down(heap, _i, cmp, set_backpointer); \ -+} while (0) -+ -+#define ANYSINT_MAX(t) \ -+ ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1) -+ -+struct printbuf { -+ char *pos; -+ char *end; -+}; -+ -+static inline size_t 
printbuf_remaining(struct printbuf *buf) -+{ -+ return buf->end - buf->pos; -+} -+ -+#define _PBUF(_buf, _len) \ -+ ((struct printbuf) { \ -+ .pos = _buf, \ -+ .end = _buf + _len, \ -+ }) -+ -+#define PBUF(_buf) _PBUF(_buf, sizeof(_buf)) -+ -+#define pr_buf(_out, ...) \ -+do { \ -+ (_out)->pos += scnprintf((_out)->pos, printbuf_remaining(_out), \ -+ __VA_ARGS__); \ -+} while (0) -+ -+void bch_scnmemcpy(struct printbuf *, const char *, size_t); -+ -+int bch2_strtoint_h(const char *, int *); -+int bch2_strtouint_h(const char *, unsigned int *); -+int bch2_strtoll_h(const char *, long long *); -+int bch2_strtoull_h(const char *, unsigned long long *); -+int bch2_strtou64_h(const char *, u64 *); -+ -+static inline int bch2_strtol_h(const char *cp, long *res) -+{ -+#if BITS_PER_LONG == 32 -+ return bch2_strtoint_h(cp, (int *) res); -+#else -+ return bch2_strtoll_h(cp, (long long *) res); -+#endif -+} -+ -+static inline int bch2_strtoul_h(const char *cp, long *res) -+{ -+#if BITS_PER_LONG == 32 -+ return bch2_strtouint_h(cp, (unsigned int *) res); -+#else -+ return bch2_strtoull_h(cp, (unsigned long long *) res); -+#endif -+} -+ -+#define strtoi_h(cp, res) \ -+ ( type_is(*res, int) ? bch2_strtoint_h(cp, (void *) res)\ -+ : type_is(*res, long) ? bch2_strtol_h(cp, (void *) res)\ -+ : type_is(*res, long long) ? bch2_strtoll_h(cp, (void *) res)\ -+ : type_is(*res, unsigned) ? bch2_strtouint_h(cp, (void *) res)\ -+ : type_is(*res, unsigned long) ? bch2_strtoul_h(cp, (void *) res)\ -+ : type_is(*res, unsigned long long) ? bch2_strtoull_h(cp, (void *) res)\ -+ : -EINVAL) -+ -+#define strtoul_safe(cp, var) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (!_r) \ -+ var = _v; \ -+ _r; \ -+}) -+ -+#define strtoul_safe_clamp(cp, var, min, max) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (!_r) \ -+ var = clamp_t(typeof(var), _v, min, max); \ -+ _r; \ -+}) -+ -+#define strtoul_safe_restrict(cp, var, min, max) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (!_r && _v >= min && _v <= max) \ -+ var = _v; \ -+ else \ -+ _r = -EINVAL; \ -+ _r; \ -+}) -+ -+#define snprint(buf, size, var) \ -+ snprintf(buf, size, \ -+ type_is(var, int) ? "%i\n" \ -+ : type_is(var, unsigned) ? "%u\n" \ -+ : type_is(var, long) ? "%li\n" \ -+ : type_is(var, unsigned long) ? "%lu\n" \ -+ : type_is(var, s64) ? "%lli\n" \ -+ : type_is(var, u64) ? "%llu\n" \ -+ : type_is(var, char *) ? 
"%s\n" \ -+ : "%i\n", var) -+ -+void bch2_hprint(struct printbuf *, s64); -+ -+bool bch2_is_zero(const void *, size_t); -+ -+void bch2_string_opt_to_text(struct printbuf *, -+ const char * const [], size_t); -+ -+void bch2_flags_to_text(struct printbuf *, const char * const[], u64); -+u64 bch2_read_flag_list(char *, const char * const[]); -+ -+#define NR_QUANTILES 15 -+#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES) -+#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES) -+#define QUANTILE_LAST eytzinger0_last(NR_QUANTILES) -+ -+struct quantiles { -+ struct quantile_entry { -+ u64 m; -+ u64 step; -+ } entries[NR_QUANTILES]; -+}; -+ -+struct time_stat_buffer { -+ unsigned nr; -+ struct time_stat_buffer_entry { -+ u64 start; -+ u64 end; -+ } entries[32]; -+}; -+ -+struct time_stats { -+ spinlock_t lock; -+ u64 count; -+ /* all fields are in nanoseconds */ -+ u64 average_duration; -+ u64 average_frequency; -+ u64 max_duration; -+ u64 last_event; -+ struct quantiles quantiles; -+ -+ struct time_stat_buffer __percpu *buffer; -+}; -+ -+void __bch2_time_stats_update(struct time_stats *stats, u64, u64); -+ -+static inline void bch2_time_stats_update(struct time_stats *stats, u64 start) -+{ -+ __bch2_time_stats_update(stats, start, local_clock()); -+} -+ -+size_t bch2_time_stats_print(struct time_stats *, char *, size_t); -+ -+void bch2_time_stats_exit(struct time_stats *); -+void bch2_time_stats_init(struct time_stats *); -+ -+#define ewma_add(ewma, val, weight) \ -+({ \ -+ typeof(ewma) _ewma = (ewma); \ -+ typeof(weight) _weight = (weight); \ -+ \ -+ (((_ewma << _weight) - _ewma) + (val)) >> _weight; \ -+}) -+ -+struct bch_ratelimit { -+ /* Next time we want to do some work, in nanoseconds */ -+ u64 next; -+ -+ /* -+ * Rate at which we want to do work, in units per nanosecond -+ * The units here correspond to the units passed to -+ * bch2_ratelimit_increment() -+ */ -+ unsigned rate; -+}; -+ -+static inline void bch2_ratelimit_reset(struct bch_ratelimit *d) -+{ -+ d->next = local_clock(); -+} -+ -+u64 bch2_ratelimit_delay(struct bch_ratelimit *); -+void bch2_ratelimit_increment(struct bch_ratelimit *, u64); -+ -+struct bch_pd_controller { -+ struct bch_ratelimit rate; -+ unsigned long last_update; -+ -+ s64 last_actual; -+ s64 smoothed_derivative; -+ -+ unsigned p_term_inverse; -+ unsigned d_smooth; -+ unsigned d_term; -+ -+ /* for exporting to sysfs (no effect on behavior) */ -+ s64 last_derivative; -+ s64 last_proportional; -+ s64 last_change; -+ s64 last_target; -+ -+ /* If true, the rate will not increase if bch2_ratelimit_delay() -+ * is not being called often enough. 
*/ -+ bool backpressure; -+}; -+ -+void bch2_pd_controller_update(struct bch_pd_controller *, s64, s64, int); -+void bch2_pd_controller_init(struct bch_pd_controller *); -+size_t bch2_pd_controller_print_debug(struct bch_pd_controller *, char *); -+ -+#define sysfs_pd_controller_attribute(name) \ -+ rw_attribute(name##_rate); \ -+ rw_attribute(name##_rate_bytes); \ -+ rw_attribute(name##_rate_d_term); \ -+ rw_attribute(name##_rate_p_term_inverse); \ -+ read_attribute(name##_rate_debug) -+ -+#define sysfs_pd_controller_files(name) \ -+ &sysfs_##name##_rate, \ -+ &sysfs_##name##_rate_bytes, \ -+ &sysfs_##name##_rate_d_term, \ -+ &sysfs_##name##_rate_p_term_inverse, \ -+ &sysfs_##name##_rate_debug -+ -+#define sysfs_pd_controller_show(name, var) \ -+do { \ -+ sysfs_hprint(name##_rate, (var)->rate.rate); \ -+ sysfs_print(name##_rate_bytes, (var)->rate.rate); \ -+ sysfs_print(name##_rate_d_term, (var)->d_term); \ -+ sysfs_print(name##_rate_p_term_inverse, (var)->p_term_inverse); \ -+ \ -+ if (attr == &sysfs_##name##_rate_debug) \ -+ return bch2_pd_controller_print_debug(var, buf); \ -+} while (0) -+ -+#define sysfs_pd_controller_store(name, var) \ -+do { \ -+ sysfs_strtoul_clamp(name##_rate, \ -+ (var)->rate.rate, 1, UINT_MAX); \ -+ sysfs_strtoul_clamp(name##_rate_bytes, \ -+ (var)->rate.rate, 1, UINT_MAX); \ -+ sysfs_strtoul(name##_rate_d_term, (var)->d_term); \ -+ sysfs_strtoul_clamp(name##_rate_p_term_inverse, \ -+ (var)->p_term_inverse, 1, INT_MAX); \ -+} while (0) -+ -+#define container_of_or_null(ptr, type, member) \ -+({ \ -+ typeof(ptr) _ptr = ptr; \ -+ _ptr ? container_of(_ptr, type, member) : NULL; \ -+}) -+ -+/* Does linear interpolation between powers of two */ -+static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) -+{ -+ unsigned fract = x & ~(~0 << fract_bits); -+ -+ x >>= fract_bits; -+ x = 1 << x; -+ x += (x * fract) >> fract_bits; -+ -+ return x; -+} -+ -+void bch2_bio_map(struct bio *bio, void *base, size_t); -+int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t); -+ -+static inline sector_t bdev_sectors(struct block_device *bdev) -+{ -+ return bdev->bd_inode->i_size >> 9; -+} -+ -+#define closure_bio_submit(bio, cl) \ -+do { \ -+ closure_get(cl); \ -+ submit_bio(bio); \ -+} while (0) -+ -+#define kthread_wait_freezable(cond) \ -+({ \ -+ int _ret = 0; \ -+ while (1) { \ -+ set_current_state(TASK_INTERRUPTIBLE); \ -+ if (kthread_should_stop()) { \ -+ _ret = -1; \ -+ break; \ -+ } \ -+ \ -+ if (cond) \ -+ break; \ -+ \ -+ schedule(); \ -+ try_to_freeze(); \ -+ } \ -+ set_current_state(TASK_RUNNING); \ -+ _ret; \ -+}) -+ -+size_t bch2_rand_range(size_t); -+ -+void memcpy_to_bio(struct bio *, struct bvec_iter, const void *); -+void memcpy_from_bio(void *, struct bio *, struct bvec_iter); -+ -+static inline void memcpy_u64s_small(void *dst, const void *src, -+ unsigned u64s) -+{ -+ u64 *d = dst; -+ const u64 *s = src; -+ -+ while (u64s--) -+ *d++ = *s++; -+} -+ -+static inline void __memcpy_u64s(void *dst, const void *src, -+ unsigned u64s) -+{ -+#ifdef CONFIG_X86_64 -+ long d0, d1, d2; -+ asm volatile("rep ; movsq" -+ : "=&c" (d0), "=&D" (d1), "=&S" (d2) -+ : "0" (u64s), "1" (dst), "2" (src) -+ : "memory"); -+#else -+ u64 *d = dst; -+ const u64 *s = src; -+ -+ while (u64s--) -+ *d++ = *s++; -+#endif -+} -+ -+static inline void memcpy_u64s(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(!(dst >= src + u64s * sizeof(u64) || -+ dst + u64s * sizeof(u64) <= src)); -+ -+ __memcpy_u64s(dst, src, u64s); -+} -+ -+static inline void __memmove_u64s_down(void 
*dst, const void *src, -+ unsigned u64s) -+{ -+ __memcpy_u64s(dst, src, u64s); -+} -+ -+static inline void memmove_u64s_down(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(dst > src); -+ -+ __memmove_u64s_down(dst, src, u64s); -+} -+ -+static inline void __memmove_u64s_up_small(void *_dst, const void *_src, -+ unsigned u64s) -+{ -+ u64 *dst = (u64 *) _dst + u64s; -+ u64 *src = (u64 *) _src + u64s; -+ -+ while (u64s--) -+ *--dst = *--src; -+} -+ -+static inline void memmove_u64s_up_small(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(dst < src); -+ -+ __memmove_u64s_up_small(dst, src, u64s); -+} -+ -+static inline void __memmove_u64s_up(void *_dst, const void *_src, -+ unsigned u64s) -+{ -+ u64 *dst = (u64 *) _dst + u64s - 1; -+ u64 *src = (u64 *) _src + u64s - 1; -+ -+#ifdef CONFIG_X86_64 -+ long d0, d1, d2; -+ asm volatile("std ;\n" -+ "rep ; movsq\n" -+ "cld ;\n" -+ : "=&c" (d0), "=&D" (d1), "=&S" (d2) -+ : "0" (u64s), "1" (dst), "2" (src) -+ : "memory"); -+#else -+ while (u64s--) -+ *dst-- = *src--; -+#endif -+} -+ -+static inline void memmove_u64s_up(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(dst < src); -+ -+ __memmove_u64s_up(dst, src, u64s); -+} -+ -+static inline void memmove_u64s(void *dst, const void *src, -+ unsigned u64s) -+{ -+ if (dst < src) -+ __memmove_u64s_down(dst, src, u64s); -+ else -+ __memmove_u64s_up(dst, src, u64s); -+} -+ -+/* Set the last few bytes up to a u64 boundary given an offset into a buffer. */ -+static inline void memset_u64s_tail(void *s, int c, unsigned bytes) -+{ -+ unsigned rem = round_up(bytes, sizeof(u64)) - bytes; -+ -+ memset(s + bytes, c, rem); -+} -+ -+void sort_cmp_size(void *base, size_t num, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t)); -+ -+/* just the memmove, doesn't update @_nr */ -+#define __array_insert_item(_array, _nr, _pos) \ -+ memmove(&(_array)[(_pos) + 1], \ -+ &(_array)[(_pos)], \ -+ sizeof((_array)[0]) * ((_nr) - (_pos))) -+ -+#define array_insert_item(_array, _nr, _pos, _new_item) \ -+do { \ -+ __array_insert_item(_array, _nr, _pos); \ -+ (_nr)++; \ -+ (_array)[(_pos)] = (_new_item); \ -+} while (0) -+ -+#define array_remove_items(_array, _nr, _pos, _nr_to_remove) \ -+do { \ -+ (_nr) -= (_nr_to_remove); \ -+ memmove(&(_array)[(_pos)], \ -+ &(_array)[(_pos) + (_nr_to_remove)], \ -+ sizeof((_array)[0]) * ((_nr) - (_pos))); \ -+} while (0) -+ -+#define array_remove_item(_array, _nr, _pos) \ -+ array_remove_items(_array, _nr, _pos, 1) -+ -+#define bubble_sort(_base, _nr, _cmp) \ -+do { \ -+ ssize_t _i, _end; \ -+ bool _swapped = true; \ -+ \ -+ for (_end = (ssize_t) (_nr) - 1; _end > 0 && _swapped; --_end) {\ -+ _swapped = false; \ -+ for (_i = 0; _i < _end; _i++) \ -+ if (_cmp((_base)[_i], (_base)[_i + 1]) > 0) { \ -+ swap((_base)[_i], (_base)[_i + 1]); \ -+ _swapped = true; \ -+ } \ -+ } \ -+} while (0) -+ -+static inline u64 percpu_u64_get(u64 __percpu *src) -+{ -+ u64 ret = 0; -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ ret += *per_cpu_ptr(src, cpu); -+ return ret; -+} -+ -+static inline void percpu_u64_set(u64 __percpu *dst, u64 src) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ *per_cpu_ptr(dst, cpu) = 0; -+ -+ preempt_disable(); -+ *this_cpu_ptr(dst) = src; -+ preempt_enable(); -+} -+ -+static inline void acc_u64s(u64 *acc, const u64 *src, unsigned nr) -+{ -+ unsigned i; -+ -+ for (i = 0; i < nr; i++) -+ acc[i] += src[i]; -+} -+ -+static inline void acc_u64s_percpu(u64 *acc, const u64 __percpu *src, 
-+ unsigned nr) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ acc_u64s(acc, per_cpu_ptr(src, cpu), nr); -+} -+ -+static inline void percpu_memset(void __percpu *p, int c, size_t bytes) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ memset(per_cpu_ptr(p, cpu), c, bytes); -+} -+ -+u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned); -+ -+#define cmp_int(l, r) ((l > r) - (l < r)) -+ -+#endif /* _BCACHEFS_UTIL_H */ -diff --git a/fs/bcachefs/vstructs.h b/fs/bcachefs/vstructs.h -new file mode 100644 -index 000000000000..c099cdc0605f ---- /dev/null -+++ b/fs/bcachefs/vstructs.h -@@ -0,0 +1,63 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _VSTRUCTS_H -+#define _VSTRUCTS_H -+ -+#include "util.h" -+ -+/* -+ * NOTE: we can't differentiate between __le64 and u64 with type_is - this -+ * assumes u64 is little endian: -+ */ -+#define __vstruct_u64s(_s) \ -+({ \ -+ ( type_is((_s)->u64s, u64) ? le64_to_cpu((__force __le64) (_s)->u64s) \ -+ : type_is((_s)->u64s, u32) ? le32_to_cpu((__force __le32) (_s)->u64s) \ -+ : type_is((_s)->u64s, u16) ? le16_to_cpu((__force __le16) (_s)->u64s) \ -+ : ((__force u8) ((_s)->u64s))); \ -+}) -+ -+#define __vstruct_bytes(_type, _u64s) \ -+({ \ -+ BUILD_BUG_ON(offsetof(_type, _data) % sizeof(u64)); \ -+ \ -+ (offsetof(_type, _data) + (_u64s) * sizeof(u64)); \ -+}) -+ -+#define vstruct_bytes(_s) \ -+ __vstruct_bytes(typeof(*(_s)), __vstruct_u64s(_s)) -+ -+#define __vstruct_blocks(_type, _sector_block_bits, _u64s) \ -+ (round_up(__vstruct_bytes(_type, _u64s), \ -+ 512 << (_sector_block_bits)) >> (9 + (_sector_block_bits))) -+ -+#define vstruct_blocks(_s, _sector_block_bits) \ -+ __vstruct_blocks(typeof(*(_s)), _sector_block_bits, __vstruct_u64s(_s)) -+ -+#define vstruct_blocks_plus(_s, _sector_block_bits, _u64s) \ -+ __vstruct_blocks(typeof(*(_s)), _sector_block_bits, \ -+ __vstruct_u64s(_s) + (_u64s)) -+ -+#define vstruct_sectors(_s, _sector_block_bits) \ -+ (round_up(vstruct_bytes(_s), 512 << (_sector_block_bits)) >> 9) -+ -+#define vstruct_next(_s) \ -+ ((typeof(_s)) ((_s)->_data + __vstruct_u64s(_s))) -+#define vstruct_last(_s) \ -+ ((typeof(&(_s)->start[0])) ((_s)->_data + __vstruct_u64s(_s))) -+#define vstruct_end(_s) \ -+ ((void *) ((_s)->_data + __vstruct_u64s(_s))) -+ -+#define vstruct_for_each(_s, _i) \ -+ for (_i = (_s)->start; \ -+ _i < vstruct_last(_s); \ -+ _i = vstruct_next(_i)) -+ -+#define vstruct_for_each_safe(_s, _i, _t) \ -+ for (_i = (_s)->start; \ -+ _i < vstruct_last(_s) && (_t = vstruct_next(_i), true); \ -+ _i = _t) -+ -+#define vstruct_idx(_s, _idx) \ -+ ((typeof(&(_s)->start[0])) ((_s)->_data + (_idx))) -+ -+#endif /* _VSTRUCTS_H */ -diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c -new file mode 100644 -index 000000000000..725a6f3ef8ce ---- /dev/null -+++ b/fs/bcachefs/xattr.c -@@ -0,0 +1,582 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_update.h" -+#include "extents.h" -+#include "fs.h" -+#include "rebalance.h" -+#include "str_hash.h" -+#include "xattr.h" -+ -+#include -+#include -+#include -+ -+static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned); -+ -+static u64 bch2_xattr_hash(const struct bch_hash_info *info, -+ const struct xattr_search_key *key) -+{ -+ struct bch_str_hash_ctx ctx; -+ -+ bch2_str_hash_init(&ctx, info); -+ bch2_str_hash_update(&ctx, info, &key->type, sizeof(key->type)); -+ bch2_str_hash_update(&ctx, info, key->name.name, key->name.len); -+ -+ return bch2_str_hash_end(&ctx, info); -+} -+ -+static u64 
xattr_hash_key(const struct bch_hash_info *info, const void *key) -+{ -+ return bch2_xattr_hash(info, key); -+} -+ -+static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) -+{ -+ struct bkey_s_c_xattr x = bkey_s_c_to_xattr(k); -+ -+ return bch2_xattr_hash(info, -+ &X_SEARCH(x.v->x_type, x.v->x_name, x.v->x_name_len)); -+} -+ -+static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r) -+{ -+ struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l); -+ const struct xattr_search_key *r = _r; -+ -+ return l.v->x_type != r->type || -+ l.v->x_name_len != r->name.len || -+ memcmp(l.v->x_name, r->name.name, r->name.len); -+} -+ -+static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) -+{ -+ struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l); -+ struct bkey_s_c_xattr r = bkey_s_c_to_xattr(_r); -+ -+ return l.v->x_type != r.v->x_type || -+ l.v->x_name_len != r.v->x_name_len || -+ memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len); -+} -+ -+const struct bch_hash_desc bch2_xattr_hash_desc = { -+ .btree_id = BTREE_ID_XATTRS, -+ .key_type = KEY_TYPE_xattr, -+ .hash_key = xattr_hash_key, -+ .hash_bkey = xattr_hash_bkey, -+ .cmp_key = xattr_cmp_key, -+ .cmp_bkey = xattr_cmp_bkey, -+}; -+ -+const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ const struct xattr_handler *handler; -+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); -+ -+ if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr)) -+ return "value too small"; -+ -+ if (bkey_val_u64s(k.k) < -+ xattr_val_u64s(xattr.v->x_name_len, -+ le16_to_cpu(xattr.v->x_val_len))) -+ return "value too small"; -+ -+ if (bkey_val_u64s(k.k) > -+ xattr_val_u64s(xattr.v->x_name_len, -+ le16_to_cpu(xattr.v->x_val_len) + 4)) -+ return "value too big"; -+ -+ handler = bch2_xattr_type_to_handler(xattr.v->x_type); -+ if (!handler) -+ return "invalid type"; -+ -+ if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len)) -+ return "xattr name has invalid characters"; -+ -+ return NULL; -+} -+ -+void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ const struct xattr_handler *handler; -+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); -+ -+ handler = bch2_xattr_type_to_handler(xattr.v->x_type); -+ if (handler && handler->prefix) -+ pr_buf(out, "%s", handler->prefix); -+ else if (handler) -+ pr_buf(out, "(type %u)", xattr.v->x_type); -+ else -+ pr_buf(out, "(unknown type %u)", xattr.v->x_type); -+ -+ bch_scnmemcpy(out, xattr.v->x_name, -+ xattr.v->x_name_len); -+ pr_buf(out, ":"); -+ bch_scnmemcpy(out, xattr_val(xattr.v), -+ le16_to_cpu(xattr.v->x_val_len)); -+} -+ -+int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, -+ const char *name, void *buffer, size_t size, int type) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c_xattr xattr; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, -+ &inode->ei_str_hash, inode->v.i_ino, -+ &X_SEARCH(type, name, strlen(name)), -+ 0); -+ if (IS_ERR(iter)) { -+ bch2_trans_exit(&trans); -+ BUG_ON(PTR_ERR(iter) == -EINTR); -+ -+ return PTR_ERR(iter) == -ENOENT ? 
-ENODATA : PTR_ERR(iter); -+ } -+ -+ xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); -+ ret = le16_to_cpu(xattr.v->x_val_len); -+ if (buffer) { -+ if (ret > size) -+ ret = -ERANGE; -+ else -+ memcpy(buffer, xattr_val(xattr.v), ret); -+ } -+ -+ bch2_trans_exit(&trans); -+ return ret; -+} -+ -+int bch2_xattr_set(struct btree_trans *trans, u64 inum, -+ const struct bch_hash_info *hash_info, -+ const char *name, const void *value, size_t size, -+ int type, int flags) -+{ -+ int ret; -+ -+ if (value) { -+ struct bkey_i_xattr *xattr; -+ unsigned namelen = strlen(name); -+ unsigned u64s = BKEY_U64s + -+ xattr_val_u64s(namelen, size); -+ -+ if (u64s > U8_MAX) -+ return -ERANGE; -+ -+ xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); -+ if (IS_ERR(xattr)) -+ return PTR_ERR(xattr); -+ -+ bkey_xattr_init(&xattr->k_i); -+ xattr->k.u64s = u64s; -+ xattr->v.x_type = type; -+ xattr->v.x_name_len = namelen; -+ xattr->v.x_val_len = cpu_to_le16(size); -+ memcpy(xattr->v.x_name, name, namelen); -+ memcpy(xattr_val(&xattr->v), value, size); -+ -+ ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info, -+ inum, &xattr->k_i, -+ (flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)| -+ (flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0)); -+ } else { -+ struct xattr_search_key search = -+ X_SEARCH(type, name, strlen(name)); -+ -+ ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, -+ hash_info, inum, &search); -+ } -+ -+ if (ret == -ENOENT) -+ ret = flags & XATTR_REPLACE ? -ENODATA : 0; -+ -+ return ret; -+} -+ -+struct xattr_buf { -+ char *buf; -+ size_t len; -+ size_t used; -+}; -+ -+static int __bch2_xattr_emit(const char *prefix, -+ const char *name, size_t name_len, -+ struct xattr_buf *buf) -+{ -+ const size_t prefix_len = strlen(prefix); -+ const size_t total_len = prefix_len + name_len + 1; -+ -+ if (buf->buf) { -+ if (buf->used + total_len > buf->len) -+ return -ERANGE; -+ -+ memcpy(buf->buf + buf->used, prefix, prefix_len); -+ memcpy(buf->buf + buf->used + prefix_len, -+ name, name_len); -+ buf->buf[buf->used + prefix_len + name_len] = '\0'; -+ } -+ -+ buf->used += total_len; -+ return 0; -+} -+ -+static int bch2_xattr_emit(struct dentry *dentry, -+ const struct bch_xattr *xattr, -+ struct xattr_buf *buf) -+{ -+ const struct xattr_handler *handler = -+ bch2_xattr_type_to_handler(xattr->x_type); -+ -+ return handler && (!handler->list || handler->list(dentry)) -+ ? __bch2_xattr_emit(handler->prefix ?: handler->name, -+ xattr->x_name, xattr->x_name_len, buf) -+ : 0; -+} -+ -+static int bch2_xattr_list_bcachefs(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct xattr_buf *buf, -+ bool all) -+{ -+ const char *prefix = all ? "bcachefs_effective." 
: "bcachefs."; -+ unsigned id; -+ int ret = 0; -+ u64 v; -+ -+ for (id = 0; id < Inode_opt_nr; id++) { -+ v = bch2_inode_opt_get(&inode->ei_inode, id); -+ if (!v) -+ continue; -+ -+ if (!all && -+ !(inode->ei_inode.bi_fields_set & (1 << id))) -+ continue; -+ -+ ret = __bch2_xattr_emit(prefix, bch2_inode_opts[id], -+ strlen(bch2_inode_opts[id]), buf); -+ if (ret) -+ break; -+ } -+ -+ return ret; -+} -+ -+ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) -+{ -+ struct bch_fs *c = dentry->d_sb->s_fs_info; -+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct xattr_buf buf = { .buf = buffer, .len = buffer_size }; -+ u64 inum = dentry->d_inode->i_ino; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, -+ POS(inum, 0), 0, k, ret) { -+ BUG_ON(k.k->p.inode < inum); -+ -+ if (k.k->p.inode > inum) -+ break; -+ -+ if (k.k->type != KEY_TYPE_xattr) -+ continue; -+ -+ ret = bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, &buf); -+ if (ret) -+ break; -+ } -+ ret = bch2_trans_exit(&trans) ?: ret; -+ -+ if (ret) -+ return ret; -+ -+ ret = bch2_xattr_list_bcachefs(c, inode, &buf, false); -+ if (ret) -+ return ret; -+ -+ ret = bch2_xattr_list_bcachefs(c, inode, &buf, true); -+ if (ret) -+ return ret; -+ -+ return buf.used; -+} -+ -+static int bch2_xattr_get_handler(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ return bch2_xattr_get(c, inode, name, buffer, size, handler->flags); -+} -+ -+static int bch2_xattr_set_handler(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, const void *value, -+ size_t size, int flags) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0, -+ bch2_xattr_set(&trans, inode->v.i_ino, -+ &inode->ei_str_hash, -+ name, value, size, -+ handler->flags, flags)); -+} -+ -+static const struct xattr_handler bch_xattr_user_handler = { -+ .prefix = XATTR_USER_PREFIX, -+ .get = bch2_xattr_get_handler, -+ .set = bch2_xattr_set_handler, -+ .flags = KEY_TYPE_XATTR_INDEX_USER, -+}; -+ -+static bool bch2_xattr_trusted_list(struct dentry *dentry) -+{ -+ return capable(CAP_SYS_ADMIN); -+} -+ -+static const struct xattr_handler bch_xattr_trusted_handler = { -+ .prefix = XATTR_TRUSTED_PREFIX, -+ .list = bch2_xattr_trusted_list, -+ .get = bch2_xattr_get_handler, -+ .set = bch2_xattr_set_handler, -+ .flags = KEY_TYPE_XATTR_INDEX_TRUSTED, -+}; -+ -+static const struct xattr_handler bch_xattr_security_handler = { -+ .prefix = XATTR_SECURITY_PREFIX, -+ .get = bch2_xattr_get_handler, -+ .set = bch2_xattr_set_handler, -+ .flags = KEY_TYPE_XATTR_INDEX_SECURITY, -+}; -+ -+#ifndef NO_BCACHEFS_FS -+ -+static int opt_to_inode_opt(int id) -+{ -+ switch (id) { -+#define x(name, ...) 
\ -+ case Opt_##name: return Inode_opt_##name; -+ BCH_INODE_OPTS() -+#undef x -+ default: -+ return -1; -+ } -+} -+ -+static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size, -+ bool all) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_opts opts = -+ bch2_inode_opts_to_opts(bch2_inode_opts_get(&inode->ei_inode)); -+ const struct bch_option *opt; -+ int id, inode_opt_id; -+ char buf[512]; -+ struct printbuf out = PBUF(buf); -+ unsigned val_len; -+ u64 v; -+ -+ id = bch2_opt_lookup(name); -+ if (id < 0 || !bch2_opt_is_inode_opt(id)) -+ return -EINVAL; -+ -+ inode_opt_id = opt_to_inode_opt(id); -+ if (inode_opt_id < 0) -+ return -EINVAL; -+ -+ opt = bch2_opt_table + id; -+ -+ if (!bch2_opt_defined_by_id(&opts, id)) -+ return -ENODATA; -+ -+ if (!all && -+ !(inode->ei_inode.bi_fields_set & (1 << inode_opt_id))) -+ return -ENODATA; -+ -+ v = bch2_opt_get_by_id(&opts, id); -+ bch2_opt_to_text(&out, c, opt, v, 0); -+ -+ val_len = out.pos - buf; -+ -+ if (buffer && val_len > size) -+ return -ERANGE; -+ -+ if (buffer) -+ memcpy(buffer, buf, val_len); -+ return val_len; -+} -+ -+static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size) -+{ -+ return __bch2_xattr_bcachefs_get(handler, dentry, vinode, -+ name, buffer, size, false); -+} -+ -+struct inode_opt_set { -+ int id; -+ u64 v; -+ bool defined; -+}; -+ -+static int inode_opt_set_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct inode_opt_set *s = p; -+ -+ if (s->defined) -+ bi->bi_fields_set |= 1U << s->id; -+ else -+ bi->bi_fields_set &= ~(1U << s->id); -+ -+ bch2_inode_opt_set(bi, s->id, s->v); -+ -+ return 0; -+} -+ -+static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, const void *value, -+ size_t size, int flags) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ const struct bch_option *opt; -+ char *buf; -+ struct inode_opt_set s; -+ int opt_id, inode_opt_id, ret; -+ -+ opt_id = bch2_opt_lookup(name); -+ if (opt_id < 0) -+ return -EINVAL; -+ -+ opt = bch2_opt_table + opt_id; -+ -+ inode_opt_id = opt_to_inode_opt(opt_id); -+ if (inode_opt_id < 0) -+ return -EINVAL; -+ -+ s.id = inode_opt_id; -+ -+ if (value) { -+ u64 v = 0; -+ -+ buf = kmalloc(size + 1, GFP_KERNEL); -+ if (!buf) -+ return -ENOMEM; -+ memcpy(buf, value, size); -+ buf[size] = '\0'; -+ -+ ret = bch2_opt_parse(c, opt, buf, &v); -+ kfree(buf); -+ -+ if (ret < 0) -+ return ret; -+ -+ ret = bch2_opt_check_may_set(c, opt_id, v); -+ if (ret < 0) -+ return ret; -+ -+ s.v = v + 1; -+ s.defined = true; -+ } else { -+ if (!IS_ROOT(dentry)) { -+ struct bch_inode_info *dir = -+ to_bch_ei(d_inode(dentry->d_parent)); -+ -+ s.v = bch2_inode_opt_get(&dir->ei_inode, inode_opt_id); -+ } else { -+ s.v = 0; -+ } -+ -+ s.defined = false; -+ } -+ -+ mutex_lock(&inode->ei_update_lock); -+ if (inode_opt_id == Inode_opt_project) { -+ ret = bch2_set_projid(c, inode, s.v); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); -+err: -+ mutex_unlock(&inode->ei_update_lock); -+ -+ if (value && -+ (opt_id == Opt_background_compression || -+ opt_id == Opt_background_target)) -+ 
bch2_rebalance_add_work(c, inode->v.i_blocks); -+ -+ return ret; -+} -+ -+static const struct xattr_handler bch_xattr_bcachefs_handler = { -+ .prefix = "bcachefs.", -+ .get = bch2_xattr_bcachefs_get, -+ .set = bch2_xattr_bcachefs_set, -+}; -+ -+static int bch2_xattr_bcachefs_get_effective( -+ const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size) -+{ -+ return __bch2_xattr_bcachefs_get(handler, dentry, vinode, -+ name, buffer, size, true); -+} -+ -+static const struct xattr_handler bch_xattr_bcachefs_effective_handler = { -+ .prefix = "bcachefs_effective.", -+ .get = bch2_xattr_bcachefs_get_effective, -+ .set = bch2_xattr_bcachefs_set, -+}; -+ -+#endif /* NO_BCACHEFS_FS */ -+ -+const struct xattr_handler *bch2_xattr_handlers[] = { -+ &bch_xattr_user_handler, -+ &posix_acl_access_xattr_handler, -+ &posix_acl_default_xattr_handler, -+ &bch_xattr_trusted_handler, -+ &bch_xattr_security_handler, -+#ifndef NO_BCACHEFS_FS -+ &bch_xattr_bcachefs_handler, -+ &bch_xattr_bcachefs_effective_handler, -+#endif -+ NULL -+}; -+ -+static const struct xattr_handler *bch_xattr_handler_map[] = { -+ [KEY_TYPE_XATTR_INDEX_USER] = &bch_xattr_user_handler, -+ [KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS] = -+ &posix_acl_access_xattr_handler, -+ [KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT] = -+ &posix_acl_default_xattr_handler, -+ [KEY_TYPE_XATTR_INDEX_TRUSTED] = &bch_xattr_trusted_handler, -+ [KEY_TYPE_XATTR_INDEX_SECURITY] = &bch_xattr_security_handler, -+}; -+ -+static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned type) -+{ -+ return type < ARRAY_SIZE(bch_xattr_handler_map) -+ ? bch_xattr_handler_map[type] -+ : NULL; -+} -diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h -new file mode 100644 -index 000000000000..4151065ab853 ---- /dev/null -+++ b/fs/bcachefs/xattr.h -@@ -0,0 +1,49 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_XATTR_H -+#define _BCACHEFS_XATTR_H -+ -+#include "str_hash.h" -+ -+extern const struct bch_hash_desc bch2_xattr_hash_desc; -+ -+const char *bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_xattr (struct bkey_ops) { \ -+ .key_invalid = bch2_xattr_invalid, \ -+ .val_to_text = bch2_xattr_to_text, \ -+} -+ -+static inline unsigned xattr_val_u64s(unsigned name_len, unsigned val_len) -+{ -+ return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name) + -+ name_len + val_len, sizeof(u64)); -+} -+ -+#define xattr_val(_xattr) \ -+ ((void *) (_xattr)->x_name + (_xattr)->x_name_len) -+ -+struct xattr_search_key { -+ u8 type; -+ struct qstr name; -+}; -+ -+#define X_SEARCH(_type, _name, _len) ((struct xattr_search_key) \ -+ { .type = _type, .name = QSTR_INIT(_name, _len) }) -+ -+struct dentry; -+struct xattr_handler; -+struct bch_hash_info; -+struct bch_inode_info; -+ -+int bch2_xattr_get(struct bch_fs *, struct bch_inode_info *, -+ const char *, void *, size_t, int); -+ -+int bch2_xattr_set(struct btree_trans *, u64, const struct bch_hash_info *, -+ const char *, const void *, size_t, int, int); -+ -+ssize_t bch2_xattr_list(struct dentry *, char *, size_t); -+ -+extern const struct xattr_handler *bch2_xattr_handlers[]; -+ -+#endif /* _BCACHEFS_XATTR_H */ -diff --git a/fs/cifs/file.c b/fs/cifs/file.c -index 75ddce8ef456..31d4aff3bbe5 100644 ---- a/fs/cifs/file.c -+++ b/fs/cifs/file.c -@@ -4299,20 +4299,12 @@ readpages_get_pages(struct address_space *mapping, struct list_head 
*page_list, - - page = lru_to_page(page_list); - -- /* -- * Lock the page and put it in the cache. Since no one else -- * should have access to this page, we're safe to simply set -- * PG_locked without checking it first. -- */ -- __SetPageLocked(page); -- rc = add_to_page_cache_locked(page, mapping, -- page->index, gfp); -+ rc = add_to_page_cache(page, mapping, -+ page->index, gfp); - - /* give up if we can't stick it in the cache */ -- if (rc) { -- __ClearPageLocked(page); -+ if (rc) - return rc; -- } - - /* move first page to the tmplist */ - *offset = (loff_t)page->index << PAGE_SHIFT; -@@ -4331,11 +4323,8 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, - if (*bytes + PAGE_SIZE > rsize) - break; - -- __SetPageLocked(page); -- if (add_to_page_cache_locked(page, mapping, page->index, gfp)) { -- __ClearPageLocked(page); -+ if (add_to_page_cache(page, mapping, page->index, gfp)) - break; -- } - list_move_tail(&page->lru, tmplist); - (*bytes) += PAGE_SIZE; - expected_index++; -diff --git a/fs/dcache.c b/fs/dcache.c -index b280e07e162b..7a73f5bf9c76 100644 ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -3113,9 +3113,8 @@ void d_genocide(struct dentry *parent) - - EXPORT_SYMBOL(d_genocide); - --void d_tmpfile(struct dentry *dentry, struct inode *inode) -+void d_mark_tmpfile(struct dentry *dentry, struct inode *inode) - { -- inode_dec_link_count(inode); - BUG_ON(dentry->d_name.name != dentry->d_iname || - !hlist_unhashed(&dentry->d_u.d_alias) || - !d_unlinked(dentry)); -@@ -3125,6 +3124,13 @@ void d_tmpfile(struct dentry *dentry, struct inode *inode) - (unsigned long long)inode->i_ino); - spin_unlock(&dentry->d_lock); - spin_unlock(&dentry->d_parent->d_lock); -+} -+EXPORT_SYMBOL(d_mark_tmpfile); -+ -+void d_tmpfile(struct dentry *dentry, struct inode *inode) -+{ -+ inode_dec_link_count(inode); -+ d_mark_tmpfile(dentry, inode); - d_instantiate(dentry, inode); - } - EXPORT_SYMBOL(d_tmpfile); -diff --git a/fs/inode.c b/fs/inode.c -index 93d9252a00ab..f2b6d24f3456 100644 ---- a/fs/inode.c -+++ b/fs/inode.c -@@ -1503,6 +1503,46 @@ int insert_inode_locked(struct inode *inode) - } - EXPORT_SYMBOL(insert_inode_locked); - -+struct inode *insert_inode_locked2(struct inode *inode) -+{ -+ struct super_block *sb = inode->i_sb; -+ ino_t ino = inode->i_ino; -+ struct hlist_head *head = inode_hashtable + hash(sb, ino); -+ -+ while (1) { -+ struct inode *old = NULL; -+ spin_lock(&inode_hash_lock); -+ hlist_for_each_entry(old, head, i_hash) { -+ if (old->i_ino != ino) -+ continue; -+ if (old->i_sb != sb) -+ continue; -+ spin_lock(&old->i_lock); -+ if (old->i_state & (I_FREEING|I_WILL_FREE)) { -+ spin_unlock(&old->i_lock); -+ continue; -+ } -+ break; -+ } -+ if (likely(!old)) { -+ spin_lock(&inode->i_lock); -+ inode->i_state |= I_NEW | I_CREATING; -+ hlist_add_head(&inode->i_hash, head); -+ spin_unlock(&inode->i_lock); -+ spin_unlock(&inode_hash_lock); -+ return NULL; -+ } -+ __iget(old); -+ spin_unlock(&old->i_lock); -+ spin_unlock(&inode_hash_lock); -+ wait_on_inode(old); -+ if (unlikely(!inode_unhashed(old))) -+ return old; -+ iput(old); -+ } -+} -+EXPORT_SYMBOL(insert_inode_locked2); -+ - int insert_inode_locked4(struct inode *inode, unsigned long hashval, - int (*test)(struct inode *, void *), void *data) - { -diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h -index 71e387a5fe90..e916f046fed4 100644 ---- a/include/asm-generic/vmlinux.lds.h -+++ b/include/asm-generic/vmlinux.lds.h -@@ -323,6 +323,10 @@ - __start___verbose = .; \ - 
KEEP(*(__verbose)) \ - __stop___verbose = .; \ -+ . = ALIGN(8); \ -+ __start___faults = .; \ -+ *(__faults) \ -+ __stop___faults = .; \ - LIKELY_PROFILE() \ - BRANCH_PROFILE() \ - TRACE_PRINTKS() \ -diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index 32868fbedc9e..2979f9082a98 100644 ---- a/include/linux/blkdev.h -+++ b/include/linux/blkdev.h -@@ -889,6 +889,7 @@ extern const char *blk_op_str(unsigned int op); - - int blk_status_to_errno(blk_status_t status); - blk_status_t errno_to_blk_status(int errno); -+const char *blk_status_to_str(blk_status_t status); - - int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin); - -diff --git a/include/linux/closure.h b/include/linux/closure.h -new file mode 100644 -index 000000000000..abacb91c3565 ---- /dev/null -+++ b/include/linux/closure.h -@@ -0,0 +1,404 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _LINUX_CLOSURE_H -+#define _LINUX_CLOSURE_H -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * Closure is perhaps the most overused and abused term in computer science, but -+ * since I've been unable to come up with anything better you're stuck with it -+ * again. -+ * -+ * What are closures? -+ * -+ * They embed a refcount. The basic idea is they count "things that are in -+ * progress" - in flight bios, some other thread that's doing something else - -+ * anything you might want to wait on. -+ * -+ * The refcount may be manipulated with closure_get() and closure_put(). -+ * closure_put() is where many of the interesting things happen, when it causes -+ * the refcount to go to 0. -+ * -+ * Closures can be used to wait on things both synchronously and asynchronously, -+ * and synchronous and asynchronous use can be mixed without restriction. To -+ * wait synchronously, use closure_sync() - you will sleep until your closure's -+ * refcount hits 1. -+ * -+ * To wait asynchronously, use -+ * continue_at(cl, next_function, workqueue); -+ * -+ * passing it, as you might expect, the function to run when nothing is pending -+ * and the workqueue to run that function out of. -+ * -+ * continue_at() also, critically, requires a 'return' immediately following the -+ * location where this macro is referenced, to return to the calling function. -+ * There's good reason for this. -+ * -+ * To use safely closures asynchronously, they must always have a refcount while -+ * they are running owned by the thread that is running them. Otherwise, suppose -+ * you submit some bios and wish to have a function run when they all complete: -+ * -+ * foo_endio(struct bio *bio) -+ * { -+ * closure_put(cl); -+ * } -+ * -+ * closure_init(cl); -+ * -+ * do_stuff(); -+ * closure_get(cl); -+ * bio1->bi_endio = foo_endio; -+ * bio_submit(bio1); -+ * -+ * do_more_stuff(); -+ * closure_get(cl); -+ * bio2->bi_endio = foo_endio; -+ * bio_submit(bio2); -+ * -+ * continue_at(cl, complete_some_read, system_wq); -+ * -+ * If closure's refcount started at 0, complete_some_read() could run before the -+ * second bio was submitted - which is almost always not what you want! More -+ * importantly, it wouldn't be possible to say whether the original thread or -+ * complete_some_read()'s thread owned the closure - and whatever state it was -+ * associated with! -+ * -+ * So, closure_init() initializes a closure's refcount to 1 - and when a -+ * closure_fn is run, the refcount will be reset to 1 first. -+ * -+ * Then, the rule is - if you got the refcount with closure_get(), release it -+ * with closure_put() (i.e, in a bio->bi_endio function). 
If you have a refcount -+ * on a closure because you called closure_init() or you were run out of a -+ * closure - _always_ use continue_at(). Doing so consistently will help -+ * eliminate an entire class of particularly pernicious races. -+ * -+ * Lastly, you might have a wait list dedicated to a specific event, and have no -+ * need for specifying the condition - you just want to wait until someone runs -+ * closure_wake_up() on the appropriate wait list. In that case, just use -+ * closure_wait(). It will return either true or false, depending on whether the -+ * closure was already on a wait list or not - a closure can only be on one wait -+ * list at a time. -+ * -+ * Parents: -+ * -+ * closure_init() takes two arguments - it takes the closure to initialize, and -+ * a (possibly null) parent. -+ * -+ * If parent is non null, the new closure will have a refcount for its lifetime; -+ * a closure is considered to be "finished" when its refcount hits 0 and the -+ * function to run is null. Hence -+ * -+ * continue_at(cl, NULL, NULL); -+ * -+ * returns up the (spaghetti) stack of closures, precisely like normal return -+ * returns up the C stack. continue_at() with non null fn is better thought of -+ * as doing a tail call. -+ * -+ * All this implies that a closure should typically be embedded in a particular -+ * struct (which its refcount will normally control the lifetime of), and that -+ * struct can very much be thought of as a stack frame. -+ */ -+ -+struct closure; -+struct closure_syncer; -+typedef void (closure_fn) (struct closure *); -+extern struct dentry *bcache_debug; -+ -+struct closure_waitlist { -+ struct llist_head list; -+}; -+ -+enum closure_state { -+ /* -+ * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by -+ * the thread that owns the closure, and cleared by the thread that's -+ * waking up the closure. -+ * -+ * The rest are for debugging and don't affect behaviour: -+ * -+ * CLOSURE_RUNNING: Set when a closure is running (i.e. by -+ * closure_init() and when closure_put() runs then next function), and -+ * must be cleared before remaining hits 0. Primarily to help guard -+ * against incorrect usage and accidentally transferring references. -+ * continue_at() and closure_return() clear it for you, if you're doing -+ * something unusual you can use closure_set_dead() which also helps -+ * annotate where references are being transferred. 
-+ */ -+ -+ CLOSURE_BITS_START = (1U << 26), -+ CLOSURE_DESTRUCTOR = (1U << 26), -+ CLOSURE_WAITING = (1U << 28), -+ CLOSURE_RUNNING = (1U << 30), -+}; -+ -+#define CLOSURE_GUARD_MASK \ -+ ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1) -+ -+#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) -+#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) -+ -+struct closure { -+ union { -+ struct { -+ struct workqueue_struct *wq; -+ struct closure_syncer *s; -+ struct llist_node list; -+ closure_fn *fn; -+ }; -+ struct work_struct work; -+ }; -+ -+ struct closure *parent; -+ -+ atomic_t remaining; -+ -+#ifdef CONFIG_DEBUG_CLOSURES -+#define CLOSURE_MAGIC_DEAD 0xc054dead -+#define CLOSURE_MAGIC_ALIVE 0xc054a11e -+ -+ unsigned int magic; -+ struct list_head all; -+ unsigned long ip; -+ unsigned long waiting_on; -+#endif -+}; -+ -+void closure_sub(struct closure *cl, int v); -+void closure_put(struct closure *cl); -+void __closure_wake_up(struct closure_waitlist *list); -+bool closure_wait(struct closure_waitlist *list, struct closure *cl); -+void __closure_sync(struct closure *cl); -+ -+/** -+ * closure_sync - sleep until a closure a closure has nothing left to wait on -+ * -+ * Sleeps until the refcount hits 1 - the thread that's running the closure owns -+ * the last refcount. -+ */ -+static inline void closure_sync(struct closure *cl) -+{ -+ if ((atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK) != 1) -+ __closure_sync(cl); -+} -+ -+#ifdef CONFIG_DEBUG_CLOSURES -+ -+void closure_debug_create(struct closure *cl); -+void closure_debug_destroy(struct closure *cl); -+ -+#else -+ -+static inline void closure_debug_create(struct closure *cl) {} -+static inline void closure_debug_destroy(struct closure *cl) {} -+ -+#endif -+ -+static inline void closure_set_ip(struct closure *cl) -+{ -+#ifdef CONFIG_DEBUG_CLOSURES -+ cl->ip = _THIS_IP_; -+#endif -+} -+ -+static inline void closure_set_ret_ip(struct closure *cl) -+{ -+#ifdef CONFIG_DEBUG_CLOSURES -+ cl->ip = _RET_IP_; -+#endif -+} -+ -+static inline void closure_set_waiting(struct closure *cl, unsigned long f) -+{ -+#ifdef CONFIG_DEBUG_CLOSURES -+ cl->waiting_on = f; -+#endif -+} -+ -+static inline void closure_set_stopped(struct closure *cl) -+{ -+ atomic_sub(CLOSURE_RUNNING, &cl->remaining); -+} -+ -+static inline void set_closure_fn(struct closure *cl, closure_fn *fn, -+ struct workqueue_struct *wq) -+{ -+ closure_set_ip(cl); -+ cl->fn = fn; -+ cl->wq = wq; -+ /* between atomic_dec() in closure_put() */ -+ smp_mb__before_atomic(); -+} -+ -+static inline void closure_queue(struct closure *cl) -+{ -+ struct workqueue_struct *wq = cl->wq; -+ /** -+ * Changes made to closure, work_struct, or a couple of other structs -+ * may cause work.func not pointing to the right location. -+ */ -+ BUILD_BUG_ON(offsetof(struct closure, fn) -+ != offsetof(struct work_struct, func)); -+ -+ if (wq) { -+ INIT_WORK(&cl->work, cl->work.func); -+ queue_work(wq, &cl->work); -+ } else -+ cl->fn(cl); -+} -+ -+/** -+ * closure_get - increment a closure's refcount -+ */ -+static inline void closure_get(struct closure *cl) -+{ -+#ifdef CONFIG_DEBUG_CLOSURES -+ BUG_ON((atomic_inc_return(&cl->remaining) & -+ CLOSURE_REMAINING_MASK) <= 1); -+#else -+ atomic_inc(&cl->remaining); -+#endif -+} -+ -+/** -+ * closure_init - Initialize a closure, setting the refcount to 1 -+ * @cl: closure to initialize -+ * @parent: parent of the new closure. cl will take a refcount on it for its -+ * lifetime; may be NULL. 
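 *
 * A minimal usage sketch (editor's illustration, not part of the original
 * patch; the bio and function names are hypothetical, following the
 * foo_endio example at the top of this file):
 *
 *	closure_init(cl, parent);
 *
 *	closure_get(cl);
 *	bio->bi_end_io = foo_endio;	(foo_endio() calls closure_put(cl))
 *	submit_bio(bio);
 *
 *	continue_at(cl, complete_some_read, system_wq);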
-+ */ -+static inline void closure_init(struct closure *cl, struct closure *parent) -+{ -+ cl->fn = NULL; -+ cl->parent = parent; -+ if (parent) -+ closure_get(parent); -+ -+ atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); -+ -+ closure_debug_create(cl); -+ closure_set_ip(cl); -+} -+ -+static inline void closure_init_stack(struct closure *cl) -+{ -+ memset(cl, 0, sizeof(struct closure)); -+ atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); -+} -+ -+/** -+ * closure_wake_up - wake up all closures on a wait list, -+ * with memory barrier -+ */ -+static inline void closure_wake_up(struct closure_waitlist *list) -+{ -+ /* Memory barrier for the wait list */ -+ smp_mb(); -+ __closure_wake_up(list); -+} -+ -+/** -+ * continue_at - jump to another function with barrier -+ * -+ * After @cl is no longer waiting on anything (i.e. all outstanding refs have -+ * been dropped with closure_put()), it will resume execution at @fn running out -+ * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). -+ * -+ * This is because after calling continue_at() you no longer have a ref on @cl, -+ * and whatever @cl owns may be freed out from under you - a running closure fn -+ * has a ref on its own closure which continue_at() drops. -+ * -+ * Note you are expected to immediately return after using this macro. -+ */ -+#define continue_at(_cl, _fn, _wq) \ -+do { \ -+ set_closure_fn(_cl, _fn, _wq); \ -+ closure_sub(_cl, CLOSURE_RUNNING + 1); \ -+} while (0) -+ -+/** -+ * closure_return - finish execution of a closure -+ * -+ * This is used to indicate that @cl is finished: when all outstanding refs on -+ * @cl have been dropped @cl's ref on its parent closure (as passed to -+ * closure_init()) will be dropped, if one was specified - thus this can be -+ * thought of as returning to the parent closure. -+ */ -+#define closure_return(_cl) continue_at((_cl), NULL, NULL) -+ -+/** -+ * continue_at_nobarrier - jump to another function without barrier -+ * -+ * Causes @fn to be executed out of @cl, in @wq context (or called directly if -+ * @wq is NULL). -+ * -+ * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, -+ * thus it's not safe to touch anything protected by @cl after a -+ * continue_at_nobarrier(). -+ */ -+#define continue_at_nobarrier(_cl, _fn, _wq) \ -+do { \ -+ closure_set_ip(_cl); \ -+ if (_wq) { \ -+ INIT_WORK(&(_cl)->work, (void *) _fn); \ -+ queue_work((_wq), &(_cl)->work); \ -+ } else { \ -+ (_fn)(_cl); \ -+ } \ -+} while (0) -+ -+/** -+ * closure_return_with_destructor - finish execution of a closure, -+ * with destructor -+ * -+ * Works like closure_return(), except @destructor will be called when all -+ * outstanding refs on @cl have been dropped; @destructor may be used to safely -+ * free the memory occupied by @cl, and it is called with the ref on the parent -+ * closure still held - so @destructor could safely return an item to a -+ * freelist protected by @cl's parent. -+ */ -+#define closure_return_with_destructor(_cl, _destructor) \ -+do { \ -+ set_closure_fn(_cl, _destructor, NULL); \ -+ closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ -+} while (0) -+ -+/** -+ * closure_call - execute @fn out of a new, uninitialized closure -+ * -+ * Typically used when running out of one closure, and we want to run @fn -+ * asynchronously out of a new closure - @parent will then wait for @cl to -+ * finish. 
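 *
 * e.g. (editor's sketch, not from the original patch; req, do_read,
 * read_done, parent_cl and c->wq are all hypothetical names):
 *
 *	closure_call(&req->cl, do_read, c->wq, &parent_cl);
 *	continue_at(&parent_cl, read_done, c->wq);
 *
 * read_done() then only runs once do_read()'s closure has finished and
 * dropped its ref on &parent_cl.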
-+ */ -+static inline void closure_call(struct closure *cl, closure_fn fn, -+ struct workqueue_struct *wq, -+ struct closure *parent) -+{ -+ closure_init(cl, parent); -+ continue_at_nobarrier(cl, fn, wq); -+} -+ -+#define __closure_wait_event(waitlist, _cond) \ -+do { \ -+ struct closure cl; \ -+ \ -+ closure_init_stack(&cl); \ -+ \ -+ while (1) { \ -+ closure_wait(waitlist, &cl); \ -+ if (_cond) \ -+ break; \ -+ closure_sync(&cl); \ -+ } \ -+ closure_wake_up(waitlist); \ -+ closure_sync(&cl); \ -+} while (0) -+ -+#define closure_wait_event(waitlist, _cond) \ -+do { \ -+ if (!(_cond)) \ -+ __closure_wait_event(waitlist, _cond); \ -+} while (0) -+ -+#endif /* _LINUX_CLOSURE_H */ -diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h -index cdf016596659..d3ab422fd4bf 100644 ---- a/include/linux/compiler_attributes.h -+++ b/include/linux/compiler_attributes.h -@@ -270,4 +270,9 @@ - */ - #define __weak __attribute__((__weak__)) - -+/* -+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-flatten-function-attribute -+ */ -+#define __flatten __attribute__((flatten)) -+ - #endif /* __LINUX_COMPILER_ATTRIBUTES_H */ -diff --git a/include/linux/dcache.h b/include/linux/dcache.h -index c1488cc84fd9..3d6c4102ecc1 100644 ---- a/include/linux/dcache.h -+++ b/include/linux/dcache.h -@@ -254,6 +254,7 @@ extern struct dentry * d_make_root(struct inode *); - /* - the ramfs-type tree */ - extern void d_genocide(struct dentry *); - -+extern void d_mark_tmpfile(struct dentry *, struct inode *); - extern void d_tmpfile(struct dentry *, struct inode *); - - extern struct dentry *d_find_alias(struct inode *); -diff --git a/include/linux/dynamic_fault.h b/include/linux/dynamic_fault.h -new file mode 100644 -index 000000000000..6e7bb56ae8b4 ---- /dev/null -+++ b/include/linux/dynamic_fault.h -@@ -0,0 +1,117 @@ -+#ifndef _DYNAMIC_FAULT_H -+#define _DYNAMIC_FAULT_H -+ -+#include -+#include -+#include -+ -+enum dfault_enabled { -+ DFAULT_DISABLED, -+ DFAULT_ENABLED, -+ DFAULT_ONESHOT, -+}; -+ -+union dfault_state { -+ struct { -+ unsigned enabled:2; -+ unsigned count:30; -+ }; -+ -+ struct { -+ unsigned v; -+ }; -+}; -+ -+/* -+ * An instance of this structure is created in a special -+ * ELF section at every dynamic fault callsite. At runtime, -+ * the special section is treated as an array of these. -+ */ -+struct _dfault { -+ const char *modname; -+ const char *function; -+ const char *filename; -+ const char *class; -+ -+ const u16 line; -+ -+ unsigned frequency; -+ union dfault_state state; -+ -+ struct static_key enabled; -+} __aligned(8); -+ -+ -+#ifdef CONFIG_DYNAMIC_FAULT -+ -+int dfault_add_module(struct _dfault *tab, unsigned int n, const char *mod); -+int dfault_remove_module(char *mod_name); -+bool __dynamic_fault_enabled(struct _dfault *); -+ -+#define dynamic_fault(_class) \ -+({ \ -+ static struct _dfault descriptor \ -+ __used __aligned(8) __attribute__((section("__faults"))) = { \ -+ .modname = KBUILD_MODNAME, \ -+ .function = __func__, \ -+ .filename = __FILE__, \ -+ .line = __LINE__, \ -+ .class = _class, \ -+ }; \ -+ \ -+ static_key_false(&descriptor.enabled) && \ -+ __dynamic_fault_enabled(&descriptor); \ -+}) -+ -+#define memory_fault() dynamic_fault("memory") -+#define race_fault() dynamic_fault("race") -+ -+#define kmalloc(...) \ -+ (memory_fault() ? NULL : kmalloc(__VA_ARGS__)) -+#define kzalloc(...) \ -+ (memory_fault() ? NULL : kzalloc(__VA_ARGS__)) -+#define krealloc(...) \ -+ (memory_fault() ? 
NULL : krealloc(__VA_ARGS__)) -+ -+#define mempool_alloc(pool, gfp_mask) \ -+ ((!gfpflags_allow_blocking(gfp_mask) && memory_fault()) \ -+ ? NULL : mempool_alloc(pool, gfp_mask)) -+ -+#define __get_free_pages(...) \ -+ (memory_fault() ? 0 : __get_free_pages(__VA_ARGS__)) -+#define alloc_pages_node(...) \ -+ (memory_fault() ? NULL : alloc_pages_node(__VA_ARGS__)) -+#define alloc_pages_nodemask(...) \ -+ (memory_fault() ? NULL : alloc_pages_nodemask(__VA_ARGS__)) -+ -+#define bio_alloc_bioset(gfp_mask, ...) \ -+ ((!gfpflags_allow_blocking(gfp_mask) && memory_fault()) \ -+ ? NULL : bio_alloc_bioset(gfp_mask, __VA_ARGS__)) -+ -+#define bio_clone(bio, gfp_mask) \ -+ ((!gfpflags_allow_blocking(gfp_mask) && memory_fault()) \ -+ ? NULL : bio_clone(bio, gfp_mask)) -+ -+#define bio_clone_bioset(bio, gfp_mask, bs) \ -+ ((!gfpflags_allow_blocking(gfp_mask) && memory_fault()) \ -+ ? NULL : bio_clone_bioset(bio, gfp_mask, bs)) -+ -+#define bio_kmalloc(...) \ -+ (memory_fault() ? NULL : bio_kmalloc(__VA_ARGS__)) -+#define bio_clone_kmalloc(...) \ -+ (memory_fault() ? NULL : bio_clone_kmalloc(__VA_ARGS__)) -+ -+#define bio_iov_iter_get_pages(...) \ -+ (memory_fault() ? -ENOMEM : bio_iov_iter_get_pages(__VA_ARGS__)) -+ -+#else /* CONFIG_DYNAMIC_FAULT */ -+ -+#define dfault_add_module(tab, n, modname) 0 -+#define dfault_remove_module(mod) 0 -+#define dynamic_fault(_class) 0 -+#define memory_fault() 0 -+#define race_fault() 0 -+ -+#endif /* CONFIG_DYNAMIC_FAULT */ -+ -+#endif -diff --git a/include/linux/fs.h b/include/linux/fs.h -index 45cc10cdf6dd..51f2268a3eaa 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -3072,6 +3072,7 @@ extern struct inode *find_inode_nowait(struct super_block *, - void *data); - extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); - extern int insert_inode_locked(struct inode *); -+extern struct inode *insert_inode_locked2(struct inode *); - #ifdef CONFIG_DEBUG_LOCK_ALLOC - extern void lockdep_annotate_inode_mutex_key(struct inode *inode); - #else -diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h -index a8f7bd8ea1c6..2b41ba4377ec 100644 ---- a/include/linux/pagemap.h -+++ b/include/linux/pagemap.h -@@ -605,32 +605,21 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size) - return 0; - } - --int add_to_page_cache_locked(struct page *page, struct address_space *mapping, -- pgoff_t index, gfp_t gfp_mask); -+int add_to_page_cache(struct page *page, struct address_space *mapping, -+ pgoff_t index, gfp_t gfp_mask); - int add_to_page_cache_lru(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); -+int add_to_page_cache_lru_vec(struct address_space *mapping, -+ struct page **pages, -+ unsigned nr_pages, -+ pgoff_t offset, gfp_t gfp_mask); -+ - extern void delete_from_page_cache(struct page *page); - extern void __delete_from_page_cache(struct page *page, void *shadow); - int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); - void delete_from_page_cache_batch(struct address_space *mapping, - struct pagevec *pvec); - --/* -- * Like add_to_page_cache_locked, but used to add newly allocated pages: -- * the page is new, so we can just run __SetPageLocked() against it. 
-- */ --static inline int add_to_page_cache(struct page *page, -- struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) --{ -- int error; -- -- __SetPageLocked(page); -- error = add_to_page_cache_locked(page, mapping, offset, gfp_mask); -- if (unlikely(error)) -- __ClearPageLocked(page); -- return error; --} -- - static inline unsigned long dir_pages(struct inode *inode) - { - return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >> -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 4418f5cb8324..3f99f17a095b 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -45,6 +45,7 @@ struct io_context; - struct mempolicy; - struct nameidata; - struct nsproxy; -+struct pagecache_lock; - struct perf_event_context; - struct pid_namespace; - struct pipe_inode_info; -@@ -734,6 +735,7 @@ struct task_struct { - - struct mm_struct *mm; - struct mm_struct *active_mm; -+ struct address_space *faults_disabled_mapping; - - /* Per-thread vma caching: */ - struct vmacache vmacache; -diff --git a/include/linux/six.h b/include/linux/six.h -new file mode 100644 -index 000000000000..a16e94f482e9 ---- /dev/null -+++ b/include/linux/six.h -@@ -0,0 +1,197 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+#ifndef _LINUX_SIX_H -+#define _LINUX_SIX_H -+ -+/* -+ * Shared/intent/exclusive locks: sleepable read/write locks, much like rw -+ * semaphores, except with a third intermediate state, intent. Basic operations -+ * are: -+ * -+ * six_lock_read(&foo->lock); -+ * six_unlock_read(&foo->lock); -+ * -+ * six_lock_intent(&foo->lock); -+ * six_unlock_intent(&foo->lock); -+ * -+ * six_lock_write(&foo->lock); -+ * six_unlock_write(&foo->lock); -+ * -+ * Intent locks block other intent locks, but do not block read locks, and you -+ * must have an intent lock held before taking a write lock, like so: -+ * -+ * six_lock_intent(&foo->lock); -+ * six_lock_write(&foo->lock); -+ * six_unlock_write(&foo->lock); -+ * six_unlock_intent(&foo->lock); -+ * -+ * Other operations: -+ * -+ * six_trylock_read() -+ * six_trylock_intent() -+ * six_trylock_write() -+ * -+ * six_lock_downgrade(): convert from intent to read -+ * six_lock_tryupgrade(): attempt to convert from read to intent -+ * -+ * Locks also embed a sequence number, which is incremented when the lock is -+ * locked or unlocked for write. The current sequence number can be grabbed -+ * while a lock is held from lock->state.seq; then, if you drop the lock you can -+ * use six_relock_(read|intent_write)(lock, seq) to attempt to retake the lock -+ * iff it hasn't been locked for write in the meantime. -+ * -+ * There are also operations that take the lock type as a parameter, where the -+ * type is one of SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write: -+ * -+ * six_lock_type(lock, type) -+ * six_unlock_type(lock, type) -+ * six_relock(lock, type, seq) -+ * six_trylock_type(lock, type) -+ * six_trylock_convert(lock, from, to) -+ * -+ * A lock may be held multiple types by the same thread (for read or intent, -+ * not write). However, the six locks code does _not_ implement the actual -+ * recursive checks itself though - rather, if your code (e.g. btree iterator -+ * code) knows that the current thread already has a lock held, and for the -+ * correct type, six_lock_increment() may be used to bump up the counter for -+ * that type - the only effect is that one more call to unlock will be required -+ * before the lock is unlocked. 
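 *
 * A hedged sketch of the sequence number pattern described above (editor's
 * illustration, not part of the original patch; the btree node 'b' and the
 * retry label are hypothetical):
 *
 *	six_lock_read(&b->lock, NULL, NULL);
 *	u32 seq = b->lock.state.seq;
 *	six_unlock_read(&b->lock);
 *
 *	(do work that does not require the lock)
 *
 *	if (!six_relock_read(&b->lock, seq))
 *		goto retry;	(the lock was taken for write in the meantime)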
-+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define SIX_LOCK_SEPARATE_LOCKFNS -+ -+union six_lock_state { -+ struct { -+ atomic64_t counter; -+ }; -+ -+ struct { -+ u64 v; -+ }; -+ -+ struct { -+ /* for waitlist_bitnr() */ -+ unsigned long l; -+ }; -+ -+ struct { -+ unsigned read_lock:28; -+ unsigned intent_lock:1; -+ unsigned waiters:3; -+ /* -+ * seq works much like in seqlocks: it's incremented every time -+ * we lock and unlock for write. -+ * -+ * If it's odd write lock is held, even unlocked. -+ * -+ * Thus readers can unlock, and then lock again later iff it -+ * hasn't been modified in the meantime. -+ */ -+ u32 seq; -+ }; -+}; -+ -+enum six_lock_type { -+ SIX_LOCK_read, -+ SIX_LOCK_intent, -+ SIX_LOCK_write, -+}; -+ -+struct six_lock { -+ union six_lock_state state; -+ unsigned intent_lock_recurse; -+ struct task_struct *owner; -+ struct optimistic_spin_queue osq; -+ -+ raw_spinlock_t wait_lock; -+ struct list_head wait_list[2]; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+}; -+ -+typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *); -+ -+static __always_inline void __six_lock_init(struct six_lock *lock, -+ const char *name, -+ struct lock_class_key *key) -+{ -+ atomic64_set(&lock->state.counter, 0); -+ raw_spin_lock_init(&lock->wait_lock); -+ INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_read]); -+ INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_intent]); -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ debug_check_no_locks_freed((void *) lock, sizeof(*lock)); -+ lockdep_init_map(&lock->dep_map, name, key, 0); -+#endif -+} -+ -+#define six_lock_init(lock) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ __six_lock_init((lock), #lock, &__key); \ -+} while (0) -+ -+#define __SIX_VAL(field, _v) (((union six_lock_state) { .field = _v }).v) -+ -+#define __SIX_LOCK(type) \ -+bool six_trylock_##type(struct six_lock *); \ -+bool six_relock_##type(struct six_lock *, u32); \ -+int six_lock_##type(struct six_lock *, six_lock_should_sleep_fn, void *);\ -+void six_unlock_##type(struct six_lock *); -+ -+__SIX_LOCK(read) -+__SIX_LOCK(intent) -+__SIX_LOCK(write) -+#undef __SIX_LOCK -+ -+#define SIX_LOCK_DISPATCH(type, fn, ...) 
\ -+ switch (type) { \ -+ case SIX_LOCK_read: \ -+ return fn##_read(__VA_ARGS__); \ -+ case SIX_LOCK_intent: \ -+ return fn##_intent(__VA_ARGS__); \ -+ case SIX_LOCK_write: \ -+ return fn##_write(__VA_ARGS__); \ -+ default: \ -+ BUG(); \ -+ } -+ -+static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type) -+{ -+ SIX_LOCK_DISPATCH(type, six_trylock, lock); -+} -+ -+static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type, -+ unsigned seq) -+{ -+ SIX_LOCK_DISPATCH(type, six_relock, lock, seq); -+} -+ -+static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p) -+{ -+ SIX_LOCK_DISPATCH(type, six_lock, lock, should_sleep_fn, p); -+} -+ -+static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type) -+{ -+ SIX_LOCK_DISPATCH(type, six_unlock, lock); -+} -+ -+void six_lock_downgrade(struct six_lock *); -+bool six_lock_tryupgrade(struct six_lock *); -+bool six_trylock_convert(struct six_lock *, enum six_lock_type, -+ enum six_lock_type); -+ -+void six_lock_increment(struct six_lock *, enum six_lock_type); -+ -+void six_lock_wakeup_all(struct six_lock *); -+ -+#endif /* _LINUX_SIX_H */ -diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h -new file mode 100644 -index 000000000000..bafbccafae30 ---- /dev/null -+++ b/include/trace/events/bcachefs.h -@@ -0,0 +1,664 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#undef TRACE_SYSTEM -+#define TRACE_SYSTEM bcachefs -+ -+#if !defined(_TRACE_BCACHE_H) || defined(TRACE_HEADER_MULTI_READ) -+#define _TRACE_BCACHE_H -+ -+#include -+ -+DECLARE_EVENT_CLASS(bpos, -+ TP_PROTO(struct bpos *p), -+ TP_ARGS(p), -+ -+ TP_STRUCT__entry( -+ __field(u64, inode ) -+ __field(u64, offset ) -+ ), -+ -+ TP_fast_assign( -+ __entry->inode = p->inode; -+ __entry->offset = p->offset; -+ ), -+ -+ TP_printk("%llu:%llu", __entry->inode, __entry->offset) -+); -+ -+DECLARE_EVENT_CLASS(bkey, -+ TP_PROTO(const struct bkey *k), -+ TP_ARGS(k), -+ -+ TP_STRUCT__entry( -+ __field(u64, inode ) -+ __field(u64, offset ) -+ __field(u32, size ) -+ ), -+ -+ TP_fast_assign( -+ __entry->inode = k->p.inode; -+ __entry->offset = k->p.offset; -+ __entry->size = k->size; -+ ), -+ -+ TP_printk("%llu:%llu len %u", __entry->inode, -+ __entry->offset, __entry->size) -+); -+ -+DECLARE_EVENT_CLASS(bch_fs, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ ), -+ -+ TP_printk("%pU", __entry->uuid) -+); -+ -+DECLARE_EVENT_CLASS(bio, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio), -+ -+ TP_STRUCT__entry( -+ __field(dev_t, dev ) -+ __field(sector_t, sector ) -+ __field(unsigned int, nr_sector ) -+ __array(char, rwbs, 6 ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = bio->bi_disk ? 
bio_dev(bio) : 0; -+ __entry->sector = bio->bi_iter.bi_sector; -+ __entry->nr_sector = bio->bi_iter.bi_size >> 9; -+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); -+ ), -+ -+ TP_printk("%d,%d %s %llu + %u", -+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, -+ (unsigned long long)__entry->sector, __entry->nr_sector) -+); -+ -+/* io.c: */ -+ -+DEFINE_EVENT(bio, read_split, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+DEFINE_EVENT(bio, read_bounce, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+DEFINE_EVENT(bio, read_retry, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+DEFINE_EVENT(bio, promote, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+/* Journal */ -+ -+DEFINE_EVENT(bch_fs, journal_full, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, journal_entry_full, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bio, journal_write, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+/* bset.c: */ -+ -+DEFINE_EVENT(bpos, bkey_pack_pos_fail, -+ TP_PROTO(struct bpos *p), -+ TP_ARGS(p) -+); -+ -+/* Btree */ -+ -+DECLARE_EVENT_CLASS(btree_node, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(u8, level ) -+ __field(u8, id ) -+ __field(u64, inode ) -+ __field(u64, offset ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ __entry->level = b->c.level; -+ __entry->id = b->c.btree_id; -+ __entry->inode = b->key.k.p.inode; -+ __entry->offset = b->key.k.p.offset; -+ ), -+ -+ TP_printk("%pU %u id %u %llu:%llu", -+ __entry->uuid, __entry->level, __entry->id, -+ __entry->inode, __entry->offset) -+); -+ -+DEFINE_EVENT(btree_node, btree_read, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+TRACE_EVENT(btree_write, -+ TP_PROTO(struct btree *b, unsigned bytes, unsigned sectors), -+ TP_ARGS(b, bytes, sectors), -+ -+ TP_STRUCT__entry( -+ __field(enum btree_node_type, type) -+ __field(unsigned, bytes ) -+ __field(unsigned, sectors ) -+ ), -+ -+ TP_fast_assign( -+ __entry->type = btree_node_type(b); -+ __entry->bytes = bytes; -+ __entry->sectors = sectors; -+ ), -+ -+ TP_printk("bkey type %u bytes %u sectors %u", -+ __entry->type , __entry->bytes, __entry->sectors) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_alloc, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_free, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_reap, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DECLARE_EVENT_CLASS(btree_node_cannibalize_lock, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ ), -+ -+ TP_printk("%pU", __entry->uuid) -+); -+ -+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock_fail, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, btree_node_cannibalize_unlock, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+TRACE_EVENT(btree_reserve_get_fail, -+ TP_PROTO(struct bch_fs *c, size_t required, struct closure 
*cl), -+ TP_ARGS(c, required, cl), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(size_t, required ) -+ __field(struct closure *, cl ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ __entry->required = required; -+ __entry->cl = cl; -+ ), -+ -+ TP_printk("%pU required %zu by %p", __entry->uuid, -+ __entry->required, __entry->cl) -+); -+ -+TRACE_EVENT(btree_insert_key, -+ TP_PROTO(struct bch_fs *c, struct btree *b, struct bkey_i *k), -+ TP_ARGS(c, b, k), -+ -+ TP_STRUCT__entry( -+ __field(u8, id ) -+ __field(u64, inode ) -+ __field(u64, offset ) -+ __field(u32, size ) -+ ), -+ -+ TP_fast_assign( -+ __entry->id = b->c.btree_id; -+ __entry->inode = k->k.p.inode; -+ __entry->offset = k->k.p.offset; -+ __entry->size = k->k.size; -+ ), -+ -+ TP_printk("btree %u: %llu:%llu len %u", __entry->id, -+ __entry->inode, __entry->offset, __entry->size) -+); -+ -+DEFINE_EVENT(btree_node, btree_split, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_compact, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_merge, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_set_root, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+/* Garbage collection */ -+ -+DEFINE_EVENT(btree_node, btree_gc_coalesce, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+TRACE_EVENT(btree_gc_coalesce_fail, -+ TP_PROTO(struct bch_fs *c, int reason), -+ TP_ARGS(c, reason), -+ -+ TP_STRUCT__entry( -+ __field(u8, reason ) -+ __array(char, uuid, 16 ) -+ ), -+ -+ TP_fast_assign( -+ __entry->reason = reason; -+ memcpy(__entry->uuid, c->disk_sb.sb->user_uuid.b, 16); -+ ), -+ -+ TP_printk("%pU: %u", __entry->uuid, __entry->reason) -+); -+ -+DEFINE_EVENT(btree_node, btree_gc_rewrite_node, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_gc_rewrite_node_fail, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(bch_fs, gc_start, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, gc_end, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, gc_coalesce_start, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, gc_coalesce_end, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, gc_cannot_inc_gens, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+/* Allocator */ -+ -+TRACE_EVENT(alloc_batch, -+ TP_PROTO(struct bch_dev *ca, size_t free, size_t total), -+ TP_ARGS(ca, free, total), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(size_t, free ) -+ __field(size_t, total ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, ca->uuid.b, 16); -+ __entry->free = free; -+ __entry->total = total; -+ ), -+ -+ TP_printk("%pU free %zu total %zu", -+ __entry->uuid, __entry->free, __entry->total) -+); -+ -+TRACE_EVENT(invalidate, -+ TP_PROTO(struct bch_dev *ca, u64 offset, unsigned sectors), -+ TP_ARGS(ca, offset, sectors), -+ -+ TP_STRUCT__entry( -+ __field(unsigned, sectors ) -+ __field(dev_t, dev ) -+ __field(__u64, offset ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = ca->disk_sb.bdev->bd_dev; -+ __entry->offset = offset, -+ __entry->sectors = sectors; -+ ), -+ -+ TP_printk("invalidated %u sectors at %d,%d sector=%llu", -+ __entry->sectors, MAJOR(__entry->dev), -+ MINOR(__entry->dev), 
__entry->offset) -+); -+ -+DEFINE_EVENT(bch_fs, rescale_prios, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DECLARE_EVENT_CLASS(bucket_alloc, -+ TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), -+ TP_ARGS(ca, reserve), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16) -+ __field(enum alloc_reserve, reserve ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, ca->uuid.b, 16); -+ __entry->reserve = reserve; -+ ), -+ -+ TP_printk("%pU reserve %d", __entry->uuid, __entry->reserve) -+); -+ -+DEFINE_EVENT(bucket_alloc, bucket_alloc, -+ TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), -+ TP_ARGS(ca, reserve) -+); -+ -+DEFINE_EVENT(bucket_alloc, bucket_alloc_fail, -+ TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), -+ TP_ARGS(ca, reserve) -+); -+ -+DEFINE_EVENT(bucket_alloc, open_bucket_alloc_fail, -+ TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), -+ TP_ARGS(ca, reserve) -+); -+ -+/* Moving IO */ -+ -+DEFINE_EVENT(bkey, move_extent, -+ TP_PROTO(const struct bkey *k), -+ TP_ARGS(k) -+); -+ -+DEFINE_EVENT(bkey, move_alloc_fail, -+ TP_PROTO(const struct bkey *k), -+ TP_ARGS(k) -+); -+ -+DEFINE_EVENT(bkey, move_race, -+ TP_PROTO(const struct bkey *k), -+ TP_ARGS(k) -+); -+ -+TRACE_EVENT(move_data, -+ TP_PROTO(struct bch_fs *c, u64 sectors_moved, -+ u64 keys_moved), -+ TP_ARGS(c, sectors_moved, keys_moved), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(u64, sectors_moved ) -+ __field(u64, keys_moved ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ __entry->sectors_moved = sectors_moved; -+ __entry->keys_moved = keys_moved; -+ ), -+ -+ TP_printk("%pU sectors_moved %llu keys_moved %llu", -+ __entry->uuid, __entry->sectors_moved, __entry->keys_moved) -+); -+ -+TRACE_EVENT(copygc, -+ TP_PROTO(struct bch_dev *ca, -+ u64 sectors_moved, u64 sectors_not_moved, -+ u64 buckets_moved, u64 buckets_not_moved), -+ TP_ARGS(ca, -+ sectors_moved, sectors_not_moved, -+ buckets_moved, buckets_not_moved), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(u64, sectors_moved ) -+ __field(u64, sectors_not_moved ) -+ __field(u64, buckets_moved ) -+ __field(u64, buckets_not_moved ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, ca->uuid.b, 16); -+ __entry->sectors_moved = sectors_moved; -+ __entry->sectors_not_moved = sectors_not_moved; -+ __entry->buckets_moved = buckets_moved; -+ __entry->buckets_not_moved = buckets_moved; -+ ), -+ -+ TP_printk("%pU sectors moved %llu remain %llu buckets moved %llu remain %llu", -+ __entry->uuid, -+ __entry->sectors_moved, __entry->sectors_not_moved, -+ __entry->buckets_moved, __entry->buckets_not_moved) -+); -+ -+TRACE_EVENT(transaction_restart_ip, -+ TP_PROTO(unsigned long caller, unsigned long ip), -+ TP_ARGS(caller, ip), -+ -+ TP_STRUCT__entry( -+ __field(unsigned long, caller ) -+ __field(unsigned long, ip ) -+ ), -+ -+ TP_fast_assign( -+ __entry->caller = caller; -+ __entry->ip = ip; -+ ), -+ -+ TP_printk("%pF %pF", (void *) __entry->caller, (void *) __entry->ip) -+); -+ -+DECLARE_EVENT_CLASS(transaction_restart, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip), -+ -+ TP_STRUCT__entry( -+ __field(unsigned long, ip ) -+ ), -+ -+ TP_fast_assign( -+ __entry->ip = ip; -+ ), -+ -+ TP_printk("%pf", (void *) __entry->ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_btree_node_reused, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_would_deadlock, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ 
-+TRACE_EVENT(trans_restart_iters_realloced, -+ TP_PROTO(unsigned long ip, unsigned nr), -+ TP_ARGS(ip, nr), -+ -+ TP_STRUCT__entry( -+ __field(unsigned long, ip ) -+ __field(unsigned, nr ) -+ ), -+ -+ TP_fast_assign( -+ __entry->ip = ip; -+ __entry->nr = nr; -+ ), -+ -+ TP_printk("%pf nr %u", (void *) __entry->ip, __entry->nr) -+); -+ -+TRACE_EVENT(trans_restart_mem_realloced, -+ TP_PROTO(unsigned long ip, unsigned long bytes), -+ TP_ARGS(ip, bytes), -+ -+ TP_STRUCT__entry( -+ __field(unsigned long, ip ) -+ __field(unsigned long, bytes ) -+ ), -+ -+ TP_fast_assign( -+ __entry->ip = ip; -+ __entry->bytes = bytes; -+ ), -+ -+ TP_printk("%pf bytes %lu", (void *) __entry->ip, __entry->bytes) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_journal_preres_get, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_mark_replicas, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_fault_inject, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_btree_node_split, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_mark, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_upgrade, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_iter_upgrade, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_traverse, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_atomic, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DECLARE_EVENT_CLASS(node_lock_fail, -+ TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq), -+ TP_ARGS(level, iter_seq, node, node_seq), -+ -+ TP_STRUCT__entry( -+ __field(u32, level) -+ __field(u32, iter_seq) -+ __field(u32, node) -+ __field(u32, node_seq) -+ ), -+ -+ TP_fast_assign( -+ __entry->level = level; -+ __entry->iter_seq = iter_seq; -+ __entry->node = node; -+ __entry->node_seq = node_seq; -+ ), -+ -+ TP_printk("level %u iter seq %u node %u node seq %u", -+ __entry->level, __entry->iter_seq, -+ __entry->node, __entry->node_seq) -+); -+ -+DEFINE_EVENT(node_lock_fail, node_upgrade_fail, -+ TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq), -+ TP_ARGS(level, iter_seq, node, node_seq) -+); -+ -+DEFINE_EVENT(node_lock_fail, node_relock_fail, -+ TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq), -+ TP_ARGS(level, iter_seq, node, node_seq) -+); -+ -+#endif /* _TRACE_BCACHE_H */ -+ -+/* This part must be outside protection */ -+#include -diff --git a/init/init_task.c b/init/init_task.c -index bd403ed3e418..3035fffd976b 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -76,6 +76,7 @@ struct task_struct init_task - .nr_cpus_allowed= NR_CPUS, - .mm = NULL, - .active_mm = &init_mm, -+ .faults_disabled_mapping = NULL, - .restart_block = { - .fn = do_no_restart_syscall, - }, -diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks -index 3de8fd11873b..ab8aa082ce56 100644 ---- a/kernel/Kconfig.locks -+++ b/kernel/Kconfig.locks -@@ -259,3 +259,6 @@ config ARCH_HAS_MMIOWB - config MMIOWB - def_bool y if ARCH_HAS_MMIOWB - depends on SMP -+ -+config SIXLOCKS -+ bool -diff --git 
a/kernel/locking/Makefile b/kernel/locking/Makefile -index 45452facff3b..6c8f7340c0a2 100644 ---- a/kernel/locking/Makefile -+++ b/kernel/locking/Makefile -@@ -29,3 +29,4 @@ obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o - obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o - obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o - obj-$(CONFIG_LOCK_EVENT_COUNTS) += lock_events.o -+obj-$(CONFIG_SIXLOCKS) += six.o -diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h -index baca699b94e9..4abb462d914d 100644 ---- a/kernel/locking/lockdep_internals.h -+++ b/kernel/locking/lockdep_internals.h -@@ -96,7 +96,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = - #else - #define MAX_LOCKDEP_ENTRIES 32768UL - --#define MAX_LOCKDEP_CHAINS_BITS 16 -+#define MAX_LOCKDEP_CHAINS_BITS 18 - - /* - * Stack-trace: tightly packed array of stack backtrace -@@ -114,7 +114,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = - - #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - --#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) -+#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*10) - - extern struct list_head all_lock_classes; - extern struct lock_chain lock_chains[]; -diff --git a/kernel/locking/six.c b/kernel/locking/six.c -new file mode 100644 -index 000000000000..49d46ed2e18e ---- /dev/null -+++ b/kernel/locking/six.c -@@ -0,0 +1,553 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef DEBUG -+#define EBUG_ON(cond) BUG_ON(cond) -+#else -+#define EBUG_ON(cond) do {} while (0) -+#endif -+ -+#define six_acquire(l, t) lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_) -+#define six_release(l) lock_release(l, _RET_IP_) -+ -+struct six_lock_vals { -+ /* Value we add to the lock in order to take the lock: */ -+ u64 lock_val; -+ -+ /* If the lock has this value (used as a mask), taking the lock fails: */ -+ u64 lock_fail; -+ -+ /* Value we add to the lock in order to release the lock: */ -+ u64 unlock_val; -+ -+ /* Mask that indicates lock is held for this type: */ -+ u64 held_mask; -+ -+ /* Waitlist we wakeup when releasing the lock: */ -+ enum six_lock_type unlock_wakeup; -+}; -+ -+#define __SIX_LOCK_HELD_read __SIX_VAL(read_lock, ~0) -+#define __SIX_LOCK_HELD_intent __SIX_VAL(intent_lock, ~0) -+#define __SIX_LOCK_HELD_write __SIX_VAL(seq, 1) -+ -+#define LOCK_VALS { \ -+ [SIX_LOCK_read] = { \ -+ .lock_val = __SIX_VAL(read_lock, 1), \ -+ .lock_fail = __SIX_LOCK_HELD_write, \ -+ .unlock_val = -__SIX_VAL(read_lock, 1), \ -+ .held_mask = __SIX_LOCK_HELD_read, \ -+ .unlock_wakeup = SIX_LOCK_write, \ -+ }, \ -+ [SIX_LOCK_intent] = { \ -+ .lock_val = __SIX_VAL(intent_lock, 1), \ -+ .lock_fail = __SIX_LOCK_HELD_intent, \ -+ .unlock_val = -__SIX_VAL(intent_lock, 1), \ -+ .held_mask = __SIX_LOCK_HELD_intent, \ -+ .unlock_wakeup = SIX_LOCK_intent, \ -+ }, \ -+ [SIX_LOCK_write] = { \ -+ .lock_val = __SIX_VAL(seq, 1), \ -+ .lock_fail = __SIX_LOCK_HELD_read, \ -+ .unlock_val = __SIX_VAL(seq, 1), \ -+ .held_mask = __SIX_LOCK_HELD_write, \ -+ .unlock_wakeup = SIX_LOCK_read, \ -+ }, \ -+} -+ -+static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type, -+ union six_lock_state old) -+{ -+ if (type != SIX_LOCK_intent) -+ return; -+ -+ if (!old.intent_lock) { -+ EBUG_ON(lock->owner); -+ lock->owner = current; -+ } else { -+ EBUG_ON(lock->owner != current); -+ } -+} -+ -+static __always_inline bool do_six_trylock_type(struct six_lock *lock, -+ enum six_lock_type type) -+{ -+ const struct 
six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state old; -+ u64 v = READ_ONCE(lock->state.v); -+ -+ EBUG_ON(type == SIX_LOCK_write && lock->owner != current); -+ -+ do { -+ old.v = v; -+ -+ EBUG_ON(type == SIX_LOCK_write && -+ ((old.v & __SIX_LOCK_HELD_write) || -+ !(old.v & __SIX_LOCK_HELD_intent))); -+ -+ if (old.v & l[type].lock_fail) -+ return false; -+ } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter, -+ old.v, -+ old.v + l[type].lock_val)) != old.v); -+ -+ six_set_owner(lock, type, old); -+ return true; -+} -+ -+__always_inline __flatten -+static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type) -+{ -+ if (!do_six_trylock_type(lock, type)) -+ return false; -+ -+ if (type != SIX_LOCK_write) -+ six_acquire(&lock->dep_map, 1); -+ return true; -+} -+ -+__always_inline __flatten -+static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type, -+ unsigned seq) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state old; -+ u64 v = READ_ONCE(lock->state.v); -+ -+ do { -+ old.v = v; -+ -+ if (old.seq != seq || old.v & l[type].lock_fail) -+ return false; -+ } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter, -+ old.v, -+ old.v + l[type].lock_val)) != old.v); -+ -+ six_set_owner(lock, type, old); -+ if (type != SIX_LOCK_write) -+ six_acquire(&lock->dep_map, 1); -+ return true; -+} -+ -+struct six_lock_waiter { -+ struct list_head list; -+ struct task_struct *task; -+}; -+ -+/* This is probably up there with the more evil things I've done */ -+#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l)) -+ -+#ifdef CONFIG_LOCK_SPIN_ON_OWNER -+ -+static inline int six_can_spin_on_owner(struct six_lock *lock) -+{ -+ struct task_struct *owner; -+ int retval = 1; -+ -+ if (need_resched()) -+ return 0; -+ -+ rcu_read_lock(); -+ owner = READ_ONCE(lock->owner); -+ if (owner) -+ retval = owner->on_cpu; -+ rcu_read_unlock(); -+ /* -+ * if lock->owner is not set, the mutex owner may have just acquired -+ * it and not set the owner yet or the mutex has been released. -+ */ -+ return retval; -+} -+ -+static inline bool six_spin_on_owner(struct six_lock *lock, -+ struct task_struct *owner) -+{ -+ bool ret = true; -+ -+ rcu_read_lock(); -+ while (lock->owner == owner) { -+ /* -+ * Ensure we emit the owner->on_cpu, dereference _after_ -+ * checking lock->owner still matches owner. If that fails, -+ * owner might point to freed memory. If it still matches, -+ * the rcu_read_lock() ensures the memory stays valid. -+ */ -+ barrier(); -+ -+ if (!owner->on_cpu || need_resched()) { -+ ret = false; -+ break; -+ } -+ -+ cpu_relax(); -+ } -+ rcu_read_unlock(); -+ -+ return ret; -+} -+ -+static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) -+{ -+ struct task_struct *task = current; -+ -+ if (type == SIX_LOCK_write) -+ return false; -+ -+ preempt_disable(); -+ if (!six_can_spin_on_owner(lock)) -+ goto fail; -+ -+ if (!osq_lock(&lock->osq)) -+ goto fail; -+ -+ while (1) { -+ struct task_struct *owner; -+ -+ /* -+ * If there's an owner, wait for it to either -+ * release the lock or go to sleep. -+ */ -+ owner = READ_ONCE(lock->owner); -+ if (owner && !six_spin_on_owner(lock, owner)) -+ break; -+ -+ if (do_six_trylock_type(lock, type)) { -+ osq_unlock(&lock->osq); -+ preempt_enable(); -+ return true; -+ } -+ -+ /* -+ * When there's no owner, we might have preempted between the -+ * owner acquiring the lock and setting the owner field. 
If -+ * we're an RT task that will live-lock because we won't let -+ * the owner complete. -+ */ -+ if (!owner && (need_resched() || rt_task(task))) -+ break; -+ -+ /* -+ * The cpu_relax() call is a compiler barrier which forces -+ * everything in this loop to be re-loaded. We don't need -+ * memory barriers as we'll eventually observe the right -+ * values at the cost of a few extra spins. -+ */ -+ cpu_relax(); -+ } -+ -+ osq_unlock(&lock->osq); -+fail: -+ preempt_enable(); -+ -+ /* -+ * If we fell out of the spin path because of need_resched(), -+ * reschedule now, before we try-lock again. This avoids getting -+ * scheduled out right after we obtained the lock. -+ */ -+ if (need_resched()) -+ schedule(); -+ -+ return false; -+} -+ -+#else /* CONFIG_LOCK_SPIN_ON_OWNER */ -+ -+static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) -+{ -+ return false; -+} -+ -+#endif -+ -+noinline -+static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state old, new; -+ struct six_lock_waiter wait; -+ int ret = 0; -+ u64 v; -+ -+ ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0; -+ if (ret) -+ return ret; -+ -+ if (six_optimistic_spin(lock, type)) -+ return 0; -+ -+ lock_contended(&lock->dep_map, _RET_IP_); -+ -+ INIT_LIST_HEAD(&wait.list); -+ wait.task = current; -+ -+ while (1) { -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ if (type == SIX_LOCK_write) -+ EBUG_ON(lock->owner != current); -+ else if (list_empty_careful(&wait.list)) { -+ raw_spin_lock(&lock->wait_lock); -+ list_add_tail(&wait.list, &lock->wait_list[type]); -+ raw_spin_unlock(&lock->wait_lock); -+ } -+ -+ ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0; -+ if (ret) -+ break; -+ -+ v = READ_ONCE(lock->state.v); -+ do { -+ new.v = old.v = v; -+ -+ if (!(old.v & l[type].lock_fail)) -+ new.v += l[type].lock_val; -+ else if (!(new.waiters & (1 << type))) -+ new.waiters |= 1 << type; -+ else -+ break; /* waiting bit already set */ -+ } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter, -+ old.v, new.v)) != old.v); -+ -+ if (!(old.v & l[type].lock_fail)) -+ break; -+ -+ schedule(); -+ } -+ -+ if (!ret) -+ six_set_owner(lock, type, old); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ if (!list_empty_careful(&wait.list)) { -+ raw_spin_lock(&lock->wait_lock); -+ list_del_init(&wait.list); -+ raw_spin_unlock(&lock->wait_lock); -+ } -+ -+ return ret; -+} -+ -+__always_inline -+static int __six_lock_type(struct six_lock *lock, enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p) -+{ -+ int ret; -+ -+ if (type != SIX_LOCK_write) -+ six_acquire(&lock->dep_map, 0); -+ -+ ret = do_six_trylock_type(lock, type) ? 
0 -+ : __six_lock_type_slowpath(lock, type, should_sleep_fn, p); -+ -+ if (ret && type != SIX_LOCK_write) -+ six_release(&lock->dep_map); -+ if (!ret) -+ lock_acquired(&lock->dep_map, _RET_IP_); -+ -+ return ret; -+} -+ -+static inline void six_lock_wakeup(struct six_lock *lock, -+ union six_lock_state state, -+ unsigned waitlist_id) -+{ -+ struct list_head *wait_list = &lock->wait_list[waitlist_id]; -+ struct six_lock_waiter *w, *next; -+ -+ if (waitlist_id == SIX_LOCK_write && state.read_lock) -+ return; -+ -+ if (!(state.waiters & (1 << waitlist_id))) -+ return; -+ -+ clear_bit(waitlist_bitnr(waitlist_id), -+ (unsigned long *) &lock->state.v); -+ -+ if (waitlist_id == SIX_LOCK_write) { -+ struct task_struct *p = READ_ONCE(lock->owner); -+ -+ if (p) -+ wake_up_process(p); -+ return; -+ } -+ -+ raw_spin_lock(&lock->wait_lock); -+ -+ list_for_each_entry_safe(w, next, wait_list, list) { -+ list_del_init(&w->list); -+ -+ if (wake_up_process(w->task) && -+ waitlist_id != SIX_LOCK_read) { -+ if (!list_empty(wait_list)) -+ set_bit(waitlist_bitnr(waitlist_id), -+ (unsigned long *) &lock->state.v); -+ break; -+ } -+ } -+ -+ raw_spin_unlock(&lock->wait_lock); -+} -+ -+__always_inline __flatten -+static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state state; -+ -+ EBUG_ON(!(lock->state.v & l[type].held_mask)); -+ EBUG_ON(type == SIX_LOCK_write && -+ !(lock->state.v & __SIX_LOCK_HELD_intent)); -+ -+ if (type != SIX_LOCK_write) -+ six_release(&lock->dep_map); -+ -+ if (type == SIX_LOCK_intent) { -+ EBUG_ON(lock->owner != current); -+ -+ if (lock->intent_lock_recurse) { -+ --lock->intent_lock_recurse; -+ return; -+ } -+ -+ lock->owner = NULL; -+ } -+ -+ state.v = atomic64_add_return_release(l[type].unlock_val, -+ &lock->state.counter); -+ six_lock_wakeup(lock, state, l[type].unlock_wakeup); -+} -+ -+#define __SIX_LOCK(type) \ -+bool six_trylock_##type(struct six_lock *lock) \ -+{ \ -+ return __six_trylock_type(lock, SIX_LOCK_##type); \ -+} \ -+EXPORT_SYMBOL_GPL(six_trylock_##type); \ -+ \ -+bool six_relock_##type(struct six_lock *lock, u32 seq) \ -+{ \ -+ return __six_relock_type(lock, SIX_LOCK_##type, seq); \ -+} \ -+EXPORT_SYMBOL_GPL(six_relock_##type); \ -+ \ -+int six_lock_##type(struct six_lock *lock, \ -+ six_lock_should_sleep_fn should_sleep_fn, void *p) \ -+{ \ -+ return __six_lock_type(lock, SIX_LOCK_##type, should_sleep_fn, p);\ -+} \ -+EXPORT_SYMBOL_GPL(six_lock_##type); \ -+ \ -+void six_unlock_##type(struct six_lock *lock) \ -+{ \ -+ __six_unlock_type(lock, SIX_LOCK_##type); \ -+} \ -+EXPORT_SYMBOL_GPL(six_unlock_##type); -+ -+__SIX_LOCK(read) -+__SIX_LOCK(intent) -+__SIX_LOCK(write) -+ -+#undef __SIX_LOCK -+ -+/* Convert from intent to read: */ -+void six_lock_downgrade(struct six_lock *lock) -+{ -+ six_lock_increment(lock, SIX_LOCK_read); -+ six_unlock_intent(lock); -+} -+EXPORT_SYMBOL_GPL(six_lock_downgrade); -+ -+bool six_lock_tryupgrade(struct six_lock *lock) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state old, new; -+ u64 v = READ_ONCE(lock->state.v); -+ -+ do { -+ new.v = old.v = v; -+ -+ EBUG_ON(!(old.v & l[SIX_LOCK_read].held_mask)); -+ -+ new.v += l[SIX_LOCK_read].unlock_val; -+ -+ if (new.v & l[SIX_LOCK_intent].lock_fail) -+ return false; -+ -+ new.v += l[SIX_LOCK_intent].lock_val; -+ } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter, -+ old.v, new.v)) != old.v); -+ -+ six_set_owner(lock, SIX_LOCK_intent, old); -+ six_lock_wakeup(lock, new, 
l[SIX_LOCK_read].unlock_wakeup); -+ -+ return true; -+} -+EXPORT_SYMBOL_GPL(six_lock_tryupgrade); -+ -+bool six_trylock_convert(struct six_lock *lock, -+ enum six_lock_type from, -+ enum six_lock_type to) -+{ -+ EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write); -+ -+ if (to == from) -+ return true; -+ -+ if (to == SIX_LOCK_read) { -+ six_lock_downgrade(lock); -+ return true; -+ } else { -+ return six_lock_tryupgrade(lock); -+ } -+} -+EXPORT_SYMBOL_GPL(six_trylock_convert); -+ -+/* -+ * Increment read/intent lock count, assuming we already have it read or intent -+ * locked: -+ */ -+void six_lock_increment(struct six_lock *lock, enum six_lock_type type) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ -+ EBUG_ON(type == SIX_LOCK_write); -+ six_acquire(&lock->dep_map, 0); -+ -+ /* XXX: assert already locked, and that we don't overflow: */ -+ -+ switch (type) { -+ case SIX_LOCK_read: -+ atomic64_add(l[type].lock_val, &lock->state.counter); -+ break; -+ case SIX_LOCK_intent: -+ lock->intent_lock_recurse++; -+ break; -+ case SIX_LOCK_write: -+ BUG(); -+ break; -+ } -+} -+EXPORT_SYMBOL_GPL(six_lock_increment); -+ -+void six_lock_wakeup_all(struct six_lock *lock) -+{ -+ struct six_lock_waiter *w; -+ -+ raw_spin_lock(&lock->wait_lock); -+ -+ list_for_each_entry(w, &lock->wait_list[0], list) -+ wake_up_process(w->task); -+ list_for_each_entry(w, &lock->wait_list[1], list) -+ wake_up_process(w->task); -+ -+ raw_spin_unlock(&lock->wait_lock); -+} -+EXPORT_SYMBOL_GPL(six_lock_wakeup_all); -diff --git a/lib/Kconfig b/lib/Kconfig -index 5d53f9609c25..a7024d19e000 100644 ---- a/lib/Kconfig -+++ b/lib/Kconfig -@@ -451,6 +451,9 @@ config ASSOCIATIVE_ARRAY - - for more information. - -+config CLOSURES -+ bool -+ - config HAS_IOMEM - bool - depends on !NO_IOMEM -diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug -index 21d9c5f6e7ec..aa82ecff7123 100644 ---- a/lib/Kconfig.debug -+++ b/lib/Kconfig.debug -@@ -1411,6 +1411,15 @@ config DEBUG_CREDENTIALS - - source "kernel/rcu/Kconfig.debug" - -+config DEBUG_CLOSURES -+ bool "Debug closures (bcache async widgits)" -+ depends on CLOSURES -+ select DEBUG_FS -+ help -+ Keeps all active closures in a linked list and provides a debugfs -+ interface to list them, which makes it possible to see asynchronous -+ operations that get stuck. 
-+ - config DEBUG_WQ_FORCE_RR_CPU - bool "Force round-robin CPU selection for unbound work items" - depends on DEBUG_KERNEL -@@ -1721,6 +1730,11 @@ config FAULT_INJECTION_STACKTRACE_FILTER - help - Provide stacktrace filter for fault-injection capabilities - -+config DYNAMIC_FAULT -+ bool "Enable dynamic fault support" -+ default n -+ depends on DEBUG_FS -+ - config ARCH_HAS_KCOV - bool - help -diff --git a/lib/Makefile b/lib/Makefile -index 685aee60de1d..74a60979c41c 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -189,6 +189,8 @@ obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o - obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o - obj-$(CONFIG_SYMBOLIC_ERRNAME) += errname.o - -+obj-$(CONFIG_DYNAMIC_FAULT) += dynamic_fault.o -+ - obj-$(CONFIG_NLATTR) += nlattr.o - - obj-$(CONFIG_LRU_CACHE) += lru_cache.o -@@ -201,6 +203,8 @@ obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o - - obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o - -+obj-$(CONFIG_CLOSURES) += closure.o -+ - obj-$(CONFIG_DQL) += dynamic_queue_limits.o - - obj-$(CONFIG_GLOB) += glob.o -diff --git a/lib/closure.c b/lib/closure.c -new file mode 100644 -index 000000000000..3e6366c26209 ---- /dev/null -+++ b/lib/closure.c -@@ -0,0 +1,214 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Asynchronous refcounty things -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+static inline void closure_put_after_sub(struct closure *cl, int flags) -+{ -+ int r = flags & CLOSURE_REMAINING_MASK; -+ -+ BUG_ON(flags & CLOSURE_GUARD_MASK); -+ BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR)); -+ -+ if (!r) { -+ if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { -+ atomic_set(&cl->remaining, -+ CLOSURE_REMAINING_INITIALIZER); -+ closure_queue(cl); -+ } else { -+ struct closure *parent = cl->parent; -+ closure_fn *destructor = cl->fn; -+ -+ closure_debug_destroy(cl); -+ -+ if (destructor) -+ destructor(cl); -+ -+ if (parent) -+ closure_put(parent); -+ } -+ } -+} -+ -+/* For clearing flags with the same atomic op as a put */ -+void closure_sub(struct closure *cl, int v) -+{ -+ closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining)); -+} -+EXPORT_SYMBOL(closure_sub); -+ -+/* -+ * closure_put - decrement a closure's refcount -+ */ -+void closure_put(struct closure *cl) -+{ -+ closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); -+} -+EXPORT_SYMBOL(closure_put); -+ -+/* -+ * closure_wake_up - wake up all closures on a wait list, without memory barrier -+ */ -+void __closure_wake_up(struct closure_waitlist *wait_list) -+{ -+ struct llist_node *list; -+ struct closure *cl, *t; -+ struct llist_node *reverse = NULL; -+ -+ list = llist_del_all(&wait_list->list); -+ -+ /* We first reverse the list to preserve FIFO ordering and fairness */ -+ reverse = llist_reverse_order(list); -+ -+ /* Then do the wakeups */ -+ llist_for_each_entry_safe(cl, t, reverse, list) { -+ closure_set_waiting(cl, 0); -+ closure_sub(cl, CLOSURE_WAITING + 1); -+ } -+} -+EXPORT_SYMBOL(__closure_wake_up); -+ -+/** -+ * closure_wait - add a closure to a waitlist -+ * @waitlist: will own a ref on @cl, which will be released when -+ * closure_wake_up() is called on @waitlist. -+ * @cl: closure pointer. 
-+ * -+ */ -+bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) -+{ -+ if (atomic_read(&cl->remaining) & CLOSURE_WAITING) -+ return false; -+ -+ closure_set_waiting(cl, _RET_IP_); -+ atomic_add(CLOSURE_WAITING + 1, &cl->remaining); -+ llist_add(&cl->list, &waitlist->list); -+ -+ return true; -+} -+EXPORT_SYMBOL(closure_wait); -+ -+struct closure_syncer { -+ struct task_struct *task; -+ int done; -+}; -+ -+static void closure_sync_fn(struct closure *cl) -+{ -+ struct closure_syncer *s = cl->s; -+ struct task_struct *p; -+ -+ rcu_read_lock(); -+ p = READ_ONCE(s->task); -+ s->done = 1; -+ wake_up_process(p); -+ rcu_read_unlock(); -+} -+ -+void __sched __closure_sync(struct closure *cl) -+{ -+ struct closure_syncer s = { .task = current }; -+ -+ cl->s = &s; -+ continue_at(cl, closure_sync_fn, NULL); -+ -+ while (1) { -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ if (s.done) -+ break; -+ schedule(); -+ } -+ -+ __set_current_state(TASK_RUNNING); -+} -+EXPORT_SYMBOL(__closure_sync); -+ -+#ifdef CONFIG_DEBUG_CLOSURES -+ -+static LIST_HEAD(closure_list); -+static DEFINE_SPINLOCK(closure_list_lock); -+ -+void closure_debug_create(struct closure *cl) -+{ -+ unsigned long flags; -+ -+ BUG_ON(cl->magic == CLOSURE_MAGIC_ALIVE); -+ cl->magic = CLOSURE_MAGIC_ALIVE; -+ -+ spin_lock_irqsave(&closure_list_lock, flags); -+ list_add(&cl->all, &closure_list); -+ spin_unlock_irqrestore(&closure_list_lock, flags); -+} -+EXPORT_SYMBOL(closure_debug_create); -+ -+void closure_debug_destroy(struct closure *cl) -+{ -+ unsigned long flags; -+ -+ BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE); -+ cl->magic = CLOSURE_MAGIC_DEAD; -+ -+ spin_lock_irqsave(&closure_list_lock, flags); -+ list_del(&cl->all); -+ spin_unlock_irqrestore(&closure_list_lock, flags); -+} -+EXPORT_SYMBOL(closure_debug_destroy); -+ -+static int debug_seq_show(struct seq_file *f, void *data) -+{ -+ struct closure *cl; -+ -+ spin_lock_irq(&closure_list_lock); -+ -+ list_for_each_entry(cl, &closure_list, all) { -+ int r = atomic_read(&cl->remaining); -+ -+ seq_printf(f, "%p: %pS -> %pS p %p r %i ", -+ cl, (void *) cl->ip, cl->fn, cl->parent, -+ r & CLOSURE_REMAINING_MASK); -+ -+ seq_printf(f, "%s%s\n", -+ test_bit(WORK_STRUCT_PENDING_BIT, -+ work_data_bits(&cl->work)) ? "Q" : "", -+ r & CLOSURE_RUNNING ? "R" : ""); -+ -+ if (r & CLOSURE_WAITING) -+ seq_printf(f, " W %pS\n", -+ (void *) cl->waiting_on); -+ -+ seq_puts(f, "\n"); -+ } -+ -+ spin_unlock_irq(&closure_list_lock); -+ return 0; -+} -+ -+static int debug_seq_open(struct inode *inode, struct file *file) -+{ -+ return single_open(file, debug_seq_show, NULL); -+} -+ -+static const struct file_operations debug_ops = { -+ .owner = THIS_MODULE, -+ .open = debug_seq_open, -+ .read = seq_read, -+ .release = single_release -+}; -+ -+static int __init closure_debug_init(void) -+{ -+ debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops); -+ return 0; -+} -+late_initcall(closure_debug_init) -+ -+#endif -diff --git a/lib/dynamic_fault.c b/lib/dynamic_fault.c -new file mode 100644 -index 000000000000..75fc9a1b4bce ---- /dev/null -+++ b/lib/dynamic_fault.c -@@ -0,0 +1,760 @@ -+/* -+ * lib/dynamic_fault.c -+ * -+ * make dynamic_fault() calls runtime configurable based upon their -+ * source module. -+ * -+ * Copyright (C) 2011 Adam Berkan -+ * Based on dynamic_debug.c: -+ * Copyright (C) 2008 Jason Baron -+ * By Greg Banks -+ * Copyright (c) 2008 Silicon Graphics Inc. All Rights Reserved. 
-+ * -+ */ -+ -+#define pr_fmt(fmt) "dfault: " fmt "\n" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#undef kzalloc -+ -+extern struct _dfault __start___faults[]; -+extern struct _dfault __stop___faults[]; -+ -+struct dfault_table { -+ struct list_head link; -+ char *mod_name; -+ unsigned int num_dfaults; -+ struct _dfault *dfaults; -+}; -+ -+struct dfault_query { -+ const char *filename; -+ const char *module; -+ const char *function; -+ const char *class; -+ unsigned int first_line, last_line; -+ unsigned int first_index, last_index; -+ -+ unsigned match_line:1; -+ unsigned match_index:1; -+ -+ unsigned set_enabled:1; -+ unsigned enabled:2; -+ -+ unsigned set_frequency:1; -+ unsigned frequency; -+}; -+ -+struct dfault_iter { -+ struct dfault_table *table; -+ unsigned int idx; -+}; -+ -+static DEFINE_MUTEX(dfault_lock); -+static LIST_HEAD(dfault_tables); -+ -+bool __dynamic_fault_enabled(struct _dfault *df) -+{ -+ union dfault_state old, new; -+ unsigned v = df->state.v; -+ bool ret; -+ -+ do { -+ old.v = new.v = v; -+ -+ if (new.enabled == DFAULT_DISABLED) -+ return false; -+ -+ ret = df->frequency -+ ? ++new.count >= df->frequency -+ : true; -+ if (ret) -+ new.count = 0; -+ if (ret && new.enabled == DFAULT_ONESHOT) -+ new.enabled = DFAULT_DISABLED; -+ } while ((v = cmpxchg(&df->state.v, old.v, new.v)) != old.v); -+ -+ if (ret) -+ pr_debug("returned true for %s:%u", df->filename, df->line); -+ -+ return ret; -+} -+EXPORT_SYMBOL(__dynamic_fault_enabled); -+ -+/* Return the last part of a pathname */ -+static inline const char *basename(const char *path) -+{ -+ const char *tail = strrchr(path, '/'); -+ -+ return tail ? tail + 1 : path; -+} -+ -+/* format a string into buf[] which describes the _dfault's flags */ -+static char *dfault_describe_flags(struct _dfault *df, char *buf, size_t buflen) -+{ -+ switch (df->state.enabled) { -+ case DFAULT_DISABLED: -+ strlcpy(buf, "disabled", buflen); -+ break; -+ case DFAULT_ENABLED: -+ strlcpy(buf, "enabled", buflen); -+ break; -+ case DFAULT_ONESHOT: -+ strlcpy(buf, "oneshot", buflen); -+ break; -+ default: -+ BUG(); -+ } -+ -+ return buf; -+} -+ -+/* -+ * must be called with dfault_lock held -+ */ -+ -+/* -+ * Search the tables for _dfault's which match the given -+ * `query' and apply the `flags' and `mask' to them. Tells -+ * the user which dfault's were changed, or whether none -+ * were matched. 
-+ */ -+static int dfault_change(const struct dfault_query *query) -+{ -+ struct dfault_table *dt; -+ unsigned int nfound = 0; -+ unsigned i, index = 0; -+ char flagbuf[16]; -+ -+ /* search for matching dfaults */ -+ mutex_lock(&dfault_lock); -+ list_for_each_entry(dt, &dfault_tables, link) { -+ -+ /* match against the module name */ -+ if (query->module != NULL && -+ strcmp(query->module, dt->mod_name)) -+ continue; -+ -+ for (i = 0 ; i < dt->num_dfaults ; i++) { -+ struct _dfault *df = &dt->dfaults[i]; -+ -+ /* match against the source filename */ -+ if (query->filename != NULL && -+ strcmp(query->filename, df->filename) && -+ strcmp(query->filename, basename(df->filename))) -+ continue; -+ -+ /* match against the function */ -+ if (query->function != NULL && -+ strcmp(query->function, df->function)) -+ continue; -+ -+ /* match against the class */ -+ if (query->class) { -+ size_t len = strlen(query->class); -+ -+ if (strncmp(query->class, df->class, len)) -+ continue; -+ -+ if (df->class[len] && df->class[len] != ':') -+ continue; -+ } -+ -+ /* match against the line number range */ -+ if (query->match_line && -+ (df->line < query->first_line || -+ df->line > query->last_line)) -+ continue; -+ -+ /* match against the fault index */ -+ if (query->match_index && -+ (index < query->first_index || -+ index > query->last_index)) { -+ index++; -+ continue; -+ } -+ -+ if (query->set_enabled && -+ query->enabled != df->state.enabled) { -+ if (query->enabled != DFAULT_DISABLED) -+ static_key_slow_inc(&df->enabled); -+ else if (df->state.enabled != DFAULT_DISABLED) -+ static_key_slow_dec(&df->enabled); -+ -+ df->state.enabled = query->enabled; -+ } -+ -+ if (query->set_frequency) -+ df->frequency = query->frequency; -+ -+ pr_debug("changed %s:%d [%s]%s #%d %s", -+ df->filename, df->line, dt->mod_name, -+ df->function, index, -+ dfault_describe_flags(df, flagbuf, -+ sizeof(flagbuf))); -+ -+ index++; -+ nfound++; -+ } -+ } -+ mutex_unlock(&dfault_lock); -+ -+ pr_debug("dfault: %u matches", nfound); -+ -+ return nfound ? 0 : -ENOENT; -+} -+ -+/* -+ * Split the buffer `buf' into space-separated words. -+ * Handles simple " and ' quoting, i.e. without nested, -+ * embedded or escaped \". Return the number of words -+ * or <0 on error. -+ */ -+static int dfault_tokenize(char *buf, char *words[], int maxwords) -+{ -+ int nwords = 0; -+ -+ while (*buf) { -+ char *end; -+ -+ /* Skip leading whitespace */ -+ buf = skip_spaces(buf); -+ if (!*buf) -+ break; /* oh, it was trailing whitespace */ -+ -+ /* Run `end' over a word, either whitespace separated or quoted -+ */ -+ if (*buf == '"' || *buf == '\'') { -+ int quote = *buf++; -+ -+ for (end = buf ; *end && *end != quote ; end++) -+ ; -+ if (!*end) -+ return -EINVAL; /* unclosed quote */ -+ } else { -+ for (end = buf ; *end && !isspace(*end) ; end++) -+ ; -+ BUG_ON(end == buf); -+ } -+ /* Here `buf' is the start of the word, `end' is one past the -+ * end -+ */ -+ -+ if (nwords == maxwords) -+ return -EINVAL; /* ran out of words[] before bytes */ -+ if (*end) -+ *end++ = '\0'; /* terminate the word */ -+ words[nwords++] = buf; -+ buf = end; -+ } -+ -+ return nwords; -+} -+ -+/* -+ * Parse a range. 
-+ */ -+static inline int parse_range(char *str, -+ unsigned int *first, -+ unsigned int *last) -+{ -+ char *first_str = str; -+ char *last_str = strchr(first_str, '-'); -+ -+ if (last_str) -+ *last_str++ = '\0'; -+ -+ if (kstrtouint(first_str, 10, first)) -+ return -EINVAL; -+ -+ if (!last_str) -+ *last = *first; -+ else if (kstrtouint(last_str, 10, last)) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+enum dfault_token { -+ TOK_INVALID, -+ -+ /* Queries */ -+ TOK_FUNC, -+ TOK_FILE, -+ TOK_LINE, -+ TOK_MODULE, -+ TOK_CLASS, -+ TOK_INDEX, -+ -+ /* Commands */ -+ TOK_DISABLE, -+ TOK_ENABLE, -+ TOK_ONESHOT, -+ TOK_FREQUENCY, -+}; -+ -+static const struct { -+ const char *str; -+ enum dfault_token tok; -+ unsigned args_required; -+} dfault_token_strs[] = { -+ { "func", TOK_FUNC, 1, }, -+ { "file", TOK_FILE, 1, }, -+ { "line", TOK_LINE, 1, }, -+ { "module", TOK_MODULE, 1, }, -+ { "class", TOK_CLASS, 1, }, -+ { "index", TOK_INDEX, 1, }, -+ { "disable", TOK_DISABLE, 0, }, -+ { "enable", TOK_ENABLE, 0, }, -+ { "oneshot", TOK_ONESHOT, 0, }, -+ { "frequency", TOK_FREQUENCY, 1, }, -+}; -+ -+static enum dfault_token str_to_token(const char *word, unsigned nr_words) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(dfault_token_strs); i++) -+ if (!strcmp(word, dfault_token_strs[i].str)) { -+ if (nr_words < dfault_token_strs[i].args_required) { -+ pr_debug("insufficient arguments to \"%s\"", -+ word); -+ return TOK_INVALID; -+ } -+ -+ return dfault_token_strs[i].tok; -+ } -+ -+ pr_debug("unknown keyword \"%s\"", word); -+ -+ return TOK_INVALID; -+} -+ -+static int dfault_parse_command(struct dfault_query *query, -+ enum dfault_token tok, -+ char *words[], size_t nr_words) -+{ -+ unsigned i = 0; -+ int ret; -+ -+ switch (tok) { -+ case TOK_INVALID: -+ return -EINVAL; -+ case TOK_FUNC: -+ query->function = words[i++]; -+ case TOK_FILE: -+ query->filename = words[i++]; -+ return 1; -+ case TOK_LINE: -+ ret = parse_range(words[i++], -+ &query->first_line, -+ &query->last_line); -+ if (ret) -+ return ret; -+ query->match_line = true; -+ break; -+ case TOK_MODULE: -+ query->module = words[i++]; -+ break; -+ case TOK_CLASS: -+ query->class = words[i++]; -+ break; -+ case TOK_INDEX: -+ ret = parse_range(words[i++], -+ &query->first_index, -+ &query->last_index); -+ if (ret) -+ return ret; -+ query->match_index = true; -+ break; -+ case TOK_DISABLE: -+ query->set_enabled = true; -+ query->enabled = DFAULT_DISABLED; -+ break; -+ case TOK_ENABLE: -+ query->set_enabled = true; -+ query->enabled = DFAULT_ENABLED; -+ break; -+ case TOK_ONESHOT: -+ query->set_enabled = true; -+ query->enabled = DFAULT_ONESHOT; -+ break; -+ case TOK_FREQUENCY: -+ query->set_frequency = 1; -+ ret = kstrtouint(words[i++], 10, &query->frequency); -+ if (ret) -+ return ret; -+ -+ if (!query->set_enabled) { -+ query->set_enabled = 1; -+ query->enabled = DFAULT_ENABLED; -+ } -+ break; -+ } -+ -+ return i; -+} -+ -+/* -+ * Parse words[] as a dfault query specification, which is a series -+ * of (keyword, value) pairs chosen from these possibilities: -+ * -+ * func -+ * file -+ * file -+ * module -+ * line -+ * line - // where either may be empty -+ * index - // dynamic faults numbered from -+ * // to inside each matching function -+ */ -+static int dfault_parse_query(struct dfault_query *query, -+ char *words[], size_t nr_words) -+{ -+ unsigned i = 0; -+ -+ while (i < nr_words) { -+ const char *tok_str = words[i++]; -+ enum dfault_token tok = str_to_token(tok_str, nr_words - i); -+ int ret = dfault_parse_command(query, tok, words + i, -+ 
nr_words - i); -+ -+ if (ret < 0) -+ return ret; -+ i += ret; -+ BUG_ON(i > nr_words); -+ } -+ -+ return 0; -+} -+ -+/* -+ * File_ops->write method for /dynamic_fault/conrol. Gathers the -+ * command text from userspace, parses and executes it. -+ */ -+static ssize_t dfault_proc_write(struct file *file, const char __user *ubuf, -+ size_t len, loff_t *offp) -+{ -+ struct dfault_query query; -+#define MAXWORDS 9 -+ int nwords; -+ char *words[MAXWORDS]; -+ char tmpbuf[256]; -+ int ret; -+ -+ memset(&query, 0, sizeof(query)); -+ -+ if (len == 0) -+ return 0; -+ /* we don't check *offp -- multiple writes() are allowed */ -+ if (len > sizeof(tmpbuf)-1) -+ return -E2BIG; -+ if (copy_from_user(tmpbuf, ubuf, len)) -+ return -EFAULT; -+ tmpbuf[len] = '\0'; -+ -+ pr_debug("read %zu bytes from userspace", len); -+ -+ nwords = dfault_tokenize(tmpbuf, words, MAXWORDS); -+ if (nwords < 0) -+ return -EINVAL; -+ if (dfault_parse_query(&query, words, nwords)) -+ return -EINVAL; -+ -+ /* actually go and implement the change */ -+ ret = dfault_change(&query); -+ if (ret < 0) -+ return ret; -+ -+ *offp += len; -+ return len; -+} -+ -+/* Control file read code */ -+ -+/* -+ * Set the iterator to point to the first _dfault object -+ * and return a pointer to that first object. Returns -+ * NULL if there are no _dfaults at all. -+ */ -+static struct _dfault *dfault_iter_first(struct dfault_iter *iter) -+{ -+ if (list_empty(&dfault_tables)) { -+ iter->table = NULL; -+ iter->idx = 0; -+ return NULL; -+ } -+ iter->table = list_entry(dfault_tables.next, -+ struct dfault_table, link); -+ iter->idx = 0; -+ return &iter->table->dfaults[iter->idx]; -+} -+ -+/* -+ * Advance the iterator to point to the next _dfault -+ * object from the one the iterator currently points at, -+ * and returns a pointer to the new _dfault. Returns -+ * NULL if the iterator has seen all the _dfaults. -+ */ -+static struct _dfault *dfault_iter_next(struct dfault_iter *iter) -+{ -+ if (iter->table == NULL) -+ return NULL; -+ if (++iter->idx == iter->table->num_dfaults) { -+ /* iterate to next table */ -+ iter->idx = 0; -+ if (list_is_last(&iter->table->link, &dfault_tables)) { -+ iter->table = NULL; -+ return NULL; -+ } -+ iter->table = list_entry(iter->table->link.next, -+ struct dfault_table, link); -+ } -+ return &iter->table->dfaults[iter->idx]; -+} -+ -+/* -+ * Seq_ops start method. Called at the start of every -+ * read() call from userspace. Takes the dfault_lock and -+ * seeks the seq_file's iterator to the given position. -+ */ -+static void *dfault_proc_start(struct seq_file *m, loff_t *pos) -+{ -+ struct dfault_iter *iter = m->private; -+ struct _dfault *dp; -+ int n = *pos; -+ -+ mutex_lock(&dfault_lock); -+ -+ if (n < 0) -+ return NULL; -+ dp = dfault_iter_first(iter); -+ while (dp != NULL && --n >= 0) -+ dp = dfault_iter_next(iter); -+ return dp; -+} -+ -+/* -+ * Seq_ops next method. Called several times within a read() -+ * call from userspace, with dfault_lock held. Walks to the -+ * next _dfault object with a special case for the header line. -+ */ -+static void *dfault_proc_next(struct seq_file *m, void *p, loff_t *pos) -+{ -+ struct dfault_iter *iter = m->private; -+ struct _dfault *dp; -+ -+ if (p == SEQ_START_TOKEN) -+ dp = dfault_iter_first(iter); -+ else -+ dp = dfault_iter_next(iter); -+ ++*pos; -+ return dp; -+} -+ -+/* -+ * Seq_ops show method. Called several times within a read() -+ * call from userspace, with dfault_lock held. 
Formats the -+ * current _dfault as a single human-readable line, with a -+ * special case for the header line. -+ */ -+static int dfault_proc_show(struct seq_file *m, void *p) -+{ -+ struct dfault_iter *iter = m->private; -+ struct _dfault *df = p; -+ char flagsbuf[8]; -+ -+ seq_printf(m, "%s:%u class:%s module:%s func:%s %s \"\"\n", -+ df->filename, df->line, df->class, -+ iter->table->mod_name, df->function, -+ dfault_describe_flags(df, flagsbuf, sizeof(flagsbuf))); -+ -+ return 0; -+} -+ -+/* -+ * Seq_ops stop method. Called at the end of each read() -+ * call from userspace. Drops dfault_lock. -+ */ -+static void dfault_proc_stop(struct seq_file *m, void *p) -+{ -+ mutex_unlock(&dfault_lock); -+} -+ -+static const struct seq_operations dfault_proc_seqops = { -+ .start = dfault_proc_start, -+ .next = dfault_proc_next, -+ .show = dfault_proc_show, -+ .stop = dfault_proc_stop -+}; -+ -+/* -+ * File_ops->open method for /dynamic_fault/control. Does the seq_file -+ * setup dance, and also creates an iterator to walk the _dfaults. -+ * Note that we create a seq_file always, even for O_WRONLY files -+ * where it's not needed, as doing so simplifies the ->release method. -+ */ -+static int dfault_proc_open(struct inode *inode, struct file *file) -+{ -+ struct dfault_iter *iter; -+ int err; -+ -+ iter = kzalloc(sizeof(*iter), GFP_KERNEL); -+ if (iter == NULL) -+ return -ENOMEM; -+ -+ err = seq_open(file, &dfault_proc_seqops); -+ if (err) { -+ kfree(iter); -+ return err; -+ } -+ ((struct seq_file *) file->private_data)->private = iter; -+ return 0; -+} -+ -+static const struct file_operations dfault_proc_fops = { -+ .owner = THIS_MODULE, -+ .open = dfault_proc_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release_private, -+ .write = dfault_proc_write -+}; -+ -+/* -+ * Allocate a new dfault_table for the given module -+ * and add it to the global list. -+ */ -+int dfault_add_module(struct _dfault *tab, unsigned int n, -+ const char *name) -+{ -+ struct dfault_table *dt; -+ char *new_name; -+ const char *func = NULL; -+ int i; -+ -+ dt = kzalloc(sizeof(*dt), GFP_KERNEL); -+ if (dt == NULL) -+ return -ENOMEM; -+ new_name = kstrdup(name, GFP_KERNEL); -+ if (new_name == NULL) { -+ kfree(dt); -+ return -ENOMEM; -+ } -+ dt->mod_name = new_name; -+ dt->num_dfaults = n; -+ dt->dfaults = tab; -+ -+ mutex_lock(&dfault_lock); -+ list_add_tail(&dt->link, &dfault_tables); -+ mutex_unlock(&dfault_lock); -+ -+ /* __attribute__(("section")) emits things in reverse order */ -+ for (i = n - 1; i >= 0; i--) -+ if (!func || strcmp(tab[i].function, func)) -+ func = tab[i].function; -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(dfault_add_module); -+ -+static void dfault_table_free(struct dfault_table *dt) -+{ -+ list_del_init(&dt->link); -+ kfree(dt->mod_name); -+ kfree(dt); -+} -+ -+/* -+ * Called in response to a module being unloaded. Removes -+ * any dfault_table's which point at the module. 
-+ */ -+int dfault_remove_module(char *mod_name) -+{ -+ struct dfault_table *dt, *nextdt; -+ int ret = -ENOENT; -+ -+ mutex_lock(&dfault_lock); -+ list_for_each_entry_safe(dt, nextdt, &dfault_tables, link) { -+ if (!strcmp(dt->mod_name, mod_name)) { -+ dfault_table_free(dt); -+ ret = 0; -+ } -+ } -+ mutex_unlock(&dfault_lock); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(dfault_remove_module); -+ -+static void dfault_remove_all_tables(void) -+{ -+ mutex_lock(&dfault_lock); -+ while (!list_empty(&dfault_tables)) { -+ struct dfault_table *dt = list_entry(dfault_tables.next, -+ struct dfault_table, -+ link); -+ dfault_table_free(dt); -+ } -+ mutex_unlock(&dfault_lock); -+} -+ -+static int __init dynamic_fault_init(void) -+{ -+ struct dentry *dir, *file; -+ struct _dfault *iter, *iter_start; -+ const char *modname = NULL; -+ int ret = 0; -+ int n = 0; -+ -+ dir = debugfs_create_dir("dynamic_fault", NULL); -+ if (!dir) -+ return -ENOMEM; -+ file = debugfs_create_file("control", 0644, dir, NULL, -+ &dfault_proc_fops); -+ if (!file) { -+ debugfs_remove(dir); -+ return -ENOMEM; -+ } -+ if (__start___faults != __stop___faults) { -+ iter = __start___faults; -+ modname = iter->modname; -+ iter_start = iter; -+ for (; iter < __stop___faults; iter++) { -+ if (strcmp(modname, iter->modname)) { -+ ret = dfault_add_module(iter_start, n, modname); -+ if (ret) -+ goto out_free; -+ n = 0; -+ modname = iter->modname; -+ iter_start = iter; -+ } -+ n++; -+ } -+ ret = dfault_add_module(iter_start, n, modname); -+ } -+out_free: -+ if (ret) { -+ dfault_remove_all_tables(); -+ debugfs_remove(dir); -+ debugfs_remove(file); -+ } -+ return 0; -+} -+module_init(dynamic_fault_init); -diff --git a/mm/filemap.c b/mm/filemap.c -index 23a051a7ef0f..d39a3f28d6a9 100644 ---- a/mm/filemap.c -+++ b/mm/filemap.c -@@ -116,6 +116,69 @@ - * ->tasklist_lock (memory_failure, collect_procs_ao) - */ - -+static int page_cache_tree_insert_vec(struct page *pages[], -+ unsigned nr_pages, -+ struct address_space *mapping, -+ pgoff_t index, -+ gfp_t gfp_mask, -+ void *shadow[]) -+{ -+ XA_STATE(xas, &mapping->i_pages, index); -+ void *old; -+ int i = 0, error = 0; -+ -+ mapping_set_update(&xas, mapping); -+ -+ if (!nr_pages) -+ return 0; -+ -+ xa_lock_irq(&mapping->i_pages); -+ -+ while (1) { -+ old = xas_load(&xas); -+ if (old && !xa_is_value(old)) { -+ error = -EEXIST; -+ break; -+ } -+ -+ xas_store(&xas, pages[i]); -+ error = xas_error(&xas); -+ -+ if (error == -ENOMEM) { -+ xa_unlock_irq(&mapping->i_pages); -+ if (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK)) -+ error = 0; -+ xa_lock_irq(&mapping->i_pages); -+ -+ if (!error) -+ continue; -+ break; -+ } -+ -+ if (error) -+ break; -+ -+ if (shadow) -+ shadow[i] = old; -+ if (xa_is_value(old)) -+ mapping->nrexceptional--; -+ mapping->nrpages++; -+ -+ /* hugetlb pages do not participate in page cache accounting. 
*/ -+ if (!PageHuge(pages[i])) -+ __inc_node_page_state(pages[i], NR_FILE_PAGES); -+ -+ if (++i == nr_pages) -+ break; -+ -+ xas_next(&xas); -+ } -+ -+ xa_unlock_irq(&mapping->i_pages); -+ -+ return i ?: error; -+} -+ - static void page_cache_delete(struct address_space *mapping, - struct page *page, void *shadow) - { -@@ -825,118 +888,154 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) - } - EXPORT_SYMBOL_GPL(replace_page_cache_page); - --static int __add_to_page_cache_locked(struct page *page, -- struct address_space *mapping, -- pgoff_t offset, gfp_t gfp_mask, -- void **shadowp) -+static int add_to_page_cache_vec(struct page **pages, unsigned nr_pages, -+ struct address_space *mapping, -+ pgoff_t index, gfp_t gfp_mask, -+ void *shadow[]) - { -- XA_STATE(xas, &mapping->i_pages, offset); -- int huge = PageHuge(page); - struct mem_cgroup *memcg; -- int error; -- void *old; -+ int i, nr_added = 0, error = 0; - -- VM_BUG_ON_PAGE(!PageLocked(page), page); -- VM_BUG_ON_PAGE(PageSwapBacked(page), page); -- mapping_set_update(&xas, mapping); -+ for (i = 0; i < nr_pages; i++) { -+ struct page *page = pages[i]; - -- if (!huge) { -- error = mem_cgroup_try_charge(page, current->mm, -- gfp_mask, &memcg, false); -- if (error) -- return error; -+ VM_BUG_ON_PAGE(PageSwapBacked(page), page); -+ VM_BUG_ON_PAGE(PageSwapCache(page), page); -+ -+ if (!PageHuge(page)) { -+ error = mem_cgroup_try_charge(page, current->mm, -+ gfp_mask, &memcg, false); -+ if (error) { -+ if (!i) -+ return error; -+ nr_pages = i; -+ break; -+ } -+ } -+ -+ __SetPageLocked(page); -+ get_page(page); -+ page->mapping = mapping; -+ page->index = index + i; - } - -- get_page(page); -- page->mapping = mapping; -- page->index = offset; -+ error = page_cache_tree_insert_vec(pages, nr_pages, mapping, -+ index, gfp_mask, shadow); -+ if (error > 0) { -+ nr_added = error; -+ error = 0; -+ } - -- do { -- xas_lock_irq(&xas); -- old = xas_load(&xas); -- if (old && !xa_is_value(old)) -- xas_set_err(&xas, -EEXIST); -- xas_store(&xas, page); -- if (xas_error(&xas)) -- goto unlock; -+ for (i = 0; i < nr_added; i++) { -+ struct page *page = pages[i]; - -- if (xa_is_value(old)) { -- mapping->nrexceptional--; -- if (shadowp) -- *shadowp = old; -- } -- mapping->nrpages++; -+ if (!PageHuge(page)) -+ mem_cgroup_commit_charge(page, memcg, false, false); - -- /* hugetlb pages do not participate in page cache accounting */ -- if (!huge) -- __inc_node_page_state(page, NR_FILE_PAGES); --unlock: -- xas_unlock_irq(&xas); -- } while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK)); -+ trace_mm_filemap_add_to_page_cache(page); -+ } - -- if (xas_error(&xas)) -- goto error; -+ for (i = nr_added; i < nr_pages; i++) { -+ struct page *page = pages[i]; - -- if (!huge) -- mem_cgroup_commit_charge(page, memcg, false, false); -- trace_mm_filemap_add_to_page_cache(page); -- return 0; --error: -- page->mapping = NULL; -- /* Leave page->index set: truncation relies upon it */ -- if (!huge) -- mem_cgroup_cancel_charge(page, memcg, false); -- put_page(page); -- return xas_error(&xas); -+ if (!PageHuge(page)) -+ mem_cgroup_cancel_charge(page, memcg, false); -+ -+ /* Leave page->index set: truncation relies upon it */ -+ page->mapping = NULL; -+ put_page(page); -+ __ClearPageLocked(page); -+ } -+ -+ return nr_added ?: error; - } --ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO); - - /** -- * add_to_page_cache_locked - add a locked page to the pagecache -+ * add_to_page_cache - add a newly allocated page to the pagecache - * @page: page to 
add - * @mapping: the page's address_space - * @offset: page index - * @gfp_mask: page allocation mode - * -- * This function is used to add a page to the pagecache. It must be locked. -- * This function does not add the page to the LRU. The caller must do that. -+ * This function is used to add a page to the pagecache. It must be newly -+ * allocated. This function does not add the page to the LRU. The caller must -+ * do that. - * - * Return: %0 on success, negative error code otherwise. - */ --int add_to_page_cache_locked(struct page *page, struct address_space *mapping, -- pgoff_t offset, gfp_t gfp_mask) -+int add_to_page_cache(struct page *page, struct address_space *mapping, -+ pgoff_t offset, gfp_t gfp_mask) - { -- return __add_to_page_cache_locked(page, mapping, offset, -- gfp_mask, NULL); -+ int ret = add_to_page_cache_vec(&page, 1, mapping, offset, -+ gfp_mask, NULL); -+ if (ret < 0) -+ return ret; -+ return 0; - } --EXPORT_SYMBOL(add_to_page_cache_locked); -+EXPORT_SYMBOL(add_to_page_cache); -+ALLOW_ERROR_INJECTION(add_to_page_cache, ERRNO); - --int add_to_page_cache_lru(struct page *page, struct address_space *mapping, -- pgoff_t offset, gfp_t gfp_mask) -+int add_to_page_cache_lru_vec(struct address_space *mapping, -+ struct page **pages, -+ unsigned nr_pages, -+ pgoff_t offset, gfp_t gfp_mask) - { -- void *shadow = NULL; -- int ret; -+ void *shadow_stack[8], **shadow = shadow_stack; -+ int i, ret = 0, err = 0, nr_added; -+ -+ if (nr_pages > ARRAY_SIZE(shadow_stack)) { -+ shadow = kmalloc_array(nr_pages, sizeof(void *), gfp_mask); -+ if (!shadow) -+ goto slowpath; -+ } -+ -+ for (i = 0; i < nr_pages; i++) -+ VM_BUG_ON_PAGE(PageActive(pages[i]), pages[i]); -+ -+ ret = add_to_page_cache_vec(pages, nr_pages, mapping, -+ offset, gfp_mask, shadow); -+ nr_added = ret > 0 ? ret : 0; -+ -+ /* -+ * The page might have been evicted from cache only recently, in which -+ * case it should be activated like any other repeatedly accessed page. -+ * The exception is pages getting rewritten; evicting other data from -+ * the working set, only to cache data that will get overwritten with -+ * something else, is a waste of memory. -+ */ -+ for (i = 0; i < nr_added; i++) { -+ struct page *page = pages[i]; -+ void *s = shadow[i]; - -- __SetPageLocked(page); -- ret = __add_to_page_cache_locked(page, mapping, offset, -- gfp_mask, &shadow); -- if (unlikely(ret)) -- __ClearPageLocked(page); -- else { -- /* -- * The page might have been evicted from cache only -- * recently, in which case it should be activated like -- * any other repeatedly accessed page. -- * The exception is pages getting rewritten; evicting other -- * data from the working set, only to cache data that will -- * get overwritten with something else, is a waste of memory. 
-- */ - WARN_ON_ONCE(PageActive(page)); -- if (!(gfp_mask & __GFP_WRITE) && shadow) -- workingset_refault(page, shadow); -+ if (!(gfp_mask & __GFP_WRITE) && s) -+ workingset_refault(page, s); - lru_cache_add(page); - } -+ -+ if (shadow != shadow_stack) -+ kfree(shadow); -+ - return ret; -+slowpath: -+ for (i = 0; i < nr_pages; i++) { -+ err = add_to_page_cache_lru(pages[i], mapping, -+ offset + i, gfp_mask); -+ if (err) -+ break; -+ } -+ -+ return i ?: err; -+} -+EXPORT_SYMBOL_GPL(add_to_page_cache_lru_vec); -+ -+int add_to_page_cache_lru(struct page *page, struct address_space *mapping, -+ pgoff_t offset, gfp_t gfp_mask) -+{ -+ int ret = add_to_page_cache_lru_vec(mapping, &page, 1, offset, gfp_mask); -+ if (ret < 0) -+ return ret; -+ return 0; - } - EXPORT_SYMBOL_GPL(add_to_page_cache_lru); - -@@ -1827,6 +1926,7 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, - - return ret; - } -+EXPORT_SYMBOL(find_get_pages_range); - - /** - * find_get_pages_contig - gang contiguous pagecache lookup -@@ -1975,6 +2075,222 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra) - ra->ra_pages /= 4; - } - -+static struct page * -+generic_file_buffered_read_readpage(struct file *filp, -+ struct address_space *mapping, -+ struct page *page) -+{ -+ struct file_ra_state *ra = &filp->f_ra; -+ int error; -+ -+ /* -+ * A previous I/O error may have been due to temporary -+ * failures, eg. multipath errors. -+ * PG_error will be set again if readpage fails. -+ */ -+ ClearPageError(page); -+ /* Start the actual read. The read will unlock the page. */ -+ error = mapping->a_ops->readpage(filp, page); -+ -+ if (unlikely(error)) { -+ put_page(page); -+ return error != AOP_TRUNCATED_PAGE ? ERR_PTR(error) : NULL; -+ } -+ -+ if (!PageUptodate(page)) { -+ error = lock_page_killable(page); -+ if (unlikely(error)) { -+ put_page(page); -+ return ERR_PTR(error); -+ } -+ if (!PageUptodate(page)) { -+ if (page->mapping == NULL) { -+ /* -+ * invalidate_mapping_pages got it -+ */ -+ unlock_page(page); -+ put_page(page); -+ return NULL; -+ } -+ unlock_page(page); -+ shrink_readahead_size_eio(ra); -+ put_page(page); -+ return ERR_PTR(-EIO); -+ } -+ unlock_page(page); -+ } -+ -+ return page; -+} -+ -+static struct page * -+generic_file_buffered_read_pagenotuptodate(struct file *filp, -+ struct iov_iter *iter, -+ struct page *page, -+ loff_t pos, loff_t count) -+{ -+ struct address_space *mapping = filp->f_mapping; -+ struct inode *inode = mapping->host; -+ int error; -+ -+ /* -+ * See comment in do_read_cache_page on why -+ * wait_on_page_locked is used to avoid unnecessarily -+ * serialisations and why it's safe. -+ */ -+ error = wait_on_page_locked_killable(page); -+ if (unlikely(error)) { -+ put_page(page); -+ return ERR_PTR(error); -+ } -+ -+ if (PageUptodate(page)) -+ return page; -+ -+ if (inode->i_blkbits == PAGE_SHIFT || -+ !mapping->a_ops->is_partially_uptodate) -+ goto page_not_up_to_date; -+ /* pipes can't handle partially uptodate pages */ -+ if (unlikely(iov_iter_is_pipe(iter))) -+ goto page_not_up_to_date; -+ if (!trylock_page(page)) -+ goto page_not_up_to_date; -+ /* Did it get truncated before we got the lock? */ -+ if (!page->mapping) -+ goto page_not_up_to_date_locked; -+ -+ if (!mapping->a_ops->is_partially_uptodate(page, -+ pos & ~PAGE_MASK, count)) -+ goto page_not_up_to_date_locked; -+ unlock_page(page); -+ return page; -+ -+page_not_up_to_date: -+ /* Get exclusive access to the page ... 
*/ -+ error = lock_page_killable(page); -+ if (unlikely(error)) { -+ put_page(page); -+ return ERR_PTR(error); -+ } -+ -+page_not_up_to_date_locked: -+ /* Did it get truncated before we got the lock? */ -+ if (!page->mapping) { -+ unlock_page(page); -+ put_page(page); -+ return NULL; -+ } -+ -+ /* Did somebody else fill it already? */ -+ if (PageUptodate(page)) { -+ unlock_page(page); -+ return page; -+ } -+ -+ return generic_file_buffered_read_readpage(filp, mapping, page); -+} -+ -+static struct page * -+generic_file_buffered_read_no_cached_page(struct kiocb *iocb, -+ struct iov_iter *iter) -+{ -+ struct file *filp = iocb->ki_filp; -+ struct address_space *mapping = filp->f_mapping; -+ pgoff_t index = iocb->ki_pos >> PAGE_SHIFT; -+ struct page *page; -+ int error; -+ -+ /* -+ * Ok, it wasn't cached, so we need to create a new -+ * page.. -+ */ -+ page = page_cache_alloc(mapping); -+ if (!page) -+ return ERR_PTR(-ENOMEM); -+ -+ error = add_to_page_cache_lru(page, mapping, index, -+ mapping_gfp_constraint(mapping, GFP_KERNEL)); -+ if (error) { -+ put_page(page); -+ return error != -EEXIST ? ERR_PTR(error) : NULL; -+ } -+ -+ return generic_file_buffered_read_readpage(filp, mapping, page); -+} -+ -+static int generic_file_buffered_read_get_pages(struct kiocb *iocb, -+ struct iov_iter *iter, -+ struct page **pages, -+ unsigned nr) -+{ -+ struct file *filp = iocb->ki_filp; -+ struct address_space *mapping = filp->f_mapping; -+ struct file_ra_state *ra = &filp->f_ra; -+ pgoff_t index = iocb->ki_pos >> PAGE_SHIFT; -+ pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; -+ int i, j, ret, err = 0; -+ -+ nr = min_t(unsigned long, last_index - index, nr); -+find_page: -+ if (fatal_signal_pending(current)) -+ return -EINTR; -+ -+ ret = find_get_pages_contig(mapping, index, nr, pages); -+ if (ret) -+ goto got_pages; -+ -+ if (iocb->ki_flags & IOCB_NOWAIT) -+ return -EAGAIN; -+ -+ page_cache_sync_readahead(mapping, ra, filp, index, last_index - index); -+ -+ ret = find_get_pages_contig(mapping, index, nr, pages); -+ if (ret) -+ goto got_pages; -+ -+ pages[0] = generic_file_buffered_read_no_cached_page(iocb, iter); -+ err = PTR_ERR_OR_ZERO(pages[0]); -+ ret = !IS_ERR_OR_NULL(pages[0]); -+got_pages: -+ for (i = 0; i < ret; i++) { -+ struct page *page = pages[i]; -+ pgoff_t pg_index = index +i; -+ loff_t pg_pos = max(iocb->ki_pos, -+ (loff_t) pg_index << PAGE_SHIFT); -+ loff_t pg_count = iocb->ki_pos + iter->count - pg_pos; -+ -+ if (PageReadahead(page)) -+ page_cache_async_readahead(mapping, ra, filp, page, -+ pg_index, last_index - pg_index); -+ -+ if (!PageUptodate(page)) { -+ if (iocb->ki_flags & IOCB_NOWAIT) { -+ for (j = i; j < ret; j++) -+ put_page(pages[j]); -+ ret = i; -+ err = -EAGAIN; -+ break; -+ } -+ -+ page = generic_file_buffered_read_pagenotuptodate(filp, -+ iter, page, pg_pos, pg_count); -+ if (IS_ERR_OR_NULL(page)) { -+ for (j = i + 1; j < ret; j++) -+ put_page(pages[j]); -+ ret = i; -+ err = PTR_ERR_OR_ZERO(page); -+ break; -+ } -+ } -+ } -+ -+ if (likely(ret)) -+ return ret; -+ if (err) -+ return err; -+ goto find_page; -+} -+ - /** - * generic_file_buffered_read - generic file read routine - * @iocb: the iocb to read -@@ -1995,252 +2311,108 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb, - struct iov_iter *iter, ssize_t written) - { - struct file *filp = iocb->ki_filp; -+ struct file_ra_state *ra = &filp->f_ra; - struct address_space *mapping = filp->f_mapping; - struct inode *inode = mapping->host; -- struct file_ra_state *ra = &filp->f_ra; 
-- loff_t *ppos = &iocb->ki_pos; -- pgoff_t index; -- pgoff_t last_index; -- pgoff_t prev_index; -- unsigned long offset; /* offset into pagecache page */ -- unsigned int prev_offset; -- int error = 0; -- -- if (unlikely(*ppos >= inode->i_sb->s_maxbytes)) -+ size_t orig_count = iov_iter_count(iter); -+ struct page *page_array[8], **pages; -+ unsigned nr_pages = ARRAY_SIZE(page_array); -+ unsigned read_nr_pages = ((iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT) - -+ (iocb->ki_pos >> PAGE_SHIFT); -+ int i, pg_nr, error = 0; -+ bool writably_mapped; -+ loff_t isize, end_offset; -+ -+ if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes)) - return 0; - iov_iter_truncate(iter, inode->i_sb->s_maxbytes); - -- index = *ppos >> PAGE_SHIFT; -- prev_index = ra->prev_pos >> PAGE_SHIFT; -- prev_offset = ra->prev_pos & (PAGE_SIZE-1); -- last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; -- offset = *ppos & ~PAGE_MASK; -- -- for (;;) { -- struct page *page; -- pgoff_t end_index; -- loff_t isize; -- unsigned long nr, ret; -+ if (read_nr_pages > nr_pages && -+ (pages = kmalloc_array(read_nr_pages, sizeof(void *), GFP_KERNEL))) -+ nr_pages = read_nr_pages; -+ else -+ pages = page_array; - -+ do { - cond_resched(); --find_page: -- if (fatal_signal_pending(current)) { -- error = -EINTR; -- goto out; -- } - -- page = find_get_page(mapping, index); -- if (!page) { -- if (iocb->ki_flags & IOCB_NOWAIT) -- goto would_block; -- page_cache_sync_readahead(mapping, -- ra, filp, -- index, last_index - index); -- page = find_get_page(mapping, index); -- if (unlikely(page == NULL)) -- goto no_cached_page; -- } -- if (PageReadahead(page)) { -- page_cache_async_readahead(mapping, -- ra, filp, page, -- index, last_index - index); -+ i = 0; -+ pg_nr = generic_file_buffered_read_get_pages(iocb, iter, -+ pages, nr_pages); -+ if (pg_nr < 0) { -+ error = pg_nr; -+ break; - } -- if (!PageUptodate(page)) { -- if (iocb->ki_flags & IOCB_NOWAIT) { -- put_page(page); -- goto would_block; -- } - -- /* -- * See comment in do_read_cache_page on why -- * wait_on_page_locked is used to avoid unnecessarily -- * serialisations and why it's safe. -- */ -- error = wait_on_page_locked_killable(page); -- if (unlikely(error)) -- goto readpage_error; -- if (PageUptodate(page)) -- goto page_ok; -- -- if (inode->i_blkbits == PAGE_SHIFT || -- !mapping->a_ops->is_partially_uptodate) -- goto page_not_up_to_date; -- /* pipes can't handle partially uptodate pages */ -- if (unlikely(iov_iter_is_pipe(iter))) -- goto page_not_up_to_date; -- if (!trylock_page(page)) -- goto page_not_up_to_date; -- /* Did it get truncated before we got the lock? */ -- if (!page->mapping) -- goto page_not_up_to_date_locked; -- if (!mapping->a_ops->is_partially_uptodate(page, -- offset, iter->count)) -- goto page_not_up_to_date_locked; -- unlock_page(page); -- } --page_ok: - /* -- * i_size must be checked after we know the page is Uptodate. -+ * i_size must be checked after we know the pages are Uptodate. - * - * Checking i_size after the check allows us to calculate - * the correct value for "nr", which means the zero-filled - * part of the page is not copied back to userspace (unless - * another truncate extends the file - this is desired though). 
- */ -- - isize = i_size_read(inode); -- end_index = (isize - 1) >> PAGE_SHIFT; -- if (unlikely(!isize || index > end_index)) { -- put_page(page); -- goto out; -- } -+ if (unlikely(iocb->ki_pos >= isize)) -+ goto put_pages; - -- /* nr is the maximum number of bytes to copy from this page */ -- nr = PAGE_SIZE; -- if (index == end_index) { -- nr = ((isize - 1) & ~PAGE_MASK) + 1; -- if (nr <= offset) { -- put_page(page); -- goto out; -- } -- } -- nr = nr - offset; -+ end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count); - -- /* If users can be writing to this page using arbitrary -- * virtual addresses, take care about potential aliasing -- * before reading the page on the kernel side. -- */ -- if (mapping_writably_mapped(mapping)) -- flush_dcache_page(page); -+ while ((iocb->ki_pos >> PAGE_SHIFT) + pg_nr > -+ (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT) -+ put_page(pages[--pg_nr]); - - /* -- * When a sequential read accesses a page several times, -- * only mark it as accessed the first time. -+ * Once we start copying data, we don't want to be touching any -+ * cachelines that might be contended: - */ -- if (prev_index != index || offset != prev_offset) -- mark_page_accessed(page); -- prev_index = index; -+ writably_mapped = mapping_writably_mapped(mapping); - - /* -- * Ok, we have the page, and it's up-to-date, so -- * now we can copy it to user space... -+ * When a sequential read accesses a page several times, only -+ * mark it as accessed the first time. - */ -+ if (iocb->ki_pos >> PAGE_SHIFT != -+ ra->prev_pos >> PAGE_SHIFT) -+ mark_page_accessed(pages[0]); -+ for (i = 1; i < pg_nr; i++) -+ mark_page_accessed(pages[i]); -+ -+ for (i = 0; i < pg_nr; i++) { -+ unsigned offset = iocb->ki_pos & ~PAGE_MASK; -+ unsigned bytes = min_t(loff_t, end_offset - iocb->ki_pos, -+ PAGE_SIZE - offset); -+ unsigned copied; - -- ret = copy_page_to_iter(page, offset, nr, iter); -- offset += ret; -- index += offset >> PAGE_SHIFT; -- offset &= ~PAGE_MASK; -- prev_offset = offset; -- -- put_page(page); -- written += ret; -- if (!iov_iter_count(iter)) -- goto out; -- if (ret < nr) { -- error = -EFAULT; -- goto out; -- } -- continue; -- --page_not_up_to_date: -- /* Get exclusive access to the page ... */ -- error = lock_page_killable(page); -- if (unlikely(error)) -- goto readpage_error; -- --page_not_up_to_date_locked: -- /* Did it get truncated before we got the lock? */ -- if (!page->mapping) { -- unlock_page(page); -- put_page(page); -- continue; -- } -+ /* -+ * If users can be writing to this page using arbitrary -+ * virtual addresses, take care about potential aliasing -+ * before reading the page on the kernel side. -+ */ -+ if (writably_mapped) -+ flush_dcache_page(pages[i]); - -- /* Did somebody else fill it already? */ -- if (PageUptodate(page)) { -- unlock_page(page); -- goto page_ok; -- } -+ copied = copy_page_to_iter(pages[i], offset, bytes, iter); - --readpage: -- /* -- * A previous I/O error may have been due to temporary -- * failures, eg. multipath errors. -- * PG_error will be set again if readpage fails. -- */ -- ClearPageError(page); -- /* Start the actual read. The read will unlock the page. 
*/ -- error = mapping->a_ops->readpage(filp, page); -+ iocb->ki_pos += copied; -+ ra->prev_pos = iocb->ki_pos; - -- if (unlikely(error)) { -- if (error == AOP_TRUNCATED_PAGE) { -- put_page(page); -- error = 0; -- goto find_page; -- } -- goto readpage_error; -- } -- -- if (!PageUptodate(page)) { -- error = lock_page_killable(page); -- if (unlikely(error)) -- goto readpage_error; -- if (!PageUptodate(page)) { -- if (page->mapping == NULL) { -- /* -- * invalidate_mapping_pages got it -- */ -- unlock_page(page); -- put_page(page); -- goto find_page; -- } -- unlock_page(page); -- shrink_readahead_size_eio(ra); -- error = -EIO; -- goto readpage_error; -+ if (copied < bytes) { -+ error = -EFAULT; -+ break; - } -- unlock_page(page); - } -+put_pages: -+ for (i = 0; i < pg_nr; i++) -+ put_page(pages[i]); -+ } while (iov_iter_count(iter) && iocb->ki_pos < isize && !error); - -- goto page_ok; -- --readpage_error: -- /* UHHUH! A synchronous read error occurred. Report it */ -- put_page(page); -- goto out; -- --no_cached_page: -- /* -- * Ok, it wasn't cached, so we need to create a new -- * page.. -- */ -- page = page_cache_alloc(mapping); -- if (!page) { -- error = -ENOMEM; -- goto out; -- } -- error = add_to_page_cache_lru(page, mapping, index, -- mapping_gfp_constraint(mapping, GFP_KERNEL)); -- if (error) { -- put_page(page); -- if (error == -EEXIST) { -- error = 0; -- goto find_page; -- } -- goto out; -- } -- goto readpage; -- } -+ file_accessed(filp); -+ written += orig_count - iov_iter_count(iter); - --would_block: -- error = -EAGAIN; --out: -- ra->prev_pos = prev_index; -- ra->prev_pos <<= PAGE_SHIFT; -- ra->prev_pos |= prev_offset; -+ if (pages != page_array) -+ kfree(pages); - -- *ppos = ((loff_t)index << PAGE_SHIFT) + offset; -- file_accessed(filp); - return written ? 
written : error; - } - -diff --git a/mm/gup.c b/mm/gup.c -index 87a6a59fe667..6ecc36d28c04 100644 ---- a/mm/gup.c -+++ b/mm/gup.c -@@ -1093,6 +1093,13 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - } - cond_resched(); - -+ if (current->faults_disabled_mapping && -+ vma->vm_file && -+ vma->vm_file->f_mapping == current->faults_disabled_mapping) { -+ ret = -EFAULT; -+ goto out; -+ } -+ - page = follow_page_mask(vma, start, foll_flags, &ctx); - if (!page) { - ret = faultin_page(tsk, vma, start, &foll_flags, -diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index 7326b54ab728..bdc2eb057b3b 100644 ---- a/mm/page-writeback.c -+++ b/mm/page-writeback.c -@@ -2467,20 +2467,19 @@ int __set_page_dirty_nobuffers(struct page *page) - lock_page_memcg(page); - if (!TestSetPageDirty(page)) { - struct address_space *mapping = page_mapping(page); -- unsigned long flags; - - if (!mapping) { - unlock_page_memcg(page); - return 1; - } - -- xa_lock_irqsave(&mapping->i_pages, flags); -+ xa_lock_irq(&mapping->i_pages); - BUG_ON(page_mapping(page) != mapping); - WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); - account_page_dirtied(page, mapping); - __xa_set_mark(&mapping->i_pages, page_index(page), - PAGECACHE_TAG_DIRTY); -- xa_unlock_irqrestore(&mapping->i_pages, flags); -+ xa_unlock_irq(&mapping->i_pages); - unlock_page_memcg(page); - - if (mapping->host) { diff --git a/linux57-tkg/linux57-tkg-patches/0009-glitched-bmq.patch b/linux57-tkg/linux57-tkg-patches/0009-glitched-bmq.patch deleted file mode 100644 index 38666e4..0000000 --- a/linux57-tkg/linux57-tkg-patches/0009-glitched-bmq.patch +++ /dev/null @@ -1,90 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - BMQ - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. 
-+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9270a4370d54..30d01e647417 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -159,7 +159,7 @@ struct scan_control { - /* - * From 0 .. 100. Higher means more swappy. - */ --int vm_swappiness = 60; -+int vm_swappiness = 20; - /* - * The total number of pages which are beyond the high watermark within all - * zones. diff --git a/linux57-tkg/linux57-tkg-patches/0009-glitched-ondemand-bmq.patch b/linux57-tkg/linux57-tkg-patches/0009-glitched-ondemand-bmq.patch deleted file mode 100644 index a926040..0000000 --- a/linux57-tkg/linux57-tkg-patches/0009-glitched-ondemand-bmq.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 6b423eebfd5d..61e3271675d6 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -21,10 +21,10 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) --#define DEF_SAMPLING_DOWN_FACTOR (1) -+#define DEF_FREQUENCY_UP_THRESHOLD (55) -+#define DEF_SAMPLING_DOWN_FACTOR (5) - #define MAX_SAMPLING_DOWN_FACTOR (100000) --#define MICRO_FREQUENCY_UP_THRESHOLD (95) -+#define MICRO_FREQUENCY_UP_THRESHOLD (63) - #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) - #define MIN_FREQUENCY_UP_THRESHOLD (1) - #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/linux57-tkg/linux57-tkg-patches/0009-prjc_v5.7-r3.patch b/linux57-tkg/linux57-tkg-patches/0009-prjc_v5.7-r3.patch deleted file mode 100644 index d95c1c6..0000000 --- a/linux57-tkg/linux57-tkg-patches/0009-prjc_v5.7-r3.patch +++ /dev/null @@ -1,7817 +0,0 @@ -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 5e2ce88d6eda..eda08ad54201 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -4445,6 +4445,12 @@ - - sbni= [NET] Granch SBNI12 leased line adapter - -+ sched_timeslice= -+ [KNL] Time slice in us for BMQ scheduler. -+ Format: (must be >= 1000) -+ Default: 4000 -+ See Documentation/scheduler/sched-BMQ.txt -+ - sched_debug [KNL] Enables verbose scheduler debug messages. - - schedstats= [KNL,X86] Enable or disable scheduled statistics. -diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index 0d427fd10941..e0e112c68fa5 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -1230,3 +1230,13 @@ is 10 seconds. - - The softlockup threshold is (``2 * watchdog_thresh``). Setting this - tunable to zero will disable lockup detection altogether. -+ -+yield_type: -+=========== -+ -+BMQ CPU scheduler only. This determines what type of yield calls to -+sched_yield will perform. -+ -+ 0 - No yield. -+ 1 - Deboost and requeue task. (default) -+ 2 - Set run queue skip task. 
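For reference, the "sched_timeslice=" boot parameter and the "yield_type" sysctl documented in the two hunks above are wired up inside the alt_core.c hunk further down in this same deleted patch; the sketch below is a condensed copy of that handling, not new code. Values are given in microseconds, and anything below 1000 us is ignored so the 4 ms default is kept.

	/* Default time slice is 4 ms; overridden via "sched_timeslice=" (in us). */
	u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000);

	static int __init sched_timeslice(char *str)
	{
		int timeslice_us;

		get_option(&str, &timeslice_us);
		if (timeslice_us >= 1000)
			sched_timeslice_ns = timeslice_us * 1000;

		return 0;
	}
	early_param("sched_timeslice", sched_timeslice);

Booting with sched_timeslice=2000 therefore gives a 2 ms slice, while sched_timeslice=500 is silently ignored and the 4 ms default remains in effect.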
-diff --git a/Documentation/scheduler/sched-BMQ.txt b/Documentation/scheduler/sched-BMQ.txt -new file mode 100644 -index 000000000000..05c84eec0f31 ---- /dev/null -+++ b/Documentation/scheduler/sched-BMQ.txt -@@ -0,0 +1,110 @@ -+ BitMap queue CPU Scheduler -+ -------------------------- -+ -+CONTENT -+======== -+ -+ Background -+ Design -+ Overview -+ Task policy -+ Priority management -+ BitMap Queue -+ CPU Assignment and Migration -+ -+ -+Background -+========== -+ -+BitMap Queue CPU scheduler, referred to as BMQ from here on, is an evolution -+of previous Priority and Deadline based Skiplist multiple queue scheduler(PDS), -+and inspired by Zircon scheduler. The goal of it is to keep the scheduler code -+simple, while efficiency and scalable for interactive tasks, such as desktop, -+movie playback and gaming etc. -+ -+Design -+====== -+ -+Overview -+-------- -+ -+BMQ use per CPU run queue design, each CPU(logical) has it's own run queue, -+each CPU is responsible for scheduling the tasks that are putting into it's -+run queue. -+ -+The run queue is a set of priority queues. Note that these queues are fifo -+queue for non-rt tasks or priority queue for rt tasks in data structure. See -+BitMap Queue below for details. BMQ is optimized for non-rt tasks in the fact -+that most applications are non-rt tasks. No matter the queue is fifo or -+priority, In each queue is an ordered list of runnable tasks awaiting execution -+and the data structures are the same. When it is time for a new task to run, -+the scheduler simply looks the lowest numbered queueue that contains a task, -+and runs the first task from the head of that queue. And per CPU idle task is -+also in the run queue, so the scheduler can always find a task to run on from -+its run queue. -+ -+Each task will assigned the same timeslice(default 4ms) when it is picked to -+start running. Task will be reinserted at the end of the appropriate priority -+queue when it uses its whole timeslice. When the scheduler selects a new task -+from the priority queue it sets the CPU's preemption timer for the remainder of -+the previous timeslice. When that timer fires the scheduler will stop execution -+on that task, select another task and start over again. -+ -+If a task blocks waiting for a shared resource then it's taken out of its -+priority queue and is placed in a wait queue for the shared resource. When it -+is unblocked it will be reinserted in the appropriate priority queue of an -+eligible CPU. -+ -+Task policy -+----------- -+ -+BMQ supports DEADLINE, FIFO, RR, NORMAL, BATCH and IDLE task policy like the -+mainline CFS scheduler. But BMQ is heavy optimized for non-rt task, that's -+NORMAL/BATCH/IDLE policy tasks. Below is the implementation detail of each -+policy. -+ -+DEADLINE -+ It is squashed as priority 0 FIFO task. -+ -+FIFO/RR -+ All RT tasks share one single priority queue in BMQ run queue designed. The -+complexity of insert operation is O(n). BMQ is not designed for system runs -+with major rt policy tasks. -+ -+NORMAL/BATCH/IDLE -+ BATCH and IDLE tasks are treated as the same policy. They compete CPU with -+NORMAL policy tasks, but they just don't boost. To control the priority of -+NORMAL/BATCH/IDLE tasks, simply use nice level. -+ -+ISO -+ ISO policy is not supported in BMQ. Please use nice level -20 NORMAL policy -+task instead. -+ -+Priority management -+------------------- -+ -+RT tasks have priority from 0-99. For non-rt tasks, there are three different -+factors used to determine the effective priority of a task. 
The effective -+priority being what is used to determine which queue it will be in. -+ -+The first factor is simply the task’s static priority. Which is assigned from -+task's nice level, within [-20, 19] in userland's point of view and [0, 39] -+internally. -+ -+The second factor is the priority boost. This is a value bounded between -+[-MAX_PRIORITY_ADJ, MAX_PRIORITY_ADJ] used to offset the base priority, it is -+modified by the following cases: -+ -+*When a thread has used up its entire timeslice, always deboost its boost by -+increasing by one. -+*When a thread gives up cpu control(voluntary or non-voluntary) to reschedule, -+and its switch-in time(time after last switch and run) below the thredhold -+based on its priority boost, will boost its boost by decreasing by one buti is -+capped at 0 (won’t go negative). -+ -+The intent in this system is to ensure that interactive threads are serviced -+quickly. These are usually the threads that interact directly with the user -+and cause user-perceivable latency. These threads usually do little work and -+spend most of their time blocked awaiting another user event. So they get the -+priority boost from unblocking while background threads that do most of the -+processing receive the priority penalty for using their entire timeslice. -diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c -index f18d5067cd0f..fe489fc01c73 100644 ---- a/arch/powerpc/platforms/cell/spufs/sched.c -+++ b/arch/powerpc/platforms/cell/spufs/sched.c -@@ -51,11 +51,6 @@ static struct task_struct *spusched_task; - static struct timer_list spusched_timer; - static struct timer_list spuloadavg_timer; - --/* -- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). -- */ --#define NORMAL_PRIO 120 -- - /* - * Frequency of the spu scheduler tick. By default we do one SPU scheduler - * tick for every 10 CPU scheduler ticks. 
-diff --git a/fs/proc/base.c b/fs/proc/base.c -index eb2255e95f62..62b8cedbccb6 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -479,7 +479,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, - seq_puts(m, "0 0 0\n"); - else - seq_printf(m, "%llu %llu %lu\n", -- (unsigned long long)task->se.sum_exec_runtime, -+ (unsigned long long)tsk_seruntime(task), - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); - -diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h -index 8874f681b056..59eb72bf7d5f 100644 ---- a/include/asm-generic/resource.h -+++ b/include/asm-generic/resource.h -@@ -23,7 +23,7 @@ - [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, \ - [RLIMIT_SIGPENDING] = { 0, 0 }, \ - [RLIMIT_MSGQUEUE] = { MQ_BYTES_MAX, MQ_BYTES_MAX }, \ -- [RLIMIT_NICE] = { 0, 0 }, \ -+ [RLIMIT_NICE] = { 30, 30 }, \ - [RLIMIT_RTPRIO] = { 0, 0 }, \ - [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \ - } -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 4418f5cb8324..1e8030513489 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -652,13 +652,18 @@ struct task_struct { - unsigned int flags; - unsigned int ptrace; - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) - struct llist_node wake_entry; -+#endif -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) - int on_cpu; -+#endif -+#ifdef CONFIG_SMP - #ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ - unsigned int cpu; - #endif -+#ifndef CONFIG_SCHED_ALT - unsigned int wakee_flips; - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; -@@ -672,6 +677,7 @@ struct task_struct { - */ - int recent_used_cpu; - int wake_cpu; -+#endif /* !CONFIG_SCHED_ALT */ - #endif - int on_rq; - -@@ -680,13 +686,25 @@ struct task_struct { - int normal_prio; - unsigned int rt_priority; - -+#ifdef CONFIG_SCHED_ALT -+ u64 last_ran; -+ s64 time_slice; -+ int boost_prio; -+#ifdef CONFIG_SCHED_BMQ -+ int bmq_idx; -+ struct list_head bmq_node; -+#endif /* CONFIG_SCHED_BMQ */ -+ /* sched_clock time spent running */ -+ u64 sched_time; -+#else /* !CONFIG_SCHED_ALT */ - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; -+ struct sched_dl_entity dl; -+#endif - #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; - #endif -- struct sched_dl_entity dl; - - #ifdef CONFIG_UCLAMP_TASK - /* Clamp values requested for a scheduling entity */ -@@ -1306,6 +1324,15 @@ struct task_struct { - */ - }; - -+#ifdef CONFIG_SCHED_ALT -+#define tsk_seruntime(t) ((t)->sched_time) -+/* replace the uncertian rt_timeout with 0UL */ -+#define tsk_rttimeout(t) (0UL) -+#else /* CFS */ -+#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) -+#define tsk_rttimeout(t) ((t)->rt.timeout) -+#endif /* !CONFIG_SCHED_ALT */ -+ - static inline struct pid *task_pid(struct task_struct *task) - { - return task->thread_pid; -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 1aff00b65f3c..da0306d2fedb 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -1,5 +1,20 @@ - /* SPDX-License-Identifier: GPL-2.0 */ - -+#ifdef CONFIG_SCHED_ALT -+ -+#ifdef CONFIG_SCHED_BMQ -+#define __tsk_deadline(p) (0UL) -+ -+static inline int dl_task(struct task_struct *p) -+{ -+ return 0; -+} -+#endif -+ -+#else -+ -+#define __tsk_deadline(p) ((p)->dl.deadline) -+ - /* - * SCHED_DEADLINE tasks has negative priorities, reflecting - * the fact that any of them has higher prio than RT and -@@ 
-19,6 +34,7 @@ static inline int dl_task(struct task_struct *p) - { - return dl_prio(p->prio); - } -+#endif /* CONFIG_SCHED_ALT */ - - static inline bool dl_time_before(u64 a, u64 b) - { -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 7d64feafc408..ba6fd6a5b4b1 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -20,11 +20,17 @@ - */ - - #define MAX_USER_RT_PRIO 100 -+ - #define MAX_RT_PRIO MAX_USER_RT_PRIO - - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - -+#ifdef CONFIG_SCHED_ALT -+/* +/- priority levels from the base priority */ -+#define MAX_PRIORITY_ADJ 4 -+#endif -+ - /* - * Convert user-nice values [ -20 ... 0 ... 19 ] - * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..0a7565d0d3cf 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return true; -+#ifndef CONFIG_SCHED_ALT - if (policy == SCHED_DEADLINE) - return true; -+#endif - return false; - } - -diff --git a/init/Kconfig b/init/Kconfig -index 74a5ac65644f..4ef358fc7b51 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -689,9 +689,33 @@ config GENERIC_SCHED_CLOCK - - menu "Scheduler features" - -+menuconfig SCHED_ALT -+ bool "Alternative CPU Schedulers" -+ default y -+ help -+ This feature enable alternative CPU scheduler" -+ -+if SCHED_ALT -+ -+choice -+ prompt "Alternative CPU Scheduler" -+ default SCHED_BMQ -+ -+config SCHED_BMQ -+ bool "BMQ CPU scheduler" -+ help -+ The BitMap Queue CPU scheduler for excellent interactivity and -+ responsiveness on the desktop and solid scalability on normal -+ hardware and commodity servers. -+ -+endchoice -+ -+endif -+ - config UCLAMP_TASK - bool "Enable utilization clamping for RT/FAIR tasks" - depends on CPU_FREQ_GOV_SCHEDUTIL -+ depends on !SCHED_BMQ - help - This feature enables the scheduler to track the clamped utilization - of each CPU based on RUNNABLE tasks scheduled on that CPU. -@@ -777,6 +801,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION -+ depends on !SCHED_BMQ - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -878,7 +903,7 @@ menuconfig CGROUP_SCHED - bandwidth allocation to such task groups. It uses cgroups to group - tasks. 
- --if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_BMQ - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED -@@ -1134,6 +1159,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -+ depends on !SCHED_BMQ - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -diff --git a/init/init_task.c b/init/init_task.c -index bd403ed3e418..737a814482d6 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -67,9 +67,15 @@ struct task_struct init_task - .stack = init_stack, - .usage = REFCOUNT_INIT(2), - .flags = PF_KTHREAD, -+#ifdef CONFIG_SCHED_ALT -+ .prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, -+ .static_prio = DEFAULT_PRIO, -+ .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, -+#else - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, - .normal_prio = MAX_PRIO - 20, -+#endif - .policy = SCHED_NORMAL, - .cpus_ptr = &init_task.cpus_mask, - .cpus_mask = CPU_MASK_ALL, -@@ -79,6 +85,14 @@ struct task_struct init_task - .restart_block = { - .fn = do_no_restart_syscall, - }, -+#ifdef CONFIG_SCHED_ALT -+ .boost_prio = 0, -+#ifdef CONFIG_SCHED_BMQ -+ .bmq_idx = 15, -+ .bmq_node = LIST_HEAD_INIT(init_task.bmq_node), -+#endif -+ .time_slice = HZ, -+#else - .se = { - .group_node = LIST_HEAD_INIT(init_task.se.group_node), - }, -@@ -86,6 +100,7 @@ struct task_struct init_task - .run_list = LIST_HEAD_INIT(init_task.rt.run_list), - .time_slice = RR_TIMESLICE, - }, -+#endif - .tasks = LIST_HEAD_INIT(init_task.tasks), - #ifdef CONFIG_SMP - .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), -diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index 729d3a5c772e..1e3dac9b6a43 100644 ---- a/kernel/cgroup/cpuset.c -+++ b/kernel/cgroup/cpuset.c -@@ -636,7 +636,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) - return ret; - } - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_ALT) - /* - * Helper routine for generate_sched_domains(). - * Do cpusets a, b have overlapping effective cpus_allowed masks? 
-@@ -1009,7 +1009,7 @@ static void rebuild_sched_domains_locked(void) - /* Have scheduler rebuild the domains */ - partition_and_rebuild_sched_domains(ndoms, doms, attr); - } --#else /* !CONFIG_SMP */ -+#else /* !CONFIG_SMP || CONFIG_SCHED_ALT */ - static void rebuild_sched_domains_locked(void) - { - } -diff --git a/kernel/delayacct.c b/kernel/delayacct.c -index 27725754ac99..769d773c7182 100644 ---- a/kernel/delayacct.c -+++ b/kernel/delayacct.c -@@ -106,7 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) - */ - t1 = tsk->sched_info.pcount; - t2 = tsk->sched_info.run_delay; -- t3 = tsk->se.sum_exec_runtime; -+ t3 = tsk_seruntime(tsk); - - d->cpu_count += t1; - -diff --git a/kernel/exit.c b/kernel/exit.c -index d56fe51bdf07..3aa2c1e822b0 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -122,7 +122,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->curr_target = next_thread(tsk); - } - -- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, -+ add_device_randomness((const void*) &tsk_seruntime(tsk), - sizeof(unsigned long long)); - - /* -@@ -143,7 +143,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->inblock += task_io_get_inblock(tsk); - sig->oublock += task_io_get_oublock(tsk); - task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; -+ sig->sum_sched_runtime += tsk_seruntime(tsk); - sig->nr_threads--; - __unhash_process(tsk, group_dead); - write_sequnlock(&sig->stats_lock); -diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c -index f6310f848f34..4176ad070bc9 100644 ---- a/kernel/livepatch/transition.c -+++ b/kernel/livepatch/transition.c -@@ -306,7 +306,11 @@ static bool klp_try_switch_task(struct task_struct *task) - */ - rq = task_rq_lock(task, &flags); - -+#ifdef CONFIG_SCHED_ALT -+ if (task_running(task) && task != current) { -+#else - if (task_running(rq, task) && task != current) { -+#endif - snprintf(err_buf, STACK_ERR_BUF_SIZE, - "%s: %s:%d is running\n", __func__, task->comm, - task->pid); -diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index c9f090d64f00..b5d0c7088021 100644 ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -229,7 +229,7 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, - * Only use with rt_mutex_waiter_{less,equal}() - */ - #define task_to_waiter(p) \ -- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } -+ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = __tsk_deadline(p) } - - static inline int - rt_mutex_waiter_less(struct rt_mutex_waiter *left, -@@ -238,6 +238,7 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left, - if (left->prio < right->prio) - return 1; - -+#ifndef CONFIG_SCHED_BMQ - /* - * If both waiters have dl_prio(), we check the deadlines of the - * associated tasks. -@@ -246,6 +247,7 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left, - */ - if (dl_prio(left->prio)) - return dl_time_before(left->deadline, right->deadline); -+#endif - - return 0; - } -@@ -257,6 +259,7 @@ rt_mutex_waiter_equal(struct rt_mutex_waiter *left, - if (left->prio != right->prio) - return 0; - -+#ifndef CONFIG_SCHED_BMQ - /* - * If both waiters have dl_prio(), we check the deadlines of the - * associated tasks. 
-@@ -265,6 +268,7 @@ rt_mutex_waiter_equal(struct rt_mutex_waiter *left, - */ - if (dl_prio(left->prio)) - return left->deadline == right->deadline; -+#endif - - return 1; - } -@@ -680,7 +684,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, - * the values of the node being removed. - */ - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - rt_mutex_enqueue(lock, waiter); - -@@ -953,7 +957,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, - waiter->task = task; - waiter->lock = lock; - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - /* Get the top priority waiter on the lock */ - if (rt_mutex_has_waiters(lock)) -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 21fb5a5662b5..1cad9ff599a4 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -16,14 +16,20 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) - CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer - endif - --obj-y += core.o loadavg.o clock.o cputime.o --obj-y += idle.o fair.o rt.o deadline.o --obj-y += wait.o wait_bit.o swait.o completion.o -- --obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o -+ifdef CONFIG_SCHED_ALT -+obj-y += alt_core.o alt_debug.o -+else -+obj-y += core.o -+obj-y += fair.o rt.o deadline.o -+obj-$(CONFIG_SMP) += cpudeadline.o stop_task.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o --obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o -+endif -+obj-y += loadavg.o clock.o cputime.o -+obj-y += idle.o -+obj-y += wait.o wait_bit.o swait.o completion.o -+obj-$(CONFIG_SMP) += cpupri.o pelt.o topology.o -+obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -new file mode 100644 -index 000000000000..48e5fac710bc ---- /dev/null -+++ b/kernel/sched/alt_core.c -@@ -0,0 +1,6057 @@ -+/* -+ * kernel/sched/alt_core.c -+ * -+ * Core alternative kernel scheduler code and related syscalls -+ * -+ * Copyright (C) 1991-2002 Linus Torvalds -+ * -+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes -+ * a whole lot of those previous things. -+ * 2017-09-06 Priority and Deadline based Skip list multiple queue kernel -+ * scheduler by Alfred Chen. -+ * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. 
-+ */ -+#include "sched.h" -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+ -+#include "../workqueue_internal.h" -+#include "../../fs/io-wq.h" -+#include "../smpboot.h" -+ -+#include "pelt.h" -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+/* rt_prio(prio) defined in include/linux/sched/rt.h */ -+#define rt_task(p) rt_prio((p)->prio) -+#define rt_policy(policy) ((policy) == SCHED_FIFO || (policy) == SCHED_RR) -+#define task_has_rt_policy(p) (rt_policy((p)->policy)) -+ -+#define STOP_PRIO (MAX_RT_PRIO - 1) -+ -+/* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ -+u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000); -+ -+static int __init sched_timeslice(char *str) -+{ -+ int timeslice_us; -+ -+ get_option(&str, ×lice_us); -+ if (timeslice_us >= 1000) -+ sched_timeslice_ns = timeslice_us * 1000; -+ -+ return 0; -+} -+early_param("sched_timeslice", sched_timeslice); -+ -+/* Reschedule if less than this many μs left */ -+#define RESCHED_NS (100 * 1000) -+ -+/** -+ * sched_yield_type - Choose what sort of yield sched_yield will perform. -+ * 0: No yield. -+ * 1: Deboost and requeue task. (default) -+ * 2: Set rq skip task. -+ */ -+int sched_yield_type __read_mostly = 1; -+ -+#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) -+#define boost_threshold(p) (sched_timeslice_ns >>\ -+ (10 - MAX_PRIORITY_ADJ - (p)->boost_prio)) -+ -+static inline void boost_task(struct task_struct *p) -+{ -+ int limit; -+ -+ switch (p->policy) { -+ case SCHED_NORMAL: -+ limit = -MAX_PRIORITY_ADJ; -+ break; -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ limit = 0; -+ break; -+ default: -+ return; -+ } -+ -+ if (p->boost_prio > limit) -+ p->boost_prio--; -+} -+ -+static inline void deboost_task(struct task_struct *p) -+{ -+ if (p->boost_prio < MAX_PRIORITY_ADJ) -+ p->boost_prio++; -+} -+ -+#ifdef CONFIG_SMP -+static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; -+ -+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); -+ -+#ifdef CONFIG_SCHED_SMT -+DEFINE_STATIC_KEY_FALSE(sched_smt_present); -+EXPORT_SYMBOL_GPL(sched_smt_present); -+#endif -+ -+/* -+ * Keep a unique ID per domain (we use the first CPUs number in the cpumask of -+ * the domain), this allows us to quickly tell if two cpus are in the same cache -+ * domain, see cpus_share_cache(). 
-+ */ -+DEFINE_PER_CPU(int, sd_llc_id); -+#endif /* CONFIG_SMP */ -+ -+static DEFINE_MUTEX(sched_hotcpu_mutex); -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+#define IDLE_WM (IDLE_TASK_SCHED_PRIO) -+ -+static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; -+static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; -+ -+static inline void update_sched_rq_watermark(struct rq *rq) -+{ -+ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_BITS); -+ unsigned long last_wm = rq->watermark; -+ unsigned long i; -+ int cpu; -+ -+ if (watermark == last_wm) -+ return; -+ -+ rq->watermark = watermark; -+ cpu = cpu_of(rq); -+ if (watermark < last_wm) { -+ for (i = watermark + 1; i <= last_wm; i++) -+ cpumask_andnot(&sched_rq_watermark[i], -+ &sched_rq_watermark[i], cpumask_of(cpu)); -+#ifdef CONFIG_SCHED_SMT -+ if (!static_branch_likely(&sched_smt_present)) -+ return; -+ if (IDLE_WM == last_wm) -+ cpumask_andnot(&sched_sg_idle_mask, -+ &sched_sg_idle_mask, cpu_smt_mask(cpu)); -+#endif -+ return; -+ } -+ /* last_wm < watermark */ -+ for (i = last_wm + 1; i <= watermark; i++) -+ cpumask_set_cpu(cpu, &sched_rq_watermark[i]); -+#ifdef CONFIG_SCHED_SMT -+ if (!static_branch_likely(&sched_smt_present)) -+ return; -+ if (IDLE_WM == watermark) { -+ cpumask_t tmp; -+ cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[IDLE_WM]); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), -+ &sched_sg_idle_mask); -+ } -+#endif -+} -+ -+static inline int task_sched_prio(struct task_struct *p) -+{ -+ return (p->prio < MAX_RT_PRIO)? 
p->prio : p->prio + p->boost_prio; -+} -+ -+#include "bmq_imp.h" -+ -+static inline struct task_struct *rq_runnable_task(struct rq *rq) -+{ -+ struct task_struct *next = sched_rq_first_task(rq); -+ -+ if (unlikely(next == rq->skip)) -+ next = sched_rq_next_task(next, rq); -+ -+ return next; -+} -+ -+/* -+ * Context: p->pi_lock -+ */ -+static inline struct rq -+*__task_access_lock(struct task_struct *p, raw_spinlock_t **plock) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock(&rq->lock); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ *plock = NULL; -+ return rq; -+ } -+ } -+} -+ -+static inline void -+__task_access_unlock(struct task_struct *p, raw_spinlock_t *lock) -+{ -+ if (NULL != lock) -+ raw_spin_unlock(lock); -+} -+ -+static inline struct rq -+*task_access_lock_irqsave(struct task_struct *p, raw_spinlock_t **plock, -+ unsigned long *flags) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock_irqsave(&rq->lock, *flags); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&rq->lock, *flags); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ raw_spin_lock_irqsave(&p->pi_lock, *flags); -+ if (likely(!p->on_cpu && !p->on_rq && -+ rq == task_rq(p))) { -+ *plock = &p->pi_lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags); -+ } -+ } -+} -+ -+static inline void -+task_access_unlock_irqrestore(struct task_struct *p, raw_spinlock_t *lock, -+ unsigned long *flags) -+{ -+ raw_spin_unlock_irqrestore(lock, *flags); -+} -+ -+/* -+ * __task_rq_lock - lock the rq @p resides on. -+ */ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ for (;;) { -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) -+ return rq; -+ raw_spin_unlock(&rq->lock); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+/* -+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. -+ */ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ for (;;) { -+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ /* -+ * move_queued_task() task_rq_lock() -+ * -+ * ACQUIRE (rq->lock) -+ * [S] ->on_rq = MIGRATING [L] rq = task_rq() -+ * WMB (__set_task_cpu()) ACQUIRE (rq->lock); -+ * [S] ->cpu = new_cpu [L] task_rq() -+ * [L] ->on_rq -+ * RELEASE (rq->lock) -+ * -+ * If we observe the old CPU in task_rq_lock(), the acquire of -+ * the old rq->lock will fully serialize against the stores. -+ * -+ * If we observe the new CPU in task_rq_lock(), the address -+ * dependency headed by '[L] rq = task_rq()' and the acquire -+ * will pair with the WMB to ensure we then also see migrating. 
-+ */ -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+static inline void -+rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irqsave(&rq->lock, rf->flags); -+} -+ -+static inline void -+rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irqrestore(&rq->lock, rf->flags); -+} -+ -+/* -+ * RQ-clock updating methods: -+ */ -+ -+static void update_rq_clock_task(struct rq *rq, s64 delta) -+{ -+/* -+ * In theory, the compile should just see 0 here, and optimize out the call -+ * to sched_rt_avg_update. But I don't trust it... -+ */ -+ s64 __maybe_unused steal = 0, irq_delta = 0; -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; -+ -+ /* -+ * Since irq_time is only updated on {soft,}irq_exit, we might run into -+ * this case when a previous update_rq_clock() happened inside a -+ * {soft,}irq region. -+ * -+ * When this happens, we stop ->clock_task and only update the -+ * prev_irq_time stamp to account for the part that fit, so that a next -+ * update will consume the rest. This ensures ->clock_task is -+ * monotonic. -+ * -+ * It does however cause some slight miss-attribution of {soft,}irq -+ * time, a more accurate solution would be to update the irq_time using -+ * the current rq->clock timestamp, except that would require using -+ * atomic ops. -+ */ -+ if (irq_delta > delta) -+ irq_delta = delta; -+ -+ rq->prev_irq_time += irq_delta; -+ delta -= irq_delta; -+#endif -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ if (static_key_false((¶virt_steal_rq_enabled))) { -+ steal = paravirt_steal_clock(cpu_of(rq)); -+ steal -= rq->prev_steal_time_rq; -+ -+ if (unlikely(steal > delta)) -+ steal = delta; -+ -+ rq->prev_steal_time_rq += steal; -+ delta -= steal; -+ } -+#endif -+ -+ rq->clock_task += delta; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ if ((irq_delta + steal)) -+ update_irq_load_avg(rq, irq_delta + steal); -+#endif -+} -+ -+static inline void update_rq_clock(struct rq *rq) -+{ -+ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; -+ -+ if (unlikely(delta <= 0)) -+ return; -+ rq->clock += delta; -+ update_rq_clock_task(rq, delta); -+} -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out -+ * of nohz mode if necessary. 
-+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu; -+ -+ if (!tick_nohz_full_enabled()) -+ return; -+ -+ cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (rq->nr_running < 2) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+/* -+ * Add/Remove/Requeue task to/from the runqueue routines -+ * Context: rq->lock -+ */ -+static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ __SCHED_DEQUEUE_TASK(p, rq, flags, update_sched_rq_watermark(rq)); -+ --rq->nr_running; -+#ifdef CONFIG_SMP -+ if (1 == rq->nr_running) -+ cpumask_clear_cpu(cpu_of(rq), &sched_rq_pending_mask); -+#endif -+ -+ sched_update_tick_dependency(rq); -+} -+ -+static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "sched: enqueue task reside on cpu%d to cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ __SCHED_ENQUEUE_TASK(p, rq, flags); -+ update_sched_rq_watermark(rq); -+ ++rq->nr_running; -+#ifdef CONFIG_SMP -+ if (2 == rq->nr_running) -+ cpumask_set_cpu(cpu_of(rq), &sched_rq_pending_mask); -+#endif -+ -+ sched_update_tick_dependency(rq); -+ -+ /* -+ * If in_iowait is set, the code below may not trigger any cpufreq -+ * utilization updates, so do it here explicitly with the IOWAIT flag -+ * passed. -+ */ -+ if (p->in_iowait) -+ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq) -+{ -+ lockdep_assert_held(&rq->lock); -+ WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", -+ cpu_of(rq), task_cpu(p)); -+ -+ __requeue_task(p, rq); -+} -+ -+/* -+ * cmpxchg based fetch_or, macro so it works for different integer types -+ */ -+#define fetch_or(ptr, mask) \ -+ ({ \ -+ typeof(ptr) _ptr = (ptr); \ -+ typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _old, _val = *_ptr; \ -+ \ -+ for (;;) { \ -+ _old = cmpxchg(_ptr, _val, _val | _mask); \ -+ if (_old == _val) \ -+ break; \ -+ _val = _old; \ -+ } \ -+ _old; \ -+}) -+ -+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) -+/* -+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * this avoids any races wrt polling state changes and thereby avoids -+ * spurious IPIs. -+ */ -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+} -+ -+/* -+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. -+ * -+ * If this returns true, then the idle task promises to call -+ * sched_ttwu_pending() and reschedule soon. 
-+ */ -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); -+ -+ for (;;) { -+ if (!(val & _TIF_POLLING_NRFLAG)) -+ return false; -+ if (val & _TIF_NEED_RESCHED) -+ return true; -+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); -+ if (old == val) -+ break; -+ val = old; -+ } -+ return true; -+} -+ -+#else -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ return false; -+} -+#endif -+#endif -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. -+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. -+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. -+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); -+ /* task can safely be re-inserted now: */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ -+/* -+ * resched_curr - mark rq's current task 'to be rescheduled now'. 
-+ * -+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. -+ */ -+void resched_curr(struct rq *rq) -+{ -+ struct task_struct *curr = rq->curr; -+ int cpu; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ if (test_tsk_need_resched(curr)) -+ return; -+ -+ cpu = cpu_of(rq); -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(curr); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(curr)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(cpu_rq(cpu)); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+} -+ -+#ifdef CONFIG_SMP -+#ifdef CONFIG_NO_HZ_COMMON -+void nohz_balance_enter_idle(int cpu) -+{ -+} -+ -+void select_nohz_load_balancer(int stop_tick) -+{ -+} -+ -+void set_cpu_sd_state_idle(void) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. -+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). -+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(), default_cpu = -1; -+ struct cpumask *mask; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { -+ if (!idle_cpu(cpu)) -+ return cpu; -+ default_cpu = cpu; -+ } -+ -+ for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) -+ for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) -+ if (!idle_cpu(i)) -+ return i; -+ -+ if (default_cpu == -1) -+ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+ cpu = default_cpu; -+ -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. -+ */ -+static inline void wake_up_idle_cpu(int cpu) -+{ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ set_tsk_need_resched(cpu_rq(cpu)->idle); -+ smp_send_reschedule(cpu); -+} -+ -+static inline bool wake_up_full_nohz_cpu(int cpu) -+{ -+ /* -+ * We just need the target to call irq_exit() and re-evaluate -+ * the next tick. The nohz full kick at least implies that. -+ * If needed we can still optimize that later with an -+ * empty IRQ. 
-+ */ -+ if (tick_nohz_full_cpu(cpu)) { -+ if (cpu != smp_processor_id() || -+ tick_nohz_tick_stopped()) -+ tick_nohz_full_kick_cpu(cpu); -+ return true; -+ } -+ -+ return false; -+} -+ -+void wake_up_nohz_cpu(int cpu) -+{ -+ if (cpu_online(cpu) && !wake_up_full_nohz_cpu(cpu)) -+ wake_up_idle_cpu(cpu); -+} -+ -+static inline bool got_nohz_idle_kick(void) -+{ -+ int cpu = smp_processor_id(); -+ -+ /* TODO: need to support nohz_flag -+ if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK)) -+ return false; -+ */ -+ -+ if (idle_cpu(cpu) && !need_resched()) -+ return true; -+ -+ /* -+ * We can't run Idle Load Balance on this CPU for this time so we -+ * cancel it and clear NOHZ_BALANCE_KICK -+ */ -+ /* TODO: need to support nohz_flag -+ atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); -+ */ -+ return false; -+} -+ -+#else /* CONFIG_NO_HZ_COMMON */ -+ -+static inline bool got_nohz_idle_kick(void) -+{ -+ return false; -+} -+#endif /* CONFIG_NO_HZ_COMMON */ -+#endif /* CONFIG_SMP */ -+ -+static inline void check_preempt_curr(struct rq *rq) -+{ -+ if (sched_rq_first_task(rq) != rq->curr) -+ resched_curr(rq); -+} -+ -+#ifdef CONFIG_SCHED_HRTICK -+/* -+ * Use HR-timers to deliver accurate preemption points. -+ */ -+ -+static void hrtick_clear(struct rq *rq) -+{ -+ if (hrtimer_active(&rq->hrtick_timer)) -+ hrtimer_cancel(&rq->hrtick_timer); -+} -+ -+/* -+ * High-resolution timer tick. -+ * Runs from hardirq context with interrupts disabled. -+ */ -+static enum hrtimer_restart hrtick(struct hrtimer *timer) -+{ -+ struct rq *rq = container_of(timer, struct rq, hrtick_timer); -+ struct task_struct *p; -+ -+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); -+ -+ raw_spin_lock(&rq->lock); -+ p = rq->curr; -+ p->time_slice = 0; -+ resched_curr(rq); -+ raw_spin_unlock(&rq->lock); -+ -+ return HRTIMER_NORESTART; -+} -+ -+/* -+ * Use hrtick when: -+ * - enabled by features -+ * - hrtimer is actually high res -+ */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ /** -+ * Alt schedule FW doesn't support sched_feat yet -+ if (!sched_feat(HRTICK)) -+ return 0; -+ */ -+ if (!cpu_active(cpu_of(rq))) -+ return 0; -+ return hrtimer_is_hres_active(&rq->hrtick_timer); -+} -+ -+#ifdef CONFIG_SMP -+ -+static void __hrtick_restart(struct rq *rq) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ -+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); -+} -+ -+/* -+ * called from hardirq (IPI) context -+ */ -+static void __hrtick_start(void *arg) -+{ -+ struct rq *rq = arg; -+ -+ raw_spin_lock(&rq->lock); -+ __hrtick_restart(rq); -+ raw_spin_unlock(&rq->lock); -+} -+ -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ ktime_t time; -+ s64 delta; -+ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense and can cause timer DoS. -+ */ -+ delta = max_t(s64, delay, 10000LL); -+ time = ktime_add_ns(timer->base->get_time(), delta); -+ -+ hrtimer_set_expires(timer, time); -+ -+ if (rq == this_rq()) -+ __hrtick_restart(rq); -+ else -+ smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); -+} -+ -+#else -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense. Rely on vruntime for fairness. 
-+ */ -+ delay = max_t(u64, delay, 10000LL); -+ hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), -+ HRTIMER_MODE_REL_PINNED_HARD); -+} -+#endif /* CONFIG_SMP */ -+ -+static void hrtick_rq_init(struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ rq->hrtick_csd.flags = 0; -+ rq->hrtick_csd.func = __hrtick_start; -+ rq->hrtick_csd.info = rq; -+#endif -+ -+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); -+ rq->hrtick_timer.function = hrtick; -+} -+#else /* CONFIG_SCHED_HRTICK */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline void hrtick_clear(struct rq *rq) -+{ -+} -+ -+static inline void hrtick_rq_init(struct rq *rq) -+{ -+} -+#endif /* CONFIG_SCHED_HRTICK */ -+ -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (task_has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ -+ return p->static_prio + MAX_PRIORITY_ADJ; -+} -+ -+/* -+ * Calculate the current priority, i.e. the priority -+ * taken into account by the scheduler. This value might -+ * be boosted by RT tasks as it will be RT if the task got -+ * RT-boosted. If not then it returns p->normal_prio. -+ */ -+static int effective_prio(struct task_struct *p) -+{ -+ p->normal_prio = normal_prio(p); -+ /* -+ * If we are RT tasks or we were boosted to RT priority, -+ * keep the priority unchanged. Otherwise, update priority -+ * to the normal priority: -+ */ -+ if (!rt_prio(p->prio)) -+ return p->normal_prio; -+ return p->prio; -+} -+ -+/* -+ * activate_task - move a task to the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static void activate_task(struct task_struct *p, struct rq *rq) -+{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible--; -+ enqueue_task(p, rq, ENQUEUE_WAKEUP); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ cpufreq_update_util(rq, 0); -+} -+ -+/* -+ * deactivate_task - remove a task from the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static inline void deactivate_task(struct task_struct *p, struct rq *rq) -+{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible++; -+ dequeue_task(p, rq, DEQUEUE_SLEEP); -+ p->on_rq = 0; -+ cpufreq_update_util(rq, 0); -+} -+ -+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->cpu is set up to a new value, task_access_lock(p, ...) can be -+ * successfully executed on another CPU. We must ensure that updates of -+ * per-task data have been completed by this moment. -+ */ -+ smp_wmb(); -+ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ WRITE_ONCE(p->cpu, cpu); -+#else -+ WRITE_ONCE(task_thread_info(p)->cpu, cpu); -+#endif -+#endif -+} -+ -+#ifdef CONFIG_SMP -+void set_task_cpu(struct task_struct *p, unsigned int new_cpu) -+{ -+#ifdef CONFIG_SCHED_DEBUG -+ /* -+ * We should never call set_task_cpu() on a blocked task, -+ * ttwu() will sort out the placement. -+ */ -+ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && -+ !p->on_rq); -+#ifdef CONFIG_LOCKDEP -+ /* -+ * The caller should hold either p->pi_lock or rq->lock, when changing -+ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. -+ * -+ * sched_move_task() holds both and thus holding either pins the cgroup, -+ * see task_group(). -+ */ -+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || -+ lockdep_is_held(&task_rq(p)->lock))); -+#endif -+ /* -+ * Clearly, migrating tasks to offline CPUs is a fairly daft thing. 
-+ */ -+ WARN_ON_ONCE(!cpu_online(new_cpu)); -+#endif -+ if (task_cpu(p) == new_cpu) -+ return; -+ trace_sched_migrate_task(p, new_cpu); -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ -+ __set_task_cpu(p, new_cpu); -+} -+ -+static inline bool is_per_cpu_kthread(struct task_struct *p) -+{ -+ return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); -+} -+ -+/* -+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see -+ * __set_cpus_allowed_ptr() and select_fallback_rq(). -+ */ -+static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -+{ -+ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ -+ if (is_per_cpu_kthread(p)) -+ return cpu_online(cpu); -+ -+ return cpu_active(cpu); -+} -+ -+/* -+ * This is how migration works: -+ * -+ * 1) we invoke migration_cpu_stop() on the target CPU using -+ * stop_one_cpu(). -+ * 2) stopper starts to run (implicitly forcing the migrated thread -+ * off the CPU) -+ * 3) it checks whether the migrated task is still in the wrong runqueue. -+ * 4) if it's in the wrong runqueue then the migration thread removes -+ * it and puts it into the right queue. -+ * 5) stopper completes and stop_one_cpu() returns and the migration -+ * is done. -+ */ -+ -+/* -+ * move_queued_task - move a queued task to new rq. -+ * -+ * Returns (locked) new rq. Old rq's lock is released. -+ */ -+static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int -+ new_cpu) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); -+ dequeue_task(p, rq, 0); -+ set_task_cpu(p, new_cpu); -+ raw_spin_unlock(&rq->lock); -+ -+ rq = cpu_rq(new_cpu); -+ -+ raw_spin_lock(&rq->lock); -+ BUG_ON(task_cpu(p) != new_cpu); -+ enqueue_task(p, rq, 0); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ check_preempt_curr(rq); -+ -+ return rq; -+} -+ -+struct migration_arg { -+ struct task_struct *task; -+ int dest_cpu; -+}; -+ -+/* -+ * Move (not current) task off this CPU, onto the destination CPU. We're doing -+ * this because either it can't run here any more (set_cpus_allowed() -+ * away from this CPU, or CPU going down), or because we're -+ * attempting to rebalance this task on exec (sched_exec). -+ * -+ * So we race with normal scheduler movements, but that's OK, as long -+ * as the task is no longer on this CPU. -+ */ -+static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int -+ dest_cpu) -+{ -+ /* Affinity changed (again). */ -+ if (!is_cpu_allowed(p, dest_cpu)) -+ return rq; -+ -+ update_rq_clock(rq); -+ return move_queued_task(rq, p, dest_cpu); -+} -+ -+/* -+ * migration_cpu_stop - this will be executed by a highprio stopper thread -+ * and performs thread migration by bumping thread off CPU then -+ * 'pushing' onto another runqueue. -+ */ -+static int migration_cpu_stop(void *data) -+{ -+ struct migration_arg *arg = data; -+ struct task_struct *p = arg->task; -+ struct rq *rq = this_rq(); -+ -+ /* -+ * The original target CPU might have gone down and we might -+ * be on another CPU but it doesn't matter. -+ */ -+ local_irq_disable(); -+ /* -+ * We need to explicitly wake pending tasks before running -+ * __migrate_task() such that we will not miss enforcing cpus_ptr -+ * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. -+ */ -+ sched_ttwu_pending(); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ /* -+ * If task_rq(p) != rq, it cannot be migrated here, because we're -+ * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because -+ * we're holding p->pi_lock. 
-+ */ -+ if (task_rq(p) == rq && task_on_rq_queued(p)) -+ rq = __migrate_task(rq, p, arg->dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_enable(); -+ return 0; -+} -+ -+static inline void -+set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ p->nr_cpus_allowed = cpumask_weight(new_mask); -+} -+ -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ set_cpus_allowed_common(p, new_mask); -+} -+#endif -+ -+/** -+ * task_curr - is this task currently executing on a CPU? -+ * @p: the task in question. -+ * -+ * Return: 1 if the task is currently executing. 0 otherwise. -+ */ -+inline int task_curr(const struct task_struct *p) -+{ -+ return cpu_curr(task_cpu(p)) == p; -+} -+ -+#ifdef CONFIG_SMP -+/* -+ * wait_task_inactive - wait for a thread to unschedule. -+ * -+ * If @match_state is nonzero, it's the @p->state value just checked and -+ * not expected to change. If it changes, i.e. @p might have woken up, -+ * then return zero. When we succeed in waiting for @p to be off its CPU, -+ * we return a positive number (its total switch count). If a second call -+ * a short while later returns the same number, the caller can be sure that -+ * @p has remained unscheduled the whole time. -+ * -+ * The caller must ensure that the task *will* unschedule sometime soon, -+ * else this function might spin for a *long* time. This function can't -+ * be called with interrupts off, or it may introduce deadlock with -+ * smp_call_function() if an IPI is sent by the same process we are -+ * waiting to become inactive. -+ */ -+unsigned long wait_task_inactive(struct task_struct *p, long match_state) -+{ -+ unsigned long flags; -+ bool running, on_rq; -+ unsigned long ncsw; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ for (;;) { -+ rq = task_rq(p); -+ -+ /* -+ * If the task is actively running on another CPU -+ * still, just relax and busy-wait without holding -+ * any locks. -+ * -+ * NOTE! Since we don't hold any locks, it's not -+ * even sure that "rq" stays as the right runqueue! -+ * But we don't care, since this will return false -+ * if the runqueue has changed and p is actually now -+ * running somewhere else! -+ */ -+ while (task_running(p) && p == rq->curr) { -+ if (match_state && unlikely(p->state != match_state)) -+ return 0; -+ cpu_relax(); -+ } -+ -+ /* -+ * Ok, time to look more closely! We need the rq -+ * lock now, to be *sure*. If we're wrong, we'll -+ * just go back and repeat. -+ */ -+ task_access_lock_irqsave(p, &lock, &flags); -+ trace_sched_wait_task(p); -+ running = task_running(p); -+ on_rq = p->on_rq; -+ ncsw = 0; -+ if (!match_state || p->state == match_state) -+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ /* -+ * If it changed from the expected state, bail out now. -+ */ -+ if (unlikely(!ncsw)) -+ break; -+ -+ /* -+ * Was it really running after all now that we -+ * checked with the proper locks actually held? -+ * -+ * Oops. Go back and try again.. -+ */ -+ if (unlikely(running)) { -+ cpu_relax(); -+ continue; -+ } -+ -+ /* -+ * It's not enough that it's not actively running, -+ * it must be off the runqueue _entirely_, and not -+ * preempted! -+ * -+ * So if it was still runnable (but just not actively -+ * running right now), it's preempted, and we should -+ * yield - it could be a while. 
-+ */ -+ if (unlikely(on_rq)) { -+ ktime_t to = NSEC_PER_SEC / HZ; -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ continue; -+ } -+ -+ /* -+ * Ahh, all good. It wasn't running, and it wasn't -+ * runnable, which means that it will never become -+ * running in the future either. We're all done! -+ */ -+ break; -+ } -+ -+ return ncsw; -+} -+ -+/*** -+ * kick_process - kick a running thread to enter/exit the kernel -+ * @p: the to-be-kicked thread -+ * -+ * Cause a process which is running on another CPU to enter -+ * kernel-mode, without any delay. (to get signals handled.) -+ * -+ * NOTE: this function doesn't have to take the runqueue lock, -+ * because all it wants to ensure is that the remote task enters -+ * the kernel. If the IPI races and the task has been migrated -+ * to another CPU then no harm is done and the purpose has been -+ * achieved as well. -+ */ -+void kick_process(struct task_struct *p) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ cpu = task_cpu(p); -+ if ((cpu != smp_processor_id()) && task_curr(p)) -+ smp_send_reschedule(cpu); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(kick_process); -+ -+/* -+ * ->cpus_ptr is protected by both rq->lock and p->pi_lock -+ * -+ * A few notes on cpu_active vs cpu_online: -+ * -+ * - cpu_active must be a subset of cpu_online -+ * -+ * - on CPU-up we allow per-CPU kthreads on the online && !active CPU, -+ * see __set_cpus_allowed_ptr(). At this point the newly online -+ * CPU isn't yet part of the sched domains, and balancing will not -+ * see it. -+ * -+ * - on cpu-down we clear cpu_active() to mask the sched domains and -+ * avoid the load balancer to place new tasks on the to be removed -+ * CPU. Existing tasks will remain running there and will be taken -+ * off. -+ * -+ * This means that fallback selection must not select !active CPUs. -+ * And can assume that any active CPU must be online. Conversely -+ * select_task_rq() below may allow selection of !active CPUs in order -+ * to satisfy the above rules. -+ */ -+static int select_fallback_rq(int cpu, struct task_struct *p) -+{ -+ int nid = cpu_to_node(cpu); -+ const struct cpumask *nodemask = NULL; -+ enum { cpuset, possible, fail } state = cpuset; -+ int dest_cpu; -+ -+ /* -+ * If the node that the CPU is on has been offlined, cpu_to_node() -+ * will return -1. There is no CPU on the node, and we should -+ * select the CPU on the other node. -+ */ -+ if (nid != -1) { -+ nodemask = cpumask_of_node(nid); -+ -+ /* Look for allowed, online CPU in same node. */ -+ for_each_cpu(dest_cpu, nodemask) { -+ if (!cpu_active(dest_cpu)) -+ continue; -+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) -+ return dest_cpu; -+ } -+ } -+ -+ for (;;) { -+ /* Any allowed, online CPU? */ -+ for_each_cpu(dest_cpu, p->cpus_ptr) { -+ if (!is_cpu_allowed(p, dest_cpu)) -+ continue; -+ goto out; -+ } -+ -+ /* No more Mr. Nice Guy. */ -+ switch (state) { -+ case cpuset: -+ if (IS_ENABLED(CONFIG_CPUSETS)) { -+ cpuset_cpus_allowed_fallback(p); -+ state = possible; -+ break; -+ } -+ /* Fall-through */ -+ case possible: -+ do_set_cpus_allowed(p, cpu_possible_mask); -+ state = fail; -+ break; -+ -+ case fail: -+ BUG(); -+ break; -+ } -+ } -+ -+out: -+ if (state != cpuset) { -+ /* -+ * Don't tell them about moving exiting tasks or -+ * kernel threads (both mm NULL), since they never -+ * leave kernel. 
-+ */ -+ if (p->mm && printk_ratelimit()) { -+ printk_deferred("process %d (%s) no longer affine to cpu%d\n", -+ task_pid_nr(p), p->comm, cpu); -+ } -+ } -+ -+ return dest_cpu; -+} -+ -+static inline int select_task_rq(struct task_struct *p) -+{ -+ cpumask_t chk_mask, tmp; -+ -+ if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_online_mask))) -+ return select_fallback_rq(task_cpu(p), p); -+ -+ if ( -+#ifdef CONFIG_SCHED_SMT -+ cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || -+#endif -+ cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || -+ cpumask_and(&tmp, &chk_mask, -+ &sched_rq_watermark[task_sched_prio(p) + 1])) -+ return best_mask_cpu(task_cpu(p), &tmp); -+ -+ return best_mask_cpu(task_cpu(p), &chk_mask); -+} -+ -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. -+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. -+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ int dest_cpu; -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. -+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (cpumask_equal(&p->cpus_mask, new_mask)) -+ goto out; -+ -+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ do_set_cpus_allowed(p, new_mask); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. -+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(p) || p->state == TASK_WAKING) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ /* Need help from migration thread: drop lock and wait. 
*/ -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -+ return 0; -+ } -+ if (task_on_rq_queued(p)) { -+ /* -+ * OK, since we're going to drop the lock immediately -+ * afterwards anyway. -+ */ -+ update_rq_clock(rq); -+ rq = move_queued_task(rq, p, dest_cpu); -+ lock = &rq->lock; -+ } -+ -+out: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ return ret; -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+#else /* CONFIG_SMP */ -+ -+static inline int select_task_rq(struct task_struct *p) -+{ -+ return 0; -+} -+ -+static inline int -+__set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+ -+#endif /* CONFIG_SMP */ -+ -+static void -+ttwu_stat(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq; -+ -+ if (!schedstat_enabled()) -+ return; -+ -+ rq= this_rq(); -+ -+#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) -+ __schedstat_inc(rq->ttwu_local); -+ else { -+ /** Alt schedule FW ToDo: -+ * How to do ttwu_wake_remote -+ */ -+ } -+#endif /* CONFIG_SMP */ -+ -+ __schedstat_inc(rq->ttwu_count); -+} -+ -+/* -+ * Mark the task runnable and perform wakeup-preemption. -+ */ -+static inline void -+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+} -+ -+static inline void -+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+#ifdef CONFIG_SMP -+ if (p->sched_contributes_to_load) -+ rq->nr_uninterruptible--; -+#endif -+ -+ activate_task(p, rq); -+ ttwu_do_wakeup(rq, p, 0); -+} -+ -+static int ttwu_remote(struct task_struct *p, int wake_flags) -+{ -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ rq = __task_access_lock(p, &lock); -+ if (task_on_rq_queued(p)) { -+ ttwu_do_wakeup(rq, p, wake_flags); -+ ret = 1; -+ } -+ __task_access_unlock(p, lock); -+ -+ return ret; -+} -+ -+#ifdef CONFIG_SMP -+void sched_ttwu_pending(void) -+{ -+ struct rq *rq = this_rq(); -+ struct llist_node *llist = llist_del_all(&rq->wake_list); -+ struct task_struct *p, *t; -+ struct rq_flags rf; -+ -+ if (!llist) -+ return; -+ -+ rq_lock_irqsave(rq, &rf); -+ update_rq_clock(rq); -+ -+ llist_for_each_entry_safe(p, t, llist, wake_entry) -+ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0); -+ check_preempt_curr(rq); -+ -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+void scheduler_ipi(void) -+{ -+ /* -+ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting -+ * TIF_NEED_RESCHED remotely (for the first time) will also send -+ * this IPI. -+ */ -+ preempt_fold_need_resched(); -+ -+ if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) -+ return; -+ -+ irq_enter(); -+ sched_ttwu_pending(); -+ -+ /* -+ * Check if someone kicked us for doing the nohz idle load balance. 
-+ */ -+ if (unlikely(got_nohz_idle_kick())) { -+ /* TODO need to kick off balance -+ this_rq()->idle_balance = 1; -+ raise_softirq_irqoff(SCHED_SOFTIRQ); -+ */ -+ } -+ irq_exit(); -+} -+ -+static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); -+ -+ if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { -+ if (!set_nr_if_polling(rq->idle)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+ } -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (is_idle_task(rq->curr)) -+ smp_send_reschedule(cpu); -+ /* Else CPU is not idle, do nothing here */ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); -+} -+#endif /* CONFIG_SMP */ -+ -+static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+#if defined(CONFIG_SMP) -+ if (!cpus_share_cache(smp_processor_id(), cpu)) { -+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ -+ ttwu_queue_remote(p, cpu, wake_flags); -+ return; -+ } -+#endif -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ check_preempt_curr(rq); -+ raw_spin_unlock(&rq->lock); -+} -+ -+/* -+ * Notes on Program-Order guarantees on SMP systems. -+ * -+ * MIGRATION -+ * -+ * The basic program-order guarantee on SMP systems is that when a task [t] -+ * migrates, all its activity on its old CPU [c0] happens-before any subsequent -+ * execution on its new CPU [c1]. -+ * -+ * For migration (of runnable tasks) this is provided by the following means: -+ * -+ * A) UNLOCK of the rq(c0)->lock scheduling out task t -+ * B) migration for t is required to synchronize *both* rq(c0)->lock and -+ * rq(c1)->lock (if not at the same time, then in that order). -+ * C) LOCK of the rq(c1)->lock scheduling in task -+ * -+ * Transitivity guarantees that B happens after A and C after B. -+ * Note: we only require RCpc transitivity. -+ * Note: the CPU doing B need not be c0 or c1 -+ * -+ * Example: -+ * -+ * CPU0 CPU1 CPU2 -+ * -+ * LOCK rq(0)->lock -+ * sched-out X -+ * sched-in Y -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(0)->lock // orders against CPU0 -+ * dequeue X -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(1)->lock -+ * enqueue X -+ * UNLOCK rq(1)->lock -+ * -+ * LOCK rq(1)->lock // orders against CPU2 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(1)->lock -+ * -+ * -+ * BLOCKING -- aka. SLEEP + WAKEUP -+ * -+ * For blocking we (obviously) need to provide the same guarantee as for -+ * migration. However the means are completely different as there is no lock -+ * chain to provide order. 
Instead we do: -+ * -+ * 1) smp_store_release(X->on_cpu, 0) -+ * 2) smp_cond_load_acquire(!X->on_cpu) -+ * -+ * Example: -+ * -+ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule) -+ * -+ * LOCK rq(0)->lock LOCK X->pi_lock -+ * dequeue X -+ * sched-out X -+ * smp_store_release(X->on_cpu, 0); -+ * -+ * smp_cond_load_acquire(&X->on_cpu, !VAL); -+ * X->state = WAKING -+ * set_task_cpu(X,2) -+ * -+ * LOCK rq(2)->lock -+ * enqueue X -+ * X->state = RUNNING -+ * UNLOCK rq(2)->lock -+ * -+ * LOCK rq(2)->lock // orders against CPU1 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(2)->lock -+ * -+ * UNLOCK X->pi_lock -+ * UNLOCK rq(0)->lock -+ * -+ * -+ * However; for wakeups there is a second guarantee we must provide, namely we -+ * must observe the state that lead to our wakeup. That is, not only must our -+ * task observe its own prior state, it must also observe the stores prior to -+ * its wakeup. -+ * -+ * This means that any means of doing remote wakeups must order the CPU doing -+ * the wakeup against the CPU the task is going to end up running on. This, -+ * however, is already required for the regular Program-Order guarantee above, -+ * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire). -+ * -+ */ -+ -+/*** -+ * try_to_wake_up - wake up a thread -+ * @p: the thread to be awakened -+ * @state: the mask of task states that can be woken -+ * @wake_flags: wake modifier flags (WF_*) -+ * -+ * Put it on the run-queue if it's not already there. The "current" -+ * thread is always on the run-queue (except when the actual -+ * re-schedule is in progress), and as such you're allowed to do -+ * the simpler "current->state = TASK_RUNNING" to mark yourself -+ * runnable without the overhead of this. -+ * -+ * Return: %true if @p was woken up, %false if it was already running. -+ * or @state didn't match @p's state. -+ */ -+static int try_to_wake_up(struct task_struct *p, unsigned int state, -+ int wake_flags) -+{ -+ unsigned long flags; -+ int cpu, success = 0; -+ -+ preempt_disable(); -+ if (p == current) { -+ /* -+ * We're waking current, this means 'p->on_rq' and 'task_cpu(p) -+ * == smp_processor_id()'. Together this means we can special -+ * case the whole 'p->on_rq && ttwu_remote()' case below -+ * without taking any locks. -+ * -+ * In particular: -+ * - we rely on Program-Order guarantees for all the ordering, -+ * - we're serialized against set_special_state() by virtue of -+ * it disabling IRQs (this allows not taking ->pi_lock). -+ */ -+ if (!(p->state & state)) -+ goto out; -+ -+ success = 1; -+ cpu = task_cpu(p); -+ trace_sched_waking(p); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+ goto out; -+ } -+ -+ /* -+ * If we are going to wake up a thread waiting for CONDITION we -+ * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with mb() in -+ * set_current_state() the waiting thread does. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ smp_mb__after_spinlock(); -+ if (!(p->state & state)) -+ goto unlock; -+ -+ trace_sched_waking(p); -+ -+ /* We're going to change ->state: */ -+ success = 1; -+ cpu = task_cpu(p); -+ -+ /* -+ * Ensure we load p->on_rq _after_ p->state, otherwise it would -+ * be possible to, falsely, observe p->on_rq == 0 and get stuck -+ * in smp_cond_load_acquire() below. 
-+ * -+ * sched_ttwu_pending() try_to_wake_up() -+ * STORE p->on_rq = 1 LOAD p->state -+ * UNLOCK rq->lock -+ * -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * UNLOCK rq->lock -+ * -+ * [task p] -+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ if (p->on_rq && ttwu_remote(p, wake_flags)) -+ goto unlock; -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -+ * possible to, falsely, observe p->on_cpu == 0. -+ * -+ * One must be running (->on_cpu == 1) in order to remove oneself -+ * from the runqueue. -+ * -+ * __schedule() (switch to task 'p') try_to_wake_up() -+ * STORE p->on_cpu = 1 LOAD p->on_rq -+ * UNLOCK rq->lock -+ * -+ * __schedule() (put 'p' to sleep) -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * STORE p->on_rq = 0 LOAD p->on_cpu -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until its done referencing the task. -+ * -+ * Pairs with the smp_store_release() in finish_task(). -+ * -+ * This ensures that tasks getting woken will be fully ordered against -+ * their previous state and preserve Program Order. -+ */ -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ p->sched_contributes_to_load = !!task_contributes_to_load(p); -+ p->state = TASK_WAKING; -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+ if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) -+ boost_task(p); -+ -+ cpu = select_task_rq(p); -+ -+ if (cpu != task_cpu(p)) { -+ wake_flags |= WF_MIGRATED; -+ psi_ttwu_dequeue(p); -+ set_task_cpu(p, cpu); -+ } -+#else /* CONFIG_SMP */ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+#endif /* CONFIG_SMP */ -+ -+ ttwu_queue(p, cpu, wake_flags); -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+out: -+ if (success) -+ ttwu_stat(p, cpu, wake_flags); -+ preempt_enable(); -+ -+ return success; -+} -+ -+/** -+ * wake_up_process - Wake up a specific process -+ * @p: The process to be woken up. -+ * -+ * Attempt to wake up the nominated process and move it to the set of runnable -+ * processes. -+ * -+ * Return: 1 if the process was woken up, 0 if it was already running. -+ * -+ * This function executes a full memory barrier before accessing the task state. -+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+int wake_up_state(struct task_struct *p, unsigned int state) -+{ -+ return try_to_wake_up(p, state, 0); -+} -+ -+/* -+ * Perform scheduler related setup for a newly forked process p. -+ * p is forked by current. 
-+ * -+ * __sched_fork() is basic setup used by init_idle() too: -+ */ -+static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p) -+{ -+ p->on_rq = 0; -+ p->on_cpu = 0; -+ p->utime = 0; -+ p->stime = 0; -+ p->sched_time = 0; -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ INIT_HLIST_HEAD(&p->preempt_notifiers); -+#endif -+ -+#ifdef CONFIG_COMPACTION -+ p->capture_control = NULL; -+#endif -+} -+ -+/* -+ * fork()/clone()-time setup: -+ */ -+int sched_fork(unsigned long clone_flags, struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ -+ __sched_fork(clone_flags, p); -+ /* -+ * We mark the process as NEW here. This guarantees that -+ * nobody will actually run it, and a signal or other external -+ * event cannot wake it up and insert it on the runqueue either. -+ */ -+ p->state = TASK_NEW; -+ -+ /* -+ * Make sure we do not leak PI boosting priority to the child. -+ */ -+ p->prio = current->normal_prio; -+ -+ /* -+ * Revert to default priority/policy on fork if requested. -+ */ -+ if (unlikely(p->sched_reset_on_fork)) { -+ if (task_has_rt_policy(p)) { -+ p->policy = SCHED_NORMAL; -+ p->static_prio = NICE_TO_PRIO(0); -+ p->rt_priority = 0; -+ } else if (PRIO_TO_NICE(p->static_prio) < 0) -+ p->static_prio = NICE_TO_PRIO(0); -+ -+ p->prio = p->normal_prio = normal_prio(p); -+ -+ /* -+ * We don't need the reset flag anymore after the fork. It has -+ * fulfilled its duty: -+ */ -+ p->sched_reset_on_fork = 0; -+ } -+ -+ p->boost_prio = (p->boost_prio < 0) ? -+ p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; -+ /* -+ * The child is not yet in the pid-hash so no cgroup attach races, -+ * and the cgroup is pinned to this child due to cgroup_fork() -+ * is ran before sched_fork(). -+ * -+ * Silence PROVE_RCU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. -+ */ -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ rq->curr->time_slice /= 2; -+ p->time_slice = rq->curr->time_slice; -+#ifdef CONFIG_SCHED_HRTICK -+ hrtick_start(rq, rq->curr->time_slice); -+#endif -+ -+ if (p->time_slice < RESCHED_NS) { -+ p->time_slice = sched_timeslice_ns; -+ resched_curr(rq); -+ } -+ raw_spin_unlock(&rq->lock); -+ -+ /* -+ * We're setting the CPU for the first time, we don't migrate, -+ * so use __set_task_cpu(). -+ */ -+ __set_task_cpu(p, cpu_of(rq)); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ return 0; -+} -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+static bool __initdata __sched_schedstats = false; -+ -+static void set_schedstats(bool enabled) -+{ -+ if (enabled) -+ static_branch_enable(&sched_schedstats); -+ else -+ static_branch_disable(&sched_schedstats); -+} -+ -+void force_schedstat_enabled(void) -+{ -+ if (!schedstat_enabled()) { -+ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); -+ static_branch_enable(&sched_schedstats); -+ } -+} -+ -+static int __init setup_schedstats(char *str) -+{ -+ int ret = 0; -+ if (!str) -+ goto out; -+ -+ /* -+ * This code is called before jump labels have been set up, so we can't -+ * change the static branch directly just yet. Instead set a temporary -+ * variable so init_schedstats() can do it later. 
-+ */ -+ if (!strcmp(str, "enable")) { -+ __sched_schedstats = true; -+ ret = 1; -+ } else if (!strcmp(str, "disable")) { -+ __sched_schedstats = false; -+ ret = 1; -+ } -+out: -+ if (!ret) -+ pr_warn("Unable to parse schedstats=\n"); -+ -+ return ret; -+} -+__setup("schedstats=", setup_schedstats); -+ -+static void __init init_schedstats(void) -+{ -+ set_schedstats(__sched_schedstats); -+} -+ -+#ifdef CONFIG_PROC_SYSCTL -+int sysctl_schedstats(struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) -+{ -+ struct ctl_table t; -+ int err; -+ int state = static_branch_likely(&sched_schedstats); -+ -+ if (write && !capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ t = *table; -+ t.data = &state; -+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); -+ if (err < 0) -+ return err; -+ if (write) -+ set_schedstats(state); -+ return err; -+} -+#endif /* CONFIG_PROC_SYSCTL */ -+#else /* !CONFIG_SCHEDSTATS */ -+static inline void init_schedstats(void) {} -+#endif /* CONFIG_SCHEDSTATS */ -+ -+/* -+ * wake_up_new_task - wake up a newly created task for the first time. -+ * -+ * This function will do some initial scheduler statistics housekeeping -+ * that must be done for every newly created context, then puts the task -+ * on the runqueue and wakes it. -+ */ -+void wake_up_new_task(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ p->state = TASK_RUNNING; -+ -+ rq = cpu_rq(select_task_rq(p)); -+#ifdef CONFIG_SMP -+ /* -+ * Fork balancing, do it here and not earlier because: -+ * - cpus_ptr can change in the fork path -+ * - any previously selected CPU might disappear through hotplug -+ * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, -+ * as we're not fully set-up yet. -+ */ -+ __set_task_cpu(p, cpu_of(rq)); -+#endif -+ -+ raw_spin_lock(&rq->lock); -+ -+ update_rq_clock(rq); -+ activate_task(p, rq); -+ trace_sched_wakeup_new(p); -+ check_preempt_curr(rq); -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ -+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); -+ -+void preempt_notifier_inc(void) -+{ -+ static_branch_inc(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_inc); -+ -+void preempt_notifier_dec(void) -+{ -+ static_branch_dec(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_dec); -+ -+/** -+ * preempt_notifier_register - tell me when current is being preempted & rescheduled -+ * @notifier: notifier struct to register -+ */ -+void preempt_notifier_register(struct preempt_notifier *notifier) -+{ -+ if (!static_branch_unlikely(&preempt_notifier_key)) -+ WARN(1, "registering preempt_notifier while notifiers disabled\n"); -+ -+ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_register); -+ -+/** -+ * preempt_notifier_unregister - no longer interested in preemption notifications -+ * @notifier: notifier struct to unregister -+ * -+ * This is *not* safe to call from within a preemption notifier. 
-+ */ -+void preempt_notifier_unregister(struct preempt_notifier *notifier) -+{ -+ hlist_del(¬ifier->link); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_unregister); -+ -+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_in(notifier, raw_smp_processor_id()); -+} -+ -+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_in_preempt_notifiers(curr); -+} -+ -+static void -+__fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_out(notifier, next); -+} -+ -+static __always_inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_out_preempt_notifiers(curr, next); -+} -+ -+#else /* !CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+} -+ -+static inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+} -+ -+#endif /* CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void prepare_task(struct task_struct *next) -+{ -+ /* -+ * Claim the task as running, we do this before switching to it -+ * such that any running task will have this set. -+ */ -+ next->on_cpu = 1; -+} -+ -+static inline void finish_task(struct task_struct *prev) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->on_cpu is cleared, the task can be moved to a different CPU. -+ * We must ensure this doesn't happen until the switch is completely -+ * finished. -+ * -+ * In particular, the load of prev->state in finish_task_switch() must -+ * happen before this. -+ * -+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). -+ */ -+ smp_store_release(&prev->on_cpu, 0); -+#else -+ prev->on_cpu = 0; -+#endif -+} -+ -+static inline void -+prepare_lock_switch(struct rq *rq, struct task_struct *next) -+{ -+ /* -+ * Since the runqueue lock will be released by the next -+ * task (which is an invalid locking op but in the case -+ * of the scheduler it's an obvious special-case), so we -+ * do an early lockdep release here: -+ */ -+ spin_release(&rq->lock.dep_map, _THIS_IP_); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* this is a valid case when another task releases the spinlock */ -+ rq->lock.owner = next; -+#endif -+} -+ -+static inline void finish_lock_switch(struct rq *rq) -+{ -+ /* -+ * If we are tracking spinlock dependencies then we have to -+ * fix up the runqueue lock - which gets 'carried over' from -+ * prev into current: -+ */ -+ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+/** -+ * prepare_task_switch - prepare to switch tasks -+ * @rq: the runqueue preparing to switch -+ * @next: the task we are going to switch to. -+ * -+ * This is called with the rq lock held and interrupts off. It must -+ * be paired with a subsequent finish_task_switch after the context -+ * switch. -+ * -+ * prepare_task_switch sets up locking and calls architecture specific -+ * hooks. 
-+ */ -+static inline void -+prepare_task_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ kcov_prepare_switch(prev); -+ sched_info_switch(rq, prev, next); -+ perf_event_task_sched_out(prev, next); -+ rseq_preempt(prev); -+ fire_sched_out_preempt_notifiers(prev, next); -+ prepare_task(next); -+ prepare_arch_switch(next); -+} -+ -+/** -+ * finish_task_switch - clean up after a task-switch -+ * @rq: runqueue associated with task-switch -+ * @prev: the thread we just switched away from. -+ * -+ * finish_task_switch must be called after the context switch, paired -+ * with a prepare_task_switch call before the context switch. -+ * finish_task_switch will reconcile locking set up by prepare_task_switch, -+ * and do any other architecture-specific cleanup actions. -+ * -+ * Note that we may have delayed dropping an mm in context_switch(). If -+ * so, we finish that here outside of the runqueue lock. (Doing it -+ * with the lock held can cause deadlocks; see schedule() for -+ * details.) -+ * -+ * The context switch have flipped the stack from under us and restored the -+ * local variables which were saved when this task called schedule() in the -+ * past. prev == current is still correct but we need to recalculate this_rq -+ * because prev may have moved to another CPU. -+ */ -+static struct rq *finish_task_switch(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq = this_rq(); -+ struct mm_struct *mm = rq->prev_mm; -+ long prev_state; -+ -+ /* -+ * The previous task will have left us with a preempt_count of 2 -+ * because it left us after: -+ * -+ * schedule() -+ * preempt_disable(); // 1 -+ * __schedule() -+ * raw_spin_lock_irq(&rq->lock) // 2 -+ * -+ * Also, see FORK_PREEMPT_COUNT. -+ */ -+ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, -+ "corrupted preempt_count: %s/%d/0x%x\n", -+ current->comm, current->pid, preempt_count())) -+ preempt_count_set(FORK_PREEMPT_COUNT); -+ -+ rq->prev_mm = NULL; -+ -+ /* -+ * A task struct has one reference for the use as "current". -+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls -+ * schedule one last time. The schedule call will never return, and -+ * the scheduled task must drop that reference. -+ * -+ * We must observe prev->state before clearing prev->on_cpu (in -+ * finish_task), otherwise a concurrent wakeup can get prev -+ * running on another CPU and we could rave with its RUNNING -> DEAD -+ * transition, resulting in a double drop. -+ */ -+ prev_state = prev->state; -+ vtime_task_switch(prev); -+ perf_event_task_sched_in(prev, current); -+ finish_task(prev); -+ finish_lock_switch(rq); -+ finish_arch_post_lock_switch(); -+ kcov_finish_switch(current); -+ -+ fire_sched_in_preempt_notifiers(current); -+ /* -+ * When switching through a kernel thread, the loop in -+ * membarrier_{private,global}_expedited() may have observed that -+ * kernel thread and not issued an IPI. It is therefore possible to -+ * schedule between user->kernel->user threads without passing though -+ * switch_mm(). Membarrier requires a barrier after storing to -+ * rq->curr, before returning to userspace, so provide them here: -+ * -+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly -+ * provided by mmdrop(), -+ * - a sync_core for SYNC_CORE. 
-+ */ -+ if (mm) { -+ membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop(mm); -+ } -+ if (unlikely(prev_state == TASK_DEAD)) { -+ /* -+ * Remove function-return probe instances associated with this -+ * task and put them back on the free list. -+ */ -+ kprobe_flush_task(prev); -+ -+ /* Task is done with its stack. */ -+ put_task_stack(prev); -+ -+ put_task_struct_rcu_user(prev); -+ } -+ -+ tick_nohz_task_switch(); -+ return rq; -+} -+ -+/** -+ * schedule_tail - first thing a freshly forked thread must call. -+ * @prev: the thread we just switched away from. -+ */ -+asmlinkage __visible void schedule_tail(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq; -+ -+ /* -+ * New tasks start with FORK_PREEMPT_COUNT, see there and -+ * finish_task_switch() for details. -+ * -+ * finish_task_switch() will drop rq->lock() and lower preempt_count -+ * and the preempt_enable() will end up enabling preemption (on -+ * PREEMPT_COUNT kernels). -+ */ -+ -+ rq = finish_task_switch(prev); -+ preempt_enable(); -+ -+ if (current->set_child_tid) -+ put_user(task_pid_vnr(current), current->set_child_tid); -+ -+ calculate_sigpending(); -+} -+ -+/* -+ * context_switch - switch to the new MM and the new thread's register state. -+ */ -+static __always_inline struct rq * -+context_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ prepare_task_switch(rq, prev, next); -+ -+ /* -+ * For paravirt, this is coupled with an exit in switch_to to -+ * combine the page table reload and the switch backend into -+ * one hypercall. -+ */ -+ arch_start_context_switch(prev); -+ -+ /* -+ * kernel -> kernel lazy + transfer active -+ * user -> kernel lazy + mmgrab() active -+ * -+ * kernel -> user switch + mmdrop() active -+ * user -> user switch -+ */ -+ if (!next->mm) { // to kernel -+ enter_lazy_tlb(prev->active_mm, next); -+ -+ next->active_mm = prev->active_mm; -+ if (prev->mm) // from user -+ mmgrab(prev->active_mm); -+ else -+ prev->active_mm = NULL; -+ } else { // to user -+ membarrier_switch_mm(rq, prev->active_mm, next->mm); -+ /* -+ * sys_membarrier() requires an smp_mb() between setting -+ * rq->curr / membarrier_switch_mm() and returning to userspace. -+ * -+ * The below provides this either through switch_mm(), or in -+ * case 'prev->active_mm == next->mm' through -+ * finish_task_switch()'s mmdrop(). -+ */ -+ switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ -+ if (!prev->mm) { // from kernel -+ /* will mmdrop() in finish_task_switch(). */ -+ rq->prev_mm = prev->active_mm; -+ prev->active_mm = NULL; -+ } -+ } -+ -+ prepare_lock_switch(rq, next); -+ -+ /* Here we just switch the register state and the stack. */ -+ switch_to(prev, next, prev); -+ barrier(); -+ -+ return finish_task_switch(prev); -+} -+ -+/* -+ * nr_running, nr_uninterruptible and nr_context_switches: -+ * -+ * externally visible scheduler statistics: current number of runnable -+ * threads, total number of context switches performed since bootup. -+ */ -+unsigned long nr_running(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_running; -+ -+ return sum; -+} -+ -+/* -+ * Check if only the current task is running on the CPU. -+ * -+ * Caution: this function does not check that the caller has disabled -+ * preemption, thus the result might have a time-of-check-to-time-of-use -+ * race. 
The caller is responsible to use it correctly, for example: -+ * -+ * - from a non-preemptible section (of course) -+ * -+ * - from a thread that is bound to a single CPU -+ * -+ * - in a loop with very short iterations (e.g. a polling loop) -+ */ -+bool single_task_running(void) -+{ -+ return raw_rq()->nr_running == 1; -+} -+EXPORT_SYMBOL(single_task_running); -+ -+unsigned long long nr_context_switches(void) -+{ -+ int i; -+ unsigned long long sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += cpu_rq(i)->nr_switches; -+ -+ return sum; -+} -+ -+/* -+ * Consumers of these two interfaces, like for example the cpuidle menu -+ * governor, are using nonsensical data. Preferring shallow idle state selection -+ * for a CPU that has IO-wait which might not even end up running the task when -+ * it does become runnable. -+ */ -+ -+unsigned long nr_iowait_cpu(int cpu) -+{ -+ return atomic_read(&cpu_rq(cpu)->nr_iowait); -+} -+ -+/* -+ * IO-wait accounting, and how its mostly bollocks (on SMP). -+ * -+ * The idea behind IO-wait account is to account the idle time that we could -+ * have spend running if it were not for IO. That is, if we were to improve the -+ * storage performance, we'd have a proportional reduction in IO-wait time. -+ * -+ * This all works nicely on UP, where, when a task blocks on IO, we account -+ * idle time as IO-wait, because if the storage were faster, it could've been -+ * running and we'd not be idle. -+ * -+ * This has been extended to SMP, by doing the same for each CPU. This however -+ * is broken. -+ * -+ * Imagine for instance the case where two tasks block on one CPU, only the one -+ * CPU will have IO-wait accounted, while the other has regular idle. Even -+ * though, if the storage were faster, both could've ran at the same time, -+ * utilising both CPUs. -+ * -+ * This means, that when looking globally, the current IO-wait accounting on -+ * SMP is a lower bound, by reason of under accounting. -+ * -+ * Worse, since the numbers are provided per CPU, they are sometimes -+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly -+ * associated with any one particular CPU, it can wake to another CPU than it -+ * blocked on. This means the per CPU IO-wait number is meaningless. -+ * -+ * Task CPU affinities can make all that even more 'interesting'. -+ */ -+ -+unsigned long nr_iowait(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += nr_iowait_cpu(i); -+ -+ return sum; -+} -+ -+#ifdef CONFIG_SMP -+ -+/* -+ * sched_exec - execve() is a valuable balancing opportunity, because at -+ * this point the task has the smallest effective memory and cache -+ * footprint. 
-+ */ -+void sched_exec(void) -+{ -+ struct task_struct *p = current; -+ int dest_cpu; -+ -+ if (task_rq(p)->nr_running < 2) -+ return; -+ -+ dest_cpu = cpumask_any_and(p->cpus_ptr, &sched_rq_watermark[IDLE_WM]); -+ if ( dest_cpu < nr_cpu_ids) { -+#ifdef CONFIG_SCHED_SMT -+ int smt = cpumask_any_and(p->cpus_ptr, &sched_sg_idle_mask); -+ if (smt < nr_cpu_ids) -+ dest_cpu = smt; -+#endif -+ if (likely(cpu_active(dest_cpu))) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); -+ return; -+ } -+ } -+} -+ -+#endif -+ -+DEFINE_PER_CPU(struct kernel_stat, kstat); -+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -+ -+EXPORT_PER_CPU_SYMBOL(kstat); -+EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -+ -+static inline void update_curr(struct rq *rq, struct task_struct *p) -+{ -+ s64 ns = rq->clock_task - p->last_ran; -+ -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ p->time_slice -= ns; -+ p->last_ran = rq->clock_task; -+} -+ -+/* -+ * Return accounted runtime for the task. -+ * Return separately the current's pending runtime that have not been -+ * accounted yet. -+ */ -+unsigned long long task_sched_runtime(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ u64 ns; -+ -+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) -+ /* -+ * 64-bit doesn't need locks to atomically read a 64-bit value. -+ * So we have a optimization chance when the task's delta_exec is 0. -+ * Reading ->on_cpu is racy, but this is ok. -+ * -+ * If we race with it leaving CPU, we'll take a lock. So we're correct. -+ * If we race with it entering CPU, unaccounted time is 0. This is -+ * indistinguishable from the read occurring a few cycles earlier. -+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has -+ * been accounted, so we're correct here as well. -+ */ -+ if (!p->on_cpu || !task_on_rq_queued(p)) -+ return tsk_seruntime(p); -+#endif -+ -+ rq = task_access_lock_irqsave(p, &lock, &flags); -+ /* -+ * Must be ->curr _and_ ->on_rq. If dequeued, we would -+ * project cycles that may never be accounted to this -+ * thread, breaking clock_gettime(). -+ */ -+ if (p == rq->curr && task_on_rq_queued(p)) { -+ update_rq_clock(rq); -+ update_curr(rq, p); -+ } -+ ns = tsk_seruntime(p); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ return ns; -+} -+ -+DEFINE_PER_CPU(unsigned long, thermal_pressure); -+ -+void arch_set_thermal_pressure(struct cpumask *cpus, -+ unsigned long th_pressure) -+{ -+ int cpu; -+ -+ for_each_cpu(cpu, cpus) -+ WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); -+} -+ -+/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static inline void scheduler_task_tick(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ if (is_idle_task(p)) -+ return; -+ -+ update_curr(rq, p); -+ cpufreq_update_util(rq, 0); -+ -+ /* -+ * Tasks have less than RESCHED_NS of time slice left they will be -+ * rescheduled. -+ */ -+ if (p->time_slice >= RESCHED_NS) -+ return; -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ -+/* -+ * This function gets called by the timer code, with HZ frequency. -+ * We call it with interrupts disabled. 
-+ */ -+void scheduler_tick(void) -+{ -+ int cpu __maybe_unused = smp_processor_id(); -+ struct rq *rq = cpu_rq(cpu); -+ -+ arch_scale_freq_tick(); -+ sched_clock_tick(); -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ scheduler_task_tick(rq); -+ calc_global_load_tick(rq); -+ psi_task_tick(rq); -+ -+ rq->last_tick = rq->clock; -+ raw_spin_unlock(&rq->lock); -+ -+ perf_event_task_tick(); -+} -+ -+#ifdef CONFIG_SCHED_SMT -+static inline int active_load_balance_cpu_stop(void *data) -+{ -+ struct rq *rq = this_rq(); -+ struct task_struct *p = data; -+ cpumask_t tmp; -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ rq->active_balance = 0; -+ /* _something_ may have changed the task, double check again */ -+ if (task_on_rq_queued(p) && task_rq(p) == rq && -+ cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) { -+ int cpu = cpu_of(rq); -+ int dcpu = __best_mask_cpu(cpu, &tmp, -+ per_cpu(sched_cpu_llc_mask, cpu)); -+ rq = move_queued_task(rq, p, dcpu); -+ } -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_restore(flags); -+ -+ return 0; -+} -+ -+/* sg_balance_trigger - trigger slibing group balance for @cpu */ -+static inline int sg_balance_trigger(const int cpu) -+{ -+ struct rq *rq= cpu_rq(cpu); -+ unsigned long flags; -+ struct task_struct *curr; -+ int res; -+ -+ if (!raw_spin_trylock_irqsave(&rq->lock, flags)) -+ return 0; -+ curr = rq->curr; -+ res = (!is_idle_task(curr)) && (1 == rq->nr_running) &&\ -+ cpumask_intersects(curr->cpus_ptr, &sched_sg_idle_mask) &&\ -+ (!rq->active_balance); -+ -+ if (res) -+ rq->active_balance = 1; -+ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ if (res) -+ stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop, -+ curr, &rq->active_balance_work); -+ return res; -+} -+ -+/* -+ * sg_balance_check - slibing group balance check for run queue @rq -+ */ -+static inline void sg_balance_check(struct rq *rq) -+{ -+ cpumask_t chk; -+ int cpu; -+ -+ /* exit when no sg in idle */ -+ if (cpumask_empty(&sched_sg_idle_mask)) -+ return; -+ -+ cpu = cpu_of(rq); -+ /* -+ * Only cpu in slibing idle group will do the checking and then -+ * find potential cpus which can migrate the current running task -+ */ -+ if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && -+ cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) && -+ cpumask_andnot(&chk, &chk, &sched_rq_watermark[IDLE_WM])) { -+ int i, tried = 0; -+ -+ for_each_cpu_wrap(i, &chk, cpu) { -+ if (cpumask_subset(cpu_smt_mask(i), &chk)) { -+ if (sg_balance_trigger(i)) -+ return; -+ if (tried) -+ return; -+ tried++; -+ } -+ } -+ } -+} -+#endif /* CONFIG_SCHED_SMT */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+ -+struct tick_work { -+ int cpu; -+ atomic_t state; -+ struct delayed_work work; -+}; -+/* Values for ->state, see diagram below. */ -+#define TICK_SCHED_REMOTE_OFFLINE 0 -+#define TICK_SCHED_REMOTE_OFFLINING 1 -+#define TICK_SCHED_REMOTE_RUNNING 2 -+ -+/* -+ * State diagram for ->state: -+ * -+ * -+ * TICK_SCHED_REMOTE_OFFLINE -+ * | ^ -+ * | | -+ * | | sched_tick_remote() -+ * | | -+ * | | -+ * +--TICK_SCHED_REMOTE_OFFLINING -+ * | ^ -+ * | | -+ * sched_tick_start() | | sched_tick_stop() -+ * | | -+ * V | -+ * TICK_SCHED_REMOTE_RUNNING -+ * -+ * -+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() -+ * and sched_tick_start() are happy to leave the state in RUNNING. 
-+ */ -+ -+static struct tick_work __percpu *tick_work_cpu; -+ -+static void sched_tick_remote(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct tick_work *twork = container_of(dwork, struct tick_work, work); -+ int cpu = twork->cpu; -+ struct rq *rq = cpu_rq(cpu); -+ struct task_struct *curr; -+ unsigned long flags; -+ u64 delta; -+ int os; -+ -+ /* -+ * Handle the tick only if it appears the remote CPU is running in full -+ * dynticks mode. The check is racy by nature, but missing a tick or -+ * having one too much is no big deal because the scheduler tick updates -+ * statistics and checks timeslices in a time-independent way, regardless -+ * of when exactly it is running. -+ */ -+ if (!tick_nohz_tick_stopped_cpu(cpu)) -+ goto out_requeue; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ curr = rq->curr; -+ if (cpu_is_offline(cpu)) -+ goto out_unlock; -+ -+ update_rq_clock(rq); -+ if (!is_idle_task(curr)) { -+ /* -+ * Make sure the next tick runs within a reasonable -+ * amount of time. -+ */ -+ delta = rq_clock_task(rq) - curr->last_ran; -+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); -+ } -+ scheduler_task_tick(rq); -+ -+ calc_load_nohz_remote(rq); -+out_unlock: -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+out_requeue: -+ /* -+ * Run the remote tick once per second (1Hz). This arbitrary -+ * frequency is large enough to avoid overload but short enough -+ * to keep scheduler internal stats reasonably up to date. But -+ * first update state to reflect hotplug activity if required. -+ */ -+ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); -+ if (os == TICK_SCHED_REMOTE_RUNNING) -+ queue_delayed_work(system_unbound_wq, dwork, HZ); -+} -+ -+static void sched_tick_start(int cpu) -+{ -+ int os; -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); -+ if (os == TICK_SCHED_REMOTE_OFFLINE) { -+ twork->cpu = cpu; -+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); -+ queue_delayed_work(system_unbound_wq, &twork->work, HZ); -+ } -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void sched_tick_stop(int cpu) -+{ -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ cancel_delayed_work_sync(&twork->work); -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+int __init sched_tick_offload_init(void) -+{ -+ tick_work_cpu = alloc_percpu(struct tick_work); -+ BUG_ON(!tick_work_cpu); -+ return 0; -+} -+ -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_tick_start(int cpu) { } -+static inline void sched_tick_stop(int cpu) { } -+#endif -+ -+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ -+ defined(CONFIG_PREEMPT_TRACER)) -+/* -+ * If the value passed in is equal to the current preempt count -+ * then we just disabled preemption. Start timing the latency. -+ */ -+static inline void preempt_latency_start(int val) -+{ -+ if (preempt_count() == val) { -+ unsigned long ip = get_lock_parent_ip(); -+#ifdef CONFIG_DEBUG_PREEMPT -+ current->preempt_disable_ip = ip; -+#endif -+ trace_preempt_off(CALLER_ADDR0, ip); -+ } -+} -+ -+void preempt_count_add(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? 
-+ */ -+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) -+ return; -+#endif -+ __preempt_count_add(val); -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Spinlock count overflowing soon? -+ */ -+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= -+ PREEMPT_MASK - 10); -+#endif -+ preempt_latency_start(val); -+} -+EXPORT_SYMBOL(preempt_count_add); -+NOKPROBE_SYMBOL(preempt_count_add); -+ -+/* -+ * If the value passed in equals to the current preempt count -+ * then we just enabled preemption. Stop timing the latency. -+ */ -+static inline void preempt_latency_stop(int val) -+{ -+ if (preempt_count() == val) -+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); -+} -+ -+void preempt_count_sub(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) -+ return; -+ /* -+ * Is the spinlock portion underflowing? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && -+ !(preempt_count() & PREEMPT_MASK))) -+ return; -+#endif -+ -+ preempt_latency_stop(val); -+ __preempt_count_sub(val); -+} -+EXPORT_SYMBOL(preempt_count_sub); -+NOKPROBE_SYMBOL(preempt_count_sub); -+ -+#else -+static inline void preempt_latency_start(int val) { } -+static inline void preempt_latency_stop(int val) { } -+#endif -+ -+static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ return p->preempt_disable_ip; -+#else -+ return 0; -+#endif -+} -+ -+/* -+ * Print scheduling while atomic bug: -+ */ -+static noinline void __schedule_bug(struct task_struct *prev) -+{ -+ /* Save this before calling printk(), since that will clobber it */ -+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ if (oops_in_progress) -+ return; -+ -+ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", -+ prev->comm, prev->pid, preempt_count()); -+ -+ debug_show_held_locks(prev); -+ print_modules(); -+ if (irqs_disabled()) -+ print_irqtrace_events(prev); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && in_atomic_preempt_off()) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+ if (panic_on_warn) -+ panic("scheduling while atomic\n"); -+ -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+ -+/* -+ * Various schedule()-time debugging checks and statistics: -+ */ -+static inline void schedule_debug(struct task_struct *prev, bool preempt) -+{ -+#ifdef CONFIG_SCHED_STACK_END_CHECK -+ if (task_stack_end_corrupted(prev)) -+ panic("corrupted stack end detected inside scheduler\n"); -+#endif -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && prev->state && prev->non_block_count) { -+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", -+ prev->comm, prev->pid, prev->non_block_count); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+ } -+#endif -+ -+ if (unlikely(in_atomic_preempt_off())) { -+ __schedule_bug(prev); -+ preempt_count_set(PREEMPT_DISABLED); -+ } -+ rcu_sleep_check(); -+ -+ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -+ -+ schedstat_inc(this_rq()->sched_count); -+} -+ -+#ifdef CONFIG_SMP -+ -+#define SCHED_RQ_NR_MIGRATION (32UL) -+/* -+ * Migrate pending tasks in @rq to @dest_cpu -+ * Will try to migrate mininal of half of @rq nr_running tasks and -+ * SCHED_RQ_NR_MIGRATION to @dest_cpu -+ */ -+static inline int -+migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) -+{ -+ struct task_struct *p, *skip = rq->curr; -+ int nr_migrated = 0; -+ int nr_tries = 
min(rq->nr_running / 2, SCHED_RQ_NR_MIGRATION); -+ -+ while (skip != rq->idle && nr_tries && -+ (p = sched_rq_next_task(skip, rq)) != rq->idle) { -+ skip = sched_rq_next_task(p, rq); -+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { -+ __SCHED_DEQUEUE_TASK(p, rq, 0, ); -+ set_task_cpu(p, dest_cpu); -+ __SCHED_ENQUEUE_TASK(p, dest_rq, 0); -+ nr_migrated++; -+ } -+ nr_tries--; -+ } -+ -+ return nr_migrated; -+} -+ -+static inline int take_other_rq_tasks(struct rq *rq, int cpu) -+{ -+ struct cpumask *affinity_mask, *end_mask; -+ -+ if (unlikely(!rq->online)) -+ return 0; -+ -+ if (cpumask_empty(&sched_rq_pending_mask)) -+ return 0; -+ -+ affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); -+ do { -+ int i; -+ for_each_cpu_and(i, &sched_rq_pending_mask, affinity_mask) { -+ int nr_migrated; -+ struct rq *src_rq; -+ -+ src_rq = cpu_rq(i); -+ if (!do_raw_spin_trylock(&src_rq->lock)) -+ continue; -+ spin_acquire(&src_rq->lock.dep_map, -+ SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ -+ if ((nr_migrated = migrate_pending_tasks(src_rq, rq, cpu))) { -+ src_rq->nr_running -= nr_migrated; -+#ifdef CONFIG_SMP -+ if (src_rq->nr_running < 2) -+ cpumask_clear_cpu(i, &sched_rq_pending_mask); -+#endif -+ rq->nr_running += nr_migrated; -+#ifdef CONFIG_SMP -+ if (rq->nr_running > 1) -+ cpumask_set_cpu(cpu, &sched_rq_pending_mask); -+#endif -+ update_sched_rq_watermark(rq); -+ cpufreq_update_util(rq, 0); -+ -+ spin_release(&src_rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ -+ return 1; -+ } -+ -+ spin_release(&src_rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ } -+ } while (++affinity_mask < end_mask); -+ -+ return 0; -+} -+#endif -+ -+/* -+ * Timeslices below RESCHED_NS are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. -+ */ -+static inline void check_curr(struct task_struct *p, struct rq *rq) -+{ -+ if (unlikely(rq->idle == p)) -+ return; -+ -+ update_curr(rq, p); -+ -+ if (p->time_slice < RESCHED_NS) { -+ p->time_slice = sched_timeslice_ns; -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { -+ if (SCHED_RR != p->policy) -+ deboost_task(p); -+ requeue_task(p, rq); -+ } -+ } -+} -+ -+static inline struct task_struct * -+choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) -+{ -+ struct task_struct *next; -+ -+ if (unlikely(rq->skip)) { -+ next = rq_runnable_task(rq); -+ if (next == rq->idle) { -+#ifdef CONFIG_SMP -+ if (!take_other_rq_tasks(rq, cpu)) { -+#endif -+ rq->skip = NULL; -+ schedstat_inc(rq->sched_goidle); -+ return next; -+#ifdef CONFIG_SMP -+ } -+ next = rq_runnable_task(rq); -+#endif -+ } -+ rq->skip = NULL; -+#ifdef CONFIG_HIGH_RES_TIMERS -+ hrtick_start(rq, next->time_slice); -+#endif -+ return next; -+ } -+ -+ next = sched_rq_first_task(rq); -+ if (next == rq->idle) { -+#ifdef CONFIG_SMP -+ if (!take_other_rq_tasks(rq, cpu)) { -+#endif -+ schedstat_inc(rq->sched_goidle); -+ return next; -+#ifdef CONFIG_SMP -+ } -+ next = sched_rq_first_task(rq); -+#endif -+ } -+#ifdef CONFIG_HIGH_RES_TIMERS -+ hrtick_start(rq, next->time_slice); -+#endif -+ return next; -+} -+ -+/* -+ * schedule() is the main scheduler function. -+ * -+ * The main means of driving the scheduler and thus entering this function are: -+ * -+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. -+ * -+ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return -+ * paths. For example, see arch/x86/entry_64.S. 
-+ * -+ * To drive preemption between tasks, the scheduler sets the flag in timer -+ * interrupt handler scheduler_tick(). -+ * -+ * 3. Wakeups don't really cause entry into schedule(). They add a -+ * task to the run-queue and that's it. -+ * -+ * Now, if the new task added to the run-queue preempts the current -+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets -+ * called on the nearest possible occasion: -+ * -+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): -+ * -+ * - in syscall or exception context, at the next outmost -+ * preempt_enable(). (this might be as soon as the wake_up()'s -+ * spin_unlock()!) -+ * -+ * - in IRQ context, return from interrupt-handler to -+ * preemptible context -+ * -+ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) -+ * then at the next: -+ * -+ * - cond_resched() call -+ * - explicit schedule() call -+ * - return from syscall or exception to user-space -+ * - return from interrupt-handler to user-space -+ * -+ * WARNING: must be called with preemption disabled! -+ */ -+static void __sched notrace __schedule(bool preempt) -+{ -+ struct task_struct *prev, *next; -+ unsigned long *switch_count; -+ struct rq *rq; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ rq = cpu_rq(cpu); -+ prev = rq->curr; -+ -+ schedule_debug(prev, preempt); -+ -+ /* by passing sched_feat(HRTICK) checking which Alt schedule FW doesn't support */ -+ hrtick_clear(rq); -+ -+ local_irq_disable(); -+ rcu_note_context_switch(preempt); -+ -+ /* -+ * Make sure that signal_pending_state()->signal_pending() below -+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(). -+ * -+ * The membarrier system call requires a full memory barrier -+ * after coming from user-space, before storing to rq->curr. -+ */ -+ raw_spin_lock(&rq->lock); -+ smp_mb__after_spinlock(); -+ -+ update_rq_clock(rq); -+ -+ switch_count = &prev->nivcsw; -+ if (!preempt && prev->state) { -+ if (signal_pending_state(prev->state, prev)) { -+ prev->state = TASK_RUNNING; -+ } else { -+ if (rq_switch_time(rq) < boost_threshold(prev)) -+ boost_task(prev); -+ deactivate_task(prev, rq); -+ -+ if (prev->in_iowait) { -+ atomic_inc(&rq->nr_iowait); -+ delayacct_blkio_start(); -+ } -+ } -+ switch_count = &prev->nvcsw; -+ } -+ -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ -+ check_curr(prev, rq); -+ -+ next = choose_next_task(rq, cpu, prev); -+ -+ if (likely(prev != next)) { -+ next->last_ran = rq->clock_task; -+ rq->last_ts_switch = rq->clock; -+ -+ rq->nr_switches++; -+ /* -+ * RCU users of rcu_dereference(rq->curr) may not see -+ * changes to task_struct made by pick_next_task(). -+ */ -+ RCU_INIT_POINTER(rq->curr, next); -+ /* -+ * The membarrier system call requires each architecture -+ * to have a full memory barrier after updating -+ * rq->curr, before returning to user-space. -+ * -+ * Here are the schemes providing that barrier on the -+ * various architectures: -+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. -+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
-+ * - finish_lock_switch() for weakly-ordered -+ * architectures where spin_unlock is a full barrier, -+ * - switch_to() for arm64 (weakly-ordered, spin_unlock -+ * is a RELEASE barrier), -+ */ -+ ++*switch_count; -+ -+ psi_sched_switch(prev, next, !task_on_rq_queued(prev)); -+ -+ trace_sched_switch(preempt, prev, next); -+ -+ /* Also unlocks the rq: */ -+ rq = context_switch(rq, prev, next); -+ } else -+ raw_spin_unlock_irq(&rq->lock); -+ -+#ifdef CONFIG_SCHED_SMT -+ sg_balance_check(rq); -+#endif -+} -+ -+void __noreturn do_task_dead(void) -+{ -+ /* Causes final put_task_struct in finish_task_switch(): */ -+ set_special_state(TASK_DEAD); -+ -+ /* Tell freezer to ignore us: */ -+ current->flags |= PF_NOFREEZE; -+ -+ __schedule(false); -+ BUG(); -+ -+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -+ for (;;) -+ cpu_relax(); -+} -+ -+static inline void sched_submit_work(struct task_struct *tsk) -+{ -+ if (!tsk->state) -+ return; -+ -+ /* -+ * If a worker went to sleep, notify and ask workqueue whether -+ * it wants to wake up a task to maintain concurrency. -+ * As this function is called inside the schedule() context, -+ * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker and because wq_worker_sleeping() -+ * requires it. -+ */ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ preempt_disable(); -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_sleeping(tsk); -+ else -+ io_wq_worker_sleeping(tsk); -+ preempt_enable_no_resched(); -+ } -+ -+ if (tsk_is_pi_blocked(tsk)) -+ return; -+ -+ /* -+ * If we are going to sleep and we have plugged IO queued, -+ * make sure to submit it to avoid deadlocks. -+ */ -+ if (blk_needs_flush_plug(tsk)) -+ blk_schedule_flush_plug(tsk); -+} -+ -+static void sched_update_worker(struct task_struct *tsk) -+{ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_running(tsk); -+ else -+ io_wq_worker_running(tsk); -+ } -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ struct task_struct *tsk = current; -+ -+ sched_submit_work(tsk); -+ do { -+ preempt_disable(); -+ __schedule(false); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ sched_update_worker(tsk); -+} -+EXPORT_SYMBOL(schedule); -+ -+/* -+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted -+ * state (have scheduled out non-voluntarily) by making sure that all -+ * tasks have either left the run queue or have gone into user space. -+ * As idle tasks do not do either, they must not ever be preempted -+ * (schedule out non-voluntarily). -+ * -+ * schedule_idle() is similar to schedule_preempt_disable() except that it -+ * never enables preemption because it does not call sched_submit_work(). -+ */ -+void __sched schedule_idle(void) -+{ -+ /* -+ * As this skips calling sched_submit_work(), which the idle task does -+ * regardless because that function is a nop when the task is in a -+ * TASK_RUNNING state, make sure this isn't used someplace that the -+ * current task can be in any other state. Note, idle is always in the -+ * TASK_RUNNING state. 
-+ */ -+ WARN_ON_ONCE(current->state); -+ do { -+ __schedule(false); -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_CONTEXT_TRACKING -+asmlinkage __visible void __sched schedule_user(void) -+{ -+ /* -+ * If we come here after a random call to set_need_resched(), -+ * or we have been woken up remotely but the IPI has not yet arrived, -+ * we haven't yet exited the RCU idle mode. Do it here manually until -+ * we find a better solution. -+ * -+ * NB: There are buggy callers of this function. Ideally we -+ * should warn if prev_state != CONTEXT_USER, but that will trigger -+ * too frequently to make sense yet. -+ */ -+ enum ctx_state prev_state = exception_enter(); -+ schedule(); -+ exception_exit(prev_state); -+} -+#endif -+ -+/** -+ * schedule_preempt_disabled - called with preemption disabled -+ * -+ * Returns with preemption disabled. Note: preempt_count must be 1 -+ */ -+void __sched schedule_preempt_disabled(void) -+{ -+ sched_preempt_enable_no_resched(); -+ schedule(); -+ preempt_disable(); -+} -+ -+static void __sched notrace preempt_schedule_common(void) -+{ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ __schedule(true); -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ -+ /* -+ * Check again in case we missed a preemption opportunity -+ * between schedule and now. -+ */ -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_PREEMPTION -+/* -+ * This is the entry point to schedule() from in-kernel preemption -+ * off of preempt_enable. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule(void) -+{ -+ /* -+ * If there is a non-zero preempt_count or interrupts are disabled, -+ * we do not want to preempt the current task. Just return.. -+ */ -+ if (likely(!preemptible())) -+ return; -+ -+ preempt_schedule_common(); -+} -+NOKPROBE_SYMBOL(preempt_schedule); -+EXPORT_SYMBOL(preempt_schedule); -+ -+/** -+ * preempt_schedule_notrace - preempt_schedule called by tracing -+ * -+ * The tracing infrastructure uses preempt_enable_notrace to prevent -+ * recursion and tracing preempt enabling caused by the tracing -+ * infrastructure itself. But as tracing can happen in areas coming -+ * from userspace or just about to enter userspace, a preempt enable -+ * can occur before user_exit() is called. This will cause the scheduler -+ * to be called when the system is still in usermode. -+ * -+ * To prevent this, the preempt_enable_notrace will use this function -+ * instead of preempt_schedule() to exit user context if needed before -+ * calling the scheduler. 
-+ */ -+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) -+{ -+ enum ctx_state prev_ctx; -+ -+ if (likely(!preemptible())) -+ return; -+ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ /* -+ * Needs preempt disabled in case user_exit() is traced -+ * and the tracer calls preempt_enable_notrace() causing -+ * an infinite recursion. -+ */ -+ prev_ctx = exception_enter(); -+ __schedule(true); -+ exception_exit(prev_ctx); -+ -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ } while (need_resched()); -+} -+EXPORT_SYMBOL_GPL(preempt_schedule_notrace); -+ -+#endif /* CONFIG_PREEMPTION */ -+ -+/* -+ * This is the entry point to schedule() from kernel preemption -+ * off of irq context. -+ * Note, that this is called and return with irqs disabled. This will -+ * protect us against recursive calling from irq. -+ */ -+asmlinkage __visible void __sched preempt_schedule_irq(void) -+{ -+ enum ctx_state prev_state; -+ -+ /* Catch callers which need to be fixed */ -+ BUG_ON(preempt_count() || !irqs_disabled()); -+ -+ prev_state = exception_enter(); -+ -+ do { -+ preempt_disable(); -+ local_irq_enable(); -+ __schedule(true); -+ local_irq_disable(); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ -+ exception_exit(prev_state); -+} -+ -+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, -+ void *key) -+{ -+ return try_to_wake_up(curr->private, mode, wake_flags); -+} -+EXPORT_SYMBOL(default_wake_function); -+ -+static inline void check_task_changed(struct rq *rq, struct task_struct *p) -+{ -+ /* Trigger resched if task sched_prio has been modified. */ -+ if (task_on_rq_queued(p) && sched_task_need_requeue(p)) { -+ requeue_task(p, rq); -+ check_preempt_curr(rq); -+ } -+} -+ -+#ifdef CONFIG_RT_MUTEXES -+ -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ -+/* -+ * rt_mutex_setprio - set the current priority of a task -+ * @p: task to boost -+ * @pi_task: donor task -+ * -+ * This function changes the 'effective' priority of a task. It does -+ * not touch ->normal_prio like __setscheduler(). -+ * -+ * Used by the rt_mutex code to implement priority inheritance -+ * logic. Call site only calls if the priority of the task changed. -+ */ -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) -+{ -+ int prio; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. 
-+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio) -+ return; -+ -+ rq = __task_access_lock(p, &lock); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio) -+ goto out_unlock; -+ -+ /* -+ * Idle task boosting is a nono in general. There is one -+ * exception, when PREEMPT_RT and NOHZ is active: -+ * -+ * The idle task calls get_next_timer_interrupt() and holds -+ * the timer wheel base->lock on the CPU and another CPU wants -+ * to access the timer (probably to cancel it). We can safely -+ * ignore the boosting request, as the idle CPU runs this code -+ * with interrupts disabled and will complete the lock -+ * protected section without being interrupted. So there is no -+ * real need to boost. -+ */ -+ if (unlikely(p == rq->idle)) { -+ WARN_ON(p != rq->curr); -+ WARN_ON(p->pi_blocked_on); -+ goto out_unlock; -+ } -+ -+ trace_sched_pi_setprio(p, pi_task); -+ p->prio = prio; -+ -+ check_task_changed(rq, p); -+out_unlock: -+ __task_access_unlock(p, lock); -+} -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} -+#endif -+ -+void set_user_nice(struct task_struct *p, long nice) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) -+ return; -+ /* -+ * We have to be careful, if called from sys_setpriority(), -+ * the task might be in the middle of scheduling on another CPU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ p->static_prio = NICE_TO_PRIO(nice); -+ /* -+ * The RT priorities are set via sched_setscheduler(), but we still -+ * allow the 'normal' nice value to be set - but as expected -+ * it wont have any effect on scheduling until the task is -+ * not SCHED_NORMAL/SCHED_BATCH: -+ */ -+ if (task_has_rt_policy(p)) -+ goto out_unlock; -+ -+ p->prio = effective_prio(p); -+ check_task_changed(rq, p); -+out_unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+EXPORT_SYMBOL(set_user_nice); -+ -+/* -+ * can_nice - check if a task can reduce its nice value -+ * @p: task -+ * @nice: nice value -+ */ -+int can_nice(const struct task_struct *p, const int nice) -+{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); -+} -+ -+#ifdef __ARCH_WANT_SYS_NICE -+ -+/* -+ * sys_nice - change the priority of the current process. -+ * @increment: priority increment -+ * -+ * sys_setpriority is a more generic, but much slower function that -+ * does similar things. -+ */ -+SYSCALL_DEFINE1(nice, int, increment) -+{ -+ long nice, retval; -+ -+ /* -+ * Setpriority might change our priority at the same moment. -+ * We don't have to worry. Conceptually one call occurs first -+ * and we have a single winner. 
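/*
 * Editorial illustration, not part of the patch being removed: the nice(2)
 * path serviced by sys_nice()/set_user_nice() above, using the standard
 * glibc wrappers. Raising priority (a negative increment) needs CAP_SYS_NICE
 * or a permissive RLIMIT_NICE, as enforced by can_nice().
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/resource.h>
#include <errno.h>

int main(void)
{
	errno = 0;
	if (nice(5) == -1 && errno)	/* lower our priority by 5 nice levels */
		perror("nice");
	printf("nice value now: %d\n", getpriority(PRIO_PROCESS, 0));
	return 0;
}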
-+ */ -+ -+ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); -+ nice = task_nice(current) + increment; -+ -+ nice = clamp_val(nice, MIN_NICE, MAX_NICE); -+ if (increment < 0 && !can_nice(current, nice)) -+ return -EPERM; -+ -+ retval = security_task_setnice(current, nice); -+ if (retval) -+ return retval; -+ -+ set_user_nice(current, nice); -+ return 0; -+} -+ -+#endif -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ if (p->prio < MAX_RT_PRIO) -+ return (p->prio - MAX_RT_PRIO); -+ return (p->prio - MAX_RT_PRIO + p->boost_prio); -+} -+ -+/** -+ * idle_cpu - is a given CPU idle currently? -+ * @cpu: the processor in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int idle_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (rq->curr != rq->idle) -+ return 0; -+ -+ if (rq->nr_running) -+ return 0; -+ -+#ifdef CONFIG_SMP -+ if (!llist_empty(&rq->wake_list)) -+ return 0; -+#endif -+ -+ return 1; -+} -+ -+/** -+ * idle_task - return the idle task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * Return: The idle task for the cpu @cpu. -+ */ -+struct task_struct *idle_task(int cpu) -+{ -+ return cpu_rq(cpu)->idle; -+} -+ -+/** -+ * find_process_by_pid - find a process with a matching PID value. -+ * @pid: the pid in question. -+ * -+ * The task of @pid, if found. %NULL otherwise. -+ */ -+static inline struct task_struct *find_process_by_pid(pid_t pid) -+{ -+ return pid ? find_task_by_vpid(pid) : current; -+} -+ -+/* -+ * sched_setparam() passes in -1 for its policy, to let the functions -+ * it calls know not to change it. -+ */ -+#define SETPARAM_POLICY -1 -+ -+static void __setscheduler_params(struct task_struct *p, -+ const struct sched_attr *attr) -+{ -+ int policy = attr->sched_policy; -+ -+ if (policy == SETPARAM_POLICY) -+ policy = p->policy; -+ -+ p->policy = policy; -+ -+ /* -+ * allow normal nice value to be set, but will not have any -+ * effect on scheduling until the task not SCHED_NORMAL/ -+ * SCHED_BATCH -+ */ -+ p->static_prio = NICE_TO_PRIO(attr->sched_nice); -+ -+ /* -+ * __sched_setscheduler() ensures attr->sched_priority == 0 when -+ * !rt_policy. Always setting this ensures that things like -+ * getparam()/getattr() don't report silly values for !rt tasks. -+ */ -+ p->rt_priority = attr->sched_priority; -+ p->normal_prio = normal_prio(p); -+} -+ -+/* Actually do priority change: must hold rq lock. */ -+static void __setscheduler(struct rq *rq, struct task_struct *p, -+ const struct sched_attr *attr, bool keep_boost) -+{ -+ __setscheduler_params(p, attr); -+ -+ /* -+ * Keep a potential priority boosting if called from -+ * sched_setscheduler(). 
-+ */ -+ p->prio = normal_prio(p); -+ if (keep_boost) -+ p->prio = rt_effective_prio(p, p->prio); -+} -+ -+/* -+ * check the target process has a UID that matches the current process's -+ */ -+static bool check_same_owner(struct task_struct *p) -+{ -+ const struct cred *cred = current_cred(), *pcred; -+ bool match; -+ -+ rcu_read_lock(); -+ pcred = __task_cred(p); -+ match = (uid_eq(cred->euid, pcred->euid) || -+ uid_eq(cred->euid, pcred->uid)); -+ rcu_read_unlock(); -+ return match; -+} -+ -+static int __sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, -+ bool user, bool pi) -+{ -+ const struct sched_attr dl_squash_attr = { -+ .size = sizeof(struct sched_attr), -+ .sched_policy = SCHED_FIFO, -+ .sched_nice = 0, -+ .sched_priority = 99, -+ }; -+ int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ int retval, oldpolicy = -1; -+ int policy = attr->sched_policy; -+ unsigned long flags; -+ struct rq *rq; -+ int reset_on_fork; -+ raw_spinlock_t *lock; -+ -+ /* The pi code expects interrupts enabled */ -+ BUG_ON(pi && in_interrupt()); -+ -+ /* -+ * Alt schedule FW supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO -+ */ -+ if (unlikely(SCHED_DEADLINE == policy)) { -+ attr = &dl_squash_attr; -+ policy = attr->sched_policy; -+ newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ } -+recheck: -+ /* Double check policy once rq lock held */ -+ if (policy < 0) { -+ reset_on_fork = p->sched_reset_on_fork; -+ policy = oldpolicy = p->policy; -+ } else { -+ reset_on_fork = !!(attr->sched_flags & SCHED_RESET_ON_FORK); -+ -+ if (policy > SCHED_IDLE) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & ~(SCHED_FLAG_ALL)) -+ return -EINVAL; -+ -+ /* -+ * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * SCHED_BATCH and SCHED_IDLE is 0. -+ */ -+ if (attr->sched_priority < 0 || -+ (p->mm && attr->sched_priority > MAX_USER_RT_PRIO - 1) || -+ (!p->mm && attr->sched_priority > MAX_RT_PRIO - 1)) -+ return -EINVAL; -+ if ((SCHED_RR == policy || SCHED_FIFO == policy) != -+ (attr->sched_priority != 0)) -+ return -EINVAL; -+ -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (SCHED_FIFO == policy || SCHED_RR == policy) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (attr->sched_priority > p->rt_priority && -+ attr->sched_priority > rlim_rtprio) -+ return -EPERM; -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ -+ if (user) { -+ retval = security_task_setscheduler(p); -+ if (retval) -+ return retval; -+ } -+ -+ if (pi) -+ cpuset_read_lock(); -+ -+ /* -+ * Make sure no PI-waiters arrive (or leave) while we are -+ * changing the priority of the task: -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ /* -+ * To be able to change p->policy safely, task_access_lock() -+ * must be called. -+ * IF use task_access_lock() here: -+ * For the task p which is not running, reading rq->stop is -+ * racy but acceptable as ->stop doesn't change much. -+ * An enhancemnet can be made to read rq->stop saftly. 
-+ */ -+ rq = __task_access_lock(p, &lock); -+ -+ /* -+ * Changing the policy of the stop threads its a very bad idea -+ */ -+ if (p == rq->stop) { -+ retval = -EINVAL; -+ goto unlock; -+ } -+ -+ /* -+ * If not changing anything there's no need to proceed further: -+ */ -+ if (unlikely(policy == p->policy)) { -+ if (rt_policy(policy) && attr->sched_priority != p->rt_priority) -+ goto change; -+ if (!rt_policy(policy) && -+ NICE_TO_PRIO(attr->sched_nice) != p->static_prio) -+ goto change; -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ retval = 0; -+ goto unlock; -+ } -+change: -+ -+ /* Re-check policy now with rq lock held */ -+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { -+ policy = oldpolicy = -1; -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ goto recheck; -+ } -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ -+ if (pi) { -+ /* -+ * Take priority boosted tasks into account. If the new -+ * effective priority is unchanged, we just store the new -+ * normal parameters and do not touch the scheduler class and -+ * the runqueue. This will be done when the task deboost -+ * itself. -+ */ -+ if (rt_effective_prio(p, newprio) == p->prio) { -+ __setscheduler_params(p, attr); -+ retval = 0; -+ goto unlock; -+ } -+ } -+ -+ __setscheduler(rq, p, attr, pi); -+ -+ check_task_changed(rq, p); -+ -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ if (pi) { -+ cpuset_read_unlock(); -+ rt_mutex_adjust_pi(p); -+ } -+ -+ preempt_enable(); -+ -+ return 0; -+ -+unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ return retval; -+} -+ -+static int _sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param, bool check) -+{ -+ struct sched_attr attr = { -+ .sched_policy = policy, -+ .sched_priority = param->sched_priority, -+ .sched_nice = PRIO_TO_NICE(p->static_prio), -+ }; -+ -+ /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ -+ if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { -+ attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ policy &= ~SCHED_RESET_ON_FORK; -+ attr.sched_policy = policy; -+ } -+ -+ return __sched_setscheduler(p, &attr, check, true); -+} -+ -+/** -+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * -+ * NOTE that the task may be already dead. -+ */ -+int sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, true); -+} -+ -+EXPORT_SYMBOL_GPL(sched_setscheduler); -+ -+int sched_setattr(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, true, true); -+} -+EXPORT_SYMBOL_GPL(sched_setattr); -+ -+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, false, true); -+} -+ -+/** -+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. 
-+ * -+ * Just like sched_setscheduler, only don't bother checking if the -+ * current context has permission. For example, this is needed in -+ * stop_machine(): we create temporary high priority worker threads, -+ * but our caller might not have that capability. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+int sched_setscheduler_nocheck(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, false); -+} -+EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); -+ -+static int -+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) -+{ -+ struct sched_param lparam; -+ struct task_struct *p; -+ int retval; -+ -+ if (!param || pid < 0) -+ return -EINVAL; -+ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) -+ return -EFAULT; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setscheduler(p, policy, &lparam); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/* -+ * Mimics kernel/events/core.c perf_copy_attr(). -+ */ -+static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr) -+{ -+ u32 size; -+ int ret; -+ -+ /* Zero the full structure, so that a short copy will be nice: */ -+ memset(attr, 0, sizeof(*attr)); -+ -+ ret = get_user(size, &uattr->size); -+ if (ret) -+ return ret; -+ -+ /* ABI compatibility quirk: */ -+ if (!size) -+ size = SCHED_ATTR_SIZE_VER0; -+ -+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) -+ goto err_size; -+ -+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); -+ if (ret) { -+ if (ret == -E2BIG) -+ goto err_size; -+ return ret; -+ } -+ -+ /* -+ * XXX: Do we want to be lenient like existing syscalls; or do we want -+ * to be strict and return an error on out-of-bounds values? -+ */ -+ attr->sched_nice = clamp(attr->sched_nice, -20, 19); -+ -+ /* sched/core.c uses zero here but we already know ret is zero */ -+ return 0; -+ -+err_size: -+ put_user(sizeof(*attr), &uattr->size); -+ return -E2BIG; -+} -+ -+/** -+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority -+ * @pid: the pid in question. -+ * @policy: new policy. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * @param: structure containing the new RT priority. -+ */ -+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) -+{ -+ if (policy < 0) -+ return -EINVAL; -+ -+ return do_sched_setscheduler(pid, policy, param); -+} -+ -+/** -+ * sys_sched_setparam - set/change the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); -+} -+ -+/** -+ * sys_sched_setattr - same as above, but with extended sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. 
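/*
 * Editorial illustration, not taken from the patch: switching the calling
 * thread to SCHED_FIFO with sched_setscheduler(2), the syscall serviced by
 * do_sched_setscheduler()/__sched_setscheduler() above. Without CAP_SYS_NICE
 * this is subject to the RLIMIT_RTPRIO checks shown earlier in this hunk.
 */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 10 };

	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {	/* 0 == self */
		perror("sched_setscheduler");
		return 1;
	}
	printf("policy is now %d\n", sched_getscheduler(0));
	return 0;
}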
-+ */ -+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, flags) -+{ -+ struct sched_attr attr; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || flags) -+ return -EINVAL; -+ -+ retval = sched_copy_attr(uattr, &attr); -+ if (retval) -+ return retval; -+ -+ if ((int)attr.sched_policy < 0) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (p != NULL) -+ retval = sched_setattr(p, &attr); -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread -+ * @pid: the pid in question. -+ * -+ * Return: On success, the policy of the thread. Otherwise, a negative error -+ * code. -+ */ -+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) -+{ -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (pid < 0) -+ goto out_nounlock; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (p) { -+ retval = security_task_getscheduler(p); -+ if (!retval) -+ retval = p->policy; -+ } -+ rcu_read_unlock(); -+ -+out_nounlock: -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the RT priority. -+ * -+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error -+ * code. -+ */ -+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ struct sched_param lp = { .sched_priority = 0 }; -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (!param || pid < 0) -+ goto out_nounlock; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ if (task_has_rt_policy(p)) -+ lp.sched_priority = p->rt_priority; -+ rcu_read_unlock(); -+ -+ /* -+ * This one might sleep, we cannot do it with a spinlock held ... -+ */ -+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; -+ -+out_nounlock: -+ return retval; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/* -+ * Copy the kernel size attribute structure (which might be larger -+ * than what user-space knows about) to user-space. -+ * -+ * Note that all cases are valid: user-space buffer can be larger or -+ * smaller than the kernel-space buffer. The usual case is that both -+ * have the same size. -+ */ -+static int -+sched_attr_copy_to_user(struct sched_attr __user *uattr, -+ struct sched_attr *kattr, -+ unsigned int usize) -+{ -+ unsigned int ksize = sizeof(*kattr); -+ -+ if (!access_ok(uattr, usize)) -+ return -EFAULT; -+ -+ /* -+ * sched_getattr() ABI forwards and backwards compatibility: -+ * -+ * If usize == ksize then we just copy everything to user-space and all is good. -+ * -+ * If usize < ksize then we only copy as much as user-space has space for, -+ * this keeps ABI compatibility as well. We skip the rest. -+ * -+ * If usize > ksize then user-space is using a newer version of the ABI, -+ * which part the kernel doesn't know about. Just ignore it - tooling can -+ * detect the kernel's knowledge of attributes from the attr->size value -+ * which is set to ksize in this case. -+ */ -+ kattr->size = min(usize, ksize); -+ -+ if (copy_to_user(uattr, kattr, kattr->size)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr -+ * @pid: the pid in question. 
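/*
 * Illustrative userspace sketch (not patch content): reading the policy and
 * RT priority back through sched_getscheduler(2)/sched_getparam(2), the
 * read-side counterparts handled just above.
 */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	struct sched_param sp;
	int policy = sched_getscheduler(0);	/* 0 == calling process */

	if (policy == -1 || sched_getparam(0, &sp) == -1) {
		perror("sched_get*");
		return 1;
	}
	printf("policy %d, rt_priority %d\n", policy, sp.sched_priority);
	return 0;
}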
-+ * @uattr: structure containing the extended parameters. -+ * @usize: sizeof(attr) for fwd/bwd comp. -+ * @flags: for future extension. -+ */ -+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, usize, unsigned int, flags) -+{ -+ struct sched_attr kattr = { }; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || usize > PAGE_SIZE || -+ usize < SCHED_ATTR_SIZE_VER0 || flags) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ kattr.sched_policy = p->policy; -+ if (p->sched_reset_on_fork) -+ kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ if (task_has_rt_policy(p)) -+ kattr.sched_priority = p->rt_priority; -+ else -+ kattr.sched_nice = task_nice(p); -+ -+#ifdef CONFIG_UCLAMP_TASK -+ kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; -+ kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; -+#endif -+ -+ rcu_read_unlock(); -+ -+ return sched_attr_copy_to_user(uattr, &kattr, usize); -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ -+ cpumask_var_t cpus_allowed, new_mask; -+ struct task_struct *p; -+ int retval; -+ -+ get_online_cpus(); -+ rcu_read_lock(); -+ -+ p = find_process_by_pid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ put_online_cpus(); -+ return -ESRCH; -+ } -+ -+ /* Prevent p going away */ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->flags & PF_NO_SETAFFINITY) { -+ retval = -EINVAL; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ retval = -EPERM; -+ if (!check_same_owner(p)) { -+ rcu_read_lock(); -+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { -+ rcu_read_unlock(); -+ goto out_unlock; -+ } -+ rcu_read_unlock(); -+ } -+ -+ retval = security_task_setscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ cpuset_cpus_allowed(p, cpus_allowed); -+ cpumask_and(new_mask, in_mask, cpus_allowed); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ -+ if (!retval) { -+ cpuset_cpus_allowed(p, cpus_allowed); -+ if (!cpumask_subset(new_mask, cpus_allowed)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. Just reset the cpus_allowed to the -+ * cpuset's cpus_allowed -+ */ -+ cpumask_copy(new_mask, cpus_allowed); -+ goto again; -+ } -+ } -+out_unlock: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_allowed); -+out_put_task: -+ put_task_struct(p); -+ put_online_cpus(); -+ return retval; -+} -+ -+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, -+ struct cpumask *new_mask) -+{ -+ if (len < cpumask_size()) -+ cpumask_clear(new_mask); -+ else if (len > cpumask_size()) -+ len = cpumask_size(); -+ -+ return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; -+} -+ -+/** -+ * sys_sched_setaffinity - set the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to the new CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. 
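/*
 * Editorial illustration, not part of the removed patch: pinning the calling
 * thread to CPU 0 via sched_setaffinity(2), which lands in the
 * sched_setaffinity()/__set_cpus_allowed_ptr() path above. Uses the standard
 * glibc cpu_set_t helpers.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);			/* allow CPU 0 only */
	if (sched_setaffinity(0, sizeof(set), &set) == -1) {
		perror("sched_setaffinity");
		return 1;
	}
	if (sched_getaffinity(0, sizeof(set), &set) == 0)
		printf("affinity mask now spans %d CPU(s)\n", CPU_COUNT(&set));
	return 0;
}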
-+ */ -+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ cpumask_var_t new_mask; -+ int retval; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); -+ if (retval == 0) -+ retval = sched_setaffinity(pid, new_mask); -+ free_cpumask_var(new_mask); -+ return retval; -+} -+ -+long sched_getaffinity(pid_t pid, cpumask_t *mask) -+{ -+ struct task_struct *p; -+ raw_spinlock_t *lock; -+ unsigned long flags; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ task_access_lock_irqsave(p, &lock, &flags); -+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+out_unlock: -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getaffinity - get the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to hold the current CPU mask -+ * -+ * Return: size of CPU mask copied to user_mask_ptr on success. An -+ * error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ int ret; -+ cpumask_var_t mask; -+ -+ if ((len * BITS_PER_BYTE) < nr_cpu_ids) -+ return -EINVAL; -+ if (len & (sizeof(unsigned long)-1)) -+ return -EINVAL; -+ -+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ ret = sched_getaffinity(pid, mask); -+ if (ret == 0) { -+ unsigned int retlen = min_t(size_t, len, cpumask_size()); -+ -+ if (copy_to_user(user_mask_ptr, mask, retlen)) -+ ret = -EFAULT; -+ else -+ ret = retlen; -+ } -+ free_cpumask_var(mask); -+ -+ return ret; -+} -+ -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. It does this by -+ * scheduling away the current task. If it still has the earliest deadline -+ * it will be scheduled again as the next task. -+ * -+ * Return: 0. -+ */ -+static void do_sched_yield(void) -+{ -+ struct rq *rq; -+ struct rq_flags rf; -+ -+ if (!sched_yield_type) -+ return; -+ -+ rq = this_rq_lock_irq(&rf); -+ -+ schedstat_inc(rq->yld_count); -+ -+ if (1 == sched_yield_type) { -+ if (!rt_task(current)) { -+ current->boost_prio = MAX_PRIORITY_ADJ; -+ requeue_task(current, rq); -+ } -+ } else if (2 == sched_yield_type) { -+ if (rq->nr_running > 1) -+ rq->skip = current; -+ } -+ -+ /* -+ * Since we are going to call schedule() anyway, there's -+ * no need to preempt or enable interrupts: -+ */ -+ preempt_disable(); -+ raw_spin_unlock(&rq->lock); -+ sched_preempt_enable_no_resched(); -+ -+ schedule(); -+} -+ -+SYSCALL_DEFINE0(sched_yield) -+{ -+ do_sched_yield(); -+ return 0; -+} -+ -+#ifndef CONFIG_PREEMPTION -+int __sched _cond_resched(void) -+{ -+ if (should_resched(0)) { -+ preempt_schedule_common(); -+ return 1; -+ } -+ rcu_all_qs(); -+ return 0; -+} -+EXPORT_SYMBOL(_cond_resched); -+#endif -+ -+/* -+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, -+ * call schedule, and on return reacquire the lock. -+ * -+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level -+ * operations here to prevent schedule() from being called twice (once via -+ * spin_unlock(), once by hand). 
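/*
 * Illustrative sketch, not patch content: a plain sched_yield(2) call.
 * In do_sched_yield() above its effect depends on sched_yield_type:
 * 0 is a no-op, 1 sets the caller's boost_prio to MAX_PRIORITY_ADJ and
 * requeues it (non-RT tasks only), 2 marks the caller as rq->skip so
 * another runnable task is picked next.
 */
#include <sched.h>

int main(void)
{
	sched_yield();		/* give other runnable tasks a turn */
	return 0;
}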
-+ */ -+int __cond_resched_lock(spinlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held(lock); -+ -+ if (spin_needbreak(lock) || resched) { -+ spin_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ spin_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_lock); -+ -+/** -+ * yield - yield the current processor to other threads. -+ * -+ * Do not ever use this function, there's a 99% chance you're doing it wrong. -+ * -+ * The scheduler is at all times free to pick the calling task as the most -+ * eligible task to run, if removing the yield() call from your code breaks -+ * it, its already broken. -+ * -+ * Typical broken usage is: -+ * -+ * while (!event) -+ * yield(); -+ * -+ * where one assumes that yield() will let 'the other' process run that will -+ * make event true. If the current task is a SCHED_FIFO task that will never -+ * happen. Never use yield() as a progress guarantee!! -+ * -+ * If you want to use yield() to wait for something, use wait_event(). -+ * If you want to use yield() to be 'nice' for others, use cond_resched(). -+ * If you still want to use yield(), do not! -+ */ -+void __sched yield(void) -+{ -+ set_current_state(TASK_RUNNING); -+ do_sched_yield(); -+} -+EXPORT_SYMBOL(yield); -+ -+/** -+ * yield_to - yield the current processor to another thread in -+ * your thread group, or accelerate that thread toward the -+ * processor it's on. -+ * @p: target task -+ * @preempt: whether task preemption is allowed or not -+ * -+ * It's the caller's job to ensure that the target task struct -+ * can't go away on us before we can do any checks. -+ * -+ * In Alt schedule FW, yield_to is not supported. -+ * -+ * Return: -+ * true (>0) if we indeed boosted the target task. -+ * false (0) if we failed to boost the target. -+ * -ESRCH if there's no task to yield to. -+ */ -+int __sched yield_to(struct task_struct *p, bool preempt) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(yield_to); -+ -+int io_schedule_prepare(void) -+{ -+ int old_iowait = current->in_iowait; -+ -+ current->in_iowait = 1; -+ blk_schedule_flush_plug(current); -+ -+ return old_iowait; -+} -+ -+void io_schedule_finish(int token) -+{ -+ current->in_iowait = token; -+} -+ -+/* -+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so -+ * that process accounting knows that this is a task in IO wait state. -+ * -+ * But don't do that if it is a deliberate, throttling IO wait (this task -+ * has set its backing_dev_info: the queue against which it should throttle) -+ */ -+ -+long __sched io_schedule_timeout(long timeout) -+{ -+ int token; -+ long ret; -+ -+ token = io_schedule_prepare(); -+ ret = schedule_timeout(timeout); -+ io_schedule_finish(token); -+ -+ return ret; -+} -+EXPORT_SYMBOL(io_schedule_timeout); -+ -+void __sched io_schedule(void) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ schedule(); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(io_schedule); -+ -+/** -+ * sys_sched_get_priority_max - return maximum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the maximum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. 
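/*
 * Editorial illustration, not patch content: querying the valid static
 * priority range per policy. The handlers in this hunk report
 * 1..MAX_USER_RT_PRIO-1 for SCHED_FIFO/SCHED_RR and 0 for normal policies.
 */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	printf("SCHED_FIFO : %d..%d\n",
	       sched_get_priority_min(SCHED_FIFO),
	       sched_get_priority_max(SCHED_FIFO));
	printf("SCHED_OTHER: %d..%d\n",
	       sched_get_priority_min(SCHED_OTHER),
	       sched_get_priority_max(SCHED_OTHER));
	return 0;
}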
-+ */ -+SYSCALL_DEFINE1(sched_get_priority_max, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = MAX_USER_RT_PRIO-1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * sys_sched_get_priority_min - return minimum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the minimum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_min, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) -+{ -+ struct task_struct *p; -+ int retval; -+ -+ if (pid < 0) -+ return -EINVAL; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ rcu_read_unlock(); -+ -+ *t = ns_to_timespec64(sched_timeslice_ns); -+ return 0; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/** -+ * sys_sched_rr_get_interval - return the default timeslice of a process. -+ * @pid: pid of the process. -+ * @interval: userspace pointer to the timeslice value. -+ * -+ * -+ * Return: On success, 0 and the timeslice is in @interval. Otherwise, -+ * an error code. -+ */ -+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, -+ struct __kernel_timespec __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_timespec64(&t, interval); -+ -+ return retval; -+} -+ -+#ifdef CONFIG_COMPAT_32BIT_TIME -+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, -+ struct old_timespec32 __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_old_timespec32(&t, interval); -+ return retval; -+} -+#endif -+ -+void sched_show_task(struct task_struct *p) -+{ -+ unsigned long free = 0; -+ int ppid; -+ -+ if (!try_get_task_stack(p)) -+ return; -+ -+ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); -+ -+ if (p->state == TASK_RUNNING) -+ printk(KERN_CONT " running task "); -+#ifdef CONFIG_DEBUG_STACK_USAGE -+ free = stack_not_used(p); -+#endif -+ ppid = 0; -+ rcu_read_lock(); -+ if (pid_alive(p)) -+ ppid = task_pid_nr(rcu_dereference(p->real_parent)); -+ rcu_read_unlock(); -+ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, -+ task_pid_nr(p), ppid, -+ (unsigned long)task_thread_info(p)->flags); -+ -+ print_worker_info(KERN_INFO, p); -+ show_stack(p, NULL); -+ put_task_stack(p); -+} -+EXPORT_SYMBOL_GPL(sched_show_task); -+ -+static inline bool -+state_filter_match(unsigned long state_filter, struct task_struct *p) -+{ -+ /* no filter, everything matches */ -+ if (!state_filter) -+ return true; -+ -+ /* filter, but doesn't match */ -+ if (!(p->state & state_filter)) -+ return false; -+ -+ /* -+ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows -+ * TASK_KILLABLE). 
-+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) -+ return false; -+ -+ return true; -+} -+ -+ -+void show_state_filter(unsigned long state_filter) -+{ -+ struct task_struct *g, *p; -+ -+#if BITS_PER_LONG == 32 -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#else -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#endif -+ rcu_read_lock(); -+ for_each_process_thread(g, p) { -+ /* -+ * reset the NMI-timeout, listing all files on a slow -+ * console might take a lot of time: -+ * Also, reset softlockup watchdogs on all CPUs, because -+ * another CPU might be blocked waiting for us to process -+ * an IPI. -+ */ -+ touch_nmi_watchdog(); -+ touch_all_softlockup_watchdogs(); -+ if (state_filter_match(state_filter, p)) -+ sched_show_task(p); -+ } -+ -+#ifdef CONFIG_SCHED_DEBUG -+ /* TODO: Alt schedule FW should support this -+ if (!state_filter) -+ sysrq_sched_debug_show(); -+ */ -+#endif -+ rcu_read_unlock(); -+ /* -+ * Only show locks if all tasks are dumped: -+ */ -+ if (!state_filter) -+ debug_show_all_locks(); -+} -+ -+void dump_cpu_task(int cpu) -+{ -+ pr_info("Task dump for CPU %d:\n", cpu); -+ sched_show_task(cpu_curr(cpu)); -+} -+ -+/** -+ * init_idle - set up an idle thread for a given CPU -+ * @idle: task in question -+ * @cpu: CPU the idle task belongs to -+ * -+ * NOTE: this function does not set the idle thread's NEED_RESCHED -+ * flag, to make booting more robust. -+ */ -+void init_idle(struct task_struct *idle, int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ __sched_fork(0, idle); -+ -+ raw_spin_lock_irqsave(&idle->pi_lock, flags); -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ idle->last_ran = rq->clock_task; -+ idle->state = TASK_RUNNING; -+ idle->flags |= PF_IDLE; -+ sched_queue_init_idle(rq, idle); -+ -+ kasan_unpoison_task_stack(idle); -+ -+#ifdef CONFIG_SMP -+ /* -+ * It's possible that init_idle() gets called multiple times on a task, -+ * in that case do_set_cpus_allowed() will not do the right thing. -+ * -+ * And since this is boot we can forgo the serialisation. -+ */ -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); -+#endif -+ -+ /* Silence PROVE_RCU */ -+ rcu_read_lock(); -+ __set_task_cpu(idle, cpu); -+ rcu_read_unlock(); -+ -+ rq->idle = idle; -+ rcu_assign_pointer(rq->curr, idle); -+ idle->on_cpu = 1; -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); -+ -+ /* Set the preempt count _outside_ the spinlocks! */ -+ init_idle_preempt_count(idle, cpu); -+ -+ ftrace_graph_init_idle_task(idle, cpu); -+ vtime_init_idle(idle, cpu); -+#ifdef CONFIG_SMP -+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -+#endif -+} -+ -+#ifdef CONFIG_SMP -+ -+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, -+ const struct cpumask __maybe_unused *trial) -+{ -+ return 1; -+} -+ -+int task_can_attach(struct task_struct *p, -+ const struct cpumask *cs_cpus_allowed) -+{ -+ int ret = 0; -+ -+ /* -+ * Kthreads which disallow setaffinity shouldn't be moved -+ * to a new cpuset; we don't want to change their CPU -+ * affinity and isolating such threads by their set of -+ * allowed nodes is unnecessary. Thus, cpusets are not -+ * applicable for such threads. This prevents checking for -+ * success of set_cpus_allowed_ptr() on all attached tasks -+ * before cpus_mask may be changed. 
-+ */ -+ if (p->flags & PF_NO_SETAFFINITY) -+ ret = -EINVAL; -+ -+ return ret; -+} -+ -+bool sched_smp_initialized __read_mostly; -+ -+#ifdef CONFIG_HOTPLUG_CPU -+/* -+ * Ensures that the idle task is using init_mm right before its CPU goes -+ * offline. -+ */ -+void idle_task_exit(void) -+{ -+ struct mm_struct *mm = current->active_mm; -+ -+ BUG_ON(current != this_rq()->idle); -+ -+ if (mm != &init_mm) { -+ switch_mm(mm, &init_mm, current); -+ finish_arch_post_lock_switch(); -+ } -+ -+ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ -+} -+ -+/* -+ * Migrate all tasks from the rq, sleeping tasks will be migrated by -+ * try_to_wake_up()->select_task_rq(). -+ * -+ * Called with rq->lock held even though we'er in stop_machine() and -+ * there's no concurrency possible, we hold the required locks anyway -+ * because of lock validation efforts. -+ */ -+static void migrate_tasks(struct rq *dead_rq) -+{ -+ struct rq *rq = dead_rq; -+ struct task_struct *p, *stop = rq->stop; -+ int count = 0; -+ -+ /* -+ * Fudge the rq selection such that the below task selection loop -+ * doesn't get stuck on the currently eligible stop task. -+ * -+ * We're currently inside stop_machine() and the rq is either stuck -+ * in the stop_machine_cpu_stop() loop, or we're executing this code, -+ * either way we should never end up calling schedule() until we're -+ * done here. -+ */ -+ rq->stop = NULL; -+ -+ p = sched_rq_first_task(rq); -+ while (p != rq->idle) { -+ int dest_cpu; -+ -+ /* skip the running task */ -+ if (task_running(p) || 1 == p->nr_cpus_allowed) { -+ p = sched_rq_next_task(p, rq); -+ continue; -+ } -+ -+ /* -+ * Rules for changing task_struct::cpus_allowed are holding -+ * both pi_lock and rq->lock, such that holding either -+ * stabilizes the mask. -+ * -+ * Drop rq->lock is not quite as disastrous as it usually is -+ * because !cpu_active at this point, which means load-balance -+ * will not interfere. Also, stop-machine. -+ */ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ /* -+ * Since we're inside stop-machine, _nothing_ should have -+ * changed the task, WARN if weird stuff happened, because in -+ * that case the above rq->lock drop is a fail too. -+ */ -+ if (WARN_ON(task_rq(p) != rq || !task_on_rq_queued(p))) { -+ raw_spin_unlock(&p->pi_lock); -+ p = sched_rq_next_task(p, rq); -+ continue; -+ } -+ -+ count++; -+ /* Find suitable destination for @next, with force if needed. */ -+ dest_cpu = select_fallback_rq(dead_rq->cpu, p); -+ rq = __migrate_task(rq, p, dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ rq = dead_rq; -+ raw_spin_lock(&rq->lock); -+ /* Check queued task all over from the header again */ -+ p = sched_rq_first_task(rq); -+ } -+ -+ rq->stop = stop; -+} -+ -+static void set_rq_offline(struct rq *rq) -+{ -+ if (rq->online) -+ rq->online = false; -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+static void set_rq_online(struct rq *rq) -+{ -+ if (!rq->online) -+ rq->online = true; -+} -+ -+/* -+ * used to mark begin/end of suspend/resume: -+ */ -+static int num_cpus_frozen; -+ -+/* -+ * Update cpusets according to cpu_active mask. If cpusets are -+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper -+ * around partition_sched_domains(). -+ * -+ * If we come here as part of a suspend/resume, don't touch cpusets because we -+ * want to restore it back to its original state upon resume anyway. 
-+ */ -+static void cpuset_cpu_active(void) -+{ -+ if (cpuhp_tasks_frozen) { -+ /* -+ * num_cpus_frozen tracks how many CPUs are involved in suspend -+ * resume sequence. As long as this is not the last online -+ * operation in the resume sequence, just build a single sched -+ * domain, ignoring cpusets. -+ */ -+ partition_sched_domains(1, NULL, NULL); -+ if (--num_cpus_frozen) -+ return; -+ /* -+ * This is the last CPU online operation. So fall through and -+ * restore the original sched domains by considering the -+ * cpuset configurations. -+ */ -+ cpuset_force_rebuild(); -+ } -+ -+ cpuset_update_active_cpus(); -+} -+ -+static int cpuset_cpu_inactive(unsigned int cpu) -+{ -+ if (!cpuhp_tasks_frozen) { -+ cpuset_update_active_cpus(); -+ } else { -+ num_cpus_frozen++; -+ partition_sched_domains(1, NULL, NULL); -+ } -+ return 0; -+} -+ -+int sched_cpu_activate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going up, increment the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_inc_cpuslocked(&sched_smt_present); -+#endif -+ set_cpu_active(cpu, true); -+ -+ if (sched_smp_initialized) -+ cpuset_cpu_active(); -+ -+ /* -+ * Put the rq online, if not already. This happens: -+ * -+ * 1) In the early boot process, because we build the real domains -+ * after all cpus have been brought up. -+ * -+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the -+ * domains. -+ */ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_online(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ int ret; -+ -+ set_cpu_active(cpu, false); -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. -+ */ -+ synchronize_rcu(); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going down, decrement the number of cores with SMT present. 
-+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) { -+ static_branch_dec_cpuslocked(&sched_smt_present); -+ if (!static_branch_likely(&sched_smt_present)) -+ cpumask_clear(&sched_sg_idle_mask); -+ } -+#endif -+ -+ if (!sched_smp_initialized) -+ return 0; -+ -+ ret = cpuset_cpu_inactive(cpu); -+ if (ret) { -+ set_cpu_active(cpu, true); -+ return ret; -+ } -+ return 0; -+} -+ -+static void sched_rq_cpu_starting(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ rq->calc_load_update = calc_load_update; -+} -+ -+int sched_cpu_starting(unsigned int cpu) -+{ -+ sched_rq_cpu_starting(cpu); -+ sched_tick_start(cpu); -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+int sched_cpu_dying(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ /* Handle pending wakeups and then migrate everything off */ -+ sched_ttwu_pending(); -+ -+ sched_tick_stop(cpu); -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_offline(rq); -+ migrate_tasks(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ hrtick_clear(rq); -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_SMP -+static void sched_init_topology_cpumask_early(void) -+{ -+ int cpu, level; -+ cpumask_t *tmp; -+ -+ for_each_possible_cpu(cpu) { -+ for (level = 0; level < NR_CPU_AFFINITY_CHK_LEVEL; level++) { -+ tmp = &(per_cpu(sched_cpu_affinity_masks, cpu)[level]); -+ cpumask_copy(tmp, cpu_possible_mask); -+ cpumask_clear_cpu(cpu, tmp); -+ } -+ per_cpu(sched_cpu_llc_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ per_cpu(sched_cpu_affinity_end_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); -+ /*per_cpu(sd_llc_id, cpu) = cpu;*/ -+ } -+} -+ -+#define TOPOLOGY_CPUMASK(name, mask, last) \ -+ if (cpumask_and(chk, chk, mask)) \ -+ printk(KERN_INFO "sched: cpu#%02d affinity mask: 0x%08lx - "#name,\ -+ cpu, (chk++)->bits[0]); \ -+ if (!last) \ -+ cpumask_complement(chk, mask) -+ -+static void sched_init_topology_cpumask(void) -+{ -+ int cpu; -+ cpumask_t *chk; -+ -+ for_each_online_cpu(cpu) { -+ /* take chance to reset time slice for idle tasks */ -+ cpu_rq(cpu)->idle->time_slice = sched_timeslice_ns; -+ -+ chk = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ -+ cpumask_complement(chk, cpumask_of(cpu)); -+#ifdef CONFIG_SCHED_SMT -+ TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); -+#endif -+ per_cpu(sd_llc_id, cpu) = cpumask_first(cpu_coregroup_mask(cpu)); -+ per_cpu(sched_cpu_llc_mask, cpu) = chk; -+ TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask(cpu), false); -+ -+ TOPOLOGY_CPUMASK(core, topology_core_cpumask(cpu), false); -+ -+ TOPOLOGY_CPUMASK(others, cpu_online_mask, true); -+ -+ per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; -+ printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", -+ cpu, per_cpu(sd_llc_id, cpu), -+ (int) (per_cpu(sched_cpu_llc_mask, cpu) - -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]))); -+ } -+} -+#endif -+ -+void __init sched_init_smp(void) -+{ -+ /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -+ BUG(); -+ -+ sched_init_topology_cpumask(); -+ -+ sched_smp_initialized = true; -+} -+#else -+void __init sched_init_smp(void) -+{ -+ cpu_rq(0)->idle->time_slice = sched_timeslice_ns; -+} -+#endif /* CONFIG_SMP */ -+ -+int in_sched_functions(unsigned long addr) -+{ -+ return in_lock_functions(addr) || -+ (addr >= (unsigned long)__sched_text_start -+ && addr < (unsigned long)__sched_text_end); -+} -+ -+#ifdef CONFIG_CGROUP_SCHED -+/* task group related information */ 
-+struct task_group { -+ struct cgroup_subsys_state css; -+ -+ struct rcu_head rcu; -+ struct list_head list; -+ -+ struct task_group *parent; -+ struct list_head siblings; -+ struct list_head children; -+}; -+ -+/* -+ * Default task group. -+ * Every task in system belongs to this group at bootup. -+ */ -+struct task_group root_task_group; -+LIST_HEAD(task_groups); -+ -+/* Cacheline aligned slab cache for task_group */ -+static struct kmem_cache *task_group_cache __read_mostly; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void __init sched_init(void) -+{ -+ int i; -+ struct rq *rq; -+ -+ printk(KERN_INFO ALT_SCHED_VERSION_MSG); -+ -+ wait_bit_init(); -+ -+#ifdef CONFIG_SMP -+ for (i = 0; i < SCHED_BITS; i++) -+ cpumask_copy(&sched_rq_watermark[i], cpu_present_mask); -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+ task_group_cache = KMEM_CACHE(task_group, 0); -+ -+ list_add(&root_task_group.list, &task_groups); -+ INIT_LIST_HEAD(&root_task_group.children); -+ INIT_LIST_HEAD(&root_task_group.siblings); -+#endif /* CONFIG_CGROUP_SCHED */ -+ for_each_possible_cpu(i) { -+ rq = cpu_rq(i); -+ -+ sched_queue_init(rq); -+ rq->watermark = IDLE_WM; -+ rq->skip = NULL; -+ -+ raw_spin_lock_init(&rq->lock); -+ rq->nr_running = rq->nr_uninterruptible = 0; -+ rq->calc_load_active = 0; -+ rq->calc_load_update = jiffies + LOAD_FREQ; -+#ifdef CONFIG_SMP -+ rq->online = false; -+ rq->cpu = i; -+ -+#ifdef CONFIG_SCHED_SMT -+ rq->active_balance = 0; -+#endif -+#endif -+ rq->nr_switches = 0; -+ atomic_set(&rq->nr_iowait, 0); -+ hrtick_rq_init(rq); -+ } -+#ifdef CONFIG_SMP -+ /* Set rq->online for cpu 0 */ -+ cpu_rq(0)->online = true; -+#endif -+ /* -+ * The boot idle thread does lazy MMU switching as well: -+ */ -+ mmgrab(&init_mm); -+ enter_lazy_tlb(&init_mm, current); -+ -+ /* -+ * Make us the idle thread. Technically, schedule() should not be -+ * called from this thread, however somewhere below it might be, -+ * but because we are the idle thread, we just pick up running again -+ * when this runqueue becomes "idle". -+ */ -+ init_idle(current, smp_processor_id()); -+ -+ calc_load_update = jiffies + LOAD_FREQ; -+ -+#ifdef CONFIG_SMP -+ idle_thread_set_boot_cpu(); -+ -+ sched_init_topology_cpumask_early(); -+#endif /* SMP */ -+ -+ init_schedstats(); -+ -+ psi_init(); -+} -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+static inline int preempt_count_equals(int preempt_offset) -+{ -+ int nested = preempt_count() + rcu_preempt_depth(); -+ -+ return (nested == preempt_offset); -+} -+ -+void __might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* -+ * Blocking primitives will set (and therefore destroy) current->state, -+ * since we will exit with TASK_RUNNING make sure we enter with it, -+ * otherwise we will destroy state. 
-+ */ -+ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, -+ "do not call blocking ops when !TASK_RUNNING; " -+ "state=%lx set at [<%p>] %pS\n", -+ current->state, -+ (void *)current->task_state_change, -+ (void *)current->task_state_change); -+ -+ ___might_sleep(file, line, preempt_offset); -+} -+EXPORT_SYMBOL(__might_sleep); -+ -+void ___might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* Ratelimiting timestamp: */ -+ static unsigned long prev_jiffy; -+ -+ unsigned long preempt_disable_ip; -+ -+ /* WARN_ON_ONCE() by default, no rate limit required: */ -+ rcu_sleep_check(); -+ -+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ !is_idle_task(current) && !current->non_block_count) || -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) -+ return; -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ /* Save this before calling printk(), since that will clobber it: */ -+ preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ printk(KERN_ERR -+ "BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ printk(KERN_ERR -+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); -+ -+ if (task_stack_end_corrupted(current)) -+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ -+ debug_show_held_locks(current); -+ if (irqs_disabled()) -+ print_irqtrace_events(current); -+#ifdef CONFIG_DEBUG_PREEMPT -+ if (!preempt_count_equals(preempt_offset)) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+#endif -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL(___might_sleep); -+ -+void __cant_sleep(const char *file, int line, int preempt_offset) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > preempt_offset) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); -+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_sleep); -+#endif -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+void normalize_rt_tasks(void) -+{ -+ struct task_struct *g, *p; -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ }; -+ -+ read_lock(&tasklist_lock); -+ for_each_process_thread(g, p) { -+ /* -+ * Only normalize user tasks: -+ */ -+ if (p->flags & PF_KTHREAD) -+ continue; -+ -+ if (!rt_task(p)) { -+ /* -+ * Renice negative nice level userspace -+ * tasks back to 0: -+ */ -+ if (task_nice(p) < 0) -+ set_user_nice(p, 0); -+ continue; -+ } -+ -+ __sched_setscheduler(p, &attr, false, false); -+ } -+ read_unlock(&tasklist_lock); -+} -+#endif /* CONFIG_MAGIC_SYSRQ */ -+ -+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) -+/* -+ * These functions are only useful for the IA64 MCA handling, or kdb. -+ * -+ * They can only be called when the whole system has been -+ * stopped - every CPU needs to be quiescent, and no scheduling -+ * activity can take place. 
Using them for anything else would -+ * be a serious bug, and as a result, they aren't even visible -+ * under any other configuration. -+ */ -+ -+/** -+ * curr_task - return the current task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ * -+ * Return: The current task for @cpu. -+ */ -+struct task_struct *curr_task(int cpu) -+{ -+ return cpu_curr(cpu); -+} -+ -+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ -+ -+#ifdef CONFIG_IA64 -+/** -+ * ia64_set_curr_task - set the current task for a given CPU. -+ * @cpu: the processor in question. -+ * @p: the task pointer to set. -+ * -+ * Description: This function must only be used when non-maskable interrupts -+ * are serviced on a separate stack. It allows the architecture to switch the -+ * notion of the current task on a CPU in a non-blocking manner. This function -+ * must be called with all CPU's synchronised, and interrupts disabled, the -+ * and caller must save the original value of the current task (see -+ * curr_task() above) and restore that value before reenabling interrupts and -+ * re-starting the system. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ */ -+void ia64_set_curr_task(int cpu, struct task_struct *p) -+{ -+ cpu_curr(cpu) = p; -+} -+ -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+static void sched_free_group(struct task_group *tg) -+{ -+ kmem_cache_free(task_group_cache, tg); -+} -+ -+/* allocate runqueue etc for a new task group */ -+struct task_group *sched_create_group(struct task_group *parent) -+{ -+ struct task_group *tg; -+ -+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); -+ if (!tg) -+ return ERR_PTR(-ENOMEM); -+ -+ return tg; -+} -+ -+void sched_online_group(struct task_group *tg, struct task_group *parent) -+{ -+} -+ -+/* rcu callback to free various structures associated with a task group */ -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ /* Now it should be safe to free those cfs_rqs */ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+void sched_destroy_group(struct task_group *tg) -+{ -+ /* Wait for possible concurrent references to cfs_rqs complete */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ -+void sched_offline_group(struct task_group *tg) -+{ -+} -+ -+static inline struct task_group *css_tg(struct cgroup_subsys_state *css) -+{ -+ return css ? container_of(css, struct task_group, css) : NULL; -+} -+ -+static struct cgroup_subsys_state * -+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -+{ -+ struct task_group *parent = css_tg(parent_css); -+ struct task_group *tg; -+ -+ if (!parent) { -+ /* This is early initialization for the top cgroup */ -+ return &root_task_group.css; -+ } -+ -+ tg = sched_create_group(parent); -+ if (IS_ERR(tg)) -+ return ERR_PTR(-ENOMEM); -+ return &tg->css; -+} -+ -+/* Expose task group only after completing cgroup initialization */ -+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ struct task_group *parent = css_tg(css->parent); -+ -+ if (parent) -+ sched_online_group(tg, parent); -+ return 0; -+} -+ -+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ sched_offline_group(tg); -+} -+ -+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ /* -+ * Relies on the RCU grace period between css_released() and this. 
-+ */ -+ sched_free_group(tg); -+} -+ -+static void cpu_cgroup_fork(struct task_struct *task) -+{ -+} -+ -+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) -+{ -+ return 0; -+} -+ -+static void cpu_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+ -+static struct cftype cpu_legacy_files[] = { -+ { } /* Terminate */ -+}; -+ -+ -+static struct cftype cpu_files[] = { -+ { } /* terminate */ -+}; -+ -+static int cpu_extra_stat_show(struct seq_file *sf, -+ struct cgroup_subsys_state *css) -+{ -+ return 0; -+} -+ -+struct cgroup_subsys cpu_cgrp_subsys = { -+ .css_alloc = cpu_cgroup_css_alloc, -+ .css_online = cpu_cgroup_css_online, -+ .css_released = cpu_cgroup_css_released, -+ .css_free = cpu_cgroup_css_free, -+ .css_extra_stat_show = cpu_extra_stat_show, -+ .fork = cpu_cgroup_fork, -+ .can_attach = cpu_cgroup_can_attach, -+ .attach = cpu_cgroup_attach, -+ .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, -+ .early_init = true, -+ .threaded = true, -+}; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+#undef CREATE_TRACE_POINTS -diff --git a/kernel/sched/alt_debug.c b/kernel/sched/alt_debug.c -new file mode 100644 -index 000000000000..835e6bb98dda ---- /dev/null -+++ b/kernel/sched/alt_debug.c -@@ -0,0 +1,31 @@ -+/* -+ * kernel/sched/alt_debug.c -+ * -+ * Print the BMQ debugging details -+ * -+ * Author: Alfred Chen -+ * Date : 2020 -+ */ -+#include "sched.h" -+ -+/* -+ * This allows printing both to /proc/sched_debug and -+ * to the console -+ */ -+#define SEQ_printf(m, x...) \ -+ do { \ -+ if (m) \ -+ seq_printf(m, x); \ -+ else \ -+ pr_cont(x); \ -+ } while (0) -+ -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{ -+ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -new file mode 100644 -index 000000000000..2b66983cce42 ---- /dev/null -+++ b/kernel/sched/alt_sched.h -@@ -0,0 +1,527 @@ -+#ifndef ALT_SCHED_H -+#define ALT_SCHED_H -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#ifdef CONFIG_PARAVIRT -+# include -+#endif -+ -+#include "cpupri.h" -+ -+#ifdef CONFIG_SCHED_BMQ -+#include "bmq.h" -+#endif -+ -+/* task_struct::on_rq states: */ -+#define TASK_ON_RQ_QUEUED 1 -+#define TASK_ON_RQ_MIGRATING 2 -+ -+static inline int task_on_rq_queued(struct task_struct *p) -+{ -+ return p->on_rq == TASK_ON_RQ_QUEUED; -+} -+ -+static inline int task_on_rq_migrating(struct task_struct *p) -+{ -+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+} -+ -+/* -+ * wake flags -+ */ -+#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -+#define WF_FORK 0x02 /* child wakeup after fork */ -+#define WF_MIGRATED 0x04 /* internal use, task got migrated */ -+ -+/* -+ * This is the main, per-CPU runqueue data structure. -+ * This data should only be modified by the local cpu. 
-+ */ -+struct rq { -+ /* runqueue lock: */ -+ raw_spinlock_t lock; -+ -+ struct task_struct __rcu *curr; -+ struct task_struct *idle, *stop, *skip; -+ struct mm_struct *prev_mm; -+ -+#ifdef CONFIG_SCHED_BMQ -+ struct bmq queue; -+#endif -+ unsigned long watermark; -+ -+ /* switch count */ -+ u64 nr_switches; -+ -+ atomic_t nr_iowait; -+ -+#ifdef CONFIG_MEMBARRIER -+ int membarrier_state; -+#endif -+ -+#ifdef CONFIG_SMP -+ int cpu; /* cpu of this runqueue */ -+ bool online; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ struct sched_avg avg_irq; -+#endif -+ -+#ifdef CONFIG_SCHED_SMT -+ int active_balance; -+ struct cpu_stop_work active_balance_work; -+#endif -+#endif /* CONFIG_SMP */ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ u64 prev_irq_time; -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+#ifdef CONFIG_PARAVIRT -+ u64 prev_steal_time; -+#endif /* CONFIG_PARAVIRT */ -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ u64 prev_steal_time_rq; -+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ -+ -+ /* calc_load related fields */ -+ unsigned long calc_load_update; -+ long calc_load_active; -+ -+ u64 clock, last_tick; -+ u64 last_ts_switch; -+ u64 clock_task; -+ -+ unsigned long nr_running; -+ unsigned long nr_uninterruptible; -+ -+#ifdef CONFIG_SCHED_HRTICK -+#ifdef CONFIG_SMP -+ call_single_data_t hrtick_csd; -+#endif -+ struct hrtimer hrtick_timer; -+#endif -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+ /* latency stats */ -+ struct sched_info rq_sched_info; -+ unsigned long long rq_cpu_time; -+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ -+ -+ /* sys_sched_yield() stats */ -+ unsigned int yld_count; -+ -+ /* schedule() stats */ -+ unsigned int sched_switch; -+ unsigned int sched_count; -+ unsigned int sched_goidle; -+ -+ /* try_to_wake_up() stats */ -+ unsigned int ttwu_count; -+ unsigned int ttwu_local; -+#endif /* CONFIG_SCHEDSTATS */ -+ -+#ifdef CONFIG_SMP -+ struct llist_head wake_list; -+#endif -+ -+#ifdef CONFIG_CPU_IDLE -+ /* Must be inspected within a rcu lock section */ -+ struct cpuidle_state *idle_state; -+#endif -+}; -+ -+extern unsigned long calc_load_update; -+extern atomic_long_t calc_load_tasks; -+ -+extern void calc_global_load_tick(struct rq *this_rq); -+extern long calc_load_fold_active(struct rq *this_rq, long adjust); -+ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) -+#define this_rq() this_cpu_ptr(&runqueues) -+#define task_rq(p) cpu_rq(task_cpu(p)) -+#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -+#define raw_rq() raw_cpu_ptr(&runqueues) -+ -+#ifdef CONFIG_SMP -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+void register_sched_domain_sysctl(void); -+void unregister_sched_domain_sysctl(void); -+#else -+static inline void register_sched_domain_sysctl(void) -+{ -+} -+static inline void unregister_sched_domain_sysctl(void) -+{ -+} -+#endif -+ -+extern bool sched_smp_initialized; -+ -+enum { -+ BASE_CPU_AFFINITY_CHK_LEVEL = 1, -+#ifdef CONFIG_SCHED_SMT -+ SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+#ifdef CONFIG_SCHED_MC -+ MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+ NR_CPU_AFFINITY_CHK_LEVEL -+}; -+ -+DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); -+ -+static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, -+ const cpumask_t *mask) -+{ -+ while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) -+ mask++; -+ return cpu; -+} -+ -+static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) -+{ -+ return cpumask_test_cpu(cpu, cpumask)? 
cpu : -+ __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); -+} -+ -+extern void sched_ttwu_pending(void); -+#else /* !CONFIG_SMP */ -+static inline void sched_ttwu_pending(void) { } -+#endif /* CONFIG_SMP */ -+ -+#ifndef arch_scale_freq_tick -+static __always_inline -+void arch_scale_freq_tick(void) -+{ -+} -+#endif -+ -+#ifndef arch_scale_freq_capacity -+static __always_inline -+unsigned long arch_scale_freq_capacity(int cpu) -+{ -+ return SCHED_CAPACITY_SCALE; -+} -+#endif -+ -+static inline u64 __rq_clock_broken(struct rq *rq) -+{ -+ return READ_ONCE(rq->clock); -+} -+ -+static inline u64 rq_clock(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock; -+} -+ -+static inline u64 rq_clock_task(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock_task; -+} -+ -+/* -+ * {de,en}queue flags: -+ * -+ * DEQUEUE_SLEEP - task is no longer runnable -+ * ENQUEUE_WAKEUP - task just became runnable -+ * -+ */ -+ -+#define DEQUEUE_SLEEP 0x01 -+ -+#define ENQUEUE_WAKEUP 0x01 -+ -+ -+/* -+ * Below are scheduler API which using in other kernel code -+ * It use the dummy rq_flags -+ * ToDo : BMQ need to support these APIs for compatibility with mainline -+ * scheduler code. -+ */ -+struct rq_flags { -+ unsigned long flags; -+}; -+ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock); -+ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock); -+ -+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(&rq->lock); -+} -+ -+static inline void -+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) -+ __releases(rq->lock) -+ __releases(p->pi_lock) -+{ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+} -+ -+static inline void -+rq_unlock_irq(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+static inline struct rq * -+this_rq_lock_irq(struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ -+ return rq; -+} -+ -+static inline int task_current(struct rq *rq, struct task_struct *p) -+{ -+ return rq->curr == p; -+} -+ -+static inline bool task_running(struct task_struct *p) -+{ -+ return p->on_cpu; -+} -+ -+extern struct static_key_false sched_schedstats; -+ -+#ifdef CONFIG_CPU_IDLE -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+ rq->idle_state = idle_state; -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ WARN_ON(!rcu_read_lock_held()); -+ return rq->idle_state; -+} -+#else -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ return NULL; -+} -+#endif -+ -+static inline int cpu_of(const struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ return rq->cpu; -+#else -+ return 0; -+#endif -+} -+ -+#include "stats.h" -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+struct irqtime { -+ u64 total; -+ u64 tick_delta; -+ u64 irq_start_time; -+ struct u64_stats_sync sync; 
-+}; -+ -+DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -+ -+/* -+ * Returns the irqtime minus the softirq time computed by ksoftirqd. -+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime -+ * and never move forward. -+ */ -+static inline u64 irq_time_read(int cpu) -+{ -+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); -+ unsigned int seq; -+ u64 total; -+ -+ do { -+ seq = __u64_stats_fetch_begin(&irqtime->sync); -+ total = irqtime->total; -+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); -+ -+ return total; -+} -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+ -+#ifdef CONFIG_CPU_FREQ -+DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); -+ -+/** -+ * cpufreq_update_util - Take a note about CPU utilization changes. -+ * @rq: Runqueue to carry out the update for. -+ * @flags: Update reason flags. -+ * -+ * This function is called by the scheduler on the CPU whose utilization is -+ * being updated. -+ * -+ * It can only be called from RCU-sched read-side critical sections. -+ * -+ * The way cpufreq is currently arranged requires it to evaluate the CPU -+ * performance state (frequency/voltage) on a regular basis to prevent it from -+ * being stuck in a completely inadequate performance level for too long. -+ * That is not guaranteed to happen if the updates are only triggered from CFS -+ * and DL, though, because they may not be coming in if only RT tasks are -+ * active all the time (or there are RT tasks only). -+ * -+ * As a workaround for that issue, this function is called periodically by the -+ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, -+ * but that really is a band-aid. Going forward it should be replaced with -+ * solutions targeted more specifically at RT tasks. -+ */ -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+ data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); -+ if (data) -+ data->func(data, rq_clock(rq), flags); -+} -+#else -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} -+#endif /* CONFIG_CPU_FREQ */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+extern int __init sched_tick_offload_init(void); -+#else -+static inline int sched_tick_offload_init(void) { return 0; } -+#endif -+ -+#ifdef arch_scale_freq_capacity -+#ifndef arch_scale_freq_invariant -+#define arch_scale_freq_invariant() (true) -+#endif -+#else /* arch_scale_freq_capacity */ -+#define arch_scale_freq_invariant() (false) -+#endif -+ -+extern void schedule_idle(void); -+ -+/* -+ * !! For sched_setattr_nocheck() (kernel) only !! -+ * -+ * This is actually gross. :( -+ * -+ * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE -+ * tasks, but still be able to sleep. We need this on platforms that cannot -+ * atomically change clock frequency. Remove once fast switching will be -+ * available on such platforms. -+ * -+ * SUGOV stands for SchedUtil GOVernor. -+ */ -+#define SCHED_FLAG_SUGOV 0x10000000 -+ -+#ifdef CONFIG_MEMBARRIER -+/* -+ * The scheduler provides memory barriers required by membarrier between: -+ * - prior user-space memory accesses and store to rq->membarrier_state, -+ * - store to rq->membarrier_state and following user-space memory accesses. -+ * In the same way it provides those guarantees around store to rq->curr. 
-+ */ -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+ int membarrier_state; -+ -+ if (prev_mm == next_mm) -+ return; -+ -+ membarrier_state = atomic_read(&next_mm->membarrier_state); -+ if (READ_ONCE(rq->membarrier_state) == membarrier_state) -+ return; -+ -+ WRITE_ONCE(rq->membarrier_state, membarrier_state); -+} -+#else -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+} -+#endif -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); -+} -+ -+#ifdef CONFIG_NUMA -+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); -+#else -+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return nr_cpu_ids; -+} -+#endif -+ -+void swake_up_all_locked(struct swait_queue_head *q); -+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+#endif /* ALT_SCHED_H */ -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -new file mode 100644 -index 000000000000..4ce30c30bd3e ---- /dev/null -+++ b/kernel/sched/bmq.h -@@ -0,0 +1,14 @@ -+#ifndef BMQ_H -+#define BMQ_H -+ -+/* bits: -+ * RT(0-99), Low prio adj range, nice width, high prio adj range, cpu idle task */ -+#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH + 2 * MAX_PRIORITY_ADJ + 1) -+#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) -+ -+struct bmq { -+ DECLARE_BITMAP(bitmap, SCHED_BITS); -+ struct list_head heads[SCHED_BITS]; -+}; -+ -+#endif -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -new file mode 100644 -index 000000000000..cb0fc0688a89 ---- /dev/null -+++ b/kernel/sched/bmq_imp.h -@@ -0,0 +1,86 @@ -+#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.7-r3 by Alfred Chen.\n" -+ -+static inline void sched_queue_init(struct rq *rq) -+{ -+ struct bmq *q = &rq->queue; -+ int i; -+ -+ bitmap_zero(q->bitmap, SCHED_BITS); -+ for(i = 0; i < SCHED_BITS; i++) -+ INIT_LIST_HEAD(&q->heads[i]); -+} -+ -+static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) -+{ -+ struct bmq *q = &rq->queue; -+ -+ idle->bmq_idx = IDLE_TASK_SCHED_PRIO; -+ INIT_LIST_HEAD(&q->heads[idle->bmq_idx]); -+ list_add(&idle->bmq_node, &q->heads[idle->bmq_idx]); -+ set_bit(idle->bmq_idx, q->bitmap); -+} -+ -+/* -+ * This routine used in bmq scheduler only which assume the idle task in the bmq -+ */ -+static inline struct task_struct *sched_rq_first_task(struct rq *rq) -+{ -+ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); -+ const struct list_head *head = &rq->queue.heads[idx]; -+ -+ return list_first_entry(head, struct task_struct, bmq_node); -+} -+ -+static inline struct task_struct * -+sched_rq_next_task(struct task_struct *p, struct rq *rq) -+{ -+ unsigned long idx = p->bmq_idx; -+ struct list_head *head = &rq->queue.heads[idx]; -+ -+ if (list_is_last(&p->bmq_node, head)) { -+ idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); -+ head = &rq->queue.heads[idx]; -+ -+ return list_first_entry(head, struct task_struct, bmq_node); -+ } -+ -+ return list_next_entry(p, bmq_node); -+} -+ -+#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -+ sched_info_dequeued(rq, p); \ -+ \ -+ list_del(&p->bmq_node); \ -+ if (list_empty(&rq->queue.heads[p->bmq_idx])) { \ -+ clear_bit(p->bmq_idx, rq->queue.bitmap);\ -+ func; \ -+ } -+ -+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ 
-+ sched_info_queued(rq, p); \ -+ psi_enqueue(p, flags); \ -+ \ -+ p->bmq_idx = task_sched_prio(p); \ -+ list_add_tail(&p->bmq_node, &rq->queue.heads[p->bmq_idx]); \ -+ set_bit(p->bmq_idx, rq->queue.bitmap) -+ -+static inline void __requeue_task(struct task_struct *p, struct rq *rq) -+{ -+ int idx = task_sched_prio(p); -+ -+ list_del(&p->bmq_node); -+ list_add_tail(&p->bmq_node, &rq->queue.heads[idx]); -+ if (idx != p->bmq_idx) { -+ if (list_empty(&rq->queue.heads[p->bmq_idx])) -+ clear_bit(p->bmq_idx, rq->queue.bitmap); -+ p->bmq_idx = idx; -+ set_bit(p->bmq_idx, rq->queue.bitmap); -+ update_sched_rq_watermark(rq); -+ } -+} -+ -+static inline bool sched_task_need_requeue(struct task_struct *p) -+{ -+ return (task_sched_prio(p) != p->bmq_idx); -+} -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 7fbaee24c824..0d7ad05b84fe 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -183,6 +183,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - -+#ifndef CONFIG_SCHED_ALT - /* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. -@@ -300,6 +301,13 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) - - return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); - } -+#else /* CONFIG_SCHED_ALT */ -+static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) -+{ -+ sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); -+ return sg_cpu->max; -+} -+#endif - - /** - * sugov_iowait_reset() - Reset the IO boost status of a CPU. -@@ -443,7 +451,9 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } - */ - static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) - { -+#ifndef CONFIG_SCHED_ALT - if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) -+#endif - sg_policy->limits_changed = true; - } - -@@ -686,6 +696,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - } - - ret = sched_setattr_nocheck(thread, &attr); -+ - if (ret) { - kthread_stop(thread); - pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); -@@ -916,6 +927,7 @@ static int __init sugov_register(void) - core_initcall(sugov_register); - - #ifdef CONFIG_ENERGY_MODEL -+#ifndef CONFIG_SCHED_ALT - extern bool sched_energy_update; - extern struct mutex sched_energy_mutex; - -@@ -946,4 +958,10 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - } - - } -+#else /* CONFIG_SCHED_ALT */ -+void sched_cpufreq_governor_change(struct cpufreq_policy *policy, -+ struct cpufreq_governor *old_gov) -+{ -+} -+#endif - #endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index ff9435dee1df..0ee9967d2d74 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -122,7 +122,7 @@ void account_user_time(struct task_struct *p, u64 cputime) - p->utime += cputime; - account_group_user_time(p, cputime); - -- index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; -+ index = task_running_nice(p) ? CPUTIME_NICE : CPUTIME_USER; - - /* Add user time to cpustat. */ - task_group_account_field(p, index, cputime); -@@ -146,7 +146,7 @@ void account_guest_time(struct task_struct *p, u64 cputime) - p->gtime += cputime; - - /* Add guest time to cpustat. 
*/ -- if (task_nice(p) > 0) { -+ if (task_running_nice(p)) { - cpustat[CPUTIME_NICE] += cputime; - cpustat[CPUTIME_GUEST_NICE] += cputime; - } else { -@@ -269,7 +269,7 @@ static inline u64 account_other_time(u64 max) - #ifdef CONFIG_64BIT - static inline u64 read_sum_exec_runtime(struct task_struct *t) - { -- return t->se.sum_exec_runtime; -+ return tsk_seruntime(t); - } - #else - static u64 read_sum_exec_runtime(struct task_struct *t) -@@ -279,7 +279,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) - struct rq *rq; - - rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -+ ns = tsk_seruntime(t); - task_rq_unlock(rq, t, &rf); - - return ns; -@@ -658,7 +658,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - task_cputime(p, &cputime.utime, &cputime.stime); -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index b743bf38f08f..472478a4f2a8 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -361,6 +361,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_ALT - /* - * idle-task scheduling class. - */ -@@ -481,3 +482,4 @@ const struct sched_class idle_sched_class = { - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif -diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c -index b647d04d9c8b..f1983eb87f13 100644 ---- a/kernel/sched/pelt.c -+++ b/kernel/sched/pelt.c -@@ -250,6 +250,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load) - WRITE_ONCE(sa->util_avg, sa->util_sum / divider); - } - -+#ifndef CONFIG_SCHED_ALT - /* - * sched_entity: - * -@@ -367,6 +368,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - - return 0; - } -+#endif - - #ifdef CONFIG_SCHED_THERMAL_PRESSURE - /* -diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h -index eb034d9f024d..49aa805750c5 100644 ---- a/kernel/sched/pelt.h -+++ b/kernel/sched/pelt.h -@@ -1,11 +1,13 @@ - #ifdef CONFIG_SMP - #include "sched-pelt.h" - -+#ifndef CONFIG_SCHED_ALT - int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); - int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); - int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); - int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); - int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); -+#endif - - #ifdef CONFIG_SCHED_THERMAL_PRESSURE - int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity); -@@ -37,6 +39,7 @@ update_irq_load_avg(struct rq *rq, u64 running) - } - #endif - -+#ifndef CONFIG_SCHED_ALT - /* - * When a task is dequeued, its estimated utilization should not be update if - * its util_avg has not been updated at least once. 
-@@ -157,9 +160,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) - return rq_clock_pelt(rq_of(cfs_rq)); - } - #endif -+#endif /* CONFIG_SCHED_ALT */ - - #else - -+#ifndef CONFIG_SCHED_ALT - static inline int - update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) - { -@@ -177,6 +182,7 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - { - return 0; - } -+#endif - - static inline int - update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 1f58677a8f23..682e6b3802c1 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2,6 +2,10 @@ - /* - * Scheduler internal types and methods: - */ -+#ifdef CONFIG_SCHED_ALT -+#include "alt_sched.h" -+#else -+ - #include - - #include -@@ -2548,3 +2552,9 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) - - void swake_up_all_locked(struct swait_queue_head *q); - void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (task_nice(p) > 0); -+} -+#endif /* !CONFIG_SCHED_ALT */ -diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c -index 750fb3c67eed..108422ebc7bf 100644 ---- a/kernel/sched/stats.c -+++ b/kernel/sched/stats.c -@@ -22,8 +22,10 @@ static int show_schedstat(struct seq_file *seq, void *v) - } else { - struct rq *rq; - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_ALT - struct sched_domain *sd; - int dcount = 0; -+#endif - #endif - cpu = (unsigned long)(v - 2); - rq = cpu_rq(cpu); -@@ -40,6 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - seq_printf(seq, "\n"); - - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_ALT - /* domain-specific stats */ - rcu_read_lock(); - for_each_domain(cpu, sd) { -@@ -68,6 +71,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - sd->ttwu_move_balance); - } - rcu_read_unlock(); -+#endif - #endif - } - return 0; -diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 8344757bba6e..558ce8a70926 100644 ---- a/kernel/sched/topology.c -+++ b/kernel/sched/topology.c -@@ -4,6 +4,7 @@ - */ - #include "sched.h" - -+#ifndef CONFIG_SCHED_ALT - DEFINE_MUTEX(sched_domains_mutex); - - /* Protected by sched_domains_mutex: */ -@@ -1190,8 +1191,10 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) - */ - - static int default_relax_domain_level = -1; -+#endif /* CONFIG_SCHED_ALT */ - int sched_domain_level_max; - -+#ifndef CONFIG_SCHED_ALT - static int __init setup_relax_domain_level(char *str) - { - if (kstrtoint(str, 0, &default_relax_domain_level)) -@@ -1424,6 +1427,7 @@ sd_init(struct sched_domain_topology_level *tl, - - return sd; - } -+#endif /* CONFIG_SCHED_ALT */ - - /* - * Topology list, bottom-up. 
-@@ -1453,6 +1457,7 @@ void set_sched_topology(struct sched_domain_topology_level *tl) - sched_domain_topology = tl; - } - -+#ifndef CONFIG_SCHED_ALT - #ifdef CONFIG_NUMA - - static const struct cpumask *sd_numa_mask(int cpu) -@@ -2327,3 +2332,17 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); - mutex_unlock(&sched_domains_mutex); - } -+#else /* CONFIG_SCHED_ALT */ -+void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], -+ struct sched_domain_attr *dattr_new) -+{} -+ -+#ifdef CONFIG_NUMA -+int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; -+ -+int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return best_mask_cpu(cpu, cpus); -+} -+#endif /* CONFIG_NUMA */ -+#endif -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index 8a176d8727a3..8e2ba49be0e1 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -132,6 +132,10 @@ static unsigned long one_ul = 1; - static unsigned long long_max = LONG_MAX; - static int one_hundred = 100; - static int one_thousand = 1000; -+#ifdef CONFIG_SCHED_ALT -+static int __maybe_unused zero = 0; -+extern int sched_yield_type; -+#endif - #ifdef CONFIG_PRINTK - static int ten_thousand = 10000; - #endif -@@ -288,7 +292,7 @@ static struct ctl_table sysctl_base_table[] = { - { } - }; - --#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_ALT) - static int min_sched_granularity_ns = 100000; /* 100 usecs */ - static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_wakeup_granularity_ns; /* 0 usecs */ -@@ -305,6 +309,7 @@ static int max_extfrag_threshold = 1000; - #endif - - static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_ALT - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, -@@ -486,6 +491,7 @@ static struct ctl_table kern_table[] = { - .extra2 = SYSCTL_ONE, - }, - #endif -+#endif /* !CONFIG_SCHED_ALT */ - #ifdef CONFIG_PROVE_LOCKING - { - .procname = "prove_locking", -@@ -1049,6 +1055,17 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_SCHED_ALT -+ { -+ .procname = "yield_type", -+ .data = &sched_yield_type, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &two, -+ }, -+#endif - #if defined(CONFIG_S390) && defined(CONFIG_SMP) - { - .procname = "spin_retry", -diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index d89da1c7e005..a73adff9f309 100644 ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -1923,8 +1923,10 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, - int ret = 0; - u64 slack; - -+#ifndef CONFIG_SCHED_ALT - slack = current->timer_slack_ns; - if (dl_task(current) || rt_task(current)) -+#endif - slack = 0; - - hrtimer_init_sleeper_on_stack(&t, clockid, mode); -diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c -index 2fd3b3fa68bf..e053bc56c019 100644 ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -236,7 +236,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) - u64 stime, utime; - - task_cputime(p, &utime, &stime); -- store_samples(samples, stime, utime, p->se.sum_exec_runtime); -+ store_samples(samples, stime, utime, tsk_seruntime(p)); - } - - static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, -@@ -806,6 +806,7 @@ static void 
collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, - } - } - -+#ifndef CONFIG_SCHED_ALT - static inline void check_dl_overrun(struct task_struct *tsk) - { - if (tsk->dl.dl_overrun) { -@@ -813,6 +814,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) - __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); - } - } -+#endif - - static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) - { -@@ -840,8 +842,10 @@ static void check_thread_timers(struct task_struct *tsk, - u64 samples[CPUCLOCK_MAX]; - unsigned long soft; - -+#ifndef CONFIG_SCHED_ALT - if (dl_task(tsk)) - check_dl_overrun(tsk); -+#endif - - if (expiry_cache_is_inactive(pct)) - return; -@@ -855,7 +859,7 @@ static void check_thread_timers(struct task_struct *tsk, - soft = task_rlimit(tsk, RLIMIT_RTTIME); - if (soft != RLIM_INFINITY) { - /* Task RT timeout is accounted in jiffies. RTTIME is usec */ -- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); -+ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); - unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - - /* At the hard limit, send SIGKILL. No further action. */ -@@ -1091,8 +1095,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) - return true; - } - -+#ifndef CONFIG_SCHED_ALT - if (dl_task(tsk) && tsk->dl.dl_overrun) - return true; -+#endif - - return false; - } -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index b5e3496cf803..cfbae0a21cef 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) - { - /* Make this a -deadline thread */ - static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_ALT -+ /* No deadline on BMQ, use RR */ -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, - .sched_runtime = 100000ULL, - .sched_deadline = 10000000ULL, - .sched_period = 10000000ULL -+#endif - }; - struct wakeup_test_data *x = data; - diff --git a/linux57-tkg/linux57-tkg-patches/0011-ZFS-fix.patch b/linux57-tkg/linux57-tkg-patches/0011-ZFS-fix.patch deleted file mode 100644 index af71d04..0000000 --- a/linux57-tkg/linux57-tkg-patches/0011-ZFS-fix.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 1e010beda2896bdf3082fb37a3e49f8ce20e04d8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= -Date: Thu, 2 May 2019 05:28:08 +0100 -Subject: [PATCH] x86/fpu: Export kernel_fpu_{begin,end}() with - EXPORT_SYMBOL_GPL -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -We need these symbols in zfs as the fpu implementation breaks userspace: - -https://github.com/zfsonlinux/zfs/issues/9346 -Signed-off-by: Jörg Thalheim ---- - arch/x86/kernel/fpu/core.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c -index 12c70840980e..352538b3bb5d 100644 ---- a/arch/x86/kernel/fpu/core.c -+++ b/arch/x86/kernel/fpu/core.c -@@ -102,7 +102,7 @@ void kernel_fpu_begin(void) - } - __cpu_invalidate_fpregs_state(); - } --EXPORT_SYMBOL_GPL(kernel_fpu_begin); -+EXPORT_SYMBOL(kernel_fpu_begin); - - void kernel_fpu_end(void) - { -@@ -111,7 +111,7 @@ void kernel_fpu_end(void) - this_cpu_write(in_kernel_fpu, false); - preempt_enable(); - } --EXPORT_SYMBOL_GPL(kernel_fpu_end); -+EXPORT_SYMBOL(kernel_fpu_end); - - /* - * Save the FPU state (mark it for reload if necessary): --- -2.23.0 - - diff --git 
a/linux57-tkg/linux57-tkg-patches/0012-linux-hardened.patch b/linux57-tkg/linux57-tkg-patches/0012-linux-hardened.patch deleted file mode 100644 index 6f20939..0000000 --- a/linux57-tkg/linux57-tkg-patches/0012-linux-hardened.patch +++ /dev/null @@ -1,2916 +0,0 @@ -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 5e2ce88d6eda..5cdeccf3459f 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -518,17 +518,6 @@ - nosocket -- Disable socket memory accounting. - nokmem -- Disable kernel memory accounting. - -- checkreqprot [SELINUX] Set initial checkreqprot flag value. -- Format: { "0" | "1" } -- See security/selinux/Kconfig help text. -- 0 -- check protection applied by kernel (includes -- any implied execute protection). -- 1 -- check protection requested by application. -- Default value is set via a kernel config option. -- Value can be changed at runtime via -- /sys/fs/selinux/checkreqprot. -- Setting checkreqprot to 1 is deprecated. -- - cio_ignore= [S390] - See Documentation/s390/common_io.rst for details. - clk_ignore_unused -@@ -3446,6 +3435,11 @@ - the specified number of seconds. This is to be used if - your oopses keep scrolling off the screen. - -+ extra_latent_entropy -+ Enable a very simple form of latent entropy extraction -+ from the first 4GB of memory as the bootmem allocator -+ passes the memory pages to the buddy allocator. -+ - pcbit= [HW,ISDN] - - pcd. [PARIDE] -diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index 0d427fd10941..e0042d797c38 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -1167,6 +1167,26 @@ If a value outside of this range is written to ``threads-max`` an - ``EINVAL`` error occurs. - - -+tiocsti_restrict -+================ -+ -+This toggle indicates whether unprivileged users are prevented from -+using the ``TIOCSTI`` ioctl to inject commands into other processes -+which share a tty session. -+ -+When ``tiocsti_restrict`` is set to (0) there are no restrictions(accept -+the default restriction of only being able to injection commands into -+one's own tty). When ``tiocsti_restrict`` is set to (1), users must have -+``CAP_SYS_ADMIN`` to use the ``TIOCSTI`` ioctl. -+ -+When user namespaces are in use, the check for the capability -+``CAP_SYS_ADMIN`` is done against the user namespace that originally -+opened the tty. -+ -+The kernel config option ``CONFIG_SECURITY_TIOCSTI_RESTRICT`` sets the -+default value of ``tiocsti_restrict``. -+ -+ - unknown_nmi_panic - ================= - -diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt -index 9375324aa8e1..7bd9b330493c 100644 ---- a/Documentation/networking/ip-sysctl.txt -+++ b/Documentation/networking/ip-sysctl.txt -@@ -587,6 +587,23 @@ tcp_comp_sack_nr - INTEGER - - Default : 44 - -+tcp_simult_connect - BOOLEAN -+ Enable TCP simultaneous connect that adds a weakness in Linux's strict -+ implementation of TCP that allows two clients to connect to each other -+ without either entering a listening state. The weakness allows an attacker -+ to easily prevent a client from connecting to a known server provided the -+ source port for the connection is guessed correctly. 
-+ -+ As the weakness could be used to prevent an antivirus or IPS from fetching -+ updates, or prevent an SSL gateway from fetching a CRL, it should be -+ eliminated by disabling this option. Though Linux is one of few operating -+ systems supporting simultaneous connect, it has no legitimate use in -+ practice and is rarely supported by firewalls. -+ -+ Disabling this may break TCP STUNT which is used by some applications for -+ NAT traversal. -+ Default: Value of CONFIG_TCP_SIMULT_CONNECT_DEFAULT_ON -+ - tcp_slow_start_after_idle - BOOLEAN - If set, provide RFC2861 behavior and time out the congestion - window after an idle period. An idle period is defined at -diff --git a/arch/Kconfig b/arch/Kconfig -index 786a85d4ad40..78ae69e78a81 100644 ---- a/arch/Kconfig -+++ b/arch/Kconfig -@@ -671,7 +671,7 @@ config ARCH_MMAP_RND_BITS - int "Number of bits to use for ASLR of mmap base address" if EXPERT - range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX - default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT -- default ARCH_MMAP_RND_BITS_MIN -+ default ARCH_MMAP_RND_BITS_MAX - depends on HAVE_ARCH_MMAP_RND_BITS - help - This value can be used to select the number of bits to use to -@@ -705,7 +705,7 @@ config ARCH_MMAP_RND_COMPAT_BITS - int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT - range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX - default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT -- default ARCH_MMAP_RND_COMPAT_BITS_MIN -+ default ARCH_MMAP_RND_COMPAT_BITS_MAX - depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS - help - This value can be used to select the number of bits to use to -diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig -index 5d513f461957..39abe5fd57fb 100644 ---- a/arch/arm64/Kconfig -+++ b/arch/arm64/Kconfig -@@ -1216,6 +1216,7 @@ config RODATA_FULL_DEFAULT_ENABLED - - config ARM64_SW_TTBR0_PAN - bool "Emulate Privileged Access Never using TTBR0_EL1 switching" -+ default y - help - Enabling this option prevents the kernel from accessing - user-space memory directly by pointing TTBR0_EL1 to a reserved -@@ -1706,6 +1707,7 @@ config RANDOMIZE_BASE - bool "Randomize the address of the kernel image" - select ARM64_MODULE_PLTS if MODULES - select RELOCATABLE -+ default y - help - Randomizes the virtual address at which the kernel image is - loaded, as a security feature that deters exploit attempts -diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug -index a1efa246c9ed..ccacb3619b59 100644 ---- a/arch/arm64/Kconfig.debug -+++ b/arch/arm64/Kconfig.debug -@@ -26,6 +26,7 @@ config ARM64_RANDOMIZE_TEXT_OFFSET - config DEBUG_WX - bool "Warn on W+X mappings at boot" - select PTDUMP_CORE -+ default y - ---help--- - Generate a warning if any W+X mappings are found at boot. - -diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig -index 03d0189f7d68..d7c642f8f063 100644 ---- a/arch/arm64/configs/defconfig -+++ b/arch/arm64/configs/defconfig -@@ -1,4 +1,3 @@ --CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_AUDIT=y - CONFIG_NO_HZ_IDLE=y -diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h -index b618017205a3..0a228dbcad65 100644 ---- a/arch/arm64/include/asm/elf.h -+++ b/arch/arm64/include/asm/elf.h -@@ -103,14 +103,10 @@ - - /* - * This is the base location for PIE (ET_DYN with INTERP) loads. 
On -- * 64-bit, this is above 4GB to leave the entire 32-bit address -+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address - * space open for things that want to use the area for 32-bit pointers. - */ --#ifdef CONFIG_ARM64_FORCE_52BIT --#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) --#else --#define ELF_ET_DYN_BASE (2 * DEFAULT_MAP_WINDOW_64 / 3) --#endif /* CONFIG_ARM64_FORCE_52BIT */ -+#define ELF_ET_DYN_BASE 0x100000000UL - - #ifndef __ASSEMBLY__ - -@@ -164,10 +160,10 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, - /* 1GB of VA */ - #ifdef CONFIG_COMPAT - #define STACK_RND_MASK (test_thread_flag(TIF_32BIT) ? \ -- 0x7ff >> (PAGE_SHIFT - 12) : \ -- 0x3ffff >> (PAGE_SHIFT - 12)) -+ ((1UL << mmap_rnd_compat_bits) - 1) >> (PAGE_SHIFT - 12) : \ -+ ((1UL << mmap_rnd_bits) - 1) >> (PAGE_SHIFT - 12)) - #else --#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) -+#define STACK_RND_MASK (((1UL << mmap_rnd_bits) - 1) >> (PAGE_SHIFT - 12)) - #endif - - #ifdef __AARCH64EB__ -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 2d3f963fd6f1..7b5923dd44e1 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1191,8 +1191,7 @@ config VM86 - default X86_LEGACY_VM86 - - config X86_16BIT -- bool "Enable support for 16-bit segments" if EXPERT -- default y -+ bool "Enable support for 16-bit segments" - depends on MODIFY_LDT_SYSCALL - ---help--- - This option is required by programs like Wine to run 16-bit -@@ -2329,7 +2328,7 @@ config COMPAT_VDSO - choice - prompt "vsyscall table for legacy applications" - depends on X86_64 -- default LEGACY_VSYSCALL_XONLY -+ default LEGACY_VSYSCALL_NONE - help - Legacy user code that does not know how to find the vDSO expects - to be able to issue three syscalls by calling fixed addresses in -@@ -2425,8 +2424,7 @@ config CMDLINE_OVERRIDE - be set to 'N' under normal conditions. - - config MODIFY_LDT_SYSCALL -- bool "Enable the LDT (local descriptor table)" if EXPERT -- default y -+ bool "Enable the LDT (local descriptor table)" - ---help--- - Linux can allow user programs to install a per-process x86 - Local Descriptor Table (LDT) using the modify_ldt(2) system -diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug -index 2e74690b028a..87c7294dd172 100644 ---- a/arch/x86/Kconfig.debug -+++ b/arch/x86/Kconfig.debug -@@ -75,6 +75,7 @@ config EFI_PGT_DUMP - config DEBUG_WX - bool "Warn on W+X mappings at boot" - select PTDUMP_CORE -+ default y - ---help--- - Generate a warning if any W+X mappings are found at boot. - -diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig -index 614961009075..06c473ba6b1a 100644 ---- a/arch/x86/configs/x86_64_defconfig -+++ b/arch/x86/configs/x86_64_defconfig -@@ -1,5 +1,4 @@ - # CONFIG_LOCALVERSION_AUTO is not set --CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_TASKSTATS=y -diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c -index 43428cc514c8..1b01bf6a6fe7 100644 ---- a/arch/x86/entry/vdso/vma.c -+++ b/arch/x86/entry/vdso/vma.c -@@ -316,55 +316,9 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) - } - - #ifdef CONFIG_X86_64 --/* -- * Put the vdso above the (randomized) stack with another randomized -- * offset. This way there is no hole in the middle of address space. -- * To save memory make sure it is still in the same PTE as the stack -- * top. This doesn't give that many random bits. 
-- * -- * Note that this algorithm is imperfect: the distribution of the vdso -- * start address within a PMD is biased toward the end. -- * -- * Only used for the 64-bit and x32 vdsos. -- */ --static unsigned long vdso_addr(unsigned long start, unsigned len) --{ -- unsigned long addr, end; -- unsigned offset; -- -- /* -- * Round up the start address. It can start out unaligned as a result -- * of stack start randomization. -- */ -- start = PAGE_ALIGN(start); -- -- /* Round the lowest possible end address up to a PMD boundary. */ -- end = (start + len + PMD_SIZE - 1) & PMD_MASK; -- if (end >= TASK_SIZE_MAX) -- end = TASK_SIZE_MAX; -- end -= len; -- -- if (end > start) { -- offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); -- addr = start + (offset << PAGE_SHIFT); -- } else { -- addr = start; -- } -- -- /* -- * Forcibly align the final address in case we have a hardware -- * issue that requires alignment for performance reasons. -- */ -- addr = align_vdso_addr(addr); -- -- return addr; --} -- - static int map_vdso_randomized(const struct vdso_image *image) - { -- unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start); -- -- return map_vdso(image, addr); -+ return map_vdso(image, 0); - } - #endif - -diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h -index 69c0f892e310..f9f7a85bb71e 100644 ---- a/arch/x86/include/asm/elf.h -+++ b/arch/x86/include/asm/elf.h -@@ -248,11 +248,11 @@ extern int force_personality32; - - /* - * This is the base location for PIE (ET_DYN with INTERP) loads. On -- * 64-bit, this is above 4GB to leave the entire 32-bit address -+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address - * space open for things that want to use the area for 32-bit pointers. - */ - #define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ -- (DEFAULT_MAP_WINDOW / 3 * 2)) -+ 0x100000000UL) - - /* This yields a mask that user programs can use to figure out what - instruction set this CPU supports. This could be done in user space, -@@ -312,8 +312,8 @@ extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); - - #ifdef CONFIG_X86_32 - --#define __STACK_RND_MASK(is32bit) (0x7ff) --#define STACK_RND_MASK (0x7ff) -+#define __STACK_RND_MASK(is32bit) ((1UL << mmap_rnd_bits) - 1) -+#define STACK_RND_MASK ((1UL << mmap_rnd_bits) - 1) - - #define ARCH_DLINFO ARCH_DLINFO_IA32 - -@@ -322,7 +322,11 @@ extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); - #else /* CONFIG_X86_32 */ - - /* 1GB for 64bit, 8MB for 32bit */ --#define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff) -+#ifdef CONFIG_COMPAT -+#define __STACK_RND_MASK(is32bit) ((is32bit) ? 
(1UL << mmap_rnd_compat_bits) - 1 : (1UL << mmap_rnd_bits) - 1) -+#else -+#define __STACK_RND_MASK(is32bit) ((1UL << mmap_rnd_bits) - 1) -+#endif - #define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32()) - - #define ARCH_DLINFO \ -@@ -380,5 +384,4 @@ struct va_alignment { - } ____cacheline_aligned; - - extern struct va_alignment va_align; --extern unsigned long align_vdso_addr(unsigned long); - #endif /* _ASM_X86_ELF_H */ -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 6f66d841262d..b786e7cb395d 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -295,6 +295,7 @@ static inline void cr4_set_bits_irqsoff(unsigned long mask) - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); -+ BUG_ON(cr4 != __read_cr4()); - if ((cr4 | mask) != cr4) - __cr4_set(cr4 | mask); - } -@@ -305,6 +306,7 @@ static inline void cr4_clear_bits_irqsoff(unsigned long mask) - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); -+ BUG_ON(cr4 != __read_cr4()); - if ((cr4 & ~mask) != cr4) - __cr4_set(cr4 & ~mask); - } -@@ -334,6 +336,7 @@ static inline void cr4_toggle_bits_irqsoff(unsigned long mask) - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); -+ BUG_ON(cr4 != __read_cr4()); - __cr4_set(cr4 ^ mask); - } - -@@ -440,6 +443,7 @@ static inline void __native_flush_tlb_global(void) - raw_local_irq_save(flags); - - cr4 = this_cpu_read(cpu_tlbstate.cr4); -+ BUG_ON(cr4 != __read_cr4()); - /* toggle PGE */ - native_write_cr4(cr4 ^ X86_CR4_PGE); - /* write old PGE again and flush TLBs */ -diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index 8f4533c1a4ec..632ef7ef4615 100644 ---- a/arch/x86/kernel/process.c -+++ b/arch/x86/kernel/process.c -@@ -42,6 +42,8 @@ - #include - #include - #include -+#include -+#include - - #include "process.h" - -@@ -907,7 +909,10 @@ unsigned long arch_align_stack(unsigned long sp) - - unsigned long arch_randomize_brk(struct mm_struct *mm) - { -- return randomize_page(mm->brk, 0x02000000); -+ if (mmap_is_ia32()) -+ return mm->brk + get_random_long() % SZ_32M + PAGE_SIZE; -+ else -+ return mm->brk + get_random_long() % SZ_1G + PAGE_SIZE; - } - - /* -diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c -index 504fa5425bce..e30ec4c750d1 100644 ---- a/arch/x86/kernel/sys_x86_64.c -+++ b/arch/x86/kernel/sys_x86_64.c -@@ -52,13 +52,6 @@ static unsigned long get_align_bits(void) - return va_align.bits & get_align_mask(); - } - --unsigned long align_vdso_addr(unsigned long addr) --{ -- unsigned long align_mask = get_align_mask(); -- addr = (addr + align_mask) & ~align_mask; -- return addr | get_align_bits(); --} -- - static int __init control_va_addr_alignment(char *str) - { - /* guard against enabling this on other CPU families */ -@@ -120,10 +113,7 @@ static void find_start_end(unsigned long addr, unsigned long flags, - } - - *begin = get_mmap_base(1); -- if (in_32bit_syscall()) -- *end = task_size_32bit(); -- else -- *end = task_size_64bit(addr > DEFAULT_MAP_WINDOW); -+ *end = get_mmap_base(0); - } - - unsigned long -@@ -200,7 +190,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, - - info.flags = VM_UNMAPPED_AREA_TOPDOWN; - info.length = len; -- info.low_limit = PAGE_SIZE; -+ info.low_limit = get_mmap_base(1); - info.high_limit = get_mmap_base(0); - - /* -diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c -index 4222a010057a..2c0c6b47b75b 100644 ---- a/arch/x86/mm/init_32.c -+++ b/arch/x86/mm/init_32.c -@@ -566,9 +566,9 
@@ static void __init pagetable_init(void) - - #define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL) - /* Bits supported by the hardware: */ --pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK; -+pteval_t __supported_pte_mask __ro_after_init = DEFAULT_PTE_MASK; - /* Bits allowed in normal kernel mappings: */ --pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK; -+pteval_t __default_kernel_pte_mask __ro_after_init = DEFAULT_PTE_MASK; - EXPORT_SYMBOL_GPL(__supported_pte_mask); - /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */ - EXPORT_SYMBOL(__default_kernel_pte_mask); -diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c -index 8b5f73f5e207..83f76a72f684 100644 ---- a/arch/x86/mm/init_64.c -+++ b/arch/x86/mm/init_64.c -@@ -98,9 +98,9 @@ DEFINE_ENTRY(pte, pte, init) - */ - - /* Bits supported by the hardware: */ --pteval_t __supported_pte_mask __read_mostly = ~0; -+pteval_t __supported_pte_mask __ro_after_init = ~0; - /* Bits allowed in normal kernel mappings: */ --pteval_t __default_kernel_pte_mask __read_mostly = ~0; -+pteval_t __default_kernel_pte_mask __ro_after_init = ~0; - EXPORT_SYMBOL_GPL(__supported_pte_mask); - /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */ - EXPORT_SYMBOL(__default_kernel_pte_mask); -diff --git a/block/blk-softirq.c b/block/blk-softirq.c -index 6e7ec87d49fa..d6ee3f8b3e74 100644 ---- a/block/blk-softirq.c -+++ b/block/blk-softirq.c -@@ -20,7 +20,7 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done); - * Softirq action handler - move entries to local list and loop over them - * while passing them to the queue registered handler. - */ --static __latent_entropy void blk_done_softirq(struct softirq_action *h) -+static __latent_entropy void blk_done_softirq(void) - { - struct list_head *cpu_list, local_list; - -diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c -index e74c8fe2a5fd..ec43f04b1687 100644 ---- a/drivers/ata/libata-core.c -+++ b/drivers/ata/libata-core.c -@@ -4541,7 +4541,7 @@ void ata_qc_free(struct ata_queued_cmd *qc) - struct ata_port *ap; - unsigned int tag; - -- WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ -+ BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ - ap = qc->ap; - - qc->flags = 0; -@@ -4558,7 +4558,7 @@ void __ata_qc_complete(struct ata_queued_cmd *qc) - struct ata_port *ap; - struct ata_link *link; - -- WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ -+ BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ - WARN_ON_ONCE(!(qc->flags & ATA_QCFLAG_ACTIVE)); - ap = qc->ap; - link = qc->dev->link; -diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig -index d4665fe9ccd2..315576465ca0 100644 ---- a/drivers/char/Kconfig -+++ b/drivers/char/Kconfig -@@ -326,7 +326,6 @@ config NSC_GPIO - - config DEVMEM - bool "/dev/mem virtual device support" -- default y - help - Say Y here if you want to support the /dev/mem device. - The /dev/mem device is used to access areas of physical -@@ -390,7 +389,6 @@ config MAX_RAW_DEVS - config DEVPORT - bool "/dev/port character device" - depends on ISA || PCI -- default y - help - Say Y here if you want to support the /dev/port device. The /dev/port - device is similar to /dev/mem, but for I/O ports. 
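The x86/arm64 hunks above widen user-space ASLR: STACK_RND_MASK is derived from mmap_rnd_bits / mmap_rnd_compat_bits, arch_randomize_brk gains a much larger randomization range, and the fixed vdso_addr placement is dropped so the vdso lands like any other mmap. A minimal user-space sketch, not part of the patch itself, that can be run a few times to watch the stack, brk, heap, mmap and text bases move between runs (the file name aslr_probe.c is arbitrary):

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

/* Print a handful of user-space addresses; repeated runs show the
 * per-run randomization that the mmap_rnd_bits-based masks and the
 * reworked arch_randomize_brk above are meant to widen. */
int main(void)
{
        int stack_var = 0;
        void *heap = malloc(64);
        void *map = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        printf("stack : %p\n", (void *)&stack_var);
        printf("brk   : %p\n", sbrk(0));
        printf("heap  : %p\n", heap);
        printf("mmap  : %p\n", map);
        printf("text  : %p\n", (void *)&main);

        if (map != MAP_FAILED)
                munmap(map, 4096);
        free(heap);
        return 0;
}

With randomize_va_space left at its default of 2, all of these values should change on every run; the text address only moves for PIE binaries, which most current toolchains produce by default.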
-diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig -index 2dff93d7a501..f1da13f791cd 100644 ---- a/drivers/tty/Kconfig -+++ b/drivers/tty/Kconfig -@@ -122,7 +122,6 @@ config UNIX98_PTYS - - config LEGACY_PTYS - bool "Legacy (BSD) PTY support" -- default y - ---help--- - A pseudo terminal (PTY) is a software device consisting of two - halves: a master and a slave. The slave device behaves identical to -diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c -index 5a6f36b391d9..616d82a19160 100644 ---- a/drivers/tty/tty_io.c -+++ b/drivers/tty/tty_io.c -@@ -174,6 +174,7 @@ static void free_tty_struct(struct tty_struct *tty) - put_device(tty->dev); - kfree(tty->write_buf); - tty->magic = 0xDEADDEAD; -+ put_user_ns(tty->owner_user_ns); - kfree(tty); - } - -@@ -2179,11 +2180,19 @@ static int tty_fasync(int fd, struct file *filp, int on) - * FIXME: may race normal receive processing - */ - -+int tiocsti_restrict = IS_ENABLED(CONFIG_SECURITY_TIOCSTI_RESTRICT); -+ - static int tiocsti(struct tty_struct *tty, char __user *p) - { - char ch, mbz = 0; - struct tty_ldisc *ld; - -+ if (tiocsti_restrict && -+ !ns_capable(tty->owner_user_ns, CAP_SYS_ADMIN)) { -+ dev_warn_ratelimited(tty->dev, -+ "Denied TIOCSTI ioctl for non-privileged process\n"); -+ return -EPERM; -+ } - if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN)) - return -EPERM; - if (get_user(ch, p)) -@@ -3009,6 +3018,7 @@ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx) - tty->index = idx; - tty_line_name(driver, idx, tty->name); - tty->dev = tty_get_device(tty); -+ tty->owner_user_ns = get_user_ns(current_user_ns()); - - return tty; - } -diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c -index fc748c731832..f745c9ee5885 100644 ---- a/drivers/usb/core/hub.c -+++ b/drivers/usb/core/hub.c -@@ -46,6 +46,8 @@ - #define USB_TP_TRANSMISSION_DELAY 40 /* ns */ - #define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */ - -+extern int deny_new_usb; -+ - /* Protect struct usb_device->state and ->children members - * Note: Both are also protected by ->dev.sem, except that ->state can - * change to USB_STATE_NOTATTACHED even when the semaphore isn't held. 
*/ -@@ -5100,6 +5102,12 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, - goto done; - return; - } -+ -+ if (deny_new_usb) { -+ dev_err(&port_dev->dev, "denied insert of USB device on port %d\n", port1); -+ goto done; -+ } -+ - if (hub_is_superspeed(hub->hdev)) - unit_load = 150; - else -diff --git a/fs/exec.c b/fs/exec.c -index 2c465119affc..bf220ff8c019 100644 ---- a/fs/exec.c -+++ b/fs/exec.c -@@ -62,6 +62,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -274,6 +275,8 @@ static int __bprm_mm_init(struct linux_binprm *bprm) - mm->stack_vm = mm->total_vm = 1; - up_write(&mm->mmap_sem); - bprm->p = vma->vm_end - sizeof(void *); -+ if (randomize_va_space) -+ bprm->p ^= get_random_int() & ~PAGE_MASK; - return 0; - err: - up_write(&mm->mmap_sem); -diff --git a/fs/namei.c b/fs/namei.c -index a320371899cf..6cc595eed647 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -918,10 +918,10 @@ static inline void put_link(struct nameidata *nd) - path_put(&last->link); - } - --int sysctl_protected_symlinks __read_mostly = 0; --int sysctl_protected_hardlinks __read_mostly = 0; --int sysctl_protected_fifos __read_mostly; --int sysctl_protected_regular __read_mostly; -+int sysctl_protected_symlinks __read_mostly = 1; -+int sysctl_protected_hardlinks __read_mostly = 1; -+int sysctl_protected_fifos __read_mostly = 2; -+int sysctl_protected_regular __read_mostly = 2; - - /** - * may_follow_link - Check symlink following for unsafe situations -diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig -index 88e1763e02f3..71820a515c91 100644 ---- a/fs/nfs/Kconfig -+++ b/fs/nfs/Kconfig -@@ -195,7 +195,6 @@ config NFS_DEBUG - bool - depends on NFS_FS && SUNRPC_DEBUG - select CRC32 -- default y - - config NFS_DISABLE_UDP_SUPPORT - bool "NFS: Disable NFS UDP protocol support" -diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig -index 27ef84d99f59..fb27f99a5e66 100644 ---- a/fs/proc/Kconfig -+++ b/fs/proc/Kconfig -@@ -41,7 +41,6 @@ config PROC_KCORE - config PROC_VMCORE - bool "/proc/vmcore support" - depends on PROC_FS && CRASH_DUMP -- default y - help - Exports the dump image of crashed kernel in ELF format. 
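The tty_io.c hunk above is the enforcement point for the tiocsti_restrict sysctl documented earlier in this patch. A small illustrative program, not part of the patch, that pushes characters into its own controlling terminal's input queue with TIOCSTI: with tiocsti_restrict set to 1 it fails with EPERM for unprivileged callers even on their own tty, while at 0 only cross-tty injection requires CAP_SYS_ADMIN.

#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Feed a string into this process's own tty input queue via TIOCSTI.
 * Whatever reads the terminal next (normally the shell) sees the
 * characters as if they had been typed. */
int main(void)
{
        const char *msg = "echo injected\n";
        const char *p;

        if (!isatty(STDIN_FILENO)) {
                fprintf(stderr, "stdin is not a tty\n");
                return 1;
        }
        for (p = msg; *p; p++) {
                if (ioctl(STDIN_FILENO, TIOCSTI, p) < 0) {
                        perror("TIOCSTI");
                        return 1;
                }
        }
        return 0;
}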
- -diff --git a/fs/stat.c b/fs/stat.c -index 030008796479..b1c2c0d5b874 100644 ---- a/fs/stat.c -+++ b/fs/stat.c -@@ -42,8 +42,13 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) - stat->gid = inode->i_gid; - stat->rdev = inode->i_rdev; - stat->size = i_size_read(inode); -- stat->atime = inode->i_atime; -- stat->mtime = inode->i_mtime; -+ if (is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD)) { -+ stat->atime = inode->i_ctime; -+ stat->mtime = inode->i_ctime; -+ } else { -+ stat->atime = inode->i_atime; -+ stat->mtime = inode->i_mtime; -+ } - stat->ctime = inode->i_ctime; - stat->blksize = i_blocksize(inode); - stat->blocks = inode->i_blocks; -@@ -79,9 +84,14 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, - if (IS_AUTOMOUNT(inode)) - stat->attributes |= STATX_ATTR_AUTOMOUNT; - -- if (inode->i_op->getattr) -- return inode->i_op->getattr(path, stat, request_mask, -- query_flags); -+ if (inode->i_op->getattr) { -+ int retval = inode->i_op->getattr(path, stat, request_mask, query_flags); -+ if (!retval && is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD)) { -+ stat->atime = stat->ctime; -+ stat->mtime = stat->ctime; -+ } -+ return retval; -+ } - - generic_fillattr(inode, stat); - return 0; -diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c -index e39fdec8a0b0..08610405fdae 100644 ---- a/fs/userfaultfd.c -+++ b/fs/userfaultfd.c -@@ -28,7 +28,11 @@ - #include - #include - -+#ifdef CONFIG_USERFAULTFD_UNPRIVILEGED - int sysctl_unprivileged_userfaultfd __read_mostly = 1; -+#else -+int sysctl_unprivileged_userfaultfd __read_mostly; -+#endif - - static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; - -diff --git a/include/linux/cache.h b/include/linux/cache.h -index 750621e41d1c..e7157c18c62c 100644 ---- a/include/linux/cache.h -+++ b/include/linux/cache.h -@@ -31,6 +31,8 @@ - #define __ro_after_init __attribute__((__section__(".data..ro_after_init"))) - #endif - -+#define __read_only __ro_after_init -+ - #ifndef ____cacheline_aligned - #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) - #endif -diff --git a/include/linux/capability.h b/include/linux/capability.h -index ecce0f43c73a..e46306dd4401 100644 ---- a/include/linux/capability.h -+++ b/include/linux/capability.h -@@ -208,6 +208,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap); - extern bool has_ns_capability_noaudit(struct task_struct *t, - struct user_namespace *ns, int cap); - extern bool capable(int cap); -+extern bool capable_noaudit(int cap); - extern bool ns_capable(struct user_namespace *ns, int cap); - extern bool ns_capable_noaudit(struct user_namespace *ns, int cap); - extern bool ns_capable_setid(struct user_namespace *ns, int cap); -@@ -234,6 +235,10 @@ static inline bool capable(int cap) - { - return true; - } -+static inline bool capable_noaudit(int cap) -+{ -+ return true; -+} - static inline bool ns_capable(struct user_namespace *ns, int cap) - { - return true; -diff --git a/include/linux/fs.h b/include/linux/fs.h -index 45cc10cdf6dd..162d589f120a 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -3659,4 +3659,15 @@ static inline int inode_drain_writes(struct inode *inode) - return filemap_write_and_wait(inode->i_mapping); - } - -+extern int device_sidechannel_restrict; -+ -+static inline bool is_sidechannel_device(const struct inode *inode) -+{ -+ umode_t mode; -+ if (!device_sidechannel_restrict) -+ return false; -+ mode = inode->i_mode; -+ return ((S_ISCHR(mode) || S_ISBLK(mode)) && (mode & 
(S_IROTH | S_IWOTH))); -+} -+ - #endif /* _LINUX_FS_H */ -diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h -index 5ab28f6c7d26..6333478e581c 100644 ---- a/include/linux/fsnotify.h -+++ b/include/linux/fsnotify.h -@@ -65,6 +65,9 @@ static inline int fsnotify_file(struct file *file, __u32 mask) - struct inode *inode = file_inode(file); - int ret; - -+ if (mask & (FS_ACCESS | FS_MODIFY) && is_sidechannel_device(inode)) -+ return 0; -+ - if (file->f_mode & FMODE_NONOTIFY) - return 0; - -diff --git a/include/linux/gfp.h b/include/linux/gfp.h -index 4aba4c86c626..7d2bd45f35ed 100644 ---- a/include/linux/gfp.h -+++ b/include/linux/gfp.h -@@ -561,9 +561,9 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, - extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); - extern unsigned long get_zeroed_page(gfp_t gfp_mask); - --void *alloc_pages_exact(size_t size, gfp_t gfp_mask); -+void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __attribute__((alloc_size(1))); - void free_pages_exact(void *virt, size_t size); --void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); -+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __attribute__((alloc_size(2))); - - #define __get_free_page(gfp_mask) \ - __get_free_pages((gfp_mask), 0) -diff --git a/include/linux/highmem.h b/include/linux/highmem.h -index ea5cdbd8c2c3..805b84d6bbca 100644 ---- a/include/linux/highmem.h -+++ b/include/linux/highmem.h -@@ -215,6 +215,13 @@ static inline void clear_highpage(struct page *page) - kunmap_atomic(kaddr); - } - -+static inline void verify_zero_highpage(struct page *page) -+{ -+ void *kaddr = kmap_atomic(page); -+ BUG_ON(memchr_inv(kaddr, 0, PAGE_SIZE)); -+ kunmap_atomic(kaddr); -+} -+ - static inline void zero_user_segments(struct page *page, - unsigned start1, unsigned end1, - unsigned start2, unsigned end2) -diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h -index 80f637c3a6f3..0188c5fa11cb 100644 ---- a/include/linux/interrupt.h -+++ b/include/linux/interrupt.h -@@ -554,7 +554,7 @@ extern const char * const softirq_to_name[NR_SOFTIRQS]; - - struct softirq_action - { -- void (*action)(struct softirq_action *); -+ void (*action)(void); - }; - - asmlinkage void do_softirq(void); -@@ -569,7 +569,7 @@ static inline void do_softirq_own_stack(void) - } - #endif - --extern void open_softirq(int nr, void (*action)(struct softirq_action *)); -+extern void __init open_softirq(int nr, void (*action)(void)); - extern void softirq_init(void); - extern void __raise_softirq_irqoff(unsigned int nr); - -diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h -index 069aa2ebef90..cb9e3637a620 100644 ---- a/include/linux/kobject_ns.h -+++ b/include/linux/kobject_ns.h -@@ -45,7 +45,7 @@ struct kobj_ns_type_operations { - void (*drop_ns)(void *); - }; - --int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); -+int __init kobj_ns_type_register(const struct kobj_ns_type_operations *ops); - int kobj_ns_type_registered(enum kobj_ns_type type); - const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); - const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); -diff --git a/include/linux/mm.h b/include/linux/mm.h -index 465e8ad671f8..57f78e2fcdac 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -751,7 +751,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) - } - #endif - --extern void *kvmalloc_node(size_t size, gfp_t flags, int node); 
-+extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __attribute__((alloc_size(1))); - static inline void *kvmalloc(size_t size, gfp_t flags) - { - return kvmalloc_node(size, flags, NUMA_NO_NODE); -diff --git a/include/linux/percpu.h b/include/linux/percpu.h -index 5e76af742c80..9a6c682ec127 100644 ---- a/include/linux/percpu.h -+++ b/include/linux/percpu.h -@@ -123,7 +123,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, - pcpu_fc_populate_pte_fn_t populate_pte_fn); - #endif - --extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); -+extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __attribute__((alloc_size(1))); - extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr); - extern bool is_kernel_percpu_address(unsigned long addr); - -@@ -131,8 +131,8 @@ extern bool is_kernel_percpu_address(unsigned long addr); - extern void __init setup_per_cpu_areas(void); - #endif - --extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp); --extern void __percpu *__alloc_percpu(size_t size, size_t align); -+extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __attribute__((alloc_size(1))); -+extern void __percpu *__alloc_percpu(size_t size, size_t align) __attribute__((alloc_size(1))); - extern void free_percpu(void __percpu *__pdata); - extern phys_addr_t per_cpu_ptr_to_phys(void *addr); - -diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h -index 9c3e7619c929..2976a90b927c 100644 ---- a/include/linux/perf_event.h -+++ b/include/linux/perf_event.h -@@ -1303,6 +1303,14 @@ static inline int perf_is_paranoid(void) - return sysctl_perf_event_paranoid > -1; - } - -+static inline int perf_allow_open(struct perf_event_attr *attr) -+{ -+ if (sysctl_perf_event_paranoid > 2 && !capable(CAP_SYS_ADMIN)) -+ return -EACCES; -+ -+ return security_perf_event_open(attr, PERF_SECURITY_OPEN); -+} -+ - static inline int perf_allow_kernel(struct perf_event_attr *attr) - { - if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) -diff --git a/include/linux/slab.h b/include/linux/slab.h -index 6d454886bcaf..60e0df2ccc59 100644 ---- a/include/linux/slab.h -+++ b/include/linux/slab.h -@@ -184,7 +184,7 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *, struct mem_cgroup *); - /* - * Common kmalloc functions provided by all allocators - */ --void * __must_check krealloc(const void *, size_t, gfp_t); -+void * __must_check krealloc(const void *, size_t, gfp_t) __attribute((alloc_size(2))); - void kfree(const void *); - void kzfree(const void *); - size_t __ksize(const void *); -@@ -389,7 +389,7 @@ static __always_inline unsigned int kmalloc_index(size_t size) - } - #endif /* !CONFIG_SLOB */ - --void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc; -+void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc __attribute__((alloc_size(1))); - void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment __malloc; - void kmem_cache_free(struct kmem_cache *, void *); - -@@ -413,7 +413,7 @@ static __always_inline void kfree_bulk(size_t size, void **p) - } - - #ifdef CONFIG_NUMA --void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc; -+void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc __attribute__((alloc_size(1))); - void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment 
__malloc; - #else - static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) -@@ -538,7 +538,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) - * Try really hard to succeed the allocation but fail - * eventually. - */ --static __always_inline void *kmalloc(size_t size, gfp_t flags) -+static __always_inline __attribute__((alloc_size(1))) void *kmalloc(size_t size, gfp_t flags) - { - if (__builtin_constant_p(size)) { - #ifndef CONFIG_SLOB -@@ -560,7 +560,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) - return __kmalloc(size, flags); - } - --static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) -+static __always_inline __attribute__((alloc_size(1))) void *kmalloc_node(size_t size, gfp_t flags, int node) - { - #ifndef CONFIG_SLOB - if (__builtin_constant_p(size) && -diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h -index d2153789bd9f..97da977d6060 100644 ---- a/include/linux/slub_def.h -+++ b/include/linux/slub_def.h -@@ -121,6 +121,11 @@ struct kmem_cache { - unsigned long random; - #endif - -+#ifdef CONFIG_SLAB_CANARY -+ unsigned long random_active; -+ unsigned long random_inactive; -+#endif -+ - #ifdef CONFIG_NUMA - /* - * Defragmentation by allocating from a remote node. -diff --git a/include/linux/string.h b/include/linux/string.h -index 9b7a0632e87a..5c2420dfe2e7 100644 ---- a/include/linux/string.h -+++ b/include/linux/string.h -@@ -271,6 +271,12 @@ void __read_overflow2(void) __compiletime_error("detected read beyond size of ob - void __read_overflow3(void) __compiletime_error("detected read beyond size of object passed as 3rd parameter"); - void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter"); - -+#ifdef CONFIG_FORTIFY_SOURCE_STRICT_STRING -+#define __string_size(p) __builtin_object_size(p, 1) -+#else -+#define __string_size(p) __builtin_object_size(p, 0) -+#endif -+ - #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) - - #ifdef CONFIG_KASAN -@@ -299,7 +305,7 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) - - __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) - { -- size_t p_size = __builtin_object_size(p, 0); -+ size_t p_size = __string_size(p); - if (__builtin_constant_p(size) && p_size < size) - __write_overflow(); - if (p_size < size) -@@ -309,7 +315,7 @@ __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) - - __FORTIFY_INLINE char *strcat(char *p, const char *q) - { -- size_t p_size = __builtin_object_size(p, 0); -+ size_t p_size = __string_size(p); - if (p_size == (size_t)-1) - return __underlying_strcat(p, q); - if (strlcat(p, q, p_size) >= p_size) -@@ -320,7 +326,7 @@ __FORTIFY_INLINE char *strcat(char *p, const char *q) - __FORTIFY_INLINE __kernel_size_t strlen(const char *p) - { - __kernel_size_t ret; -- size_t p_size = __builtin_object_size(p, 0); -+ size_t p_size = __string_size(p); - - /* Work around gcc excess stack consumption issue */ - if (p_size == (size_t)-1 || -@@ -335,7 +341,7 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p) - extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); - __FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) - { -- size_t p_size = __builtin_object_size(p, 0); -+ size_t p_size = __string_size(p); - __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? 
maxlen : p_size); - if (p_size <= ret && maxlen != ret) - fortify_panic(__func__); -@@ -347,8 +353,8 @@ extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); - __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) - { - size_t ret; -- size_t p_size = __builtin_object_size(p, 0); -- size_t q_size = __builtin_object_size(q, 0); -+ size_t p_size = __string_size(p); -+ size_t q_size = __string_size(q); - if (p_size == (size_t)-1 && q_size == (size_t)-1) - return __real_strlcpy(p, q, size); - ret = strlen(q); -@@ -368,8 +374,8 @@ __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) - __FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count) - { - size_t p_len, copy_len; -- size_t p_size = __builtin_object_size(p, 0); -- size_t q_size = __builtin_object_size(q, 0); -+ size_t p_size = __string_size(p); -+ size_t q_size = __string_size(q); - if (p_size == (size_t)-1 && q_size == (size_t)-1) - return __underlying_strncat(p, q, count); - p_len = strlen(p); -@@ -482,8 +488,8 @@ __FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp) - /* defined after fortified strlen and memcpy to reuse them */ - __FORTIFY_INLINE char *strcpy(char *p, const char *q) - { -- size_t p_size = __builtin_object_size(p, 0); -- size_t q_size = __builtin_object_size(q, 0); -+ size_t p_size = __string_size(p); -+ size_t q_size = __string_size(q); - if (p_size == (size_t)-1 && q_size == (size_t)-1) - return __underlying_strcpy(p, q); - memcpy(p, q, strlen(q) + 1); -diff --git a/include/linux/tty.h b/include/linux/tty.h -index a99e9b8e4e31..ee272abea5f9 100644 ---- a/include/linux/tty.h -+++ b/include/linux/tty.h -@@ -14,6 +14,7 @@ - #include - #include - #include -+#include - - - /* -@@ -338,6 +339,7 @@ struct tty_struct { - /* If the tty has a pending do_SAK, queue it here - akpm */ - struct work_struct SAK_work; - struct tty_port *port; -+ struct user_namespace *owner_user_ns; - } __randomize_layout; - - /* Each of a tty's open files has private_data pointing to tty_file_private */ -@@ -347,6 +349,8 @@ struct tty_file_private { - struct list_head list; - }; - -+extern int tiocsti_restrict; -+ - /* tty magic number */ - #define TTY_MAGIC 0x5401 - -diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h -index a95d3cc74d79..93c9cc5baa23 100644 ---- a/include/linux/vmalloc.h -+++ b/include/linux/vmalloc.h -@@ -102,20 +102,20 @@ static inline void vmalloc_init(void) - static inline unsigned long vmalloc_nr_pages(void) { return 0; } - #endif - --extern void *vmalloc(unsigned long size); --extern void *vzalloc(unsigned long size); --extern void *vmalloc_user(unsigned long size); --extern void *vmalloc_node(unsigned long size, int node); --extern void *vzalloc_node(unsigned long size, int node); --extern void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags); --extern void *vmalloc_exec(unsigned long size); --extern void *vmalloc_32(unsigned long size); --extern void *vmalloc_32_user(unsigned long size); --extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); -+extern void *vmalloc(unsigned long size) __attribute__((alloc_size(1))); -+extern void *vzalloc(unsigned long size) __attribute__((alloc_size(1))); -+extern void *vmalloc_user(unsigned long size) __attribute__((alloc_size(1))); -+extern void *vmalloc_node(unsigned long size, int node) __attribute__((alloc_size(1))); -+extern void *vzalloc_node(unsigned long size, int node) __attribute__((alloc_size(1))); -+extern void 
*vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags) __attribute__((alloc_size(1))); -+extern void *vmalloc_exec(unsigned long size) __attribute__((alloc_size(1))); -+extern void *vmalloc_32(unsigned long size) __attribute__((alloc_size(1))); -+extern void *vmalloc_32_user(unsigned long size) __attribute__((alloc_size(1))); -+extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) __attribute__((alloc_size(1))); - extern void *__vmalloc_node_range(unsigned long size, unsigned long align, - unsigned long start, unsigned long end, gfp_t gfp_mask, - pgprot_t prot, unsigned long vm_flags, int node, -- const void *caller); -+ const void *caller) __attribute__((alloc_size(1))); - #ifndef CONFIG_MMU - extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); - static inline void *__vmalloc_node_flags_caller(unsigned long size, int node, -diff --git a/include/net/tcp.h b/include/net/tcp.h -index 6f8e60c6fbc7..fe971ed1978b 100644 ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -244,6 +244,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); - /* sysctl variables for tcp */ - extern int sysctl_tcp_max_orphans; - extern long sysctl_tcp_mem[3]; -+extern int sysctl_tcp_simult_connect; - - #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ - #define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */ -diff --git a/init/Kconfig b/init/Kconfig -index 74a5ac65644f..b0f67731c203 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -349,6 +349,7 @@ config USELIB - config AUDIT - bool "Auditing support" - depends on NET -+ default y - help - Enable auditing infrastructure that can be used with another - kernel subsystem, such as SELinux (which requires this for -@@ -1102,6 +1103,22 @@ config USER_NS - - If unsure, say N. - -+config USER_NS_UNPRIVILEGED -+ bool "Allow unprivileged users to create namespaces" -+ depends on USER_NS -+ default n -+ help -+ When disabled, unprivileged users will not be able to create -+ new namespaces. Allowing users to create their own namespaces -+ has been part of several recent local privilege escalation -+ exploits, so if you need user namespaces but are -+ paranoid^Wsecurity-conscious you want to disable this. -+ -+ This setting can be overridden at runtime via the -+ kernel.unprivileged_userns_clone sysctl. -+ -+ If unsure, say N. -+ - config PID_NS - bool "PID Namespaces" - default y -@@ -1515,8 +1532,7 @@ config SHMEM - which may be appropriate on small systems without swap. - - config AIO -- bool "Enable AIO support" if EXPERT -- default y -+ bool "Enable AIO support" - help - This option enables POSIX asynchronous I/O which may by used - by some high performance threaded applications. Disabling -@@ -1652,6 +1668,23 @@ config USERFAULTFD - Enable the userfaultfd() system call that allows to intercept and - handle page faults in userland. - -+config USERFAULTFD_UNPRIVILEGED -+ bool "Allow unprivileged users to use the userfaultfd syscall" -+ depends on USERFAULTFD -+ default n -+ help -+ When disabled, unprivileged users will not be able to use the userfaultfd -+ syscall. Userfaultfd provide attackers with a way to stall a kernel -+ thread in the middle of memory accesses from userspace by initiating an -+ access on an unmapped page. To avoid various heap grooming and heap -+ spraying techniques for exploiting use-after-free flaws this should be -+ disabled by default. -+ -+ This setting can be overridden at runtime via the -+ vm.unprivileged_userfaultfd sysctl. 
-+ -+ If unsure, say N. -+ - config ARCH_HAS_MEMBARRIER_CALLBACKS - bool - -@@ -1764,7 +1797,7 @@ config VM_EVENT_COUNTERS - - config SLUB_DEBUG - default y -- bool "Enable SLUB debugging support" if EXPERT -+ bool "Enable SLUB debugging support" - depends on SLUB && SYSFS - help - SLUB has extensive debug support features. Disabling these can -@@ -1788,7 +1821,6 @@ config SLUB_MEMCG_SYSFS_ON - - config COMPAT_BRK - bool "Disable heap randomization" -- default y - help - Randomizing heap placement makes heap exploits harder, but it - also breaks ancient binaries (including anything libc5 based). -@@ -1835,7 +1867,6 @@ endchoice - - config SLAB_MERGE_DEFAULT - bool "Allow slab caches to be merged" -- default y - help - For reduced kernel memory fragmentation, slab caches can be - merged when they share the same size and other characteristics. -@@ -1848,9 +1879,9 @@ config SLAB_MERGE_DEFAULT - command line. - - config SLAB_FREELIST_RANDOM -- default n - depends on SLAB || SLUB - bool "SLAB freelist randomization" -+ default y - help - Randomizes the freelist order used on creating new pages. This - security feature reduces the predictability of the kernel slab -@@ -1859,12 +1890,30 @@ config SLAB_FREELIST_RANDOM - config SLAB_FREELIST_HARDENED - bool "Harden slab freelist metadata" - depends on SLUB -+ default y - help - Many kernel heap attacks try to target slab cache metadata and - other infrastructure. This options makes minor performance - sacrifices to harden the kernel slab allocator against common - freelist exploit methods. - -+config SLAB_CANARY -+ depends on SLUB -+ depends on !SLAB_MERGE_DEFAULT -+ bool "SLAB canaries" -+ default y -+ help -+ Place canaries at the end of kernel slab allocations, sacrificing -+ some performance and memory usage for security. -+ -+ Canaries can detect some forms of heap corruption when allocations -+ are freed and as part of the HARDENED_USERCOPY feature. It provides -+ basic use-after-free detection for HARDENED_USERCOPY. -+ -+ Canaries absorb small overflows (rendering them harmless), mitigate -+ non-NUL terminated C string overflows on 64-bit via a guaranteed zero -+ byte and provide basic double-free detection. -+ - config SHUFFLE_PAGE_ALLOCATOR - bool "Page allocator randomization" - default SLAB_FREELIST_RANDOM && ACPI_NUMA -diff --git a/kernel/audit.c b/kernel/audit.c -index f711f424a28a..f15d1d41244c 100644 ---- a/kernel/audit.c -+++ b/kernel/audit.c -@@ -1642,6 +1642,9 @@ static int __init audit_enable(char *str) - - if (audit_default == AUDIT_OFF) - audit_initialized = AUDIT_DISABLED; -+ else if (!audit_ever_enabled) -+ audit_initialized = AUDIT_UNINITIALIZED; -+ - if (audit_set_enabled(audit_default)) - pr_err("audit: error setting audit state (%d)\n", - audit_default); -diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c -index 916f5132a984..296a07014999 100644 ---- a/kernel/bpf/core.c -+++ b/kernel/bpf/core.c -@@ -520,7 +520,7 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) - /* All BPF JIT sysctl knobs here. 
*/ - int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); - int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); --int bpf_jit_harden __read_mostly; -+int bpf_jit_harden __read_mostly = 2; - long bpf_jit_limit __read_mostly; - - static void -diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c -index c8acc8f37583..ccf05cdfd932 100644 ---- a/kernel/bpf/syscall.c -+++ b/kernel/bpf/syscall.c -@@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(prog_idr_lock); - static DEFINE_IDR(map_idr); - static DEFINE_SPINLOCK(map_idr_lock); - --int sysctl_unprivileged_bpf_disabled __read_mostly; -+int sysctl_unprivileged_bpf_disabled __read_mostly = 1; - - static const struct bpf_map_ops * const bpf_map_types[] = { - #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) -diff --git a/kernel/capability.c b/kernel/capability.c -index 1444f3954d75..8cc9dd7992f2 100644 ---- a/kernel/capability.c -+++ b/kernel/capability.c -@@ -449,6 +449,12 @@ bool capable(int cap) - return ns_capable(&init_user_ns, cap); - } - EXPORT_SYMBOL(capable); -+ -+bool capable_noaudit(int cap) -+{ -+ return ns_capable_noaudit(&init_user_ns, cap); -+} -+EXPORT_SYMBOL(capable_noaudit); - #endif /* CONFIG_MULTIUSER */ - - /** -diff --git a/kernel/events/core.c b/kernel/events/core.c -index 1dd91f960839..90a629557f9e 100644 ---- a/kernel/events/core.c -+++ b/kernel/events/core.c -@@ -406,8 +406,13 @@ static cpumask_var_t perf_online_mask; - * 0 - disallow raw tracepoint access for unpriv - * 1 - disallow cpu events for unpriv - * 2 - disallow kernel profiling for unpriv -+ * 3 - disallow all unpriv perf event use - */ -+#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT -+int sysctl_perf_event_paranoid __read_mostly = 3; -+#else - int sysctl_perf_event_paranoid __read_mostly = 2; -+#endif - - /* Minimum for 512 kiB + 1 user control page */ - int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ -@@ -11501,7 +11506,7 @@ SYSCALL_DEFINE5(perf_event_open, - return -EINVAL; - - /* Do we allow access to perf_event_open(2) ? */ -- err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); -+ err = perf_allow_open(&attr); - if (err) - return err; - -diff --git a/kernel/fork.c b/kernel/fork.c -index 48ed22774efa..ec61454a18d5 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -106,6 +106,11 @@ - - #define CREATE_TRACE_POINTS - #include -+#ifdef CONFIG_USER_NS -+extern int unprivileged_userns_clone; -+#else -+#define unprivileged_userns_clone 0 -+#endif - - /* - * Minimum number of threads to boot the kernel -@@ -1848,6 +1853,10 @@ static __latent_entropy struct task_struct *copy_process( - if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) - return ERR_PTR(-EINVAL); - -+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) -+ if (!capable(CAP_SYS_ADMIN)) -+ return ERR_PTR(-EPERM); -+ - /* - * Thread groups must share signals as well, and detached threads - * can only be started up within the thread group. 
-@@ -2948,6 +2957,12 @@ int ksys_unshare(unsigned long unshare_flags) - if (unshare_flags & CLONE_NEWNS) - unshare_flags |= CLONE_FS; - -+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { -+ err = -EPERM; -+ if (!capable(CAP_SYS_ADMIN)) -+ goto bad_unshare_out; -+ } -+ - err = check_unshare_flags(unshare_flags); - if (err) - goto bad_unshare_out; -diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c -index dd572ce7c747..95af139ac6ba 100644 ---- a/kernel/rcu/tiny.c -+++ b/kernel/rcu/tiny.c -@@ -100,7 +100,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head *head) - } - - /* Invoke the RCU callbacks whose grace period has elapsed. */ --static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) -+static __latent_entropy void rcu_process_callbacks(void) - { - struct rcu_head *next, *list; - unsigned long flags; -diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c -index d9a49cd6065a..9f63b28e3ebe 100644 ---- a/kernel/rcu/tree.c -+++ b/kernel/rcu/tree.c -@@ -2437,7 +2437,7 @@ static __latent_entropy void rcu_core(void) - trace_rcu_utilization(TPS("End RCU core")); - } - --static void rcu_core_si(struct softirq_action *h) -+static void rcu_core_si(void) - { - rcu_core(); - } -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 5725199b32dc..dfb99620cb41 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -10568,7 +10568,7 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf) - * run_rebalance_domains is triggered when needed from the scheduler tick. - * Also triggered for nohz idle balancing (with nohz_balancing_kick set). - */ --static __latent_entropy void run_rebalance_domains(struct softirq_action *h) -+static __latent_entropy void run_rebalance_domains(void) - { - struct rq *this_rq = this_rq(); - enum cpu_idle_type idle = this_rq->idle_balance ? 
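The copy_process() and ksys_unshare() hunks above are where the kernel.unprivileged_userns_clone sysctl (registered later in this patch) is enforced. A minimal probe, not part of the patch, showing the failure mode an unprivileged process sees when the knob is 0:

#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

/* Attempt to create an unprivileged user namespace.  With the gate
 * added above this fails with EPERM unless the sysctl is 1 or the
 * caller has CAP_SYS_ADMIN. */
int main(void)
{
        if (unshare(CLONE_NEWUSER) == 0) {
                puts("unprivileged user namespace: allowed");
                return 0;
        }
        fprintf(stderr, "unshare(CLONE_NEWUSER): %s\n", strerror(errno));
        return 1;
}

On a kernel without this gate, or with the sysctl set to 1, the call simply succeeds.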
-diff --git a/kernel/softirq.c b/kernel/softirq.c -index a47c6dd57452..c12cb85a6504 100644 ---- a/kernel/softirq.c -+++ b/kernel/softirq.c -@@ -52,7 +52,7 @@ DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat); - EXPORT_PER_CPU_SYMBOL(irq_stat); - #endif - --static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; -+static struct softirq_action softirq_vec[NR_SOFTIRQS] __ro_after_init __aligned(PAGE_SIZE); - - DEFINE_PER_CPU(struct task_struct *, ksoftirqd); - -@@ -289,7 +289,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) - kstat_incr_softirqs_this_cpu(vec_nr); - - trace_softirq_entry(vec_nr); -- h->action(h); -+ h->action(); - trace_softirq_exit(vec_nr); - if (unlikely(prev_count != preempt_count())) { - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", -@@ -453,7 +453,7 @@ void __raise_softirq_irqoff(unsigned int nr) - or_softirq_pending(1UL << nr); - } - --void open_softirq(int nr, void (*action)(struct softirq_action *)) -+void __init open_softirq(int nr, void (*action)(void)) - { - softirq_vec[nr].action = action; - } -@@ -499,8 +499,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) - } - EXPORT_SYMBOL(__tasklet_hi_schedule); - --static void tasklet_action_common(struct softirq_action *a, -- struct tasklet_head *tl_head, -+static void tasklet_action_common(struct tasklet_head *tl_head, - unsigned int softirq_nr) - { - struct tasklet_struct *list; -@@ -537,14 +536,14 @@ static void tasklet_action_common(struct softirq_action *a, - } - } - --static __latent_entropy void tasklet_action(struct softirq_action *a) -+static __latent_entropy void tasklet_action(void) - { -- tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); -+ tasklet_action_common(this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); - } - --static __latent_entropy void tasklet_hi_action(struct softirq_action *a) -+static __latent_entropy void tasklet_hi_action(void) - { -- tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); -+ tasklet_action_common(this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); - } - - void tasklet_init(struct tasklet_struct *t, -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index 8a176d8727a3..87bc1d26c376 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -68,6 +68,7 @@ - #include - #include - #include -+#include - - #include "../lib/kstrtox.h" - -@@ -104,12 +105,19 @@ - #if defined(CONFIG_SYSCTL) - - /* External variables not in a header file. 
*/ -+#if IS_ENABLED(CONFIG_USB) -+int deny_new_usb __read_mostly = 0; -+EXPORT_SYMBOL(deny_new_usb); -+#endif - extern int suid_dumpable; - #ifdef CONFIG_COREDUMP - extern int core_uses_pid; - extern char core_pattern[]; - extern unsigned int core_pipe_limit; - #endif -+#ifdef CONFIG_USER_NS -+extern int unprivileged_userns_clone; -+#endif - extern int pid_max; - extern int pid_max_min, pid_max_max; - extern int percpu_pagelist_fraction; -@@ -121,32 +129,32 @@ extern int sysctl_nr_trim_pages; - - /* Constants used for minimum and maximum */ - #ifdef CONFIG_LOCKUP_DETECTOR --static int sixty = 60; -+static int sixty __read_only = 60; - #endif - --static int __maybe_unused neg_one = -1; --static int __maybe_unused two = 2; --static int __maybe_unused four = 4; --static unsigned long zero_ul; --static unsigned long one_ul = 1; --static unsigned long long_max = LONG_MAX; --static int one_hundred = 100; --static int one_thousand = 1000; -+static int __maybe_unused neg_one __read_only = -1; -+static int __maybe_unused two __read_only = 2; -+static int __maybe_unused four __read_only = 4; -+static unsigned long zero_ul __read_only; -+static unsigned long one_ul __read_only = 1; -+static unsigned long long_max __read_only = LONG_MAX; -+static int one_hundred __read_only = 100; -+static int one_thousand __read_only = 1000; - #ifdef CONFIG_PRINTK --static int ten_thousand = 10000; -+static int ten_thousand __read_only = 10000; - #endif - #ifdef CONFIG_PERF_EVENTS --static int six_hundred_forty_kb = 640 * 1024; -+static int six_hundred_forty_kb __read_only = 640 * 1024; - #endif - - /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ --static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; -+static unsigned long dirty_bytes_min __read_only = 2 * PAGE_SIZE; - - /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ --static int maxolduid = 65535; --static int minolduid; -+static int maxolduid __read_only = 65535; -+static int minolduid __read_only; - --static int ngroups_max = NGROUPS_MAX; -+static int ngroups_max __read_only = NGROUPS_MAX; - static const int cap_last_cap = CAP_LAST_CAP; - - /* -@@ -154,9 +162,12 @@ static const int cap_last_cap = CAP_LAST_CAP; - * and hung_task_check_interval_secs - */ - #ifdef CONFIG_DETECT_HUNG_TASK --static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); -+static unsigned long hung_task_timeout_max __read_only = (LONG_MAX/HZ); - #endif - -+int device_sidechannel_restrict __read_mostly = 1; -+EXPORT_SYMBOL(device_sidechannel_restrict); -+ - #ifdef CONFIG_INOTIFY_USER - #include - #endif -@@ -289,19 +300,19 @@ static struct ctl_table sysctl_base_table[] = { - }; - - #ifdef CONFIG_SCHED_DEBUG --static int min_sched_granularity_ns = 100000; /* 100 usecs */ --static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ --static int min_wakeup_granularity_ns; /* 0 usecs */ --static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ -+static int min_sched_granularity_ns __read_only = 100000; /* 100 usecs */ -+static int max_sched_granularity_ns __read_only = NSEC_PER_SEC; /* 1 second */ -+static int min_wakeup_granularity_ns __read_only; /* 0 usecs */ -+static int max_wakeup_granularity_ns __read_only = NSEC_PER_SEC; /* 1 second */ - #ifdef CONFIG_SMP --static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; --static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; -+static int min_sched_tunable_scaling __read_only = SCHED_TUNABLESCALING_NONE; -+static int max_sched_tunable_scaling 
__read_only = SCHED_TUNABLESCALING_END-1; - #endif /* CONFIG_SMP */ - #endif /* CONFIG_SCHED_DEBUG */ - - #ifdef CONFIG_COMPACTION --static int min_extfrag_threshold; --static int max_extfrag_threshold = 1000; -+static int min_extfrag_threshold __read_only; -+static int max_extfrag_threshold __read_only = 1000; - #endif - - static struct ctl_table kern_table[] = { -@@ -534,6 +545,15 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_USER_NS -+ { -+ .procname = "unprivileged_userns_clone", -+ .data = &unprivileged_userns_clone, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+#endif - #ifdef CONFIG_PROC_SYSCTL - { - .procname = "tainted", -@@ -880,6 +900,37 @@ static struct ctl_table kern_table[] = { - .extra1 = SYSCTL_ZERO, - .extra2 = &two, - }, -+#endif -+#if defined CONFIG_TTY -+ { -+ .procname = "tiocsti_restrict", -+ .data = &tiocsti_restrict, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec_minmax_sysadmin, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+#endif -+ { -+ .procname = "device_sidechannel_restrict", -+ .data = &device_sidechannel_restrict, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec_minmax_sysadmin, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+#if IS_ENABLED(CONFIG_USB) -+ { -+ .procname = "deny_new_usb", -+ .data = &deny_new_usb, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec_minmax_sysadmin, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, - #endif - { - .procname = "ngroups_max", -diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index d89da1c7e005..8e1003ef3ebb 100644 ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -1588,7 +1588,7 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, - } - } - --static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) -+static __latent_entropy void hrtimer_run_softirq(void) - { - struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); - unsigned long flags; -diff --git a/kernel/time/timer.c b/kernel/time/timer.c -index a5221abb4594..636f4f9566fa 100644 ---- a/kernel/time/timer.c -+++ b/kernel/time/timer.c -@@ -1780,7 +1780,7 @@ static inline void __run_timers(struct timer_base *base) - /* - * This function runs timers and the timer-tq in bottom half context. - */ --static __latent_entropy void run_timer_softirq(struct softirq_action *h) -+static __latent_entropy void run_timer_softirq(void) - { - struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); - -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 8eadadc478f9..c36ecd19562c 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -21,6 +21,13 @@ - #include - #include - -+/* sysctl */ -+#ifdef CONFIG_USER_NS_UNPRIVILEGED -+int unprivileged_userns_clone = 1; -+#else -+int unprivileged_userns_clone; -+#endif -+ - static struct kmem_cache *user_ns_cachep __read_mostly; - static DEFINE_MUTEX(userns_state_mutex); - -diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug -index 21d9c5f6e7ec..ab5ae07fa69a 100644 ---- a/lib/Kconfig.debug -+++ b/lib/Kconfig.debug -@@ -337,6 +337,9 @@ config SECTION_MISMATCH_WARN_ONLY - - If unsure, say Y. 
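A minimal usage sketch (illustrative only, not part of the diff): the entries added to kern_table above surface under the "kernel." sysctl namespace, so on a kernel built with this hardening patch they could be read and toggled roughly as follows. The sysctl names come from the .procname fields above; the persistence path below is just an example.

    # read the current values (tiocsti_restrict needs CONFIG_TTY, deny_new_usb needs CONFIG_USB)
    sysctl kernel.unprivileged_userns_clone kernel.device_sidechannel_restrict
    sysctl kernel.tiocsti_restrict kernel.deny_new_usb
    # flip a knob at runtime (writes go through proc_dointvec_minmax_sysadmin, so CAP_SYS_ADMIN is required)
    sudo sysctl -w kernel.deny_new_usb=1
    # example of persisting a setting across reboots
    echo 'kernel.unprivileged_userns_clone = 0' | sudo tee /etc/sysctl.d/99-hardening-example.conf
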
- -+config DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE -+ bool "Enable verbose reporting of writable function pointers" -+ - # - # Select this config option from the architecture Kconfig, if it - # is preferred to always offer frame pointers as a config -@@ -798,6 +801,7 @@ menu "Debug Oops, Lockups and Hangs" - - config PANIC_ON_OOPS - bool "Panic on Oops" -+ default y - help - Say Y here to enable the kernel to panic when it oopses. This - has the same effect as setting oops=panic on the kernel command -@@ -807,7 +811,7 @@ config PANIC_ON_OOPS - anything erroneous after an oops which could result in data - corruption or other issues. - -- Say N if unsure. -+ Say Y if unsure. - - config PANIC_ON_OOPS_VALUE - int -@@ -1346,6 +1350,7 @@ menu "Debug kernel data structures" - config DEBUG_LIST - bool "Debug linked list manipulation" - depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION -+ default y - help - Enable this to turn on extended checks in the linked-list - walking routines. -@@ -1385,6 +1390,7 @@ config DEBUG_NOTIFIERS - config BUG_ON_DATA_CORRUPTION - bool "Trigger a BUG when data corruption is detected" - select DEBUG_LIST -+ default y - help - Select this option if the kernel should BUG when it encounters - data corruption in kernel memory structures when they get checked -@@ -1540,6 +1546,7 @@ config STRICT_DEVMEM - config IO_STRICT_DEVMEM - bool "Filter I/O access to /dev/mem" - depends on STRICT_DEVMEM -+ default y - help - If this option is disabled, you allow userspace (root) access to all - io-memory regardless of whether a driver is actively using that -diff --git a/lib/irq_poll.c b/lib/irq_poll.c -index 2f17b488d58e..b6e7996a0058 100644 ---- a/lib/irq_poll.c -+++ b/lib/irq_poll.c -@@ -75,7 +75,7 @@ void irq_poll_complete(struct irq_poll *iop) - } - EXPORT_SYMBOL(irq_poll_complete); - --static void __latent_entropy irq_poll_softirq(struct softirq_action *h) -+static void __latent_entropy irq_poll_softirq(void) - { - struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); - int rearm = 0, budget = irq_poll_budget; -diff --git a/lib/kobject.c b/lib/kobject.c -index 83198cb37d8d..4a053b7aef42 100644 ---- a/lib/kobject.c -+++ b/lib/kobject.c -@@ -1009,9 +1009,9 @@ EXPORT_SYMBOL_GPL(kset_create_and_add); - - - static DEFINE_SPINLOCK(kobj_ns_type_lock); --static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES]; -+static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES] __ro_after_init; - --int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) -+int __init kobj_ns_type_register(const struct kobj_ns_type_operations *ops) - { - enum kobj_ns_type type = ops->type; - int error; -diff --git a/lib/nlattr.c b/lib/nlattr.c -index cace9b307781..39ba1387045d 100644 ---- a/lib/nlattr.c -+++ b/lib/nlattr.c -@@ -571,6 +571,8 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count) - { - int minlen = min_t(int, count, nla_len(src)); - -+ BUG_ON(minlen < 0); -+ - memcpy(dest, nla_data(src), minlen); - if (count > minlen) - memset(dest + minlen, 0, count - minlen); -diff --git a/lib/vsprintf.c b/lib/vsprintf.c -index 7c47ad52ce2f..d1e002579732 100644 ---- a/lib/vsprintf.c -+++ b/lib/vsprintf.c -@@ -817,7 +817,7 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, - return pointer_string(buf, end, (const void *)hashval, spec); - } - --int kptr_restrict __read_mostly; -+int kptr_restrict __read_mostly = 2; - - static noinline_for_stack - char *restricted_pointer(char *buf, char *end, const void *ptr, -diff --git a/mm/Kconfig 
b/mm/Kconfig -index c1acc34c1c35..06dd0aa41a1b 100644 ---- a/mm/Kconfig -+++ b/mm/Kconfig -@@ -320,7 +320,8 @@ config KSM - config DEFAULT_MMAP_MIN_ADDR - int "Low address space to protect from user allocation" - depends on MMU -- default 4096 -+ default 32768 if ARM || (ARM64 && COMPAT) -+ default 65536 - help - This is the portion of low virtual memory which should be protected - from userspace allocation. Keeping a user from writing to low pages -diff --git a/mm/mmap.c b/mm/mmap.c -index f609e9ec4a25..66297ff169d9 100644 ---- a/mm/mmap.c -+++ b/mm/mmap.c -@@ -231,6 +231,13 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) - - newbrk = PAGE_ALIGN(brk); - oldbrk = PAGE_ALIGN(mm->brk); -+ /* properly handle unaligned min_brk as an empty heap */ -+ if (min_brk & ~PAGE_MASK) { -+ if (brk == min_brk) -+ newbrk -= PAGE_SIZE; -+ if (mm->brk == min_brk) -+ oldbrk -= PAGE_SIZE; -+ } - if (oldbrk == newbrk) { - mm->brk = brk; - goto success; -diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index d0c0d9364aa6..1f1a45afac2a 100644 ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -68,6 +68,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -106,6 +107,15 @@ struct pcpu_drain { - static DEFINE_MUTEX(pcpu_drain_mutex); - static DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain); - -+bool __meminitdata extra_latent_entropy; -+ -+static int __init setup_extra_latent_entropy(char *str) -+{ -+ extra_latent_entropy = true; -+ return 0; -+} -+early_param("extra_latent_entropy", setup_extra_latent_entropy); -+ - #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY - volatile unsigned long latent_entropy __latent_entropy; - EXPORT_SYMBOL(latent_entropy); -@@ -1479,6 +1489,25 @@ static void __free_pages_ok(struct page *page, unsigned int order) - local_irq_restore(flags); - } - -+static void __init __gather_extra_latent_entropy(struct page *page, -+ unsigned int nr_pages) -+{ -+ if (extra_latent_entropy && !PageHighMem(page) && page_to_pfn(page) < 0x100000) { -+ unsigned long hash = 0; -+ size_t index, end = PAGE_SIZE * nr_pages / sizeof hash; -+ const unsigned long *data = lowmem_page_address(page); -+ -+ for (index = 0; index < end; index++) -+ hash ^= hash + data[index]; -+#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY -+ latent_entropy ^= hash; -+ add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); -+#else -+ add_device_randomness((const void *)&hash, sizeof(hash)); -+#endif -+ } -+} -+ - void __free_pages_core(struct page *page, unsigned int order) - { - unsigned int nr_pages = 1 << order; -@@ -1493,7 +1522,6 @@ void __free_pages_core(struct page *page, unsigned int order) - } - __ClearPageReserved(p); - set_page_count(p, 0); -- - atomic_long_add(nr_pages, &page_zone(page)->managed_pages); - set_page_refcounted(page); - __free_pages(page, order); -@@ -1544,6 +1572,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, - { - if (early_page_uninitialised(pfn)) - return; -+ __gather_extra_latent_entropy(page, 1 << order); - __free_pages_core(page, order); - } - -@@ -1635,6 +1664,7 @@ static void __init deferred_free_range(unsigned long pfn, - if (nr_pages == pageblock_nr_pages && - (pfn & (pageblock_nr_pages - 1)) == 0) { - set_pageblock_migratetype(page, MIGRATE_MOVABLE); -+ __gather_extra_latent_entropy(page, 1 << pageblock_order); - __free_pages_core(page, pageblock_order); - return; - } -@@ -1642,6 +1672,7 @@ static void __init deferred_free_range(unsigned long pfn, - for (i = 0; i < nr_pages; i++, page++, pfn++) { - if ((pfn & (pageblock_nr_pages - 1)) == 0) - 
set_pageblock_migratetype(page, MIGRATE_MOVABLE); -+ __gather_extra_latent_entropy(page, 1); - __free_pages_core(page, 0); - } - } -@@ -2202,6 +2233,12 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags - { - post_alloc_hook(page, order, gfp_flags); - -+ if (IS_ENABLED(CONFIG_PAGE_SANITIZE_VERIFY) && want_init_on_free()) { -+ int i; -+ for (i = 0; i < (1 << order); i++) -+ verify_zero_highpage(page + i); -+ } -+ - if (!free_pages_prezeroed() && want_init_on_alloc(gfp_flags)) - kernel_init_free_pages(page, 1 << order); - -diff --git a/mm/slab.h b/mm/slab.h -index 74f7e09a7cfd..ce786e0af610 100644 ---- a/mm/slab.h -+++ b/mm/slab.h -@@ -472,9 +472,13 @@ static inline struct kmem_cache *virt_to_cache(const void *obj) - struct page *page; - - page = virt_to_head_page(obj); -+#ifdef CONFIG_BUG_ON_DATA_CORRUPTION -+ BUG_ON(!PageSlab(page)); -+#else - if (WARN_ONCE(!PageSlab(page), "%s: Object is not a Slab page!\n", - __func__)) - return NULL; -+#endif - return page->slab_cache; - } - -@@ -520,9 +524,14 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) - return s; - - cachep = virt_to_cache(x); -- WARN_ONCE(cachep && !slab_equal_or_root(cachep, s), -- "%s: Wrong slab cache. %s but object is from %s\n", -- __func__, s->name, cachep->name); -+ if (cachep && !slab_equal_or_root(cachep, s)) { -+#ifdef CONFIG_BUG_ON_DATA_CORRUPTION -+ BUG(); -+#else -+ WARN_ONCE(1, "%s: Wrong slab cache. %s but object is from %s\n", -+ __func__, s->name, cachep->name); -+#endif -+ } - return cachep; - } - -@@ -547,7 +556,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s) - * back there or track user information then we can - * only use the space before that information. - */ -- if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) -+ if ((s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) || IS_ENABLED(CONFIG_SLAB_CANARY)) - return s->inuse; - /* - * Else we can use all the padding etc for the allocation -@@ -676,8 +685,10 @@ static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { } - static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) - { - if (static_branch_unlikely(&init_on_alloc)) { -+#ifndef CONFIG_SLUB - if (c->ctor) - return false; -+#endif - if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) - return flags & __GFP_ZERO; - return true; -@@ -687,9 +698,15 @@ static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) - - static inline bool slab_want_init_on_free(struct kmem_cache *c) - { -- if (static_branch_unlikely(&init_on_free)) -- return !(c->ctor || -- (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))); -+ if (static_branch_unlikely(&init_on_free)) { -+#ifndef CONFIG_SLUB -+ if (c->ctor) -+ return false; -+#endif -+ if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) -+ return false; -+ return true; -+ } - return false; - } - -diff --git a/mm/slab_common.c b/mm/slab_common.c -index 37d48a56431d..b8947336d0e1 100644 ---- a/mm/slab_common.c -+++ b/mm/slab_common.c -@@ -28,10 +28,10 @@ - - #include "slab.h" - --enum slab_state slab_state; -+enum slab_state slab_state __ro_after_init; - LIST_HEAD(slab_caches); - DEFINE_MUTEX(slab_mutex); --struct kmem_cache *kmem_cache; -+struct kmem_cache *kmem_cache __ro_after_init; - - #ifdef CONFIG_HARDENED_USERCOPY - bool usercopy_fallback __ro_after_init = -@@ -59,7 +59,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, - /* - * Merge control. If this is set then no merging of slab caches will occur. 
- */ --static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT); -+static bool slab_nomerge __ro_after_init = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT); - - static int __init setup_slab_nomerge(char *str) - { -diff --git a/mm/slub.c b/mm/slub.c -index 660f4324c097..54c3291a7571 100644 ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -123,6 +123,12 @@ static inline int kmem_cache_debug(struct kmem_cache *s) - #endif - } - -+static inline bool has_sanitize_verify(struct kmem_cache *s) -+{ -+ return IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && -+ slab_want_init_on_free(s); -+} -+ - void *fixup_red_left(struct kmem_cache *s, void *p) - { - if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) -@@ -494,13 +500,13 @@ static inline void *restore_red_left(struct kmem_cache *s, void *p) - * Debug settings: - */ - #if defined(CONFIG_SLUB_DEBUG_ON) --static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS; -+static slab_flags_t slub_debug __ro_after_init = DEBUG_DEFAULT_FLAGS; - #else --static slab_flags_t slub_debug; -+static slab_flags_t slub_debug __ro_after_init; - #endif - --static char *slub_debug_slabs; --static int disable_higher_order_debug; -+static char *slub_debug_slabs __ro_after_init; -+static int disable_higher_order_debug __ro_after_init; - - /* - * slub is about to manipulate internal object metadata. This memory lies -@@ -571,6 +577,33 @@ static inline unsigned int get_info_end(struct kmem_cache *s) - return s->inuse; - } - -+#ifdef CONFIG_SLAB_CANARY -+static inline unsigned long *get_canary(struct kmem_cache *s, void *object) -+{ -+ return object + get_info_end(s); -+} -+ -+static inline unsigned long get_canary_value(const void *canary, unsigned long value) -+{ -+ return (value ^ (unsigned long)canary) & CANARY_MASK; -+} -+ -+static inline void set_canary(struct kmem_cache *s, void *object, unsigned long value) -+{ -+ unsigned long *canary = get_canary(s, object); -+ *canary = get_canary_value(canary, value); -+} -+ -+static inline void check_canary(struct kmem_cache *s, void *object, unsigned long value) -+{ -+ unsigned long *canary = get_canary(s, object); -+ BUG_ON(*canary != get_canary_value(canary, value)); -+} -+#else -+#define set_canary(s, object, value) -+#define check_canary(s, object, value) -+#endif -+ - static struct track *get_track(struct kmem_cache *s, void *object, - enum track_item alloc) - { -@@ -578,6 +611,9 @@ static struct track *get_track(struct kmem_cache *s, void *object, - - p = object + get_info_end(s); - -+ if (IS_ENABLED(CONFIG_SLAB_CANARY)) -+ p = (void *)p + sizeof(void *); -+ - return p + alloc; - } - -@@ -719,6 +755,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) - - off = get_info_end(s); - -+ if (IS_ENABLED(CONFIG_SLAB_CANARY)) -+ off += sizeof(void *); -+ - if (s->flags & SLAB_STORE_USER) - off += 2 * sizeof(struct track); - -@@ -827,8 +866,9 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, - * Meta data starts here. - * - * A. Free pointer (if we cannot overwrite object on free) -- * B. Tracking data for SLAB_STORE_USER -- * C. Padding to reach required alignment boundary or at mininum -+ * B. Canary for SLAB_CANARY -+ * C. Tracking data for SLAB_STORE_USER -+ * D. Padding to reach required alignment boundary or at mininum - * one word if debugging is on to be able to detect writes - * before the word boundary. 
- * -@@ -846,6 +886,9 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) - { - unsigned long off = get_info_end(s); /* The end of info */ - -+ if (IS_ENABLED(CONFIG_SLAB_CANARY)) -+ off += sizeof(void *); -+ - if (s->flags & SLAB_STORE_USER) - /* We also have user information there */ - off += 2 * sizeof(struct track); -@@ -1491,6 +1534,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, - object = next; - next = get_freepointer(s, object); - -+ check_canary(s, object, s->random_active); -+ - if (slab_want_init_on_free(s)) { - /* - * Clear the object and the metadata, but don't touch -@@ -1501,8 +1546,12 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, - : 0; - memset((char *)object + s->inuse, 0, - s->size - s->inuse - rsize); -- -+ if (!IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && s->ctor) -+ s->ctor(object); - } -+ -+ set_canary(s, object, s->random_inactive); -+ - /* If object's reuse doesn't have to be delayed */ - if (!slab_free_hook(s, object)) { - /* Move object to the new freelist */ -@@ -1510,6 +1559,18 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, - *head = object; - if (!*tail) - *tail = object; -+ } else if (slab_want_init_on_free(s) && s->ctor) { -+ /* Objects that are put into quarantine by KASAN will -+ * still undergo free_consistency_checks() and thus -+ * need to show a valid freepointer to check_object(). -+ * -+ * Note that doing this for all caches (not just ctor -+ * ones, which have s->offset >= object_size)) causes a -+ * GPF, due to KASAN poisoning and the way -+ * set_freepointer() eventually dereferences the -+ * freepointer. -+ */ -+ set_freepointer(s, object, NULL); - } - } while (object != old_tail); - -@@ -1523,8 +1584,9 @@ static void *setup_object(struct kmem_cache *s, struct page *page, - void *object) - { - setup_object_debug(s, page, object); -+ set_canary(s, object, s->random_inactive); - object = kasan_init_slab_obj(s, object); -- if (unlikely(s->ctor)) { -+ if (unlikely(s->ctor) && !has_sanitize_verify(s)) { - kasan_unpoison_object_data(s, object); - s->ctor(object); - kasan_poison_object_data(s, object); -@@ -2818,8 +2880,28 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, - - maybe_wipe_obj_freeptr(s, object); - -- if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) -+ if (has_sanitize_verify(s) && object) { -+ /* KASAN hasn't unpoisoned the object yet (this is done in the -+ * post-alloc hook), so let's do it temporarily. 
-+ */ -+ kasan_unpoison_object_data(s, object); -+ BUG_ON(memchr_inv(object, 0, s->object_size)); -+ if (s->ctor) -+ s->ctor(object); -+ kasan_poison_object_data(s, object); -+ } else if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) { - memset(object, 0, s->object_size); -+ if (s->ctor) { -+ kasan_unpoison_object_data(s, object); -+ s->ctor(object); -+ kasan_poison_object_data(s, object); -+ } -+ } -+ -+ if (object) { -+ check_canary(s, object, s->random_inactive); -+ set_canary(s, object, s->random_active); -+ } - - slab_post_alloc_hook(s, gfpflags, 1, &object); - -@@ -3204,7 +3286,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - void **p) - { - struct kmem_cache_cpu *c; -- int i; -+ int i, k; - - /* memcg and kmem_cache debug support */ - s = slab_pre_alloc_hook(s, flags); -@@ -3253,11 +3335,35 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - local_irq_enable(); - - /* Clear memory outside IRQ disabled fastpath loop */ -- if (unlikely(slab_want_init_on_alloc(flags, s))) { -+ if (has_sanitize_verify(s)) { -+ int j; -+ -+ for (j = 0; j < i; j++) { -+ /* KASAN hasn't unpoisoned the object yet (this is done -+ * in the post-alloc hook), so let's do it temporarily. -+ */ -+ kasan_unpoison_object_data(s, p[j]); -+ BUG_ON(memchr_inv(p[j], 0, s->object_size)); -+ if (s->ctor) -+ s->ctor(p[j]); -+ kasan_poison_object_data(s, p[j]); -+ } -+ } else if (unlikely(slab_want_init_on_alloc(flags, s))) { - int j; - -- for (j = 0; j < i; j++) -+ for (j = 0; j < i; j++) { - memset(p[j], 0, s->object_size); -+ if (s->ctor) { -+ kasan_unpoison_object_data(s, p[j]); -+ s->ctor(p[j]); -+ kasan_poison_object_data(s, p[j]); -+ } -+ } -+ } -+ -+ for (k = 0; k < i; k++) { -+ check_canary(s, p[k], s->random_inactive); -+ set_canary(s, p[k], s->random_active); - } - - /* memcg and kmem_cache debug support */ -@@ -3291,9 +3397,9 @@ EXPORT_SYMBOL(kmem_cache_alloc_bulk); - * and increases the number of allocations possible without having to - * take the list_lock. - */ --static unsigned int slub_min_order; --static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; --static unsigned int slub_min_objects; -+static unsigned int slub_min_order __ro_after_init; -+static unsigned int slub_max_order __ro_after_init = PAGE_ALLOC_COSTLY_ORDER; -+static unsigned int slub_min_objects __ro_after_init; - - /* - * Calculate the order of allocation given an slab object size. 
-@@ -3461,6 +3567,7 @@ static void early_kmem_cache_node_alloc(int node) - init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); - init_tracking(kmem_cache_node, n); - #endif -+ set_canary(kmem_cache_node, n, kmem_cache_node->random_active); - n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node), - GFP_KERNEL); - page->freelist = get_freepointer(kmem_cache_node, n); -@@ -3641,6 +3748,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) - s->offset = ALIGN(freepointer_area / 2, sizeof(void *)); - } - -+ if (IS_ENABLED(CONFIG_SLAB_CANARY)) -+ size += sizeof(void *); -+ - #ifdef CONFIG_SLUB_DEBUG - if (flags & SLAB_STORE_USER) - /* -@@ -3713,6 +3823,10 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) - #ifdef CONFIG_SLAB_FREELIST_HARDENED - s->random = get_random_long(); - #endif -+#ifdef CONFIG_SLAB_CANARY -+ s->random_active = get_random_long(); -+ s->random_inactive = get_random_long(); -+#endif - - if (!calculate_sizes(s, -1)) - goto error; -@@ -3988,6 +4102,8 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, - offset -= s->red_left_pad; - } - -+ check_canary(s, (void *)ptr - offset, s->random_active); -+ - /* Allow address range falling entirely within usercopy region. */ - if (offset >= s->useroffset && - offset - s->useroffset <= s->usersize && -@@ -4021,7 +4137,11 @@ size_t __ksize(const void *object) - page = virt_to_head_page(object); - - if (unlikely(!PageSlab(page))) { -+#ifdef CONFIG_BUG_ON_DATA_CORRUPTION -+ BUG_ON(!PageCompound(page)); -+#else - WARN_ON(!PageCompound(page)); -+#endif - return page_size(page); - } - -@@ -4848,7 +4968,7 @@ enum slab_stat_type { - #define SO_TOTAL (1 << SL_TOTAL) - - #ifdef CONFIG_MEMCG --static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON); -+static bool memcg_sysfs_enabled __ro_after_init = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON); - - static int __init setup_slub_memcg_sysfs(char *str) - { -diff --git a/mm/swap.c b/mm/swap.c -index bf9a79fed62d..3375d4cf4ee8 100644 ---- a/mm/swap.c -+++ b/mm/swap.c -@@ -94,6 +94,13 @@ static void __put_compound_page(struct page *page) - if (!PageHuge(page)) - __page_cache_release(page); - dtor = get_compound_page_dtor(page); -+ if (!PageHuge(page)) -+ BUG_ON(dtor != free_compound_page -+#ifdef CONFIG_TRANSPARENT_HUGEPAGE -+ && dtor != free_transhuge_page -+#endif -+ ); -+ - (*dtor)(page); - } - -diff --git a/mm/util.c b/mm/util.c -index dc1c877d5481..4872ec1b8858 100644 ---- a/mm/util.c -+++ b/mm/util.c -@@ -335,9 +335,9 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) - { - /* Is the current task 32bit ? 
*/ - if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task()) -- return randomize_page(mm->brk, SZ_32M); -+ return mm->brk + get_random_long() % SZ_32M + PAGE_SIZE; - -- return randomize_page(mm->brk, SZ_1G); -+ return mm->brk + get_random_long() % SZ_1G + PAGE_SIZE; - } - - unsigned long arch_mmap_rnd(void) -diff --git a/net/core/dev.c b/net/core/dev.c -index c9ee5d80d5ea..9904a4aefa8b 100644 ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -4750,7 +4750,7 @@ int netif_rx_ni(struct sk_buff *skb) - } - EXPORT_SYMBOL(netif_rx_ni); - --static __latent_entropy void net_tx_action(struct softirq_action *h) -+static __latent_entropy void net_tx_action(void) - { - struct softnet_data *sd = this_cpu_ptr(&softnet_data); - -@@ -6622,7 +6622,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) - return work; - } - --static __latent_entropy void net_rx_action(struct softirq_action *h) -+static __latent_entropy void net_rx_action(void) - { - struct softnet_data *sd = this_cpu_ptr(&softnet_data); - unsigned long time_limit = jiffies + -diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig -index 25a8888826b8..7343a827e166 100644 ---- a/net/ipv4/Kconfig -+++ b/net/ipv4/Kconfig -@@ -267,6 +267,7 @@ config IP_PIMSM_V2 - - config SYN_COOKIES - bool "IP: TCP syncookie support" -+ default y - ---help--- - Normal TCP/IP networking is open to an attack known as "SYN - flooding". This denial-of-service attack prevents legitimate remote -@@ -739,3 +740,26 @@ config TCP_MD5SIG - on the Internet. - - If unsure, say N. -+ -+config TCP_SIMULT_CONNECT_DEFAULT_ON -+ bool "Enable TCP simultaneous connect" -+ help -+ Enable TCP simultaneous connect that adds a weakness in Linux's strict -+ implementation of TCP that allows two clients to connect to each other -+ without either entering a listening state. The weakness allows an -+ attacker to easily prevent a client from connecting to a known server -+ provided the source port for the connection is guessed correctly. -+ -+ As the weakness could be used to prevent an antivirus or IPS from -+ fetching updates, or prevent an SSL gateway from fetching a CRL, it -+ should be eliminated by disabling this option. Though Linux is one of -+ few operating systems supporting simultaneous connect, it has no -+ legitimate use in practice and is rarely supported by firewalls. -+ -+ Disabling this may break TCP STUNT which is used by some applications -+ for NAT traversal. -+ -+ This setting can be overridden at runtime via the -+ net.ipv4.tcp_simult_connect sysctl. -+ -+ If unsure, say N. -diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c -index 81b267e990a1..587dbfdbcf1a 100644 ---- a/net/ipv4/sysctl_net_ipv4.c -+++ b/net/ipv4/sysctl_net_ipv4.c -@@ -604,6 +604,15 @@ static struct ctl_table ipv4_table[] = { - .mode = 0644, - .proc_handler = proc_do_static_key, - }, -+ { -+ .procname = "tcp_simult_connect", -+ .data = &sysctl_tcp_simult_connect, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, - { } - }; - -diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index 1fa009999f57..43aa2340feb2 100644 ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -82,6 +82,7 @@ - #include - - int sysctl_tcp_max_orphans __read_mostly = NR_FILE; -+int sysctl_tcp_simult_connect __read_mostly = IS_ENABLED(CONFIG_TCP_SIMULT_CONNECT_DEFAULT_ON); - - #define FLAG_DATA 0x01 /* Incoming frame contained data. */ - #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. 
*/ -@@ -6064,7 +6065,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - tcp_paws_reject(&tp->rx_opt, 0)) - goto discard_and_undo; - -- if (th->syn) { -+ if (th->syn && sysctl_tcp_simult_connect) { - /* We see SYN without ACK. It is attempt of - * simultaneous connect with crossed SYNs. - * Particularly, it can be connect to self. -diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost -index 33aaa572f686..447648fc48f4 100644 ---- a/scripts/Makefile.modpost -+++ b/scripts/Makefile.modpost -@@ -53,6 +53,7 @@ MODPOST = scripts/mod/modpost \ - $(if $(KBUILD_EXTMOD),$(addprefix -e ,$(KBUILD_EXTRA_SYMBOLS))) \ - $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ - $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ -+ $(if $(CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE),-f) \ - $(if $(CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS)$(KBUILD_NSDEPS),-N) \ - $(if $(KBUILD_MODPOST_WARN),-w) - -diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig -index 013ba3a57669..31ce967a1959 100644 ---- a/scripts/gcc-plugins/Kconfig -+++ b/scripts/gcc-plugins/Kconfig -@@ -53,6 +53,11 @@ config GCC_PLUGIN_LATENT_ENTROPY - is some slowdown of the boot process (about 0.5%) and fork and - irq processing. - -+ When extra_latent_entropy is passed on the kernel command line, -+ entropy will be extracted from up to the first 4GB of RAM while the -+ runtime memory allocator is being initialized. This costs even more -+ slowdown of the boot process. -+ - Note that entropy extracted this way is not cryptographically - secure! - -diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c -index 5c3c50c5ec52..b539cd7159be 100644 ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -37,6 +37,8 @@ static int warn_unresolved = 0; - /* How a symbol is exported */ - static int sec_mismatch_count = 0; - static int sec_mismatch_fatal = 0; -+static int writable_fptr_count = 0; -+static int writable_fptr_verbose = 0; - /* ignore missing files */ - static int ignore_missing_files; - /* If set to 1, only warn (instead of error) about missing ns imports */ -@@ -1007,6 +1009,7 @@ enum mismatch { - ANY_EXIT_TO_ANY_INIT, - EXPORT_TO_INIT_EXIT, - EXTABLE_TO_NON_TEXT, -+ DATA_TO_TEXT - }; - - /** -@@ -1133,6 +1136,12 @@ static const struct sectioncheck sectioncheck[] = { - .good_tosec = {ALL_TEXT_SECTIONS , NULL}, - .mismatch = EXTABLE_TO_NON_TEXT, - .handler = extable_mismatch_handler, -+}, -+/* Do not reference code from writable data */ -+{ -+ .fromsec = { DATA_SECTIONS, NULL }, -+ .bad_tosec = { ALL_TEXT_SECTIONS, NULL }, -+ .mismatch = DATA_TO_TEXT - } - }; - -@@ -1320,10 +1329,10 @@ static Elf_Sym *find_elf_symbol(struct elf_info *elf, Elf64_Sword addr, - continue; - if (!is_valid_name(elf, sym)) - continue; -- if (sym->st_value == addr) -- return sym; - /* Find a symbol nearby - addr are maybe negative */ - d = sym->st_value - addr; -+ if (d == 0) -+ return sym; - if (d < 0) - d = addr - sym->st_value; - if (d < distance) { -@@ -1458,7 +1467,13 @@ static void report_sec_mismatch(const char *modname, - char *prl_from; - char *prl_to; - -- sec_mismatch_count++; -+ if (mismatch->mismatch == DATA_TO_TEXT) { -+ writable_fptr_count++; -+ if (!writable_fptr_verbose) -+ return; -+ } else { -+ sec_mismatch_count++; -+ } - - get_pretty_name(from_is_func, &from, &from_p); - get_pretty_name(to_is_func, &to, &to_p); -@@ -1580,6 +1595,12 @@ static void report_sec_mismatch(const char *modname, - fatal("There's a special handler for this mismatch type, " - "we should never get here."); 
- break; -+ case DATA_TO_TEXT: -+ fprintf(stderr, -+ "The %s %s:%s references\n" -+ "the %s %s:%s%s\n", -+ from, fromsec, fromsym, to, tosec, tosym, to_p); -+ break; - } - fprintf(stderr, "\n"); - } -@@ -2559,7 +2580,7 @@ int main(int argc, char **argv) - struct ext_sym_list *extsym_iter; - struct ext_sym_list *extsym_start = NULL; - -- while ((opt = getopt(argc, argv, "i:e:mnsT:o:awENd:")) != -1) { -+ while ((opt = getopt(argc, argv, "i:e:fmnsT:o:awENd:")) != -1) { - switch (opt) { - case 'i': - kernel_read = optarg; -@@ -2573,6 +2594,9 @@ int main(int argc, char **argv) - extsym_iter->file = optarg; - extsym_start = extsym_iter; - break; -+ case 'f': -+ writable_fptr_verbose = 1; -+ break; - case 'm': - modversions = 1; - break; -@@ -2676,6 +2700,11 @@ int main(int argc, char **argv) - } - - free(buf.p); -+ if (writable_fptr_count && !writable_fptr_verbose) -+ warn("modpost: Found %d writable function pointer%s.\n" -+ "To see full details build your kernel with:\n" -+ "'make CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE=y'\n", -+ writable_fptr_count, (writable_fptr_count == 1 ? "" : "s")); - - return err; - } -diff --git a/security/Kconfig b/security/Kconfig -index cd3cc7da3a55..127b54aecf87 100644 ---- a/security/Kconfig -+++ b/security/Kconfig -@@ -9,7 +9,7 @@ source "security/keys/Kconfig" - - config SECURITY_DMESG_RESTRICT - bool "Restrict unprivileged access to the kernel syslog" -- default n -+ default y - help - This enforces restrictions on unprivileged users reading the kernel - syslog via dmesg(8). -@@ -19,10 +19,34 @@ config SECURITY_DMESG_RESTRICT - - If you are unsure how to answer this question, answer N. - -+config SECURITY_PERF_EVENTS_RESTRICT -+ bool "Restrict unprivileged use of performance events" -+ depends on PERF_EVENTS -+ default y -+ help -+ If you say Y here, the kernel.perf_event_paranoid sysctl -+ will be set to 3 by default, and no unprivileged use of the -+ perf_event_open syscall will be permitted unless it is -+ changed. -+ -+config SECURITY_TIOCSTI_RESTRICT -+ bool "Restrict unprivileged use of tiocsti command injection" -+ default y -+ help -+ This enforces restrictions on unprivileged users injecting commands -+ into other processes which share a tty session using the TIOCSTI -+ ioctl. This option makes TIOCSTI use require CAP_SYS_ADMIN. -+ -+ If this option is not selected, no restrictions will be enforced -+ unless the tiocsti_restrict sysctl is explicitly set to (1). -+ -+ If you are unsure how to answer this question, answer N. -+ - config SECURITY - bool "Enable different security models" - depends on SYSFS - depends on MULTIUSER -+ default y - help - This allows you to choose different security modules to be - configured into your kernel. -@@ -48,6 +72,7 @@ config SECURITYFS - config SECURITY_NETWORK - bool "Socket and Networking Security Hooks" - depends on SECURITY -+ default y - help - This enables the socket and networking security hooks. 
- If enabled, a security module can use these hooks to -@@ -154,6 +179,7 @@ config HARDENED_USERCOPY - bool "Harden memory copies between kernel and userspace" - depends on HAVE_HARDENED_USERCOPY_ALLOCATOR - imply STRICT_DEVMEM -+ default y - help - This option checks for obviously wrong memory regions when - copying memory to/from the kernel (via copy_to_user() and -@@ -166,7 +192,6 @@ config HARDENED_USERCOPY - config HARDENED_USERCOPY_FALLBACK - bool "Allow usercopy whitelist violations to fallback to object size" - depends on HARDENED_USERCOPY -- default y - help - This is a temporary option that allows missing usercopy whitelists - to be discovered via a WARN() to the kernel log, instead of -@@ -191,10 +216,21 @@ config HARDENED_USERCOPY_PAGESPAN - config FORTIFY_SOURCE - bool "Harden common str/mem functions against buffer overflows" - depends on ARCH_HAS_FORTIFY_SOURCE -+ default y - help - Detect overflows of buffers in common string and memory functions - where the compiler can determine and validate the buffer sizes. - -+config FORTIFY_SOURCE_STRICT_STRING -+ bool "Harden common functions against buffer overflows" -+ depends on FORTIFY_SOURCE -+ depends on EXPERT -+ help -+ Perform stricter overflow checks catching overflows within objects -+ for common C string functions rather than only between objects. -+ -+ This is not yet intended for production use, only bug finding. -+ - config STATIC_USERMODEHELPER - bool "Force all usermode helper calls through a single binary" - help -diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening -index af4c979b38ee..001796a391e9 100644 ---- a/security/Kconfig.hardening -+++ b/security/Kconfig.hardening -@@ -169,6 +169,7 @@ config STACKLEAK_RUNTIME_DISABLE - - config INIT_ON_ALLOC_DEFAULT_ON - bool "Enable heap memory zeroing on allocation by default" -+ default yes - help - This has the effect of setting "init_on_alloc=1" on the kernel - command line. This can be disabled with "init_on_alloc=0". -@@ -181,6 +182,7 @@ config INIT_ON_ALLOC_DEFAULT_ON - - config INIT_ON_FREE_DEFAULT_ON - bool "Enable heap memory zeroing on free by default" -+ default yes - help - This has the effect of setting "init_on_free=1" on the kernel - command line. This can be disabled with "init_on_free=0". -@@ -196,6 +198,21 @@ config INIT_ON_FREE_DEFAULT_ON - touching "cold" memory areas. Most cases see 3-5% impact. Some - synthetic workloads have measured as high as 8%. - -+config PAGE_SANITIZE_VERIFY -+ bool "Verify sanitized pages" -+ default y -+ help -+ When init_on_free is enabled, verify that newly allocated pages -+ are zeroed to detect write-after-free bugs. -+ -+config SLAB_SANITIZE_VERIFY -+ default y -+ bool "Verify sanitized SLAB allocations" -+ depends on !KASAN -+ help -+ When init_on_free is enabled, verify that newly allocated slab -+ objects are zeroed to detect write-after-free bugs. -+ - endmenu - - endmenu -diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig -index 9e921fc72538..ae851a826c26 100644 ---- a/security/selinux/Kconfig -+++ b/security/selinux/Kconfig -@@ -3,7 +3,7 @@ config SECURITY_SELINUX - bool "NSA SELinux Support" - depends on SECURITY_NETWORK && AUDIT && NET && INET - select NETWORK_SECMARK -- default n -+ default y - help - This selects NSA Security-Enhanced Linux (SELinux). - You will also need a policy configuration and a labeled filesystem. -@@ -70,29 +70,6 @@ config SECURITY_SELINUX_AVC_STATS - /sys/fs/selinux/avc/cache_stats, which may be monitored via - tools such as avcstat. 
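A quick way to sanity-check the flipped defaults above after booting a kernel built from this patch set (a sketch, not part of the diff; the exact values depend on the final .config):

    sysctl kernel.dmesg_restrict        # 1 when SECURITY_DMESG_RESTRICT=y
    sysctl kernel.perf_event_paranoid   # 3 when SECURITY_PERF_EVENTS_RESTRICT=y
    sysctl kernel.kptr_restrict         # 2 via the lib/vsprintf.c change earlier in this patch
    # if CONFIG_IKCONFIG_PROC is enabled, confirm the hardening options made it into the build:
    zgrep -E 'INIT_ON_(ALLOC|FREE)_DEFAULT_ON|SLAB_SANITIZE_VERIFY|HARDENED_USERCOPY' /proc/config.gz
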
- --config SECURITY_SELINUX_CHECKREQPROT_VALUE -- int "NSA SELinux checkreqprot default value" -- depends on SECURITY_SELINUX -- range 0 1 -- default 0 -- help -- This option sets the default value for the 'checkreqprot' flag -- that determines whether SELinux checks the protection requested -- by the application or the protection that will be applied by the -- kernel (including any implied execute for read-implies-exec) for -- mmap and mprotect calls. If this option is set to 0 (zero), -- SELinux will default to checking the protection that will be applied -- by the kernel. If this option is set to 1 (one), SELinux will -- default to checking the protection requested by the application. -- The checkreqprot flag may be changed from the default via the -- 'checkreqprot=' boot parameter. It may also be changed at runtime -- via /sys/fs/selinux/checkreqprot if authorized by policy. -- -- WARNING: this option is deprecated and will be removed in a future -- kernel release. -- -- If you are unsure how to answer this question, answer 0. -- - config SECURITY_SELINUX_SIDTAB_HASH_BITS - int "NSA SELinux sidtab hashtable size" - depends on SECURITY_SELINUX -diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c -index 4c037c2545c1..2437a1895baa 100644 ---- a/security/selinux/hooks.c -+++ b/security/selinux/hooks.c -@@ -135,21 +135,7 @@ static int __init selinux_enabled_setup(char *str) - __setup("selinux=", selinux_enabled_setup); - #endif - --static unsigned int selinux_checkreqprot_boot = -- CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE; -- --static int __init checkreqprot_setup(char *str) --{ -- unsigned long checkreqprot; -- -- if (!kstrtoul(str, 0, &checkreqprot)) { -- selinux_checkreqprot_boot = checkreqprot ? 1 : 0; -- if (checkreqprot) -- pr_warn("SELinux: checkreqprot set to 1 via kernel parameter. This is deprecated and will be rejected in a future kernel release.\n"); -- } -- return 1; --} --__setup("checkreqprot=", checkreqprot_setup); -+static const unsigned int selinux_checkreqprot_boot; - - /** - * selinux_secmark_enabled - Check to see if SECMARK is currently enabled -diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c -index 4781314c2510..7f068515d799 100644 ---- a/security/selinux/selinuxfs.c -+++ b/security/selinux/selinuxfs.c -@@ -641,7 +641,6 @@ static ssize_t sel_read_checkreqprot(struct file *filp, char __user *buf, - static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) - { -- struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; - char *page; - ssize_t length; - unsigned int new_value; -@@ -665,18 +664,9 @@ static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, - return PTR_ERR(page); - - length = -EINVAL; -- if (sscanf(page, "%u", &new_value) != 1) -+ if (sscanf(page, "%u", &new_value) != 1 || new_value) - goto out; - -- if (new_value) { -- char comm[sizeof(current->comm)]; -- -- memcpy(comm, current->comm, sizeof(comm)); -- pr_warn_once("SELinux: %s (%d) set checkreqprot to 1. This is deprecated and will be rejected in a future kernel release.\n", -- comm, current->pid); -- } -- -- fsi->state->checkreqprot = new_value ? 
1 : 0; - length = count; - out: - kfree(page); -diff --git a/security/yama/Kconfig b/security/yama/Kconfig -index a810304123ca..b809050b25d2 100644 ---- a/security/yama/Kconfig -+++ b/security/yama/Kconfig -@@ -2,7 +2,7 @@ - config SECURITY_YAMA - bool "Yama support" - depends on SECURITY -- default n -+ default y - help - This selects Yama, which extends DAC support with additional - system-wide security settings beyond regular Linux discretionary diff --git a/linux57-tkg/linux57-tkg-patches/0012-misc-additions.patch b/linux57-tkg/linux57-tkg-patches/0012-misc-additions.patch deleted file mode 100644 index 33f5502..0000000 --- a/linux57-tkg/linux57-tkg-patches/0012-misc-additions.patch +++ /dev/null @@ -1,55 +0,0 @@ -diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig -index 0840d27381ea..73aba9a31064 100644 ---- a/drivers/tty/Kconfig -+++ b/drivers/tty/Kconfig -@@ -75,6 +75,19 @@ config VT_CONSOLE_SLEEP - def_bool y - depends on VT_CONSOLE && PM_SLEEP - -+config NR_TTY_DEVICES -+ int "Maximum tty device number" -+ depends on VT -+ range 12 63 -+ default 63 -+ ---help--- -+ This option is used to change the number of tty devices in /dev. -+ The default value is 63. The lowest number you can set is 12, -+ 63 is also the upper limit so we don't overrun the serial -+ consoles. -+ -+ If unsure, say 63. -+ - config HW_CONSOLE - bool - depends on VT && !UML -diff --git a/include/uapi/linux/vt.h b/include/uapi/linux/vt.h -index e9d39c48520a..3bceead8da40 100644 ---- a/include/uapi/linux/vt.h -+++ b/include/uapi/linux/vt.h -@@ -3,12 +3,25 @@ - #define _UAPI_LINUX_VT_H - - -+/* -+ * We will make this definition solely for the purpose of making packages -+ * such as splashutils build, because they can not understand that -+ * NR_TTY_DEVICES is defined in the kernel configuration. -+ */ -+#ifndef CONFIG_NR_TTY_DEVICES -+#define CONFIG_NR_TTY_DEVICES 63 -+#endif -+ - /* - * These constants are also useful for user-level apps (e.g., VC - * resizing). - */ - #define MIN_NR_CONSOLES 1 /* must be at least 1 */ --#define MAX_NR_CONSOLES 63 /* serial lines start at 64 */ -+/* -+ * NR_TTY_DEVICES: -+ * Value MUST be at least 12 and must never be higher then 63 -+ */ -+#define MAX_NR_CONSOLES CONFIG_NR_TTY_DEVICES /* serial lines start above this */ - /* Note: the ioctl VT_GETSTATE does not work for - consoles 16 and higher (since it returns a short) */ - diff --git a/linux58-tkg/PKGBUILD b/linux58-tkg/PKGBUILD deleted file mode 100644 index 1d3be7e..0000000 --- a/linux58-tkg/PKGBUILD +++ /dev/null @@ -1,285 +0,0 @@ -# Based on the file created for Arch Linux by: -# Tobias Powalowski -# Thomas Baechler - -# Contributor: Tk-Glitch - -plain ' .---.` `.---.' -plain ' `/syhhhyso- -osyhhhys/`' -plain ' .syNMdhNNhss/``.---.``/sshNNhdMNys.' -plain ' +sdMh.`+MNsssssssssssssssNM+`.hMds+' -plain ' :syNNdhNNhssssssssssssssshNNhdNNys:' -plain ' /ssyhhhysssssssssssssssssyhhhyss/' -plain ' .ossssssssssssssssssssssssssssso.' 
-plain ' :sssssssssssssssssssssssssssssssss:' -plain ' /sssssssssssssssssssssssssssssssssss/' -plain ' :sssssssssssssoosssssssoosssssssssssss:' -plain ' osssssssssssssoosssssssoossssssssssssso' -plain ' osssssssssssyyyyhhhhhhhyyyyssssssssssso' -plain ' /yyyyyyhhdmmmmNNNNNNNNNNNmmmmdhhyyyyyy/' -plain ' smmmNNNNNNNNNNNNNNNNNNNNNNNNNNNNNmmms' -plain ' /dNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNd/' -plain ' `:sdNNNNNNNNNNNNNNNNNNNNNNNNNds:`' -plain ' `-+shdNNNNNNNNNNNNNNNdhs+-`' -plain ' `.-:///////:-.`' - -_where="$PWD" # track basedir as different Arch based distros are moving srcdir around - -source "$_where"/customization.cfg # load default configuration from file -source "$_where"/linux*-tkg-config/prepare - -if [[ "$_sub" = rc* ]]; then - _srcpath="linux-${_basekernel}-${_sub}" -else - _srcpath="linux-${_basekernel}" -fi - -_tkg_initscript - -_distro="Arch" - -if [ -n "$_custom_pkgbase" ]; then - pkgbase="${_custom_pkgbase}" -else - pkgbase=linux"${_basever}"-tkg-"${_cpusched}" -fi -pkgname=("${pkgbase}" "${pkgbase}-headers") -pkgver="${_basekernel}"."${_sub}" -pkgrel=25 -pkgdesc='Linux-tkg' -arch=('x86_64') # no i686 in here -url="http://www.kernel.org/" -license=('GPL2') -makedepends=('xmlto' 'docbook-xsl' 'kmod' 'inetutils' 'bc' 'libelf' 'pahole' 'patchutils' 'flex' 'python-sphinx' 'python-sphinx_rtd_theme' 'graphviz' 'imagemagick' 'git') -optdepends=('schedtool') -options=('!strip' 'docs') -source=("https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-${_basekernel}.tar.xz" - "https://cdn.kernel.org/pub/linux/kernel/v5.x/patch-${pkgver}.xz" - "https://raw.githubusercontent.com/graysky2/kernel_gcc_patch/master/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v5.8%2B.patch" - 'config.x86_64' # stock Arch config - #'config_hardened.x86_64' # hardened Arch config - 90-cleanup.hook - cleanup - # ARCH Patches - 0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch - # TkG - 0002-clear-patches.patch - 0003-glitched-base.patch - 0003-glitched-cfs.patch - #0004-glitched-ondemand-muqss.patch - #0004-glitched-muqss.patch - #0004-5.8-ck1.patch - 0005-undead-glitched-ondemand-pds.patch - 0005-undead-glitched-pds.patch - 0005-v5.8_undead-pds099o.patch - 0005-glitched-pds.patch - 0006-add-acs-overrides_iommu.patch - 0007-v5.8-fsync.patch - 0008-5.8-bcachefs.patch - 0009-glitched-ondemand-bmq.patch - 0009-glitched-bmq.patch - 0009-prjc_v5.8-r3.patch - 0011-ZFS-fix.patch - #0012-linux-hardened.patch - 0012-misc-additions.patch -) -sha256sums=('e7f75186aa0642114af8f19d99559937300ca27acaf7451b36d4f9b0f85cf1f5' - '2ea49982bd10e4c880d49051535bd820e276dd3235c3c913b255aaaadc707e1d' - '5ab29eb64e57df83b395a29a6a4f89030d142feffbfbf73b3afc6d97a2a7fd12' - '181330a9cf4517abbbe29b93165bc859ad8ca14a43582f4e1d69aae2b5ecc2c9' - '1e15fc2ef3fa770217ecc63a220e5df2ddbcf3295eb4a021171e7edd4c6cc898' - '66a03c246037451a77b4d448565b1d7e9368270c7d02872fbd0b5d024ed0a997' - 'f6383abef027fd9a430fd33415355e0df492cdc3c90e9938bf2d98f4f63b32e6' - '35a7cde86fb94939c0f25a62b8c47f3de0dbd3c65f876f460b263181b3e92fc0' - 'b9ebe0ae69bc2b2091d6bfcf6c7875a87ea7969fcfa4e306c48d47a60f9ef4d6' - '7058e57fd68367b029adc77f2a82928f1433daaf02c8c279cb2d13556c8804d7' - '62496f9ca788996181ef145f96ad26291282fcc3fb95cdc04080dcf84365be33' - '7fd8e776209dac98627453fda754bdf9aff4a09f27cb0b3766d7983612eb3c74' - '31b172eb6a0c635a8d64cc1c2e8181d9f928ee991bd44f6e556d1713b815f8d9' - '87bca363416655bc865fcb2cc0d1532cb010a61d9b9f625e3c15cd12eeee3a59' - '19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a' - 
'cd225e86d72eaf6c31ef3d7b20df397f4cc44ddd04389850691292cdf292b204' - '86414a20225deec084e0e48b35552b3a4eef67f76755b32a10febb7b6308dcb7' - '9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177' - '965a517a283f265a012545fbb5cc9e516efc9f6166d2aa1baf7293a32a1086b7' - 'f5dbff4833a2e3ca94c202e5197894d5f1006c689ff149355353e77d2e17c943' - '49262ce4a8089fa70275aad742fc914baa28d9c384f710c9a62f64796d13e104' - '98311deeb474b39e821cd1e64198793d5c4d797155b3b8bbcb1938b7f11e8d74') - -export KBUILD_BUILD_HOST=archlinux -export KBUILD_BUILD_USER=$pkgbase -export KBUILD_BUILD_TIMESTAMP="$(date -Ru${SOURCE_DATE_EPOCH:+d @$SOURCE_DATE_EPOCH})" - -prepare() { - rm -rf $pkgdir # Nuke the entire pkg folder so it'll get regenerated clean on next build - - ln -s "${_where}/customization.cfg" "${srcdir}" # workaround - - cd "${srcdir}/${_srcpath}" - - _tkg_srcprep -} - -build() { - cd "${srcdir}/${_srcpath}" - - # Use custom compiler paths if defined - if [ -n "${CUSTOM_GCC_PATH}" ]; then - PATH=${CUSTOM_GCC_PATH}/bin:${CUSTOM_GCC_PATH}/lib:${CUSTOM_GCC_PATH}/include:${PATH} - fi - - if [ "$_force_all_threads" = "true" ]; then - _force_all_threads="-j$((`nproc`*2))" - else - _force_all_threads="${MAKEFLAGS}" - fi - - # ccache - if [ "$_noccache" != "true" ] && pacman -Qq ccache &> /dev/null; then - export PATH="/usr/lib/ccache/bin/:$PATH" - export CCACHE_SLOPPINESS="file_macro,locale,time_macros" - export CCACHE_NOHASHDIR="true" - msg2 'ccache was found and will be used' - fi - - # document the TkG variables, excluding "_", "_EXT_CONFIG_PATH", and "_where". - declare -p | cut -d ' ' -f 3 | grep -P '^_(?!=|EXT_CONFIG_PATH|where)' > "${srcdir}/customization-full.cfg" - - # build! - _runtime=$( time ( schedtool -B -n 1 -e ionice -n 1 make ${_force_all_threads} LOCALVERSION= bzImage modules 2>&1 ) 3>&1 1>&2 2>&3 ) || _runtime=$( time ( make ${_force_all_threads} LOCALVERSION= bzImage modules 2>&1 ) 3>&1 1>&2 2>&3 ) -} - -hackbase() { - pkgdesc="The $pkgdesc kernel and modules" - depends=('coreutils' 'kmod' 'initramfs') - optdepends=('linux-docs: Kernel hackers manual - HTML documentation that comes with the Linux kernel.' - 'crda: to set the correct wireless channels of your country.' - 'linux-firmware: Firmware files for Linux' - 'modprobed-db: Keeps track of EVERY kernel module that has ever been probed. Useful for make localmodconfig.' - 'nvidia-tkg: NVIDIA drivers for all installed kernels - non-dkms version.' - 'nvidia-dkms-tkg: NVIDIA drivers for all installed kernels - dkms version.' 
- 'update-grub: Simple wrapper around grub-mkconfig.') - provides=("linux=${pkgver}" "${pkgbase}" VIRTUALBOX-GUEST-MODULES WIREGUARD-MODULE) - replaces=(virtualbox-guest-modules-arch wireguard-arch) - - cd "${srcdir}/${_srcpath}" - - # get kernel version - local _kernver="$(\033[1;0m \033[1;1m$1\033[1;0m" >&2 -} - -error() { - echo -e " \033[1;31m==> ERROR: $1\033[1;0m" >&2 -} - -warning() { - echo -e " \033[1;33m==> WARNING: $1\033[1;0m" >&2 -} - -plain() { - echo "$1" >&2 -} - -# Stop the script at any ecountered error -set -e - -_where=`pwd` -srcdir="$_where" - -source linux*-tkg-config/prepare - -_cpu_opt_patch_link="https://raw.githubusercontent.com/graysky2/kernel_gcc_patch/master/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v${_basekernel}%2B.patch" - -source customization.cfg - -if [ "$1" != "install" ] && [ "$1" != "config" ] && [ "$1" != "uninstall-help" ]; then - msg2 "Argument not recognised, options are: - - config : shallow clones the linux ${_basekernel}.x git tree into the folder linux-${_basekernel}, then applies on it the extra patches and prepares the .config file - by copying the one from the current linux system in /boot/config-`uname -r` and updates it. - - install : [RPM and DEB based distros only], does the config step, proceeds to compile, then prompts to install - - uninstall-help : [RPM and DEB based distros only], lists the installed kernels in this system, then gives a hint on how to uninstall them manually." - exit 0 -fi - -# Load external configuration file if present. Available variable values will overwrite customization.cfg ones. -if [ -e "$_EXT_CONFIG_PATH" ]; then - msg2 "External configuration file $_EXT_CONFIG_PATH will be used and will override customization.cfg values." - source "$_EXT_CONFIG_PATH" -fi - -_misc_adds="false" # We currently don't want this enabled on non-Arch - -if [ "$1" = "install" ] || [ "$1" = "config" ]; then - - if [ -z $_distro ] && [ "$1" = "install" ]; then - while true; do - echo "Which linux distribution are you running ?" - echo "if it's not on the list, chose the closest one to it: Fedora/Suse for RPM, Ubuntu/Debian for DEB" - echo " 1) Debian" - echo " 2) Fedora" - echo " 3) Suse" - echo " 4) Ubuntu" - read -p "[1-4]: " _distro_index - - if [ "$_distro_index" = "1" ]; then - _distro="Debian" - break - elif [ "$_distro_index" = "2" ]; then - _distro="Fedora" - break - elif [ "$_distro_index" = "3" ]; then - _distro="Suse" - break - elif [ "$_distro_index" = "4" ]; then - _distro="Ubuntu" - break - else - echo "Wrong index." - fi - done - fi - - if [[ $1 = "install" && "$_distro" != "Ubuntu" && "$_distro" != "Debian" && "$_distro" != "Fedora" && "$_distro" != "Suse" ]]; then - msg2 "Variable \"_distro\" in \"customization.cfg\" hasn't been set to \"Ubuntu\", \"Debian\", \"Fedora\" or \"Suse\"" - msg2 "This script can only install custom kernels for RPM and DEB based distros, though only those keywords are permitted. Exiting..." 
- exit 0 - fi - - if [ "$_distro" = "Ubuntu" ] || [ "$_distro" = "Debian" ]; then - msg2 "Installing dependencies" - sudo apt install git build-essential kernel-package fakeroot libncurses5-dev libssl-dev ccache bison flex qtbase5-dev -y - elif [ "$_distro" = "Fedora" ]; then - msg2 "Installing dependencies" - sudo dnf install fedpkg fedora-packager rpmdevtools ncurses-devel pesign grubby qt5-devel libXi-devel gcc-c++ git ccache flex bison elfutils-libelf-devel openssl-devel dwarves rpm-build -y - elif [ "$_distro" = "Suse" ]; then - msg2 "Installing dependencies" - sudo zypper install -y rpmdevtools ncurses-devel pesign libXi-devel gcc-c++ git ccache flex bison elfutils libelf-devel openssl-devel dwarves make patch bc rpm-build libqt5-qtbase-common-devel libqt5-qtbase-devel lz4 - fi - - # Force prepare script to avoid Arch specific commands if the user is using `config` - if [ "$1" = "config" ]; then - _distro="" - fi - - if [ -d linux-${_basekernel}.orig ]; then - rm -rf linux-${_basekernel}.orig - fi - - if [ -d linux-${_basekernel} ]; then - msg2 "Reseting files in linux-$_basekernel to their original state and getting latest updates" - cd "$_where"/linux-${_basekernel} - git checkout --force linux-$_basekernel.y - git clean -f -d -x - git pull - msg2 "Done" - cd "$_where" - else - msg2 "Shallow git cloning linux $_basekernel" - git clone --branch linux-$_basekernel.y --single-branch --depth=1 https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git linux-${_basekernel} - msg2 "Done" - fi - - # Define current kernel subversion - if [ -z $_kernel_subver ]; then - cd "$_where"/linux-${_basekernel} - _kernelverstr=`git describe` - _kernel_subver=${_kernelverstr:5} - cd "$_where" - fi - - - # Run init script that is also run in PKGBUILD, it will define some env vars that we will use - _tkg_initscript - - cd "$_where" - msg2 "Downloading Graysky2's CPU optimisations patch" - wget "$_cpu_opt_patch_link" - - # Follow Ubuntu install isntructions in https://wiki.ubuntu.com/KernelTeam/GitKernelBuild - - # cd in linux folder, copy Ubuntu's current config file, update with new params - cd "$_where"/linux-${_basekernel} - - msg2 "Copying current kernel's config and running make oldconfig..." - cp /boot/config-`uname -r` .config - if [ "$_distro" = "Debian" ]; then #Help Debian cert problem. - sed -i -e 's#CONFIG_SYSTEM_TRUSTED_KEYS="debian/certs/test-signing-certs.pem"#CONFIG_SYSTEM_TRUSTED_KEYS=""#g' .config - sed -i -e 's#CONFIG_SYSTEM_TRUSTED_KEYS="debian/certs/debian-uefi-certs.pem"#CONFIG_SYSTEM_TRUSTED_KEYS=""#g' .config - fi - yes '' | make oldconfig - msg2 "Done" - - # apply linux-tkg patching script - _tkg_srcprep - - msg2 "Configuration done." 
-fi - -if [ "$1" = "install" ]; then - - # Use custom compiler paths if defined - if [ -n "${CUSTOM_GCC_PATH}" ]; then - PATH=${CUSTOM_GCC_PATH}/bin:${CUSTOM_GCC_PATH}/lib:${CUSTOM_GCC_PATH}/include:${PATH} - fi - - if [ "$_force_all_threads" = "true" ]; then - _thread_num=`nproc` - else - _thread_num=`expr \`nproc\` / 4` - if [ "$_thread_num" = "0" ]; then - _thread_num=1 - fi - fi - - # ccache - if [ "$_noccache" != "true" ]; then - - if [ "$_distro" = "Ubuntu" ] || [ "$_distro" = "Debian" ]; then - export PATH="/usr/lib/ccache/bin/:$PATH" - elif [ "$_distro" = "Fedora" ] || [ "$_distro" = "Suse" ]; then - export PATH="/usr/lib64/ccache/:$PATH" - fi - - export CCACHE_SLOPPINESS="file_macro,locale,time_macros" - export CCACHE_NOHASHDIR="true" - msg2 'ccache was found and will be used' - - fi - - if [ -z $_kernel_localversion ]; then - _kernel_flavor="tkg-${_cpusched}" - else - _kernel_flavor="tkg-${_kernel_localversion}" - fi - - if [ "$_distro" = "Ubuntu" ] || [ "$_distro" = "Debian" ]; then - - if make -j ${_thread_num} deb-pkg LOCALVERSION=-${_kernel_flavor}; then - msg2 "Building successfully finished!" - - cd "$_where" - - # Create DEBS folder if it doesn't exist - mkdir -p DEBS - - # Move rpm files to RPMS folder inside the linux-tkg folder - mv "$_where"/*.deb "$_where"/DEBS/ - - read -p "Do you want to install the new Kernel ? y/[n]: " _install - if [[ $_install =~ [yY] ]] || [ $_install = "yes" ] || [ $_install = "Yes" ]; then - cd "$_where" - _kernelname=$_basekernel.$_kernel_subver-$_kernel_flavor - _headers_deb="linux-headers-${_kernelname}*.deb" - _image_deb="linux-image-${_kernelname}_*.deb" - _kernel_devel_deb="linux-libc-dev_${_kernelname}*.deb" - - cd DEBS - sudo dpkg -i $_headers_deb $_image_deb $_kernel_devel_deb - fi - fi - - elif [[ "$_distro" = "Fedora" || "$_distro" = "Suse" ]]; then - - # Replace dashes with underscores, it seems that it's being done by binrpm-pkg - # Se we can actually refer properly to the rpm files. - _kernel_flavor=${_kernel_flavor//-/_} - - if make -j ${_thread_num} rpm-pkg EXTRAVERSION="_${_kernel_flavor}"; then - msg2 "Building successfully finished!" - - cd "$_where" - - # Create RPMS folder if it doesn't exist - mkdir -p RPMS - - # Move rpm files to RPMS folder inside the linux-tkg folder - mv ~/rpmbuild/RPMS/x86_64/* "$_where"/RPMS/ - - #Clean up the original folder, unneeded and takes a lot of space - rm -rf ~/rpmbuild/ - - read -p "Do you want to install the new Kernel ? 
y/[n]: " _install - if [ "$_install" = "y" ] || [ "$_install" = "Y" ] || [ "$_install" = "yes" ] || [ "$_install" = "Yes" ]; then - - _kernelname=$_basekernel.${_kernel_subver}_$_kernel_flavor - _headers_rpm="kernel-headers-${_kernelname}*.rpm" - _kernel_rpm="kernel-${_kernelname}*.rpm" - _kernel_devel_rpm="kernel-devel-${_kernelname}*.rpm" - - cd RPMS - if [ "$_distro" = "Fedora" ]; then - sudo dnf install $_headers_rpm $_kernel_rpm $_kernel_devel_rpm - elif [ "$_distro" = "Suse" ]; then - msg2 "Some files from 'linux-glibc-devel' will be replaced by files from the custom kernel-headers package" - msg2 "To revert to the original kernel headers do 'sudo zypper install -f linux-glibc-devel'" - sudo zypper install --replacefiles --allow-unsigned-rpm $_headers_rpm $_kernel_rpm $_kernel_devel_rpm - fi - - msg2 "Install successful" - fi - fi - fi -fi - -if [ "$1" = "uninstall-help" ]; then - - cd "$_where" - msg2 "List of installed custom tkg kernels: " - - if [ "$_distro" = "Ubuntu" ]; then - dpkg -l "*tkg*" | grep "linux.*tkg" - dpkg -l "*linux-libc-dev*" | grep "linux.*tkg" - msg2 "To uninstall a version, you should remove the linux-image, linux-headers and linux-libc-dev associated with it (if installed), with: " - msg2 " sudo apt remove linux-image-VERSION linux-headers-VERSION linux-libc-dev-VERSION" - msg2 " where VERSION is displayed in the lists above; uninstall only versions that have \"tkg\" in their name" - elif [ "$_distro" = "Fedora" ]; then - dnf list --installed kernel* - msg2 "To uninstall a version, you should remove the kernel, kernel-headers and kernel-devel associated with it (if installed), with: " - msg2 " sudo dnf remove --noautoremove kernel-VERSION kernel-devel-VERSION kernel-headers-VERSION" - msg2 " where VERSION is displayed in the second column" - elif [ "$_distro" = "Suse" ]; then - zypper packages --installed-only | grep "kernel.*tkg" - msg2 "To uninstall a version, you should remove the kernel, kernel-headers and kernel-devel associated with it (if installed), with: " - msg2 " sudo zypper remove --no-clean-deps kernel-VERSION kernel-devel-VERSION kernel-headers-VERSION" - msg2 " where VERSION is displayed in the second to last column" - fi - -fi diff --git a/linux58-tkg/linux58-tkg-config/90-cleanup.hook b/linux58-tkg/linux58-tkg-config/90-cleanup.hook deleted file mode 100644 index 99f5221..0000000 --- a/linux58-tkg/linux58-tkg-config/90-cleanup.hook +++ /dev/null @@ -1,14 +0,0 @@ -[Trigger] -Type = File -Operation = Install -Operation = Upgrade -Operation = Remove -Target = usr/lib/modules/*/ -Target = !usr/lib/modules/*/?* - -[Action] -Description = Cleaning up... -When = PostTransaction -Exec = /usr/share/libalpm/scripts/cleanup -NeedsTargets - diff --git a/linux58-tkg/linux58-tkg-config/cleanup b/linux58-tkg/linux58-tkg-config/cleanup deleted file mode 100755 index c00c08d..0000000 --- a/linux58-tkg/linux58-tkg-config/cleanup +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -for _f in /usr/lib/modules/*tkg*; do - if [[ ! -e ${_f}/vmlinuz ]]; then - rm -rf "$_f" - fi -done - -# vim:set ft=sh sw=2 et: - diff --git a/linux58-tkg/linux58-tkg-config/config.x86_64 b/linux58-tkg/linux58-tkg-config/config.x86_64 deleted file mode 100644 index 5cd21b9..0000000 --- a/linux58-tkg/linux58-tkg-config/config.x86_64 +++ /dev/null @@ -1,11020 +0,0 @@ -# -# Automatically generated file; DO NOT EDIT. 
-# Linux/x86 5.8.6-arch1 Kernel Configuration -# -CONFIG_CC_VERSION_TEXT="gcc (GCC) 10.2.0" -CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=100200 -CONFIG_LD_VERSION=235000000 -CONFIG_CLANG_VERSION=0 -CONFIG_CC_CAN_LINK=y -CONFIG_CC_CAN_LINK_STATIC=y -CONFIG_CC_HAS_ASM_GOTO=y -CONFIG_CC_HAS_ASM_INLINE=y -CONFIG_IRQ_WORK=y -CONFIG_BUILDTIME_TABLE_SORT=y -CONFIG_THREAD_INFO_IN_TASK=y - -# -# General setup -# -CONFIG_INIT_ENV_ARG_LIMIT=32 -# CONFIG_COMPILE_TEST is not set -CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y -CONFIG_BUILD_SALT="" -CONFIG_HAVE_KERNEL_GZIP=y -CONFIG_HAVE_KERNEL_BZIP2=y -CONFIG_HAVE_KERNEL_LZMA=y -CONFIG_HAVE_KERNEL_XZ=y -CONFIG_HAVE_KERNEL_LZO=y -CONFIG_HAVE_KERNEL_LZ4=y -# CONFIG_KERNEL_GZIP is not set -# CONFIG_KERNEL_BZIP2 is not set -# CONFIG_KERNEL_LZMA is not set -CONFIG_KERNEL_XZ=y -# CONFIG_KERNEL_LZO is not set -# CONFIG_KERNEL_LZ4 is not set -CONFIG_DEFAULT_INIT="" -CONFIG_DEFAULT_HOSTNAME="archlinux" -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -CONFIG_SYSVIPC_SYSCTL=y -CONFIG_POSIX_MQUEUE=y -CONFIG_POSIX_MQUEUE_SYSCTL=y -CONFIG_WATCH_QUEUE=y -CONFIG_CROSS_MEMORY_ATTACH=y -# CONFIG_USELIB is not set -CONFIG_AUDIT=y -CONFIG_HAVE_ARCH_AUDITSYSCALL=y -CONFIG_AUDITSYSCALL=y - -# -# IRQ subsystem -# -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_GENERIC_IRQ_SHOW=y -CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y -CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_GENERIC_IRQ_MIGRATION=y -CONFIG_HARDIRQS_SW_RESEND=y -CONFIG_GENERIC_IRQ_CHIP=y -CONFIG_IRQ_DOMAIN=y -CONFIG_IRQ_SIM=y -CONFIG_IRQ_DOMAIN_HIERARCHY=y -CONFIG_GENERIC_MSI_IRQ=y -CONFIG_GENERIC_MSI_IRQ_DOMAIN=y -CONFIG_IRQ_MSI_IOMMU=y -CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y -CONFIG_GENERIC_IRQ_RESERVATION_MODE=y -CONFIG_IRQ_FORCED_THREADING=y -CONFIG_SPARSE_IRQ=y -# CONFIG_GENERIC_IRQ_DEBUGFS is not set -# end of IRQ subsystem - -CONFIG_CLOCKSOURCE_WATCHDOG=y -CONFIG_ARCH_CLOCKSOURCE_INIT=y -CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y -CONFIG_GENERIC_TIME_VSYSCALL=y -CONFIG_GENERIC_CLOCKEVENTS=y -CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y -CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y -CONFIG_GENERIC_CMOS_UPDATE=y - -# -# Timers subsystem -# -CONFIG_TICK_ONESHOT=y -CONFIG_NO_HZ_COMMON=y -# CONFIG_HZ_PERIODIC is not set -CONFIG_NO_HZ_IDLE=y -# CONFIG_NO_HZ_FULL is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -# end of Timers subsystem - -# CONFIG_PREEMPT_NONE is not set -# CONFIG_PREEMPT_VOLUNTARY is not set -CONFIG_PREEMPT=y -CONFIG_PREEMPT_COUNT=y -CONFIG_PREEMPTION=y - -# -# CPU/Task time and stats accounting -# -CONFIG_TICK_CPU_ACCOUNTING=y -# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set -CONFIG_IRQ_TIME_ACCOUNTING=y -CONFIG_HAVE_SCHED_AVG_IRQ=y -# CONFIG_SCHED_THERMAL_PRESSURE is not set -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BSD_PROCESS_ACCT_V3=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_PSI=y -# CONFIG_PSI_DEFAULT_DISABLED is not set -# end of CPU/Task time and stats accounting - -CONFIG_CPU_ISOLATION=y - -# -# RCU Subsystem -# -CONFIG_TREE_RCU=y -CONFIG_PREEMPT_RCU=y -CONFIG_RCU_EXPERT=y -CONFIG_SRCU=y -CONFIG_TREE_SRCU=y -CONFIG_TASKS_RCU_GENERIC=y -CONFIG_TASKS_RCU=y -CONFIG_TASKS_RUDE_RCU=y -CONFIG_RCU_STALL_COMMON=y -CONFIG_RCU_NEED_SEGCBLIST=y -CONFIG_RCU_FANOUT=64 -CONFIG_RCU_FANOUT_LEAF=16 -CONFIG_RCU_FAST_NO_HZ=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_DELAY=500 -# CONFIG_RCU_NOCB_CPU is not set -# CONFIG_TASKS_TRACE_RCU_READ_MB is not set -# end of RCU Subsystem - -CONFIG_BUILD_BIN2C=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -# CONFIG_IKHEADERS is not set -CONFIG_LOG_BUF_SHIFT=17 
-CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 -CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 -CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y - -# -# Scheduler features -# -CONFIG_UCLAMP_TASK=y -CONFIG_UCLAMP_BUCKETS_COUNT=5 -# end of Scheduler features - -CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y -CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y -CONFIG_CC_HAS_INT128=y -CONFIG_ARCH_SUPPORTS_INT128=y -CONFIG_NUMA_BALANCING=y -CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y -CONFIG_CGROUPS=y -CONFIG_PAGE_COUNTER=y -CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y -CONFIG_MEMCG_KMEM=y -CONFIG_BLK_CGROUP=y -CONFIG_CGROUP_WRITEBACK=y -CONFIG_CGROUP_SCHED=y -CONFIG_FAIR_GROUP_SCHED=y -CONFIG_CFS_BANDWIDTH=y -# CONFIG_RT_GROUP_SCHED is not set -CONFIG_UCLAMP_TASK_GROUP=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_RDMA=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_HUGETLB=y -CONFIG_CPUSETS=y -CONFIG_PROC_PID_CPUSET=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_CGROUP_PERF=y -CONFIG_CGROUP_BPF=y -# CONFIG_CGROUP_DEBUG is not set -CONFIG_SOCK_CGROUP_DATA=y -CONFIG_NAMESPACES=y -CONFIG_UTS_NS=y -CONFIG_TIME_NS=y -CONFIG_IPC_NS=y -CONFIG_USER_NS=y -CONFIG_USER_NS_UNPRIVILEGED=y -CONFIG_PID_NS=y -CONFIG_NET_NS=y -CONFIG_CHECKPOINT_RESTORE=y -CONFIG_SCHED_AUTOGROUP=y -# CONFIG_SYSFS_DEPRECATED is not set -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -CONFIG_RD_GZIP=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_RD_XZ=y -CONFIG_RD_LZO=y -CONFIG_RD_LZ4=y -CONFIG_BOOT_CONFIG=y -CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SYSCTL=y -CONFIG_HAVE_UID16=y -CONFIG_SYSCTL_EXCEPTION_TRACE=y -CONFIG_HAVE_PCSPKR_PLATFORM=y -CONFIG_BPF=y -CONFIG_EXPERT=y -# CONFIG_UID16 is not set -CONFIG_MULTIUSER=y -CONFIG_SGETMASK_SYSCALL=y -# CONFIG_SYSFS_SYSCALL is not set -CONFIG_FHANDLE=y -CONFIG_POSIX_TIMERS=y -CONFIG_PRINTK=y -CONFIG_PRINTK_NMI=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_PCSPKR_PLATFORM=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -CONFIG_FUTEX_PI=y -CONFIG_EPOLL=y -CONFIG_SIGNALFD=y -CONFIG_TIMERFD=y -CONFIG_EVENTFD=y -CONFIG_SHMEM=y -CONFIG_AIO=y -CONFIG_IO_URING=y -CONFIG_ADVISE_SYSCALLS=y -CONFIG_HAVE_ARCH_USERFAULTFD_WP=y -CONFIG_MEMBARRIER=y -CONFIG_KALLSYMS=y -CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y -CONFIG_KALLSYMS_BASE_RELATIVE=y -CONFIG_BPF_LSM=y -CONFIG_BPF_SYSCALL=y -CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y -CONFIG_BPF_JIT_ALWAYS_ON=y -CONFIG_BPF_JIT_DEFAULT_ON=y -CONFIG_USERFAULTFD=y -CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y -CONFIG_RSEQ=y -# CONFIG_DEBUG_RSEQ is not set -# CONFIG_EMBEDDED is not set -CONFIG_HAVE_PERF_EVENTS=y -# CONFIG_PC104 is not set - -# -# Kernel Performance Events And Counters -# -CONFIG_PERF_EVENTS=y -# CONFIG_DEBUG_PERF_USE_VMALLOC is not set -# end of Kernel Performance Events And Counters - -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLUB_DEBUG=y -# CONFIG_SLUB_MEMCG_SYSFS_ON is not set -# CONFIG_COMPAT_BRK is not set -# CONFIG_SLAB is not set -CONFIG_SLUB=y -# CONFIG_SLOB is not set -CONFIG_SLAB_MERGE_DEFAULT=y -CONFIG_SLAB_FREELIST_RANDOM=y -CONFIG_SLAB_FREELIST_HARDENED=y -CONFIG_SHUFFLE_PAGE_ALLOCATOR=y -CONFIG_SLUB_CPU_PARTIAL=y -CONFIG_SYSTEM_DATA_VERIFICATION=y -CONFIG_PROFILING=y -CONFIG_TRACEPOINTS=y -# end of General setup - -CONFIG_64BIT=y -CONFIG_X86_64=y -CONFIG_X86=y -CONFIG_INSTRUCTION_DECODER=y -CONFIG_OUTPUT_FORMAT="elf64-x86-64" -CONFIG_LOCKDEP_SUPPORT=y -CONFIG_STACKTRACE_SUPPORT=y -CONFIG_MMU=y -CONFIG_ARCH_MMAP_RND_BITS_MIN=28 -CONFIG_ARCH_MMAP_RND_BITS_MAX=32 -CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 -CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 
-CONFIG_GENERIC_ISA_DMA=y -CONFIG_GENERIC_BUG=y -CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_ARCH_HAS_CPU_RELAX=y -CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y -CONFIG_ARCH_HAS_FILTER_PGPROT=y -CONFIG_HAVE_SETUP_PER_CPU_AREA=y -CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y -CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y -CONFIG_ARCH_HIBERNATION_POSSIBLE=y -CONFIG_ARCH_SUSPEND_POSSIBLE=y -CONFIG_ARCH_WANT_GENERAL_HUGETLB=y -CONFIG_ZONE_DMA32=y -CONFIG_AUDIT_ARCH=y -CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y -CONFIG_HAVE_INTEL_TXT=y -CONFIG_X86_64_SMP=y -CONFIG_ARCH_SUPPORTS_UPROBES=y -CONFIG_FIX_EARLYCON_MEM=y -CONFIG_DYNAMIC_PHYSICAL_MASK=y -CONFIG_PGTABLE_LEVELS=5 -CONFIG_CC_HAS_SANE_STACKPROTECTOR=y - -# -# Processor type and features -# -CONFIG_ZONE_DMA=y -CONFIG_SMP=y -CONFIG_X86_FEATURE_NAMES=y -CONFIG_X86_X2APIC=y -CONFIG_X86_MPPARSE=y -# CONFIG_GOLDFISH is not set -CONFIG_RETPOLINE=y -CONFIG_X86_CPU_RESCTRL=y -# CONFIG_X86_EXTENDED_PLATFORM is not set -CONFIG_X86_INTEL_LPSS=y -CONFIG_X86_AMD_PLATFORM_DEVICE=y -CONFIG_IOSF_MBI=y -# CONFIG_IOSF_MBI_DEBUG is not set -CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y -CONFIG_SCHED_OMIT_FRAME_POINTER=y -CONFIG_HYPERVISOR_GUEST=y -CONFIG_PARAVIRT=y -CONFIG_PARAVIRT_XXL=y -# CONFIG_PARAVIRT_DEBUG is not set -CONFIG_PARAVIRT_SPINLOCKS=y -CONFIG_X86_HV_CALLBACK_VECTOR=y -CONFIG_XEN=y -CONFIG_XEN_PV=y -CONFIG_XEN_PV_SMP=y -CONFIG_XEN_DOM0=y -CONFIG_XEN_PVHVM=y -CONFIG_XEN_PVHVM_SMP=y -CONFIG_XEN_512GB=y -CONFIG_XEN_SAVE_RESTORE=y -# CONFIG_XEN_DEBUG_FS is not set -CONFIG_XEN_PVH=y -CONFIG_KVM_GUEST=y -CONFIG_ARCH_CPUIDLE_HALTPOLL=y -CONFIG_PVH=y -CONFIG_PARAVIRT_TIME_ACCOUNTING=y -CONFIG_PARAVIRT_CLOCK=y -CONFIG_JAILHOUSE_GUEST=y -CONFIG_ACRN_GUEST=y -# CONFIG_MK8 is not set -# CONFIG_MPSC is not set -# CONFIG_MCORE2 is not set -# CONFIG_MATOM is not set -CONFIG_GENERIC_CPU=y -CONFIG_X86_INTERNODE_CACHE_SHIFT=6 -CONFIG_X86_L1_CACHE_SHIFT=6 -CONFIG_X86_TSC=y -CONFIG_X86_CMPXCHG64=y -CONFIG_X86_CMOV=y -CONFIG_X86_MINIMUM_CPU_FAMILY=64 -CONFIG_X86_DEBUGCTLMSR=y -CONFIG_IA32_FEAT_CTL=y -CONFIG_X86_VMX_FEATURE_NAMES=y -CONFIG_PROCESSOR_SELECT=y -CONFIG_CPU_SUP_INTEL=y -CONFIG_CPU_SUP_AMD=y -CONFIG_CPU_SUP_HYGON=y -CONFIG_CPU_SUP_CENTAUR=y -CONFIG_CPU_SUP_ZHAOXIN=y -CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y -CONFIG_DMI=y -CONFIG_GART_IOMMU=y -# CONFIG_MAXSMP is not set -CONFIG_NR_CPUS_RANGE_BEGIN=2 -CONFIG_NR_CPUS_RANGE_END=512 -CONFIG_NR_CPUS_DEFAULT=64 -CONFIG_NR_CPUS=320 -CONFIG_SCHED_SMT=y -CONFIG_SCHED_MC=y -CONFIG_SCHED_MC_PRIO=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y -CONFIG_X86_MCE=y -# CONFIG_X86_MCELOG_LEGACY is not set -CONFIG_X86_MCE_INTEL=y -CONFIG_X86_MCE_AMD=y -CONFIG_X86_MCE_THRESHOLD=y -CONFIG_X86_MCE_INJECT=m -CONFIG_X86_THERMAL_VECTOR=y - -# -# Performance monitoring -# -CONFIG_PERF_EVENTS_INTEL_UNCORE=m -CONFIG_PERF_EVENTS_INTEL_RAPL=m -CONFIG_PERF_EVENTS_INTEL_CSTATE=m -CONFIG_PERF_EVENTS_AMD_POWER=m -# end of Performance monitoring - -CONFIG_X86_16BIT=y -CONFIG_X86_ESPFIX64=y -CONFIG_X86_VSYSCALL_EMULATION=y -CONFIG_X86_IOPL_IOPERM=y -CONFIG_I8K=m -CONFIG_MICROCODE=y -CONFIG_MICROCODE_INTEL=y -CONFIG_MICROCODE_AMD=y -CONFIG_MICROCODE_OLD_INTERFACE=y -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -CONFIG_X86_5LEVEL=y -CONFIG_X86_DIRECT_GBPAGES=y -# CONFIG_X86_CPA_STATISTICS is not set -CONFIG_AMD_MEM_ENCRYPT=y -# CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT is not set -CONFIG_NUMA=y -CONFIG_AMD_NUMA=y -CONFIG_X86_64_ACPI_NUMA=y -# CONFIG_NUMA_EMU is not set 
-CONFIG_NODES_SHIFT=5 -CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_ARCH_SPARSEMEM_DEFAULT=y -CONFIG_ARCH_SELECT_MEMORY_MODEL=y -CONFIG_ARCH_MEMORY_PROBE=y -CONFIG_ARCH_PROC_KCORE_TEXT=y -CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 -CONFIG_X86_PMEM_LEGACY_DEVICE=y -CONFIG_X86_PMEM_LEGACY=m -CONFIG_X86_CHECK_BIOS_CORRUPTION=y -CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y -CONFIG_X86_RESERVE_LOW=64 -CONFIG_MTRR=y -CONFIG_MTRR_SANITIZER=y -CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=1 -CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=0 -CONFIG_X86_PAT=y -CONFIG_ARCH_USES_PG_UNCACHED=y -CONFIG_ARCH_RANDOM=y -CONFIG_X86_SMAP=y -CONFIG_X86_UMIP=y -CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y -# CONFIG_X86_INTEL_TSX_MODE_OFF is not set -# CONFIG_X86_INTEL_TSX_MODE_ON is not set -CONFIG_X86_INTEL_TSX_MODE_AUTO=y -CONFIG_EFI=y -CONFIG_EFI_STUB=y -CONFIG_EFI_MIXED=y -CONFIG_SECCOMP=y -# CONFIG_HZ_100 is not set -# CONFIG_HZ_250 is not set -CONFIG_HZ_300=y -# CONFIG_HZ_1000 is not set -CONFIG_HZ=300 -CONFIG_SCHED_HRTICK=y -CONFIG_KEXEC=y -CONFIG_KEXEC_FILE=y -CONFIG_ARCH_HAS_KEXEC_PURGATORY=y -# CONFIG_KEXEC_SIG is not set -CONFIG_CRASH_DUMP=y -CONFIG_KEXEC_JUMP=y -CONFIG_PHYSICAL_START=0x1000000 -CONFIG_RELOCATABLE=y -CONFIG_RANDOMIZE_BASE=y -CONFIG_X86_NEED_RELOCS=y -CONFIG_PHYSICAL_ALIGN=0x200000 -CONFIG_DYNAMIC_MEMORY_LAYOUT=y -CONFIG_RANDOMIZE_MEMORY=y -CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0x1 -CONFIG_HOTPLUG_CPU=y -# CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set -# CONFIG_DEBUG_HOTPLUG_CPU0 is not set -# CONFIG_COMPAT_VDSO is not set -# CONFIG_LEGACY_VSYSCALL_EMULATE is not set -CONFIG_LEGACY_VSYSCALL_XONLY=y -# CONFIG_LEGACY_VSYSCALL_NONE is not set -# CONFIG_CMDLINE_BOOL is not set -CONFIG_MODIFY_LDT_SYSCALL=y -CONFIG_HAVE_LIVEPATCH=y -# CONFIG_LIVEPATCH is not set -# end of Processor type and features - -CONFIG_ARCH_HAS_ADD_PAGES=y -CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y -CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y -CONFIG_USE_PERCPU_NUMA_NODE_ID=y -CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y -CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y -CONFIG_ARCH_ENABLE_THP_MIGRATION=y - -# -# Power management and ACPI options -# -CONFIG_ARCH_HIBERNATION_HEADER=y -CONFIG_SUSPEND=y -CONFIG_SUSPEND_FREEZER=y -# CONFIG_SUSPEND_SKIP_SYNC is not set -CONFIG_HIBERNATE_CALLBACKS=y -CONFIG_HIBERNATION=y -CONFIG_HIBERNATION_SNAPSHOT_DEV=y -CONFIG_PM_STD_PARTITION="" -CONFIG_PM_SLEEP=y -CONFIG_PM_SLEEP_SMP=y -CONFIG_PM_AUTOSLEEP=y -CONFIG_PM_WAKELOCKS=y -CONFIG_PM_WAKELOCKS_LIMIT=100 -CONFIG_PM_WAKELOCKS_GC=y -CONFIG_PM=y -CONFIG_PM_DEBUG=y -CONFIG_PM_ADVANCED_DEBUG=y -# CONFIG_PM_TEST_SUSPEND is not set -CONFIG_PM_SLEEP_DEBUG=y -# CONFIG_DPM_WATCHDOG is not set -CONFIG_PM_TRACE=y -CONFIG_PM_TRACE_RTC=y -CONFIG_PM_CLK=y -CONFIG_PM_GENERIC_DOMAINS=y -CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y -CONFIG_PM_GENERIC_DOMAINS_SLEEP=y -CONFIG_PM_GENERIC_DOMAINS_OF=y -CONFIG_ENERGY_MODEL=y -CONFIG_ARCH_SUPPORTS_ACPI=y -CONFIG_ACPI=y -CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y -CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y -CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y -# CONFIG_ACPI_DEBUGGER is not set -CONFIG_ACPI_SPCR_TABLE=y -CONFIG_ACPI_LPIT=y -CONFIG_ACPI_SLEEP=y -# CONFIG_ACPI_PROCFS_POWER is not set -CONFIG_ACPI_REV_OVERRIDE_POSSIBLE=y -CONFIG_ACPI_EC_DEBUGFS=y -CONFIG_ACPI_AC=m -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=y -CONFIG_ACPI_VIDEO=y -CONFIG_ACPI_FAN=y -CONFIG_ACPI_TAD=m -CONFIG_ACPI_DOCK=y -CONFIG_ACPI_CPU_FREQ_PSS=y -CONFIG_ACPI_PROCESSOR_CSTATE=y -CONFIG_ACPI_PROCESSOR_IDLE=y -CONFIG_ACPI_CPPC_LIB=y -CONFIG_ACPI_PROCESSOR=y -CONFIG_ACPI_IPMI=m -CONFIG_ACPI_HOTPLUG_CPU=y 
-CONFIG_ACPI_PROCESSOR_AGGREGATOR=y -CONFIG_ACPI_THERMAL=y -CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y -CONFIG_ACPI_TABLE_UPGRADE=y -CONFIG_ACPI_DEBUG=y -CONFIG_ACPI_PCI_SLOT=y -CONFIG_ACPI_CONTAINER=y -CONFIG_ACPI_HOTPLUG_MEMORY=y -CONFIG_ACPI_HOTPLUG_IOAPIC=y -CONFIG_ACPI_SBS=m -CONFIG_ACPI_HED=y -CONFIG_ACPI_CUSTOM_METHOD=m -CONFIG_ACPI_BGRT=y -# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set -CONFIG_ACPI_NFIT=m -# CONFIG_NFIT_SECURITY_DEBUG is not set -CONFIG_ACPI_NUMA=y -CONFIG_ACPI_HMAT=y -CONFIG_HAVE_ACPI_APEI=y -CONFIG_HAVE_ACPI_APEI_NMI=y -CONFIG_ACPI_APEI=y -CONFIG_ACPI_APEI_GHES=y -CONFIG_ACPI_APEI_PCIEAER=y -CONFIG_ACPI_APEI_MEMORY_FAILURE=y -CONFIG_ACPI_APEI_EINJ=m -CONFIG_ACPI_APEI_ERST_DEBUG=m -CONFIG_DPTF_POWER=m -CONFIG_ACPI_WATCHDOG=y -CONFIG_ACPI_EXTLOG=m -CONFIG_ACPI_ADXL=y -CONFIG_PMIC_OPREGION=y -CONFIG_BYTCRC_PMIC_OPREGION=y -CONFIG_CHTCRC_PMIC_OPREGION=y -CONFIG_XPOWER_PMIC_OPREGION=y -CONFIG_BXT_WC_PMIC_OPREGION=y -CONFIG_CHT_WC_PMIC_OPREGION=y -CONFIG_CHT_DC_TI_PMIC_OPREGION=y -CONFIG_ACPI_CONFIGFS=m -CONFIG_TPS68470_PMIC_OPREGION=y -CONFIG_X86_PM_TIMER=y -CONFIG_SFI=y - -# -# CPU Frequency scaling -# -CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_GOV_ATTR_SET=y -CONFIG_CPU_FREQ_GOV_COMMON=y -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y -CONFIG_CPU_FREQ_GOV_PERFORMANCE=y -CONFIG_CPU_FREQ_GOV_POWERSAVE=m -CONFIG_CPU_FREQ_GOV_USERSPACE=m -CONFIG_CPU_FREQ_GOV_ONDEMAND=m -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m -CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y - -# -# CPU frequency scaling drivers -# -CONFIG_CPUFREQ_DT=m -CONFIG_CPUFREQ_DT_PLATDEV=y -CONFIG_X86_INTEL_PSTATE=y -CONFIG_X86_PCC_CPUFREQ=m -CONFIG_X86_ACPI_CPUFREQ=m -CONFIG_X86_ACPI_CPUFREQ_CPB=y -CONFIG_X86_POWERNOW_K8=m -CONFIG_X86_AMD_FREQ_SENSITIVITY=m -# CONFIG_X86_SPEEDSTEP_CENTRINO is not set -CONFIG_X86_P4_CLOCKMOD=m - -# -# shared options -# -CONFIG_X86_SPEEDSTEP_LIB=m -# end of CPU Frequency scaling - -# -# CPU Idle -# -CONFIG_CPU_IDLE=y -CONFIG_CPU_IDLE_GOV_LADDER=y -CONFIG_CPU_IDLE_GOV_MENU=y -CONFIG_CPU_IDLE_GOV_TEO=y -CONFIG_CPU_IDLE_GOV_HALTPOLL=y -CONFIG_HALTPOLL_CPUIDLE=m -# end of CPU Idle - -CONFIG_INTEL_IDLE=y -# end of Power management and ACPI options - -# -# Bus options (PCI etc.) -# -CONFIG_PCI_DIRECT=y -CONFIG_PCI_MMCONFIG=y -CONFIG_PCI_XEN=y -CONFIG_MMCONF_FAM10H=y -# CONFIG_PCI_CNB20LE_QUIRK is not set -# CONFIG_ISA_BUS is not set -CONFIG_ISA_DMA_API=y -CONFIG_AMD_NB=y -# CONFIG_X86_SYSFB is not set -# end of Bus options (PCI etc.) 
- -# -# Binary Emulations -# -CONFIG_IA32_EMULATION=y -# CONFIG_X86_X32 is not set -CONFIG_COMPAT_32=y -CONFIG_COMPAT=y -CONFIG_COMPAT_FOR_U64_ALIGNMENT=y -CONFIG_SYSVIPC_COMPAT=y -# end of Binary Emulations - -# -# Firmware Drivers -# -CONFIG_EDD=m -# CONFIG_EDD_OFF is not set -CONFIG_FIRMWARE_MEMMAP=y -CONFIG_DMIID=y -CONFIG_DMI_SYSFS=m -CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y -CONFIG_ISCSI_IBFT_FIND=y -CONFIG_ISCSI_IBFT=m -CONFIG_FW_CFG_SYSFS=m -# CONFIG_FW_CFG_SYSFS_CMDLINE is not set -CONFIG_GOOGLE_FIRMWARE=y -# CONFIG_GOOGLE_SMI is not set -CONFIG_GOOGLE_COREBOOT_TABLE=m -CONFIG_GOOGLE_MEMCONSOLE=m -# CONFIG_GOOGLE_MEMCONSOLE_X86_LEGACY is not set -CONFIG_GOOGLE_FRAMEBUFFER_COREBOOT=m -CONFIG_GOOGLE_MEMCONSOLE_COREBOOT=m -CONFIG_GOOGLE_VPD=m - -# -# EFI (Extensible Firmware Interface) Support -# -# CONFIG_EFI_VARS is not set -CONFIG_EFI_ESRT=y -CONFIG_EFI_RUNTIME_MAP=y -# CONFIG_EFI_FAKE_MEMMAP is not set -CONFIG_EFI_SOFT_RESERVE=y -CONFIG_EFI_RUNTIME_WRAPPERS=y -CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y -CONFIG_EFI_CAPSULE_LOADER=m -# CONFIG_EFI_TEST is not set -CONFIG_APPLE_PROPERTIES=y -# CONFIG_RESET_ATTACK_MITIGATION is not set -CONFIG_EFI_RCI2_TABLE=y -# CONFIG_EFI_DISABLE_PCI_DMA is not set -# end of EFI (Extensible Firmware Interface) Support - -CONFIG_EFI_EMBEDDED_FIRMWARE=y -CONFIG_UEFI_CPER=y -CONFIG_UEFI_CPER_X86=y -CONFIG_EFI_DEV_PATH_PARSER=y -CONFIG_EFI_EARLYCON=y - -# -# Tegra firmware driver -# -# end of Tegra firmware driver -# end of Firmware Drivers - -CONFIG_HAVE_KVM=y -CONFIG_HAVE_KVM_IRQCHIP=y -CONFIG_HAVE_KVM_IRQFD=y -CONFIG_HAVE_KVM_IRQ_ROUTING=y -CONFIG_HAVE_KVM_EVENTFD=y -CONFIG_KVM_MMIO=y -CONFIG_KVM_ASYNC_PF=y -CONFIG_HAVE_KVM_MSI=y -CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y -CONFIG_KVM_VFIO=y -CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y -CONFIG_KVM_COMPAT=y -CONFIG_HAVE_KVM_IRQ_BYPASS=y -CONFIG_HAVE_KVM_NO_POLL=y -CONFIG_VIRTUALIZATION=y -CONFIG_KVM=m -CONFIG_KVM_WERROR=y -CONFIG_KVM_INTEL=m -CONFIG_KVM_AMD=m -CONFIG_KVM_AMD_SEV=y -CONFIG_KVM_MMU_AUDIT=y -CONFIG_AS_AVX512=y -CONFIG_AS_SHA1_NI=y -CONFIG_AS_SHA256_NI=y -CONFIG_AS_TPAUSE=y - -# -# General architecture-dependent options -# -CONFIG_CRASH_CORE=y -CONFIG_KEXEC_CORE=y -CONFIG_HOTPLUG_SMT=y -CONFIG_OPROFILE=m -# CONFIG_OPROFILE_EVENT_MULTIPLEX is not set -CONFIG_HAVE_OPROFILE=y -CONFIG_OPROFILE_NMI_TIMER=y -CONFIG_KPROBES=y -CONFIG_JUMP_LABEL=y -# CONFIG_STATIC_KEYS_SELFTEST is not set -CONFIG_OPTPROBES=y -CONFIG_KPROBES_ON_FTRACE=y -CONFIG_UPROBES=y -CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y -CONFIG_ARCH_USE_BUILTIN_BSWAP=y -CONFIG_KRETPROBES=y -CONFIG_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_IOREMAP_PROT=y -CONFIG_HAVE_KPROBES=y -CONFIG_HAVE_KRETPROBES=y -CONFIG_HAVE_OPTPROBES=y -CONFIG_HAVE_KPROBES_ON_FTRACE=y -CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y -CONFIG_HAVE_NMI=y -CONFIG_HAVE_ARCH_TRACEHOOK=y -CONFIG_HAVE_DMA_CONTIGUOUS=y -CONFIG_GENERIC_SMP_IDLE_THREAD=y -CONFIG_ARCH_HAS_FORTIFY_SOURCE=y -CONFIG_ARCH_HAS_SET_MEMORY=y -CONFIG_ARCH_HAS_SET_DIRECT_MAP=y -CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y -CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y -CONFIG_HAVE_ASM_MODVERSIONS=y -CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y -CONFIG_HAVE_RSEQ=y -CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y -CONFIG_HAVE_HW_BREAKPOINT=y -CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y -CONFIG_HAVE_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_PERF_EVENTS_NMI=y -CONFIG_HAVE_HARDLOCKUP_DETECTOR_PERF=y -CONFIG_HAVE_PERF_REGS=y -CONFIG_HAVE_PERF_USER_STACK_DUMP=y -CONFIG_HAVE_ARCH_JUMP_LABEL=y -CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y 
-CONFIG_MMU_GATHER_TABLE_FREE=y -CONFIG_MMU_GATHER_RCU_TABLE_FREE=y -CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y -CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y -CONFIG_HAVE_CMPXCHG_LOCAL=y -CONFIG_HAVE_CMPXCHG_DOUBLE=y -CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y -CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y -CONFIG_HAVE_ARCH_SECCOMP_FILTER=y -CONFIG_SECCOMP_FILTER=y -CONFIG_HAVE_ARCH_STACKLEAK=y -CONFIG_HAVE_STACKPROTECTOR=y -CONFIG_CC_HAS_STACKPROTECTOR_NONE=y -CONFIG_STACKPROTECTOR=y -CONFIG_STACKPROTECTOR_STRONG=y -CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y -CONFIG_HAVE_CONTEXT_TRACKING=y -CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y -CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y -CONFIG_HAVE_MOVE_PMD=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y -CONFIG_HAVE_ARCH_HUGE_VMAP=y -CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y -CONFIG_HAVE_ARCH_SOFT_DIRTY=y -CONFIG_HAVE_MOD_ARCH_SPECIFIC=y -CONFIG_MODULES_USE_ELF_RELA=y -CONFIG_ARCH_HAS_ELF_RANDOMIZE=y -CONFIG_HAVE_ARCH_MMAP_RND_BITS=y -CONFIG_HAVE_EXIT_THREAD=y -CONFIG_ARCH_MMAP_RND_BITS=28 -CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y -CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 -CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES=y -CONFIG_HAVE_COPY_THREAD_TLS=y -CONFIG_HAVE_STACK_VALIDATION=y -CONFIG_HAVE_RELIABLE_STACKTRACE=y -CONFIG_ISA_BUS_API=y -CONFIG_OLD_SIGSUSPEND3=y -CONFIG_COMPAT_OLD_SIGACTION=y -CONFIG_COMPAT_32BIT_TIME=y -CONFIG_HAVE_ARCH_VMAP_STACK=y -CONFIG_VMAP_STACK=y -CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y -CONFIG_STRICT_KERNEL_RWX=y -CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y -CONFIG_STRICT_MODULE_RWX=y -CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y -CONFIG_ARCH_USE_MEMREMAP_PROT=y -CONFIG_LOCK_EVENT_COUNTS=y -CONFIG_ARCH_HAS_MEM_ENCRYPT=y - -# -# GCOV-based kernel profiling -# -# CONFIG_GCOV_KERNEL is not set -CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y -# end of GCOV-based kernel profiling - -CONFIG_HAVE_GCC_PLUGINS=y -CONFIG_GCC_PLUGINS=y -# CONFIG_GCC_PLUGIN_CYC_COMPLEXITY is not set -# CONFIG_GCC_PLUGIN_LATENT_ENTROPY is not set -# CONFIG_GCC_PLUGIN_RANDSTRUCT is not set -# end of General architecture-dependent options - -CONFIG_RT_MUTEXES=y -CONFIG_BASE_SMALL=0 -CONFIG_MODULE_SIG_FORMAT=y -CONFIG_MODULES=y -CONFIG_MODULE_FORCE_LOAD=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_MODVERSIONS is not set -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG=y -# CONFIG_MODULE_SIG_FORCE is not set -CONFIG_MODULE_SIG_ALL=y -# CONFIG_MODULE_SIG_SHA1 is not set -# CONFIG_MODULE_SIG_SHA224 is not set -# CONFIG_MODULE_SIG_SHA256 is not set -# CONFIG_MODULE_SIG_SHA384 is not set -CONFIG_MODULE_SIG_SHA512=y -CONFIG_MODULE_SIG_HASH="sha512" -CONFIG_MODULE_COMPRESS=y -# CONFIG_MODULE_COMPRESS_GZIP is not set -CONFIG_MODULE_COMPRESS_XZ=y -CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS=y -CONFIG_UNUSED_SYMBOLS=y -CONFIG_MODULES_TREE_LOOKUP=y -CONFIG_BLOCK=y -CONFIG_BLK_RQ_ALLOC_TIME=y -CONFIG_BLK_SCSI_REQUEST=y -CONFIG_BLK_CGROUP_RWSTAT=y -CONFIG_BLK_DEV_BSG=y -CONFIG_BLK_DEV_BSGLIB=y -CONFIG_BLK_DEV_INTEGRITY=y -CONFIG_BLK_DEV_INTEGRITY_T10=y -CONFIG_BLK_DEV_ZONED=y -CONFIG_BLK_DEV_THROTTLING=y -CONFIG_BLK_DEV_THROTTLING_LOW=y -# CONFIG_BLK_CMDLINE_PARSER is not set -CONFIG_BLK_WBT=y -CONFIG_BLK_CGROUP_IOLATENCY=y -CONFIG_BLK_CGROUP_IOCOST=y -CONFIG_BLK_WBT_MQ=y -CONFIG_BLK_DEBUG_FS=y -CONFIG_BLK_DEBUG_FS_ZONED=y -CONFIG_BLK_SED_OPAL=y -CONFIG_BLK_INLINE_ENCRYPTION=y -CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_AIX_PARTITION=y -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set 
-# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -# CONFIG_UNIXWARE_DISKLABEL is not set -CONFIG_LDM_PARTITION=y -# CONFIG_LDM_DEBUG is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -CONFIG_KARMA_PARTITION=y -CONFIG_EFI_PARTITION=y -# CONFIG_SYSV68_PARTITION is not set -# CONFIG_CMDLINE_PARTITION is not set -# end of Partition Types - -CONFIG_BLOCK_COMPAT=y -CONFIG_BLK_MQ_PCI=y -CONFIG_BLK_MQ_VIRTIO=y -CONFIG_BLK_MQ_RDMA=y -CONFIG_BLK_PM=y - -# -# IO Schedulers -# -CONFIG_MQ_IOSCHED_DEADLINE=y -CONFIG_MQ_IOSCHED_KYBER=y -CONFIG_IOSCHED_BFQ=y -CONFIG_BFQ_GROUP_IOSCHED=y -# CONFIG_BFQ_CGROUP_DEBUG is not set -# end of IO Schedulers - -CONFIG_PREEMPT_NOTIFIERS=y -CONFIG_PADATA=y -CONFIG_ASN1=y -CONFIG_UNINLINE_SPIN_UNLOCK=y -CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y -CONFIG_MUTEX_SPIN_ON_OWNER=y -CONFIG_RWSEM_SPIN_ON_OWNER=y -CONFIG_LOCK_SPIN_ON_OWNER=y -CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y -CONFIG_QUEUED_SPINLOCKS=y -CONFIG_ARCH_USE_QUEUED_RWLOCKS=y -CONFIG_QUEUED_RWLOCKS=y -CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y -CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y -CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y -CONFIG_FREEZER=y - -# -# Executable file formats -# -CONFIG_BINFMT_ELF=y -CONFIG_COMPAT_BINFMT_ELF=y -CONFIG_ELFCORE=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y -CONFIG_BINFMT_SCRIPT=y -CONFIG_BINFMT_MISC=y -CONFIG_COREDUMP=y -# end of Executable file formats - -# -# Memory Management options -# -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_SPARSEMEM_MANUAL=y -CONFIG_SPARSEMEM=y -CONFIG_NEED_MULTIPLE_NODES=y -CONFIG_HAVE_MEMORY_PRESENT=y -CONFIG_SPARSEMEM_EXTREME=y -CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y -CONFIG_SPARSEMEM_VMEMMAP=y -CONFIG_HAVE_FAST_GUP=y -CONFIG_NUMA_KEEP_MEMINFO=y -CONFIG_MEMORY_ISOLATION=y -CONFIG_HAVE_BOOTMEM_INFO_NODE=y -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTPLUG_SPARSE=y -CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y -CONFIG_MEMORY_HOTREMOVE=y -CONFIG_SPLIT_PTLOCK_CPUS=4 -CONFIG_MEMORY_BALLOON=y -CONFIG_BALLOON_COMPACTION=y -CONFIG_COMPACTION=y -CONFIG_PAGE_REPORTING=y -CONFIG_MIGRATION=y -CONFIG_CONTIG_ALLOC=y -CONFIG_PHYS_ADDR_T_64BIT=y -CONFIG_BOUNCE=y -CONFIG_VIRT_TO_BUS=y -CONFIG_MMU_NOTIFIER=y -CONFIG_KSM=y -CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 -CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y -CONFIG_MEMORY_FAILURE=y -CONFIG_HWPOISON_INJECT=m -CONFIG_TRANSPARENT_HUGEPAGE=y -# CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS is not set -CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y -CONFIG_ARCH_WANTS_THP_SWAP=y -CONFIG_THP_SWAP=y -CONFIG_CLEANCACHE=y -CONFIG_FRONTSWAP=y -# CONFIG_CMA is not set -CONFIG_MEM_SOFT_DIRTY=y -CONFIG_ZSWAP=y -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set -CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4=y -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4HC is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD is not set -CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lz4" -# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD is not set -CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD=y -# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set -CONFIG_ZSWAP_ZPOOL_DEFAULT="z3fold" -CONFIG_ZSWAP_DEFAULT_ON=y -CONFIG_ZPOOL=y -CONFIG_ZBUD=y -CONFIG_Z3FOLD=y -CONFIG_ZSMALLOC=y -# CONFIG_ZSMALLOC_PGTABLE_MAPPING is not set -# CONFIG_ZSMALLOC_STAT is not set -CONFIG_GENERIC_EARLY_IOREMAP=y -# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set -# CONFIG_IDLE_PAGE_TRACKING is not set 
-CONFIG_ARCH_HAS_PTE_DEVMAP=y -CONFIG_ZONE_DEVICE=y -CONFIG_DEV_PAGEMAP_OPS=y -CONFIG_HMM_MIRROR=y -CONFIG_DEVICE_PRIVATE=y -CONFIG_FRAME_VECTOR=y -CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y -CONFIG_ARCH_HAS_PKEYS=y -# CONFIG_PERCPU_STATS is not set -# CONFIG_GUP_BENCHMARK is not set -CONFIG_READ_ONLY_THP_FOR_FS=y -CONFIG_ARCH_HAS_PTE_SPECIAL=y -CONFIG_MAPPING_DIRTY_HELPERS=y -# end of Memory Management options - -CONFIG_NET=y -CONFIG_COMPAT_NETLINK_MESSAGES=y -CONFIG_NET_INGRESS=y -CONFIG_NET_EGRESS=y -CONFIG_NET_REDIRECT=y -CONFIG_SKB_EXTENSIONS=y - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_DIAG=y -CONFIG_UNIX=y -CONFIG_UNIX_SCM=y -CONFIG_UNIX_DIAG=y -CONFIG_TLS=m -CONFIG_TLS_DEVICE=y -# CONFIG_TLS_TOE is not set -CONFIG_XFRM=y -CONFIG_XFRM_OFFLOAD=y -CONFIG_XFRM_ALGO=m -CONFIG_XFRM_USER=m -CONFIG_XFRM_INTERFACE=m -CONFIG_XFRM_SUB_POLICY=y -CONFIG_XFRM_MIGRATE=y -CONFIG_XFRM_STATISTICS=y -CONFIG_XFRM_AH=m -CONFIG_XFRM_ESP=m -CONFIG_XFRM_IPCOMP=m -CONFIG_NET_KEY=m -CONFIG_NET_KEY_MIGRATE=y -CONFIG_XFRM_ESPINTCP=y -CONFIG_SMC=m -CONFIG_SMC_DIAG=m -CONFIG_XDP_SOCKETS=y -CONFIG_XDP_SOCKETS_DIAG=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -# CONFIG_IP_FIB_TRIE_STATS is not set -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_CLASSID=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE_DEMUX=m -CONFIG_NET_IP_TUNNEL=m -CONFIG_NET_IPGRE=m -# CONFIG_NET_IPGRE_BROADCAST is not set -CONFIG_IP_MROUTE_COMMON=y -CONFIG_IP_MROUTE=y -CONFIG_IP_MROUTE_MULTIPLE_TABLES=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -CONFIG_NET_IPVTI=m -CONFIG_NET_UDP_TUNNEL=m -CONFIG_NET_FOU=m -CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_ESP_OFFLOAD=m -CONFIG_INET_ESPINTCP=y -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_TUNNEL=m -CONFIG_INET_TUNNEL=m -CONFIG_INET_DIAG=m -CONFIG_INET_TCP_DIAG=m -CONFIG_INET_UDP_DIAG=m -CONFIG_INET_RAW_DIAG=m -CONFIG_INET_DIAG_DESTROY=y -CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_BIC=m -CONFIG_TCP_CONG_CUBIC=y -CONFIG_TCP_CONG_WESTWOOD=m -CONFIG_TCP_CONG_HTCP=m -CONFIG_TCP_CONG_HSTCP=m -CONFIG_TCP_CONG_HYBLA=m -CONFIG_TCP_CONG_VEGAS=m -CONFIG_TCP_CONG_NV=m -CONFIG_TCP_CONG_SCALABLE=m -CONFIG_TCP_CONG_LP=m -CONFIG_TCP_CONG_VENO=m -CONFIG_TCP_CONG_YEAH=m -CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_TCP_CONG_DCTCP=m -CONFIG_TCP_CONG_CDG=m -CONFIG_TCP_CONG_BBR=m -CONFIG_DEFAULT_CUBIC=y -# CONFIG_DEFAULT_RENO is not set -CONFIG_DEFAULT_TCP_CONG="cubic" -CONFIG_TCP_MD5SIG=y -CONFIG_IPV6=y -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_IPV6_ROUTE_INFO=y -CONFIG_IPV6_OPTIMISTIC_DAD=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_ESPINTCP=y -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_MIP6=m -CONFIG_IPV6_ILA=m -CONFIG_INET6_XFRM_TUNNEL=m -CONFIG_INET6_TUNNEL=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_SIT=m -CONFIG_IPV6_SIT_6RD=y -CONFIG_IPV6_NDISC_NODETYPE=y -CONFIG_IPV6_TUNNEL=m -CONFIG_IPV6_GRE=m -CONFIG_IPV6_FOU=m -CONFIG_IPV6_FOU_TUNNEL=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_IPV6_MROUTE=y -CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y -CONFIG_IPV6_PIMSM_V2=y -CONFIG_IPV6_SEG6_LWTUNNEL=y -CONFIG_IPV6_SEG6_HMAC=y -CONFIG_IPV6_SEG6_BPF=y -CONFIG_IPV6_RPL_LWTUNNEL=y -CONFIG_NETLABEL=y -CONFIG_MPTCP=y -CONFIG_MPTCP_IPV6=y -# CONFIG_MPTCP_HMAC_TEST is not set -CONFIG_NETWORK_SECMARK=y -CONFIG_NET_PTP_CLASSIFY=y -CONFIG_NETWORK_PHY_TIMESTAMPING=y -CONFIG_NETFILTER=y -CONFIG_NETFILTER_ADVANCED=y -CONFIG_BRIDGE_NETFILTER=m - -# -# Core 
Netfilter Configuration -# -CONFIG_NETFILTER_INGRESS=y -CONFIG_NETFILTER_NETLINK=m -CONFIG_NETFILTER_FAMILY_BRIDGE=y -CONFIG_NETFILTER_FAMILY_ARP=y -CONFIG_NETFILTER_NETLINK_ACCT=m -CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_NETLINK_LOG=m -CONFIG_NETFILTER_NETLINK_OSF=m -CONFIG_NF_CONNTRACK=m -CONFIG_NF_LOG_COMMON=m -CONFIG_NF_LOG_NETDEV=m -CONFIG_NETFILTER_CONNCOUNT=m -CONFIG_NF_CONNTRACK_MARK=y -CONFIG_NF_CONNTRACK_SECMARK=y -CONFIG_NF_CONNTRACK_ZONES=y -CONFIG_NF_CONNTRACK_PROCFS=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_TIMEOUT=y -CONFIG_NF_CONNTRACK_TIMESTAMP=y -CONFIG_NF_CONNTRACK_LABELS=y -CONFIG_NF_CT_PROTO_DCCP=y -CONFIG_NF_CT_PROTO_GRE=y -CONFIG_NF_CT_PROTO_SCTP=y -CONFIG_NF_CT_PROTO_UDPLITE=y -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_BROADCAST=m -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_SNMP=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_NETLINK_HELPER=m -CONFIG_NETFILTER_NETLINK_GLUE_CT=y -CONFIG_NF_NAT=m -CONFIG_NF_NAT_AMANDA=m -CONFIG_NF_NAT_FTP=m -CONFIG_NF_NAT_IRC=m -CONFIG_NF_NAT_SIP=m -CONFIG_NF_NAT_TFTP=m -CONFIG_NF_NAT_REDIRECT=y -CONFIG_NF_NAT_MASQUERADE=y -CONFIG_NETFILTER_SYNPROXY=m -CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y -CONFIG_NF_TABLES_NETDEV=y -CONFIG_NFT_NUMGEN=m -CONFIG_NFT_CT=m -CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m -CONFIG_NFT_CONNLIMIT=m -CONFIG_NFT_LOG=m -CONFIG_NFT_LIMIT=m -CONFIG_NFT_MASQ=m -CONFIG_NFT_REDIR=m -CONFIG_NFT_NAT=m -CONFIG_NFT_TUNNEL=m -CONFIG_NFT_OBJREF=m -CONFIG_NFT_QUEUE=m -CONFIG_NFT_QUOTA=m -CONFIG_NFT_REJECT=m -CONFIG_NFT_REJECT_INET=m -CONFIG_NFT_COMPAT=m -CONFIG_NFT_HASH=m -CONFIG_NFT_FIB=m -CONFIG_NFT_FIB_INET=m -CONFIG_NFT_XFRM=m -CONFIG_NFT_SOCKET=m -CONFIG_NFT_OSF=m -CONFIG_NFT_TPROXY=m -CONFIG_NFT_SYNPROXY=m -CONFIG_NF_DUP_NETDEV=m -CONFIG_NFT_DUP_NETDEV=m -CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NF_FLOW_TABLE_INET=m -CONFIG_NF_FLOW_TABLE=m -CONFIG_NETFILTER_XTABLES=m - -# -# Xtables combined modules -# -CONFIG_NETFILTER_XT_MARK=m -CONFIG_NETFILTER_XT_CONNMARK=m -CONFIG_NETFILTER_XT_SET=m - -# -# Xtables targets -# -CONFIG_NETFILTER_XT_TARGET_AUDIT=m -CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m -CONFIG_NETFILTER_XT_TARGET_CT=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_HL=m -CONFIG_NETFILTER_XT_TARGET_HMARK=m -CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m -CONFIG_NETFILTER_XT_TARGET_LED=m -CONFIG_NETFILTER_XT_TARGET_LOG=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_NAT=m -CONFIG_NETFILTER_XT_TARGET_NETMAP=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m -CONFIG_NETFILTER_XT_TARGET_RATEEST=m -CONFIG_NETFILTER_XT_TARGET_REDIRECT=m -CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m -CONFIG_NETFILTER_XT_TARGET_TEE=m -CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m -CONFIG_NETFILTER_XT_TARGET_SECMARK=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m - -# -# Xtables matches -# -CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_CGROUP=m -CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m 
-CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_CPU=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ECN=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_HL=m -CONFIG_NETFILTER_XT_MATCH_IPCOMP=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_IPVS=m -CONFIG_NETFILTER_XT_MATCH_L2TP=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_NFACCT=m -CONFIG_NETFILTER_XT_MATCH_OSF=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_SOCKET=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -# end of Core Netfilter Configuration - -CONFIG_IP_SET=m -CONFIG_IP_SET_MAX=256 -CONFIG_IP_SET_BITMAP_IP=m -CONFIG_IP_SET_BITMAP_IPMAC=m -CONFIG_IP_SET_BITMAP_PORT=m -CONFIG_IP_SET_HASH_IP=m -CONFIG_IP_SET_HASH_IPMARK=m -CONFIG_IP_SET_HASH_IPPORT=m -CONFIG_IP_SET_HASH_IPPORTIP=m -CONFIG_IP_SET_HASH_IPPORTNET=m -CONFIG_IP_SET_HASH_IPMAC=m -CONFIG_IP_SET_HASH_MAC=m -CONFIG_IP_SET_HASH_NETPORTNET=m -CONFIG_IP_SET_HASH_NET=m -CONFIG_IP_SET_HASH_NETNET=m -CONFIG_IP_SET_HASH_NETPORT=m -CONFIG_IP_SET_HASH_NETIFACE=m -CONFIG_IP_SET_LIST_SET=m -CONFIG_IP_VS=m -CONFIG_IP_VS_IPV6=y -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=15 - -# -# IPVS transport protocol load balancing support -# -CONFIG_IP_VS_PROTO_TCP=y -CONFIG_IP_VS_PROTO_UDP=y -CONFIG_IP_VS_PROTO_AH_ESP=y -CONFIG_IP_VS_PROTO_ESP=y -CONFIG_IP_VS_PROTO_AH=y -CONFIG_IP_VS_PROTO_SCTP=y - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_FO=m -CONFIG_IP_VS_OVF=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_MH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m - -# -# IPVS SH scheduler -# -CONFIG_IP_VS_SH_TAB_BITS=8 - -# -# IPVS MH scheduler -# -CONFIG_IP_VS_MH_TAB_INDEX=12 - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -CONFIG_IP_VS_NFCT=y -CONFIG_IP_VS_PE_SIP=m - -# -# IP: Netfilter Configuration -# -CONFIG_NF_DEFRAG_IPV4=m -CONFIG_NF_SOCKET_IPV4=m -CONFIG_NF_TPROXY_IPV4=m -CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_REJECT_IPV4=m -CONFIG_NFT_DUP_IPV4=m -CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m -CONFIG_NF_DUP_IPV4=m -CONFIG_NF_LOG_ARP=m -CONFIG_NF_LOG_IPV4=m -CONFIG_NF_REJECT_IPV4=m -CONFIG_NF_NAT_SNMP_BASIC=m -CONFIG_NF_NAT_PPTP=m -CONFIG_NF_NAT_H323=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_SYNPROXY=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m 
-CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -# end of IP: Netfilter Configuration - -# -# IPv6: Netfilter Configuration -# -CONFIG_NF_SOCKET_IPV6=m -CONFIG_NF_TPROXY_IPV6=m -CONFIG_NF_TABLES_IPV6=y -CONFIG_NFT_REJECT_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m -CONFIG_NF_DUP_IPV6=m -CONFIG_NF_REJECT_IPV6=m -CONFIG_NF_LOG_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m -# end of IPv6: Netfilter Configuration - -CONFIG_NF_DEFRAG_IPV6=m -CONFIG_NF_TABLES_BRIDGE=m -CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m -CONFIG_NF_LOG_BRIDGE=m -CONFIG_NF_CONNTRACK_BRIDGE=m -CONFIG_BRIDGE_NF_EBTABLES=m -CONFIG_BRIDGE_EBT_BROUTE=m -CONFIG_BRIDGE_EBT_T_FILTER=m -CONFIG_BRIDGE_EBT_T_NAT=m -CONFIG_BRIDGE_EBT_802_3=m -CONFIG_BRIDGE_EBT_AMONG=m -CONFIG_BRIDGE_EBT_ARP=m -CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m -CONFIG_BRIDGE_EBT_LIMIT=m -CONFIG_BRIDGE_EBT_MARK=m -CONFIG_BRIDGE_EBT_PKTTYPE=m -CONFIG_BRIDGE_EBT_STP=m -CONFIG_BRIDGE_EBT_VLAN=m -CONFIG_BRIDGE_EBT_ARPREPLY=m -CONFIG_BRIDGE_EBT_DNAT=m -CONFIG_BRIDGE_EBT_MARK_T=m -CONFIG_BRIDGE_EBT_REDIRECT=m -CONFIG_BRIDGE_EBT_SNAT=m -CONFIG_BRIDGE_EBT_LOG=m -CONFIG_BRIDGE_EBT_NFLOG=m -# CONFIG_BPFILTER is not set -CONFIG_IP_DCCP=m -CONFIG_INET_DCCP_DIAG=m - -# -# DCCP CCIDs Configuration -# -# CONFIG_IP_DCCP_CCID2_DEBUG is not set -CONFIG_IP_DCCP_CCID3=y -# CONFIG_IP_DCCP_CCID3_DEBUG is not set -CONFIG_IP_DCCP_TFRC_LIB=y -# end of DCCP CCIDs Configuration - -# -# DCCP Kernel Hacking -# -# CONFIG_IP_DCCP_DEBUG is not set -# end of DCCP Kernel Hacking - -CONFIG_IP_SCTP=m -# CONFIG_SCTP_DBG_OBJCNT is not set -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5 is not set -CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_NONE is not set -CONFIG_SCTP_COOKIE_HMAC_MD5=y -CONFIG_SCTP_COOKIE_HMAC_SHA1=y -CONFIG_INET_SCTP_DIAG=m -CONFIG_RDS=m -CONFIG_RDS_RDMA=m -CONFIG_RDS_TCP=m -# CONFIG_RDS_DEBUG is not set -CONFIG_TIPC=m -CONFIG_TIPC_MEDIA_IB=y -CONFIG_TIPC_MEDIA_UDP=y -CONFIG_TIPC_CRYPTO=y -CONFIG_TIPC_DIAG=m -CONFIG_ATM=m -CONFIG_ATM_CLIP=m -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -# CONFIG_ATM_BR2684_IPFILTER is not set -CONFIG_L2TP=m -# CONFIG_L2TP_DEBUGFS is not set -CONFIG_L2TP_V3=y -CONFIG_L2TP_IP=m -CONFIG_L2TP_ETH=m -CONFIG_STP=m -CONFIG_GARP=m -CONFIG_MRP=m -CONFIG_BRIDGE=m -CONFIG_BRIDGE_IGMP_SNOOPING=y -CONFIG_BRIDGE_VLAN_FILTERING=y -CONFIG_BRIDGE_MRP=y -CONFIG_HAVE_NET_DSA=y -CONFIG_NET_DSA=m -CONFIG_NET_DSA_TAG_8021Q=m -CONFIG_NET_DSA_TAG_AR9331=m -CONFIG_NET_DSA_TAG_BRCM_COMMON=m -CONFIG_NET_DSA_TAG_BRCM=m -CONFIG_NET_DSA_TAG_BRCM_PREPEND=m -CONFIG_NET_DSA_TAG_GSWIP=m -CONFIG_NET_DSA_TAG_DSA=m -CONFIG_NET_DSA_TAG_EDSA=m -CONFIG_NET_DSA_TAG_MTK=m -CONFIG_NET_DSA_TAG_KSZ=m -CONFIG_NET_DSA_TAG_OCELOT=m -CONFIG_NET_DSA_TAG_QCA=m 
-CONFIG_NET_DSA_TAG_LAN9303=m -CONFIG_NET_DSA_TAG_SJA1105=m -CONFIG_NET_DSA_TAG_TRAILER=m -CONFIG_VLAN_8021Q=m -CONFIG_VLAN_8021Q_GVRP=y -CONFIG_VLAN_8021Q_MVRP=y -# CONFIG_DECNET is not set -CONFIG_LLC=m -CONFIG_LLC2=m -CONFIG_ATALK=m -CONFIG_DEV_APPLETALK=m -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -CONFIG_PHONET=m -CONFIG_6LOWPAN=m -# CONFIG_6LOWPAN_DEBUGFS is not set -CONFIG_6LOWPAN_NHC=m -CONFIG_6LOWPAN_NHC_DEST=m -CONFIG_6LOWPAN_NHC_FRAGMENT=m -CONFIG_6LOWPAN_NHC_HOP=m -CONFIG_6LOWPAN_NHC_IPV6=m -CONFIG_6LOWPAN_NHC_MOBILITY=m -CONFIG_6LOWPAN_NHC_ROUTING=m -CONFIG_6LOWPAN_NHC_UDP=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m -CONFIG_IEEE802154=m -CONFIG_IEEE802154_NL802154_EXPERIMENTAL=y -CONFIG_IEEE802154_SOCKET=m -CONFIG_IEEE802154_6LOWPAN=m -CONFIG_MAC802154=m -CONFIG_NET_SCHED=y - -# -# Queueing/Scheduling -# -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_HFSC=m -CONFIG_NET_SCH_ATM=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_MULTIQ=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFB=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_CBS=m -CONFIG_NET_SCH_ETF=m -CONFIG_NET_SCH_TAPRIO=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_MQPRIO=m -CONFIG_NET_SCH_SKBPRIO=m -CONFIG_NET_SCH_CHOKE=m -CONFIG_NET_SCH_QFQ=m -CONFIG_NET_SCH_CODEL=m -CONFIG_NET_SCH_FQ_CODEL=y -CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_FQ=m -CONFIG_NET_SCH_HHF=m -CONFIG_NET_SCH_PIE=m -CONFIG_NET_SCH_FQ_PIE=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_SCH_PLUG=m -CONFIG_NET_SCH_ETS=m -CONFIG_NET_SCH_DEFAULT=y -# CONFIG_DEFAULT_FQ is not set -# CONFIG_DEFAULT_CODEL is not set -CONFIG_DEFAULT_FQ_CODEL=y -# CONFIG_DEFAULT_SFQ is not set -# CONFIG_DEFAULT_PFIFO_FAST is not set -CONFIG_DEFAULT_NET_SCH="fq_codel" - -# -# Classification -# -CONFIG_NET_CLS=y -CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_FLOW=m -CONFIG_NET_CLS_CGROUP=m -CONFIG_NET_CLS_BPF=m -CONFIG_NET_CLS_FLOWER=m -CONFIG_NET_CLS_MATCHALL=m -CONFIG_NET_EMATCH=y -CONFIG_NET_EMATCH_STACK=32 -CONFIG_NET_EMATCH_CMP=m -CONFIG_NET_EMATCH_NBYTE=m -CONFIG_NET_EMATCH_U32=m -CONFIG_NET_EMATCH_META=m -CONFIG_NET_EMATCH_TEXT=m -CONFIG_NET_EMATCH_CANID=m -CONFIG_NET_EMATCH_IPSET=m -CONFIG_NET_EMATCH_IPT=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=m -CONFIG_NET_ACT_GACT=m -CONFIG_GACT_PROB=y -CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_SAMPLE=m -CONFIG_NET_ACT_IPT=m -CONFIG_NET_ACT_NAT=m -CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_ACT_SIMP=m -CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_ACT_CSUM=m -CONFIG_NET_ACT_MPLS=m -CONFIG_NET_ACT_VLAN=m -CONFIG_NET_ACT_BPF=m -CONFIG_NET_ACT_CONNMARK=m -CONFIG_NET_ACT_CTINFO=m -CONFIG_NET_ACT_SKBMOD=m -CONFIG_NET_ACT_IFE=m -CONFIG_NET_ACT_TUNNEL_KEY=m -CONFIG_NET_ACT_CT=m -CONFIG_NET_ACT_GATE=m -CONFIG_NET_IFE_SKBMARK=m -CONFIG_NET_IFE_SKBPRIO=m -CONFIG_NET_IFE_SKBTCINDEX=m -CONFIG_NET_TC_SKB_EXT=y -CONFIG_NET_SCH_FIFO=y -CONFIG_DCB=y -CONFIG_DNS_RESOLVER=m -CONFIG_BATMAN_ADV=m -CONFIG_BATMAN_ADV_BATMAN_V=y -CONFIG_BATMAN_ADV_BLA=y -CONFIG_BATMAN_ADV_DAT=y -CONFIG_BATMAN_ADV_NC=y -CONFIG_BATMAN_ADV_MCAST=y -CONFIG_BATMAN_ADV_DEBUGFS=y -# CONFIG_BATMAN_ADV_DEBUG is not set 
-CONFIG_BATMAN_ADV_SYSFS=y -# CONFIG_BATMAN_ADV_TRACING is not set -CONFIG_OPENVSWITCH=m -CONFIG_OPENVSWITCH_GRE=m -CONFIG_OPENVSWITCH_VXLAN=m -CONFIG_OPENVSWITCH_GENEVE=m -CONFIG_VSOCKETS=m -CONFIG_VSOCKETS_DIAG=m -CONFIG_VSOCKETS_LOOPBACK=m -CONFIG_VMWARE_VMCI_VSOCKETS=m -CONFIG_VIRTIO_VSOCKETS=m -CONFIG_VIRTIO_VSOCKETS_COMMON=m -CONFIG_HYPERV_VSOCKETS=m -CONFIG_NETLINK_DIAG=m -CONFIG_MPLS=y -CONFIG_NET_MPLS_GSO=m -CONFIG_MPLS_ROUTING=m -CONFIG_MPLS_IPTUNNEL=m -CONFIG_NET_NSH=m -CONFIG_HSR=m -CONFIG_NET_SWITCHDEV=y -CONFIG_NET_L3_MASTER_DEV=y -CONFIG_QRTR=m -CONFIG_QRTR_SMD=m -CONFIG_QRTR_TUN=m -CONFIG_QRTR_MHI=m -CONFIG_NET_NCSI=y -CONFIG_NCSI_OEM_CMD_GET_MAC=y -CONFIG_RPS=y -CONFIG_RFS_ACCEL=y -CONFIG_XPS=y -CONFIG_CGROUP_NET_PRIO=y -CONFIG_CGROUP_NET_CLASSID=y -CONFIG_NET_RX_BUSY_POLL=y -CONFIG_BQL=y -CONFIG_BPF_JIT=y -CONFIG_BPF_STREAM_PARSER=y -CONFIG_NET_FLOW_LIMIT=y - -# -# Network testing -# -CONFIG_NET_PKTGEN=m -CONFIG_NET_DROP_MONITOR=y -# end of Network testing -# end of Networking options - -CONFIG_HAMRADIO=y - -# -# Packet Radio protocols -# -CONFIG_AX25=m -CONFIG_AX25_DAMA_SLAVE=y -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# -CONFIG_MKISS=m -CONFIG_6PACK=m -CONFIG_BPQETHER=m -CONFIG_BAYCOM_SER_FDX=m -CONFIG_BAYCOM_SER_HDX=m -CONFIG_BAYCOM_PAR=m -CONFIG_YAM=m -# end of AX.25 network device drivers - -CONFIG_CAN=m -CONFIG_CAN_RAW=m -CONFIG_CAN_BCM=m -CONFIG_CAN_GW=m -CONFIG_CAN_J1939=m - -# -# CAN Device Drivers -# -CONFIG_CAN_VCAN=m -CONFIG_CAN_VXCAN=m -CONFIG_CAN_SLCAN=m -CONFIG_CAN_DEV=m -CONFIG_CAN_CALC_BITTIMING=y -CONFIG_CAN_FLEXCAN=m -CONFIG_CAN_GRCAN=m -CONFIG_CAN_JANZ_ICAN3=m -CONFIG_CAN_KVASER_PCIEFD=m -CONFIG_CAN_C_CAN=m -CONFIG_CAN_C_CAN_PLATFORM=m -CONFIG_CAN_C_CAN_PCI=m -CONFIG_CAN_CC770=m -# CONFIG_CAN_CC770_ISA is not set -CONFIG_CAN_CC770_PLATFORM=m -CONFIG_CAN_IFI_CANFD=m -CONFIG_CAN_M_CAN=m -CONFIG_CAN_M_CAN_PLATFORM=m -CONFIG_CAN_M_CAN_TCAN4X5X=m -CONFIG_CAN_PEAK_PCIEFD=m -CONFIG_CAN_SJA1000=m -CONFIG_CAN_EMS_PCI=m -# CONFIG_CAN_EMS_PCMCIA is not set -CONFIG_CAN_F81601=m -CONFIG_CAN_KVASER_PCI=m -CONFIG_CAN_PEAK_PCI=m -CONFIG_CAN_PEAK_PCIEC=y -CONFIG_CAN_PEAK_PCMCIA=m -CONFIG_CAN_PLX_PCI=m -# CONFIG_CAN_SJA1000_ISA is not set -CONFIG_CAN_SJA1000_PLATFORM=m -CONFIG_CAN_SOFTING=m -CONFIG_CAN_SOFTING_CS=m - -# -# CAN SPI interfaces -# -CONFIG_CAN_HI311X=m -CONFIG_CAN_MCP251X=m -# end of CAN SPI interfaces - -# -# CAN USB interfaces -# -CONFIG_CAN_8DEV_USB=m -CONFIG_CAN_EMS_USB=m -CONFIG_CAN_ESD_USB2=m -CONFIG_CAN_GS_USB=m -CONFIG_CAN_KVASER_USB=m -CONFIG_CAN_MCBA_USB=m -CONFIG_CAN_PEAK_USB=m -CONFIG_CAN_UCAN=m -# end of CAN USB interfaces - -# CONFIG_CAN_DEBUG_DEVICES is not set -# end of CAN Device Drivers - -CONFIG_BT=m -CONFIG_BT_BREDR=y -CONFIG_BT_RFCOMM=m -CONFIG_BT_RFCOMM_TTY=y -CONFIG_BT_BNEP=m -CONFIG_BT_BNEP_MC_FILTER=y -CONFIG_BT_BNEP_PROTO_FILTER=y -CONFIG_BT_CMTP=m -CONFIG_BT_HIDP=m -CONFIG_BT_HS=y -CONFIG_BT_LE=y -CONFIG_BT_6LOWPAN=m -CONFIG_BT_LEDS=y -CONFIG_BT_MSFTEXT=y -CONFIG_BT_DEBUGFS=y -# CONFIG_BT_SELFTEST is not set - -# -# Bluetooth device drivers -# -CONFIG_BT_INTEL=m -CONFIG_BT_BCM=m -CONFIG_BT_RTL=m -CONFIG_BT_QCA=m -CONFIG_BT_HCIBTUSB=m -CONFIG_BT_HCIBTUSB_AUTOSUSPEND=y -CONFIG_BT_HCIBTUSB_BCM=y -CONFIG_BT_HCIBTUSB_MTK=y -CONFIG_BT_HCIBTUSB_RTL=y -CONFIG_BT_HCIBTSDIO=m -CONFIG_BT_HCIUART=m -CONFIG_BT_HCIUART_SERDEV=y -CONFIG_BT_HCIUART_H4=y -CONFIG_BT_HCIUART_NOKIA=m -CONFIG_BT_HCIUART_BCSP=y -CONFIG_BT_HCIUART_ATH3K=y -CONFIG_BT_HCIUART_LL=y -CONFIG_BT_HCIUART_3WIRE=y -CONFIG_BT_HCIUART_INTEL=y 
-CONFIG_BT_HCIUART_BCM=y -CONFIG_BT_HCIUART_RTL=y -CONFIG_BT_HCIUART_QCA=y -CONFIG_BT_HCIUART_AG6XX=y -CONFIG_BT_HCIUART_MRVL=y -CONFIG_BT_HCIBCM203X=m -CONFIG_BT_HCIBPA10X=m -CONFIG_BT_HCIBFUSB=m -CONFIG_BT_HCIDTL1=m -CONFIG_BT_HCIBT3C=m -CONFIG_BT_HCIBLUECARD=m -CONFIG_BT_HCIVHCI=m -CONFIG_BT_MRVL=m -CONFIG_BT_MRVL_SDIO=m -CONFIG_BT_ATH3K=m -CONFIG_BT_MTKSDIO=m -CONFIG_BT_MTKUART=m -CONFIG_BT_HCIRSI=m -# end of Bluetooth device drivers - -CONFIG_AF_RXRPC=m -CONFIG_AF_RXRPC_IPV6=y -# CONFIG_AF_RXRPC_INJECT_LOSS is not set -CONFIG_AF_RXRPC_DEBUG=y -CONFIG_RXKAD=y -CONFIG_AF_KCM=m -CONFIG_STREAM_PARSER=y -CONFIG_FIB_RULES=y -CONFIG_WIRELESS=y -CONFIG_WIRELESS_EXT=y -CONFIG_WEXT_CORE=y -CONFIG_WEXT_PROC=y -CONFIG_WEXT_SPY=y -CONFIG_WEXT_PRIV=y -CONFIG_CFG80211=m -# CONFIG_NL80211_TESTMODE is not set -# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set -# CONFIG_CFG80211_CERTIFICATION_ONUS is not set -CONFIG_CFG80211_REQUIRE_SIGNED_REGDB=y -CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS=y -CONFIG_CFG80211_DEFAULT_PS=y -CONFIG_CFG80211_DEBUGFS=y -CONFIG_CFG80211_CRDA_SUPPORT=y -CONFIG_CFG80211_WEXT=y -CONFIG_CFG80211_WEXT_EXPORT=y -CONFIG_LIB80211=m -CONFIG_LIB80211_CRYPT_WEP=m -CONFIG_LIB80211_CRYPT_CCMP=m -CONFIG_LIB80211_CRYPT_TKIP=m -# CONFIG_LIB80211_DEBUG is not set -CONFIG_MAC80211=m -CONFIG_MAC80211_HAS_RC=y -CONFIG_MAC80211_RC_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT="minstrel_ht" -CONFIG_MAC80211_MESH=y -CONFIG_MAC80211_LEDS=y -CONFIG_MAC80211_DEBUGFS=y -# CONFIG_MAC80211_MESSAGE_TRACING is not set -# CONFIG_MAC80211_DEBUG_MENU is not set -CONFIG_MAC80211_STA_HASH_MAX_SIZE=0 -CONFIG_WIMAX=m -CONFIG_WIMAX_DEBUG_LEVEL=8 -CONFIG_RFKILL=m -CONFIG_RFKILL_LEDS=y -CONFIG_RFKILL_INPUT=y -CONFIG_RFKILL_GPIO=m -CONFIG_NET_9P=m -CONFIG_NET_9P_VIRTIO=m -CONFIG_NET_9P_XEN=m -CONFIG_NET_9P_RDMA=m -# CONFIG_NET_9P_DEBUG is not set -CONFIG_CAIF=m -# CONFIG_CAIF_DEBUG is not set -CONFIG_CAIF_NETDEV=m -CONFIG_CAIF_USB=m -CONFIG_CEPH_LIB=m -CONFIG_CEPH_LIB_PRETTYDEBUG=y -CONFIG_CEPH_LIB_USE_DNS_RESOLVER=y -CONFIG_NFC=m -CONFIG_NFC_DIGITAL=m -CONFIG_NFC_NCI=m -CONFIG_NFC_NCI_SPI=m -CONFIG_NFC_NCI_UART=m -CONFIG_NFC_HCI=m -CONFIG_NFC_SHDLC=y - -# -# Near Field Communication (NFC) devices -# -CONFIG_NFC_TRF7970A=m -CONFIG_NFC_MEI_PHY=m -CONFIG_NFC_SIM=m -CONFIG_NFC_PORT100=m -CONFIG_NFC_FDP=m -CONFIG_NFC_FDP_I2C=m -CONFIG_NFC_PN544=m -CONFIG_NFC_PN544_I2C=m -CONFIG_NFC_PN544_MEI=m -CONFIG_NFC_PN533=m -CONFIG_NFC_PN533_USB=m -CONFIG_NFC_PN533_I2C=m -CONFIG_NFC_PN532_UART=m -CONFIG_NFC_MICROREAD=m -CONFIG_NFC_MICROREAD_I2C=m -CONFIG_NFC_MICROREAD_MEI=m -CONFIG_NFC_MRVL=m -CONFIG_NFC_MRVL_USB=m -CONFIG_NFC_MRVL_UART=m -CONFIG_NFC_MRVL_I2C=m -CONFIG_NFC_MRVL_SPI=m -CONFIG_NFC_ST21NFCA=m -CONFIG_NFC_ST21NFCA_I2C=m -CONFIG_NFC_ST_NCI=m -CONFIG_NFC_ST_NCI_I2C=m -CONFIG_NFC_ST_NCI_SPI=m -CONFIG_NFC_NXP_NCI=m -CONFIG_NFC_NXP_NCI_I2C=m -CONFIG_NFC_S3FWRN5=m -CONFIG_NFC_S3FWRN5_I2C=m -CONFIG_NFC_ST95HF=m -# end of Near Field Communication (NFC) devices - -CONFIG_PSAMPLE=m -CONFIG_NET_IFE=m -CONFIG_LWTUNNEL=y -CONFIG_LWTUNNEL_BPF=y -CONFIG_DST_CACHE=y -CONFIG_GRO_CELLS=y -CONFIG_SOCK_VALIDATE_XMIT=y -CONFIG_NET_SOCK_MSG=y -CONFIG_NET_DEVLINK=y -CONFIG_PAGE_POOL=y -CONFIG_FAILOVER=m -CONFIG_HAVE_EBPF_JIT=y - -# -# Device Drivers -# -CONFIG_HAVE_EISA=y -# CONFIG_EISA is not set -CONFIG_HAVE_PCI=y -CONFIG_PCI=y -CONFIG_PCI_DOMAINS=y -CONFIG_PCIEPORTBUS=y -CONFIG_HOTPLUG_PCI_PCIE=y -CONFIG_PCIEAER=y -# CONFIG_PCIEAER_INJECT is not set -CONFIG_PCIE_ECRC=y -CONFIG_PCIEASPM=y 
-CONFIG_PCIEASPM_DEFAULT=y -# CONFIG_PCIEASPM_POWERSAVE is not set -# CONFIG_PCIEASPM_POWER_SUPERSAVE is not set -# CONFIG_PCIEASPM_PERFORMANCE is not set -CONFIG_PCIE_PME=y -CONFIG_PCIE_DPC=y -CONFIG_PCIE_PTM=y -# CONFIG_PCIE_BW is not set -CONFIG_PCIE_EDR=y -CONFIG_PCI_MSI=y -CONFIG_PCI_MSI_IRQ_DOMAIN=y -CONFIG_PCI_QUIRKS=y -# CONFIG_PCI_DEBUG is not set -CONFIG_PCI_REALLOC_ENABLE_AUTO=y -CONFIG_PCI_STUB=y -CONFIG_PCI_PF_STUB=m -CONFIG_XEN_PCIDEV_FRONTEND=m -CONFIG_PCI_ATS=y -CONFIG_PCI_ECAM=y -CONFIG_PCI_LOCKLESS_CONFIG=y -CONFIG_PCI_IOV=y -CONFIG_PCI_PRI=y -CONFIG_PCI_PASID=y -CONFIG_PCI_P2PDMA=y -CONFIG_PCI_LABEL=y -CONFIG_PCI_HYPERV=m -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_ACPI=y -CONFIG_HOTPLUG_PCI_ACPI_IBM=m -CONFIG_HOTPLUG_PCI_CPCI=y -CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m -CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m -CONFIG_HOTPLUG_PCI_SHPC=y - -# -# PCI controller drivers -# -CONFIG_PCI_FTPCI100=y -CONFIG_PCI_HOST_COMMON=y -CONFIG_PCI_HOST_GENERIC=y -CONFIG_PCIE_XILINX=y -CONFIG_VMD=m -CONFIG_PCI_HYPERV_INTERFACE=m - -# -# DesignWare PCI Core Support -# -CONFIG_PCIE_DW=y -CONFIG_PCIE_DW_HOST=y -CONFIG_PCIE_DW_EP=y -CONFIG_PCIE_DW_PLAT=y -CONFIG_PCIE_DW_PLAT_HOST=y -CONFIG_PCIE_DW_PLAT_EP=y -CONFIG_PCIE_INTEL_GW=y -CONFIG_PCI_MESON=y -# end of DesignWare PCI Core Support - -# -# Mobiveil PCIe Core Support -# -# end of Mobiveil PCIe Core Support - -# -# Cadence PCIe controllers support -# -CONFIG_PCIE_CADENCE=y -CONFIG_PCIE_CADENCE_HOST=y -CONFIG_PCIE_CADENCE_EP=y -CONFIG_PCIE_CADENCE_PLAT=y -CONFIG_PCIE_CADENCE_PLAT_HOST=y -CONFIG_PCIE_CADENCE_PLAT_EP=y -# end of Cadence PCIe controllers support -# end of PCI controller drivers - -# -# PCI Endpoint -# -CONFIG_PCI_ENDPOINT=y -CONFIG_PCI_ENDPOINT_CONFIGFS=y -# CONFIG_PCI_EPF_TEST is not set -# end of PCI Endpoint - -# -# PCI switch controller drivers -# -CONFIG_PCI_SW_SWITCHTEC=m -# end of PCI switch controller drivers - -CONFIG_PCCARD=m -CONFIG_PCMCIA=m -CONFIG_PCMCIA_LOAD_CIS=y -CONFIG_CARDBUS=y - -# -# PC-card bridges -# -CONFIG_YENTA=m -CONFIG_YENTA_O2=y -CONFIG_YENTA_RICOH=y -CONFIG_YENTA_TI=y -CONFIG_YENTA_ENE_TUNE=y -CONFIG_YENTA_TOSHIBA=y -CONFIG_PD6729=m -CONFIG_I82092=m -CONFIG_PCCARD_NONSTATIC=y -CONFIG_RAPIDIO=m -CONFIG_RAPIDIO_TSI721=m -CONFIG_RAPIDIO_DISC_TIMEOUT=30 -CONFIG_RAPIDIO_ENABLE_RX_TX_PORTS=y -CONFIG_RAPIDIO_DMA_ENGINE=y -# CONFIG_RAPIDIO_DEBUG is not set -CONFIG_RAPIDIO_ENUM_BASIC=m -CONFIG_RAPIDIO_CHMAN=m -CONFIG_RAPIDIO_MPORT_CDEV=m - -# -# RapidIO Switch drivers -# -CONFIG_RAPIDIO_TSI57X=m -CONFIG_RAPIDIO_CPS_XX=m -CONFIG_RAPIDIO_TSI568=m -CONFIG_RAPIDIO_CPS_GEN2=m -CONFIG_RAPIDIO_RXS_GEN3=m -# end of RapidIO Switch drivers - -# -# Generic Driver Options -# -# CONFIG_UEVENT_HELPER is not set -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y - -# -# Firmware loader -# -CONFIG_FW_LOADER=y -CONFIG_FW_LOADER_PAGED_BUF=y -CONFIG_EXTRA_FIRMWARE="" -# CONFIG_FW_LOADER_USER_HELPER is not set -CONFIG_FW_LOADER_COMPRESS=y -CONFIG_FW_CACHE=y -# end of Firmware loader - -CONFIG_WANT_DEV_COREDUMP=y -CONFIG_ALLOW_DEV_COREDUMP=y -CONFIG_DEV_COREDUMP=y -# CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set -# CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set -CONFIG_HMEM_REPORTING=y -# CONFIG_TEST_ASYNC_DRIVER_PROBE is not set -CONFIG_SYS_HYPERVISOR=y -CONFIG_GENERIC_CPU_AUTOPROBE=y -CONFIG_GENERIC_CPU_VULNERABILITIES=y -CONFIG_REGMAP=y -CONFIG_REGMAP_I2C=y -CONFIG_REGMAP_SLIMBUS=m -CONFIG_REGMAP_SPI=y -CONFIG_REGMAP_SPMI=m -CONFIG_REGMAP_W1=m -CONFIG_REGMAP_MMIO=y 
-CONFIG_REGMAP_IRQ=y -CONFIG_REGMAP_SOUNDWIRE=m -CONFIG_REGMAP_SCCB=m -CONFIG_REGMAP_I3C=m -CONFIG_DMA_SHARED_BUFFER=y -# CONFIG_DMA_FENCE_TRACE is not set -# end of Generic Driver Options - -# -# Bus devices -# -CONFIG_MOXTET=m -CONFIG_SIMPLE_PM_BUS=y -CONFIG_MHI_BUS=m -# end of Bus devices - -CONFIG_CONNECTOR=y -CONFIG_PROC_EVENTS=y -CONFIG_GNSS=m -CONFIG_GNSS_SERIAL=m -CONFIG_GNSS_MTK_SERIAL=m -CONFIG_GNSS_SIRF_SERIAL=m -CONFIG_GNSS_UBX_SERIAL=m -CONFIG_MTD=m -CONFIG_MTD_TESTS=m - -# -# Partition parsers -# -CONFIG_MTD_AR7_PARTS=m -CONFIG_MTD_CMDLINE_PARTS=m -CONFIG_MTD_OF_PARTS=m -CONFIG_MTD_REDBOOT_PARTS=m -CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1 -# CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED is not set -# CONFIG_MTD_REDBOOT_PARTS_READONLY is not set -# end of Partition parsers - -# -# User Modules And Translation Layers -# -CONFIG_MTD_BLKDEVS=m -CONFIG_MTD_BLOCK=m -CONFIG_MTD_BLOCK_RO=m -CONFIG_FTL=m -CONFIG_NFTL=m -CONFIG_NFTL_RW=y -CONFIG_INFTL=m -CONFIG_RFD_FTL=m -CONFIG_SSFDC=m -CONFIG_SM_FTL=m -CONFIG_MTD_OOPS=m -CONFIG_MTD_PSTORE=m -CONFIG_MTD_SWAP=m -CONFIG_MTD_PARTITIONED_MASTER=y - -# -# RAM/ROM/Flash chip drivers -# -CONFIG_MTD_CFI=m -CONFIG_MTD_JEDECPROBE=m -CONFIG_MTD_GEN_PROBE=m -# CONFIG_MTD_CFI_ADV_OPTIONS is not set -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y -CONFIG_MTD_CFI_INTELEXT=m -CONFIG_MTD_CFI_AMDSTD=m -CONFIG_MTD_CFI_STAA=m -CONFIG_MTD_CFI_UTIL=m -CONFIG_MTD_RAM=m -CONFIG_MTD_ROM=m -CONFIG_MTD_ABSENT=m -# end of RAM/ROM/Flash chip drivers - -# -# Mapping drivers for chip access -# -CONFIG_MTD_COMPLEX_MAPPINGS=y -CONFIG_MTD_PHYSMAP=m -# CONFIG_MTD_PHYSMAP_COMPAT is not set -CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_PHYSMAP_VERSATILE=y -CONFIG_MTD_PHYSMAP_GEMINI=y -CONFIG_MTD_PHYSMAP_GPIO_ADDR=y -CONFIG_MTD_SBC_GXX=m -CONFIG_MTD_AMD76XROM=m -CONFIG_MTD_ICHXROM=m -CONFIG_MTD_ESB2ROM=m -CONFIG_MTD_CK804XROM=m -CONFIG_MTD_SCB2_FLASH=m -CONFIG_MTD_NETtel=m -CONFIG_MTD_L440GX=m -CONFIG_MTD_PCI=m -CONFIG_MTD_PCMCIA=m -# CONFIG_MTD_PCMCIA_ANONYMOUS is not set -CONFIG_MTD_INTEL_VR_NOR=m -CONFIG_MTD_PLATRAM=m -# end of Mapping drivers for chip access - -# -# Self-contained MTD device drivers -# -CONFIG_MTD_PMC551=m -# CONFIG_MTD_PMC551_BUGFIX is not set -# CONFIG_MTD_PMC551_DEBUG is not set -CONFIG_MTD_DATAFLASH=m -# CONFIG_MTD_DATAFLASH_WRITE_VERIFY is not set -CONFIG_MTD_DATAFLASH_OTP=y -CONFIG_MTD_MCHP23K256=m -CONFIG_MTD_SST25L=m -CONFIG_MTD_SLRAM=m -CONFIG_MTD_PHRAM=m -CONFIG_MTD_MTDRAM=m -CONFIG_MTDRAM_TOTAL_SIZE=4096 -CONFIG_MTDRAM_ERASE_SIZE=128 -CONFIG_MTD_BLOCK2MTD=m - -# -# Disk-On-Chip Device Drivers -# -CONFIG_MTD_DOCG3=m -CONFIG_BCH_CONST_M=14 -CONFIG_BCH_CONST_T=4 -# end of Self-contained MTD device drivers - -CONFIG_MTD_NAND_CORE=m -CONFIG_MTD_ONENAND=m -# CONFIG_MTD_ONENAND_VERIFY_WRITE is not set -CONFIG_MTD_ONENAND_GENERIC=m -CONFIG_MTD_ONENAND_OTP=y -CONFIG_MTD_ONENAND_2X_PROGRAM=y -CONFIG_MTD_NAND_ECC_SW_HAMMING=m -CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y -CONFIG_MTD_RAW_NAND=m -CONFIG_MTD_NAND_ECC_SW_BCH=y - -# -# Raw/parallel NAND flash controllers -# -CONFIG_MTD_NAND_DENALI=m -CONFIG_MTD_NAND_DENALI_PCI=m -CONFIG_MTD_NAND_DENALI_DT=m -CONFIG_MTD_NAND_CAFE=m -CONFIG_MTD_NAND_MXIC=m -CONFIG_MTD_NAND_GPIO=m -CONFIG_MTD_NAND_PLATFORM=m -CONFIG_MTD_NAND_CADENCE=m -CONFIG_MTD_NAND_ARASAN=m - -# -# Misc -# -CONFIG_MTD_SM_COMMON=m -CONFIG_MTD_NAND_NANDSIM=m -CONFIG_MTD_NAND_RICOH=m -CONFIG_MTD_NAND_DISKONCHIP=m -# CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADVANCED is not set 
-CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADDRESS=0 -CONFIG_MTD_NAND_DISKONCHIP_BBTWRITE=y -CONFIG_MTD_SPI_NAND=m - -# -# LPDDR & LPDDR2 PCM memory drivers -# -CONFIG_MTD_LPDDR=m -CONFIG_MTD_QINFO_PROBE=m -# end of LPDDR & LPDDR2 PCM memory drivers - -CONFIG_MTD_SPI_NOR=m -CONFIG_MTD_SPI_NOR_USE_4K_SECTORS=y -CONFIG_SPI_INTEL_SPI=m -CONFIG_SPI_INTEL_SPI_PCI=m -CONFIG_SPI_INTEL_SPI_PLATFORM=m -CONFIG_MTD_UBI=m -CONFIG_MTD_UBI_WL_THRESHOLD=4096 -CONFIG_MTD_UBI_BEB_LIMIT=20 -CONFIG_MTD_UBI_FASTMAP=y -CONFIG_MTD_UBI_GLUEBI=m -CONFIG_MTD_UBI_BLOCK=y -CONFIG_MTD_HYPERBUS=m -CONFIG_DTC=y -CONFIG_OF=y -# CONFIG_OF_UNITTEST is not set -CONFIG_OF_FLATTREE=y -CONFIG_OF_EARLY_FLATTREE=y -CONFIG_OF_KOBJ=y -CONFIG_OF_DYNAMIC=y -CONFIG_OF_ADDRESS=y -CONFIG_OF_IRQ=y -CONFIG_OF_NET=y -CONFIG_OF_MDIO=m -CONFIG_OF_RESERVED_MEM=y -CONFIG_OF_RESOLVE=y -CONFIG_OF_OVERLAY=y -CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_SERIAL=m -CONFIG_PARPORT_PC_FIFO=y -CONFIG_PARPORT_PC_SUPERIO=y -CONFIG_PARPORT_PC_PCMCIA=m -CONFIG_PARPORT_AX88796=m -CONFIG_PARPORT_1284=y -CONFIG_PARPORT_NOT_PC=y -CONFIG_PNP=y -CONFIG_PNP_DEBUG_MESSAGES=y - -# -# Protocols -# -CONFIG_PNPACPI=y -CONFIG_BLK_DEV=y -# CONFIG_BLK_DEV_NULL_BLK is not set -CONFIG_BLK_DEV_FD=m -CONFIG_CDROM=m -# CONFIG_PARIDE is not set -CONFIG_BLK_DEV_PCIESSD_MTIP32XX=m -CONFIG_ZRAM=m -CONFIG_ZRAM_WRITEBACK=y -# CONFIG_ZRAM_MEMORY_TRACKING is not set -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_BLK_DEV_DRBD=m -# CONFIG_DRBD_FAULT_INJECTION is not set -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_SKD=m -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_RAM=m -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=16384 -CONFIG_CDROM_PKTCDVD=m -CONFIG_CDROM_PKTCDVD_BUFFERS=8 -# CONFIG_CDROM_PKTCDVD_WCACHE is not set -CONFIG_ATA_OVER_ETH=m -CONFIG_XEN_BLKDEV_FRONTEND=m -CONFIG_XEN_BLKDEV_BACKEND=m -CONFIG_VIRTIO_BLK=m -CONFIG_BLK_DEV_RBD=m -CONFIG_BLK_DEV_RSXX=m -CONFIG_BLK_DEV_RNBD=y -CONFIG_BLK_DEV_RNBD_CLIENT=m -CONFIG_BLK_DEV_RNBD_SERVER=m - -# -# NVME Support -# -CONFIG_NVME_CORE=y -CONFIG_BLK_DEV_NVME=y -CONFIG_NVME_MULTIPATH=y -CONFIG_NVME_HWMON=y -CONFIG_NVME_FABRICS=m -CONFIG_NVME_RDMA=m -CONFIG_NVME_FC=m -CONFIG_NVME_TCP=m -CONFIG_NVME_TARGET=m -CONFIG_NVME_TARGET_LOOP=m -CONFIG_NVME_TARGET_RDMA=m -CONFIG_NVME_TARGET_FC=m -CONFIG_NVME_TARGET_FCLOOP=m -CONFIG_NVME_TARGET_TCP=m -# end of NVME Support - -# -# Misc devices -# -CONFIG_SENSORS_LIS3LV02D=m -CONFIG_AD525X_DPOT=m -CONFIG_AD525X_DPOT_I2C=m -CONFIG_AD525X_DPOT_SPI=m -# CONFIG_DUMMY_IRQ is not set -CONFIG_IBM_ASM=m -CONFIG_PHANTOM=m -CONFIG_TIFM_CORE=m -CONFIG_TIFM_7XX1=m -CONFIG_ICS932S401=m -CONFIG_ENCLOSURE_SERVICES=m -CONFIG_HP_ILO=m -CONFIG_APDS9802ALS=m -CONFIG_ISL29003=m -CONFIG_ISL29020=m -CONFIG_SENSORS_TSL2550=m -CONFIG_SENSORS_BH1770=m -CONFIG_SENSORS_APDS990X=m -CONFIG_HMC6352=m -CONFIG_DS1682=m -CONFIG_VMWARE_BALLOON=m -CONFIG_LATTICE_ECP3_CONFIG=m -# CONFIG_SRAM is not set -CONFIG_PCI_ENDPOINT_TEST=m -CONFIG_XILINX_SDFEC=m -CONFIG_MISC_RTSX=m -CONFIG_PVPANIC=m -CONFIG_C2PORT=m -CONFIG_C2PORT_DURAMAR_2150=m - -# -# EEPROM support -# -CONFIG_EEPROM_AT24=m -# CONFIG_EEPROM_AT25 is not set -CONFIG_EEPROM_LEGACY=m -CONFIG_EEPROM_MAX6875=m -CONFIG_EEPROM_93CX6=m -# CONFIG_EEPROM_93XX46 is not set -CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EEPROM_EE1004=m -# end of EEPROM support - -CONFIG_CB710_CORE=m -# CONFIG_CB710_DEBUG is not set -CONFIG_CB710_DEBUG_ASSUMPTIONS=y - -# -# Texas Instruments shared transport line 
discipline -# -CONFIG_TI_ST=m -# end of Texas Instruments shared transport line discipline - -CONFIG_SENSORS_LIS3_I2C=m -CONFIG_ALTERA_STAPL=m -CONFIG_INTEL_MEI=m -CONFIG_INTEL_MEI_ME=m -CONFIG_INTEL_MEI_TXE=m -CONFIG_INTEL_MEI_HDCP=m -CONFIG_VMWARE_VMCI=m - -# -# Intel MIC & related support -# -CONFIG_INTEL_MIC_BUS=m -CONFIG_SCIF_BUS=m -CONFIG_VOP_BUS=m -CONFIG_INTEL_MIC_HOST=m -CONFIG_INTEL_MIC_CARD=m -CONFIG_SCIF=m -CONFIG_MIC_COSM=m -CONFIG_VOP=m -# end of Intel MIC & related support - -CONFIG_GENWQE=m -CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY=0 -CONFIG_ECHO=m -CONFIG_MISC_ALCOR_PCI=m -CONFIG_MISC_RTSX_PCI=m -CONFIG_MISC_RTSX_USB=m -CONFIG_HABANA_AI=m -CONFIG_UACCE=m -# end of Misc devices - -CONFIG_HAVE_IDE=y -# CONFIG_IDE is not set - -# -# SCSI device support -# -CONFIG_SCSI_MOD=y -CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y -CONFIG_SCSI_DMA=y -CONFIG_SCSI_NETLINK=y -CONFIG_SCSI_PROC_FS=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=m -CONFIG_BLK_DEV_SR=m -CONFIG_CHR_DEV_SG=m -CONFIG_CHR_DEV_SCH=m -CONFIG_SCSI_ENCLOSURE=m -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SCAN_ASYNC=y - -# -# SCSI Transports -# -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=m -CONFIG_SCSI_ISCSI_ATTRS=m -CONFIG_SCSI_SAS_ATTRS=m -CONFIG_SCSI_SAS_LIBSAS=m -CONFIG_SCSI_SAS_ATA=y -CONFIG_SCSI_SAS_HOST_SMP=y -CONFIG_SCSI_SRP_ATTRS=m -# end of SCSI Transports - -CONFIG_SCSI_LOWLEVEL=y -CONFIG_ISCSI_TCP=m -CONFIG_ISCSI_BOOT_SYSFS=m -CONFIG_SCSI_CXGB3_ISCSI=m -CONFIG_SCSI_CXGB4_ISCSI=m -CONFIG_SCSI_BNX2_ISCSI=m -CONFIG_SCSI_BNX2X_FCOE=m -CONFIG_BE2ISCSI=m -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_HPSA=m -CONFIG_SCSI_3W_9XXX=m -CONFIG_SCSI_3W_SAS=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -CONFIG_AIC7XXX_DEBUG_ENABLE=y -CONFIG_AIC7XXX_DEBUG_MASK=0 -CONFIG_AIC7XXX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -CONFIG_AIC79XX_DEBUG_ENABLE=y -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_AIC79XX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC94XX=m -CONFIG_AIC94XX_DEBUG=y -CONFIG_SCSI_MVSAS=m -CONFIG_SCSI_MVSAS_DEBUG=y -CONFIG_SCSI_MVSAS_TASKLET=y -CONFIG_SCSI_MVUMI=m -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_ARCMSR=m -CONFIG_SCSI_ESAS2R=m -CONFIG_MEGARAID_NEWGEN=y -CONFIG_MEGARAID_MM=m -CONFIG_MEGARAID_MAILBOX=m -CONFIG_MEGARAID_LEGACY=m -CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_MPT3SAS=m -CONFIG_SCSI_MPT2SAS_MAX_SGE=128 -CONFIG_SCSI_MPT3SAS_MAX_SGE=128 -CONFIG_SCSI_MPT2SAS=m -CONFIG_SCSI_SMARTPQI=m -CONFIG_SCSI_UFSHCD=m -CONFIG_SCSI_UFSHCD_PCI=m -# CONFIG_SCSI_UFS_DWC_TC_PCI is not set -CONFIG_SCSI_UFSHCD_PLATFORM=m -CONFIG_SCSI_UFS_CDNS_PLATFORM=m -# CONFIG_SCSI_UFS_DWC_TC_PLATFORM is not set -CONFIG_SCSI_UFS_BSG=y -CONFIG_SCSI_HPTIOP=m -CONFIG_SCSI_BUSLOGIC=m -CONFIG_SCSI_FLASHPOINT=y -CONFIG_SCSI_MYRB=m -CONFIG_SCSI_MYRS=m -CONFIG_VMWARE_PVSCSI=m -CONFIG_XEN_SCSI_FRONTEND=m -CONFIG_HYPERV_STORAGE=m -CONFIG_LIBFC=m -CONFIG_LIBFCOE=m -CONFIG_FCOE=m -CONFIG_FCOE_FNIC=m -CONFIG_SCSI_SNIC=m -# CONFIG_SCSI_SNIC_DEBUG_FS is not set -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_FDOMAIN=m -CONFIG_SCSI_FDOMAIN_PCI=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_ISCI=m -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_SCSI_STEX=m -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 
-CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -CONFIG_SCSI_SYM53C8XX_MMIO=y -CONFIG_SCSI_IPR=m -CONFIG_SCSI_IPR_TRACE=y -CONFIG_SCSI_IPR_DUMP=y -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_QLA_FC=m -CONFIG_TCM_QLA2XXX=m -# CONFIG_TCM_QLA2XXX_DEBUG is not set -CONFIG_SCSI_QLA_ISCSI=m -CONFIG_QEDI=m -CONFIG_QEDF=m -CONFIG_SCSI_LPFC=m -# CONFIG_SCSI_LPFC_DEBUG_FS is not set -CONFIG_SCSI_DC395x=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_WD719X=m -CONFIG_SCSI_DEBUG=m -CONFIG_SCSI_PMCRAID=m -CONFIG_SCSI_PM8001=m -CONFIG_SCSI_BFA_FC=m -CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_CHELSIO_FCOE=m -CONFIG_SCSI_LOWLEVEL_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_QLOGIC=m -CONFIG_PCMCIA_SYM53C500=m -CONFIG_SCSI_DH=y -CONFIG_SCSI_DH_RDAC=m -CONFIG_SCSI_DH_HP_SW=m -CONFIG_SCSI_DH_EMC=m -CONFIG_SCSI_DH_ALUA=m -# end of SCSI device support - -CONFIG_ATA=y -CONFIG_SATA_HOST=y -CONFIG_PATA_TIMINGS=y -CONFIG_ATA_VERBOSE_ERROR=y -CONFIG_ATA_FORCE=y -CONFIG_ATA_ACPI=y -CONFIG_SATA_ZPODD=y -CONFIG_SATA_PMP=y - -# -# Controllers with non-SFF native interface -# -CONFIG_SATA_AHCI=y -CONFIG_SATA_MOBILE_LPM_POLICY=3 -CONFIG_SATA_AHCI_PLATFORM=m -CONFIG_AHCI_CEVA=m -CONFIG_AHCI_QORIQ=m -CONFIG_SATA_INIC162X=m -CONFIG_SATA_ACARD_AHCI=m -CONFIG_SATA_SIL24=m -CONFIG_ATA_SFF=y - -# -# SFF controllers with custom DMA interface -# -CONFIG_PDC_ADMA=m -CONFIG_SATA_QSTOR=m -CONFIG_SATA_SX4=m -CONFIG_ATA_BMDMA=y - -# -# SATA SFF controllers with BMDMA -# -CONFIG_ATA_PIIX=m -CONFIG_SATA_DWC=m -# CONFIG_SATA_DWC_OLD_DMA is not set -# CONFIG_SATA_DWC_DEBUG is not set -CONFIG_SATA_MV=m -CONFIG_SATA_NV=m -CONFIG_SATA_PROMISE=m -CONFIG_SATA_SIL=m -CONFIG_SATA_SIS=m -CONFIG_SATA_SVW=m -CONFIG_SATA_ULI=m -CONFIG_SATA_VIA=m -CONFIG_SATA_VITESSE=m - -# -# PATA SFF controllers with BMDMA -# -CONFIG_PATA_ALI=m -CONFIG_PATA_AMD=m -CONFIG_PATA_ARTOP=m -CONFIG_PATA_ATIIXP=m -CONFIG_PATA_ATP867X=m -CONFIG_PATA_CMD64X=m -CONFIG_PATA_CYPRESS=m -CONFIG_PATA_EFAR=m -CONFIG_PATA_HPT366=m -CONFIG_PATA_HPT37X=m -CONFIG_PATA_HPT3X2N=m -CONFIG_PATA_HPT3X3=m -CONFIG_PATA_HPT3X3_DMA=y -CONFIG_PATA_IT8213=m -CONFIG_PATA_IT821X=m -CONFIG_PATA_JMICRON=m -CONFIG_PATA_MARVELL=m -CONFIG_PATA_NETCELL=m -CONFIG_PATA_NINJA32=m -CONFIG_PATA_NS87415=m -CONFIG_PATA_OLDPIIX=m -CONFIG_PATA_OPTIDMA=m -CONFIG_PATA_PDC2027X=m -CONFIG_PATA_PDC_OLD=m -CONFIG_PATA_RADISYS=m -CONFIG_PATA_RDC=m -CONFIG_PATA_SCH=m -CONFIG_PATA_SERVERWORKS=m -CONFIG_PATA_SIL680=m -CONFIG_PATA_SIS=m -CONFIG_PATA_TOSHIBA=m -CONFIG_PATA_TRIFLEX=m -CONFIG_PATA_VIA=m -CONFIG_PATA_WINBOND=m - -# -# PIO-only SFF controllers -# -CONFIG_PATA_CMD640_PCI=m -CONFIG_PATA_MPIIX=m -CONFIG_PATA_NS87410=m -CONFIG_PATA_OPTI=m -CONFIG_PATA_PCMCIA=m -# CONFIG_PATA_PLATFORM is not set -CONFIG_PATA_RZ1000=m - -# -# Generic fallback / legacy drivers -# -CONFIG_PATA_ACPI=m -CONFIG_ATA_GENERIC=m -CONFIG_PATA_LEGACY=m -CONFIG_MD=y -CONFIG_BLK_DEV_MD=m -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID10=m -CONFIG_MD_RAID456=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m -CONFIG_MD_CLUSTER=m -CONFIG_BCACHE=m -# CONFIG_BCACHE_DEBUG is not set -# CONFIG_BCACHE_CLOSURES_DEBUG is not set -CONFIG_BCACHE_ASYNC_REGISTRAION=y -CONFIG_BLK_DEV_DM_BUILTIN=y -CONFIG_BLK_DEV_DM=m -CONFIG_DM_DEBUG=y -CONFIG_DM_BUFIO=m -# CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING is not set -CONFIG_DM_BIO_PRISON=m -CONFIG_DM_PERSISTENT_DATA=m -CONFIG_DM_UNSTRIPED=m -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_CACHE=m -CONFIG_DM_CACHE_SMQ=m 
-CONFIG_DM_WRITECACHE=m -CONFIG_DM_EBS=m -CONFIG_DM_ERA=m -CONFIG_DM_CLONE=m -CONFIG_DM_MIRROR=m -CONFIG_DM_LOG_USERSPACE=m -CONFIG_DM_RAID=m -CONFIG_DM_ZERO=m -CONFIG_DM_MULTIPATH=m -CONFIG_DM_MULTIPATH_QL=m -CONFIG_DM_MULTIPATH_ST=m -CONFIG_DM_MULTIPATH_HST=m -CONFIG_DM_DELAY=m -CONFIG_DM_DUST=m -CONFIG_DM_UEVENT=y -CONFIG_DM_FLAKEY=m -CONFIG_DM_VERITY=m -CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y -CONFIG_DM_VERITY_FEC=y -CONFIG_DM_SWITCH=m -CONFIG_DM_LOG_WRITES=m -CONFIG_DM_INTEGRITY=m -CONFIG_DM_ZONED=m -CONFIG_TARGET_CORE=m -CONFIG_TCM_IBLOCK=m -CONFIG_TCM_FILEIO=m -CONFIG_TCM_PSCSI=m -CONFIG_TCM_USER2=m -CONFIG_LOOPBACK_TARGET=m -CONFIG_TCM_FC=m -CONFIG_ISCSI_TARGET=m -CONFIG_ISCSI_TARGET_CXGB4=m -CONFIG_SBP_TARGET=m -CONFIG_FUSION=y -CONFIG_FUSION_SPI=m -CONFIG_FUSION_FC=m -CONFIG_FUSION_SAS=m -CONFIG_FUSION_MAX_SGE=128 -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -# CONFIG_FUSION_LOGGING is not set - -# -# IEEE 1394 (FireWire) support -# -CONFIG_FIREWIRE=m -CONFIG_FIREWIRE_OHCI=m -CONFIG_FIREWIRE_SBP2=m -CONFIG_FIREWIRE_NET=m -CONFIG_FIREWIRE_NOSY=m -# end of IEEE 1394 (FireWire) support - -CONFIG_MACINTOSH_DRIVERS=y -CONFIG_MAC_EMUMOUSEBTN=m -CONFIG_NETDEVICES=y -CONFIG_MII=m -CONFIG_NET_CORE=y -CONFIG_BONDING=m -CONFIG_DUMMY=m -CONFIG_WIREGUARD=m -# CONFIG_WIREGUARD_DEBUG is not set -CONFIG_EQUALIZER=m -CONFIG_NET_FC=y -CONFIG_IFB=m -CONFIG_NET_TEAM=m -CONFIG_NET_TEAM_MODE_BROADCAST=m -CONFIG_NET_TEAM_MODE_ROUNDROBIN=m -CONFIG_NET_TEAM_MODE_RANDOM=m -CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m -CONFIG_NET_TEAM_MODE_LOADBALANCE=m -CONFIG_MACVLAN=m -CONFIG_MACVTAP=m -CONFIG_IPVLAN_L3S=y -CONFIG_IPVLAN=m -CONFIG_IPVTAP=m -CONFIG_VXLAN=m -CONFIG_GENEVE=m -CONFIG_BAREUDP=m -CONFIG_GTP=m -CONFIG_MACSEC=m -CONFIG_NETCONSOLE=m -CONFIG_NETCONSOLE_DYNAMIC=y -CONFIG_NETPOLL=y -CONFIG_NET_POLL_CONTROLLER=y -CONFIG_NTB_NETDEV=m -CONFIG_RIONET=m -CONFIG_RIONET_TX_SIZE=128 -CONFIG_RIONET_RX_SIZE=128 -CONFIG_TUN=m -CONFIG_TAP=m -# CONFIG_TUN_VNET_CROSS_LE is not set -CONFIG_VETH=m -CONFIG_VIRTIO_NET=m -CONFIG_NLMON=m -CONFIG_NET_VRF=m -CONFIG_VSOCKMON=m -CONFIG_SUNGEM_PHY=m -# CONFIG_ARCNET is not set -CONFIG_ATM_DRIVERS=y -# CONFIG_ATM_DUMMY is not set -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_NICSTAR=m -# CONFIG_ATM_NICSTAR_USE_SUNI is not set -# CONFIG_ATM_NICSTAR_USE_IDT77105 is not set -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -# CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E=m -CONFIG_ATM_FORE200E_USE_TASKLET=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_HE=m -CONFIG_ATM_HE_USE_SUNI=y -CONFIG_ATM_SOLOS=m -CONFIG_CAIF_DRIVERS=y -CONFIG_CAIF_TTY=m -CONFIG_CAIF_SPI_SLAVE=m -CONFIG_CAIF_SPI_SYNC=y -CONFIG_CAIF_HSI=m -CONFIG_CAIF_VIRTIO=m - -# -# Distributed Switch Architecture drivers -# -CONFIG_B53=m -# CONFIG_B53_SPI_DRIVER is not set -CONFIG_B53_MDIO_DRIVER=m -CONFIG_B53_MMAP_DRIVER=m -CONFIG_B53_SRAB_DRIVER=m -CONFIG_B53_SERDES=m -CONFIG_NET_DSA_BCM_SF2=m -CONFIG_NET_DSA_LOOP=m -CONFIG_NET_DSA_LANTIQ_GSWIP=m -CONFIG_NET_DSA_MT7530=m -CONFIG_NET_DSA_MV88E6060=m -CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON=m -CONFIG_NET_DSA_MICROCHIP_KSZ9477=m 
-CONFIG_NET_DSA_MICROCHIP_KSZ9477_I2C=m -CONFIG_NET_DSA_MICROCHIP_KSZ9477_SPI=m -CONFIG_NET_DSA_MICROCHIP_KSZ8795=m -CONFIG_NET_DSA_MICROCHIP_KSZ8795_SPI=m -CONFIG_NET_DSA_MV88E6XXX=m -CONFIG_NET_DSA_MV88E6XXX_GLOBAL2=y -CONFIG_NET_DSA_MV88E6XXX_PTP=y -CONFIG_NET_DSA_AR9331=m -CONFIG_NET_DSA_SJA1105=m -CONFIG_NET_DSA_SJA1105_PTP=y -CONFIG_NET_DSA_SJA1105_TAS=y -CONFIG_NET_DSA_SJA1105_VL=y -CONFIG_NET_DSA_QCA8K=m -CONFIG_NET_DSA_REALTEK_SMI=m -CONFIG_NET_DSA_SMSC_LAN9303=m -CONFIG_NET_DSA_SMSC_LAN9303_I2C=m -CONFIG_NET_DSA_SMSC_LAN9303_MDIO=m -CONFIG_NET_DSA_VITESSE_VSC73XX=m -CONFIG_NET_DSA_VITESSE_VSC73XX_SPI=m -CONFIG_NET_DSA_VITESSE_VSC73XX_PLATFORM=m -# end of Distributed Switch Architecture drivers - -CONFIG_ETHERNET=y -CONFIG_MDIO=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_3C589=m -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_NET_VENDOR_ADAPTEC=y -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_NET_VENDOR_AGERE=y -CONFIG_ET131X=m -CONFIG_NET_VENDOR_ALACRITECH=y -CONFIG_SLICOSS=m -CONFIG_NET_VENDOR_ALTEON=y -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_ALTERA_TSE=m -CONFIG_NET_VENDOR_AMAZON=y -CONFIG_ENA_ETHERNET=m -CONFIG_NET_VENDOR_AMD=y -CONFIG_AMD8111_ETH=m -CONFIG_PCNET32=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_AMD_XGBE=m -CONFIG_AMD_XGBE_DCB=y -CONFIG_AMD_XGBE_HAVE_ECC=y -CONFIG_NET_VENDOR_AQUANTIA=y -CONFIG_AQTION=m -CONFIG_NET_VENDOR_ARC=y -CONFIG_NET_VENDOR_ATHEROS=y -CONFIG_ATL2=m -CONFIG_ATL1=m -CONFIG_ATL1E=m -CONFIG_ATL1C=m -CONFIG_ALX=m -CONFIG_NET_VENDOR_AURORA=y -CONFIG_AURORA_NB8800=m -CONFIG_NET_VENDOR_BROADCOM=y -CONFIG_B44=m -CONFIG_B44_PCI_AUTOSELECT=y -CONFIG_B44_PCICORE_AUTOSELECT=y -CONFIG_B44_PCI=y -CONFIG_BCMGENET=m -CONFIG_BNX2=m -CONFIG_CNIC=m -CONFIG_TIGON3=m -CONFIG_TIGON3_HWMON=y -CONFIG_BNX2X=m -CONFIG_BNX2X_SRIOV=y -CONFIG_SYSTEMPORT=m -CONFIG_BNXT=m -CONFIG_BNXT_SRIOV=y -CONFIG_BNXT_FLOWER_OFFLOAD=y -CONFIG_BNXT_DCB=y -CONFIG_BNXT_HWMON=y -CONFIG_NET_VENDOR_BROCADE=y -CONFIG_BNA=m -CONFIG_NET_VENDOR_CADENCE=y -CONFIG_MACB=m -CONFIG_MACB_USE_HWSTAMP=y -CONFIG_MACB_PCI=m -CONFIG_NET_VENDOR_CAVIUM=y -CONFIG_THUNDER_NIC_PF=m -CONFIG_THUNDER_NIC_VF=m -CONFIG_THUNDER_NIC_BGX=m -CONFIG_THUNDER_NIC_RGX=m -CONFIG_CAVIUM_PTP=m -CONFIG_LIQUIDIO=m -CONFIG_LIQUIDIO_VF=m -CONFIG_NET_VENDOR_CHELSIO=y -CONFIG_CHELSIO_T1=m -CONFIG_CHELSIO_T1_1G=y -CONFIG_CHELSIO_T3=m -CONFIG_CHELSIO_T4=m -CONFIG_CHELSIO_T4_DCB=y -CONFIG_CHELSIO_T4_FCOE=y -CONFIG_CHELSIO_T4VF=m -CONFIG_CHELSIO_LIB=m -CONFIG_NET_VENDOR_CISCO=y -CONFIG_ENIC=m -CONFIG_NET_VENDOR_CORTINA=y -CONFIG_GEMINI_ETHERNET=m -CONFIG_CX_ECAT=m -CONFIG_DNET=m -CONFIG_NET_VENDOR_DEC=y -CONFIG_NET_TULIP=y -CONFIG_DE2104X=m -CONFIG_DE2104X_DSL=0 -CONFIG_TULIP=m -CONFIG_TULIP_MWI=y -CONFIG_TULIP_MMIO=y -CONFIG_TULIP_NAPI=y -CONFIG_TULIP_NAPI_HW_MITIGATION=y -CONFIG_DE4X5=m -CONFIG_WINBOND_840=m -CONFIG_DM9102=m -CONFIG_ULI526X=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_NET_VENDOR_DLINK=y -CONFIG_DL2K=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_NET_VENDOR_EMULEX=y -CONFIG_BE2NET=m -CONFIG_BE2NET_HWMON=y -CONFIG_BE2NET_BE2=y -CONFIG_BE2NET_BE3=y -CONFIG_BE2NET_LANCER=y -CONFIG_BE2NET_SKYHAWK=y -CONFIG_NET_VENDOR_EZCHIP=y -CONFIG_EZCHIP_NPS_MANAGEMENT_ENET=m -CONFIG_NET_VENDOR_FUJITSU=y -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_NET_VENDOR_GOOGLE=y -CONFIG_GVE=m -CONFIG_NET_VENDOR_HUAWEI=y -CONFIG_HINIC=m -CONFIG_NET_VENDOR_I825XX=y -CONFIG_NET_VENDOR_INTEL=y -CONFIG_E100=m -CONFIG_E1000=m -CONFIG_E1000E=m -CONFIG_E1000E_HWTS=y -CONFIG_IGB=m -CONFIG_IGB_HWMON=y -CONFIG_IGB_DCA=y -CONFIG_IGBVF=m 
-CONFIG_IXGB=m -CONFIG_IXGBE=m -CONFIG_IXGBE_HWMON=y -CONFIG_IXGBE_DCA=y -CONFIG_IXGBE_DCB=y -# CONFIG_IXGBE_IPSEC is not set -CONFIG_IXGBEVF=m -CONFIG_IXGBEVF_IPSEC=y -CONFIG_I40E=m -CONFIG_I40E_DCB=y -CONFIG_IAVF=m -CONFIG_I40EVF=m -CONFIG_ICE=m -CONFIG_FM10K=m -CONFIG_IGC=m -CONFIG_JME=m -CONFIG_NET_VENDOR_MARVELL=y -CONFIG_MVMDIO=m -CONFIG_SKGE=m -# CONFIG_SKGE_DEBUG is not set -CONFIG_SKGE_GENESIS=y -CONFIG_SKY2=m -# CONFIG_SKY2_DEBUG is not set -CONFIG_NET_VENDOR_MELLANOX=y -CONFIG_MLX4_EN=m -CONFIG_MLX4_EN_DCB=y -CONFIG_MLX4_CORE=m -CONFIG_MLX4_DEBUG=y -CONFIG_MLX4_CORE_GEN2=y -CONFIG_MLX5_CORE=m -CONFIG_MLX5_ACCEL=y -CONFIG_MLX5_FPGA=y -CONFIG_MLX5_CORE_EN=y -CONFIG_MLX5_EN_ARFS=y -CONFIG_MLX5_EN_RXNFC=y -CONFIG_MLX5_MPFS=y -CONFIG_MLX5_ESWITCH=y -CONFIG_MLX5_CLS_ACT=y -CONFIG_MLX5_TC_CT=y -CONFIG_MLX5_CORE_EN_DCB=y -CONFIG_MLX5_CORE_IPOIB=y -CONFIG_MLX5_FPGA_IPSEC=y -CONFIG_MLX5_EN_IPSEC=y -CONFIG_MLX5_FPGA_TLS=y -CONFIG_MLX5_TLS=y -CONFIG_MLX5_EN_TLS=y -CONFIG_MLX5_SW_STEERING=y -CONFIG_MLXSW_CORE=m -CONFIG_MLXSW_CORE_HWMON=y -CONFIG_MLXSW_CORE_THERMAL=y -CONFIG_MLXSW_PCI=m -CONFIG_MLXSW_I2C=m -CONFIG_MLXSW_SWITCHIB=m -CONFIG_MLXSW_SWITCHX2=m -CONFIG_MLXSW_SPECTRUM=m -CONFIG_MLXSW_SPECTRUM_DCB=y -CONFIG_MLXSW_MINIMAL=m -CONFIG_MLXFW=m -CONFIG_NET_VENDOR_MICREL=y -CONFIG_KS8842=m -CONFIG_KS8851=m -CONFIG_KS8851_MLL=m -CONFIG_KSZ884X_PCI=m -CONFIG_NET_VENDOR_MICROCHIP=y -CONFIG_ENC28J60=m -# CONFIG_ENC28J60_WRITEVERIFY is not set -CONFIG_ENCX24J600=m -CONFIG_LAN743X=m -CONFIG_NET_VENDOR_MICROSEMI=y -CONFIG_MSCC_OCELOT_SWITCH=m -CONFIG_MSCC_OCELOT_SWITCH_OCELOT=m -CONFIG_NET_VENDOR_MYRI=y -CONFIG_MYRI10GE=m -CONFIG_MYRI10GE_DCA=y -CONFIG_FEALNX=m -CONFIG_NET_VENDOR_NATSEMI=y -CONFIG_NATSEMI=m -CONFIG_NS83820=m -CONFIG_NET_VENDOR_NETERION=y -CONFIG_S2IO=m -CONFIG_VXGE=m -# CONFIG_VXGE_DEBUG_TRACE_ALL is not set -CONFIG_NET_VENDOR_NETRONOME=y -CONFIG_NFP=m -CONFIG_NFP_APP_FLOWER=y -CONFIG_NFP_APP_ABM_NIC=y -# CONFIG_NFP_DEBUG is not set -CONFIG_NET_VENDOR_NI=y -CONFIG_NI_XGE_MANAGEMENT_ENET=m -CONFIG_NET_VENDOR_8390=y -CONFIG_PCMCIA_AXNET=m -CONFIG_NE2K_PCI=m -CONFIG_PCMCIA_PCNET=m -CONFIG_NET_VENDOR_NVIDIA=y -CONFIG_FORCEDETH=m -CONFIG_NET_VENDOR_OKI=y -CONFIG_ETHOC=m -CONFIG_NET_VENDOR_PACKET_ENGINES=y -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_NET_VENDOR_PENSANDO=y -CONFIG_IONIC=m -CONFIG_NET_VENDOR_QLOGIC=y -CONFIG_QLA3XXX=m -CONFIG_QLCNIC=m -CONFIG_QLCNIC_SRIOV=y -CONFIG_QLCNIC_DCB=y -CONFIG_QLCNIC_HWMON=y -CONFIG_NETXEN_NIC=m -CONFIG_QED=m -CONFIG_QED_LL2=y -CONFIG_QED_SRIOV=y -CONFIG_QEDE=m -CONFIG_QED_RDMA=y -CONFIG_QED_ISCSI=y -CONFIG_QED_FCOE=y -CONFIG_QED_OOO=y -CONFIG_NET_VENDOR_QUALCOMM=y -CONFIG_QCA7000=m -CONFIG_QCA7000_SPI=m -CONFIG_QCA7000_UART=m -CONFIG_QCOM_EMAC=m -CONFIG_RMNET=m -CONFIG_NET_VENDOR_RDC=y -CONFIG_R6040=m -CONFIG_NET_VENDOR_REALTEK=y -CONFIG_ATP=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -CONFIG_8139TOO_TUNE_TWISTER=y -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_R8169=m -CONFIG_NET_VENDOR_RENESAS=y -CONFIG_NET_VENDOR_ROCKER=y -CONFIG_ROCKER=m -CONFIG_NET_VENDOR_SAMSUNG=y -CONFIG_SXGBE_ETH=m -CONFIG_NET_VENDOR_SEEQ=y -CONFIG_NET_VENDOR_SOLARFLARE=y -CONFIG_SFC=m -CONFIG_SFC_MTD=y -CONFIG_SFC_MCDI_MON=y -CONFIG_SFC_SRIOV=y -CONFIG_SFC_MCDI_LOGGING=y -CONFIG_SFC_FALCON=m -CONFIG_SFC_FALCON_MTD=y -CONFIG_NET_VENDOR_SILAN=y -CONFIG_SC92031=m -CONFIG_NET_VENDOR_SIS=y -CONFIG_SIS900=m -CONFIG_SIS190=m -CONFIG_NET_VENDOR_SMSC=y -CONFIG_PCMCIA_SMC91C92=m -CONFIG_EPIC100=m -CONFIG_SMSC911X=m 
-CONFIG_SMSC9420=m -CONFIG_NET_VENDOR_SOCIONEXT=y -CONFIG_NET_VENDOR_STMICRO=y -CONFIG_STMMAC_ETH=m -# CONFIG_STMMAC_SELFTESTS is not set -CONFIG_STMMAC_PLATFORM=m -CONFIG_DWMAC_DWC_QOS_ETH=m -CONFIG_DWMAC_GENERIC=m -CONFIG_DWMAC_INTEL=m -CONFIG_STMMAC_PCI=m -CONFIG_NET_VENDOR_SUN=y -CONFIG_HAPPYMEAL=m -CONFIG_SUNGEM=m -CONFIG_CASSINI=m -CONFIG_NIU=m -CONFIG_NET_VENDOR_SYNOPSYS=y -CONFIG_DWC_XLGMAC=m -CONFIG_DWC_XLGMAC_PCI=m -CONFIG_NET_VENDOR_TEHUTI=y -CONFIG_TEHUTI=m -CONFIG_NET_VENDOR_TI=y -# CONFIG_TI_CPSW_PHY_SEL is not set -CONFIG_TLAN=m -CONFIG_NET_VENDOR_VIA=y -CONFIG_VIA_RHINE=m -CONFIG_VIA_RHINE_MMIO=y -CONFIG_VIA_VELOCITY=m -CONFIG_NET_VENDOR_WIZNET=y -CONFIG_WIZNET_W5100=m -CONFIG_WIZNET_W5300=m -# CONFIG_WIZNET_BUS_DIRECT is not set -# CONFIG_WIZNET_BUS_INDIRECT is not set -CONFIG_WIZNET_BUS_ANY=y -CONFIG_WIZNET_W5100_SPI=m -CONFIG_NET_VENDOR_XILINX=y -CONFIG_XILINX_AXI_EMAC=m -CONFIG_XILINX_LL_TEMAC=m -CONFIG_NET_VENDOR_XIRCOM=y -CONFIG_PCMCIA_XIRC2PS=m -CONFIG_FDDI=m -CONFIG_DEFXX=m -CONFIG_DEFXX_MMIO=y -CONFIG_SKFP=m -# CONFIG_HIPPI is not set -CONFIG_NET_SB1000=m -CONFIG_MDIO_DEVICE=m -CONFIG_MDIO_BUS=m -CONFIG_MDIO_BCM_UNIMAC=m -CONFIG_MDIO_BITBANG=m -CONFIG_MDIO_BUS_MUX=m -CONFIG_MDIO_BUS_MUX_GPIO=m -CONFIG_MDIO_BUS_MUX_MMIOREG=m -CONFIG_MDIO_BUS_MUX_MULTIPLEXER=m -CONFIG_MDIO_CAVIUM=m -CONFIG_MDIO_GPIO=m -CONFIG_MDIO_HISI_FEMAC=m -CONFIG_MDIO_I2C=m -CONFIG_MDIO_IPQ4019=m -CONFIG_MDIO_IPQ8064=m -CONFIG_MDIO_MSCC_MIIM=m -CONFIG_MDIO_MVUSB=m -CONFIG_MDIO_OCTEON=m -CONFIG_MDIO_THUNDER=m -CONFIG_MDIO_XPCS=m -CONFIG_PHYLINK=m -CONFIG_PHYLIB=m -CONFIG_SWPHY=y -CONFIG_LED_TRIGGER_PHY=y - -# -# MII PHY device drivers -# -CONFIG_SFP=m -CONFIG_ADIN_PHY=m -CONFIG_AMD_PHY=m -CONFIG_AQUANTIA_PHY=m -CONFIG_AX88796B_PHY=m -CONFIG_BCM7XXX_PHY=m -CONFIG_BCM87XX_PHY=m -CONFIG_BCM_NET_PHYLIB=m -CONFIG_BROADCOM_PHY=m -CONFIG_BCM54140_PHY=m -CONFIG_BCM84881_PHY=m -CONFIG_CICADA_PHY=m -CONFIG_CORTINA_PHY=m -CONFIG_DAVICOM_PHY=m -CONFIG_DP83822_PHY=m -CONFIG_DP83TC811_PHY=m -CONFIG_DP83848_PHY=m -CONFIG_DP83867_PHY=m -CONFIG_DP83869_PHY=m -CONFIG_FIXED_PHY=m -CONFIG_ICPLUS_PHY=m -CONFIG_INTEL_XWAY_PHY=m -CONFIG_LSI_ET1011C_PHY=m -CONFIG_LXT_PHY=m -CONFIG_MARVELL_PHY=m -CONFIG_MARVELL_10G_PHY=m -CONFIG_MICREL_PHY=m -CONFIG_MICROCHIP_PHY=m -CONFIG_MICROCHIP_T1_PHY=m -CONFIG_MICROSEMI_PHY=m -CONFIG_NATIONAL_PHY=m -CONFIG_NXP_TJA11XX_PHY=m -CONFIG_AT803X_PHY=m -CONFIG_QSEMI_PHY=m -CONFIG_REALTEK_PHY=m -CONFIG_RENESAS_PHY=m -CONFIG_ROCKCHIP_PHY=m -CONFIG_SMSC_PHY=m -CONFIG_STE10XP=m -CONFIG_TERANETICS_PHY=m -CONFIG_VITESSE_PHY=m -CONFIG_XILINX_GMII2RGMII=m -CONFIG_MICREL_KS8995MA=m -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_MPPE=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPPOATM=m -CONFIG_PPPOE=m -CONFIG_PPTP=m -CONFIG_PPPOL2TP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_SLIP=m -CONFIG_SLHC=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y -CONFIG_USB_NET_DRIVERS=m -CONFIG_USB_CATC=m -CONFIG_USB_KAWETH=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_RTL8152=m -CONFIG_USB_LAN78XX=m -CONFIG_USB_USBNET=m -CONFIG_USB_NET_AX8817X=m -CONFIG_USB_NET_AX88179_178A=m -CONFIG_USB_NET_CDCETHER=m -CONFIG_USB_NET_CDC_EEM=m -CONFIG_USB_NET_CDC_NCM=m -CONFIG_USB_NET_HUAWEI_CDC_NCM=m -CONFIG_USB_NET_CDC_MBIM=m -CONFIG_USB_NET_DM9601=m -CONFIG_USB_NET_SR9700=m -CONFIG_USB_NET_SR9800=m -CONFIG_USB_NET_SMSC75XX=m -CONFIG_USB_NET_SMSC95XX=m -CONFIG_USB_NET_GL620A=m -CONFIG_USB_NET_NET1080=m -CONFIG_USB_NET_PLUSB=m 
-CONFIG_USB_NET_MCS7830=m -CONFIG_USB_NET_RNDIS_HOST=m -CONFIG_USB_NET_CDC_SUBSET_ENABLE=m -CONFIG_USB_NET_CDC_SUBSET=m -CONFIG_USB_ALI_M5632=y -CONFIG_USB_AN2720=y -CONFIG_USB_BELKIN=y -CONFIG_USB_ARMLINUX=y -CONFIG_USB_EPSON2888=y -CONFIG_USB_KC2190=y -CONFIG_USB_NET_ZAURUS=m -CONFIG_USB_NET_CX82310_ETH=m -CONFIG_USB_NET_KALMIA=m -CONFIG_USB_NET_QMI_WWAN=m -CONFIG_USB_HSO=m -CONFIG_USB_NET_INT51X1=m -CONFIG_USB_CDC_PHONET=m -CONFIG_USB_IPHETH=m -CONFIG_USB_SIERRA_NET=m -CONFIG_USB_VL600=m -CONFIG_USB_NET_CH9200=m -CONFIG_USB_NET_AQC111=m -CONFIG_WLAN=y -# CONFIG_WIRELESS_WDS is not set -CONFIG_WLAN_VENDOR_ADMTEK=y -CONFIG_ADM8211=m -CONFIG_ATH_COMMON=m -CONFIG_WLAN_VENDOR_ATH=y -# CONFIG_ATH_DEBUG is not set -CONFIG_ATH5K=m -CONFIG_ATH5K_DEBUG=y -CONFIG_ATH5K_TRACER=y -CONFIG_ATH5K_PCI=y -CONFIG_ATH9K_HW=m -CONFIG_ATH9K_COMMON=m -CONFIG_ATH9K_COMMON_DEBUG=y -CONFIG_ATH9K_BTCOEX_SUPPORT=y -CONFIG_ATH9K=m -CONFIG_ATH9K_PCI=y -CONFIG_ATH9K_AHB=y -CONFIG_ATH9K_DEBUGFS=y -CONFIG_ATH9K_STATION_STATISTICS=y -CONFIG_ATH9K_DYNACK=y -CONFIG_ATH9K_WOW=y -CONFIG_ATH9K_RFKILL=y -CONFIG_ATH9K_CHANNEL_CONTEXT=y -CONFIG_ATH9K_PCOEM=y -CONFIG_ATH9K_PCI_NO_EEPROM=m -CONFIG_ATH9K_HTC=m -CONFIG_ATH9K_HTC_DEBUGFS=y -CONFIG_ATH9K_HWRNG=y -CONFIG_ATH9K_COMMON_SPECTRAL=y -CONFIG_CARL9170=m -CONFIG_CARL9170_LEDS=y -CONFIG_CARL9170_DEBUGFS=y -CONFIG_CARL9170_WPC=y -# CONFIG_CARL9170_HWRNG is not set -CONFIG_ATH6KL=m -CONFIG_ATH6KL_SDIO=m -CONFIG_ATH6KL_USB=m -CONFIG_ATH6KL_DEBUG=y -CONFIG_ATH6KL_TRACING=y -CONFIG_AR5523=m -CONFIG_WIL6210=m -CONFIG_WIL6210_ISR_COR=y -CONFIG_WIL6210_TRACING=y -CONFIG_WIL6210_DEBUGFS=y -CONFIG_ATH10K=m -CONFIG_ATH10K_CE=y -CONFIG_ATH10K_PCI=m -CONFIG_ATH10K_AHB=y -CONFIG_ATH10K_SDIO=m -CONFIG_ATH10K_USB=m -CONFIG_ATH10K_DEBUG=y -CONFIG_ATH10K_DEBUGFS=y -CONFIG_ATH10K_SPECTRAL=y -CONFIG_ATH10K_TRACING=y -CONFIG_WCN36XX=m -CONFIG_WCN36XX_DEBUGFS=y -CONFIG_WLAN_VENDOR_ATMEL=y -CONFIG_ATMEL=m -CONFIG_PCI_ATMEL=m -CONFIG_PCMCIA_ATMEL=m -CONFIG_AT76C50X_USB=m -CONFIG_WLAN_VENDOR_BROADCOM=y -CONFIG_B43=m -CONFIG_B43_BCMA=y -CONFIG_B43_SSB=y -CONFIG_B43_BUSES_BCMA_AND_SSB=y -# CONFIG_B43_BUSES_BCMA is not set -# CONFIG_B43_BUSES_SSB is not set -CONFIG_B43_PCI_AUTOSELECT=y -CONFIG_B43_PCICORE_AUTOSELECT=y -CONFIG_B43_SDIO=y -CONFIG_B43_BCMA_PIO=y -CONFIG_B43_PIO=y -CONFIG_B43_PHY_G=y -CONFIG_B43_PHY_N=y -CONFIG_B43_PHY_LP=y -CONFIG_B43_PHY_HT=y -CONFIG_B43_LEDS=y -CONFIG_B43_HWRNG=y -# CONFIG_B43_DEBUG is not set -CONFIG_B43LEGACY=m -CONFIG_B43LEGACY_PCI_AUTOSELECT=y -CONFIG_B43LEGACY_PCICORE_AUTOSELECT=y -CONFIG_B43LEGACY_LEDS=y -CONFIG_B43LEGACY_HWRNG=y -CONFIG_B43LEGACY_DEBUG=y -CONFIG_B43LEGACY_DMA=y -CONFIG_B43LEGACY_PIO=y -CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y -# CONFIG_B43LEGACY_DMA_MODE is not set -# CONFIG_B43LEGACY_PIO_MODE is not set -CONFIG_BRCMUTIL=m -CONFIG_BRCMSMAC=m -CONFIG_BRCMFMAC=m -CONFIG_BRCMFMAC_PROTO_BCDC=y -CONFIG_BRCMFMAC_PROTO_MSGBUF=y -CONFIG_BRCMFMAC_SDIO=y -CONFIG_BRCMFMAC_USB=y -CONFIG_BRCMFMAC_PCIE=y -CONFIG_BRCM_TRACING=y -CONFIG_BRCMDBG=y -CONFIG_WLAN_VENDOR_CISCO=y -CONFIG_AIRO=m -CONFIG_AIRO_CS=m -CONFIG_WLAN_VENDOR_INTEL=y -CONFIG_IPW2100=m -CONFIG_IPW2100_MONITOR=y -# CONFIG_IPW2100_DEBUG is not set -CONFIG_IPW2200=m -CONFIG_IPW2200_MONITOR=y -CONFIG_IPW2200_RADIOTAP=y -CONFIG_IPW2200_PROMISCUOUS=y -CONFIG_IPW2200_QOS=y -# CONFIG_IPW2200_DEBUG is not set -CONFIG_LIBIPW=m -# CONFIG_LIBIPW_DEBUG is not set -CONFIG_IWLEGACY=m -CONFIG_IWL4965=m -CONFIG_IWL3945=m - -# -# iwl3945 / iwl4965 Debugging Options -# -CONFIG_IWLEGACY_DEBUG=y -CONFIG_IWLEGACY_DEBUGFS=y 
-# end of iwl3945 / iwl4965 Debugging Options - -CONFIG_IWLWIFI=m -CONFIG_IWLWIFI_LEDS=y -CONFIG_IWLDVM=m -CONFIG_IWLMVM=m -CONFIG_IWLWIFI_OPMODE_MODULAR=y -# CONFIG_IWLWIFI_BCAST_FILTERING is not set - -# -# Debugging Options -# -CONFIG_IWLWIFI_DEBUG=y -CONFIG_IWLWIFI_DEBUGFS=y -CONFIG_IWLWIFI_DEVICE_TRACING=y -# end of Debugging Options - -CONFIG_WLAN_VENDOR_INTERSIL=y -CONFIG_HOSTAP=m -CONFIG_HOSTAP_FIRMWARE=y -CONFIG_HOSTAP_FIRMWARE_NVRAM=y -CONFIG_HOSTAP_PLX=m -CONFIG_HOSTAP_PCI=m -CONFIG_HOSTAP_CS=m -CONFIG_HERMES=m -CONFIG_HERMES_PRISM=y -CONFIG_HERMES_CACHE_FW_ON_INIT=y -CONFIG_PLX_HERMES=m -CONFIG_TMD_HERMES=m -CONFIG_NORTEL_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_PCMCIA_SPECTRUM=m -CONFIG_ORINOCO_USB=m -CONFIG_P54_COMMON=m -CONFIG_P54_USB=m -CONFIG_P54_PCI=m -CONFIG_P54_SPI=m -# CONFIG_P54_SPI_DEFAULT_EEPROM is not set -CONFIG_P54_LEDS=y -CONFIG_PRISM54=m -CONFIG_WLAN_VENDOR_MARVELL=y -CONFIG_LIBERTAS=m -CONFIG_LIBERTAS_USB=m -CONFIG_LIBERTAS_CS=m -CONFIG_LIBERTAS_SDIO=m -CONFIG_LIBERTAS_SPI=m -# CONFIG_LIBERTAS_DEBUG is not set -CONFIG_LIBERTAS_MESH=y -CONFIG_LIBERTAS_THINFIRM=m -# CONFIG_LIBERTAS_THINFIRM_DEBUG is not set -CONFIG_LIBERTAS_THINFIRM_USB=m -CONFIG_MWIFIEX=m -CONFIG_MWIFIEX_SDIO=m -CONFIG_MWIFIEX_PCIE=m -CONFIG_MWIFIEX_USB=m -CONFIG_MWL8K=m -CONFIG_WLAN_VENDOR_MEDIATEK=y -CONFIG_MT7601U=m -CONFIG_MT76_CORE=m -CONFIG_MT76_LEDS=y -CONFIG_MT76_USB=m -CONFIG_MT76x02_LIB=m -CONFIG_MT76x02_USB=m -CONFIG_MT76x0_COMMON=m -CONFIG_MT76x0U=m -CONFIG_MT76x0E=m -CONFIG_MT76x2_COMMON=m -CONFIG_MT76x2E=m -CONFIG_MT76x2U=m -CONFIG_MT7603E=m -CONFIG_MT7615_COMMON=m -CONFIG_MT7615E=m -CONFIG_MT7663U=m -CONFIG_MT7915E=m -CONFIG_WLAN_VENDOR_RALINK=y -CONFIG_RT2X00=m -CONFIG_RT2400PCI=m -CONFIG_RT2500PCI=m -CONFIG_RT61PCI=m -CONFIG_RT2800PCI=m -CONFIG_RT2800PCI_RT33XX=y -CONFIG_RT2800PCI_RT35XX=y -CONFIG_RT2800PCI_RT53XX=y -CONFIG_RT2800PCI_RT3290=y -CONFIG_RT2500USB=m -CONFIG_RT73USB=m -CONFIG_RT2800USB=m -CONFIG_RT2800USB_RT33XX=y -CONFIG_RT2800USB_RT35XX=y -CONFIG_RT2800USB_RT3573=y -CONFIG_RT2800USB_RT53XX=y -CONFIG_RT2800USB_RT55XX=y -CONFIG_RT2800USB_UNKNOWN=y -CONFIG_RT2800_LIB=m -CONFIG_RT2800_LIB_MMIO=m -CONFIG_RT2X00_LIB_MMIO=m -CONFIG_RT2X00_LIB_PCI=m -CONFIG_RT2X00_LIB_USB=m -CONFIG_RT2X00_LIB=m -CONFIG_RT2X00_LIB_FIRMWARE=y -CONFIG_RT2X00_LIB_CRYPTO=y -CONFIG_RT2X00_LIB_LEDS=y -CONFIG_RT2X00_LIB_DEBUGFS=y -# CONFIG_RT2X00_DEBUG is not set -CONFIG_WLAN_VENDOR_REALTEK=y -CONFIG_RTL8180=m -CONFIG_RTL8187=m -CONFIG_RTL8187_LEDS=y -CONFIG_RTL_CARDS=m -CONFIG_RTL8192CE=m -CONFIG_RTL8192SE=m -CONFIG_RTL8192DE=m -CONFIG_RTL8723AE=m -CONFIG_RTL8723BE=m -CONFIG_RTL8188EE=m -CONFIG_RTL8192EE=m -CONFIG_RTL8821AE=m -CONFIG_RTL8192CU=m -CONFIG_RTLWIFI=m -CONFIG_RTLWIFI_PCI=m -CONFIG_RTLWIFI_USB=m -CONFIG_RTLWIFI_DEBUG=y -CONFIG_RTL8192C_COMMON=m -CONFIG_RTL8723_COMMON=m -CONFIG_RTLBTCOEXIST=m -CONFIG_RTL8XXXU=m -CONFIG_RTL8XXXU_UNTESTED=y -CONFIG_RTW88=m -CONFIG_RTW88_CORE=m -CONFIG_RTW88_PCI=m -CONFIG_RTW88_8822B=m -CONFIG_RTW88_8822C=m -CONFIG_RTW88_8723D=m -CONFIG_RTW88_8822BE=m -CONFIG_RTW88_8822CE=m -CONFIG_RTW88_8723DE=m -CONFIG_RTW88_DEBUG=y -CONFIG_RTW88_DEBUGFS=y -CONFIG_WLAN_VENDOR_RSI=y -CONFIG_RSI_91X=m -CONFIG_RSI_DEBUGFS=y -CONFIG_RSI_SDIO=m -CONFIG_RSI_USB=m -CONFIG_RSI_COEX=y -CONFIG_WLAN_VENDOR_ST=y -CONFIG_CW1200=m -CONFIG_CW1200_WLAN_SDIO=m -CONFIG_CW1200_WLAN_SPI=m -CONFIG_WLAN_VENDOR_TI=y -CONFIG_WL1251=m -CONFIG_WL1251_SPI=m -CONFIG_WL1251_SDIO=m -CONFIG_WL12XX=m -CONFIG_WL18XX=m -CONFIG_WLCORE=m -CONFIG_WLCORE_SPI=m -CONFIG_WLCORE_SDIO=m 
-CONFIG_WILINK_PLATFORM_DATA=y -CONFIG_WLAN_VENDOR_ZYDAS=y -CONFIG_USB_ZD1201=m -CONFIG_ZD1211RW=m -# CONFIG_ZD1211RW_DEBUG is not set -CONFIG_WLAN_VENDOR_QUANTENNA=y -CONFIG_QTNFMAC=m -CONFIG_QTNFMAC_PCIE=m -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_WL3501=m -CONFIG_MAC80211_HWSIM=m -CONFIG_USB_NET_RNDIS_WLAN=m -CONFIG_VIRT_WIFI=m - -# -# WiMAX Wireless Broadband devices -# -CONFIG_WIMAX_I2400M=m -CONFIG_WIMAX_I2400M_USB=m -CONFIG_WIMAX_I2400M_DEBUG_LEVEL=8 -# end of WiMAX Wireless Broadband devices - -# CONFIG_WAN is not set -CONFIG_IEEE802154_DRIVERS=m -CONFIG_IEEE802154_FAKELB=m -CONFIG_IEEE802154_AT86RF230=m -# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set -CONFIG_IEEE802154_MRF24J40=m -CONFIG_IEEE802154_CC2520=m -CONFIG_IEEE802154_ATUSB=m -CONFIG_IEEE802154_ADF7242=m -CONFIG_IEEE802154_CA8210=m -# CONFIG_IEEE802154_CA8210_DEBUGFS is not set -CONFIG_IEEE802154_MCR20A=m -CONFIG_IEEE802154_HWSIM=m -CONFIG_XEN_NETDEV_FRONTEND=m -CONFIG_XEN_NETDEV_BACKEND=m -CONFIG_VMXNET3=m -CONFIG_FUJITSU_ES=m -CONFIG_USB4_NET=m -CONFIG_HYPERV_NET=m -CONFIG_NETDEVSIM=m -CONFIG_NET_FAILOVER=m -CONFIG_ISDN=y -CONFIG_ISDN_CAPI=y -CONFIG_CAPI_TRACE=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_MISDN=m -CONFIG_MISDN_DSP=m -CONFIG_MISDN_L1OIP=m - -# -# mISDN hardware drivers -# -CONFIG_MISDN_HFCPCI=m -CONFIG_MISDN_HFCMULTI=m -CONFIG_MISDN_HFCUSB=m -CONFIG_MISDN_AVMFRITZ=m -CONFIG_MISDN_SPEEDFAX=m -CONFIG_MISDN_INFINEON=m -CONFIG_MISDN_W6692=m -CONFIG_MISDN_NETJET=m -CONFIG_MISDN_HDLC=m -CONFIG_MISDN_IPAC=m -CONFIG_MISDN_ISAR=m -CONFIG_NVM=y -CONFIG_NVM_PBLK=m -# CONFIG_NVM_PBLK_DEBUG is not set - -# -# Input device support -# -CONFIG_INPUT=y -CONFIG_INPUT_LEDS=m -CONFIG_INPUT_FF_MEMLESS=m -CONFIG_INPUT_POLLDEV=m -CONFIG_INPUT_SPARSEKMAP=m -CONFIG_INPUT_MATRIXKMAP=m - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_PSAUX=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m -# CONFIG_INPUT_EVBUG is not set - -# -# Input Device Drivers -# -CONFIG_INPUT_KEYBOARD=y -CONFIG_KEYBOARD_ADC=m -CONFIG_KEYBOARD_ADP5520=m -CONFIG_KEYBOARD_ADP5588=m -CONFIG_KEYBOARD_ADP5589=m -CONFIG_KEYBOARD_APPLESPI=m -CONFIG_KEYBOARD_ATKBD=m -CONFIG_KEYBOARD_QT1050=m -CONFIG_KEYBOARD_QT1070=m -CONFIG_KEYBOARD_QT2160=m -CONFIG_KEYBOARD_DLINK_DIR685=m -CONFIG_KEYBOARD_LKKBD=m -CONFIG_KEYBOARD_GPIO=m -CONFIG_KEYBOARD_GPIO_POLLED=m -CONFIG_KEYBOARD_TCA6416=m -CONFIG_KEYBOARD_TCA8418=m -CONFIG_KEYBOARD_MATRIX=m -CONFIG_KEYBOARD_LM8323=m -CONFIG_KEYBOARD_LM8333=m -CONFIG_KEYBOARD_MAX7359=m -CONFIG_KEYBOARD_MCS=m -CONFIG_KEYBOARD_MPR121=m -CONFIG_KEYBOARD_NEWTON=m -CONFIG_KEYBOARD_OPENCORES=m -CONFIG_KEYBOARD_SAMSUNG=m -CONFIG_KEYBOARD_STOWAWAY=m -CONFIG_KEYBOARD_SUNKBD=m -CONFIG_KEYBOARD_STMPE=m -CONFIG_KEYBOARD_IQS62X=m -CONFIG_KEYBOARD_OMAP4=m -CONFIG_KEYBOARD_TC3589X=m -CONFIG_KEYBOARD_TM2_TOUCHKEY=m -CONFIG_KEYBOARD_TWL4030=m -CONFIG_KEYBOARD_XTKBD=m -CONFIG_KEYBOARD_CROS_EC=m -CONFIG_KEYBOARD_CAP11XX=m -CONFIG_KEYBOARD_BCM=m -CONFIG_KEYBOARD_MTK_PMIC=m -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=m -CONFIG_MOUSE_PS2_ALPS=y -CONFIG_MOUSE_PS2_BYD=y -CONFIG_MOUSE_PS2_LOGIPS2PP=y -CONFIG_MOUSE_PS2_SYNAPTICS=y -CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS=y -CONFIG_MOUSE_PS2_CYPRESS=y -CONFIG_MOUSE_PS2_LIFEBOOK=y -CONFIG_MOUSE_PS2_TRACKPOINT=y -CONFIG_MOUSE_PS2_ELANTECH=y -CONFIG_MOUSE_PS2_ELANTECH_SMBUS=y -CONFIG_MOUSE_PS2_SENTELIC=y -CONFIG_MOUSE_PS2_TOUCHKIT=y -CONFIG_MOUSE_PS2_FOCALTECH=y -CONFIG_MOUSE_PS2_VMMOUSE=y -CONFIG_MOUSE_PS2_SMBUS=y -CONFIG_MOUSE_SERIAL=m 
-CONFIG_MOUSE_APPLETOUCH=m -CONFIG_MOUSE_BCM5974=m -CONFIG_MOUSE_CYAPA=m -CONFIG_MOUSE_ELAN_I2C=m -CONFIG_MOUSE_ELAN_I2C_I2C=y -CONFIG_MOUSE_ELAN_I2C_SMBUS=y -CONFIG_MOUSE_VSXXXAA=m -CONFIG_MOUSE_GPIO=m -CONFIG_MOUSE_SYNAPTICS_I2C=m -CONFIG_MOUSE_SYNAPTICS_USB=m -CONFIG_INPUT_JOYSTICK=y -CONFIG_JOYSTICK_ANALOG=m -CONFIG_JOYSTICK_A3D=m -CONFIG_JOYSTICK_ADI=m -CONFIG_JOYSTICK_COBRA=m -CONFIG_JOYSTICK_GF2K=m -CONFIG_JOYSTICK_GRIP=m -CONFIG_JOYSTICK_GRIP_MP=m -CONFIG_JOYSTICK_GUILLEMOT=m -CONFIG_JOYSTICK_INTERACT=m -CONFIG_JOYSTICK_SIDEWINDER=m -CONFIG_JOYSTICK_TMDC=m -CONFIG_JOYSTICK_IFORCE=m -CONFIG_JOYSTICK_IFORCE_USB=m -CONFIG_JOYSTICK_IFORCE_232=m -CONFIG_JOYSTICK_WARRIOR=m -CONFIG_JOYSTICK_MAGELLAN=m -CONFIG_JOYSTICK_SPACEORB=m -CONFIG_JOYSTICK_SPACEBALL=m -CONFIG_JOYSTICK_STINGER=m -CONFIG_JOYSTICK_TWIDJOY=m -CONFIG_JOYSTICK_ZHENHUA=m -CONFIG_JOYSTICK_DB9=m -CONFIG_JOYSTICK_GAMECON=m -CONFIG_JOYSTICK_TURBOGRAFX=m -CONFIG_JOYSTICK_AS5011=m -CONFIG_JOYSTICK_JOYDUMP=m -CONFIG_JOYSTICK_XPAD=m -CONFIG_JOYSTICK_XPAD_FF=y -CONFIG_JOYSTICK_XPAD_LEDS=y -CONFIG_JOYSTICK_WALKERA0701=m -CONFIG_JOYSTICK_PSXPAD_SPI=m -CONFIG_JOYSTICK_PSXPAD_SPI_FF=y -CONFIG_JOYSTICK_PXRC=m -CONFIG_JOYSTICK_FSIA6B=m -CONFIG_INPUT_TABLET=y -CONFIG_TABLET_USB_ACECAD=m -CONFIG_TABLET_USB_AIPTEK=m -CONFIG_TABLET_USB_GTCO=m -CONFIG_TABLET_USB_HANWANG=m -CONFIG_TABLET_USB_KBTAB=m -CONFIG_TABLET_USB_PEGASUS=m -CONFIG_TABLET_SERIAL_WACOM4=m -CONFIG_INPUT_TOUCHSCREEN=y -CONFIG_TOUCHSCREEN_PROPERTIES=y -CONFIG_TOUCHSCREEN_88PM860X=m -CONFIG_TOUCHSCREEN_ADS7846=m -CONFIG_TOUCHSCREEN_AD7877=m -CONFIG_TOUCHSCREEN_AD7879=m -CONFIG_TOUCHSCREEN_AD7879_I2C=m -CONFIG_TOUCHSCREEN_AD7879_SPI=m -CONFIG_TOUCHSCREEN_ADC=m -CONFIG_TOUCHSCREEN_AR1021_I2C=m -CONFIG_TOUCHSCREEN_ATMEL_MXT=m -CONFIG_TOUCHSCREEN_ATMEL_MXT_T37=y -CONFIG_TOUCHSCREEN_AUO_PIXCIR=m -CONFIG_TOUCHSCREEN_BU21013=m -CONFIG_TOUCHSCREEN_BU21029=m -CONFIG_TOUCHSCREEN_CHIPONE_ICN8318=m -CONFIG_TOUCHSCREEN_CHIPONE_ICN8505=m -CONFIG_TOUCHSCREEN_CY8CTMA140=m -CONFIG_TOUCHSCREEN_CY8CTMG110=m -CONFIG_TOUCHSCREEN_CYTTSP_CORE=m -CONFIG_TOUCHSCREEN_CYTTSP_I2C=m -CONFIG_TOUCHSCREEN_CYTTSP_SPI=m -CONFIG_TOUCHSCREEN_CYTTSP4_CORE=m -CONFIG_TOUCHSCREEN_CYTTSP4_I2C=m -CONFIG_TOUCHSCREEN_CYTTSP4_SPI=m -CONFIG_TOUCHSCREEN_DA9034=m -CONFIG_TOUCHSCREEN_DA9052=m -CONFIG_TOUCHSCREEN_DYNAPRO=m -CONFIG_TOUCHSCREEN_HAMPSHIRE=m -CONFIG_TOUCHSCREEN_EETI=m -CONFIG_TOUCHSCREEN_EGALAX=m -CONFIG_TOUCHSCREEN_EGALAX_SERIAL=m -CONFIG_TOUCHSCREEN_EXC3000=m -CONFIG_TOUCHSCREEN_FUJITSU=m -CONFIG_TOUCHSCREEN_GOODIX=m -CONFIG_TOUCHSCREEN_HIDEEP=m -CONFIG_TOUCHSCREEN_ILI210X=m -CONFIG_TOUCHSCREEN_S6SY761=m -CONFIG_TOUCHSCREEN_GUNZE=m -CONFIG_TOUCHSCREEN_EKTF2127=m -CONFIG_TOUCHSCREEN_ELAN=m -CONFIG_TOUCHSCREEN_ELO=m -CONFIG_TOUCHSCREEN_WACOM_W8001=m -CONFIG_TOUCHSCREEN_WACOM_I2C=m -CONFIG_TOUCHSCREEN_MAX11801=m -CONFIG_TOUCHSCREEN_MCS5000=m -CONFIG_TOUCHSCREEN_MMS114=m -CONFIG_TOUCHSCREEN_MELFAS_MIP4=m -CONFIG_TOUCHSCREEN_MTOUCH=m -CONFIG_TOUCHSCREEN_IMX6UL_TSC=m -CONFIG_TOUCHSCREEN_INEXIO=m -CONFIG_TOUCHSCREEN_MK712=m -CONFIG_TOUCHSCREEN_PENMOUNT=m -CONFIG_TOUCHSCREEN_EDT_FT5X06=m -CONFIG_TOUCHSCREEN_TOUCHRIGHT=m -CONFIG_TOUCHSCREEN_TOUCHWIN=m -CONFIG_TOUCHSCREEN_TI_AM335X_TSC=m -CONFIG_TOUCHSCREEN_UCB1400=m -CONFIG_TOUCHSCREEN_PIXCIR=m -CONFIG_TOUCHSCREEN_WDT87XX_I2C=m -CONFIG_TOUCHSCREEN_WM831X=m -CONFIG_TOUCHSCREEN_WM97XX=m -CONFIG_TOUCHSCREEN_WM9705=y -CONFIG_TOUCHSCREEN_WM9712=y -CONFIG_TOUCHSCREEN_WM9713=y -CONFIG_TOUCHSCREEN_USB_COMPOSITE=m -CONFIG_TOUCHSCREEN_MC13783=m -CONFIG_TOUCHSCREEN_USB_EGALAX=y 
-CONFIG_TOUCHSCREEN_USB_PANJIT=y -CONFIG_TOUCHSCREEN_USB_3M=y -CONFIG_TOUCHSCREEN_USB_ITM=y -CONFIG_TOUCHSCREEN_USB_ETURBO=y -CONFIG_TOUCHSCREEN_USB_GUNZE=y -CONFIG_TOUCHSCREEN_USB_DMC_TSC10=y -CONFIG_TOUCHSCREEN_USB_IRTOUCH=y -CONFIG_TOUCHSCREEN_USB_IDEALTEK=y -CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH=y -CONFIG_TOUCHSCREEN_USB_GOTOP=y -CONFIG_TOUCHSCREEN_USB_JASTEC=y -CONFIG_TOUCHSCREEN_USB_ELO=y -CONFIG_TOUCHSCREEN_USB_E2I=y -CONFIG_TOUCHSCREEN_USB_ZYTRONIC=y -CONFIG_TOUCHSCREEN_USB_ETT_TC45USB=y -CONFIG_TOUCHSCREEN_USB_NEXIO=y -CONFIG_TOUCHSCREEN_USB_EASYTOUCH=y -CONFIG_TOUCHSCREEN_TOUCHIT213=m -CONFIG_TOUCHSCREEN_TSC_SERIO=m -CONFIG_TOUCHSCREEN_TSC200X_CORE=m -CONFIG_TOUCHSCREEN_TSC2004=m -CONFIG_TOUCHSCREEN_TSC2005=m -CONFIG_TOUCHSCREEN_TSC2007=m -CONFIG_TOUCHSCREEN_TSC2007_IIO=y -CONFIG_TOUCHSCREEN_PCAP=m -CONFIG_TOUCHSCREEN_RM_TS=m -CONFIG_TOUCHSCREEN_SILEAD=m -CONFIG_TOUCHSCREEN_SIS_I2C=m -CONFIG_TOUCHSCREEN_ST1232=m -CONFIG_TOUCHSCREEN_STMFTS=m -CONFIG_TOUCHSCREEN_STMPE=m -CONFIG_TOUCHSCREEN_SUR40=m -CONFIG_TOUCHSCREEN_SURFACE3_SPI=m -CONFIG_TOUCHSCREEN_SX8654=m -CONFIG_TOUCHSCREEN_TPS6507X=m -CONFIG_TOUCHSCREEN_ZET6223=m -CONFIG_TOUCHSCREEN_ZFORCE=m -CONFIG_TOUCHSCREEN_COLIBRI_VF50=m -CONFIG_TOUCHSCREEN_ROHM_BU21023=m -CONFIG_TOUCHSCREEN_IQS5XX=m -CONFIG_INPUT_MISC=y -CONFIG_INPUT_88PM860X_ONKEY=m -CONFIG_INPUT_88PM80X_ONKEY=m -CONFIG_INPUT_AD714X=m -CONFIG_INPUT_AD714X_I2C=m -CONFIG_INPUT_AD714X_SPI=m -CONFIG_INPUT_ARIZONA_HAPTICS=m -CONFIG_INPUT_ATMEL_CAPTOUCH=m -CONFIG_INPUT_BMA150=m -CONFIG_INPUT_E3X0_BUTTON=m -CONFIG_INPUT_PCSPKR=m -CONFIG_INPUT_MAX77650_ONKEY=m -CONFIG_INPUT_MAX77693_HAPTIC=m -CONFIG_INPUT_MAX8925_ONKEY=m -CONFIG_INPUT_MAX8997_HAPTIC=m -CONFIG_INPUT_MC13783_PWRBUTTON=m -CONFIG_INPUT_MMA8450=m -CONFIG_INPUT_APANEL=m -CONFIG_INPUT_GPIO_BEEPER=m -CONFIG_INPUT_GPIO_DECODER=m -CONFIG_INPUT_GPIO_VIBRA=m -CONFIG_INPUT_CPCAP_PWRBUTTON=m -CONFIG_INPUT_ATLAS_BTNS=m -CONFIG_INPUT_ATI_REMOTE2=m -CONFIG_INPUT_KEYSPAN_REMOTE=m -CONFIG_INPUT_KXTJ9=m -CONFIG_INPUT_POWERMATE=m -CONFIG_INPUT_YEALINK=m -CONFIG_INPUT_CM109=m -CONFIG_INPUT_REGULATOR_HAPTIC=m -CONFIG_INPUT_RETU_PWRBUTTON=m -CONFIG_INPUT_TPS65218_PWRBUTTON=m -CONFIG_INPUT_AXP20X_PEK=m -CONFIG_INPUT_TWL4030_PWRBUTTON=m -CONFIG_INPUT_TWL4030_VIBRA=m -CONFIG_INPUT_TWL6040_VIBRA=m -CONFIG_INPUT_UINPUT=m -CONFIG_INPUT_PALMAS_PWRBUTTON=m -CONFIG_INPUT_PCF50633_PMU=m -CONFIG_INPUT_PCF8574=m -CONFIG_INPUT_PWM_BEEPER=m -CONFIG_INPUT_PWM_VIBRA=m -CONFIG_INPUT_RK805_PWRKEY=m -CONFIG_INPUT_GPIO_ROTARY_ENCODER=m -CONFIG_INPUT_DA9052_ONKEY=m -CONFIG_INPUT_DA9055_ONKEY=m -CONFIG_INPUT_DA9063_ONKEY=m -CONFIG_INPUT_WM831X_ON=m -CONFIG_INPUT_PCAP=m -CONFIG_INPUT_ADXL34X=m -CONFIG_INPUT_ADXL34X_I2C=m -CONFIG_INPUT_ADXL34X_SPI=m -CONFIG_INPUT_IMS_PCU=m -CONFIG_INPUT_IQS269A=m -CONFIG_INPUT_CMA3000=m -CONFIG_INPUT_CMA3000_I2C=m -CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m -CONFIG_INPUT_IDEAPAD_SLIDEBAR=m -CONFIG_INPUT_SOC_BUTTON_ARRAY=m -CONFIG_INPUT_DRV260X_HAPTICS=m -CONFIG_INPUT_DRV2665_HAPTICS=m -CONFIG_INPUT_DRV2667_HAPTICS=m -CONFIG_INPUT_RAVE_SP_PWRBUTTON=m -CONFIG_INPUT_STPMIC1_ONKEY=m -CONFIG_RMI4_CORE=m -CONFIG_RMI4_I2C=m -CONFIG_RMI4_SPI=m -CONFIG_RMI4_SMB=m -CONFIG_RMI4_F03=y -CONFIG_RMI4_F03_SERIO=m -CONFIG_RMI4_2D_SENSOR=y -CONFIG_RMI4_F11=y -CONFIG_RMI4_F12=y -CONFIG_RMI4_F30=y -CONFIG_RMI4_F34=y -# CONFIG_RMI4_F54 is not set -CONFIG_RMI4_F55=y - -# -# Hardware I/O ports -# -CONFIG_SERIO=m -CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y -CONFIG_SERIO_I8042=m -CONFIG_SERIO_SERPORT=m -CONFIG_SERIO_CT82C710=m -CONFIG_SERIO_PARKBD=m 
-CONFIG_SERIO_PCIPS2=m -CONFIG_SERIO_LIBPS2=m -CONFIG_SERIO_RAW=m -CONFIG_SERIO_ALTERA_PS2=m -CONFIG_SERIO_PS2MULT=m -CONFIG_SERIO_ARC_PS2=m -# CONFIG_SERIO_APBPS2 is not set -CONFIG_HYPERV_KEYBOARD=m -CONFIG_SERIO_GPIO_PS2=m -CONFIG_USERIO=m -CONFIG_GAMEPORT=m -CONFIG_GAMEPORT_NS558=m -CONFIG_GAMEPORT_L4=m -CONFIG_GAMEPORT_EMU10K1=m -CONFIG_GAMEPORT_FM801=m -# end of Hardware I/O ports -# end of Input device support - -# -# Character devices -# -CONFIG_TTY=y -CONFIG_VT=y -CONFIG_CONSOLE_TRANSLATIONS=y -CONFIG_VT_CONSOLE=y -CONFIG_VT_CONSOLE_SLEEP=y -CONFIG_HW_CONSOLE=y -CONFIG_VT_HW_CONSOLE_BINDING=y -CONFIG_UNIX98_PTYS=y -# CONFIG_LEGACY_PTYS is not set -CONFIG_LDISC_AUTOLOAD=y - -# -# Serial drivers -# -CONFIG_SERIAL_EARLYCON=y -CONFIG_SERIAL_8250=y -# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set -CONFIG_SERIAL_8250_PNP=y -# CONFIG_SERIAL_8250_16550A_VARIANTS is not set -CONFIG_SERIAL_8250_FINTEK=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_DMA=y -CONFIG_SERIAL_8250_PCI=y -CONFIG_SERIAL_8250_EXAR=m -CONFIG_SERIAL_8250_CS=m -CONFIG_SERIAL_8250_MEN_MCB=m -CONFIG_SERIAL_8250_NR_UARTS=32 -CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -CONFIG_SERIAL_8250_EXTENDED=y -CONFIG_SERIAL_8250_MANY_PORTS=y -CONFIG_SERIAL_8250_ASPEED_VUART=m -CONFIG_SERIAL_8250_SHARE_IRQ=y -# CONFIG_SERIAL_8250_DETECT_IRQ is not set -CONFIG_SERIAL_8250_RSA=y -CONFIG_SERIAL_8250_DWLIB=y -CONFIG_SERIAL_8250_DW=m -CONFIG_SERIAL_8250_RT288X=y -CONFIG_SERIAL_8250_LPSS=y -CONFIG_SERIAL_8250_MID=y -CONFIG_SERIAL_OF_PLATFORM=m - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_MAX3100=m -CONFIG_SERIAL_MAX310X=m -CONFIG_SERIAL_UARTLITE=m -CONFIG_SERIAL_UARTLITE_NR_UARTS=1 -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_SERIAL_JSM=m -CONFIG_SERIAL_SIFIVE=m -CONFIG_SERIAL_LANTIQ=m -CONFIG_SERIAL_SCCNXP=m -CONFIG_SERIAL_SC16IS7XX_CORE=m -CONFIG_SERIAL_SC16IS7XX=m -CONFIG_SERIAL_SC16IS7XX_I2C=y -CONFIG_SERIAL_SC16IS7XX_SPI=y -CONFIG_SERIAL_ALTERA_JTAGUART=m -CONFIG_SERIAL_ALTERA_UART=m -CONFIG_SERIAL_ALTERA_UART_MAXPORTS=4 -CONFIG_SERIAL_ALTERA_UART_BAUDRATE=115200 -CONFIG_SERIAL_IFX6X60=m -CONFIG_SERIAL_XILINX_PS_UART=m -CONFIG_SERIAL_ARC=m -CONFIG_SERIAL_ARC_NR_PORTS=1 -CONFIG_SERIAL_RP2=m -CONFIG_SERIAL_RP2_NR_UARTS=32 -CONFIG_SERIAL_FSL_LPUART=m -CONFIG_SERIAL_FSL_LINFLEXUART=m -CONFIG_SERIAL_CONEXANT_DIGICOLOR=m -CONFIG_SERIAL_MEN_Z135=m -CONFIG_SERIAL_SPRD=m -# end of Serial drivers - -CONFIG_SERIAL_MCTRL_GPIO=y -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -CONFIG_CYZ_INTR=y -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_SYNCLINK=m -CONFIG_SYNCLINKMP=m -CONFIG_SYNCLINK_GT=m -CONFIG_ISI=m -CONFIG_N_HDLC=m -CONFIG_N_GSM=m -CONFIG_NOZOMI=m -CONFIG_NULL_TTY=m -CONFIG_TRACE_ROUTER=m -CONFIG_TRACE_SINK=m -CONFIG_HVC_DRIVER=y -CONFIG_HVC_IRQ=y -CONFIG_HVC_XEN=y -CONFIG_HVC_XEN_FRONTEND=y -CONFIG_SERIAL_DEV_BUS=y -CONFIG_SERIAL_DEV_CTRL_TTYPORT=y -# CONFIG_TTY_PRINTK is not set -CONFIG_PRINTER=m -# CONFIG_LP_CONSOLE is not set -CONFIG_PPDEV=m -CONFIG_VIRTIO_CONSOLE=m -CONFIG_IPMI_HANDLER=m -CONFIG_IPMI_DMI_DECODE=y -CONFIG_IPMI_PLAT_DATA=y -# CONFIG_IPMI_PANIC_EVENT is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_SI=m -CONFIG_IPMI_SSIF=m -CONFIG_IPMI_WATCHDOG=m -CONFIG_IPMI_POWEROFF=m -CONFIG_IPMB_DEVICE_INTERFACE=m -CONFIG_HW_RANDOM=m -CONFIG_HW_RANDOM_TIMERIOMEM=m -CONFIG_HW_RANDOM_INTEL=m -CONFIG_HW_RANDOM_AMD=m -CONFIG_HW_RANDOM_VIA=m -CONFIG_HW_RANDOM_VIRTIO=m -CONFIG_HW_RANDOM_CCTRNG=m -CONFIG_APPLICOM=m - -# -# PCMCIA character devices -# -CONFIG_SYNCLINK_CS=m 
-CONFIG_CARDMAN_4000=m -CONFIG_CARDMAN_4040=m -CONFIG_SCR24X=m -CONFIG_IPWIRELESS=m -# end of PCMCIA character devices - -CONFIG_MWAVE=m -CONFIG_DEVMEM=y -# CONFIG_DEVKMEM is not set -CONFIG_NVRAM=m -CONFIG_RAW_DRIVER=m -CONFIG_MAX_RAW_DEVS=256 -CONFIG_DEVPORT=y -CONFIG_HPET=y -CONFIG_HPET_MMAP=y -CONFIG_HPET_MMAP_DEFAULT=y -CONFIG_HANGCHECK_TIMER=m -CONFIG_TCG_TPM=m -CONFIG_HW_RANDOM_TPM=y -CONFIG_TCG_TIS_CORE=m -CONFIG_TCG_TIS=m -CONFIG_TCG_TIS_SPI=m -CONFIG_TCG_TIS_SPI_CR50=y -CONFIG_TCG_TIS_I2C_ATMEL=m -CONFIG_TCG_TIS_I2C_INFINEON=m -CONFIG_TCG_TIS_I2C_NUVOTON=m -CONFIG_TCG_NSC=m -CONFIG_TCG_ATMEL=m -CONFIG_TCG_INFINEON=m -CONFIG_TCG_XEN=m -CONFIG_TCG_CRB=m -CONFIG_TCG_VTPM_PROXY=m -CONFIG_TCG_TIS_ST33ZP24=m -CONFIG_TCG_TIS_ST33ZP24_I2C=m -CONFIG_TCG_TIS_ST33ZP24_SPI=m -CONFIG_TELCLOCK=m -CONFIG_XILLYBUS=m -CONFIG_XILLYBUS_PCIE=m -CONFIG_XILLYBUS_OF=m -# end of Character devices - -# CONFIG_RANDOM_TRUST_CPU is not set -# CONFIG_RANDOM_TRUST_BOOTLOADER is not set - -# -# I2C support -# -CONFIG_I2C=y -CONFIG_ACPI_I2C_OPREGION=y -CONFIG_I2C_BOARDINFO=y -CONFIG_I2C_COMPAT=y -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_MUX=m - -# -# Multiplexer I2C Chip support -# -CONFIG_I2C_ARB_GPIO_CHALLENGE=m -CONFIG_I2C_MUX_GPIO=m -CONFIG_I2C_MUX_GPMUX=m -CONFIG_I2C_MUX_LTC4306=m -CONFIG_I2C_MUX_PCA9541=m -CONFIG_I2C_MUX_PCA954x=m -CONFIG_I2C_MUX_PINCTRL=m -CONFIG_I2C_MUX_REG=m -CONFIG_I2C_DEMUX_PINCTRL=m -CONFIG_I2C_MUX_MLXCPLD=m -# end of Multiplexer I2C Chip support - -CONFIG_I2C_HELPER_AUTO=y -CONFIG_I2C_SMBUS=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_ALGOPCA=m - -# -# I2C Hardware Bus support -# - -# -# PC SMBus host controller drivers -# -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI1563=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_AMD756=m -CONFIG_I2C_AMD756_S4882=m -CONFIG_I2C_AMD8111=m -CONFIG_I2C_AMD_MP2=m -CONFIG_I2C_I801=m -CONFIG_I2C_ISCH=m -CONFIG_I2C_ISMT=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_CHT_WC=m -CONFIG_I2C_NFORCE2=m -CONFIG_I2C_NFORCE2_S4985=m -CONFIG_I2C_NVIDIA_GPU=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_SIS630=m -CONFIG_I2C_SIS96X=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m - -# -# ACPI drivers -# -CONFIG_I2C_SCMI=m - -# -# I2C system bus drivers (mostly embedded / system-on-chip) -# -CONFIG_I2C_CBUS_GPIO=m -CONFIG_I2C_DESIGNWARE_CORE=y -CONFIG_I2C_DESIGNWARE_SLAVE=y -CONFIG_I2C_DESIGNWARE_PLATFORM=y -CONFIG_I2C_DESIGNWARE_BAYTRAIL=y -CONFIG_I2C_DESIGNWARE_PCI=m -CONFIG_I2C_EMEV2=m -CONFIG_I2C_GPIO=m -# CONFIG_I2C_GPIO_FAULT_INJECTOR is not set -CONFIG_I2C_KEMPLD=m -CONFIG_I2C_OCORES=m -CONFIG_I2C_PCA_PLATFORM=m -CONFIG_I2C_RK3X=m -CONFIG_I2C_SIMTEC=m -CONFIG_I2C_XILINX=m - -# -# External I2C/SMBus adapter drivers -# -CONFIG_I2C_DIOLAN_U2C=m -CONFIG_I2C_DLN2=m -CONFIG_I2C_PARPORT=m -CONFIG_I2C_ROBOTFUZZ_OSIF=m -CONFIG_I2C_TAOS_EVM=m -CONFIG_I2C_TINY_USB=m -CONFIG_I2C_VIPERBOARD=m - -# -# Other I2C/SMBus bus drivers -# -CONFIG_I2C_MLXCPLD=m -CONFIG_I2C_CROS_EC_TUNNEL=m -CONFIG_I2C_FSI=m -# end of I2C Hardware Bus support - -CONFIG_I2C_STUB=m -CONFIG_I2C_SLAVE=y -CONFIG_I2C_SLAVE_EEPROM=m -# CONFIG_I2C_DEBUG_CORE is not set -# CONFIG_I2C_DEBUG_ALGO is not set -# CONFIG_I2C_DEBUG_BUS is not set -# end of I2C support - -CONFIG_I3C=m -CONFIG_CDNS_I3C_MASTER=m -CONFIG_DW_I3C_MASTER=m -CONFIG_SPI=y -# CONFIG_SPI_DEBUG is not set -CONFIG_SPI_MASTER=y -CONFIG_SPI_MEM=y - -# -# SPI Master Controller Drivers -# -CONFIG_SPI_ALTERA=m -CONFIG_SPI_AXI_SPI_ENGINE=m -CONFIG_SPI_BITBANG=m -CONFIG_SPI_BUTTERFLY=m -CONFIG_SPI_CADENCE=m -CONFIG_SPI_DESIGNWARE=m -CONFIG_SPI_DW_DMA=y -CONFIG_SPI_DW_PCI=m -CONFIG_SPI_DW_MMIO=m -CONFIG_SPI_DLN2=m 
-CONFIG_SPI_FSI=m -CONFIG_SPI_NXP_FLEXSPI=m -CONFIG_SPI_GPIO=m -CONFIG_SPI_LM70_LLP=m -CONFIG_SPI_FSL_LIB=m -CONFIG_SPI_FSL_SPI=m -CONFIG_SPI_OC_TINY=m -CONFIG_SPI_PXA2XX=m -CONFIG_SPI_PXA2XX_PCI=m -CONFIG_SPI_ROCKCHIP=m -CONFIG_SPI_SC18IS602=m -CONFIG_SPI_SIFIVE=m -CONFIG_SPI_MXIC=m -CONFIG_SPI_XCOMM=m -CONFIG_SPI_XILINX=m -CONFIG_SPI_ZYNQMP_GQSPI=m -CONFIG_SPI_AMD=m - -# -# SPI Multiplexer support -# -CONFIG_SPI_MUX=m - -# -# SPI Protocol Masters -# -CONFIG_SPI_SPIDEV=m -CONFIG_SPI_LOOPBACK_TEST=m -CONFIG_SPI_TLE62X0=m -CONFIG_SPI_SLAVE=y -CONFIG_SPI_SLAVE_TIME=m -CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m -CONFIG_SPI_DYNAMIC=y -CONFIG_SPMI=m -CONFIG_HSI=m -CONFIG_HSI_BOARDINFO=y - -# -# HSI controllers -# - -# -# HSI clients -# -CONFIG_HSI_CHAR=m -CONFIG_PPS=y -# CONFIG_PPS_DEBUG is not set - -# -# PPS clients support -# -CONFIG_PPS_CLIENT_KTIMER=m -CONFIG_PPS_CLIENT_LDISC=m -CONFIG_PPS_CLIENT_PARPORT=m -CONFIG_PPS_CLIENT_GPIO=m - -# -# PPS generators support -# - -# -# PTP clock support -# -CONFIG_PTP_1588_CLOCK=y -CONFIG_DP83640_PHY=m -CONFIG_PTP_1588_CLOCK_INES=m -CONFIG_PTP_1588_CLOCK_KVM=m -CONFIG_PTP_1588_CLOCK_IDT82P33=m -CONFIG_PTP_1588_CLOCK_IDTCM=m -CONFIG_PTP_1588_CLOCK_VMW=m -# end of PTP clock support - -CONFIG_PINCTRL=y -CONFIG_GENERIC_PINCTRL_GROUPS=y -CONFIG_PINMUX=y -CONFIG_GENERIC_PINMUX_FUNCTIONS=y -CONFIG_PINCONF=y -CONFIG_GENERIC_PINCONF=y -# CONFIG_DEBUG_PINCTRL is not set -CONFIG_PINCTRL_AS3722=m -CONFIG_PINCTRL_AXP209=m -CONFIG_PINCTRL_AMD=m -CONFIG_PINCTRL_DA9062=m -CONFIG_PINCTRL_MCP23S08_I2C=m -CONFIG_PINCTRL_MCP23S08_SPI=m -CONFIG_PINCTRL_MCP23S08=m -CONFIG_PINCTRL_SINGLE=m -CONFIG_PINCTRL_SX150X=y -CONFIG_PINCTRL_STMFX=m -CONFIG_PINCTRL_MAX77620=m -CONFIG_PINCTRL_PALMAS=m -CONFIG_PINCTRL_RK805=m -CONFIG_PINCTRL_OCELOT=y -CONFIG_PINCTRL_BAYTRAIL=y -CONFIG_PINCTRL_CHERRYVIEW=y -CONFIG_PINCTRL_LYNXPOINT=y -CONFIG_PINCTRL_INTEL=y -CONFIG_PINCTRL_BROXTON=y -CONFIG_PINCTRL_CANNONLAKE=y -CONFIG_PINCTRL_CEDARFORK=y -CONFIG_PINCTRL_DENVERTON=y -CONFIG_PINCTRL_GEMINILAKE=y -CONFIG_PINCTRL_ICELAKE=y -CONFIG_PINCTRL_JASPERLAKE=y -CONFIG_PINCTRL_LEWISBURG=y -CONFIG_PINCTRL_SUNRISEPOINT=y -CONFIG_PINCTRL_TIGERLAKE=y -CONFIG_PINCTRL_LOCHNAGAR=m -CONFIG_PINCTRL_MADERA=m -CONFIG_PINCTRL_CS47L15=y -CONFIG_PINCTRL_CS47L35=y -CONFIG_PINCTRL_CS47L85=y -CONFIG_PINCTRL_CS47L90=y -CONFIG_PINCTRL_CS47L92=y -CONFIG_PINCTRL_EQUILIBRIUM=m -CONFIG_GPIOLIB=y -CONFIG_GPIOLIB_FASTPATH_LIMIT=512 -CONFIG_OF_GPIO=y -CONFIG_GPIO_ACPI=y -CONFIG_GPIOLIB_IRQCHIP=y -# CONFIG_DEBUG_GPIO is not set -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_GENERIC=y -CONFIG_GPIO_MAX730X=m - -# -# Memory mapped GPIO drivers -# -CONFIG_GPIO_74XX_MMIO=m -CONFIG_GPIO_ALTERA=m -CONFIG_GPIO_AMDPT=m -CONFIG_GPIO_CADENCE=m -CONFIG_GPIO_DWAPB=m -CONFIG_GPIO_EXAR=m -CONFIG_GPIO_FTGPIO010=y -CONFIG_GPIO_GENERIC_PLATFORM=m -CONFIG_GPIO_GRGPIO=m -CONFIG_GPIO_HLWD=m -CONFIG_GPIO_ICH=m -CONFIG_GPIO_LOGICVC=m -CONFIG_GPIO_MB86S7X=m -CONFIG_GPIO_MENZ127=m -CONFIG_GPIO_SAMA5D2_PIOBU=m -CONFIG_GPIO_SIFIVE=y -CONFIG_GPIO_SIOX=m -CONFIG_GPIO_SYSCON=m -CONFIG_GPIO_VX855=m -CONFIG_GPIO_WCD934X=m -CONFIG_GPIO_XILINX=m -CONFIG_GPIO_AMD_FCH=m -# end of Memory mapped GPIO drivers - -# -# Port-mapped I/O GPIO drivers -# -CONFIG_GPIO_F7188X=m -CONFIG_GPIO_IT87=m -CONFIG_GPIO_SCH=m -CONFIG_GPIO_SCH311X=m -CONFIG_GPIO_WINBOND=m -CONFIG_GPIO_WS16C48=m -# end of Port-mapped I/O GPIO drivers - -# -# I2C GPIO expanders -# -CONFIG_GPIO_ADP5588=m -CONFIG_GPIO_ADNP=m -CONFIG_GPIO_GW_PLD=m -CONFIG_GPIO_MAX7300=m -CONFIG_GPIO_MAX732X=m -CONFIG_GPIO_PCA953X=m 
-CONFIG_GPIO_PCA953X_IRQ=y -CONFIG_GPIO_PCF857X=m -CONFIG_GPIO_TPIC2810=m -# end of I2C GPIO expanders - -# -# MFD GPIO expanders -# -CONFIG_GPIO_ADP5520=m -CONFIG_GPIO_ARIZONA=m -CONFIG_GPIO_BD70528=m -CONFIG_GPIO_BD71828=m -CONFIG_GPIO_BD9571MWV=m -CONFIG_GPIO_CRYSTAL_COVE=m -CONFIG_GPIO_DA9052=m -CONFIG_GPIO_DA9055=m -CONFIG_GPIO_DLN2=m -CONFIG_GPIO_JANZ_TTL=m -CONFIG_GPIO_KEMPLD=m -CONFIG_GPIO_LP3943=m -CONFIG_GPIO_LP873X=m -CONFIG_GPIO_LP87565=m -CONFIG_GPIO_MADERA=m -CONFIG_GPIO_MAX77620=m -CONFIG_GPIO_MAX77650=m -CONFIG_GPIO_MSIC=y -CONFIG_GPIO_PALMAS=y -CONFIG_GPIO_RC5T583=y -CONFIG_GPIO_STMPE=y -CONFIG_GPIO_TC3589X=y -CONFIG_GPIO_TPS65086=m -CONFIG_GPIO_TPS65218=m -CONFIG_GPIO_TPS6586X=y -CONFIG_GPIO_TPS65910=y -CONFIG_GPIO_TPS65912=m -CONFIG_GPIO_TPS68470=y -CONFIG_GPIO_TQMX86=m -CONFIG_GPIO_TWL4030=m -CONFIG_GPIO_TWL6040=m -CONFIG_GPIO_UCB1400=m -CONFIG_GPIO_WHISKEY_COVE=m -CONFIG_GPIO_WM831X=m -CONFIG_GPIO_WM8350=m -CONFIG_GPIO_WM8994=m -# end of MFD GPIO expanders - -# -# PCI GPIO expanders -# -CONFIG_GPIO_AMD8111=m -CONFIG_GPIO_ML_IOH=m -CONFIG_GPIO_PCI_IDIO_16=m -CONFIG_GPIO_PCIE_IDIO_24=m -CONFIG_GPIO_RDC321X=m -CONFIG_GPIO_SODAVILLE=y -# end of PCI GPIO expanders - -# -# SPI GPIO expanders -# -CONFIG_GPIO_74X164=m -CONFIG_GPIO_MAX3191X=m -CONFIG_GPIO_MAX7301=m -CONFIG_GPIO_MC33880=m -CONFIG_GPIO_PISOSR=m -CONFIG_GPIO_XRA1403=m -CONFIG_GPIO_MOXTET=m -# end of SPI GPIO expanders - -# -# USB GPIO expanders -# -CONFIG_GPIO_VIPERBOARD=m -# end of USB GPIO expanders - -CONFIG_GPIO_AGGREGATOR=m -CONFIG_GPIO_MOCKUP=m -CONFIG_W1=m -CONFIG_W1_CON=y - -# -# 1-wire Bus Masters -# -CONFIG_W1_MASTER_MATROX=m -CONFIG_W1_MASTER_DS2490=m -CONFIG_W1_MASTER_DS2482=m -CONFIG_W1_MASTER_DS1WM=m -CONFIG_W1_MASTER_GPIO=m -CONFIG_W1_MASTER_SGI=m -# end of 1-wire Bus Masters - -# -# 1-wire Slaves -# -CONFIG_W1_SLAVE_THERM=m -CONFIG_W1_SLAVE_SMEM=m -CONFIG_W1_SLAVE_DS2405=m -CONFIG_W1_SLAVE_DS2408=m -# CONFIG_W1_SLAVE_DS2408_READBACK is not set -CONFIG_W1_SLAVE_DS2413=m -CONFIG_W1_SLAVE_DS2406=m -CONFIG_W1_SLAVE_DS2423=m -CONFIG_W1_SLAVE_DS2805=m -CONFIG_W1_SLAVE_DS2430=m -CONFIG_W1_SLAVE_DS2431=m -CONFIG_W1_SLAVE_DS2433=m -# CONFIG_W1_SLAVE_DS2433_CRC is not set -CONFIG_W1_SLAVE_DS2438=m -CONFIG_W1_SLAVE_DS250X=m -CONFIG_W1_SLAVE_DS2780=m -CONFIG_W1_SLAVE_DS2781=m -CONFIG_W1_SLAVE_DS28E04=m -CONFIG_W1_SLAVE_DS28E17=m -# end of 1-wire Slaves - -CONFIG_POWER_AVS=y -CONFIG_QCOM_CPR=m -CONFIG_POWER_RESET=y -CONFIG_POWER_RESET_AS3722=y -CONFIG_POWER_RESET_GPIO=y -CONFIG_POWER_RESET_GPIO_RESTART=y -CONFIG_POWER_RESET_LTC2952=y -CONFIG_POWER_RESET_MT6323=y -CONFIG_POWER_RESET_RESTART=y -CONFIG_POWER_RESET_SYSCON=y -CONFIG_POWER_RESET_SYSCON_POWEROFF=y -CONFIG_REBOOT_MODE=m -CONFIG_SYSCON_REBOOT_MODE=m -CONFIG_NVMEM_REBOOT_MODE=m -CONFIG_POWER_SUPPLY=y -# CONFIG_POWER_SUPPLY_DEBUG is not set -CONFIG_POWER_SUPPLY_HWMON=y -CONFIG_PDA_POWER=m -CONFIG_GENERIC_ADC_BATTERY=m -CONFIG_MAX8925_POWER=m -CONFIG_WM831X_BACKUP=m -CONFIG_WM831X_POWER=m -CONFIG_WM8350_POWER=m -CONFIG_TEST_POWER=m -CONFIG_BATTERY_88PM860X=m -CONFIG_CHARGER_ADP5061=m -CONFIG_BATTERY_ACT8945A=m -CONFIG_BATTERY_CPCAP=m -CONFIG_BATTERY_CW2015=m -CONFIG_BATTERY_DS2760=m -CONFIG_BATTERY_DS2780=m -CONFIG_BATTERY_DS2781=m -CONFIG_BATTERY_DS2782=m -CONFIG_BATTERY_LEGO_EV3=m -CONFIG_BATTERY_SBS=m -CONFIG_CHARGER_SBS=m -CONFIG_MANAGER_SBS=m -CONFIG_BATTERY_BQ27XXX=m -CONFIG_BATTERY_BQ27XXX_I2C=m -CONFIG_BATTERY_BQ27XXX_HDQ=m -# CONFIG_BATTERY_BQ27XXX_DT_UPDATES_NVM is not set -CONFIG_BATTERY_DA9030=m -CONFIG_BATTERY_DA9052=m -CONFIG_CHARGER_DA9150=m 
-CONFIG_BATTERY_DA9150=m -CONFIG_CHARGER_AXP20X=m -CONFIG_BATTERY_AXP20X=m -CONFIG_AXP20X_POWER=m -CONFIG_AXP288_CHARGER=m -CONFIG_AXP288_FUEL_GAUGE=m -CONFIG_BATTERY_MAX17040=m -CONFIG_BATTERY_MAX17042=m -CONFIG_BATTERY_MAX1721X=m -CONFIG_BATTERY_TWL4030_MADC=m -CONFIG_CHARGER_88PM860X=m -CONFIG_CHARGER_PCF50633=m -CONFIG_BATTERY_RX51=m -CONFIG_CHARGER_ISP1704=m -CONFIG_CHARGER_MAX8903=m -CONFIG_CHARGER_TWL4030=m -CONFIG_CHARGER_LP8727=m -CONFIG_CHARGER_LP8788=m -CONFIG_CHARGER_GPIO=m -CONFIG_CHARGER_MANAGER=y -CONFIG_CHARGER_LT3651=m -CONFIG_CHARGER_MAX14577=m -CONFIG_CHARGER_DETECTOR_MAX14656=m -CONFIG_CHARGER_MAX77650=m -CONFIG_CHARGER_MAX77693=m -CONFIG_CHARGER_MAX8997=m -CONFIG_CHARGER_MAX8998=m -CONFIG_CHARGER_MP2629=m -CONFIG_CHARGER_BQ2415X=m -CONFIG_CHARGER_BQ24190=m -CONFIG_CHARGER_BQ24257=m -CONFIG_CHARGER_BQ24735=m -CONFIG_CHARGER_BQ25890=m -CONFIG_CHARGER_SMB347=m -CONFIG_CHARGER_TPS65090=m -CONFIG_CHARGER_TPS65217=m -CONFIG_BATTERY_GAUGE_LTC2941=m -CONFIG_BATTERY_RT5033=m -CONFIG_CHARGER_RT9455=m -CONFIG_CHARGER_CROS_USBPD=m -CONFIG_CHARGER_UCS1002=m -CONFIG_CHARGER_BD70528=m -CONFIG_CHARGER_BD99954=m -CONFIG_CHARGER_WILCO=m -CONFIG_HWMON=y -CONFIG_HWMON_VID=m -# CONFIG_HWMON_DEBUG_CHIP is not set - -# -# Native drivers -# -CONFIG_SENSORS_ABITUGURU=m -CONFIG_SENSORS_ABITUGURU3=m -CONFIG_SENSORS_AD7314=m -CONFIG_SENSORS_AD7414=m -CONFIG_SENSORS_AD7418=m -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM1026=m -CONFIG_SENSORS_ADM1029=m -CONFIG_SENSORS_ADM1031=m -CONFIG_SENSORS_ADM1177=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_ADT7X10=m -CONFIG_SENSORS_ADT7310=m -CONFIG_SENSORS_ADT7410=m -CONFIG_SENSORS_ADT7411=m -CONFIG_SENSORS_ADT7462=m -CONFIG_SENSORS_ADT7470=m -CONFIG_SENSORS_ADT7475=m -CONFIG_SENSORS_AS370=m -CONFIG_SENSORS_ASC7621=m -CONFIG_SENSORS_AXI_FAN_CONTROL=m -CONFIG_SENSORS_K8TEMP=m -CONFIG_SENSORS_K10TEMP=m -CONFIG_SENSORS_FAM15H_POWER=m -CONFIG_SENSORS_AMD_ENERGY=m -CONFIG_SENSORS_APPLESMC=m -CONFIG_SENSORS_ASB100=m -CONFIG_SENSORS_ASPEED=m -CONFIG_SENSORS_ATXP1=m -CONFIG_SENSORS_DRIVETEMP=m -CONFIG_SENSORS_DS620=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_DELL_SMM=m -CONFIG_SENSORS_DA9052_ADC=m -CONFIG_SENSORS_DA9055=m -CONFIG_SENSORS_I5K_AMB=m -CONFIG_SENSORS_F71805F=m -CONFIG_SENSORS_F71882FG=m -CONFIG_SENSORS_F75375S=m -CONFIG_SENSORS_GSC=m -CONFIG_SENSORS_MC13783_ADC=m -CONFIG_SENSORS_FSCHMD=m -CONFIG_SENSORS_FTSTEUTATES=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_G760A=m -CONFIG_SENSORS_G762=m -CONFIG_SENSORS_GPIO_FAN=m -CONFIG_SENSORS_HIH6130=m -CONFIG_SENSORS_IBMAEM=m -CONFIG_SENSORS_IBMPEX=m -CONFIG_SENSORS_IIO_HWMON=m -CONFIG_SENSORS_I5500=m -CONFIG_SENSORS_CORETEMP=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_JC42=m -CONFIG_SENSORS_POWR1220=m -CONFIG_SENSORS_LINEAGE=m -CONFIG_SENSORS_LOCHNAGAR=m -CONFIG_SENSORS_LTC2945=m -CONFIG_SENSORS_LTC2947=m -CONFIG_SENSORS_LTC2947_I2C=m -CONFIG_SENSORS_LTC2947_SPI=m -CONFIG_SENSORS_LTC2990=m -CONFIG_SENSORS_LTC4151=m -CONFIG_SENSORS_LTC4215=m -CONFIG_SENSORS_LTC4222=m -CONFIG_SENSORS_LTC4245=m -CONFIG_SENSORS_LTC4260=m -CONFIG_SENSORS_LTC4261=m -CONFIG_SENSORS_MAX1111=m -CONFIG_SENSORS_MAX16065=m -CONFIG_SENSORS_MAX1619=m -CONFIG_SENSORS_MAX1668=m -CONFIG_SENSORS_MAX197=m -CONFIG_SENSORS_MAX31722=m -CONFIG_SENSORS_MAX31730=m -CONFIG_SENSORS_MAX6621=m -CONFIG_SENSORS_MAX6639=m -CONFIG_SENSORS_MAX6642=m -CONFIG_SENSORS_MAX6650=m -CONFIG_SENSORS_MAX6697=m -CONFIG_SENSORS_MAX31790=m -CONFIG_SENSORS_MCP3021=m -CONFIG_SENSORS_MLXREG_FAN=m -CONFIG_SENSORS_TC654=m 
-CONFIG_SENSORS_MENF21BMC_HWMON=m -CONFIG_SENSORS_ADCXX=m -CONFIG_SENSORS_LM63=m -CONFIG_SENSORS_LM70=m -CONFIG_SENSORS_LM73=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM77=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM83=m -CONFIG_SENSORS_LM85=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM90=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_LM93=m -CONFIG_SENSORS_LM95234=m -CONFIG_SENSORS_LM95241=m -CONFIG_SENSORS_LM95245=m -CONFIG_SENSORS_PC87360=m -CONFIG_SENSORS_PC87427=m -CONFIG_SENSORS_NTC_THERMISTOR=m -CONFIG_SENSORS_NCT6683=m -CONFIG_SENSORS_NCT6775=m -CONFIG_SENSORS_NCT7802=m -CONFIG_SENSORS_NCT7904=m -CONFIG_SENSORS_NPCM7XX=m -CONFIG_SENSORS_PCF8591=m -CONFIG_PMBUS=m -CONFIG_SENSORS_PMBUS=m -CONFIG_SENSORS_ADM1275=m -CONFIG_SENSORS_BEL_PFE=m -CONFIG_SENSORS_IBM_CFFPS=m -CONFIG_SENSORS_INSPUR_IPSPS=m -CONFIG_SENSORS_IR35221=m -CONFIG_SENSORS_IR38064=m -CONFIG_SENSORS_IRPS5401=m -CONFIG_SENSORS_ISL68137=m -CONFIG_SENSORS_LM25066=m -CONFIG_SENSORS_LTC2978=m -# CONFIG_SENSORS_LTC2978_REGULATOR is not set -CONFIG_SENSORS_LTC3815=m -CONFIG_SENSORS_MAX16064=m -CONFIG_SENSORS_MAX16601=m -CONFIG_SENSORS_MAX20730=m -CONFIG_SENSORS_MAX20751=m -CONFIG_SENSORS_MAX31785=m -CONFIG_SENSORS_MAX34440=m -CONFIG_SENSORS_MAX8688=m -CONFIG_SENSORS_PXE1610=m -CONFIG_SENSORS_TPS40422=m -CONFIG_SENSORS_TPS53679=m -CONFIG_SENSORS_UCD9000=m -CONFIG_SENSORS_UCD9200=m -CONFIG_SENSORS_XDPE122=m -CONFIG_SENSORS_ZL6100=m -CONFIG_SENSORS_PWM_FAN=m -CONFIG_SENSORS_SHT15=m -CONFIG_SENSORS_SHT21=m -CONFIG_SENSORS_SHT3x=m -CONFIG_SENSORS_SHTC1=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_DME1737=m -CONFIG_SENSORS_EMC1403=m -CONFIG_SENSORS_EMC2103=m -CONFIG_SENSORS_EMC6W201=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_SMSC47M192=m -CONFIG_SENSORS_SMSC47B397=m -CONFIG_SENSORS_SCH56XX_COMMON=m -CONFIG_SENSORS_SCH5627=m -CONFIG_SENSORS_SCH5636=m -CONFIG_SENSORS_STTS751=m -CONFIG_SENSORS_SMM665=m -CONFIG_SENSORS_ADC128D818=m -CONFIG_SENSORS_ADS7828=m -CONFIG_SENSORS_ADS7871=m -CONFIG_SENSORS_AMC6821=m -CONFIG_SENSORS_INA209=m -CONFIG_SENSORS_INA2XX=m -CONFIG_SENSORS_INA3221=m -CONFIG_SENSORS_TC74=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_TMP102=m -CONFIG_SENSORS_TMP103=m -CONFIG_SENSORS_TMP108=m -CONFIG_SENSORS_TMP401=m -CONFIG_SENSORS_TMP421=m -CONFIG_SENSORS_TMP513=m -CONFIG_SENSORS_VIA_CPUTEMP=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83773G=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_W83791D=m -CONFIG_SENSORS_W83792D=m -CONFIG_SENSORS_W83793=m -CONFIG_SENSORS_W83795=m -# CONFIG_SENSORS_W83795_FANCTRL is not set -CONFIG_SENSORS_W83L785TS=m -CONFIG_SENSORS_W83L786NG=m -CONFIG_SENSORS_W83627HF=m -CONFIG_SENSORS_W83627EHF=m -CONFIG_SENSORS_WM831X=m -CONFIG_SENSORS_WM8350=m -CONFIG_SENSORS_XGENE=m - -# -# ACPI drivers -# -CONFIG_SENSORS_ACPI_POWER=m -CONFIG_SENSORS_ATK0110=m -CONFIG_THERMAL=y -# CONFIG_THERMAL_STATISTICS is not set -CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=100 -CONFIG_THERMAL_HWMON=y -CONFIG_THERMAL_OF=y -CONFIG_THERMAL_WRITABLE_TRIPS=y -CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y -# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set -# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set -# CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR is not set -CONFIG_THERMAL_GOV_FAIR_SHARE=y -CONFIG_THERMAL_GOV_STEP_WISE=y -CONFIG_THERMAL_GOV_BANG_BANG=y -CONFIG_THERMAL_GOV_USER_SPACE=y -CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y -CONFIG_CPU_THERMAL=y -CONFIG_CPU_FREQ_THERMAL=y -CONFIG_CPU_IDLE_THERMAL=y -CONFIG_CLOCK_THERMAL=y -CONFIG_DEVFREQ_THERMAL=y -# 
CONFIG_THERMAL_EMULATION is not set -CONFIG_THERMAL_MMIO=m -CONFIG_MAX77620_THERMAL=m -CONFIG_DA9062_THERMAL=m - -# -# Intel thermal drivers -# -CONFIG_INTEL_POWERCLAMP=m -CONFIG_X86_PKG_TEMP_THERMAL=m -CONFIG_INTEL_SOC_DTS_IOSF_CORE=m -CONFIG_INTEL_SOC_DTS_THERMAL=m - -# -# ACPI INT340X thermal drivers -# -CONFIG_INT340X_THERMAL=m -CONFIG_ACPI_THERMAL_REL=m -CONFIG_INT3406_THERMAL=m -CONFIG_PROC_THERMAL_MMIO_RAPL=y -# end of ACPI INT340X thermal drivers - -CONFIG_INTEL_BXT_PMIC_THERMAL=m -CONFIG_INTEL_PCH_THERMAL=m -# end of Intel thermal drivers - -# CONFIG_TI_SOC_THERMAL is not set -CONFIG_GENERIC_ADC_THERMAL=m -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_CORE=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y -CONFIG_WATCHDOG_OPEN_TIMEOUT=0 -CONFIG_WATCHDOG_SYSFS=y - -# -# Watchdog Pretimeout Governors -# -CONFIG_WATCHDOG_PRETIMEOUT_GOV=y -CONFIG_WATCHDOG_PRETIMEOUT_GOV_SEL=m -CONFIG_WATCHDOG_PRETIMEOUT_GOV_NOOP=m -CONFIG_WATCHDOG_PRETIMEOUT_GOV_PANIC=y -# CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_NOOP is not set -CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_PANIC=y - -# -# Watchdog Device Drivers -# -CONFIG_SOFT_WATCHDOG=m -# CONFIG_SOFT_WATCHDOG_PRETIMEOUT is not set -CONFIG_BD70528_WATCHDOG=m -CONFIG_DA9052_WATCHDOG=m -CONFIG_DA9055_WATCHDOG=m -CONFIG_DA9063_WATCHDOG=m -CONFIG_DA9062_WATCHDOG=m -CONFIG_GPIO_WATCHDOG=m -CONFIG_MENF21BMC_WATCHDOG=m -CONFIG_MENZ069_WATCHDOG=m -CONFIG_WDAT_WDT=m -CONFIG_WM831X_WATCHDOG=m -CONFIG_WM8350_WATCHDOG=m -CONFIG_XILINX_WATCHDOG=m -CONFIG_ZIIRAVE_WATCHDOG=m -CONFIG_RAVE_SP_WATCHDOG=m -CONFIG_MLX_WDT=m -CONFIG_CADENCE_WATCHDOG=m -CONFIG_DW_WATCHDOG=m -CONFIG_RN5T618_WATCHDOG=m -CONFIG_TWL4030_WATCHDOG=m -CONFIG_MAX63XX_WATCHDOG=m -CONFIG_MAX77620_WATCHDOG=m -CONFIG_RETU_WATCHDOG=m -CONFIG_STPMIC1_WATCHDOG=m -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_EBC_C384_WDT=m -CONFIG_F71808E_WDT=m -CONFIG_SP5100_TCO=m -CONFIG_SBC_FITPC2_WATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_IBMASR=m -CONFIG_WAFER_WDT=m -CONFIG_I6300ESB_WDT=m -CONFIG_IE6XX_WDT=m -CONFIG_ITCO_WDT=m -CONFIG_ITCO_VENDOR_SUPPORT=y -CONFIG_IT8712F_WDT=m -CONFIG_IT87_WDT=m -CONFIG_HP_WATCHDOG=m -CONFIG_HPWDT_NMI_DECODING=y -CONFIG_KEMPLD_WDT=m -CONFIG_SC1200_WDT=m -CONFIG_PC87413_WDT=m -CONFIG_NV_TCO=m -CONFIG_60XX_WDT=m -CONFIG_CPU5_WDT=m -CONFIG_SMSC_SCH311X_WDT=m -CONFIG_SMSC37B787_WDT=m -CONFIG_TQMX86_WDT=m -CONFIG_VIA_WDT=m -CONFIG_W83627HF_WDT=m -CONFIG_W83877F_WDT=m -CONFIG_W83977F_WDT=m -CONFIG_MACHZ_WDT=m -CONFIG_SBC_EPX_C3_WATCHDOG=m -CONFIG_INTEL_MEI_WDT=m -CONFIG_NI903X_WDT=m -CONFIG_NIC7018_WDT=m -CONFIG_MEN_A21_WDT=m -CONFIG_XEN_WDT=m - -# -# PCI-based Watchdog Cards -# -CONFIG_PCIPCWATCHDOG=m -CONFIG_WDTPCI=m - -# -# USB-based Watchdog Cards -# -CONFIG_USBPCWATCHDOG=m -CONFIG_SSB_POSSIBLE=y -CONFIG_SSB=m -CONFIG_SSB_SPROM=y -CONFIG_SSB_BLOCKIO=y -CONFIG_SSB_PCIHOST_POSSIBLE=y -CONFIG_SSB_PCIHOST=y -CONFIG_SSB_B43_PCI_BRIDGE=y -CONFIG_SSB_PCMCIAHOST_POSSIBLE=y -CONFIG_SSB_PCMCIAHOST=y -CONFIG_SSB_SDIOHOST_POSSIBLE=y -CONFIG_SSB_SDIOHOST=y -CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y -CONFIG_SSB_DRIVER_PCICORE=y -CONFIG_SSB_DRIVER_GPIO=y -CONFIG_BCMA_POSSIBLE=y -CONFIG_BCMA=m -CONFIG_BCMA_BLOCKIO=y -CONFIG_BCMA_HOST_PCI_POSSIBLE=y -CONFIG_BCMA_HOST_PCI=y -# CONFIG_BCMA_HOST_SOC is not set -CONFIG_BCMA_DRIVER_PCI=y -CONFIG_BCMA_DRIVER_GMAC_CMN=y -CONFIG_BCMA_DRIVER_GPIO=y -# CONFIG_BCMA_DEBUG is not set - -# -# Multifunction device drivers -# -CONFIG_MFD_CORE=y -CONFIG_MFD_ACT8945A=m 
-CONFIG_MFD_AS3711=y -CONFIG_MFD_AS3722=m -CONFIG_PMIC_ADP5520=y -CONFIG_MFD_AAT2870_CORE=y -CONFIG_MFD_ATMEL_FLEXCOM=m -CONFIG_MFD_ATMEL_HLCDC=m -CONFIG_MFD_BCM590XX=m -CONFIG_MFD_BD9571MWV=m -CONFIG_MFD_AXP20X=m -CONFIG_MFD_AXP20X_I2C=m -CONFIG_MFD_CROS_EC_DEV=m -CONFIG_MFD_MADERA=m -CONFIG_MFD_MADERA_I2C=m -CONFIG_MFD_MADERA_SPI=m -CONFIG_MFD_CS47L15=y -CONFIG_MFD_CS47L35=y -CONFIG_MFD_CS47L85=y -CONFIG_MFD_CS47L90=y -CONFIG_MFD_CS47L92=y -CONFIG_PMIC_DA903X=y -CONFIG_PMIC_DA9052=y -CONFIG_MFD_DA9052_SPI=y -CONFIG_MFD_DA9052_I2C=y -CONFIG_MFD_DA9055=y -CONFIG_MFD_DA9062=m -CONFIG_MFD_DA9063=m -CONFIG_MFD_DA9150=m -CONFIG_MFD_DLN2=m -CONFIG_MFD_GATEWORKS_GSC=m -CONFIG_MFD_MC13XXX=m -CONFIG_MFD_MC13XXX_SPI=m -CONFIG_MFD_MC13XXX_I2C=m -CONFIG_MFD_MP2629=m -CONFIG_MFD_HI6421_PMIC=m -CONFIG_HTC_PASIC3=m -CONFIG_HTC_I2CPLD=y -CONFIG_MFD_INTEL_QUARK_I2C_GPIO=m -CONFIG_LPC_ICH=m -CONFIG_LPC_SCH=m -CONFIG_INTEL_SOC_PMIC=y -CONFIG_INTEL_SOC_PMIC_BXTWC=m -CONFIG_INTEL_SOC_PMIC_CHTWC=y -CONFIG_INTEL_SOC_PMIC_CHTDC_TI=m -CONFIG_INTEL_SOC_PMIC_MRFLD=m -CONFIG_MFD_INTEL_LPSS=m -CONFIG_MFD_INTEL_LPSS_ACPI=m -CONFIG_MFD_INTEL_LPSS_PCI=m -CONFIG_MFD_INTEL_MSIC=y -CONFIG_MFD_INTEL_PMC_BXT=m -CONFIG_MFD_IQS62X=m -CONFIG_MFD_JANZ_CMODIO=m -CONFIG_MFD_KEMPLD=m -CONFIG_MFD_88PM800=m -CONFIG_MFD_88PM805=m -CONFIG_MFD_88PM860X=y -CONFIG_MFD_MAX14577=m -CONFIG_MFD_MAX77620=y -CONFIG_MFD_MAX77650=m -CONFIG_MFD_MAX77686=m -CONFIG_MFD_MAX77693=m -CONFIG_MFD_MAX77843=y -CONFIG_MFD_MAX8907=m -CONFIG_MFD_MAX8925=y -CONFIG_MFD_MAX8997=y -CONFIG_MFD_MAX8998=y -CONFIG_MFD_MT6360=m -CONFIG_MFD_MT6397=m -CONFIG_MFD_MENF21BMC=m -CONFIG_EZX_PCAP=y -CONFIG_MFD_CPCAP=m -CONFIG_MFD_VIPERBOARD=m -CONFIG_MFD_RETU=m -CONFIG_MFD_PCF50633=m -CONFIG_PCF50633_ADC=m -CONFIG_PCF50633_GPIO=m -CONFIG_UCB1400_CORE=m -CONFIG_MFD_RDC321X=m -CONFIG_MFD_RT5033=m -CONFIG_MFD_RC5T583=y -CONFIG_MFD_RK808=m -CONFIG_MFD_RN5T618=m -CONFIG_MFD_SEC_CORE=y -CONFIG_MFD_SI476X_CORE=m -CONFIG_MFD_SM501=m -CONFIG_MFD_SM501_GPIO=y -CONFIG_MFD_SKY81452=m -CONFIG_MFD_SMSC=y -CONFIG_ABX500_CORE=y -CONFIG_AB3100_CORE=y -CONFIG_AB3100_OTP=y -CONFIG_MFD_STMPE=y - -# -# STMicroelectronics STMPE Interface Drivers -# -CONFIG_STMPE_I2C=y -CONFIG_STMPE_SPI=y -# end of STMicroelectronics STMPE Interface Drivers - -CONFIG_MFD_SYSCON=y -CONFIG_MFD_TI_AM335X_TSCADC=m -CONFIG_MFD_LP3943=m -CONFIG_MFD_LP8788=y -CONFIG_MFD_TI_LMU=m -CONFIG_MFD_PALMAS=y -CONFIG_TPS6105X=m -CONFIG_TPS65010=m -CONFIG_TPS6507X=m -CONFIG_MFD_TPS65086=m -CONFIG_MFD_TPS65090=y -CONFIG_MFD_TPS65217=m -CONFIG_MFD_TPS68470=y -CONFIG_MFD_TI_LP873X=m -CONFIG_MFD_TI_LP87565=m -CONFIG_MFD_TPS65218=m -CONFIG_MFD_TPS6586X=y -CONFIG_MFD_TPS65910=y -CONFIG_MFD_TPS65912=m -CONFIG_MFD_TPS65912_I2C=m -CONFIG_MFD_TPS65912_SPI=m -CONFIG_MFD_TPS80031=y -CONFIG_TWL4030_CORE=y -CONFIG_MFD_TWL4030_AUDIO=y -CONFIG_TWL6040_CORE=y -CONFIG_MFD_WL1273_CORE=m -CONFIG_MFD_LM3533=m -CONFIG_MFD_TC3589X=y -CONFIG_MFD_TQMX86=m -CONFIG_MFD_VX855=m -CONFIG_MFD_LOCHNAGAR=y -CONFIG_MFD_ARIZONA=y -CONFIG_MFD_ARIZONA_I2C=m -CONFIG_MFD_ARIZONA_SPI=m -CONFIG_MFD_CS47L24=y -CONFIG_MFD_WM5102=y -CONFIG_MFD_WM5110=y -CONFIG_MFD_WM8997=y -CONFIG_MFD_WM8998=y -CONFIG_MFD_WM8400=y -CONFIG_MFD_WM831X=y -CONFIG_MFD_WM831X_I2C=y -CONFIG_MFD_WM831X_SPI=y -CONFIG_MFD_WM8350=y -CONFIG_MFD_WM8350_I2C=y -CONFIG_MFD_WM8994=m -CONFIG_MFD_ROHM_BD718XX=m -CONFIG_MFD_ROHM_BD70528=m -CONFIG_MFD_ROHM_BD71828=m -CONFIG_MFD_STPMIC1=m -CONFIG_MFD_STMFX=m -CONFIG_MFD_WCD934X=m -CONFIG_RAVE_SP_CORE=m -# end of Multifunction device drivers - -CONFIG_REGULATOR=y 
-# CONFIG_REGULATOR_DEBUG is not set -CONFIG_REGULATOR_FIXED_VOLTAGE=m -CONFIG_REGULATOR_VIRTUAL_CONSUMER=m -CONFIG_REGULATOR_USERSPACE_CONSUMER=m -CONFIG_REGULATOR_88PG86X=m -CONFIG_REGULATOR_88PM800=m -CONFIG_REGULATOR_88PM8607=m -CONFIG_REGULATOR_ACT8865=m -CONFIG_REGULATOR_ACT8945A=m -CONFIG_REGULATOR_AD5398=m -CONFIG_REGULATOR_AAT2870=m -CONFIG_REGULATOR_AB3100=m -CONFIG_REGULATOR_ARIZONA_LDO1=m -CONFIG_REGULATOR_ARIZONA_MICSUPP=m -CONFIG_REGULATOR_AS3711=m -CONFIG_REGULATOR_AS3722=m -CONFIG_REGULATOR_AXP20X=m -CONFIG_REGULATOR_BCM590XX=m -CONFIG_REGULATOR_BD70528=m -CONFIG_REGULATOR_BD71828=m -CONFIG_REGULATOR_BD718XX=m -CONFIG_REGULATOR_BD9571MWV=m -CONFIG_REGULATOR_CPCAP=m -CONFIG_REGULATOR_DA903X=m -CONFIG_REGULATOR_DA9052=m -CONFIG_REGULATOR_DA9055=m -CONFIG_REGULATOR_DA9062=m -CONFIG_REGULATOR_DA9063=m -CONFIG_REGULATOR_DA9210=m -CONFIG_REGULATOR_DA9211=m -CONFIG_REGULATOR_FAN53555=m -CONFIG_REGULATOR_GPIO=m -CONFIG_REGULATOR_HI6421=m -CONFIG_REGULATOR_HI6421V530=m -CONFIG_REGULATOR_ISL9305=m -CONFIG_REGULATOR_ISL6271A=m -CONFIG_REGULATOR_LM363X=m -CONFIG_REGULATOR_LOCHNAGAR=m -CONFIG_REGULATOR_LP3971=m -CONFIG_REGULATOR_LP3972=m -CONFIG_REGULATOR_LP872X=m -CONFIG_REGULATOR_LP873X=m -CONFIG_REGULATOR_LP8755=m -CONFIG_REGULATOR_LP87565=m -CONFIG_REGULATOR_LP8788=m -CONFIG_REGULATOR_LTC3589=m -CONFIG_REGULATOR_LTC3676=m -CONFIG_REGULATOR_MAX14577=m -CONFIG_REGULATOR_MAX1586=m -CONFIG_REGULATOR_MAX77620=m -CONFIG_REGULATOR_MAX77650=m -CONFIG_REGULATOR_MAX8649=m -CONFIG_REGULATOR_MAX8660=m -CONFIG_REGULATOR_MAX8907=m -CONFIG_REGULATOR_MAX8925=m -CONFIG_REGULATOR_MAX8952=m -CONFIG_REGULATOR_MAX8973=m -CONFIG_REGULATOR_MAX8997=m -CONFIG_REGULATOR_MAX8998=m -CONFIG_REGULATOR_MAX77686=m -CONFIG_REGULATOR_MAX77693=m -CONFIG_REGULATOR_MAX77802=m -CONFIG_REGULATOR_MAX77826=m -CONFIG_REGULATOR_MC13XXX_CORE=m -CONFIG_REGULATOR_MC13783=m -CONFIG_REGULATOR_MC13892=m -CONFIG_REGULATOR_MCP16502=m -CONFIG_REGULATOR_MP5416=m -CONFIG_REGULATOR_MP8859=m -CONFIG_REGULATOR_MP886X=m -CONFIG_REGULATOR_MPQ7920=m -CONFIG_REGULATOR_MT6311=m -CONFIG_REGULATOR_MT6323=m -CONFIG_REGULATOR_MT6358=m -CONFIG_REGULATOR_MT6397=m -CONFIG_REGULATOR_PALMAS=m -CONFIG_REGULATOR_PCAP=m -CONFIG_REGULATOR_PCF50633=m -CONFIG_REGULATOR_PFUZE100=m -CONFIG_REGULATOR_PV88060=m -CONFIG_REGULATOR_PV88080=m -CONFIG_REGULATOR_PV88090=m -CONFIG_REGULATOR_PWM=m -CONFIG_REGULATOR_QCOM_SPMI=m -CONFIG_REGULATOR_RC5T583=m -CONFIG_REGULATOR_RK808=m -CONFIG_REGULATOR_RN5T618=m -CONFIG_REGULATOR_ROHM=m -CONFIG_REGULATOR_RT5033=m -CONFIG_REGULATOR_S2MPA01=m -CONFIG_REGULATOR_S2MPS11=m -CONFIG_REGULATOR_S5M8767=m -CONFIG_REGULATOR_SKY81452=m -CONFIG_REGULATOR_SLG51000=m -CONFIG_REGULATOR_STPMIC1=m -CONFIG_REGULATOR_SY8106A=m -CONFIG_REGULATOR_SY8824X=m -CONFIG_REGULATOR_TPS51632=m -CONFIG_REGULATOR_TPS6105X=m -CONFIG_REGULATOR_TPS62360=m -CONFIG_REGULATOR_TPS65023=m -CONFIG_REGULATOR_TPS6507X=m -CONFIG_REGULATOR_TPS65086=m -CONFIG_REGULATOR_TPS65090=m -CONFIG_REGULATOR_TPS65132=m -CONFIG_REGULATOR_TPS65217=m -CONFIG_REGULATOR_TPS65218=m -CONFIG_REGULATOR_TPS6524X=m -CONFIG_REGULATOR_TPS6586X=m -CONFIG_REGULATOR_TPS65910=m -CONFIG_REGULATOR_TPS65912=m -CONFIG_REGULATOR_TPS80031=m -CONFIG_REGULATOR_TWL4030=m -CONFIG_REGULATOR_VCTRL=m -CONFIG_REGULATOR_WM831X=m -CONFIG_REGULATOR_WM8350=m -CONFIG_REGULATOR_WM8400=m -CONFIG_REGULATOR_WM8994=m -CONFIG_RC_CORE=m -CONFIG_RC_MAP=m -CONFIG_LIRC=y -CONFIG_RC_DECODERS=y -CONFIG_IR_NEC_DECODER=m -CONFIG_IR_RC5_DECODER=m -CONFIG_IR_RC6_DECODER=m -CONFIG_IR_JVC_DECODER=m -CONFIG_IR_SONY_DECODER=m 
-CONFIG_IR_SANYO_DECODER=m -CONFIG_IR_SHARP_DECODER=m -CONFIG_IR_MCE_KBD_DECODER=m -CONFIG_IR_XMP_DECODER=m -CONFIG_IR_IMON_DECODER=m -CONFIG_IR_RCMM_DECODER=m -CONFIG_RC_DEVICES=y -CONFIG_RC_ATI_REMOTE=m -CONFIG_IR_ENE=m -CONFIG_IR_HIX5HD2=m -CONFIG_IR_IMON=m -CONFIG_IR_IMON_RAW=m -CONFIG_IR_MCEUSB=m -CONFIG_IR_ITE_CIR=m -CONFIG_IR_FINTEK=m -CONFIG_IR_NUVOTON=m -CONFIG_IR_REDRAT3=m -CONFIG_IR_SPI=m -CONFIG_IR_STREAMZAP=m -CONFIG_IR_WINBOND_CIR=m -CONFIG_IR_IGORPLUGUSB=m -CONFIG_IR_IGUANA=m -CONFIG_IR_TTUSBIR=m -CONFIG_RC_LOOPBACK=m -CONFIG_IR_GPIO_CIR=m -CONFIG_IR_GPIO_TX=m -CONFIG_IR_PWM_TX=m -CONFIG_IR_SERIAL=m -CONFIG_IR_SERIAL_TRANSMITTER=y -CONFIG_IR_SIR=m -CONFIG_RC_XBOX_DVD=m -CONFIG_CEC_CORE=m -CONFIG_CEC_NOTIFIER=y -CONFIG_CEC_PIN=y -CONFIG_MEDIA_CEC_RC=y -# CONFIG_CEC_PIN_ERROR_INJ is not set -CONFIG_MEDIA_CEC_SUPPORT=y -CONFIG_CEC_CROS_EC=m -CONFIG_CEC_GPIO=m -CONFIG_CEC_SECO=m -CONFIG_CEC_SECO_RC=y -CONFIG_USB_PULSE8_CEC=m -CONFIG_USB_RAINSHADOW_CEC=m -CONFIG_MEDIA_SUPPORT=m -# CONFIG_MEDIA_SUPPORT_FILTER is not set -CONFIG_MEDIA_SUBDRV_AUTOSELECT=y - -# -# Media device types -# -CONFIG_MEDIA_CAMERA_SUPPORT=y -CONFIG_MEDIA_ANALOG_TV_SUPPORT=y -CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y -CONFIG_MEDIA_RADIO_SUPPORT=y -CONFIG_MEDIA_SDR_SUPPORT=y -CONFIG_MEDIA_PLATFORM_SUPPORT=y -CONFIG_MEDIA_TEST_SUPPORT=y -# end of Media device types - -# -# Media core support -# -CONFIG_VIDEO_DEV=m -CONFIG_MEDIA_CONTROLLER=y -CONFIG_DVB_CORE=m -# end of Media core support - -# -# Video4Linux options -# -CONFIG_VIDEO_V4L2=m -CONFIG_VIDEO_V4L2_I2C=y -CONFIG_VIDEO_V4L2_SUBDEV_API=y -# CONFIG_VIDEO_ADV_DEBUG is not set -# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set -CONFIG_VIDEO_TUNER=m -CONFIG_V4L2_MEM2MEM_DEV=m -CONFIG_V4L2_FLASH_LED_CLASS=m -CONFIG_V4L2_FWNODE=m -CONFIG_VIDEOBUF_GEN=m -CONFIG_VIDEOBUF_DMA_SG=m -CONFIG_VIDEOBUF_VMALLOC=m -# end of Video4Linux options - -# -# Media controller options -# -CONFIG_MEDIA_CONTROLLER_DVB=y -CONFIG_MEDIA_CONTROLLER_REQUEST_API=y - -# -# Please notice that the enabled Media controller Request API is EXPERIMENTAL -# -# end of Media controller options - -# -# Digital TV options -# -CONFIG_DVB_MMAP=y -CONFIG_DVB_NET=y -CONFIG_DVB_MAX_ADAPTERS=16 -# CONFIG_DVB_DYNAMIC_MINORS is not set -# CONFIG_DVB_DEMUX_SECTION_LOSS_LOG is not set -# CONFIG_DVB_ULE_DEBUG is not set -# end of Digital TV options - -# -# Media drivers -# -CONFIG_TTPCI_EEPROM=m -CONFIG_MEDIA_USB_SUPPORT=y - -# -# Webcam devices -# -CONFIG_USB_VIDEO_CLASS=m -CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y -CONFIG_USB_GSPCA=m -CONFIG_USB_M5602=m -CONFIG_USB_STV06XX=m -CONFIG_USB_GL860=m -CONFIG_USB_GSPCA_BENQ=m -CONFIG_USB_GSPCA_CONEX=m -CONFIG_USB_GSPCA_CPIA1=m -CONFIG_USB_GSPCA_DTCS033=m -CONFIG_USB_GSPCA_ETOMS=m -CONFIG_USB_GSPCA_FINEPIX=m -CONFIG_USB_GSPCA_JEILINJ=m -CONFIG_USB_GSPCA_JL2005BCD=m -CONFIG_USB_GSPCA_KINECT=m -CONFIG_USB_GSPCA_KONICA=m -CONFIG_USB_GSPCA_MARS=m -CONFIG_USB_GSPCA_MR97310A=m -CONFIG_USB_GSPCA_NW80X=m -CONFIG_USB_GSPCA_OV519=m -CONFIG_USB_GSPCA_OV534=m -CONFIG_USB_GSPCA_OV534_9=m -CONFIG_USB_GSPCA_PAC207=m -CONFIG_USB_GSPCA_PAC7302=m -CONFIG_USB_GSPCA_PAC7311=m -CONFIG_USB_GSPCA_SE401=m -CONFIG_USB_GSPCA_SN9C2028=m -CONFIG_USB_GSPCA_SN9C20X=m -CONFIG_USB_GSPCA_SONIXB=m -CONFIG_USB_GSPCA_SONIXJ=m -CONFIG_USB_GSPCA_SPCA500=m -CONFIG_USB_GSPCA_SPCA501=m -CONFIG_USB_GSPCA_SPCA505=m -CONFIG_USB_GSPCA_SPCA506=m -CONFIG_USB_GSPCA_SPCA508=m -CONFIG_USB_GSPCA_SPCA561=m -CONFIG_USB_GSPCA_SPCA1528=m -CONFIG_USB_GSPCA_SQ905=m -CONFIG_USB_GSPCA_SQ905C=m -CONFIG_USB_GSPCA_SQ930X=m 
-CONFIG_USB_GSPCA_STK014=m -CONFIG_USB_GSPCA_STK1135=m -CONFIG_USB_GSPCA_STV0680=m -CONFIG_USB_GSPCA_SUNPLUS=m -CONFIG_USB_GSPCA_T613=m -CONFIG_USB_GSPCA_TOPRO=m -CONFIG_USB_GSPCA_TOUPTEK=m -CONFIG_USB_GSPCA_TV8532=m -CONFIG_USB_GSPCA_VC032X=m -CONFIG_USB_GSPCA_VICAM=m -CONFIG_USB_GSPCA_XIRLINK_CIT=m -CONFIG_USB_GSPCA_ZC3XX=m -CONFIG_USB_PWC=m -# CONFIG_USB_PWC_DEBUG is not set -CONFIG_USB_PWC_INPUT_EVDEV=y -CONFIG_VIDEO_CPIA2=m -CONFIG_USB_ZR364XX=m -CONFIG_USB_STKWEBCAM=m -CONFIG_USB_S2255=m -CONFIG_VIDEO_USBTV=m - -# -# Analog TV USB devices -# -CONFIG_VIDEO_PVRUSB2=m -CONFIG_VIDEO_PVRUSB2_SYSFS=y -CONFIG_VIDEO_PVRUSB2_DVB=y -# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set -CONFIG_VIDEO_HDPVR=m -CONFIG_VIDEO_STK1160_COMMON=m -CONFIG_VIDEO_STK1160=m -CONFIG_VIDEO_GO7007=m -CONFIG_VIDEO_GO7007_USB=m -CONFIG_VIDEO_GO7007_LOADER=m -CONFIG_VIDEO_GO7007_USB_S2250_BOARD=m - -# -# Analog/digital TV USB devices -# -CONFIG_VIDEO_AU0828=m -CONFIG_VIDEO_AU0828_V4L2=y -CONFIG_VIDEO_AU0828_RC=y -CONFIG_VIDEO_CX231XX=m -CONFIG_VIDEO_CX231XX_RC=y -CONFIG_VIDEO_CX231XX_ALSA=m -CONFIG_VIDEO_CX231XX_DVB=m -CONFIG_VIDEO_TM6000=m -CONFIG_VIDEO_TM6000_ALSA=m -CONFIG_VIDEO_TM6000_DVB=m - -# -# Digital TV USB devices -# -CONFIG_DVB_USB=m -# CONFIG_DVB_USB_DEBUG is not set -CONFIG_DVB_USB_DIB3000MC=m -CONFIG_DVB_USB_A800=m -CONFIG_DVB_USB_DIBUSB_MB=m -CONFIG_DVB_USB_DIBUSB_MB_FAULTY=y -CONFIG_DVB_USB_DIBUSB_MC=m -CONFIG_DVB_USB_DIB0700=m -CONFIG_DVB_USB_UMT_010=m -CONFIG_DVB_USB_CXUSB=m -CONFIG_DVB_USB_CXUSB_ANALOG=y -CONFIG_DVB_USB_M920X=m -CONFIG_DVB_USB_DIGITV=m -CONFIG_DVB_USB_VP7045=m -CONFIG_DVB_USB_VP702X=m -CONFIG_DVB_USB_GP8PSK=m -CONFIG_DVB_USB_NOVA_T_USB2=m -CONFIG_DVB_USB_TTUSB2=m -CONFIG_DVB_USB_DTT200U=m -CONFIG_DVB_USB_OPERA1=m -CONFIG_DVB_USB_AF9005=m -CONFIG_DVB_USB_AF9005_REMOTE=m -CONFIG_DVB_USB_PCTV452E=m -CONFIG_DVB_USB_DW2102=m -CONFIG_DVB_USB_CINERGY_T2=m -CONFIG_DVB_USB_DTV5100=m -CONFIG_DVB_USB_AZ6027=m -CONFIG_DVB_USB_TECHNISAT_USB2=m -CONFIG_DVB_USB_V2=m -CONFIG_DVB_USB_AF9015=m -CONFIG_DVB_USB_AF9035=m -CONFIG_DVB_USB_ANYSEE=m -CONFIG_DVB_USB_AU6610=m -CONFIG_DVB_USB_AZ6007=m -CONFIG_DVB_USB_CE6230=m -CONFIG_DVB_USB_EC168=m -CONFIG_DVB_USB_GL861=m -CONFIG_DVB_USB_LME2510=m -CONFIG_DVB_USB_MXL111SF=m -CONFIG_DVB_USB_RTL28XXU=m -CONFIG_DVB_USB_DVBSKY=m -CONFIG_DVB_USB_ZD1301=m -CONFIG_DVB_TTUSB_BUDGET=m -CONFIG_DVB_TTUSB_DEC=m -CONFIG_SMS_USB_DRV=m -CONFIG_DVB_B2C2_FLEXCOP_USB=m -# CONFIG_DVB_B2C2_FLEXCOP_USB_DEBUG is not set -CONFIG_DVB_AS102=m - -# -# Webcam, TV (analog/digital) USB devices -# -CONFIG_VIDEO_EM28XX=m -CONFIG_VIDEO_EM28XX_V4L2=m -CONFIG_VIDEO_EM28XX_ALSA=m -CONFIG_VIDEO_EM28XX_DVB=m -CONFIG_VIDEO_EM28XX_RC=m - -# -# Software defined radio USB devices -# -CONFIG_USB_AIRSPY=m -CONFIG_USB_HACKRF=m -CONFIG_USB_MSI2500=m -CONFIG_MEDIA_PCI_SUPPORT=y - -# -# Media capture support -# -CONFIG_VIDEO_MEYE=m -CONFIG_VIDEO_SOLO6X10=m -CONFIG_VIDEO_TW5864=m -CONFIG_VIDEO_TW68=m -CONFIG_VIDEO_TW686X=m - -# -# Media capture/analog TV support -# -CONFIG_VIDEO_IVTV=m -# CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS is not set -CONFIG_VIDEO_IVTV_ALSA=m -CONFIG_VIDEO_FB_IVTV=m -# CONFIG_VIDEO_FB_IVTV_FORCE_PAT is not set -CONFIG_VIDEO_HEXIUM_GEMINI=m -CONFIG_VIDEO_HEXIUM_ORION=m -CONFIG_VIDEO_MXB=m -CONFIG_VIDEO_DT3155=m - -# -# Media capture/analog/hybrid TV support -# -CONFIG_VIDEO_CX18=m -CONFIG_VIDEO_CX18_ALSA=m -CONFIG_VIDEO_CX23885=m -CONFIG_MEDIA_ALTERA_CI=m -CONFIG_VIDEO_CX25821=m -CONFIG_VIDEO_CX25821_ALSA=m -CONFIG_VIDEO_CX88=m -CONFIG_VIDEO_CX88_ALSA=m -CONFIG_VIDEO_CX88_BLACKBIRD=m 
-CONFIG_VIDEO_CX88_DVB=m -CONFIG_VIDEO_CX88_ENABLE_VP3054=y -CONFIG_VIDEO_CX88_VP3054=m -CONFIG_VIDEO_CX88_MPEG=m -CONFIG_VIDEO_BT848=m -CONFIG_DVB_BT8XX=m -CONFIG_VIDEO_SAA7134=m -CONFIG_VIDEO_SAA7134_ALSA=m -CONFIG_VIDEO_SAA7134_RC=y -CONFIG_VIDEO_SAA7134_DVB=m -CONFIG_VIDEO_SAA7134_GO7007=m -CONFIG_VIDEO_SAA7164=m - -# -# Media digital TV PCI Adapters -# -CONFIG_DVB_AV7110_IR=y -CONFIG_DVB_AV7110=m -CONFIG_DVB_AV7110_OSD=y -CONFIG_DVB_BUDGET_CORE=m -CONFIG_DVB_BUDGET=m -CONFIG_DVB_BUDGET_CI=m -CONFIG_DVB_BUDGET_AV=m -CONFIG_DVB_BUDGET_PATCH=m -CONFIG_DVB_B2C2_FLEXCOP_PCI=m -# CONFIG_DVB_B2C2_FLEXCOP_PCI_DEBUG is not set -CONFIG_DVB_PLUTO2=m -CONFIG_DVB_DM1105=m -CONFIG_DVB_PT1=m -CONFIG_DVB_PT3=m -CONFIG_MANTIS_CORE=m -CONFIG_DVB_MANTIS=m -CONFIG_DVB_HOPPER=m -CONFIG_DVB_NGENE=m -CONFIG_DVB_DDBRIDGE=m -# CONFIG_DVB_DDBRIDGE_MSIENABLE is not set -CONFIG_DVB_SMIPCIE=m -CONFIG_DVB_NETUP_UNIDVB=m -CONFIG_VIDEO_IPU3_CIO2=m -CONFIG_RADIO_ADAPTERS=y -CONFIG_RADIO_TEA575X=m -CONFIG_RADIO_SI470X=m -CONFIG_USB_SI470X=m -CONFIG_I2C_SI470X=m -CONFIG_RADIO_SI4713=m -CONFIG_USB_SI4713=m -CONFIG_PLATFORM_SI4713=m -CONFIG_I2C_SI4713=m -CONFIG_RADIO_SI476X=m -CONFIG_USB_MR800=m -CONFIG_USB_DSBR=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_SHARK=m -CONFIG_RADIO_SHARK2=m -CONFIG_USB_KEENE=m -CONFIG_USB_RAREMONO=m -CONFIG_USB_MA901=m -CONFIG_RADIO_TEA5764=m -CONFIG_RADIO_SAA7706H=m -CONFIG_RADIO_TEF6862=m -CONFIG_RADIO_WL1273=m -CONFIG_RADIO_WL128X=m -CONFIG_MEDIA_COMMON_OPTIONS=y - -# -# common driver options -# -CONFIG_VIDEO_CX2341X=m -CONFIG_VIDEO_TVEEPROM=m -CONFIG_CYPRESS_FIRMWARE=m -CONFIG_VIDEOBUF2_CORE=m -CONFIG_VIDEOBUF2_V4L2=m -CONFIG_VIDEOBUF2_MEMOPS=m -CONFIG_VIDEOBUF2_DMA_CONTIG=m -CONFIG_VIDEOBUF2_VMALLOC=m -CONFIG_VIDEOBUF2_DMA_SG=m -CONFIG_VIDEOBUF2_DVB=m -CONFIG_DVB_B2C2_FLEXCOP=m -CONFIG_VIDEO_SAA7146=m -CONFIG_VIDEO_SAA7146_VV=m -CONFIG_SMS_SIANO_MDTV=m -CONFIG_SMS_SIANO_RC=y -# CONFIG_SMS_SIANO_DEBUGFS is not set -CONFIG_VIDEO_V4L2_TPG=m -CONFIG_V4L_PLATFORM_DRIVERS=y -CONFIG_VIDEO_CAFE_CCIC=m -CONFIG_VIDEO_CADENCE=y -CONFIG_VIDEO_CADENCE_CSI2RX=m -CONFIG_VIDEO_CADENCE_CSI2TX=m -CONFIG_VIDEO_ASPEED=m -CONFIG_VIDEO_MUX=m -CONFIG_VIDEO_XILINX=m -CONFIG_VIDEO_XILINX_TPG=m -CONFIG_VIDEO_XILINX_VTC=m -CONFIG_V4L_MEM2MEM_DRIVERS=y -CONFIG_VIDEO_MEM2MEM_DEINTERLACE=m -CONFIG_DVB_PLATFORM_DRIVERS=y -CONFIG_SDR_PLATFORM_DRIVERS=y - -# -# MMC/SDIO DVB adapters -# -CONFIG_SMS_SDIO_DRV=m -CONFIG_V4L_TEST_DRIVERS=y -CONFIG_VIDEO_VIMC=m -CONFIG_VIDEO_VIVID=m -CONFIG_VIDEO_VIVID_CEC=y -CONFIG_VIDEO_VIVID_MAX_DEVS=64 -CONFIG_VIDEO_VIM2M=m -CONFIG_VIDEO_VICODEC=m - -# -# FireWire (IEEE 1394) Adapters -# -CONFIG_DVB_FIREDTV=m -CONFIG_DVB_FIREDTV_INPUT=y -# end of Media drivers - -# -# Media ancillary drivers -# -CONFIG_MEDIA_ATTACH=y - -# -# IR I2C driver auto-selected by 'Autoselect ancillary drivers' -# -CONFIG_VIDEO_IR_I2C=m - -# -# Audio decoders, processors and mixers -# -CONFIG_VIDEO_TVAUDIO=m -CONFIG_VIDEO_TDA7432=m -CONFIG_VIDEO_TDA9840=m -CONFIG_VIDEO_TDA1997X=m -CONFIG_VIDEO_TEA6415C=m -CONFIG_VIDEO_TEA6420=m -CONFIG_VIDEO_MSP3400=m -CONFIG_VIDEO_CS3308=m -CONFIG_VIDEO_CS5345=m -CONFIG_VIDEO_CS53L32A=m -CONFIG_VIDEO_TLV320AIC23B=m -CONFIG_VIDEO_UDA1342=m -CONFIG_VIDEO_WM8775=m -CONFIG_VIDEO_WM8739=m -CONFIG_VIDEO_VP27SMPX=m -CONFIG_VIDEO_SONY_BTF_MPX=m -# end of Audio decoders, processors and mixers - -# -# RDS decoders -# -CONFIG_VIDEO_SAA6588=m -# end of RDS decoders - -# -# Video decoders -# -CONFIG_VIDEO_ADV7180=m -CONFIG_VIDEO_ADV7183=m -CONFIG_VIDEO_ADV748X=m -CONFIG_VIDEO_ADV7604=m 
-CONFIG_VIDEO_ADV7604_CEC=y -CONFIG_VIDEO_ADV7842=m -CONFIG_VIDEO_ADV7842_CEC=y -CONFIG_VIDEO_BT819=m -CONFIG_VIDEO_BT856=m -CONFIG_VIDEO_BT866=m -CONFIG_VIDEO_KS0127=m -CONFIG_VIDEO_ML86V7667=m -CONFIG_VIDEO_SAA7110=m -CONFIG_VIDEO_SAA711X=m -CONFIG_VIDEO_TC358743=m -CONFIG_VIDEO_TC358743_CEC=y -CONFIG_VIDEO_TVP514X=m -CONFIG_VIDEO_TVP5150=m -CONFIG_VIDEO_TVP7002=m -CONFIG_VIDEO_TW2804=m -CONFIG_VIDEO_TW9903=m -CONFIG_VIDEO_TW9906=m -CONFIG_VIDEO_TW9910=m -CONFIG_VIDEO_VPX3220=m - -# -# Video and audio decoders -# -CONFIG_VIDEO_SAA717X=m -CONFIG_VIDEO_CX25840=m -# end of Video decoders - -# -# Video encoders -# -CONFIG_VIDEO_SAA7127=m -CONFIG_VIDEO_SAA7185=m -CONFIG_VIDEO_ADV7170=m -CONFIG_VIDEO_ADV7175=m -CONFIG_VIDEO_ADV7343=m -CONFIG_VIDEO_ADV7393=m -CONFIG_VIDEO_AD9389B=m -CONFIG_VIDEO_AK881X=m -CONFIG_VIDEO_THS8200=m -# end of Video encoders - -# -# Video improvement chips -# -CONFIG_VIDEO_UPD64031A=m -CONFIG_VIDEO_UPD64083=m -# end of Video improvement chips - -# -# Audio/Video compression chips -# -CONFIG_VIDEO_SAA6752HS=m -# end of Audio/Video compression chips - -# -# SDR tuner chips -# -CONFIG_SDR_MAX2175=m -# end of SDR tuner chips - -# -# Miscellaneous helper chips -# -CONFIG_VIDEO_THS7303=m -CONFIG_VIDEO_M52790=m -CONFIG_VIDEO_I2C=m -CONFIG_VIDEO_ST_MIPID02=m -# end of Miscellaneous helper chips - -# -# Camera sensor devices -# -CONFIG_VIDEO_APTINA_PLL=m -CONFIG_VIDEO_SMIAPP_PLL=m -CONFIG_VIDEO_HI556=m -CONFIG_VIDEO_IMX214=m -CONFIG_VIDEO_IMX219=m -CONFIG_VIDEO_IMX258=m -CONFIG_VIDEO_IMX274=m -CONFIG_VIDEO_IMX290=m -CONFIG_VIDEO_IMX319=m -CONFIG_VIDEO_IMX355=m -CONFIG_VIDEO_OV2640=m -CONFIG_VIDEO_OV2659=m -CONFIG_VIDEO_OV2680=m -CONFIG_VIDEO_OV2685=m -CONFIG_VIDEO_OV2740=m -CONFIG_VIDEO_OV5640=m -CONFIG_VIDEO_OV5645=m -CONFIG_VIDEO_OV5647=m -CONFIG_VIDEO_OV6650=m -CONFIG_VIDEO_OV5670=m -CONFIG_VIDEO_OV5675=m -CONFIG_VIDEO_OV5695=m -CONFIG_VIDEO_OV7251=m -CONFIG_VIDEO_OV772X=m -CONFIG_VIDEO_OV7640=m -CONFIG_VIDEO_OV7670=m -CONFIG_VIDEO_OV7740=m -CONFIG_VIDEO_OV8856=m -CONFIG_VIDEO_OV9640=m -CONFIG_VIDEO_OV9650=m -CONFIG_VIDEO_OV13858=m -CONFIG_VIDEO_VS6624=m -CONFIG_VIDEO_MT9M001=m -CONFIG_VIDEO_MT9M032=m -CONFIG_VIDEO_MT9M111=m -CONFIG_VIDEO_MT9P031=m -CONFIG_VIDEO_MT9T001=m -CONFIG_VIDEO_MT9T112=m -CONFIG_VIDEO_MT9V011=m -CONFIG_VIDEO_MT9V032=m -CONFIG_VIDEO_MT9V111=m -CONFIG_VIDEO_SR030PC30=m -CONFIG_VIDEO_NOON010PC30=m -CONFIG_VIDEO_M5MOLS=m -CONFIG_VIDEO_RJ54N1=m -CONFIG_VIDEO_S5K6AA=m -CONFIG_VIDEO_S5K6A3=m -CONFIG_VIDEO_S5K4ECGX=m -CONFIG_VIDEO_S5K5BAF=m -CONFIG_VIDEO_SMIAPP=m -CONFIG_VIDEO_ET8EK8=m -CONFIG_VIDEO_S5C73M3=m -# end of Camera sensor devices - -# -# Lens drivers -# -CONFIG_VIDEO_AD5820=m -CONFIG_VIDEO_AK7375=m -CONFIG_VIDEO_DW9714=m -CONFIG_VIDEO_DW9807_VCM=m -# end of Lens drivers - -# -# Flash devices -# -CONFIG_VIDEO_ADP1653=m -CONFIG_VIDEO_LM3560=m -CONFIG_VIDEO_LM3646=m -# end of Flash devices - -# -# SPI helper chips -# -CONFIG_VIDEO_GS1662=m -# end of SPI helper chips - -# -# Media SPI Adapters -# -CONFIG_CXD2880_SPI_DRV=m -# end of Media SPI Adapters - -CONFIG_MEDIA_TUNER=m - -# -# Customize TV tuners -# -CONFIG_MEDIA_TUNER_SIMPLE=m -CONFIG_MEDIA_TUNER_TDA18250=m -CONFIG_MEDIA_TUNER_TDA8290=m -CONFIG_MEDIA_TUNER_TDA827X=m -CONFIG_MEDIA_TUNER_TDA18271=m -CONFIG_MEDIA_TUNER_TDA9887=m -CONFIG_MEDIA_TUNER_TEA5761=m -CONFIG_MEDIA_TUNER_TEA5767=m -CONFIG_MEDIA_TUNER_MSI001=m -CONFIG_MEDIA_TUNER_MT20XX=m -CONFIG_MEDIA_TUNER_MT2060=m -CONFIG_MEDIA_TUNER_MT2063=m -CONFIG_MEDIA_TUNER_MT2266=m -CONFIG_MEDIA_TUNER_MT2131=m -CONFIG_MEDIA_TUNER_QT1010=m 
-CONFIG_MEDIA_TUNER_XC2028=m -CONFIG_MEDIA_TUNER_XC5000=m -CONFIG_MEDIA_TUNER_XC4000=m -CONFIG_MEDIA_TUNER_MXL5005S=m -CONFIG_MEDIA_TUNER_MXL5007T=m -CONFIG_MEDIA_TUNER_MC44S803=m -CONFIG_MEDIA_TUNER_MAX2165=m -CONFIG_MEDIA_TUNER_TDA18218=m -CONFIG_MEDIA_TUNER_FC0011=m -CONFIG_MEDIA_TUNER_FC0012=m -CONFIG_MEDIA_TUNER_FC0013=m -CONFIG_MEDIA_TUNER_TDA18212=m -CONFIG_MEDIA_TUNER_E4000=m -CONFIG_MEDIA_TUNER_FC2580=m -CONFIG_MEDIA_TUNER_M88RS6000T=m -CONFIG_MEDIA_TUNER_TUA9001=m -CONFIG_MEDIA_TUNER_SI2157=m -CONFIG_MEDIA_TUNER_IT913X=m -CONFIG_MEDIA_TUNER_R820T=m -CONFIG_MEDIA_TUNER_MXL301RF=m -CONFIG_MEDIA_TUNER_QM1D1C0042=m -CONFIG_MEDIA_TUNER_QM1D1B0004=m -# end of Customize TV tuners - -# -# Customise DVB Frontends -# - -# -# Multistandard (satellite) frontends -# -CONFIG_DVB_STB0899=m -CONFIG_DVB_STB6100=m -CONFIG_DVB_STV090x=m -CONFIG_DVB_STV0910=m -CONFIG_DVB_STV6110x=m -CONFIG_DVB_STV6111=m -CONFIG_DVB_MXL5XX=m -CONFIG_DVB_M88DS3103=m - -# -# Multistandard (cable + terrestrial) frontends -# -CONFIG_DVB_DRXK=m -CONFIG_DVB_TDA18271C2DD=m -CONFIG_DVB_SI2165=m -CONFIG_DVB_MN88472=m -CONFIG_DVB_MN88473=m - -# -# DVB-S (satellite) frontends -# -CONFIG_DVB_CX24110=m -CONFIG_DVB_CX24123=m -CONFIG_DVB_MT312=m -CONFIG_DVB_ZL10036=m -CONFIG_DVB_ZL10039=m -CONFIG_DVB_S5H1420=m -CONFIG_DVB_STV0288=m -CONFIG_DVB_STB6000=m -CONFIG_DVB_STV0299=m -CONFIG_DVB_STV6110=m -CONFIG_DVB_STV0900=m -CONFIG_DVB_TDA8083=m -CONFIG_DVB_TDA10086=m -CONFIG_DVB_TDA8261=m -CONFIG_DVB_VES1X93=m -CONFIG_DVB_TUNER_ITD1000=m -CONFIG_DVB_TUNER_CX24113=m -CONFIG_DVB_TDA826X=m -CONFIG_DVB_TUA6100=m -CONFIG_DVB_CX24116=m -CONFIG_DVB_CX24117=m -CONFIG_DVB_CX24120=m -CONFIG_DVB_SI21XX=m -CONFIG_DVB_TS2020=m -CONFIG_DVB_DS3000=m -CONFIG_DVB_MB86A16=m -CONFIG_DVB_TDA10071=m - -# -# DVB-T (terrestrial) frontends -# -CONFIG_DVB_SP8870=m -CONFIG_DVB_SP887X=m -CONFIG_DVB_CX22700=m -CONFIG_DVB_CX22702=m -CONFIG_DVB_S5H1432=m -CONFIG_DVB_DRXD=m -CONFIG_DVB_L64781=m -CONFIG_DVB_TDA1004X=m -CONFIG_DVB_NXT6000=m -CONFIG_DVB_MT352=m -CONFIG_DVB_ZL10353=m -CONFIG_DVB_DIB3000MB=m -CONFIG_DVB_DIB3000MC=m -CONFIG_DVB_DIB7000M=m -CONFIG_DVB_DIB7000P=m -CONFIG_DVB_DIB9000=m -CONFIG_DVB_TDA10048=m -CONFIG_DVB_AF9013=m -CONFIG_DVB_EC100=m -CONFIG_DVB_STV0367=m -CONFIG_DVB_CXD2820R=m -CONFIG_DVB_CXD2841ER=m -CONFIG_DVB_RTL2830=m -CONFIG_DVB_RTL2832=m -CONFIG_DVB_RTL2832_SDR=m -CONFIG_DVB_SI2168=m -CONFIG_DVB_AS102_FE=m -CONFIG_DVB_ZD1301_DEMOD=m -CONFIG_DVB_GP8PSK_FE=m -CONFIG_DVB_CXD2880=m - -# -# DVB-C (cable) frontends -# -CONFIG_DVB_VES1820=m -CONFIG_DVB_TDA10021=m -CONFIG_DVB_TDA10023=m -CONFIG_DVB_STV0297=m - -# -# ATSC (North American/Korean Terrestrial/Cable DTV) frontends -# -CONFIG_DVB_NXT200X=m -CONFIG_DVB_OR51211=m -CONFIG_DVB_OR51132=m -CONFIG_DVB_BCM3510=m -CONFIG_DVB_LGDT330X=m -CONFIG_DVB_LGDT3305=m -CONFIG_DVB_LGDT3306A=m -CONFIG_DVB_LG2160=m -CONFIG_DVB_S5H1409=m -CONFIG_DVB_AU8522=m -CONFIG_DVB_AU8522_DTV=m -CONFIG_DVB_AU8522_V4L=m -CONFIG_DVB_S5H1411=m - -# -# ISDB-T (terrestrial) frontends -# -CONFIG_DVB_S921=m -CONFIG_DVB_DIB8000=m -CONFIG_DVB_MB86A20S=m - -# -# ISDB-S (satellite) & ISDB-T (terrestrial) frontends -# -CONFIG_DVB_TC90522=m -CONFIG_DVB_MN88443X=m - -# -# Digital terrestrial only tuners/PLL -# -CONFIG_DVB_PLL=m -CONFIG_DVB_TUNER_DIB0070=m -CONFIG_DVB_TUNER_DIB0090=m - -# -# SEC control devices for DVB-S -# -CONFIG_DVB_DRX39XYJ=m -CONFIG_DVB_LNBH25=m -CONFIG_DVB_LNBH29=m -CONFIG_DVB_LNBP21=m -CONFIG_DVB_LNBP22=m -CONFIG_DVB_ISL6405=m -CONFIG_DVB_ISL6421=m -CONFIG_DVB_ISL6423=m -CONFIG_DVB_A8293=m 
-CONFIG_DVB_LGS8GL5=m -CONFIG_DVB_LGS8GXX=m -CONFIG_DVB_ATBM8830=m -CONFIG_DVB_TDA665x=m -CONFIG_DVB_IX2505V=m -CONFIG_DVB_M88RS2000=m -CONFIG_DVB_AF9033=m -CONFIG_DVB_HORUS3A=m -CONFIG_DVB_ASCOT2E=m -CONFIG_DVB_HELENE=m - -# -# Common Interface (EN50221) controller drivers -# -CONFIG_DVB_CXD2099=m -CONFIG_DVB_SP2=m -# end of Customise DVB Frontends - -# -# Tools to develop new frontends -# -CONFIG_DVB_DUMMY_FE=m -# end of Media ancillary drivers - -# -# Graphics support -# -CONFIG_AGP=m -CONFIG_AGP_AMD64=m -CONFIG_AGP_INTEL=m -CONFIG_AGP_SIS=m -CONFIG_AGP_VIA=m -CONFIG_INTEL_GTT=m -CONFIG_VGA_ARB=y -CONFIG_VGA_ARB_MAX_GPUS=10 -CONFIG_VGA_SWITCHEROO=y -CONFIG_DRM=m -CONFIG_DRM_MIPI_DBI=m -CONFIG_DRM_MIPI_DSI=y -CONFIG_DRM_DP_AUX_CHARDEV=y -# CONFIG_DRM_DEBUG_SELFTEST is not set -CONFIG_DRM_KMS_HELPER=m -CONFIG_DRM_KMS_FB_HELPER=y -# CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS is not set -CONFIG_DRM_FBDEV_EMULATION=y -CONFIG_DRM_FBDEV_OVERALLOC=100 -# CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM is not set -CONFIG_DRM_LOAD_EDID_FIRMWARE=y -CONFIG_DRM_DP_CEC=y -CONFIG_DRM_TTM=m -CONFIG_DRM_TTM_DMA_PAGE_POOL=y -CONFIG_DRM_VRAM_HELPER=m -CONFIG_DRM_TTM_HELPER=m -CONFIG_DRM_GEM_CMA_HELPER=y -CONFIG_DRM_KMS_CMA_HELPER=y -CONFIG_DRM_GEM_SHMEM_HELPER=y -CONFIG_DRM_SCHED=m - -# -# I2C encoder or helper chips -# -CONFIG_DRM_I2C_CH7006=m -CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_I2C_NXP_TDA998X=m -CONFIG_DRM_I2C_NXP_TDA9950=m -# end of I2C encoder or helper chips - -# -# ARM devices -# -CONFIG_DRM_KOMEDA=m -# end of ARM devices - -CONFIG_DRM_RADEON=m -CONFIG_DRM_RADEON_USERPTR=y -CONFIG_DRM_AMDGPU=m -CONFIG_DRM_AMDGPU_SI=y -CONFIG_DRM_AMDGPU_CIK=y -CONFIG_DRM_AMDGPU_USERPTR=y -# CONFIG_DRM_AMDGPU_GART_DEBUGFS is not set - -# -# ACP (Audio CoProcessor) Configuration -# -CONFIG_DRM_AMD_ACP=y -# end of ACP (Audio CoProcessor) Configuration - -# -# Display Engine Configuration -# -CONFIG_DRM_AMD_DC=y -CONFIG_DRM_AMD_DC_DCN=y -CONFIG_DRM_AMD_DC_HDCP=y -# CONFIG_DEBUG_KERNEL_DC is not set -# end of Display Engine Configuration - -CONFIG_HSA_AMD=y -CONFIG_DRM_NOUVEAU=m -# CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT is not set -CONFIG_NOUVEAU_DEBUG=5 -CONFIG_NOUVEAU_DEBUG_DEFAULT=3 -# CONFIG_NOUVEAU_DEBUG_MMU is not set -CONFIG_DRM_NOUVEAU_BACKLIGHT=y -CONFIG_DRM_NOUVEAU_SVM=y -CONFIG_DRM_I915=m -CONFIG_DRM_I915_FORCE_PROBE="*" -CONFIG_DRM_I915_CAPTURE_ERROR=y -CONFIG_DRM_I915_COMPRESS_ERROR=y -CONFIG_DRM_I915_USERPTR=y -CONFIG_DRM_I915_GVT=y -CONFIG_DRM_I915_GVT_KVMGT=m - -# -# drm/i915 Debugging -# -# CONFIG_DRM_I915_WERROR is not set -# CONFIG_DRM_I915_DEBUG is not set -# CONFIG_DRM_I915_DEBUG_MMIO is not set -# CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS is not set -# CONFIG_DRM_I915_SW_FENCE_CHECK_DAG is not set -# CONFIG_DRM_I915_DEBUG_GUC is not set -# CONFIG_DRM_I915_SELFTEST is not set -# CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS is not set -# CONFIG_DRM_I915_DEBUG_VBLANK_EVADE is not set -# CONFIG_DRM_I915_DEBUG_RUNTIME_PM is not set -# end of drm/i915 Debugging - -# -# drm/i915 Profile Guided Optimisation -# -CONFIG_DRM_I915_FENCE_TIMEOUT=10000 -CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND=250 -CONFIG_DRM_I915_HEARTBEAT_INTERVAL=2500 -CONFIG_DRM_I915_PREEMPT_TIMEOUT=640 -CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT=8000 -CONFIG_DRM_I915_STOP_TIMEOUT=100 -CONFIG_DRM_I915_TIMESLICE_DURATION=1 -# end of drm/i915 Profile Guided Optimisation - -CONFIG_DRM_VGEM=m -CONFIG_DRM_VKMS=m -CONFIG_DRM_VMWGFX=m -CONFIG_DRM_VMWGFX_FBCON=y -CONFIG_DRM_GMA500=m -CONFIG_DRM_GMA600=y -CONFIG_DRM_GMA3600=y -CONFIG_DRM_UDL=m -CONFIG_DRM_AST=m -CONFIG_DRM_MGAG200=m 
-CONFIG_DRM_RCAR_DW_HDMI=m -CONFIG_DRM_RCAR_LVDS=m -CONFIG_DRM_QXL=m -CONFIG_DRM_BOCHS=m -CONFIG_DRM_VIRTIO_GPU=m -CONFIG_DRM_PANEL=y - -# -# Display Panels -# -CONFIG_DRM_PANEL_ARM_VERSATILE=m -CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596=m -CONFIG_DRM_PANEL_BOE_HIMAX8279D=m -CONFIG_DRM_PANEL_BOE_TV101WUM_NL6=m -CONFIG_DRM_PANEL_LVDS=m -CONFIG_DRM_PANEL_SIMPLE=m -CONFIG_DRM_PANEL_ELIDA_KD35T133=m -CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02=m -CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m -CONFIG_DRM_PANEL_ILITEK_IL9322=m -CONFIG_DRM_PANEL_ILITEK_ILI9881C=m -CONFIG_DRM_PANEL_INNOLUX_P079ZCA=m -CONFIG_DRM_PANEL_JDI_LT070ME05000=m -CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04=m -CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W=m -CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829=m -CONFIG_DRM_PANEL_SAMSUNG_LD9040=m -CONFIG_DRM_PANEL_LG_LB035Q02=m -CONFIG_DRM_PANEL_LG_LG4573=m -CONFIG_DRM_PANEL_NEC_NL8048HL11=m -CONFIG_DRM_PANEL_NOVATEK_NT35510=m -CONFIG_DRM_PANEL_NOVATEK_NT39016=m -CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO=m -CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m -CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS=m -CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m -CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m -CONFIG_DRM_PANEL_RAYDIUM_RM67191=m -CONFIG_DRM_PANEL_RAYDIUM_RM68200=m -CONFIG_DRM_PANEL_ROCKTECH_JH057N00900=m -CONFIG_DRM_PANEL_RONBO_RB070D30=m -CONFIG_DRM_PANEL_SAMSUNG_S6D16D0=m -CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2=m -CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03=m -CONFIG_DRM_PANEL_SAMSUNG_S6E63M0=m -CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01=m -CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0=m -CONFIG_DRM_PANEL_SEIKO_43WVF1G=m -CONFIG_DRM_PANEL_SHARP_LQ101R1SX01=m -CONFIG_DRM_PANEL_SHARP_LS037V7DW01=m -CONFIG_DRM_PANEL_SHARP_LS043T1LE01=m -CONFIG_DRM_PANEL_SITRONIX_ST7701=m -CONFIG_DRM_PANEL_SITRONIX_ST7789V=m -CONFIG_DRM_PANEL_SONY_ACX424AKP=m -CONFIG_DRM_PANEL_SONY_ACX565AKM=m -CONFIG_DRM_PANEL_TPO_TD028TTEC1=m -CONFIG_DRM_PANEL_TPO_TD043MTEA1=m -CONFIG_DRM_PANEL_TPO_TPG110=m -CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA=m -CONFIG_DRM_PANEL_VISIONOX_RM69299=m -CONFIG_DRM_PANEL_XINPENG_XPP055C272=m -# end of Display Panels - -CONFIG_DRM_BRIDGE=y -CONFIG_DRM_PANEL_BRIDGE=y - -# -# Display Interface Bridges -# -CONFIG_DRM_CDNS_DSI=m -CONFIG_DRM_CHRONTEL_CH7033=m -CONFIG_DRM_DISPLAY_CONNECTOR=m -CONFIG_DRM_LVDS_CODEC=m -CONFIG_DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW=m -CONFIG_DRM_NWL_MIPI_DSI=m -CONFIG_DRM_NXP_PTN3460=m -CONFIG_DRM_PARADE_PS8622=m -CONFIG_DRM_PARADE_PS8640=m -CONFIG_DRM_SIL_SII8620=m -CONFIG_DRM_SII902X=m -CONFIG_DRM_SII9234=m -CONFIG_DRM_SIMPLE_BRIDGE=m -CONFIG_DRM_THINE_THC63LVD1024=m -CONFIG_DRM_TOSHIBA_TC358764=m -CONFIG_DRM_TOSHIBA_TC358767=m -CONFIG_DRM_TOSHIBA_TC358768=m -CONFIG_DRM_TI_TFP410=m -CONFIG_DRM_TI_SN65DSI86=m -CONFIG_DRM_TI_TPD12S015=m -CONFIG_DRM_ANALOGIX_ANX6345=m -CONFIG_DRM_ANALOGIX_ANX78XX=m -CONFIG_DRM_ANALOGIX_DP=m -CONFIG_DRM_I2C_ADV7511=m -CONFIG_DRM_I2C_ADV7511_AUDIO=y -CONFIG_DRM_I2C_ADV7511_CEC=y -CONFIG_DRM_DW_HDMI=m -CONFIG_DRM_DW_HDMI_AHB_AUDIO=m -CONFIG_DRM_DW_HDMI_I2S_AUDIO=m -CONFIG_DRM_DW_HDMI_CEC=m -# end of Display Interface Bridges - -# CONFIG_DRM_ETNAVIV is not set -CONFIG_DRM_ARCPGU=m -CONFIG_DRM_MXS=y -CONFIG_DRM_MXSFB=m -CONFIG_DRM_CIRRUS_QEMU=m -CONFIG_DRM_GM12U320=m -CONFIG_TINYDRM_HX8357D=m -CONFIG_TINYDRM_ILI9225=m -CONFIG_TINYDRM_ILI9341=m -CONFIG_TINYDRM_ILI9486=m -CONFIG_TINYDRM_MI0283QT=m -CONFIG_TINYDRM_REPAPER=m -CONFIG_TINYDRM_ST7586=m -CONFIG_TINYDRM_ST7735R=m -CONFIG_DRM_XEN=y -CONFIG_DRM_XEN_FRONTEND=m -CONFIG_DRM_VBOXVIDEO=m -# CONFIG_DRM_LEGACY is not set -CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y - -# -# Frame 
buffer Devices -# -CONFIG_FB_CMDLINE=y -CONFIG_FB_NOTIFY=y -CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y -CONFIG_FB_BOOT_VESA_SUPPORT=y -CONFIG_FB_CFB_FILLRECT=y -CONFIG_FB_CFB_COPYAREA=y -CONFIG_FB_CFB_IMAGEBLIT=y -CONFIG_FB_SYS_FILLRECT=m -CONFIG_FB_SYS_COPYAREA=m -CONFIG_FB_SYS_IMAGEBLIT=m -# CONFIG_FB_FOREIGN_ENDIAN is not set -CONFIG_FB_SYS_FOPS=m -CONFIG_FB_DEFERRED_IO=y -CONFIG_FB_BACKLIGHT=m -CONFIG_FB_MODE_HELPERS=y -CONFIG_FB_TILEBLITTING=y - -# -# Frame buffer hardware drivers -# -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ARC is not set -# CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_VGA16 is not set -# CONFIG_FB_UVESA is not set -CONFIG_FB_VESA=y -CONFIG_FB_EFI=y -# CONFIG_FB_N411 is not set -# CONFIG_FB_HGA is not set -# CONFIG_FB_OPENCORES is not set -# CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set -# CONFIG_FB_I740 is not set -# CONFIG_FB_LE80578 is not set -# CONFIG_FB_INTEL is not set -# CONFIG_FB_MATROX is not set -# CONFIG_FB_RADEON is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_S3 is not set -# CONFIG_FB_SAVAGE is not set -# CONFIG_FB_SIS is not set -# CONFIG_FB_VIA is not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VOODOO1 is not set -# CONFIG_FB_VT8623 is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_ARK is not set -# CONFIG_FB_PM3 is not set -# CONFIG_FB_CARMINE is not set -# CONFIG_FB_SM501 is not set -# CONFIG_FB_SMSCUFX is not set -# CONFIG_FB_UDL is not set -# CONFIG_FB_IBM_GXT4500 is not set -# CONFIG_FB_VIRTUAL is not set -CONFIG_XEN_FBDEV_FRONTEND=m -# CONFIG_FB_METRONOME is not set -# CONFIG_FB_MB862XX is not set -CONFIG_FB_HYPERV=m -CONFIG_FB_SIMPLE=y -# CONFIG_FB_SSD1307 is not set -# CONFIG_FB_SM712 is not set -# end of Frame buffer Devices - -# -# Backlight & LCD device support -# -CONFIG_LCD_CLASS_DEVICE=m -CONFIG_LCD_L4F00242T03=m -CONFIG_LCD_LMS283GF05=m -CONFIG_LCD_LTV350QV=m -CONFIG_LCD_ILI922X=m -CONFIG_LCD_ILI9320=m -CONFIG_LCD_TDO24M=m -CONFIG_LCD_VGG2432A4=m -CONFIG_LCD_PLATFORM=m -CONFIG_LCD_AMS369FG06=m -CONFIG_LCD_LMS501KF03=m -CONFIG_LCD_HX8357=m -CONFIG_LCD_OTM3225A=m -CONFIG_BACKLIGHT_CLASS_DEVICE=y -CONFIG_BACKLIGHT_GENERIC=m -CONFIG_BACKLIGHT_LM3533=m -CONFIG_BACKLIGHT_PWM=m -CONFIG_BACKLIGHT_DA903X=m -CONFIG_BACKLIGHT_DA9052=m -CONFIG_BACKLIGHT_MAX8925=m -CONFIG_BACKLIGHT_APPLE=m -CONFIG_BACKLIGHT_QCOM_WLED=m -CONFIG_BACKLIGHT_SAHARA=m -CONFIG_BACKLIGHT_WM831X=m -CONFIG_BACKLIGHT_ADP5520=m -CONFIG_BACKLIGHT_ADP8860=m -CONFIG_BACKLIGHT_ADP8870=m -CONFIG_BACKLIGHT_88PM860X=m -CONFIG_BACKLIGHT_PCF50633=m -CONFIG_BACKLIGHT_AAT2870=m -CONFIG_BACKLIGHT_LM3630A=m -CONFIG_BACKLIGHT_LM3639=m -CONFIG_BACKLIGHT_LP855X=m -CONFIG_BACKLIGHT_LP8788=m -CONFIG_BACKLIGHT_PANDORA=m -CONFIG_BACKLIGHT_SKY81452=m -CONFIG_BACKLIGHT_TPS65217=m -CONFIG_BACKLIGHT_AS3711=m -CONFIG_BACKLIGHT_GPIO=m -CONFIG_BACKLIGHT_LV5207LP=m -CONFIG_BACKLIGHT_BD6107=m -CONFIG_BACKLIGHT_ARCXCNN=m -CONFIG_BACKLIGHT_RAVE_SP=m -CONFIG_BACKLIGHT_LED=m -# end of Backlight & LCD device support - -CONFIG_VIDEOMODE_HELPERS=y -CONFIG_HDMI=y - -# -# Console display driver support -# -CONFIG_VGA_CONSOLE=y -CONFIG_VGACON_SOFT_SCROLLBACK=y -CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 -# CONFIG_VGACON_SOFT_SCROLLBACK_PERSISTENT_ENABLE_BY_DEFAULT is not set -CONFIG_DUMMY_CONSOLE=y -CONFIG_DUMMY_CONSOLE_COLUMNS=80 -CONFIG_DUMMY_CONSOLE_ROWS=25 -CONFIG_FRAMEBUFFER_CONSOLE=y 
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y -CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER=y -# end of Console display driver support - -# CONFIG_LOGO is not set -# end of Graphics support - -CONFIG_SOUND=m -CONFIG_SOUND_OSS_CORE=y -# CONFIG_SOUND_OSS_CORE_PRECLAIM is not set -CONFIG_SND=m -CONFIG_SND_TIMER=m -CONFIG_SND_PCM=m -CONFIG_SND_PCM_ELD=y -CONFIG_SND_PCM_IEC958=y -CONFIG_SND_DMAENGINE_PCM=m -CONFIG_SND_HWDEP=m -CONFIG_SND_SEQ_DEVICE=m -CONFIG_SND_RAWMIDI=m -CONFIG_SND_COMPRESS_OFFLOAD=m -CONFIG_SND_JACK=y -CONFIG_SND_JACK_INPUT_DEV=y -CONFIG_SND_OSSEMUL=y -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_PCM_OSS_PLUGINS=y -CONFIG_SND_PCM_TIMER=y -CONFIG_SND_HRTIMER=m -CONFIG_SND_DYNAMIC_MINORS=y -CONFIG_SND_MAX_CARDS=32 -# CONFIG_SND_SUPPORT_OLD_API is not set -CONFIG_SND_PROC_FS=y -CONFIG_SND_VERBOSE_PROCFS=y -CONFIG_SND_VERBOSE_PRINTK=y -CONFIG_SND_DEBUG=y -# CONFIG_SND_DEBUG_VERBOSE is not set -# CONFIG_SND_PCM_XRUN_DEBUG is not set -# CONFIG_SND_CTL_VALIDATION is not set -CONFIG_SND_VMASTER=y -CONFIG_SND_DMA_SGBUF=y -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_SEQUENCER_OSS=m -CONFIG_SND_SEQ_HRTIMER_DEFAULT=y -CONFIG_SND_SEQ_MIDI_EVENT=m -CONFIG_SND_SEQ_MIDI=m -CONFIG_SND_SEQ_MIDI_EMUL=m -CONFIG_SND_SEQ_VIRMIDI=m -CONFIG_SND_MPU401_UART=m -CONFIG_SND_OPL3_LIB=m -CONFIG_SND_OPL3_LIB_SEQ=m -CONFIG_SND_VX_LIB=m -CONFIG_SND_AC97_CODEC=m -CONFIG_SND_DRIVERS=y -# CONFIG_SND_PCSP is not set -CONFIG_SND_DUMMY=m -CONFIG_SND_ALOOP=m -CONFIG_SND_VIRMIDI=m -CONFIG_SND_MTPAV=m -CONFIG_SND_MTS64=m -CONFIG_SND_SERIAL_U16550=m -CONFIG_SND_MPU401=m -CONFIG_SND_PORTMAN2X4=m -CONFIG_SND_AC97_POWER_SAVE=y -CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 -CONFIG_SND_SB_COMMON=m -CONFIG_SND_PCI=y -CONFIG_SND_AD1889=m -CONFIG_SND_ALS300=m -CONFIG_SND_ALS4000=m -CONFIG_SND_ALI5451=m -CONFIG_SND_ASIHPI=m -CONFIG_SND_ATIIXP=m -CONFIG_SND_ATIIXP_MODEM=m -CONFIG_SND_AU8810=m -CONFIG_SND_AU8820=m -CONFIG_SND_AU8830=m -CONFIG_SND_AW2=m -CONFIG_SND_AZT3328=m -CONFIG_SND_BT87X=m -# CONFIG_SND_BT87X_OVERCLOCK is not set -CONFIG_SND_CA0106=m -CONFIG_SND_CMIPCI=m -CONFIG_SND_OXYGEN_LIB=m -CONFIG_SND_OXYGEN=m -CONFIG_SND_CS4281=m -CONFIG_SND_CS46XX=m -CONFIG_SND_CS46XX_NEW_DSP=y -CONFIG_SND_CTXFI=m -CONFIG_SND_DARLA20=m -CONFIG_SND_GINA20=m -CONFIG_SND_LAYLA20=m -CONFIG_SND_DARLA24=m -CONFIG_SND_GINA24=m -CONFIG_SND_LAYLA24=m -CONFIG_SND_MONA=m -CONFIG_SND_MIA=m -CONFIG_SND_ECHO3G=m -CONFIG_SND_INDIGO=m -CONFIG_SND_INDIGOIO=m -CONFIG_SND_INDIGODJ=m -CONFIG_SND_INDIGOIOX=m -CONFIG_SND_INDIGODJX=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_EMU10K1_SEQ=m -CONFIG_SND_EMU10K1X=m -CONFIG_SND_ENS1370=m -CONFIG_SND_ENS1371=m -CONFIG_SND_ES1938=m -CONFIG_SND_ES1968=m -CONFIG_SND_ES1968_INPUT=y -CONFIG_SND_ES1968_RADIO=y -CONFIG_SND_FM801=m -CONFIG_SND_FM801_TEA575X_BOOL=y -CONFIG_SND_HDSP=m -CONFIG_SND_HDSPM=m -CONFIG_SND_ICE1712=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_INTEL8X0M=m -CONFIG_SND_KORG1212=m -CONFIG_SND_LOLA=m -CONFIG_SND_LX6464ES=m -CONFIG_SND_MAESTRO3=m -CONFIG_SND_MAESTRO3_INPUT=y -CONFIG_SND_MIXART=m -CONFIG_SND_NM256=m -CONFIG_SND_PCXHR=m -CONFIG_SND_RIPTIDE=m -CONFIG_SND_RME32=m -CONFIG_SND_RME96=m -CONFIG_SND_RME9652=m -CONFIG_SND_SONICVIBES=m -CONFIG_SND_TRIDENT=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VIA82XX_MODEM=m -CONFIG_SND_VIRTUOSO=m -CONFIG_SND_VX222=m -CONFIG_SND_YMFPCI=m - -# -# HD-Audio -# -CONFIG_SND_HDA=m -CONFIG_SND_HDA_INTEL=m -CONFIG_SND_HDA_HWDEP=y -CONFIG_SND_HDA_RECONFIG=y -CONFIG_SND_HDA_INPUT_BEEP=y 
-CONFIG_SND_HDA_INPUT_BEEP_MODE=1 -CONFIG_SND_HDA_PATCH_LOADER=y -CONFIG_SND_HDA_CODEC_REALTEK=m -CONFIG_SND_HDA_CODEC_ANALOG=m -CONFIG_SND_HDA_CODEC_SIGMATEL=m -CONFIG_SND_HDA_CODEC_VIA=m -CONFIG_SND_HDA_CODEC_HDMI=m -CONFIG_SND_HDA_CODEC_CIRRUS=m -CONFIG_SND_HDA_CODEC_CONEXANT=m -CONFIG_SND_HDA_CODEC_CA0110=m -CONFIG_SND_HDA_CODEC_CA0132=m -CONFIG_SND_HDA_CODEC_CA0132_DSP=y -CONFIG_SND_HDA_CODEC_CMEDIA=m -CONFIG_SND_HDA_CODEC_SI3054=m -CONFIG_SND_HDA_GENERIC=m -CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 -# end of HD-Audio - -CONFIG_SND_HDA_CORE=m -CONFIG_SND_HDA_DSP_LOADER=y -CONFIG_SND_HDA_COMPONENT=y -CONFIG_SND_HDA_I915=y -CONFIG_SND_HDA_EXT_CORE=m -CONFIG_SND_HDA_PREALLOC_SIZE=0 -CONFIG_SND_INTEL_NHLT=y -CONFIG_SND_INTEL_DSP_CONFIG=m -CONFIG_SND_SPI=y -CONFIG_SND_USB=y -CONFIG_SND_USB_AUDIO=m -CONFIG_SND_USB_AUDIO_USE_MEDIA_CONTROLLER=y -CONFIG_SND_USB_UA101=m -CONFIG_SND_USB_USX2Y=m -CONFIG_SND_USB_CAIAQ=m -CONFIG_SND_USB_CAIAQ_INPUT=y -CONFIG_SND_USB_US122L=m -CONFIG_SND_USB_6FIRE=m -CONFIG_SND_USB_HIFACE=m -CONFIG_SND_BCD2000=m -CONFIG_SND_USB_LINE6=m -CONFIG_SND_USB_POD=m -CONFIG_SND_USB_PODHD=m -CONFIG_SND_USB_TONEPORT=m -CONFIG_SND_USB_VARIAX=m -CONFIG_SND_FIREWIRE=y -CONFIG_SND_FIREWIRE_LIB=m -CONFIG_SND_DICE=m -CONFIG_SND_OXFW=m -CONFIG_SND_ISIGHT=m -CONFIG_SND_FIREWORKS=m -CONFIG_SND_BEBOB=m -CONFIG_SND_FIREWIRE_DIGI00X=m -CONFIG_SND_FIREWIRE_TASCAM=m -CONFIG_SND_FIREWIRE_MOTU=m -CONFIG_SND_FIREFACE=m -CONFIG_SND_PCMCIA=y -CONFIG_SND_VXPOCKET=m -CONFIG_SND_PDAUDIOCF=m -CONFIG_SND_SOC=m -CONFIG_SND_SOC_AC97_BUS=y -CONFIG_SND_SOC_GENERIC_DMAENGINE_PCM=y -CONFIG_SND_SOC_COMPRESS=y -CONFIG_SND_SOC_TOPOLOGY=y -CONFIG_SND_SOC_ACPI=m -CONFIG_SND_SOC_AMD_ACP=m -CONFIG_SND_SOC_AMD_CZ_DA7219MX98357_MACH=m -CONFIG_SND_SOC_AMD_CZ_RT5645_MACH=m -CONFIG_SND_SOC_AMD_ACP3x=m -CONFIG_SND_SOC_AMD_RV_RT5682_MACH=m -CONFIG_SND_SOC_AMD_RENOIR=m -CONFIG_SND_SOC_AMD_RENOIR_MACH=m -CONFIG_SND_ATMEL_SOC=m -CONFIG_SND_SOC_MIKROE_PROTO=m -CONFIG_SND_BCM63XX_I2S_WHISTLER=m -CONFIG_SND_DESIGNWARE_I2S=m -CONFIG_SND_DESIGNWARE_PCM=y - -# -# SoC Audio for Freescale CPUs -# - -# -# Common SoC Audio options for Freescale CPUs: -# -# CONFIG_SND_SOC_FSL_ASRC is not set -# CONFIG_SND_SOC_FSL_SAI is not set -# CONFIG_SND_SOC_FSL_AUDMIX is not set -# CONFIG_SND_SOC_FSL_SSI is not set -# CONFIG_SND_SOC_FSL_SPDIF is not set -# CONFIG_SND_SOC_FSL_ESAI is not set -# CONFIG_SND_SOC_FSL_MICFIL is not set -# CONFIG_SND_SOC_IMX_AUDMUX is not set -# end of SoC Audio for Freescale CPUs - -CONFIG_SND_I2S_HI6210_I2S=m -CONFIG_SND_SOC_IMG=y -CONFIG_SND_SOC_IMG_I2S_IN=m -CONFIG_SND_SOC_IMG_I2S_OUT=m -CONFIG_SND_SOC_IMG_PARALLEL_OUT=m -CONFIG_SND_SOC_IMG_SPDIF_IN=m -CONFIG_SND_SOC_IMG_SPDIF_OUT=m -CONFIG_SND_SOC_IMG_PISTACHIO_INTERNAL_DAC=m -CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y -CONFIG_SND_SST_IPC=m -CONFIG_SND_SST_IPC_PCI=m -CONFIG_SND_SST_IPC_ACPI=m -CONFIG_SND_SOC_INTEL_SST_ACPI=m -CONFIG_SND_SOC_INTEL_SST=m -CONFIG_SND_SOC_INTEL_SST_FIRMWARE=m -CONFIG_SND_SOC_INTEL_HASWELL=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_PCI=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_ACPI=m -CONFIG_SND_SOC_INTEL_SKYLAKE=m -CONFIG_SND_SOC_INTEL_SKL=m -CONFIG_SND_SOC_INTEL_APL=m -CONFIG_SND_SOC_INTEL_KBL=m -CONFIG_SND_SOC_INTEL_GLK=m -CONFIG_SND_SOC_INTEL_CNL=m -CONFIG_SND_SOC_INTEL_CFL=m -CONFIG_SND_SOC_INTEL_CML_H=m -CONFIG_SND_SOC_INTEL_CML_LP=m -CONFIG_SND_SOC_INTEL_SKYLAKE_FAMILY=m -CONFIG_SND_SOC_INTEL_SKYLAKE_SSP_CLK=m -# CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC is not set -CONFIG_SND_SOC_INTEL_SKYLAKE_COMMON=m 
-CONFIG_SND_SOC_ACPI_INTEL_MATCH=m -CONFIG_SND_SOC_INTEL_MACH=y -# CONFIG_SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES is not set -CONFIG_SND_SOC_INTEL_HASWELL_MACH=m -CONFIG_SND_SOC_INTEL_BDW_RT5650_MACH=m -CONFIG_SND_SOC_INTEL_BDW_RT5677_MACH=m -CONFIG_SND_SOC_INTEL_BROADWELL_MACH=m -CONFIG_SND_SOC_INTEL_BYTCR_RT5640_MACH=m -CONFIG_SND_SOC_INTEL_BYTCR_RT5651_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_RT5672_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_RT5645_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_NAU8824_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_CX2072X_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_DA7213_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_ES8316_MACH=m -# CONFIG_SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH is not set -CONFIG_SND_SOC_INTEL_SKL_RT286_MACH=m -CONFIG_SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH=m -CONFIG_SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_DA7219_MAX98357A_GENERIC=m -CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_COMMON=m -CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_BXT_RT298_MACH=m -CONFIG_SND_SOC_INTEL_SOF_WM8804_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5660_MACH=m -CONFIG_SND_SOC_INTEL_GLK_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_GLK_RT5682_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_SKL_HDA_DSP_GENERIC_MACH=m -CONFIG_SND_SOC_INTEL_SOF_RT5682_MACH=m -CONFIG_SND_SOC_INTEL_SOF_PCM512x_MACH=m -CONFIG_SND_SOC_INTEL_CML_LP_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m -CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m -CONFIG_SND_SOC_INTEL_EHL_RT5660_MACH=m -CONFIG_SND_SOC_MTK_BTCVSD=m -CONFIG_SND_SOC_SOF_TOPLEVEL=y -CONFIG_SND_SOC_SOF_PCI=m -CONFIG_SND_SOC_SOF_ACPI=m -CONFIG_SND_SOC_SOF_OF=m -# CONFIG_SND_SOC_SOF_DEBUG_PROBES is not set -# CONFIG_SND_SOC_SOF_DEVELOPER_SUPPORT is not set -CONFIG_SND_SOC_SOF=m -CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE=y -CONFIG_SND_SOC_SOF_INTEL_TOPLEVEL=y -CONFIG_SND_SOC_SOF_INTEL_ACPI=m -CONFIG_SND_SOC_SOF_INTEL_PCI=m -CONFIG_SND_SOC_SOF_INTEL_HIFI_EP_IPC=m -CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP=m -CONFIG_SND_SOC_SOF_INTEL_COMMON=m -CONFIG_SND_SOC_SOF_MERRIFIELD_SUPPORT=y -CONFIG_SND_SOC_SOF_MERRIFIELD=m -CONFIG_SND_SOC_SOF_APOLLOLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_APOLLOLAKE=m -CONFIG_SND_SOC_SOF_GEMINILAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_GEMINILAKE=m -CONFIG_SND_SOC_SOF_CANNONLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_CANNONLAKE=m -CONFIG_SND_SOC_SOF_COFFEELAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_COFFEELAKE=m -CONFIG_SND_SOC_SOF_ICELAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_ICELAKE=m -CONFIG_SND_SOC_SOF_COMETLAKE=m -CONFIG_SND_SOC_SOF_COMETLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_COMETLAKE_LP_SUPPORT=y -CONFIG_SND_SOC_SOF_TIGERLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_TIGERLAKE=m -CONFIG_SND_SOC_SOF_ELKHARTLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_ELKHARTLAKE=m -CONFIG_SND_SOC_SOF_JASPERLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_JASPERLAKE=m -CONFIG_SND_SOC_SOF_HDA_COMMON=m -CONFIG_SND_SOC_SOF_HDA_LINK=y -CONFIG_SND_SOC_SOF_HDA_AUDIO_CODEC=y -# CONFIG_SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1 is not set -CONFIG_SND_SOC_SOF_HDA_LINK_BASELINE=m -CONFIG_SND_SOC_SOF_HDA=m -CONFIG_SND_SOC_SOF_XTENSA=m - -# -# STMicroelectronics STM32 SOC audio support -# -# end of STMicroelectronics STM32 SOC audio support - -CONFIG_SND_SOC_XILINX_I2S=m -CONFIG_SND_SOC_XILINX_AUDIO_FORMATTER=m -CONFIG_SND_SOC_XILINX_SPDIF=m 
-CONFIG_SND_SOC_XTFPGA_I2S=m -CONFIG_ZX_TDM=m -CONFIG_SND_SOC_I2C_AND_SPI=m - -# -# CODEC drivers -# -CONFIG_SND_SOC_AC97_CODEC=m -CONFIG_SND_SOC_ADAU_UTILS=m -CONFIG_SND_SOC_ADAU1701=m -CONFIG_SND_SOC_ADAU17X1=m -CONFIG_SND_SOC_ADAU1761=m -CONFIG_SND_SOC_ADAU1761_I2C=m -CONFIG_SND_SOC_ADAU1761_SPI=m -CONFIG_SND_SOC_ADAU7002=m -CONFIG_SND_SOC_ADAU7118=m -CONFIG_SND_SOC_ADAU7118_HW=m -CONFIG_SND_SOC_ADAU7118_I2C=m -CONFIG_SND_SOC_AK4104=m -CONFIG_SND_SOC_AK4118=m -CONFIG_SND_SOC_AK4458=m -CONFIG_SND_SOC_AK4554=m -CONFIG_SND_SOC_AK4613=m -CONFIG_SND_SOC_AK4642=m -CONFIG_SND_SOC_AK5386=m -CONFIG_SND_SOC_AK5558=m -CONFIG_SND_SOC_ALC5623=m -CONFIG_SND_SOC_BD28623=m -# CONFIG_SND_SOC_BT_SCO is not set -CONFIG_SND_SOC_CPCAP=m -CONFIG_SND_SOC_CROS_EC_CODEC=m -CONFIG_SND_SOC_CS35L32=m -CONFIG_SND_SOC_CS35L33=m -CONFIG_SND_SOC_CS35L34=m -CONFIG_SND_SOC_CS35L35=m -CONFIG_SND_SOC_CS35L36=m -CONFIG_SND_SOC_CS42L42=m -CONFIG_SND_SOC_CS42L51=m -CONFIG_SND_SOC_CS42L51_I2C=m -CONFIG_SND_SOC_CS42L52=m -CONFIG_SND_SOC_CS42L56=m -CONFIG_SND_SOC_CS42L73=m -CONFIG_SND_SOC_CS4265=m -CONFIG_SND_SOC_CS4270=m -CONFIG_SND_SOC_CS4271=m -CONFIG_SND_SOC_CS4271_I2C=m -CONFIG_SND_SOC_CS4271_SPI=m -CONFIG_SND_SOC_CS42XX8=m -CONFIG_SND_SOC_CS42XX8_I2C=m -CONFIG_SND_SOC_CS43130=m -CONFIG_SND_SOC_CS4341=m -CONFIG_SND_SOC_CS4349=m -CONFIG_SND_SOC_CS53L30=m -CONFIG_SND_SOC_CX2072X=m -CONFIG_SND_SOC_DA7213=m -CONFIG_SND_SOC_DA7219=m -CONFIG_SND_SOC_DMIC=m -CONFIG_SND_SOC_HDMI_CODEC=m -CONFIG_SND_SOC_ES7134=m -CONFIG_SND_SOC_ES7241=m -CONFIG_SND_SOC_ES8316=m -CONFIG_SND_SOC_ES8328=m -CONFIG_SND_SOC_ES8328_I2C=m -CONFIG_SND_SOC_ES8328_SPI=m -CONFIG_SND_SOC_GTM601=m -CONFIG_SND_SOC_HDAC_HDMI=m -CONFIG_SND_SOC_HDAC_HDA=m -CONFIG_SND_SOC_INNO_RK3036=m -CONFIG_SND_SOC_LOCHNAGAR_SC=m -CONFIG_SND_SOC_MAX98088=m -CONFIG_SND_SOC_MAX98090=m -CONFIG_SND_SOC_MAX98357A=m -CONFIG_SND_SOC_MAX98504=m -CONFIG_SND_SOC_MAX9867=m -CONFIG_SND_SOC_MAX98927=m -CONFIG_SND_SOC_MAX98373=m -CONFIG_SND_SOC_MAX98390=m -CONFIG_SND_SOC_MAX9860=m -CONFIG_SND_SOC_MSM8916_WCD_ANALOG=m -CONFIG_SND_SOC_MSM8916_WCD_DIGITAL=m -CONFIG_SND_SOC_PCM1681=m -CONFIG_SND_SOC_PCM1789=m -CONFIG_SND_SOC_PCM1789_I2C=m -CONFIG_SND_SOC_PCM179X=m -CONFIG_SND_SOC_PCM179X_I2C=m -CONFIG_SND_SOC_PCM179X_SPI=m -CONFIG_SND_SOC_PCM186X=m -CONFIG_SND_SOC_PCM186X_I2C=m -CONFIG_SND_SOC_PCM186X_SPI=m -CONFIG_SND_SOC_PCM3060=m -CONFIG_SND_SOC_PCM3060_I2C=m -CONFIG_SND_SOC_PCM3060_SPI=m -CONFIG_SND_SOC_PCM3168A=m -CONFIG_SND_SOC_PCM3168A_I2C=m -CONFIG_SND_SOC_PCM3168A_SPI=m -CONFIG_SND_SOC_PCM512x=m -CONFIG_SND_SOC_PCM512x_I2C=m -CONFIG_SND_SOC_PCM512x_SPI=m -CONFIG_SND_SOC_RK3328=m -CONFIG_SND_SOC_RL6231=m -CONFIG_SND_SOC_RL6347A=m -CONFIG_SND_SOC_RT286=m -CONFIG_SND_SOC_RT298=m -CONFIG_SND_SOC_RT1011=m -CONFIG_SND_SOC_RT1015=m -CONFIG_SND_SOC_RT1308_SDW=m -CONFIG_SND_SOC_RT5514=m -CONFIG_SND_SOC_RT5514_SPI=m -CONFIG_SND_SOC_RT5616=m -CONFIG_SND_SOC_RT5631=m -CONFIG_SND_SOC_RT5640=m -CONFIG_SND_SOC_RT5645=m -CONFIG_SND_SOC_RT5651=m -CONFIG_SND_SOC_RT5660=m -CONFIG_SND_SOC_RT5663=m -CONFIG_SND_SOC_RT5670=m -CONFIG_SND_SOC_RT5677=m -CONFIG_SND_SOC_RT5677_SPI=m -CONFIG_SND_SOC_RT5682=m -CONFIG_SND_SOC_RT5682_I2C=m -CONFIG_SND_SOC_RT5682_SDW=m -CONFIG_SND_SOC_RT700=m -CONFIG_SND_SOC_RT700_SDW=m -CONFIG_SND_SOC_RT711=m -CONFIG_SND_SOC_RT711_SDW=m -CONFIG_SND_SOC_RT715=m -CONFIG_SND_SOC_RT715_SDW=m -CONFIG_SND_SOC_SGTL5000=m -CONFIG_SND_SOC_SI476X=m -CONFIG_SND_SOC_SIGMADSP=m -CONFIG_SND_SOC_SIGMADSP_I2C=m -CONFIG_SND_SOC_SIGMADSP_REGMAP=m -CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m 
-CONFIG_SND_SOC_SIRF_AUDIO_CODEC=m -CONFIG_SND_SOC_SPDIF=m -CONFIG_SND_SOC_SSM2305=m -CONFIG_SND_SOC_SSM2602=m -CONFIG_SND_SOC_SSM2602_SPI=m -CONFIG_SND_SOC_SSM2602_I2C=m -CONFIG_SND_SOC_SSM4567=m -CONFIG_SND_SOC_STA32X=m -CONFIG_SND_SOC_STA350=m -CONFIG_SND_SOC_STI_SAS=m -CONFIG_SND_SOC_TAS2552=m -CONFIG_SND_SOC_TAS2562=m -CONFIG_SND_SOC_TAS2770=m -CONFIG_SND_SOC_TAS5086=m -CONFIG_SND_SOC_TAS571X=m -CONFIG_SND_SOC_TAS5720=m -CONFIG_SND_SOC_TAS6424=m -CONFIG_SND_SOC_TDA7419=m -CONFIG_SND_SOC_TFA9879=m -CONFIG_SND_SOC_TLV320AIC23=m -CONFIG_SND_SOC_TLV320AIC23_I2C=m -CONFIG_SND_SOC_TLV320AIC23_SPI=m -CONFIG_SND_SOC_TLV320AIC31XX=m -CONFIG_SND_SOC_TLV320AIC32X4=m -CONFIG_SND_SOC_TLV320AIC32X4_I2C=m -CONFIG_SND_SOC_TLV320AIC32X4_SPI=m -CONFIG_SND_SOC_TLV320AIC3X=m -CONFIG_SND_SOC_TLV320ADCX140=m -CONFIG_SND_SOC_TS3A227E=m -CONFIG_SND_SOC_TSCS42XX=m -CONFIG_SND_SOC_TSCS454=m -CONFIG_SND_SOC_UDA1334=m -CONFIG_SND_SOC_WCD9335=m -CONFIG_SND_SOC_WCD934X=m -CONFIG_SND_SOC_WM8510=m -CONFIG_SND_SOC_WM8523=m -CONFIG_SND_SOC_WM8524=m -CONFIG_SND_SOC_WM8580=m -CONFIG_SND_SOC_WM8711=m -CONFIG_SND_SOC_WM8728=m -CONFIG_SND_SOC_WM8731=m -CONFIG_SND_SOC_WM8737=m -CONFIG_SND_SOC_WM8741=m -CONFIG_SND_SOC_WM8750=m -CONFIG_SND_SOC_WM8753=m -CONFIG_SND_SOC_WM8770=m -CONFIG_SND_SOC_WM8776=m -CONFIG_SND_SOC_WM8782=m -CONFIG_SND_SOC_WM8804=m -CONFIG_SND_SOC_WM8804_I2C=m -CONFIG_SND_SOC_WM8804_SPI=m -CONFIG_SND_SOC_WM8903=m -CONFIG_SND_SOC_WM8904=m -CONFIG_SND_SOC_WM8960=m -CONFIG_SND_SOC_WM8962=m -CONFIG_SND_SOC_WM8974=m -CONFIG_SND_SOC_WM8978=m -CONFIG_SND_SOC_WM8985=m -CONFIG_SND_SOC_WSA881X=m -CONFIG_SND_SOC_ZL38060=m -CONFIG_SND_SOC_ZX_AUD96P22=m -CONFIG_SND_SOC_MAX9759=m -CONFIG_SND_SOC_MT6351=m -CONFIG_SND_SOC_MT6358=m -CONFIG_SND_SOC_MT6660=m -CONFIG_SND_SOC_NAU8540=m -CONFIG_SND_SOC_NAU8810=m -CONFIG_SND_SOC_NAU8822=m -CONFIG_SND_SOC_NAU8824=m -CONFIG_SND_SOC_NAU8825=m -CONFIG_SND_SOC_TPA6130A2=m -# end of CODEC drivers - -CONFIG_SND_SIMPLE_CARD_UTILS=m -CONFIG_SND_SIMPLE_CARD=m -CONFIG_SND_AUDIO_GRAPH_CARD=m -CONFIG_SND_X86=y -CONFIG_HDMI_LPE_AUDIO=m -CONFIG_SND_SYNTH_EMUX=m -CONFIG_SND_XEN_FRONTEND=m -CONFIG_AC97_BUS=m - -# -# HID support -# -CONFIG_HID=m -CONFIG_HID_BATTERY_STRENGTH=y -CONFIG_HIDRAW=y -CONFIG_UHID=m -CONFIG_HID_GENERIC=m - -# -# Special HID drivers -# -CONFIG_HID_A4TECH=m -CONFIG_HID_ACCUTOUCH=m -CONFIG_HID_ACRUX=m -CONFIG_HID_ACRUX_FF=y -CONFIG_HID_APPLE=m -CONFIG_HID_APPLEIR=m -CONFIG_HID_ASUS=m -CONFIG_HID_AUREAL=m -CONFIG_HID_BELKIN=m -CONFIG_HID_BETOP_FF=m -CONFIG_HID_BIGBEN_FF=m -CONFIG_HID_CHERRY=m -CONFIG_HID_CHICONY=m -CONFIG_HID_CORSAIR=m -CONFIG_HID_COUGAR=m -CONFIG_HID_MACALLY=m -CONFIG_HID_PRODIKEYS=m -CONFIG_HID_CMEDIA=m -CONFIG_HID_CP2112=m -CONFIG_HID_CREATIVE_SB0540=m -CONFIG_HID_CYPRESS=m -CONFIG_HID_DRAGONRISE=m -CONFIG_DRAGONRISE_FF=y -CONFIG_HID_EMS_FF=m -CONFIG_HID_ELAN=m -CONFIG_HID_ELECOM=m -CONFIG_HID_ELO=m -CONFIG_HID_EZKEY=m -CONFIG_HID_GEMBIRD=m -CONFIG_HID_GFRM=m -CONFIG_HID_GLORIOUS=m -CONFIG_HID_HOLTEK=m -CONFIG_HOLTEK_FF=y -CONFIG_HID_GOOGLE_HAMMER=m -CONFIG_HID_GT683R=m -CONFIG_HID_KEYTOUCH=m -CONFIG_HID_KYE=m -CONFIG_HID_UCLOGIC=m -CONFIG_HID_WALTOP=m -CONFIG_HID_VIEWSONIC=m -CONFIG_HID_GYRATION=m -CONFIG_HID_ICADE=m -CONFIG_HID_ITE=m -CONFIG_HID_JABRA=m -CONFIG_HID_TWINHAN=m -CONFIG_HID_KENSINGTON=m -CONFIG_HID_LCPOWER=m -CONFIG_HID_LED=m -CONFIG_HID_LENOVO=m -CONFIG_HID_LOGITECH=m -CONFIG_HID_LOGITECH_DJ=m -CONFIG_HID_LOGITECH_HIDPP=m -CONFIG_LOGITECH_FF=y -CONFIG_LOGIRUMBLEPAD2_FF=y -CONFIG_LOGIG940_FF=y -CONFIG_LOGIWHEELS_FF=y -CONFIG_HID_MAGICMOUSE=m 
-CONFIG_HID_MALTRON=m -CONFIG_HID_MAYFLASH=m -CONFIG_HID_REDRAGON=m -CONFIG_HID_MICROSOFT=m -CONFIG_HID_MONTEREY=m -CONFIG_HID_MULTITOUCH=m -CONFIG_HID_NTI=m -CONFIG_HID_NTRIG=m -CONFIG_HID_ORTEK=m -CONFIG_HID_PANTHERLORD=m -CONFIG_PANTHERLORD_FF=y -CONFIG_HID_PENMOUNT=m -CONFIG_HID_PETALYNX=m -CONFIG_HID_PICOLCD=m -CONFIG_HID_PICOLCD_FB=y -CONFIG_HID_PICOLCD_BACKLIGHT=y -CONFIG_HID_PICOLCD_LCD=y -CONFIG_HID_PICOLCD_LEDS=y -CONFIG_HID_PICOLCD_CIR=y -CONFIG_HID_PLANTRONICS=m -CONFIG_HID_PRIMAX=m -CONFIG_HID_RETRODE=m -CONFIG_HID_ROCCAT=m -CONFIG_HID_SAITEK=m -CONFIG_HID_SAMSUNG=m -CONFIG_HID_SONY=m -CONFIG_SONY_FF=y -CONFIG_HID_SPEEDLINK=m -CONFIG_HID_STEAM=m -CONFIG_HID_STEELSERIES=m -CONFIG_HID_SUNPLUS=m -CONFIG_HID_RMI=m -CONFIG_HID_GREENASIA=m -CONFIG_GREENASIA_FF=y -CONFIG_HID_HYPERV_MOUSE=m -CONFIG_HID_SMARTJOYPLUS=m -CONFIG_SMARTJOYPLUS_FF=y -CONFIG_HID_TIVO=m -CONFIG_HID_TOPSEED=m -CONFIG_HID_THINGM=m -CONFIG_HID_THRUSTMASTER=m -CONFIG_THRUSTMASTER_FF=y -CONFIG_HID_UDRAW_PS3=m -CONFIG_HID_U2FZERO=m -CONFIG_HID_WACOM=m -CONFIG_HID_WIIMOTE=m -CONFIG_HID_XINMO=m -CONFIG_HID_ZEROPLUS=m -CONFIG_ZEROPLUS_FF=y -CONFIG_HID_ZYDACRON=m -CONFIG_HID_SENSOR_HUB=m -# CONFIG_HID_SENSOR_CUSTOM_SENSOR is not set -CONFIG_HID_ALPS=m -CONFIG_HID_MCP2221=m -# end of Special HID drivers - -# -# USB HID support -# -CONFIG_USB_HID=m -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y - -# -# USB HID Boot Protocol drivers -# -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -# end of USB HID Boot Protocol drivers -# end of USB HID support - -# -# I2C HID support -# -CONFIG_I2C_HID=m -# end of I2C HID support - -# -# Intel ISH HID support -# -CONFIG_INTEL_ISH_HID=m -CONFIG_INTEL_ISH_FIRMWARE_DOWNLOADER=m -# end of Intel ISH HID support -# end of HID support - -CONFIG_USB_OHCI_LITTLE_ENDIAN=y -CONFIG_USB_SUPPORT=y -CONFIG_USB_COMMON=y -CONFIG_USB_LED_TRIG=y -CONFIG_USB_ULPI_BUS=m -CONFIG_USB_CONN_GPIO=m -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB=y -CONFIG_USB_PCI=y -CONFIG_USB_ANNOUNCE_NEW_DEVICES=y - -# -# Miscellaneous USB options -# -CONFIG_USB_DEFAULT_PERSIST=y -CONFIG_USB_DYNAMIC_MINORS=y -# CONFIG_USB_OTG is not set -# CONFIG_USB_OTG_WHITELIST is not set -# CONFIG_USB_OTG_BLACKLIST_HUB is not set -CONFIG_USB_LEDS_TRIGGER_USBPORT=m -CONFIG_USB_AUTOSUSPEND_DELAY=2 -CONFIG_USB_MON=m - -# -# USB Host Controller Drivers -# -CONFIG_USB_C67X00_HCD=m -CONFIG_USB_XHCI_HCD=m -# CONFIG_USB_XHCI_DBGCAP is not set -CONFIG_USB_XHCI_PCI=m -CONFIG_USB_XHCI_PCI_RENESAS=m -CONFIG_USB_XHCI_PLATFORM=m -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_EHCI_TT_NEWSCHED=y -CONFIG_USB_EHCI_PCI=m -CONFIG_USB_EHCI_FSL=m -CONFIG_USB_EHCI_HCD_PLATFORM=m -CONFIG_USB_OXU210HP_HCD=m -CONFIG_USB_ISP116X_HCD=m -CONFIG_USB_FOTG210_HCD=m -CONFIG_USB_MAX3421_HCD=m -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_OHCI_HCD_PCI=m -# CONFIG_USB_OHCI_HCD_SSB is not set -CONFIG_USB_OHCI_HCD_PLATFORM=m -CONFIG_USB_UHCI_HCD=m -CONFIG_USB_U132_HCD=m -CONFIG_USB_SL811_HCD=m -# CONFIG_USB_SL811_HCD_ISO is not set -CONFIG_USB_SL811_CS=m -CONFIG_USB_R8A66597_HCD=m -CONFIG_USB_HCD_BCMA=m -CONFIG_USB_HCD_SSB=m -# CONFIG_USB_HCD_TEST_MODE is not set - -# -# USB Device Class drivers -# -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_WDM=m -CONFIG_USB_TMC=m - -# -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may -# - -# -# also be needed; see USB_STORAGE Help for more info -# -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_REALTEK=m -CONFIG_REALTEK_AUTOPM=y -CONFIG_USB_STORAGE_DATAFAB=m -CONFIG_USB_STORAGE_FREECOM=m 
-CONFIG_USB_STORAGE_ISD200=m -CONFIG_USB_STORAGE_USBAT=m -CONFIG_USB_STORAGE_SDDR09=m -CONFIG_USB_STORAGE_SDDR55=m -CONFIG_USB_STORAGE_JUMPSHOT=m -CONFIG_USB_STORAGE_ALAUDA=m -CONFIG_USB_STORAGE_ONETOUCH=m -CONFIG_USB_STORAGE_KARMA=m -CONFIG_USB_STORAGE_CYPRESS_ATACB=m -CONFIG_USB_STORAGE_ENE_UB6250=m -CONFIG_USB_UAS=m - -# -# USB Imaging devices -# -CONFIG_USB_MDC800=m -CONFIG_USB_MICROTEK=m -CONFIG_USBIP_CORE=m -CONFIG_USBIP_VHCI_HCD=m -CONFIG_USBIP_VHCI_HC_PORTS=8 -CONFIG_USBIP_VHCI_NR_HCS=1 -CONFIG_USBIP_HOST=m -CONFIG_USBIP_VUDC=m -# CONFIG_USBIP_DEBUG is not set -CONFIG_USB_CDNS3=m -CONFIG_USB_CDNS3_GADGET=y -CONFIG_USB_CDNS3_HOST=y -CONFIG_USB_CDNS3_PCI_WRAP=m -CONFIG_USB_MUSB_HDRC=m -# CONFIG_USB_MUSB_HOST is not set -# CONFIG_USB_MUSB_GADGET is not set -CONFIG_USB_MUSB_DUAL_ROLE=y - -# -# Platform Glue Layer -# - -# -# MUSB DMA mode -# -# CONFIG_MUSB_PIO_ONLY is not set -CONFIG_USB_DWC3=m -CONFIG_USB_DWC3_ULPI=y -# CONFIG_USB_DWC3_HOST is not set -# CONFIG_USB_DWC3_GADGET is not set -CONFIG_USB_DWC3_DUAL_ROLE=y - -# -# Platform Glue Driver Support -# -CONFIG_USB_DWC3_PCI=m -CONFIG_USB_DWC3_HAPS=m -CONFIG_USB_DWC3_OF_SIMPLE=m -CONFIG_USB_DWC2=m -# CONFIG_USB_DWC2_HOST is not set - -# -# Gadget/Dual-role mode requires USB Gadget support to be enabled -# -# CONFIG_USB_DWC2_PERIPHERAL is not set -CONFIG_USB_DWC2_DUAL_ROLE=y -CONFIG_USB_DWC2_PCI=m -# CONFIG_USB_DWC2_DEBUG is not set -# CONFIG_USB_DWC2_TRACK_MISSED_SOFS is not set -CONFIG_USB_CHIPIDEA=m -CONFIG_USB_CHIPIDEA_UDC=y -CONFIG_USB_CHIPIDEA_HOST=y -CONFIG_USB_CHIPIDEA_PCI=m -CONFIG_USB_CHIPIDEA_MSM=m -CONFIG_USB_CHIPIDEA_IMX=m -CONFIG_USB_CHIPIDEA_GENERIC=m -CONFIG_USB_CHIPIDEA_TEGRA=m -CONFIG_USB_ISP1760=m -CONFIG_USB_ISP1760_HCD=y -CONFIG_USB_ISP1761_UDC=y -# CONFIG_USB_ISP1760_HOST_ROLE is not set -# CONFIG_USB_ISP1760_GADGET_ROLE is not set -CONFIG_USB_ISP1760_DUAL_ROLE=y - -# -# USB port drivers -# -CONFIG_USB_USS720=m -CONFIG_USB_SERIAL=y -CONFIG_USB_SERIAL_CONSOLE=y -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_SIMPLE=m -CONFIG_USB_SERIAL_AIRCABLE=m -CONFIG_USB_SERIAL_ARK3116=m -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_CH341=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_CP210X=m -CONFIG_USB_SERIAL_CYPRESS_M8=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_F81232=m -CONFIG_USB_SERIAL_F8153X=m -CONFIG_USB_SERIAL_GARMIN=m -CONFIG_USB_SERIAL_IPW=m -CONFIG_USB_SERIAL_IUU=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_METRO=m -CONFIG_USB_SERIAL_MOS7720=m -CONFIG_USB_SERIAL_MOS7715_PARPORT=y -CONFIG_USB_SERIAL_MOS7840=m -CONFIG_USB_SERIAL_MXUPORT=m -CONFIG_USB_SERIAL_NAVMAN=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_OTI6858=m -CONFIG_USB_SERIAL_QCAUX=m -CONFIG_USB_SERIAL_QUALCOMM=m -CONFIG_USB_SERIAL_SPCP8X5=m -CONFIG_USB_SERIAL_SAFE=m -# CONFIG_USB_SERIAL_SAFE_PADDED is not set -CONFIG_USB_SERIAL_SIERRAWIRELESS=m -CONFIG_USB_SERIAL_SYMBOL=m -CONFIG_USB_SERIAL_TI=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_WWAN=m -CONFIG_USB_SERIAL_OPTION=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_SERIAL_OPTICON=m -CONFIG_USB_SERIAL_XSENS_MT=m -CONFIG_USB_SERIAL_WISHBONE=m -CONFIG_USB_SERIAL_SSU100=m -CONFIG_USB_SERIAL_QT2=m -CONFIG_USB_SERIAL_UPD78F0730=m 
-CONFIG_USB_SERIAL_DEBUG=m - -# -# USB Miscellaneous drivers -# -CONFIG_USB_EMI62=m -CONFIG_USB_EMI26=m -CONFIG_USB_ADUTUX=m -CONFIG_USB_SEVSEG=m -CONFIG_USB_LEGOTOWER=m -CONFIG_USB_LCD=m -CONFIG_USB_CYPRESS_CY7C63=m -CONFIG_USB_CYTHERM=m -CONFIG_USB_IDMOUSE=m -CONFIG_USB_FTDI_ELAN=m -CONFIG_USB_APPLEDISPLAY=m -CONFIG_APPLE_MFI_FASTCHARGE=m -CONFIG_USB_SISUSBVGA=m -CONFIG_USB_SISUSBVGA_CON=y -CONFIG_USB_LD=m -CONFIG_USB_TRANCEVIBRATOR=m -CONFIG_USB_IOWARRIOR=m -CONFIG_USB_TEST=m -CONFIG_USB_EHSET_TEST_FIXTURE=m -CONFIG_USB_ISIGHTFW=m -CONFIG_USB_YUREX=m -CONFIG_USB_EZUSB_FX2=m -CONFIG_USB_HUB_USB251XB=m -CONFIG_USB_HSIC_USB3503=m -CONFIG_USB_HSIC_USB4604=m -CONFIG_USB_LINK_LAYER_TEST=m -CONFIG_USB_CHAOSKEY=m -CONFIG_USB_ATM=m -CONFIG_USB_SPEEDTOUCH=m -CONFIG_USB_CXACRU=m -CONFIG_USB_UEAGLEATM=m -CONFIG_USB_XUSBATM=m - -# -# USB Physical Layer drivers -# -CONFIG_USB_PHY=y -CONFIG_NOP_USB_XCEIV=m -CONFIG_USB_GPIO_VBUS=m -CONFIG_TAHVO_USB=m -# CONFIG_TAHVO_USB_HOST_BY_DEFAULT is not set -CONFIG_USB_ISP1301=m -# end of USB Physical Layer drivers - -CONFIG_USB_GADGET=m -# CONFIG_USB_GADGET_DEBUG is not set -# CONFIG_USB_GADGET_DEBUG_FILES is not set -# CONFIG_USB_GADGET_DEBUG_FS is not set -CONFIG_USB_GADGET_VBUS_DRAW=2 -CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 -CONFIG_U_SERIAL_CONSOLE=y - -# -# USB Peripheral Controller -# -CONFIG_USB_FOTG210_UDC=m -CONFIG_USB_GR_UDC=m -CONFIG_USB_R8A66597=m -CONFIG_USB_PXA27X=m -CONFIG_USB_MV_UDC=m -CONFIG_USB_MV_U3D=m -CONFIG_USB_SNP_CORE=m -CONFIG_USB_SNP_UDC_PLAT=m -CONFIG_USB_M66592=m -CONFIG_USB_BDC_UDC=m - -# -# Platform Support -# -CONFIG_USB_BDC_PCI=m -CONFIG_USB_AMD5536UDC=m -CONFIG_USB_NET2272=m -CONFIG_USB_NET2272_DMA=y -CONFIG_USB_NET2280=m -CONFIG_USB_GOKU=m -CONFIG_USB_EG20T=m -CONFIG_USB_GADGET_XILINX=m -CONFIG_USB_MAX3420_UDC=m -CONFIG_USB_DUMMY_HCD=m -# end of USB Peripheral Controller - -CONFIG_USB_LIBCOMPOSITE=m -CONFIG_USB_F_ACM=m -CONFIG_USB_F_SS_LB=m -CONFIG_USB_U_SERIAL=m -CONFIG_USB_U_ETHER=m -CONFIG_USB_U_AUDIO=m -CONFIG_USB_F_SERIAL=m -CONFIG_USB_F_OBEX=m -CONFIG_USB_F_NCM=m -CONFIG_USB_F_ECM=m -CONFIG_USB_F_PHONET=m -CONFIG_USB_F_EEM=m -CONFIG_USB_F_SUBSET=m -CONFIG_USB_F_RNDIS=m -CONFIG_USB_F_MASS_STORAGE=m -CONFIG_USB_F_FS=m -CONFIG_USB_F_UAC1=m -CONFIG_USB_F_UAC1_LEGACY=m -CONFIG_USB_F_UAC2=m -CONFIG_USB_F_UVC=m -CONFIG_USB_F_MIDI=m -CONFIG_USB_F_HID=m -CONFIG_USB_F_PRINTER=m -CONFIG_USB_F_TCM=m -CONFIG_USB_CONFIGFS=m -CONFIG_USB_CONFIGFS_SERIAL=y -CONFIG_USB_CONFIGFS_ACM=y -CONFIG_USB_CONFIGFS_OBEX=y -CONFIG_USB_CONFIGFS_NCM=y -CONFIG_USB_CONFIGFS_ECM=y -CONFIG_USB_CONFIGFS_ECM_SUBSET=y -CONFIG_USB_CONFIGFS_RNDIS=y -CONFIG_USB_CONFIGFS_EEM=y -CONFIG_USB_CONFIGFS_PHONET=y -CONFIG_USB_CONFIGFS_MASS_STORAGE=y -CONFIG_USB_CONFIGFS_F_LB_SS=y -CONFIG_USB_CONFIGFS_F_FS=y -CONFIG_USB_CONFIGFS_F_UAC1=y -CONFIG_USB_CONFIGFS_F_UAC1_LEGACY=y -CONFIG_USB_CONFIGFS_F_UAC2=y -CONFIG_USB_CONFIGFS_F_MIDI=y -CONFIG_USB_CONFIGFS_F_HID=y -CONFIG_USB_CONFIGFS_F_UVC=y -CONFIG_USB_CONFIGFS_F_PRINTER=y -CONFIG_USB_CONFIGFS_F_TCM=y - -# -# USB Gadget precomposed configurations -# -CONFIG_USB_ZERO=m -CONFIG_USB_AUDIO=m -# CONFIG_GADGET_UAC1 is not set -CONFIG_USB_ETH=m -CONFIG_USB_ETH_RNDIS=y -CONFIG_USB_ETH_EEM=y -CONFIG_USB_G_NCM=m -CONFIG_USB_GADGETFS=m -CONFIG_USB_FUNCTIONFS=m -CONFIG_USB_FUNCTIONFS_ETH=y -CONFIG_USB_FUNCTIONFS_RNDIS=y -CONFIG_USB_FUNCTIONFS_GENERIC=y -CONFIG_USB_MASS_STORAGE=m -CONFIG_USB_GADGET_TARGET=m -CONFIG_USB_G_SERIAL=m -CONFIG_USB_MIDI_GADGET=m -CONFIG_USB_G_PRINTER=m -CONFIG_USB_CDC_COMPOSITE=m -CONFIG_USB_G_NOKIA=m 
-CONFIG_USB_G_ACM_MS=m -CONFIG_USB_G_MULTI=m -CONFIG_USB_G_MULTI_RNDIS=y -CONFIG_USB_G_MULTI_CDC=y -CONFIG_USB_G_HID=m -CONFIG_USB_G_DBGP=m -# CONFIG_USB_G_DBGP_PRINTK is not set -CONFIG_USB_G_DBGP_SERIAL=y -CONFIG_USB_G_WEBCAM=m -CONFIG_USB_RAW_GADGET=m -# end of USB Gadget precomposed configurations - -CONFIG_TYPEC=m -CONFIG_TYPEC_TCPM=m -CONFIG_TYPEC_TCPCI=m -CONFIG_TYPEC_RT1711H=m -CONFIG_TYPEC_FUSB302=m -CONFIG_TYPEC_WCOVE=m -CONFIG_TYPEC_UCSI=m -CONFIG_UCSI_CCG=m -CONFIG_UCSI_ACPI=m -CONFIG_TYPEC_HD3SS3220=m -CONFIG_TYPEC_TPS6598X=m - -# -# USB Type-C Multiplexer/DeMultiplexer Switch support -# -CONFIG_TYPEC_MUX_PI3USB30532=m -CONFIG_TYPEC_MUX_INTEL_PMC=m -# end of USB Type-C Multiplexer/DeMultiplexer Switch support - -# -# USB Type-C Alternate Mode drivers -# -CONFIG_TYPEC_DP_ALTMODE=m -CONFIG_TYPEC_NVIDIA_ALTMODE=m -# end of USB Type-C Alternate Mode drivers - -CONFIG_USB_ROLE_SWITCH=m -CONFIG_USB_ROLES_INTEL_XHCI=m -CONFIG_MMC=m -CONFIG_PWRSEQ_EMMC=m -CONFIG_PWRSEQ_SD8787=m -CONFIG_PWRSEQ_SIMPLE=m -CONFIG_MMC_BLOCK=m -CONFIG_MMC_BLOCK_MINORS=8 -CONFIG_SDIO_UART=m -CONFIG_MMC_TEST=m - -# -# MMC/SD/SDIO Host Controller Drivers -# -# CONFIG_MMC_DEBUG is not set -CONFIG_MMC_SDHCI=m -CONFIG_MMC_SDHCI_IO_ACCESSORS=y -CONFIG_MMC_SDHCI_PCI=m -CONFIG_MMC_RICOH_MMC=y -CONFIG_MMC_SDHCI_ACPI=m -CONFIG_MMC_SDHCI_PLTFM=m -CONFIG_MMC_SDHCI_OF_ARASAN=m -CONFIG_MMC_SDHCI_OF_ASPEED=m -CONFIG_MMC_SDHCI_OF_AT91=m -CONFIG_MMC_SDHCI_OF_DWCMSHC=m -CONFIG_MMC_SDHCI_CADENCE=m -CONFIG_MMC_SDHCI_F_SDH30=m -CONFIG_MMC_SDHCI_MILBEAUT=m -CONFIG_MMC_WBSD=m -CONFIG_MMC_ALCOR=m -CONFIG_MMC_TIFM_SD=m -CONFIG_MMC_SPI=m -CONFIG_MMC_SDRICOH_CS=m -CONFIG_MMC_CB710=m -CONFIG_MMC_VIA_SDMMC=m -CONFIG_MMC_VUB300=m -CONFIG_MMC_USHC=m -CONFIG_MMC_USDHI6ROL0=m -CONFIG_MMC_REALTEK_PCI=m -CONFIG_MMC_REALTEK_USB=m -CONFIG_MMC_CQHCI=m -CONFIG_MMC_HSQ=m -CONFIG_MMC_TOSHIBA_PCI=m -CONFIG_MMC_MTK=m -CONFIG_MMC_SDHCI_XENON=m -CONFIG_MMC_SDHCI_OMAP=m -CONFIG_MMC_SDHCI_AM654=m -CONFIG_MMC_SDHCI_EXTERNAL_DMA=y -CONFIG_MEMSTICK=m -# CONFIG_MEMSTICK_DEBUG is not set - -# -# MemoryStick drivers -# -# CONFIG_MEMSTICK_UNSAFE_RESUME is not set -CONFIG_MSPRO_BLOCK=m -CONFIG_MS_BLOCK=m - -# -# MemoryStick Host Controller Drivers -# -CONFIG_MEMSTICK_TIFM_MS=m -CONFIG_MEMSTICK_JMICRON_38X=m -CONFIG_MEMSTICK_R592=m -CONFIG_MEMSTICK_REALTEK_PCI=m -CONFIG_MEMSTICK_REALTEK_USB=m -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_CLASS_FLASH=m -CONFIG_LEDS_BRIGHTNESS_HW_CHANGED=y - -# -# LED drivers -# -CONFIG_LEDS_88PM860X=m -CONFIG_LEDS_AAT1290=m -CONFIG_LEDS_AN30259A=m -CONFIG_LEDS_APU=m -CONFIG_LEDS_AS3645A=m -CONFIG_LEDS_AW2013=m -CONFIG_LEDS_BCM6328=m -CONFIG_LEDS_BCM6358=m -CONFIG_LEDS_CPCAP=m -CONFIG_LEDS_CR0014114=m -CONFIG_LEDS_EL15203000=m -CONFIG_LEDS_LM3530=m -CONFIG_LEDS_LM3532=m -CONFIG_LEDS_LM3533=m -CONFIG_LEDS_LM3642=m -CONFIG_LEDS_LM3692X=m -CONFIG_LEDS_LM3601X=m -CONFIG_LEDS_MT6323=m -CONFIG_LEDS_PCA9532=m -CONFIG_LEDS_PCA9532_GPIO=y -CONFIG_LEDS_GPIO=m -CONFIG_LEDS_LP3944=m -CONFIG_LEDS_LP3952=m -# CONFIG_LEDS_LP5521 is not set -# CONFIG_LEDS_LP5523 is not set -# CONFIG_LEDS_LP5562 is not set -# CONFIG_LEDS_LP8501 is not set -CONFIG_LEDS_LP8788=m -CONFIG_LEDS_LP8860=m -CONFIG_LEDS_CLEVO_MAIL=m -CONFIG_LEDS_PCA955X=m -CONFIG_LEDS_PCA955X_GPIO=y -CONFIG_LEDS_PCA963X=m -CONFIG_LEDS_WM831X_STATUS=m -CONFIG_LEDS_WM8350=m -CONFIG_LEDS_DA903X=m -CONFIG_LEDS_DA9052=m -CONFIG_LEDS_DAC124S085=m -CONFIG_LEDS_PWM=m -CONFIG_LEDS_REGULATOR=m -CONFIG_LEDS_BD2802=m -CONFIG_LEDS_INTEL_SS4200=m -CONFIG_LEDS_LT3593=m -CONFIG_LEDS_ADP5520=m 
-CONFIG_LEDS_MC13783=m -CONFIG_LEDS_TCA6507=m -CONFIG_LEDS_TLC591XX=m -CONFIG_LEDS_MAX77650=m -CONFIG_LEDS_MAX77693=m -CONFIG_LEDS_MAX8997=m -CONFIG_LEDS_LM355x=m -CONFIG_LEDS_MENF21BMC=m -CONFIG_LEDS_KTD2692=m -CONFIG_LEDS_IS31FL319X=m -CONFIG_LEDS_IS31FL32XX=m - -# -# LED driver for blink(1) USB RGB LED is under Special HID drivers (HID_THINGM) -# -CONFIG_LEDS_BLINKM=m -CONFIG_LEDS_SYSCON=y -CONFIG_LEDS_MLXCPLD=m -CONFIG_LEDS_MLXREG=m -CONFIG_LEDS_USER=m -CONFIG_LEDS_NIC78BX=m -CONFIG_LEDS_SPI_BYTE=m -CONFIG_LEDS_TI_LMU_COMMON=m -CONFIG_LEDS_LM3697=m -CONFIG_LEDS_LM36274=m -CONFIG_LEDS_TPS6105X=m -CONFIG_LEDS_SGM3140=m - -# -# LED Triggers -# -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=m -CONFIG_LEDS_TRIGGER_ONESHOT=m -CONFIG_LEDS_TRIGGER_DISK=y -CONFIG_LEDS_TRIGGER_MTD=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=m -CONFIG_LEDS_TRIGGER_BACKLIGHT=m -CONFIG_LEDS_TRIGGER_CPU=y -CONFIG_LEDS_TRIGGER_ACTIVITY=m -CONFIG_LEDS_TRIGGER_GPIO=m -CONFIG_LEDS_TRIGGER_DEFAULT_ON=m - -# -# iptables trigger is under Netfilter config (LED target) -# -CONFIG_LEDS_TRIGGER_TRANSIENT=m -CONFIG_LEDS_TRIGGER_CAMERA=m -CONFIG_LEDS_TRIGGER_PANIC=y -CONFIG_LEDS_TRIGGER_NETDEV=m -CONFIG_LEDS_TRIGGER_PATTERN=m -CONFIG_LEDS_TRIGGER_AUDIO=m -CONFIG_ACCESSIBILITY=y -CONFIG_A11Y_BRAILLE_CONSOLE=y -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_MAD=m -CONFIG_INFINIBAND_USER_ACCESS=m -# CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI is not set -CONFIG_INFINIBAND_USER_MEM=y -CONFIG_INFINIBAND_ON_DEMAND_PAGING=y -CONFIG_INFINIBAND_ADDR_TRANS=y -CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y -CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_MTHCA_DEBUG=y -CONFIG_INFINIBAND_QIB=m -CONFIG_INFINIBAND_QIB_DCA=y -CONFIG_INFINIBAND_CXGB4=m -CONFIG_INFINIBAND_EFA=m -CONFIG_INFINIBAND_I40IW=m -CONFIG_MLX4_INFINIBAND=m -CONFIG_MLX5_INFINIBAND=m -CONFIG_INFINIBAND_OCRDMA=m -CONFIG_INFINIBAND_VMWARE_PVRDMA=m -CONFIG_INFINIBAND_USNIC=m -CONFIG_INFINIBAND_BNXT_RE=m -CONFIG_INFINIBAND_HFI1=m -# CONFIG_HFI1_DEBUG_SDMA_ORDER is not set -# CONFIG_SDMA_VERBOSITY is not set -CONFIG_INFINIBAND_QEDR=m -CONFIG_INFINIBAND_RDMAVT=m -CONFIG_RDMA_RXE=m -CONFIG_RDMA_SIW=m -CONFIG_INFINIBAND_IPOIB=m -CONFIG_INFINIBAND_IPOIB_CM=y -CONFIG_INFINIBAND_IPOIB_DEBUG=y -# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set -CONFIG_INFINIBAND_SRP=m -CONFIG_INFINIBAND_SRPT=m -CONFIG_INFINIBAND_ISER=m -CONFIG_INFINIBAND_ISERT=m -CONFIG_INFINIBAND_RTRS=m -CONFIG_INFINIBAND_RTRS_CLIENT=m -CONFIG_INFINIBAND_RTRS_SERVER=m -CONFIG_INFINIBAND_OPA_VNIC=m -CONFIG_EDAC_ATOMIC_SCRUB=y -CONFIG_EDAC_SUPPORT=y -CONFIG_EDAC=y -CONFIG_EDAC_LEGACY_SYSFS=y -# CONFIG_EDAC_DEBUG is not set -CONFIG_EDAC_DECODE_MCE=m -CONFIG_EDAC_GHES=y -CONFIG_EDAC_AMD64=m -# CONFIG_EDAC_AMD64_ERROR_INJECTION is not set -CONFIG_EDAC_E752X=m -CONFIG_EDAC_I82975X=m -CONFIG_EDAC_I3000=m -CONFIG_EDAC_I3200=m -CONFIG_EDAC_IE31200=m -CONFIG_EDAC_X38=m -CONFIG_EDAC_I5400=m -CONFIG_EDAC_I7CORE=m -CONFIG_EDAC_I5000=m -CONFIG_EDAC_I5100=m -CONFIG_EDAC_I7300=m -CONFIG_EDAC_SBRIDGE=m -CONFIG_EDAC_SKX=m -CONFIG_EDAC_I10NM=m -CONFIG_EDAC_PND2=m -CONFIG_RTC_LIB=y -CONFIG_RTC_MC146818_LIB=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_HCTOSYS=y -CONFIG_RTC_HCTOSYS_DEVICE="rtc0" -CONFIG_RTC_SYSTOHC=y -CONFIG_RTC_SYSTOHC_DEVICE="rtc0" -# CONFIG_RTC_DEBUG is not set -CONFIG_RTC_NVMEM=y - -# -# RTC interfaces -# -CONFIG_RTC_INTF_SYSFS=y -CONFIG_RTC_INTF_PROC=y -CONFIG_RTC_INTF_DEV=y -CONFIG_RTC_INTF_DEV_UIE_EMUL=y -# CONFIG_RTC_DRV_TEST is not set - -# -# I2C RTC drivers -# -CONFIG_RTC_DRV_88PM860X=m -CONFIG_RTC_DRV_88PM80X=m -CONFIG_RTC_DRV_ABB5ZES3=m 
-CONFIG_RTC_DRV_ABEOZ9=m -CONFIG_RTC_DRV_ABX80X=m -CONFIG_RTC_DRV_AS3722=m -CONFIG_RTC_DRV_DS1307=m -CONFIG_RTC_DRV_DS1307_CENTURY=y -CONFIG_RTC_DRV_DS1374=m -CONFIG_RTC_DRV_DS1374_WDT=y -CONFIG_RTC_DRV_DS1672=m -CONFIG_RTC_DRV_HYM8563=m -CONFIG_RTC_DRV_LP8788=m -CONFIG_RTC_DRV_MAX6900=m -CONFIG_RTC_DRV_MAX8907=m -CONFIG_RTC_DRV_MAX8925=m -CONFIG_RTC_DRV_MAX8998=m -CONFIG_RTC_DRV_MAX8997=m -CONFIG_RTC_DRV_MAX77686=m -CONFIG_RTC_DRV_RK808=m -CONFIG_RTC_DRV_RS5C372=m -CONFIG_RTC_DRV_ISL1208=m -CONFIG_RTC_DRV_ISL12022=m -CONFIG_RTC_DRV_ISL12026=m -CONFIG_RTC_DRV_X1205=m -CONFIG_RTC_DRV_PCF8523=m -CONFIG_RTC_DRV_PCF85063=m -CONFIG_RTC_DRV_PCF85363=m -CONFIG_RTC_DRV_PCF8563=m -CONFIG_RTC_DRV_PCF8583=m -CONFIG_RTC_DRV_M41T80=m -CONFIG_RTC_DRV_M41T80_WDT=y -CONFIG_RTC_DRV_BD70528=m -CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_TWL4030=m -CONFIG_RTC_DRV_PALMAS=m -CONFIG_RTC_DRV_TPS6586X=m -CONFIG_RTC_DRV_TPS65910=m -CONFIG_RTC_DRV_TPS80031=m -CONFIG_RTC_DRV_RC5T583=m -CONFIG_RTC_DRV_RC5T619=m -CONFIG_RTC_DRV_S35390A=m -CONFIG_RTC_DRV_FM3130=m -CONFIG_RTC_DRV_RX8010=m -CONFIG_RTC_DRV_RX8581=m -CONFIG_RTC_DRV_RX8025=m -CONFIG_RTC_DRV_EM3027=m -CONFIG_RTC_DRV_RV3028=m -CONFIG_RTC_DRV_RV8803=m -CONFIG_RTC_DRV_S5M=m -CONFIG_RTC_DRV_SD3078=m - -# -# SPI RTC drivers -# -CONFIG_RTC_DRV_M41T93=m -CONFIG_RTC_DRV_M41T94=m -CONFIG_RTC_DRV_DS1302=m -CONFIG_RTC_DRV_DS1305=m -CONFIG_RTC_DRV_DS1343=m -CONFIG_RTC_DRV_DS1347=m -CONFIG_RTC_DRV_DS1390=m -CONFIG_RTC_DRV_MAX6916=m -CONFIG_RTC_DRV_R9701=m -CONFIG_RTC_DRV_RX4581=m -CONFIG_RTC_DRV_RX6110=m -CONFIG_RTC_DRV_RS5C348=m -CONFIG_RTC_DRV_MAX6902=m -CONFIG_RTC_DRV_PCF2123=m -CONFIG_RTC_DRV_MCP795=m -CONFIG_RTC_I2C_AND_SPI=y - -# -# SPI and I2C RTC drivers -# -CONFIG_RTC_DRV_DS3232=m -CONFIG_RTC_DRV_DS3232_HWMON=y -CONFIG_RTC_DRV_PCF2127=m -CONFIG_RTC_DRV_RV3029C2=m -CONFIG_RTC_DRV_RV3029_HWMON=y - -# -# Platform RTC drivers -# -CONFIG_RTC_DRV_CMOS=y -CONFIG_RTC_DRV_DS1286=m -CONFIG_RTC_DRV_DS1511=m -CONFIG_RTC_DRV_DS1553=m -CONFIG_RTC_DRV_DS1685_FAMILY=m -CONFIG_RTC_DRV_DS1685=y -# CONFIG_RTC_DRV_DS1689 is not set -# CONFIG_RTC_DRV_DS17285 is not set -# CONFIG_RTC_DRV_DS17485 is not set -# CONFIG_RTC_DRV_DS17885 is not set -CONFIG_RTC_DRV_DS1742=m -CONFIG_RTC_DRV_DS2404=m -CONFIG_RTC_DRV_DA9052=m -CONFIG_RTC_DRV_DA9055=m -CONFIG_RTC_DRV_DA9063=m -CONFIG_RTC_DRV_STK17TA8=m -CONFIG_RTC_DRV_M48T86=m -CONFIG_RTC_DRV_M48T35=m -CONFIG_RTC_DRV_M48T59=m -CONFIG_RTC_DRV_MSM6242=m -CONFIG_RTC_DRV_BQ4802=m -CONFIG_RTC_DRV_RP5C01=m -CONFIG_RTC_DRV_V3020=m -CONFIG_RTC_DRV_WM831X=m -CONFIG_RTC_DRV_WM8350=m -CONFIG_RTC_DRV_PCF50633=m -CONFIG_RTC_DRV_AB3100=m -CONFIG_RTC_DRV_ZYNQMP=m -CONFIG_RTC_DRV_CROS_EC=m - -# -# on-CPU RTC drivers -# -CONFIG_RTC_DRV_CADENCE=m -CONFIG_RTC_DRV_FTRTC010=m -CONFIG_RTC_DRV_PCAP=m -CONFIG_RTC_DRV_MC13XXX=m -CONFIG_RTC_DRV_MT6397=m -CONFIG_RTC_DRV_R7301=m -CONFIG_RTC_DRV_CPCAP=m - -# -# HID Sensor RTC drivers -# -CONFIG_RTC_DRV_HID_SENSOR_TIME=m -CONFIG_RTC_DRV_WILCO_EC=m -CONFIG_DMADEVICES=y -# CONFIG_DMADEVICES_DEBUG is not set - -# -# DMA Devices -# -CONFIG_DMA_ENGINE=y -CONFIG_DMA_VIRTUAL_CHANNELS=y -CONFIG_DMA_ACPI=y -CONFIG_DMA_OF=y -CONFIG_ALTERA_MSGDMA=m -CONFIG_DW_AXI_DMAC=m -CONFIG_FSL_EDMA=m -CONFIG_INTEL_IDMA64=m -CONFIG_INTEL_IDXD=m -CONFIG_INTEL_IOATDMA=m -CONFIG_INTEL_MIC_X100_DMA=m -CONFIG_PLX_DMA=m -CONFIG_QCOM_HIDMA_MGMT=m -CONFIG_QCOM_HIDMA=m -CONFIG_DW_DMAC_CORE=y -CONFIG_DW_DMAC=y -CONFIG_DW_DMAC_PCI=y -CONFIG_DW_EDMA=m -CONFIG_DW_EDMA_PCIE=m -CONFIG_HSU_DMA=y -CONFIG_SF_PDMA=m - -# -# DMA Clients -# -CONFIG_ASYNC_TX_DMA=y -# 
CONFIG_DMATEST is not set -CONFIG_DMA_ENGINE_RAID=y - -# -# DMABUF options -# -CONFIG_SYNC_FILE=y -# CONFIG_SW_SYNC is not set -CONFIG_UDMABUF=y -# CONFIG_DMABUF_MOVE_NOTIFY is not set -# CONFIG_DMABUF_SELFTESTS is not set -CONFIG_DMABUF_HEAPS=y -CONFIG_DMABUF_HEAPS_SYSTEM=y -# end of DMABUF options - -CONFIG_DCA=m -CONFIG_AUXDISPLAY=y -CONFIG_HD44780=m -CONFIG_KS0108=m -CONFIG_KS0108_PORT=0x378 -CONFIG_KS0108_DELAY=2 -CONFIG_CFAG12864B=m -CONFIG_CFAG12864B_RATE=20 -CONFIG_IMG_ASCII_LCD=m -CONFIG_HT16K33=m -CONFIG_PARPORT_PANEL=m -CONFIG_PANEL_PARPORT=0 -CONFIG_PANEL_PROFILE=5 -# CONFIG_PANEL_CHANGE_MESSAGE is not set -# CONFIG_CHARLCD_BL_OFF is not set -# CONFIG_CHARLCD_BL_ON is not set -CONFIG_CHARLCD_BL_FLASH=y -CONFIG_PANEL=m -CONFIG_CHARLCD=m -CONFIG_UIO=m -CONFIG_UIO_CIF=m -CONFIG_UIO_PDRV_GENIRQ=m -CONFIG_UIO_DMEM_GENIRQ=m -CONFIG_UIO_AEC=m -CONFIG_UIO_SERCOS3=m -CONFIG_UIO_PCI_GENERIC=m -CONFIG_UIO_NETX=m -CONFIG_UIO_PRUSS=m -CONFIG_UIO_MF624=m -CONFIG_UIO_HV_GENERIC=m -CONFIG_VFIO_IOMMU_TYPE1=m -CONFIG_VFIO_VIRQFD=m -CONFIG_VFIO=m -# CONFIG_VFIO_NOIOMMU is not set -CONFIG_VFIO_PCI=m -CONFIG_VFIO_PCI_VGA=y -CONFIG_VFIO_PCI_MMAP=y -CONFIG_VFIO_PCI_INTX=y -CONFIG_VFIO_PCI_IGD=y -CONFIG_VFIO_MDEV=m -CONFIG_VFIO_MDEV_DEVICE=m -CONFIG_IRQ_BYPASS_MANAGER=m -CONFIG_VIRT_DRIVERS=y -CONFIG_VBOXGUEST=m -CONFIG_VIRTIO=y -CONFIG_VIRTIO_MENU=y -CONFIG_VIRTIO_PCI=m -CONFIG_VIRTIO_PCI_LEGACY=y -CONFIG_VIRTIO_VDPA=m -CONFIG_VIRTIO_PMEM=m -CONFIG_VIRTIO_BALLOON=m -CONFIG_VIRTIO_MEM=m -CONFIG_VIRTIO_INPUT=m -CONFIG_VIRTIO_MMIO=m -CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_VDPA=m -CONFIG_VDPA_SIM=m -CONFIG_IFCVF=m -CONFIG_VHOST_IOTLB=m -CONFIG_VHOST_RING=m -CONFIG_VHOST=m -CONFIG_VHOST_MENU=y -CONFIG_VHOST_NET=m -CONFIG_VHOST_SCSI=m -CONFIG_VHOST_VSOCK=m -CONFIG_VHOST_VDPA=m -# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set - -# -# Microsoft Hyper-V guest support -# -CONFIG_HYPERV=m -CONFIG_HYPERV_TIMER=y -CONFIG_HYPERV_UTILS=m -CONFIG_HYPERV_BALLOON=m -# end of Microsoft Hyper-V guest support - -# -# Xen driver support -# -CONFIG_XEN_BALLOON=y -CONFIG_XEN_BALLOON_MEMORY_HOTPLUG=y -CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT=512 -CONFIG_XEN_SCRUB_PAGES_DEFAULT=y -CONFIG_XEN_DEV_EVTCHN=m -CONFIG_XEN_BACKEND=y -CONFIG_XENFS=m -CONFIG_XEN_COMPAT_XENFS=y -CONFIG_XEN_SYS_HYPERVISOR=y -CONFIG_XEN_XENBUS_FRONTEND=y -CONFIG_XEN_GNTDEV=m -CONFIG_XEN_GNTDEV_DMABUF=y -CONFIG_XEN_GRANT_DEV_ALLOC=m -CONFIG_XEN_GRANT_DMA_ALLOC=y -CONFIG_SWIOTLB_XEN=y -CONFIG_XEN_PCIDEV_BACKEND=m -CONFIG_XEN_PVCALLS_FRONTEND=m -CONFIG_XEN_PVCALLS_BACKEND=y -CONFIG_XEN_SCSI_BACKEND=m -CONFIG_XEN_PRIVCMD=m -CONFIG_XEN_ACPI_PROCESSOR=m -CONFIG_XEN_MCE_LOG=y -CONFIG_XEN_HAVE_PVMMU=y -CONFIG_XEN_EFI=y -CONFIG_XEN_AUTO_XLATE=y -CONFIG_XEN_ACPI=y -CONFIG_XEN_SYMS=y -CONFIG_XEN_HAVE_VPMU=y -CONFIG_XEN_FRONT_PGDIR_SHBUF=m -# end of Xen driver support - -# CONFIG_GREYBUS is not set -CONFIG_STAGING=y -CONFIG_PRISM2_USB=m -CONFIG_COMEDI=m -# CONFIG_COMEDI_DEBUG is not set -CONFIG_COMEDI_DEFAULT_BUF_SIZE_KB=2048 -CONFIG_COMEDI_DEFAULT_BUF_MAXSIZE_KB=20480 -CONFIG_COMEDI_MISC_DRIVERS=y -CONFIG_COMEDI_BOND=m -CONFIG_COMEDI_TEST=m -CONFIG_COMEDI_PARPORT=m -# CONFIG_COMEDI_ISA_DRIVERS is not set -CONFIG_COMEDI_PCI_DRIVERS=m -CONFIG_COMEDI_8255_PCI=m -CONFIG_COMEDI_ADDI_WATCHDOG=m -CONFIG_COMEDI_ADDI_APCI_1032=m -CONFIG_COMEDI_ADDI_APCI_1500=m -CONFIG_COMEDI_ADDI_APCI_1516=m -CONFIG_COMEDI_ADDI_APCI_1564=m -CONFIG_COMEDI_ADDI_APCI_16XX=m -CONFIG_COMEDI_ADDI_APCI_2032=m -CONFIG_COMEDI_ADDI_APCI_2200=m -CONFIG_COMEDI_ADDI_APCI_3120=m 
-CONFIG_COMEDI_ADDI_APCI_3501=m -CONFIG_COMEDI_ADDI_APCI_3XXX=m -CONFIG_COMEDI_ADL_PCI6208=m -CONFIG_COMEDI_ADL_PCI7X3X=m -CONFIG_COMEDI_ADL_PCI8164=m -CONFIG_COMEDI_ADL_PCI9111=m -CONFIG_COMEDI_ADL_PCI9118=m -CONFIG_COMEDI_ADV_PCI1710=m -CONFIG_COMEDI_ADV_PCI1720=m -CONFIG_COMEDI_ADV_PCI1723=m -CONFIG_COMEDI_ADV_PCI1724=m -CONFIG_COMEDI_ADV_PCI1760=m -CONFIG_COMEDI_ADV_PCI_DIO=m -CONFIG_COMEDI_AMPLC_DIO200_PCI=m -CONFIG_COMEDI_AMPLC_PC236_PCI=m -CONFIG_COMEDI_AMPLC_PC263_PCI=m -CONFIG_COMEDI_AMPLC_PCI224=m -CONFIG_COMEDI_AMPLC_PCI230=m -CONFIG_COMEDI_CONTEC_PCI_DIO=m -CONFIG_COMEDI_DAS08_PCI=m -CONFIG_COMEDI_DT3000=m -CONFIG_COMEDI_DYNA_PCI10XX=m -CONFIG_COMEDI_GSC_HPDI=m -CONFIG_COMEDI_MF6X4=m -CONFIG_COMEDI_ICP_MULTI=m -CONFIG_COMEDI_DAQBOARD2000=m -CONFIG_COMEDI_JR3_PCI=m -CONFIG_COMEDI_KE_COUNTER=m -CONFIG_COMEDI_CB_PCIDAS64=m -CONFIG_COMEDI_CB_PCIDAS=m -CONFIG_COMEDI_CB_PCIDDA=m -CONFIG_COMEDI_CB_PCIMDAS=m -CONFIG_COMEDI_CB_PCIMDDA=m -CONFIG_COMEDI_ME4000=m -CONFIG_COMEDI_ME_DAQ=m -CONFIG_COMEDI_NI_6527=m -CONFIG_COMEDI_NI_65XX=m -CONFIG_COMEDI_NI_660X=m -CONFIG_COMEDI_NI_670X=m -CONFIG_COMEDI_NI_LABPC_PCI=m -CONFIG_COMEDI_NI_PCIDIO=m -CONFIG_COMEDI_NI_PCIMIO=m -CONFIG_COMEDI_RTD520=m -CONFIG_COMEDI_S626=m -CONFIG_COMEDI_MITE=m -CONFIG_COMEDI_NI_TIOCMD=m -CONFIG_COMEDI_PCMCIA_DRIVERS=m -CONFIG_COMEDI_CB_DAS16_CS=m -CONFIG_COMEDI_DAS08_CS=m -CONFIG_COMEDI_NI_DAQ_700_CS=m -CONFIG_COMEDI_NI_DAQ_DIO24_CS=m -CONFIG_COMEDI_NI_LABPC_CS=m -CONFIG_COMEDI_NI_MIO_CS=m -CONFIG_COMEDI_QUATECH_DAQP_CS=m -CONFIG_COMEDI_USB_DRIVERS=m -CONFIG_COMEDI_DT9812=m -CONFIG_COMEDI_NI_USB6501=m -CONFIG_COMEDI_USBDUX=m -CONFIG_COMEDI_USBDUXFAST=m -CONFIG_COMEDI_USBDUXSIGMA=m -CONFIG_COMEDI_VMK80XX=m -CONFIG_COMEDI_8254=m -CONFIG_COMEDI_8255=m -CONFIG_COMEDI_8255_SA=m -CONFIG_COMEDI_KCOMEDILIB=m -CONFIG_COMEDI_AMPLC_DIO200=m -CONFIG_COMEDI_AMPLC_PC236=m -CONFIG_COMEDI_DAS08=m -CONFIG_COMEDI_NI_LABPC=m -CONFIG_COMEDI_NI_TIO=m -CONFIG_COMEDI_NI_ROUTING=m -CONFIG_RTL8192U=m -CONFIG_RTLLIB=m -CONFIG_RTLLIB_CRYPTO_CCMP=m -CONFIG_RTLLIB_CRYPTO_TKIP=m -CONFIG_RTLLIB_CRYPTO_WEP=m -CONFIG_RTL8192E=m -CONFIG_RTL8723BS=m -CONFIG_R8712U=m -CONFIG_R8188EU=m -CONFIG_88EU_AP_MODE=y -CONFIG_RTS5208=m -CONFIG_VT6655=m -CONFIG_VT6656=m - -# -# IIO staging drivers -# - -# -# Accelerometers -# -CONFIG_ADIS16203=m -CONFIG_ADIS16240=m -# end of Accelerometers - -# -# Analog to digital converters -# -CONFIG_AD7816=m -CONFIG_AD7280=m -# end of Analog to digital converters - -# -# Analog digital bi-direction converters -# -CONFIG_ADT7316=m -CONFIG_ADT7316_SPI=m -CONFIG_ADT7316_I2C=m -# end of Analog digital bi-direction converters - -# -# Capacitance to digital converters -# -CONFIG_AD7150=m -CONFIG_AD7746=m -# end of Capacitance to digital converters - -# -# Direct Digital Synthesis -# -CONFIG_AD9832=m -CONFIG_AD9834=m -# end of Direct Digital Synthesis - -# -# Network Analyzer, Impedance Converters -# -CONFIG_AD5933=m -# end of Network Analyzer, Impedance Converters - -# -# Active energy metering IC -# -CONFIG_ADE7854=m -CONFIG_ADE7854_I2C=m -CONFIG_ADE7854_SPI=m -# end of Active energy metering IC - -# -# Resolver to digital converters -# -CONFIG_AD2S1210=m -# end of Resolver to digital converters -# end of IIO staging drivers - -# CONFIG_FB_SM750 is not set - -# -# Speakup console speech -# -CONFIG_SPEAKUP=m -CONFIG_SPEAKUP_SYNTH_ACNTSA=m -CONFIG_SPEAKUP_SYNTH_APOLLO=m -CONFIG_SPEAKUP_SYNTH_AUDPTR=m -CONFIG_SPEAKUP_SYNTH_BNS=m -CONFIG_SPEAKUP_SYNTH_DECTLK=m -CONFIG_SPEAKUP_SYNTH_DECEXT=m -CONFIG_SPEAKUP_SYNTH_LTLK=m 
-CONFIG_SPEAKUP_SYNTH_SOFT=m -CONFIG_SPEAKUP_SYNTH_SPKOUT=m -CONFIG_SPEAKUP_SYNTH_TXPRT=m -CONFIG_SPEAKUP_SYNTH_DUMMY=m -# end of Speakup console speech - -CONFIG_STAGING_MEDIA=y -CONFIG_INTEL_ATOMISP=y -CONFIG_VIDEO_ATOMISP=m -CONFIG_VIDEO_ATOMISP_ISP2401=y -CONFIG_VIDEO_ATOMISP_OV5693=m -CONFIG_VIDEO_ATOMISP_OV2722=m -CONFIG_VIDEO_ATOMISP_GC2235=m -CONFIG_VIDEO_ATOMISP_MSRLIST_HELPER=m -CONFIG_VIDEO_ATOMISP_MT9M114=m -CONFIG_VIDEO_ATOMISP_GC0310=m -CONFIG_VIDEO_ATOMISP_OV2680=m -CONFIG_VIDEO_ATOMISP_LM3554=m -CONFIG_VIDEO_IPU3_IMGU=m - -# -# soc_camera sensor drivers -# -CONFIG_VIDEO_USBVISION=m - -# -# Android -# -# end of Android - -CONFIG_STAGING_BOARD=y -CONFIG_LTE_GDM724X=m -CONFIG_FIREWIRE_SERIAL=m -CONFIG_FWTTY_MAX_TOTAL_PORTS=64 -CONFIG_FWTTY_MAX_CARD_PORTS=32 -CONFIG_GS_FPGABOOT=m -CONFIG_UNISYSSPAR=y -CONFIG_UNISYS_VISORNIC=m -CONFIG_UNISYS_VISORINPUT=m -CONFIG_UNISYS_VISORHBA=m -CONFIG_COMMON_CLK_XLNX_CLKWZRD=m -# CONFIG_FB_TFT is not set -CONFIG_WILC1000=m -CONFIG_WILC1000_SDIO=m -CONFIG_WILC1000_SPI=m -# CONFIG_WILC1000_HW_OOB_INTR is not set -CONFIG_MOST_COMPONENTS=m -CONFIG_MOST_CDEV=m -CONFIG_MOST_NET=m -CONFIG_MOST_SOUND=m -CONFIG_MOST_VIDEO=m -CONFIG_MOST_DIM2=m -CONFIG_MOST_I2C=m -CONFIG_MOST_USB=m -CONFIG_KS7010=m -CONFIG_PI433=m - -# -# Gasket devices -# -CONFIG_STAGING_GASKET_FRAMEWORK=m -CONFIG_STAGING_APEX_DRIVER=m -# end of Gasket devices - -CONFIG_XIL_AXIS_FIFO=m -CONFIG_FIELDBUS_DEV=m -CONFIG_HMS_ANYBUSS_BUS=m -CONFIG_ARCX_ANYBUS_CONTROLLER=m -CONFIG_HMS_PROFINET=m -CONFIG_KPC2000=y -CONFIG_KPC2000_CORE=m -CONFIG_KPC2000_SPI=m -CONFIG_KPC2000_I2C=m -CONFIG_KPC2000_DMA=m -CONFIG_QLGE=m -CONFIG_WFX=m -CONFIG_X86_PLATFORM_DEVICES=y -CONFIG_ACPI_WMI=m -CONFIG_WMI_BMOF=m -CONFIG_ALIENWARE_WMI=m -CONFIG_HUAWEI_WMI=m -CONFIG_INTEL_WMI_SBL_FW_UPDATE=m -CONFIG_INTEL_WMI_THUNDERBOLT=m -CONFIG_MXM_WMI=m -CONFIG_PEAQ_WMI=m -CONFIG_XIAOMI_WMI=m -CONFIG_ACERHDF=m -CONFIG_ACER_WIRELESS=m -CONFIG_ACER_WMI=m -CONFIG_APPLE_GMUX=m -CONFIG_ASUS_LAPTOP=m -CONFIG_ASUS_WIRELESS=m -CONFIG_ASUS_WMI=m -CONFIG_ASUS_NB_WMI=m -CONFIG_EEEPC_LAPTOP=m -CONFIG_EEEPC_WMI=m -CONFIG_DCDBAS=m -CONFIG_DELL_SMBIOS=m -CONFIG_DELL_SMBIOS_WMI=y -CONFIG_DELL_SMBIOS_SMM=y -CONFIG_DELL_LAPTOP=m -CONFIG_DELL_RBTN=m -# CONFIG_DELL_RBU is not set -CONFIG_DELL_SMO8800=m -CONFIG_DELL_WMI=m -CONFIG_DELL_WMI_DESCRIPTOR=m -CONFIG_DELL_WMI_AIO=m -CONFIG_DELL_WMI_LED=m -CONFIG_AMILO_RFKILL=m -CONFIG_FUJITSU_LAPTOP=m -CONFIG_FUJITSU_TABLET=m -CONFIG_GPD_POCKET_FAN=m -CONFIG_HP_ACCEL=m -CONFIG_HP_WIRELESS=m -CONFIG_HP_WMI=m -CONFIG_IBM_RTL=m -CONFIG_IDEAPAD_LAPTOP=m -CONFIG_SENSORS_HDAPS=m -CONFIG_THINKPAD_ACPI=m -CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y -# CONFIG_THINKPAD_ACPI_DEBUGFACILITIES is not set -# CONFIG_THINKPAD_ACPI_DEBUG is not set -# CONFIG_THINKPAD_ACPI_UNSAFE_LEDS is not set -CONFIG_THINKPAD_ACPI_VIDEO=y -CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y -CONFIG_INTEL_CHT_INT33FE=m -CONFIG_INTEL_HID_EVENT=m -CONFIG_INTEL_INT0002_VGPIO=m -CONFIG_INTEL_MENLOW=m -CONFIG_INTEL_OAKTRAIL=m -CONFIG_INTEL_VBTN=m -CONFIG_SURFACE3_WMI=m -CONFIG_SURFACE_3_BUTTON=m -CONFIG_SURFACE_3_POWER_OPREGION=m -CONFIG_SURFACE_PRO3_BUTTON=m -CONFIG_MSI_LAPTOP=m -CONFIG_MSI_WMI=m -CONFIG_PCENGINES_APU2=m -CONFIG_SAMSUNG_LAPTOP=m -CONFIG_SAMSUNG_Q10=m -CONFIG_ACPI_TOSHIBA=m -CONFIG_TOSHIBA_BT_RFKILL=m -CONFIG_TOSHIBA_HAPS=m -CONFIG_TOSHIBA_WMI=m -CONFIG_ACPI_CMPC=m -CONFIG_COMPAL_LAPTOP=m -CONFIG_LG_LAPTOP=m -CONFIG_PANASONIC_LAPTOP=m -CONFIG_SONY_LAPTOP=m -CONFIG_SONYPI_COMPAT=y -CONFIG_SYSTEM76_ACPI=m -CONFIG_TOPSTAR_LAPTOP=m 
-CONFIG_I2C_MULTI_INSTANTIATE=m -CONFIG_MLX_PLATFORM=m -CONFIG_TOUCHSCREEN_DMI=y -CONFIG_INTEL_IPS=m -CONFIG_INTEL_RST=m -CONFIG_INTEL_SMARTCONNECT=m - -# -# Intel Speed Select Technology interface support -# -CONFIG_INTEL_SPEED_SELECT_INTERFACE=m -# end of Intel Speed Select Technology interface support - -CONFIG_INTEL_TURBO_MAX_3=y -CONFIG_INTEL_UNCORE_FREQ_CONTROL=m -CONFIG_INTEL_BXTWC_PMIC_TMU=m -CONFIG_INTEL_CHTDC_TI_PWRBTN=m -CONFIG_INTEL_MFLD_THERMAL=m -CONFIG_INTEL_MID_POWER_BUTTON=m -CONFIG_INTEL_MRFLD_PWRBTN=m -CONFIG_INTEL_PMC_CORE=y -CONFIG_INTEL_PUNIT_IPC=m -CONFIG_INTEL_SCU_IPC=y -CONFIG_INTEL_SCU=y -CONFIG_INTEL_SCU_PCI=y -CONFIG_INTEL_SCU_PLATFORM=m -CONFIG_INTEL_SCU_IPC_UTIL=m -CONFIG_INTEL_TELEMETRY=m -CONFIG_PMC_ATOM=y -CONFIG_MFD_CROS_EC=m -CONFIG_CHROME_PLATFORMS=y -CONFIG_CHROMEOS_LAPTOP=m -CONFIG_CHROMEOS_PSTORE=m -CONFIG_CHROMEOS_TBMC=m -CONFIG_CROS_EC=m -CONFIG_CROS_EC_I2C=m -CONFIG_CROS_EC_RPMSG=m -CONFIG_CROS_EC_ISHTP=m -CONFIG_CROS_EC_SPI=m -CONFIG_CROS_EC_LPC=m -CONFIG_CROS_EC_PROTO=y -CONFIG_CROS_KBD_LED_BACKLIGHT=m -CONFIG_CROS_EC_CHARDEV=m -CONFIG_CROS_EC_LIGHTBAR=m -CONFIG_CROS_EC_VBC=m -CONFIG_CROS_EC_DEBUGFS=m -CONFIG_CROS_EC_SENSORHUB=m -CONFIG_CROS_EC_SYSFS=m -CONFIG_CROS_EC_TYPEC=m -CONFIG_CROS_USBPD_LOGGER=m -CONFIG_CROS_USBPD_NOTIFY=m -CONFIG_WILCO_EC=m -CONFIG_WILCO_EC_DEBUGFS=m -CONFIG_WILCO_EC_EVENTS=m -CONFIG_WILCO_EC_TELEMETRY=m -CONFIG_MELLANOX_PLATFORM=y -CONFIG_MLXREG_HOTPLUG=m -CONFIG_MLXREG_IO=m -CONFIG_HAVE_CLK=y -CONFIG_CLKDEV_LOOKUP=y -CONFIG_HAVE_CLK_PREPARE=y -CONFIG_COMMON_CLK=y -CONFIG_COMMON_CLK_WM831X=m -CONFIG_CLK_HSDK=y -CONFIG_COMMON_CLK_MAX77686=m -CONFIG_COMMON_CLK_MAX9485=m -CONFIG_COMMON_CLK_RK808=m -CONFIG_COMMON_CLK_SI5341=m -CONFIG_COMMON_CLK_SI5351=m -CONFIG_COMMON_CLK_SI514=m -CONFIG_COMMON_CLK_SI544=m -CONFIG_COMMON_CLK_SI570=m -CONFIG_COMMON_CLK_CDCE706=m -CONFIG_COMMON_CLK_CDCE925=m -CONFIG_COMMON_CLK_CS2000_CP=m -CONFIG_COMMON_CLK_S2MPS11=m -CONFIG_CLK_TWL6040=m -CONFIG_COMMON_CLK_LOCHNAGAR=m -CONFIG_COMMON_CLK_PALMAS=m -CONFIG_COMMON_CLK_PWM=m -CONFIG_COMMON_CLK_VC5=m -CONFIG_COMMON_CLK_BD718XX=m -CONFIG_COMMON_CLK_FIXED_MMIO=y -CONFIG_CLK_LGM_CGU=y -CONFIG_HWSPINLOCK=y - -# -# Clock Source drivers -# -CONFIG_TIMER_OF=y -CONFIG_TIMER_PROBE=y -CONFIG_CLKEVT_I8253=y -CONFIG_I8253_LOCK=y -CONFIG_CLKBLD_I8253=y -CONFIG_CLKSRC_MMIO=y -CONFIG_MICROCHIP_PIT64B=y -# end of Clock Source drivers - -CONFIG_MAILBOX=y -CONFIG_PLATFORM_MHU=m -CONFIG_PCC=y -CONFIG_ALTERA_MBOX=m -CONFIG_MAILBOX_TEST=m -CONFIG_IOMMU_IOVA=y -CONFIG_IOASID=y -CONFIG_IOMMU_API=y -CONFIG_IOMMU_SUPPORT=y - -# -# Generic IOMMU Pagetable Support -# -# end of Generic IOMMU Pagetable Support - -# CONFIG_IOMMU_DEBUGFS is not set -# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -CONFIG_OF_IOMMU=y -CONFIG_IOMMU_DMA=y -CONFIG_AMD_IOMMU=y -CONFIG_AMD_IOMMU_V2=y -CONFIG_DMAR_TABLE=y -CONFIG_INTEL_IOMMU=y -CONFIG_INTEL_IOMMU_SVM=y -# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set -CONFIG_INTEL_IOMMU_FLOPPY_WA=y -# CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON is not set -CONFIG_IRQ_REMAP=y -CONFIG_HYPERV_IOMMU=y - -# -# Remoteproc drivers -# -CONFIG_REMOTEPROC=y -# end of Remoteproc drivers - -# -# Rpmsg drivers -# -CONFIG_RPMSG=m -CONFIG_RPMSG_CHAR=m -CONFIG_RPMSG_QCOM_GLINK=m -CONFIG_RPMSG_QCOM_GLINK_RPM=m -CONFIG_RPMSG_VIRTIO=m -# end of Rpmsg drivers - -CONFIG_SOUNDWIRE=m - -# -# SoundWire Devices -# -CONFIG_SOUNDWIRE_CADENCE=m -CONFIG_SOUNDWIRE_INTEL=m -CONFIG_SOUNDWIRE_QCOM=m - -# -# SOC (System On Chip) specific Drivers -# - -# -# Amlogic SoC drivers -# -# end of 
Amlogic SoC drivers - -# -# Aspeed SoC drivers -# -# end of Aspeed SoC drivers - -# -# Broadcom SoC drivers -# -# end of Broadcom SoC drivers - -# -# NXP/Freescale QorIQ SoC drivers -# -# end of NXP/Freescale QorIQ SoC drivers - -# -# i.MX SoC drivers -# -# end of i.MX SoC drivers - -# -# Qualcomm SoC drivers -# -# end of Qualcomm SoC drivers - -CONFIG_SOC_TI=y - -# -# Xilinx SoC drivers -# -CONFIG_XILINX_VCU=m -# end of Xilinx SoC drivers -# end of SOC (System On Chip) specific Drivers - -CONFIG_PM_DEVFREQ=y - -# -# DEVFREQ Governors -# -CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=m -CONFIG_DEVFREQ_GOV_PERFORMANCE=m -CONFIG_DEVFREQ_GOV_POWERSAVE=m -CONFIG_DEVFREQ_GOV_USERSPACE=m -CONFIG_DEVFREQ_GOV_PASSIVE=m - -# -# DEVFREQ Drivers -# -CONFIG_PM_DEVFREQ_EVENT=y -CONFIG_EXTCON=y - -# -# Extcon Device Drivers -# -CONFIG_EXTCON_ADC_JACK=m -CONFIG_EXTCON_ARIZONA=m -CONFIG_EXTCON_AXP288=m -CONFIG_EXTCON_FSA9480=m -CONFIG_EXTCON_GPIO=m -CONFIG_EXTCON_INTEL_INT3496=m -CONFIG_EXTCON_INTEL_CHT_WC=m -CONFIG_EXTCON_INTEL_MRFLD=m -CONFIG_EXTCON_MAX14577=m -CONFIG_EXTCON_MAX3355=m -CONFIG_EXTCON_MAX77693=m -CONFIG_EXTCON_MAX77843=m -CONFIG_EXTCON_MAX8997=m -CONFIG_EXTCON_PALMAS=m -CONFIG_EXTCON_PTN5150=m -CONFIG_EXTCON_RT8973A=m -CONFIG_EXTCON_SM5502=m -CONFIG_EXTCON_USB_GPIO=m -CONFIG_EXTCON_USBC_CROS_EC=m -CONFIG_MEMORY=y -CONFIG_IIO=m -CONFIG_IIO_BUFFER=y -CONFIG_IIO_BUFFER_CB=m -CONFIG_IIO_BUFFER_DMA=m -CONFIG_IIO_BUFFER_DMAENGINE=m -CONFIG_IIO_BUFFER_HW_CONSUMER=m -CONFIG_IIO_KFIFO_BUF=m -CONFIG_IIO_TRIGGERED_BUFFER=m -CONFIG_IIO_CONFIGFS=m -CONFIG_IIO_TRIGGER=y -CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 -CONFIG_IIO_SW_DEVICE=m -CONFIG_IIO_SW_TRIGGER=m -CONFIG_IIO_TRIGGERED_EVENT=m - -# -# Accelerometers -# -CONFIG_ADIS16201=m -CONFIG_ADIS16209=m -CONFIG_ADXL372=m -CONFIG_ADXL372_SPI=m -CONFIG_ADXL372_I2C=m -CONFIG_BMA220=m -CONFIG_BMA400=m -CONFIG_BMA400_I2C=m -CONFIG_BMC150_ACCEL=m -CONFIG_BMC150_ACCEL_I2C=m -CONFIG_BMC150_ACCEL_SPI=m -CONFIG_DA280=m -CONFIG_DA311=m -CONFIG_DMARD06=m -CONFIG_DMARD09=m -CONFIG_DMARD10=m -CONFIG_HID_SENSOR_ACCEL_3D=m -CONFIG_IIO_CROS_EC_ACCEL_LEGACY=m -CONFIG_IIO_ST_ACCEL_3AXIS=m -CONFIG_IIO_ST_ACCEL_I2C_3AXIS=m -CONFIG_IIO_ST_ACCEL_SPI_3AXIS=m -CONFIG_KXSD9=m -CONFIG_KXSD9_SPI=m -CONFIG_KXSD9_I2C=m -CONFIG_KXCJK1013=m -CONFIG_MC3230=m -CONFIG_MMA7455=m -CONFIG_MMA7455_I2C=m -CONFIG_MMA7455_SPI=m -CONFIG_MMA7660=m -CONFIG_MMA8452=m -CONFIG_MMA9551_CORE=m -CONFIG_MMA9551=m -CONFIG_MMA9553=m -CONFIG_MXC4005=m -CONFIG_MXC6255=m -CONFIG_SCA3000=m -CONFIG_STK8312=m -CONFIG_STK8BA50=m -# end of Accelerometers - -# -# Analog to digital converters -# -CONFIG_AD_SIGMA_DELTA=m -CONFIG_AD7091R5=m -CONFIG_AD7124=m -CONFIG_AD7192=m -CONFIG_AD7266=m -CONFIG_AD7291=m -CONFIG_AD7292=m -CONFIG_AD7298=m -CONFIG_AD7476=m -CONFIG_AD7606=m -CONFIG_AD7606_IFACE_PARALLEL=m -CONFIG_AD7606_IFACE_SPI=m -CONFIG_AD7766=m -CONFIG_AD7768_1=m -CONFIG_AD7780=m -CONFIG_AD7791=m -CONFIG_AD7793=m -CONFIG_AD7887=m -CONFIG_AD7923=m -CONFIG_AD7949=m -CONFIG_AD799X=m -CONFIG_AD9467=m -CONFIG_ADI_AXI_ADC=m -CONFIG_AXP20X_ADC=m -CONFIG_AXP288_ADC=m -CONFIG_CC10001_ADC=m -CONFIG_CPCAP_ADC=m -CONFIG_DA9150_GPADC=m -CONFIG_DLN2_ADC=m -CONFIG_ENVELOPE_DETECTOR=m -CONFIG_HI8435=m -CONFIG_HX711=m -CONFIG_INA2XX_ADC=m -CONFIG_INTEL_MRFLD_ADC=m -CONFIG_LP8788_ADC=m -CONFIG_LTC2471=m -CONFIG_LTC2485=m -CONFIG_LTC2496=m -CONFIG_LTC2497=m -CONFIG_MAX1027=m -CONFIG_MAX11100=m -CONFIG_MAX1118=m -CONFIG_MAX1241=m -CONFIG_MAX1363=m -CONFIG_MAX9611=m -CONFIG_MCP320X=m -CONFIG_MCP3422=m -CONFIG_MCP3911=m -CONFIG_MEN_Z188_ADC=m 
-CONFIG_MP2629_ADC=m -CONFIG_NAU7802=m -CONFIG_PALMAS_GPADC=m -CONFIG_QCOM_VADC_COMMON=m -CONFIG_QCOM_SPMI_IADC=m -CONFIG_QCOM_SPMI_VADC=m -CONFIG_QCOM_SPMI_ADC5=m -CONFIG_RN5T618_ADC=m -CONFIG_SD_ADC_MODULATOR=m -CONFIG_STMPE_ADC=m -CONFIG_TI_ADC081C=m -CONFIG_TI_ADC0832=m -CONFIG_TI_ADC084S021=m -CONFIG_TI_ADC12138=m -CONFIG_TI_ADC108S102=m -CONFIG_TI_ADC128S052=m -CONFIG_TI_ADC161S626=m -CONFIG_TI_ADS1015=m -CONFIG_TI_ADS7950=m -CONFIG_TI_ADS8344=m -CONFIG_TI_ADS8688=m -CONFIG_TI_ADS124S08=m -CONFIG_TI_AM335X_ADC=m -CONFIG_TI_TLC4541=m -CONFIG_TWL4030_MADC=m -CONFIG_TWL6030_GPADC=m -CONFIG_VF610_ADC=m -CONFIG_VIPERBOARD_ADC=m -CONFIG_XILINX_XADC=m -# end of Analog to digital converters - -# -# Analog Front Ends -# -CONFIG_IIO_RESCALE=m -# end of Analog Front Ends - -# -# Amplifiers -# -CONFIG_AD8366=m -CONFIG_HMC425=m -# end of Amplifiers - -# -# Chemical Sensors -# -CONFIG_ATLAS_PH_SENSOR=m -CONFIG_ATLAS_EZO_SENSOR=m -CONFIG_BME680=m -CONFIG_BME680_I2C=m -CONFIG_BME680_SPI=m -CONFIG_CCS811=m -CONFIG_IAQCORE=m -CONFIG_PMS7003=m -CONFIG_SENSIRION_SGP30=m -CONFIG_SPS30=m -CONFIG_VZ89X=m -# end of Chemical Sensors - -CONFIG_IIO_CROS_EC_SENSORS_CORE=m -CONFIG_IIO_CROS_EC_SENSORS=m -CONFIG_IIO_CROS_EC_SENSORS_LID_ANGLE=m - -# -# Hid Sensor IIO Common -# -CONFIG_HID_SENSOR_IIO_COMMON=m -CONFIG_HID_SENSOR_IIO_TRIGGER=m -# end of Hid Sensor IIO Common - -CONFIG_IIO_MS_SENSORS_I2C=m - -# -# SSP Sensor Common -# -CONFIG_IIO_SSP_SENSORS_COMMONS=m -CONFIG_IIO_SSP_SENSORHUB=m -# end of SSP Sensor Common - -CONFIG_IIO_ST_SENSORS_I2C=m -CONFIG_IIO_ST_SENSORS_SPI=m -CONFIG_IIO_ST_SENSORS_CORE=m - -# -# Digital to analog converters -# -CONFIG_AD5064=m -CONFIG_AD5360=m -CONFIG_AD5380=m -CONFIG_AD5421=m -CONFIG_AD5446=m -CONFIG_AD5449=m -CONFIG_AD5592R_BASE=m -CONFIG_AD5592R=m -CONFIG_AD5593R=m -CONFIG_AD5504=m -CONFIG_AD5624R_SPI=m -CONFIG_AD5686=m -CONFIG_AD5686_SPI=m -CONFIG_AD5696_I2C=m -CONFIG_AD5755=m -CONFIG_AD5758=m -CONFIG_AD5761=m -CONFIG_AD5764=m -CONFIG_AD5770R=m -CONFIG_AD5791=m -CONFIG_AD7303=m -CONFIG_AD8801=m -CONFIG_DPOT_DAC=m -CONFIG_DS4424=m -CONFIG_LTC1660=m -CONFIG_LTC2632=m -CONFIG_M62332=m -CONFIG_MAX517=m -CONFIG_MAX5821=m -CONFIG_MCP4725=m -CONFIG_MCP4922=m -CONFIG_TI_DAC082S085=m -CONFIG_TI_DAC5571=m -CONFIG_TI_DAC7311=m -CONFIG_TI_DAC7612=m -CONFIG_VF610_DAC=m -# end of Digital to analog converters - -# -# IIO dummy driver -# -# CONFIG_IIO_SIMPLE_DUMMY is not set -# end of IIO dummy driver - -# -# Frequency Synthesizers DDS/PLL -# - -# -# Clock Generator/Distribution -# -CONFIG_AD9523=m -# end of Clock Generator/Distribution - -# -# Phase-Locked Loop (PLL) frequency synthesizers -# -CONFIG_ADF4350=m -CONFIG_ADF4371=m -# end of Phase-Locked Loop (PLL) frequency synthesizers -# end of Frequency Synthesizers DDS/PLL - -# -# Digital gyroscope sensors -# -CONFIG_ADIS16080=m -CONFIG_ADIS16130=m -CONFIG_ADIS16136=m -CONFIG_ADIS16260=m -CONFIG_ADXRS450=m -CONFIG_BMG160=m -CONFIG_BMG160_I2C=m -CONFIG_BMG160_SPI=m -CONFIG_FXAS21002C=m -CONFIG_FXAS21002C_I2C=m -CONFIG_FXAS21002C_SPI=m -CONFIG_HID_SENSOR_GYRO_3D=m -CONFIG_MPU3050=m -CONFIG_MPU3050_I2C=m -CONFIG_IIO_ST_GYRO_3AXIS=m -CONFIG_IIO_ST_GYRO_I2C_3AXIS=m -CONFIG_IIO_ST_GYRO_SPI_3AXIS=m -CONFIG_ITG3200=m -# end of Digital gyroscope sensors - -# -# Health Sensors -# - -# -# Heart Rate Monitors -# -CONFIG_AFE4403=m -CONFIG_AFE4404=m -CONFIG_MAX30100=m -CONFIG_MAX30102=m -# end of Heart Rate Monitors -# end of Health Sensors - -# -# Humidity sensors -# -CONFIG_AM2315=m -CONFIG_DHT11=m -CONFIG_HDC100X=m -CONFIG_HID_SENSOR_HUMIDITY=m 
-CONFIG_HTS221=m -CONFIG_HTS221_I2C=m -CONFIG_HTS221_SPI=m -CONFIG_HTU21=m -CONFIG_SI7005=m -CONFIG_SI7020=m -# end of Humidity sensors - -# -# Inertial measurement units -# -CONFIG_ADIS16400=m -CONFIG_ADIS16460=m -CONFIG_ADIS16475=m -CONFIG_ADIS16480=m -CONFIG_BMI160=m -CONFIG_BMI160_I2C=m -CONFIG_BMI160_SPI=m -CONFIG_FXOS8700=m -CONFIG_FXOS8700_I2C=m -CONFIG_FXOS8700_SPI=m -CONFIG_KMX61=m -CONFIG_INV_MPU6050_IIO=m -CONFIG_INV_MPU6050_I2C=m -CONFIG_INV_MPU6050_SPI=m -CONFIG_IIO_ST_LSM6DSX=m -CONFIG_IIO_ST_LSM6DSX_I2C=m -CONFIG_IIO_ST_LSM6DSX_SPI=m -CONFIG_IIO_ST_LSM6DSX_I3C=m -# end of Inertial measurement units - -CONFIG_IIO_ADIS_LIB=m -CONFIG_IIO_ADIS_LIB_BUFFER=y - -# -# Light sensors -# -CONFIG_ACPI_ALS=m -CONFIG_ADJD_S311=m -CONFIG_ADUX1020=m -CONFIG_AL3010=m -CONFIG_AL3320A=m -CONFIG_APDS9300=m -CONFIG_APDS9960=m -CONFIG_BH1750=m -CONFIG_BH1780=m -CONFIG_CM32181=m -CONFIG_CM3232=m -CONFIG_CM3323=m -CONFIG_CM3605=m -CONFIG_CM36651=m -CONFIG_IIO_CROS_EC_LIGHT_PROX=m -CONFIG_GP2AP002=m -CONFIG_GP2AP020A00F=m -CONFIG_IQS621_ALS=m -CONFIG_SENSORS_ISL29018=m -CONFIG_SENSORS_ISL29028=m -CONFIG_ISL29125=m -CONFIG_HID_SENSOR_ALS=m -CONFIG_HID_SENSOR_PROX=m -CONFIG_JSA1212=m -CONFIG_RPR0521=m -CONFIG_SENSORS_LM3533=m -CONFIG_LTR501=m -CONFIG_LV0104CS=m -CONFIG_MAX44000=m -CONFIG_MAX44009=m -CONFIG_NOA1305=m -CONFIG_OPT3001=m -CONFIG_PA12203001=m -CONFIG_SI1133=m -CONFIG_SI1145=m -CONFIG_STK3310=m -CONFIG_ST_UVIS25=m -CONFIG_ST_UVIS25_I2C=m -CONFIG_ST_UVIS25_SPI=m -CONFIG_TCS3414=m -CONFIG_TCS3472=m -CONFIG_SENSORS_TSL2563=m -CONFIG_TSL2583=m -CONFIG_TSL2772=m -CONFIG_TSL4531=m -CONFIG_US5182D=m -CONFIG_VCNL4000=m -CONFIG_VCNL4035=m -CONFIG_VEML6030=m -CONFIG_VEML6070=m -CONFIG_VL6180=m -CONFIG_ZOPT2201=m -# end of Light sensors - -# -# Magnetometer sensors -# -CONFIG_AK8974=m -CONFIG_AK8975=m -CONFIG_AK09911=m -CONFIG_BMC150_MAGN=m -CONFIG_BMC150_MAGN_I2C=m -CONFIG_BMC150_MAGN_SPI=m -CONFIG_MAG3110=m -CONFIG_HID_SENSOR_MAGNETOMETER_3D=m -CONFIG_MMC35240=m -CONFIG_IIO_ST_MAGN_3AXIS=m -CONFIG_IIO_ST_MAGN_I2C_3AXIS=m -CONFIG_IIO_ST_MAGN_SPI_3AXIS=m -CONFIG_SENSORS_HMC5843=m -CONFIG_SENSORS_HMC5843_I2C=m -CONFIG_SENSORS_HMC5843_SPI=m -CONFIG_SENSORS_RM3100=m -CONFIG_SENSORS_RM3100_I2C=m -CONFIG_SENSORS_RM3100_SPI=m -# end of Magnetometer sensors - -# -# Multiplexers -# -CONFIG_IIO_MUX=m -# end of Multiplexers - -# -# Inclinometer sensors -# -CONFIG_HID_SENSOR_INCLINOMETER_3D=m -CONFIG_HID_SENSOR_DEVICE_ROTATION=m -# end of Inclinometer sensors - -# -# Triggers - standalone -# -CONFIG_IIO_HRTIMER_TRIGGER=m -CONFIG_IIO_INTERRUPT_TRIGGER=m -CONFIG_IIO_TIGHTLOOP_TRIGGER=m -CONFIG_IIO_SYSFS_TRIGGER=m -# end of Triggers - standalone - -# -# Linear and angular position sensors -# -CONFIG_IQS624_POS=m -# end of Linear and angular position sensors - -# -# Digital potentiometers -# -CONFIG_AD5272=m -CONFIG_DS1803=m -CONFIG_MAX5432=m -CONFIG_MAX5481=m -CONFIG_MAX5487=m -CONFIG_MCP4018=m -CONFIG_MCP4131=m -CONFIG_MCP4531=m -CONFIG_MCP41010=m -CONFIG_TPL0102=m -# end of Digital potentiometers - -# -# Digital potentiostats -# -CONFIG_LMP91000=m -# end of Digital potentiostats - -# -# Pressure sensors -# -CONFIG_ABP060MG=m -CONFIG_BMP280=m -CONFIG_BMP280_I2C=m -CONFIG_BMP280_SPI=m -CONFIG_IIO_CROS_EC_BARO=m -CONFIG_DLHL60D=m -CONFIG_DPS310=m -CONFIG_HID_SENSOR_PRESS=m -CONFIG_HP03=m -CONFIG_ICP10100=m -CONFIG_MPL115=m -CONFIG_MPL115_I2C=m -CONFIG_MPL115_SPI=m -CONFIG_MPL3115=m -CONFIG_MS5611=m -CONFIG_MS5611_I2C=m -CONFIG_MS5611_SPI=m -CONFIG_MS5637=m -CONFIG_IIO_ST_PRESS=m -CONFIG_IIO_ST_PRESS_I2C=m 
-CONFIG_IIO_ST_PRESS_SPI=m -CONFIG_T5403=m -CONFIG_HP206C=m -CONFIG_ZPA2326=m -CONFIG_ZPA2326_I2C=m -CONFIG_ZPA2326_SPI=m -# end of Pressure sensors - -# -# Lightning sensors -# -CONFIG_AS3935=m -# end of Lightning sensors - -# -# Proximity and distance sensors -# -CONFIG_ISL29501=m -CONFIG_LIDAR_LITE_V2=m -CONFIG_MB1232=m -CONFIG_PING=m -CONFIG_RFD77402=m -CONFIG_SRF04=m -CONFIG_SX9310=m -CONFIG_SX9500=m -CONFIG_SRF08=m -CONFIG_VCNL3020=m -CONFIG_VL53L0X_I2C=m -# end of Proximity and distance sensors - -# -# Resolver to digital converters -# -CONFIG_AD2S90=m -CONFIG_AD2S1200=m -# end of Resolver to digital converters - -# -# Temperature sensors -# -CONFIG_IQS620AT_TEMP=m -CONFIG_LTC2983=m -CONFIG_MAXIM_THERMOCOUPLE=m -CONFIG_HID_SENSOR_TEMP=m -CONFIG_MLX90614=m -CONFIG_MLX90632=m -CONFIG_TMP006=m -CONFIG_TMP007=m -CONFIG_TSYS01=m -CONFIG_TSYS02D=m -CONFIG_MAX31856=m -# end of Temperature sensors - -CONFIG_NTB=m -CONFIG_NTB_MSI=y -CONFIG_NTB_AMD=m -CONFIG_NTB_IDT=m -CONFIG_NTB_INTEL=m -CONFIG_NTB_SWITCHTEC=m -# CONFIG_NTB_PINGPONG is not set -# CONFIG_NTB_TOOL is not set -# CONFIG_NTB_PERF is not set -# CONFIG_NTB_MSI_TEST is not set -CONFIG_NTB_TRANSPORT=m -CONFIG_VME_BUS=y - -# -# VME Bridge Drivers -# -CONFIG_VME_CA91CX42=m -CONFIG_VME_TSI148=m -# CONFIG_VME_FAKE is not set - -# -# VME Board Drivers -# -CONFIG_VMIVME_7805=m - -# -# VME Device Drivers -# -CONFIG_VME_USER=m -CONFIG_PWM=y -CONFIG_PWM_SYSFS=y -# CONFIG_PWM_DEBUG is not set -CONFIG_PWM_ATMEL_HLCDC_PWM=m -CONFIG_PWM_CRC=y -CONFIG_PWM_CROS_EC=m -CONFIG_PWM_FSL_FTM=m -CONFIG_PWM_IQS620A=m -CONFIG_PWM_LP3943=m -CONFIG_PWM_LPSS=m -CONFIG_PWM_LPSS_PCI=m -CONFIG_PWM_LPSS_PLATFORM=m -CONFIG_PWM_PCA9685=m -CONFIG_PWM_STMPE=y -CONFIG_PWM_TWL=m -CONFIG_PWM_TWL_LED=m - -# -# IRQ chip support -# -CONFIG_IRQCHIP=y -CONFIG_AL_FIC=y -CONFIG_MADERA_IRQ=m -# end of IRQ chip support - -CONFIG_IPACK_BUS=m -CONFIG_BOARD_TPCI200=m -CONFIG_SERIAL_IPOCTAL=m -CONFIG_RESET_CONTROLLER=y -CONFIG_RESET_BRCMSTB_RESCAL=y -CONFIG_RESET_INTEL_GW=y -CONFIG_RESET_TI_SYSCON=m - -# -# PHY Subsystem -# -CONFIG_GENERIC_PHY=y -CONFIG_GENERIC_PHY_MIPI_DPHY=y -CONFIG_BCM_KONA_USB2_PHY=m -CONFIG_PHY_CADENCE_TORRENT=m -CONFIG_PHY_CADENCE_DPHY=m -CONFIG_PHY_CADENCE_SIERRA=m -CONFIG_PHY_CADENCE_SALVO=m -CONFIG_PHY_FSL_IMX8MQ_USB=m -CONFIG_PHY_MIXEL_MIPI_DPHY=m -CONFIG_PHY_PXA_28NM_HSIC=m -CONFIG_PHY_PXA_28NM_USB2=m -CONFIG_PHY_CPCAP_USB=m -CONFIG_PHY_MAPPHONE_MDM6600=m -CONFIG_PHY_OCELOT_SERDES=m -CONFIG_PHY_QCOM_USB_HS=m -CONFIG_PHY_QCOM_USB_HSIC=m -CONFIG_PHY_SAMSUNG_USB2=m -CONFIG_PHY_TUSB1210=m -CONFIG_PHY_INTEL_COMBO=y -CONFIG_PHY_INTEL_EMMC=m -# end of PHY Subsystem - -CONFIG_POWERCAP=y -CONFIG_INTEL_RAPL_CORE=m -CONFIG_INTEL_RAPL=m -CONFIG_IDLE_INJECT=y -CONFIG_MCB=m -CONFIG_MCB_PCI=m -CONFIG_MCB_LPC=m - -# -# Performance monitor support -# -# end of Performance monitor support - -CONFIG_RAS=y -CONFIG_RAS_CEC=y -# CONFIG_RAS_CEC_DEBUG is not set -CONFIG_USB4=m - -# -# Android -# -# CONFIG_ANDROID is not set -# end of Android - -CONFIG_LIBNVDIMM=y -CONFIG_BLK_DEV_PMEM=m -CONFIG_ND_BLK=m -CONFIG_ND_CLAIM=y -CONFIG_ND_BTT=m -CONFIG_BTT=y -CONFIG_ND_PFN=m -CONFIG_NVDIMM_PFN=y -CONFIG_NVDIMM_DAX=y -CONFIG_OF_PMEM=m -CONFIG_DAX_DRIVER=y -CONFIG_DAX=y -CONFIG_DEV_DAX=m -CONFIG_DEV_DAX_PMEM=m -CONFIG_DEV_DAX_HMEM=m -CONFIG_DEV_DAX_KMEM=m -CONFIG_DEV_DAX_PMEM_COMPAT=m -CONFIG_NVMEM=y -CONFIG_NVMEM_SYSFS=y -CONFIG_NVMEM_SPMI_SDAM=m -CONFIG_RAVE_SP_EEPROM=m - -# -# HW tracing support -# -CONFIG_STM=m -CONFIG_STM_PROTO_BASIC=m -CONFIG_STM_PROTO_SYS_T=m -# CONFIG_STM_DUMMY is 
not set -CONFIG_STM_SOURCE_CONSOLE=m -CONFIG_STM_SOURCE_HEARTBEAT=m -CONFIG_STM_SOURCE_FTRACE=m -CONFIG_INTEL_TH=m -CONFIG_INTEL_TH_PCI=m -CONFIG_INTEL_TH_ACPI=m -CONFIG_INTEL_TH_GTH=m -CONFIG_INTEL_TH_STH=m -CONFIG_INTEL_TH_MSU=m -CONFIG_INTEL_TH_PTI=m -# CONFIG_INTEL_TH_DEBUG is not set -# end of HW tracing support - -CONFIG_FPGA=m -CONFIG_ALTERA_PR_IP_CORE=m -CONFIG_ALTERA_PR_IP_CORE_PLAT=m -CONFIG_FPGA_MGR_ALTERA_PS_SPI=m -CONFIG_FPGA_MGR_ALTERA_CVP=m -CONFIG_FPGA_MGR_XILINX_SPI=m -CONFIG_FPGA_MGR_ICE40_SPI=m -CONFIG_FPGA_MGR_MACHXO2_SPI=m -CONFIG_FPGA_BRIDGE=m -CONFIG_ALTERA_FREEZE_BRIDGE=m -CONFIG_XILINX_PR_DECOUPLER=m -CONFIG_FPGA_REGION=m -CONFIG_OF_FPGA_REGION=m -CONFIG_FPGA_DFL=m -CONFIG_FPGA_DFL_FME=m -CONFIG_FPGA_DFL_FME_MGR=m -CONFIG_FPGA_DFL_FME_BRIDGE=m -CONFIG_FPGA_DFL_FME_REGION=m -CONFIG_FPGA_DFL_AFU=m -CONFIG_FPGA_DFL_PCI=m -CONFIG_FSI=m -CONFIG_FSI_NEW_DEV_NODE=y -CONFIG_FSI_MASTER_GPIO=m -CONFIG_FSI_MASTER_HUB=m -CONFIG_FSI_MASTER_ASPEED=m -CONFIG_FSI_SCOM=m -CONFIG_FSI_SBEFIFO=m -CONFIG_FSI_OCC=m -CONFIG_TEE=m - -# -# TEE drivers -# -CONFIG_AMDTEE=m -# end of TEE drivers - -CONFIG_MULTIPLEXER=m - -# -# Multiplexer drivers -# -CONFIG_MUX_ADG792A=m -CONFIG_MUX_ADGS1408=m -CONFIG_MUX_GPIO=m -CONFIG_MUX_MMIO=m -# end of Multiplexer drivers - -CONFIG_PM_OPP=y -CONFIG_UNISYS_VISORBUS=m -CONFIG_SIOX=m -CONFIG_SIOX_BUS_GPIO=m -CONFIG_SLIMBUS=m -CONFIG_SLIM_QCOM_CTRL=m -CONFIG_INTERCONNECT=y -CONFIG_COUNTER=m -CONFIG_FTM_QUADDEC=m -CONFIG_MOST=m -# end of Device Drivers - -# -# File systems -# -CONFIG_DCACHE_WORD_ACCESS=y -CONFIG_VALIDATE_FS_PARSER=y -CONFIG_FS_IOMAP=y -# CONFIG_EXT2_FS is not set -# CONFIG_EXT3_FS is not set -CONFIG_EXT4_FS=m -CONFIG_EXT4_USE_FOR_EXT2=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -# CONFIG_EXT4_DEBUG is not set -CONFIG_JBD2=m -# CONFIG_JBD2_DEBUG is not set -CONFIG_FS_MBCACHE=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -# CONFIG_JFS_DEBUG is not set -CONFIG_JFS_STATISTICS=y -CONFIG_XFS_FS=m -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_XFS_RT=y -CONFIG_XFS_ONLINE_SCRUB=y -CONFIG_XFS_ONLINE_REPAIR=y -# CONFIG_XFS_WARN is not set -# CONFIG_XFS_DEBUG is not set -CONFIG_GFS2_FS=m -CONFIG_GFS2_FS_LOCKING_DLM=y -CONFIG_OCFS2_FS=m -CONFIG_OCFS2_FS_O2CB=m -CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m -CONFIG_OCFS2_FS_STATS=y -CONFIG_OCFS2_DEBUG_MASKLOG=y -# CONFIG_OCFS2_DEBUG_FS is not set -CONFIG_BTRFS_FS=m -CONFIG_BTRFS_FS_POSIX_ACL=y -# CONFIG_BTRFS_FS_CHECK_INTEGRITY is not set -# CONFIG_BTRFS_FS_RUN_SANITY_TESTS is not set -# CONFIG_BTRFS_DEBUG is not set -# CONFIG_BTRFS_ASSERT is not set -# CONFIG_BTRFS_FS_REF_VERIFY is not set -CONFIG_NILFS2_FS=m -CONFIG_F2FS_FS=m -CONFIG_F2FS_STAT_FS=y -CONFIG_F2FS_FS_XATTR=y -CONFIG_F2FS_FS_POSIX_ACL=y -CONFIG_F2FS_FS_SECURITY=y -CONFIG_F2FS_CHECK_FS=y -# CONFIG_F2FS_IO_TRACE is not set -# CONFIG_F2FS_FAULT_INJECTION is not set -CONFIG_F2FS_FS_COMPRESSION=y -CONFIG_F2FS_FS_LZO=y -CONFIG_F2FS_FS_LZ4=y -CONFIG_F2FS_FS_ZSTD=y -CONFIG_F2FS_FS_LZORLE=y -CONFIG_ZONEFS_FS=m -CONFIG_FS_DAX=y -CONFIG_FS_DAX_PMD=y -CONFIG_FS_POSIX_ACL=y -CONFIG_EXPORTFS=y -CONFIG_EXPORTFS_BLOCK_OPS=y -CONFIG_FILE_LOCKING=y -# CONFIG_MANDATORY_FILE_LOCKING is not set -CONFIG_FS_ENCRYPTION=y -CONFIG_FS_ENCRYPTION_ALGS=m -CONFIG_FS_VERITY=y -# CONFIG_FS_VERITY_DEBUG is not set -CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y -CONFIG_FSNOTIFY=y 
-CONFIG_DNOTIFY=y -CONFIG_INOTIFY_USER=y -CONFIG_FANOTIFY=y -CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y -CONFIG_QUOTA=y -CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set -# CONFIG_QUOTA_DEBUG is not set -CONFIG_QUOTA_TREE=m -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_QUOTACTL=y -CONFIG_QUOTACTL_COMPAT=y -CONFIG_AUTOFS4_FS=y -CONFIG_AUTOFS_FS=y -CONFIG_FUSE_FS=m -CONFIG_CUSE=m -CONFIG_VIRTIO_FS=m -CONFIG_OVERLAY_FS=m -CONFIG_OVERLAY_FS_REDIRECT_DIR=y -# CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW is not set -CONFIG_OVERLAY_FS_INDEX=y -CONFIG_OVERLAY_FS_XINO_AUTO=y -CONFIG_OVERLAY_FS_METACOPY=y - -# -# Caches -# -CONFIG_FSCACHE=m -CONFIG_FSCACHE_STATS=y -CONFIG_FSCACHE_HISTOGRAM=y -# CONFIG_FSCACHE_DEBUG is not set -# CONFIG_FSCACHE_OBJECT_LIST is not set -CONFIG_CACHEFILES=m -# CONFIG_CACHEFILES_DEBUG is not set -# CONFIG_CACHEFILES_HISTOGRAM is not set -# end of Caches - -# -# CD-ROM/DVD Filesystems -# -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -# end of CD-ROM/DVD Filesystems - -# -# DOS/FAT/EXFAT/NT Filesystems -# -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_CODEPAGE=437 -CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" -CONFIG_FAT_DEFAULT_UTF8=y -CONFIG_EXFAT_FS=m -CONFIG_EXFAT_DEFAULT_IOCHARSET="utf8" -CONFIG_NTFS_FS=m -# CONFIG_NTFS_DEBUG is not set -CONFIG_NTFS_RW=y -# end of DOS/FAT/EXFAT/NT Filesystems - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_KCORE=y -CONFIG_PROC_VMCORE=y -CONFIG_PROC_VMCORE_DEVICE_DUMP=y -CONFIG_PROC_SYSCTL=y -CONFIG_PROC_PAGE_MONITOR=y -CONFIG_PROC_CHILDREN=y -CONFIG_PROC_PID_ARCH_STATUS=y -CONFIG_PROC_CPU_RESCTRL=y -CONFIG_KERNFS=y -CONFIG_SYSFS=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_TMPFS_XATTR=y -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -CONFIG_MEMFD_CREATE=y -CONFIG_ARCH_HAS_GIGANTIC_PAGE=y -CONFIG_CONFIGFS_FS=y -CONFIG_EFIVAR_FS=y -# end of Pseudo filesystems - -CONFIG_MISC_FILESYSTEMS=y -CONFIG_ORANGEFS_FS=m -# CONFIG_ADFS_FS is not set -CONFIG_AFFS_FS=m -CONFIG_ECRYPT_FS=m -# CONFIG_ECRYPT_FS_MESSAGING is not set -CONFIG_HFS_FS=m -CONFIG_HFSPLUS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -CONFIG_JFFS2_FS=m -CONFIG_JFFS2_FS_DEBUG=0 -CONFIG_JFFS2_FS_WRITEBUFFER=y -# CONFIG_JFFS2_FS_WBUF_VERIFY is not set -CONFIG_JFFS2_SUMMARY=y -CONFIG_JFFS2_FS_XATTR=y -CONFIG_JFFS2_FS_POSIX_ACL=y -CONFIG_JFFS2_FS_SECURITY=y -# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set -CONFIG_JFFS2_ZLIB=y -CONFIG_JFFS2_RTIME=y -CONFIG_UBIFS_FS=m -# CONFIG_UBIFS_FS_ADVANCED_COMPR is not set -CONFIG_UBIFS_FS_LZO=y -CONFIG_UBIFS_FS_ZLIB=y -CONFIG_UBIFS_FS_ZSTD=y -CONFIG_UBIFS_ATIME_SUPPORT=y -CONFIG_UBIFS_FS_XATTR=y -CONFIG_UBIFS_FS_SECURITY=y -CONFIG_UBIFS_FS_AUTHENTICATION=y -CONFIG_CRAMFS=m -CONFIG_CRAMFS_BLOCKDEV=y -CONFIG_CRAMFS_MTD=y -CONFIG_SQUASHFS=m -# CONFIG_SQUASHFS_FILE_CACHE is not set -CONFIG_SQUASHFS_FILE_DIRECT=y -# CONFIG_SQUASHFS_DECOMP_SINGLE is not set -CONFIG_SQUASHFS_DECOMP_MULTI=y -# CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set -CONFIG_SQUASHFS_XATTR=y -CONFIG_SQUASHFS_ZLIB=y -CONFIG_SQUASHFS_LZ4=y -CONFIG_SQUASHFS_LZO=y -CONFIG_SQUASHFS_XZ=y -CONFIG_SQUASHFS_ZSTD=y -# CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set -# CONFIG_SQUASHFS_EMBEDDED is not set -CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 -# CONFIG_VXFS_FS is not set -CONFIG_MINIX_FS=m -CONFIG_OMFS_FS=m -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX6FS_FS is not set -CONFIG_ROMFS_FS=m -CONFIG_ROMFS_BACKED_BY_BLOCK=y -# 
CONFIG_ROMFS_BACKED_BY_MTD is not set -# CONFIG_ROMFS_BACKED_BY_BOTH is not set -CONFIG_ROMFS_ON_BLOCK=y -CONFIG_PSTORE=y -CONFIG_PSTORE_DEFLATE_COMPRESS=m -CONFIG_PSTORE_LZO_COMPRESS=m -CONFIG_PSTORE_LZ4_COMPRESS=m -CONFIG_PSTORE_LZ4HC_COMPRESS=m -# CONFIG_PSTORE_842_COMPRESS is not set -CONFIG_PSTORE_ZSTD_COMPRESS=y -CONFIG_PSTORE_COMPRESS=y -# CONFIG_PSTORE_DEFLATE_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZO_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZ4_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZ4HC_COMPRESS_DEFAULT is not set -CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y -CONFIG_PSTORE_COMPRESS_DEFAULT="zstd" -# CONFIG_PSTORE_CONSOLE is not set -# CONFIG_PSTORE_PMSG is not set -# CONFIG_PSTORE_FTRACE is not set -CONFIG_PSTORE_RAM=y -CONFIG_PSTORE_ZONE=m -CONFIG_PSTORE_BLK=m -CONFIG_PSTORE_BLK_BLKDEV="" -CONFIG_PSTORE_BLK_KMSG_SIZE=64 -CONFIG_PSTORE_BLK_MAX_REASON=2 -# CONFIG_SYSV_FS is not set -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set -# CONFIG_UFS_DEBUG is not set -CONFIG_EROFS_FS=m -# CONFIG_EROFS_FS_DEBUG is not set -CONFIG_EROFS_FS_XATTR=y -CONFIG_EROFS_FS_POSIX_ACL=y -CONFIG_EROFS_FS_SECURITY=y -CONFIG_EROFS_FS_ZIP=y -CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT=2 -CONFIG_VBOXSF_FS=m -CONFIG_NETWORK_FILESYSTEMS=y -CONFIG_NFS_FS=m -CONFIG_NFS_V2=m -CONFIG_NFS_V3=m -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=m -CONFIG_NFS_SWAP=y -CONFIG_NFS_V4_1=y -CONFIG_NFS_V4_2=y -CONFIG_PNFS_FILE_LAYOUT=m -CONFIG_PNFS_BLOCK=m -CONFIG_PNFS_FLEXFILE_LAYOUT=m -CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" -CONFIG_NFS_V4_1_MIGRATION=y -CONFIG_NFS_V4_SECURITY_LABEL=y -CONFIG_NFS_FSCACHE=y -# CONFIG_NFS_USE_LEGACY_DNS is not set -CONFIG_NFS_USE_KERNEL_DNS=y -CONFIG_NFS_DEBUG=y -# CONFIG_NFS_DISABLE_UDP_SUPPORT is not set -CONFIG_NFSD=m -CONFIG_NFSD_V2_ACL=y -CONFIG_NFSD_V3=y -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V4=y -CONFIG_NFSD_PNFS=y -CONFIG_NFSD_BLOCKLAYOUT=y -CONFIG_NFSD_SCSILAYOUT=y -# CONFIG_NFSD_FLEXFILELAYOUT is not set -CONFIG_NFSD_V4_SECURITY_LABEL=y -CONFIG_GRACE_PERIOD=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_NFS_ACL_SUPPORT=m -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=m -CONFIG_SUNRPC_GSS=m -CONFIG_SUNRPC_BACKCHANNEL=y -CONFIG_SUNRPC_SWAP=y -CONFIG_RPCSEC_GSS_KRB5=m -CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES=y -CONFIG_SUNRPC_DEBUG=y -CONFIG_SUNRPC_XPRT_RDMA=m -CONFIG_CEPH_FS=m -CONFIG_CEPH_FSCACHE=y -CONFIG_CEPH_FS_POSIX_ACL=y -CONFIG_CEPH_FS_SECURITY_LABEL=y -CONFIG_CIFS=m -# CONFIG_CIFS_STATS2 is not set -CONFIG_CIFS_ALLOW_INSECURE_LEGACY=y -# CONFIG_CIFS_WEAK_PW_HASH is not set -CONFIG_CIFS_UPCALL=y -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -CONFIG_CIFS_DEBUG=y -# CONFIG_CIFS_DEBUG2 is not set -# CONFIG_CIFS_DEBUG_DUMP_KEYS is not set -CONFIG_CIFS_DFS_UPCALL=y -# CONFIG_CIFS_SMB_DIRECT is not set -CONFIG_CIFS_FSCACHE=y -CONFIG_CODA_FS=m -CONFIG_AFS_FS=m -# CONFIG_AFS_DEBUG is not set -CONFIG_AFS_FSCACHE=y -# CONFIG_AFS_DEBUG_CURSOR is not set -CONFIG_9P_FS=m -CONFIG_9P_FSCACHE=y -CONFIG_9P_FS_POSIX_ACL=y -CONFIG_9P_FS_SECURITY=y -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m 
-CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_MAC_ROMAN=m -CONFIG_NLS_MAC_CELTIC=m -CONFIG_NLS_MAC_CENTEURO=m -CONFIG_NLS_MAC_CROATIAN=m -CONFIG_NLS_MAC_CYRILLIC=m -CONFIG_NLS_MAC_GAELIC=m -CONFIG_NLS_MAC_GREEK=m -CONFIG_NLS_MAC_ICELAND=m -CONFIG_NLS_MAC_INUIT=m -CONFIG_NLS_MAC_ROMANIAN=m -CONFIG_NLS_MAC_TURKISH=m -CONFIG_NLS_UTF8=m -CONFIG_DLM=m -# CONFIG_DLM_DEBUG is not set -CONFIG_UNICODE=y -# CONFIG_UNICODE_NORMALIZATION_SELFTEST is not set -CONFIG_IO_WQ=y -# end of File systems - -# -# Security options -# -CONFIG_KEYS=y -CONFIG_KEYS_REQUEST_CACHE=y -CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_TRUSTED_KEYS=m -CONFIG_ENCRYPTED_KEYS=m -CONFIG_KEY_DH_OPERATIONS=y -CONFIG_KEY_NOTIFICATIONS=y -# CONFIG_SECURITY_DMESG_RESTRICT is not set -CONFIG_SECURITY=y -CONFIG_SECURITYFS=y -CONFIG_SECURITY_NETWORK=y -CONFIG_PAGE_TABLE_ISOLATION=y -CONFIG_SECURITY_INFINIBAND=y -CONFIG_SECURITY_NETWORK_XFRM=y -CONFIG_SECURITY_PATH=y -# CONFIG_INTEL_TXT is not set -CONFIG_LSM_MMAP_MIN_ADDR=65536 -CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y -CONFIG_HARDENED_USERCOPY=y -CONFIG_HARDENED_USERCOPY_FALLBACK=y -# CONFIG_HARDENED_USERCOPY_PAGESPAN is not set -CONFIG_FORTIFY_SOURCE=y -# CONFIG_STATIC_USERMODEHELPER is not set -CONFIG_SECURITY_SELINUX=y -CONFIG_SECURITY_SELINUX_BOOTPARAM=y -# CONFIG_SECURITY_SELINUX_DISABLE is not set -CONFIG_SECURITY_SELINUX_DEVELOP=y -CONFIG_SECURITY_SELINUX_AVC_STATS=y -CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=0 -CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9 -CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256 -CONFIG_SECURITY_SMACK=y -CONFIG_SECURITY_SMACK_BRINGUP=y -CONFIG_SECURITY_SMACK_NETFILTER=y -CONFIG_SECURITY_SMACK_APPEND_SIGNALS=y -CONFIG_SECURITY_TOMOYO=y -CONFIG_SECURITY_TOMOYO_MAX_ACCEPT_ENTRY=2048 -CONFIG_SECURITY_TOMOYO_MAX_AUDIT_LOG=1024 -# CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER is not set -CONFIG_SECURITY_TOMOYO_POLICY_LOADER="/sbin/tomoyo-init" -CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER="/sbin/init" -# CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING is not set -CONFIG_SECURITY_APPARMOR=y -CONFIG_SECURITY_APPARMOR_HASH=y -CONFIG_SECURITY_APPARMOR_HASH_DEFAULT=y -# CONFIG_SECURITY_APPARMOR_DEBUG is not set -# CONFIG_SECURITY_LOADPIN is not set -CONFIG_SECURITY_YAMA=y -CONFIG_SECURITY_SAFESETID=y -CONFIG_SECURITY_LOCKDOWN_LSM=y -# CONFIG_SECURITY_LOCKDOWN_LSM_EARLY is not set -CONFIG_LOCK_DOWN_KERNEL_FORCE_NONE=y -# CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY is not set -# CONFIG_LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY is not set -# CONFIG_INTEGRITY is not set -# CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT is not set -# CONFIG_DEFAULT_SECURITY_SELINUX is not set -# CONFIG_DEFAULT_SECURITY_SMACK is not set -# CONFIG_DEFAULT_SECURITY_TOMOYO is not set -# CONFIG_DEFAULT_SECURITY_APPARMOR is not set -CONFIG_DEFAULT_SECURITY_DAC=y -CONFIG_LSM="lockdown,yama" - -# -# Kernel hardening options -# -CONFIG_GCC_PLUGIN_STRUCTLEAK=y - -# -# Memory initialization -# -# CONFIG_INIT_STACK_NONE is not set -# CONFIG_GCC_PLUGIN_STRUCTLEAK_USER is not set -# CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF is not set -CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL=y -# CONFIG_GCC_PLUGIN_STRUCTLEAK_VERBOSE is not set -# 
CONFIG_GCC_PLUGIN_STACKLEAK is not set -CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y -# CONFIG_INIT_ON_FREE_DEFAULT_ON is not set -# end of Memory initialization -# end of Kernel hardening options -# end of Security options - -CONFIG_XOR_BLOCKS=m -CONFIG_ASYNC_CORE=m -CONFIG_ASYNC_MEMCPY=m -CONFIG_ASYNC_XOR=m -CONFIG_ASYNC_PQ=m -CONFIG_ASYNC_RAID6_RECOV=m -CONFIG_CRYPTO=y - -# -# Crypto core or helper -# -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_ALGAPI2=y -CONFIG_CRYPTO_AEAD=y -CONFIG_CRYPTO_AEAD2=y -CONFIG_CRYPTO_SKCIPHER=y -CONFIG_CRYPTO_SKCIPHER2=y -CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_HASH2=y -CONFIG_CRYPTO_RNG=y -CONFIG_CRYPTO_RNG2=y -CONFIG_CRYPTO_RNG_DEFAULT=y -CONFIG_CRYPTO_AKCIPHER2=y -CONFIG_CRYPTO_AKCIPHER=y -CONFIG_CRYPTO_KPP2=y -CONFIG_CRYPTO_KPP=y -CONFIG_CRYPTO_ACOMP2=y -CONFIG_CRYPTO_MANAGER=y -CONFIG_CRYPTO_MANAGER2=y -CONFIG_CRYPTO_USER=m -CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y -CONFIG_CRYPTO_GF128MUL=y -CONFIG_CRYPTO_NULL=y -CONFIG_CRYPTO_NULL2=y -CONFIG_CRYPTO_PCRYPT=m -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_AUTHENC=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_SIMD=m -CONFIG_CRYPTO_GLUE_HELPER_X86=m -CONFIG_CRYPTO_ENGINE=m - -# -# Public-key cryptography -# -CONFIG_CRYPTO_RSA=y -CONFIG_CRYPTO_DH=y -CONFIG_CRYPTO_ECC=m -CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m -CONFIG_CRYPTO_CURVE25519_X86=m - -# -# Authenticated Encryption with Associated Data -# -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=y -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128_AESNI_SSE2=m -CONFIG_CRYPTO_SEQIV=y -CONFIG_CRYPTO_ECHAINIV=m - -# -# Block modes -# -CONFIG_CRYPTO_CBC=m -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_CTR=y -CONFIG_CRYPTO_CTS=m -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_NHPOLY1305=m -CONFIG_CRYPTO_NHPOLY1305_SSE2=m -CONFIG_CRYPTO_NHPOLY1305_AVX2=m -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_ESSIV=m - -# -# Hash modes -# -CONFIG_CRYPTO_CMAC=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m - -# -# Digest -# -CONFIG_CRYPTO_CRC32C=m -CONFIG_CRYPTO_CRC32C_INTEL=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRC32_PCLMUL=m -CONFIG_CRYPTO_XXHASH=m -CONFIG_CRYPTO_BLAKE2B=m -CONFIG_CRYPTO_BLAKE2S=m -CONFIG_CRYPTO_BLAKE2S_X86=m -CONFIG_CRYPTO_CRCT10DIF=y -CONFIG_CRYPTO_CRCT10DIF_PCLMUL=m -CONFIG_CRYPTO_GHASH=y -CONFIG_CRYPTO_POLY1305=m -CONFIG_CRYPTO_POLY1305_X86_64=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA1_SSSE3=m -CONFIG_CRYPTO_SHA256_SSSE3=m -CONFIG_CRYPTO_SHA512_SSSE3=m -CONFIG_CRYPTO_SHA256=y -CONFIG_CRYPTO_SHA512=y -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL=m - -# -# Ciphers -# -CONFIG_CRYPTO_AES=y -CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_AES_NI_INTEL=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_BLOWFISH_COMMON=m -CONFIG_CRYPTO_BLOWFISH_X86_64=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_CAMELLIA_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64=m -CONFIG_CRYPTO_CAST_COMMON=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST5_AVX_X86_64=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_CAST6_AVX_X86_64=m -CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_DES3_EDE_X86_64=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_KHAZAD=m 
-CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_CHACHA20=m -CONFIG_CRYPTO_CHACHA20_X86_64=m -CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SERPENT_SSE2_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX2_X86_64=m -CONFIG_CRYPTO_SM4=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_TWOFISH_COMMON=m -CONFIG_CRYPTO_TWOFISH_X86_64=m -CONFIG_CRYPTO_TWOFISH_X86_64_3WAY=m -CONFIG_CRYPTO_TWOFISH_AVX_X86_64=m - -# -# Compression -# -CONFIG_CRYPTO_DEFLATE=m -CONFIG_CRYPTO_LZO=m -CONFIG_CRYPTO_842=m -CONFIG_CRYPTO_LZ4=y -CONFIG_CRYPTO_LZ4HC=m -CONFIG_CRYPTO_ZSTD=y - -# -# Random Number Generation -# -CONFIG_CRYPTO_ANSI_CPRNG=m -CONFIG_CRYPTO_DRBG_MENU=y -CONFIG_CRYPTO_DRBG_HMAC=y -CONFIG_CRYPTO_DRBG_HASH=y -CONFIG_CRYPTO_DRBG_CTR=y -CONFIG_CRYPTO_DRBG=y -CONFIG_CRYPTO_JITTERENTROPY=y -CONFIG_CRYPTO_USER_API=m -CONFIG_CRYPTO_USER_API_HASH=m -CONFIG_CRYPTO_USER_API_SKCIPHER=m -CONFIG_CRYPTO_USER_API_RNG=m -CONFIG_CRYPTO_USER_API_AEAD=m -# CONFIG_CRYPTO_STATS is not set -CONFIG_CRYPTO_HASH_INFO=y - -# -# Crypto library routines -# -CONFIG_CRYPTO_LIB_AES=y -CONFIG_CRYPTO_LIB_ARC4=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=m -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA=m -CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m -CONFIG_CRYPTO_LIB_CHACHA=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_DES=m -CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11 -CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m -CONFIG_CRYPTO_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRYPTO_LIB_SHA256=y -CONFIG_CRYPTO_HW=y -CONFIG_CRYPTO_DEV_PADLOCK=m -CONFIG_CRYPTO_DEV_PADLOCK_AES=m -CONFIG_CRYPTO_DEV_PADLOCK_SHA=m -CONFIG_CRYPTO_DEV_ATMEL_I2C=m -CONFIG_CRYPTO_DEV_ATMEL_ECC=m -CONFIG_CRYPTO_DEV_ATMEL_SHA204A=m -CONFIG_CRYPTO_DEV_CCP=y -CONFIG_CRYPTO_DEV_CCP_DD=m -CONFIG_CRYPTO_DEV_SP_CCP=y -CONFIG_CRYPTO_DEV_CCP_CRYPTO=m -CONFIG_CRYPTO_DEV_SP_PSP=y -CONFIG_CRYPTO_DEV_CCP_DEBUGFS=y -CONFIG_CRYPTO_DEV_QAT=m -CONFIG_CRYPTO_DEV_QAT_DH895xCC=m -CONFIG_CRYPTO_DEV_QAT_C3XXX=m -CONFIG_CRYPTO_DEV_QAT_C62X=m -CONFIG_CRYPTO_DEV_QAT_DH895xCCVF=m -CONFIG_CRYPTO_DEV_QAT_C3XXXVF=m -CONFIG_CRYPTO_DEV_QAT_C62XVF=m -CONFIG_CRYPTO_DEV_NITROX=m -CONFIG_CRYPTO_DEV_NITROX_CNN55XX=m -CONFIG_CRYPTO_DEV_CHELSIO=m -CONFIG_CHELSIO_IPSEC_INLINE=y -CONFIG_CHELSIO_TLS_DEVICE=y -CONFIG_CRYPTO_DEV_VIRTIO=m -CONFIG_CRYPTO_DEV_SAFEXCEL=m -CONFIG_CRYPTO_DEV_CCREE=m -CONFIG_CRYPTO_DEV_AMLOGIC_GXL=m -CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG=y -CONFIG_ASYMMETRIC_KEY_TYPE=y -CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y -CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m -CONFIG_X509_CERTIFICATE_PARSER=y -CONFIG_PKCS8_PRIVATE_KEY_PARSER=m -CONFIG_TPM_KEY_PARSER=m -CONFIG_PKCS7_MESSAGE_PARSER=y -# CONFIG_PKCS7_TEST_KEY is not set -CONFIG_SIGNED_PE_FILE_VERIFICATION=y - -# -# Certificates for signature checking -# -CONFIG_MODULE_SIG_KEY="certs/signing_key.pem" -CONFIG_SYSTEM_TRUSTED_KEYRING=y -CONFIG_SYSTEM_TRUSTED_KEYS="" -# CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set -CONFIG_SECONDARY_TRUSTED_KEYRING=y -CONFIG_SYSTEM_BLACKLIST_KEYRING=y -CONFIG_SYSTEM_BLACKLIST_HASH_LIST="" -# end of Certificates for signature checking - -CONFIG_BINARY_PRINTF=y - -# -# Library routines -# -CONFIG_RAID6_PQ=m -CONFIG_RAID6_PQ_BENCHMARK=y -CONFIG_LINEAR_RANGES=y -CONFIG_PACKING=y -CONFIG_BITREVERSE=y -CONFIG_GENERIC_STRNCPY_FROM_USER=y -CONFIG_GENERIC_STRNLEN_USER=y -CONFIG_GENERIC_NET_UTILS=y 
-CONFIG_GENERIC_FIND_FIRST_BIT=y -CONFIG_CORDIC=m -# CONFIG_PRIME_NUMBERS is not set -CONFIG_RATIONAL=y -CONFIG_GENERIC_PCI_IOMAP=y -CONFIG_GENERIC_IOMAP=y -CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y -CONFIG_ARCH_HAS_FAST_MULTIPLIER=y -CONFIG_ARCH_USE_SYM_ANNOTATIONS=y -CONFIG_CRC_CCITT=y -CONFIG_CRC16=m -CONFIG_CRC_T10DIF=y -CONFIG_CRC_ITU_T=m -CONFIG_CRC32=y -# CONFIG_CRC32_SELFTEST is not set -CONFIG_CRC32_SLICEBY8=y -# CONFIG_CRC32_SLICEBY4 is not set -# CONFIG_CRC32_SARWATE is not set -# CONFIG_CRC32_BIT is not set -CONFIG_CRC64=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_LIBCRC32C=m -CONFIG_CRC8=m -CONFIG_XXHASH=y -# CONFIG_RANDOM32_SELFTEST is not set -CONFIG_842_COMPRESS=m -CONFIG_842_DECOMPRESS=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_LZO_COMPRESS=y -CONFIG_LZO_DECOMPRESS=y -CONFIG_LZ4_COMPRESS=y -CONFIG_LZ4HC_COMPRESS=m -CONFIG_LZ4_DECOMPRESS=y -CONFIG_ZSTD_COMPRESS=y -CONFIG_ZSTD_DECOMPRESS=y -CONFIG_XZ_DEC=y -CONFIG_XZ_DEC_X86=y -CONFIG_XZ_DEC_POWERPC=y -CONFIG_XZ_DEC_IA64=y -CONFIG_XZ_DEC_ARM=y -CONFIG_XZ_DEC_ARMTHUMB=y -CONFIG_XZ_DEC_SPARC=y -CONFIG_XZ_DEC_BCJ=y -# CONFIG_XZ_DEC_TEST is not set -CONFIG_DECOMPRESS_GZIP=y -CONFIG_DECOMPRESS_BZIP2=y -CONFIG_DECOMPRESS_LZMA=y -CONFIG_DECOMPRESS_XZ=y -CONFIG_DECOMPRESS_LZO=y -CONFIG_DECOMPRESS_LZ4=y -CONFIG_GENERIC_ALLOCATOR=y -CONFIG_REED_SOLOMON=y -CONFIG_REED_SOLOMON_ENC8=y -CONFIG_REED_SOLOMON_DEC8=y -CONFIG_REED_SOLOMON_DEC16=y -CONFIG_BCH=m -CONFIG_TEXTSEARCH=y -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m -CONFIG_BTREE=y -CONFIG_INTERVAL_TREE=y -CONFIG_XARRAY_MULTI=y -CONFIG_ASSOCIATIVE_ARRAY=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT_MAP=y -CONFIG_HAS_DMA=y -CONFIG_NEED_SG_DMA_LENGTH=y -CONFIG_NEED_DMA_MAP_STATE=y -CONFIG_ARCH_DMA_ADDR_T_64BIT=y -CONFIG_DMA_DECLARE_COHERENT=y -CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED=y -CONFIG_DMA_VIRT_OPS=y -CONFIG_SWIOTLB=y -CONFIG_DMA_COHERENT_POOL=y -# CONFIG_DMA_API_DEBUG is not set -CONFIG_SGL_ALLOC=y -CONFIG_IOMMU_HELPER=y -CONFIG_CHECK_SIGNATURE=y -CONFIG_CPU_RMAP=y -CONFIG_DQL=y -CONFIG_GLOB=y -# CONFIG_GLOB_SELFTEST is not set -CONFIG_NLATTR=y -CONFIG_LRU_CACHE=m -CONFIG_CLZ_TAB=y -CONFIG_IRQ_POLL=y -CONFIG_MPILIB=y -CONFIG_DIMLIB=y -CONFIG_LIBFDT=y -CONFIG_OID_REGISTRY=y -CONFIG_UCS2_STRING=y -CONFIG_HAVE_GENERIC_VDSO=y -CONFIG_GENERIC_GETTIMEOFDAY=y -CONFIG_GENERIC_VDSO_TIME_NS=y -CONFIG_FONT_SUPPORT=y -CONFIG_FONTS=y -# CONFIG_FONT_8x8 is not set -CONFIG_FONT_8x16=y -# CONFIG_FONT_6x11 is not set -# CONFIG_FONT_7x14 is not set -# CONFIG_FONT_PEARL_8x8 is not set -# CONFIG_FONT_ACORN_8x8 is not set -# CONFIG_FONT_MINI_4x6 is not set -# CONFIG_FONT_6x10 is not set -# CONFIG_FONT_10x18 is not set -# CONFIG_FONT_SUN8x16 is not set -# CONFIG_FONT_SUN12x22 is not set -CONFIG_FONT_TER16x32=y -CONFIG_SG_POOL=y -CONFIG_ARCH_HAS_PMEM_API=y -CONFIG_MEMREGION=y -CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y -CONFIG_ARCH_HAS_UACCESS_MCSAFE=y -CONFIG_ARCH_STACKWALK=y -CONFIG_SBITMAP=y -CONFIG_PARMAN=m -CONFIG_OBJAGG=m -# CONFIG_STRING_SELFTEST is not set -# end of Library routines - -# -# Kernel hacking -# - -# -# printk and dmesg options -# -CONFIG_PRINTK_TIME=y -# CONFIG_PRINTK_CALLER is not set -CONFIG_CONSOLE_LOGLEVEL_DEFAULT=4 -CONFIG_CONSOLE_LOGLEVEL_QUIET=1 -CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 -# CONFIG_BOOT_PRINTK_DELAY is not set -CONFIG_DYNAMIC_DEBUG=y -CONFIG_DYNAMIC_DEBUG_CORE=y -CONFIG_SYMBOLIC_ERRNAME=y -CONFIG_DEBUG_BUGVERBOSE=y -# end of printk and dmesg options - -# -# Compile-time checks and compiler options -# -CONFIG_DEBUG_INFO=y -# 
CONFIG_DEBUG_INFO_REDUCED is not set -# CONFIG_DEBUG_INFO_COMPRESSED is not set -# CONFIG_DEBUG_INFO_SPLIT is not set -CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y -# CONFIG_GDB_SCRIPTS is not set -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_FRAME_WARN=2048 -CONFIG_STRIP_ASM_SYMS=y -# CONFIG_READABLE_ASM is not set -# CONFIG_HEADERS_INSTALL is not set -# CONFIG_DEBUG_SECTION_MISMATCH is not set -CONFIG_SECTION_MISMATCH_WARN_ONLY=y -CONFIG_STACK_VALIDATION=y -# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set -# end of Compile-time checks and compiler options - -# -# Generic Kernel Debugging Instruments -# -CONFIG_MAGIC_SYSRQ=y -CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x0 -CONFIG_MAGIC_SYSRQ_SERIAL=y -CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE="" -CONFIG_DEBUG_FS=y -CONFIG_HAVE_ARCH_KGDB=y -# CONFIG_KGDB is not set -CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y -# CONFIG_UBSAN is not set -# end of Generic Kernel Debugging Instruments - -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_MISC=y - -# -# Memory Debugging -# -# CONFIG_PAGE_EXTENSION is not set -# CONFIG_DEBUG_PAGEALLOC is not set -# CONFIG_PAGE_OWNER is not set -CONFIG_PAGE_POISONING=y -CONFIG_PAGE_POISONING_NO_SANITY=y -CONFIG_PAGE_POISONING_ZERO=y -# CONFIG_DEBUG_PAGE_REF is not set -# CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_ARCH_HAS_DEBUG_WX=y -CONFIG_DEBUG_WX=y -CONFIG_GENERIC_PTDUMP=y -CONFIG_PTDUMP_CORE=y -# CONFIG_PTDUMP_DEBUGFS is not set -# CONFIG_DEBUG_OBJECTS is not set -# CONFIG_SLUB_DEBUG_ON is not set -# CONFIG_SLUB_STATS is not set -CONFIG_HAVE_DEBUG_KMEMLEAK=y -# CONFIG_DEBUG_KMEMLEAK is not set -# CONFIG_DEBUG_STACK_USAGE is not set -CONFIG_SCHED_STACK_END_CHECK=y -CONFIG_ARCH_HAS_DEBUG_VM_PGTABLE=y -# CONFIG_DEBUG_VM is not set -# CONFIG_DEBUG_VM_PGTABLE is not set -CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y -# CONFIG_DEBUG_VIRTUAL is not set -CONFIG_DEBUG_MEMORY_INIT=y -# CONFIG_DEBUG_PER_CPU_MAPS is not set -CONFIG_HAVE_ARCH_KASAN=y -CONFIG_HAVE_ARCH_KASAN_VMALLOC=y -CONFIG_CC_HAS_KASAN_GENERIC=y -CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y -# CONFIG_KASAN is not set -CONFIG_KASAN_STACK=1 -# end of Memory Debugging - -# CONFIG_DEBUG_SHIRQ is not set - -# -# Debug Oops, Lockups and Hangs -# -# CONFIG_PANIC_ON_OOPS is not set -CONFIG_PANIC_ON_OOPS_VALUE=0 -CONFIG_PANIC_TIMEOUT=0 -CONFIG_LOCKUP_DETECTOR=y -CONFIG_SOFTLOCKUP_DETECTOR=y -# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 -CONFIG_HARDLOCKUP_DETECTOR_PERF=y -CONFIG_HARDLOCKUP_CHECK_TIMESTAMP=y -CONFIG_HARDLOCKUP_DETECTOR=y -# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set -CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 -CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 -# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set -CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 -# CONFIG_WQ_WATCHDOG is not set -# CONFIG_TEST_LOCKUP is not set -# end of Debug Oops, Lockups and Hangs - -# -# Scheduler Debugging -# -CONFIG_SCHED_DEBUG=y -CONFIG_SCHED_INFO=y -CONFIG_SCHEDSTATS=y -# end of Scheduler Debugging - -# CONFIG_DEBUG_TIMEKEEPING is not set -CONFIG_DEBUG_PREEMPT=y - -# -# Lock Debugging (spinlocks, mutexes, etc...) 
-# -CONFIG_LOCK_DEBUGGING_SUPPORT=y -# CONFIG_PROVE_LOCKING is not set -# CONFIG_LOCK_STAT is not set -# CONFIG_DEBUG_RT_MUTEXES is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_MUTEXES is not set -# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set -# CONFIG_DEBUG_RWSEMS is not set -# CONFIG_DEBUG_LOCK_ALLOC is not set -# CONFIG_DEBUG_ATOMIC_SLEEP is not set -# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set -# CONFIG_LOCK_TORTURE_TEST is not set -# CONFIG_WW_MUTEX_SELFTEST is not set -# end of Lock Debugging (spinlocks, mutexes, etc...) - -CONFIG_STACKTRACE=y -# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set -# CONFIG_DEBUG_KOBJECT is not set - -# -# Debug kernel data structures -# -# CONFIG_DEBUG_LIST is not set -# CONFIG_DEBUG_PLIST is not set -# CONFIG_DEBUG_SG is not set -# CONFIG_DEBUG_NOTIFIERS is not set -# CONFIG_BUG_ON_DATA_CORRUPTION is not set -# end of Debug kernel data structures - -# CONFIG_DEBUG_CREDENTIALS is not set - -# -# RCU Debugging -# -# CONFIG_RCU_PERF_TEST is not set -# CONFIG_RCU_TORTURE_TEST is not set -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -# CONFIG_RCU_TRACE is not set -# CONFIG_RCU_EQS_DEBUG is not set -# end of RCU Debugging - -# CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set -# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set -# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set -CONFIG_LATENCYTOP=y -CONFIG_USER_STACKTRACE_SUPPORT=y -CONFIG_NOP_TRACER=y -CONFIG_HAVE_FUNCTION_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y -CONFIG_HAVE_DYNAMIC_FTRACE=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y -CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y -CONFIG_HAVE_SYSCALL_TRACEPOINTS=y -CONFIG_HAVE_FENTRY=y -CONFIG_HAVE_C_RECORDMCOUNT=y -CONFIG_TRACER_MAX_TRACE=y -CONFIG_TRACE_CLOCK=y -CONFIG_RING_BUFFER=y -CONFIG_EVENT_TRACING=y -CONFIG_CONTEXT_SWITCH_TRACER=y -CONFIG_RING_BUFFER_ALLOW_SWAP=y -CONFIG_TRACING=y -CONFIG_GENERIC_TRACER=y -CONFIG_TRACING_SUPPORT=y -CONFIG_FTRACE=y -# CONFIG_BOOTTIME_TRACING is not set -CONFIG_FUNCTION_TRACER=y -CONFIG_FUNCTION_GRAPH_TRACER=y -CONFIG_DYNAMIC_FTRACE=y -CONFIG_DYNAMIC_FTRACE_WITH_REGS=y -CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y -CONFIG_FUNCTION_PROFILER=y -CONFIG_STACK_TRACER=y -# CONFIG_IRQSOFF_TRACER is not set -# CONFIG_PREEMPT_TRACER is not set -CONFIG_SCHED_TRACER=y -CONFIG_HWLAT_TRACER=y -CONFIG_MMIOTRACE=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_TRACER_SNAPSHOT=y -# CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP is not set -CONFIG_BRANCH_PROFILE_NONE=y -# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_KPROBE_EVENTS=y -# CONFIG_KPROBE_EVENTS_ON_NOTRACE is not set -CONFIG_UPROBE_EVENTS=y -CONFIG_BPF_EVENTS=y -CONFIG_DYNAMIC_EVENTS=y -CONFIG_PROBE_EVENTS=y -CONFIG_BPF_KPROBE_OVERRIDE=y -CONFIG_FTRACE_MCOUNT_RECORD=y -CONFIG_TRACING_MAP=y -CONFIG_SYNTH_EVENTS=y -CONFIG_HIST_TRIGGERS=y -# CONFIG_TRACE_EVENT_INJECT is not set -# CONFIG_TRACEPOINT_BENCHMARK is not set -# CONFIG_RING_BUFFER_BENCHMARK is not set -# CONFIG_TRACE_EVAL_MAP_FILE is not set -# CONFIG_FTRACE_STARTUP_TEST is not set -# CONFIG_RING_BUFFER_STARTUP_TEST is not set -# CONFIG_MMIOTRACE_TEST is not set -# CONFIG_PREEMPTIRQ_DELAY_TEST is not set -# CONFIG_SYNTH_EVENT_GEN_TEST is not set -# CONFIG_KPROBE_EVENT_GEN_TEST is not set -# CONFIG_HIST_TRIGGERS_DEBUG is not set -# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set -# CONFIG_SAMPLES is not set -CONFIG_HAVE_ARCH_KCSAN=y -CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y -CONFIG_STRICT_DEVMEM=y -CONFIG_IO_STRICT_DEVMEM=y - -# -# x86 Debugging -# -CONFIG_TRACE_IRQFLAGS_SUPPORT=y -# 
CONFIG_X86_VERBOSE_BOOTUP is not set -CONFIG_EARLY_PRINTK=y -# CONFIG_EARLY_PRINTK_DBGP is not set -# CONFIG_EARLY_PRINTK_USB_XDBC is not set -# CONFIG_EFI_PGT_DUMP is not set -# CONFIG_DEBUG_TLBFLUSH is not set -# CONFIG_IOMMU_DEBUG is not set -CONFIG_HAVE_MMIOTRACE_SUPPORT=y -# CONFIG_X86_DECODER_SELFTEST is not set -CONFIG_IO_DELAY_0X80=y -# CONFIG_IO_DELAY_0XED is not set -# CONFIG_IO_DELAY_UDELAY is not set -# CONFIG_IO_DELAY_NONE is not set -CONFIG_DEBUG_BOOT_PARAMS=y -# CONFIG_CPA_DEBUG is not set -# CONFIG_DEBUG_ENTRY is not set -# CONFIG_DEBUG_NMI_SELFTEST is not set -# CONFIG_X86_DEBUG_FPU is not set -# CONFIG_PUNIT_ATOM_DEBUG is not set -CONFIG_UNWINDER_ORC=y -# CONFIG_UNWINDER_FRAME_POINTER is not set -# CONFIG_UNWINDER_GUESS is not set -# end of x86 Debugging - -# -# Kernel Testing and Coverage -# -# CONFIG_KUNIT is not set -# CONFIG_NOTIFIER_ERROR_INJECTION is not set -CONFIG_FUNCTION_ERROR_INJECTION=y -# CONFIG_FAULT_INJECTION is not set -CONFIG_ARCH_HAS_KCOV=y -CONFIG_CC_HAS_SANCOV_TRACE_PC=y -# CONFIG_KCOV is not set -CONFIG_RUNTIME_TESTING_MENU=y -CONFIG_LKDTM=m -# CONFIG_TEST_LIST_SORT is not set -# CONFIG_TEST_MIN_HEAP is not set -# CONFIG_TEST_SORT is not set -# CONFIG_KPROBES_SANITY_TEST is not set -# CONFIG_BACKTRACE_SELF_TEST is not set -# CONFIG_RBTREE_TEST is not set -# CONFIG_REED_SOLOMON_TEST is not set -# CONFIG_INTERVAL_TREE_TEST is not set -# CONFIG_PERCPU_TEST is not set -# CONFIG_ATOMIC64_SELFTEST is not set -# CONFIG_ASYNC_RAID6_TEST is not set -# CONFIG_TEST_HEXDUMP is not set -# CONFIG_TEST_STRING_HELPERS is not set -# CONFIG_TEST_STRSCPY is not set -# CONFIG_TEST_KSTRTOX is not set -# CONFIG_TEST_PRINTF is not set -# CONFIG_TEST_BITMAP is not set -# CONFIG_TEST_BITFIELD is not set -# CONFIG_TEST_UUID is not set -# CONFIG_TEST_XARRAY is not set -# CONFIG_TEST_OVERFLOW is not set -# CONFIG_TEST_RHASHTABLE is not set -# CONFIG_TEST_HASH is not set -# CONFIG_TEST_IDA is not set -# CONFIG_TEST_PARMAN is not set -# CONFIG_TEST_LKM is not set -# CONFIG_TEST_BITOPS is not set -# CONFIG_TEST_VMALLOC is not set -# CONFIG_TEST_USER_COPY is not set -# CONFIG_TEST_BPF is not set -# CONFIG_TEST_BLACKHOLE_DEV is not set -# CONFIG_FIND_BIT_BENCHMARK is not set -# CONFIG_TEST_FIRMWARE is not set -# CONFIG_TEST_SYSCTL is not set -# CONFIG_TEST_UDELAY is not set -# CONFIG_TEST_STATIC_KEYS is not set -# CONFIG_TEST_KMOD is not set -# CONFIG_TEST_MEMCAT_P is not set -# CONFIG_TEST_OBJAGG is not set -# CONFIG_TEST_STACKINIT is not set -# CONFIG_TEST_MEMINIT is not set -# CONFIG_TEST_HMM is not set -# CONFIG_MEMTEST is not set -# CONFIG_HYPERV_TESTING is not set -# end of Kernel Testing and Coverage -# end of Kernel hacking diff --git a/linux58-tkg/linux58-tkg-config/generic-desktop-profile.cfg b/linux58-tkg/linux58-tkg-config/generic-desktop-profile.cfg deleted file mode 100644 index d14bf2e..0000000 --- a/linux58-tkg/linux58-tkg-config/generic-desktop-profile.cfg +++ /dev/null @@ -1,35 +0,0 @@ -# linux58-TkG config file -# Generic Desktop - - -#### KERNEL OPTIONS #### - -# Disable some non-module debugging - See PKGBUILD for the list -_debugdisable="false" - -# LEAVE AN EMPTY VALUE TO BE PROMPTED ABOUT FOLLOWING OPTIONS AT BUILD TIME - -# Set to "true" to disable FUNCTION_TRACER/GRAPH_TRACER, lowering overhead but limiting debugging and analyzing of kernel functions - Kernel default is "false" -_ftracedisable="false" - -# Set to "true" to disable NUMA, lowering overhead, but breaking CUDA/NvEnc on Nvidia equipped systems - Kernel default is "false" -_numadisable="false" - 
-# Set to "true" to use explicit preemption points to lower latency at the cost of a small throughput loss - Can give a nice perf boost in VMs - Kernel default is "false" -_voluntary_preempt="false" - -# A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience (ZENIFY) - Default is "true" -_zenify="true" - -# compiler optimization level - 1. Optimize for performance (-O2); 2. Optimize harder (-O3); 3. Optimize for size (-Os) - Kernel default is "2" -_compileroptlevel="1" - -# Trust the CPU manufacturer to initialize Linux's CRNG (RANDOM_TRUST_CPU) - Kernel default is "false" -_random_trust_cpu="false" - -# CPU scheduler runqueue sharing - No sharing (RQ_NONE), SMT (hyperthread) siblings (RQ_SMT), Multicore siblings (RQ_MC), Symmetric Multi-Processing (RQ_SMP), NUMA (RQ_ALL) -# Valid values are "none", "smt", "mc", "mc-llc"(for zen), "smp", "all" - Kernel default is "mc" -_runqueue_sharing="mc" - -# Timer frequency - "500", "750" or "1000" - More options available in kernel config prompt when left empty depending on selected cpusched - Kernel default is "750" -_timer_freq="500" diff --git a/linux58-tkg/linux58-tkg-config/prepare b/linux58-tkg/linux58-tkg-config/prepare deleted file mode 100644 index 9a4672c..0000000 --- a/linux58-tkg/linux58-tkg-config/prepare +++ /dev/null @@ -1,1015 +0,0 @@ -#!/bin/bash - -_basever=58 -_basekernel=5.8 -_sub=16 - -_tkg_initscript() { - - cp "$_where"/linux"$_basever"-tkg-patches/* "$_where" # copy patches inside the PKGBUILD's dir to preserve makepkg sourcing and md5sum checking - cp "$_where"/linux"$_basever"-tkg-config/* "$_where" # copy config files and hooks inside the PKGBUILD's dir to preserve makepkg sourcing and md5sum checking - - # Load external configuration file if present. Available variable values will overwrite customization.cfg ones. - if [ -e "$_EXT_CONFIG_PATH" ]; then - source "$_EXT_CONFIG_PATH" && msg2 "External configuration file $_EXT_CONFIG_PATH will be used to override customization.cfg values." && msg2 "" - fi - - if [ -z "$_OPTIPROFILE" ] && [ ! -e "$_where"/cpuschedset ]; then - # Prompt about optimized configurations. Available variable values will overwrite customization.cfg/external config ones. - plain "Do you want to use a predefined optimized profile?" - read -rp "`echo $' > 1.Custom\n 2.Ryzen Desktop (Performance)\n 3.Other Desktop (Performance)\nchoice[1-3?]: '`" _OPTIPROFILE; - fi - if [ "$_OPTIPROFILE" = "2" ]; then - source "$_where"/ryzen-desktop-profile.cfg && msg2 "Ryzen Desktop (Performance) profile will be used." && msg2 "" - elif [ "$_OPTIPROFILE" = "3" ]; then - source "$_where"/generic-desktop-profile.cfg && msg2 "Generic Desktop (Performance) profile will be used." && msg2 "" - fi - - # source cpuschedset early if present - if [ -e "$_where"/cpuschedset ]; then - source "$_where"/cpuschedset - fi - - # CPU SCHED selector - if [ -z "$_cpusched" ] && [ ! -e "$_where"/cpuschedset ]; then - plain "What CPU sched variant do you want to build/install?" 
- read -rp "`echo $' > 1.Undead PDS (TkG)\n 2.Project C / PDS\n 3.Project C / BMQ\n 4.CFS\nchoice[1-3?]: '`" CONDITION; - if [ "$CONDITION" = "2" ]; then - echo "_cpusched=\"pds\"" > "$_where"/cpuschedset - elif [ "$CONDITION" = "3" ]; then - echo "_cpusched=\"bmq\"" > "$_where"/cpuschedset - elif [ "$CONDITION" = "4" ]; then - echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - else - echo "_cpusched=\"upds\"" > "$_where"/cpuschedset - fi - if [ -n "$_custom_pkgbase" ]; then - echo "_custom_pkgbase=\"${_custom_pkgbase}\"" >> "$_where"/cpuschedset - fi - elif [ "$_cpusched" = "upds" ]; then - echo "_cpusched=\"upds\"" > "$_where"/cpuschedset - elif [ "$_cpusched" = "pds" ]; then - echo "_cpusched=\"pds\"" > "$_where"/cpuschedset - elif [ "$_cpusched" = "cfs" ]; then - echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - elif [ "$_cpusched" = "bmq" ]; then - echo "_cpusched=\"bmq\"" > "$_where"/cpuschedset - else - if [ "$_nofallback" != "true" ]; then - warning "Something is wrong with your cpusched selection. Do you want to fallback to CFS (default)?" - read -rp "`echo $' > N/y : '`" _fallback; - fi - if [[ "$_fallback" =~ [yY] ]] || [ "$_nofallback" = "true" ]; then - echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - else - error "Exiting..." - exit 1 - fi - fi - - source "$_where"/cpuschedset -} - -user_patcher() { - # To patch the user because all your base are belong to us - local _patches=("$_where"/*."${_userpatch_ext}revert") - if [ ${#_patches[@]} -ge 2 ] || [ -e "${_patches}" ]; then - if [ "$_user_patches_no_confirm" != "true" ]; then - msg2 "Found ${#_patches[@]} 'to revert' userpatches for ${_userpatch_target}:" - printf '%s\n' "${_patches[@]}" - read -rp "Do you want to install it/them? - Be careful with that ;)"$'\n> N/y : ' _CONDITION; - fi - if [[ "$_CONDITION" =~ [yY] ]] || [ "$_user_patches_no_confirm" = "true" ]; then - for _f in "${_patches[@]}"; do - if [ -e "${_f}" ]; then - msg2 "######################################################" - msg2 "" - msg2 "Reverting your own ${_userpatch_target} patch ${_f}" - msg2 "" - msg2 "######################################################" - patch -Np1 -R < "${_f}" - echo "Reverted your own patch ${_f}" >> "$_where"/last_build_config.log - fi - done - fi - fi - - _patches=("$_where"/*."${_userpatch_ext}patch") - if [ ${#_patches[@]} -ge 2 ] || [ -e "${_patches}" ]; then - if [ "$_user_patches_no_confirm" != "true" ]; then - msg2 "Found ${#_patches[@]} userpatches for ${_userpatch_target}:" - printf '%s\n' "${_patches[@]}" - read -rp "Do you want to install it/them? - Be careful with that ;)"$'\n> N/y : ' _CONDITION; - fi - if [[ "$_CONDITION" =~ [yY] ]] || [ "$_user_patches_no_confirm" = "true" ]; then - for _f in "${_patches[@]}"; do - if [ -e "${_f}" ]; then - msg2 "######################################################" - msg2 "" - msg2 "Applying your own ${_userpatch_target} patch ${_f}" - msg2 "" - msg2 "######################################################" - patch -Np1 < "${_f}" - echo "Applied your own patch ${_f}" >> "$_where"/last_build_config.log - fi - done - fi - fi -} - -_tkg_srcprep() { - - if [ "${_distro}" = "Arch" ]; then - msg2 "Setting version..." 
- scripts/setlocalversion --save-scmversion - echo "-$pkgrel-tkg-${_cpusched}" > localversion.10-pkgrel - echo "" > localversion.20-pkgname - - # add upstream patch - msg2 "Patching from $_basekernel to $pkgver" - patch -p1 -i "$srcdir"/patch-"${pkgver}" - - # ARCH Patches - if [ "${_configfile}" = "config_hardened.x86_64" ] && [ "${_cpusched}" = "cfs" ]; then - msg2 "Using linux hardened patchset" - patch -Np1 -i "$srcdir"/0012-linux-hardened.patch - else - patch -Np1 -i "$srcdir"/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch - fi - fi - - # graysky's cpu opts - https://github.com/graysky2/kernel_gcc_patch - msg2 "Applying graysky's cpu opts patch" - if [ "${_distro}" = "Arch" ]; then - patch -Np1 -i "$srcdir"/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v5.8%2B.patch - else - patch -Np1 -i "$srcdir"/enable_additional_cpu_optimizations_for_gcc_v10.1+_kernel_v5.8+.patch - fi - - # TkG - msg2 "Applying clear linux patches" - patch -Np1 -i "$srcdir"/0002-clear-patches.patch - - msg2 "Applying glitched base patch" - patch -Np1 -i "$srcdir"/0003-glitched-base.patch - - if [ -z $_misc_adds ]; then - plain "Enable misc additions ? May contain temporary fixes pending upstream or changes that can break on non-Arch. " - read -rp "`echo $' > [Y]/n : '`" _interactive_misc_adds; - if [ "$_interactive_misc_adds" != "n" ] && [ "$_interactive_misc_adds" != "N" ]; then - _misc_adds="true" - fi - fi - - if [ "$_misc_adds" = "true" ]; then - msg2 "Applying misc additions patch" - patch -Np1 -i "$srcdir"/0012-misc-additions.patch - fi - - if [ "${_cpusched}" = "MuQSS" ]; then - # MuQSS - msg2 "Applying MuQSS base patch" - patch -Np1 -i "$srcdir"/0004-5.8-ck1.patch - - if [ "${_aggressive_ondemand}" = "true" ]; then - msg2 "Applying MuQSS agressive ondemand governor patch" - patch -Np1 -i "$srcdir"/0004-glitched-ondemand-muqss.patch - fi - - msg2 "Applying Glitched MuQSS patch" - patch -Np1 -i "$srcdir"/0004-glitched-muqss.patch - - elif [ "${_cpusched}" = "pds" ]; then - # PDS-mq - msg2 "Applying PDS base patch" - patch -Np1 -i "$srcdir"/0009-prjc_v5.8-r3.patch - - if [ "${_aggressive_ondemand}" = "true" ]; then - msg2 "Applying PDS agressive ondemand governor patch" - patch -Np1 -i "$srcdir"/0009-glitched-ondemand-bmq.patch - fi - - msg2 "Applying Glitched PDS patch" - patch -Np1 -i "$srcdir"/0005-glitched-pds.patch - - elif [ "${_cpusched}" = "upds" ]; then - # PDS-mq - msg2 "Applying PDS base patch" - patch -Np1 -i "$srcdir"/0005-v5.8_undead-pds099o.patch - - if [ "${_aggressive_ondemand}" = "true" ]; then - msg2 "Applying PDS agressive ondemand governor patch" - patch -Np1 -i "$srcdir"/0005-undead-glitched-ondemand-pds.patch - fi - - msg2 "Applying Glitched PDS patch" - patch -Np1 -i "$srcdir"/0005-undead-glitched-pds.patch - - elif [ "${_cpusched}" = "bmq" ]; then - # Project C / BMQ - msg2 "Applying Project C / BMQ base patch" - - patch -Np1 -i "$srcdir"/0009-prjc_v5.8-r3.patch - - if [ "${_aggressive_ondemand}" = "true" ]; then - msg2 "Applying BMQ agressive ondemand governor patch" - patch -Np1 -i "$srcdir"/0009-glitched-ondemand-bmq.patch - fi - - msg2 "Applying Glitched BMQ patch" - patch -Np1 -i "$srcdir"/0009-glitched-bmq.patch - - elif [ "${_cpusched}" = "cfs" ]; then - msg2 "Applying Glitched CFS patch" - patch -Np1 -i "$srcdir"/0003-glitched-cfs.patch - fi - - if [ "${_distro}" = "Arch" ]; then - if [ -z "${_configfile}" ]; then - _configfile="config.x86_64" - fi - - cat "${srcdir}/${_configfile}" > ./.config - fi - - - # Set some -tkg defaults - echo "# 
CONFIG_DYNAMIC_FAULT is not set" >> ./.config - sed -i -e 's/CONFIG_DEFAULT_FQ_CODEL=y/# CONFIG_DEFAULT_FQ_CODEL is not set/' ./.config - echo "CONFIG_DEFAULT_CAKE=y" >> ./.config - echo "CONFIG_NR_TTY_DEVICES=63" >> ./.config - echo "# CONFIG_NTP_PPS is not set" >> ./.config - sed -i -e 's/CONFIG_CRYPTO_LZ4=m/CONFIG_CRYPTO_LZ4=y/' ./.config - sed -i -e 's/CONFIG_CRYPTO_LZ4HC=m/CONFIG_CRYPTO_LZ4HC=y/' ./.config - sed -i -e 's/CONFIG_LZ4_COMPRESS=m/CONFIG_LZ4_COMPRESS=y/' ./.config - sed -i -e 's/CONFIG_LZ4HC_COMPRESS=m/CONFIG_LZ4HC_COMPRESS=y/' ./.config - sed -i -e 's/CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO=y/# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set/' ./.config - sed -i -e 's/# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4 is not set/CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4=y/' ./.config - sed -i -e 's/CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lzo"/CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lz4"/' ./.config - #sed -i -e 's/CONFIG_RCU_BOOST_DELAY=500/CONFIG_RCU_BOOST_DELAY=0/' ./.config - sed -i -e 's/# CONFIG_CMDLINE_BOOL is not set/CONFIG_CMDLINE_BOOL=y/' ./.config - echo "CONFIG_CMDLINE=\"${_custom_commandline}\"" >> ./.config - echo "# CONFIG_CMDLINE_OVERRIDE is not set" >> ./.config - echo "# CONFIG_X86_P6_NOP is not set" >> ./.config - - # openrgb - echo "CONFIG_I2C_NCT6775=m" >> ./.config - - # ccache fix - if [ "$_noccache" != "true" ]; then - if { [ "$_distro" = "Arch" ] && pacman -Qq ccache &> /dev/null; } || { [ "$_distro" = "Ubuntu" ] && dpkg -l ccache > /dev/null; }; then - sed -i -e 's/CONFIG_GCC_PLUGINS=y/# CONFIG_GCC_PLUGINS is not set/' ./.config - fi - fi - # Skip dbg package creation on non-Arch - if [ "$_distro" != "Arch" ]; then - sed -i -e 's/CONFIG_DEBUG_INFO.*/CONFIG_DEBUG_INFO=n/' ./.config - fi - - if [ "$_font_autoselect" != "false" ]; then - sed -i -e 's/CONFIG_FONT_TER16x32=y/# CONFIG_FONT_TER16x32 is not set\nCONFIG_FONT_AUTOSELECT=y/' ./.config - fi - - # Inject cpuopts options - echo "# CONFIG_MK8SSE3 is not set" >> ./.config - echo "# CONFIG_MK10 is not set" >> ./.config - echo "# CONFIG_MBARCELONA is not set" >> ./.config - echo "# CONFIG_MBOBCAT is not set" >> ./.config - echo "# CONFIG_MJAGUAR is not set" >> ./.config - echo "# CONFIG_MBULLDOZER is not set" >> ./.config - echo "# CONFIG_MPILEDRIVER is not set" >> ./.config - echo "# CONFIG_MSTEAMROLLER is not set" >> ./.config - echo "# CONFIG_MEXCAVATOR is not set" >> ./.config - echo "# CONFIG_MZEN is not set" >> ./.config - echo "# CONFIG_MZEN2 is not set" >> ./.config - echo "# CONFIG_MATOM is not set" >> ./.config - echo "# CONFIG_MNEHALEM is not set" >> ./.config - echo "# CONFIG_MWESTMERE is not set" >> ./.config - echo "# CONFIG_MSILVERMONT is not set" >> ./.config - echo "# CONFIG_MSANDYBRIDGE is not set" >> ./.config - echo "# CONFIG_MIVYBRIDGE is not set" >> ./.config - echo "# CONFIG_MHASWELL is not set" >> ./.config - echo "# CONFIG_MBROADWELL is not set" >> ./.config - echo "# CONFIG_MSKYLAKE is not set" >> ./.config - echo "# CONFIG_MSKYLAKEX is not set" >> ./.config - echo "# CONFIG_MCANNONLAKE is not set" >> ./.config - echo "# CONFIG_MICELAKE is not set" >> ./.config - echo "# CONFIG_MGOLDMONT is not set" >> ./.config - echo "# CONFIG_MGOLDMONTPLUS is not set" >> ./.config - echo "# CONFIG_MCASCADELAKE is not set" >> ./.config - echo "# CONFIG_MCOOPERLAKE is not set" >> ./.config - echo "# CONFIG_MTIGERLAKE is not set" >> ./.config - - # Disable some debugging - if [ "${_debugdisable}" = "true" ]; then - sed -i -e 's/CONFIG_SLUB_DEBUG=y/# CONFIG_SLUB_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_DEBUG=y/# 
CONFIG_PM_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_ADVANCED_DEBUG=y/# CONFIG_PM_ADVANCED_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_SLEEP_DEBUG=y/# CONFIG_PM_SLEEP_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_ACPI_DEBUG=y/# CONFIG_ACPI_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_SCHED_DEBUG=y/# CONFIG_SCHED_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_LATENCYTOP=y/# CONFIG_LATENCYTOP is not set/' ./.config - sed -i -e 's/CONFIG_DEBUG_PREEMPT=y/# CONFIG_DEBUG_PREEMPT is not set/' ./.config - fi - - if [ "${_cpusched}" = "MuQSS" ]; then - # MuQSS default config - echo "CONFIG_SCHED_MUQSS=y" >> ./.config - elif [ "${_cpusched}" = "upds" ]; then - # PDS default config - echo "CONFIG_SCHED_PDS=y" >> ./.config - elif [ "${_cpusched}" = "pds" ]; then - # PDS default config - echo "CONFIG_SCHED_ALT=y" >> ./.config - echo "CONFIG_SCHED_PDS=y" >> ./.config - echo "# CONFIG_SCHED_BMQ is not set" >> ./.config - elif [ "${_cpusched}" = "bmq" ]; then - # BMQ default config - echo "CONFIG_SCHED_ALT=y" >> ./.config - echo "CONFIG_SCHED_BMQ=y" >> ./.config - echo "# CONFIG_SCHED_PDS is not set" >> ./.config - fi - - if [ "${_cpusched}" = "MuQSS" ] || [ "${_cpusched}" = "pds" ] || [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "upds" ]; then - # Disable CFS - sed -i -e 's/CONFIG_FAIR_GROUP_SCHED=y/# CONFIG_FAIR_GROUP_SCHED is not set/' ./.config - sed -i -e 's/CONFIG_CFS_BANDWIDTH=y/# CONFIG_CFS_BANDWIDTH is not set/' ./.config - # sched yield type - if [ -n "$_sched_yield_type" ]; then - CONDITION0="$_sched_yield_type" - else - plain "" - plain "CPU sched_yield_type - Choose what sort of yield sched_yield will perform." - plain "" - plain "For PDS and MuQSS:" - plain "0: No yield." - plain "1: Yield only to better priority/deadline tasks." - plain "2: Expire timeslice and recalculate deadline." - plain "" - plain "For BMQ (experimental) - No recommended value yet, so try for yourself x) :" - plain "0: No yield." - plain "1: Deboost and requeue task. (default)" - plain "2: Set rq skip task." - if [ "${_cpusched}" = "MuQSS" ]; then - read -rp "`echo $'\n 0. Supposedly best option for gaming performance - could lead to stability issues on some (AMD) platforms when combined with MuQSS\n > 1. Default and recommended option for MuQSS - could lead to stability issues on some (Intel) platforms\n 2. Can be a good option with low rr_interval on MuQSS\n [0-2?]: '`" CONDITION0; - else - read -rp "`echo $'\n > 0. Recommended option for gaming on PDS - "tkg" default\n 1. Default, but can lead to stability issues on some platforms\n 2. 
Can be a good option with low rr_interval on MuQSS\n [0-2?]: '`" CONDITION0; - fi - fi - if [ "$CONDITION0" = "0" ]; then - if [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/alt_core.c - elif [ "${_cpusched}" = "upds" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/pds.c - else - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/"${_cpusched}".c - fi - elif [ "$CONDITION0" = "1" ]; then - msg2 "Using default CPU sched yield type (1)" - elif [ "$CONDITION0" = "2" ]; then - if [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 2;/' ./kernel/sched/alt_core.c - elif [ "${_cpusched}" = "upds" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 2;/' ./kernel/sched/pds.c - else - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 2;/' ./kernel/sched/"${_cpusched}".c - fi - else - if [ "${_cpusched}" = "MuQSS" ]; then - msg2 "Using default CPU sched yield type (1)" - elif [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/alt_core.c - elif [ "${_cpusched}" = "upds" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/pds.c - else - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/"${_cpusched}".c - fi - fi - fi - - # Round Robin interval - if [ "${_cpusched}" = "MuQSS" ] || [ "${_cpusched}" = "pds" ] || [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "upds" ]; then - if [ -n "$_rr_interval" ]; then - CONDITION1="$_rr_interval" - else - plain "" - plain "Round Robin interval is the longest duration two tasks with the same nice level will" - plain "be delayed for. When CPU time is requested by a task, it receives a time slice equal" - plain "to the rr_interval in addition to a virtual deadline. When using yield_type 2, a low" - plain "value can help offset the disadvantages of rescheduling a process that has yielded." 
- plain "" - plain "MuQSS default: 6ms" - plain "PDS default: 4ms" - plain "BMQ default: 2ms" - read -rp "`echo $'\n > 0.Keep defaults\n 1.2ms\n 2.4ms\n 3.6ms\n 4.8ms\n [0-4?]: '`" CONDITION1; - fi - if [ "$CONDITION1" = "1" ]; then - msg2 "Using 2ms rr_interval" - _rrvalue="2" - elif [ "$CONDITION1" = "2" ]; then - msg2 "Using 4ms rr_interval" - _rrvalue="4" - elif [ "$CONDITION1" = "3" ]; then - msg2 "Using 6ms rr_interval" - _rrvalue="6" - elif [ "$CONDITION1" = "4" ]; then - msg2 "Using 8ms rr_interval" - _rrvalue="8" - else - msg2 "Using default rr_interval" - _rrvalue="default" - fi - if [ "$_rrvalue" != "default" ]; then - if [ "${_cpusched}" = "MuQSS" ]; then - sed -i -e "s/int rr_interval __read_mostly = 6;/int rr_interval __read_mostly = ${_rrvalue};/" ./kernel/sched/"${_cpusched}".c - elif [ "${_cpusched}" = "upds" ]; then - sed -i -e "s/#define SCHED_DEFAULT_RR (4)/#define SCHED_DEFAULT_RR (${_rrvalue})/" ./kernel/sched/pds.c - elif [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e "s/u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000);/u64 sched_timeslice_ns __read_mostly = (${_rrvalue} * 1000 * 1000);/" ./kernel/sched/alt_core.c - fi - else - if [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e "s/u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000);/u64 sched_timeslice_ns __read_mostly = (2 * 1000 * 1000);/" ./kernel/sched/alt_core.c - fi - fi - fi - - # zenify - if [ "$_zenify" = "true" ]; then - echo "CONFIG_ZENIFY=y" >> ./.config - elif [ "$_zenify" = "false" ]; then - echo "# CONFIG_ZENIFY is not set" >> ./.config - fi - - # compiler optimization level - if [ "$_compileroptlevel" = "1" ]; then - echo "# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 is not set" >> ./.config - elif [ "$_compileroptlevel" = "2" ]; then - sed -i -e 's/CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y/# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set/' ./.config - echo "CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y" >> ./.config - elif [ "$_compileroptlevel" = "3" ]; then - sed -i -e 's/CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y/# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set/' ./.config - sed -i -e 's/# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set/CONFIG_CC_OPTIMIZE_FOR_SIZE=y/' ./.config - echo "# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 is not set" >> ./.config - fi - - # cpu opt - if [ -n "$_processor_opt" ] && [ "$_processor_opt" != "native" ]; then - echo "# CONFIG_MNATIVE is not set" >> ./.config - fi - - if [ -n "$_processor_opt" ] && [ "$_processor_opt" != "generic" ]; then - sed -i -e 's/CONFIG_GENERIC_CPU=y/# CONFIG_GENERIC_CPU is not set/' ./.config - fi - - if [ "$_processor_opt" = "native" ]; then - echo "CONFIG_MNATIVE=y" >> ./.config - elif [ "$_processor_opt" = "k8" ]; then - sed -i -e 's/# CONFIG_MK8 is not set/CONFIG_MK8=y/' ./.config - elif [ "$_processor_opt" = "k8sse3" ]; then - sed -i -e 's/# CONFIG_MK8SSE3 is not set/CONFIG_MK8SSE3=y/' ./.config - elif [ "$_processor_opt" = "k10" ]; then - sed -i -e 's/# CONFIG_MK10 is not set/CONFIG_MK10=y/' ./.config - elif [ "$_processor_opt" = "barcelona" ]; then - sed -i -e 's/# CONFIG_MBARCELONA is not set/CONFIG_MBARCELONA=y/' ./.config - elif [ "$_processor_opt" = "bobcat" ]; then - sed -i -e 's/# CONFIG_MBOBCAT is not set/CONFIG_MBOBCAT=y/' ./.config - elif [ "$_processor_opt" = "jaguar" ]; then - sed -i -e 's/# CONFIG_MJAGUAR is not set/CONFIG_MJAGUAR=y/' ./.config - elif [ "$_processor_opt" = "bulldozer" ]; then - sed -i -e 's/# CONFIG_MBULLDOZER is not set/CONFIG_MBULLDOZER=y/' ./.config - elif [ 
"$_processor_opt" = "piledriver" ]; then - sed -i -e 's/# CONFIG_MPILEDRIVER is not set/CONFIG_MPILEDRIVER=y/' ./.config - elif [ "$_processor_opt" = "steamroller" ]; then - sed -i -e 's/# CONFIG_MSTEAMROLLER is not set/CONFIG_MSTEAMROLLER=y/' ./.config - elif [ "$_processor_opt" = "excavator" ]; then - sed -i -e 's/# CONFIG_MEXCAVATOR is not set/CONFIG_MEXCAVATOR=y/' ./.config - elif [ "$_processor_opt" = "zen" ]; then - sed -i -e 's/# CONFIG_MZEN is not set/CONFIG_MZEN=y/' ./.config - elif [ "$_processor_opt" = "zen2" ]; then - sed -i -e 's/# CONFIG_MZEN2 is not set/CONFIG_MZEN2=y/' ./.config - elif [ "$_processor_opt" = "mpsc" ]; then - sed -i -e 's/# CONFIG_MPSC is not set/CONFIG_MPSC=y/' ./.config - elif [ "$_processor_opt" = "atom" ]; then - sed -i -e 's/# CONFIG_MATOM is not set/CONFIG_MATOM=y/' ./.config - elif [ "$_processor_opt" = "core2" ]; then - sed -i -e 's/# CONFIG_MCORE2 is not set/CONFIG_MCORE2=y/' ./.config - elif [ "$_processor_opt" = "nehalem" ]; then - sed -i -e 's/# CONFIG_MNEHALEM is not set/CONFIG_MNEHALEM=y/' ./.config - elif [ "$_processor_opt" = "westmere" ]; then - sed -i -e 's/# CONFIG_MWESTMERE is not set/CONFIG_MWESTMERE=y/' ./.config - elif [ "$_processor_opt" = "silvermont" ]; then - sed -i -e 's/# CONFIG_MSILVERMONT is not set/CONFIG_MSILVERMONT=y/' ./.config - elif [ "$_processor_opt" = "sandybridge" ]; then - sed -i -e 's/# CONFIG_MSANDYBRIDGE is not set/CONFIG_MSANDYBRIDGE=y/' ./.config - elif [ "$_processor_opt" = "ivybridge" ]; then - sed -i -e 's/# CONFIG_MIVYBRIDGE is not set/CONFIG_MIVYBRIDGE=y/' ./.config - elif [ "$_processor_opt" = "haswell" ]; then - sed -i -e 's/# CONFIG_MHASWELL is not set/CONFIG_MHASWELL=y/' ./.config - elif [ "$_processor_opt" = "broadwell" ]; then - sed -i -e 's/# CONFIG_MBROADWELL is not set/CONFIG_MBROADWELL=y/' ./.config - elif [ "$_processor_opt" = "skylake" ]; then - sed -i -e 's/# CONFIG_MSKYLAKE is not set/CONFIG_MSKYLAKE=y/' ./.config - elif [ "$_processor_opt" = "skylakex" ]; then - sed -i -e 's/# CONFIG_MSKYLAKEX is not set/CONFIG_MSKYLAKEX=y/' ./.config - elif [ "$_processor_opt" = "cannonlake" ]; then - sed -i -e 's/# CONFIG_MCANNONLAKE is not set/CONFIG_MCANNONLAKE=y/' ./.config - elif [ "$_processor_opt" = "icelake" ]; then - sed -i -e 's/# CONFIG_MICELAKE is not set/CONFIG_MICELAKE=y/' ./.config - elif [ "$_processor_opt" = "goldmont" ]; then - sed -i -e 's/# CONFIG_MGOLDMONT is not set/CONFIG_MGOLDMONT=y/' ./.config - elif [ "$_processor_opt" = "goldmontplus" ]; then - sed -i -e 's/# CONFIG_MGOLDMONTPLUS is not set/CONFIG_MGOLDMONTPLUS=y/' ./.config - elif [ "$_processor_opt" = "cascadelake" ]; then - sed -i -e 's/# CONFIG_MCASCADELAKE is not set/CONFIG_MCASCADELAKE=y/' ./.config - elif [ "$_processor_opt" = "cooperlake" ]; then - sed -i -e 's/# CONFIG_MCOOPERLAKE is not set/CONFIG_MCOOPERLAKE=y/' ./.config - elif [ "$_processor_opt" = "tigerlake" ]; then - sed -i -e 's/# CONFIG_MTIGERLAKE is not set/CONFIG_MTIGERLAKE=y/' ./.config - fi - - # irq threading - if [ "$_irq_threading" = "true" ]; then - echo "CONFIG_FORCE_IRQ_THREADING=y" >> ./.config - elif [ "$_irq_threading" = "false" ]; then - echo "# CONFIG_FORCE_IRQ_THREADING is not set" >> ./.config - fi - - # smt nice - if [ "$_smt_nice" = "true" ]; then - echo "CONFIG_SMT_NICE=y" >> ./.config - elif [ "$_smt_nice" = "false" ]; then - echo "# CONFIG_SMT_NICE is not set" >> ./.config - fi - - # random trust cpu - if [ "$_random_trust_cpu" = "true" ]; then - sed -i -e 's/# CONFIG_RANDOM_TRUST_CPU is not set/CONFIG_RANDOM_TRUST_CPU=y/' ./.config - fi - - # 
rq sharing - if [ "$_runqueue_sharing" = "none" ]; then - echo -e "CONFIG_RQ_NONE=y\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ -z "$_runqueue_sharing" ] || [ "$_runqueue_sharing" = "smt" ]; then - echo -e "# CONFIG_RQ_NONE is not set\nCONFIG_RQ_SMT=y\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" = "mc" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\nCONFIG_RQ_MC=y\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" = "smp" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\nCONFIG_RQ_SMP=y\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" = "all" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\nCONFIG_RQ_ALL=y" >> ./.config - elif [ "$_runqueue_sharing" = "mc-llc" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\nCONFIG_RQ_MC_LLC=y\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - fi - - # timer freq - if [ -n "$_timer_freq" ] && [ "$_timer_freq" != "300" ]; then - sed -i -e 's/CONFIG_HZ_300=y/# CONFIG_HZ_300 is not set/' ./.config - sed -i -e 's/CONFIG_HZ_300_NODEF=y/# CONFIG_HZ_300_NODEF is not set/' ./.config - if [ "$_timer_freq" = "1000" ]; then - sed -i -e 's/# CONFIG_HZ_1000 is not set/CONFIG_HZ_1000=y/' ./.config - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=1000/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "CONFIG_HZ_1000_NODEF=y" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" = "750" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=750/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "CONFIG_HZ_750=y" >> ./.config - echo "CONFIG_HZ_750_NODEF=y" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" = "500" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=500/' ./.config - echo "CONFIG_HZ_500=y" >> ./.config - echo "CONFIG_HZ_500_NODEF=y" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" = "100" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=100/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config 
- echo "CONFIG_HZ_100=y" >> ./.config - echo "CONFIG_HZ_100_NODEF=y" >> ./.config - fi - elif [ "${_cpusched}" = "MuQSS" ] && [ -z "$_timer_freq" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=100/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - echo "CONFIG_HZ_100=y" >> ./.config - echo "CONFIG_HZ_100_NODEF=y" >> ./.config - else - sed -i -e 's/CONFIG_HZ_300=y/# CONFIG_HZ_300 is not set/' ./.config - sed -i -e 's/CONFIG_HZ_300_NODEF=y/# CONFIG_HZ_300_NODEF is not set/' ./.config - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=500/' ./.config - echo "CONFIG_HZ_500=y" >> ./.config - echo "CONFIG_HZ_500_NODEF=y" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - fi - - # default cpu gov - if [ "$_default_cpu_gov" = "performance" ]; then - sed -i -e 's/CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y/# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set/' ./.config - sed -i -e 's/# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set/CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y/' ./.config - elif [ "$_default_cpu_gov" = "ondemand" ]; then - sed -i -e 's/CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y/# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set/' ./.config - sed -i -e 's/# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set/CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y/' ./.config - fi - - # ACPI_CPUFREQ disablement - if [ "$_disable_acpi_cpufreq" = "true" ]; then - sed -i -e 's/CONFIG_X86_ACPI_CPUFREQ=m/# CONFIG_X86_ACPI_CPUFREQ is not set/' ./.config - fi - - # ftrace - if [ -z "$_ftracedisable" ]; then - plain "" - plain "Disable FUNCTION_TRACER/GRAPH_TRACER? Lowers overhead but limits debugging" - plain "and analyzing of kernel functions." - read -rp "`echo $' > N/y : '`" CONDITION2; - fi - if [[ "$CONDITION2" =~ [yY] ]] || [ "$_ftracedisable" = "true" ]; then - sed -i -e 's/CONFIG_FUNCTION_TRACER=y/# CONFIG_FUNCTION_TRACER is not set/' ./.config - sed -i -e 's/CONFIG_FUNCTION_GRAPH_TRACER=y/# CONFIG_FUNCTION_GRAPH_TRACER is not set/' ./.config - fi - - # disable numa - if [ -z "$_numadisable" ]; then - plain "" - plain "Disable NUMA? Lowers overhead, but breaks CUDA/NvEnc on Nvidia if disabled." - plain "https://bbs.archlinux.org/viewtopic.php?id=239174" - read -rp "`echo $' > N/y : '`" CONDITION3; - fi - if [[ "$CONDITION3" =~ [yY] ]] || [ "$_numadisable" = "true" ]; then - # disable NUMA since 99.9% of users do not have multiple CPUs but do have multiple cores in one CPU - sed -i -e 's/CONFIG_NUMA=y/# CONFIG_NUMA is not set/' \ - -i -e '/CONFIG_AMD_NUMA=y/d' \ - -i -e '/CONFIG_X86_64_ACPI_NUMA=y/d' \ - -i -e '/CONFIG_NODES_SPAN_OTHER_NODES=y/d' \ - -i -e '/# CONFIG_NUMA_EMU is not set/d' \ - -i -e '/CONFIG_NODES_SHIFT=6/d' \ - -i -e '/CONFIG_NEED_MULTIPLE_NODES=y/d' \ - -i -e '/CONFIG_USE_PERCPU_NUMA_NODE_ID=y/d' \ - -i -e '/CONFIG_ACPI_NUMA=y/d' ./.config - fi - - # tickless - if [ -z "$_tickless" ]; then - plain "" - plain "Use CattaRappa mode (Tickless/Dynticks) ?" - plain "Can give higher performances in many cases but lower consistency on some hardware." - plain "Just tickless idle can perform better with some platforms (mostly AMD) or CPU schedulers (mostly MuQSS)." 
- if [ "${_cpusched}" = "MuQSS" ]; then - read -rp "`echo $'\n 0.No, use periodic ticks\n 1.Yes, full tickless baby!\n > 2.Just tickless idle plz\n [0-2?]: '`" CONDITION4; - else - read -rp "`echo $'\n 0.No, use periodic ticks\n > 1.Yes, full tickless baby!\n 2.Just tickless idle plz\n [0-2?]: '`" CONDITION4; - fi - fi - if [ "$CONDITION4" = "0" ] || [ "$_tickless" = "0" ]; then - echo "# CONFIG_NO_HZ_FULL_NODEF is not set" >> ./.config - sed -i -e 's/# CONFIG_HZ_PERIODIC is not set/CONFIG_HZ_PERIODIC=y/' ./.config - sed -i -e 's/CONFIG_NO_HZ_IDLE=y/# CONFIG_NO_HZ_IDLE is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_FULL=y/# CONFIG_NO_HZ_FULL is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ=y/# CONFIG_NO_HZ is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_COMMON=y/# CONFIG_NO_HZ_COMMON is not set/' ./.config - elif [ "$CONDITION4" = "2" ] || [ "$_tickless" = "2" ]; then - echo "# CONFIG_NO_HZ_FULL_NODEF is not set" >> ./.config - sed -i -e 's/CONFIG_HZ_PERIODIC=y/# CONFIG_HZ_PERIODIC is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_IDLE is not set/CONFIG_NO_HZ_IDLE=y/' ./.config - sed -i -e 's/CONFIG_NO_HZ_FULL=y/# CONFIG_NO_HZ_FULL is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ is not set/CONFIG_NO_HZ=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_COMMON is not set/CONFIG_NO_HZ_COMMON=y/' ./.config - else - if [ "${_cpusched}" = "MuQSS" ]; then - echo "# CONFIG_NO_HZ_FULL_NODEF is not set" >> ./.config - sed -i -e 's/CONFIG_HZ_PERIODIC=y/# CONFIG_HZ_PERIODIC is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_IDLE is not set/CONFIG_NO_HZ_IDLE=y/' ./.config - sed -i -e 's/CONFIG_NO_HZ_FULL=y/# CONFIG_NO_HZ_FULL is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ is not set/CONFIG_NO_HZ=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_COMMON is not set/CONFIG_NO_HZ_COMMON=y/' ./.config - else - echo "CONFIG_NO_HZ_FULL_NODEF=y" >> ./.config - sed -i -e 's/CONFIG_HZ_PERIODIC=y/# CONFIG_HZ_PERIODIC is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_IDLE=y/# CONFIG_NO_HZ_IDLE is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_FULL is not set/CONFIG_NO_HZ_FULL=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ is not set/CONFIG_NO_HZ=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_COMMON is not set/CONFIG_NO_HZ_COMMON=y/' ./.config - echo "CONFIG_CONTEXT_TRACKING=y" >> ./.config - echo "# CONFIG_CONTEXT_TRACKING_FORCE is not set" >> ./.config - fi - fi - - # voluntary preempt - if [ -z "$_voluntary_preempt" ]; then - plain "" - plain "Use explicit preemption points?" - plain "It can improve latency on PDS (at the cost of throughput)" - plain "and improve throughput on other schedulers (at the cost of latency)" - read -rp "`echo $' > N/y : '`" CONDITION5; - fi - if [[ "$CONDITION5" =~ [yY] ]] || [ "$_voluntary_preempt" = "true" ]; then - sed -i -e 's/CONFIG_PREEMPT=y/# CONFIG_PREEMPT is not set/' ./.config - sed -i -e 's/CONFIG_PREEMPT_LL=y/# CONFIG_PREEMPT_LL is not set/' ./.config - sed -i -e 's/# CONFIG_PREEMPT_VOLUNTARY is not set/CONFIG_PREEMPT_VOLUNTARY=y/' ./.config - fi - - # Open Firmware support - if [ -z "$_OFenable" ]; then - plain "" - plain "Enable Device Tree and Open Firmware support?" - read -rp "`echo $' > N/y : '`" CONDITION6; - fi - if [[ "$CONDITION6" =~ [yY] ]] || [ "$_OFenable" = "true" ]; then - sed -i -e 's/# CONFIG_OF is not set/CONFIG_OF=y/' ./.config - fi - - # acs override - if [ -z "$_acs_override" ]; then - plain "" - plain "Use ACS override patch?" 
- plain "https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29" - read -rp "`echo $' > N/y : '`" CONDITION7; - fi - if [[ "$CONDITION7" =~ [yY] ]] || [ "$_acs_override" = "true" ]; then - msg2 "Patching ACS override" - patch -Np1 -i "$srcdir"/0006-add-acs-overrides_iommu.patch - fi - - # bcachefs - if [ -z "$_bcachefs" ]; then - plain "" - plain "Add Bcache filesystem support? You'll have to install bcachefs-tools-git from AUR for utilities." - plain "https://bcachefs.org/" - read -rp "`echo $' > N/y : '`" CONDITION8; - fi - if [[ "$CONDITION8" =~ [yY] ]] || [ "$_bcachefs" = "true" ]; then - msg2 "Patching Bcache filesystem support override" - patch -Np1 -i "$srcdir"/0008-5.8-bcachefs.patch - echo "CONFIG_BCACHEFS_FS=m" >> ./.config - echo "CONFIG_BCACHEFS_QUOTA=y" >> ./.config - echo "CONFIG_BCACHEFS_POSIX_ACL=y" >> ./.config - echo "# CONFIG_BCACHEFS_DEBUG is not set" >> ./.config - echo "# CONFIG_BCACHEFS_TESTS is not set" >> ./.config - echo "# CONFIG_DEBUG_CLOSURES is not set" >> ./.config - fi - - # fsync support - if [ -z "$_fsync" ]; then - plain "" - plain "Enable support for fsync, an experimental replacement for esync in Valve Proton 4.11+" - plain "https://steamcommunity.com/games/221410/announcements/detail/2957094910196249305" - read -rp "`echo $' > N/y : '`" CONDITION9; - fi - if [[ "$CONDITION9" =~ [yY] ]] || [ "$_fsync" = "true" ]; then - msg2 "Patching Fsync support" - patch -Np1 -i "$srcdir"/0007-v5.8-fsync.patch - fi - - # ZFS fix - if [ -z "$_zfsfix" ]; then - plain "" - plain "Add back missing symbol for AES-NI/AVX support on ZFS" - plain "https://github.com/NixOS/nixpkgs/blob/master/pkgs/os-specific/linux/kernel/export_kernel_fpu_functions_5_3.patch" - read -rp "`echo $' > N/y : '`" CONDITION11; - fi - if [[ "$CONDITION11" =~ [yY] ]] || [ "$_zfsfix" = "true" ]; then - msg2 "Patching missing symbol for AES-NI/AVX support on ZFS" - patch -Np1 -i "$srcdir"/0011-ZFS-fix.patch - fi - - # Community patches - if [ -n "$_community_patches" ]; then - if [ ! -d "$_where/../../community-patches" ]; then - cd "$_where/../.." && git clone https://github.com/Frogging-Family/community-patches.git && cd "${srcdir}/${_srcpath}" - fi - _community_patches=($_community_patches) - for _p in ${_community_patches[@]}; do - ln -s "$_where"/../../community-patches/linux"$_basever"-tkg/$_p "$_where"/ - done - fi - - # userpatches - if [ "$_user_patches" = "true" ]; then - _userpatch_target="linux-${_basekernel}" - _userpatch_ext="my" - user_patcher - fi - - # Community patches removal - for _p in ${_community_patches[@]}; do - rm -f "$_where"/$_p - done - - if [ "$_distro" = "Arch" ]; then - # don't run depmod on 'make install'. We'll do this ourselves in packaging - sed -i '2iexit 0' scripts/depmod.sh - - # get kernel version - make prepare - fi - - # modprobed-db - if [ -z "$_modprobeddb" ]; then - plain "" - plain "Use modprobed db to clean config from unneeded modules?" - plain "Speeds up compilation considerably. Requires root." - plain "https://wiki.archlinux.org/index.php/Modprobed-db" - plain "!!!! Make sure to have a well populated db !!!!" 
- read -rp "`echo $' > N/y : '`" CONDITIONMPDB; - fi - if [[ "$CONDITIONMPDB" =~ [yY] ]] || [ "$_modprobeddb" = "true" ]; then - sudo modprobed-db recall - yes "" | make localmodconfig - fi - - if [ true = "$_config_fragments" ]; then - local fragments=() - mapfile -d '' -t fragments < <(find "$_where"/ -type f -name "*.myfrag" -print0) - - if [ true = "$_config_fragments_no_confirm" ]; then - printf 'Using config fragment %s\n' "${fragments[@]#$_where/}" - else - for i in "${!fragments[@]}"; do - while true; do - read -r -p 'Found config fragment '"${fragments[$i]#$_where/}"', apply it? [y/N] ' CONDITIONMPDB - CONDITIONMPDB="$(printf '%s' "$CONDITIONMPDB" | tr '[:upper:]' '[:lower:]')" - case "$CONDITIONMPDB" in - y|yes) - break;; - n|no|'') - unset fragments[$i] - break;; - *) - echo 'Please answer with yes or no' - esac - done - done - fi - - if [ 0 -lt "${#fragments[@]}" ]; then - scripts/kconfig/merge_config.sh -m .config "${fragments[@]}" - fi - fi - - # menuconfig / nconfig - if [ -z "$_menunconfig" ]; then - plain "" - plain "*Optional* For advanced users - Do you want to use make menuconfig or nconfig" - plain "to configure the kernel before building it?" - plain "If you do, make sure your terminal is currently" - plain "at least 19 lines by 80 columns large or you'll get an error :D" - read -rp "`echo $' > 0. nope\n 1. menuconfig\n 2. nconfig\n 3. xconfig\n choice[0-3?]: '`" CONDITIONMNC; - _menunconfig="$CONDITIONMNC" - fi - if [ 1 = "$_menunconfig" ]; then - cp .config .config.orig - make menuconfig - elif [ 2 = "$_menunconfig" ]; then - cp .config .config.orig - make nconfig - elif [ 3 = "$_menunconfig" ]; then - cp .config .config.orig - make xconfig - else - # rewrite configuration - yes "" | make config >/dev/null - fi - if [ 1 = "$_menunconfig" ] || [ 2 = "$_menunconfig" ] || [ 3 = "$_menunconfig" ]; then - if [ -z "${_diffconfig}" ]; then - while true; do - read -r -p 'Generate a config fragment from your changes? [y/N] ' CONDITIONF - CONDITIONF="$(printf '%s' "$CONDITIONF" | tr '[:upper:]' '[:lower:]')" - case "$CONDITIONF" in - y|yes) - _diffconfig=true - break;; - n|no|'') - _diffconfig=false - break;; - *) - echo 'Please answer with yes or no' - esac - done - fi - if [ true = "$_diffconfig" ]; then - if [ -z "$_diffconfig_name" ]; then - IFS= read -r -p 'Filename for the config fragment [leave empty to not generate fragment]: ' _diffconfig_name - fi - if [ -z "$_diffconfig_name" ]; then - echo 'No file name given, not generating config fragment.' 
- else ( - prev_pwd="${PWD:-$(pwd)}" - cd "$_where" - "${prev_pwd}/scripts/diffconfig" -m "${prev_pwd}/.config.orig" "${prev_pwd}/.config" > "$_diffconfig_name" - ) fi - fi - rm .config.orig - fi - - if [ "$_distro" = "Arch" ]; then - make -s kernelrelease > version - msg2 "Prepared %s version %s" "$pkgbase" "$( -From: Serge Hallyn -Date: Fri, 31 May 2013 19:12:12 +0100 -Subject: [PATCH] add sysctl to disallow unprivileged CLONE_NEWUSER by default - -Signed-off-by: Serge Hallyn -[bwh: Remove unneeded binary sysctl bits] -Signed-off-by: Daniel Micay ---- - kernel/fork.c | 15 +++++++++++++++ - kernel/sysctl.c | 12 ++++++++++++ - kernel/user_namespace.c | 3 +++ - 3 files changed, 30 insertions(+) - -diff --git a/kernel/fork.c b/kernel/fork.c -index 07cc743698d3668e..4011d68a8ff9305c 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -102,6 +102,11 @@ - - #define CREATE_TRACE_POINTS - #include -+#ifdef CONFIG_USER_NS -+extern int unprivileged_userns_clone; -+#else -+#define unprivileged_userns_clone 0 -+#endif - - /* - * Minimum number of threads to boot the kernel -@@ -1555,6 +1560,10 @@ static __latent_entropy struct task_struct *copy_process( - if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) - return ERR_PTR(-EINVAL); - -+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) -+ if (!capable(CAP_SYS_ADMIN)) -+ return ERR_PTR(-EPERM); -+ - /* - * Thread groups must share signals as well, and detached threads - * can only be started up within the thread group. -@@ -2348,6 +2357,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) - if (unshare_flags & CLONE_NEWNS) - unshare_flags |= CLONE_FS; - -+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { -+ err = -EPERM; -+ if (!capable(CAP_SYS_ADMIN)) -+ goto bad_unshare_out; -+ } -+ - err = check_unshare_flags(unshare_flags); - if (err) - goto bad_unshare_out; -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index b86520ed3fb60fbf..f7dab3760839f1a1 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -105,6 +105,9 @@ extern int core_uses_pid; - - #if defined(CONFIG_SYSCTL) - -+#ifdef CONFIG_USER_NS -+extern int unprivileged_userns_clone; -+#endif - /* Constants used for minimum and maximum */ - #ifdef CONFIG_LOCKUP_DETECTOR - static int sixty = 60; -@@ -513,6 +516,15 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_USER_NS -+ { -+ .procname = "unprivileged_userns_clone", -+ .data = &unprivileged_userns_clone, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+#endif - #ifdef CONFIG_PROC_SYSCTL - { - .procname = "tainted", -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index c490f1e4313b998a..dd03bd39d7bf194d 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -24,6 +24,9 @@ - #include - #include - -+/* sysctl */ -+int unprivileged_userns_clone; -+ - static struct kmem_cache *user_ns_cachep __read_mostly; - static DEFINE_MUTEX(userns_state_mutex); - --- -2.15.1 - -From b5202296055dd333db4425120d3f93ef4e6a0573 Mon Sep 17 00:00:00 2001 -From: "Jan Alexander Steffens (heftig)" -Date: Thu, 7 Dec 2017 13:50:48 +0100 -Subject: ZEN: Add CONFIG for unprivileged_userns_clone - -This way our default behavior continues to match the vanilla kernel. 
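The two userns patches being removed here (the sysctl patch above and the CONFIG_USER_NS_UNPRIVILEGED patch whose diff follows) work as a pair: the first gates CLONE_NEWUSER for non-root users behind a `kernel.unprivileged_userns_clone` sysctl, the second adds a Kconfig switch that only picks the build-time default. A minimal shell sketch of how the knob is exercised at runtime on a kernel that carries these patches — on a vanilla kernel the sysctl simply does not exist, and the drop-in file name below is an arbitrary example:

  # 1 = unprivileged user namespaces allowed (the CONFIG_USER_NS_UNPRIVILEGED=y default)
  sysctl kernel.unprivileged_userns_clone

  # Disallow them for the current boot only
  sudo sysctl -w kernel.unprivileged_userns_clone=0

  # Make the choice persistent across reboots
  echo 'kernel.unprivileged_userns_clone=0' | sudo tee /etc/sysctl.d/99-userns.conf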
---- - init/Kconfig | 16 ++++++++++++++++ - kernel/user_namespace.c | 4 ++++ - 2 files changed, 20 insertions(+) - -diff --git a/init/Kconfig b/init/Kconfig -index 4592bf7997c0..f3df02990aff 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1004,6 +1004,22 @@ config USER_NS - - If unsure, say N. - -+config USER_NS_UNPRIVILEGED -+ bool "Allow unprivileged users to create namespaces" -+ default y -+ depends on USER_NS -+ help -+ When disabled, unprivileged users will not be able to create -+ new namespaces. Allowing users to create their own namespaces -+ has been part of several recent local privilege escalation -+ exploits, so if you need user namespaces but are -+ paranoid^Wsecurity-conscious you want to disable this. -+ -+ This setting can be overridden at runtime via the -+ kernel.unprivileged_userns_clone sysctl. -+ -+ If unsure, say Y. -+ - config PID_NS - bool "PID Namespaces" - default y -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 6b9dbc257e34..107b17f0d528 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -27,7 +27,11 @@ - #include - - /* sysctl */ -+#ifdef CONFIG_USER_NS_UNPRIVILEGED -+int unprivileged_userns_clone = 1; -+#else - int unprivileged_userns_clone; -+#endif - - static struct kmem_cache *user_ns_cachep __read_mostly; - static DEFINE_MUTEX(userns_state_mutex); diff --git a/linux58-tkg/linux58-tkg-patches/0002-clear-patches.patch b/linux58-tkg/linux58-tkg-patches/0002-clear-patches.patch deleted file mode 100644 index 22a32f5..0000000 --- a/linux58-tkg/linux58-tkg-patches/0002-clear-patches.patch +++ /dev/null @@ -1,360 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Mon, 14 Mar 2016 11:10:58 -0600 -Subject: [PATCH] pci pme wakeups - -Reduce wakeups for PME checks, which are a workaround for miswired -boards (sadly, too many of them) in laptops. 
---- - drivers/pci/pci.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c -index c9338f9..6974fbf 100644 ---- a/drivers/pci/pci.c -+++ b/drivers/pci/pci.c -@@ -62,7 +62,7 @@ struct pci_pme_device { - struct pci_dev *dev; - }; - --#define PME_TIMEOUT 1000 /* How long between PME checks */ -+#define PME_TIMEOUT 4000 /* How long between PME checks */ - - static void pci_dev_d3_sleep(struct pci_dev *dev) - { --- -https://clearlinux.org - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Sat, 19 Mar 2016 21:32:19 -0400 -Subject: [PATCH] intel_idle: tweak cpuidle cstates - -Increase target_residency in cpuidle cstate - -Tune intel_idle to be a bit less agressive; -Clear linux is cleaner in hygiene (wakupes) than the average linux, -so we can afford changing these in a way that increases -performance while keeping power efficiency ---- - drivers/idle/intel_idle.c | 44 +++++++++++++++++++-------------------- - 1 file changed, 22 insertions(+), 22 deletions(-) - -diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c -index f449584..c994d24 100644 ---- a/drivers/idle/intel_idle.c -+++ b/drivers/idle/intel_idle.c -@@ -531,7 +531,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -539,7 +539,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 33, -- .target_residency = 100, -+ .target_residency = 900, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -547,7 +547,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 133, -- .target_residency = 400, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -555,7 +555,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x32", - .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 166, -- .target_residency = 500, -+ .target_residency = 1500, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -563,7 +563,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 300, -- .target_residency = 900, -+ .target_residency = 2000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -571,7 +571,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 600, -- .target_residency = 1800, -+ .target_residency = 5000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -579,7 +579,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 2600, -- .target_residency = 7700, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -599,7 +599,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, - 
.exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -607,7 +607,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 40, -- .target_residency = 100, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -615,7 +615,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 133, -- .target_residency = 400, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -623,7 +623,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x32", - .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 166, -- .target_residency = 500, -+ .target_residency = 2000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -631,7 +631,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 300, -- .target_residency = 900, -+ .target_residency = 4000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -639,7 +639,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 600, -- .target_residency = 1800, -+ .target_residency = 7000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -647,7 +647,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 2600, -- .target_residency = 7700, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -668,7 +668,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -676,7 +676,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 70, -- .target_residency = 100, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -684,7 +684,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 85, -- .target_residency = 200, -+ .target_residency = 600, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -692,7 +692,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x33", - .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 124, -- .target_residency = 800, -+ .target_residency = 3000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -700,7 +700,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 200, -- .target_residency = 800, -+ .target_residency = 3200, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -708,7 +708,7 @@ static struct cpuidle_state 
skl_cstates[] __initdata = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 480, -- .target_residency = 5000, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -716,7 +716,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 890, -- .target_residency = 5000, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -737,7 +737,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 300, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { --- -https://clearlinux.org - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Fri, 6 Jan 2017 15:34:09 +0000 -Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little - bigger than default - ---- - net/ipv4/tcp.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index 30c1142..4345075 100644 ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -4201,8 +4201,8 @@ void __init tcp_init(void) - tcp_init_mem(); - /* Set per-socket limits to no more than 1/128 the pressure threshold */ - limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); -- max_wshare = min(4UL*1024*1024, limit); -- max_rshare = min(6UL*1024*1024, limit); -+ max_wshare = min(16UL*1024*1024, limit); -+ max_rshare = min(16UL*1024*1024, limit); - - init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; - init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; --- -https://clearlinux.org - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Sun, 18 Feb 2018 23:35:41 +0000 -Subject: [PATCH] locking: rwsem: spin faster - -tweak rwsem owner spinning a bit ---- - kernel/locking/rwsem.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c -index f11b9bd..1bbfcc1 100644 ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -717,6 +717,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) - struct task_struct *new, *owner; - unsigned long flags, new_flags; - enum owner_state state; -+ int i = 0; - - owner = rwsem_owner_flags(sem, &flags); - state = rwsem_owner_state(owner, flags, nonspinnable); -@@ -750,7 +751,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) - break; - } - -- cpu_relax(); -+ if (i++ > 1000) -+ cpu_relax(); - } - rcu_read_unlock(); - --- -https://clearlinux.org - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Thu, 2 Jun 2016 23:36:32 -0500 -Subject: [PATCH] initialize ata before graphics - -ATA init is the long pole in the boot process, and its asynchronous. 
-move the graphics init after it so that ata and graphics initialize -in parallel ---- - drivers/Makefile | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/drivers/Makefile b/drivers/Makefile -index c0cd1b9..af1e2fb 100644 ---- a/drivers/Makefile -+++ b/drivers/Makefile -@@ -59,15 +59,8 @@ obj-y += char/ - # iommu/ comes before gpu as gpu are using iommu controllers - obj-y += iommu/ - --# gpu/ comes after char for AGP vs DRM startup and after iommu --obj-y += gpu/ -- - obj-$(CONFIG_CONNECTOR) += connector/ - --# i810fb and intelfb depend on char/agp/ --obj-$(CONFIG_FB_I810) += video/fbdev/i810/ --obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ -- - obj-$(CONFIG_PARPORT) += parport/ - obj-$(CONFIG_NVM) += lightnvm/ - obj-y += base/ block/ misc/ mfd/ nfc/ -@@ -80,6 +73,14 @@ obj-$(CONFIG_IDE) += ide/ - obj-y += scsi/ - obj-y += nvme/ - obj-$(CONFIG_ATA) += ata/ -+ -+# gpu/ comes after char for AGP vs DRM startup and after iommu -+obj-y += gpu/ -+ -+# i810fb and intelfb depend on char/agp/ -+obj-$(CONFIG_FB_I810) += video/fbdev/i810/ -+obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ -+ - obj-$(CONFIG_TARGET_CORE) += target/ - obj-$(CONFIG_MTD) += mtd/ - obj-$(CONFIG_SPI) += spi/ --- -https://clearlinux.org - diff --git a/linux58-tkg/linux58-tkg-patches/0003-glitched-base.patch b/linux58-tkg/linux58-tkg-patches/0003-glitched-base.patch deleted file mode 100644 index fb09b35..0000000 --- a/linux58-tkg/linux58-tkg-patches/0003-glitched-base.patch +++ /dev/null @@ -1,708 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: [PATCH 01/17] glitched - ---- - scripts/mkcompile_h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h -index baf3ab8d9d49..854e32e6aec7 100755 ---- a/scripts/mkcompile_h -+++ b/scripts/mkcompile_h -@@ -41,8 +41,8 @@ else - fi - - UTS_VERSION="#$VERSION" --CONFIG_FLAGS="" --if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi -+CONFIG_FLAGS="TKG" -+if [ -n "$SMP" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS SMP"; fi - if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi - if [ -n "$PREEMPT_RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT_RT"; fi - --- -2.28.0 - - -From c304f43d14e98d4bf1215fc10bc5012f554bdd8a Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 29 Jan 2018 16:59:22 +0000 -Subject: [PATCH 02/17] dcache: cache_pressure = 50 decreases the rate at which - VFS caches are reclaimed - -Signed-off-by: Alexandre Frade ---- - fs/dcache.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/dcache.c b/fs/dcache.c -index 361ea7ab30ea..0c5cf69b241a 100644 ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -71,7 +71,7 @@ - * If no ancestor relationship: - * arbitrary, since it's serialized on rename_lock - */ --int sysctl_vfs_cache_pressure __read_mostly = 100; -+int sysctl_vfs_cache_pressure __read_mostly = 50; - EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); - - __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); --- -2.28.0 - - -From 28f32f59d9d55ac7ec3a20b79bdd02d2a0a5f7e1 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 29 Jan 2018 18:29:13 +0000 -Subject: [PATCH 03/17] sched/core: nr_migrate = 128 increases number of tasks - to iterate in a single balance run. 
- -Signed-off-by: Alexandre Frade ---- - kernel/sched/core.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index f788cd61df21..2bfbb4213707 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -59,7 +59,7 @@ const_debug unsigned int sysctl_sched_features = - * Number of tasks to iterate in a single balance run. - * Limited because this is done with IRQs disabled. - */ --const_debug unsigned int sysctl_sched_nr_migrate = 32; -+const_debug unsigned int sysctl_sched_nr_migrate = 128; - - /* - * period over which we measure -rt task CPU usage in us. -@@ -71,9 +71,9 @@ __read_mostly int scheduler_running; - - /* - * part of the period that we allow rt tasks to run in us. -- * default: 0.95s -+ * XanMod default: 0.98s - */ --int sysctl_sched_rt_runtime = 950000; -+int sysctl_sched_rt_runtime = 980000; - - /* - * __task_rq_lock - lock the rq @p resides on. --- -2.28.0 - - -From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 29 Jan 2018 17:41:29 +0000 -Subject: [PATCH 04/17] scripts: disable the localversion "+" tag of a git repo - -Signed-off-by: Alexandre Frade ---- - scripts/setlocalversion | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/scripts/setlocalversion b/scripts/setlocalversion -index 20f2efd57b11..0552d8b9f582 100755 ---- a/scripts/setlocalversion -+++ b/scripts/setlocalversion -@@ -54,7 +54,7 @@ scm_version() - # If only the short version is requested, don't bother - # running further git commands - if $short; then -- echo "+" -+ # echo "+" - return - fi - # If we are past a tagged commit (like --- -2.28.0 - - -From 61fcb33fb0de8bc0f060e0a1ada38ed149217f4d Mon Sep 17 00:00:00 2001 -From: Oleksandr Natalenko -Date: Wed, 11 Dec 2019 11:46:19 +0100 -Subject: [PATCH 05/17] init/Kconfig: enable -O3 for all arches - -Building a kernel with -O3 may help in hunting bugs like [1] and thus -using this switch should not be restricted to one specific arch only. - -With that, lets expose it for everyone. - -[1] https://lore.kernel.org/lkml/673b885183fb64f1cbb3ed2387524077@natalenko.name/ - -Signed-off-by: Oleksandr Natalenko ---- - init/Kconfig | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/init/Kconfig b/init/Kconfig -index 0498af567f70..3ae8678e1145 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1278,7 +1278,6 @@ config CC_OPTIMIZE_FOR_PERFORMANCE - - config CC_OPTIMIZE_FOR_PERFORMANCE_O3 - bool "Optimize more for performance (-O3)" -- depends on ARC - help - Choosing this option will pass "-O3" to your compiler to optimize - the kernel yet more for performance. 
--- -2.28.0 - - -From 360c6833e07cc9fdef5746f6bc45bdbc7212288d Mon Sep 17 00:00:00 2001 -From: "Jan Alexander Steffens (heftig)" -Date: Fri, 26 Oct 2018 11:22:33 +0100 -Subject: [PATCH 06/17] infiniband: Fix __read_overflow2 error with -O3 - inlining - ---- - drivers/infiniband/core/addr.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 3a98439bba83..6efc4f907f58 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -820,6 +820,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, - union { - struct sockaddr_in _sockaddr_in; - struct sockaddr_in6 _sockaddr_in6; -+ struct sockaddr_ib _sockaddr_ib; - } sgid_addr, dgid_addr; - int ret; - --- -2.28.0 - - -From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001 -From: Etienne Juvigny -Date: Mon, 3 Sep 2018 17:36:25 +0200 -Subject: [PATCH 07/17] Zenify & stuff - ---- - init/Kconfig | 32 ++++++++++++++++++++++++++++++++ - kernel/sched/fair.c | 25 +++++++++++++++++++++++++ - mm/page-writeback.c | 8 ++++++++ - 3 files changed, 65 insertions(+) - -diff --git a/init/Kconfig b/init/Kconfig -index 3ae8678e1145..da708eed0f1e 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -92,6 +92,38 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config ZENIFY -+ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience" -+ default y -+ help -+ Tunes the kernel for responsiveness at the cost of throughput and power usage. -+ -+ --- Virtual Memory Subsystem --------------------------- -+ -+ Mem dirty before bg writeback..: 10 % -> 20 % -+ Mem dirty before sync writeback: 20 % -> 50 % -+ -+ --- Block Layer ---------------------------------------- -+ -+ Queue depth...............: 128 -> 512 -+ Default MQ scheduler......: mq-deadline -> bfq -+ -+ --- CFS CPU Scheduler ---------------------------------- -+ -+ Scheduling latency.............: 6 -> 3 ms -+ Minimal granularity............: 0.75 -> 0.3 ms -+ Wakeup granularity.............: 1 -> 0.5 ms -+ CPU migration cost.............: 0.5 -> 0.25 ms -+ Bandwidth slice size...........: 5 -> 3 ms -+ Ondemand fine upscaling limit..: 95 % -> 85 % -+ -+ --- MuQSS CPU Scheduler -------------------------------- -+ -+ Scheduling interval............: 6 -> 3 ms -+ ISO task max realtime use......: 70 % -> 25 % -+ Ondemand coarse upscaling limit: 80 % -> 45 % -+ Ondemand fine upscaling limit..: 95 % -> 45 % -+ - config BROKEN - bool - -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 6b3b59cc51d6..2a0072192c3d 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -37,8 +37,13 @@ - * - * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_latency = 3000000ULL; -+static unsigned int normalized_sysctl_sched_latency = 3000000ULL; -+#else - unsigned int sysctl_sched_latency = 6000000ULL; - static unsigned int normalized_sysctl_sched_latency = 6000000ULL; -+#endif - - /* - * The initial- and re-scaling of tunables is configurable -@@ -58,13 +63,22 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L - * - * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_min_granularity = 300000ULL; -+static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL; -+#else - unsigned int sysctl_sched_min_granularity = 750000ULL; - static unsigned int 
normalized_sysctl_sched_min_granularity = 750000ULL; -+#endif - - /* - * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity - */ -+#ifdef CONFIG_ZENIFY -+static unsigned int sched_nr_latency = 10; -+#else - static unsigned int sched_nr_latency = 8; -+#endif - - /* - * After fork, child runs first. If set to 0 (default) then -@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; - * - * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_wakeup_granularity = 500000UL; -+static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL; -+ -+const_debug unsigned int sysctl_sched_migration_cost = 50000UL; -+#else - unsigned int sysctl_sched_wakeup_granularity = 1000000UL; - static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; - - const_debug unsigned int sysctl_sched_migration_cost = 500000UL; -+#endif - - int sched_thermal_decay_shift; - static int __init setup_sched_thermal_decay_shift(char *str) -@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu) - * - * (default: 5 msec, units: microseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; -+#else - unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; - #endif -+#endif - - static inline void update_load_add(struct load_weight *lw, unsigned long inc) - { -diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index 28b3e7a67565..01a1aef2b9b1 100644 ---- a/mm/page-writeback.c -+++ b/mm/page-writeback.c -@@ -71,7 +71,11 @@ static long ratelimit_pages = 32; - /* - * Start background writeback (via writeback threads) at this percentage - */ -+#ifdef CONFIG_ZENIFY -+int dirty_background_ratio = 20; -+#else - int dirty_background_ratio = 10; -+#endif - - /* - * dirty_background_bytes starts at 0 (disabled) so that it is a function of -@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable; - /* - * The generator of dirty data starts writeback at this percentage - */ -+#ifdef CONFIG_ZENIFY -+int vm_dirty_ratio = 50; -+#else - int vm_dirty_ratio = 20; -+#endif - - /* - * vm_dirty_bytes starts at 0 (disabled) so that it is a function of --- -2.28.0 - - -From e92e67143385cf285851e12aa8b7f083dd38dd24 Mon Sep 17 00:00:00 2001 -From: Steven Barrett -Date: Sun, 16 Jan 2011 18:57:32 -0600 -Subject: [PATCH 08/17] ZEN: Allow TCP YeAH as default congestion control - -4.4: In my tests YeAH dramatically slowed down transfers over a WLAN, - reducing throughput from ~65Mbps (CUBIC) to ~7MBps (YeAH) over 10 - seconds (netperf TCP_STREAM) including long stalls. - - Be careful when choosing this. 
~heftig ---- - net/ipv4/Kconfig | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig -index e64e59b536d3..bfb55ef7ebbe 100644 ---- a/net/ipv4/Kconfig -+++ b/net/ipv4/Kconfig -@@ -691,6 +691,9 @@ choice - config DEFAULT_VEGAS - bool "Vegas" if TCP_CONG_VEGAS=y - -+ config DEFAULT_YEAH -+ bool "YeAH" if TCP_CONG_YEAH=y -+ - config DEFAULT_VENO - bool "Veno" if TCP_CONG_VENO=y - -@@ -724,6 +727,7 @@ config DEFAULT_TCP_CONG - default "htcp" if DEFAULT_HTCP - default "hybla" if DEFAULT_HYBLA - default "vegas" if DEFAULT_VEGAS -+ default "yeah" if DEFAULT_YEAH - default "westwood" if DEFAULT_WESTWOOD - default "veno" if DEFAULT_VENO - default "reno" if DEFAULT_RENO --- -2.28.0 - - -From 76dbe7477bfde1b5e8bf29a71b5af7ab2be9b98e Mon Sep 17 00:00:00 2001 -From: Steven Barrett -Date: Wed, 28 Nov 2018 19:01:27 -0600 -Subject: [PATCH 09/17] zen: Use [defer+madvise] as default khugepaged defrag - strategy - -For some reason, the default strategy to respond to THP fault fallbacks -is still just madvise, meaning stall if the program wants transparent -hugepages, but don't trigger a background reclaim / compaction if THP -begins to fail allocations. This creates a snowball affect where we -still use the THP code paths, but we almost always fail once a system -has been active and busy for a while. - -The option "defer" was created for interactive systems where THP can -still improve performance. If we have to fallback to a regular page due -to an allocation failure or anything else, we will trigger a background -reclaim and compaction so future THP attempts succeed and previous -attempts eventually have their smaller pages combined without stalling -running applications. - -We still want madvise to stall applications that explicitely want THP, -so defer+madvise _does_ make a ton of sense. Make it the default for -interactive systems, especially if the kernel maintainer left -transparent hugepages on "always". - -Reasoning and details in the original patch: https://lwn.net/Articles/711248/ ---- - mm/huge_memory.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index 74300e337c3c..9277f22c10a7 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly = - #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE - (1< -Date: Wed, 24 Oct 2018 16:58:52 -0300 -Subject: [PATCH 10/17] net/sched: allow configuring cake qdisc as default - -Signed-off-by: Alexandre Frade ---- - net/sched/Kconfig | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/net/sched/Kconfig b/net/sched/Kconfig -index 84badf00647e..6a922bca9f39 100644 ---- a/net/sched/Kconfig -+++ b/net/sched/Kconfig -@@ -471,6 +471,9 @@ choice - config DEFAULT_SFQ - bool "Stochastic Fair Queue" if NET_SCH_SFQ - -+ config DEFAULT_CAKE -+ bool "Common Applications Kept Enhanced" if NET_SCH_CAKE -+ - config DEFAULT_PFIFO_FAST - bool "Priority FIFO Fast" - endchoice -@@ -481,6 +484,7 @@ config DEFAULT_NET_SCH - default "fq" if DEFAULT_FQ - default "fq_codel" if DEFAULT_FQ_CODEL - default "sfq" if DEFAULT_SFQ -+ default "cake" if DEFAULT_CAKE - default "pfifo_fast" - endif - --- -2.28.0 - - -From 816ee502759e954304693813bd03d94986b28dba Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Mon, 18 Feb 2019 17:40:57 +0100 -Subject: [PATCH 11/17] mm: Set watermark_scale_factor to 200 (from 10) - -Multiple users have reported it's helping reducing/eliminating stuttering -with DXVK. 
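For readers unfamiliar with the tunable touched in the hunk that follows: `watermark_scale_factor` is also exposed as the `vm.watermark_scale_factor` sysctl and is expressed in fractions of 10,000, so raising the default from 10 to 200 widens the gap between a zone's min/low/high watermarks from roughly 0.1 % to about 2 % of its memory and wakes kswapd earlier. A hedged sketch of trying the same value on an unpatched kernel, no rebuild needed:

  cat /proc/sys/vm/watermark_scale_factor        # 10 upstream, 200 with the patch below
  sudo sysctl -w vm.watermark_scale_factor=200   # applies until reboot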
---- - mm/page_alloc.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 898ff44f2c7b..e72074034793 100644 ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -330,7 +330,7 @@ int watermark_boost_factor __read_mostly; - #else - int watermark_boost_factor __read_mostly = 15000; - #endif --int watermark_scale_factor = 10; -+int watermark_scale_factor = 200; - - static unsigned long nr_kernel_pages __initdata; - static unsigned long nr_all_pages __initdata; --- -2.28.0 - - -From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Fri, 19 Apr 2019 12:33:38 +0200 -Subject: [PATCH 12/17] Set vm.max_map_count to 262144 by default - -The value is still pretty low, and AMD64-ABI and ELF extended numbering -supports that, so we should be fine on modern x86 systems. - -This fixes crashes in some applications using more than 65535 vmas (also -affects some windows games running in wine, such as Star Citizen). ---- - include/linux/mm.h | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/include/linux/mm.h b/include/linux/mm.h -index bc05c3588aa3..b0cefe94920d 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -190,8 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page) - * not a hard limit any more. Although some userspace tools can be surprised by - * that. - */ --#define MAPCOUNT_ELF_CORE_MARGIN (5) --#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) -+#define DEFAULT_MAX_MAP_COUNT (262144) - - extern int sysctl_max_map_count; - --- -2.28.0 - - -From 3a34034dba5efe91bcec491efe8c66e8087f509b Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Mon, 27 Jul 2020 00:19:18 +0200 -Subject: [PATCH 13/17] mm: bump DEFAULT_MAX_MAP_COUNT - -Some games such as Detroit: Become Human tend to be very crash prone with -lower values. ---- - include/linux/mm.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/linux/mm.h b/include/linux/mm.h -index b0cefe94920d..890165099b07 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -190,7 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page) - * not a hard limit any more. Although some userspace tools can be surprised by - * that. - */ --#define DEFAULT_MAX_MAP_COUNT (262144) -+#define DEFAULT_MAX_MAP_COUNT (524288) - - extern int sysctl_max_map_count; - --- -2.28.0 - - -From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 25 Nov 2019 15:13:06 -0300 -Subject: [PATCH 14/17] elevator: set default scheduler to bfq for blk-mq - -Signed-off-by: Alexandre Frade ---- - block/elevator.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/block/elevator.c b/block/elevator.c -index 4eab3d70e880..79669aa39d79 100644 ---- a/block/elevator.c -+++ b/block/elevator.c -@@ -623,15 +623,15 @@ static inline bool elv_support_iosched(struct request_queue *q) - } - - /* -- * For single queue devices, default to using mq-deadline. If we have multiple -- * queues or mq-deadline is not available, default to "none". -+ * For single queue devices, default to using bfq. If we have multiple -+ * queues or bfq is not available, default to "none". 
- */ - static struct elevator_type *elevator_get_default(struct request_queue *q) - { - if (q->nr_hw_queues != 1) - return NULL; - -- return elevator_get(q, "mq-deadline", false); -+ return elevator_get(q, "bfq", false); - } - - /* --- -2.28.0 - - -From e2111bc5989131c675659d40e0cc4f214df2f990 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Fri, 10 May 2019 16:45:59 -0300 -Subject: [PATCH 15/17] block: set rq_affinity = 2 for full multithreading I/O - requests - -Signed-off-by: Alexandre Frade ---- - include/linux/blkdev.h | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index 28efe374a2e1..d4e5d35d2ece 100644 ---- a/include/linux/blkdev.h -+++ b/include/linux/blkdev.h -@@ -624,7 +624,8 @@ struct request_queue { - #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ - - #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ -- (1 << QUEUE_FLAG_SAME_COMP)) -+ (1 << QUEUE_FLAG_SAME_COMP) | \ -+ (1 << QUEUE_FLAG_SAME_FORCE)) - - void blk_queue_flag_set(unsigned int flag, struct request_queue *q); - void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); --- -2.28.0 - - -From 3c229f434aca65c4ca61772bc03c3e0370817b92 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 3 Aug 2020 17:05:04 +0000 -Subject: [PATCH 16/17] mm: set 2 megabytes for address_space-level file - read-ahead pages size - -Signed-off-by: Alexandre Frade ---- - include/linux/pagemap.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h -index cf2468da68e9..007dea784451 100644 ---- a/include/linux/pagemap.h -+++ b/include/linux/pagemap.h -@@ -655,7 +655,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); - void delete_from_page_cache_batch(struct address_space *mapping, - struct pagevec *pvec); - --#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) -+#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE) - - void page_cache_sync_readahead(struct address_space *, struct file_ra_state *, - struct file *, pgoff_t index, unsigned long req_count); --- -2.28.0 - - -From 716f41cf6631f3a85834dcb67b4ce99185b6387f Mon Sep 17 00:00:00 2001 -From: Steven Barrett -Date: Wed, 15 Jan 2020 20:43:56 -0600 -Subject: [PATCH 17/17] ZEN: intel-pstate: Implement "enable" parameter - -If intel-pstate is compiled into the kernel, it will preempt the loading -of acpi-cpufreq so you can take advantage of hardware p-states without -any friction. - -However, intel-pstate is not completely superior to cpufreq's ondemand -for one reason. There's no concept of an up_threshold property. - -In ondemand, up_threshold essentially reduces the maximum utilization to -compare against, allowing you to hit max frequencies and turbo boost -from a much lower core utilization. - -With intel-pstate, you have the concept of minimum and maximum -performance, but no tunable that lets you define, maximum frequency -means 50% core utilization. For just this oversight, there's reasons -you may want ondemand. - -Lets support setting "enable" in kernel boot parameters. This lets -kernel maintainers include "intel_pstate=disable" statically in the -static boot parameters, but let users of the kernel override this -selection. 
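The diff that follows implements exactly that: it documents and handles an extra `enable` value for the `intel_pstate=` boot parameter (the `enable` keyword exists only on kernels carrying this patch; `disable`, `passive`, `force` and `hwp_only` are upstream). Because the parameter handler processes occurrences in order, a later `enable` overrides an earlier `disable`, so the intended usage looks roughly like this:

  # Maintainer bakes this into the built-in command line (CONFIG_CMDLINE):
  #   intel_pstate=disable
  # User appends this from the bootloader to override it on a patched kernel:
  #   intel_pstate=enable
  cat /proc/cmdline   # confirm what the running kernel was actually booted with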
---- - Documentation/admin-guide/kernel-parameters.txt | 3 +++ - drivers/cpufreq/intel_pstate.c | 2 ++ - 2 files changed, 5 insertions(+) - -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index fb95fad81c79..3e92fee81e33 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -1857,6 +1857,9 @@ - disable - Do not enable intel_pstate as the default - scaling driver for the supported processors -+ enable -+ Enable intel_pstate in-case "disable" was passed -+ previously in the kernel boot parameters - passive - Use intel_pstate as a scaling driver, but configure it - to work with generic cpufreq governors (instead of -diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index 36a469150ff9..aee891c9b78a 100644 ---- a/drivers/cpufreq/intel_pstate.c -+++ b/drivers/cpufreq/intel_pstate.c -@@ -2845,6 +2845,8 @@ static int __init intel_pstate_setup(char *str) - pr_info("HWP disabled\n"); - no_hwp = 1; - } -+ if (!strcmp(str, "enable")) -+ no_load = 0; - if (!strcmp(str, "force")) - force_load = 1; - if (!strcmp(str, "hwp_only")) --- -2.28.0 - diff --git a/linux58-tkg/linux58-tkg-patches/0003-glitched-cfs.patch b/linux58-tkg/linux58-tkg-patches/0003-glitched-cfs.patch deleted file mode 100644 index 06b7f02..0000000 --- a/linux58-tkg/linux58-tkg-patches/0003-glitched-cfs.patch +++ /dev/null @@ -1,72 +0,0 @@ -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. 
-+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - diff --git a/linux58-tkg/linux58-tkg-patches/0005-glitched-pds.patch b/linux58-tkg/linux58-tkg-patches/0005-glitched-pds.patch deleted file mode 100644 index 4307c45..0000000 --- a/linux58-tkg/linux58-tkg-patches/0005-glitched-pds.patch +++ /dev/null @@ -1,90 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - PDS - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9270a4370d54..30d01e647417 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -159,7 +159,7 @@ struct scan_control { - /* - * From 0 .. 100. Higher means more swappy. - */ --int vm_swappiness = 60; -+int vm_swappiness = 20; - /* - * The total number of pages which are beyond the high watermark within all - * zones. 
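The vm_swappiness hunk that closes the glitched-PDS patch above only changes a compiled-in default; the same tunable is available on any kernel through the `vm.swappiness` sysctl, so the behaviour can be reproduced without the patch (the drop-in file name below is arbitrary):

  cat /proc/sys/vm/swappiness                  # 60 upstream, 20 with the patch above
  sudo sysctl -w vm.swappiness=20              # current boot only
  echo 'vm.swappiness=20' | sudo tee /etc/sysctl.d/99-swappiness.conf   # persistent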
diff --git a/linux58-tkg/linux58-tkg-patches/0005-undead-glitched-ondemand-pds.patch b/linux58-tkg/linux58-tkg-patches/0005-undead-glitched-ondemand-pds.patch deleted file mode 100644 index c1929e8..0000000 --- a/linux58-tkg/linux58-tkg-patches/0005-undead-glitched-ondemand-pds.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 6b423eebfd5d..61e3271675d6 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -21,10 +21,10 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (63) --#define DEF_SAMPLING_DOWN_FACTOR (1) -+#define DEF_FREQUENCY_UP_THRESHOLD (55) -+#define DEF_SAMPLING_DOWN_FACTOR (5) - #define MAX_SAMPLING_DOWN_FACTOR (100000) --#define MICRO_FREQUENCY_UP_THRESHOLD (95) -+#define MICRO_FREQUENCY_UP_THRESHOLD (63) - #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) - #define MIN_FREQUENCY_UP_THRESHOLD (1) - #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/linux58-tkg/linux58-tkg-patches/0005-undead-glitched-pds.patch b/linux58-tkg/linux58-tkg-patches/0005-undead-glitched-pds.patch deleted file mode 100644 index 23271f5..0000000 --- a/linux58-tkg/linux58-tkg-patches/0005-undead-glitched-pds.patch +++ /dev/null @@ -1,166 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - PDS - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. 
-+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9270a4370d54..30d01e647417 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -159,7 +159,7 @@ struct scan_control { - /* - * From 0 .. 100. Higher means more swappy. - */ --int vm_swappiness = 60; -+int vm_swappiness = 20; - /* - * The total number of pages which are beyond the high watermark within all - * zones. - -diff --git a/init/Kconfig b/init/Kconfig -index 11fd9b502d06..e9bc34d3019b 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -715,6 +715,7 @@ menu "Scheduler features" - config UCLAMP_TASK - bool "Enable utilization clamping for RT/FAIR tasks" - depends on CPU_FREQ_GOV_SCHEDUTIL -+ depends on !SCHED_PDS - help - This feature enables the scheduler to track the clamped utilization - of each CPU based on RUNNABLE tasks scheduled on that CPU. -@@ -948,7 +948,6 @@ config CGROUP_DEVICE - - config CGROUP_CPUACCT - bool "Simple CPU accounting controller" -- depends on !SCHED_PDS - help - Provides a simple controller for monitoring the - total CPU consumed by the tasks in a cgroup. -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index b23231bae996..cab4e5c5b38e 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -24,13 +24,13 @@ obj-y += fair.o rt.o deadline.o - obj-$(CONFIG_SMP) += cpudeadline.o topology.o stop_task.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o --obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o - endif - obj-y += loadavg.o clock.o cputime.o - obj-y += idle.o - obj-y += wait.o wait_bit.o swait.o completion.o - obj-$(CONFIG_SMP) += cpupri.o pelt.o - obj-$(CONFIG_SCHEDSTATS) += stats.o -+obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o - obj-$(CONFIG_MEMBARRIER) += membarrier.o - -diff --git a/kernel/sched/pds.c b/kernel/sched/pds.c -index 9281ad164..f09a609cf 100644 ---- a/kernel/sched/pds.c -+++ b/kernel/sched/pds.c -@@ -81,6 +81,18 @@ enum { - NR_CPU_AFFINITY_CHK_LEVEL - }; - -+/* -+ * This allows printing both to /proc/sched_debug and -+ * to the console -+ */ -+#define SEQ_printf(m, x...) 
\ -+ do { \ -+ if (m) \ -+ seq_printf(m, x); \ -+ else \ -+ pr_cont(x); \ -+ } while (0) -+ - static inline void print_scheduler_version(void) - { - printk(KERN_INFO "pds: PDS-mq CPU Scheduler 0.99o by Alfred Chen.\n"); -@@ -6353,7 +6365,10 @@ void ia64_set_curr_task(int cpu, struct task_struct *p) - #ifdef CONFIG_SCHED_DEBUG - void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, - struct seq_file *m) --{} -+{ -+ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} - - void proc_sched_set_task(struct task_struct *p) - {} diff --git a/linux58-tkg/linux58-tkg-patches/0005-v5.8_undead-pds099o.patch b/linux58-tkg/linux58-tkg-patches/0005-v5.8_undead-pds099o.patch deleted file mode 100644 index 7cb7e91..0000000 --- a/linux58-tkg/linux58-tkg-patches/0005-v5.8_undead-pds099o.patch +++ /dev/null @@ -1,8530 +0,0 @@ -From 68f1a9541ef3185b1021e8e54d2712c7039418d7 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Mon, 15 Jun 2020 23:58:41 +0200 -Subject: PDS 099o, initial 5.8 rebase - - -diff --git a/Documentation/scheduler/sched-PDS-mq.txt b/Documentation/scheduler/sched-PDS-mq.txt -new file mode 100644 -index 000000000000..709e86f6487e ---- /dev/null -+++ b/Documentation/scheduler/sched-PDS-mq.txt -@@ -0,0 +1,56 @@ -+ Priority and Deadline based Skiplist multiple queue Scheduler -+ ------------------------------------------------------------- -+ -+CONTENT -+======== -+ -+ 0. Development -+ 1. Overview -+ 1.1 Design goal -+ 1.2 Design summary -+ 2. Design Detail -+ 2.1 Skip list implementation -+ 2.2 Task preempt -+ 2.3 Task policy, priority and deadline -+ 2.4 Task selection -+ 2.5 Run queue balance -+ 2.6 Task migration -+ -+ -+0. Development -+============== -+ -+Priority and Deadline based Skiplist multiple queue scheduler, referred to as -+PDS from here on, is developed upon the enhancement patchset VRQ(Variable Run -+Queue) for BFS(Brain Fuck Scheduler by Con Kolivas). PDS inherits the existing -+design from VRQ and inspired by the introduction of skiplist data structure -+to the scheduler by Con Kolivas. However, PDS is different from MuQSS(Multiple -+Queue Skiplist Scheduler, the successor after BFS) in many ways. -+ -+1. Overview -+=========== -+ -+1.1 Design goal -+--------------- -+ -+PDS is designed to make the cpu process scheduler code to be simple, but while -+efficiency and scalable. Be Simple, the scheduler code will be easy to be read -+and the behavious of scheduler will be easy to predict. Be efficiency, the -+scheduler shall be well balance the thoughput performance and task interactivity -+at the same time for different properties the tasks behave. Be scalable, the -+performance of the scheduler should be in good shape with the glowing of -+workload or with the growing of the cpu numbers. -+ -+1.2 Design summary -+------------------ -+ -+PDS is described as a multiple run queues cpu scheduler. Each cpu has its own -+run queue. A heavry customized skiplist is used as the backend data structure -+of the cpu run queue. Tasks in run queue is sorted by priority then virtual -+deadline(simplfy to just deadline from here on). In PDS, balance action among -+run queues are kept as less as possible to reduce the migration cost. Cpumask -+data structure is widely used in cpu affinity checking and cpu preemption/ -+selection to make PDS scalable with increasing cpu number. -+ -+ -+To be continued... 
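For illustration only: the sched-PDS-mq.txt text above says run-queue tasks are "sorted by priority then virtual deadline". A minimal sketch of that ordering follows; the helper names (make_priodl, runs_before) are invented for the example, while the 8-bit-prio / 56-bit-deadline packing mirrors update_task_priodl() further down in this same patch.

/*
 * Illustrative sketch, not part of the patch: priority-then-deadline
 * ordering can be done with a single unsigned compare by packing both
 * values into one 64-bit key, as PDS does with p->priodl.
 */
#include <stdint.h>

static inline uint64_t make_priodl(int prio, uint64_t deadline)
{
        /* 8 bits of prio on top, deadline (shifted down) below it. */
        return ((uint64_t)prio << 56) | (deadline >> 8);
}

/* Non-zero when a should run before b: lower prio value wins,
 * earlier deadline breaks ties. */
static inline int runs_before(uint64_t priodl_a, uint64_t priodl_b)
{
        return priodl_a < priodl_b;
}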
-diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c -index f18d5067cd0f..fe489fc01c73 100644 ---- a/arch/powerpc/platforms/cell/spufs/sched.c -+++ b/arch/powerpc/platforms/cell/spufs/sched.c -@@ -51,11 +51,6 @@ static struct task_struct *spusched_task; - static struct timer_list spusched_timer; - static struct timer_list spuloadavg_timer; - --/* -- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). -- */ --#define NORMAL_PRIO 120 -- - /* - * Frequency of the spu scheduler tick. By default we do one SPU scheduler - * tick for every 10 CPU scheduler ticks. -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 2d3f963fd6f1..5f41ead019b1 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1006,6 +1006,22 @@ config NR_CPUS - config SCHED_SMT - def_bool y if SMP - -+config SMT_NICE -+ bool "SMT (Hyperthreading) aware nice priority and policy support" -+ depends on SCHED_PDS && SCHED_SMT -+ default y -+ ---help--- -+ Enabling Hyperthreading on Intel CPUs decreases the effectiveness -+ of the use of 'nice' levels and different scheduling policies -+ (e.g. realtime) due to sharing of CPU power between hyperthreads. -+ SMT nice support makes each logical CPU aware of what is running on -+ its hyperthread siblings, maintaining appropriate distribution of -+ CPU according to nice levels and scheduling policies at the expense -+ of slightly increased overhead. -+ -+ If unsure say Y here. -+ -+ - config SCHED_MC - def_bool y - prompt "Multi-core scheduler support" -diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c -index 737ff3b9c2c0..b5bc5a1b6de7 100644 ---- a/drivers/cpufreq/cpufreq_conservative.c -+++ b/drivers/cpufreq/cpufreq_conservative.c -@@ -28,8 +28,8 @@ struct cs_dbs_tuners { - }; - - /* Conservative governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) --#define DEF_FREQUENCY_DOWN_THRESHOLD (20) -+#define DEF_FREQUENCY_UP_THRESHOLD (63) -+#define DEF_FREQUENCY_DOWN_THRESHOLD (26) - #define DEF_FREQUENCY_STEP (5) - #define DEF_SAMPLING_DOWN_FACTOR (1) - #define MAX_SAMPLING_DOWN_FACTOR (10) -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 82a4d37ddecb..1130e0f5db72 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -18,7 +18,7 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) -+#define DEF_FREQUENCY_UP_THRESHOLD (63) - #define DEF_SAMPLING_DOWN_FACTOR (1) - #define MAX_SAMPLING_DOWN_FACTOR (100000) - #define MICRO_FREQUENCY_UP_THRESHOLD (95) -@@ -127,7 +127,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) - } - - /* -- * Every sampling_rate, we check, if current idle time is less than 20% -+ * Every sampling_rate, we check, if current idle time is less than 37% - * (default), then we try to increase frequency. Else, we adjust the frequency - * proportional to load. 
- */ -diff --git a/fs/proc/base.c b/fs/proc/base.c -index eb2255e95f62..62b8cedbccb6 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -479,7 +479,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, - seq_puts(m, "0 0 0\n"); - else - seq_printf(m, "%llu %llu %lu\n", -- (unsigned long long)task->se.sum_exec_runtime, -+ (unsigned long long)tsk_seruntime(task), - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); - -diff --git a/include/linux/init_task.h b/include/linux/init_task.h -index 2c620d7ac432..1a7987c40c80 100644 ---- a/include/linux/init_task.h -+++ b/include/linux/init_task.h -@@ -36,7 +36,11 @@ extern struct cred init_cred; - #define INIT_PREV_CPUTIME(x) - #endif - -+#ifdef CONFIG_SCHED_PDS -+#define INIT_TASK_COMM "PDS" -+#else - #define INIT_TASK_COMM "swapper" -+#endif /* !CONFIG_SCHED_PDS */ - - /* Attach to the init_task data structure for proper alignment */ - #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK -diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h -index fed6ba96c527..f03a5ee419a1 100644 ---- a/include/linux/jiffies.h -+++ b/include/linux/jiffies.h -@@ -169,7 +169,7 @@ static inline u64 get_jiffies_64(void) - * Have the 32 bit jiffies value wrap 5 minutes after boot - * so jiffies wrap bugs show up earlier. - */ --#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) -+#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ)) - - /* - * Change timeval to jiffies, trying to avoid the -diff --git a/kernel/smp.c b/kernel/smp.c -index 4418f5cb8324..2b51afac5b06 100644 ---- a/kernel/smp.c -+++ b/kernel/smp.c -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 4418f5cb8324..2b51afac5b06 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include - - /* task_struct member predeclarations (sorted alphabetically): */ - struct audit_context; -@@ -652,9 +653,13 @@ struct task_struct { - unsigned int flags; - unsigned int ptrace; - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_PDS) - int on_cpu; -+#endif -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_PDS) - struct __call_single_node wake_entry; -+#endif -+#ifdef CONFIG_SMP - #ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ - unsigned int cpu; -@@ -663,6 +668,7 @@ struct task_struct { - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; - -+#ifndef CONFIG_SCHED_PDS - /* - * recent_used_cpu is initially set as the last CPU used by a task - * that wakes affine another task. Waker/wakee relationships can -@@ -671,6 +677,7 @@ struct task_struct { - * used CPU that may be idle. 
- */ - int recent_used_cpu; -+#endif /* CONFIG_SCHED_PDS */ - int wake_cpu; - #endif - int on_rq; -@@ -680,13 +687,27 @@ struct task_struct { - int normal_prio; - unsigned int rt_priority; - -+#ifdef CONFIG_SCHED_PDS -+ int time_slice; -+ u64 deadline; -+ /* skip list level */ -+ int sl_level; -+ /* skip list node */ -+ struct skiplist_node sl_node; -+ /* 8bits prio and 56bits deadline for quick processing */ -+ u64 priodl; -+ u64 last_ran; -+ /* sched_clock time spent running */ -+ u64 sched_time; -+#else /* CONFIG_SCHED_PDS */ - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; -+ struct sched_dl_entity dl; -+#endif - #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; - #endif -- struct sched_dl_entity dl; - - #ifdef CONFIG_UCLAMP_TASK - /* Clamp values requested for a scheduling entity */ -@@ -1306,6 +1327,29 @@ struct task_struct { - */ - }; - -+#ifdef CONFIG_SCHED_PDS -+void cpu_scaling(int cpu); -+void cpu_nonscaling(int cpu); -+#define tsk_seruntime(t) ((t)->sched_time) -+/* replace the uncertian rt_timeout with 0UL */ -+#define tsk_rttimeout(t) (0UL) -+ -+#define task_running_idle(p) ((p)->prio == IDLE_PRIO) -+#else /* CFS */ -+extern int runqueue_is_locked(int cpu); -+static inline void cpu_scaling(int cpu) -+{ -+} -+ -+static inline void cpu_nonscaling(int cpu) -+{ -+} -+#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) -+#define tsk_rttimeout(t) ((t)->rt.timeout) -+ -+#define iso_task(p) (false) -+#endif /* CONFIG_SCHED_PDS */ -+ - static inline struct pid *task_pid(struct task_struct *task) - { - return task->thread_pid; -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 1aff00b65f3c..a5e5fc2c9170 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -1,5 +1,22 @@ - /* SPDX-License-Identifier: GPL-2.0 */ - -+#ifdef CONFIG_SCHED_PDS -+ -+#define __tsk_deadline(p) ((p)->deadline) -+ -+static inline int dl_prio(int prio) -+{ -+ return 1; -+} -+ -+static inline int dl_task(struct task_struct *p) -+{ -+ return 1; -+} -+#else -+ -+#define __tsk_deadline(p) ((p)->dl.deadline) -+ - /* - * SCHED_DEADLINE tasks has negative priorities, reflecting - * the fact that any of them has higher prio than RT and -@@ -19,6 +36,7 @@ static inline int dl_task(struct task_struct *p) - { - return dl_prio(p->prio); - } -+#endif /* CONFIG_SCHED_PDS */ - - static inline bool dl_time_before(u64 a, u64 b) - { -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 7d64feafc408..fba04bb91492 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -20,7 +20,18 @@ - */ - - #define MAX_USER_RT_PRIO 100 -+ -+#ifdef CONFIG_SCHED_PDS -+#define ISO_PRIO (MAX_USER_RT_PRIO) -+ -+#define MAX_RT_PRIO ((MAX_USER_RT_PRIO) + 1) -+ -+#define NORMAL_PRIO (MAX_RT_PRIO) -+#define IDLE_PRIO ((MAX_RT_PRIO) + 1) -+#define PRIO_LIMIT ((IDLE_PRIO) + 1) -+#else /* !CONFIG_SCHED_PDS */ - #define MAX_RT_PRIO MAX_USER_RT_PRIO -+#endif /* CONFIG_SCHED_PDS */ - - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..a96012e6f15e 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return true; -+#ifndef CONFIG_SCHED_PDS - if (policy == SCHED_DEADLINE) - return true; -+#endif - return 
false; - } - -diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h -index 38359071236a..90328ccd527f 100644 ---- a/include/linux/sched/task.h -+++ b/include/linux/sched/task.h -@@ -106,7 +106,7 @@ extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); - extern void free_task(struct task_struct *tsk); - - /* sched_exec is called by processes performing an exec */ --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_PDS) - extern void sched_exec(void); - #else - #define sched_exec() {} -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -new file mode 100644 -index 000000000000..713fedd8034f ---- /dev/null -+++ b/include/linux/skip_list.h -@@ -0,0 +1,177 @@ -+/* -+ Copyright (C) 2016 Alfred Chen. -+ -+ Code based on Con Kolivas's skip list implementation for BFS, and -+ which is based on example originally by William Pugh. -+ -+Skip Lists are a probabilistic alternative to balanced trees, as -+described in the June 1990 issue of CACM and were invented by -+William Pugh in 1987. -+ -+A couple of comments about this implementation: -+ -+This file only provides a infrastructure of skip list. -+ -+skiplist_node is embedded into container data structure, to get rid the -+dependency of kmalloc/kfree operation in scheduler code. -+ -+A customized search function should be defined using DEFINE_SKIPLIST_INSERT -+macro and be used for skip list insert operation. -+ -+Random Level is also not defined in this file, instead, it should be customized -+implemented and set to node->level then pass to the customized skiplist_insert -+function. -+ -+Levels start at zero and go up to (NUM_SKIPLIST_LEVEL -1) -+ -+NUM_SKIPLIST_LEVEL in this implementation is 8 instead of origin 16, -+considering that there will be 256 entries to enable the top level when using -+random level p=0.5, and that number is more than enough for a run queue usage -+in a scheduler usage. And it also help to reduce the memory usage of the -+embedded skip list node in task_struct to about 50%. -+ -+The insertion routine has been implemented so as to use the -+dirty hack described in the CACM paper: if a random level is -+generated that is more than the current maximum level, the -+current maximum level plus one is used instead. -+ -+BFS Notes: In this implementation of skiplists, there are bidirectional -+next/prev pointers and the insert function returns a pointer to the actual -+node the value is stored. The key here is chosen by the scheduler so as to -+sort tasks according to the priority list requirements and is no longer used -+by the scheduler after insertion. The scheduler lookup, however, occurs in -+O(1) time because it is always the first item in the level 0 linked list. -+Since the task struct stores a copy of the node pointer upon skiplist_insert, -+it can also remove it much faster than the original implementation with the -+aid of prev<->next pointer manipulation and no searching. 
-+*/ -+#ifndef _LINUX_SKIP_LIST_H -+#define _LINUX_SKIP_LIST_H -+ -+#include -+ -+#define NUM_SKIPLIST_LEVEL (8) -+ -+struct skiplist_node { -+ int level; /* Levels in this node */ -+ struct skiplist_node *next[NUM_SKIPLIST_LEVEL]; -+ struct skiplist_node *prev[NUM_SKIPLIST_LEVEL]; -+}; -+ -+#define SKIPLIST_NODE_INIT(name) { 0,\ -+ {&name, &name, &name, &name,\ -+ &name, &name, &name, &name},\ -+ {&name, &name, &name, &name,\ -+ &name, &name, &name, &name},\ -+ } -+ -+static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) -+{ -+ /* only level 0 ->next matters in skiplist_empty()*/ -+ WRITE_ONCE(node->next[0], node); -+} -+ -+/** -+ * FULL_INIT_SKIPLIST_NODE -- fully init a skiplist_node, expecially for header -+ * @node: the skip list node to be inited. -+ */ -+static inline void FULL_INIT_SKIPLIST_NODE(struct skiplist_node *node) -+{ -+ int i; -+ -+ node->level = 0; -+ for (i = 0; i < NUM_SKIPLIST_LEVEL; i++) { -+ WRITE_ONCE(node->next[i], node); -+ node->prev[i] = node; -+ } -+} -+ -+/** -+ * skiplist_empty - test whether a skip list is empty -+ * @head: the skip list to test. -+ */ -+static inline int skiplist_empty(const struct skiplist_node *head) -+{ -+ return READ_ONCE(head->next[0]) == head; -+} -+ -+/** -+ * skiplist_entry - get the struct for this entry -+ * @ptr: the &struct skiplist_node pointer. -+ * @type: the type of the struct this is embedded in. -+ * @member: the name of the skiplist_node within the struct. -+ */ -+#define skiplist_entry(ptr, type, member) \ -+ container_of(ptr, type, member) -+ -+/** -+ * DEFINE_SKIPLIST_INSERT_FUNC -- macro to define a customized skip list insert -+ * function, which takes two parameters, first one is the header node of the -+ * skip list, second one is the skip list node to be inserted -+ * @func_name: the customized skip list insert function name -+ * @search_func: the search function to be used, which takes two parameters, -+ * 1st one is the itrator of skiplist_node in the list, the 2nd is the skip list -+ * node to be inserted, the function should return true if search should be -+ * continued, otherwise return false. -+ * Returns 1 if @node is inserted as the first item of skip list at level zero, -+ * otherwise 0 -+ */ -+#define DEFINE_SKIPLIST_INSERT_FUNC(func_name, search_func)\ -+static inline int func_name(struct skiplist_node *head, struct skiplist_node *node)\ -+{\ -+ struct skiplist_node *update[NUM_SKIPLIST_LEVEL];\ -+ struct skiplist_node *p, *q;\ -+ int k = head->level;\ -+\ -+ p = head;\ -+ do {\ -+ while (q = p->next[k], q != head && search_func(q, node))\ -+ p = q;\ -+ update[k] = p;\ -+ } while (--k >= 0);\ -+\ -+ k = node->level;\ -+ if (unlikely(k > head->level)) {\ -+ node->level = k = ++head->level;\ -+ update[k] = head;\ -+ }\ -+\ -+ do {\ -+ p = update[k];\ -+ q = p->next[k];\ -+ node->next[k] = q;\ -+ p->next[k] = node;\ -+ node->prev[k] = p;\ -+ q->prev[k] = node;\ -+ } while (--k >= 0);\ -+\ -+ return (p == head);\ -+} -+ -+/** -+ * skiplist_del_init -- delete skip list node from a skip list and reset it's -+ * init state -+ * @head: the header node of the skip list to be deleted from. -+ * @node: the skip list node to be deleted, the caller need to ensure @node is -+ * in skip list which @head represent. 
-+ * Returns 1 if @node is the first item of skip level at level zero, otherwise 0 -+ */ -+static inline int -+skiplist_del_init(struct skiplist_node *head, struct skiplist_node *node) -+{ -+ int l, m = node->level; -+ -+ for (l = 0; l <= m; l++) { -+ node->prev[l]->next[l] = node->next[l]; -+ node->next[l]->prev[l] = node->prev[l]; -+ } -+ if (m == head->level && m > 0) { -+ while (head->next[m] == head && m > 0) -+ m--; -+ head->level = m; -+ } -+ INIT_SKIPLIST_NODE(node); -+ -+ return (node->prev[0] == head); -+} -+#endif /* _LINUX_SKIP_LIST_H */ -diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h -index 3bac0a8ceab2..d6d384ddb57d 100644 ---- a/include/uapi/linux/sched.h -+++ b/include/uapi/linux/sched.h -@@ -115,7 +115,10 @@ struct clone_args { - #define SCHED_FIFO 1 - #define SCHED_RR 2 - #define SCHED_BATCH 3 --/* SCHED_ISO: reserved but not implemented yet */ -+/* SCHED_ISO: Implemented in BFS/MuQSSPDS only */ -+ -+#define SCHED_ISO 4 -+ - #define SCHED_IDLE 5 - #define SCHED_DEADLINE 6 - -diff --git a/init/Kconfig b/init/Kconfig -index 74a5ac65644f..e4fd406b58dd 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -61,6 +61,21 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config SCHED_PDS -+ bool "PDS-mq cpu scheduler" -+ help -+ The Priority and Deadline based Skip list multiple queue CPU -+ Scheduler for excellent interactivity and responsiveness on the -+ desktop and solid scalability on normal hardware and commodity -+ servers. -+ -+ Currently incompatible with the Group CPU scheduler, and RCU TORTURE -+ TEST so these options are disabled. -+ -+ Say Y here. -+ default y -+ -+ - config BROKEN - bool - -@@ -777,6 +792,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION -+ depends on !SCHED_PDS - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -878,7 +894,7 @@ menuconfig CGROUP_SCHED - bandwidth allocation to such task groups. It uses cgroups to group - tasks. - --if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_PDS - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED -@@ -1007,6 +1023,7 @@ config CGROUP_DEVICE - - config CGROUP_CPUACCT - bool "Simple CPU accounting controller" -+ depends on !SCHED_PDS - help - Provides a simple controller for monitoring the - total CPU consumed by the tasks in a cgroup. 
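A hedged usage sketch for the skip list interface added earlier in this patch (include/linux/skip_list.h): the container struct, item_search and item_insert names are made up for the example, and the caller chooses node->level before inserting, just as pds.c does with pds_skiplist_random_level(). This is not part of the patch, only an illustration of how pds.c drives the same API for its run queue.

/* Illustrative only; assumes the header added above. */
#include <linux/types.h>
#include <linux/skip_list.h>

struct item {
        u64 key;
        struct skiplist_node sl_node;
};

/* Keep walking while the iterator's key is <= the new node's key,
 * so the list stays sorted in ascending key order. */
static inline bool
item_search(struct skiplist_node *it, struct skiplist_node *node)
{
        return skiplist_entry(it, struct item, sl_node)->key <=
               skiplist_entry(node, struct item, sl_node)->key;
}

DEFINE_SKIPLIST_INSERT_FUNC(item_insert, item_search);

static struct skiplist_node head;               /* list header */

static void example(struct item *a)
{
        FULL_INIT_SKIPLIST_NODE(&head);         /* header init */

        a->sl_node.level = 0;                   /* caller-chosen level */
        item_insert(&head, &a->sl_node);        /* returns 1 if new first */

        if (!skiplist_empty(&head)) {
                /* The lowest key is always head.next[0], O(1) lookup. */
                struct item *first = skiplist_entry(head.next[0],
                                                    struct item, sl_node);
                skiplist_del_init(&head, &first->sl_node);
        }
}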
-@@ -1134,6 +1151,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -+ depends on !SCHED_PDS - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -diff --git a/init/init_task.c b/init/init_task.c -index bd403ed3e418..162d3deddd45 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -59,6 +59,126 @@ struct task_struct init_task - __init_task_data - #endif - = { -+#ifdef CONFIG_SCHED_PDS -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ .thread_info = INIT_THREAD_INFO(init_task), -+ .stack_refcount = ATOMIC_INIT(1), -+#endif -+ .state = 0, -+ .stack = init_stack, -+ .usage = ATOMIC_INIT(2), -+ .flags = PF_KTHREAD, -+ .prio = NORMAL_PRIO, -+ .static_prio = MAX_PRIO - 20, -+ .normal_prio = NORMAL_PRIO, -+ .deadline = 0, /* PDS only */ -+ .policy = SCHED_NORMAL, -+ .cpus_ptr = &init_task.cpus_mask, -+ .cpus_mask = CPU_MASK_ALL, -+ .nr_cpus_allowed= NR_CPUS, -+ .mm = NULL, -+ .active_mm = &init_mm, -+ .restart_block = { -+ .fn = do_no_restart_syscall, -+ }, -+ .sl_level = 0, /* PDS only */ -+ .sl_node = SKIPLIST_NODE_INIT(init_task.sl_node), /* PDS only */ -+ .time_slice = HZ, /* PDS only */ -+ .tasks = LIST_HEAD_INIT(init_task.tasks), -+#ifdef CONFIG_SMP -+ .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), -+#endif -+#ifdef CONFIG_CGROUP_SCHED -+ .sched_task_group = &root_task_group, -+#endif -+ .ptraced = LIST_HEAD_INIT(init_task.ptraced), -+ .ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry), -+ .real_parent = &init_task, -+ .parent = &init_task, -+ .children = LIST_HEAD_INIT(init_task.children), -+ .sibling = LIST_HEAD_INIT(init_task.sibling), -+ .group_leader = &init_task, -+ RCU_POINTER_INITIALIZER(real_cred, &init_cred), -+ RCU_POINTER_INITIALIZER(cred, &init_cred), -+ .comm = INIT_TASK_COMM, -+ .thread = INIT_THREAD, -+ .fs = &init_fs, -+ .files = &init_files, -+ .signal = &init_signals, -+ .sighand = &init_sighand, -+ .nsproxy = &init_nsproxy, -+ .pending = { -+ .list = LIST_HEAD_INIT(init_task.pending.list), -+ .signal = {{0}} -+ }, -+ .blocked = {{0}}, -+ .alloc_lock = __SPIN_LOCK_UNLOCKED(init_task.alloc_lock), -+ .journal_info = NULL, -+ INIT_CPU_TIMERS(init_task) -+ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), -+ .timer_slack_ns = 50000, /* 50 usec default slack */ -+ .thread_pid = &init_struct_pid, -+ .thread_group = LIST_HEAD_INIT(init_task.thread_group), -+ .thread_node = LIST_HEAD_INIT(init_signals.thread_head), -+#ifdef CONFIG_AUDITSYSCALL -+ .loginuid = INVALID_UID, -+ .sessionid = AUDIT_SID_UNSET, -+#endif -+#ifdef CONFIG_PERF_EVENTS -+ .perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex), -+ .perf_event_list = LIST_HEAD_INIT(init_task.perf_event_list), -+#endif -+#ifdef CONFIG_PREEMPT_RCU -+ .rcu_read_lock_nesting = 0, -+ .rcu_read_unlock_special.s = 0, -+ .rcu_node_entry = LIST_HEAD_INIT(init_task.rcu_node_entry), -+ .rcu_blocked_node = NULL, -+#endif -+#ifdef CONFIG_TASKS_RCU -+ .rcu_tasks_holdout = false, -+ .rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list), -+ .rcu_tasks_idle_cpu = -1, -+#endif -+#ifdef CONFIG_CPUSETS -+ .mems_allowed_seq = SEQCNT_ZERO(init_task.mems_allowed_seq), -+#endif -+#ifdef CONFIG_RT_MUTEXES -+ .pi_waiters = RB_ROOT_CACHED, -+ .pi_top_task = NULL, -+#endif -+ INIT_PREV_CPUTIME(init_task) -+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -+ .vtime.seqcount = SEQCNT_ZERO(init_task.vtime_seqcount), -+ .vtime.starttime = 0, -+ .vtime.state = VTIME_SYS, -+#endif -+#ifdef CONFIG_NUMA_BALANCING -+ .numa_preferred_nid = -1, -+ 
.numa_group = NULL, -+ .numa_faults = NULL, -+#endif -+#ifdef CONFIG_KASAN -+ .kasan_depth = 1, -+#endif -+#ifdef CONFIG_TRACE_IRQFLAGS -+ .softirqs_enabled = 1, -+#endif -+#ifdef CONFIG_LOCKDEP -+ .lockdep_recursion = 0, -+#endif -+#ifdef CONFIG_FUNCTION_GRAPH_TRACER -+ .ret_stack = NULL, -+#endif -+#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPT) -+ .trace_recursion = 0, -+#endif -+#ifdef CONFIG_LIVEPATCH -+ .patch_state = KLP_UNDEFINED, -+#endif -+#ifdef CONFIG_SECURITY -+ .security = NULL, -+#endif -+#else /* CONFIG_SCHED_PDS */ - #ifdef CONFIG_THREAD_INFO_IN_TASK - .thread_info = INIT_THREAD_INFO(init_task), - .stack_refcount = REFCOUNT_INIT(1), -@@ -182,6 +302,7 @@ struct task_struct init_task - #ifdef CONFIG_SECURITY - .security = NULL, - #endif -+#endif /* CONFIG_SCHED_PDS */ - }; - EXPORT_SYMBOL(init_task); - -diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index 729d3a5c772e..10a7c52b90d5 100644 ---- a/kernel/cgroup/cpuset.c -+++ b/kernel/cgroup/cpuset.c -@@ -636,7 +636,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) - return ret; - } - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_PDS) - /* - * Helper routine for generate_sched_domains(). - * Do cpusets a, b have overlapping effective cpus_allowed masks? -@@ -1009,7 +1009,7 @@ static void rebuild_sched_domains_locked(void) - /* Have scheduler rebuild the domains */ - partition_and_rebuild_sched_domains(ndoms, doms, attr); - } --#else /* !CONFIG_SMP */ -+#else /* !CONFIG_SMP || CONFIG_SCHED_PDS */ - static void rebuild_sched_domains_locked(void) - { - } -diff --git a/kernel/delayacct.c b/kernel/delayacct.c -index 27725754ac99..769d773c7182 100644 ---- a/kernel/delayacct.c -+++ b/kernel/delayacct.c -@@ -106,7 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) - */ - t1 = tsk->sched_info.pcount; - t2 = tsk->sched_info.run_delay; -- t3 = tsk->se.sum_exec_runtime; -+ t3 = tsk_seruntime(tsk); - - d->cpu_count += t1; - -diff --git a/kernel/exit.c b/kernel/exit.c -index ce2a75bc0ade..f0f864bc1ab9 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -122,7 +122,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->curr_target = next_thread(tsk); - } - -- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, -+ add_device_randomness((const void*) &tsk_seruntime(tsk), - sizeof(unsigned long long)); - - /* -@@ -143,7 +143,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->inblock += task_io_get_inblock(tsk); - sig->oublock += task_io_get_oublock(tsk); - task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; -+ sig->sum_sched_runtime += tsk_seruntime(tsk); - sig->nr_threads--; - __unhash_process(tsk, group_dead); - write_sequnlock(&sig->stats_lock); -diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c -index f6310f848f34..b5de980c7d4e 100644 ---- a/kernel/livepatch/transition.c -+++ b/kernel/livepatch/transition.c -@@ -306,7 +306,11 @@ static bool klp_try_switch_task(struct task_struct *task) - */ - rq = task_rq_lock(task, &flags); - -+#ifdef CONFIG_SCHED_PDS -+ if (task_running(task) && task != current) { -+#else - if (task_running(rq, task) && task != current) { -+#endif - snprintf(err_buf, STACK_ERR_BUF_SIZE, - "%s: %s:%d is running\n", __func__, task->comm, - task->pid); -diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index c9f090d64f00..063d15a1ab8b 100644 ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c 
-@@ -229,7 +229,7 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, - * Only use with rt_mutex_waiter_{less,equal}() - */ - #define task_to_waiter(p) \ -- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } -+ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = __tsk_deadline(p) } - - static inline int - rt_mutex_waiter_less(struct rt_mutex_waiter *left, -@@ -680,7 +680,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, - * the values of the node being removed. - */ - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - rt_mutex_enqueue(lock, waiter); - -@@ -953,7 +953,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, - waiter->task = task; - waiter->lock = lock; - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - /* Get the top priority waiter on the lock */ - if (rt_mutex_has_waiters(lock)) -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 21fb5a5662b5..8ebe4e33fb5f 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -16,15 +16,21 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) - CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer - endif - --obj-y += core.o loadavg.o clock.o cputime.o --obj-y += idle.o fair.o rt.o deadline.o --obj-y += wait.o wait_bit.o swait.o completion.o -- --obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o -+ifdef CONFIG_SCHED_PDS -+obj-y += pds.o -+else -+obj-y += core.o -+obj-y += fair.o rt.o deadline.o -+obj-$(CONFIG_SMP) += cpudeadline.o topology.o stop_task.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o --obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o - obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o -+endif -+obj-y += loadavg.o clock.o cputime.o -+obj-y += idle.o -+obj-y += wait.o wait_bit.o swait.o completion.o -+obj-$(CONFIG_SMP) += cpupri.o pelt.o -+obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o - obj-$(CONFIG_MEMBARRIER) += membarrier.o -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 7fbaee24c824..28377ad56248 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -183,6 +183,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - -+#ifndef CONFIG_SCHED_PDS - /* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. -@@ -300,6 +301,13 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) - - return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); - } -+#else /* CONFIG_SCHED_PDS */ -+static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) -+{ -+ sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); -+ return sg_cpu->max; -+} -+#endif - - /** - * sugov_iowait_reset() - Reset the IO boost status of a CPU. 
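A toy note on the hunk above: under CONFIG_SCHED_PDS, sugov_get_util() reports the CPU's full capacity, so the "linear relation: f = u * f_max" quoted in the surrounding context always resolves to the top of the range. The sketch below is only an illustration of that arithmetic; the real get_next_freq() additionally applies headroom and driver-side clamping that is not reproduced here.

/* Illustrative only, not part of the patch. */
static unsigned long toy_next_freq(unsigned long util, unsigned long max,
                                   unsigned long f_max)
{
        return f_max * util / max;      /* == f_max when util == max */
}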
-@@ -443,7 +451,9 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } - */ - static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) - { -+#ifndef CONFIG_SCHED_PDS - if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) -+#endif - sg_policy->limits_changed = true; - } - -@@ -686,6 +696,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - } - - ret = sched_setattr_nocheck(thread, &attr); -+ - if (ret) { - kthread_stop(thread); - pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); -@@ -916,6 +927,7 @@ static int __init sugov_register(void) - core_initcall(sugov_register); - - #ifdef CONFIG_ENERGY_MODEL -+#ifndef CONFIG_SCHED_PDS - extern bool sched_energy_update; - extern struct mutex sched_energy_mutex; - -@@ -946,4 +958,10 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - } - - } -+#else /* CONFIG_SCHED_PDS */ -+void sched_cpufreq_governor_change(struct cpufreq_policy *policy, -+ struct cpufreq_governor *old_gov) -+{ -+} -+#endif - #endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index ff9435dee1df..1377ea3d1b76 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -122,7 +122,12 @@ void account_user_time(struct task_struct *p, u64 cputime) - p->utime += cputime; - account_group_user_time(p, cputime); - -+#ifdef CONFIG_SCHED_PDS -+ index = (task_nice(p) > 0 || task_running_idle(p)) ? CPUTIME_NICE : -+ CPUTIME_USER; -+#else - index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; -+#endif - - /* Add user time to cpustat. */ - task_group_account_field(p, index, cputime); -@@ -146,7 +151,11 @@ void account_guest_time(struct task_struct *p, u64 cputime) - p->gtime += cputime; - - /* Add guest time to cpustat. */ -+#ifdef CONFIG_SCHED_PDS -+ if (task_nice(p) > 0 || task_running_idle(p)) { -+#else - if (task_nice(p) > 0) { -+#endif - cpustat[CPUTIME_NICE] += cputime; - cpustat[CPUTIME_GUEST_NICE] += cputime; - } else { -@@ -269,7 +278,7 @@ static inline u64 account_other_time(u64 max) - #ifdef CONFIG_64BIT - static inline u64 read_sum_exec_runtime(struct task_struct *t) - { -- return t->se.sum_exec_runtime; -+ return tsk_seruntime(t); - } - #else - static u64 read_sum_exec_runtime(struct task_struct *t) -@@ -279,7 +288,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) - struct rq *rq; - - rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -+ ns = tsk_seruntime(t); - task_rq_unlock(rq, t, &rf); - - return ns; -@@ -658,7 +667,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - task_cputime(p, &cputime.utime, &cputime.stime); -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index b743bf38f08f..16e5754af1cf 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -361,6 +361,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_PDS - /* - * idle-task scheduling class. 
- */ -@@ -481,3 +482,4 @@ const struct sched_class idle_sched_class = { - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif -diff --git a/kernel/sched/pds.c b/kernel/sched/pds.c -new file mode 100644 -index 000000000000..02d7d5a67c77 ---- /dev/null -+++ b/kernel/sched/pds.c -@@ -0,0 +1,6619 @@ -+/* -+ * kernel/sched/pds.c, was kernel/sched.c -+ * -+ * PDS-mq Core kernel scheduler code and related syscalls -+ * -+ * Copyright (C) 1991-2002 Linus Torvalds -+ * -+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes -+ * a whole lot of those previous things. -+ * 2017-09-06 Priority and Deadline based Skip list multiple queue kernel -+ * scheduler by Alfred Chen. -+ */ -+#include "pds_sched.h" -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+ -+#include "../workqueue_internal.h" -+#include "../../fs/io-wq.h" -+#include "../smpboot.h" -+ -+#include "pelt.h" -+#include "smp.h" -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+ -+#define rt_prio(prio) ((prio) < MAX_RT_PRIO) -+#define rt_task(p) rt_prio((p)->prio) -+#define rt_policy(policy) ((policy) == SCHED_FIFO || \ -+ (policy) == SCHED_RR || \ -+ (policy) == SCHED_ISO) -+#define task_has_rt_policy(p) (rt_policy((p)->policy)) -+ -+#define idle_policy(policy) ((policy) == SCHED_IDLE) -+#define idleprio_task(p) unlikely(idle_policy((p)->policy)) -+ -+#define STOP_PRIO (MAX_RT_PRIO - 1) -+ -+/* -+ * Some helpers for converting to/from various scales. Use shifts to get -+ * approximate multiples of ten for less overhead. -+ */ -+#define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) -+#define JIFFY_NS (1000000000 / HZ) -+#define HALF_JIFFY_NS (1000000000 / HZ / 2) -+#define HALF_JIFFY_US (1000000 / HZ / 2) -+#define MS_TO_NS(TIME) ((TIME) << 20) -+#define MS_TO_US(TIME) ((TIME) << 10) -+#define NS_TO_MS(TIME) ((TIME) >> 20) -+#define NS_TO_US(TIME) ((TIME) >> 10) -+#define US_TO_NS(TIME) ((TIME) << 10) -+ -+#define RESCHED_US (100) /* Reschedule if less than this many μs left */ -+ -+enum { -+ BASE_CPU_AFFINITY_CHK_LEVEL = 1, -+#ifdef CONFIG_SCHED_SMT -+ SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+#ifdef CONFIG_SCHED_MC -+ MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+ NR_CPU_AFFINITY_CHK_LEVEL -+}; -+ -+static inline void print_scheduler_version(void) -+{ -+ printk(KERN_INFO "pds: PDS-mq CPU Scheduler 0.99o by Alfred Chen and kept alive artificially by Tk-Glitch.\n"); -+} -+ -+/* -+ * This is the time all tasks within the same priority round robin. -+ * Value is in ms and set to a minimum of 6ms. Scales with number of cpus. -+ * Tunable via /proc interface. 
-+ */ -+#define SCHED_DEFAULT_RR (4) -+int rr_interval __read_mostly = SCHED_DEFAULT_RR; -+ -+static int __init rr_interval_set(char *str) -+{ -+ u32 rr; -+ -+ pr_info("rr_interval: "); -+ if (kstrtouint(str, 0, &rr)) { -+ pr_cont("using default of %u, unable to parse %s\n", -+ rr_interval, str); -+ return 1; -+ } -+ -+ rr_interval = rr; -+ pr_cont("%d\n", rr_interval); -+ -+ return 1; -+} -+__setup("rr_interval=", rr_interval_set); -+ -+ -+static const u64 sched_prio2deadline[NICE_WIDTH] = { -+/* -20 */ 6291456, 6920601, 7612661, 8373927, 9211319, -+/* -15 */ 10132450, 11145695, 12260264, 13486290, 14834919, -+/* -10 */ 16318410, 17950251, 19745276, 21719803, 23891783, -+/* -5 */ 26280961, 28909057, 31799962, 34979958, 38477953, -+/* 0 */ 42325748, 46558322, 51214154, 56335569, 61969125, -+/* 5 */ 68166037, 74982640, 82480904, 90728994, 99801893, -+/* 10 */ 109782082, 120760290, 132836319, 146119950, 160731945, -+/* 15 */ 176805139, 194485652, 213934217, 235327638, 258860401 -+}; -+ -+/** -+ * sched_yield_type - Choose what sort of yield sched_yield will perform. -+ * 0: No yield. -+ * 1: Yield only to better priority/deadline tasks. (default) -+ * 2: Expire timeslice and recalculate deadline. -+ */ -+int sched_yield_type __read_mostly = 1; -+ -+/* -+ * The quota handed out to tasks of all priority levels when refilling their -+ * time_slice. -+ */ -+static inline int timeslice(void) -+{ -+ return MS_TO_US(rr_interval); -+} -+ -+#ifdef CONFIG_SMP -+enum { -+SCHED_RQ_EMPTY = 0, -+SCHED_RQ_IDLE, -+SCHED_RQ_NORMAL_0, -+SCHED_RQ_NORMAL_1, -+SCHED_RQ_NORMAL_2, -+SCHED_RQ_NORMAL_3, -+SCHED_RQ_NORMAL_4, -+SCHED_RQ_NORMAL_5, -+SCHED_RQ_NORMAL_6, -+SCHED_RQ_NORMAL_7, -+SCHED_RQ_ISO, -+SCHED_RQ_RT, -+NR_SCHED_RQ_QUEUED_LEVEL -+}; -+ -+static cpumask_t sched_rq_queued_masks[NR_SCHED_RQ_QUEUED_LEVEL] -+____cacheline_aligned_in_smp; -+ -+static DECLARE_BITMAP(sched_rq_queued_masks_bitmap, NR_SCHED_RQ_QUEUED_LEVEL) -+____cacheline_aligned_in_smp; -+ -+static cpumask_t sched_rq_pending_masks[NR_SCHED_RQ_QUEUED_LEVEL] -+____cacheline_aligned_in_smp; -+ -+static DECLARE_BITMAP(sched_rq_pending_masks_bitmap, NR_SCHED_RQ_QUEUED_LEVEL) -+____cacheline_aligned_in_smp; -+ -+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_chk_masks); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_start_mask); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_chk_end_masks); -+ -+#ifdef CONFIG_SCHED_SMT -+DEFINE_PER_CPU(int, sched_sibling_cpu); -+DEFINE_STATIC_KEY_FALSE(sched_smt_present); -+EXPORT_SYMBOL_GPL(sched_smt_present); -+ -+static cpumask_t sched_cpu_sg_idle_mask ____cacheline_aligned_in_smp; -+ -+#ifdef CONFIG_SMT_NICE -+/* -+ * Preemptible sibling group mask -+ * Which all sibling cpus are running at PRIO_LIMIT or IDLE_PRIO -+ */ -+static cpumask_t sched_cpu_psg_mask ____cacheline_aligned_in_smp; -+/* -+ * SMT supressed mask -+ * When a cpu is running task with NORMAL/ISO/RT policy, its sibling cpu -+ * will be supressed to run IDLE priority task. -+ */ -+static cpumask_t sched_smt_supressed_mask ____cacheline_aligned_in_smp; -+#endif /* CONFIG_SMT_NICE */ -+#endif -+ -+static int sched_rq_prio[NR_CPUS] ____cacheline_aligned; -+ -+/* -+ * Keep a unique ID per domain (we use the first CPUs number in the cpumask of -+ * the domain), this allows us to quickly tell if two cpus are in the same cache -+ * domain, see cpus_share_cache(). 
-+ */ -+DEFINE_PER_CPU(int, sd_llc_id); -+ -+int __weak arch_sd_sibling_asym_packing(void) -+{ -+ return 0*SD_ASYM_PACKING; -+} -+#else -+struct rq *uprq; -+#endif /* CONFIG_SMP */ -+ -+static DEFINE_MUTEX(sched_hotcpu_mutex); -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+/* -+ * Context: p->pi_lock -+ */ -+static inline struct rq -+*__task_access_lock(struct task_struct *p, raw_spinlock_t **plock) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock(&rq->lock); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ *plock = NULL; -+ return rq; -+ } -+ } -+} -+ -+static inline void -+__task_access_unlock(struct task_struct *p, raw_spinlock_t *lock) -+{ -+ if (NULL != lock) -+ raw_spin_unlock(lock); -+} -+ -+static inline struct rq -+*task_access_lock_irqsave(struct task_struct *p, raw_spinlock_t **plock, -+ unsigned long *flags) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock_irqsave(&rq->lock, *flags); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&rq->lock, *flags); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ raw_spin_lock_irqsave(&p->pi_lock, *flags); -+ if (likely(!p->on_cpu && !p->on_rq && -+ rq == task_rq(p))) { -+ *plock = &p->pi_lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags); -+ } -+ } -+} -+ -+static inline void -+task_access_unlock_irqrestore(struct task_struct *p, raw_spinlock_t *lock, -+ unsigned long *flags) -+{ -+ raw_spin_unlock_irqrestore(lock, *flags); -+} -+ -+/* -+ * __task_rq_lock - lock the rq @p resides on. -+ */ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ for (;;) { -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) -+ return rq; -+ raw_spin_unlock(&rq->lock); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+/* -+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. -+ */ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ for (;;) { -+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ /* -+ * move_queued_task() task_rq_lock() -+ * -+ * ACQUIRE (rq->lock) -+ * [S] ->on_rq = MIGRATING [L] rq = task_rq() -+ * WMB (__set_task_cpu()) ACQUIRE (rq->lock); -+ * [S] ->cpu = new_cpu [L] task_rq() -+ * [L] ->on_rq -+ * RELEASE (rq->lock) -+ * -+ * If we observe the old CPU in task_rq_lock(), the acquire of -+ * the old rq->lock will fully serialize against the stores. 
-+ * -+ * If we observe the new CPU in task_rq_lock(), the address -+ * dependency headed by '[L] rq = task_rq()' and the acquire -+ * will pair with the WMB to ensure we then also see migrating. -+ */ -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+/* -+ * RQ-clock updating methods: -+ */ -+ -+static void update_rq_clock_task(struct rq *rq, s64 delta) -+{ -+/* -+ * In theory, the compile should just see 0 here, and optimize out the call -+ * to sched_rt_avg_update. But I don't trust it... -+ */ -+ s64 __maybe_unused steal = 0, irq_delta = 0; -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; -+ -+ /* -+ * Since irq_time is only updated on {soft,}irq_exit, we might run into -+ * this case when a previous update_rq_clock() happened inside a -+ * {soft,}irq region. -+ * -+ * When this happens, we stop ->clock_task and only update the -+ * prev_irq_time stamp to account for the part that fit, so that a next -+ * update will consume the rest. This ensures ->clock_task is -+ * monotonic. -+ * -+ * It does however cause some slight miss-attribution of {soft,}irq -+ * time, a more accurate solution would be to update the irq_time using -+ * the current rq->clock timestamp, except that would require using -+ * atomic ops. -+ */ -+ if (irq_delta > delta) -+ irq_delta = delta; -+ -+ rq->prev_irq_time += irq_delta; -+ delta -= irq_delta; -+#endif -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ if (static_key_false((¶virt_steal_rq_enabled))) { -+ steal = paravirt_steal_clock(cpu_of(rq)); -+ steal -= rq->prev_steal_time_rq; -+ -+ if (unlikely(steal > delta)) -+ steal = delta; -+ -+ rq->prev_steal_time_rq += steal; -+ -+ delta -= steal; -+ } -+#endif -+ -+ rq->clock_task += delta; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ if ((irq_delta + steal)) -+ update_irq_load_avg(rq, irq_delta + steal); -+#endif -+} -+ -+static inline void update_rq_clock(struct rq *rq) -+{ -+ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; -+ -+ if (unlikely(delta <= 0)) -+ return; -+ rq->clock += delta; -+ update_rq_clock_task(rq, delta); -+} -+ -+static inline void update_task_priodl(struct task_struct *p) -+{ -+ p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); -+} -+ -+/* -+ * Deadline is "now" in niffies + (offset by priority). Setting the deadline -+ * is the key to everything. It distributes CPU fairly amongst tasks of the -+ * same nice value, it proportions CPU according to nice level, it means the -+ * task that last woke up the longest ago has the earliest deadline, thus -+ * ensuring that interactive tasks get low latency on wake up. The CPU -+ * proportion works out to the square of the virtual deadline difference, so -+ * this equation will give nice 19 3% CPU compared to nice 0. -+ */ -+static inline u64 task_deadline_diff(const struct task_struct *p) -+{ -+ return sched_prio2deadline[TASK_USER_PRIO(p)]; -+} -+ -+static inline u64 static_deadline_diff(int static_prio) -+{ -+ return sched_prio2deadline[USER_PRIO(static_prio)]; -+} -+ -+/* -+ * The time_slice is only refilled when it is empty and that is when we set a -+ * new deadline for non-rt tasks. 
-+ */ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ p->time_slice = timeslice(); -+ if (p->prio >= NORMAL_PRIO) -+ p->deadline = rq->clock + task_deadline_diff(p); -+ -+ update_task_priodl(p); -+} -+ -+static inline struct task_struct *rq_first_queued_task(struct rq *rq) -+{ -+ struct skiplist_node *node = rq->sl_header.next[0]; -+ -+ if (node == &rq->sl_header) -+ return rq->idle; -+ -+ return skiplist_entry(node, struct task_struct, sl_node); -+} -+ -+static inline struct task_struct *rq_second_queued_task(struct rq *rq) -+{ -+ struct skiplist_node *node = rq->sl_header.next[0]->next[0]; -+ -+ if (node == &rq->sl_header) -+ return rq->idle; -+ -+ return skiplist_entry(node, struct task_struct, sl_node); -+} -+ -+static inline int is_second_in_rq(struct task_struct *p, struct rq *rq) -+{ -+ return (p->sl_node.prev[0]->prev[0] == &rq->sl_header); -+} -+ -+static const int task_dl_hash_tbl[] = { -+/* 0 4 8 12 */ -+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -+/* 16 20 24 28 */ -+ 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 7 -+}; -+ -+static inline int -+task_deadline_level(const struct task_struct *p, const struct rq *rq) -+{ -+ u64 delta = (rq->clock + sched_prio2deadline[39] - p->deadline) >> 23; -+ -+ delta = min((size_t)delta, ARRAY_SIZE(task_dl_hash_tbl) - 1); -+ return task_dl_hash_tbl[delta]; -+} -+ -+/* -+ * cmpxchg based fetch_or, macro so it works for different integer types -+ */ -+#define fetch_or(ptr, mask) \ -+ ({ \ -+ typeof(ptr) _ptr = (ptr); \ -+ typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _old, _val = *_ptr; \ -+ \ -+ for (;;) { \ -+ _old = cmpxchg(_ptr, _val, _val | _mask); \ -+ if (_old == _val) \ -+ break; \ -+ _val = _old; \ -+ } \ -+ _old; \ -+}) -+ -+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) -+/* -+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * this avoids any races wrt polling state changes and thereby avoids -+ * spurious IPIs. -+ */ -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+} -+ -+/* -+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. -+ * -+ * If this returns true, then the idle task promises to call -+ * flush_smp_call_function_from_idle() and reschedule soon. 
-+ */ -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); -+ -+ for (;;) { -+ if (!(val & _TIF_POLLING_NRFLAG)) -+ return false; -+ if (val & _TIF_NEED_RESCHED) -+ return true; -+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); -+ if (old == val) -+ break; -+ val = old; -+ } -+ return true; -+} -+ -+#else -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ return false; -+} -+#endif -+#endif -+ -+#ifdef CONFIG_SMP -+#ifdef CONFIG_SMT_NICE -+static void resched_cpu_if_curr_is(int cpu, int priority) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ rcu_read_lock(); -+ -+ if (rcu_dereference(rq->curr)->prio != priority) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ if (!do_raw_spin_trylock(&rq->lock)) -+ goto out; -+ spin_acquire(&rq->lock.dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ -+ if (priority == rq->curr->prio) -+ smp_send_reschedule(cpu); -+ /* Else CPU is not idle, do nothing here */ -+ -+ spin_release(&rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&rq->lock); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+#endif /* CONFIG_SMT_NICE */ -+ -+static inline bool -+__update_cpumasks_bitmap(int cpu, unsigned long *plevel, unsigned long level, -+ cpumask_t cpumasks[], unsigned long bitmap[]) -+{ -+ if (*plevel == level) -+ return false; -+ -+ cpumask_clear_cpu(cpu, cpumasks + *plevel); -+ if (cpumask_empty(cpumasks + *plevel)) -+ clear_bit(*plevel, bitmap); -+ cpumask_set_cpu(cpu, cpumasks + level); -+ set_bit(level, bitmap); -+ -+ *plevel = level; -+ -+ return true; -+} -+ -+static inline int -+task_running_policy_level(const struct task_struct *p, const struct rq *rq) -+{ -+ int prio = p->prio; -+ -+ if (NORMAL_PRIO == prio) -+ return SCHED_RQ_NORMAL_0 + task_deadline_level(p, rq); -+ -+ if (ISO_PRIO == prio) -+ return SCHED_RQ_ISO; -+ if (prio < MAX_RT_PRIO) -+ return SCHED_RQ_RT; -+ return PRIO_LIMIT - prio; -+} -+ -+static inline void update_sched_rq_queued_masks_normal(struct rq *rq) -+{ -+ struct task_struct *p = rq_first_queued_task(rq); -+ -+ if (p->prio != NORMAL_PRIO) -+ return; -+ -+ __update_cpumasks_bitmap(cpu_of(rq), &rq->queued_level, -+ task_running_policy_level(p, rq), -+ &sched_rq_queued_masks[0], -+ &sched_rq_queued_masks_bitmap[0]); -+} -+ -+#ifdef CONFIG_SMT_NICE -+static inline void update_sched_cpu_psg_mask(const int cpu) -+{ -+ cpumask_t tmp; -+ -+ cpumask_or(&tmp, &sched_rq_queued_masks[SCHED_RQ_EMPTY], -+ &sched_rq_queued_masks[SCHED_RQ_IDLE]); -+ cpumask_and(&tmp, &tmp, cpu_smt_mask(cpu)); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_cpu_psg_mask, &sched_cpu_psg_mask, -+ cpu_smt_mask(cpu)); -+ else -+ cpumask_andnot(&sched_cpu_psg_mask, &sched_cpu_psg_mask, -+ cpu_smt_mask(cpu)); -+} -+#endif -+ -+static inline void update_sched_rq_queued_masks(struct rq *rq) -+{ -+ int cpu = cpu_of(rq); -+ struct task_struct *p = rq_first_queued_task(rq); -+ unsigned long level; -+#ifdef CONFIG_SCHED_SMT -+ unsigned long last_level = rq->queued_level; -+#endif -+ -+ level = task_running_policy_level(p, rq); -+ sched_rq_prio[cpu] = p->prio; -+ -+ if (!__update_cpumasks_bitmap(cpu, &rq->queued_level, level, -+ &sched_rq_queued_masks[0], -+ &sched_rq_queued_masks_bitmap[0])) -+ return; -+ -+#ifdef CONFIG_SCHED_SMT -+ if (cpu == 
per_cpu(sched_sibling_cpu, cpu)) -+ return; -+ -+ if (SCHED_RQ_EMPTY == last_level) { -+ cpumask_andnot(&sched_cpu_sg_idle_mask, &sched_cpu_sg_idle_mask, -+ cpu_smt_mask(cpu)); -+ } else if (SCHED_RQ_EMPTY == level) { -+ cpumask_t tmp; -+ -+ cpumask_and(&tmp, cpu_smt_mask(cpu), -+ &sched_rq_queued_masks[SCHED_RQ_EMPTY]); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_cpu_sg_idle_mask, cpu_smt_mask(cpu), -+ &sched_cpu_sg_idle_mask); -+ } -+ -+#ifdef CONFIG_SMT_NICE -+ if (level <= SCHED_RQ_IDLE && last_level > SCHED_RQ_IDLE) { -+ cpumask_clear_cpu(per_cpu(sched_sibling_cpu, cpu), -+ &sched_smt_supressed_mask); -+ update_sched_cpu_psg_mask(cpu); -+ resched_cpu_if_curr_is(per_cpu(sched_sibling_cpu, cpu), PRIO_LIMIT); -+ } else if (last_level <= SCHED_RQ_IDLE && level > SCHED_RQ_IDLE) { -+ cpumask_set_cpu(per_cpu(sched_sibling_cpu, cpu), -+ &sched_smt_supressed_mask); -+ update_sched_cpu_psg_mask(cpu); -+ resched_cpu_if_curr_is(per_cpu(sched_sibling_cpu, cpu), IDLE_PRIO); -+ } -+#endif /* CONFIG_SMT_NICE */ -+#endif -+} -+ -+static inline void update_sched_rq_pending_masks(struct rq *rq) -+{ -+ unsigned long level; -+ struct task_struct *p = rq_second_queued_task(rq); -+ -+ level = task_running_policy_level(p, rq); -+ -+ __update_cpumasks_bitmap(cpu_of(rq), &rq->pending_level, level, -+ &sched_rq_pending_masks[0], -+ &sched_rq_pending_masks_bitmap[0]); -+} -+ -+#else /* CONFIG_SMP */ -+static inline void update_sched_rq_queued_masks(struct rq *rq) {} -+static inline void update_sched_rq_queued_masks_normal(struct rq *rq) {} -+static inline void update_sched_rq_pending_masks(struct rq *rq) {} -+#endif -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out -+ * of nohz mode if necessary. -+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu; -+ -+ if (!tick_nohz_full_enabled()) -+ return; -+ -+ cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (rq->nr_running < 2) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+/* -+ * Removing from the runqueue. Deleting a task from the skip list is done -+ * via the stored node reference in the task struct and does not require a full -+ * look up. Thus it occurs in O(k) time where k is the "level" of the list the -+ * task was stored at - usually < 4, max 16. -+ * -+ * Context: rq->lock -+ */ -+static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "pds: dequeue task reside on cpu%d from cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ if (skiplist_del_init(&rq->sl_header, &p->sl_node)) { -+ update_sched_rq_queued_masks(rq); -+ update_sched_rq_pending_masks(rq); -+ } else if (is_second_in_rq(p, rq)) -+ update_sched_rq_pending_masks(rq); -+ rq->nr_running--; -+ -+ sched_update_tick_dependency(rq); -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); -+ -+ sched_info_dequeued(rq, p); -+} -+ -+/* -+ * To determine if it's safe for a task of SCHED_IDLE to actually run as -+ * an idle task, we ensure none of the following conditions are met. 
-+ */ -+static inline bool idleprio_suitable(struct task_struct *p) -+{ -+ return (!freezing(p) && !signal_pending(p) && -+ !(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING))); -+} -+ -+/* -+ * pds_skiplist_random_level -- Returns a pseudo-random level number for skip -+ * list node which is used in PDS run queue. -+ * -+ * In current implementation, based on testing, the first 8 bits in microseconds -+ * of niffies are suitable for random level population. -+ * find_first_bit() is used to satisfy p = 0.5 between each levels, and there -+ * should be platform hardware supported instruction(known as ctz/clz) to speed -+ * up this function. -+ * The skiplist level for a task is populated when task is created and doesn't -+ * change in task's life time. When task is being inserted into run queue, this -+ * skiplist level is set to task's sl_node->level, the skiplist insert function -+ * may change it based on current level of the skip lsit. -+ */ -+static inline int pds_skiplist_random_level(const struct task_struct *p) -+{ -+ long unsigned int randseed; -+ -+ /* -+ * 1. Some architectures don't have better than microsecond resolution -+ * so mask out ~microseconds as a factor of the random seed for skiplist -+ * insertion. -+ * 2. Use address of task structure pointer as another factor of the -+ * random seed for task burst forking scenario. -+ */ -+ randseed = (task_rq(p)->clock ^ (long unsigned int)p) >> 10; -+ -+ return find_first_bit(&randseed, NUM_SKIPLIST_LEVEL - 1); -+} -+ -+/** -+ * pds_skiplist_task_search -- search function used in PDS run queue skip list -+ * node insert operation. -+ * @it: iterator pointer to the node in the skip list -+ * @node: pointer to the skiplist_node to be inserted -+ * -+ * Returns true if key of @it is less or equal to key value of @node, otherwise -+ * false. -+ */ -+static inline bool -+pds_skiplist_task_search(struct skiplist_node *it, struct skiplist_node *node) -+{ -+ return (skiplist_entry(it, struct task_struct, sl_node)->priodl <= -+ skiplist_entry(node, struct task_struct, sl_node)->priodl); -+} -+ -+/* -+ * Define the skip list insert function for PDS -+ */ -+DEFINE_SKIPLIST_INSERT_FUNC(pds_skiplist_insert, pds_skiplist_task_search); -+ -+/* -+ * Adding task to the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "pds: enqueue task reside on cpu%d to cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ p->sl_node.level = p->sl_level; -+ if (pds_skiplist_insert(&rq->sl_header, &p->sl_node)) { -+ update_sched_rq_queued_masks(rq); -+ update_sched_rq_pending_masks(rq); -+ } else if (is_second_in_rq(p, rq)) -+ update_sched_rq_pending_masks(rq); -+ rq->nr_running++; -+ -+ sched_update_tick_dependency(rq); -+ -+ sched_info_queued(rq, p); -+ psi_enqueue(p, flags); -+ -+ /* -+ * If in_iowait is set, the code below may not trigger any cpufreq -+ * utilization updates, so do it here explicitly with the IOWAIT flag -+ * passed. 
-+ */ -+ if (p->in_iowait) -+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq) -+{ -+ bool b_first, b_second; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "pds: cpu[%d] requeue task reside on cpu%d\n", -+ cpu_of(rq), task_cpu(p)); -+ -+ b_first = skiplist_del_init(&rq->sl_header, &p->sl_node); -+ b_second = is_second_in_rq(p, rq); -+ -+ p->sl_node.level = p->sl_level; -+ if (pds_skiplist_insert(&rq->sl_header, &p->sl_node) || b_first) { -+ update_sched_rq_queued_masks(rq); -+ update_sched_rq_pending_masks(rq); -+ } else if (is_second_in_rq(p, rq) || b_second) -+ update_sched_rq_pending_masks(rq); -+} -+ -+/* -+ * resched_curr - mark rq's current task 'to be rescheduled now'. -+ * -+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. -+ */ -+void resched_curr(struct rq *rq) -+{ -+ struct task_struct *curr = rq->curr; -+ int cpu; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ if (test_tsk_need_resched(curr)) -+ return; -+ -+ cpu = cpu_of(rq); -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(curr); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(curr)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) -+{ -+ struct task_struct *curr = rq->curr; -+ -+ if (curr->prio == PRIO_LIMIT) -+ resched_curr(rq); -+ -+ if (task_running_idle(p)) -+ return; -+ -+ if (p->priodl < curr->priodl) -+ resched_curr(rq); -+} -+ -+#ifdef CONFIG_SCHED_HRTICK -+/* -+ * Use HR-timers to deliver accurate preemption points. -+ */ -+ -+static void hrtick_clear(struct rq *rq) -+{ -+ if (hrtimer_active(&rq->hrtick_timer)) -+ hrtimer_cancel(&rq->hrtick_timer); -+} -+ -+/* -+ * High-resolution timer tick. -+ * Runs from hardirq context with interrupts disabled. -+ */ -+static enum hrtimer_restart hrtick(struct hrtimer *timer) -+{ -+ struct rq *rq = container_of(timer, struct rq, hrtick_timer); -+ struct task_struct *p; -+ -+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); -+ -+ raw_spin_lock(&rq->lock); -+ p = rq->curr; -+ p->time_slice = 0; -+ resched_curr(rq); -+ raw_spin_unlock(&rq->lock); -+ -+ return HRTIMER_NORESTART; -+} -+ -+/* -+ * Use hrtick when: -+ * - enabled by features -+ * - hrtimer is actually high res -+ */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ /** -+ * PDS doesn't support sched_feat yet -+ if (!sched_feat(HRTICK)) -+ return 0; -+ */ -+ if (!cpu_active(cpu_of(rq))) -+ return 0; -+ return hrtimer_is_hres_active(&rq->hrtick_timer); -+} -+ -+#ifdef CONFIG_SMP -+ -+static void __hrtick_restart(struct rq *rq) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ -+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); -+} -+ -+/* -+ * called from hardirq (IPI) context -+ */ -+static void __hrtick_start(void *arg) -+{ -+ struct rq *rq = arg; -+ -+ raw_spin_lock(&rq->lock); -+ __hrtick_restart(rq); -+ raw_spin_unlock(&rq->lock); -+} -+ -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ ktime_t time; -+ s64 delta; -+ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense and can cause timer DoS. 
-+ */ -+ delta = max_t(s64, delay, 10000LL); -+ time = ktime_add_ns(timer->base->get_time(), delta); -+ -+ hrtimer_set_expires(timer, time); -+ -+ if (rq == this_rq()) -+ __hrtick_restart(rq); -+ else -+ smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); -+} -+ -+#else -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense. Rely on vruntime for fairness. -+ */ -+ delay = max_t(u64, delay, 10000LL); -+ hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), -+ HRTIMER_MODE_REL_PINNED_HARD); -+} -+#endif /* CONFIG_SMP */ -+ -+static void hrtick_rq_init(struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ rq->hrtick_csd.flags = 0; -+ rq->hrtick_csd.func = __hrtick_start; -+ rq->hrtick_csd.info = rq; -+#endif -+ -+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); -+ rq->hrtick_timer.function = hrtick; -+} -+ -+static inline int rq_dither(struct rq *rq) -+{ -+ if ((rq->clock - rq->last_tick > HALF_JIFFY_NS) || hrtick_enabled(rq)) -+ return 0; -+ -+ return HALF_JIFFY_NS; -+} -+ -+#else /* CONFIG_SCHED_HRTICK */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline void hrtick_clear(struct rq *rq) -+{ -+} -+ -+static inline void hrtick_rq_init(struct rq *rq) -+{ -+} -+ -+static inline int rq_dither(struct rq *rq) -+{ -+ return (rq->clock - rq->last_tick > HALF_JIFFY_NS)? 0:HALF_JIFFY_NS; -+} -+#endif /* CONFIG_SCHED_HRTICK */ -+ -+static inline int normal_prio(struct task_struct *p) -+{ -+ static const int policy_to_prio[] = { -+ NORMAL_PRIO, /* SCHED_NORMAL */ -+ 0, /* SCHED_FIFO */ -+ 0, /* SCHED_RR */ -+ IDLE_PRIO, /* SCHED_BATCH */ -+ ISO_PRIO, /* SCHED_ISO */ -+ IDLE_PRIO /* SCHED_IDLE */ -+ }; -+ -+ if (task_has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ return policy_to_prio[p->policy]; -+} -+ -+/* -+ * Calculate the current priority, i.e. the priority -+ * taken into account by the scheduler. This value might -+ * be boosted by RT tasks as it will be RT if the task got -+ * RT-boosted. If not then it returns p->normal_prio. -+ */ -+static int effective_prio(struct task_struct *p) -+{ -+ p->normal_prio = normal_prio(p); -+ /* -+ * If we are RT tasks or we were boosted to RT priority, -+ * keep the priority unchanged. Otherwise, update priority -+ * to the normal priority: -+ */ -+ if (!rt_prio(p->prio)) -+ return p->normal_prio; -+ return p->prio; -+} -+ -+/* -+ * activate_task - move a task to the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static void activate_task(struct task_struct *p, struct rq *rq) -+{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible--; -+ enqueue_task(p, rq, ENQUEUE_WAKEUP); -+ p->on_rq = 1; -+ cpufreq_update_this_cpu(rq, 0); -+} -+ -+/* -+ * deactivate_task - remove a task from the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static inline void deactivate_task(struct task_struct *p, struct rq *rq) -+{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible++; -+ dequeue_task(p, rq, DEQUEUE_SLEEP); -+ p->on_rq = 0; -+ cpufreq_update_this_cpu(rq, 0); -+} -+ -+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->cpu is set up to a new value, task_access_lock(p, ...) can be -+ * successfully executed on another CPU. We must ensure that updates of -+ * per-task data have been completed by this moment. 
-+ */ -+ smp_wmb(); -+ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ WRITE_ONCE(p->cpu, cpu); -+#else -+ WRITE_ONCE(task_thread_info(p)->cpu, cpu); -+#endif -+#endif -+} -+ -+#ifdef CONFIG_SMP -+void set_task_cpu(struct task_struct *p, unsigned int new_cpu) -+{ -+#ifdef CONFIG_SCHED_DEBUG -+ /* -+ * We should never call set_task_cpu() on a blocked task, -+ * ttwu() will sort out the placement. -+ */ -+ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && -+ !p->on_rq); -+#ifdef CONFIG_LOCKDEP -+ /* -+ * The caller should hold either p->pi_lock or rq->lock, when changing -+ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. -+ * -+ * sched_move_task() holds both and thus holding either pins the cgroup, -+ * see task_group(). -+ */ -+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || -+ lockdep_is_held(&task_rq(p)->lock))); -+#endif -+ /* -+ * Clearly, migrating tasks to offline CPUs is a fairly daft thing. -+ */ -+ WARN_ON_ONCE(!cpu_online(new_cpu)); -+#endif -+ if (task_cpu(p) == new_cpu) -+ return; -+ trace_sched_migrate_task(p, new_cpu); -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ -+ __set_task_cpu(p, new_cpu); -+} -+ -+static inline bool is_per_cpu_kthread(struct task_struct *p) -+{ -+ return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); -+} -+ -+/* -+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see -+ * __set_cpus_allowed_ptr() and select_fallback_rq(). -+ */ -+static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -+{ -+ if (!cpumask_test_cpu(cpu, &p->cpus_mask)) -+ return false; -+ -+ if (is_per_cpu_kthread(p)) -+ return cpu_online(cpu); -+ -+ return cpu_active(cpu); -+} -+ -+/* -+ * This is how migration works: -+ * -+ * 1) we invoke migration_cpu_stop() on the target CPU using -+ * stop_one_cpu(). -+ * 2) stopper starts to run (implicitly forcing the migrated thread -+ * off the CPU) -+ * 3) it checks whether the migrated task is still in the wrong runqueue. -+ * 4) if it's in the wrong runqueue then the migration thread removes -+ * it and puts it into the right queue. -+ * 5) stopper completes and stop_one_cpu() returns and the migration -+ * is done. -+ */ -+ -+/* -+ * move_queued_task - move a queued task to new rq. -+ * -+ * Returns (locked) new rq. Old rq's lock is released. -+ */ -+static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int -+ new_cpu) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ p->on_rq = TASK_ON_RQ_MIGRATING; -+ dequeue_task(p, rq, 0); -+ set_task_cpu(p, new_cpu); -+ raw_spin_unlock(&rq->lock); -+ -+ rq = cpu_rq(new_cpu); -+ -+ raw_spin_lock(&rq->lock); -+ BUG_ON(task_cpu(p) != new_cpu); -+ enqueue_task(p, rq, 0); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ check_preempt_curr(rq, p); -+ -+ return rq; -+} -+ -+struct migration_arg { -+ struct task_struct *task; -+ int dest_cpu; -+}; -+ -+/* -+ * Move (not current) task off this CPU, onto the destination CPU. We're doing -+ * this because either it can't run here any more (set_cpus_allowed() -+ * away from this CPU, or CPU going down), or because we're -+ * attempting to rebalance this task on exec (sched_exec). -+ * -+ * So we race with normal scheduler movements, but that's OK, as long -+ * as the task is no longer on this CPU. -+ */ -+static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int -+ dest_cpu) -+{ -+ /* Affinity changed (again). 
*/ -+ if (!is_cpu_allowed(p, dest_cpu)) -+ return rq; -+ -+ update_rq_clock(rq); -+ return move_queued_task(rq, p, dest_cpu); -+} -+ -+/* -+ * migration_cpu_stop - this will be executed by a highprio stopper thread -+ * and performs thread migration by bumping thread off CPU then -+ * 'pushing' onto another runqueue. -+ */ -+static int migration_cpu_stop(void *data) -+{ -+ struct migration_arg *arg = data; -+ struct task_struct *p = arg->task; -+ struct rq *rq = this_rq(); -+ -+ /* -+ * The original target CPU might have gone down and we might -+ * be on another CPU but it doesn't matter. -+ */ -+ local_irq_disable(); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ /* -+ * If task_rq(p) != rq, it cannot be migrated here, because we're -+ * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because -+ * we're holding p->pi_lock. -+ */ -+ if (task_rq(p) == rq) -+ if (task_on_rq_queued(p)) -+ rq = __migrate_task(rq, p, arg->dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_enable(); -+ return 0; -+} -+ -+static inline void -+set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ p->nr_cpus_allowed = cpumask_weight(new_mask); -+} -+ -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ set_cpus_allowed_common(p, new_mask); -+} -+#endif -+ -+/* Enter with rq lock held. We know p is on the local CPU */ -+static inline void __set_tsk_resched(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ -+/** -+ * task_curr - is this task currently executing on a CPU? -+ * @p: the task in question. -+ * -+ * Return: 1 if the task is currently executing. 0 otherwise. -+ */ -+inline int task_curr(const struct task_struct *p) -+{ -+ return cpu_curr(task_cpu(p)) == p; -+} -+ -+#ifdef CONFIG_SMP -+/* -+ * wait_task_inactive - wait for a thread to unschedule. -+ * -+ * If @match_state is nonzero, it's the @p->state value just checked and -+ * not expected to change. If it changes, i.e. @p might have woken up, -+ * then return zero. When we succeed in waiting for @p to be off its CPU, -+ * we return a positive number (its total switch count). If a second call -+ * a short while later returns the same number, the caller can be sure that -+ * @p has remained unscheduled the whole time. -+ * -+ * The caller must ensure that the task *will* unschedule sometime soon, -+ * else this function might spin for a *long* time. This function can't -+ * be called with interrupts off, or it may introduce deadlock with -+ * smp_call_function() if an IPI is sent by the same process we are -+ * waiting to become inactive. -+ */ -+unsigned long wait_task_inactive(struct task_struct *p, long match_state) -+{ -+ unsigned long flags; -+ bool running, on_rq; -+ unsigned long ncsw; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ for (;;) { -+ rq = task_rq(p); -+ -+ /* -+ * If the task is actively running on another CPU -+ * still, just relax and busy-wait without holding -+ * any locks. -+ * -+ * NOTE! Since we don't hold any locks, it's not -+ * even sure that "rq" stays as the right runqueue! -+ * But we don't care, since this will return false -+ * if the runqueue has changed and p is actually now -+ * running somewhere else! -+ */ -+ while (task_running(p) && p == rq->curr) { -+ if (match_state && unlikely(p->state != match_state)) -+ return 0; -+ cpu_relax(); -+ } -+ -+ /* -+ * Ok, time to look more closely! 
We need the rq -+ * lock now, to be *sure*. If we're wrong, we'll -+ * just go back and repeat. -+ */ -+ task_access_lock_irqsave(p, &lock, &flags); -+ trace_sched_wait_task(p); -+ running = task_running(p); -+ on_rq = p->on_rq; -+ ncsw = 0; -+ if (!match_state || p->state == match_state) -+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ /* -+ * If it changed from the expected state, bail out now. -+ */ -+ if (unlikely(!ncsw)) -+ break; -+ -+ /* -+ * Was it really running after all now that we -+ * checked with the proper locks actually held? -+ * -+ * Oops. Go back and try again.. -+ */ -+ if (unlikely(running)) { -+ cpu_relax(); -+ continue; -+ } -+ -+ /* -+ * It's not enough that it's not actively running, -+ * it must be off the runqueue _entirely_, and not -+ * preempted! -+ * -+ * So if it was still runnable (but just not actively -+ * running right now), it's preempted, and we should -+ * yield - it could be a while. -+ */ -+ if (unlikely(on_rq)) { -+ ktime_t to = NSEC_PER_SEC / HZ; -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ continue; -+ } -+ -+ /* -+ * Ahh, all good. It wasn't running, and it wasn't -+ * runnable, which means that it will never become -+ * running in the future either. We're all done! -+ */ -+ break; -+ } -+ -+ return ncsw; -+} -+ -+/*** -+ * kick_process - kick a running thread to enter/exit the kernel -+ * @p: the to-be-kicked thread -+ * -+ * Cause a process which is running on another CPU to enter -+ * kernel-mode, without any delay. (to get signals handled.) -+ * -+ * NOTE: this function doesn't have to take the runqueue lock, -+ * because all it wants to ensure is that the remote task enters -+ * the kernel. If the IPI races and the task has been migrated -+ * to another CPU then no harm is done and the purpose has been -+ * achieved as well. -+ */ -+void kick_process(struct task_struct *p) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ cpu = task_cpu(p); -+ if ((cpu != smp_processor_id()) && task_curr(p)) -+ smp_send_reschedule(cpu); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(kick_process); -+ -+/* -+ * ->cpus_mask is protected by both rq->lock and p->pi_lock -+ * -+ * A few notes on cpu_active vs cpu_online: -+ * -+ * - cpu_active must be a subset of cpu_online -+ * -+ * - on CPU-up we allow per-CPU kthreads on the online && !active CPU, -+ * see __set_cpus_allowed_ptr(). At this point the newly online -+ * CPU isn't yet part of the sched domains, and balancing will not -+ * see it. -+ * -+ * - on cpu-down we clear cpu_active() to mask the sched domains and -+ * avoid the load balancer to place new tasks on the to be removed -+ * CPU. Existing tasks will remain running there and will be taken -+ * off. -+ * -+ * This means that fallback selection must not select !active CPUs. -+ * And can assume that any active CPU must be online. Conversely -+ * select_task_rq() below may allow selection of !active CPUs in order -+ * to satisfy the above rules. -+ */ -+static int select_fallback_rq(int cpu, struct task_struct *p) -+{ -+ int nid = cpu_to_node(cpu); -+ const struct cpumask *nodemask = NULL; -+ enum { cpuset, possible, fail } state = cpuset; -+ int dest_cpu; -+ -+ /* -+ * If the node that the CPU is on has been offlined, cpu_to_node() -+ * will return -1. There is no CPU on the node, and we should -+ * select the CPU on the other node. -+ */ -+ if (nid != -1) { -+ nodemask = cpumask_of_node(nid); -+ -+ /* Look for allowed, online CPU in same node. 
*/ -+ for_each_cpu(dest_cpu, nodemask) { -+ if (!cpu_active(dest_cpu)) -+ continue; -+ if (cpumask_test_cpu(dest_cpu, &p->cpus_mask)) -+ return dest_cpu; -+ } -+ } -+ -+ for (;;) { -+ /* Any allowed, online CPU? */ -+ for_each_cpu(dest_cpu, &p->cpus_mask) { -+ if (!is_cpu_allowed(p, dest_cpu)) -+ continue; -+ goto out; -+ } -+ -+ /* No more Mr. Nice Guy. */ -+ switch (state) { -+ case cpuset: -+ if (IS_ENABLED(CONFIG_CPUSETS)) { -+ cpuset_cpus_allowed_fallback(p); -+ state = possible; -+ break; -+ } -+ /* Fall-through */ -+ case possible: -+ do_set_cpus_allowed(p, cpu_possible_mask); -+ state = fail; -+ break; -+ -+ case fail: -+ BUG(); -+ break; -+ } -+ } -+ -+out: -+ if (state != cpuset) { -+ /* -+ * Don't tell them about moving exiting tasks or -+ * kernel threads (both mm NULL), since they never -+ * leave kernel. -+ */ -+ if (p->mm && printk_ratelimit()) { -+ printk_deferred("process %d (%s) no longer affine to cpu%d\n", -+ task_pid_nr(p), p->comm, cpu); -+ } -+ } -+ -+ return dest_cpu; -+} -+ -+static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) -+{ -+ cpumask_t *mask; -+ -+ if (cpumask_test_cpu(cpu, cpumask)) -+ return cpu; -+ -+ mask = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) -+ mask++; -+ -+ return cpu; -+} -+ -+/* -+ * task_preemptible_rq - return the rq which the given task can preempt on -+ * @p: task wants to preempt CPU -+ * @only_preempt_low_policy: indicate only preempt rq running low policy than @p -+ */ -+static inline int -+task_preemptible_rq_idle(struct task_struct *p, cpumask_t *chk_mask) -+{ -+ cpumask_t tmp; -+ -+#ifdef CONFIG_SCHED_SMT -+ if (cpumask_and(&tmp, chk_mask, &sched_cpu_sg_idle_mask)) -+ return best_mask_cpu(task_cpu(p), &tmp); -+#endif -+ -+#ifdef CONFIG_SMT_NICE -+ /* Only ttwu on cpu which is not smt supressed */ -+ if (cpumask_andnot(&tmp, chk_mask, &sched_smt_supressed_mask)) { -+ cpumask_t t; -+ if (cpumask_and(&t, &tmp, &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ return best_mask_cpu(task_cpu(p), &t); -+ return best_mask_cpu(task_cpu(p), &tmp); -+ } -+#endif -+ -+ if (cpumask_and(&tmp, chk_mask, &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ return best_mask_cpu(task_cpu(p), &tmp); -+ return best_mask_cpu(task_cpu(p), chk_mask); -+} -+ -+static inline int -+task_preemptible_rq(struct task_struct *p, cpumask_t *chk_mask, -+ int preempt_level) -+{ -+ cpumask_t tmp; -+ int level; -+ -+#ifdef CONFIG_SCHED_SMT -+#ifdef CONFIG_SMT_NICE -+ if (cpumask_and(&tmp, chk_mask, &sched_cpu_psg_mask)) -+ return best_mask_cpu(task_cpu(p), &tmp); -+#else -+ if (cpumask_and(&tmp, chk_mask, &sched_cpu_sg_idle_mask)) -+ return best_mask_cpu(task_cpu(p), &tmp); -+#endif -+#endif -+ -+ level = find_first_bit(sched_rq_queued_masks_bitmap, -+ NR_SCHED_RQ_QUEUED_LEVEL); -+ -+ while (level < preempt_level) { -+ if (cpumask_and(&tmp, chk_mask, &sched_rq_queued_masks[level])) -+ return best_mask_cpu(task_cpu(p), &tmp); -+ -+ level = find_next_bit(sched_rq_queued_masks_bitmap, -+ NR_SCHED_RQ_QUEUED_LEVEL, -+ level + 1); -+ } -+ -+ if (unlikely(SCHED_RQ_RT == level && -+ level == preempt_level && -+ cpumask_and(&tmp, chk_mask, -+ &sched_rq_queued_masks[SCHED_RQ_RT]))) { -+ unsigned int cpu; -+ -+ for_each_cpu (cpu, &tmp) -+ if (p->prio < sched_rq_prio[cpu]) -+ return cpu; -+ } -+ -+ return best_mask_cpu(task_cpu(p), chk_mask); -+} -+ -+static inline int select_task_rq(struct task_struct *p) -+{ -+ cpumask_t chk_mask; -+ -+ if (unlikely(!cpumask_and(&chk_mask, &p->cpus_mask, cpu_online_mask))) 
-+ return select_fallback_rq(task_cpu(p), p); -+ -+ /* Check IDLE tasks suitable to run normal priority */ -+ if (idleprio_task(p)) { -+ if (idleprio_suitable(p)) { -+ p->prio = p->normal_prio; -+ update_task_priodl(p); -+ return task_preemptible_rq_idle(p, &chk_mask); -+ } -+ p->prio = NORMAL_PRIO; -+ update_task_priodl(p); -+ } -+ -+ return task_preemptible_rq(p, &chk_mask, -+ task_running_policy_level(p, this_rq())); -+} -+#else /* CONFIG_SMP */ -+static inline int select_task_rq(struct task_struct *p) -+{ -+ return 0; -+} -+#endif /* CONFIG_SMP */ -+ -+static void -+ttwu_stat(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq; -+ -+ if (!schedstat_enabled()) -+ return; -+ -+ rq= this_rq(); -+ -+#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) -+ __schedstat_inc(rq->ttwu_local); -+ else { -+ /** PDS ToDo: -+ * How to do ttwu_wake_remote -+ */ -+ } -+#endif /* CONFIG_SMP */ -+ -+ __schedstat_inc(rq->ttwu_count); -+} -+ -+/* -+ * Mark the task runnable and perform wakeup-preemption. -+ */ -+static inline void -+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+} -+ -+static inline void -+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+#ifdef CONFIG_SMP -+ if (p->sched_contributes_to_load) -+ rq->nr_uninterruptible--; -+#endif -+ -+ activate_task(p, rq); -+ ttwu_do_wakeup(rq, p, 0); -+} -+ -+static int ttwu_remote(struct task_struct *p, int wake_flags) -+{ -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ rq = __task_access_lock(p, &lock); -+ if (task_on_rq_queued(p)) { -+ ttwu_do_wakeup(rq, p, wake_flags); -+ ret = 1; -+ } -+ __task_access_unlock(p, lock); -+ -+ return ret; -+} -+ -+/* -+ * Notes on Program-Order guarantees on SMP systems. -+ * -+ * MIGRATION -+ * -+ * The basic program-order guarantee on SMP systems is that when a task [t] -+ * migrates, all its activity on its old CPU [c0] happens-before any subsequent -+ * execution on its new CPU [c1]. -+ * -+ * For migration (of runnable tasks) this is provided by the following means: -+ * -+ * A) UNLOCK of the rq(c0)->lock scheduling out task t -+ * B) migration for t is required to synchronize *both* rq(c0)->lock and -+ * rq(c1)->lock (if not at the same time, then in that order). -+ * C) LOCK of the rq(c1)->lock scheduling in task -+ * -+ * Transitivity guarantees that B happens after A and C after B. -+ * Note: we only require RCpc transitivity. -+ * Note: the CPU doing B need not be c0 or c1 -+ * -+ * Example: -+ * -+ * CPU0 CPU1 CPU2 -+ * -+ * LOCK rq(0)->lock -+ * sched-out X -+ * sched-in Y -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(0)->lock // orders against CPU0 -+ * dequeue X -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(1)->lock -+ * enqueue X -+ * UNLOCK rq(1)->lock -+ * -+ * LOCK rq(1)->lock // orders against CPU2 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(1)->lock -+ * -+ * -+ * BLOCKING -- aka. SLEEP + WAKEUP -+ * -+ * For blocking we (obviously) need to provide the same guarantee as for -+ * migration. However the means are completely different as there is no lock -+ * chain to provide order. 
Instead we do: -+ * -+ * 1) smp_store_release(X->on_cpu, 0) -+ * 2) smp_cond_load_acquire(!X->on_cpu) -+ * -+ * Example: -+ * -+ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule) -+ * -+ * LOCK rq(0)->lock LOCK X->pi_lock -+ * dequeue X -+ * sched-out X -+ * smp_store_release(X->on_cpu, 0); -+ * -+ * smp_cond_load_acquire(&X->on_cpu, !VAL); -+ * X->state = WAKING -+ * set_task_cpu(X,2) -+ * -+ * LOCK rq(2)->lock -+ * enqueue X -+ * X->state = RUNNING -+ * UNLOCK rq(2)->lock -+ * -+ * LOCK rq(2)->lock // orders against CPU1 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(2)->lock -+ * -+ * UNLOCK X->pi_lock -+ * UNLOCK rq(0)->lock -+ * -+ * -+ * However; for wakeups there is a second guarantee we must provide, namely we -+ * must observe the state that lead to our wakeup. That is, not only must our -+ * task observe its own prior state, it must also observe the stores prior to -+ * its wakeup. -+ * -+ * This means that any means of doing remote wakeups must order the CPU doing -+ * the wakeup against the CPU the task is going to end up running on. This, -+ * however, is already required for the regular Program-Order guarantee above, -+ * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire). -+ * -+ */ -+ -+/*** -+ * try_to_wake_up - wake up a thread -+ * @p: the thread to be awakened -+ * @state: the mask of task states that can be woken -+ * @wake_flags: wake modifier flags (WF_*) -+ * -+ * Put it on the run-queue if it's not already there. The "current" -+ * thread is always on the run-queue (except when the actual -+ * re-schedule is in progress), and as such you're allowed to do -+ * the simpler "current->state = TASK_RUNNING" to mark yourself -+ * runnable without the overhead of this. -+ * -+ * Return: %true if @p was woken up, %false if it was already running. -+ * or @state didn't match @p's state. -+ */ -+static int try_to_wake_up(struct task_struct *p, unsigned int state, -+ int wake_flags) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ int cpu, success = 0; -+ -+ /* -+ * If we are going to wake up a thread waiting for CONDITION we -+ * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with mb() in -+ * set_current_state() the waiting thread does. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ smp_mb__after_spinlock(); -+ if (!(p->state & state)) -+ goto out; -+ -+ trace_sched_waking(p); -+ -+ /* We're going to change ->state: */ -+ success = 1; -+ cpu = task_cpu(p); -+ -+ /* -+ * Ensure we load p->on_rq _after_ p->state, otherwise it would -+ * be possible to, falsely, observe p->on_rq == 0 and get stuck -+ * in smp_cond_load_acquire() below. -+ * -+ * flush_smp_call_function_from_idle() try_to_wake_up() -+ * STORE p->on_rq = 1 LOAD p->state -+ * UNLOCK rq->lock -+ * -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * UNLOCK rq->lock -+ * -+ * [task p] -+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ if (p->on_rq && ttwu_remote(p, wake_flags)) -+ goto stat; -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -+ * possible to, falsely, observe p->on_cpu == 0. -+ * -+ * One must be running (->on_cpu == 1) in order to remove oneself -+ * from the runqueue. 
-+ * -+ * __schedule() (switch to task 'p') try_to_wake_up() -+ * STORE p->on_cpu = 1 LOAD p->on_rq -+ * UNLOCK rq->lock -+ * -+ * __schedule() (put 'p' to sleep) -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * STORE p->on_rq = 0 LOAD p->on_cpu -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until its done referencing the task. -+ * -+ * Pairs with the smp_store_release() in finish_task(). -+ * -+ * This ensures that tasks getting woken will be fully ordered against -+ * their previous state and preserve Program Order. -+ */ -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ p->sched_contributes_to_load = !!task_contributes_to_load(p); -+ p->state = TASK_WAKING; -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+ if (SCHED_ISO == p->policy && ISO_PRIO != p->prio) { -+ p->prio = ISO_PRIO; -+ p->deadline = 0UL; -+ update_task_priodl(p); -+ } -+ -+ cpu = select_task_rq(p); -+ -+ if (cpu != task_cpu(p)) { -+ wake_flags |= WF_MIGRATED; -+ psi_ttwu_dequeue(p); -+ set_task_cpu(p, cpu); -+ } -+#else /* CONFIG_SMP */ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+#endif -+ -+ rq = cpu_rq(cpu); -+ raw_spin_lock(&rq->lock); -+ -+ update_rq_clock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ check_preempt_curr(rq, p); -+ -+ raw_spin_unlock(&rq->lock); -+ -+stat: -+ ttwu_stat(p, cpu, wake_flags); -+out: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ return success; -+} -+ -+/** -+ * try_invoke_on_locked_down_task - Invoke a function on task in fixed state -+ * @p: Process for which the function is to be invoked. -+ * @func: Function to invoke. -+ * @arg: Argument to function. -+ * -+ * If the specified task can be quickly locked into a definite state -+ * (either sleeping or on a given runqueue), arrange to keep it in that -+ * state while invoking @func(@arg). This function can use ->on_rq and -+ * task_curr() to work out what the state is, if required. Given that -+ * @func can be invoked with a runqueue lock held, it had better be quite -+ * lightweight. -+ * -+ * Returns: -+ * @false if the task slipped out from under the locks. -+ * @true if the task was locked onto a runqueue or is sleeping. -+ * However, @func can override this by returning @false. -+ */ -+bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) -+{ -+ bool ret = false; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ lockdep_assert_irqs_enabled(); -+ raw_spin_lock_irq(&p->pi_lock); -+ if (p->on_rq) { -+ rq = __task_rq_lock(p, &rf); -+ if (task_rq(p) == rq) -+ ret = func(p, arg); -+ rq_unlock(rq, &rf); -+ } else { -+ switch (p->state) { -+ case TASK_RUNNING: -+ case TASK_WAKING: -+ break; -+ default: -+ smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). -+ if (!p->on_rq) -+ ret = func(p, arg); -+ } -+ } -+ raw_spin_unlock_irq(&p->pi_lock); -+ return ret; -+} -+ -+/** -+ * wake_up_process - Wake up a specific process -+ * @p: The process to be woken up. -+ * -+ * Attempt to wake up the nominated process and move it to the set of runnable -+ * processes. -+ * -+ * Return: 1 if the process was woken up, 0 if it was already running. -+ * -+ * This function executes a full memory barrier before accessing the task state. 
-+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+int wake_up_state(struct task_struct *p, unsigned int state) -+{ -+ return try_to_wake_up(p, state, 0); -+} -+ -+/* -+ * Perform scheduler related setup for a newly forked process p. -+ * p is forked by current. -+ */ -+int sched_fork(unsigned long __maybe_unused clone_flags, struct task_struct *p) -+{ -+ unsigned long flags; -+ int cpu = get_cpu(); -+ struct rq *rq = this_rq(); -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ INIT_HLIST_HEAD(&p->preempt_notifiers); -+#endif -+ /* Should be reset in fork.c but done here for ease of PDS patching */ -+ p->on_cpu = -+ p->on_rq = -+ p->utime = -+ p->stime = -+ p->sched_time = 0; -+ -+ p->sl_level = pds_skiplist_random_level(p); -+ INIT_SKIPLIST_NODE(&p->sl_node); -+ -+#ifdef CONFIG_COMPACTION -+ p->capture_control = NULL; -+#endif -+ -+ /* -+ * We mark the process as NEW here. This guarantees that -+ * nobody will actually run it, and a signal or other external -+ * event cannot wake it up and insert it on the runqueue either. -+ */ -+ p->state = TASK_NEW; -+ -+ /* -+ * Make sure we do not leak PI boosting priority to the child. -+ */ -+ p->prio = current->normal_prio; -+ -+ /* -+ * Revert to default priority/policy on fork if requested. -+ */ -+ if (unlikely(p->sched_reset_on_fork)) { -+ if (task_has_rt_policy(p)) { -+ p->policy = SCHED_NORMAL; -+ p->static_prio = NICE_TO_PRIO(0); -+ p->rt_priority = 0; -+ } else if (PRIO_TO_NICE(p->static_prio) < 0) -+ p->static_prio = NICE_TO_PRIO(0); -+ -+ p->prio = p->normal_prio = normal_prio(p); -+ -+ /* -+ * We don't need the reset flag anymore after the fork. It has -+ * fulfilled its duty: -+ */ -+ p->sched_reset_on_fork = 0; -+ } -+ -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. -+ */ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ rq->curr->time_slice /= 2; -+ p->time_slice = rq->curr->time_slice; -+#ifdef CONFIG_SCHED_HRTICK -+ hrtick_start(rq, US_TO_NS(rq->curr->time_slice)); -+#endif -+ -+ if (p->time_slice < RESCHED_US) { -+ update_rq_clock(rq); -+ time_slice_expired(p, rq); -+ resched_curr(rq); -+ } else -+ update_task_priodl(p); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ /* -+ * The child is not yet in the pid-hash so no cgroup attach races, -+ * and the cgroup is pinned to this child due to cgroup_fork() -+ * is ran before sched_fork(). -+ * -+ * Silence PROVE_RCU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ /* -+ * We're setting the CPU for the first time, we don't migrate, -+ * so use __set_task_cpu(). 
-+ */ -+ __set_task_cpu(p, cpu); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ put_cpu(); -+ return 0; -+} -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+static bool __initdata __sched_schedstats = false; -+ -+static void set_schedstats(bool enabled) -+{ -+ if (enabled) -+ static_branch_enable(&sched_schedstats); -+ else -+ static_branch_disable(&sched_schedstats); -+} -+ -+void force_schedstat_enabled(void) -+{ -+ if (!schedstat_enabled()) { -+ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); -+ static_branch_enable(&sched_schedstats); -+ } -+} -+ -+static int __init setup_schedstats(char *str) -+{ -+ int ret = 0; -+ if (!str) -+ goto out; -+ -+ /* -+ * This code is called before jump labels have been set up, so we can't -+ * change the static branch directly just yet. Instead set a temporary -+ * variable so init_schedstats() can do it later. -+ */ -+ if (!strcmp(str, "enable")) { -+ __sched_schedstats = true; -+ ret = 1; -+ } else if (!strcmp(str, "disable")) { -+ __sched_schedstats = false; -+ ret = 1; -+ } -+out: -+ if (!ret) -+ pr_warn("Unable to parse schedstats=\n"); -+ -+ return ret; -+} -+__setup("schedstats=", setup_schedstats); -+ -+static void __init init_schedstats(void) -+{ -+ set_schedstats(__sched_schedstats); -+} -+ -+#ifdef CONFIG_PROC_SYSCTL -+int sysctl_schedstats(struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) -+{ -+ struct ctl_table t; -+ int err; -+ int state = static_branch_likely(&sched_schedstats); -+ -+ if (write && !capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ t = *table; -+ t.data = &state; -+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); -+ if (err < 0) -+ return err; -+ if (write) -+ set_schedstats(state); -+ return err; -+} -+#endif /* CONFIG_PROC_SYSCTL */ -+#else /* !CONFIG_SCHEDSTATS */ -+static inline void init_schedstats(void) {} -+#endif /* CONFIG_SCHEDSTATS */ -+ -+/* -+ * wake_up_new_task - wake up a newly created task for the first time. -+ * -+ * This function will do some initial scheduler statistics housekeeping -+ * that must be done for every newly created context, then puts the task -+ * on the runqueue and wakes it. -+ */ -+void wake_up_new_task(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ p->state = TASK_RUNNING; -+ -+ rq = cpu_rq(select_task_rq(p)); -+#ifdef CONFIG_SMP -+ /* -+ * Fork balancing, do it here and not earlier because: -+ * - cpus_mask can change in the fork path -+ * - any previously selected CPU might disappear through hotplug -+ * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, -+ * as we're not fully set-up yet. 
-+ */ -+ __set_task_cpu(p, cpu_of(rq)); -+#endif -+ -+ raw_spin_lock(&rq->lock); -+ -+ update_rq_clock(rq); -+ activate_task(p, rq); -+ trace_sched_wakeup_new(p); -+ check_preempt_curr(rq, p); -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ -+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); -+ -+void preempt_notifier_inc(void) -+{ -+ static_branch_inc(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_inc); -+ -+void preempt_notifier_dec(void) -+{ -+ static_branch_dec(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_dec); -+ -+/** -+ * preempt_notifier_register - tell me when current is being preempted & rescheduled -+ * @notifier: notifier struct to register -+ */ -+void preempt_notifier_register(struct preempt_notifier *notifier) -+{ -+ if (!static_branch_unlikely(&preempt_notifier_key)) -+ WARN(1, "registering preempt_notifier while notifiers disabled\n"); -+ -+ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_register); -+ -+/** -+ * preempt_notifier_unregister - no longer interested in preemption notifications -+ * @notifier: notifier struct to unregister -+ * -+ * This is *not* safe to call from within a preemption notifier. -+ */ -+void preempt_notifier_unregister(struct preempt_notifier *notifier) -+{ -+ hlist_del(¬ifier->link); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_unregister); -+ -+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_in(notifier, raw_smp_processor_id()); -+} -+ -+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_in_preempt_notifiers(curr); -+} -+ -+static void -+__fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_out(notifier, next); -+} -+ -+static __always_inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_out_preempt_notifiers(curr, next); -+} -+ -+#else /* !CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+} -+ -+static inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+} -+ -+#endif /* CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void prepare_task(struct task_struct *next) -+{ -+ /* -+ * Claim the task as running, we do this before switching to it -+ * such that any running task will have this set. -+ */ -+ next->on_cpu = 1; -+} -+ -+static inline void finish_task(struct task_struct *prev) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->on_cpu is cleared, the task can be moved to a different CPU. -+ * We must ensure this doesn't happen until the switch is completely -+ * finished. -+ * -+ * In particular, the load of prev->state in finish_task_switch() must -+ * happen before this. -+ * -+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). 
-+ */ -+ smp_store_release(&prev->on_cpu, 0); -+#else -+ prev->on_cpu = 0; -+#endif -+} -+ -+static inline void -+prepare_lock_switch(struct rq *rq, struct task_struct *next) -+{ -+ /* -+ * Since the runqueue lock will be released by the next -+ * task (which is an invalid locking op but in the case -+ * of the scheduler it's an obvious special-case), so we -+ * do an early lockdep release here: -+ */ -+ spin_release(&rq->lock.dep_map, _THIS_IP_); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* this is a valid case when another task releases the spinlock */ -+ rq->lock.owner = next; -+#endif -+} -+ -+static inline void finish_lock_switch(struct rq *rq) -+{ -+ /* -+ * If we are tracking spinlock dependencies then we have to -+ * fix up the runqueue lock - which gets 'carried over' from -+ * prev into current: -+ */ -+ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+/** -+ * prepare_task_switch - prepare to switch tasks -+ * @rq: the runqueue preparing to switch -+ * @next: the task we are going to switch to. -+ * -+ * This is called with the rq lock held and interrupts off. It must -+ * be paired with a subsequent finish_task_switch after the context -+ * switch. -+ * -+ * prepare_task_switch sets up locking and calls architecture specific -+ * hooks. -+ */ -+static inline void -+prepare_task_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ kcov_prepare_switch(prev); -+ sched_info_switch(rq, prev, next); -+ perf_event_task_sched_out(prev, next); -+ rseq_preempt(prev); -+ fire_sched_out_preempt_notifiers(prev, next); -+ prepare_task(next); -+ prepare_arch_switch(next); -+} -+ -+/** -+ * finish_task_switch - clean up after a task-switch -+ * @rq: runqueue associated with task-switch -+ * @prev: the thread we just switched away from. -+ * -+ * finish_task_switch must be called after the context switch, paired -+ * with a prepare_task_switch call before the context switch. -+ * finish_task_switch will reconcile locking set up by prepare_task_switch, -+ * and do any other architecture-specific cleanup actions. -+ * -+ * Note that we may have delayed dropping an mm in context_switch(). If -+ * so, we finish that here outside of the runqueue lock. (Doing it -+ * with the lock held can cause deadlocks; see schedule() for -+ * details.) -+ * -+ * The context switch have flipped the stack from under us and restored the -+ * local variables which were saved when this task called schedule() in the -+ * past. prev == current is still correct but we need to recalculate this_rq -+ * because prev may have moved to another CPU. -+ */ -+static struct rq *finish_task_switch(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq = this_rq(); -+ struct mm_struct *mm = rq->prev_mm; -+ long prev_state; -+ -+ /* -+ * The previous task will have left us with a preempt_count of 2 -+ * because it left us after: -+ * -+ * schedule() -+ * preempt_disable(); // 1 -+ * __schedule() -+ * raw_spin_lock_irq(&rq->lock) // 2 -+ * -+ * Also, see FORK_PREEMPT_COUNT. -+ */ -+ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, -+ "corrupted preempt_count: %s/%d/0x%x\n", -+ current->comm, current->pid, preempt_count())) -+ preempt_count_set(FORK_PREEMPT_COUNT); -+ -+ rq->prev_mm = NULL; -+ -+ /* -+ * A task struct has one reference for the use as "current". -+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls -+ * schedule one last time. 
The schedule call will never return, and -+ * the scheduled task must drop that reference. -+ * -+ * We must observe prev->state before clearing prev->on_cpu (in -+ * finish_task), otherwise a concurrent wakeup can get prev -+ * running on another CPU and we could rave with its RUNNING -> DEAD -+ * transition, resulting in a double drop. -+ */ -+ prev_state = prev->state; -+ vtime_task_switch(prev); -+ perf_event_task_sched_in(prev, current); -+ finish_task(prev); -+ finish_lock_switch(rq); -+ finish_arch_post_lock_switch(); -+ kcov_finish_switch(current); -+ -+ fire_sched_in_preempt_notifiers(current); -+ /* -+ * When switching through a kernel thread, the loop in -+ * membarrier_{private,global}_expedited() may have observed that -+ * kernel thread and not issued an IPI. It is therefore possible to -+ * schedule between user->kernel->user threads without passing though -+ * switch_mm(). Membarrier requires a barrier after storing to -+ * rq->curr, before returning to userspace, so provide them here: -+ * -+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly -+ * provided by mmdrop(), -+ * - a sync_core for SYNC_CORE. -+ */ -+ if (mm) { -+ membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop(mm); -+ } -+ if (unlikely(prev_state == TASK_DEAD)) { -+ /* -+ * Remove function-return probe instances associated with this -+ * task and put them back on the free list. -+ */ -+ kprobe_flush_task(prev); -+ -+ /* Task is done with its stack. */ -+ put_task_stack(prev); -+ -+ put_task_struct_rcu_user(prev); -+ } -+ -+ tick_nohz_task_switch(); -+ return rq; -+} -+ -+/** -+ * schedule_tail - first thing a freshly forked thread must call. -+ * @prev: the thread we just switched away from. -+ */ -+asmlinkage __visible void schedule_tail(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq; -+ -+ /* -+ * New tasks start with FORK_PREEMPT_COUNT, see there and -+ * finish_task_switch() for details. -+ * -+ * finish_task_switch() will drop rq->lock() and lower preempt_count -+ * and the preempt_enable() will end up enabling preemption (on -+ * PREEMPT_COUNT kernels). -+ */ -+ -+ rq = finish_task_switch(prev); -+ preempt_enable(); -+ -+ if (current->set_child_tid) -+ put_user(task_pid_vnr(current), current->set_child_tid); -+ -+ calculate_sigpending(); -+} -+ -+/* -+ * context_switch - switch to the new MM and the new thread's register state. -+ */ -+static __always_inline struct rq * -+context_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ prepare_task_switch(rq, prev, next); -+ -+ /* -+ * For paravirt, this is coupled with an exit in switch_to to -+ * combine the page table reload and the switch backend into -+ * one hypercall. -+ */ -+ arch_start_context_switch(prev); -+ -+ /* -+ * kernel -> kernel lazy + transfer active -+ * user -> kernel lazy + mmgrab() active -+ * -+ * kernel -> user switch + mmdrop() active -+ * user -> user switch -+ */ -+ if (!next->mm) { // to kernel -+ enter_lazy_tlb(prev->active_mm, next); -+ -+ next->active_mm = prev->active_mm; -+ if (prev->mm) // from user -+ mmgrab(prev->active_mm); -+ else -+ prev->active_mm = NULL; -+ } else { // to user -+ membarrier_switch_mm(rq, prev->active_mm, next->mm); -+ /* -+ * sys_membarrier() requires an smp_mb() between setting -+ * rq->curr / membarrier_switch_mm() and returning to userspace. -+ * -+ * The below provides this either through switch_mm(), or in -+ * case 'prev->active_mm == next->mm' through -+ * finish_task_switch()'s mmdrop(). 
-+ */ -+ switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ -+ if (!prev->mm) { // from kernel -+ /* will mmdrop() in finish_task_switch(). */ -+ rq->prev_mm = prev->active_mm; -+ prev->active_mm = NULL; -+ } -+ } -+ -+ prepare_lock_switch(rq, next); -+ -+ /* Here we just switch the register state and the stack. */ -+ switch_to(prev, next, prev); -+ barrier(); -+ -+ return finish_task_switch(prev); -+} -+ -+/* -+ * nr_running, nr_uninterruptible and nr_context_switches: -+ * -+ * externally visible scheduler statistics: current number of runnable -+ * threads, total number of context switches performed since bootup. -+ */ -+unsigned long nr_running(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_running; -+ -+ return sum; -+} -+ -+/* -+ * Check if only the current task is running on the CPU. -+ * -+ * Caution: this function does not check that the caller has disabled -+ * preemption, thus the result might have a time-of-check-to-time-of-use -+ * race. The caller is responsible to use it correctly, for example: -+ * -+ * - from a non-preemptible section (of course) -+ * -+ * - from a thread that is bound to a single CPU -+ * -+ * - in a loop with very short iterations (e.g. a polling loop) -+ */ -+bool single_task_running(void) -+{ -+ return raw_rq()->nr_running == 1; -+} -+EXPORT_SYMBOL(single_task_running); -+ -+unsigned long long nr_context_switches(void) -+{ -+ int i; -+ unsigned long long sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += cpu_rq(i)->nr_switches; -+ -+ return sum; -+} -+ -+/* -+ * Consumers of these two interfaces, like for example the cpuidle menu -+ * governor, are using nonsensical data. Preferring shallow idle state selection -+ * for a CPU that has IO-wait which might not even end up running the task when -+ * it does become runnable. -+ */ -+ -+unsigned long nr_iowait_cpu(int cpu) -+{ -+ return atomic_read(&cpu_rq(cpu)->nr_iowait); -+} -+ -+/* -+ * IO-wait accounting, and how its mostly bollocks (on SMP). -+ * -+ * The idea behind IO-wait account is to account the idle time that we could -+ * have spend running if it were not for IO. That is, if we were to improve the -+ * storage performance, we'd have a proportional reduction in IO-wait time. -+ * -+ * This all works nicely on UP, where, when a task blocks on IO, we account -+ * idle time as IO-wait, because if the storage were faster, it could've been -+ * running and we'd not be idle. -+ * -+ * This has been extended to SMP, by doing the same for each CPU. This however -+ * is broken. -+ * -+ * Imagine for instance the case where two tasks block on one CPU, only the one -+ * CPU will have IO-wait accounted, while the other has regular idle. Even -+ * though, if the storage were faster, both could've ran at the same time, -+ * utilising both CPUs. -+ * -+ * This means, that when looking globally, the current IO-wait accounting on -+ * SMP is a lower bound, by reason of under accounting. -+ * -+ * Worse, since the numbers are provided per CPU, they are sometimes -+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly -+ * associated with any one particular CPU, it can wake to another CPU than it -+ * blocked on. This means the per CPU IO-wait number is meaningless. -+ * -+ * Task CPU affinities can make all that even more 'interesting'. 
-+ */ -+ -+unsigned long nr_iowait(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += nr_iowait_cpu(i); -+ -+ return sum; -+} -+ -+DEFINE_PER_CPU(struct kernel_stat, kstat); -+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -+ -+EXPORT_PER_CPU_SYMBOL(kstat); -+EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -+ -+static inline void pds_update_curr(struct rq *rq, struct task_struct *p) -+{ -+ s64 ns = rq->clock_task - p->last_ran; -+ -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ -+ p->time_slice -= NS_TO_US(ns); -+ p->last_ran = rq->clock_task; -+} -+ -+/* -+ * Return accounted runtime for the task. -+ * Return separately the current's pending runtime that have not been -+ * accounted yet. -+ */ -+unsigned long long task_sched_runtime(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ u64 ns; -+ -+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) -+ /* -+ * 64-bit doesn't need locks to atomically read a 64-bit value. -+ * So we have a optimization chance when the task's delta_exec is 0. -+ * Reading ->on_cpu is racy, but this is ok. -+ * -+ * If we race with it leaving CPU, we'll take a lock. So we're correct. -+ * If we race with it entering CPU, unaccounted time is 0. This is -+ * indistinguishable from the read occurring a few cycles earlier. -+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has -+ * been accounted, so we're correct here as well. -+ */ -+ if (!p->on_cpu || !task_on_rq_queued(p)) -+ return tsk_seruntime(p); -+#endif -+ -+ rq = task_access_lock_irqsave(p, &lock, &flags); -+ /* -+ * Must be ->curr _and_ ->on_rq. If dequeued, we would -+ * project cycles that may never be accounted to this -+ * thread, breaking clock_gettime(). -+ */ -+ if (p == rq->curr && task_on_rq_queued(p)) { -+ update_rq_clock(rq); -+ pds_update_curr(rq, p); -+ } -+ ns = tsk_seruntime(p); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ return ns; -+} -+ -+/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static inline void pds_scheduler_task_tick(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ if (is_idle_task(p)) -+ return; -+ -+ pds_update_curr(rq, p); -+ -+ cpufreq_update_util(rq, 0); -+ -+ /* -+ * Tasks that were scheduled in the first half of a tick are not -+ * allowed to run into the 2nd half of the next tick if they will -+ * run out of time slice in the interim. Otherwise, if they have -+ * less than RESCHED_US μs of time slice left they will be rescheduled. -+ */ -+ if (p->time_slice - rq->dither >= RESCHED_US) -+ return; -+ -+ /** -+ * p->time_slice < RESCHED_US. 
We will modify task_struct under -+ * rq lock as p is rq->curr -+ */ -+ __set_tsk_resched(p); -+} -+ -+#ifdef CONFIG_SMP -+ -+#ifdef CONFIG_SCHED_SMT -+static int active_load_balance_cpu_stop(void *data) -+{ -+ struct rq *rq = this_rq(); -+ struct task_struct *p = data; -+ int cpu; -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ rq->active_balance = 0; -+ /* -+ * _something_ may have changed the task, double check again -+ */ -+ if (task_on_rq_queued(p) && task_rq(p) == rq && -+ (cpu = cpumask_any_and(&p->cpus_mask, &sched_cpu_sg_idle_mask)) < nr_cpu_ids) -+ rq = __migrate_task(rq, p, cpu); -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_restore(flags); -+ -+ return 0; -+} -+ -+/* pds_sg_balance_trigger - trigger slibing group balance for @cpu */ -+static void pds_sg_balance_trigger(const int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ struct task_struct *curr; -+ -+ if (!raw_spin_trylock_irqsave(&rq->lock, flags)) -+ return; -+ curr = rq->curr; -+ if (!is_idle_task(curr) && -+ cpumask_intersects(&curr->cpus_mask, &sched_cpu_sg_idle_mask)) { -+ int active_balance = 0; -+ -+ if (likely(!rq->active_balance)) { -+ rq->active_balance = 1; -+ active_balance = 1; -+ } -+ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ if (likely(active_balance)) -+ stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop, -+ curr, &rq->active_balance_work); -+ } else -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+} -+ -+/* -+ * pds_sg_balance_check - slibing group balance check for run queue @rq -+ */ -+static inline void pds_sg_balance_check(const struct rq *rq) -+{ -+ cpumask_t chk; -+ int i; -+ -+ /* Only online cpu will do sg balance checking */ -+ if (unlikely(!rq->online)) -+ return; -+ -+ /* Only cpu in slibing idle group will do the checking */ -+ if (!cpumask_test_cpu(cpu_of(rq), &sched_cpu_sg_idle_mask)) -+ return; -+ -+ /* Find potential cpus which can migrate the currently running task */ -+ if (!cpumask_andnot(&chk, &sched_rq_pending_masks[SCHED_RQ_EMPTY], -+ &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ return; -+ -+ for_each_cpu(i, &chk) { -+ /* skip the cpu which has idle slibing cpu */ -+ if (cpumask_test_cpu(per_cpu(sched_sibling_cpu, i), -+ &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ continue; -+ pds_sg_balance_trigger(i); -+ } -+} -+DEFINE_PER_CPU(unsigned long, thermal_pressure); -+ -+void arch_set_thermal_pressure(struct cpumask *cpus, -+ unsigned long th_pressure) -+{ -+ int cpu; -+ -+ for_each_cpu(cpu, cpus) -+ WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); -+} -+#endif /* CONFIG_SCHED_SMT */ -+#endif /* CONFIG_SMP */ -+ -+/* -+ * This function gets called by the timer code, with HZ frequency. -+ * We call it with interrupts disabled. -+ */ -+void scheduler_tick(void) -+{ -+ int cpu __maybe_unused = smp_processor_id(); -+ struct rq *rq = cpu_rq(cpu); -+ -+ arch_scale_freq_tick(); -+ sched_clock_tick(); -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ pds_scheduler_task_tick(rq); -+ update_sched_rq_queued_masks_normal(rq); -+ calc_global_load_tick(rq); -+ psi_task_tick(rq); -+ -+ rq->last_tick = rq->clock; -+ raw_spin_unlock(&rq->lock); -+ -+ perf_event_task_tick(); -+} -+ -+#ifdef CONFIG_NO_HZ_FULL -+struct tick_work { -+ int cpu; -+ atomic_t state; -+ struct delayed_work work; -+}; -+/* Values for ->state, see diagram below. 
*/ -+#define TICK_SCHED_REMOTE_OFFLINE 0 -+#define TICK_SCHED_REMOTE_OFFLINING 1 -+#define TICK_SCHED_REMOTE_RUNNING 2 -+ -+/* -+ * State diagram for ->state: -+ * -+ * -+ * TICK_SCHED_REMOTE_OFFLINE -+ * | ^ -+ * | | -+ * | | sched_tick_remote() -+ * | | -+ * | | -+ * +--TICK_SCHED_REMOTE_OFFLINING -+ * | ^ -+ * | | -+ * sched_tick_start() | | sched_tick_stop() -+ * | | -+ * V | -+ * TICK_SCHED_REMOTE_RUNNING -+ * -+ * -+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() -+ * and sched_tick_start() are happy to leave the state in RUNNING. -+ */ -+ -+static struct tick_work __percpu *tick_work_cpu; -+ -+static void sched_tick_remote(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct tick_work *twork = container_of(dwork, struct tick_work, work); -+ int cpu = twork->cpu; -+ struct rq *rq = cpu_rq(cpu); -+ struct task_struct *curr; -+ unsigned long flags; -+ u64 delta; -+ int os; -+ -+ /* -+ * Handle the tick only if it appears the remote CPU is running in full -+ * dynticks mode. The check is racy by nature, but missing a tick or -+ * having one too much is no big deal because the scheduler tick updates -+ * statistics and checks timeslices in a time-independent way, regardless -+ * of when exactly it is running. -+ */ -+ if (!tick_nohz_tick_stopped_cpu(cpu)) -+ goto out_requeue; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ curr = rq->curr; -+ if (cpu_is_offline(cpu)) -+ goto out_unlock; -+ -+ update_rq_clock(rq); -+ if (!is_idle_task(curr)) { -+ /* -+ * Make sure the next tick runs within a reasonable -+ * amount of time. -+ */ -+ delta = rq_clock_task(rq) - curr->last_ran; -+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); -+ } -+ pds_scheduler_task_tick(rq); -+ update_sched_rq_queued_masks_normal(rq); -+ calc_load_nohz_remote(rq); -+ -+out_unlock: -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+out_requeue: -+ /* -+ * Run the remote tick once per second (1Hz). This arbitrary -+ * frequency is large enough to avoid overload but short enough -+ * to keep scheduler internal stats reasonably up to date. But -+ * first update state to reflect hotplug activity if required. 
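The OFFLINE/OFFLINING/RUNNING diagram above is easiest to follow with the atomics spelled out. A self-contained C11 sketch of the same three-state machine (illustrative only; the kernel's atomic_fetch_add_unless() is emulated with a compare-exchange loop):

    #include <stdatomic.h>
    #include <stdio.h>

    enum { REMOTE_OFFLINE, REMOTE_OFFLINING, REMOTE_RUNNING };

    /* Emulates atomic_fetch_add_unless(v, a, u): add 'a' to *v unless
     * *v == u; always return the old value. */
    static int fetch_add_unless(_Atomic int *v, int a, int u)
    {
        int old = atomic_load(v);

        while (old != u &&
               !atomic_compare_exchange_weak(v, &old, old + a))
            ;
        return old;
    }

    int main(void)
    {
        _Atomic int state = REMOTE_OFFLINE;
        int os;

        /* sched_tick_start(): unconditionally switch to RUNNING. */
        os = atomic_exchange(&state, REMOTE_RUNNING);
        printf("start: saw %d, now RUNNING\n", os);

        /* The diagram labels the RUNNING -> OFFLINING edge
         * sched_tick_stop(); simulate that edge here. */
        atomic_store(&state, REMOTE_OFFLINING);

        /* sched_tick_remote(): step one state towards OFFLINE unless the
         * CPU is still RUNNING, in which case the tick is re-queued. */
        os = fetch_add_unless(&state, -1, REMOTE_RUNNING);
        printf("remote tick: saw %d, now %d\n", os, atomic_load(&state));
        return 0;
    }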
-+ */ -+ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); -+ if (os == TICK_SCHED_REMOTE_RUNNING) -+ queue_delayed_work(system_unbound_wq, dwork, HZ); -+} -+ -+static void sched_tick_start(int cpu) -+{ -+ int os; -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); -+ if (os == TICK_SCHED_REMOTE_OFFLINE) { -+ twork->cpu = cpu; -+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); -+ queue_delayed_work(system_unbound_wq, &twork->work, HZ); -+ } -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void sched_tick_stop(int cpu) -+{ -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ cancel_delayed_work_sync(&twork->work); -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+int __init sched_tick_offload_init(void) -+{ -+ tick_work_cpu = alloc_percpu(struct tick_work); -+ BUG_ON(!tick_work_cpu); -+ return 0; -+} -+ -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_tick_start(int cpu) { } -+static inline void sched_tick_stop(int cpu) { } -+#endif -+ -+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ -+ defined(CONFIG_PREEMPT_TRACER)) -+/* -+ * If the value passed in is equal to the current preempt count -+ * then we just disabled preemption. Start timing the latency. -+ */ -+static inline void preempt_latency_start(int val) -+{ -+ if (preempt_count() == val) { -+ unsigned long ip = get_lock_parent_ip(); -+#ifdef CONFIG_DEBUG_PREEMPT -+ current->preempt_disable_ip = ip; -+#endif -+ trace_preempt_off(CALLER_ADDR0, ip); -+ } -+} -+ -+void preempt_count_add(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) -+ return; -+#endif -+ __preempt_count_add(val); -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Spinlock count overflowing soon? -+ */ -+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= -+ PREEMPT_MASK - 10); -+#endif -+ preempt_latency_start(val); -+} -+EXPORT_SYMBOL(preempt_count_add); -+NOKPROBE_SYMBOL(preempt_count_add); -+ -+/* -+ * If the value passed in equals to the current preempt count -+ * then we just enabled preemption. Stop timing the latency. -+ */ -+static inline void preempt_latency_stop(int val) -+{ -+ if (preempt_count() == val) -+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); -+} -+ -+void preempt_count_sub(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) -+ return; -+ /* -+ * Is the spinlock portion underflowing? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && -+ !(preempt_count() & PREEMPT_MASK))) -+ return; -+#endif -+ -+ preempt_latency_stop(val); -+ __preempt_count_sub(val); -+} -+EXPORT_SYMBOL(preempt_count_sub); -+NOKPROBE_SYMBOL(preempt_count_sub); -+ -+#else -+static inline void preempt_latency_start(int val) { } -+static inline void preempt_latency_stop(int val) { } -+#endif -+ -+/* -+ * Timeslices below RESCHED_US are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. 
SCHED_BATCH tasks -+ * have been flagged be not latency sensitive and likely to be fully CPU -+ * bound so every time they're rescheduled they have their time_slice -+ * refilled, but get a new later deadline to have little effect on -+ * SCHED_NORMAL tasks. -+ -+ */ -+static inline void check_deadline(struct task_struct *p, struct rq *rq) -+{ -+ if (rq->idle == p) -+ return; -+ -+ pds_update_curr(rq, p); -+ -+ if (p->time_slice < RESCHED_US) { -+ time_slice_expired(p, rq); -+ if (SCHED_ISO == p->policy && ISO_PRIO == p->prio) { -+ p->prio = NORMAL_PRIO; -+ p->deadline = rq->clock + task_deadline_diff(p); -+ update_task_priodl(p); -+ } -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) -+ requeue_task(p, rq); -+ } -+} -+ -+#ifdef CONFIG_SMP -+ -+#define SCHED_RQ_NR_MIGRATION (32UL) -+/* -+ * Migrate pending tasks in @rq to @dest_cpu -+ * Will try to migrate mininal of half of @rq nr_running tasks and -+ * SCHED_RQ_NR_MIGRATION to @dest_cpu -+ */ -+static inline int -+migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, int filter_prio) -+{ -+ struct task_struct *p; -+ int dest_cpu = cpu_of(dest_rq); -+ int nr_migrated = 0; -+ int nr_tries = min((rq->nr_running + 1) / 2, SCHED_RQ_NR_MIGRATION); -+ struct skiplist_node *node = rq->sl_header.next[0]; -+ -+ while (nr_tries && node != &rq->sl_header) { -+ p = skiplist_entry(node, struct task_struct, sl_node); -+ node = node->next[0]; -+ -+ if (task_running(p)) -+ continue; -+ if (p->prio >= filter_prio) -+ break; -+ if (cpumask_test_cpu(dest_cpu, &p->cpus_mask)) { -+ dequeue_task(p, rq, 0); -+ set_task_cpu(p, dest_cpu); -+ enqueue_task(p, dest_rq, 0); -+ nr_migrated++; -+ } -+ nr_tries--; -+ /* make a jump */ -+ if (node == &rq->sl_header) -+ break; -+ node = node->next[0]; -+ } -+ -+ return nr_migrated; -+} -+ -+static inline int -+take_queued_task_cpumask(struct rq *rq, cpumask_t *chk_mask, int filter_prio) -+{ -+ int src_cpu; -+ -+ for_each_cpu(src_cpu, chk_mask) { -+ int nr_migrated; -+ struct rq *src_rq = cpu_rq(src_cpu); -+ -+ if (!do_raw_spin_trylock(&src_rq->lock)) { -+ if (PRIO_LIMIT == filter_prio) -+ continue; -+ return 0; -+ } -+ spin_acquire(&src_rq->lock.dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ -+ update_rq_clock(src_rq); -+ if ((nr_migrated = migrate_pending_tasks(src_rq, rq, filter_prio))) -+ cpufreq_update_this_cpu(rq, 0); -+ -+ spin_release(&src_rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ -+ if (nr_migrated || PRIO_LIMIT != filter_prio) -+ return nr_migrated; -+ } -+ return 0; -+} -+ -+static inline int take_other_rq_task(struct rq *rq, int cpu, int filter_prio) -+{ -+ struct cpumask *affinity_mask, *end; -+ struct cpumask chk; -+ -+ if (PRIO_LIMIT == filter_prio) { -+ cpumask_complement(&chk, &sched_rq_pending_masks[SCHED_RQ_EMPTY]); -+#ifdef CONFIG_SMT_NICE -+ { -+ /* also try to take IDLE priority tasks from smt supressed cpu */ -+ struct cpumask t; -+ if (cpumask_and(&t, &sched_smt_supressed_mask, -+ &sched_rq_queued_masks[SCHED_RQ_IDLE])) -+ cpumask_or(&chk, &chk, &t); -+ } -+#endif -+ } else if (NORMAL_PRIO == filter_prio) { -+ cpumask_or(&chk, &sched_rq_pending_masks[SCHED_RQ_RT], -+ &sched_rq_pending_masks[SCHED_RQ_ISO]); -+ } else if (IDLE_PRIO == filter_prio) { -+ cpumask_complement(&chk, &sched_rq_pending_masks[SCHED_RQ_EMPTY]); -+ cpumask_andnot(&chk, &chk, &sched_rq_pending_masks[SCHED_RQ_IDLE]); -+ } else -+ cpumask_copy(&chk, &sched_rq_pending_masks[SCHED_RQ_RT]); -+ -+ if (cpumask_empty(&chk)) -+ return 0; -+ -+ affinity_mask = per_cpu(sched_cpu_llc_start_mask, cpu); -+ 
end = per_cpu(sched_cpu_affinity_chk_end_masks, cpu); -+ do { -+ struct cpumask tmp; -+ -+ if (cpumask_and(&tmp, &chk, affinity_mask) && -+ take_queued_task_cpumask(rq, &tmp, filter_prio)) -+ return 1; -+ } while (++affinity_mask < end); -+ -+ return 0; -+} -+#endif -+ -+static inline struct task_struct * -+choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) -+{ -+ struct task_struct *next = rq_first_queued_task(rq); -+ -+#ifdef CONFIG_SMT_NICE -+ if (cpumask_test_cpu(cpu, &sched_smt_supressed_mask)) { -+ if (next->prio >= IDLE_PRIO) { -+ if (rq->online && -+ take_other_rq_task(rq, cpu, IDLE_PRIO)) -+ return rq_first_queued_task(rq); -+ return rq->idle; -+ } -+ } -+#endif -+ -+#ifdef CONFIG_SMP -+ if (likely(rq->online)) -+ if (take_other_rq_task(rq, cpu, next->prio)) { -+ resched_curr(rq); -+ return rq_first_queued_task(rq); -+ } -+#endif -+ return next; -+} -+ -+static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ return p->preempt_disable_ip; -+#else -+ return 0; -+#endif -+} -+ -+/* -+ * Print scheduling while atomic bug: -+ */ -+static noinline void __schedule_bug(struct task_struct *prev) -+{ -+ /* Save this before calling printk(), since that will clobber it */ -+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ if (oops_in_progress) -+ return; -+ -+ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", -+ prev->comm, prev->pid, preempt_count()); -+ -+ debug_show_held_locks(prev); -+ print_modules(); -+ if (irqs_disabled()) -+ print_irqtrace_events(prev); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && in_atomic_preempt_off()) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); -+ } -+ if (panic_on_warn) -+ panic("scheduling while atomic\n"); -+ -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+ -+/* -+ * Various schedule()-time debugging checks and statistics: -+ */ -+static inline void schedule_debug(struct task_struct *prev, bool preempt) -+{ -+#ifdef CONFIG_SCHED_STACK_END_CHECK -+ if (task_stack_end_corrupted(prev)) -+ panic("corrupted stack end detected inside scheduler\n"); -+#endif -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && prev->state && prev->non_block_count) { -+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", -+ prev->comm, prev->pid, prev->non_block_count); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+ } -+#endif -+ -+ if (unlikely(in_atomic_preempt_off())) { -+ __schedule_bug(prev); -+ preempt_count_set(PREEMPT_DISABLED); -+ } -+ rcu_sleep_check(); -+ -+ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -+ -+ schedstat_inc(this_rq()->sched_count); -+} -+ -+static inline void set_rq_task(struct rq *rq, struct task_struct *p) -+{ -+ p->last_ran = rq->clock_task; -+ -+#ifdef CONFIG_HIGH_RES_TIMERS -+ if (p != rq->idle) -+ hrtick_start(rq, US_TO_NS(p->time_slice)); -+#endif -+ /* update rq->dither */ -+ rq->dither = rq_dither(rq); -+} -+ -+/* -+ * schedule() is the main scheduler function. -+ * -+ * The main means of driving the scheduler and thus entering this function are: -+ * -+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. -+ * -+ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return -+ * paths. For example, see arch/x86/entry_64.S. -+ * -+ * To drive preemption between tasks, the scheduler sets the flag in timer -+ * interrupt handler scheduler_tick(). -+ * -+ * 3. Wakeups don't really cause entry into schedule(). 
They add a -+ * task to the run-queue and that's it. -+ * -+ * Now, if the new task added to the run-queue preempts the current -+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets -+ * called on the nearest possible occasion: -+ * -+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): -+ * -+ * - in syscall or exception context, at the next outmost -+ * preempt_enable(). (this might be as soon as the wake_up()'s -+ * spin_unlock()!) -+ * -+ * - in IRQ context, return from interrupt-handler to -+ * preemptible context -+ * -+ * - If the kernel is not preemptible (CONFIG_PREEMPT is not set) -+ * then at the next: -+ * -+ * - cond_resched() call -+ * - explicit schedule() call -+ * - return from syscall or exception to user-space -+ * - return from interrupt-handler to user-space -+ * -+ * WARNING: must be called with preemption disabled! -+ */ -+static void __sched notrace __schedule(bool preempt) -+{ -+ struct task_struct *prev, *next; -+ unsigned long *switch_count; -+ struct rq *rq; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ rq = cpu_rq(cpu); -+ prev = rq->curr; -+ -+ schedule_debug(prev, preempt); -+ -+ /* by passing sched_feat(HRTICK) checking which PDS doesn't support */ -+ hrtick_clear(rq); -+ -+ local_irq_disable(); -+ rcu_note_context_switch(preempt); -+ -+ /* -+ * Make sure that signal_pending_state()->signal_pending() below -+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(). -+ * -+ * The membarrier system call requires a full memory barrier -+ * after coming from user-space, before storing to rq->curr. -+ */ -+ raw_spin_lock(&rq->lock); -+ smp_mb__after_spinlock(); -+ -+ update_rq_clock(rq); -+ -+ switch_count = &prev->nivcsw; -+ if (!preempt && prev->state) { -+ if (signal_pending_state(prev->state, prev)) { -+ prev->state = TASK_RUNNING; -+ } else { -+ deactivate_task(prev, rq); -+ -+ if (prev->in_iowait) { -+ atomic_inc(&rq->nr_iowait); -+ delayacct_blkio_start(); -+ } -+ } -+ switch_count = &prev->nvcsw; -+ } -+ -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ -+ check_deadline(prev, rq); -+ -+ next = choose_next_task(rq, cpu, prev); -+ -+ set_rq_task(rq, next); -+ -+ if (prev != next) { -+ if (next->prio == PRIO_LIMIT) -+ schedstat_inc(rq->sched_goidle); -+ -+ /* -+ * RCU users of rcu_dereference(rq->curr) may not see -+ * changes to task_struct made by pick_next_task(). -+ */ -+ RCU_INIT_POINTER(rq->curr, next); -+ /* -+ * The membarrier system call requires each architecture -+ * to have a full memory barrier after updating -+ * rq->curr, before returning to user-space. -+ * -+ * Here are the schemes providing that barrier on the -+ * various architectures: -+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. -+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
-+ * - finish_lock_switch() for weakly-ordered -+ * architectures where spin_unlock is a full barrier, -+ * - switch_to() for arm64 (weakly-ordered, spin_unlock -+ * is a RELEASE barrier), -+ */ -+ ++*switch_count; -+ rq->nr_switches++; -+ -+ psi_sched_switch(prev, next, !task_on_rq_queued(prev)); -+ -+ trace_sched_switch(preempt, prev, next); -+ -+ /* Also unlocks the rq: */ -+ rq = context_switch(rq, prev, next); -+#ifdef CONFIG_SCHED_SMT -+ pds_sg_balance_check(rq); -+#endif -+ } else -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+void __noreturn do_task_dead(void) -+{ -+ /* Causes final put_task_struct in finish_task_switch(): */ -+ set_special_state(TASK_DEAD); -+ -+ /* Tell freezer to ignore us: */ -+ current->flags |= PF_NOFREEZE; -+ __schedule(false); -+ -+ BUG(); -+ -+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -+ for (;;) -+ cpu_relax(); -+} -+ -+static inline void sched_submit_work(struct task_struct *tsk) -+{ -+ if (!tsk->state || tsk_is_pi_blocked(tsk) || -+ signal_pending_state(tsk->state, tsk)) -+ return; -+ -+ /* -+ * If a worker went to sleep, notify and ask workqueue whether -+ * it wants to wake up a task to maintain concurrency. -+ * As this function is called inside the schedule() context, -+ * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker. -+ */ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ preempt_disable(); -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_sleeping(tsk); -+ else -+ io_wq_worker_sleeping(tsk); -+ preempt_enable_no_resched(); -+ } -+ -+ /* -+ * If we are going to sleep and we have plugged IO queued, -+ * make sure to submit it to avoid deadlocks. -+ */ -+ if (blk_needs_flush_plug(tsk)) -+ blk_schedule_flush_plug(tsk); -+} -+ -+static void sched_update_worker(struct task_struct *tsk) -+{ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_running(tsk); -+ else -+ io_wq_worker_running(tsk); -+ } -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ struct task_struct *tsk = current; -+ -+ sched_submit_work(tsk); -+ do { -+ preempt_disable(); -+ __schedule(false); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ sched_update_worker(tsk); -+} -+EXPORT_SYMBOL(schedule); -+ -+/* -+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted -+ * state (have scheduled out non-voluntarily) by making sure that all -+ * tasks have either left the run queue or have gone into user space. -+ * As idle tasks do not do either, they must not ever be preempted -+ * (schedule out non-voluntarily). -+ * -+ * schedule_idle() is similar to schedule_preempt_disable() except that it -+ * never enables preemption because it does not call sched_submit_work(). -+ */ -+void __sched schedule_idle(void) -+{ -+ /* -+ * As this skips calling sched_submit_work(), which the idle task does -+ * regardless because that function is a nop when the task is in a -+ * TASK_RUNNING state, make sure this isn't used someplace that the -+ * current task can be in any other state. Note, idle is always in the -+ * TASK_RUNNING state. 
-+ */ -+ WARN_ON_ONCE(current->state); -+ do { -+ __schedule(false); -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_CONTEXT_TRACKING -+asmlinkage __visible void __sched schedule_user(void) -+{ -+ /* -+ * If we come here after a random call to set_need_resched(), -+ * or we have been woken up remotely but the IPI has not yet arrived, -+ * we haven't yet exited the RCU idle mode. Do it here manually until -+ * we find a better solution. -+ * -+ * NB: There are buggy callers of this function. Ideally we -+ * should warn if prev_state != CONTEXT_USER, but that will trigger -+ * too frequently to make sense yet. -+ */ -+ enum ctx_state prev_state = exception_enter(); -+ schedule(); -+ exception_exit(prev_state); -+} -+#endif -+ -+/** -+ * schedule_preempt_disabled - called with preemption disabled -+ * -+ * Returns with preemption disabled. Note: preempt_count must be 1 -+ */ -+void __sched schedule_preempt_disabled(void) -+{ -+ sched_preempt_enable_no_resched(); -+ schedule(); -+ preempt_disable(); -+} -+ -+static void __sched notrace preempt_schedule_common(void) -+{ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ __schedule(true); -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ -+ /* -+ * Check again in case we missed a preemption opportunity -+ * between schedule and now. -+ */ -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_PREEMPTION -+/* -+ * This is the entry point to schedule() from in-kernel preemption -+ * off of preempt_enable. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule(void) -+{ -+ /* -+ * If there is a non-zero preempt_count or interrupts are disabled, -+ * we do not want to preempt the current task. Just return.. -+ */ -+ if (likely(!preemptible())) -+ return; -+ -+ preempt_schedule_common(); -+} -+NOKPROBE_SYMBOL(preempt_schedule); -+EXPORT_SYMBOL(preempt_schedule); -+ -+/** -+ * preempt_schedule_notrace - preempt_schedule called by tracing -+ * -+ * The tracing infrastructure uses preempt_enable_notrace to prevent -+ * recursion and tracing preempt enabling caused by the tracing -+ * infrastructure itself. But as tracing can happen in areas coming -+ * from userspace or just about to enter userspace, a preempt enable -+ * can occur before user_exit() is called. This will cause the scheduler -+ * to be called when the system is still in usermode. -+ * -+ * To prevent this, the preempt_enable_notrace will use this function -+ * instead of preempt_schedule() to exit user context if needed before -+ * calling the scheduler. 
-+ */ -+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) -+{ -+ enum ctx_state prev_ctx; -+ -+ if (likely(!preemptible())) -+ return; -+ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ /* -+ * Needs preempt disabled in case user_exit() is traced -+ * and the tracer calls preempt_enable_notrace() causing -+ * an infinite recursion. -+ */ -+ prev_ctx = exception_enter(); -+ __schedule(true); -+ exception_exit(prev_ctx); -+ -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ } while (need_resched()); -+} -+EXPORT_SYMBOL_GPL(preempt_schedule_notrace); -+ -+#endif /* CONFIG_PREEMPTION */ -+ -+/* -+ * This is the entry point to schedule() from kernel preemption -+ * off of irq context. -+ * Note, that this is called and return with irqs disabled. This will -+ * protect us against recursive calling from irq. -+ */ -+asmlinkage __visible void __sched preempt_schedule_irq(void) -+{ -+ enum ctx_state prev_state; -+ -+ /* Catch callers which need to be fixed */ -+ BUG_ON(preempt_count() || !irqs_disabled()); -+ -+ prev_state = exception_enter(); -+ -+ do { -+ preempt_disable(); -+ local_irq_enable(); -+ __schedule(true); -+ local_irq_disable(); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ -+ exception_exit(prev_state); -+} -+ -+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, -+ void *key) -+{ -+ return try_to_wake_up(curr->private, mode, wake_flags); -+} -+EXPORT_SYMBOL(default_wake_function); -+ -+static inline void -+check_task_changed(struct rq *rq, struct task_struct *p) -+{ -+ /* -+ * Trigger changes when task priority/deadline modified. -+ */ -+ if (task_on_rq_queued(p)) { -+ struct task_struct *first; -+ -+ requeue_task(p, rq); -+ -+ /* Resched if first queued task not running and not IDLE */ -+ if ((first = rq_first_queued_task(rq)) != rq->curr && -+ !task_running_idle(first)) -+ resched_curr(rq); -+ } -+} -+ -+#ifdef CONFIG_RT_MUTEXES -+ -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ -+/* -+ * rt_mutex_setprio - set the current priority of a task -+ * @p: task to boost -+ * @pi_task: donor task -+ * -+ * This function changes the 'effective' priority of a task. It does -+ * not touch ->normal_prio like __setscheduler(). -+ * -+ * Used by the rt_mutex code to implement priority inheritance -+ * logic. Call site only calls if the priority of the task changed. 
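The usual way userspace reaches this boosting path is through priority-inheritance futexes, i.e. pthread mutexes created with PTHREAD_PRIO_INHERIT. A minimal sketch of that setup (illustrative only; build with -pthread):

    #include <pthread.h>
    #include <stdio.h>

    int main(void)
    {
        pthread_mutexattr_t attr;
        pthread_mutex_t lock;

        pthread_mutexattr_init(&attr);
        /* The owner of the mutex inherits the priority of its highest-
         * priority waiter; the kernel-side boost lands in rt_mutex_setprio(). */
        if (pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT)) {
            fprintf(stderr, "PTHREAD_PRIO_INHERIT not supported\n");
            return 1;
        }
        pthread_mutex_init(&lock, &attr);

        pthread_mutex_lock(&lock);
        /* ... contended section: a blocked RT waiter would boost us here ... */
        pthread_mutex_unlock(&lock);

        pthread_mutex_destroy(&lock);
        pthread_mutexattr_destroy(&attr);
        return 0;
    }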
-+ */ -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) -+{ -+ int prio; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. -+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio) -+ return; -+ -+ rq = __task_access_lock(p, &lock); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio) -+ goto out_unlock; -+ -+ /* -+ * Idle task boosting is a nono in general. There is one -+ * exception, when PREEMPT_RT and NOHZ is active: -+ * -+ * The idle task calls get_next_timer_interrupt() and holds -+ * the timer wheel base->lock on the CPU and another CPU wants -+ * to access the timer (probably to cancel it). We can safely -+ * ignore the boosting request, as the idle CPU runs this code -+ * with interrupts disabled and will complete the lock -+ * protected section without being interrupted. So there is no -+ * real need to boost. -+ */ -+ if (unlikely(p == rq->idle)) { -+ WARN_ON(p != rq->curr); -+ WARN_ON(p->pi_blocked_on); -+ goto out_unlock; -+ } -+ -+ trace_sched_pi_setprio(p, pi_task); -+ p->prio = prio; -+ update_task_priodl(p); -+ -+ check_task_changed(rq, p); -+ -+out_unlock: -+ __task_access_unlock(p, lock); -+} -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} -+#endif -+ -+void set_user_nice(struct task_struct *p, long nice) -+{ -+ int new_static; -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) -+ return; -+ new_static = NICE_TO_PRIO(nice); -+ /* -+ * We have to be careful, if called from sys_setpriority(), -+ * the task might be in the middle of scheduling on another CPU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ /* rq lock may not held!! 
*/ -+ update_rq_clock(rq); -+ -+ p->static_prio = new_static; -+ /* -+ * The RT priorities are set via sched_setscheduler(), but we still -+ * allow the 'normal' nice value to be set - but as expected -+ * it wont have any effect on scheduling until the task is -+ * not SCHED_NORMAL/SCHED_BATCH: -+ */ -+ if (task_has_rt_policy(p)) -+ goto out_unlock; -+ -+ p->deadline -= task_deadline_diff(p); -+ p->deadline += static_deadline_diff(new_static); -+ p->prio = effective_prio(p); -+ update_task_priodl(p); -+ -+ check_task_changed(rq, p); -+out_unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+EXPORT_SYMBOL(set_user_nice); -+ -+/* -+ * can_nice - check if a task can reduce its nice value -+ * @p: task -+ * @nice: nice value -+ */ -+int can_nice(const struct task_struct *p, const int nice) -+{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); -+} -+ -+#ifdef __ARCH_WANT_SYS_NICE -+ -+/* -+ * sys_nice - change the priority of the current process. -+ * @increment: priority increment -+ * -+ * sys_setpriority is a more generic, but much slower function that -+ * does similar things. -+ */ -+SYSCALL_DEFINE1(nice, int, increment) -+{ -+ long nice, retval; -+ -+ /* -+ * Setpriority might change our priority at the same moment. -+ * We don't have to worry. Conceptually one call occurs first -+ * and we have a single winner. -+ */ -+ -+ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); -+ nice = task_nice(current) + increment; -+ -+ nice = clamp_val(nice, MIN_NICE, MAX_NICE); -+ if (increment < 0 && !can_nice(current, nice)) -+ return -EPERM; -+ -+ retval = security_task_setnice(current, nice); -+ if (retval) -+ return retval; -+ -+ set_user_nice(current, nice); -+ return 0; -+} -+ -+#endif -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ int level, prio = p->prio - MAX_RT_PRIO; -+ static const int level_to_nice_prio[] = {39, 33, 26, 20, 14, 7, 0, 0}; -+ -+ /* rt tasks */ -+ if (prio <= 0) -+ goto out; -+ -+ preempt_disable(); -+ level = task_deadline_level(p, this_rq()); -+ preempt_enable(); -+ prio += level_to_nice_prio[level]; -+ if (idleprio_task(p)) -+ prio += NICE_WIDTH; -+out: -+ return prio; -+} -+ -+/** -+ * idle_cpu - is a given CPU idle currently? -+ * @cpu: the processor in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int idle_cpu(int cpu) -+{ -+ return cpu_curr(cpu) == cpu_rq(cpu)->idle; -+} -+ -+/** -+ * idle_task - return the idle task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * Return: The idle task for the cpu @cpu. -+ */ -+struct task_struct *idle_task(int cpu) -+{ -+ return cpu_rq(cpu)->idle; -+} -+ -+/** -+ * find_process_by_pid - find a process with a matching PID value. -+ * @pid: the pid in question. -+ * -+ * The task of @pid, if found. %NULL otherwise. -+ */ -+static inline struct task_struct *find_process_by_pid(pid_t pid) -+{ -+ return pid ? 
find_task_by_vpid(pid) : current; -+} -+ -+#ifdef CONFIG_SMP -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. -+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. -+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ int dest_cpu; -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. -+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (cpumask_equal(&p->cpus_mask, new_mask)) -+ goto out; -+ -+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ do_set_cpus_allowed(p, new_mask); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. -+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(p) || p->state == TASK_WAKING) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ /* Need help from migration thread: drop lock and wait. */ -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -+ return 0; -+ } -+ if (task_on_rq_queued(p)) { -+ /* -+ * OK, since we're going to drop the lock immediately -+ * afterwards anyway. 
-+ */ -+ update_rq_clock(rq); -+ rq = move_queued_task(rq, p, dest_cpu); -+ lock = &rq->lock; -+ } -+ -+out: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ return ret; -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+#else -+static inline int -+__set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+#endif -+ -+static u64 task_init_deadline(const struct task_struct *p) -+{ -+ return task_rq(p)->clock + task_deadline_diff(p); -+} -+ -+u64 (* task_init_deadline_func_tbl[])(const struct task_struct *p) = { -+ task_init_deadline, /* SCHED_NORMAL */ -+ NULL, /* SCHED_FIFO */ -+ NULL, /* SCHED_RR */ -+ task_init_deadline, /* SCHED_BATCH */ -+ NULL, /* SCHED_ISO */ -+ task_init_deadline /* SCHED_IDLE */ -+}; -+ -+/* -+ * sched_setparam() passes in -1 for its policy, to let the functions -+ * it calls know not to change it. -+ */ -+#define SETPARAM_POLICY -1 -+ -+static void __setscheduler_params(struct task_struct *p, -+ const struct sched_attr *attr) -+{ -+ int old_policy = p->policy; -+ int policy = attr->sched_policy; -+ -+ if (policy == SETPARAM_POLICY) -+ policy = p->policy; -+ -+ p->policy = policy; -+ -+ /* -+ * allow normal nice value to be set, but will not have any -+ * effect on scheduling until the task not SCHED_NORMAL/ -+ * SCHED_BATCH -+ */ -+ p->static_prio = NICE_TO_PRIO(attr->sched_nice); -+ -+ /* -+ * __sched_setscheduler() ensures attr->sched_priority == 0 when -+ * !rt_policy. Always setting this ensures that things like -+ * getparam()/getattr() don't report silly values for !rt tasks. -+ */ -+ p->rt_priority = attr->sched_priority; -+ p->normal_prio = normal_prio(p); -+ -+ if (old_policy != policy) -+ p->deadline = (task_init_deadline_func_tbl[p->policy])? -+ task_init_deadline_func_tbl[p->policy](p):0ULL; -+} -+ -+/* Actually do priority change: must hold rq lock. */ -+static void __setscheduler(struct rq *rq, struct task_struct *p, -+ const struct sched_attr *attr, bool keep_boost) -+{ -+ __setscheduler_params(p, attr); -+ -+ /* -+ * Keep a potential priority boosting if called from -+ * sched_setscheduler(). 
-+ */ -+ p->prio = normal_prio(p); -+ if (keep_boost) -+ p->prio = rt_effective_prio(p, p->prio); -+ update_task_priodl(p); -+} -+ -+/* -+ * check the target process has a UID that matches the current process's -+ */ -+static bool check_same_owner(struct task_struct *p) -+{ -+ const struct cred *cred = current_cred(), *pcred; -+ bool match; -+ -+ rcu_read_lock(); -+ pcred = __task_cred(p); -+ match = (uid_eq(cred->euid, pcred->euid) || -+ uid_eq(cred->euid, pcred->uid)); -+ rcu_read_unlock(); -+ return match; -+} -+ -+static int -+__sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, bool user, bool pi) -+{ -+ const struct sched_attr dl_squash_attr = { -+ .size = sizeof(struct sched_attr), -+ .sched_policy = SCHED_FIFO, -+ .sched_nice = 0, -+ .sched_priority = 99, -+ }; -+ int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ int retval, oldpolicy = -1; -+ int policy = attr->sched_policy; -+ unsigned long flags; -+ struct rq *rq; -+ int reset_on_fork; -+ raw_spinlock_t *lock; -+ -+ /* The pi code expects interrupts enabled */ -+ BUG_ON(pi && in_interrupt()); -+ -+ /* -+ * PDS supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO -+ */ -+ if (unlikely(SCHED_DEADLINE == policy)) { -+ attr = &dl_squash_attr; -+ policy = attr->sched_policy; -+ newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ } -+recheck: -+ /* Double check policy once rq lock held */ -+ if (policy < 0) { -+ reset_on_fork = p->sched_reset_on_fork; -+ policy = oldpolicy = p->policy; -+ } else { -+ reset_on_fork = !!(attr->sched_flags & SCHED_RESET_ON_FORK); -+ -+ if (policy > SCHED_IDLE) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & ~(SCHED_FLAG_ALL)) -+ return -EINVAL; -+ -+ /* -+ * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * SCHED_BATCH and SCHED_IDLE is 0. -+ */ -+ if (attr->sched_priority < 0 || -+ (p->mm && attr->sched_priority > MAX_USER_RT_PRIO - 1) || -+ (!p->mm && attr->sched_priority > MAX_RT_PRIO - 1)) -+ return -EINVAL; -+ if ((SCHED_RR == policy || SCHED_FIFO == policy) != -+ (attr->sched_priority != 0)) -+ return -EINVAL; -+ -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (SCHED_FIFO == policy || SCHED_RR == policy) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (attr->sched_priority > p->rt_priority && -+ attr->sched_priority > rlim_rtprio) -+ return -EPERM; -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ -+ if (user) { -+ retval = security_task_setscheduler(p); -+ if (retval) -+ return retval; -+ } -+ -+ if (pi) -+ cpuset_read_lock(); -+ -+ /* -+ * Make sure no PI-waiters arrive (or leave) while we are -+ * changing the priority of the task: -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ /* -+ * To be able to change p->policy safely, task_access_lock() -+ * must be called. -+ * IF use task_access_lock() here: -+ * For the task p which is not running, reading rq->stop is -+ * racy but acceptable as ->stop doesn't change much. -+ * An enhancemnet can be made to read rq->stop saftly. 
-+ */ -+ rq = __task_access_lock(p, &lock); -+ -+ /* -+ * Changing the policy of the stop threads its a very bad idea -+ */ -+ if (p == rq->stop) { -+ retval = -EINVAL; -+ goto unlock; -+ } -+ -+ /* -+ * If not changing anything there's no need to proceed further: -+ */ -+ if (unlikely(policy == p->policy)) { -+ if (rt_policy(policy) && attr->sched_priority != p->rt_priority) -+ goto change; -+ if (!rt_policy(policy) && -+ NICE_TO_PRIO(attr->sched_nice) != p->static_prio) -+ goto change; -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ retval = 0; -+ goto unlock; -+ } -+change: -+ -+ /* Re-check policy now with rq lock held */ -+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { -+ policy = oldpolicy = -1; -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ goto recheck; -+ } -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ -+ if (pi) { -+ /* -+ * Take priority boosted tasks into account. If the new -+ * effective priority is unchanged, we just store the new -+ * normal parameters and do not touch the scheduler class and -+ * the runqueue. This will be done when the task deboost -+ * itself. -+ */ -+ if (rt_effective_prio(p, newprio) == p->prio) { -+ __setscheduler_params(p, attr); -+ retval = 0; -+ goto unlock; -+ } -+ } -+ -+ __setscheduler(rq, p, attr, pi); -+ -+ check_task_changed(rq, p); -+ -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ if (pi) { -+ cpuset_read_unlock(); -+ rt_mutex_adjust_pi(p); -+ } -+ -+ preempt_enable(); -+ -+ return 0; -+ -+unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ return retval; -+} -+ -+static int _sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param, bool check) -+{ -+ struct sched_attr attr = { -+ .sched_policy = policy, -+ .sched_priority = param->sched_priority, -+ .sched_nice = PRIO_TO_NICE(p->static_prio), -+ }; -+ -+ /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ -+ if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { -+ attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ policy &= ~SCHED_RESET_ON_FORK; -+ attr.sched_policy = policy; -+ } -+ -+ return __sched_setscheduler(p, &attr, check, true); -+} -+ -+/** -+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * -+ * NOTE that the task may be already dead. -+ */ -+int sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, true); -+} -+ -+EXPORT_SYMBOL_GPL(sched_setscheduler); -+ -+int sched_setattr(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, true, true); -+} -+EXPORT_SYMBOL_GPL(sched_setattr); -+ -+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, false, true); -+} -+ -+/** -+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. 
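From userspace, the permission checks above are what a plain sched_setscheduler(2) call runs into. A minimal sketch using the glibc wrapper (illustrative only; needs CAP_SYS_NICE or a non-zero RLIMIT_RTPRIO to succeed):

    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
        struct sched_param sp = { .sched_priority = 10 };

        /* Request SCHED_FIFO priority 10 for the calling thread (pid 0);
         * without CAP_SYS_NICE this is limited by RLIMIT_RTPRIO, exactly
         * as the unprivileged checks in __sched_setscheduler() require. */
        if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
            perror("sched_setscheduler");
            return 1;
        }
        printf("policy is now %d\n", sched_getscheduler(0));
        return 0;
    }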
-+ * -+ * Just like sched_setscheduler, only don't bother checking if the -+ * current context has permission. For example, this is needed in -+ * stop_machine(): we create temporary high priority worker threads, -+ * but our caller might not have that capability. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+int sched_setscheduler_nocheck(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, false); -+} -+EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); -+ -+static int -+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) -+{ -+ struct sched_param lparam; -+ struct task_struct *p; -+ int retval; -+ -+ if (!param || pid < 0) -+ return -EINVAL; -+ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) -+ return -EFAULT; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setscheduler(p, policy, &lparam); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/* -+ * Mimics kernel/events/core.c perf_copy_attr(). -+ */ -+static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr) -+{ -+ u32 size; -+ int ret; -+ -+ /* Zero the full structure, so that a short copy will be nice: */ -+ memset(attr, 0, sizeof(*attr)); -+ -+ ret = get_user(size, &uattr->size); -+ if (ret) -+ return ret; -+ -+ /* ABI compatibility quirk: */ -+ if (!size) -+ size = SCHED_ATTR_SIZE_VER0; -+ -+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) -+ goto err_size; -+ -+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); -+ if (ret) { -+ if (ret == -E2BIG) -+ goto err_size; -+ return ret; -+ } -+ -+ /* -+ * XXX: Do we want to be lenient like existing syscalls; or do we want -+ * to be strict and return an error on out-of-bounds values? -+ */ -+ attr->sched_nice = clamp(attr->sched_nice, -20, 19); -+ -+ /* sched/core.c uses zero here but we already know ret is zero */ -+ return 0; -+ -+err_size: -+ put_user(sizeof(*attr), &uattr->size); -+ return -E2BIG; -+} -+ -+/** -+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority -+ * @pid: the pid in question. -+ * @policy: new policy. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * @param: structure containing the new RT priority. -+ */ -+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) -+{ -+ if (policy < 0) -+ return -EINVAL; -+ -+ return do_sched_setscheduler(pid, policy, param); -+} -+ -+/** -+ * sys_sched_setparam - set/change the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); -+} -+ -+/** -+ * sys_sched_setattr - same as above, but with extended sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. 
-+ */ -+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, flags) -+{ -+ struct sched_attr attr; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || flags) -+ return -EINVAL; -+ -+ retval = sched_copy_attr(uattr, &attr); -+ if (retval) -+ return retval; -+ -+ if ((int)attr.sched_policy < 0) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (p != NULL) -+ retval = sched_setattr(p, &attr); -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread -+ * @pid: the pid in question. -+ * -+ * Return: On success, the policy of the thread. Otherwise, a negative error -+ * code. -+ */ -+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) -+{ -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (pid < 0) -+ goto out_nounlock; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (p) { -+ retval = security_task_getscheduler(p); -+ if (!retval) -+ retval = p->policy; -+ } -+ rcu_read_unlock(); -+ -+out_nounlock: -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the RT priority. -+ * -+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error -+ * code. -+ */ -+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ struct sched_param lp = { .sched_priority = 0 }; -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (!param || pid < 0) -+ goto out_nounlock; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ if (task_has_rt_policy(p)) -+ lp.sched_priority = p->rt_priority; -+ rcu_read_unlock(); -+ -+ /* -+ * This one might sleep, we cannot do it with a spinlock held ... -+ */ -+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; -+ -+out_nounlock: -+ return retval; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/* -+ * Copy the kernel size attribute structure (which might be larger -+ * than what user-space knows about) to user-space. -+ * -+ * Note that all cases are valid: user-space buffer can be larger or -+ * smaller than the kernel-space buffer. The usual case is that both -+ * have the same size. -+ */ -+static int -+sched_attr_copy_to_user(struct sched_attr __user *uattr, -+ struct sched_attr *kattr, -+ unsigned int usize) -+{ -+ unsigned int ksize = sizeof(*kattr); -+ -+ if (!access_ok(uattr, usize)) -+ return -EFAULT; -+ -+ /* -+ * sched_getattr() ABI forwards and backwards compatibility: -+ * -+ * If usize == ksize then we just copy everything to user-space and all is good. -+ * -+ * If usize < ksize then we only copy as much as user-space has space for, -+ * this keeps ABI compatibility as well. We skip the rest. -+ * -+ * If usize > ksize then user-space is using a newer version of the ABI, -+ * which part the kernel doesn't know about. Just ignore it - tooling can -+ * detect the kernel's knowledge of attributes from the attr->size value -+ * which is set to ksize in this case. -+ */ -+ kattr->size = min(usize, ksize); -+ -+ if (copy_to_user(uattr, kattr, kattr->size)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr -+ * @pid: the pid in question. 
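glibc has historically provided no wrapper for these two syscalls, so tooling calls them through syscall(2) with a hand-rolled struct sched_attr (layout as documented in sched_setattr(2)). A sketch of the read side, which also exercises the size handshake described above:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    struct sched_attr {                 /* layout per sched_setattr(2) */
        uint32_t size;
        uint32_t sched_policy;
        uint64_t sched_flags;
        int32_t  sched_nice;
        uint32_t sched_priority;
        uint64_t sched_runtime;
        uint64_t sched_deadline;
        uint64_t sched_period;
    };

    int main(void)
    {
        struct sched_attr attr;

        memset(&attr, 0, sizeof(attr));
        /* The kernel copies back min(usize, ksize) bytes and records the
         * copied size in .size, per sched_attr_copy_to_user() above. */
        if (syscall(SYS_sched_getattr, 0, &attr, sizeof(attr), 0) == -1) {
            perror("sched_getattr");
            return 1;
        }
        printf("policy=%u nice=%d rt_priority=%u (attr.size=%u)\n",
               attr.sched_policy, attr.sched_nice, attr.sched_priority,
               attr.size);
        return 0;
    }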
-+ * @uattr: structure containing the extended parameters. -+ * @usize: sizeof(attr) for fwd/bwd comp. -+ * @flags: for future extension. -+ */ -+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, usize, unsigned int, flags) -+{ -+ struct sched_attr kattr = { }; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || usize > PAGE_SIZE || -+ usize < SCHED_ATTR_SIZE_VER0 || flags) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ kattr.sched_policy = p->policy; -+ if (rt_task(p)) -+ kattr.sched_priority = p->rt_priority; -+ else -+ kattr.sched_nice = task_nice(p); -+ -+#ifdef CONFIG_UCLAMP_TASK -+ kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; -+ kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; -+#endif -+ -+ rcu_read_unlock(); -+ -+ return sched_attr_copy_to_user(uattr, &kattr, usize); -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ -+ cpumask_var_t cpus_mask, new_mask; -+ struct task_struct *p; -+ int retval; -+ -+ get_online_cpus(); -+ rcu_read_lock(); -+ -+ p = find_process_by_pid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ put_online_cpus(); -+ return -ESRCH; -+ } -+ -+ /* Prevent p going away */ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->flags & PF_NO_SETAFFINITY) { -+ retval = -EINVAL; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&cpus_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ retval = -EPERM; -+ if (!check_same_owner(p)) { -+ rcu_read_lock(); -+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { -+ rcu_read_unlock(); -+ goto out_unlock; -+ } -+ rcu_read_unlock(); -+ } -+ -+ retval = security_task_setscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ cpuset_cpus_allowed(p, cpus_mask); -+ cpumask_and(new_mask, in_mask, cpus_mask); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ -+ if (!retval) { -+ cpuset_cpus_allowed(p, cpus_mask); -+ if (!cpumask_subset(new_mask, cpus_mask)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. Just reset the cpus_mask to the -+ * cpuset's cpus_mask -+ */ -+ cpumask_copy(new_mask, cpus_mask); -+ goto again; -+ } -+ } -+out_unlock: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_mask); -+out_put_task: -+ put_task_struct(p); -+ put_online_cpus(); -+ return retval; -+} -+ -+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, -+ struct cpumask *new_mask) -+{ -+ if (len < cpumask_size()) -+ cpumask_clear(new_mask); -+ else if (len > cpumask_size()) -+ len = cpumask_size(); -+ -+ return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; -+} -+ -+/** -+ * sys_sched_setaffinity - set the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to the new CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. 
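The corresponding userspace call is sched_setaffinity(2) with a cpu_set_t. A minimal sketch (illustrative only; _GNU_SOURCE is needed for the CPU_* macros):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
        cpu_set_t set;

        /* Pin the calling process to CPU 0. */
        CPU_ZERO(&set);
        CPU_SET(0, &set);
        if (sched_setaffinity(0, sizeof(set), &set) == -1) {
            perror("sched_setaffinity");
            return 1;
        }

        /* Read the mask back.  The raw syscall returns the number of bytes
         * copied (see sys_sched_getaffinity() below); the glibc wrapper
         * hides that and returns 0 on success. */
        CPU_ZERO(&set);
        if (sched_getaffinity(0, sizeof(set), &set) == 0)
            printf("allowed CPUs: %d\n", CPU_COUNT(&set));
        return 0;
    }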
-+ */ -+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ cpumask_var_t new_mask; -+ int retval; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); -+ if (retval == 0) -+ retval = sched_setaffinity(pid, new_mask); -+ free_cpumask_var(new_mask); -+ return retval; -+} -+ -+long sched_getaffinity(pid_t pid, cpumask_t *mask) -+{ -+ struct task_struct *p; -+ raw_spinlock_t *lock; -+ unsigned long flags; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ task_access_lock_irqsave(p, &lock, &flags); -+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+out_unlock: -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getaffinity - get the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to hold the current CPU mask -+ * -+ * Return: size of CPU mask copied to user_mask_ptr on success. An -+ * error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ int ret; -+ cpumask_var_t mask; -+ -+ if ((len * BITS_PER_BYTE) < nr_cpu_ids) -+ return -EINVAL; -+ if (len & (sizeof(unsigned long)-1)) -+ return -EINVAL; -+ -+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ ret = sched_getaffinity(pid, mask); -+ if (ret == 0) { -+ unsigned int retlen = min_t(size_t, len, cpumask_size()); -+ -+ if (copy_to_user(user_mask_ptr, mask, retlen)) -+ ret = -EFAULT; -+ else -+ ret = retlen; -+ } -+ free_cpumask_var(mask); -+ -+ return ret; -+} -+ -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. It does this by -+ * scheduling away the current task. If it still has the earliest deadline -+ * it will be scheduled again as the next task. -+ * -+ * Return: 0. -+ */ -+static void do_sched_yield(void) -+{ -+ struct rq *rq; -+ struct rq_flags rf; -+ -+ if (!sched_yield_type) -+ return; -+ -+ rq = this_rq_lock_irq(&rf); -+ -+ if (sched_yield_type > 1) { -+ time_slice_expired(current, rq); -+ requeue_task(current, rq); -+ } -+ schedstat_inc(rq->yld_count); -+ -+ /* -+ * Since we are going to call schedule() anyway, there's -+ * no need to preempt or enable interrupts: -+ */ -+ preempt_disable(); -+ raw_spin_unlock(&rq->lock); -+ sched_preempt_enable_no_resched(); -+ -+ schedule(); -+} -+ -+SYSCALL_DEFINE0(sched_yield) -+{ -+ do_sched_yield(); -+ return 0; -+} -+ -+#ifndef CONFIG_PREEMPTION -+int __sched _cond_resched(void) -+{ -+ if (should_resched(0)) { -+ preempt_schedule_common(); -+ return 1; -+ } -+ rcu_all_qs(); -+ return 0; -+} -+EXPORT_SYMBOL(_cond_resched); -+#endif -+ -+/* -+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, -+ * call schedule, and on return reacquire the lock. -+ * -+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level -+ * operations here to prevent schedule() from being called twice (once via -+ * spin_unlock(), once by hand). 
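What userspace sees from the above is the plain sched_yield(2) call; under PDS its effect depends on the sched_yield_type value checked in do_sched_yield(). A minimal sketch:

    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
        /* Voluntarily give up the CPU.  With sched_yield_type 0 this is a
         * no-op; with values above 1 the current task's slice is expired
         * and it is requeued, per do_sched_yield() above. */
        if (sched_yield() != 0) {
            perror("sched_yield");
            return 1;
        }
        printf("yielded\n");
        return 0;
    }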
-+ */ -+int __cond_resched_lock(spinlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held(lock); -+ -+ if (spin_needbreak(lock) || resched) { -+ spin_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ spin_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_lock); -+ -+/** -+ * yield - yield the current processor to other threads. -+ * -+ * Do not ever use this function, there's a 99% chance you're doing it wrong. -+ * -+ * The scheduler is at all times free to pick the calling task as the most -+ * eligible task to run, if removing the yield() call from your code breaks -+ * it, its already broken. -+ * -+ * Typical broken usage is: -+ * -+ * while (!event) -+ * yield(); -+ * -+ * where one assumes that yield() will let 'the other' process run that will -+ * make event true. If the current task is a SCHED_FIFO task that will never -+ * happen. Never use yield() as a progress guarantee!! -+ * -+ * If you want to use yield() to wait for something, use wait_event(). -+ * If you want to use yield() to be 'nice' for others, use cond_resched(). -+ * If you still want to use yield(), do not! -+ */ -+void __sched yield(void) -+{ -+ set_current_state(TASK_RUNNING); -+ do_sched_yield(); -+} -+EXPORT_SYMBOL(yield); -+ -+/** -+ * yield_to - yield the current processor to another thread in -+ * your thread group, or accelerate that thread toward the -+ * processor it's on. -+ * @p: target task -+ * @preempt: whether task preemption is allowed or not -+ * -+ * It's the caller's job to ensure that the target task struct -+ * can't go away on us before we can do any checks. -+ * -+ * In PDS, yield_to is not supported. -+ * -+ * Return: -+ * true (>0) if we indeed boosted the target task. -+ * false (0) if we failed to boost the target. -+ * -ESRCH if there's no task to yield to. -+ */ -+int __sched yield_to(struct task_struct *p, bool preempt) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(yield_to); -+ -+int io_schedule_prepare(void) -+{ -+ int old_iowait = current->in_iowait; -+ -+ current->in_iowait = 1; -+ blk_schedule_flush_plug(current); -+ -+ return old_iowait; -+} -+ -+void io_schedule_finish(int token) -+{ -+ current->in_iowait = token; -+} -+ -+/* -+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so -+ * that process accounting knows that this is a task in IO wait state. -+ * -+ * But don't do that if it is a deliberate, throttling IO wait (this task -+ * has set its backing_dev_info: the queue against which it should throttle) -+ */ -+ -+long __sched io_schedule_timeout(long timeout) -+{ -+ int token; -+ long ret; -+ -+ token = io_schedule_prepare(); -+ ret = schedule_timeout(timeout); -+ io_schedule_finish(token); -+ -+ return ret; -+} -+EXPORT_SYMBOL(io_schedule_timeout); -+ -+void io_schedule(void) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ schedule(); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(io_schedule); -+ -+/** -+ * sys_sched_get_priority_max - return maximum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the maximum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. 
-+ */ -+SYSCALL_DEFINE1(sched_get_priority_max, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = MAX_USER_RT_PRIO-1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * sys_sched_get_priority_min - return minimum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the minimum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_min, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) -+{ -+ struct task_struct *p; -+ int retval; -+ -+ if (pid < 0) -+ return -EINVAL; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ rcu_read_unlock(); -+ -+ *t = ns_to_timespec64(MS_TO_NS(rr_interval)); -+ return 0; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/** -+ * sys_sched_rr_get_interval - return the default timeslice of a process. -+ * @pid: pid of the process. -+ * @interval: userspace pointer to the timeslice value. -+ * -+ * -+ * Return: On success, 0 and the timeslice is in @interval. Otherwise, -+ * an error code. -+ */ -+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, -+ struct __kernel_timespec __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_timespec64(&t, interval); -+ -+ return retval; -+} -+ -+#ifdef CONFIG_COMPAT_32BIT_TIME -+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, -+ struct old_timespec32 __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_old_timespec32(&t, interval); -+ return retval; -+} -+#endif -+ -+void sched_show_task(struct task_struct *p) -+{ -+ unsigned long free = 0; -+ int ppid; -+ -+ if (!try_get_task_stack(p)) -+ return; -+ -+ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); -+ -+ if (p->state == TASK_RUNNING) -+ printk(KERN_CONT " running task "); -+#ifdef CONFIG_DEBUG_STACK_USAGE -+ free = stack_not_used(p); -+#endif -+ ppid = 0; -+ rcu_read_lock(); -+ if (pid_alive(p)) -+ ppid = task_pid_nr(rcu_dereference(p->real_parent)); -+ rcu_read_unlock(); -+ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, -+ task_pid_nr(p), ppid, -+ (unsigned long)task_thread_info(p)->flags); -+ -+ print_worker_info(KERN_INFO, p); -+ show_stack(p, NULL, KERN_INFO); -+ put_task_stack(p); -+} -+EXPORT_SYMBOL_GPL(sched_show_task); -+ -+static inline bool -+state_filter_match(unsigned long state_filter, struct task_struct *p) -+{ -+ /* no filter, everything matches */ -+ if (!state_filter) -+ return true; -+ -+ /* filter, but doesn't match */ -+ if (!(p->state & state_filter)) -+ return false; -+ -+ /* -+ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows -+ * TASK_KILLABLE). 
-+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) -+ return false; -+ -+ return true; -+} -+ -+ -+void show_state_filter(unsigned long state_filter) -+{ -+ struct task_struct *g, *p; -+ -+#if BITS_PER_LONG == 32 -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#else -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#endif -+ rcu_read_lock(); -+ for_each_process_thread(g, p) { -+ /* -+ * reset the NMI-timeout, listing all files on a slow -+ * console might take a lot of time: -+ * Also, reset softlockup watchdogs on all CPUs, because -+ * another CPU might be blocked waiting for us to process -+ * an IPI. -+ */ -+ touch_nmi_watchdog(); -+ touch_all_softlockup_watchdogs(); -+ if (state_filter_match(state_filter, p)) -+ sched_show_task(p); -+ } -+ -+#ifdef CONFIG_SCHED_DEBUG -+ /* PDS TODO: should support this -+ if (!state_filter) -+ sysrq_sched_debug_show(); -+ */ -+#endif -+ rcu_read_unlock(); -+ /* -+ * Only show locks if all tasks are dumped: -+ */ -+ if (!state_filter) -+ debug_show_all_locks(); -+} -+ -+void dump_cpu_task(int cpu) -+{ -+ pr_info("Task dump for CPU %d:\n", cpu); -+ sched_show_task(cpu_curr(cpu)); -+} -+ -+/** -+ * init_idle - set up an idle thread for a given CPU -+ * @idle: task in question -+ * @cpu: cpu the idle task belongs to -+ * -+ * NOTE: this function does not set the idle thread's NEED_RESCHED -+ * flag, to make booting more robust. -+ */ -+void init_idle(struct task_struct *idle, int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&idle->pi_lock, flags); -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ idle->last_ran = rq->clock_task; -+ idle->state = TASK_RUNNING; -+ idle->flags |= PF_IDLE; -+ /* Setting prio to illegal value shouldn't matter when never queued */ -+ idle->prio = PRIO_LIMIT; -+ idle->deadline = rq_clock(rq) + task_deadline_diff(idle); -+ update_task_priodl(idle); -+ -+ kasan_unpoison_task_stack(idle); -+ -+#ifdef CONFIG_SMP -+ /* -+ * It's possible that init_idle() gets called multiple times on a task, -+ * in that case do_set_cpus_allowed() will not do the right thing. -+ * -+ * And since this is boot we can forgo the serialisation. -+ */ -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); -+#endif -+ -+ /* Silence PROVE_RCU */ -+ rcu_read_lock(); -+ __set_task_cpu(idle, cpu); -+ rcu_read_unlock(); -+ -+ rq->idle = idle; -+ rcu_assign_pointer(rq->curr, idle); -+ idle->on_cpu = 1; -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); -+ -+ /* Set the preempt count _outside_ the spinlocks! */ -+ init_idle_preempt_count(idle, cpu); -+ -+ ftrace_graph_init_idle_task(idle, cpu); -+ vtime_init_idle(idle, cpu); -+#ifdef CONFIG_SMP -+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -+#endif -+} -+ -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(cpu_rq(cpu)); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+} -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. 
-+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. -+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. -+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); -+ /* task can safely be re-inserted now: */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ -+#ifdef CONFIG_SMP -+ -+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, -+ const struct cpumask __maybe_unused *trial) -+{ -+ return 1; -+} -+ -+int task_can_attach(struct task_struct *p, -+ const struct cpumask *cs_cpus_allowed) -+{ -+ int ret = 0; -+ -+ /* -+ * Kthreads which disallow setaffinity shouldn't be moved -+ * to a new cpuset; we don't want to change their CPU -+ * affinity and isolating such threads by their set of -+ * allowed nodes is unnecessary. Thus, cpusets are not -+ * applicable for such threads. This prevents checking for -+ * success of set_cpus_allowed_ptr() on all attached tasks -+ * before cpus_mask may be changed. -+ */ -+ if (p->flags & PF_NO_SETAFFINITY) -+ ret = -EINVAL; -+ -+ return ret; -+} -+ -+static bool sched_smp_initialized __read_mostly; -+ -+#ifdef CONFIG_NO_HZ_COMMON -+void nohz_balance_enter_idle(int cpu) -+{ -+} -+ -+void select_nohz_load_balancer(int stop_tick) -+{ -+} -+ -+void set_cpu_sd_state_idle(void) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. 
-+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). -+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(), default_cpu = -1; -+ struct cpumask *mask; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { -+ if (!idle_cpu(cpu)) -+ return cpu; -+ default_cpu = cpu; -+ } -+ -+ for (mask = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ mask < per_cpu(sched_cpu_affinity_chk_end_masks, cpu); mask++) -+ for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) -+ if (!idle_cpu(i)) -+ return i; -+ -+ if (default_cpu == -1) -+ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+ cpu = default_cpu; -+ -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. -+ */ -+void wake_up_idle_cpu(int cpu) -+{ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ set_tsk_need_resched(cpu_rq(cpu)->idle); -+ smp_send_reschedule(cpu); -+} -+ -+void wake_up_nohz_cpu(int cpu) -+{ -+ wake_up_idle_cpu(cpu); -+} -+#endif /* CONFIG_NO_HZ_COMMON */ -+ -+#ifdef CONFIG_HOTPLUG_CPU -+/* -+ * Ensures that the idle task is using init_mm right before its CPU goes -+ * offline. -+ */ -+void idle_task_exit(void) -+{ -+ struct mm_struct *mm = current->active_mm; -+ -+ BUG_ON(current != this_rq()->idle); -+ -+ if (mm != &init_mm) { -+ switch_mm(mm, &init_mm, current); -+ finish_arch_post_lock_switch(); -+ } -+ -+ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ -+} -+ -+/* -+ * Migrate all tasks from the rq, sleeping tasks will be migrated by -+ * try_to_wake_up()->select_task_rq(). -+ * -+ * Called with rq->lock held even though we'er in stop_machine() and -+ * there's no concurrency possible, we hold the required locks anyway -+ * because of lock validation efforts. -+ */ -+static void migrate_tasks(struct rq *dead_rq) -+{ -+ struct rq *rq = dead_rq; -+ struct task_struct *p, *stop = rq->stop; -+ struct skiplist_node *node; -+ int count = 0; -+ -+ /* -+ * Fudge the rq selection such that the below task selection loop -+ * doesn't get stuck on the currently eligible stop task. -+ * -+ * We're currently inside stop_machine() and the rq is either stuck -+ * in the stop_machine_cpu_stop() loop, or we're executing this code, -+ * either way we should never end up calling schedule() until we're -+ * done here. -+ */ -+ rq->stop = NULL; -+ -+ node = &rq->sl_header; -+ while ((node = node->next[0]) != &rq->sl_header) { -+ int dest_cpu; -+ -+ p = skiplist_entry(node, struct task_struct, sl_node); -+ -+ /* skip the running task */ -+ if (task_running(p)) -+ continue; -+ -+ /* -+ * Rules for changing task_struct::cpus_mask are holding -+ * both pi_lock and rq->lock, such that holding either -+ * stabilizes the mask. -+ * -+ * Drop rq->lock is not quite as disastrous as it usually is -+ * because !cpu_active at this point, which means load-balance -+ * will not interfere. Also, stop-machine. 
-+ */ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ /* -+ * Since we're inside stop-machine, _nothing_ should have -+ * changed the task, WARN if weird stuff happened, because in -+ * that case the above rq->lock drop is a fail too. -+ */ -+ if (WARN_ON(task_rq(p) != rq || !task_on_rq_queued(p))) { -+ raw_spin_unlock(&p->pi_lock); -+ continue; -+ } -+ -+ count++; -+ /* Find suitable destination for @next, with force if needed. */ -+ dest_cpu = select_fallback_rq(dead_rq->cpu, p); -+ -+ rq = __migrate_task(rq, p, dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ rq = dead_rq; -+ raw_spin_lock(&rq->lock); -+ /* Check queued task all over from the header again */ -+ node = &rq->sl_header; -+ } -+ -+ rq->stop = stop; -+} -+ -+static void set_rq_offline(struct rq *rq) -+{ -+ if (rq->online) -+ rq->online = false; -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+static void set_rq_online(struct rq *rq) -+{ -+ if (!rq->online) -+ rq->online = true; -+} -+ -+#ifdef CONFIG_SCHED_DEBUG -+ -+static __read_mostly int sched_debug_enabled; -+ -+static int __init sched_debug_setup(char *str) -+{ -+ sched_debug_enabled = 1; -+ -+ return 0; -+} -+early_param("sched_debug", sched_debug_setup); -+ -+static inline bool sched_debug(void) -+{ -+ return sched_debug_enabled; -+} -+#else /* !CONFIG_SCHED_DEBUG */ -+static inline bool sched_debug(void) -+{ -+ return false; -+} -+#endif /* CONFIG_SCHED_DEBUG */ -+ -+#ifdef CONFIG_SMP -+void send_call_function_single_ipi(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (!set_nr_if_polling(rq->idle)) -+ arch_send_call_function_single_ipi(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+void sched_ttwu_pending(void *arg) -+{ -+ struct llist_node *llist = arg; -+ struct rq *rq = this_rq(); -+ struct task_struct *p, *t; -+ struct rq_flags rf; -+ -+ if (!llist) -+ return; -+ -+ /* -+ * rq::ttwu_pending racy indication of out-standing wakeups. -+ * Races such that false-negatives are possible, since they -+ * are shorter lived that false-positives would be. -+ */ -+ WRITE_ONCE(rq->ttwu_pending, 0); -+ -+ rq_lock_irqsave(rq, &rf); -+ update_rq_clock(rq); -+ -+ /*llist_for_each_entry_safe(p, t, llist, wake_entry) -+ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf);*/ -+ -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (is_idle_task(rq->curr)) -+ smp_send_reschedule(cpu); -+ /* Else CPU is not idle, do nothing here */ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Topology list, bottom-up. 
-+ */ -+static struct sched_domain_topology_level default_topology[] = { -+#ifdef CONFIG_SCHED_SMT -+ { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, -+#endif -+#ifdef CONFIG_SCHED_MC -+ { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, -+#endif -+ { cpu_cpu_mask, SD_INIT_NAME(DIE) }, -+ { NULL, }, -+}; -+ -+static struct sched_domain_topology_level *sched_domain_topology = -+ default_topology; -+ -+#define for_each_sd_topology(tl) \ -+ for (tl = sched_domain_topology; tl->mask; tl++) -+ -+void set_sched_topology(struct sched_domain_topology_level *tl) -+{ -+ if (WARN_ON_ONCE(sched_smp_initialized)) -+ return; -+ -+ sched_domain_topology = tl; -+} -+ -+/* -+ * Initializers for schedule domains -+ * Non-inlined to reduce accumulated stack pressure in build_sched_domains() -+ */ -+ -+int sched_domain_level_max; -+ -+/* -+ * Partition sched domains as specified by the 'ndoms_new' -+ * cpumasks in the array doms_new[] of cpumasks. This compares -+ * doms_new[] to the current sched domain partitioning, doms_cur[]. -+ * It destroys each deleted domain and builds each new domain. -+ * -+ * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. -+ * The masks don't intersect (don't overlap.) We should setup one -+ * sched domain for each mask. CPUs not in any of the cpumasks will -+ * not be load balanced. If the same cpumask appears both in the -+ * current 'doms_cur' domains and in the new 'doms_new', we can leave -+ * it as it is. -+ * -+ * The passed in 'doms_new' should be allocated using -+ * alloc_sched_domains. This routine takes ownership of it and will -+ * free_sched_domains it when done with it. If the caller failed the -+ * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, -+ * and partition_sched_domains() will fallback to the single partition -+ * 'fallback_doms', it also forces the domains to be rebuilt. -+ * -+ * If doms_new == NULL it will be replaced with cpu_online_mask. -+ * ndoms_new == 0 is a special case for destroying existing domains, -+ * and it will not create the default domain. -+ * -+ * Call with hotplug lock held -+ */ -+void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], -+ struct sched_domain_attr *dattr_new) -+{ -+ /** -+ * PDS doesn't depend on sched domains, but just keep this api -+ */ -+} -+ -+/* -+ * used to mark begin/end of suspend/resume: -+ */ -+static int num_cpus_frozen; -+ -+#ifdef CONFIG_NUMA -+int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; -+ -+/* -+ * sched_numa_find_closest() - given the NUMA topology, find the cpu -+ * closest to @cpu from @cpumask. -+ * cpumask: cpumask to find a cpu from -+ * cpu: cpu to be close to -+ * -+ * returns: cpu, or nr_cpu_ids when nothing found. -+ */ -+int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return best_mask_cpu(cpu, cpus); -+} -+#endif /* CONFIG_NUMA */ -+ -+/* -+ * Update cpusets according to cpu_active mask. If cpusets are -+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper -+ * around partition_sched_domains(). -+ * -+ * If we come here as part of a suspend/resume, don't touch cpusets because we -+ * want to restore it back to its original state upon resume anyway. -+ */ -+static void cpuset_cpu_active(void) -+{ -+ if (cpuhp_tasks_frozen) { -+ /* -+ * num_cpus_frozen tracks how many CPUs are involved in suspend -+ * resume sequence. As long as this is not the last online -+ * operation in the resume sequence, just build a single sched -+ * domain, ignoring cpusets. 
-+ */ -+ partition_sched_domains(1, NULL, NULL); -+ if (--num_cpus_frozen) -+ return; -+ /* -+ * This is the last CPU online operation. So fall through and -+ * restore the original sched domains by considering the -+ * cpuset configurations. -+ */ -+ cpuset_force_rebuild(); -+ } -+ -+ cpuset_update_active_cpus(); -+} -+ -+static int cpuset_cpu_inactive(unsigned int cpu) -+{ -+ if (!cpuhp_tasks_frozen) { -+ cpuset_update_active_cpus(); -+ } else { -+ num_cpus_frozen++; -+ partition_sched_domains(1, NULL, NULL); -+ } -+ return 0; -+} -+ -+int sched_cpu_activate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going up, increment the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_inc_cpuslocked(&sched_smt_present); -+#endif -+ set_cpu_active(cpu, true); -+ -+ if (sched_smp_initialized) -+ cpuset_cpu_active(); -+ -+ /* -+ * Put the rq online, if not already. This happens: -+ * -+ * 1) In the early boot process, because we build the real domains -+ * after all cpus have been brought up. -+ * -+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the -+ * domains. -+ */ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_online(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ int ret; -+ -+ set_cpu_active(cpu, false); -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. -+ */ -+ synchronize_rcu(); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going down, decrement the number of cores with SMT present. 
-+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_dec_cpuslocked(&sched_smt_present); -+#endif -+ -+ if (!sched_smp_initialized) -+ return 0; -+ -+ ret = cpuset_cpu_inactive(cpu); -+ if (ret) { -+ set_cpu_active(cpu, true); -+ return ret; -+ } -+ return 0; -+} -+ -+static void sched_rq_cpu_starting(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ rq->calc_load_update = calc_load_update; -+} -+ -+int sched_cpu_starting(unsigned int cpu) -+{ -+ sched_rq_cpu_starting(cpu); -+ sched_tick_start(cpu); -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+int sched_cpu_dying(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ sched_tick_stop(cpu); -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_offline(rq); -+ migrate_tasks(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ hrtick_clear(rq); -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_SMP -+static void sched_init_topology_cpumask_early(void) -+{ -+ int cpu, level; -+ cpumask_t *tmp; -+ -+ for_each_possible_cpu(cpu) { -+ for (level = 0; level < NR_CPU_AFFINITY_CHK_LEVEL; level++) { -+ tmp = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[level]); -+ cpumask_copy(tmp, cpu_possible_mask); -+ cpumask_clear_cpu(cpu, tmp); -+ } -+ per_cpu(sched_cpu_llc_start_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ per_cpu(sched_cpu_affinity_chk_end_masks, cpu) = -+ &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[1]); -+ } -+} -+ -+static void sched_init_topology_cpumask(void) -+{ -+ int cpu; -+ cpumask_t *chk; -+ -+ for_each_online_cpu(cpu) { -+ chk = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ -+#ifdef CONFIG_SCHED_SMT -+ cpumask_setall(chk); -+ cpumask_clear_cpu(cpu, chk); -+ if (cpumask_and(chk, chk, topology_sibling_cpumask(cpu))) { -+ per_cpu(sched_sibling_cpu, cpu) = cpumask_first(chk); -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - smt 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ } -+#endif -+#ifdef CONFIG_SCHED_MC -+ cpumask_setall(chk); -+ cpumask_clear_cpu(cpu, chk); -+ if (cpumask_and(chk, chk, cpu_coregroup_mask(cpu))) { -+ per_cpu(sched_cpu_llc_start_mask, cpu) = chk; -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - coregroup 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ } -+ cpumask_complement(chk, cpu_coregroup_mask(cpu)); -+ -+ /** -+ * Set up sd_llc_id per CPU -+ */ -+ per_cpu(sd_llc_id, cpu) = -+ cpumask_first(cpu_coregroup_mask(cpu)); -+#else -+ per_cpu(sd_llc_id, cpu) = -+ cpumask_first(topology_core_cpumask(cpu)); -+ -+ per_cpu(sched_cpu_llc_start_mask, cpu) = chk; -+ -+ cpumask_setall(chk); -+ cpumask_clear_cpu(cpu, chk); -+#endif /* NOT CONFIG_SCHED_MC */ -+ if (cpumask_and(chk, chk, topology_core_cpumask(cpu))) -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - core 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ cpumask_complement(chk, topology_core_cpumask(cpu)); -+ -+ if (cpumask_and(chk, chk, cpu_online_mask)) -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - others 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ -+ per_cpu(sched_cpu_affinity_chk_end_masks, cpu) = chk; -+ } -+} -+#endif -+ -+void __init sched_init_smp(void) -+{ -+ /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -+ BUG(); -+ -+ cpumask_copy(&sched_rq_queued_masks[SCHED_RQ_EMPTY], cpu_online_mask); -+ -+ sched_init_topology_cpumask(); -+ -+ sched_smp_initialized = true; -+} -+#else -+void __init sched_init_smp(void) -+{ -+} -+#endif /* CONFIG_SMP */ -+ -+int in_sched_functions(unsigned long addr) 
-+{ -+ return in_lock_functions(addr) || -+ (addr >= (unsigned long)__sched_text_start -+ && addr < (unsigned long)__sched_text_end); -+} -+ -+#ifdef CONFIG_CGROUP_SCHED -+/* task group related information */ -+struct task_group { -+ struct cgroup_subsys_state css; -+ -+ struct rcu_head rcu; -+ struct list_head list; -+ -+ struct task_group *parent; -+ struct list_head siblings; -+ struct list_head children; -+}; -+ -+/* -+ * Default task group. -+ * Every task in system belongs to this group at bootup. -+ */ -+struct task_group root_task_group; -+LIST_HEAD(task_groups); -+ -+/* Cacheline aligned slab cache for task_group */ -+static struct kmem_cache *task_group_cache __read_mostly; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void __init sched_init(void) -+{ -+ int i; -+ struct rq *rq; -+ -+ print_scheduler_version(); -+ -+ wait_bit_init(); -+ -+#ifdef CONFIG_SMP -+ for (i = 0; i < NR_SCHED_RQ_QUEUED_LEVEL; i++) -+ cpumask_clear(&sched_rq_queued_masks[i]); -+ cpumask_setall(&sched_rq_queued_masks[SCHED_RQ_EMPTY]); -+ set_bit(SCHED_RQ_EMPTY, sched_rq_queued_masks_bitmap); -+ -+ cpumask_setall(&sched_rq_pending_masks[SCHED_RQ_EMPTY]); -+ set_bit(SCHED_RQ_EMPTY, sched_rq_pending_masks_bitmap); -+#else -+ uprq = &per_cpu(runqueues, 0); -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+ task_group_cache = KMEM_CACHE(task_group, 0); -+ -+ list_add(&root_task_group.list, &task_groups); -+ INIT_LIST_HEAD(&root_task_group.children); -+ INIT_LIST_HEAD(&root_task_group.siblings); -+#endif /* CONFIG_CGROUP_SCHED */ -+ for_each_possible_cpu(i) { -+ rq = cpu_rq(i); -+ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); -+ raw_spin_lock_init(&rq->lock); -+ rq->dither = 0; -+ rq->nr_running = rq->nr_uninterruptible = 0; -+ rq->calc_load_active = 0; -+ rq->calc_load_update = jiffies + LOAD_FREQ; -+#ifdef CONFIG_SMP -+ rq->online = false; -+ rq->cpu = i; -+ -+ rq->queued_level = SCHED_RQ_EMPTY; -+ rq->pending_level = SCHED_RQ_EMPTY; -+#ifdef CONFIG_SCHED_SMT -+ per_cpu(sched_sibling_cpu, i) = i; -+ rq->active_balance = 0; -+#endif -+#endif -+ rq->nr_switches = 0; -+ atomic_set(&rq->nr_iowait, 0); -+ hrtick_rq_init(rq); -+ } -+#ifdef CONFIG_SMP -+ /* Set rq->online for cpu 0 */ -+ cpu_rq(0)->online = true; -+#endif -+ -+ /* -+ * The boot idle thread does lazy MMU switching as well: -+ */ -+ mmgrab(&init_mm); -+ enter_lazy_tlb(&init_mm, current); -+ -+ /* -+ * Make us the idle thread. Technically, schedule() should not be -+ * called from this thread, however somewhere below it might be, -+ * but because we are the idle thread, we just pick up running again -+ * when this runqueue becomes "idle". -+ */ -+ init_idle(current, smp_processor_id()); -+ -+ calc_load_update = jiffies + LOAD_FREQ; -+ -+#ifdef CONFIG_SMP -+ idle_thread_set_boot_cpu(); -+ -+ sched_init_topology_cpumask_early(); -+#endif /* SMP */ -+ -+ init_schedstats(); -+ -+ psi_init(); -+} -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+static inline int preempt_count_equals(int preempt_offset) -+{ -+ int nested = preempt_count() + rcu_preempt_depth(); -+ -+ return (nested == preempt_offset); -+} -+ -+void __might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* -+ * Blocking primitives will set (and therefore destroy) current->state, -+ * since we will exit with TASK_RUNNING make sure we enter with it, -+ * otherwise we will destroy state. 
-+ */ -+ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, -+ "do not call blocking ops when !TASK_RUNNING; " -+ "state=%lx set at [<%p>] %pS\n", -+ current->state, -+ (void *)current->task_state_change, -+ (void *)current->task_state_change); -+ -+ ___might_sleep(file, line, preempt_offset); -+} -+EXPORT_SYMBOL(__might_sleep); -+ -+void ___might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* Ratelimiting timestamp: */ -+ static unsigned long prev_jiffy; -+ -+ unsigned long preempt_disable_ip; -+ -+ /* WARN_ON_ONCE() by default, no rate limit required: */ -+ rcu_sleep_check(); -+ -+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ !is_idle_task(current) && !current->non_block_count) || -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) -+ return; -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ /* Save this before calling printk(), since that will clobber it: */ -+ preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ printk(KERN_ERR -+ "BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ printk(KERN_ERR -+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); -+ -+ if (task_stack_end_corrupted(current)) -+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ -+ debug_show_held_locks(current); -+ if (irqs_disabled()) -+ print_irqtrace_events(current); -+#ifdef CONFIG_DEBUG_PREEMPT -+ if (!preempt_count_equals(preempt_offset)) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); -+ } -+#endif -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL(___might_sleep); -+ -+void __cant_sleep(const char *file, int line, int preempt_offset) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > preempt_offset) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); -+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_sleep); -+#endif -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+void normalize_rt_tasks(void) -+{ -+ struct task_struct *g, *p; -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ }; -+ -+ read_lock(&tasklist_lock); -+ for_each_process_thread(g, p) { -+ /* -+ * Only normalize user tasks: -+ */ -+ if (p->flags & PF_KTHREAD) -+ continue; -+ -+ if (!rt_task(p)) { -+ /* -+ * Renice negative nice level userspace -+ * tasks back to 0: -+ */ -+ if (task_nice(p) < 0) -+ set_user_nice(p, 0); -+ continue; -+ } -+ -+ __sched_setscheduler(p, &attr, false, false); -+ } -+ read_unlock(&tasklist_lock); -+} -+#endif /* CONFIG_MAGIC_SYSRQ */ -+ -+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) -+/* -+ * These functions are only useful for the IA64 MCA handling, or kdb. -+ * -+ * They can only be called when the whole system has been -+ * stopped - every CPU needs to be quiescent, and no scheduling -+ * activity can take place. 
Using them for anything else would -+ * be a serious bug, and as a result, they aren't even visible -+ * under any other configuration. -+ */ -+ -+/** -+ * curr_task - return the current task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ * -+ * Return: The current task for @cpu. -+ */ -+struct task_struct *curr_task(int cpu) -+{ -+ return cpu_curr(cpu); -+} -+ -+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ -+ -+#ifdef CONFIG_IA64 -+/** -+ * ia64_set_curr_task - set the current task for a given CPU. -+ * @cpu: the processor in question. -+ * @p: the task pointer to set. -+ * -+ * Description: This function must only be used when non-maskable interrupts -+ * are serviced on a separate stack. It allows the architecture to switch the -+ * notion of the current task on a CPU in a non-blocking manner. This function -+ * must be called with all CPU's synchronised, and interrupts disabled, the -+ * and caller must save the original value of the current task (see -+ * curr_task() above) and restore that value before reenabling interrupts and -+ * re-starting the system. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ */ -+void ia64_set_curr_task(int cpu, struct task_struct *p) -+{ -+ cpu_curr(cpu) = p; -+} -+ -+#endif -+ -+#ifdef CONFIG_SCHED_DEBUG -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+static void sched_free_group(struct task_group *tg) -+{ -+ kmem_cache_free(task_group_cache, tg); -+} -+ -+/* allocate runqueue etc for a new task group */ -+struct task_group *sched_create_group(struct task_group *parent) -+{ -+ struct task_group *tg; -+ -+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); -+ if (!tg) -+ return ERR_PTR(-ENOMEM); -+ -+ return tg; -+} -+ -+void sched_online_group(struct task_group *tg, struct task_group *parent) -+{ -+} -+ -+/* rcu callback to free various structures associated with a task group */ -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ /* Now it should be safe to free those cfs_rqs */ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+void sched_destroy_group(struct task_group *tg) -+{ -+ /* Wait for possible concurrent references to cfs_rqs complete */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ -+void sched_offline_group(struct task_group *tg) -+{ -+} -+ -+static inline struct task_group *css_tg(struct cgroup_subsys_state *css) -+{ -+ return css ? 
container_of(css, struct task_group, css) : NULL; -+} -+ -+static struct cgroup_subsys_state * -+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -+{ -+ struct task_group *parent = css_tg(parent_css); -+ struct task_group *tg; -+ -+ if (!parent) { -+ /* This is early initialization for the top cgroup */ -+ return &root_task_group.css; -+ } -+ -+ tg = sched_create_group(parent); -+ if (IS_ERR(tg)) -+ return ERR_PTR(-ENOMEM); -+ return &tg->css; -+} -+ -+/* Expose task group only after completing cgroup initialization */ -+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ struct task_group *parent = css_tg(css->parent); -+ -+ if (parent) -+ sched_online_group(tg, parent); -+ return 0; -+} -+ -+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ sched_offline_group(tg); -+} -+ -+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ /* -+ * Relies on the RCU grace period between css_released() and this. -+ */ -+ sched_free_group(tg); -+} -+ -+static void cpu_cgroup_fork(struct task_struct *task) -+{ -+} -+ -+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) -+{ -+ return 0; -+} -+ -+static void cpu_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+ -+static struct cftype cpu_legacy_files[] = { -+ { } /* Terminate */ -+}; -+ -+static struct cftype cpu_files[] = { -+ { } /* terminate */ -+}; -+ -+static int cpu_extra_stat_show(struct seq_file *sf, -+ struct cgroup_subsys_state *css) -+{ -+ return 0; -+} -+ -+struct cgroup_subsys cpu_cgrp_subsys = { -+ .css_alloc = cpu_cgroup_css_alloc, -+ .css_online = cpu_cgroup_css_online, -+ .css_released = cpu_cgroup_css_released, -+ .css_free = cpu_cgroup_css_free, -+ .css_extra_stat_show = cpu_extra_stat_show, -+ .fork = cpu_cgroup_fork, -+ .can_attach = cpu_cgroup_can_attach, -+ .attach = cpu_cgroup_attach, -+ .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, -+ .early_init = true, -+ .threaded = true, -+}; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+#undef CREATE_TRACE_POINTS -diff --git a/kernel/sched/pds_sched.h b/kernel/sched/pds_sched.h -new file mode 100644 -index 000000000000..6c3361f06087 ---- /dev/null -+++ b/kernel/sched/pds_sched.h -@@ -0,0 +1,577 @@ -+#ifndef PDS_SCHED_H -+#define PDS_SCHED_H -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#ifdef CONFIG_PARAVIRT -+# include -+#endif -+ -+#include "cpupri.h" -+ -+/* task_struct::on_rq states: */ -+#define TASK_ON_RQ_QUEUED 1 -+#define TASK_ON_RQ_MIGRATING 2 -+ -+static inline int task_on_rq_queued(struct task_struct *p) -+{ -+ return p->on_rq == TASK_ON_RQ_QUEUED; -+} -+ -+static inline int task_on_rq_migrating(struct task_struct *p) -+{ -+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+} -+ -+/* -+ * wake flags -+ */ -+#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -+#define WF_FORK 0x02 /* child wakeup after fork */ -+#define WF_MIGRATED 0x04 /* internal use, task got migrated */ -+ -+/* -+ * rq::clock_update_flags bits -+ */ -+#define RQCF_REQ_SKIP 0x01 -+#define RQCF_ACT_SKIP 
0x02 -+#define RQCF_UPDATED 0x04 -+ -+/* -+ * This is the main, per-CPU runqueue data structure. -+ * This data should only be modified by the local cpu. -+ */ -+struct rq { -+ /* runqueue lock: */ -+ raw_spinlock_t lock; -+ -+ struct task_struct __rcu *curr; -+ struct task_struct *idle, *stop; -+ struct mm_struct *prev_mm; -+ -+ struct skiplist_node sl_header; -+ -+ /* switch count */ -+ u64 nr_switches; -+ -+ atomic_t nr_iowait; -+ -+#ifdef CONFIG_MEMBARRIER -+ int membarrier_state; -+#endif -+ -+#ifdef CONFIG_SMP -+ int cpu; /* cpu of this runqueue */ -+ bool online; -+ unsigned int ttwu_pending; -+ unsigned int clock_update_flags; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ struct sched_avg avg_irq; -+#endif -+#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+ struct sched_avg avg_thermal; -+#endif -+ -+ unsigned long queued_level; -+ unsigned long pending_level; -+ -+#ifdef CONFIG_SCHED_SMT -+ int active_balance; -+ struct cpu_stop_work active_balance_work; -+#endif -+#endif /* CONFIG_SMP */ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ u64 prev_irq_time; -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+#ifdef CONFIG_PARAVIRT -+ u64 prev_steal_time; -+#endif /* CONFIG_PARAVIRT */ -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ u64 prev_steal_time_rq; -+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ -+ -+ /* calc_load related fields */ -+ unsigned long calc_load_update; -+ long calc_load_active; -+ -+ u64 clock, last_tick; -+ u64 clock_task; -+ int dither; -+ -+ unsigned long nr_running; -+ unsigned long nr_uninterruptible; -+ -+#ifdef CONFIG_SCHED_HRTICK -+#ifdef CONFIG_SMP -+ call_single_data_t hrtick_csd; -+#endif -+ struct hrtimer hrtick_timer; -+#endif -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+ /* latency stats */ -+ struct sched_info rq_sched_info; -+ unsigned long long rq_cpu_time; -+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? 
*/ -+ -+ /* sys_sched_yield() stats */ -+ unsigned int yld_count; -+ -+ /* schedule() stats */ -+ unsigned int sched_switch; -+ unsigned int sched_count; -+ unsigned int sched_goidle; -+ -+ /* try_to_wake_up() stats */ -+ unsigned int ttwu_count; -+ unsigned int ttwu_local; -+#endif /* CONFIG_SCHEDSTATS */ -+#ifdef CONFIG_CPU_IDLE -+ /* Must be inspected within a rcu lock section */ -+ struct cpuidle_state *idle_state; -+#endif -+}; -+ -+#define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ -+ (task->flags & PF_FROZEN) == 0 && \ -+ (task->state & TASK_NOLOAD) == 0) -+ -+extern unsigned long calc_load_update; -+extern atomic_long_t calc_load_tasks; -+ -+extern void calc_global_load_tick(struct rq *this_rq); -+extern long calc_load_fold_active(struct rq *this_rq, long adjust); -+ -+#ifndef CONFIG_SMP -+extern struct rq *uprq; -+#define cpu_rq(cpu) (uprq) -+#define this_rq() (uprq) -+#define raw_rq() (uprq) -+#define task_rq(p) (uprq) -+#define cpu_curr(cpu) ((uprq)->curr) -+#else /* CONFIG_SMP */ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) -+#define this_rq() this_cpu_ptr(&runqueues) -+#define raw_rq() raw_cpu_ptr(&runqueues) -+#define task_rq(p) cpu_rq(task_cpu(p)) -+#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -+ -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+void register_sched_domain_sysctl(void); -+void unregister_sched_domain_sysctl(void); -+#else -+static inline void register_sched_domain_sysctl(void) -+{ -+} -+static inline void unregister_sched_domain_sysctl(void) -+{ -+} -+#endif -+ -+#endif /* CONFIG_SMP */ -+ -+#ifndef arch_scale_freq_tick -+static __always_inline -+void arch_scale_freq_tick(void) -+{ -+} -+#endif -+ -+#ifndef arch_scale_freq_capacity -+static __always_inline -+unsigned long arch_scale_freq_capacity(int cpu) -+{ -+ return SCHED_CAPACITY_SCALE; -+} -+#endif -+ -+static inline u64 __rq_clock_broken(struct rq *rq) -+{ -+ return READ_ONCE(rq->clock); -+} -+ -+static inline u64 rq_clock(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock; -+} -+ -+static inline u64 rq_clock_task(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock_task; -+} -+ -+/** -+ * By default the decay is the default pelt decay period. -+ * The decay shift can change the decay period in -+ * multiples of 32. -+ * Decay shift Decay period(ms) -+ * 0 32 -+ * 1 64 -+ * 2 128 -+ * 3 256 -+ * 4 512 -+ */ -+extern int sched_thermal_decay_shift; -+ -+static inline u64 rq_clock_thermal(struct rq *rq) -+{ -+ return rq_clock_task(rq) >> sched_thermal_decay_shift; -+} -+ -+/* -+ * {de,en}queue flags: -+ * -+ * DEQUEUE_SLEEP - task is no longer runnable -+ * ENQUEUE_WAKEUP - task just became runnable -+ * -+ */ -+ -+#define DEQUEUE_SLEEP 0x01 -+ -+#define ENQUEUE_WAKEUP 0x01 -+ -+ -+/* -+ * Below are scheduler API which using in other kernel code -+ * It use the dummy rq_flags -+ * ToDo : PDS need to support these APIs for compatibility with mainline -+ * scheduler code. 
-+ */ -+struct rq_flags { -+ unsigned long flags; -+ struct pin_cookie cookie; -+ unsigned int clock_update_flags; -+}; -+ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock); -+ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock); -+ -+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(&rq->lock); -+} -+ -+static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) -+{ -+ rf->cookie = lockdep_pin_lock(&rq->lock); -+ -+#ifdef CONFIG_SCHED_DEBUG -+ rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); -+ rf->clock_update_flags = 0; -+#endif -+} -+ -+static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf) -+{ -+#ifdef CONFIG_SCHED_DEBUG -+ if (rq->clock_update_flags > RQCF_ACT_SKIP) -+ rf->clock_update_flags = RQCF_UPDATED; -+#endif -+ -+ lockdep_unpin_lock(&rq->lock, rf->cookie); -+} -+ -+static inline void -+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) -+ __releases(rq->lock) -+ __releases(p->pi_lock) -+{ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+} -+ -+static inline void -+rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irqsave(&rq->lock, rf->flags); -+ rq_pin_lock(rq, rf); -+} -+ -+static inline void -+rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ rq_unpin_lock(rq, rf); -+ raw_spin_unlock_irqrestore(&rq->lock, rf->flags); -+} -+ -+static inline void -+rq_unlock_irq(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+static inline void -+rq_unlock(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ rq_unpin_lock(rq, rf); -+ raw_spin_unlock(&rq->lock); -+} -+ -+static inline struct rq * -+this_rq_lock_irq(struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ -+ return rq; -+} -+ -+static inline int task_current(struct rq *rq, struct task_struct *p) -+{ -+ return rq->curr == p; -+} -+ -+static inline bool task_running(struct task_struct *p) -+{ -+ return p->on_cpu; -+} -+ -+extern struct static_key_false sched_schedstats; -+ -+extern void flush_smp_call_function_from_idle(void); -+ -+#ifdef CONFIG_CPU_IDLE -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+ rq->idle_state = idle_state; -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ WARN_ON(!rcu_read_lock_held()); -+ return rq->idle_state; -+} -+#else -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ return NULL; -+} -+#endif -+ -+static inline int cpu_of(const struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ return rq->cpu; -+#else -+ return 0; -+#endif -+} -+ -+#include "stats.h" -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+struct irqtime { -+ u64 total; -+ u64 tick_delta; -+ u64 irq_start_time; -+ struct u64_stats_sync sync; -+}; -+ -+DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -+ -+/* -+ * Returns the irqtime minus the softirq time computed by ksoftirqd. -+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime -+ * and never move forward. 
-+ */ -+static inline u64 irq_time_read(int cpu) -+{ -+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); -+ unsigned int seq; -+ u64 total; -+ -+ do { -+ seq = __u64_stats_fetch_begin(&irqtime->sync); -+ total = irqtime->total; -+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); -+ -+ return total; -+} -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+ -+#ifdef CONFIG_CPU_FREQ -+DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); -+ -+/** -+ * cpufreq_update_util - Take a note about CPU utilization changes. -+ * @rq: Runqueue to carry out the update for. -+ * @flags: Update reason flags. -+ * -+ * This function is called by the scheduler on the CPU whose utilization is -+ * being updated. -+ * -+ * It can only be called from RCU-sched read-side critical sections. -+ * -+ * The way cpufreq is currently arranged requires it to evaluate the CPU -+ * performance state (frequency/voltage) on a regular basis to prevent it from -+ * being stuck in a completely inadequate performance level for too long. -+ * That is not guaranteed to happen if the updates are only triggered from CFS -+ * and DL, though, because they may not be coming in if only RT tasks are -+ * active all the time (or there are RT tasks only). -+ * -+ * As a workaround for that issue, this function is called periodically by the -+ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, -+ * but that really is a band-aid. Going forward it should be replaced with -+ * solutions targeted more specifically at RT tasks. -+ */ -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+ data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); -+ if (data) -+ data->func(data, rq_clock(rq), flags); -+} -+ -+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) -+{ -+ if (cpu_of(rq) == smp_processor_id()) -+ cpufreq_update_util(rq, flags); -+} -+#else -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} -+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {} -+#endif /* CONFIG_CPU_FREQ */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+extern int __init sched_tick_offload_init(void); -+#else -+static inline int sched_tick_offload_init(void) { return 0; } -+#endif -+ -+#ifdef arch_scale_freq_capacity -+#ifndef arch_scale_freq_invariant -+#define arch_scale_freq_invariant() (true) -+#endif -+#else /* arch_scale_freq_capacity */ -+#define arch_scale_freq_invariant() (false) -+#endif -+ -+extern void schedule_idle(void); -+ -+/* -+ * !! For sched_setattr_nocheck() (kernel) only !! -+ * -+ * This is actually gross. :( -+ * -+ * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE -+ * tasks, but still be able to sleep. We need this on platforms that cannot -+ * atomically change clock frequency. Remove once fast switching will be -+ * available on such platforms. -+ * -+ * SUGOV stands for SchedUtil GOVernor. -+ */ -+#define SCHED_FLAG_SUGOV 0x10000000 -+ -+#ifdef CONFIG_MEMBARRIER -+/* -+ * The scheduler provides memory barriers required by membarrier between: -+ * - prior user-space memory accesses and store to rq->membarrier_state, -+ * - store to rq->membarrier_state and following user-space memory accesses. -+ * In the same way it provides those guarantees around store to rq->curr. 
-+ */ -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+ int membarrier_state; -+ -+ if (prev_mm == next_mm) -+ return; -+ -+ membarrier_state = atomic_read(&next_mm->membarrier_state); -+ if (READ_ONCE(rq->membarrier_state) == membarrier_state) -+ return; -+ -+ WRITE_ONCE(rq->membarrier_state, membarrier_state); -+} -+#else -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+} -+#endif -+ -+#ifdef CONFIG_NUMA -+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); -+#else -+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return nr_cpu_ids; -+} -+#endif -+ -+void swake_up_all_locked(struct swait_queue_head *q); -+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+#endif /* PDS_SCHED_H */ -diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c -index b647d04d9c8b..05b6cfd91842 100644 ---- a/kernel/sched/pelt.c -+++ b/kernel/sched/pelt.c -@@ -250,6 +250,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load) - WRITE_ONCE(sa->util_avg, sa->util_sum / divider); - } - -+#ifndef CONFIG_SCHED_PDS - /* - * sched_entity: - * -@@ -367,6 +368,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - - return 0; - } -+#endif - - #ifdef CONFIG_SCHED_THERMAL_PRESSURE - /* -diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h -index eb034d9f024d..a074572f2976 100644 ---- a/kernel/sched/pelt.h -+++ b/kernel/sched/pelt.h -@@ -1,11 +1,13 @@ - #ifdef CONFIG_SMP - #include "sched-pelt.h" - -+#ifndef CONFIG_SCHED_PDS - int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); - int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); - int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); - int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); - int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); -+#endif - - #ifdef CONFIG_SCHED_THERMAL_PRESSURE - int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity); -@@ -37,6 +39,7 @@ update_irq_load_avg(struct rq *rq, u64 running) - } - #endif - -+#ifndef CONFIG_SCHED_PDS - /* - * When a task is dequeued, its estimated utilization should not be update if - * its util_avg has not been updated at least once. 
-@@ -157,9 +160,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) - return rq_clock_pelt(rq_of(cfs_rq)); - } - #endif -+#endif /* CONFIG_SCHED_PDS */ - - #else - -+#ifndef CONFIG_SCHED_PDS - static inline int - update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) - { -@@ -188,6 +193,7 @@ static inline u64 thermal_load_avg(struct rq *rq) - { - return 0; - } -+#endif - - static inline int - update_irq_load_avg(struct rq *rq, u64 running) -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index db3a57675ccf..5a8060bd2343 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2,6 +2,10 @@ - /* - * Scheduler internal types and methods: - */ -+#ifdef CONFIG_SCHED_PDS -+#include "pds_sched.h" -+#else -+ - #include - - #include -@@ -2546,3 +2550,5 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) - - void swake_up_all_locked(struct swait_queue_head *q); - void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+#endif /* !CONFIG_SCHED_PDS */ -diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c -index 750fb3c67eed..45bd43942575 100644 ---- a/kernel/sched/stats.c -+++ b/kernel/sched/stats.c -@@ -22,8 +22,10 @@ static int show_schedstat(struct seq_file *seq, void *v) - } else { - struct rq *rq; - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_PDS - struct sched_domain *sd; - int dcount = 0; -+#endif - #endif - cpu = (unsigned long)(v - 2); - rq = cpu_rq(cpu); -@@ -40,6 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - seq_printf(seq, "\n"); - - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_PDS - /* domain-specific stats */ - rcu_read_lock(); - for_each_domain(cpu, sd) { -@@ -68,6 +71,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - sd->ttwu_move_balance); - } - rcu_read_unlock(); -+#endif - #endif - } - return 0; -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index 8a176d8727a3..b9dde576b576 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -130,9 +130,13 @@ static int __maybe_unused four = 4; - static unsigned long zero_ul; - static unsigned long one_ul = 1; - static unsigned long long_max = LONG_MAX; --static int one_hundred = 100; --static int two_hundred = 200; --static int one_thousand = 1000; -+static int __read_mostly one_hundred = 100; -+static int __read_mostly two_hundred = 200; -+static int __read_mostly one_thousand = 1000; -+#ifdef CONFIG_SCHED_PDS -+extern int rr_interval; -+extern int sched_yield_type; -+#endif - #ifdef CONFIG_PRINTK - static int ten_thousand = 10000; - #endif -@@ -288,7 +292,7 @@ static struct ctl_table sysctl_base_table[] = { - { } - }; - --#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_PDS) - static int min_sched_granularity_ns = 100000; /* 100 usecs */ - static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_wakeup_granularity_ns; /* 0 usecs */ -@@ -305,6 +309,7 @@ static int max_extfrag_threshold = 1000; - #endif - - static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_PDS - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, -@@ -486,6 +491,7 @@ static struct ctl_table kern_table[] = { - .extra2 = SYSCTL_ONE, - }, - #endif -+#endif /* !CONFIG_SCHED_PDS */ - #ifdef CONFIG_PROVE_LOCKING - { - .procname = "prove_locking", -@@ -1049,6 +1055,26 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_SCHED_PDS -+ { -+ .procname = "rr_interval", -+ .data = &rr_interval, -+ .maxlen = sizeof (int), -+ 
.mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = SYSCTL_ONE, -+ .extra2 = &one_thousand, -+ }, -+ { -+ .procname = "yield_type", -+ .data = &sched_yield_type, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &two, -+ }, -+#endif - #if defined(CONFIG_S390) && defined(CONFIG_SMP) - { - .procname = "spin_retry", -diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c -index 2fd3b3fa68bf..6f3b08afdd4c 100644 ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -236,7 +236,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) - u64 stime, utime; - - task_cputime(p, &utime, &stime); -- store_samples(samples, stime, utime, p->se.sum_exec_runtime); -+ store_samples(samples, stime, utime, tsk_seruntime(p)); - } - - static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, -@@ -806,6 +806,7 @@ static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, - } - } - -+#ifndef CONFIG_SCHED_PDS - static inline void check_dl_overrun(struct task_struct *tsk) - { - if (tsk->dl.dl_overrun) { -@@ -813,6 +814,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) - __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); - } - } -+#endif - - static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) - { -@@ -840,8 +842,10 @@ static void check_thread_timers(struct task_struct *tsk, - u64 samples[CPUCLOCK_MAX]; - unsigned long soft; - -+#ifndef CONFIG_SCHED_PDS - if (dl_task(tsk)) - check_dl_overrun(tsk); -+#endif - - if (expiry_cache_is_inactive(pct)) - return; -@@ -855,7 +859,7 @@ static void check_thread_timers(struct task_struct *tsk, - soft = task_rlimit(tsk, RLIMIT_RTTIME); - if (soft != RLIM_INFINITY) { - /* Task RT timeout is accounted in jiffies. RTTIME is usec */ -- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); -+ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); - unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - - /* At the hard limit, send SIGKILL. No further action. 
*/ -@@ -1091,8 +1095,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) - return true; - } - -+#ifndef CONFIG_SCHED_PDS - if (dl_task(tsk) && tsk->dl.dl_overrun) - return true; -+#endif - - return false; - } -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index b5e3496cf803..0816db0b9c16 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) - { - /* Make this a -deadline thread */ - static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_PDS -+ /* No deadline on BFS, use RR */ -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, - .sched_runtime = 100000ULL, - .sched_deadline = 10000000ULL, - .sched_period = 10000000ULL -+#endif - }; - struct wakeup_test_data *x = data; - diff --git a/linux58-tkg/linux58-tkg-patches/0006-add-acs-overrides_iommu.patch b/linux58-tkg/linux58-tkg-patches/0006-add-acs-overrides_iommu.patch deleted file mode 100644 index d1303a5..0000000 --- a/linux58-tkg/linux58-tkg-patches/0006-add-acs-overrides_iommu.patch +++ /dev/null @@ -1,193 +0,0 @@ -From cdeab384f48dd9c88e2dff2e9ad8d57dca1a1b1c Mon Sep 17 00:00:00 2001 -From: Mark Weiman -Date: Sun, 12 Aug 2018 11:36:21 -0400 -Subject: [PATCH] pci: Enable overrides for missing ACS capabilities - -This an updated version of Alex Williamson's patch from: -https://lkml.org/lkml/2013/5/30/513 - -Original commit message follows: - -PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that -allows us to control whether transactions are allowed to be redirected -in various subnodes of a PCIe topology. For instance, if two -endpoints are below a root port or downsteam switch port, the -downstream port may optionally redirect transactions between the -devices, bypassing upstream devices. The same can happen internally -on multifunction devices. The transaction may never be visible to the -upstream devices. - -One upstream device that we particularly care about is the IOMMU. If -a redirection occurs in the topology below the IOMMU, then the IOMMU -cannot provide isolation between devices. This is why the PCIe spec -encourages topologies to include ACS support. Without it, we have to -assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation. - -Unfortunately, far too many topologies do not support ACS to make this -a steadfast requirement. Even the latest chipsets from Intel are only -sporadically supporting ACS. We have trouble getting interconnect -vendors to include the PCIe spec required PCIe capability, let alone -suggested features. - -Therefore, we need to add some flexibility. The pcie_acs_override= -boot option lets users opt-in specific devices or sets of devices to -assume ACS support. The "downstream" option assumes full ACS support -on root ports and downstream switch ports. The "multifunction" -option assumes the subset of ACS features available on multifunction -endpoints and upstream switch ports are supported. The "id:nnnn:nnnn" -option enables ACS support on devices matching the provided vendor -and device IDs, allowing more strategic ACS overrides. These options -may be combined in any order. A maximum of 16 id specific overrides -are available. It's suggested to use the most limited set of options -necessary to avoid completely disabling ACS across the topology. -Note to hardware vendors, we have facilities to permanently quirk -specific devices which enforce isolation but not provide an ACS -capability. 
Please contact me to have your devices added and save -your customers the hassle of this boot option. - -Signed-off-by: Mark Weiman ---- - .../admin-guide/kernel-parameters.txt | 9 ++ - drivers/pci/quirks.c | 101 ++++++++++++++++++ - 2 files changed, 110 insertions(+) - -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index aefd358a5ca3..173b3596fd9e 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -3190,6 +3190,15 @@ - nomsi [MSI] If the PCI_MSI kernel config parameter is - enabled, this kernel boot option can be used to - disable the use of MSI interrupts system-wide. -+ pcie_acs_override = -+ [PCIE] Override missing PCIe ACS support for: -+ downstream -+ All downstream ports - full ACS capabilities -+ multifunction -+ All multifunction devices - multifunction ACS subset -+ id:nnnn:nnnn -+ Specific device - full ACS capabilities -+ Specified as vid:did (vendor/device ID) in hex - noioapicquirk [APIC] Disable all boot interrupt quirks. - Safety option to keep boot IRQs enabled. This - should never be necessary. -diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index 4700d24e5d55..8f7a3d7fd9c1 100644 ---- a/drivers/pci/quirks.c -+++ b/drivers/pci/quirks.c -@@ -3372,6 +3372,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) - dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; - } - -+static bool acs_on_downstream; -+static bool acs_on_multifunction; -+ -+#define NUM_ACS_IDS 16 -+struct acs_on_id { -+ unsigned short vendor; -+ unsigned short device; -+}; -+static struct acs_on_id acs_on_ids[NUM_ACS_IDS]; -+static u8 max_acs_id; -+ -+static __init int pcie_acs_override_setup(char *p) -+{ -+ if (!p) -+ return -EINVAL; -+ -+ while (*p) { -+ if (!strncmp(p, "downstream", 10)) -+ acs_on_downstream = true; -+ if (!strncmp(p, "multifunction", 13)) -+ acs_on_multifunction = true; -+ if (!strncmp(p, "id:", 3)) { -+ char opt[5]; -+ int ret; -+ long val; -+ -+ if (max_acs_id >= NUM_ACS_IDS - 1) { -+ pr_warn("Out of PCIe ACS override slots (%d)\n", -+ NUM_ACS_IDS); -+ goto next; -+ } -+ -+ p += 3; -+ snprintf(opt, 5, "%s", p); -+ ret = kstrtol(opt, 16, &val); -+ if (ret) { -+ pr_warn("PCIe ACS ID parse error %d\n", ret); -+ goto next; -+ } -+ acs_on_ids[max_acs_id].vendor = val; -+ -+ p += strcspn(p, ":"); -+ if (*p != ':') { -+ pr_warn("PCIe ACS invalid ID\n"); -+ goto next; -+ } -+ -+ p++; -+ snprintf(opt, 5, "%s", p); -+ ret = kstrtol(opt, 16, &val); -+ if (ret) { -+ pr_warn("PCIe ACS ID parse error %d\n", ret); -+ goto next; -+ } -+ acs_on_ids[max_acs_id].device = val; -+ max_acs_id++; -+ } -+next: -+ p += strcspn(p, ","); -+ if (*p == ',') -+ p++; -+ } -+ -+ if (acs_on_downstream || acs_on_multifunction || max_acs_id) -+ pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n"); -+ -+ return 0; -+} -+early_param("pcie_acs_override", pcie_acs_override_setup); -+ -+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags) -+{ -+ int i; -+ -+ /* Never override ACS for legacy devices or devices with ACS caps */ -+ if (!pci_is_pcie(dev) || -+ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS)) -+ return -ENOTTY; -+ -+ for (i = 0; i < max_acs_id; i++) -+ if (acs_on_ids[i].vendor == dev->vendor && -+ acs_on_ids[i].device == dev->device) -+ return 1; -+ -+ switch (pci_pcie_type(dev)) { -+ case PCI_EXP_TYPE_DOWNSTREAM: -+ case PCI_EXP_TYPE_ROOT_PORT: -+ if (acs_on_downstream) -+ return 1; -+ break; -+ case 
PCI_EXP_TYPE_ENDPOINT: -+ case PCI_EXP_TYPE_UPSTREAM: -+ case PCI_EXP_TYPE_LEG_END: -+ case PCI_EXP_TYPE_RC_END: -+ if (acs_on_multifunction && dev->multifunction) -+ return 1; -+ } -+ -+ return -ENOTTY; -+} - /* - * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset. - * The device will throw a Link Down error on AER-capable systems and -@@ -4513,6 +4613,7 @@ static const struct pci_dev_acs_enabled { - { PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs }, - /* Zhaoxin Root/Downstream Ports */ - { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, -+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides }, - { 0 } - }; - - diff --git a/linux58-tkg/linux58-tkg-patches/0007-v5.8-fsync.patch b/linux58-tkg/linux58-tkg-patches/0007-v5.8-fsync.patch deleted file mode 100644 index 01c86d8..0000000 --- a/linux58-tkg/linux58-tkg-patches/0007-v5.8-fsync.patch +++ /dev/null @@ -1,908 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Mon, 20 Apr 2020 14:09:11 +0200 -Subject: Import Fsync v3 patchset - Squashed from https://gitlab.collabora.com/tonyk/linux/-/commits/futex-proton-v3 - -diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index a89eb0accd5e2ee527be1e3e11b1117ff5bf94b4..580001e89c6caed57dd8b3cb491d65dce846caff 100644 ---- a/include/uapi/linux/futex.h -+++ b/include/uapi/linux/futex.h -@@ -21,6 +21,7 @@ - #define FUTEX_WAKE_BITSET 10 - #define FUTEX_WAIT_REQUEUE_PI 11 - #define FUTEX_CMP_REQUEUE_PI 12 -+#define FUTEX_WAIT_MULTIPLE 13 - - #define FUTEX_PRIVATE_FLAG 128 - #define FUTEX_CLOCK_REALTIME 256 -@@ -40,6 +41,8 @@ - FUTEX_PRIVATE_FLAG) - #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ - FUTEX_PRIVATE_FLAG) -+#define FUTEX_WAIT_MULTIPLE_PRIVATE (FUTEX_WAIT_MULTIPLE | \ -+ FUTEX_PRIVATE_FLAG) - - /* - * Support for robust futexes: the kernel cleans up held futexes at -@@ -150,4 +153,21 @@ struct robust_list_head { - (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \ - | ((oparg & 0xfff) << 12) | (cmparg & 0xfff)) - -+/* -+ * Maximum number of multiple futexes to wait for -+ */ -+#define FUTEX_MULTIPLE_MAX_COUNT 128 -+ -+/** -+ * struct futex_wait_block - Block of futexes to be waited for -+ * @uaddr: User address of the futex -+ * @val: Futex value expected by userspace -+ * @bitset: Bitset for the optional bitmasked wakeup -+ */ -+struct futex_wait_block { -+ __u32 __user *uaddr; -+ __u32 val; -+ __u32 bitset; -+}; -+ - #endif /* _UAPI_LINUX_FUTEX_H */ -diff --git a/kernel/futex.c b/kernel/futex.c -index 0cf84c8664f207c574325b899ef2e57f01295a94..58cf9eb2b851b4858e29b5ef4114a29a92e676ba 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -215,6 +215,8 @@ struct futex_pi_state { - * @rt_waiter: rt_waiter storage for use with requeue_pi - * @requeue_pi_key: the requeue_pi target futex key - * @bitset: bitset for the optional bitmasked wakeup -+ * @uaddr: userspace address of futex -+ * @uval: expected futex's value - * - * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so - * we can wake only the relevant ones (hashed queues may be shared). 
-@@ -237,6 +239,8 @@ struct futex_q { - struct rt_mutex_waiter *rt_waiter; - union futex_key *requeue_pi_key; - u32 bitset; -+ u32 __user *uaddr; -+ u32 uval; - } __randomize_layout; - - static const struct futex_q futex_q_init = { -@@ -2420,6 +2424,29 @@ static int unqueue_me(struct futex_q *q) - return ret; - } - -+/** -+ * unqueue_multiple() - Remove several futexes from their futex_hash_bucket -+ * @q: The list of futexes to unqueue -+ * @count: Number of futexes in the list -+ * -+ * Helper to unqueue a list of futexes. This can't fail. -+ * -+ * Return: -+ * - >=0 - Index of the last futex that was awoken; -+ * - -1 - If no futex was awoken -+ */ -+static int unqueue_multiple(struct futex_q *q, int count) -+{ -+ int ret = -1; -+ int i; -+ -+ for (i = 0; i < count; i++) { -+ if (!unqueue_me(&q[i])) -+ ret = i; -+ } -+ return ret; -+} -+ - /* - * PI futexes can not be requeued and must remove themself from the - * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry -@@ -2783,6 +2810,211 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, - return ret; - } - -+/** -+ * futex_wait_multiple_setup() - Prepare to wait and enqueue multiple futexes -+ * @qs: The corresponding futex list -+ * @count: The size of the lists -+ * @flags: Futex flags (FLAGS_SHARED, etc.) -+ * @awaken: Index of the last awoken futex -+ * -+ * Prepare multiple futexes in a single step and enqueue them. This may fail if -+ * the futex list is invalid or if any futex was already awoken. On success the -+ * task is ready to interruptible sleep. -+ * -+ * Return: -+ * - 1 - One of the futexes was awaken by another thread -+ * - 0 - Success -+ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL -+ */ -+static int futex_wait_multiple_setup(struct futex_q *qs, int count, -+ unsigned int flags, int *awaken) -+{ -+ struct futex_hash_bucket *hb; -+ int ret, i; -+ u32 uval; -+ -+ /* -+ * Enqueuing multiple futexes is tricky, because we need to -+ * enqueue each futex in the list before dealing with the next -+ * one to avoid deadlocking on the hash bucket. But, before -+ * enqueuing, we need to make sure that current->state is -+ * TASK_INTERRUPTIBLE, so we don't absorb any awake events, which -+ * cannot be done before the get_futex_key of the next key, -+ * because it calls get_user_pages, which can sleep. Thus, we -+ * fetch the list of futexes keys in two steps, by first pinning -+ * all the memory keys in the futex key, and only then we read -+ * each key and queue the corresponding futex. -+ */ -+retry: -+ for (i = 0; i < count; i++) { -+ qs[i].key = FUTEX_KEY_INIT; -+ ret = get_futex_key(qs[i].uaddr, flags & FLAGS_SHARED, -+ &qs[i].key, FUTEX_READ); -+ if (unlikely(ret)) { -+ for (--i; i >= 0; i--) -+ put_futex_key(&qs[i].key); -+ return ret; -+ } -+ } -+ -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ for (i = 0; i < count; i++) { -+ struct futex_q *q = &qs[i]; -+ -+ hb = queue_lock(q); -+ -+ ret = get_futex_value_locked(&uval, q->uaddr); -+ if (ret) { -+ /* -+ * We need to try to handle the fault, which -+ * cannot be done without sleep, so we need to -+ * undo all the work already done, to make sure -+ * we don't miss any wake ups. Therefore, clean -+ * up, handle the fault and retry from the -+ * beginning. -+ */ -+ queue_unlock(hb); -+ -+ /* -+ * Keys 0..(i-1) are implicitly put -+ * on unqueue_multiple. 
-+ */ -+ put_futex_key(&q->key); -+ -+ *awaken = unqueue_multiple(qs, i); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ /* -+ * On a real fault, prioritize the error even if -+ * some other futex was awoken. Userspace gave -+ * us a bad address, -EFAULT them. -+ */ -+ ret = get_user(uval, q->uaddr); -+ if (ret) -+ return ret; -+ -+ /* -+ * Even if the page fault was handled, If -+ * something was already awaken, we can safely -+ * give up and succeed to give a hint for userspace to -+ * acquire the right futex faster. -+ */ -+ if (*awaken >= 0) -+ return 1; -+ -+ goto retry; -+ } -+ -+ if (uval != q->uval) { -+ queue_unlock(hb); -+ -+ put_futex_key(&qs[i].key); -+ -+ /* -+ * If something was already awaken, we can -+ * safely ignore the error and succeed. -+ */ -+ *awaken = unqueue_multiple(qs, i); -+ __set_current_state(TASK_RUNNING); -+ if (*awaken >= 0) -+ return 1; -+ -+ return -EWOULDBLOCK; -+ } -+ -+ /* -+ * The bucket lock can't be held while dealing with the -+ * next futex. Queue each futex at this moment so hb can -+ * be unlocked. -+ */ -+ queue_me(&qs[i], hb); -+ } -+ return 0; -+} -+ -+/** -+ * futex_wait_multiple() - Prepare to wait on and enqueue several futexes -+ * @qs: The list of futexes to wait on -+ * @op: Operation code from futex's syscall -+ * @count: The number of objects -+ * @abs_time: Timeout before giving up and returning to userspace -+ * -+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function -+ * sleeps on a group of futexes and returns on the first futex that -+ * triggered, or after the timeout has elapsed. -+ * -+ * Return: -+ * - >=0 - Hint to the futex that was awoken -+ * - <0 - On error -+ */ -+static int futex_wait_multiple(struct futex_q *qs, int op, -+ u32 count, ktime_t *abs_time) -+{ -+ struct hrtimer_sleeper timeout, *to; -+ int ret, flags = 0, hint = 0; -+ unsigned int i; -+ -+ if (!(op & FUTEX_PRIVATE_FLAG)) -+ flags |= FLAGS_SHARED; -+ -+ if (op & FUTEX_CLOCK_REALTIME) -+ flags |= FLAGS_CLOCKRT; -+ -+ to = futex_setup_timer(abs_time, &timeout, flags, 0); -+ while (1) { -+ ret = futex_wait_multiple_setup(qs, count, flags, &hint); -+ if (ret) { -+ if (ret > 0) { -+ /* A futex was awaken during setup */ -+ ret = hint; -+ } -+ break; -+ } -+ -+ if (to) -+ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); -+ -+ /* -+ * Avoid sleeping if another thread already tried to -+ * wake us. -+ */ -+ for (i = 0; i < count; i++) { -+ if (plist_node_empty(&qs[i].list)) -+ break; -+ } -+ -+ if (i == count && (!to || to->task)) -+ freezable_schedule(); -+ -+ ret = unqueue_multiple(qs, count); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ if (ret >= 0) -+ break; -+ if (to && !to->task) { -+ ret = -ETIMEDOUT; -+ break; -+ } else if (signal_pending(current)) { -+ ret = -ERESTARTSYS; -+ break; -+ } -+ /* -+ * The final case is a spurious wakeup, for -+ * which just retry. 
-+ */ -+ } -+ -+ if (to) { -+ hrtimer_cancel(&to->timer); -+ destroy_hrtimer_on_stack(&to->timer); -+ } -+ -+ return ret; -+} -+ - static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, - ktime_t *abs_time, u32 bitset) - { -@@ -3907,6 +4139,43 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - return -ENOSYS; - } - -+/** -+ * futex_read_wait_block - Read an array of futex_wait_block from userspace -+ * @uaddr: Userspace address of the block -+ * @count: Number of blocks to be read -+ * -+ * This function creates and allocate an array of futex_q (we zero it to -+ * initialize the fields) and then, for each futex_wait_block element from -+ * userspace, fill a futex_q element with proper values. -+ */ -+inline struct futex_q *futex_read_wait_block(u32 __user *uaddr, u32 count) -+{ -+ unsigned int i; -+ struct futex_q *qs; -+ struct futex_wait_block fwb; -+ struct futex_wait_block __user *entry = -+ (struct futex_wait_block __user *)uaddr; -+ -+ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) -+ return ERR_PTR(-EINVAL); -+ -+ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); -+ if (!qs) -+ return ERR_PTR(-ENOMEM); -+ -+ for (i = 0; i < count; i++) { -+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { -+ kfree(qs); -+ return ERR_PTR(-EFAULT); -+ } -+ -+ qs[i].uaddr = fwb.uaddr; -+ qs[i].uval = fwb.val; -+ qs[i].bitset = fwb.bitset; -+ } -+ -+ return qs; -+} - - SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, -@@ -3919,7 +4188,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || -- cmd == FUTEX_WAIT_REQUEUE_PI)) { -+ cmd == FUTEX_WAIT_REQUEUE_PI || -+ cmd == FUTEX_WAIT_MULTIPLE)) { - if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) - return -EFAULT; - if (get_timespec64(&ts, utime)) -@@ -3940,6 +4210,25 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) - val2 = (u32) (unsigned long) utime; - -+ if (cmd == FUTEX_WAIT_MULTIPLE) { -+ int ret; -+ struct futex_q *qs; -+ -+#ifdef CONFIG_X86_X32 -+ if (unlikely(in_x32_syscall())) -+ return -ENOSYS; -+#endif -+ qs = futex_read_wait_block(uaddr, val); -+ -+ if (IS_ERR(qs)) -+ return PTR_ERR(qs); -+ -+ ret = futex_wait_multiple(qs, op, val, tp); -+ kfree(qs); -+ -+ return ret; -+ } -+ - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); - } - -@@ -4102,6 +4391,57 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, - #endif /* CONFIG_COMPAT */ - - #ifdef CONFIG_COMPAT_32BIT_TIME -+/** -+ * struct compat_futex_wait_block - Block of futexes to be waited for -+ * @uaddr: User address of the futex (compatible pointer) -+ * @val: Futex value expected by userspace -+ * @bitset: Bitset for the optional bitmasked wakeup -+ */ -+struct compat_futex_wait_block { -+ compat_uptr_t uaddr; -+ __u32 val; -+ __u32 bitset; -+}; -+ -+/** -+ * compat_futex_read_wait_block - Read an array of futex_wait_block from -+ * userspace -+ * @uaddr: Userspace address of the block -+ * @count: Number of blocks to be read -+ * -+ * This function does the same as futex_read_wait_block(), except that it -+ * converts the pointer to the futex from the compat version to the regular one. 
-+ */ -+inline struct futex_q *compat_futex_read_wait_block(u32 __user *uaddr, -+ u32 count) -+{ -+ unsigned int i; -+ struct futex_q *qs; -+ struct compat_futex_wait_block fwb; -+ struct compat_futex_wait_block __user *entry = -+ (struct compat_futex_wait_block __user *)uaddr; -+ -+ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) -+ return ERR_PTR(-EINVAL); -+ -+ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); -+ if (!qs) -+ return ERR_PTR(-ENOMEM); -+ -+ for (i = 0; i < count; i++) { -+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { -+ kfree(qs); -+ return ERR_PTR(-EFAULT); -+ } -+ -+ qs[i].uaddr = compat_ptr(fwb.uaddr); -+ qs[i].uval = fwb.val; -+ qs[i].bitset = fwb.bitset; -+ } -+ -+ return qs; -+} -+ - SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - struct old_timespec32 __user *, utime, u32 __user *, uaddr2, - u32, val3) -@@ -4113,7 +4453,8 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || -- cmd == FUTEX_WAIT_REQUEUE_PI)) { -+ cmd == FUTEX_WAIT_REQUEUE_PI || -+ cmd == FUTEX_WAIT_MULTIPLE)) { - if (get_old_timespec32(&ts, utime)) - return -EFAULT; - if (!timespec64_valid(&ts)) -@@ -4128,6 +4469,19 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) - val2 = (int) (unsigned long) utime; - -+ if (cmd == FUTEX_WAIT_MULTIPLE) { -+ int ret; -+ struct futex_q *qs = compat_futex_read_wait_block(uaddr, val); -+ -+ if (IS_ERR(qs)) -+ return PTR_ERR(qs); -+ -+ ret = futex_wait_multiple(qs, op, val, tp); -+ kfree(qs); -+ -+ return ret; -+ } -+ - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); - } - #endif /* CONFIG_COMPAT_32BIT_TIME */ -diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c -index ee55e6d389a3f053194435342c4e471dc7cf8786..2a63e1c2cfb6407a5988233217cff2e52787bc66 100644 ---- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c -+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c -@@ -11,6 +11,7 @@ - * - * HISTORY - * 2009-Nov-6: Initial version by Darren Hart -+ * 2019-Dec-13: Add WAIT_MULTIPLE test by Krisman - * - *****************************************************************************/ - -@@ -41,6 +42,8 @@ int main(int argc, char *argv[]) - { - futex_t f1 = FUTEX_INITIALIZER; - struct timespec to; -+ time_t secs; -+ struct futex_wait_block fwb = {&f1, f1, 0}; - int res, ret = RET_PASS; - int c; - -@@ -65,7 +68,7 @@ int main(int argc, char *argv[]) - } - - ksft_print_header(); -- ksft_set_plan(1); -+ ksft_set_plan(2); - ksft_print_msg("%s: Block on a futex and wait for timeout\n", - basename(argv[0])); - ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); -@@ -79,8 +82,39 @@ int main(int argc, char *argv[]) - if (!res || errno != ETIMEDOUT) { - fail("futex_wait returned %d\n", ret < 0 ? 
errno : ret); - ret = RET_FAIL; -+ } else -+ ksft_test_result_pass("futex_wait timeout succeeds\n"); -+ -+ info("Calling futex_wait_multiple on f1: %u @ %p\n", f1, &f1); -+ -+ /* Setup absolute time */ -+ ret = clock_gettime(CLOCK_REALTIME, &to); -+ secs = (to.tv_nsec + timeout_ns) / 1000000000; -+ to.tv_nsec = ((int64_t)to.tv_nsec + timeout_ns) % 1000000000; -+ to.tv_sec += secs; -+ info("to.tv_sec = %ld\n", to.tv_sec); -+ info("to.tv_nsec = %ld\n", to.tv_nsec); -+ -+ res = futex_wait_multiple(&fwb, 1, &to, -+ FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME); -+ -+#ifdef __ILP32__ -+ if (res == -1 && errno == ENOSYS) { -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+ } else { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; - } -+#else -+ if (!res || errno != ETIMEDOUT) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; -+ } else -+ ksft_test_result_pass("futex_wait_multiple timeout succeeds\n"); -+#endif /* __ILP32__ */ - -- print_result(TEST_NAME, ret); -+ ksft_print_cnts(); - return ret; - } -diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h -index ddbcfc9b7bac4aebb5bac2f249e26ecfd948aa84..bb103bef4557012ef9a389ca74c868e4476a8a31 100644 ---- a/tools/testing/selftests/futex/include/futextest.h -+++ b/tools/testing/selftests/futex/include/futextest.h -@@ -38,6 +38,14 @@ typedef volatile u_int32_t futex_t; - #ifndef FUTEX_CMP_REQUEUE_PI - #define FUTEX_CMP_REQUEUE_PI 12 - #endif -+#ifndef FUTEX_WAIT_MULTIPLE -+#define FUTEX_WAIT_MULTIPLE 13 -+struct futex_wait_block { -+ futex_t *uaddr; -+ futex_t val; -+ __u32 bitset; -+}; -+#endif - #ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE - #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ - FUTEX_PRIVATE_FLAG) -@@ -80,6 +88,20 @@ futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags) - return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); - } - -+/** -+ * futex_wait_multiple() - block on several futexes with optional timeout -+ * @fwb: wait block user space address -+ * @count: number of entities at fwb -+ * @timeout: absolute timeout -+ */ -+static inline int -+futex_wait_multiple(struct futex_wait_block *fwb, int count, -+ struct timespec *timeout, int opflags) -+{ -+ return futex(fwb, FUTEX_WAIT_MULTIPLE, count, timeout, NULL, 0, -+ opflags); -+} -+ - /** - * futex_wake() - wake one or more tasks blocked on uaddr - * @nr_wake: wake up to this many tasks -diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c -index 0ae390ff816449c88d0bb655a26eb014382c2b4f..bcbac042992d447e0bc9ef5fefe94e875de310f2 100644 ---- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c -+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c -@@ -12,6 +12,7 @@ - * - * HISTORY - * 2009-Nov-14: Initial version by Gowrishankar -+ * 2019-Dec-13: Add WAIT_MULTIPLE test by Krisman - * - *****************************************************************************/ - -@@ -40,6 +41,7 @@ int main(int argc, char *argv[]) - { - struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; - futex_t f1 = FUTEX_INITIALIZER; -+ struct futex_wait_block fwb = {&f1, f1+1, 0}; - int res, ret = RET_PASS; - int c; - -@@ -61,7 +63,7 @@ int main(int argc, char *argv[]) - } - - ksft_print_header(); -- ksft_set_plan(1); -+ 
ksft_set_plan(2); - ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", - basename(argv[0])); - -@@ -71,8 +73,30 @@ int main(int argc, char *argv[]) - fail("futex_wait returned: %d %s\n", - res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; -+ } else -+ ksft_test_result_pass("futex_wait wouldblock succeeds\n"); -+ -+ info("Calling futex_wait_multiple on f1: %u @ %p with val=%u\n", -+ f1, &f1, f1+1); -+ res = futex_wait_multiple(&fwb, 1, NULL, FUTEX_PRIVATE_FLAG); -+ -+#ifdef __ILP32__ -+ if (res != -1 || errno != ENOSYS) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; -+ } else { -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+ } -+#else -+ if (!res || errno != EWOULDBLOCK) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; - } -+ ksft_test_result_pass("futex_wait_multiple wouldblock succeeds\n"); -+#endif /* __ILP32__ */ - -- print_result(TEST_NAME, ret); -+ ksft_print_cnts(); - return ret; - } -diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore -index a09f570619023750f558c84004aff166b4337d72..4660128a545edb04a17cc6bd9760931c1386122f 100644 ---- a/tools/testing/selftests/futex/functional/.gitignore -+++ b/tools/testing/selftests/futex/functional/.gitignore -@@ -5,3 +5,4 @@ futex_wait_private_mapped_file - futex_wait_timeout - futex_wait_uninitialized_heap - futex_wait_wouldblock -+futex_wait_multiple -diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile -index 30996306cabcfe89a47977643e529b122893bb7e..75f9fface11fa3c90c1bdb9a49b3ea51291afd58 100644 ---- a/tools/testing/selftests/futex/functional/Makefile -+++ b/tools/testing/selftests/futex/functional/Makefile -@@ -14,7 +14,8 @@ TEST_GEN_FILES := \ - futex_requeue_pi_signal_restart \ - futex_requeue_pi_mismatched_ops \ - futex_wait_uninitialized_heap \ -- futex_wait_private_mapped_file -+ futex_wait_private_mapped_file \ -+ futex_wait_multiple - - TEST_PROGS := run.sh - -diff --git a/tools/testing/selftests/futex/functional/futex_wait_multiple.c b/tools/testing/selftests/futex/functional/futex_wait_multiple.c -new file mode 100644 -index 0000000000000000000000000000000000000000..b48422e79f42edba1653bb0bd2a4c4fd98d2d48d ---- /dev/null -+++ b/tools/testing/selftests/futex/functional/futex_wait_multiple.c -@@ -0,0 +1,173 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/****************************************************************************** -+ * -+ * Copyright © Collabora, Ltd., 2019 -+ * -+ * DESCRIPTION -+ * Test basic semantics of FUTEX_WAIT_MULTIPLE -+ * -+ * AUTHOR -+ * Gabriel Krisman Bertazi -+ * -+ * HISTORY -+ * 2019-Dec-13: Initial version by Krisman -+ * -+ *****************************************************************************/ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "futextest.h" -+#include "logging.h" -+ -+#define TEST_NAME "futex-wait-multiple" -+#define timeout_ns 100000 -+#define MAX_COUNT 128 -+#define WAKE_WAIT_US 3000000 -+ -+int ret = RET_PASS; -+char *progname; -+futex_t f[MAX_COUNT] = {0}; -+struct futex_wait_block fwb[MAX_COUNT]; -+ -+void usage(char *prog) -+{ -+ printf("Usage: %s\n", prog); -+ printf(" -c Use color\n"); -+ printf(" -h Display this help message\n"); -+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", -+ VQUIET, 
VCRITICAL, VINFO); -+} -+ -+void test_count_overflow(void) -+{ -+ futex_t f = FUTEX_INITIALIZER; -+ struct futex_wait_block fwb[MAX_COUNT+1]; -+ int res, i; -+ -+ ksft_print_msg("%s: Test a too big number of futexes\n", progname); -+ -+ for (i = 0; i < MAX_COUNT+1; i++) { -+ fwb[i].uaddr = &f; -+ fwb[i].val = f; -+ fwb[i].bitset = 0; -+ } -+ -+ res = futex_wait_multiple(fwb, MAX_COUNT+1, NULL, FUTEX_PRIVATE_FLAG); -+ -+#ifdef __ILP32__ -+ if (res != -1 || errno != ENOSYS) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; -+ } else { -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+ } -+#else -+ if (res != -1 || errno != EINVAL) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; -+ } else { -+ ksft_test_result_pass("futex_wait_multiple count overflow succeed\n"); -+ } -+ -+#endif /* __ILP32__ */ -+} -+ -+void *waiterfn(void *arg) -+{ -+ int res; -+ -+ res = futex_wait_multiple(fwb, MAX_COUNT, NULL, FUTEX_PRIVATE_FLAG); -+ -+#ifdef __ILP32__ -+ if (res != -1 || errno != ENOSYS) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; -+ } else { -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+ } -+#else -+ if (res < 0) -+ ksft_print_msg("waiter failed %d\n", res); -+ -+ info("futex_wait_multiple: Got hint futex %d was freed\n", res); -+#endif /* __ILP32__ */ -+ -+ return NULL; -+} -+ -+void test_fwb_wakeup(void) -+{ -+ int res, i; -+ pthread_t waiter; -+ -+ ksft_print_msg("%s: Test wake up in a list of futex\n", progname); -+ -+ for (i = 0; i < MAX_COUNT; i++) { -+ fwb[i].uaddr = &f[i]; -+ fwb[i].val = f[i]; -+ fwb[i].bitset = 0xffffffff; -+ } -+ -+ res = pthread_create(&waiter, NULL, waiterfn, NULL); -+ if (res) { -+ ksft_test_result_fail("Creating waiting thread failed"); -+ ksft_exit_fail(); -+ } -+ -+ usleep(WAKE_WAIT_US); -+ res = futex_wake(&(f[MAX_COUNT-1]), 1, FUTEX_PRIVATE_FLAG); -+ if (res != 1) { -+ ksft_test_result_fail("Failed to wake thread res=%d\n", res); -+ ksft_exit_fail(); -+ } -+ -+ pthread_join(waiter, NULL); -+ ksft_test_result_pass("%s succeed\n", __func__); -+} -+ -+int main(int argc, char *argv[]) -+{ -+ int c; -+ -+ while ((c = getopt(argc, argv, "cht:v:")) != -1) { -+ switch (c) { -+ case 'c': -+ log_color(1); -+ break; -+ case 'h': -+ usage(basename(argv[0])); -+ exit(0); -+ case 'v': -+ log_verbosity(atoi(optarg)); -+ break; -+ default: -+ usage(basename(argv[0])); -+ exit(1); -+ } -+ } -+ -+ progname = basename(argv[0]); -+ -+ ksft_print_header(); -+ ksft_set_plan(2); -+ -+ test_count_overflow(); -+ -+#ifdef __ILP32__ -+ // if it's a 32x binary, there's no futex to wakeup -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+#else -+ test_fwb_wakeup(); -+#endif /* __ILP32__ */ -+ -+ ksft_print_cnts(); -+ return ret; -+} -diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh -index 1acb6ace1680e8f3d6b3ee2dc528c19ddfdb018e..a8be94f28ff78b4879d2d19bca5d9b0fcb26c1f8 100755 ---- a/tools/testing/selftests/futex/functional/run.sh -+++ b/tools/testing/selftests/futex/functional/run.sh -@@ -73,3 +73,6 @@ echo - echo - ./futex_wait_uninitialized_heap $COLOR - ./futex_wait_private_mapped_file $COLOR -+ -+echo -+./futex_wait_multiple $COLOR -diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index 
580001e89c6caed57dd8b3cb491d65dce846caff..a3e760886b8e7e74285fdcf2caaaa6f66ad16675 100644 ---- a/include/uapi/linux/futex.h -+++ b/include/uapi/linux/futex.h -@@ -21,7 +21,7 @@ - #define FUTEX_WAKE_BITSET 10 - #define FUTEX_WAIT_REQUEUE_PI 11 - #define FUTEX_CMP_REQUEUE_PI 12 --#define FUTEX_WAIT_MULTIPLE 13 -+#define FUTEX_WAIT_MULTIPLE 31 - - #define FUTEX_PRIVATE_FLAG 128 - #define FUTEX_CLOCK_REALTIME 256 -diff --git a/kernel/futex.c b/kernel/futex.c -index 58cf9eb2b851b4858e29b5ef4114a29a92e676ba..e0bb628a5e1988dcc9ae5442a4259edc229d578d 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -4198,7 +4198,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - return -EINVAL; - - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } -@@ -4399,6 +4399,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, - */ - struct compat_futex_wait_block { - compat_uptr_t uaddr; -+ __u32 pad; - __u32 val; - __u32 bitset; - }; -@@ -4461,7 +4462,7 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - return -EINVAL; - - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } diff --git a/linux58-tkg/linux58-tkg-patches/0008-5.8-bcachefs.patch b/linux58-tkg/linux58-tkg-patches/0008-5.8-bcachefs.patch deleted file mode 100644 index 69cd9f9..0000000 --- a/linux58-tkg/linux58-tkg-patches/0008-5.8-bcachefs.patch +++ /dev/null @@ -1,70598 +0,0 @@ -diff --git a/block/bio.c b/block/bio.c -index a7366c02c9b5..9a5a289757f9 100644 ---- a/block/bio.c -+++ b/block/bio.c -@@ -1316,6 +1316,7 @@ void bio_set_pages_dirty(struct bio *bio) - set_page_dirty_lock(bvec->bv_page); - } - } -+EXPORT_SYMBOL_GPL(bio_set_pages_dirty); - - /* - * bio_check_pages_dirty() will check that all the BIO's pages are still dirty. -@@ -1375,6 +1376,7 @@ void bio_check_pages_dirty(struct bio *bio) - spin_unlock_irqrestore(&bio_dirty_lock, flags); - schedule_work(&bio_dirty_work); - } -+EXPORT_SYMBOL_GPL(bio_check_pages_dirty); - - static inline bool bio_remaining_done(struct bio *bio) - { -diff --git a/block/blk-core.c b/block/blk-core.c -index 03252af8c82c..71907944fa78 100644 ---- a/block/blk-core.c -+++ b/block/blk-core.c -@@ -215,18 +215,23 @@ int blk_status_to_errno(blk_status_t status) - } - EXPORT_SYMBOL_GPL(blk_status_to_errno); - --static void print_req_error(struct request *req, blk_status_t status, -- const char *caller) -+const char *blk_status_to_str(blk_status_t status) - { - int idx = (__force int)status; - - if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors))) -- return; -+ return "(invalid error)"; -+ return blk_errors[idx].name; -+} -+EXPORT_SYMBOL_GPL(blk_status_to_str); - -+static void print_req_error(struct request *req, blk_status_t status, -+ const char *caller) -+{ - printk_ratelimited(KERN_ERR - "%s: %s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x " - "phys_seg %u prio class %u\n", -- caller, blk_errors[idx].name, -+ caller, blk_status_to_str(status), - req->rq_disk ? 
req->rq_disk->disk_name : "?", - blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)), - req->cmd_flags & ~REQ_OP_MASK, -diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig -index bf7dd96db9b3..14274562f6e1 100644 ---- a/drivers/md/bcache/Kconfig -+++ b/drivers/md/bcache/Kconfig -@@ -3,6 +3,7 @@ - config BCACHE - tristate "Block device as cache" - select CRC64 -+ select CLOSURES - help - Allows a block device to be used as cache for other devices; uses - a btree for indexing and the layout is optimized for SSDs. -@@ -18,15 +19,6 @@ config BCACHE_DEBUG - Enables extra debugging tools, allows expensive runtime checks to be - turned on. - --config BCACHE_CLOSURES_DEBUG -- bool "Debug closures" -- depends on BCACHE -- select DEBUG_FS -- help -- Keeps all active closures in a linked list and provides a debugfs -- interface to list them, which makes it possible to see asynchronous -- operations that get stuck. -- - config BCACHE_ASYNC_REGISTRAION - bool "Asynchronous device registration (EXPERIMENTAL)" - depends on BCACHE -diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile -index fd714628da6a..0fb1b6009da3 100644 ---- a/drivers/md/bcache/Makefile -+++ b/drivers/md/bcache/Makefile -@@ -2,6 +2,6 @@ - - obj-$(CONFIG_BCACHE) += bcache.o - --bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\ -- io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ -+bcache-y := alloc.o bset.o btree.o debug.o extents.o io.o\ -+ journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ - util.o writeback.o -diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h -index 221e0191b687..4e82115c5524 100644 ---- a/drivers/md/bcache/bcache.h -+++ b/drivers/md/bcache/bcache.h -@@ -180,6 +180,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -192,7 +193,6 @@ - - #include "bset.h" - #include "util.h" --#include "closure.h" - - struct bucket { - atomic_t pin; -diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c -deleted file mode 100644 -index 0164a1fe94a9..000000000000 ---- a/drivers/md/bcache/closure.c -+++ /dev/null -@@ -1,217 +0,0 @@ --// SPDX-License-Identifier: GPL-2.0 --/* -- * Asynchronous refcounty things -- * -- * Copyright 2010, 2011 Kent Overstreet -- * Copyright 2012 Google, Inc. 
-- */ -- --#include --#include --#include --#include -- --#include "closure.h" -- --static inline void closure_put_after_sub(struct closure *cl, int flags) --{ -- int r = flags & CLOSURE_REMAINING_MASK; -- -- BUG_ON(flags & CLOSURE_GUARD_MASK); -- BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR)); -- -- if (!r) { -- if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { -- atomic_set(&cl->remaining, -- CLOSURE_REMAINING_INITIALIZER); -- closure_queue(cl); -- } else { -- struct closure *parent = cl->parent; -- closure_fn *destructor = cl->fn; -- -- closure_debug_destroy(cl); -- -- if (destructor) -- destructor(cl); -- -- if (parent) -- closure_put(parent); -- } -- } --} -- --/* For clearing flags with the same atomic op as a put */ --void closure_sub(struct closure *cl, int v) --{ -- closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining)); --} -- --/* -- * closure_put - decrement a closure's refcount -- */ --void closure_put(struct closure *cl) --{ -- closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); --} -- --/* -- * closure_wake_up - wake up all closures on a wait list, without memory barrier -- */ --void __closure_wake_up(struct closure_waitlist *wait_list) --{ -- struct llist_node *list; -- struct closure *cl, *t; -- struct llist_node *reverse = NULL; -- -- list = llist_del_all(&wait_list->list); -- -- /* We first reverse the list to preserve FIFO ordering and fairness */ -- reverse = llist_reverse_order(list); -- -- /* Then do the wakeups */ -- llist_for_each_entry_safe(cl, t, reverse, list) { -- closure_set_waiting(cl, 0); -- closure_sub(cl, CLOSURE_WAITING + 1); -- } --} -- --/** -- * closure_wait - add a closure to a waitlist -- * @waitlist: will own a ref on @cl, which will be released when -- * closure_wake_up() is called on @waitlist. -- * @cl: closure pointer. 
-- * -- */ --bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) --{ -- if (atomic_read(&cl->remaining) & CLOSURE_WAITING) -- return false; -- -- closure_set_waiting(cl, _RET_IP_); -- atomic_add(CLOSURE_WAITING + 1, &cl->remaining); -- llist_add(&cl->list, &waitlist->list); -- -- return true; --} -- --struct closure_syncer { -- struct task_struct *task; -- int done; --}; -- --static void closure_sync_fn(struct closure *cl) --{ -- struct closure_syncer *s = cl->s; -- struct task_struct *p; -- -- rcu_read_lock(); -- p = READ_ONCE(s->task); -- s->done = 1; -- wake_up_process(p); -- rcu_read_unlock(); --} -- --void __sched __closure_sync(struct closure *cl) --{ -- struct closure_syncer s = { .task = current }; -- -- cl->s = &s; -- continue_at(cl, closure_sync_fn, NULL); -- -- while (1) { -- set_current_state(TASK_UNINTERRUPTIBLE); -- if (s.done) -- break; -- schedule(); -- } -- -- __set_current_state(TASK_RUNNING); --} -- --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- --static LIST_HEAD(closure_list); --static DEFINE_SPINLOCK(closure_list_lock); -- --void closure_debug_create(struct closure *cl) --{ -- unsigned long flags; -- -- BUG_ON(cl->magic == CLOSURE_MAGIC_ALIVE); -- cl->magic = CLOSURE_MAGIC_ALIVE; -- -- spin_lock_irqsave(&closure_list_lock, flags); -- list_add(&cl->all, &closure_list); -- spin_unlock_irqrestore(&closure_list_lock, flags); --} -- --void closure_debug_destroy(struct closure *cl) --{ -- unsigned long flags; -- -- BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE); -- cl->magic = CLOSURE_MAGIC_DEAD; -- -- spin_lock_irqsave(&closure_list_lock, flags); -- list_del(&cl->all); -- spin_unlock_irqrestore(&closure_list_lock, flags); --} -- --static struct dentry *closure_debug; -- --static int debug_seq_show(struct seq_file *f, void *data) --{ -- struct closure *cl; -- -- spin_lock_irq(&closure_list_lock); -- -- list_for_each_entry(cl, &closure_list, all) { -- int r = atomic_read(&cl->remaining); -- -- seq_printf(f, "%p: %pS -> %pS p %p r %i ", -- cl, (void *) cl->ip, cl->fn, cl->parent, -- r & CLOSURE_REMAINING_MASK); -- -- seq_printf(f, "%s%s\n", -- test_bit(WORK_STRUCT_PENDING_BIT, -- work_data_bits(&cl->work)) ? "Q" : "", -- r & CLOSURE_RUNNING ? "R" : ""); -- -- if (r & CLOSURE_WAITING) -- seq_printf(f, " W %pS\n", -- (void *) cl->waiting_on); -- -- seq_printf(f, "\n"); -- } -- -- spin_unlock_irq(&closure_list_lock); -- return 0; --} -- --static int debug_seq_open(struct inode *inode, struct file *file) --{ -- return single_open(file, debug_seq_show, NULL); --} -- --static const struct file_operations debug_ops = { -- .owner = THIS_MODULE, -- .open = debug_seq_open, -- .read = seq_read, -- .release = single_release --}; -- --void __init closure_debug_init(void) --{ -- if (!IS_ERR_OR_NULL(bcache_debug)) -- /* -- * it is unnecessary to check return value of -- * debugfs_create_file(), we should not care -- * about this. 
-- */ -- closure_debug = debugfs_create_file( -- "closures", 0400, bcache_debug, NULL, &debug_ops); --} --#endif -- --MODULE_AUTHOR("Kent Overstreet "); --MODULE_LICENSE("GPL"); -diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h -deleted file mode 100644 -index c88cdc4ae4ec..000000000000 ---- a/drivers/md/bcache/closure.h -+++ /dev/null -@@ -1,378 +0,0 @@ --/* SPDX-License-Identifier: GPL-2.0 */ --#ifndef _LINUX_CLOSURE_H --#define _LINUX_CLOSURE_H -- --#include --#include --#include --#include -- --/* -- * Closure is perhaps the most overused and abused term in computer science, but -- * since I've been unable to come up with anything better you're stuck with it -- * again. -- * -- * What are closures? -- * -- * They embed a refcount. The basic idea is they count "things that are in -- * progress" - in flight bios, some other thread that's doing something else - -- * anything you might want to wait on. -- * -- * The refcount may be manipulated with closure_get() and closure_put(). -- * closure_put() is where many of the interesting things happen, when it causes -- * the refcount to go to 0. -- * -- * Closures can be used to wait on things both synchronously and asynchronously, -- * and synchronous and asynchronous use can be mixed without restriction. To -- * wait synchronously, use closure_sync() - you will sleep until your closure's -- * refcount hits 1. -- * -- * To wait asynchronously, use -- * continue_at(cl, next_function, workqueue); -- * -- * passing it, as you might expect, the function to run when nothing is pending -- * and the workqueue to run that function out of. -- * -- * continue_at() also, critically, requires a 'return' immediately following the -- * location where this macro is referenced, to return to the calling function. -- * There's good reason for this. -- * -- * To use safely closures asynchronously, they must always have a refcount while -- * they are running owned by the thread that is running them. Otherwise, suppose -- * you submit some bios and wish to have a function run when they all complete: -- * -- * foo_endio(struct bio *bio) -- * { -- * closure_put(cl); -- * } -- * -- * closure_init(cl); -- * -- * do_stuff(); -- * closure_get(cl); -- * bio1->bi_endio = foo_endio; -- * bio_submit(bio1); -- * -- * do_more_stuff(); -- * closure_get(cl); -- * bio2->bi_endio = foo_endio; -- * bio_submit(bio2); -- * -- * continue_at(cl, complete_some_read, system_wq); -- * -- * If closure's refcount started at 0, complete_some_read() could run before the -- * second bio was submitted - which is almost always not what you want! More -- * importantly, it wouldn't be possible to say whether the original thread or -- * complete_some_read()'s thread owned the closure - and whatever state it was -- * associated with! -- * -- * So, closure_init() initializes a closure's refcount to 1 - and when a -- * closure_fn is run, the refcount will be reset to 1 first. -- * -- * Then, the rule is - if you got the refcount with closure_get(), release it -- * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount -- * on a closure because you called closure_init() or you were run out of a -- * closure - _always_ use continue_at(). Doing so consistently will help -- * eliminate an entire class of particularly pernicious races. -- * -- * Lastly, you might have a wait list dedicated to a specific event, and have no -- * need for specifying the condition - you just want to wait until someone runs -- * closure_wake_up() on the appropriate wait list. 
In that case, just use -- * closure_wait(). It will return either true or false, depending on whether the -- * closure was already on a wait list or not - a closure can only be on one wait -- * list at a time. -- * -- * Parents: -- * -- * closure_init() takes two arguments - it takes the closure to initialize, and -- * a (possibly null) parent. -- * -- * If parent is non null, the new closure will have a refcount for its lifetime; -- * a closure is considered to be "finished" when its refcount hits 0 and the -- * function to run is null. Hence -- * -- * continue_at(cl, NULL, NULL); -- * -- * returns up the (spaghetti) stack of closures, precisely like normal return -- * returns up the C stack. continue_at() with non null fn is better thought of -- * as doing a tail call. -- * -- * All this implies that a closure should typically be embedded in a particular -- * struct (which its refcount will normally control the lifetime of), and that -- * struct can very much be thought of as a stack frame. -- */ -- --struct closure; --struct closure_syncer; --typedef void (closure_fn) (struct closure *); --extern struct dentry *bcache_debug; -- --struct closure_waitlist { -- struct llist_head list; --}; -- --enum closure_state { -- /* -- * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by -- * the thread that owns the closure, and cleared by the thread that's -- * waking up the closure. -- * -- * The rest are for debugging and don't affect behaviour: -- * -- * CLOSURE_RUNNING: Set when a closure is running (i.e. by -- * closure_init() and when closure_put() runs then next function), and -- * must be cleared before remaining hits 0. Primarily to help guard -- * against incorrect usage and accidentally transferring references. -- * continue_at() and closure_return() clear it for you, if you're doing -- * something unusual you can use closure_set_dead() which also helps -- * annotate where references are being transferred. -- */ -- -- CLOSURE_BITS_START = (1U << 26), -- CLOSURE_DESTRUCTOR = (1U << 26), -- CLOSURE_WAITING = (1U << 28), -- CLOSURE_RUNNING = (1U << 30), --}; -- --#define CLOSURE_GUARD_MASK \ -- ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1) -- --#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) --#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) -- --struct closure { -- union { -- struct { -- struct workqueue_struct *wq; -- struct closure_syncer *s; -- struct llist_node list; -- closure_fn *fn; -- }; -- struct work_struct work; -- }; -- -- struct closure *parent; -- -- atomic_t remaining; -- --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG --#define CLOSURE_MAGIC_DEAD 0xc054dead --#define CLOSURE_MAGIC_ALIVE 0xc054a11e -- -- unsigned int magic; -- struct list_head all; -- unsigned long ip; -- unsigned long waiting_on; --#endif --}; -- --void closure_sub(struct closure *cl, int v); --void closure_put(struct closure *cl); --void __closure_wake_up(struct closure_waitlist *list); --bool closure_wait(struct closure_waitlist *list, struct closure *cl); --void __closure_sync(struct closure *cl); -- --/** -- * closure_sync - sleep until a closure a closure has nothing left to wait on -- * -- * Sleeps until the refcount hits 1 - the thread that's running the closure owns -- * the last refcount. 
-- */ --static inline void closure_sync(struct closure *cl) --{ -- if ((atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK) != 1) -- __closure_sync(cl); --} -- --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- --void closure_debug_init(void); --void closure_debug_create(struct closure *cl); --void closure_debug_destroy(struct closure *cl); -- --#else -- --static inline void closure_debug_init(void) {} --static inline void closure_debug_create(struct closure *cl) {} --static inline void closure_debug_destroy(struct closure *cl) {} -- --#endif -- --static inline void closure_set_ip(struct closure *cl) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- cl->ip = _THIS_IP_; --#endif --} -- --static inline void closure_set_ret_ip(struct closure *cl) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- cl->ip = _RET_IP_; --#endif --} -- --static inline void closure_set_waiting(struct closure *cl, unsigned long f) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- cl->waiting_on = f; --#endif --} -- --static inline void closure_set_stopped(struct closure *cl) --{ -- atomic_sub(CLOSURE_RUNNING, &cl->remaining); --} -- --static inline void set_closure_fn(struct closure *cl, closure_fn *fn, -- struct workqueue_struct *wq) --{ -- closure_set_ip(cl); -- cl->fn = fn; -- cl->wq = wq; -- /* between atomic_dec() in closure_put() */ -- smp_mb__before_atomic(); --} -- --static inline void closure_queue(struct closure *cl) --{ -- struct workqueue_struct *wq = cl->wq; -- /** -- * Changes made to closure, work_struct, or a couple of other structs -- * may cause work.func not pointing to the right location. -- */ -- BUILD_BUG_ON(offsetof(struct closure, fn) -- != offsetof(struct work_struct, func)); -- if (wq) { -- INIT_WORK(&cl->work, cl->work.func); -- BUG_ON(!queue_work(wq, &cl->work)); -- } else -- cl->fn(cl); --} -- --/** -- * closure_get - increment a closure's refcount -- */ --static inline void closure_get(struct closure *cl) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- BUG_ON((atomic_inc_return(&cl->remaining) & -- CLOSURE_REMAINING_MASK) <= 1); --#else -- atomic_inc(&cl->remaining); --#endif --} -- --/** -- * closure_init - Initialize a closure, setting the refcount to 1 -- * @cl: closure to initialize -- * @parent: parent of the new closure. cl will take a refcount on it for its -- * lifetime; may be NULL. -- */ --static inline void closure_init(struct closure *cl, struct closure *parent) --{ -- memset(cl, 0, sizeof(struct closure)); -- cl->parent = parent; -- if (parent) -- closure_get(parent); -- -- atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); -- -- closure_debug_create(cl); -- closure_set_ip(cl); --} -- --static inline void closure_init_stack(struct closure *cl) --{ -- memset(cl, 0, sizeof(struct closure)); -- atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); --} -- --/** -- * closure_wake_up - wake up all closures on a wait list, -- * with memory barrier -- */ --static inline void closure_wake_up(struct closure_waitlist *list) --{ -- /* Memory barrier for the wait list */ -- smp_mb(); -- __closure_wake_up(list); --} -- --/** -- * continue_at - jump to another function with barrier -- * -- * After @cl is no longer waiting on anything (i.e. all outstanding refs have -- * been dropped with closure_put()), it will resume execution at @fn running out -- * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). 
-- * -- * This is because after calling continue_at() you no longer have a ref on @cl, -- * and whatever @cl owns may be freed out from under you - a running closure fn -- * has a ref on its own closure which continue_at() drops. -- * -- * Note you are expected to immediately return after using this macro. -- */ --#define continue_at(_cl, _fn, _wq) \ --do { \ -- set_closure_fn(_cl, _fn, _wq); \ -- closure_sub(_cl, CLOSURE_RUNNING + 1); \ --} while (0) -- --/** -- * closure_return - finish execution of a closure -- * -- * This is used to indicate that @cl is finished: when all outstanding refs on -- * @cl have been dropped @cl's ref on its parent closure (as passed to -- * closure_init()) will be dropped, if one was specified - thus this can be -- * thought of as returning to the parent closure. -- */ --#define closure_return(_cl) continue_at((_cl), NULL, NULL) -- --/** -- * continue_at_nobarrier - jump to another function without barrier -- * -- * Causes @fn to be executed out of @cl, in @wq context (or called directly if -- * @wq is NULL). -- * -- * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, -- * thus it's not safe to touch anything protected by @cl after a -- * continue_at_nobarrier(). -- */ --#define continue_at_nobarrier(_cl, _fn, _wq) \ --do { \ -- set_closure_fn(_cl, _fn, _wq); \ -- closure_queue(_cl); \ --} while (0) -- --/** -- * closure_return_with_destructor - finish execution of a closure, -- * with destructor -- * -- * Works like closure_return(), except @destructor will be called when all -- * outstanding refs on @cl have been dropped; @destructor may be used to safely -- * free the memory occupied by @cl, and it is called with the ref on the parent -- * closure still held - so @destructor could safely return an item to a -- * freelist protected by @cl's parent. -- */ --#define closure_return_with_destructor(_cl, _destructor) \ --do { \ -- set_closure_fn(_cl, _destructor, NULL); \ -- closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ --} while (0) -- --/** -- * closure_call - execute @fn out of a new, uninitialized closure -- * -- * Typically used when running out of one closure, and we want to run @fn -- * asynchronously out of a new closure - @parent will then wait for @cl to -- * finish. 
-- */ --static inline void closure_call(struct closure *cl, closure_fn fn, -- struct workqueue_struct *wq, -- struct closure *parent) --{ -- closure_init(cl, parent); -- continue_at_nobarrier(cl, fn, wq); --} -- --#endif /* _LINUX_CLOSURE_H */ -diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index 2014016f9a60..331febeabade 100644 ---- a/drivers/md/bcache/super.c -+++ b/drivers/md/bcache/super.c -@@ -2819,7 +2819,6 @@ static int __init bcache_init(void) - goto err; - - bch_debug_init(); -- closure_debug_init(); - - bcache_is_reboot = false; - -diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h -index c029f7443190..59093f9f1793 100644 ---- a/drivers/md/bcache/util.h -+++ b/drivers/md/bcache/util.h -@@ -4,6 +4,7 @@ - #define _BCACHE_UTIL_H - - #include -+#include - #include - #include - #include -@@ -13,8 +14,6 @@ - #include - #include - --#include "closure.h" -- - #define PAGE_SECTORS (PAGE_SIZE / 512) - - struct closure; -diff --git a/fs/Kconfig b/fs/Kconfig -index a88aa3af73c1..18e1627b95f9 100644 ---- a/fs/Kconfig -+++ b/fs/Kconfig -@@ -40,6 +40,7 @@ source "fs/ocfs2/Kconfig" - source "fs/btrfs/Kconfig" - source "fs/nilfs2/Kconfig" - source "fs/f2fs/Kconfig" -+source "fs/bcachefs/Kconfig" - source "fs/zonefs/Kconfig" - - config FS_DAX -diff --git a/fs/Makefile b/fs/Makefile -index 2ce5112b02c8..8e926e6bf48f 100644 ---- a/fs/Makefile -+++ b/fs/Makefile -@@ -130,6 +130,7 @@ obj-$(CONFIG_OCFS2_FS) += ocfs2/ - obj-$(CONFIG_BTRFS_FS) += btrfs/ - obj-$(CONFIG_GFS2_FS) += gfs2/ - obj-$(CONFIG_F2FS_FS) += f2fs/ -+obj-$(CONFIG_BCACHEFS_FS) += bcachefs/ - obj-$(CONFIG_CEPH_FS) += ceph/ - obj-$(CONFIG_PSTORE) += pstore/ - obj-$(CONFIG_EFIVAR_FS) += efivarfs/ -diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig -new file mode 100644 -index 000000000000..10abddae6a80 ---- /dev/null -+++ b/fs/bcachefs/Kconfig -@@ -0,0 +1,50 @@ -+ -+config BCACHEFS_FS -+ tristate "bcachefs filesystem support" -+ depends on BLOCK -+ select EXPORTFS -+ select CLOSURES -+ select LIBCRC32C -+ select CRC64 -+ select FS_POSIX_ACL -+ select LZ4_COMPRESS -+ select LZ4_DECOMPRESS -+ select ZLIB_DEFLATE -+ select ZLIB_INFLATE -+ select ZSTD_COMPRESS -+ select ZSTD_DECOMPRESS -+ select CRYPTO_SHA256 -+ select CRYPTO_CHACHA20 -+ select CRYPTO_POLY1305 -+ select KEYS -+ select SIXLOCKS -+ select RAID6_PQ -+ select XOR_BLOCKS -+ ---help--- -+ The bcachefs filesystem - a modern, copy on write filesystem, with -+ support for multiple devices, compression, checksumming, etc. -+ -+config BCACHEFS_QUOTA -+ bool "bcachefs quota support" -+ depends on BCACHEFS_FS -+ select QUOTACTL -+ -+config BCACHEFS_POSIX_ACL -+ bool "bcachefs POSIX ACL support" -+ depends on BCACHEFS_FS -+ select FS_POSIX_ACL -+ -+config BCACHEFS_DEBUG -+ bool "bcachefs debugging" -+ depends on BCACHEFS_FS -+ ---help--- -+ Enables many extra debugging checks and assertions. -+ -+ The resulting code will be significantly slower than normal; you -+ probably shouldn't select this option unless you're a developer. 
-+ -+config BCACHEFS_TESTS -+ bool "bcachefs unit and performance tests" -+ depends on BCACHEFS_FS -+ ---help--- -+ Include some unit and performance tests for the core btree code -diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile -new file mode 100644 -index 000000000000..d85ced62c0dd ---- /dev/null -+++ b/fs/bcachefs/Makefile -@@ -0,0 +1,59 @@ -+ -+obj-$(CONFIG_BCACHEFS_FS) += bcachefs.o -+ -+bcachefs-y := \ -+ acl.o \ -+ alloc_background.o \ -+ alloc_foreground.o \ -+ bkey.o \ -+ bkey_methods.o \ -+ bkey_sort.o \ -+ bset.o \ -+ btree_cache.o \ -+ btree_gc.o \ -+ btree_io.o \ -+ btree_iter.o \ -+ btree_key_cache.o \ -+ btree_update_interior.o \ -+ btree_update_leaf.o \ -+ buckets.o \ -+ chardev.o \ -+ checksum.o \ -+ clock.o \ -+ compress.o \ -+ debug.o \ -+ dirent.o \ -+ disk_groups.o \ -+ ec.o \ -+ error.o \ -+ extents.o \ -+ extent_update.o \ -+ fs.o \ -+ fs-common.o \ -+ fs-ioctl.o \ -+ fs-io.o \ -+ fsck.o \ -+ inode.o \ -+ io.o \ -+ journal.o \ -+ journal_io.o \ -+ journal_reclaim.o \ -+ journal_seq_blacklist.o \ -+ keylist.o \ -+ migrate.o \ -+ move.o \ -+ movinggc.o \ -+ opts.o \ -+ quota.o \ -+ rebalance.o \ -+ recovery.o \ -+ reflink.o \ -+ replicas.o \ -+ siphash.o \ -+ super.o \ -+ super-io.o \ -+ sysfs.o \ -+ tests.o \ -+ trace.o \ -+ util.o \ -+ xattr.o -diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c -new file mode 100644 -index 000000000000..76c98ddbf628 ---- /dev/null -+++ b/fs/bcachefs/acl.c -@@ -0,0 +1,388 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ -+#include "bcachefs.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "acl.h" -+#include "fs.h" -+#include "xattr.h" -+ -+static inline size_t bch2_acl_size(unsigned nr_short, unsigned nr_long) -+{ -+ return sizeof(bch_acl_header) + -+ sizeof(bch_acl_entry_short) * nr_short + -+ sizeof(bch_acl_entry) * nr_long; -+} -+ -+static inline int acl_to_xattr_type(int type) -+{ -+ switch (type) { -+ case ACL_TYPE_ACCESS: -+ return KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS; -+ case ACL_TYPE_DEFAULT: -+ return KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT; -+ default: -+ BUG(); -+ } -+} -+ -+/* -+ * Convert from filesystem to in-memory representation. 
-+ */ -+static struct posix_acl *bch2_acl_from_disk(const void *value, size_t size) -+{ -+ const void *p, *end = value + size; -+ struct posix_acl *acl; -+ struct posix_acl_entry *out; -+ unsigned count = 0; -+ -+ if (!value) -+ return NULL; -+ if (size < sizeof(bch_acl_header)) -+ goto invalid; -+ if (((bch_acl_header *)value)->a_version != -+ cpu_to_le32(BCH_ACL_VERSION)) -+ goto invalid; -+ -+ p = value + sizeof(bch_acl_header); -+ while (p < end) { -+ const bch_acl_entry *entry = p; -+ -+ if (p + sizeof(bch_acl_entry_short) > end) -+ goto invalid; -+ -+ switch (le16_to_cpu(entry->e_tag)) { -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ p += sizeof(bch_acl_entry_short); -+ break; -+ case ACL_USER: -+ case ACL_GROUP: -+ p += sizeof(bch_acl_entry); -+ break; -+ default: -+ goto invalid; -+ } -+ -+ count++; -+ } -+ -+ if (p > end) -+ goto invalid; -+ -+ if (!count) -+ return NULL; -+ -+ acl = posix_acl_alloc(count, GFP_KERNEL); -+ if (!acl) -+ return ERR_PTR(-ENOMEM); -+ -+ out = acl->a_entries; -+ -+ p = value + sizeof(bch_acl_header); -+ while (p < end) { -+ const bch_acl_entry *in = p; -+ -+ out->e_tag = le16_to_cpu(in->e_tag); -+ out->e_perm = le16_to_cpu(in->e_perm); -+ -+ switch (out->e_tag) { -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ p += sizeof(bch_acl_entry_short); -+ break; -+ case ACL_USER: -+ out->e_uid = make_kuid(&init_user_ns, -+ le32_to_cpu(in->e_id)); -+ p += sizeof(bch_acl_entry); -+ break; -+ case ACL_GROUP: -+ out->e_gid = make_kgid(&init_user_ns, -+ le32_to_cpu(in->e_id)); -+ p += sizeof(bch_acl_entry); -+ break; -+ } -+ -+ out++; -+ } -+ -+ BUG_ON(out != acl->a_entries + acl->a_count); -+ -+ return acl; -+invalid: -+ pr_err("invalid acl entry"); -+ return ERR_PTR(-EINVAL); -+} -+ -+#define acl_for_each_entry(acl, acl_e) \ -+ for (acl_e = acl->a_entries; \ -+ acl_e < acl->a_entries + acl->a_count; \ -+ acl_e++) -+ -+/* -+ * Convert from in-memory to filesystem representation. 
-+ */ -+static struct bkey_i_xattr * -+bch2_acl_to_xattr(struct btree_trans *trans, -+ const struct posix_acl *acl, -+ int type) -+{ -+ struct bkey_i_xattr *xattr; -+ bch_acl_header *acl_header; -+ const struct posix_acl_entry *acl_e; -+ void *outptr; -+ unsigned nr_short = 0, nr_long = 0, acl_len, u64s; -+ -+ acl_for_each_entry(acl, acl_e) { -+ switch (acl_e->e_tag) { -+ case ACL_USER: -+ case ACL_GROUP: -+ nr_long++; -+ break; -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ nr_short++; -+ break; -+ default: -+ return ERR_PTR(-EINVAL); -+ } -+ } -+ -+ acl_len = bch2_acl_size(nr_short, nr_long); -+ u64s = BKEY_U64s + xattr_val_u64s(0, acl_len); -+ -+ if (u64s > U8_MAX) -+ return ERR_PTR(-E2BIG); -+ -+ xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); -+ if (IS_ERR(xattr)) -+ return xattr; -+ -+ bkey_xattr_init(&xattr->k_i); -+ xattr->k.u64s = u64s; -+ xattr->v.x_type = acl_to_xattr_type(type); -+ xattr->v.x_name_len = 0, -+ xattr->v.x_val_len = cpu_to_le16(acl_len); -+ -+ acl_header = xattr_val(&xattr->v); -+ acl_header->a_version = cpu_to_le32(BCH_ACL_VERSION); -+ -+ outptr = (void *) acl_header + sizeof(*acl_header); -+ -+ acl_for_each_entry(acl, acl_e) { -+ bch_acl_entry *entry = outptr; -+ -+ entry->e_tag = cpu_to_le16(acl_e->e_tag); -+ entry->e_perm = cpu_to_le16(acl_e->e_perm); -+ switch (acl_e->e_tag) { -+ case ACL_USER: -+ entry->e_id = cpu_to_le32( -+ from_kuid(&init_user_ns, acl_e->e_uid)); -+ outptr += sizeof(bch_acl_entry); -+ break; -+ case ACL_GROUP: -+ entry->e_id = cpu_to_le32( -+ from_kgid(&init_user_ns, acl_e->e_gid)); -+ outptr += sizeof(bch_acl_entry); -+ break; -+ -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ outptr += sizeof(bch_acl_entry_short); -+ break; -+ } -+ } -+ -+ BUG_ON(outptr != xattr_val(&xattr->v) + acl_len); -+ -+ return xattr; -+} -+ -+struct posix_acl *bch2_get_acl(struct inode *vinode, int type) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c_xattr xattr; -+ struct posix_acl *acl = NULL; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, -+ &inode->ei_str_hash, inode->v.i_ino, -+ &X_SEARCH(acl_to_xattr_type(type), "", 0), -+ 0); -+ if (IS_ERR(iter)) { -+ if (PTR_ERR(iter) == -EINTR) -+ goto retry; -+ -+ if (PTR_ERR(iter) != -ENOENT) -+ acl = ERR_CAST(iter); -+ goto out; -+ } -+ -+ xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); -+ -+ acl = bch2_acl_from_disk(xattr_val(xattr.v), -+ le16_to_cpu(xattr.v->x_val_len)); -+ -+ if (!IS_ERR(acl)) -+ set_cached_acl(&inode->v, type, acl); -+out: -+ bch2_trans_exit(&trans); -+ return acl; -+} -+ -+int bch2_set_acl_trans(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode_u, -+ const struct bch_hash_info *hash_info, -+ struct posix_acl *acl, int type) -+{ -+ int ret; -+ -+ if (type == ACL_TYPE_DEFAULT && -+ !S_ISDIR(inode_u->bi_mode)) -+ return acl ? 
-EACCES : 0; -+ -+ if (acl) { -+ struct bkey_i_xattr *xattr = -+ bch2_acl_to_xattr(trans, acl, type); -+ if (IS_ERR(xattr)) -+ return PTR_ERR(xattr); -+ -+ ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info, -+ inode_u->bi_inum, &xattr->k_i, 0); -+ } else { -+ struct xattr_search_key search = -+ X_SEARCH(acl_to_xattr_type(type), "", 0); -+ -+ ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, hash_info, -+ inode_u->bi_inum, &search); -+ } -+ -+ return ret == -ENOENT ? 0 : ret; -+} -+ -+int bch2_set_acl(struct inode *vinode, struct posix_acl *_acl, int type) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *inode_iter; -+ struct bch_inode_unpacked inode_u; -+ struct posix_acl *acl; -+ umode_t mode; -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ acl = _acl; -+ -+ inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ if (ret) -+ goto btree_err; -+ -+ mode = inode_u.bi_mode; -+ -+ if (type == ACL_TYPE_ACCESS) { -+ ret = posix_acl_update_mode(&inode->v, &mode, &acl); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_set_acl_trans(&trans, &inode_u, -+ &inode->ei_str_hash, -+ acl, type); -+ if (ret) -+ goto btree_err; -+ -+ inode_u.bi_ctime = bch2_current_time(c); -+ inode_u.bi_mode = mode; -+ -+ ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: -+ bch2_trans_commit(&trans, NULL, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK); -+btree_err: -+ if (ret == -EINTR) -+ goto retry; -+ if (unlikely(ret)) -+ goto err; -+ -+ bch2_inode_update_after_write(c, inode, &inode_u, -+ ATTR_CTIME|ATTR_MODE); -+ -+ set_cached_acl(&inode->v, type, acl); -+err: -+ bch2_trans_exit(&trans); -+ mutex_unlock(&inode->ei_update_lock); -+ -+ return ret; -+} -+ -+int bch2_acl_chmod(struct btree_trans *trans, -+ struct bch_inode_info *inode, -+ umode_t mode, -+ struct posix_acl **new_acl) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c_xattr xattr; -+ struct bkey_i_xattr *new; -+ struct posix_acl *acl; -+ int ret = 0; -+ -+ iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, -+ &inode->ei_str_hash, inode->v.i_ino, -+ &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter) != -ENOENT ? 
PTR_ERR(iter) : 0; -+ -+ xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); -+ -+ acl = bch2_acl_from_disk(xattr_val(xattr.v), -+ le16_to_cpu(xattr.v->x_val_len)); -+ if (IS_ERR_OR_NULL(acl)) -+ return PTR_ERR(acl); -+ -+ ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode); -+ if (ret) -+ goto err; -+ -+ new = bch2_acl_to_xattr(trans, acl, ACL_TYPE_ACCESS); -+ if (IS_ERR(new)) { -+ ret = PTR_ERR(new); -+ goto err; -+ } -+ -+ new->k.p = iter->pos; -+ bch2_trans_update(trans, iter, &new->k_i, 0); -+ *new_acl = acl; -+ acl = NULL; -+err: -+ kfree(acl); -+ return ret; -+} -+ -+#endif /* CONFIG_BCACHEFS_POSIX_ACL */ -diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h -new file mode 100644 -index 000000000000..cb62d502a7ff ---- /dev/null -+++ b/fs/bcachefs/acl.h -@@ -0,0 +1,59 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ACL_H -+#define _BCACHEFS_ACL_H -+ -+struct bch_inode_unpacked; -+struct bch_hash_info; -+struct bch_inode_info; -+struct posix_acl; -+ -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ -+#define BCH_ACL_VERSION 0x0001 -+ -+typedef struct { -+ __le16 e_tag; -+ __le16 e_perm; -+ __le32 e_id; -+} bch_acl_entry; -+ -+typedef struct { -+ __le16 e_tag; -+ __le16 e_perm; -+} bch_acl_entry_short; -+ -+typedef struct { -+ __le32 a_version; -+} bch_acl_header; -+ -+struct posix_acl *bch2_get_acl(struct inode *, int); -+ -+int bch2_set_acl_trans(struct btree_trans *, -+ struct bch_inode_unpacked *, -+ const struct bch_hash_info *, -+ struct posix_acl *, int); -+int bch2_set_acl(struct inode *, struct posix_acl *, int); -+int bch2_acl_chmod(struct btree_trans *, struct bch_inode_info *, -+ umode_t, struct posix_acl **); -+ -+#else -+ -+static inline int bch2_set_acl_trans(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode_u, -+ const struct bch_hash_info *hash_info, -+ struct posix_acl *acl, int type) -+{ -+ return 0; -+} -+ -+static inline int bch2_acl_chmod(struct btree_trans *trans, -+ struct bch_inode_info *inode, -+ umode_t mode, -+ struct posix_acl **new_acl) -+{ -+ return 0; -+} -+ -+#endif /* CONFIG_BCACHEFS_POSIX_ACL */ -+ -+#endif /* _BCACHEFS_ACL_H */ -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -new file mode 100644 -index 000000000000..9aa0b42b26b6 ---- /dev/null -+++ b/fs/bcachefs/alloc_background.c -@@ -0,0 +1,1436 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_key_cache.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "clock.h" -+#include "debug.h" -+#include "ec.h" -+#include "error.h" -+#include "recovery.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static const char * const bch2_alloc_field_names[] = { -+#define x(name, bytes) #name, -+ BCH_ALLOC_FIELDS() -+#undef x -+ NULL -+}; -+ -+static void bch2_recalc_oldest_io(struct bch_fs *, struct bch_dev *, int); -+ -+/* Ratelimiting/PD controllers */ -+ -+static void pd_controllers_update(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(to_delayed_work(work), -+ struct bch_fs, -+ pd_controllers_update); -+ struct bch_dev *ca; -+ s64 free = 0, fragmented = 0; -+ unsigned i; -+ -+ for_each_member_device(ca, c, i) { -+ struct bch_dev_usage stats = bch2_dev_usage_read(ca); -+ -+ free += bucket_to_sector(ca, -+ __dev_buckets_free(ca, stats)) << 9; -+ /* -+ * Bytes of internal 
fragmentation, which can be -+ * reclaimed by copy GC -+ */ -+ fragmented += max_t(s64, 0, (bucket_to_sector(ca, -+ stats.buckets[BCH_DATA_user] + -+ stats.buckets[BCH_DATA_cached]) - -+ (stats.sectors[BCH_DATA_user] + -+ stats.sectors[BCH_DATA_cached])) << 9); -+ } -+ -+ bch2_pd_controller_update(&c->copygc_pd, free, fragmented, -1); -+ schedule_delayed_work(&c->pd_controllers_update, -+ c->pd_controllers_update_seconds * HZ); -+} -+ -+/* Persistent alloc info: */ -+ -+static inline u64 get_alloc_field(const struct bch_alloc *a, -+ const void **p, unsigned field) -+{ -+ unsigned bytes = BCH_ALLOC_FIELD_BYTES[field]; -+ u64 v; -+ -+ if (!(a->fields & (1 << field))) -+ return 0; -+ -+ switch (bytes) { -+ case 1: -+ v = *((const u8 *) *p); -+ break; -+ case 2: -+ v = le16_to_cpup(*p); -+ break; -+ case 4: -+ v = le32_to_cpup(*p); -+ break; -+ case 8: -+ v = le64_to_cpup(*p); -+ break; -+ default: -+ BUG(); -+ } -+ -+ *p += bytes; -+ return v; -+} -+ -+static inline void put_alloc_field(struct bkey_i_alloc *a, void **p, -+ unsigned field, u64 v) -+{ -+ unsigned bytes = BCH_ALLOC_FIELD_BYTES[field]; -+ -+ if (!v) -+ return; -+ -+ a->v.fields |= 1 << field; -+ -+ switch (bytes) { -+ case 1: -+ *((u8 *) *p) = v; -+ break; -+ case 2: -+ *((__le16 *) *p) = cpu_to_le16(v); -+ break; -+ case 4: -+ *((__le32 *) *p) = cpu_to_le32(v); -+ break; -+ case 8: -+ *((__le64 *) *p) = cpu_to_le64(v); -+ break; -+ default: -+ BUG(); -+ } -+ -+ *p += bytes; -+} -+ -+struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) -+{ -+ struct bkey_alloc_unpacked ret = { .gen = 0 }; -+ -+ if (k.k->type == KEY_TYPE_alloc) { -+ const struct bch_alloc *a = bkey_s_c_to_alloc(k).v; -+ const void *d = a->data; -+ unsigned idx = 0; -+ -+ ret.gen = a->gen; -+ -+#define x(_name, _bits) ret._name = get_alloc_field(a, &d, idx++); -+ BCH_ALLOC_FIELDS() -+#undef x -+ } -+ return ret; -+} -+ -+void bch2_alloc_pack(struct bkey_i_alloc *dst, -+ const struct bkey_alloc_unpacked src) -+{ -+ unsigned idx = 0; -+ void *d = dst->v.data; -+ unsigned bytes; -+ -+ dst->v.fields = 0; -+ dst->v.gen = src.gen; -+ -+#define x(_name, _bits) put_alloc_field(dst, &d, idx++, src._name); -+ BCH_ALLOC_FIELDS() -+#undef x -+ -+ bytes = (void *) d - (void *) &dst->v; -+ set_bkey_val_bytes(&dst->k, bytes); -+ memset_u64s_tail(&dst->v, 0, bytes); -+} -+ -+static unsigned bch_alloc_val_u64s(const struct bch_alloc *a) -+{ -+ unsigned i, bytes = offsetof(struct bch_alloc, data); -+ -+ for (i = 0; i < ARRAY_SIZE(BCH_ALLOC_FIELD_BYTES); i++) -+ if (a->fields & (1 << i)) -+ bytes += BCH_ALLOC_FIELD_BYTES[i]; -+ -+ return DIV_ROUND_UP(bytes, sizeof(u64)); -+} -+ -+const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); -+ -+ if (k.k->p.inode >= c->sb.nr_devices || -+ !c->devs[k.k->p.inode]) -+ return "invalid device"; -+ -+ /* allow for unknown fields */ -+ if (bkey_val_u64s(a.k) < bch_alloc_val_u64s(a.v)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); -+ const void *d = a.v->data; -+ unsigned i; -+ -+ pr_buf(out, "gen %u", a.v->gen); -+ -+ for (i = 0; i < BCH_ALLOC_FIELD_NR; i++) -+ if (a.v->fields & (1 << i)) -+ pr_buf(out, " %s %llu", -+ bch2_alloc_field_names[i], -+ get_alloc_field(a.v, &d, i)); -+} -+ -+static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bkey_s_c k) -+{ -+ if (!level) -+ 
bch2_mark_key(c, k, 0, 0, NULL, 0, -+ BTREE_TRIGGER_ALLOC_READ| -+ BTREE_TRIGGER_NOATOMIC); -+ -+ return 0; -+} -+ -+int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ int ret = 0; -+ -+ ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_ALLOC, -+ NULL, bch2_alloc_read_fn); -+ if (ret) { -+ bch_err(c, "error reading alloc info: %i", ret); -+ return ret; -+ } -+ -+ percpu_down_write(&c->mark_lock); -+ bch2_dev_usage_from_buckets(c); -+ percpu_up_write(&c->mark_lock); -+ -+ mutex_lock(&c->bucket_clock[READ].lock); -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ bch2_recalc_oldest_io(c, ca, READ); -+ up_read(&ca->bucket_lock); -+ } -+ mutex_unlock(&c->bucket_clock[READ].lock); -+ -+ mutex_lock(&c->bucket_clock[WRITE].lock); -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ bch2_recalc_oldest_io(c, ca, WRITE); -+ up_read(&ca->bucket_lock); -+ } -+ mutex_unlock(&c->bucket_clock[WRITE].lock); -+ -+ return 0; -+} -+ -+enum alloc_write_ret { -+ ALLOC_WROTE, -+ ALLOC_NOWROTE, -+ ALLOC_END, -+}; -+ -+static int bch2_alloc_write_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c k; -+ struct bch_dev *ca; -+ struct bucket_array *ba; -+ struct bucket *g; -+ struct bucket_mark m; -+ struct bkey_alloc_unpacked old_u, new_u; -+ __BKEY_PADDED(k, 8) alloc_key; /* hack: */ -+ struct bkey_i_alloc *a; -+ int ret; -+retry: -+ bch2_trans_begin(trans); -+ -+ ret = bch2_btree_key_cache_flush(trans, -+ BTREE_ID_ALLOC, iter->pos); -+ if (ret) -+ goto err; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ old_u = bch2_alloc_unpack(k); -+ -+ if (iter->pos.inode >= c->sb.nr_devices || -+ !c->devs[iter->pos.inode]) -+ return ALLOC_END; -+ -+ percpu_down_read(&c->mark_lock); -+ ca = bch_dev_bkey_exists(c, iter->pos.inode); -+ ba = bucket_array(ca); -+ -+ if (iter->pos.offset >= ba->nbuckets) { -+ percpu_up_read(&c->mark_lock); -+ return ALLOC_END; -+ } -+ -+ g = &ba->b[iter->pos.offset]; -+ m = READ_ONCE(g->mark); -+ new_u = alloc_mem_to_key(g, m); -+ percpu_up_read(&c->mark_lock); -+ -+ if (!bkey_alloc_unpacked_cmp(old_u, new_u)) -+ return ALLOC_NOWROTE; -+ -+ a = bkey_alloc_init(&alloc_key.k); -+ a->k.p = iter->pos; -+ bch2_alloc_pack(a, new_u); -+ -+ bch2_trans_update(trans, iter, &a->k_i, -+ BTREE_TRIGGER_NORUN); -+ ret = bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ flags); -+err: -+ if (ret == -EINTR) -+ goto retry; -+ return ret; -+} -+ -+int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bch_dev *ca; -+ unsigned i; -+ int ret = 0; -+ -+ BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ -+ for_each_rw_member(ca, c, i) { -+ unsigned first_bucket; -+ -+ percpu_down_read(&c->mark_lock); -+ first_bucket = bucket_array(ca)->first_bucket; -+ percpu_up_read(&c->mark_lock); -+ -+ bch2_btree_iter_set_pos(iter, POS(i, first_bucket)); -+ -+ while (1) { -+ bch2_trans_cond_resched(&trans); -+ -+ ret = bch2_alloc_write_key(&trans, iter, flags); -+ if (ret < 0 || ret == ALLOC_END) -+ break; -+ if (ret == ALLOC_WROTE) -+ *wrote = true; -+ bch2_btree_iter_next_slot(iter); -+ } -+ -+ if (ret < 0) { -+ 
percpu_ref_put(&ca->io_ref); -+ break; -+ } -+ } -+ -+ bch2_trans_exit(&trans); -+ -+ return ret < 0 ? ret : 0; -+} -+ -+/* Bucket IO clocks: */ -+ -+static void bch2_recalc_oldest_io(struct bch_fs *c, struct bch_dev *ca, int rw) -+{ -+ struct bucket_clock *clock = &c->bucket_clock[rw]; -+ struct bucket_array *buckets = bucket_array(ca); -+ struct bucket *g; -+ u16 max_last_io = 0; -+ unsigned i; -+ -+ lockdep_assert_held(&c->bucket_clock[rw].lock); -+ -+ /* Recalculate max_last_io for this device: */ -+ for_each_bucket(g, buckets) -+ max_last_io = max(max_last_io, bucket_last_io(c, g, rw)); -+ -+ ca->max_last_bucket_io[rw] = max_last_io; -+ -+ /* Recalculate global max_last_io: */ -+ max_last_io = 0; -+ -+ for_each_member_device(ca, c, i) -+ max_last_io = max(max_last_io, ca->max_last_bucket_io[rw]); -+ -+ clock->max_last_io = max_last_io; -+} -+ -+static void bch2_rescale_bucket_io_times(struct bch_fs *c, int rw) -+{ -+ struct bucket_clock *clock = &c->bucket_clock[rw]; -+ struct bucket_array *buckets; -+ struct bch_dev *ca; -+ struct bucket *g; -+ unsigned i; -+ -+ trace_rescale_prios(c); -+ -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) -+ g->io_time[rw] = clock->hand - -+ bucket_last_io(c, g, rw) / 2; -+ -+ bch2_recalc_oldest_io(c, ca, rw); -+ -+ up_read(&ca->bucket_lock); -+ } -+} -+ -+static inline u64 bucket_clock_freq(u64 capacity) -+{ -+ return max(capacity >> 10, 2028ULL); -+} -+ -+static void bch2_inc_clock_hand(struct io_timer *timer) -+{ -+ struct bucket_clock *clock = container_of(timer, -+ struct bucket_clock, rescale); -+ struct bch_fs *c = container_of(clock, -+ struct bch_fs, bucket_clock[clock->rw]); -+ struct bch_dev *ca; -+ u64 capacity; -+ unsigned i; -+ -+ mutex_lock(&clock->lock); -+ -+ /* if clock cannot be advanced more, rescale prio */ -+ if (clock->max_last_io >= U16_MAX - 2) -+ bch2_rescale_bucket_io_times(c, clock->rw); -+ -+ BUG_ON(clock->max_last_io >= U16_MAX - 2); -+ -+ for_each_member_device(ca, c, i) -+ ca->max_last_bucket_io[clock->rw]++; -+ clock->max_last_io++; -+ clock->hand++; -+ -+ mutex_unlock(&clock->lock); -+ -+ capacity = READ_ONCE(c->capacity); -+ -+ if (!capacity) -+ return; -+ -+ /* -+ * we only increment when 0.1% of the filesystem capacity has been read -+ * or written too, this determines if it's time -+ * -+ * XXX: we shouldn't really be going off of the capacity of devices in -+ * RW mode (that will be 0 when we're RO, yet we can still service -+ * reads) -+ */ -+ timer->expire += bucket_clock_freq(capacity); -+ -+ bch2_io_timer_add(&c->io_clock[clock->rw], timer); -+} -+ -+static void bch2_bucket_clock_init(struct bch_fs *c, int rw) -+{ -+ struct bucket_clock *clock = &c->bucket_clock[rw]; -+ -+ clock->hand = 1; -+ clock->rw = rw; -+ clock->rescale.fn = bch2_inc_clock_hand; -+ clock->rescale.expire = bucket_clock_freq(c->capacity); -+ mutex_init(&clock->lock); -+} -+ -+/* Background allocator thread: */ -+ -+/* -+ * Scans for buckets to be invalidated, invalidates them, rewrites prios/gens -+ * (marking them as invalidated on disk), then optionally issues discard -+ * commands to the newly free buckets, then puts them on the various freelists. -+ */ -+ -+#define BUCKET_GC_GEN_MAX 96U -+ -+/** -+ * wait_buckets_available - wait on reclaimable buckets -+ * -+ * If there aren't enough available buckets to fill up free_inc, wait until -+ * there are. 
-+ */ -+static int wait_buckets_available(struct bch_fs *c, struct bch_dev *ca) -+{ -+ unsigned long gc_count = c->gc_count; -+ u64 available; -+ int ret = 0; -+ -+ ca->allocator_state = ALLOCATOR_BLOCKED; -+ closure_wake_up(&c->freelist_wait); -+ -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ if (kthread_should_stop()) { -+ ret = 1; -+ break; -+ } -+ -+ if (gc_count != c->gc_count) -+ ca->inc_gen_really_needs_gc = 0; -+ -+ available = max_t(s64, 0, dev_buckets_available(ca) - -+ ca->inc_gen_really_needs_gc); -+ -+ if (available > fifo_free(&ca->free_inc) || -+ (available && -+ (!fifo_full(&ca->free[RESERVE_BTREE]) || -+ !fifo_full(&ca->free[RESERVE_MOVINGGC])))) -+ break; -+ -+ up_read(&c->gc_lock); -+ schedule(); -+ try_to_freeze(); -+ down_read(&c->gc_lock); -+ } -+ -+ __set_current_state(TASK_RUNNING); -+ ca->allocator_state = ALLOCATOR_RUNNING; -+ closure_wake_up(&c->freelist_wait); -+ -+ return ret; -+} -+ -+static bool bch2_can_invalidate_bucket(struct bch_dev *ca, -+ size_t bucket, -+ struct bucket_mark mark) -+{ -+ u8 gc_gen; -+ -+ if (!is_available_bucket(mark)) -+ return false; -+ -+ if (ca->buckets_nouse && -+ test_bit(bucket, ca->buckets_nouse)) -+ return false; -+ -+ gc_gen = bucket_gc_gen(ca, bucket); -+ -+ if (gc_gen >= BUCKET_GC_GEN_MAX / 2) -+ ca->inc_gen_needs_gc++; -+ -+ if (gc_gen >= BUCKET_GC_GEN_MAX) -+ ca->inc_gen_really_needs_gc++; -+ -+ return gc_gen < BUCKET_GC_GEN_MAX; -+} -+ -+/* -+ * Determines what order we're going to reuse buckets, smallest bucket_key() -+ * first. -+ * -+ * -+ * - We take into account the read prio of the bucket, which gives us an -+ * indication of how hot the data is -- we scale the prio so that the prio -+ * farthest from the clock is worth 1/8th of the closest. -+ * -+ * - The number of sectors of cached data in the bucket, which gives us an -+ * indication of the cost in cache misses this eviction will cause. -+ * -+ * - If hotness * sectors used compares equal, we pick the bucket with the -+ * smallest bucket_gc_gen() - since incrementing the same bucket's generation -+ * number repeatedly forces us to run mark and sweep gc to avoid generation -+ * number wraparound. 
-+ */ -+ -+static unsigned long bucket_sort_key(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, struct bucket_mark m) -+{ -+ unsigned last_io = bucket_last_io(c, bucket(ca, b), READ); -+ unsigned max_last_io = ca->max_last_bucket_io[READ]; -+ -+ /* -+ * Time since last read, scaled to [0, 8) where larger value indicates -+ * more recently read data: -+ */ -+ unsigned long hotness = (max_last_io - last_io) * 7 / max_last_io; -+ -+ /* How much we want to keep the data in this bucket: */ -+ unsigned long data_wantness = -+ (hotness + 1) * bucket_sectors_used(m); -+ -+ unsigned long needs_journal_commit = -+ bucket_needs_journal_commit(m, c->journal.last_seq_ondisk); -+ -+ return (data_wantness << 9) | -+ (needs_journal_commit << 8) | -+ (bucket_gc_gen(ca, b) / 16); -+} -+ -+static inline int bucket_alloc_cmp(alloc_heap *h, -+ struct alloc_heap_entry l, -+ struct alloc_heap_entry r) -+{ -+ return cmp_int(l.key, r.key) ?: -+ cmp_int(r.nr, l.nr) ?: -+ cmp_int(l.bucket, r.bucket); -+} -+ -+static inline int bucket_idx_cmp(const void *_l, const void *_r) -+{ -+ const struct alloc_heap_entry *l = _l, *r = _r; -+ -+ return cmp_int(l->bucket, r->bucket); -+} -+ -+static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bucket_array *buckets; -+ struct alloc_heap_entry e = { 0 }; -+ size_t b, i, nr = 0; -+ -+ ca->alloc_heap.used = 0; -+ -+ mutex_lock(&c->bucket_clock[READ].lock); -+ down_read(&ca->bucket_lock); -+ -+ buckets = bucket_array(ca); -+ -+ bch2_recalc_oldest_io(c, ca, READ); -+ -+ /* -+ * Find buckets with lowest read priority, by building a maxheap sorted -+ * by read priority and repeatedly replacing the maximum element until -+ * all buckets have been visited. -+ */ -+ for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++) { -+ struct bucket_mark m = READ_ONCE(buckets->b[b].mark); -+ unsigned long key = bucket_sort_key(c, ca, b, m); -+ -+ if (!bch2_can_invalidate_bucket(ca, b, m)) -+ continue; -+ -+ if (e.nr && e.bucket + e.nr == b && e.key == key) { -+ e.nr++; -+ } else { -+ if (e.nr) -+ heap_add_or_replace(&ca->alloc_heap, e, -+ -bucket_alloc_cmp, NULL); -+ -+ e = (struct alloc_heap_entry) { -+ .bucket = b, -+ .nr = 1, -+ .key = key, -+ }; -+ } -+ -+ cond_resched(); -+ } -+ -+ if (e.nr) -+ heap_add_or_replace(&ca->alloc_heap, e, -+ -bucket_alloc_cmp, NULL); -+ -+ for (i = 0; i < ca->alloc_heap.used; i++) -+ nr += ca->alloc_heap.data[i].nr; -+ -+ while (nr - ca->alloc_heap.data[0].nr >= ALLOC_SCAN_BATCH(ca)) { -+ nr -= ca->alloc_heap.data[0].nr; -+ heap_pop(&ca->alloc_heap, e, -bucket_alloc_cmp, NULL); -+ } -+ -+ up_read(&ca->bucket_lock); -+ mutex_unlock(&c->bucket_clock[READ].lock); -+} -+ -+static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bucket_array *buckets = bucket_array(ca); -+ struct bucket_mark m; -+ size_t b, start; -+ -+ if (ca->fifo_last_bucket < ca->mi.first_bucket || -+ ca->fifo_last_bucket >= ca->mi.nbuckets) -+ ca->fifo_last_bucket = ca->mi.first_bucket; -+ -+ start = ca->fifo_last_bucket; -+ -+ do { -+ ca->fifo_last_bucket++; -+ if (ca->fifo_last_bucket == ca->mi.nbuckets) -+ ca->fifo_last_bucket = ca->mi.first_bucket; -+ -+ b = ca->fifo_last_bucket; -+ m = READ_ONCE(buckets->b[b].mark); -+ -+ if (bch2_can_invalidate_bucket(ca, b, m)) { -+ struct alloc_heap_entry e = { .bucket = b, .nr = 1, }; -+ -+ heap_add(&ca->alloc_heap, e, bucket_alloc_cmp, NULL); -+ if (heap_full(&ca->alloc_heap)) -+ break; -+ } -+ -+ cond_resched(); -+ } while (ca->fifo_last_bucket != start); -+} -+ -+static 
void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bucket_array *buckets = bucket_array(ca); -+ struct bucket_mark m; -+ size_t checked, i; -+ -+ for (checked = 0; -+ checked < ca->mi.nbuckets / 2; -+ checked++) { -+ size_t b = bch2_rand_range(ca->mi.nbuckets - -+ ca->mi.first_bucket) + -+ ca->mi.first_bucket; -+ -+ m = READ_ONCE(buckets->b[b].mark); -+ -+ if (bch2_can_invalidate_bucket(ca, b, m)) { -+ struct alloc_heap_entry e = { .bucket = b, .nr = 1, }; -+ -+ heap_add(&ca->alloc_heap, e, bucket_alloc_cmp, NULL); -+ if (heap_full(&ca->alloc_heap)) -+ break; -+ } -+ -+ cond_resched(); -+ } -+ -+ sort(ca->alloc_heap.data, -+ ca->alloc_heap.used, -+ sizeof(ca->alloc_heap.data[0]), -+ bucket_idx_cmp, NULL); -+ -+ /* remove duplicates: */ -+ for (i = 0; i + 1 < ca->alloc_heap.used; i++) -+ if (ca->alloc_heap.data[i].bucket == -+ ca->alloc_heap.data[i + 1].bucket) -+ ca->alloc_heap.data[i].nr = 0; -+} -+ -+static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca) -+{ -+ size_t i, nr = 0; -+ -+ ca->inc_gen_needs_gc = 0; -+ -+ switch (ca->mi.replacement) { -+ case CACHE_REPLACEMENT_LRU: -+ find_reclaimable_buckets_lru(c, ca); -+ break; -+ case CACHE_REPLACEMENT_FIFO: -+ find_reclaimable_buckets_fifo(c, ca); -+ break; -+ case CACHE_REPLACEMENT_RANDOM: -+ find_reclaimable_buckets_random(c, ca); -+ break; -+ } -+ -+ heap_resort(&ca->alloc_heap, bucket_alloc_cmp, NULL); -+ -+ for (i = 0; i < ca->alloc_heap.used; i++) -+ nr += ca->alloc_heap.data[i].nr; -+ -+ return nr; -+} -+ -+static inline long next_alloc_bucket(struct bch_dev *ca) -+{ -+ struct alloc_heap_entry e, *top = ca->alloc_heap.data; -+ -+ while (ca->alloc_heap.used) { -+ if (top->nr) { -+ size_t b = top->bucket; -+ -+ top->bucket++; -+ top->nr--; -+ return b; -+ } -+ -+ heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL); -+ } -+ -+ return -1; -+} -+ -+/* -+ * returns sequence number of most recent journal entry that updated this -+ * bucket: -+ */ -+static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m) -+{ -+ if (m.journal_seq_valid) { -+ u64 journal_seq = atomic64_read(&c->journal.seq); -+ u64 bucket_seq = journal_seq; -+ -+ bucket_seq &= ~((u64) U16_MAX); -+ bucket_seq |= m.journal_seq; -+ -+ if (bucket_seq > journal_seq) -+ bucket_seq -= 1 << 16; -+ -+ return bucket_seq; -+ } else { -+ return 0; -+ } -+} -+ -+static int bch2_invalidate_one_bucket2(struct btree_trans *trans, -+ struct bch_dev *ca, -+ struct btree_iter *iter, -+ u64 *journal_seq, unsigned flags) -+{ -+#if 0 -+ __BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key; -+#else -+ /* hack: */ -+ __BKEY_PADDED(k, 8) alloc_key; -+#endif -+ struct bch_fs *c = trans->c; -+ struct bkey_i_alloc *a; -+ struct bkey_alloc_unpacked u; -+ struct bucket *g; -+ struct bucket_mark m; -+ bool invalidating_cached_data; -+ size_t b; -+ int ret = 0; -+ -+ BUG_ON(!ca->alloc_heap.used || -+ !ca->alloc_heap.data[0].nr); -+ b = ca->alloc_heap.data[0].bucket; -+ -+ /* first, put on free_inc and mark as owned by allocator: */ -+ percpu_down_read(&c->mark_lock); -+ spin_lock(&c->freelist_lock); -+ -+ verify_not_on_freelist(c, ca, b); -+ -+ BUG_ON(!fifo_push(&ca->free_inc, b)); -+ -+ g = bucket(ca, b); -+ m = READ_ONCE(g->mark); -+ -+ invalidating_cached_data = m.cached_sectors != 0; -+ -+ /* -+ * If we're not invalidating cached data, we only increment the bucket -+ * gen in memory here, the incremented gen will be updated in the btree -+ * by bch2_trans_mark_pointer(): -+ */ -+ -+ if (!invalidating_cached_data) -+ 
bch2_invalidate_bucket(c, ca, b, &m); -+ else -+ bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0); -+ -+ spin_unlock(&c->freelist_lock); -+ percpu_up_read(&c->mark_lock); -+ -+ if (!invalidating_cached_data) -+ goto out; -+ -+ /* -+ * If the read-only path is trying to shut down, we can't be generating -+ * new btree updates: -+ */ -+ if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags)) { -+ ret = 1; -+ goto out; -+ } -+ -+ BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); -+ -+ bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b)); -+retry: -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return ret; -+ -+ percpu_down_read(&c->mark_lock); -+ g = bucket(ca, iter->pos.offset); -+ m = READ_ONCE(g->mark); -+ u = alloc_mem_to_key(g, m); -+ -+ percpu_up_read(&c->mark_lock); -+ -+ invalidating_cached_data = u.cached_sectors != 0; -+ -+ u.gen++; -+ u.data_type = 0; -+ u.dirty_sectors = 0; -+ u.cached_sectors = 0; -+ u.read_time = c->bucket_clock[READ].hand; -+ u.write_time = c->bucket_clock[WRITE].hand; -+ -+ a = bkey_alloc_init(&alloc_key.k); -+ a->k.p = iter->pos; -+ bch2_alloc_pack(a, u); -+ -+ bch2_trans_update(trans, iter, &a->k_i, -+ BTREE_TRIGGER_BUCKET_INVALIDATE); -+ -+ /* -+ * XXX: -+ * when using deferred btree updates, we have journal reclaim doing -+ * btree updates and thus requiring the allocator to make forward -+ * progress, and here the allocator is requiring space in the journal - -+ * so we need a journal pre-reservation: -+ */ -+ ret = bch2_trans_commit(trans, NULL, -+ invalidating_cached_data ? journal_seq : NULL, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_USE_ALLOC_RESERVE| -+ flags); -+ if (ret == -EINTR) -+ goto retry; -+out: -+ if (!ret) { -+ /* remove from alloc_heap: */ -+ struct alloc_heap_entry e, *top = ca->alloc_heap.data; -+ -+ top->bucket++; -+ top->nr--; -+ -+ if (!top->nr) -+ heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL); -+ -+ /* -+ * Make sure we flush the last journal entry that updated this -+ * bucket (i.e. deleting the last reference) before writing to -+ * this bucket again: -+ */ -+ *journal_seq = max(*journal_seq, bucket_journal_seq(c, m)); -+ } else { -+ size_t b2; -+ -+ /* remove from free_inc: */ -+ percpu_down_read(&c->mark_lock); -+ spin_lock(&c->freelist_lock); -+ -+ bch2_mark_alloc_bucket(c, ca, b, false, -+ gc_pos_alloc(c, NULL), 0); -+ -+ BUG_ON(!fifo_pop_back(&ca->free_inc, b2)); -+ BUG_ON(b != b2); -+ -+ spin_unlock(&c->freelist_lock); -+ percpu_up_read(&c->mark_lock); -+ } -+ -+ return ret < 0 ? ret : 0; -+} -+ -+/* -+ * Pull buckets off ca->alloc_heap, invalidate them, move them to ca->free_inc: -+ */ -+static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ u64 journal_seq = 0; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, -+ POS(ca->dev_idx, 0), -+ BTREE_ITER_CACHED| -+ BTREE_ITER_CACHED_NOFILL| -+ BTREE_ITER_INTENT); -+ -+ /* Only use nowait if we've already invalidated at least one bucket: */ -+ while (!ret && -+ !fifo_full(&ca->free_inc) && -+ ca->alloc_heap.used) -+ ret = bch2_invalidate_one_bucket2(&trans, ca, iter, &journal_seq, -+ BTREE_INSERT_GC_LOCK_HELD| -+ (!fifo_empty(&ca->free_inc) -+ ? 
BTREE_INSERT_NOWAIT : 0)); -+ -+ bch2_trans_exit(&trans); -+ -+ /* If we used NOWAIT, don't return the error: */ -+ if (!fifo_empty(&ca->free_inc)) -+ ret = 0; -+ if (ret) { -+ bch_err(ca, "error invalidating buckets: %i", ret); -+ return ret; -+ } -+ -+ if (journal_seq) -+ ret = bch2_journal_flush_seq(&c->journal, journal_seq); -+ if (ret) { -+ bch_err(ca, "journal error: %i", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t bucket) -+{ -+ unsigned i; -+ int ret = 0; -+ -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ spin_lock(&c->freelist_lock); -+ for (i = 0; i < RESERVE_NR; i++) { -+ -+ /* -+ * Don't strand buckets on the copygc freelist until -+ * after recovery is finished: -+ */ -+ if (!test_bit(BCH_FS_STARTED, &c->flags) && -+ i == RESERVE_MOVINGGC) -+ continue; -+ -+ if (fifo_push(&ca->free[i], bucket)) { -+ fifo_pop(&ca->free_inc, bucket); -+ -+ closure_wake_up(&c->freelist_wait); -+ ca->allocator_state = ALLOCATOR_RUNNING; -+ -+ spin_unlock(&c->freelist_lock); -+ goto out; -+ } -+ } -+ -+ if (ca->allocator_state != ALLOCATOR_BLOCKED_FULL) { -+ ca->allocator_state = ALLOCATOR_BLOCKED_FULL; -+ closure_wake_up(&c->freelist_wait); -+ } -+ -+ spin_unlock(&c->freelist_lock); -+ -+ if ((current->flags & PF_KTHREAD) && -+ kthread_should_stop()) { -+ ret = 1; -+ break; -+ } -+ -+ schedule(); -+ try_to_freeze(); -+ } -+out: -+ __set_current_state(TASK_RUNNING); -+ return ret; -+} -+ -+/* -+ * Pulls buckets off free_inc, discards them (if enabled), then adds them to -+ * freelists, waiting until there's room if necessary: -+ */ -+static int discard_invalidated_buckets(struct bch_fs *c, struct bch_dev *ca) -+{ -+ while (!fifo_empty(&ca->free_inc)) { -+ size_t bucket = fifo_peek(&ca->free_inc); -+ -+ if (ca->mi.discard && -+ blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev))) -+ blkdev_issue_discard(ca->disk_sb.bdev, -+ bucket_to_sector(ca, bucket), -+ ca->mi.bucket_size, GFP_NOIO, 0); -+ -+ if (push_invalidated_bucket(c, ca, bucket)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/** -+ * bch_allocator_thread - move buckets from free_inc to reserves -+ * -+ * The free_inc FIFO is populated by find_reclaimable_buckets(), and -+ * the reserves are depleted by bucket allocation. When we run out -+ * of free_inc, try to invalidate some buckets and write out -+ * prios and gens. 
-+ */ -+static int bch2_allocator_thread(void *arg) -+{ -+ struct bch_dev *ca = arg; -+ struct bch_fs *c = ca->fs; -+ size_t nr; -+ int ret; -+ -+ set_freezable(); -+ ca->allocator_state = ALLOCATOR_RUNNING; -+ -+ while (1) { -+ cond_resched(); -+ if (kthread_should_stop()) -+ break; -+ -+ pr_debug("discarding %zu invalidated buckets", -+ fifo_used(&ca->free_inc)); -+ -+ ret = discard_invalidated_buckets(c, ca); -+ if (ret) -+ goto stop; -+ -+ down_read(&c->gc_lock); -+ -+ ret = bch2_invalidate_buckets(c, ca); -+ if (ret) { -+ up_read(&c->gc_lock); -+ goto stop; -+ } -+ -+ if (!fifo_empty(&ca->free_inc)) { -+ up_read(&c->gc_lock); -+ continue; -+ } -+ -+ pr_debug("free_inc now empty"); -+ -+ do { -+ /* -+ * Find some buckets that we can invalidate, either -+ * they're completely unused, or only contain clean data -+ * that's been written back to the backing device or -+ * another cache tier -+ */ -+ -+ pr_debug("scanning for reclaimable buckets"); -+ -+ nr = find_reclaimable_buckets(c, ca); -+ -+ pr_debug("found %zu buckets", nr); -+ -+ trace_alloc_batch(ca, nr, ca->alloc_heap.size); -+ -+ if ((ca->inc_gen_needs_gc >= ALLOC_SCAN_BATCH(ca) || -+ ca->inc_gen_really_needs_gc) && -+ c->gc_thread) { -+ atomic_inc(&c->kick_gc); -+ wake_up_process(c->gc_thread); -+ } -+ -+ /* -+ * If we found any buckets, we have to invalidate them -+ * before we scan for more - but if we didn't find very -+ * many we may want to wait on more buckets being -+ * available so we don't spin: -+ */ -+ if (!nr || -+ (nr < ALLOC_SCAN_BATCH(ca) && -+ !fifo_empty(&ca->free[RESERVE_NONE]))) { -+ ret = wait_buckets_available(c, ca); -+ if (ret) { -+ up_read(&c->gc_lock); -+ goto stop; -+ } -+ } -+ } while (!nr); -+ -+ up_read(&c->gc_lock); -+ -+ pr_debug("%zu buckets to invalidate", nr); -+ -+ /* -+ * alloc_heap is now full of newly-invalidated buckets: next, -+ * write out the new bucket gens: -+ */ -+ } -+ -+stop: -+ pr_debug("alloc thread stopping (ret %i)", ret); -+ ca->allocator_state = ALLOCATOR_STOPPED; -+ closure_wake_up(&c->freelist_wait); -+ return 0; -+} -+ -+/* Startup/shutdown (ro/rw): */ -+ -+void bch2_recalc_capacity(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ u64 capacity = 0, reserved_sectors = 0, gc_reserve, copygc_threshold = 0; -+ unsigned bucket_size_max = 0; -+ unsigned long ra_pages = 0; -+ unsigned i, j; -+ -+ lockdep_assert_held(&c->state_lock); -+ -+ for_each_online_member(ca, c, i) { -+ struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_bdi; -+ -+ ra_pages += bdi->ra_pages; -+ } -+ -+ bch2_set_ra_pages(c, ra_pages); -+ -+ for_each_rw_member(ca, c, i) { -+ u64 dev_reserve = 0; -+ -+ /* -+ * We need to reserve buckets (from the number -+ * of currently available buckets) against -+ * foreground writes so that mainly copygc can -+ * make forward progress. -+ * -+ * We need enough to refill the various reserves -+ * from scratch - copygc will use its entire -+ * reserve all at once, then run against when -+ * its reserve is refilled (from the formerly -+ * available buckets). -+ * -+ * This reserve is just used when considering if -+ * allocations for foreground writes must wait - -+ * not -ENOSPC calculations. 
-+ */ -+ for (j = 0; j < RESERVE_NONE; j++) -+ dev_reserve += ca->free[j].size; -+ -+ dev_reserve += 1; /* btree write point */ -+ dev_reserve += 1; /* copygc write point */ -+ dev_reserve += 1; /* rebalance write point */ -+ -+ dev_reserve *= ca->mi.bucket_size; -+ -+ copygc_threshold += dev_reserve; -+ -+ capacity += bucket_to_sector(ca, ca->mi.nbuckets - -+ ca->mi.first_bucket); -+ -+ reserved_sectors += dev_reserve * 2; -+ -+ bucket_size_max = max_t(unsigned, bucket_size_max, -+ ca->mi.bucket_size); -+ } -+ -+ gc_reserve = c->opts.gc_reserve_bytes -+ ? c->opts.gc_reserve_bytes >> 9 -+ : div64_u64(capacity * c->opts.gc_reserve_percent, 100); -+ -+ reserved_sectors = max(gc_reserve, reserved_sectors); -+ -+ reserved_sectors = min(reserved_sectors, capacity); -+ -+ c->copygc_threshold = copygc_threshold; -+ c->capacity = capacity - reserved_sectors; -+ -+ c->bucket_size_max = bucket_size_max; -+ -+ if (c->capacity) { -+ bch2_io_timer_add(&c->io_clock[READ], -+ &c->bucket_clock[READ].rescale); -+ bch2_io_timer_add(&c->io_clock[WRITE], -+ &c->bucket_clock[WRITE].rescale); -+ } else { -+ bch2_io_timer_del(&c->io_clock[READ], -+ &c->bucket_clock[READ].rescale); -+ bch2_io_timer_del(&c->io_clock[WRITE], -+ &c->bucket_clock[WRITE].rescale); -+ } -+ -+ /* Wake up case someone was waiting for buckets */ -+ closure_wake_up(&c->freelist_wait); -+} -+ -+static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct open_bucket *ob; -+ bool ret = false; -+ -+ for (ob = c->open_buckets; -+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); -+ ob++) { -+ spin_lock(&ob->lock); -+ if (ob->valid && !ob->on_partial_list && -+ ob->ptr.dev == ca->dev_idx) -+ ret = true; -+ spin_unlock(&ob->lock); -+ } -+ -+ return ret; -+} -+ -+/* device goes ro: */ -+void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca) -+{ -+ unsigned i; -+ -+ BUG_ON(ca->alloc_thread); -+ -+ /* First, remove device from allocation groups: */ -+ -+ for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++) -+ clear_bit(ca->dev_idx, c->rw_devs[i].d); -+ -+ /* -+ * Capacity is calculated based off of devices in allocation groups: -+ */ -+ bch2_recalc_capacity(c); -+ -+ /* Next, close write points that point to this device... 
*/ -+ for (i = 0; i < ARRAY_SIZE(c->write_points); i++) -+ bch2_writepoint_stop(c, ca, &c->write_points[i]); -+ -+ bch2_writepoint_stop(c, ca, &c->copygc_write_point); -+ bch2_writepoint_stop(c, ca, &c->rebalance_write_point); -+ bch2_writepoint_stop(c, ca, &c->btree_write_point); -+ -+ mutex_lock(&c->btree_reserve_cache_lock); -+ while (c->btree_reserve_cache_nr) { -+ struct btree_alloc *a = -+ &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; -+ -+ bch2_open_buckets_put(c, &a->ob); -+ } -+ mutex_unlock(&c->btree_reserve_cache_lock); -+ -+ while (1) { -+ struct open_bucket *ob; -+ -+ spin_lock(&c->freelist_lock); -+ if (!ca->open_buckets_partial_nr) { -+ spin_unlock(&c->freelist_lock); -+ break; -+ } -+ ob = c->open_buckets + -+ ca->open_buckets_partial[--ca->open_buckets_partial_nr]; -+ ob->on_partial_list = false; -+ spin_unlock(&c->freelist_lock); -+ -+ bch2_open_bucket_put(c, ob); -+ } -+ -+ bch2_ec_stop_dev(c, ca); -+ -+ /* -+ * Wake up threads that were blocked on allocation, so they can notice -+ * the device can no longer be removed and the capacity has changed: -+ */ -+ closure_wake_up(&c->freelist_wait); -+ -+ /* -+ * journal_res_get() can block waiting for free space in the journal - -+ * it needs to notice there may not be devices to allocate from anymore: -+ */ -+ wake_up(&c->journal.wait); -+ -+ /* Now wait for any in flight writes: */ -+ -+ closure_wait_event(&c->open_buckets_wait, -+ !bch2_dev_has_open_write_point(c, ca)); -+} -+ -+/* device goes rw: */ -+void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++) -+ if (ca->mi.data_allowed & (1 << i)) -+ set_bit(ca->dev_idx, c->rw_devs[i].d); -+} -+ -+void bch2_dev_allocator_quiesce(struct bch_fs *c, struct bch_dev *ca) -+{ -+ if (ca->alloc_thread) -+ closure_wait_event(&c->freelist_wait, -+ ca->allocator_state != ALLOCATOR_RUNNING); -+} -+ -+/* stop allocator thread: */ -+void bch2_dev_allocator_stop(struct bch_dev *ca) -+{ -+ struct task_struct *p; -+ -+ p = rcu_dereference_protected(ca->alloc_thread, 1); -+ ca->alloc_thread = NULL; -+ -+ /* -+ * We need an rcu barrier between setting ca->alloc_thread = NULL and -+ * the thread shutting down to avoid bch2_wake_allocator() racing: -+ * -+ * XXX: it would be better to have the rcu barrier be asynchronous -+ * instead of blocking us here -+ */ -+ synchronize_rcu(); -+ -+ if (p) { -+ kthread_stop(p); -+ put_task_struct(p); -+ } -+} -+ -+/* start allocator thread: */ -+int bch2_dev_allocator_start(struct bch_dev *ca) -+{ -+ struct task_struct *p; -+ -+ /* -+ * allocator thread already started? 
-+ */ -+ if (ca->alloc_thread) -+ return 0; -+ -+ p = kthread_create(bch2_allocator_thread, ca, -+ "bch_alloc[%s]", ca->name); -+ if (IS_ERR(p)) -+ return PTR_ERR(p); -+ -+ get_task_struct(p); -+ rcu_assign_pointer(ca->alloc_thread, p); -+ wake_up_process(p); -+ return 0; -+} -+ -+void bch2_fs_allocator_background_init(struct bch_fs *c) -+{ -+ spin_lock_init(&c->freelist_lock); -+ bch2_bucket_clock_init(c, READ); -+ bch2_bucket_clock_init(c, WRITE); -+ -+ c->pd_controllers_update_seconds = 5; -+ INIT_DELAYED_WORK(&c->pd_controllers_update, pd_controllers_update); -+} -diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h -new file mode 100644 -index 000000000000..f6b9f27f0713 ---- /dev/null -+++ b/fs/bcachefs/alloc_background.h -@@ -0,0 +1,97 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ALLOC_BACKGROUND_H -+#define _BCACHEFS_ALLOC_BACKGROUND_H -+ -+#include "bcachefs.h" -+#include "alloc_types.h" -+#include "debug.h" -+ -+struct bkey_alloc_unpacked { -+ u8 gen; -+#define x(_name, _bits) u##_bits _name; -+ BCH_ALLOC_FIELDS() -+#undef x -+}; -+ -+/* returns true if not equal */ -+static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l, -+ struct bkey_alloc_unpacked r) -+{ -+ return l.gen != r.gen -+#define x(_name, _bits) || l._name != r._name -+ BCH_ALLOC_FIELDS() -+#undef x -+ ; -+} -+ -+struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c); -+void bch2_alloc_pack(struct bkey_i_alloc *, -+ const struct bkey_alloc_unpacked); -+ -+static inline struct bkey_alloc_unpacked -+alloc_mem_to_key(struct bucket *g, struct bucket_mark m) -+{ -+ return (struct bkey_alloc_unpacked) { -+ .gen = m.gen, -+ .oldest_gen = g->oldest_gen, -+ .data_type = m.data_type, -+ .dirty_sectors = m.dirty_sectors, -+ .cached_sectors = m.cached_sectors, -+ .read_time = g->io_time[READ], -+ .write_time = g->io_time[WRITE], -+ }; -+} -+ -+#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9) -+ -+const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_alloc (struct bkey_ops) { \ -+ .key_invalid = bch2_alloc_invalid, \ -+ .val_to_text = bch2_alloc_to_text, \ -+} -+ -+struct journal_keys; -+int bch2_alloc_read(struct bch_fs *, struct journal_keys *); -+ -+static inline void bch2_wake_allocator(struct bch_dev *ca) -+{ -+ struct task_struct *p; -+ -+ rcu_read_lock(); -+ p = rcu_dereference(ca->alloc_thread); -+ if (p) -+ wake_up_process(p); -+ rcu_read_unlock(); -+} -+ -+static inline void verify_not_on_freelist(struct bch_fs *c, struct bch_dev *ca, -+ size_t bucket) -+{ -+ if (expensive_debug_checks(c)) { -+ size_t iter; -+ long i; -+ unsigned j; -+ -+ for (j = 0; j < RESERVE_NR; j++) -+ fifo_for_each_entry(i, &ca->free[j], iter) -+ BUG_ON(i == bucket); -+ fifo_for_each_entry(i, &ca->free_inc, iter) -+ BUG_ON(i == bucket); -+ } -+} -+ -+void bch2_recalc_capacity(struct bch_fs *); -+ -+void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *); -+void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *); -+ -+void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *); -+void bch2_dev_allocator_stop(struct bch_dev *); -+int bch2_dev_allocator_start(struct bch_dev *); -+ -+int bch2_alloc_write(struct bch_fs *, unsigned, bool *); -+void bch2_fs_allocator_background_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */ -diff --git a/fs/bcachefs/alloc_foreground.c 
b/fs/bcachefs/alloc_foreground.c -new file mode 100644 -index 000000000000..4a048828869b ---- /dev/null -+++ b/fs/bcachefs/alloc_foreground.c -@@ -0,0 +1,992 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Primary bucket allocation code -+ * -+ * Copyright 2012 Google, Inc. -+ * -+ * Allocation in bcache is done in terms of buckets: -+ * -+ * Each bucket has associated an 8 bit gen; this gen corresponds to the gen in -+ * btree pointers - they must match for the pointer to be considered valid. -+ * -+ * Thus (assuming a bucket has no dirty data or metadata in it) we can reuse a -+ * bucket simply by incrementing its gen. -+ * -+ * The gens (along with the priorities; it's really the gens are important but -+ * the code is named as if it's the priorities) are written in an arbitrary list -+ * of buckets on disk, with a pointer to them in the journal header. -+ * -+ * When we invalidate a bucket, we have to write its new gen to disk and wait -+ * for that write to complete before we use it - otherwise after a crash we -+ * could have pointers that appeared to be good but pointed to data that had -+ * been overwritten. -+ * -+ * Since the gens and priorities are all stored contiguously on disk, we can -+ * batch this up: We fill up the free_inc list with freshly invalidated buckets, -+ * call prio_write(), and when prio_write() finishes we pull buckets off the -+ * free_inc list and optionally discard them. -+ * -+ * free_inc isn't the only freelist - if it was, we'd often have to sleep while -+ * priorities and gens were being written before we could allocate. c->free is a -+ * smaller freelist, and buckets on that list are always ready to be used. -+ * -+ * If we've got discards enabled, that happens when a bucket moves from the -+ * free_inc list to the free list. -+ * -+ * It's important to ensure that gens don't wrap around - with respect to -+ * either the oldest gen in the btree or the gen on disk. This is quite -+ * difficult to do in practice, but we explicitly guard against it anyways - if -+ * a bucket is in danger of wrapping around we simply skip invalidating it that -+ * time around, and we garbage collect or rewrite the priorities sooner than we -+ * would have otherwise. -+ * -+ * bch2_bucket_alloc() allocates a single bucket from a specific device. -+ * -+ * bch2_bucket_alloc_set() allocates one or more buckets from different devices -+ * in a given filesystem. -+ * -+ * invalidate_buckets() drives all the processes described above. It's called -+ * from bch2_bucket_alloc() and a few other places that need to make sure free -+ * buckets are ready. -+ * -+ * invalidate_buckets_(lru|fifo)() find buckets that are available to be -+ * invalidated, and then invalidate them and stick them on the free_inc list - -+ * in either lru or fifo order. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "clock.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "io.h" -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * Open buckets represent a bucket that's currently being allocated from. 
They -+ * serve two purposes: -+ * -+ * - They track buckets that have been partially allocated, allowing for -+ * sub-bucket sized allocations - they're used by the sector allocator below -+ * -+ * - They provide a reference to the buckets they own that mark and sweep GC -+ * can find, until the new allocation has a pointer to it inserted into the -+ * btree -+ * -+ * When allocating some space with the sector allocator, the allocation comes -+ * with a reference to an open bucket - the caller is required to put that -+ * reference _after_ doing the index update that makes its allocation reachable. -+ */ -+ -+void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ -+ if (ob->ec) { -+ bch2_ec_bucket_written(c, ob); -+ return; -+ } -+ -+ percpu_down_read(&c->mark_lock); -+ spin_lock(&ob->lock); -+ -+ bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr), -+ false, gc_pos_alloc(c, ob), 0); -+ ob->valid = false; -+ ob->type = 0; -+ -+ spin_unlock(&ob->lock); -+ percpu_up_read(&c->mark_lock); -+ -+ spin_lock(&c->freelist_lock); -+ ob->freelist = c->open_buckets_freelist; -+ c->open_buckets_freelist = ob - c->open_buckets; -+ c->open_buckets_nr_free++; -+ spin_unlock(&c->freelist_lock); -+ -+ closure_wake_up(&c->open_buckets_wait); -+} -+ -+void bch2_open_bucket_write_error(struct bch_fs *c, -+ struct open_buckets *obs, -+ unsigned dev) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, obs, ob, i) -+ if (ob->ptr.dev == dev && -+ ob->ec) -+ bch2_ec_bucket_cancel(c, ob); -+} -+ -+static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c) -+{ -+ struct open_bucket *ob; -+ -+ BUG_ON(!c->open_buckets_freelist || !c->open_buckets_nr_free); -+ -+ ob = c->open_buckets + c->open_buckets_freelist; -+ c->open_buckets_freelist = ob->freelist; -+ atomic_set(&ob->pin, 1); -+ ob->type = 0; -+ -+ c->open_buckets_nr_free--; -+ return ob; -+} -+ -+static void open_bucket_free_unused(struct bch_fs *c, -+ struct write_point *wp, -+ struct open_bucket *ob) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ bool may_realloc = wp->type == BCH_DATA_user; -+ -+ BUG_ON(ca->open_buckets_partial_nr > -+ ARRAY_SIZE(ca->open_buckets_partial)); -+ -+ if (ca->open_buckets_partial_nr < -+ ARRAY_SIZE(ca->open_buckets_partial) && -+ may_realloc) { -+ spin_lock(&c->freelist_lock); -+ ob->on_partial_list = true; -+ ca->open_buckets_partial[ca->open_buckets_partial_nr++] = -+ ob - c->open_buckets; -+ spin_unlock(&c->freelist_lock); -+ -+ closure_wake_up(&c->open_buckets_wait); -+ closure_wake_up(&c->freelist_wait); -+ } else { -+ bch2_open_bucket_put(c, ob); -+ } -+} -+ -+static void verify_not_stale(struct bch_fs *c, const struct open_buckets *obs) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, obs, ob, i) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ -+ BUG_ON(ptr_stale(ca, &ob->ptr)); -+ } -+#endif -+} -+ -+/* _only_ for allocating the journal on a new device: */ -+long bch2_bucket_alloc_new_fs(struct bch_dev *ca) -+{ -+ struct bucket_array *buckets; -+ ssize_t b; -+ -+ rcu_read_lock(); -+ buckets = bucket_array(ca); -+ -+ for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++) -+ if (is_available_bucket(buckets->b[b].mark)) -+ goto success; -+ b = -1; -+success: -+ rcu_read_unlock(); -+ return b; -+} -+ -+static inline unsigned open_buckets_reserved(enum alloc_reserve reserve) -+{ -+ switch (reserve) { -+ case 
RESERVE_ALLOC: -+ return 0; -+ case RESERVE_BTREE: -+ return OPEN_BUCKETS_COUNT / 4; -+ default: -+ return OPEN_BUCKETS_COUNT / 2; -+ } -+} -+ -+/** -+ * bch_bucket_alloc - allocate a single bucket from a specific device -+ * -+ * Returns index of bucket on success, 0 on failure -+ * */ -+struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, -+ enum alloc_reserve reserve, -+ bool may_alloc_partial, -+ struct closure *cl) -+{ -+ struct bucket_array *buckets; -+ struct open_bucket *ob; -+ long bucket = 0; -+ -+ spin_lock(&c->freelist_lock); -+ -+ if (may_alloc_partial) { -+ int i; -+ -+ for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) { -+ ob = c->open_buckets + ca->open_buckets_partial[i]; -+ -+ if (reserve <= ob->alloc_reserve) { -+ array_remove_item(ca->open_buckets_partial, -+ ca->open_buckets_partial_nr, -+ i); -+ ob->on_partial_list = false; -+ ob->alloc_reserve = reserve; -+ spin_unlock(&c->freelist_lock); -+ return ob; -+ } -+ } -+ } -+ -+ if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) { -+ if (cl) -+ closure_wait(&c->open_buckets_wait, cl); -+ -+ if (!c->blocked_allocate_open_bucket) -+ c->blocked_allocate_open_bucket = local_clock(); -+ -+ spin_unlock(&c->freelist_lock); -+ trace_open_bucket_alloc_fail(ca, reserve); -+ return ERR_PTR(-OPEN_BUCKETS_EMPTY); -+ } -+ -+ if (likely(fifo_pop(&ca->free[RESERVE_NONE], bucket))) -+ goto out; -+ -+ switch (reserve) { -+ case RESERVE_ALLOC: -+ if (fifo_pop(&ca->free[RESERVE_BTREE], bucket)) -+ goto out; -+ break; -+ case RESERVE_BTREE: -+ if (fifo_used(&ca->free[RESERVE_BTREE]) * 2 >= -+ ca->free[RESERVE_BTREE].size && -+ fifo_pop(&ca->free[RESERVE_BTREE], bucket)) -+ goto out; -+ break; -+ case RESERVE_MOVINGGC: -+ if (fifo_pop(&ca->free[RESERVE_MOVINGGC], bucket)) -+ goto out; -+ break; -+ default: -+ break; -+ } -+ -+ if (cl) -+ closure_wait(&c->freelist_wait, cl); -+ -+ if (!c->blocked_allocate) -+ c->blocked_allocate = local_clock(); -+ -+ spin_unlock(&c->freelist_lock); -+ -+ trace_bucket_alloc_fail(ca, reserve); -+ return ERR_PTR(-FREELIST_EMPTY); -+out: -+ verify_not_on_freelist(c, ca, bucket); -+ -+ ob = bch2_open_bucket_alloc(c); -+ -+ spin_lock(&ob->lock); -+ buckets = bucket_array(ca); -+ -+ ob->valid = true; -+ ob->sectors_free = ca->mi.bucket_size; -+ ob->alloc_reserve = reserve; -+ ob->ptr = (struct bch_extent_ptr) { -+ .type = 1 << BCH_EXTENT_ENTRY_ptr, -+ .gen = buckets->b[bucket].mark.gen, -+ .offset = bucket_to_sector(ca, bucket), -+ .dev = ca->dev_idx, -+ }; -+ -+ bucket_io_clock_reset(c, ca, bucket, READ); -+ bucket_io_clock_reset(c, ca, bucket, WRITE); -+ spin_unlock(&ob->lock); -+ -+ if (c->blocked_allocate_open_bucket) { -+ bch2_time_stats_update( -+ &c->times[BCH_TIME_blocked_allocate_open_bucket], -+ c->blocked_allocate_open_bucket); -+ c->blocked_allocate_open_bucket = 0; -+ } -+ -+ if (c->blocked_allocate) { -+ bch2_time_stats_update( -+ &c->times[BCH_TIME_blocked_allocate], -+ c->blocked_allocate); -+ c->blocked_allocate = 0; -+ } -+ -+ spin_unlock(&c->freelist_lock); -+ -+ bch2_wake_allocator(ca); -+ -+ trace_bucket_alloc(ca, reserve); -+ return ob; -+} -+ -+static int __dev_stripe_cmp(struct dev_stripe_state *stripe, -+ unsigned l, unsigned r) -+{ -+ return ((stripe->next_alloc[l] > stripe->next_alloc[r]) - -+ (stripe->next_alloc[l] < stripe->next_alloc[r])); -+} -+ -+#define dev_stripe_cmp(l, r) __dev_stripe_cmp(stripe, l, r) -+ -+struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c, -+ struct dev_stripe_state *stripe, -+ struct bch_devs_mask *devs) 
-+{ -+ struct dev_alloc_list ret = { .nr = 0 }; -+ unsigned i; -+ -+ for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX) -+ ret.devs[ret.nr++] = i; -+ -+ bubble_sort(ret.devs, ret.nr, dev_stripe_cmp); -+ return ret; -+} -+ -+void bch2_dev_stripe_increment(struct bch_dev *ca, -+ struct dev_stripe_state *stripe) -+{ -+ u64 *v = stripe->next_alloc + ca->dev_idx; -+ u64 free_space = dev_buckets_free(ca); -+ u64 free_space_inv = free_space -+ ? div64_u64(1ULL << 48, free_space) -+ : 1ULL << 48; -+ u64 scale = *v / 4; -+ -+ if (*v + free_space_inv >= *v) -+ *v += free_space_inv; -+ else -+ *v = U64_MAX; -+ -+ for (v = stripe->next_alloc; -+ v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++) -+ *v = *v < scale ? 0 : *v - scale; -+} -+ -+#define BUCKET_MAY_ALLOC_PARTIAL (1 << 0) -+#define BUCKET_ALLOC_USE_DURABILITY (1 << 1) -+ -+static void add_new_bucket(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct bch_devs_mask *devs_may_alloc, -+ unsigned *nr_effective, -+ bool *have_cache, -+ unsigned flags, -+ struct open_bucket *ob) -+{ -+ unsigned durability = -+ bch_dev_bkey_exists(c, ob->ptr.dev)->mi.durability; -+ -+ __clear_bit(ob->ptr.dev, devs_may_alloc->d); -+ *nr_effective += (flags & BUCKET_ALLOC_USE_DURABILITY) -+ ? durability : 1; -+ *have_cache |= !durability; -+ -+ ob_push(c, ptrs, ob); -+} -+ -+enum bucket_alloc_ret -+bch2_bucket_alloc_set(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct dev_stripe_state *stripe, -+ struct bch_devs_mask *devs_may_alloc, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ enum alloc_reserve reserve, -+ unsigned flags, -+ struct closure *cl) -+{ -+ struct dev_alloc_list devs_sorted = -+ bch2_dev_alloc_list(c, stripe, devs_may_alloc); -+ struct bch_dev *ca; -+ enum bucket_alloc_ret ret = INSUFFICIENT_DEVICES; -+ unsigned i; -+ -+ BUG_ON(*nr_effective >= nr_replicas); -+ -+ for (i = 0; i < devs_sorted.nr; i++) { -+ struct open_bucket *ob; -+ -+ ca = rcu_dereference(c->devs[devs_sorted.devs[i]]); -+ if (!ca) -+ continue; -+ -+ if (!ca->mi.durability && *have_cache) -+ continue; -+ -+ ob = bch2_bucket_alloc(c, ca, reserve, -+ flags & BUCKET_MAY_ALLOC_PARTIAL, cl); -+ if (IS_ERR(ob)) { -+ ret = -PTR_ERR(ob); -+ -+ if (cl) -+ return ret; -+ continue; -+ } -+ -+ add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_effective, have_cache, flags, ob); -+ -+ bch2_dev_stripe_increment(ca, stripe); -+ -+ if (*nr_effective >= nr_replicas) -+ return ALLOC_SUCCESS; -+ } -+ -+ return ret; -+} -+ -+/* Allocate from stripes: */ -+ -+/* -+ * if we can't allocate a new stripe because there are already too many -+ * partially filled stripes, force allocating from an existing stripe even when -+ * it's to a device we don't want: -+ */ -+ -+static void bucket_alloc_from_stripe(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct write_point *wp, -+ struct bch_devs_mask *devs_may_alloc, -+ u16 target, -+ unsigned erasure_code, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ unsigned flags) -+{ -+ struct dev_alloc_list devs_sorted; -+ struct ec_stripe_head *h; -+ struct open_bucket *ob; -+ struct bch_dev *ca; -+ unsigned i, ec_idx; -+ -+ if (!erasure_code) -+ return; -+ -+ if (nr_replicas < 2) -+ return; -+ -+ if (ec_open_bucket(c, ptrs)) -+ return; -+ -+ h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1); -+ if (!h) -+ return; -+ -+ devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); -+ -+ for (i = 0; i < devs_sorted.nr; i++) -+ open_bucket_for_each(c, &h->s->blocks, ob, ec_idx) -+ if 
(ob->ptr.dev == devs_sorted.devs[i] && -+ !test_and_set_bit(h->s->data_block_idx[ec_idx], -+ h->s->blocks_allocated)) -+ goto got_bucket; -+ goto out_put_head; -+got_bucket: -+ ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ -+ ob->ec_idx = h->s->data_block_idx[ec_idx]; -+ ob->ec = h->s; -+ -+ add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_effective, have_cache, flags, ob); -+ atomic_inc(&h->s->pin); -+out_put_head: -+ bch2_ec_stripe_head_put(c, h); -+} -+ -+/* Sector allocator */ -+ -+static void get_buckets_from_writepoint(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct write_point *wp, -+ struct bch_devs_mask *devs_may_alloc, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ unsigned flags, -+ bool need_ec) -+{ -+ struct open_buckets ptrs_skip = { .nr = 0 }; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ -+ if (*nr_effective < nr_replicas && -+ test_bit(ob->ptr.dev, devs_may_alloc->d) && -+ (ca->mi.durability || -+ (wp->type == BCH_DATA_user && !*have_cache)) && -+ (ob->ec || !need_ec)) { -+ add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_effective, have_cache, -+ flags, ob); -+ } else { -+ ob_push(c, &ptrs_skip, ob); -+ } -+ } -+ wp->ptrs = ptrs_skip; -+} -+ -+static enum bucket_alloc_ret -+open_bucket_add_buckets(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct write_point *wp, -+ struct bch_devs_list *devs_have, -+ u16 target, -+ unsigned erasure_code, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ enum alloc_reserve reserve, -+ unsigned flags, -+ struct closure *_cl) -+{ -+ struct bch_devs_mask devs; -+ struct open_bucket *ob; -+ struct closure *cl = NULL; -+ enum bucket_alloc_ret ret; -+ unsigned i; -+ -+ rcu_read_lock(); -+ devs = target_rw_devs(c, wp->type, target); -+ rcu_read_unlock(); -+ -+ /* Don't allocate from devices we already have pointers to: */ -+ for (i = 0; i < devs_have->nr; i++) -+ __clear_bit(devs_have->devs[i], devs.d); -+ -+ open_bucket_for_each(c, ptrs, ob, i) -+ __clear_bit(ob->ptr.dev, devs.d); -+ -+ if (erasure_code) { -+ if (!ec_open_bucket(c, ptrs)) { -+ get_buckets_from_writepoint(c, ptrs, wp, &devs, -+ nr_replicas, nr_effective, -+ have_cache, flags, true); -+ if (*nr_effective >= nr_replicas) -+ return 0; -+ } -+ -+ if (!ec_open_bucket(c, ptrs)) { -+ bucket_alloc_from_stripe(c, ptrs, wp, &devs, -+ target, erasure_code, -+ nr_replicas, nr_effective, -+ have_cache, flags); -+ if (*nr_effective >= nr_replicas) -+ return 0; -+ } -+ } -+ -+ get_buckets_from_writepoint(c, ptrs, wp, &devs, -+ nr_replicas, nr_effective, -+ have_cache, flags, false); -+ if (*nr_effective >= nr_replicas) -+ return 0; -+ -+ percpu_down_read(&c->mark_lock); -+ rcu_read_lock(); -+ -+retry_blocking: -+ /* -+ * Try nonblocking first, so that if one device is full we'll try from -+ * other devices: -+ */ -+ ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs, -+ nr_replicas, nr_effective, have_cache, -+ reserve, flags, cl); -+ if (ret && ret != INSUFFICIENT_DEVICES && !cl && _cl) { -+ cl = _cl; -+ goto retry_blocking; -+ } -+ -+ rcu_read_unlock(); -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca, -+ struct open_buckets *obs) -+{ -+ struct open_buckets ptrs = { .nr = 0 }; -+ struct open_bucket *ob, *ob2; -+ unsigned i, j; -+ -+ open_bucket_for_each(c, obs, ob, i) { -+ bool drop = !ca || ob->ptr.dev == ca->dev_idx; -+ -+ if (!drop 
&& ob->ec) { -+ mutex_lock(&ob->ec->lock); -+ open_bucket_for_each(c, &ob->ec->blocks, ob2, j) -+ drop |= ob2->ptr.dev == ca->dev_idx; -+ open_bucket_for_each(c, &ob->ec->parity, ob2, j) -+ drop |= ob2->ptr.dev == ca->dev_idx; -+ mutex_unlock(&ob->ec->lock); -+ } -+ -+ if (drop) -+ bch2_open_bucket_put(c, ob); -+ else -+ ob_push(c, &ptrs, ob); -+ } -+ -+ *obs = ptrs; -+} -+ -+void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca, -+ struct write_point *wp) -+{ -+ mutex_lock(&wp->lock); -+ bch2_open_buckets_stop_dev(c, ca, &wp->ptrs); -+ mutex_unlock(&wp->lock); -+} -+ -+static inline struct hlist_head *writepoint_hash(struct bch_fs *c, -+ unsigned long write_point) -+{ -+ unsigned hash = -+ hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash))); -+ -+ return &c->write_points_hash[hash]; -+} -+ -+static struct write_point *__writepoint_find(struct hlist_head *head, -+ unsigned long write_point) -+{ -+ struct write_point *wp; -+ -+ hlist_for_each_entry_rcu(wp, head, node) -+ if (wp->write_point == write_point) -+ return wp; -+ -+ return NULL; -+} -+ -+static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor) -+{ -+ u64 stranded = c->write_points_nr * c->bucket_size_max; -+ u64 free = bch2_fs_usage_read_short(c).free; -+ -+ return stranded * factor > free; -+} -+ -+static bool try_increase_writepoints(struct bch_fs *c) -+{ -+ struct write_point *wp; -+ -+ if (c->write_points_nr == ARRAY_SIZE(c->write_points) || -+ too_many_writepoints(c, 32)) -+ return false; -+ -+ wp = c->write_points + c->write_points_nr++; -+ hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point)); -+ return true; -+} -+ -+static bool try_decrease_writepoints(struct bch_fs *c, -+ unsigned old_nr) -+{ -+ struct write_point *wp; -+ -+ mutex_lock(&c->write_points_hash_lock); -+ if (c->write_points_nr < old_nr) { -+ mutex_unlock(&c->write_points_hash_lock); -+ return true; -+ } -+ -+ if (c->write_points_nr == 1 || -+ !too_many_writepoints(c, 8)) { -+ mutex_unlock(&c->write_points_hash_lock); -+ return false; -+ } -+ -+ wp = c->write_points + --c->write_points_nr; -+ -+ hlist_del_rcu(&wp->node); -+ mutex_unlock(&c->write_points_hash_lock); -+ -+ bch2_writepoint_stop(c, NULL, wp); -+ return true; -+} -+ -+static struct write_point *writepoint_find(struct bch_fs *c, -+ unsigned long write_point) -+{ -+ struct write_point *wp, *oldest; -+ struct hlist_head *head; -+ -+ if (!(write_point & 1UL)) { -+ wp = (struct write_point *) write_point; -+ mutex_lock(&wp->lock); -+ return wp; -+ } -+ -+ head = writepoint_hash(c, write_point); -+restart_find: -+ wp = __writepoint_find(head, write_point); -+ if (wp) { -+lock_wp: -+ mutex_lock(&wp->lock); -+ if (wp->write_point == write_point) -+ goto out; -+ mutex_unlock(&wp->lock); -+ goto restart_find; -+ } -+restart_find_oldest: -+ oldest = NULL; -+ for (wp = c->write_points; -+ wp < c->write_points + c->write_points_nr; wp++) -+ if (!oldest || time_before64(wp->last_used, oldest->last_used)) -+ oldest = wp; -+ -+ mutex_lock(&oldest->lock); -+ mutex_lock(&c->write_points_hash_lock); -+ if (oldest >= c->write_points + c->write_points_nr || -+ try_increase_writepoints(c)) { -+ mutex_unlock(&c->write_points_hash_lock); -+ mutex_unlock(&oldest->lock); -+ goto restart_find_oldest; -+ } -+ -+ wp = __writepoint_find(head, write_point); -+ if (wp && wp != oldest) { -+ mutex_unlock(&c->write_points_hash_lock); -+ mutex_unlock(&oldest->lock); -+ goto lock_wp; -+ } -+ -+ wp = oldest; -+ hlist_del_rcu(&wp->node); -+ wp->write_point = write_point; -+ 
hlist_add_head_rcu(&wp->node, head); -+ mutex_unlock(&c->write_points_hash_lock); -+out: -+ wp->last_used = sched_clock(); -+ return wp; -+} -+ -+/* -+ * Get us an open_bucket we can allocate from, return with it locked: -+ */ -+struct write_point *bch2_alloc_sectors_start(struct bch_fs *c, -+ unsigned target, -+ unsigned erasure_code, -+ struct write_point_specifier write_point, -+ struct bch_devs_list *devs_have, -+ unsigned nr_replicas, -+ unsigned nr_replicas_required, -+ enum alloc_reserve reserve, -+ unsigned flags, -+ struct closure *cl) -+{ -+ struct write_point *wp; -+ struct open_bucket *ob; -+ struct open_buckets ptrs; -+ unsigned nr_effective, write_points_nr; -+ unsigned ob_flags = 0; -+ bool have_cache; -+ enum bucket_alloc_ret ret; -+ int i; -+ -+ if (!(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) -+ ob_flags |= BUCKET_ALLOC_USE_DURABILITY; -+ -+ BUG_ON(!nr_replicas || !nr_replicas_required); -+retry: -+ ptrs.nr = 0; -+ nr_effective = 0; -+ write_points_nr = c->write_points_nr; -+ have_cache = false; -+ -+ wp = writepoint_find(c, write_point.v); -+ -+ if (wp->type == BCH_DATA_user) -+ ob_flags |= BUCKET_MAY_ALLOC_PARTIAL; -+ -+ /* metadata may not allocate on cache devices: */ -+ if (wp->type != BCH_DATA_user) -+ have_cache = true; -+ -+ if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) { -+ ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, -+ target, erasure_code, -+ nr_replicas, &nr_effective, -+ &have_cache, reserve, -+ ob_flags, cl); -+ } else { -+ ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, -+ target, erasure_code, -+ nr_replicas, &nr_effective, -+ &have_cache, reserve, -+ ob_flags, NULL); -+ if (!ret) -+ goto alloc_done; -+ -+ ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, -+ 0, erasure_code, -+ nr_replicas, &nr_effective, -+ &have_cache, reserve, -+ ob_flags, cl); -+ } -+alloc_done: -+ BUG_ON(!ret && nr_effective < nr_replicas); -+ -+ if (erasure_code && !ec_open_bucket(c, &ptrs)) -+ pr_debug("failed to get ec bucket: ret %u", ret); -+ -+ if (ret == INSUFFICIENT_DEVICES && -+ nr_effective >= nr_replicas_required) -+ ret = 0; -+ -+ if (ret) -+ goto err; -+ -+ /* Free buckets we didn't use: */ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ open_bucket_free_unused(c, wp, ob); -+ -+ wp->ptrs = ptrs; -+ -+ wp->sectors_free = UINT_MAX; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ wp->sectors_free = min(wp->sectors_free, ob->sectors_free); -+ -+ BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX); -+ -+ verify_not_stale(c, &wp->ptrs); -+ -+ return wp; -+err: -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ if (ptrs.nr < ARRAY_SIZE(ptrs.v)) -+ ob_push(c, &ptrs, ob); -+ else -+ open_bucket_free_unused(c, wp, ob); -+ wp->ptrs = ptrs; -+ -+ mutex_unlock(&wp->lock); -+ -+ if (ret == FREELIST_EMPTY && -+ try_decrease_writepoints(c, write_points_nr)) -+ goto retry; -+ -+ switch (ret) { -+ case OPEN_BUCKETS_EMPTY: -+ case FREELIST_EMPTY: -+ return cl ? 
ERR_PTR(-EAGAIN) : ERR_PTR(-ENOSPC); -+ case INSUFFICIENT_DEVICES: -+ return ERR_PTR(-EROFS); -+ default: -+ BUG(); -+ } -+} -+ -+/* -+ * Append pointers to the space we just allocated to @k, and mark @sectors space -+ * as allocated out of @ob -+ */ -+void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp, -+ struct bkey_i *k, unsigned sectors) -+ -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ BUG_ON(sectors > wp->sectors_free); -+ wp->sectors_free -= sectors; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ struct bch_extent_ptr tmp = ob->ptr; -+ -+ tmp.cached = !ca->mi.durability && -+ wp->type == BCH_DATA_user; -+ -+ tmp.offset += ca->mi.bucket_size - ob->sectors_free; -+ bch2_bkey_append_ptr(k, tmp); -+ -+ BUG_ON(sectors > ob->sectors_free); -+ ob->sectors_free -= sectors; -+ } -+} -+ -+/* -+ * Append pointers to the space we just allocated to @k, and mark @sectors space -+ * as allocated out of @ob -+ */ -+void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp) -+{ -+ struct open_buckets ptrs = { .nr = 0 }, keep = { .nr = 0 }; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob); -+ wp->ptrs = keep; -+ -+ mutex_unlock(&wp->lock); -+ -+ bch2_open_buckets_put(c, &ptrs); -+} -+ -+static inline void writepoint_init(struct write_point *wp, -+ enum bch_data_type type) -+{ -+ mutex_init(&wp->lock); -+ wp->type = type; -+} -+ -+void bch2_fs_allocator_foreground_init(struct bch_fs *c) -+{ -+ struct open_bucket *ob; -+ struct write_point *wp; -+ -+ mutex_init(&c->write_points_hash_lock); -+ c->write_points_nr = ARRAY_SIZE(c->write_points); -+ -+ /* open bucket 0 is a sentinal NULL: */ -+ spin_lock_init(&c->open_buckets[0].lock); -+ -+ for (ob = c->open_buckets + 1; -+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { -+ spin_lock_init(&ob->lock); -+ c->open_buckets_nr_free++; -+ -+ ob->freelist = c->open_buckets_freelist; -+ c->open_buckets_freelist = ob - c->open_buckets; -+ } -+ -+ writepoint_init(&c->btree_write_point, BCH_DATA_btree); -+ writepoint_init(&c->rebalance_write_point, BCH_DATA_user); -+ writepoint_init(&c->copygc_write_point, BCH_DATA_user); -+ -+ for (wp = c->write_points; -+ wp < c->write_points + c->write_points_nr; wp++) { -+ writepoint_init(wp, BCH_DATA_user); -+ -+ wp->last_used = sched_clock(); -+ wp->write_point = (unsigned long) wp; -+ hlist_add_head_rcu(&wp->node, -+ writepoint_hash(c, wp->write_point)); -+ } -+} -diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h -new file mode 100644 -index 000000000000..c658295cb8e0 ---- /dev/null -+++ b/fs/bcachefs/alloc_foreground.h -@@ -0,0 +1,138 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ALLOC_FOREGROUND_H -+#define _BCACHEFS_ALLOC_FOREGROUND_H -+ -+#include "bcachefs.h" -+#include "alloc_types.h" -+ -+#include -+ -+struct bkey; -+struct bch_dev; -+struct bch_fs; -+struct bch_devs_List; -+ -+enum bucket_alloc_ret { -+ ALLOC_SUCCESS, -+ OPEN_BUCKETS_EMPTY, -+ FREELIST_EMPTY, /* Allocator thread not keeping up */ -+ INSUFFICIENT_DEVICES, -+}; -+ -+struct dev_alloc_list { -+ unsigned nr; -+ u8 devs[BCH_SB_MEMBERS_MAX]; -+}; -+ -+struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *, -+ struct dev_stripe_state *, -+ struct bch_devs_mask *); -+void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *); -+ -+long bch2_bucket_alloc_new_fs(struct bch_dev *); -+ 
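The error path at the end of bch2_alloc_sectors_start() above folds these allocation outcomes into errno-style returns, retryable emptiness versus a hard lack of devices. A rough standalone sketch of that mapping, with TOY_* values mirroring the enum in this header; the helper name is hypothetical and not bcachefs API:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

enum toy_bucket_alloc_ret {
	TOY_ALLOC_SUCCESS,
	TOY_OPEN_BUCKETS_EMPTY,
	TOY_FREELIST_EMPTY,        /* allocator thread not keeping up */
	TOY_INSUFFICIENT_DEVICES,
};

/* blocking == caller passed a closure, so it can sleep and retry */
static int toy_alloc_errno(enum toy_bucket_alloc_ret ret, bool blocking)
{
	switch (ret) {
	case TOY_ALLOC_SUCCESS:
		return 0;
	case TOY_OPEN_BUCKETS_EMPTY:
	case TOY_FREELIST_EMPTY:
		return blocking ? -EAGAIN : -ENOSPC;
	case TOY_INSUFFICIENT_DEVICES:
		return -EROFS;
	}
	return -EINVAL;            /* unreachable; the real code BUG()s instead */
}

int main(void)
{
	printf("%d %d\n",
	       toy_alloc_errno(TOY_FREELIST_EMPTY, true),        /* -EAGAIN */
	       toy_alloc_errno(TOY_INSUFFICIENT_DEVICES, false)); /* -EROFS  */
	return 0;
}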
-+struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *, -+ enum alloc_reserve, bool, -+ struct closure *); -+ -+static inline void ob_push(struct bch_fs *c, struct open_buckets *obs, -+ struct open_bucket *ob) -+{ -+ BUG_ON(obs->nr >= ARRAY_SIZE(obs->v)); -+ -+ obs->v[obs->nr++] = ob - c->open_buckets; -+} -+ -+#define open_bucket_for_each(_c, _obs, _ob, _i) \ -+ for ((_i) = 0; \ -+ (_i) < (_obs)->nr && \ -+ ((_ob) = (_c)->open_buckets + (_obs)->v[_i], true); \ -+ (_i)++) -+ -+static inline struct open_bucket *ec_open_bucket(struct bch_fs *c, -+ struct open_buckets *obs) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, obs, ob, i) -+ if (ob->ec) -+ return ob; -+ -+ return NULL; -+} -+ -+void bch2_open_bucket_write_error(struct bch_fs *, -+ struct open_buckets *, unsigned); -+ -+void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *); -+ -+static inline void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) -+{ -+ if (atomic_dec_and_test(&ob->pin)) -+ __bch2_open_bucket_put(c, ob); -+} -+ -+static inline void bch2_open_buckets_put(struct bch_fs *c, -+ struct open_buckets *ptrs) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, ptrs, ob, i) -+ bch2_open_bucket_put(c, ob); -+ ptrs->nr = 0; -+} -+ -+static inline void bch2_open_bucket_get(struct bch_fs *c, -+ struct write_point *wp, -+ struct open_buckets *ptrs) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ ob->type = wp->type; -+ atomic_inc(&ob->pin); -+ ob_push(c, ptrs, ob); -+ } -+} -+ -+enum bucket_alloc_ret -+bch2_bucket_alloc_set(struct bch_fs *, struct open_buckets *, -+ struct dev_stripe_state *, struct bch_devs_mask *, -+ unsigned, unsigned *, bool *, enum alloc_reserve, -+ unsigned, struct closure *); -+ -+struct write_point *bch2_alloc_sectors_start(struct bch_fs *, -+ unsigned, unsigned, -+ struct write_point_specifier, -+ struct bch_devs_list *, -+ unsigned, unsigned, -+ enum alloc_reserve, -+ unsigned, -+ struct closure *); -+ -+void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *, -+ struct bkey_i *, unsigned); -+void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *); -+ -+void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *, -+ struct open_buckets *); -+ -+void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *, -+ struct write_point *); -+ -+static inline struct write_point_specifier writepoint_hashed(unsigned long v) -+{ -+ return (struct write_point_specifier) { .v = v | 1 }; -+} -+ -+static inline struct write_point_specifier writepoint_ptr(struct write_point *wp) -+{ -+ return (struct write_point_specifier) { .v = (unsigned long) wp }; -+} -+ -+void bch2_fs_allocator_foreground_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */ -diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h -new file mode 100644 -index 000000000000..20705460bb0a ---- /dev/null -+++ b/fs/bcachefs/alloc_types.h -@@ -0,0 +1,113 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ALLOC_TYPES_H -+#define _BCACHEFS_ALLOC_TYPES_H -+ -+#include -+#include -+ -+#include "clock_types.h" -+#include "fifo.h" -+ -+struct ec_bucket_buf; -+ -+/* There's two of these clocks, one for reads and one for writes: */ -+struct bucket_clock { -+ /* -+ * "now" in (read/write) IO time - incremented whenever we do X amount -+ * of reads or writes. 
-+ * -+ * Goes with the bucket read/write prios: when we read or write to a -+ * bucket we reset the bucket's prio to the current hand; thus hand - -+ * prio = time since bucket was last read/written. -+ * -+ * The units are some amount (bytes/sectors) of data read/written, and -+ * the units can change on the fly if we need to rescale to fit -+ * everything in a u16 - your only guarantee is that the units are -+ * consistent. -+ */ -+ u16 hand; -+ u16 max_last_io; -+ -+ int rw; -+ -+ struct io_timer rescale; -+ struct mutex lock; -+}; -+ -+/* There is one reserve for each type of btree, one for prios and gens -+ * and one for moving GC */ -+enum alloc_reserve { -+ RESERVE_ALLOC = -1, -+ RESERVE_BTREE = 0, -+ RESERVE_MOVINGGC = 1, -+ RESERVE_NONE = 2, -+ RESERVE_NR = 3, -+}; -+ -+typedef FIFO(long) alloc_fifo; -+ -+#define OPEN_BUCKETS_COUNT 1024 -+ -+#define WRITE_POINT_HASH_NR 32 -+#define WRITE_POINT_MAX 32 -+ -+typedef u16 open_bucket_idx_t; -+ -+struct open_bucket { -+ spinlock_t lock; -+ atomic_t pin; -+ open_bucket_idx_t freelist; -+ -+ /* -+ * When an open bucket has an ec_stripe attached, this is the index of -+ * the block in the stripe this open_bucket corresponds to: -+ */ -+ u8 ec_idx; -+ u8 type; -+ unsigned valid:1; -+ unsigned on_partial_list:1; -+ int alloc_reserve:3; -+ unsigned sectors_free; -+ struct bch_extent_ptr ptr; -+ struct ec_stripe_new *ec; -+}; -+ -+#define OPEN_BUCKET_LIST_MAX 15 -+ -+struct open_buckets { -+ open_bucket_idx_t nr; -+ open_bucket_idx_t v[OPEN_BUCKET_LIST_MAX]; -+}; -+ -+struct dev_stripe_state { -+ u64 next_alloc[BCH_SB_MEMBERS_MAX]; -+}; -+ -+struct write_point { -+ struct hlist_node node; -+ struct mutex lock; -+ u64 last_used; -+ unsigned long write_point; -+ enum bch_data_type type; -+ bool is_ec; -+ -+ /* calculated based on how many pointers we're actually going to use: */ -+ unsigned sectors_free; -+ -+ struct open_buckets ptrs; -+ struct dev_stripe_state stripe; -+}; -+ -+struct write_point_specifier { -+ unsigned long v; -+}; -+ -+struct alloc_heap_entry { -+ size_t bucket; -+ size_t nr; -+ unsigned long key; -+}; -+ -+typedef HEAP(struct alloc_heap_entry) alloc_heap; -+ -+#endif /* _BCACHEFS_ALLOC_TYPES_H */ -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -new file mode 100644 -index 000000000000..3a5a00e53cbf ---- /dev/null -+++ b/fs/bcachefs/bcachefs.h -@@ -0,0 +1,883 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_H -+#define _BCACHEFS_H -+ -+/* -+ * SOME HIGH LEVEL CODE DOCUMENTATION: -+ * -+ * Bcache mostly works with cache sets, cache devices, and backing devices. -+ * -+ * Support for multiple cache devices hasn't quite been finished off yet, but -+ * it's about 95% plumbed through. A cache set and its cache devices is sort of -+ * like a md raid array and its component devices. Most of the code doesn't care -+ * about individual cache devices, the main abstraction is the cache set. -+ * -+ * Multiple cache devices is intended to give us the ability to mirror dirty -+ * cached data and metadata, without mirroring clean cached data. -+ * -+ * Backing devices are different, in that they have a lifetime independent of a -+ * cache set. When you register a newly formatted backing device it'll come up -+ * in passthrough mode, and then you can attach and detach a backing device from -+ * a cache set at runtime - while it's mounted and in use. Detaching implicitly -+ * invalidates any cached data for that backing device. -+ * -+ * A cache set can have multiple (many) backing devices attached to it. 
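One small pattern worth calling out from the allocator headers above: struct write_point_specifier is a tagged value. writepoint_hashed() sets bit 0, which can never be set in a pointer to a properly aligned struct write_point, and writepoint_find() tests that bit to tell a direct pointer from a hashed identifier. A minimal standalone sketch of the same trick, with hypothetical toy_* names, assuming (as the kernel code does) that a pointer fits in an unsigned long:

#include <assert.h>
#include <stdalign.h>
#include <stdio.h>

struct toy_wp { alignas(8) unsigned long id; };  /* stand-in for write_point */

static unsigned long toy_hashed(unsigned long v)  { return v | 1UL; }
static unsigned long toy_ptr(struct toy_wp *wp)   { return (unsigned long)wp; }
static int toy_is_hashed(unsigned long spec)      { return spec & 1UL; }

int main(void)
{
	static struct toy_wp wp;                  /* aligned, so bit 0 is clear */

	assert(!toy_is_hashed(toy_ptr(&wp)));     /* direct pointer specifier   */
	assert(toy_is_hashed(toy_hashed(1234)));  /* hashed (id-based) specifier */
	printf("pointer-tagging demo ok\n");
	return 0;
}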
-+ * -+ * There's also flash only volumes - this is the reason for the distinction -+ * between struct cached_dev and struct bcache_device. A flash only volume -+ * works much like a bcache device that has a backing device, except the -+ * "cached" data is always dirty. The end result is that we get thin -+ * provisioning with very little additional code. -+ * -+ * Flash only volumes work but they're not production ready because the moving -+ * garbage collector needs more work. More on that later. -+ * -+ * BUCKETS/ALLOCATION: -+ * -+ * Bcache is primarily designed for caching, which means that in normal -+ * operation all of our available space will be allocated. Thus, we need an -+ * efficient way of deleting things from the cache so we can write new things to -+ * it. -+ * -+ * To do this, we first divide the cache device up into buckets. A bucket is the -+ * unit of allocation; they're typically around 1 mb - anywhere from 128k to 2M+ -+ * works efficiently. -+ * -+ * Each bucket has a 16 bit priority, and an 8 bit generation associated with -+ * it. The gens and priorities for all the buckets are stored contiguously and -+ * packed on disk (in a linked list of buckets - aside from the superblock, all -+ * of bcache's metadata is stored in buckets). -+ * -+ * The priority is used to implement an LRU. We reset a bucket's priority when -+ * we allocate it or on cache it, and every so often we decrement the priority -+ * of each bucket. It could be used to implement something more sophisticated, -+ * if anyone ever gets around to it. -+ * -+ * The generation is used for invalidating buckets. Each pointer also has an 8 -+ * bit generation embedded in it; for a pointer to be considered valid, its gen -+ * must match the gen of the bucket it points into. Thus, to reuse a bucket all -+ * we have to do is increment its gen (and write its new gen to disk; we batch -+ * this up). -+ * -+ * Bcache is entirely COW - we never write twice to a bucket, even buckets that -+ * contain metadata (including btree nodes). -+ * -+ * THE BTREE: -+ * -+ * Bcache is in large part design around the btree. -+ * -+ * At a high level, the btree is just an index of key -> ptr tuples. -+ * -+ * Keys represent extents, and thus have a size field. Keys also have a variable -+ * number of pointers attached to them (potentially zero, which is handy for -+ * invalidating the cache). -+ * -+ * The key itself is an inode:offset pair. The inode number corresponds to a -+ * backing device or a flash only volume. The offset is the ending offset of the -+ * extent within the inode - not the starting offset; this makes lookups -+ * slightly more convenient. -+ * -+ * Pointers contain the cache device id, the offset on that device, and an 8 bit -+ * generation number. More on the gen later. -+ * -+ * Index lookups are not fully abstracted - cache lookups in particular are -+ * still somewhat mixed in with the btree code, but things are headed in that -+ * direction. -+ * -+ * Updates are fairly well abstracted, though. There are two different ways of -+ * updating the btree; insert and replace. -+ * -+ * BTREE_INSERT will just take a list of keys and insert them into the btree - -+ * overwriting (possibly only partially) any extents they overlap with. This is -+ * used to update the index after a write. -+ * -+ * BTREE_REPLACE is really cmpxchg(); it inserts a key into the btree iff it is -+ * overwriting a key that matches another given key. 
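Because, as noted above, an extent key's offset is the ending offset, the start is recovered as offset - size, and a lookup for any sector inside the extent can search for the first key whose offset is strictly greater than that sector. A tiny standalone sketch; the toy_* names are hypothetical and the layout is simplified:

#include <stdint.h>
#include <stdio.h>

struct toy_extent_key {
	uint64_t inode;
	uint64_t offset;   /* END of the extent, in sectors */
	uint32_t size;     /* extent length, in sectors */
};

static uint64_t toy_extent_start(const struct toy_extent_key *k)
{
	return k->offset - k->size;
}

int main(void)
{
	struct toy_extent_key k = { .inode = 1, .offset = 136, .size = 8 };

	/* Covers sectors [128, 136) of inode 1 */
	printf("start %llu end %llu\n",
	       (unsigned long long)toy_extent_start(&k),
	       (unsigned long long)k.offset);
	return 0;
}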
This is used for inserting -+ * data into the cache after a cache miss, and for background writeback, and for -+ * the moving garbage collector. -+ * -+ * There is no "delete" operation; deleting things from the index is -+ * accomplished by either by invalidating pointers (by incrementing a bucket's -+ * gen) or by inserting a key with 0 pointers - which will overwrite anything -+ * previously present at that location in the index. -+ * -+ * This means that there are always stale/invalid keys in the btree. They're -+ * filtered out by the code that iterates through a btree node, and removed when -+ * a btree node is rewritten. -+ * -+ * BTREE NODES: -+ * -+ * Our unit of allocation is a bucket, and we we can't arbitrarily allocate and -+ * free smaller than a bucket - so, that's how big our btree nodes are. -+ * -+ * (If buckets are really big we'll only use part of the bucket for a btree node -+ * - no less than 1/4th - but a bucket still contains no more than a single -+ * btree node. I'd actually like to change this, but for now we rely on the -+ * bucket's gen for deleting btree nodes when we rewrite/split a node.) -+ * -+ * Anyways, btree nodes are big - big enough to be inefficient with a textbook -+ * btree implementation. -+ * -+ * The way this is solved is that btree nodes are internally log structured; we -+ * can append new keys to an existing btree node without rewriting it. This -+ * means each set of keys we write is sorted, but the node is not. -+ * -+ * We maintain this log structure in memory - keeping 1Mb of keys sorted would -+ * be expensive, and we have to distinguish between the keys we have written and -+ * the keys we haven't. So to do a lookup in a btree node, we have to search -+ * each sorted set. But we do merge written sets together lazily, so the cost of -+ * these extra searches is quite low (normally most of the keys in a btree node -+ * will be in one big set, and then there'll be one or two sets that are much -+ * smaller). -+ * -+ * This log structure makes bcache's btree more of a hybrid between a -+ * conventional btree and a compacting data structure, with some of the -+ * advantages of both. -+ * -+ * GARBAGE COLLECTION: -+ * -+ * We can't just invalidate any bucket - it might contain dirty data or -+ * metadata. If it once contained dirty data, other writes might overwrite it -+ * later, leaving no valid pointers into that bucket in the index. -+ * -+ * Thus, the primary purpose of garbage collection is to find buckets to reuse. -+ * It also counts how much valid data it each bucket currently contains, so that -+ * allocation can reuse buckets sooner when they've been mostly overwritten. -+ * -+ * It also does some things that are really internal to the btree -+ * implementation. If a btree node contains pointers that are stale by more than -+ * some threshold, it rewrites the btree node to avoid the bucket's generation -+ * wrapping around. It also merges adjacent btree nodes if they're empty enough. -+ * -+ * THE JOURNAL: -+ * -+ * Bcache's journal is not necessary for consistency; we always strictly -+ * order metadata writes so that the btree and everything else is consistent on -+ * disk in the event of an unclean shutdown, and in fact bcache had writeback -+ * caching (with recovery from unclean shutdown) before journalling was -+ * implemented. 
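The log-structured node layout described above means a lookup has to consult several independently sorted key sets rather than one. A toy standalone model of that search order, newest set first; toy_* names are hypothetical and this is not the bcachefs bset code:

#include <stdio.h>
#include <stdlib.h>

struct toy_bset { const int *keys; size_t nr; };

static int toy_cmp_int(const void *a, const void *b)
{
	return (*(const int *)a > *(const int *)b) -
	       (*(const int *)a < *(const int *)b);
}

/* Return 1 if @key exists in any set, searching the newest (last) set first. */
static int toy_node_lookup(const struct toy_bset *sets, size_t nr_sets, int key)
{
	while (nr_sets--)
		if (bsearch(&key, sets[nr_sets].keys, sets[nr_sets].nr,
			    sizeof(int), toy_cmp_int))
			return 1;
	return 0;
}

int main(void)
{
	static const int old_set[] = { 1, 4, 9, 16 };  /* big sorted set already written */
	static const int new_set[] = { 3, 25 };        /* small appended write */
	struct toy_bset sets[] = { { old_set, 4 }, { new_set, 2 } };

	printf("%d %d %d\n",
	       toy_node_lookup(sets, 2, 9),   /* 1: in the old set      */
	       toy_node_lookup(sets, 2, 25),  /* 1: in the new set      */
	       toy_node_lookup(sets, 2, 7));  /* 0: not present at all  */
	return 0;
}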
-+ * -+ * Rather, the journal is purely a performance optimization; we can't complete a -+ * write until we've updated the index on disk, otherwise the cache would be -+ * inconsistent in the event of an unclean shutdown. This means that without the -+ * journal, on random write workloads we constantly have to update all the leaf -+ * nodes in the btree, and those writes will be mostly empty (appending at most -+ * a few keys each) - highly inefficient in terms of amount of metadata writes, -+ * and it puts more strain on the various btree resorting/compacting code. -+ * -+ * The journal is just a log of keys we've inserted; on startup we just reinsert -+ * all the keys in the open journal entries. That means that when we're updating -+ * a node in the btree, we can wait until a 4k block of keys fills up before -+ * writing them out. -+ * -+ * For simplicity, we only journal updates to leaf nodes; updates to parent -+ * nodes are rare enough (since our leaf nodes are huge) that it wasn't worth -+ * the complexity to deal with journalling them (in particular, journal replay) -+ * - updates to non leaf nodes just happen synchronously (see btree_split()). -+ */ -+ -+#undef pr_fmt -+#define pr_fmt(fmt) "bcachefs: %s() " fmt "\n", __func__ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "bcachefs_format.h" -+#include "fifo.h" -+#include "opts.h" -+#include "util.h" -+ -+#define dynamic_fault(...) 0 -+#define race_fault(...) 0 -+ -+#define bch2_fs_init_fault(name) \ -+ dynamic_fault("bcachefs:bch_fs_init:" name) -+#define bch2_meta_read_fault(name) \ -+ dynamic_fault("bcachefs:meta:read:" name) -+#define bch2_meta_write_fault(name) \ -+ dynamic_fault("bcachefs:meta:write:" name) -+ -+#ifdef __KERNEL__ -+#define bch2_fmt(_c, fmt) "bcachefs (%s): " fmt "\n", ((_c)->name) -+#else -+#define bch2_fmt(_c, fmt) fmt "\n" -+#endif -+ -+#define bch_info(c, fmt, ...) \ -+ printk(KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_notice(c, fmt, ...) \ -+ printk(KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_warn(c, fmt, ...) \ -+ printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_warn_ratelimited(c, fmt, ...) \ -+ printk_ratelimited(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_err(c, fmt, ...) \ -+ printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_err_ratelimited(c, fmt, ...) \ -+ printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) -+ -+#define bch_verbose(c, fmt, ...) \ -+do { \ -+ if ((c)->opts.verbose) \ -+ bch_info(c, fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+#define pr_verbose_init(opts, fmt, ...) 
\ -+do { \ -+ if (opt_get(opts, verbose)) \ -+ pr_info(fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+/* Parameters that are useful for debugging, but should always be compiled in: */ -+#define BCH_DEBUG_PARAMS_ALWAYS() \ -+ BCH_DEBUG_PARAM(key_merging_disabled, \ -+ "Disables merging of extents") \ -+ BCH_DEBUG_PARAM(btree_gc_always_rewrite, \ -+ "Causes mark and sweep to compact and rewrite every " \ -+ "btree node it traverses") \ -+ BCH_DEBUG_PARAM(btree_gc_rewrite_disabled, \ -+ "Disables rewriting of btree nodes during mark and sweep")\ -+ BCH_DEBUG_PARAM(btree_shrinker_disabled, \ -+ "Disables the shrinker callback for the btree node cache") -+ -+/* Parameters that should only be compiled in in debug mode: */ -+#define BCH_DEBUG_PARAMS_DEBUG() \ -+ BCH_DEBUG_PARAM(expensive_debug_checks, \ -+ "Enables various runtime debugging checks that " \ -+ "significantly affect performance") \ -+ BCH_DEBUG_PARAM(debug_check_iterators, \ -+ "Enables extra verification for btree iterators") \ -+ BCH_DEBUG_PARAM(debug_check_bkeys, \ -+ "Run bkey_debugcheck (primarily checking GC/allocation "\ -+ "information) when iterating over keys") \ -+ BCH_DEBUG_PARAM(verify_btree_ondisk, \ -+ "Reread btree nodes at various points to verify the " \ -+ "mergesort in the read path against modifications " \ -+ "done in memory") \ -+ BCH_DEBUG_PARAM(journal_seq_verify, \ -+ "Store the journal sequence number in the version " \ -+ "number of every btree key, and verify that btree " \ -+ "update ordering is preserved during recovery") \ -+ BCH_DEBUG_PARAM(inject_invalid_keys, \ -+ "Store the journal sequence number in the version " \ -+ "number of every btree key, and verify that btree " \ -+ "update ordering is preserved during recovery") \ -+ BCH_DEBUG_PARAM(test_alloc_startup, \ -+ "Force allocator startup to use the slowpath where it" \ -+ "can't find enough free buckets without invalidating" \ -+ "cached data") \ -+ BCH_DEBUG_PARAM(force_reconstruct_read, \ -+ "Force reads to use the reconstruct path, when reading" \ -+ "from erasure coded extents") \ -+ BCH_DEBUG_PARAM(test_restart_gc, \ -+ "Test restarting mark and sweep gc when bucket gens change") -+ -+#define BCH_DEBUG_PARAMS_ALL() BCH_DEBUG_PARAMS_ALWAYS() BCH_DEBUG_PARAMS_DEBUG() -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALL() -+#else -+#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALWAYS() -+#endif -+ -+#define BCH_TIME_STATS() \ -+ x(btree_node_mem_alloc) \ -+ x(btree_node_split) \ -+ x(btree_node_sort) \ -+ x(btree_node_read) \ -+ x(btree_gc) \ -+ x(btree_lock_contended_read) \ -+ x(btree_lock_contended_intent) \ -+ x(btree_lock_contended_write) \ -+ x(data_write) \ -+ x(data_read) \ -+ x(data_promote) \ -+ x(journal_write) \ -+ x(journal_delay) \ -+ x(journal_flush_seq) \ -+ x(blocked_journal) \ -+ x(blocked_allocate) \ -+ x(blocked_allocate_open_bucket) -+ -+enum bch_time_stats { -+#define x(name) BCH_TIME_##name, -+ BCH_TIME_STATS() -+#undef x -+ BCH_TIME_STAT_NR -+}; -+ -+#include "alloc_types.h" -+#include "btree_types.h" -+#include "buckets_types.h" -+#include "clock_types.h" -+#include "ec_types.h" -+#include "journal_types.h" -+#include "keylist_types.h" -+#include "quota_types.h" -+#include "rebalance_types.h" -+#include "replicas_types.h" -+#include "super_types.h" -+ -+/* Number of nodes btree coalesce will try to coalesce at once */ -+#define GC_MERGE_NODES 4U -+ -+/* Maximum number of nodes we might need to allocate atomically: */ -+#define BTREE_RESERVE_MAX (BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1)) -+ 
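The BCH_TIME_STATS()/x() construction above is a classic x-macro: the list is written once and expanded with different definitions of x() wherever a parallel artifact is needed (the enum here; a matching string table is the usual companion for this kind of list). A standalone toy version with hypothetical TOY_* names, not taken from the patch:

#include <stdio.h>

#define TOY_STATS()	\
	x(data_read)	\
	x(data_write)	\
	x(journal_flush)

enum toy_stat {
#define x(name) TOY_##name,
	TOY_STATS()
#undef x
	TOY_STAT_NR
};

/* Second expansion of the same list: a name table that stays in sync. */
static const char * const toy_stat_names[] = {
#define x(name) #name,
	TOY_STATS()
#undef x
};

int main(void)
{
	for (int i = 0; i < TOY_STAT_NR; i++)
		printf("%d = %s\n", i, toy_stat_names[i]);
	return 0;
}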
-+/* Size of the freelist we allocate btree nodes from: */ -+#define BTREE_NODE_RESERVE (BTREE_RESERVE_MAX * 4) -+ -+#define BTREE_NODE_OPEN_BUCKET_RESERVE (BTREE_RESERVE_MAX * BCH_REPLICAS_MAX) -+ -+struct btree; -+ -+enum gc_phase { -+ GC_PHASE_NOT_RUNNING, -+ GC_PHASE_START, -+ GC_PHASE_SB, -+ -+ GC_PHASE_BTREE_EC, -+ GC_PHASE_BTREE_EXTENTS, -+ GC_PHASE_BTREE_INODES, -+ GC_PHASE_BTREE_DIRENTS, -+ GC_PHASE_BTREE_XATTRS, -+ GC_PHASE_BTREE_ALLOC, -+ GC_PHASE_BTREE_QUOTAS, -+ GC_PHASE_BTREE_REFLINK, -+ -+ GC_PHASE_PENDING_DELETE, -+ GC_PHASE_ALLOC, -+}; -+ -+struct gc_pos { -+ enum gc_phase phase; -+ struct bpos pos; -+ unsigned level; -+}; -+ -+struct io_count { -+ u64 sectors[2][BCH_DATA_NR]; -+}; -+ -+struct bch_dev { -+ struct kobject kobj; -+ struct percpu_ref ref; -+ struct completion ref_completion; -+ struct percpu_ref io_ref; -+ struct completion io_ref_completion; -+ -+ struct bch_fs *fs; -+ -+ u8 dev_idx; -+ /* -+ * Cached version of this device's member info from superblock -+ * Committed by bch2_write_super() -> bch_fs_mi_update() -+ */ -+ struct bch_member_cpu mi; -+ uuid_le uuid; -+ char name[BDEVNAME_SIZE]; -+ -+ struct bch_sb_handle disk_sb; -+ struct bch_sb *sb_read_scratch; -+ int sb_write_error; -+ -+ struct bch_devs_mask self; -+ -+ /* biosets used in cloned bios for writing multiple replicas */ -+ struct bio_set replica_set; -+ -+ /* -+ * Buckets: -+ * Per-bucket arrays are protected by c->mark_lock, bucket_lock and -+ * gc_lock, for device resize - holding any is sufficient for access: -+ * Or rcu_read_lock(), but only for ptr_stale(): -+ */ -+ struct bucket_array __rcu *buckets[2]; -+ unsigned long *buckets_nouse; -+ struct rw_semaphore bucket_lock; -+ -+ struct bch_dev_usage __percpu *usage[2]; -+ -+ /* Allocator: */ -+ struct task_struct __rcu *alloc_thread; -+ -+ /* -+ * free: Buckets that are ready to be used -+ * -+ * free_inc: Incoming buckets - these are buckets that currently have -+ * cached data in them, and we can't reuse them until after we write -+ * their new gen to disk. 
After prio_write() finishes writing the new -+ * gens/prios, they'll be moved to the free list (and possibly discarded -+ * in the process) -+ */ -+ alloc_fifo free[RESERVE_NR]; -+ alloc_fifo free_inc; -+ -+ open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT]; -+ open_bucket_idx_t open_buckets_partial_nr; -+ -+ size_t fifo_last_bucket; -+ -+ /* last calculated minimum prio */ -+ u16 max_last_bucket_io[2]; -+ -+ size_t inc_gen_needs_gc; -+ size_t inc_gen_really_needs_gc; -+ -+ /* -+ * XXX: this should be an enum for allocator state, so as to include -+ * error state -+ */ -+ enum { -+ ALLOCATOR_STOPPED, -+ ALLOCATOR_RUNNING, -+ ALLOCATOR_BLOCKED, -+ ALLOCATOR_BLOCKED_FULL, -+ } allocator_state; -+ -+ alloc_heap alloc_heap; -+ -+ atomic64_t rebalance_work; -+ -+ struct journal_device journal; -+ -+ struct work_struct io_error_work; -+ -+ /* The rest of this all shows up in sysfs */ -+ atomic64_t cur_latency[2]; -+ struct time_stats io_latency[2]; -+ -+#define CONGESTED_MAX 1024 -+ atomic_t congested; -+ u64 congested_last; -+ -+ struct io_count __percpu *io_done; -+}; -+ -+enum { -+ /* startup: */ -+ BCH_FS_ALLOC_READ_DONE, -+ BCH_FS_ALLOC_CLEAN, -+ BCH_FS_ALLOCATOR_RUNNING, -+ BCH_FS_ALLOCATOR_STOPPING, -+ BCH_FS_INITIAL_GC_DONE, -+ BCH_FS_BTREE_INTERIOR_REPLAY_DONE, -+ BCH_FS_FSCK_DONE, -+ BCH_FS_STARTED, -+ BCH_FS_RW, -+ -+ /* shutdown: */ -+ BCH_FS_STOPPING, -+ BCH_FS_EMERGENCY_RO, -+ BCH_FS_WRITE_DISABLE_COMPLETE, -+ -+ /* errors: */ -+ BCH_FS_ERROR, -+ BCH_FS_ERRORS_FIXED, -+ -+ /* misc: */ -+ BCH_FS_BDEV_MOUNTED, -+ BCH_FS_FIXED_GENS, -+ BCH_FS_ALLOC_WRITTEN, -+ BCH_FS_REBUILD_REPLICAS, -+ BCH_FS_HOLD_BTREE_WRITES, -+}; -+ -+struct btree_debug { -+ unsigned id; -+ struct dentry *btree; -+ struct dentry *btree_format; -+ struct dentry *failed; -+}; -+ -+struct bch_fs_pcpu { -+ u64 sectors_available; -+}; -+ -+struct journal_seq_blacklist_table { -+ size_t nr; -+ struct journal_seq_blacklist_table_entry { -+ u64 start; -+ u64 end; -+ bool dirty; -+ } entries[0]; -+}; -+ -+struct journal_keys { -+ struct journal_key { -+ enum btree_id btree_id:8; -+ unsigned level:8; -+ struct bkey_i *k; -+ u32 journal_seq; -+ u32 journal_offset; -+ } *d; -+ size_t nr; -+ u64 journal_seq_base; -+}; -+ -+struct bch_fs { -+ struct closure cl; -+ -+ struct list_head list; -+ struct kobject kobj; -+ struct kobject internal; -+ struct kobject opts_dir; -+ struct kobject time_stats; -+ unsigned long flags; -+ -+ int minor; -+ struct device *chardev; -+ struct super_block *vfs_sb; -+ char name[40]; -+ -+ /* ro/rw, add/remove/resize devices: */ -+ struct rw_semaphore state_lock; -+ -+ /* Counts outstanding writes, for clean transition to read-only */ -+ struct percpu_ref writes; -+ struct work_struct read_only_work; -+ -+ struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX]; -+ -+ struct bch_replicas_cpu replicas; -+ struct bch_replicas_cpu replicas_gc; -+ struct mutex replicas_gc_lock; -+ -+ struct journal_entry_res replicas_journal_res; -+ -+ struct bch_disk_groups_cpu __rcu *disk_groups; -+ -+ struct bch_opts opts; -+ -+ /* Updated by bch2_sb_update():*/ -+ struct { -+ uuid_le uuid; -+ uuid_le user_uuid; -+ -+ u16 version; -+ u16 encoded_extent_max; -+ -+ u8 nr_devices; -+ u8 clean; -+ -+ u8 encryption_type; -+ -+ u64 time_base_lo; -+ u32 time_base_hi; -+ u32 time_precision; -+ u64 features; -+ u64 compat; -+ } sb; -+ -+ struct bch_sb_handle disk_sb; -+ -+ unsigned short block_bits; /* ilog2(block_size) */ -+ -+ u16 btree_foreground_merge_threshold; -+ -+ struct closure sb_write; -+ struct mutex sb_lock; -+ 
-+ /* BTREE CACHE */ -+ struct bio_set btree_bio; -+ -+ struct btree_root btree_roots[BTREE_ID_NR]; -+ struct mutex btree_root_lock; -+ -+ struct btree_cache btree_cache; -+ -+ /* -+ * Cache of allocated btree nodes - if we allocate a btree node and -+ * don't use it, if we free it that space can't be reused until going -+ * _all_ the way through the allocator (which exposes us to a livelock -+ * when allocating btree reserves fail halfway through) - instead, we -+ * can stick them here: -+ */ -+ struct btree_alloc btree_reserve_cache[BTREE_NODE_RESERVE * 2]; -+ unsigned btree_reserve_cache_nr; -+ struct mutex btree_reserve_cache_lock; -+ -+ mempool_t btree_interior_update_pool; -+ struct list_head btree_interior_update_list; -+ struct list_head btree_interior_updates_unwritten; -+ struct mutex btree_interior_update_lock; -+ struct closure_waitlist btree_interior_update_wait; -+ -+ struct workqueue_struct *btree_interior_update_worker; -+ struct work_struct btree_interior_update_work; -+ -+ /* btree_iter.c: */ -+ struct mutex btree_trans_lock; -+ struct list_head btree_trans_list; -+ mempool_t btree_iters_pool; -+ -+ struct btree_key_cache btree_key_cache; -+ -+ struct workqueue_struct *wq; -+ /* copygc needs its own workqueue for index updates.. */ -+ struct workqueue_struct *copygc_wq; -+ struct workqueue_struct *journal_reclaim_wq; -+ -+ /* ALLOCATION */ -+ struct delayed_work pd_controllers_update; -+ unsigned pd_controllers_update_seconds; -+ -+ struct bch_devs_mask rw_devs[BCH_DATA_NR]; -+ -+ u64 capacity; /* sectors */ -+ -+ /* -+ * When capacity _decreases_ (due to a disk being removed), we -+ * increment capacity_gen - this invalidates outstanding reservations -+ * and forces them to be revalidated -+ */ -+ u32 capacity_gen; -+ unsigned bucket_size_max; -+ -+ atomic64_t sectors_available; -+ -+ struct bch_fs_pcpu __percpu *pcpu; -+ -+ struct percpu_rw_semaphore mark_lock; -+ -+ seqcount_t usage_lock; -+ struct bch_fs_usage *usage_base; -+ struct bch_fs_usage __percpu *usage[2]; -+ struct bch_fs_usage __percpu *usage_gc; -+ -+ /* single element mempool: */ -+ struct mutex usage_scratch_lock; -+ struct bch_fs_usage *usage_scratch; -+ -+ /* -+ * When we invalidate buckets, we use both the priority and the amount -+ * of good data to determine which buckets to reuse first - to weight -+ * those together consistently we keep track of the smallest nonzero -+ * priority of any bucket. -+ */ -+ struct bucket_clock bucket_clock[2]; -+ -+ struct io_clock io_clock[2]; -+ -+ /* JOURNAL SEQ BLACKLIST */ -+ struct journal_seq_blacklist_table * -+ journal_seq_blacklist_table; -+ struct work_struct journal_seq_blacklist_gc_work; -+ -+ /* ALLOCATOR */ -+ spinlock_t freelist_lock; -+ struct closure_waitlist freelist_wait; -+ u64 blocked_allocate; -+ u64 blocked_allocate_open_bucket; -+ open_bucket_idx_t open_buckets_freelist; -+ open_bucket_idx_t open_buckets_nr_free; -+ struct closure_waitlist open_buckets_wait; -+ struct open_bucket open_buckets[OPEN_BUCKETS_COUNT]; -+ -+ struct write_point btree_write_point; -+ struct write_point rebalance_write_point; -+ -+ struct write_point write_points[WRITE_POINT_MAX]; -+ struct hlist_head write_points_hash[WRITE_POINT_HASH_NR]; -+ struct mutex write_points_hash_lock; -+ unsigned write_points_nr; -+ -+ /* GARBAGE COLLECTION */ -+ struct task_struct *gc_thread; -+ atomic_t kick_gc; -+ unsigned long gc_count; -+ -+ /* -+ * Tracks GC's progress - everything in the range [ZERO_KEY..gc_cur_pos] -+ * has been marked by GC. 
-+ * -+ * gc_cur_phase is a superset of btree_ids (BTREE_ID_EXTENTS etc.) -+ * -+ * Protected by gc_pos_lock. Only written to by GC thread, so GC thread -+ * can read without a lock. -+ */ -+ seqcount_t gc_pos_lock; -+ struct gc_pos gc_pos; -+ -+ /* -+ * The allocation code needs gc_mark in struct bucket to be correct, but -+ * it's not while a gc is in progress. -+ */ -+ struct rw_semaphore gc_lock; -+ -+ /* IO PATH */ -+ struct semaphore io_in_flight; -+ struct bio_set bio_read; -+ struct bio_set bio_read_split; -+ struct bio_set bio_write; -+ struct mutex bio_bounce_pages_lock; -+ mempool_t bio_bounce_pages; -+ struct rhashtable promote_table; -+ -+ mempool_t compression_bounce[2]; -+ mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR]; -+ mempool_t decompress_workspace; -+ ZSTD_parameters zstd_params; -+ -+ struct crypto_shash *sha256; -+ struct crypto_sync_skcipher *chacha20; -+ struct crypto_shash *poly1305; -+ -+ atomic64_t key_version; -+ -+ mempool_t large_bkey_pool; -+ -+ /* REBALANCE */ -+ struct bch_fs_rebalance rebalance; -+ -+ /* COPYGC */ -+ struct task_struct *copygc_thread; -+ copygc_heap copygc_heap; -+ struct bch_pd_controller copygc_pd; -+ struct write_point copygc_write_point; -+ u64 copygc_threshold; -+ -+ /* STRIPES: */ -+ GENRADIX(struct stripe) stripes[2]; -+ -+ ec_stripes_heap ec_stripes_heap; -+ spinlock_t ec_stripes_heap_lock; -+ -+ /* ERASURE CODING */ -+ struct list_head ec_stripe_head_list; -+ struct mutex ec_stripe_head_lock; -+ -+ struct list_head ec_stripe_new_list; -+ struct mutex ec_stripe_new_lock; -+ -+ struct work_struct ec_stripe_create_work; -+ u64 ec_stripe_hint; -+ -+ struct bio_set ec_bioset; -+ -+ struct work_struct ec_stripe_delete_work; -+ struct llist_head ec_stripe_delete_list; -+ -+ /* REFLINK */ -+ u64 reflink_hint; -+ -+ /* VFS IO PATH - fs-io.c */ -+ struct bio_set writepage_bioset; -+ struct bio_set dio_write_bioset; -+ struct bio_set dio_read_bioset; -+ -+ struct bio_list btree_write_error_list; -+ struct work_struct btree_write_error_work; -+ spinlock_t btree_write_error_lock; -+ -+ /* ERRORS */ -+ struct list_head fsck_errors; -+ struct mutex fsck_error_lock; -+ bool fsck_alloc_err; -+ -+ /* QUOTAS */ -+ struct bch_memquota_type quotas[QTYP_NR]; -+ -+ /* DEBUG JUNK */ -+ struct dentry *debug; -+ struct btree_debug btree_debug[BTREE_ID_NR]; -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct btree *verify_data; -+ struct btree_node *verify_ondisk; -+ struct mutex verify_lock; -+#endif -+ -+ u64 unused_inode_hint; -+ -+ /* -+ * A btree node on disk could have too many bsets for an iterator to fit -+ * on the stack - have to dynamically allocate them -+ */ -+ mempool_t fill_iter; -+ -+ mempool_t btree_bounce_pool; -+ -+ struct journal journal; -+ struct list_head journal_entries; -+ struct journal_keys journal_keys; -+ -+ u64 last_bucket_seq_cleanup; -+ -+ /* The rest of this all shows up in sysfs */ -+ atomic_long_t read_realloc_races; -+ atomic_long_t extent_migrate_done; -+ atomic_long_t extent_migrate_raced; -+ -+ unsigned btree_gc_periodic:1; -+ unsigned copy_gc_enabled:1; -+ bool promote_whole_extents; -+ -+#define BCH_DEBUG_PARAM(name, description) bool name; -+ BCH_DEBUG_PARAMS_ALL() -+#undef BCH_DEBUG_PARAM -+ -+ struct time_stats times[BCH_TIME_STAT_NR]; -+}; -+ -+static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages) -+{ -+#ifndef NO_BCACHEFS_FS -+ if (c->vfs_sb) -+ c->vfs_sb->s_bdi->ra_pages = ra_pages; -+#endif -+} -+ -+static inline unsigned bucket_bytes(const struct bch_dev *ca) -+{ -+ return ca->mi.bucket_size 
<< 9; -+} -+ -+static inline unsigned block_bytes(const struct bch_fs *c) -+{ -+ return c->opts.block_size << 9; -+} -+ -+static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, u64 time) -+{ -+ return ns_to_timespec64(time * c->sb.time_precision + c->sb.time_base_lo); -+} -+ -+static inline s64 timespec_to_bch2_time(struct bch_fs *c, struct timespec64 ts) -+{ -+ s64 ns = timespec64_to_ns(&ts) - c->sb.time_base_lo; -+ -+ if (c->sb.time_precision == 1) -+ return ns; -+ -+ return div_s64(ns, c->sb.time_precision); -+} -+ -+static inline s64 bch2_current_time(struct bch_fs *c) -+{ -+ struct timespec64 now; -+ -+ ktime_get_coarse_real_ts64(&now); -+ return timespec_to_bch2_time(c, now); -+} -+ -+static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev) -+{ -+ return dev < c->sb.nr_devices && c->devs[dev]; -+} -+ -+#endif /* _BCACHEFS_H */ -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -new file mode 100644 -index 000000000000..d5a2230e403c ---- /dev/null -+++ b/fs/bcachefs/bcachefs_format.h -@@ -0,0 +1,1671 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FORMAT_H -+#define _BCACHEFS_FORMAT_H -+ -+/* -+ * bcachefs on disk data structures -+ * -+ * OVERVIEW: -+ * -+ * There are three main types of on disk data structures in bcachefs (this is -+ * reduced from 5 in bcache) -+ * -+ * - superblock -+ * - journal -+ * - btree -+ * -+ * The btree is the primary structure; most metadata exists as keys in the -+ * various btrees. There are only a small number of btrees, they're not -+ * sharded - we have one btree for extents, another for inodes, et cetera. -+ * -+ * SUPERBLOCK: -+ * -+ * The superblock contains the location of the journal, the list of devices in -+ * the filesystem, and in general any metadata we need in order to decide -+ * whether we can start a filesystem or prior to reading the journal/btree -+ * roots. -+ * -+ * The superblock is extensible, and most of the contents of the superblock are -+ * in variable length, type tagged fields; see struct bch_sb_field. -+ * -+ * Backup superblocks do not reside in a fixed location; also, superblocks do -+ * not have a fixed size. To locate backup superblocks we have struct -+ * bch_sb_layout; we store a copy of this inside every superblock, and also -+ * before the first superblock. -+ * -+ * JOURNAL: -+ * -+ * The journal primarily records btree updates in the order they occurred; -+ * journal replay consists of just iterating over all the keys in the open -+ * journal entries and re-inserting them into the btrees. -+ * -+ * The journal also contains entry types for the btree roots, and blacklisted -+ * journal sequence numbers (see journal_seq_blacklist.c). -+ * -+ * BTREE: -+ * -+ * bcachefs btrees are copy on write b+ trees, where nodes are big (typically -+ * 128k-256k) and log structured. We use struct btree_node for writing the first -+ * entry in a given node (offset 0), and struct btree_node_entry for all -+ * subsequent writes. -+ * -+ * After the header, btree node entries contain a list of keys in sorted order. -+ * Values are stored inline with the keys; since values are variable length (and -+ * keys effectively are variable length too, due to packing) we can't do random -+ * access without building up additional in memory tables in the btree node read -+ * path. 
-+ * -+ * BTREE KEYS (struct bkey): -+ * -+ * The various btrees share a common format for the key - so as to avoid -+ * switching in fastpath lookup/comparison code - but define their own -+ * structures for the key values. -+ * -+ * The size of a key/value pair is stored as a u8 in units of u64s, so the max -+ * size is just under 2k. The common part also contains a type tag for the -+ * value, and a format field indicating whether the key is packed or not (and -+ * also meant to allow adding new key fields in the future, if desired). -+ * -+ * bkeys, when stored within a btree node, may also be packed. In that case, the -+ * bkey_format in that node is used to unpack it. Packed bkeys mean that we can -+ * be generous with field sizes in the common part of the key format (64 bit -+ * inode number, 64 bit offset, 96 bit version field, etc.) for negligible cost. -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define LE_BITMASK(_bits, name, type, field, offset, end) \ -+static const unsigned name##_OFFSET = offset; \ -+static const unsigned name##_BITS = (end - offset); \ -+static const __u##_bits name##_MAX = (1ULL << (end - offset)) - 1; \ -+ \ -+static inline __u64 name(const type *k) \ -+{ \ -+ return (__le##_bits##_to_cpu(k->field) >> offset) & \ -+ ~(~0ULL << (end - offset)); \ -+} \ -+ \ -+static inline void SET_##name(type *k, __u64 v) \ -+{ \ -+ __u##_bits new = __le##_bits##_to_cpu(k->field); \ -+ \ -+ new &= ~(~(~0ULL << (end - offset)) << offset); \ -+ new |= (v & ~(~0ULL << (end - offset))) << offset; \ -+ k->field = __cpu_to_le##_bits(new); \ -+} -+ -+#define LE16_BITMASK(n, t, f, o, e) LE_BITMASK(16, n, t, f, o, e) -+#define LE32_BITMASK(n, t, f, o, e) LE_BITMASK(32, n, t, f, o, e) -+#define LE64_BITMASK(n, t, f, o, e) LE_BITMASK(64, n, t, f, o, e) -+ -+struct bkey_format { -+ __u8 key_u64s; -+ __u8 nr_fields; -+ /* One unused slot for now: */ -+ __u8 bits_per_field[6]; -+ __le64 field_offset[6]; -+}; -+ -+/* Btree keys - all units are in sectors */ -+ -+struct bpos { -+ /* -+ * Word order matches machine byte order - btree code treats a bpos as a -+ * single large integer, for search/comparison purposes -+ * -+ * Note that wherever a bpos is embedded in another on disk data -+ * structure, it has to be byte swabbed when reading in metadata that -+ * wasn't written in native endian order: -+ */ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ __u32 snapshot; -+ __u64 offset; -+ __u64 inode; -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ __u64 inode; -+ __u64 offset; /* Points to end of extent - sectors */ -+ __u32 snapshot; -+#else -+#error edit for your odd byteorder. 
-+#endif -+} __attribute__((packed, aligned(4))); -+ -+#define KEY_INODE_MAX ((__u64)~0ULL) -+#define KEY_OFFSET_MAX ((__u64)~0ULL) -+#define KEY_SNAPSHOT_MAX ((__u32)~0U) -+#define KEY_SIZE_MAX ((__u32)~0U) -+ -+static inline struct bpos POS(__u64 inode, __u64 offset) -+{ -+ struct bpos ret; -+ -+ ret.inode = inode; -+ ret.offset = offset; -+ ret.snapshot = 0; -+ -+ return ret; -+} -+ -+#define POS_MIN POS(0, 0) -+#define POS_MAX POS(KEY_INODE_MAX, KEY_OFFSET_MAX) -+ -+/* Empty placeholder struct, for container_of() */ -+struct bch_val { -+ __u64 __nothing[0]; -+}; -+ -+struct bversion { -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ __u64 lo; -+ __u32 hi; -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ __u32 hi; -+ __u64 lo; -+#endif -+} __attribute__((packed, aligned(4))); -+ -+struct bkey { -+ /* Size of combined key and value, in u64s */ -+ __u8 u64s; -+ -+ /* Format of key (0 for format local to btree node) */ -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u8 format:7, -+ needs_whiteout:1; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u8 needs_whiteout:1, -+ format:7; -+#else -+#error edit for your odd byteorder. -+#endif -+ -+ /* Type of the value */ -+ __u8 type; -+ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ __u8 pad[1]; -+ -+ struct bversion version; -+ __u32 size; /* extent size, in sectors */ -+ struct bpos p; -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ struct bpos p; -+ __u32 size; /* extent size, in sectors */ -+ struct bversion version; -+ -+ __u8 pad[1]; -+#endif -+} __attribute__((packed, aligned(8))); -+ -+struct bkey_packed { -+ __u64 _data[0]; -+ -+ /* Size of combined key and value, in u64s */ -+ __u8 u64s; -+ -+ /* Format of key (0 for format local to btree node) */ -+ -+ /* -+ * XXX: next incompat on disk format change, switch format and -+ * needs_whiteout - bkey_packed() will be cheaper if format is the high -+ * bits of the bitfield -+ */ -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u8 format:7, -+ needs_whiteout:1; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u8 needs_whiteout:1, -+ format:7; -+#endif -+ -+ /* Type of the value */ -+ __u8 type; -+ __u8 key_start[0]; -+ -+ /* -+ * We copy bkeys with struct assignment in various places, and while -+ * that shouldn't be done with packed bkeys we can't disallow it in C, -+ * and it's legal to cast a bkey to a bkey_packed - so padding it out -+ * to the same size as struct bkey should hopefully be safest. 
-+ */ -+ __u8 pad[sizeof(struct bkey) - 3]; -+} __attribute__((packed, aligned(8))); -+ -+#define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64)) -+#define BKEY_U64s_MAX U8_MAX -+#define BKEY_VAL_U64s_MAX (BKEY_U64s_MAX - BKEY_U64s) -+ -+#define KEY_PACKED_BITS_START 24 -+ -+#define KEY_FORMAT_LOCAL_BTREE 0 -+#define KEY_FORMAT_CURRENT 1 -+ -+enum bch_bkey_fields { -+ BKEY_FIELD_INODE, -+ BKEY_FIELD_OFFSET, -+ BKEY_FIELD_SNAPSHOT, -+ BKEY_FIELD_SIZE, -+ BKEY_FIELD_VERSION_HI, -+ BKEY_FIELD_VERSION_LO, -+ BKEY_NR_FIELDS, -+}; -+ -+#define bkey_format_field(name, field) \ -+ [BKEY_FIELD_##name] = (sizeof(((struct bkey *) NULL)->field) * 8) -+ -+#define BKEY_FORMAT_CURRENT \ -+((struct bkey_format) { \ -+ .key_u64s = BKEY_U64s, \ -+ .nr_fields = BKEY_NR_FIELDS, \ -+ .bits_per_field = { \ -+ bkey_format_field(INODE, p.inode), \ -+ bkey_format_field(OFFSET, p.offset), \ -+ bkey_format_field(SNAPSHOT, p.snapshot), \ -+ bkey_format_field(SIZE, size), \ -+ bkey_format_field(VERSION_HI, version.hi), \ -+ bkey_format_field(VERSION_LO, version.lo), \ -+ }, \ -+}) -+ -+/* bkey with inline value */ -+struct bkey_i { -+ __u64 _data[0]; -+ -+ union { -+ struct { -+ /* Size of combined key and value, in u64s */ -+ __u8 u64s; -+ }; -+ struct { -+ struct bkey k; -+ struct bch_val v; -+ }; -+ }; -+}; -+ -+#define KEY(_inode, _offset, _size) \ -+((struct bkey) { \ -+ .u64s = BKEY_U64s, \ -+ .format = KEY_FORMAT_CURRENT, \ -+ .p = POS(_inode, _offset), \ -+ .size = _size, \ -+}) -+ -+static inline void bkey_init(struct bkey *k) -+{ -+ *k = KEY(0, 0, 0); -+} -+ -+#define bkey_bytes(_k) ((_k)->u64s * sizeof(__u64)) -+ -+#define __BKEY_PADDED(key, pad) \ -+ struct { struct bkey_i key; __u64 key ## _pad[pad]; } -+ -+/* -+ * - DELETED keys are used internally to mark keys that should be ignored but -+ * override keys in composition order. Their version number is ignored. -+ * -+ * - DISCARDED keys indicate that the data is all 0s because it has been -+ * discarded. DISCARDs may have a version; if the version is nonzero the key -+ * will be persistent, otherwise the key will be dropped whenever the btree -+ * node is rewritten (like DELETED keys). -+ * -+ * - ERROR: any read of the data returns a read error, as the data was lost due -+ * to a failing device. Like DISCARDED keys, they can be removed (overridden) -+ * by new writes or cluster-wide GC. Node repair can also overwrite them with -+ * the same or a more recent version number, but not with an older version -+ * number. -+ * -+ * - WHITEOUT: for hash table btrees -+*/ -+#define BCH_BKEY_TYPES() \ -+ x(deleted, 0) \ -+ x(discard, 1) \ -+ x(error, 2) \ -+ x(cookie, 3) \ -+ x(whiteout, 4) \ -+ x(btree_ptr, 5) \ -+ x(extent, 6) \ -+ x(reservation, 7) \ -+ x(inode, 8) \ -+ x(inode_generation, 9) \ -+ x(dirent, 10) \ -+ x(xattr, 11) \ -+ x(alloc, 12) \ -+ x(quota, 13) \ -+ x(stripe, 14) \ -+ x(reflink_p, 15) \ -+ x(reflink_v, 16) \ -+ x(inline_data, 17) \ -+ x(btree_ptr_v2, 18) -+ -+enum bch_bkey_type { -+#define x(name, nr) KEY_TYPE_##name = nr, -+ BCH_BKEY_TYPES() -+#undef x -+ KEY_TYPE_MAX, -+}; -+ -+struct bch_cookie { -+ struct bch_val v; -+ __le64 cookie; -+}; -+ -+/* Extents */ -+ -+/* -+ * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally -+ * preceded by checksum/compression information (bch_extent_crc32 or -+ * bch_extent_crc64). 
-+ * -+ * One major determining factor in the format of extents is how we handle and -+ * represent extents that have been partially overwritten and thus trimmed: -+ * -+ * If an extent is not checksummed or compressed, when the extent is trimmed we -+ * don't have to remember the extent we originally allocated and wrote: we can -+ * merely adjust ptr->offset to point to the start of the data that is currently -+ * live. The size field in struct bkey records the current (live) size of the -+ * extent, and is also used to mean "size of region on disk that we point to" in -+ * this case. -+ * -+ * Thus an extent that is not checksummed or compressed will consist only of a -+ * list of bch_extent_ptrs, with none of the fields in -+ * bch_extent_crc32/bch_extent_crc64. -+ * -+ * When an extent is checksummed or compressed, it's not possible to read only -+ * the data that is currently live: we have to read the entire extent that was -+ * originally written, and then return only the part of the extent that is -+ * currently live. -+ * -+ * Thus, in addition to the current size of the extent in struct bkey, we need -+ * to store the size of the originally allocated space - this is the -+ * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also, -+ * when the extent is trimmed, instead of modifying the offset field of the -+ * pointer, we keep a second smaller offset field - "offset into the original -+ * extent of the currently live region". -+ * -+ * The other major determining factor is replication and data migration: -+ * -+ * Each pointer may have its own bch_extent_crc32/64. When doing a replicated -+ * write, we will initially write all the replicas in the same format, with the -+ * same checksum type and compression format - however, when copygc runs later (or -+ * tiering/cache promotion, anything that moves data), it is not in general -+ * going to rewrite all the pointers at once - one of the replicas may be in a -+ * bucket on one device that has very little fragmentation while another lives -+ * in a bucket that has become heavily fragmented, and thus is being rewritten -+ * sooner than the rest. -+ * -+ * Thus it will only move a subset of the pointers (or in the case of -+ * tiering/cache promotion perhaps add a single pointer without dropping any -+ * current pointers), and if the extent has been partially overwritten it must -+ * write only the currently live portion (or copygc would not be able to reduce -+ * fragmentation!) - which necessitates a different bch_extent_crc format for -+ * the new pointer. -+ * -+ * But in the interests of space efficiency, we don't want to store one -+ * bch_extent_crc for each pointer if we don't have to. -+ * -+ * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and -+ * bch_extent_ptrs appended arbitrarily one after the other. We determine the -+ * type of a given entry with a scheme similar to utf8 (except we're encoding a -+ * type, not a size), encoding the type in the position of the first set bit: -+ * -+ * bch_extent_crc32 - 0b1 -+ * bch_extent_ptr - 0b10 -+ * bch_extent_crc64 - 0b100 -+ * -+ * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and -+ * bch_extent_crc64 is the least constrained). -+ * -+ * Then, each bch_extent_crc32/64 applies to the pointers that follow after it, -+ * until the next bch_extent_crc32/64. -+ * -+ * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer -+ * is neither checksummed nor compressed. 
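-+ *
-+ * For example, a checksummed extent with three replicas might have its value
-+ * laid out as
-+ *
-+ *	crc32  ptr  ptr  crc64  ptr
-+ *
-+ * where the crc32 entry describes the first two pointers and the crc64 entry
-+ * describes the third - say, because copygc rewrote that one replica with
-+ * different checksum parameters.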
-+ */ -+ -+/* 128 bits, sufficient for cryptographic MACs: */ -+struct bch_csum { -+ __le64 lo; -+ __le64 hi; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_EXTENT_ENTRY_TYPES() \ -+ x(ptr, 0) \ -+ x(crc32, 1) \ -+ x(crc64, 2) \ -+ x(crc128, 3) \ -+ x(stripe_ptr, 4) -+#define BCH_EXTENT_ENTRY_MAX 5 -+ -+enum bch_extent_entry_type { -+#define x(f, n) BCH_EXTENT_ENTRY_##f = n, -+ BCH_EXTENT_ENTRY_TYPES() -+#undef x -+}; -+ -+/* Compressed/uncompressed size are stored biased by 1: */ -+struct bch_extent_crc32 { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u32 type:2, -+ _compressed_size:7, -+ _uncompressed_size:7, -+ offset:7, -+ _unused:1, -+ csum_type:4, -+ compression_type:4; -+ __u32 csum; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u32 csum; -+ __u32 compression_type:4, -+ csum_type:4, -+ _unused:1, -+ offset:7, -+ _uncompressed_size:7, -+ _compressed_size:7, -+ type:2; -+#endif -+} __attribute__((packed, aligned(8))); -+ -+#define CRC32_SIZE_MAX (1U << 7) -+#define CRC32_NONCE_MAX 0 -+ -+struct bch_extent_crc64 { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:3, -+ _compressed_size:9, -+ _uncompressed_size:9, -+ offset:9, -+ nonce:10, -+ csum_type:4, -+ compression_type:4, -+ csum_hi:16; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 csum_hi:16, -+ compression_type:4, -+ csum_type:4, -+ nonce:10, -+ offset:9, -+ _uncompressed_size:9, -+ _compressed_size:9, -+ type:3; -+#endif -+ __u64 csum_lo; -+} __attribute__((packed, aligned(8))); -+ -+#define CRC64_SIZE_MAX (1U << 9) -+#define CRC64_NONCE_MAX ((1U << 10) - 1) -+ -+struct bch_extent_crc128 { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:4, -+ _compressed_size:13, -+ _uncompressed_size:13, -+ offset:13, -+ nonce:13, -+ csum_type:4, -+ compression_type:4; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 compression_type:4, -+ csum_type:4, -+ nonce:13, -+ offset:13, -+ _uncompressed_size:13, -+ _compressed_size:13, -+ type:4; -+#endif -+ struct bch_csum csum; -+} __attribute__((packed, aligned(8))); -+ -+#define CRC128_SIZE_MAX (1U << 13) -+#define CRC128_NONCE_MAX ((1U << 13) - 1) -+ -+/* -+ * @reservation - pointer hasn't been written to, just reserved -+ */ -+struct bch_extent_ptr { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:1, -+ cached:1, -+ unused:1, -+ reservation:1, -+ offset:44, /* 8 petabytes */ -+ dev:8, -+ gen:8; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 gen:8, -+ dev:8, -+ offset:44, -+ reservation:1, -+ unused:1, -+ cached:1, -+ type:1; -+#endif -+} __attribute__((packed, aligned(8))); -+ -+struct bch_extent_stripe_ptr { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:5, -+ block:8, -+ idx:51; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 idx:51, -+ block:8, -+ type:5; -+#endif -+}; -+ -+struct bch_extent_reservation { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:6, -+ unused:22, -+ replicas:4, -+ generation:32; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 generation:32, -+ replicas:4, -+ unused:22, -+ type:6; -+#endif -+}; -+ -+union bch_extent_entry { -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 -+ unsigned long type; -+#elif __BITS_PER_LONG == 32 -+ struct { -+ unsigned long pad; -+ unsigned long type; -+ }; -+#else -+#error edit for your odd byteorder. 
-+#endif -+ -+#define x(f, n) struct bch_extent_##f f; -+ BCH_EXTENT_ENTRY_TYPES() -+#undef x -+}; -+ -+struct bch_btree_ptr { -+ struct bch_val v; -+ -+ struct bch_extent_ptr start[0]; -+ __u64 _data[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_btree_ptr_v2 { -+ struct bch_val v; -+ -+ __u64 mem_ptr; -+ __le64 seq; -+ __le16 sectors_written; -+ /* In case we ever decide to do variable size btree nodes: */ -+ __le16 sectors; -+ struct bpos min_key; -+ struct bch_extent_ptr start[0]; -+ __u64 _data[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_extent { -+ struct bch_val v; -+ -+ union bch_extent_entry start[0]; -+ __u64 _data[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_reservation { -+ struct bch_val v; -+ -+ __le32 generation; -+ __u8 nr_replicas; -+ __u8 pad[3]; -+} __attribute__((packed, aligned(8))); -+ -+/* Maximum size (in u64s) a single pointer could be: */ -+#define BKEY_EXTENT_PTR_U64s_MAX\ -+ ((sizeof(struct bch_extent_crc128) + \ -+ sizeof(struct bch_extent_ptr)) / sizeof(u64)) -+ -+/* Maximum possible size of an entire extent value: */ -+#define BKEY_EXTENT_VAL_U64s_MAX \ -+ (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1)) -+ -+#define BKEY_PADDED(key) __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX) -+ -+/* * Maximum possible size of an entire extent, key + value: */ -+#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) -+ -+/* Btree pointers don't carry around checksums: */ -+#define BKEY_BTREE_PTR_VAL_U64s_MAX \ -+ ((sizeof(struct bch_btree_ptr_v2) + \ -+ sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(u64)) -+#define BKEY_BTREE_PTR_U64s_MAX \ -+ (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX) -+ -+/* Inodes */ -+ -+#define BLOCKDEV_INODE_MAX 4096 -+ -+#define BCACHEFS_ROOT_INO 4096 -+ -+struct bch_inode { -+ struct bch_val v; -+ -+ __le64 bi_hash_seed; -+ __le32 bi_flags; -+ __le16 bi_mode; -+ __u8 fields[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_inode_generation { -+ struct bch_val v; -+ -+ __le32 bi_generation; -+ __le32 pad; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_INODE_FIELDS() \ -+ x(bi_atime, 64) \ -+ x(bi_ctime, 64) \ -+ x(bi_mtime, 64) \ -+ x(bi_otime, 64) \ -+ x(bi_size, 64) \ -+ x(bi_sectors, 64) \ -+ x(bi_uid, 32) \ -+ x(bi_gid, 32) \ -+ x(bi_nlink, 32) \ -+ x(bi_generation, 32) \ -+ x(bi_dev, 32) \ -+ x(bi_data_checksum, 8) \ -+ x(bi_compression, 8) \ -+ x(bi_project, 32) \ -+ x(bi_background_compression, 8) \ -+ x(bi_data_replicas, 8) \ -+ x(bi_promote_target, 16) \ -+ x(bi_foreground_target, 16) \ -+ x(bi_background_target, 16) \ -+ x(bi_erasure_code, 16) \ -+ x(bi_fields_set, 16) -+ -+/* subset of BCH_INODE_FIELDS */ -+#define BCH_INODE_OPTS() \ -+ x(data_checksum, 8) \ -+ x(compression, 8) \ -+ x(project, 32) \ -+ x(background_compression, 8) \ -+ x(data_replicas, 8) \ -+ x(promote_target, 16) \ -+ x(foreground_target, 16) \ -+ x(background_target, 16) \ -+ x(erasure_code, 16) -+ -+enum inode_opt_id { -+#define x(name, ...) 
\ -+ Inode_opt_##name, -+ BCH_INODE_OPTS() -+#undef x -+ Inode_opt_nr, -+}; -+ -+enum { -+ /* -+ * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL -+ * flags) -+ */ -+ __BCH_INODE_SYNC = 0, -+ __BCH_INODE_IMMUTABLE = 1, -+ __BCH_INODE_APPEND = 2, -+ __BCH_INODE_NODUMP = 3, -+ __BCH_INODE_NOATIME = 4, -+ -+ __BCH_INODE_I_SIZE_DIRTY= 5, -+ __BCH_INODE_I_SECTORS_DIRTY= 6, -+ __BCH_INODE_UNLINKED = 7, -+ -+ /* bits 20+ reserved for packed fields below: */ -+}; -+ -+#define BCH_INODE_SYNC (1 << __BCH_INODE_SYNC) -+#define BCH_INODE_IMMUTABLE (1 << __BCH_INODE_IMMUTABLE) -+#define BCH_INODE_APPEND (1 << __BCH_INODE_APPEND) -+#define BCH_INODE_NODUMP (1 << __BCH_INODE_NODUMP) -+#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME) -+#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY) -+#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY) -+#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED) -+ -+LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); -+LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 32); -+ -+/* Dirents */ -+ -+/* -+ * Dirents (and xattrs) have to implement string lookups; since our b-tree -+ * doesn't support arbitrary length strings for the key, we instead index by a -+ * 64 bit hash (currently truncated sha1) of the string, stored in the offset -+ * field of the key - using linear probing to resolve hash collisions. This also -+ * provides us with the readdir cookie posix requires. -+ * -+ * Linear probing requires us to use whiteouts for deletions, in the event of a -+ * collision: -+ */ -+ -+struct bch_dirent { -+ struct bch_val v; -+ -+ /* Target inode number: */ -+ __le64 d_inum; -+ -+ /* -+ * Copy of mode bits 12-15 from the target inode - so userspace can get -+ * the filetype without having to do a stat() -+ */ -+ __u8 d_type; -+ -+ __u8 d_name[]; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_NAME_MAX (U8_MAX * sizeof(u64) - \ -+ sizeof(struct bkey) - \ -+ offsetof(struct bch_dirent, d_name)) -+ -+ -+/* Xattrs */ -+ -+#define KEY_TYPE_XATTR_INDEX_USER 0 -+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1 -+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2 -+#define KEY_TYPE_XATTR_INDEX_TRUSTED 3 -+#define KEY_TYPE_XATTR_INDEX_SECURITY 4 -+ -+struct bch_xattr { -+ struct bch_val v; -+ __u8 x_type; -+ __u8 x_name_len; -+ __le16 x_val_len; -+ __u8 x_name[]; -+} __attribute__((packed, aligned(8))); -+ -+/* Bucket/allocation information: */ -+ -+struct bch_alloc { -+ struct bch_val v; -+ __u8 fields; -+ __u8 gen; -+ __u8 data[]; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_ALLOC_FIELDS() \ -+ x(read_time, 16) \ -+ x(write_time, 16) \ -+ x(data_type, 8) \ -+ x(dirty_sectors, 16) \ -+ x(cached_sectors, 16) \ -+ x(oldest_gen, 8) -+ -+enum { -+#define x(name, bytes) BCH_ALLOC_FIELD_##name, -+ BCH_ALLOC_FIELDS() -+#undef x -+ BCH_ALLOC_FIELD_NR -+}; -+ -+static const unsigned BCH_ALLOC_FIELD_BYTES[] = { -+#define x(name, bits) [BCH_ALLOC_FIELD_##name] = bits / 8, -+ BCH_ALLOC_FIELDS() -+#undef x -+}; -+ -+#define x(name, bits) + (bits / 8) -+static const unsigned BKEY_ALLOC_VAL_U64s_MAX = -+ DIV_ROUND_UP(offsetof(struct bch_alloc, data) -+ BCH_ALLOC_FIELDS(), sizeof(u64)); -+#undef x -+ -+#define BKEY_ALLOC_U64s_MAX (BKEY_U64s + BKEY_ALLOC_VAL_U64s_MAX) -+ -+/* Quotas: */ -+ -+enum quota_types { -+ QTYP_USR = 0, -+ QTYP_GRP = 1, -+ QTYP_PRJ = 2, -+ QTYP_NR = 3, -+}; -+ -+enum quota_counters { -+ Q_SPC = 0, -+ Q_INO = 1, -+ Q_COUNTERS = 2, -+}; -+ -+struct bch_quota_counter { -+ __le64 
hardlimit; -+ __le64 softlimit; -+}; -+ -+struct bch_quota { -+ struct bch_val v; -+ struct bch_quota_counter c[Q_COUNTERS]; -+} __attribute__((packed, aligned(8))); -+ -+/* Erasure coding */ -+ -+struct bch_stripe { -+ struct bch_val v; -+ __le16 sectors; -+ __u8 algorithm; -+ __u8 nr_blocks; -+ __u8 nr_redundant; -+ -+ __u8 csum_granularity_bits; -+ __u8 csum_type; -+ __u8 pad; -+ -+ struct bch_extent_ptr ptrs[0]; -+} __attribute__((packed, aligned(8))); -+ -+/* Reflink: */ -+ -+struct bch_reflink_p { -+ struct bch_val v; -+ __le64 idx; -+ -+ __le32 reservation_generation; -+ __u8 nr_replicas; -+ __u8 pad[3]; -+}; -+ -+struct bch_reflink_v { -+ struct bch_val v; -+ __le64 refcount; -+ union bch_extent_entry start[0]; -+ __u64 _data[0]; -+}; -+ -+/* Inline data */ -+ -+struct bch_inline_data { -+ struct bch_val v; -+ u8 data[0]; -+}; -+ -+/* Optional/variable size superblock sections: */ -+ -+struct bch_sb_field { -+ __u64 _data[0]; -+ __le32 u64s; -+ __le32 type; -+}; -+ -+#define BCH_SB_FIELDS() \ -+ x(journal, 0) \ -+ x(members, 1) \ -+ x(crypt, 2) \ -+ x(replicas_v0, 3) \ -+ x(quota, 4) \ -+ x(disk_groups, 5) \ -+ x(clean, 6) \ -+ x(replicas, 7) \ -+ x(journal_seq_blacklist, 8) -+ -+enum bch_sb_field_type { -+#define x(f, nr) BCH_SB_FIELD_##f = nr, -+ BCH_SB_FIELDS() -+#undef x -+ BCH_SB_FIELD_NR -+}; -+ -+/* BCH_SB_FIELD_journal: */ -+ -+struct bch_sb_field_journal { -+ struct bch_sb_field field; -+ __le64 buckets[0]; -+}; -+ -+/* BCH_SB_FIELD_members: */ -+ -+#define BCH_MIN_NR_NBUCKETS (1 << 6) -+ -+struct bch_member { -+ uuid_le uuid; -+ __le64 nbuckets; /* device size */ -+ __le16 first_bucket; /* index of first bucket used */ -+ __le16 bucket_size; /* sectors */ -+ __le32 pad; -+ __le64 last_mount; /* time_t */ -+ -+ __le64 flags[2]; -+}; -+ -+LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags[0], 0, 4) -+/* 4-10 unused, was TIER, HAS_(META)DATA */ -+LE64_BITMASK(BCH_MEMBER_REPLACEMENT, struct bch_member, flags[0], 10, 14) -+LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags[0], 14, 15) -+LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags[0], 15, 20) -+LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags[0], 20, 28) -+LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags[0], 28, 30) -+ -+#define BCH_TIER_MAX 4U -+ -+#if 0 -+LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20); -+LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40); -+#endif -+ -+enum bch_member_state { -+ BCH_MEMBER_STATE_RW = 0, -+ BCH_MEMBER_STATE_RO = 1, -+ BCH_MEMBER_STATE_FAILED = 2, -+ BCH_MEMBER_STATE_SPARE = 3, -+ BCH_MEMBER_STATE_NR = 4, -+}; -+ -+enum cache_replacement { -+ CACHE_REPLACEMENT_LRU = 0, -+ CACHE_REPLACEMENT_FIFO = 1, -+ CACHE_REPLACEMENT_RANDOM = 2, -+ CACHE_REPLACEMENT_NR = 3, -+}; -+ -+struct bch_sb_field_members { -+ struct bch_sb_field field; -+ struct bch_member members[0]; -+}; -+ -+/* BCH_SB_FIELD_crypt: */ -+ -+struct nonce { -+ __le32 d[4]; -+}; -+ -+struct bch_key { -+ __le64 key[4]; -+}; -+ -+#define BCH_KEY_MAGIC \ -+ (((u64) 'b' << 0)|((u64) 'c' << 8)| \ -+ ((u64) 'h' << 16)|((u64) '*' << 24)| \ -+ ((u64) '*' << 32)|((u64) 'k' << 40)| \ -+ ((u64) 'e' << 48)|((u64) 'y' << 56)) -+ -+struct bch_encrypted_key { -+ __le64 magic; -+ struct bch_key key; -+}; -+ -+/* -+ * If this field is present in the superblock, it stores an encryption key which -+ * is used encrypt all other data/metadata. 
The key will normally be encrypted -+ * with the key userspace provides, but if encryption has been turned off we'll -+ * just store the master key unencrypted in the superblock so we can access the -+ * previously encrypted data. -+ */ -+struct bch_sb_field_crypt { -+ struct bch_sb_field field; -+ -+ __le64 flags; -+ __le64 kdf_flags; -+ struct bch_encrypted_key key; -+}; -+ -+LE64_BITMASK(BCH_CRYPT_KDF_TYPE, struct bch_sb_field_crypt, flags, 0, 4); -+ -+enum bch_kdf_types { -+ BCH_KDF_SCRYPT = 0, -+ BCH_KDF_NR = 1, -+}; -+ -+/* stored as base 2 log of scrypt params: */ -+LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16); -+LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32); -+LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48); -+ -+/* BCH_SB_FIELD_replicas: */ -+ -+#define BCH_DATA_TYPES() \ -+ x(none, 0) \ -+ x(sb, 1) \ -+ x(journal, 2) \ -+ x(btree, 3) \ -+ x(user, 4) \ -+ x(cached, 5) -+ -+enum bch_data_type { -+#define x(t, n) BCH_DATA_##t, -+ BCH_DATA_TYPES() -+#undef x -+ BCH_DATA_NR -+}; -+ -+struct bch_replicas_entry_v0 { -+ __u8 data_type; -+ __u8 nr_devs; -+ __u8 devs[0]; -+} __attribute__((packed)); -+ -+struct bch_sb_field_replicas_v0 { -+ struct bch_sb_field field; -+ struct bch_replicas_entry_v0 entries[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_replicas_entry { -+ __u8 data_type; -+ __u8 nr_devs; -+ __u8 nr_required; -+ __u8 devs[0]; -+} __attribute__((packed)); -+ -+#define replicas_entry_bytes(_i) \ -+ (offsetof(typeof(*(_i)), devs) + (_i)->nr_devs) -+ -+struct bch_sb_field_replicas { -+ struct bch_sb_field field; -+ struct bch_replicas_entry entries[0]; -+} __attribute__((packed, aligned(8))); -+ -+/* BCH_SB_FIELD_quota: */ -+ -+struct bch_sb_quota_counter { -+ __le32 timelimit; -+ __le32 warnlimit; -+}; -+ -+struct bch_sb_quota_type { -+ __le64 flags; -+ struct bch_sb_quota_counter c[Q_COUNTERS]; -+}; -+ -+struct bch_sb_field_quota { -+ struct bch_sb_field field; -+ struct bch_sb_quota_type q[QTYP_NR]; -+} __attribute__((packed, aligned(8))); -+ -+/* BCH_SB_FIELD_disk_groups: */ -+ -+#define BCH_SB_LABEL_SIZE 32 -+ -+struct bch_disk_group { -+ __u8 label[BCH_SB_LABEL_SIZE]; -+ __le64 flags[2]; -+} __attribute__((packed, aligned(8))); -+ -+LE64_BITMASK(BCH_GROUP_DELETED, struct bch_disk_group, flags[0], 0, 1) -+LE64_BITMASK(BCH_GROUP_DATA_ALLOWED, struct bch_disk_group, flags[0], 1, 6) -+LE64_BITMASK(BCH_GROUP_PARENT, struct bch_disk_group, flags[0], 6, 24) -+ -+struct bch_sb_field_disk_groups { -+ struct bch_sb_field field; -+ struct bch_disk_group entries[0]; -+} __attribute__((packed, aligned(8))); -+ -+/* -+ * On clean shutdown, store btree roots and current journal sequence number in -+ * the superblock: -+ */ -+struct jset_entry { -+ __le16 u64s; -+ __u8 btree_id; -+ __u8 level; -+ __u8 type; /* designates what this jset holds */ -+ __u8 pad[3]; -+ -+ union { -+ struct bkey_i start[0]; -+ __u64 _data[0]; -+ }; -+}; -+ -+struct bch_sb_field_clean { -+ struct bch_sb_field field; -+ -+ __le32 flags; -+ __le16 read_clock; -+ __le16 write_clock; -+ __le64 journal_seq; -+ -+ union { -+ struct jset_entry start[0]; -+ __u64 _data[0]; -+ }; -+}; -+ -+struct journal_seq_blacklist_entry { -+ __le64 start; -+ __le64 end; -+}; -+ -+struct bch_sb_field_journal_seq_blacklist { -+ struct bch_sb_field field; -+ -+ union { -+ struct journal_seq_blacklist_entry start[0]; -+ __u64 _data[0]; -+ }; -+}; -+ -+/* Superblock: */ -+ -+/* -+ * New versioning scheme: -+ * One common version number 
for all on disk data structures - superblock, btree -+ * nodes, journal entries -+ */ -+#define BCH_JSET_VERSION_OLD 2 -+#define BCH_BSET_VERSION_OLD 3 -+ -+enum bcachefs_metadata_version { -+ bcachefs_metadata_version_min = 9, -+ bcachefs_metadata_version_new_versioning = 10, -+ bcachefs_metadata_version_bkey_renumber = 10, -+ bcachefs_metadata_version_inode_btree_change = 11, -+ bcachefs_metadata_version_max = 12, -+}; -+ -+#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) -+ -+#define BCH_SB_SECTOR 8 -+#define BCH_SB_MEMBERS_MAX 64 /* XXX kill */ -+ -+struct bch_sb_layout { -+ uuid_le magic; /* bcachefs superblock UUID */ -+ __u8 layout_type; -+ __u8 sb_max_size_bits; /* base 2 of 512 byte sectors */ -+ __u8 nr_superblocks; -+ __u8 pad[5]; -+ __le64 sb_offset[61]; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_SB_LAYOUT_SECTOR 7 -+ -+/* -+ * @offset - sector where this sb was written -+ * @version - on disk format version -+ * @version_min - Oldest metadata version this filesystem contains; so we can -+ * safely drop compatibility code and refuse to mount filesystems -+ * we'd need it for -+ * @magic - identifies as a bcachefs superblock (BCACHE_MAGIC) -+ * @seq - incremented each time superblock is written -+ * @uuid - used for generating various magic numbers and identifying -+ * member devices, never changes -+ * @user_uuid - user visible UUID, may be changed -+ * @label - filesystem label -+ * @seq - identifies most recent superblock, incremented each time -+ * superblock is written -+ * @features - enabled incompatible features -+ */ -+struct bch_sb { -+ struct bch_csum csum; -+ __le16 version; -+ __le16 version_min; -+ __le16 pad[2]; -+ uuid_le magic; -+ uuid_le uuid; -+ uuid_le user_uuid; -+ __u8 label[BCH_SB_LABEL_SIZE]; -+ __le64 offset; -+ __le64 seq; -+ -+ __le16 block_size; -+ __u8 dev_idx; -+ __u8 nr_devices; -+ __le32 u64s; -+ -+ __le64 time_base_lo; -+ __le32 time_base_hi; -+ __le32 time_precision; -+ -+ __le64 flags[8]; -+ __le64 features[2]; -+ __le64 compat[2]; -+ -+ struct bch_sb_layout layout; -+ -+ union { -+ struct bch_sb_field start[0]; -+ __le64 _data[0]; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+/* -+ * Flags: -+ * BCH_SB_INITALIZED - set on first mount -+ * BCH_SB_CLEAN - did we shut down cleanly? Just a hint, doesn't affect -+ * behaviour of mount/recovery path: -+ * BCH_SB_INODE_32BIT - limit inode numbers to 32 bits -+ * BCH_SB_128_BIT_MACS - 128 bit macs instead of 80 -+ * BCH_SB_ENCRYPTION_TYPE - if nonzero encryption is enabled; overrides -+ * DATA/META_CSUM_TYPE. 
Also indicates encryption -+ * algorithm in use, if/when we get more than one -+ */ -+ -+LE16_BITMASK(BCH_SB_BLOCK_SIZE, struct bch_sb, block_size, 0, 16); -+ -+LE64_BITMASK(BCH_SB_INITIALIZED, struct bch_sb, flags[0], 0, 1); -+LE64_BITMASK(BCH_SB_CLEAN, struct bch_sb, flags[0], 1, 2); -+LE64_BITMASK(BCH_SB_CSUM_TYPE, struct bch_sb, flags[0], 2, 8); -+LE64_BITMASK(BCH_SB_ERROR_ACTION, struct bch_sb, flags[0], 8, 12); -+ -+LE64_BITMASK(BCH_SB_BTREE_NODE_SIZE, struct bch_sb, flags[0], 12, 28); -+ -+LE64_BITMASK(BCH_SB_GC_RESERVE, struct bch_sb, flags[0], 28, 33); -+LE64_BITMASK(BCH_SB_ROOT_RESERVE, struct bch_sb, flags[0], 33, 40); -+ -+LE64_BITMASK(BCH_SB_META_CSUM_TYPE, struct bch_sb, flags[0], 40, 44); -+LE64_BITMASK(BCH_SB_DATA_CSUM_TYPE, struct bch_sb, flags[0], 44, 48); -+ -+LE64_BITMASK(BCH_SB_META_REPLICAS_WANT, struct bch_sb, flags[0], 48, 52); -+LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT, struct bch_sb, flags[0], 52, 56); -+ -+LE64_BITMASK(BCH_SB_POSIX_ACL, struct bch_sb, flags[0], 56, 57); -+LE64_BITMASK(BCH_SB_USRQUOTA, struct bch_sb, flags[0], 57, 58); -+LE64_BITMASK(BCH_SB_GRPQUOTA, struct bch_sb, flags[0], 58, 59); -+LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60); -+ -+LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61); -+ -+LE64_BITMASK(BCH_SB_REFLINK, struct bch_sb, flags[0], 61, 62); -+ -+/* 61-64 unused */ -+ -+LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4); -+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE, struct bch_sb, flags[1], 4, 8); -+LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9); -+ -+LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10); -+LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14); -+ -+/* -+ * Max size of an extent that may require bouncing to read or write -+ * (checksummed, compressed): 64k -+ */ -+LE64_BITMASK(BCH_SB_ENCODED_EXTENT_MAX_BITS, -+ struct bch_sb, flags[1], 14, 20); -+ -+LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24); -+LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28); -+ -+LE64_BITMASK(BCH_SB_PROMOTE_TARGET, struct bch_sb, flags[1], 28, 40); -+LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52); -+LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64); -+ -+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE, -+ struct bch_sb, flags[2], 0, 4); -+LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64); -+ -+LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16); -+ -+/* -+ * Features: -+ * -+ * journal_seq_blacklist_v3: gates BCH_SB_FIELD_journal_seq_blacklist -+ * reflink: gates KEY_TYPE_reflink -+ * inline_data: gates KEY_TYPE_inline_data -+ * new_siphash: gates BCH_STR_HASH_SIPHASH -+ * new_extent_overwrite: gates BTREE_NODE_NEW_EXTENT_OVERWRITE -+ */ -+#define BCH_SB_FEATURES() \ -+ x(lz4, 0) \ -+ x(gzip, 1) \ -+ x(zstd, 2) \ -+ x(atomic_nlink, 3) \ -+ x(ec, 4) \ -+ x(journal_seq_blacklist_v3, 5) \ -+ x(reflink, 6) \ -+ x(new_siphash, 7) \ -+ x(inline_data, 8) \ -+ x(new_extent_overwrite, 9) \ -+ x(incompressible, 10) \ -+ x(btree_ptr_v2, 11) \ -+ x(extents_above_btree_updates, 12) \ -+ x(btree_updates_journalled, 13) -+ -+#define BCH_SB_FEATURES_ALL \ -+ ((1ULL << BCH_FEATURE_new_siphash)| \ -+ (1ULL << BCH_FEATURE_new_extent_overwrite)| \ -+ (1ULL << BCH_FEATURE_btree_ptr_v2)| \ -+ (1ULL << BCH_FEATURE_extents_above_btree_updates)) -+ -+enum bch_sb_feature { -+#define x(f, n) BCH_FEATURE_##f, -+ BCH_SB_FEATURES() -+#undef x -+ BCH_FEATURE_NR, -+}; 
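-+
-+/*
-+ * As a rough illustration of the accessors the LE64_BITMASK() lines above
-+ * produce (a sketch derived from the LE_BITMASK() macro near the top of this
-+ * header, not literal code from it):
-+ *
-+ *	LE64_BITMASK(BCH_SB_CLEAN, struct bch_sb, flags[0], 1, 2);
-+ *
-+ * generates BCH_SB_CLEAN_OFFSET/_BITS/_MAX constants plus, roughly,
-+ *
-+ *	static inline __u64 BCH_SB_CLEAN(const struct bch_sb *k)
-+ *	{
-+ *		return (__le64_to_cpu(k->flags[0]) >> 1) & 1;
-+ *	}
-+ *
-+ *	static inline void SET_BCH_SB_CLEAN(struct bch_sb *k, __u64 v)
-+ *	{
-+ *		__u64 new = __le64_to_cpu(k->flags[0]);
-+ *
-+ *		new &= ~(1ULL << 1);
-+ *		new |= (v & 1) << 1;
-+ *		k->flags[0] = __cpu_to_le64(new);
-+ *	}
-+ *
-+ * so superblock flags and options can be read and updated without open-coding
-+ * the shifts and masks at every call site.
-+ */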
-+ -+enum bch_sb_compat { -+ BCH_COMPAT_FEAT_ALLOC_INFO = 0, -+ BCH_COMPAT_FEAT_ALLOC_METADATA = 1, -+}; -+ -+/* options: */ -+ -+#define BCH_REPLICAS_MAX 4U -+ -+enum bch_error_actions { -+ BCH_ON_ERROR_CONTINUE = 0, -+ BCH_ON_ERROR_RO = 1, -+ BCH_ON_ERROR_PANIC = 2, -+ BCH_NR_ERROR_ACTIONS = 3, -+}; -+ -+enum bch_str_hash_type { -+ BCH_STR_HASH_CRC32C = 0, -+ BCH_STR_HASH_CRC64 = 1, -+ BCH_STR_HASH_SIPHASH_OLD = 2, -+ BCH_STR_HASH_SIPHASH = 3, -+ BCH_STR_HASH_NR = 4, -+}; -+ -+enum bch_str_hash_opts { -+ BCH_STR_HASH_OPT_CRC32C = 0, -+ BCH_STR_HASH_OPT_CRC64 = 1, -+ BCH_STR_HASH_OPT_SIPHASH = 2, -+ BCH_STR_HASH_OPT_NR = 3, -+}; -+ -+enum bch_csum_type { -+ BCH_CSUM_NONE = 0, -+ BCH_CSUM_CRC32C_NONZERO = 1, -+ BCH_CSUM_CRC64_NONZERO = 2, -+ BCH_CSUM_CHACHA20_POLY1305_80 = 3, -+ BCH_CSUM_CHACHA20_POLY1305_128 = 4, -+ BCH_CSUM_CRC32C = 5, -+ BCH_CSUM_CRC64 = 6, -+ BCH_CSUM_NR = 7, -+}; -+ -+static const unsigned bch_crc_bytes[] = { -+ [BCH_CSUM_NONE] = 0, -+ [BCH_CSUM_CRC32C_NONZERO] = 4, -+ [BCH_CSUM_CRC32C] = 4, -+ [BCH_CSUM_CRC64_NONZERO] = 8, -+ [BCH_CSUM_CRC64] = 8, -+ [BCH_CSUM_CHACHA20_POLY1305_80] = 10, -+ [BCH_CSUM_CHACHA20_POLY1305_128] = 16, -+}; -+ -+static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type) -+{ -+ switch (type) { -+ case BCH_CSUM_CHACHA20_POLY1305_80: -+ case BCH_CSUM_CHACHA20_POLY1305_128: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+enum bch_csum_opts { -+ BCH_CSUM_OPT_NONE = 0, -+ BCH_CSUM_OPT_CRC32C = 1, -+ BCH_CSUM_OPT_CRC64 = 2, -+ BCH_CSUM_OPT_NR = 3, -+}; -+ -+#define BCH_COMPRESSION_TYPES() \ -+ x(none, 0) \ -+ x(lz4_old, 1) \ -+ x(gzip, 2) \ -+ x(lz4, 3) \ -+ x(zstd, 4) \ -+ x(incompressible, 5) -+ -+enum bch_compression_type { -+#define x(t, n) BCH_COMPRESSION_TYPE_##t, -+ BCH_COMPRESSION_TYPES() -+#undef x -+ BCH_COMPRESSION_TYPE_NR -+}; -+ -+#define BCH_COMPRESSION_OPTS() \ -+ x(none, 0) \ -+ x(lz4, 1) \ -+ x(gzip, 2) \ -+ x(zstd, 3) -+ -+enum bch_compression_opts { -+#define x(t, n) BCH_COMPRESSION_OPT_##t, -+ BCH_COMPRESSION_OPTS() -+#undef x -+ BCH_COMPRESSION_OPT_NR -+}; -+ -+/* -+ * Magic numbers -+ * -+ * The various other data structures have their own magic numbers, which are -+ * xored with the first part of the cache set's UUID -+ */ -+ -+#define BCACHE_MAGIC \ -+ UUID_LE(0xf67385c6, 0x1a4e, 0xca45, \ -+ 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81) -+ -+#define BCACHEFS_STATFS_MAGIC 0xca451a4e -+ -+#define JSET_MAGIC __cpu_to_le64(0x245235c1a3625032ULL) -+#define BSET_MAGIC __cpu_to_le64(0x90135c78b99e07f5ULL) -+ -+static inline __le64 __bch2_sb_magic(struct bch_sb *sb) -+{ -+ __le64 ret; -+ memcpy(&ret, &sb->uuid, sizeof(ret)); -+ return ret; -+} -+ -+static inline __u64 __jset_magic(struct bch_sb *sb) -+{ -+ return __le64_to_cpu(__bch2_sb_magic(sb) ^ JSET_MAGIC); -+} -+ -+static inline __u64 __bset_magic(struct bch_sb *sb) -+{ -+ return __le64_to_cpu(__bch2_sb_magic(sb) ^ BSET_MAGIC); -+} -+ -+/* Journal */ -+ -+#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64)) -+ -+#define BCH_JSET_ENTRY_TYPES() \ -+ x(btree_keys, 0) \ -+ x(btree_root, 1) \ -+ x(prio_ptrs, 2) \ -+ x(blacklist, 3) \ -+ x(blacklist_v2, 4) \ -+ x(usage, 5) \ -+ x(data_usage, 6) -+ -+enum { -+#define x(f, nr) BCH_JSET_ENTRY_##f = nr, -+ BCH_JSET_ENTRY_TYPES() -+#undef x -+ BCH_JSET_ENTRY_NR -+}; -+ -+/* -+ * Journal sequence numbers can be blacklisted: bsets record the max sequence -+ * number of all the journal entries they contain updates for, so that on -+ * recovery we can ignore those bsets that contain index updates 
newer that what -+ * made it into the journal. -+ * -+ * This means that we can't reuse that journal_seq - we have to skip it, and -+ * then record that we skipped it so that the next time we crash and recover we -+ * don't think there was a missing journal entry. -+ */ -+struct jset_entry_blacklist { -+ struct jset_entry entry; -+ __le64 seq; -+}; -+ -+struct jset_entry_blacklist_v2 { -+ struct jset_entry entry; -+ __le64 start; -+ __le64 end; -+}; -+ -+enum { -+ FS_USAGE_RESERVED = 0, -+ FS_USAGE_INODES = 1, -+ FS_USAGE_KEY_VERSION = 2, -+ FS_USAGE_NR = 3 -+}; -+ -+struct jset_entry_usage { -+ struct jset_entry entry; -+ __le64 v; -+} __attribute__((packed)); -+ -+struct jset_entry_data_usage { -+ struct jset_entry entry; -+ __le64 v; -+ struct bch_replicas_entry r; -+} __attribute__((packed)); -+ -+/* -+ * On disk format for a journal entry: -+ * seq is monotonically increasing; every journal entry has its own unique -+ * sequence number. -+ * -+ * last_seq is the oldest journal entry that still has keys the btree hasn't -+ * flushed to disk yet. -+ * -+ * version is for on disk format changes. -+ */ -+struct jset { -+ struct bch_csum csum; -+ -+ __le64 magic; -+ __le64 seq; -+ __le32 version; -+ __le32 flags; -+ -+ __le32 u64s; /* size of d[] in u64s */ -+ -+ __u8 encrypted_start[0]; -+ -+ __le16 read_clock; -+ __le16 write_clock; -+ -+ /* Sequence number of oldest dirty journal entry */ -+ __le64 last_seq; -+ -+ -+ union { -+ struct jset_entry start[0]; -+ __u64 _data[0]; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4); -+LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5); -+ -+#define BCH_JOURNAL_BUCKETS_MIN 8 -+ -+/* Btree: */ -+ -+#define BCH_BTREE_IDS() \ -+ x(EXTENTS, 0, "extents") \ -+ x(INODES, 1, "inodes") \ -+ x(DIRENTS, 2, "dirents") \ -+ x(XATTRS, 3, "xattrs") \ -+ x(ALLOC, 4, "alloc") \ -+ x(QUOTAS, 5, "quotas") \ -+ x(EC, 6, "stripes") \ -+ x(REFLINK, 7, "reflink") -+ -+enum btree_id { -+#define x(kwd, val, name) BTREE_ID_##kwd = val, -+ BCH_BTREE_IDS() -+#undef x -+ BTREE_ID_NR -+}; -+ -+#define BTREE_MAX_DEPTH 4U -+ -+/* Btree nodes */ -+ -+/* -+ * Btree nodes -+ * -+ * On disk a btree node is a list/log of these; within each set the keys are -+ * sorted -+ */ -+struct bset { -+ __le64 seq; -+ -+ /* -+ * Highest journal entry this bset contains keys for. -+ * If on recovery we don't see that journal entry, this bset is ignored: -+ * this allows us to preserve the order of all index updates after a -+ * crash, since the journal records a total order of all index updates -+ * and anything that didn't make it to the journal doesn't get used. 
-+ */ -+ __le64 journal_seq; -+ -+ __le32 flags; -+ __le16 version; -+ __le16 u64s; /* count of d[] in u64s */ -+ -+ union { -+ struct bkey_packed start[0]; -+ __u64 _data[0]; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4); -+ -+LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 4, 5); -+LE32_BITMASK(BSET_SEPARATE_WHITEOUTS, -+ struct bset, flags, 5, 6); -+ -+struct btree_node { -+ struct bch_csum csum; -+ __le64 magic; -+ -+ /* this flags field is encrypted, unlike bset->flags: */ -+ __le64 flags; -+ -+ /* Closed interval: */ -+ struct bpos min_key; -+ struct bpos max_key; -+ struct bch_extent_ptr ptr; -+ struct bkey_format format; -+ -+ union { -+ struct bset keys; -+ struct { -+ __u8 pad[22]; -+ __le16 u64s; -+ __u64 _data[0]; -+ -+ }; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+LE64_BITMASK(BTREE_NODE_ID, struct btree_node, flags, 0, 4); -+LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8); -+LE64_BITMASK(BTREE_NODE_NEW_EXTENT_OVERWRITE, -+ struct btree_node, flags, 8, 9); -+/* 9-32 unused */ -+LE64_BITMASK(BTREE_NODE_SEQ, struct btree_node, flags, 32, 64); -+ -+struct btree_node_entry { -+ struct bch_csum csum; -+ -+ union { -+ struct bset keys; -+ struct { -+ __u8 pad[22]; -+ __le16 u64s; -+ __u64 _data[0]; -+ -+ }; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+#endif /* _BCACHEFS_FORMAT_H */ -diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h -new file mode 100644 -index 000000000000..d71157a3e073 ---- /dev/null -+++ b/fs/bcachefs/bcachefs_ioctl.h -@@ -0,0 +1,332 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_IOCTL_H -+#define _BCACHEFS_IOCTL_H -+ -+#include -+#include -+#include "bcachefs_format.h" -+ -+/* -+ * Flags common to multiple ioctls: -+ */ -+#define BCH_FORCE_IF_DATA_LOST (1 << 0) -+#define BCH_FORCE_IF_METADATA_LOST (1 << 1) -+#define BCH_FORCE_IF_DATA_DEGRADED (1 << 2) -+#define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3) -+ -+#define BCH_FORCE_IF_DEGRADED \ -+ (BCH_FORCE_IF_DATA_DEGRADED| \ -+ BCH_FORCE_IF_METADATA_DEGRADED) -+ -+/* -+ * If cleared, ioctl that refer to a device pass it as a pointer to a pathname -+ * (e.g. 
/dev/sda1); if set, the dev field is the device's index within the -+ * filesystem: -+ */ -+#define BCH_BY_INDEX (1 << 4) -+ -+/* -+ * For BCH_IOCTL_READ_SUPER: get superblock of a specific device, not filesystem -+ * wide superblock: -+ */ -+#define BCH_READ_DEV (1 << 5) -+ -+/* global control dev: */ -+ -+/* These are currently broken, and probably unnecessary: */ -+#if 0 -+#define BCH_IOCTL_ASSEMBLE _IOW(0xbc, 1, struct bch_ioctl_assemble) -+#define BCH_IOCTL_INCREMENTAL _IOW(0xbc, 2, struct bch_ioctl_incremental) -+ -+struct bch_ioctl_assemble { -+ __u32 flags; -+ __u32 nr_devs; -+ __u64 pad; -+ __u64 devs[]; -+}; -+ -+struct bch_ioctl_incremental { -+ __u32 flags; -+ __u64 pad; -+ __u64 dev; -+}; -+#endif -+ -+/* filesystem ioctls: */ -+ -+#define BCH_IOCTL_QUERY_UUID _IOR(0xbc, 1, struct bch_ioctl_query_uuid) -+ -+/* These only make sense when we also have incremental assembly */ -+#if 0 -+#define BCH_IOCTL_START _IOW(0xbc, 2, struct bch_ioctl_start) -+#define BCH_IOCTL_STOP _IO(0xbc, 3) -+#endif -+ -+#define BCH_IOCTL_DISK_ADD _IOW(0xbc, 4, struct bch_ioctl_disk) -+#define BCH_IOCTL_DISK_REMOVE _IOW(0xbc, 5, struct bch_ioctl_disk) -+#define BCH_IOCTL_DISK_ONLINE _IOW(0xbc, 6, struct bch_ioctl_disk) -+#define BCH_IOCTL_DISK_OFFLINE _IOW(0xbc, 7, struct bch_ioctl_disk) -+#define BCH_IOCTL_DISK_SET_STATE _IOW(0xbc, 8, struct bch_ioctl_disk_set_state) -+#define BCH_IOCTL_DATA _IOW(0xbc, 10, struct bch_ioctl_data) -+#define BCH_IOCTL_FS_USAGE _IOWR(0xbc, 11, struct bch_ioctl_fs_usage) -+#define BCH_IOCTL_DEV_USAGE _IOWR(0xbc, 11, struct bch_ioctl_dev_usage) -+#define BCH_IOCTL_READ_SUPER _IOW(0xbc, 12, struct bch_ioctl_read_super) -+#define BCH_IOCTL_DISK_GET_IDX _IOW(0xbc, 13, struct bch_ioctl_disk_get_idx) -+#define BCH_IOCTL_DISK_RESIZE _IOW(0xbc, 14, struct bch_ioctl_disk_resize) -+ -+/* ioctl below act on a particular file, not the filesystem as a whole: */ -+ -+#define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *) -+ -+/* -+ * BCH_IOCTL_QUERY_UUID: get filesystem UUID -+ * -+ * Returns user visible UUID, not internal UUID (which may not ever be changed); -+ * the filesystem's sysfs directory may be found under /sys/fs/bcachefs with -+ * this UUID. -+ */ -+struct bch_ioctl_query_uuid { -+ uuid_le uuid; -+}; -+ -+#if 0 -+struct bch_ioctl_start { -+ __u32 flags; -+ __u32 pad; -+}; -+#endif -+ -+/* -+ * BCH_IOCTL_DISK_ADD: add a new device to an existing filesystem -+ * -+ * The specified device must not be open or in use. On success, the new device -+ * will be an online member of the filesystem just like any other member. -+ * -+ * The device must first be prepared by userspace by formatting with a bcachefs -+ * superblock, which is only used for passing in superblock options/parameters -+ * for that device (in struct bch_member). The new device's superblock should -+ * not claim to be a member of any existing filesystem - UUIDs on it will be -+ * ignored. -+ */ -+ -+/* -+ * BCH_IOCTL_DISK_REMOVE: permanently remove a member device from a filesystem -+ * -+ * Any data present on @dev will be permanently deleted, and @dev will be -+ * removed from its slot in the filesystem's list of member devices. The device -+ * may be either offline or offline. -+ * -+ * Will fail removing @dev would leave us with insufficient read write devices -+ * or degraded/unavailable data, unless the approprate BCH_FORCE_IF_* flags are -+ * set. -+ */ -+ -+/* -+ * BCH_IOCTL_DISK_ONLINE: given a disk that is already a member of a filesystem -+ * but is not open (e.g. 
because we started in degraded mode), bring it online -+ * -+ * all existing data on @dev will be available once the device is online, -+ * exactly as if @dev was present when the filesystem was first mounted -+ */ -+ -+/* -+ * BCH_IOCTL_DISK_OFFLINE: offline a disk, causing the kernel to close that -+ * block device, without removing it from the filesystem (so it can be brought -+ * back online later) -+ * -+ * Data present on @dev will be unavailable while @dev is offline (unless -+ * replicated), but will still be intact and untouched if @dev is brought back -+ * online -+ * -+ * Will fail (similarly to BCH_IOCTL_DISK_SET_STATE) if offlining @dev would -+ * leave us with insufficient read write devices or degraded/unavailable data, -+ * unless the approprate BCH_FORCE_IF_* flags are set. -+ */ -+ -+struct bch_ioctl_disk { -+ __u32 flags; -+ __u32 pad; -+ __u64 dev; -+}; -+ -+/* -+ * BCH_IOCTL_DISK_SET_STATE: modify state of a member device of a filesystem -+ * -+ * @new_state - one of the bch_member_state states (rw, ro, failed, -+ * spare) -+ * -+ * Will refuse to change member state if we would then have insufficient devices -+ * to write to, or if it would result in degraded data (when @new_state is -+ * failed or spare) unless the appropriate BCH_FORCE_IF_* flags are set. -+ */ -+struct bch_ioctl_disk_set_state { -+ __u32 flags; -+ __u8 new_state; -+ __u8 pad[3]; -+ __u64 dev; -+}; -+ -+enum bch_data_ops { -+ BCH_DATA_OP_SCRUB = 0, -+ BCH_DATA_OP_REREPLICATE = 1, -+ BCH_DATA_OP_MIGRATE = 2, -+ BCH_DATA_OP_NR = 3, -+}; -+ -+/* -+ * BCH_IOCTL_DATA: operations that walk and manipulate filesystem data (e.g. -+ * scrub, rereplicate, migrate). -+ * -+ * This ioctl kicks off a job in the background, and returns a file descriptor. -+ * Reading from the file descriptor returns a struct bch_ioctl_data_event, -+ * indicating current progress, and closing the file descriptor will stop the -+ * job. The file descriptor is O_CLOEXEC. -+ */ -+struct bch_ioctl_data { -+ __u32 op; -+ __u32 flags; -+ -+ struct bpos start; -+ struct bpos end; -+ -+ union { -+ struct { -+ __u32 dev; -+ __u32 pad; -+ } migrate; -+ struct { -+ __u64 pad[8]; -+ }; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+enum bch_data_event { -+ BCH_DATA_EVENT_PROGRESS = 0, -+ /* XXX: add an event for reporting errors */ -+ BCH_DATA_EVENT_NR = 1, -+}; -+ -+struct bch_ioctl_data_progress { -+ __u8 data_type; -+ __u8 btree_id; -+ __u8 pad[2]; -+ struct bpos pos; -+ -+ __u64 sectors_done; -+ __u64 sectors_total; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_ioctl_data_event { -+ __u8 type; -+ __u8 pad[7]; -+ union { -+ struct bch_ioctl_data_progress p; -+ __u64 pad2[15]; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_replicas_usage { -+ __u64 sectors; -+ struct bch_replicas_entry r; -+} __attribute__((packed)); -+ -+static inline struct bch_replicas_usage * -+replicas_usage_next(struct bch_replicas_usage *u) -+{ -+ return (void *) u + replicas_entry_bytes(&u->r) + 8; -+} -+ -+/* -+ * BCH_IOCTL_FS_USAGE: query filesystem disk space usage -+ * -+ * Returns disk space usage broken out by data type, number of replicas, and -+ * by component device -+ * -+ * @replica_entries_bytes - size, in bytes, allocated for replica usage entries -+ * -+ * On success, @replica_entries_bytes will be changed to indicate the number of -+ * bytes actually used. 
-+ * -+ * Returns -ERANGE if @replica_entries_bytes was too small -+ */ -+struct bch_ioctl_fs_usage { -+ __u64 capacity; -+ __u64 used; -+ __u64 online_reserved; -+ __u64 persistent_reserved[BCH_REPLICAS_MAX]; -+ -+ __u32 replica_entries_bytes; -+ __u32 pad; -+ -+ struct bch_replicas_usage replicas[0]; -+}; -+ -+/* -+ * BCH_IOCTL_DEV_USAGE: query device disk space usage -+ * -+ * Returns disk space usage broken out by data type - both by buckets and -+ * sectors. -+ */ -+struct bch_ioctl_dev_usage { -+ __u64 dev; -+ __u32 flags; -+ __u8 state; -+ __u8 pad[7]; -+ -+ __u32 bucket_size; -+ __u64 nr_buckets; -+ __u64 available_buckets; -+ -+ __u64 buckets[BCH_DATA_NR]; -+ __u64 sectors[BCH_DATA_NR]; -+ -+ __u64 ec_buckets; -+ __u64 ec_sectors; -+}; -+ -+/* -+ * BCH_IOCTL_READ_SUPER: read filesystem superblock -+ * -+ * Equivalent to reading the superblock directly from the block device, except -+ * avoids racing with the kernel writing the superblock or having to figure out -+ * which block device to read -+ * -+ * @sb - buffer to read into -+ * @size - size of userspace allocated buffer -+ * @dev - device to read superblock for, if BCH_READ_DEV flag is -+ * specified -+ * -+ * Returns -ERANGE if buffer provided is too small -+ */ -+struct bch_ioctl_read_super { -+ __u32 flags; -+ __u32 pad; -+ __u64 dev; -+ __u64 size; -+ __u64 sb; -+}; -+ -+/* -+ * BCH_IOCTL_DISK_GET_IDX: give a path to a block device, query filesystem to -+ * determine if disk is a (online) member - if so, returns device's index -+ * -+ * Returns -ENOENT if not found -+ */ -+struct bch_ioctl_disk_get_idx { -+ __u64 dev; -+}; -+ -+/* -+ * BCH_IOCTL_DISK_RESIZE: resize filesystem on a device -+ * -+ * @dev - member to resize -+ * @nbuckets - new number of buckets -+ */ -+struct bch_ioctl_disk_resize { -+ __u32 flags; -+ __u32 pad; -+ __u64 dev; -+ __u64 nbuckets; -+}; -+ -+#endif /* _BCACHEFS_IOCTL_H */ -diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c -new file mode 100644 -index 000000000000..4d0c9129cd4a ---- /dev/null -+++ b/fs/bcachefs/bkey.c -@@ -0,0 +1,1154 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey.h" -+#include "bkey_methods.h" -+#include "bset.h" -+#include "util.h" -+ -+#undef EBUG_ON -+ -+#ifdef DEBUG_BKEYS -+#define EBUG_ON(cond) BUG_ON(cond) -+#else -+#define EBUG_ON(cond) -+#endif -+ -+const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT; -+ -+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *, -+ const struct bkey_packed *); -+ -+void bch2_to_binary(char *out, const u64 *p, unsigned nr_bits) -+{ -+ unsigned bit = high_bit_offset, done = 0; -+ -+ while (1) { -+ while (bit < 64) { -+ if (done && !(done % 8)) -+ *out++ = ' '; -+ *out++ = *p & (1ULL << (63 - bit)) ? 
'1' : '0'; -+ bit++; -+ done++; -+ if (done == nr_bits) { -+ *out++ = '\0'; -+ return; -+ } -+ } -+ -+ p = next_word(p); -+ bit = 0; -+ } -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+static void bch2_bkey_pack_verify(const struct bkey_packed *packed, -+ const struct bkey *unpacked, -+ const struct bkey_format *format) -+{ -+ struct bkey tmp; -+ -+ BUG_ON(bkeyp_val_u64s(format, packed) != -+ bkey_val_u64s(unpacked)); -+ -+ BUG_ON(packed->u64s < bkeyp_key_u64s(format, packed)); -+ -+ tmp = __bch2_bkey_unpack_key(format, packed); -+ -+ if (memcmp(&tmp, unpacked, sizeof(struct bkey))) { -+ char buf1[160], buf2[160]; -+ char buf3[160], buf4[160]; -+ -+ bch2_bkey_to_text(&PBUF(buf1), unpacked); -+ bch2_bkey_to_text(&PBUF(buf2), &tmp); -+ bch2_to_binary(buf3, (void *) unpacked, 80); -+ bch2_to_binary(buf4, high_word(format, packed), 80); -+ -+ panic("keys differ: format u64s %u fields %u %u %u %u %u\n%s\n%s\n%s\n%s\n", -+ format->key_u64s, -+ format->bits_per_field[0], -+ format->bits_per_field[1], -+ format->bits_per_field[2], -+ format->bits_per_field[3], -+ format->bits_per_field[4], -+ buf1, buf2, buf3, buf4); -+ } -+} -+ -+#else -+static inline void bch2_bkey_pack_verify(const struct bkey_packed *packed, -+ const struct bkey *unpacked, -+ const struct bkey_format *format) {} -+#endif -+ -+struct pack_state { -+ const struct bkey_format *format; -+ unsigned bits; /* bits remaining in current word */ -+ u64 w; /* current word */ -+ u64 *p; /* pointer to next word */ -+}; -+ -+__always_inline -+static struct pack_state pack_state_init(const struct bkey_format *format, -+ struct bkey_packed *k) -+{ -+ u64 *p = high_word(format, k); -+ -+ return (struct pack_state) { -+ .format = format, -+ .bits = 64 - high_bit_offset, -+ .w = 0, -+ .p = p, -+ }; -+} -+ -+__always_inline -+static void pack_state_finish(struct pack_state *state, -+ struct bkey_packed *k) -+{ -+ EBUG_ON(state->p < k->_data); -+ EBUG_ON(state->p >= k->_data + state->format->key_u64s); -+ -+ *state->p = state->w; -+} -+ -+struct unpack_state { -+ const struct bkey_format *format; -+ unsigned bits; /* bits remaining in current word */ -+ u64 w; /* current word */ -+ const u64 *p; /* pointer to next word */ -+}; -+ -+__always_inline -+static struct unpack_state unpack_state_init(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ const u64 *p = high_word(format, k); -+ -+ return (struct unpack_state) { -+ .format = format, -+ .bits = 64 - high_bit_offset, -+ .w = *p << high_bit_offset, -+ .p = p, -+ }; -+} -+ -+__always_inline -+static u64 get_inc_field(struct unpack_state *state, unsigned field) -+{ -+ unsigned bits = state->format->bits_per_field[field]; -+ u64 v = 0, offset = le64_to_cpu(state->format->field_offset[field]); -+ -+ if (bits >= state->bits) { -+ v = state->w >> (64 - bits); -+ bits -= state->bits; -+ -+ state->p = next_word(state->p); -+ state->w = *state->p; -+ state->bits = 64; -+ } -+ -+ /* avoid shift by 64 if bits is 0 - bits is never 64 here: */ -+ v |= (state->w >> 1) >> (63 - bits); -+ state->w <<= bits; -+ state->bits -= bits; -+ -+ return v + offset; -+} -+ -+__always_inline -+static bool set_inc_field(struct pack_state *state, unsigned field, u64 v) -+{ -+ unsigned bits = state->format->bits_per_field[field]; -+ u64 offset = le64_to_cpu(state->format->field_offset[field]); -+ -+ if (v < offset) -+ return false; -+ -+ v -= offset; -+ -+ if (fls64(v) > bits) -+ return false; -+ -+ if (bits > state->bits) { -+ bits -= state->bits; -+ /* avoid shift by 64 if bits is 0 - bits is never 64 here: 
*/ -+ state->w |= (v >> 1) >> (bits - 1); -+ -+ *state->p = state->w; -+ state->p = next_word(state->p); -+ state->w = 0; -+ state->bits = 64; -+ } -+ -+ state->bits -= bits; -+ state->w |= v << state->bits; -+ -+ return true; -+} -+ -+/* -+ * Note: does NOT set out->format (we don't know what it should be here!) -+ * -+ * Also: doesn't work on extents - it doesn't preserve the invariant that -+ * if k is packed bkey_start_pos(k) will successfully pack -+ */ -+static bool bch2_bkey_transform_key(const struct bkey_format *out_f, -+ struct bkey_packed *out, -+ const struct bkey_format *in_f, -+ const struct bkey_packed *in) -+{ -+ struct pack_state out_s = pack_state_init(out_f, out); -+ struct unpack_state in_s = unpack_state_init(in_f, in); -+ unsigned i; -+ -+ out->_data[0] = 0; -+ -+ for (i = 0; i < BKEY_NR_FIELDS; i++) -+ if (!set_inc_field(&out_s, i, get_inc_field(&in_s, i))) -+ return false; -+ -+ /* Can't happen because the val would be too big to unpack: */ -+ EBUG_ON(in->u64s - in_f->key_u64s + out_f->key_u64s > U8_MAX); -+ -+ pack_state_finish(&out_s, out); -+ out->u64s = out_f->key_u64s + in->u64s - in_f->key_u64s; -+ out->needs_whiteout = in->needs_whiteout; -+ out->type = in->type; -+ -+ return true; -+} -+ -+bool bch2_bkey_transform(const struct bkey_format *out_f, -+ struct bkey_packed *out, -+ const struct bkey_format *in_f, -+ const struct bkey_packed *in) -+{ -+ if (!bch2_bkey_transform_key(out_f, out, in_f, in)) -+ return false; -+ -+ memcpy_u64s((u64 *) out + out_f->key_u64s, -+ (u64 *) in + in_f->key_u64s, -+ (in->u64s - in_f->key_u64s)); -+ return true; -+} -+ -+#define bkey_fields() \ -+ x(BKEY_FIELD_INODE, p.inode) \ -+ x(BKEY_FIELD_OFFSET, p.offset) \ -+ x(BKEY_FIELD_SNAPSHOT, p.snapshot) \ -+ x(BKEY_FIELD_SIZE, size) \ -+ x(BKEY_FIELD_VERSION_HI, version.hi) \ -+ x(BKEY_FIELD_VERSION_LO, version.lo) -+ -+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *format, -+ const struct bkey_packed *in) -+{ -+ struct unpack_state state = unpack_state_init(format, in); -+ struct bkey out; -+ -+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); -+ EBUG_ON(in->u64s < format->key_u64s); -+ EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE); -+ EBUG_ON(in->u64s - format->key_u64s + BKEY_U64s > U8_MAX); -+ -+ out.u64s = BKEY_U64s + in->u64s - format->key_u64s; -+ out.format = KEY_FORMAT_CURRENT; -+ out.needs_whiteout = in->needs_whiteout; -+ out.type = in->type; -+ out.pad[0] = 0; -+ -+#define x(id, field) out.field = get_inc_field(&state, id); -+ bkey_fields() -+#undef x -+ -+ return out; -+} -+ -+#ifndef HAVE_BCACHEFS_COMPILED_UNPACK -+struct bpos __bkey_unpack_pos(const struct bkey_format *format, -+ const struct bkey_packed *in) -+{ -+ struct unpack_state state = unpack_state_init(format, in); -+ struct bpos out; -+ -+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); -+ EBUG_ON(in->u64s < format->key_u64s); -+ EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE); -+ -+ out.inode = get_inc_field(&state, BKEY_FIELD_INODE); -+ out.offset = get_inc_field(&state, BKEY_FIELD_OFFSET); -+ out.snapshot = get_inc_field(&state, BKEY_FIELD_SNAPSHOT); -+ -+ return out; -+} -+#endif -+ -+/** -+ * bch2_bkey_pack_key -- pack just the key, not the value -+ */ -+bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in, -+ const struct bkey_format *format) -+{ -+ struct pack_state state = pack_state_init(format, out); -+ -+ EBUG_ON((void *) in == (void *) out); -+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); -+ EBUG_ON(in->format != KEY_FORMAT_CURRENT); -+ -+ out->_data[0] = 0; -+ 
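-+	/*
-+	 * Pack each key field through the bkey_fields() x-macro below;
-+	 * set_inc_field() refuses a field whose value lies below the format's
-+	 * field_offset, or that needs more than bits_per_field bits once the
-+	 * offset is subtracted - in either case this key can't be packed with
-+	 * this format:
-+	 */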
-+#define x(id, field) if (!set_inc_field(&state, id, in->field)) return false; -+ bkey_fields() -+#undef x -+ -+ /* -+ * Extents - we have to guarantee that if an extent is packed, a trimmed -+ * version will also pack: -+ */ -+ if (bkey_start_offset(in) < -+ le64_to_cpu(format->field_offset[BKEY_FIELD_OFFSET])) -+ return false; -+ -+ pack_state_finish(&state, out); -+ out->u64s = format->key_u64s + in->u64s - BKEY_U64s; -+ out->format = KEY_FORMAT_LOCAL_BTREE; -+ out->needs_whiteout = in->needs_whiteout; -+ out->type = in->type; -+ -+ bch2_bkey_pack_verify(out, in, format); -+ return true; -+} -+ -+/** -+ * bch2_bkey_unpack -- unpack the key and the value -+ */ -+void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst, -+ const struct bkey_packed *src) -+{ -+ __bkey_unpack_key(b, &dst->k, src); -+ -+ memcpy_u64s(&dst->v, -+ bkeyp_val(&b->format, src), -+ bkeyp_val_u64s(&b->format, src)); -+} -+ -+/** -+ * bch2_bkey_pack -- pack the key and the value -+ */ -+bool bch2_bkey_pack(struct bkey_packed *out, const struct bkey_i *in, -+ const struct bkey_format *format) -+{ -+ struct bkey_packed tmp; -+ -+ if (!bch2_bkey_pack_key(&tmp, &in->k, format)) -+ return false; -+ -+ memmove_u64s((u64 *) out + format->key_u64s, -+ &in->v, -+ bkey_val_u64s(&in->k)); -+ memcpy_u64s(out, &tmp, format->key_u64s); -+ -+ return true; -+} -+ -+__always_inline -+static bool set_inc_field_lossy(struct pack_state *state, unsigned field, u64 v) -+{ -+ unsigned bits = state->format->bits_per_field[field]; -+ u64 offset = le64_to_cpu(state->format->field_offset[field]); -+ bool ret = true; -+ -+ EBUG_ON(v < offset); -+ v -= offset; -+ -+ if (fls64(v) > bits) { -+ v = ~(~0ULL << bits); -+ ret = false; -+ } -+ -+ if (bits > state->bits) { -+ bits -= state->bits; -+ state->w |= (v >> 1) >> (bits - 1); -+ -+ *state->p = state->w; -+ state->p = next_word(state->p); -+ state->w = 0; -+ state->bits = 64; -+ } -+ -+ state->bits -= bits; -+ state->w |= v << state->bits; -+ -+ return ret; -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+static bool bkey_packed_successor(struct bkey_packed *out, -+ const struct btree *b, -+ struct bkey_packed k) -+{ -+ const struct bkey_format *f = &b->format; -+ unsigned nr_key_bits = b->nr_key_bits; -+ unsigned first_bit, offset; -+ u64 *p; -+ -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f)); -+ -+ if (!nr_key_bits) -+ return false; -+ -+ *out = k; -+ -+ first_bit = high_bit_offset + nr_key_bits - 1; -+ p = nth_word(high_word(f, out), first_bit >> 6); -+ offset = 63 - (first_bit & 63); -+ -+ while (nr_key_bits) { -+ unsigned bits = min(64 - offset, nr_key_bits); -+ u64 mask = (~0ULL >> (64 - bits)) << offset; -+ -+ if ((*p & mask) != mask) { -+ *p += 1ULL << offset; -+ EBUG_ON(bkey_cmp_packed(b, out, &k) <= 0); -+ return true; -+ } -+ -+ *p &= ~mask; -+ p = prev_word(p); -+ nr_key_bits -= bits; -+ offset = 0; -+ } -+ -+ return false; -+} -+#endif -+ -+/* -+ * Returns a packed key that compares <= in -+ * -+ * This is used in bset_search_tree(), where we need a packed pos in order to be -+ * able to compare against the keys in the auxiliary search tree - and it's -+ * legal to use a packed pos that isn't equivalent to the original pos, -+ * _provided_ it compares <= to the original pos. 
-+ */ -+enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out, -+ struct bpos in, -+ const struct btree *b) -+{ -+ const struct bkey_format *f = &b->format; -+ struct pack_state state = pack_state_init(f, out); -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bpos orig = in; -+#endif -+ bool exact = true; -+ -+ out->_data[0] = 0; -+ -+ if (unlikely(in.snapshot < -+ le64_to_cpu(f->field_offset[BKEY_FIELD_SNAPSHOT]))) { -+ if (!in.offset-- && -+ !in.inode--) -+ return BKEY_PACK_POS_FAIL; -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (unlikely(in.offset < -+ le64_to_cpu(f->field_offset[BKEY_FIELD_OFFSET]))) { -+ if (!in.inode--) -+ return BKEY_PACK_POS_FAIL; -+ in.offset = KEY_OFFSET_MAX; -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (unlikely(in.inode < -+ le64_to_cpu(f->field_offset[BKEY_FIELD_INODE]))) -+ return BKEY_PACK_POS_FAIL; -+ -+ if (!set_inc_field_lossy(&state, BKEY_FIELD_INODE, in.inode)) { -+ in.offset = KEY_OFFSET_MAX; -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (!set_inc_field_lossy(&state, BKEY_FIELD_OFFSET, in.offset)) { -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (!set_inc_field_lossy(&state, BKEY_FIELD_SNAPSHOT, in.snapshot)) -+ exact = false; -+ -+ pack_state_finish(&state, out); -+ out->u64s = f->key_u64s; -+ out->format = KEY_FORMAT_LOCAL_BTREE; -+ out->type = KEY_TYPE_deleted; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ if (exact) { -+ BUG_ON(bkey_cmp_left_packed(b, out, &orig)); -+ } else { -+ struct bkey_packed successor; -+ -+ BUG_ON(bkey_cmp_left_packed(b, out, &orig) >= 0); -+ BUG_ON(bkey_packed_successor(&successor, b, *out) && -+ bkey_cmp_left_packed(b, &successor, &orig) < 0); -+ } -+#endif -+ -+ return exact ? BKEY_PACK_POS_EXACT : BKEY_PACK_POS_SMALLER; -+} -+ -+void bch2_bkey_format_init(struct bkey_format_state *s) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(s->field_min); i++) -+ s->field_min[i] = U64_MAX; -+ -+ for (i = 0; i < ARRAY_SIZE(s->field_max); i++) -+ s->field_max[i] = 0; -+ -+ /* Make sure we can store a size of 0: */ -+ s->field_min[BKEY_FIELD_SIZE] = 0; -+} -+ -+static void __bkey_format_add(struct bkey_format_state *s, -+ unsigned field, u64 v) -+{ -+ s->field_min[field] = min(s->field_min[field], v); -+ s->field_max[field] = max(s->field_max[field], v); -+} -+ -+/* -+ * Changes @format so that @k can be successfully packed with @format -+ */ -+void bch2_bkey_format_add_key(struct bkey_format_state *s, const struct bkey *k) -+{ -+#define x(id, field) __bkey_format_add(s, id, k->field); -+ bkey_fields() -+#undef x -+ __bkey_format_add(s, BKEY_FIELD_OFFSET, bkey_start_offset(k)); -+} -+ -+void bch2_bkey_format_add_pos(struct bkey_format_state *s, struct bpos p) -+{ -+ unsigned field = 0; -+ -+ __bkey_format_add(s, field++, p.inode); -+ __bkey_format_add(s, field++, p.offset); -+ __bkey_format_add(s, field++, p.snapshot); -+} -+ -+/* -+ * We don't want it to be possible for the packed format to represent fields -+ * bigger than a u64... that will cause confusion and issues (like with -+ * bkey_packed_successor()) -+ */ -+static void set_format_field(struct bkey_format *f, enum bch_bkey_fields i, -+ unsigned bits, u64 offset) -+{ -+ offset = bits == 64 ? 
0 : min(offset, U64_MAX - ((1ULL << bits) - 1)); -+ -+ f->bits_per_field[i] = bits; -+ f->field_offset[i] = cpu_to_le64(offset); -+} -+ -+struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s) -+{ -+ unsigned i, bits = KEY_PACKED_BITS_START; -+ struct bkey_format ret = { -+ .nr_fields = BKEY_NR_FIELDS, -+ }; -+ -+ for (i = 0; i < ARRAY_SIZE(s->field_min); i++) { -+ s->field_min[i] = min(s->field_min[i], s->field_max[i]); -+ -+ set_format_field(&ret, i, -+ fls64(s->field_max[i] - s->field_min[i]), -+ s->field_min[i]); -+ -+ bits += ret.bits_per_field[i]; -+ } -+ -+ /* allow for extent merging: */ -+ if (ret.bits_per_field[BKEY_FIELD_SIZE]) { -+ ret.bits_per_field[BKEY_FIELD_SIZE] += 4; -+ bits += 4; -+ } -+ -+ ret.key_u64s = DIV_ROUND_UP(bits, 64); -+ -+ /* if we have enough spare bits, round fields up to nearest byte */ -+ bits = ret.key_u64s * 64 - bits; -+ -+ for (i = 0; i < ARRAY_SIZE(ret.bits_per_field); i++) { -+ unsigned r = round_up(ret.bits_per_field[i], 8) - -+ ret.bits_per_field[i]; -+ -+ if (r <= bits) { -+ set_format_field(&ret, i, -+ ret.bits_per_field[i] + r, -+ le64_to_cpu(ret.field_offset[i])); -+ bits -= r; -+ } -+ } -+ -+ EBUG_ON(bch2_bkey_format_validate(&ret)); -+ return ret; -+} -+ -+const char *bch2_bkey_format_validate(struct bkey_format *f) -+{ -+ unsigned i, bits = KEY_PACKED_BITS_START; -+ -+ if (f->nr_fields != BKEY_NR_FIELDS) -+ return "incorrect number of fields"; -+ -+ for (i = 0; i < f->nr_fields; i++) { -+ u64 field_offset = le64_to_cpu(f->field_offset[i]); -+ -+ if (f->bits_per_field[i] > 64) -+ return "field too large"; -+ -+ if (field_offset && -+ (f->bits_per_field[i] == 64 || -+ (field_offset + ((1ULL << f->bits_per_field[i]) - 1) < -+ field_offset))) -+ return "offset + bits overflow"; -+ -+ bits += f->bits_per_field[i]; -+ } -+ -+ if (f->key_u64s != DIV_ROUND_UP(bits, 64)) -+ return "incorrect key_u64s"; -+ -+ return NULL; -+} -+ -+/* -+ * Most significant differing bit -+ * Bits are indexed from 0 - return is [0, nr_key_bits) -+ */ -+__pure -+unsigned bch2_bkey_greatest_differing_bit(const struct btree *b, -+ const struct bkey_packed *l_k, -+ const struct bkey_packed *r_k) -+{ -+ const u64 *l = high_word(&b->format, l_k); -+ const u64 *r = high_word(&b->format, r_k); -+ unsigned nr_key_bits = b->nr_key_bits; -+ unsigned word_bits = 64 - high_bit_offset; -+ u64 l_v, r_v; -+ -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format)); -+ -+ /* for big endian, skip past header */ -+ l_v = *l & (~0ULL >> high_bit_offset); -+ r_v = *r & (~0ULL >> high_bit_offset); -+ -+ while (nr_key_bits) { -+ if (nr_key_bits < word_bits) { -+ l_v >>= word_bits - nr_key_bits; -+ r_v >>= word_bits - nr_key_bits; -+ nr_key_bits = 0; -+ } else { -+ nr_key_bits -= word_bits; -+ } -+ -+ if (l_v != r_v) -+ return fls64(l_v ^ r_v) - 1 + nr_key_bits; -+ -+ l = next_word(l); -+ r = next_word(r); -+ -+ l_v = *l; -+ r_v = *r; -+ word_bits = 64; -+ } -+ -+ return 0; -+} -+ -+/* -+ * First set bit -+ * Bits are indexed from 0 - return is [0, nr_key_bits) -+ */ -+__pure -+unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k) -+{ -+ const u64 *p = high_word(&b->format, k); -+ unsigned nr_key_bits = b->nr_key_bits; -+ unsigned ret = 0, offset; -+ -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format)); -+ -+ offset = nr_key_bits; -+ while (offset > 64) { -+ p = next_word(p); -+ offset -= 64; -+ } -+ -+ offset = 64 - offset; -+ -+ while (nr_key_bits) { -+ unsigned bits = nr_key_bits + offset < 64 -+ ? 
nr_key_bits -+ : 64 - offset; -+ -+ u64 mask = (~0ULL >> (64 - bits)) << offset; -+ -+ if (*p & mask) -+ return ret + __ffs64(*p & mask) - offset; -+ -+ p = prev_word(p); -+ nr_key_bits -= bits; -+ ret += bits; -+ offset = 0; -+ } -+ -+ return 0; -+} -+ -+#ifdef CONFIG_X86_64 -+ -+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r, -+ unsigned nr_key_bits) -+{ -+ long d0, d1, d2, d3; -+ int cmp; -+ -+ /* we shouldn't need asm for this, but gcc is being retarded: */ -+ -+ asm(".intel_syntax noprefix;" -+ "xor eax, eax;" -+ "xor edx, edx;" -+ "1:;" -+ "mov r8, [rdi];" -+ "mov r9, [rsi];" -+ "sub ecx, 64;" -+ "jl 2f;" -+ -+ "cmp r8, r9;" -+ "jnz 3f;" -+ -+ "lea rdi, [rdi - 8];" -+ "lea rsi, [rsi - 8];" -+ "jmp 1b;" -+ -+ "2:;" -+ "not ecx;" -+ "shr r8, 1;" -+ "shr r9, 1;" -+ "shr r8, cl;" -+ "shr r9, cl;" -+ "cmp r8, r9;" -+ -+ "3:\n" -+ "seta al;" -+ "setb dl;" -+ "sub eax, edx;" -+ ".att_syntax prefix;" -+ : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp) -+ : "0" (l), "1" (r), "3" (nr_key_bits) -+ : "r8", "r9", "cc", "memory"); -+ -+ return cmp; -+} -+ -+#define I(_x) (*(out)++ = (_x)) -+#define I1(i0) I(i0) -+#define I2(i0, i1) (I1(i0), I(i1)) -+#define I3(i0, i1, i2) (I2(i0, i1), I(i2)) -+#define I4(i0, i1, i2, i3) (I3(i0, i1, i2), I(i3)) -+#define I5(i0, i1, i2, i3, i4) (I4(i0, i1, i2, i3), I(i4)) -+ -+static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out, -+ enum bch_bkey_fields field, -+ unsigned dst_offset, unsigned dst_size, -+ bool *eax_zeroed) -+{ -+ unsigned bits = format->bits_per_field[field]; -+ u64 offset = le64_to_cpu(format->field_offset[field]); -+ unsigned i, byte, bit_offset, align, shl, shr; -+ -+ if (!bits && !offset) { -+ if (!*eax_zeroed) { -+ /* xor eax, eax */ -+ I2(0x31, 0xc0); -+ } -+ -+ *eax_zeroed = true; -+ goto set_field; -+ } -+ -+ if (!bits) { -+ /* just return offset: */ -+ -+ switch (dst_size) { -+ case 8: -+ if (offset > S32_MAX) { -+ /* mov [rdi + dst_offset], offset */ -+ I3(0xc7, 0x47, dst_offset); -+ memcpy(out, &offset, 4); -+ out += 4; -+ -+ I3(0xc7, 0x47, dst_offset + 4); -+ memcpy(out, (void *) &offset + 4, 4); -+ out += 4; -+ } else { -+ /* mov [rdi + dst_offset], offset */ -+ /* sign extended */ -+ I4(0x48, 0xc7, 0x47, dst_offset); -+ memcpy(out, &offset, 4); -+ out += 4; -+ } -+ break; -+ case 4: -+ /* mov [rdi + dst_offset], offset */ -+ I3(0xc7, 0x47, dst_offset); -+ memcpy(out, &offset, 4); -+ out += 4; -+ break; -+ default: -+ BUG(); -+ } -+ -+ return out; -+ } -+ -+ bit_offset = format->key_u64s * 64; -+ for (i = 0; i <= field; i++) -+ bit_offset -= format->bits_per_field[i]; -+ -+ byte = bit_offset / 8; -+ bit_offset -= byte * 8; -+ -+ *eax_zeroed = false; -+ -+ if (bit_offset == 0 && bits == 8) { -+ /* movzx eax, BYTE PTR [rsi + imm8] */ -+ I4(0x0f, 0xb6, 0x46, byte); -+ } else if (bit_offset == 0 && bits == 16) { -+ /* movzx eax, WORD PTR [rsi + imm8] */ -+ I4(0x0f, 0xb7, 0x46, byte); -+ } else if (bit_offset + bits <= 32) { -+ align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3); -+ byte -= align; -+ bit_offset += align * 8; -+ -+ BUG_ON(bit_offset + bits > 32); -+ -+ /* mov eax, [rsi + imm8] */ -+ I3(0x8b, 0x46, byte); -+ -+ if (bit_offset) { -+ /* shr eax, imm8 */ -+ I3(0xc1, 0xe8, bit_offset); -+ } -+ -+ if (bit_offset + bits < 32) { -+ unsigned mask = ~0U >> (32 - bits); -+ -+ /* and eax, imm32 */ -+ I1(0x25); -+ memcpy(out, &mask, 4); -+ out += 4; -+ } -+ } else if (bit_offset + bits <= 64) { -+ align = min(8 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 7); -+ byte -= align; -+ 
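An illustrative aside, not part of the patch: the 64-bit path in this branch ultimately emits a shl/shr pair, which is the usual way to pull an unaligned bitfield out of a single 64-bit load. The sketch below shows the equivalent C, assuming bit_offset counts from the low bit of the loaded word as in the surrounding code; extract_bits() is a hypothetical name.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Extract `bits` bits starting at bit `bit_offset` of a 64-bit word. */
static uint64_t extract_bits(uint64_t word, unsigned bit_offset, unsigned bits)
{
	unsigned shl, shr;

	assert(bits >= 1 && bit_offset + bits <= 64);

	shl = 64 - bit_offset - bits;	/* drop everything above the field */
	shr = bit_offset + shl;		/* then everything below it */

	return (word << shl) >> shr;
}

int main(void)
{
	/* a 12-bit field holding 0xabc stored at bit offset 20 */
	uint64_t word = (uint64_t)0xabc << 20;

	printf("0x%llx\n", (unsigned long long)extract_bits(word, 20, 12));
	return 0;
}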
bit_offset += align * 8; -+ -+ BUG_ON(bit_offset + bits > 64); -+ -+ /* mov rax, [rsi + imm8] */ -+ I4(0x48, 0x8b, 0x46, byte); -+ -+ shl = 64 - bit_offset - bits; -+ shr = bit_offset + shl; -+ -+ if (shl) { -+ /* shl rax, imm8 */ -+ I4(0x48, 0xc1, 0xe0, shl); -+ } -+ -+ if (shr) { -+ /* shr rax, imm8 */ -+ I4(0x48, 0xc1, 0xe8, shr); -+ } -+ } else { -+ align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3); -+ byte -= align; -+ bit_offset += align * 8; -+ -+ BUG_ON(bit_offset + bits > 96); -+ -+ /* mov rax, [rsi + byte] */ -+ I4(0x48, 0x8b, 0x46, byte); -+ -+ /* mov edx, [rsi + byte + 8] */ -+ I3(0x8b, 0x56, byte + 8); -+ -+ /* bits from next word: */ -+ shr = bit_offset + bits - 64; -+ BUG_ON(shr > bit_offset); -+ -+ /* shr rax, bit_offset */ -+ I4(0x48, 0xc1, 0xe8, shr); -+ -+ /* shl rdx, imm8 */ -+ I4(0x48, 0xc1, 0xe2, 64 - shr); -+ -+ /* or rax, rdx */ -+ I3(0x48, 0x09, 0xd0); -+ -+ shr = bit_offset - shr; -+ -+ if (shr) { -+ /* shr rax, imm8 */ -+ I4(0x48, 0xc1, 0xe8, shr); -+ } -+ } -+ -+ /* rax += offset: */ -+ if (offset > S32_MAX) { -+ /* mov rdx, imm64 */ -+ I2(0x48, 0xba); -+ memcpy(out, &offset, 8); -+ out += 8; -+ /* add %rdx, %rax */ -+ I3(0x48, 0x01, 0xd0); -+ } else if (offset + (~0ULL >> (64 - bits)) > U32_MAX) { -+ /* add rax, imm32 */ -+ I2(0x48, 0x05); -+ memcpy(out, &offset, 4); -+ out += 4; -+ } else if (offset) { -+ /* add eax, imm32 */ -+ I1(0x05); -+ memcpy(out, &offset, 4); -+ out += 4; -+ } -+set_field: -+ switch (dst_size) { -+ case 8: -+ /* mov [rdi + dst_offset], rax */ -+ I4(0x48, 0x89, 0x47, dst_offset); -+ break; -+ case 4: -+ /* mov [rdi + dst_offset], eax */ -+ I3(0x89, 0x47, dst_offset); -+ break; -+ default: -+ BUG(); -+ } -+ -+ return out; -+} -+ -+int bch2_compile_bkey_format(const struct bkey_format *format, void *_out) -+{ -+ bool eax_zeroed = false; -+ u8 *out = _out; -+ -+ /* -+ * rdi: dst - unpacked key -+ * rsi: src - packed key -+ */ -+ -+ /* k->u64s, k->format, k->type */ -+ -+ /* mov eax, [rsi] */ -+ I2(0x8b, 0x06); -+ -+ /* add eax, BKEY_U64s - format->key_u64s */ -+ I5(0x05, BKEY_U64s - format->key_u64s, KEY_FORMAT_CURRENT, 0, 0); -+ -+ /* and eax, imm32: mask out k->pad: */ -+ I5(0x25, 0xff, 0xff, 0xff, 0); -+ -+ /* mov [rdi], eax */ -+ I2(0x89, 0x07); -+ -+#define x(id, field) \ -+ out = compile_bkey_field(format, out, id, \ -+ offsetof(struct bkey, field), \ -+ sizeof(((struct bkey *) NULL)->field), \ -+ &eax_zeroed); -+ bkey_fields() -+#undef x -+ -+ /* retq */ -+ I1(0xc3); -+ -+ return (void *) out - _out; -+} -+ -+#else -+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r, -+ unsigned nr_key_bits) -+{ -+ u64 l_v, r_v; -+ -+ if (!nr_key_bits) -+ return 0; -+ -+ /* for big endian, skip past header */ -+ nr_key_bits += high_bit_offset; -+ l_v = *l & (~0ULL >> high_bit_offset); -+ r_v = *r & (~0ULL >> high_bit_offset); -+ -+ while (1) { -+ if (nr_key_bits < 64) { -+ l_v >>= 64 - nr_key_bits; -+ r_v >>= 64 - nr_key_bits; -+ nr_key_bits = 0; -+ } else { -+ nr_key_bits -= 64; -+ } -+ -+ if (!nr_key_bits || l_v != r_v) -+ break; -+ -+ l = next_word(l); -+ r = next_word(r); -+ -+ l_v = *l; -+ r_v = *r; -+ } -+ -+ return cmp_int(l_v, r_v); -+} -+#endif -+ -+__pure -+int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l, -+ const struct bkey_packed *r, -+ const struct btree *b) -+{ -+ const struct bkey_format *f = &b->format; -+ int ret; -+ -+ EBUG_ON(!bkey_packed(l) || !bkey_packed(r)); -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f)); -+ -+ ret = __bkey_cmp_bits(high_word(f, l), -+ high_word(f, r), -+ 
b->nr_key_bits); -+ -+ EBUG_ON(ret != bkey_cmp(bkey_unpack_pos(b, l), -+ bkey_unpack_pos(b, r))); -+ return ret; -+} -+ -+__pure __flatten -+int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bpos *r) -+{ -+ return bkey_cmp(bkey_unpack_pos_format_checked(b, l), *r); -+} -+ -+__pure __flatten -+int __bch2_bkey_cmp_packed(const struct bkey_packed *l, -+ const struct bkey_packed *r, -+ const struct btree *b) -+{ -+ struct bkey unpacked; -+ -+ if (likely(bkey_packed(l) && bkey_packed(r))) -+ return __bch2_bkey_cmp_packed_format_checked(l, r, b); -+ -+ if (bkey_packed(l)) { -+ __bkey_unpack_key_format_checked(b, &unpacked, l); -+ l = (void*) &unpacked; -+ } else if (bkey_packed(r)) { -+ __bkey_unpack_key_format_checked(b, &unpacked, r); -+ r = (void*) &unpacked; -+ } -+ -+ return bkey_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p); -+} -+ -+__pure __flatten -+int __bch2_bkey_cmp_left_packed(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bpos *r) -+{ -+ const struct bkey *l_unpacked; -+ -+ return unlikely(l_unpacked = packed_to_bkey_c(l)) -+ ? bkey_cmp(l_unpacked->p, *r) -+ : __bch2_bkey_cmp_left_packed_format_checked(b, l, r); -+} -+ -+void bch2_bpos_swab(struct bpos *p) -+{ -+ u8 *l = (u8 *) p; -+ u8 *h = ((u8 *) &p[1]) - 1; -+ -+ while (l < h) { -+ swap(*l, *h); -+ l++; -+ --h; -+ } -+} -+ -+void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k) -+{ -+ const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current; -+ u8 *l = k->key_start; -+ u8 *h = (u8 *) (k->_data + f->key_u64s) - 1; -+ -+ while (l < h) { -+ swap(*l, *h); -+ l++; -+ --h; -+ } -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_bkey_pack_test(void) -+{ -+ struct bkey t = KEY(4134ULL, 1250629070527416633ULL, 0); -+ struct bkey_packed p; -+ -+ struct bkey_format test_format = { -+ .key_u64s = 2, -+ .nr_fields = BKEY_NR_FIELDS, -+ .bits_per_field = { -+ 13, -+ 64, -+ }, -+ }; -+ -+ struct unpack_state in_s = -+ unpack_state_init(&bch2_bkey_format_current, (void *) &t); -+ struct pack_state out_s = pack_state_init(&test_format, &p); -+ unsigned i; -+ -+ for (i = 0; i < out_s.format->nr_fields; i++) { -+ u64 a, v = get_inc_field(&in_s, i); -+ -+ switch (i) { -+#define x(id, field) case id: a = t.field; break; -+ bkey_fields() -+#undef x -+ default: -+ BUG(); -+ } -+ -+ if (a != v) -+ panic("got %llu actual %llu i %u\n", v, a, i); -+ -+ if (!set_inc_field(&out_s, i, v)) -+ panic("failed at %u\n", i); -+ } -+ -+ BUG_ON(!bch2_bkey_pack_key(&p, &t, &test_format)); -+} -+#endif -diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h -new file mode 100644 -index 000000000000..cbcfbd26bc58 ---- /dev/null -+++ b/fs/bcachefs/bkey.h -@@ -0,0 +1,605 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BKEY_H -+#define _BCACHEFS_BKEY_H -+ -+#include -+#include "bcachefs_format.h" -+ -+#include "util.h" -+#include "vstructs.h" -+ -+#ifdef CONFIG_X86_64 -+#define HAVE_BCACHEFS_COMPILED_UNPACK 1 -+#endif -+ -+void bch2_to_binary(char *, const u64 *, unsigned); -+ -+/* bkey with split value, const */ -+struct bkey_s_c { -+ const struct bkey *k; -+ const struct bch_val *v; -+}; -+ -+/* bkey with split value */ -+struct bkey_s { -+ union { -+ struct { -+ struct bkey *k; -+ struct bch_val *v; -+ }; -+ struct bkey_s_c s_c; -+ }; -+}; -+ -+#define bkey_next(_k) vstruct_next(_k) -+ -+static inline struct bkey_packed *bkey_next_skip_noops(struct bkey_packed *k, -+ struct bkey_packed *end) -+{ -+ k = bkey_next(k); 
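A toy sketch, not part of the patch: bkey_s_c and bkey_s above follow the pattern where the mutable view embeds the const view in an anonymous union, so the const view can be handed out without a cast (the same trick the BKEY_VAL_ACCESSORS macro later relies on). The types below are hypothetical and exist only to show the shape.

#include <stdio.h>

struct thing { int x; };

/* read-only view */
struct view_c {
	const struct thing *t;
};

/* mutable view: the anonymous union makes .c available without a cast */
struct view {
	union {
		struct {
			struct thing *t;
		};
		struct view_c c;
	};
};

static int read_through(struct view_c v)
{
	return v.t->x;
}

int main(void)
{
	struct thing t = { 42 };
	struct view v;

	v.t = &t;				/* write via the mutable view */
	printf("%d\n", read_through(v.c));	/* read via the const view */
	return 0;
}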
-+ -+ while (k != end && !k->u64s) -+ k = (void *) ((u64 *) k + 1); -+ return k; -+} -+ -+#define bkey_val_u64s(_k) ((_k)->u64s - BKEY_U64s) -+ -+static inline size_t bkey_val_bytes(const struct bkey *k) -+{ -+ return bkey_val_u64s(k) * sizeof(u64); -+} -+ -+static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s) -+{ -+ k->u64s = BKEY_U64s + val_u64s; -+} -+ -+static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes) -+{ -+ k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64)); -+} -+ -+#define bkey_val_end(_k) ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k))) -+ -+#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted) -+ -+#define bkey_whiteout(_k) \ -+ ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard) -+ -+#define bkey_packed_typecheck(_k) \ -+({ \ -+ BUILD_BUG_ON(!type_is(_k, struct bkey *) && \ -+ !type_is(_k, struct bkey_packed *)); \ -+ type_is(_k, struct bkey_packed *); \ -+}) -+ -+enum bkey_lr_packed { -+ BKEY_PACKED_BOTH, -+ BKEY_PACKED_RIGHT, -+ BKEY_PACKED_LEFT, -+ BKEY_PACKED_NONE, -+}; -+ -+#define bkey_lr_packed_typecheck(_l, _r) \ -+ (!bkey_packed_typecheck(_l) + ((!bkey_packed_typecheck(_r)) << 1)) -+ -+#define bkey_lr_packed(_l, _r) \ -+ ((_l)->format + ((_r)->format << 1)) -+ -+#define bkey_copy(_dst, _src) \ -+do { \ -+ BUILD_BUG_ON(!type_is(_dst, struct bkey_i *) && \ -+ !type_is(_dst, struct bkey_packed *)); \ -+ BUILD_BUG_ON(!type_is(_src, struct bkey_i *) && \ -+ !type_is(_src, struct bkey_packed *)); \ -+ EBUG_ON((u64 *) (_dst) > (u64 *) (_src) && \ -+ (u64 *) (_dst) < (u64 *) (_src) + \ -+ ((struct bkey *) (_src))->u64s); \ -+ \ -+ memcpy_u64s_small((_dst), (_src), \ -+ ((struct bkey *) (_src))->u64s); \ -+} while (0) -+ -+struct btree; -+ -+struct bkey_format_state { -+ u64 field_min[BKEY_NR_FIELDS]; -+ u64 field_max[BKEY_NR_FIELDS]; -+}; -+ -+void bch2_bkey_format_init(struct bkey_format_state *); -+void bch2_bkey_format_add_key(struct bkey_format_state *, const struct bkey *); -+void bch2_bkey_format_add_pos(struct bkey_format_state *, struct bpos); -+struct bkey_format bch2_bkey_format_done(struct bkey_format_state *); -+const char *bch2_bkey_format_validate(struct bkey_format *); -+ -+__pure -+unsigned bch2_bkey_greatest_differing_bit(const struct btree *, -+ const struct bkey_packed *, -+ const struct bkey_packed *); -+__pure -+unsigned bch2_bkey_ffs(const struct btree *, const struct bkey_packed *); -+ -+__pure -+int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *, -+ const struct bkey_packed *, -+ const struct btree *); -+ -+__pure -+int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *, -+ const struct bkey_packed *, -+ const struct bpos *); -+ -+__pure -+int __bch2_bkey_cmp_packed(const struct bkey_packed *, -+ const struct bkey_packed *, -+ const struct btree *); -+ -+__pure -+int __bch2_bkey_cmp_left_packed(const struct btree *, -+ const struct bkey_packed *, -+ const struct bpos *); -+ -+static inline __pure -+int bkey_cmp_left_packed(const struct btree *b, -+ const struct bkey_packed *l, const struct bpos *r) -+{ -+ return __bch2_bkey_cmp_left_packed(b, l, r); -+} -+ -+/* -+ * we prefer to pass bpos by ref, but it's often enough terribly convenient to -+ * pass it by by val... 
as much as I hate c++, const ref would be nice here: -+ */ -+__pure __flatten -+static inline int bkey_cmp_left_packed_byval(const struct btree *b, -+ const struct bkey_packed *l, -+ struct bpos r) -+{ -+ return bkey_cmp_left_packed(b, l, &r); -+} -+ -+/* -+ * If @_l or @_r are struct bkey * (not bkey_packed *), uses type information to -+ * skip dispatching on k->format: -+ */ -+#define bkey_cmp_packed(_b, _l, _r) \ -+({ \ -+ int _cmp; \ -+ \ -+ switch (bkey_lr_packed_typecheck(_l, _r)) { \ -+ case BKEY_PACKED_NONE: \ -+ _cmp = bkey_cmp(((struct bkey *) (_l))->p, \ -+ ((struct bkey *) (_r))->p); \ -+ break; \ -+ case BKEY_PACKED_LEFT: \ -+ _cmp = bkey_cmp_left_packed((_b), \ -+ (struct bkey_packed *) (_l), \ -+ &((struct bkey *) (_r))->p); \ -+ break; \ -+ case BKEY_PACKED_RIGHT: \ -+ _cmp = -bkey_cmp_left_packed((_b), \ -+ (struct bkey_packed *) (_r), \ -+ &((struct bkey *) (_l))->p); \ -+ break; \ -+ case BKEY_PACKED_BOTH: \ -+ _cmp = __bch2_bkey_cmp_packed((void *) (_l), \ -+ (void *) (_r), (_b)); \ -+ break; \ -+ } \ -+ _cmp; \ -+}) -+ -+#if 1 -+static __always_inline int bkey_cmp(struct bpos l, struct bpos r) -+{ -+ if (l.inode != r.inode) -+ return l.inode < r.inode ? -1 : 1; -+ if (l.offset != r.offset) -+ return l.offset < r.offset ? -1 : 1; -+ if (l.snapshot != r.snapshot) -+ return l.snapshot < r.snapshot ? -1 : 1; -+ return 0; -+} -+#else -+int bkey_cmp(struct bpos l, struct bpos r); -+#endif -+ -+static inline struct bpos bpos_min(struct bpos l, struct bpos r) -+{ -+ return bkey_cmp(l, r) < 0 ? l : r; -+} -+ -+void bch2_bpos_swab(struct bpos *); -+void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *); -+ -+static __always_inline int bversion_cmp(struct bversion l, struct bversion r) -+{ -+ return cmp_int(l.hi, r.hi) ?: -+ cmp_int(l.lo, r.lo); -+} -+ -+#define ZERO_VERSION ((struct bversion) { .hi = 0, .lo = 0 }) -+#define MAX_VERSION ((struct bversion) { .hi = ~0, .lo = ~0ULL }) -+ -+static __always_inline int bversion_zero(struct bversion v) -+{ -+ return !bversion_cmp(v, ZERO_VERSION); -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+/* statement expressions confusing unlikely()? */ -+#define bkey_packed(_k) \ -+ ({ EBUG_ON((_k)->format > KEY_FORMAT_CURRENT); \ -+ (_k)->format != KEY_FORMAT_CURRENT; }) -+#else -+#define bkey_packed(_k) ((_k)->format != KEY_FORMAT_CURRENT) -+#endif -+ -+/* -+ * It's safe to treat an unpacked bkey as a packed one, but not the reverse -+ */ -+static inline struct bkey_packed *bkey_to_packed(struct bkey_i *k) -+{ -+ return (struct bkey_packed *) k; -+} -+ -+static inline const struct bkey_packed *bkey_to_packed_c(const struct bkey_i *k) -+{ -+ return (const struct bkey_packed *) k; -+} -+ -+static inline struct bkey_i *packed_to_bkey(struct bkey_packed *k) -+{ -+ return bkey_packed(k) ? NULL : (struct bkey_i *) k; -+} -+ -+static inline const struct bkey *packed_to_bkey_c(const struct bkey_packed *k) -+{ -+ return bkey_packed(k) ? 
NULL : (const struct bkey *) k; -+} -+ -+static inline unsigned bkey_format_key_bits(const struct bkey_format *format) -+{ -+ return format->bits_per_field[BKEY_FIELD_INODE] + -+ format->bits_per_field[BKEY_FIELD_OFFSET] + -+ format->bits_per_field[BKEY_FIELD_SNAPSHOT]; -+} -+ -+static inline struct bpos bkey_successor(struct bpos p) -+{ -+ struct bpos ret = p; -+ -+ if (!++ret.offset) -+ BUG_ON(!++ret.inode); -+ -+ return ret; -+} -+ -+static inline struct bpos bkey_predecessor(struct bpos p) -+{ -+ struct bpos ret = p; -+ -+ if (!ret.offset--) -+ BUG_ON(!ret.inode--); -+ -+ return ret; -+} -+ -+static inline u64 bkey_start_offset(const struct bkey *k) -+{ -+ return k->p.offset - k->size; -+} -+ -+static inline struct bpos bkey_start_pos(const struct bkey *k) -+{ -+ return (struct bpos) { -+ .inode = k->p.inode, -+ .offset = bkey_start_offset(k), -+ .snapshot = k->p.snapshot, -+ }; -+} -+ -+/* Packed helpers */ -+ -+static inline unsigned bkeyp_key_u64s(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ unsigned ret = bkey_packed(k) ? format->key_u64s : BKEY_U64s; -+ -+ EBUG_ON(k->u64s < ret); -+ return ret; -+} -+ -+static inline unsigned bkeyp_key_bytes(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ return bkeyp_key_u64s(format, k) * sizeof(u64); -+} -+ -+static inline unsigned bkeyp_val_u64s(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ return k->u64s - bkeyp_key_u64s(format, k); -+} -+ -+static inline size_t bkeyp_val_bytes(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ return bkeyp_val_u64s(format, k) * sizeof(u64); -+} -+ -+static inline void set_bkeyp_val_u64s(const struct bkey_format *format, -+ struct bkey_packed *k, unsigned val_u64s) -+{ -+ k->u64s = bkeyp_key_u64s(format, k) + val_u64s; -+} -+ -+#define bkeyp_val(_format, _k) \ -+ ((struct bch_val *) ((_k)->_data + bkeyp_key_u64s(_format, _k))) -+ -+extern const struct bkey_format bch2_bkey_format_current; -+ -+bool bch2_bkey_transform(const struct bkey_format *, -+ struct bkey_packed *, -+ const struct bkey_format *, -+ const struct bkey_packed *); -+ -+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *, -+ const struct bkey_packed *); -+ -+#ifndef HAVE_BCACHEFS_COMPILED_UNPACK -+struct bpos __bkey_unpack_pos(const struct bkey_format *, -+ const struct bkey_packed *); -+#endif -+ -+bool bch2_bkey_pack_key(struct bkey_packed *, const struct bkey *, -+ const struct bkey_format *); -+ -+enum bkey_pack_pos_ret { -+ BKEY_PACK_POS_EXACT, -+ BKEY_PACK_POS_SMALLER, -+ BKEY_PACK_POS_FAIL, -+}; -+ -+enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *, struct bpos, -+ const struct btree *); -+ -+static inline bool bkey_pack_pos(struct bkey_packed *out, struct bpos in, -+ const struct btree *b) -+{ -+ return bch2_bkey_pack_pos_lossy(out, in, b) == BKEY_PACK_POS_EXACT; -+} -+ -+void bch2_bkey_unpack(const struct btree *, struct bkey_i *, -+ const struct bkey_packed *); -+bool bch2_bkey_pack(struct bkey_packed *, const struct bkey_i *, -+ const struct bkey_format *); -+ -+static inline u64 bkey_field_max(const struct bkey_format *f, -+ enum bch_bkey_fields nr) -+{ -+ return f->bits_per_field[nr] < 64 -+ ? 
(le64_to_cpu(f->field_offset[nr]) + -+ ~(~0ULL << f->bits_per_field[nr])) -+ : U64_MAX; -+} -+ -+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK -+ -+int bch2_compile_bkey_format(const struct bkey_format *, void *); -+ -+#else -+ -+static inline int bch2_compile_bkey_format(const struct bkey_format *format, -+ void *out) { return 0; } -+ -+#endif -+ -+static inline void bkey_reassemble(struct bkey_i *dst, -+ struct bkey_s_c src) -+{ -+ dst->k = *src.k; -+ memcpy_u64s_small(&dst->v, src.v, bkey_val_u64s(src.k)); -+} -+ -+#define bkey_s_null ((struct bkey_s) { .k = NULL }) -+#define bkey_s_c_null ((struct bkey_s_c) { .k = NULL }) -+ -+#define bkey_s_err(err) ((struct bkey_s) { .k = ERR_PTR(err) }) -+#define bkey_s_c_err(err) ((struct bkey_s_c) { .k = ERR_PTR(err) }) -+ -+static inline struct bkey_s bkey_to_s(struct bkey *k) -+{ -+ return (struct bkey_s) { .k = k, .v = NULL }; -+} -+ -+static inline struct bkey_s_c bkey_to_s_c(const struct bkey *k) -+{ -+ return (struct bkey_s_c) { .k = k, .v = NULL }; -+} -+ -+static inline struct bkey_s bkey_i_to_s(struct bkey_i *k) -+{ -+ return (struct bkey_s) { .k = &k->k, .v = &k->v }; -+} -+ -+static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k) -+{ -+ return (struct bkey_s_c) { .k = &k->k, .v = &k->v }; -+} -+ -+/* -+ * For a given type of value (e.g. struct bch_extent), generates the types for -+ * bkey + bch_extent - inline, split, split const - and also all the conversion -+ * functions, which also check that the value is of the correct type. -+ * -+ * We use anonymous unions for upcasting - e.g. converting from e.g. a -+ * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion -+ * functions. -+ */ -+#define BKEY_VAL_ACCESSORS(name) \ -+struct bkey_i_##name { \ -+ union { \ -+ struct bkey k; \ -+ struct bkey_i k_i; \ -+ }; \ -+ struct bch_##name v; \ -+}; \ -+ \ -+struct bkey_s_c_##name { \ -+ union { \ -+ struct { \ -+ const struct bkey *k; \ -+ const struct bch_##name *v; \ -+ }; \ -+ struct bkey_s_c s_c; \ -+ }; \ -+}; \ -+ \ -+struct bkey_s_##name { \ -+ union { \ -+ struct { \ -+ struct bkey *k; \ -+ struct bch_##name *v; \ -+ }; \ -+ struct bkey_s_c_##name c; \ -+ struct bkey_s s; \ -+ struct bkey_s_c s_c; \ -+ }; \ -+}; \ -+ \ -+static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \ -+{ \ -+ EBUG_ON(k->k.type != KEY_TYPE_##name); \ -+ return container_of(&k->k, struct bkey_i_##name, k); \ -+} \ -+ \ -+static inline const struct bkey_i_##name * \ -+bkey_i_to_##name##_c(const struct bkey_i *k) \ -+{ \ -+ EBUG_ON(k->k.type != KEY_TYPE_##name); \ -+ return container_of(&k->k, struct bkey_i_##name, k); \ -+} \ -+ \ -+static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \ -+{ \ -+ EBUG_ON(k.k->type != KEY_TYPE_##name); \ -+ return (struct bkey_s_##name) { \ -+ .k = k.k, \ -+ .v = container_of(k.v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\ -+{ \ -+ EBUG_ON(k.k->type != KEY_TYPE_##name); \ -+ return (struct bkey_s_c_##name) { \ -+ .k = k.k, \ -+ .v = container_of(k.v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_##name name##_i_to_s(struct bkey_i_##name *k)\ -+{ \ -+ return (struct bkey_s_##name) { \ -+ .k = &k->k, \ -+ .v = &k->v, \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_c_##name \ -+name##_i_to_s_c(const struct bkey_i_##name *k) \ -+{ \ -+ return (struct bkey_s_c_##name) { \ -+ .k = &k->k, \ -+ .v = &k->v, \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_##name 
bkey_i_to_s_##name(struct bkey_i *k) \ -+{ \ -+ EBUG_ON(k->k.type != KEY_TYPE_##name); \ -+ return (struct bkey_s_##name) { \ -+ .k = &k->k, \ -+ .v = container_of(&k->v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_c_##name \ -+bkey_i_to_s_c_##name(const struct bkey_i *k) \ -+{ \ -+ EBUG_ON(k->k.type != KEY_TYPE_##name); \ -+ return (struct bkey_s_c_##name) { \ -+ .k = &k->k, \ -+ .v = container_of(&k->v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\ -+{ \ -+ struct bkey_i_##name *k = \ -+ container_of(&_k->k, struct bkey_i_##name, k); \ -+ \ -+ bkey_init(&k->k); \ -+ memset(&k->v, 0, sizeof(k->v)); \ -+ k->k.type = KEY_TYPE_##name; \ -+ set_bkey_val_bytes(&k->k, sizeof(k->v)); \ -+ \ -+ return k; \ -+} -+ -+BKEY_VAL_ACCESSORS(cookie); -+BKEY_VAL_ACCESSORS(btree_ptr); -+BKEY_VAL_ACCESSORS(extent); -+BKEY_VAL_ACCESSORS(reservation); -+BKEY_VAL_ACCESSORS(inode); -+BKEY_VAL_ACCESSORS(inode_generation); -+BKEY_VAL_ACCESSORS(dirent); -+BKEY_VAL_ACCESSORS(xattr); -+BKEY_VAL_ACCESSORS(alloc); -+BKEY_VAL_ACCESSORS(quota); -+BKEY_VAL_ACCESSORS(stripe); -+BKEY_VAL_ACCESSORS(reflink_p); -+BKEY_VAL_ACCESSORS(reflink_v); -+BKEY_VAL_ACCESSORS(inline_data); -+BKEY_VAL_ACCESSORS(btree_ptr_v2); -+ -+/* byte order helpers */ -+ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ -+static inline unsigned high_word_offset(const struct bkey_format *f) -+{ -+ return f->key_u64s - 1; -+} -+ -+#define high_bit_offset 0 -+#define nth_word(p, n) ((p) - (n)) -+ -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ -+static inline unsigned high_word_offset(const struct bkey_format *f) -+{ -+ return 0; -+} -+ -+#define high_bit_offset KEY_PACKED_BITS_START -+#define nth_word(p, n) ((p) + (n)) -+ -+#else -+#error edit for your odd byteorder. 
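An illustrative aside, not part of the patch: the helpers above pick out the u64 holding the highest-order packed bits (the word the packed-key comparisons start from) and then walk toward lower-order words. On little endian that is the last u64 of the key and next_word() steps to lower addresses; on big endian it is the first u64 and next_word() steps upward. A self-contained sketch, assuming GCC's __BYTE_ORDER__ macros:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t key[3] = { 0, 0, 0 };	/* a packed key with key_u64s == 3 */
	unsigned key_u64s = 3;

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	uint64_t *high = key + key_u64s - 1;	/* high_word(): last u64 */
	uint64_t *next = high - 1;		/* next_word(): walk down */
#else
	uint64_t *high = key;			/* high_word(): first u64 */
	uint64_t *next = high + 1;		/* next_word(): walk up */
#endif

	printf("high word at index %td, next at index %td\n",
	       high - key, next - key);
	return 0;
}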
-+#endif -+ -+#define high_word(f, k) ((k)->_data + high_word_offset(f)) -+#define next_word(p) nth_word(p, 1) -+#define prev_word(p) nth_word(p, -1) -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_bkey_pack_test(void); -+#else -+static inline void bch2_bkey_pack_test(void) {} -+#endif -+ -+#endif /* _BCACHEFS_BKEY_H */ -diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c -new file mode 100644 -index 000000000000..36e0c5152b47 ---- /dev/null -+++ b/fs/bcachefs/bkey_methods.c -@@ -0,0 +1,353 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_types.h" -+#include "alloc_background.h" -+#include "dirent.h" -+#include "ec.h" -+#include "error.h" -+#include "extents.h" -+#include "inode.h" -+#include "quota.h" -+#include "reflink.h" -+#include "xattr.h" -+ -+const char * const bch2_bkey_types[] = { -+#define x(name, nr) #name, -+ BCH_BKEY_TYPES() -+#undef x -+ NULL -+}; -+ -+static const char *deleted_key_invalid(const struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ return NULL; -+} -+ -+#define bch2_bkey_ops_deleted (struct bkey_ops) { \ -+ .key_invalid = deleted_key_invalid, \ -+} -+ -+#define bch2_bkey_ops_discard (struct bkey_ops) { \ -+ .key_invalid = deleted_key_invalid, \ -+} -+ -+static const char *empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (bkey_val_bytes(k.k)) -+ return "value size should be zero"; -+ -+ return NULL; -+} -+ -+#define bch2_bkey_ops_error (struct bkey_ops) { \ -+ .key_invalid = empty_val_key_invalid, \ -+} -+ -+static const char *key_type_cookie_invalid(const struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ if (bkey_val_bytes(k.k) != sizeof(struct bch_cookie)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+#define bch2_bkey_ops_cookie (struct bkey_ops) { \ -+ .key_invalid = key_type_cookie_invalid, \ -+} -+ -+#define bch2_bkey_ops_whiteout (struct bkey_ops) { \ -+ .key_invalid = empty_val_key_invalid, \ -+} -+ -+static const char *key_type_inline_data_invalid(const struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ return NULL; -+} -+ -+static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ pr_buf(out, "(%zu bytes)", bkey_val_bytes(k.k)); -+} -+ -+#define bch2_bkey_ops_inline_data (struct bkey_ops) { \ -+ .key_invalid = key_type_inline_data_invalid, \ -+ .val_to_text = key_type_inline_data_to_text, \ -+} -+ -+static const struct bkey_ops bch2_bkey_ops[] = { -+#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name, -+ BCH_BKEY_TYPES() -+#undef x -+}; -+ -+const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (k.k->type >= KEY_TYPE_MAX) -+ return "invalid type"; -+ -+ return bch2_bkey_ops[k.k->type].key_invalid(c, k); -+} -+ -+const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum btree_node_type type) -+{ -+ if (k.k->u64s < BKEY_U64s) -+ return "u64s too small"; -+ -+ if (type == BKEY_TYPE_BTREE && -+ bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) -+ return "value too big"; -+ -+ if (btree_node_type_is_extents(type)) { -+ if ((k.k->size == 0) != bkey_deleted(k.k)) -+ return "bad size field"; -+ -+ if (k.k->size > k.k->p.offset) -+ return "size greater than offset"; -+ } else { -+ if (k.k->size) -+ return "nonzero size field"; -+ } -+ -+ if (k.k->p.snapshot) -+ return "nonzero snapshot"; -+ -+ if (type != BKEY_TYPE_BTREE && -+ !bkey_cmp(k.k->p, POS_MAX)) -+ return "POS_MAX key"; -+ -+ return NULL; -+} -+ -+const char *bch2_bkey_invalid(struct 
bch_fs *c, struct bkey_s_c k, -+ enum btree_node_type type) -+{ -+ return __bch2_bkey_invalid(c, k, type) ?: -+ bch2_bkey_val_invalid(c, k); -+} -+ -+const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k) -+{ -+ if (bkey_cmp(k.k->p, b->data->min_key) < 0) -+ return "key before start of btree node"; -+ -+ if (bkey_cmp(k.k->p, b->data->max_key) > 0) -+ return "key past end of btree node"; -+ -+ return NULL; -+} -+ -+void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; -+ const char *invalid; -+ -+ BUG_ON(!k.k->u64s); -+ -+ invalid = bch2_bkey_invalid(c, k, btree_node_type(b)) ?: -+ bch2_bkey_in_btree_node(b, k); -+ if (invalid) { -+ char buf[160]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, k); -+ bch2_fs_inconsistent(c, "invalid bkey %s: %s", buf, invalid); -+ return; -+ } -+ -+ if (ops->key_debugcheck) -+ ops->key_debugcheck(c, k); -+} -+ -+void bch2_bpos_to_text(struct printbuf *out, struct bpos pos) -+{ -+ if (!bkey_cmp(pos, POS_MIN)) -+ pr_buf(out, "POS_MIN"); -+ else if (!bkey_cmp(pos, POS_MAX)) -+ pr_buf(out, "POS_MAX"); -+ else -+ pr_buf(out, "%llu:%llu", pos.inode, pos.offset); -+} -+ -+void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k) -+{ -+ if (k) { -+ pr_buf(out, "u64s %u type %s ", k->u64s, -+ bch2_bkey_types[k->type]); -+ -+ bch2_bpos_to_text(out, k->p); -+ -+ pr_buf(out, " snap %u len %u ver %llu", -+ k->p.snapshot, k->size, k->version.lo); -+ } else { -+ pr_buf(out, "(null)"); -+ } -+} -+ -+void bch2_val_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; -+ -+ if (likely(ops->val_to_text)) -+ ops->val_to_text(out, c, k); -+} -+ -+void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ bch2_bkey_to_text(out, k.k); -+ -+ if (k.k) { -+ pr_buf(out, ": "); -+ bch2_val_to_text(out, c, k); -+ } -+} -+ -+void bch2_bkey_swab_val(struct bkey_s k) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; -+ -+ if (ops->swab) -+ ops->swab(k); -+} -+ -+bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; -+ -+ return ops->key_normalize -+ ? 
ops->key_normalize(c, k) -+ : false; -+} -+ -+enum merge_result bch2_bkey_merge(struct bch_fs *c, -+ struct bkey_s l, struct bkey_s r) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[l.k->type]; -+ enum merge_result ret; -+ -+ if (key_merging_disabled(c) || -+ !ops->key_merge || -+ l.k->type != r.k->type || -+ bversion_cmp(l.k->version, r.k->version) || -+ bkey_cmp(l.k->p, bkey_start_pos(r.k))) -+ return BCH_MERGE_NOMERGE; -+ -+ ret = ops->key_merge(c, l, r); -+ -+ if (ret != BCH_MERGE_NOMERGE) -+ l.k->needs_whiteout |= r.k->needs_whiteout; -+ return ret; -+} -+ -+static const struct old_bkey_type { -+ u8 btree_node_type; -+ u8 old; -+ u8 new; -+} bkey_renumber_table[] = { -+ {BKEY_TYPE_BTREE, 128, KEY_TYPE_btree_ptr }, -+ {BKEY_TYPE_EXTENTS, 128, KEY_TYPE_extent }, -+ {BKEY_TYPE_EXTENTS, 129, KEY_TYPE_extent }, -+ {BKEY_TYPE_EXTENTS, 130, KEY_TYPE_reservation }, -+ {BKEY_TYPE_INODES, 128, KEY_TYPE_inode }, -+ {BKEY_TYPE_INODES, 130, KEY_TYPE_inode_generation }, -+ {BKEY_TYPE_DIRENTS, 128, KEY_TYPE_dirent }, -+ {BKEY_TYPE_DIRENTS, 129, KEY_TYPE_whiteout }, -+ {BKEY_TYPE_XATTRS, 128, KEY_TYPE_xattr }, -+ {BKEY_TYPE_XATTRS, 129, KEY_TYPE_whiteout }, -+ {BKEY_TYPE_ALLOC, 128, KEY_TYPE_alloc }, -+ {BKEY_TYPE_QUOTAS, 128, KEY_TYPE_quota }, -+}; -+ -+void bch2_bkey_renumber(enum btree_node_type btree_node_type, -+ struct bkey_packed *k, -+ int write) -+{ -+ const struct old_bkey_type *i; -+ -+ for (i = bkey_renumber_table; -+ i < bkey_renumber_table + ARRAY_SIZE(bkey_renumber_table); -+ i++) -+ if (btree_node_type == i->btree_node_type && -+ k->type == (write ? i->new : i->old)) { -+ k->type = write ? i->old : i->new; -+ break; -+ } -+} -+ -+void __bch2_bkey_compat(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, -+ struct bkey_format *f, -+ struct bkey_packed *k) -+{ -+ const struct bkey_ops *ops; -+ struct bkey uk; -+ struct bkey_s u; -+ int i; -+ -+ /* -+ * Do these operations in reverse order in the write path: -+ */ -+ -+ for (i = 0; i < 4; i++) -+ switch (!write ? 
i : 3 - i) { -+ case 0: -+ if (big_endian != CPU_BIG_ENDIAN) -+ bch2_bkey_swab_key(f, k); -+ break; -+ case 1: -+ if (version < bcachefs_metadata_version_bkey_renumber) -+ bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write); -+ break; -+ case 2: -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_id == BTREE_ID_INODES) { -+ if (!bkey_packed(k)) { -+ struct bkey_i *u = packed_to_bkey(k); -+ swap(u->k.p.inode, u->k.p.offset); -+ } else if (f->bits_per_field[BKEY_FIELD_INODE] && -+ f->bits_per_field[BKEY_FIELD_OFFSET]) { -+ struct bkey_format tmp = *f, *in = f, *out = &tmp; -+ -+ swap(tmp.bits_per_field[BKEY_FIELD_INODE], -+ tmp.bits_per_field[BKEY_FIELD_OFFSET]); -+ swap(tmp.field_offset[BKEY_FIELD_INODE], -+ tmp.field_offset[BKEY_FIELD_OFFSET]); -+ -+ if (!write) -+ swap(in, out); -+ -+ uk = __bch2_bkey_unpack_key(in, k); -+ swap(uk.p.inode, uk.p.offset); -+ BUG_ON(!bch2_bkey_pack_key(k, &uk, out)); -+ } -+ } -+ break; -+ case 3: -+ if (!bkey_packed(k)) { -+ u = bkey_i_to_s(packed_to_bkey(k)); -+ } else { -+ uk = __bch2_bkey_unpack_key(f, k); -+ u.k = &uk; -+ u.v = bkeyp_val(f, k); -+ } -+ -+ if (big_endian != CPU_BIG_ENDIAN) -+ bch2_bkey_swab_val(u); -+ -+ ops = &bch2_bkey_ops[k->type]; -+ -+ if (ops->compat) -+ ops->compat(btree_id, version, big_endian, write, u); -+ break; -+ default: -+ BUG(); -+ } -+} -diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h -new file mode 100644 -index 000000000000..0bca725ae3b8 ---- /dev/null -+++ b/fs/bcachefs/bkey_methods.h -@@ -0,0 +1,82 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BKEY_METHODS_H -+#define _BCACHEFS_BKEY_METHODS_H -+ -+#include "bkey.h" -+ -+struct bch_fs; -+struct btree; -+struct bkey; -+enum btree_node_type; -+ -+extern const char * const bch2_bkey_types[]; -+ -+enum merge_result { -+ BCH_MERGE_NOMERGE, -+ -+ /* -+ * The keys were mergeable, but would have overflowed size - so instead -+ * l was changed to the maximum size, and both keys were modified: -+ */ -+ BCH_MERGE_PARTIAL, -+ BCH_MERGE_MERGE, -+}; -+ -+struct bkey_ops { -+ /* Returns reason for being invalid if invalid, else NULL: */ -+ const char * (*key_invalid)(const struct bch_fs *, -+ struct bkey_s_c); -+ void (*key_debugcheck)(struct bch_fs *, struct bkey_s_c); -+ void (*val_to_text)(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ void (*swab)(struct bkey_s); -+ bool (*key_normalize)(struct bch_fs *, struct bkey_s); -+ enum merge_result (*key_merge)(struct bch_fs *, -+ struct bkey_s, struct bkey_s); -+ void (*compat)(enum btree_id id, unsigned version, -+ unsigned big_endian, int write, -+ struct bkey_s); -+}; -+ -+const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c); -+const char *__bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, -+ enum btree_node_type); -+const char *bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, -+ enum btree_node_type); -+const char *bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c); -+ -+void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); -+ -+void bch2_bpos_to_text(struct printbuf *, struct bpos); -+void bch2_bkey_to_text(struct printbuf *, const struct bkey *); -+void bch2_val_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+void bch2_bkey_swab_val(struct bkey_s); -+ -+bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s); -+ -+enum merge_result bch2_bkey_merge(struct bch_fs *, -+ struct bkey_s, struct 
bkey_s); -+ -+void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int); -+ -+void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned, -+ int, struct bkey_format *, struct bkey_packed *); -+ -+static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, -+ struct bkey_format *f, -+ struct bkey_packed *k) -+{ -+ if (version < bcachefs_metadata_version_current || -+ big_endian != CPU_BIG_ENDIAN) -+ __bch2_bkey_compat(level, btree_id, version, -+ big_endian, write, f, k); -+ -+} -+ -+#endif /* _BCACHEFS_BKEY_METHODS_H */ -diff --git a/fs/bcachefs/bkey_on_stack.h b/fs/bcachefs/bkey_on_stack.h -new file mode 100644 -index 000000000000..f607a0cb37ed ---- /dev/null -+++ b/fs/bcachefs/bkey_on_stack.h -@@ -0,0 +1,43 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BKEY_ON_STACK_H -+#define _BCACHEFS_BKEY_ON_STACK_H -+ -+#include "bcachefs.h" -+ -+struct bkey_on_stack { -+ struct bkey_i *k; -+ u64 onstack[12]; -+}; -+ -+static inline void bkey_on_stack_realloc(struct bkey_on_stack *s, -+ struct bch_fs *c, unsigned u64s) -+{ -+ if (s->k == (void *) s->onstack && -+ u64s > ARRAY_SIZE(s->onstack)) { -+ s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS); -+ memcpy(s->k, s->onstack, sizeof(s->onstack)); -+ } -+} -+ -+static inline void bkey_on_stack_reassemble(struct bkey_on_stack *s, -+ struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ bkey_on_stack_realloc(s, c, k.k->u64s); -+ bkey_reassemble(s->k, k); -+} -+ -+static inline void bkey_on_stack_init(struct bkey_on_stack *s) -+{ -+ s->k = (void *) s->onstack; -+} -+ -+static inline void bkey_on_stack_exit(struct bkey_on_stack *s, -+ struct bch_fs *c) -+{ -+ if (s->k != (void *) s->onstack) -+ mempool_free(s->k, &c->large_bkey_pool); -+ s->k = NULL; -+} -+ -+#endif /* _BCACHEFS_BKEY_ON_STACK_H */ -diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c -new file mode 100644 -index 000000000000..839e78d1dc35 ---- /dev/null -+++ b/fs/bcachefs/bkey_sort.c -@@ -0,0 +1,515 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "bkey_sort.h" -+#include "bset.h" -+#include "extents.h" -+ -+typedef int (*sort_cmp_fn)(struct btree *, -+ struct bkey_packed *, -+ struct bkey_packed *); -+ -+static inline bool sort_iter_end(struct sort_iter *iter) -+{ -+ return !iter->used; -+} -+ -+static inline void __sort_iter_sift(struct sort_iter *iter, -+ unsigned from, -+ sort_cmp_fn cmp) -+{ -+ unsigned i; -+ -+ for (i = from; -+ i + 1 < iter->used && -+ cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0; -+ i++) -+ swap(iter->data[i], iter->data[i + 1]); -+} -+ -+static inline void sort_iter_sift(struct sort_iter *iter, sort_cmp_fn cmp) -+{ -+ -+ __sort_iter_sift(iter, 0, cmp); -+} -+ -+static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp) -+{ -+ unsigned i = iter->used; -+ -+ while (i--) -+ __sort_iter_sift(iter, i, cmp); -+} -+ -+static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter) -+{ -+ return !sort_iter_end(iter) ? 
iter->data->k : NULL; -+} -+ -+static inline void __sort_iter_advance(struct sort_iter *iter, -+ unsigned idx, sort_cmp_fn cmp) -+{ -+ struct sort_iter_set *i = iter->data + idx; -+ -+ BUG_ON(idx >= iter->used); -+ -+ i->k = bkey_next_skip_noops(i->k, i->end); -+ -+ BUG_ON(i->k > i->end); -+ -+ if (i->k == i->end) -+ array_remove_item(iter->data, iter->used, idx); -+ else -+ __sort_iter_sift(iter, idx, cmp); -+} -+ -+static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp) -+{ -+ __sort_iter_advance(iter, 0, cmp); -+} -+ -+static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter, -+ sort_cmp_fn cmp) -+{ -+ struct bkey_packed *ret = sort_iter_peek(iter); -+ -+ if (ret) -+ sort_iter_advance(iter, cmp); -+ -+ return ret; -+} -+ -+/* -+ * If keys compare equal, compare by pointer order: -+ */ -+static inline int key_sort_fix_overlapping_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ return bkey_cmp_packed(b, l, r) ?: -+ cmp_int((unsigned long) l, (unsigned long) r); -+} -+ -+static inline bool should_drop_next_key(struct sort_iter *iter) -+{ -+ /* -+ * key_sort_cmp() ensures that when keys compare equal the older key -+ * comes first; so if l->k compares equal to r->k then l->k is older -+ * and should be dropped. -+ */ -+ return iter->used >= 2 && -+ !bkey_cmp_packed(iter->b, -+ iter->data[0].k, -+ iter->data[1].k); -+} -+ -+struct btree_nr_keys -+bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, -+ struct sort_iter *iter) -+{ -+ struct bkey_packed *out = dst->start; -+ struct bkey_packed *k; -+ struct btree_nr_keys nr; -+ -+ memset(&nr, 0, sizeof(nr)); -+ -+ sort_iter_sort(iter, key_sort_fix_overlapping_cmp); -+ -+ while ((k = sort_iter_peek(iter))) { -+ if (!bkey_whiteout(k) && -+ !should_drop_next_key(iter)) { -+ bkey_copy(out, k); -+ btree_keys_account_key_add(&nr, 0, out); -+ out = bkey_next(out); -+ } -+ -+ sort_iter_advance(iter, key_sort_fix_overlapping_cmp); -+ } -+ -+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data); -+ return nr; -+} -+ -+static void extent_sort_append(struct bch_fs *c, -+ struct bkey_format *f, -+ struct btree_nr_keys *nr, -+ struct bkey_packed **out, -+ struct bkey_s k) -+{ -+ if (!bkey_whiteout(k.k)) { -+ if (!bch2_bkey_pack_key(*out, k.k, f)) -+ memcpy_u64s_small(*out, k.k, BKEY_U64s); -+ -+ memcpy_u64s_small(bkeyp_val(f, *out), k.v, bkey_val_u64s(k.k)); -+ -+ btree_keys_account_key_add(nr, 0, *out); -+ *out = bkey_next(*out); -+ } -+} -+ -+/* Sort + repack in a new format: */ -+struct btree_nr_keys -+bch2_sort_repack(struct bset *dst, struct btree *src, -+ struct btree_node_iter *src_iter, -+ struct bkey_format *out_f, -+ bool filter_whiteouts) -+{ -+ struct bkey_format *in_f = &src->format; -+ struct bkey_packed *in, *out = vstruct_last(dst); -+ struct btree_nr_keys nr; -+ -+ memset(&nr, 0, sizeof(nr)); -+ -+ while ((in = bch2_btree_node_iter_next_all(src_iter, src))) { -+ if (filter_whiteouts && bkey_whiteout(in)) -+ continue; -+ -+ if (bch2_bkey_transform(out_f, out, bkey_packed(in) -+ ? 
in_f : &bch2_bkey_format_current, in)) -+ out->format = KEY_FORMAT_LOCAL_BTREE; -+ else -+ bch2_bkey_unpack(src, (void *) out, in); -+ -+ btree_keys_account_key_add(&nr, 0, out); -+ out = bkey_next(out); -+ } -+ -+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data); -+ return nr; -+} -+ -+/* Sort, repack, and call bch2_bkey_normalize() to drop stale pointers: */ -+struct btree_nr_keys -+bch2_sort_repack_merge(struct bch_fs *c, -+ struct bset *dst, struct btree *src, -+ struct btree_node_iter *iter, -+ struct bkey_format *out_f, -+ bool filter_whiteouts) -+{ -+ struct bkey_packed *out = vstruct_last(dst), *k_packed; -+ struct bkey_on_stack k; -+ struct btree_nr_keys nr; -+ -+ memset(&nr, 0, sizeof(nr)); -+ bkey_on_stack_init(&k); -+ -+ while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) { -+ if (filter_whiteouts && bkey_whiteout(k_packed)) -+ continue; -+ -+ /* -+ * NOTE: -+ * bch2_bkey_normalize may modify the key we pass it (dropping -+ * stale pointers) and we don't have a write lock on the src -+ * node; we have to make a copy of the entire key before calling -+ * normalize -+ */ -+ bkey_on_stack_realloc(&k, c, k_packed->u64s + BKEY_U64s); -+ bch2_bkey_unpack(src, k.k, k_packed); -+ -+ if (filter_whiteouts && -+ bch2_bkey_normalize(c, bkey_i_to_s(k.k))) -+ continue; -+ -+ extent_sort_append(c, out_f, &nr, &out, bkey_i_to_s(k.k)); -+ } -+ -+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data); -+ bkey_on_stack_exit(&k, c); -+ return nr; -+} -+ -+static inline int sort_keys_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ return bkey_cmp_packed(b, l, r) ?: -+ (int) bkey_deleted(r) - (int) bkey_deleted(l) ?: -+ (int) l->needs_whiteout - (int) r->needs_whiteout; -+} -+ -+unsigned bch2_sort_keys(struct bkey_packed *dst, -+ struct sort_iter *iter, -+ bool filter_whiteouts) -+{ -+ const struct bkey_format *f = &iter->b->format; -+ struct bkey_packed *in, *next, *out = dst; -+ -+ sort_iter_sort(iter, sort_keys_cmp); -+ -+ while ((in = sort_iter_next(iter, sort_keys_cmp))) { -+ bool needs_whiteout = false; -+ -+ if (bkey_whiteout(in) && -+ (filter_whiteouts || !in->needs_whiteout)) -+ continue; -+ -+ while ((next = sort_iter_peek(iter)) && -+ !bkey_cmp_packed(iter->b, in, next)) { -+ BUG_ON(in->needs_whiteout && -+ next->needs_whiteout); -+ needs_whiteout |= in->needs_whiteout; -+ in = sort_iter_next(iter, sort_keys_cmp); -+ } -+ -+ if (bkey_whiteout(in)) { -+ memcpy_u64s(out, in, bkeyp_key_u64s(f, in)); -+ set_bkeyp_val_u64s(f, out, 0); -+ } else { -+ bkey_copy(out, in); -+ } -+ out->needs_whiteout |= needs_whiteout; -+ out = bkey_next(out); -+ } -+ -+ return (u64 *) out - (u64 *) dst; -+} -+ -+/* Compat code for btree_node_old_extent_overwrite: */ -+ -+/* -+ * If keys compare equal, compare by pointer order: -+ * -+ * Necessary for sort_fix_overlapping() - if there are multiple keys that -+ * compare equal in different sets, we have to process them newest to oldest. 
-+ */ -+static inline int extent_sort_fix_overlapping_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ struct bkey ul = bkey_unpack_key(b, l); -+ struct bkey ur = bkey_unpack_key(b, r); -+ -+ return bkey_cmp(bkey_start_pos(&ul), -+ bkey_start_pos(&ur)) ?: -+ cmp_int((unsigned long) r, (unsigned long) l); -+} -+ -+/* -+ * The algorithm in extent_sort_fix_overlapping() relies on keys in the same -+ * bset being ordered by start offset - but 0 size whiteouts (which are always -+ * KEY_TYPE_deleted) break this ordering, so we need to skip over them: -+ */ -+static void extent_iter_advance(struct sort_iter *iter, unsigned idx) -+{ -+ struct sort_iter_set *i = iter->data + idx; -+ -+ do { -+ i->k = bkey_next_skip_noops(i->k, i->end); -+ } while (i->k != i->end && bkey_deleted(i->k)); -+ -+ if (i->k == i->end) -+ array_remove_item(iter->data, iter->used, idx); -+ else -+ __sort_iter_sift(iter, idx, extent_sort_fix_overlapping_cmp); -+} -+ -+struct btree_nr_keys -+bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, -+ struct sort_iter *iter) -+{ -+ struct btree *b = iter->b; -+ struct bkey_format *f = &b->format; -+ struct sort_iter_set *_l = iter->data, *_r = iter->data + 1; -+ struct bkey_packed *out = dst->start; -+ struct bkey l_unpacked, r_unpacked; -+ struct bkey_s l, r; -+ struct btree_nr_keys nr; -+ struct bkey_on_stack split; -+ unsigned i; -+ -+ memset(&nr, 0, sizeof(nr)); -+ bkey_on_stack_init(&split); -+ -+ sort_iter_sort(iter, extent_sort_fix_overlapping_cmp); -+ for (i = 0; i < iter->used;) { -+ if (bkey_deleted(iter->data[i].k)) -+ __sort_iter_advance(iter, i, -+ extent_sort_fix_overlapping_cmp); -+ else -+ i++; -+ } -+ -+ while (!sort_iter_end(iter)) { -+ l = __bkey_disassemble(b, _l->k, &l_unpacked); -+ -+ if (iter->used == 1) { -+ extent_sort_append(c, f, &nr, &out, l); -+ extent_iter_advance(iter, 0); -+ continue; -+ } -+ -+ r = __bkey_disassemble(b, _r->k, &r_unpacked); -+ -+ /* If current key and next key don't overlap, just append */ -+ if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) { -+ extent_sort_append(c, f, &nr, &out, l); -+ extent_iter_advance(iter, 0); -+ continue; -+ } -+ -+ /* Skip 0 size keys */ -+ if (!r.k->size) { -+ extent_iter_advance(iter, 1); -+ continue; -+ } -+ -+ /* -+ * overlap: keep the newer key and trim the older key so they -+ * don't overlap. comparing pointers tells us which one is -+ * newer, since the bsets are appended one after the other. 
-+ */ -+ -+ /* can't happen because of comparison func */ -+ BUG_ON(_l->k < _r->k && -+ !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k))); -+ -+ if (_l->k > _r->k) { -+ /* l wins, trim r */ -+ if (bkey_cmp(l.k->p, r.k->p) >= 0) { -+ extent_iter_advance(iter, 1); -+ } else { -+ bch2_cut_front_s(l.k->p, r); -+ extent_save(b, _r->k, r.k); -+ __sort_iter_sift(iter, 1, -+ extent_sort_fix_overlapping_cmp); -+ } -+ } else if (bkey_cmp(l.k->p, r.k->p) > 0) { -+ -+ /* -+ * r wins, but it overlaps in the middle of l - split l: -+ */ -+ bkey_on_stack_reassemble(&split, c, l.s_c); -+ bch2_cut_back(bkey_start_pos(r.k), split.k); -+ -+ bch2_cut_front_s(r.k->p, l); -+ extent_save(b, _l->k, l.k); -+ -+ __sort_iter_sift(iter, 0, -+ extent_sort_fix_overlapping_cmp); -+ -+ extent_sort_append(c, f, &nr, &out, -+ bkey_i_to_s(split.k)); -+ } else { -+ bch2_cut_back_s(bkey_start_pos(r.k), l); -+ extent_save(b, _l->k, l.k); -+ } -+ } -+ -+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data); -+ -+ bkey_on_stack_exit(&split, c); -+ return nr; -+} -+ -+static inline int sort_extents_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ return bkey_cmp_packed(b, l, r) ?: -+ (int) bkey_deleted(l) - (int) bkey_deleted(r); -+} -+ -+unsigned bch2_sort_extents(struct bkey_packed *dst, -+ struct sort_iter *iter, -+ bool filter_whiteouts) -+{ -+ struct bkey_packed *in, *out = dst; -+ -+ sort_iter_sort(iter, sort_extents_cmp); -+ -+ while ((in = sort_iter_next(iter, sort_extents_cmp))) { -+ if (bkey_deleted(in)) -+ continue; -+ -+ if (bkey_whiteout(in) && -+ (filter_whiteouts || !in->needs_whiteout)) -+ continue; -+ -+ bkey_copy(out, in); -+ out = bkey_next(out); -+ } -+ -+ return (u64 *) out - (u64 *) dst; -+} -+ -+static inline int sort_extent_whiteouts_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ struct bkey ul = bkey_unpack_key(b, l); -+ struct bkey ur = bkey_unpack_key(b, r); -+ -+ return bkey_cmp(bkey_start_pos(&ul), bkey_start_pos(&ur)); -+} -+ -+unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst, -+ struct sort_iter *iter) -+{ -+ const struct bkey_format *f = &iter->b->format; -+ struct bkey_packed *in, *out = dst; -+ struct bkey_i l, r; -+ bool prev = false, l_packed = false; -+ u64 max_packed_size = bkey_field_max(f, BKEY_FIELD_SIZE); -+ u64 max_packed_offset = bkey_field_max(f, BKEY_FIELD_OFFSET); -+ u64 new_size; -+ -+ max_packed_size = min_t(u64, max_packed_size, KEY_SIZE_MAX); -+ -+ sort_iter_sort(iter, sort_extent_whiteouts_cmp); -+ -+ while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) { -+ if (bkey_deleted(in)) -+ continue; -+ -+ EBUG_ON(bkeyp_val_u64s(f, in)); -+ EBUG_ON(in->type != KEY_TYPE_discard); -+ -+ r.k = bkey_unpack_key(iter->b, in); -+ -+ if (prev && -+ bkey_cmp(l.k.p, bkey_start_pos(&r.k)) >= 0) { -+ if (bkey_cmp(l.k.p, r.k.p) >= 0) -+ continue; -+ -+ new_size = l_packed -+ ? 
min(max_packed_size, max_packed_offset - -+ bkey_start_offset(&l.k)) -+ : KEY_SIZE_MAX; -+ -+ new_size = min(new_size, r.k.p.offset - -+ bkey_start_offset(&l.k)); -+ -+ BUG_ON(new_size < l.k.size); -+ -+ bch2_key_resize(&l.k, new_size); -+ -+ if (bkey_cmp(l.k.p, r.k.p) >= 0) -+ continue; -+ -+ bch2_cut_front(l.k.p, &r); -+ } -+ -+ if (prev) { -+ if (!bch2_bkey_pack(out, &l, f)) { -+ BUG_ON(l_packed); -+ bkey_copy(out, &l); -+ } -+ out = bkey_next(out); -+ } -+ -+ l = r; -+ prev = true; -+ l_packed = bkey_packed(in); -+ } -+ -+ if (prev) { -+ if (!bch2_bkey_pack(out, &l, f)) { -+ BUG_ON(l_packed); -+ bkey_copy(out, &l); -+ } -+ out = bkey_next(out); -+ } -+ -+ return (u64 *) out - (u64 *) dst; -+} -diff --git a/fs/bcachefs/bkey_sort.h b/fs/bcachefs/bkey_sort.h -new file mode 100644 -index 000000000000..458a051fdac5 ---- /dev/null -+++ b/fs/bcachefs/bkey_sort.h -@@ -0,0 +1,57 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BKEY_SORT_H -+#define _BCACHEFS_BKEY_SORT_H -+ -+struct sort_iter { -+ struct btree *b; -+ unsigned used; -+ unsigned size; -+ -+ struct sort_iter_set { -+ struct bkey_packed *k, *end; -+ } data[MAX_BSETS + 1]; -+}; -+ -+static inline void sort_iter_init(struct sort_iter *iter, struct btree *b) -+{ -+ iter->b = b; -+ iter->used = 0; -+ iter->size = ARRAY_SIZE(iter->data); -+} -+ -+static inline void sort_iter_add(struct sort_iter *iter, -+ struct bkey_packed *k, -+ struct bkey_packed *end) -+{ -+ BUG_ON(iter->used >= iter->size); -+ -+ if (k != end) -+ iter->data[iter->used++] = (struct sort_iter_set) { k, end }; -+} -+ -+struct btree_nr_keys -+bch2_key_sort_fix_overlapping(struct bch_fs *, struct bset *, -+ struct sort_iter *); -+struct btree_nr_keys -+bch2_extent_sort_fix_overlapping(struct bch_fs *, struct bset *, -+ struct sort_iter *); -+ -+struct btree_nr_keys -+bch2_sort_repack(struct bset *, struct btree *, -+ struct btree_node_iter *, -+ struct bkey_format *, bool); -+struct btree_nr_keys -+bch2_sort_repack_merge(struct bch_fs *, -+ struct bset *, struct btree *, -+ struct btree_node_iter *, -+ struct bkey_format *, bool); -+ -+unsigned bch2_sort_keys(struct bkey_packed *, -+ struct sort_iter *, bool); -+unsigned bch2_sort_extents(struct bkey_packed *, -+ struct sort_iter *, bool); -+ -+unsigned bch2_sort_extent_whiteouts(struct bkey_packed *, -+ struct sort_iter *); -+ -+#endif /* _BCACHEFS_BKEY_SORT_H */ -diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c -new file mode 100644 -index 000000000000..f7c2841ed8a7 ---- /dev/null -+++ b/fs/bcachefs/bset.c -@@ -0,0 +1,1742 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Code for working with individual keys, and sorted sets of keys with in a -+ * btree node -+ * -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "btree_cache.h" -+#include "bset.h" -+#include "eytzinger.h" -+#include "util.h" -+ -+#include -+#include -+#include -+#include -+ -+/* hack.. 
*/ -+#include "alloc_types.h" -+#include -+ -+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *, -+ struct btree *); -+ -+static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter) -+{ -+ unsigned n = ARRAY_SIZE(iter->data); -+ -+ while (n && __btree_node_iter_set_end(iter, n - 1)) -+ --n; -+ -+ return n; -+} -+ -+struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k) -+{ -+ unsigned offset = __btree_node_key_to_offset(b, k); -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) -+ if (offset <= t->end_offset) { -+ EBUG_ON(offset < btree_bkey_first_offset(t)); -+ return t; -+ } -+ -+ BUG(); -+} -+ -+/* -+ * There are never duplicate live keys in the btree - but including keys that -+ * have been flagged as deleted (and will be cleaned up later) we _will_ see -+ * duplicates. -+ * -+ * Thus the sort order is: usual key comparison first, but for keys that compare -+ * equal the deleted key(s) come first, and the (at most one) live version comes -+ * last. -+ * -+ * The main reason for this is insertion: to handle overwrites, we first iterate -+ * over keys that compare equal to our insert key, and then insert immediately -+ * prior to the first key greater than the key we're inserting - our insert -+ * position will be after all keys that compare equal to our insert key, which -+ * by the time we actually do the insert will all be deleted. -+ */ -+ -+void bch2_dump_bset(struct bch_fs *c, struct btree *b, -+ struct bset *i, unsigned set) -+{ -+ struct bkey_packed *_k, *_n; -+ struct bkey uk, n; -+ struct bkey_s_c k; -+ char buf[200]; -+ -+ if (!i->u64s) -+ return; -+ -+ for (_k = i->start; -+ _k < vstruct_last(i); -+ _k = _n) { -+ _n = bkey_next_skip_noops(_k, vstruct_last(i)); -+ -+ k = bkey_disassemble(b, _k, &uk); -+ if (c) -+ bch2_bkey_val_to_text(&PBUF(buf), c, k); -+ else -+ bch2_bkey_to_text(&PBUF(buf), k.k); -+ printk(KERN_ERR "block %u key %5zu: %s\n", set, -+ _k->_data - i->_data, buf); -+ -+ if (_n == vstruct_last(i)) -+ continue; -+ -+ n = bkey_unpack_key(b, _n); -+ -+ if (bkey_cmp(bkey_start_pos(&n), k.k->p) < 0) { -+ printk(KERN_ERR "Key skipped backwards\n"); -+ continue; -+ } -+ -+ if (!bkey_deleted(k.k) && -+ !bkey_cmp(n.p, k.k->p)) -+ printk(KERN_ERR "Duplicate keys\n"); -+ } -+} -+ -+void bch2_dump_btree_node(struct bch_fs *c, struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ console_lock(); -+ for_each_bset(b, t) -+ bch2_dump_bset(c, b, bset(b, t), t - b->set); -+ console_unlock(); -+} -+ -+void bch2_dump_btree_node_iter(struct btree *b, -+ struct btree_node_iter *iter) -+{ -+ struct btree_node_iter_set *set; -+ -+ printk(KERN_ERR "btree node iter with %u/%u sets:\n", -+ __btree_node_iter_used(iter), b->nsets); -+ -+ btree_node_iter_for_each(iter, set) { -+ struct bkey_packed *k = __btree_node_offset_to_key(b, set->k); -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ struct bkey uk = bkey_unpack_key(b, k); -+ char buf[100]; -+ -+ bch2_bkey_to_text(&PBUF(buf), &uk); -+ printk(KERN_ERR "set %zu key %u: %s\n", -+ t - b->set, set->k, buf); -+ } -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+void __bch2_verify_btree_nr_keys(struct btree *b) -+{ -+ struct bset_tree *t; -+ struct bkey_packed *k; -+ struct btree_nr_keys nr = { 0 }; -+ -+ for_each_bset(b, t) -+ bset_tree_for_each_key(b, t, k) -+ if (!bkey_whiteout(k)) -+ btree_keys_account_key_add(&nr, t - b->set, k); -+ -+ BUG_ON(memcmp(&nr, &b->nr, sizeof(nr))); -+} -+ -+static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter, -+ struct btree *b) -+{ -+ 
struct btree_node_iter iter = *_iter; -+ const struct bkey_packed *k, *n; -+ -+ k = bch2_btree_node_iter_peek_all(&iter, b); -+ __bch2_btree_node_iter_advance(&iter, b); -+ n = bch2_btree_node_iter_peek_all(&iter, b); -+ -+ bkey_unpack_key(b, k); -+ -+ if (n && -+ bkey_iter_cmp(b, k, n) > 0) { -+ struct btree_node_iter_set *set; -+ struct bkey ku = bkey_unpack_key(b, k); -+ struct bkey nu = bkey_unpack_key(b, n); -+ char buf1[80], buf2[80]; -+ -+ bch2_dump_btree_node(NULL, b); -+ bch2_bkey_to_text(&PBUF(buf1), &ku); -+ bch2_bkey_to_text(&PBUF(buf2), &nu); -+ printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n", -+ buf1, buf2); -+ printk(KERN_ERR "iter was:"); -+ -+ btree_node_iter_for_each(_iter, set) { -+ struct bkey_packed *k = __btree_node_offset_to_key(b, set->k); -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ printk(" [%zi %zi]", t - b->set, -+ k->_data - bset(b, t)->_data); -+ } -+ panic("\n"); -+ } -+} -+ -+void bch2_btree_node_iter_verify(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ struct btree_node_iter_set *set, *s2; -+ struct bkey_packed *k, *p; -+ struct bset_tree *t; -+ -+ if (bch2_btree_node_iter_end(iter)) -+ return; -+ -+ /* Verify no duplicates: */ -+ btree_node_iter_for_each(iter, set) -+ btree_node_iter_for_each(iter, s2) -+ BUG_ON(set != s2 && set->end == s2->end); -+ -+ /* Verify that set->end is correct: */ -+ btree_node_iter_for_each(iter, set) { -+ for_each_bset(b, t) -+ if (set->end == t->end_offset) -+ goto found; -+ BUG(); -+found: -+ BUG_ON(set->k < btree_bkey_first_offset(t) || -+ set->k >= t->end_offset); -+ } -+ -+ /* Verify iterator is sorted: */ -+ btree_node_iter_for_each(iter, set) -+ BUG_ON(set != iter->data && -+ btree_node_iter_cmp(b, set[-1], set[0]) > 0); -+ -+ k = bch2_btree_node_iter_peek_all(iter, b); -+ -+ for_each_bset(b, t) { -+ if (iter->data[0].end == t->end_offset) -+ continue; -+ -+ p = bch2_bkey_prev_all(b, t, -+ bch2_btree_node_iter_bset_pos(iter, b, t)); -+ -+ BUG_ON(p && bkey_iter_cmp(b, k, p) < 0); -+ } -+} -+ -+void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, -+ struct bkey_packed *insert, unsigned clobber_u64s) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, where); -+ struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where); -+ struct bkey_packed *next = (void *) (where->_data + clobber_u64s); -+#if 0 -+ BUG_ON(prev && -+ bkey_iter_cmp(b, prev, insert) > 0); -+#else -+ if (prev && -+ bkey_iter_cmp(b, prev, insert) > 0) { -+ struct bkey k1 = bkey_unpack_key(b, prev); -+ struct bkey k2 = bkey_unpack_key(b, insert); -+ char buf1[100]; -+ char buf2[100]; -+ -+ bch2_dump_btree_node(NULL, b); -+ bch2_bkey_to_text(&PBUF(buf1), &k1); -+ bch2_bkey_to_text(&PBUF(buf2), &k2); -+ -+ panic("prev > insert:\n" -+ "prev key %s\n" -+ "insert key %s\n", -+ buf1, buf2); -+ } -+#endif -+#if 0 -+ BUG_ON(next != btree_bkey_last(b, t) && -+ bkey_iter_cmp(b, insert, next) > 0); -+#else -+ if (next != btree_bkey_last(b, t) && -+ bkey_iter_cmp(b, insert, next) > 0) { -+ struct bkey k1 = bkey_unpack_key(b, insert); -+ struct bkey k2 = bkey_unpack_key(b, next); -+ char buf1[100]; -+ char buf2[100]; -+ -+ bch2_dump_btree_node(NULL, b); -+ bch2_bkey_to_text(&PBUF(buf1), &k1); -+ bch2_bkey_to_text(&PBUF(buf2), &k2); -+ -+ panic("insert > next:\n" -+ "insert key %s\n" -+ "next key %s\n", -+ buf1, buf2); -+ } -+#endif -+} -+ -+#else -+ -+static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter, -+ struct btree *b) {} -+ -+#endif -+ -+/* Auxiliary search trees */ -+ -+#define 
BFLOAT_FAILED_UNPACKED U8_MAX -+#define BFLOAT_FAILED U8_MAX -+ -+struct bkey_float { -+ u8 exponent; -+ u8 key_offset; -+ u16 mantissa; -+}; -+#define BKEY_MANTISSA_BITS 16 -+ -+static unsigned bkey_float_byte_offset(unsigned idx) -+{ -+ return idx * sizeof(struct bkey_float); -+} -+ -+struct ro_aux_tree { -+ struct bkey_float f[0]; -+}; -+ -+struct rw_aux_tree { -+ u16 offset; -+ struct bpos k; -+}; -+ -+static unsigned bset_aux_tree_buf_end(const struct bset_tree *t) -+{ -+ BUG_ON(t->aux_data_offset == U16_MAX); -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_NO_AUX_TREE: -+ return t->aux_data_offset; -+ case BSET_RO_AUX_TREE: -+ return t->aux_data_offset + -+ DIV_ROUND_UP(t->size * sizeof(struct bkey_float) + -+ t->size * sizeof(u8), 8); -+ case BSET_RW_AUX_TREE: -+ return t->aux_data_offset + -+ DIV_ROUND_UP(sizeof(struct rw_aux_tree) * t->size, 8); -+ default: -+ BUG(); -+ } -+} -+ -+static unsigned bset_aux_tree_buf_start(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ return t == b->set -+ ? DIV_ROUND_UP(b->unpack_fn_len, 8) -+ : bset_aux_tree_buf_end(t - 1); -+} -+ -+static void *__aux_tree_base(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ return b->aux_data + t->aux_data_offset * 8; -+} -+ -+static struct ro_aux_tree *ro_aux_tree_base(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE); -+ -+ return __aux_tree_base(b, t); -+} -+ -+static u8 *ro_aux_tree_prev(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE); -+ -+ return __aux_tree_base(b, t) + bkey_float_byte_offset(t->size); -+} -+ -+static struct bkey_float *bkey_float(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned idx) -+{ -+ return ro_aux_tree_base(b, t)->f + idx; -+} -+ -+static void bset_aux_tree_verify(struct btree *b) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) { -+ if (t->aux_data_offset == U16_MAX) -+ continue; -+ -+ BUG_ON(t != b->set && -+ t[-1].aux_data_offset == U16_MAX); -+ -+ BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t)); -+ BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b)); -+ BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b)); -+ } -+#endif -+} -+ -+void bch2_btree_keys_init(struct btree *b, bool *expensive_debug_checks) -+{ -+ unsigned i; -+ -+ b->nsets = 0; -+ memset(&b->nr, 0, sizeof(b->nr)); -+#ifdef CONFIG_BCACHEFS_DEBUG -+ b->expensive_debug_checks = expensive_debug_checks; -+#endif -+ for (i = 0; i < MAX_BSETS; i++) -+ b->set[i].data_offset = U16_MAX; -+ -+ bch2_bset_set_no_aux_tree(b, b->set); -+} -+ -+/* Binary tree stuff for auxiliary search trees */ -+ -+/* -+ * Cacheline/offset <-> bkey pointer arithmetic: -+ * -+ * t->tree is a binary search tree in an array; each node corresponds to a key -+ * in one cacheline in t->set (BSET_CACHELINE bytes). -+ * -+ * This means we don't have to store the full index of the key that a node in -+ * the binary tree points to; eytzinger1_to_inorder() gives us the cacheline, and -+ * then bkey_float->m gives us the offset within that cacheline, in units of 8 -+ * bytes. -+ * -+ * cacheline_to_bkey() and friends abstract out all the pointer arithmetic to -+ * make this work. -+ * -+ * To construct the bfloat for an arbitrary key we need to know what the key -+ * immediately preceding it is: we have to check if the two keys differ in the -+ * bits we're going to store in bkey_float->mantissa. 
t->prev[j] stores the size -+ * of the previous key so we can walk backwards to it from t->tree[j]'s key. -+ */ -+ -+static inline void *bset_cacheline(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned cacheline) -+{ -+ return (void *) round_down((unsigned long) btree_bkey_first(b, t), -+ L1_CACHE_BYTES) + -+ cacheline * BSET_CACHELINE; -+} -+ -+static struct bkey_packed *cacheline_to_bkey(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned cacheline, -+ unsigned offset) -+{ -+ return bset_cacheline(b, t, cacheline) + offset * 8; -+} -+ -+static unsigned bkey_to_cacheline(const struct btree *b, -+ const struct bset_tree *t, -+ const struct bkey_packed *k) -+{ -+ return ((void *) k - bset_cacheline(b, t, 0)) / BSET_CACHELINE; -+} -+ -+static ssize_t __bkey_to_cacheline_offset(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned cacheline, -+ const struct bkey_packed *k) -+{ -+ return (u64 *) k - (u64 *) bset_cacheline(b, t, cacheline); -+} -+ -+static unsigned bkey_to_cacheline_offset(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned cacheline, -+ const struct bkey_packed *k) -+{ -+ size_t m = __bkey_to_cacheline_offset(b, t, cacheline, k); -+ -+ EBUG_ON(m > U8_MAX); -+ return m; -+} -+ -+static inline struct bkey_packed *tree_to_bkey(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned j) -+{ -+ return cacheline_to_bkey(b, t, -+ __eytzinger1_to_inorder(j, t->size, t->extra), -+ bkey_float(b, t, j)->key_offset); -+} -+ -+static struct bkey_packed *tree_to_prev_bkey(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned j) -+{ -+ unsigned prev_u64s = ro_aux_tree_prev(b, t)[j]; -+ -+ return (void *) (tree_to_bkey(b, t, j)->_data - prev_u64s); -+} -+ -+static struct rw_aux_tree *rw_aux_tree(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE); -+ -+ return __aux_tree_base(b, t); -+} -+ -+/* -+ * For the write set - the one we're currently inserting keys into - we don't -+ * maintain a full search tree, we just keep a simple lookup table in t->prev. 
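A minimal sketch of the lookup-table idea just described (not taken from the patch; plain integers stand in for packed keys): the write set keeps one sampled position per cacheline, found by binary search, and the caller then scans keys linearly from that sample. This is the same shape as bset_search_write_set() further down in this file.

/*
 * samples[] holds one key position per cacheline of the bset, in ascending
 * order; samples[0] is assumed to cover the start of the set (nr >= 1).
 * Returns the index of the last sample strictly less than @search; the
 * caller linear-scans forward from there.
 */
static unsigned demo_write_set_search(const unsigned long long *samples,
				      unsigned nr, unsigned long long search)
{
	unsigned l = 0, r = nr;

	while (l + 1 != r) {
		unsigned m = (l + r) >> 1;

		if (samples[m] < search)
			l = m;
		else
			r = m;
	}

	return l;
}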
-+ */ -+static struct bkey_packed *rw_aux_to_bkey(const struct btree *b, -+ struct bset_tree *t, -+ unsigned j) -+{ -+ return __btree_node_offset_to_key(b, rw_aux_tree(b, t)[j].offset); -+} -+ -+static void rw_aux_tree_set(const struct btree *b, struct bset_tree *t, -+ unsigned j, struct bkey_packed *k) -+{ -+ EBUG_ON(k >= btree_bkey_last(b, t)); -+ -+ rw_aux_tree(b, t)[j] = (struct rw_aux_tree) { -+ .offset = __btree_node_key_to_offset(b, k), -+ .k = bkey_unpack_pos(b, k), -+ }; -+} -+ -+static void bch2_bset_verify_rw_aux_tree(struct btree *b, -+ struct bset_tree *t) -+{ -+ struct bkey_packed *k = btree_bkey_first(b, t); -+ unsigned j = 0; -+ -+ if (!btree_keys_expensive_checks(b)) -+ return; -+ -+ BUG_ON(bset_has_ro_aux_tree(t)); -+ -+ if (!bset_has_rw_aux_tree(t)) -+ return; -+ -+ BUG_ON(t->size < 1); -+ BUG_ON(rw_aux_to_bkey(b, t, j) != k); -+ -+ goto start; -+ while (1) { -+ if (rw_aux_to_bkey(b, t, j) == k) { -+ BUG_ON(bkey_cmp(rw_aux_tree(b, t)[j].k, -+ bkey_unpack_pos(b, k))); -+start: -+ if (++j == t->size) -+ break; -+ -+ BUG_ON(rw_aux_tree(b, t)[j].offset <= -+ rw_aux_tree(b, t)[j - 1].offset); -+ } -+ -+ k = bkey_next_skip_noops(k, btree_bkey_last(b, t)); -+ BUG_ON(k >= btree_bkey_last(b, t)); -+ } -+} -+ -+/* returns idx of first entry >= offset: */ -+static unsigned rw_aux_tree_bsearch(struct btree *b, -+ struct bset_tree *t, -+ unsigned offset) -+{ -+ unsigned bset_offs = offset - btree_bkey_first_offset(t); -+ unsigned bset_u64s = t->end_offset - btree_bkey_first_offset(t); -+ unsigned idx = bset_u64s ? bset_offs * t->size / bset_u64s : 0; -+ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE); -+ EBUG_ON(!t->size); -+ EBUG_ON(idx > t->size); -+ -+ while (idx < t->size && -+ rw_aux_tree(b, t)[idx].offset < offset) -+ idx++; -+ -+ while (idx && -+ rw_aux_tree(b, t)[idx - 1].offset >= offset) -+ idx--; -+ -+ EBUG_ON(idx < t->size && -+ rw_aux_tree(b, t)[idx].offset < offset); -+ EBUG_ON(idx && rw_aux_tree(b, t)[idx - 1].offset >= offset); -+ EBUG_ON(idx + 1 < t->size && -+ rw_aux_tree(b, t)[idx].offset == -+ rw_aux_tree(b, t)[idx + 1].offset); -+ -+ return idx; -+} -+ -+static inline unsigned bkey_mantissa(const struct bkey_packed *k, -+ const struct bkey_float *f, -+ unsigned idx) -+{ -+ u64 v; -+ -+ EBUG_ON(!bkey_packed(k)); -+ -+ v = get_unaligned((u64 *) (((u8 *) k->_data) + (f->exponent >> 3))); -+ -+ /* -+ * In little endian, we're shifting off low bits (and then the bits we -+ * want are at the low end), in big endian we're shifting off high bits -+ * (and then the bits we want are at the high end, so we shift them -+ * back down): -+ */ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ v >>= f->exponent & 7; -+#else -+ v >>= 64 - (f->exponent & 7) - BKEY_MANTISSA_BITS; -+#endif -+ return (u16) v; -+} -+ -+static void make_bfloat(struct btree *b, struct bset_tree *t, -+ unsigned j, -+ struct bkey_packed *min_key, -+ struct bkey_packed *max_key) -+{ -+ struct bkey_float *f = bkey_float(b, t, j); -+ struct bkey_packed *m = tree_to_bkey(b, t, j); -+ struct bkey_packed *l, *r; -+ unsigned mantissa; -+ int shift, exponent, high_bit; -+ -+ if (is_power_of_2(j)) { -+ l = min_key; -+ -+ if (!l->u64s) { -+ if (!bkey_pack_pos(l, b->data->min_key, b)) { -+ struct bkey_i tmp; -+ -+ bkey_init(&tmp.k); -+ tmp.k.p = b->data->min_key; -+ bkey_copy(l, &tmp); -+ } -+ } -+ } else { -+ l = tree_to_prev_bkey(b, t, j >> ffs(j)); -+ -+ EBUG_ON(m < l); -+ } -+ -+ if (is_power_of_2(j + 1)) { -+ r = max_key; -+ -+ if (!r->u64s) { -+ if (!bkey_pack_pos(r, t->max_key, b)) { -+ struct bkey_i 
tmp; -+ -+ bkey_init(&tmp.k); -+ tmp.k.p = t->max_key; -+ bkey_copy(r, &tmp); -+ } -+ } -+ } else { -+ r = tree_to_bkey(b, t, j >> (ffz(j) + 1)); -+ -+ EBUG_ON(m > r); -+ } -+ -+ /* -+ * for failed bfloats, the lookup code falls back to comparing against -+ * the original key. -+ */ -+ -+ if (!bkey_packed(l) || !bkey_packed(r) || !bkey_packed(m) || -+ !b->nr_key_bits) { -+ f->exponent = BFLOAT_FAILED_UNPACKED; -+ return; -+ } -+ -+ /* -+ * The greatest differing bit of l and r is the first bit we must -+ * include in the bfloat mantissa we're creating in order to do -+ * comparisons - that bit always becomes the high bit of -+ * bfloat->mantissa, and thus the exponent we're calculating here is -+ * the position of what will become the low bit in bfloat->mantissa: -+ * -+ * Note that this may be negative - we may be running off the low end -+ * of the key: we handle this later: -+ */ -+ high_bit = max(bch2_bkey_greatest_differing_bit(b, l, r), -+ min_t(unsigned, BKEY_MANTISSA_BITS, b->nr_key_bits) - 1); -+ exponent = high_bit - (BKEY_MANTISSA_BITS - 1); -+ -+ /* -+ * Then we calculate the actual shift value, from the start of the key -+ * (k->_data), to get the key bits starting at exponent: -+ */ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ shift = (int) (b->format.key_u64s * 64 - b->nr_key_bits) + exponent; -+ -+ EBUG_ON(shift + BKEY_MANTISSA_BITS > b->format.key_u64s * 64); -+#else -+ shift = high_bit_offset + -+ b->nr_key_bits - -+ exponent - -+ BKEY_MANTISSA_BITS; -+ -+ EBUG_ON(shift < KEY_PACKED_BITS_START); -+#endif -+ EBUG_ON(shift < 0 || shift >= BFLOAT_FAILED); -+ -+ f->exponent = shift; -+ mantissa = bkey_mantissa(m, f, j); -+ -+ /* -+ * If we've got garbage bits, set them to all 1s - it's legal for the -+ * bfloat to compare larger than the original key, but not smaller: -+ */ -+ if (exponent < 0) -+ mantissa |= ~(~0U << -exponent); -+ -+ f->mantissa = mantissa; -+} -+ -+/* bytes remaining - only valid for last bset: */ -+static unsigned __bset_tree_capacity(struct btree *b, struct bset_tree *t) -+{ -+ bset_aux_tree_verify(b); -+ -+ return btree_aux_data_bytes(b) - t->aux_data_offset * sizeof(u64); -+} -+ -+static unsigned bset_ro_tree_capacity(struct btree *b, struct bset_tree *t) -+{ -+ return __bset_tree_capacity(b, t) / -+ (sizeof(struct bkey_float) + sizeof(u8)); -+} -+ -+static unsigned bset_rw_tree_capacity(struct btree *b, struct bset_tree *t) -+{ -+ return __bset_tree_capacity(b, t) / sizeof(struct rw_aux_tree); -+} -+ -+static void __build_rw_aux_tree(struct btree *b, struct bset_tree *t) -+{ -+ struct bkey_packed *k; -+ -+ t->size = 1; -+ t->extra = BSET_RW_AUX_TREE_VAL; -+ rw_aux_tree(b, t)[0].offset = -+ __btree_node_key_to_offset(b, btree_bkey_first(b, t)); -+ -+ bset_tree_for_each_key(b, t, k) { -+ if (t->size == bset_rw_tree_capacity(b, t)) -+ break; -+ -+ if ((void *) k - (void *) rw_aux_to_bkey(b, t, t->size - 1) > -+ L1_CACHE_BYTES) -+ rw_aux_tree_set(b, t, t->size++, k); -+ } -+} -+ -+static void __build_ro_aux_tree(struct btree *b, struct bset_tree *t) -+{ -+ struct bkey_packed *prev = NULL, *k = btree_bkey_first(b, t); -+ struct bkey_packed min_key, max_key; -+ unsigned j, cacheline = 1; -+ -+ /* signal to make_bfloat() that they're uninitialized: */ -+ min_key.u64s = max_key.u64s = 0; -+ -+ t->size = min(bkey_to_cacheline(b, t, btree_bkey_last(b, t)), -+ bset_ro_tree_capacity(b, t)); -+retry: -+ if (t->size < 2) { -+ t->size = 0; -+ t->extra = BSET_NO_AUX_TREE_VAL; -+ return; -+ } -+ -+ t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1; 
-+ -+ /* First we figure out where the first key in each cacheline is */ -+ eytzinger1_for_each(j, t->size) { -+ while (bkey_to_cacheline(b, t, k) < cacheline) -+ prev = k, k = bkey_next_skip_noops(k, btree_bkey_last(b, t)); -+ -+ if (k >= btree_bkey_last(b, t)) { -+ /* XXX: this path sucks */ -+ t->size--; -+ goto retry; -+ } -+ -+ ro_aux_tree_prev(b, t)[j] = prev->u64s; -+ bkey_float(b, t, j)->key_offset = -+ bkey_to_cacheline_offset(b, t, cacheline++, k); -+ -+ EBUG_ON(tree_to_prev_bkey(b, t, j) != prev); -+ EBUG_ON(tree_to_bkey(b, t, j) != k); -+ } -+ -+ while (k != btree_bkey_last(b, t)) -+ prev = k, k = bkey_next_skip_noops(k, btree_bkey_last(b, t)); -+ -+ t->max_key = bkey_unpack_pos(b, prev); -+ -+ /* Then we build the tree */ -+ eytzinger1_for_each(j, t->size) -+ make_bfloat(b, t, j, &min_key, &max_key); -+} -+ -+static void bset_alloc_tree(struct btree *b, struct bset_tree *t) -+{ -+ struct bset_tree *i; -+ -+ for (i = b->set; i != t; i++) -+ BUG_ON(bset_has_rw_aux_tree(i)); -+ -+ bch2_bset_set_no_aux_tree(b, t); -+ -+ /* round up to next cacheline: */ -+ t->aux_data_offset = round_up(bset_aux_tree_buf_start(b, t), -+ SMP_CACHE_BYTES / sizeof(u64)); -+ -+ bset_aux_tree_verify(b); -+} -+ -+void bch2_bset_build_aux_tree(struct btree *b, struct bset_tree *t, -+ bool writeable) -+{ -+ if (writeable -+ ? bset_has_rw_aux_tree(t) -+ : bset_has_ro_aux_tree(t)) -+ return; -+ -+ bset_alloc_tree(b, t); -+ -+ if (!__bset_tree_capacity(b, t)) -+ return; -+ -+ if (writeable) -+ __build_rw_aux_tree(b, t); -+ else -+ __build_ro_aux_tree(b, t); -+ -+ bset_aux_tree_verify(b); -+} -+ -+void bch2_bset_init_first(struct btree *b, struct bset *i) -+{ -+ struct bset_tree *t; -+ -+ BUG_ON(b->nsets); -+ -+ memset(i, 0, sizeof(*i)); -+ get_random_bytes(&i->seq, sizeof(i->seq)); -+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); -+ -+ t = &b->set[b->nsets++]; -+ set_btree_bset(b, t, i); -+} -+ -+void bch2_bset_init_next(struct bch_fs *c, struct btree *b, -+ struct btree_node_entry *bne) -+{ -+ struct bset *i = &bne->keys; -+ struct bset_tree *t; -+ -+ BUG_ON(bset_byte_offset(b, bne) >= btree_bytes(c)); -+ BUG_ON((void *) bne < (void *) btree_bkey_last(b, bset_tree_last(b))); -+ BUG_ON(b->nsets >= MAX_BSETS); -+ -+ memset(i, 0, sizeof(*i)); -+ i->seq = btree_bset_first(b)->seq; -+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); -+ -+ t = &b->set[b->nsets++]; -+ set_btree_bset(b, t, i); -+} -+ -+/* -+ * find _some_ key in the same bset as @k that precedes @k - not necessarily the -+ * immediate predecessor: -+ */ -+static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t, -+ struct bkey_packed *k) -+{ -+ struct bkey_packed *p; -+ unsigned offset; -+ int j; -+ -+ EBUG_ON(k < btree_bkey_first(b, t) || -+ k > btree_bkey_last(b, t)); -+ -+ if (k == btree_bkey_first(b, t)) -+ return NULL; -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_NO_AUX_TREE: -+ p = btree_bkey_first(b, t); -+ break; -+ case BSET_RO_AUX_TREE: -+ j = min_t(unsigned, t->size - 1, bkey_to_cacheline(b, t, k)); -+ -+ do { -+ p = j ? tree_to_bkey(b, t, -+ __inorder_to_eytzinger1(j--, -+ t->size, t->extra)) -+ : btree_bkey_first(b, t); -+ } while (p >= k); -+ break; -+ case BSET_RW_AUX_TREE: -+ offset = __btree_node_key_to_offset(b, k); -+ j = rw_aux_tree_bsearch(b, t, offset); -+ p = j ? 
rw_aux_to_bkey(b, t, j - 1) -+ : btree_bkey_first(b, t); -+ break; -+ } -+ -+ return p; -+} -+ -+struct bkey_packed *bch2_bkey_prev_filter(struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *k, -+ unsigned min_key_type) -+{ -+ struct bkey_packed *p, *i, *ret = NULL, *orig_k = k; -+ -+ while ((p = __bkey_prev(b, t, k)) && !ret) { -+ for (i = p; i != k; i = bkey_next_skip_noops(i, k)) -+ if (i->type >= min_key_type) -+ ret = i; -+ -+ k = p; -+ } -+ -+ if (btree_keys_expensive_checks(b)) { -+ BUG_ON(ret >= orig_k); -+ -+ for (i = ret -+ ? bkey_next_skip_noops(ret, orig_k) -+ : btree_bkey_first(b, t); -+ i != orig_k; -+ i = bkey_next_skip_noops(i, orig_k)) -+ BUG_ON(i->type >= min_key_type); -+ } -+ -+ return ret; -+} -+ -+/* Insert */ -+ -+static void rw_aux_tree_fix_invalidated_key(struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *k) -+{ -+ unsigned offset = __btree_node_key_to_offset(b, k); -+ unsigned j = rw_aux_tree_bsearch(b, t, offset); -+ -+ if (j < t->size && -+ rw_aux_tree(b, t)[j].offset == offset) -+ rw_aux_tree_set(b, t, j, k); -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+} -+ -+static void ro_aux_tree_fix_invalidated_key(struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *k) -+{ -+ struct bkey_packed min_key, max_key; -+ unsigned inorder, j; -+ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE); -+ -+ /* signal to make_bfloat() that they're uninitialized: */ -+ min_key.u64s = max_key.u64s = 0; -+ -+ if (bkey_next_skip_noops(k, btree_bkey_last(b, t)) == btree_bkey_last(b, t)) { -+ t->max_key = bkey_unpack_pos(b, k); -+ -+ for (j = 1; j < t->size; j = j * 2 + 1) -+ make_bfloat(b, t, j, &min_key, &max_key); -+ } -+ -+ inorder = bkey_to_cacheline(b, t, k); -+ -+ if (inorder && -+ inorder < t->size) { -+ j = __inorder_to_eytzinger1(inorder, t->size, t->extra); -+ -+ if (k == tree_to_bkey(b, t, j)) { -+ /* Fix the node this key corresponds to */ -+ make_bfloat(b, t, j, &min_key, &max_key); -+ -+ /* Children for which this key is the right boundary */ -+ for (j = eytzinger1_left_child(j); -+ j < t->size; -+ j = eytzinger1_right_child(j)) -+ make_bfloat(b, t, j, &min_key, &max_key); -+ } -+ } -+ -+ if (inorder + 1 < t->size) { -+ j = __inorder_to_eytzinger1(inorder + 1, t->size, t->extra); -+ -+ if (k == tree_to_prev_bkey(b, t, j)) { -+ make_bfloat(b, t, j, &min_key, &max_key); -+ -+ /* Children for which this key is the left boundary */ -+ for (j = eytzinger1_right_child(j); -+ j < t->size; -+ j = eytzinger1_left_child(j)) -+ make_bfloat(b, t, j, &min_key, &max_key); -+ } -+ } -+} -+ -+/** -+ * bch2_bset_fix_invalidated_key() - given an existing key @k that has been -+ * modified, fix any auxiliary search tree by remaking all the nodes in the -+ * auxiliary search tree that @k corresponds to -+ */ -+void bch2_bset_fix_invalidated_key(struct btree *b, struct bkey_packed *k) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_NO_AUX_TREE: -+ break; -+ case BSET_RO_AUX_TREE: -+ ro_aux_tree_fix_invalidated_key(b, t, k); -+ break; -+ case BSET_RW_AUX_TREE: -+ rw_aux_tree_fix_invalidated_key(b, t, k); -+ break; -+ } -+} -+ -+static void bch2_bset_fix_lookup_table(struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *_where, -+ unsigned clobber_u64s, -+ unsigned new_u64s) -+{ -+ int shift = new_u64s - clobber_u64s; -+ unsigned l, j, where = __btree_node_key_to_offset(b, _where); -+ -+ EBUG_ON(bset_has_ro_aux_tree(t)); -+ -+ if (!bset_has_rw_aux_tree(t)) -+ return; -+ -+ /* returns first 
entry >= where */ -+ l = rw_aux_tree_bsearch(b, t, where); -+ -+ if (!l) /* never delete first entry */ -+ l++; -+ else if (l < t->size && -+ where < t->end_offset && -+ rw_aux_tree(b, t)[l].offset == where) -+ rw_aux_tree_set(b, t, l++, _where); -+ -+ /* l now > where */ -+ -+ for (j = l; -+ j < t->size && -+ rw_aux_tree(b, t)[j].offset < where + clobber_u64s; -+ j++) -+ ; -+ -+ if (j < t->size && -+ rw_aux_tree(b, t)[j].offset + shift == -+ rw_aux_tree(b, t)[l - 1].offset) -+ j++; -+ -+ memmove(&rw_aux_tree(b, t)[l], -+ &rw_aux_tree(b, t)[j], -+ (void *) &rw_aux_tree(b, t)[t->size] - -+ (void *) &rw_aux_tree(b, t)[j]); -+ t->size -= j - l; -+ -+ for (j = l; j < t->size; j++) -+ rw_aux_tree(b, t)[j].offset += shift; -+ -+ EBUG_ON(l < t->size && -+ rw_aux_tree(b, t)[l].offset == -+ rw_aux_tree(b, t)[l - 1].offset); -+ -+ if (t->size < bset_rw_tree_capacity(b, t) && -+ (l < t->size -+ ? rw_aux_tree(b, t)[l].offset -+ : t->end_offset) - -+ rw_aux_tree(b, t)[l - 1].offset > -+ L1_CACHE_BYTES / sizeof(u64)) { -+ struct bkey_packed *start = rw_aux_to_bkey(b, t, l - 1); -+ struct bkey_packed *end = l < t->size -+ ? rw_aux_to_bkey(b, t, l) -+ : btree_bkey_last(b, t); -+ struct bkey_packed *k = start; -+ -+ while (1) { -+ k = bkey_next_skip_noops(k, end); -+ if (k == end) -+ break; -+ -+ if ((void *) k - (void *) start >= L1_CACHE_BYTES) { -+ memmove(&rw_aux_tree(b, t)[l + 1], -+ &rw_aux_tree(b, t)[l], -+ (void *) &rw_aux_tree(b, t)[t->size] - -+ (void *) &rw_aux_tree(b, t)[l]); -+ t->size++; -+ rw_aux_tree_set(b, t, l, k); -+ break; -+ } -+ } -+ } -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+ bset_aux_tree_verify(b); -+} -+ -+void bch2_bset_insert(struct btree *b, -+ struct btree_node_iter *iter, -+ struct bkey_packed *where, -+ struct bkey_i *insert, -+ unsigned clobber_u64s) -+{ -+ struct bkey_format *f = &b->format; -+ struct bset_tree *t = bset_tree_last(b); -+ struct bkey_packed packed, *src = bkey_to_packed(insert); -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+ bch2_verify_insert_pos(b, where, bkey_to_packed(insert), clobber_u64s); -+ -+ if (bch2_bkey_pack_key(&packed, &insert->k, f)) -+ src = &packed; -+ -+ if (!bkey_whiteout(&insert->k)) -+ btree_keys_account_key_add(&b->nr, t - b->set, src); -+ -+ if (src->u64s != clobber_u64s) { -+ u64 *src_p = where->_data + clobber_u64s; -+ u64 *dst_p = where->_data + src->u64s; -+ -+ EBUG_ON((int) le16_to_cpu(bset(b, t)->u64s) < -+ (int) clobber_u64s - src->u64s); -+ -+ memmove_u64s(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p); -+ le16_add_cpu(&bset(b, t)->u64s, src->u64s - clobber_u64s); -+ set_btree_bset_end(b, t); -+ } -+ -+ memcpy_u64s(where, src, -+ bkeyp_key_u64s(f, src)); -+ memcpy_u64s(bkeyp_val(f, where), &insert->v, -+ bkeyp_val_u64s(f, src)); -+ -+ if (src->u64s != clobber_u64s) -+ bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s); -+ -+ bch2_verify_btree_nr_keys(b); -+} -+ -+void bch2_bset_delete(struct btree *b, -+ struct bkey_packed *where, -+ unsigned clobber_u64s) -+{ -+ struct bset_tree *t = bset_tree_last(b); -+ u64 *src_p = where->_data + clobber_u64s; -+ u64 *dst_p = where->_data; -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+ -+ EBUG_ON(le16_to_cpu(bset(b, t)->u64s) < clobber_u64s); -+ -+ memmove_u64s_down(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p); -+ le16_add_cpu(&bset(b, t)->u64s, -clobber_u64s); -+ set_btree_bset_end(b, t); -+ -+ bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, 0); -+} -+ -+/* Lookup */ -+ -+__flatten -+static struct bkey_packed *bset_search_write_set(const struct btree *b, -+ 
struct bset_tree *t, -+ struct bpos *search, -+ const struct bkey_packed *packed_search) -+{ -+ unsigned l = 0, r = t->size; -+ -+ while (l + 1 != r) { -+ unsigned m = (l + r) >> 1; -+ -+ if (bkey_cmp(rw_aux_tree(b, t)[m].k, *search) < 0) -+ l = m; -+ else -+ r = m; -+ } -+ -+ return rw_aux_to_bkey(b, t, l); -+} -+ -+static inline void prefetch_four_cachelines(void *p) -+{ -+#ifdef CONFIG_X86_64 -+ asm(".intel_syntax noprefix;" -+ "prefetcht0 [%0 - 127 + 64 * 0];" -+ "prefetcht0 [%0 - 127 + 64 * 1];" -+ "prefetcht0 [%0 - 127 + 64 * 2];" -+ "prefetcht0 [%0 - 127 + 64 * 3];" -+ ".att_syntax prefix;" -+ : -+ : "r" (p + 127)); -+#else -+ prefetch(p + L1_CACHE_BYTES * 0); -+ prefetch(p + L1_CACHE_BYTES * 1); -+ prefetch(p + L1_CACHE_BYTES * 2); -+ prefetch(p + L1_CACHE_BYTES * 3); -+#endif -+} -+ -+static inline bool bkey_mantissa_bits_dropped(const struct btree *b, -+ const struct bkey_float *f, -+ unsigned idx) -+{ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ unsigned key_bits_start = b->format.key_u64s * 64 - b->nr_key_bits; -+ -+ return f->exponent > key_bits_start; -+#else -+ unsigned key_bits_end = high_bit_offset + b->nr_key_bits; -+ -+ return f->exponent + BKEY_MANTISSA_BITS < key_bits_end; -+#endif -+} -+ -+__flatten -+static struct bkey_packed *bset_search_tree(const struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search, -+ const struct bkey_packed *packed_search) -+{ -+ struct ro_aux_tree *base = ro_aux_tree_base(b, t); -+ struct bkey_float *f; -+ struct bkey_packed *k; -+ unsigned inorder, n = 1, l, r; -+ int cmp; -+ -+ do { -+ if (likely(n << 4 < t->size)) -+ prefetch(&base->f[n << 4]); -+ -+ f = &base->f[n]; -+ -+ if (!unlikely(packed_search)) -+ goto slowpath; -+ if (unlikely(f->exponent >= BFLOAT_FAILED)) -+ goto slowpath; -+ -+ l = f->mantissa; -+ r = bkey_mantissa(packed_search, f, n); -+ -+ if (unlikely(l == r) && bkey_mantissa_bits_dropped(b, f, n)) -+ goto slowpath; -+ -+ n = n * 2 + (l < r); -+ continue; -+slowpath: -+ k = tree_to_bkey(b, t, n); -+ cmp = bkey_cmp_p_or_unp(b, k, packed_search, search); -+ if (!cmp) -+ return k; -+ -+ n = n * 2 + (cmp < 0); -+ } while (n < t->size); -+ -+ inorder = __eytzinger1_to_inorder(n >> 1, t->size, t->extra); -+ -+ /* -+ * n would have been the node we recursed to - the low bit tells us if -+ * we recursed left or recursed right. -+ */ -+ if (likely(!(n & 1))) { -+ --inorder; -+ if (unlikely(!inorder)) -+ return btree_bkey_first(b, t); -+ -+ f = &base->f[eytzinger1_prev(n >> 1, t->size)]; -+ } -+ -+ return cacheline_to_bkey(b, t, inorder, f->key_offset); -+} -+ -+static __always_inline __flatten -+struct bkey_packed *__bch2_bset_search(struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search, -+ const struct bkey_packed *lossy_packed_search) -+{ -+ -+ /* -+ * First, we search for a cacheline, then lastly we do a linear search -+ * within that cacheline. -+ * -+ * To search for the cacheline, there's three different possibilities: -+ * * The set is too small to have a search tree, so we just do a linear -+ * search over the whole set. -+ * * The set is the one we're currently inserting into; keeping a full -+ * auxiliary search tree up to date would be too expensive, so we -+ * use a much simpler lookup table to do a binary search - -+ * bset_search_write_set(). 
-+ * * Or we use the auxiliary search tree we constructed earlier - -+ * bset_search_tree() -+ */ -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_NO_AUX_TREE: -+ return btree_bkey_first(b, t); -+ case BSET_RW_AUX_TREE: -+ return bset_search_write_set(b, t, search, lossy_packed_search); -+ case BSET_RO_AUX_TREE: -+ /* -+ * Each node in the auxiliary search tree covers a certain range -+ * of bits, and keys above and below the set it covers might -+ * differ outside those bits - so we have to special case the -+ * start and end - handle that here: -+ */ -+ -+ if (bkey_cmp(*search, t->max_key) > 0) -+ return btree_bkey_last(b, t); -+ -+ return bset_search_tree(b, t, search, lossy_packed_search); -+ default: -+ unreachable(); -+ } -+} -+ -+static __always_inline __flatten -+struct bkey_packed *bch2_bset_search_linear(struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search, -+ struct bkey_packed *packed_search, -+ const struct bkey_packed *lossy_packed_search, -+ struct bkey_packed *m) -+{ -+ if (lossy_packed_search) -+ while (m != btree_bkey_last(b, t) && -+ bkey_iter_cmp_p_or_unp(b, m, -+ lossy_packed_search, search) < 0) -+ m = bkey_next_skip_noops(m, btree_bkey_last(b, t)); -+ -+ if (!packed_search) -+ while (m != btree_bkey_last(b, t) && -+ bkey_iter_pos_cmp(b, m, search) < 0) -+ m = bkey_next_skip_noops(m, btree_bkey_last(b, t)); -+ -+ if (btree_keys_expensive_checks(b)) { -+ struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m); -+ -+ BUG_ON(prev && -+ bkey_iter_cmp_p_or_unp(b, prev, -+ packed_search, search) >= 0); -+ } -+ -+ return m; -+} -+ -+/* -+ * Returns the first key greater than or equal to @search -+ */ -+static __always_inline __flatten -+struct bkey_packed *bch2_bset_search(struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search, -+ struct bkey_packed *packed_search, -+ const struct bkey_packed *lossy_packed_search) -+{ -+ struct bkey_packed *m = __bch2_bset_search(b, t, search, -+ lossy_packed_search); -+ -+ return bch2_bset_search_linear(b, t, search, -+ packed_search, lossy_packed_search, m); -+} -+ -+/* Btree node iterator */ -+ -+static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter, -+ struct btree *b, -+ const struct bkey_packed *k, -+ const struct bkey_packed *end) -+{ -+ if (k != end) { -+ struct btree_node_iter_set *pos; -+ -+ btree_node_iter_for_each(iter, pos) -+ ; -+ -+ BUG_ON(pos >= iter->data + ARRAY_SIZE(iter->data)); -+ *pos = (struct btree_node_iter_set) { -+ __btree_node_key_to_offset(b, k), -+ __btree_node_key_to_offset(b, end) -+ }; -+ } -+} -+ -+void bch2_btree_node_iter_push(struct btree_node_iter *iter, -+ struct btree *b, -+ const struct bkey_packed *k, -+ const struct bkey_packed *end) -+{ -+ __bch2_btree_node_iter_push(iter, b, k, end); -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+noinline __flatten __attribute__((cold)) -+static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter, -+ struct btree *b, struct bpos *search) -+{ -+ struct bset_tree *t; -+ -+ trace_bkey_pack_pos_fail(search); -+ -+ for_each_bset(b, t) -+ __bch2_btree_node_iter_push(iter, b, -+ bch2_bset_search(b, t, search, NULL, NULL), -+ btree_bkey_last(b, t)); -+ -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+/** -+ * bch_btree_node_iter_init - initialize a btree node iterator, starting from a -+ * given position -+ * -+ * Main entry point to the lookup code for individual btree nodes: -+ * -+ * NOTE: -+ * -+ * When you don't filter out deleted keys, btree nodes _do_ contain duplicate -+ * keys. 
This doesn't matter for most code, but it does matter for lookups. -+ * -+ * Some adjacent keys with a string of equal keys: -+ * i j k k k k l m -+ * -+ * If you search for k, the lookup code isn't guaranteed to return you any -+ * specific k. The lookup code is conceptually doing a binary search and -+ * iterating backwards is very expensive so if the pivot happens to land at the -+ * last k that's what you'll get. -+ * -+ * This works out ok, but it's something to be aware of: -+ * -+ * - For non extents, we guarantee that the live key comes last - see -+ * btree_node_iter_cmp(), keys_out_of_order(). So the duplicates you don't -+ * see will only be deleted keys you don't care about. -+ * -+ * - For extents, deleted keys sort last (see the comment at the top of this -+ * file). But when you're searching for extents, you actually want the first -+ * key strictly greater than your search key - an extent that compares equal -+ * to the search key is going to have 0 sectors after the search key. -+ * -+ * But this does mean that we can't just search for -+ * bkey_successor(start_of_range) to get the first extent that overlaps with -+ * the range we want - if we're unlucky and there's an extent that ends -+ * exactly where we searched, then there could be a deleted key at the same -+ * position and we'd get that when we search instead of the preceding extent -+ * we needed. -+ * -+ * So we've got to search for start_of_range, then after the lookup iterate -+ * past any extents that compare equal to the position we searched for. -+ */ -+__flatten -+void bch2_btree_node_iter_init(struct btree_node_iter *iter, -+ struct btree *b, struct bpos *search) -+{ -+ struct bkey_packed p, *packed_search = NULL; -+ struct btree_node_iter_set *pos = iter->data; -+ struct bkey_packed *k[MAX_BSETS]; -+ unsigned i; -+ -+ EBUG_ON(bkey_cmp(*search, b->data->min_key) < 0); -+ bset_aux_tree_verify(b); -+ -+ memset(iter, 0, sizeof(*iter)); -+ -+ switch (bch2_bkey_pack_pos_lossy(&p, *search, b)) { -+ case BKEY_PACK_POS_EXACT: -+ packed_search = &p; -+ break; -+ case BKEY_PACK_POS_SMALLER: -+ packed_search = NULL; -+ break; -+ case BKEY_PACK_POS_FAIL: -+ btree_node_iter_init_pack_failed(iter, b, search); -+ return; -+ } -+ -+ for (i = 0; i < b->nsets; i++) { -+ k[i] = __bch2_bset_search(b, b->set + i, search, &p); -+ prefetch_four_cachelines(k[i]); -+ } -+ -+ for (i = 0; i < b->nsets; i++) { -+ struct bset_tree *t = b->set + i; -+ struct bkey_packed *end = btree_bkey_last(b, t); -+ -+ k[i] = bch2_bset_search_linear(b, t, search, -+ packed_search, &p, k[i]); -+ if (k[i] != end) -+ *pos++ = (struct btree_node_iter_set) { -+ __btree_node_key_to_offset(b, k[i]), -+ __btree_node_key_to_offset(b, end) -+ }; -+ } -+ -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+void bch2_btree_node_iter_init_from_start(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ memset(iter, 0, sizeof(*iter)); -+ -+ for_each_bset(b, t) -+ __bch2_btree_node_iter_push(iter, b, -+ btree_bkey_first(b, t), -+ btree_bkey_last(b, t)); -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *iter, -+ struct btree *b, -+ struct bset_tree *t) -+{ -+ struct btree_node_iter_set *set; -+ -+ btree_node_iter_for_each(iter, set) -+ if (set->end == t->end_offset) -+ return __btree_node_offset_to_key(b, set->k); -+ -+ return btree_bkey_last(b, t); -+} -+ -+static inline bool btree_node_iter_sort_two(struct btree_node_iter *iter, -+ struct btree *b, -+ unsigned first) 
-+{ -+ bool ret; -+ -+ if ((ret = (btree_node_iter_cmp(b, -+ iter->data[first], -+ iter->data[first + 1]) > 0))) -+ swap(iter->data[first], iter->data[first + 1]); -+ return ret; -+} -+ -+void bch2_btree_node_iter_sort(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ /* unrolled bubble sort: */ -+ -+ if (!__btree_node_iter_set_end(iter, 2)) { -+ btree_node_iter_sort_two(iter, b, 0); -+ btree_node_iter_sort_two(iter, b, 1); -+ } -+ -+ if (!__btree_node_iter_set_end(iter, 1)) -+ btree_node_iter_sort_two(iter, b, 0); -+} -+ -+void bch2_btree_node_iter_set_drop(struct btree_node_iter *iter, -+ struct btree_node_iter_set *set) -+{ -+ struct btree_node_iter_set *last = -+ iter->data + ARRAY_SIZE(iter->data) - 1; -+ -+ memmove(&set[0], &set[1], (void *) last - (void *) set); -+ *last = (struct btree_node_iter_set) { 0, 0 }; -+} -+ -+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ iter->data->k += __bch2_btree_node_iter_peek_all(iter, b)->u64s; -+ -+ EBUG_ON(iter->data->k > iter->data->end); -+ -+ while (!__btree_node_iter_set_end(iter, 0) && -+ !__bch2_btree_node_iter_peek_all(iter, b)->u64s) -+ iter->data->k++; -+ -+ if (unlikely(__btree_node_iter_set_end(iter, 0))) { -+ bch2_btree_node_iter_set_drop(iter, iter->data); -+ return; -+ } -+ -+ if (__btree_node_iter_set_end(iter, 1)) -+ return; -+ -+ if (!btree_node_iter_sort_two(iter, b, 0)) -+ return; -+ -+ if (__btree_node_iter_set_end(iter, 2)) -+ return; -+ -+ btree_node_iter_sort_two(iter, b, 1); -+} -+ -+void bch2_btree_node_iter_advance(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ if (btree_keys_expensive_checks(b)) { -+ bch2_btree_node_iter_verify(iter, b); -+ bch2_btree_node_iter_next_check(iter, b); -+ } -+ -+ __bch2_btree_node_iter_advance(iter, b); -+} -+ -+/* -+ * Expensive: -+ */ -+struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ struct bkey_packed *k, *prev = NULL; -+ struct btree_node_iter_set *set; -+ struct bset_tree *t; -+ unsigned end = 0; -+ -+ if (btree_keys_expensive_checks(b)) -+ bch2_btree_node_iter_verify(iter, b); -+ -+ for_each_bset(b, t) { -+ k = bch2_bkey_prev_all(b, t, -+ bch2_btree_node_iter_bset_pos(iter, b, t)); -+ if (k && -+ (!prev || bkey_iter_cmp(b, k, prev) > 0)) { -+ prev = k; -+ end = t->end_offset; -+ } -+ } -+ -+ if (!prev) -+ return NULL; -+ -+ /* -+ * We're manually memmoving instead of just calling sort() to ensure the -+ * prev we picked ends up in slot 0 - sort won't necessarily put it -+ * there because of duplicate deleted keys: -+ */ -+ btree_node_iter_for_each(iter, set) -+ if (set->end == end) -+ goto found; -+ -+ BUG_ON(set != &iter->data[__btree_node_iter_used(iter)]); -+found: -+ BUG_ON(set >= iter->data + ARRAY_SIZE(iter->data)); -+ -+ memmove(&iter->data[1], -+ &iter->data[0], -+ (void *) set - (void *) &iter->data[0]); -+ -+ iter->data[0].k = __btree_node_key_to_offset(b, prev); -+ iter->data[0].end = end; -+ -+ if (btree_keys_expensive_checks(b)) -+ bch2_btree_node_iter_verify(iter, b); -+ return prev; -+} -+ -+struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *iter, -+ struct btree *b, -+ unsigned min_key_type) -+{ -+ struct bkey_packed *prev; -+ -+ do { -+ prev = bch2_btree_node_iter_prev_all(iter, b); -+ } while (prev && prev->type < min_key_type); -+ -+ return prev; -+} -+ -+struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *iter, -+ struct btree *b, -+ struct bkey *u) -+{ -+ struct bkey_packed *k = 
bch2_btree_node_iter_peek(iter, b); -+ -+ return k ? bkey_disassemble(b, k, u) : bkey_s_c_null; -+} -+ -+/* Mergesort */ -+ -+void bch2_btree_keys_stats(struct btree *b, struct bset_stats *stats) -+{ -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) { -+ enum bset_aux_tree_type type = bset_aux_tree_type(t); -+ size_t j; -+ -+ stats->sets[type].nr++; -+ stats->sets[type].bytes += le16_to_cpu(bset(b, t)->u64s) * -+ sizeof(u64); -+ -+ if (bset_has_ro_aux_tree(t)) { -+ stats->floats += t->size - 1; -+ -+ for (j = 1; j < t->size; j++) -+ stats->failed += -+ bkey_float(b, t, j)->exponent == -+ BFLOAT_FAILED; -+ } -+ } -+} -+ -+void bch2_bfloat_to_text(struct printbuf *out, struct btree *b, -+ struct bkey_packed *k) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ struct bkey uk; -+ unsigned j, inorder; -+ -+ if (out->pos != out->end) -+ *out->pos = '\0'; -+ -+ if (!bset_has_ro_aux_tree(t)) -+ return; -+ -+ inorder = bkey_to_cacheline(b, t, k); -+ if (!inorder || inorder >= t->size) -+ return; -+ -+ j = __inorder_to_eytzinger1(inorder, t->size, t->extra); -+ if (k != tree_to_bkey(b, t, j)) -+ return; -+ -+ switch (bkey_float(b, t, j)->exponent) { -+ case BFLOAT_FAILED: -+ uk = bkey_unpack_key(b, k); -+ pr_buf(out, -+ " failed unpacked at depth %u\n" -+ "\t%llu:%llu\n", -+ ilog2(j), -+ uk.p.inode, uk.p.offset); -+ break; -+ } -+} -diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h -new file mode 100644 -index 000000000000..5921cf689105 ---- /dev/null -+++ b/fs/bcachefs/bset.h -@@ -0,0 +1,661 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BSET_H -+#define _BCACHEFS_BSET_H -+ -+#include -+#include -+ -+#include "bcachefs_format.h" -+#include "bkey.h" -+#include "bkey_methods.h" -+#include "btree_types.h" -+#include "util.h" /* for time_stats */ -+#include "vstructs.h" -+ -+/* -+ * BKEYS: -+ * -+ * A bkey contains a key, a size field, a variable number of pointers, and some -+ * ancillary flag bits. -+ * -+ * We use two different functions for validating bkeys, bkey_invalid and -+ * bkey_deleted(). -+ * -+ * The one exception to the rule that ptr_invalid() filters out invalid keys is -+ * that it also filters out keys of size 0 - these are keys that have been -+ * completely overwritten. It'd be safe to delete these in memory while leaving -+ * them on disk, just unnecessary work - so we filter them out when resorting -+ * instead. -+ * -+ * We can't filter out stale keys when we're resorting, because garbage -+ * collection needs to find them to ensure bucket gens don't wrap around - -+ * unless we're rewriting the btree node those stale keys still exist on disk. -+ * -+ * We also implement functions here for removing some number of sectors from the -+ * front or the back of a bkey - this is mainly used for fixing overlapping -+ * extents, by removing the overlapping sectors from the older key. -+ * -+ * BSETS: -+ * -+ * A bset is an array of bkeys laid out contiguously in memory in sorted order, -+ * along with a header. A btree node is made up of a number of these, written at -+ * different times. -+ * -+ * There could be many of them on disk, but we never allow there to be more than -+ * 4 in memory - we lazily resort as needed. -+ * -+ * We implement code here for creating and maintaining auxiliary search trees -+ * (described below) for searching an individial bset, and on top of that we -+ * implement a btree iterator. 
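Since a node carries up to MAX_BSETS independently sorted bsets, iterating it in order amounts to an N-way merge. The sketch below is illustrative only, with plain ints in place of packed keys; the real iterator keeps its per-set cursors pre-sorted and bubble-sorts after each advance instead of rescanning, but the effect is the same.

struct demo_set_iter { const int *k, *end; };

/*
 * Return a pointer to the smallest remaining element across all sets and
 * advance that set's cursor, or NULL once every set is exhausted.
 */
static const int *demo_merge_next(struct demo_set_iter *sets, unsigned nr)
{
	struct demo_set_iter *best = NULL;
	unsigned i;

	for (i = 0; i < nr; i++)
		if (sets[i].k != sets[i].end &&
		    (!best || *sets[i].k < *best->k))
			best = &sets[i];

	return best ? best->k++ : NULL;
}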
-+ * -+ * BTREE ITERATOR: -+ * -+ * Most of the code in bcache doesn't care about an individual bset - it needs -+ * to search entire btree nodes and iterate over them in sorted order. -+ * -+ * The btree iterator code serves both functions; it iterates through the keys -+ * in a btree node in sorted order, starting from either keys after a specific -+ * point (if you pass it a search key) or the start of the btree node. -+ * -+ * AUXILIARY SEARCH TREES: -+ * -+ * Since keys are variable length, we can't use a binary search on a bset - we -+ * wouldn't be able to find the start of the next key. But binary searches are -+ * slow anyways, due to terrible cache behaviour; bcache originally used binary -+ * searches and that code topped out at under 50k lookups/second. -+ * -+ * So we need to construct some sort of lookup table. Since we only insert keys -+ * into the last (unwritten) set, most of the keys within a given btree node are -+ * usually in sets that are mostly constant. We use two different types of -+ * lookup tables to take advantage of this. -+ * -+ * Both lookup tables share in common that they don't index every key in the -+ * set; they index one key every BSET_CACHELINE bytes, and then a linear search -+ * is used for the rest. -+ * -+ * For sets that have been written to disk and are no longer being inserted -+ * into, we construct a binary search tree in an array - traversing a binary -+ * search tree in an array gives excellent locality of reference and is very -+ * fast, since both children of any node are adjacent to each other in memory -+ * (and their grandchildren, and great grandchildren...) - this means -+ * prefetching can be used to great effect. -+ * -+ * It's quite useful performance wise to keep these nodes small - not just -+ * because they're more likely to be in L2, but also because we can prefetch -+ * more nodes on a single cacheline and thus prefetch more iterations in advance -+ * when traversing this tree. -+ * -+ * Nodes in the auxiliary search tree must contain both a key to compare against -+ * (we don't want to fetch the key from the set, that would defeat the purpose), -+ * and a pointer to the key. We use a few tricks to compress both of these. -+ * -+ * To compress the pointer, we take advantage of the fact that one node in the -+ * search tree corresponds to precisely BSET_CACHELINE bytes in the set. We have -+ * a function (to_inorder()) that takes the index of a node in a binary tree and -+ * returns what its index would be in an inorder traversal, so we only have to -+ * store the low bits of the offset. -+ * -+ * The key is 84 bits (KEY_DEV + key->key, the offset on the device). To -+ * compress that, we take advantage of the fact that when we're traversing the -+ * search tree at every iteration we know that both our search key and the key -+ * we're looking for lie within some range - bounded by our previous -+ * comparisons. (We special case the start of a search so that this is true even -+ * at the root of the tree). -+ * -+ * So we know the key we're looking for is between a and b, and a and b don't -+ * differ higher than bit 50, we don't need to check anything higher than bit -+ * 50. -+ * -+ * We don't usually need the rest of the bits, either; we only need enough bits -+ * to partition the key range we're currently checking. Consider key n - the -+ * key our auxiliary search tree node corresponds to, and key p, the key -+ * immediately preceding n. 
The lowest bit we need to store in the auxiliary -+ * search tree is the highest bit that differs between n and p. -+ * -+ * Note that this could be bit 0 - we might sometimes need all 80 bits to do the -+ * comparison. But we'd really like our nodes in the auxiliary search tree to be -+ * of fixed size. -+ * -+ * The solution is to make them fixed size, and when we're constructing a node -+ * check if p and n differed in the bits we needed them to. If they don't we -+ * flag that node, and when doing lookups we fallback to comparing against the -+ * real key. As long as this doesn't happen to often (and it seems to reliably -+ * happen a bit less than 1% of the time), we win - even on failures, that key -+ * is then more likely to be in cache than if we were doing binary searches all -+ * the way, since we're touching so much less memory. -+ * -+ * The keys in the auxiliary search tree are stored in (software) floating -+ * point, with an exponent and a mantissa. The exponent needs to be big enough -+ * to address all the bits in the original key, but the number of bits in the -+ * mantissa is somewhat arbitrary; more bits just gets us fewer failures. -+ * -+ * We need 7 bits for the exponent and 3 bits for the key's offset (since keys -+ * are 8 byte aligned); using 22 bits for the mantissa means a node is 4 bytes. -+ * We need one node per 128 bytes in the btree node, which means the auxiliary -+ * search trees take up 3% as much memory as the btree itself. -+ * -+ * Constructing these auxiliary search trees is moderately expensive, and we -+ * don't want to be constantly rebuilding the search tree for the last set -+ * whenever we insert another key into it. For the unwritten set, we use a much -+ * simpler lookup table - it's just a flat array, so index i in the lookup table -+ * corresponds to the i range of BSET_CACHELINE bytes in the set. Indexing -+ * within each byte range works the same as with the auxiliary search trees. -+ * -+ * These are much easier to keep up to date when we insert a key - we do it -+ * somewhat lazily; when we shift a key up we usually just increment the pointer -+ * to it, only when it would overflow do we go to the trouble of finding the -+ * first key in that range of bytes again. -+ */ -+ -+extern bool bch2_expensive_debug_checks; -+ -+static inline bool btree_keys_expensive_checks(const struct btree *b) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ return bch2_expensive_debug_checks || *b->expensive_debug_checks; -+#else -+ return false; -+#endif -+} -+ -+enum bset_aux_tree_type { -+ BSET_NO_AUX_TREE, -+ BSET_RO_AUX_TREE, -+ BSET_RW_AUX_TREE, -+}; -+ -+#define BSET_TREE_NR_TYPES 3 -+ -+#define BSET_NO_AUX_TREE_VAL (U16_MAX) -+#define BSET_RW_AUX_TREE_VAL (U16_MAX - 1) -+ -+static inline enum bset_aux_tree_type bset_aux_tree_type(const struct bset_tree *t) -+{ -+ switch (t->extra) { -+ case BSET_NO_AUX_TREE_VAL: -+ EBUG_ON(t->size); -+ return BSET_NO_AUX_TREE; -+ case BSET_RW_AUX_TREE_VAL: -+ EBUG_ON(!t->size); -+ return BSET_RW_AUX_TREE; -+ default: -+ EBUG_ON(!t->size); -+ return BSET_RO_AUX_TREE; -+ } -+} -+ -+/* -+ * BSET_CACHELINE was originally intended to match the hardware cacheline size - -+ * it used to be 64, but I realized the lookup code would touch slightly less -+ * memory if it was 128. -+ * -+ * It definites the number of bytes (in struct bset) per struct bkey_float in -+ * the auxiliar search tree - when we're done searching the bset_float tree we -+ * have this many bytes left that we do a linear search over. 
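The exponent/mantissa packing described above boils down to loading a machine word at a byte offset derived from the exponent and shifting by its low three bits. A minimal little-endian sketch follows, with hypothetical names and assuming the read stays inside the packed key; the file's own bkey_mantissa() handles both endiannesses and uses a 16-bit mantissa (BKEY_MANTISSA_BITS).

#include <stdint.h>
#include <string.h>

/* Pull 16 key bits starting at bit position @exponent of the packed key. */
static uint16_t demo_mantissa(const uint8_t *packed_key, unsigned exponent)
{
	uint64_t v;

	memcpy(&v, packed_key + (exponent >> 3), sizeof(v));	/* unaligned load */
	v >>= exponent & 7;				/* little endian: low bits first */

	return (uint16_t) v;
}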
-+ * -+ * Since (after level 5) every level of the bset_tree is on a new cacheline, -+ * we're touching one fewer cacheline in the bset tree in exchange for one more -+ * cacheline in the linear search - but the linear search might stop before it -+ * gets to the second cacheline. -+ */ -+ -+#define BSET_CACHELINE 128 -+ -+static inline size_t btree_keys_cachelines(struct btree *b) -+{ -+ return (1U << b->byte_order) / BSET_CACHELINE; -+} -+ -+static inline size_t btree_aux_data_bytes(struct btree *b) -+{ -+ return btree_keys_cachelines(b) * 8; -+} -+ -+static inline size_t btree_aux_data_u64s(struct btree *b) -+{ -+ return btree_aux_data_bytes(b) / sizeof(u64); -+} -+ -+typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *); -+ -+static inline void -+__bkey_unpack_key_format_checked(const struct btree *b, -+ struct bkey *dst, -+ const struct bkey_packed *src) -+{ -+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK -+ { -+ compiled_unpack_fn unpack_fn = b->aux_data; -+ unpack_fn(dst, src); -+ -+ if (btree_keys_expensive_checks(b)) { -+ struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src); -+ -+ BUG_ON(memcmp(dst, &dst2, sizeof(*dst))); -+ } -+ } -+#else -+ *dst = __bch2_bkey_unpack_key(&b->format, src); -+#endif -+} -+ -+static inline struct bkey -+bkey_unpack_key_format_checked(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+ struct bkey dst; -+ -+ __bkey_unpack_key_format_checked(b, &dst, src); -+ return dst; -+} -+ -+static inline void __bkey_unpack_key(const struct btree *b, -+ struct bkey *dst, -+ const struct bkey_packed *src) -+{ -+ if (likely(bkey_packed(src))) -+ __bkey_unpack_key_format_checked(b, dst, src); -+ else -+ *dst = *packed_to_bkey_c(src); -+} -+ -+/** -+ * bkey_unpack_key -- unpack just the key, not the value -+ */ -+static inline struct bkey bkey_unpack_key(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+ return likely(bkey_packed(src)) -+ ? bkey_unpack_key_format_checked(b, src) -+ : *packed_to_bkey_c(src); -+} -+ -+static inline struct bpos -+bkey_unpack_pos_format_checked(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK -+ return bkey_unpack_key_format_checked(b, src).p; -+#else -+ return __bkey_unpack_pos(&b->format, src); -+#endif -+} -+ -+static inline struct bpos bkey_unpack_pos(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+ return likely(bkey_packed(src)) -+ ? 
bkey_unpack_pos_format_checked(b, src) -+ : packed_to_bkey_c(src)->p; -+} -+ -+/* Disassembled bkeys */ -+ -+static inline struct bkey_s_c bkey_disassemble(struct btree *b, -+ const struct bkey_packed *k, -+ struct bkey *u) -+{ -+ __bkey_unpack_key(b, u, k); -+ -+ return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), }; -+} -+ -+/* non const version: */ -+static inline struct bkey_s __bkey_disassemble(struct btree *b, -+ struct bkey_packed *k, -+ struct bkey *u) -+{ -+ __bkey_unpack_key(b, u, k); -+ -+ return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), }; -+} -+ -+#define for_each_bset(_b, _t) \ -+ for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++) -+ -+#define bset_tree_for_each_key(_b, _t, _k) \ -+ for (_k = btree_bkey_first(_b, _t); \ -+ _k != btree_bkey_last(_b, _t); \ -+ _k = bkey_next_skip_noops(_k, btree_bkey_last(_b, _t))) -+ -+static inline bool bset_has_ro_aux_tree(struct bset_tree *t) -+{ -+ return bset_aux_tree_type(t) == BSET_RO_AUX_TREE; -+} -+ -+static inline bool bset_has_rw_aux_tree(struct bset_tree *t) -+{ -+ return bset_aux_tree_type(t) == BSET_RW_AUX_TREE; -+} -+ -+static inline void bch2_bset_set_no_aux_tree(struct btree *b, -+ struct bset_tree *t) -+{ -+ BUG_ON(t < b->set); -+ -+ for (; t < b->set + ARRAY_SIZE(b->set); t++) { -+ t->size = 0; -+ t->extra = BSET_NO_AUX_TREE_VAL; -+ t->aux_data_offset = U16_MAX; -+ } -+} -+ -+static inline void btree_node_set_format(struct btree *b, -+ struct bkey_format f) -+{ -+ int len; -+ -+ b->format = f; -+ b->nr_key_bits = bkey_format_key_bits(&f); -+ -+ len = bch2_compile_bkey_format(&b->format, b->aux_data); -+ BUG_ON(len < 0 || len > U8_MAX); -+ -+ b->unpack_fn_len = len; -+ -+ bch2_bset_set_no_aux_tree(b, b->set); -+} -+ -+static inline struct bset *bset_next_set(struct btree *b, -+ unsigned block_bytes) -+{ -+ struct bset *i = btree_bset_last(b); -+ -+ EBUG_ON(!is_power_of_2(block_bytes)); -+ -+ return ((void *) i) + round_up(vstruct_bytes(i), block_bytes); -+} -+ -+void bch2_btree_keys_init(struct btree *, bool *); -+ -+void bch2_bset_init_first(struct btree *, struct bset *); -+void bch2_bset_init_next(struct bch_fs *, struct btree *, -+ struct btree_node_entry *); -+void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool); -+void bch2_bset_fix_invalidated_key(struct btree *, struct bkey_packed *); -+ -+void bch2_bset_insert(struct btree *, struct btree_node_iter *, -+ struct bkey_packed *, struct bkey_i *, unsigned); -+void bch2_bset_delete(struct btree *, struct bkey_packed *, unsigned); -+ -+/* Bkey utility code */ -+ -+/* packed or unpacked */ -+static inline int bkey_cmp_p_or_unp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r_packed, -+ const struct bpos *r) -+{ -+ EBUG_ON(r_packed && !bkey_packed(r_packed)); -+ -+ if (unlikely(!bkey_packed(l))) -+ return bkey_cmp(packed_to_bkey_c(l)->p, *r); -+ -+ if (likely(r_packed)) -+ return __bch2_bkey_cmp_packed_format_checked(l, r_packed, b); -+ -+ return __bch2_bkey_cmp_left_packed_format_checked(b, l, r); -+} -+ -+struct bset_tree *bch2_bkey_to_bset(struct btree *, struct bkey_packed *); -+ -+struct bkey_packed *bch2_bkey_prev_filter(struct btree *, struct bset_tree *, -+ struct bkey_packed *, unsigned); -+ -+static inline struct bkey_packed * -+bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k) -+{ -+ return bch2_bkey_prev_filter(b, t, k, 0); -+} -+ -+static inline struct bkey_packed * -+bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k) -+{ -+ 
return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_discard + 1); -+} -+ -+enum bch_extent_overlap { -+ BCH_EXTENT_OVERLAP_ALL = 0, -+ BCH_EXTENT_OVERLAP_BACK = 1, -+ BCH_EXTENT_OVERLAP_FRONT = 2, -+ BCH_EXTENT_OVERLAP_MIDDLE = 3, -+}; -+ -+/* Returns how k overlaps with m */ -+static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k, -+ const struct bkey *m) -+{ -+ int cmp1 = bkey_cmp(k->p, m->p) < 0; -+ int cmp2 = bkey_cmp(bkey_start_pos(k), -+ bkey_start_pos(m)) > 0; -+ -+ return (cmp1 << 1) + cmp2; -+} -+ -+/* Btree key iteration */ -+ -+void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *, -+ const struct bkey_packed *, -+ const struct bkey_packed *); -+void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *, -+ struct bpos *); -+void bch2_btree_node_iter_init_from_start(struct btree_node_iter *, -+ struct btree *); -+struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *, -+ struct btree *, -+ struct bset_tree *); -+ -+void bch2_btree_node_iter_sort(struct btree_node_iter *, struct btree *); -+void bch2_btree_node_iter_set_drop(struct btree_node_iter *, -+ struct btree_node_iter_set *); -+void bch2_btree_node_iter_advance(struct btree_node_iter *, struct btree *); -+ -+#define btree_node_iter_for_each(_iter, _set) \ -+ for (_set = (_iter)->data; \ -+ _set < (_iter)->data + ARRAY_SIZE((_iter)->data) && \ -+ (_set)->k != (_set)->end; \ -+ _set++) -+ -+static inline bool __btree_node_iter_set_end(struct btree_node_iter *iter, -+ unsigned i) -+{ -+ return iter->data[i].k == iter->data[i].end; -+} -+ -+static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter) -+{ -+ return __btree_node_iter_set_end(iter, 0); -+} -+ -+/* -+ * When keys compare equal, deleted keys compare first: -+ * -+ * XXX: only need to compare pointers for keys that are both within a -+ * btree_node_iterator - we need to break ties for prev() to work correctly -+ */ -+static inline int bkey_iter_cmp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r) -+{ -+ return bkey_cmp_packed(b, l, r) -+ ?: (int) bkey_deleted(r) - (int) bkey_deleted(l) -+ ?: cmp_int(l, r); -+} -+ -+static inline int btree_node_iter_cmp(const struct btree *b, -+ struct btree_node_iter_set l, -+ struct btree_node_iter_set r) -+{ -+ return bkey_iter_cmp(b, -+ __btree_node_offset_to_key(b, l.k), -+ __btree_node_offset_to_key(b, r.k)); -+} -+ -+/* These assume r (the search key) is not a deleted key: */ -+static inline int bkey_iter_pos_cmp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bpos *r) -+{ -+ return bkey_cmp_left_packed(b, l, r) -+ ?: -((int) bkey_deleted(l)); -+} -+ -+static inline int bkey_iter_cmp_p_or_unp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r_packed, -+ const struct bpos *r) -+{ -+ return bkey_cmp_p_or_unp(b, l, r_packed, r) -+ ?: -((int) bkey_deleted(l)); -+} -+ -+static inline struct bkey_packed * -+__bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ return __btree_node_offset_to_key(b, iter->data->k); -+} -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_peek_filter(struct btree_node_iter *iter, -+ struct btree *b, -+ unsigned min_key_type) -+{ -+ while (!bch2_btree_node_iter_end(iter)) { -+ struct bkey_packed *k = __bch2_btree_node_iter_peek_all(iter, b); -+ -+ if (k->type >= min_key_type) -+ return k; -+ -+ bch2_btree_node_iter_advance(iter, b); -+ } -+ -+ return NULL; -+} -+ -+static inline 
struct bkey_packed * -+bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ return bch2_btree_node_iter_peek_filter(iter, b, 0); -+} -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b) -+{ -+ return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_discard + 1); -+} -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_next_all(struct btree_node_iter *iter, struct btree *b) -+{ -+ struct bkey_packed *ret = bch2_btree_node_iter_peek_all(iter, b); -+ -+ if (ret) -+ bch2_btree_node_iter_advance(iter, b); -+ -+ return ret; -+} -+ -+struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *, -+ struct btree *); -+struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *, -+ struct btree *, unsigned); -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b) -+{ -+ return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_discard + 1); -+} -+ -+struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *, -+ struct btree *, -+ struct bkey *); -+ -+#define for_each_btree_node_key_unpack(b, k, iter, unpacked) \ -+ for (bch2_btree_node_iter_init_from_start((iter), (b)); \ -+ (k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\ -+ bch2_btree_node_iter_advance(iter, b)) -+ -+/* Accounting: */ -+ -+static inline void btree_keys_account_key(struct btree_nr_keys *n, -+ unsigned bset, -+ struct bkey_packed *k, -+ int sign) -+{ -+ n->live_u64s += k->u64s * sign; -+ n->bset_u64s[bset] += k->u64s * sign; -+ -+ if (bkey_packed(k)) -+ n->packed_keys += sign; -+ else -+ n->unpacked_keys += sign; -+} -+ -+static inline void btree_keys_account_val_delta(struct btree *b, -+ struct bkey_packed *k, -+ int delta) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ -+ b->nr.live_u64s += delta; -+ b->nr.bset_u64s[t - b->set] += delta; -+} -+ -+#define btree_keys_account_key_add(_nr, _bset_idx, _k) \ -+ btree_keys_account_key(_nr, _bset_idx, _k, 1) -+#define btree_keys_account_key_drop(_nr, _bset_idx, _k) \ -+ btree_keys_account_key(_nr, _bset_idx, _k, -1) -+ -+#define btree_account_key_add(_b, _k) \ -+ btree_keys_account_key(&(_b)->nr, \ -+ bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, 1) -+#define btree_account_key_drop(_b, _k) \ -+ btree_keys_account_key(&(_b)->nr, \ -+ bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, -1) -+ -+struct bset_stats { -+ struct { -+ size_t nr, bytes; -+ } sets[BSET_TREE_NR_TYPES]; -+ -+ size_t floats; -+ size_t failed; -+}; -+ -+void bch2_btree_keys_stats(struct btree *, struct bset_stats *); -+void bch2_bfloat_to_text(struct printbuf *, struct btree *, -+ struct bkey_packed *); -+ -+/* Debug stuff */ -+ -+void bch2_dump_bset(struct bch_fs *, struct btree *, struct bset *, unsigned); -+void bch2_dump_btree_node(struct bch_fs *, struct btree *); -+void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+void __bch2_verify_btree_nr_keys(struct btree *); -+void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *); -+void bch2_verify_insert_pos(struct btree *, struct bkey_packed *, -+ struct bkey_packed *, unsigned); -+ -+#else -+ -+static inline void __bch2_verify_btree_nr_keys(struct btree *b) {} -+static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter, -+ struct btree *b) {} -+static inline void bch2_verify_insert_pos(struct btree *b, -+ struct bkey_packed *where, -+ struct 
bkey_packed *insert, -+ unsigned clobber_u64s) {} -+#endif -+ -+static inline void bch2_verify_btree_nr_keys(struct btree *b) -+{ -+ if (btree_keys_expensive_checks(b)) -+ __bch2_verify_btree_nr_keys(b); -+} -+ -+#endif /* _BCACHEFS_BSET_H */ -diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c -new file mode 100644 -index 000000000000..736671112861 ---- /dev/null -+++ b/fs/bcachefs/btree_cache.c -@@ -0,0 +1,1057 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_locking.h" -+#include "debug.h" -+ -+#include -+#include -+#include -+ -+const char * const bch2_btree_ids[] = { -+#define x(kwd, val, name) name, -+ BCH_BTREE_IDS() -+#undef x -+ NULL -+}; -+ -+void bch2_recalc_btree_reserve(struct bch_fs *c) -+{ -+ unsigned i, reserve = 16; -+ -+ if (!c->btree_roots[0].b) -+ reserve += 8; -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (c->btree_roots[i].b) -+ reserve += min_t(unsigned, 1, -+ c->btree_roots[i].b->c.level) * 8; -+ -+ c->btree_cache.reserve = reserve; -+} -+ -+static inline unsigned btree_cache_can_free(struct btree_cache *bc) -+{ -+ return max_t(int, 0, bc->used - bc->reserve); -+} -+ -+static void __btree_node_data_free(struct bch_fs *c, struct btree *b) -+{ -+ EBUG_ON(btree_node_write_in_flight(b)); -+ -+ kvpfree(b->data, btree_bytes(c)); -+ b->data = NULL; -+ vfree(b->aux_data); -+ b->aux_data = NULL; -+} -+ -+static void btree_node_data_free(struct bch_fs *c, struct btree *b) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ -+ __btree_node_data_free(c, b); -+ bc->used--; -+ list_move(&b->list, &bc->freed); -+} -+ -+static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, -+ const void *obj) -+{ -+ const struct btree *b = obj; -+ const u64 *v = arg->key; -+ -+ return b->hash_val == *v ? 
0 : 1; -+} -+ -+static const struct rhashtable_params bch_btree_cache_params = { -+ .head_offset = offsetof(struct btree, hash), -+ .key_offset = offsetof(struct btree, hash_val), -+ .key_len = sizeof(u64), -+ .obj_cmpfn = bch2_btree_cache_cmp_fn, -+}; -+ -+static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) -+{ -+ BUG_ON(b->data || b->aux_data); -+ -+ b->data = kvpmalloc(btree_bytes(c), gfp); -+ if (!b->data) -+ return -ENOMEM; -+ -+ b->aux_data = vmalloc_exec(btree_aux_data_bytes(b), gfp); -+ if (!b->aux_data) { -+ kvpfree(b->data, btree_bytes(c)); -+ b->data = NULL; -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+static struct btree *__btree_node_mem_alloc(struct bch_fs *c) -+{ -+ struct btree *b = kzalloc(sizeof(struct btree), GFP_KERNEL); -+ if (!b) -+ return NULL; -+ -+ bkey_btree_ptr_init(&b->key); -+ six_lock_init(&b->c.lock); -+ INIT_LIST_HEAD(&b->list); -+ INIT_LIST_HEAD(&b->write_blocked); -+ b->byte_order = ilog2(btree_bytes(c)); -+ return b; -+} -+ -+static struct btree *btree_node_mem_alloc(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b = __btree_node_mem_alloc(c); -+ if (!b) -+ return NULL; -+ -+ if (btree_node_data_alloc(c, b, GFP_KERNEL)) { -+ kfree(b); -+ return NULL; -+ } -+ -+ bc->used++; -+ list_add(&b->list, &bc->freeable); -+ return b; -+} -+ -+/* Btree in memory cache - hash table */ -+ -+void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) -+{ -+ rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); -+ -+ /* Cause future lookups for this node to fail: */ -+ b->hash_val = 0; -+ -+ six_lock_wakeup_all(&b->c.lock); -+} -+ -+int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) -+{ -+ BUG_ON(b->hash_val); -+ b->hash_val = btree_ptr_hash_val(&b->key); -+ -+ return rhashtable_lookup_insert_fast(&bc->table, &b->hash, -+ bch_btree_cache_params); -+} -+ -+int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b, -+ unsigned level, enum btree_id id) -+{ -+ int ret; -+ -+ b->c.level = level; -+ b->c.btree_id = id; -+ -+ mutex_lock(&bc->lock); -+ ret = __bch2_btree_node_hash_insert(bc, b); -+ if (!ret) -+ list_add(&b->list, &bc->live); -+ mutex_unlock(&bc->lock); -+ -+ return ret; -+} -+ -+__flatten -+static inline struct btree *btree_cache_find(struct btree_cache *bc, -+ const struct bkey_i *k) -+{ -+ u64 v = btree_ptr_hash_val(k); -+ -+ return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params); -+} -+ -+/* -+ * this version is for btree nodes that have already been freed (we're not -+ * reaping a real btree node) -+ */ -+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ int ret = 0; -+ -+ lockdep_assert_held(&bc->lock); -+ -+ if (!six_trylock_intent(&b->c.lock)) -+ return -ENOMEM; -+ -+ if (!six_trylock_write(&b->c.lock)) -+ goto out_unlock_intent; -+ -+ if (btree_node_noevict(b)) -+ goto out_unlock; -+ -+ if (!btree_node_may_write(b)) -+ goto out_unlock; -+ -+ if (btree_node_dirty(b) && -+ test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) -+ goto out_unlock; -+ -+ if (btree_node_dirty(b) || -+ btree_node_write_in_flight(b) || -+ btree_node_read_in_flight(b)) { -+ if (!flush) -+ goto out_unlock; -+ -+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, -+ TASK_UNINTERRUPTIBLE); -+ -+ /* -+ * Using the underscore version because we don't want to compact -+ * bsets after the write, since this node is about to be evicted -+ * - unless btree verify 
mode is enabled, since it runs out of -+ * the post write cleanup: -+ */ -+ if (verify_btree_ondisk(c)) -+ bch2_btree_node_write(c, b, SIX_LOCK_intent); -+ else -+ __bch2_btree_node_write(c, b, SIX_LOCK_read); -+ -+ /* wait for any in flight btree write */ -+ btree_node_wait_on_io(b); -+ } -+out: -+ if (b->hash_val && !ret) -+ trace_btree_node_reap(c, b); -+ return ret; -+out_unlock: -+ six_unlock_write(&b->c.lock); -+out_unlock_intent: -+ six_unlock_intent(&b->c.lock); -+ ret = -ENOMEM; -+ goto out; -+} -+ -+static int btree_node_reclaim(struct bch_fs *c, struct btree *b) -+{ -+ return __btree_node_reclaim(c, b, false); -+} -+ -+static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b) -+{ -+ return __btree_node_reclaim(c, b, true); -+} -+ -+static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, -+ struct shrink_control *sc) -+{ -+ struct bch_fs *c = container_of(shrink, struct bch_fs, -+ btree_cache.shrink); -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b, *t; -+ unsigned long nr = sc->nr_to_scan; -+ unsigned long can_free; -+ unsigned long touched = 0; -+ unsigned long freed = 0; -+ unsigned i; -+ -+ if (btree_shrinker_disabled(c)) -+ return SHRINK_STOP; -+ -+ /* Return -1 if we can't do anything right now */ -+ if (sc->gfp_mask & __GFP_FS) -+ mutex_lock(&bc->lock); -+ else if (!mutex_trylock(&bc->lock)) -+ return -1; -+ -+ /* -+ * It's _really_ critical that we don't free too many btree nodes - we -+ * have to always leave ourselves a reserve. The reserve is how we -+ * guarantee that allocating memory for a new btree node can always -+ * succeed, so that inserting keys into the btree can always succeed and -+ * IO can always make forward progress: -+ */ -+ nr /= btree_pages(c); -+ can_free = btree_cache_can_free(bc); -+ nr = min_t(unsigned long, nr, can_free); -+ -+ i = 0; -+ list_for_each_entry_safe(b, t, &bc->freeable, list) { -+ touched++; -+ -+ if (freed >= nr) -+ break; -+ -+ if (++i > 3 && -+ !btree_node_reclaim(c, b)) { -+ btree_node_data_free(c, b); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ freed++; -+ } -+ } -+restart: -+ list_for_each_entry_safe(b, t, &bc->live, list) { -+ touched++; -+ -+ if (freed >= nr) { -+ /* Save position */ -+ if (&t->list != &bc->live) -+ list_move_tail(&bc->live, &t->list); -+ break; -+ } -+ -+ if (!btree_node_accessed(b) && -+ !btree_node_reclaim(c, b)) { -+ /* can't call bch2_btree_node_hash_remove under lock */ -+ freed++; -+ if (&t->list != &bc->live) -+ list_move_tail(&bc->live, &t->list); -+ -+ btree_node_data_free(c, b); -+ mutex_unlock(&bc->lock); -+ -+ bch2_btree_node_hash_remove(bc, b); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ -+ if (freed >= nr) -+ goto out; -+ -+ if (sc->gfp_mask & __GFP_FS) -+ mutex_lock(&bc->lock); -+ else if (!mutex_trylock(&bc->lock)) -+ goto out; -+ goto restart; -+ } else -+ clear_btree_node_accessed(b); -+ } -+ -+ mutex_unlock(&bc->lock); -+out: -+ return (unsigned long) freed * btree_pages(c); -+} -+ -+static unsigned long bch2_btree_cache_count(struct shrinker *shrink, -+ struct shrink_control *sc) -+{ -+ struct bch_fs *c = container_of(shrink, struct bch_fs, -+ btree_cache.shrink); -+ struct btree_cache *bc = &c->btree_cache; -+ -+ if (btree_shrinker_disabled(c)) -+ return 0; -+ -+ return btree_cache_can_free(bc) * btree_pages(c); -+} -+ -+void bch2_fs_btree_cache_exit(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ unsigned i; -+ -+ if (bc->shrink.list.next) -+ 
unregister_shrinker(&bc->shrink); -+ -+ mutex_lock(&bc->lock); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ if (c->verify_data) -+ list_move(&c->verify_data->list, &bc->live); -+ -+ kvpfree(c->verify_ondisk, btree_bytes(c)); -+#endif -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (c->btree_roots[i].b) -+ list_add(&c->btree_roots[i].b->list, &bc->live); -+ -+ list_splice(&bc->freeable, &bc->live); -+ -+ while (!list_empty(&bc->live)) { -+ b = list_first_entry(&bc->live, struct btree, list); -+ -+ BUG_ON(btree_node_read_in_flight(b) || -+ btree_node_write_in_flight(b)); -+ -+ if (btree_node_dirty(b)) -+ bch2_btree_complete_write(c, b, btree_current_write(b)); -+ clear_btree_node_dirty(b); -+ -+ btree_node_data_free(c, b); -+ } -+ -+ while (!list_empty(&bc->freed)) { -+ b = list_first_entry(&bc->freed, struct btree, list); -+ list_del(&b->list); -+ kfree(b); -+ } -+ -+ mutex_unlock(&bc->lock); -+ -+ if (bc->table_init_done) -+ rhashtable_destroy(&bc->table); -+} -+ -+int bch2_fs_btree_cache_init(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ unsigned i; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ ret = rhashtable_init(&bc->table, &bch_btree_cache_params); -+ if (ret) -+ goto out; -+ -+ bc->table_init_done = true; -+ -+ bch2_recalc_btree_reserve(c); -+ -+ for (i = 0; i < bc->reserve; i++) -+ if (!btree_node_mem_alloc(c)) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ list_splice_init(&bc->live, &bc->freeable); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ mutex_init(&c->verify_lock); -+ -+ c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL); -+ if (!c->verify_ondisk) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ c->verify_data = btree_node_mem_alloc(c); -+ if (!c->verify_data) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ list_del_init(&c->verify_data->list); -+#endif -+ -+ bc->shrink.count_objects = bch2_btree_cache_count; -+ bc->shrink.scan_objects = bch2_btree_cache_scan; -+ bc->shrink.seeks = 4; -+ bc->shrink.batch = btree_pages(c) * 2; -+ register_shrinker(&bc->shrink); -+out: -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -+ -+void bch2_fs_btree_cache_init_early(struct btree_cache *bc) -+{ -+ mutex_init(&bc->lock); -+ INIT_LIST_HEAD(&bc->live); -+ INIT_LIST_HEAD(&bc->freeable); -+ INIT_LIST_HEAD(&bc->freed); -+} -+ -+/* -+ * We can only have one thread cannibalizing other cached btree nodes at a time, -+ * or we'll deadlock. We use an open coded mutex to ensure that, which a -+ * cannibalize_bucket() will take. This means every time we unlock the root of -+ * the btree, we need to release this lock if we have it held. 
-+ */ -+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ -+ if (bc->alloc_lock == current) { -+ trace_btree_node_cannibalize_unlock(c); -+ bc->alloc_lock = NULL; -+ closure_wake_up(&bc->alloc_wait); -+ } -+} -+ -+int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct task_struct *old; -+ -+ old = cmpxchg(&bc->alloc_lock, NULL, current); -+ if (old == NULL || old == current) -+ goto success; -+ -+ if (!cl) { -+ trace_btree_node_cannibalize_lock_fail(c); -+ return -ENOMEM; -+ } -+ -+ closure_wait(&bc->alloc_wait, cl); -+ -+ /* Try again, after adding ourselves to waitlist */ -+ old = cmpxchg(&bc->alloc_lock, NULL, current); -+ if (old == NULL || old == current) { -+ /* We raced */ -+ closure_wake_up(&bc->alloc_wait); -+ goto success; -+ } -+ -+ trace_btree_node_cannibalize_lock_fail(c); -+ return -EAGAIN; -+ -+success: -+ trace_btree_node_cannibalize_lock(c); -+ return 0; -+} -+ -+static struct btree *btree_node_cannibalize(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ -+ list_for_each_entry_reverse(b, &bc->live, list) -+ if (!btree_node_reclaim(c, b)) -+ return b; -+ -+ while (1) { -+ list_for_each_entry_reverse(b, &bc->live, list) -+ if (!btree_node_write_and_reclaim(c, b)) -+ return b; -+ -+ /* -+ * Rare case: all nodes were intent-locked. -+ * Just busy-wait. -+ */ -+ WARN_ONCE(1, "btree cache cannibalize failed\n"); -+ cond_resched(); -+ } -+} -+ -+struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ u64 start_time = local_clock(); -+ unsigned flags; -+ -+ flags = memalloc_nofs_save(); -+ mutex_lock(&bc->lock); -+ -+ /* -+ * btree_free() doesn't free memory; it sticks the node on the end of -+ * the list. Check if there's any freed nodes there: -+ */ -+ list_for_each_entry(b, &bc->freeable, list) -+ if (!btree_node_reclaim(c, b)) -+ goto got_node; -+ -+ /* -+ * We never free struct btree itself, just the memory that holds the on -+ * disk node. 
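The cannibalize lock above is an open-coded mutex: a single owner pointer updated with cmpxchg(), where the current owner is allowed to take the lock again, and a loser either fails immediately or parks itself on a closure waitlist and retries the cmpxchg once so that a racing unlock cannot be missed. The standalone sketch below shows only the ownership half of that pattern, using C11 atomics; canni_lock and the integer thread ids are invented stand-ins, and the waitlist and wakeup side is reduced to comments.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Stand-in: in the code above the owner is a struct task_struct * and waiters
 * sleep on bc->alloc_wait; here an int thread id and no waiting at all.
 */
struct canni_lock {
        _Atomic int owner;      /* 0 means unlocked */
};

/* Try to become the single cannibalizing thread. Returns true on success. */
static bool canni_trylock(struct canni_lock *l, int me)
{
        int expected = 0;

        if (atomic_compare_exchange_strong(&l->owner, &expected, me))
                return true;
        /* Taking the lock again while already owning it succeeds, as above. */
        return expected == me;
}

static void canni_unlock(struct canni_lock *l, int me)
{
        int expected = me;

        /* Only the owner may unlock; this is where waiters would be woken. */
        atomic_compare_exchange_strong(&l->owner, &expected, 0);
}

int main(void)
{
        struct canni_lock l = { .owner = 0 };

        printf("thread 1 trylock: %d\n", canni_trylock(&l, 1));  /* succeeds */
        printf("thread 2 trylock: %d\n", canni_trylock(&l, 2));  /* fails, would wait */
        canni_unlock(&l, 1);
        printf("thread 2 trylock: %d\n", canni_trylock(&l, 2));  /* succeeds */
        return 0;
}

The two-step dance in the real lock, try the cmpxchg, join the waitlist, then try again before sleeping, is what closes the window where an unlock could slip in between the failed attempt and the wait.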
Check the freed list before allocating a new one: -+ */ -+ list_for_each_entry(b, &bc->freed, list) -+ if (!btree_node_reclaim(c, b)) -+ goto got_node; -+ -+ b = NULL; -+got_node: -+ if (b) -+ list_del_init(&b->list); -+ mutex_unlock(&bc->lock); -+ -+ if (!b) { -+ b = __btree_node_mem_alloc(c); -+ if (!b) -+ goto err; -+ -+ BUG_ON(!six_trylock_intent(&b->c.lock)); -+ BUG_ON(!six_trylock_write(&b->c.lock)); -+ } -+ -+ if (!b->data) { -+ if (btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_KERNEL)) -+ goto err; -+ -+ mutex_lock(&bc->lock); -+ bc->used++; -+ mutex_unlock(&bc->lock); -+ } -+ -+ BUG_ON(btree_node_hashed(b)); -+ BUG_ON(btree_node_write_in_flight(b)); -+out: -+ b->flags = 0; -+ b->written = 0; -+ b->nsets = 0; -+ b->sib_u64s[0] = 0; -+ b->sib_u64s[1] = 0; -+ b->whiteout_u64s = 0; -+ bch2_btree_keys_init(b, &c->expensive_debug_checks); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc], -+ start_time); -+ -+ memalloc_nofs_restore(flags); -+ return b; -+err: -+ mutex_lock(&bc->lock); -+ -+ if (b) { -+ list_add(&b->list, &bc->freed); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ } -+ -+ /* Try to cannibalize another cached btree node: */ -+ if (bc->alloc_lock == current) { -+ b = btree_node_cannibalize(c); -+ list_del_init(&b->list); -+ mutex_unlock(&bc->lock); -+ -+ bch2_btree_node_hash_remove(bc, b); -+ -+ trace_btree_node_cannibalize(c); -+ goto out; -+ } -+ -+ mutex_unlock(&bc->lock); -+ memalloc_nofs_restore(flags); -+ return ERR_PTR(-ENOMEM); -+} -+ -+/* Slowpath, don't want it inlined into btree_iter_traverse() */ -+static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, -+ struct btree_iter *iter, -+ const struct bkey_i *k, -+ enum btree_id btree_id, -+ unsigned level, -+ enum six_lock_type lock_type, -+ bool sync) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ -+ BUG_ON(level + 1 >= BTREE_MAX_DEPTH); -+ /* -+ * Parent node must be locked, else we could read in a btree node that's -+ * been freed: -+ */ -+ if (iter && !bch2_btree_node_relock(iter, level + 1)) -+ return ERR_PTR(-EINTR); -+ -+ b = bch2_btree_node_mem_alloc(c); -+ if (IS_ERR(b)) -+ return b; -+ -+ bkey_copy(&b->key, k); -+ if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) { -+ /* raced with another fill: */ -+ -+ /* mark as unhashed... */ -+ b->hash_val = 0; -+ -+ mutex_lock(&bc->lock); -+ list_add(&b->list, &bc->freeable); -+ mutex_unlock(&bc->lock); -+ -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ return NULL; -+ } -+ -+ /* -+ * Unlock before doing IO: -+ * -+ * XXX: ideally should be dropping all btree node locks here -+ */ -+ if (iter && btree_node_read_locked(iter, level + 1)) -+ btree_node_unlock(iter, level + 1); -+ -+ bch2_btree_node_read(c, b, sync); -+ -+ six_unlock_write(&b->c.lock); -+ -+ if (!sync) { -+ six_unlock_intent(&b->c.lock); -+ return NULL; -+ } -+ -+ if (lock_type == SIX_LOCK_read) -+ six_lock_downgrade(&b->c.lock); -+ -+ return b; -+} -+ -+static int lock_node_check_fn(struct six_lock *lock, void *p) -+{ -+ struct btree *b = container_of(lock, struct btree, c.lock); -+ const struct bkey_i *k = p; -+ -+ return b->hash_val == btree_ptr_hash_val(k) ? 0 : -1; -+} -+ -+/** -+ * bch_btree_node_get - find a btree node in the cache and lock it, reading it -+ * in from disk if necessary. -+ * -+ * If IO is necessary and running under generic_make_request, returns -EAGAIN. -+ * -+ * The btree node will have either a read or a write lock held, depending on -+ * the @write parameter. 
-+ */ -+struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter, -+ const struct bkey_i *k, unsigned level, -+ enum six_lock_type lock_type) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ struct bset_tree *t; -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ -+ b = btree_node_mem_ptr(k); -+ if (b) -+ goto lock_node; -+retry: -+ b = btree_cache_find(bc, k); -+ if (unlikely(!b)) { -+ /* -+ * We must have the parent locked to call bch2_btree_node_fill(), -+ * else we could read in a btree node from disk that's been -+ * freed: -+ */ -+ b = bch2_btree_node_fill(c, iter, k, iter->btree_id, -+ level, lock_type, true); -+ -+ /* We raced and found the btree node in the cache */ -+ if (!b) -+ goto retry; -+ -+ if (IS_ERR(b)) -+ return b; -+ } else { -+lock_node: -+ /* -+ * There's a potential deadlock with splits and insertions into -+ * interior nodes we have to avoid: -+ * -+ * The other thread might be holding an intent lock on the node -+ * we want, and they want to update its parent node so they're -+ * going to upgrade their intent lock on the parent node to a -+ * write lock. -+ * -+ * But if we're holding a read lock on the parent, and we're -+ * trying to get the intent lock they're holding, we deadlock. -+ * -+ * So to avoid this we drop the read locks on parent nodes when -+ * we're starting to take intent locks - and handle the race. -+ * -+ * The race is that they might be about to free the node we -+ * want, and dropping our read lock on the parent node lets them -+ * update the parent marking the node we want as freed, and then -+ * free it: -+ * -+ * To guard against this, btree nodes are evicted from the cache -+ * when they're freed - and b->hash_val is zeroed out, which we -+ * check for after we lock the node. 
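The race described above reduces to a common shape: look the node up without holding anything that pins it, take its lock, then re-check its identity (b->hash_val, and the level) under that lock, and retry from the top if it changed, because eviction zeroes the hash value. The single-threaded toy below shows only that lookup/lock/re-check/retry loop; node_get(), lock_node() and the simulate_race flag are invented for the illustration and the competing thread is simulated inline, so this is the shape of the logic rather than the bcachefs locking itself. The comment's remaining point, that relocking the parent then fails and the caller bails out, continues below.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct node {
        uint64_t hash_val;      /* zeroed when the node is evicted/freed */
        bool     locked;
};

/* Pretend another thread evicts the node between our lookup and our lock. */
static bool simulate_race = true;

static struct node *cache_lookup(struct node *pool, int nr, uint64_t hash)
{
        for (int i = 0; i < nr; i++)
                if (pool[i].hash_val == hash)
                        return &pool[i];
        return NULL;
}

static void lock_node(struct node *n)
{
        if (simulate_race) {
                n->hash_val = 0;        /* eviction clears the identity first */
                simulate_race = false;
        }
        n->locked = true;
}

static struct node *node_get(struct node *pool, int nr, uint64_t hash)
{
        struct node *n;
        int retries = 0;
retry:
        n = cache_lookup(pool, nr, hash);
        if (!n) {
                /* miss: refill the cache (stands in for reading the node back in) */
                pool[0].hash_val = hash;
                goto retry;
        }

        lock_node(n);

        /*
         * Re-check identity under the lock: the node may have been freed and
         * reused while we held no lock that pinned it.
         */
        if (n->hash_val != hash) {
                n->locked = false;
                retries++;
                goto retry;
        }

        printf("got node after %d retr%s\n", retries, retries == 1 ? "y" : "ies");
        return n;
}

int main(void)
{
        struct node pool[2] = { { .hash_val = 0xabc }, { 0 } };

        node_get(pool, 2, 0xabc);
        return 0;
}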
-+ * -+ * Then, bch2_btree_node_relock() on the parent will fail - because -+ * the parent was modified, when the pointer to the node we want -+ * was removed - and we'll bail out: -+ */ -+ if (btree_node_read_locked(iter, level + 1)) -+ btree_node_unlock(iter, level + 1); -+ -+ if (!btree_node_lock(b, k->k.p, level, iter, lock_type, -+ lock_node_check_fn, (void *) k)) { -+ if (b->hash_val != btree_ptr_hash_val(k)) -+ goto retry; -+ return ERR_PTR(-EINTR); -+ } -+ -+ if (unlikely(b->hash_val != btree_ptr_hash_val(k) || -+ b->c.level != level || -+ race_fault())) { -+ six_unlock_type(&b->c.lock, lock_type); -+ if (bch2_btree_node_relock(iter, level + 1)) -+ goto retry; -+ -+ trace_trans_restart_btree_node_reused(iter->trans->ip); -+ return ERR_PTR(-EINTR); -+ } -+ } -+ -+ /* XXX: waiting on IO with btree locks held: */ -+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, -+ TASK_UNINTERRUPTIBLE); -+ -+ prefetch(b->aux_data); -+ -+ for_each_bset(b, t) { -+ void *p = (u64 *) b->aux_data + t->aux_data_offset; -+ -+ prefetch(p + L1_CACHE_BYTES * 0); -+ prefetch(p + L1_CACHE_BYTES * 1); -+ prefetch(p + L1_CACHE_BYTES * 2); -+ } -+ -+ /* avoid atomic set bit if it's not needed: */ -+ if (!btree_node_accessed(b)) -+ set_btree_node_accessed(b); -+ -+ if (unlikely(btree_node_read_error(b))) { -+ six_unlock_type(&b->c.lock, lock_type); -+ return ERR_PTR(-EIO); -+ } -+ -+ EBUG_ON(b->c.btree_id != iter->btree_id || -+ BTREE_NODE_LEVEL(b->data) != level || -+ bkey_cmp(b->data->max_key, k->k.p)); -+ -+ return b; -+} -+ -+struct btree *bch2_btree_node_get_noiter(struct bch_fs *c, -+ const struct bkey_i *k, -+ enum btree_id btree_id, -+ unsigned level) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ struct bset_tree *t; -+ int ret; -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ -+ b = btree_node_mem_ptr(k); -+ if (b) -+ goto lock_node; -+retry: -+ b = btree_cache_find(bc, k); -+ if (unlikely(!b)) { -+ b = bch2_btree_node_fill(c, NULL, k, btree_id, -+ level, SIX_LOCK_read, true); -+ -+ /* We raced and found the btree node in the cache */ -+ if (!b) -+ goto retry; -+ -+ if (IS_ERR(b)) -+ return b; -+ } else { -+lock_node: -+ ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k); -+ if (ret) -+ goto retry; -+ -+ if (unlikely(b->hash_val != btree_ptr_hash_val(k) || -+ b->c.btree_id != btree_id || -+ b->c.level != level)) { -+ six_unlock_read(&b->c.lock); -+ goto retry; -+ } -+ } -+ -+ /* XXX: waiting on IO with btree locks held: */ -+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, -+ TASK_UNINTERRUPTIBLE); -+ -+ prefetch(b->aux_data); -+ -+ for_each_bset(b, t) { -+ void *p = (u64 *) b->aux_data + t->aux_data_offset; -+ -+ prefetch(p + L1_CACHE_BYTES * 0); -+ prefetch(p + L1_CACHE_BYTES * 1); -+ prefetch(p + L1_CACHE_BYTES * 2); -+ } -+ -+ /* avoid atomic set bit if it's not needed: */ -+ if (!btree_node_accessed(b)) -+ set_btree_node_accessed(b); -+ -+ if (unlikely(btree_node_read_error(b))) { -+ six_unlock_read(&b->c.lock); -+ return ERR_PTR(-EIO); -+ } -+ -+ EBUG_ON(b->c.btree_id != btree_id || -+ BTREE_NODE_LEVEL(b->data) != level || -+ bkey_cmp(b->data->max_key, k->k.p)); -+ -+ return b; -+} -+ -+struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, -+ struct btree_iter *iter, -+ struct btree *b, -+ enum btree_node_sibling sib) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree *parent; -+ struct btree_node_iter node_iter; -+ struct bkey_packed *k; -+ BKEY_PADDED(k) tmp; -+ struct btree *ret = NULL; -+ unsigned level = b->c.level; -+ -+ parent = 
btree_iter_node(iter, level + 1); -+ if (!parent) -+ return NULL; -+ -+ /* -+ * There's a corner case where a btree_iter might have a node locked -+ * that is just outside its current pos - when -+ * bch2_btree_iter_set_pos_same_leaf() gets to the end of the node. -+ * -+ * But the lock ordering checks in __bch2_btree_node_lock() go off of -+ * iter->pos, not the node's key: so if the iterator is marked as -+ * needing to be traversed, we risk deadlock if we don't bail out here: -+ */ -+ if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE) -+ return ERR_PTR(-EINTR); -+ -+ if (!bch2_btree_node_relock(iter, level + 1)) { -+ ret = ERR_PTR(-EINTR); -+ goto out; -+ } -+ -+ node_iter = iter->l[parent->c.level].iter; -+ -+ k = bch2_btree_node_iter_peek_all(&node_iter, parent); -+ BUG_ON(bkey_cmp_left_packed(parent, k, &b->key.k.p)); -+ -+ k = sib == btree_prev_sib -+ ? bch2_btree_node_iter_prev(&node_iter, parent) -+ : (bch2_btree_node_iter_advance(&node_iter, parent), -+ bch2_btree_node_iter_peek(&node_iter, parent)); -+ if (!k) -+ goto out; -+ -+ bch2_bkey_unpack(parent, &tmp.k, k); -+ -+ ret = bch2_btree_node_get(c, iter, &tmp.k, level, -+ SIX_LOCK_intent); -+ -+ if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) { -+ struct btree_iter *linked; -+ -+ if (!bch2_btree_node_relock(iter, level + 1)) -+ goto out; -+ -+ /* -+ * We might have got -EINTR because trylock failed, and we're -+ * holding other locks that would cause us to deadlock: -+ */ -+ trans_for_each_iter(trans, linked) -+ if (btree_iter_cmp(iter, linked) < 0) -+ __bch2_btree_iter_unlock(linked); -+ -+ if (sib == btree_prev_sib) -+ btree_node_unlock(iter, level); -+ -+ ret = bch2_btree_node_get(c, iter, &tmp.k, level, -+ SIX_LOCK_intent); -+ -+ /* -+ * before btree_iter_relock() calls btree_iter_verify_locks(): -+ */ -+ if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED) -+ btree_node_unlock(iter, level + 1); -+ -+ if (!bch2_btree_node_relock(iter, level)) { -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK); -+ -+ if (!IS_ERR(ret)) { -+ six_unlock_intent(&ret->c.lock); -+ ret = ERR_PTR(-EINTR); -+ } -+ } -+ -+ bch2_trans_relock(trans); -+ } -+out: -+ if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED) -+ btree_node_unlock(iter, level + 1); -+ -+ if (PTR_ERR_OR_ZERO(ret) == -EINTR) -+ bch2_btree_iter_upgrade(iter, level + 2); -+ -+ BUG_ON(!IS_ERR(ret) && !btree_node_locked(iter, level)); -+ -+ if (!IS_ERR_OR_NULL(ret)) { -+ struct btree *n1 = ret, *n2 = b; -+ -+ if (sib != btree_prev_sib) -+ swap(n1, n2); -+ -+ BUG_ON(bkey_cmp(bkey_successor(n1->key.k.p), -+ n2->data->min_key)); -+ } -+ -+ bch2_btree_trans_verify_locks(trans); -+ -+ return ret; -+} -+ -+void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, -+ const struct bkey_i *k, unsigned level) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ -+ BUG_ON(!btree_node_locked(iter, level + 1)); -+ BUG_ON(level >= BTREE_MAX_DEPTH); -+ -+ b = btree_cache_find(bc, k); -+ if (b) -+ return; -+ -+ bch2_btree_node_fill(c, iter, k, iter->btree_id, -+ level, SIX_LOCK_read, false); -+} -+ -+void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, -+ struct btree *b) -+{ -+ const struct bkey_format *f = &b->format; -+ struct bset_stats stats; -+ -+ memset(&stats, 0, sizeof(stats)); -+ -+ bch2_btree_keys_stats(b, &stats); -+ -+ pr_buf(out, -+ "l %u %llu:%llu - %llu:%llu:\n" -+ " ptrs: ", -+ b->c.level, -+ b->data->min_key.inode, -+ b->data->min_key.offset, -+ b->data->max_key.inode, -+ b->data->max_key.offset); -+ 
bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key)); -+ pr_buf(out, "\n" -+ " format: u64s %u fields %u %u %u %u %u\n" -+ " unpack fn len: %u\n" -+ " bytes used %zu/%zu (%zu%% full)\n" -+ " sib u64s: %u, %u (merge threshold %zu)\n" -+ " nr packed keys %u\n" -+ " nr unpacked keys %u\n" -+ " floats %zu\n" -+ " failed unpacked %zu\n", -+ f->key_u64s, -+ f->bits_per_field[0], -+ f->bits_per_field[1], -+ f->bits_per_field[2], -+ f->bits_per_field[3], -+ f->bits_per_field[4], -+ b->unpack_fn_len, -+ b->nr.live_u64s * sizeof(u64), -+ btree_bytes(c) - sizeof(struct btree_node), -+ b->nr.live_u64s * 100 / btree_max_u64s(c), -+ b->sib_u64s[0], -+ b->sib_u64s[1], -+ BTREE_FOREGROUND_MERGE_THRESHOLD(c), -+ b->nr.packed_keys, -+ b->nr.unpacked_keys, -+ stats.floats, -+ stats.failed); -+} -diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h -new file mode 100644 -index 000000000000..d0d3a85bb8be ---- /dev/null -+++ b/fs/bcachefs/btree_cache.h -@@ -0,0 +1,104 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_CACHE_H -+#define _BCACHEFS_BTREE_CACHE_H -+ -+#include "bcachefs.h" -+#include "btree_types.h" -+ -+struct btree_iter; -+ -+extern const char * const bch2_btree_ids[]; -+ -+void bch2_recalc_btree_reserve(struct bch_fs *); -+ -+void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *); -+int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *); -+int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *, -+ unsigned, enum btree_id); -+ -+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *); -+int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *); -+ -+struct btree *bch2_btree_node_mem_alloc(struct bch_fs *); -+ -+struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *, -+ const struct bkey_i *, unsigned, -+ enum six_lock_type); -+ -+struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *, -+ enum btree_id, unsigned); -+ -+struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *, -+ struct btree *, enum btree_node_sibling); -+ -+void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *, -+ const struct bkey_i *, unsigned); -+ -+void bch2_fs_btree_cache_exit(struct bch_fs *); -+int bch2_fs_btree_cache_init(struct bch_fs *); -+void bch2_fs_btree_cache_init_early(struct btree_cache *); -+ -+static inline u64 btree_ptr_hash_val(const struct bkey_i *k) -+{ -+ switch (k->k.type) { -+ case KEY_TYPE_btree_ptr: -+ return *((u64 *) bkey_i_to_btree_ptr_c(k)->v.start); -+ case KEY_TYPE_btree_ptr_v2: -+ return bkey_i_to_btree_ptr_v2_c(k)->v.seq; -+ default: -+ return 0; -+ } -+} -+ -+static inline struct btree *btree_node_mem_ptr(const struct bkey_i *k) -+{ -+ return k->k.type == KEY_TYPE_btree_ptr_v2 -+ ? (void *)(unsigned long)bkey_i_to_btree_ptr_v2_c(k)->v.mem_ptr -+ : NULL; -+} -+ -+/* is btree node in hash table? 
*/ -+static inline bool btree_node_hashed(struct btree *b) -+{ -+ return b->hash_val != 0; -+} -+ -+#define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \ -+ for ((_tbl) = rht_dereference_rcu((_c)->btree_cache.table.tbl, \ -+ &(_c)->btree_cache.table), \ -+ _iter = 0; _iter < (_tbl)->size; _iter++) \ -+ rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash) -+ -+static inline size_t btree_bytes(struct bch_fs *c) -+{ -+ return c->opts.btree_node_size << 9; -+} -+ -+static inline size_t btree_max_u64s(struct bch_fs *c) -+{ -+ return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64); -+} -+ -+static inline size_t btree_pages(struct bch_fs *c) -+{ -+ return btree_bytes(c) / PAGE_SIZE; -+} -+ -+static inline unsigned btree_blocks(struct bch_fs *c) -+{ -+ return c->opts.btree_node_size >> c->block_bits; -+} -+ -+#define BTREE_SPLIT_THRESHOLD(c) (btree_max_u64s(c) * 2 / 3) -+ -+#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3) -+#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \ -+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \ -+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) << 2)) -+ -+#define btree_node_root(_c, _b) ((_c)->btree_roots[(_b)->c.btree_id].b) -+ -+void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, -+ struct btree *); -+ -+#endif /* _BCACHEFS_BTREE_CACHE_H */ -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -new file mode 100644 -index 000000000000..4f581130270c ---- /dev/null -+++ b/fs/bcachefs/btree_gc.c -@@ -0,0 +1,1395 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2010 Kent Overstreet -+ * Copyright (C) 2014 Datera Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "bkey_methods.h" -+#include "btree_locking.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "clock.h" -+#include "debug.h" -+#include "ec.h" -+#include "error.h" -+#include "extents.h" -+#include "journal.h" -+#include "keylist.h" -+#include "move.h" -+#include "recovery.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) -+{ -+ write_seqcount_begin(&c->gc_pos_lock); -+ c->gc_pos = new_pos; -+ write_seqcount_end(&c->gc_pos_lock); -+} -+ -+static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) -+{ -+ BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0); -+ __gc_pos_set(c, new_pos); -+} -+ -+static int bch2_gc_check_topology(struct bch_fs *c, -+ struct bkey_s_c k, -+ struct bpos *expected_start, -+ struct bpos expected_end, -+ bool is_last) -+{ -+ int ret = 0; -+ -+ if (k.k->type == KEY_TYPE_btree_ptr_v2) { -+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); -+ -+ if (fsck_err_on(bkey_cmp(*expected_start, bp.v->min_key), c, -+ "btree node with incorrect min_key: got %llu:%llu, should be %llu:%llu", -+ bp.v->min_key.inode, -+ bp.v->min_key.offset, -+ expected_start->inode, -+ expected_start->offset)) { -+ BUG(); -+ } -+ } -+ -+ *expected_start = bkey_cmp(k.k->p, POS_MAX) -+ ? 
bkey_successor(k.k->p) -+ : k.k->p; -+ -+ if (fsck_err_on(is_last && -+ bkey_cmp(k.k->p, expected_end), c, -+ "btree node with incorrect max_key: got %llu:%llu, should be %llu:%llu", -+ k.k->p.inode, -+ k.k->p.offset, -+ expected_end.inode, -+ expected_end.offset)) { -+ BUG(); -+ } -+fsck_err: -+ return ret; -+} -+ -+/* marking of btree keys/nodes: */ -+ -+static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k, -+ u8 *max_stale, bool initial) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ unsigned flags = -+ BTREE_TRIGGER_GC| -+ (initial ? BTREE_TRIGGER_NOATOMIC : 0); -+ int ret = 0; -+ -+ if (initial) { -+ BUG_ON(journal_seq_verify(c) && -+ k.k->version.lo > journal_cur_seq(&c->journal)); -+ -+ /* XXX change to fsck check */ -+ if (fsck_err_on(k.k->version.lo > atomic64_read(&c->key_version), c, -+ "key version number higher than recorded: %llu > %llu", -+ k.k->version.lo, -+ atomic64_read(&c->key_version))) -+ atomic64_set(&c->key_version, k.k->version.lo); -+ -+ if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) || -+ fsck_err_on(!bch2_bkey_replicas_marked(c, k), c, -+ "superblock not marked as containing replicas (type %u)", -+ k.k->type)) { -+ ret = bch2_mark_bkey_replicas(c, k); -+ if (ret) -+ return ret; -+ } -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bucket *g = PTR_BUCKET(ca, ptr, true); -+ struct bucket *g2 = PTR_BUCKET(ca, ptr, false); -+ -+ if (mustfix_fsck_err_on(!g->gen_valid, c, -+ "bucket %u:%zu data type %s ptr gen %u missing in alloc btree", -+ ptr->dev, PTR_BUCKET_NR(ca, ptr), -+ bch2_data_types[ptr_data_type(k.k, ptr)], -+ ptr->gen)) { -+ g2->_mark.gen = g->_mark.gen = ptr->gen; -+ g2->gen_valid = g->gen_valid = true; -+ } -+ -+ if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c, -+ "bucket %u:%zu data type %s ptr gen in the future: %u > %u", -+ ptr->dev, PTR_BUCKET_NR(ca, ptr), -+ bch2_data_types[ptr_data_type(k.k, ptr)], -+ ptr->gen, g->mark.gen)) { -+ g2->_mark.gen = g->_mark.gen = ptr->gen; -+ g2->gen_valid = g->gen_valid = true; -+ g2->_mark.data_type = 0; -+ g2->_mark.dirty_sectors = 0; -+ g2->_mark.cached_sectors = 0; -+ set_bit(BCH_FS_FIXED_GENS, &c->flags); -+ } -+ } -+ } -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bucket *g = PTR_BUCKET(ca, ptr, true); -+ -+ if (gen_after(g->oldest_gen, ptr->gen)) -+ g->oldest_gen = ptr->gen; -+ -+ *max_stale = max(*max_stale, ptr_stale(ca, ptr)); -+ } -+ -+ bch2_mark_key(c, k, 0, k.k->size, NULL, 0, flags); -+fsck_err: -+ return ret; -+} -+ -+static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, -+ bool initial) -+{ -+ struct bpos next_node_start = b->data->min_key; -+ struct btree_node_iter iter; -+ struct bkey unpacked; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ *max_stale = 0; -+ -+ if (!btree_node_type_needs_gc(btree_node_type(b))) -+ return 0; -+ -+ bch2_btree_node_iter_init_from_start(&iter, b); -+ -+ while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) { -+ bch2_bkey_debugcheck(c, b, k); -+ -+ ret = bch2_gc_mark_key(c, k, max_stale, initial); -+ if (ret) -+ break; -+ -+ bch2_btree_node_iter_advance(&iter, b); -+ -+ if (b->c.level) { -+ ret = bch2_gc_check_topology(c, k, -+ &next_node_start, -+ b->data->max_key, -+ bch2_btree_node_iter_end(&iter)); -+ if (ret) -+ break; -+ } -+ } -+ -+ return ret; -+} -+ -+static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, -+ bool initial, 
bool metadata_only) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ unsigned depth = metadata_only ? 1 -+ : expensive_debug_checks(c) ? 0 -+ : !btree_node_type_needs_gc(btree_id) ? 1 -+ : 0; -+ u8 max_stale = 0; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0)); -+ -+ __for_each_btree_node(&trans, iter, btree_id, POS_MIN, -+ 0, depth, BTREE_ITER_PREFETCH, b) { -+ bch2_verify_btree_nr_keys(b); -+ -+ gc_pos_set(c, gc_pos_btree_node(b)); -+ -+ ret = btree_gc_mark_node(c, b, &max_stale, initial); -+ if (ret) -+ break; -+ -+ if (!initial) { -+ if (max_stale > 64) -+ bch2_btree_node_rewrite(c, iter, -+ b->data->keys.seq, -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_NOWAIT| -+ BTREE_INSERT_GC_LOCK_HELD); -+ else if (!btree_gc_rewrite_disabled(c) && -+ (btree_gc_always_rewrite(c) || max_stale > 16)) -+ bch2_btree_node_rewrite(c, iter, -+ b->data->keys.seq, -+ BTREE_INSERT_NOWAIT| -+ BTREE_INSERT_GC_LOCK_HELD); -+ } -+ -+ bch2_trans_cond_resched(&trans); -+ } -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ return ret; -+ -+ mutex_lock(&c->btree_root_lock); -+ b = c->btree_roots[btree_id].b; -+ if (!btree_node_fake(b)) -+ ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key), -+ &max_stale, initial); -+ gc_pos_set(c, gc_pos_btree_root(b->c.btree_id)); -+ mutex_unlock(&c->btree_root_lock); -+ -+ return ret; -+} -+ -+static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, -+ struct journal_keys *journal_keys, -+ unsigned target_depth) -+{ -+ struct btree_and_journal_iter iter; -+ struct bkey_s_c k; -+ struct bpos next_node_start = b->data->min_key; -+ u8 max_stale = 0; -+ int ret = 0; -+ -+ bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b); -+ -+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { -+ bch2_bkey_debugcheck(c, b, k); -+ -+ BUG_ON(bkey_cmp(k.k->p, b->data->min_key) < 0); -+ BUG_ON(bkey_cmp(k.k->p, b->data->max_key) > 0); -+ -+ ret = bch2_gc_mark_key(c, k, &max_stale, true); -+ if (ret) -+ break; -+ -+ if (b->c.level) { -+ struct btree *child; -+ BKEY_PADDED(k) tmp; -+ -+ bkey_reassemble(&tmp.k, k); -+ k = bkey_i_to_s_c(&tmp.k); -+ -+ bch2_btree_and_journal_iter_advance(&iter); -+ -+ ret = bch2_gc_check_topology(c, k, -+ &next_node_start, -+ b->data->max_key, -+ !bch2_btree_and_journal_iter_peek(&iter).k); -+ if (ret) -+ break; -+ -+ if (b->c.level > target_depth) { -+ child = bch2_btree_node_get_noiter(c, &tmp.k, -+ b->c.btree_id, b->c.level - 1); -+ ret = PTR_ERR_OR_ZERO(child); -+ if (ret) -+ break; -+ -+ ret = bch2_gc_btree_init_recurse(c, child, -+ journal_keys, target_depth); -+ six_unlock_read(&child->c.lock); -+ -+ if (ret) -+ break; -+ } -+ } else { -+ bch2_btree_and_journal_iter_advance(&iter); -+ } -+ } -+ -+ return ret; -+} -+ -+static int bch2_gc_btree_init(struct bch_fs *c, -+ struct journal_keys *journal_keys, -+ enum btree_id btree_id, -+ bool metadata_only) -+{ -+ struct btree *b; -+ unsigned target_depth = metadata_only ? 1 -+ : expensive_debug_checks(c) ? 0 -+ : !btree_node_type_needs_gc(btree_id) ? 
1 -+ : 0; -+ u8 max_stale = 0; -+ int ret = 0; -+ -+ b = c->btree_roots[btree_id].b; -+ -+ if (btree_node_fake(b)) -+ return 0; -+ -+ six_lock_read(&b->c.lock, NULL, NULL); -+ if (fsck_err_on(bkey_cmp(b->data->min_key, POS_MIN), c, -+ "btree root with incorrect min_key: %llu:%llu", -+ b->data->min_key.inode, -+ b->data->min_key.offset)) { -+ BUG(); -+ } -+ -+ if (fsck_err_on(bkey_cmp(b->data->max_key, POS_MAX), c, -+ "btree root with incorrect min_key: %llu:%llu", -+ b->data->max_key.inode, -+ b->data->max_key.offset)) { -+ BUG(); -+ } -+ -+ if (b->c.level >= target_depth) -+ ret = bch2_gc_btree_init_recurse(c, b, -+ journal_keys, target_depth); -+ -+ if (!ret) -+ ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key), -+ &max_stale, true); -+fsck_err: -+ six_unlock_read(&b->c.lock); -+ -+ return ret; -+} -+ -+static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r) -+{ -+ return (int) btree_id_to_gc_phase(l) - -+ (int) btree_id_to_gc_phase(r); -+} -+ -+static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys, -+ bool initial, bool metadata_only) -+{ -+ enum btree_id ids[BTREE_ID_NR]; -+ unsigned i; -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ ids[i] = i; -+ bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ enum btree_id id = ids[i]; -+ int ret = initial -+ ? bch2_gc_btree_init(c, journal_keys, -+ id, metadata_only) -+ : bch2_gc_btree(c, id, initial, metadata_only); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca, -+ u64 start, u64 end, -+ enum bch_data_type type, -+ unsigned flags) -+{ -+ u64 b = sector_to_bucket(ca, start); -+ -+ do { -+ unsigned sectors = -+ min_t(u64, bucket_to_sector(ca, b + 1), end) - start; -+ -+ bch2_mark_metadata_bucket(c, ca, b, type, sectors, -+ gc_phase(GC_PHASE_SB), flags); -+ b++; -+ start += sectors; -+ } while (start < end); -+} -+ -+void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca, -+ unsigned flags) -+{ -+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; -+ unsigned i; -+ u64 b; -+ -+ /* -+ * This conditional is kind of gross, but we may be called from the -+ * device add path, before the new device has actually been added to the -+ * running filesystem: -+ */ -+ if (c) { -+ lockdep_assert_held(&c->sb_lock); -+ percpu_down_read(&c->mark_lock); -+ } -+ -+ for (i = 0; i < layout->nr_superblocks; i++) { -+ u64 offset = le64_to_cpu(layout->sb_offset[i]); -+ -+ if (offset == BCH_SB_SECTOR) -+ mark_metadata_sectors(c, ca, 0, BCH_SB_SECTOR, -+ BCH_DATA_sb, flags); -+ -+ mark_metadata_sectors(c, ca, offset, -+ offset + (1 << layout->sb_max_size_bits), -+ BCH_DATA_sb, flags); -+ } -+ -+ for (i = 0; i < ca->journal.nr; i++) { -+ b = ca->journal.buckets[i]; -+ bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_journal, -+ ca->mi.bucket_size, -+ gc_phase(GC_PHASE_SB), flags); -+ } -+ -+ if (c) -+ percpu_up_read(&c->mark_lock); -+} -+ -+static void bch2_mark_superblocks(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ mutex_lock(&c->sb_lock); -+ gc_pos_set(c, gc_phase(GC_PHASE_SB)); -+ -+ for_each_online_member(ca, c, i) -+ bch2_mark_dev_superblock(c, ca, BTREE_TRIGGER_GC); -+ mutex_unlock(&c->sb_lock); -+} -+ -+#if 0 -+/* Also see bch2_pending_btree_node_free_insert_done() */ -+static void bch2_mark_pending_btree_node_frees(struct bch_fs *c) -+{ -+ struct btree_update *as; -+ struct pending_btree_node_free *d; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ gc_pos_set(c, 
gc_phase(GC_PHASE_PENDING_DELETE)); -+ -+ for_each_pending_btree_node_free(c, as, d) -+ if (d->index_update_done) -+ bch2_mark_key(c, bkey_i_to_s_c(&d->key), -+ 0, 0, NULL, 0, -+ BTREE_TRIGGER_GC); -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+#endif -+ -+static void bch2_mark_allocator_buckets(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ struct open_bucket *ob; -+ size_t i, j, iter; -+ unsigned ci; -+ -+ percpu_down_read(&c->mark_lock); -+ -+ spin_lock(&c->freelist_lock); -+ gc_pos_set(c, gc_pos_alloc(c, NULL)); -+ -+ for_each_member_device(ca, c, ci) { -+ fifo_for_each_entry(i, &ca->free_inc, iter) -+ bch2_mark_alloc_bucket(c, ca, i, true, -+ gc_pos_alloc(c, NULL), -+ BTREE_TRIGGER_GC); -+ -+ -+ -+ for (j = 0; j < RESERVE_NR; j++) -+ fifo_for_each_entry(i, &ca->free[j], iter) -+ bch2_mark_alloc_bucket(c, ca, i, true, -+ gc_pos_alloc(c, NULL), -+ BTREE_TRIGGER_GC); -+ } -+ -+ spin_unlock(&c->freelist_lock); -+ -+ for (ob = c->open_buckets; -+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); -+ ob++) { -+ spin_lock(&ob->lock); -+ if (ob->valid) { -+ gc_pos_set(c, gc_pos_alloc(c, ob)); -+ ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr), true, -+ gc_pos_alloc(c, ob), -+ BTREE_TRIGGER_GC); -+ } -+ spin_unlock(&ob->lock); -+ } -+ -+ percpu_up_read(&c->mark_lock); -+} -+ -+static void bch2_gc_free(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ genradix_free(&c->stripes[1]); -+ -+ for_each_member_device(ca, c, i) { -+ kvpfree(rcu_dereference_protected(ca->buckets[1], 1), -+ sizeof(struct bucket_array) + -+ ca->mi.nbuckets * sizeof(struct bucket)); -+ ca->buckets[1] = NULL; -+ -+ free_percpu(ca->usage[1]); -+ ca->usage[1] = NULL; -+ } -+ -+ free_percpu(c->usage_gc); -+ c->usage_gc = NULL; -+} -+ -+static int bch2_gc_done(struct bch_fs *c, -+ bool initial, bool metadata_only) -+{ -+ struct bch_dev *ca; -+ bool verify = !metadata_only && -+ (!initial || -+ (c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))); -+ unsigned i; -+ int ret = 0; -+ -+#define copy_field(_f, _msg, ...) \ -+ if (dst->_f != src->_f) { \ -+ if (verify) \ -+ fsck_err(c, _msg ": got %llu, should be %llu" \ -+ , ##__VA_ARGS__, dst->_f, src->_f); \ -+ dst->_f = src->_f; \ -+ } -+#define copy_stripe_field(_f, _msg, ...) \ -+ if (dst->_f != src->_f) { \ -+ if (verify) \ -+ fsck_err(c, "stripe %zu has wrong "_msg \ -+ ": got %u, should be %u", \ -+ dst_iter.pos, ##__VA_ARGS__, \ -+ dst->_f, src->_f); \ -+ dst->_f = src->_f; \ -+ dst->dirty = true; \ -+ } -+#define copy_bucket_field(_f) \ -+ if (dst->b[b].mark._f != src->b[b].mark._f) { \ -+ if (verify) \ -+ fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f \ -+ ": got %u, should be %u", i, b, \ -+ dst->b[b].mark.gen, \ -+ bch2_data_types[dst->b[b].mark.data_type],\ -+ dst->b[b].mark._f, src->b[b].mark._f); \ -+ dst->b[b]._mark._f = src->b[b].mark._f; \ -+ } -+#define copy_dev_field(_f, _msg, ...) \ -+ copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__) -+#define copy_fs_field(_f, _msg, ...) 
\ -+ copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__) -+ -+ if (!metadata_only) { -+ struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0); -+ struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0); -+ struct stripe *dst, *src; -+ unsigned i; -+ -+ c->ec_stripes_heap.used = 0; -+ -+ while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) && -+ (src = genradix_iter_peek(&src_iter, &c->stripes[1]))) { -+ BUG_ON(src_iter.pos != dst_iter.pos); -+ -+ copy_stripe_field(alive, "alive"); -+ copy_stripe_field(sectors, "sectors"); -+ copy_stripe_field(algorithm, "algorithm"); -+ copy_stripe_field(nr_blocks, "nr_blocks"); -+ copy_stripe_field(nr_redundant, "nr_redundant"); -+ copy_stripe_field(blocks_nonempty, -+ "blocks_nonempty"); -+ -+ for (i = 0; i < ARRAY_SIZE(dst->block_sectors); i++) -+ copy_stripe_field(block_sectors[i], -+ "block_sectors[%u]", i); -+ -+ if (dst->alive) { -+ spin_lock(&c->ec_stripes_heap_lock); -+ bch2_stripes_heap_insert(c, dst, dst_iter.pos); -+ spin_unlock(&c->ec_stripes_heap_lock); -+ } -+ -+ genradix_iter_advance(&dst_iter, &c->stripes[0]); -+ genradix_iter_advance(&src_iter, &c->stripes[1]); -+ } -+ } -+ -+ for_each_member_device(ca, c, i) { -+ struct bucket_array *dst = __bucket_array(ca, 0); -+ struct bucket_array *src = __bucket_array(ca, 1); -+ size_t b; -+ -+ for (b = 0; b < src->nbuckets; b++) { -+ copy_bucket_field(gen); -+ copy_bucket_field(data_type); -+ copy_bucket_field(owned_by_allocator); -+ copy_bucket_field(stripe); -+ copy_bucket_field(dirty_sectors); -+ copy_bucket_field(cached_sectors); -+ -+ dst->b[b].oldest_gen = src->b[b].oldest_gen; -+ } -+ }; -+ -+ bch2_fs_usage_acc_to_base(c, 0); -+ bch2_fs_usage_acc_to_base(c, 1); -+ -+ bch2_dev_usage_from_buckets(c); -+ -+ { -+ unsigned nr = fs_usage_u64s(c); -+ struct bch_fs_usage *dst = c->usage_base; -+ struct bch_fs_usage *src = (void *) -+ bch2_acc_percpu_u64s((void *) c->usage_gc, nr); -+ -+ copy_fs_field(hidden, "hidden"); -+ copy_fs_field(btree, "btree"); -+ -+ if (!metadata_only) { -+ copy_fs_field(data, "data"); -+ copy_fs_field(cached, "cached"); -+ copy_fs_field(reserved, "reserved"); -+ copy_fs_field(nr_inodes,"nr_inodes"); -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) -+ copy_fs_field(persistent_reserved[i], -+ "persistent_reserved[%i]", i); -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ char buf[80]; -+ -+ if (metadata_only && -+ (e->data_type == BCH_DATA_user || -+ e->data_type == BCH_DATA_cached)) -+ continue; -+ -+ bch2_replicas_entry_to_text(&PBUF(buf), e); -+ -+ copy_fs_field(replicas[i], "%s", buf); -+ } -+ } -+ -+#undef copy_fs_field -+#undef copy_dev_field -+#undef copy_bucket_field -+#undef copy_stripe_field -+#undef copy_field -+fsck_err: -+ return ret; -+} -+ -+static int bch2_gc_start(struct bch_fs *c, -+ bool metadata_only) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ int ret; -+ -+ BUG_ON(c->usage_gc); -+ -+ c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64), -+ sizeof(u64), GFP_KERNEL); -+ if (!c->usage_gc) { -+ bch_err(c, "error allocating c->usage_gc"); -+ return -ENOMEM; -+ } -+ -+ for_each_member_device(ca, c, i) { -+ BUG_ON(ca->buckets[1]); -+ BUG_ON(ca->usage[1]); -+ -+ ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) + -+ ca->mi.nbuckets * sizeof(struct bucket), -+ GFP_KERNEL|__GFP_ZERO); -+ if (!ca->buckets[1]) { -+ percpu_ref_put(&ca->ref); -+ bch_err(c, "error allocating ca->buckets[gc]"); -+ return -ENOMEM; -+ } -+ -+ ca->usage[1] = 
alloc_percpu(struct bch_dev_usage); -+ if (!ca->usage[1]) { -+ bch_err(c, "error allocating ca->usage[gc]"); -+ percpu_ref_put(&ca->ref); -+ return -ENOMEM; -+ } -+ } -+ -+ ret = bch2_ec_mem_alloc(c, true); -+ if (ret) { -+ bch_err(c, "error allocating ec gc mem"); -+ return ret; -+ } -+ -+ percpu_down_write(&c->mark_lock); -+ -+ /* -+ * indicate to stripe code that we need to allocate for the gc stripes -+ * radix tree, too -+ */ -+ gc_pos_set(c, gc_phase(GC_PHASE_START)); -+ -+ for_each_member_device(ca, c, i) { -+ struct bucket_array *dst = __bucket_array(ca, 1); -+ struct bucket_array *src = __bucket_array(ca, 0); -+ size_t b; -+ -+ dst->first_bucket = src->first_bucket; -+ dst->nbuckets = src->nbuckets; -+ -+ for (b = 0; b < src->nbuckets; b++) { -+ struct bucket *d = &dst->b[b]; -+ struct bucket *s = &src->b[b]; -+ -+ d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen; -+ d->gen_valid = s->gen_valid; -+ -+ if (metadata_only && -+ (s->mark.data_type == BCH_DATA_user || -+ s->mark.data_type == BCH_DATA_cached)) { -+ d->_mark = s->mark; -+ d->_mark.owned_by_allocator = 0; -+ } -+ } -+ }; -+ -+ percpu_up_write(&c->mark_lock); -+ -+ return 0; -+} -+ -+/** -+ * bch2_gc - walk _all_ references to buckets, and recompute them: -+ * -+ * Order matters here: -+ * - Concurrent GC relies on the fact that we have a total ordering for -+ * everything that GC walks - see gc_will_visit_node(), -+ * gc_will_visit_root() -+ * -+ * - also, references move around in the course of index updates and -+ * various other crap: everything needs to agree on the ordering -+ * references are allowed to move around in - e.g., we're allowed to -+ * start with a reference owned by an open_bucket (the allocator) and -+ * move it to the btree, but not the reverse. -+ * -+ * This is necessary to ensure that gc doesn't miss references that -+ * move around - if references move backwards in the ordering GC -+ * uses, GC could skip past them -+ */ -+int bch2_gc(struct bch_fs *c, struct journal_keys *journal_keys, -+ bool initial, bool metadata_only) -+{ -+ struct bch_dev *ca; -+ u64 start_time = local_clock(); -+ unsigned i, iter = 0; -+ int ret; -+ -+ lockdep_assert_held(&c->state_lock); -+ trace_gc_start(c); -+ -+ down_write(&c->gc_lock); -+ -+ /* flush interior btree updates: */ -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+again: -+ ret = bch2_gc_start(c, metadata_only); -+ if (ret) -+ goto out; -+ -+ bch2_mark_superblocks(c); -+ -+ ret = bch2_gc_btrees(c, journal_keys, initial, metadata_only); -+ if (ret) -+ goto out; -+ -+#if 0 -+ bch2_mark_pending_btree_node_frees(c); -+#endif -+ bch2_mark_allocator_buckets(c); -+ -+ c->gc_count++; -+out: -+ if (!ret && -+ (test_bit(BCH_FS_FIXED_GENS, &c->flags) || -+ (!iter && test_restart_gc(c)))) { -+ /* -+ * XXX: make sure gens we fixed got saved -+ */ -+ if (iter++ <= 2) { -+ bch_info(c, "Fixed gens, restarting mark and sweep:"); -+ clear_bit(BCH_FS_FIXED_GENS, &c->flags); -+ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); -+ -+ percpu_down_write(&c->mark_lock); -+ bch2_gc_free(c); -+ percpu_up_write(&c->mark_lock); -+ /* flush fsck errors, reset counters */ -+ bch2_flush_fsck_errs(c); -+ -+ goto again; -+ } -+ -+ bch_info(c, "Unable to fix bucket gens, looping"); -+ ret = -EINVAL; -+ } -+ -+ if (!ret) { -+ bch2_journal_block(&c->journal); -+ -+ percpu_down_write(&c->mark_lock); -+ ret = bch2_gc_done(c, initial, metadata_only); -+ -+ bch2_journal_unblock(&c->journal); -+ } else { -+ percpu_down_write(&c->mark_lock); -+ 
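		/*
		 * Error path: mark_lock is still taken here so that the shared
		 * teardown below (marking gc as not running and freeing the gc
		 * copies of the bucket/usage state) runs under the same lock as
		 * the success path.
		 */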
} -+ -+ /* Indicates that gc is no longer in progress: */ -+ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); -+ -+ bch2_gc_free(c); -+ percpu_up_write(&c->mark_lock); -+ -+ up_write(&c->gc_lock); -+ -+ trace_gc_end(c); -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time); -+ -+ /* -+ * Wake up allocator in case it was waiting for buckets -+ * because of not being able to inc gens -+ */ -+ for_each_member_device(ca, c, i) -+ bch2_wake_allocator(ca); -+ -+ /* -+ * At startup, allocations can happen directly instead of via the -+ * allocator thread - issue wakeup in case they blocked on gc_lock: -+ */ -+ closure_wake_up(&c->freelist_wait); -+ return ret; -+} -+ -+/* -+ * For recalculating oldest gen, we only need to walk keys in leaf nodes; btree -+ * node pointers currently never have cached pointers that can become stale: -+ */ -+static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id id) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ percpu_down_read(&c->mark_lock); -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bucket *g = PTR_BUCKET(ca, ptr, false); -+ -+ if (gen_after(g->gc_gen, ptr->gen)) -+ g->gc_gen = ptr->gen; -+ -+ if (gen_after(g->mark.gen, ptr->gen) > 32) { -+ /* rewrite btree node */ -+ -+ } -+ } -+ percpu_up_read(&c->mark_lock); -+ } -+ -+ bch2_trans_exit(&trans); -+ return ret; -+} -+ -+int bch2_gc_gens(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ struct bucket_array *buckets; -+ struct bucket *g; -+ unsigned i; -+ int ret; -+ -+ /* -+ * Ideally we would be using state_lock and not gc_lock here, but that -+ * introduces a deadlock in the RO path - we currently take the state -+ * lock at the start of going RO, thus the gc thread may get stuck: -+ */ -+ down_read(&c->gc_lock); -+ -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) -+ g->gc_gen = g->mark.gen; -+ up_read(&ca->bucket_lock); -+ } -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (btree_node_type_needs_gc(i)) { -+ ret = bch2_gc_btree_gens(c, i); -+ if (ret) { -+ bch_err(c, "error recalculating oldest_gen: %i", ret); -+ goto err; -+ } -+ } -+ -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) -+ g->oldest_gen = g->gc_gen; -+ up_read(&ca->bucket_lock); -+ } -+ -+ c->gc_count++; -+err: -+ up_read(&c->gc_lock); -+ return ret; -+} -+ -+/* Btree coalescing */ -+ -+static void recalc_packed_keys(struct btree *b) -+{ -+ struct bset *i = btree_bset_first(b); -+ struct bkey_packed *k; -+ -+ memset(&b->nr, 0, sizeof(b->nr)); -+ -+ BUG_ON(b->nsets != 1); -+ -+ vstruct_for_each(i, k) -+ btree_keys_account_key_add(&b->nr, 0, k); -+} -+ -+static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter, -+ struct btree *old_nodes[GC_MERGE_NODES]) -+{ -+ struct btree *parent = btree_node_parent(iter, old_nodes[0]); -+ unsigned i, nr_old_nodes, nr_new_nodes, u64s = 0; -+ unsigned blocks = btree_blocks(c) * 2 / 3; -+ struct btree *new_nodes[GC_MERGE_NODES]; -+ struct btree_update *as; -+ struct keylist keylist; -+ struct bkey_format_state format_state; -+ struct bkey_format new_format; -+ -+ memset(new_nodes, 0, 
sizeof(new_nodes)); -+ bch2_keylist_init(&keylist, NULL); -+ -+ /* Count keys that are not deleted */ -+ for (i = 0; i < GC_MERGE_NODES && old_nodes[i]; i++) -+ u64s += old_nodes[i]->nr.live_u64s; -+ -+ nr_old_nodes = nr_new_nodes = i; -+ -+ /* Check if all keys in @old_nodes could fit in one fewer node */ -+ if (nr_old_nodes <= 1 || -+ __vstruct_blocks(struct btree_node, c->block_bits, -+ DIV_ROUND_UP(u64s, nr_old_nodes - 1)) > blocks) -+ return; -+ -+ /* Find a format that all keys in @old_nodes can pack into */ -+ bch2_bkey_format_init(&format_state); -+ -+ for (i = 0; i < nr_old_nodes; i++) -+ __bch2_btree_calc_format(&format_state, old_nodes[i]); -+ -+ new_format = bch2_bkey_format_done(&format_state); -+ -+ /* Check if repacking would make any nodes too big to fit */ -+ for (i = 0; i < nr_old_nodes; i++) -+ if (!bch2_btree_node_format_fits(c, old_nodes[i], &new_format)) { -+ trace_btree_gc_coalesce_fail(c, -+ BTREE_GC_COALESCE_FAIL_FORMAT_FITS); -+ return; -+ } -+ -+ if (bch2_keylist_realloc(&keylist, NULL, 0, -+ (BKEY_U64s + BKEY_EXTENT_U64s_MAX) * nr_old_nodes)) { -+ trace_btree_gc_coalesce_fail(c, -+ BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC); -+ return; -+ } -+ -+ as = bch2_btree_update_start(iter->trans, iter->btree_id, -+ btree_update_reserve_required(c, parent) + nr_old_nodes, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE, -+ NULL); -+ if (IS_ERR(as)) { -+ trace_btree_gc_coalesce_fail(c, -+ BTREE_GC_COALESCE_FAIL_RESERVE_GET); -+ bch2_keylist_free(&keylist, NULL); -+ return; -+ } -+ -+ trace_btree_gc_coalesce(c, old_nodes[0]); -+ -+ for (i = 0; i < nr_old_nodes; i++) -+ bch2_btree_interior_update_will_free_node(as, old_nodes[i]); -+ -+ /* Repack everything with @new_format and sort down to one bset */ -+ for (i = 0; i < nr_old_nodes; i++) -+ new_nodes[i] = -+ __bch2_btree_node_alloc_replacement(as, old_nodes[i], -+ new_format); -+ -+ /* -+ * Conceptually we concatenate the nodes together and slice them -+ * up at different boundaries. 
-+ */ -+ for (i = nr_new_nodes - 1; i > 0; --i) { -+ struct btree *n1 = new_nodes[i]; -+ struct btree *n2 = new_nodes[i - 1]; -+ -+ struct bset *s1 = btree_bset_first(n1); -+ struct bset *s2 = btree_bset_first(n2); -+ struct bkey_packed *k, *last = NULL; -+ -+ /* Calculate how many keys from @n2 we could fit inside @n1 */ -+ u64s = 0; -+ -+ for (k = s2->start; -+ k < vstruct_last(s2) && -+ vstruct_blocks_plus(n1->data, c->block_bits, -+ u64s + k->u64s) <= blocks; -+ k = bkey_next_skip_noops(k, vstruct_last(s2))) { -+ last = k; -+ u64s += k->u64s; -+ } -+ -+ if (u64s == le16_to_cpu(s2->u64s)) { -+ /* n2 fits entirely in n1 */ -+ n1->key.k.p = n1->data->max_key = n2->data->max_key; -+ -+ memcpy_u64s(vstruct_last(s1), -+ s2->start, -+ le16_to_cpu(s2->u64s)); -+ le16_add_cpu(&s1->u64s, le16_to_cpu(s2->u64s)); -+ -+ set_btree_bset_end(n1, n1->set); -+ -+ six_unlock_write(&n2->c.lock); -+ bch2_btree_node_free_never_inserted(c, n2); -+ six_unlock_intent(&n2->c.lock); -+ -+ memmove(new_nodes + i - 1, -+ new_nodes + i, -+ sizeof(new_nodes[0]) * (nr_new_nodes - i)); -+ new_nodes[--nr_new_nodes] = NULL; -+ } else if (u64s) { -+ /* move part of n2 into n1 */ -+ n1->key.k.p = n1->data->max_key = -+ bkey_unpack_pos(n1, last); -+ -+ n2->data->min_key = bkey_successor(n1->data->max_key); -+ -+ memcpy_u64s(vstruct_last(s1), -+ s2->start, u64s); -+ le16_add_cpu(&s1->u64s, u64s); -+ -+ memmove(s2->start, -+ vstruct_idx(s2, u64s), -+ (le16_to_cpu(s2->u64s) - u64s) * sizeof(u64)); -+ s2->u64s = cpu_to_le16(le16_to_cpu(s2->u64s) - u64s); -+ -+ set_btree_bset_end(n1, n1->set); -+ set_btree_bset_end(n2, n2->set); -+ } -+ } -+ -+ for (i = 0; i < nr_new_nodes; i++) { -+ struct btree *n = new_nodes[i]; -+ -+ recalc_packed_keys(n); -+ btree_node_reset_sib_u64s(n); -+ -+ bch2_btree_build_aux_trees(n); -+ -+ bch2_btree_update_add_new_node(as, n); -+ six_unlock_write(&n->c.lock); -+ -+ bch2_btree_node_write(c, n, SIX_LOCK_intent); -+ } -+ -+ /* -+ * The keys for the old nodes get deleted. We don't want to insert keys -+ * that compare equal to the keys for the new nodes we'll also be -+ * inserting - we can't because keys on a keylist must be strictly -+ * greater than the previous keys, and we also don't need to since the -+ * key for the new node will serve the same purpose (overwriting the key -+ * for the old node). 
-+ */ -+ for (i = 0; i < nr_old_nodes; i++) { -+ struct bkey_i delete; -+ unsigned j; -+ -+ for (j = 0; j < nr_new_nodes; j++) -+ if (!bkey_cmp(old_nodes[i]->key.k.p, -+ new_nodes[j]->key.k.p)) -+ goto next; -+ -+ bkey_init(&delete.k); -+ delete.k.p = old_nodes[i]->key.k.p; -+ bch2_keylist_add_in_order(&keylist, &delete); -+next: -+ i = i; -+ } -+ -+ /* -+ * Keys for the new nodes get inserted: bch2_btree_insert_keys() only -+ * does the lookup once and thus expects the keys to be in sorted order -+ * so we have to make sure the new keys are correctly ordered with -+ * respect to the deleted keys added in the previous loop -+ */ -+ for (i = 0; i < nr_new_nodes; i++) -+ bch2_keylist_add_in_order(&keylist, &new_nodes[i]->key); -+ -+ /* Insert the newly coalesced nodes */ -+ bch2_btree_insert_node(as, parent, iter, &keylist, 0); -+ -+ BUG_ON(!bch2_keylist_empty(&keylist)); -+ -+ BUG_ON(iter->l[old_nodes[0]->c.level].b != old_nodes[0]); -+ -+ bch2_btree_iter_node_replace(iter, new_nodes[0]); -+ -+ for (i = 0; i < nr_new_nodes; i++) -+ bch2_btree_update_get_open_buckets(as, new_nodes[i]); -+ -+ /* Free the old nodes and update our sliding window */ -+ for (i = 0; i < nr_old_nodes; i++) { -+ bch2_btree_node_free_inmem(c, old_nodes[i], iter); -+ -+ /* -+ * the index update might have triggered a split, in which case -+ * the nodes we coalesced - the new nodes we just created - -+ * might not be sibling nodes anymore - don't add them to the -+ * sliding window (except the first): -+ */ -+ if (!i) { -+ old_nodes[i] = new_nodes[i]; -+ } else { -+ old_nodes[i] = NULL; -+ } -+ } -+ -+ for (i = 0; i < nr_new_nodes; i++) -+ six_unlock_intent(&new_nodes[i]->c.lock); -+ -+ bch2_btree_update_done(as); -+ bch2_keylist_free(&keylist, NULL); -+} -+ -+static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ unsigned i; -+ -+ /* Sliding window of adjacent btree nodes */ -+ struct btree *merge[GC_MERGE_NODES]; -+ u32 lock_seq[GC_MERGE_NODES]; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ /* -+ * XXX: We don't have a good way of positively matching on sibling nodes -+ * that have the same parent - this code works by handling the cases -+ * where they might not have the same parent, and is thus fragile. Ugh. -+ * -+ * Perhaps redo this to use multiple linked iterators? 
-+ */ -+ memset(merge, 0, sizeof(merge)); -+ -+ __for_each_btree_node(&trans, iter, btree_id, POS_MIN, -+ BTREE_MAX_DEPTH, 0, -+ BTREE_ITER_PREFETCH, b) { -+ memmove(merge + 1, merge, -+ sizeof(merge) - sizeof(merge[0])); -+ memmove(lock_seq + 1, lock_seq, -+ sizeof(lock_seq) - sizeof(lock_seq[0])); -+ -+ merge[0] = b; -+ -+ for (i = 1; i < GC_MERGE_NODES; i++) { -+ if (!merge[i] || -+ !six_relock_intent(&merge[i]->c.lock, lock_seq[i])) -+ break; -+ -+ if (merge[i]->c.level != merge[0]->c.level) { -+ six_unlock_intent(&merge[i]->c.lock); -+ break; -+ } -+ } -+ memset(merge + i, 0, (GC_MERGE_NODES - i) * sizeof(merge[0])); -+ -+ bch2_coalesce_nodes(c, iter, merge); -+ -+ for (i = 1; i < GC_MERGE_NODES && merge[i]; i++) { -+ lock_seq[i] = merge[i]->c.lock.state.seq; -+ six_unlock_intent(&merge[i]->c.lock); -+ } -+ -+ lock_seq[0] = merge[0]->c.lock.state.seq; -+ -+ if (kthread && kthread_should_stop()) { -+ bch2_trans_exit(&trans); -+ return -ESHUTDOWN; -+ } -+ -+ bch2_trans_cond_resched(&trans); -+ -+ /* -+ * If the parent node wasn't relocked, it might have been split -+ * and the nodes in our sliding window might not have the same -+ * parent anymore - blow away the sliding window: -+ */ -+ if (btree_iter_node(iter, iter->level + 1) && -+ !btree_node_intent_locked(iter, iter->level + 1)) -+ memset(merge + 1, 0, -+ (GC_MERGE_NODES - 1) * sizeof(merge[0])); -+ } -+ return bch2_trans_exit(&trans); -+} -+ -+/** -+ * bch_coalesce - coalesce adjacent nodes with low occupancy -+ */ -+void bch2_coalesce(struct bch_fs *c) -+{ -+ enum btree_id id; -+ -+ down_read(&c->gc_lock); -+ trace_gc_coalesce_start(c); -+ -+ for (id = 0; id < BTREE_ID_NR; id++) { -+ int ret = c->btree_roots[id].b -+ ? bch2_coalesce_btree(c, id) -+ : 0; -+ -+ if (ret) { -+ if (ret != -ESHUTDOWN) -+ bch_err(c, "btree coalescing failed: %d", ret); -+ return; -+ } -+ } -+ -+ trace_gc_coalesce_end(c); -+ up_read(&c->gc_lock); -+} -+ -+static int bch2_gc_thread(void *arg) -+{ -+ struct bch_fs *c = arg; -+ struct io_clock *clock = &c->io_clock[WRITE]; -+ unsigned long last = atomic_long_read(&clock->now); -+ unsigned last_kick = atomic_read(&c->kick_gc); -+ int ret; -+ -+ set_freezable(); -+ -+ while (1) { -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ if (kthread_should_stop()) { -+ __set_current_state(TASK_RUNNING); -+ return 0; -+ } -+ -+ if (atomic_read(&c->kick_gc) != last_kick) -+ break; -+ -+ if (c->btree_gc_periodic) { -+ unsigned long next = last + c->capacity / 16; -+ -+ if (atomic_long_read(&clock->now) >= next) -+ break; -+ -+ bch2_io_clock_schedule_timeout(clock, next); -+ } else { -+ schedule(); -+ } -+ -+ try_to_freeze(); -+ } -+ __set_current_state(TASK_RUNNING); -+ -+ last = atomic_long_read(&clock->now); -+ last_kick = atomic_read(&c->kick_gc); -+ -+ /* -+ * Full gc is currently incompatible with btree key cache: -+ */ -+#if 0 -+ ret = bch2_gc(c, NULL, false, false); -+#else -+ ret = bch2_gc_gens(c); -+#endif -+ if (ret) -+ bch_err(c, "btree gc failed: %i", ret); -+ -+ debug_check_no_locks_held(); -+ } -+ -+ return 0; -+} -+ -+void bch2_gc_thread_stop(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ p = c->gc_thread; -+ c->gc_thread = NULL; -+ -+ if (p) { -+ kthread_stop(p); -+ put_task_struct(p); -+ } -+} -+ -+int bch2_gc_thread_start(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ BUG_ON(c->gc_thread); -+ -+ p = kthread_create(bch2_gc_thread, c, "bch_gc"); -+ if (IS_ERR(p)) -+ return PTR_ERR(p); -+ -+ get_task_struct(p); -+ c->gc_thread = p; -+ wake_up_process(p); -+ return 0; -+} -diff 
--git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h -new file mode 100644 -index 000000000000..3694a3df62a8 ---- /dev/null -+++ b/fs/bcachefs/btree_gc.h -@@ -0,0 +1,121 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_GC_H -+#define _BCACHEFS_BTREE_GC_H -+ -+#include "btree_types.h" -+ -+void bch2_coalesce(struct bch_fs *); -+ -+struct journal_keys; -+int bch2_gc(struct bch_fs *, struct journal_keys *, bool, bool); -+int bch2_gc_gens(struct bch_fs *); -+void bch2_gc_thread_stop(struct bch_fs *); -+int bch2_gc_thread_start(struct bch_fs *); -+void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned); -+ -+/* -+ * For concurrent mark and sweep (with other index updates), we define a total -+ * ordering of _all_ references GC walks: -+ * -+ * Note that some references will have the same GC position as others - e.g. -+ * everything within the same btree node; in those cases we're relying on -+ * whatever locking exists for where those references live, i.e. the write lock -+ * on a btree node. -+ * -+ * That locking is also required to ensure GC doesn't pass the updater in -+ * between the updater adding/removing the reference and updating the GC marks; -+ * without that, we would at best double count sometimes. -+ * -+ * That part is important - whenever calling bch2_mark_pointers(), a lock _must_ -+ * be held that prevents GC from passing the position the updater is at. -+ * -+ * (What about the start of gc, when we're clearing all the marks? GC clears the -+ * mark with the gc pos seqlock held, and bch_mark_bucket checks against the gc -+ * position inside its cmpxchg loop, so crap magically works). -+ */ -+ -+/* Position of (the start of) a gc phase: */ -+static inline struct gc_pos gc_phase(enum gc_phase phase) -+{ -+ return (struct gc_pos) { -+ .phase = phase, -+ .pos = POS_MIN, -+ .level = 0, -+ }; -+} -+ -+static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r) -+{ -+ if (l.phase != r.phase) -+ return l.phase < r.phase ? -1 : 1; -+ if (bkey_cmp(l.pos, r.pos)) -+ return bkey_cmp(l.pos, r.pos); -+ if (l.level != r.level) -+ return l.level < r.level ? -1 : 1; -+ return 0; -+} -+ -+static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id) -+{ -+ switch (id) { -+#define x(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n; -+ BCH_BTREE_IDS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+static inline struct gc_pos gc_pos_btree(enum btree_id id, -+ struct bpos pos, unsigned level) -+{ -+ return (struct gc_pos) { -+ .phase = btree_id_to_gc_phase(id), -+ .pos = pos, -+ .level = level, -+ }; -+} -+ -+/* -+ * GC position of the pointers within a btree node: note, _not_ for &b->key -+ * itself, that lives in the parent node: -+ */ -+static inline struct gc_pos gc_pos_btree_node(struct btree *b) -+{ -+ return gc_pos_btree(b->c.btree_id, b->key.k.p, b->c.level); -+} -+ -+/* -+ * GC position of the pointer to a btree root: we don't use -+ * gc_pos_pointer_to_btree_node() here to avoid a potential race with -+ * btree_split() increasing the tree depth - the new root will have level > the -+ * old root and thus have a greater gc position than the old root, but that -+ * would be incorrect since once gc has marked the root it's not coming back. -+ */ -+static inline struct gc_pos gc_pos_btree_root(enum btree_id id) -+{ -+ return gc_pos_btree(id, POS_MAX, BTREE_MAX_DEPTH); -+} -+ -+static inline struct gc_pos gc_pos_alloc(struct bch_fs *c, struct open_bucket *ob) -+{ -+ return (struct gc_pos) { -+ .phase = GC_PHASE_ALLOC, -+ .pos = POS(ob ? 
ob - c->open_buckets : 0, 0), -+ }; -+} -+ -+static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos) -+{ -+ unsigned seq; -+ bool ret; -+ -+ do { -+ seq = read_seqcount_begin(&c->gc_pos_lock); -+ ret = gc_pos_cmp(pos, c->gc_pos) <= 0; -+ } while (read_seqcount_retry(&c->gc_pos_lock, seq)); -+ -+ return ret; -+} -+ -+#endif /* _BCACHEFS_BTREE_GC_H */ -diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c -new file mode 100644 -index 000000000000..2f5097218f9c ---- /dev/null -+++ b/fs/bcachefs/btree_io.c -@@ -0,0 +1,1834 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "bkey_sort.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "debug.h" -+#include "error.h" -+#include "extents.h" -+#include "io.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "super-io.h" -+ -+#include -+#include -+ -+static void verify_no_dups(struct btree *b, -+ struct bkey_packed *start, -+ struct bkey_packed *end, -+ bool extents) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bkey_packed *k, *p; -+ -+ if (start == end) -+ return; -+ -+ for (p = start, k = bkey_next_skip_noops(start, end); -+ k != end; -+ p = k, k = bkey_next_skip_noops(k, end)) { -+ struct bkey l = bkey_unpack_key(b, p); -+ struct bkey r = bkey_unpack_key(b, k); -+ -+ BUG_ON(extents -+ ? bkey_cmp(l.p, bkey_start_pos(&r)) > 0 -+ : bkey_cmp(l.p, bkey_start_pos(&r)) >= 0); -+ //BUG_ON(bkey_cmp_packed(&b->format, p, k) >= 0); -+ } -+#endif -+} -+ -+static void set_needs_whiteout(struct bset *i, int v) -+{ -+ struct bkey_packed *k; -+ -+ for (k = i->start; -+ k != vstruct_last(i); -+ k = bkey_next_skip_noops(k, vstruct_last(i))) -+ k->needs_whiteout = v; -+} -+ -+static void btree_bounce_free(struct bch_fs *c, size_t size, -+ bool used_mempool, void *p) -+{ -+ if (used_mempool) -+ mempool_free(p, &c->btree_bounce_pool); -+ else -+ vpfree(p, size); -+} -+ -+static void *btree_bounce_alloc(struct bch_fs *c, size_t size, -+ bool *used_mempool) -+{ -+ unsigned flags = memalloc_nofs_save(); -+ void *p; -+ -+ BUG_ON(size > btree_bytes(c)); -+ -+ *used_mempool = false; -+ p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT); -+ if (!p) { -+ *used_mempool = true; -+ p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO); -+ } -+ memalloc_nofs_restore(flags); -+ return p; -+} -+ -+static void sort_bkey_ptrs(const struct btree *bt, -+ struct bkey_packed **ptrs, unsigned nr) -+{ -+ unsigned n = nr, a = nr / 2, b, c, d; -+ -+ if (!a) -+ return; -+ -+ /* Heap sort: see lib/sort.c: */ -+ while (1) { -+ if (a) -+ a--; -+ else if (--n) -+ swap(ptrs[0], ptrs[n]); -+ else -+ break; -+ -+ for (b = a; c = 2 * b + 1, (d = c + 1) < n;) -+ b = bkey_cmp_packed(bt, -+ ptrs[c], -+ ptrs[d]) >= 0 ? 
c : d; -+ if (d == n) -+ b = c; -+ -+ while (b != a && -+ bkey_cmp_packed(bt, -+ ptrs[a], -+ ptrs[b]) >= 0) -+ b = (b - 1) / 2; -+ c = b; -+ while (b != a) { -+ b = (b - 1) / 2; -+ swap(ptrs[b], ptrs[c]); -+ } -+ } -+} -+ -+static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) -+{ -+ struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k; -+ bool used_mempool = false; -+ size_t bytes = b->whiteout_u64s * sizeof(u64); -+ -+ if (!b->whiteout_u64s) -+ return; -+ -+ new_whiteouts = btree_bounce_alloc(c, bytes, &used_mempool); -+ -+ ptrs = ptrs_end = ((void *) new_whiteouts + bytes); -+ -+ for (k = unwritten_whiteouts_start(c, b); -+ k != unwritten_whiteouts_end(c, b); -+ k = bkey_next(k)) -+ *--ptrs = k; -+ -+ sort_bkey_ptrs(b, ptrs, ptrs_end - ptrs); -+ -+ k = new_whiteouts; -+ -+ while (ptrs != ptrs_end) { -+ bkey_copy(k, *ptrs); -+ k = bkey_next(k); -+ ptrs++; -+ } -+ -+ verify_no_dups(b, new_whiteouts, -+ (void *) ((u64 *) new_whiteouts + b->whiteout_u64s), -+ btree_node_old_extent_overwrite(b)); -+ -+ memcpy_u64s(unwritten_whiteouts_start(c, b), -+ new_whiteouts, b->whiteout_u64s); -+ -+ btree_bounce_free(c, bytes, used_mempool, new_whiteouts); -+} -+ -+static bool should_compact_bset(struct btree *b, struct bset_tree *t, -+ bool compacting, enum compact_mode mode) -+{ -+ if (!bset_dead_u64s(b, t)) -+ return false; -+ -+ switch (mode) { -+ case COMPACT_LAZY: -+ return should_compact_bset_lazy(b, t) || -+ (compacting && !bset_written(b, bset(b, t))); -+ case COMPACT_ALL: -+ return true; -+ default: -+ BUG(); -+ } -+} -+ -+static bool bch2_compact_extent_whiteouts(struct bch_fs *c, -+ struct btree *b, -+ enum compact_mode mode) -+{ -+ const struct bkey_format *f = &b->format; -+ struct bset_tree *t; -+ struct bkey_packed *whiteouts = NULL; -+ struct bkey_packed *u_start, *u_pos; -+ struct sort_iter sort_iter; -+ unsigned bytes, whiteout_u64s = 0, u64s; -+ bool used_mempool, compacting = false; -+ -+ BUG_ON(!btree_node_is_extents(b)); -+ -+ for_each_bset(b, t) -+ if (should_compact_bset(b, t, whiteout_u64s != 0, mode)) -+ whiteout_u64s += bset_dead_u64s(b, t); -+ -+ if (!whiteout_u64s) -+ return false; -+ -+ bch2_sort_whiteouts(c, b); -+ -+ sort_iter_init(&sort_iter, b); -+ -+ whiteout_u64s += b->whiteout_u64s; -+ bytes = whiteout_u64s * sizeof(u64); -+ -+ whiteouts = btree_bounce_alloc(c, bytes, &used_mempool); -+ u_start = u_pos = whiteouts; -+ -+ memcpy_u64s(u_pos, unwritten_whiteouts_start(c, b), -+ b->whiteout_u64s); -+ u_pos = (void *) u_pos + b->whiteout_u64s * sizeof(u64); -+ -+ sort_iter_add(&sort_iter, u_start, u_pos); -+ -+ for_each_bset(b, t) { -+ struct bset *i = bset(b, t); -+ struct bkey_packed *k, *n, *out, *start, *end; -+ struct btree_node_entry *src = NULL, *dst = NULL; -+ -+ if (t != b->set && !bset_written(b, i)) { -+ src = container_of(i, struct btree_node_entry, keys); -+ dst = max(write_block(b), -+ (void *) btree_bkey_last(b, t - 1)); -+ } -+ -+ if (src != dst) -+ compacting = true; -+ -+ if (!should_compact_bset(b, t, compacting, mode)) { -+ if (src != dst) { -+ memmove(dst, src, sizeof(*src) + -+ le16_to_cpu(src->keys.u64s) * -+ sizeof(u64)); -+ i = &dst->keys; -+ set_btree_bset(b, t, i); -+ } -+ continue; -+ } -+ -+ compacting = true; -+ u_start = u_pos; -+ start = i->start; -+ end = vstruct_last(i); -+ -+ if (src != dst) { -+ memmove(dst, src, sizeof(*src)); -+ i = &dst->keys; -+ set_btree_bset(b, t, i); -+ } -+ -+ out = i->start; -+ -+ for (k = start; k != end; k = n) { -+ n = bkey_next_skip_noops(k, end); -+ -+ if (bkey_deleted(k)) -+ continue; 
-+ -+ BUG_ON(bkey_whiteout(k) && -+ k->needs_whiteout && -+ bkey_written(b, k)); -+ -+ if (bkey_whiteout(k) && !k->needs_whiteout) -+ continue; -+ -+ if (bkey_whiteout(k)) { -+ memcpy_u64s(u_pos, k, bkeyp_key_u64s(f, k)); -+ set_bkeyp_val_u64s(f, u_pos, 0); -+ u_pos = bkey_next(u_pos); -+ } else { -+ bkey_copy(out, k); -+ out = bkey_next(out); -+ } -+ } -+ -+ sort_iter_add(&sort_iter, u_start, u_pos); -+ -+ i->u64s = cpu_to_le16((u64 *) out - i->_data); -+ set_btree_bset_end(b, t); -+ bch2_bset_set_no_aux_tree(b, t); -+ } -+ -+ b->whiteout_u64s = (u64 *) u_pos - (u64 *) whiteouts; -+ -+ BUG_ON((void *) unwritten_whiteouts_start(c, b) < -+ (void *) btree_bkey_last(b, bset_tree_last(b))); -+ -+ u64s = bch2_sort_extent_whiteouts(unwritten_whiteouts_start(c, b), -+ &sort_iter); -+ -+ BUG_ON(u64s > b->whiteout_u64s); -+ BUG_ON(u_pos != whiteouts && !u64s); -+ -+ if (u64s != b->whiteout_u64s) { -+ void *src = unwritten_whiteouts_start(c, b); -+ -+ b->whiteout_u64s = u64s; -+ memmove_u64s_up(unwritten_whiteouts_start(c, b), src, u64s); -+ } -+ -+ verify_no_dups(b, -+ unwritten_whiteouts_start(c, b), -+ unwritten_whiteouts_end(c, b), -+ true); -+ -+ btree_bounce_free(c, bytes, used_mempool, whiteouts); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ bch_btree_keys_u64s_remaining(c, b); -+ bch2_verify_btree_nr_keys(b); -+ -+ return true; -+} -+ -+static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode) -+{ -+ struct bset_tree *t; -+ bool ret = false; -+ -+ for_each_bset(b, t) { -+ struct bset *i = bset(b, t); -+ struct bkey_packed *k, *n, *out, *start, *end; -+ struct btree_node_entry *src = NULL, *dst = NULL; -+ -+ if (t != b->set && !bset_written(b, i)) { -+ src = container_of(i, struct btree_node_entry, keys); -+ dst = max(write_block(b), -+ (void *) btree_bkey_last(b, t - 1)); -+ } -+ -+ if (src != dst) -+ ret = true; -+ -+ if (!should_compact_bset(b, t, ret, mode)) { -+ if (src != dst) { -+ memmove(dst, src, sizeof(*src) + -+ le16_to_cpu(src->keys.u64s) * -+ sizeof(u64)); -+ i = &dst->keys; -+ set_btree_bset(b, t, i); -+ } -+ continue; -+ } -+ -+ start = btree_bkey_first(b, t); -+ end = btree_bkey_last(b, t); -+ -+ if (src != dst) { -+ memmove(dst, src, sizeof(*src)); -+ i = &dst->keys; -+ set_btree_bset(b, t, i); -+ } -+ -+ out = i->start; -+ -+ for (k = start; k != end; k = n) { -+ n = bkey_next_skip_noops(k, end); -+ -+ if (!bkey_whiteout(k)) { -+ bkey_copy(out, k); -+ out = bkey_next(out); -+ } else { -+ BUG_ON(k->needs_whiteout); -+ } -+ } -+ -+ i->u64s = cpu_to_le16((u64 *) out - i->_data); -+ set_btree_bset_end(b, t); -+ bch2_bset_set_no_aux_tree(b, t); -+ ret = true; -+ } -+ -+ bch2_verify_btree_nr_keys(b); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ return ret; -+} -+ -+bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, -+ enum compact_mode mode) -+{ -+ return !btree_node_old_extent_overwrite(b) -+ ? 
bch2_drop_whiteouts(b, mode) -+ : bch2_compact_extent_whiteouts(c, b, mode); -+} -+ -+static void btree_node_sort(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter, -+ unsigned start_idx, -+ unsigned end_idx, -+ bool filter_whiteouts) -+{ -+ struct btree_node *out; -+ struct sort_iter sort_iter; -+ struct bset_tree *t; -+ struct bset *start_bset = bset(b, &b->set[start_idx]); -+ bool used_mempool = false; -+ u64 start_time, seq = 0; -+ unsigned i, u64s = 0, bytes, shift = end_idx - start_idx - 1; -+ bool sorting_entire_node = start_idx == 0 && -+ end_idx == b->nsets; -+ -+ sort_iter_init(&sort_iter, b); -+ -+ for (t = b->set + start_idx; -+ t < b->set + end_idx; -+ t++) { -+ u64s += le16_to_cpu(bset(b, t)->u64s); -+ sort_iter_add(&sort_iter, -+ btree_bkey_first(b, t), -+ btree_bkey_last(b, t)); -+ } -+ -+ bytes = sorting_entire_node -+ ? btree_bytes(c) -+ : __vstruct_bytes(struct btree_node, u64s); -+ -+ out = btree_bounce_alloc(c, bytes, &used_mempool); -+ -+ start_time = local_clock(); -+ -+ if (btree_node_old_extent_overwrite(b)) -+ filter_whiteouts = bset_written(b, start_bset); -+ -+ u64s = (btree_node_old_extent_overwrite(b) -+ ? bch2_sort_extents -+ : bch2_sort_keys)(out->keys.start, -+ &sort_iter, -+ filter_whiteouts); -+ -+ out->keys.u64s = cpu_to_le16(u64s); -+ -+ BUG_ON(vstruct_end(&out->keys) > (void *) out + bytes); -+ -+ if (sorting_entire_node) -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort], -+ start_time); -+ -+ /* Make sure we preserve bset journal_seq: */ -+ for (t = b->set + start_idx; t < b->set + end_idx; t++) -+ seq = max(seq, le64_to_cpu(bset(b, t)->journal_seq)); -+ start_bset->journal_seq = cpu_to_le64(seq); -+ -+ if (sorting_entire_node) { -+ unsigned u64s = le16_to_cpu(out->keys.u64s); -+ -+ BUG_ON(bytes != btree_bytes(c)); -+ -+ /* -+ * Our temporary buffer is the same size as the btree node's -+ * buffer, we can just swap buffers instead of doing a big -+ * memcpy() -+ */ -+ *out = *b->data; -+ out->keys.u64s = cpu_to_le16(u64s); -+ swap(out, b->data); -+ set_btree_bset(b, b->set, &b->data->keys); -+ } else { -+ start_bset->u64s = out->keys.u64s; -+ memcpy_u64s(start_bset->start, -+ out->keys.start, -+ le16_to_cpu(out->keys.u64s)); -+ } -+ -+ for (i = start_idx + 1; i < end_idx; i++) -+ b->nr.bset_u64s[start_idx] += -+ b->nr.bset_u64s[i]; -+ -+ b->nsets -= shift; -+ -+ for (i = start_idx + 1; i < b->nsets; i++) { -+ b->nr.bset_u64s[i] = b->nr.bset_u64s[i + shift]; -+ b->set[i] = b->set[i + shift]; -+ } -+ -+ for (i = b->nsets; i < MAX_BSETS; i++) -+ b->nr.bset_u64s[i] = 0; -+ -+ set_btree_bset_end(b, &b->set[start_idx]); -+ bch2_bset_set_no_aux_tree(b, &b->set[start_idx]); -+ -+ btree_bounce_free(c, bytes, used_mempool, out); -+ -+ bch2_verify_btree_nr_keys(b); -+} -+ -+void bch2_btree_sort_into(struct bch_fs *c, -+ struct btree *dst, -+ struct btree *src) -+{ -+ struct btree_nr_keys nr; -+ struct btree_node_iter src_iter; -+ u64 start_time = local_clock(); -+ -+ BUG_ON(dst->nsets != 1); -+ -+ bch2_bset_set_no_aux_tree(dst, dst->set); -+ -+ bch2_btree_node_iter_init_from_start(&src_iter, src); -+ -+ if (btree_node_is_extents(src)) -+ nr = bch2_sort_repack_merge(c, btree_bset_first(dst), -+ src, &src_iter, -+ &dst->format, -+ true); -+ else -+ nr = bch2_sort_repack(btree_bset_first(dst), -+ src, &src_iter, -+ &dst->format, -+ true); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort], -+ start_time); -+ -+ set_btree_bset_end(dst, dst->set); -+ -+ dst->nr.live_u64s += nr.live_u64s; -+ dst->nr.bset_u64s[0] += 
nr.bset_u64s[0]; -+ dst->nr.packed_keys += nr.packed_keys; -+ dst->nr.unpacked_keys += nr.unpacked_keys; -+ -+ bch2_verify_btree_nr_keys(dst); -+} -+ -+#define SORT_CRIT (4096 / sizeof(u64)) -+ -+/* -+ * We're about to add another bset to the btree node, so if there's currently -+ * too many bsets - sort some of them together: -+ */ -+static bool btree_node_compact(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter) -+{ -+ unsigned unwritten_idx; -+ bool ret = false; -+ -+ for (unwritten_idx = 0; -+ unwritten_idx < b->nsets; -+ unwritten_idx++) -+ if (!bset_written(b, bset(b, &b->set[unwritten_idx]))) -+ break; -+ -+ if (b->nsets - unwritten_idx > 1) { -+ btree_node_sort(c, b, iter, unwritten_idx, -+ b->nsets, false); -+ ret = true; -+ } -+ -+ if (unwritten_idx > 1) { -+ btree_node_sort(c, b, iter, 0, unwritten_idx, false); -+ ret = true; -+ } -+ -+ return ret; -+} -+ -+void bch2_btree_build_aux_trees(struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) -+ bch2_bset_build_aux_tree(b, t, -+ !bset_written(b, bset(b, t)) && -+ t == bset_tree_last(b)); -+} -+ -+/* -+ * @bch_btree_init_next - initialize a new (unwritten) bset that can then be -+ * inserted into -+ * -+ * Safe to call if there already is an unwritten bset - will only add a new bset -+ * if @b doesn't already have one. -+ * -+ * Returns true if we sorted (i.e. invalidated iterators -+ */ -+void bch2_btree_init_next(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter) -+{ -+ struct btree_node_entry *bne; -+ bool did_sort; -+ -+ EBUG_ON(!(b->c.lock.state.seq & 1)); -+ EBUG_ON(iter && iter->l[b->c.level].b != b); -+ -+ did_sort = btree_node_compact(c, b, iter); -+ -+ bne = want_new_bset(c, b); -+ if (bne) -+ bch2_bset_init_next(c, b, bne); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ if (iter && did_sort) -+ bch2_btree_iter_reinit_node(iter, b); -+} -+ -+static void btree_err_msg(struct printbuf *out, struct bch_fs *c, -+ struct btree *b, struct bset *i, -+ unsigned offset, int write) -+{ -+ pr_buf(out, "error validating btree node %sat btree %u level %u/%u\n" -+ "pos ", -+ write ? "before write " : "", -+ b->c.btree_id, b->c.level, -+ c->btree_roots[b->c.btree_id].level); -+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); -+ -+ pr_buf(out, " node offset %u", b->written); -+ if (i) -+ pr_buf(out, " bset u64s %u", le16_to_cpu(i->u64s)); -+} -+ -+enum btree_err_type { -+ BTREE_ERR_FIXABLE, -+ BTREE_ERR_WANT_RETRY, -+ BTREE_ERR_MUST_RETRY, -+ BTREE_ERR_FATAL, -+}; -+ -+enum btree_validate_ret { -+ BTREE_RETRY_READ = 64, -+}; -+ -+#define btree_err(type, c, b, i, msg, ...) 
\ -+({ \ -+ __label__ out; \ -+ char _buf[300]; \ -+ struct printbuf out = PBUF(_buf); \ -+ \ -+ btree_err_msg(&out, c, b, i, b->written, write); \ -+ pr_buf(&out, ": " msg, ##__VA_ARGS__); \ -+ \ -+ if (type == BTREE_ERR_FIXABLE && \ -+ write == READ && \ -+ !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { \ -+ mustfix_fsck_err(c, "%s", _buf); \ -+ goto out; \ -+ } \ -+ \ -+ switch (write) { \ -+ case READ: \ -+ bch_err(c, "%s", _buf); \ -+ \ -+ switch (type) { \ -+ case BTREE_ERR_FIXABLE: \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ case BTREE_ERR_WANT_RETRY: \ -+ if (have_retry) { \ -+ ret = BTREE_RETRY_READ; \ -+ goto fsck_err; \ -+ } \ -+ break; \ -+ case BTREE_ERR_MUST_RETRY: \ -+ ret = BTREE_RETRY_READ; \ -+ goto fsck_err; \ -+ case BTREE_ERR_FATAL: \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ } \ -+ break; \ -+ case WRITE: \ -+ bch_err(c, "corrupt metadata before write: %s", _buf); \ -+ \ -+ if (bch2_fs_inconsistent(c)) { \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ } \ -+ break; \ -+ } \ -+out: \ -+ true; \ -+}) -+ -+#define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false) -+ -+static int validate_bset(struct bch_fs *c, struct btree *b, -+ struct bset *i, unsigned sectors, -+ int write, bool have_retry) -+{ -+ unsigned version = le16_to_cpu(i->version); -+ const char *err; -+ int ret = 0; -+ -+ btree_err_on((version != BCH_BSET_VERSION_OLD && -+ version < bcachefs_metadata_version_min) || -+ version >= bcachefs_metadata_version_max, -+ BTREE_ERR_FATAL, c, b, i, -+ "unsupported bset version"); -+ -+ if (btree_err_on(b->written + sectors > c->opts.btree_node_size, -+ BTREE_ERR_FIXABLE, c, b, i, -+ "bset past end of btree node")) { -+ i->u64s = 0; -+ return 0; -+ } -+ -+ btree_err_on(b->written && !i->u64s, -+ BTREE_ERR_FIXABLE, c, b, i, -+ "empty bset"); -+ -+ if (!b->written) { -+ struct btree_node *bn = -+ container_of(i, struct btree_node, keys); -+ /* These indicate that we read the wrong btree node: */ -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bch_btree_ptr_v2 *bp = -+ &bkey_i_to_btree_ptr_v2(&b->key)->v; -+ -+ /* XXX endianness */ -+ btree_err_on(bp->seq != bn->keys.seq, -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "incorrect sequence number (wrong btree node)"); -+ } -+ -+ btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id, -+ BTREE_ERR_MUST_RETRY, c, b, i, -+ "incorrect btree id"); -+ -+ btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level, -+ BTREE_ERR_MUST_RETRY, c, b, i, -+ "incorrect level"); -+ -+ if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) { -+ u64 *p = (u64 *) &bn->ptr; -+ -+ *p = swab64(*p); -+ } -+ -+ if (!write) -+ compat_btree_node(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, bn); -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bch_btree_ptr_v2 *bp = -+ &bkey_i_to_btree_ptr_v2(&b->key)->v; -+ -+ btree_err_on(bkey_cmp(b->data->min_key, bp->min_key), -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "incorrect min_key: got %llu:%llu should be %llu:%llu", -+ b->data->min_key.inode, -+ b->data->min_key.offset, -+ bp->min_key.inode, -+ bp->min_key.offset); -+ } -+ -+ btree_err_on(bkey_cmp(bn->max_key, b->key.k.p), -+ BTREE_ERR_MUST_RETRY, c, b, i, -+ "incorrect max key"); -+ -+ if (write) -+ compat_btree_node(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, bn); -+ -+ /* XXX: ideally we would be validating min_key too */ -+#if 0 -+ /* -+ * not correct anymore, due to btree node write error -+ * handling -+ * -+ * need to add bn->seq to btree keys and 
verify -+ * against that -+ */ -+ btree_err_on(!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key), -+ bn->ptr), -+ BTREE_ERR_FATAL, c, b, i, -+ "incorrect backpointer"); -+#endif -+ err = bch2_bkey_format_validate(&bn->format); -+ btree_err_on(err, -+ BTREE_ERR_FATAL, c, b, i, -+ "invalid bkey format: %s", err); -+ -+ compat_bformat(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, -+ &bn->format); -+ } -+fsck_err: -+ return ret; -+} -+ -+static int validate_bset_keys(struct bch_fs *c, struct btree *b, -+ struct bset *i, unsigned *whiteout_u64s, -+ int write, bool have_retry) -+{ -+ unsigned version = le16_to_cpu(i->version); -+ struct bkey_packed *k, *prev = NULL; -+ bool seen_non_whiteout = false; -+ int ret = 0; -+ -+ if (!BSET_SEPARATE_WHITEOUTS(i)) { -+ seen_non_whiteout = true; -+ *whiteout_u64s = 0; -+ } -+ -+ for (k = i->start; -+ k != vstruct_last(i);) { -+ struct bkey_s u; -+ struct bkey tmp; -+ const char *invalid; -+ -+ if (btree_err_on(bkey_next(k) > vstruct_last(i), -+ BTREE_ERR_FIXABLE, c, b, i, -+ "key extends past end of bset")) { -+ i->u64s = cpu_to_le16((u64 *) k - i->_data); -+ break; -+ } -+ -+ if (btree_err_on(k->format > KEY_FORMAT_CURRENT, -+ BTREE_ERR_FIXABLE, c, b, i, -+ "invalid bkey format %u", k->format)) { -+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); -+ memmove_u64s_down(k, bkey_next(k), -+ (u64 *) vstruct_end(i) - (u64 *) k); -+ continue; -+ } -+ -+ /* XXX: validate k->u64s */ -+ if (!write) -+ bch2_bkey_compat(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, -+ &b->format, k); -+ -+ u = __bkey_disassemble(b, k, &tmp); -+ -+ invalid = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b)) ?: -+ bch2_bkey_in_btree_node(b, u.s_c) ?: -+ (write ? bch2_bkey_val_invalid(c, u.s_c) : NULL); -+ if (invalid) { -+ char buf[160]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c); -+ btree_err(BTREE_ERR_FIXABLE, c, b, i, -+ "invalid bkey:\n%s\n%s", invalid, buf); -+ -+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); -+ memmove_u64s_down(k, bkey_next(k), -+ (u64 *) vstruct_end(i) - (u64 *) k); -+ continue; -+ } -+ -+ if (write) -+ bch2_bkey_compat(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, -+ &b->format, k); -+ -+ /* -+ * with the separate whiteouts thing (used for extents), the -+ * second set of keys actually can have whiteouts too, so we -+ * can't solely go off bkey_whiteout()... 
-+ */ -+ -+ if (!seen_non_whiteout && -+ (!bkey_whiteout(k) || -+ (prev && bkey_iter_cmp(b, prev, k) > 0))) { -+ *whiteout_u64s = k->_data - i->_data; -+ seen_non_whiteout = true; -+ } else if (prev && bkey_iter_cmp(b, prev, k) > 0) { -+ char buf1[80]; -+ char buf2[80]; -+ struct bkey up = bkey_unpack_key(b, prev); -+ -+ bch2_bkey_to_text(&PBUF(buf1), &up); -+ bch2_bkey_to_text(&PBUF(buf2), u.k); -+ -+ bch2_dump_bset(c, b, i, 0); -+ btree_err(BTREE_ERR_FATAL, c, b, i, -+ "keys out of order: %s > %s", -+ buf1, buf2); -+ /* XXX: repair this */ -+ } -+ -+ prev = k; -+ k = bkey_next_skip_noops(k, vstruct_last(i)); -+ } -+fsck_err: -+ return ret; -+} -+ -+int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry) -+{ -+ struct btree_node_entry *bne; -+ struct sort_iter *iter; -+ struct btree_node *sorted; -+ struct bkey_packed *k; -+ struct bch_extent_ptr *ptr; -+ struct bset *i; -+ bool used_mempool, blacklisted; -+ unsigned u64s; -+ int ret, retry_read = 0, write = READ; -+ -+ iter = mempool_alloc(&c->fill_iter, GFP_NOIO); -+ sort_iter_init(iter, b); -+ iter->size = (btree_blocks(c) + 1) * 2; -+ -+ if (bch2_meta_read_fault("btree")) -+ btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "dynamic fault"); -+ -+ btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c), -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "bad magic"); -+ -+ btree_err_on(!b->data->keys.seq, -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "bad btree header"); -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bch_btree_ptr_v2 *bp = -+ &bkey_i_to_btree_ptr_v2(&b->key)->v; -+ -+ btree_err_on(b->data->keys.seq != bp->seq, -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "got wrong btree node (seq %llx want %llx)", -+ b->data->keys.seq, bp->seq); -+ } -+ -+ while (b->written < c->opts.btree_node_size) { -+ unsigned sectors, whiteout_u64s = 0; -+ struct nonce nonce; -+ struct bch_csum csum; -+ bool first = !b->written; -+ -+ if (!b->written) { -+ i = &b->data->keys; -+ -+ btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), -+ BTREE_ERR_WANT_RETRY, c, b, i, -+ "unknown checksum type"); -+ -+ nonce = btree_nonce(i, b->written << 9); -+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); -+ -+ btree_err_on(bch2_crc_cmp(csum, b->data->csum), -+ BTREE_ERR_WANT_RETRY, c, b, i, -+ "invalid checksum"); -+ -+ bset_encrypt(c, i, b->written << 9); -+ -+ if (btree_node_is_extents(b) && -+ !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) { -+ set_btree_node_old_extent_overwrite(b); -+ set_btree_node_need_rewrite(b); -+ } -+ -+ sectors = vstruct_sectors(b->data, c->block_bits); -+ } else { -+ bne = write_block(b); -+ i = &bne->keys; -+ -+ if (i->seq != b->data->keys.seq) -+ break; -+ -+ btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), -+ BTREE_ERR_WANT_RETRY, c, b, i, -+ "unknown checksum type"); -+ -+ nonce = btree_nonce(i, b->written << 9); -+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); -+ -+ btree_err_on(bch2_crc_cmp(csum, bne->csum), -+ BTREE_ERR_WANT_RETRY, c, b, i, -+ "invalid checksum"); -+ -+ bset_encrypt(c, i, b->written << 9); -+ -+ sectors = vstruct_sectors(bne, c->block_bits); -+ } -+ -+ ret = validate_bset(c, b, i, sectors, -+ READ, have_retry); -+ if (ret) -+ goto fsck_err; -+ -+ if (!b->written) -+ btree_node_set_format(b, b->data->format); -+ -+ ret = validate_bset_keys(c, b, i, &whiteout_u64s, -+ READ, have_retry); -+ if (ret) -+ goto fsck_err; -+ -+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); -+ -+ b->written += sectors; -+ -+ blacklisted = bch2_journal_seq_is_blacklisted(c, -+ 
le64_to_cpu(i->journal_seq), -+ true); -+ -+ btree_err_on(blacklisted && first, -+ BTREE_ERR_FIXABLE, c, b, i, -+ "first btree node bset has blacklisted journal seq"); -+ if (blacklisted && !first) -+ continue; -+ -+ sort_iter_add(iter, i->start, -+ vstruct_idx(i, whiteout_u64s)); -+ -+ sort_iter_add(iter, -+ vstruct_idx(i, whiteout_u64s), -+ vstruct_last(i)); -+ } -+ -+ for (bne = write_block(b); -+ bset_byte_offset(b, bne) < btree_bytes(c); -+ bne = (void *) bne + block_bytes(c)) -+ btree_err_on(bne->keys.seq == b->data->keys.seq, -+ BTREE_ERR_WANT_RETRY, c, b, NULL, -+ "found bset signature after last bset"); -+ -+ sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool); -+ sorted->keys.u64s = 0; -+ -+ set_btree_bset(b, b->set, &b->data->keys); -+ -+ b->nr = (btree_node_old_extent_overwrite(b) -+ ? bch2_extent_sort_fix_overlapping -+ : bch2_key_sort_fix_overlapping)(c, &sorted->keys, iter); -+ -+ u64s = le16_to_cpu(sorted->keys.u64s); -+ *sorted = *b->data; -+ sorted->keys.u64s = cpu_to_le16(u64s); -+ swap(sorted, b->data); -+ set_btree_bset(b, b->set, &b->data->keys); -+ b->nsets = 1; -+ -+ BUG_ON(b->nr.live_u64s != u64s); -+ -+ btree_bounce_free(c, btree_bytes(c), used_mempool, sorted); -+ -+ i = &b->data->keys; -+ for (k = i->start; k != vstruct_last(i);) { -+ struct bkey tmp; -+ struct bkey_s u = __bkey_disassemble(b, k, &tmp); -+ const char *invalid = bch2_bkey_val_invalid(c, u.s_c); -+ -+ if (invalid || -+ (inject_invalid_keys(c) && -+ !bversion_cmp(u.k->version, MAX_VERSION))) { -+ char buf[160]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c); -+ btree_err(BTREE_ERR_FIXABLE, c, b, i, -+ "invalid bkey %s: %s", buf, invalid); -+ -+ btree_keys_account_key_drop(&b->nr, 0, k); -+ -+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); -+ memmove_u64s_down(k, bkey_next(k), -+ (u64 *) vstruct_end(i) - (u64 *) k); -+ set_btree_bset_end(b, b->set); -+ continue; -+ } -+ -+ if (u.k->type == KEY_TYPE_btree_ptr_v2) { -+ struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u); -+ -+ bp.v->mem_ptr = 0; -+ } -+ -+ k = bkey_next_skip_noops(k, vstruct_last(i)); -+ } -+ -+ bch2_bset_build_aux_tree(b, b->set, false); -+ -+ set_needs_whiteout(btree_bset_first(b), true); -+ -+ btree_node_reset_sib_u64s(b); -+ -+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ if (ca->mi.state != BCH_MEMBER_STATE_RW) -+ set_btree_node_need_rewrite(b); -+ } -+out: -+ mempool_free(iter, &c->fill_iter); -+ return retry_read; -+fsck_err: -+ if (ret == BTREE_RETRY_READ) { -+ retry_read = 1; -+ } else { -+ bch2_inconsistent_error(c); -+ set_btree_node_read_error(b); -+ } -+ goto out; -+} -+ -+static void btree_node_read_work(struct work_struct *work) -+{ -+ struct btree_read_bio *rb = -+ container_of(work, struct btree_read_bio, work); -+ struct bch_fs *c = rb->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); -+ struct btree *b = rb->bio.bi_private; -+ struct bio *bio = &rb->bio; -+ struct bch_io_failures failed = { .nr = 0 }; -+ bool can_retry; -+ -+ goto start; -+ while (1) { -+ bch_info(c, "retrying read"); -+ ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); -+ rb->have_ioref = bch2_dev_get_ioref(ca, READ); -+ bio_reset(bio); -+ bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META; -+ bio->bi_iter.bi_sector = rb->pick.ptr.offset; -+ bio->bi_iter.bi_size = btree_bytes(c); -+ -+ if (rb->have_ioref) { -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ submit_bio_wait(bio); -+ } else { -+ bio->bi_status = BLK_STS_REMOVED; -+ } -+start: 
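		/*
		 * The initial read submitted by bch2_btree_node_read() lands here
		 * via the goto above; on an IO error or failed validation the
		 * outer loop marks the failure and retries from the next usable
		 * device until no further retry is possible.
		 */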
-+ bch2_dev_io_err_on(bio->bi_status, ca, "btree read: %s", -+ bch2_blk_status_to_str(bio->bi_status)); -+ if (rb->have_ioref) -+ percpu_ref_put(&ca->io_ref); -+ rb->have_ioref = false; -+ -+ bch2_mark_io_failure(&failed, &rb->pick); -+ -+ can_retry = bch2_bkey_pick_read_device(c, -+ bkey_i_to_s_c(&b->key), -+ &failed, &rb->pick) > 0; -+ -+ if (!bio->bi_status && -+ !bch2_btree_node_read_done(c, b, can_retry)) -+ break; -+ -+ if (!can_retry) { -+ set_btree_node_read_error(b); -+ break; -+ } -+ } -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], -+ rb->start_time); -+ bio_put(&rb->bio); -+ clear_btree_node_read_in_flight(b); -+ wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); -+} -+ -+static void btree_node_read_endio(struct bio *bio) -+{ -+ struct btree_read_bio *rb = -+ container_of(bio, struct btree_read_bio, bio); -+ struct bch_fs *c = rb->c; -+ -+ if (rb->have_ioref) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); -+ bch2_latency_acct(ca, rb->start_time, READ); -+ } -+ -+ queue_work(system_unbound_wq, &rb->work); -+} -+ -+void bch2_btree_node_read(struct bch_fs *c, struct btree *b, -+ bool sync) -+{ -+ struct extent_ptr_decoded pick; -+ struct btree_read_bio *rb; -+ struct bch_dev *ca; -+ struct bio *bio; -+ int ret; -+ -+ trace_btree_read(c, b); -+ -+ ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), -+ NULL, &pick); -+ if (bch2_fs_fatal_err_on(ret <= 0, c, -+ "btree node read error: no device to read from")) { -+ set_btree_node_read_error(b); -+ return; -+ } -+ -+ ca = bch_dev_bkey_exists(c, pick.ptr.dev); -+ -+ bio = bio_alloc_bioset(GFP_NOIO, buf_pages(b->data, -+ btree_bytes(c)), -+ &c->btree_bio); -+ rb = container_of(bio, struct btree_read_bio, bio); -+ rb->c = c; -+ rb->start_time = local_clock(); -+ rb->have_ioref = bch2_dev_get_ioref(ca, READ); -+ rb->pick = pick; -+ INIT_WORK(&rb->work, btree_node_read_work); -+ bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META; -+ bio->bi_iter.bi_sector = pick.ptr.offset; -+ bio->bi_end_io = btree_node_read_endio; -+ bio->bi_private = b; -+ bch2_bio_map(bio, b->data, btree_bytes(c)); -+ -+ set_btree_node_read_in_flight(b); -+ -+ if (rb->have_ioref) { -+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree], -+ bio_sectors(bio)); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ -+ if (sync) { -+ submit_bio_wait(bio); -+ -+ bio->bi_private = b; -+ btree_node_read_work(&rb->work); -+ } else { -+ submit_bio(bio); -+ } -+ } else { -+ bio->bi_status = BLK_STS_REMOVED; -+ -+ if (sync) -+ btree_node_read_work(&rb->work); -+ else -+ queue_work(system_unbound_wq, &rb->work); -+ -+ } -+} -+ -+int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, -+ const struct bkey_i *k, unsigned level) -+{ -+ struct closure cl; -+ struct btree *b; -+ int ret; -+ -+ closure_init_stack(&cl); -+ -+ do { -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); -+ closure_sync(&cl); -+ } while (ret); -+ -+ b = bch2_btree_node_mem_alloc(c); -+ bch2_btree_cache_cannibalize_unlock(c); -+ -+ BUG_ON(IS_ERR(b)); -+ -+ bkey_copy(&b->key, k); -+ BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id)); -+ -+ bch2_btree_node_read(c, b, true); -+ -+ if (btree_node_read_error(b)) { -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ mutex_lock(&c->btree_cache.lock); -+ list_move(&b->list, &c->btree_cache.freeable); -+ mutex_unlock(&c->btree_cache.lock); -+ -+ ret = -EIO; -+ goto err; -+ } -+ -+ bch2_btree_set_root_for_read(c, b); -+err: -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ -+ return ret; -+} -+ 
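/*
 * Hedged sketch, not bcachefs code: bch2_btree_complete_write() below treats
 * will_make_reachable as a tagged pointer to the btree_update that will make
 * the node reachable, with bit 0 set while that update still owns a closure
 * reference; completing the write clears bit 0 atomically and, if it was the
 * caller that cleared it, drops the reference. The standalone C11 example
 * below only illustrates that tagged-pointer/cmpxchg pattern; all names in it
 * (fake_update, fake_put, complete_write, slot) are illustrative.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct fake_update { int refcount; };

static void fake_put(struct fake_update *u)
{
	u->refcount--;			/* stand-in for closure_put() */
}

static void complete_write(_Atomic uintptr_t *will_make_reachable)
{
	uintptr_t old = atomic_load(will_make_reachable), new;

	do {
		if (!(old & 1))		/* flag already cleared: nothing to drop */
			return;
		new = old & ~(uintptr_t) 1;
	} while (!atomic_compare_exchange_weak(will_make_reachable, &old, new));

	/* we were the ones to clear the flag: drop the reference */
	fake_put((struct fake_update *) new);
}

int main(void)
{
	struct fake_update u = { .refcount = 1 };
	/* pointer tagged with "reference still held" in bit 0 */
	_Atomic uintptr_t slot = (uintptr_t) &u | 1;

	complete_write(&slot);
	complete_write(&slot);	/* second call sees the flag cleared: no-op */
	printf("refcount = %d\n", u.refcount);	/* 0: dropped exactly once */
	return 0;
}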
-+void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, -+ struct btree_write *w) -+{ -+ unsigned long old, new, v = READ_ONCE(b->will_make_reachable); -+ -+ do { -+ old = new = v; -+ if (!(old & 1)) -+ break; -+ -+ new &= ~1UL; -+ } while ((v = cmpxchg(&b->will_make_reachable, old, new)) != old); -+ -+ if (old & 1) -+ closure_put(&((struct btree_update *) new)->cl); -+ -+ bch2_journal_pin_drop(&c->journal, &w->journal); -+} -+ -+static void btree_node_write_done(struct bch_fs *c, struct btree *b) -+{ -+ struct btree_write *w = btree_prev_write(b); -+ -+ bch2_btree_complete_write(c, b, w); -+ btree_node_io_unlock(b); -+} -+ -+static void bch2_btree_node_write_error(struct bch_fs *c, -+ struct btree_write_bio *wbio) -+{ -+ struct btree *b = wbio->wbio.bio.bi_private; -+ __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; -+ struct bch_extent_ptr *ptr; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_node_iter(&trans, b->c.btree_id, b->key.k.p, -+ BTREE_MAX_DEPTH, b->c.level, 0); -+retry: -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ goto err; -+ -+ /* has node been freed? */ -+ if (iter->l[b->c.level].b != b) { -+ /* node has been freed: */ -+ BUG_ON(!btree_node_dying(b)); -+ goto out; -+ } -+ -+ BUG_ON(!btree_node_hashed(b)); -+ -+ bkey_copy(&tmp.k, &b->key); -+ -+ bch2_bkey_drop_ptrs(bkey_i_to_s(&tmp.k), ptr, -+ bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev)); -+ -+ if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&tmp.k))) -+ goto err; -+ -+ ret = bch2_btree_node_update_key(c, iter, b, &tmp.k); -+ if (ret == -EINTR) -+ goto retry; -+ if (ret) -+ goto err; -+out: -+ bch2_trans_exit(&trans); -+ bio_put(&wbio->wbio.bio); -+ btree_node_write_done(c, b); -+ return; -+err: -+ set_btree_node_noevict(b); -+ bch2_fs_fatal_error(c, "fatal error writing btree node"); -+ goto out; -+} -+ -+void bch2_btree_write_error_work(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(work, struct bch_fs, -+ btree_write_error_work); -+ struct bio *bio; -+ -+ while (1) { -+ spin_lock_irq(&c->btree_write_error_lock); -+ bio = bio_list_pop(&c->btree_write_error_list); -+ spin_unlock_irq(&c->btree_write_error_lock); -+ -+ if (!bio) -+ break; -+ -+ bch2_btree_node_write_error(c, -+ container_of(bio, struct btree_write_bio, wbio.bio)); -+ } -+} -+ -+static void btree_node_write_work(struct work_struct *work) -+{ -+ struct btree_write_bio *wbio = -+ container_of(work, struct btree_write_bio, work); -+ struct bch_fs *c = wbio->wbio.c; -+ struct btree *b = wbio->wbio.bio.bi_private; -+ -+ btree_bounce_free(c, -+ wbio->bytes, -+ wbio->wbio.used_mempool, -+ wbio->data); -+ -+ if (wbio->wbio.failed.nr) { -+ unsigned long flags; -+ -+ spin_lock_irqsave(&c->btree_write_error_lock, flags); -+ bio_list_add(&c->btree_write_error_list, &wbio->wbio.bio); -+ spin_unlock_irqrestore(&c->btree_write_error_lock, flags); -+ -+ queue_work(c->wq, &c->btree_write_error_work); -+ return; -+ } -+ -+ bio_put(&wbio->wbio.bio); -+ btree_node_write_done(c, b); -+} -+ -+static void btree_node_write_endio(struct bio *bio) -+{ -+ struct bch_write_bio *wbio = to_wbio(bio); -+ struct bch_write_bio *parent = wbio->split ? 
wbio->parent : NULL; -+ struct bch_write_bio *orig = parent ?: wbio; -+ struct bch_fs *c = wbio->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev); -+ unsigned long flags; -+ -+ if (wbio->have_ioref) -+ bch2_latency_acct(ca, wbio->submit_time, WRITE); -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write: %s", -+ bch2_blk_status_to_str(bio->bi_status)) || -+ bch2_meta_write_fault("btree")) { -+ spin_lock_irqsave(&c->btree_write_error_lock, flags); -+ bch2_dev_list_add_dev(&orig->failed, wbio->dev); -+ spin_unlock_irqrestore(&c->btree_write_error_lock, flags); -+ } -+ -+ if (wbio->have_ioref) -+ percpu_ref_put(&ca->io_ref); -+ -+ if (parent) { -+ bio_put(bio); -+ bio_endio(&parent->bio); -+ } else { -+ struct btree_write_bio *wb = -+ container_of(orig, struct btree_write_bio, wbio); -+ -+ INIT_WORK(&wb->work, btree_node_write_work); -+ queue_work(system_unbound_wq, &wb->work); -+ } -+} -+ -+static int validate_bset_for_write(struct bch_fs *c, struct btree *b, -+ struct bset *i, unsigned sectors) -+{ -+ unsigned whiteout_u64s = 0; -+ int ret; -+ -+ if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE)) -+ return -1; -+ -+ ret = validate_bset(c, b, i, sectors, WRITE, false) ?: -+ validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false); -+ if (ret) -+ bch2_inconsistent_error(c); -+ -+ return ret; -+} -+ -+void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, -+ enum six_lock_type lock_type_held) -+{ -+ struct btree_write_bio *wbio; -+ struct bset_tree *t; -+ struct bset *i; -+ struct btree_node *bn = NULL; -+ struct btree_node_entry *bne = NULL; -+ BKEY_PADDED(key) k; -+ struct bch_extent_ptr *ptr; -+ struct sort_iter sort_iter; -+ struct nonce nonce; -+ unsigned bytes_to_write, sectors_to_write, bytes, u64s; -+ u64 seq = 0; -+ bool used_mempool; -+ unsigned long old, new; -+ bool validate_before_checksum = false; -+ void *data; -+ -+ if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) -+ return; -+ -+ /* -+ * We may only have a read lock on the btree node - the dirty bit is our -+ * "lock" against racing with other threads that may be trying to start -+ * a write, we do a write iff we clear the dirty bit. Since setting the -+ * dirty bit requires a write lock, we can't race with other threads -+ * redirtying it: -+ */ -+ do { -+ old = new = READ_ONCE(b->flags); -+ -+ if (!(old & (1 << BTREE_NODE_dirty))) -+ return; -+ -+ if (!btree_node_may_write(b)) -+ return; -+ -+ if (old & (1 << BTREE_NODE_write_in_flight)) { -+ btree_node_wait_on_io(b); -+ continue; -+ } -+ -+ new &= ~(1 << BTREE_NODE_dirty); -+ new &= ~(1 << BTREE_NODE_need_write); -+ new |= (1 << BTREE_NODE_write_in_flight); -+ new |= (1 << BTREE_NODE_just_written); -+ new ^= (1 << BTREE_NODE_write_idx); -+ } while (cmpxchg_acquire(&b->flags, old, new) != old); -+ -+ BUG_ON(btree_node_fake(b)); -+ BUG_ON((b->will_make_reachable != 0) != !b->written); -+ -+ BUG_ON(b->written >= c->opts.btree_node_size); -+ BUG_ON(b->written & (c->opts.block_size - 1)); -+ BUG_ON(bset_written(b, btree_bset_last(b))); -+ BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c)); -+ BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format))); -+ -+ bch2_sort_whiteouts(c, b); -+ -+ sort_iter_init(&sort_iter, b); -+ -+ bytes = !b->written -+ ? 
sizeof(struct btree_node) -+ : sizeof(struct btree_node_entry); -+ -+ bytes += b->whiteout_u64s * sizeof(u64); -+ -+ for_each_bset(b, t) { -+ i = bset(b, t); -+ -+ if (bset_written(b, i)) -+ continue; -+ -+ bytes += le16_to_cpu(i->u64s) * sizeof(u64); -+ sort_iter_add(&sort_iter, -+ btree_bkey_first(b, t), -+ btree_bkey_last(b, t)); -+ seq = max(seq, le64_to_cpu(i->journal_seq)); -+ } -+ -+ data = btree_bounce_alloc(c, bytes, &used_mempool); -+ -+ if (!b->written) { -+ bn = data; -+ *bn = *b->data; -+ i = &bn->keys; -+ } else { -+ bne = data; -+ bne->keys = b->data->keys; -+ i = &bne->keys; -+ } -+ -+ i->journal_seq = cpu_to_le64(seq); -+ i->u64s = 0; -+ -+ if (!btree_node_old_extent_overwrite(b)) { -+ sort_iter_add(&sort_iter, -+ unwritten_whiteouts_start(c, b), -+ unwritten_whiteouts_end(c, b)); -+ SET_BSET_SEPARATE_WHITEOUTS(i, false); -+ } else { -+ memcpy_u64s(i->start, -+ unwritten_whiteouts_start(c, b), -+ b->whiteout_u64s); -+ i->u64s = cpu_to_le16(b->whiteout_u64s); -+ SET_BSET_SEPARATE_WHITEOUTS(i, true); -+ } -+ -+ b->whiteout_u64s = 0; -+ -+ u64s = btree_node_old_extent_overwrite(b) -+ ? bch2_sort_extents(vstruct_last(i), &sort_iter, false) -+ : bch2_sort_keys(i->start, &sort_iter, false); -+ le16_add_cpu(&i->u64s, u64s); -+ -+ set_needs_whiteout(i, false); -+ -+ /* do we have data to write? */ -+ if (b->written && !i->u64s) -+ goto nowrite; -+ -+ bytes_to_write = vstruct_end(i) - data; -+ sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9; -+ -+ memset(data + bytes_to_write, 0, -+ (sectors_to_write << 9) - bytes_to_write); -+ -+ BUG_ON(b->written + sectors_to_write > c->opts.btree_node_size); -+ BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN); -+ BUG_ON(i->seq != b->data->keys.seq); -+ -+ i->version = c->sb.version < bcachefs_metadata_version_new_versioning -+ ? cpu_to_le16(BCH_BSET_VERSION_OLD) -+ : cpu_to_le16(c->sb.version); -+ SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c)); -+ -+ if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i))) -+ validate_before_checksum = true; -+ -+ /* validate_bset will be modifying: */ -+ if (le16_to_cpu(i->version) < bcachefs_metadata_version_max) -+ validate_before_checksum = true; -+ -+ /* if we're going to be encrypting, check metadata validity first: */ -+ if (validate_before_checksum && -+ validate_bset_for_write(c, b, i, sectors_to_write)) -+ goto err; -+ -+ bset_encrypt(c, i, b->written << 9); -+ -+ nonce = btree_nonce(i, b->written << 9); -+ -+ if (bn) -+ bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn); -+ else -+ bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); -+ -+ /* if we're not encrypting, check metadata after checksumming: */ -+ if (!validate_before_checksum && -+ validate_bset_for_write(c, b, i, sectors_to_write)) -+ goto err; -+ -+ /* -+ * We handle btree write errors by immediately halting the journal - -+ * after we've done that, we can't issue any subsequent btree writes -+ * because they might have pointers to new nodes that failed to write. 
-+ * -+ * Furthermore, there's no point in doing any more btree writes because -+ * with the journal stopped, we're never going to update the journal to -+ * reflect that those writes were done and the data flushed from the -+ * journal: -+ * -+ * Also on journal error, the pending write may have updates that were -+ * never journalled (interior nodes, see btree_update_nodes_written()) - -+ * it's critical that we don't do the write in that case otherwise we -+ * will have updates visible that weren't in the journal: -+ * -+ * Make sure to update b->written so bch2_btree_init_next() doesn't -+ * break: -+ */ -+ if (bch2_journal_error(&c->journal) || -+ c->opts.nochanges) -+ goto err; -+ -+ trace_btree_write(b, bytes_to_write, sectors_to_write); -+ -+ wbio = container_of(bio_alloc_bioset(GFP_NOIO, -+ buf_pages(data, sectors_to_write << 9), -+ &c->btree_bio), -+ struct btree_write_bio, wbio.bio); -+ wbio_init(&wbio->wbio.bio); -+ wbio->data = data; -+ wbio->bytes = bytes; -+ wbio->wbio.used_mempool = used_mempool; -+ wbio->wbio.bio.bi_opf = REQ_OP_WRITE|REQ_META; -+ wbio->wbio.bio.bi_end_io = btree_node_write_endio; -+ wbio->wbio.bio.bi_private = b; -+ -+ bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9); -+ -+ /* -+ * If we're appending to a leaf node, we don't technically need FUA - -+ * this write just needs to be persisted before the next journal write, -+ * which will be marked FLUSH|FUA. -+ * -+ * Similarly if we're writing a new btree root - the pointer is going to -+ * be in the next journal entry. -+ * -+ * But if we're writing a new btree node (that isn't a root) or -+ * appending to a non leaf btree node, we need either FUA or a flush -+ * when we write the parent with the new pointer. FUA is cheaper than a -+ * flush, and writes appending to leaf nodes aren't blocking anything so -+ * just make all btree node writes FUA to keep things sane. -+ */ -+ -+ bkey_copy(&k.key, &b->key); -+ -+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&k.key)), ptr) -+ ptr->offset += b->written; -+ -+ b->written += sectors_to_write; -+ -+ /* XXX: submitting IO with btree locks held: */ -+ bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, &k.key); -+ return; -+err: -+ set_btree_node_noevict(b); -+ b->written += sectors_to_write; -+nowrite: -+ btree_bounce_free(c, bytes, used_mempool, data); -+ btree_node_write_done(c, b); -+} -+ -+/* -+ * Work that must be done with write lock held: -+ */ -+bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b) -+{ -+ bool invalidated_iter = false; -+ struct btree_node_entry *bne; -+ struct bset_tree *t; -+ -+ if (!btree_node_just_written(b)) -+ return false; -+ -+ BUG_ON(b->whiteout_u64s); -+ -+ clear_btree_node_just_written(b); -+ -+ /* -+ * Note: immediately after write, bset_written() doesn't work - the -+ * amount of data we had to write after compaction might have been -+ * smaller than the offset of the last bset. 
-+ * -+ * However, we know that all bsets have been written here, as long as -+ * we're still holding the write lock: -+ */ -+ -+ /* -+ * XXX: decide if we really want to unconditionally sort down to a -+ * single bset: -+ */ -+ if (b->nsets > 1) { -+ btree_node_sort(c, b, NULL, 0, b->nsets, true); -+ invalidated_iter = true; -+ } else { -+ invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL); -+ } -+ -+ for_each_bset(b, t) -+ set_needs_whiteout(bset(b, t), true); -+ -+ bch2_btree_verify(c, b); -+ -+ /* -+ * If later we don't unconditionally sort down to a single bset, we have -+ * to ensure this is still true: -+ */ -+ BUG_ON((void *) btree_bkey_last(b, bset_tree_last(b)) > write_block(b)); -+ -+ bne = want_new_bset(c, b); -+ if (bne) -+ bch2_bset_init_next(c, b, bne); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ return invalidated_iter; -+} -+ -+/* -+ * Use this one if the node is intent locked: -+ */ -+void bch2_btree_node_write(struct bch_fs *c, struct btree *b, -+ enum six_lock_type lock_type_held) -+{ -+ BUG_ON(lock_type_held == SIX_LOCK_write); -+ -+ if (lock_type_held == SIX_LOCK_intent || -+ six_lock_tryupgrade(&b->c.lock)) { -+ __bch2_btree_node_write(c, b, SIX_LOCK_intent); -+ -+ /* don't cycle lock unnecessarily: */ -+ if (btree_node_just_written(b) && -+ six_trylock_write(&b->c.lock)) { -+ bch2_btree_post_write_cleanup(c, b); -+ six_unlock_write(&b->c.lock); -+ } -+ -+ if (lock_type_held == SIX_LOCK_read) -+ six_lock_downgrade(&b->c.lock); -+ } else { -+ __bch2_btree_node_write(c, b, SIX_LOCK_read); -+ } -+} -+ -+static void __bch2_btree_flush_all(struct bch_fs *c, unsigned flag) -+{ -+ struct bucket_table *tbl; -+ struct rhash_head *pos; -+ struct btree *b; -+ unsigned i; -+restart: -+ rcu_read_lock(); -+ for_each_cached_btree(b, c, tbl, i, pos) -+ if (test_bit(flag, &b->flags)) { -+ rcu_read_unlock(); -+ wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE); -+ goto restart; -+ -+ } -+ rcu_read_unlock(); -+} -+ -+void bch2_btree_flush_all_reads(struct bch_fs *c) -+{ -+ __bch2_btree_flush_all(c, BTREE_NODE_read_in_flight); -+} -+ -+void bch2_btree_flush_all_writes(struct bch_fs *c) -+{ -+ __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight); -+} -+ -+void bch2_btree_verify_flushed(struct bch_fs *c) -+{ -+ struct bucket_table *tbl; -+ struct rhash_head *pos; -+ struct btree *b; -+ unsigned i; -+ -+ rcu_read_lock(); -+ for_each_cached_btree(b, c, tbl, i, pos) { -+ unsigned long flags = READ_ONCE(b->flags); -+ -+ BUG_ON((flags & (1 << BTREE_NODE_dirty)) || -+ (flags & (1 << BTREE_NODE_write_in_flight))); -+ } -+ rcu_read_unlock(); -+} -+ -+void bch2_dirty_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct bucket_table *tbl; -+ struct rhash_head *pos; -+ struct btree *b; -+ unsigned i; -+ -+ rcu_read_lock(); -+ for_each_cached_btree(b, c, tbl, i, pos) { -+ unsigned long flags = READ_ONCE(b->flags); -+ -+ if (!(flags & (1 << BTREE_NODE_dirty))) -+ continue; -+ -+ pr_buf(out, "%p d %u n %u l %u w %u b %u r %u:%lu\n", -+ b, -+ (flags & (1 << BTREE_NODE_dirty)) != 0, -+ (flags & (1 << BTREE_NODE_need_write)) != 0, -+ b->c.level, -+ b->written, -+ !list_empty_careful(&b->write_blocked), -+ b->will_make_reachable != 0, -+ b->will_make_reachable & 1); -+ } -+ rcu_read_unlock(); -+} -diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h -new file mode 100644 -index 000000000000..626d0f071b70 ---- /dev/null -+++ b/fs/bcachefs/btree_io.h -@@ -0,0 +1,220 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_IO_H -+#define _BCACHEFS_BTREE_IO_H 
-+ -+#include "bkey_methods.h" -+#include "bset.h" -+#include "btree_locking.h" -+#include "checksum.h" -+#include "extents.h" -+#include "io_types.h" -+ -+struct bch_fs; -+struct btree_write; -+struct btree; -+struct btree_iter; -+ -+struct btree_read_bio { -+ struct bch_fs *c; -+ u64 start_time; -+ unsigned have_ioref:1; -+ struct extent_ptr_decoded pick; -+ struct work_struct work; -+ struct bio bio; -+}; -+ -+struct btree_write_bio { -+ struct work_struct work; -+ void *data; -+ unsigned bytes; -+ struct bch_write_bio wbio; -+}; -+ -+static inline void btree_node_io_unlock(struct btree *b) -+{ -+ EBUG_ON(!btree_node_write_in_flight(b)); -+ clear_btree_node_write_in_flight(b); -+ wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); -+} -+ -+static inline void btree_node_io_lock(struct btree *b) -+{ -+ wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight, -+ TASK_UNINTERRUPTIBLE); -+} -+ -+static inline void btree_node_wait_on_io(struct btree *b) -+{ -+ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, -+ TASK_UNINTERRUPTIBLE); -+} -+ -+static inline bool btree_node_may_write(struct btree *b) -+{ -+ return list_empty_careful(&b->write_blocked) && -+ (!b->written || !b->will_make_reachable); -+} -+ -+enum compact_mode { -+ COMPACT_LAZY, -+ COMPACT_ALL, -+}; -+ -+bool bch2_compact_whiteouts(struct bch_fs *, struct btree *, -+ enum compact_mode); -+ -+static inline bool should_compact_bset_lazy(struct btree *b, -+ struct bset_tree *t) -+{ -+ unsigned total_u64s = bset_u64s(t); -+ unsigned dead_u64s = bset_dead_u64s(b, t); -+ -+ return dead_u64s > 64 && dead_u64s * 3 > total_u64s; -+} -+ -+static inline bool bch2_maybe_compact_whiteouts(struct bch_fs *c, struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) -+ if (should_compact_bset_lazy(b, t)) -+ return bch2_compact_whiteouts(c, b, COMPACT_LAZY); -+ -+ return false; -+} -+ -+static inline struct nonce btree_nonce(struct bset *i, unsigned offset) -+{ -+ return (struct nonce) {{ -+ [0] = cpu_to_le32(offset), -+ [1] = ((__le32 *) &i->seq)[0], -+ [2] = ((__le32 *) &i->seq)[1], -+ [3] = ((__le32 *) &i->journal_seq)[0]^BCH_NONCE_BTREE, -+ }}; -+} -+ -+static inline void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset) -+{ -+ struct nonce nonce = btree_nonce(i, offset); -+ -+ if (!offset) { -+ struct btree_node *bn = container_of(i, struct btree_node, keys); -+ unsigned bytes = (void *) &bn->keys - (void *) &bn->flags; -+ -+ bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags, -+ bytes); -+ -+ nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE)); -+ } -+ -+ bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data, -+ vstruct_end(i) - (void *) i->_data); -+} -+ -+void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *); -+ -+void bch2_btree_build_aux_trees(struct btree *); -+void bch2_btree_init_next(struct bch_fs *, struct btree *, -+ struct btree_iter *); -+ -+int bch2_btree_node_read_done(struct bch_fs *, struct btree *, bool); -+void bch2_btree_node_read(struct bch_fs *, struct btree *, bool); -+int bch2_btree_root_read(struct bch_fs *, enum btree_id, -+ const struct bkey_i *, unsigned); -+ -+void bch2_btree_complete_write(struct bch_fs *, struct btree *, -+ struct btree_write *); -+void bch2_btree_write_error_work(struct work_struct *); -+ -+void __bch2_btree_node_write(struct bch_fs *, struct btree *, -+ enum six_lock_type); -+bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *); -+ -+void bch2_btree_node_write(struct bch_fs *, struct btree *, -+ enum six_lock_type); 
-+ -+static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b, -+ enum six_lock_type lock_held) -+{ -+ while (b->written && -+ btree_node_need_write(b) && -+ btree_node_may_write(b)) { -+ if (!btree_node_write_in_flight(b)) { -+ bch2_btree_node_write(c, b, lock_held); -+ break; -+ } -+ -+ six_unlock_type(&b->c.lock, lock_held); -+ btree_node_wait_on_io(b); -+ btree_node_lock_type(c, b, lock_held); -+ } -+} -+ -+#define bch2_btree_node_write_cond(_c, _b, cond) \ -+do { \ -+ unsigned long old, new, v = READ_ONCE((_b)->flags); \ -+ \ -+ do { \ -+ old = new = v; \ -+ \ -+ if (!(old & (1 << BTREE_NODE_dirty)) || !(cond)) \ -+ break; \ -+ \ -+ new |= (1 << BTREE_NODE_need_write); \ -+ } while ((v = cmpxchg(&(_b)->flags, old, new)) != old); \ -+ \ -+ btree_node_write_if_need(_c, _b, SIX_LOCK_read); \ -+} while (0) -+ -+void bch2_btree_flush_all_reads(struct bch_fs *); -+void bch2_btree_flush_all_writes(struct bch_fs *); -+void bch2_btree_verify_flushed(struct bch_fs *); -+void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *); -+ -+static inline void compat_bformat(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, struct bkey_format *f) -+{ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_id == BTREE_ID_INODES) { -+ swap(f->bits_per_field[BKEY_FIELD_INODE], -+ f->bits_per_field[BKEY_FIELD_OFFSET]); -+ swap(f->field_offset[BKEY_FIELD_INODE], -+ f->field_offset[BKEY_FIELD_OFFSET]); -+ } -+} -+ -+static inline void compat_bpos(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, struct bpos *p) -+{ -+ if (big_endian != CPU_BIG_ENDIAN) -+ bch2_bpos_swab(p); -+ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_id == BTREE_ID_INODES) -+ swap(p->inode, p->offset); -+} -+ -+static inline void compat_btree_node(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, -+ struct btree_node *bn) -+{ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_node_type_is_extents(btree_id) && -+ bkey_cmp(bn->min_key, POS_MIN) && -+ write) -+ bn->min_key = bkey_predecessor(bn->min_key); -+ -+ compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key); -+ compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key); -+ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_node_type_is_extents(btree_id) && -+ bkey_cmp(bn->min_key, POS_MIN) && -+ !write) -+ bn->min_key = bkey_successor(bn->min_key); -+} -+ -+#endif /* _BCACHEFS_BTREE_IO_H */ -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -new file mode 100644 -index 000000000000..6fab76c3220c ---- /dev/null -+++ b/fs/bcachefs/btree_iter.c -@@ -0,0 +1,2445 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_cache.h" -+#include "btree_iter.h" -+#include "btree_key_cache.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+#include "debug.h" -+#include "extents.h" -+#include "journal.h" -+ -+#include -+#include -+ -+static inline bool is_btree_node(struct btree_iter *iter, unsigned l) -+{ -+ return l < BTREE_MAX_DEPTH && -+ (unsigned long) iter->l[l].b >= 128; -+} -+ -+static inline struct bpos btree_iter_search_key(struct btree_iter *iter) -+{ -+ struct bpos pos = iter->pos; -+ -+ if ((iter->flags & BTREE_ITER_IS_EXTENTS) && -+ bkey_cmp(pos, POS_MAX)) -+ pos = bkey_successor(pos); -+ return pos; -+} -+ -+static 
inline bool btree_iter_pos_before_node(struct btree_iter *iter, -+ struct btree *b) -+{ -+ return bkey_cmp(btree_iter_search_key(iter), b->data->min_key) < 0; -+} -+ -+static inline bool btree_iter_pos_after_node(struct btree_iter *iter, -+ struct btree *b) -+{ -+ return bkey_cmp(b->key.k.p, btree_iter_search_key(iter)) < 0; -+} -+ -+static inline bool btree_iter_pos_in_node(struct btree_iter *iter, -+ struct btree *b) -+{ -+ return iter->btree_id == b->c.btree_id && -+ !btree_iter_pos_before_node(iter, b) && -+ !btree_iter_pos_after_node(iter, b); -+} -+ -+/* Btree node locking: */ -+ -+void bch2_btree_node_unlock_write(struct btree *b, struct btree_iter *iter) -+{ -+ bch2_btree_node_unlock_write_inlined(b, iter); -+} -+ -+void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter) -+{ -+ struct btree_iter *linked; -+ unsigned readers = 0; -+ -+ EBUG_ON(!btree_node_intent_locked(iter, b->c.level)); -+ -+ trans_for_each_iter(iter->trans, linked) -+ if (linked->l[b->c.level].b == b && -+ btree_node_read_locked(linked, b->c.level)) -+ readers++; -+ -+ /* -+ * Must drop our read locks before calling six_lock_write() - -+ * six_unlock() won't do wakeups until the reader count -+ * goes to 0, and it's safe because we have the node intent -+ * locked: -+ */ -+ atomic64_sub(__SIX_VAL(read_lock, readers), -+ &b->c.lock.state.counter); -+ btree_node_lock_type(iter->trans->c, b, SIX_LOCK_write); -+ atomic64_add(__SIX_VAL(read_lock, readers), -+ &b->c.lock.state.counter); -+} -+ -+bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level) -+{ -+ struct btree *b = btree_iter_node(iter, level); -+ int want = __btree_lock_want(iter, level); -+ -+ if (!is_btree_node(iter, level)) -+ return false; -+ -+ if (race_fault()) -+ return false; -+ -+ if (six_relock_type(&b->c.lock, want, iter->l[level].lock_seq) || -+ (btree_node_lock_seq_matches(iter, b, level) && -+ btree_node_lock_increment(iter->trans, b, level, want))) { -+ mark_btree_node_locked(iter, level, want); -+ return true; -+ } else { -+ return false; -+ } -+} -+ -+static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level) -+{ -+ struct btree *b = iter->l[level].b; -+ -+ EBUG_ON(btree_lock_want(iter, level) != BTREE_NODE_INTENT_LOCKED); -+ -+ if (!is_btree_node(iter, level)) -+ return false; -+ -+ if (btree_node_intent_locked(iter, level)) -+ return true; -+ -+ if (race_fault()) -+ return false; -+ -+ if (btree_node_locked(iter, level) -+ ? six_lock_tryupgrade(&b->c.lock) -+ : six_relock_type(&b->c.lock, SIX_LOCK_intent, iter->l[level].lock_seq)) -+ goto success; -+ -+ if (btree_node_lock_seq_matches(iter, b, level) && -+ btree_node_lock_increment(iter->trans, b, level, BTREE_NODE_INTENT_LOCKED)) { -+ btree_node_unlock(iter, level); -+ goto success; -+ } -+ -+ return false; -+success: -+ mark_btree_node_intent_locked(iter, level); -+ return true; -+} -+ -+static inline bool btree_iter_get_locks(struct btree_iter *iter, -+ bool upgrade, bool trace) -+{ -+ unsigned l = iter->level; -+ int fail_idx = -1; -+ -+ do { -+ if (!btree_iter_node(iter, l)) -+ break; -+ -+ if (!(upgrade -+ ? bch2_btree_node_upgrade(iter, l) -+ : bch2_btree_node_relock(iter, l))) { -+ if (trace) -+ (upgrade -+ ? trace_node_upgrade_fail -+ : trace_node_relock_fail)(l, iter->l[l].lock_seq, -+ is_btree_node(iter, l) -+ ? 0 -+ : (unsigned long) iter->l[l].b, -+ is_btree_node(iter, l) -+ ? 
iter->l[l].b->c.lock.state.seq -+ : 0); -+ -+ fail_idx = l; -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+ } -+ -+ l++; -+ } while (l < iter->locks_want); -+ -+ /* -+ * When we fail to get a lock, we have to ensure that any child nodes -+ * can't be relocked so bch2_btree_iter_traverse has to walk back up to -+ * the node that we failed to relock: -+ */ -+ while (fail_idx >= 0) { -+ btree_node_unlock(iter, fail_idx); -+ iter->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS; -+ --fail_idx; -+ } -+ -+ if (iter->uptodate == BTREE_ITER_NEED_RELOCK) -+ iter->uptodate = BTREE_ITER_NEED_PEEK; -+ -+ bch2_btree_trans_verify_locks(iter->trans); -+ -+ return iter->uptodate < BTREE_ITER_NEED_RELOCK; -+} -+ -+static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b, -+ enum btree_iter_type type) -+{ -+ return type != BTREE_ITER_CACHED -+ ? container_of(_b, struct btree, c)->key.k.p -+ : container_of(_b, struct bkey_cached, c)->key.pos; -+} -+ -+/* Slowpath: */ -+bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, -+ unsigned level, struct btree_iter *iter, -+ enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, -+ void *p) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree_iter *linked; -+ u64 start_time = local_clock(); -+ bool ret = true; -+ -+ /* Check if it's safe to block: */ -+ trans_for_each_iter(trans, linked) { -+ if (!linked->nodes_locked) -+ continue; -+ -+ /* -+ * Can't block taking an intent lock if we have _any_ nodes read -+ * locked: -+ * -+ * - Our read lock blocks another thread with an intent lock on -+ * the same node from getting a write lock, and thus from -+ * dropping its intent lock -+ * -+ * - And the other thread may have multiple nodes intent locked: -+ * both the node we want to intent lock, and the node we -+ * already have read locked - deadlock: -+ */ -+ if (type == SIX_LOCK_intent && -+ linked->nodes_locked != linked->nodes_intent_locked) { -+ if (!(trans->nounlock)) { -+ linked->locks_want = max_t(unsigned, -+ linked->locks_want, -+ __fls(linked->nodes_locked) + 1); -+ if (!btree_iter_get_locks(linked, true, false)) -+ ret = false; -+ } else { -+ ret = false; -+ } -+ } -+ -+ /* -+ * Interior nodes must be locked before their descendants: if -+ * another iterator has possible descendants locked of the node -+ * we're about to lock, it must have the ancestors locked too: -+ */ -+ if (linked->btree_id == iter->btree_id && -+ level > __fls(linked->nodes_locked)) { -+ if (!(trans->nounlock)) { -+ linked->locks_want = -+ max(level + 1, max_t(unsigned, -+ linked->locks_want, -+ iter->locks_want)); -+ if (!btree_iter_get_locks(linked, true, false)) -+ ret = false; -+ } else { -+ ret = false; -+ } -+ } -+ -+ /* Must lock btree nodes in key order: */ -+ if ((cmp_int(iter->btree_id, linked->btree_id) ?: -+ -cmp_int(btree_iter_type(iter), btree_iter_type(linked))) < 0) -+ ret = false; -+ -+ if (iter->btree_id == linked->btree_id && -+ btree_node_locked(linked, level) && -+ bkey_cmp(pos, btree_node_pos((void *) linked->l[level].b, -+ btree_iter_type(linked))) <= 0) -+ ret = false; -+ -+ /* -+ * Recheck if this is a node we already have locked - since one -+ * of the get_locks() calls might've successfully -+ * upgraded/relocked it: -+ */ -+ if (linked->l[level].b == b && -+ btree_node_locked_type(linked, level) >= type) { -+ six_lock_increment(&b->c.lock, type); -+ return true; -+ } -+ } -+ -+ if (unlikely(!ret)) { -+ trace_trans_restart_would_deadlock(iter->trans->ip); -+ return false; -+ } -+ -+ if 
(six_trylock_type(&b->c.lock, type)) -+ return true; -+ -+ if (six_lock_type(&b->c.lock, type, should_sleep_fn, p)) -+ return false; -+ -+ bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)], -+ start_time); -+ return true; -+} -+ -+/* Btree iterator locking: */ -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+static void bch2_btree_iter_verify_locks(struct btree_iter *iter) -+{ -+ unsigned l; -+ -+ if (!(iter->trans->iters_linked & (1ULL << iter->idx))) { -+ BUG_ON(iter->nodes_locked); -+ return; -+ } -+ -+ for (l = 0; is_btree_node(iter, l); l++) { -+ if (iter->uptodate >= BTREE_ITER_NEED_RELOCK && -+ !btree_node_locked(iter, l)) -+ continue; -+ -+ BUG_ON(btree_lock_want(iter, l) != -+ btree_node_locked_type(iter, l)); -+ } -+} -+ -+void bch2_btree_trans_verify_locks(struct btree_trans *trans) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter_all(trans, iter) -+ bch2_btree_iter_verify_locks(iter); -+} -+#else -+static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {} -+#endif -+ -+__flatten -+bool bch2_btree_iter_relock(struct btree_iter *iter, bool trace) -+{ -+ return btree_iter_get_locks(iter, false, trace); -+} -+ -+bool __bch2_btree_iter_upgrade(struct btree_iter *iter, -+ unsigned new_locks_want) -+{ -+ struct btree_iter *linked; -+ -+ EBUG_ON(iter->locks_want >= new_locks_want); -+ -+ iter->locks_want = new_locks_want; -+ -+ if (btree_iter_get_locks(iter, true, true)) -+ return true; -+ -+ /* -+ * Ancestor nodes must be locked before child nodes, so set locks_want -+ * on iterators that might lock ancestors before us to avoid getting -+ * -EINTR later: -+ */ -+ trans_for_each_iter(iter->trans, linked) -+ if (linked != iter && -+ linked->btree_id == iter->btree_id && -+ linked->locks_want < new_locks_want) { -+ linked->locks_want = new_locks_want; -+ btree_iter_get_locks(linked, true, false); -+ } -+ -+ return false; -+} -+ -+bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *iter, -+ unsigned new_locks_want) -+{ -+ unsigned l = iter->level; -+ -+ EBUG_ON(iter->locks_want >= new_locks_want); -+ -+ iter->locks_want = new_locks_want; -+ -+ do { -+ if (!btree_iter_node(iter, l)) -+ break; -+ -+ if (!bch2_btree_node_upgrade(iter, l)) { -+ iter->locks_want = l; -+ return false; -+ } -+ -+ l++; -+ } while (l < iter->locks_want); -+ -+ return true; -+} -+ -+void __bch2_btree_iter_downgrade(struct btree_iter *iter, -+ unsigned downgrade_to) -+{ -+ unsigned l, new_locks_want = downgrade_to ?: -+ (iter->flags & BTREE_ITER_INTENT ? 
1 : 0); -+ -+ if (iter->locks_want < downgrade_to) { -+ iter->locks_want = new_locks_want; -+ -+ while (iter->nodes_locked && -+ (l = __fls(iter->nodes_locked)) >= iter->locks_want) { -+ if (l > iter->level) { -+ btree_node_unlock(iter, l); -+ } else { -+ if (btree_node_intent_locked(iter, l)) { -+ six_lock_downgrade(&iter->l[l].b->c.lock); -+ iter->nodes_intent_locked ^= 1 << l; -+ } -+ break; -+ } -+ } -+ } -+ -+ bch2_btree_trans_verify_locks(iter->trans); -+} -+ -+void bch2_trans_downgrade(struct btree_trans *trans) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) -+ bch2_btree_iter_downgrade(iter); -+} -+ -+/* Btree transaction locking: */ -+ -+bool bch2_trans_relock(struct btree_trans *trans) -+{ -+ struct btree_iter *iter; -+ bool ret = true; -+ -+ trans_for_each_iter(trans, iter) -+ if (iter->uptodate == BTREE_ITER_NEED_RELOCK) -+ ret &= bch2_btree_iter_relock(iter, true); -+ -+ return ret; -+} -+ -+void bch2_trans_unlock(struct btree_trans *trans) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) -+ __bch2_btree_iter_unlock(iter); -+} -+ -+/* Btree iterator: */ -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+static void bch2_btree_iter_verify_cached(struct btree_iter *iter) -+{ -+ struct bkey_cached *ck; -+ bool locked = btree_node_locked(iter, 0); -+ -+ if (!bch2_btree_node_relock(iter, 0)) -+ return; -+ -+ ck = (void *) iter->l[0].b; -+ BUG_ON(ck->key.btree_id != iter->btree_id || -+ bkey_cmp(ck->key.pos, iter->pos)); -+ -+ if (!locked) -+ btree_node_unlock(iter, 0); -+} -+ -+static void bch2_btree_iter_verify_level(struct btree_iter *iter, -+ unsigned level) -+{ -+ struct bpos pos = btree_iter_search_key(iter); -+ struct btree_iter_level *l = &iter->l[level]; -+ struct btree_node_iter tmp = l->iter; -+ bool locked = btree_node_locked(iter, level); -+ struct bkey_packed *p, *k; -+ char buf1[100], buf2[100]; -+ const char *msg; -+ -+ if (!debug_check_iterators(iter->trans->c)) -+ return; -+ -+ if (btree_iter_type(iter) == BTREE_ITER_CACHED) { -+ if (!level) -+ bch2_btree_iter_verify_cached(iter); -+ return; -+ } -+ -+ BUG_ON(iter->level < iter->min_depth); -+ -+ if (!btree_iter_node(iter, level)) -+ return; -+ -+ if (!bch2_btree_node_relock(iter, level)) -+ return; -+ -+ /* -+ * Ideally this invariant would always be true, and hopefully in the -+ * future it will be, but for now set_pos_same_leaf() breaks it: -+ */ -+ BUG_ON(iter->uptodate < BTREE_ITER_NEED_TRAVERSE && -+ !btree_iter_pos_in_node(iter, l->b)); -+ -+ /* -+ * node iterators don't use leaf node iterator: -+ */ -+ if (btree_iter_type(iter) == BTREE_ITER_NODES && -+ level <= iter->min_depth) -+ goto unlock; -+ -+ bch2_btree_node_iter_verify(&l->iter, l->b); -+ -+ /* -+ * For interior nodes, the iterator will have skipped past -+ * deleted keys: -+ * -+ * For extents, the iterator may have skipped past deleted keys (but not -+ * whiteouts) -+ */ -+ p = level || btree_node_type_is_extents(iter->btree_id) -+ ? 
bch2_btree_node_iter_prev_filter(&tmp, l->b, KEY_TYPE_discard) -+ : bch2_btree_node_iter_prev_all(&tmp, l->b); -+ k = bch2_btree_node_iter_peek_all(&l->iter, l->b); -+ -+ if (p && bkey_iter_pos_cmp(l->b, p, &pos) >= 0) { -+ msg = "before"; -+ goto err; -+ } -+ -+ if (k && bkey_iter_pos_cmp(l->b, k, &pos) < 0) { -+ msg = "after"; -+ goto err; -+ } -+unlock: -+ if (!locked) -+ btree_node_unlock(iter, level); -+ return; -+err: -+ strcpy(buf1, "(none)"); -+ strcpy(buf2, "(none)"); -+ -+ if (p) { -+ struct bkey uk = bkey_unpack_key(l->b, p); -+ bch2_bkey_to_text(&PBUF(buf1), &uk); -+ } -+ -+ if (k) { -+ struct bkey uk = bkey_unpack_key(l->b, k); -+ bch2_bkey_to_text(&PBUF(buf2), &uk); -+ } -+ -+ panic("iterator should be %s key at level %u:\n" -+ "iter pos %s %llu:%llu\n" -+ "prev key %s\n" -+ "cur key %s\n", -+ msg, level, -+ iter->flags & BTREE_ITER_IS_EXTENTS ? ">" : "=>", -+ iter->pos.inode, iter->pos.offset, -+ buf1, buf2); -+} -+ -+static void bch2_btree_iter_verify(struct btree_iter *iter) -+{ -+ unsigned i; -+ -+ bch2_btree_trans_verify_locks(iter->trans); -+ -+ for (i = 0; i < BTREE_MAX_DEPTH; i++) -+ bch2_btree_iter_verify_level(iter, i); -+} -+ -+void bch2_btree_trans_verify_iters(struct btree_trans *trans, struct btree *b) -+{ -+ struct btree_iter *iter; -+ -+ if (!debug_check_iterators(trans->c)) -+ return; -+ -+ trans_for_each_iter_with_node(trans, b, iter) -+ bch2_btree_iter_verify_level(iter, b->c.level); -+} -+ -+#else -+ -+static inline void bch2_btree_iter_verify_level(struct btree_iter *iter, unsigned l) {} -+static inline void bch2_btree_iter_verify(struct btree_iter *iter) {} -+ -+#endif -+ -+static void btree_node_iter_set_set_pos(struct btree_node_iter *iter, -+ struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *k) -+{ -+ struct btree_node_iter_set *set; -+ -+ btree_node_iter_for_each(iter, set) -+ if (set->end == t->end_offset) { -+ set->k = __btree_node_key_to_offset(b, k); -+ bch2_btree_node_iter_sort(iter, b); -+ return; -+ } -+ -+ bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t)); -+} -+ -+static void __bch2_btree_iter_fix_key_modified(struct btree_iter *iter, -+ struct btree *b, -+ struct bkey_packed *where) -+{ -+ struct btree_iter_level *l = &iter->l[b->c.level]; -+ struct bpos pos = btree_iter_search_key(iter); -+ -+ if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b)) -+ return; -+ -+ if (bkey_iter_pos_cmp(l->b, where, &pos) < 0) -+ bch2_btree_node_iter_advance(&l->iter, l->b); -+ -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+} -+ -+void bch2_btree_iter_fix_key_modified(struct btree_iter *iter, -+ struct btree *b, -+ struct bkey_packed *where) -+{ -+ struct btree_iter *linked; -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) { -+ __bch2_btree_iter_fix_key_modified(linked, b, where); -+ bch2_btree_iter_verify_level(linked, b->c.level); -+ } -+} -+ -+static void __bch2_btree_node_iter_fix(struct btree_iter *iter, -+ struct btree *b, -+ struct btree_node_iter *node_iter, -+ struct bset_tree *t, -+ struct bkey_packed *where, -+ unsigned clobber_u64s, -+ unsigned new_u64s) -+{ -+ const struct bkey_packed *end = btree_bkey_last(b, t); -+ struct btree_node_iter_set *set; -+ unsigned offset = __btree_node_key_to_offset(b, where); -+ int shift = new_u64s - clobber_u64s; -+ unsigned old_end = t->end_offset - shift; -+ unsigned orig_iter_pos = node_iter->data[0].k; -+ bool iter_current_key_modified = -+ orig_iter_pos >= offset && -+ orig_iter_pos <= offset + clobber_u64s; -+ struct bpos iter_pos = 
btree_iter_search_key(iter); -+ -+ btree_node_iter_for_each(node_iter, set) -+ if (set->end == old_end) -+ goto found; -+ -+ /* didn't find the bset in the iterator - might have to readd it: */ -+ if (new_u64s && -+ bkey_iter_pos_cmp(b, where, &iter_pos) >= 0) { -+ bch2_btree_node_iter_push(node_iter, b, where, end); -+ goto fixup_done; -+ } else { -+ /* Iterator is after key that changed */ -+ return; -+ } -+found: -+ set->end = t->end_offset; -+ -+ /* Iterator hasn't gotten to the key that changed yet: */ -+ if (set->k < offset) -+ return; -+ -+ if (new_u64s && -+ bkey_iter_pos_cmp(b, where, &iter_pos) >= 0) { -+ set->k = offset; -+ } else if (set->k < offset + clobber_u64s) { -+ set->k = offset + new_u64s; -+ if (set->k == set->end) -+ bch2_btree_node_iter_set_drop(node_iter, set); -+ } else { -+ /* Iterator is after key that changed */ -+ set->k = (int) set->k + shift; -+ return; -+ } -+ -+ bch2_btree_node_iter_sort(node_iter, b); -+fixup_done: -+ if (node_iter->data[0].k != orig_iter_pos) -+ iter_current_key_modified = true; -+ -+ /* -+ * When a new key is added, and the node iterator now points to that -+ * key, the iterator might have skipped past deleted keys that should -+ * come after the key the iterator now points to. We have to rewind to -+ * before those deleted keys - otherwise -+ * bch2_btree_node_iter_prev_all() breaks: -+ */ -+ if (!bch2_btree_node_iter_end(node_iter) && -+ iter_current_key_modified && -+ (b->c.level || -+ btree_node_type_is_extents(iter->btree_id))) { -+ struct bset_tree *t; -+ struct bkey_packed *k, *k2, *p; -+ -+ k = bch2_btree_node_iter_peek_all(node_iter, b); -+ -+ for_each_bset(b, t) { -+ bool set_pos = false; -+ -+ if (node_iter->data[0].end == t->end_offset) -+ continue; -+ -+ k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t); -+ -+ while ((p = bch2_bkey_prev_all(b, t, k2)) && -+ bkey_iter_cmp(b, k, p) < 0) { -+ k2 = p; -+ set_pos = true; -+ } -+ -+ if (set_pos) -+ btree_node_iter_set_set_pos(node_iter, -+ b, t, k2); -+ } -+ } -+ -+ if (!b->c.level && -+ node_iter == &iter->l[0].iter && -+ iter_current_key_modified) -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+} -+ -+void bch2_btree_node_iter_fix(struct btree_iter *iter, -+ struct btree *b, -+ struct btree_node_iter *node_iter, -+ struct bkey_packed *where, -+ unsigned clobber_u64s, -+ unsigned new_u64s) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, where); -+ struct btree_iter *linked; -+ -+ if (node_iter != &iter->l[b->c.level].iter) { -+ __bch2_btree_node_iter_fix(iter, b, node_iter, t, -+ where, clobber_u64s, new_u64s); -+ -+ if (debug_check_iterators(iter->trans->c)) -+ bch2_btree_node_iter_verify(node_iter, b); -+ } -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) { -+ __bch2_btree_node_iter_fix(linked, b, -+ &linked->l[b->c.level].iter, t, -+ where, clobber_u64s, new_u64s); -+ bch2_btree_iter_verify_level(linked, b->c.level); -+ } -+} -+ -+static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter, -+ struct btree_iter_level *l, -+ struct bkey *u, -+ struct bkey_packed *k) -+{ -+ struct bkey_s_c ret; -+ -+ if (unlikely(!k)) { -+ /* -+ * signal to bch2_btree_iter_peek_slot() that we're currently at -+ * a hole -+ */ -+ u->type = KEY_TYPE_deleted; -+ return bkey_s_c_null; -+ } -+ -+ ret = bkey_disassemble(l->b, k, u); -+ -+ if (debug_check_bkeys(iter->trans->c)) -+ bch2_bkey_debugcheck(iter->trans->c, l->b, ret); -+ -+ return ret; -+} -+ -+/* peek_all() doesn't skip deleted keys */ -+static inline struct bkey_s_c __btree_iter_peek_all(struct 
btree_iter *iter, -+ struct btree_iter_level *l, -+ struct bkey *u) -+{ -+ return __btree_iter_unpack(iter, l, u, -+ bch2_btree_node_iter_peek_all(&l->iter, l->b)); -+} -+ -+static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter, -+ struct btree_iter_level *l) -+{ -+ return __btree_iter_unpack(iter, l, &iter->k, -+ bch2_btree_node_iter_peek(&l->iter, l->b)); -+} -+ -+static inline struct bkey_s_c __btree_iter_prev(struct btree_iter *iter, -+ struct btree_iter_level *l) -+{ -+ return __btree_iter_unpack(iter, l, &iter->k, -+ bch2_btree_node_iter_prev(&l->iter, l->b)); -+} -+ -+static inline bool btree_iter_advance_to_pos(struct btree_iter *iter, -+ struct btree_iter_level *l, -+ int max_advance) -+{ -+ struct bpos pos = btree_iter_search_key(iter); -+ struct bkey_packed *k; -+ int nr_advanced = 0; -+ -+ while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) && -+ bkey_iter_pos_cmp(l->b, k, &pos) < 0) { -+ if (max_advance > 0 && nr_advanced >= max_advance) -+ return false; -+ -+ bch2_btree_node_iter_advance(&l->iter, l->b); -+ nr_advanced++; -+ } -+ -+ return true; -+} -+ -+/* -+ * Verify that iterator for parent node points to child node: -+ */ -+static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b) -+{ -+ struct btree_iter_level *l; -+ unsigned plevel; -+ bool parent_locked; -+ struct bkey_packed *k; -+ -+ if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) -+ return; -+ -+ plevel = b->c.level + 1; -+ if (!btree_iter_node(iter, plevel)) -+ return; -+ -+ parent_locked = btree_node_locked(iter, plevel); -+ -+ if (!bch2_btree_node_relock(iter, plevel)) -+ return; -+ -+ l = &iter->l[plevel]; -+ k = bch2_btree_node_iter_peek_all(&l->iter, l->b); -+ if (!k || -+ bkey_deleted(k) || -+ bkey_cmp_left_packed(l->b, k, &b->key.k.p)) { -+ char buf[100]; -+ struct bkey uk = bkey_unpack_key(b, k); -+ -+ bch2_bkey_to_text(&PBUF(buf), &uk); -+ panic("parent iter doesn't point to new node:\n%s\n%llu:%llu\n", -+ buf, b->key.k.p.inode, b->key.k.p.offset); -+ } -+ -+ if (!parent_locked) -+ btree_node_unlock(iter, b->c.level + 1); -+} -+ -+static inline void __btree_iter_init(struct btree_iter *iter, -+ unsigned level) -+{ -+ struct bpos pos = btree_iter_search_key(iter); -+ struct btree_iter_level *l = &iter->l[level]; -+ -+ bch2_btree_node_iter_init(&l->iter, l->b, &pos); -+ -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+} -+ -+static inline void btree_iter_node_set(struct btree_iter *iter, -+ struct btree *b) -+{ -+ BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED); -+ -+ btree_iter_verify_new_node(iter, b); -+ -+ EBUG_ON(!btree_iter_pos_in_node(iter, b)); -+ EBUG_ON(b->c.lock.state.seq & 1); -+ -+ iter->l[b->c.level].lock_seq = b->c.lock.state.seq; -+ iter->l[b->c.level].b = b; -+ __btree_iter_init(iter, b->c.level); -+} -+ -+/* -+ * A btree node is being replaced - update the iterator to point to the new -+ * node: -+ */ -+void bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b) -+{ -+ enum btree_node_locked_type t; -+ struct btree_iter *linked; -+ -+ trans_for_each_iter(iter->trans, linked) -+ if (btree_iter_type(linked) != BTREE_ITER_CACHED && -+ btree_iter_pos_in_node(linked, b)) { -+ /* -+ * bch2_btree_iter_node_drop() has already been called - -+ * the old node we're replacing has already been -+ * unlocked and the pointer invalidated -+ */ -+ BUG_ON(btree_node_locked(linked, b->c.level)); -+ -+ t = btree_lock_want(linked, b->c.level); -+ if (t != BTREE_NODE_UNLOCKED) { -+ six_lock_increment(&b->c.lock, t); -+ 
mark_btree_node_locked(linked, b->c.level, t); -+ } -+ -+ btree_iter_node_set(linked, b); -+ } -+} -+ -+void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b) -+{ -+ struct btree_iter *linked; -+ unsigned level = b->c.level; -+ -+ trans_for_each_iter(iter->trans, linked) -+ if (linked->l[level].b == b) { -+ __btree_node_unlock(linked, level); -+ linked->l[level].b = BTREE_ITER_NO_NODE_DROP; -+ } -+} -+ -+/* -+ * A btree node has been modified in such a way as to invalidate iterators - fix -+ * them: -+ */ -+void bch2_btree_iter_reinit_node(struct btree_iter *iter, struct btree *b) -+{ -+ struct btree_iter *linked; -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) -+ __btree_iter_init(linked, b->c.level); -+} -+ -+static int lock_root_check_fn(struct six_lock *lock, void *p) -+{ -+ struct btree *b = container_of(lock, struct btree, c.lock); -+ struct btree **rootp = p; -+ -+ return b == *rootp ? 0 : -1; -+} -+ -+static inline int btree_iter_lock_root(struct btree_iter *iter, -+ unsigned depth_want) -+{ -+ struct bch_fs *c = iter->trans->c; -+ struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b; -+ enum six_lock_type lock_type; -+ unsigned i; -+ -+ EBUG_ON(iter->nodes_locked); -+ -+ while (1) { -+ b = READ_ONCE(*rootp); -+ iter->level = READ_ONCE(b->c.level); -+ -+ if (unlikely(iter->level < depth_want)) { -+ /* -+ * the root is at a lower depth than the depth we want: -+ * got to the end of the btree, or we're walking nodes -+ * greater than some depth and there are no nodes >= -+ * that depth -+ */ -+ iter->level = depth_want; -+ for (i = iter->level; i < BTREE_MAX_DEPTH; i++) -+ iter->l[i].b = NULL; -+ return 1; -+ } -+ -+ lock_type = __btree_lock_want(iter, iter->level); -+ if (unlikely(!btree_node_lock(b, POS_MAX, iter->level, -+ iter, lock_type, -+ lock_root_check_fn, rootp))) -+ return -EINTR; -+ -+ if (likely(b == READ_ONCE(*rootp) && -+ b->c.level == iter->level && -+ !race_fault())) { -+ for (i = 0; i < iter->level; i++) -+ iter->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT; -+ iter->l[iter->level].b = b; -+ for (i = iter->level + 1; i < BTREE_MAX_DEPTH; i++) -+ iter->l[i].b = NULL; -+ -+ mark_btree_node_locked(iter, iter->level, lock_type); -+ btree_iter_node_set(iter, b); -+ return 0; -+ } -+ -+ six_unlock_type(&b->c.lock, lock_type); -+ } -+} -+ -+noinline -+static void btree_iter_prefetch(struct btree_iter *iter) -+{ -+ struct bch_fs *c = iter->trans->c; -+ struct btree_iter_level *l = &iter->l[iter->level]; -+ struct btree_node_iter node_iter = l->iter; -+ struct bkey_packed *k; -+ BKEY_PADDED(k) tmp; -+ unsigned nr = test_bit(BCH_FS_STARTED, &c->flags) -+ ? (iter->level > 1 ? 0 : 2) -+ : (iter->level > 1 ? 
1 : 16); -+ bool was_locked = btree_node_locked(iter, iter->level); -+ -+ while (nr) { -+ if (!bch2_btree_node_relock(iter, iter->level)) -+ return; -+ -+ bch2_btree_node_iter_advance(&node_iter, l->b); -+ k = bch2_btree_node_iter_peek(&node_iter, l->b); -+ if (!k) -+ break; -+ -+ bch2_bkey_unpack(l->b, &tmp.k, k); -+ bch2_btree_node_prefetch(c, iter, &tmp.k, iter->level - 1); -+ } -+ -+ if (!was_locked) -+ btree_node_unlock(iter, iter->level); -+} -+ -+static noinline void btree_node_mem_ptr_set(struct btree_iter *iter, -+ unsigned plevel, struct btree *b) -+{ -+ struct btree_iter_level *l = &iter->l[plevel]; -+ bool locked = btree_node_locked(iter, plevel); -+ struct bkey_packed *k; -+ struct bch_btree_ptr_v2 *bp; -+ -+ if (!bch2_btree_node_relock(iter, plevel)) -+ return; -+ -+ k = bch2_btree_node_iter_peek_all(&l->iter, l->b); -+ BUG_ON(k->type != KEY_TYPE_btree_ptr_v2); -+ -+ bp = (void *) bkeyp_val(&l->b->format, k); -+ bp->mem_ptr = (unsigned long)b; -+ -+ if (!locked) -+ btree_node_unlock(iter, plevel); -+} -+ -+static __always_inline int btree_iter_down(struct btree_iter *iter) -+{ -+ struct bch_fs *c = iter->trans->c; -+ struct btree_iter_level *l = &iter->l[iter->level]; -+ struct btree *b; -+ unsigned level = iter->level - 1; -+ enum six_lock_type lock_type = __btree_lock_want(iter, level); -+ BKEY_PADDED(k) tmp; -+ -+ EBUG_ON(!btree_node_locked(iter, iter->level)); -+ -+ bch2_bkey_unpack(l->b, &tmp.k, -+ bch2_btree_node_iter_peek(&l->iter, l->b)); -+ -+ b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type); -+ if (unlikely(IS_ERR(b))) -+ return PTR_ERR(b); -+ -+ mark_btree_node_locked(iter, level, lock_type); -+ btree_iter_node_set(iter, b); -+ -+ if (tmp.k.k.type == KEY_TYPE_btree_ptr_v2 && -+ unlikely(b != btree_node_mem_ptr(&tmp.k))) -+ btree_node_mem_ptr_set(iter, level + 1, b); -+ -+ if (iter->flags & BTREE_ITER_PREFETCH) -+ btree_iter_prefetch(iter); -+ -+ iter->level = level; -+ -+ return 0; -+} -+ -+static void btree_iter_up(struct btree_iter *iter) -+{ -+ btree_node_unlock(iter, iter->level++); -+} -+ -+static int btree_iter_traverse_one(struct btree_iter *); -+ -+static int __btree_iter_traverse_all(struct btree_trans *trans, int ret) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *iter; -+ u8 sorted[BTREE_ITER_MAX]; -+ unsigned i, nr_sorted = 0; -+ -+ if (trans->in_traverse_all) -+ return -EINTR; -+ -+ trans->in_traverse_all = true; -+retry_all: -+ nr_sorted = 0; -+ -+ trans_for_each_iter(trans, iter) -+ sorted[nr_sorted++] = iter->idx; -+ -+#define btree_iter_cmp_by_idx(_l, _r) \ -+ btree_iter_cmp(&trans->iters[_l], &trans->iters[_r]) -+ -+ bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx); -+#undef btree_iter_cmp_by_idx -+ bch2_trans_unlock(trans); -+ -+ if (unlikely(ret == -ENOMEM)) { -+ struct closure cl; -+ -+ closure_init_stack(&cl); -+ -+ do { -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); -+ closure_sync(&cl); -+ } while (ret); -+ } -+ -+ if (unlikely(ret == -EIO)) { -+ trans->error = true; -+ goto out; -+ } -+ -+ BUG_ON(ret && ret != -EINTR); -+ -+ /* Now, redo traversals in correct order: */ -+ for (i = 0; i < nr_sorted; i++) { -+ unsigned idx = sorted[i]; -+ -+ /* -+ * sucessfully traversing one iterator can cause another to be -+ * unlinked, in btree_key_cache_fill() -+ */ -+ if (!(trans->iters_linked & (1ULL << idx))) -+ continue; -+ -+ ret = btree_iter_traverse_one(&trans->iters[idx]); -+ if (ret) -+ goto retry_all; -+ } -+ -+ if (hweight64(trans->iters_live) > 1) -+ ret = -EINTR; -+ else -+ trans_for_each_iter(trans, iter) -+ 
if (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) { -+ ret = -EINTR; -+ break; -+ } -+out: -+ bch2_btree_cache_cannibalize_unlock(c); -+ -+ trans->in_traverse_all = false; -+ return ret; -+} -+ -+int bch2_btree_iter_traverse_all(struct btree_trans *trans) -+{ -+ return __btree_iter_traverse_all(trans, 0); -+} -+ -+static inline bool btree_iter_good_node(struct btree_iter *iter, -+ unsigned l, int check_pos) -+{ -+ if (!is_btree_node(iter, l) || -+ !bch2_btree_node_relock(iter, l)) -+ return false; -+ -+ if (check_pos <= 0 && btree_iter_pos_before_node(iter, iter->l[l].b)) -+ return false; -+ if (check_pos >= 0 && btree_iter_pos_after_node(iter, iter->l[l].b)) -+ return false; -+ return true; -+} -+ -+static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter, -+ int check_pos) -+{ -+ unsigned l = iter->level; -+ -+ while (btree_iter_node(iter, l) && -+ !btree_iter_good_node(iter, l, check_pos)) { -+ btree_node_unlock(iter, l); -+ iter->l[l].b = BTREE_ITER_NO_NODE_UP; -+ l++; -+ } -+ -+ return l; -+} -+ -+/* -+ * This is the main state machine for walking down the btree - walks down to a -+ * specified depth -+ * -+ * Returns 0 on success, -EIO on error (error reading in a btree node). -+ * -+ * On error, caller (peek_node()/peek_key()) must return NULL; the error is -+ * stashed in the iterator and returned from bch2_trans_exit(). -+ */ -+static int btree_iter_traverse_one(struct btree_iter *iter) -+{ -+ unsigned depth_want = iter->level; -+ -+ /* -+ * if we need interior nodes locked, call btree_iter_relock() to make -+ * sure we walk back up enough that we lock them: -+ */ -+ if (iter->uptodate == BTREE_ITER_NEED_RELOCK || -+ iter->locks_want > 1) -+ bch2_btree_iter_relock(iter, false); -+ -+ if (btree_iter_type(iter) == BTREE_ITER_CACHED) -+ return bch2_btree_iter_traverse_cached(iter); -+ -+ if (iter->uptodate < BTREE_ITER_NEED_RELOCK) -+ return 0; -+ -+ if (unlikely(iter->level >= BTREE_MAX_DEPTH)) -+ return 0; -+ -+ /* -+ * XXX: correctly using BTREE_ITER_UPTODATE should make using check_pos -+ * here unnecessary -+ */ -+ iter->level = btree_iter_up_until_good_node(iter, 0); -+ -+ /* -+ * If we've got a btree node locked (i.e. we aren't about to relock the -+ * root) - advance its node iterator if necessary: -+ * -+ * XXX correctly using BTREE_ITER_UPTODATE should make this unnecessary -+ */ -+ if (is_btree_node(iter, iter->level)) { -+ BUG_ON(!btree_iter_pos_in_node(iter, iter->l[iter->level].b)); -+ -+ btree_iter_advance_to_pos(iter, &iter->l[iter->level], -1); -+ } -+ -+ /* -+ * Note: iter->nodes[iter->level] may be temporarily NULL here - that -+ * would indicate to other code that we got to the end of the btree, -+ * here it indicates that relocking the root failed - it's critical that -+ * btree_iter_lock_root() comes next and that it can't fail -+ */ -+ while (iter->level > depth_want) { -+ int ret = btree_iter_node(iter, iter->level) -+ ? 
btree_iter_down(iter) -+ : btree_iter_lock_root(iter, depth_want); -+ if (unlikely(ret)) { -+ if (ret == 1) -+ return 0; -+ -+ iter->level = depth_want; -+ -+ if (ret == -EIO) { -+ iter->flags |= BTREE_ITER_ERROR; -+ iter->l[iter->level].b = -+ BTREE_ITER_NO_NODE_ERROR; -+ } else { -+ iter->l[iter->level].b = -+ BTREE_ITER_NO_NODE_DOWN; -+ } -+ return ret; -+ } -+ } -+ -+ iter->uptodate = BTREE_ITER_NEED_PEEK; -+ -+ bch2_btree_iter_verify(iter); -+ return 0; -+} -+ -+int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) -+{ -+ struct btree_trans *trans = iter->trans; -+ int ret; -+ -+ ret = bch2_trans_cond_resched(trans) ?: -+ btree_iter_traverse_one(iter); -+ if (unlikely(ret)) -+ ret = __btree_iter_traverse_all(trans, ret); -+ -+ return ret; -+} -+ -+static inline void bch2_btree_iter_checks(struct btree_iter *iter) -+{ -+ enum btree_iter_type type = btree_iter_type(iter); -+ -+ EBUG_ON(iter->btree_id >= BTREE_ID_NR); -+ -+ BUG_ON((type == BTREE_ITER_KEYS || -+ type == BTREE_ITER_CACHED) && -+ (bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 || -+ bkey_cmp(iter->pos, iter->k.p) > 0)); -+ -+ bch2_btree_iter_verify_locks(iter); -+ bch2_btree_iter_verify_level(iter, iter->level); -+} -+ -+/* Iterate across nodes (leaf and interior nodes) */ -+ -+struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) -+{ -+ struct btree *b; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES); -+ bch2_btree_iter_checks(iter); -+ -+ if (iter->uptodate == BTREE_ITER_UPTODATE) -+ return iter->l[iter->level].b; -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return NULL; -+ -+ b = btree_iter_node(iter, iter->level); -+ if (!b) -+ return NULL; -+ -+ BUG_ON(bkey_cmp(b->key.k.p, iter->pos) < 0); -+ -+ iter->pos = b->key.k.p; -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ -+ bch2_btree_iter_verify(iter); -+ -+ return b; -+} -+ -+struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) -+{ -+ struct btree *b; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES); -+ bch2_btree_iter_checks(iter); -+ -+ /* already got to end? */ -+ if (!btree_iter_node(iter, iter->level)) -+ return NULL; -+ -+ bch2_trans_cond_resched(iter->trans); -+ -+ btree_iter_up(iter); -+ -+ if (!bch2_btree_node_relock(iter, iter->level)) -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return NULL; -+ -+ /* got to end? 
*/ -+ b = btree_iter_node(iter, iter->level); -+ if (!b) -+ return NULL; -+ -+ if (bkey_cmp(iter->pos, b->key.k.p) < 0) { -+ /* -+ * Haven't gotten to the end of the parent node: go back down to -+ * the next child node -+ */ -+ -+ /* -+ * We don't really want to be unlocking here except we can't -+ * directly tell btree_iter_traverse() "traverse to this level" -+ * except by setting iter->level, so we have to unlock so we -+ * don't screw up our lock invariants: -+ */ -+ if (btree_node_read_locked(iter, iter->level)) -+ btree_node_unlock(iter, iter->level); -+ -+ iter->pos = bkey_successor(iter->pos); -+ iter->level = iter->min_depth; -+ -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return NULL; -+ -+ b = iter->l[iter->level].b; -+ } -+ -+ iter->pos = b->key.k.p; -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ -+ bch2_btree_iter_verify(iter); -+ -+ return b; -+} -+ -+/* Iterate across keys (in leaf nodes only) */ -+ -+void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_pos) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ -+ EBUG_ON(iter->level != 0); -+ EBUG_ON(bkey_cmp(new_pos, iter->pos) < 0); -+ EBUG_ON(!btree_node_locked(iter, 0)); -+ EBUG_ON(bkey_cmp(new_pos, l->b->key.k.p) > 0); -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = new_pos; -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+ -+ btree_iter_advance_to_pos(iter, l, -1); -+ -+ /* -+ * XXX: -+ * keeping a node locked that's outside (even just outside) iter->pos -+ * breaks __bch2_btree_node_lock(). This seems to only affect -+ * bch2_btree_node_get_sibling so for now it's fixed there, but we -+ * should try to get rid of this corner case. -+ * -+ * (this behaviour is currently needed for BTREE_INSERT_NOUNLOCK) -+ */ -+ -+ if (bch2_btree_node_iter_end(&l->iter) && -+ btree_iter_pos_after_node(iter, l->b)) -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+} -+ -+static void btree_iter_pos_changed(struct btree_iter *iter, int cmp) -+{ -+ unsigned l = iter->level; -+ -+ if (!cmp) -+ goto out; -+ -+ if (unlikely(btree_iter_type(iter) == BTREE_ITER_CACHED)) { -+ btree_node_unlock(iter, 0); -+ iter->l[0].b = BTREE_ITER_NO_NODE_UP; -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+ return; -+ } -+ -+ l = btree_iter_up_until_good_node(iter, cmp); -+ -+ if (btree_iter_node(iter, l)) { -+ /* -+ * We might have to skip over many keys, or just a few: try -+ * advancing the node iterator, and if we have to skip over too -+ * many keys just reinit it (or if we're rewinding, since that -+ * is expensive). -+ */ -+ if (cmp < 0 || -+ !btree_iter_advance_to_pos(iter, &iter->l[l], 8)) -+ __btree_iter_init(iter, l); -+ -+ /* Don't leave it locked if we're not supposed to: */ -+ if (btree_lock_want(iter, l) == BTREE_NODE_UNLOCKED) -+ btree_node_unlock(iter, l); -+ } -+out: -+ if (l != iter->level) -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+ else -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+} -+ -+void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos, -+ bool strictly_greater) -+{ -+ struct bpos old = btree_iter_search_key(iter); -+ int cmp; -+ -+ iter->flags &= ~BTREE_ITER_IS_EXTENTS; -+ iter->flags |= strictly_greater ? 
BTREE_ITER_IS_EXTENTS : 0; -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = new_pos; -+ -+ cmp = bkey_cmp(btree_iter_search_key(iter), old); -+ -+ btree_iter_pos_changed(iter, cmp); -+} -+ -+void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) -+{ -+ int cmp = bkey_cmp(new_pos, iter->pos); -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = new_pos; -+ -+ btree_iter_pos_changed(iter, cmp); -+} -+ -+static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ bool ret; -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = l->b->key.k.p; -+ -+ ret = bkey_cmp(iter->pos, POS_MAX) != 0; -+ if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) -+ iter->k.p = iter->pos = bkey_successor(iter->pos); -+ -+ btree_iter_pos_changed(iter, 1); -+ return ret; -+} -+ -+static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ bool ret; -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = l->b->data->min_key; -+ iter->uptodate = BTREE_ITER_NEED_TRAVERSE; -+ -+ ret = bkey_cmp(iter->pos, POS_MIN) != 0; -+ if (ret) { -+ iter->k.p = iter->pos = bkey_predecessor(iter->pos); -+ -+ if (iter->flags & BTREE_ITER_IS_EXTENTS) -+ iter->k.p = iter->pos = bkey_predecessor(iter->pos); -+ } -+ -+ btree_iter_pos_changed(iter, -1); -+ return ret; -+} -+ -+/** -+ * btree_iter_peek_uptodate - given an iterator that is uptodate, return the key -+ * it currently points to -+ */ -+static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c ret = { .k = &iter->k }; -+ -+ if (!bkey_deleted(&iter->k)) { -+ struct bkey_packed *_k = -+ __bch2_btree_node_iter_peek_all(&l->iter, l->b); -+ -+ ret.v = bkeyp_val(&l->b->format, _k); -+ -+ if (debug_check_iterators(iter->trans->c)) { -+ struct bkey k = bkey_unpack_key(l->b, _k); -+ -+ BUG_ON(memcmp(&k, &iter->k, sizeof(k))); -+ } -+ -+ if (debug_check_bkeys(iter->trans->c)) -+ bch2_bkey_debugcheck(iter->trans->c, l->b, ret); -+ } -+ -+ return ret; -+} -+ -+/** -+ * bch2_btree_iter_peek: returns first key greater than or equal to iterator's -+ * current position -+ */ -+struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c k; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ if (iter->uptodate == BTREE_ITER_UPTODATE && -+ !bkey_deleted(&iter->k)) -+ return btree_iter_peek_uptodate(iter); -+ -+ while (1) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ k = __btree_iter_peek(iter, l); -+ if (likely(k.k)) -+ break; -+ -+ if (!btree_iter_set_pos_to_next_leaf(iter)) -+ return bkey_s_c_null; -+ } -+ -+ /* -+ * iter->pos should always be equal to the key we just -+ * returned - except extents can straddle iter->pos: -+ */ -+ if (!(iter->flags & BTREE_ITER_IS_EXTENTS) || -+ bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) -+ iter->pos = bkey_start_pos(k.k); -+ -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ -+ bch2_btree_iter_verify_level(iter, 0); -+ return k; -+} -+ -+/** -+ * bch2_btree_iter_next: returns first key greater than iterator's current -+ * position -+ */ -+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) -+{ -+ if (unlikely(!bkey_cmp(iter->k.p, POS_MAX))) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, -+ (iter->flags & 
BTREE_ITER_IS_EXTENTS) -+ ? iter->k.p -+ : bkey_successor(iter->k.p)); -+ -+ return bch2_btree_iter_peek(iter); -+} -+ -+static struct bkey_s_c __btree_trans_updates_peek(struct btree_iter *iter) -+{ -+ struct bpos pos = btree_iter_search_key(iter); -+ struct btree_trans *trans = iter->trans; -+ struct btree_insert_entry *i; -+ -+ trans_for_each_update2(trans, i) -+ if ((cmp_int(iter->btree_id, i->iter->btree_id) ?: -+ bkey_cmp(pos, i->k->k.p)) <= 0) -+ break; -+ -+ return i < trans->updates2 + trans->nr_updates2 && -+ iter->btree_id == i->iter->btree_id -+ ? bkey_i_to_s_c(i->k) -+ : bkey_s_c_null; -+} -+ -+static struct bkey_s_c __bch2_btree_iter_peek_with_updates(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c k = __btree_iter_peek(iter, l); -+ struct bkey_s_c u = __btree_trans_updates_peek(iter); -+ -+ if (k.k && (!u.k || bkey_cmp(k.k->p, u.k->p) < 0)) -+ return k; -+ if (u.k && bkey_cmp(u.k->p, l->b->key.k.p) <= 0) { -+ iter->k = *u.k; -+ return u; -+ } -+ return bkey_s_c_null; -+} -+ -+struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter) -+{ -+ struct bkey_s_c k; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ while (1) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ k = __bch2_btree_iter_peek_with_updates(iter); -+ -+ if (k.k && bkey_deleted(k.k)) { -+ bch2_btree_iter_set_pos(iter, -+ (iter->flags & BTREE_ITER_IS_EXTENTS) -+ ? iter->k.p -+ : bkey_successor(iter->k.p)); -+ continue; -+ } -+ -+ if (likely(k.k)) -+ break; -+ -+ if (!btree_iter_set_pos_to_next_leaf(iter)) -+ return bkey_s_c_null; -+ } -+ -+ /* -+ * iter->pos should always be equal to the key we just -+ * returned - except extents can straddle iter->pos: -+ */ -+ if (!(iter->flags & BTREE_ITER_IS_EXTENTS) || -+ bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) -+ iter->pos = bkey_start_pos(k.k); -+ -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ return k; -+} -+ -+struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter) -+{ -+ if (unlikely(!bkey_cmp(iter->k.p, POS_MAX))) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, -+ (iter->flags & BTREE_ITER_IS_EXTENTS) -+ ? 
iter->k.p -+ : bkey_successor(iter->k.p)); -+ -+ return bch2_btree_iter_peek_with_updates(iter); -+} -+ -+/** -+ * bch2_btree_iter_peek_prev: returns first key less than or equal to -+ * iterator's current position -+ */ -+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) -+{ -+ struct bpos pos = iter->pos; -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c k; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ if (iter->uptodate == BTREE_ITER_UPTODATE && -+ !bkey_deleted(&iter->k)) -+ return btree_iter_peek_uptodate(iter); -+ -+ while (1) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ k = __btree_iter_peek(iter, l); -+ if (!k.k || bkey_cmp(bkey_start_pos(k.k), pos) > 0) -+ k = __btree_iter_prev(iter, l); -+ -+ if (likely(k.k)) -+ break; -+ -+ if (!btree_iter_set_pos_to_prev_leaf(iter)) -+ return bkey_s_c_null; -+ } -+ -+ EBUG_ON(bkey_cmp(bkey_start_pos(k.k), pos) > 0); -+ iter->pos = bkey_start_pos(k.k); -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ return k; -+} -+ -+/** -+ * bch2_btree_iter_prev: returns first key less than iterator's current -+ * position -+ */ -+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter) -+{ -+ struct bpos pos = bkey_start_pos(&iter->k); -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ if (unlikely(!bkey_cmp(pos, POS_MIN))) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, bkey_predecessor(pos)); -+ -+ return bch2_btree_iter_peek_prev(iter); -+} -+ -+static inline struct bkey_s_c -+__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct btree_node_iter node_iter; -+ struct bkey_s_c k; -+ struct bkey n; -+ int ret; -+ -+ /* keys & holes can't span inode numbers: */ -+ if (iter->pos.offset == KEY_OFFSET_MAX) { -+ if (iter->pos.inode == KEY_INODE_MAX) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, bkey_successor(iter->pos)); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ } -+ -+ /* -+ * iterator is now at the correct position for inserting at iter->pos, -+ * but we need to keep iterating until we find the first non whiteout so -+ * we know how big a hole we have, if any: -+ */ -+ -+ node_iter = l->iter; -+ k = __btree_iter_unpack(iter, l, &iter->k, -+ bch2_btree_node_iter_peek(&node_iter, l->b)); -+ -+ if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) { -+ /* -+ * We're not setting iter->uptodate because the node iterator -+ * doesn't necessarily point at the key we're returning: -+ */ -+ -+ EBUG_ON(bkey_cmp(k.k->p, iter->pos) <= 0); -+ bch2_btree_iter_verify_level(iter, 0); -+ return k; -+ } -+ -+ /* hole */ -+ -+ if (!k.k) -+ k.k = &l->b->key.k; -+ -+ bkey_init(&n); -+ n.p = iter->pos; -+ bch2_key_resize(&n, -+ min_t(u64, KEY_SIZE_MAX, -+ (k.k->p.inode == n.p.inode -+ ? 
bkey_start_offset(k.k) -+ : KEY_OFFSET_MAX) - -+ n.p.offset)); -+ -+ EBUG_ON(!n.size); -+ -+ iter->k = n; -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ -+ bch2_btree_iter_verify_level(iter, 0); -+ return (struct bkey_s_c) { &iter->k, NULL }; -+} -+ -+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c k; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ if (iter->uptodate == BTREE_ITER_UPTODATE) -+ return btree_iter_peek_uptodate(iter); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ if (iter->flags & BTREE_ITER_IS_EXTENTS) -+ return __bch2_btree_iter_peek_slot_extents(iter); -+ -+ k = __btree_iter_peek_all(iter, l, &iter->k); -+ -+ EBUG_ON(k.k && bkey_deleted(k.k) && bkey_cmp(k.k->p, iter->pos) == 0); -+ -+ if (!k.k || bkey_cmp(iter->pos, k.k->p)) { -+ /* hole */ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos; -+ k = (struct bkey_s_c) { &iter->k, NULL }; -+ } -+ -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ bch2_btree_iter_verify_level(iter, 0); -+ return k; -+} -+ -+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter) -+{ -+ if (unlikely(!bkey_cmp(iter->k.p, POS_MAX))) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, -+ (iter->flags & BTREE_ITER_IS_EXTENTS) -+ ? iter->k.p -+ : bkey_successor(iter->k.p)); -+ -+ return bch2_btree_iter_peek_slot(iter); -+} -+ -+struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter) -+{ -+ struct bkey_cached *ck; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_CACHED); -+ bch2_btree_iter_checks(iter); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ ck = (void *) iter->l[0].b; -+ -+ EBUG_ON(iter->btree_id != ck->key.btree_id || -+ bkey_cmp(iter->pos, ck->key.pos)); -+ BUG_ON(!ck->valid); -+ -+ return bkey_i_to_s_c(ck->k); -+} -+ -+static inline void bch2_btree_iter_init(struct btree_trans *trans, -+ struct btree_iter *iter, enum btree_id btree_id, -+ struct bpos pos, unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ unsigned i; -+ -+ if (btree_node_type_is_extents(btree_id) && -+ !(flags & BTREE_ITER_NODES)) -+ flags |= BTREE_ITER_IS_EXTENTS; -+ -+ iter->trans = trans; -+ iter->pos = pos; -+ bkey_init(&iter->k); -+ iter->k.p = pos; -+ iter->flags = flags; -+ iter->uptodate = BTREE_ITER_NEED_TRAVERSE; -+ iter->btree_id = btree_id; -+ iter->level = 0; -+ iter->min_depth = 0; -+ iter->locks_want = flags & BTREE_ITER_INTENT ? 
1 : 0; -+ iter->nodes_locked = 0; -+ iter->nodes_intent_locked = 0; -+ for (i = 0; i < ARRAY_SIZE(iter->l); i++) -+ iter->l[i].b = BTREE_ITER_NO_NODE_INIT; -+ -+ prefetch(c->btree_roots[btree_id].b); -+} -+ -+/* new transactional stuff: */ -+ -+static inline void __bch2_trans_iter_free(struct btree_trans *trans, -+ unsigned idx) -+{ -+ __bch2_btree_iter_unlock(&trans->iters[idx]); -+ trans->iters_linked &= ~(1ULL << idx); -+ trans->iters_live &= ~(1ULL << idx); -+ trans->iters_touched &= ~(1ULL << idx); -+} -+ -+int bch2_trans_iter_put(struct btree_trans *trans, -+ struct btree_iter *iter) -+{ -+ int ret; -+ -+ if (IS_ERR_OR_NULL(iter)) -+ return 0; -+ -+ BUG_ON(trans->iters + iter->idx != iter); -+ -+ ret = btree_iter_err(iter); -+ -+ if (!(trans->iters_touched & (1ULL << iter->idx)) && -+ !(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT)) -+ __bch2_trans_iter_free(trans, iter->idx); -+ -+ trans->iters_live &= ~(1ULL << iter->idx); -+ return ret; -+} -+ -+int bch2_trans_iter_free(struct btree_trans *trans, -+ struct btree_iter *iter) -+{ -+ if (IS_ERR_OR_NULL(iter)) -+ return 0; -+ -+ trans->iters_touched &= ~(1ULL << iter->idx); -+ -+ return bch2_trans_iter_put(trans, iter); -+} -+ -+static int bch2_trans_realloc_iters(struct btree_trans *trans, -+ unsigned new_size) -+{ -+ void *p, *new_iters, *new_updates, *new_updates2; -+ size_t iters_bytes; -+ size_t updates_bytes; -+ -+ new_size = roundup_pow_of_two(new_size); -+ -+ BUG_ON(new_size > BTREE_ITER_MAX); -+ -+ if (new_size <= trans->size) -+ return 0; -+ -+ BUG_ON(trans->used_mempool); -+ -+ bch2_trans_unlock(trans); -+ -+ iters_bytes = sizeof(struct btree_iter) * new_size; -+ updates_bytes = sizeof(struct btree_insert_entry) * new_size; -+ -+ p = kmalloc(iters_bytes + -+ updates_bytes + -+ updates_bytes, GFP_NOFS); -+ if (p) -+ goto success; -+ -+ p = mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS); -+ new_size = BTREE_ITER_MAX; -+ -+ trans->used_mempool = true; -+success: -+ new_iters = p; p += iters_bytes; -+ new_updates = p; p += updates_bytes; -+ new_updates2 = p; p += updates_bytes; -+ -+ memcpy(new_iters, trans->iters, -+ sizeof(struct btree_iter) * trans->nr_iters); -+ memcpy(new_updates, trans->updates, -+ sizeof(struct btree_insert_entry) * trans->nr_updates); -+ memcpy(new_updates2, trans->updates2, -+ sizeof(struct btree_insert_entry) * trans->nr_updates2); -+ -+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) -+ memset(trans->iters, POISON_FREE, -+ sizeof(struct btree_iter) * trans->nr_iters + -+ sizeof(struct btree_insert_entry) * trans->nr_iters); -+ -+ if (trans->iters != trans->iters_onstack) -+ kfree(trans->iters); -+ -+ trans->iters = new_iters; -+ trans->updates = new_updates; -+ trans->updates2 = new_updates2; -+ trans->size = new_size; -+ -+ if (trans->iters_live) { -+ trace_trans_restart_iters_realloced(trans->ip, trans->size); -+ return -EINTR; -+ } -+ -+ return 0; -+} -+ -+static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans) -+{ -+ unsigned idx = __ffs64(~trans->iters_linked); -+ -+ if (idx < trans->nr_iters) -+ goto got_slot; -+ -+ if (trans->nr_iters == trans->size) { -+ int ret; -+ -+ if (trans->nr_iters >= BTREE_ITER_MAX) { -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) { -+ pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps", -+ bch2_btree_ids[iter->btree_id], -+ iter->pos.inode, -+ iter->pos.offset, -+ (trans->iters_live & (1ULL << iter->idx)) ? " live" : "", -+ (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "", -+ iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? 
" keep" : "", -+ (void *) iter->ip_allocated); -+ } -+ -+ panic("trans iter oveflow\n"); -+ } -+ -+ ret = bch2_trans_realloc_iters(trans, trans->size * 2); -+ if (ret) -+ return ERR_PTR(ret); -+ } -+ -+ idx = trans->nr_iters++; -+ BUG_ON(trans->nr_iters > trans->size); -+ -+ trans->iters[idx].idx = idx; -+got_slot: -+ BUG_ON(trans->iters_linked & (1ULL << idx)); -+ trans->iters_linked |= 1ULL << idx; -+ trans->iters[idx].flags = 0; -+ return &trans->iters[idx]; -+} -+ -+static inline void btree_iter_copy(struct btree_iter *dst, -+ struct btree_iter *src) -+{ -+ unsigned i, idx = dst->idx; -+ -+ *dst = *src; -+ dst->idx = idx; -+ dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; -+ -+ for (i = 0; i < BTREE_MAX_DEPTH; i++) -+ if (btree_node_locked(dst, i)) -+ six_lock_increment(&dst->l[i].b->c.lock, -+ __btree_lock_want(dst, i)); -+ -+ dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; -+ dst->flags &= ~BTREE_ITER_SET_POS_AFTER_COMMIT; -+} -+ -+static inline struct bpos bpos_diff(struct bpos l, struct bpos r) -+{ -+ if (bkey_cmp(l, r) > 0) -+ swap(l, r); -+ -+ return POS(r.inode - l.inode, r.offset - l.offset); -+} -+ -+static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans, -+ unsigned btree_id, struct bpos pos, -+ unsigned flags) -+{ -+ struct btree_iter *iter, *best = NULL; -+ -+ BUG_ON(trans->nr_iters > BTREE_ITER_MAX); -+ -+ trans_for_each_iter(trans, iter) { -+ if (btree_iter_type(iter) != (flags & BTREE_ITER_TYPE)) -+ continue; -+ -+ if (iter->btree_id != btree_id) -+ continue; -+ -+ if (best && -+ bkey_cmp(bpos_diff(best->pos, pos), -+ bpos_diff(iter->pos, pos)) < 0) -+ continue; -+ -+ best = iter; -+ } -+ -+ if (!best) { -+ iter = btree_trans_iter_alloc(trans); -+ if (IS_ERR(iter)) -+ return iter; -+ -+ bch2_btree_iter_init(trans, iter, btree_id, pos, flags); -+ } else if ((trans->iters_live & (1ULL << best->idx)) || -+ (best->flags & BTREE_ITER_KEEP_UNTIL_COMMIT)) { -+ iter = btree_trans_iter_alloc(trans); -+ if (IS_ERR(iter)) -+ return iter; -+ -+ btree_iter_copy(iter, best); -+ } else { -+ iter = best; -+ } -+ -+ iter->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; -+ iter->flags &= ~BTREE_ITER_USER_FLAGS; -+ iter->flags |= flags & BTREE_ITER_USER_FLAGS; -+ -+ if (iter->flags & BTREE_ITER_INTENT) -+ bch2_btree_iter_upgrade(iter, 1); -+ else -+ bch2_btree_iter_downgrade(iter); -+ -+ BUG_ON(iter->btree_id != btree_id); -+ BUG_ON((iter->flags ^ flags) & BTREE_ITER_TYPE); -+ BUG_ON(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT); -+ BUG_ON(iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT); -+ BUG_ON(trans->iters_live & (1ULL << iter->idx)); -+ -+ trans->iters_live |= 1ULL << iter->idx; -+ trans->iters_touched |= 1ULL << iter->idx; -+ -+ return iter; -+} -+ -+struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, -+ enum btree_id btree_id, -+ struct bpos pos, unsigned flags) -+{ -+ struct btree_iter *iter = -+ __btree_trans_get_iter(trans, btree_id, pos, flags); -+ -+ if (!IS_ERR(iter)) -+ __bch2_btree_iter_set_pos(iter, pos, -+ btree_node_type_is_extents(btree_id)); -+ return iter; -+} -+ -+struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans, -+ enum btree_id btree_id, -+ struct bpos pos, -+ unsigned locks_want, -+ unsigned depth, -+ unsigned flags) -+{ -+ struct btree_iter *iter = -+ __btree_trans_get_iter(trans, btree_id, pos, -+ flags|BTREE_ITER_NODES); -+ unsigned i; -+ -+ BUG_ON(IS_ERR(iter)); -+ BUG_ON(bkey_cmp(iter->pos, pos)); -+ -+ iter->locks_want = locks_want; -+ iter->level = depth; -+ iter->min_depth = depth; -+ -+ for (i = 0; i < 
ARRAY_SIZE(iter->l); i++) -+ iter->l[i].b = NULL; -+ iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT; -+ -+ return iter; -+} -+ -+struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans, -+ struct btree_iter *src) -+{ -+ struct btree_iter *iter; -+ -+ iter = btree_trans_iter_alloc(trans); -+ if (IS_ERR(iter)) -+ return iter; -+ -+ btree_iter_copy(iter, src); -+ -+ trans->iters_live |= 1ULL << iter->idx; -+ /* -+ * We don't need to preserve this iter since it's cheap to copy it -+ * again - this will cause trans_iter_put() to free it right away: -+ */ -+ trans->iters_touched &= ~(1ULL << iter->idx); -+ -+ return iter; -+} -+ -+static int bch2_trans_preload_mem(struct btree_trans *trans, size_t size) -+{ -+ if (size > trans->mem_bytes) { -+ size_t old_bytes = trans->mem_bytes; -+ size_t new_bytes = roundup_pow_of_two(size); -+ void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS); -+ -+ if (!new_mem) -+ return -ENOMEM; -+ -+ trans->mem = new_mem; -+ trans->mem_bytes = new_bytes; -+ -+ if (old_bytes) { -+ trace_trans_restart_mem_realloced(trans->ip, new_bytes); -+ return -EINTR; -+ } -+ } -+ -+ return 0; -+} -+ -+void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) -+{ -+ void *p; -+ int ret; -+ -+ ret = bch2_trans_preload_mem(trans, trans->mem_top + size); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ p = trans->mem + trans->mem_top; -+ trans->mem_top += size; -+ return p; -+} -+ -+inline void bch2_trans_unlink_iters(struct btree_trans *trans) -+{ -+ u64 iters = trans->iters_linked & -+ ~trans->iters_touched & -+ ~trans->iters_live; -+ -+ while (iters) { -+ unsigned idx = __ffs64(iters); -+ -+ iters &= ~(1ULL << idx); -+ __bch2_trans_iter_free(trans, idx); -+ } -+} -+ -+void bch2_trans_reset(struct btree_trans *trans, unsigned flags) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) -+ iter->flags &= ~(BTREE_ITER_KEEP_UNTIL_COMMIT| -+ BTREE_ITER_SET_POS_AFTER_COMMIT); -+ -+ bch2_trans_unlink_iters(trans); -+ -+ trans->iters_touched &= trans->iters_live; -+ -+ trans->need_reset = 0; -+ trans->nr_updates = 0; -+ trans->nr_updates2 = 0; -+ trans->mem_top = 0; -+ -+ trans->extra_journal_entries = NULL; -+ trans->extra_journal_entry_u64s = 0; -+ -+ if (trans->fs_usage_deltas) { -+ trans->fs_usage_deltas->used = 0; -+ memset(&trans->fs_usage_deltas->memset_start, 0, -+ (void *) &trans->fs_usage_deltas->memset_end - -+ (void *) &trans->fs_usage_deltas->memset_start); -+ } -+ -+ if (!(flags & TRANS_RESET_NOTRAVERSE)) -+ bch2_btree_iter_traverse_all(trans); -+} -+ -+void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, -+ unsigned expected_nr_iters, -+ size_t expected_mem_bytes) -+{ -+ memset(trans, 0, offsetof(struct btree_trans, iters_onstack)); -+ -+ /* -+ * reallocating iterators currently completely breaks -+ * bch2_trans_iter_put(): -+ */ -+ expected_nr_iters = BTREE_ITER_MAX; -+ -+ trans->c = c; -+ trans->ip = _RET_IP_; -+ trans->size = ARRAY_SIZE(trans->iters_onstack); -+ trans->iters = trans->iters_onstack; -+ trans->updates = trans->updates_onstack; -+ trans->updates2 = trans->updates2_onstack; -+ trans->fs_usage_deltas = NULL; -+ -+ if (expected_nr_iters > trans->size) -+ bch2_trans_realloc_iters(trans, expected_nr_iters); -+ -+ if (expected_mem_bytes) -+ bch2_trans_preload_mem(trans, expected_mem_bytes); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ trans->pid = current->pid; -+ mutex_lock(&c->btree_trans_lock); -+ list_add(&trans->list, &c->btree_trans_list); -+ mutex_unlock(&c->btree_trans_lock); -+#endif -+} -+ -+int 
bch2_trans_exit(struct btree_trans *trans) -+{ -+ bch2_trans_unlock(trans); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ mutex_lock(&trans->c->btree_trans_lock); -+ list_del(&trans->list); -+ mutex_unlock(&trans->c->btree_trans_lock); -+#endif -+ -+ bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres); -+ -+ kfree(trans->fs_usage_deltas); -+ kfree(trans->mem); -+ if (trans->used_mempool) -+ mempool_free(trans->iters, &trans->c->btree_iters_pool); -+ else if (trans->iters != trans->iters_onstack) -+ kfree(trans->iters); -+ trans->mem = (void *) 0x1; -+ trans->iters = (void *) 0x1; -+ -+ return trans->error ? -EIO : 0; -+} -+ -+static void bch2_btree_iter_node_to_text(struct printbuf *out, -+ struct btree_bkey_cached_common *_b, -+ enum btree_iter_type type) -+{ -+ pr_buf(out, " %px l=%u %s:", -+ _b, _b->level, bch2_btree_ids[_b->btree_id]); -+ bch2_bpos_to_text(out, btree_node_pos(_b, type)); -+} -+ -+void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct btree_trans *trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ unsigned l; -+ -+ mutex_lock(&c->btree_trans_lock); -+ list_for_each_entry(trans, &c->btree_trans_list, list) { -+ pr_buf(out, "%i %px %ps\n", trans->pid, trans, (void *) trans->ip); -+ -+ trans_for_each_iter(trans, iter) { -+ if (!iter->nodes_locked) -+ continue; -+ -+ pr_buf(out, " iter %u %s:", -+ iter->idx, -+ bch2_btree_ids[iter->btree_id]); -+ bch2_bpos_to_text(out, iter->pos); -+ pr_buf(out, "\n"); -+ -+ for (l = 0; l < BTREE_MAX_DEPTH; l++) { -+ if (btree_node_locked(iter, l)) { -+ pr_buf(out, " %s l=%u ", -+ btree_node_intent_locked(iter, l) ? "i" : "r", l); -+ bch2_btree_iter_node_to_text(out, -+ (void *) iter->l[l].b, -+ btree_iter_type(iter)); -+ pr_buf(out, "\n"); -+ } -+ } -+ } -+ -+ b = READ_ONCE(trans->locking); -+ if (b) { -+ pr_buf(out, " locking iter %u l=%u %s:", -+ trans->locking_iter_idx, -+ trans->locking_level, -+ bch2_btree_ids[trans->locking_btree_id]); -+ bch2_bpos_to_text(out, trans->locking_pos); -+ -+ -+ pr_buf(out, " node "); -+ bch2_btree_iter_node_to_text(out, -+ (void *) b, -+ btree_iter_type(&trans->iters[trans->locking_iter_idx])); -+ pr_buf(out, "\n"); -+ } -+ } -+ mutex_unlock(&c->btree_trans_lock); -+#endif -+} -+ -+void bch2_fs_btree_iter_exit(struct bch_fs *c) -+{ -+ mempool_exit(&c->btree_iters_pool); -+} -+ -+int bch2_fs_btree_iter_init(struct bch_fs *c) -+{ -+ unsigned nr = BTREE_ITER_MAX; -+ -+ INIT_LIST_HEAD(&c->btree_trans_list); -+ mutex_init(&c->btree_trans_lock); -+ -+ return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1, -+ sizeof(struct btree_iter) * nr + -+ sizeof(struct btree_insert_entry) * nr + -+ sizeof(struct btree_insert_entry) * nr); -+} -diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h -new file mode 100644 -index 000000000000..bd9ec3ec9a92 ---- /dev/null -+++ b/fs/bcachefs/btree_iter.h -@@ -0,0 +1,314 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_ITER_H -+#define _BCACHEFS_BTREE_ITER_H -+ -+#include "bset.h" -+#include "btree_types.h" -+ -+static inline void btree_iter_set_dirty(struct btree_iter *iter, -+ enum btree_iter_uptodate u) -+{ -+ iter->uptodate = max_t(unsigned, iter->uptodate, u); -+} -+ -+static inline struct btree *btree_iter_node(struct btree_iter *iter, -+ unsigned level) -+{ -+ return level < BTREE_MAX_DEPTH ? 
iter->l[level].b : NULL; -+} -+ -+static inline bool btree_node_lock_seq_matches(const struct btree_iter *iter, -+ const struct btree *b, unsigned level) -+{ -+ /* -+ * We don't compare the low bits of the lock sequence numbers because -+ * @iter might have taken a write lock on @b, and we don't want to skip -+ * the linked iterator if the sequence numbers were equal before taking -+ * that write lock. The lock sequence number is incremented by taking -+ * and releasing write locks and is even when unlocked: -+ */ -+ return iter->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1; -+} -+ -+static inline struct btree *btree_node_parent(struct btree_iter *iter, -+ struct btree *b) -+{ -+ return btree_iter_node(iter, b->c.level + 1); -+} -+ -+static inline bool btree_trans_has_multiple_iters(const struct btree_trans *trans) -+{ -+ return hweight64(trans->iters_linked) > 1; -+} -+ -+static inline int btree_iter_err(const struct btree_iter *iter) -+{ -+ return iter->flags & BTREE_ITER_ERROR ? -EIO : 0; -+} -+ -+/* Iterate over iters within a transaction: */ -+ -+#define trans_for_each_iter_all(_trans, _iter) \ -+ for (_iter = (_trans)->iters; \ -+ _iter < (_trans)->iters + (_trans)->nr_iters; \ -+ _iter++) -+ -+static inline struct btree_iter * -+__trans_next_iter(struct btree_trans *trans, unsigned idx) -+{ -+ EBUG_ON(idx < trans->nr_iters && trans->iters[idx].idx != idx); -+ -+ for (; idx < trans->nr_iters; idx++) -+ if (trans->iters_linked & (1ULL << idx)) -+ return &trans->iters[idx]; -+ -+ return NULL; -+} -+ -+#define trans_for_each_iter(_trans, _iter) \ -+ for (_iter = __trans_next_iter((_trans), 0); \ -+ (_iter); \ -+ _iter = __trans_next_iter((_trans), (_iter)->idx + 1)) -+ -+static inline bool __iter_has_node(const struct btree_iter *iter, -+ const struct btree *b) -+{ -+ return iter->l[b->c.level].b == b && -+ btree_node_lock_seq_matches(iter, b, b->c.level); -+} -+ -+static inline struct btree_iter * -+__trans_next_iter_with_node(struct btree_trans *trans, struct btree *b, -+ unsigned idx) -+{ -+ struct btree_iter *iter = __trans_next_iter(trans, idx); -+ -+ while (iter && !__iter_has_node(iter, b)) -+ iter = __trans_next_iter(trans, iter->idx + 1); -+ -+ return iter; -+} -+ -+#define trans_for_each_iter_with_node(_trans, _b, _iter) \ -+ for (_iter = __trans_next_iter_with_node((_trans), (_b), 0); \ -+ (_iter); \ -+ _iter = __trans_next_iter_with_node((_trans), (_b), \ -+ (_iter)->idx + 1)) -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_btree_trans_verify_iters(struct btree_trans *, struct btree *); -+void bch2_btree_trans_verify_locks(struct btree_trans *); -+#else -+static inline void bch2_btree_trans_verify_iters(struct btree_trans *trans, -+ struct btree *b) {} -+static inline void bch2_btree_trans_verify_locks(struct btree_trans *iter) {} -+#endif -+ -+void bch2_btree_iter_fix_key_modified(struct btree_iter *, struct btree *, -+ struct bkey_packed *); -+void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *, -+ struct btree_node_iter *, struct bkey_packed *, -+ unsigned, unsigned); -+ -+bool bch2_btree_iter_relock(struct btree_iter *, bool); -+bool bch2_trans_relock(struct btree_trans *); -+void bch2_trans_unlock(struct btree_trans *); -+ -+bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned); -+bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned); -+ -+static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter, -+ unsigned new_locks_want) -+{ -+ new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH); -+ -+ return iter->locks_want < 
new_locks_want -+ ? (!iter->trans->nounlock -+ ? __bch2_btree_iter_upgrade(iter, new_locks_want) -+ : __bch2_btree_iter_upgrade_nounlock(iter, new_locks_want)) -+ : iter->uptodate <= BTREE_ITER_NEED_PEEK; -+} -+ -+void __bch2_btree_iter_downgrade(struct btree_iter *, unsigned); -+ -+static inline void bch2_btree_iter_downgrade(struct btree_iter *iter) -+{ -+ if (iter->locks_want > (iter->flags & BTREE_ITER_INTENT) ? 1 : 0) -+ __bch2_btree_iter_downgrade(iter, 0); -+} -+ -+void bch2_trans_downgrade(struct btree_trans *); -+ -+void bch2_btree_iter_node_replace(struct btree_iter *, struct btree *); -+void bch2_btree_iter_node_drop(struct btree_iter *, struct btree *); -+ -+void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *); -+ -+int __must_check __bch2_btree_iter_traverse(struct btree_iter *); -+ -+static inline int __must_check -+bch2_btree_iter_traverse(struct btree_iter *iter) -+{ -+ return iter->uptodate >= BTREE_ITER_NEED_RELOCK -+ ? __bch2_btree_iter_traverse(iter) -+ : 0; -+} -+ -+int bch2_btree_iter_traverse_all(struct btree_trans *); -+ -+struct btree *bch2_btree_iter_peek_node(struct btree_iter *); -+struct btree *bch2_btree_iter_next_node(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *); -+ -+void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos); -+void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool); -+void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos); -+ -+static inline int btree_iter_cmp(const struct btree_iter *l, -+ const struct btree_iter *r) -+{ -+ return cmp_int(l->btree_id, r->btree_id) ?: -+ -cmp_int(btree_iter_type(l), btree_iter_type(r)) ?: -+ bkey_cmp(l->pos, r->pos); -+} -+ -+/* -+ * Unlocks before scheduling -+ * Note: does not revalidate iterator -+ */ -+static inline int bch2_trans_cond_resched(struct btree_trans *trans) -+{ -+ if (need_resched() || race_fault()) { -+ bch2_trans_unlock(trans); -+ schedule(); -+ return bch2_trans_relock(trans) ? 0 : -EINTR; -+ } else { -+ return 0; -+ } -+} -+ -+#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ -+ _locks_want, _depth, _flags, _b) \ -+ for (iter = bch2_trans_get_node_iter((_trans), (_btree_id), \ -+ _start, _locks_want, _depth, _flags), \ -+ _b = bch2_btree_iter_peek_node(_iter); \ -+ (_b); \ -+ (_b) = bch2_btree_iter_next_node(_iter)) -+ -+#define for_each_btree_node(_trans, _iter, _btree_id, _start, \ -+ _flags, _b) \ -+ __for_each_btree_node(_trans, _iter, _btree_id, _start, \ -+ 0, 0, _flags, _b) -+ -+static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, -+ unsigned flags) -+{ -+ if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_CACHED) -+ return bch2_btree_iter_peek_cached(iter); -+ else -+ return flags & BTREE_ITER_SLOTS -+ ? bch2_btree_iter_peek_slot(iter) -+ : bch2_btree_iter_peek(iter); -+} -+ -+static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter, -+ unsigned flags) -+{ -+ return flags & BTREE_ITER_SLOTS -+ ? 
bch2_btree_iter_next_slot(iter) -+ : bch2_btree_iter_next(iter); -+} -+ -+static inline int bkey_err(struct bkey_s_c k) -+{ -+ return PTR_ERR_OR_ZERO(k.k); -+} -+ -+#define for_each_btree_key(_trans, _iter, _btree_id, \ -+ _start, _flags, _k, _ret) \ -+ for ((_ret) = PTR_ERR_OR_ZERO((_iter) = \ -+ bch2_trans_get_iter((_trans), (_btree_id), \ -+ (_start), (_flags))) ?: \ -+ PTR_ERR_OR_ZERO(((_k) = \ -+ __bch2_btree_iter_peek(_iter, _flags)).k); \ -+ !_ret && (_k).k; \ -+ (_ret) = PTR_ERR_OR_ZERO(((_k) = \ -+ __bch2_btree_iter_next(_iter, _flags)).k)) -+ -+#define for_each_btree_key_continue(_iter, _flags, _k, _ret) \ -+ for ((_k) = __bch2_btree_iter_peek(_iter, _flags); \ -+ !((_ret) = bkey_err(_k)) && (_k).k; \ -+ (_k) = __bch2_btree_iter_next(_iter, _flags)) -+ -+/* new multiple iterator interface: */ -+ -+int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *); -+int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *); -+ -+void bch2_trans_unlink_iters(struct btree_trans *); -+ -+struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id, -+ struct bpos, unsigned); -+ -+static inline struct btree_iter * -+bch2_trans_get_iter(struct btree_trans *trans, enum btree_id btree_id, -+ struct bpos pos, unsigned flags) -+{ -+ struct btree_iter *iter = -+ __bch2_trans_get_iter(trans, btree_id, pos, flags); -+ -+ if (!IS_ERR(iter)) -+ iter->ip_allocated = _THIS_IP_; -+ return iter; -+} -+ -+struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *, -+ struct btree_iter *); -+static inline struct btree_iter * -+bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *src) -+{ -+ struct btree_iter *iter = -+ __bch2_trans_copy_iter(trans, src); -+ -+ if (!IS_ERR(iter)) -+ iter->ip_allocated = _THIS_IP_; -+ return iter; -+ -+} -+ -+struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *, -+ enum btree_id, struct bpos, -+ unsigned, unsigned, unsigned); -+ -+#define TRANS_RESET_NOTRAVERSE (1 << 0) -+ -+void bch2_trans_reset(struct btree_trans *, unsigned); -+ -+static inline void bch2_trans_begin(struct btree_trans *trans) -+{ -+ return bch2_trans_reset(trans, 0); -+} -+ -+void *bch2_trans_kmalloc(struct btree_trans *, size_t); -+void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t); -+int bch2_trans_exit(struct btree_trans *); -+ -+void bch2_btree_trans_to_text(struct printbuf *, struct bch_fs *); -+ -+void bch2_fs_btree_iter_exit(struct bch_fs *); -+int bch2_fs_btree_iter_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_BTREE_ITER_H */ -diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c -new file mode 100644 -index 000000000000..61662750dfc0 ---- /dev/null -+++ b/fs/bcachefs/btree_key_cache.c -@@ -0,0 +1,519 @@ -+ -+#include "bcachefs.h" -+#include "btree_cache.h" -+#include "btree_iter.h" -+#include "btree_key_cache.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+#include "error.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+ -+#include -+ -+static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg, -+ const void *obj) -+{ -+ const struct bkey_cached *ck = obj; -+ const struct bkey_cached_key *key = arg->key; -+ -+ return cmp_int(ck->key.btree_id, key->btree_id) ?: -+ bkey_cmp(ck->key.pos, key->pos); -+} -+ -+static const struct rhashtable_params bch2_btree_key_cache_params = { -+ .head_offset = offsetof(struct bkey_cached, hash), -+ .key_offset = offsetof(struct bkey_cached, key), -+ .key_len = sizeof(struct bkey_cached_key), -+ .obj_cmpfn = 
bch2_btree_key_cache_cmp_fn, -+}; -+ -+__flatten -+static inline struct bkey_cached * -+btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos) -+{ -+ struct bkey_cached_key key = { -+ .btree_id = btree_id, -+ .pos = pos, -+ }; -+ -+ return rhashtable_lookup_fast(&c->btree_key_cache.table, &key, -+ bch2_btree_key_cache_params); -+} -+ -+static bool bkey_cached_lock_for_evict(struct bkey_cached *ck) -+{ -+ if (!six_trylock_intent(&ck->c.lock)) -+ return false; -+ -+ if (!six_trylock_write(&ck->c.lock)) { -+ six_unlock_intent(&ck->c.lock); -+ return false; -+ } -+ -+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ six_unlock_write(&ck->c.lock); -+ six_unlock_intent(&ck->c.lock); -+ return false; -+ } -+ -+ return true; -+} -+ -+static void bkey_cached_evict(struct btree_key_cache *c, -+ struct bkey_cached *ck) -+{ -+ BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash, -+ bch2_btree_key_cache_params)); -+ memset(&ck->key, ~0, sizeof(ck->key)); -+} -+ -+static void bkey_cached_free(struct btree_key_cache *c, -+ struct bkey_cached *ck) -+{ -+ list_move(&ck->list, &c->freed); -+ -+ kfree(ck->k); -+ ck->k = NULL; -+ ck->u64s = 0; -+ -+ six_unlock_write(&ck->c.lock); -+ six_unlock_intent(&ck->c.lock); -+} -+ -+static struct bkey_cached * -+bkey_cached_alloc(struct btree_key_cache *c) -+{ -+ struct bkey_cached *ck; -+ -+ list_for_each_entry(ck, &c->freed, list) -+ if (bkey_cached_lock_for_evict(ck)) -+ return ck; -+ -+ list_for_each_entry(ck, &c->clean, list) -+ if (bkey_cached_lock_for_evict(ck)) { -+ bkey_cached_evict(c, ck); -+ return ck; -+ } -+ -+ ck = kzalloc(sizeof(*ck), GFP_NOFS); -+ if (!ck) -+ return NULL; -+ -+ INIT_LIST_HEAD(&ck->list); -+ six_lock_init(&ck->c.lock); -+ BUG_ON(!six_trylock_intent(&ck->c.lock)); -+ BUG_ON(!six_trylock_write(&ck->c.lock)); -+ -+ return ck; -+} -+ -+static struct bkey_cached * -+btree_key_cache_create(struct btree_key_cache *c, -+ enum btree_id btree_id, -+ struct bpos pos) -+{ -+ struct bkey_cached *ck; -+ -+ ck = bkey_cached_alloc(c); -+ if (!ck) -+ return ERR_PTR(-ENOMEM); -+ -+ ck->c.level = 0; -+ ck->c.btree_id = btree_id; -+ ck->key.btree_id = btree_id; -+ ck->key.pos = pos; -+ ck->valid = false; -+ -+ BUG_ON(ck->flags); -+ -+ if (rhashtable_lookup_insert_fast(&c->table, -+ &ck->hash, -+ bch2_btree_key_cache_params)) { -+ /* We raced with another fill: */ -+ bkey_cached_free(c, ck); -+ return NULL; -+ } -+ -+ list_move(&ck->list, &c->clean); -+ six_unlock_write(&ck->c.lock); -+ -+ return ck; -+} -+ -+static int btree_key_cache_fill(struct btree_trans *trans, -+ struct btree_iter *ck_iter, -+ struct bkey_cached *ck) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ unsigned new_u64s = 0; -+ struct bkey_i *new_k = NULL; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, ck->key.btree_id, -+ ck->key.pos, BTREE_ITER_SLOTS); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) { -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+ } -+ -+ if (!bch2_btree_node_relock(ck_iter, 0)) { -+ bch2_trans_iter_put(trans, iter); -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ return -EINTR; -+ } -+ -+ if (k.k->u64s > ck->u64s) { -+ new_u64s = roundup_pow_of_two(k.k->u64s); -+ new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS); -+ if (!new_k) { -+ bch2_trans_iter_put(trans, iter); -+ return -ENOMEM; -+ } -+ } -+ -+ bch2_btree_node_lock_write(ck_iter->l[0].b, ck_iter); -+ if (new_k) { -+ kfree(ck->k); -+ ck->u64s = new_u64s; -+ ck->k = new_k; -+ } -+ -+ 
bkey_reassemble(ck->k, k); -+ ck->valid = true; -+ bch2_btree_node_unlock_write(ck_iter->l[0].b, ck_iter); -+ -+ /* We're not likely to need this iterator again: */ -+ bch2_trans_iter_free(trans, iter); -+ -+ return 0; -+} -+ -+static int bkey_cached_check_fn(struct six_lock *lock, void *p) -+{ -+ struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock); -+ const struct btree_iter *iter = p; -+ -+ return ck->key.btree_id == iter->btree_id && -+ !bkey_cmp(ck->key.pos, iter->pos) ? 0 : -1; -+} -+ -+int bch2_btree_iter_traverse_cached(struct btree_iter *iter) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct bch_fs *c = trans->c; -+ struct bkey_cached *ck; -+ int ret = 0; -+ -+ BUG_ON(iter->level); -+ -+ if (btree_node_locked(iter, 0)) { -+ ck = (void *) iter->l[0].b; -+ goto fill; -+ } -+retry: -+ ck = btree_key_cache_find(c, iter->btree_id, iter->pos); -+ if (!ck) { -+ if (iter->flags & BTREE_ITER_CACHED_NOCREATE) { -+ iter->l[0].b = NULL; -+ return 0; -+ } -+ -+ mutex_lock(&c->btree_key_cache.lock); -+ ck = btree_key_cache_create(&c->btree_key_cache, -+ iter->btree_id, iter->pos); -+ mutex_unlock(&c->btree_key_cache.lock); -+ -+ ret = PTR_ERR_OR_ZERO(ck); -+ if (ret) -+ goto err; -+ if (!ck) -+ goto retry; -+ -+ mark_btree_node_locked(iter, 0, SIX_LOCK_intent); -+ iter->locks_want = 1; -+ } else { -+ enum six_lock_type lock_want = __btree_lock_want(iter, 0); -+ -+ if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want, -+ bkey_cached_check_fn, iter)) { -+ if (ck->key.btree_id != iter->btree_id || -+ bkey_cmp(ck->key.pos, iter->pos)) { -+ goto retry; -+ } -+ -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ ret = -EINTR; -+ goto err; -+ } -+ -+ if (ck->key.btree_id != iter->btree_id || -+ bkey_cmp(ck->key.pos, iter->pos)) { -+ six_unlock_type(&ck->c.lock, lock_want); -+ goto retry; -+ } -+ -+ mark_btree_node_locked(iter, 0, lock_want); -+ } -+ -+ iter->l[0].lock_seq = ck->c.lock.state.seq; -+ iter->l[0].b = (void *) ck; -+fill: -+ if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) { -+ if (!btree_node_intent_locked(iter, 0)) -+ bch2_btree_iter_upgrade(iter, 1); -+ if (!btree_node_intent_locked(iter, 0)) { -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ ret = -EINTR; -+ goto err; -+ } -+ -+ ret = btree_key_cache_fill(trans, iter, ck); -+ if (ret) -+ goto err; -+ } -+ -+ iter->uptodate = BTREE_ITER_NEED_PEEK; -+ bch2_btree_iter_downgrade(iter); -+ return ret; -+err: -+ if (ret != -EINTR) { -+ btree_node_unlock(iter, 0); -+ iter->flags |= BTREE_ITER_ERROR; -+ iter->l[0].b = BTREE_ITER_NO_NODE_ERROR; -+ } -+ return ret; -+} -+ -+static int btree_key_cache_flush_pos(struct btree_trans *trans, -+ struct bkey_cached_key key, -+ u64 journal_seq, -+ bool evict) -+{ -+ struct bch_fs *c = trans->c; -+ struct journal *j = &c->journal; -+ struct btree_iter *c_iter = NULL, *b_iter = NULL; -+ struct bkey_cached *ck; -+ int ret; -+ -+ b_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos, -+ BTREE_ITER_SLOTS| -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(b_iter); -+ if (ret) -+ goto out; -+ -+ c_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos, -+ BTREE_ITER_CACHED| -+ BTREE_ITER_CACHED_NOFILL| -+ BTREE_ITER_CACHED_NOCREATE| -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(c_iter); -+ if (ret) -+ goto out; -+retry: -+ ret = bch2_btree_iter_traverse(c_iter); -+ if (ret) -+ goto err; -+ -+ ck = (void *) c_iter->l[0].b; -+ if (!ck || -+ (journal_seq && ck->journal.seq != journal_seq)) -+ goto out; -+ -+ if (!test_bit(BKEY_CACHED_DIRTY, 
&ck->flags)) { -+ if (!evict) -+ goto out; -+ goto evict; -+ } -+ -+ ret = bch2_btree_iter_traverse(b_iter) ?: -+ bch2_trans_update(trans, b_iter, ck->k, BTREE_TRIGGER_NORUN) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_USE_ALLOC_RESERVE| -+ BTREE_INSERT_JOURNAL_RESERVED| -+ BTREE_INSERT_JOURNAL_RECLAIM); -+err: -+ if (ret == -EINTR) -+ goto retry; -+ -+ BUG_ON(ret && !bch2_journal_error(j)); -+ -+ if (ret) -+ goto out; -+ -+ bch2_journal_pin_drop(j, &ck->journal); -+ bch2_journal_preres_put(j, &ck->res); -+ clear_bit(BKEY_CACHED_DIRTY, &ck->flags); -+ -+ if (!evict) { -+ mutex_lock(&c->btree_key_cache.lock); -+ list_move_tail(&ck->list, &c->btree_key_cache.clean); -+ mutex_unlock(&c->btree_key_cache.lock); -+ } else { -+evict: -+ BUG_ON(!btree_node_intent_locked(c_iter, 0)); -+ -+ mark_btree_node_unlocked(c_iter, 0); -+ c_iter->l[0].b = NULL; -+ -+ six_lock_write(&ck->c.lock, NULL, NULL); -+ -+ mutex_lock(&c->btree_key_cache.lock); -+ bkey_cached_evict(&c->btree_key_cache, ck); -+ bkey_cached_free(&c->btree_key_cache, ck); -+ mutex_unlock(&c->btree_key_cache.lock); -+ } -+out: -+ bch2_trans_iter_put(trans, b_iter); -+ bch2_trans_iter_put(trans, c_iter); -+ return ret; -+} -+ -+static void btree_key_cache_journal_flush(struct journal *j, -+ struct journal_entry_pin *pin, -+ u64 seq) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bkey_cached *ck = -+ container_of(pin, struct bkey_cached, journal); -+ struct bkey_cached_key key; -+ struct btree_trans trans; -+ -+ six_lock_read(&ck->c.lock, NULL, NULL); -+ key = ck->key; -+ -+ if (ck->journal.seq != seq || -+ !test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ six_unlock_read(&ck->c.lock); -+ return; -+ } -+ six_unlock_read(&ck->c.lock); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ btree_key_cache_flush_pos(&trans, key, seq, false); -+ bch2_trans_exit(&trans); -+} -+ -+/* -+ * Flush and evict a key from the key cache: -+ */ -+int bch2_btree_key_cache_flush(struct btree_trans *trans, -+ enum btree_id id, struct bpos pos) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_cached_key key = { id, pos }; -+ -+ /* Fastpath - assume it won't be found: */ -+ if (!btree_key_cache_find(c, id, pos)) -+ return 0; -+ -+ return btree_key_cache_flush_pos(trans, key, 0, true); -+} -+ -+bool bch2_btree_insert_key_cached(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_cached *ck = (void *) iter->l[0].b; -+ -+ BUG_ON(insert->u64s > ck->u64s); -+ -+ if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) { -+ int difference; -+ -+ BUG_ON(jset_u64s(insert->u64s) > trans->journal_preres.u64s); -+ -+ difference = jset_u64s(insert->u64s) - ck->res.u64s; -+ if (difference > 0) { -+ trans->journal_preres.u64s -= difference; -+ ck->res.u64s += difference; -+ } -+ } -+ -+ bkey_copy(ck->k, insert); -+ ck->valid = true; -+ -+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ mutex_lock(&c->btree_key_cache.lock); -+ list_del_init(&ck->list); -+ -+ set_bit(BKEY_CACHED_DIRTY, &ck->flags); -+ mutex_unlock(&c->btree_key_cache.lock); -+ } -+ -+ bch2_journal_pin_update(&c->journal, trans->journal_res.seq, -+ &ck->journal, btree_key_cache_journal_flush); -+ return true; -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_btree_key_cache_verify_clean(struct btree_trans *trans, -+ enum btree_id id, struct bpos pos) -+{ -+ 
BUG_ON(btree_key_cache_find(trans->c, id, pos)); -+} -+#endif -+ -+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *c) -+{ -+ struct bkey_cached *ck, *n; -+ -+ mutex_lock(&c->lock); -+ list_for_each_entry_safe(ck, n, &c->clean, list) { -+ kfree(ck->k); -+ kfree(ck); -+ } -+ list_for_each_entry_safe(ck, n, &c->freed, list) -+ kfree(ck); -+ mutex_unlock(&c->lock); -+ -+ rhashtable_destroy(&c->table); -+} -+ -+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c) -+{ -+ mutex_init(&c->lock); -+ INIT_LIST_HEAD(&c->freed); -+ INIT_LIST_HEAD(&c->clean); -+} -+ -+int bch2_fs_btree_key_cache_init(struct btree_key_cache *c) -+{ -+ return rhashtable_init(&c->table, &bch2_btree_key_cache_params); -+} -+ -+void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c) -+{ -+ struct bucket_table *tbl; -+ struct bkey_cached *ck; -+ struct rhash_head *pos; -+ size_t i; -+ -+ mutex_lock(&c->lock); -+ tbl = rht_dereference_rcu(c->table.tbl, &c->table); -+ -+ for (i = 0; i < tbl->size; i++) { -+ rht_for_each_entry_rcu(ck, pos, tbl, i, hash) { -+ pr_buf(out, "%s:", -+ bch2_btree_ids[ck->key.btree_id]); -+ bch2_bpos_to_text(out, ck->key.pos); -+ -+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) -+ pr_buf(out, " journal seq %llu", ck->journal.seq); -+ pr_buf(out, "\n"); -+ } -+ } -+ mutex_unlock(&c->lock); -+} -diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h -new file mode 100644 -index 000000000000..b1756c6c622c ---- /dev/null -+++ b/fs/bcachefs/btree_key_cache.h -@@ -0,0 +1,25 @@ -+#ifndef _BCACHEFS_BTREE_KEY_CACHE_H -+#define _BCACHEFS_BTREE_KEY_CACHE_H -+ -+int bch2_btree_iter_traverse_cached(struct btree_iter *); -+ -+bool bch2_btree_insert_key_cached(struct btree_trans *, -+ struct btree_iter *, struct bkey_i *); -+int bch2_btree_key_cache_flush(struct btree_trans *, -+ enum btree_id, struct bpos); -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_btree_key_cache_verify_clean(struct btree_trans *, -+ enum btree_id, struct bpos); -+#else -+static inline void -+bch2_btree_key_cache_verify_clean(struct btree_trans *trans, -+ enum btree_id id, struct bpos pos) {} -+#endif -+ -+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *); -+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *); -+int bch2_fs_btree_key_cache_init(struct btree_key_cache *); -+ -+void bch2_btree_key_cache_to_text(struct printbuf *, struct btree_key_cache *); -+ -+#endif /* _BCACHEFS_BTREE_KEY_CACHE_H */ -diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h -new file mode 100644 -index 000000000000..81fbf3e18647 ---- /dev/null -+++ b/fs/bcachefs/btree_locking.h -@@ -0,0 +1,257 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_LOCKING_H -+#define _BCACHEFS_BTREE_LOCKING_H -+ -+/* -+ * Only for internal btree use: -+ * -+ * The btree iterator tracks what locks it wants to take, and what locks it -+ * currently has - here we have wrappers for locking/unlocking btree nodes and -+ * updating the iterator state -+ */ -+ -+#include -+ -+#include "btree_iter.h" -+ -+/* matches six lock types */ -+enum btree_node_locked_type { -+ BTREE_NODE_UNLOCKED = -1, -+ BTREE_NODE_READ_LOCKED = SIX_LOCK_read, -+ BTREE_NODE_INTENT_LOCKED = SIX_LOCK_intent, -+}; -+ -+static inline int btree_node_locked_type(struct btree_iter *iter, -+ unsigned level) -+{ -+ /* -+ * We're relying on the fact that if nodes_intent_locked is set -+ * nodes_locked must be set as well, so that we can compute without -+ * branches: -+ */ -+ return BTREE_NODE_UNLOCKED + 
-+ ((iter->nodes_locked >> level) & 1) + -+ ((iter->nodes_intent_locked >> level) & 1); -+} -+ -+static inline bool btree_node_intent_locked(struct btree_iter *iter, -+ unsigned level) -+{ -+ return btree_node_locked_type(iter, level) == BTREE_NODE_INTENT_LOCKED; -+} -+ -+static inline bool btree_node_read_locked(struct btree_iter *iter, -+ unsigned level) -+{ -+ return btree_node_locked_type(iter, level) == BTREE_NODE_READ_LOCKED; -+} -+ -+static inline bool btree_node_locked(struct btree_iter *iter, unsigned level) -+{ -+ return iter->nodes_locked & (1 << level); -+} -+ -+static inline void mark_btree_node_unlocked(struct btree_iter *iter, -+ unsigned level) -+{ -+ iter->nodes_locked &= ~(1 << level); -+ iter->nodes_intent_locked &= ~(1 << level); -+} -+ -+static inline void mark_btree_node_locked(struct btree_iter *iter, -+ unsigned level, -+ enum six_lock_type type) -+{ -+ /* relying on this to avoid a branch */ -+ BUILD_BUG_ON(SIX_LOCK_read != 0); -+ BUILD_BUG_ON(SIX_LOCK_intent != 1); -+ -+ iter->nodes_locked |= 1 << level; -+ iter->nodes_intent_locked |= type << level; -+} -+ -+static inline void mark_btree_node_intent_locked(struct btree_iter *iter, -+ unsigned level) -+{ -+ mark_btree_node_locked(iter, level, SIX_LOCK_intent); -+} -+ -+static inline enum six_lock_type __btree_lock_want(struct btree_iter *iter, int level) -+{ -+ return level < iter->locks_want -+ ? SIX_LOCK_intent -+ : SIX_LOCK_read; -+} -+ -+static inline enum btree_node_locked_type -+btree_lock_want(struct btree_iter *iter, int level) -+{ -+ if (level < iter->level) -+ return BTREE_NODE_UNLOCKED; -+ if (level < iter->locks_want) -+ return BTREE_NODE_INTENT_LOCKED; -+ if (level == iter->level) -+ return BTREE_NODE_READ_LOCKED; -+ return BTREE_NODE_UNLOCKED; -+} -+ -+static inline void __btree_node_unlock(struct btree_iter *iter, unsigned level) -+{ -+ int lock_type = btree_node_locked_type(iter, level); -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ -+ if (lock_type != BTREE_NODE_UNLOCKED) -+ six_unlock_type(&iter->l[level].b->c.lock, lock_type); -+ mark_btree_node_unlocked(iter, level); -+} -+ -+static inline void btree_node_unlock(struct btree_iter *iter, unsigned level) -+{ -+ EBUG_ON(!level && iter->trans->nounlock); -+ -+ __btree_node_unlock(iter, level); -+} -+ -+static inline void __bch2_btree_iter_unlock(struct btree_iter *iter) -+{ -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK); -+ -+ while (iter->nodes_locked) -+ btree_node_unlock(iter, __ffs(iter->nodes_locked)); -+} -+ -+static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type) -+{ -+ switch (type) { -+ case SIX_LOCK_read: -+ return BCH_TIME_btree_lock_contended_read; -+ case SIX_LOCK_intent: -+ return BCH_TIME_btree_lock_contended_intent; -+ case SIX_LOCK_write: -+ return BCH_TIME_btree_lock_contended_write; -+ default: -+ BUG(); -+ } -+} -+ -+/* -+ * wrapper around six locks that just traces lock contended time -+ */ -+static inline void __btree_node_lock_type(struct bch_fs *c, struct btree *b, -+ enum six_lock_type type) -+{ -+ u64 start_time = local_clock(); -+ -+ six_lock_type(&b->c.lock, type, NULL, NULL); -+ bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time); -+} -+ -+static inline void btree_node_lock_type(struct bch_fs *c, struct btree *b, -+ enum six_lock_type type) -+{ -+ if (!six_trylock_type(&b->c.lock, type)) -+ __btree_node_lock_type(c, b, type); -+} -+ -+/* -+ * Lock a btree node if we already have it locked on one of our linked -+ * iterators: -+ */ -+static inline bool 
btree_node_lock_increment(struct btree_trans *trans, -+ struct btree *b, unsigned level, -+ enum btree_node_locked_type want) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) -+ if (iter->l[level].b == b && -+ btree_node_locked_type(iter, level) >= want) { -+ six_lock_increment(&b->c.lock, want); -+ return true; -+ } -+ -+ return false; -+} -+ -+bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned, -+ struct btree_iter *, enum six_lock_type, -+ six_lock_should_sleep_fn, void *); -+ -+static inline bool btree_node_lock(struct btree *b, -+ struct bpos pos, unsigned level, -+ struct btree_iter *iter, -+ enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p) -+{ -+ struct btree_trans *trans = iter->trans; -+ bool ret; -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx))); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ trans->locking = b; -+ trans->locking_iter_idx = iter->idx; -+ trans->locking_pos = pos; -+ trans->locking_btree_id = iter->btree_id; -+ trans->locking_level = level; -+#endif -+ ret = likely(six_trylock_type(&b->c.lock, type)) || -+ btree_node_lock_increment(trans, b, level, type) || -+ __bch2_btree_node_lock(b, pos, level, iter, type, -+ should_sleep_fn, p); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ trans->locking = NULL; -+#endif -+ return ret; -+} -+ -+bool __bch2_btree_node_relock(struct btree_iter *, unsigned); -+ -+static inline bool bch2_btree_node_relock(struct btree_iter *iter, -+ unsigned level) -+{ -+ EBUG_ON(btree_node_locked(iter, level) && -+ btree_node_locked_type(iter, level) != -+ __btree_lock_want(iter, level)); -+ -+ return likely(btree_node_locked(iter, level)) || -+ __bch2_btree_node_relock(iter, level); -+} -+ -+/* -+ * Updates the saved lock sequence number, so that bch2_btree_node_relock() will -+ * succeed: -+ */ -+static inline void -+bch2_btree_node_unlock_write_inlined(struct btree *b, struct btree_iter *iter) -+{ -+ struct btree_iter *linked; -+ -+ EBUG_ON(iter->l[b->c.level].b != b); -+ EBUG_ON(iter->l[b->c.level].lock_seq + 1 != b->c.lock.state.seq); -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) -+ linked->l[b->c.level].lock_seq += 2; -+ -+ six_unlock_write(&b->c.lock); -+} -+ -+void bch2_btree_node_unlock_write(struct btree *, struct btree_iter *); -+ -+void __bch2_btree_node_lock_write(struct btree *, struct btree_iter *); -+ -+static inline void bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter) -+{ -+ EBUG_ON(iter->l[b->c.level].b != b); -+ EBUG_ON(iter->l[b->c.level].lock_seq != b->c.lock.state.seq); -+ -+ if (unlikely(!six_trylock_write(&b->c.lock))) -+ __bch2_btree_node_lock_write(b, iter); -+} -+ -+#endif /* _BCACHEFS_BTREE_LOCKING_H */ -+ -+ -diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h -new file mode 100644 -index 000000000000..683b416ef427 ---- /dev/null -+++ b/fs/bcachefs/btree_types.h -@@ -0,0 +1,664 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_TYPES_H -+#define _BCACHEFS_BTREE_TYPES_H -+ -+#include -+#include -+#include -+ -+#include "bkey_methods.h" -+#include "buckets_types.h" -+#include "journal_types.h" -+ -+struct open_bucket; -+struct btree_update; -+struct btree_trans; -+ -+#define MAX_BSETS 3U -+ -+struct btree_nr_keys { -+ -+ /* -+ * Amount of live metadata (i.e. 
size of node after a compaction) in -+ * units of u64s -+ */ -+ u16 live_u64s; -+ u16 bset_u64s[MAX_BSETS]; -+ -+ /* live keys only: */ -+ u16 packed_keys; -+ u16 unpacked_keys; -+}; -+ -+struct bset_tree { -+ /* -+ * We construct a binary tree in an array as if the array -+ * started at 1, so that things line up on the same cachelines -+ * better: see comments in bset.c at cacheline_to_bkey() for -+ * details -+ */ -+ -+ /* size of the binary tree and prev array */ -+ u16 size; -+ -+ /* function of size - precalculated for to_inorder() */ -+ u16 extra; -+ -+ u16 data_offset; -+ u16 aux_data_offset; -+ u16 end_offset; -+ -+ struct bpos max_key; -+}; -+ -+struct btree_write { -+ struct journal_entry_pin journal; -+}; -+ -+struct btree_alloc { -+ struct open_buckets ob; -+ BKEY_PADDED(k); -+}; -+ -+struct btree_bkey_cached_common { -+ struct six_lock lock; -+ u8 level; -+ u8 btree_id; -+}; -+ -+struct btree { -+ struct btree_bkey_cached_common c; -+ -+ struct rhash_head hash; -+ u64 hash_val; -+ -+ unsigned long flags; -+ u16 written; -+ u8 nsets; -+ u8 nr_key_bits; -+ -+ struct bkey_format format; -+ -+ struct btree_node *data; -+ void *aux_data; -+ -+ /* -+ * Sets of sorted keys - the real btree node - plus a binary search tree -+ * -+ * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point -+ * to the memory we have allocated for this btree node. Additionally, -+ * set[0]->data points to the entire btree node as it exists on disk. -+ */ -+ struct bset_tree set[MAX_BSETS]; -+ -+ struct btree_nr_keys nr; -+ u16 sib_u64s[2]; -+ u16 whiteout_u64s; -+ u8 byte_order; -+ u8 unpack_fn_len; -+ -+ /* -+ * XXX: add a delete sequence number, so when bch2_btree_node_relock() -+ * fails because the lock sequence number has changed - i.e. the -+ * contents were modified - we can still relock the node if it's still -+ * the one we want, without redoing the traversal -+ */ -+ -+ /* -+ * For asynchronous splits/interior node updates: -+ * When we do a split, we allocate new child nodes and update the parent -+ * node to point to them: we update the parent in memory immediately, -+ * but then we must wait until the children have been written out before -+ * the update to the parent can be written - this is a list of the -+ * btree_updates that are blocking this node from being -+ * written: -+ */ -+ struct list_head write_blocked; -+ -+ /* -+ * Also for asynchronous splits/interior node updates: -+ * If a btree node isn't reachable yet, we don't want to kick off -+ * another write - because that write also won't yet be reachable and -+ * marking it as completed before it's reachable would be incorrect: -+ */ -+ unsigned long will_make_reachable; -+ -+ struct open_buckets ob; -+ -+ /* lru list */ -+ struct list_head list; -+ -+ struct btree_write writes[2]; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ bool *expensive_debug_checks; -+#endif -+ -+ /* Key/pointer for this btree node */ -+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX); -+}; -+ -+struct btree_cache { -+ struct rhashtable table; -+ bool table_init_done; -+ /* -+ * We never free a struct btree, except on shutdown - we just put it on -+ * the btree_cache_freed list and reuse it later. This simplifies the -+ * code, and it doesn't cost us much memory as the memory usage is -+ * dominated by buffers that hold the actual btree node data and those -+ * can be freed - and the number of struct btrees allocated is -+ * effectively bounded. 
-+ * -+ * btree_cache_freeable effectively is a small cache - we use it because -+ * high order page allocations can be rather expensive, and it's quite -+ * common to delete and allocate btree nodes in quick succession. It -+ * should never grow past ~2-3 nodes in practice. -+ */ -+ struct mutex lock; -+ struct list_head live; -+ struct list_head freeable; -+ struct list_head freed; -+ -+ /* Number of elements in live + freeable lists */ -+ unsigned used; -+ unsigned reserve; -+ struct shrinker shrink; -+ -+ /* -+ * If we need to allocate memory for a new btree node and that -+ * allocation fails, we can cannibalize another node in the btree cache -+ * to satisfy the allocation - lock to guarantee only one thread does -+ * this at a time: -+ */ -+ struct task_struct *alloc_lock; -+ struct closure_waitlist alloc_wait; -+}; -+ -+struct btree_node_iter { -+ struct btree_node_iter_set { -+ u16 k, end; -+ } data[MAX_BSETS]; -+}; -+ -+enum btree_iter_type { -+ BTREE_ITER_KEYS, -+ BTREE_ITER_NODES, -+ BTREE_ITER_CACHED, -+}; -+ -+#define BTREE_ITER_TYPE ((1 << 2) - 1) -+ -+/* -+ * Iterate over all possible positions, synthesizing deleted keys for holes: -+ */ -+#define BTREE_ITER_SLOTS (1 << 2) -+/* -+ * Indicates that intent locks should be taken on leaf nodes, because we expect -+ * to be doing updates: -+ */ -+#define BTREE_ITER_INTENT (1 << 3) -+/* -+ * Causes the btree iterator code to prefetch additional btree nodes from disk: -+ */ -+#define BTREE_ITER_PREFETCH (1 << 4) -+/* -+ * Indicates that this iterator should not be reused until transaction commit, -+ * either because a pending update references it or because the update depends -+ * on that particular key being locked (e.g. by the str_hash code, for hash -+ * table consistency) -+ */ -+#define BTREE_ITER_KEEP_UNTIL_COMMIT (1 << 5) -+/* -+ * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for -+ * @pos or the first key strictly greater than @pos -+ */ -+#define BTREE_ITER_IS_EXTENTS (1 << 6) -+#define BTREE_ITER_ERROR (1 << 7) -+#define BTREE_ITER_SET_POS_AFTER_COMMIT (1 << 8) -+#define BTREE_ITER_CACHED_NOFILL (1 << 9) -+#define BTREE_ITER_CACHED_NOCREATE (1 << 10) -+ -+#define BTREE_ITER_USER_FLAGS \ -+ (BTREE_ITER_SLOTS \ -+ |BTREE_ITER_INTENT \ -+ |BTREE_ITER_PREFETCH \ -+ |BTREE_ITER_CACHED_NOFILL \ -+ |BTREE_ITER_CACHED_NOCREATE) -+ -+enum btree_iter_uptodate { -+ BTREE_ITER_UPTODATE = 0, -+ BTREE_ITER_NEED_PEEK = 1, -+ BTREE_ITER_NEED_RELOCK = 2, -+ BTREE_ITER_NEED_TRAVERSE = 3, -+}; -+ -+#define BTREE_ITER_NO_NODE_GET_LOCKS ((struct btree *) 1) -+#define BTREE_ITER_NO_NODE_DROP ((struct btree *) 2) -+#define BTREE_ITER_NO_NODE_LOCK_ROOT ((struct btree *) 3) -+#define BTREE_ITER_NO_NODE_UP ((struct btree *) 4) -+#define BTREE_ITER_NO_NODE_DOWN ((struct btree *) 5) -+#define BTREE_ITER_NO_NODE_INIT ((struct btree *) 6) -+#define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7) -+ -+/* -+ * @pos - iterator's current position -+ * @level - current btree depth -+ * @locks_want - btree level below which we start taking intent locks -+ * @nodes_locked - bitmask indicating which nodes in @nodes are locked -+ * @nodes_intent_locked - bitmask indicating which locks are intent locks -+ */ -+struct btree_iter { -+ struct btree_trans *trans; -+ struct bpos pos; -+ struct bpos pos_after_commit; -+ -+ u16 flags; -+ u8 idx; -+ -+ enum btree_id btree_id:4; -+ enum btree_iter_uptodate uptodate:4; -+ unsigned level:4, -+ min_depth:4, -+ locks_want:4, -+ nodes_locked:4, -+ nodes_intent_locked:4; -+ -+ struct btree_iter_level 
{ -+ struct btree *b; -+ struct btree_node_iter iter; -+ u32 lock_seq; -+ } l[BTREE_MAX_DEPTH]; -+ -+ /* -+ * Current unpacked key - so that bch2_btree_iter_next()/ -+ * bch2_btree_iter_next_slot() can correctly advance pos. -+ */ -+ struct bkey k; -+ unsigned long ip_allocated; -+}; -+ -+static inline enum btree_iter_type -+btree_iter_type(const struct btree_iter *iter) -+{ -+ return iter->flags & BTREE_ITER_TYPE; -+} -+ -+static inline struct btree_iter_level *iter_l(struct btree_iter *iter) -+{ -+ return iter->l + iter->level; -+} -+ -+struct btree_key_cache { -+ struct mutex lock; -+ struct rhashtable table; -+ struct list_head freed; -+ struct list_head clean; -+}; -+ -+struct bkey_cached_key { -+ u32 btree_id; -+ struct bpos pos; -+} __attribute__((packed, aligned(4))); -+ -+#define BKEY_CACHED_DIRTY 0 -+ -+struct bkey_cached { -+ struct btree_bkey_cached_common c; -+ -+ unsigned long flags; -+ u8 u64s; -+ bool valid; -+ struct bkey_cached_key key; -+ -+ struct rhash_head hash; -+ struct list_head list; -+ -+ struct journal_preres res; -+ struct journal_entry_pin journal; -+ -+ struct bkey_i *k; -+}; -+ -+struct btree_insert_entry { -+ unsigned trigger_flags; -+ unsigned trans_triggers_run:1; -+ struct bkey_i *k; -+ struct btree_iter *iter; -+}; -+ -+#ifndef CONFIG_LOCKDEP -+#define BTREE_ITER_MAX 64 -+#else -+#define BTREE_ITER_MAX 32 -+#endif -+ -+struct btree_trans { -+ struct bch_fs *c; -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct list_head list; -+ struct btree *locking; -+ unsigned locking_iter_idx; -+ struct bpos locking_pos; -+ u8 locking_btree_id; -+ u8 locking_level; -+ pid_t pid; -+#endif -+ unsigned long ip; -+ -+ u64 iters_linked; -+ u64 iters_live; -+ u64 iters_touched; -+ -+ u8 nr_iters; -+ u8 nr_updates; -+ u8 nr_updates2; -+ u8 size; -+ unsigned used_mempool:1; -+ unsigned error:1; -+ unsigned nounlock:1; -+ unsigned need_reset:1; -+ unsigned in_traverse_all:1; -+ -+ unsigned mem_top; -+ unsigned mem_bytes; -+ void *mem; -+ -+ struct btree_iter *iters; -+ struct btree_insert_entry *updates; -+ struct btree_insert_entry *updates2; -+ -+ /* update path: */ -+ struct jset_entry *extra_journal_entries; -+ unsigned extra_journal_entry_u64s; -+ struct journal_entry_pin *journal_pin; -+ -+ struct journal_res journal_res; -+ struct journal_preres journal_preres; -+ u64 *journal_seq; -+ struct disk_reservation *disk_res; -+ unsigned flags; -+ unsigned journal_u64s; -+ unsigned journal_preres_u64s; -+ struct replicas_delta_list *fs_usage_deltas; -+ -+ struct btree_iter iters_onstack[2]; -+ struct btree_insert_entry updates_onstack[2]; -+ struct btree_insert_entry updates2_onstack[2]; -+}; -+ -+#define BTREE_FLAG(flag) \ -+static inline bool btree_node_ ## flag(struct btree *b) \ -+{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \ -+ \ -+static inline void set_btree_node_ ## flag(struct btree *b) \ -+{ set_bit(BTREE_NODE_ ## flag, &b->flags); } \ -+ \ -+static inline void clear_btree_node_ ## flag(struct btree *b) \ -+{ clear_bit(BTREE_NODE_ ## flag, &b->flags); } -+ -+enum btree_flags { -+ BTREE_NODE_read_in_flight, -+ BTREE_NODE_read_error, -+ BTREE_NODE_dirty, -+ BTREE_NODE_need_write, -+ BTREE_NODE_noevict, -+ BTREE_NODE_write_idx, -+ BTREE_NODE_accessed, -+ BTREE_NODE_write_in_flight, -+ BTREE_NODE_just_written, -+ BTREE_NODE_dying, -+ BTREE_NODE_fake, -+ BTREE_NODE_old_extent_overwrite, -+ BTREE_NODE_need_rewrite, -+}; -+ -+BTREE_FLAG(read_in_flight); -+BTREE_FLAG(read_error); -+BTREE_FLAG(dirty); -+BTREE_FLAG(need_write); -+BTREE_FLAG(noevict); -+BTREE_FLAG(write_idx); 
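The BTREE_FLAG() macro in the hunk above generates a test/set/clear helper triple for every bit in btree->flags. A minimal userspace sketch of the same token-pasting pattern, assuming hypothetical DEMO_* names and with plain (non-atomic) bit operations standing in for the kernel's test_bit()/set_bit()/clear_bit():

#include <stdbool.h>
#include <stdio.h>

struct demo_node {
	unsigned long flags;
};

enum demo_flags {
	DEMO_dirty,
	DEMO_need_write,
};

/* one macro invocation per flag expands to a test/set/clear triple */
#define DEMO_FLAG(flag) \
static inline bool demo_node_ ## flag(struct demo_node *n) \
{ return n->flags & (1UL << DEMO_ ## flag); } \
 \
static inline void set_demo_node_ ## flag(struct demo_node *n) \
{ n->flags |= 1UL << DEMO_ ## flag; } \
 \
static inline void clear_demo_node_ ## flag(struct demo_node *n) \
{ n->flags &= ~(1UL << DEMO_ ## flag); }

DEMO_FLAG(dirty)
DEMO_FLAG(need_write)

int main(void)
{
	struct demo_node n = { 0 };

	set_demo_node_dirty(&n);
	printf("dirty=%d need_write=%d\n",
	       demo_node_dirty(&n), demo_node_need_write(&n));
	clear_demo_node_dirty(&n);
	printf("dirty=%d\n", demo_node_dirty(&n));
	return 0;
}

The payoff of the pattern is that adding a flag to the enum automatically gives consistently named accessors, instead of hand-writing three near-identical helpers per bit.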
-+BTREE_FLAG(accessed); -+BTREE_FLAG(write_in_flight); -+BTREE_FLAG(just_written); -+BTREE_FLAG(dying); -+BTREE_FLAG(fake); -+BTREE_FLAG(old_extent_overwrite); -+BTREE_FLAG(need_rewrite); -+ -+static inline struct btree_write *btree_current_write(struct btree *b) -+{ -+ return b->writes + btree_node_write_idx(b); -+} -+ -+static inline struct btree_write *btree_prev_write(struct btree *b) -+{ -+ return b->writes + (btree_node_write_idx(b) ^ 1); -+} -+ -+static inline struct bset_tree *bset_tree_last(struct btree *b) -+{ -+ EBUG_ON(!b->nsets); -+ return b->set + b->nsets - 1; -+} -+ -+static inline void * -+__btree_node_offset_to_ptr(const struct btree *b, u16 offset) -+{ -+ return (void *) ((u64 *) b->data + 1 + offset); -+} -+ -+static inline u16 -+__btree_node_ptr_to_offset(const struct btree *b, const void *p) -+{ -+ u16 ret = (u64 *) p - 1 - (u64 *) b->data; -+ -+ EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p); -+ return ret; -+} -+ -+static inline struct bset *bset(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ return __btree_node_offset_to_ptr(b, t->data_offset); -+} -+ -+static inline void set_btree_bset_end(struct btree *b, struct bset_tree *t) -+{ -+ t->end_offset = -+ __btree_node_ptr_to_offset(b, vstruct_last(bset(b, t))); -+} -+ -+static inline void set_btree_bset(struct btree *b, struct bset_tree *t, -+ const struct bset *i) -+{ -+ t->data_offset = __btree_node_ptr_to_offset(b, i); -+ set_btree_bset_end(b, t); -+} -+ -+static inline struct bset *btree_bset_first(struct btree *b) -+{ -+ return bset(b, b->set); -+} -+ -+static inline struct bset *btree_bset_last(struct btree *b) -+{ -+ return bset(b, bset_tree_last(b)); -+} -+ -+static inline u16 -+__btree_node_key_to_offset(const struct btree *b, const struct bkey_packed *k) -+{ -+ return __btree_node_ptr_to_offset(b, k); -+} -+ -+static inline struct bkey_packed * -+__btree_node_offset_to_key(const struct btree *b, u16 k) -+{ -+ return __btree_node_offset_to_ptr(b, k); -+} -+ -+static inline unsigned btree_bkey_first_offset(const struct bset_tree *t) -+{ -+ return t->data_offset + offsetof(struct bset, _data) / sizeof(u64); -+} -+ -+#define btree_bkey_first(_b, _t) \ -+({ \ -+ EBUG_ON(bset(_b, _t)->start != \ -+ __btree_node_offset_to_key(_b, btree_bkey_first_offset(_t)));\ -+ \ -+ bset(_b, _t)->start; \ -+}) -+ -+#define btree_bkey_last(_b, _t) \ -+({ \ -+ EBUG_ON(__btree_node_offset_to_key(_b, (_t)->end_offset) != \ -+ vstruct_last(bset(_b, _t))); \ -+ \ -+ __btree_node_offset_to_key(_b, (_t)->end_offset); \ -+}) -+ -+static inline unsigned bset_u64s(struct bset_tree *t) -+{ -+ return t->end_offset - t->data_offset - -+ sizeof(struct bset) / sizeof(u64); -+} -+ -+static inline unsigned bset_dead_u64s(struct btree *b, struct bset_tree *t) -+{ -+ return bset_u64s(t) - b->nr.bset_u64s[t - b->set]; -+} -+ -+static inline unsigned bset_byte_offset(struct btree *b, void *i) -+{ -+ return i - (void *) b->data; -+} -+ -+enum btree_node_type { -+#define x(kwd, val, name) BKEY_TYPE_##kwd = val, -+ BCH_BTREE_IDS() -+#undef x -+ BKEY_TYPE_BTREE, -+}; -+ -+/* Type of a key in btree @id at level @level: */ -+static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id) -+{ -+ return level ? 
BKEY_TYPE_BTREE : (enum btree_node_type) id; -+} -+ -+/* Type of keys @b contains: */ -+static inline enum btree_node_type btree_node_type(struct btree *b) -+{ -+ return __btree_node_type(b->c.level, b->c.btree_id); -+} -+ -+static inline bool btree_node_type_is_extents(enum btree_node_type type) -+{ -+ switch (type) { -+ case BKEY_TYPE_EXTENTS: -+ case BKEY_TYPE_REFLINK: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline bool btree_node_is_extents(struct btree *b) -+{ -+ return btree_node_type_is_extents(btree_node_type(b)); -+} -+ -+static inline enum btree_node_type btree_iter_key_type(struct btree_iter *iter) -+{ -+ return __btree_node_type(iter->level, iter->btree_id); -+} -+ -+static inline bool btree_iter_is_extents(struct btree_iter *iter) -+{ -+ return btree_node_type_is_extents(btree_iter_key_type(iter)); -+} -+ -+#define BTREE_NODE_TYPE_HAS_TRIGGERS \ -+ ((1U << BKEY_TYPE_EXTENTS)| \ -+ (1U << BKEY_TYPE_ALLOC)| \ -+ (1U << BKEY_TYPE_INODES)| \ -+ (1U << BKEY_TYPE_REFLINK)| \ -+ (1U << BKEY_TYPE_EC)| \ -+ (1U << BKEY_TYPE_BTREE)) -+ -+#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ -+ ((1U << BKEY_TYPE_EXTENTS)| \ -+ (1U << BKEY_TYPE_INODES)| \ -+ (1U << BKEY_TYPE_REFLINK)) -+ -+enum btree_trigger_flags { -+ __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */ -+ -+ __BTREE_TRIGGER_INSERT, -+ __BTREE_TRIGGER_OVERWRITE, -+ __BTREE_TRIGGER_OVERWRITE_SPLIT, -+ -+ __BTREE_TRIGGER_GC, -+ __BTREE_TRIGGER_BUCKET_INVALIDATE, -+ __BTREE_TRIGGER_ALLOC_READ, -+ __BTREE_TRIGGER_NOATOMIC, -+}; -+ -+#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN) -+ -+#define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT) -+#define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE) -+#define BTREE_TRIGGER_OVERWRITE_SPLIT (1U << __BTREE_TRIGGER_OVERWRITE_SPLIT) -+ -+#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC) -+#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) -+#define BTREE_TRIGGER_ALLOC_READ (1U << __BTREE_TRIGGER_ALLOC_READ) -+#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC) -+ -+static inline bool btree_node_type_needs_gc(enum btree_node_type type) -+{ -+ return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type); -+} -+ -+struct btree_root { -+ struct btree *b; -+ -+ /* On disk root - see async splits: */ -+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX); -+ u8 level; -+ u8 alive; -+ s8 error; -+}; -+ -+/* -+ * Optional hook that will be called just prior to a btree node update, when -+ * we're holding the write lock and we know what key is about to be overwritten: -+ */ -+ -+enum btree_insert_ret { -+ BTREE_INSERT_OK, -+ /* leaf node needs to be split */ -+ BTREE_INSERT_BTREE_NODE_FULL, -+ BTREE_INSERT_ENOSPC, -+ BTREE_INSERT_NEED_MARK_REPLICAS, -+ BTREE_INSERT_NEED_JOURNAL_RES, -+}; -+ -+enum btree_gc_coalesce_fail_reason { -+ BTREE_GC_COALESCE_FAIL_RESERVE_GET, -+ BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC, -+ BTREE_GC_COALESCE_FAIL_FORMAT_FITS, -+}; -+ -+enum btree_node_sibling { -+ btree_prev_sib, -+ btree_next_sib, -+}; -+ -+typedef struct btree_nr_keys (*sort_fix_overlapping_fn)(struct bset *, -+ struct btree *, -+ struct btree_node_iter *); -+ -+#endif /* _BCACHEFS_BTREE_TYPES_H */ -diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h -new file mode 100644 -index 000000000000..e0b1bde37484 ---- /dev/null -+++ b/fs/bcachefs/btree_update.h -@@ -0,0 +1,144 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_UPDATE_H -+#define _BCACHEFS_BTREE_UPDATE_H -+ -+#include "btree_iter.h" 
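Helpers in the preceding hunk such as __btree_node_offset_to_ptr() and __btree_node_ptr_to_offset() address positions inside a btree node as counts of u64s past the node's first u64 rather than as raw pointers, which is why struct bset_tree can store them in u16 fields. A rough standalone sketch of that round trip, assuming a hypothetical demo_node_buf layout in place of the real struct btree_node:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_node_buf {
	uint64_t header;	/* stand-in for the leading u64 skipped by the "+ 1" */
	uint64_t data[256];
};

/* offset is measured in u64s past the node header, so it fits in a u16 */
static void *offset_to_ptr(struct demo_node_buf *n, uint16_t offset)
{
	return (uint64_t *) n + 1 + offset;
}

static uint16_t ptr_to_offset(struct demo_node_buf *n, void *p)
{
	uint16_t ret = (uint16_t) ((uint64_t *) p - 1 - (uint64_t *) n);

	/* same self-check as the original helper */
	assert(offset_to_ptr(n, ret) == p);
	return ret;
}

int main(void)
{
	struct demo_node_buf *n = calloc(1, sizeof(*n));
	void *p = offset_to_ptr(n, 42);

	printf("round trip: %u\n", ptr_to_offset(n, p));	/* prints 42 */
	free(n);
	return 0;
}

Keeping offsets instead of pointers both shrinks the per-bset bookkeeping and keeps it valid relative to wherever the node buffer lives.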
-+#include "journal.h" -+ -+struct bch_fs; -+struct btree; -+ -+void bch2_btree_node_lock_for_insert(struct bch_fs *, struct btree *, -+ struct btree_iter *); -+bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *, -+ struct btree_node_iter *, struct bkey_i *); -+void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64); -+ -+enum btree_insert_flags { -+ __BTREE_INSERT_NOUNLOCK, -+ __BTREE_INSERT_NOFAIL, -+ __BTREE_INSERT_NOCHECK_RW, -+ __BTREE_INSERT_LAZY_RW, -+ __BTREE_INSERT_USE_RESERVE, -+ __BTREE_INSERT_USE_ALLOC_RESERVE, -+ __BTREE_INSERT_JOURNAL_REPLAY, -+ __BTREE_INSERT_JOURNAL_RESERVED, -+ __BTREE_INSERT_JOURNAL_RECLAIM, -+ __BTREE_INSERT_NOWAIT, -+ __BTREE_INSERT_GC_LOCK_HELD, -+ __BCH_HASH_SET_MUST_CREATE, -+ __BCH_HASH_SET_MUST_REPLACE, -+}; -+ -+/* -+ * Don't drop locks _after_ successfully updating btree: -+ */ -+#define BTREE_INSERT_NOUNLOCK (1 << __BTREE_INSERT_NOUNLOCK) -+ -+/* Don't check for -ENOSPC: */ -+#define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL) -+ -+#define BTREE_INSERT_NOCHECK_RW (1 << __BTREE_INSERT_NOCHECK_RW) -+#define BTREE_INSERT_LAZY_RW (1 << __BTREE_INSERT_LAZY_RW) -+ -+/* for copygc, or when merging btree nodes */ -+#define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE) -+#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE) -+ -+/* Insert is for journal replay - don't get journal reservations: */ -+#define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY) -+ -+/* Indicates that we have pre-reserved space in the journal: */ -+#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED) -+ -+/* Insert is being called from journal reclaim path: */ -+#define BTREE_INSERT_JOURNAL_RECLAIM (1 << __BTREE_INSERT_JOURNAL_RECLAIM) -+ -+/* Don't block on allocation failure (for new btree nodes: */ -+#define BTREE_INSERT_NOWAIT (1 << __BTREE_INSERT_NOWAIT) -+#define BTREE_INSERT_GC_LOCK_HELD (1 << __BTREE_INSERT_GC_LOCK_HELD) -+ -+#define BCH_HASH_SET_MUST_CREATE (1 << __BCH_HASH_SET_MUST_CREATE) -+#define BCH_HASH_SET_MUST_REPLACE (1 << __BCH_HASH_SET_MUST_REPLACE) -+ -+int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned); -+ -+int __bch2_btree_insert(struct btree_trans *, enum btree_id, struct bkey_i *); -+int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, -+ struct disk_reservation *, u64 *, int flags); -+ -+int bch2_btree_delete_at_range(struct btree_trans *, struct btree_iter *, -+ struct bpos, u64 *); -+int bch2_btree_delete_range(struct bch_fs *, enum btree_id, -+ struct bpos, struct bpos, u64 *); -+ -+int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *, -+ __le64, unsigned); -+int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *, -+ struct btree *, struct bkey_i *); -+ -+int bch2_trans_update(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *, enum btree_trigger_flags); -+int __bch2_trans_commit(struct btree_trans *); -+ -+/** -+ * bch2_trans_commit - insert keys at given iterator positions -+ * -+ * This is main entry point for btree updates. -+ * -+ * Return values: -+ * -EINTR: locking changed, this function should be called again. 
-+ * -EROFS: filesystem read only -+ * -EIO: journal or btree node IO error -+ */ -+static inline int bch2_trans_commit(struct btree_trans *trans, -+ struct disk_reservation *disk_res, -+ u64 *journal_seq, -+ unsigned flags) -+{ -+ trans->disk_res = disk_res; -+ trans->journal_seq = journal_seq; -+ trans->flags = flags; -+ -+ return __bch2_trans_commit(trans); -+} -+ -+#define __bch2_trans_do(_trans, _disk_res, _journal_seq, _flags, _do) \ -+({ \ -+ int _ret; \ -+ \ -+ while (1) { \ -+ _ret = (_do) ?: bch2_trans_commit(_trans, (_disk_res), \ -+ (_journal_seq), (_flags)); \ -+ if (_ret != -EINTR) \ -+ break; \ -+ bch2_trans_reset(_trans, 0); \ -+ } \ -+ \ -+ _ret; \ -+}) -+ -+#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \ -+({ \ -+ struct btree_trans trans; \ -+ int _ret, _ret2; \ -+ \ -+ bch2_trans_init(&trans, (_c), 0, 0); \ -+ _ret = __bch2_trans_do(&trans, _disk_res, _journal_seq, _flags, \ -+ _do); \ -+ _ret2 = bch2_trans_exit(&trans); \ -+ \ -+ _ret ?: _ret2; \ -+}) -+ -+#define trans_for_each_update(_trans, _i) \ -+ for ((_i) = (_trans)->updates; \ -+ (_i) < (_trans)->updates + (_trans)->nr_updates; \ -+ (_i)++) -+ -+#define trans_for_each_update2(_trans, _i) \ -+ for ((_i) = (_trans)->updates2; \ -+ (_i) < (_trans)->updates2 + (_trans)->nr_updates2; \ -+ (_i)++) -+ -+#endif /* _BCACHEFS_BTREE_UPDATE_H */ -diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -new file mode 100644 -index 000000000000..a2604b0ce2d8 ---- /dev/null -+++ b/fs/bcachefs/btree_update_interior.c -@@ -0,0 +1,2075 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_methods.h" -+#include "btree_cache.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_locking.h" -+#include "buckets.h" -+#include "extents.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+#include "keylist.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+#include -+#include -+ -+/* Debug code: */ -+ -+/* -+ * Verify that child nodes correctly span parent node's range: -+ */ -+static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bpos next_node = b->data->min_key; -+ struct btree_node_iter iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_btree_ptr_v2 bp; -+ struct bkey unpacked; -+ -+ BUG_ON(!b->c.level); -+ -+ if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags)) -+ return; -+ -+ bch2_btree_node_iter_init_from_start(&iter, b); -+ -+ while (1) { -+ k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked); -+ if (k.k->type != KEY_TYPE_btree_ptr_v2) -+ break; -+ bp = bkey_s_c_to_btree_ptr_v2(k); -+ -+ BUG_ON(bkey_cmp(next_node, bp.v->min_key)); -+ -+ bch2_btree_node_iter_advance(&iter, b); -+ -+ if (bch2_btree_node_iter_end(&iter)) { -+ BUG_ON(bkey_cmp(k.k->p, b->key.k.p)); -+ break; -+ } -+ -+ next_node = bkey_successor(k.k->p); -+ } -+#endif -+} -+ -+/* Calculate ideal packed bkey format for new btree nodes: */ -+ -+void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b) -+{ -+ struct bkey_packed *k; -+ struct bset_tree *t; -+ struct bkey uk; -+ -+ bch2_bkey_format_add_pos(s, b->data->min_key); -+ -+ for_each_bset(b, t) -+ bset_tree_for_each_key(b, t, k) -+ if (!bkey_whiteout(k)) { -+ uk = bkey_unpack_key(b, k); -+ bch2_bkey_format_add_key(s, &uk); -+ } -+} -+ -+static struct bkey_format bch2_btree_calc_format(struct 
btree *b) -+{ -+ struct bkey_format_state s; -+ -+ bch2_bkey_format_init(&s); -+ __bch2_btree_calc_format(&s, b); -+ -+ return bch2_bkey_format_done(&s); -+} -+ -+static size_t btree_node_u64s_with_format(struct btree *b, -+ struct bkey_format *new_f) -+{ -+ struct bkey_format *old_f = &b->format; -+ -+ /* stupid integer promotion rules */ -+ ssize_t delta = -+ (((int) new_f->key_u64s - old_f->key_u64s) * -+ (int) b->nr.packed_keys) + -+ (((int) new_f->key_u64s - BKEY_U64s) * -+ (int) b->nr.unpacked_keys); -+ -+ BUG_ON(delta + b->nr.live_u64s < 0); -+ -+ return b->nr.live_u64s + delta; -+} -+ -+/** -+ * btree_node_format_fits - check if we could rewrite node with a new format -+ * -+ * This assumes all keys can pack with the new format -- it just checks if -+ * the re-packed keys would fit inside the node itself. -+ */ -+bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b, -+ struct bkey_format *new_f) -+{ -+ size_t u64s = btree_node_u64s_with_format(b, new_f); -+ -+ return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c); -+} -+ -+/* Btree node freeing/allocation: */ -+ -+static void __btree_node_free(struct bch_fs *c, struct btree *b) -+{ -+ trace_btree_node_free(c, b); -+ -+ BUG_ON(btree_node_dirty(b)); -+ BUG_ON(btree_node_need_write(b)); -+ BUG_ON(b == btree_node_root(c, b)); -+ BUG_ON(b->ob.nr); -+ BUG_ON(!list_empty(&b->write_blocked)); -+ BUG_ON(b->will_make_reachable); -+ -+ clear_btree_node_noevict(b); -+ -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ mutex_lock(&c->btree_cache.lock); -+ list_move(&b->list, &c->btree_cache.freeable); -+ mutex_unlock(&c->btree_cache.lock); -+} -+ -+void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b) -+{ -+ struct open_buckets ob = b->ob; -+ -+ b->ob.nr = 0; -+ -+ clear_btree_node_dirty(b); -+ -+ btree_node_lock_type(c, b, SIX_LOCK_write); -+ __btree_node_free(c, b); -+ six_unlock_write(&b->c.lock); -+ -+ bch2_open_buckets_put(c, &ob); -+} -+ -+void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter) -+{ -+ struct btree_iter *linked; -+ -+ trans_for_each_iter(iter->trans, linked) -+ BUG_ON(linked->l[b->c.level].b == b); -+ -+ six_lock_write(&b->c.lock, NULL, NULL); -+ __btree_node_free(c, b); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+} -+ -+static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, -+ struct disk_reservation *res, -+ struct closure *cl, -+ unsigned flags) -+{ -+ struct write_point *wp; -+ struct btree *b; -+ BKEY_PADDED(k) tmp; -+ struct open_buckets ob = { .nr = 0 }; -+ struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; -+ unsigned nr_reserve; -+ enum alloc_reserve alloc_reserve; -+ -+ if (flags & BTREE_INSERT_USE_ALLOC_RESERVE) { -+ nr_reserve = 0; -+ alloc_reserve = RESERVE_ALLOC; -+ } else if (flags & BTREE_INSERT_USE_RESERVE) { -+ nr_reserve = BTREE_NODE_RESERVE / 2; -+ alloc_reserve = RESERVE_BTREE; -+ } else { -+ nr_reserve = BTREE_NODE_RESERVE; -+ alloc_reserve = RESERVE_NONE; -+ } -+ -+ mutex_lock(&c->btree_reserve_cache_lock); -+ if (c->btree_reserve_cache_nr > nr_reserve) { -+ struct btree_alloc *a = -+ &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; -+ -+ ob = a->ob; -+ bkey_copy(&tmp.k, &a->k); -+ mutex_unlock(&c->btree_reserve_cache_lock); -+ goto mem_alloc; -+ } -+ mutex_unlock(&c->btree_reserve_cache_lock); -+ -+retry: -+ wp = bch2_alloc_sectors_start(c, c->opts.foreground_target, 0, -+ writepoint_ptr(&c->btree_write_point), -+ &devs_have, -+ res->nr_replicas, -+ 
c->opts.metadata_replicas_required, -+ alloc_reserve, 0, cl); -+ if (IS_ERR(wp)) -+ return ERR_CAST(wp); -+ -+ if (wp->sectors_free < c->opts.btree_node_size) { -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ if (ob->sectors_free < c->opts.btree_node_size) -+ ob->sectors_free = 0; -+ -+ bch2_alloc_sectors_done(c, wp); -+ goto retry; -+ } -+ -+ if (c->sb.features & (1ULL << BCH_FEATURE_btree_ptr_v2)) -+ bkey_btree_ptr_v2_init(&tmp.k); -+ else -+ bkey_btree_ptr_init(&tmp.k); -+ -+ bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size); -+ -+ bch2_open_bucket_get(c, wp, &ob); -+ bch2_alloc_sectors_done(c, wp); -+mem_alloc: -+ b = bch2_btree_node_mem_alloc(c); -+ -+ /* we hold cannibalize_lock: */ -+ BUG_ON(IS_ERR(b)); -+ BUG_ON(b->ob.nr); -+ -+ bkey_copy(&b->key, &tmp.k); -+ b->ob = ob; -+ -+ return b; -+} -+ -+static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned level) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *b; -+ int ret; -+ -+ BUG_ON(level >= BTREE_MAX_DEPTH); -+ BUG_ON(!as->nr_prealloc_nodes); -+ -+ b = as->prealloc_nodes[--as->nr_prealloc_nodes]; -+ -+ set_btree_node_accessed(b); -+ set_btree_node_dirty(b); -+ set_btree_node_need_write(b); -+ -+ bch2_bset_init_first(b, &b->data->keys); -+ b->c.level = level; -+ b->c.btree_id = as->btree_id; -+ -+ memset(&b->nr, 0, sizeof(b->nr)); -+ b->data->magic = cpu_to_le64(bset_magic(c)); -+ b->data->flags = 0; -+ SET_BTREE_NODE_ID(b->data, as->btree_id); -+ SET_BTREE_NODE_LEVEL(b->data, level); -+ b->data->ptr = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)).start->ptr; -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key); -+ -+ bp->v.mem_ptr = 0; -+ bp->v.seq = b->data->keys.seq; -+ bp->v.sectors_written = 0; -+ bp->v.sectors = cpu_to_le16(c->opts.btree_node_size); -+ } -+ -+ if (c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite)) -+ SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true); -+ -+ if (btree_node_is_extents(b) && -+ !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) { -+ set_btree_node_old_extent_overwrite(b); -+ set_btree_node_need_rewrite(b); -+ } -+ -+ bch2_btree_build_aux_trees(b); -+ -+ ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id); -+ BUG_ON(ret); -+ -+ trace_btree_node_alloc(c, b); -+ return b; -+} -+ -+static void btree_set_min(struct btree *b, struct bpos pos) -+{ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) -+ bkey_i_to_btree_ptr_v2(&b->key)->v.min_key = pos; -+ b->data->min_key = pos; -+} -+ -+static void btree_set_max(struct btree *b, struct bpos pos) -+{ -+ b->key.k.p = pos; -+ b->data->max_key = pos; -+} -+ -+struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as, -+ struct btree *b, -+ struct bkey_format format) -+{ -+ struct btree *n; -+ -+ n = bch2_btree_node_alloc(as, b->c.level); -+ -+ SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1); -+ -+ btree_set_min(n, b->data->min_key); -+ btree_set_max(n, b->data->max_key); -+ -+ n->data->format = format; -+ btree_node_set_format(n, format); -+ -+ bch2_btree_sort_into(as->c, n, b); -+ -+ btree_node_reset_sib_u64s(n); -+ -+ n->key.k.p = b->key.k.p; -+ return n; -+} -+ -+static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as, -+ struct btree *b) -+{ -+ struct bkey_format new_f = bch2_btree_calc_format(b); -+ -+ /* -+ * The keys might expand with the new format - if they wouldn't fit in -+ * the btree node anymore, use the old format for now: -+ */ 
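btree_node_u64s_with_format() in the hunk above estimates how many u64s the node would occupy if every key were repacked with a candidate format, and the check that follows falls back to the old format when that estimate would not fit in the node. A small worked version of the same arithmetic; DEMO_BKEY_U64S and the counter values are chosen purely for illustration and are not taken from the patch:

#include <assert.h>
#include <stdio.h>

#define DEMO_BKEY_U64S 3	/* illustrative stand-in for BKEY_U64s */

struct demo_nr_keys {
	unsigned live_u64s;
	unsigned packed_keys;
	unsigned unpacked_keys;
};

static long demo_u64s_with_format(struct demo_nr_keys nr,
				  int old_key_u64s, int new_key_u64s)
{
	/* the counters are unsigned, so compute the (possibly negative)
	 * delta in signed arithmetic, as btree_node_u64s_with_format() does */
	long delta = (long) (new_key_u64s - old_key_u64s) * (int) nr.packed_keys +
		     (long) (new_key_u64s - DEMO_BKEY_U64S) * (int) nr.unpacked_keys;

	assert((long) nr.live_u64s + delta >= 0);
	return (long) nr.live_u64s + delta;
}

int main(void)
{
	struct demo_nr_keys nr = {
		.live_u64s	= 900,
		.packed_keys	= 100,
		.unpacked_keys	= 20,
	};

	/* packed keys shrink from 2 u64s to 1 each, unpacked keys (3 u64s)
	 * pack down to 1: 900 - 100 - 40 = 760 */
	printf("estimated size after repack: %ld u64s\n",
	       demo_u64s_with_format(nr, 2, 1));
	return 0;
}

The estimate only has to answer one question: whether rewriting the node with the new format could overflow it, in which case the safer choice is to keep the old format for now.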
-+ if (!bch2_btree_node_format_fits(as->c, b, &new_f)) -+ new_f = b->format; -+ -+ return __bch2_btree_node_alloc_replacement(as, b, new_f); -+} -+ -+static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level) -+{ -+ struct btree *b = bch2_btree_node_alloc(as, level); -+ -+ btree_set_min(b, POS_MIN); -+ btree_set_max(b, POS_MAX); -+ b->data->format = bch2_btree_calc_format(b); -+ -+ btree_node_set_format(b, b->data->format); -+ bch2_btree_build_aux_trees(b); -+ -+ bch2_btree_update_add_new_node(as, b); -+ six_unlock_write(&b->c.lock); -+ -+ return b; -+} -+ -+static void bch2_btree_reserve_put(struct btree_update *as) -+{ -+ struct bch_fs *c = as->c; -+ -+ mutex_lock(&c->btree_reserve_cache_lock); -+ -+ while (as->nr_prealloc_nodes) { -+ struct btree *b = as->prealloc_nodes[--as->nr_prealloc_nodes]; -+ -+ six_unlock_write(&b->c.lock); -+ -+ if (c->btree_reserve_cache_nr < -+ ARRAY_SIZE(c->btree_reserve_cache)) { -+ struct btree_alloc *a = -+ &c->btree_reserve_cache[c->btree_reserve_cache_nr++]; -+ -+ a->ob = b->ob; -+ b->ob.nr = 0; -+ bkey_copy(&a->k, &b->key); -+ } else { -+ bch2_open_buckets_put(c, &b->ob); -+ } -+ -+ btree_node_lock_type(c, b, SIX_LOCK_write); -+ __btree_node_free(c, b); -+ six_unlock_write(&b->c.lock); -+ -+ six_unlock_intent(&b->c.lock); -+ } -+ -+ mutex_unlock(&c->btree_reserve_cache_lock); -+} -+ -+static int bch2_btree_reserve_get(struct btree_update *as, unsigned nr_nodes, -+ unsigned flags, struct closure *cl) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *b; -+ int ret; -+ -+ BUG_ON(nr_nodes > BTREE_RESERVE_MAX); -+ -+ /* -+ * Protects reaping from the btree node cache and using the btree node -+ * open bucket reserve: -+ */ -+ ret = bch2_btree_cache_cannibalize_lock(c, cl); -+ if (ret) -+ return ret; -+ -+ while (as->nr_prealloc_nodes < nr_nodes) { -+ b = __bch2_btree_node_alloc(c, &as->disk_res, -+ flags & BTREE_INSERT_NOWAIT -+ ? 
NULL : cl, flags); -+ if (IS_ERR(b)) { -+ ret = PTR_ERR(b); -+ goto err_free; -+ } -+ -+ ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); -+ if (ret) -+ goto err_free; -+ -+ as->prealloc_nodes[as->nr_prealloc_nodes++] = b; -+ } -+ -+ bch2_btree_cache_cannibalize_unlock(c); -+ return 0; -+err_free: -+ bch2_btree_cache_cannibalize_unlock(c); -+ trace_btree_reserve_get_fail(c, nr_nodes, cl); -+ return ret; -+} -+ -+/* Asynchronous interior node update machinery */ -+ -+static void bch2_btree_update_free(struct btree_update *as) -+{ -+ struct bch_fs *c = as->c; -+ -+ bch2_journal_preres_put(&c->journal, &as->journal_preres); -+ -+ bch2_journal_pin_drop(&c->journal, &as->journal); -+ bch2_journal_pin_flush(&c->journal, &as->journal); -+ bch2_disk_reservation_put(c, &as->disk_res); -+ bch2_btree_reserve_put(as); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_del(&as->unwritten_list); -+ list_del(&as->list); -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ closure_debug_destroy(&as->cl); -+ mempool_free(as, &c->btree_interior_update_pool); -+ -+ closure_wake_up(&c->btree_interior_update_wait); -+} -+ -+static void btree_update_will_delete_key(struct btree_update *as, -+ struct bkey_i *k) -+{ -+ BUG_ON(bch2_keylist_u64s(&as->old_keys) + k->k.u64s > -+ ARRAY_SIZE(as->_old_keys)); -+ bch2_keylist_add(&as->old_keys, k); -+} -+ -+static void btree_update_will_add_key(struct btree_update *as, -+ struct bkey_i *k) -+{ -+ BUG_ON(bch2_keylist_u64s(&as->new_keys) + k->k.u64s > -+ ARRAY_SIZE(as->_new_keys)); -+ bch2_keylist_add(&as->new_keys, k); -+} -+ -+/* -+ * The transactional part of an interior btree node update, where we journal the -+ * update we did to the interior node and update alloc info: -+ */ -+static int btree_update_nodes_written_trans(struct btree_trans *trans, -+ struct btree_update *as) -+{ -+ struct bkey_i *k; -+ int ret; -+ -+ trans->extra_journal_entries = (void *) &as->journal_entries[0]; -+ trans->extra_journal_entry_u64s = as->journal_u64s; -+ trans->journal_pin = &as->journal; -+ -+ for_each_keylist_key(&as->new_keys, k) { -+ ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k), -+ 0, 0, BTREE_TRIGGER_INSERT); -+ if (ret) -+ return ret; -+ } -+ -+ for_each_keylist_key(&as->old_keys, k) { -+ ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k), -+ 0, 0, BTREE_TRIGGER_OVERWRITE); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static void btree_update_nodes_written(struct btree_update *as) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *b = as->b; -+ u64 journal_seq = 0; -+ unsigned i; -+ int ret; -+ -+ /* -+ * We did an update to a parent node where the pointers we added pointed -+ * to child nodes that weren't written yet: now, the child nodes have -+ * been written so we can write out the update to the interior node. -+ */ -+ -+ /* -+ * We can't call into journal reclaim here: we'd block on the journal -+ * reclaim lock, but we may need to release the open buckets we have -+ * pinned in order for other btree updates to make forward progress, and -+ * journal reclaim does btree updates when flushing bkey_cached entries, -+ * which may require allocations as well. 
-+ */ -+ ret = bch2_trans_do(c, &as->disk_res, &journal_seq, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_USE_ALLOC_RESERVE| -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_JOURNAL_RECLAIM| -+ BTREE_INSERT_JOURNAL_RESERVED, -+ btree_update_nodes_written_trans(&trans, as)); -+ BUG_ON(ret && !bch2_journal_error(&c->journal)); -+ -+ if (b) { -+ /* -+ * @b is the node we did the final insert into: -+ * -+ * On failure to get a journal reservation, we still have to -+ * unblock the write and allow most of the write path to happen -+ * so that shutdown works, but the i->journal_seq mechanism -+ * won't work to prevent the btree write from being visible (we -+ * didn't get a journal sequence number) - instead -+ * __bch2_btree_node_write() doesn't do the actual write if -+ * we're in journal error state: -+ */ -+ -+ btree_node_lock_type(c, b, SIX_LOCK_intent); -+ btree_node_lock_type(c, b, SIX_LOCK_write); -+ mutex_lock(&c->btree_interior_update_lock); -+ -+ list_del(&as->write_blocked_list); -+ -+ if (!ret && as->b == b) { -+ struct bset *i = btree_bset_last(b); -+ -+ BUG_ON(!b->c.level); -+ BUG_ON(!btree_node_dirty(b)); -+ -+ i->journal_seq = cpu_to_le64( -+ max(journal_seq, -+ le64_to_cpu(i->journal_seq))); -+ -+ bch2_btree_add_journal_pin(c, b, journal_seq); -+ } -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+ six_unlock_write(&b->c.lock); -+ -+ btree_node_write_if_need(c, b, SIX_LOCK_intent); -+ six_unlock_intent(&b->c.lock); -+ } -+ -+ bch2_journal_pin_drop(&c->journal, &as->journal); -+ -+ bch2_journal_preres_put(&c->journal, &as->journal_preres); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ for (i = 0; i < as->nr_new_nodes; i++) { -+ b = as->new_nodes[i]; -+ -+ BUG_ON(b->will_make_reachable != (unsigned long) as); -+ b->will_make_reachable = 0; -+ } -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ for (i = 0; i < as->nr_new_nodes; i++) { -+ b = as->new_nodes[i]; -+ -+ btree_node_lock_type(c, b, SIX_LOCK_read); -+ btree_node_write_if_need(c, b, SIX_LOCK_read); -+ six_unlock_read(&b->c.lock); -+ } -+ -+ for (i = 0; i < as->nr_open_buckets; i++) -+ bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]); -+ -+ bch2_btree_update_free(as); -+} -+ -+static void btree_interior_update_work(struct work_struct *work) -+{ -+ struct bch_fs *c = -+ container_of(work, struct bch_fs, btree_interior_update_work); -+ struct btree_update *as; -+ -+ while (1) { -+ mutex_lock(&c->btree_interior_update_lock); -+ as = list_first_entry_or_null(&c->btree_interior_updates_unwritten, -+ struct btree_update, unwritten_list); -+ if (as && !as->nodes_written) -+ as = NULL; -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ if (!as) -+ break; -+ -+ btree_update_nodes_written(as); -+ } -+} -+ -+static void btree_update_set_nodes_written(struct closure *cl) -+{ -+ struct btree_update *as = container_of(cl, struct btree_update, cl); -+ struct bch_fs *c = as->c; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ as->nodes_written = true; -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work); -+} -+ -+/* -+ * We're updating @b with pointers to nodes that haven't finished writing yet: -+ * block @b from being written until @as completes -+ */ -+static void btree_update_updated_node(struct btree_update *as, struct btree *b) -+{ -+ struct bch_fs *c = as->c; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); -+ -+ 
BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); -+ BUG_ON(!btree_node_dirty(b)); -+ -+ as->mode = BTREE_INTERIOR_UPDATING_NODE; -+ as->b = b; -+ list_add(&as->write_blocked_list, &b->write_blocked); -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+ -+static void btree_update_reparent(struct btree_update *as, -+ struct btree_update *child) -+{ -+ struct bch_fs *c = as->c; -+ -+ lockdep_assert_held(&c->btree_interior_update_lock); -+ -+ child->b = NULL; -+ child->mode = BTREE_INTERIOR_UPDATING_AS; -+ -+ /* -+ * When we write a new btree root, we have to drop our journal pin -+ * _before_ the new nodes are technically reachable; see -+ * btree_update_nodes_written(). -+ * -+ * This goes for journal pins that are recursively blocked on us - so, -+ * just transfer the journal pin to the new interior update so -+ * btree_update_nodes_written() can drop it. -+ */ -+ bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, NULL); -+ bch2_journal_pin_drop(&c->journal, &child->journal); -+} -+ -+static void btree_update_updated_root(struct btree_update *as, struct btree *b) -+{ -+ struct bkey_i *insert = &b->key; -+ struct bch_fs *c = as->c; -+ -+ BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); -+ -+ BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) > -+ ARRAY_SIZE(as->journal_entries)); -+ -+ as->journal_u64s += -+ journal_entry_set((void *) &as->journal_entries[as->journal_u64s], -+ BCH_JSET_ENTRY_btree_root, -+ b->c.btree_id, b->c.level, -+ insert, insert->k.u64s); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); -+ -+ as->mode = BTREE_INTERIOR_UPDATING_ROOT; -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+ -+/* -+ * bch2_btree_update_add_new_node: -+ * -+ * This causes @as to wait on @b to be written, before it gets to -+ * bch2_btree_update_nodes_written -+ * -+ * Additionally, it sets b->will_make_reachable to prevent any additional writes -+ * to @b from happening besides the first until @b is reachable on disk -+ * -+ * And it adds @b to the list of @as's new nodes, so that we can update sector -+ * counts in bch2_btree_update_nodes_written: -+ */ -+void bch2_btree_update_add_new_node(struct btree_update *as, struct btree *b) -+{ -+ struct bch_fs *c = as->c; -+ -+ closure_get(&as->cl); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ BUG_ON(as->nr_new_nodes >= ARRAY_SIZE(as->new_nodes)); -+ BUG_ON(b->will_make_reachable); -+ -+ as->new_nodes[as->nr_new_nodes++] = b; -+ b->will_make_reachable = 1UL|(unsigned long) as; -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ btree_update_will_add_key(as, &b->key); -+} -+ -+/* -+ * returns true if @b was a new node -+ */ -+static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b) -+{ -+ struct btree_update *as; -+ unsigned long v; -+ unsigned i; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ /* -+ * When b->will_make_reachable != 0, it owns a ref on as->cl that's -+ * dropped when it gets written by bch2_btree_complete_write - the -+ * xchg() is for synchronization with bch2_btree_complete_write: -+ */ -+ v = xchg(&b->will_make_reachable, 0); -+ as = (struct btree_update *) (v & ~1UL); -+ -+ if (!as) { -+ mutex_unlock(&c->btree_interior_update_lock); -+ return; -+ } -+ -+ for (i = 0; i < as->nr_new_nodes; i++) -+ if (as->new_nodes[i] == b) -+ goto found; -+ -+ BUG(); -+found: -+ array_remove_item(as->new_nodes, as->nr_new_nodes, i); -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ if (v & 1) -+ 
closure_put(&as->cl); -+} -+ -+void bch2_btree_update_get_open_buckets(struct btree_update *as, struct btree *b) -+{ -+ while (b->ob.nr) -+ as->open_buckets[as->nr_open_buckets++] = -+ b->ob.v[--b->ob.nr]; -+} -+ -+/* -+ * @b is being split/rewritten: it may have pointers to not-yet-written btree -+ * nodes and thus outstanding btree_updates - redirect @b's -+ * btree_updates to point to this btree_update: -+ */ -+void bch2_btree_interior_update_will_free_node(struct btree_update *as, -+ struct btree *b) -+{ -+ struct bch_fs *c = as->c; -+ struct btree_update *p, *n; -+ struct btree_write *w; -+ -+ set_btree_node_dying(b); -+ -+ if (btree_node_fake(b)) -+ return; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ -+ /* -+ * Does this node have any btree_update operations preventing -+ * it from being written? -+ * -+ * If so, redirect them to point to this btree_update: we can -+ * write out our new nodes, but we won't make them visible until those -+ * operations complete -+ */ -+ list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) { -+ list_del_init(&p->write_blocked_list); -+ btree_update_reparent(as, p); -+ -+ /* -+ * for flush_held_btree_writes() waiting on updates to flush or -+ * nodes to be writeable: -+ */ -+ closure_wake_up(&c->btree_interior_update_wait); -+ } -+ -+ clear_btree_node_dirty(b); -+ clear_btree_node_need_write(b); -+ -+ /* -+ * Does this node have unwritten data that has a pin on the journal? -+ * -+ * If so, transfer that pin to the btree_update operation - -+ * note that if we're freeing multiple nodes, we only need to keep the -+ * oldest pin of any of the nodes we're freeing. We'll release the pin -+ * when the new nodes are persistent and reachable on disk: -+ */ -+ w = btree_current_write(b); -+ bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL); -+ bch2_journal_pin_drop(&c->journal, &w->journal); -+ -+ w = btree_prev_write(b); -+ bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL); -+ bch2_journal_pin_drop(&c->journal, &w->journal); -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ /* -+ * Is this a node that isn't reachable on disk yet? -+ * -+ * Nodes that aren't reachable yet have writes blocked until they're -+ * reachable - now that we've cancelled any pending writes and moved -+ * things waiting on that write to wait on this update, we can drop this -+ * node from the list of nodes that the other update is making -+ * reachable, prior to freeing it: -+ */ -+ btree_update_drop_new_node(c, b); -+ -+ btree_update_will_delete_key(as, &b->key); -+} -+ -+void bch2_btree_update_done(struct btree_update *as) -+{ -+ BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE); -+ -+ bch2_btree_reserve_put(as); -+ -+ continue_at(&as->cl, btree_update_set_nodes_written, system_freezable_wq); -+} -+ -+struct btree_update * -+bch2_btree_update_start(struct btree_trans *trans, enum btree_id id, -+ unsigned nr_nodes, unsigned flags, -+ struct closure *cl) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_update *as; -+ int disk_res_flags = (flags & BTREE_INSERT_NOFAIL) -+ ? BCH_DISK_RESERVATION_NOFAIL : 0; -+ int journal_flags = (flags & BTREE_INSERT_JOURNAL_RESERVED) -+ ? 
JOURNAL_RES_GET_RECLAIM : 0; -+ int ret = 0; -+ -+ /* -+ * This check isn't necessary for correctness - it's just to potentially -+ * prevent us from doing a lot of work that'll end up being wasted: -+ */ -+ ret = bch2_journal_error(&c->journal); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO); -+ memset(as, 0, sizeof(*as)); -+ closure_init(&as->cl, NULL); -+ as->c = c; -+ as->mode = BTREE_INTERIOR_NO_UPDATE; -+ as->btree_id = id; -+ INIT_LIST_HEAD(&as->list); -+ INIT_LIST_HEAD(&as->unwritten_list); -+ INIT_LIST_HEAD(&as->write_blocked_list); -+ bch2_keylist_init(&as->old_keys, as->_old_keys); -+ bch2_keylist_init(&as->new_keys, as->_new_keys); -+ bch2_keylist_init(&as->parent_keys, as->inline_keys); -+ -+ ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, -+ BTREE_UPDATE_JOURNAL_RES, -+ journal_flags|JOURNAL_RES_GET_NONBLOCK); -+ if (ret == -EAGAIN) { -+ if (flags & BTREE_INSERT_NOUNLOCK) -+ return ERR_PTR(-EINTR); -+ -+ bch2_trans_unlock(trans); -+ -+ ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, -+ BTREE_UPDATE_JOURNAL_RES, -+ journal_flags); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ if (!bch2_trans_relock(trans)) { -+ ret = -EINTR; -+ goto err; -+ } -+ } -+ -+ ret = bch2_disk_reservation_get(c, &as->disk_res, -+ nr_nodes * c->opts.btree_node_size, -+ c->opts.metadata_replicas, -+ disk_res_flags); -+ if (ret) -+ goto err; -+ -+ ret = bch2_btree_reserve_get(as, nr_nodes, flags, cl); -+ if (ret) -+ goto err; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_add_tail(&as->list, &c->btree_interior_update_list); -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ return as; -+err: -+ bch2_btree_update_free(as); -+ return ERR_PTR(ret); -+} -+ -+/* Btree root updates: */ -+ -+static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b) -+{ -+ /* Root nodes cannot be reaped */ -+ mutex_lock(&c->btree_cache.lock); -+ list_del_init(&b->list); -+ mutex_unlock(&c->btree_cache.lock); -+ -+ mutex_lock(&c->btree_root_lock); -+ BUG_ON(btree_node_root(c, b) && -+ (b->c.level < btree_node_root(c, b)->c.level || -+ !btree_node_dying(btree_node_root(c, b)))); -+ -+ btree_node_root(c, b) = b; -+ mutex_unlock(&c->btree_root_lock); -+ -+ bch2_recalc_btree_reserve(c); -+} -+ -+/** -+ * bch_btree_set_root - update the root in memory and on disk -+ * -+ * To ensure forward progress, the current task must not be holding any -+ * btree node write locks. However, you must hold an intent lock on the -+ * old root. -+ * -+ * Note: This allocates a journal entry but doesn't add any keys to -+ * it. All the btree roots are part of every journal write, so there -+ * is nothing new to be done. This just guarantees that there is a -+ * journal write. -+ */ -+static void bch2_btree_set_root(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *old; -+ -+ trace_btree_set_root(c, b); -+ BUG_ON(!b->written && -+ !test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)); -+ -+ old = btree_node_root(c, b); -+ -+ /* -+ * Ensure no one is using the old root while we switch to the -+ * new root: -+ */ -+ bch2_btree_node_lock_write(old, iter); -+ -+ bch2_btree_set_root_inmem(c, b); -+ -+ btree_update_updated_root(as, b); -+ -+ /* -+ * Unlock old root after new root is visible: -+ * -+ * The new root isn't persistent, but that's ok: we still have -+ * an intent lock on the new root, and any updates that would -+ * depend on the new root would have to update the new root. 
-+ */ -+ bch2_btree_node_unlock_write(old, iter); -+} -+ -+/* Interior node updates: */ -+ -+static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, -+ struct bkey_i *insert, -+ struct btree_node_iter *node_iter) -+{ -+ struct bkey_packed *k; -+ -+ BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) > -+ ARRAY_SIZE(as->journal_entries)); -+ -+ as->journal_u64s += -+ journal_entry_set((void *) &as->journal_entries[as->journal_u64s], -+ BCH_JSET_ENTRY_btree_keys, -+ b->c.btree_id, b->c.level, -+ insert, insert->k.u64s); -+ -+ while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) && -+ bkey_iter_pos_cmp(b, k, &insert->k.p) < 0) -+ bch2_btree_node_iter_advance(node_iter, b); -+ -+ bch2_btree_bset_insert_key(iter, b, node_iter, insert); -+ set_btree_node_dirty(b); -+ set_btree_node_need_write(b); -+} -+ -+/* -+ * Move keys from n1 (original replacement node, now lower node) to n2 (higher -+ * node) -+ */ -+static struct btree *__btree_split_node(struct btree_update *as, -+ struct btree *n1, -+ struct btree_iter *iter) -+{ -+ size_t nr_packed = 0, nr_unpacked = 0; -+ struct btree *n2; -+ struct bset *set1, *set2; -+ struct bkey_packed *k, *prev = NULL; -+ -+ n2 = bch2_btree_node_alloc(as, n1->c.level); -+ bch2_btree_update_add_new_node(as, n2); -+ -+ n2->data->max_key = n1->data->max_key; -+ n2->data->format = n1->format; -+ SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data)); -+ n2->key.k.p = n1->key.k.p; -+ -+ btree_node_set_format(n2, n2->data->format); -+ -+ set1 = btree_bset_first(n1); -+ set2 = btree_bset_first(n2); -+ -+ /* -+ * Has to be a linear search because we don't have an auxiliary -+ * search tree yet -+ */ -+ k = set1->start; -+ while (1) { -+ struct bkey_packed *n = bkey_next_skip_noops(k, vstruct_last(set1)); -+ -+ if (n == vstruct_last(set1)) -+ break; -+ if (k->_data - set1->_data >= (le16_to_cpu(set1->u64s) * 3) / 5) -+ break; -+ -+ if (bkey_packed(k)) -+ nr_packed++; -+ else -+ nr_unpacked++; -+ -+ prev = k; -+ k = n; -+ } -+ -+ BUG_ON(!prev); -+ -+ btree_set_max(n1, bkey_unpack_pos(n1, prev)); -+ btree_set_min(n2, bkey_successor(n1->key.k.p)); -+ -+ set2->u64s = cpu_to_le16((u64 *) vstruct_end(set1) - (u64 *) k); -+ set1->u64s = cpu_to_le16(le16_to_cpu(set1->u64s) - le16_to_cpu(set2->u64s)); -+ -+ set_btree_bset_end(n1, n1->set); -+ set_btree_bset_end(n2, n2->set); -+ -+ n2->nr.live_u64s = le16_to_cpu(set2->u64s); -+ n2->nr.bset_u64s[0] = le16_to_cpu(set2->u64s); -+ n2->nr.packed_keys = n1->nr.packed_keys - nr_packed; -+ n2->nr.unpacked_keys = n1->nr.unpacked_keys - nr_unpacked; -+ -+ n1->nr.live_u64s = le16_to_cpu(set1->u64s); -+ n1->nr.bset_u64s[0] = le16_to_cpu(set1->u64s); -+ n1->nr.packed_keys = nr_packed; -+ n1->nr.unpacked_keys = nr_unpacked; -+ -+ BUG_ON(!set1->u64s); -+ BUG_ON(!set2->u64s); -+ -+ memcpy_u64s(set2->start, -+ vstruct_end(set1), -+ le16_to_cpu(set2->u64s)); -+ -+ btree_node_reset_sib_u64s(n1); -+ btree_node_reset_sib_u64s(n2); -+ -+ bch2_verify_btree_nr_keys(n1); -+ bch2_verify_btree_nr_keys(n2); -+ -+ if (n1->c.level) { -+ btree_node_interior_verify(as->c, n1); -+ btree_node_interior_verify(as->c, n2); -+ } -+ -+ return n2; -+} -+ -+/* -+ * For updates to interior nodes, we've got to do the insert before we split -+ * because the stuff we're inserting has to be inserted atomically. Post split, -+ * the keys might have to go in different nodes and the split would no longer be -+ * atomic. 
-+ * -+ * Worse, if the insert is from btree node coalescing, if we do the insert after -+ * we do the split (and pick the pivot) - the pivot we pick might be between -+ * nodes that were coalesced, and thus in the middle of a child node post -+ * coalescing: -+ */ -+static void btree_split_insert_keys(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, -+ struct keylist *keys) -+{ -+ struct btree_node_iter node_iter; -+ struct bkey_i *k = bch2_keylist_front(keys); -+ struct bkey_packed *src, *dst, *n; -+ struct bset *i; -+ -+ BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE); -+ -+ bch2_btree_node_iter_init(&node_iter, b, &k->k.p); -+ -+ while (!bch2_keylist_empty(keys)) { -+ k = bch2_keylist_front(keys); -+ -+ bch2_insert_fixup_btree_ptr(as, b, iter, k, &node_iter); -+ bch2_keylist_pop_front(keys); -+ } -+ -+ /* -+ * We can't tolerate whiteouts here - with whiteouts there can be -+ * duplicate keys, and it would be rather bad if we picked a duplicate -+ * for the pivot: -+ */ -+ i = btree_bset_first(b); -+ src = dst = i->start; -+ while (src != vstruct_last(i)) { -+ n = bkey_next_skip_noops(src, vstruct_last(i)); -+ if (!bkey_deleted(src)) { -+ memmove_u64s_down(dst, src, src->u64s); -+ dst = bkey_next(dst); -+ } -+ src = n; -+ } -+ -+ i->u64s = cpu_to_le16((u64 *) dst - i->_data); -+ set_btree_bset_end(b, b->set); -+ -+ BUG_ON(b->nsets != 1 || -+ b->nr.live_u64s != le16_to_cpu(btree_bset_first(b)->u64s)); -+ -+ btree_node_interior_verify(as->c, b); -+} -+ -+static void btree_split(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, struct keylist *keys, -+ unsigned flags) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *parent = btree_node_parent(iter, b); -+ struct btree *n1, *n2 = NULL, *n3 = NULL; -+ u64 start_time = local_clock(); -+ -+ BUG_ON(!parent && (b != btree_node_root(c, b))); -+ BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level)); -+ -+ bch2_btree_interior_update_will_free_node(as, b); -+ -+ n1 = bch2_btree_node_alloc_replacement(as, b); -+ bch2_btree_update_add_new_node(as, n1); -+ -+ if (keys) -+ btree_split_insert_keys(as, n1, iter, keys); -+ -+ if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) { -+ trace_btree_split(c, b); -+ -+ n2 = __btree_split_node(as, n1, iter); -+ -+ bch2_btree_build_aux_trees(n2); -+ bch2_btree_build_aux_trees(n1); -+ six_unlock_write(&n2->c.lock); -+ six_unlock_write(&n1->c.lock); -+ -+ bch2_btree_node_write(c, n2, SIX_LOCK_intent); -+ -+ /* -+ * Note that on recursive parent_keys == keys, so we -+ * can't start adding new keys to parent_keys before emptying it -+ * out (which we did with btree_split_insert_keys() above) -+ */ -+ bch2_keylist_add(&as->parent_keys, &n1->key); -+ bch2_keylist_add(&as->parent_keys, &n2->key); -+ -+ if (!parent) { -+ /* Depth increases, make a new root */ -+ n3 = __btree_root_alloc(as, b->c.level + 1); -+ -+ n3->sib_u64s[0] = U16_MAX; -+ n3->sib_u64s[1] = U16_MAX; -+ -+ btree_split_insert_keys(as, n3, iter, &as->parent_keys); -+ -+ bch2_btree_node_write(c, n3, SIX_LOCK_intent); -+ } -+ } else { -+ trace_btree_compact(c, b); -+ -+ bch2_btree_build_aux_trees(n1); -+ six_unlock_write(&n1->c.lock); -+ -+ if (parent) -+ bch2_keylist_add(&as->parent_keys, &n1->key); -+ } -+ -+ bch2_btree_node_write(c, n1, SIX_LOCK_intent); -+ -+ /* New nodes all written, now make them visible: */ -+ -+ if (parent) { -+ /* Split a non root node */ -+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); -+ } else if (n3) { -+ bch2_btree_set_root(as, n3, iter); -+ } else 
{ -+ /* Root filled up but didn't need to be split */ -+ bch2_btree_set_root(as, n1, iter); -+ } -+ -+ bch2_btree_update_get_open_buckets(as, n1); -+ if (n2) -+ bch2_btree_update_get_open_buckets(as, n2); -+ if (n3) -+ bch2_btree_update_get_open_buckets(as, n3); -+ -+ /* Successful split, update the iterator to point to the new nodes: */ -+ -+ six_lock_increment(&b->c.lock, SIX_LOCK_intent); -+ bch2_btree_iter_node_drop(iter, b); -+ if (n3) -+ bch2_btree_iter_node_replace(iter, n3); -+ if (n2) -+ bch2_btree_iter_node_replace(iter, n2); -+ bch2_btree_iter_node_replace(iter, n1); -+ -+ /* -+ * The old node must be freed (in memory) _before_ unlocking the new -+ * nodes - else another thread could re-acquire a read lock on the old -+ * node after another thread has locked and updated the new node, thus -+ * seeing stale data: -+ */ -+ bch2_btree_node_free_inmem(c, b, iter); -+ -+ if (n3) -+ six_unlock_intent(&n3->c.lock); -+ if (n2) -+ six_unlock_intent(&n2->c.lock); -+ six_unlock_intent(&n1->c.lock); -+ -+ bch2_btree_trans_verify_locks(iter->trans); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split], -+ start_time); -+} -+ -+static void -+bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, struct keylist *keys) -+{ -+ struct btree_iter *linked; -+ struct btree_node_iter node_iter; -+ struct bkey_i *insert = bch2_keylist_front(keys); -+ struct bkey_packed *k; -+ -+ /* Don't screw up @iter's position: */ -+ node_iter = iter->l[b->c.level].iter; -+ -+ /* -+ * btree_split(), btree_gc_coalesce() will insert keys before -+ * the iterator's current position - they know the keys go in -+ * the node the iterator points to: -+ */ -+ while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) && -+ (bkey_cmp_packed(b, k, &insert->k) >= 0)) -+ ; -+ -+ for_each_keylist_key(keys, insert) -+ bch2_insert_fixup_btree_ptr(as, b, iter, insert, &node_iter); -+ -+ btree_update_updated_node(as, b); -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) -+ bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); -+ -+ bch2_btree_trans_verify_iters(iter->trans, b); -+} -+ -+/** -+ * bch_btree_insert_node - insert bkeys into a given btree node -+ * -+ * @iter: btree iterator -+ * @keys: list of keys to insert -+ * @hook: insert callback -+ * @persistent: if not null, @persistent will wait on journal write -+ * -+ * Inserts as many keys as it can into a given btree node, splitting it if full. -+ * If a split occurred, this function will return early. This can only happen -+ * for leaf nodes -- inserts into interior nodes have to be atomic. 
-+ */ -+void bch2_btree_insert_node(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, struct keylist *keys, -+ unsigned flags) -+{ -+ struct bch_fs *c = as->c; -+ int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s); -+ int old_live_u64s = b->nr.live_u64s; -+ int live_u64s_added, u64s_added; -+ -+ BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level)); -+ BUG_ON(!b->c.level); -+ BUG_ON(!as || as->b); -+ bch2_verify_keylist_sorted(keys); -+ -+ if (as->must_rewrite) -+ goto split; -+ -+ bch2_btree_node_lock_for_insert(c, b, iter); -+ -+ if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) { -+ bch2_btree_node_unlock_write(b, iter); -+ goto split; -+ } -+ -+ bch2_btree_insert_keys_interior(as, b, iter, keys); -+ -+ live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; -+ u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s; -+ -+ if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added); -+ if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added); -+ -+ if (u64s_added > live_u64s_added && -+ bch2_maybe_compact_whiteouts(c, b)) -+ bch2_btree_iter_reinit_node(iter, b); -+ -+ bch2_btree_node_unlock_write(b, iter); -+ -+ btree_node_interior_verify(c, b); -+ -+ /* -+ * when called from the btree_split path the new nodes aren't added to -+ * the btree iterator yet, so the merge path's unlock/wait/relock dance -+ * won't work: -+ */ -+ bch2_foreground_maybe_merge(c, iter, b->c.level, -+ flags|BTREE_INSERT_NOUNLOCK); -+ return; -+split: -+ btree_split(as, b, iter, keys, flags); -+} -+ -+int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, -+ unsigned flags) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree *b = iter_l(iter)->b; -+ struct btree_update *as; -+ struct closure cl; -+ int ret = 0; -+ struct btree_insert_entry *i; -+ -+ /* -+ * We already have a disk reservation and open buckets pinned; this -+ * allocation must not block: -+ */ -+ trans_for_each_update(trans, i) -+ if (btree_node_type_needs_gc(i->iter->btree_id)) -+ flags |= BTREE_INSERT_USE_RESERVE; -+ -+ closure_init_stack(&cl); -+ -+ /* Hack, because gc and splitting nodes doesn't mix yet: */ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD) && -+ !down_read_trylock(&c->gc_lock)) { -+ if (flags & BTREE_INSERT_NOUNLOCK) { -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ return -EINTR; -+ } -+ -+ bch2_trans_unlock(trans); -+ down_read(&c->gc_lock); -+ -+ if (!bch2_trans_relock(trans)) -+ ret = -EINTR; -+ } -+ -+ /* -+ * XXX: figure out how far we might need to split, -+ * instead of locking/reserving all the way to the root: -+ */ -+ if (!bch2_btree_iter_upgrade(iter, U8_MAX)) { -+ trace_trans_restart_iter_upgrade(trans->ip); -+ ret = -EINTR; -+ goto out; -+ } -+ -+ as = bch2_btree_update_start(trans, iter->btree_id, -+ btree_update_reserve_required(c, b), flags, -+ !(flags & BTREE_INSERT_NOUNLOCK) ? 
&cl : NULL); -+ if (IS_ERR(as)) { -+ ret = PTR_ERR(as); -+ if (ret == -EAGAIN) { -+ BUG_ON(flags & BTREE_INSERT_NOUNLOCK); -+ bch2_trans_unlock(trans); -+ ret = -EINTR; -+ -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ } -+ goto out; -+ } -+ -+ btree_split(as, b, iter, NULL, flags); -+ bch2_btree_update_done(as); -+ -+ /* -+ * We haven't successfully inserted yet, so don't downgrade all the way -+ * back to read locks; -+ */ -+ __bch2_btree_iter_downgrade(iter, 1); -+out: -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) -+ up_read(&c->gc_lock); -+ closure_sync(&cl); -+ return ret; -+} -+ -+void __bch2_foreground_maybe_merge(struct bch_fs *c, -+ struct btree_iter *iter, -+ unsigned level, -+ unsigned flags, -+ enum btree_node_sibling sib) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree_update *as; -+ struct bkey_format_state new_s; -+ struct bkey_format new_f; -+ struct bkey_i delete; -+ struct btree *b, *m, *n, *prev, *next, *parent; -+ struct closure cl; -+ size_t sib_u64s; -+ int ret = 0; -+ -+ BUG_ON(!btree_node_locked(iter, level)); -+ -+ closure_init_stack(&cl); -+retry: -+ BUG_ON(!btree_node_locked(iter, level)); -+ -+ b = iter->l[level].b; -+ -+ parent = btree_node_parent(iter, b); -+ if (!parent) -+ goto out; -+ -+ if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) -+ goto out; -+ -+ /* XXX: can't be holding read locks */ -+ m = bch2_btree_node_get_sibling(c, iter, b, sib); -+ if (IS_ERR(m)) { -+ ret = PTR_ERR(m); -+ goto err; -+ } -+ -+ /* NULL means no sibling: */ -+ if (!m) { -+ b->sib_u64s[sib] = U16_MAX; -+ goto out; -+ } -+ -+ if (sib == btree_prev_sib) { -+ prev = m; -+ next = b; -+ } else { -+ prev = b; -+ next = m; -+ } -+ -+ bch2_bkey_format_init(&new_s); -+ __bch2_btree_calc_format(&new_s, b); -+ __bch2_btree_calc_format(&new_s, m); -+ new_f = bch2_bkey_format_done(&new_s); -+ -+ sib_u64s = btree_node_u64s_with_format(b, &new_f) + -+ btree_node_u64s_with_format(m, &new_f); -+ -+ if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) { -+ sib_u64s -= BTREE_FOREGROUND_MERGE_HYSTERESIS(c); -+ sib_u64s /= 2; -+ sib_u64s += BTREE_FOREGROUND_MERGE_HYSTERESIS(c); -+ } -+ -+ sib_u64s = min(sib_u64s, btree_max_u64s(c)); -+ b->sib_u64s[sib] = sib_u64s; -+ -+ if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) { -+ six_unlock_intent(&m->c.lock); -+ goto out; -+ } -+ -+ /* We're changing btree topology, doesn't mix with gc: */ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD) && -+ !down_read_trylock(&c->gc_lock)) -+ goto err_cycle_gc_lock; -+ -+ if (!bch2_btree_iter_upgrade(iter, U8_MAX)) { -+ ret = -EINTR; -+ goto err_unlock; -+ } -+ -+ as = bch2_btree_update_start(trans, iter->btree_id, -+ btree_update_reserve_required(c, parent) + 1, -+ flags| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE, -+ !(flags & BTREE_INSERT_NOUNLOCK) ? 
&cl : NULL); -+ if (IS_ERR(as)) { -+ ret = PTR_ERR(as); -+ goto err_unlock; -+ } -+ -+ trace_btree_merge(c, b); -+ -+ bch2_btree_interior_update_will_free_node(as, b); -+ bch2_btree_interior_update_will_free_node(as, m); -+ -+ n = bch2_btree_node_alloc(as, b->c.level); -+ bch2_btree_update_add_new_node(as, n); -+ -+ btree_set_min(n, prev->data->min_key); -+ btree_set_max(n, next->data->max_key); -+ n->data->format = new_f; -+ -+ btree_node_set_format(n, new_f); -+ -+ bch2_btree_sort_into(c, n, prev); -+ bch2_btree_sort_into(c, n, next); -+ -+ bch2_btree_build_aux_trees(n); -+ six_unlock_write(&n->c.lock); -+ -+ bkey_init(&delete.k); -+ delete.k.p = prev->key.k.p; -+ bch2_keylist_add(&as->parent_keys, &delete); -+ bch2_keylist_add(&as->parent_keys, &n->key); -+ -+ bch2_btree_node_write(c, n, SIX_LOCK_intent); -+ -+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); -+ -+ bch2_btree_update_get_open_buckets(as, n); -+ -+ six_lock_increment(&b->c.lock, SIX_LOCK_intent); -+ bch2_btree_iter_node_drop(iter, b); -+ bch2_btree_iter_node_drop(iter, m); -+ -+ bch2_btree_iter_node_replace(iter, n); -+ -+ bch2_btree_trans_verify_iters(trans, n); -+ -+ bch2_btree_node_free_inmem(c, b, iter); -+ bch2_btree_node_free_inmem(c, m, iter); -+ -+ six_unlock_intent(&n->c.lock); -+ -+ bch2_btree_update_done(as); -+ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) -+ up_read(&c->gc_lock); -+out: -+ bch2_btree_trans_verify_locks(trans); -+ -+ /* -+ * Don't downgrade locks here: we're called after successful insert, -+ * and the caller will downgrade locks after a successful insert -+ * anyways (in case e.g. a split was required first) -+ * -+ * And we're also called when inserting into interior nodes in the -+ * split path, and downgrading to read locks in there is potentially -+ * confusing: -+ */ -+ closure_sync(&cl); -+ return; -+ -+err_cycle_gc_lock: -+ six_unlock_intent(&m->c.lock); -+ -+ if (flags & BTREE_INSERT_NOUNLOCK) -+ goto out; -+ -+ bch2_trans_unlock(trans); -+ -+ down_read(&c->gc_lock); -+ up_read(&c->gc_lock); -+ ret = -EINTR; -+ goto err; -+ -+err_unlock: -+ six_unlock_intent(&m->c.lock); -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) -+ up_read(&c->gc_lock); -+err: -+ BUG_ON(ret == -EAGAIN && (flags & BTREE_INSERT_NOUNLOCK)); -+ -+ if ((ret == -EAGAIN || ret == -EINTR) && -+ !(flags & BTREE_INSERT_NOUNLOCK)) { -+ bch2_trans_unlock(trans); -+ closure_sync(&cl); -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ goto out; -+ -+ goto retry; -+ } -+ -+ goto out; -+} -+ -+static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, -+ struct btree *b, unsigned flags, -+ struct closure *cl) -+{ -+ struct btree *n, *parent = btree_node_parent(iter, b); -+ struct btree_update *as; -+ -+ as = bch2_btree_update_start(iter->trans, iter->btree_id, -+ (parent -+ ? 
btree_update_reserve_required(c, parent) -+ : 0) + 1, -+ flags, cl); -+ if (IS_ERR(as)) { -+ trace_btree_gc_rewrite_node_fail(c, b); -+ return PTR_ERR(as); -+ } -+ -+ bch2_btree_interior_update_will_free_node(as, b); -+ -+ n = bch2_btree_node_alloc_replacement(as, b); -+ bch2_btree_update_add_new_node(as, n); -+ -+ bch2_btree_build_aux_trees(n); -+ six_unlock_write(&n->c.lock); -+ -+ trace_btree_gc_rewrite_node(c, b); -+ -+ bch2_btree_node_write(c, n, SIX_LOCK_intent); -+ -+ if (parent) { -+ bch2_keylist_add(&as->parent_keys, &n->key); -+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); -+ } else { -+ bch2_btree_set_root(as, n, iter); -+ } -+ -+ bch2_btree_update_get_open_buckets(as, n); -+ -+ six_lock_increment(&b->c.lock, SIX_LOCK_intent); -+ bch2_btree_iter_node_drop(iter, b); -+ bch2_btree_iter_node_replace(iter, n); -+ bch2_btree_node_free_inmem(c, b, iter); -+ six_unlock_intent(&n->c.lock); -+ -+ bch2_btree_update_done(as); -+ return 0; -+} -+ -+/** -+ * bch_btree_node_rewrite - Rewrite/move a btree node -+ * -+ * Returns 0 on success, -EINTR or -EAGAIN on failure (i.e. -+ * btree_check_reserve() has to wait) -+ */ -+int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, -+ __le64 seq, unsigned flags) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct closure cl; -+ struct btree *b; -+ int ret; -+ -+ flags |= BTREE_INSERT_NOFAIL; -+ -+ closure_init_stack(&cl); -+ -+ bch2_btree_iter_upgrade(iter, U8_MAX); -+ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) { -+ if (!down_read_trylock(&c->gc_lock)) { -+ bch2_trans_unlock(trans); -+ down_read(&c->gc_lock); -+ } -+ } -+ -+ while (1) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ break; -+ -+ b = bch2_btree_iter_peek_node(iter); -+ if (!b || b->data->keys.seq != seq) -+ break; -+ -+ ret = __btree_node_rewrite(c, iter, b, flags, &cl); -+ if (ret != -EAGAIN && -+ ret != -EINTR) -+ break; -+ -+ bch2_trans_unlock(trans); -+ closure_sync(&cl); -+ } -+ -+ bch2_btree_iter_downgrade(iter); -+ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) -+ up_read(&c->gc_lock); -+ -+ closure_sync(&cl); -+ return ret; -+} -+ -+static void __bch2_btree_node_update_key(struct bch_fs *c, -+ struct btree_update *as, -+ struct btree_iter *iter, -+ struct btree *b, struct btree *new_hash, -+ struct bkey_i *new_key) -+{ -+ struct btree *parent; -+ int ret; -+ -+ btree_update_will_delete_key(as, &b->key); -+ btree_update_will_add_key(as, new_key); -+ -+ parent = btree_node_parent(iter, b); -+ if (parent) { -+ if (new_hash) { -+ bkey_copy(&new_hash->key, new_key); -+ ret = bch2_btree_node_hash_insert(&c->btree_cache, -+ new_hash, b->c.level, b->c.btree_id); -+ BUG_ON(ret); -+ } -+ -+ bch2_keylist_add(&as->parent_keys, new_key); -+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, 0); -+ -+ if (new_hash) { -+ mutex_lock(&c->btree_cache.lock); -+ bch2_btree_node_hash_remove(&c->btree_cache, new_hash); -+ -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ bkey_copy(&b->key, new_key); -+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); -+ BUG_ON(ret); -+ mutex_unlock(&c->btree_cache.lock); -+ } else { -+ bkey_copy(&b->key, new_key); -+ } -+ } else { -+ BUG_ON(btree_node_root(c, b) != b); -+ -+ bch2_btree_node_lock_write(b, iter); -+ bkey_copy(&b->key, new_key); -+ -+ if (btree_ptr_hash_val(&b->key) != b->hash_val) { -+ mutex_lock(&c->btree_cache.lock); -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); -+ BUG_ON(ret); -+ 
mutex_unlock(&c->btree_cache.lock); -+ } -+ -+ btree_update_updated_root(as, b); -+ bch2_btree_node_unlock_write(b, iter); -+ } -+ -+ bch2_btree_update_done(as); -+} -+ -+int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, -+ struct btree *b, -+ struct bkey_i *new_key) -+{ -+ struct btree *parent = btree_node_parent(iter, b); -+ struct btree_update *as = NULL; -+ struct btree *new_hash = NULL; -+ struct closure cl; -+ int ret; -+ -+ closure_init_stack(&cl); -+ -+ if (!bch2_btree_iter_upgrade(iter, U8_MAX)) -+ return -EINTR; -+ -+ if (!down_read_trylock(&c->gc_lock)) { -+ bch2_trans_unlock(iter->trans); -+ down_read(&c->gc_lock); -+ -+ if (!bch2_trans_relock(iter->trans)) { -+ ret = -EINTR; -+ goto err; -+ } -+ } -+ -+ /* -+ * check btree_ptr_hash_val() after @b is locked by -+ * btree_iter_traverse(): -+ */ -+ if (btree_ptr_hash_val(new_key) != b->hash_val) { -+ /* bch2_btree_reserve_get will unlock */ -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); -+ if (ret) { -+ bch2_trans_unlock(iter->trans); -+ up_read(&c->gc_lock); -+ closure_sync(&cl); -+ down_read(&c->gc_lock); -+ -+ if (!bch2_trans_relock(iter->trans)) { -+ ret = -EINTR; -+ goto err; -+ } -+ } -+ -+ new_hash = bch2_btree_node_mem_alloc(c); -+ } -+retry: -+ as = bch2_btree_update_start(iter->trans, iter->btree_id, -+ parent ? btree_update_reserve_required(c, parent) : 0, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_USE_ALLOC_RESERVE, -+ &cl); -+ -+ if (IS_ERR(as)) { -+ ret = PTR_ERR(as); -+ if (ret == -EAGAIN) -+ ret = -EINTR; -+ -+ if (ret == -EINTR) { -+ bch2_trans_unlock(iter->trans); -+ up_read(&c->gc_lock); -+ closure_sync(&cl); -+ down_read(&c->gc_lock); -+ -+ if (bch2_trans_relock(iter->trans)) -+ goto retry; -+ } -+ -+ goto err; -+ } -+ -+ ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(new_key)); -+ if (ret) -+ goto err_free_update; -+ -+ __bch2_btree_node_update_key(c, as, iter, b, new_hash, new_key); -+ -+ bch2_btree_iter_downgrade(iter); -+err: -+ if (new_hash) { -+ mutex_lock(&c->btree_cache.lock); -+ list_move(&new_hash->list, &c->btree_cache.freeable); -+ mutex_unlock(&c->btree_cache.lock); -+ -+ six_unlock_write(&new_hash->c.lock); -+ six_unlock_intent(&new_hash->c.lock); -+ } -+ up_read(&c->gc_lock); -+ closure_sync(&cl); -+ return ret; -+err_free_update: -+ bch2_btree_update_free(as); -+ goto err; -+} -+ -+/* Init code: */ -+ -+/* -+ * Only for filesystem bringup, when first reading the btree roots or allocating -+ * btree roots when initializing a new filesystem: -+ */ -+void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b) -+{ -+ BUG_ON(btree_node_root(c, b)); -+ -+ bch2_btree_set_root_inmem(c, b); -+} -+ -+void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id) -+{ -+ struct closure cl; -+ struct btree *b; -+ int ret; -+ -+ closure_init_stack(&cl); -+ -+ do { -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); -+ closure_sync(&cl); -+ } while (ret); -+ -+ b = bch2_btree_node_mem_alloc(c); -+ bch2_btree_cache_cannibalize_unlock(c); -+ -+ set_btree_node_fake(b); -+ set_btree_node_need_rewrite(b); -+ b->c.level = 0; -+ b->c.btree_id = id; -+ -+ bkey_btree_ptr_init(&b->key); -+ b->key.k.p = POS_MAX; -+ *((u64 *) bkey_i_to_btree_ptr(&b->key)->v.start) = U64_MAX - id; -+ -+ bch2_bset_init_first(b, &b->data->keys); -+ bch2_btree_build_aux_trees(b); -+ -+ b->data->flags = 0; -+ btree_set_min(b, POS_MIN); -+ btree_set_max(b, POS_MAX); -+ b->data->format = bch2_btree_calc_format(b); -+ btree_node_set_format(b, b->data->format); -+ -+ ret = 
bch2_btree_node_hash_insert(&c->btree_cache, b, -+ b->c.level, b->c.btree_id); -+ BUG_ON(ret); -+ -+ bch2_btree_set_root_inmem(c, b); -+ -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+} -+ -+void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct btree_update *as; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_for_each_entry(as, &c->btree_interior_update_list, list) -+ pr_buf(out, "%p m %u w %u r %u j %llu\n", -+ as, -+ as->mode, -+ as->nodes_written, -+ atomic_read(&as->cl.remaining) & CLOSURE_REMAINING_MASK, -+ as->journal.seq); -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+ -+size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *c) -+{ -+ size_t ret = 0; -+ struct list_head *i; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_for_each(i, &c->btree_interior_update_list) -+ ret++; -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ return ret; -+} -+ -+void bch2_journal_entries_to_btree_roots(struct bch_fs *c, struct jset *jset) -+{ -+ struct btree_root *r; -+ struct jset_entry *entry; -+ -+ mutex_lock(&c->btree_root_lock); -+ -+ vstruct_for_each(jset, entry) -+ if (entry->type == BCH_JSET_ENTRY_btree_root) { -+ r = &c->btree_roots[entry->btree_id]; -+ r->level = entry->level; -+ r->alive = true; -+ bkey_copy(&r->key, &entry->start[0]); -+ } -+ -+ mutex_unlock(&c->btree_root_lock); -+} -+ -+struct jset_entry * -+bch2_btree_roots_to_journal_entries(struct bch_fs *c, -+ struct jset_entry *start, -+ struct jset_entry *end) -+{ -+ struct jset_entry *entry; -+ unsigned long have = 0; -+ unsigned i; -+ -+ for (entry = start; entry < end; entry = vstruct_next(entry)) -+ if (entry->type == BCH_JSET_ENTRY_btree_root) -+ __set_bit(entry->btree_id, &have); -+ -+ mutex_lock(&c->btree_root_lock); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (c->btree_roots[i].alive && !test_bit(i, &have)) { -+ journal_entry_set(end, -+ BCH_JSET_ENTRY_btree_root, -+ i, c->btree_roots[i].level, -+ &c->btree_roots[i].key, -+ c->btree_roots[i].key.u64s); -+ end = vstruct_next(end); -+ } -+ -+ mutex_unlock(&c->btree_root_lock); -+ -+ return end; -+} -+ -+void bch2_fs_btree_interior_update_exit(struct bch_fs *c) -+{ -+ if (c->btree_interior_update_worker) -+ destroy_workqueue(c->btree_interior_update_worker); -+ mempool_exit(&c->btree_interior_update_pool); -+} -+ -+int bch2_fs_btree_interior_update_init(struct bch_fs *c) -+{ -+ mutex_init(&c->btree_reserve_cache_lock); -+ INIT_LIST_HEAD(&c->btree_interior_update_list); -+ INIT_LIST_HEAD(&c->btree_interior_updates_unwritten); -+ mutex_init(&c->btree_interior_update_lock); -+ INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work); -+ -+ c->btree_interior_update_worker = -+ alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 1); -+ if (!c->btree_interior_update_worker) -+ return -ENOMEM; -+ -+ return mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1, -+ sizeof(struct btree_update)); -+} -diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h -new file mode 100644 -index 000000000000..7668225e72c6 ---- /dev/null -+++ b/fs/bcachefs/btree_update_interior.h -@@ -0,0 +1,331 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H -+#define _BCACHEFS_BTREE_UPDATE_INTERIOR_H -+ -+#include "btree_cache.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+ -+void __bch2_btree_calc_format(struct bkey_format_state *, struct btree *); -+bool bch2_btree_node_format_fits(struct bch_fs *c, struct 
btree *, -+ struct bkey_format *); -+ -+#define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES) -+ -+#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1)) -+ -+/* -+ * Tracks an in progress split/rewrite of a btree node and the update to the -+ * parent node: -+ * -+ * When we split/rewrite a node, we do all the updates in memory without -+ * waiting for any writes to complete - we allocate the new node(s) and update -+ * the parent node, possibly recursively up to the root. -+ * -+ * The end result is that we have one or more new nodes being written - -+ * possibly several, if there were multiple splits - and then a write (updating -+ * an interior node) which will make all these new nodes visible. -+ * -+ * Additionally, as we split/rewrite nodes we free the old nodes - but the old -+ * nodes can't be freed (their space on disk can't be reclaimed) until the -+ * update to the interior node that makes the new node visible completes - -+ * until then, the old nodes are still reachable on disk. -+ * -+ */ -+struct btree_update { -+ struct closure cl; -+ struct bch_fs *c; -+ -+ struct list_head list; -+ struct list_head unwritten_list; -+ -+ /* What kind of update are we doing? */ -+ enum { -+ BTREE_INTERIOR_NO_UPDATE, -+ BTREE_INTERIOR_UPDATING_NODE, -+ BTREE_INTERIOR_UPDATING_ROOT, -+ BTREE_INTERIOR_UPDATING_AS, -+ } mode; -+ -+ unsigned must_rewrite:1; -+ unsigned nodes_written:1; -+ -+ enum btree_id btree_id; -+ -+ struct disk_reservation disk_res; -+ struct journal_preres journal_preres; -+ -+ /* -+ * BTREE_INTERIOR_UPDATING_NODE: -+ * The update that made the new nodes visible was a regular update to an -+ * existing interior node - @b. We can't write out the update to @b -+ * until the new nodes we created are finished writing, so we block @b -+ * from writing by putting this btree_interior update on the -+ * @b->write_blocked list with @write_blocked_list: -+ */ -+ struct btree *b; -+ struct list_head write_blocked_list; -+ -+ /* -+ * We may be freeing nodes that were dirty, and thus had journal entries -+ * pinned: we need to transfer the oldest of those pins to the -+ * btree_update operation, and release it when the new node(s) -+ * are all persistent and reachable: -+ */ -+ struct journal_entry_pin journal; -+ -+ /* Preallocated nodes we reserve when we start the update: */ -+ struct btree *prealloc_nodes[BTREE_UPDATE_NODES_MAX]; -+ unsigned nr_prealloc_nodes; -+ -+ /* Nodes being freed: */ -+ struct keylist old_keys; -+ u64 _old_keys[BTREE_UPDATE_NODES_MAX * -+ BKEY_BTREE_PTR_VAL_U64s_MAX]; -+ -+ /* Nodes being added: */ -+ struct keylist new_keys; -+ u64 _new_keys[BTREE_UPDATE_NODES_MAX * -+ BKEY_BTREE_PTR_VAL_U64s_MAX]; -+ -+ /* New nodes, that will be made reachable by this update: */ -+ struct btree *new_nodes[BTREE_UPDATE_NODES_MAX]; -+ unsigned nr_new_nodes; -+ -+ open_bucket_idx_t open_buckets[BTREE_UPDATE_NODES_MAX * -+ BCH_REPLICAS_MAX]; -+ open_bucket_idx_t nr_open_buckets; -+ -+ unsigned journal_u64s; -+ u64 journal_entries[BTREE_UPDATE_JOURNAL_RES]; -+ -+ /* Only here to reduce stack usage on recursive splits: */ -+ struct keylist parent_keys; -+ /* -+ * Enough room for btree_split's keys without realloc - btree node -+ * pointers never have crc/compression info, so we only need to acount -+ * for the pointers for three keys -+ */ -+ u64 inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3]; -+}; -+ -+void bch2_btree_node_free_inmem(struct bch_fs *, struct btree *, -+ struct btree_iter *); -+void 
bch2_btree_node_free_never_inserted(struct bch_fs *, struct btree *); -+ -+void bch2_btree_update_get_open_buckets(struct btree_update *, struct btree *); -+ -+struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *, -+ struct btree *, -+ struct bkey_format); -+ -+void bch2_btree_update_done(struct btree_update *); -+struct btree_update * -+bch2_btree_update_start(struct btree_trans *, enum btree_id, unsigned, -+ unsigned, struct closure *); -+ -+void bch2_btree_interior_update_will_free_node(struct btree_update *, -+ struct btree *); -+void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); -+ -+void bch2_btree_insert_node(struct btree_update *, struct btree *, -+ struct btree_iter *, struct keylist *, -+ unsigned); -+int bch2_btree_split_leaf(struct bch_fs *, struct btree_iter *, unsigned); -+ -+void __bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *, -+ unsigned, unsigned, enum btree_node_sibling); -+ -+static inline void bch2_foreground_maybe_merge_sibling(struct bch_fs *c, -+ struct btree_iter *iter, -+ unsigned level, unsigned flags, -+ enum btree_node_sibling sib) -+{ -+ struct btree *b; -+ -+ if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE) -+ return; -+ -+ if (!bch2_btree_node_relock(iter, level)) -+ return; -+ -+ b = iter->l[level].b; -+ if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold) -+ return; -+ -+ __bch2_foreground_maybe_merge(c, iter, level, flags, sib); -+} -+ -+static inline void bch2_foreground_maybe_merge(struct bch_fs *c, -+ struct btree_iter *iter, -+ unsigned level, -+ unsigned flags) -+{ -+ bch2_foreground_maybe_merge_sibling(c, iter, level, flags, -+ btree_prev_sib); -+ bch2_foreground_maybe_merge_sibling(c, iter, level, flags, -+ btree_next_sib); -+} -+ -+void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *); -+void bch2_btree_root_alloc(struct bch_fs *, enum btree_id); -+ -+static inline unsigned btree_update_reserve_required(struct bch_fs *c, -+ struct btree *b) -+{ -+ unsigned depth = btree_node_root(c, b)->c.level + 1; -+ -+ /* -+ * Number of nodes we might have to allocate in a worst case btree -+ * split operation - we split all the way up to the root, then allocate -+ * a new root, unless we're already at max depth: -+ */ -+ if (depth < BTREE_MAX_DEPTH) -+ return (depth - b->c.level) * 2 + 1; -+ else -+ return (depth - b->c.level) * 2 - 1; -+} -+ -+static inline void btree_node_reset_sib_u64s(struct btree *b) -+{ -+ b->sib_u64s[0] = b->nr.live_u64s; -+ b->sib_u64s[1] = b->nr.live_u64s; -+} -+ -+static inline void *btree_data_end(struct bch_fs *c, struct btree *b) -+{ -+ return (void *) b->data + btree_bytes(c); -+} -+ -+static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c, -+ struct btree *b) -+{ -+ return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s); -+} -+ -+static inline struct bkey_packed *unwritten_whiteouts_end(struct bch_fs *c, -+ struct btree *b) -+{ -+ return btree_data_end(c, b); -+} -+ -+static inline void *write_block(struct btree *b) -+{ -+ return (void *) b->data + (b->written << 9); -+} -+ -+static inline bool __btree_addr_written(struct btree *b, void *p) -+{ -+ return p < write_block(b); -+} -+ -+static inline bool bset_written(struct btree *b, struct bset *i) -+{ -+ return __btree_addr_written(b, i); -+} -+ -+static inline bool bkey_written(struct btree *b, struct bkey_packed *k) -+{ -+ return __btree_addr_written(b, k); -+} -+ -+static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c, -+ struct btree *b, -+ void *end) 
-+{ -+ ssize_t used = bset_byte_offset(b, end) / sizeof(u64) + -+ b->whiteout_u64s; -+ ssize_t total = c->opts.btree_node_size << 6; -+ -+ return total - used; -+} -+ -+static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c, -+ struct btree *b) -+{ -+ ssize_t remaining = __bch_btree_u64s_remaining(c, b, -+ btree_bkey_last(b, bset_tree_last(b))); -+ -+ BUG_ON(remaining < 0); -+ -+ if (bset_written(b, btree_bset_last(b))) -+ return 0; -+ -+ return remaining; -+} -+ -+static inline unsigned btree_write_set_buffer(struct btree *b) -+{ -+ /* -+ * Could buffer up larger amounts of keys for btrees with larger keys, -+ * pending benchmarking: -+ */ -+ return 4 << 10; -+} -+ -+static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, -+ struct btree *b) -+{ -+ struct bset_tree *t = bset_tree_last(b); -+ struct btree_node_entry *bne = max(write_block(b), -+ (void *) btree_bkey_last(b, bset_tree_last(b))); -+ ssize_t remaining_space = -+ __bch_btree_u64s_remaining(c, b, &bne->keys.start[0]); -+ -+ if (unlikely(bset_written(b, bset(b, t)))) { -+ if (remaining_space > (ssize_t) (block_bytes(c) >> 3)) -+ return bne; -+ } else { -+ if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) && -+ remaining_space > (ssize_t) (btree_write_set_buffer(b) >> 3)) -+ return bne; -+ } -+ -+ return NULL; -+} -+ -+static inline void push_whiteout(struct bch_fs *c, struct btree *b, -+ struct bpos pos) -+{ -+ struct bkey_packed k; -+ -+ BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s); -+ -+ if (!bkey_pack_pos(&k, pos, b)) { -+ struct bkey *u = (void *) &k; -+ -+ bkey_init(u); -+ u->p = pos; -+ } -+ -+ k.needs_whiteout = true; -+ -+ b->whiteout_u64s += k.u64s; -+ bkey_copy(unwritten_whiteouts_start(c, b), &k); -+} -+ -+/* -+ * write lock must be held on @b (else the dirty bset that we were going to -+ * insert into could be written out from under us) -+ */ -+static inline bool bch2_btree_node_insert_fits(struct bch_fs *c, -+ struct btree *b, unsigned u64s) -+{ -+ if (unlikely(btree_node_need_rewrite(b))) -+ return false; -+ -+ return u64s <= bch_btree_keys_u64s_remaining(c, b); -+} -+ -+void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *); -+ -+size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *); -+ -+void bch2_journal_entries_to_btree_roots(struct bch_fs *, struct jset *); -+struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *, -+ struct jset_entry *, struct jset_entry *); -+ -+void bch2_fs_btree_interior_update_exit(struct bch_fs *); -+int bch2_fs_btree_interior_update_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */ -diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c -new file mode 100644 -index 000000000000..cd699c257244 ---- /dev/null -+++ b/fs/bcachefs/btree_update_leaf.c -@@ -0,0 +1,1171 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_gc.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_key_cache.h" -+#include "btree_locking.h" -+#include "buckets.h" -+#include "debug.h" -+#include "error.h" -+#include "extent_update.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+#include "keylist.h" -+#include "replicas.h" -+ -+#include -+#include -+#include -+ -+static inline bool same_leaf_as_prev(struct btree_trans *trans, -+ struct btree_insert_entry *i) -+{ -+ return i != trans->updates2 && -+ iter_l(i[0].iter)->b == iter_l(i[-1].iter)->b; -+} -+ 
-+inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter) -+{ -+ bch2_btree_node_lock_write(b, iter); -+ -+ if (btree_iter_type(iter) == BTREE_ITER_CACHED) -+ return; -+ -+ if (unlikely(btree_node_just_written(b)) && -+ bch2_btree_post_write_cleanup(c, b)) -+ bch2_btree_iter_reinit_node(iter, b); -+ -+ /* -+ * If the last bset has been written, or if it's gotten too big - start -+ * a new bset to insert into: -+ */ -+ if (want_new_bset(c, b)) -+ bch2_btree_init_next(c, b, iter); -+} -+ -+/* Inserting into a given leaf node (last stage of insert): */ -+ -+/* Handle overwrites and do insert, for non extents: */ -+bool bch2_btree_bset_insert_key(struct btree_iter *iter, -+ struct btree *b, -+ struct btree_node_iter *node_iter, -+ struct bkey_i *insert) -+{ -+ struct bkey_packed *k; -+ unsigned clobber_u64s = 0, new_u64s = 0; -+ -+ EBUG_ON(btree_node_just_written(b)); -+ EBUG_ON(bset_written(b, btree_bset_last(b))); -+ EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k)); -+ EBUG_ON(bkey_cmp(b->data->min_key, POS_MIN) && -+ bkey_cmp(bkey_start_pos(&insert->k), -+ bkey_predecessor(b->data->min_key)) < 0); -+ EBUG_ON(bkey_cmp(insert->k.p, b->data->min_key) < 0); -+ EBUG_ON(bkey_cmp(insert->k.p, b->data->max_key) > 0); -+ EBUG_ON(insert->k.u64s > -+ bch_btree_keys_u64s_remaining(iter->trans->c, b)); -+ EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); -+ -+ k = bch2_btree_node_iter_peek_all(node_iter, b); -+ if (k && bkey_cmp_packed(b, k, &insert->k)) -+ k = NULL; -+ -+ /* @k is the key being overwritten/deleted, if any: */ -+ EBUG_ON(k && bkey_whiteout(k)); -+ -+ /* Deleting, but not found? nothing to do: */ -+ if (bkey_whiteout(&insert->k) && !k) -+ return false; -+ -+ if (bkey_whiteout(&insert->k)) { -+ /* Deleting: */ -+ btree_account_key_drop(b, k); -+ k->type = KEY_TYPE_deleted; -+ -+ if (k->needs_whiteout) -+ push_whiteout(iter->trans->c, b, insert->k.p); -+ k->needs_whiteout = false; -+ -+ if (k >= btree_bset_last(b)->start) { -+ clobber_u64s = k->u64s; -+ bch2_bset_delete(b, k, clobber_u64s); -+ goto fix_iter; -+ } else { -+ bch2_btree_iter_fix_key_modified(iter, b, k); -+ } -+ -+ return true; -+ } -+ -+ if (k) { -+ /* Overwriting: */ -+ btree_account_key_drop(b, k); -+ k->type = KEY_TYPE_deleted; -+ -+ insert->k.needs_whiteout = k->needs_whiteout; -+ k->needs_whiteout = false; -+ -+ if (k >= btree_bset_last(b)->start) { -+ clobber_u64s = k->u64s; -+ goto overwrite; -+ } else { -+ bch2_btree_iter_fix_key_modified(iter, b, k); -+ } -+ } -+ -+ k = bch2_btree_node_iter_bset_pos(node_iter, b, bset_tree_last(b)); -+overwrite: -+ bch2_bset_insert(b, node_iter, k, insert, clobber_u64s); -+ new_u64s = k->u64s; -+fix_iter: -+ if (clobber_u64s != new_u64s) -+ bch2_btree_node_iter_fix(iter, b, node_iter, k, -+ clobber_u64s, new_u64s); -+ return true; -+} -+ -+static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, -+ unsigned i, u64 seq) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct btree_write *w = container_of(pin, struct btree_write, journal); -+ struct btree *b = container_of(w, struct btree, writes[i]); -+ -+ btree_node_lock_type(c, b, SIX_LOCK_read); -+ bch2_btree_node_write_cond(c, b, -+ (btree_current_write(b) == w && w->journal.seq == seq)); -+ six_unlock_read(&b->c.lock); -+} -+ -+static void btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq) -+{ -+ return __btree_node_flush(j, pin, 0, seq); -+} -+ -+static void btree_node_flush1(struct journal *j, 
struct journal_entry_pin *pin, u64 seq) -+{ -+ return __btree_node_flush(j, pin, 1, seq); -+} -+ -+inline void bch2_btree_add_journal_pin(struct bch_fs *c, -+ struct btree *b, u64 seq) -+{ -+ struct btree_write *w = btree_current_write(b); -+ -+ bch2_journal_pin_add(&c->journal, seq, &w->journal, -+ btree_node_write_idx(b) == 0 -+ ? btree_node_flush0 -+ : btree_node_flush1); -+} -+ -+/** -+ * btree_insert_key - insert a key one key into a leaf node -+ */ -+static bool btree_insert_key_leaf(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree *b = iter_l(iter)->b; -+ struct bset_tree *t = bset_tree_last(b); -+ struct bset *i = bset(b, t); -+ int old_u64s = bset_u64s(t); -+ int old_live_u64s = b->nr.live_u64s; -+ int live_u64s_added, u64s_added; -+ -+ EBUG_ON(!iter->level && -+ !test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags)); -+ -+ if (unlikely(!bch2_btree_bset_insert_key(iter, b, -+ &iter_l(iter)->iter, insert))) -+ return false; -+ -+ i->journal_seq = cpu_to_le64(max(trans->journal_res.seq, -+ le64_to_cpu(i->journal_seq))); -+ -+ bch2_btree_add_journal_pin(c, b, trans->journal_res.seq); -+ -+ if (unlikely(!btree_node_dirty(b))) -+ set_btree_node_dirty(b); -+ -+ live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; -+ u64s_added = (int) bset_u64s(t) - old_u64s; -+ -+ if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added); -+ if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added); -+ -+ if (u64s_added > live_u64s_added && -+ bch2_maybe_compact_whiteouts(c, b)) -+ bch2_btree_iter_reinit_node(iter, b); -+ -+ trace_btree_insert_key(c, b, insert); -+ return true; -+} -+ -+/* Cached btree updates: */ -+ -+/* Normal update interface: */ -+ -+static inline void btree_insert_entry_checks(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct bch_fs *c = trans->c; -+ -+ BUG_ON(bkey_cmp(insert->k.p, iter->pos)); -+ BUG_ON(debug_check_bkeys(c) && -+ bch2_bkey_invalid(c, bkey_i_to_s_c(insert), -+ __btree_node_type(iter->level, iter->btree_id))); -+} -+ -+static noinline int -+bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s) -+{ -+ struct bch_fs *c = trans->c; -+ int ret; -+ -+ bch2_trans_unlock(trans); -+ -+ ret = bch2_journal_preres_get(&c->journal, -+ &trans->journal_preres, u64s, 0); -+ if (ret) -+ return ret; -+ -+ if (!bch2_trans_relock(trans)) { -+ trace_trans_restart_journal_preres_get(trans->ip); -+ return -EINTR; -+ } -+ -+ return 0; -+} -+ -+static inline int bch2_trans_journal_res_get(struct btree_trans *trans, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ int ret; -+ -+ if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED) -+ flags |= JOURNAL_RES_GET_RESERVED; -+ -+ ret = bch2_journal_res_get(&c->journal, &trans->journal_res, -+ trans->journal_u64s, flags); -+ -+ return ret == -EAGAIN ? 
BTREE_INSERT_NEED_JOURNAL_RES : ret; -+} -+ -+static enum btree_insert_ret -+btree_key_can_insert(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned u64s) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree *b = iter_l(iter)->b; -+ -+ if (!bch2_btree_node_insert_fits(c, b, u64s)) -+ return BTREE_INSERT_BTREE_NODE_FULL; -+ -+ return BTREE_INSERT_OK; -+} -+ -+static enum btree_insert_ret -+btree_key_can_insert_cached(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned u64s) -+{ -+ struct bkey_cached *ck = (void *) iter->l[0].b; -+ unsigned new_u64s; -+ struct bkey_i *new_k; -+ -+ BUG_ON(iter->level); -+ -+ if (u64s <= ck->u64s) -+ return BTREE_INSERT_OK; -+ -+ new_u64s = roundup_pow_of_two(u64s); -+ new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS); -+ if (!new_k) -+ return -ENOMEM; -+ -+ ck->u64s = new_u64s; -+ ck->k = new_k; -+ return BTREE_INSERT_OK; -+} -+ -+static inline void do_btree_insert_one(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct bch_fs *c = trans->c; -+ struct journal *j = &c->journal; -+ bool did_work; -+ -+ EBUG_ON(trans->journal_res.ref != -+ !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)); -+ -+ insert->k.needs_whiteout = false; -+ -+ did_work = (btree_iter_type(iter) != BTREE_ITER_CACHED) -+ ? btree_insert_key_leaf(trans, iter, insert) -+ : bch2_btree_insert_key_cached(trans, iter, insert); -+ if (!did_work) -+ return; -+ -+ if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) { -+ bch2_journal_add_keys(j, &trans->journal_res, -+ iter->btree_id, insert); -+ -+ bch2_journal_set_has_inode(j, &trans->journal_res, -+ insert->k.p.inode); -+ -+ if (trans->journal_seq) -+ *trans->journal_seq = trans->journal_res.seq; -+ } -+} -+ -+static inline bool iter_has_trans_triggers(struct btree_iter *iter) -+{ -+ return BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << iter->btree_id); -+} -+ -+static inline bool iter_has_nontrans_triggers(struct btree_iter *iter) -+{ -+ return (BTREE_NODE_TYPE_HAS_TRIGGERS & -+ ~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS) & -+ (1U << iter->btree_id); -+} -+ -+static noinline void bch2_btree_iter_unlock_noinline(struct btree_iter *iter) -+{ -+ __bch2_btree_iter_unlock(iter); -+} -+ -+static noinline void bch2_trans_mark_gc(struct btree_trans *trans) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_insert_entry *i; -+ -+ trans_for_each_update(trans, i) { -+ /* -+ * XXX: synchronization of cached update triggers with gc -+ */ -+ BUG_ON(btree_iter_type(i->iter) == BTREE_ITER_CACHED); -+ -+ if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b))) -+ bch2_mark_update(trans, i->iter, i->k, NULL, -+ i->trigger_flags|BTREE_TRIGGER_GC); -+ } -+} -+ -+static inline int -+bch2_trans_commit_write_locked(struct btree_trans *trans, -+ struct btree_insert_entry **stopped_at) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_fs_usage *fs_usage = NULL; -+ struct btree_insert_entry *i; -+ unsigned u64s = 0; -+ bool marking = false; -+ int ret; -+ -+ if (race_fault()) { -+ trace_trans_restart_fault_inject(trans->ip); -+ return -EINTR; -+ } -+ -+ /* -+ * Check if the insert will fit in the leaf node with the write lock -+ * held, otherwise another thread could write the node changing the -+ * amount of space available: -+ */ -+ -+ prefetch(&trans->c->journal.flags); -+ -+ trans_for_each_update2(trans, i) { -+ /* Multiple inserts might go to same leaf: */ -+ if (!same_leaf_as_prev(trans, i)) -+ u64s = 0; -+ -+ u64s += i->k->k.u64s; -+ ret = btree_iter_type(i->iter) != BTREE_ITER_CACHED -+ ? 
btree_key_can_insert(trans, i->iter, u64s) -+ : btree_key_can_insert_cached(trans, i->iter, u64s); -+ if (ret) { -+ *stopped_at = i; -+ return ret; -+ } -+ -+ if (btree_node_type_needs_gc(i->iter->btree_id)) -+ marking = true; -+ } -+ -+ if (marking) { -+ percpu_down_read(&c->mark_lock); -+ fs_usage = bch2_fs_usage_scratch_get(c); -+ } -+ -+ /* -+ * Don't get journal reservation until after we know insert will -+ * succeed: -+ */ -+ if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) { -+ ret = bch2_trans_journal_res_get(trans, -+ JOURNAL_RES_GET_NONBLOCK); -+ if (ret) -+ goto err; -+ } else { -+ trans->journal_res.seq = c->journal.replay_journal_seq; -+ } -+ -+ if (unlikely(trans->extra_journal_entry_u64s)) { -+ memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res), -+ trans->extra_journal_entries, -+ trans->extra_journal_entry_u64s); -+ -+ trans->journal_res.offset += trans->extra_journal_entry_u64s; -+ trans->journal_res.u64s -= trans->extra_journal_entry_u64s; -+ } -+ -+ /* -+ * Not allowed to fail after we've gotten our journal reservation - we -+ * have to use it: -+ */ -+ -+ if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) { -+ if (journal_seq_verify(c)) -+ trans_for_each_update2(trans, i) -+ i->k->k.version.lo = trans->journal_res.seq; -+ else if (inject_invalid_keys(c)) -+ trans_for_each_update2(trans, i) -+ i->k->k.version = MAX_VERSION; -+ } -+ -+ /* Must be called under mark_lock: */ -+ if (marking && trans->fs_usage_deltas && -+ bch2_replicas_delta_list_apply(c, fs_usage, -+ trans->fs_usage_deltas)) { -+ ret = BTREE_INSERT_NEED_MARK_REPLICAS; -+ goto err; -+ } -+ -+ trans_for_each_update(trans, i) -+ if (iter_has_nontrans_triggers(i->iter)) -+ bch2_mark_update(trans, i->iter, i->k, -+ fs_usage, i->trigger_flags); -+ -+ if (marking) -+ bch2_trans_fs_usage_apply(trans, fs_usage); -+ -+ if (unlikely(c->gc_pos.phase)) -+ bch2_trans_mark_gc(trans); -+ -+ trans_for_each_update2(trans, i) -+ do_btree_insert_one(trans, i->iter, i->k); -+err: -+ if (marking) { -+ bch2_fs_usage_scratch_put(c, fs_usage); -+ percpu_up_read(&c->mark_lock); -+ } -+ -+ return ret; -+} -+ -+/* -+ * Get journal reservation, take write locks, and attempt to do btree update(s): -+ */ -+static inline int do_bch2_trans_commit(struct btree_trans *trans, -+ struct btree_insert_entry **stopped_at) -+{ -+ struct btree_insert_entry *i; -+ struct btree_iter *iter; -+ int ret; -+ -+ trans_for_each_update2(trans, i) -+ BUG_ON(!btree_node_intent_locked(i->iter, i->iter->level)); -+ -+ ret = bch2_journal_preres_get(&trans->c->journal, -+ &trans->journal_preres, trans->journal_preres_u64s, -+ JOURNAL_RES_GET_NONBLOCK| -+ ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) -+ ? 
JOURNAL_RES_GET_RECLAIM : 0)); -+ if (unlikely(ret == -EAGAIN)) -+ ret = bch2_trans_journal_preres_get_cold(trans, -+ trans->journal_preres_u64s); -+ if (unlikely(ret)) -+ return ret; -+ -+ /* -+ * Can't be holding any read locks when we go to take write locks: -+ * -+ * note - this must be done after bch2_trans_journal_preres_get_cold() -+ * or anything else that might call bch2_trans_relock(), since that -+ * would just retake the read locks: -+ */ -+ trans_for_each_iter(trans, iter) { -+ if (iter->nodes_locked != iter->nodes_intent_locked) { -+ EBUG_ON(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT); -+ EBUG_ON(trans->iters_live & (1ULL << iter->idx)); -+ bch2_btree_iter_unlock_noinline(iter); -+ } -+ } -+ -+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) -+ trans_for_each_update2(trans, i) -+ btree_insert_entry_checks(trans, i->iter, i->k); -+ bch2_btree_trans_verify_locks(trans); -+ -+ trans_for_each_update2(trans, i) -+ if (!same_leaf_as_prev(trans, i)) -+ bch2_btree_node_lock_for_insert(trans->c, -+ iter_l(i->iter)->b, i->iter); -+ -+ ret = bch2_trans_commit_write_locked(trans, stopped_at); -+ -+ trans_for_each_update2(trans, i) -+ if (!same_leaf_as_prev(trans, i)) -+ bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b, -+ i->iter); -+ -+ if (!ret && trans->journal_pin) -+ bch2_journal_pin_add(&trans->c->journal, trans->journal_res.seq, -+ trans->journal_pin, NULL); -+ -+ /* -+ * Drop journal reservation after dropping write locks, since dropping -+ * the journal reservation may kick off a journal write: -+ */ -+ bch2_journal_res_put(&trans->c->journal, &trans->journal_res); -+ -+ if (unlikely(ret)) -+ return ret; -+ -+ if (trans->flags & BTREE_INSERT_NOUNLOCK) -+ trans->nounlock = true; -+ -+ trans_for_each_update2(trans, i) -+ if (btree_iter_type(i->iter) != BTREE_ITER_CACHED && -+ !same_leaf_as_prev(trans, i)) -+ bch2_foreground_maybe_merge(trans->c, i->iter, -+ 0, trans->flags); -+ -+ trans->nounlock = false; -+ -+ bch2_trans_downgrade(trans); -+ -+ return 0; -+} -+ -+static noinline -+int bch2_trans_commit_error(struct btree_trans *trans, -+ struct btree_insert_entry *i, -+ int ret) -+{ -+ struct bch_fs *c = trans->c; -+ unsigned flags = trans->flags; -+ -+ /* -+ * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree -+ * update; if we haven't done anything yet it doesn't apply -+ */ -+ flags &= ~BTREE_INSERT_NOUNLOCK; -+ -+ switch (ret) { -+ case BTREE_INSERT_BTREE_NODE_FULL: -+ ret = bch2_btree_split_leaf(c, i->iter, flags); -+ -+ /* -+ * if the split succeeded without dropping locks the insert will -+ * still be atomic (what the caller peeked() and is overwriting -+ * won't have changed) -+ */ -+#if 0 -+ /* -+ * XXX: -+ * split -> btree node merging (of parent node) might still drop -+ * locks when we're not passing it BTREE_INSERT_NOUNLOCK -+ * -+ * we don't want to pass BTREE_INSERT_NOUNLOCK to split as that -+ * will inhibit merging - but we don't have a reliable way yet -+ * (do we?) 
of checking if we dropped locks in this path -+ */ -+ if (!ret) -+ goto retry; -+#endif -+ -+ /* -+ * don't care if we got ENOSPC because we told split it -+ * couldn't block: -+ */ -+ if (!ret || -+ ret == -EINTR || -+ (flags & BTREE_INSERT_NOUNLOCK)) { -+ trace_trans_restart_btree_node_split(trans->ip); -+ ret = -EINTR; -+ } -+ break; -+ case BTREE_INSERT_ENOSPC: -+ ret = -ENOSPC; -+ break; -+ case BTREE_INSERT_NEED_MARK_REPLICAS: -+ bch2_trans_unlock(trans); -+ -+ trans_for_each_update(trans, i) { -+ ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k)); -+ if (ret) -+ return ret; -+ } -+ -+ if (bch2_trans_relock(trans)) -+ return 0; -+ -+ trace_trans_restart_mark_replicas(trans->ip); -+ ret = -EINTR; -+ break; -+ case BTREE_INSERT_NEED_JOURNAL_RES: -+ bch2_trans_unlock(trans); -+ -+ ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK); -+ if (ret) -+ return ret; -+ -+ if (bch2_trans_relock(trans)) -+ return 0; -+ -+ trace_trans_restart_journal_res_get(trans->ip); -+ ret = -EINTR; -+ break; -+ default: -+ BUG_ON(ret >= 0); -+ break; -+ } -+ -+ if (ret == -EINTR) { -+ int ret2 = bch2_btree_iter_traverse_all(trans); -+ -+ if (ret2) { -+ trace_trans_restart_traverse(trans->ip); -+ return ret2; -+ } -+ -+ trace_trans_restart_atomic(trans->ip); -+ } -+ -+ return ret; -+} -+ -+static noinline int -+bch2_trans_commit_get_rw_cold(struct btree_trans *trans) -+{ -+ struct bch_fs *c = trans->c; -+ int ret; -+ -+ if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW))) -+ return -EROFS; -+ -+ bch2_trans_unlock(trans); -+ -+ ret = bch2_fs_read_write_early(c); -+ if (ret) -+ return ret; -+ -+ percpu_ref_get(&c->writes); -+ return 0; -+} -+ -+static void bch2_trans_update2(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct btree_insert_entry *i, n = (struct btree_insert_entry) { -+ .iter = iter, .k = insert -+ }; -+ -+ btree_insert_entry_checks(trans, n.iter, n.k); -+ -+ BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); -+ -+ EBUG_ON(trans->nr_updates2 >= trans->nr_iters); -+ -+ iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ -+ trans_for_each_update2(trans, i) { -+ if (btree_iter_cmp(n.iter, i->iter) == 0) { -+ *i = n; -+ return; -+ } -+ -+ if (btree_iter_cmp(n.iter, i->iter) <= 0) -+ break; -+ } -+ -+ array_insert_item(trans->updates2, trans->nr_updates2, -+ i - trans->updates2, n); -+} -+ -+static int extent_update_to_keys(struct btree_trans *trans, -+ struct btree_iter *orig_iter, -+ struct bkey_i *insert) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ ret = bch2_extent_can_insert(trans, orig_iter, insert); -+ if (ret) -+ return ret; -+ -+ if (bkey_deleted(&insert->k)) -+ return 0; -+ -+ iter = bch2_trans_copy_iter(trans, orig_iter); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ iter->flags |= BTREE_ITER_INTENT; -+ __bch2_btree_iter_set_pos(iter, insert->k.p, false); -+ bch2_trans_update2(trans, iter, insert); -+ bch2_trans_iter_put(trans, iter); -+ return 0; -+} -+ -+static int extent_handle_overwrites(struct btree_trans *trans, -+ enum btree_id btree_id, -+ struct bpos start, struct bpos end) -+{ -+ struct btree_iter *iter = NULL, *update_iter; -+ struct bkey_i *update; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ iter = bch2_trans_get_iter(trans, btree_id, start, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(iter); -+ if (ret) -+ return ret; -+ -+ k = bch2_btree_iter_peek_with_updates(iter); -+ -+ while (k.k && !(ret = bkey_err(k))) { -+ if (bkey_cmp(end, bkey_start_pos(k.k)) <= 0) -+ break; -+ -+ if (bkey_cmp(bkey_start_pos(k.k), start) < 0) 
{ -+ update_iter = bch2_trans_copy_iter(trans, iter); -+ if ((ret = PTR_ERR_OR_ZERO(update_iter))) -+ goto err; -+ -+ update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ if ((ret = PTR_ERR_OR_ZERO(update))) -+ goto err; -+ -+ bkey_reassemble(update, k); -+ bch2_cut_back(start, update); -+ -+ __bch2_btree_iter_set_pos(update_iter, update->k.p, false); -+ bch2_trans_update2(trans, update_iter, update); -+ bch2_trans_iter_put(trans, update_iter); -+ } -+ -+ if (bkey_cmp(k.k->p, end) > 0) { -+ update_iter = bch2_trans_copy_iter(trans, iter); -+ if ((ret = PTR_ERR_OR_ZERO(update_iter))) -+ goto err; -+ -+ update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ if ((ret = PTR_ERR_OR_ZERO(update))) -+ goto err; -+ -+ bkey_reassemble(update, k); -+ bch2_cut_front(end, update); -+ -+ __bch2_btree_iter_set_pos(update_iter, update->k.p, false); -+ bch2_trans_update2(trans, update_iter, update); -+ bch2_trans_iter_put(trans, update_iter); -+ } else { -+ update_iter = bch2_trans_copy_iter(trans, iter); -+ if ((ret = PTR_ERR_OR_ZERO(update_iter))) -+ goto err; -+ -+ update = bch2_trans_kmalloc(trans, sizeof(struct bkey)); -+ if ((ret = PTR_ERR_OR_ZERO(update))) -+ goto err; -+ -+ update->k = *k.k; -+ set_bkey_val_u64s(&update->k, 0); -+ update->k.type = KEY_TYPE_deleted; -+ update->k.size = 0; -+ -+ __bch2_btree_iter_set_pos(update_iter, update->k.p, false); -+ bch2_trans_update2(trans, update_iter, update); -+ bch2_trans_iter_put(trans, update_iter); -+ } -+ -+ k = bch2_btree_iter_next_with_updates(iter); -+ } -+err: -+ if (!IS_ERR_OR_NULL(iter)) -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+int __bch2_trans_commit(struct btree_trans *trans) -+{ -+ struct btree_insert_entry *i = NULL; -+ struct btree_iter *iter; -+ bool trans_trigger_run; -+ unsigned u64s; -+ int ret = 0; -+ -+ BUG_ON(trans->need_reset); -+ -+ if (!trans->nr_updates) -+ goto out_noupdates; -+ -+ if (trans->flags & BTREE_INSERT_GC_LOCK_HELD) -+ lockdep_assert_held(&trans->c->gc_lock); -+ -+ memset(&trans->journal_preres, 0, sizeof(trans->journal_preres)); -+ -+ trans->journal_u64s = trans->extra_journal_entry_u64s; -+ trans->journal_preres_u64s = 0; -+ -+ if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) && -+ unlikely(!percpu_ref_tryget(&trans->c->writes))) { -+ ret = bch2_trans_commit_get_rw_cold(trans); -+ if (ret) -+ return ret; -+ } -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ trans_for_each_update(trans, i) -+ if (btree_iter_type(i->iter) != BTREE_ITER_CACHED && -+ !(i->trigger_flags & BTREE_TRIGGER_NORUN)) -+ bch2_btree_key_cache_verify_clean(trans, -+ i->iter->btree_id, i->iter->pos); -+#endif -+ -+ /* -+ * Running triggers will append more updates to the list of updates as -+ * we're walking it: -+ */ -+ do { -+ trans_trigger_run = false; -+ -+ trans_for_each_update(trans, i) { -+ if (unlikely(i->iter->uptodate > BTREE_ITER_NEED_PEEK && -+ (ret = bch2_btree_iter_traverse(i->iter)))) { -+ trace_trans_restart_traverse(trans->ip); -+ goto out; -+ } -+ -+ /* -+ * We're not using bch2_btree_iter_upgrade here because -+ * we know trans->nounlock can't be set: -+ */ -+ if (unlikely(i->iter->locks_want < 1 && -+ !__bch2_btree_iter_upgrade(i->iter, 1))) { -+ trace_trans_restart_upgrade(trans->ip); -+ ret = -EINTR; -+ goto out; -+ } -+ -+ if (iter_has_trans_triggers(i->iter) && -+ !i->trans_triggers_run) { -+ i->trans_triggers_run = true; -+ trans_trigger_run = true; -+ -+ ret = bch2_trans_mark_update(trans, i->iter, i->k, -+ i->trigger_flags); -+ if (unlikely(ret)) { -+ if (ret == -EINTR) -+ trace_trans_restart_mark(trans->ip); 
-+ goto out; -+ } -+ } -+ } -+ } while (trans_trigger_run); -+ -+ /* Turn extents updates into keys: */ -+ trans_for_each_update(trans, i) -+ if (i->iter->flags & BTREE_ITER_IS_EXTENTS) { -+ struct bpos start = bkey_start_pos(&i->k->k); -+ -+ while (i + 1 < trans->updates + trans->nr_updates && -+ i[0].iter->btree_id == i[1].iter->btree_id && -+ !bkey_cmp(i[0].k->k.p, bkey_start_pos(&i[1].k->k))) -+ i++; -+ -+ ret = extent_handle_overwrites(trans, i->iter->btree_id, -+ start, i->k->k.p); -+ if (ret) -+ goto out; -+ } -+ -+ trans_for_each_update(trans, i) { -+ if (i->iter->flags & BTREE_ITER_IS_EXTENTS) { -+ ret = extent_update_to_keys(trans, i->iter, i->k); -+ if (ret) -+ goto out; -+ } else { -+ bch2_trans_update2(trans, i->iter, i->k); -+ } -+ } -+ -+ trans_for_each_update2(trans, i) { -+ BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK); -+ BUG_ON(i->iter->locks_want < 1); -+ -+ u64s = jset_u64s(i->k->k.u64s); -+ if (btree_iter_type(i->iter) == BTREE_ITER_CACHED && -+ likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) -+ trans->journal_preres_u64s += u64s; -+ trans->journal_u64s += u64s; -+ } -+retry: -+ memset(&trans->journal_res, 0, sizeof(trans->journal_res)); -+ -+ ret = do_bch2_trans_commit(trans, &i); -+ -+ /* make sure we didn't drop or screw up locks: */ -+ bch2_btree_trans_verify_locks(trans); -+ -+ if (ret) -+ goto err; -+ -+ trans_for_each_iter(trans, iter) -+ if ((trans->iters_live & (1ULL << iter->idx)) && -+ (iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT)) { -+ if (trans->flags & BTREE_INSERT_NOUNLOCK) -+ bch2_btree_iter_set_pos_same_leaf(iter, iter->pos_after_commit); -+ else -+ bch2_btree_iter_set_pos(iter, iter->pos_after_commit); -+ } -+out: -+ bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres); -+ -+ if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW))) -+ percpu_ref_put(&trans->c->writes); -+out_noupdates: -+ bch2_trans_reset(trans, !ret ? TRANS_RESET_NOTRAVERSE : 0); -+ -+ return ret; -+err: -+ ret = bch2_trans_commit_error(trans, i, ret); -+ if (ret) -+ goto out; -+ -+ goto retry; -+} -+ -+int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, -+ struct bkey_i *k, enum btree_trigger_flags flags) -+{ -+ struct btree_insert_entry *i, n = (struct btree_insert_entry) { -+ .trigger_flags = flags, .iter = iter, .k = k -+ }; -+ -+ EBUG_ON(bkey_cmp(iter->pos, -+ (iter->flags & BTREE_ITER_IS_EXTENTS) -+ ? 
bkey_start_pos(&k->k) -+ : k->k.p)); -+ -+ iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ -+ if (btree_node_type_is_extents(iter->btree_id)) { -+ iter->pos_after_commit = k->k.p; -+ iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT; -+ } -+ -+ /* -+ * Pending updates are kept sorted: first, find position of new update: -+ */ -+ trans_for_each_update(trans, i) -+ if (btree_iter_cmp(iter, i->iter) <= 0) -+ break; -+ -+ /* -+ * Now delete/trim any updates the new update overwrites: -+ */ -+ if (i > trans->updates && -+ i[-1].iter->btree_id == iter->btree_id && -+ bkey_cmp(iter->pos, i[-1].k->k.p) < 0) -+ bch2_cut_back(n.iter->pos, i[-1].k); -+ -+ while (i < trans->updates + trans->nr_updates && -+ iter->btree_id == i->iter->btree_id && -+ bkey_cmp(n.k->k.p, i->k->k.p) >= 0) -+ array_remove_item(trans->updates, trans->nr_updates, -+ i - trans->updates); -+ -+ if (i < trans->updates + trans->nr_updates && -+ iter->btree_id == i->iter->btree_id && -+ bkey_cmp(n.k->k.p, i->iter->pos) > 0) { -+ /* -+ * When we have an extent that overwrites the start of another -+ * update, trimming that extent will mean the iterator's -+ * position has to change since the iterator position has to -+ * match the extent's start pos - but we don't want to change -+ * the iterator pos if some other code is using it, so we may -+ * need to clone it: -+ */ -+ if (trans->iters_live & (1ULL << i->iter->idx)) { -+ i->iter = bch2_trans_copy_iter(trans, i->iter); -+ if (IS_ERR(i->iter)) { -+ trans->need_reset = true; -+ return PTR_ERR(i->iter); -+ } -+ -+ i->iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ bch2_trans_iter_put(trans, i->iter); -+ } -+ -+ bch2_cut_front(n.k->k.p, i->k); -+ bch2_btree_iter_set_pos(i->iter, n.k->k.p); -+ } -+ -+ EBUG_ON(trans->nr_updates >= trans->nr_iters); -+ -+ array_insert_item(trans->updates, trans->nr_updates, -+ i - trans->updates, n); -+ return 0; -+} -+ -+int __bch2_btree_insert(struct btree_trans *trans, -+ enum btree_id id, struct bkey_i *k) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, id, bkey_start_pos(&k->k), -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ ret = bch2_btree_iter_traverse(iter) ?: -+ bch2_trans_update(trans, iter, k, 0); -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+/** -+ * bch2_btree_insert - insert keys into the extent btree -+ * @c: pointer to struct bch_fs -+ * @id: btree to insert into -+ * @insert_keys: list of keys to insert -+ * @hook: insert callback -+ */ -+int bch2_btree_insert(struct bch_fs *c, enum btree_id id, -+ struct bkey_i *k, -+ struct disk_reservation *disk_res, -+ u64 *journal_seq, int flags) -+{ -+ return bch2_trans_do(c, disk_res, journal_seq, flags, -+ __bch2_btree_insert(&trans, id, k)); -+} -+ -+int bch2_btree_delete_at_range(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bpos end, -+ u64 *journal_seq) -+{ -+ struct bkey_s_c k; -+ int ret = 0; -+retry: -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret = bkey_err(k)) && -+ bkey_cmp(iter->pos, end) < 0) { -+ struct bkey_i delete; -+ -+ bch2_trans_begin(trans); -+ -+ bkey_init(&delete.k); -+ -+ /* -+ * For extents, iter.pos won't necessarily be the same as -+ * bkey_start_pos(k.k) (for non extents they always will be the -+ * same). It's important that we delete starting from iter.pos -+ * because the range we want to delete could start in the middle -+ * of k. -+ * -+ * (bch2_btree_iter_peek() does guarantee that iter.pos >= -+ * bkey_start_pos(k.k)). 
-+ */ -+ delete.k.p = iter->pos; -+ -+ if (btree_node_type_is_extents(iter->btree_id)) { -+ unsigned max_sectors = -+ KEY_SIZE_MAX & (~0 << trans->c->block_bits); -+ -+ /* create the biggest key we can */ -+ bch2_key_resize(&delete.k, max_sectors); -+ bch2_cut_back(end, &delete); -+ -+ ret = bch2_extent_trim_atomic(&delete, iter); -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_update(trans, iter, &delete, 0); -+ ret = bch2_trans_commit(trans, NULL, journal_seq, -+ BTREE_INSERT_NOFAIL); -+ if (ret) -+ break; -+ -+ bch2_trans_cond_resched(trans); -+ } -+ -+ if (ret == -EINTR) { -+ ret = 0; -+ goto retry; -+ } -+ -+ return ret; -+ -+} -+ -+int bch2_btree_delete_at(struct btree_trans *trans, -+ struct btree_iter *iter, unsigned flags) -+{ -+ struct bkey_i k; -+ -+ bkey_init(&k.k); -+ k.k.p = iter->pos; -+ -+ bch2_trans_update(trans, iter, &k, 0); -+ return bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE|flags); -+} -+ -+/* -+ * bch_btree_delete_range - delete everything within a given range -+ * -+ * Range is a half open interval - [start, end) -+ */ -+int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, -+ struct bpos start, struct bpos end, -+ u64 *journal_seq) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ int ret = 0; -+ -+ /* -+ * XXX: whether we need mem/more iters depends on whether this btree id -+ * has triggers -+ */ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512); -+ -+ iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT); -+ -+ ret = bch2_btree_delete_at_range(&trans, iter, end, journal_seq); -+ ret = bch2_trans_exit(&trans) ?: ret; -+ -+ BUG_ON(ret == -EINTR); -+ return ret; -+} -diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -new file mode 100644 -index 000000000000..97a8af31ded1 ---- /dev/null -+++ b/fs/bcachefs/buckets.c -@@ -0,0 +1,2145 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Code for manipulating bucket marks for garbage collection. -+ * -+ * Copyright 2014 Datera, Inc. -+ * -+ * Bucket states: -+ * - free bucket: mark == 0 -+ * The bucket contains no data and will not be read -+ * -+ * - allocator bucket: owned_by_allocator == 1 -+ * The bucket is on a free list, or it is an open bucket -+ * -+ * - cached bucket: owned_by_allocator == 0 && -+ * dirty_sectors == 0 && -+ * cached_sectors > 0 -+ * The bucket contains data but may be safely discarded as there are -+ * enough replicas of the data on other cache devices, or it has been -+ * written back to the backing device -+ * -+ * - dirty bucket: owned_by_allocator == 0 && -+ * dirty_sectors > 0 -+ * The bucket contains data that we must not discard (either only copy, -+ * or one of the 'main copies' for data requiring multiple replicas) -+ * -+ * - metadata bucket: owned_by_allocator == 0 && is_metadata == 1 -+ * This is a btree node, journal or gen/prio bucket -+ * -+ * Lifecycle: -+ * -+ * bucket invalidated => bucket on freelist => open bucket => -+ * [dirty bucket =>] cached bucket => bucket invalidated => ... -+ * -+ * Note that cache promotion can skip the dirty bucket step, as data -+ * is copied from a deeper tier to a shallower tier, onto a cached -+ * bucket. -+ * Note also that a cached bucket can spontaneously become dirty -- -+ * see below. -+ * -+ * Only a traversal of the key space can determine whether a bucket is -+ * truly dirty or cached. 
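The bucket states listed above boil down to a small decision function. A stand-alone sketch over a simplified mark struct (field names follow the comment, not the real packed struct bucket_mark layout):

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for the bucket mark described above. */
struct mark {
	bool		owned_by_allocator;
	bool		is_metadata;
	unsigned	dirty_sectors;
	unsigned	cached_sectors;
};

static const char *classify(struct mark m)
{
	if (m.owned_by_allocator)
		return "allocator";	/* on a freelist, or an open bucket */
	if (m.is_metadata)
		return "metadata";	/* btree node, journal or gen/prio bucket */
	if (m.dirty_sectors)
		return "dirty";		/* data we must not discard */
	if (m.cached_sectors)
		return "cached";	/* discardable replica */
	return "free";
}

int main(void)
{
	struct mark m = { .cached_sectors = 8 };

	printf("%s\n", classify(m));	/* prints "cached" */
	return 0;
}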
-+ * -+ * Transitions: -+ * -+ * - free => allocator: bucket was invalidated -+ * - cached => allocator: bucket was invalidated -+ * -+ * - allocator => dirty: open bucket was filled up -+ * - allocator => cached: open bucket was filled up -+ * - allocator => metadata: metadata was allocated -+ * -+ * - dirty => cached: dirty sectors were copied to a deeper tier -+ * - dirty => free: dirty sectors were overwritten or moved (copy gc) -+ * - cached => free: cached sectors were overwritten -+ * -+ * - metadata => free: metadata was freed -+ * -+ * Oddities: -+ * - cached => dirty: a device was removed so formerly replicated data -+ * is no longer sufficiently replicated -+ * - free => cached: cannot happen -+ * - free => dirty: cannot happen -+ * - free => metadata: cannot happen -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "bset.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "ec.h" -+#include "error.h" -+#include "movinggc.h" -+#include "replicas.h" -+ -+#include -+#include -+ -+/* -+ * Clear journal_seq_valid for buckets for which it's not needed, to prevent -+ * wraparound: -+ */ -+void bch2_bucket_seq_cleanup(struct bch_fs *c) -+{ -+ u64 journal_seq = atomic64_read(&c->journal.seq); -+ u16 last_seq_ondisk = c->journal.last_seq_ondisk; -+ struct bch_dev *ca; -+ struct bucket_array *buckets; -+ struct bucket *g; -+ struct bucket_mark m; -+ unsigned i; -+ -+ if (journal_seq - c->last_bucket_seq_cleanup < -+ (1U << (BUCKET_JOURNAL_SEQ_BITS - 2))) -+ return; -+ -+ c->last_bucket_seq_cleanup = journal_seq; -+ -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) { -+ bucket_cmpxchg(g, m, ({ -+ if (!m.journal_seq_valid || -+ bucket_needs_journal_commit(m, last_seq_ondisk)) -+ break; -+ -+ m.journal_seq_valid = 0; -+ })); -+ } -+ up_read(&ca->bucket_lock); -+ } -+} -+ -+void bch2_fs_usage_initialize(struct bch_fs *c) -+{ -+ struct bch_fs_usage *usage; -+ unsigned i; -+ -+ percpu_down_write(&c->mark_lock); -+ usage = c->usage_base; -+ -+ bch2_fs_usage_acc_to_base(c, 0); -+ bch2_fs_usage_acc_to_base(c, 1); -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) -+ usage->reserved += usage->persistent_reserved[i]; -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ switch (e->data_type) { -+ case BCH_DATA_btree: -+ usage->btree += usage->replicas[i]; -+ break; -+ case BCH_DATA_user: -+ usage->data += usage->replicas[i]; -+ break; -+ case BCH_DATA_cached: -+ usage->cached += usage->replicas[i]; -+ break; -+ } -+ } -+ -+ percpu_up_write(&c->mark_lock); -+} -+ -+void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage) -+{ -+ if (fs_usage == c->usage_scratch) -+ mutex_unlock(&c->usage_scratch_lock); -+ else -+ kfree(fs_usage); -+} -+ -+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c) -+{ -+ struct bch_fs_usage *ret; -+ unsigned bytes = fs_usage_u64s(c) * sizeof(u64); -+ -+ ret = kzalloc(bytes, GFP_NOWAIT|__GFP_NOWARN); -+ if (ret) -+ return ret; -+ -+ if (mutex_trylock(&c->usage_scratch_lock)) -+ goto out_pool; -+ -+ ret = kzalloc(bytes, GFP_NOFS); -+ if (ret) -+ return ret; -+ -+ mutex_lock(&c->usage_scratch_lock); -+out_pool: -+ ret = c->usage_scratch; -+ memset(ret, 0, bytes); -+ return ret; -+} -+ -+struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca) -+{ -+ struct bch_dev_usage ret; -+ -+ memset(&ret, 0, sizeof(ret)); -+ 
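bch2_dev_usage_read() and the fs-usage readers that follow build a consistent snapshot by summing per-CPU u64 counter arrays into a local copy (acc_u64s_percpu() in the bcachefs tree). A simplified user-space sketch of that accumulation, with a plain 2D array standing in for the kernel's per-CPU allocation:

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS		4
#define NR_COUNTERS	3

/* One row of counters per CPU, each updated only by "its" CPU. */
static uint64_t percpu[NR_CPUS][NR_COUNTERS];

/* Sum every CPU's counters into dst, as acc_u64s_percpu() does. */
static void acc_u64s(uint64_t *dst, unsigned nr)
{
	for (unsigned cpu = 0; cpu < NR_CPUS; cpu++)
		for (unsigned i = 0; i < nr; i++)
			dst[i] += percpu[cpu][i];
}

int main(void)
{
	uint64_t snapshot[NR_COUNTERS] = { 0 };

	percpu[0][0] = 10;
	percpu[2][0] = 5;
	percpu[3][2] = 7;

	acc_u64s(snapshot, NR_COUNTERS);

	printf("%llu %llu %llu\n",
	       (unsigned long long) snapshot[0],
	       (unsigned long long) snapshot[1],
	       (unsigned long long) snapshot[2]);	/* 15 0 7 */
	return 0;
}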
acc_u64s_percpu((u64 *) &ret, -+ (u64 __percpu *) ca->usage[0], -+ sizeof(ret) / sizeof(u64)); -+ -+ return ret; -+} -+ -+static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, -+ unsigned journal_seq, -+ bool gc) -+{ -+ return this_cpu_ptr(gc -+ ? c->usage_gc -+ : c->usage[journal_seq & 1]); -+} -+ -+u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v) -+{ -+ ssize_t offset = v - (u64 *) c->usage_base; -+ unsigned seq; -+ u64 ret; -+ -+ BUG_ON(offset < 0 || offset >= fs_usage_u64s(c)); -+ percpu_rwsem_assert_held(&c->mark_lock); -+ -+ do { -+ seq = read_seqcount_begin(&c->usage_lock); -+ ret = *v + -+ percpu_u64_get((u64 __percpu *) c->usage[0] + offset) + -+ percpu_u64_get((u64 __percpu *) c->usage[1] + offset); -+ } while (read_seqcount_retry(&c->usage_lock, seq)); -+ -+ return ret; -+} -+ -+struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c) -+{ -+ struct bch_fs_usage *ret; -+ unsigned seq, v, u64s = fs_usage_u64s(c); -+retry: -+ ret = kmalloc(u64s * sizeof(u64), GFP_NOFS); -+ if (unlikely(!ret)) -+ return NULL; -+ -+ percpu_down_read(&c->mark_lock); -+ -+ v = fs_usage_u64s(c); -+ if (unlikely(u64s != v)) { -+ u64s = v; -+ percpu_up_read(&c->mark_lock); -+ kfree(ret); -+ goto retry; -+ } -+ -+ do { -+ seq = read_seqcount_begin(&c->usage_lock); -+ memcpy(ret, c->usage_base, u64s * sizeof(u64)); -+ acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[0], u64s); -+ acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[1], u64s); -+ } while (read_seqcount_retry(&c->usage_lock, seq)); -+ -+ return ret; -+} -+ -+void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx) -+{ -+ unsigned u64s = fs_usage_u64s(c); -+ -+ BUG_ON(idx >= 2); -+ -+ write_seqcount_begin(&c->usage_lock); -+ -+ acc_u64s_percpu((u64 *) c->usage_base, -+ (u64 __percpu *) c->usage[idx], u64s); -+ percpu_memset(c->usage[idx], 0, u64s * sizeof(u64)); -+ -+ write_seqcount_end(&c->usage_lock); -+} -+ -+void bch2_fs_usage_to_text(struct printbuf *out, -+ struct bch_fs *c, -+ struct bch_fs_usage *fs_usage) -+{ -+ unsigned i; -+ -+ pr_buf(out, "capacity:\t\t\t%llu\n", c->capacity); -+ -+ pr_buf(out, "hidden:\t\t\t\t%llu\n", -+ fs_usage->hidden); -+ pr_buf(out, "data:\t\t\t\t%llu\n", -+ fs_usage->data); -+ pr_buf(out, "cached:\t\t\t\t%llu\n", -+ fs_usage->cached); -+ pr_buf(out, "reserved:\t\t\t%llu\n", -+ fs_usage->reserved); -+ pr_buf(out, "nr_inodes:\t\t\t%llu\n", -+ fs_usage->nr_inodes); -+ pr_buf(out, "online reserved:\t\t%llu\n", -+ fs_usage->online_reserved); -+ -+ for (i = 0; -+ i < ARRAY_SIZE(fs_usage->persistent_reserved); -+ i++) { -+ pr_buf(out, "%u replicas:\n", i + 1); -+ pr_buf(out, "\treserved:\t\t%llu\n", -+ fs_usage->persistent_reserved[i]); -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ pr_buf(out, "\t"); -+ bch2_replicas_entry_to_text(out, e); -+ pr_buf(out, ":\t%llu\n", fs_usage->replicas[i]); -+ } -+} -+ -+#define RESERVE_FACTOR 6 -+ -+static u64 reserve_factor(u64 r) -+{ -+ return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR); -+} -+ -+static u64 avail_factor(u64 r) -+{ -+ return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1); -+} -+ -+u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage) -+{ -+ return min(fs_usage->hidden + -+ fs_usage->btree + -+ fs_usage->data + -+ reserve_factor(fs_usage->reserved + -+ fs_usage->online_reserved), -+ c->capacity); -+} -+ -+static struct bch_fs_usage_short -+__bch2_fs_usage_read_short(struct bch_fs *c) -+{ -+ struct 
bch_fs_usage_short ret; -+ u64 data, reserved; -+ -+ ret.capacity = c->capacity - -+ bch2_fs_usage_read_one(c, &c->usage_base->hidden); -+ -+ data = bch2_fs_usage_read_one(c, &c->usage_base->data) + -+ bch2_fs_usage_read_one(c, &c->usage_base->btree); -+ reserved = bch2_fs_usage_read_one(c, &c->usage_base->reserved) + -+ bch2_fs_usage_read_one(c, &c->usage_base->online_reserved); -+ -+ ret.used = min(ret.capacity, data + reserve_factor(reserved)); -+ ret.free = ret.capacity - ret.used; -+ -+ ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->nr_inodes); -+ -+ return ret; -+} -+ -+struct bch_fs_usage_short -+bch2_fs_usage_read_short(struct bch_fs *c) -+{ -+ struct bch_fs_usage_short ret; -+ -+ percpu_down_read(&c->mark_lock); -+ ret = __bch2_fs_usage_read_short(c); -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+static inline int is_unavailable_bucket(struct bucket_mark m) -+{ -+ return !is_available_bucket(m); -+} -+ -+static inline int is_fragmented_bucket(struct bucket_mark m, -+ struct bch_dev *ca) -+{ -+ if (!m.owned_by_allocator && -+ m.data_type == BCH_DATA_user && -+ bucket_sectors_used(m)) -+ return max_t(int, 0, (int) ca->mi.bucket_size - -+ bucket_sectors_used(m)); -+ return 0; -+} -+ -+static inline int bucket_stripe_sectors(struct bucket_mark m) -+{ -+ return m.stripe ? m.dirty_sectors : 0; -+} -+ -+static inline enum bch_data_type bucket_type(struct bucket_mark m) -+{ -+ return m.cached_sectors && !m.dirty_sectors -+ ? BCH_DATA_cached -+ : m.data_type; -+} -+ -+static bool bucket_became_unavailable(struct bucket_mark old, -+ struct bucket_mark new) -+{ -+ return is_available_bucket(old) && -+ !is_available_bucket(new); -+} -+ -+int bch2_fs_usage_apply(struct bch_fs *c, -+ struct bch_fs_usage *fs_usage, -+ struct disk_reservation *disk_res, -+ unsigned journal_seq) -+{ -+ s64 added = fs_usage->data + fs_usage->reserved; -+ s64 should_not_have_added; -+ int ret = 0; -+ -+ percpu_rwsem_assert_held(&c->mark_lock); -+ -+ /* -+ * Not allowed to reduce sectors_available except by getting a -+ * reservation: -+ */ -+ should_not_have_added = added - (s64) (disk_res ? 
disk_res->sectors : 0); -+ if (WARN_ONCE(should_not_have_added > 0, -+ "disk usage increased by %lli without a reservation", -+ should_not_have_added)) { -+ atomic64_sub(should_not_have_added, &c->sectors_available); -+ added -= should_not_have_added; -+ ret = -1; -+ } -+ -+ if (added > 0) { -+ disk_res->sectors -= added; -+ fs_usage->online_reserved -= added; -+ } -+ -+ preempt_disable(); -+ acc_u64s((u64 *) fs_usage_ptr(c, journal_seq, false), -+ (u64 *) fs_usage, fs_usage_u64s(c)); -+ preempt_enable(); -+ -+ return ret; -+} -+ -+static inline void account_bucket(struct bch_fs_usage *fs_usage, -+ struct bch_dev_usage *dev_usage, -+ enum bch_data_type type, -+ int nr, s64 size) -+{ -+ if (type == BCH_DATA_sb || type == BCH_DATA_journal) -+ fs_usage->hidden += size; -+ -+ dev_usage->buckets[type] += nr; -+} -+ -+static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, -+ struct bch_fs_usage *fs_usage, -+ struct bucket_mark old, struct bucket_mark new, -+ bool gc) -+{ -+ struct bch_dev_usage *u; -+ -+ percpu_rwsem_assert_held(&c->mark_lock); -+ -+ preempt_disable(); -+ u = this_cpu_ptr(ca->usage[gc]); -+ -+ if (bucket_type(old)) -+ account_bucket(fs_usage, u, bucket_type(old), -+ -1, -ca->mi.bucket_size); -+ -+ if (bucket_type(new)) -+ account_bucket(fs_usage, u, bucket_type(new), -+ 1, ca->mi.bucket_size); -+ -+ u->buckets_alloc += -+ (int) new.owned_by_allocator - (int) old.owned_by_allocator; -+ u->buckets_unavailable += -+ is_unavailable_bucket(new) - is_unavailable_bucket(old); -+ -+ u->buckets_ec += (int) new.stripe - (int) old.stripe; -+ u->sectors_ec += bucket_stripe_sectors(new) - -+ bucket_stripe_sectors(old); -+ -+ u->sectors[old.data_type] -= old.dirty_sectors; -+ u->sectors[new.data_type] += new.dirty_sectors; -+ u->sectors[BCH_DATA_cached] += -+ (int) new.cached_sectors - (int) old.cached_sectors; -+ u->sectors_fragmented += -+ is_fragmented_bucket(new, ca) - is_fragmented_bucket(old, ca); -+ preempt_enable(); -+ -+ if (!is_available_bucket(old) && is_available_bucket(new)) -+ bch2_wake_allocator(ca); -+} -+ -+void bch2_dev_usage_from_buckets(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ struct bucket_mark old = { .v.counter = 0 }; -+ struct bucket_array *buckets; -+ struct bucket *g; -+ unsigned i; -+ int cpu; -+ -+ c->usage_base->hidden = 0; -+ -+ for_each_member_device(ca, c, i) { -+ for_each_possible_cpu(cpu) -+ memset(per_cpu_ptr(ca->usage[0], cpu), 0, -+ sizeof(*ca->usage[0])); -+ -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) -+ bch2_dev_usage_update(c, ca, c->usage_base, -+ old, g->mark, false); -+ } -+} -+ -+static inline int update_replicas(struct bch_fs *c, -+ struct bch_fs_usage *fs_usage, -+ struct bch_replicas_entry *r, -+ s64 sectors) -+{ -+ int idx = bch2_replicas_entry_idx(c, r); -+ -+ if (idx < 0) -+ return -1; -+ -+ if (!fs_usage) -+ return 0; -+ -+ switch (r->data_type) { -+ case BCH_DATA_btree: -+ fs_usage->btree += sectors; -+ break; -+ case BCH_DATA_user: -+ fs_usage->data += sectors; -+ break; -+ case BCH_DATA_cached: -+ fs_usage->cached += sectors; -+ break; -+ } -+ fs_usage->replicas[idx] += sectors; -+ return 0; -+} -+ -+static inline void update_cached_sectors(struct bch_fs *c, -+ struct bch_fs_usage *fs_usage, -+ unsigned dev, s64 sectors) -+{ -+ struct bch_replicas_padded r; -+ -+ bch2_replicas_entry_cached(&r.e, dev); -+ -+ update_replicas(c, fs_usage, &r.e, sectors); -+} -+ -+static struct replicas_delta_list * -+replicas_deltas_realloc(struct btree_trans *trans, unsigned more) -+{ -+ struct replicas_delta_list 
*d = trans->fs_usage_deltas; -+ unsigned new_size = d ? (d->size + more) * 2 : 128; -+ -+ if (!d || d->used + more > d->size) { -+ d = krealloc(d, sizeof(*d) + new_size, GFP_NOIO|__GFP_ZERO); -+ BUG_ON(!d); -+ -+ d->size = new_size; -+ trans->fs_usage_deltas = d; -+ } -+ return d; -+} -+ -+static inline void update_replicas_list(struct btree_trans *trans, -+ struct bch_replicas_entry *r, -+ s64 sectors) -+{ -+ struct replicas_delta_list *d; -+ struct replicas_delta *n; -+ unsigned b; -+ -+ if (!sectors) -+ return; -+ -+ b = replicas_entry_bytes(r) + 8; -+ d = replicas_deltas_realloc(trans, b); -+ -+ n = (void *) d->d + d->used; -+ n->delta = sectors; -+ memcpy(&n->r, r, replicas_entry_bytes(r)); -+ d->used += b; -+} -+ -+static inline void update_cached_sectors_list(struct btree_trans *trans, -+ unsigned dev, s64 sectors) -+{ -+ struct bch_replicas_padded r; -+ -+ bch2_replicas_entry_cached(&r.e, dev); -+ -+ update_replicas_list(trans, &r.e, sectors); -+} -+ -+static inline struct replicas_delta * -+replicas_delta_next(struct replicas_delta *d) -+{ -+ return (void *) d + replicas_entry_bytes(&d->r) + 8; -+} -+ -+int bch2_replicas_delta_list_apply(struct bch_fs *c, -+ struct bch_fs_usage *fs_usage, -+ struct replicas_delta_list *r) -+{ -+ struct replicas_delta *d = r->d; -+ struct replicas_delta *top = (void *) r->d + r->used; -+ unsigned i; -+ -+ for (d = r->d; d != top; d = replicas_delta_next(d)) -+ if (update_replicas(c, fs_usage, &d->r, d->delta)) { -+ top = d; -+ goto unwind; -+ } -+ -+ if (!fs_usage) -+ return 0; -+ -+ fs_usage->nr_inodes += r->nr_inodes; -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) { -+ fs_usage->reserved += r->persistent_reserved[i]; -+ fs_usage->persistent_reserved[i] += r->persistent_reserved[i]; -+ } -+ -+ return 0; -+unwind: -+ for (d = r->d; d != top; d = replicas_delta_next(d)) -+ update_replicas(c, fs_usage, &d->r, -d->delta); -+ return -1; -+} -+ -+#define do_mark_fn(fn, c, pos, flags, ...) 
\ -+({ \ -+ int gc, ret = 0; \ -+ \ -+ percpu_rwsem_assert_held(&c->mark_lock); \ -+ \ -+ for (gc = 0; gc < 2 && !ret; gc++) \ -+ if (!gc == !(flags & BTREE_TRIGGER_GC) || \ -+ (gc && gc_visited(c, pos))) \ -+ ret = fn(c, __VA_ARGS__, gc); \ -+ ret; \ -+}) -+ -+static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, struct bucket_mark *ret, -+ bool gc) -+{ -+ struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc); -+ struct bucket *g = __bucket(ca, b, gc); -+ struct bucket_mark old, new; -+ -+ old = bucket_cmpxchg(g, new, ({ -+ BUG_ON(!is_available_bucket(new)); -+ -+ new.owned_by_allocator = true; -+ new.data_type = 0; -+ new.cached_sectors = 0; -+ new.dirty_sectors = 0; -+ new.gen++; -+ })); -+ -+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); -+ -+ if (old.cached_sectors) -+ update_cached_sectors(c, fs_usage, ca->dev_idx, -+ -((s64) old.cached_sectors)); -+ -+ if (!gc) -+ *ret = old; -+ return 0; -+} -+ -+void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, struct bucket_mark *old) -+{ -+ do_mark_fn(__bch2_invalidate_bucket, c, gc_phase(GC_PHASE_START), 0, -+ ca, b, old); -+ -+ if (!old->owned_by_allocator && old->cached_sectors) -+ trace_invalidate(ca, bucket_to_sector(ca, b), -+ old->cached_sectors); -+} -+ -+static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, bool owned_by_allocator, -+ bool gc) -+{ -+ struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc); -+ struct bucket *g = __bucket(ca, b, gc); -+ struct bucket_mark old, new; -+ -+ old = bucket_cmpxchg(g, new, ({ -+ new.owned_by_allocator = owned_by_allocator; -+ })); -+ -+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); -+ -+ BUG_ON(!gc && -+ !owned_by_allocator && !old.owned_by_allocator); -+ -+ return 0; -+} -+ -+void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, bool owned_by_allocator, -+ struct gc_pos pos, unsigned flags) -+{ -+ preempt_disable(); -+ -+ do_mark_fn(__bch2_mark_alloc_bucket, c, pos, flags, -+ ca, b, owned_by_allocator); -+ -+ preempt_enable(); -+} -+ -+static int bch2_mark_alloc(struct bch_fs *c, -+ struct bkey_s_c old, struct bkey_s_c new, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, unsigned flags) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ struct bkey_alloc_unpacked u; -+ struct bch_dev *ca; -+ struct bucket *g; -+ struct bucket_mark old_m, m; -+ -+ /* We don't do anything for deletions - do we?: */ -+ if (new.k->type != KEY_TYPE_alloc) -+ return 0; -+ -+ /* -+ * alloc btree is read in by bch2_alloc_read, not gc: -+ */ -+ if ((flags & BTREE_TRIGGER_GC) && -+ !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) -+ return 0; -+ -+ ca = bch_dev_bkey_exists(c, new.k->p.inode); -+ -+ if (new.k->p.offset >= ca->mi.nbuckets) -+ return 0; -+ -+ g = __bucket(ca, new.k->p.offset, gc); -+ u = bch2_alloc_unpack(new); -+ -+ old_m = bucket_cmpxchg(g, m, ({ -+ m.gen = u.gen; -+ m.data_type = u.data_type; -+ m.dirty_sectors = u.dirty_sectors; -+ m.cached_sectors = u.cached_sectors; -+ -+ if (journal_seq) { -+ m.journal_seq_valid = 1; -+ m.journal_seq = journal_seq; -+ } -+ })); -+ -+ if (!(flags & BTREE_TRIGGER_ALLOC_READ)) -+ bch2_dev_usage_update(c, ca, fs_usage, old_m, m, gc); -+ -+ g->io_time[READ] = u.read_time; -+ g->io_time[WRITE] = u.write_time; -+ g->oldest_gen = u.oldest_gen; -+ g->gen_valid = 1; -+ -+ /* -+ * need to know if we're getting called from the invalidate path or -+ * not: -+ */ -+ -+ if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) && -+ old_m.cached_sectors) { -+ 
update_cached_sectors(c, fs_usage, ca->dev_idx, -+ -old_m.cached_sectors); -+ trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset), -+ old_m.cached_sectors); -+ } -+ -+ return 0; -+} -+ -+#define checked_add(a, b) \ -+({ \ -+ unsigned _res = (unsigned) (a) + (b); \ -+ bool overflow = _res > U16_MAX; \ -+ if (overflow) \ -+ _res = U16_MAX; \ -+ (a) = _res; \ -+ overflow; \ -+}) -+ -+static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, enum bch_data_type data_type, -+ unsigned sectors, bool gc) -+{ -+ struct bucket *g = __bucket(ca, b, gc); -+ struct bucket_mark old, new; -+ bool overflow; -+ -+ BUG_ON(data_type != BCH_DATA_sb && -+ data_type != BCH_DATA_journal); -+ -+ old = bucket_cmpxchg(g, new, ({ -+ new.data_type = data_type; -+ overflow = checked_add(new.dirty_sectors, sectors); -+ })); -+ -+ bch2_fs_inconsistent_on(old.data_type && -+ old.data_type != data_type, c, -+ "different types of data in same bucket: %s, %s", -+ bch2_data_types[old.data_type], -+ bch2_data_types[data_type]); -+ -+ bch2_fs_inconsistent_on(overflow, c, -+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > U16_MAX", -+ ca->dev_idx, b, new.gen, -+ bch2_data_types[old.data_type ?: data_type], -+ old.dirty_sectors, sectors); -+ -+ if (c) -+ bch2_dev_usage_update(c, ca, fs_usage_ptr(c, 0, gc), -+ old, new, gc); -+ -+ return 0; -+} -+ -+void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, enum bch_data_type type, -+ unsigned sectors, struct gc_pos pos, -+ unsigned flags) -+{ -+ BUG_ON(type != BCH_DATA_sb && -+ type != BCH_DATA_journal); -+ -+ preempt_disable(); -+ -+ if (likely(c)) { -+ do_mark_fn(__bch2_mark_metadata_bucket, c, pos, flags, -+ ca, b, type, sectors); -+ } else { -+ __bch2_mark_metadata_bucket(c, ca, b, type, sectors, 0); -+ } -+ -+ preempt_enable(); -+} -+ -+static s64 disk_sectors_scaled(unsigned n, unsigned d, unsigned sectors) -+{ -+ return DIV_ROUND_UP(sectors * n, d); -+} -+ -+static s64 __ptr_disk_sectors_delta(unsigned old_size, -+ unsigned offset, s64 delta, -+ unsigned flags, -+ unsigned n, unsigned d) -+{ -+ BUG_ON(!n || !d); -+ -+ if (flags & BTREE_TRIGGER_OVERWRITE_SPLIT) { -+ BUG_ON(offset + -delta > old_size); -+ -+ return -disk_sectors_scaled(n, d, old_size) + -+ disk_sectors_scaled(n, d, offset) + -+ disk_sectors_scaled(n, d, old_size - offset + delta); -+ } else if (flags & BTREE_TRIGGER_OVERWRITE) { -+ BUG_ON(offset + -delta > old_size); -+ -+ return -disk_sectors_scaled(n, d, old_size) + -+ disk_sectors_scaled(n, d, old_size + delta); -+ } else { -+ return disk_sectors_scaled(n, d, delta); -+ } -+} -+ -+static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p, -+ unsigned offset, s64 delta, -+ unsigned flags) -+{ -+ return __ptr_disk_sectors_delta(p.crc.live_size, -+ offset, delta, flags, -+ p.crc.compressed_size, -+ p.crc.uncompressed_size); -+} -+ -+static void bucket_set_stripe(struct bch_fs *c, -+ const struct bch_extent_ptr *ptr, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, -+ unsigned flags, -+ bool enabled) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bucket *g = PTR_BUCKET(ca, ptr, gc); -+ struct bucket_mark new, old; -+ -+ old = bucket_cmpxchg(g, new, ({ -+ new.stripe = enabled; -+ if (journal_seq) { -+ new.journal_seq_valid = 1; -+ new.journal_seq = journal_seq; -+ } -+ })); -+ -+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); -+ -+ /* -+ * XXX write repair code for these, flag stripe as possibly bad 
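The checked_add() macro defined earlier in this hunk saturates a 16-bit sector counter at U16_MAX and reports whether it had to clamp; __mark_pointer() below turns that report into a fsck error. A stand-alone sketch of the same saturating add, written as a plain function instead of a macro:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Add b to *a, clamping at UINT16_MAX; return true if we had to clamp. */
static bool checked_add_u16(uint16_t *a, unsigned b)
{
	unsigned res = (unsigned) *a + b;
	bool overflow = res > UINT16_MAX;

	if (overflow)
		res = UINT16_MAX;
	*a = res;
	return overflow;
}

int main(void)
{
	uint16_t sectors = 65000;

	if (checked_add_u16(&sectors, 1000))
		printf("sector count overflow, clamped to %u\n", sectors);
	return 0;
}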
-+ */ -+ if (old.gen != ptr->gen) -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "stripe with stale pointer"); -+#if 0 -+ /* -+ * We'd like to check for these, but these checks don't work -+ * yet: -+ */ -+ if (old.stripe && enabled) -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "multiple stripes using same bucket"); -+ -+ if (!old.stripe && !enabled) -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "deleting stripe but bucket not marked as stripe bucket"); -+#endif -+} -+ -+static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k, -+ struct extent_ptr_decoded p, -+ s64 sectors, enum bch_data_type ptr_data_type, -+ u8 bucket_gen, u8 *bucket_data_type, -+ u16 *dirty_sectors, u16 *cached_sectors) -+{ -+ u16 *dst_sectors = !p.ptr.cached -+ ? dirty_sectors -+ : cached_sectors; -+ u16 orig_sectors = *dst_sectors; -+ char buf[200]; -+ -+ if (gen_after(p.ptr.gen, bucket_gen)) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), -+ bucket_gen, -+ bch2_data_types[*bucket_data_type ?: ptr_data_type], -+ p.ptr.gen, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ if (gen_cmp(bucket_gen, p.ptr.gen) >= 96U) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), -+ bucket_gen, -+ bch2_data_types[*bucket_data_type ?: ptr_data_type], -+ p.ptr.gen, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ if (bucket_gen != p.ptr.gen && !p.ptr.cached) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u data type %s: stale dirty ptr (gen %u)\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), -+ bucket_gen, -+ bch2_data_types[*bucket_data_type ?: ptr_data_type], -+ p.ptr.gen, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ if (bucket_gen != p.ptr.gen) -+ return 1; -+ -+ if (*bucket_data_type && *bucket_data_type != ptr_data_type) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), -+ bucket_gen, -+ bch2_data_types[*bucket_data_type], -+ bch2_data_types[ptr_data_type], -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ if (checked_add(*dst_sectors, sectors)) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), -+ bucket_gen, -+ bch2_data_types[*bucket_data_type ?: ptr_data_type], -+ orig_sectors, sectors, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ *bucket_data_type = *dirty_sectors || *cached_sectors -+ ? 
ptr_data_type : 0; -+ return 0; -+} -+ -+static int bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c k, -+ struct extent_ptr_decoded p, -+ s64 sectors, enum bch_data_type data_type, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, unsigned flags) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ struct bucket_mark old, new; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ struct bucket *g = PTR_BUCKET(ca, &p.ptr, gc); -+ u8 bucket_data_type; -+ u64 v; -+ int ret; -+ -+ v = atomic64_read(&g->_mark.v); -+ do { -+ new.v.counter = old.v.counter = v; -+ bucket_data_type = new.data_type; -+ -+ ret = __mark_pointer(c, k, p, sectors, data_type, new.gen, -+ &bucket_data_type, -+ &new.dirty_sectors, -+ &new.cached_sectors); -+ if (ret) -+ return ret; -+ -+ new.data_type = bucket_data_type; -+ -+ if (journal_seq) { -+ new.journal_seq_valid = 1; -+ new.journal_seq = journal_seq; -+ } -+ -+ if (flags & BTREE_TRIGGER_NOATOMIC) { -+ g->_mark = new; -+ break; -+ } -+ } while ((v = atomic64_cmpxchg(&g->_mark.v, -+ old.v.counter, -+ new.v.counter)) != old.v.counter); -+ -+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); -+ -+ BUG_ON(!gc && bucket_became_unavailable(old, new)); -+ -+ return 0; -+} -+ -+static int bch2_mark_stripe_ptr(struct bch_fs *c, -+ struct bch_extent_stripe_ptr p, -+ enum bch_data_type data_type, -+ struct bch_fs_usage *fs_usage, -+ s64 sectors, unsigned flags, -+ struct bch_replicas_padded *r, -+ unsigned *nr_data, -+ unsigned *nr_parity) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ struct stripe *m; -+ unsigned i, blocks_nonempty = 0; -+ -+ m = genradix_ptr(&c->stripes[gc], p.idx); -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ -+ if (!m || !m->alive) { -+ spin_unlock(&c->ec_stripes_heap_lock); -+ bch_err_ratelimited(c, "pointer to nonexistent stripe %llu", -+ (u64) p.idx); -+ return -EIO; -+ } -+ -+ BUG_ON(m->r.e.data_type != data_type); -+ -+ *nr_data = m->nr_blocks - m->nr_redundant; -+ *nr_parity = m->nr_redundant; -+ *r = m->r; -+ -+ m->block_sectors[p.block] += sectors; -+ -+ for (i = 0; i < m->nr_blocks; i++) -+ blocks_nonempty += m->block_sectors[i] != 0; -+ -+ if (m->blocks_nonempty != blocks_nonempty) { -+ m->blocks_nonempty = blocks_nonempty; -+ if (!gc) -+ bch2_stripes_heap_update(c, m, p.idx); -+ } -+ -+ spin_unlock(&c->ec_stripes_heap_lock); -+ -+ return 0; -+} -+ -+static int bch2_mark_extent(struct bch_fs *c, -+ struct bkey_s_c old, struct bkey_s_c new, -+ unsigned offset, s64 sectors, -+ enum bch_data_type data_type, -+ struct bch_fs_usage *fs_usage, -+ unsigned journal_seq, unsigned flags) -+{ -+ struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old; -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ struct bch_replicas_padded r; -+ s64 dirty_sectors = 0; -+ bool stale; -+ int ret; -+ -+ r.e.data_type = data_type; -+ r.e.nr_devs = 0; -+ r.e.nr_required = 1; -+ -+ BUG_ON(!sectors); -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ s64 disk_sectors = data_type == BCH_DATA_btree -+ ? 
sectors -+ : ptr_disk_sectors_delta(p, offset, sectors, flags); -+ -+ ret = bch2_mark_pointer(c, k, p, disk_sectors, data_type, -+ fs_usage, journal_seq, flags); -+ if (ret < 0) -+ return ret; -+ -+ stale = ret > 0; -+ -+ if (p.ptr.cached) { -+ if (!stale) -+ update_cached_sectors(c, fs_usage, p.ptr.dev, -+ disk_sectors); -+ } else if (!p.has_ec) { -+ dirty_sectors += disk_sectors; -+ r.e.devs[r.e.nr_devs++] = p.ptr.dev; -+ } else { -+ struct bch_replicas_padded ec_r; -+ unsigned nr_data, nr_parity; -+ s64 parity_sectors; -+ -+ ret = bch2_mark_stripe_ptr(c, p.ec, data_type, -+ fs_usage, disk_sectors, flags, -+ &ec_r, &nr_data, &nr_parity); -+ if (ret) -+ return ret; -+ -+ parity_sectors = -+ __ptr_disk_sectors_delta(p.crc.live_size, -+ offset, sectors, flags, -+ p.crc.compressed_size * nr_parity, -+ p.crc.uncompressed_size * nr_data); -+ -+ update_replicas(c, fs_usage, &ec_r.e, -+ disk_sectors + parity_sectors); -+ -+ /* -+ * There may be other dirty pointers in this extent, but -+ * if so they're not required for mounting if we have an -+ * erasure coded pointer in this extent: -+ */ -+ r.e.nr_required = 0; -+ } -+ } -+ -+ if (r.e.nr_devs) -+ update_replicas(c, fs_usage, &r.e, dirty_sectors); -+ -+ return 0; -+} -+ -+static int bch2_mark_stripe(struct bch_fs *c, -+ struct bkey_s_c old, struct bkey_s_c new, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, unsigned flags) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ size_t idx = new.k->p.offset; -+ const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe -+ ? bkey_s_c_to_stripe(old).v : NULL; -+ const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe -+ ? bkey_s_c_to_stripe(new).v : NULL; -+ struct stripe *m = genradix_ptr(&c->stripes[gc], idx); -+ unsigned i; -+ -+ if (!m || (old_s && !m->alive)) { -+ bch_err_ratelimited(c, "error marking nonexistent stripe %zu", -+ idx); -+ return -1; -+ } -+ -+ if (!new_s) { -+ /* Deleting: */ -+ for (i = 0; i < old_s->nr_blocks; i++) -+ bucket_set_stripe(c, old_s->ptrs + i, fs_usage, -+ journal_seq, flags, false); -+ -+ if (!gc && m->on_heap) { -+ spin_lock(&c->ec_stripes_heap_lock); -+ bch2_stripes_heap_del(c, m, idx); -+ spin_unlock(&c->ec_stripes_heap_lock); -+ } -+ -+ memset(m, 0, sizeof(*m)); -+ } else { -+ BUG_ON(old_s && new_s->nr_blocks != old_s->nr_blocks); -+ BUG_ON(old_s && new_s->nr_redundant != old_s->nr_redundant); -+ -+ for (i = 0; i < new_s->nr_blocks; i++) { -+ if (!old_s || -+ memcmp(new_s->ptrs + i, -+ old_s->ptrs + i, -+ sizeof(struct bch_extent_ptr))) { -+ -+ if (old_s) -+ bucket_set_stripe(c, old_s->ptrs + i, fs_usage, -+ journal_seq, flags, false); -+ bucket_set_stripe(c, new_s->ptrs + i, fs_usage, -+ journal_seq, flags, true); -+ } -+ } -+ -+ m->alive = true; -+ m->sectors = le16_to_cpu(new_s->sectors); -+ m->algorithm = new_s->algorithm; -+ m->nr_blocks = new_s->nr_blocks; -+ m->nr_redundant = new_s->nr_redundant; -+ -+ bch2_bkey_to_replicas(&m->r.e, new); -+ -+ /* gc recalculates these fields: */ -+ if (!(flags & BTREE_TRIGGER_GC)) { -+ m->blocks_nonempty = 0; -+ -+ for (i = 0; i < new_s->nr_blocks; i++) { -+ m->block_sectors[i] = -+ stripe_blockcount_get(new_s, i); -+ m->blocks_nonempty += !!m->block_sectors[i]; -+ } -+ } -+ -+ if (!gc) { -+ spin_lock(&c->ec_stripes_heap_lock); -+ bch2_stripes_heap_update(c, m, idx); -+ spin_unlock(&c->ec_stripes_heap_lock); -+ } -+ } -+ -+ return 0; -+} -+ -+static int bch2_mark_key_locked(struct bch_fs *c, -+ struct bkey_s_c old, -+ struct bkey_s_c new, -+ unsigned offset, s64 sectors, -+ struct bch_fs_usage *fs_usage, 
-+ u64 journal_seq, unsigned flags) -+{ -+ struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old; -+ int ret = 0; -+ -+ BUG_ON(!(flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE))); -+ -+ preempt_disable(); -+ -+ if (!fs_usage || (flags & BTREE_TRIGGER_GC)) -+ fs_usage = fs_usage_ptr(c, journal_seq, -+ flags & BTREE_TRIGGER_GC); -+ -+ switch (k.k->type) { -+ case KEY_TYPE_alloc: -+ ret = bch2_mark_alloc(c, old, new, fs_usage, journal_seq, flags); -+ break; -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ sectors = !(flags & BTREE_TRIGGER_OVERWRITE) -+ ? c->opts.btree_node_size -+ : -c->opts.btree_node_size; -+ -+ ret = bch2_mark_extent(c, old, new, offset, sectors, -+ BCH_DATA_btree, fs_usage, journal_seq, flags); -+ break; -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ ret = bch2_mark_extent(c, old, new, offset, sectors, -+ BCH_DATA_user, fs_usage, journal_seq, flags); -+ break; -+ case KEY_TYPE_stripe: -+ ret = bch2_mark_stripe(c, old, new, fs_usage, journal_seq, flags); -+ break; -+ case KEY_TYPE_inode: -+ if (!(flags & BTREE_TRIGGER_OVERWRITE)) -+ fs_usage->nr_inodes++; -+ else -+ fs_usage->nr_inodes--; -+ break; -+ case KEY_TYPE_reservation: { -+ unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; -+ -+ sectors *= replicas; -+ replicas = clamp_t(unsigned, replicas, 1, -+ ARRAY_SIZE(fs_usage->persistent_reserved)); -+ -+ fs_usage->reserved += sectors; -+ fs_usage->persistent_reserved[replicas - 1] += sectors; -+ break; -+ } -+ } -+ -+ preempt_enable(); -+ -+ return ret; -+} -+ -+int bch2_mark_key(struct bch_fs *c, struct bkey_s_c new, -+ unsigned offset, s64 sectors, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, unsigned flags) -+{ -+ struct bkey deleted; -+ struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL }; -+ int ret; -+ -+ bkey_init(&deleted); -+ -+ percpu_down_read(&c->mark_lock); -+ ret = bch2_mark_key_locked(c, old, new, offset, sectors, -+ fs_usage, journal_seq, -+ BTREE_TRIGGER_INSERT|flags); -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+int bch2_mark_update(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *new, -+ struct bch_fs_usage *fs_usage, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree *b = iter_l(iter)->b; -+ struct btree_node_iter node_iter = iter_l(iter)->iter; -+ struct bkey_packed *_old; -+ struct bkey_s_c old; -+ struct bkey unpacked; -+ int ret = 0; -+ -+ if (unlikely(flags & BTREE_TRIGGER_NORUN)) -+ return 0; -+ -+ if (!btree_node_type_needs_gc(iter->btree_id)) -+ return 0; -+ -+ bkey_init(&unpacked); -+ old = (struct bkey_s_c) { &unpacked, NULL }; -+ -+ if (!btree_node_type_is_extents(iter->btree_id)) { -+ if (btree_iter_type(iter) != BTREE_ITER_CACHED) { -+ _old = bch2_btree_node_iter_peek(&node_iter, b); -+ if (_old) -+ old = bkey_disassemble(b, _old, &unpacked); -+ } else { -+ struct bkey_cached *ck = (void *) iter->l[0].b; -+ -+ if (ck->valid) -+ old = bkey_i_to_s_c(ck->k); -+ } -+ -+ if (old.k->type == new->k.type) { -+ bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0, -+ fs_usage, trans->journal_res.seq, -+ BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); -+ -+ } else { -+ bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0, -+ fs_usage, trans->journal_res.seq, -+ BTREE_TRIGGER_INSERT|flags); -+ bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0, -+ fs_usage, trans->journal_res.seq, -+ BTREE_TRIGGER_OVERWRITE|flags); -+ } -+ } else { -+ BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED); -+ bch2_mark_key_locked(c, old, 
bkey_i_to_s_c(new), -+ 0, new->k.size, -+ fs_usage, trans->journal_res.seq, -+ BTREE_TRIGGER_INSERT|flags); -+ -+ while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) { -+ unsigned offset = 0; -+ s64 sectors; -+ -+ old = bkey_disassemble(b, _old, &unpacked); -+ sectors = -((s64) old.k->size); -+ -+ flags |= BTREE_TRIGGER_OVERWRITE; -+ -+ if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0) -+ return 0; -+ -+ switch (bch2_extent_overlap(&new->k, old.k)) { -+ case BCH_EXTENT_OVERLAP_ALL: -+ offset = 0; -+ sectors = -((s64) old.k->size); -+ break; -+ case BCH_EXTENT_OVERLAP_BACK: -+ offset = bkey_start_offset(&new->k) - -+ bkey_start_offset(old.k); -+ sectors = bkey_start_offset(&new->k) - -+ old.k->p.offset; -+ break; -+ case BCH_EXTENT_OVERLAP_FRONT: -+ offset = 0; -+ sectors = bkey_start_offset(old.k) - -+ new->k.p.offset; -+ break; -+ case BCH_EXTENT_OVERLAP_MIDDLE: -+ offset = bkey_start_offset(&new->k) - -+ bkey_start_offset(old.k); -+ sectors = -((s64) new->k.size); -+ flags |= BTREE_TRIGGER_OVERWRITE_SPLIT; -+ break; -+ } -+ -+ BUG_ON(sectors >= 0); -+ -+ ret = bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), -+ offset, sectors, fs_usage, -+ trans->journal_res.seq, flags) ?: 1; -+ if (ret <= 0) -+ break; -+ -+ bch2_btree_node_iter_advance(&node_iter, b); -+ } -+ } -+ -+ return ret; -+} -+ -+void bch2_trans_fs_usage_apply(struct btree_trans *trans, -+ struct bch_fs_usage *fs_usage) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_insert_entry *i; -+ static int warned_disk_usage = 0; -+ u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; -+ char buf[200]; -+ -+ if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res, -+ trans->journal_res.seq) || -+ warned_disk_usage || -+ xchg(&warned_disk_usage, 1)) -+ return; -+ -+ bch_err(c, "disk usage increased more than %llu sectors reserved", -+ disk_res_sectors); -+ -+ trans_for_each_update(trans, i) { -+ pr_err("while inserting"); -+ bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k)); -+ pr_err("%s", buf); -+ pr_err("overlapping with"); -+ -+ if (btree_iter_type(i->iter) != BTREE_ITER_CACHED) { -+ struct btree *b = iter_l(i->iter)->b; -+ struct btree_node_iter node_iter = iter_l(i->iter)->iter; -+ struct bkey_packed *_k; -+ -+ while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) { -+ struct bkey unpacked; -+ struct bkey_s_c k; -+ -+ pr_info("_k %px format %u", _k, _k->format); -+ k = bkey_disassemble(b, _k, &unpacked); -+ -+ if (btree_node_is_extents(b) -+ ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0 -+ : bkey_cmp(i->k->k.p, k.k->p)) -+ break; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, k); -+ pr_err("%s", buf); -+ -+ bch2_btree_node_iter_advance(&node_iter, b); -+ } -+ } else { -+ struct bkey_cached *ck = (void *) i->iter->l[0].b; -+ -+ if (ck->valid) { -+ bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(ck->k)); -+ pr_err("%s", buf); -+ } -+ } -+ } -+} -+ -+/* trans_mark: */ -+ -+static struct btree_iter *trans_get_update(struct btree_trans *trans, -+ enum btree_id btree_id, struct bpos pos, -+ struct bkey_s_c *k) -+{ -+ struct btree_insert_entry *i; -+ -+ trans_for_each_update(trans, i) -+ if (i->iter->btree_id == btree_id && -+ (btree_node_type_is_extents(btree_id) -+ ? 
bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 && -+ bkey_cmp(pos, i->k->k.p) < 0 -+ : !bkey_cmp(pos, i->iter->pos))) { -+ *k = bkey_i_to_s_c(i->k); -+ return i->iter; -+ } -+ -+ return NULL; -+} -+ -+static int trans_get_key(struct btree_trans *trans, -+ enum btree_id btree_id, struct bpos pos, -+ struct btree_iter **iter, -+ struct bkey_s_c *k) -+{ -+ unsigned flags = btree_id != BTREE_ID_ALLOC -+ ? BTREE_ITER_SLOTS -+ : BTREE_ITER_CACHED; -+ int ret; -+ -+ *iter = trans_get_update(trans, btree_id, pos, k); -+ if (*iter) -+ return 1; -+ -+ *iter = bch2_trans_get_iter(trans, btree_id, pos, -+ flags|BTREE_ITER_INTENT); -+ if (IS_ERR(*iter)) -+ return PTR_ERR(*iter); -+ -+ *k = __bch2_btree_iter_peek(*iter, flags); -+ ret = bkey_err(*k); -+ if (ret) -+ bch2_trans_iter_put(trans, *iter); -+ return ret; -+} -+ -+static int bch2_trans_mark_pointer(struct btree_trans *trans, -+ struct bkey_s_c k, struct extent_ptr_decoded p, -+ s64 sectors, enum bch_data_type data_type) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ struct bpos pos = POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr)); -+ struct btree_iter *iter; -+ struct bkey_s_c k_a; -+ struct bkey_alloc_unpacked u; -+ struct bkey_i_alloc *a; -+ struct bucket *g; -+ int ret; -+ -+ iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k_a); -+ if (iter) { -+ u = bch2_alloc_unpack(k_a); -+ } else { -+ iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, pos, -+ BTREE_ITER_CACHED| -+ BTREE_ITER_CACHED_NOFILL| -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ goto out; -+ -+ percpu_down_read(&c->mark_lock); -+ g = bucket(ca, pos.offset); -+ u = alloc_mem_to_key(g, READ_ONCE(g->mark)); -+ percpu_up_read(&c->mark_lock); -+ } -+ -+ ret = __mark_pointer(c, k, p, sectors, data_type, u.gen, &u.data_type, -+ &u.dirty_sectors, &u.cached_sectors); -+ if (ret) -+ goto out; -+ -+ a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8); -+ ret = PTR_ERR_OR_ZERO(a); -+ if (ret) -+ goto out; -+ -+ bkey_alloc_init(&a->k_i); -+ a->k.p = pos; -+ bch2_alloc_pack(a, u); -+ bch2_trans_update(trans, iter, &a->k_i, 0); -+out: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, -+ struct bch_extent_stripe_ptr p, -+ s64 sectors, enum bch_data_type data_type, -+ struct bch_replicas_padded *r, -+ unsigned *nr_data, -+ unsigned *nr_parity) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_i_stripe *s; -+ int ret = 0; -+ -+ ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx), &iter, &k); -+ if (ret < 0) -+ return ret; -+ -+ if (k.k->type != KEY_TYPE_stripe) { -+ bch2_fs_inconsistent(c, -+ "pointer to nonexistent stripe %llu", -+ (u64) p.idx); -+ ret = -EIO; -+ goto out; -+ } -+ -+ s = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ ret = PTR_ERR_OR_ZERO(s); -+ if (ret) -+ goto out; -+ -+ bkey_reassemble(&s->k_i, k); -+ -+ stripe_blockcount_set(&s->v, p.block, -+ stripe_blockcount_get(&s->v, p.block) + -+ sectors); -+ -+ *nr_data = s->v.nr_blocks - s->v.nr_redundant; -+ *nr_parity = s->v.nr_redundant; -+ bch2_bkey_to_replicas(&r->e, bkey_i_to_s_c(&s->k_i)); -+ bch2_trans_update(trans, iter, &s->k_i, 0); -+out: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static int bch2_trans_mark_extent(struct btree_trans *trans, -+ struct bkey_s_c k, unsigned offset, -+ s64 sectors, unsigned flags, -+ enum bch_data_type data_type) -+{ -+ 
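Both mark paths charge compressed extents by scaling live sectors with the ratio compressed_size / uncompressed_size and rounding up, via disk_sectors_scaled() earlier in this hunk; bch2_trans_mark_extent() below reaches the same helper through ptr_disk_sectors_delta(). A small arithmetic sketch of that scaling:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* Sectors actually occupied on disk for `sectors` of live data, given a
 * crc entry with compressed_size n and uncompressed_size d. */
static long long disk_sectors_scaled(unsigned n, unsigned d, unsigned sectors)
{
	return DIV_ROUND_UP((long long) sectors * n, d);
}

int main(void)
{
	/* 128 live sectors compressed 32/128 occupy 32 sectors on disk: */
	printf("%lld\n", disk_sectors_scaled(32, 128, 128));

	/* 99 live sectors at the same ratio round up to 25 sectors: */
	printf("%lld\n", disk_sectors_scaled(32, 128, 99));
	return 0;
}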
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ struct bch_replicas_padded r; -+ s64 dirty_sectors = 0; -+ bool stale; -+ int ret; -+ -+ r.e.data_type = data_type; -+ r.e.nr_devs = 0; -+ r.e.nr_required = 1; -+ -+ BUG_ON(!sectors); -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ s64 disk_sectors = data_type == BCH_DATA_btree -+ ? sectors -+ : ptr_disk_sectors_delta(p, offset, sectors, flags); -+ -+ ret = bch2_trans_mark_pointer(trans, k, p, disk_sectors, -+ data_type); -+ if (ret < 0) -+ return ret; -+ -+ stale = ret > 0; -+ -+ if (p.ptr.cached) { -+ if (!stale) -+ update_cached_sectors_list(trans, p.ptr.dev, -+ disk_sectors); -+ } else if (!p.has_ec) { -+ dirty_sectors += disk_sectors; -+ r.e.devs[r.e.nr_devs++] = p.ptr.dev; -+ } else { -+ struct bch_replicas_padded ec_r; -+ unsigned nr_data, nr_parity; -+ s64 parity_sectors; -+ -+ ret = bch2_trans_mark_stripe_ptr(trans, p.ec, -+ disk_sectors, data_type, -+ &ec_r, &nr_data, &nr_parity); -+ if (ret) -+ return ret; -+ -+ parity_sectors = -+ __ptr_disk_sectors_delta(p.crc.live_size, -+ offset, sectors, flags, -+ p.crc.compressed_size * nr_parity, -+ p.crc.uncompressed_size * nr_data); -+ -+ update_replicas_list(trans, &ec_r.e, -+ disk_sectors + parity_sectors); -+ -+ r.e.nr_required = 0; -+ } -+ } -+ -+ if (r.e.nr_devs) -+ update_replicas_list(trans, &r.e, dirty_sectors); -+ -+ return 0; -+} -+ -+static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, -+ struct bkey_s_c_reflink_p p, -+ u64 idx, unsigned sectors, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_i_reflink_v *r_v; -+ s64 ret; -+ -+ ret = trans_get_key(trans, BTREE_ID_REFLINK, -+ POS(0, idx), &iter, &k); -+ if (ret < 0) -+ return ret; -+ -+ if (k.k->type != KEY_TYPE_reflink_v) { -+ bch2_fs_inconsistent(c, -+ "%llu:%llu len %u points to nonexistent indirect extent %llu", -+ p.k->p.inode, p.k->p.offset, p.k->size, idx); -+ ret = -EIO; -+ goto err; -+ } -+ -+ if ((flags & BTREE_TRIGGER_OVERWRITE) && -+ (bkey_start_offset(k.k) < idx || -+ k.k->p.offset > idx + sectors)) -+ goto out; -+ -+ sectors = k.k->p.offset - idx; -+ -+ r_v = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ ret = PTR_ERR_OR_ZERO(r_v); -+ if (ret) -+ goto err; -+ -+ bkey_reassemble(&r_v->k_i, k); -+ -+ le64_add_cpu(&r_v->v.refcount, -+ !(flags & BTREE_TRIGGER_OVERWRITE) ? 
1 : -1); -+ -+ if (!r_v->v.refcount) { -+ r_v->k.type = KEY_TYPE_deleted; -+ set_bkey_val_u64s(&r_v->k, 0); -+ } -+ -+ bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k)); -+ BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); -+ -+ bch2_trans_update(trans, iter, &r_v->k_i, 0); -+out: -+ ret = sectors; -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static int bch2_trans_mark_reflink_p(struct btree_trans *trans, -+ struct bkey_s_c_reflink_p p, unsigned offset, -+ s64 sectors, unsigned flags) -+{ -+ u64 idx = le64_to_cpu(p.v->idx) + offset; -+ s64 ret = 0; -+ -+ sectors = abs(sectors); -+ BUG_ON(offset + sectors > p.k->size); -+ -+ while (sectors) { -+ ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors, flags); -+ if (ret < 0) -+ break; -+ -+ idx += ret; -+ sectors = max_t(s64, 0LL, sectors - ret); -+ ret = 0; -+ } -+ -+ return ret; -+} -+ -+int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k, -+ unsigned offset, s64 sectors, unsigned flags) -+{ -+ struct replicas_delta_list *d; -+ struct bch_fs *c = trans->c; -+ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ sectors = !(flags & BTREE_TRIGGER_OVERWRITE) -+ ? c->opts.btree_node_size -+ : -c->opts.btree_node_size; -+ -+ return bch2_trans_mark_extent(trans, k, offset, sectors, -+ flags, BCH_DATA_btree); -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ return bch2_trans_mark_extent(trans, k, offset, sectors, -+ flags, BCH_DATA_user); -+ case KEY_TYPE_inode: -+ d = replicas_deltas_realloc(trans, 0); -+ -+ if (!(flags & BTREE_TRIGGER_OVERWRITE)) -+ d->nr_inodes++; -+ else -+ d->nr_inodes--; -+ return 0; -+ case KEY_TYPE_reservation: { -+ unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; -+ -+ d = replicas_deltas_realloc(trans, 0); -+ -+ sectors *= replicas; -+ replicas = clamp_t(unsigned, replicas, 1, -+ ARRAY_SIZE(d->persistent_reserved)); -+ -+ d->persistent_reserved[replicas - 1] += sectors; -+ return 0; -+ } -+ case KEY_TYPE_reflink_p: -+ return bch2_trans_mark_reflink_p(trans, -+ bkey_s_c_to_reflink_p(k), -+ offset, sectors, flags); -+ default: -+ return 0; -+ } -+} -+ -+int bch2_trans_mark_update(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert, -+ unsigned flags) -+{ -+ struct btree *b = iter_l(iter)->b; -+ struct btree_node_iter node_iter = iter_l(iter)->iter; -+ struct bkey_packed *_k; -+ int ret; -+ -+ if (unlikely(flags & BTREE_TRIGGER_NORUN)) -+ return 0; -+ -+ if (!btree_node_type_needs_gc(iter->btree_id)) -+ return 0; -+ -+ ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(insert), -+ 0, insert->k.size, BTREE_TRIGGER_INSERT); -+ if (ret) -+ return ret; -+ -+ if (btree_iter_type(iter) == BTREE_ITER_CACHED) { -+ struct bkey_cached *ck = (void *) iter->l[0].b; -+ -+ return bch2_trans_mark_key(trans, bkey_i_to_s_c(ck->k), -+ 0, 0, BTREE_TRIGGER_OVERWRITE); -+ } -+ -+ while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) { -+ struct bkey unpacked; -+ struct bkey_s_c k; -+ unsigned offset = 0; -+ s64 sectors = 0; -+ unsigned flags = BTREE_TRIGGER_OVERWRITE; -+ -+ k = bkey_disassemble(b, _k, &unpacked); -+ -+ if (btree_node_is_extents(b) -+ ? 
bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0 -+ : bkey_cmp(insert->k.p, k.k->p)) -+ break; -+ -+ if (btree_node_is_extents(b)) { -+ switch (bch2_extent_overlap(&insert->k, k.k)) { -+ case BCH_EXTENT_OVERLAP_ALL: -+ offset = 0; -+ sectors = -((s64) k.k->size); -+ break; -+ case BCH_EXTENT_OVERLAP_BACK: -+ offset = bkey_start_offset(&insert->k) - -+ bkey_start_offset(k.k); -+ sectors = bkey_start_offset(&insert->k) - -+ k.k->p.offset; -+ break; -+ case BCH_EXTENT_OVERLAP_FRONT: -+ offset = 0; -+ sectors = bkey_start_offset(k.k) - -+ insert->k.p.offset; -+ break; -+ case BCH_EXTENT_OVERLAP_MIDDLE: -+ offset = bkey_start_offset(&insert->k) - -+ bkey_start_offset(k.k); -+ sectors = -((s64) insert->k.size); -+ flags |= BTREE_TRIGGER_OVERWRITE_SPLIT; -+ break; -+ } -+ -+ BUG_ON(sectors >= 0); -+ } -+ -+ ret = bch2_trans_mark_key(trans, k, offset, sectors, flags); -+ if (ret) -+ return ret; -+ -+ bch2_btree_node_iter_advance(&node_iter, b); -+ } -+ -+ return 0; -+} -+ -+/* Disk reservations: */ -+ -+static u64 bch2_recalc_sectors_available(struct bch_fs *c) -+{ -+ percpu_u64_set(&c->pcpu->sectors_available, 0); -+ -+ return avail_factor(__bch2_fs_usage_read_short(c).free); -+} -+ -+void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res) -+{ -+ percpu_down_read(&c->mark_lock); -+ this_cpu_sub(c->usage[0]->online_reserved, -+ res->sectors); -+ percpu_up_read(&c->mark_lock); -+ -+ res->sectors = 0; -+} -+ -+#define SECTORS_CACHE 1024 -+ -+int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, -+ unsigned sectors, int flags) -+{ -+ struct bch_fs_pcpu *pcpu; -+ u64 old, v, get; -+ s64 sectors_available; -+ int ret; -+ -+ percpu_down_read(&c->mark_lock); -+ preempt_disable(); -+ pcpu = this_cpu_ptr(c->pcpu); -+ -+ if (sectors <= pcpu->sectors_available) -+ goto out; -+ -+ v = atomic64_read(&c->sectors_available); -+ do { -+ old = v; -+ get = min((u64) sectors + SECTORS_CACHE, old); -+ -+ if (get < sectors) { -+ preempt_enable(); -+ percpu_up_read(&c->mark_lock); -+ goto recalculate; -+ } -+ } while ((v = atomic64_cmpxchg(&c->sectors_available, -+ old, old - get)) != old); -+ -+ pcpu->sectors_available += get; -+ -+out: -+ pcpu->sectors_available -= sectors; -+ this_cpu_add(c->usage[0]->online_reserved, sectors); -+ res->sectors += sectors; -+ -+ preempt_enable(); -+ percpu_up_read(&c->mark_lock); -+ return 0; -+ -+recalculate: -+ percpu_down_write(&c->mark_lock); -+ -+ sectors_available = bch2_recalc_sectors_available(c); -+ -+ if (sectors <= sectors_available || -+ (flags & BCH_DISK_RESERVATION_NOFAIL)) { -+ atomic64_set(&c->sectors_available, -+ max_t(s64, 0, sectors_available - sectors)); -+ this_cpu_add(c->usage[0]->online_reserved, sectors); -+ res->sectors += sectors; -+ ret = 0; -+ } else { -+ atomic64_set(&c->sectors_available, sectors_available); -+ ret = -ENOSPC; -+ } -+ -+ percpu_up_write(&c->mark_lock); -+ -+ return ret; -+} -+ -+/* Startup/shutdown: */ -+ -+static void buckets_free_rcu(struct rcu_head *rcu) -+{ -+ struct bucket_array *buckets = -+ container_of(rcu, struct bucket_array, rcu); -+ -+ kvpfree(buckets, -+ sizeof(struct bucket_array) + -+ buckets->nbuckets * sizeof(struct bucket)); -+} -+ -+int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) -+{ -+ struct bucket_array *buckets = NULL, *old_buckets = NULL; -+ unsigned long *buckets_nouse = NULL; -+ alloc_fifo free[RESERVE_NR]; -+ alloc_fifo free_inc; -+ alloc_heap alloc_heap; -+ -+ size_t btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE, -+ 
ca->mi.bucket_size / c->opts.btree_node_size); -+ /* XXX: these should be tunable */ -+ size_t reserve_none = max_t(size_t, 1, nbuckets >> 9); -+ size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 7); -+ size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12), -+ btree_reserve * 2); -+ bool resize = ca->buckets[0] != NULL; -+ int ret = -ENOMEM; -+ unsigned i; -+ -+ memset(&free, 0, sizeof(free)); -+ memset(&free_inc, 0, sizeof(free_inc)); -+ memset(&alloc_heap, 0, sizeof(alloc_heap)); -+ -+ if (!(buckets = kvpmalloc(sizeof(struct bucket_array) + -+ nbuckets * sizeof(struct bucket), -+ GFP_KERNEL|__GFP_ZERO)) || -+ !(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) * -+ sizeof(unsigned long), -+ GFP_KERNEL|__GFP_ZERO)) || -+ !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) || -+ !init_fifo(&free[RESERVE_MOVINGGC], -+ copygc_reserve, GFP_KERNEL) || -+ !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) || -+ !init_fifo(&free_inc, free_inc_nr, GFP_KERNEL) || -+ !init_heap(&alloc_heap, ALLOC_SCAN_BATCH(ca) << 1, GFP_KERNEL)) -+ goto err; -+ -+ buckets->first_bucket = ca->mi.first_bucket; -+ buckets->nbuckets = nbuckets; -+ -+ bch2_copygc_stop(c); -+ -+ if (resize) { -+ down_write(&c->gc_lock); -+ down_write(&ca->bucket_lock); -+ percpu_down_write(&c->mark_lock); -+ } -+ -+ old_buckets = bucket_array(ca); -+ -+ if (resize) { -+ size_t n = min(buckets->nbuckets, old_buckets->nbuckets); -+ -+ memcpy(buckets->b, -+ old_buckets->b, -+ n * sizeof(struct bucket)); -+ memcpy(buckets_nouse, -+ ca->buckets_nouse, -+ BITS_TO_LONGS(n) * sizeof(unsigned long)); -+ } -+ -+ rcu_assign_pointer(ca->buckets[0], buckets); -+ buckets = old_buckets; -+ -+ swap(ca->buckets_nouse, buckets_nouse); -+ -+ if (resize) { -+ percpu_up_write(&c->mark_lock); -+ up_write(&c->gc_lock); -+ } -+ -+ spin_lock(&c->freelist_lock); -+ for (i = 0; i < RESERVE_NR; i++) { -+ fifo_move(&free[i], &ca->free[i]); -+ swap(ca->free[i], free[i]); -+ } -+ fifo_move(&free_inc, &ca->free_inc); -+ swap(ca->free_inc, free_inc); -+ spin_unlock(&c->freelist_lock); -+ -+ /* with gc lock held, alloc_heap can't be in use: */ -+ swap(ca->alloc_heap, alloc_heap); -+ -+ nbuckets = ca->mi.nbuckets; -+ -+ if (resize) -+ up_write(&ca->bucket_lock); -+ -+ ret = 0; -+err: -+ free_heap(&alloc_heap); -+ free_fifo(&free_inc); -+ for (i = 0; i < RESERVE_NR; i++) -+ free_fifo(&free[i]); -+ kvpfree(buckets_nouse, -+ BITS_TO_LONGS(nbuckets) * sizeof(unsigned long)); -+ if (buckets) -+ call_rcu(&old_buckets->rcu, buckets_free_rcu); -+ -+ return ret; -+} -+ -+void bch2_dev_buckets_free(struct bch_dev *ca) -+{ -+ unsigned i; -+ -+ free_heap(&ca->alloc_heap); -+ free_fifo(&ca->free_inc); -+ for (i = 0; i < RESERVE_NR; i++) -+ free_fifo(&ca->free[i]); -+ kvpfree(ca->buckets_nouse, -+ BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long)); -+ kvpfree(rcu_dereference_protected(ca->buckets[0], 1), -+ sizeof(struct bucket_array) + -+ ca->mi.nbuckets * sizeof(struct bucket)); -+ -+ free_percpu(ca->usage[0]); -+} -+ -+int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) -+{ -+ if (!(ca->usage[0] = alloc_percpu(struct bch_dev_usage))) -+ return -ENOMEM; -+ -+ return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);; -+} -diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h -new file mode 100644 -index 000000000000..653f6761862e ---- /dev/null -+++ b/fs/bcachefs/buckets.h -@@ -0,0 +1,324 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Code for manipulating bucket marks for garbage collection. -+ * -+ * Copyright 2014 Datera, Inc. 
-+ */ -+ -+#ifndef _BUCKETS_H -+#define _BUCKETS_H -+ -+#include "buckets_types.h" -+#include "super.h" -+ -+#define for_each_bucket(_b, _buckets) \ -+ for (_b = (_buckets)->b + (_buckets)->first_bucket; \ -+ _b < (_buckets)->b + (_buckets)->nbuckets; _b++) -+ -+#define bucket_cmpxchg(g, new, expr) \ -+({ \ -+ struct bucket *_g = g; \ -+ u64 _v = atomic64_read(&(g)->_mark.v); \ -+ struct bucket_mark _old; \ -+ \ -+ do { \ -+ (new).v.counter = _old.v.counter = _v; \ -+ expr; \ -+ } while ((_v = atomic64_cmpxchg(&(_g)->_mark.v, \ -+ _old.v.counter, \ -+ (new).v.counter)) != _old.v.counter);\ -+ _old; \ -+}) -+ -+static inline struct bucket_array *__bucket_array(struct bch_dev *ca, -+ bool gc) -+{ -+ return rcu_dereference_check(ca->buckets[gc], -+ !ca->fs || -+ percpu_rwsem_is_held(&ca->fs->mark_lock) || -+ lockdep_is_held(&ca->fs->gc_lock) || -+ lockdep_is_held(&ca->bucket_lock)); -+} -+ -+static inline struct bucket_array *bucket_array(struct bch_dev *ca) -+{ -+ return __bucket_array(ca, false); -+} -+ -+static inline struct bucket *__bucket(struct bch_dev *ca, size_t b, bool gc) -+{ -+ struct bucket_array *buckets = __bucket_array(ca, gc); -+ -+ BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets); -+ return buckets->b + b; -+} -+ -+static inline struct bucket *bucket(struct bch_dev *ca, size_t b) -+{ -+ return __bucket(ca, b, false); -+} -+ -+static inline void bucket_io_clock_reset(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, int rw) -+{ -+ bucket(ca, b)->io_time[rw] = c->bucket_clock[rw].hand; -+} -+ -+static inline u16 bucket_last_io(struct bch_fs *c, struct bucket *g, int rw) -+{ -+ return c->bucket_clock[rw].hand - g->io_time[rw]; -+} -+ -+/* -+ * bucket_gc_gen() returns the difference between the bucket's current gen and -+ * the oldest gen of any pointer into that bucket in the btree. -+ */ -+ -+static inline u8 bucket_gc_gen(struct bch_dev *ca, size_t b) -+{ -+ struct bucket *g = bucket(ca, b); -+ -+ return g->mark.gen - g->oldest_gen; -+} -+ -+static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr) -+{ -+ return sector_to_bucket(ca, ptr->offset); -+} -+ -+static inline struct bucket *PTR_BUCKET(struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr, -+ bool gc) -+{ -+ return __bucket(ca, PTR_BUCKET_NR(ca, ptr), gc); -+} -+ -+static inline enum bch_data_type ptr_data_type(const struct bkey *k, -+ const struct bch_extent_ptr *ptr) -+{ -+ if (k->type == KEY_TYPE_btree_ptr || -+ k->type == KEY_TYPE_btree_ptr_v2) -+ return BCH_DATA_btree; -+ -+ return ptr->cached ? BCH_DATA_cached : BCH_DATA_user; -+} -+ -+static inline struct bucket_mark ptr_bucket_mark(struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr) -+{ -+ struct bucket_mark m; -+ -+ rcu_read_lock(); -+ m = READ_ONCE(PTR_BUCKET(ca, ptr, 0)->mark); -+ rcu_read_unlock(); -+ -+ return m; -+} -+ -+static inline int gen_cmp(u8 a, u8 b) -+{ -+ return (s8) (a - b); -+} -+ -+static inline int gen_after(u8 a, u8 b) -+{ -+ int r = gen_cmp(a, b); -+ -+ return r > 0 ? r : 0; -+} -+ -+/** -+ * ptr_stale() - check if a pointer points into a bucket that has been -+ * invalidated. -+ */ -+static inline u8 ptr_stale(struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr) -+{ -+ return gen_after(ptr_bucket_mark(ca, ptr).gen, ptr->gen); -+} -+ -+static inline s64 __ptr_disk_sectors(struct extent_ptr_decoded p, -+ unsigned live_size) -+{ -+ return live_size && p.crc.compression_type -+ ? 
max(1U, DIV_ROUND_UP(live_size * p.crc.compressed_size, -+ p.crc.uncompressed_size)) -+ : live_size; -+} -+ -+static inline s64 ptr_disk_sectors(struct extent_ptr_decoded p) -+{ -+ return __ptr_disk_sectors(p, p.crc.live_size); -+} -+ -+/* bucket gc marks */ -+ -+static inline unsigned bucket_sectors_used(struct bucket_mark mark) -+{ -+ return mark.dirty_sectors + mark.cached_sectors; -+} -+ -+static inline bool bucket_unused(struct bucket_mark mark) -+{ -+ return !mark.owned_by_allocator && -+ !mark.data_type && -+ !bucket_sectors_used(mark); -+} -+ -+static inline bool is_available_bucket(struct bucket_mark mark) -+{ -+ return (!mark.owned_by_allocator && -+ !mark.dirty_sectors && -+ !mark.stripe); -+} -+ -+static inline bool bucket_needs_journal_commit(struct bucket_mark m, -+ u16 last_seq_ondisk) -+{ -+ return m.journal_seq_valid && -+ ((s16) m.journal_seq - (s16) last_seq_ondisk > 0); -+} -+ -+/* Device usage: */ -+ -+struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *); -+ -+void bch2_dev_usage_from_buckets(struct bch_fs *); -+ -+static inline u64 __dev_buckets_available(struct bch_dev *ca, -+ struct bch_dev_usage stats) -+{ -+ u64 total = ca->mi.nbuckets - ca->mi.first_bucket; -+ -+ if (WARN_ONCE(stats.buckets_unavailable > total, -+ "buckets_unavailable overflow (%llu > %llu)\n", -+ stats.buckets_unavailable, total)) -+ return 0; -+ -+ return total - stats.buckets_unavailable; -+} -+ -+/* -+ * Number of reclaimable buckets - only for use by the allocator thread: -+ */ -+static inline u64 dev_buckets_available(struct bch_dev *ca) -+{ -+ return __dev_buckets_available(ca, bch2_dev_usage_read(ca)); -+} -+ -+static inline u64 __dev_buckets_free(struct bch_dev *ca, -+ struct bch_dev_usage stats) -+{ -+ return __dev_buckets_available(ca, stats) + -+ fifo_used(&ca->free[RESERVE_NONE]) + -+ fifo_used(&ca->free_inc); -+} -+ -+static inline u64 dev_buckets_free(struct bch_dev *ca) -+{ -+ return __dev_buckets_free(ca, bch2_dev_usage_read(ca)); -+} -+ -+/* Filesystem usage: */ -+ -+static inline unsigned fs_usage_u64s(struct bch_fs *c) -+{ -+ -+ return sizeof(struct bch_fs_usage) / sizeof(u64) + -+ READ_ONCE(c->replicas.nr); -+} -+ -+void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *); -+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *); -+ -+u64 bch2_fs_usage_read_one(struct bch_fs *, u64 *); -+ -+struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *); -+ -+void bch2_fs_usage_acc_to_base(struct bch_fs *, unsigned); -+ -+void bch2_fs_usage_to_text(struct printbuf *, -+ struct bch_fs *, struct bch_fs_usage *); -+ -+u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage *); -+ -+struct bch_fs_usage_short -+bch2_fs_usage_read_short(struct bch_fs *); -+ -+/* key/bucket marking: */ -+ -+void bch2_bucket_seq_cleanup(struct bch_fs *); -+void bch2_fs_usage_initialize(struct bch_fs *); -+ -+void bch2_invalidate_bucket(struct bch_fs *, struct bch_dev *, -+ size_t, struct bucket_mark *); -+void bch2_mark_alloc_bucket(struct bch_fs *, struct bch_dev *, -+ size_t, bool, struct gc_pos, unsigned); -+void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, -+ size_t, enum bch_data_type, unsigned, -+ struct gc_pos, unsigned); -+ -+int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned, -+ s64, struct bch_fs_usage *, u64, unsigned); -+int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *, -+ struct disk_reservation *, unsigned); -+ -+int bch2_mark_update(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *, struct bch_fs_usage *, 
unsigned); -+ -+int bch2_replicas_delta_list_apply(struct bch_fs *, -+ struct bch_fs_usage *, -+ struct replicas_delta_list *); -+int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, -+ unsigned, s64, unsigned); -+int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter, -+ struct bkey_i *insert, unsigned); -+void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *); -+ -+/* disk reservations: */ -+ -+void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *); -+ -+static inline void bch2_disk_reservation_put(struct bch_fs *c, -+ struct disk_reservation *res) -+{ -+ if (res->sectors) -+ __bch2_disk_reservation_put(c, res); -+} -+ -+#define BCH_DISK_RESERVATION_NOFAIL (1 << 0) -+ -+int bch2_disk_reservation_add(struct bch_fs *, -+ struct disk_reservation *, -+ unsigned, int); -+ -+static inline struct disk_reservation -+bch2_disk_reservation_init(struct bch_fs *c, unsigned nr_replicas) -+{ -+ return (struct disk_reservation) { -+ .sectors = 0, -+#if 0 -+ /* not used yet: */ -+ .gen = c->capacity_gen, -+#endif -+ .nr_replicas = nr_replicas, -+ }; -+} -+ -+static inline int bch2_disk_reservation_get(struct bch_fs *c, -+ struct disk_reservation *res, -+ unsigned sectors, -+ unsigned nr_replicas, -+ int flags) -+{ -+ *res = bch2_disk_reservation_init(c, nr_replicas); -+ -+ return bch2_disk_reservation_add(c, res, sectors * nr_replicas, flags); -+} -+ -+int bch2_dev_buckets_resize(struct bch_fs *, struct bch_dev *, u64); -+void bch2_dev_buckets_free(struct bch_dev *); -+int bch2_dev_buckets_alloc(struct bch_fs *, struct bch_dev *); -+ -+#endif /* _BUCKETS_H */ -diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h -new file mode 100644 -index 000000000000..d5215b14d7d9 ---- /dev/null -+++ b/fs/bcachefs/buckets_types.h -@@ -0,0 +1,135 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BUCKETS_TYPES_H -+#define _BUCKETS_TYPES_H -+ -+#include "bcachefs_format.h" -+#include "util.h" -+ -+#define BUCKET_JOURNAL_SEQ_BITS 16 -+ -+struct bucket_mark { -+ union { -+ atomic64_t v; -+ -+ struct { -+ u8 gen; -+ u8 data_type:3, -+ owned_by_allocator:1, -+ journal_seq_valid:1, -+ stripe:1; -+ u16 dirty_sectors; -+ u16 cached_sectors; -+ -+ /* -+ * low bits of journal sequence number when this bucket was most -+ * recently modified: if journal_seq_valid is set, this bucket can't be -+ * reused until the journal sequence number written to disk is >= the -+ * bucket's journal sequence number: -+ */ -+ u16 journal_seq; -+ }; -+ }; -+}; -+ -+struct bucket { -+ union { -+ struct bucket_mark _mark; -+ const struct bucket_mark mark; -+ }; -+ -+ u16 io_time[2]; -+ u8 oldest_gen; -+ u8 gc_gen; -+ unsigned gen_valid:1; -+}; -+ -+struct bucket_array { -+ struct rcu_head rcu; -+ u16 first_bucket; -+ size_t nbuckets; -+ struct bucket b[]; -+}; -+ -+struct bch_dev_usage { -+ u64 buckets[BCH_DATA_NR]; -+ u64 buckets_alloc; -+ u64 buckets_unavailable; -+ -+ /* _compressed_ sectors: */ -+ u64 sectors[BCH_DATA_NR]; -+ u64 sectors_fragmented; -+ -+ u64 buckets_ec; -+ u64 sectors_ec; -+}; -+ -+struct bch_fs_usage { -+ /* all fields are in units of 512 byte sectors: */ -+ -+ u64 online_reserved; -+ -+ /* fields after online_reserved are cleared/recalculated by gc: */ -+ u64 gc_start[0]; -+ -+ u64 hidden; -+ u64 btree; -+ u64 data; -+ u64 cached; -+ u64 reserved; -+ u64 nr_inodes; -+ -+ /* XXX: add stats for compression ratio */ -+#if 0 -+ u64 uncompressed; -+ u64 compressed; -+#endif -+ -+ /* broken out: */ -+ -+ u64 
persistent_reserved[BCH_REPLICAS_MAX]; -+ u64 replicas[]; -+}; -+ -+struct bch_fs_usage_short { -+ u64 capacity; -+ u64 used; -+ u64 free; -+ u64 nr_inodes; -+}; -+ -+struct replicas_delta { -+ s64 delta; -+ struct bch_replicas_entry r; -+} __packed; -+ -+struct replicas_delta_list { -+ unsigned size; -+ unsigned used; -+ -+ struct {} memset_start; -+ u64 nr_inodes; -+ u64 persistent_reserved[BCH_REPLICAS_MAX]; -+ struct {} memset_end; -+ struct replicas_delta d[0]; -+}; -+ -+/* -+ * A reservation for space on disk: -+ */ -+struct disk_reservation { -+ u64 sectors; -+ u32 gen; -+ unsigned nr_replicas; -+}; -+ -+struct copygc_heap_entry { -+ u8 dev; -+ u8 gen; -+ u16 fragmentation; -+ u32 sectors; -+ u64 offset; -+}; -+ -+typedef HEAP(struct copygc_heap_entry) copygc_heap; -+ -+#endif /* _BUCKETS_TYPES_H */ -diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c -new file mode 100644 -index 000000000000..0377f9018d27 ---- /dev/null -+++ b/fs/bcachefs/chardev.c -@@ -0,0 +1,704 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_CHARDEV -+ -+#include "bcachefs.h" -+#include "bcachefs_ioctl.h" -+#include "buckets.h" -+#include "chardev.h" -+#include "move.h" -+#include "replicas.h" -+#include "super.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* returns with ref on ca->ref */ -+static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev, -+ unsigned flags) -+{ -+ struct bch_dev *ca; -+ -+ if (flags & BCH_BY_INDEX) { -+ if (dev >= c->sb.nr_devices) -+ return ERR_PTR(-EINVAL); -+ -+ rcu_read_lock(); -+ ca = rcu_dereference(c->devs[dev]); -+ if (ca) -+ percpu_ref_get(&ca->ref); -+ rcu_read_unlock(); -+ -+ if (!ca) -+ return ERR_PTR(-EINVAL); -+ } else { -+ char *path; -+ -+ path = strndup_user((const char __user *) -+ (unsigned long) dev, PATH_MAX); -+ if (IS_ERR(path)) -+ return ERR_CAST(path); -+ -+ ca = bch2_dev_lookup(c, path); -+ kfree(path); -+ } -+ -+ return ca; -+} -+ -+#if 0 -+static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg) -+{ -+ struct bch_ioctl_assemble arg; -+ struct bch_fs *c; -+ u64 *user_devs = NULL; -+ char **devs = NULL; -+ unsigned i; -+ int ret = -EFAULT; -+ -+ if (copy_from_user(&arg, user_arg, sizeof(arg))) -+ return -EFAULT; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ user_devs = kmalloc_array(arg.nr_devs, sizeof(u64), GFP_KERNEL); -+ if (!user_devs) -+ return -ENOMEM; -+ -+ devs = kcalloc(arg.nr_devs, sizeof(char *), GFP_KERNEL); -+ -+ if (copy_from_user(user_devs, user_arg->devs, -+ sizeof(u64) * arg.nr_devs)) -+ goto err; -+ -+ for (i = 0; i < arg.nr_devs; i++) { -+ devs[i] = strndup_user((const char __user *)(unsigned long) -+ user_devs[i], -+ PATH_MAX); -+ if (!devs[i]) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ } -+ -+ c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty()); -+ ret = PTR_ERR_OR_ZERO(c); -+ if (!ret) -+ closure_put(&c->cl); -+err: -+ if (devs) -+ for (i = 0; i < arg.nr_devs; i++) -+ kfree(devs[i]); -+ kfree(devs); -+ return ret; -+} -+ -+static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg) -+{ -+ struct bch_ioctl_incremental arg; -+ const char *err; -+ char *path; -+ -+ if (copy_from_user(&arg, user_arg, sizeof(arg))) -+ return -EFAULT; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); -+ if (!path) -+ return -ENOMEM; -+ -+ err = bch2_fs_open_incremental(path); -+ 
kfree(path); -+ -+ if (err) { -+ pr_err("Could not register bcachefs devices: %s", err); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+#endif -+ -+static long bch2_global_ioctl(unsigned cmd, void __user *arg) -+{ -+ switch (cmd) { -+#if 0 -+ case BCH_IOCTL_ASSEMBLE: -+ return bch2_ioctl_assemble(arg); -+ case BCH_IOCTL_INCREMENTAL: -+ return bch2_ioctl_incremental(arg); -+#endif -+ default: -+ return -ENOTTY; -+ } -+} -+ -+static long bch2_ioctl_query_uuid(struct bch_fs *c, -+ struct bch_ioctl_query_uuid __user *user_arg) -+{ -+ return copy_to_user(&user_arg->uuid, -+ &c->sb.user_uuid, -+ sizeof(c->sb.user_uuid)); -+} -+ -+#if 0 -+static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg) -+{ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ return bch2_fs_start(c); -+} -+ -+static long bch2_ioctl_stop(struct bch_fs *c) -+{ -+ bch2_fs_stop(c); -+ return 0; -+} -+#endif -+ -+static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ char *path; -+ int ret; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); -+ if (!path) -+ return -ENOMEM; -+ -+ ret = bch2_dev_add(c, path); -+ kfree(path); -+ -+ return ret; -+} -+ -+static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ struct bch_dev *ca; -+ -+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| -+ BCH_FORCE_IF_METADATA_LOST| -+ BCH_FORCE_IF_DEGRADED| -+ BCH_BY_INDEX)) || -+ arg.pad) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ return bch2_dev_remove(c, ca, arg.flags); -+} -+ -+static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ char *path; -+ int ret; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); -+ if (!path) -+ return -ENOMEM; -+ -+ ret = bch2_dev_online(c, path); -+ kfree(path); -+ return ret; -+} -+ -+static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| -+ BCH_FORCE_IF_METADATA_LOST| -+ BCH_FORCE_IF_DEGRADED| -+ BCH_BY_INDEX)) || -+ arg.pad) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ ret = bch2_dev_offline(c, ca, arg.flags); -+ percpu_ref_put(&ca->ref); -+ return ret; -+} -+ -+static long bch2_ioctl_disk_set_state(struct bch_fs *c, -+ struct bch_ioctl_disk_set_state arg) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| -+ BCH_FORCE_IF_METADATA_LOST| -+ BCH_FORCE_IF_DEGRADED| -+ BCH_BY_INDEX)) || -+ arg.pad[0] || arg.pad[1] || arg.pad[2]) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags); -+ -+ percpu_ref_put(&ca->ref); -+ return ret; -+} -+ -+struct bch_data_ctx { -+ struct bch_fs *c; -+ struct bch_ioctl_data arg; -+ struct bch_move_stats stats; -+ -+ int ret; -+ -+ struct task_struct *thread; -+}; -+ -+static int bch2_data_thread(void *arg) -+{ -+ struct bch_data_ctx *ctx = arg; -+ -+ ctx->ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg); -+ -+ ctx->stats.data_type = U8_MAX; -+ return 0; -+} -+ -+static int bch2_data_job_release(struct inode *inode, struct file *file) -+{ -+ struct bch_data_ctx *ctx = file->private_data; -+ -+ 
kthread_stop(ctx->thread); -+ put_task_struct(ctx->thread); -+ kfree(ctx); -+ return 0; -+} -+ -+static ssize_t bch2_data_job_read(struct file *file, char __user *buf, -+ size_t len, loff_t *ppos) -+{ -+ struct bch_data_ctx *ctx = file->private_data; -+ struct bch_fs *c = ctx->c; -+ struct bch_ioctl_data_event e = { -+ .type = BCH_DATA_EVENT_PROGRESS, -+ .p.data_type = ctx->stats.data_type, -+ .p.btree_id = ctx->stats.btree_id, -+ .p.pos = ctx->stats.pos, -+ .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen), -+ .p.sectors_total = bch2_fs_usage_read_short(c).used, -+ }; -+ -+ if (len < sizeof(e)) -+ return -EINVAL; -+ -+ return copy_to_user(buf, &e, sizeof(e)) ?: sizeof(e); -+} -+ -+static const struct file_operations bcachefs_data_ops = { -+ .release = bch2_data_job_release, -+ .read = bch2_data_job_read, -+ .llseek = no_llseek, -+}; -+ -+static long bch2_ioctl_data(struct bch_fs *c, -+ struct bch_ioctl_data arg) -+{ -+ struct bch_data_ctx *ctx = NULL; -+ struct file *file = NULL; -+ unsigned flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK; -+ int ret, fd = -1; -+ -+ if (arg.op >= BCH_DATA_OP_NR || arg.flags) -+ return -EINVAL; -+ -+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); -+ if (!ctx) -+ return -ENOMEM; -+ -+ ctx->c = c; -+ ctx->arg = arg; -+ -+ ctx->thread = kthread_create(bch2_data_thread, ctx, "[bcachefs]"); -+ if (IS_ERR(ctx->thread)) { -+ ret = PTR_ERR(ctx->thread); -+ goto err; -+ } -+ -+ ret = get_unused_fd_flags(flags); -+ if (ret < 0) -+ goto err; -+ fd = ret; -+ -+ file = anon_inode_getfile("[bcachefs]", &bcachefs_data_ops, ctx, flags); -+ if (IS_ERR(file)) { -+ ret = PTR_ERR(file); -+ goto err; -+ } -+ -+ fd_install(fd, file); -+ -+ get_task_struct(ctx->thread); -+ wake_up_process(ctx->thread); -+ -+ return fd; -+err: -+ if (fd >= 0) -+ put_unused_fd(fd); -+ if (!IS_ERR_OR_NULL(ctx->thread)) -+ kthread_stop(ctx->thread); -+ kfree(ctx); -+ return ret; -+} -+ -+static long bch2_ioctl_fs_usage(struct bch_fs *c, -+ struct bch_ioctl_fs_usage __user *user_arg) -+{ -+ struct bch_ioctl_fs_usage *arg = NULL; -+ struct bch_replicas_usage *dst_e, *dst_end; -+ struct bch_fs_usage *src; -+ u32 replica_entries_bytes; -+ unsigned i; -+ int ret = 0; -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EINVAL; -+ -+ if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes)) -+ return -EFAULT; -+ -+ arg = kzalloc(sizeof(*arg) + replica_entries_bytes, GFP_KERNEL); -+ if (!arg) -+ return -ENOMEM; -+ -+ src = bch2_fs_usage_read(c); -+ if (!src) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ arg->capacity = c->capacity; -+ arg->used = bch2_fs_sectors_used(c, src); -+ arg->online_reserved = src->online_reserved; -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) -+ arg->persistent_reserved[i] = src->persistent_reserved[i]; -+ -+ dst_e = arg->replicas; -+ dst_end = (void *) arg->replicas + replica_entries_bytes; -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *src_e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ if (replicas_usage_next(dst_e) > dst_end) { -+ ret = -ERANGE; -+ break; -+ } -+ -+ dst_e->sectors = src->replicas[i]; -+ dst_e->r = *src_e; -+ -+ /* recheck after setting nr_devs: */ -+ if (replicas_usage_next(dst_e) > dst_end) { -+ ret = -ERANGE; -+ break; -+ } -+ -+ memcpy(dst_e->r.devs, src_e->devs, src_e->nr_devs); -+ -+ dst_e = replicas_usage_next(dst_e); -+ } -+ -+ arg->replica_entries_bytes = (void *) dst_e - (void *) arg->replicas; -+ -+ percpu_up_read(&c->mark_lock); -+ kfree(src); -+ -+ if (!ret) -+ ret = copy_to_user(user_arg, arg, -+ sizeof(*arg) 
+ arg->replica_entries_bytes); -+err: -+ kfree(arg); -+ return ret; -+} -+ -+static long bch2_ioctl_dev_usage(struct bch_fs *c, -+ struct bch_ioctl_dev_usage __user *user_arg) -+{ -+ struct bch_ioctl_dev_usage arg; -+ struct bch_dev_usage src; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EINVAL; -+ -+ if (copy_from_user(&arg, user_arg, sizeof(arg))) -+ return -EFAULT; -+ -+ if ((arg.flags & ~BCH_BY_INDEX) || -+ arg.pad[0] || -+ arg.pad[1] || -+ arg.pad[2]) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ src = bch2_dev_usage_read(ca); -+ -+ arg.state = ca->mi.state; -+ arg.bucket_size = ca->mi.bucket_size; -+ arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; -+ arg.available_buckets = arg.nr_buckets - src.buckets_unavailable; -+ arg.ec_buckets = src.buckets_ec; -+ arg.ec_sectors = src.sectors_ec; -+ -+ for (i = 0; i < BCH_DATA_NR; i++) { -+ arg.buckets[i] = src.buckets[i]; -+ arg.sectors[i] = src.sectors[i]; -+ } -+ -+ percpu_ref_put(&ca->ref); -+ -+ return copy_to_user(user_arg, &arg, sizeof(arg)); -+} -+ -+static long bch2_ioctl_read_super(struct bch_fs *c, -+ struct bch_ioctl_read_super arg) -+{ -+ struct bch_dev *ca = NULL; -+ struct bch_sb *sb; -+ int ret = 0; -+ -+ if ((arg.flags & ~(BCH_BY_INDEX|BCH_READ_DEV)) || -+ arg.pad) -+ return -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ -+ if (arg.flags & BCH_READ_DEV) { -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ -+ if (IS_ERR(ca)) { -+ ret = PTR_ERR(ca); -+ goto err; -+ } -+ -+ sb = ca->disk_sb.sb; -+ } else { -+ sb = c->disk_sb.sb; -+ } -+ -+ if (vstruct_bytes(sb) > arg.size) { -+ ret = -ERANGE; -+ goto err; -+ } -+ -+ ret = copy_to_user((void __user *)(unsigned long)arg.sb, -+ sb, vstruct_bytes(sb)); -+err: -+ if (ca) -+ percpu_ref_put(&ca->ref); -+ mutex_unlock(&c->sb_lock); -+ return ret; -+} -+ -+static long bch2_ioctl_disk_get_idx(struct bch_fs *c, -+ struct bch_ioctl_disk_get_idx arg) -+{ -+ dev_t dev = huge_decode_dev(arg.dev); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ for_each_online_member(ca, c, i) -+ if (ca->disk_sb.bdev->bd_dev == dev) { -+ percpu_ref_put(&ca->io_ref); -+ return i; -+ } -+ -+ return -ENOENT; -+} -+ -+static long bch2_ioctl_disk_resize(struct bch_fs *c, -+ struct bch_ioctl_disk_resize arg) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ if ((arg.flags & ~BCH_BY_INDEX) || -+ arg.pad) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ ret = bch2_dev_resize(c, ca, arg.nbuckets); -+ -+ percpu_ref_put(&ca->ref); -+ return ret; -+} -+ -+#define BCH_IOCTL(_name, _argtype) \ -+do { \ -+ _argtype i; \ -+ \ -+ if (copy_from_user(&i, arg, sizeof(i))) \ -+ return -EFAULT; \ -+ return bch2_ioctl_##_name(c, i); \ -+} while (0) -+ -+long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) -+{ -+ /* ioctls that don't require admin cap: */ -+ switch (cmd) { -+ case BCH_IOCTL_QUERY_UUID: -+ return bch2_ioctl_query_uuid(c, arg); -+ case BCH_IOCTL_FS_USAGE: -+ return bch2_ioctl_fs_usage(c, arg); -+ case BCH_IOCTL_DEV_USAGE: -+ return bch2_ioctl_dev_usage(c, arg); -+ } -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ switch (cmd) { -+#if 0 -+ case BCH_IOCTL_START: -+ BCH_IOCTL(start, struct bch_ioctl_start); -+ case BCH_IOCTL_STOP: -+ return bch2_ioctl_stop(c); -+#endif -+ case BCH_IOCTL_READ_SUPER: -+ BCH_IOCTL(read_super, struct bch_ioctl_read_super); -+ case BCH_IOCTL_DISK_GET_IDX: -+ 
BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx); -+ } -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EINVAL; -+ -+ /* ioctls that do require admin cap: */ -+ switch (cmd) { -+ case BCH_IOCTL_DISK_ADD: -+ BCH_IOCTL(disk_add, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_REMOVE: -+ BCH_IOCTL(disk_remove, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_ONLINE: -+ BCH_IOCTL(disk_online, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_OFFLINE: -+ BCH_IOCTL(disk_offline, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_SET_STATE: -+ BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state); -+ case BCH_IOCTL_DATA: -+ BCH_IOCTL(data, struct bch_ioctl_data); -+ case BCH_IOCTL_DISK_RESIZE: -+ BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize); -+ -+ default: -+ return -ENOTTY; -+ } -+} -+ -+static DEFINE_IDR(bch_chardev_minor); -+ -+static long bch2_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v) -+{ -+ unsigned minor = iminor(file_inode(filp)); -+ struct bch_fs *c = minor < U8_MAX ? idr_find(&bch_chardev_minor, minor) : NULL; -+ void __user *arg = (void __user *) v; -+ -+ return c -+ ? bch2_fs_ioctl(c, cmd, arg) -+ : bch2_global_ioctl(cmd, arg); -+} -+ -+static const struct file_operations bch_chardev_fops = { -+ .owner = THIS_MODULE, -+ .unlocked_ioctl = bch2_chardev_ioctl, -+ .open = nonseekable_open, -+}; -+ -+static int bch_chardev_major; -+static struct class *bch_chardev_class; -+static struct device *bch_chardev; -+ -+void bch2_fs_chardev_exit(struct bch_fs *c) -+{ -+ if (!IS_ERR_OR_NULL(c->chardev)) -+ device_unregister(c->chardev); -+ if (c->minor >= 0) -+ idr_remove(&bch_chardev_minor, c->minor); -+} -+ -+int bch2_fs_chardev_init(struct bch_fs *c) -+{ -+ c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL); -+ if (c->minor < 0) -+ return c->minor; -+ -+ c->chardev = device_create(bch_chardev_class, NULL, -+ MKDEV(bch_chardev_major, c->minor), c, -+ "bcachefs%u-ctl", c->minor); -+ if (IS_ERR(c->chardev)) -+ return PTR_ERR(c->chardev); -+ -+ return 0; -+} -+ -+void bch2_chardev_exit(void) -+{ -+ if (!IS_ERR_OR_NULL(bch_chardev_class)) -+ device_destroy(bch_chardev_class, -+ MKDEV(bch_chardev_major, U8_MAX)); -+ if (!IS_ERR_OR_NULL(bch_chardev_class)) -+ class_destroy(bch_chardev_class); -+ if (bch_chardev_major > 0) -+ unregister_chrdev(bch_chardev_major, "bcachefs"); -+} -+ -+int __init bch2_chardev_init(void) -+{ -+ bch_chardev_major = register_chrdev(0, "bcachefs-ctl", &bch_chardev_fops); -+ if (bch_chardev_major < 0) -+ return bch_chardev_major; -+ -+ bch_chardev_class = class_create(THIS_MODULE, "bcachefs"); -+ if (IS_ERR(bch_chardev_class)) -+ return PTR_ERR(bch_chardev_class); -+ -+ bch_chardev = device_create(bch_chardev_class, NULL, -+ MKDEV(bch_chardev_major, U8_MAX), -+ NULL, "bcachefs-ctl"); -+ if (IS_ERR(bch_chardev)) -+ return PTR_ERR(bch_chardev); -+ -+ return 0; -+} -+ -+#endif /* NO_BCACHEFS_CHARDEV */ -diff --git a/fs/bcachefs/chardev.h b/fs/bcachefs/chardev.h -new file mode 100644 -index 000000000000..3a4890d39ff9 ---- /dev/null -+++ b/fs/bcachefs/chardev.h -@@ -0,0 +1,31 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CHARDEV_H -+#define _BCACHEFS_CHARDEV_H -+ -+#ifndef NO_BCACHEFS_FS -+ -+long bch2_fs_ioctl(struct bch_fs *, unsigned, void __user *); -+ -+void bch2_fs_chardev_exit(struct bch_fs *); -+int bch2_fs_chardev_init(struct bch_fs *); -+ -+void bch2_chardev_exit(void); -+int __init bch2_chardev_init(void); -+ -+#else -+ -+static inline long bch2_fs_ioctl(struct bch_fs *c, -+ unsigned cmd, void __user * 
arg) -+{ -+ return -ENOSYS; -+} -+ -+static inline void bch2_fs_chardev_exit(struct bch_fs *c) {} -+static inline int bch2_fs_chardev_init(struct bch_fs *c) { return 0; } -+ -+static inline void bch2_chardev_exit(void) {} -+static inline int __init bch2_chardev_init(void) { return 0; } -+ -+#endif /* NO_BCACHEFS_FS */ -+ -+#endif /* _BCACHEFS_CHARDEV_H */ -diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c -new file mode 100644 -index 000000000000..3d88719ba86c ---- /dev/null -+++ b/fs/bcachefs/checksum.c -@@ -0,0 +1,618 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "checksum.h" -+#include "super.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static u64 bch2_checksum_init(unsigned type) -+{ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ return 0; -+ case BCH_CSUM_CRC32C_NONZERO: -+ return U32_MAX; -+ case BCH_CSUM_CRC64_NONZERO: -+ return U64_MAX; -+ case BCH_CSUM_CRC32C: -+ return 0; -+ case BCH_CSUM_CRC64: -+ return 0; -+ default: -+ BUG(); -+ } -+} -+ -+static u64 bch2_checksum_final(unsigned type, u64 crc) -+{ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ return 0; -+ case BCH_CSUM_CRC32C_NONZERO: -+ return crc ^ U32_MAX; -+ case BCH_CSUM_CRC64_NONZERO: -+ return crc ^ U64_MAX; -+ case BCH_CSUM_CRC32C: -+ return crc; -+ case BCH_CSUM_CRC64: -+ return crc; -+ default: -+ BUG(); -+ } -+} -+ -+static u64 bch2_checksum_update(unsigned type, u64 crc, const void *data, size_t len) -+{ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ return 0; -+ case BCH_CSUM_CRC32C_NONZERO: -+ case BCH_CSUM_CRC32C: -+ return crc32c(crc, data, len); -+ case BCH_CSUM_CRC64_NONZERO: -+ case BCH_CSUM_CRC64: -+ return crc64_be(crc, data, len); -+ default: -+ BUG(); -+ } -+} -+ -+static inline void do_encrypt_sg(struct crypto_sync_skcipher *tfm, -+ struct nonce nonce, -+ struct scatterlist *sg, size_t len) -+{ -+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); -+ int ret; -+ -+ skcipher_request_set_sync_tfm(req, tfm); -+ skcipher_request_set_crypt(req, sg, sg, len, nonce.d); -+ -+ ret = crypto_skcipher_encrypt(req); -+ BUG_ON(ret); -+} -+ -+static inline void do_encrypt(struct crypto_sync_skcipher *tfm, -+ struct nonce nonce, -+ void *buf, size_t len) -+{ -+ struct scatterlist sg; -+ -+ sg_init_one(&sg, buf, len); -+ do_encrypt_sg(tfm, nonce, &sg, len); -+} -+ -+int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, -+ void *buf, size_t len) -+{ -+ struct crypto_sync_skcipher *chacha20 = -+ crypto_alloc_sync_skcipher("chacha20", 0, 0); -+ int ret; -+ -+ if (!chacha20) { -+ pr_err("error requesting chacha20 module: %li", PTR_ERR(chacha20)); -+ return PTR_ERR(chacha20); -+ } -+ -+ ret = crypto_skcipher_setkey(&chacha20->base, -+ (void *) key, sizeof(*key)); -+ if (ret) { -+ pr_err("crypto_skcipher_setkey() error: %i", ret); -+ goto err; -+ } -+ -+ do_encrypt(chacha20, nonce, buf, len); -+err: -+ crypto_free_sync_skcipher(chacha20); -+ return ret; -+} -+ -+static void gen_poly_key(struct bch_fs *c, struct shash_desc *desc, -+ struct nonce nonce) -+{ -+ u8 key[POLY1305_KEY_SIZE]; -+ -+ nonce.d[3] ^= BCH_NONCE_POLY; -+ -+ memset(key, 0, sizeof(key)); -+ do_encrypt(c->chacha20, nonce, key, sizeof(key)); -+ -+ desc->tfm = c->poly1305; -+ crypto_shash_init(desc); -+ crypto_shash_update(desc, key, sizeof(key)); -+} -+ -+struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type, -+ struct nonce nonce, const void *data, size_t len) -+{ -+ switch (type) { -+ case 
BCH_CSUM_NONE: -+ case BCH_CSUM_CRC32C_NONZERO: -+ case BCH_CSUM_CRC64_NONZERO: -+ case BCH_CSUM_CRC32C: -+ case BCH_CSUM_CRC64: { -+ u64 crc = bch2_checksum_init(type); -+ -+ crc = bch2_checksum_update(type, crc, data, len); -+ crc = bch2_checksum_final(type, crc); -+ -+ return (struct bch_csum) { .lo = cpu_to_le64(crc) }; -+ } -+ -+ case BCH_CSUM_CHACHA20_POLY1305_80: -+ case BCH_CSUM_CHACHA20_POLY1305_128: { -+ SHASH_DESC_ON_STACK(desc, c->poly1305); -+ u8 digest[POLY1305_DIGEST_SIZE]; -+ struct bch_csum ret = { 0 }; -+ -+ gen_poly_key(c, desc, nonce); -+ -+ crypto_shash_update(desc, data, len); -+ crypto_shash_final(desc, digest); -+ -+ memcpy(&ret, digest, bch_crc_bytes[type]); -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_encrypt(struct bch_fs *c, unsigned type, -+ struct nonce nonce, void *data, size_t len) -+{ -+ if (!bch2_csum_type_is_encryption(type)) -+ return; -+ -+ do_encrypt(c->chacha20, nonce, data, len); -+} -+ -+static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type, -+ struct nonce nonce, struct bio *bio, -+ struct bvec_iter *iter) -+{ -+ struct bio_vec bv; -+ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ return (struct bch_csum) { 0 }; -+ case BCH_CSUM_CRC32C_NONZERO: -+ case BCH_CSUM_CRC64_NONZERO: -+ case BCH_CSUM_CRC32C: -+ case BCH_CSUM_CRC64: { -+ u64 crc = bch2_checksum_init(type); -+ -+#ifdef CONFIG_HIGHMEM -+ __bio_for_each_segment(bv, bio, *iter, *iter) { -+ void *p = kmap_atomic(bv.bv_page) + bv.bv_offset; -+ crc = bch2_checksum_update(type, -+ crc, p, bv.bv_len); -+ kunmap_atomic(p); -+ } -+#else -+ __bio_for_each_bvec(bv, bio, *iter, *iter) -+ crc = bch2_checksum_update(type, crc, -+ page_address(bv.bv_page) + bv.bv_offset, -+ bv.bv_len); -+#endif -+ crc = bch2_checksum_final(type, crc); -+ return (struct bch_csum) { .lo = cpu_to_le64(crc) }; -+ } -+ -+ case BCH_CSUM_CHACHA20_POLY1305_80: -+ case BCH_CSUM_CHACHA20_POLY1305_128: { -+ SHASH_DESC_ON_STACK(desc, c->poly1305); -+ u8 digest[POLY1305_DIGEST_SIZE]; -+ struct bch_csum ret = { 0 }; -+ -+ gen_poly_key(c, desc, nonce); -+ -+#ifdef CONFIG_HIGHMEM -+ __bio_for_each_segment(bv, bio, *iter, *iter) { -+ void *p = kmap_atomic(bv.bv_page) + bv.bv_offset; -+ -+ crypto_shash_update(desc, p, bv.bv_len); -+ kunmap_atomic(p); -+ } -+#else -+ __bio_for_each_bvec(bv, bio, *iter, *iter) -+ crypto_shash_update(desc, -+ page_address(bv.bv_page) + bv.bv_offset, -+ bv.bv_len); -+#endif -+ crypto_shash_final(desc, digest); -+ -+ memcpy(&ret, digest, bch_crc_bytes[type]); -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+struct bch_csum bch2_checksum_bio(struct bch_fs *c, unsigned type, -+ struct nonce nonce, struct bio *bio) -+{ -+ struct bvec_iter iter = bio->bi_iter; -+ -+ return __bch2_checksum_bio(c, type, nonce, bio, &iter); -+} -+ -+void bch2_encrypt_bio(struct bch_fs *c, unsigned type, -+ struct nonce nonce, struct bio *bio) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ struct scatterlist sgl[16], *sg = sgl; -+ size_t bytes = 0; -+ -+ if (!bch2_csum_type_is_encryption(type)) -+ return; -+ -+ sg_init_table(sgl, ARRAY_SIZE(sgl)); -+ -+ bio_for_each_segment(bv, bio, iter) { -+ if (sg == sgl + ARRAY_SIZE(sgl)) { -+ sg_mark_end(sg - 1); -+ do_encrypt_sg(c->chacha20, nonce, sgl, bytes); -+ -+ nonce = nonce_add(nonce, bytes); -+ bytes = 0; -+ -+ sg_init_table(sgl, ARRAY_SIZE(sgl)); -+ sg = sgl; -+ } -+ -+ sg_set_page(sg++, bv.bv_page, bv.bv_len, bv.bv_offset); -+ bytes += bv.bv_len; -+ } -+ -+ sg_mark_end(sg - 1); -+ do_encrypt_sg(c->chacha20, nonce, sgl, bytes); -+} -+ 
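bch2_encrypt_bio() above walks the bio segment by segment, accumulates scatterlist entries in a small on-stack array, and flushes the whole batch through do_encrypt_sg() whenever the array fills, carrying the nonce forward by the number of bytes already handled. A minimal standalone sketch of that batch-and-flush pattern over plain buffers follows; it is not part of the patch, and the chunk struct, flush_batch() and the running byte offset are invented stand-ins for the scatterlist, do_encrypt_sg() and the nonce_add() arithmetic.

/*
 * Standalone illustration only -- not from the patch.  Mirrors the
 * batching in bch2_encrypt_bio(): collect entries into a fixed-size
 * batch, flush when it is full, and carry the byte offset forward
 * (the real code advances the chacha20 nonce by that many bytes).
 */
#include <stddef.h>
#include <stdio.h>

#define BATCH_SIZE 16

struct chunk {
	const void	*buf;
	size_t		len;
};

/* Hypothetical stand-in for do_encrypt_sg(): consumes one full batch. */
static void flush_batch(const struct chunk *batch, unsigned nr, size_t offset)
{
	size_t bytes = 0;
	unsigned i;

	for (i = 0; i < nr; i++)
		bytes += batch[i].len;

	printf("flush %u chunks (%zu bytes) at offset %zu\n", nr, bytes, offset);
}

static void process_chunks(const struct chunk *chunks, unsigned nr_chunks)
{
	struct chunk batch[BATCH_SIZE];
	unsigned nr = 0, i;
	size_t offset = 0, batch_bytes = 0;

	for (i = 0; i < nr_chunks; i++) {
		/* Batch full: flush it and start a new one at the next offset. */
		if (nr == BATCH_SIZE) {
			flush_batch(batch, nr, offset);
			offset += batch_bytes;
			batch_bytes = 0;
			nr = 0;
		}

		batch[nr++] = chunks[i];
		batch_bytes += chunks[i].len;
	}

	/* Final partial batch, like the trailing do_encrypt_sg() call. */
	if (nr)
		flush_batch(batch, nr, offset);
}

int main(void)
{
	struct chunk chunks[40];
	unsigned i;

	for (i = 0; i < 40; i++) {
		chunks[i].buf = NULL;
		chunks[i].len = 512;
	}

	process_chunks(chunks, 40);
	return 0;
}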
-+struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a, -+ struct bch_csum b, size_t b_len) -+{ -+ BUG_ON(!bch2_checksum_mergeable(type)); -+ -+ while (b_len) { -+ unsigned b = min_t(unsigned, b_len, PAGE_SIZE); -+ -+ a.lo = bch2_checksum_update(type, a.lo, -+ page_address(ZERO_PAGE(0)), b); -+ b_len -= b; -+ } -+ -+ a.lo ^= b.lo; -+ a.hi ^= b.hi; -+ return a; -+} -+ -+int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, -+ struct bversion version, -+ struct bch_extent_crc_unpacked crc_old, -+ struct bch_extent_crc_unpacked *crc_a, -+ struct bch_extent_crc_unpacked *crc_b, -+ unsigned len_a, unsigned len_b, -+ unsigned new_csum_type) -+{ -+ struct bvec_iter iter = bio->bi_iter; -+ struct nonce nonce = extent_nonce(version, crc_old); -+ struct bch_csum merged = { 0 }; -+ struct crc_split { -+ struct bch_extent_crc_unpacked *crc; -+ unsigned len; -+ unsigned csum_type; -+ struct bch_csum csum; -+ } splits[3] = { -+ { crc_a, len_a, new_csum_type }, -+ { crc_b, len_b, new_csum_type }, -+ { NULL, bio_sectors(bio) - len_a - len_b, new_csum_type }, -+ }, *i; -+ bool mergeable = crc_old.csum_type == new_csum_type && -+ bch2_checksum_mergeable(new_csum_type); -+ unsigned crc_nonce = crc_old.nonce; -+ -+ BUG_ON(len_a + len_b > bio_sectors(bio)); -+ BUG_ON(crc_old.uncompressed_size != bio_sectors(bio)); -+ BUG_ON(crc_is_compressed(crc_old)); -+ BUG_ON(bch2_csum_type_is_encryption(crc_old.csum_type) != -+ bch2_csum_type_is_encryption(new_csum_type)); -+ -+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) { -+ iter.bi_size = i->len << 9; -+ if (mergeable || i->crc) -+ i->csum = __bch2_checksum_bio(c, i->csum_type, -+ nonce, bio, &iter); -+ else -+ bio_advance_iter(bio, &iter, i->len << 9); -+ nonce = nonce_add(nonce, i->len << 9); -+ } -+ -+ if (mergeable) -+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) -+ merged = bch2_checksum_merge(new_csum_type, merged, -+ i->csum, i->len << 9); -+ else -+ merged = bch2_checksum_bio(c, crc_old.csum_type, -+ extent_nonce(version, crc_old), bio); -+ -+ if (bch2_crc_cmp(merged, crc_old.csum)) -+ return -EIO; -+ -+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) { -+ if (i->crc) -+ *i->crc = (struct bch_extent_crc_unpacked) { -+ .csum_type = i->csum_type, -+ .compression_type = crc_old.compression_type, -+ .compressed_size = i->len, -+ .uncompressed_size = i->len, -+ .offset = 0, -+ .live_size = i->len, -+ .nonce = crc_nonce, -+ .csum = i->csum, -+ }; -+ -+ if (bch2_csum_type_is_encryption(new_csum_type)) -+ crc_nonce += i->len; -+ } -+ -+ return 0; -+} -+ -+#ifdef __KERNEL__ -+int bch2_request_key(struct bch_sb *sb, struct bch_key *key) -+{ -+ char key_description[60]; -+ struct key *keyring_key; -+ const struct user_key_payload *ukp; -+ int ret; -+ -+ snprintf(key_description, sizeof(key_description), -+ "bcachefs:%pUb", &sb->user_uuid); -+ -+ keyring_key = request_key(&key_type_logon, key_description, NULL); -+ if (IS_ERR(keyring_key)) -+ return PTR_ERR(keyring_key); -+ -+ down_read(&keyring_key->sem); -+ ukp = dereference_key_locked(keyring_key); -+ if (ukp->datalen == sizeof(*key)) { -+ memcpy(key, ukp->data, ukp->datalen); -+ ret = 0; -+ } else { -+ ret = -EINVAL; -+ } -+ up_read(&keyring_key->sem); -+ key_put(keyring_key); -+ -+ return ret; -+} -+#else -+#include -+#include -+ -+int bch2_request_key(struct bch_sb *sb, struct bch_key *key) -+{ -+ key_serial_t key_id; -+ char key_description[60]; -+ char uuid[40]; -+ -+ uuid_unparse_lower(sb->user_uuid.b, uuid); -+ sprintf(key_description, "bcachefs:%s", uuid); -+ -+ 
key_id = request_key("user", key_description, NULL, -+ KEY_SPEC_USER_KEYRING); -+ if (key_id < 0) -+ return -errno; -+ -+ if (keyctl_read(key_id, (void *) key, sizeof(*key)) != sizeof(*key)) -+ return -1; -+ -+ return 0; -+} -+#endif -+ -+int bch2_decrypt_sb_key(struct bch_fs *c, -+ struct bch_sb_field_crypt *crypt, -+ struct bch_key *key) -+{ -+ struct bch_encrypted_key sb_key = crypt->key; -+ struct bch_key user_key; -+ int ret = 0; -+ -+ /* is key encrypted? */ -+ if (!bch2_key_is_encrypted(&sb_key)) -+ goto out; -+ -+ ret = bch2_request_key(c->disk_sb.sb, &user_key); -+ if (ret) { -+ bch_err(c, "error requesting encryption key: %i", ret); -+ goto err; -+ } -+ -+ /* decrypt real key: */ -+ ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c), -+ &sb_key, sizeof(sb_key)); -+ if (ret) -+ goto err; -+ -+ if (bch2_key_is_encrypted(&sb_key)) { -+ bch_err(c, "incorrect encryption key"); -+ ret = -EINVAL; -+ goto err; -+ } -+out: -+ *key = sb_key.key; -+err: -+ memzero_explicit(&sb_key, sizeof(sb_key)); -+ memzero_explicit(&user_key, sizeof(user_key)); -+ return ret; -+} -+ -+static int bch2_alloc_ciphers(struct bch_fs *c) -+{ -+ if (!c->chacha20) -+ c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0); -+ if (IS_ERR(c->chacha20)) { -+ bch_err(c, "error requesting chacha20 module: %li", -+ PTR_ERR(c->chacha20)); -+ return PTR_ERR(c->chacha20); -+ } -+ -+ if (!c->poly1305) -+ c->poly1305 = crypto_alloc_shash("poly1305", 0, 0); -+ if (IS_ERR(c->poly1305)) { -+ bch_err(c, "error requesting poly1305 module: %li", -+ PTR_ERR(c->poly1305)); -+ return PTR_ERR(c->poly1305); -+ } -+ -+ return 0; -+} -+ -+int bch2_disable_encryption(struct bch_fs *c) -+{ -+ struct bch_sb_field_crypt *crypt; -+ struct bch_key key; -+ int ret = -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ -+ crypt = bch2_sb_get_crypt(c->disk_sb.sb); -+ if (!crypt) -+ goto out; -+ -+ /* is key encrypted? */ -+ ret = 0; -+ if (bch2_key_is_encrypted(&crypt->key)) -+ goto out; -+ -+ ret = bch2_decrypt_sb_key(c, crypt, &key); -+ if (ret) -+ goto out; -+ -+ crypt->key.magic = BCH_KEY_MAGIC; -+ crypt->key.key = key; -+ -+ SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 0); -+ bch2_write_super(c); -+out: -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_enable_encryption(struct bch_fs *c, bool keyed) -+{ -+ struct bch_encrypted_key key; -+ struct bch_key user_key; -+ struct bch_sb_field_crypt *crypt; -+ int ret = -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ -+ /* Do we already have an encryption key? 
*/ -+ if (bch2_sb_get_crypt(c->disk_sb.sb)) -+ goto err; -+ -+ ret = bch2_alloc_ciphers(c); -+ if (ret) -+ goto err; -+ -+ key.magic = BCH_KEY_MAGIC; -+ get_random_bytes(&key.key, sizeof(key.key)); -+ -+ if (keyed) { -+ ret = bch2_request_key(c->disk_sb.sb, &user_key); -+ if (ret) { -+ bch_err(c, "error requesting encryption key: %i", ret); -+ goto err; -+ } -+ -+ ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c), -+ &key, sizeof(key)); -+ if (ret) -+ goto err; -+ } -+ -+ ret = crypto_skcipher_setkey(&c->chacha20->base, -+ (void *) &key.key, sizeof(key.key)); -+ if (ret) -+ goto err; -+ -+ crypt = bch2_sb_resize_crypt(&c->disk_sb, sizeof(*crypt) / sizeof(u64)); -+ if (!crypt) { -+ ret = -ENOMEM; /* XXX this technically could be -ENOSPC */ -+ goto err; -+ } -+ -+ crypt->key = key; -+ -+ /* write superblock */ -+ SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 1); -+ bch2_write_super(c); -+err: -+ mutex_unlock(&c->sb_lock); -+ memzero_explicit(&user_key, sizeof(user_key)); -+ memzero_explicit(&key, sizeof(key)); -+ return ret; -+} -+ -+void bch2_fs_encryption_exit(struct bch_fs *c) -+{ -+ if (!IS_ERR_OR_NULL(c->poly1305)) -+ crypto_free_shash(c->poly1305); -+ if (!IS_ERR_OR_NULL(c->chacha20)) -+ crypto_free_sync_skcipher(c->chacha20); -+ if (!IS_ERR_OR_NULL(c->sha256)) -+ crypto_free_shash(c->sha256); -+} -+ -+int bch2_fs_encryption_init(struct bch_fs *c) -+{ -+ struct bch_sb_field_crypt *crypt; -+ struct bch_key key; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ c->sha256 = crypto_alloc_shash("sha256", 0, 0); -+ if (IS_ERR(c->sha256)) { -+ bch_err(c, "error requesting sha256 module"); -+ ret = PTR_ERR(c->sha256); -+ goto out; -+ } -+ -+ crypt = bch2_sb_get_crypt(c->disk_sb.sb); -+ if (!crypt) -+ goto out; -+ -+ ret = bch2_alloc_ciphers(c); -+ if (ret) -+ goto out; -+ -+ ret = bch2_decrypt_sb_key(c, crypt, &key); -+ if (ret) -+ goto out; -+ -+ ret = crypto_skcipher_setkey(&c->chacha20->base, -+ (void *) &key.key, sizeof(key.key)); -+ if (ret) -+ goto out; -+out: -+ memzero_explicit(&key, sizeof(key)); -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h -new file mode 100644 -index 000000000000..24dee8039d57 ---- /dev/null -+++ b/fs/bcachefs/checksum.h -@@ -0,0 +1,202 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CHECKSUM_H -+#define _BCACHEFS_CHECKSUM_H -+ -+#include "bcachefs.h" -+#include "extents_types.h" -+#include "super-io.h" -+ -+#include -+#include -+ -+static inline bool bch2_checksum_mergeable(unsigned type) -+{ -+ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ case BCH_CSUM_CRC32C: -+ case BCH_CSUM_CRC64: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+struct bch_csum bch2_checksum_merge(unsigned, struct bch_csum, -+ struct bch_csum, size_t); -+ -+#define BCH_NONCE_EXTENT cpu_to_le32(1 << 28) -+#define BCH_NONCE_BTREE cpu_to_le32(2 << 28) -+#define BCH_NONCE_JOURNAL cpu_to_le32(3 << 28) -+#define BCH_NONCE_PRIO cpu_to_le32(4 << 28) -+#define BCH_NONCE_POLY cpu_to_le32(1 << 31) -+ -+struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce, -+ const void *, size_t); -+ -+/* -+ * This is used for various on disk data structures - bch_sb, prio_set, bset, -+ * jset: The checksum is _always_ the first field of these structs -+ */ -+#define csum_vstruct(_c, _type, _nonce, _i) \ -+({ \ -+ const void *start = ((const void *) (_i)) + sizeof((_i)->csum); \ -+ const void *end = vstruct_end(_i); \ -+ \ -+ bch2_checksum(_c, _type, _nonce, start, end - start); \ -+}) 
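The comment above documents the on-disk convention that csum_vstruct() depends on: the checksum is always the first field of these structs, so the covered region starts immediately after it and runs to vstruct_end(). A minimal standalone sketch of that layout rule follows; it is not part of the patch, struct example_vstruct is invented for illustration, and a 64-bit FNV-1a hash stands in for bch2_checksum() and the real bch_csum types.

/*
 * Standalone illustration only -- not from the patch.  The checksum
 * field comes first and everything after it, up to the end of the
 * variable-length payload, is what gets checksummed.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct example_vstruct {
	uint64_t	csum;	/* always first, never covers itself */
	uint32_t	u64s;	/* payload size, in 64-bit words */
	uint64_t	data[];	/* variable-length payload */
};

static uint64_t fnv1a_64(const void *buf, size_t len)
{
	const unsigned char *p = buf;
	uint64_t h = 0xcbf29ce484222325ULL;

	while (len--) {
		h ^= *p++;
		h *= 0x100000001b3ULL;
	}
	return h;
}

/* Checksum from just past the csum field to the end of the payload. */
static uint64_t csum_example(const struct example_vstruct *v)
{
	const unsigned char *start = (const unsigned char *) v + sizeof(v->csum);
	const unsigned char *end   = (const unsigned char *) v->data +
				     v->u64s * sizeof(uint64_t);

	return fnv1a_64(start, (size_t) (end - start));
}

int main(void)
{
	struct example_vstruct *v = calloc(1, sizeof(*v) + 2 * sizeof(uint64_t));

	if (!v)
		return 1;

	v->u64s = 2;
	v->data[0] = 1;
	v->data[1] = 2;
	v->csum = csum_example(v);

	printf("csum %016llx, still valid: %d\n",
	       (unsigned long long) v->csum, csum_example(v) == v->csum);
	free(v);
	return 0;
}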
-+ -+int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t); -+int bch2_request_key(struct bch_sb *, struct bch_key *); -+ -+void bch2_encrypt(struct bch_fs *, unsigned, struct nonce, -+ void *data, size_t); -+ -+struct bch_csum bch2_checksum_bio(struct bch_fs *, unsigned, -+ struct nonce, struct bio *); -+ -+int bch2_rechecksum_bio(struct bch_fs *, struct bio *, struct bversion, -+ struct bch_extent_crc_unpacked, -+ struct bch_extent_crc_unpacked *, -+ struct bch_extent_crc_unpacked *, -+ unsigned, unsigned, unsigned); -+ -+void bch2_encrypt_bio(struct bch_fs *, unsigned, -+ struct nonce, struct bio *); -+ -+int bch2_decrypt_sb_key(struct bch_fs *, struct bch_sb_field_crypt *, -+ struct bch_key *); -+ -+int bch2_disable_encryption(struct bch_fs *); -+int bch2_enable_encryption(struct bch_fs *, bool); -+ -+void bch2_fs_encryption_exit(struct bch_fs *); -+int bch2_fs_encryption_init(struct bch_fs *); -+ -+static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opts type, -+ bool data) -+{ -+ switch (type) { -+ case BCH_CSUM_OPT_NONE: -+ return BCH_CSUM_NONE; -+ case BCH_CSUM_OPT_CRC32C: -+ return data ? BCH_CSUM_CRC32C : BCH_CSUM_CRC32C_NONZERO; -+ case BCH_CSUM_OPT_CRC64: -+ return data ? BCH_CSUM_CRC64 : BCH_CSUM_CRC64_NONZERO; -+ default: -+ BUG(); -+ } -+} -+ -+static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c, -+ unsigned opt) -+{ -+ if (c->sb.encryption_type) -+ return c->opts.wide_macs -+ ? BCH_CSUM_CHACHA20_POLY1305_128 -+ : BCH_CSUM_CHACHA20_POLY1305_80; -+ -+ return bch2_csum_opt_to_type(opt, true); -+} -+ -+static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c) -+{ -+ if (c->sb.encryption_type) -+ return BCH_CSUM_CHACHA20_POLY1305_128; -+ -+ return bch2_csum_opt_to_type(c->opts.metadata_checksum, false); -+} -+ -+static const unsigned bch2_compression_opt_to_type[] = { -+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t, -+ BCH_COMPRESSION_OPTS() -+#undef x -+}; -+ -+static inline bool bch2_checksum_type_valid(const struct bch_fs *c, -+ unsigned type) -+{ -+ if (type >= BCH_CSUM_NR) -+ return false; -+ -+ if (bch2_csum_type_is_encryption(type) && !c->chacha20) -+ return false; -+ -+ return true; -+} -+ -+/* returns true if not equal */ -+static inline bool bch2_crc_cmp(struct bch_csum l, struct bch_csum r) -+{ -+ /* -+ * XXX: need some way of preventing the compiler from optimizing this -+ * into a form that isn't constant time.. -+ */ -+ return ((l.lo ^ r.lo) | (l.hi ^ r.hi)) != 0; -+} -+ -+/* for skipping ahead and encrypting/decrypting at an offset: */ -+static inline struct nonce nonce_add(struct nonce nonce, unsigned offset) -+{ -+ EBUG_ON(offset & (CHACHA_BLOCK_SIZE - 1)); -+ -+ le32_add_cpu(&nonce.d[0], offset / CHACHA_BLOCK_SIZE); -+ return nonce; -+} -+ -+static inline struct nonce null_nonce(void) -+{ -+ struct nonce ret; -+ -+ memset(&ret, 0, sizeof(ret)); -+ return ret; -+} -+ -+static inline struct nonce extent_nonce(struct bversion version, -+ struct bch_extent_crc_unpacked crc) -+{ -+ unsigned compression_type = crc_is_compressed(crc) -+ ? crc.compression_type -+ : 0; -+ unsigned size = compression_type ? 
crc.uncompressed_size : 0; -+ struct nonce nonce = (struct nonce) {{ -+ [0] = cpu_to_le32(size << 22), -+ [1] = cpu_to_le32(version.lo), -+ [2] = cpu_to_le32(version.lo >> 32), -+ [3] = cpu_to_le32(version.hi| -+ (compression_type << 24))^BCH_NONCE_EXTENT, -+ }}; -+ -+ return nonce_add(nonce, crc.nonce << 9); -+} -+ -+static inline bool bch2_key_is_encrypted(struct bch_encrypted_key *key) -+{ -+ return le64_to_cpu(key->magic) != BCH_KEY_MAGIC; -+} -+ -+static inline struct nonce __bch2_sb_key_nonce(struct bch_sb *sb) -+{ -+ __le64 magic = __bch2_sb_magic(sb); -+ -+ return (struct nonce) {{ -+ [0] = 0, -+ [1] = 0, -+ [2] = ((__le32 *) &magic)[0], -+ [3] = ((__le32 *) &magic)[1], -+ }}; -+} -+ -+static inline struct nonce bch2_sb_key_nonce(struct bch_fs *c) -+{ -+ __le64 magic = bch2_sb_magic(c); -+ -+ return (struct nonce) {{ -+ [0] = 0, -+ [1] = 0, -+ [2] = ((__le32 *) &magic)[0], -+ [3] = ((__le32 *) &magic)[1], -+ }}; -+} -+ -+#endif /* _BCACHEFS_CHECKSUM_H */ -diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c -new file mode 100644 -index 000000000000..1d1590de55e8 ---- /dev/null -+++ b/fs/bcachefs/clock.c -@@ -0,0 +1,191 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "clock.h" -+ -+#include -+#include -+#include -+ -+static inline long io_timer_cmp(io_timer_heap *h, -+ struct io_timer *l, -+ struct io_timer *r) -+{ -+ return l->expire - r->expire; -+} -+ -+void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer) -+{ -+ size_t i; -+ -+ spin_lock(&clock->timer_lock); -+ -+ if (time_after_eq((unsigned long) atomic_long_read(&clock->now), -+ timer->expire)) { -+ spin_unlock(&clock->timer_lock); -+ timer->fn(timer); -+ return; -+ } -+ -+ for (i = 0; i < clock->timers.used; i++) -+ if (clock->timers.data[i] == timer) -+ goto out; -+ -+ BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL)); -+out: -+ spin_unlock(&clock->timer_lock); -+} -+ -+void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer) -+{ -+ size_t i; -+ -+ spin_lock(&clock->timer_lock); -+ -+ for (i = 0; i < clock->timers.used; i++) -+ if (clock->timers.data[i] == timer) { -+ heap_del(&clock->timers, i, io_timer_cmp, NULL); -+ break; -+ } -+ -+ spin_unlock(&clock->timer_lock); -+} -+ -+struct io_clock_wait { -+ struct io_timer io_timer; -+ struct timer_list cpu_timer; -+ struct task_struct *task; -+ int expired; -+}; -+ -+static void io_clock_wait_fn(struct io_timer *timer) -+{ -+ struct io_clock_wait *wait = container_of(timer, -+ struct io_clock_wait, io_timer); -+ -+ wait->expired = 1; -+ wake_up_process(wait->task); -+} -+ -+static void io_clock_cpu_timeout(struct timer_list *timer) -+{ -+ struct io_clock_wait *wait = container_of(timer, -+ struct io_clock_wait, cpu_timer); -+ -+ wait->expired = 1; -+ wake_up_process(wait->task); -+} -+ -+void bch2_io_clock_schedule_timeout(struct io_clock *clock, unsigned long until) -+{ -+ struct io_clock_wait wait; -+ -+ /* XXX: calculate sleep time rigorously */ -+ wait.io_timer.expire = until; -+ wait.io_timer.fn = io_clock_wait_fn; -+ wait.task = current; -+ wait.expired = 0; -+ bch2_io_timer_add(clock, &wait.io_timer); -+ -+ schedule(); -+ -+ bch2_io_timer_del(clock, &wait.io_timer); -+} -+ -+void bch2_kthread_io_clock_wait(struct io_clock *clock, -+ unsigned long io_until, -+ unsigned long cpu_timeout) -+{ -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ struct io_clock_wait wait; -+ -+ wait.io_timer.expire = io_until; -+ wait.io_timer.fn = io_clock_wait_fn; -+ wait.task = current; -+ wait.expired = 0; -+ 
bch2_io_timer_add(clock, &wait.io_timer); -+ -+ timer_setup_on_stack(&wait.cpu_timer, io_clock_cpu_timeout, 0); -+ -+ if (cpu_timeout != MAX_SCHEDULE_TIMEOUT) -+ mod_timer(&wait.cpu_timer, cpu_timeout + jiffies); -+ -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ if (kthread && kthread_should_stop()) -+ break; -+ -+ if (wait.expired) -+ break; -+ -+ schedule(); -+ try_to_freeze(); -+ } -+ -+ __set_current_state(TASK_RUNNING); -+ del_singleshot_timer_sync(&wait.cpu_timer); -+ destroy_timer_on_stack(&wait.cpu_timer); -+ bch2_io_timer_del(clock, &wait.io_timer); -+} -+ -+static struct io_timer *get_expired_timer(struct io_clock *clock, -+ unsigned long now) -+{ -+ struct io_timer *ret = NULL; -+ -+ spin_lock(&clock->timer_lock); -+ -+ if (clock->timers.used && -+ time_after_eq(now, clock->timers.data[0]->expire)) -+ heap_pop(&clock->timers, ret, io_timer_cmp, NULL); -+ -+ spin_unlock(&clock->timer_lock); -+ -+ return ret; -+} -+ -+void __bch2_increment_clock(struct io_clock *clock, unsigned sectors) -+{ -+ struct io_timer *timer; -+ unsigned long now = atomic_long_add_return(sectors, &clock->now); -+ -+ while ((timer = get_expired_timer(clock, now))) -+ timer->fn(timer); -+} -+ -+void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) -+{ -+ unsigned long now; -+ unsigned i; -+ -+ spin_lock(&clock->timer_lock); -+ now = atomic_long_read(&clock->now); -+ -+ for (i = 0; i < clock->timers.used; i++) -+ pr_buf(out, "%ps:\t%li\n", -+ clock->timers.data[i]->fn, -+ clock->timers.data[i]->expire - now); -+ spin_unlock(&clock->timer_lock); -+} -+ -+void bch2_io_clock_exit(struct io_clock *clock) -+{ -+ free_heap(&clock->timers); -+ free_percpu(clock->pcpu_buf); -+} -+ -+int bch2_io_clock_init(struct io_clock *clock) -+{ -+ atomic_long_set(&clock->now, 0); -+ spin_lock_init(&clock->timer_lock); -+ -+ clock->max_slop = IO_CLOCK_PCPU_SECTORS * num_possible_cpus(); -+ -+ clock->pcpu_buf = alloc_percpu(*clock->pcpu_buf); -+ if (!clock->pcpu_buf) -+ return -ENOMEM; -+ -+ if (!init_heap(&clock->timers, NR_IO_TIMERS, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ return 0; -+} -diff --git a/fs/bcachefs/clock.h b/fs/bcachefs/clock.h -new file mode 100644 -index 000000000000..70a0f7436c84 ---- /dev/null -+++ b/fs/bcachefs/clock.h -@@ -0,0 +1,38 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CLOCK_H -+#define _BCACHEFS_CLOCK_H -+ -+void bch2_io_timer_add(struct io_clock *, struct io_timer *); -+void bch2_io_timer_del(struct io_clock *, struct io_timer *); -+void bch2_kthread_io_clock_wait(struct io_clock *, unsigned long, -+ unsigned long); -+ -+void __bch2_increment_clock(struct io_clock *, unsigned); -+ -+static inline void bch2_increment_clock(struct bch_fs *c, unsigned sectors, -+ int rw) -+{ -+ struct io_clock *clock = &c->io_clock[rw]; -+ -+ if (unlikely(this_cpu_add_return(*clock->pcpu_buf, sectors) >= -+ IO_CLOCK_PCPU_SECTORS)) -+ __bch2_increment_clock(clock, this_cpu_xchg(*clock->pcpu_buf, 0)); -+} -+ -+void bch2_io_clock_schedule_timeout(struct io_clock *, unsigned long); -+ -+#define bch2_kthread_wait_event_ioclock_timeout(condition, clock, timeout)\ -+({ \ -+ long __ret = timeout; \ -+ might_sleep(); \ -+ if (!___wait_cond_timeout(condition)) \ -+ __ret = __wait_event_timeout(wq, condition, timeout); \ -+ __ret; \ -+}) -+ -+void bch2_io_timers_to_text(struct printbuf *, struct io_clock *); -+ -+void bch2_io_clock_exit(struct io_clock *); -+int bch2_io_clock_init(struct io_clock *); -+ -+#endif /* _BCACHEFS_CLOCK_H */ -diff --git a/fs/bcachefs/clock_types.h 
b/fs/bcachefs/clock_types.h -new file mode 100644 -index 000000000000..92c740a47565 ---- /dev/null -+++ b/fs/bcachefs/clock_types.h -@@ -0,0 +1,37 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CLOCK_TYPES_H -+#define _BCACHEFS_CLOCK_TYPES_H -+ -+#include "util.h" -+ -+#define NR_IO_TIMERS (BCH_SB_MEMBERS_MAX * 3) -+ -+/* -+ * Clocks/timers in units of sectors of IO: -+ * -+ * Note - they use percpu batching, so they're only approximate. -+ */ -+ -+struct io_timer; -+typedef void (*io_timer_fn)(struct io_timer *); -+ -+struct io_timer { -+ io_timer_fn fn; -+ unsigned long expire; -+}; -+ -+/* Amount to buffer up on a percpu counter */ -+#define IO_CLOCK_PCPU_SECTORS 128 -+ -+typedef HEAP(struct io_timer *) io_timer_heap; -+ -+struct io_clock { -+ atomic_long_t now; -+ u16 __percpu *pcpu_buf; -+ unsigned max_slop; -+ -+ spinlock_t timer_lock; -+ io_timer_heap timers; -+}; -+ -+#endif /* _BCACHEFS_CLOCK_TYPES_H */ -diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c -new file mode 100644 -index 000000000000..b50d2b0d5fd3 ---- /dev/null -+++ b/fs/bcachefs/compress.c -@@ -0,0 +1,629 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "checksum.h" -+#include "compress.h" -+#include "extents.h" -+#include "io.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+ -+/* Bounce buffer: */ -+struct bbuf { -+ void *b; -+ enum { -+ BB_NONE, -+ BB_VMAP, -+ BB_KMALLOC, -+ BB_MEMPOOL, -+ } type; -+ int rw; -+}; -+ -+static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw) -+{ -+ void *b; -+ -+ BUG_ON(size > c->sb.encoded_extent_max << 9); -+ -+ b = kmalloc(size, GFP_NOIO|__GFP_NOWARN); -+ if (b) -+ return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw }; -+ -+ b = mempool_alloc(&c->compression_bounce[rw], GFP_NOIO); -+ if (b) -+ return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw }; -+ -+ BUG(); -+} -+ -+static bool bio_phys_contig(struct bio *bio, struct bvec_iter start) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ void *expected_start = NULL; -+ -+ __bio_for_each_bvec(bv, bio, iter, start) { -+ if (expected_start && -+ expected_start != page_address(bv.bv_page) + bv.bv_offset) -+ return false; -+ -+ expected_start = page_address(bv.bv_page) + -+ bv.bv_offset + bv.bv_len; -+ } -+ -+ return true; -+} -+ -+static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio, -+ struct bvec_iter start, int rw) -+{ -+ struct bbuf ret; -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ unsigned nr_pages = 0; -+ struct page *stack_pages[16]; -+ struct page **pages = NULL; -+ void *data; -+ -+ BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max); -+ -+ if (!IS_ENABLED(CONFIG_HIGHMEM) && -+ bio_phys_contig(bio, start)) -+ return (struct bbuf) { -+ .b = page_address(bio_iter_page(bio, start)) + -+ bio_iter_offset(bio, start), -+ .type = BB_NONE, .rw = rw -+ }; -+ -+ /* check if we can map the pages contiguously: */ -+ __bio_for_each_segment(bv, bio, iter, start) { -+ if (iter.bi_size != start.bi_size && -+ bv.bv_offset) -+ goto bounce; -+ -+ if (bv.bv_len < iter.bi_size && -+ bv.bv_offset + bv.bv_len < PAGE_SIZE) -+ goto bounce; -+ -+ nr_pages++; -+ } -+ -+ BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages); -+ -+ pages = nr_pages > ARRAY_SIZE(stack_pages) -+ ? 
kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOIO) -+ : stack_pages; -+ if (!pages) -+ goto bounce; -+ -+ nr_pages = 0; -+ __bio_for_each_segment(bv, bio, iter, start) -+ pages[nr_pages++] = bv.bv_page; -+ -+ data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); -+ if (pages != stack_pages) -+ kfree(pages); -+ -+ if (data) -+ return (struct bbuf) { -+ .b = data + bio_iter_offset(bio, start), -+ .type = BB_VMAP, .rw = rw -+ }; -+bounce: -+ ret = __bounce_alloc(c, start.bi_size, rw); -+ -+ if (rw == READ) -+ memcpy_from_bio(ret.b, bio, start); -+ -+ return ret; -+} -+ -+static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw) -+{ -+ return __bio_map_or_bounce(c, bio, bio->bi_iter, rw); -+} -+ -+static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf) -+{ -+ switch (buf.type) { -+ case BB_NONE: -+ break; -+ case BB_VMAP: -+ vunmap((void *) ((unsigned long) buf.b & PAGE_MASK)); -+ break; -+ case BB_KMALLOC: -+ kfree(buf.b); -+ break; -+ case BB_MEMPOOL: -+ mempool_free(buf.b, &c->compression_bounce[buf.rw]); -+ break; -+ } -+} -+ -+static inline void zlib_set_workspace(z_stream *strm, void *workspace) -+{ -+#ifdef __KERNEL__ -+ strm->workspace = workspace; -+#endif -+} -+ -+static int __bio_uncompress(struct bch_fs *c, struct bio *src, -+ void *dst_data, struct bch_extent_crc_unpacked crc) -+{ -+ struct bbuf src_data = { NULL }; -+ size_t src_len = src->bi_iter.bi_size; -+ size_t dst_len = crc.uncompressed_size << 9; -+ void *workspace; -+ int ret; -+ -+ src_data = bio_map_or_bounce(c, src, READ); -+ -+ switch (crc.compression_type) { -+ case BCH_COMPRESSION_TYPE_lz4_old: -+ case BCH_COMPRESSION_TYPE_lz4: -+ ret = LZ4_decompress_safe_partial(src_data.b, dst_data, -+ src_len, dst_len, dst_len); -+ if (ret != dst_len) -+ goto err; -+ break; -+ case BCH_COMPRESSION_TYPE_gzip: { -+ z_stream strm = { -+ .next_in = src_data.b, -+ .avail_in = src_len, -+ .next_out = dst_data, -+ .avail_out = dst_len, -+ }; -+ -+ workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO); -+ -+ zlib_set_workspace(&strm, workspace); -+ zlib_inflateInit2(&strm, -MAX_WBITS); -+ ret = zlib_inflate(&strm, Z_FINISH); -+ -+ mempool_free(workspace, &c->decompress_workspace); -+ -+ if (ret != Z_STREAM_END) -+ goto err; -+ break; -+ } -+ case BCH_COMPRESSION_TYPE_zstd: { -+ ZSTD_DCtx *ctx; -+ size_t real_src_len = le32_to_cpup(src_data.b); -+ -+ if (real_src_len > src_len - 4) -+ goto err; -+ -+ workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO); -+ ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound()); -+ -+ ret = ZSTD_decompressDCtx(ctx, -+ dst_data, dst_len, -+ src_data.b + 4, real_src_len); -+ -+ mempool_free(workspace, &c->decompress_workspace); -+ -+ if (ret != dst_len) -+ goto err; -+ break; -+ } -+ default: -+ BUG(); -+ } -+ ret = 0; -+out: -+ bio_unmap_or_unbounce(c, src_data); -+ return ret; -+err: -+ ret = -EIO; -+ goto out; -+} -+ -+int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio, -+ struct bch_extent_crc_unpacked *crc) -+{ -+ struct bbuf data = { NULL }; -+ size_t dst_len = crc->uncompressed_size << 9; -+ -+ /* bio must own its pages: */ -+ BUG_ON(!bio->bi_vcnt); -+ BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs); -+ -+ if (crc->uncompressed_size > c->sb.encoded_extent_max || -+ crc->compressed_size > c->sb.encoded_extent_max) { -+ bch_err(c, "error rewriting existing data: extent too big"); -+ return -EIO; -+ } -+ -+ data = __bounce_alloc(c, dst_len, WRITE); -+ -+ if (__bio_uncompress(c, bio, data.b, *crc)) { -+ 
bch_err(c, "error rewriting existing data: decompression error"); -+ bio_unmap_or_unbounce(c, data); -+ return -EIO; -+ } -+ -+ /* -+ * XXX: don't have a good way to assert that the bio was allocated with -+ * enough space, we depend on bch2_move_extent doing the right thing -+ */ -+ bio->bi_iter.bi_size = crc->live_size << 9; -+ -+ memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9)); -+ -+ crc->csum_type = 0; -+ crc->compression_type = 0; -+ crc->compressed_size = crc->live_size; -+ crc->uncompressed_size = crc->live_size; -+ crc->offset = 0; -+ crc->csum = (struct bch_csum) { 0, 0 }; -+ -+ bio_unmap_or_unbounce(c, data); -+ return 0; -+} -+ -+int bch2_bio_uncompress(struct bch_fs *c, struct bio *src, -+ struct bio *dst, struct bvec_iter dst_iter, -+ struct bch_extent_crc_unpacked crc) -+{ -+ struct bbuf dst_data = { NULL }; -+ size_t dst_len = crc.uncompressed_size << 9; -+ int ret = -ENOMEM; -+ -+ if (crc.uncompressed_size > c->sb.encoded_extent_max || -+ crc.compressed_size > c->sb.encoded_extent_max) -+ return -EIO; -+ -+ dst_data = dst_len == dst_iter.bi_size -+ ? __bio_map_or_bounce(c, dst, dst_iter, WRITE) -+ : __bounce_alloc(c, dst_len, WRITE); -+ -+ ret = __bio_uncompress(c, src, dst_data.b, crc); -+ if (ret) -+ goto err; -+ -+ if (dst_data.type != BB_NONE && -+ dst_data.type != BB_VMAP) -+ memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9)); -+err: -+ bio_unmap_or_unbounce(c, dst_data); -+ return ret; -+} -+ -+static int attempt_compress(struct bch_fs *c, -+ void *workspace, -+ void *dst, size_t dst_len, -+ void *src, size_t src_len, -+ enum bch_compression_type compression_type) -+{ -+ switch (compression_type) { -+ case BCH_COMPRESSION_TYPE_lz4: { -+ int len = src_len; -+ int ret = LZ4_compress_destSize( -+ src, dst, -+ &len, dst_len, -+ workspace); -+ -+ if (len < src_len) -+ return -len; -+ -+ return ret; -+ } -+ case BCH_COMPRESSION_TYPE_gzip: { -+ z_stream strm = { -+ .next_in = src, -+ .avail_in = src_len, -+ .next_out = dst, -+ .avail_out = dst_len, -+ }; -+ -+ zlib_set_workspace(&strm, workspace); -+ zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION, -+ Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, -+ Z_DEFAULT_STRATEGY); -+ -+ if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END) -+ return 0; -+ -+ if (zlib_deflateEnd(&strm) != Z_OK) -+ return 0; -+ -+ return strm.total_out; -+ } -+ case BCH_COMPRESSION_TYPE_zstd: { -+ ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace, -+ ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams)); -+ -+ size_t len = ZSTD_compressCCtx(ctx, -+ dst + 4, dst_len - 4, -+ src, src_len, -+ c->zstd_params); -+ if (ZSTD_isError(len)) -+ return 0; -+ -+ *((__le32 *) dst) = cpu_to_le32(len); -+ return len + 4; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+static unsigned __bio_compress(struct bch_fs *c, -+ struct bio *dst, size_t *dst_len, -+ struct bio *src, size_t *src_len, -+ enum bch_compression_type compression_type) -+{ -+ struct bbuf src_data = { NULL }, dst_data = { NULL }; -+ void *workspace; -+ unsigned pad; -+ int ret = 0; -+ -+ BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR); -+ BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type])); -+ -+ /* If it's only one block, don't bother trying to compress: */ -+ if (bio_sectors(src) <= c->opts.block_size) -+ return 0; -+ -+ dst_data = bio_map_or_bounce(c, dst, WRITE); -+ src_data = bio_map_or_bounce(c, src, READ); -+ -+ workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOIO); -+ -+ *src_len = src->bi_iter.bi_size; -+ *dst_len = dst->bi_iter.bi_size; -+ -+ /* -+ * XXX: 
this algorithm sucks when the compression code doesn't tell us -+ * how much would fit, like LZ4 does: -+ */ -+ while (1) { -+ if (*src_len <= block_bytes(c)) { -+ ret = -1; -+ break; -+ } -+ -+ ret = attempt_compress(c, workspace, -+ dst_data.b, *dst_len, -+ src_data.b, *src_len, -+ compression_type); -+ if (ret > 0) { -+ *dst_len = ret; -+ ret = 0; -+ break; -+ } -+ -+ /* Didn't fit: should we retry with a smaller amount? */ -+ if (*src_len <= *dst_len) { -+ ret = -1; -+ break; -+ } -+ -+ /* -+ * If ret is negative, it's a hint as to how much data would fit -+ */ -+ BUG_ON(-ret >= *src_len); -+ -+ if (ret < 0) -+ *src_len = -ret; -+ else -+ *src_len -= (*src_len - *dst_len) / 2; -+ *src_len = round_down(*src_len, block_bytes(c)); -+ } -+ -+ mempool_free(workspace, &c->compress_workspace[compression_type]); -+ -+ if (ret) -+ goto err; -+ -+ /* Didn't get smaller: */ -+ if (round_up(*dst_len, block_bytes(c)) >= *src_len) -+ goto err; -+ -+ pad = round_up(*dst_len, block_bytes(c)) - *dst_len; -+ -+ memset(dst_data.b + *dst_len, 0, pad); -+ *dst_len += pad; -+ -+ if (dst_data.type != BB_NONE && -+ dst_data.type != BB_VMAP) -+ memcpy_to_bio(dst, dst->bi_iter, dst_data.b); -+ -+ BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size); -+ BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size); -+ BUG_ON(*dst_len & (block_bytes(c) - 1)); -+ BUG_ON(*src_len & (block_bytes(c) - 1)); -+out: -+ bio_unmap_or_unbounce(c, src_data); -+ bio_unmap_or_unbounce(c, dst_data); -+ return compression_type; -+err: -+ compression_type = BCH_COMPRESSION_TYPE_incompressible; -+ goto out; -+} -+ -+unsigned bch2_bio_compress(struct bch_fs *c, -+ struct bio *dst, size_t *dst_len, -+ struct bio *src, size_t *src_len, -+ unsigned compression_type) -+{ -+ unsigned orig_dst = dst->bi_iter.bi_size; -+ unsigned orig_src = src->bi_iter.bi_size; -+ -+ /* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */ -+ src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size, -+ c->sb.encoded_extent_max << 9); -+ /* Don't generate a bigger output than input: */ -+ dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size); -+ -+ if (compression_type == BCH_COMPRESSION_TYPE_lz4_old) -+ compression_type = BCH_COMPRESSION_TYPE_lz4; -+ -+ compression_type = -+ __bio_compress(c, dst, dst_len, src, src_len, compression_type); -+ -+ dst->bi_iter.bi_size = orig_dst; -+ src->bi_iter.bi_size = orig_src; -+ return compression_type; -+} -+ -+static int __bch2_fs_compress_init(struct bch_fs *, u64); -+ -+#define BCH_FEATURE_none 0 -+ -+static const unsigned bch2_compression_opt_to_feature[] = { -+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t, -+ BCH_COMPRESSION_OPTS() -+#undef x -+}; -+ -+#undef BCH_FEATURE_none -+ -+static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f) -+{ -+ int ret = 0; -+ -+ if ((c->sb.features & f) == f) -+ return 0; -+ -+ mutex_lock(&c->sb_lock); -+ -+ if ((c->sb.features & f) == f) { -+ mutex_unlock(&c->sb_lock); -+ return 0; -+ } -+ -+ ret = __bch2_fs_compress_init(c, c->sb.features|f); -+ if (ret) { -+ mutex_unlock(&c->sb_lock); -+ return ret; -+ } -+ -+ c->disk_sb.sb->features[0] |= cpu_to_le64(f); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+int bch2_check_set_has_compressed_data(struct bch_fs *c, -+ unsigned compression_type) -+{ -+ BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature)); -+ -+ return compression_type -+ ? 
__bch2_check_set_has_compressed_data(c, -+ 1ULL << bch2_compression_opt_to_feature[compression_type]) -+ : 0; -+} -+ -+void bch2_fs_compress_exit(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ mempool_exit(&c->decompress_workspace); -+ for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++) -+ mempool_exit(&c->compress_workspace[i]); -+ mempool_exit(&c->compression_bounce[WRITE]); -+ mempool_exit(&c->compression_bounce[READ]); -+} -+ -+static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) -+{ -+ size_t max_extent = c->sb.encoded_extent_max << 9; -+ size_t decompress_workspace_size = 0; -+ bool decompress_workspace_needed; -+ ZSTD_parameters params = ZSTD_getParams(0, max_extent, 0); -+ struct { -+ unsigned feature; -+ unsigned type; -+ size_t compress_workspace; -+ size_t decompress_workspace; -+ } compression_types[] = { -+ { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, LZ4_MEM_COMPRESS, 0 }, -+ { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip, -+ zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), -+ zlib_inflate_workspacesize(), }, -+ { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd, -+ ZSTD_CCtxWorkspaceBound(params.cParams), -+ ZSTD_DCtxWorkspaceBound() }, -+ }, *i; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ c->zstd_params = params; -+ -+ for (i = compression_types; -+ i < compression_types + ARRAY_SIZE(compression_types); -+ i++) -+ if (features & (1 << i->feature)) -+ goto have_compressed; -+ -+ goto out; -+have_compressed: -+ -+ if (!mempool_initialized(&c->compression_bounce[READ])) { -+ ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[READ], -+ 1, max_extent); -+ if (ret) -+ goto out; -+ } -+ -+ if (!mempool_initialized(&c->compression_bounce[WRITE])) { -+ ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE], -+ 1, max_extent); -+ if (ret) -+ goto out; -+ } -+ -+ for (i = compression_types; -+ i < compression_types + ARRAY_SIZE(compression_types); -+ i++) { -+ decompress_workspace_size = -+ max(decompress_workspace_size, i->decompress_workspace); -+ -+ if (!(features & (1 << i->feature))) -+ continue; -+ -+ if (i->decompress_workspace) -+ decompress_workspace_needed = true; -+ -+ if (mempool_initialized(&c->compress_workspace[i->type])) -+ continue; -+ -+ ret = mempool_init_kvpmalloc_pool( -+ &c->compress_workspace[i->type], -+ 1, i->compress_workspace); -+ if (ret) -+ goto out; -+ } -+ -+ if (!mempool_initialized(&c->decompress_workspace)) { -+ ret = mempool_init_kvpmalloc_pool( -+ &c->decompress_workspace, -+ 1, decompress_workspace_size); -+ if (ret) -+ goto out; -+ } -+out: -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -+ -+int bch2_fs_compress_init(struct bch_fs *c) -+{ -+ u64 f = c->sb.features; -+ -+ if (c->opts.compression) -+ f |= 1ULL << bch2_compression_opt_to_feature[c->opts.compression]; -+ -+ if (c->opts.background_compression) -+ f |= 1ULL << bch2_compression_opt_to_feature[c->opts.background_compression]; -+ -+ return __bch2_fs_compress_init(c, f); -+ -+} -diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h -new file mode 100644 -index 000000000000..4bab1f61b3b5 ---- /dev/null -+++ b/fs/bcachefs/compress.h -@@ -0,0 +1,18 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_COMPRESS_H -+#define _BCACHEFS_COMPRESS_H -+ -+#include "extents_types.h" -+ -+int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *, -+ struct bch_extent_crc_unpacked *); -+int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *, -+ struct bvec_iter, struct bch_extent_crc_unpacked); -+unsigned 
bch2_bio_compress(struct bch_fs *, struct bio *, size_t *, -+ struct bio *, size_t *, unsigned); -+ -+int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned); -+void bch2_fs_compress_exit(struct bch_fs *); -+int bch2_fs_compress_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_COMPRESS_H */ -diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c -new file mode 100644 -index 000000000000..aa10591a3b1a ---- /dev/null -+++ b/fs/bcachefs/debug.c -@@ -0,0 +1,432 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Assorted bcachefs debug code -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "debug.h" -+#include "error.h" -+#include "extents.h" -+#include "fsck.h" -+#include "inode.h" -+#include "io.h" -+#include "super.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+static struct dentry *bch_debug; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+void __bch2_btree_verify(struct bch_fs *c, struct btree *b) -+{ -+ struct btree *v = c->verify_data; -+ struct btree_node *n_ondisk, *n_sorted, *n_inmemory; -+ struct bset *sorted, *inmemory; -+ struct extent_ptr_decoded pick; -+ struct bch_dev *ca; -+ struct bio *bio; -+ -+ if (c->opts.nochanges) -+ return; -+ -+ btree_node_io_lock(b); -+ mutex_lock(&c->verify_lock); -+ -+ n_ondisk = c->verify_ondisk; -+ n_sorted = c->verify_data->data; -+ n_inmemory = b->data; -+ -+ bkey_copy(&v->key, &b->key); -+ v->written = 0; -+ v->c.level = b->c.level; -+ v->c.btree_id = b->c.btree_id; -+ bch2_btree_keys_init(v, &c->expensive_debug_checks); -+ -+ if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), -+ NULL, &pick) <= 0) -+ return; -+ -+ ca = bch_dev_bkey_exists(c, pick.ptr.dev); -+ if (!bch2_dev_get_ioref(ca, READ)) -+ return; -+ -+ bio = bio_alloc_bioset(GFP_NOIO, -+ buf_pages(n_sorted, btree_bytes(c)), -+ &c->btree_bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_opf = REQ_OP_READ|REQ_META; -+ bio->bi_iter.bi_sector = pick.ptr.offset; -+ bch2_bio_map(bio, n_sorted, btree_bytes(c)); -+ -+ submit_bio_wait(bio); -+ -+ bio_put(bio); -+ percpu_ref_put(&ca->io_ref); -+ -+ memcpy(n_ondisk, n_sorted, btree_bytes(c)); -+ -+ if (bch2_btree_node_read_done(c, v, false)) -+ goto out; -+ -+ n_sorted = c->verify_data->data; -+ sorted = &n_sorted->keys; -+ inmemory = &n_inmemory->keys; -+ -+ if (inmemory->u64s != sorted->u64s || -+ memcmp(inmemory->start, -+ sorted->start, -+ vstruct_end(inmemory) - (void *) inmemory->start)) { -+ unsigned offset = 0, sectors; -+ struct bset *i; -+ unsigned j; -+ -+ console_lock(); -+ -+ printk(KERN_ERR "*** in memory:\n"); -+ bch2_dump_bset(c, b, inmemory, 0); -+ -+ printk(KERN_ERR "*** read back in:\n"); -+ bch2_dump_bset(c, v, sorted, 0); -+ -+ while (offset < b->written) { -+ if (!offset ) { -+ i = &n_ondisk->keys; -+ sectors = vstruct_blocks(n_ondisk, c->block_bits) << -+ c->block_bits; -+ } else { -+ struct btree_node_entry *bne = -+ (void *) n_ondisk + (offset << 9); -+ i = &bne->keys; -+ -+ sectors = vstruct_blocks(bne, c->block_bits) << -+ c->block_bits; -+ } -+ -+ printk(KERN_ERR "*** on disk block %u:\n", offset); -+ bch2_dump_bset(c, b, i, offset); -+ -+ offset += sectors; -+ } -+ -+ printk(KERN_ERR "*** block %u/%u not written\n", -+ offset >> c->block_bits, btree_blocks(c)); -+ -+ for (j = 0; j < le16_to_cpu(inmemory->u64s); j++) -+ if (inmemory->_data[j] != sorted->_data[j]) 
-+ break; -+ -+ printk(KERN_ERR "b->written %u\n", b->written); -+ -+ console_unlock(); -+ panic("verify failed at %u\n", j); -+ } -+out: -+ mutex_unlock(&c->verify_lock); -+ btree_node_io_unlock(b); -+} -+ -+#endif -+ -+#ifdef CONFIG_DEBUG_FS -+ -+/* XXX: bch_fs refcounting */ -+ -+struct dump_iter { -+ struct bpos from; -+ struct bch_fs *c; -+ enum btree_id id; -+ -+ char buf[PAGE_SIZE]; -+ size_t bytes; /* what's currently in buf */ -+ -+ char __user *ubuf; /* destination user buffer */ -+ size_t size; /* size of requested read */ -+ ssize_t ret; /* bytes read so far */ -+}; -+ -+static int flush_buf(struct dump_iter *i) -+{ -+ if (i->bytes) { -+ size_t bytes = min(i->bytes, i->size); -+ int err = copy_to_user(i->ubuf, i->buf, bytes); -+ -+ if (err) -+ return err; -+ -+ i->ret += bytes; -+ i->ubuf += bytes; -+ i->size -= bytes; -+ i->bytes -= bytes; -+ memmove(i->buf, i->buf + bytes, i->bytes); -+ } -+ -+ return 0; -+} -+ -+static int bch2_dump_open(struct inode *inode, struct file *file) -+{ -+ struct btree_debug *bd = inode->i_private; -+ struct dump_iter *i; -+ -+ i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL); -+ if (!i) -+ return -ENOMEM; -+ -+ file->private_data = i; -+ i->from = POS_MIN; -+ i->c = container_of(bd, struct bch_fs, btree_debug[bd->id]); -+ i->id = bd->id; -+ -+ return 0; -+} -+ -+static int bch2_dump_release(struct inode *inode, struct file *file) -+{ -+ kfree(file->private_data); -+ return 0; -+} -+ -+static ssize_t bch2_read_btree(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int err; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ err = flush_buf(i); -+ if (err) -+ return err; -+ -+ if (!i->size) -+ return i->ret; -+ -+ bch2_trans_init(&trans, i->c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); -+ k = bch2_btree_iter_peek(iter); -+ -+ while (k.k && !(err = bkey_err(k))) { -+ bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k); -+ i->bytes = strlen(i->buf); -+ BUG_ON(i->bytes >= PAGE_SIZE); -+ i->buf[i->bytes] = '\n'; -+ i->bytes++; -+ -+ k = bch2_btree_iter_next(iter); -+ i->from = iter->pos; -+ -+ err = flush_buf(i); -+ if (err) -+ break; -+ -+ if (!i->size) -+ break; -+ } -+ bch2_trans_exit(&trans); -+ -+ return err < 0 ? err : i->ret; -+} -+ -+static const struct file_operations btree_debug_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_read_btree, -+}; -+ -+static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ int err; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ err = flush_buf(i); -+ if (err) -+ return err; -+ -+ if (!i->size || !bkey_cmp(POS_MAX, i->from)) -+ return i->ret; -+ -+ bch2_trans_init(&trans, i->c, 0, 0); -+ -+ for_each_btree_node(&trans, iter, i->id, i->from, 0, b) { -+ bch2_btree_node_to_text(&PBUF(i->buf), i->c, b); -+ i->bytes = strlen(i->buf); -+ err = flush_buf(i); -+ if (err) -+ break; -+ -+ /* -+ * can't easily correctly restart a btree node traversal across -+ * all nodes, meh -+ */ -+ i->from = bkey_cmp(POS_MAX, b->key.k.p) -+ ? bkey_successor(b->key.k.p) -+ : b->key.k.p; -+ -+ if (!i->size) -+ break; -+ } -+ bch2_trans_exit(&trans); -+ -+ return err < 0 ? 
err : i->ret; -+} -+ -+static const struct file_operations btree_format_debug_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_read_btree_formats, -+}; -+ -+static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct btree *prev_node = NULL; -+ int err; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ err = flush_buf(i); -+ if (err) -+ return err; -+ -+ if (!i->size) -+ return i->ret; -+ -+ bch2_trans_init(&trans, i->c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(err = bkey_err(k))) { -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_packed *_k = -+ bch2_btree_node_iter_peek(&l->iter, l->b); -+ -+ if (l->b != prev_node) { -+ bch2_btree_node_to_text(&PBUF(i->buf), i->c, l->b); -+ i->bytes = strlen(i->buf); -+ err = flush_buf(i); -+ if (err) -+ break; -+ } -+ prev_node = l->b; -+ -+ bch2_bfloat_to_text(&PBUF(i->buf), l->b, _k); -+ i->bytes = strlen(i->buf); -+ err = flush_buf(i); -+ if (err) -+ break; -+ -+ bch2_btree_iter_next(iter); -+ i->from = iter->pos; -+ -+ err = flush_buf(i); -+ if (err) -+ break; -+ -+ if (!i->size) -+ break; -+ } -+ bch2_trans_exit(&trans); -+ -+ return err < 0 ? err : i->ret; -+} -+ -+static const struct file_operations bfloat_failed_debug_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_read_bfloat_failed, -+}; -+ -+void bch2_fs_debug_exit(struct bch_fs *c) -+{ -+ if (!IS_ERR_OR_NULL(c->debug)) -+ debugfs_remove_recursive(c->debug); -+} -+ -+void bch2_fs_debug_init(struct bch_fs *c) -+{ -+ struct btree_debug *bd; -+ char name[100]; -+ -+ if (IS_ERR_OR_NULL(bch_debug)) -+ return; -+ -+ snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b); -+ c->debug = debugfs_create_dir(name, bch_debug); -+ if (IS_ERR_OR_NULL(c->debug)) -+ return; -+ -+ for (bd = c->btree_debug; -+ bd < c->btree_debug + ARRAY_SIZE(c->btree_debug); -+ bd++) { -+ bd->id = bd - c->btree_debug; -+ bd->btree = debugfs_create_file(bch2_btree_ids[bd->id], -+ 0400, c->debug, bd, -+ &btree_debug_ops); -+ -+ snprintf(name, sizeof(name), "%s-formats", -+ bch2_btree_ids[bd->id]); -+ -+ bd->btree_format = debugfs_create_file(name, 0400, c->debug, bd, -+ &btree_format_debug_ops); -+ -+ snprintf(name, sizeof(name), "%s-bfloat-failed", -+ bch2_btree_ids[bd->id]); -+ -+ bd->failed = debugfs_create_file(name, 0400, c->debug, bd, -+ &bfloat_failed_debug_ops); -+ } -+} -+ -+#endif -+ -+void bch2_debug_exit(void) -+{ -+ if (!IS_ERR_OR_NULL(bch_debug)) -+ debugfs_remove_recursive(bch_debug); -+} -+ -+int __init bch2_debug_init(void) -+{ -+ int ret = 0; -+ -+ bch_debug = debugfs_create_dir("bcachefs", NULL); -+ return ret; -+} -diff --git a/fs/bcachefs/debug.h b/fs/bcachefs/debug.h -new file mode 100644 -index 000000000000..56c2d1ab5f63 ---- /dev/null -+++ b/fs/bcachefs/debug.h -@@ -0,0 +1,63 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_DEBUG_H -+#define _BCACHEFS_DEBUG_H -+ -+#include "bcachefs.h" -+ -+struct bio; -+struct btree; -+struct bch_fs; -+ -+#define BCH_DEBUG_PARAM(name, description) extern bool bch2_##name; -+BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ static inline bool name(struct bch_fs *c) \ -+ { return bch2_##name 
|| c->name; } -+BCH_DEBUG_PARAMS_ALWAYS() -+#undef BCH_DEBUG_PARAM -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ static inline bool name(struct bch_fs *c) \ -+ { return bch2_##name || c->name; } -+BCH_DEBUG_PARAMS_DEBUG() -+#undef BCH_DEBUG_PARAM -+ -+void __bch2_btree_verify(struct bch_fs *, struct btree *); -+ -+#define bypass_torture_test(d) ((d)->bypass_torture_test) -+ -+#else /* DEBUG */ -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ static inline bool name(struct bch_fs *c) { return false; } -+BCH_DEBUG_PARAMS_DEBUG() -+#undef BCH_DEBUG_PARAM -+ -+static inline void __bch2_btree_verify(struct bch_fs *c, struct btree *b) {} -+ -+#define bypass_torture_test(d) 0 -+ -+#endif -+ -+static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b) -+{ -+ if (verify_btree_ondisk(c)) -+ __bch2_btree_verify(c, b); -+} -+ -+#ifdef CONFIG_DEBUG_FS -+void bch2_fs_debug_exit(struct bch_fs *); -+void bch2_fs_debug_init(struct bch_fs *); -+#else -+static inline void bch2_fs_debug_exit(struct bch_fs *c) {} -+static inline void bch2_fs_debug_init(struct bch_fs *c) {} -+#endif -+ -+void bch2_debug_exit(void); -+int bch2_debug_init(void); -+ -+#endif /* _BCACHEFS_DEBUG_H */ -diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c -new file mode 100644 -index 000000000000..f34bfda8ab0d ---- /dev/null -+++ b/fs/bcachefs/dirent.c -@@ -0,0 +1,385 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_update.h" -+#include "extents.h" -+#include "dirent.h" -+#include "fs.h" -+#include "keylist.h" -+#include "str_hash.h" -+ -+#include -+ -+unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) -+{ -+ unsigned len = bkey_val_bytes(d.k) - -+ offsetof(struct bch_dirent, d_name); -+ -+ return strnlen(d.v->d_name, len); -+} -+ -+static u64 bch2_dirent_hash(const struct bch_hash_info *info, -+ const struct qstr *name) -+{ -+ struct bch_str_hash_ctx ctx; -+ -+ bch2_str_hash_init(&ctx, info); -+ bch2_str_hash_update(&ctx, info, name->name, name->len); -+ -+ /* [0,2) reserved for dots */ -+ return max_t(u64, bch2_str_hash_end(&ctx, info), 2); -+} -+ -+static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key) -+{ -+ return bch2_dirent_hash(info, key); -+} -+ -+static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) -+{ -+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); -+ struct qstr name = QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d)); -+ -+ return bch2_dirent_hash(info, &name); -+} -+ -+static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r) -+{ -+ struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); -+ int len = bch2_dirent_name_bytes(l); -+ const struct qstr *r = _r; -+ -+ return len - r->len ?: memcmp(l.v->d_name, r->name, len); -+} -+ -+static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) -+{ -+ struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); -+ struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r); -+ int l_len = bch2_dirent_name_bytes(l); -+ int r_len = bch2_dirent_name_bytes(r); -+ -+ return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len); -+} -+ -+const struct bch_hash_desc bch2_dirent_hash_desc = { -+ .btree_id = BTREE_ID_DIRENTS, -+ .key_type = KEY_TYPE_dirent, -+ .hash_key = dirent_hash_key, -+ .hash_bkey = dirent_hash_bkey, -+ .cmp_key = dirent_cmp_key, -+ .cmp_bkey = dirent_cmp_bkey, -+}; -+ -+const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_dirent d = 
bkey_s_c_to_dirent(k); -+ unsigned len; -+ -+ if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent)) -+ return "value too small"; -+ -+ len = bch2_dirent_name_bytes(d); -+ if (!len) -+ return "empty name"; -+ -+ /* -+ * older versions of bcachefs were buggy and creating dirent -+ * keys that were bigger than necessary: -+ */ -+ if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7)) -+ return "value too big"; -+ -+ if (len > BCH_NAME_MAX) -+ return "dirent name too big"; -+ -+ return NULL; -+} -+ -+void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); -+ -+ bch_scnmemcpy(out, d.v->d_name, -+ bch2_dirent_name_bytes(d)); -+ pr_buf(out, " -> %llu type %u", d.v->d_inum, d.v->d_type); -+} -+ -+static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, -+ u8 type, const struct qstr *name, u64 dst) -+{ -+ struct bkey_i_dirent *dirent; -+ unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len); -+ -+ if (name->len > BCH_NAME_MAX) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ BUG_ON(u64s > U8_MAX); -+ -+ dirent = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); -+ if (IS_ERR(dirent)) -+ return dirent; -+ -+ bkey_dirent_init(&dirent->k_i); -+ dirent->k.u64s = u64s; -+ dirent->v.d_inum = cpu_to_le64(dst); -+ dirent->v.d_type = type; -+ -+ memcpy(dirent->v.d_name, name->name, name->len); -+ memset(dirent->v.d_name + name->len, 0, -+ bkey_val_bytes(&dirent->k) - -+ offsetof(struct bch_dirent, d_name) - -+ name->len); -+ -+ EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len); -+ -+ return dirent; -+} -+ -+int bch2_dirent_create(struct btree_trans *trans, -+ u64 dir_inum, const struct bch_hash_info *hash_info, -+ u8 type, const struct qstr *name, u64 dst_inum, -+ int flags) -+{ -+ struct bkey_i_dirent *dirent; -+ int ret; -+ -+ dirent = dirent_create_key(trans, type, name, dst_inum); -+ ret = PTR_ERR_OR_ZERO(dirent); -+ if (ret) -+ return ret; -+ -+ return bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info, -+ dir_inum, &dirent->k_i, flags); -+} -+ -+static void dirent_copy_target(struct bkey_i_dirent *dst, -+ struct bkey_s_c_dirent src) -+{ -+ dst->v.d_inum = src.v->d_inum; -+ dst->v.d_type = src.v->d_type; -+} -+ -+int bch2_dirent_rename(struct btree_trans *trans, -+ u64 src_dir, struct bch_hash_info *src_hash, -+ u64 dst_dir, struct bch_hash_info *dst_hash, -+ const struct qstr *src_name, u64 *src_inum, -+ const struct qstr *dst_name, u64 *dst_inum, -+ enum bch_rename_mode mode) -+{ -+ struct btree_iter *src_iter = NULL, *dst_iter = NULL; -+ struct bkey_s_c old_src, old_dst; -+ struct bkey_i_dirent *new_src = NULL, *new_dst = NULL; -+ struct bpos dst_pos = -+ POS(dst_dir, bch2_dirent_hash(dst_hash, dst_name)); -+ int ret = 0; -+ -+ *src_inum = *dst_inum = 0; -+ -+ /* -+ * Lookup dst: -+ * -+ * Note that in BCH_RENAME mode, we're _not_ checking if -+ * the target already exists - we're relying on the VFS -+ * to do that check for us for correctness: -+ */ -+ dst_iter = mode == BCH_RENAME -+ ? 
bch2_hash_hole(trans, bch2_dirent_hash_desc, -+ dst_hash, dst_dir, dst_name) -+ : bch2_hash_lookup(trans, bch2_dirent_hash_desc, -+ dst_hash, dst_dir, dst_name, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dst_iter); -+ if (ret) -+ goto out; -+ -+ old_dst = bch2_btree_iter_peek_slot(dst_iter); -+ -+ if (mode != BCH_RENAME) -+ *dst_inum = le64_to_cpu(bkey_s_c_to_dirent(old_dst).v->d_inum); -+ -+ /* Lookup src: */ -+ src_iter = bch2_hash_lookup(trans, bch2_dirent_hash_desc, -+ src_hash, src_dir, src_name, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(src_iter); -+ if (ret) -+ goto out; -+ -+ old_src = bch2_btree_iter_peek_slot(src_iter); -+ *src_inum = le64_to_cpu(bkey_s_c_to_dirent(old_src).v->d_inum); -+ -+ /* Create new dst key: */ -+ new_dst = dirent_create_key(trans, 0, dst_name, 0); -+ ret = PTR_ERR_OR_ZERO(new_dst); -+ if (ret) -+ goto out; -+ -+ dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src)); -+ new_dst->k.p = dst_iter->pos; -+ -+ /* Create new src key: */ -+ if (mode == BCH_RENAME_EXCHANGE) { -+ new_src = dirent_create_key(trans, 0, src_name, 0); -+ ret = PTR_ERR_OR_ZERO(new_src); -+ if (ret) -+ goto out; -+ -+ dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst)); -+ new_src->k.p = src_iter->pos; -+ } else { -+ new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); -+ ret = PTR_ERR_OR_ZERO(new_src); -+ if (ret) -+ goto out; -+ -+ bkey_init(&new_src->k); -+ new_src->k.p = src_iter->pos; -+ -+ if (bkey_cmp(dst_pos, src_iter->pos) <= 0 && -+ bkey_cmp(src_iter->pos, dst_iter->pos) < 0) { -+ /* -+ * We have a hash collision for the new dst key, -+ * and new_src - the key we're deleting - is between -+ * new_dst's hashed slot and the slot we're going to be -+ * inserting it into - oops. This will break the hash -+ * table if we don't deal with it: -+ */ -+ if (mode == BCH_RENAME) { -+ /* -+ * If we're not overwriting, we can just insert -+ * new_dst at the src position: -+ */ -+ new_dst->k.p = src_iter->pos; -+ bch2_trans_update(trans, src_iter, -+ &new_dst->k_i, 0); -+ goto out; -+ } else { -+ /* If we're overwriting, we can't insert new_dst -+ * at a different slot because it has to -+ * overwrite old_dst - just make sure to use a -+ * whiteout when deleting src: -+ */ -+ new_src->k.type = KEY_TYPE_whiteout; -+ } -+ } else { -+ /* Check if we need a whiteout to delete src: */ -+ ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc, -+ src_hash, src_iter); -+ if (ret < 0) -+ goto out; -+ -+ if (ret) -+ new_src->k.type = KEY_TYPE_whiteout; -+ } -+ } -+ -+ bch2_trans_update(trans, src_iter, &new_src->k_i, 0); -+ bch2_trans_update(trans, dst_iter, &new_dst->k_i, 0); -+out: -+ bch2_trans_iter_put(trans, src_iter); -+ bch2_trans_iter_put(trans, dst_iter); -+ return ret; -+} -+ -+int bch2_dirent_delete_at(struct btree_trans *trans, -+ const struct bch_hash_info *hash_info, -+ struct btree_iter *iter) -+{ -+ return bch2_hash_delete_at(trans, bch2_dirent_hash_desc, -+ hash_info, iter); -+} -+ -+struct btree_iter * -+__bch2_dirent_lookup_trans(struct btree_trans *trans, u64 dir_inum, -+ const struct bch_hash_info *hash_info, -+ const struct qstr *name, unsigned flags) -+{ -+ return bch2_hash_lookup(trans, bch2_dirent_hash_desc, -+ hash_info, dir_inum, name, flags); -+} -+ -+u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum, -+ const struct bch_hash_info *hash_info, -+ const struct qstr *name) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 inum = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = 
__bch2_dirent_lookup_trans(&trans, dir_inum, -+ hash_info, name, 0); -+ if (IS_ERR(iter)) { -+ BUG_ON(PTR_ERR(iter) == -EINTR); -+ goto out; -+ } -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); -+out: -+ bch2_trans_exit(&trans); -+ return inum; -+} -+ -+int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_DIRENTS, -+ POS(dir_inum, 0), 0, k, ret) { -+ if (k.k->p.inode > dir_inum) -+ break; -+ -+ if (k.k->type == KEY_TYPE_dirent) { -+ ret = -ENOTEMPTY; -+ break; -+ } -+ } -+ bch2_trans_iter_put(trans, iter); -+ -+ return ret; -+} -+ -+int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_dirent dirent; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, -+ POS(inum, ctx->pos), 0, k, ret) { -+ if (k.k->p.inode > inum) -+ break; -+ -+ if (k.k->type != KEY_TYPE_dirent) -+ continue; -+ -+ dirent = bkey_s_c_to_dirent(k); -+ -+ /* -+ * XXX: dir_emit() can fault and block, while we're holding -+ * locks -+ */ -+ ctx->pos = dirent.k->p.offset; -+ if (!dir_emit(ctx, dirent.v->d_name, -+ bch2_dirent_name_bytes(dirent), -+ le64_to_cpu(dirent.v->d_inum), -+ dirent.v->d_type)) -+ break; -+ ctx->pos = dirent.k->p.offset + 1; -+ } -+ ret = bch2_trans_exit(&trans) ?: ret; -+ -+ return ret; -+} -diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h -new file mode 100644 -index 000000000000..34769371dd13 ---- /dev/null -+++ b/fs/bcachefs/dirent.h -@@ -0,0 +1,63 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_DIRENT_H -+#define _BCACHEFS_DIRENT_H -+ -+#include "str_hash.h" -+ -+extern const struct bch_hash_desc bch2_dirent_hash_desc; -+ -+const char *bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_dirent (struct bkey_ops) { \ -+ .key_invalid = bch2_dirent_invalid, \ -+ .val_to_text = bch2_dirent_to_text, \ -+} -+ -+struct qstr; -+struct file; -+struct dir_context; -+struct bch_fs; -+struct bch_hash_info; -+struct bch_inode_info; -+ -+unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent); -+ -+static inline unsigned dirent_val_u64s(unsigned len) -+{ -+ return DIV_ROUND_UP(offsetof(struct bch_dirent, d_name) + len, -+ sizeof(u64)); -+} -+ -+int bch2_dirent_create(struct btree_trans *, u64, -+ const struct bch_hash_info *, u8, -+ const struct qstr *, u64, int); -+ -+int bch2_dirent_delete_at(struct btree_trans *, -+ const struct bch_hash_info *, -+ struct btree_iter *); -+ -+enum bch_rename_mode { -+ BCH_RENAME, -+ BCH_RENAME_OVERWRITE, -+ BCH_RENAME_EXCHANGE, -+}; -+ -+int bch2_dirent_rename(struct btree_trans *, -+ u64, struct bch_hash_info *, -+ u64, struct bch_hash_info *, -+ const struct qstr *, u64 *, -+ const struct qstr *, u64 *, -+ enum bch_rename_mode); -+ -+struct btree_iter * -+__bch2_dirent_lookup_trans(struct btree_trans *, u64, -+ const struct bch_hash_info *, -+ const struct qstr *, unsigned); -+u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *, -+ const struct qstr *); -+ -+int bch2_empty_dir_trans(struct btree_trans *, u64); -+int bch2_readdir(struct bch_fs *, u64, struct dir_context *); -+ -+#endif /* _BCACHEFS_DIRENT_H */ -diff --git a/fs/bcachefs/disk_groups.c 
b/fs/bcachefs/disk_groups.c -new file mode 100644 -index 000000000000..c52b6faac9b4 ---- /dev/null -+++ b/fs/bcachefs/disk_groups.c -@@ -0,0 +1,486 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "disk_groups.h" -+#include "super-io.h" -+ -+#include -+ -+static int group_cmp(const void *_l, const void *_r) -+{ -+ const struct bch_disk_group *l = _l; -+ const struct bch_disk_group *r = _r; -+ -+ return ((BCH_GROUP_DELETED(l) > BCH_GROUP_DELETED(r)) - -+ (BCH_GROUP_DELETED(l) < BCH_GROUP_DELETED(r))) ?: -+ ((BCH_GROUP_PARENT(l) > BCH_GROUP_PARENT(r)) - -+ (BCH_GROUP_PARENT(l) < BCH_GROUP_PARENT(r))) ?: -+ strncmp(l->label, r->label, sizeof(l->label)); -+} -+ -+static const char *bch2_sb_disk_groups_validate(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ field_to_type(f, disk_groups); -+ struct bch_disk_group *g, *sorted = NULL; -+ struct bch_sb_field_members *mi; -+ struct bch_member *m; -+ unsigned i, nr_groups, len; -+ const char *err = NULL; -+ -+ mi = bch2_sb_get_members(sb); -+ groups = bch2_sb_get_disk_groups(sb); -+ nr_groups = disk_groups_nr(groups); -+ -+ for (m = mi->members; -+ m < mi->members + sb->nr_devices; -+ m++) { -+ unsigned g; -+ -+ if (!BCH_MEMBER_GROUP(m)) -+ continue; -+ -+ g = BCH_MEMBER_GROUP(m) - 1; -+ -+ if (g >= nr_groups || -+ BCH_GROUP_DELETED(&groups->entries[g])) -+ return "disk has invalid group"; -+ } -+ -+ if (!nr_groups) -+ return NULL; -+ -+ for (g = groups->entries; -+ g < groups->entries + nr_groups; -+ g++) { -+ if (BCH_GROUP_DELETED(g)) -+ continue; -+ -+ len = strnlen(g->label, sizeof(g->label)); -+ if (!len) { -+ err = "group with empty label"; -+ goto err; -+ } -+ } -+ -+ sorted = kmalloc_array(nr_groups, sizeof(*sorted), GFP_KERNEL); -+ if (!sorted) -+ return "cannot allocate memory"; -+ -+ memcpy(sorted, groups->entries, nr_groups * sizeof(*sorted)); -+ sort(sorted, nr_groups, sizeof(*sorted), group_cmp, NULL); -+ -+ for (i = 0; i + 1 < nr_groups; i++) -+ if (!BCH_GROUP_DELETED(sorted + i) && -+ !group_cmp(sorted + i, sorted + i + 1)) { -+ err = "duplicate groups"; -+ goto err; -+ } -+ -+ err = NULL; -+err: -+ kfree(sorted); -+ return err; -+} -+ -+static void bch2_sb_disk_groups_to_text(struct printbuf *out, -+ struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ field_to_type(f, disk_groups); -+ struct bch_disk_group *g; -+ unsigned nr_groups = disk_groups_nr(groups); -+ -+ for (g = groups->entries; -+ g < groups->entries + nr_groups; -+ g++) { -+ if (g != groups->entries) -+ pr_buf(out, " "); -+ -+ if (BCH_GROUP_DELETED(g)) -+ pr_buf(out, "[deleted]"); -+ else -+ pr_buf(out, "[parent %llu name %s]", -+ BCH_GROUP_PARENT(g), g->label); -+ } -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_disk_groups = { -+ .validate = bch2_sb_disk_groups_validate, -+ .to_text = bch2_sb_disk_groups_to_text -+}; -+ -+int bch2_sb_disk_groups_to_cpu(struct bch_fs *c) -+{ -+ struct bch_sb_field_members *mi; -+ struct bch_sb_field_disk_groups *groups; -+ struct bch_disk_groups_cpu *cpu_g, *old_g; -+ unsigned i, g, nr_groups; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ groups = bch2_sb_get_disk_groups(c->disk_sb.sb); -+ nr_groups = disk_groups_nr(groups); -+ -+ if (!groups) -+ return 0; -+ -+ cpu_g = kzalloc(sizeof(*cpu_g) + -+ sizeof(cpu_g->entries[0]) * nr_groups, GFP_KERNEL); -+ if (!cpu_g) -+ return -ENOMEM; -+ -+ cpu_g->nr = nr_groups; -+ -+ for (i = 0; i < nr_groups; i++) { -+ struct 
bch_disk_group *src = &groups->entries[i]; -+ struct bch_disk_group_cpu *dst = &cpu_g->entries[i]; -+ -+ dst->deleted = BCH_GROUP_DELETED(src); -+ dst->parent = BCH_GROUP_PARENT(src); -+ } -+ -+ for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { -+ struct bch_member *m = mi->members + i; -+ struct bch_disk_group_cpu *dst = -+ &cpu_g->entries[BCH_MEMBER_GROUP(m)]; -+ -+ if (!bch2_member_exists(m)) -+ continue; -+ -+ g = BCH_MEMBER_GROUP(m); -+ while (g) { -+ dst = &cpu_g->entries[g - 1]; -+ __set_bit(i, dst->devs.d); -+ g = dst->parent; -+ } -+ } -+ -+ old_g = rcu_dereference_protected(c->disk_groups, -+ lockdep_is_held(&c->sb_lock)); -+ rcu_assign_pointer(c->disk_groups, cpu_g); -+ if (old_g) -+ kfree_rcu(old_g, rcu); -+ -+ return 0; -+} -+ -+const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *c, unsigned target) -+{ -+ struct target t = target_decode(target); -+ -+ switch (t.type) { -+ case TARGET_NULL: -+ return NULL; -+ case TARGET_DEV: { -+ struct bch_dev *ca = t.dev < c->sb.nr_devices -+ ? rcu_dereference(c->devs[t.dev]) -+ : NULL; -+ return ca ? &ca->self : NULL; -+ } -+ case TARGET_GROUP: { -+ struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups); -+ -+ return g && t.group < g->nr && !g->entries[t.group].deleted -+ ? &g->entries[t.group].devs -+ : NULL; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target) -+{ -+ struct target t = target_decode(target); -+ -+ switch (t.type) { -+ case TARGET_NULL: -+ return false; -+ case TARGET_DEV: -+ return dev == t.dev; -+ case TARGET_GROUP: { -+ struct bch_disk_groups_cpu *g; -+ const struct bch_devs_mask *m; -+ bool ret; -+ -+ rcu_read_lock(); -+ g = rcu_dereference(c->disk_groups); -+ m = g && t.group < g->nr && !g->entries[t.group].deleted -+ ? &g->entries[t.group].devs -+ : NULL; -+ -+ ret = m ? 
test_bit(dev, m->d) : false; -+ rcu_read_unlock(); -+ -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+static int __bch2_disk_group_find(struct bch_sb_field_disk_groups *groups, -+ unsigned parent, -+ const char *name, unsigned namelen) -+{ -+ unsigned i, nr_groups = disk_groups_nr(groups); -+ -+ if (!namelen || namelen > BCH_SB_LABEL_SIZE) -+ return -EINVAL; -+ -+ for (i = 0; i < nr_groups; i++) { -+ struct bch_disk_group *g = groups->entries + i; -+ -+ if (BCH_GROUP_DELETED(g)) -+ continue; -+ -+ if (!BCH_GROUP_DELETED(g) && -+ BCH_GROUP_PARENT(g) == parent && -+ strnlen(g->label, sizeof(g->label)) == namelen && -+ !memcmp(name, g->label, namelen)) -+ return i; -+ } -+ -+ return -1; -+} -+ -+static int __bch2_disk_group_add(struct bch_sb_handle *sb, unsigned parent, -+ const char *name, unsigned namelen) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ bch2_sb_get_disk_groups(sb->sb); -+ unsigned i, nr_groups = disk_groups_nr(groups); -+ struct bch_disk_group *g; -+ -+ if (!namelen || namelen > BCH_SB_LABEL_SIZE) -+ return -EINVAL; -+ -+ for (i = 0; -+ i < nr_groups && !BCH_GROUP_DELETED(&groups->entries[i]); -+ i++) -+ ; -+ -+ if (i == nr_groups) { -+ unsigned u64s = -+ (sizeof(struct bch_sb_field_disk_groups) + -+ sizeof(struct bch_disk_group) * (nr_groups + 1)) / -+ sizeof(u64); -+ -+ groups = bch2_sb_resize_disk_groups(sb, u64s); -+ if (!groups) -+ return -ENOSPC; -+ -+ nr_groups = disk_groups_nr(groups); -+ } -+ -+ BUG_ON(i >= nr_groups); -+ -+ g = &groups->entries[i]; -+ -+ memcpy(g->label, name, namelen); -+ if (namelen < sizeof(g->label)) -+ g->label[namelen] = '\0'; -+ SET_BCH_GROUP_DELETED(g, 0); -+ SET_BCH_GROUP_PARENT(g, parent); -+ SET_BCH_GROUP_DATA_ALLOWED(g, ~0); -+ -+ return i; -+} -+ -+int bch2_disk_path_find(struct bch_sb_handle *sb, const char *name) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ bch2_sb_get_disk_groups(sb->sb); -+ int v = -1; -+ -+ do { -+ const char *next = strchrnul(name, '.'); -+ unsigned len = next - name; -+ -+ if (*next == '.') -+ next++; -+ -+ v = __bch2_disk_group_find(groups, v + 1, name, len); -+ name = next; -+ } while (*name && v >= 0); -+ -+ return v; -+} -+ -+int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name) -+{ -+ struct bch_sb_field_disk_groups *groups; -+ unsigned parent = 0; -+ int v = -1; -+ -+ do { -+ const char *next = strchrnul(name, '.'); -+ unsigned len = next - name; -+ -+ if (*next == '.') -+ next++; -+ -+ groups = bch2_sb_get_disk_groups(sb->sb); -+ -+ v = __bch2_disk_group_find(groups, parent, name, len); -+ if (v < 0) -+ v = __bch2_disk_group_add(sb, parent, name, len); -+ if (v < 0) -+ return v; -+ -+ parent = v + 1; -+ name = next; -+ } while (*name && v >= 0); -+ -+ return v; -+} -+ -+void bch2_disk_path_to_text(struct printbuf *out, -+ struct bch_sb_handle *sb, -+ unsigned v) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ bch2_sb_get_disk_groups(sb->sb); -+ struct bch_disk_group *g; -+ unsigned nr = 0; -+ u16 path[32]; -+ -+ while (1) { -+ if (nr == ARRAY_SIZE(path)) -+ goto inval; -+ -+ if (v >= disk_groups_nr(groups)) -+ goto inval; -+ -+ g = groups->entries + v; -+ -+ if (BCH_GROUP_DELETED(g)) -+ goto inval; -+ -+ path[nr++] = v; -+ -+ if (!BCH_GROUP_PARENT(g)) -+ break; -+ -+ v = BCH_GROUP_PARENT(g) - 1; -+ } -+ -+ while (nr) { -+ v = path[--nr]; -+ g = groups->entries + v; -+ -+ bch_scnmemcpy(out, g->label, -+ strnlen(g->label, sizeof(g->label))); -+ -+ if (nr) -+ pr_buf(out, "."); -+ } -+ return; -+inval: -+ pr_buf(out, "invalid group %u", v); -+} -+ -+int 
bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) -+{ -+ struct bch_member *mi; -+ int v = -1; -+ int ret = 0; -+ -+ mutex_lock(&c->sb_lock); -+ -+ if (!strlen(name) || !strcmp(name, "none")) -+ goto write_sb; -+ -+ v = bch2_disk_path_find_or_create(&c->disk_sb, name); -+ if (v < 0) { -+ mutex_unlock(&c->sb_lock); -+ return v; -+ } -+ -+ ret = bch2_sb_disk_groups_to_cpu(c); -+ if (ret) -+ goto unlock; -+write_sb: -+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; -+ SET_BCH_MEMBER_GROUP(mi, v + 1); -+ -+ bch2_write_super(c); -+unlock: -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_opt_target_parse(struct bch_fs *c, const char *buf, u64 *v) -+{ -+ struct bch_dev *ca; -+ int g; -+ -+ if (!strlen(buf) || !strcmp(buf, "none")) { -+ *v = 0; -+ return 0; -+ } -+ -+ /* Is it a device? */ -+ ca = bch2_dev_lookup(c, buf); -+ if (!IS_ERR(ca)) { -+ *v = dev_to_target(ca->dev_idx); -+ percpu_ref_put(&ca->ref); -+ return 0; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ g = bch2_disk_path_find(&c->disk_sb, buf); -+ mutex_unlock(&c->sb_lock); -+ -+ if (g >= 0) { -+ *v = group_to_target(g); -+ return 0; -+ } -+ -+ return -EINVAL; -+} -+ -+void bch2_opt_target_to_text(struct printbuf *out, struct bch_fs *c, u64 v) -+{ -+ struct target t = target_decode(v); -+ -+ switch (t.type) { -+ case TARGET_NULL: -+ pr_buf(out, "none"); -+ break; -+ case TARGET_DEV: { -+ struct bch_dev *ca; -+ -+ rcu_read_lock(); -+ ca = t.dev < c->sb.nr_devices -+ ? rcu_dereference(c->devs[t.dev]) -+ : NULL; -+ -+ if (ca && percpu_ref_tryget(&ca->io_ref)) { -+ char b[BDEVNAME_SIZE]; -+ -+ pr_buf(out, "/dev/%s", -+ bdevname(ca->disk_sb.bdev, b)); -+ percpu_ref_put(&ca->io_ref); -+ } else if (ca) { -+ pr_buf(out, "offline device %u", t.dev); -+ } else { -+ pr_buf(out, "invalid device %u", t.dev); -+ } -+ -+ rcu_read_unlock(); -+ break; -+ } -+ case TARGET_GROUP: -+ mutex_lock(&c->sb_lock); -+ bch2_disk_path_to_text(out, &c->disk_sb, t.group); -+ mutex_unlock(&c->sb_lock); -+ break; -+ default: -+ BUG(); -+ } -+} -diff --git a/fs/bcachefs/disk_groups.h b/fs/bcachefs/disk_groups.h -new file mode 100644 -index 000000000000..3d84f23c34ed ---- /dev/null -+++ b/fs/bcachefs/disk_groups.h -@@ -0,0 +1,91 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_DISK_GROUPS_H -+#define _BCACHEFS_DISK_GROUPS_H -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_disk_groups; -+ -+static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups) -+{ -+ return groups -+ ? 
(vstruct_end(&groups->field) - -+ (void *) &groups->entries[0]) / sizeof(struct bch_disk_group) -+ : 0; -+} -+ -+struct target { -+ enum { -+ TARGET_NULL, -+ TARGET_DEV, -+ TARGET_GROUP, -+ } type; -+ union { -+ unsigned dev; -+ unsigned group; -+ }; -+}; -+ -+#define TARGET_DEV_START 1 -+#define TARGET_GROUP_START (256 + TARGET_DEV_START) -+ -+static inline u16 dev_to_target(unsigned dev) -+{ -+ return TARGET_DEV_START + dev; -+} -+ -+static inline u16 group_to_target(unsigned group) -+{ -+ return TARGET_GROUP_START + group; -+} -+ -+static inline struct target target_decode(unsigned target) -+{ -+ if (target >= TARGET_GROUP_START) -+ return (struct target) { -+ .type = TARGET_GROUP, -+ .group = target - TARGET_GROUP_START -+ }; -+ -+ if (target >= TARGET_DEV_START) -+ return (struct target) { -+ .type = TARGET_DEV, -+ .group = target - TARGET_DEV_START -+ }; -+ -+ return (struct target) { .type = TARGET_NULL }; -+} -+ -+const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *, unsigned); -+ -+static inline struct bch_devs_mask target_rw_devs(struct bch_fs *c, -+ enum bch_data_type data_type, -+ u16 target) -+{ -+ struct bch_devs_mask devs = c->rw_devs[data_type]; -+ const struct bch_devs_mask *t = bch2_target_to_mask(c, target); -+ -+ if (t) -+ bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX); -+ return devs; -+} -+ -+bool bch2_dev_in_target(struct bch_fs *, unsigned, unsigned); -+ -+int bch2_disk_path_find(struct bch_sb_handle *, const char *); -+ -+/* Exported for userspace bcachefs-tools: */ -+int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *); -+ -+void bch2_disk_path_to_text(struct printbuf *, struct bch_sb_handle *, -+ unsigned); -+ -+int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *); -+void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, u64); -+ -+int bch2_sb_disk_groups_to_cpu(struct bch_fs *); -+ -+int bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *); -+ -+const char *bch2_sb_validate_disk_groups(struct bch_sb *, -+ struct bch_sb_field *); -+ -+#endif /* _BCACHEFS_DISK_GROUPS_H */ -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -new file mode 100644 -index 000000000000..5514f65378ad ---- /dev/null -+++ b/fs/bcachefs/ec.c -@@ -0,0 +1,1639 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+/* erasure coding */ -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_on_stack.h" -+#include "bset.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "io.h" -+#include "keylist.h" -+#include "recovery.h" -+#include "super-io.h" -+#include "util.h" -+ -+#include -+ -+#ifdef __KERNEL__ -+ -+#include -+#include -+ -+static void raid5_recov(unsigned disks, unsigned failed_idx, -+ size_t size, void **data) -+{ -+ unsigned i = 2, nr; -+ -+ BUG_ON(failed_idx >= disks); -+ -+ swap(data[0], data[failed_idx]); -+ memcpy(data[0], data[1], size); -+ -+ while (i < disks) { -+ nr = min_t(unsigned, disks - i, MAX_XOR_BLOCKS); -+ xor_blocks(nr, size, data[0], data + i); -+ i += nr; -+ } -+ -+ swap(data[0], data[failed_idx]); -+} -+ -+static void raid_gen(int nd, int np, size_t size, void **v) -+{ -+ if (np >= 1) -+ raid5_recov(nd + np, nd, size, v); -+ if (np >= 2) -+ raid6_call.gen_syndrome(nd + np, size, v); -+ BUG_ON(np > 2); -+} -+ -+static void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v) -+{ -+ switch (nr) { -+ case 0: -+ break; -+ case 1: -+ if (ir[0] < nd + 1) -+ raid5_recov(nd + 
1, ir[0], size, v); -+ else -+ raid6_call.gen_syndrome(nd + np, size, v); -+ break; -+ case 2: -+ if (ir[1] < nd) { -+ /* data+data failure. */ -+ raid6_2data_recov(nd + np, size, ir[0], ir[1], v); -+ } else if (ir[0] < nd) { -+ /* data + p/q failure */ -+ -+ if (ir[1] == nd) /* data + p failure */ -+ raid6_datap_recov(nd + np, size, ir[0], v); -+ else { /* data + q failure */ -+ raid5_recov(nd + 1, ir[0], size, v); -+ raid6_call.gen_syndrome(nd + np, size, v); -+ } -+ } else { -+ raid_gen(nd, np, size, v); -+ } -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+#else -+ -+#include -+ -+#endif -+ -+struct ec_bio { -+ struct bch_dev *ca; -+ struct ec_stripe_buf *buf; -+ size_t idx; -+ struct bio bio; -+}; -+ -+/* Stripes btree keys: */ -+ -+const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; -+ -+ if (k.k->p.inode) -+ return "invalid stripe key"; -+ -+ if (bkey_val_bytes(k.k) < sizeof(*s)) -+ return "incorrect value size"; -+ -+ if (bkey_val_bytes(k.k) < sizeof(*s) || -+ bkey_val_u64s(k.k) < stripe_val_u64s(s)) -+ return "incorrect value size"; -+ -+ return bch2_bkey_ptrs_invalid(c, k); -+} -+ -+void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; -+ unsigned i; -+ -+ pr_buf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u", -+ s->algorithm, -+ le16_to_cpu(s->sectors), -+ s->nr_blocks - s->nr_redundant, -+ s->nr_redundant, -+ s->csum_type, -+ 1U << s->csum_granularity_bits); -+ -+ for (i = 0; i < s->nr_blocks; i++) -+ pr_buf(out, " %u:%llu:%u", s->ptrs[i].dev, -+ (u64) s->ptrs[i].offset, -+ stripe_blockcount_get(s, i)); -+} -+ -+static int ptr_matches_stripe(struct bch_fs *c, -+ struct bch_stripe *v, -+ const struct bch_extent_ptr *ptr) -+{ -+ unsigned i; -+ -+ for (i = 0; i < v->nr_blocks - v->nr_redundant; i++) { -+ const struct bch_extent_ptr *ptr2 = v->ptrs + i; -+ -+ if (ptr->dev == ptr2->dev && -+ ptr->gen == ptr2->gen && -+ ptr->offset >= ptr2->offset && -+ ptr->offset < ptr2->offset + le16_to_cpu(v->sectors)) -+ return i; -+ } -+ -+ return -1; -+} -+ -+static int extent_matches_stripe(struct bch_fs *c, -+ struct bch_stripe *v, -+ struct bkey_s_c k) -+{ -+ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: { -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ const struct bch_extent_ptr *ptr; -+ int idx; -+ -+ extent_for_each_ptr(e, ptr) { -+ idx = ptr_matches_stripe(c, v, ptr); -+ if (idx >= 0) -+ return idx; -+ } -+ break; -+ } -+ } -+ -+ return -1; -+} -+ -+static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: { -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ const union bch_extent_entry *entry; -+ -+ extent_for_each_entry(e, entry) -+ if (extent_entry_type(entry) == -+ BCH_EXTENT_ENTRY_stripe_ptr && -+ entry->stripe_ptr.idx == idx) -+ return true; -+ -+ break; -+ } -+ } -+ -+ return false; -+} -+ -+/* Checksumming: */ -+ -+static void ec_generate_checksums(struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned csum_granularity = 1 << v->csum_granularity_bits; -+ unsigned csums_per_device = stripe_csums_per_device(v); -+ unsigned csum_bytes = bch_crc_bytes[v->csum_type]; -+ unsigned i, j; -+ -+ if (!csum_bytes) -+ return; -+ -+ BUG_ON(buf->offset); -+ BUG_ON(buf->size != le16_to_cpu(v->sectors)); -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ for (j = 0; j < csums_per_device; j++) { -+ unsigned offset = j << 
v->csum_granularity_bits; -+ unsigned len = min(csum_granularity, buf->size - offset); -+ -+ struct bch_csum csum = -+ bch2_checksum(NULL, v->csum_type, -+ null_nonce(), -+ buf->data[i] + (offset << 9), -+ len << 9); -+ -+ memcpy(stripe_csum(v, i, j), &csum, csum_bytes); -+ } -+ } -+} -+ -+static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned csum_granularity = 1 << v->csum_granularity_bits; -+ unsigned csum_bytes = bch_crc_bytes[v->csum_type]; -+ unsigned i; -+ -+ if (!csum_bytes) -+ return; -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ unsigned offset = buf->offset; -+ unsigned end = buf->offset + buf->size; -+ -+ if (!test_bit(i, buf->valid)) -+ continue; -+ -+ while (offset < end) { -+ unsigned j = offset >> v->csum_granularity_bits; -+ unsigned len = min(csum_granularity, end - offset); -+ struct bch_csum csum; -+ -+ BUG_ON(offset & (csum_granularity - 1)); -+ BUG_ON(offset + len != le16_to_cpu(v->sectors) && -+ ((offset + len) & (csum_granularity - 1))); -+ -+ csum = bch2_checksum(NULL, v->csum_type, -+ null_nonce(), -+ buf->data[i] + ((offset - buf->offset) << 9), -+ len << 9); -+ -+ if (memcmp(stripe_csum(v, i, j), &csum, csum_bytes)) { -+ __bcache_io_error(c, -+ "checksum error while doing reconstruct read (%u:%u)", -+ i, j); -+ clear_bit(i, buf->valid); -+ break; -+ } -+ -+ offset += len; -+ } -+ } -+} -+ -+/* Erasure coding: */ -+ -+static void ec_generate_ec(struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned nr_data = v->nr_blocks - v->nr_redundant; -+ unsigned bytes = le16_to_cpu(v->sectors) << 9; -+ -+ raid_gen(nr_data, v->nr_redundant, bytes, buf->data); -+} -+ -+static unsigned __ec_nr_failed(struct ec_stripe_buf *buf, unsigned nr) -+{ -+ return nr - bitmap_weight(buf->valid, nr); -+} -+ -+static unsigned ec_nr_failed(struct ec_stripe_buf *buf) -+{ -+ return __ec_nr_failed(buf, buf->key.v.nr_blocks); -+} -+ -+static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned i, failed[EC_STRIPE_MAX], nr_failed = 0; -+ unsigned nr_data = v->nr_blocks - v->nr_redundant; -+ unsigned bytes = buf->size << 9; -+ -+ if (ec_nr_failed(buf) > v->nr_redundant) { -+ __bcache_io_error(c, -+ "error doing reconstruct read: unable to read enough blocks"); -+ return -1; -+ } -+ -+ for (i = 0; i < nr_data; i++) -+ if (!test_bit(i, buf->valid)) -+ failed[nr_failed++] = i; -+ -+ raid_rec(nr_failed, failed, nr_data, v->nr_redundant, bytes, buf->data); -+ return 0; -+} -+ -+/* IO: */ -+ -+static void ec_block_endio(struct bio *bio) -+{ -+ struct ec_bio *ec_bio = container_of(bio, struct ec_bio, bio); -+ struct bch_dev *ca = ec_bio->ca; -+ struct closure *cl = bio->bi_private; -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s: %s", -+ bio_data_dir(bio) ? 
"write" : "read", -+ bch2_blk_status_to_str(bio->bi_status))) -+ clear_bit(ec_bio->idx, ec_bio->buf->valid); -+ -+ bio_put(&ec_bio->bio); -+ percpu_ref_put(&ca->io_ref); -+ closure_put(cl); -+} -+ -+static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, -+ unsigned rw, unsigned idx, struct closure *cl) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned offset = 0, bytes = buf->size << 9; -+ struct bch_extent_ptr *ptr = &v->ptrs[idx]; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ if (!bch2_dev_get_ioref(ca, rw)) { -+ clear_bit(idx, buf->valid); -+ return; -+ } -+ -+ while (offset < bytes) { -+ unsigned nr_iovecs = min_t(size_t, BIO_MAX_PAGES, -+ DIV_ROUND_UP(bytes, PAGE_SIZE)); -+ unsigned b = min_t(size_t, bytes - offset, -+ nr_iovecs << PAGE_SHIFT); -+ struct ec_bio *ec_bio; -+ -+ ec_bio = container_of(bio_alloc_bioset(GFP_KERNEL, nr_iovecs, -+ &c->ec_bioset), -+ struct ec_bio, bio); -+ -+ ec_bio->ca = ca; -+ ec_bio->buf = buf; -+ ec_bio->idx = idx; -+ -+ bio_set_dev(&ec_bio->bio, ca->disk_sb.bdev); -+ bio_set_op_attrs(&ec_bio->bio, rw, 0); -+ -+ ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9); -+ ec_bio->bio.bi_end_io = ec_block_endio; -+ ec_bio->bio.bi_private = cl; -+ -+ bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b); -+ -+ closure_get(cl); -+ percpu_ref_get(&ca->io_ref); -+ -+ submit_bio(&ec_bio->bio); -+ -+ offset += b; -+ } -+ -+ percpu_ref_put(&ca->io_ref); -+} -+ -+/* recovery read path: */ -+int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct ec_stripe_buf *buf; -+ struct closure cl; -+ struct bkey_s_c k; -+ struct bch_stripe *v; -+ unsigned stripe_idx; -+ unsigned offset, end; -+ unsigned i, nr_data, csum_granularity; -+ int ret = 0, idx; -+ -+ closure_init_stack(&cl); -+ -+ BUG_ON(!rbio->pick.has_ec); -+ -+ stripe_idx = rbio->pick.ec.idx; -+ -+ buf = kzalloc(sizeof(*buf), GFP_NOIO); -+ if (!buf) -+ return -ENOMEM; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, -+ POS(0, stripe_idx), -+ BTREE_ITER_SLOTS); -+ k = bch2_btree_iter_peek_slot(iter); -+ if (bkey_err(k) || k.k->type != KEY_TYPE_stripe) { -+ __bcache_io_error(c, -+ "error doing reconstruct read: stripe not found"); -+ kfree(buf); -+ return bch2_trans_exit(&trans) ?: -EIO; -+ } -+ -+ bkey_reassemble(&buf->key.k_i, k); -+ bch2_trans_exit(&trans); -+ -+ v = &buf->key.v; -+ -+ nr_data = v->nr_blocks - v->nr_redundant; -+ -+ idx = ptr_matches_stripe(c, v, &rbio->pick.ptr); -+ BUG_ON(idx < 0); -+ -+ csum_granularity = 1U << v->csum_granularity_bits; -+ -+ offset = rbio->bio.bi_iter.bi_sector - v->ptrs[idx].offset; -+ end = offset + bio_sectors(&rbio->bio); -+ -+ BUG_ON(end > le16_to_cpu(v->sectors)); -+ -+ buf->offset = round_down(offset, csum_granularity); -+ buf->size = min_t(unsigned, le16_to_cpu(v->sectors), -+ round_up(end, csum_granularity)) - buf->offset; -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ buf->data[i] = kmalloc(buf->size << 9, GFP_NOIO); -+ if (!buf->data[i]) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ } -+ -+ memset(buf->valid, 0xFF, sizeof(buf->valid)); -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ struct bch_extent_ptr *ptr = v->ptrs + i; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ if (ptr_stale(ca, ptr)) { -+ __bcache_io_error(c, -+ "error doing reconstruct read: stale pointer"); -+ clear_bit(i, buf->valid); -+ continue; -+ } -+ -+ ec_block_io(c, buf, REQ_OP_READ, i, &cl); -+ } -+ -+ 
closure_sync(&cl); -+ -+ if (ec_nr_failed(buf) > v->nr_redundant) { -+ __bcache_io_error(c, -+ "error doing reconstruct read: unable to read enough blocks"); -+ ret = -EIO; -+ goto err; -+ } -+ -+ ec_validate_checksums(c, buf); -+ -+ ret = ec_do_recov(c, buf); -+ if (ret) -+ goto err; -+ -+ memcpy_to_bio(&rbio->bio, rbio->bio.bi_iter, -+ buf->data[idx] + ((offset - buf->offset) << 9)); -+err: -+ for (i = 0; i < v->nr_blocks; i++) -+ kfree(buf->data[i]); -+ kfree(buf); -+ return ret; -+} -+ -+/* stripe bucket accounting: */ -+ -+static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) -+{ -+ ec_stripes_heap n, *h = &c->ec_stripes_heap; -+ -+ if (idx >= h->size) { -+ if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), gfp)) -+ return -ENOMEM; -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ if (n.size > h->size) { -+ memcpy(n.data, h->data, h->used * sizeof(h->data[0])); -+ n.used = h->used; -+ swap(*h, n); -+ } -+ spin_unlock(&c->ec_stripes_heap_lock); -+ -+ free_heap(&n); -+ } -+ -+ if (!genradix_ptr_alloc(&c->stripes[0], idx, gfp)) -+ return -ENOMEM; -+ -+ if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING && -+ !genradix_ptr_alloc(&c->stripes[1], idx, gfp)) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+static int ec_stripe_mem_alloc(struct bch_fs *c, -+ struct btree_iter *iter) -+{ -+ size_t idx = iter->pos.offset; -+ int ret = 0; -+ -+ if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT|__GFP_NOWARN)) -+ return ret; -+ -+ bch2_trans_unlock(iter->trans); -+ ret = -EINTR; -+ -+ if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL)) -+ return ret; -+ -+ return -ENOMEM; -+} -+ -+static ssize_t stripe_idx_to_delete(struct bch_fs *c) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ -+ return h->used && h->data[0].blocks_nonempty == 0 -+ ? h->data[0].idx : -1; -+} -+ -+static inline int ec_stripes_heap_cmp(ec_stripes_heap *h, -+ struct ec_stripe_heap_entry l, -+ struct ec_stripe_heap_entry r) -+{ -+ return ((l.blocks_nonempty > r.blocks_nonempty) - -+ (l.blocks_nonempty < r.blocks_nonempty)); -+} -+ -+static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h, -+ size_t i) -+{ -+ struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap); -+ -+ genradix_ptr(&c->stripes[0], h->data[i].idx)->heap_idx = i; -+} -+ -+static void heap_verify_backpointer(struct bch_fs *c, size_t idx) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ struct stripe *m = genradix_ptr(&c->stripes[0], idx); -+ -+ BUG_ON(!m->alive); -+ BUG_ON(m->heap_idx >= h->used); -+ BUG_ON(h->data[m->heap_idx].idx != idx); -+} -+ -+void bch2_stripes_heap_del(struct bch_fs *c, -+ struct stripe *m, size_t idx) -+{ -+ if (!m->on_heap) -+ return; -+ -+ m->on_heap = false; -+ -+ heap_verify_backpointer(c, idx); -+ -+ heap_del(&c->ec_stripes_heap, m->heap_idx, -+ ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+} -+ -+void bch2_stripes_heap_insert(struct bch_fs *c, -+ struct stripe *m, size_t idx) -+{ -+ if (m->on_heap) -+ return; -+ -+ BUG_ON(heap_full(&c->ec_stripes_heap)); -+ -+ m->on_heap = true; -+ -+ heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) { -+ .idx = idx, -+ .blocks_nonempty = m->blocks_nonempty, -+ }), -+ ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+ -+ heap_verify_backpointer(c, idx); -+} -+ -+void bch2_stripes_heap_update(struct bch_fs *c, -+ struct stripe *m, size_t idx) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ size_t i; -+ -+ if (!m->on_heap) -+ return; -+ -+ heap_verify_backpointer(c, idx); -+ -+ h->data[m->heap_idx].blocks_nonempty = 
m->blocks_nonempty; -+ -+ i = m->heap_idx; -+ heap_sift_up(h, i, ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+ heap_sift_down(h, i, ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+ -+ heap_verify_backpointer(c, idx); -+ -+ if (stripe_idx_to_delete(c) >= 0 && -+ !percpu_ref_is_dying(&c->writes)) -+ schedule_work(&c->ec_stripe_delete_work); -+} -+ -+/* stripe deletion */ -+ -+static int ec_stripe_delete(struct bch_fs *c, size_t idx) -+{ -+ //pr_info("deleting stripe %zu", idx); -+ return bch2_btree_delete_range(c, BTREE_ID_EC, -+ POS(0, idx), -+ POS(0, idx + 1), -+ NULL); -+} -+ -+static void ec_stripe_delete_work(struct work_struct *work) -+{ -+ struct bch_fs *c = -+ container_of(work, struct bch_fs, ec_stripe_delete_work); -+ ssize_t idx; -+ -+ while (1) { -+ spin_lock(&c->ec_stripes_heap_lock); -+ idx = stripe_idx_to_delete(c); -+ if (idx < 0) { -+ spin_unlock(&c->ec_stripes_heap_lock); -+ break; -+ } -+ -+ bch2_stripes_heap_del(c, genradix_ptr(&c->stripes[0], idx), idx); -+ spin_unlock(&c->ec_stripes_heap_lock); -+ -+ if (ec_stripe_delete(c, idx)) -+ break; -+ } -+} -+ -+/* stripe creation: */ -+ -+static int ec_stripe_bkey_insert(struct bch_fs *c, -+ struct bkey_i_stripe *stripe) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bpos start_pos = POS(0, c->ec_stripe_hint); -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EC, start_pos, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) { -+ if (start_pos.offset) { -+ start_pos = POS_MIN; -+ bch2_btree_iter_set_pos(iter, start_pos); -+ continue; -+ } -+ -+ ret = -ENOSPC; -+ break; -+ } -+ -+ if (bkey_deleted(k.k)) -+ goto found_slot; -+ } -+ -+ goto err; -+found_slot: -+ start_pos = iter->pos; -+ -+ ret = ec_stripe_mem_alloc(c, iter); -+ if (ret) -+ goto err; -+ -+ stripe->k.p = iter->pos; -+ -+ bch2_trans_update(&trans, iter, &stripe->k_i, 0); -+ -+ ret = bch2_trans_commit(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+err: -+ bch2_trans_iter_put(&trans, iter); -+ -+ if (ret == -EINTR) -+ goto retry; -+ -+ c->ec_stripe_hint = ret ? 
start_pos.offset : start_pos.offset + 1; -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+static void extent_stripe_ptr_add(struct bkey_s_extent e, -+ struct ec_stripe_buf *s, -+ struct bch_extent_ptr *ptr, -+ unsigned block) -+{ -+ struct bch_extent_stripe_ptr *dst = (void *) ptr; -+ union bch_extent_entry *end = extent_entry_last(e); -+ -+ memmove_u64s_up(dst + 1, dst, (u64 *) end - (u64 *) dst); -+ e.k->u64s += sizeof(*dst) / sizeof(u64); -+ -+ *dst = (struct bch_extent_stripe_ptr) { -+ .type = 1 << BCH_EXTENT_ENTRY_stripe_ptr, -+ .block = block, -+ .idx = s->key.k.p.offset, -+ }; -+} -+ -+static int ec_stripe_update_ptrs(struct bch_fs *c, -+ struct ec_stripe_buf *s, -+ struct bkey *pos) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_extent e; -+ struct bkey_on_stack sk; -+ int ret = 0, dev, idx; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ /* XXX this doesn't support the reflink btree */ -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ bkey_start_pos(pos), -+ BTREE_ITER_INTENT); -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret = bkey_err(k)) && -+ bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) { -+ struct bch_extent_ptr *ptr, *ec_ptr = NULL; -+ -+ if (extent_has_stripe_ptr(k, s->key.k.p.offset)) { -+ bch2_btree_iter_next(iter); -+ continue; -+ } -+ -+ idx = extent_matches_stripe(c, &s->key.v, k); -+ if (idx < 0) { -+ bch2_btree_iter_next(iter); -+ continue; -+ } -+ -+ dev = s->key.v.ptrs[idx].dev; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ e = bkey_i_to_s_extent(sk.k); -+ -+ bch2_bkey_drop_ptrs(e.s, ptr, ptr->dev != dev); -+ ec_ptr = (void *) bch2_bkey_has_device(e.s_c, dev); -+ BUG_ON(!ec_ptr); -+ -+ extent_stripe_ptr_add(e, s, ec_ptr, idx); -+ -+ bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); -+ bch2_trans_update(&trans, iter, sk.k, 0); -+ -+ ret = bch2_trans_commit(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE); -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ -+ return ret; -+} -+ -+/* -+ * data buckets of new stripe all written: create the stripe -+ */ -+static void ec_stripe_create(struct ec_stripe_new *s) -+{ -+ struct bch_fs *c = s->c; -+ struct open_bucket *ob; -+ struct bkey_i *k; -+ struct stripe *m; -+ struct bch_stripe *v = &s->stripe.key.v; -+ unsigned i, nr_data = v->nr_blocks - v->nr_redundant; -+ struct closure cl; -+ int ret; -+ -+ BUG_ON(s->h->s == s); -+ -+ closure_init_stack(&cl); -+ -+ if (s->err) { -+ if (s->err != -EROFS) -+ bch_err(c, "error creating stripe: error writing data buckets"); -+ goto err; -+ } -+ -+ BUG_ON(!s->allocated); -+ -+ if (!percpu_ref_tryget(&c->writes)) -+ goto err; -+ -+ BUG_ON(bitmap_weight(s->blocks_allocated, -+ s->blocks.nr) != s->blocks.nr); -+ -+ ec_generate_ec(&s->stripe); -+ -+ ec_generate_checksums(&s->stripe); -+ -+ /* write p/q: */ -+ for (i = nr_data; i < v->nr_blocks; i++) -+ ec_block_io(c, &s->stripe, REQ_OP_WRITE, i, &cl); -+ -+ closure_sync(&cl); -+ -+ for (i = nr_data; i < v->nr_blocks; i++) -+ if (!test_bit(i, s->stripe.valid)) { -+ bch_err(c, "error creating stripe: error writing redundancy buckets"); -+ goto err_put_writes; -+ } -+ -+ ret = s->existing_stripe -+ ? 
bch2_btree_insert(c, BTREE_ID_EC, &s->stripe.key.k_i, -+ NULL, NULL, BTREE_INSERT_NOFAIL) -+ : ec_stripe_bkey_insert(c, &s->stripe.key); -+ if (ret) { -+ bch_err(c, "error creating stripe: error creating stripe key"); -+ goto err_put_writes; -+ } -+ -+ for_each_keylist_key(&s->keys, k) { -+ ret = ec_stripe_update_ptrs(c, &s->stripe, &k->k); -+ if (ret) { -+ bch_err(c, "error creating stripe: error updating pointers"); -+ break; -+ } -+ } -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ m = genradix_ptr(&c->stripes[0], s->stripe.key.k.p.offset); -+#if 0 -+ pr_info("created a %s stripe %llu", -+ s->existing_stripe ? "existing" : "new", -+ s->stripe.key.k.p.offset); -+#endif -+ BUG_ON(m->on_heap); -+ bch2_stripes_heap_insert(c, m, s->stripe.key.k.p.offset); -+ spin_unlock(&c->ec_stripes_heap_lock); -+err_put_writes: -+ percpu_ref_put(&c->writes); -+err: -+ open_bucket_for_each(c, &s->blocks, ob, i) { -+ ob->ec = NULL; -+ __bch2_open_bucket_put(c, ob); -+ } -+ -+ bch2_open_buckets_put(c, &s->parity); -+ -+ bch2_keylist_free(&s->keys, s->inline_keys); -+ -+ for (i = 0; i < s->stripe.key.v.nr_blocks; i++) -+ kvpfree(s->stripe.data[i], s->stripe.size << 9); -+ kfree(s); -+} -+ -+static void ec_stripe_create_work(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(work, -+ struct bch_fs, ec_stripe_create_work); -+ struct ec_stripe_new *s, *n; -+restart: -+ mutex_lock(&c->ec_stripe_new_lock); -+ list_for_each_entry_safe(s, n, &c->ec_stripe_new_list, list) -+ if (!atomic_read(&s->pin)) { -+ list_del(&s->list); -+ mutex_unlock(&c->ec_stripe_new_lock); -+ ec_stripe_create(s); -+ goto restart; -+ } -+ mutex_unlock(&c->ec_stripe_new_lock); -+} -+ -+static void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s) -+{ -+ BUG_ON(atomic_read(&s->pin) <= 0); -+ -+ if (atomic_dec_and_test(&s->pin)) { -+ BUG_ON(!s->pending); -+ queue_work(system_long_wq, &c->ec_stripe_create_work); -+ } -+} -+ -+static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h) -+{ -+ struct ec_stripe_new *s = h->s; -+ -+ BUG_ON(!s->allocated && !s->err); -+ -+ h->s = NULL; -+ s->pending = true; -+ -+ mutex_lock(&c->ec_stripe_new_lock); -+ list_add(&s->list, &c->ec_stripe_new_list); -+ mutex_unlock(&c->ec_stripe_new_lock); -+ -+ ec_stripe_new_put(c, s); -+} -+ -+/* have a full bucket - hand it off to be erasure coded: */ -+void bch2_ec_bucket_written(struct bch_fs *c, struct open_bucket *ob) -+{ -+ struct ec_stripe_new *s = ob->ec; -+ -+ if (ob->sectors_free) -+ s->err = -1; -+ -+ ec_stripe_new_put(c, s); -+} -+ -+void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob) -+{ -+ struct ec_stripe_new *s = ob->ec; -+ -+ s->err = -EIO; -+} -+ -+void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp) -+{ -+ struct open_bucket *ob = ec_open_bucket(c, &wp->ptrs); -+ struct bch_dev *ca; -+ unsigned offset; -+ -+ if (!ob) -+ return NULL; -+ -+ ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ offset = ca->mi.bucket_size - ob->sectors_free; -+ -+ return ob->ec->stripe.data[ob->ec_idx] + (offset << 9); -+} -+ -+void bch2_ec_add_backpointer(struct bch_fs *c, struct write_point *wp, -+ struct bpos pos, unsigned sectors) -+{ -+ struct open_bucket *ob = ec_open_bucket(c, &wp->ptrs); -+ struct ec_stripe_new *ec; -+ -+ if (!ob) -+ return; -+ -+ //pr_info("adding backpointer at %llu:%llu", pos.inode, pos.offset); -+ -+ ec = ob->ec; -+ mutex_lock(&ec->lock); -+ -+ if (bch2_keylist_realloc(&ec->keys, ec->inline_keys, -+ ARRAY_SIZE(ec->inline_keys), -+ BKEY_U64s)) { -+ BUG(); -+ } -+ -+ 
bkey_init(&ec->keys.top->k); -+ ec->keys.top->k.p = pos; -+ bch2_key_resize(&ec->keys.top->k, sectors); -+ bch2_keylist_push(&ec->keys); -+ -+ mutex_unlock(&ec->lock); -+} -+ -+static int unsigned_cmp(const void *_l, const void *_r) -+{ -+ unsigned l = *((const unsigned *) _l); -+ unsigned r = *((const unsigned *) _r); -+ -+ return cmp_int(l, r); -+} -+ -+/* pick most common bucket size: */ -+static unsigned pick_blocksize(struct bch_fs *c, -+ struct bch_devs_mask *devs) -+{ -+ struct bch_dev *ca; -+ unsigned i, nr = 0, sizes[BCH_SB_MEMBERS_MAX]; -+ struct { -+ unsigned nr, size; -+ } cur = { 0, 0 }, best = { 0, 0 }; -+ -+ for_each_member_device_rcu(ca, c, i, devs) -+ sizes[nr++] = ca->mi.bucket_size; -+ -+ sort(sizes, nr, sizeof(unsigned), unsigned_cmp, NULL); -+ -+ for (i = 0; i < nr; i++) { -+ if (sizes[i] != cur.size) { -+ if (cur.nr > best.nr) -+ best = cur; -+ -+ cur.nr = 0; -+ cur.size = sizes[i]; -+ } -+ -+ cur.nr++; -+ } -+ -+ if (cur.nr > best.nr) -+ best = cur; -+ -+ return best.size; -+} -+ -+static bool may_create_new_stripe(struct bch_fs *c) -+{ -+ return false; -+} -+ -+static void ec_stripe_key_init(struct bch_fs *c, -+ struct bkey_i_stripe *s, -+ unsigned nr_data, -+ unsigned nr_parity, -+ unsigned stripe_size) -+{ -+ unsigned u64s; -+ -+ bkey_stripe_init(&s->k_i); -+ s->v.sectors = cpu_to_le16(stripe_size); -+ s->v.algorithm = 0; -+ s->v.nr_blocks = nr_data + nr_parity; -+ s->v.nr_redundant = nr_parity; -+ s->v.csum_granularity_bits = ilog2(c->sb.encoded_extent_max); -+ s->v.csum_type = BCH_CSUM_CRC32C; -+ s->v.pad = 0; -+ -+ while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) { -+ BUG_ON(1 << s->v.csum_granularity_bits >= -+ le16_to_cpu(s->v.sectors) || -+ s->v.csum_granularity_bits == U8_MAX); -+ s->v.csum_granularity_bits++; -+ } -+ -+ set_bkey_val_u64s(&s->k, u64s); -+} -+ -+static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) -+{ -+ struct ec_stripe_new *s; -+ unsigned i; -+ -+ lockdep_assert_held(&h->lock); -+ -+ s = kzalloc(sizeof(*s), GFP_KERNEL); -+ if (!s) -+ return -ENOMEM; -+ -+ mutex_init(&s->lock); -+ atomic_set(&s->pin, 1); -+ s->c = c; -+ s->h = h; -+ s->nr_data = min_t(unsigned, h->nr_active_devs, -+ EC_STRIPE_MAX) - h->redundancy; -+ s->nr_parity = h->redundancy; -+ -+ bch2_keylist_init(&s->keys, s->inline_keys); -+ -+ s->stripe.offset = 0; -+ s->stripe.size = h->blocksize; -+ memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid)); -+ -+ ec_stripe_key_init(c, &s->stripe.key, s->nr_data, -+ s->nr_parity, h->blocksize); -+ -+ for (i = 0; i < s->stripe.key.v.nr_blocks; i++) { -+ s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL); -+ if (!s->stripe.data[i]) -+ goto err; -+ } -+ -+ h->s = s; -+ -+ return 0; -+err: -+ for (i = 0; i < s->stripe.key.v.nr_blocks; i++) -+ kvpfree(s->stripe.data[i], s->stripe.size << 9); -+ kfree(s); -+ return -ENOMEM; -+} -+ -+static struct ec_stripe_head * -+ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target, -+ unsigned algo, unsigned redundancy) -+{ -+ struct ec_stripe_head *h; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ h = kzalloc(sizeof(*h), GFP_KERNEL); -+ if (!h) -+ return NULL; -+ -+ mutex_init(&h->lock); -+ mutex_lock(&h->lock); -+ -+ h->target = target; -+ h->algo = algo; -+ h->redundancy = redundancy; -+ -+ rcu_read_lock(); -+ h->devs = target_rw_devs(c, BCH_DATA_user, target); -+ -+ for_each_member_device_rcu(ca, c, i, &h->devs) -+ if (!ca->mi.durability) -+ __clear_bit(i, h->devs.d); -+ -+ h->blocksize = pick_blocksize(c, &h->devs); -+ -+ 
for_each_member_device_rcu(ca, c, i, &h->devs) -+ if (ca->mi.bucket_size == h->blocksize) -+ h->nr_active_devs++; -+ -+ rcu_read_unlock(); -+ list_add(&h->list, &c->ec_stripe_head_list); -+ return h; -+} -+ -+void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h) -+{ -+ if (h->s && -+ h->s->allocated && -+ bitmap_weight(h->s->blocks_allocated, -+ h->s->blocks.nr) == h->s->blocks.nr) -+ ec_stripe_set_pending(c, h); -+ -+ mutex_unlock(&h->lock); -+} -+ -+struct ec_stripe_head *__bch2_ec_stripe_head_get(struct bch_fs *c, -+ unsigned target, -+ unsigned algo, -+ unsigned redundancy) -+{ -+ struct ec_stripe_head *h; -+ -+ if (!redundancy) -+ return NULL; -+ -+ mutex_lock(&c->ec_stripe_head_lock); -+ list_for_each_entry(h, &c->ec_stripe_head_list, list) -+ if (h->target == target && -+ h->algo == algo && -+ h->redundancy == redundancy) { -+ mutex_lock(&h->lock); -+ goto found; -+ } -+ -+ h = ec_new_stripe_head_alloc(c, target, algo, redundancy); -+found: -+ mutex_unlock(&c->ec_stripe_head_lock); -+ return h; -+} -+ -+/* -+ * XXX: use a higher watermark for allocating open buckets here: -+ */ -+static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h) -+{ -+ struct bch_devs_mask devs; -+ struct open_bucket *ob; -+ unsigned i, nr_have, nr_data = -+ min_t(unsigned, h->nr_active_devs, -+ EC_STRIPE_MAX) - h->redundancy; -+ bool have_cache = true; -+ int ret = 0; -+ -+ devs = h->devs; -+ -+ for_each_set_bit(i, h->s->blocks_allocated, EC_STRIPE_MAX) { -+ __clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d); -+ --nr_data; -+ } -+ -+ BUG_ON(h->s->blocks.nr > nr_data); -+ BUG_ON(h->s->parity.nr > h->redundancy); -+ -+ open_bucket_for_each(c, &h->s->parity, ob, i) -+ __clear_bit(ob->ptr.dev, devs.d); -+ open_bucket_for_each(c, &h->s->blocks, ob, i) -+ __clear_bit(ob->ptr.dev, devs.d); -+ -+ percpu_down_read(&c->mark_lock); -+ rcu_read_lock(); -+ -+ if (h->s->parity.nr < h->redundancy) { -+ nr_have = h->s->parity.nr; -+ -+ ret = bch2_bucket_alloc_set(c, &h->s->parity, -+ &h->parity_stripe, -+ &devs, -+ h->redundancy, -+ &nr_have, -+ &have_cache, -+ RESERVE_NONE, -+ 0, -+ NULL); -+ if (ret) -+ goto err; -+ } -+ -+ if (h->s->blocks.nr < nr_data) { -+ nr_have = h->s->blocks.nr; -+ -+ ret = bch2_bucket_alloc_set(c, &h->s->blocks, -+ &h->block_stripe, -+ &devs, -+ nr_data, -+ &nr_have, -+ &have_cache, -+ RESERVE_NONE, -+ 0, -+ NULL); -+ if (ret) -+ goto err; -+ } -+err: -+ rcu_read_unlock(); -+ percpu_up_read(&c->mark_lock); -+ return ret; -+} -+ -+/* XXX: doesn't obey target: */ -+static s64 get_existing_stripe(struct bch_fs *c, -+ unsigned target, -+ unsigned algo, -+ unsigned redundancy) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ struct stripe *m; -+ size_t heap_idx; -+ u64 stripe_idx; -+ -+ if (may_create_new_stripe(c)) -+ return -1; -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ for (heap_idx = 0; heap_idx < h->used; heap_idx++) { -+ if (!h->data[heap_idx].blocks_nonempty) -+ continue; -+ -+ stripe_idx = h->data[heap_idx].idx; -+ m = genradix_ptr(&c->stripes[0], stripe_idx); -+ -+ if (m->algorithm == algo && -+ m->nr_redundant == redundancy && -+ m->blocks_nonempty < m->nr_blocks - m->nr_redundant) { -+ bch2_stripes_heap_del(c, m, stripe_idx); -+ spin_unlock(&c->ec_stripes_heap_lock); -+ return stripe_idx; -+ } -+ } -+ -+ spin_unlock(&c->ec_stripes_heap_lock); -+ return -1; -+} -+ -+static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ 
-+ bch2_trans_init(&trans, c, 0, 0); -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, idx), BTREE_ITER_SLOTS); -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (!ret) -+ bkey_reassemble(&stripe->key.k_i, k); -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, -+ unsigned target, -+ unsigned algo, -+ unsigned redundancy) -+{ -+ struct closure cl; -+ struct ec_stripe_head *h; -+ struct open_bucket *ob; -+ unsigned i, data_idx = 0; -+ s64 idx; -+ -+ closure_init_stack(&cl); -+ -+ h = __bch2_ec_stripe_head_get(c, target, algo, redundancy); -+ if (!h) -+ return NULL; -+ -+ if (!h->s && ec_new_stripe_alloc(c, h)) { -+ bch2_ec_stripe_head_put(c, h); -+ return NULL; -+ } -+ -+ if (!h->s->allocated) { -+ if (!h->s->existing_stripe && -+ (idx = get_existing_stripe(c, target, algo, redundancy)) >= 0) { -+ //pr_info("got existing stripe %llu", idx); -+ -+ h->s->existing_stripe = true; -+ h->s->existing_stripe_idx = idx; -+ if (get_stripe_key(c, idx, &h->s->stripe)) { -+ /* btree error */ -+ BUG(); -+ } -+ -+ for (i = 0; i < h->s->stripe.key.v.nr_blocks; i++) -+ if (stripe_blockcount_get(&h->s->stripe.key.v, i)) { -+ __set_bit(i, h->s->blocks_allocated); -+ ec_block_io(c, &h->s->stripe, READ, i, &cl); -+ } -+ } -+ -+ if (new_stripe_alloc_buckets(c, h)) { -+ bch2_ec_stripe_head_put(c, h); -+ h = NULL; -+ goto out; -+ } -+ -+ open_bucket_for_each(c, &h->s->blocks, ob, i) { -+ data_idx = find_next_zero_bit(h->s->blocks_allocated, -+ h->s->nr_data, data_idx); -+ BUG_ON(data_idx >= h->s->nr_data); -+ -+ h->s->stripe.key.v.ptrs[data_idx] = ob->ptr; -+ h->s->data_block_idx[i] = data_idx; -+ data_idx++; -+ } -+ -+ open_bucket_for_each(c, &h->s->parity, ob, i) -+ h->s->stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr; -+ -+ //pr_info("new stripe, blocks_allocated %lx", h->s->blocks_allocated[0]); -+ h->s->allocated = true; -+ } -+out: -+ closure_sync(&cl); -+ return h; -+} -+ -+void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct ec_stripe_head *h; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ mutex_lock(&c->ec_stripe_head_lock); -+ list_for_each_entry(h, &c->ec_stripe_head_list, list) { -+ -+ mutex_lock(&h->lock); -+ if (!h->s) -+ goto unlock; -+ -+ open_bucket_for_each(c, &h->s->blocks, ob, i) -+ if (ob->ptr.dev == ca->dev_idx) -+ goto found; -+ open_bucket_for_each(c, &h->s->parity, ob, i) -+ if (ob->ptr.dev == ca->dev_idx) -+ goto found; -+ goto unlock; -+found: -+ h->s->err = -EROFS; -+ ec_stripe_set_pending(c, h); -+unlock: -+ mutex_unlock(&h->lock); -+ } -+ mutex_unlock(&c->ec_stripe_head_lock); -+} -+ -+static int __bch2_stripe_write_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct stripe *m, -+ size_t idx, -+ struct bkey_i_stripe *new_key) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c k; -+ unsigned i; -+ int ret; -+ -+ bch2_btree_iter_set_pos(iter, POS(0, idx)); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ return ret; -+ -+ if (k.k->type != KEY_TYPE_stripe) -+ return -EIO; -+ -+ bkey_reassemble(&new_key->k_i, k); -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ -+ for (i = 0; i < new_key->v.nr_blocks; i++) -+ stripe_blockcount_set(&new_key->v, i, -+ m->block_sectors[i]); -+ m->dirty = false; -+ -+ spin_unlock(&c->ec_stripes_heap_lock); -+ -+ bch2_trans_update(trans, iter, &new_key->k_i, 0); -+ return 0; -+} -+ -+int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote) -+{ -+ struct btree_trans trans; -+ struct 
btree_iter *iter; -+ struct genradix_iter giter; -+ struct bkey_i_stripe *new_key; -+ struct stripe *m; -+ int ret = 0; -+ -+ new_key = kmalloc(255 * sizeof(u64), GFP_KERNEL); -+ BUG_ON(!new_key); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ -+ genradix_for_each(&c->stripes[0], giter, m) { -+ if (!m->dirty) -+ continue; -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL|flags, -+ __bch2_stripe_write_key(&trans, iter, m, -+ giter.pos, new_key)); -+ -+ if (ret) -+ break; -+ -+ *wrote = true; -+ } -+ -+ bch2_trans_exit(&trans); -+ -+ kfree(new_key); -+ -+ return ret; -+} -+ -+static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bkey_s_c k) -+{ -+ int ret = 0; -+ -+ if (k.k->type == KEY_TYPE_stripe) { -+ struct stripe *m; -+ -+ ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?: -+ bch2_mark_key(c, k, 0, 0, NULL, 0, -+ BTREE_TRIGGER_ALLOC_READ| -+ BTREE_TRIGGER_NOATOMIC); -+ if (ret) -+ return ret; -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ m = genradix_ptr(&c->stripes[0], k.k->p.offset); -+ bch2_stripes_heap_insert(c, m, k.k->p.offset); -+ spin_unlock(&c->ec_stripes_heap_lock); -+ } -+ -+ return ret; -+} -+ -+int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys) -+{ -+ int ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_EC, -+ NULL, bch2_stripes_read_fn); -+ if (ret) -+ bch_err(c, "error reading stripes: %i", ret); -+ -+ return ret; -+} -+ -+int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ size_t i, idx = 0; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, U64_MAX), 0); -+ -+ k = bch2_btree_iter_prev(iter); -+ if (!IS_ERR_OR_NULL(k.k)) -+ idx = k.k->p.offset + 1; -+ ret = bch2_trans_exit(&trans); -+ if (ret) -+ return ret; -+ -+ if (!idx) -+ return 0; -+ -+ if (!gc && -+ !init_heap(&c->ec_stripes_heap, roundup_pow_of_two(idx), -+ GFP_KERNEL)) -+ return -ENOMEM; -+#if 0 -+ ret = genradix_prealloc(&c->stripes[gc], idx, GFP_KERNEL); -+#else -+ for (i = 0; i < idx; i++) -+ if (!genradix_ptr_alloc(&c->stripes[gc], i, GFP_KERNEL)) -+ return -ENOMEM; -+#endif -+ return 0; -+} -+ -+void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ struct stripe *m; -+ size_t i; -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ for (i = 0; i < min(h->used, 20UL); i++) { -+ m = genradix_ptr(&c->stripes[0], h->data[i].idx); -+ -+ pr_buf(out, "%zu %u/%u+%u\n", h->data[i].idx, -+ h->data[i].blocks_nonempty, -+ m->nr_blocks - m->nr_redundant, -+ m->nr_redundant); -+ } -+ spin_unlock(&c->ec_stripes_heap_lock); -+} -+ -+void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct ec_stripe_head *h; -+ struct ec_stripe_new *s; -+ -+ mutex_lock(&c->ec_stripe_head_lock); -+ list_for_each_entry(h, &c->ec_stripe_head_list, list) { -+ pr_buf(out, "target %u algo %u redundancy %u:\n", -+ h->target, h->algo, h->redundancy); -+ -+ if (h->s) -+ pr_buf(out, "\tpending: blocks %u allocated %u\n", -+ h->s->blocks.nr, -+ bitmap_weight(h->s->blocks_allocated, -+ h->s->blocks.nr)); -+ } -+ mutex_unlock(&c->ec_stripe_head_lock); -+ -+ mutex_lock(&c->ec_stripe_new_lock); -+ list_for_each_entry(s, &c->ec_stripe_new_list, list) { -+ pr_buf(out, "\tin flight: blocks %u allocated %u pin %u\n", -+ 
s->blocks.nr, -+ bitmap_weight(s->blocks_allocated, -+ s->blocks.nr), -+ atomic_read(&s->pin)); -+ } -+ mutex_unlock(&c->ec_stripe_new_lock); -+} -+ -+void bch2_fs_ec_exit(struct bch_fs *c) -+{ -+ struct ec_stripe_head *h; -+ -+ while (1) { -+ mutex_lock(&c->ec_stripe_head_lock); -+ h = list_first_entry_or_null(&c->ec_stripe_head_list, -+ struct ec_stripe_head, list); -+ if (h) -+ list_del(&h->list); -+ mutex_unlock(&c->ec_stripe_head_lock); -+ if (!h) -+ break; -+ -+ BUG_ON(h->s); -+ kfree(h); -+ } -+ -+ BUG_ON(!list_empty(&c->ec_stripe_new_list)); -+ -+ free_heap(&c->ec_stripes_heap); -+ genradix_free(&c->stripes[0]); -+ bioset_exit(&c->ec_bioset); -+} -+ -+int bch2_fs_ec_init(struct bch_fs *c) -+{ -+ INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work); -+ INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work); -+ -+ return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio), -+ BIOSET_NEED_BVECS); -+} -diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h -new file mode 100644 -index 000000000000..f8fc3d616cd7 ---- /dev/null -+++ b/fs/bcachefs/ec.h -@@ -0,0 +1,169 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EC_H -+#define _BCACHEFS_EC_H -+ -+#include "ec_types.h" -+#include "keylist_types.h" -+ -+const char *bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+#define bch2_bkey_ops_stripe (struct bkey_ops) { \ -+ .key_invalid = bch2_stripe_invalid, \ -+ .val_to_text = bch2_stripe_to_text, \ -+ .swab = bch2_ptr_swab, \ -+} -+ -+static inline unsigned stripe_csums_per_device(const struct bch_stripe *s) -+{ -+ return DIV_ROUND_UP(le16_to_cpu(s->sectors), -+ 1 << s->csum_granularity_bits); -+} -+ -+static inline unsigned stripe_csum_offset(const struct bch_stripe *s, -+ unsigned dev, unsigned csum_idx) -+{ -+ unsigned csum_bytes = bch_crc_bytes[s->csum_type]; -+ -+ return sizeof(struct bch_stripe) + -+ sizeof(struct bch_extent_ptr) * s->nr_blocks + -+ (dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes; -+} -+ -+static inline unsigned stripe_blockcount_offset(const struct bch_stripe *s, -+ unsigned idx) -+{ -+ return stripe_csum_offset(s, s->nr_blocks, 0) + -+ sizeof(u16) * idx; -+} -+ -+static inline unsigned stripe_blockcount_get(const struct bch_stripe *s, -+ unsigned idx) -+{ -+ return le16_to_cpup((void *) s + stripe_blockcount_offset(s, idx)); -+} -+ -+static inline void stripe_blockcount_set(struct bch_stripe *s, -+ unsigned idx, unsigned v) -+{ -+ __le16 *p = (void *) s + stripe_blockcount_offset(s, idx); -+ -+ *p = cpu_to_le16(v); -+} -+ -+static inline unsigned stripe_val_u64s(const struct bch_stripe *s) -+{ -+ return DIV_ROUND_UP(stripe_blockcount_offset(s, s->nr_blocks), -+ sizeof(u64)); -+} -+ -+static inline void *stripe_csum(struct bch_stripe *s, -+ unsigned dev, unsigned csum_idx) -+{ -+ return (void *) s + stripe_csum_offset(s, dev, csum_idx); -+} -+ -+struct bch_read_bio; -+ -+struct ec_stripe_buf { -+ /* might not be buffering the entire stripe: */ -+ unsigned offset; -+ unsigned size; -+ unsigned long valid[BITS_TO_LONGS(EC_STRIPE_MAX)]; -+ -+ void *data[EC_STRIPE_MAX]; -+ -+ union { -+ struct bkey_i_stripe key; -+ u64 pad[255]; -+ }; -+}; -+ -+struct ec_stripe_head; -+ -+struct ec_stripe_new { -+ struct bch_fs *c; -+ struct ec_stripe_head *h; -+ struct mutex lock; -+ struct list_head list; -+ -+ /* counts in flight writes, stripe is created when pin == 0 */ -+ atomic_t pin; -+ -+ int err; -+ -+ u8 nr_data; -+ u8 nr_parity; -+ bool 
allocated; -+ bool pending; -+ bool existing_stripe; -+ u64 existing_stripe_idx; -+ -+ unsigned long blocks_allocated[BITS_TO_LONGS(EC_STRIPE_MAX)]; -+ -+ struct open_buckets blocks; -+ u8 data_block_idx[EC_STRIPE_MAX]; -+ struct open_buckets parity; -+ -+ struct keylist keys; -+ u64 inline_keys[BKEY_U64s * 8]; -+ -+ struct ec_stripe_buf stripe; -+}; -+ -+struct ec_stripe_head { -+ struct list_head list; -+ struct mutex lock; -+ -+ unsigned target; -+ unsigned algo; -+ unsigned redundancy; -+ -+ struct bch_devs_mask devs; -+ unsigned nr_active_devs; -+ -+ unsigned blocksize; -+ -+ struct dev_stripe_state block_stripe; -+ struct dev_stripe_state parity_stripe; -+ -+ struct ec_stripe_new *s; -+}; -+ -+int bch2_ec_read_extent(struct bch_fs *, struct bch_read_bio *); -+ -+void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *); -+void bch2_ec_add_backpointer(struct bch_fs *, struct write_point *, -+ struct bpos, unsigned); -+ -+void bch2_ec_bucket_written(struct bch_fs *, struct open_bucket *); -+void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *); -+ -+int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *); -+ -+void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *); -+struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *, unsigned, -+ unsigned, unsigned); -+ -+void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t); -+void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t); -+void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t); -+ -+void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); -+ -+void bch2_ec_flush_new_stripes(struct bch_fs *); -+ -+struct journal_keys; -+int bch2_stripes_read(struct bch_fs *, struct journal_keys *); -+int bch2_stripes_write(struct bch_fs *, unsigned, bool *); -+ -+int bch2_ec_mem_alloc(struct bch_fs *, bool); -+ -+void bch2_stripes_heap_to_text(struct printbuf *, struct bch_fs *); -+void bch2_new_stripes_to_text(struct printbuf *, struct bch_fs *); -+ -+void bch2_fs_ec_exit(struct bch_fs *); -+int bch2_fs_ec_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_EC_H */ -diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h -new file mode 100644 -index 000000000000..e4d633fca5bf ---- /dev/null -+++ b/fs/bcachefs/ec_types.h -@@ -0,0 +1,39 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EC_TYPES_H -+#define _BCACHEFS_EC_TYPES_H -+ -+#include -+ -+#define EC_STRIPE_MAX 16 -+ -+struct bch_replicas_padded { -+ struct bch_replicas_entry e; -+ u8 pad[EC_STRIPE_MAX]; -+}; -+ -+struct stripe { -+ size_t heap_idx; -+ -+ u16 sectors; -+ u8 algorithm; -+ -+ u8 nr_blocks; -+ u8 nr_redundant; -+ -+ unsigned alive:1; -+ unsigned dirty:1; -+ unsigned on_heap:1; -+ u8 blocks_nonempty; -+ u16 block_sectors[EC_STRIPE_MAX]; -+ -+ struct bch_replicas_padded r; -+}; -+ -+struct ec_stripe_heap_entry { -+ size_t idx; -+ unsigned blocks_nonempty; -+}; -+ -+typedef HEAP(struct ec_stripe_heap_entry) ec_stripes_heap; -+ -+#endif /* _BCACHEFS_EC_TYPES_H */ -diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -new file mode 100644 -index 000000000000..cd46706fb6f5 ---- /dev/null -+++ b/fs/bcachefs/error.c -@@ -0,0 +1,172 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "error.h" -+#include "io.h" -+#include "super.h" -+ -+#define FSCK_ERR_RATELIMIT_NR 10 -+ -+bool bch2_inconsistent_error(struct bch_fs *c) -+{ -+ set_bit(BCH_FS_ERROR, &c->flags); -+ -+ switch (c->opts.errors) { -+ case BCH_ON_ERROR_CONTINUE: -+ return false; -+ 
case BCH_ON_ERROR_RO: -+ if (bch2_fs_emergency_read_only(c)) -+ bch_err(c, "emergency read only"); -+ return true; -+ case BCH_ON_ERROR_PANIC: -+ panic(bch2_fmt(c, "panic after error")); -+ return true; -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_fatal_error(struct bch_fs *c) -+{ -+ if (bch2_fs_emergency_read_only(c)) -+ bch_err(c, "emergency read only"); -+} -+ -+void bch2_io_error_work(struct work_struct *work) -+{ -+ struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); -+ struct bch_fs *c = ca->fs; -+ bool dev; -+ -+ down_write(&c->state_lock); -+ dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO, -+ BCH_FORCE_IF_DEGRADED); -+ if (dev -+ ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, -+ BCH_FORCE_IF_DEGRADED) -+ : bch2_fs_emergency_read_only(c)) -+ bch_err(ca, -+ "too many IO errors, setting %s RO", -+ dev ? "device" : "filesystem"); -+ up_write(&c->state_lock); -+} -+ -+void bch2_io_error(struct bch_dev *ca) -+{ -+ //queue_work(system_long_wq, &ca->io_error_work); -+} -+ -+#ifdef __KERNEL__ -+#define ask_yn() false -+#else -+#include "tools-util.h" -+#endif -+ -+enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags, -+ const char *fmt, ...) -+{ -+ struct fsck_err_state *s = NULL; -+ va_list args; -+ bool fix = false, print = true, suppressing = false; -+ char _buf[sizeof(s->buf)], *buf = _buf; -+ -+ if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) { -+ va_start(args, fmt); -+ vprintk(fmt, args); -+ va_end(args); -+ -+ return bch2_inconsistent_error(c) -+ ? FSCK_ERR_EXIT -+ : FSCK_ERR_FIX; -+ } -+ -+ mutex_lock(&c->fsck_error_lock); -+ -+ list_for_each_entry(s, &c->fsck_errors, list) -+ if (s->fmt == fmt) -+ goto found; -+ -+ s = kzalloc(sizeof(*s), GFP_NOFS); -+ if (!s) { -+ if (!c->fsck_alloc_err) -+ bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); -+ c->fsck_alloc_err = true; -+ buf = _buf; -+ goto print; -+ } -+ -+ INIT_LIST_HEAD(&s->list); -+ s->fmt = fmt; -+found: -+ list_move(&s->list, &c->fsck_errors); -+ s->nr++; -+ if (c->opts.ratelimit_errors && -+ s->nr >= FSCK_ERR_RATELIMIT_NR) { -+ if (s->nr == FSCK_ERR_RATELIMIT_NR) -+ suppressing = true; -+ else -+ print = false; -+ } -+ buf = s->buf; -+print: -+ va_start(args, fmt); -+ vscnprintf(buf, sizeof(_buf), fmt, args); -+ va_end(args); -+ -+ if (c->opts.fix_errors == FSCK_OPT_EXIT) { -+ bch_err(c, "%s, exiting", buf); -+ } else if (flags & FSCK_CAN_FIX) { -+ if (c->opts.fix_errors == FSCK_OPT_ASK) { -+ printk(KERN_ERR "%s: fix?", buf); -+ fix = ask_yn(); -+ } else if (c->opts.fix_errors == FSCK_OPT_YES || -+ (c->opts.nochanges && -+ !(flags & FSCK_CAN_IGNORE))) { -+ if (print) -+ bch_err(c, "%s, fixing", buf); -+ fix = true; -+ } else { -+ if (print) -+ bch_err(c, "%s, not fixing", buf); -+ fix = false; -+ } -+ } else if (flags & FSCK_NEED_FSCK) { -+ if (print) -+ bch_err(c, "%s (run fsck to correct)", buf); -+ } else { -+ if (print) -+ bch_err(c, "%s (repair unimplemented)", buf); -+ } -+ -+ if (suppressing) -+ bch_err(c, "Ratelimiting new instances of previous error"); -+ -+ mutex_unlock(&c->fsck_error_lock); -+ -+ if (fix) { -+ set_bit(BCH_FS_ERRORS_FIXED, &c->flags); -+ return FSCK_ERR_FIX; -+ } else { -+ set_bit(BCH_FS_ERROR, &c->flags); -+ return c->opts.fix_errors == FSCK_OPT_EXIT || -+ !(flags & FSCK_CAN_IGNORE) -+ ? 
FSCK_ERR_EXIT -+ : FSCK_ERR_IGNORE; -+ } -+} -+ -+void bch2_flush_fsck_errs(struct bch_fs *c) -+{ -+ struct fsck_err_state *s, *n; -+ -+ mutex_lock(&c->fsck_error_lock); -+ -+ list_for_each_entry_safe(s, n, &c->fsck_errors, list) { -+ if (s->ratelimited) -+ bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->buf); -+ -+ list_del(&s->list); -+ kfree(s); -+ } -+ -+ mutex_unlock(&c->fsck_error_lock); -+} -diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h -new file mode 100644 -index 000000000000..94b53312fbbd ---- /dev/null -+++ b/fs/bcachefs/error.h -@@ -0,0 +1,211 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ERROR_H -+#define _BCACHEFS_ERROR_H -+ -+#include -+#include -+ -+struct bch_dev; -+struct bch_fs; -+struct work_struct; -+ -+/* -+ * XXX: separate out errors that indicate on disk data is inconsistent, and flag -+ * superblock as such -+ */ -+ -+/* Error messages: */ -+ -+/* -+ * Inconsistency errors: The on disk data is inconsistent. If these occur during -+ * initial recovery, they don't indicate a bug in the running code - we walk all -+ * the metadata before modifying anything. If they occur at runtime, they -+ * indicate either a bug in the running code or (less likely) data is being -+ * silently corrupted under us. -+ * -+ * XXX: audit all inconsistent errors and make sure they're all recoverable, in -+ * BCH_ON_ERROR_CONTINUE mode -+ */ -+ -+bool bch2_inconsistent_error(struct bch_fs *); -+ -+#define bch2_fs_inconsistent(c, ...) \ -+({ \ -+ bch_err(c, __VA_ARGS__); \ -+ bch2_inconsistent_error(c); \ -+}) -+ -+#define bch2_fs_inconsistent_on(cond, c, ...) \ -+({ \ -+ int _ret = !!(cond); \ -+ \ -+ if (_ret) \ -+ bch2_fs_inconsistent(c, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* -+ * Later we might want to mark only the particular device inconsistent, not the -+ * entire filesystem: -+ */ -+ -+#define bch2_dev_inconsistent(ca, ...) \ -+do { \ -+ bch_err(ca, __VA_ARGS__); \ -+ bch2_inconsistent_error((ca)->fs); \ -+} while (0) -+ -+#define bch2_dev_inconsistent_on(cond, ca, ...) \ -+({ \ -+ int _ret = !!(cond); \ -+ \ -+ if (_ret) \ -+ bch2_dev_inconsistent(ca, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* -+ * Fsck errors: inconsistency errors we detect at mount time, and should ideally -+ * be able to repair: -+ */ -+ -+enum { -+ BCH_FSCK_OK = 0, -+ BCH_FSCK_ERRORS_NOT_FIXED = 1, -+ BCH_FSCK_REPAIR_UNIMPLEMENTED = 2, -+ BCH_FSCK_REPAIR_IMPOSSIBLE = 3, -+ BCH_FSCK_UNKNOWN_VERSION = 4, -+}; -+ -+enum fsck_err_opts { -+ FSCK_OPT_EXIT, -+ FSCK_OPT_YES, -+ FSCK_OPT_NO, -+ FSCK_OPT_ASK, -+}; -+ -+enum fsck_err_ret { -+ FSCK_ERR_IGNORE = 0, -+ FSCK_ERR_FIX = 1, -+ FSCK_ERR_EXIT = 2, -+}; -+ -+struct fsck_err_state { -+ struct list_head list; -+ const char *fmt; -+ u64 nr; -+ bool ratelimited; -+ char buf[512]; -+}; -+ -+#define FSCK_CAN_FIX (1 << 0) -+#define FSCK_CAN_IGNORE (1 << 1) -+#define FSCK_NEED_FSCK (1 << 2) -+ -+__printf(3, 4) __cold -+enum fsck_err_ret bch2_fsck_err(struct bch_fs *, -+ unsigned, const char *, ...); -+void bch2_flush_fsck_errs(struct bch_fs *); -+ -+#define __fsck_err(c, _flags, msg, ...) \ -+({ \ -+ int _fix = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__);\ -+ \ -+ if (_fix == FSCK_ERR_EXIT) { \ -+ bch_err(c, "Unable to continue, halting"); \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ } \ -+ \ -+ _fix; \ -+}) -+ -+/* These macros return true if error should be fixed: */ -+ -+/* XXX: mark in superblock that filesystem contains errors, if we ignore: */ -+ -+#define __fsck_err_on(cond, c, _flags, ...) \ -+ ((cond) ? 
__fsck_err(c, _flags, ##__VA_ARGS__) : false) -+ -+#define need_fsck_err_on(cond, c, ...) \ -+ __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) -+ -+#define need_fsck_err(c, ...) \ -+ __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) -+ -+#define mustfix_fsck_err(c, ...) \ -+ __fsck_err(c, FSCK_CAN_FIX, ##__VA_ARGS__) -+ -+#define mustfix_fsck_err_on(cond, c, ...) \ -+ __fsck_err_on(cond, c, FSCK_CAN_FIX, ##__VA_ARGS__) -+ -+#define fsck_err(c, ...) \ -+ __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) -+ -+#define fsck_err_on(cond, c, ...) \ -+ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) -+ -+/* -+ * Fatal errors: these don't indicate a bug, but we can't continue running in RW -+ * mode - pretty much just due to metadata IO errors: -+ */ -+ -+void bch2_fatal_error(struct bch_fs *); -+ -+#define bch2_fs_fatal_error(c, ...) \ -+do { \ -+ bch_err(c, __VA_ARGS__); \ -+ bch2_fatal_error(c); \ -+} while (0) -+ -+#define bch2_fs_fatal_err_on(cond, c, ...) \ -+({ \ -+ int _ret = !!(cond); \ -+ \ -+ if (_ret) \ -+ bch2_fs_fatal_error(c, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* -+ * IO errors: either recoverable metadata IO (because we have replicas), or data -+ * IO - we need to log it and print out a message, but we don't (necessarily) -+ * want to shut down the fs: -+ */ -+ -+void bch2_io_error_work(struct work_struct *); -+ -+/* Does the error handling without logging a message */ -+void bch2_io_error(struct bch_dev *); -+ -+/* Logs message and handles the error: */ -+#define bch2_dev_io_error(ca, fmt, ...) \ -+do { \ -+ printk_ratelimited(KERN_ERR bch2_fmt((ca)->fs, \ -+ "IO error on %s for " fmt), \ -+ (ca)->name, ##__VA_ARGS__); \ -+ bch2_io_error(ca); \ -+} while (0) -+ -+#define bch2_dev_io_err_on(cond, ca, ...) \ -+({ \ -+ bool _ret = (cond); \ -+ \ -+ if (_ret) \ -+ bch2_dev_io_error(ca, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* kill? */ -+ -+#define __bcache_io_error(c, fmt, ...) \ -+ printk_ratelimited(KERN_ERR bch2_fmt(c, \ -+ "IO error: " fmt), ##__VA_ARGS__) -+ -+#define bcache_io_error(c, bio, fmt, ...) 
\ -+do { \ -+ __bcache_io_error(c, fmt, ##__VA_ARGS__); \ -+ (bio)->bi_status = BLK_STS_IOERR; \ -+} while (0) -+ -+#endif /* _BCACHEFS_ERROR_H */ -diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c -new file mode 100644 -index 000000000000..fd011df3cb99 ---- /dev/null -+++ b/fs/bcachefs/extent_update.c -@@ -0,0 +1,229 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "debug.h" -+#include "extents.h" -+#include "extent_update.h" -+ -+/* -+ * This counts the number of iterators to the alloc & ec btrees we'll need -+ * inserting/removing this extent: -+ */ -+static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ unsigned ret = 0; -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ switch (__extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ ret++; -+ } -+ } -+ -+ return ret; -+} -+ -+static int count_iters_for_insert(struct btree_trans *trans, -+ struct bkey_s_c k, -+ unsigned offset, -+ struct bpos *end, -+ unsigned *nr_iters, -+ unsigned max_iters) -+{ -+ int ret = 0, ret2 = 0; -+ -+ if (*nr_iters >= max_iters) { -+ *end = bpos_min(*end, k.k->p); -+ ret = 1; -+ } -+ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ *nr_iters += bch2_bkey_nr_alloc_ptrs(k); -+ -+ if (*nr_iters >= max_iters) { -+ *end = bpos_min(*end, k.k->p); -+ ret = 1; -+ } -+ -+ break; -+ case KEY_TYPE_reflink_p: { -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ u64 idx = le64_to_cpu(p.v->idx); -+ unsigned sectors = bpos_min(*end, p.k->p).offset - -+ bkey_start_offset(p.k); -+ struct btree_iter *iter; -+ struct bkey_s_c r_k; -+ -+ for_each_btree_key(trans, iter, -+ BTREE_ID_REFLINK, POS(0, idx + offset), -+ BTREE_ITER_SLOTS, r_k, ret2) { -+ if (bkey_cmp(bkey_start_pos(r_k.k), -+ POS(0, idx + sectors)) >= 0) -+ break; -+ -+ /* extent_update_to_keys(), for the reflink_v update */ -+ *nr_iters += 1; -+ -+ *nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k); -+ -+ if (*nr_iters >= max_iters) { -+ struct bpos pos = bkey_start_pos(k.k); -+ pos.offset += min_t(u64, k.k->size, -+ r_k.k->p.offset - idx); -+ -+ *end = bpos_min(*end, pos); -+ ret = 1; -+ break; -+ } -+ } -+ -+ bch2_trans_iter_put(trans, iter); -+ break; -+ } -+ } -+ -+ return ret2 ?: ret; -+} -+ -+#define EXTENT_ITERS_MAX (BTREE_ITER_MAX / 3) -+ -+int bch2_extent_atomic_end(struct btree_iter *iter, -+ struct bkey_i *insert, -+ struct bpos *end) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree *b; -+ struct btree_node_iter node_iter; -+ struct bkey_packed *_k; -+ unsigned nr_iters = 0; -+ int ret; -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return ret; -+ -+ b = iter->l[0].b; -+ node_iter = iter->l[0].iter; -+ -+ BUG_ON(bkey_cmp(b->data->min_key, POS_MIN) && -+ bkey_cmp(bkey_start_pos(&insert->k), -+ bkey_predecessor(b->data->min_key)) < 0); -+ -+ *end = bpos_min(insert->k.p, b->key.k.p); -+ -+ /* extent_update_to_keys(): */ -+ nr_iters += 1; -+ -+ ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end, -+ &nr_iters, EXTENT_ITERS_MAX / 2); -+ if (ret < 0) -+ return ret; -+ -+ while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) { -+ struct bkey unpacked; -+ struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked); -+ unsigned offset = 0; -+ -+ if (bkey_cmp(bkey_start_pos(k.k), *end) 
>= 0) -+ break; -+ -+ if (bkey_cmp(bkey_start_pos(&insert->k), -+ bkey_start_pos(k.k)) > 0) -+ offset = bkey_start_offset(&insert->k) - -+ bkey_start_offset(k.k); -+ -+ /* extent_handle_overwrites(): */ -+ switch (bch2_extent_overlap(&insert->k, k.k)) { -+ case BCH_EXTENT_OVERLAP_ALL: -+ case BCH_EXTENT_OVERLAP_FRONT: -+ nr_iters += 1; -+ break; -+ case BCH_EXTENT_OVERLAP_BACK: -+ case BCH_EXTENT_OVERLAP_MIDDLE: -+ nr_iters += 2; -+ break; -+ } -+ -+ ret = count_iters_for_insert(trans, k, offset, end, -+ &nr_iters, EXTENT_ITERS_MAX); -+ if (ret) -+ break; -+ -+ bch2_btree_node_iter_advance(&node_iter, b); -+ } -+ -+ return ret < 0 ? ret : 0; -+} -+ -+int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter) -+{ -+ struct bpos end; -+ int ret; -+ -+ ret = bch2_extent_atomic_end(iter, k, &end); -+ if (ret) -+ return ret; -+ -+ bch2_cut_back(end, k); -+ return 0; -+} -+ -+int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter) -+{ -+ struct bpos end; -+ int ret; -+ -+ ret = bch2_extent_atomic_end(iter, k, &end); -+ if (ret) -+ return ret; -+ -+ return !bkey_cmp(end, k->k.p); -+} -+ -+enum btree_insert_ret -+bch2_extent_can_insert(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct btree_node_iter node_iter = l->iter; -+ struct bkey_packed *_k; -+ struct bkey_s_c k; -+ struct bkey unpacked; -+ int sectors; -+ -+ _k = bch2_btree_node_iter_peek(&node_iter, l->b); -+ if (!_k) -+ return BTREE_INSERT_OK; -+ -+ k = bkey_disassemble(l->b, _k, &unpacked); -+ -+ /* Check if we're splitting a compressed extent: */ -+ -+ if (bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k)) > 0 && -+ bkey_cmp(insert->k.p, k.k->p) < 0 && -+ (sectors = bch2_bkey_sectors_compressed(k))) { -+ int flags = trans->flags & BTREE_INSERT_NOFAIL -+ ? BCH_DISK_RESERVATION_NOFAIL : 0; -+ -+ switch (bch2_disk_reservation_add(trans->c, trans->disk_res, -+ sectors, flags)) { -+ case 0: -+ break; -+ case -ENOSPC: -+ return BTREE_INSERT_ENOSPC; -+ default: -+ BUG(); -+ } -+ } -+ -+ return BTREE_INSERT_OK; -+} -diff --git a/fs/bcachefs/extent_update.h b/fs/bcachefs/extent_update.h -new file mode 100644 -index 000000000000..38dc084627d2 ---- /dev/null -+++ b/fs/bcachefs/extent_update.h -@@ -0,0 +1,16 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EXTENT_UPDATE_H -+#define _BCACHEFS_EXTENT_UPDATE_H -+ -+#include "bcachefs.h" -+ -+int bch2_extent_atomic_end(struct btree_iter *, struct bkey_i *, -+ struct bpos *); -+int bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *); -+int bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *); -+ -+enum btree_insert_ret -+bch2_extent_can_insert(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *); -+ -+#endif /* _BCACHEFS_EXTENT_UPDATE_H */ -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -new file mode 100644 -index 000000000000..568f039edcff ---- /dev/null -+++ b/fs/bcachefs/extents.c -@@ -0,0 +1,1258 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2010 Kent Overstreet -+ * -+ * Code for managing the extent btree and dynamically updating the writeback -+ * dirty sector count. 
-+ */ -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_gc.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "error.h" -+#include "extents.h" -+#include "inode.h" -+#include "journal.h" -+#include "replicas.h" -+#include "super.h" -+#include "super-io.h" -+#include "util.h" -+ -+#include -+ -+static unsigned bch2_crc_field_size_max[] = { -+ [BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX, -+ [BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX, -+ [BCH_EXTENT_ENTRY_crc128] = CRC128_SIZE_MAX, -+}; -+ -+static void bch2_extent_crc_pack(union bch_extent_crc *, -+ struct bch_extent_crc_unpacked, -+ enum bch_extent_entry_type); -+ -+static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f, -+ unsigned dev) -+{ -+ struct bch_dev_io_failures *i; -+ -+ for (i = f->devs; i < f->devs + f->nr; i++) -+ if (i->dev == dev) -+ return i; -+ -+ return NULL; -+} -+ -+void bch2_mark_io_failure(struct bch_io_failures *failed, -+ struct extent_ptr_decoded *p) -+{ -+ struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev); -+ -+ if (!f) { -+ BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs)); -+ -+ f = &failed->devs[failed->nr++]; -+ f->dev = p->ptr.dev; -+ f->idx = p->idx; -+ f->nr_failed = 1; -+ f->nr_retries = 0; -+ } else if (p->idx != f->idx) { -+ f->idx = p->idx; -+ f->nr_failed = 1; -+ f->nr_retries = 0; -+ } else { -+ f->nr_failed++; -+ } -+} -+ -+/* -+ * returns true if p1 is better than p2: -+ */ -+static inline bool ptr_better(struct bch_fs *c, -+ const struct extent_ptr_decoded p1, -+ const struct extent_ptr_decoded p2) -+{ -+ if (likely(!p1.idx && !p2.idx)) { -+ struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev); -+ struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev); -+ -+ u64 l1 = atomic64_read(&dev1->cur_latency[READ]); -+ u64 l2 = atomic64_read(&dev2->cur_latency[READ]); -+ -+ /* Pick at random, biased in favor of the faster device: */ -+ -+ return bch2_rand_range(l1 + l2) > l1; -+ } -+ -+ if (force_reconstruct_read(c)) -+ return p1.idx > p2.idx; -+ -+ return p1.idx < p2.idx; -+} -+ -+/* -+ * This picks a non-stale pointer, preferably from a device other than @avoid. -+ * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to -+ * other devices, it will still pick a pointer from avoid. -+ */ -+int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, -+ struct bch_io_failures *failed, -+ struct extent_ptr_decoded *pick) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ struct bch_dev_io_failures *f; -+ struct bch_dev *ca; -+ int ret = 0; -+ -+ if (k.k->type == KEY_TYPE_error) -+ return -EIO; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ -+ /* -+ * If there are any dirty pointers it's an error if we can't -+ * read: -+ */ -+ if (!ret && !p.ptr.cached) -+ ret = -EIO; -+ -+ if (p.ptr.cached && ptr_stale(ca, &p.ptr)) -+ continue; -+ -+ f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL; -+ if (f) -+ p.idx = f->nr_failed < f->nr_retries -+ ? 
f->idx -+ : f->idx + 1; -+ -+ if (!p.idx && -+ !bch2_dev_is_readable(ca)) -+ p.idx++; -+ -+ if (force_reconstruct_read(c) && -+ !p.idx && p.has_ec) -+ p.idx++; -+ -+ if (p.idx >= (unsigned) p.has_ec + 1) -+ continue; -+ -+ if (ret > 0 && !ptr_better(c, p, *pick)) -+ continue; -+ -+ *pick = p; -+ ret = 1; -+ } -+ -+ return ret; -+} -+ -+/* KEY_TYPE_btree_ptr: */ -+ -+const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) -+ return "value too big"; -+ -+ return bch2_bkey_ptrs_invalid(c, k); -+} -+ -+void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ const char *err; -+ char buf[160]; -+ struct bucket_mark mark; -+ struct bch_dev *ca; -+ -+ if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) -+ return; -+ -+ if (!percpu_down_read_trylock(&c->mark_lock)) -+ return; -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ mark = ptr_bucket_mark(ca, ptr); -+ -+ err = "stale"; -+ if (gen_after(mark.gen, ptr->gen)) -+ goto err; -+ -+ err = "inconsistent"; -+ if (mark.data_type != BCH_DATA_btree || -+ mark.dirty_sectors < c->opts.btree_node_size) -+ goto err; -+ } -+out: -+ percpu_up_read(&c->mark_lock); -+ return; -+err: -+ bch2_fs_inconsistent(c, "%s btree pointer %s: bucket %zi gen %i mark %08x", -+ err, (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf), -+ PTR_BUCKET_NR(ca, ptr), -+ mark.gen, (unsigned) mark.v.counter); -+ goto out; -+} -+ -+void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); -+ -+ pr_buf(out, "seq %llx sectors %u written %u min_key ", -+ le64_to_cpu(bp.v->seq), -+ le16_to_cpu(bp.v->sectors), -+ le16_to_cpu(bp.v->sectors_written)); -+ -+ bch2_bpos_to_text(out, bp.v->min_key); -+ pr_buf(out, " "); -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version, -+ unsigned big_endian, int write, -+ struct bkey_s k) -+{ -+ struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(k); -+ -+ compat_bpos(0, btree_id, version, big_endian, write, &bp.v->min_key); -+ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_node_type_is_extents(btree_id) && -+ bkey_cmp(bp.v->min_key, POS_MIN)) -+ bp.v->min_key = write -+ ? bkey_predecessor(bp.v->min_key) -+ : bkey_successor(bp.v->min_key); -+} -+ -+/* KEY_TYPE_extent: */ -+ -+const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ return bch2_bkey_ptrs_invalid(c, k); -+} -+ -+void bch2_extent_debugcheck(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ char buf[160]; -+ -+ if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) || -+ !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) -+ return; -+ -+ if (!percpu_down_read_trylock(&c->mark_lock)) -+ return; -+ -+ extent_for_each_ptr_decode(e, p, entry) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ struct bucket_mark mark = ptr_bucket_mark(ca, &p.ptr); -+ unsigned stale = gen_after(mark.gen, p.ptr.gen); -+ unsigned disk_sectors = ptr_disk_sectors(p); -+ unsigned mark_sectors = p.ptr.cached -+ ? 
mark.cached_sectors -+ : mark.dirty_sectors; -+ -+ bch2_fs_inconsistent_on(stale && !p.ptr.cached, c, -+ "stale dirty pointer (ptr gen %u bucket %u", -+ p.ptr.gen, mark.gen); -+ -+ bch2_fs_inconsistent_on(stale > 96, c, -+ "key too stale: %i", stale); -+ -+ bch2_fs_inconsistent_on(!stale && -+ (mark.data_type != BCH_DATA_user || -+ mark_sectors < disk_sectors), c, -+ "extent pointer not marked: %s:\n" -+ "type %u sectors %u < %u", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c), buf), -+ mark.data_type, -+ mark_sectors, disk_sectors); -+ } -+ -+ percpu_up_read(&c->mark_lock); -+} -+ -+void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+enum merge_result bch2_extent_merge(struct bch_fs *c, -+ struct bkey_s _l, struct bkey_s _r) -+{ -+ struct bkey_s_extent l = bkey_s_to_extent(_l); -+ struct bkey_s_extent r = bkey_s_to_extent(_r); -+ union bch_extent_entry *en_l = l.v->start; -+ union bch_extent_entry *en_r = r.v->start; -+ struct bch_extent_crc_unpacked crc_l, crc_r; -+ -+ if (bkey_val_u64s(l.k) != bkey_val_u64s(r.k)) -+ return BCH_MERGE_NOMERGE; -+ -+ crc_l = bch2_extent_crc_unpack(l.k, NULL); -+ -+ extent_for_each_entry(l, en_l) { -+ en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data); -+ -+ if (extent_entry_type(en_l) != extent_entry_type(en_r)) -+ return BCH_MERGE_NOMERGE; -+ -+ switch (extent_entry_type(en_l)) { -+ case BCH_EXTENT_ENTRY_ptr: { -+ const struct bch_extent_ptr *lp = &en_l->ptr; -+ const struct bch_extent_ptr *rp = &en_r->ptr; -+ struct bch_dev *ca; -+ -+ if (lp->offset + crc_l.compressed_size != rp->offset || -+ lp->dev != rp->dev || -+ lp->gen != rp->gen) -+ return BCH_MERGE_NOMERGE; -+ -+ /* We don't allow extents to straddle buckets: */ -+ ca = bch_dev_bkey_exists(c, lp->dev); -+ -+ if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp)) -+ return BCH_MERGE_NOMERGE; -+ -+ break; -+ } -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ if (en_l->stripe_ptr.block != en_r->stripe_ptr.block || -+ en_l->stripe_ptr.idx != en_r->stripe_ptr.idx) -+ return BCH_MERGE_NOMERGE; -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ case BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: -+ crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l)); -+ crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r)); -+ -+ if (crc_l.csum_type != crc_r.csum_type || -+ crc_l.compression_type != crc_r.compression_type || -+ crc_l.nonce != crc_r.nonce) -+ return BCH_MERGE_NOMERGE; -+ -+ if (crc_l.offset + crc_l.live_size != crc_l.compressed_size || -+ crc_r.offset) -+ return BCH_MERGE_NOMERGE; -+ -+ if (!bch2_checksum_mergeable(crc_l.csum_type)) -+ return BCH_MERGE_NOMERGE; -+ -+ if (crc_is_compressed(crc_l)) -+ return BCH_MERGE_NOMERGE; -+ -+ if (crc_l.csum_type && -+ crc_l.uncompressed_size + -+ crc_r.uncompressed_size > c->sb.encoded_extent_max) -+ return BCH_MERGE_NOMERGE; -+ -+ if (crc_l.uncompressed_size + crc_r.uncompressed_size > -+ bch2_crc_field_size_max[extent_entry_type(en_l)]) -+ return BCH_MERGE_NOMERGE; -+ -+ break; -+ default: -+ return BCH_MERGE_NOMERGE; -+ } -+ } -+ -+ extent_for_each_entry(l, en_l) { -+ struct bch_extent_crc_unpacked crc_l, crc_r; -+ -+ en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data); -+ -+ if (!extent_entry_is_crc(en_l)) -+ continue; -+ -+ crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l)); -+ crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r)); -+ -+ crc_l.csum = bch2_checksum_merge(crc_l.csum_type, -+ crc_l.csum, -+ crc_r.csum, -+ crc_r.uncompressed_size << 9); -+ -+ 
crc_l.uncompressed_size += crc_r.uncompressed_size; -+ crc_l.compressed_size += crc_r.compressed_size; -+ -+ bch2_extent_crc_pack(entry_to_crc(en_l), crc_l, -+ extent_entry_type(en_l)); -+ } -+ -+ bch2_key_resize(l.k, l.k->size + r.k->size); -+ -+ return BCH_MERGE_MERGE; -+} -+ -+/* KEY_TYPE_reservation: */ -+ -+const char *bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); -+ -+ if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation)) -+ return "incorrect value size"; -+ -+ if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) -+ return "invalid nr_replicas"; -+ -+ return NULL; -+} -+ -+void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); -+ -+ pr_buf(out, "generation %u replicas %u", -+ le32_to_cpu(r.v->generation), -+ r.v->nr_replicas); -+} -+ -+enum merge_result bch2_reservation_merge(struct bch_fs *c, -+ struct bkey_s _l, struct bkey_s _r) -+{ -+ struct bkey_s_reservation l = bkey_s_to_reservation(_l); -+ struct bkey_s_reservation r = bkey_s_to_reservation(_r); -+ -+ if (l.v->generation != r.v->generation || -+ l.v->nr_replicas != r.v->nr_replicas) -+ return BCH_MERGE_NOMERGE; -+ -+ if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) { -+ bch2_key_resize(l.k, KEY_SIZE_MAX); -+ bch2_cut_front_s(l.k->p, r.s); -+ return BCH_MERGE_PARTIAL; -+ } -+ -+ bch2_key_resize(l.k, l.k->size + r.k->size); -+ -+ return BCH_MERGE_MERGE; -+} -+ -+/* Extent checksum entries: */ -+ -+/* returns true if not equal */ -+static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l, -+ struct bch_extent_crc_unpacked r) -+{ -+ return (l.csum_type != r.csum_type || -+ l.compression_type != r.compression_type || -+ l.compressed_size != r.compressed_size || -+ l.uncompressed_size != r.uncompressed_size || -+ l.offset != r.offset || -+ l.live_size != r.live_size || -+ l.nonce != r.nonce || -+ bch2_crc_cmp(l.csum, r.csum)); -+} -+ -+static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u, -+ struct bch_extent_crc_unpacked n) -+{ -+ return !crc_is_compressed(u) && -+ u.csum_type && -+ u.uncompressed_size > u.live_size && -+ bch2_csum_type_is_encryption(u.csum_type) == -+ bch2_csum_type_is_encryption(n.csum_type); -+} -+ -+bool bch2_can_narrow_extent_crcs(struct bkey_s_c k, -+ struct bch_extent_crc_unpacked n) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ struct bch_extent_crc_unpacked crc; -+ const union bch_extent_entry *i; -+ -+ if (!n.csum_type) -+ return false; -+ -+ bkey_for_each_crc(k.k, ptrs, crc, i) -+ if (can_narrow_crc(crc, n)) -+ return true; -+ -+ return false; -+} -+ -+/* -+ * We're writing another replica for this extent, so while we've got the data in -+ * memory we'll be computing a new checksum for the currently live data. 
-+ * -+ * If there are other replicas we aren't moving, and they are checksummed but -+ * not compressed, we can modify them to point to only the data that is -+ * currently live (so that readers won't have to bounce) while we've got the -+ * checksum we need: -+ */ -+bool bch2_bkey_narrow_crcs(struct bkey_i *k, struct bch_extent_crc_unpacked n) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ struct bch_extent_crc_unpacked u; -+ struct extent_ptr_decoded p; -+ union bch_extent_entry *i; -+ bool ret = false; -+ -+ /* Find a checksum entry that covers only live data: */ -+ if (!n.csum_type) { -+ bkey_for_each_crc(&k->k, ptrs, u, i) -+ if (!crc_is_compressed(u) && -+ u.csum_type && -+ u.live_size == u.uncompressed_size) { -+ n = u; -+ goto found; -+ } -+ return false; -+ } -+found: -+ BUG_ON(crc_is_compressed(n)); -+ BUG_ON(n.offset); -+ BUG_ON(n.live_size != k->k.size); -+ -+restart_narrow_pointers: -+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ -+ bkey_for_each_ptr_decode(&k->k, ptrs, p, i) -+ if (can_narrow_crc(p.crc, n)) { -+ bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr); -+ p.ptr.offset += p.crc.offset; -+ p.crc = n; -+ bch2_extent_ptr_decoded_append(k, &p); -+ ret = true; -+ goto restart_narrow_pointers; -+ } -+ -+ return ret; -+} -+ -+static void bch2_extent_crc_pack(union bch_extent_crc *dst, -+ struct bch_extent_crc_unpacked src, -+ enum bch_extent_entry_type type) -+{ -+#define set_common_fields(_dst, _src) \ -+ _dst.type = 1 << type; \ -+ _dst.csum_type = _src.csum_type, \ -+ _dst.compression_type = _src.compression_type, \ -+ _dst._compressed_size = _src.compressed_size - 1, \ -+ _dst._uncompressed_size = _src.uncompressed_size - 1, \ -+ _dst.offset = _src.offset -+ -+ switch (type) { -+ case BCH_EXTENT_ENTRY_crc32: -+ set_common_fields(dst->crc32, src); -+ dst->crc32.csum = *((__le32 *) &src.csum.lo); -+ break; -+ case BCH_EXTENT_ENTRY_crc64: -+ set_common_fields(dst->crc64, src); -+ dst->crc64.nonce = src.nonce; -+ dst->crc64.csum_lo = src.csum.lo; -+ dst->crc64.csum_hi = *((__le16 *) &src.csum.hi); -+ break; -+ case BCH_EXTENT_ENTRY_crc128: -+ set_common_fields(dst->crc128, src); -+ dst->crc128.nonce = src.nonce; -+ dst->crc128.csum = src.csum; -+ break; -+ default: -+ BUG(); -+ } -+#undef set_common_fields -+} -+ -+void bch2_extent_crc_append(struct bkey_i *k, -+ struct bch_extent_crc_unpacked new) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ union bch_extent_crc *crc = (void *) ptrs.end; -+ enum bch_extent_entry_type type; -+ -+ if (bch_crc_bytes[new.csum_type] <= 4 && -+ new.uncompressed_size <= CRC32_SIZE_MAX && -+ new.nonce <= CRC32_NONCE_MAX) -+ type = BCH_EXTENT_ENTRY_crc32; -+ else if (bch_crc_bytes[new.csum_type] <= 10 && -+ new.uncompressed_size <= CRC64_SIZE_MAX && -+ new.nonce <= CRC64_NONCE_MAX) -+ type = BCH_EXTENT_ENTRY_crc64; -+ else if (bch_crc_bytes[new.csum_type] <= 16 && -+ new.uncompressed_size <= CRC128_SIZE_MAX && -+ new.nonce <= CRC128_NONCE_MAX) -+ type = BCH_EXTENT_ENTRY_crc128; -+ else -+ BUG(); -+ -+ bch2_extent_crc_pack(crc, new, type); -+ -+ k->k.u64s += extent_entry_u64s(ptrs.end); -+ -+ EBUG_ON(bkey_val_u64s(&k->k) > BKEY_EXTENT_VAL_U64s_MAX); -+} -+ -+/* Generic code for keys with pointers: */ -+ -+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k) -+{ -+ return bch2_bkey_devs(k).nr; -+} -+ -+unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c k) -+{ -+ return k.k->type == KEY_TYPE_reservation -+ ? 
bkey_s_c_to_reservation(k).v->nr_replicas -+ : bch2_bkey_dirty_devs(k).nr; -+} -+ -+unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c k) -+{ -+ unsigned ret = 0; -+ -+ if (k.k->type == KEY_TYPE_reservation) { -+ ret = bkey_s_c_to_reservation(k).v->nr_replicas; -+ } else { -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ ret += !p.ptr.cached && !crc_is_compressed(p.crc); -+ } -+ -+ return ret; -+} -+ -+unsigned bch2_bkey_sectors_compressed(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned ret = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && crc_is_compressed(p.crc)) -+ ret += p.crc.compressed_size; -+ -+ return ret; -+} -+ -+bool bch2_bkey_is_incompressible(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct bch_extent_crc_unpacked crc; -+ -+ bkey_for_each_crc(k.k, ptrs, crc, entry) -+ if (crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) -+ return true; -+ return false; -+} -+ -+bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, -+ unsigned nr_replicas) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bpos end = pos; -+ struct bkey_s_c k; -+ bool ret = true; -+ int err; -+ -+ end.offset += size; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos, -+ BTREE_ITER_SLOTS, k, err) { -+ if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) -+ break; -+ -+ if (nr_replicas > bch2_bkey_nr_ptrs_fully_allocated(k)) { -+ ret = false; -+ break; -+ } -+ } -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+static unsigned bch2_extent_ptr_durability(struct bch_fs *c, -+ struct extent_ptr_decoded p) -+{ -+ unsigned durability = 0; -+ struct bch_dev *ca; -+ -+ if (p.ptr.cached) -+ return 0; -+ -+ ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ -+ if (ca->mi.state != BCH_MEMBER_STATE_FAILED) -+ durability = max_t(unsigned, durability, ca->mi.durability); -+ -+ if (p.has_ec) { -+ struct stripe *s = -+ genradix_ptr(&c->stripes[0], p.ec.idx); -+ -+ if (WARN_ON(!s)) -+ goto out; -+ -+ durability += s->nr_redundant; -+ } -+out: -+ return durability; -+} -+ -+unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned durability = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ durability += bch2_extent_ptr_durability(c, p); -+ -+ return durability; -+} -+ -+void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k, -+ unsigned target, -+ unsigned nr_desired_replicas) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ int extra = bch2_bkey_durability(c, k.s_c) - nr_desired_replicas; -+ -+ if (target && extra > 0) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ int n = bch2_extent_ptr_durability(c, p); -+ -+ if (n && n <= extra && -+ !bch2_dev_in_target(c, p.ptr.dev, target)) { -+ entry->ptr.cached = true; -+ extra -= n; -+ } -+ } -+ -+ if (extra > 0) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ int n = bch2_extent_ptr_durability(c, p); -+ -+ if (n && n <= extra) { -+ entry->ptr.cached = true; -+ extra -= n; -+ } -+ } -+} -+ -+void 
bch2_bkey_append_ptr(struct bkey_i *k, -+ struct bch_extent_ptr ptr) -+{ -+ EBUG_ON(bch2_bkey_has_device(bkey_i_to_s_c(k), ptr.dev)); -+ -+ switch (k->k.type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ case KEY_TYPE_extent: -+ EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX); -+ -+ ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; -+ -+ memcpy((void *) &k->v + bkey_val_bytes(&k->k), -+ &ptr, -+ sizeof(ptr)); -+ k->u64s++; -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static inline void __extent_entry_insert(struct bkey_i *k, -+ union bch_extent_entry *dst, -+ union bch_extent_entry *new) -+{ -+ union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k)); -+ -+ memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new), -+ dst, (u64 *) end - (u64 *) dst); -+ k->k.u64s += extent_entry_u64s(new); -+ memcpy(dst, new, extent_entry_bytes(new)); -+} -+ -+void bch2_extent_ptr_decoded_append(struct bkey_i *k, -+ struct extent_ptr_decoded *p) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ struct bch_extent_crc_unpacked crc = -+ bch2_extent_crc_unpack(&k->k, NULL); -+ union bch_extent_entry *pos; -+ -+ if (!bch2_crc_unpacked_cmp(crc, p->crc)) { -+ pos = ptrs.start; -+ goto found; -+ } -+ -+ bkey_for_each_crc(&k->k, ptrs, crc, pos) -+ if (!bch2_crc_unpacked_cmp(crc, p->crc)) { -+ pos = extent_entry_next(pos); -+ goto found; -+ } -+ -+ bch2_extent_crc_append(k, p->crc); -+ pos = bkey_val_end(bkey_i_to_s(k)); -+found: -+ p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; -+ __extent_entry_insert(k, pos, to_entry(&p->ptr)); -+ -+ if (p->has_ec) { -+ p->ec.type = 1 << BCH_EXTENT_ENTRY_stripe_ptr; -+ __extent_entry_insert(k, pos, to_entry(&p->ec)); -+ } -+} -+ -+static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs, -+ union bch_extent_entry *entry) -+{ -+ union bch_extent_entry *i = ptrs.start; -+ -+ if (i == entry) -+ return NULL; -+ -+ while (extent_entry_next(i) != entry) -+ i = extent_entry_next(i); -+ return i; -+} -+ -+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k, -+ struct bch_extent_ptr *ptr) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *dst, *src, *prev; -+ bool drop_crc = true; -+ -+ EBUG_ON(ptr < &ptrs.start->ptr || -+ ptr >= &ptrs.end->ptr); -+ EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr); -+ -+ src = extent_entry_next(to_entry(ptr)); -+ if (src != ptrs.end && -+ !extent_entry_is_crc(src)) -+ drop_crc = false; -+ -+ dst = to_entry(ptr); -+ while ((prev = extent_entry_prev(ptrs, dst))) { -+ if (extent_entry_is_ptr(prev)) -+ break; -+ -+ if (extent_entry_is_crc(prev)) { -+ if (drop_crc) -+ dst = prev; -+ break; -+ } -+ -+ dst = prev; -+ } -+ -+ memmove_u64s_down(dst, src, -+ (u64 *) ptrs.end - (u64 *) src); -+ k.k->u64s -= (u64 *) src - (u64 *) dst; -+ -+ return dst; -+} -+ -+void bch2_bkey_drop_device(struct bkey_s k, unsigned dev) -+{ -+ struct bch_extent_ptr *ptr; -+ -+ bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev); -+} -+ -+const struct bch_extent_ptr * -+bch2_bkey_has_device(struct bkey_s_c k, unsigned dev) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(ptrs, ptr) -+ if (ptr->dev == dev) -+ return ptr; -+ -+ return NULL; -+} -+ -+bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(ptrs, ptr) -+ if (bch2_dev_in_target(c, ptr->dev, target) && -+ (!ptr->cached || -+ 
!ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr))) -+ return true; -+ -+ return false; -+} -+ -+bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k, -+ struct bch_extent_ptr m, u64 offset) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (p.ptr.dev == m.dev && -+ p.ptr.gen == m.gen && -+ (s64) p.ptr.offset + p.crc.offset - bkey_start_offset(k.k) == -+ (s64) m.offset - offset) -+ return true; -+ -+ return false; -+} -+ -+/* -+ * bch_extent_normalize - clean up an extent, dropping stale pointers etc. -+ * -+ * Returns true if @k should be dropped entirely -+ * -+ * For existing keys, only called when btree nodes are being rewritten, not when -+ * they're merely being compacted/resorted in memory. -+ */ -+bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) -+{ -+ struct bch_extent_ptr *ptr; -+ -+ bch2_bkey_drop_ptrs(k, ptr, -+ ptr->cached && -+ ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)); -+ -+ /* will only happen if all pointers were cached: */ -+ if (!bch2_bkey_nr_ptrs(k.s_c)) -+ k.k->type = KEY_TYPE_discard; -+ -+ return bkey_whiteout(k.k); -+} -+ -+void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct bch_extent_crc_unpacked crc; -+ const struct bch_extent_ptr *ptr; -+ const struct bch_extent_stripe_ptr *ec; -+ struct bch_dev *ca; -+ bool first = true; -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ if (!first) -+ pr_buf(out, " "); -+ -+ switch (__extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ ptr = entry_to_ptr(entry); -+ ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] -+ ? bch_dev_bkey_exists(c, ptr->dev) -+ : NULL; -+ -+ pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev, -+ (u64) ptr->offset, ptr->gen, -+ ptr->cached ? " cached" : "", -+ ca && ptr_stale(ca, ptr) -+ ? 
" stale" : ""); -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ case BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: -+ crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); -+ -+ pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u", -+ crc.compressed_size, -+ crc.uncompressed_size, -+ crc.offset, crc.nonce, -+ crc.csum_type, -+ crc.compression_type); -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ ec = &entry->stripe_ptr; -+ -+ pr_buf(out, "ec: idx %llu block %u", -+ (u64) ec->idx, ec->block); -+ break; -+ default: -+ pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); -+ return; -+ } -+ -+ first = false; -+ } -+} -+ -+static const char *extent_ptr_invalid(const struct bch_fs *c, -+ struct bkey_s_c k, -+ const struct bch_extent_ptr *ptr, -+ unsigned size_ondisk, -+ bool metadata) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr2; -+ struct bch_dev *ca; -+ -+ if (!bch2_dev_exists2(c, ptr->dev)) -+ return "pointer to invalid device"; -+ -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ if (!ca) -+ return "pointer to invalid device"; -+ -+ bkey_for_each_ptr(ptrs, ptr2) -+ if (ptr != ptr2 && ptr->dev == ptr2->dev) -+ return "multiple pointers to same device"; -+ -+ if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets)) -+ return "offset past end of device"; -+ -+ if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) -+ return "offset before first bucket"; -+ -+ if (bucket_remainder(ca, ptr->offset) + -+ size_ondisk > ca->mi.bucket_size) -+ return "spans multiple buckets"; -+ -+ return NULL; -+} -+ -+const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct bch_extent_crc_unpacked crc; -+ unsigned size_ondisk = k.k->size; -+ const char *reason; -+ unsigned nonce = UINT_MAX; -+ -+ if (k.k->type == KEY_TYPE_btree_ptr) -+ size_ondisk = c->opts.btree_node_size; -+ if (k.k->type == KEY_TYPE_btree_ptr_v2) -+ size_ondisk = le16_to_cpu(bkey_s_c_to_btree_ptr_v2(k).v->sectors); -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) -+ return "invalid extent entry type"; -+ -+ if (k.k->type == KEY_TYPE_btree_ptr && -+ !extent_entry_is_ptr(entry)) -+ return "has non ptr field"; -+ -+ switch (extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ reason = extent_ptr_invalid(c, k, &entry->ptr, -+ size_ondisk, false); -+ if (reason) -+ return reason; -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ case BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: -+ crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); -+ -+ if (crc.offset + crc.live_size > -+ crc.uncompressed_size) -+ return "checksum offset + key size > uncompressed size"; -+ -+ size_ondisk = crc.compressed_size; -+ -+ if (!bch2_checksum_type_valid(c, crc.csum_type)) -+ return "invalid checksum type"; -+ -+ if (crc.compression_type >= BCH_COMPRESSION_TYPE_NR) -+ return "invalid compression type"; -+ -+ if (bch2_csum_type_is_encryption(crc.csum_type)) { -+ if (nonce == UINT_MAX) -+ nonce = crc.offset + crc.nonce; -+ else if (nonce != crc.offset + crc.nonce) -+ return "incorrect nonce"; -+ } -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ break; -+ } -+ } -+ -+ return NULL; -+} -+ -+void bch2_ptr_swab(struct bkey_s k) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *entry; -+ u64 *d; -+ -+ for (d = (u64 *) ptrs.start; -+ d != (u64 *) 
ptrs.end; -+ d++) -+ *d = swab64(*d); -+ -+ for (entry = ptrs.start; -+ entry < ptrs.end; -+ entry = extent_entry_next(entry)) { -+ switch (extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ entry->crc32.csum = swab32(entry->crc32.csum); -+ break; -+ case BCH_EXTENT_ENTRY_crc64: -+ entry->crc64.csum_hi = swab16(entry->crc64.csum_hi); -+ entry->crc64.csum_lo = swab64(entry->crc64.csum_lo); -+ break; -+ case BCH_EXTENT_ENTRY_crc128: -+ entry->crc128.csum.hi = (__force __le64) -+ swab64((__force u64) entry->crc128.csum.hi); -+ entry->crc128.csum.lo = (__force __le64) -+ swab64((__force u64) entry->crc128.csum.lo); -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ break; -+ } -+ } -+} -+ -+/* Generic extent code: */ -+ -+int bch2_cut_front_s(struct bpos where, struct bkey_s k) -+{ -+ unsigned new_val_u64s = bkey_val_u64s(k.k); -+ int val_u64s_delta; -+ u64 sub; -+ -+ if (bkey_cmp(where, bkey_start_pos(k.k)) <= 0) -+ return 0; -+ -+ EBUG_ON(bkey_cmp(where, k.k->p) > 0); -+ -+ sub = where.offset - bkey_start_offset(k.k); -+ -+ k.k->size -= sub; -+ -+ if (!k.k->size) { -+ k.k->type = KEY_TYPE_deleted; -+ new_val_u64s = 0; -+ } -+ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: { -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *entry; -+ bool seen_crc = false; -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ switch (extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ if (!seen_crc) -+ entry->ptr.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ entry->crc32.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_crc64: -+ entry->crc64.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_crc128: -+ entry->crc128.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ break; -+ } -+ -+ if (extent_entry_is_crc(entry)) -+ seen_crc = true; -+ } -+ -+ break; -+ } -+ case KEY_TYPE_reflink_p: { -+ struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k); -+ -+ le64_add_cpu(&p.v->idx, sub); -+ break; -+ } -+ case KEY_TYPE_inline_data: { -+ struct bkey_s_inline_data d = bkey_s_to_inline_data(k); -+ -+ sub = min_t(u64, sub << 9, bkey_val_bytes(d.k)); -+ -+ memmove(d.v->data, -+ d.v->data + sub, -+ bkey_val_bytes(d.k) - sub); -+ -+ new_val_u64s -= sub >> 3; -+ break; -+ } -+ } -+ -+ val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s; -+ BUG_ON(val_u64s_delta < 0); -+ -+ set_bkey_val_u64s(k.k, new_val_u64s); -+ memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64)); -+ return -val_u64s_delta; -+} -+ -+int bch2_cut_back_s(struct bpos where, struct bkey_s k) -+{ -+ unsigned new_val_u64s = bkey_val_u64s(k.k); -+ int val_u64s_delta; -+ u64 len = 0; -+ -+ if (bkey_cmp(where, k.k->p) >= 0) -+ return 0; -+ -+ EBUG_ON(bkey_cmp(where, bkey_start_pos(k.k)) < 0); -+ -+ len = where.offset - bkey_start_offset(k.k); -+ -+ k.k->p = where; -+ k.k->size = len; -+ -+ if (!len) { -+ k.k->type = KEY_TYPE_deleted; -+ new_val_u64s = 0; -+ } -+ -+ switch (k.k->type) { -+ case KEY_TYPE_inline_data: -+ new_val_u64s = min(new_val_u64s, k.k->size << 6); -+ break; -+ } -+ -+ val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s; -+ BUG_ON(val_u64s_delta < 0); -+ -+ set_bkey_val_u64s(k.k, new_val_u64s); -+ memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64)); -+ return -val_u64s_delta; -+} -diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h -new file mode 100644 -index 000000000000..29b15365d19c ---- /dev/null -+++ b/fs/bcachefs/extents.h -@@ -0,0 +1,603 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef 
_BCACHEFS_EXTENTS_H -+#define _BCACHEFS_EXTENTS_H -+ -+#include "bcachefs.h" -+#include "bkey.h" -+#include "extents_types.h" -+ -+struct bch_fs; -+struct btree_trans; -+ -+/* extent entries: */ -+ -+#define extent_entry_last(_e) \ -+ ((typeof(&(_e).v->start[0])) bkey_val_end(_e)) -+ -+#define entry_to_ptr(_entry) \ -+({ \ -+ EBUG_ON((_entry) && !extent_entry_is_ptr(_entry)); \ -+ \ -+ __builtin_choose_expr( \ -+ type_is_exact(_entry, const union bch_extent_entry *), \ -+ (const struct bch_extent_ptr *) (_entry), \ -+ (struct bch_extent_ptr *) (_entry)); \ -+}) -+ -+/* downcast, preserves const */ -+#define to_entry(_entry) \ -+({ \ -+ BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \ -+ !type_is(_entry, struct bch_extent_ptr *) && \ -+ !type_is(_entry, struct bch_extent_stripe_ptr *)); \ -+ \ -+ __builtin_choose_expr( \ -+ (type_is_exact(_entry, const union bch_extent_crc *) || \ -+ type_is_exact(_entry, const struct bch_extent_ptr *) ||\ -+ type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\ -+ (const union bch_extent_entry *) (_entry), \ -+ (union bch_extent_entry *) (_entry)); \ -+}) -+ -+#define extent_entry_next(_entry) \ -+ ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry))) -+ -+static inline unsigned -+__extent_entry_type(const union bch_extent_entry *e) -+{ -+ return e->type ? __ffs(e->type) : BCH_EXTENT_ENTRY_MAX; -+} -+ -+static inline enum bch_extent_entry_type -+extent_entry_type(const union bch_extent_entry *e) -+{ -+ int ret = __ffs(e->type); -+ -+ EBUG_ON(ret < 0 || ret >= BCH_EXTENT_ENTRY_MAX); -+ -+ return ret; -+} -+ -+static inline size_t extent_entry_bytes(const union bch_extent_entry *entry) -+{ -+ switch (extent_entry_type(entry)) { -+#define x(f, n) \ -+ case BCH_EXTENT_ENTRY_##f: \ -+ return sizeof(struct bch_extent_##f); -+ BCH_EXTENT_ENTRY_TYPES() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+static inline size_t extent_entry_u64s(const union bch_extent_entry *entry) -+{ -+ return extent_entry_bytes(entry) / sizeof(u64); -+} -+ -+static inline bool extent_entry_is_ptr(const union bch_extent_entry *e) -+{ -+ switch (extent_entry_type(e)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline bool extent_entry_is_crc(const union bch_extent_entry *e) -+{ -+ switch (extent_entry_type(e)) { -+ case BCH_EXTENT_ENTRY_crc32: -+ case BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+union bch_extent_crc { -+ u8 type; -+ struct bch_extent_crc32 crc32; -+ struct bch_extent_crc64 crc64; -+ struct bch_extent_crc128 crc128; -+}; -+ -+#define __entry_to_crc(_entry) \ -+ __builtin_choose_expr( \ -+ type_is_exact(_entry, const union bch_extent_entry *), \ -+ (const union bch_extent_crc *) (_entry), \ -+ (union bch_extent_crc *) (_entry)) -+ -+#define entry_to_crc(_entry) \ -+({ \ -+ EBUG_ON((_entry) && !extent_entry_is_crc(_entry)); \ -+ \ -+ __entry_to_crc(_entry); \ -+}) -+ -+static inline struct bch_extent_crc_unpacked -+bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) -+{ -+#define common_fields(_crc) \ -+ .csum_type = _crc.csum_type, \ -+ .compression_type = _crc.compression_type, \ -+ .compressed_size = _crc._compressed_size + 1, \ -+ .uncompressed_size = _crc._uncompressed_size + 1, \ -+ .offset = _crc.offset, \ -+ .live_size = k->size -+ -+ if (!crc) -+ return (struct bch_extent_crc_unpacked) { -+ .compressed_size = k->size, -+ .uncompressed_size = k->size, -+ .live_size = k->size, -+ }; -+ 
-+ switch (extent_entry_type(to_entry(crc))) { -+ case BCH_EXTENT_ENTRY_crc32: { -+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) { -+ common_fields(crc->crc32), -+ }; -+ -+ *((__le32 *) &ret.csum.lo) = crc->crc32.csum; -+ -+ memcpy(&ret.csum.lo, &crc->crc32.csum, -+ sizeof(crc->crc32.csum)); -+ -+ return ret; -+ } -+ case BCH_EXTENT_ENTRY_crc64: { -+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) { -+ common_fields(crc->crc64), -+ .nonce = crc->crc64.nonce, -+ .csum.lo = (__force __le64) crc->crc64.csum_lo, -+ }; -+ -+ *((__le16 *) &ret.csum.hi) = crc->crc64.csum_hi; -+ -+ return ret; -+ } -+ case BCH_EXTENT_ENTRY_crc128: { -+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) { -+ common_fields(crc->crc128), -+ .nonce = crc->crc128.nonce, -+ .csum = crc->crc128.csum, -+ }; -+ -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+#undef common_fields -+} -+ -+static inline bool crc_is_compressed(struct bch_extent_crc_unpacked crc) -+{ -+ return (crc.compression_type != BCH_COMPRESSION_TYPE_none && -+ crc.compression_type != BCH_COMPRESSION_TYPE_incompressible); -+} -+ -+/* bkey_ptrs: generically over any key type that has ptrs */ -+ -+struct bkey_ptrs_c { -+ const union bch_extent_entry *start; -+ const union bch_extent_entry *end; -+}; -+ -+struct bkey_ptrs { -+ union bch_extent_entry *start; -+ union bch_extent_entry *end; -+}; -+ -+static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: { -+ struct bkey_s_c_btree_ptr e = bkey_s_c_to_btree_ptr(k); -+ return (struct bkey_ptrs_c) { -+ to_entry(&e.v->start[0]), -+ to_entry(extent_entry_last(e)) -+ }; -+ } -+ case KEY_TYPE_extent: { -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ return (struct bkey_ptrs_c) { -+ e.v->start, -+ extent_entry_last(e) -+ }; -+ } -+ case KEY_TYPE_stripe: { -+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); -+ return (struct bkey_ptrs_c) { -+ to_entry(&s.v->ptrs[0]), -+ to_entry(&s.v->ptrs[s.v->nr_blocks]), -+ }; -+ } -+ case KEY_TYPE_reflink_v: { -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -+ -+ return (struct bkey_ptrs_c) { -+ r.v->start, -+ bkey_val_end(r), -+ }; -+ } -+ case KEY_TYPE_btree_ptr_v2: { -+ struct bkey_s_c_btree_ptr_v2 e = bkey_s_c_to_btree_ptr_v2(k); -+ return (struct bkey_ptrs_c) { -+ to_entry(&e.v->start[0]), -+ to_entry(extent_entry_last(e)) -+ }; -+ } -+ default: -+ return (struct bkey_ptrs_c) { NULL, NULL }; -+ } -+} -+ -+static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k) -+{ -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k.s_c); -+ -+ return (struct bkey_ptrs) { -+ (void *) p.start, -+ (void *) p.end -+ }; -+} -+ -+#define __bkey_extent_entry_for_each_from(_start, _end, _entry) \ -+ for ((_entry) = (_start); \ -+ (_entry) < (_end); \ -+ (_entry) = extent_entry_next(_entry)) -+ -+#define __bkey_ptr_next(_ptr, _end) \ -+({ \ -+ typeof(_end) _entry; \ -+ \ -+ __bkey_extent_entry_for_each_from(to_entry(_ptr), _end, _entry) \ -+ if (extent_entry_is_ptr(_entry)) \ -+ break; \ -+ \ -+ _entry < (_end) ? 
entry_to_ptr(_entry) : NULL; \ -+}) -+ -+#define bkey_extent_entry_for_each_from(_p, _entry, _start) \ -+ __bkey_extent_entry_for_each_from(_start, (_p).end, _entry) -+ -+#define bkey_extent_entry_for_each(_p, _entry) \ -+ bkey_extent_entry_for_each_from(_p, _entry, _p.start) -+ -+#define __bkey_for_each_ptr(_start, _end, _ptr) \ -+ for ((_ptr) = (_start); \ -+ ((_ptr) = __bkey_ptr_next(_ptr, _end)); \ -+ (_ptr)++) -+ -+#define bkey_ptr_next(_p, _ptr) \ -+ __bkey_ptr_next(_ptr, (_p).end) -+ -+#define bkey_for_each_ptr(_p, _ptr) \ -+ __bkey_for_each_ptr(&(_p).start->ptr, (_p).end, _ptr) -+ -+#define __bkey_ptr_next_decode(_k, _end, _ptr, _entry) \ -+({ \ -+ __label__ out; \ -+ \ -+ (_ptr).idx = 0; \ -+ (_ptr).has_ec = false; \ -+ \ -+ __bkey_extent_entry_for_each_from(_entry, _end, _entry) \ -+ switch (extent_entry_type(_entry)) { \ -+ case BCH_EXTENT_ENTRY_ptr: \ -+ (_ptr).ptr = _entry->ptr; \ -+ goto out; \ -+ case BCH_EXTENT_ENTRY_crc32: \ -+ case BCH_EXTENT_ENTRY_crc64: \ -+ case BCH_EXTENT_ENTRY_crc128: \ -+ (_ptr).crc = bch2_extent_crc_unpack(_k, \ -+ entry_to_crc(_entry)); \ -+ break; \ -+ case BCH_EXTENT_ENTRY_stripe_ptr: \ -+ (_ptr).ec = _entry->stripe_ptr; \ -+ (_ptr).has_ec = true; \ -+ break; \ -+ } \ -+out: \ -+ _entry < (_end); \ -+}) -+ -+#define __bkey_for_each_ptr_decode(_k, _start, _end, _ptr, _entry) \ -+ for ((_ptr).crc = bch2_extent_crc_unpack(_k, NULL), \ -+ (_entry) = _start; \ -+ __bkey_ptr_next_decode(_k, _end, _ptr, _entry); \ -+ (_entry) = extent_entry_next(_entry)) -+ -+#define bkey_for_each_ptr_decode(_k, _p, _ptr, _entry) \ -+ __bkey_for_each_ptr_decode(_k, (_p).start, (_p).end, \ -+ _ptr, _entry) -+ -+#define bkey_crc_next(_k, _start, _end, _crc, _iter) \ -+({ \ -+ __bkey_extent_entry_for_each_from(_iter, _end, _iter) \ -+ if (extent_entry_is_crc(_iter)) { \ -+ (_crc) = bch2_extent_crc_unpack(_k, \ -+ entry_to_crc(_iter)); \ -+ break; \ -+ } \ -+ \ -+ (_iter) < (_end); \ -+}) -+ -+#define __bkey_for_each_crc(_k, _start, _end, _crc, _iter) \ -+ for ((_crc) = bch2_extent_crc_unpack(_k, NULL), \ -+ (_iter) = (_start); \ -+ bkey_crc_next(_k, _start, _end, _crc, _iter); \ -+ (_iter) = extent_entry_next(_iter)) -+ -+#define bkey_for_each_crc(_k, _p, _crc, _iter) \ -+ __bkey_for_each_crc(_k, (_p).start, (_p).end, _crc, _iter) -+ -+/* Iterate over pointers in KEY_TYPE_extent: */ -+ -+#define extent_for_each_entry_from(_e, _entry, _start) \ -+ __bkey_extent_entry_for_each_from(_start, \ -+ extent_entry_last(_e),_entry) -+ -+#define extent_for_each_entry(_e, _entry) \ -+ extent_for_each_entry_from(_e, _entry, (_e).v->start) -+ -+#define extent_ptr_next(_e, _ptr) \ -+ __bkey_ptr_next(_ptr, extent_entry_last(_e)) -+ -+#define extent_for_each_ptr(_e, _ptr) \ -+ __bkey_for_each_ptr(&(_e).v->start->ptr, extent_entry_last(_e), _ptr) -+ -+#define extent_for_each_ptr_decode(_e, _ptr, _entry) \ -+ __bkey_for_each_ptr_decode((_e).k, (_e).v->start, \ -+ extent_entry_last(_e), _ptr, _entry) -+ -+/* utility code common to all keys with pointers: */ -+ -+void bch2_mark_io_failure(struct bch_io_failures *, -+ struct extent_ptr_decoded *); -+int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, -+ struct bch_io_failures *, -+ struct extent_ptr_decoded *); -+ -+/* KEY_TYPE_btree_ptr: */ -+ -+const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_btree_ptr_debugcheck(struct bch_fs *, struct bkey_s_c); -+void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+void bch2_btree_ptr_v2_to_text(struct printbuf *, 
struct bch_fs *, -+ struct bkey_s_c); -+void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, -+ int, struct bkey_s); -+ -+#define bch2_bkey_ops_btree_ptr (struct bkey_ops) { \ -+ .key_invalid = bch2_btree_ptr_invalid, \ -+ .key_debugcheck = bch2_btree_ptr_debugcheck, \ -+ .val_to_text = bch2_btree_ptr_to_text, \ -+ .swab = bch2_ptr_swab, \ -+} -+ -+#define bch2_bkey_ops_btree_ptr_v2 (struct bkey_ops) { \ -+ .key_invalid = bch2_btree_ptr_invalid, \ -+ .key_debugcheck = bch2_btree_ptr_debugcheck, \ -+ .val_to_text = bch2_btree_ptr_v2_to_text, \ -+ .swab = bch2_ptr_swab, \ -+ .compat = bch2_btree_ptr_v2_compat, \ -+} -+ -+/* KEY_TYPE_extent: */ -+ -+const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_extent_debugcheck(struct bch_fs *, struct bkey_s_c); -+void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+enum merge_result bch2_extent_merge(struct bch_fs *, -+ struct bkey_s, struct bkey_s); -+ -+#define bch2_bkey_ops_extent (struct bkey_ops) { \ -+ .key_invalid = bch2_extent_invalid, \ -+ .key_debugcheck = bch2_extent_debugcheck, \ -+ .val_to_text = bch2_extent_to_text, \ -+ .swab = bch2_ptr_swab, \ -+ .key_normalize = bch2_extent_normalize, \ -+ .key_merge = bch2_extent_merge, \ -+} -+ -+/* KEY_TYPE_reservation: */ -+ -+const char *bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+enum merge_result bch2_reservation_merge(struct bch_fs *, -+ struct bkey_s, struct bkey_s); -+ -+#define bch2_bkey_ops_reservation (struct bkey_ops) { \ -+ .key_invalid = bch2_reservation_invalid, \ -+ .val_to_text = bch2_reservation_to_text, \ -+ .key_merge = bch2_reservation_merge, \ -+} -+ -+/* Extent checksum entries: */ -+ -+bool bch2_can_narrow_extent_crcs(struct bkey_s_c, -+ struct bch_extent_crc_unpacked); -+bool bch2_bkey_narrow_crcs(struct bkey_i *, struct bch_extent_crc_unpacked); -+void bch2_extent_crc_append(struct bkey_i *, -+ struct bch_extent_crc_unpacked); -+ -+/* Generic code for keys with pointers: */ -+ -+static inline bool bkey_extent_is_direct_data(const struct bkey *k) -+{ -+ switch (k->type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline bool bkey_extent_is_data(const struct bkey *k) -+{ -+ return bkey_extent_is_direct_data(k) || -+ k->type == KEY_TYPE_inline_data || -+ k->type == KEY_TYPE_reflink_p; -+} -+ -+/* -+ * Should extent be counted under inode->i_sectors? 
-+ */ -+static inline bool bkey_extent_is_allocation(const struct bkey *k) -+{ -+ switch (k->type) { -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reservation: -+ case KEY_TYPE_reflink_p: -+ case KEY_TYPE_reflink_v: -+ case KEY_TYPE_inline_data: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k) -+{ -+ struct bch_devs_list ret = (struct bch_devs_list) { 0 }; -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(p, ptr) -+ ret.devs[ret.nr++] = ptr->dev; -+ -+ return ret; -+} -+ -+static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k) -+{ -+ struct bch_devs_list ret = (struct bch_devs_list) { 0 }; -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(p, ptr) -+ if (!ptr->cached) -+ ret.devs[ret.nr++] = ptr->dev; -+ -+ return ret; -+} -+ -+static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) -+{ -+ struct bch_devs_list ret = (struct bch_devs_list) { 0 }; -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(p, ptr) -+ if (ptr->cached) -+ ret.devs[ret.nr++] = ptr->dev; -+ -+ return ret; -+} -+ -+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c); -+unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c); -+unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c); -+bool bch2_bkey_is_incompressible(struct bkey_s_c); -+unsigned bch2_bkey_sectors_compressed(struct bkey_s_c); -+bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned); -+unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c); -+ -+void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s, -+ unsigned, unsigned); -+ -+void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr); -+void bch2_extent_ptr_decoded_append(struct bkey_i *, -+ struct extent_ptr_decoded *); -+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s, -+ struct bch_extent_ptr *); -+ -+#define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \ -+do { \ -+ struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \ -+ \ -+ _ptr = &_ptrs.start->ptr; \ -+ \ -+ while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \ -+ if (_cond) { \ -+ _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \ -+ _ptrs = bch2_bkey_ptrs(_k); \ -+ continue; \ -+ } \ -+ \ -+ (_ptr)++; \ -+ } \ -+} while (0) -+ -+void bch2_bkey_drop_device(struct bkey_s, unsigned); -+const struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s_c, unsigned); -+bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned); -+ -+bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c, -+ struct bch_extent_ptr, u64); -+ -+bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); -+void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+const char *bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c); -+ -+void bch2_ptr_swab(struct bkey_s); -+ -+/* Generic extent code: */ -+ -+int bch2_cut_front_s(struct bpos, struct bkey_s); -+int bch2_cut_back_s(struct bpos, struct bkey_s); -+ -+static inline void bch2_cut_front(struct bpos where, struct bkey_i *k) -+{ -+ bch2_cut_front_s(where, bkey_i_to_s(k)); -+} -+ -+static inline void bch2_cut_back(struct bpos where, struct bkey_i *k) -+{ -+ bch2_cut_back_s(where, bkey_i_to_s(k)); -+} -+ -+/** -+ * bch_key_resize - adjust size of @k -+ * -+ * bkey_start_offset(k) will be preserved, modifies where the extent ends -+ */ -+static inline void 
bch2_key_resize(struct bkey *k, unsigned new_size) -+{ -+ k->p.offset -= k->size; -+ k->p.offset += new_size; -+ k->size = new_size; -+} -+ -+/* -+ * In extent_sort_fix_overlapping(), insert_fixup_extent(), -+ * extent_merge_inline() - we're modifying keys in place that are packed. To do -+ * that we have to unpack the key, modify the unpacked key - then this -+ * copies/repacks the unpacked to the original as necessary. -+ */ -+static inline void extent_save(struct btree *b, struct bkey_packed *dst, -+ struct bkey *src) -+{ -+ struct bkey_format *f = &b->format; -+ struct bkey_i *dst_unpacked; -+ -+ if ((dst_unpacked = packed_to_bkey(dst))) -+ dst_unpacked->k = *src; -+ else -+ BUG_ON(!bch2_bkey_pack_key(dst, src, f)); -+} -+ -+#endif /* _BCACHEFS_EXTENTS_H */ -diff --git a/fs/bcachefs/extents_types.h b/fs/bcachefs/extents_types.h -new file mode 100644 -index 000000000000..43d6c341ecca ---- /dev/null -+++ b/fs/bcachefs/extents_types.h -@@ -0,0 +1,40 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EXTENTS_TYPES_H -+#define _BCACHEFS_EXTENTS_TYPES_H -+ -+#include "bcachefs_format.h" -+ -+struct bch_extent_crc_unpacked { -+ u32 compressed_size; -+ u32 uncompressed_size; -+ u32 live_size; -+ -+ u8 csum_type; -+ u8 compression_type; -+ -+ u16 offset; -+ -+ u16 nonce; -+ -+ struct bch_csum csum; -+}; -+ -+struct extent_ptr_decoded { -+ unsigned idx; -+ bool has_ec; -+ struct bch_extent_crc_unpacked crc; -+ struct bch_extent_ptr ptr; -+ struct bch_extent_stripe_ptr ec; -+}; -+ -+struct bch_io_failures { -+ u8 nr; -+ struct bch_dev_io_failures { -+ u8 dev; -+ u8 idx; -+ u8 nr_failed; -+ u8 nr_retries; -+ } devs[BCH_REPLICAS_MAX]; -+}; -+ -+#endif /* _BCACHEFS_EXTENTS_TYPES_H */ -diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h -new file mode 100644 -index 000000000000..26d5cad7e6a5 ---- /dev/null -+++ b/fs/bcachefs/eytzinger.h -@@ -0,0 +1,285 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _EYTZINGER_H -+#define _EYTZINGER_H -+ -+#include -+#include -+ -+#include "util.h" -+ -+/* -+ * Traversal for trees in eytzinger layout - a full binary tree layed out in an -+ * array -+ */ -+ -+/* -+ * One based indexing version: -+ * -+ * With one based indexing each level of the tree starts at a power of two - -+ * good for cacheline alignment: -+ * -+ * Size parameter is treated as if we were using 0 based indexing, however: -+ * valid nodes, and inorder indices, are in the range [1..size) - that is, there -+ * are actually size - 1 elements -+ */ -+ -+static inline unsigned eytzinger1_child(unsigned i, unsigned child) -+{ -+ EBUG_ON(child > 1); -+ -+ return (i << 1) + child; -+} -+ -+static inline unsigned eytzinger1_left_child(unsigned i) -+{ -+ return eytzinger1_child(i, 0); -+} -+ -+static inline unsigned eytzinger1_right_child(unsigned i) -+{ -+ return eytzinger1_child(i, 1); -+} -+ -+static inline unsigned eytzinger1_first(unsigned size) -+{ -+ return rounddown_pow_of_two(size - 1); -+} -+ -+static inline unsigned eytzinger1_last(unsigned size) -+{ -+ return rounddown_pow_of_two(size) - 1; -+} -+ -+/* -+ * eytzinger1_next() and eytzinger1_prev() have the nice properties that -+ * -+ * eytzinger1_next(0) == eytzinger1_first()) -+ * eytzinger1_prev(0) == eytzinger1_last()) -+ * -+ * eytzinger1_prev(eytzinger1_first()) == 0 -+ * eytzinger1_next(eytzinger1_last()) == 0 -+ */ -+ -+static inline unsigned eytzinger1_next(unsigned i, unsigned size) -+{ -+ EBUG_ON(i >= size); -+ -+ if (eytzinger1_right_child(i) < size) { -+ i = eytzinger1_right_child(i); -+ -+ i <<= 
__fls(size) - __fls(i); -+ i >>= i >= size; -+ } else { -+ i >>= ffz(i) + 1; -+ } -+ -+ return i; -+} -+ -+static inline unsigned eytzinger1_prev(unsigned i, unsigned size) -+{ -+ EBUG_ON(i >= size); -+ -+ if (eytzinger1_left_child(i) < size) { -+ i = eytzinger1_left_child(i) + 1; -+ -+ i <<= __fls(size) - __fls(i); -+ i -= 1; -+ i >>= i >= size; -+ } else { -+ i >>= __ffs(i) + 1; -+ } -+ -+ return i; -+} -+ -+static inline unsigned eytzinger1_extra(unsigned size) -+{ -+ return (size - rounddown_pow_of_two(size - 1)) << 1; -+} -+ -+static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ unsigned b = __fls(i); -+ unsigned shift = __fls(size - 1) - b; -+ int s; -+ -+ EBUG_ON(!i || i >= size); -+ -+ i ^= 1U << b; -+ i <<= 1; -+ i |= 1; -+ i <<= shift; -+ -+ /* -+ * sign bit trick: -+ * -+ * if (i > extra) -+ * i -= (i - extra) >> 1; -+ */ -+ s = extra - i; -+ i += (s >> 1) & (s >> 31); -+ -+ return i; -+} -+ -+static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ unsigned shift; -+ int s; -+ -+ EBUG_ON(!i || i >= size); -+ -+ /* -+ * sign bit trick: -+ * -+ * if (i > extra) -+ * i += i - extra; -+ */ -+ s = extra - i; -+ i -= s & (s >> 31); -+ -+ shift = __ffs(i); -+ -+ i >>= shift + 1; -+ i |= 1U << (__fls(size - 1) - shift); -+ -+ return i; -+} -+ -+static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size) -+{ -+ return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size)); -+} -+ -+static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size) -+{ -+ return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size)); -+} -+ -+#define eytzinger1_for_each(_i, _size) \ -+ for ((_i) = eytzinger1_first((_size)); \ -+ (_i) != 0; \ -+ (_i) = eytzinger1_next((_i), (_size))) -+ -+/* Zero based indexing version: */ -+ -+static inline unsigned eytzinger0_child(unsigned i, unsigned child) -+{ -+ EBUG_ON(child > 1); -+ -+ return (i << 1) + 1 + child; -+} -+ -+static inline unsigned eytzinger0_left_child(unsigned i) -+{ -+ return eytzinger0_child(i, 0); -+} -+ -+static inline unsigned eytzinger0_right_child(unsigned i) -+{ -+ return eytzinger0_child(i, 1); -+} -+ -+static inline unsigned eytzinger0_first(unsigned size) -+{ -+ return eytzinger1_first(size + 1) - 1; -+} -+ -+static inline unsigned eytzinger0_last(unsigned size) -+{ -+ return eytzinger1_last(size + 1) - 1; -+} -+ -+static inline unsigned eytzinger0_next(unsigned i, unsigned size) -+{ -+ return eytzinger1_next(i + 1, size + 1) - 1; -+} -+ -+static inline unsigned eytzinger0_prev(unsigned i, unsigned size) -+{ -+ return eytzinger1_prev(i + 1, size + 1) - 1; -+} -+ -+static inline unsigned eytzinger0_extra(unsigned size) -+{ -+ return eytzinger1_extra(size + 1); -+} -+ -+static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ return __eytzinger1_to_inorder(i + 1, size + 1, extra) - 1; -+} -+ -+static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ return __inorder_to_eytzinger1(i + 1, size + 1, extra) - 1; -+} -+ -+static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size) -+{ -+ return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size)); -+} -+ -+static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) -+{ -+ return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size)); -+} -+ -+#define eytzinger0_for_each(_i, _size) \ -+ for ((_i) = eytzinger0_first((_size)); \ -+ (_i) != -1; \ -+ (_i) = 
eytzinger0_next((_i), (_size))) -+ -+typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size); -+ -+/* return greatest node <= @search, or -1 if not found */ -+static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, -+ eytzinger_cmp_fn cmp, const void *search) -+{ -+ unsigned i, n = 0; -+ -+ if (!nr) -+ return -1; -+ -+ do { -+ i = n; -+ n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0); -+ } while (n < nr); -+ -+ if (n & 1) { -+ /* @i was greater than @search, return previous node: */ -+ -+ if (i == eytzinger0_first(nr)) -+ return -1; -+ -+ return eytzinger0_prev(i, nr); -+ } else { -+ return i; -+ } -+} -+ -+#define eytzinger0_find(base, nr, size, _cmp, search) \ -+({ \ -+ void *_base = (base); \ -+ void *_search = (search); \ -+ size_t _nr = (nr); \ -+ size_t _size = (size); \ -+ size_t _i = 0; \ -+ int _res; \ -+ \ -+ while (_i < _nr && \ -+ (_res = _cmp(_search, _base + _i * _size, _size))) \ -+ _i = eytzinger0_child(_i, _res > 0); \ -+ _i; \ -+}) -+ -+void eytzinger0_sort(void *, size_t, size_t, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t)); -+ -+#endif /* _EYTZINGER_H */ -diff --git a/fs/bcachefs/fifo.h b/fs/bcachefs/fifo.h -new file mode 100644 -index 000000000000..cdb272708a4b ---- /dev/null -+++ b/fs/bcachefs/fifo.h -@@ -0,0 +1,127 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FIFO_H -+#define _BCACHEFS_FIFO_H -+ -+#include "util.h" -+ -+#define FIFO(type) \ -+struct { \ -+ size_t front, back, size, mask; \ -+ type *data; \ -+} -+ -+#define DECLARE_FIFO(type, name) FIFO(type) name -+ -+#define fifo_buf_size(fifo) \ -+ ((fifo)->size \ -+ ? roundup_pow_of_two((fifo)->size) * sizeof((fifo)->data[0]) \ -+ : 0) -+ -+#define init_fifo(fifo, _size, _gfp) \ -+({ \ -+ (fifo)->front = (fifo)->back = 0; \ -+ (fifo)->size = (_size); \ -+ (fifo)->mask = (fifo)->size \ -+ ? roundup_pow_of_two((fifo)->size) - 1 \ -+ : 0; \ -+ (fifo)->data = kvpmalloc(fifo_buf_size(fifo), (_gfp)); \ -+}) -+ -+#define free_fifo(fifo) \ -+do { \ -+ kvpfree((fifo)->data, fifo_buf_size(fifo)); \ -+ (fifo)->data = NULL; \ -+} while (0) -+ -+#define fifo_swap(l, r) \ -+do { \ -+ swap((l)->front, (r)->front); \ -+ swap((l)->back, (r)->back); \ -+ swap((l)->size, (r)->size); \ -+ swap((l)->mask, (r)->mask); \ -+ swap((l)->data, (r)->data); \ -+} while (0) -+ -+#define fifo_move(dest, src) \ -+do { \ -+ typeof(*((dest)->data)) _t; \ -+ while (!fifo_full(dest) && \ -+ fifo_pop(src, _t)) \ -+ fifo_push(dest, _t); \ -+} while (0) -+ -+#define fifo_used(fifo) (((fifo)->back - (fifo)->front)) -+#define fifo_free(fifo) ((fifo)->size - fifo_used(fifo)) -+ -+#define fifo_empty(fifo) ((fifo)->front == (fifo)->back) -+#define fifo_full(fifo) (fifo_used(fifo) == (fifo)->size) -+ -+#define fifo_peek_front(fifo) ((fifo)->data[(fifo)->front & (fifo)->mask]) -+#define fifo_peek_back(fifo) ((fifo)->data[((fifo)->back - 1) & (fifo)->mask]) -+ -+#define fifo_entry_idx_abs(fifo, p) \ -+ ((((p) >= &fifo_peek_front(fifo) \ -+ ? (fifo)->front : (fifo)->back) & ~(fifo)->mask) + \ -+ (((p) - (fifo)->data))) -+ -+#define fifo_entry_idx(fifo, p) (((p) - &fifo_peek_front(fifo)) & (fifo)->mask) -+#define fifo_idx_entry(fifo, i) (fifo)->data[((fifo)->front + (i)) & (fifo)->mask] -+ -+#define fifo_push_back_ref(f) \ -+ (fifo_full((f)) ? NULL : &(f)->data[(f)->back++ & (f)->mask]) -+ -+#define fifo_push_front_ref(f) \ -+ (fifo_full((f)) ? 
NULL : &(f)->data[--(f)->front & (f)->mask]) -+ -+#define fifo_push_back(fifo, new) \ -+({ \ -+ typeof((fifo)->data) _r = fifo_push_back_ref(fifo); \ -+ if (_r) \ -+ *_r = (new); \ -+ _r != NULL; \ -+}) -+ -+#define fifo_push_front(fifo, new) \ -+({ \ -+ typeof((fifo)->data) _r = fifo_push_front_ref(fifo); \ -+ if (_r) \ -+ *_r = (new); \ -+ _r != NULL; \ -+}) -+ -+#define fifo_pop_front(fifo, i) \ -+({ \ -+ bool _r = !fifo_empty((fifo)); \ -+ if (_r) \ -+ (i) = (fifo)->data[(fifo)->front++ & (fifo)->mask]; \ -+ _r; \ -+}) -+ -+#define fifo_pop_back(fifo, i) \ -+({ \ -+ bool _r = !fifo_empty((fifo)); \ -+ if (_r) \ -+ (i) = (fifo)->data[--(fifo)->back & (fifo)->mask]; \ -+ _r; \ -+}) -+ -+#define fifo_push_ref(fifo) fifo_push_back_ref(fifo) -+#define fifo_push(fifo, i) fifo_push_back(fifo, (i)) -+#define fifo_pop(fifo, i) fifo_pop_front(fifo, (i)) -+#define fifo_peek(fifo) fifo_peek_front(fifo) -+ -+#define fifo_for_each_entry(_entry, _fifo, _iter) \ -+ for (typecheck(typeof((_fifo)->front), _iter), \ -+ (_iter) = (_fifo)->front; \ -+ ((_iter != (_fifo)->back) && \ -+ (_entry = (_fifo)->data[(_iter) & (_fifo)->mask], true)); \ -+ (_iter)++) -+ -+#define fifo_for_each_entry_ptr(_ptr, _fifo, _iter) \ -+ for (typecheck(typeof((_fifo)->front), _iter), \ -+ (_iter) = (_fifo)->front; \ -+ ((_iter != (_fifo)->back) && \ -+ (_ptr = &(_fifo)->data[(_iter) & (_fifo)->mask], true)); \ -+ (_iter)++) -+ -+#endif /* _BCACHEFS_FIFO_H */ -diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c -new file mode 100644 -index 000000000000..878419d40992 ---- /dev/null -+++ b/fs/bcachefs/fs-common.c -@@ -0,0 +1,317 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "acl.h" -+#include "btree_update.h" -+#include "dirent.h" -+#include "fs-common.h" -+#include "inode.h" -+#include "xattr.h" -+ -+#include -+ -+int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, -+ struct bch_inode_unpacked *dir_u, -+ struct bch_inode_unpacked *new_inode, -+ const struct qstr *name, -+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev, -+ struct posix_acl *default_acl, -+ struct posix_acl *acl) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *dir_iter = NULL; -+ struct bch_hash_info hash = bch2_hash_info_init(c, new_inode); -+ u64 now = bch2_current_time(trans->c); -+ int ret; -+ -+ dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dir_iter); -+ if (ret) -+ goto err; -+ -+ bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); -+ -+ if (!name) -+ new_inode->bi_flags |= BCH_INODE_UNLINKED; -+ -+ ret = bch2_inode_create(trans, new_inode, -+ BLOCKDEV_INODE_MAX, 0, -+ &c->unused_inode_hint); -+ if (ret) -+ goto err; -+ -+ if (default_acl) { -+ ret = bch2_set_acl_trans(trans, new_inode, &hash, -+ default_acl, ACL_TYPE_DEFAULT); -+ if (ret) -+ goto err; -+ } -+ -+ if (acl) { -+ ret = bch2_set_acl_trans(trans, new_inode, &hash, -+ acl, ACL_TYPE_ACCESS); -+ if (ret) -+ goto err; -+ } -+ -+ if (name) { -+ struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u); -+ dir_u->bi_mtime = dir_u->bi_ctime = now; -+ -+ if (S_ISDIR(new_inode->bi_mode)) -+ dir_u->bi_nlink++; -+ -+ ret = bch2_inode_write(trans, dir_iter, dir_u); -+ if (ret) -+ goto err; -+ -+ ret = bch2_dirent_create(trans, dir_inum, &dir_hash, -+ mode_to_type(new_inode->bi_mode), -+ name, new_inode->bi_inum, -+ BCH_HASH_SET_MUST_CREATE); -+ if (ret) -+ goto err; -+ } -+err: -+ bch2_trans_iter_put(trans, dir_iter); -+ return ret; -+} -+ -+int bch2_link_trans(struct 
btree_trans *trans, u64 dir_inum, -+ u64 inum, struct bch_inode_unpacked *dir_u, -+ struct bch_inode_unpacked *inode_u, const struct qstr *name) -+{ -+ struct btree_iter *dir_iter = NULL, *inode_iter = NULL; -+ struct bch_hash_info dir_hash; -+ u64 now = bch2_current_time(trans->c); -+ int ret; -+ -+ inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ if (ret) -+ goto err; -+ -+ inode_u->bi_ctime = now; -+ bch2_inode_nlink_inc(inode_u); -+ -+ dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, 0); -+ ret = PTR_ERR_OR_ZERO(dir_iter); -+ if (ret) -+ goto err; -+ -+ dir_u->bi_mtime = dir_u->bi_ctime = now; -+ -+ dir_hash = bch2_hash_info_init(trans->c, dir_u); -+ -+ ret = bch2_dirent_create(trans, dir_inum, &dir_hash, -+ mode_to_type(inode_u->bi_mode), -+ name, inum, BCH_HASH_SET_MUST_CREATE) ?: -+ bch2_inode_write(trans, dir_iter, dir_u) ?: -+ bch2_inode_write(trans, inode_iter, inode_u); -+err: -+ bch2_trans_iter_put(trans, dir_iter); -+ bch2_trans_iter_put(trans, inode_iter); -+ return ret; -+} -+ -+int bch2_unlink_trans(struct btree_trans *trans, -+ u64 dir_inum, struct bch_inode_unpacked *dir_u, -+ struct bch_inode_unpacked *inode_u, -+ const struct qstr *name) -+{ -+ struct btree_iter *dir_iter = NULL, *dirent_iter = NULL, -+ *inode_iter = NULL; -+ struct bch_hash_info dir_hash; -+ u64 inum, now = bch2_current_time(trans->c); -+ struct bkey_s_c k; -+ int ret; -+ -+ dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dir_iter); -+ if (ret) -+ goto err; -+ -+ dir_hash = bch2_hash_info_init(trans->c, dir_u); -+ -+ dirent_iter = __bch2_dirent_lookup_trans(trans, dir_inum, &dir_hash, -+ name, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dirent_iter); -+ if (ret) -+ goto err; -+ -+ k = bch2_btree_iter_peek_slot(dirent_iter); -+ inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); -+ -+ inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ if (ret) -+ goto err; -+ -+ dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now; -+ dir_u->bi_nlink -= S_ISDIR(inode_u->bi_mode); -+ bch2_inode_nlink_dec(inode_u); -+ -+ ret = (S_ISDIR(inode_u->bi_mode) -+ ? 
bch2_empty_dir_trans(trans, inum) -+ : 0) ?: -+ bch2_dirent_delete_at(trans, &dir_hash, dirent_iter) ?: -+ bch2_inode_write(trans, dir_iter, dir_u) ?: -+ bch2_inode_write(trans, inode_iter, inode_u); -+err: -+ bch2_trans_iter_put(trans, inode_iter); -+ bch2_trans_iter_put(trans, dirent_iter); -+ bch2_trans_iter_put(trans, dir_iter); -+ return ret; -+} -+ -+bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, -+ struct bch_inode_unpacked *src_u) -+{ -+ u64 src, dst; -+ unsigned id; -+ bool ret = false; -+ -+ for (id = 0; id < Inode_opt_nr; id++) { -+ if (dst_u->bi_fields_set & (1 << id)) -+ continue; -+ -+ src = bch2_inode_opt_get(src_u, id); -+ dst = bch2_inode_opt_get(dst_u, id); -+ -+ if (src == dst) -+ continue; -+ -+ bch2_inode_opt_set(dst_u, id, src); -+ ret = true; -+ } -+ -+ return ret; -+} -+ -+int bch2_rename_trans(struct btree_trans *trans, -+ u64 src_dir, struct bch_inode_unpacked *src_dir_u, -+ u64 dst_dir, struct bch_inode_unpacked *dst_dir_u, -+ struct bch_inode_unpacked *src_inode_u, -+ struct bch_inode_unpacked *dst_inode_u, -+ const struct qstr *src_name, -+ const struct qstr *dst_name, -+ enum bch_rename_mode mode) -+{ -+ struct btree_iter *src_dir_iter = NULL, *dst_dir_iter = NULL; -+ struct btree_iter *src_inode_iter = NULL, *dst_inode_iter = NULL; -+ struct bch_hash_info src_hash, dst_hash; -+ u64 src_inode, dst_inode, now = bch2_current_time(trans->c); -+ int ret; -+ -+ src_dir_iter = bch2_inode_peek(trans, src_dir_u, src_dir, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(src_dir_iter); -+ if (ret) -+ goto err; -+ -+ src_hash = bch2_hash_info_init(trans->c, src_dir_u); -+ -+ if (dst_dir != src_dir) { -+ dst_dir_iter = bch2_inode_peek(trans, dst_dir_u, dst_dir, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dst_dir_iter); -+ if (ret) -+ goto err; -+ -+ dst_hash = bch2_hash_info_init(trans->c, dst_dir_u); -+ } else { -+ dst_dir_u = src_dir_u; -+ dst_hash = src_hash; -+ } -+ -+ ret = bch2_dirent_rename(trans, -+ src_dir, &src_hash, -+ dst_dir, &dst_hash, -+ src_name, &src_inode, -+ dst_name, &dst_inode, -+ mode); -+ if (ret) -+ goto err; -+ -+ src_inode_iter = bch2_inode_peek(trans, src_inode_u, src_inode, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(src_inode_iter); -+ if (ret) -+ goto err; -+ -+ if (dst_inode) { -+ dst_inode_iter = bch2_inode_peek(trans, dst_inode_u, dst_inode, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dst_inode_iter); -+ if (ret) -+ goto err; -+ } -+ -+ if (mode == BCH_RENAME_OVERWRITE) { -+ if (S_ISDIR(src_inode_u->bi_mode) != -+ S_ISDIR(dst_inode_u->bi_mode)) { -+ ret = -ENOTDIR; -+ goto err; -+ } -+ -+ if (S_ISDIR(dst_inode_u->bi_mode) && -+ bch2_empty_dir_trans(trans, dst_inode)) { -+ ret = -ENOTEMPTY; -+ goto err; -+ } -+ } -+ -+ if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) && -+ S_ISDIR(src_inode_u->bi_mode)) { -+ ret = -EXDEV; -+ goto err; -+ } -+ -+ if (mode == BCH_RENAME_EXCHANGE && -+ bch2_reinherit_attrs(dst_inode_u, src_dir_u) && -+ S_ISDIR(dst_inode_u->bi_mode)) { -+ ret = -EXDEV; -+ goto err; -+ } -+ -+ if (S_ISDIR(src_inode_u->bi_mode)) { -+ src_dir_u->bi_nlink--; -+ dst_dir_u->bi_nlink++; -+ } -+ -+ if (dst_inode && S_ISDIR(dst_inode_u->bi_mode)) { -+ dst_dir_u->bi_nlink--; -+ src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE; -+ } -+ -+ if (mode == BCH_RENAME_OVERWRITE) -+ bch2_inode_nlink_dec(dst_inode_u); -+ -+ src_dir_u->bi_mtime = now; -+ src_dir_u->bi_ctime = now; -+ -+ if (src_dir != dst_dir) { -+ dst_dir_u->bi_mtime = now; -+ dst_dir_u->bi_ctime = now; -+ } -+ -+ src_inode_u->bi_ctime = now; -+ -+ if 
(dst_inode) -+ dst_inode_u->bi_ctime = now; -+ -+ ret = bch2_inode_write(trans, src_dir_iter, src_dir_u) ?: -+ (src_dir != dst_dir -+ ? bch2_inode_write(trans, dst_dir_iter, dst_dir_u) -+ : 0 ) ?: -+ bch2_inode_write(trans, src_inode_iter, src_inode_u) ?: -+ (dst_inode -+ ? bch2_inode_write(trans, dst_inode_iter, dst_inode_u) -+ : 0 ); -+err: -+ bch2_trans_iter_put(trans, dst_inode_iter); -+ bch2_trans_iter_put(trans, src_inode_iter); -+ bch2_trans_iter_put(trans, dst_dir_iter); -+ bch2_trans_iter_put(trans, src_dir_iter); -+ return ret; -+} -diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h -new file mode 100644 -index 000000000000..2273b7961c9b ---- /dev/null -+++ b/fs/bcachefs/fs-common.h -@@ -0,0 +1,37 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_COMMON_H -+#define _BCACHEFS_FS_COMMON_H -+ -+struct posix_acl; -+ -+int bch2_create_trans(struct btree_trans *, u64, -+ struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *, -+ uid_t, gid_t, umode_t, dev_t, -+ struct posix_acl *, -+ struct posix_acl *); -+ -+int bch2_link_trans(struct btree_trans *, u64, -+ u64, struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *); -+ -+int bch2_unlink_trans(struct btree_trans *, -+ u64, struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *); -+ -+int bch2_rename_trans(struct btree_trans *, -+ u64, struct bch_inode_unpacked *, -+ u64, struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *, -+ const struct qstr *, -+ enum bch_rename_mode); -+ -+bool bch2_reinherit_attrs(struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *); -+ -+#endif /* _BCACHEFS_FS_COMMON_H */ -diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c -new file mode 100644 -index 000000000000..55004998536d ---- /dev/null -+++ b/fs/bcachefs/fs-io.c -@@ -0,0 +1,3133 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_FS -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "clock.h" -+#include "error.h" -+#include "extents.h" -+#include "extent_update.h" -+#include "fs.h" -+#include "fs-io.h" -+#include "fsck.h" -+#include "inode.h" -+#include "journal.h" -+#include "io.h" -+#include "keylist.h" -+#include "quota.h" -+#include "reflink.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+struct quota_res { -+ u64 sectors; -+}; -+ -+struct bch_writepage_io { -+ struct closure cl; -+ struct bch_inode_info *inode; -+ -+ /* must be last: */ -+ struct bch_write_op op; -+}; -+ -+struct dio_write { -+ struct completion done; -+ struct kiocb *req; -+ struct mm_struct *mm; -+ unsigned loop:1, -+ sync:1, -+ free_iov:1; -+ struct quota_res quota_res; -+ u64 written; -+ -+ struct iov_iter iter; -+ struct iovec inline_vecs[2]; -+ -+ /* must be last: */ -+ struct bch_write_op op; -+}; -+ -+struct dio_read { -+ struct closure cl; -+ struct kiocb *req; -+ long ret; -+ struct bch_read_bio rbio; -+}; -+ -+/* pagecache_block must be held */ -+static int write_invalidate_inode_pages_range(struct address_space *mapping, -+ loff_t start, loff_t end) -+{ -+ int ret; -+ -+ /* -+ * XXX: the way this is currently implemented, we can spin if a process -+ * is continually redirtying a specific page -+ */ -+ do { -+ if (!mapping->nrpages && -+ !mapping->nrexceptional) -+ return 0; -+ 
-+ ret = filemap_write_and_wait_range(mapping, start, end); -+ if (ret) -+ break; -+ -+ if (!mapping->nrpages) -+ return 0; -+ -+ ret = invalidate_inode_pages2_range(mapping, -+ start >> PAGE_SHIFT, -+ end >> PAGE_SHIFT); -+ } while (ret == -EBUSY); -+ -+ return ret; -+} -+ -+/* quotas */ -+ -+#ifdef CONFIG_BCACHEFS_QUOTA -+ -+static void bch2_quota_reservation_put(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res) -+{ -+ if (!res->sectors) -+ return; -+ -+ mutex_lock(&inode->ei_quota_lock); -+ BUG_ON(res->sectors > inode->ei_quota_reserved); -+ -+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, -+ -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC); -+ inode->ei_quota_reserved -= res->sectors; -+ mutex_unlock(&inode->ei_quota_lock); -+ -+ res->sectors = 0; -+} -+ -+static int bch2_quota_reservation_add(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res, -+ unsigned sectors, -+ bool check_enospc) -+{ -+ int ret; -+ -+ mutex_lock(&inode->ei_quota_lock); -+ ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, -+ check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); -+ if (likely(!ret)) { -+ inode->ei_quota_reserved += sectors; -+ res->sectors += sectors; -+ } -+ mutex_unlock(&inode->ei_quota_lock); -+ -+ return ret; -+} -+ -+#else -+ -+static void bch2_quota_reservation_put(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res) -+{ -+} -+ -+static int bch2_quota_reservation_add(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res, -+ unsigned sectors, -+ bool check_enospc) -+{ -+ return 0; -+} -+ -+#endif -+ -+/* i_size updates: */ -+ -+struct inode_new_size { -+ loff_t new_size; -+ u64 now; -+ unsigned fields; -+}; -+ -+static int inode_set_size(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct inode_new_size *s = p; -+ -+ bi->bi_size = s->new_size; -+ if (s->fields & ATTR_ATIME) -+ bi->bi_atime = s->now; -+ if (s->fields & ATTR_MTIME) -+ bi->bi_mtime = s->now; -+ if (s->fields & ATTR_CTIME) -+ bi->bi_ctime = s->now; -+ -+ return 0; -+} -+ -+int __must_check bch2_write_inode_size(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ loff_t new_size, unsigned fields) -+{ -+ struct inode_new_size s = { -+ .new_size = new_size, -+ .now = bch2_current_time(c), -+ .fields = fields, -+ }; -+ -+ return bch2_write_inode(c, inode, inode_set_size, &s, fields); -+} -+ -+static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, -+ struct quota_res *quota_res, s64 sectors) -+{ -+ if (!sectors) -+ return; -+ -+ mutex_lock(&inode->ei_quota_lock); -+#ifdef CONFIG_BCACHEFS_QUOTA -+ if (quota_res && sectors > 0) { -+ BUG_ON(sectors > quota_res->sectors); -+ BUG_ON(sectors > inode->ei_quota_reserved); -+ -+ quota_res->sectors -= sectors; -+ inode->ei_quota_reserved -= sectors; -+ } else { -+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN); -+ } -+#endif -+ inode->v.i_blocks += sectors; -+ mutex_unlock(&inode->ei_quota_lock); -+} -+ -+/* page state: */ -+ -+/* stored in page->private: */ -+ -+struct bch_page_sector { -+ /* Uncompressed, fully allocated replicas: */ -+ unsigned nr_replicas:3; -+ -+ /* Owns PAGE_SECTORS * replicas_reserved sized reservation: */ -+ unsigned replicas_reserved:3; -+ -+ /* i_sectors: */ -+ enum { -+ SECTOR_UNALLOCATED, -+ SECTOR_RESERVED, -+ SECTOR_DIRTY, -+ SECTOR_ALLOCATED, -+ } state:2; -+}; -+ -+struct bch_page_state { -+ spinlock_t lock; -+ atomic_t write_count; -+ struct bch_page_sector 
s[PAGE_SECTORS]; -+}; -+ -+static inline struct bch_page_state *__bch2_page_state(struct page *page) -+{ -+ return page_has_private(page) -+ ? (struct bch_page_state *) page_private(page) -+ : NULL; -+} -+ -+static inline struct bch_page_state *bch2_page_state(struct page *page) -+{ -+ EBUG_ON(!PageLocked(page)); -+ -+ return __bch2_page_state(page); -+} -+ -+/* for newly allocated pages: */ -+static void __bch2_page_state_release(struct page *page) -+{ -+ struct bch_page_state *s = __bch2_page_state(page); -+ -+ if (!s) -+ return; -+ -+ ClearPagePrivate(page); -+ set_page_private(page, 0); -+ put_page(page); -+ kfree(s); -+} -+ -+static void bch2_page_state_release(struct page *page) -+{ -+ struct bch_page_state *s = bch2_page_state(page); -+ -+ if (!s) -+ return; -+ -+ ClearPagePrivate(page); -+ set_page_private(page, 0); -+ put_page(page); -+ kfree(s); -+} -+ -+/* for newly allocated pages: */ -+static struct bch_page_state *__bch2_page_state_create(struct page *page, -+ gfp_t gfp) -+{ -+ struct bch_page_state *s; -+ -+ s = kzalloc(sizeof(*s), GFP_NOFS|gfp); -+ if (!s) -+ return NULL; -+ -+ spin_lock_init(&s->lock); -+ /* -+ * migrate_page_move_mapping() assumes that pages with private data -+ * have their count elevated by 1. -+ */ -+ get_page(page); -+ set_page_private(page, (unsigned long) s); -+ SetPagePrivate(page); -+ return s; -+} -+ -+static struct bch_page_state *bch2_page_state_create(struct page *page, -+ gfp_t gfp) -+{ -+ return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp); -+} -+ -+static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode) -+{ -+ /* XXX: this should not be open coded */ -+ return inode->ei_inode.bi_data_replicas -+ ? inode->ei_inode.bi_data_replicas - 1 -+ : c->opts.data_replicas; -+} -+ -+static inline unsigned sectors_to_reserve(struct bch_page_sector *s, -+ unsigned nr_replicas) -+{ -+ return max(0, (int) nr_replicas - -+ s->nr_replicas - -+ s->replicas_reserved); -+} -+ -+static int bch2_get_page_disk_reservation(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct page *page, bool check_enospc) -+{ -+ struct bch_page_state *s = bch2_page_state_create(page, 0); -+ unsigned nr_replicas = inode_nr_replicas(c, inode); -+ struct disk_reservation disk_res = { 0 }; -+ unsigned i, disk_res_sectors = 0; -+ int ret; -+ -+ if (!s) -+ return -ENOMEM; -+ -+ for (i = 0; i < ARRAY_SIZE(s->s); i++) -+ disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas); -+ -+ if (!disk_res_sectors) -+ return 0; -+ -+ ret = bch2_disk_reservation_get(c, &disk_res, -+ disk_res_sectors, 1, -+ !check_enospc -+ ? 
BCH_DISK_RESERVATION_NOFAIL -+ : 0); -+ if (unlikely(ret)) -+ return ret; -+ -+ for (i = 0; i < ARRAY_SIZE(s->s); i++) -+ s->s[i].replicas_reserved += -+ sectors_to_reserve(&s->s[i], nr_replicas); -+ -+ return 0; -+} -+ -+struct bch2_page_reservation { -+ struct disk_reservation disk; -+ struct quota_res quota; -+}; -+ -+static void bch2_page_reservation_init(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch2_page_reservation *res) -+{ -+ memset(res, 0, sizeof(*res)); -+ -+ res->disk.nr_replicas = inode_nr_replicas(c, inode); -+} -+ -+static void bch2_page_reservation_put(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch2_page_reservation *res) -+{ -+ bch2_disk_reservation_put(c, &res->disk); -+ bch2_quota_reservation_put(c, inode, &res->quota); -+} -+ -+static int bch2_page_reservation_get(struct bch_fs *c, -+ struct bch_inode_info *inode, struct page *page, -+ struct bch2_page_reservation *res, -+ unsigned offset, unsigned len, bool check_enospc) -+{ -+ struct bch_page_state *s = bch2_page_state_create(page, 0); -+ unsigned i, disk_sectors = 0, quota_sectors = 0; -+ int ret; -+ -+ if (!s) -+ return -ENOMEM; -+ -+ for (i = round_down(offset, block_bytes(c)) >> 9; -+ i < round_up(offset + len, block_bytes(c)) >> 9; -+ i++) { -+ disk_sectors += sectors_to_reserve(&s->s[i], -+ res->disk.nr_replicas); -+ quota_sectors += s->s[i].state == SECTOR_UNALLOCATED; -+ } -+ -+ if (disk_sectors) { -+ ret = bch2_disk_reservation_add(c, &res->disk, -+ disk_sectors, -+ !check_enospc -+ ? BCH_DISK_RESERVATION_NOFAIL -+ : 0); -+ if (unlikely(ret)) -+ return ret; -+ } -+ -+ if (quota_sectors) { -+ ret = bch2_quota_reservation_add(c, inode, &res->quota, -+ quota_sectors, -+ check_enospc); -+ if (unlikely(ret)) { -+ struct disk_reservation tmp = { -+ .sectors = disk_sectors -+ }; -+ -+ bch2_disk_reservation_put(c, &tmp); -+ res->disk.sectors -= disk_sectors; -+ return ret; -+ } -+ } -+ -+ return 0; -+} -+ -+static void bch2_clear_page_bits(struct page *page) -+{ -+ struct bch_inode_info *inode = to_bch_ei(page->mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_page_state *s = bch2_page_state(page); -+ struct disk_reservation disk_res = { 0 }; -+ int i, dirty_sectors = 0; -+ -+ if (!s) -+ return; -+ -+ EBUG_ON(!PageLocked(page)); -+ EBUG_ON(PageWriteback(page)); -+ -+ for (i = 0; i < ARRAY_SIZE(s->s); i++) { -+ disk_res.sectors += s->s[i].replicas_reserved; -+ s->s[i].replicas_reserved = 0; -+ -+ if (s->s[i].state == SECTOR_DIRTY) { -+ dirty_sectors++; -+ s->s[i].state = SECTOR_UNALLOCATED; -+ } -+ } -+ -+ bch2_disk_reservation_put(c, &disk_res); -+ -+ if (dirty_sectors) -+ i_sectors_acct(c, inode, NULL, -dirty_sectors); -+ -+ bch2_page_state_release(page); -+} -+ -+static void bch2_set_page_dirty(struct bch_fs *c, -+ struct bch_inode_info *inode, struct page *page, -+ struct bch2_page_reservation *res, -+ unsigned offset, unsigned len) -+{ -+ struct bch_page_state *s = bch2_page_state(page); -+ unsigned i, dirty_sectors = 0; -+ -+ WARN_ON((u64) page_offset(page) + offset + len > -+ round_up((u64) i_size_read(&inode->v), block_bytes(c))); -+ -+ spin_lock(&s->lock); -+ -+ for (i = round_down(offset, block_bytes(c)) >> 9; -+ i < round_up(offset + len, block_bytes(c)) >> 9; -+ i++) { -+ unsigned sectors = sectors_to_reserve(&s->s[i], -+ res->disk.nr_replicas); -+ -+ /* -+ * This can happen if we race with the error path in -+ * bch2_writepage_io_done(): -+ */ -+ sectors = min_t(unsigned, sectors, res->disk.sectors); -+ -+ s->s[i].replicas_reserved += 
sectors; -+ res->disk.sectors -= sectors; -+ -+ if (s->s[i].state == SECTOR_UNALLOCATED) -+ dirty_sectors++; -+ -+ s->s[i].state = max_t(unsigned, s->s[i].state, SECTOR_DIRTY); -+ } -+ -+ spin_unlock(&s->lock); -+ -+ if (dirty_sectors) -+ i_sectors_acct(c, inode, &res->quota, dirty_sectors); -+ -+ if (!PageDirty(page)) -+ __set_page_dirty_nobuffers(page); -+} -+ -+vm_fault_t bch2_page_fault(struct vm_fault *vmf) -+{ -+ struct file *file = vmf->vma->vm_file; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ int ret; -+ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ ret = filemap_fault(vmf); -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ -+ return ret; -+} -+ -+vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) -+{ -+ struct page *page = vmf->page; -+ struct file *file = vmf->vma->vm_file; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct address_space *mapping = file->f_mapping; -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch2_page_reservation res; -+ unsigned len; -+ loff_t isize; -+ int ret = VM_FAULT_LOCKED; -+ -+ bch2_page_reservation_init(c, inode, &res); -+ -+ sb_start_pagefault(inode->v.i_sb); -+ file_update_time(file); -+ -+ /* -+ * Not strictly necessary, but helps avoid dio writes livelocking in -+ * write_invalidate_inode_pages_range() - can drop this if/when we get -+ * a write_invalidate_inode_pages_range() that works without dropping -+ * page lock before invalidating page -+ */ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ -+ lock_page(page); -+ isize = i_size_read(&inode->v); -+ -+ if (page->mapping != mapping || page_offset(page) >= isize) { -+ unlock_page(page); -+ ret = VM_FAULT_NOPAGE; -+ goto out; -+ } -+ -+ len = min_t(loff_t, PAGE_SIZE, isize - page_offset(page)); -+ -+ if (bch2_page_reservation_get(c, inode, page, &res, 0, len, true)) { -+ unlock_page(page); -+ ret = VM_FAULT_SIGBUS; -+ goto out; -+ } -+ -+ bch2_set_page_dirty(c, inode, page, &res, 0, len); -+ bch2_page_reservation_put(c, inode, &res); -+ -+ wait_for_stable_page(page); -+out: -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ sb_end_pagefault(inode->v.i_sb); -+ -+ return ret; -+} -+ -+void bch2_invalidatepage(struct page *page, unsigned int offset, -+ unsigned int length) -+{ -+ if (offset || length < PAGE_SIZE) -+ return; -+ -+ bch2_clear_page_bits(page); -+} -+ -+int bch2_releasepage(struct page *page, gfp_t gfp_mask) -+{ -+ if (PageDirty(page)) -+ return 0; -+ -+ bch2_clear_page_bits(page); -+ return 1; -+} -+ -+#ifdef CONFIG_MIGRATION -+int bch2_migrate_page(struct address_space *mapping, struct page *newpage, -+ struct page *page, enum migrate_mode mode) -+{ -+ int ret; -+ -+ EBUG_ON(!PageLocked(page)); -+ EBUG_ON(!PageLocked(newpage)); -+ -+ ret = migrate_page_move_mapping(mapping, newpage, page, 0); -+ if (ret != MIGRATEPAGE_SUCCESS) -+ return ret; -+ -+ if (PagePrivate(page)) { -+ ClearPagePrivate(page); -+ get_page(newpage); -+ set_page_private(newpage, page_private(page)); -+ set_page_private(page, 0); -+ put_page(page); -+ SetPagePrivate(newpage); -+ } -+ -+ if (mode != MIGRATE_SYNC_NO_COPY) -+ migrate_page_copy(newpage, page); -+ else -+ migrate_page_states(newpage, page); -+ return MIGRATEPAGE_SUCCESS; -+} -+#endif -+ -+/* readpage(s): */ -+ -+static void bch2_readpages_end_io(struct bio *bio) -+{ -+ struct bvec_iter_all iter; -+ struct bio_vec *bv; -+ -+ bio_for_each_segment_all(bv, bio, iter) { -+ struct page *page = bv->bv_page; -+ -+ if (!bio->bi_status) { -+ SetPageUptodate(page); -+ } else { -+ 
ClearPageUptodate(page); -+ SetPageError(page); -+ } -+ unlock_page(page); -+ } -+ -+ bio_put(bio); -+} -+ -+static inline void page_state_init_for_read(struct page *page) -+{ -+ SetPagePrivate(page); -+ page->private = 0; -+} -+ -+struct readpages_iter { -+ struct address_space *mapping; -+ struct page **pages; -+ unsigned nr_pages; -+ unsigned nr_added; -+ unsigned idx; -+ pgoff_t offset; -+}; -+ -+static int readpages_iter_init(struct readpages_iter *iter, -+ struct address_space *mapping, -+ struct list_head *pages, unsigned nr_pages) -+{ -+ memset(iter, 0, sizeof(*iter)); -+ -+ iter->mapping = mapping; -+ iter->offset = list_last_entry(pages, struct page, lru)->index; -+ -+ iter->pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS); -+ if (!iter->pages) -+ return -ENOMEM; -+ -+ while (!list_empty(pages)) { -+ struct page *page = list_last_entry(pages, struct page, lru); -+ -+ __bch2_page_state_create(page, __GFP_NOFAIL); -+ -+ iter->pages[iter->nr_pages++] = page; -+ list_del(&page->lru); -+ } -+ -+ return 0; -+} -+ -+static inline struct page *readpage_iter_next(struct readpages_iter *iter) -+{ -+ struct page *page; -+ unsigned i; -+ int ret; -+ -+ BUG_ON(iter->idx > iter->nr_added); -+ BUG_ON(iter->nr_added > iter->nr_pages); -+ -+ if (iter->idx < iter->nr_added) -+ goto out; -+ -+ while (1) { -+ if (iter->idx == iter->nr_pages) -+ return NULL; -+ -+ ret = add_to_page_cache_lru_vec(iter->mapping, -+ iter->pages + iter->nr_added, -+ iter->nr_pages - iter->nr_added, -+ iter->offset + iter->nr_added, -+ GFP_NOFS); -+ if (ret > 0) -+ break; -+ -+ page = iter->pages[iter->nr_added]; -+ iter->idx++; -+ iter->nr_added++; -+ -+ __bch2_page_state_release(page); -+ put_page(page); -+ } -+ -+ iter->nr_added += ret; -+ -+ for (i = iter->idx; i < iter->nr_added; i++) -+ put_page(iter->pages[i]); -+out: -+ EBUG_ON(iter->pages[iter->idx]->index != iter->offset + iter->idx); -+ -+ return iter->pages[iter->idx]; -+} -+ -+static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k) -+{ -+ struct bvec_iter iter; -+ struct bio_vec bv; -+ unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v -+ ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k); -+ unsigned state = k.k->type == KEY_TYPE_reservation -+ ? 
SECTOR_RESERVED -+ : SECTOR_ALLOCATED; -+ -+ bio_for_each_segment(bv, bio, iter) { -+ struct bch_page_state *s = bch2_page_state(bv.bv_page); -+ unsigned i; -+ -+ for (i = bv.bv_offset >> 9; -+ i < (bv.bv_offset + bv.bv_len) >> 9; -+ i++) { -+ s->s[i].nr_replicas = nr_ptrs; -+ s->s[i].state = state; -+ } -+ } -+} -+ -+static bool extent_partial_reads_expensive(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ struct bch_extent_crc_unpacked crc; -+ const union bch_extent_entry *i; -+ -+ bkey_for_each_crc(k.k, ptrs, crc, i) -+ if (crc.csum_type || crc.compression_type) -+ return true; -+ return false; -+} -+ -+static void readpage_bio_extend(struct readpages_iter *iter, -+ struct bio *bio, -+ unsigned sectors_this_extent, -+ bool get_more) -+{ -+ while (bio_sectors(bio) < sectors_this_extent && -+ bio->bi_vcnt < bio->bi_max_vecs) { -+ pgoff_t page_offset = bio_end_sector(bio) >> PAGE_SECTOR_SHIFT; -+ struct page *page = readpage_iter_next(iter); -+ int ret; -+ -+ if (page) { -+ if (iter->offset + iter->idx != page_offset) -+ break; -+ -+ iter->idx++; -+ } else { -+ if (!get_more) -+ break; -+ -+ page = xa_load(&iter->mapping->i_pages, page_offset); -+ if (page && !xa_is_value(page)) -+ break; -+ -+ page = __page_cache_alloc(readahead_gfp_mask(iter->mapping)); -+ if (!page) -+ break; -+ -+ if (!__bch2_page_state_create(page, 0)) { -+ put_page(page); -+ break; -+ } -+ -+ ret = add_to_page_cache_lru(page, iter->mapping, -+ page_offset, GFP_NOFS); -+ if (ret) { -+ __bch2_page_state_release(page); -+ put_page(page); -+ break; -+ } -+ -+ put_page(page); -+ } -+ -+ BUG_ON(!bio_add_page(bio, page, PAGE_SIZE, 0)); -+ } -+} -+ -+static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, -+ struct bch_read_bio *rbio, u64 inum, -+ struct readpages_iter *readpages_iter) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_on_stack sk; -+ int flags = BCH_READ_RETRY_IF_STALE| -+ BCH_READ_MAY_PROMOTE; -+ int ret = 0; -+ -+ rbio->c = c; -+ rbio->start_time = local_clock(); -+ -+ bkey_on_stack_init(&sk); -+retry: -+ while (1) { -+ struct bkey_s_c k; -+ unsigned bytes, sectors, offset_into_extent; -+ -+ bch2_btree_iter_set_pos(iter, -+ POS(inum, rbio->bio.bi_iter.bi_sector)); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ break; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ -+ offset_into_extent = iter->pos.offset - -+ bkey_start_offset(k.k); -+ sectors = k.k->size - offset_into_extent; -+ -+ ret = bch2_read_indirect_extent(trans, -+ &offset_into_extent, &sk); -+ if (ret) -+ break; -+ -+ sectors = min(sectors, k.k->size - offset_into_extent); -+ -+ bch2_trans_unlock(trans); -+ -+ if (readpages_iter) -+ readpage_bio_extend(readpages_iter, &rbio->bio, sectors, -+ extent_partial_reads_expensive(k)); -+ -+ bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; -+ swap(rbio->bio.bi_iter.bi_size, bytes); -+ -+ if (rbio->bio.bi_iter.bi_size == bytes) -+ flags |= BCH_READ_LAST_FRAGMENT; -+ -+ if (bkey_extent_is_allocation(k.k)) -+ bch2_add_page_sectors(&rbio->bio, k); -+ -+ bch2_read_extent(c, rbio, k, offset_into_extent, flags); -+ -+ if (flags & BCH_READ_LAST_FRAGMENT) -+ break; -+ -+ swap(rbio->bio.bi_iter.bi_size, bytes); -+ bio_advance(&rbio->bio, bytes); -+ } -+ -+ if (ret == -EINTR) -+ goto retry; -+ -+ if (ret) { -+ bcache_io_error(c, &rbio->bio, "btree IO error %i", ret); -+ bio_endio(&rbio->bio); -+ } -+ -+ bkey_on_stack_exit(&sk, c); -+} -+ -+int bch2_readpages(struct file *file, struct address_space *mapping, 
-+ struct list_head *pages, unsigned nr_pages) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_io_opts opts = io_opts(c, &inode->ei_inode); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct page *page; -+ struct readpages_iter readpages_iter; -+ int ret; -+ -+ ret = readpages_iter_init(&readpages_iter, mapping, pages, nr_pages); -+ BUG_ON(ret); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, -+ BTREE_ITER_SLOTS); -+ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ -+ while ((page = readpage_iter_next(&readpages_iter))) { -+ pgoff_t index = readpages_iter.offset + readpages_iter.idx; -+ unsigned n = min_t(unsigned, -+ readpages_iter.nr_pages - -+ readpages_iter.idx, -+ BIO_MAX_PAGES); -+ struct bch_read_bio *rbio = -+ rbio_init(bio_alloc_bioset(GFP_NOFS, n, &c->bio_read), -+ opts); -+ -+ readpages_iter.idx++; -+ -+ bio_set_op_attrs(&rbio->bio, REQ_OP_READ, 0); -+ rbio->bio.bi_iter.bi_sector = (sector_t) index << PAGE_SECTOR_SHIFT; -+ rbio->bio.bi_end_io = bch2_readpages_end_io; -+ BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0)); -+ -+ bchfs_read(&trans, iter, rbio, inode->v.i_ino, -+ &readpages_iter); -+ } -+ -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ -+ bch2_trans_exit(&trans); -+ kfree(readpages_iter.pages); -+ -+ return 0; -+} -+ -+static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, -+ u64 inum, struct page *page) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ -+ bch2_page_state_create(page, __GFP_NOFAIL); -+ -+ bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC); -+ rbio->bio.bi_iter.bi_sector = -+ (sector_t) page->index << PAGE_SECTOR_SHIFT; -+ BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0)); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, -+ BTREE_ITER_SLOTS); -+ -+ bchfs_read(&trans, iter, rbio, inum, NULL); -+ -+ bch2_trans_exit(&trans); -+} -+ -+int bch2_readpage(struct file *file, struct page *page) -+{ -+ struct bch_inode_info *inode = to_bch_ei(page->mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_io_opts opts = io_opts(c, &inode->ei_inode); -+ struct bch_read_bio *rbio; -+ -+ rbio = rbio_init(bio_alloc_bioset(GFP_NOFS, 1, &c->bio_read), opts); -+ rbio->bio.bi_end_io = bch2_readpages_end_io; -+ -+ __bchfs_readpage(c, rbio, inode->v.i_ino, page); -+ return 0; -+} -+ -+static void bch2_read_single_page_end_io(struct bio *bio) -+{ -+ complete(bio->bi_private); -+} -+ -+static int bch2_read_single_page(struct page *page, -+ struct address_space *mapping) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_read_bio *rbio; -+ int ret; -+ DECLARE_COMPLETION_ONSTACK(done); -+ -+ rbio = rbio_init(bio_alloc_bioset(GFP_NOFS, 1, &c->bio_read), -+ io_opts(c, &inode->ei_inode)); -+ rbio->bio.bi_private = &done; -+ rbio->bio.bi_end_io = bch2_read_single_page_end_io; -+ -+ __bchfs_readpage(c, rbio, inode->v.i_ino, page); -+ wait_for_completion(&done); -+ -+ ret = blk_status_to_errno(rbio->bio.bi_status); -+ bio_put(&rbio->bio); -+ -+ if (ret < 0) -+ return ret; -+ -+ SetPageUptodate(page); -+ return 0; -+} -+ -+/* writepages: */ -+ -+struct bch_writepage_state { -+ struct bch_writepage_io *io; -+ struct bch_io_opts opts; -+}; -+ -+static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs *c, 
-+ struct bch_inode_info *inode) -+{ -+ return (struct bch_writepage_state) { -+ .opts = io_opts(c, &inode->ei_inode) -+ }; -+} -+ -+static void bch2_writepage_io_free(struct closure *cl) -+{ -+ struct bch_writepage_io *io = container_of(cl, -+ struct bch_writepage_io, cl); -+ -+ bio_put(&io->op.wbio.bio); -+} -+ -+static void bch2_writepage_io_done(struct closure *cl) -+{ -+ struct bch_writepage_io *io = container_of(cl, -+ struct bch_writepage_io, cl); -+ struct bch_fs *c = io->op.c; -+ struct bio *bio = &io->op.wbio.bio; -+ struct bvec_iter_all iter; -+ struct bio_vec *bvec; -+ unsigned i; -+ -+ if (io->op.error) { -+ bio_for_each_segment_all(bvec, bio, iter) { -+ struct bch_page_state *s; -+ -+ SetPageError(bvec->bv_page); -+ mapping_set_error(bvec->bv_page->mapping, -EIO); -+ -+ s = __bch2_page_state(bvec->bv_page); -+ spin_lock(&s->lock); -+ for (i = 0; i < PAGE_SECTORS; i++) -+ s->s[i].nr_replicas = 0; -+ spin_unlock(&s->lock); -+ } -+ } -+ -+ if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) { -+ bio_for_each_segment_all(bvec, bio, iter) { -+ struct bch_page_state *s; -+ -+ s = __bch2_page_state(bvec->bv_page); -+ spin_lock(&s->lock); -+ for (i = 0; i < PAGE_SECTORS; i++) -+ s->s[i].nr_replicas = 0; -+ spin_unlock(&s->lock); -+ } -+ } -+ -+ /* -+ * racing with fallocate can cause us to add fewer sectors than -+ * expected - but we shouldn't add more sectors than expected: -+ */ -+ BUG_ON(io->op.i_sectors_delta > 0); -+ -+ /* -+ * (error (due to going RO) halfway through a page can screw that up -+ * slightly) -+ * XXX wtf? -+ BUG_ON(io->op.op.i_sectors_delta >= PAGE_SECTORS); -+ */ -+ -+ /* -+ * PageWriteback is effectively our ref on the inode - fixup i_blocks -+ * before calling end_page_writeback: -+ */ -+ i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta); -+ -+ bio_for_each_segment_all(bvec, bio, iter) { -+ struct bch_page_state *s = __bch2_page_state(bvec->bv_page); -+ -+ if (atomic_dec_and_test(&s->write_count)) -+ end_page_writeback(bvec->bv_page); -+ } -+ -+ closure_return_with_destructor(&io->cl, bch2_writepage_io_free); -+} -+ -+static void bch2_writepage_do_io(struct bch_writepage_state *w) -+{ -+ struct bch_writepage_io *io = w->io; -+ -+ w->io = NULL; -+ closure_call(&io->op.cl, bch2_write, NULL, &io->cl); -+ continue_at(&io->cl, bch2_writepage_io_done, NULL); -+} -+ -+/* -+ * Get a bch_writepage_io and add @page to it - appending to an existing one if -+ * possible, else allocating a new one: -+ */ -+static void bch2_writepage_io_alloc(struct bch_fs *c, -+ struct writeback_control *wbc, -+ struct bch_writepage_state *w, -+ struct bch_inode_info *inode, -+ u64 sector, -+ unsigned nr_replicas) -+{ -+ struct bch_write_op *op; -+ -+ w->io = container_of(bio_alloc_bioset(GFP_NOFS, -+ BIO_MAX_PAGES, -+ &c->writepage_bioset), -+ struct bch_writepage_io, op.wbio.bio); -+ -+ closure_init(&w->io->cl, NULL); -+ w->io->inode = inode; -+ -+ op = &w->io->op; -+ bch2_write_op_init(op, c, w->opts); -+ op->target = w->opts.foreground_target; -+ op_journal_seq_set(op, &inode->ei_journal_seq); -+ op->nr_replicas = nr_replicas; -+ op->res.nr_replicas = nr_replicas; -+ op->write_point = writepoint_hashed(inode->ei_last_dirtied); -+ op->pos = POS(inode->v.i_ino, sector); -+ op->wbio.bio.bi_iter.bi_sector = sector; -+ op->wbio.bio.bi_opf = wbc_to_write_flags(wbc); -+} -+ -+static int __bch2_writepage(struct page *page, -+ struct writeback_control *wbc, -+ void *data) -+{ -+ struct bch_inode_info *inode = to_bch_ei(page->mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; 
-+ struct bch_writepage_state *w = data; -+ struct bch_page_state *s, orig; -+ unsigned i, offset, nr_replicas_this_write = U32_MAX; -+ loff_t i_size = i_size_read(&inode->v); -+ pgoff_t end_index = i_size >> PAGE_SHIFT; -+ int ret; -+ -+ EBUG_ON(!PageUptodate(page)); -+ -+ /* Is the page fully inside i_size? */ -+ if (page->index < end_index) -+ goto do_io; -+ -+ /* Is the page fully outside i_size? (truncate in progress) */ -+ offset = i_size & (PAGE_SIZE - 1); -+ if (page->index > end_index || !offset) { -+ unlock_page(page); -+ return 0; -+ } -+ -+ /* -+ * The page straddles i_size. It must be zeroed out on each and every -+ * writepage invocation because it may be mmapped. "A file is mapped -+ * in multiples of the page size. For a file that is not a multiple of -+ * the page size, the remaining memory is zeroed when mapped, and -+ * writes to that region are not written out to the file." -+ */ -+ zero_user_segment(page, offset, PAGE_SIZE); -+do_io: -+ s = bch2_page_state_create(page, __GFP_NOFAIL); -+ -+ ret = bch2_get_page_disk_reservation(c, inode, page, true); -+ if (ret) { -+ SetPageError(page); -+ mapping_set_error(page->mapping, ret); -+ unlock_page(page); -+ return 0; -+ } -+ -+ /* Before unlocking the page, get copy of reservations: */ -+ orig = *s; -+ -+ for (i = 0; i < PAGE_SECTORS; i++) { -+ if (s->s[i].state < SECTOR_DIRTY) -+ continue; -+ -+ nr_replicas_this_write = -+ min_t(unsigned, nr_replicas_this_write, -+ s->s[i].nr_replicas + -+ s->s[i].replicas_reserved); -+ } -+ -+ for (i = 0; i < PAGE_SECTORS; i++) { -+ if (s->s[i].state < SECTOR_DIRTY) -+ continue; -+ -+ s->s[i].nr_replicas = w->opts.compression -+ ? 0 : nr_replicas_this_write; -+ -+ s->s[i].replicas_reserved = 0; -+ s->s[i].state = SECTOR_ALLOCATED; -+ } -+ -+ BUG_ON(atomic_read(&s->write_count)); -+ atomic_set(&s->write_count, 1); -+ -+ BUG_ON(PageWriteback(page)); -+ set_page_writeback(page); -+ -+ unlock_page(page); -+ -+ offset = 0; -+ while (1) { -+ unsigned sectors = 1, dirty_sectors = 0, reserved_sectors = 0; -+ u64 sector; -+ -+ while (offset < PAGE_SECTORS && -+ orig.s[offset].state < SECTOR_DIRTY) -+ offset++; -+ -+ if (offset == PAGE_SECTORS) -+ break; -+ -+ sector = ((u64) page->index << PAGE_SECTOR_SHIFT) + offset; -+ -+ while (offset + sectors < PAGE_SECTORS && -+ orig.s[offset + sectors].state >= SECTOR_DIRTY) -+ sectors++; -+ -+ for (i = offset; i < offset + sectors; i++) { -+ reserved_sectors += orig.s[i].replicas_reserved; -+ dirty_sectors += orig.s[i].state == SECTOR_DIRTY; -+ } -+ -+ if (w->io && -+ (w->io->op.res.nr_replicas != nr_replicas_this_write || -+ bio_full(&w->io->op.wbio.bio, PAGE_SIZE) || -+ w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >= -+ (BIO_MAX_PAGES * PAGE_SIZE) || -+ bio_end_sector(&w->io->op.wbio.bio) != sector)) -+ bch2_writepage_do_io(w); -+ -+ if (!w->io) -+ bch2_writepage_io_alloc(c, wbc, w, inode, sector, -+ nr_replicas_this_write); -+ -+ atomic_inc(&s->write_count); -+ -+ BUG_ON(inode != w->io->inode); -+ BUG_ON(!bio_add_page(&w->io->op.wbio.bio, page, -+ sectors << 9, offset << 9)); -+ -+ /* Check for writing past i_size: */ -+ WARN_ON((bio_end_sector(&w->io->op.wbio.bio) << 9) > -+ round_up(i_size, block_bytes(c))); -+ -+ w->io->op.res.sectors += reserved_sectors; -+ w->io->op.i_sectors_delta -= dirty_sectors; -+ w->io->op.new_i_size = i_size; -+ -+ offset += sectors; -+ } -+ -+ if (atomic_dec_and_test(&s->write_count)) -+ end_page_writeback(page); -+ -+ return 0; -+} -+ -+int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc) 
-+{ -+ struct bch_fs *c = mapping->host->i_sb->s_fs_info; -+ struct bch_writepage_state w = -+ bch_writepage_state_init(c, to_bch_ei(mapping->host)); -+ struct blk_plug plug; -+ int ret; -+ -+ blk_start_plug(&plug); -+ ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w); -+ if (w.io) -+ bch2_writepage_do_io(&w); -+ blk_finish_plug(&plug); -+ return ret; -+} -+ -+int bch2_writepage(struct page *page, struct writeback_control *wbc) -+{ -+ struct bch_fs *c = page->mapping->host->i_sb->s_fs_info; -+ struct bch_writepage_state w = -+ bch_writepage_state_init(c, to_bch_ei(page->mapping->host)); -+ int ret; -+ -+ ret = __bch2_writepage(page, wbc, &w); -+ if (w.io) -+ bch2_writepage_do_io(&w); -+ -+ return ret; -+} -+ -+/* buffered writes: */ -+ -+int bch2_write_begin(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned flags, -+ struct page **pagep, void **fsdata) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch2_page_reservation *res; -+ pgoff_t index = pos >> PAGE_SHIFT; -+ unsigned offset = pos & (PAGE_SIZE - 1); -+ struct page *page; -+ int ret = -ENOMEM; -+ -+ res = kmalloc(sizeof(*res), GFP_KERNEL); -+ if (!res) -+ return -ENOMEM; -+ -+ bch2_page_reservation_init(c, inode, res); -+ *fsdata = res; -+ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ -+ page = grab_cache_page_write_begin(mapping, index, flags); -+ if (!page) -+ goto err_unlock; -+ -+ if (PageUptodate(page)) -+ goto out; -+ -+ /* If we're writing entire page, don't need to read it in first: */ -+ if (len == PAGE_SIZE) -+ goto out; -+ -+ if (!offset && pos + len >= inode->v.i_size) { -+ zero_user_segment(page, len, PAGE_SIZE); -+ flush_dcache_page(page); -+ goto out; -+ } -+ -+ if (index > inode->v.i_size >> PAGE_SHIFT) { -+ zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE); -+ flush_dcache_page(page); -+ goto out; -+ } -+readpage: -+ ret = bch2_read_single_page(page, mapping); -+ if (ret) -+ goto err; -+out: -+ ret = bch2_page_reservation_get(c, inode, page, res, -+ offset, len, true); -+ if (ret) { -+ if (!PageUptodate(page)) { -+ /* -+ * If the page hasn't been read in, we won't know if we -+ * actually need a reservation - we don't actually need -+ * to read here, we just need to check if the page is -+ * fully backed by uncompressed data: -+ */ -+ goto readpage; -+ } -+ -+ goto err; -+ } -+ -+ *pagep = page; -+ return 0; -+err: -+ unlock_page(page); -+ put_page(page); -+ *pagep = NULL; -+err_unlock: -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ kfree(res); -+ *fsdata = NULL; -+ return ret; -+} -+ -+int bch2_write_end(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned copied, -+ struct page *page, void *fsdata) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch2_page_reservation *res = fsdata; -+ unsigned offset = pos & (PAGE_SIZE - 1); -+ -+ lockdep_assert_held(&inode->v.i_rwsem); -+ -+ if (unlikely(copied < len && !PageUptodate(page))) { -+ /* -+ * The page needs to be read in, but that would destroy -+ * our partial write - simplest thing is to just force -+ * userspace to redo the write: -+ */ -+ zero_user(page, 0, PAGE_SIZE); -+ flush_dcache_page(page); -+ copied = 0; -+ } -+ -+ spin_lock(&inode->v.i_lock); -+ if (pos + copied > inode->v.i_size) -+ i_size_write(&inode->v, pos + copied); -+ spin_unlock(&inode->v.i_lock); -+ -+ if (copied) { -+ if 
(!PageUptodate(page)) -+ SetPageUptodate(page); -+ -+ bch2_set_page_dirty(c, inode, page, res, offset, copied); -+ -+ inode->ei_last_dirtied = (unsigned long) current; -+ } -+ -+ unlock_page(page); -+ put_page(page); -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ -+ bch2_page_reservation_put(c, inode, res); -+ kfree(res); -+ -+ return copied; -+} -+ -+#define WRITE_BATCH_PAGES 32 -+ -+static int __bch2_buffered_write(struct bch_inode_info *inode, -+ struct address_space *mapping, -+ struct iov_iter *iter, -+ loff_t pos, unsigned len) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct page *pages[WRITE_BATCH_PAGES]; -+ struct bch2_page_reservation res; -+ unsigned long index = pos >> PAGE_SHIFT; -+ unsigned offset = pos & (PAGE_SIZE - 1); -+ unsigned nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); -+ unsigned i, reserved = 0, set_dirty = 0; -+ unsigned copied = 0, nr_pages_copied = 0; -+ int ret = 0; -+ -+ BUG_ON(!len); -+ BUG_ON(nr_pages > ARRAY_SIZE(pages)); -+ -+ bch2_page_reservation_init(c, inode, &res); -+ -+ for (i = 0; i < nr_pages; i++) { -+ pages[i] = grab_cache_page_write_begin(mapping, index + i, 0); -+ if (!pages[i]) { -+ nr_pages = i; -+ if (!i) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ len = min_t(unsigned, len, -+ nr_pages * PAGE_SIZE - offset); -+ break; -+ } -+ } -+ -+ if (offset && !PageUptodate(pages[0])) { -+ ret = bch2_read_single_page(pages[0], mapping); -+ if (ret) -+ goto out; -+ } -+ -+ if ((pos + len) & (PAGE_SIZE - 1) && -+ !PageUptodate(pages[nr_pages - 1])) { -+ if ((index + nr_pages - 1) << PAGE_SHIFT >= inode->v.i_size) { -+ zero_user(pages[nr_pages - 1], 0, PAGE_SIZE); -+ } else { -+ ret = bch2_read_single_page(pages[nr_pages - 1], mapping); -+ if (ret) -+ goto out; -+ } -+ } -+ -+ while (reserved < len) { -+ struct page *page = pages[(offset + reserved) >> PAGE_SHIFT]; -+ unsigned pg_offset = (offset + reserved) & (PAGE_SIZE - 1); -+ unsigned pg_len = min_t(unsigned, len - reserved, -+ PAGE_SIZE - pg_offset); -+retry_reservation: -+ ret = bch2_page_reservation_get(c, inode, page, &res, -+ pg_offset, pg_len, true); -+ -+ if (ret && !PageUptodate(page)) { -+ ret = bch2_read_single_page(page, mapping); -+ if (!ret) -+ goto retry_reservation; -+ } -+ -+ if (ret) -+ goto out; -+ -+ reserved += pg_len; -+ } -+ -+ if (mapping_writably_mapped(mapping)) -+ for (i = 0; i < nr_pages; i++) -+ flush_dcache_page(pages[i]); -+ -+ while (copied < len) { -+ struct page *page = pages[(offset + copied) >> PAGE_SHIFT]; -+ unsigned pg_offset = (offset + copied) & (PAGE_SIZE - 1); -+ unsigned pg_len = min_t(unsigned, len - copied, -+ PAGE_SIZE - pg_offset); -+ unsigned pg_copied = iov_iter_copy_from_user_atomic(page, -+ iter, pg_offset, pg_len); -+ -+ if (!pg_copied) -+ break; -+ -+ if (!PageUptodate(page) && -+ pg_copied != PAGE_SIZE && -+ pos + copied + pg_copied < inode->v.i_size) { -+ zero_user(page, 0, PAGE_SIZE); -+ break; -+ } -+ -+ flush_dcache_page(page); -+ iov_iter_advance(iter, pg_copied); -+ copied += pg_copied; -+ -+ if (pg_copied != pg_len) -+ break; -+ } -+ -+ if (!copied) -+ goto out; -+ -+ spin_lock(&inode->v.i_lock); -+ if (pos + copied > inode->v.i_size) -+ i_size_write(&inode->v, pos + copied); -+ spin_unlock(&inode->v.i_lock); -+ -+ while (set_dirty < copied) { -+ struct page *page = pages[(offset + set_dirty) >> PAGE_SHIFT]; -+ unsigned pg_offset = (offset + set_dirty) & (PAGE_SIZE - 1); -+ unsigned pg_len = min_t(unsigned, copied - set_dirty, -+ PAGE_SIZE - pg_offset); -+ -+ if (!PageUptodate(page)) -+ SetPageUptodate(page); -+ -+ 
bch2_set_page_dirty(c, inode, page, &res, pg_offset, pg_len); -+ unlock_page(page); -+ put_page(page); -+ -+ set_dirty += pg_len; -+ } -+ -+ nr_pages_copied = DIV_ROUND_UP(offset + copied, PAGE_SIZE); -+ inode->ei_last_dirtied = (unsigned long) current; -+out: -+ for (i = nr_pages_copied; i < nr_pages; i++) { -+ unlock_page(pages[i]); -+ put_page(pages[i]); -+ } -+ -+ bch2_page_reservation_put(c, inode, &res); -+ -+ return copied ?: ret; -+} -+ -+static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter) -+{ -+ struct file *file = iocb->ki_filp; -+ struct address_space *mapping = file->f_mapping; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ loff_t pos = iocb->ki_pos; -+ ssize_t written = 0; -+ int ret = 0; -+ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ -+ do { -+ unsigned offset = pos & (PAGE_SIZE - 1); -+ unsigned bytes = min_t(unsigned long, iov_iter_count(iter), -+ PAGE_SIZE * WRITE_BATCH_PAGES - offset); -+again: -+ /* -+ * Bring in the user page that we will copy from _first_. -+ * Otherwise there's a nasty deadlock on copying from the -+ * same page as we're writing to, without it being marked -+ * up-to-date. -+ * -+ * Not only is this an optimisation, but it is also required -+ * to check that the address is actually valid, when atomic -+ * usercopies are used, below. -+ */ -+ if (unlikely(iov_iter_fault_in_readable(iter, bytes))) { -+ bytes = min_t(unsigned long, iov_iter_count(iter), -+ PAGE_SIZE - offset); -+ -+ if (unlikely(iov_iter_fault_in_readable(iter, bytes))) { -+ ret = -EFAULT; -+ break; -+ } -+ } -+ -+ if (unlikely(fatal_signal_pending(current))) { -+ ret = -EINTR; -+ break; -+ } -+ -+ ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes); -+ if (unlikely(ret < 0)) -+ break; -+ -+ cond_resched(); -+ -+ if (unlikely(ret == 0)) { -+ /* -+ * If we were unable to copy any data at all, we must -+ * fall back to a single segment length write. -+ * -+ * If we didn't fallback here, we could livelock -+ * because not all segments in the iov can be copied at -+ * once without a pagefault. -+ */ -+ bytes = min_t(unsigned long, PAGE_SIZE - offset, -+ iov_iter_single_seg_count(iter)); -+ goto again; -+ } -+ pos += ret; -+ written += ret; -+ ret = 0; -+ -+ balance_dirty_pages_ratelimited(mapping); -+ } while (iov_iter_count(iter)); -+ -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ -+ return written ? 
written : ret; -+} -+ -+/* O_DIRECT reads */ -+ -+static void bch2_dio_read_complete(struct closure *cl) -+{ -+ struct dio_read *dio = container_of(cl, struct dio_read, cl); -+ -+ dio->req->ki_complete(dio->req, dio->ret, 0); -+ bio_check_pages_dirty(&dio->rbio.bio); /* transfers ownership */ -+} -+ -+static void bch2_direct_IO_read_endio(struct bio *bio) -+{ -+ struct dio_read *dio = bio->bi_private; -+ -+ if (bio->bi_status) -+ dio->ret = blk_status_to_errno(bio->bi_status); -+ -+ closure_put(&dio->cl); -+} -+ -+static void bch2_direct_IO_read_split_endio(struct bio *bio) -+{ -+ bch2_direct_IO_read_endio(bio); -+ bio_check_pages_dirty(bio); /* transfers ownership */ -+} -+ -+static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) -+{ -+ struct file *file = req->ki_filp; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_io_opts opts = io_opts(c, &inode->ei_inode); -+ struct dio_read *dio; -+ struct bio *bio; -+ loff_t offset = req->ki_pos; -+ bool sync = is_sync_kiocb(req); -+ size_t shorten; -+ ssize_t ret; -+ -+ if ((offset|iter->count) & (block_bytes(c) - 1)) -+ return -EINVAL; -+ -+ ret = min_t(loff_t, iter->count, -+ max_t(loff_t, 0, i_size_read(&inode->v) - offset)); -+ -+ if (!ret) -+ return ret; -+ -+ shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c)); -+ iter->count -= shorten; -+ -+ bio = bio_alloc_bioset(GFP_KERNEL, -+ iov_iter_npages(iter, BIO_MAX_PAGES), -+ &c->dio_read_bioset); -+ -+ bio->bi_end_io = bch2_direct_IO_read_endio; -+ -+ dio = container_of(bio, struct dio_read, rbio.bio); -+ closure_init(&dio->cl, NULL); -+ -+ /* -+ * this is a _really_ horrible hack just to avoid an atomic sub at the -+ * end: -+ */ -+ if (!sync) { -+ set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL); -+ atomic_set(&dio->cl.remaining, -+ CLOSURE_REMAINING_INITIALIZER - -+ CLOSURE_RUNNING + -+ CLOSURE_DESTRUCTOR); -+ } else { -+ atomic_set(&dio->cl.remaining, -+ CLOSURE_REMAINING_INITIALIZER + 1); -+ } -+ -+ dio->req = req; -+ dio->ret = ret; -+ -+ goto start; -+ while (iter->count) { -+ bio = bio_alloc_bioset(GFP_KERNEL, -+ iov_iter_npages(iter, BIO_MAX_PAGES), -+ &c->bio_read); -+ bio->bi_end_io = bch2_direct_IO_read_split_endio; -+start: -+ bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC); -+ bio->bi_iter.bi_sector = offset >> 9; -+ bio->bi_private = dio; -+ -+ ret = bio_iov_iter_get_pages(bio, iter); -+ if (ret < 0) { -+ /* XXX: fault inject this path */ -+ bio->bi_status = BLK_STS_RESOURCE; -+ bio_endio(bio); -+ break; -+ } -+ -+ offset += bio->bi_iter.bi_size; -+ bio_set_pages_dirty(bio); -+ -+ if (iter->count) -+ closure_get(&dio->cl); -+ -+ bch2_read(c, rbio_init(bio, opts), inode->v.i_ino); -+ } -+ -+ iter->count += shorten; -+ -+ if (sync) { -+ closure_sync(&dio->cl); -+ closure_debug_destroy(&dio->cl); -+ ret = dio->ret; -+ bio_check_pages_dirty(&dio->rbio.bio); /* transfers ownership */ -+ return ret; -+ } else { -+ return -EIOCBQUEUED; -+ } -+} -+ -+ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) -+{ -+ struct file *file = iocb->ki_filp; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct address_space *mapping = file->f_mapping; -+ size_t count = iov_iter_count(iter); -+ ssize_t ret; -+ -+ if (!count) -+ return 0; /* skip atime */ -+ -+ if (iocb->ki_flags & IOCB_DIRECT) { -+ struct blk_plug plug; -+ -+ ret = filemap_write_and_wait_range(mapping, -+ iocb->ki_pos, -+ iocb->ki_pos + count - 1); -+ if (ret < 0) -+ return ret; -+ -+ file_accessed(file); -+ -+ 
blk_start_plug(&plug); -+ ret = bch2_direct_IO_read(iocb, iter); -+ blk_finish_plug(&plug); -+ -+ if (ret >= 0) -+ iocb->ki_pos += ret; -+ } else { -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ ret = generic_file_read_iter(iocb, iter); -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ } -+ -+ return ret; -+} -+ -+/* O_DIRECT writes */ -+ -+static void bch2_dio_write_loop_async(struct bch_write_op *); -+ -+static long bch2_dio_write_loop(struct dio_write *dio) -+{ -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ struct kiocb *req = dio->req; -+ struct address_space *mapping = req->ki_filp->f_mapping; -+ struct bch_inode_info *inode = file_bch_inode(req->ki_filp); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bio *bio = &dio->op.wbio.bio; -+ struct bvec_iter_all iter; -+ struct bio_vec *bv; -+ unsigned unaligned; -+ bool sync = dio->sync; -+ long ret; -+ -+ if (dio->loop) -+ goto loop; -+ -+ while (1) { -+ if (kthread) -+ kthread_use_mm(dio->mm); -+ BUG_ON(current->faults_disabled_mapping); -+ current->faults_disabled_mapping = mapping; -+ -+ ret = bio_iov_iter_get_pages(bio, &dio->iter); -+ -+ current->faults_disabled_mapping = NULL; -+ if (kthread) -+ kthread_unuse_mm(dio->mm); -+ -+ if (unlikely(ret < 0)) -+ goto err; -+ -+ unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1); -+ bio->bi_iter.bi_size -= unaligned; -+ iov_iter_revert(&dio->iter, unaligned); -+ -+ if (!bio->bi_iter.bi_size) { -+ /* -+ * bio_iov_iter_get_pages was only able to get < -+ * blocksize worth of pages: -+ */ -+ bio_for_each_segment_all(bv, bio, iter) -+ put_page(bv->bv_page); -+ ret = -EFAULT; -+ goto err; -+ } -+ -+ bch2_write_op_init(&dio->op, c, io_opts(c, &inode->ei_inode)); -+ dio->op.end_io = bch2_dio_write_loop_async; -+ dio->op.target = dio->op.opts.foreground_target; -+ op_journal_seq_set(&dio->op, &inode->ei_journal_seq); -+ dio->op.write_point = writepoint_hashed((unsigned long) current); -+ dio->op.nr_replicas = dio->op.opts.data_replicas; -+ dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9); -+ -+ if ((req->ki_flags & IOCB_DSYNC) && -+ !c->opts.journal_flush_disabled) -+ dio->op.flags |= BCH_WRITE_FLUSH; -+ -+ ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio), -+ dio->op.opts.data_replicas, 0); -+ if (unlikely(ret) && -+ !bch2_check_range_allocated(c, dio->op.pos, -+ bio_sectors(bio), dio->op.opts.data_replicas)) -+ goto err; -+ -+ task_io_account_write(bio->bi_iter.bi_size); -+ -+ if (!dio->sync && !dio->loop && dio->iter.count) { -+ struct iovec *iov = dio->inline_vecs; -+ -+ if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { -+ iov = kmalloc(dio->iter.nr_segs * sizeof(*iov), -+ GFP_KERNEL); -+ if (unlikely(!iov)) { -+ dio->sync = sync = true; -+ goto do_io; -+ } -+ -+ dio->free_iov = true; -+ } -+ -+ memcpy(iov, dio->iter.iov, dio->iter.nr_segs * sizeof(*iov)); -+ dio->iter.iov = iov; -+ } -+do_io: -+ dio->loop = true; -+ closure_call(&dio->op.cl, bch2_write, NULL, NULL); -+ -+ if (sync) -+ wait_for_completion(&dio->done); -+ else -+ return -EIOCBQUEUED; -+loop: -+ i_sectors_acct(c, inode, &dio->quota_res, -+ dio->op.i_sectors_delta); -+ req->ki_pos += (u64) dio->op.written << 9; -+ dio->written += dio->op.written; -+ -+ spin_lock(&inode->v.i_lock); -+ if (req->ki_pos > inode->v.i_size) -+ i_size_write(&inode->v, req->ki_pos); -+ spin_unlock(&inode->v.i_lock); -+ -+ bio_for_each_segment_all(bv, bio, iter) -+ put_page(bv->bv_page); -+ if (!dio->iter.count || dio->op.error) -+ break; -+ -+ bio_reset(bio); -+ 
reinit_completion(&dio->done); -+ } -+ -+ ret = dio->op.error ?: ((long) dio->written << 9); -+err: -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ bch2_quota_reservation_put(c, inode, &dio->quota_res); -+ -+ if (dio->free_iov) -+ kfree(dio->iter.iov); -+ -+ bio_put(bio); -+ -+ /* inode->i_dio_count is our ref on inode and thus bch_fs */ -+ inode_dio_end(&inode->v); -+ -+ if (!sync) { -+ req->ki_complete(req, ret, 0); -+ ret = -EIOCBQUEUED; -+ } -+ return ret; -+} -+ -+static void bch2_dio_write_loop_async(struct bch_write_op *op) -+{ -+ struct dio_write *dio = container_of(op, struct dio_write, op); -+ -+ if (dio->sync) -+ complete(&dio->done); -+ else -+ bch2_dio_write_loop(dio); -+} -+ -+static noinline -+ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) -+{ -+ struct file *file = req->ki_filp; -+ struct address_space *mapping = file->f_mapping; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct dio_write *dio; -+ struct bio *bio; -+ bool locked = true, extending; -+ ssize_t ret; -+ -+ prefetch(&c->opts); -+ prefetch((void *) &c->opts + 64); -+ prefetch(&inode->ei_inode); -+ prefetch((void *) &inode->ei_inode + 64); -+ -+ inode_lock(&inode->v); -+ -+ ret = generic_write_checks(req, iter); -+ if (unlikely(ret <= 0)) -+ goto err; -+ -+ ret = file_remove_privs(file); -+ if (unlikely(ret)) -+ goto err; -+ -+ ret = file_update_time(file); -+ if (unlikely(ret)) -+ goto err; -+ -+ if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) -+ goto err; -+ -+ inode_dio_begin(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ extending = req->ki_pos + iter->count > inode->v.i_size; -+ if (!extending) { -+ inode_unlock(&inode->v); -+ locked = false; -+ } -+ -+ bio = bio_alloc_bioset(GFP_KERNEL, -+ iov_iter_npages(iter, BIO_MAX_PAGES), -+ &c->dio_write_bioset); -+ dio = container_of(bio, struct dio_write, op.wbio.bio); -+ init_completion(&dio->done); -+ dio->req = req; -+ dio->mm = current->mm; -+ dio->loop = false; -+ dio->sync = is_sync_kiocb(req) || extending; -+ dio->free_iov = false; -+ dio->quota_res.sectors = 0; -+ dio->written = 0; -+ dio->iter = *iter; -+ -+ ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, -+ iter->count >> 9, true); -+ if (unlikely(ret)) -+ goto err_put_bio; -+ -+ ret = write_invalidate_inode_pages_range(mapping, -+ req->ki_pos, -+ req->ki_pos + iter->count - 1); -+ if (unlikely(ret)) -+ goto err_put_bio; -+ -+ ret = bch2_dio_write_loop(dio); -+err: -+ if (locked) -+ inode_unlock(&inode->v); -+ return ret; -+err_put_bio: -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ bch2_quota_reservation_put(c, inode, &dio->quota_res); -+ bio_put(bio); -+ inode_dio_end(&inode->v); -+ goto err; -+} -+ -+ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) -+{ -+ struct file *file = iocb->ki_filp; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ ssize_t ret; -+ -+ if (iocb->ki_flags & IOCB_DIRECT) -+ return bch2_direct_write(iocb, from); -+ -+ /* We can write back this queue in page reclaim */ -+ current->backing_dev_info = inode_to_bdi(&inode->v); -+ inode_lock(&inode->v); -+ -+ ret = generic_write_checks(iocb, from); -+ if (ret <= 0) -+ goto unlock; -+ -+ ret = file_remove_privs(file); -+ if (ret) -+ goto unlock; -+ -+ ret = file_update_time(file); -+ if (ret) -+ goto unlock; -+ -+ ret = bch2_buffered_write(iocb, from); -+ if (likely(ret > 0)) -+ iocb->ki_pos += ret; -+unlock: -+ inode_unlock(&inode->v); -+ 
current->backing_dev_info = NULL; -+ -+ if (ret > 0) -+ ret = generic_write_sync(iocb, ret); -+ -+ return ret; -+} -+ -+/* fsync: */ -+ -+int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ int ret, ret2; -+ -+ ret = file_write_and_wait_range(file, start, end); -+ if (ret) -+ return ret; -+ -+ if (datasync && !(inode->v.i_state & I_DIRTY_DATASYNC)) -+ goto out; -+ -+ ret = sync_inode_metadata(&inode->v, 1); -+ if (ret) -+ return ret; -+out: -+ if (!c->opts.journal_flush_disabled) -+ ret = bch2_journal_flush_seq(&c->journal, -+ inode->ei_journal_seq); -+ ret2 = file_check_and_advance_wb_err(file); -+ -+ return ret ?: ret2; -+} -+ -+/* truncate: */ -+ -+static inline int range_has_data(struct bch_fs *c, -+ struct bpos start, -+ struct bpos end) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, start, 0, k, ret) { -+ if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) -+ break; -+ -+ if (bkey_extent_is_data(k.k)) { -+ ret = 1; -+ break; -+ } -+ } -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+static int __bch2_truncate_page(struct bch_inode_info *inode, -+ pgoff_t index, loff_t start, loff_t end) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct address_space *mapping = inode->v.i_mapping; -+ struct bch_page_state *s; -+ unsigned start_offset = start & (PAGE_SIZE - 1); -+ unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1; -+ unsigned i; -+ struct page *page; -+ int ret = 0; -+ -+ /* Page boundary? Nothing to do */ -+ if (!((index == start >> PAGE_SHIFT && start_offset) || -+ (index == end >> PAGE_SHIFT && end_offset != PAGE_SIZE))) -+ return 0; -+ -+ /* Above i_size? */ -+ if (index << PAGE_SHIFT >= inode->v.i_size) -+ return 0; -+ -+ page = find_lock_page(mapping, index); -+ if (!page) { -+ /* -+ * XXX: we're doing two index lookups when we end up reading the -+ * page -+ */ -+ ret = range_has_data(c, -+ POS(inode->v.i_ino, index << PAGE_SECTOR_SHIFT), -+ POS(inode->v.i_ino, (index + 1) << PAGE_SECTOR_SHIFT)); -+ if (ret <= 0) -+ return ret; -+ -+ page = find_or_create_page(mapping, index, GFP_KERNEL); -+ if (unlikely(!page)) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ } -+ -+ s = bch2_page_state_create(page, 0); -+ if (!s) { -+ ret = -ENOMEM; -+ goto unlock; -+ } -+ -+ if (!PageUptodate(page)) { -+ ret = bch2_read_single_page(page, mapping); -+ if (ret) -+ goto unlock; -+ } -+ -+ if (index != start >> PAGE_SHIFT) -+ start_offset = 0; -+ if (index != end >> PAGE_SHIFT) -+ end_offset = PAGE_SIZE; -+ -+ for (i = round_up(start_offset, block_bytes(c)) >> 9; -+ i < round_down(end_offset, block_bytes(c)) >> 9; -+ i++) { -+ s->s[i].nr_replicas = 0; -+ s->s[i].state = SECTOR_UNALLOCATED; -+ } -+ -+ zero_user_segment(page, start_offset, end_offset); -+ -+ /* -+ * Bit of a hack - we don't want truncate to fail due to -ENOSPC. -+ * -+ * XXX: because we aren't currently tracking whether the page has actual -+ * data in it (vs. just 0s, or only partially written) this wrong. ick. 
-+ */ -+ ret = bch2_get_page_disk_reservation(c, inode, page, false); -+ BUG_ON(ret); -+ -+ __set_page_dirty_nobuffers(page); -+unlock: -+ unlock_page(page); -+ put_page(page); -+out: -+ return ret; -+} -+ -+static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from) -+{ -+ return __bch2_truncate_page(inode, from >> PAGE_SHIFT, -+ from, round_up(from, PAGE_SIZE)); -+} -+ -+static int bch2_extend(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *inode_u, -+ struct iattr *iattr) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct address_space *mapping = inode->v.i_mapping; -+ int ret; -+ -+ /* -+ * sync appends: -+ * -+ * this has to be done _before_ extending i_size: -+ */ -+ ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX); -+ if (ret) -+ return ret; -+ -+ truncate_setsize(&inode->v, iattr->ia_size); -+ setattr_copy(&inode->v, iattr); -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode_size(c, inode, inode->v.i_size, -+ ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ -+ return ret; -+} -+ -+static int bch2_truncate_finish_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; -+ bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); -+ return 0; -+} -+ -+static int bch2_truncate_start_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, void *p) -+{ -+ u64 *new_i_size = p; -+ -+ bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY; -+ bi->bi_size = *new_i_size; -+ return 0; -+} -+ -+int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct address_space *mapping = inode->v.i_mapping; -+ struct bch_inode_unpacked inode_u; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ u64 new_i_size = iattr->ia_size; -+ s64 i_sectors_delta = 0; -+ int ret = 0; -+ -+ inode_dio_wait(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ /* -+ * fetch current on disk i_size: inode is locked, i_size can only -+ * increase underneath us: -+ */ -+ bch2_trans_init(&trans, c, 0, 0); -+ iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, 0); -+ ret = PTR_ERR_OR_ZERO(iter); -+ bch2_trans_exit(&trans); -+ -+ if (ret) -+ goto err; -+ -+ /* -+ * check this before next assertion; on filesystem error our normal -+ * invariants are a bit broken (truncate has to truncate the page cache -+ * before the inode). -+ */ -+ ret = bch2_journal_error(&c->journal); -+ if (ret) -+ goto err; -+ -+ BUG_ON(inode->v.i_size < inode_u.bi_size); -+ -+ if (iattr->ia_size > inode->v.i_size) { -+ ret = bch2_extend(inode, &inode_u, iattr); -+ goto err; -+ } -+ -+ ret = bch2_truncate_page(inode, iattr->ia_size); -+ if (unlikely(ret)) -+ goto err; -+ -+ /* -+ * When extending, we're going to write the new i_size to disk -+ * immediately so we need to flush anything above the current on disk -+ * i_size first: -+ * -+ * Also, when extending we need to flush the page that i_size currently -+ * straddles - if it's mapped to userspace, we need to ensure that -+ * userspace has to redirty it and call .mkwrite -> set_page_dirty -+ * again to allocate the part of the page that was extended. 
-+ */ -+ if (iattr->ia_size > inode_u.bi_size) -+ ret = filemap_write_and_wait_range(mapping, -+ inode_u.bi_size, -+ iattr->ia_size - 1); -+ else if (iattr->ia_size & (PAGE_SIZE - 1)) -+ ret = filemap_write_and_wait_range(mapping, -+ round_down(iattr->ia_size, PAGE_SIZE), -+ iattr->ia_size - 1); -+ if (ret) -+ goto err; -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode(c, inode, bch2_truncate_start_fn, -+ &new_i_size, 0); -+ mutex_unlock(&inode->ei_update_lock); -+ -+ if (unlikely(ret)) -+ goto err; -+ -+ truncate_setsize(&inode->v, iattr->ia_size); -+ -+ ret = bch2_fpunch(c, inode->v.i_ino, -+ round_up(iattr->ia_size, block_bytes(c)) >> 9, -+ U64_MAX, &inode->ei_journal_seq, &i_sectors_delta); -+ i_sectors_acct(c, inode, NULL, i_sectors_delta); -+ -+ if (unlikely(ret)) -+ goto err; -+ -+ setattr_copy(&inode->v, iattr); -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, -+ ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+err: -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ return ret; -+} -+ -+/* fallocate: */ -+ -+static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ u64 discard_start = round_up(offset, block_bytes(c)) >> 9; -+ u64 discard_end = round_down(offset + len, block_bytes(c)) >> 9; -+ int ret = 0; -+ -+ inode_lock(&inode->v); -+ inode_dio_wait(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ ret = __bch2_truncate_page(inode, -+ offset >> PAGE_SHIFT, -+ offset, offset + len); -+ if (unlikely(ret)) -+ goto err; -+ -+ if (offset >> PAGE_SHIFT != -+ (offset + len) >> PAGE_SHIFT) { -+ ret = __bch2_truncate_page(inode, -+ (offset + len) >> PAGE_SHIFT, -+ offset, offset + len); -+ if (unlikely(ret)) -+ goto err; -+ } -+ -+ truncate_pagecache_range(&inode->v, offset, offset + len - 1); -+ -+ if (discard_start < discard_end) { -+ s64 i_sectors_delta = 0; -+ -+ ret = bch2_fpunch(c, inode->v.i_ino, -+ discard_start, discard_end, -+ &inode->ei_journal_seq, -+ &i_sectors_delta); -+ i_sectors_acct(c, inode, NULL, i_sectors_delta); -+ } -+err: -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ inode_unlock(&inode->v); -+ -+ return ret; -+} -+ -+static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, -+ loff_t offset, loff_t len, -+ bool insert) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct address_space *mapping = inode->v.i_mapping; -+ struct bkey_on_stack copy; -+ struct btree_trans trans; -+ struct btree_iter *src, *dst; -+ loff_t shift, new_size; -+ u64 src_start; -+ int ret; -+ -+ if ((offset | len) & (block_bytes(c) - 1)) -+ return -EINVAL; -+ -+ bkey_on_stack_init(©); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); -+ -+ /* -+ * We need i_mutex to keep the page cache consistent with the extents -+ * btree, and the btree consistent with i_size - we don't need outside -+ * locking for the extents btree itself, because we're using linked -+ * iterators -+ */ -+ inode_lock(&inode->v); -+ inode_dio_wait(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ if (insert) { -+ ret = -EFBIG; -+ if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len) -+ goto err; -+ -+ ret = -EINVAL; -+ if (offset >= inode->v.i_size) -+ goto err; -+ -+ src_start = U64_MAX; -+ shift = len; -+ } else { -+ ret = -EINVAL; -+ if (offset + len >= inode->v.i_size) -+ goto err; -+ -+ src_start = offset + len; -+ shift = -len; -+ } -+ -+ new_size = 
inode->v.i_size + shift; -+ -+ ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); -+ if (ret) -+ goto err; -+ -+ if (insert) { -+ i_size_write(&inode->v, new_size); -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode_size(c, inode, new_size, -+ ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ } else { -+ s64 i_sectors_delta = 0; -+ -+ ret = bch2_fpunch(c, inode->v.i_ino, -+ offset >> 9, (offset + len) >> 9, -+ &inode->ei_journal_seq, -+ &i_sectors_delta); -+ i_sectors_acct(c, inode, NULL, i_sectors_delta); -+ -+ if (ret) -+ goto err; -+ } -+ -+ src = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(inode->v.i_ino, src_start >> 9), -+ BTREE_ITER_INTENT); -+ BUG_ON(IS_ERR_OR_NULL(src)); -+ -+ dst = bch2_trans_copy_iter(&trans, src); -+ BUG_ON(IS_ERR_OR_NULL(dst)); -+ -+ while (1) { -+ struct disk_reservation disk_res = -+ bch2_disk_reservation_init(c, 0); -+ struct bkey_i delete; -+ struct bkey_s_c k; -+ struct bpos next_pos; -+ struct bpos move_pos = POS(inode->v.i_ino, offset >> 9); -+ struct bpos atomic_end; -+ unsigned trigger_flags = 0; -+ -+ k = insert -+ ? bch2_btree_iter_peek_prev(src) -+ : bch2_btree_iter_peek(src); -+ if ((ret = bkey_err(k))) -+ goto bkey_err; -+ -+ if (!k.k || k.k->p.inode != inode->v.i_ino) -+ break; -+ -+ BUG_ON(bkey_cmp(src->pos, bkey_start_pos(k.k))); -+ -+ if (insert && -+ bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0) -+ break; -+reassemble: -+ bkey_on_stack_reassemble(©, c, k); -+ -+ if (insert && -+ bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) -+ bch2_cut_front(move_pos, copy.k); -+ -+ copy.k->k.p.offset += shift >> 9; -+ bch2_btree_iter_set_pos(dst, bkey_start_pos(©.k->k)); -+ -+ ret = bch2_extent_atomic_end(dst, copy.k, &atomic_end); -+ if (ret) -+ goto bkey_err; -+ -+ if (bkey_cmp(atomic_end, copy.k->k.p)) { -+ if (insert) { -+ move_pos = atomic_end; -+ move_pos.offset -= shift >> 9; -+ goto reassemble; -+ } else { -+ bch2_cut_back(atomic_end, copy.k); -+ } -+ } -+ -+ bkey_init(&delete.k); -+ delete.k.p = copy.k->k.p; -+ delete.k.size = copy.k->k.size; -+ delete.k.p.offset -= shift >> 9; -+ -+ next_pos = insert ? 
bkey_start_pos(&delete.k) : delete.k.p; -+ -+ if (copy.k->k.size == k.k->size) { -+ /* -+ * If we're moving the entire extent, we can skip -+ * running triggers: -+ */ -+ trigger_flags |= BTREE_TRIGGER_NORUN; -+ } else { -+ /* We might end up splitting compressed extents: */ -+ unsigned nr_ptrs = -+ bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k)); -+ -+ ret = bch2_disk_reservation_get(c, &disk_res, -+ copy.k->k.size, nr_ptrs, -+ BCH_DISK_RESERVATION_NOFAIL); -+ BUG_ON(ret); -+ } -+ -+ bch2_btree_iter_set_pos(src, bkey_start_pos(&delete.k)); -+ -+ ret = bch2_trans_update(&trans, src, &delete, trigger_flags) ?: -+ bch2_trans_update(&trans, dst, copy.k, trigger_flags) ?: -+ bch2_trans_commit(&trans, &disk_res, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOFAIL); -+ bch2_disk_reservation_put(c, &disk_res); -+bkey_err: -+ if (!ret) -+ bch2_btree_iter_set_pos(src, next_pos); -+ -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ goto err; -+ -+ bch2_trans_cond_resched(&trans); -+ } -+ bch2_trans_unlock(&trans); -+ -+ if (!insert) { -+ i_size_write(&inode->v, new_size); -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode_size(c, inode, new_size, -+ ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ } -+err: -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(©, c); -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ inode_unlock(&inode->v); -+ return ret; -+} -+ -+static long bchfs_fallocate(struct bch_inode_info *inode, int mode, -+ loff_t offset, loff_t len) -+{ -+ struct address_space *mapping = inode->v.i_mapping; -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bpos end_pos; -+ loff_t end = offset + len; -+ loff_t block_start = round_down(offset, block_bytes(c)); -+ loff_t block_end = round_up(end, block_bytes(c)); -+ unsigned sectors; -+ unsigned replicas = io_opts(c, &inode->ei_inode).data_replicas; -+ int ret; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ inode_lock(&inode->v); -+ inode_dio_wait(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) { -+ ret = inode_newsize_ok(&inode->v, end); -+ if (ret) -+ goto err; -+ } -+ -+ if (mode & FALLOC_FL_ZERO_RANGE) { -+ ret = __bch2_truncate_page(inode, -+ offset >> PAGE_SHIFT, -+ offset, end); -+ -+ if (!ret && -+ offset >> PAGE_SHIFT != end >> PAGE_SHIFT) -+ ret = __bch2_truncate_page(inode, -+ end >> PAGE_SHIFT, -+ offset, end); -+ -+ if (unlikely(ret)) -+ goto err; -+ -+ truncate_pagecache_range(&inode->v, offset, end - 1); -+ } -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(inode->v.i_ino, block_start >> 9), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ end_pos = POS(inode->v.i_ino, block_end >> 9); -+ -+ while (bkey_cmp(iter->pos, end_pos) < 0) { -+ s64 i_sectors_delta = 0; -+ struct disk_reservation disk_res = { 0 }; -+ struct quota_res quota_res = { 0 }; -+ struct bkey_i_reservation reservation; -+ struct bkey_s_c k; -+ -+ bch2_trans_begin(&trans); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ if ((ret = bkey_err(k))) -+ goto bkey_err; -+ -+ /* already reserved */ -+ if (k.k->type == KEY_TYPE_reservation && -+ bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) { -+ bch2_btree_iter_next_slot(iter); -+ continue; -+ } -+ -+ if (bkey_extent_is_data(k.k) && -+ !(mode & FALLOC_FL_ZERO_RANGE)) { -+ bch2_btree_iter_next_slot(iter); -+ continue; -+ } -+ -+ bkey_reservation_init(&reservation.k_i); -+ reservation.k.type = 
KEY_TYPE_reservation; -+ reservation.k.p = k.k->p; -+ reservation.k.size = k.k->size; -+ -+ bch2_cut_front(iter->pos, &reservation.k_i); -+ bch2_cut_back(end_pos, &reservation.k_i); -+ -+ sectors = reservation.k.size; -+ reservation.v.nr_replicas = bch2_bkey_nr_ptrs_allocated(k); -+ -+ if (!bkey_extent_is_allocation(k.k)) { -+ ret = bch2_quota_reservation_add(c, inode, -+ "a_res, -+ sectors, true); -+ if (unlikely(ret)) -+ goto bkey_err; -+ } -+ -+ if (reservation.v.nr_replicas < replicas || -+ bch2_bkey_sectors_compressed(k)) { -+ ret = bch2_disk_reservation_get(c, &disk_res, sectors, -+ replicas, 0); -+ if (unlikely(ret)) -+ goto bkey_err; -+ -+ reservation.v.nr_replicas = disk_res.nr_replicas; -+ } -+ -+ ret = bch2_extent_update(&trans, iter, &reservation.k_i, -+ &disk_res, &inode->ei_journal_seq, -+ 0, &i_sectors_delta); -+ i_sectors_acct(c, inode, "a_res, i_sectors_delta); -+bkey_err: -+ bch2_quota_reservation_put(c, inode, "a_res); -+ bch2_disk_reservation_put(c, &disk_res); -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ goto err; -+ } -+ -+ /* -+ * Do we need to extend the file? -+ * -+ * If we zeroed up to the end of the file, we dropped whatever writes -+ * were going to write out the current i_size, so we have to extend -+ * manually even if FL_KEEP_SIZE was set: -+ */ -+ if (end >= inode->v.i_size && -+ (!(mode & FALLOC_FL_KEEP_SIZE) || -+ (mode & FALLOC_FL_ZERO_RANGE))) { -+ struct btree_iter *inode_iter; -+ struct bch_inode_unpacked inode_u; -+ -+ do { -+ bch2_trans_begin(&trans); -+ inode_iter = bch2_inode_peek(&trans, &inode_u, -+ inode->v.i_ino, 0); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ } while (ret == -EINTR); -+ -+ bch2_trans_unlock(&trans); -+ -+ if (ret) -+ goto err; -+ -+ /* -+ * Sync existing appends before extending i_size, -+ * as in bch2_extend(): -+ */ -+ ret = filemap_write_and_wait_range(mapping, -+ inode_u.bi_size, S64_MAX); -+ if (ret) -+ goto err; -+ -+ if (mode & FALLOC_FL_KEEP_SIZE) -+ end = inode->v.i_size; -+ else -+ i_size_write(&inode->v, end); -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode_size(c, inode, end, 0); -+ mutex_unlock(&inode->ei_update_lock); -+ } -+err: -+ bch2_trans_exit(&trans); -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ inode_unlock(&inode->v); -+ return ret; -+} -+ -+long bch2_fallocate_dispatch(struct file *file, int mode, -+ loff_t offset, loff_t len) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ long ret; -+ -+ if (!percpu_ref_tryget(&c->writes)) -+ return -EROFS; -+ -+ if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE))) -+ ret = bchfs_fallocate(inode, mode, offset, len); -+ else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE)) -+ ret = bchfs_fpunch(inode, offset, len); -+ else if (mode == FALLOC_FL_INSERT_RANGE) -+ ret = bchfs_fcollapse_finsert(inode, offset, len, true); -+ else if (mode == FALLOC_FL_COLLAPSE_RANGE) -+ ret = bchfs_fcollapse_finsert(inode, offset, len, false); -+ else -+ ret = -EOPNOTSUPP; -+ -+ percpu_ref_put(&c->writes); -+ -+ return ret; -+} -+ -+static void mark_range_unallocated(struct bch_inode_info *inode, -+ loff_t start, loff_t end) -+{ -+ pgoff_t index = start >> PAGE_SHIFT; -+ pgoff_t end_index = (end - 1) >> PAGE_SHIFT; -+ struct pagevec pvec; -+ -+ pagevec_init(&pvec); -+ -+ do { -+ unsigned nr_pages, i, j; -+ -+ nr_pages = pagevec_lookup_range(&pvec, inode->v.i_mapping, -+ &index, end_index); -+ if (nr_pages == 0) -+ break; -+ -+ for (i = 0; i < nr_pages; i++) { -+ struct page *page = 
pvec.pages[i]; -+ struct bch_page_state *s; -+ -+ lock_page(page); -+ s = bch2_page_state(page); -+ -+ if (s) { -+ spin_lock(&s->lock); -+ for (j = 0; j < PAGE_SECTORS; j++) -+ s->s[j].nr_replicas = 0; -+ spin_unlock(&s->lock); -+ } -+ -+ unlock_page(page); -+ } -+ pagevec_release(&pvec); -+ } while (index <= end_index); -+} -+ -+loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, -+ struct file *file_dst, loff_t pos_dst, -+ loff_t len, unsigned remap_flags) -+{ -+ struct bch_inode_info *src = file_bch_inode(file_src); -+ struct bch_inode_info *dst = file_bch_inode(file_dst); -+ struct bch_fs *c = src->v.i_sb->s_fs_info; -+ s64 i_sectors_delta = 0; -+ u64 aligned_len; -+ loff_t ret = 0; -+ -+ if (!c->opts.reflink) -+ return -EOPNOTSUPP; -+ -+ if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY)) -+ return -EINVAL; -+ -+ if (remap_flags & REMAP_FILE_DEDUP) -+ return -EOPNOTSUPP; -+ -+ if ((pos_src & (block_bytes(c) - 1)) || -+ (pos_dst & (block_bytes(c) - 1))) -+ return -EINVAL; -+ -+ if (src == dst && -+ abs(pos_src - pos_dst) < len) -+ return -EINVAL; -+ -+ bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); -+ -+ file_update_time(file_dst); -+ -+ inode_dio_wait(&src->v); -+ inode_dio_wait(&dst->v); -+ -+ ret = generic_remap_file_range_prep(file_src, pos_src, -+ file_dst, pos_dst, -+ &len, remap_flags); -+ if (ret < 0 || len == 0) -+ goto err; -+ -+ aligned_len = round_up((u64) len, block_bytes(c)); -+ -+ ret = write_invalidate_inode_pages_range(dst->v.i_mapping, -+ pos_dst, pos_dst + len - 1); -+ if (ret) -+ goto err; -+ -+ mark_range_unallocated(src, pos_src, pos_src + aligned_len); -+ -+ ret = bch2_remap_range(c, -+ POS(dst->v.i_ino, pos_dst >> 9), -+ POS(src->v.i_ino, pos_src >> 9), -+ aligned_len >> 9, -+ &dst->ei_journal_seq, -+ pos_dst + len, &i_sectors_delta); -+ if (ret < 0) -+ goto err; -+ -+ /* -+ * due to alignment, we might have remapped slightly more than requsted -+ */ -+ ret = min((u64) ret << 9, (u64) len); -+ -+ /* XXX get a quota reservation */ -+ i_sectors_acct(c, dst, NULL, i_sectors_delta); -+ -+ spin_lock(&dst->v.i_lock); -+ if (pos_dst + ret > dst->v.i_size) -+ i_size_write(&dst->v, pos_dst + ret); -+ spin_unlock(&dst->v.i_lock); -+err: -+ bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); -+ -+ return ret; -+} -+ -+/* fseek: */ -+ -+static int page_data_offset(struct page *page, unsigned offset) -+{ -+ struct bch_page_state *s = bch2_page_state(page); -+ unsigned i; -+ -+ if (s) -+ for (i = offset >> 9; i < PAGE_SECTORS; i++) -+ if (s->s[i].state >= SECTOR_DIRTY) -+ return i << 9; -+ -+ return -1; -+} -+ -+static loff_t bch2_seek_pagecache_data(struct inode *vinode, -+ loff_t start_offset, -+ loff_t end_offset) -+{ -+ struct address_space *mapping = vinode->i_mapping; -+ struct page *page; -+ pgoff_t start_index = start_offset >> PAGE_SHIFT; -+ pgoff_t end_index = end_offset >> PAGE_SHIFT; -+ pgoff_t index = start_index; -+ loff_t ret; -+ int offset; -+ -+ while (index <= end_index) { -+ if (find_get_pages_range(mapping, &index, end_index, 1, &page)) { -+ lock_page(page); -+ -+ offset = page_data_offset(page, -+ page->index == start_index -+ ? 
start_offset & (PAGE_SIZE - 1) -+ : 0); -+ if (offset >= 0) { -+ ret = clamp(((loff_t) page->index << PAGE_SHIFT) + -+ offset, -+ start_offset, end_offset); -+ unlock_page(page); -+ put_page(page); -+ return ret; -+ } -+ -+ unlock_page(page); -+ put_page(page); -+ } else { -+ break; -+ } -+ } -+ -+ return end_offset; -+} -+ -+static loff_t bch2_seek_data(struct file *file, u64 offset) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 isize, next_data = MAX_LFS_FILESIZE; -+ int ret; -+ -+ isize = i_size_read(&inode->v); -+ if (offset >= isize) -+ return -ENXIO; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, -+ POS(inode->v.i_ino, offset >> 9), 0, k, ret) { -+ if (k.k->p.inode != inode->v.i_ino) { -+ break; -+ } else if (bkey_extent_is_data(k.k)) { -+ next_data = max(offset, bkey_start_offset(k.k) << 9); -+ break; -+ } else if (k.k->p.offset >> 9 > isize) -+ break; -+ } -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ return ret; -+ -+ if (next_data > offset) -+ next_data = bch2_seek_pagecache_data(&inode->v, -+ offset, next_data); -+ -+ if (next_data >= isize) -+ return -ENXIO; -+ -+ return vfs_setpos(file, next_data, MAX_LFS_FILESIZE); -+} -+ -+static int __page_hole_offset(struct page *page, unsigned offset) -+{ -+ struct bch_page_state *s = bch2_page_state(page); -+ unsigned i; -+ -+ if (!s) -+ return 0; -+ -+ for (i = offset >> 9; i < PAGE_SECTORS; i++) -+ if (s->s[i].state < SECTOR_DIRTY) -+ return i << 9; -+ -+ return -1; -+} -+ -+static loff_t page_hole_offset(struct address_space *mapping, loff_t offset) -+{ -+ pgoff_t index = offset >> PAGE_SHIFT; -+ struct page *page; -+ int pg_offset; -+ loff_t ret = -1; -+ -+ page = find_lock_entry(mapping, index); -+ if (!page || xa_is_value(page)) -+ return offset; -+ -+ pg_offset = __page_hole_offset(page, offset & (PAGE_SIZE - 1)); -+ if (pg_offset >= 0) -+ ret = ((loff_t) index << PAGE_SHIFT) + pg_offset; -+ -+ unlock_page(page); -+ -+ return ret; -+} -+ -+static loff_t bch2_seek_pagecache_hole(struct inode *vinode, -+ loff_t start_offset, -+ loff_t end_offset) -+{ -+ struct address_space *mapping = vinode->i_mapping; -+ loff_t offset = start_offset, hole; -+ -+ while (offset < end_offset) { -+ hole = page_hole_offset(mapping, offset); -+ if (hole >= 0 && hole <= end_offset) -+ return max(start_offset, hole); -+ -+ offset += PAGE_SIZE; -+ offset &= PAGE_MASK; -+ } -+ -+ return end_offset; -+} -+ -+static loff_t bch2_seek_hole(struct file *file, u64 offset) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 isize, next_hole = MAX_LFS_FILESIZE; -+ int ret; -+ -+ isize = i_size_read(&inode->v); -+ if (offset >= isize) -+ return -ENXIO; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, -+ POS(inode->v.i_ino, offset >> 9), -+ BTREE_ITER_SLOTS, k, ret) { -+ if (k.k->p.inode != inode->v.i_ino) { -+ next_hole = bch2_seek_pagecache_hole(&inode->v, -+ offset, MAX_LFS_FILESIZE); -+ break; -+ } else if (!bkey_extent_is_data(k.k)) { -+ next_hole = bch2_seek_pagecache_hole(&inode->v, -+ max(offset, bkey_start_offset(k.k) << 9), -+ k.k->p.offset << 9); -+ -+ if (next_hole < k.k->p.offset << 9) -+ break; -+ } else { -+ offset = max(offset, bkey_start_offset(k.k) << 9); 
-+ } -+ } -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ return ret; -+ -+ if (next_hole > isize) -+ next_hole = isize; -+ -+ return vfs_setpos(file, next_hole, MAX_LFS_FILESIZE); -+} -+ -+loff_t bch2_llseek(struct file *file, loff_t offset, int whence) -+{ -+ switch (whence) { -+ case SEEK_SET: -+ case SEEK_CUR: -+ case SEEK_END: -+ return generic_file_llseek(file, offset, whence); -+ case SEEK_DATA: -+ return bch2_seek_data(file, offset); -+ case SEEK_HOLE: -+ return bch2_seek_hole(file, offset); -+ } -+ -+ return -EINVAL; -+} -+ -+void bch2_fs_fsio_exit(struct bch_fs *c) -+{ -+ bioset_exit(&c->dio_write_bioset); -+ bioset_exit(&c->dio_read_bioset); -+ bioset_exit(&c->writepage_bioset); -+} -+ -+int bch2_fs_fsio_init(struct bch_fs *c) -+{ -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ if (bioset_init(&c->writepage_bioset, -+ 4, offsetof(struct bch_writepage_io, op.wbio.bio), -+ BIOSET_NEED_BVECS) || -+ bioset_init(&c->dio_read_bioset, -+ 4, offsetof(struct dio_read, rbio.bio), -+ BIOSET_NEED_BVECS) || -+ bioset_init(&c->dio_write_bioset, -+ 4, offsetof(struct dio_write, op.wbio.bio), -+ BIOSET_NEED_BVECS)) -+ ret = -ENOMEM; -+ -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -+ -+#endif /* NO_BCACHEFS_FS */ -diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h -new file mode 100644 -index 000000000000..7063556d289b ---- /dev/null -+++ b/fs/bcachefs/fs-io.h -@@ -0,0 +1,57 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_IO_H -+#define _BCACHEFS_FS_IO_H -+ -+#ifndef NO_BCACHEFS_FS -+ -+#include "buckets.h" -+#include "io_types.h" -+ -+#include -+ -+struct quota_res; -+ -+int __must_check bch2_write_inode_size(struct bch_fs *, -+ struct bch_inode_info *, -+ loff_t, unsigned); -+ -+int bch2_writepage(struct page *, struct writeback_control *); -+int bch2_readpage(struct file *, struct page *); -+ -+int bch2_writepages(struct address_space *, struct writeback_control *); -+int bch2_readpages(struct file *, struct address_space *, -+ struct list_head *, unsigned); -+ -+int bch2_write_begin(struct file *, struct address_space *, loff_t, -+ unsigned, unsigned, struct page **, void **); -+int bch2_write_end(struct file *, struct address_space *, loff_t, -+ unsigned, unsigned, struct page *, void *); -+ -+ssize_t bch2_read_iter(struct kiocb *, struct iov_iter *); -+ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *); -+ -+int bch2_fsync(struct file *, loff_t, loff_t, int); -+ -+int bch2_truncate(struct bch_inode_info *, struct iattr *); -+long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t); -+ -+loff_t bch2_remap_file_range(struct file *, loff_t, struct file *, -+ loff_t, loff_t, unsigned); -+ -+loff_t bch2_llseek(struct file *, loff_t, int); -+ -+vm_fault_t bch2_page_fault(struct vm_fault *); -+vm_fault_t bch2_page_mkwrite(struct vm_fault *); -+void bch2_invalidatepage(struct page *, unsigned int, unsigned int); -+int bch2_releasepage(struct page *, gfp_t); -+int bch2_migrate_page(struct address_space *, struct page *, -+ struct page *, enum migrate_mode); -+ -+void bch2_fs_fsio_exit(struct bch_fs *); -+int bch2_fs_fsio_init(struct bch_fs *); -+#else -+static inline void bch2_fs_fsio_exit(struct bch_fs *c) {} -+static inline int bch2_fs_fsio_init(struct bch_fs *c) { return 0; } -+#endif -+ -+#endif /* _BCACHEFS_FS_IO_H */ -diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c -new file mode 100644 -index 000000000000..0873d2f0928c ---- /dev/null -+++ b/fs/bcachefs/fs-ioctl.c -@@ -0,0 +1,312 @@ -+// 
SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_FS -+ -+#include "bcachefs.h" -+#include "chardev.h" -+#include "dirent.h" -+#include "fs.h" -+#include "fs-common.h" -+#include "fs-ioctl.h" -+#include "quota.h" -+ -+#include -+#include -+ -+#define FS_IOC_GOINGDOWN _IOR('X', 125, __u32) -+ -+struct flags_set { -+ unsigned mask; -+ unsigned flags; -+ -+ unsigned projid; -+}; -+ -+static int bch2_inode_flags_set(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ /* -+ * We're relying on btree locking here for exclusion with other ioctl -+ * calls - use the flags in the btree (@bi), not inode->i_flags: -+ */ -+ struct flags_set *s = p; -+ unsigned newflags = s->flags; -+ unsigned oldflags = bi->bi_flags & s->mask; -+ -+ if (((newflags ^ oldflags) & (BCH_INODE_APPEND|BCH_INODE_IMMUTABLE)) && -+ !capable(CAP_LINUX_IMMUTABLE)) -+ return -EPERM; -+ -+ if (!S_ISREG(bi->bi_mode) && -+ !S_ISDIR(bi->bi_mode) && -+ (newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags) -+ return -EINVAL; -+ -+ bi->bi_flags &= ~s->mask; -+ bi->bi_flags |= newflags; -+ -+ bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); -+ return 0; -+} -+ -+static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg) -+{ -+ unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags); -+ -+ return put_user(flags, arg); -+} -+ -+static int bch2_ioc_setflags(struct bch_fs *c, -+ struct file *file, -+ struct bch_inode_info *inode, -+ void __user *arg) -+{ -+ struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) }; -+ unsigned uflags; -+ int ret; -+ -+ if (get_user(uflags, (int __user *) arg)) -+ return -EFAULT; -+ -+ s.flags = map_flags_rev(bch_flags_to_uflags, uflags); -+ if (uflags) -+ return -EOPNOTSUPP; -+ -+ ret = mnt_want_write_file(file); -+ if (ret) -+ return ret; -+ -+ inode_lock(&inode->v); -+ if (!inode_owner_or_capable(&inode->v)) { -+ ret = -EACCES; -+ goto setflags_out; -+ } -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s, -+ ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ -+setflags_out: -+ inode_unlock(&inode->v); -+ mnt_drop_write_file(file); -+ return ret; -+} -+ -+static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode, -+ struct fsxattr __user *arg) -+{ -+ struct fsxattr fa = { 0 }; -+ -+ fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags); -+ fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ]; -+ -+ return copy_to_user(arg, &fa, sizeof(fa)); -+} -+ -+static int fssetxattr_inode_update_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct flags_set *s = p; -+ -+ if (s->projid != bi->bi_project) { -+ bi->bi_fields_set |= 1U << Inode_opt_project; -+ bi->bi_project = s->projid; -+ } -+ -+ return bch2_inode_flags_set(inode, bi, p); -+} -+ -+static int bch2_ioc_fssetxattr(struct bch_fs *c, -+ struct file *file, -+ struct bch_inode_info *inode, -+ struct fsxattr __user *arg) -+{ -+ struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) }; -+ struct fsxattr fa; -+ int ret; -+ -+ if (copy_from_user(&fa, arg, sizeof(fa))) -+ return -EFAULT; -+ -+ s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags); -+ if (fa.fsx_xflags) -+ return -EOPNOTSUPP; -+ -+ if (fa.fsx_projid >= U32_MAX) -+ return -EINVAL; -+ -+ /* -+ * inode fields accessible via the xattr interface are stored with a +1 -+ * bias, so that 0 means unset: -+ */ -+ s.projid = fa.fsx_projid + 
1; -+ -+ ret = mnt_want_write_file(file); -+ if (ret) -+ return ret; -+ -+ inode_lock(&inode->v); -+ if (!inode_owner_or_capable(&inode->v)) { -+ ret = -EACCES; -+ goto err; -+ } -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_set_projid(c, inode, fa.fsx_projid); -+ if (ret) -+ goto err_unlock; -+ -+ ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, -+ ATTR_CTIME); -+err_unlock: -+ mutex_unlock(&inode->ei_update_lock); -+err: -+ inode_unlock(&inode->v); -+ mnt_drop_write_file(file); -+ return ret; -+} -+ -+static int bch2_reinherit_attrs_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_inode_info *dir = p; -+ -+ return !bch2_reinherit_attrs(bi, &dir->ei_inode); -+} -+ -+static int bch2_ioc_reinherit_attrs(struct bch_fs *c, -+ struct file *file, -+ struct bch_inode_info *src, -+ const char __user *name) -+{ -+ struct bch_inode_info *dst; -+ struct inode *vinode = NULL; -+ char *kname = NULL; -+ struct qstr qstr; -+ int ret = 0; -+ u64 inum; -+ -+ kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL); -+ if (!kname) -+ return -ENOMEM; -+ -+ ret = strncpy_from_user(kname, name, BCH_NAME_MAX); -+ if (unlikely(ret < 0)) -+ goto err1; -+ -+ qstr.len = ret; -+ qstr.name = kname; -+ -+ ret = -ENOENT; -+ inum = bch2_dirent_lookup(c, src->v.i_ino, -+ &src->ei_str_hash, -+ &qstr); -+ if (!inum) -+ goto err1; -+ -+ vinode = bch2_vfs_inode_get(c, inum); -+ ret = PTR_ERR_OR_ZERO(vinode); -+ if (ret) -+ goto err1; -+ -+ dst = to_bch_ei(vinode); -+ -+ ret = mnt_want_write_file(file); -+ if (ret) -+ goto err2; -+ -+ bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst); -+ -+ if (inode_attr_changing(src, dst, Inode_opt_project)) { -+ ret = bch2_fs_quota_transfer(c, dst, -+ src->ei_qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err3; -+ } -+ -+ ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0); -+err3: -+ bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst); -+ -+ /* return true if we did work */ -+ if (ret >= 0) -+ ret = !ret; -+ -+ mnt_drop_write_file(file); -+err2: -+ iput(vinode); -+err1: -+ kfree(kname); -+ -+ return ret; -+} -+ -+long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct super_block *sb = inode->v.i_sb; -+ struct bch_fs *c = sb->s_fs_info; -+ -+ switch (cmd) { -+ case FS_IOC_GETFLAGS: -+ return bch2_ioc_getflags(inode, (int __user *) arg); -+ -+ case FS_IOC_SETFLAGS: -+ return bch2_ioc_setflags(c, file, inode, (int __user *) arg); -+ -+ case FS_IOC_FSGETXATTR: -+ return bch2_ioc_fsgetxattr(inode, (void __user *) arg); -+ case FS_IOC_FSSETXATTR: -+ return bch2_ioc_fssetxattr(c, file, inode, -+ (void __user *) arg); -+ -+ case BCHFS_IOC_REINHERIT_ATTRS: -+ return bch2_ioc_reinherit_attrs(c, file, inode, -+ (void __user *) arg); -+ -+ case FS_IOC_GETVERSION: -+ return -ENOTTY; -+ case FS_IOC_SETVERSION: -+ return -ENOTTY; -+ -+ case FS_IOC_GOINGDOWN: -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ down_write(&sb->s_umount); -+ sb->s_flags |= SB_RDONLY; -+ if (bch2_fs_emergency_read_only(c)) -+ bch_err(c, "emergency read only due to ioctl"); -+ up_write(&sb->s_umount); -+ return 0; -+ -+ default: -+ return bch2_fs_ioctl(c, cmd, (void __user *) arg); -+ } -+} -+ -+#ifdef CONFIG_COMPAT -+long bch2_compat_fs_ioctl(struct file *file, unsigned cmd, unsigned long arg) -+{ -+ /* These are just misnamed, they actually get/put from/to user an int */ -+ switch (cmd) { -+ case FS_IOC_GETFLAGS: -+ cmd = FS_IOC_GETFLAGS; 
-+ break; -+ case FS_IOC32_SETFLAGS: -+ cmd = FS_IOC_SETFLAGS; -+ break; -+ default: -+ return -ENOIOCTLCMD; -+ } -+ return bch2_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); -+} -+#endif -+ -+#endif /* NO_BCACHEFS_FS */ -diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h -new file mode 100644 -index 000000000000..f201980ef2c3 ---- /dev/null -+++ b/fs/bcachefs/fs-ioctl.h -@@ -0,0 +1,81 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_IOCTL_H -+#define _BCACHEFS_FS_IOCTL_H -+ -+/* Inode flags: */ -+ -+/* bcachefs inode flags -> vfs inode flags: */ -+static const unsigned bch_flags_to_vfs[] = { -+ [__BCH_INODE_SYNC] = S_SYNC, -+ [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE, -+ [__BCH_INODE_APPEND] = S_APPEND, -+ [__BCH_INODE_NOATIME] = S_NOATIME, -+}; -+ -+/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ -+static const unsigned bch_flags_to_uflags[] = { -+ [__BCH_INODE_SYNC] = FS_SYNC_FL, -+ [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL, -+ [__BCH_INODE_APPEND] = FS_APPEND_FL, -+ [__BCH_INODE_NODUMP] = FS_NODUMP_FL, -+ [__BCH_INODE_NOATIME] = FS_NOATIME_FL, -+}; -+ -+/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ -+static const unsigned bch_flags_to_xflags[] = { -+ [__BCH_INODE_SYNC] = FS_XFLAG_SYNC, -+ [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE, -+ [__BCH_INODE_APPEND] = FS_XFLAG_APPEND, -+ [__BCH_INODE_NODUMP] = FS_XFLAG_NODUMP, -+ [__BCH_INODE_NOATIME] = FS_XFLAG_NOATIME, -+ //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT; -+}; -+ -+#define set_flags(_map, _in, _out) \ -+do { \ -+ unsigned _i; \ -+ \ -+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ -+ if ((_in) & (1 << _i)) \ -+ (_out) |= _map[_i]; \ -+ else \ -+ (_out) &= ~_map[_i]; \ -+} while (0) -+ -+#define map_flags(_map, _in) \ -+({ \ -+ unsigned _out = 0; \ -+ \ -+ set_flags(_map, _in, _out); \ -+ _out; \ -+}) -+ -+#define map_flags_rev(_map, _in) \ -+({ \ -+ unsigned _i, _out = 0; \ -+ \ -+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ -+ if ((_in) & _map[_i]) { \ -+ (_out) |= 1 << _i; \ -+ (_in) &= ~_map[_i]; \ -+ } \ -+ (_out); \ -+}) -+ -+#define map_defined(_map) \ -+({ \ -+ unsigned _in = ~0; \ -+ \ -+ map_flags_rev(_map, _in); \ -+}) -+ -+/* Set VFS inode flags from bcachefs inode: */ -+static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) -+{ -+ set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); -+} -+ -+long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long); -+long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long); -+ -+#endif /* _BCACHEFS_FS_IOCTL_H */ -diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -new file mode 100644 -index 000000000000..e504e6b19abe ---- /dev/null -+++ b/fs/bcachefs/fs.c -@@ -0,0 +1,1628 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_FS -+ -+#include "bcachefs.h" -+#include "acl.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "chardev.h" -+#include "dirent.h" -+#include "extents.h" -+#include "fs.h" -+#include "fs-common.h" -+#include "fs-io.h" -+#include "fs-ioctl.h" -+#include "fsck.h" -+#include "inode.h" -+#include "io.h" -+#include "journal.h" -+#include "keylist.h" -+#include "quota.h" -+#include "super.h" -+#include "xattr.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static struct kmem_cache *bch2_inode_cache; -+ -+static void bch2_vfs_inode_init(struct bch_fs *, -+ struct bch_inode_info *, -+ struct bch_inode_unpacked *); -+ -+static void journal_seq_copy(struct 
bch_inode_info *dst, -+ u64 journal_seq) -+{ -+ u64 old, v = READ_ONCE(dst->ei_journal_seq); -+ -+ do { -+ old = v; -+ -+ if (old >= journal_seq) -+ break; -+ } while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old); -+} -+ -+static void __pagecache_lock_put(struct pagecache_lock *lock, long i) -+{ -+ BUG_ON(atomic_long_read(&lock->v) == 0); -+ -+ if (atomic_long_sub_return_release(i, &lock->v) == 0) -+ wake_up_all(&lock->wait); -+} -+ -+static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i) -+{ -+ long v = atomic_long_read(&lock->v), old; -+ -+ do { -+ old = v; -+ -+ if (i > 0 ? v < 0 : v > 0) -+ return false; -+ } while ((v = atomic_long_cmpxchg_acquire(&lock->v, -+ old, old + i)) != old); -+ return true; -+} -+ -+static void __pagecache_lock_get(struct pagecache_lock *lock, long i) -+{ -+ wait_event(lock->wait, __pagecache_lock_tryget(lock, i)); -+} -+ -+void bch2_pagecache_add_put(struct pagecache_lock *lock) -+{ -+ __pagecache_lock_put(lock, 1); -+} -+ -+void bch2_pagecache_add_get(struct pagecache_lock *lock) -+{ -+ __pagecache_lock_get(lock, 1); -+} -+ -+void bch2_pagecache_block_put(struct pagecache_lock *lock) -+{ -+ __pagecache_lock_put(lock, -1); -+} -+ -+void bch2_pagecache_block_get(struct pagecache_lock *lock) -+{ -+ __pagecache_lock_get(lock, -1); -+} -+ -+void bch2_inode_update_after_write(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ unsigned fields) -+{ -+ set_nlink(&inode->v, bch2_inode_nlink_get(bi)); -+ i_uid_write(&inode->v, bi->bi_uid); -+ i_gid_write(&inode->v, bi->bi_gid); -+ inode->v.i_mode = bi->bi_mode; -+ -+ if (fields & ATTR_ATIME) -+ inode->v.i_atime = bch2_time_to_timespec(c, bi->bi_atime); -+ if (fields & ATTR_MTIME) -+ inode->v.i_mtime = bch2_time_to_timespec(c, bi->bi_mtime); -+ if (fields & ATTR_CTIME) -+ inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime); -+ -+ inode->ei_inode = *bi; -+ -+ bch2_inode_flags_to_vfs(inode); -+} -+ -+int __must_check bch2_write_inode(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ inode_set_fn set, -+ void *p, unsigned fields) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bch_inode_unpacked inode_u; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(iter) ?: -+ (set ? set(inode, &inode_u, p) : 0) ?: -+ bch2_inode_write(&trans, iter, &inode_u) ?: -+ bch2_trans_commit(&trans, NULL, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOFAIL); -+ -+ /* -+ * the btree node lock protects inode->ei_inode, not ei_update_lock; -+ * this is important for inode updates via bchfs_write_index_update -+ */ -+ if (!ret) -+ bch2_inode_update_after_write(c, inode, &inode_u, fields); -+ -+ bch2_trans_iter_put(&trans, iter); -+ -+ if (ret == -EINTR) -+ goto retry; -+ -+ bch2_trans_exit(&trans); -+ return ret < 0 ? 
ret : 0; -+} -+ -+int bch2_fs_quota_transfer(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch_qid new_qid, -+ unsigned qtypes, -+ enum quota_acct_mode mode) -+{ -+ unsigned i; -+ int ret; -+ -+ qtypes &= enabled_qtypes(c); -+ -+ for (i = 0; i < QTYP_NR; i++) -+ if (new_qid.q[i] == inode->ei_qid.q[i]) -+ qtypes &= ~(1U << i); -+ -+ if (!qtypes) -+ return 0; -+ -+ mutex_lock(&inode->ei_quota_lock); -+ -+ ret = bch2_quota_transfer(c, qtypes, new_qid, -+ inode->ei_qid, -+ inode->v.i_blocks + -+ inode->ei_quota_reserved, -+ mode); -+ if (!ret) -+ for (i = 0; i < QTYP_NR; i++) -+ if (qtypes & (1 << i)) -+ inode->ei_qid.q[i] = new_qid.q[i]; -+ -+ mutex_unlock(&inode->ei_quota_lock); -+ -+ return ret; -+} -+ -+struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) -+{ -+ struct bch_inode_unpacked inode_u; -+ struct bch_inode_info *inode; -+ int ret; -+ -+ inode = to_bch_ei(iget_locked(c->vfs_sb, inum)); -+ if (unlikely(!inode)) -+ return ERR_PTR(-ENOMEM); -+ if (!(inode->v.i_state & I_NEW)) -+ return &inode->v; -+ -+ ret = bch2_inode_find_by_inum(c, inum, &inode_u); -+ if (ret) { -+ iget_failed(&inode->v); -+ return ERR_PTR(ret); -+ } -+ -+ bch2_vfs_inode_init(c, inode, &inode_u); -+ -+ inode->ei_journal_seq = bch2_inode_journal_seq(&c->journal, inum); -+ -+ unlock_new_inode(&inode->v); -+ -+ return &inode->v; -+} -+ -+static struct bch_inode_info * -+__bch2_create(struct bch_inode_info *dir, struct dentry *dentry, -+ umode_t mode, dev_t rdev, bool tmpfile) -+{ -+ struct bch_fs *c = dir->v.i_sb->s_fs_info; -+ struct user_namespace *ns = dir->v.i_sb->s_user_ns; -+ struct btree_trans trans; -+ struct bch_inode_unpacked dir_u; -+ struct bch_inode_info *inode, *old; -+ struct bch_inode_unpacked inode_u; -+ struct posix_acl *default_acl = NULL, *acl = NULL; -+ u64 journal_seq = 0; -+ int ret; -+ -+ /* -+ * preallocate acls + vfs inode before btree transaction, so that -+ * nothing can fail after the transaction succeeds: -+ */ -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl); -+ if (ret) -+ return ERR_PTR(ret); -+#endif -+ inode = to_bch_ei(new_inode(c->vfs_sb)); -+ if (unlikely(!inode)) { -+ inode = ERR_PTR(-ENOMEM); -+ goto err; -+ } -+ -+ bch2_inode_init_early(c, &inode_u); -+ -+ if (!tmpfile) -+ mutex_lock(&dir->ei_update_lock); -+ -+ bch2_trans_init(&trans, c, 8, 1024); -+retry: -+ bch2_trans_begin(&trans); -+ -+ ret = bch2_create_trans(&trans, dir->v.i_ino, &dir_u, &inode_u, -+ !tmpfile ? 
&dentry->d_name : NULL, -+ from_kuid(ns, current_fsuid()), -+ from_kgid(ns, current_fsgid()), -+ mode, rdev, -+ default_acl, acl) ?: -+ bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (unlikely(ret)) -+ goto err_before_quota; -+ -+ ret = bch2_trans_commit(&trans, NULL, &journal_seq, -+ BTREE_INSERT_NOUNLOCK); -+ if (unlikely(ret)) { -+ bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, -+ KEY_TYPE_QUOTA_WARN); -+err_before_quota: -+ if (ret == -EINTR) -+ goto retry; -+ goto err_trans; -+ } -+ -+ if (!tmpfile) { -+ bch2_inode_update_after_write(c, dir, &dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ journal_seq_copy(dir, journal_seq); -+ mutex_unlock(&dir->ei_update_lock); -+ } -+ -+ bch2_vfs_inode_init(c, inode, &inode_u); -+ journal_seq_copy(inode, journal_seq); -+ -+ set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); -+ set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); -+ -+ /* -+ * we must insert the new inode into the inode cache before calling -+ * bch2_trans_exit() and dropping locks, else we could race with another -+ * thread pulling the inode in and modifying it: -+ */ -+ -+ old = to_bch_ei(insert_inode_locked2(&inode->v)); -+ if (unlikely(old)) { -+ /* -+ * We raced, another process pulled the new inode into cache -+ * before us: -+ */ -+ journal_seq_copy(old, journal_seq); -+ make_bad_inode(&inode->v); -+ iput(&inode->v); -+ -+ inode = old; -+ } else { -+ /* -+ * we really don't want insert_inode_locked2() to be setting -+ * I_NEW... -+ */ -+ unlock_new_inode(&inode->v); -+ } -+ -+ bch2_trans_exit(&trans); -+err: -+ posix_acl_release(default_acl); -+ posix_acl_release(acl); -+ return inode; -+err_trans: -+ if (!tmpfile) -+ mutex_unlock(&dir->ei_update_lock); -+ -+ bch2_trans_exit(&trans); -+ make_bad_inode(&inode->v); -+ iput(&inode->v); -+ inode = ERR_PTR(ret); -+ goto err; -+} -+ -+/* methods */ -+ -+static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, -+ unsigned int flags) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir); -+ struct inode *vinode = NULL; -+ u64 inum; -+ -+ inum = bch2_dirent_lookup(c, dir->v.i_ino, -+ &dir->ei_str_hash, -+ &dentry->d_name); -+ -+ if (inum) -+ vinode = bch2_vfs_inode_get(c, inum); -+ -+ return d_splice_alias(vinode, dentry); -+} -+ -+static int bch2_mknod(struct inode *vdir, struct dentry *dentry, -+ umode_t mode, dev_t rdev) -+{ -+ struct bch_inode_info *inode = -+ __bch2_create(to_bch_ei(vdir), dentry, mode, rdev, false); -+ -+ if (IS_ERR(inode)) -+ return PTR_ERR(inode); -+ -+ d_instantiate(dentry, &inode->v); -+ return 0; -+} -+ -+static int bch2_create(struct inode *vdir, struct dentry *dentry, -+ umode_t mode, bool excl) -+{ -+ return bch2_mknod(vdir, dentry, mode|S_IFREG, 0); -+} -+ -+static int __bch2_link(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch_inode_info *dir, -+ struct dentry *dentry) -+{ -+ struct btree_trans trans; -+ struct bch_inode_unpacked dir_u, inode_u; -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ bch2_trans_init(&trans, c, 4, 1024); -+ -+ do { -+ bch2_trans_begin(&trans); -+ ret = bch2_link_trans(&trans, -+ dir->v.i_ino, -+ inode->v.i_ino, &dir_u, &inode_u, -+ &dentry->d_name) ?: -+ bch2_trans_commit(&trans, NULL, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK); -+ } while (ret == -EINTR); -+ -+ if (likely(!ret)) { -+ BUG_ON(inode_u.bi_inum != inode->v.i_ino); -+ -+ journal_seq_copy(inode, dir->ei_journal_seq); -+ bch2_inode_update_after_write(c, dir, &dir_u, -+ 
ATTR_MTIME|ATTR_CTIME); -+ bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME); -+ } -+ -+ bch2_trans_exit(&trans); -+ mutex_unlock(&inode->ei_update_lock); -+ return ret; -+} -+ -+static int bch2_link(struct dentry *old_dentry, struct inode *vdir, -+ struct dentry *dentry) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir); -+ struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode); -+ int ret; -+ -+ lockdep_assert_held(&inode->v.i_rwsem); -+ -+ ret = __bch2_link(c, inode, dir, dentry); -+ if (unlikely(ret)) -+ return ret; -+ -+ ihold(&inode->v); -+ d_instantiate(dentry, &inode->v); -+ return 0; -+} -+ -+static int bch2_unlink(struct inode *vdir, struct dentry *dentry) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir); -+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); -+ struct bch_inode_unpacked dir_u, inode_u; -+ struct btree_trans trans; -+ int ret; -+ -+ bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); -+ bch2_trans_init(&trans, c, 4, 1024); -+ -+ do { -+ bch2_trans_begin(&trans); -+ -+ ret = bch2_unlink_trans(&trans, -+ dir->v.i_ino, &dir_u, -+ &inode_u, &dentry->d_name) ?: -+ bch2_trans_commit(&trans, NULL, -+ &dir->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOFAIL); -+ } while (ret == -EINTR); -+ -+ if (likely(!ret)) { -+ BUG_ON(inode_u.bi_inum != inode->v.i_ino); -+ -+ journal_seq_copy(inode, dir->ei_journal_seq); -+ bch2_inode_update_after_write(c, dir, &dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ bch2_inode_update_after_write(c, inode, &inode_u, -+ ATTR_MTIME); -+ } -+ -+ bch2_trans_exit(&trans); -+ bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); -+ -+ return ret; -+} -+ -+static int bch2_symlink(struct inode *vdir, struct dentry *dentry, -+ const char *symname) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir), *inode; -+ int ret; -+ -+ inode = __bch2_create(dir, dentry, S_IFLNK|S_IRWXUGO, 0, true); -+ if (unlikely(IS_ERR(inode))) -+ return PTR_ERR(inode); -+ -+ inode_lock(&inode->v); -+ ret = page_symlink(&inode->v, symname, strlen(symname) + 1); -+ inode_unlock(&inode->v); -+ -+ if (unlikely(ret)) -+ goto err; -+ -+ ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX); -+ if (unlikely(ret)) -+ goto err; -+ -+ journal_seq_copy(dir, inode->ei_journal_seq); -+ -+ ret = __bch2_link(c, inode, dir, dentry); -+ if (unlikely(ret)) -+ goto err; -+ -+ d_instantiate(dentry, &inode->v); -+ return 0; -+err: -+ iput(&inode->v); -+ return ret; -+} -+ -+static int bch2_mkdir(struct inode *vdir, struct dentry *dentry, umode_t mode) -+{ -+ return bch2_mknod(vdir, dentry, mode|S_IFDIR, 0); -+} -+ -+static int bch2_rename2(struct inode *src_vdir, struct dentry *src_dentry, -+ struct inode *dst_vdir, struct dentry *dst_dentry, -+ unsigned flags) -+{ -+ struct bch_fs *c = src_vdir->i_sb->s_fs_info; -+ struct bch_inode_info *src_dir = to_bch_ei(src_vdir); -+ struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir); -+ struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode); -+ struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode); -+ struct bch_inode_unpacked dst_dir_u, src_dir_u; -+ struct bch_inode_unpacked src_inode_u, dst_inode_u; -+ struct btree_trans trans; -+ enum bch_rename_mode mode = flags & RENAME_EXCHANGE -+ ? BCH_RENAME_EXCHANGE -+ : dst_dentry->d_inode -+ ? 
BCH_RENAME_OVERWRITE : BCH_RENAME; -+ u64 journal_seq = 0; -+ int ret; -+ -+ if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE)) -+ return -EINVAL; -+ -+ if (mode == BCH_RENAME_OVERWRITE) { -+ ret = filemap_write_and_wait_range(src_inode->v.i_mapping, -+ 0, LLONG_MAX); -+ if (ret) -+ return ret; -+ } -+ -+ bch2_trans_init(&trans, c, 8, 2048); -+ -+ bch2_lock_inodes(INODE_UPDATE_LOCK, -+ src_dir, -+ dst_dir, -+ src_inode, -+ dst_inode); -+ -+ if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) { -+ ret = bch2_fs_quota_transfer(c, src_inode, -+ dst_dir->ei_qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err; -+ } -+ -+ if (mode == BCH_RENAME_EXCHANGE && -+ inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) { -+ ret = bch2_fs_quota_transfer(c, dst_inode, -+ src_dir->ei_qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err; -+ } -+ -+retry: -+ bch2_trans_begin(&trans); -+ ret = bch2_rename_trans(&trans, -+ src_dir->v.i_ino, &src_dir_u, -+ dst_dir->v.i_ino, &dst_dir_u, -+ &src_inode_u, -+ &dst_inode_u, -+ &src_dentry->d_name, -+ &dst_dentry->d_name, -+ mode) ?: -+ bch2_trans_commit(&trans, NULL, -+ &journal_seq, -+ BTREE_INSERT_NOUNLOCK); -+ if (ret == -EINTR) -+ goto retry; -+ if (unlikely(ret)) -+ goto err; -+ -+ BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum); -+ BUG_ON(dst_inode && -+ dst_inode->v.i_ino != dst_inode_u.bi_inum); -+ -+ bch2_inode_update_after_write(c, src_dir, &src_dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ journal_seq_copy(src_dir, journal_seq); -+ -+ if (src_dir != dst_dir) { -+ bch2_inode_update_after_write(c, dst_dir, &dst_dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ journal_seq_copy(dst_dir, journal_seq); -+ } -+ -+ bch2_inode_update_after_write(c, src_inode, &src_inode_u, -+ ATTR_CTIME); -+ journal_seq_copy(src_inode, journal_seq); -+ -+ if (dst_inode) { -+ bch2_inode_update_after_write(c, dst_inode, &dst_inode_u, -+ ATTR_CTIME); -+ journal_seq_copy(dst_inode, journal_seq); -+ } -+err: -+ bch2_trans_exit(&trans); -+ -+ bch2_fs_quota_transfer(c, src_inode, -+ bch_qid(&src_inode->ei_inode), -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_NOCHECK); -+ if (dst_inode) -+ bch2_fs_quota_transfer(c, dst_inode, -+ bch_qid(&dst_inode->ei_inode), -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_NOCHECK); -+ -+ bch2_unlock_inodes(INODE_UPDATE_LOCK, -+ src_dir, -+ dst_dir, -+ src_inode, -+ dst_inode); -+ -+ return ret; -+} -+ -+void bch2_setattr_copy(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ struct iattr *attr) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ unsigned int ia_valid = attr->ia_valid; -+ -+ if (ia_valid & ATTR_UID) -+ bi->bi_uid = from_kuid(c->vfs_sb->s_user_ns, attr->ia_uid); -+ if (ia_valid & ATTR_GID) -+ bi->bi_gid = from_kgid(c->vfs_sb->s_user_ns, attr->ia_gid); -+ -+ if (ia_valid & ATTR_ATIME) -+ bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime); -+ if (ia_valid & ATTR_MTIME) -+ bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime); -+ if (ia_valid & ATTR_CTIME) -+ bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime); -+ -+ if (ia_valid & ATTR_MODE) { -+ umode_t mode = attr->ia_mode; -+ kgid_t gid = ia_valid & ATTR_GID -+ ? 
attr->ia_gid -+ : inode->v.i_gid; -+ -+ if (!in_group_p(gid) && -+ !capable_wrt_inode_uidgid(&inode->v, CAP_FSETID)) -+ mode &= ~S_ISGID; -+ bi->bi_mode = mode; -+ } -+} -+ -+static int bch2_setattr_nonsize(struct bch_inode_info *inode, -+ struct iattr *attr) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_qid qid; -+ struct btree_trans trans; -+ struct btree_iter *inode_iter; -+ struct bch_inode_unpacked inode_u; -+ struct posix_acl *acl = NULL; -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ -+ qid = inode->ei_qid; -+ -+ if (attr->ia_valid & ATTR_UID) -+ qid.q[QTYP_USR] = from_kuid(&init_user_ns, attr->ia_uid); -+ -+ if (attr->ia_valid & ATTR_GID) -+ qid.q[QTYP_GRP] = from_kgid(&init_user_ns, attr->ia_gid); -+ -+ ret = bch2_fs_quota_transfer(c, inode, qid, ~0, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ kfree(acl); -+ acl = NULL; -+ -+ inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ if (ret) -+ goto btree_err; -+ -+ bch2_setattr_copy(inode, &inode_u, attr); -+ -+ if (attr->ia_valid & ATTR_MODE) { -+ ret = bch2_acl_chmod(&trans, inode, inode_u.bi_mode, &acl); -+ if (ret) -+ goto btree_err; -+ } -+ -+ ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: -+ bch2_trans_commit(&trans, NULL, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOFAIL); -+btree_err: -+ if (ret == -EINTR) -+ goto retry; -+ if (unlikely(ret)) -+ goto err_trans; -+ -+ bch2_inode_update_after_write(c, inode, &inode_u, attr->ia_valid); -+ -+ if (acl) -+ set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); -+err_trans: -+ bch2_trans_exit(&trans); -+err: -+ mutex_unlock(&inode->ei_update_lock); -+ -+ return ret; -+} -+ -+static int bch2_getattr(const struct path *path, struct kstat *stat, -+ u32 request_mask, unsigned query_flags) -+{ -+ struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry)); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ stat->dev = inode->v.i_sb->s_dev; -+ stat->ino = inode->v.i_ino; -+ stat->mode = inode->v.i_mode; -+ stat->nlink = inode->v.i_nlink; -+ stat->uid = inode->v.i_uid; -+ stat->gid = inode->v.i_gid; -+ stat->rdev = inode->v.i_rdev; -+ stat->size = i_size_read(&inode->v); -+ stat->atime = inode->v.i_atime; -+ stat->mtime = inode->v.i_mtime; -+ stat->ctime = inode->v.i_ctime; -+ stat->blksize = block_bytes(c); -+ stat->blocks = inode->v.i_blocks; -+ -+ if (request_mask & STATX_BTIME) { -+ stat->result_mask |= STATX_BTIME; -+ stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime); -+ } -+ -+ if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE) -+ stat->attributes |= STATX_ATTR_IMMUTABLE; -+ stat->attributes_mask |= STATX_ATTR_IMMUTABLE; -+ -+ if (inode->ei_inode.bi_flags & BCH_INODE_APPEND) -+ stat->attributes |= STATX_ATTR_APPEND; -+ stat->attributes_mask |= STATX_ATTR_APPEND; -+ -+ if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP) -+ stat->attributes |= STATX_ATTR_NODUMP; -+ stat->attributes_mask |= STATX_ATTR_NODUMP; -+ -+ return 0; -+} -+ -+static int bch2_setattr(struct dentry *dentry, struct iattr *iattr) -+{ -+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); -+ int ret; -+ -+ lockdep_assert_held(&inode->v.i_rwsem); -+ -+ ret = setattr_prepare(dentry, iattr); -+ if (ret) -+ return ret; -+ -+ return iattr->ia_valid & ATTR_SIZE -+ ? 
bch2_truncate(inode, iattr) -+ : bch2_setattr_nonsize(inode, iattr); -+} -+ -+static int bch2_tmpfile(struct inode *vdir, struct dentry *dentry, umode_t mode) -+{ -+ struct bch_inode_info *inode = -+ __bch2_create(to_bch_ei(vdir), dentry, mode, 0, true); -+ -+ if (IS_ERR(inode)) -+ return PTR_ERR(inode); -+ -+ d_mark_tmpfile(dentry, &inode->v); -+ d_instantiate(dentry, &inode->v); -+ return 0; -+} -+ -+static int bch2_fill_extent(struct bch_fs *c, -+ struct fiemap_extent_info *info, -+ struct bkey_s_c k, unsigned flags) -+{ -+ if (bkey_extent_is_data(k.k)) { -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ int ret; -+ -+ if (k.k->type == KEY_TYPE_reflink_v) -+ flags |= FIEMAP_EXTENT_SHARED; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ int flags2 = 0; -+ u64 offset = p.ptr.offset; -+ -+ if (p.crc.compression_type) -+ flags2 |= FIEMAP_EXTENT_ENCODED; -+ else -+ offset += p.crc.offset; -+ -+ if ((offset & (c->opts.block_size - 1)) || -+ (k.k->size & (c->opts.block_size - 1))) -+ flags2 |= FIEMAP_EXTENT_NOT_ALIGNED; -+ -+ ret = fiemap_fill_next_extent(info, -+ bkey_start_offset(k.k) << 9, -+ offset << 9, -+ k.k->size << 9, flags|flags2); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+ } else if (k.k->type == KEY_TYPE_reservation) { -+ return fiemap_fill_next_extent(info, -+ bkey_start_offset(k.k) << 9, -+ 0, k.k->size << 9, -+ flags| -+ FIEMAP_EXTENT_DELALLOC| -+ FIEMAP_EXTENT_UNWRITTEN); -+ } else { -+ BUG(); -+ } -+} -+ -+static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, -+ u64 start, u64 len) -+{ -+ struct bch_fs *c = vinode->i_sb->s_fs_info; -+ struct bch_inode_info *ei = to_bch_ei(vinode); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_on_stack cur, prev; -+ struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); -+ unsigned offset_into_extent, sectors; -+ bool have_extent = false; -+ int ret = 0; -+ -+ ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC); -+ if (ret) -+ return ret; -+ -+ if (start + len < start) -+ return -EINVAL; -+ -+ bkey_on_stack_init(&cur); -+ bkey_on_stack_init(&prev); -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(ei->v.i_ino, start >> 9), 0); -+retry: -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret = bkey_err(k)) && -+ bkey_cmp(iter->pos, end) < 0) { -+ if (!bkey_extent_is_data(k.k) && -+ k.k->type != KEY_TYPE_reservation) { -+ bch2_btree_iter_next(iter); -+ continue; -+ } -+ -+ bkey_on_stack_realloc(&cur, c, k.k->u64s); -+ bkey_on_stack_realloc(&prev, c, k.k->u64s); -+ bkey_reassemble(cur.k, k); -+ k = bkey_i_to_s_c(cur.k); -+ -+ offset_into_extent = iter->pos.offset - -+ bkey_start_offset(k.k); -+ sectors = k.k->size - offset_into_extent; -+ -+ ret = bch2_read_indirect_extent(&trans, -+ &offset_into_extent, &cur); -+ if (ret) -+ break; -+ -+ sectors = min(sectors, k.k->size - offset_into_extent); -+ -+ if (offset_into_extent) -+ bch2_cut_front(POS(k.k->p.inode, -+ bkey_start_offset(k.k) + -+ offset_into_extent), -+ cur.k); -+ bch2_key_resize(&cur.k->k, sectors); -+ cur.k->k.p = iter->pos; -+ cur.k->k.p.offset += cur.k->k.size; -+ -+ if (have_extent) { -+ ret = bch2_fill_extent(c, info, -+ bkey_i_to_s_c(prev.k), 0); -+ if (ret) -+ break; -+ } -+ -+ bkey_copy(prev.k, cur.k); -+ have_extent = true; -+ -+ if (k.k->type == KEY_TYPE_reflink_v) -+ bch2_btree_iter_set_pos(iter, k.k->p); -+ else -+ bch2_btree_iter_next(iter); -+ } -+ 
-+ if (ret == -EINTR) -+ goto retry; -+ -+ if (!ret && have_extent) -+ ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), -+ FIEMAP_EXTENT_LAST); -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ bkey_on_stack_exit(&cur, c); -+ bkey_on_stack_exit(&prev, c); -+ return ret < 0 ? ret : 0; -+} -+ -+static const struct vm_operations_struct bch_vm_ops = { -+ .fault = bch2_page_fault, -+ .map_pages = filemap_map_pages, -+ .page_mkwrite = bch2_page_mkwrite, -+}; -+ -+static int bch2_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ file_accessed(file); -+ -+ vma->vm_ops = &bch_vm_ops; -+ return 0; -+} -+ -+/* Directories: */ -+ -+static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence) -+{ -+ return generic_file_llseek_size(file, offset, whence, -+ S64_MAX, S64_MAX); -+} -+ -+static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ if (!dir_emit_dots(file, ctx)) -+ return 0; -+ -+ return bch2_readdir(c, inode->v.i_ino, ctx); -+} -+ -+static const struct file_operations bch_file_operations = { -+ .llseek = bch2_llseek, -+ .read_iter = bch2_read_iter, -+ .write_iter = bch2_write_iter, -+ .mmap = bch2_mmap, -+ .open = generic_file_open, -+ .fsync = bch2_fsync, -+ .splice_read = generic_file_splice_read, -+ /* -+ * Broken, on v5.3: -+ .splice_write = iter_file_splice_write, -+ */ -+ .fallocate = bch2_fallocate_dispatch, -+ .unlocked_ioctl = bch2_fs_file_ioctl, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = bch2_compat_fs_ioctl, -+#endif -+ .remap_file_range = bch2_remap_file_range, -+}; -+ -+static const struct inode_operations bch_file_inode_operations = { -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .fiemap = bch2_fiemap, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct inode_operations bch_dir_inode_operations = { -+ .lookup = bch2_lookup, -+ .create = bch2_create, -+ .link = bch2_link, -+ .unlink = bch2_unlink, -+ .symlink = bch2_symlink, -+ .mkdir = bch2_mkdir, -+ .rmdir = bch2_unlink, -+ .mknod = bch2_mknod, -+ .rename = bch2_rename2, -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .tmpfile = bch2_tmpfile, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct file_operations bch_dir_file_operations = { -+ .llseek = bch2_dir_llseek, -+ .read = generic_read_dir, -+ .iterate_shared = bch2_vfs_readdir, -+ .fsync = bch2_fsync, -+ .unlocked_ioctl = bch2_fs_file_ioctl, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = bch2_compat_fs_ioctl, -+#endif -+}; -+ -+static const struct inode_operations bch_symlink_inode_operations = { -+ .get_link = page_get_link, -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct inode_operations bch_special_inode_operations = { -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct address_space_operations bch_address_space_operations = { -+ .writepage = bch2_writepage, -+ .readpage = bch2_readpage, -+ .writepages = bch2_writepages, -+ .readpages = bch2_readpages, 
-+ .set_page_dirty = __set_page_dirty_nobuffers, -+ .write_begin = bch2_write_begin, -+ .write_end = bch2_write_end, -+ .invalidatepage = bch2_invalidatepage, -+ .releasepage = bch2_releasepage, -+ .direct_IO = noop_direct_IO, -+#ifdef CONFIG_MIGRATION -+ .migratepage = bch2_migrate_page, -+#endif -+ .error_remove_page = generic_error_remove_page, -+}; -+ -+static struct inode *bch2_nfs_get_inode(struct super_block *sb, -+ u64 ino, u32 generation) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct inode *vinode; -+ -+ if (ino < BCACHEFS_ROOT_INO) -+ return ERR_PTR(-ESTALE); -+ -+ vinode = bch2_vfs_inode_get(c, ino); -+ if (IS_ERR(vinode)) -+ return ERR_CAST(vinode); -+ if (generation && vinode->i_generation != generation) { -+ /* we didn't find the right inode.. */ -+ iput(vinode); -+ return ERR_PTR(-ESTALE); -+ } -+ return vinode; -+} -+ -+static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *fid, -+ int fh_len, int fh_type) -+{ -+ return generic_fh_to_dentry(sb, fid, fh_len, fh_type, -+ bch2_nfs_get_inode); -+} -+ -+static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *fid, -+ int fh_len, int fh_type) -+{ -+ return generic_fh_to_parent(sb, fid, fh_len, fh_type, -+ bch2_nfs_get_inode); -+} -+ -+static const struct export_operations bch_export_ops = { -+ .fh_to_dentry = bch2_fh_to_dentry, -+ .fh_to_parent = bch2_fh_to_parent, -+ //.get_parent = bch2_get_parent, -+}; -+ -+static void bch2_vfs_inode_init(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi) -+{ -+ bch2_inode_update_after_write(c, inode, bi, ~0); -+ -+ inode->v.i_blocks = bi->bi_sectors; -+ inode->v.i_ino = bi->bi_inum; -+ inode->v.i_rdev = bi->bi_dev; -+ inode->v.i_generation = bi->bi_generation; -+ inode->v.i_size = bi->bi_size; -+ -+ inode->ei_journal_seq = 0; -+ inode->ei_quota_reserved = 0; -+ inode->ei_str_hash = bch2_hash_info_init(c, bi); -+ inode->ei_qid = bch_qid(bi); -+ -+ inode->v.i_mapping->a_ops = &bch_address_space_operations; -+ -+ switch (inode->v.i_mode & S_IFMT) { -+ case S_IFREG: -+ inode->v.i_op = &bch_file_inode_operations; -+ inode->v.i_fop = &bch_file_operations; -+ break; -+ case S_IFDIR: -+ inode->v.i_op = &bch_dir_inode_operations; -+ inode->v.i_fop = &bch_dir_file_operations; -+ break; -+ case S_IFLNK: -+ inode_nohighmem(&inode->v); -+ inode->v.i_op = &bch_symlink_inode_operations; -+ break; -+ default: -+ init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev); -+ inode->v.i_op = &bch_special_inode_operations; -+ break; -+ } -+} -+ -+static struct inode *bch2_alloc_inode(struct super_block *sb) -+{ -+ struct bch_inode_info *inode; -+ -+ inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS); -+ if (!inode) -+ return NULL; -+ -+ inode_init_once(&inode->v); -+ mutex_init(&inode->ei_update_lock); -+ pagecache_lock_init(&inode->ei_pagecache_lock); -+ mutex_init(&inode->ei_quota_lock); -+ inode->ei_journal_seq = 0; -+ -+ return &inode->v; -+} -+ -+static void bch2_i_callback(struct rcu_head *head) -+{ -+ struct inode *vinode = container_of(head, struct inode, i_rcu); -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ -+ kmem_cache_free(bch2_inode_cache, inode); -+} -+ -+static void bch2_destroy_inode(struct inode *vinode) -+{ -+ call_rcu(&vinode->i_rcu, bch2_i_callback); -+} -+ -+static int inode_update_times_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ bi->bi_atime = timespec_to_bch2_time(c, inode->v.i_atime); -+ bi->bi_mtime 
= timespec_to_bch2_time(c, inode->v.i_mtime); -+ bi->bi_ctime = timespec_to_bch2_time(c, inode->v.i_ctime); -+ -+ return 0; -+} -+ -+static int bch2_vfs_write_inode(struct inode *vinode, -+ struct writeback_control *wbc) -+{ -+ struct bch_fs *c = vinode->i_sb->s_fs_info; -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, -+ ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ -+ return ret; -+} -+ -+static void bch2_evict_inode(struct inode *vinode) -+{ -+ struct bch_fs *c = vinode->i_sb->s_fs_info; -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ -+ truncate_inode_pages_final(&inode->v.i_data); -+ -+ clear_inode(&inode->v); -+ -+ BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved); -+ -+ if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { -+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks), -+ KEY_TYPE_QUOTA_WARN); -+ bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, -+ KEY_TYPE_QUOTA_WARN); -+ bch2_inode_rm(c, inode->v.i_ino); -+ } -+} -+ -+static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) -+{ -+ struct super_block *sb = dentry->d_sb; -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); -+ unsigned shift = sb->s_blocksize_bits - 9; -+ u64 fsid; -+ -+ buf->f_type = BCACHEFS_STATFS_MAGIC; -+ buf->f_bsize = sb->s_blocksize; -+ buf->f_blocks = usage.capacity >> shift; -+ buf->f_bfree = (usage.capacity - usage.used) >> shift; -+ buf->f_bavail = buf->f_bfree; -+ buf->f_files = 0; -+ buf->f_ffree = 0; -+ -+ fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^ -+ le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64)); -+ buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; -+ buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; -+ buf->f_namelen = BCH_NAME_MAX; -+ -+ return 0; -+} -+ -+static int bch2_sync_fs(struct super_block *sb, int wait) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ if (c->opts.journal_flush_disabled) -+ return 0; -+ -+ if (!wait) { -+ bch2_journal_flush_async(&c->journal, NULL); -+ return 0; -+ } -+ -+ return bch2_journal_flush(&c->journal); -+} -+ -+static struct bch_fs *bch2_path_to_fs(const char *dev) -+{ -+ struct bch_fs *c; -+ struct block_device *bdev = lookup_bdev(dev); -+ -+ if (IS_ERR(bdev)) -+ return ERR_CAST(bdev); -+ -+ c = bch2_bdev_to_fs(bdev); -+ bdput(bdev); -+ return c ?: ERR_PTR(-ENOENT); -+} -+ -+static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * const *devs, -+ unsigned nr_devs, struct bch_opts opts) -+{ -+ struct bch_fs *c, *c1, *c2; -+ size_t i; -+ -+ if (!nr_devs) -+ return ERR_PTR(-EINVAL); -+ -+ c = bch2_fs_open(devs, nr_devs, opts); -+ -+ if (IS_ERR(c) && PTR_ERR(c) == -EBUSY) { -+ /* -+ * Already open? 
-+ * Look up each block device, make sure they all belong to a -+ * filesystem and they all belong to the _same_ filesystem -+ */ -+ -+ c1 = bch2_path_to_fs(devs[0]); -+ if (IS_ERR(c1)) -+ return c; -+ -+ for (i = 1; i < nr_devs; i++) { -+ c2 = bch2_path_to_fs(devs[i]); -+ if (!IS_ERR(c2)) -+ closure_put(&c2->cl); -+ -+ if (c1 != c2) { -+ closure_put(&c1->cl); -+ return c; -+ } -+ } -+ -+ c = c1; -+ } -+ -+ if (IS_ERR(c)) -+ return c; -+ -+ down_write(&c->state_lock); -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) { -+ up_write(&c->state_lock); -+ closure_put(&c->cl); -+ pr_err("err mounting %s: incomplete filesystem", dev_name); -+ return ERR_PTR(-EINVAL); -+ } -+ -+ up_write(&c->state_lock); -+ -+ set_bit(BCH_FS_BDEV_MOUNTED, &c->flags); -+ return c; -+} -+ -+static struct bch_fs *bch2_open_as_blockdevs(const char *_dev_name, -+ struct bch_opts opts) -+{ -+ char *dev_name = NULL, **devs = NULL, *s; -+ struct bch_fs *c = ERR_PTR(-ENOMEM); -+ size_t i, nr_devs = 0; -+ -+ dev_name = kstrdup(_dev_name, GFP_KERNEL); -+ if (!dev_name) -+ goto err; -+ -+ for (s = dev_name; s; s = strchr(s + 1, ':')) -+ nr_devs++; -+ -+ devs = kcalloc(nr_devs, sizeof(const char *), GFP_KERNEL); -+ if (!devs) -+ goto err; -+ -+ for (i = 0, s = dev_name; -+ s; -+ (s = strchr(s, ':')) && (*s++ = '\0')) -+ devs[i++] = s; -+ -+ c = __bch2_open_as_blockdevs(_dev_name, devs, nr_devs, opts); -+err: -+ kfree(devs); -+ kfree(dev_name); -+ return c; -+} -+ -+static int bch2_remount(struct super_block *sb, int *flags, char *data) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_opts opts = bch2_opts_empty(); -+ int ret; -+ -+ opt_set(opts, read_only, (*flags & SB_RDONLY) != 0); -+ -+ ret = bch2_parse_mount_opts(&opts, data); -+ if (ret) -+ return ret; -+ -+ if (opts.read_only != c->opts.read_only) { -+ down_write(&c->state_lock); -+ -+ if (opts.read_only) { -+ bch2_fs_read_only(c); -+ -+ sb->s_flags |= SB_RDONLY; -+ } else { -+ ret = bch2_fs_read_write(c); -+ if (ret) { -+ bch_err(c, "error going rw: %i", ret); -+ up_write(&c->state_lock); -+ return -EINVAL; -+ } -+ -+ sb->s_flags &= ~SB_RDONLY; -+ } -+ -+ c->opts.read_only = opts.read_only; -+ -+ up_write(&c->state_lock); -+ } -+ -+ if (opts.errors >= 0) -+ c->opts.errors = opts.errors; -+ -+ return ret; -+} -+ -+static int bch2_show_devname(struct seq_file *seq, struct dentry *root) -+{ -+ struct bch_fs *c = root->d_sb->s_fs_info; -+ struct bch_dev *ca; -+ unsigned i; -+ bool first = true; -+ -+ for_each_online_member(ca, c, i) { -+ if (!first) -+ seq_putc(seq, ':'); -+ first = false; -+ seq_puts(seq, "/dev/"); -+ seq_puts(seq, ca->name); -+ } -+ -+ return 0; -+} -+ -+static int bch2_show_options(struct seq_file *seq, struct dentry *root) -+{ -+ struct bch_fs *c = root->d_sb->s_fs_info; -+ enum bch_opt_id i; -+ char buf[512]; -+ -+ for (i = 0; i < bch2_opts_nr; i++) { -+ const struct bch_option *opt = &bch2_opt_table[i]; -+ u64 v = bch2_opt_get_by_id(&c->opts, i); -+ -+ if (!(opt->mode & OPT_MOUNT)) -+ continue; -+ -+ if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) -+ continue; -+ -+ bch2_opt_to_text(&PBUF(buf), c, opt, v, -+ OPT_SHOW_MOUNT_STYLE); -+ seq_putc(seq, ','); -+ seq_puts(seq, buf); -+ } -+ -+ return 0; -+} -+ -+static const struct super_operations bch_super_operations = { -+ .alloc_inode = bch2_alloc_inode, -+ .destroy_inode = bch2_destroy_inode, -+ .write_inode = bch2_vfs_write_inode, -+ .evict_inode = bch2_evict_inode, -+ .sync_fs = bch2_sync_fs, -+ .statfs = bch2_statfs, -+ .show_devname = bch2_show_devname, -+ .show_options = bch2_show_options, 
-+ .remount_fs = bch2_remount, -+#if 0 -+ .put_super = bch2_put_super, -+ .freeze_fs = bch2_freeze, -+ .unfreeze_fs = bch2_unfreeze, -+#endif -+}; -+ -+static int bch2_test_super(struct super_block *s, void *data) -+{ -+ return s->s_fs_info == data; -+} -+ -+static int bch2_set_super(struct super_block *s, void *data) -+{ -+ s->s_fs_info = data; -+ return 0; -+} -+ -+static struct dentry *bch2_mount(struct file_system_type *fs_type, -+ int flags, const char *dev_name, void *data) -+{ -+ struct bch_fs *c; -+ struct bch_dev *ca; -+ struct super_block *sb; -+ struct inode *vinode; -+ struct bch_opts opts = bch2_opts_empty(); -+ unsigned i; -+ int ret; -+ -+ opt_set(opts, read_only, (flags & SB_RDONLY) != 0); -+ -+ ret = bch2_parse_mount_opts(&opts, data); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ c = bch2_open_as_blockdevs(dev_name, opts); -+ if (IS_ERR(c)) -+ return ERR_CAST(c); -+ -+ sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|SB_NOSEC, c); -+ if (IS_ERR(sb)) { -+ closure_put(&c->cl); -+ return ERR_CAST(sb); -+ } -+ -+ BUG_ON(sb->s_fs_info != c); -+ -+ if (sb->s_root) { -+ closure_put(&c->cl); -+ -+ if ((flags ^ sb->s_flags) & SB_RDONLY) { -+ ret = -EBUSY; -+ goto err_put_super; -+ } -+ goto out; -+ } -+ -+ sb->s_blocksize = block_bytes(c); -+ sb->s_blocksize_bits = ilog2(block_bytes(c)); -+ sb->s_maxbytes = MAX_LFS_FILESIZE; -+ sb->s_op = &bch_super_operations; -+ sb->s_export_op = &bch_export_ops; -+#ifdef CONFIG_BCACHEFS_QUOTA -+ sb->s_qcop = &bch2_quotactl_operations; -+ sb->s_quota_types = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ; -+#endif -+ sb->s_xattr = bch2_xattr_handlers; -+ sb->s_magic = BCACHEFS_STATFS_MAGIC; -+ sb->s_time_gran = c->sb.time_precision; -+ c->vfs_sb = sb; -+ strlcpy(sb->s_id, c->name, sizeof(sb->s_id)); -+ -+ ret = super_setup_bdi(sb); -+ if (ret) -+ goto err_put_super; -+ -+ sb->s_bdi->congested_fn = bch2_congested; -+ sb->s_bdi->congested_data = c; -+ sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; -+ -+ for_each_online_member(ca, c, i) { -+ struct block_device *bdev = ca->disk_sb.bdev; -+ -+ /* XXX: create an anonymous device for multi device filesystems */ -+ sb->s_bdev = bdev; -+ sb->s_dev = bdev->bd_dev; -+ percpu_ref_put(&ca->io_ref); -+ break; -+ } -+ -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ if (c->opts.acl) -+ sb->s_flags |= SB_POSIXACL; -+#endif -+ -+ vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_INO); -+ if (IS_ERR(vinode)) { -+ bch_err(c, "error mounting: error getting root inode %i", -+ (int) PTR_ERR(vinode)); -+ ret = PTR_ERR(vinode); -+ goto err_put_super; -+ } -+ -+ sb->s_root = d_make_root(vinode); -+ if (!sb->s_root) { -+ bch_err(c, "error mounting: error allocating root dentry"); -+ ret = -ENOMEM; -+ goto err_put_super; -+ } -+ -+ sb->s_flags |= SB_ACTIVE; -+out: -+ return dget(sb->s_root); -+ -+err_put_super: -+ deactivate_locked_super(sb); -+ return ERR_PTR(ret); -+} -+ -+static void bch2_kill_sb(struct super_block *sb) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ generic_shutdown_super(sb); -+ -+ if (test_bit(BCH_FS_BDEV_MOUNTED, &c->flags)) -+ bch2_fs_stop(c); -+ else -+ closure_put(&c->cl); -+} -+ -+static struct file_system_type bcache_fs_type = { -+ .owner = THIS_MODULE, -+ .name = "bcachefs", -+ .mount = bch2_mount, -+ .kill_sb = bch2_kill_sb, -+ .fs_flags = FS_REQUIRES_DEV, -+}; -+ -+MODULE_ALIAS_FS("bcachefs"); -+ -+void bch2_vfs_exit(void) -+{ -+ unregister_filesystem(&bcache_fs_type); -+ if (bch2_inode_cache) -+ kmem_cache_destroy(bch2_inode_cache); -+} -+ -+int __init bch2_vfs_init(void) -+{ -+ int ret = -ENOMEM; -+ -+ 
bch2_inode_cache = KMEM_CACHE(bch_inode_info, 0); -+ if (!bch2_inode_cache) -+ goto err; -+ -+ ret = register_filesystem(&bcache_fs_type); -+ if (ret) -+ goto err; -+ -+ return 0; -+err: -+ bch2_vfs_exit(); -+ return ret; -+} -+ -+#endif /* NO_BCACHEFS_FS */ -diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h -new file mode 100644 -index 000000000000..eda903a45325 ---- /dev/null -+++ b/fs/bcachefs/fs.h -@@ -0,0 +1,174 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_H -+#define _BCACHEFS_FS_H -+ -+#include "inode.h" -+#include "opts.h" -+#include "str_hash.h" -+#include "quota_types.h" -+ -+#include -+#include -+ -+/* -+ * Two-state lock - can be taken for add or block - both states are shared, -+ * like read side of rwsem, but conflict with other state: -+ */ -+struct pagecache_lock { -+ atomic_long_t v; -+ wait_queue_head_t wait; -+}; -+ -+static inline void pagecache_lock_init(struct pagecache_lock *lock) -+{ -+ atomic_long_set(&lock->v, 0); -+ init_waitqueue_head(&lock->wait); -+} -+ -+void bch2_pagecache_add_put(struct pagecache_lock *); -+void bch2_pagecache_add_get(struct pagecache_lock *); -+void bch2_pagecache_block_put(struct pagecache_lock *); -+void bch2_pagecache_block_get(struct pagecache_lock *); -+ -+struct bch_inode_info { -+ struct inode v; -+ -+ struct mutex ei_update_lock; -+ u64 ei_journal_seq; -+ u64 ei_quota_reserved; -+ unsigned long ei_last_dirtied; -+ -+ struct pagecache_lock ei_pagecache_lock; -+ -+ struct mutex ei_quota_lock; -+ struct bch_qid ei_qid; -+ -+ struct bch_hash_info ei_str_hash; -+ -+ /* copy of inode in btree: */ -+ struct bch_inode_unpacked ei_inode; -+}; -+ -+#define to_bch_ei(_inode) \ -+ container_of_or_null(_inode, struct bch_inode_info, v) -+ -+static inline int ptrcmp(void *l, void *r) -+{ -+ return cmp_int(l, r); -+} -+ -+enum bch_inode_lock_op { -+ INODE_LOCK = (1U << 0), -+ INODE_PAGECACHE_BLOCK = (1U << 1), -+ INODE_UPDATE_LOCK = (1U << 2), -+}; -+ -+#define bch2_lock_inodes(_locks, ...) \ -+do { \ -+ struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \ -+ unsigned i; \ -+ \ -+ bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \ -+ \ -+ for (i = 1; i < ARRAY_SIZE(a); i++) \ -+ if (a[i] != a[i - 1]) { \ -+ if ((_locks) & INODE_LOCK) \ -+ down_write_nested(&a[i]->v.i_rwsem, i); \ -+ if ((_locks) & INODE_PAGECACHE_BLOCK) \ -+ bch2_pagecache_block_get(&a[i]->ei_pagecache_lock);\ -+ if ((_locks) & INODE_UPDATE_LOCK) \ -+ mutex_lock_nested(&a[i]->ei_update_lock, i);\ -+ } \ -+} while (0) -+ -+#define bch2_unlock_inodes(_locks, ...) 
\ -+do { \ -+ struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \ -+ unsigned i; \ -+ \ -+ bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \ -+ \ -+ for (i = 1; i < ARRAY_SIZE(a); i++) \ -+ if (a[i] != a[i - 1]) { \ -+ if ((_locks) & INODE_LOCK) \ -+ up_write(&a[i]->v.i_rwsem); \ -+ if ((_locks) & INODE_PAGECACHE_BLOCK) \ -+ bch2_pagecache_block_put(&a[i]->ei_pagecache_lock);\ -+ if ((_locks) & INODE_UPDATE_LOCK) \ -+ mutex_unlock(&a[i]->ei_update_lock); \ -+ } \ -+} while (0) -+ -+static inline struct bch_inode_info *file_bch_inode(struct file *file) -+{ -+ return to_bch_ei(file_inode(file)); -+} -+ -+static inline bool inode_attr_changing(struct bch_inode_info *dir, -+ struct bch_inode_info *inode, -+ enum inode_opt_id id) -+{ -+ return !(inode->ei_inode.bi_fields_set & (1 << id)) && -+ bch2_inode_opt_get(&dir->ei_inode, id) != -+ bch2_inode_opt_get(&inode->ei_inode, id); -+} -+ -+static inline bool inode_attrs_changing(struct bch_inode_info *dir, -+ struct bch_inode_info *inode) -+{ -+ unsigned id; -+ -+ for (id = 0; id < Inode_opt_nr; id++) -+ if (inode_attr_changing(dir, inode, id)) -+ return true; -+ -+ return false; -+} -+ -+struct bch_inode_unpacked; -+ -+#ifndef NO_BCACHEFS_FS -+ -+int bch2_fs_quota_transfer(struct bch_fs *, -+ struct bch_inode_info *, -+ struct bch_qid, -+ unsigned, -+ enum quota_acct_mode); -+ -+static inline int bch2_set_projid(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ u32 projid) -+{ -+ struct bch_qid qid = inode->ei_qid; -+ -+ qid.q[QTYP_PRJ] = projid; -+ -+ return bch2_fs_quota_transfer(c, inode, qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+} -+ -+struct inode *bch2_vfs_inode_get(struct bch_fs *, u64); -+ -+/* returns 0 if we want to do the update, or error is passed up */ -+typedef int (*inode_set_fn)(struct bch_inode_info *, -+ struct bch_inode_unpacked *, void *); -+ -+void bch2_inode_update_after_write(struct bch_fs *, -+ struct bch_inode_info *, -+ struct bch_inode_unpacked *, -+ unsigned); -+int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, -+ inode_set_fn, void *, unsigned); -+ -+void bch2_vfs_exit(void); -+int bch2_vfs_init(void); -+ -+#else -+ -+static inline void bch2_vfs_exit(void) {} -+static inline int bch2_vfs_init(void) { return 0; } -+ -+#endif /* NO_BCACHEFS_FS */ -+ -+#endif /* _BCACHEFS_FS_H */ -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -new file mode 100644 -index 000000000000..5a6df3d1973a ---- /dev/null -+++ b/fs/bcachefs/fsck.c -@@ -0,0 +1,1502 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "dirent.h" -+#include "error.h" -+#include "fs-common.h" -+#include "fsck.h" -+#include "inode.h" -+#include "keylist.h" -+#include "super.h" -+#include "xattr.h" -+ -+#include /* struct qstr */ -+#include -+ -+#define QSTR(n) { { { .len = strlen(n) } }, .name = n } -+ -+static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 sectors = 0; -+ int ret; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_EXTENTS, -+ POS(inum, 0), 0, k, ret) { -+ if (k.k->p.inode != inum) -+ break; -+ -+ if (bkey_extent_is_allocation(k.k)) -+ sectors += k.k->size; -+ } -+ -+ bch2_trans_iter_free(trans, iter); -+ -+ return ret ?: sectors; -+} -+ -+static int __remove_dirent(struct btree_trans *trans, -+ struct bkey_s_c_dirent dirent) -+{ -+ struct bch_fs *c = trans->c; -+ struct qstr name; -+ struct bch_inode_unpacked dir_inode; -+ struct bch_hash_info 
dir_hash_info; -+ u64 dir_inum = dirent.k->p.inode; -+ int ret; -+ char *buf; -+ -+ name.len = bch2_dirent_name_bytes(dirent); -+ buf = bch2_trans_kmalloc(trans, name.len + 1); -+ if (IS_ERR(buf)) -+ return PTR_ERR(buf); -+ -+ memcpy(buf, dirent.v->d_name, name.len); -+ buf[name.len] = '\0'; -+ name.name = buf; -+ -+ ret = bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode); -+ if (ret && ret != -EINTR) -+ bch_err(c, "remove_dirent: err %i looking up directory inode", ret); -+ if (ret) -+ return ret; -+ -+ dir_hash_info = bch2_hash_info_init(c, &dir_inode); -+ -+ ret = bch2_hash_delete(trans, bch2_dirent_hash_desc, -+ &dir_hash_info, dir_inum, &name); -+ if (ret && ret != -EINTR) -+ bch_err(c, "remove_dirent: err %i deleting dirent", ret); -+ if (ret) -+ return ret; -+ -+ return 0; -+} -+ -+static int remove_dirent(struct btree_trans *trans, -+ struct bkey_s_c_dirent dirent) -+{ -+ return __bch2_trans_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ __remove_dirent(trans, dirent)); -+} -+ -+static int reattach_inode(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode, -+ u64 inum) -+{ -+ struct bch_inode_unpacked dir_u, inode_u; -+ char name_buf[20]; -+ struct qstr name; -+ int ret; -+ -+ snprintf(name_buf, sizeof(name_buf), "%llu", inum); -+ name = (struct qstr) QSTR(name_buf); -+ -+ ret = bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_LAZY_RW, -+ bch2_link_trans(&trans, lostfound_inode->bi_inum, -+ inum, &dir_u, &inode_u, &name)); -+ if (ret) -+ bch_err(c, "error %i reattaching inode %llu", ret, inum); -+ -+ return ret; -+} -+ -+struct inode_walker { -+ bool first_this_inode; -+ bool have_inode; -+ u64 cur_inum; -+ struct bch_inode_unpacked inode; -+}; -+ -+static struct inode_walker inode_walker_init(void) -+{ -+ return (struct inode_walker) { -+ .cur_inum = -1, -+ .have_inode = false, -+ }; -+} -+ -+static int walk_inode(struct btree_trans *trans, -+ struct inode_walker *w, u64 inum) -+{ -+ if (inum != w->cur_inum) { -+ int ret = bch2_inode_find_by_inum_trans(trans, inum, -+ &w->inode); -+ -+ if (ret && ret != -ENOENT) -+ return ret; -+ -+ w->have_inode = !ret; -+ w->cur_inum = inum; -+ w->first_this_inode = true; -+ } else { -+ w->first_this_inode = false; -+ } -+ -+ return 0; -+} -+ -+struct hash_check { -+ struct bch_hash_info info; -+ -+ /* start of current chain of hash collisions: */ -+ struct btree_iter *chain; -+ -+ /* next offset in current chain of hash collisions: */ -+ u64 chain_end; -+}; -+ -+static void hash_check_init(struct hash_check *h) -+{ -+ h->chain = NULL; -+ h->chain_end = 0; -+} -+ -+static void hash_stop_chain(struct btree_trans *trans, -+ struct hash_check *h) -+{ -+ if (h->chain) -+ bch2_trans_iter_free(trans, h->chain); -+ h->chain = NULL; -+} -+ -+static void hash_check_set_inode(struct btree_trans *trans, -+ struct hash_check *h, -+ const struct bch_inode_unpacked *bi) -+{ -+ h->info = bch2_hash_info_init(trans->c, bi); -+ hash_stop_chain(trans, h); -+} -+ -+static int hash_redo_key(const struct bch_hash_desc desc, -+ struct btree_trans *trans, struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k, -+ u64 hashed) -+{ -+ struct bkey_i delete; -+ struct bkey_i *tmp; -+ -+ tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ if (IS_ERR(tmp)) -+ return PTR_ERR(tmp); -+ -+ bkey_reassemble(tmp, k); -+ -+ bkey_init(&delete.k); -+ delete.k.p = k_iter->pos; -+ bch2_trans_update(trans, k_iter, &delete, 0); -+ -+ return bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode, -+ tmp, BCH_HASH_SET_MUST_CREATE); 
-+} -+ -+static int fsck_hash_delete_at(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ struct bch_hash_info *info, -+ struct btree_iter *iter) -+{ -+ int ret; -+retry: -+ ret = bch2_hash_delete_at(trans, desc, info, iter) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW); -+ if (ret == -EINTR) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (!ret) -+ goto retry; -+ } -+ -+ return ret; -+} -+ -+static int hash_check_duplicates(struct btree_trans *trans, -+ const struct bch_hash_desc desc, struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *iter; -+ struct bkey_s_c k2; -+ char buf[200]; -+ int ret = 0; -+ -+ if (!bkey_cmp(h->chain->pos, k_iter->pos)) -+ return 0; -+ -+ iter = bch2_trans_copy_iter(trans, h->chain); -+ BUG_ON(IS_ERR(iter)); -+ -+ for_each_btree_key_continue(iter, 0, k2, ret) { -+ if (bkey_cmp(k2.k->p, k.k->p) >= 0) -+ break; -+ -+ if (fsck_err_on(k2.k->type == desc.key_type && -+ !desc.cmp_bkey(k, k2), c, -+ "duplicate hash table keys:\n%s", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ ret = fsck_hash_delete_at(trans, desc, &h->info, k_iter); -+ if (ret) -+ return ret; -+ ret = 1; -+ break; -+ } -+ } -+fsck_err: -+ bch2_trans_iter_free(trans, iter); -+ return ret; -+} -+ -+static void hash_set_chain_start(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k) -+{ -+ bool hole = (k.k->type != KEY_TYPE_whiteout && -+ k.k->type != desc.key_type); -+ -+ if (hole || k.k->p.offset > h->chain_end + 1) -+ hash_stop_chain(trans, h); -+ -+ if (!hole) { -+ if (!h->chain) { -+ h->chain = bch2_trans_copy_iter(trans, k_iter); -+ BUG_ON(IS_ERR(h->chain)); -+ } -+ -+ h->chain_end = k.k->p.offset; -+ } -+} -+ -+static bool key_has_correct_hash(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k) -+{ -+ u64 hash; -+ -+ hash_set_chain_start(trans, desc, h, k_iter, k); -+ -+ if (k.k->type != desc.key_type) -+ return true; -+ -+ hash = desc.hash_bkey(&h->info, k); -+ -+ return hash >= h->chain->pos.offset && -+ hash <= k.k->p.offset; -+} -+ -+static int hash_check_key(struct btree_trans *trans, -+ const struct bch_hash_desc desc, struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ char buf[200]; -+ u64 hashed; -+ int ret = 0; -+ -+ hash_set_chain_start(trans, desc, h, k_iter, k); -+ -+ if (k.k->type != desc.key_type) -+ return 0; -+ -+ hashed = desc.hash_bkey(&h->info, k); -+ -+ if (fsck_err_on(hashed < h->chain->pos.offset || -+ hashed > k.k->p.offset, c, -+ "hash table key at wrong offset: btree %u, %llu, " -+ "hashed to %llu chain starts at %llu\n%s", -+ desc.btree_id, k.k->p.offset, -+ hashed, h->chain->pos.offset, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) { -+ ret = __bch2_trans_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, -+ hash_redo_key(desc, trans, h, k_iter, k, hashed)); -+ if (ret) { -+ bch_err(c, "hash_redo_key err %i", ret); -+ return ret; -+ } -+ return 1; -+ } -+ -+ ret = hash_check_duplicates(trans, desc, h, k_iter, k); -+fsck_err: -+ return ret; -+} -+ -+static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h, -+ struct btree_iter *iter, struct bkey_s_c *k) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_i_dirent *d = NULL; -+ int ret = -EINVAL; -+ char 
buf[200]; -+ unsigned len; -+ u64 hash; -+ -+ if (key_has_correct_hash(trans, bch2_dirent_hash_desc, h, iter, *k)) -+ return 0; -+ -+ len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k)); -+ BUG_ON(!len); -+ -+ memcpy(buf, bkey_s_c_to_dirent(*k).v->d_name, len); -+ buf[len] = '\0'; -+ -+ d = kmalloc(bkey_bytes(k->k), GFP_KERNEL); -+ if (!d) { -+ bch_err(c, "memory allocation failure"); -+ return -ENOMEM; -+ } -+ -+ bkey_reassemble(&d->k_i, *k); -+ -+ do { -+ --len; -+ if (!len) -+ goto err_redo; -+ -+ d->k.u64s = BKEY_U64s + dirent_val_u64s(len); -+ -+ BUG_ON(bkey_val_bytes(&d->k) < -+ offsetof(struct bch_dirent, d_name) + len); -+ -+ memset(d->v.d_name + len, 0, -+ bkey_val_bytes(&d->k) - -+ offsetof(struct bch_dirent, d_name) - len); -+ -+ hash = bch2_dirent_hash_desc.hash_bkey(&h->info, -+ bkey_i_to_s_c(&d->k_i)); -+ } while (hash < h->chain->pos.offset || -+ hash > k->k->p.offset); -+ -+ if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)", -+ buf, strlen(buf), d->v.d_name, len)) { -+ ret = __bch2_trans_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ (bch2_trans_update(trans, iter, &d->k_i, 0), 0)); -+ if (ret) -+ goto err; -+ -+ *k = bch2_btree_iter_peek(iter); -+ -+ BUG_ON(k->k->type != KEY_TYPE_dirent); -+ } -+err: -+fsck_err: -+ kfree(d); -+ return ret; -+err_redo: -+ hash = bch2_dirent_hash_desc.hash_bkey(&h->info, *k); -+ -+ if (fsck_err(c, "cannot fix dirent by removing trailing garbage %s (%zu)\n" -+ "hash table key at wrong offset: btree %u, offset %llu, " -+ "hashed to %llu chain starts at %llu\n%s", -+ buf, strlen(buf), BTREE_ID_DIRENTS, -+ k->k->p.offset, hash, h->chain->pos.offset, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ *k), buf))) { -+ ret = __bch2_trans_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, -+ hash_redo_key(bch2_dirent_hash_desc, trans, -+ h, iter, *k, hash)); -+ if (ret) -+ bch_err(c, "hash_redo_key err %i", ret); -+ else -+ ret = 1; -+ } -+ -+ goto err; -+} -+ -+static int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size) -+{ -+ return bch2_btree_delete_range(c, BTREE_ID_EXTENTS, -+ POS(inode_nr, round_up(new_size, block_bytes(c)) >> 9), -+ POS(inode_nr + 1, 0), NULL); -+} -+ -+static int bch2_fix_overlapping_extent(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, struct bpos cut_at) -+{ -+ struct btree_iter *u_iter; -+ struct bkey_i *u; -+ int ret; -+ -+ u = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ return ret; -+ -+ bkey_reassemble(u, k); -+ bch2_cut_front(cut_at, u); -+ -+ u_iter = bch2_trans_copy_iter(trans, iter); -+ ret = PTR_ERR_OR_ZERO(u_iter); -+ if (ret) -+ return ret; -+ -+ /* -+ * We don't want to go through the -+ * extent_handle_overwrites path: -+ */ -+ __bch2_btree_iter_set_pos(u_iter, u->k.p, false); -+ -+ /* -+ * XXX: this is going to leave disk space -+ * accounting slightly wrong -+ */ -+ ret = bch2_trans_update(trans, u_iter, u, 0); -+ bch2_trans_iter_put(trans, u_iter); -+ return ret; -+} -+ -+/* -+ * Walk extents: verify that extents have a corresponding S_ISREG inode, and -+ * that i_size an i_sectors are consistent -+ */ -+noinline_for_stack -+static int check_extents(struct bch_fs *c) -+{ -+ struct inode_walker w = inode_walker_init(); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_on_stack prev; -+ u64 i_sectors; -+ int ret = 0; -+ -+ bkey_on_stack_init(&prev); -+ prev.k->k = KEY(0, 0, 0); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 
0); -+ -+ bch_verbose(c, "checking extents"); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(BCACHEFS_ROOT_INO, 0), -+ BTREE_ITER_INTENT); -+retry: -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) { -+ char buf1[200]; -+ char buf2[200]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k)); -+ bch2_bkey_val_to_text(&PBUF(buf2), c, k); -+ -+ if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) { -+ ret = __bch2_trans_do(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ bch2_fix_overlapping_extent(&trans, -+ iter, k, prev.k->k.p)); -+ if (ret) -+ goto err; -+ } -+ } -+ bkey_on_stack_reassemble(&prev, c, k); -+ -+ ret = walk_inode(&trans, &w, k.k->p.inode); -+ if (ret) -+ break; -+ -+ if (fsck_err_on(!w.have_inode, c, -+ "extent type %u for missing inode %llu", -+ k.k->type, k.k->p.inode) || -+ fsck_err_on(w.have_inode && -+ !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c, -+ "extent type %u for non regular file, inode %llu mode %o", -+ k.k->type, k.k->p.inode, w.inode.bi_mode)) { -+ bch2_trans_unlock(&trans); -+ -+ ret = bch2_inode_truncate(c, k.k->p.inode, 0); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (fsck_err_on(w.first_this_inode && -+ w.have_inode && -+ !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) && -+ w.inode.bi_sectors != -+ (i_sectors = bch2_count_inode_sectors(&trans, w.cur_inum)), -+ c, "inode %llu has incorrect i_sectors: got %llu, should be %llu", -+ w.inode.bi_inum, -+ w.inode.bi_sectors, i_sectors)) { -+ struct bkey_inode_buf p; -+ -+ w.inode.bi_sectors = i_sectors; -+ -+ bch2_trans_unlock(&trans); -+ -+ bch2_inode_pack(&p, &w.inode); -+ -+ ret = bch2_btree_insert(c, BTREE_ID_INODES, -+ &p.inode.k_i, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW); -+ if (ret) { -+ bch_err(c, "error in fsck: error %i updating inode", ret); -+ goto err; -+ } -+ -+ /* revalidate iterator: */ -+ k = bch2_btree_iter_peek(iter); -+ } -+ -+ if (fsck_err_on(w.have_inode && -+ !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && -+ k.k->type != KEY_TYPE_reservation && -+ k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c, -+ "extent type %u offset %llu past end of inode %llu, i_size %llu", -+ k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { -+ bch2_trans_unlock(&trans); -+ -+ ret = bch2_inode_truncate(c, k.k->p.inode, -+ w.inode.bi_size); -+ if (ret) -+ goto err; -+ continue; -+ } -+ } -+err: -+fsck_err: -+ if (ret == -EINTR) -+ goto retry; -+ bkey_on_stack_exit(&prev, c); -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+/* -+ * Walk dirents: verify that they all have a corresponding S_ISDIR inode, -+ * validate d_type -+ */ -+noinline_for_stack -+static int check_dirents(struct bch_fs *c) -+{ -+ struct inode_walker w = inode_walker_init(); -+ struct hash_check h; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ unsigned name_len; -+ char buf[200]; -+ int ret = 0; -+ -+ bch_verbose(c, "checking dirents"); -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ hash_check_init(&h); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, -+ POS(BCACHEFS_ROOT_INO, 0), 0); -+retry: -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ struct bkey_s_c_dirent d; -+ struct bch_inode_unpacked target; -+ bool have_target; -+ u64 d_inum; -+ -+ ret = walk_inode(&trans, &w, k.k->p.inode); -+ if (ret) -+ break; -+ -+ if (fsck_err_on(!w.have_inode, c, -+ "dirent in nonexisting directory:\n%s", -+ 
(bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf)) || -+ fsck_err_on(!S_ISDIR(w.inode.bi_mode), c, -+ "dirent in non directory inode type %u:\n%s", -+ mode_to_type(w.inode.bi_mode), -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (w.first_this_inode && w.have_inode) -+ hash_check_set_inode(&trans, &h, &w.inode); -+ -+ ret = check_dirent_hash(&trans, &h, iter, &k); -+ if (ret > 0) { -+ ret = 0; -+ continue; -+ } -+ if (ret) -+ goto fsck_err; -+ -+ if (ret) -+ goto fsck_err; -+ -+ if (k.k->type != KEY_TYPE_dirent) -+ continue; -+ -+ d = bkey_s_c_to_dirent(k); -+ d_inum = le64_to_cpu(d.v->d_inum); -+ -+ name_len = bch2_dirent_name_bytes(d); -+ -+ if (fsck_err_on(!name_len, c, "empty dirent") || -+ fsck_err_on(name_len == 1 && -+ !memcmp(d.v->d_name, ".", 1), c, -+ ". dirent") || -+ fsck_err_on(name_len == 2 && -+ !memcmp(d.v->d_name, "..", 2), c, -+ ".. dirent") || -+ fsck_err_on(name_len == 2 && -+ !memcmp(d.v->d_name, "..", 2), c, -+ ".. dirent") || -+ fsck_err_on(memchr(d.v->d_name, '/', name_len), c, -+ "dirent name has invalid chars")) { -+ ret = remove_dirent(&trans, d); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (fsck_err_on(d_inum == d.k->p.inode, c, -+ "dirent points to own directory:\n%s", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ ret = remove_dirent(&trans, d); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ ret = bch2_inode_find_by_inum_trans(&trans, d_inum, &target); -+ if (ret && ret != -ENOENT) -+ break; -+ -+ have_target = !ret; -+ ret = 0; -+ -+ if (fsck_err_on(!have_target, c, -+ "dirent points to missing inode:\n%s", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ ret = remove_dirent(&trans, d); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (fsck_err_on(have_target && -+ d.v->d_type != -+ mode_to_type(target.bi_mode), c, -+ "incorrect d_type: should be %u:\n%s", -+ mode_to_type(target.bi_mode), -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ struct bkey_i_dirent *n; -+ -+ n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); -+ if (!n) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ bkey_reassemble(&n->k_i, d.s_c); -+ n->v.d_type = mode_to_type(target.bi_mode); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ (bch2_trans_update(&trans, iter, &n->k_i, 0), 0)); -+ kfree(n); -+ if (ret) -+ goto err; -+ -+ } -+ } -+ -+ hash_stop_chain(&trans, &h); -+err: -+fsck_err: -+ if (ret == -EINTR) -+ goto retry; -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+/* -+ * Walk xattrs: verify that they all have a corresponding inode -+ */ -+noinline_for_stack -+static int check_xattrs(struct bch_fs *c) -+{ -+ struct inode_walker w = inode_walker_init(); -+ struct hash_check h; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch_verbose(c, "checking xattrs"); -+ -+ hash_check_init(&h); -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, -+ POS(BCACHEFS_ROOT_INO, 0), 0); -+retry: -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ ret = walk_inode(&trans, &w, k.k->p.inode); -+ if (ret) -+ break; -+ -+ if (fsck_err_on(!w.have_inode, c, -+ "xattr for missing inode %llu", -+ k.k->p.inode)) { -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (w.first_this_inode && w.have_inode) -+ hash_check_set_inode(&trans, &h, &w.inode); -+ -+ ret = 
hash_check_key(&trans, bch2_xattr_hash_desc, -+ &h, iter, k); -+ if (ret) -+ goto fsck_err; -+ } -+err: -+fsck_err: -+ if (ret == -EINTR) -+ goto retry; -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+/* Get root directory, create if it doesn't exist: */ -+static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode) -+{ -+ struct bkey_inode_buf packed; -+ int ret; -+ -+ bch_verbose(c, "checking root directory"); -+ -+ ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode); -+ if (ret && ret != -ENOENT) -+ return ret; -+ -+ if (fsck_err_on(ret, c, "root directory missing")) -+ goto create_root; -+ -+ if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c, -+ "root inode not a directory")) -+ goto create_root; -+ -+ return 0; -+fsck_err: -+ return ret; -+create_root: -+ bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|0755, -+ 0, NULL); -+ root_inode->bi_inum = BCACHEFS_ROOT_INO; -+ -+ bch2_inode_pack(&packed, root_inode); -+ -+ return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i, -+ NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW); -+} -+ -+/* Get lost+found, create if it doesn't exist: */ -+static int check_lostfound(struct bch_fs *c, -+ struct bch_inode_unpacked *root_inode, -+ struct bch_inode_unpacked *lostfound_inode) -+{ -+ struct qstr lostfound = QSTR("lost+found"); -+ struct bch_hash_info root_hash_info = -+ bch2_hash_info_init(c, root_inode); -+ u64 inum; -+ int ret; -+ -+ bch_verbose(c, "checking lost+found"); -+ -+ inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info, -+ &lostfound); -+ if (!inum) { -+ bch_notice(c, "creating lost+found"); -+ goto create_lostfound; -+ } -+ -+ ret = bch2_inode_find_by_inum(c, inum, lostfound_inode); -+ if (ret && ret != -ENOENT) -+ return ret; -+ -+ if (fsck_err_on(ret, c, "lost+found missing")) -+ goto create_lostfound; -+ -+ if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c, -+ "lost+found inode not a directory")) -+ goto create_lostfound; -+ -+ return 0; -+fsck_err: -+ return ret; -+create_lostfound: -+ bch2_inode_init_early(c, lostfound_inode); -+ -+ ret = bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ bch2_create_trans(&trans, -+ BCACHEFS_ROOT_INO, root_inode, -+ lostfound_inode, &lostfound, -+ 0, 0, S_IFDIR|0700, 0, NULL, NULL)); -+ if (ret) -+ bch_err(c, "error creating lost+found: %i", ret); -+ -+ return ret; -+} -+ -+struct inode_bitmap { -+ unsigned long *bits; -+ size_t size; -+}; -+ -+static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr) -+{ -+ return nr < b->size ? 
test_bit(nr, b->bits) : false; -+} -+ -+static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr) -+{ -+ if (nr >= b->size) { -+ size_t new_size = max_t(size_t, max_t(size_t, -+ PAGE_SIZE * 8, -+ b->size * 2), -+ nr + 1); -+ void *n; -+ -+ new_size = roundup_pow_of_two(new_size); -+ n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO); -+ if (!n) { -+ return -ENOMEM; -+ } -+ -+ b->bits = n; -+ b->size = new_size; -+ } -+ -+ __set_bit(nr, b->bits); -+ return 0; -+} -+ -+struct pathbuf { -+ size_t nr; -+ size_t size; -+ -+ struct pathbuf_entry { -+ u64 inum; -+ u64 offset; -+ } *entries; -+}; -+ -+static int path_down(struct pathbuf *p, u64 inum) -+{ -+ if (p->nr == p->size) { -+ size_t new_size = max_t(size_t, 256UL, p->size * 2); -+ void *n = krealloc(p->entries, -+ new_size * sizeof(p->entries[0]), -+ GFP_KERNEL); -+ if (!n) -+ return -ENOMEM; -+ -+ p->entries = n; -+ p->size = new_size; -+ }; -+ -+ p->entries[p->nr++] = (struct pathbuf_entry) { -+ .inum = inum, -+ .offset = 0, -+ }; -+ return 0; -+} -+ -+noinline_for_stack -+static int check_directory_structure(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode) -+{ -+ struct inode_bitmap dirs_done = { NULL, 0 }; -+ struct pathbuf path = { 0, 0, NULL }; -+ struct pathbuf_entry *e; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_dirent dirent; -+ bool had_unreachable; -+ u64 d_inum; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ bch_verbose(c, "checking directory structure"); -+ -+ /* DFS: */ -+restart_dfs: -+ had_unreachable = false; -+ -+ ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO); -+ if (ret) { -+ bch_err(c, "memory allocation failure in inode_bitmap_set()"); -+ goto err; -+ } -+ -+ ret = path_down(&path, BCACHEFS_ROOT_INO); -+ if (ret) -+ goto err; -+ -+ while (path.nr) { -+next: -+ e = &path.entries[path.nr - 1]; -+ -+ if (e->offset == U64_MAX) -+ goto up; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, -+ POS(e->inum, e->offset + 1), 0, k, ret) { -+ if (k.k->p.inode != e->inum) -+ break; -+ -+ e->offset = k.k->p.offset; -+ -+ if (k.k->type != KEY_TYPE_dirent) -+ continue; -+ -+ dirent = bkey_s_c_to_dirent(k); -+ -+ if (dirent.v->d_type != DT_DIR) -+ continue; -+ -+ d_inum = le64_to_cpu(dirent.v->d_inum); -+ -+ if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c, -+ "directory %llu has multiple hardlinks", -+ d_inum)) { -+ ret = remove_dirent(&trans, dirent); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ ret = inode_bitmap_set(&dirs_done, d_inum); -+ if (ret) { -+ bch_err(c, "memory allocation failure in inode_bitmap_set()"); -+ goto err; -+ } -+ -+ ret = path_down(&path, d_inum); -+ if (ret) { -+ goto err; -+ } -+ -+ ret = bch2_trans_iter_free(&trans, iter); -+ if (ret) { -+ bch_err(c, "btree error %i in fsck", ret); -+ goto err; -+ } -+ goto next; -+ } -+ ret = bch2_trans_iter_free(&trans, iter) ?: ret; -+ if (ret) { -+ bch_err(c, "btree error %i in fsck", ret); -+ goto err; -+ } -+up: -+ path.nr--; -+ } -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS_MIN, 0); -+retry: -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ if (k.k->type != KEY_TYPE_inode) -+ continue; -+ -+ if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode))) -+ continue; -+ -+ ret = bch2_empty_dir_trans(&trans, k.k->p.inode); -+ if (ret == -EINTR) -+ goto retry; -+ if (!ret) -+ continue; -+ -+ if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.offset), c, -+ "unreachable directory found (inum %llu)", -+ 
k.k->p.offset)) { -+ bch2_trans_unlock(&trans); -+ -+ ret = reattach_inode(c, lostfound_inode, k.k->p.offset); -+ if (ret) { -+ goto err; -+ } -+ -+ had_unreachable = true; -+ } -+ } -+ bch2_trans_iter_free(&trans, iter); -+ if (ret) -+ goto err; -+ -+ if (had_unreachable) { -+ bch_info(c, "reattached unreachable directories, restarting pass to check for loops"); -+ kfree(dirs_done.bits); -+ kfree(path.entries); -+ memset(&dirs_done, 0, sizeof(dirs_done)); -+ memset(&path, 0, sizeof(path)); -+ goto restart_dfs; -+ } -+err: -+fsck_err: -+ ret = bch2_trans_exit(&trans) ?: ret; -+ kfree(dirs_done.bits); -+ kfree(path.entries); -+ return ret; -+} -+ -+struct nlink { -+ u32 count; -+ u32 dir_count; -+}; -+ -+typedef GENRADIX(struct nlink) nlink_table; -+ -+static void inc_link(struct bch_fs *c, nlink_table *links, -+ u64 range_start, u64 *range_end, -+ u64 inum, bool dir) -+{ -+ struct nlink *link; -+ -+ if (inum < range_start || inum >= *range_end) -+ return; -+ -+ link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL); -+ if (!link) { -+ bch_verbose(c, "allocation failed during fsck - will need another pass"); -+ *range_end = inum; -+ return; -+ } -+ -+ if (dir) -+ link->dir_count++; -+ else -+ link->count++; -+} -+ -+noinline_for_stack -+static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, -+ u64 range_start, u64 *range_end) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_dirent d; -+ u64 d_inum; -+ int ret; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k, ret) { -+ switch (k.k->type) { -+ case KEY_TYPE_dirent: -+ d = bkey_s_c_to_dirent(k); -+ d_inum = le64_to_cpu(d.v->d_inum); -+ -+ if (d.v->d_type == DT_DIR) -+ inc_link(c, links, range_start, range_end, -+ d.k->p.inode, true); -+ -+ inc_link(c, links, range_start, range_end, -+ d_inum, false); -+ -+ break; -+ } -+ -+ bch2_trans_cond_resched(&trans); -+ } -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ bch_err(c, "error in fsck: btree error %i while walking dirents", ret); -+ -+ return ret; -+} -+ -+static int check_inode_nlink(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode, -+ struct bch_inode_unpacked *u, -+ struct nlink *link, -+ bool *do_update) -+{ -+ u32 i_nlink = bch2_inode_nlink_get(u); -+ u32 real_i_nlink = -+ link->count * nlink_bias(u->bi_mode) + -+ link->dir_count; -+ int ret = 0; -+ -+ /* -+ * These should have been caught/fixed by earlier passes, we don't -+ * repair them here: -+ */ -+ if (S_ISDIR(u->bi_mode) && link->count > 1) { -+ need_fsck_err(c, "directory %llu with multiple hardlinks: %u", -+ u->bi_inum, link->count); -+ return 0; -+ } -+ -+ if (S_ISDIR(u->bi_mode) && !link->count) { -+ need_fsck_err(c, "unreachable directory found (inum %llu)", -+ u->bi_inum); -+ return 0; -+ } -+ -+ if (!S_ISDIR(u->bi_mode) && link->dir_count) { -+ need_fsck_err(c, "non directory with subdirectories (inum %llu)", -+ u->bi_inum); -+ return 0; -+ } -+ -+ if (!link->count && -+ !(u->bi_flags & BCH_INODE_UNLINKED) && -+ (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) { -+ if (fsck_err(c, "unreachable inode %llu not marked as unlinked (type %u)", -+ u->bi_inum, mode_to_type(u->bi_mode)) == -+ FSCK_ERR_IGNORE) -+ return 0; -+ -+ ret = reattach_inode(c, lostfound_inode, u->bi_inum); -+ if (ret) -+ return ret; -+ -+ link->count = 1; -+ real_i_nlink = nlink_bias(u->bi_mode) + 
link->dir_count; -+ goto set_i_nlink; -+ } -+ -+ if (i_nlink < link->count) { -+ if (fsck_err(c, "inode %llu i_link too small (%u < %u, type %i)", -+ u->bi_inum, i_nlink, link->count, -+ mode_to_type(u->bi_mode)) == FSCK_ERR_IGNORE) -+ return 0; -+ goto set_i_nlink; -+ } -+ -+ if (i_nlink != real_i_nlink && -+ c->sb.clean) { -+ if (fsck_err(c, "filesystem marked clean, " -+ "but inode %llu has wrong i_nlink " -+ "(type %u i_nlink %u, should be %u)", -+ u->bi_inum, mode_to_type(u->bi_mode), -+ i_nlink, real_i_nlink) == FSCK_ERR_IGNORE) -+ return 0; -+ goto set_i_nlink; -+ } -+ -+ if (i_nlink != real_i_nlink && -+ (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) { -+ if (fsck_err(c, "inode %llu has wrong i_nlink " -+ "(type %u i_nlink %u, should be %u)", -+ u->bi_inum, mode_to_type(u->bi_mode), -+ i_nlink, real_i_nlink) == FSCK_ERR_IGNORE) -+ return 0; -+ goto set_i_nlink; -+ } -+ -+ if (real_i_nlink && i_nlink != real_i_nlink) -+ bch_verbose(c, "setting inode %llu nlink from %u to %u", -+ u->bi_inum, i_nlink, real_i_nlink); -+set_i_nlink: -+ if (i_nlink != real_i_nlink) { -+ bch2_inode_nlink_set(u, real_i_nlink); -+ *do_update = true; -+ } -+fsck_err: -+ return ret; -+} -+ -+static int check_inode(struct btree_trans *trans, -+ struct bch_inode_unpacked *lostfound_inode, -+ struct btree_iter *iter, -+ struct bkey_s_c_inode inode, -+ struct nlink *link) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_inode_unpacked u; -+ bool do_update = false; -+ int ret = 0; -+ -+ ret = bch2_inode_unpack(inode, &u); -+ -+ bch2_trans_unlock(trans); -+ -+ if (bch2_fs_inconsistent_on(ret, c, -+ "error unpacking inode %llu in fsck", -+ inode.k->p.inode)) -+ return ret; -+ -+ if (link) { -+ ret = check_inode_nlink(c, lostfound_inode, &u, link, -+ &do_update); -+ if (ret) -+ return ret; -+ } -+ -+ if (u.bi_flags & BCH_INODE_UNLINKED && -+ (!c->sb.clean || -+ fsck_err(c, "filesystem marked clean, but inode %llu unlinked", -+ u.bi_inum))) { -+ bch_verbose(c, "deleting inode %llu", u.bi_inum); -+ -+ bch2_fs_lazy_rw(c); -+ -+ ret = bch2_inode_rm(c, u.bi_inum); -+ if (ret) -+ bch_err(c, "error in fsck: error %i while deleting inode", ret); -+ return ret; -+ } -+ -+ if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY && -+ (!c->sb.clean || -+ fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty", -+ u.bi_inum))) { -+ bch_verbose(c, "truncating inode %llu", u.bi_inum); -+ -+ bch2_fs_lazy_rw(c); -+ -+ /* -+ * XXX: need to truncate partial blocks too here - or ideally -+ * just switch units to bytes and that issue goes away -+ */ -+ -+ ret = bch2_inode_truncate(c, u.bi_inum, u.bi_size); -+ if (ret) { -+ bch_err(c, "error in fsck: error %i truncating inode", ret); -+ return ret; -+ } -+ -+ /* -+ * We truncated without our normal sector accounting hook, just -+ * make sure we recalculate it: -+ */ -+ u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY; -+ -+ u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; -+ do_update = true; -+ } -+ -+ if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY && -+ (!c->sb.clean || -+ fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty", -+ u.bi_inum))) { -+ s64 sectors; -+ -+ bch_verbose(c, "recounting sectors for inode %llu", -+ u.bi_inum); -+ -+ sectors = bch2_count_inode_sectors(trans, u.bi_inum); -+ if (sectors < 0) { -+ bch_err(c, "error in fsck: error %i recounting inode sectors", -+ (int) sectors); -+ return sectors; -+ } -+ -+ u.bi_sectors = sectors; -+ u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY; -+ do_update = true; -+ } -+ -+ if (do_update) { -+ struct bkey_inode_buf p; -+ -+ 
bch2_inode_pack(&p, &u); -+ -+ ret = __bch2_trans_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ (bch2_trans_update(trans, iter, &p.inode.k_i, 0), 0)); -+ if (ret) -+ bch_err(c, "error in fsck: error %i " -+ "updating inode", ret); -+ } -+fsck_err: -+ return ret; -+} -+ -+noinline_for_stack -+static int bch2_gc_walk_inodes(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode, -+ nlink_table *links, -+ u64 range_start, u64 range_end) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct nlink *link, zero_links = { 0, 0 }; -+ struct genradix_iter nlinks_iter; -+ int ret = 0, ret2 = 0; -+ u64 nlinks_pos; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, -+ POS(0, range_start), 0); -+ nlinks_iter = genradix_iter_init(links, 0); -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret2 = bkey_err(k))) { -+peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); -+ -+ if (!link && (!k.k || iter->pos.offset >= range_end)) -+ break; -+ -+ nlinks_pos = range_start + nlinks_iter.pos; -+ if (iter->pos.offset > nlinks_pos) { -+ /* Should have been caught by dirents pass: */ -+ need_fsck_err_on(link && link->count, c, -+ "missing inode %llu (nlink %u)", -+ nlinks_pos, link->count); -+ genradix_iter_advance(&nlinks_iter, links); -+ goto peek_nlinks; -+ } -+ -+ if (iter->pos.offset < nlinks_pos || !link) -+ link = &zero_links; -+ -+ if (k.k && k.k->type == KEY_TYPE_inode) { -+ ret = check_inode(&trans, lostfound_inode, iter, -+ bkey_s_c_to_inode(k), link); -+ BUG_ON(ret == -EINTR); -+ if (ret) -+ break; -+ } else { -+ /* Should have been caught by dirents pass: */ -+ need_fsck_err_on(link->count, c, -+ "missing inode %llu (nlink %u)", -+ nlinks_pos, link->count); -+ } -+ -+ if (nlinks_pos == iter->pos.offset) -+ genradix_iter_advance(&nlinks_iter, links); -+ -+ bch2_btree_iter_next(iter); -+ bch2_trans_cond_resched(&trans); -+ } -+fsck_err: -+ bch2_trans_exit(&trans); -+ -+ if (ret2) -+ bch_err(c, "error in fsck: btree error %i while walking inodes", ret2); -+ -+ return ret ?: ret2; -+} -+ -+noinline_for_stack -+static int check_inode_nlinks(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode) -+{ -+ nlink_table links; -+ u64 this_iter_range_start, next_iter_range_start = 0; -+ int ret = 0; -+ -+ bch_verbose(c, "checking inode nlinks"); -+ -+ genradix_init(&links); -+ -+ do { -+ this_iter_range_start = next_iter_range_start; -+ next_iter_range_start = U64_MAX; -+ -+ ret = bch2_gc_walk_dirents(c, &links, -+ this_iter_range_start, -+ &next_iter_range_start); -+ if (ret) -+ break; -+ -+ ret = bch2_gc_walk_inodes(c, lostfound_inode, &links, -+ this_iter_range_start, -+ next_iter_range_start); -+ if (ret) -+ break; -+ -+ genradix_free(&links); -+ } while (next_iter_range_start != U64_MAX); -+ -+ genradix_free(&links); -+ -+ return ret; -+} -+ -+/* -+ * Checks for inconsistencies that shouldn't happen, unless we have a bug. 
-+ * Doesn't fix them yet, mainly because they haven't yet been observed: -+ */ -+int bch2_fsck_full(struct bch_fs *c) -+{ -+ struct bch_inode_unpacked root_inode, lostfound_inode; -+ -+ return check_extents(c) ?: -+ check_dirents(c) ?: -+ check_xattrs(c) ?: -+ check_root(c, &root_inode) ?: -+ check_lostfound(c, &root_inode, &lostfound_inode) ?: -+ check_directory_structure(c, &lostfound_inode) ?: -+ check_inode_nlinks(c, &lostfound_inode); -+} -+ -+int bch2_fsck_inode_nlink(struct bch_fs *c) -+{ -+ struct bch_inode_unpacked root_inode, lostfound_inode; -+ -+ return check_root(c, &root_inode) ?: -+ check_lostfound(c, &root_inode, &lostfound_inode) ?: -+ check_inode_nlinks(c, &lostfound_inode); -+} -+ -+int bch2_fsck_walk_inodes_only(struct bch_fs *c) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_inode inode; -+ int ret; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k, ret) { -+ if (k.k->type != KEY_TYPE_inode) -+ continue; -+ -+ inode = bkey_s_c_to_inode(k); -+ -+ if (inode.v->bi_flags & -+ (BCH_INODE_I_SIZE_DIRTY| -+ BCH_INODE_I_SECTORS_DIRTY| -+ BCH_INODE_UNLINKED)) { -+ ret = check_inode(&trans, NULL, iter, inode, NULL); -+ BUG_ON(ret == -EINTR); -+ if (ret) -+ break; -+ } -+ } -+ BUG_ON(ret == -EINTR); -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h -new file mode 100644 -index 000000000000..9e4af02bde1e ---- /dev/null -+++ b/fs/bcachefs/fsck.h -@@ -0,0 +1,9 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FSCK_H -+#define _BCACHEFS_FSCK_H -+ -+int bch2_fsck_full(struct bch_fs *); -+int bch2_fsck_inode_nlink(struct bch_fs *); -+int bch2_fsck_walk_inodes_only(struct bch_fs *); -+ -+#endif /* _BCACHEFS_FSCK_H */ -diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c -new file mode 100644 -index 000000000000..7d20f082ad45 ---- /dev/null -+++ b/fs/bcachefs/inode.c -@@ -0,0 +1,554 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_update.h" -+#include "error.h" -+#include "extents.h" -+#include "inode.h" -+#include "str_hash.h" -+ -+#include -+ -+#include -+ -+const char * const bch2_inode_opts[] = { -+#define x(name, ...) #name, -+ BCH_INODE_OPTS() -+#undef x -+ NULL, -+}; -+ -+static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 }; -+static const u8 bits_table[8] = { -+ 1 * 8 - 1, -+ 2 * 8 - 2, -+ 3 * 8 - 3, -+ 4 * 8 - 4, -+ 6 * 8 - 5, -+ 8 * 8 - 6, -+ 10 * 8 - 7, -+ 13 * 8 - 8, -+}; -+ -+static int inode_encode_field(u8 *out, u8 *end, u64 hi, u64 lo) -+{ -+ __be64 in[2] = { cpu_to_be64(hi), cpu_to_be64(lo), }; -+ unsigned shift, bytes, bits = likely(!hi) -+ ? 
fls64(lo) -+ : fls64(hi) + 64; -+ -+ for (shift = 1; shift <= 8; shift++) -+ if (bits < bits_table[shift - 1]) -+ goto got_shift; -+ -+ BUG(); -+got_shift: -+ bytes = byte_table[shift - 1]; -+ -+ BUG_ON(out + bytes > end); -+ -+ memcpy(out, (u8 *) in + 16 - bytes, bytes); -+ *out |= (1 << 8) >> shift; -+ -+ return bytes; -+} -+ -+static int inode_decode_field(const u8 *in, const u8 *end, -+ u64 out[2], unsigned *out_bits) -+{ -+ __be64 be[2] = { 0, 0 }; -+ unsigned bytes, shift; -+ u8 *p; -+ -+ if (in >= end) -+ return -1; -+ -+ if (!*in) -+ return -1; -+ -+ /* -+ * position of highest set bit indicates number of bytes: -+ * shift = number of bits to remove in high byte: -+ */ -+ shift = 8 - __fls(*in); /* 1 <= shift <= 8 */ -+ bytes = byte_table[shift - 1]; -+ -+ if (in + bytes > end) -+ return -1; -+ -+ p = (u8 *) be + 16 - bytes; -+ memcpy(p, in, bytes); -+ *p ^= (1 << 8) >> shift; -+ -+ out[0] = be64_to_cpu(be[0]); -+ out[1] = be64_to_cpu(be[1]); -+ *out_bits = out[0] ? 64 + fls64(out[0]) : fls64(out[1]); -+ -+ return bytes; -+} -+ -+void bch2_inode_pack(struct bkey_inode_buf *packed, -+ const struct bch_inode_unpacked *inode) -+{ -+ u8 *out = packed->inode.v.fields; -+ u8 *end = (void *) &packed[1]; -+ u8 *last_nonzero_field = out; -+ unsigned nr_fields = 0, last_nonzero_fieldnr = 0; -+ unsigned bytes; -+ -+ bkey_inode_init(&packed->inode.k_i); -+ packed->inode.k.p.offset = inode->bi_inum; -+ packed->inode.v.bi_hash_seed = inode->bi_hash_seed; -+ packed->inode.v.bi_flags = cpu_to_le32(inode->bi_flags); -+ packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode); -+ -+#define x(_name, _bits) \ -+ out += inode_encode_field(out, end, 0, inode->_name); \ -+ nr_fields++; \ -+ \ -+ if (inode->_name) { \ -+ last_nonzero_field = out; \ -+ last_nonzero_fieldnr = nr_fields; \ -+ } -+ -+ BCH_INODE_FIELDS() -+#undef x -+ -+ out = last_nonzero_field; -+ nr_fields = last_nonzero_fieldnr; -+ -+ bytes = out - (u8 *) &packed->inode.v; -+ set_bkey_val_bytes(&packed->inode.k, bytes); -+ memset_u64s_tail(&packed->inode.v, 0, bytes); -+ -+ SET_INODE_NR_FIELDS(&packed->inode.v, nr_fields); -+ -+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { -+ struct bch_inode_unpacked unpacked; -+ -+ int ret = bch2_inode_unpack(inode_i_to_s_c(&packed->inode), -+ &unpacked); -+ BUG_ON(ret); -+ BUG_ON(unpacked.bi_inum != inode->bi_inum); -+ BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed); -+ BUG_ON(unpacked.bi_mode != inode->bi_mode); -+ -+#define x(_name, _bits) BUG_ON(unpacked._name != inode->_name); -+ BCH_INODE_FIELDS() -+#undef x -+ } -+} -+ -+int bch2_inode_unpack(struct bkey_s_c_inode inode, -+ struct bch_inode_unpacked *unpacked) -+{ -+ const u8 *in = inode.v->fields; -+ const u8 *end = (void *) inode.v + bkey_val_bytes(inode.k); -+ u64 field[2]; -+ unsigned fieldnr = 0, field_bits; -+ int ret; -+ -+ unpacked->bi_inum = inode.k->p.offset; -+ unpacked->bi_hash_seed = inode.v->bi_hash_seed; -+ unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags); -+ unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode); -+ -+#define x(_name, _bits) \ -+ if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \ -+ memset(&unpacked->_name, 0, \ -+ sizeof(*unpacked) - \ -+ offsetof(struct bch_inode_unpacked, _name)); \ -+ return 0; \ -+ } \ -+ \ -+ ret = inode_decode_field(in, end, field, &field_bits); \ -+ if (ret < 0) \ -+ return ret; \ -+ \ -+ if (field_bits > sizeof(unpacked->_name) * 8) \ -+ return -1; \ -+ \ -+ unpacked->_name = field[1]; \ -+ in += ret; -+ -+ BCH_INODE_FIELDS() -+#undef x -+ -+ /* XXX: signal if there were more fields than expected? 
*/ -+ -+ return 0; -+} -+ -+struct btree_iter *bch2_inode_peek(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode, -+ u64 inum, unsigned flags) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, inum), -+ BTREE_ITER_SLOTS|flags); -+ if (IS_ERR(iter)) -+ return iter; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ ret = k.k->type == KEY_TYPE_inode ? 0 : -EIO; -+ if (ret) -+ goto err; -+ -+ ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode); -+ if (ret) -+ goto err; -+ -+ return iter; -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ERR_PTR(ret); -+} -+ -+int bch2_inode_write(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bch_inode_unpacked *inode) -+{ -+ struct bkey_inode_buf *inode_p; -+ -+ inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); -+ if (IS_ERR(inode_p)) -+ return PTR_ERR(inode_p); -+ -+ bch2_inode_pack(inode_p, inode); -+ bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); -+ return 0; -+} -+ -+const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); -+ struct bch_inode_unpacked unpacked; -+ -+ if (k.k->p.inode) -+ return "nonzero k.p.inode"; -+ -+ if (bkey_val_bytes(k.k) < sizeof(struct bch_inode)) -+ return "incorrect value size"; -+ -+ if (k.k->p.offset < BLOCKDEV_INODE_MAX) -+ return "fs inode in blockdev range"; -+ -+ if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) -+ return "invalid str hash type"; -+ -+ if (bch2_inode_unpack(inode, &unpacked)) -+ return "invalid variable length fields"; -+ -+ if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) -+ return "invalid data checksum type"; -+ -+ if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) -+ return "invalid data checksum type"; -+ -+ if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && -+ unpacked.bi_nlink != 0) -+ return "flagged as unlinked but bi_nlink != 0"; -+ -+ return NULL; -+} -+ -+void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); -+ struct bch_inode_unpacked unpacked; -+ -+ if (bch2_inode_unpack(inode, &unpacked)) { -+ pr_buf(out, "(unpack error)"); -+ return; -+ } -+ -+#define x(_name, _bits) \ -+ pr_buf(out, #_name ": %llu ", (u64) unpacked._name); -+ BCH_INODE_FIELDS() -+#undef x -+} -+ -+const char *bch2_inode_generation_invalid(const struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ if (k.k->p.inode) -+ return "nonzero k.p.inode"; -+ -+ if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_inode_generation gen = bkey_s_c_to_inode_generation(k); -+ -+ pr_buf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation)); -+} -+ -+void bch2_inode_init_early(struct bch_fs *c, -+ struct bch_inode_unpacked *inode_u) -+{ -+ enum bch_str_hash_type str_hash = -+ bch2_str_hash_opt_to_type(c, c->opts.str_hash); -+ -+ memset(inode_u, 0, sizeof(*inode_u)); -+ -+ /* ick */ -+ inode_u->bi_flags |= str_hash << INODE_STR_HASH_OFFSET; -+ get_random_bytes(&inode_u->bi_hash_seed, -+ sizeof(inode_u->bi_hash_seed)); -+} -+ -+void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now, -+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev, -+ struct bch_inode_unpacked *parent) -+{ -+ inode_u->bi_mode = 
mode; -+ inode_u->bi_uid = uid; -+ inode_u->bi_gid = gid; -+ inode_u->bi_dev = rdev; -+ inode_u->bi_atime = now; -+ inode_u->bi_mtime = now; -+ inode_u->bi_ctime = now; -+ inode_u->bi_otime = now; -+ -+ if (parent && parent->bi_mode & S_ISGID) { -+ inode_u->bi_gid = parent->bi_gid; -+ if (S_ISDIR(mode)) -+ inode_u->bi_mode |= S_ISGID; -+ } -+ -+ if (parent) { -+#define x(_name, ...) inode_u->bi_##_name = parent->bi_##_name; -+ BCH_INODE_OPTS() -+#undef x -+ } -+} -+ -+void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, -+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev, -+ struct bch_inode_unpacked *parent) -+{ -+ bch2_inode_init_early(c, inode_u); -+ bch2_inode_init_late(inode_u, bch2_current_time(c), -+ uid, gid, mode, rdev, parent); -+} -+ -+static inline u32 bkey_generation(struct bkey_s_c k) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_inode: -+ BUG(); -+ case KEY_TYPE_inode_generation: -+ return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation); -+ default: -+ return 0; -+ } -+} -+ -+int bch2_inode_create(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode_u, -+ u64 min, u64 max, u64 *hint) -+{ -+ struct bkey_inode_buf *inode_p; -+ struct btree_iter *iter = NULL; -+ struct bkey_s_c k; -+ u64 start; -+ int ret; -+ -+ if (!max) -+ max = ULLONG_MAX; -+ -+ if (trans->c->opts.inodes_32bit) -+ max = min_t(u64, max, U32_MAX); -+ -+ start = READ_ONCE(*hint); -+ -+ if (start >= max || start < min) -+ start = min; -+ -+ inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); -+ if (IS_ERR(inode_p)) -+ return PTR_ERR(inode_p); -+again: -+ for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(0, start), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (bkey_cmp(iter->pos, POS(0, max)) > 0) -+ break; -+ -+ if (k.k->type != KEY_TYPE_inode) -+ goto found_slot; -+ } -+ -+ bch2_trans_iter_put(trans, iter); -+ -+ if (ret) -+ return ret; -+ -+ if (start != min) { -+ /* Retry from start */ -+ start = min; -+ goto again; -+ } -+ -+ return -ENOSPC; -+found_slot: -+ *hint = k.k->p.offset; -+ inode_u->bi_inum = k.k->p.offset; -+ inode_u->bi_generation = bkey_generation(k); -+ -+ bch2_inode_pack(inode_p, inode_u); -+ bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); -+ bch2_trans_iter_put(trans, iter); -+ return 0; -+} -+ -+int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_i_inode_generation delete; -+ struct bpos start = POS(inode_nr, 0); -+ struct bpos end = POS(inode_nr + 1, 0); -+ int ret; -+ -+ /* -+ * If this was a directory, there shouldn't be any real dirents left - -+ * but there could be whiteouts (from hash collisions) that we should -+ * delete: -+ * -+ * XXX: the dirent could ideally would delete whiteouts when they're no -+ * longer needed -+ */ -+ ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS, -+ start, end, NULL) ?: -+ bch2_btree_delete_range(c, BTREE_ID_XATTRS, -+ start, end, NULL) ?: -+ bch2_btree_delete_range(c, BTREE_ID_DIRENTS, -+ start, end, NULL); -+ if (ret) -+ return ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ do { -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); -+ u32 bi_generation = 0; -+ -+ ret = bkey_err(k); -+ if (ret) -+ break; -+ -+ bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c, -+ "inode %llu not found when deleting", -+ inode_nr); -+ -+ switch (k.k->type) { -+ case KEY_TYPE_inode: { -+ struct 
bch_inode_unpacked inode_u; -+ -+ if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u)) -+ bi_generation = inode_u.bi_generation + 1; -+ break; -+ } -+ case KEY_TYPE_inode_generation: { -+ struct bkey_s_c_inode_generation g = -+ bkey_s_c_to_inode_generation(k); -+ bi_generation = le32_to_cpu(g.v->bi_generation); -+ break; -+ } -+ } -+ -+ if (!bi_generation) { -+ bkey_init(&delete.k); -+ delete.k.p.offset = inode_nr; -+ } else { -+ bkey_inode_generation_init(&delete.k_i); -+ delete.k.p.offset = inode_nr; -+ delete.v.bi_generation = cpu_to_le32(bi_generation); -+ } -+ -+ bch2_trans_update(&trans, iter, &delete.k_i, 0); -+ -+ ret = bch2_trans_commit(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+ } while (ret == -EINTR); -+ -+ bch2_trans_exit(&trans); -+ return ret; -+} -+ -+int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr, -+ struct bch_inode_unpacked *inode) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, -+ POS(0, inode_nr), BTREE_ITER_SLOTS); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ ret = k.k->type == KEY_TYPE_inode -+ ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode) -+ : -ENOENT; -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, -+ struct bch_inode_unpacked *inode) -+{ -+ return bch2_trans_do(c, NULL, NULL, 0, -+ bch2_inode_find_by_inum_trans(&trans, inode_nr, inode)); -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_inode_pack_test(void) -+{ -+ struct bch_inode_unpacked *u, test_inodes[] = { -+ { -+ .bi_atime = U64_MAX, -+ .bi_ctime = U64_MAX, -+ .bi_mtime = U64_MAX, -+ .bi_otime = U64_MAX, -+ .bi_size = U64_MAX, -+ .bi_sectors = U64_MAX, -+ .bi_uid = U32_MAX, -+ .bi_gid = U32_MAX, -+ .bi_nlink = U32_MAX, -+ .bi_generation = U32_MAX, -+ .bi_dev = U32_MAX, -+ }, -+ }; -+ -+ for (u = test_inodes; -+ u < test_inodes + ARRAY_SIZE(test_inodes); -+ u++) { -+ struct bkey_inode_buf p; -+ -+ bch2_inode_pack(&p, u); -+ } -+} -+#endif -diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h -new file mode 100644 -index 000000000000..bb759a46dc41 ---- /dev/null -+++ b/fs/bcachefs/inode.h -@@ -0,0 +1,177 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_INODE_H -+#define _BCACHEFS_INODE_H -+ -+#include "opts.h" -+ -+extern const char * const bch2_inode_opts[]; -+ -+const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_inode (struct bkey_ops) { \ -+ .key_invalid = bch2_inode_invalid, \ -+ .val_to_text = bch2_inode_to_text, \ -+} -+ -+const char *bch2_inode_generation_invalid(const struct bch_fs *, -+ struct bkey_s_c); -+void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+#define bch2_bkey_ops_inode_generation (struct bkey_ops) { \ -+ .key_invalid = bch2_inode_generation_invalid, \ -+ .val_to_text = bch2_inode_generation_to_text, \ -+} -+ -+struct bch_inode_unpacked { -+ u64 bi_inum; -+ __le64 bi_hash_seed; -+ u32 bi_flags; -+ u16 bi_mode; -+ -+#define x(_name, _bits) u##_bits _name; -+ BCH_INODE_FIELDS() -+#undef x -+}; -+ -+struct bkey_inode_buf { -+ struct bkey_i_inode inode; -+ -+#define x(_name, _bits) + 8 + _bits / 8 -+ u8 _pad[0 + BCH_INODE_FIELDS()]; -+#undef x -+} __attribute__((packed, aligned(8))); -+ -+void bch2_inode_pack(struct 
bkey_inode_buf *, const struct bch_inode_unpacked *); -+int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *); -+ -+struct btree_iter *bch2_inode_peek(struct btree_trans *, -+ struct bch_inode_unpacked *, u64, unsigned); -+int bch2_inode_write(struct btree_trans *, struct btree_iter *, -+ struct bch_inode_unpacked *); -+ -+void bch2_inode_init_early(struct bch_fs *, -+ struct bch_inode_unpacked *); -+void bch2_inode_init_late(struct bch_inode_unpacked *, u64, -+ uid_t, gid_t, umode_t, dev_t, -+ struct bch_inode_unpacked *); -+void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *, -+ uid_t, gid_t, umode_t, dev_t, -+ struct bch_inode_unpacked *); -+ -+int bch2_inode_create(struct btree_trans *, -+ struct bch_inode_unpacked *, -+ u64, u64, u64 *); -+ -+int bch2_inode_rm(struct bch_fs *, u64); -+ -+int bch2_inode_find_by_inum_trans(struct btree_trans *, u64, -+ struct bch_inode_unpacked *); -+int bch2_inode_find_by_inum(struct bch_fs *, u64, struct bch_inode_unpacked *); -+ -+static inline struct bch_io_opts bch2_inode_opts_get(struct bch_inode_unpacked *inode) -+{ -+ struct bch_io_opts ret = { 0 }; -+ -+#define x(_name, _bits) \ -+ if (inode->bi_##_name) \ -+ opt_set(ret, _name, inode->bi_##_name - 1); -+ BCH_INODE_OPTS() -+#undef x -+ return ret; -+} -+ -+static inline void bch2_inode_opt_set(struct bch_inode_unpacked *inode, -+ enum inode_opt_id id, u64 v) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Inode_opt_##_name: \ -+ inode->bi_##_name = v; \ -+ break; -+ BCH_INODE_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+static inline u64 bch2_inode_opt_get(struct bch_inode_unpacked *inode, -+ enum inode_opt_id id) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Inode_opt_##_name: \ -+ return inode->bi_##_name; -+ BCH_INODE_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+static inline struct bch_io_opts -+io_opts(struct bch_fs *c, struct bch_inode_unpacked *inode) -+{ -+ struct bch_io_opts opts = bch2_opts_to_inode_opts(c->opts); -+ -+ bch2_io_opts_apply(&opts, bch2_inode_opts_get(inode)); -+ return opts; -+} -+ -+static inline u8 mode_to_type(umode_t mode) -+{ -+ return (mode >> 12) & 15; -+} -+ -+/* i_nlink: */ -+ -+static inline unsigned nlink_bias(umode_t mode) -+{ -+ return S_ISDIR(mode) ? 2 : 1; -+} -+ -+static inline void bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) -+{ -+ if (bi->bi_flags & BCH_INODE_UNLINKED) -+ bi->bi_flags &= ~BCH_INODE_UNLINKED; -+ else -+ bi->bi_nlink++; -+} -+ -+static inline void bch2_inode_nlink_dec(struct bch_inode_unpacked *bi) -+{ -+ BUG_ON(bi->bi_flags & BCH_INODE_UNLINKED); -+ if (bi->bi_nlink) -+ bi->bi_nlink--; -+ else -+ bi->bi_flags |= BCH_INODE_UNLINKED; -+} -+ -+static inline unsigned bch2_inode_nlink_get(struct bch_inode_unpacked *bi) -+{ -+ return bi->bi_flags & BCH_INODE_UNLINKED -+ ? 
0 -+ : bi->bi_nlink + nlink_bias(bi->bi_mode); -+} -+ -+static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi, -+ unsigned nlink) -+{ -+ if (nlink) { -+ bi->bi_nlink = nlink - nlink_bias(bi->bi_mode); -+ bi->bi_flags &= ~BCH_INODE_UNLINKED; -+ } else { -+ bi->bi_nlink = 0; -+ bi->bi_flags |= BCH_INODE_UNLINKED; -+ } -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_inode_pack_test(void); -+#else -+static inline void bch2_inode_pack_test(void) {} -+#endif -+ -+#endif /* _BCACHEFS_INODE_H */ -diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c -new file mode 100644 -index 000000000000..5c9c3cf54edd ---- /dev/null -+++ b/fs/bcachefs/io.c -@@ -0,0 +1,2387 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Some low level IO code, and hacks for various block layer limitations -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_on_stack.h" -+#include "bset.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "compress.h" -+#include "clock.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "extent_update.h" -+#include "inode.h" -+#include "io.h" -+#include "journal.h" -+#include "keylist.h" -+#include "move.h" -+#include "rebalance.h" -+#include "super.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+ -+#include -+ -+const char *bch2_blk_status_to_str(blk_status_t status) -+{ -+ if (status == BLK_STS_REMOVED) -+ return "device removed"; -+ return blk_status_to_str(status); -+} -+ -+static bool bch2_target_congested(struct bch_fs *c, u16 target) -+{ -+ const struct bch_devs_mask *devs; -+ unsigned d, nr = 0, total = 0; -+ u64 now = local_clock(), last; -+ s64 congested; -+ struct bch_dev *ca; -+ -+ if (!target) -+ return false; -+ -+ rcu_read_lock(); -+ devs = bch2_target_to_mask(c, target) ?: -+ &c->rw_devs[BCH_DATA_user]; -+ -+ for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) { -+ ca = rcu_dereference(c->devs[d]); -+ if (!ca) -+ continue; -+ -+ congested = atomic_read(&ca->congested); -+ last = READ_ONCE(ca->congested_last); -+ if (time_after64(now, last)) -+ congested -= (now - last) >> 12; -+ -+ total += max(congested, 0LL); -+ nr++; -+ } -+ rcu_read_unlock(); -+ -+ return bch2_rand_range(nr * CONGESTED_MAX) < total; -+} -+ -+static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency, -+ u64 now, int rw) -+{ -+ u64 latency_capable = -+ ca->io_latency[rw].quantiles.entries[QUANTILE_IDX(1)].m; -+ /* ideally we'd be taking into account the device's variance here: */ -+ u64 latency_threshold = latency_capable << (rw == READ ? 2 : 3); -+ s64 latency_over = io_latency - latency_threshold; -+ -+ if (latency_threshold && latency_over > 0) { -+ /* -+ * bump up congested by approximately latency_over * 4 / -+ * latency_threshold - we don't need much accuracy here so don't -+ * bother with the divide: -+ */ -+ if (atomic_read(&ca->congested) < CONGESTED_MAX) -+ atomic_add(latency_over >> -+ max_t(int, ilog2(latency_threshold) - 2, 0), -+ &ca->congested); -+ -+ ca->congested_last = now; -+ } else if (atomic_read(&ca->congested) > 0) { -+ atomic_dec(&ca->congested); -+ } -+} -+ -+void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) -+{ -+ atomic64_t *latency = &ca->cur_latency[rw]; -+ u64 now = local_clock(); -+ u64 io_latency = time_after64(now, submit_time) -+ ? 
now - submit_time -+ : 0; -+ u64 old, new, v = atomic64_read(latency); -+ -+ do { -+ old = v; -+ -+ /* -+ * If the io latency was reasonably close to the current -+ * latency, skip doing the update and atomic operation - most of -+ * the time: -+ */ -+ if (abs((int) (old - io_latency)) < (old >> 1) && -+ now & ~(~0 << 5)) -+ break; -+ -+ new = ewma_add(old, io_latency, 5); -+ } while ((v = atomic64_cmpxchg(latency, old, new)) != old); -+ -+ bch2_congested_acct(ca, io_latency, now, rw); -+ -+ __bch2_time_stats_update(&ca->io_latency[rw], submit_time, now); -+} -+ -+/* Allocate, free from mempool: */ -+ -+void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio) -+{ -+ struct bvec_iter_all iter; -+ struct bio_vec *bv; -+ -+ bio_for_each_segment_all(bv, bio, iter) -+ if (bv->bv_page != ZERO_PAGE(0)) -+ mempool_free(bv->bv_page, &c->bio_bounce_pages); -+ bio->bi_vcnt = 0; -+} -+ -+static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool) -+{ -+ struct page *page; -+ -+ if (likely(!*using_mempool)) { -+ page = alloc_page(GFP_NOIO); -+ if (unlikely(!page)) { -+ mutex_lock(&c->bio_bounce_pages_lock); -+ *using_mempool = true; -+ goto pool_alloc; -+ -+ } -+ } else { -+pool_alloc: -+ page = mempool_alloc(&c->bio_bounce_pages, GFP_NOIO); -+ } -+ -+ return page; -+} -+ -+void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio, -+ size_t size) -+{ -+ bool using_mempool = false; -+ -+ while (size) { -+ struct page *page = __bio_alloc_page_pool(c, &using_mempool); -+ unsigned len = min(PAGE_SIZE, size); -+ -+ BUG_ON(!bio_add_page(bio, page, len, 0)); -+ size -= len; -+ } -+ -+ if (using_mempool) -+ mutex_unlock(&c->bio_bounce_pages_lock); -+} -+ -+/* Extent update path: */ -+ -+static int sum_sector_overwrites(struct btree_trans *trans, -+ struct btree_iter *extent_iter, -+ struct bkey_i *new, -+ bool may_allocate, -+ bool *maybe_extending, -+ s64 *delta) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c old; -+ int ret = 0; -+ -+ *maybe_extending = true; -+ *delta = 0; -+ -+ iter = bch2_trans_copy_iter(trans, extent_iter); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) { -+ if (!may_allocate && -+ bch2_bkey_nr_ptrs_fully_allocated(old) < -+ bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new))) { -+ ret = -ENOSPC; -+ break; -+ } -+ -+ *delta += (min(new->k.p.offset, -+ old.k->p.offset) - -+ max(bkey_start_offset(&new->k), -+ bkey_start_offset(old.k))) * -+ (bkey_extent_is_allocation(&new->k) - -+ bkey_extent_is_allocation(old.k)); -+ -+ if (bkey_cmp(old.k->p, new->k.p) >= 0) { -+ /* -+ * Check if there's already data above where we're -+ * going to be writing to - this means we're definitely -+ * not extending the file: -+ * -+ * Note that it's not sufficient to check if there's -+ * data up to the sector offset we're going to be -+ * writing to, because i_size could be up to one block -+ * less: -+ */ -+ if (!bkey_cmp(old.k->p, new->k.p)) -+ old = bch2_btree_iter_next(iter); -+ -+ if (old.k && !bkey_err(old) && -+ old.k->p.inode == extent_iter->pos.inode && -+ bkey_extent_is_data(old.k)) -+ *maybe_extending = false; -+ -+ break; -+ } -+ } -+ -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+int bch2_extent_update(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *k, -+ struct disk_reservation *disk_res, -+ u64 *journal_seq, -+ u64 new_i_size, -+ s64 *i_sectors_delta) -+{ -+ /* this must live until after bch2_trans_commit(): */ -+ struct bkey_inode_buf inode_p; -+ bool 
extending = false; -+ s64 delta = 0; -+ int ret; -+ -+ ret = bch2_extent_trim_atomic(k, iter); -+ if (ret) -+ return ret; -+ -+ ret = sum_sector_overwrites(trans, iter, k, -+ disk_res && disk_res->sectors != 0, -+ &extending, &delta); -+ if (ret) -+ return ret; -+ -+ new_i_size = extending -+ ? min(k->k.p.offset << 9, new_i_size) -+ : 0; -+ -+ if (delta || new_i_size) { -+ struct btree_iter *inode_iter; -+ struct bch_inode_unpacked inode_u; -+ -+ inode_iter = bch2_inode_peek(trans, &inode_u, -+ k->k.p.inode, BTREE_ITER_INTENT); -+ if (IS_ERR(inode_iter)) -+ return PTR_ERR(inode_iter); -+ -+ /* -+ * XXX: -+ * writeback can race a bit with truncate, because truncate -+ * first updates the inode then truncates the pagecache. This is -+ * ugly, but lets us preserve the invariant that the in memory -+ * i_size is always >= the on disk i_size. -+ * -+ BUG_ON(new_i_size > inode_u.bi_size && -+ (inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY)); -+ */ -+ BUG_ON(new_i_size > inode_u.bi_size && !extending); -+ -+ if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && -+ new_i_size > inode_u.bi_size) -+ inode_u.bi_size = new_i_size; -+ else -+ new_i_size = 0; -+ -+ inode_u.bi_sectors += delta; -+ -+ if (delta || new_i_size) { -+ bch2_inode_pack(&inode_p, &inode_u); -+ bch2_trans_update(trans, inode_iter, -+ &inode_p.inode.k_i, 0); -+ } -+ -+ bch2_trans_iter_put(trans, inode_iter); -+ } -+ -+ bch2_trans_update(trans, iter, k, 0); -+ -+ ret = bch2_trans_commit(trans, disk_res, journal_seq, -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE); -+ if (!ret && i_sectors_delta) -+ *i_sectors_delta += delta; -+ -+ return ret; -+} -+ -+int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, -+ struct bpos end, u64 *journal_seq, -+ s64 *i_sectors_delta) -+{ -+ struct bch_fs *c = trans->c; -+ unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); -+ struct bkey_s_c k; -+ int ret = 0, ret2 = 0; -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ bkey_cmp(iter->pos, end) < 0) { -+ struct disk_reservation disk_res = -+ bch2_disk_reservation_init(c, 0); -+ struct bkey_i delete; -+ -+ bch2_trans_begin(trans); -+ -+ ret = bkey_err(k); -+ if (ret) -+ goto btree_err; -+ -+ bkey_init(&delete.k); -+ delete.k.p = iter->pos; -+ -+ /* create the biggest key we can */ -+ bch2_key_resize(&delete.k, max_sectors); -+ bch2_cut_back(end, &delete); -+ -+ ret = bch2_extent_update(trans, iter, &delete, -+ &disk_res, journal_seq, -+ 0, i_sectors_delta); -+ bch2_disk_reservation_put(c, &disk_res); -+btree_err: -+ if (ret == -EINTR) { -+ ret2 = ret; -+ ret = 0; -+ } -+ if (ret) -+ break; -+ } -+ -+ if (bkey_cmp(iter->pos, end) > 0) { -+ bch2_btree_iter_set_pos(iter, end); -+ ret = bch2_btree_iter_traverse(iter); -+ } -+ -+ return ret ?: ret2; -+} -+ -+int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end, -+ u64 *journal_seq, s64 *i_sectors_delta) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(inum, start), -+ BTREE_ITER_INTENT); -+ -+ ret = bch2_fpunch_at(&trans, iter, POS(inum, end), -+ journal_seq, i_sectors_delta); -+ bch2_trans_exit(&trans); -+ -+ if (ret == -EINTR) -+ ret = 0; -+ -+ return ret; -+} -+ -+int bch2_write_index_default(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct bkey_on_stack sk; -+ struct keylist *keys = &op->insert_keys; -+ struct bkey_i *k = bch2_keylist_front(keys); -+ struct btree_trans 
trans; -+ struct btree_iter *iter; -+ int ret; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ bkey_start_pos(&k->k), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ -+ do { -+ bch2_trans_begin(&trans); -+ -+ k = bch2_keylist_front(keys); -+ -+ bkey_on_stack_realloc(&sk, c, k->k.u64s); -+ bkey_copy(sk.k, k); -+ bch2_cut_front(iter->pos, sk.k); -+ -+ ret = bch2_extent_update(&trans, iter, sk.k, -+ &op->res, op_journal_seq(op), -+ op->new_i_size, &op->i_sectors_delta); -+ if (ret == -EINTR) -+ continue; -+ if (ret) -+ break; -+ -+ if (bkey_cmp(iter->pos, k->k.p) >= 0) -+ bch2_keylist_pop_front(keys); -+ } while (!bch2_keylist_empty(keys)); -+ -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ -+ return ret; -+} -+ -+/* Writes */ -+ -+void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, -+ enum bch_data_type type, -+ const struct bkey_i *k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); -+ const struct bch_extent_ptr *ptr; -+ struct bch_write_bio *n; -+ struct bch_dev *ca; -+ -+ BUG_ON(c->opts.nochanges); -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX || -+ !c->devs[ptr->dev]); -+ -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ if (to_entry(ptr + 1) < ptrs.end) { -+ n = to_wbio(bio_clone_fast(&wbio->bio, GFP_NOIO, -+ &ca->replica_set)); -+ -+ n->bio.bi_end_io = wbio->bio.bi_end_io; -+ n->bio.bi_private = wbio->bio.bi_private; -+ n->parent = wbio; -+ n->split = true; -+ n->bounce = false; -+ n->put_bio = true; -+ n->bio.bi_opf = wbio->bio.bi_opf; -+ bio_inc_remaining(&wbio->bio); -+ } else { -+ n = wbio; -+ n->split = false; -+ } -+ -+ n->c = c; -+ n->dev = ptr->dev; -+ n->have_ioref = bch2_dev_get_ioref(ca, -+ type == BCH_DATA_btree ? 
READ : WRITE); -+ n->submit_time = local_clock(); -+ n->bio.bi_iter.bi_sector = ptr->offset; -+ -+ if (!journal_flushes_device(ca)) -+ n->bio.bi_opf |= REQ_FUA; -+ -+ if (likely(n->have_ioref)) { -+ this_cpu_add(ca->io_done->sectors[WRITE][type], -+ bio_sectors(&n->bio)); -+ -+ bio_set_dev(&n->bio, ca->disk_sb.bdev); -+ submit_bio(&n->bio); -+ } else { -+ n->bio.bi_status = BLK_STS_REMOVED; -+ bio_endio(&n->bio); -+ } -+ } -+} -+ -+static void __bch2_write(struct closure *); -+ -+static void bch2_write_done(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bch_fs *c = op->c; -+ -+ if (!op->error && (op->flags & BCH_WRITE_FLUSH)) -+ op->error = bch2_journal_error(&c->journal); -+ -+ bch2_disk_reservation_put(c, &op->res); -+ percpu_ref_put(&c->writes); -+ bch2_keylist_free(&op->insert_keys, op->inline_keys); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time); -+ -+ if (!(op->flags & BCH_WRITE_FROM_INTERNAL)) -+ up(&c->io_in_flight); -+ -+ if (op->end_io) { -+ EBUG_ON(cl->parent); -+ closure_debug_destroy(cl); -+ op->end_io(op); -+ } else { -+ closure_return(cl); -+ } -+} -+ -+/** -+ * bch_write_index - after a write, update index to point to new data -+ */ -+static void __bch2_write_index(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct keylist *keys = &op->insert_keys; -+ struct bch_extent_ptr *ptr; -+ struct bkey_i *src, *dst = keys->keys, *n, *k; -+ unsigned dev; -+ int ret; -+ -+ for (src = keys->keys; src != keys->top; src = n) { -+ n = bkey_next(src); -+ -+ if (bkey_extent_is_direct_data(&src->k)) { -+ bch2_bkey_drop_ptrs(bkey_i_to_s(src), ptr, -+ test_bit(ptr->dev, op->failed.d)); -+ -+ if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src))) { -+ ret = -EIO; -+ goto err; -+ } -+ } -+ -+ if (dst != src) -+ memmove_u64s_down(dst, src, src->u64s); -+ dst = bkey_next(dst); -+ } -+ -+ keys->top = dst; -+ -+ /* -+ * probably not the ideal place to hook this in, but I don't -+ * particularly want to plumb io_opts all the way through the btree -+ * update stack right now -+ */ -+ for_each_keylist_key(keys, k) { -+ bch2_rebalance_add_key(c, bkey_i_to_s_c(k), &op->opts); -+ -+ if (bch2_bkey_is_incompressible(bkey_i_to_s_c(k))) -+ bch2_check_set_feature(op->c, BCH_FEATURE_incompressible); -+ -+ } -+ -+ if (!bch2_keylist_empty(keys)) { -+ u64 sectors_start = keylist_sectors(keys); -+ int ret = op->index_update_fn(op); -+ -+ BUG_ON(ret == -EINTR); -+ BUG_ON(keylist_sectors(keys) && !ret); -+ -+ op->written += sectors_start - keylist_sectors(keys); -+ -+ if (ret) { -+ __bcache_io_error(c, "btree IO error %i", ret); -+ op->error = ret; -+ } -+ } -+out: -+ /* If some a bucket wasn't written, we can't erasure code it: */ -+ for_each_set_bit(dev, op->failed.d, BCH_SB_MEMBERS_MAX) -+ bch2_open_bucket_write_error(c, &op->open_buckets, dev); -+ -+ bch2_open_buckets_put(c, &op->open_buckets); -+ return; -+err: -+ keys->top = keys->keys; -+ op->error = ret; -+ goto out; -+} -+ -+static void bch2_write_index(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bch_fs *c = op->c; -+ -+ __bch2_write_index(op); -+ -+ if (!(op->flags & BCH_WRITE_DONE)) { -+ continue_at(cl, __bch2_write, index_update_wq(op)); -+ } else if (!op->error && (op->flags & BCH_WRITE_FLUSH)) { -+ bch2_journal_flush_seq_async(&c->journal, -+ *op_journal_seq(op), -+ cl); -+ continue_at(cl, bch2_write_done, index_update_wq(op)); -+ } else { -+ continue_at_nobarrier(cl, bch2_write_done, NULL); -+ } 
-+} -+ -+static void bch2_write_endio(struct bio *bio) -+{ -+ struct closure *cl = bio->bi_private; -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bch_write_bio *wbio = to_wbio(bio); -+ struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL; -+ struct bch_fs *c = wbio->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev); -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "data write: %s", -+ bch2_blk_status_to_str(bio->bi_status))) -+ set_bit(wbio->dev, op->failed.d); -+ -+ if (wbio->have_ioref) { -+ bch2_latency_acct(ca, wbio->submit_time, WRITE); -+ percpu_ref_put(&ca->io_ref); -+ } -+ -+ if (wbio->bounce) -+ bch2_bio_free_pages_pool(c, bio); -+ -+ if (wbio->put_bio) -+ bio_put(bio); -+ -+ if (parent) -+ bio_endio(&parent->bio); -+ else if (!(op->flags & BCH_WRITE_SKIP_CLOSURE_PUT)) -+ closure_put(cl); -+ else -+ continue_at_nobarrier(cl, bch2_write_index, index_update_wq(op)); -+} -+ -+static void init_append_extent(struct bch_write_op *op, -+ struct write_point *wp, -+ struct bversion version, -+ struct bch_extent_crc_unpacked crc) -+{ -+ struct bch_fs *c = op->c; -+ struct bkey_i_extent *e; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ BUG_ON(crc.compressed_size > wp->sectors_free); -+ wp->sectors_free -= crc.compressed_size; -+ op->pos.offset += crc.uncompressed_size; -+ -+ e = bkey_extent_init(op->insert_keys.top); -+ e->k.p = op->pos; -+ e->k.size = crc.uncompressed_size; -+ e->k.version = version; -+ -+ if (crc.csum_type || -+ crc.compression_type || -+ crc.nonce) -+ bch2_extent_crc_append(&e->k_i, crc); -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ union bch_extent_entry *end = -+ bkey_val_end(bkey_i_to_s(&e->k_i)); -+ -+ end->ptr = ob->ptr; -+ end->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; -+ end->ptr.cached = !ca->mi.durability || -+ (op->flags & BCH_WRITE_CACHED) != 0; -+ end->ptr.offset += ca->mi.bucket_size - ob->sectors_free; -+ -+ e->k.u64s++; -+ -+ BUG_ON(crc.compressed_size > ob->sectors_free); -+ ob->sectors_free -= crc.compressed_size; -+ } -+ -+ bch2_keylist_push(&op->insert_keys); -+} -+ -+static struct bio *bch2_write_bio_alloc(struct bch_fs *c, -+ struct write_point *wp, -+ struct bio *src, -+ bool *page_alloc_failed, -+ void *buf) -+{ -+ struct bch_write_bio *wbio; -+ struct bio *bio; -+ unsigned output_available = -+ min(wp->sectors_free << 9, src->bi_iter.bi_size); -+ unsigned pages = DIV_ROUND_UP(output_available + -+ (buf -+ ? 
((unsigned long) buf & (PAGE_SIZE - 1)) -+ : 0), PAGE_SIZE); -+ -+ bio = bio_alloc_bioset(GFP_NOIO, pages, &c->bio_write); -+ wbio = wbio_init(bio); -+ wbio->put_bio = true; -+ /* copy WRITE_SYNC flag */ -+ wbio->bio.bi_opf = src->bi_opf; -+ -+ if (buf) { -+ bch2_bio_map(bio, buf, output_available); -+ return bio; -+ } -+ -+ wbio->bounce = true; -+ -+ /* -+ * We can't use mempool for more than c->sb.encoded_extent_max -+ * worth of pages, but we'd like to allocate more if we can: -+ */ -+ bch2_bio_alloc_pages_pool(c, bio, -+ min_t(unsigned, output_available, -+ c->sb.encoded_extent_max << 9)); -+ -+ if (bio->bi_iter.bi_size < output_available) -+ *page_alloc_failed = -+ bch2_bio_alloc_pages(bio, -+ output_available - -+ bio->bi_iter.bi_size, -+ GFP_NOFS) != 0; -+ -+ return bio; -+} -+ -+static int bch2_write_rechecksum(struct bch_fs *c, -+ struct bch_write_op *op, -+ unsigned new_csum_type) -+{ -+ struct bio *bio = &op->wbio.bio; -+ struct bch_extent_crc_unpacked new_crc; -+ int ret; -+ -+ /* bch2_rechecksum_bio() can't encrypt or decrypt data: */ -+ -+ if (bch2_csum_type_is_encryption(op->crc.csum_type) != -+ bch2_csum_type_is_encryption(new_csum_type)) -+ new_csum_type = op->crc.csum_type; -+ -+ ret = bch2_rechecksum_bio(c, bio, op->version, op->crc, -+ NULL, &new_crc, -+ op->crc.offset, op->crc.live_size, -+ new_csum_type); -+ if (ret) -+ return ret; -+ -+ bio_advance(bio, op->crc.offset << 9); -+ bio->bi_iter.bi_size = op->crc.live_size << 9; -+ op->crc = new_crc; -+ return 0; -+} -+ -+static int bch2_write_decrypt(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct nonce nonce = extent_nonce(op->version, op->crc); -+ struct bch_csum csum; -+ -+ if (!bch2_csum_type_is_encryption(op->crc.csum_type)) -+ return 0; -+ -+ /* -+ * If we need to decrypt data in the write path, we'll no longer be able -+ * to verify the existing checksum (poly1305 mac, in this case) after -+ * it's decrypted - this is the last point we'll be able to reverify the -+ * checksum: -+ */ -+ csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); -+ if (bch2_crc_cmp(op->crc.csum, csum)) -+ return -EIO; -+ -+ bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); -+ op->crc.csum_type = 0; -+ op->crc.csum = (struct bch_csum) { 0, 0 }; -+ return 0; -+} -+ -+static enum prep_encoded_ret { -+ PREP_ENCODED_OK, -+ PREP_ENCODED_ERR, -+ PREP_ENCODED_CHECKSUM_ERR, -+ PREP_ENCODED_DO_WRITE, -+} bch2_write_prep_encoded_data(struct bch_write_op *op, struct write_point *wp) -+{ -+ struct bch_fs *c = op->c; -+ struct bio *bio = &op->wbio.bio; -+ -+ if (!(op->flags & BCH_WRITE_DATA_ENCODED)) -+ return PREP_ENCODED_OK; -+ -+ BUG_ON(bio_sectors(bio) != op->crc.compressed_size); -+ -+ /* Can we just write the entire extent as is? 
*/ -+ if (op->crc.uncompressed_size == op->crc.live_size && -+ op->crc.compressed_size <= wp->sectors_free && -+ (op->crc.compression_type == op->compression_type || -+ op->incompressible)) { -+ if (!crc_is_compressed(op->crc) && -+ op->csum_type != op->crc.csum_type && -+ bch2_write_rechecksum(c, op, op->csum_type)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ return PREP_ENCODED_DO_WRITE; -+ } -+ -+ /* -+ * If the data is compressed and we couldn't write the entire extent as -+ * is, we have to decompress it: -+ */ -+ if (crc_is_compressed(op->crc)) { -+ struct bch_csum csum; -+ -+ if (bch2_write_decrypt(op)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ /* Last point we can still verify checksum: */ -+ csum = bch2_checksum_bio(c, op->crc.csum_type, -+ extent_nonce(op->version, op->crc), -+ bio); -+ if (bch2_crc_cmp(op->crc.csum, csum)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ if (bch2_bio_uncompress_inplace(c, bio, &op->crc)) -+ return PREP_ENCODED_ERR; -+ } -+ -+ /* -+ * No longer have compressed data after this point - data might be -+ * encrypted: -+ */ -+ -+ /* -+ * If the data is checksummed and we're only writing a subset, -+ * rechecksum and adjust bio to point to currently live data: -+ */ -+ if ((op->crc.live_size != op->crc.uncompressed_size || -+ op->crc.csum_type != op->csum_type) && -+ bch2_write_rechecksum(c, op, op->csum_type)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ /* -+ * If we want to compress the data, it has to be decrypted: -+ */ -+ if ((op->compression_type || -+ bch2_csum_type_is_encryption(op->crc.csum_type) != -+ bch2_csum_type_is_encryption(op->csum_type)) && -+ bch2_write_decrypt(op)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ return PREP_ENCODED_OK; -+} -+ -+static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, -+ struct bio **_dst) -+{ -+ struct bch_fs *c = op->c; -+ struct bio *src = &op->wbio.bio, *dst = src; -+ struct bvec_iter saved_iter; -+ void *ec_buf; -+ struct bpos ec_pos = op->pos; -+ unsigned total_output = 0, total_input = 0; -+ bool bounce = false; -+ bool page_alloc_failed = false; -+ int ret, more = 0; -+ -+ BUG_ON(!bio_sectors(src)); -+ -+ ec_buf = bch2_writepoint_ec_buf(c, wp); -+ -+ switch (bch2_write_prep_encoded_data(op, wp)) { -+ case PREP_ENCODED_OK: -+ break; -+ case PREP_ENCODED_ERR: -+ ret = -EIO; -+ goto err; -+ case PREP_ENCODED_CHECKSUM_ERR: -+ BUG(); -+ goto csum_err; -+ case PREP_ENCODED_DO_WRITE: -+ /* XXX look for bug here */ -+ if (ec_buf) { -+ dst = bch2_write_bio_alloc(c, wp, src, -+ &page_alloc_failed, -+ ec_buf); -+ bio_copy_data(dst, src); -+ bounce = true; -+ } -+ init_append_extent(op, wp, op->version, op->crc); -+ goto do_write; -+ } -+ -+ if (ec_buf || -+ op->compression_type || -+ (op->csum_type && -+ !(op->flags & BCH_WRITE_PAGES_STABLE)) || -+ (bch2_csum_type_is_encryption(op->csum_type) && -+ !(op->flags & BCH_WRITE_PAGES_OWNED))) { -+ dst = bch2_write_bio_alloc(c, wp, src, -+ &page_alloc_failed, -+ ec_buf); -+ bounce = true; -+ } -+ -+ saved_iter = dst->bi_iter; -+ -+ do { -+ struct bch_extent_crc_unpacked crc = -+ (struct bch_extent_crc_unpacked) { 0 }; -+ struct bversion version = op->version; -+ size_t dst_len, src_len; -+ -+ if (page_alloc_failed && -+ bio_sectors(dst) < wp->sectors_free && -+ bio_sectors(dst) < c->sb.encoded_extent_max) -+ break; -+ -+ BUG_ON(op->compression_type && -+ (op->flags & BCH_WRITE_DATA_ENCODED) && -+ bch2_csum_type_is_encryption(op->crc.csum_type)); -+ BUG_ON(op->compression_type && !bounce); -+ -+ crc.compression_type = op->incompressible -+ ? 
BCH_COMPRESSION_TYPE_incompressible -+ : op->compression_type -+ ? bch2_bio_compress(c, dst, &dst_len, src, &src_len, -+ op->compression_type) -+ : 0; -+ if (!crc_is_compressed(crc)) { -+ dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size); -+ dst_len = min_t(unsigned, dst_len, wp->sectors_free << 9); -+ -+ if (op->csum_type) -+ dst_len = min_t(unsigned, dst_len, -+ c->sb.encoded_extent_max << 9); -+ -+ if (bounce) { -+ swap(dst->bi_iter.bi_size, dst_len); -+ bio_copy_data(dst, src); -+ swap(dst->bi_iter.bi_size, dst_len); -+ } -+ -+ src_len = dst_len; -+ } -+ -+ BUG_ON(!src_len || !dst_len); -+ -+ if (bch2_csum_type_is_encryption(op->csum_type)) { -+ if (bversion_zero(version)) { -+ version.lo = atomic64_inc_return(&c->key_version); -+ } else { -+ crc.nonce = op->nonce; -+ op->nonce += src_len >> 9; -+ } -+ } -+ -+ if ((op->flags & BCH_WRITE_DATA_ENCODED) && -+ !crc_is_compressed(crc) && -+ bch2_csum_type_is_encryption(op->crc.csum_type) == -+ bch2_csum_type_is_encryption(op->csum_type)) { -+ /* -+ * Note: when we're using rechecksum(), we need to be -+ * checksumming @src because it has all the data our -+ * existing checksum covers - if we bounced (because we -+ * were trying to compress), @dst will only have the -+ * part of the data the new checksum will cover. -+ * -+ * But normally we want to be checksumming post bounce, -+ * because part of the reason for bouncing is so the -+ * data can't be modified (by userspace) while it's in -+ * flight. -+ */ -+ if (bch2_rechecksum_bio(c, src, version, op->crc, -+ &crc, &op->crc, -+ src_len >> 9, -+ bio_sectors(src) - (src_len >> 9), -+ op->csum_type)) -+ goto csum_err; -+ } else { -+ if ((op->flags & BCH_WRITE_DATA_ENCODED) && -+ bch2_rechecksum_bio(c, src, version, op->crc, -+ NULL, &op->crc, -+ src_len >> 9, -+ bio_sectors(src) - (src_len >> 9), -+ op->crc.csum_type)) -+ goto csum_err; -+ -+ crc.compressed_size = dst_len >> 9; -+ crc.uncompressed_size = src_len >> 9; -+ crc.live_size = src_len >> 9; -+ -+ swap(dst->bi_iter.bi_size, dst_len); -+ bch2_encrypt_bio(c, op->csum_type, -+ extent_nonce(version, crc), dst); -+ crc.csum = bch2_checksum_bio(c, op->csum_type, -+ extent_nonce(version, crc), dst); -+ crc.csum_type = op->csum_type; -+ swap(dst->bi_iter.bi_size, dst_len); -+ } -+ -+ init_append_extent(op, wp, version, crc); -+ -+ if (dst != src) -+ bio_advance(dst, dst_len); -+ bio_advance(src, src_len); -+ total_output += dst_len; -+ total_input += src_len; -+ } while (dst->bi_iter.bi_size && -+ src->bi_iter.bi_size && -+ wp->sectors_free && -+ !bch2_keylist_realloc(&op->insert_keys, -+ op->inline_keys, -+ ARRAY_SIZE(op->inline_keys), -+ BKEY_EXTENT_U64s_MAX)); -+ -+ more = src->bi_iter.bi_size != 0; -+ -+ dst->bi_iter = saved_iter; -+ -+ if (dst == src && more) { -+ BUG_ON(total_output != total_input); -+ -+ dst = bio_split(src, total_input >> 9, -+ GFP_NOIO, &c->bio_write); -+ wbio_init(dst)->put_bio = true; -+ /* copy WRITE_SYNC flag */ -+ dst->bi_opf = src->bi_opf; -+ } -+ -+ dst->bi_iter.bi_size = total_output; -+do_write: -+ /* might have done a realloc... 
*/ -+ bch2_ec_add_backpointer(c, wp, ec_pos, total_input >> 9); -+ -+ *_dst = dst; -+ return more; -+csum_err: -+ bch_err(c, "error verifying existing checksum while " -+ "rewriting existing data (memory corruption?)"); -+ ret = -EIO; -+err: -+ if (to_wbio(dst)->bounce) -+ bch2_bio_free_pages_pool(c, dst); -+ if (to_wbio(dst)->put_bio) -+ bio_put(dst); -+ -+ return ret; -+} -+ -+static void __bch2_write(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bch_fs *c = op->c; -+ struct write_point *wp; -+ struct bio *bio; -+ bool skip_put = true; -+ unsigned nofs_flags; -+ int ret; -+ -+ nofs_flags = memalloc_nofs_save(); -+again: -+ memset(&op->failed, 0, sizeof(op->failed)); -+ -+ do { -+ struct bkey_i *key_to_write; -+ unsigned key_to_write_offset = op->insert_keys.top_p - -+ op->insert_keys.keys_p; -+ -+ /* +1 for possible cache device: */ -+ if (op->open_buckets.nr + op->nr_replicas + 1 > -+ ARRAY_SIZE(op->open_buckets.v)) -+ goto flush_io; -+ -+ if (bch2_keylist_realloc(&op->insert_keys, -+ op->inline_keys, -+ ARRAY_SIZE(op->inline_keys), -+ BKEY_EXTENT_U64s_MAX)) -+ goto flush_io; -+ -+ if ((op->flags & BCH_WRITE_FROM_INTERNAL) && -+ percpu_ref_is_dying(&c->writes)) { -+ ret = -EROFS; -+ goto err; -+ } -+ -+ /* -+ * The copygc thread is now global, which means it's no longer -+ * freeing up space on specific disks, which means that -+ * allocations for specific disks may hang arbitrarily long: -+ */ -+ wp = bch2_alloc_sectors_start(c, -+ op->target, -+ op->opts.erasure_code, -+ op->write_point, -+ &op->devs_have, -+ op->nr_replicas, -+ op->nr_replicas_required, -+ op->alloc_reserve, -+ op->flags, -+ (op->flags & (BCH_WRITE_ALLOC_NOWAIT| -+ BCH_WRITE_ONLY_SPECIFIED_DEVS)) ? NULL : cl); -+ EBUG_ON(!wp); -+ -+ if (unlikely(IS_ERR(wp))) { -+ if (unlikely(PTR_ERR(wp) != -EAGAIN)) { -+ ret = PTR_ERR(wp); -+ goto err; -+ } -+ -+ goto flush_io; -+ } -+ -+ /* -+ * It's possible for the allocator to fail, put us on the -+ * freelist waitlist, and then succeed in one of various retry -+ * paths: if that happens, we need to disable the skip_put -+ * optimization because otherwise there won't necessarily be a -+ * barrier before we free the bch_write_op: -+ */ -+ if (atomic_read(&cl->remaining) & CLOSURE_WAITING) -+ skip_put = false; -+ -+ bch2_open_bucket_get(c, wp, &op->open_buckets); -+ ret = bch2_write_extent(op, wp, &bio); -+ bch2_alloc_sectors_done(c, wp); -+ -+ if (ret < 0) -+ goto err; -+ -+ if (ret) { -+ skip_put = false; -+ } else { -+ /* -+ * for the skip_put optimization this has to be set -+ * before we submit the bio: -+ */ -+ op->flags |= BCH_WRITE_DONE; -+ } -+ -+ bio->bi_end_io = bch2_write_endio; -+ bio->bi_private = &op->cl; -+ bio->bi_opf |= REQ_OP_WRITE; -+ -+ if (!skip_put) -+ closure_get(bio->bi_private); -+ else -+ op->flags |= BCH_WRITE_SKIP_CLOSURE_PUT; -+ -+ key_to_write = (void *) (op->insert_keys.keys_p + -+ key_to_write_offset); -+ -+ bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user, -+ key_to_write); -+ } while (ret); -+ -+ if (!skip_put) -+ continue_at(cl, bch2_write_index, index_update_wq(op)); -+out: -+ memalloc_nofs_restore(nofs_flags); -+ return; -+err: -+ op->error = ret; -+ op->flags |= BCH_WRITE_DONE; -+ -+ continue_at(cl, bch2_write_index, index_update_wq(op)); -+ goto out; -+flush_io: -+ /* -+ * If the write can't all be submitted at once, we generally want to -+ * block synchronously as that signals backpressure to the caller. 
-+ * -+ * However, if we're running out of a workqueue, we can't block here -+ * because we'll be blocking other work items from completing: -+ */ -+ if (current->flags & PF_WQ_WORKER) { -+ continue_at(cl, bch2_write_index, index_update_wq(op)); -+ goto out; -+ } -+ -+ closure_sync(cl); -+ -+ if (!bch2_keylist_empty(&op->insert_keys)) { -+ __bch2_write_index(op); -+ -+ if (op->error) { -+ op->flags |= BCH_WRITE_DONE; -+ continue_at_nobarrier(cl, bch2_write_done, NULL); -+ goto out; -+ } -+ } -+ -+ goto again; -+} -+ -+static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len) -+{ -+ struct closure *cl = &op->cl; -+ struct bio *bio = &op->wbio.bio; -+ struct bvec_iter iter; -+ struct bkey_i_inline_data *id; -+ unsigned sectors; -+ int ret; -+ -+ bch2_check_set_feature(op->c, BCH_FEATURE_inline_data); -+ -+ ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys, -+ ARRAY_SIZE(op->inline_keys), -+ BKEY_U64s + DIV_ROUND_UP(data_len, 8)); -+ if (ret) { -+ op->error = ret; -+ goto err; -+ } -+ -+ sectors = bio_sectors(bio); -+ op->pos.offset += sectors; -+ -+ id = bkey_inline_data_init(op->insert_keys.top); -+ id->k.p = op->pos; -+ id->k.version = op->version; -+ id->k.size = sectors; -+ -+ iter = bio->bi_iter; -+ iter.bi_size = data_len; -+ memcpy_from_bio(id->v.data, bio, iter); -+ -+ while (data_len & 7) -+ id->v.data[data_len++] = '\0'; -+ set_bkey_val_bytes(&id->k, data_len); -+ bch2_keylist_push(&op->insert_keys); -+ -+ op->flags |= BCH_WRITE_WROTE_DATA_INLINE; -+ op->flags |= BCH_WRITE_DONE; -+ -+ continue_at_nobarrier(cl, bch2_write_index, NULL); -+ return; -+err: -+ bch2_write_done(&op->cl); -+} -+ -+/** -+ * bch_write - handle a write to a cache device or flash only volume -+ * -+ * This is the starting point for any data to end up in a cache device; it could -+ * be from a normal write, or a writeback write, or a write to a flash only -+ * volume - it's also used by the moving garbage collector to compact data in -+ * mostly empty buckets. -+ * -+ * It first writes the data to the cache, creating a list of keys to be inserted -+ * (if the data won't fit in a single open bucket, there will be multiple keys); -+ * after the data is written it calls bch_journal, and after the keys have been -+ * added to the next journal write they're inserted into the btree. -+ * -+ * If op->discard is true, instead of inserting the data it invalidates the -+ * region of the cache represented by op->bio and op->inode. 
-+ */ -+void bch2_write(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bio *bio = &op->wbio.bio; -+ struct bch_fs *c = op->c; -+ unsigned data_len; -+ -+ BUG_ON(!op->nr_replicas); -+ BUG_ON(!op->write_point.v); -+ BUG_ON(!bkey_cmp(op->pos, POS_MAX)); -+ -+ op->start_time = local_clock(); -+ bch2_keylist_init(&op->insert_keys, op->inline_keys); -+ wbio_init(bio)->put_bio = false; -+ -+ if (bio_sectors(bio) & (c->opts.block_size - 1)) { -+ __bcache_io_error(c, "misaligned write"); -+ op->error = -EIO; -+ goto err; -+ } -+ -+ if (c->opts.nochanges || -+ !percpu_ref_tryget(&c->writes)) { -+ if (!(op->flags & BCH_WRITE_FROM_INTERNAL)) -+ __bcache_io_error(c, "read only"); -+ op->error = -EROFS; -+ goto err; -+ } -+ -+ /* -+ * Can't ratelimit copygc - we'd deadlock: -+ */ -+ if (!(op->flags & BCH_WRITE_FROM_INTERNAL)) -+ down(&c->io_in_flight); -+ -+ bch2_increment_clock(c, bio_sectors(bio), WRITE); -+ -+ data_len = min_t(u64, bio->bi_iter.bi_size, -+ op->new_i_size - (op->pos.offset << 9)); -+ -+ if (c->opts.inline_data && -+ data_len <= min(block_bytes(c) / 2, 1024U)) { -+ bch2_write_data_inline(op, data_len); -+ return; -+ } -+ -+ continue_at_nobarrier(cl, __bch2_write, NULL); -+ return; -+err: -+ bch2_disk_reservation_put(c, &op->res); -+ -+ if (op->end_io) { -+ EBUG_ON(cl->parent); -+ closure_debug_destroy(cl); -+ op->end_io(op); -+ } else { -+ closure_return(cl); -+ } -+} -+ -+/* Cache promotion on read */ -+ -+struct promote_op { -+ struct closure cl; -+ struct rcu_head rcu; -+ u64 start_time; -+ -+ struct rhash_head hash; -+ struct bpos pos; -+ -+ struct migrate_write write; -+ struct bio_vec bi_inline_vecs[0]; /* must be last */ -+}; -+ -+static const struct rhashtable_params bch_promote_params = { -+ .head_offset = offsetof(struct promote_op, hash), -+ .key_offset = offsetof(struct promote_op, pos), -+ .key_len = sizeof(struct bpos), -+}; -+ -+static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k, -+ struct bpos pos, -+ struct bch_io_opts opts, -+ unsigned flags) -+{ -+ if (!(flags & BCH_READ_MAY_PROMOTE)) -+ return false; -+ -+ if (!opts.promote_target) -+ return false; -+ -+ if (bch2_bkey_has_target(c, k, opts.promote_target)) -+ return false; -+ -+ if (bch2_target_congested(c, opts.promote_target)) { -+ /* XXX trace this */ -+ return false; -+ } -+ -+ if (rhashtable_lookup_fast(&c->promote_table, &pos, -+ bch_promote_params)) -+ return false; -+ -+ return true; -+} -+ -+static void promote_free(struct bch_fs *c, struct promote_op *op) -+{ -+ int ret; -+ -+ ret = rhashtable_remove_fast(&c->promote_table, &op->hash, -+ bch_promote_params); -+ BUG_ON(ret); -+ percpu_ref_put(&c->writes); -+ kfree_rcu(op, rcu); -+} -+ -+static void promote_done(struct closure *cl) -+{ -+ struct promote_op *op = -+ container_of(cl, struct promote_op, cl); -+ struct bch_fs *c = op->write.op.c; -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_data_promote], -+ op->start_time); -+ -+ bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio); -+ promote_free(c, op); -+} -+ -+static void promote_start(struct promote_op *op, struct bch_read_bio *rbio) -+{ -+ struct bch_fs *c = rbio->c; -+ struct closure *cl = &op->cl; -+ struct bio *bio = &op->write.op.wbio.bio; -+ -+ trace_promote(&rbio->bio); -+ -+ /* we now own pages: */ -+ BUG_ON(!rbio->bounce); -+ BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs); -+ -+ memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec, -+ sizeof(struct bio_vec) * rbio->bio.bi_vcnt); -+ swap(bio->bi_vcnt, rbio->bio.bi_vcnt); -+ -+ 
bch2_migrate_read_done(&op->write, rbio); -+ -+ closure_init(cl, NULL); -+ closure_call(&op->write.op.cl, bch2_write, c->wq, cl); -+ closure_return_with_destructor(cl, promote_done); -+} -+ -+static struct promote_op *__promote_alloc(struct bch_fs *c, -+ enum btree_id btree_id, -+ struct bkey_s_c k, -+ struct bpos pos, -+ struct extent_ptr_decoded *pick, -+ struct bch_io_opts opts, -+ unsigned sectors, -+ struct bch_read_bio **rbio) -+{ -+ struct promote_op *op = NULL; -+ struct bio *bio; -+ unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); -+ int ret; -+ -+ if (!percpu_ref_tryget(&c->writes)) -+ return NULL; -+ -+ op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO); -+ if (!op) -+ goto err; -+ -+ op->start_time = local_clock(); -+ op->pos = pos; -+ -+ /* -+ * We don't use the mempool here because extents that aren't -+ * checksummed or compressed can be too big for the mempool: -+ */ -+ *rbio = kzalloc(sizeof(struct bch_read_bio) + -+ sizeof(struct bio_vec) * pages, -+ GFP_NOIO); -+ if (!*rbio) -+ goto err; -+ -+ rbio_init(&(*rbio)->bio, opts); -+ bio_init(&(*rbio)->bio, (*rbio)->bio.bi_inline_vecs, pages); -+ -+ if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, -+ GFP_NOIO)) -+ goto err; -+ -+ (*rbio)->bounce = true; -+ (*rbio)->split = true; -+ (*rbio)->kmalloc = true; -+ -+ if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash, -+ bch_promote_params)) -+ goto err; -+ -+ bio = &op->write.op.wbio.bio; -+ bio_init(bio, bio->bi_inline_vecs, pages); -+ -+ ret = bch2_migrate_write_init(c, &op->write, -+ writepoint_hashed((unsigned long) current), -+ opts, -+ DATA_PROMOTE, -+ (struct data_opts) { -+ .target = opts.promote_target -+ }, -+ btree_id, k); -+ BUG_ON(ret); -+ -+ return op; -+err: -+ if (*rbio) -+ bio_free_pages(&(*rbio)->bio); -+ kfree(*rbio); -+ *rbio = NULL; -+ kfree(op); -+ percpu_ref_put(&c->writes); -+ return NULL; -+} -+ -+noinline -+static struct promote_op *promote_alloc(struct bch_fs *c, -+ struct bvec_iter iter, -+ struct bkey_s_c k, -+ struct extent_ptr_decoded *pick, -+ struct bch_io_opts opts, -+ unsigned flags, -+ struct bch_read_bio **rbio, -+ bool *bounce, -+ bool *read_full) -+{ -+ bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents); -+ /* data might have to be decompressed in the write path: */ -+ unsigned sectors = promote_full -+ ? max(pick->crc.compressed_size, pick->crc.live_size) -+ : bvec_iter_sectors(iter); -+ struct bpos pos = promote_full -+ ? bkey_start_pos(k.k) -+ : POS(k.k->p.inode, iter.bi_sector); -+ struct promote_op *promote; -+ -+ if (!should_promote(c, k, pos, opts, flags)) -+ return NULL; -+ -+ promote = __promote_alloc(c, -+ k.k->type == KEY_TYPE_reflink_v -+ ? BTREE_ID_REFLINK -+ : BTREE_ID_EXTENTS, -+ k, pos, pick, opts, sectors, rbio); -+ if (!promote) -+ return NULL; -+ -+ *bounce = true; -+ *read_full = promote_full; -+ return promote; -+} -+ -+/* Read */ -+ -+#define READ_RETRY_AVOID 1 -+#define READ_RETRY 2 -+#define READ_ERR 3 -+ -+enum rbio_context { -+ RBIO_CONTEXT_NULL, -+ RBIO_CONTEXT_HIGHPRI, -+ RBIO_CONTEXT_UNBOUND, -+}; -+ -+static inline struct bch_read_bio * -+bch2_rbio_parent(struct bch_read_bio *rbio) -+{ -+ return rbio->split ? 
rbio->parent : rbio; -+} -+ -+__always_inline -+static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn, -+ enum rbio_context context, -+ struct workqueue_struct *wq) -+{ -+ if (context <= rbio->context) { -+ fn(&rbio->work); -+ } else { -+ rbio->work.func = fn; -+ rbio->context = context; -+ queue_work(wq, &rbio->work); -+ } -+} -+ -+static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) -+{ -+ BUG_ON(rbio->bounce && !rbio->split); -+ -+ if (rbio->promote) -+ promote_free(rbio->c, rbio->promote); -+ rbio->promote = NULL; -+ -+ if (rbio->bounce) -+ bch2_bio_free_pages_pool(rbio->c, &rbio->bio); -+ -+ if (rbio->split) { -+ struct bch_read_bio *parent = rbio->parent; -+ -+ if (rbio->kmalloc) -+ kfree(rbio); -+ else -+ bio_put(&rbio->bio); -+ -+ rbio = parent; -+ } -+ -+ return rbio; -+} -+ -+/* -+ * Only called on a top level bch_read_bio to complete an entire read request, -+ * not a split: -+ */ -+static void bch2_rbio_done(struct bch_read_bio *rbio) -+{ -+ if (rbio->start_time) -+ bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read], -+ rbio->start_time); -+ bio_endio(&rbio->bio); -+} -+ -+static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio, -+ struct bvec_iter bvec_iter, u64 inode, -+ struct bch_io_failures *failed, -+ unsigned flags) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_on_stack sk; -+ struct bkey_s_c k; -+ int ret; -+ -+ flags &= ~BCH_READ_LAST_FRAGMENT; -+ flags |= BCH_READ_MUST_CLONE; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ rbio->pos, BTREE_ITER_SLOTS); -+retry: -+ rbio->bio.bi_status = 0; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ if (bkey_err(k)) -+ goto err; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ bch2_trans_unlock(&trans); -+ -+ if (!bch2_bkey_matches_ptr(c, k, -+ rbio->pick.ptr, -+ rbio->pos.offset - -+ rbio->pick.crc.offset)) { -+ /* extent we wanted to read no longer exists: */ -+ rbio->hole = true; -+ goto out; -+ } -+ -+ ret = __bch2_read_extent(c, rbio, bvec_iter, k, 0, failed, flags); -+ if (ret == READ_RETRY) -+ goto retry; -+ if (ret) -+ goto err; -+out: -+ bch2_rbio_done(rbio); -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ return; -+err: -+ rbio->bio.bi_status = BLK_STS_IOERR; -+ goto out; -+} -+ -+static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, -+ struct bvec_iter bvec_iter, u64 inode, -+ struct bch_io_failures *failed, unsigned flags) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_on_stack sk; -+ struct bkey_s_c k; -+ int ret; -+ -+ flags &= ~BCH_READ_LAST_FRAGMENT; -+ flags |= BCH_READ_MUST_CLONE; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, -+ POS(inode, bvec_iter.bi_sector), -+ BTREE_ITER_SLOTS, k, ret) { -+ unsigned bytes, sectors, offset_into_extent; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ -+ offset_into_extent = iter->pos.offset - -+ bkey_start_offset(k.k); -+ sectors = k.k->size - offset_into_extent; -+ -+ ret = bch2_read_indirect_extent(&trans, -+ &offset_into_extent, &sk); -+ if (ret) -+ break; -+ -+ sectors = min(sectors, k.k->size - offset_into_extent); -+ -+ bch2_trans_unlock(&trans); -+ -+ bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; -+ swap(bvec_iter.bi_size, bytes); -+ -+ ret = 
__bch2_read_extent(c, rbio, bvec_iter, k, -+ offset_into_extent, failed, flags); -+ switch (ret) { -+ case READ_RETRY: -+ goto retry; -+ case READ_ERR: -+ goto err; -+ }; -+ -+ if (bytes == bvec_iter.bi_size) -+ goto out; -+ -+ swap(bvec_iter.bi_size, bytes); -+ bio_advance_iter(&rbio->bio, &bvec_iter, bytes); -+ } -+ -+ if (ret == -EINTR) -+ goto retry; -+ /* -+ * If we get here, it better have been because there was an error -+ * reading a btree node -+ */ -+ BUG_ON(!ret); -+ __bcache_io_error(c, "btree IO error: %i", ret); -+err: -+ rbio->bio.bi_status = BLK_STS_IOERR; -+out: -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ bch2_rbio_done(rbio); -+} -+ -+static void bch2_rbio_retry(struct work_struct *work) -+{ -+ struct bch_read_bio *rbio = -+ container_of(work, struct bch_read_bio, work); -+ struct bch_fs *c = rbio->c; -+ struct bvec_iter iter = rbio->bvec_iter; -+ unsigned flags = rbio->flags; -+ u64 inode = rbio->pos.inode; -+ struct bch_io_failures failed = { .nr = 0 }; -+ -+ trace_read_retry(&rbio->bio); -+ -+ if (rbio->retry == READ_RETRY_AVOID) -+ bch2_mark_io_failure(&failed, &rbio->pick); -+ -+ rbio->bio.bi_status = 0; -+ -+ rbio = bch2_rbio_free(rbio); -+ -+ flags |= BCH_READ_IN_RETRY; -+ flags &= ~BCH_READ_MAY_PROMOTE; -+ -+ if (flags & BCH_READ_NODECODE) -+ bch2_read_retry_nodecode(c, rbio, iter, inode, &failed, flags); -+ else -+ bch2_read_retry(c, rbio, iter, inode, &failed, flags); -+} -+ -+static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, -+ blk_status_t error) -+{ -+ rbio->retry = retry; -+ -+ if (rbio->flags & BCH_READ_IN_RETRY) -+ return; -+ -+ if (retry == READ_ERR) { -+ rbio = bch2_rbio_free(rbio); -+ -+ rbio->bio.bi_status = error; -+ bch2_rbio_done(rbio); -+ } else { -+ bch2_rbio_punt(rbio, bch2_rbio_retry, -+ RBIO_CONTEXT_UNBOUND, system_unbound_wq); -+ } -+} -+ -+static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, -+ struct bch_read_bio *rbio) -+{ -+ struct bch_fs *c = rbio->c; -+ u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset; -+ struct bch_extent_crc_unpacked new_crc; -+ struct btree_iter *iter = NULL; -+ struct bkey_i *new; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ if (crc_is_compressed(rbio->pick.crc)) -+ return 0; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_EXTENTS, rbio->pos, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ if ((ret = PTR_ERR_OR_ZERO(iter))) -+ goto out; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ if ((ret = bkey_err(k))) -+ goto out; -+ -+ /* -+ * going to be temporarily appending another checksum entry: -+ */ -+ new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + -+ BKEY_EXTENT_U64s_MAX * 8); -+ if ((ret = PTR_ERR_OR_ZERO(new))) -+ goto out; -+ -+ bkey_reassemble(new, k); -+ k = bkey_i_to_s_c(new); -+ -+ if (bversion_cmp(k.k->version, rbio->version) || -+ !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) -+ goto out; -+ -+ /* Extent was merged? 
*/ -+ if (bkey_start_offset(k.k) < data_offset || -+ k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size) -+ goto out; -+ -+ if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version, -+ rbio->pick.crc, NULL, &new_crc, -+ bkey_start_offset(k.k) - data_offset, k.k->size, -+ rbio->pick.crc.csum_type)) { -+ bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)"); -+ ret = 0; -+ goto out; -+ } -+ -+ if (!bch2_bkey_narrow_crcs(new, new_crc)) -+ goto out; -+ -+ bch2_trans_update(trans, iter, new, 0); -+out: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) -+{ -+ bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL, -+ __bch2_rbio_narrow_crcs(&trans, rbio)); -+} -+ -+/* Inner part that may run in process context */ -+static void __bch2_read_endio(struct work_struct *work) -+{ -+ struct bch_read_bio *rbio = -+ container_of(work, struct bch_read_bio, work); -+ struct bch_fs *c = rbio->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev); -+ struct bio *src = &rbio->bio; -+ struct bio *dst = &bch2_rbio_parent(rbio)->bio; -+ struct bvec_iter dst_iter = rbio->bvec_iter; -+ struct bch_extent_crc_unpacked crc = rbio->pick.crc; -+ struct nonce nonce = extent_nonce(rbio->version, crc); -+ struct bch_csum csum; -+ -+ /* Reset iterator for checksumming and copying bounced data: */ -+ if (rbio->bounce) { -+ src->bi_iter.bi_size = crc.compressed_size << 9; -+ src->bi_iter.bi_idx = 0; -+ src->bi_iter.bi_bvec_done = 0; -+ } else { -+ src->bi_iter = rbio->bvec_iter; -+ } -+ -+ csum = bch2_checksum_bio(c, crc.csum_type, nonce, src); -+ if (bch2_crc_cmp(csum, rbio->pick.crc.csum)) -+ goto csum_err; -+ -+ if (unlikely(rbio->narrow_crcs)) -+ bch2_rbio_narrow_crcs(rbio); -+ -+ if (rbio->flags & BCH_READ_NODECODE) -+ goto nodecode; -+ -+ /* Adjust crc to point to subset of data we want: */ -+ crc.offset += rbio->offset_into_extent; -+ crc.live_size = bvec_iter_sectors(rbio->bvec_iter); -+ -+ if (crc_is_compressed(crc)) { -+ bch2_encrypt_bio(c, crc.csum_type, nonce, src); -+ if (bch2_bio_uncompress(c, src, dst, dst_iter, crc)) -+ goto decompression_err; -+ } else { -+ /* don't need to decrypt the entire bio: */ -+ nonce = nonce_add(nonce, crc.offset << 9); -+ bio_advance(src, crc.offset << 9); -+ -+ BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size); -+ src->bi_iter.bi_size = dst_iter.bi_size; -+ -+ bch2_encrypt_bio(c, crc.csum_type, nonce, src); -+ -+ if (rbio->bounce) { -+ struct bvec_iter src_iter = src->bi_iter; -+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter); -+ } -+ } -+ -+ if (rbio->promote) { -+ /* -+ * Re encrypt data we decrypted, so it's consistent with -+ * rbio->crc: -+ */ -+ bch2_encrypt_bio(c, crc.csum_type, nonce, src); -+ promote_start(rbio->promote, rbio); -+ rbio->promote = NULL; -+ } -+nodecode: -+ if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) { -+ rbio = bch2_rbio_free(rbio); -+ bch2_rbio_done(rbio); -+ } -+ return; -+csum_err: -+ /* -+ * Checksum error: if the bio wasn't bounced, we may have been -+ * reading into buffers owned by userspace (that userspace can -+ * scribble over) - retry the read, bouncing it this time: -+ */ -+ if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) { -+ rbio->flags |= BCH_READ_MUST_BOUNCE; -+ bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR); -+ return; -+ } -+ -+ bch2_dev_io_error(ca, -+ "data checksum error, inode %llu offset %llu: expected %0llx:%0llx got %0llx:%0llx (type %u)", -+ rbio->pos.inode, (u64) 
rbio->bvec_iter.bi_sector, -+ rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, -+ csum.hi, csum.lo, crc.csum_type); -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); -+ return; -+decompression_err: -+ __bcache_io_error(c, "decompression error, inode %llu offset %llu", -+ rbio->pos.inode, -+ (u64) rbio->bvec_iter.bi_sector); -+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); -+ return; -+} -+ -+static void bch2_read_endio(struct bio *bio) -+{ -+ struct bch_read_bio *rbio = -+ container_of(bio, struct bch_read_bio, bio); -+ struct bch_fs *c = rbio->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev); -+ struct workqueue_struct *wq = NULL; -+ enum rbio_context context = RBIO_CONTEXT_NULL; -+ -+ if (rbio->have_ioref) { -+ bch2_latency_acct(ca, rbio->submit_time, READ); -+ percpu_ref_put(&ca->io_ref); -+ } -+ -+ if (!rbio->split) -+ rbio->bio.bi_end_io = rbio->end_io; -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "data read; %s", -+ bch2_blk_status_to_str(bio->bi_status))) { -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); -+ return; -+ } -+ -+ if (rbio->pick.ptr.cached && -+ (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) || -+ ptr_stale(ca, &rbio->pick.ptr))) { -+ atomic_long_inc(&c->read_realloc_races); -+ -+ if (rbio->flags & BCH_READ_RETRY_IF_STALE) -+ bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN); -+ else -+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN); -+ return; -+ } -+ -+ if (rbio->narrow_crcs || -+ crc_is_compressed(rbio->pick.crc) || -+ bch2_csum_type_is_encryption(rbio->pick.crc.csum_type)) -+ context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq; -+ else if (rbio->pick.crc.csum_type) -+ context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq; -+ -+ bch2_rbio_punt(rbio, __bch2_read_endio, context, wq); -+} -+ -+int __bch2_read_indirect_extent(struct btree_trans *trans, -+ unsigned *offset_into_extent, -+ struct bkey_on_stack *orig_k) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 reflink_offset; -+ int ret; -+ -+ reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) + -+ *offset_into_extent; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_REFLINK, -+ POS(0, reflink_offset), -+ BTREE_ITER_SLOTS); -+ ret = PTR_ERR_OR_ZERO(iter); -+ if (ret) -+ return ret; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (k.k->type != KEY_TYPE_reflink_v) { -+ __bcache_io_error(trans->c, -+ "pointer to nonexistent indirect extent"); -+ ret = -EIO; -+ goto err; -+ } -+ -+ *offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); -+ bkey_on_stack_reassemble(orig_k, trans->c, k); -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig, -+ struct bvec_iter iter, struct bkey_s_c k, -+ unsigned offset_into_extent, -+ struct bch_io_failures *failed, unsigned flags) -+{ -+ struct extent_ptr_decoded pick; -+ struct bch_read_bio *rbio = NULL; -+ struct bch_dev *ca; -+ struct promote_op *promote = NULL; -+ bool bounce = false, read_full = false, narrow_crcs = false; -+ struct bpos pos = bkey_start_pos(k.k); -+ int pick_ret; -+ -+ if (k.k->type == KEY_TYPE_inline_data) { -+ struct bkey_s_c_inline_data d = bkey_s_c_to_inline_data(k); -+ unsigned bytes = min_t(unsigned, iter.bi_size, -+ bkey_val_bytes(d.k)); -+ -+ swap(iter.bi_size, bytes); -+ memcpy_to_bio(&orig->bio, iter, d.v->data); -+ swap(iter.bi_size, bytes); -+ bio_advance_iter(&orig->bio, &iter, bytes); -+ zero_fill_bio_iter(&orig->bio, 
iter); -+ goto out_read_done; -+ } -+ -+ pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick); -+ -+ /* hole or reservation - just zero fill: */ -+ if (!pick_ret) -+ goto hole; -+ -+ if (pick_ret < 0) { -+ __bcache_io_error(c, "no device to read from"); -+ goto err; -+ } -+ -+ if (pick_ret > 0) -+ ca = bch_dev_bkey_exists(c, pick.ptr.dev); -+ -+ if (flags & BCH_READ_NODECODE) { -+ /* -+ * can happen if we retry, and the extent we were going to read -+ * has been merged in the meantime: -+ */ -+ if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS) -+ goto hole; -+ -+ iter.bi_size = pick.crc.compressed_size << 9; -+ goto get_bio; -+ } -+ -+ if (!(flags & BCH_READ_LAST_FRAGMENT) || -+ bio_flagged(&orig->bio, BIO_CHAIN)) -+ flags |= BCH_READ_MUST_CLONE; -+ -+ narrow_crcs = !(flags & BCH_READ_IN_RETRY) && -+ bch2_can_narrow_extent_crcs(k, pick.crc); -+ -+ if (narrow_crcs && (flags & BCH_READ_USER_MAPPED)) -+ flags |= BCH_READ_MUST_BOUNCE; -+ -+ EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size); -+ -+ if (crc_is_compressed(pick.crc) || -+ (pick.crc.csum_type != BCH_CSUM_NONE && -+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || -+ (bch2_csum_type_is_encryption(pick.crc.csum_type) && -+ (flags & BCH_READ_USER_MAPPED)) || -+ (flags & BCH_READ_MUST_BOUNCE)))) { -+ read_full = true; -+ bounce = true; -+ } -+ -+ if (orig->opts.promote_target) -+ promote = promote_alloc(c, iter, k, &pick, orig->opts, flags, -+ &rbio, &bounce, &read_full); -+ -+ if (!read_full) { -+ EBUG_ON(crc_is_compressed(pick.crc)); -+ EBUG_ON(pick.crc.csum_type && -+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || -+ bvec_iter_sectors(iter) != pick.crc.live_size || -+ pick.crc.offset || -+ offset_into_extent)); -+ -+ pos.offset += offset_into_extent; -+ pick.ptr.offset += pick.crc.offset + -+ offset_into_extent; -+ offset_into_extent = 0; -+ pick.crc.compressed_size = bvec_iter_sectors(iter); -+ pick.crc.uncompressed_size = bvec_iter_sectors(iter); -+ pick.crc.offset = 0; -+ pick.crc.live_size = bvec_iter_sectors(iter); -+ offset_into_extent = 0; -+ } -+get_bio: -+ if (rbio) { -+ /* -+ * promote already allocated bounce rbio: -+ * promote needs to allocate a bio big enough for uncompressing -+ * data in the write path, but we're not going to use it all -+ * here: -+ */ -+ EBUG_ON(rbio->bio.bi_iter.bi_size < -+ pick.crc.compressed_size << 9); -+ rbio->bio.bi_iter.bi_size = -+ pick.crc.compressed_size << 9; -+ } else if (bounce) { -+ unsigned sectors = pick.crc.compressed_size; -+ -+ rbio = rbio_init(bio_alloc_bioset(GFP_NOIO, -+ DIV_ROUND_UP(sectors, PAGE_SECTORS), -+ &c->bio_read_split), -+ orig->opts); -+ -+ bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9); -+ rbio->bounce = true; -+ rbio->split = true; -+ } else if (flags & BCH_READ_MUST_CLONE) { -+ /* -+ * Have to clone if there were any splits, due to error -+ * reporting issues (if a split errored, and retrying didn't -+ * work, when it reports the error to its parent (us) we don't -+ * know if the error was from our bio, and we should retry, or -+ * from the whole bio, in which case we don't want to retry and -+ * lose the error) -+ */ -+ rbio = rbio_init(bio_clone_fast(&orig->bio, GFP_NOIO, -+ &c->bio_read_split), -+ orig->opts); -+ rbio->bio.bi_iter = iter; -+ rbio->split = true; -+ } else { -+ rbio = orig; -+ rbio->bio.bi_iter = iter; -+ EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN)); -+ } -+ -+ EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size); -+ -+ rbio->c = c; -+ rbio->submit_time = local_clock(); -+ 
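The read paths above repeatedly compute how far a request lands into the extent it maps to (offset_into_extent) and cap the length by whatever remains of that extent before adjusting the pointer and crc offsets. A self-contained toy model of that bookkeeping, with invented names, not the real structures:

	#include <stdio.h>
	#include <stdint.h>

	struct read_slice {
		uint64_t offset_into_extent;	/* sectors into the extent */
		uint64_t sectors;		/* sectors served from it */
	};

	static struct read_slice slice_extent(uint64_t ext_start, uint64_t ext_sectors,
					      uint64_t req_start, uint64_t req_sectors)
	{
		struct read_slice s;

		s.offset_into_extent = req_start - ext_start;
		s.sectors = ext_sectors - s.offset_into_extent;
		if (s.sectors > req_sectors)
			s.sectors = req_sectors;
		return s;
	}

	int main(void)
	{
		/* extent covers sectors [100, 164), request wants [120, 200) */
		struct read_slice s = slice_extent(100, 64, 120, 80);

		printf("offset into extent %llu, sectors %llu\n",
		       (unsigned long long)s.offset_into_extent,
		       (unsigned long long)s.sectors);	/* 20, 44 */
		return 0;
	}
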
if (rbio->split) -+ rbio->parent = orig; -+ else -+ rbio->end_io = orig->bio.bi_end_io; -+ rbio->bvec_iter = iter; -+ rbio->offset_into_extent= offset_into_extent; -+ rbio->flags = flags; -+ rbio->have_ioref = pick_ret > 0 && bch2_dev_get_ioref(ca, READ); -+ rbio->narrow_crcs = narrow_crcs; -+ rbio->hole = 0; -+ rbio->retry = 0; -+ rbio->context = 0; -+ /* XXX: only initialize this if needed */ -+ rbio->devs_have = bch2_bkey_devs(k); -+ rbio->pick = pick; -+ rbio->pos = pos; -+ rbio->version = k.k->version; -+ rbio->promote = promote; -+ INIT_WORK(&rbio->work, NULL); -+ -+ rbio->bio.bi_opf = orig->bio.bi_opf; -+ rbio->bio.bi_iter.bi_sector = pick.ptr.offset; -+ rbio->bio.bi_end_io = bch2_read_endio; -+ -+ if (rbio->bounce) -+ trace_read_bounce(&rbio->bio); -+ -+ bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); -+ -+ rcu_read_lock(); -+ bucket_io_clock_reset(c, ca, PTR_BUCKET_NR(ca, &pick.ptr), READ); -+ rcu_read_unlock(); -+ -+ if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) { -+ bio_inc_remaining(&orig->bio); -+ trace_read_split(&orig->bio); -+ } -+ -+ if (!rbio->pick.idx) { -+ if (!rbio->have_ioref) { -+ __bcache_io_error(c, "no device to read from"); -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); -+ goto out; -+ } -+ -+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user], -+ bio_sectors(&rbio->bio)); -+ bio_set_dev(&rbio->bio, ca->disk_sb.bdev); -+ -+ if (likely(!(flags & BCH_READ_IN_RETRY))) -+ submit_bio(&rbio->bio); -+ else -+ submit_bio_wait(&rbio->bio); -+ } else { -+ /* Attempting reconstruct read: */ -+ if (bch2_ec_read_extent(c, rbio)) { -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); -+ goto out; -+ } -+ -+ if (likely(!(flags & BCH_READ_IN_RETRY))) -+ bio_endio(&rbio->bio); -+ } -+out: -+ if (likely(!(flags & BCH_READ_IN_RETRY))) { -+ return 0; -+ } else { -+ int ret; -+ -+ rbio->context = RBIO_CONTEXT_UNBOUND; -+ bch2_read_endio(&rbio->bio); -+ -+ ret = rbio->retry; -+ rbio = bch2_rbio_free(rbio); -+ -+ if (ret == READ_RETRY_AVOID) { -+ bch2_mark_io_failure(failed, &pick); -+ ret = READ_RETRY; -+ } -+ -+ return ret; -+ } -+ -+err: -+ if (flags & BCH_READ_IN_RETRY) -+ return READ_ERR; -+ -+ orig->bio.bi_status = BLK_STS_IOERR; -+ goto out_read_done; -+ -+hole: -+ /* -+ * won't normally happen in the BCH_READ_NODECODE -+ * (bch2_move_extent()) path, but if we retry and the extent we wanted -+ * to read no longer exists we have to signal that: -+ */ -+ if (flags & BCH_READ_NODECODE) -+ orig->hole = true; -+ -+ zero_fill_bio_iter(&orig->bio, iter); -+out_read_done: -+ if (flags & BCH_READ_LAST_FRAGMENT) -+ bch2_rbio_done(orig); -+ return 0; -+} -+ -+void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_on_stack sk; -+ struct bkey_s_c k; -+ unsigned flags = BCH_READ_RETRY_IF_STALE| -+ BCH_READ_MAY_PROMOTE| -+ BCH_READ_USER_MAPPED; -+ int ret; -+ -+ BUG_ON(rbio->_state); -+ BUG_ON(flags & BCH_READ_NODECODE); -+ BUG_ON(flags & BCH_READ_IN_RETRY); -+ -+ rbio->c = c; -+ rbio->start_time = local_clock(); -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(inode, rbio->bio.bi_iter.bi_sector), -+ BTREE_ITER_SLOTS); -+ while (1) { -+ unsigned bytes, sectors, offset_into_extent; -+ -+ bch2_btree_iter_set_pos(iter, -+ POS(inode, rbio->bio.bi_iter.bi_sector)); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ 
goto err; -+ -+ offset_into_extent = iter->pos.offset - -+ bkey_start_offset(k.k); -+ sectors = k.k->size - offset_into_extent; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ -+ ret = bch2_read_indirect_extent(&trans, -+ &offset_into_extent, &sk); -+ if (ret) -+ goto err; -+ -+ /* -+ * With indirect extents, the amount of data to read is the min -+ * of the original extent and the indirect extent: -+ */ -+ sectors = min(sectors, k.k->size - offset_into_extent); -+ -+ /* -+ * Unlock the iterator while the btree node's lock is still in -+ * cache, before doing the IO: -+ */ -+ bch2_trans_unlock(&trans); -+ -+ bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; -+ swap(rbio->bio.bi_iter.bi_size, bytes); -+ -+ if (rbio->bio.bi_iter.bi_size == bytes) -+ flags |= BCH_READ_LAST_FRAGMENT; -+ -+ bch2_read_extent(c, rbio, k, offset_into_extent, flags); -+ -+ if (flags & BCH_READ_LAST_FRAGMENT) -+ break; -+ -+ swap(rbio->bio.bi_iter.bi_size, bytes); -+ bio_advance(&rbio->bio, bytes); -+ } -+out: -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ return; -+err: -+ if (ret == -EINTR) -+ goto retry; -+ -+ bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret); -+ bch2_rbio_done(rbio); -+ goto out; -+} -+ -+void bch2_fs_io_exit(struct bch_fs *c) -+{ -+ if (c->promote_table.tbl) -+ rhashtable_destroy(&c->promote_table); -+ mempool_exit(&c->bio_bounce_pages); -+ bioset_exit(&c->bio_write); -+ bioset_exit(&c->bio_read_split); -+ bioset_exit(&c->bio_read); -+} -+ -+int bch2_fs_io_init(struct bch_fs *c) -+{ -+ if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio), -+ BIOSET_NEED_BVECS) || -+ bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio), -+ BIOSET_NEED_BVECS) || -+ bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio), -+ BIOSET_NEED_BVECS) || -+ mempool_init_page_pool(&c->bio_bounce_pages, -+ max_t(unsigned, -+ c->opts.btree_node_size, -+ c->sb.encoded_extent_max) / -+ PAGE_SECTORS, 0) || -+ rhashtable_init(&c->promote_table, &bch_promote_params)) -+ return -ENOMEM; -+ -+ return 0; -+} -diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h -new file mode 100644 -index 000000000000..ded468d70f09 ---- /dev/null -+++ b/fs/bcachefs/io.h -@@ -0,0 +1,169 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_IO_H -+#define _BCACHEFS_IO_H -+ -+#include "checksum.h" -+#include "bkey_on_stack.h" -+#include "io_types.h" -+ -+#define to_wbio(_bio) \ -+ container_of((_bio), struct bch_write_bio, bio) -+ -+#define to_rbio(_bio) \ -+ container_of((_bio), struct bch_read_bio, bio) -+ -+void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *); -+void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t); -+ -+void bch2_latency_acct(struct bch_dev *, u64, int); -+ -+void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, -+ enum bch_data_type, const struct bkey_i *); -+ -+#define BLK_STS_REMOVED ((__force blk_status_t)128) -+ -+const char *bch2_blk_status_to_str(blk_status_t); -+ -+enum bch_write_flags { -+ BCH_WRITE_ALLOC_NOWAIT = (1 << 0), -+ BCH_WRITE_CACHED = (1 << 1), -+ BCH_WRITE_FLUSH = (1 << 2), -+ BCH_WRITE_DATA_ENCODED = (1 << 3), -+ BCH_WRITE_PAGES_STABLE = (1 << 4), -+ BCH_WRITE_PAGES_OWNED = (1 << 5), -+ BCH_WRITE_ONLY_SPECIFIED_DEVS = (1 << 6), -+ BCH_WRITE_WROTE_DATA_INLINE = (1 << 7), -+ BCH_WRITE_FROM_INTERNAL = (1 << 8), -+ -+ /* Internal: */ -+ BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 9), -+ BCH_WRITE_SKIP_CLOSURE_PUT = (1 << 10), -+ BCH_WRITE_DONE = (1 << 11), -+}; -+ -+static inline u64 
*op_journal_seq(struct bch_write_op *op) -+{ -+ return (op->flags & BCH_WRITE_JOURNAL_SEQ_PTR) -+ ? op->journal_seq_p : &op->journal_seq; -+} -+ -+static inline void op_journal_seq_set(struct bch_write_op *op, u64 *journal_seq) -+{ -+ op->journal_seq_p = journal_seq; -+ op->flags |= BCH_WRITE_JOURNAL_SEQ_PTR; -+} -+ -+static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op) -+{ -+ return op->alloc_reserve == RESERVE_MOVINGGC -+ ? op->c->copygc_wq -+ : op->c->wq; -+} -+ -+int bch2_extent_update(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *, struct disk_reservation *, -+ u64 *, u64, s64 *); -+int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, -+ struct bpos, u64 *, s64 *); -+int bch2_fpunch(struct bch_fs *c, u64, u64, u64, u64 *, s64 *); -+ -+int bch2_write_index_default(struct bch_write_op *); -+ -+static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, -+ struct bch_io_opts opts) -+{ -+ op->c = c; -+ op->end_io = NULL; -+ op->flags = 0; -+ op->written = 0; -+ op->error = 0; -+ op->csum_type = bch2_data_checksum_type(c, opts.data_checksum); -+ op->compression_type = bch2_compression_opt_to_type[opts.compression]; -+ op->nr_replicas = 0; -+ op->nr_replicas_required = c->opts.data_replicas_required; -+ op->alloc_reserve = RESERVE_NONE; -+ op->incompressible = 0; -+ op->open_buckets.nr = 0; -+ op->devs_have.nr = 0; -+ op->target = 0; -+ op->opts = opts; -+ op->pos = POS_MAX; -+ op->version = ZERO_VERSION; -+ op->write_point = (struct write_point_specifier) { 0 }; -+ op->res = (struct disk_reservation) { 0 }; -+ op->journal_seq = 0; -+ op->new_i_size = U64_MAX; -+ op->i_sectors_delta = 0; -+ op->index_update_fn = bch2_write_index_default; -+} -+ -+void bch2_write(struct closure *); -+ -+static inline struct bch_write_bio *wbio_init(struct bio *bio) -+{ -+ struct bch_write_bio *wbio = to_wbio(bio); -+ -+ memset(wbio, 0, offsetof(struct bch_write_bio, bio)); -+ return wbio; -+} -+ -+struct bch_devs_mask; -+struct cache_promote_op; -+struct extent_ptr_decoded; -+ -+int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, -+ struct bkey_on_stack *); -+ -+static inline int bch2_read_indirect_extent(struct btree_trans *trans, -+ unsigned *offset_into_extent, -+ struct bkey_on_stack *k) -+{ -+ return k->k->k.type == KEY_TYPE_reflink_p -+ ? 
__bch2_read_indirect_extent(trans, offset_into_extent, k) -+ : 0; -+} -+ -+enum bch_read_flags { -+ BCH_READ_RETRY_IF_STALE = 1 << 0, -+ BCH_READ_MAY_PROMOTE = 1 << 1, -+ BCH_READ_USER_MAPPED = 1 << 2, -+ BCH_READ_NODECODE = 1 << 3, -+ BCH_READ_LAST_FRAGMENT = 1 << 4, -+ -+ /* internal: */ -+ BCH_READ_MUST_BOUNCE = 1 << 5, -+ BCH_READ_MUST_CLONE = 1 << 6, -+ BCH_READ_IN_RETRY = 1 << 7, -+}; -+ -+int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *, -+ struct bvec_iter, struct bkey_s_c, unsigned, -+ struct bch_io_failures *, unsigned); -+ -+static inline void bch2_read_extent(struct bch_fs *c, -+ struct bch_read_bio *rbio, -+ struct bkey_s_c k, -+ unsigned offset_into_extent, -+ unsigned flags) -+{ -+ __bch2_read_extent(c, rbio, rbio->bio.bi_iter, k, -+ offset_into_extent, NULL, flags); -+} -+ -+void bch2_read(struct bch_fs *, struct bch_read_bio *, u64); -+ -+static inline struct bch_read_bio *rbio_init(struct bio *bio, -+ struct bch_io_opts opts) -+{ -+ struct bch_read_bio *rbio = to_rbio(bio); -+ -+ rbio->_state = 0; -+ rbio->promote = NULL; -+ rbio->opts = opts; -+ return rbio; -+} -+ -+void bch2_fs_io_exit(struct bch_fs *); -+int bch2_fs_io_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_IO_H */ -diff --git a/fs/bcachefs/io_types.h b/fs/bcachefs/io_types.h -new file mode 100644 -index 000000000000..b23727d212b9 ---- /dev/null -+++ b/fs/bcachefs/io_types.h -@@ -0,0 +1,148 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_IO_TYPES_H -+#define _BCACHEFS_IO_TYPES_H -+ -+#include "alloc_types.h" -+#include "btree_types.h" -+#include "buckets_types.h" -+#include "extents_types.h" -+#include "keylist_types.h" -+#include "opts.h" -+#include "super_types.h" -+ -+#include -+#include -+ -+struct bch_read_bio { -+ struct bch_fs *c; -+ u64 start_time; -+ u64 submit_time; -+ -+ /* -+ * Reads will often have to be split, and if the extent being read from -+ * was checksummed or compressed we'll also have to allocate bounce -+ * buffers and copy the data back into the original bio. -+ * -+ * If we didn't have to split, we have to save and restore the original -+ * bi_end_io - @split below indicates which: -+ */ -+ union { -+ struct bch_read_bio *parent; -+ bio_end_io_t *end_io; -+ }; -+ -+ /* -+ * Saved copy of bio->bi_iter, from submission time - allows us to -+ * resubmit on IO error, and also to copy data back to the original bio -+ * when we're bouncing: -+ */ -+ struct bvec_iter bvec_iter; -+ -+ unsigned offset_into_extent; -+ -+ u16 flags; -+ union { -+ struct { -+ u16 bounce:1, -+ split:1, -+ kmalloc:1, -+ have_ioref:1, -+ narrow_crcs:1, -+ hole:1, -+ retry:2, -+ context:2; -+ }; -+ u16 _state; -+ }; -+ -+ struct bch_devs_list devs_have; -+ -+ struct extent_ptr_decoded pick; -+ /* start pos of data we read (may not be pos of data we want) */ -+ struct bpos pos; -+ struct bversion version; -+ -+ struct promote_op *promote; -+ -+ struct bch_io_opts opts; -+ -+ struct work_struct work; -+ -+ struct bio bio; -+}; -+ -+struct bch_write_bio { -+ struct bch_fs *c; -+ struct bch_write_bio *parent; -+ -+ u64 submit_time; -+ -+ struct bch_devs_list failed; -+ u8 dev; -+ -+ unsigned split:1, -+ bounce:1, -+ put_bio:1, -+ have_ioref:1, -+ used_mempool:1; -+ -+ struct bio bio; -+}; -+ -+struct bch_write_op { -+ struct closure cl; -+ struct bch_fs *c; -+ void (*end_io)(struct bch_write_op *); -+ u64 start_time; -+ -+ unsigned written; /* sectors */ -+ u16 flags; -+ s16 error; /* dio write path expects it to hold -ERESTARTSYS... 
*/ -+ -+ unsigned csum_type:4; -+ unsigned compression_type:4; -+ unsigned nr_replicas:4; -+ unsigned nr_replicas_required:4; -+ unsigned alloc_reserve:3; -+ unsigned incompressible:1; -+ -+ struct bch_devs_list devs_have; -+ u16 target; -+ u16 nonce; -+ struct bch_io_opts opts; -+ -+ struct bpos pos; -+ struct bversion version; -+ -+ /* For BCH_WRITE_DATA_ENCODED: */ -+ struct bch_extent_crc_unpacked crc; -+ -+ struct write_point_specifier write_point; -+ -+ struct disk_reservation res; -+ -+ struct open_buckets open_buckets; -+ -+ /* -+ * If caller wants to flush but hasn't passed us a journal_seq ptr, we -+ * still need to stash the journal_seq somewhere: -+ */ -+ union { -+ u64 *journal_seq_p; -+ u64 journal_seq; -+ }; -+ u64 new_i_size; -+ s64 i_sectors_delta; -+ -+ int (*index_update_fn)(struct bch_write_op *); -+ -+ struct bch_devs_mask failed; -+ -+ struct keylist insert_keys; -+ u64 inline_keys[BKEY_EXTENT_U64s_MAX * 2]; -+ -+ /* Must be last: */ -+ struct bch_write_bio wbio; -+}; -+ -+#endif /* _BCACHEFS_IO_TYPES_H */ -diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c -new file mode 100644 -index 000000000000..210ad1b0c469 ---- /dev/null -+++ b/fs/bcachefs/journal.c -@@ -0,0 +1,1248 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * bcachefs journalling code, for btree insertions -+ * -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_methods.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "journal.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "super-io.h" -+ -+#include -+ -+static bool __journal_entry_is_open(union journal_res_state state) -+{ -+ return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL; -+} -+ -+static bool journal_entry_is_open(struct journal *j) -+{ -+ return __journal_entry_is_open(j->reservations); -+} -+ -+static void journal_pin_new_entry(struct journal *j, int count) -+{ -+ struct journal_entry_pin_list *p; -+ -+ /* -+ * The fifo_push() needs to happen at the same time as j->seq is -+ * incremented for journal_last_seq() to be calculated correctly -+ */ -+ atomic64_inc(&j->seq); -+ p = fifo_push_ref(&j->pin); -+ -+ INIT_LIST_HEAD(&p->list); -+ INIT_LIST_HEAD(&p->flushed); -+ atomic_set(&p->count, count); -+ p->devs.nr = 0; -+} -+ -+static void bch2_journal_buf_init(struct journal *j) -+{ -+ struct journal_buf *buf = journal_cur_buf(j); -+ -+ memset(buf->has_inode, 0, sizeof(buf->has_inode)); -+ -+ memset(buf->data, 0, sizeof(*buf->data)); -+ buf->data->seq = cpu_to_le64(journal_cur_seq(j)); -+ buf->data->u64s = 0; -+} -+ -+void bch2_journal_halt(struct journal *j) -+{ -+ union journal_res_state old, new; -+ u64 v = atomic64_read(&j->reservations.counter); -+ -+ do { -+ old.v = new.v = v; -+ if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) -+ return; -+ -+ new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL; -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ journal_wake(j); -+ closure_wake_up(&journal_cur_buf(j)->wait); -+} -+ -+/* journal entry close/open: */ -+ -+void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set) -+{ -+ if (!need_write_just_set && -+ test_bit(JOURNAL_NEED_WRITE, &j->flags)) -+ bch2_time_stats_update(j->delay_time, -+ j->need_write_time); -+ -+ clear_bit(JOURNAL_NEED_WRITE, &j->flags); -+ -+ closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL); -+} -+ -+/* -+ * Returns true if journal entry is now closed: -+ */ 
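bch2_journal_halt() above and __journal_entry_close() below follow the same lock-free pattern: read the packed 64-bit reservation state, compute a new value, and retry with a compare-and-swap until no other thread raced in between. A minimal C11 sketch of that pattern; the field layout here is invented, the real union journal_res_state packs several more fields:

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	#define ENTRY_CLOSED 0xffffffffu	/* stand-in for JOURNAL_ENTRY_CLOSED_VAL */

	static _Atomic uint64_t state;

	static unsigned cur_entry_offset(uint64_t v)
	{
		return (unsigned)(v & 0xffffffffu);	/* low 32 bits: entry offset */
	}

	static void close_entry(void)
	{
		uint64_t old_v = atomic_load(&state), new_v;

		do {
			if (cur_entry_offset(old_v) == ENTRY_CLOSED)
				return;		/* already closed, nothing to do */
			new_v = (old_v & ~0xffffffffull) | ENTRY_CLOSED;
		} while (!atomic_compare_exchange_weak(&state, &old_v, new_v));
	}

	int main(void)
	{
		atomic_store(&state, 42);	/* entry open at offset 42 */
		close_entry();
		printf("cur_entry_offset %#x\n",
		       cur_entry_offset(atomic_load(&state)));	/* 0xffffffff */
		return 0;
	}
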
-+static bool __journal_entry_close(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_buf *buf = journal_cur_buf(j); -+ union journal_res_state old, new; -+ u64 v = atomic64_read(&j->reservations.counter); -+ bool set_need_write = false; -+ unsigned sectors; -+ -+ lockdep_assert_held(&j->lock); -+ -+ do { -+ old.v = new.v = v; -+ if (old.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL) -+ return true; -+ -+ if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) { -+ /* this entry will never be written: */ -+ closure_wake_up(&buf->wait); -+ return true; -+ } -+ -+ if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) { -+ set_bit(JOURNAL_NEED_WRITE, &j->flags); -+ j->need_write_time = local_clock(); -+ set_need_write = true; -+ } -+ -+ if (new.prev_buf_unwritten) -+ return false; -+ -+ new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL; -+ new.idx++; -+ new.prev_buf_unwritten = 1; -+ -+ BUG_ON(journal_state_count(new, new.idx)); -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ buf->data->u64s = cpu_to_le32(old.cur_entry_offset); -+ -+ sectors = vstruct_blocks_plus(buf->data, c->block_bits, -+ buf->u64s_reserved) << c->block_bits; -+ BUG_ON(sectors > buf->sectors); -+ buf->sectors = sectors; -+ -+ bkey_extent_init(&buf->key); -+ -+ /* -+ * We have to set last_seq here, _before_ opening a new journal entry: -+ * -+ * A threads may replace an old pin with a new pin on their current -+ * journal reservation - the expectation being that the journal will -+ * contain either what the old pin protected or what the new pin -+ * protects. -+ * -+ * After the old pin is dropped journal_last_seq() won't include the old -+ * pin, so we can only write the updated last_seq on the entry that -+ * contains whatever the new pin protects. -+ * -+ * Restated, we can _not_ update last_seq for a given entry if there -+ * could be a newer entry open with reservations/pins that have been -+ * taken against it. 
-+ * -+ * Hence, we want update/set last_seq on the current journal entry right -+ * before we open a new one: -+ */ -+ buf->data->last_seq = cpu_to_le64(journal_last_seq(j)); -+ -+ if (journal_entry_empty(buf->data)) -+ clear_bit(JOURNAL_NOT_EMPTY, &j->flags); -+ else -+ set_bit(JOURNAL_NOT_EMPTY, &j->flags); -+ -+ journal_pin_new_entry(j, 1); -+ -+ bch2_journal_buf_init(j); -+ -+ cancel_delayed_work(&j->write_work); -+ -+ bch2_journal_space_available(j); -+ -+ bch2_journal_buf_put(j, old.idx, set_need_write); -+ return true; -+} -+ -+static bool journal_entry_close(struct journal *j) -+{ -+ bool ret; -+ -+ spin_lock(&j->lock); -+ ret = __journal_entry_close(j); -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+/* -+ * should _only_ called from journal_res_get() - when we actually want a -+ * journal reservation - journal entry is open means journal is dirty: -+ * -+ * returns: -+ * 0: success -+ * -ENOSPC: journal currently full, must invoke reclaim -+ * -EAGAIN: journal blocked, must wait -+ * -EROFS: insufficient rw devices or journal error -+ */ -+static int journal_entry_open(struct journal *j) -+{ -+ struct journal_buf *buf = journal_cur_buf(j); -+ union journal_res_state old, new; -+ int u64s; -+ u64 v; -+ -+ lockdep_assert_held(&j->lock); -+ BUG_ON(journal_entry_is_open(j)); -+ -+ if (j->blocked) -+ return -EAGAIN; -+ -+ if (j->cur_entry_error) -+ return j->cur_entry_error; -+ -+ BUG_ON(!j->cur_entry_sectors); -+ -+ buf->u64s_reserved = j->entry_u64s_reserved; -+ buf->disk_sectors = j->cur_entry_sectors; -+ buf->sectors = min(buf->disk_sectors, buf->buf_size >> 9); -+ -+ u64s = (int) (buf->sectors << 9) / sizeof(u64) - -+ journal_entry_overhead(j); -+ u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1); -+ -+ if (u64s <= le32_to_cpu(buf->data->u64s)) -+ return -ENOSPC; -+ -+ /* -+ * Must be set before marking the journal entry as open: -+ */ -+ j->cur_entry_u64s = u64s; -+ -+ v = atomic64_read(&j->reservations.counter); -+ do { -+ old.v = new.v = v; -+ -+ if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) -+ return -EROFS; -+ -+ /* Handle any already added entries */ -+ new.cur_entry_offset = le32_to_cpu(buf->data->u64s); -+ -+ EBUG_ON(journal_state_count(new, new.idx)); -+ journal_state_inc(&new); -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ if (j->res_get_blocked_start) -+ bch2_time_stats_update(j->blocked_time, -+ j->res_get_blocked_start); -+ j->res_get_blocked_start = 0; -+ -+ mod_delayed_work(system_freezable_wq, -+ &j->write_work, -+ msecs_to_jiffies(j->write_delay_ms)); -+ journal_wake(j); -+ return 0; -+} -+ -+static bool journal_quiesced(struct journal *j) -+{ -+ union journal_res_state state = READ_ONCE(j->reservations); -+ bool ret = !state.prev_buf_unwritten && !__journal_entry_is_open(state); -+ -+ if (!ret) -+ journal_entry_close(j); -+ return ret; -+} -+ -+static void journal_quiesce(struct journal *j) -+{ -+ wait_event(j->wait, journal_quiesced(j)); -+} -+ -+static void journal_write_work(struct work_struct *work) -+{ -+ struct journal *j = container_of(work, struct journal, write_work.work); -+ -+ journal_entry_close(j); -+} -+ -+/* -+ * Given an inode number, if that inode number has data in the journal that -+ * hasn't yet been flushed, return the journal sequence number that needs to be -+ * flushed: -+ */ -+u64 bch2_inode_journal_seq(struct journal *j, u64 inode) -+{ -+ size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8)); -+ u64 seq = 0; -+ -+ if (!test_bit(h, j->buf[0].has_inode) && 
-+ !test_bit(h, j->buf[1].has_inode)) -+ return 0; -+ -+ spin_lock(&j->lock); -+ if (test_bit(h, journal_cur_buf(j)->has_inode)) -+ seq = journal_cur_seq(j); -+ else if (test_bit(h, journal_prev_buf(j)->has_inode)) -+ seq = journal_cur_seq(j) - 1; -+ spin_unlock(&j->lock); -+ -+ return seq; -+} -+ -+static int __journal_res_get(struct journal *j, struct journal_res *res, -+ unsigned flags) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_buf *buf; -+ bool can_discard; -+ int ret; -+retry: -+ if (journal_res_get_fast(j, res, flags)) -+ return 0; -+ -+ if (bch2_journal_error(j)) -+ return -EROFS; -+ -+ spin_lock(&j->lock); -+ -+ /* -+ * Recheck after taking the lock, so we don't race with another thread -+ * that just did journal_entry_open() and call journal_entry_close() -+ * unnecessarily -+ */ -+ if (journal_res_get_fast(j, res, flags)) { -+ spin_unlock(&j->lock); -+ return 0; -+ } -+ -+ if (!(flags & JOURNAL_RES_GET_RESERVED) && -+ !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) { -+ /* -+ * Don't want to close current journal entry, just need to -+ * invoke reclaim: -+ */ -+ ret = -ENOSPC; -+ goto unlock; -+ } -+ -+ /* -+ * If we couldn't get a reservation because the current buf filled up, -+ * and we had room for a bigger entry on disk, signal that we want to -+ * realloc the journal bufs: -+ */ -+ buf = journal_cur_buf(j); -+ if (journal_entry_is_open(j) && -+ buf->buf_size >> 9 < buf->disk_sectors && -+ buf->buf_size < JOURNAL_ENTRY_SIZE_MAX) -+ j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1); -+ -+ if (journal_entry_is_open(j) && -+ !__journal_entry_close(j)) { -+ /* -+ * We failed to get a reservation on the current open journal -+ * entry because it's full, and we can't close it because -+ * there's still a previous one in flight: -+ */ -+ trace_journal_entry_full(c); -+ ret = -EAGAIN; -+ } else { -+ ret = journal_entry_open(j); -+ } -+unlock: -+ if ((ret == -EAGAIN || ret == -ENOSPC) && -+ !j->res_get_blocked_start) -+ j->res_get_blocked_start = local_clock() ?: 1; -+ -+ can_discard = j->can_discard; -+ spin_unlock(&j->lock); -+ -+ if (!ret) -+ goto retry; -+ -+ if (ret == -ENOSPC) { -+ WARN_ONCE(!can_discard && (flags & JOURNAL_RES_GET_RESERVED), -+ "JOURNAL_RES_GET_RESERVED set but journal full"); -+ -+ /* -+ * Journal is full - can't rely on reclaim from work item due to -+ * freezing: -+ */ -+ trace_journal_full(c); -+ -+ if (!(flags & JOURNAL_RES_GET_NONBLOCK)) { -+ if (can_discard) { -+ bch2_journal_do_discards(j); -+ goto retry; -+ } -+ -+ if (mutex_trylock(&j->reclaim_lock)) { -+ bch2_journal_reclaim(j); -+ mutex_unlock(&j->reclaim_lock); -+ } -+ } -+ -+ ret = -EAGAIN; -+ } -+ -+ return ret; -+} -+ -+/* -+ * Essentially the entry function to the journaling code. When bcachefs is doing -+ * a btree insert, it calls this function to get the current journal write. -+ * Journal write is the structure used set up journal writes. The calling -+ * function will then add its keys to the structure, queuing them for the next -+ * write. -+ * -+ * To ensure forward progress, the current task must not be holding any -+ * btree node write locks. 
-+ */ -+int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, -+ unsigned flags) -+{ -+ int ret; -+ -+ closure_wait_event(&j->async_wait, -+ (ret = __journal_res_get(j, res, flags)) != -EAGAIN || -+ (flags & JOURNAL_RES_GET_NONBLOCK)); -+ return ret; -+} -+ -+/* journal_preres: */ -+ -+static bool journal_preres_available(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags); -+ -+ if (!ret) -+ bch2_journal_reclaim_work(&j->reclaim_work.work); -+ -+ return ret; -+} -+ -+int __bch2_journal_preres_get(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ int ret; -+ -+ closure_wait_event(&j->preres_wait, -+ (ret = bch2_journal_error(j)) || -+ journal_preres_available(j, res, new_u64s, flags)); -+ return ret; -+} -+ -+/* journal_entry_res: */ -+ -+void bch2_journal_entry_res_resize(struct journal *j, -+ struct journal_entry_res *res, -+ unsigned new_u64s) -+{ -+ union journal_res_state state; -+ int d = new_u64s - res->u64s; -+ -+ spin_lock(&j->lock); -+ -+ j->entry_u64s_reserved += d; -+ if (d <= 0) -+ goto out; -+ -+ j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d); -+ smp_mb(); -+ state = READ_ONCE(j->reservations); -+ -+ if (state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL && -+ state.cur_entry_offset > j->cur_entry_u64s) { -+ j->cur_entry_u64s += d; -+ /* -+ * Not enough room in current journal entry, have to flush it: -+ */ -+ __journal_entry_close(j); -+ } else { -+ journal_cur_buf(j)->u64s_reserved += d; -+ } -+out: -+ spin_unlock(&j->lock); -+ res->u64s += d; -+} -+ -+/* journal flushing: */ -+ -+u64 bch2_journal_last_unwritten_seq(struct journal *j) -+{ -+ u64 seq; -+ -+ spin_lock(&j->lock); -+ seq = journal_cur_seq(j); -+ if (j->reservations.prev_buf_unwritten) -+ seq--; -+ spin_unlock(&j->lock); -+ -+ return seq; -+} -+ -+/** -+ * bch2_journal_open_seq_async - try to open a new journal entry if @seq isn't -+ * open yet, or wait if we cannot -+ * -+ * used by the btree interior update machinery, when it needs to write a new -+ * btree root - every journal entry contains the roots of all the btrees, so it -+ * doesn't need to bother with getting a journal reservation -+ */ -+int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ int ret; -+ -+ spin_lock(&j->lock); -+ -+ /* -+ * Can't try to open more than one sequence number ahead: -+ */ -+ BUG_ON(journal_cur_seq(j) < seq && !journal_entry_is_open(j)); -+ -+ if (journal_cur_seq(j) > seq || -+ journal_entry_is_open(j)) { -+ spin_unlock(&j->lock); -+ return 0; -+ } -+ -+ if (journal_cur_seq(j) < seq && -+ !__journal_entry_close(j)) { -+ /* haven't finished writing out the previous one: */ -+ trace_journal_entry_full(c); -+ ret = -EAGAIN; -+ } else { -+ BUG_ON(journal_cur_seq(j) != seq); -+ -+ ret = journal_entry_open(j); -+ } -+ -+ if ((ret == -EAGAIN || ret == -ENOSPC) && -+ !j->res_get_blocked_start) -+ j->res_get_blocked_start = local_clock() ?: 1; -+ -+ if (ret == -EAGAIN || ret == -ENOSPC) -+ closure_wait(&j->async_wait, cl); -+ -+ spin_unlock(&j->lock); -+ -+ if (ret == -ENOSPC) { -+ trace_journal_full(c); -+ bch2_journal_reclaim_work(&j->reclaim_work.work); -+ ret = -EAGAIN; -+ } -+ -+ return ret; -+} -+ -+static int journal_seq_error(struct journal *j, u64 seq) -+{ -+ union journal_res_state state = READ_ONCE(j->reservations); -+ -+ if (seq == 
journal_cur_seq(j)) -+ return bch2_journal_error(j); -+ -+ if (seq + 1 == journal_cur_seq(j) && -+ !state.prev_buf_unwritten && -+ seq > j->seq_ondisk) -+ return -EIO; -+ -+ return 0; -+} -+ -+static inline struct journal_buf * -+journal_seq_to_buf(struct journal *j, u64 seq) -+{ -+ /* seq should be for a journal entry that has been opened: */ -+ BUG_ON(seq > journal_cur_seq(j)); -+ BUG_ON(seq == journal_cur_seq(j) && -+ j->reservations.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL); -+ -+ if (seq == journal_cur_seq(j)) -+ return journal_cur_buf(j); -+ if (seq + 1 == journal_cur_seq(j) && -+ j->reservations.prev_buf_unwritten) -+ return journal_prev_buf(j); -+ return NULL; -+} -+ -+/** -+ * bch2_journal_wait_on_seq - wait for a journal entry to be written -+ * -+ * does _not_ cause @seq to be written immediately - if there is no other -+ * activity to cause the relevant journal entry to be filled up or flushed it -+ * can wait for an arbitrary amount of time (up to @j->write_delay_ms, which is -+ * configurable). -+ */ -+void bch2_journal_wait_on_seq(struct journal *j, u64 seq, -+ struct closure *parent) -+{ -+ struct journal_buf *buf; -+ -+ spin_lock(&j->lock); -+ -+ if ((buf = journal_seq_to_buf(j, seq))) { -+ if (!closure_wait(&buf->wait, parent)) -+ BUG(); -+ -+ if (seq == journal_cur_seq(j)) { -+ smp_mb(); -+ if (bch2_journal_error(j)) -+ closure_wake_up(&buf->wait); -+ } -+ } -+ -+ spin_unlock(&j->lock); -+} -+ -+/** -+ * bch2_journal_flush_seq_async - wait for a journal entry to be written -+ * -+ * like bch2_journal_wait_on_seq, except that it triggers a write immediately if -+ * necessary -+ */ -+void bch2_journal_flush_seq_async(struct journal *j, u64 seq, -+ struct closure *parent) -+{ -+ struct journal_buf *buf; -+ -+ spin_lock(&j->lock); -+ -+ if (parent && -+ (buf = journal_seq_to_buf(j, seq))) -+ if (!closure_wait(&buf->wait, parent)) -+ BUG(); -+ -+ if (seq == journal_cur_seq(j)) -+ __journal_entry_close(j); -+ spin_unlock(&j->lock); -+} -+ -+static int journal_seq_flushed(struct journal *j, u64 seq) -+{ -+ int ret; -+ -+ spin_lock(&j->lock); -+ ret = seq <= j->seq_ondisk ? 1 : journal_seq_error(j, seq); -+ -+ if (seq == journal_cur_seq(j)) -+ __journal_entry_close(j); -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+int bch2_journal_flush_seq(struct journal *j, u64 seq) -+{ -+ u64 start_time = local_clock(); -+ int ret, ret2; -+ -+ ret = wait_event_killable(j->wait, (ret2 = journal_seq_flushed(j, seq))); -+ -+ bch2_time_stats_update(j->flush_seq_time, start_time); -+ -+ return ret ?: ret2 < 0 ? 
ret2 : 0; -+} -+ -+/** -+ * bch2_journal_meta_async - force a journal entry to be written -+ */ -+void bch2_journal_meta_async(struct journal *j, struct closure *parent) -+{ -+ struct journal_res res; -+ -+ memset(&res, 0, sizeof(res)); -+ -+ bch2_journal_res_get(j, &res, jset_u64s(0), 0); -+ bch2_journal_res_put(j, &res); -+ -+ bch2_journal_flush_seq_async(j, res.seq, parent); -+} -+ -+int bch2_journal_meta(struct journal *j) -+{ -+ struct journal_res res; -+ int ret; -+ -+ memset(&res, 0, sizeof(res)); -+ -+ ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); -+ if (ret) -+ return ret; -+ -+ bch2_journal_res_put(j, &res); -+ -+ return bch2_journal_flush_seq(j, res.seq); -+} -+ -+/* -+ * bch2_journal_flush_async - if there is an open journal entry, or a journal -+ * still being written, write it and wait for the write to complete -+ */ -+void bch2_journal_flush_async(struct journal *j, struct closure *parent) -+{ -+ u64 seq, journal_seq; -+ -+ spin_lock(&j->lock); -+ journal_seq = journal_cur_seq(j); -+ -+ if (journal_entry_is_open(j)) { -+ seq = journal_seq; -+ } else if (journal_seq) { -+ seq = journal_seq - 1; -+ } else { -+ spin_unlock(&j->lock); -+ return; -+ } -+ spin_unlock(&j->lock); -+ -+ bch2_journal_flush_seq_async(j, seq, parent); -+} -+ -+int bch2_journal_flush(struct journal *j) -+{ -+ u64 seq, journal_seq; -+ -+ spin_lock(&j->lock); -+ journal_seq = journal_cur_seq(j); -+ -+ if (journal_entry_is_open(j)) { -+ seq = journal_seq; -+ } else if (journal_seq) { -+ seq = journal_seq - 1; -+ } else { -+ spin_unlock(&j->lock); -+ return 0; -+ } -+ spin_unlock(&j->lock); -+ -+ return bch2_journal_flush_seq(j, seq); -+} -+ -+/* block/unlock the journal: */ -+ -+void bch2_journal_unblock(struct journal *j) -+{ -+ spin_lock(&j->lock); -+ j->blocked--; -+ spin_unlock(&j->lock); -+ -+ journal_wake(j); -+} -+ -+void bch2_journal_block(struct journal *j) -+{ -+ spin_lock(&j->lock); -+ j->blocked++; -+ spin_unlock(&j->lock); -+ -+ journal_quiesce(j); -+} -+ -+/* allocate journal on a device: */ -+ -+static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, -+ bool new_fs, struct closure *cl) -+{ -+ struct bch_fs *c = ca->fs; -+ struct journal_device *ja = &ca->journal; -+ struct bch_sb_field_journal *journal_buckets; -+ u64 *new_bucket_seq = NULL, *new_buckets = NULL; -+ int ret = 0; -+ -+ /* don't handle reducing nr of buckets yet: */ -+ if (nr <= ja->nr) -+ return 0; -+ -+ ret = -ENOMEM; -+ new_buckets = kzalloc(nr * sizeof(u64), GFP_KERNEL); -+ new_bucket_seq = kzalloc(nr * sizeof(u64), GFP_KERNEL); -+ if (!new_buckets || !new_bucket_seq) -+ goto err; -+ -+ journal_buckets = bch2_sb_resize_journal(&ca->disk_sb, -+ nr + sizeof(*journal_buckets) / sizeof(u64)); -+ if (!journal_buckets) -+ goto err; -+ -+ /* -+ * We may be called from the device add path, before the new device has -+ * actually been added to the running filesystem: -+ */ -+ if (c) -+ spin_lock(&c->journal.lock); -+ -+ memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64)); -+ memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64)); -+ swap(new_buckets, ja->buckets); -+ swap(new_bucket_seq, ja->bucket_seq); -+ -+ if (c) -+ spin_unlock(&c->journal.lock); -+ -+ while (ja->nr < nr) { -+ struct open_bucket *ob = NULL; -+ unsigned pos; -+ long bucket; -+ -+ if (new_fs) { -+ bucket = bch2_bucket_alloc_new_fs(ca); -+ if (bucket < 0) { -+ ret = -ENOSPC; -+ goto err; -+ } -+ } else { -+ ob = bch2_bucket_alloc(c, ca, RESERVE_ALLOC, -+ false, cl); -+ if (IS_ERR(ob)) { -+ ret = cl ? 
-EAGAIN : -ENOSPC; -+ goto err; -+ } -+ -+ bucket = sector_to_bucket(ca, ob->ptr.offset); -+ } -+ -+ if (c) { -+ percpu_down_read(&c->mark_lock); -+ spin_lock(&c->journal.lock); -+ } -+ -+ pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0; -+ __array_insert_item(ja->buckets, ja->nr, pos); -+ __array_insert_item(ja->bucket_seq, ja->nr, pos); -+ __array_insert_item(journal_buckets->buckets, ja->nr, pos); -+ ja->nr++; -+ -+ ja->buckets[pos] = bucket; -+ ja->bucket_seq[pos] = 0; -+ journal_buckets->buckets[pos] = cpu_to_le64(bucket); -+ -+ if (pos <= ja->discard_idx) -+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr; -+ if (pos <= ja->dirty_idx_ondisk) -+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr; -+ if (pos <= ja->dirty_idx) -+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr; -+ if (pos <= ja->cur_idx) -+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr; -+ -+ bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_journal, -+ ca->mi.bucket_size, -+ gc_phase(GC_PHASE_SB), -+ 0); -+ -+ if (c) { -+ spin_unlock(&c->journal.lock); -+ percpu_up_read(&c->mark_lock); -+ } -+ -+ if (!new_fs) -+ bch2_open_bucket_put(c, ob); -+ } -+ -+ ret = 0; -+err: -+ kfree(new_bucket_seq); -+ kfree(new_buckets); -+ -+ return ret; -+} -+ -+/* -+ * Allocate more journal space at runtime - not currently making use if it, but -+ * the code works: -+ */ -+int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, -+ unsigned nr) -+{ -+ struct journal_device *ja = &ca->journal; -+ struct closure cl; -+ unsigned current_nr; -+ int ret; -+ -+ closure_init_stack(&cl); -+ -+ do { -+ struct disk_reservation disk_res = { 0, 0 }; -+ -+ closure_sync(&cl); -+ -+ mutex_lock(&c->sb_lock); -+ current_nr = ja->nr; -+ -+ /* -+ * note: journal buckets aren't really counted as _sectors_ used yet, so -+ * we don't need the disk reservation to avoid the BUG_ON() in buckets.c -+ * when space used goes up without a reservation - but we do need the -+ * reservation to ensure we'll actually be able to allocate: -+ */ -+ -+ if (bch2_disk_reservation_get(c, &disk_res, -+ bucket_to_sector(ca, nr - ja->nr), 1, 0)) { -+ mutex_unlock(&c->sb_lock); -+ return -ENOSPC; -+ } -+ -+ ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl); -+ -+ bch2_disk_reservation_put(c, &disk_res); -+ -+ if (ja->nr != current_nr) -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ } while (ret == -EAGAIN); -+ -+ return ret; -+} -+ -+int bch2_dev_journal_alloc(struct bch_dev *ca) -+{ -+ unsigned nr; -+ -+ if (dynamic_fault("bcachefs:add:journal_alloc")) -+ return -ENOMEM; -+ -+ /* -+ * clamp journal size to 1024 buckets or 512MB (in sectors), whichever -+ * is smaller: -+ */ -+ nr = clamp_t(unsigned, ca->mi.nbuckets >> 8, -+ BCH_JOURNAL_BUCKETS_MIN, -+ min(1 << 10, -+ (1 << 20) / ca->mi.bucket_size)); -+ -+ return __bch2_set_nr_journal_buckets(ca, nr, true, NULL); -+} -+ -+/* startup/shutdown: */ -+ -+static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx) -+{ -+ union journal_res_state state; -+ struct journal_buf *w; -+ bool ret; -+ -+ spin_lock(&j->lock); -+ state = READ_ONCE(j->reservations); -+ w = j->buf + !state.idx; -+ -+ ret = state.prev_buf_unwritten && -+ bch2_bkey_has_device(bkey_i_to_s_c(&w->key), dev_idx); -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca) -+{ -+ wait_event(j->wait, !bch2_journal_writing_to_device(j, ca->dev_idx)); -+} -+ -+void bch2_fs_journal_stop(struct journal *j) -+{ -+ bch2_journal_flush_all_pins(j); -+ -+ wait_event(j->wait, 
journal_entry_close(j)); -+ -+ /* do we need to write another journal entry? */ -+ if (test_bit(JOURNAL_NOT_EMPTY, &j->flags)) -+ bch2_journal_meta(j); -+ -+ journal_quiesce(j); -+ -+ BUG_ON(!bch2_journal_error(j) && -+ test_bit(JOURNAL_NOT_EMPTY, &j->flags)); -+ -+ cancel_delayed_work_sync(&j->write_work); -+ cancel_delayed_work_sync(&j->reclaim_work); -+} -+ -+int bch2_fs_journal_start(struct journal *j, u64 cur_seq, -+ struct list_head *journal_entries) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_entry_pin_list *p; -+ struct journal_replay *i; -+ u64 last_seq = cur_seq, nr, seq; -+ -+ if (!list_empty(journal_entries)) -+ last_seq = le64_to_cpu(list_last_entry(journal_entries, -+ struct journal_replay, list)->j.last_seq); -+ -+ nr = cur_seq - last_seq; -+ -+ if (nr + 1 > j->pin.size) { -+ free_fifo(&j->pin); -+ init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL); -+ if (!j->pin.data) { -+ bch_err(c, "error reallocating journal fifo (%llu open entries)", nr); -+ return -ENOMEM; -+ } -+ } -+ -+ j->replay_journal_seq = last_seq; -+ j->replay_journal_seq_end = cur_seq; -+ j->last_seq_ondisk = last_seq; -+ j->pin.front = last_seq; -+ j->pin.back = cur_seq; -+ atomic64_set(&j->seq, cur_seq - 1); -+ -+ fifo_for_each_entry_ptr(p, &j->pin, seq) { -+ INIT_LIST_HEAD(&p->list); -+ INIT_LIST_HEAD(&p->flushed); -+ atomic_set(&p->count, 1); -+ p->devs.nr = 0; -+ } -+ -+ list_for_each_entry(i, journal_entries, list) { -+ seq = le64_to_cpu(i->j.seq); -+ BUG_ON(seq >= cur_seq); -+ -+ if (seq < last_seq) -+ continue; -+ -+ journal_seq_pin(j, seq)->devs = i->devs; -+ } -+ -+ spin_lock(&j->lock); -+ -+ set_bit(JOURNAL_STARTED, &j->flags); -+ -+ journal_pin_new_entry(j, 1); -+ bch2_journal_buf_init(j); -+ -+ c->last_bucket_seq_cleanup = journal_cur_seq(j); -+ -+ bch2_journal_space_available(j); -+ spin_unlock(&j->lock); -+ -+ return 0; -+} -+ -+/* init/exit: */ -+ -+void bch2_dev_journal_exit(struct bch_dev *ca) -+{ -+ kfree(ca->journal.bio); -+ kfree(ca->journal.buckets); -+ kfree(ca->journal.bucket_seq); -+ -+ ca->journal.bio = NULL; -+ ca->journal.buckets = NULL; -+ ca->journal.bucket_seq = NULL; -+} -+ -+int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) -+{ -+ struct journal_device *ja = &ca->journal; -+ struct bch_sb_field_journal *journal_buckets = -+ bch2_sb_get_journal(sb); -+ unsigned i; -+ -+ ja->nr = bch2_nr_journal_buckets(journal_buckets); -+ -+ ja->bucket_seq = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); -+ if (!ja->bucket_seq) -+ return -ENOMEM; -+ -+ ca->journal.bio = bio_kmalloc(GFP_KERNEL, -+ DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE)); -+ if (!ca->journal.bio) -+ return -ENOMEM; -+ -+ ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); -+ if (!ja->buckets) -+ return -ENOMEM; -+ -+ for (i = 0; i < ja->nr; i++) -+ ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]); -+ -+ return 0; -+} -+ -+void bch2_fs_journal_exit(struct journal *j) -+{ -+ kvpfree(j->buf[1].data, j->buf[1].buf_size); -+ kvpfree(j->buf[0].data, j->buf[0].buf_size); -+ free_fifo(&j->pin); -+} -+ -+int bch2_fs_journal_init(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ static struct lock_class_key res_key; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ spin_lock_init(&j->lock); -+ spin_lock_init(&j->err_lock); -+ init_waitqueue_head(&j->wait); -+ INIT_DELAYED_WORK(&j->write_work, journal_write_work); -+ INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work); -+ 
init_waitqueue_head(&j->pin_flush_wait); -+ mutex_init(&j->reclaim_lock); -+ mutex_init(&j->discard_lock); -+ -+ lockdep_init_map(&j->res_map, "journal res", &res_key, 0); -+ -+ j->buf[0].buf_size = JOURNAL_ENTRY_SIZE_MIN; -+ j->buf[1].buf_size = JOURNAL_ENTRY_SIZE_MIN; -+ j->write_delay_ms = 1000; -+ j->reclaim_delay_ms = 100; -+ -+ /* Btree roots: */ -+ j->entry_u64s_reserved += -+ BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX); -+ -+ atomic64_set(&j->reservations.counter, -+ ((union journal_res_state) -+ { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); -+ -+ if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) || -+ !(j->buf[0].data = kvpmalloc(j->buf[0].buf_size, GFP_KERNEL)) || -+ !(j->buf[1].data = kvpmalloc(j->buf[1].buf_size, GFP_KERNEL))) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ j->pin.front = j->pin.back = 1; -+out: -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -+ -+/* debug: */ -+ -+void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ union journal_res_state s; -+ struct bch_dev *ca; -+ unsigned iter; -+ -+ rcu_read_lock(); -+ spin_lock(&j->lock); -+ s = READ_ONCE(j->reservations); -+ -+ pr_buf(out, -+ "active journal entries:\t%llu\n" -+ "seq:\t\t\t%llu\n" -+ "last_seq:\t\t%llu\n" -+ "last_seq_ondisk:\t%llu\n" -+ "prereserved:\t\t%u/%u\n" -+ "current entry sectors:\t%u\n" -+ "current entry:\t\t", -+ fifo_used(&j->pin), -+ journal_cur_seq(j), -+ journal_last_seq(j), -+ j->last_seq_ondisk, -+ j->prereserved.reserved, -+ j->prereserved.remaining, -+ j->cur_entry_sectors); -+ -+ switch (s.cur_entry_offset) { -+ case JOURNAL_ENTRY_ERROR_VAL: -+ pr_buf(out, "error\n"); -+ break; -+ case JOURNAL_ENTRY_CLOSED_VAL: -+ pr_buf(out, "closed\n"); -+ break; -+ default: -+ pr_buf(out, "%u/%u\n", -+ s.cur_entry_offset, -+ j->cur_entry_u64s); -+ break; -+ } -+ -+ pr_buf(out, -+ "current entry refs:\t%u\n" -+ "prev entry unwritten:\t", -+ journal_state_count(s, s.idx)); -+ -+ if (s.prev_buf_unwritten) -+ pr_buf(out, "yes, ref %u sectors %u\n", -+ journal_state_count(s, !s.idx), -+ journal_prev_buf(j)->sectors); -+ else -+ pr_buf(out, "no\n"); -+ -+ pr_buf(out, -+ "need write:\t\t%i\n" -+ "replay done:\t\t%i\n", -+ test_bit(JOURNAL_NEED_WRITE, &j->flags), -+ test_bit(JOURNAL_REPLAY_DONE, &j->flags)); -+ -+ for_each_member_device_rcu(ca, c, iter, -+ &c->rw_devs[BCH_DATA_journal]) { -+ struct journal_device *ja = &ca->journal; -+ -+ if (!ja->nr) -+ continue; -+ -+ pr_buf(out, -+ "dev %u:\n" -+ "\tnr\t\t%u\n" -+ "\tavailable\t%u:%u\n" -+ "\tdiscard_idx\t\t%u\n" -+ "\tdirty_idx_ondisk\t%u (seq %llu)\n" -+ "\tdirty_idx\t\t%u (seq %llu)\n" -+ "\tcur_idx\t\t%u (seq %llu)\n", -+ iter, ja->nr, -+ bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), -+ ja->sectors_free, -+ ja->discard_idx, -+ ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk], -+ ja->dirty_idx, ja->bucket_seq[ja->dirty_idx], -+ ja->cur_idx, ja->bucket_seq[ja->cur_idx]); -+ } -+ -+ spin_unlock(&j->lock); -+ rcu_read_unlock(); -+} -+ -+void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j) -+{ -+ struct journal_entry_pin_list *pin_list; -+ struct journal_entry_pin *pin; -+ u64 i; -+ -+ spin_lock(&j->lock); -+ fifo_for_each_entry_ptr(pin_list, &j->pin, i) { -+ pr_buf(out, "%llu: count %u\n", -+ i, atomic_read(&pin_list->count)); -+ -+ list_for_each_entry(pin, &pin_list->list, list) -+ pr_buf(out, "\t%px %ps\n", -+ pin, pin->flush); -+ -+ if (!list_empty(&pin_list->flushed)) -+ 
pr_buf(out, "flushed:\n"); -+ -+ list_for_each_entry(pin, &pin_list->flushed, list) -+ pr_buf(out, "\t%px %ps\n", -+ pin, pin->flush); -+ } -+ spin_unlock(&j->lock); -+} -diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h -new file mode 100644 -index 000000000000..56438840efd7 ---- /dev/null -+++ b/fs/bcachefs/journal.h -@@ -0,0 +1,519 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_H -+#define _BCACHEFS_JOURNAL_H -+ -+/* -+ * THE JOURNAL: -+ * -+ * The primary purpose of the journal is to log updates (insertions) to the -+ * b-tree, to avoid having to do synchronous updates to the b-tree on disk. -+ * -+ * Without the journal, the b-tree is always internally consistent on -+ * disk - and in fact, in the earliest incarnations bcache didn't have a journal -+ * but did handle unclean shutdowns by doing all index updates synchronously -+ * (with coalescing). -+ * -+ * Updates to interior nodes still happen synchronously and without the journal -+ * (for simplicity) - this may change eventually but updates to interior nodes -+ * are rare enough it's not a huge priority. -+ * -+ * This means the journal is relatively separate from the b-tree; it consists of -+ * just a list of keys and journal replay consists of just redoing those -+ * insertions in same order that they appear in the journal. -+ * -+ * PERSISTENCE: -+ * -+ * For synchronous updates (where we're waiting on the index update to hit -+ * disk), the journal entry will be written out immediately (or as soon as -+ * possible, if the write for the previous journal entry was still in flight). -+ * -+ * Synchronous updates are specified by passing a closure (@flush_cl) to -+ * bch2_btree_insert() or bch_btree_insert_node(), which then pass that parameter -+ * down to the journalling code. That closure will will wait on the journal -+ * write to complete (via closure_wait()). -+ * -+ * If the index update wasn't synchronous, the journal entry will be -+ * written out after 10 ms have elapsed, by default (the delay_ms field -+ * in struct journal). -+ * -+ * JOURNAL ENTRIES: -+ * -+ * A journal entry is variable size (struct jset), it's got a fixed length -+ * header and then a variable number of struct jset_entry entries. -+ * -+ * Journal entries are identified by monotonically increasing 64 bit sequence -+ * numbers - jset->seq; other places in the code refer to this sequence number. -+ * -+ * A jset_entry entry contains one or more bkeys (which is what gets inserted -+ * into the b-tree). We need a container to indicate which b-tree the key is -+ * for; also, the roots of the various b-trees are stored in jset_entry entries -+ * (one for each b-tree) - this lets us add new b-tree types without changing -+ * the on disk format. -+ * -+ * We also keep some things in the journal header that are logically part of the -+ * superblock - all the things that are frequently updated. This is for future -+ * bcache on raw flash support; the superblock (which will become another -+ * journal) can't be moved or wear leveled, so it contains just enough -+ * information to find the main journal, and the superblock only has to be -+ * rewritten when we want to move/wear level the main journal. 
-+ * -+ * JOURNAL LAYOUT ON DISK: -+ * -+ * The journal is written to a ringbuffer of buckets (which is kept in the -+ * superblock); the individual buckets are not necessarily contiguous on disk -+ * which means that journal entries are not allowed to span buckets, but also -+ * that we can resize the journal at runtime if desired (unimplemented). -+ * -+ * The journal buckets exist in the same pool as all the other buckets that are -+ * managed by the allocator and garbage collection - garbage collection marks -+ * the journal buckets as metadata buckets. -+ * -+ * OPEN/DIRTY JOURNAL ENTRIES: -+ * -+ * Open/dirty journal entries are journal entries that contain b-tree updates -+ * that have not yet been written out to the b-tree on disk. We have to track -+ * which journal entries are dirty, and we also have to avoid wrapping around -+ * the journal and overwriting old but still dirty journal entries with new -+ * journal entries. -+ * -+ * On disk, this is represented with the "last_seq" field of struct jset; -+ * last_seq is the first sequence number that journal replay has to replay. -+ * -+ * To avoid overwriting dirty journal entries on disk, we keep a mapping (in -+ * journal_device->seq) of for each journal bucket, the highest sequence number -+ * any journal entry it contains. Then, by comparing that against last_seq we -+ * can determine whether that journal bucket contains dirty journal entries or -+ * not. -+ * -+ * To track which journal entries are dirty, we maintain a fifo of refcounts -+ * (where each entry corresponds to a specific sequence number) - when a ref -+ * goes to 0, that journal entry is no longer dirty. -+ * -+ * Journalling of index updates is done at the same time as the b-tree itself is -+ * being modified (see btree_insert_key()); when we add the key to the journal -+ * the pending b-tree write takes a ref on the journal entry the key was added -+ * to. If a pending b-tree write would need to take refs on multiple dirty -+ * journal entries, it only keeps the ref on the oldest one (since a newer -+ * journal entry will still be replayed if an older entry was dirty). -+ * -+ * JOURNAL FILLING UP: -+ * -+ * There are two ways the journal could fill up; either we could run out of -+ * space to write to, or we could have too many open journal entries and run out -+ * of room in the fifo of refcounts. Since those refcounts are decremented -+ * without any locking we can't safely resize that fifo, so we handle it the -+ * same way. -+ * -+ * If the journal fills up, we start flushing dirty btree nodes until we can -+ * allocate space for a journal write again - preferentially flushing btree -+ * nodes that are pinning the oldest journal entries first. 
-+ */ -+ -+#include -+ -+#include "journal_types.h" -+ -+struct bch_fs; -+ -+static inline void journal_wake(struct journal *j) -+{ -+ wake_up(&j->wait); -+ closure_wake_up(&j->async_wait); -+ closure_wake_up(&j->preres_wait); -+} -+ -+static inline struct journal_buf *journal_cur_buf(struct journal *j) -+{ -+ return j->buf + j->reservations.idx; -+} -+ -+static inline struct journal_buf *journal_prev_buf(struct journal *j) -+{ -+ return j->buf + !j->reservations.idx; -+} -+ -+/* Sequence number of oldest dirty journal entry */ -+ -+static inline u64 journal_last_seq(struct journal *j) -+{ -+ return j->pin.front; -+} -+ -+static inline u64 journal_cur_seq(struct journal *j) -+{ -+ BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq)); -+ -+ return j->pin.back - 1; -+} -+ -+u64 bch2_inode_journal_seq(struct journal *, u64); -+ -+static inline int journal_state_count(union journal_res_state s, int idx) -+{ -+ return idx == 0 ? s.buf0_count : s.buf1_count; -+} -+ -+static inline void journal_state_inc(union journal_res_state *s) -+{ -+ s->buf0_count += s->idx == 0; -+ s->buf1_count += s->idx == 1; -+} -+ -+static inline void bch2_journal_set_has_inode(struct journal *j, -+ struct journal_res *res, -+ u64 inum) -+{ -+ struct journal_buf *buf = &j->buf[res->idx]; -+ unsigned long bit = hash_64(inum, ilog2(sizeof(buf->has_inode) * 8)); -+ -+ /* avoid atomic op if possible */ -+ if (unlikely(!test_bit(bit, buf->has_inode))) -+ set_bit(bit, buf->has_inode); -+} -+ -+/* -+ * Amount of space that will be taken up by some keys in the journal (i.e. -+ * including the jset header) -+ */ -+static inline unsigned jset_u64s(unsigned u64s) -+{ -+ return u64s + sizeof(struct jset_entry) / sizeof(u64); -+} -+ -+static inline int journal_entry_overhead(struct journal *j) -+{ -+ return sizeof(struct jset) / sizeof(u64) + j->entry_u64s_reserved; -+} -+ -+static inline struct jset_entry * -+bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s) -+{ -+ struct jset *jset = buf->data; -+ struct jset_entry *entry = vstruct_idx(jset, le32_to_cpu(jset->u64s)); -+ -+ memset(entry, 0, sizeof(*entry)); -+ entry->u64s = cpu_to_le16(u64s); -+ -+ le32_add_cpu(&jset->u64s, jset_u64s(u64s)); -+ -+ return entry; -+} -+ -+static inline struct jset_entry * -+journal_res_entry(struct journal *j, struct journal_res *res) -+{ -+ return vstruct_idx(j->buf[res->idx].data, res->offset); -+} -+ -+static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type, -+ enum btree_id id, unsigned level, -+ const void *data, unsigned u64s) -+{ -+ memset(entry, 0, sizeof(*entry)); -+ entry->u64s = cpu_to_le16(u64s); -+ entry->type = type; -+ entry->btree_id = id; -+ entry->level = level; -+ memcpy_u64s_small(entry->_data, data, u64s); -+ -+ return jset_u64s(u64s); -+} -+ -+static inline void bch2_journal_add_entry(struct journal *j, struct journal_res *res, -+ unsigned type, enum btree_id id, -+ unsigned level, -+ const void *data, unsigned u64s) -+{ -+ unsigned actual = journal_entry_set(journal_res_entry(j, res), -+ type, id, level, data, u64s); -+ -+ EBUG_ON(!res->ref); -+ EBUG_ON(actual > res->u64s); -+ -+ res->offset += actual; -+ res->u64s -= actual; -+} -+ -+static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *res, -+ enum btree_id id, const struct bkey_i *k) -+{ -+ bch2_journal_add_entry(j, res, BCH_JSET_ENTRY_btree_keys, -+ id, 0, k, k->k.u64s); -+} -+ -+static inline bool journal_entry_empty(struct jset *j) -+{ -+ struct jset_entry *i; -+ -+ if (j->seq != j->last_seq) -+ 
return false; -+ -+ vstruct_for_each(j, i) -+ if (i->type == BCH_JSET_ENTRY_btree_keys && i->u64s) -+ return false; -+ return true; -+} -+ -+void __bch2_journal_buf_put(struct journal *, bool); -+ -+static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, -+ bool need_write_just_set) -+{ -+ union journal_res_state s; -+ -+ s.v = atomic64_sub_return(((union journal_res_state) { -+ .buf0_count = idx == 0, -+ .buf1_count = idx == 1, -+ }).v, &j->reservations.counter); -+ if (!journal_state_count(s, idx)) { -+ EBUG_ON(s.idx == idx || !s.prev_buf_unwritten); -+ __bch2_journal_buf_put(j, need_write_just_set); -+ } -+} -+ -+/* -+ * This function releases the journal write structure so other threads can -+ * then proceed to add their keys as well. -+ */ -+static inline void bch2_journal_res_put(struct journal *j, -+ struct journal_res *res) -+{ -+ if (!res->ref) -+ return; -+ -+ lock_release(&j->res_map, _THIS_IP_); -+ -+ while (res->u64s) -+ bch2_journal_add_entry(j, res, -+ BCH_JSET_ENTRY_btree_keys, -+ 0, 0, NULL, 0); -+ -+ bch2_journal_buf_put(j, res->idx, false); -+ -+ res->ref = 0; -+} -+ -+int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *, -+ unsigned); -+ -+#define JOURNAL_RES_GET_NONBLOCK (1 << 0) -+#define JOURNAL_RES_GET_CHECK (1 << 1) -+#define JOURNAL_RES_GET_RESERVED (1 << 2) -+#define JOURNAL_RES_GET_RECLAIM (1 << 3) -+ -+static inline int journal_res_get_fast(struct journal *j, -+ struct journal_res *res, -+ unsigned flags) -+{ -+ union journal_res_state old, new; -+ u64 v = atomic64_read(&j->reservations.counter); -+ -+ do { -+ old.v = new.v = v; -+ -+ /* -+ * Check if there is still room in the current journal -+ * entry: -+ */ -+ if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s) -+ return 0; -+ -+ EBUG_ON(!journal_state_count(new, new.idx)); -+ -+ if (!(flags & JOURNAL_RES_GET_RESERVED) && -+ !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) -+ return 0; -+ -+ if (flags & JOURNAL_RES_GET_CHECK) -+ return 1; -+ -+ new.cur_entry_offset += res->u64s; -+ journal_state_inc(&new); -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ res->ref = true; -+ res->idx = old.idx; -+ res->offset = old.cur_entry_offset; -+ res->seq = le64_to_cpu(j->buf[old.idx].data->seq); -+ return 1; -+} -+ -+static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res, -+ unsigned u64s, unsigned flags) -+{ -+ int ret; -+ -+ EBUG_ON(res->ref); -+ EBUG_ON(!test_bit(JOURNAL_STARTED, &j->flags)); -+ -+ res->u64s = u64s; -+ -+ if (journal_res_get_fast(j, res, flags)) -+ goto out; -+ -+ ret = bch2_journal_res_get_slowpath(j, res, flags); -+ if (ret) -+ return ret; -+out: -+ if (!(flags & JOURNAL_RES_GET_CHECK)) { -+ lock_acquire_shared(&j->res_map, 0, -+ (flags & JOURNAL_RES_GET_NONBLOCK) != 0, -+ NULL, _THIS_IP_); -+ EBUG_ON(!res->ref); -+ } -+ return 0; -+} -+ -+/* journal_preres: */ -+ -+static inline bool journal_check_may_get_unreserved(struct journal *j) -+{ -+ union journal_preres_state s = READ_ONCE(j->prereserved); -+ bool ret = s.reserved <= s.remaining && -+ fifo_free(&j->pin) > 8; -+ -+ lockdep_assert_held(&j->lock); -+ -+ if (ret != test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) { -+ if (ret) { -+ set_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags); -+ journal_wake(j); -+ } else { -+ clear_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags); -+ } -+ } -+ return ret; -+} -+ -+static inline void bch2_journal_preres_put(struct journal *j, -+ struct journal_preres *res) -+{ -+ union journal_preres_state s = { 
.reserved = res->u64s }; -+ -+ if (!res->u64s) -+ return; -+ -+ s.v = atomic64_sub_return(s.v, &j->prereserved.counter); -+ res->u64s = 0; -+ closure_wake_up(&j->preres_wait); -+ -+ if (s.reserved <= s.remaining && -+ !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) { -+ spin_lock(&j->lock); -+ journal_check_may_get_unreserved(j); -+ spin_unlock(&j->lock); -+ } -+} -+ -+int __bch2_journal_preres_get(struct journal *, -+ struct journal_preres *, unsigned, unsigned); -+ -+static inline int bch2_journal_preres_get_fast(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ int d = new_u64s - res->u64s; -+ union journal_preres_state old, new; -+ u64 v = atomic64_read(&j->prereserved.counter); -+ -+ do { -+ old.v = new.v = v; -+ -+ new.reserved += d; -+ -+ /* -+ * If we're being called from the journal reclaim path, we have -+ * to unconditionally give out the pre-reservation, there's -+ * nothing else sensible we can do - otherwise we'd recurse back -+ * into the reclaim path and deadlock: -+ */ -+ -+ if (!(flags & JOURNAL_RES_GET_RECLAIM) && -+ new.reserved > new.remaining) -+ return 0; -+ } while ((v = atomic64_cmpxchg(&j->prereserved.counter, -+ old.v, new.v)) != old.v); -+ -+ res->u64s += d; -+ return 1; -+} -+ -+static inline int bch2_journal_preres_get(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ if (new_u64s <= res->u64s) -+ return 0; -+ -+ if (bch2_journal_preres_get_fast(j, res, new_u64s, flags)) -+ return 0; -+ -+ if (flags & JOURNAL_RES_GET_NONBLOCK) -+ return -EAGAIN; -+ -+ return __bch2_journal_preres_get(j, res, new_u64s, flags); -+} -+ -+/* journal_entry_res: */ -+ -+void bch2_journal_entry_res_resize(struct journal *, -+ struct journal_entry_res *, -+ unsigned); -+ -+u64 bch2_journal_last_unwritten_seq(struct journal *); -+int bch2_journal_open_seq_async(struct journal *, u64, struct closure *); -+ -+void bch2_journal_wait_on_seq(struct journal *, u64, struct closure *); -+void bch2_journal_flush_seq_async(struct journal *, u64, struct closure *); -+void bch2_journal_flush_async(struct journal *, struct closure *); -+void bch2_journal_meta_async(struct journal *, struct closure *); -+ -+int bch2_journal_flush_seq(struct journal *, u64); -+int bch2_journal_flush(struct journal *); -+int bch2_journal_meta(struct journal *); -+ -+void bch2_journal_halt(struct journal *); -+ -+static inline int bch2_journal_error(struct journal *j) -+{ -+ return j->reservations.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL -+ ? 
-EIO : 0; -+} -+ -+struct bch_dev; -+ -+static inline bool journal_flushes_device(struct bch_dev *ca) -+{ -+ return true; -+} -+ -+static inline void bch2_journal_set_replay_done(struct journal *j) -+{ -+ BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags)); -+ set_bit(JOURNAL_REPLAY_DONE, &j->flags); -+} -+ -+void bch2_journal_unblock(struct journal *); -+void bch2_journal_block(struct journal *); -+ -+void bch2_journal_debug_to_text(struct printbuf *, struct journal *); -+void bch2_journal_pins_to_text(struct printbuf *, struct journal *); -+ -+int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, -+ unsigned nr); -+int bch2_dev_journal_alloc(struct bch_dev *); -+ -+void bch2_dev_journal_stop(struct journal *, struct bch_dev *); -+ -+void bch2_fs_journal_stop(struct journal *); -+int bch2_fs_journal_start(struct journal *, u64, struct list_head *); -+ -+void bch2_dev_journal_exit(struct bch_dev *); -+int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *); -+void bch2_fs_journal_exit(struct journal *); -+int bch2_fs_journal_init(struct journal *); -+ -+#endif /* _BCACHEFS_JOURNAL_H */ -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -new file mode 100644 -index 000000000000..bd0e6b371701 ---- /dev/null -+++ b/fs/bcachefs/journal_io.c -@@ -0,0 +1,1183 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "btree_io.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "error.h" -+#include "io.h" -+#include "journal.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "replicas.h" -+ -+#include -+ -+struct journal_list { -+ struct closure cl; -+ struct mutex lock; -+ struct list_head *head; -+ int ret; -+}; -+ -+#define JOURNAL_ENTRY_ADD_OK 0 -+#define JOURNAL_ENTRY_ADD_OUT_OF_RANGE 5 -+ -+/* -+ * Given a journal entry we just read, add it to the list of journal entries to -+ * be replayed: -+ */ -+static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, -+ struct journal_list *jlist, struct jset *j, -+ bool bad) -+{ -+ struct journal_replay *i, *pos; -+ struct bch_devs_list devs = { .nr = 0 }; -+ struct list_head *where; -+ size_t bytes = vstruct_bytes(j); -+ __le64 last_seq; -+ int ret; -+ -+ last_seq = !list_empty(jlist->head) -+ ? list_last_entry(jlist->head, struct journal_replay, -+ list)->j.last_seq -+ : 0; -+ -+ if (!c->opts.read_entire_journal) { -+ /* Is this entry older than the range we need? */ -+ if (le64_to_cpu(j->seq) < le64_to_cpu(last_seq)) { -+ ret = JOURNAL_ENTRY_ADD_OUT_OF_RANGE; -+ goto out; -+ } -+ -+ /* Drop entries we don't need anymore */ -+ list_for_each_entry_safe(i, pos, jlist->head, list) { -+ if (le64_to_cpu(i->j.seq) >= le64_to_cpu(j->last_seq)) -+ break; -+ list_del(&i->list); -+ kvpfree(i, offsetof(struct journal_replay, j) + -+ vstruct_bytes(&i->j)); -+ } -+ } -+ -+ list_for_each_entry_reverse(i, jlist->head, list) { -+ if (le64_to_cpu(j->seq) > le64_to_cpu(i->j.seq)) { -+ where = &i->list; -+ goto add; -+ } -+ } -+ -+ where = jlist->head; -+add: -+ i = where->next != jlist->head -+ ? container_of(where->next, struct journal_replay, list) -+ : NULL; -+ -+ /* -+ * Duplicate journal entries? 
If so we want the one that didn't have a -+ * checksum error: -+ */ -+ if (i && le64_to_cpu(j->seq) == le64_to_cpu(i->j.seq)) { -+ if (i->bad) { -+ devs = i->devs; -+ list_del(&i->list); -+ kvpfree(i, offsetof(struct journal_replay, j) + -+ vstruct_bytes(&i->j)); -+ } else if (bad) { -+ goto found; -+ } else { -+ fsck_err_on(bytes != vstruct_bytes(&i->j) || -+ memcmp(j, &i->j, bytes), c, -+ "found duplicate but non identical journal entries (seq %llu)", -+ le64_to_cpu(j->seq)); -+ goto found; -+ } -+ -+ } -+ -+ i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL); -+ if (!i) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ list_add(&i->list, where); -+ i->devs = devs; -+ i->bad = bad; -+ memcpy(&i->j, j, bytes); -+found: -+ if (!bch2_dev_list_has_dev(i->devs, ca->dev_idx)) -+ bch2_dev_list_add_dev(&i->devs, ca->dev_idx); -+ else -+ fsck_err_on(1, c, "duplicate journal entries on same device"); -+ ret = JOURNAL_ENTRY_ADD_OK; -+out: -+fsck_err: -+ return ret; -+} -+ -+static struct nonce journal_nonce(const struct jset *jset) -+{ -+ return (struct nonce) {{ -+ [0] = 0, -+ [1] = ((__le32 *) &jset->seq)[0], -+ [2] = ((__le32 *) &jset->seq)[1], -+ [3] = BCH_NONCE_JOURNAL, -+ }}; -+} -+ -+/* this fills in a range with empty jset_entries: */ -+static void journal_entry_null_range(void *start, void *end) -+{ -+ struct jset_entry *entry; -+ -+ for (entry = start; entry != end; entry = vstruct_next(entry)) -+ memset(entry, 0, sizeof(*entry)); -+} -+ -+#define JOURNAL_ENTRY_REREAD 5 -+#define JOURNAL_ENTRY_NONE 6 -+#define JOURNAL_ENTRY_BAD 7 -+ -+#define journal_entry_err(c, msg, ...) \ -+({ \ -+ switch (write) { \ -+ case READ: \ -+ mustfix_fsck_err(c, msg, ##__VA_ARGS__); \ -+ break; \ -+ case WRITE: \ -+ bch_err(c, "corrupt metadata before write:\n" \ -+ msg, ##__VA_ARGS__); \ -+ if (bch2_fs_inconsistent(c)) { \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ } \ -+ break; \ -+ } \ -+ true; \ -+}) -+ -+#define journal_entry_err_on(cond, c, msg, ...) \ -+ ((cond) ? 
journal_entry_err(c, msg, ##__VA_ARGS__) : false) -+ -+static int journal_validate_key(struct bch_fs *c, struct jset *jset, -+ struct jset_entry *entry, -+ unsigned level, enum btree_id btree_id, -+ struct bkey_i *k, -+ const char *type, int write) -+{ -+ void *next = vstruct_next(entry); -+ const char *invalid; -+ unsigned version = le32_to_cpu(jset->version); -+ int ret = 0; -+ -+ if (journal_entry_err_on(!k->k.u64s, c, -+ "invalid %s in journal: k->u64s 0", type)) { -+ entry->u64s = cpu_to_le16((u64 *) k - entry->_data); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ if (journal_entry_err_on((void *) bkey_next(k) > -+ (void *) vstruct_next(entry), c, -+ "invalid %s in journal: extends past end of journal entry", -+ type)) { -+ entry->u64s = cpu_to_le16((u64 *) k - entry->_data); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, c, -+ "invalid %s in journal: bad format %u", -+ type, k->k.format)) { -+ le16_add_cpu(&entry->u64s, -k->k.u64s); -+ memmove(k, bkey_next(k), next - (void *) bkey_next(k)); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ if (!write) -+ bch2_bkey_compat(level, btree_id, version, -+ JSET_BIG_ENDIAN(jset), write, -+ NULL, bkey_to_packed(k)); -+ -+ invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k), -+ __btree_node_type(level, btree_id)); -+ if (invalid) { -+ char buf[160]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k)); -+ mustfix_fsck_err(c, "invalid %s in journal: %s\n%s", -+ type, invalid, buf); -+ -+ le16_add_cpu(&entry->u64s, -k->k.u64s); -+ memmove(k, bkey_next(k), next - (void *) bkey_next(k)); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ if (write) -+ bch2_bkey_compat(level, btree_id, version, -+ JSET_BIG_ENDIAN(jset), write, -+ NULL, bkey_to_packed(k)); -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_btree_keys(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct bkey_i *k; -+ -+ vstruct_for_each(entry, k) { -+ int ret = journal_validate_key(c, jset, entry, -+ entry->level, -+ entry->btree_id, -+ k, "key", write); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int journal_entry_validate_btree_root(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct bkey_i *k = entry->start; -+ int ret = 0; -+ -+ if (journal_entry_err_on(!entry->u64s || -+ le16_to_cpu(entry->u64s) != k->k.u64s, c, -+ "invalid btree root journal entry: wrong number of keys")) { -+ void *next = vstruct_next(entry); -+ /* -+ * we don't want to null out this jset_entry, -+ * just the contents, so that later we can tell -+ * we were _supposed_ to have a btree root -+ */ -+ entry->u64s = 0; -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ return journal_validate_key(c, jset, entry, 1, entry->btree_id, k, -+ "btree root", write); -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_prio_ptrs(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ /* obsolete, don't care: */ -+ return 0; -+} -+ -+static int journal_entry_validate_blacklist(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ int ret = 0; -+ -+ if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1, c, -+ "invalid journal seq blacklist entry: bad size")) { -+ journal_entry_null_range(entry, 
vstruct_next(entry)); -+ } -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_blacklist_v2(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct jset_entry_blacklist_v2 *bl_entry; -+ int ret = 0; -+ -+ if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2, c, -+ "invalid journal seq blacklist entry: bad size")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ goto out; -+ } -+ -+ bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry); -+ -+ if (journal_entry_err_on(le64_to_cpu(bl_entry->start) > -+ le64_to_cpu(bl_entry->end), c, -+ "invalid journal seq blacklist entry: start > end")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ } -+out: -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_usage(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); -+ int ret = 0; -+ -+ if (journal_entry_err_on(bytes < sizeof(*u), -+ c, -+ "invalid journal entry usage: bad size")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ return ret; -+ } -+ -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_data_usage(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct jset_entry_data_usage *u = -+ container_of(entry, struct jset_entry_data_usage, entry); -+ unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); -+ int ret = 0; -+ -+ if (journal_entry_err_on(bytes < sizeof(*u) || -+ bytes < sizeof(*u) + u->r.nr_devs, -+ c, -+ "invalid journal entry usage: bad size")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ return ret; -+ } -+ -+fsck_err: -+ return ret; -+} -+ -+struct jset_entry_ops { -+ int (*validate)(struct bch_fs *, struct jset *, -+ struct jset_entry *, int); -+}; -+ -+static const struct jset_entry_ops bch2_jset_entry_ops[] = { -+#define x(f, nr) \ -+ [BCH_JSET_ENTRY_##f] = (struct jset_entry_ops) { \ -+ .validate = journal_entry_validate_##f, \ -+ }, -+ BCH_JSET_ENTRY_TYPES() -+#undef x -+}; -+ -+static int journal_entry_validate(struct bch_fs *c, struct jset *jset, -+ struct jset_entry *entry, int write) -+{ -+ return entry->type < BCH_JSET_ENTRY_NR -+ ? 
bch2_jset_entry_ops[entry->type].validate(c, jset, -+ entry, write) -+ : 0; -+} -+ -+static int jset_validate_entries(struct bch_fs *c, struct jset *jset, -+ int write) -+{ -+ struct jset_entry *entry; -+ int ret = 0; -+ -+ vstruct_for_each(jset, entry) { -+ if (journal_entry_err_on(vstruct_next(entry) > -+ vstruct_last(jset), c, -+ "journal entry extends past end of jset")) { -+ jset->u64s = cpu_to_le32((u64 *) entry - jset->_data); -+ break; -+ } -+ -+ ret = journal_entry_validate(c, jset, entry, write); -+ if (ret) -+ break; -+ } -+fsck_err: -+ return ret; -+} -+ -+static int jset_validate(struct bch_fs *c, -+ struct bch_dev *ca, -+ struct jset *jset, u64 sector, -+ unsigned bucket_sectors_left, -+ unsigned sectors_read, -+ int write) -+{ -+ size_t bytes = vstruct_bytes(jset); -+ struct bch_csum csum; -+ unsigned version; -+ int ret = 0; -+ -+ if (le64_to_cpu(jset->magic) != jset_magic(c)) -+ return JOURNAL_ENTRY_NONE; -+ -+ version = le32_to_cpu(jset->version); -+ if (journal_entry_err_on((version != BCH_JSET_VERSION_OLD && -+ version < bcachefs_metadata_version_min) || -+ version >= bcachefs_metadata_version_max, c, -+ "%s sector %llu seq %llu: unknown journal entry version %u", -+ ca->name, sector, le64_to_cpu(jset->seq), -+ version)) { -+ /* XXX: note we might have missing journal entries */ -+ return JOURNAL_ENTRY_BAD; -+ } -+ -+ if (journal_entry_err_on(bytes > bucket_sectors_left << 9, c, -+ "%s sector %llu seq %llu: journal entry too big (%zu bytes)", -+ ca->name, sector, le64_to_cpu(jset->seq), bytes)) { -+ /* XXX: note we might have missing journal entries */ -+ return JOURNAL_ENTRY_BAD; -+ } -+ -+ if (bytes > sectors_read << 9) -+ return JOURNAL_ENTRY_REREAD; -+ -+ if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c, -+ "%s sector %llu seq %llu: journal entry with unknown csum type %llu", -+ ca->name, sector, le64_to_cpu(jset->seq), -+ JSET_CSUM_TYPE(jset))) -+ return JOURNAL_ENTRY_BAD; -+ -+ csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset); -+ if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum), c, -+ "%s sector %llu seq %llu: journal checksum bad", -+ ca->name, sector, le64_to_cpu(jset->seq))) { -+ /* XXX: retry IO, when we start retrying checksum errors */ -+ /* XXX: note we might have missing journal entries */ -+ return JOURNAL_ENTRY_BAD; -+ } -+ -+ bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), -+ jset->encrypted_start, -+ vstruct_end(jset) - (void *) jset->encrypted_start); -+ -+ if (journal_entry_err_on(le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c, -+ "invalid journal entry: last_seq > seq")) { -+ jset->last_seq = jset->seq; -+ return JOURNAL_ENTRY_BAD; -+ } -+ -+ return 0; -+fsck_err: -+ return ret; -+} -+ -+struct journal_read_buf { -+ void *data; -+ size_t size; -+}; -+ -+static int journal_read_buf_realloc(struct journal_read_buf *b, -+ size_t new_size) -+{ -+ void *n; -+ -+ /* the bios are sized for this many pages, max: */ -+ if (new_size > JOURNAL_ENTRY_SIZE_MAX) -+ return -ENOMEM; -+ -+ new_size = roundup_pow_of_two(new_size); -+ n = kvpmalloc(new_size, GFP_KERNEL); -+ if (!n) -+ return -ENOMEM; -+ -+ kvpfree(b->data, b->size); -+ b->data = n; -+ b->size = new_size; -+ return 0; -+} -+ -+static int journal_read_bucket(struct bch_dev *ca, -+ struct journal_read_buf *buf, -+ struct journal_list *jlist, -+ unsigned bucket) -+{ -+ struct bch_fs *c = ca->fs; -+ struct journal_device *ja = &ca->journal; -+ struct jset *j = NULL; -+ unsigned sectors, sectors_read = 0; -+ u64 offset = 
bucket_to_sector(ca, ja->buckets[bucket]), -+ end = offset + ca->mi.bucket_size; -+ bool saw_bad = false; -+ int ret = 0; -+ -+ pr_debug("reading %u", bucket); -+ -+ while (offset < end) { -+ if (!sectors_read) { -+ struct bio *bio; -+reread: -+ sectors_read = min_t(unsigned, -+ end - offset, buf->size >> 9); -+ -+ bio = bio_kmalloc(GFP_KERNEL, -+ buf_pages(buf->data, -+ sectors_read << 9)); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_iter.bi_sector = offset; -+ bio_set_op_attrs(bio, REQ_OP_READ, 0); -+ bch2_bio_map(bio, buf->data, sectors_read << 9); -+ -+ ret = submit_bio_wait(bio); -+ bio_put(bio); -+ -+ if (bch2_dev_io_err_on(ret, ca, -+ "journal read from sector %llu", -+ offset) || -+ bch2_meta_read_fault("journal")) -+ return -EIO; -+ -+ j = buf->data; -+ } -+ -+ ret = jset_validate(c, ca, j, offset, -+ end - offset, sectors_read, -+ READ); -+ switch (ret) { -+ case BCH_FSCK_OK: -+ sectors = vstruct_sectors(j, c->block_bits); -+ break; -+ case JOURNAL_ENTRY_REREAD: -+ if (vstruct_bytes(j) > buf->size) { -+ ret = journal_read_buf_realloc(buf, -+ vstruct_bytes(j)); -+ if (ret) -+ return ret; -+ } -+ goto reread; -+ case JOURNAL_ENTRY_NONE: -+ if (!saw_bad) -+ return 0; -+ sectors = c->opts.block_size; -+ goto next_block; -+ case JOURNAL_ENTRY_BAD: -+ saw_bad = true; -+ /* -+ * On checksum error we don't really trust the size -+ * field of the journal entry we read, so try reading -+ * again at next block boundary: -+ */ -+ sectors = c->opts.block_size; -+ break; -+ default: -+ return ret; -+ } -+ -+ /* -+ * This happens sometimes if we don't have discards on - -+ * when we've partially overwritten a bucket with new -+ * journal entries. We don't need the rest of the -+ * bucket: -+ */ -+ if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket]) -+ return 0; -+ -+ ja->bucket_seq[bucket] = le64_to_cpu(j->seq); -+ -+ mutex_lock(&jlist->lock); -+ ret = journal_entry_add(c, ca, jlist, j, ret != 0); -+ mutex_unlock(&jlist->lock); -+ -+ switch (ret) { -+ case JOURNAL_ENTRY_ADD_OK: -+ break; -+ case JOURNAL_ENTRY_ADD_OUT_OF_RANGE: -+ break; -+ default: -+ return ret; -+ } -+next_block: -+ pr_debug("next"); -+ offset += sectors; -+ sectors_read -= sectors; -+ j = ((void *) j) + (sectors << 9); -+ } -+ -+ return 0; -+} -+ -+static void bch2_journal_read_device(struct closure *cl) -+{ -+ struct journal_device *ja = -+ container_of(cl, struct journal_device, read); -+ struct bch_dev *ca = container_of(ja, struct bch_dev, journal); -+ struct journal_list *jlist = -+ container_of(cl->parent, struct journal_list, cl); -+ struct journal_read_buf buf = { NULL, 0 }; -+ u64 min_seq = U64_MAX; -+ unsigned i; -+ int ret; -+ -+ if (!ja->nr) -+ goto out; -+ -+ ret = journal_read_buf_realloc(&buf, PAGE_SIZE); -+ if (ret) -+ goto err; -+ -+ pr_debug("%u journal buckets", ja->nr); -+ -+ for (i = 0; i < ja->nr; i++) { -+ ret = journal_read_bucket(ca, &buf, jlist, i); -+ if (ret) -+ goto err; -+ } -+ -+ /* Find the journal bucket with the highest sequence number: */ -+ for (i = 0; i < ja->nr; i++) { -+ if (ja->bucket_seq[i] > ja->bucket_seq[ja->cur_idx]) -+ ja->cur_idx = i; -+ -+ min_seq = min(ja->bucket_seq[i], min_seq); -+ } -+ -+ /* -+ * If there's duplicate journal entries in multiple buckets (which -+ * definitely isn't supposed to happen, but...) 
- make sure to start -+ * cur_idx at the last of those buckets, so we don't deadlock trying to -+ * allocate -+ */ -+ while (ja->bucket_seq[ja->cur_idx] > min_seq && -+ ja->bucket_seq[ja->cur_idx] > -+ ja->bucket_seq[(ja->cur_idx + 1) % ja->nr]) -+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr; -+ -+ ja->sectors_free = 0; -+ -+ /* -+ * Set dirty_idx to indicate the entire journal is full and needs to be -+ * reclaimed - journal reclaim will immediately reclaim whatever isn't -+ * pinned when it first runs: -+ */ -+ ja->discard_idx = ja->dirty_idx_ondisk = -+ ja->dirty_idx = (ja->cur_idx + 1) % ja->nr; -+out: -+ kvpfree(buf.data, buf.size); -+ percpu_ref_put(&ca->io_ref); -+ closure_return(cl); -+ return; -+err: -+ mutex_lock(&jlist->lock); -+ jlist->ret = ret; -+ mutex_unlock(&jlist->lock); -+ goto out; -+} -+ -+int bch2_journal_read(struct bch_fs *c, struct list_head *list) -+{ -+ struct journal_list jlist; -+ struct journal_replay *i; -+ struct bch_dev *ca; -+ unsigned iter; -+ size_t keys = 0, entries = 0; -+ bool degraded = false; -+ int ret = 0; -+ -+ closure_init_stack(&jlist.cl); -+ mutex_init(&jlist.lock); -+ jlist.head = list; -+ jlist.ret = 0; -+ -+ for_each_member_device(ca, c, iter) { -+ if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && -+ !(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_journal))) -+ continue; -+ -+ if ((ca->mi.state == BCH_MEMBER_STATE_RW || -+ ca->mi.state == BCH_MEMBER_STATE_RO) && -+ percpu_ref_tryget(&ca->io_ref)) -+ closure_call(&ca->journal.read, -+ bch2_journal_read_device, -+ system_unbound_wq, -+ &jlist.cl); -+ else -+ degraded = true; -+ } -+ -+ closure_sync(&jlist.cl); -+ -+ if (jlist.ret) -+ return jlist.ret; -+ -+ list_for_each_entry(i, list, list) { -+ struct jset_entry *entry; -+ struct bkey_i *k, *_n; -+ struct bch_replicas_padded replicas; -+ char buf[80]; -+ -+ ret = jset_validate_entries(c, &i->j, READ); -+ if (ret) -+ goto fsck_err; -+ -+ /* -+ * If we're mounting in degraded mode - if we didn't read all -+ * the devices - this is wrong: -+ */ -+ -+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, i->devs); -+ -+ if (!degraded && -+ (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) || -+ fsck_err_on(!bch2_replicas_marked(c, &replicas.e), c, -+ "superblock not marked as containing replicas %s", -+ (bch2_replicas_entry_to_text(&PBUF(buf), -+ &replicas.e), buf)))) { -+ ret = bch2_mark_replicas(c, &replicas.e); -+ if (ret) -+ return ret; -+ } -+ -+ for_each_jset_key(k, _n, entry, &i->j) -+ keys++; -+ entries++; -+ } -+ -+ if (!list_empty(list)) { -+ i = list_last_entry(list, struct journal_replay, list); -+ -+ bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu", -+ keys, entries, le64_to_cpu(i->j.seq)); -+ } -+fsck_err: -+ return ret; -+} -+ -+/* journal write: */ -+ -+static void __journal_write_alloc(struct journal *j, -+ struct journal_buf *w, -+ struct dev_alloc_list *devs_sorted, -+ unsigned sectors, -+ unsigned *replicas, -+ unsigned replicas_want) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_device *ja; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ if (*replicas >= replicas_want) -+ return; -+ -+ for (i = 0; i < devs_sorted->nr; i++) { -+ ca = rcu_dereference(c->devs[devs_sorted->devs[i]]); -+ if (!ca) -+ continue; -+ -+ ja = &ca->journal; -+ -+ /* -+ * Check that we can use this device, and aren't already using -+ * it: -+ */ -+ if (!ca->mi.durability || -+ ca->mi.state != BCH_MEMBER_STATE_RW || -+ !ja->nr || -+ bch2_bkey_has_device(bkey_i_to_s_c(&w->key), -+ ca->dev_idx) || -+ sectors > 
ja->sectors_free) -+ continue; -+ -+ bch2_dev_stripe_increment(ca, &j->wp.stripe); -+ -+ bch2_bkey_append_ptr(&w->key, -+ (struct bch_extent_ptr) { -+ .offset = bucket_to_sector(ca, -+ ja->buckets[ja->cur_idx]) + -+ ca->mi.bucket_size - -+ ja->sectors_free, -+ .dev = ca->dev_idx, -+ }); -+ -+ ja->sectors_free -= sectors; -+ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq); -+ -+ *replicas += ca->mi.durability; -+ -+ if (*replicas >= replicas_want) -+ break; -+ } -+} -+ -+/** -+ * journal_next_bucket - move on to the next journal bucket if possible -+ */ -+static int journal_write_alloc(struct journal *j, struct journal_buf *w, -+ unsigned sectors) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_device *ja; -+ struct bch_dev *ca; -+ struct dev_alloc_list devs_sorted; -+ unsigned i, replicas = 0, replicas_want = -+ READ_ONCE(c->opts.metadata_replicas); -+ -+ rcu_read_lock(); -+ -+ devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, -+ &c->rw_devs[BCH_DATA_journal]); -+ -+ __journal_write_alloc(j, w, &devs_sorted, -+ sectors, &replicas, replicas_want); -+ -+ if (replicas >= replicas_want) -+ goto done; -+ -+ for (i = 0; i < devs_sorted.nr; i++) { -+ ca = rcu_dereference(c->devs[devs_sorted.devs[i]]); -+ if (!ca) -+ continue; -+ -+ ja = &ca->journal; -+ -+ if (sectors > ja->sectors_free && -+ sectors <= ca->mi.bucket_size && -+ bch2_journal_dev_buckets_available(j, ja, -+ journal_space_discarded)) { -+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr; -+ ja->sectors_free = ca->mi.bucket_size; -+ -+ /* -+ * ja->bucket_seq[ja->cur_idx] must always have -+ * something sensible: -+ */ -+ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq); -+ } -+ } -+ -+ __journal_write_alloc(j, w, &devs_sorted, -+ sectors, &replicas, replicas_want); -+done: -+ rcu_read_unlock(); -+ -+ return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS; -+} -+ -+static void journal_write_compact(struct jset *jset) -+{ -+ struct jset_entry *i, *next, *prev = NULL; -+ -+ /* -+ * Simple compaction, dropping empty jset_entries (from journal -+ * reservations that weren't fully used) and merging jset_entries that -+ * can be. -+ * -+ * If we wanted to be really fancy here, we could sort all the keys in -+ * the jset and drop keys that were overwritten - probably not worth it: -+ */ -+ vstruct_for_each_safe(jset, i, next) { -+ unsigned u64s = le16_to_cpu(i->u64s); -+ -+ /* Empty entry: */ -+ if (!u64s) -+ continue; -+ -+ /* Can we merge with previous entry? */ -+ if (prev && -+ i->btree_id == prev->btree_id && -+ i->level == prev->level && -+ i->type == prev->type && -+ i->type == BCH_JSET_ENTRY_btree_keys && -+ le16_to_cpu(prev->u64s) + u64s <= U16_MAX) { -+ memmove_u64s_down(vstruct_next(prev), -+ i->_data, -+ u64s); -+ le16_add_cpu(&prev->u64s, u64s); -+ continue; -+ } -+ -+ /* Couldn't merge, move i into new position (after prev): */ -+ prev = prev ? vstruct_next(prev) : jset->start; -+ if (i != prev) -+ memmove_u64s_down(prev, i, jset_u64s(u64s)); -+ } -+ -+ prev = prev ? 
vstruct_next(prev) : jset->start; -+ jset->u64s = cpu_to_le32((u64 *) prev - jset->_data); -+} -+ -+static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) -+{ -+ /* we aren't holding j->lock: */ -+ unsigned new_size = READ_ONCE(j->buf_size_want); -+ void *new_buf; -+ -+ if (buf->buf_size >= new_size) -+ return; -+ -+ new_buf = kvpmalloc(new_size, GFP_NOIO|__GFP_NOWARN); -+ if (!new_buf) -+ return; -+ -+ memcpy(new_buf, buf->data, buf->buf_size); -+ kvpfree(buf->data, buf->buf_size); -+ buf->data = new_buf; -+ buf->buf_size = new_size; -+} -+ -+static void journal_write_done(struct closure *cl) -+{ -+ struct journal *j = container_of(cl, struct journal, io); -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_buf *w = journal_prev_buf(j); -+ struct bch_devs_list devs = -+ bch2_bkey_devs(bkey_i_to_s_c(&w->key)); -+ struct bch_replicas_padded replicas; -+ u64 seq = le64_to_cpu(w->data->seq); -+ u64 last_seq = le64_to_cpu(w->data->last_seq); -+ -+ bch2_time_stats_update(j->write_time, j->write_start_time); -+ -+ if (!devs.nr) { -+ bch_err(c, "unable to write journal to sufficient devices"); -+ goto err; -+ } -+ -+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, devs); -+ -+ if (bch2_mark_replicas(c, &replicas.e)) -+ goto err; -+ -+ spin_lock(&j->lock); -+ if (seq >= j->pin.front) -+ journal_seq_pin(j, seq)->devs = devs; -+ -+ j->seq_ondisk = seq; -+ j->last_seq_ondisk = last_seq; -+ bch2_journal_space_available(j); -+ -+ /* -+ * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard -+ * more buckets: -+ * -+ * Must come before signaling write completion, for -+ * bch2_fs_journal_stop(): -+ */ -+ mod_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0); -+out: -+ /* also must come before signalling write completion: */ -+ closure_debug_destroy(cl); -+ -+ BUG_ON(!j->reservations.prev_buf_unwritten); -+ atomic64_sub(((union journal_res_state) { .prev_buf_unwritten = 1 }).v, -+ &j->reservations.counter); -+ -+ closure_wake_up(&w->wait); -+ journal_wake(j); -+ -+ if (test_bit(JOURNAL_NEED_WRITE, &j->flags)) -+ mod_delayed_work(system_freezable_wq, &j->write_work, 0); -+ spin_unlock(&j->lock); -+ return; -+err: -+ bch2_fatal_error(c); -+ spin_lock(&j->lock); -+ goto out; -+} -+ -+static void journal_write_endio(struct bio *bio) -+{ -+ struct bch_dev *ca = bio->bi_private; -+ struct journal *j = &ca->fs->journal; -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write: %s", -+ bch2_blk_status_to_str(bio->bi_status)) || -+ bch2_meta_write_fault("journal")) { -+ struct journal_buf *w = journal_prev_buf(j); -+ unsigned long flags; -+ -+ spin_lock_irqsave(&j->err_lock, flags); -+ bch2_bkey_drop_device(bkey_i_to_s(&w->key), ca->dev_idx); -+ spin_unlock_irqrestore(&j->err_lock, flags); -+ } -+ -+ closure_put(&j->io); -+ percpu_ref_put(&ca->io_ref); -+} -+ -+void bch2_journal_write(struct closure *cl) -+{ -+ struct journal *j = container_of(cl, struct journal, io); -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ struct journal_buf *w = journal_prev_buf(j); -+ struct jset_entry *start, *end; -+ struct jset *jset; -+ struct bio *bio; -+ struct bch_extent_ptr *ptr; -+ bool validate_before_checksum = false; -+ unsigned i, sectors, bytes, u64s; -+ int ret; -+ -+ bch2_journal_pin_put(j, le64_to_cpu(w->data->seq)); -+ -+ journal_buf_realloc(j, w); -+ jset = w->data; -+ -+ j->write_start_time = local_clock(); -+ -+ /* -+ * New btree roots are set by journalling them; when the journal entry 
-+ * gets written we have to propagate them to c->btree_roots -+ * -+ * But, every journal entry we write has to contain all the btree roots -+ * (at least for now); so after we copy btree roots to c->btree_roots we -+ * have to get any missing btree roots and add them to this journal -+ * entry: -+ */ -+ -+ bch2_journal_entries_to_btree_roots(c, jset); -+ -+ start = end = vstruct_last(jset); -+ -+ end = bch2_btree_roots_to_journal_entries(c, jset->start, end); -+ -+ end = bch2_journal_super_entries_add_common(c, end, -+ le64_to_cpu(jset->seq)); -+ u64s = (u64 *) end - (u64 *) start; -+ BUG_ON(u64s > j->entry_u64s_reserved); -+ -+ le32_add_cpu(&jset->u64s, u64s); -+ BUG_ON(vstruct_sectors(jset, c->block_bits) > w->sectors); -+ -+ journal_write_compact(jset); -+ -+ jset->read_clock = cpu_to_le16(c->bucket_clock[READ].hand); -+ jset->write_clock = cpu_to_le16(c->bucket_clock[WRITE].hand); -+ jset->magic = cpu_to_le64(jset_magic(c)); -+ -+ jset->version = c->sb.version < bcachefs_metadata_version_new_versioning -+ ? cpu_to_le32(BCH_JSET_VERSION_OLD) -+ : cpu_to_le32(c->sb.version); -+ -+ SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN); -+ SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c)); -+ -+ if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset))) -+ validate_before_checksum = true; -+ -+ if (le32_to_cpu(jset->version) < bcachefs_metadata_version_max) -+ validate_before_checksum = true; -+ -+ if (validate_before_checksum && -+ jset_validate_entries(c, jset, WRITE)) -+ goto err; -+ -+ bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), -+ jset->encrypted_start, -+ vstruct_end(jset) - (void *) jset->encrypted_start); -+ -+ jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), -+ journal_nonce(jset), jset); -+ -+ if (!validate_before_checksum && -+ jset_validate_entries(c, jset, WRITE)) -+ goto err; -+ -+ sectors = vstruct_sectors(jset, c->block_bits); -+ BUG_ON(sectors > w->sectors); -+ -+ bytes = vstruct_bytes(jset); -+ memset((void *) jset + bytes, 0, (sectors << 9) - bytes); -+ -+retry_alloc: -+ spin_lock(&j->lock); -+ ret = journal_write_alloc(j, w, sectors); -+ -+ if (ret && j->can_discard) { -+ spin_unlock(&j->lock); -+ bch2_journal_do_discards(j); -+ goto retry_alloc; -+ } -+ -+ /* -+ * write is allocated, no longer need to account for it in -+ * bch2_journal_space_available(): -+ */ -+ w->sectors = 0; -+ -+ /* -+ * journal entry has been compacted and allocated, recalculate space -+ * available: -+ */ -+ bch2_journal_space_available(j); -+ spin_unlock(&j->lock); -+ -+ if (ret) { -+ bch_err(c, "Unable to allocate journal write"); -+ bch2_fatal_error(c); -+ continue_at(cl, journal_write_done, system_highpri_wq); -+ return; -+ } -+ -+ /* -+ * XXX: we really should just disable the entire journal in nochanges -+ * mode -+ */ -+ if (c->opts.nochanges) -+ goto no_io; -+ -+ extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) { -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ if (!percpu_ref_tryget(&ca->io_ref)) { -+ /* XXX: fix this */ -+ bch_err(c, "missing device for journal write\n"); -+ continue; -+ } -+ -+ this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal], -+ sectors); -+ -+ bio = ca->journal.bio; -+ bio_reset(bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_iter.bi_sector = ptr->offset; -+ bio->bi_end_io = journal_write_endio; -+ bio->bi_private = ca; -+ bio_set_op_attrs(bio, REQ_OP_WRITE, -+ REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA); -+ bch2_bio_map(bio, jset, sectors << 9); -+ -+ trace_journal_write(bio); -+ closure_bio_submit(bio, cl); -+ -+ 
ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(jset->seq); -+ } -+ -+ for_each_rw_member(ca, c, i) -+ if (journal_flushes_device(ca) && -+ !bch2_bkey_has_device(bkey_i_to_s_c(&w->key), i)) { -+ percpu_ref_get(&ca->io_ref); -+ -+ bio = ca->journal.bio; -+ bio_reset(bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_opf = REQ_OP_FLUSH; -+ bio->bi_end_io = journal_write_endio; -+ bio->bi_private = ca; -+ closure_bio_submit(bio, cl); -+ } -+ -+no_io: -+ bch2_bucket_seq_cleanup(c); -+ -+ continue_at(cl, journal_write_done, system_highpri_wq); -+ return; -+err: -+ bch2_inconsistent_error(c); -+ continue_at(cl, journal_write_done, system_highpri_wq); -+} -diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h -new file mode 100644 -index 000000000000..6958ee0f8cf2 ---- /dev/null -+++ b/fs/bcachefs/journal_io.h -@@ -0,0 +1,44 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_IO_H -+#define _BCACHEFS_JOURNAL_IO_H -+ -+/* -+ * Only used for holding the journal entries we read in btree_journal_read() -+ * during cache_registration -+ */ -+struct journal_replay { -+ struct list_head list; -+ struct bch_devs_list devs; -+ /* checksum error, but we may want to try using it anyways: */ -+ bool bad; -+ /* must be last: */ -+ struct jset j; -+}; -+ -+static inline struct jset_entry *__jset_entry_type_next(struct jset *jset, -+ struct jset_entry *entry, unsigned type) -+{ -+ while (entry < vstruct_last(jset)) { -+ if (entry->type == type) -+ return entry; -+ -+ entry = vstruct_next(entry); -+ } -+ -+ return NULL; -+} -+ -+#define for_each_jset_entry_type(entry, jset, type) \ -+ for (entry = (jset)->start; \ -+ (entry = __jset_entry_type_next(jset, entry, type)); \ -+ entry = vstruct_next(entry)) -+ -+#define for_each_jset_key(k, _n, entry, jset) \ -+ for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys) \ -+ vstruct_for_each_safe(entry, k, _n) -+ -+int bch2_journal_read(struct bch_fs *, struct list_head *); -+ -+void bch2_journal_write(struct closure *); -+ -+#endif /* _BCACHEFS_JOURNAL_IO_H */ -diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c -new file mode 100644 -index 000000000000..57591983eebd ---- /dev/null -+++ b/fs/bcachefs/journal_reclaim.c -@@ -0,0 +1,644 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "journal.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "replicas.h" -+#include "super.h" -+ -+/* Free space calculations: */ -+ -+static unsigned journal_space_from(struct journal_device *ja, -+ enum journal_space_from from) -+{ -+ switch (from) { -+ case journal_space_discarded: -+ return ja->discard_idx; -+ case journal_space_clean_ondisk: -+ return ja->dirty_idx_ondisk; -+ case journal_space_clean: -+ return ja->dirty_idx; -+ default: -+ BUG(); -+ } -+} -+ -+unsigned bch2_journal_dev_buckets_available(struct journal *j, -+ struct journal_device *ja, -+ enum journal_space_from from) -+{ -+ unsigned available = (journal_space_from(ja, from) - -+ ja->cur_idx - 1 + ja->nr) % ja->nr; -+ -+ /* -+ * Don't use the last bucket unless writing the new last_seq -+ * will make another bucket available: -+ */ -+ if (available && ja->dirty_idx_ondisk == ja->dirty_idx) -+ --available; -+ -+ return available; -+} -+ -+static void journal_set_remaining(struct journal *j, unsigned u64s_remaining) -+{ -+ union journal_preres_state old, new; -+ u64 v = atomic64_read(&j->prereserved.counter); -+ -+ do { -+ old.v = new.v = v; -+ new.remaining = u64s_remaining; -+ } while ((v = 
atomic64_cmpxchg(&j->prereserved.counter, -+ old.v, new.v)) != old.v); -+} -+ -+static struct journal_space { -+ unsigned next_entry; -+ unsigned remaining; -+} __journal_space_available(struct journal *j, unsigned nr_devs_want, -+ enum journal_space_from from) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ unsigned sectors_next_entry = UINT_MAX; -+ unsigned sectors_total = UINT_MAX; -+ unsigned i, nr_devs = 0; -+ unsigned unwritten_sectors = j->reservations.prev_buf_unwritten -+ ? journal_prev_buf(j)->sectors -+ : 0; -+ -+ rcu_read_lock(); -+ for_each_member_device_rcu(ca, c, i, -+ &c->rw_devs[BCH_DATA_journal]) { -+ struct journal_device *ja = &ca->journal; -+ unsigned buckets_this_device, sectors_this_device; -+ -+ if (!ja->nr) -+ continue; -+ -+ buckets_this_device = bch2_journal_dev_buckets_available(j, ja, from); -+ sectors_this_device = ja->sectors_free; -+ -+ /* -+ * We that we don't allocate the space for a journal entry -+ * until we write it out - thus, account for it here: -+ */ -+ if (unwritten_sectors >= sectors_this_device) { -+ if (!buckets_this_device) -+ continue; -+ -+ buckets_this_device--; -+ sectors_this_device = ca->mi.bucket_size; -+ } -+ -+ sectors_this_device -= unwritten_sectors; -+ -+ if (sectors_this_device < ca->mi.bucket_size && -+ buckets_this_device) { -+ buckets_this_device--; -+ sectors_this_device = ca->mi.bucket_size; -+ } -+ -+ if (!sectors_this_device) -+ continue; -+ -+ sectors_next_entry = min(sectors_next_entry, -+ sectors_this_device); -+ -+ sectors_total = min(sectors_total, -+ buckets_this_device * ca->mi.bucket_size + -+ sectors_this_device); -+ -+ nr_devs++; -+ } -+ rcu_read_unlock(); -+ -+ if (nr_devs < nr_devs_want) -+ return (struct journal_space) { 0, 0 }; -+ -+ return (struct journal_space) { -+ .next_entry = sectors_next_entry, -+ .remaining = max_t(int, 0, sectors_total - sectors_next_entry), -+ }; -+} -+ -+void bch2_journal_space_available(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ struct journal_space discarded, clean_ondisk, clean; -+ unsigned overhead, u64s_remaining = 0; -+ unsigned max_entry_size = min(j->buf[0].buf_size >> 9, -+ j->buf[1].buf_size >> 9); -+ unsigned i, nr_online = 0, nr_devs_want; -+ bool can_discard = false; -+ int ret = 0; -+ -+ lockdep_assert_held(&j->lock); -+ -+ rcu_read_lock(); -+ for_each_member_device_rcu(ca, c, i, -+ &c->rw_devs[BCH_DATA_journal]) { -+ struct journal_device *ja = &ca->journal; -+ -+ if (!ja->nr) -+ continue; -+ -+ while (ja->dirty_idx != ja->cur_idx && -+ ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j)) -+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr; -+ -+ while (ja->dirty_idx_ondisk != ja->dirty_idx && -+ ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk) -+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr; -+ -+ if (ja->discard_idx != ja->dirty_idx_ondisk) -+ can_discard = true; -+ -+ max_entry_size = min_t(unsigned, max_entry_size, ca->mi.bucket_size); -+ nr_online++; -+ } -+ rcu_read_unlock(); -+ -+ j->can_discard = can_discard; -+ -+ if (nr_online < c->opts.metadata_replicas_required) { -+ ret = -EROFS; -+ goto out; -+ } -+ -+ if (!fifo_free(&j->pin)) { -+ ret = -ENOSPC; -+ goto out; -+ } -+ -+ nr_devs_want = min_t(unsigned, nr_online, c->opts.metadata_replicas); -+ -+ discarded = __journal_space_available(j, nr_devs_want, journal_space_discarded); -+ clean_ondisk = __journal_space_available(j, nr_devs_want, journal_space_clean_ondisk); -+ clean = 
__journal_space_available(j, nr_devs_want, journal_space_clean); -+ -+ if (!discarded.next_entry) -+ ret = -ENOSPC; -+ -+ overhead = DIV_ROUND_UP(clean.remaining, max_entry_size) * -+ journal_entry_overhead(j); -+ u64s_remaining = clean.remaining << 6; -+ u64s_remaining = max_t(int, 0, u64s_remaining - overhead); -+ u64s_remaining /= 4; -+out: -+ j->cur_entry_sectors = !ret ? discarded.next_entry : 0; -+ j->cur_entry_error = ret; -+ journal_set_remaining(j, u64s_remaining); -+ journal_check_may_get_unreserved(j); -+ -+ if (!ret) -+ journal_wake(j); -+} -+ -+/* Discards - last part of journal reclaim: */ -+ -+static bool should_discard_bucket(struct journal *j, struct journal_device *ja) -+{ -+ bool ret; -+ -+ spin_lock(&j->lock); -+ ret = ja->discard_idx != ja->dirty_idx_ondisk; -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+/* -+ * Advance ja->discard_idx as long as it points to buckets that are no longer -+ * dirty, issuing discards if necessary: -+ */ -+void bch2_journal_do_discards(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ unsigned iter; -+ -+ mutex_lock(&j->discard_lock); -+ -+ for_each_rw_member(ca, c, iter) { -+ struct journal_device *ja = &ca->journal; -+ -+ while (should_discard_bucket(j, ja)) { -+ if (ca->mi.discard && -+ blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev))) -+ blkdev_issue_discard(ca->disk_sb.bdev, -+ bucket_to_sector(ca, -+ ja->buckets[ja->discard_idx]), -+ ca->mi.bucket_size, GFP_NOIO, 0); -+ -+ spin_lock(&j->lock); -+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr; -+ -+ bch2_journal_space_available(j); -+ spin_unlock(&j->lock); -+ } -+ } -+ -+ mutex_unlock(&j->discard_lock); -+} -+ -+/* -+ * Journal entry pinning - machinery for holding a reference on a given journal -+ * entry, holding it open to ensure it gets replayed during recovery: -+ */ -+ -+static void bch2_journal_reclaim_fast(struct journal *j) -+{ -+ struct journal_entry_pin_list temp; -+ bool popped = false; -+ -+ lockdep_assert_held(&j->lock); -+ -+ /* -+ * Unpin journal entries whose reference counts reached zero, meaning -+ * all btree nodes got written out -+ */ -+ while (!fifo_empty(&j->pin) && -+ !atomic_read(&fifo_peek_front(&j->pin).count)) { -+ BUG_ON(!list_empty(&fifo_peek_front(&j->pin).list)); -+ BUG_ON(!fifo_pop(&j->pin, temp)); -+ popped = true; -+ } -+ -+ if (popped) -+ bch2_journal_space_available(j); -+} -+ -+void bch2_journal_pin_put(struct journal *j, u64 seq) -+{ -+ struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq); -+ -+ if (atomic_dec_and_test(&pin_list->count)) { -+ spin_lock(&j->lock); -+ bch2_journal_reclaim_fast(j); -+ spin_unlock(&j->lock); -+ } -+} -+ -+static inline void __journal_pin_drop(struct journal *j, -+ struct journal_entry_pin *pin) -+{ -+ struct journal_entry_pin_list *pin_list; -+ -+ if (!journal_pin_active(pin)) -+ return; -+ -+ pin_list = journal_seq_pin(j, pin->seq); -+ pin->seq = 0; -+ list_del_init(&pin->list); -+ -+ /* -+ * Unpinning a journal entry make make journal_next_bucket() succeed, if -+ * writing a new last_seq will now make another bucket available: -+ */ -+ if (atomic_dec_and_test(&pin_list->count) && -+ pin_list == &fifo_peek_front(&j->pin)) -+ bch2_journal_reclaim_fast(j); -+ else if (fifo_used(&j->pin) == 1 && -+ atomic_read(&pin_list->count) == 1) -+ journal_wake(j); -+} -+ -+void bch2_journal_pin_drop(struct journal *j, -+ struct journal_entry_pin *pin) -+{ -+ spin_lock(&j->lock); -+ __journal_pin_drop(j, pin); -+ spin_unlock(&j->lock); -+} -+ 
-+static void bch2_journal_pin_add_locked(struct journal *j, u64 seq, -+ struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq); -+ -+ __journal_pin_drop(j, pin); -+ -+ BUG_ON(!atomic_read(&pin_list->count) && seq == journal_last_seq(j)); -+ -+ atomic_inc(&pin_list->count); -+ pin->seq = seq; -+ pin->flush = flush_fn; -+ -+ list_add(&pin->list, flush_fn ? &pin_list->list : &pin_list->flushed); -+} -+ -+void __bch2_journal_pin_add(struct journal *j, u64 seq, -+ struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ spin_lock(&j->lock); -+ bch2_journal_pin_add_locked(j, seq, pin, flush_fn); -+ spin_unlock(&j->lock); -+ -+ /* -+ * If the journal is currently full, we might want to call flush_fn -+ * immediately: -+ */ -+ journal_wake(j); -+} -+ -+void bch2_journal_pin_update(struct journal *j, u64 seq, -+ struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ if (journal_pin_active(pin) && pin->seq < seq) -+ return; -+ -+ spin_lock(&j->lock); -+ -+ if (pin->seq != seq) { -+ bch2_journal_pin_add_locked(j, seq, pin, flush_fn); -+ } else { -+ struct journal_entry_pin_list *pin_list = -+ journal_seq_pin(j, seq); -+ -+ /* -+ * If the pin is already pinning the right sequence number, it -+ * still might've already been flushed: -+ */ -+ list_move(&pin->list, &pin_list->list); -+ } -+ -+ spin_unlock(&j->lock); -+ -+ /* -+ * If the journal is currently full, we might want to call flush_fn -+ * immediately: -+ */ -+ journal_wake(j); -+} -+ -+void bch2_journal_pin_copy(struct journal *j, -+ struct journal_entry_pin *dst, -+ struct journal_entry_pin *src, -+ journal_pin_flush_fn flush_fn) -+{ -+ spin_lock(&j->lock); -+ -+ if (journal_pin_active(src) && -+ (!journal_pin_active(dst) || src->seq < dst->seq)) -+ bch2_journal_pin_add_locked(j, src->seq, dst, flush_fn); -+ -+ spin_unlock(&j->lock); -+} -+ -+/** -+ * bch2_journal_pin_flush: ensure journal pin callback is no longer running -+ */ -+void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin) -+{ -+ BUG_ON(journal_pin_active(pin)); -+ -+ wait_event(j->pin_flush_wait, j->flush_in_progress != pin); -+} -+ -+/* -+ * Journal reclaim: flush references to open journal entries to reclaim space in -+ * the journal -+ * -+ * May be done by the journal code in the background as needed to free up space -+ * for more journal entries, or as part of doing a clean shutdown, or to migrate -+ * data off of a specific device: -+ */ -+ -+static struct journal_entry_pin * -+journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq) -+{ -+ struct journal_entry_pin_list *pin_list; -+ struct journal_entry_pin *ret = NULL; -+ -+ if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags)) -+ return NULL; -+ -+ spin_lock(&j->lock); -+ -+ fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) -+ if (*seq > max_seq || -+ (ret = list_first_entry_or_null(&pin_list->list, -+ struct journal_entry_pin, list))) -+ break; -+ -+ if (ret) { -+ list_move(&ret->list, &pin_list->flushed); -+ BUG_ON(j->flush_in_progress); -+ j->flush_in_progress = ret; -+ j->last_flushed = jiffies; -+ } -+ -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+/* returns true if we did work */ -+static bool journal_flush_pins(struct journal *j, u64 seq_to_flush, -+ unsigned min_nr) -+{ -+ struct journal_entry_pin *pin; -+ bool ret = false; -+ u64 seq; -+ -+ lockdep_assert_held(&j->reclaim_lock); -+ -+ while ((pin = journal_get_next_pin(j, min_nr -+ ? 
U64_MAX : seq_to_flush, &seq))) { -+ if (min_nr) -+ min_nr--; -+ -+ pin->flush(j, pin, seq); -+ -+ BUG_ON(j->flush_in_progress != pin); -+ j->flush_in_progress = NULL; -+ wake_up(&j->pin_flush_wait); -+ ret = true; -+ } -+ -+ return ret; -+} -+ -+/** -+ * bch2_journal_reclaim - free up journal buckets -+ * -+ * Background journal reclaim writes out btree nodes. It should be run -+ * early enough so that we never completely run out of journal buckets. -+ * -+ * High watermarks for triggering background reclaim: -+ * - FIFO has fewer than 512 entries left -+ * - fewer than 25% journal buckets free -+ * -+ * Background reclaim runs until low watermarks are reached: -+ * - FIFO has more than 1024 entries left -+ * - more than 50% journal buckets free -+ * -+ * As long as a reclaim can complete in the time it takes to fill up -+ * 512 journal entries or 25% of all journal buckets, then -+ * journal_next_bucket() should not stall. -+ */ -+void bch2_journal_reclaim(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ unsigned iter, min_nr = 0; -+ u64 seq_to_flush = 0; -+ -+ lockdep_assert_held(&j->reclaim_lock); -+ -+ bch2_journal_do_discards(j); -+ -+ spin_lock(&j->lock); -+ -+ for_each_rw_member(ca, c, iter) { -+ struct journal_device *ja = &ca->journal; -+ unsigned nr_buckets, bucket_to_flush; -+ -+ if (!ja->nr) -+ continue; -+ -+ /* Try to keep the journal at most half full: */ -+ nr_buckets = ja->nr / 2; -+ -+ /* And include pre-reservations: */ -+ nr_buckets += DIV_ROUND_UP(j->prereserved.reserved, -+ (ca->mi.bucket_size << 6) - -+ journal_entry_overhead(j)); -+ -+ nr_buckets = min(nr_buckets, ja->nr); -+ -+ bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr; -+ seq_to_flush = max(seq_to_flush, -+ ja->bucket_seq[bucket_to_flush]); -+ } -+ -+ /* Also flush if the pin fifo is more than half full */ -+ seq_to_flush = max_t(s64, seq_to_flush, -+ (s64) journal_cur_seq(j) - -+ (j->pin.size >> 1)); -+ spin_unlock(&j->lock); -+ -+ /* -+ * If it's been longer than j->reclaim_delay_ms since we last flushed, -+ * make sure to flush at least one journal pin: -+ */ -+ if (time_after(jiffies, j->last_flushed + -+ msecs_to_jiffies(j->reclaim_delay_ms))) -+ min_nr = 1; -+ -+ if (j->prereserved.reserved * 2 > j->prereserved.remaining) { -+ seq_to_flush = max(seq_to_flush, journal_last_seq(j)); -+ min_nr = 1; -+ } -+ -+ journal_flush_pins(j, seq_to_flush, min_nr); -+ -+ if (!bch2_journal_error(j)) -+ queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, -+ msecs_to_jiffies(j->reclaim_delay_ms)); -+} -+ -+void bch2_journal_reclaim_work(struct work_struct *work) -+{ -+ struct journal *j = container_of(to_delayed_work(work), -+ struct journal, reclaim_work); -+ -+ mutex_lock(&j->reclaim_lock); -+ bch2_journal_reclaim(j); -+ mutex_unlock(&j->reclaim_lock); -+} -+ -+static int journal_flush_done(struct journal *j, u64 seq_to_flush, -+ bool *did_work) -+{ -+ int ret; -+ -+ ret = bch2_journal_error(j); -+ if (ret) -+ return ret; -+ -+ mutex_lock(&j->reclaim_lock); -+ -+ *did_work = journal_flush_pins(j, seq_to_flush, 0); -+ -+ spin_lock(&j->lock); -+ /* -+ * If journal replay hasn't completed, the unreplayed journal entries -+ * hold refs on their corresponding sequence numbers -+ */ -+ ret = !test_bit(JOURNAL_REPLAY_DONE, &j->flags) || -+ journal_last_seq(j) > seq_to_flush || -+ (fifo_used(&j->pin) == 1 && -+ atomic_read(&fifo_peek_front(&j->pin).count) == 1); -+ -+ spin_unlock(&j->lock); -+ mutex_unlock(&j->reclaim_lock); -+ -+ return ret; -+} -+ 
-+bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush) -+{ -+ bool did_work = false; -+ -+ if (!test_bit(JOURNAL_STARTED, &j->flags)) -+ return false; -+ -+ closure_wait_event(&j->async_wait, -+ journal_flush_done(j, seq_to_flush, &did_work)); -+ -+ return did_work; -+} -+ -+int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_entry_pin_list *p; -+ u64 iter, seq = 0; -+ int ret = 0; -+ -+ spin_lock(&j->lock); -+ fifo_for_each_entry_ptr(p, &j->pin, iter) -+ if (dev_idx >= 0 -+ ? bch2_dev_list_has_dev(p->devs, dev_idx) -+ : p->devs.nr < c->opts.metadata_replicas) -+ seq = iter; -+ spin_unlock(&j->lock); -+ -+ bch2_journal_flush_pins(j, seq); -+ -+ ret = bch2_journal_error(j); -+ if (ret) -+ return ret; -+ -+ mutex_lock(&c->replicas_gc_lock); -+ bch2_replicas_gc_start(c, 1 << BCH_DATA_journal); -+ -+ seq = 0; -+ -+ spin_lock(&j->lock); -+ while (!ret && seq < j->pin.back) { -+ struct bch_replicas_padded replicas; -+ -+ seq = max(seq, journal_last_seq(j)); -+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, -+ journal_seq_pin(j, seq)->devs); -+ seq++; -+ -+ spin_unlock(&j->lock); -+ ret = bch2_mark_replicas(c, &replicas.e); -+ spin_lock(&j->lock); -+ } -+ spin_unlock(&j->lock); -+ -+ ret = bch2_replicas_gc_end(c, ret); -+ mutex_unlock(&c->replicas_gc_lock); -+ -+ return ret; -+} -diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h -new file mode 100644 -index 000000000000..8128907a7623 ---- /dev/null -+++ b/fs/bcachefs/journal_reclaim.h -@@ -0,0 +1,69 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_RECLAIM_H -+#define _BCACHEFS_JOURNAL_RECLAIM_H -+ -+#define JOURNAL_PIN (32 * 1024) -+ -+enum journal_space_from { -+ journal_space_discarded, -+ journal_space_clean_ondisk, -+ journal_space_clean, -+}; -+ -+unsigned bch2_journal_dev_buckets_available(struct journal *, -+ struct journal_device *, -+ enum journal_space_from); -+void bch2_journal_space_available(struct journal *); -+ -+static inline bool journal_pin_active(struct journal_entry_pin *pin) -+{ -+ return pin->seq != 0; -+} -+ -+static inline struct journal_entry_pin_list * -+journal_seq_pin(struct journal *j, u64 seq) -+{ -+ EBUG_ON(seq < j->pin.front || seq >= j->pin.back); -+ -+ return &j->pin.data[seq & j->pin.mask]; -+} -+ -+void bch2_journal_pin_put(struct journal *, u64); -+void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *); -+ -+void __bch2_journal_pin_add(struct journal *, u64, struct journal_entry_pin *, -+ journal_pin_flush_fn); -+ -+static inline void bch2_journal_pin_add(struct journal *j, u64 seq, -+ struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ if (unlikely(!journal_pin_active(pin) || pin->seq > seq)) -+ __bch2_journal_pin_add(j, seq, pin, flush_fn); -+} -+ -+void bch2_journal_pin_update(struct journal *, u64, -+ struct journal_entry_pin *, -+ journal_pin_flush_fn); -+ -+void bch2_journal_pin_copy(struct journal *, -+ struct journal_entry_pin *, -+ struct journal_entry_pin *, -+ journal_pin_flush_fn); -+ -+void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *); -+ -+void bch2_journal_do_discards(struct journal *); -+void bch2_journal_reclaim(struct journal *); -+void bch2_journal_reclaim_work(struct work_struct *); -+ -+bool bch2_journal_flush_pins(struct journal *, u64); -+ -+static inline bool bch2_journal_flush_all_pins(struct journal *j) -+{ -+ return bch2_journal_flush_pins(j, U64_MAX); -+} 
-+ -+int bch2_journal_flush_device_pins(struct journal *, int); -+ -+#endif /* _BCACHEFS_JOURNAL_RECLAIM_H */ -diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c -new file mode 100644 -index 000000000000..d0f1bbf8f6a7 ---- /dev/null -+++ b/fs/bcachefs/journal_seq_blacklist.c -@@ -0,0 +1,309 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_iter.h" -+#include "eytzinger.h" -+#include "journal_seq_blacklist.h" -+#include "super-io.h" -+ -+/* -+ * journal_seq_blacklist machinery: -+ * -+ * To guarantee order of btree updates after a crash, we need to detect when a -+ * btree node entry (bset) is newer than the newest journal entry that was -+ * successfully written, and ignore it - effectively ignoring any btree updates -+ * that didn't make it into the journal. -+ * -+ * If we didn't do this, we might have two btree nodes, a and b, both with -+ * updates that weren't written to the journal yet: if b was updated after a, -+ * but b was flushed and not a - oops; on recovery we'll find that the updates -+ * to b happened, but not the updates to a that happened before it. -+ * -+ * Ignoring bsets that are newer than the newest journal entry is always safe, -+ * because everything they contain will also have been journalled - and must -+ * still be present in the journal on disk until a journal entry has been -+ * written _after_ that bset was written. -+ * -+ * To accomplish this, bsets record the newest journal sequence number they -+ * contain updates for; then, on startup, the btree code queries the journal -+ * code to ask "Is this sequence number newer than the newest journal entry? If -+ * so, ignore it." -+ * -+ * When this happens, we must blacklist that journal sequence number: the -+ * journal must not write any entries with that sequence number, and it must -+ * record that it was blacklisted so that a) on recovery we don't think we have -+ * missing journal entries and b) so that the btree code continues to ignore -+ * that bset, until that btree node is rewritten. 
-+ */ -+ -+static unsigned sb_blacklist_u64s(unsigned nr) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl; -+ -+ return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64); -+} -+ -+static struct bch_sb_field_journal_seq_blacklist * -+blacklist_entry_try_merge(struct bch_fs *c, -+ struct bch_sb_field_journal_seq_blacklist *bl, -+ unsigned i) -+{ -+ unsigned nr = blacklist_nr_entries(bl); -+ -+ if (le64_to_cpu(bl->start[i].end) >= -+ le64_to_cpu(bl->start[i + 1].start)) { -+ bl->start[i].end = bl->start[i + 1].end; -+ --nr; -+ memmove(&bl->start[i], -+ &bl->start[i + 1], -+ sizeof(bl->start[0]) * (nr - i)); -+ -+ bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, -+ sb_blacklist_u64s(nr)); -+ BUG_ON(!bl); -+ } -+ -+ return bl; -+} -+ -+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl; -+ unsigned i, nr; -+ int ret = 0; -+ -+ mutex_lock(&c->sb_lock); -+ bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); -+ nr = blacklist_nr_entries(bl); -+ -+ if (bl) { -+ for (i = 0; i < nr; i++) { -+ struct journal_seq_blacklist_entry *e = -+ bl->start + i; -+ -+ if (start == le64_to_cpu(e->start) && -+ end == le64_to_cpu(e->end)) -+ goto out; -+ -+ if (start <= le64_to_cpu(e->start) && -+ end >= le64_to_cpu(e->end)) { -+ e->start = cpu_to_le64(start); -+ e->end = cpu_to_le64(end); -+ -+ if (i + 1 < nr) -+ bl = blacklist_entry_try_merge(c, -+ bl, i); -+ if (i) -+ bl = blacklist_entry_try_merge(c, -+ bl, i - 1); -+ goto out_write_sb; -+ } -+ } -+ } -+ -+ bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, -+ sb_blacklist_u64s(nr + 1)); -+ if (!bl) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ bl->start[nr].start = cpu_to_le64(start); -+ bl->start[nr].end = cpu_to_le64(end); -+out_write_sb: -+ c->disk_sb.sb->features[0] |= -+ 1ULL << BCH_FEATURE_journal_seq_blacklist_v3; -+ -+ ret = bch2_write_super(c); -+out: -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+static int journal_seq_blacklist_table_cmp(const void *_l, -+ const void *_r, size_t size) -+{ -+ const struct journal_seq_blacklist_table_entry *l = _l; -+ const struct journal_seq_blacklist_table_entry *r = _r; -+ -+ return cmp_int(l->start, r->start); -+} -+ -+bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq, -+ bool dirty) -+{ -+ struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table; -+ struct journal_seq_blacklist_table_entry search = { .start = seq }; -+ int idx; -+ -+ if (!t) -+ return false; -+ -+ idx = eytzinger0_find_le(t->entries, t->nr, -+ sizeof(t->entries[0]), -+ journal_seq_blacklist_table_cmp, -+ &search); -+ if (idx < 0) -+ return false; -+ -+ BUG_ON(t->entries[idx].start > seq); -+ -+ if (seq >= t->entries[idx].end) -+ return false; -+ -+ if (dirty) -+ t->entries[idx].dirty = true; -+ return true; -+} -+ -+int bch2_blacklist_table_initialize(struct bch_fs *c) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl = -+ bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); -+ struct journal_seq_blacklist_table *t; -+ unsigned i, nr = blacklist_nr_entries(bl); -+ -+ BUG_ON(c->journal_seq_blacklist_table); -+ -+ if (!bl) -+ return 0; -+ -+ t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr, -+ GFP_KERNEL); -+ if (!t) -+ return -ENOMEM; -+ -+ t->nr = nr; -+ -+ for (i = 0; i < nr; i++) { -+ t->entries[i].start = le64_to_cpu(bl->start[i].start); -+ t->entries[i].end = le64_to_cpu(bl->start[i].end); -+ } -+ -+ eytzinger0_sort(t->entries, -+ t->nr, -+ sizeof(t->entries[0]), -+ journal_seq_blacklist_table_cmp, -+ NULL); 
-+ -+ c->journal_seq_blacklist_table = t; -+ return 0; -+} -+ -+static const char * -+bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl = -+ field_to_type(f, journal_seq_blacklist); -+ struct journal_seq_blacklist_entry *i; -+ unsigned nr = blacklist_nr_entries(bl); -+ -+ for (i = bl->start; i < bl->start + nr; i++) { -+ if (le64_to_cpu(i->start) >= -+ le64_to_cpu(i->end)) -+ return "entry start >= end"; -+ -+ if (i + 1 < bl->start + nr && -+ le64_to_cpu(i[0].end) > -+ le64_to_cpu(i[1].start)) -+ return "entries out of order"; -+ } -+ -+ return NULL; -+} -+ -+static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out, -+ struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl = -+ field_to_type(f, journal_seq_blacklist); -+ struct journal_seq_blacklist_entry *i; -+ unsigned nr = blacklist_nr_entries(bl); -+ -+ for (i = bl->start; i < bl->start + nr; i++) { -+ if (i != bl->start) -+ pr_buf(out, " "); -+ -+ pr_buf(out, "%llu-%llu", -+ le64_to_cpu(i->start), -+ le64_to_cpu(i->end)); -+ } -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = { -+ .validate = bch2_sb_journal_seq_blacklist_validate, -+ .to_text = bch2_sb_journal_seq_blacklist_to_text -+}; -+ -+void bch2_blacklist_entries_gc(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(work, struct bch_fs, -+ journal_seq_blacklist_gc_work); -+ struct journal_seq_blacklist_table *t; -+ struct bch_sb_field_journal_seq_blacklist *bl; -+ struct journal_seq_blacklist_entry *src, *dst; -+ struct btree_trans trans; -+ unsigned i, nr, new_nr; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ struct btree_iter *iter; -+ struct btree *b; -+ -+ for_each_btree_node(&trans, iter, i, POS_MIN, -+ BTREE_ITER_PREFETCH, b) -+ if (test_bit(BCH_FS_STOPPING, &c->flags)) { -+ bch2_trans_exit(&trans); -+ return; -+ } -+ bch2_trans_iter_free(&trans, iter); -+ } -+ -+ ret = bch2_trans_exit(&trans); -+ if (ret) -+ return; -+ -+ mutex_lock(&c->sb_lock); -+ bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); -+ if (!bl) -+ goto out; -+ -+ nr = blacklist_nr_entries(bl); -+ dst = bl->start; -+ -+ t = c->journal_seq_blacklist_table; -+ BUG_ON(nr != t->nr); -+ -+ for (src = bl->start, i = eytzinger0_first(t->nr); -+ src < bl->start + nr; -+ src++, i = eytzinger0_next(i, nr)) { -+ BUG_ON(t->entries[i].start != le64_to_cpu(src->start)); -+ BUG_ON(t->entries[i].end != le64_to_cpu(src->end)); -+ -+ if (t->entries[i].dirty) -+ *dst++ = *src; -+ } -+ -+ new_nr = dst - bl->start; -+ -+ bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr); -+ -+ if (new_nr != nr) { -+ bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, -+ new_nr ? sb_blacklist_u64s(new_nr) : 0); -+ BUG_ON(new_nr && !bl); -+ -+ if (!new_nr) -+ c->disk_sb.sb->features[0] &= -+ ~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3); -+ -+ bch2_write_super(c); -+ } -+out: -+ mutex_unlock(&c->sb_lock); -+} -diff --git a/fs/bcachefs/journal_seq_blacklist.h b/fs/bcachefs/journal_seq_blacklist.h -new file mode 100644 -index 000000000000..afb886ec8e25 ---- /dev/null -+++ b/fs/bcachefs/journal_seq_blacklist.h -@@ -0,0 +1,22 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H -+#define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H -+ -+static inline unsigned -+blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl) -+{ -+ return bl -+ ? 
((vstruct_end(&bl->field) - (void *) &bl->start[0]) / -+ sizeof(struct journal_seq_blacklist_entry)) -+ : 0; -+} -+ -+bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool); -+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64); -+int bch2_blacklist_table_initialize(struct bch_fs *); -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist; -+ -+void bch2_blacklist_entries_gc(struct work_struct *); -+ -+#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */ -diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h -new file mode 100644 -index 000000000000..154b51b891d3 ---- /dev/null -+++ b/fs/bcachefs/journal_types.h -@@ -0,0 +1,277 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_TYPES_H -+#define _BCACHEFS_JOURNAL_TYPES_H -+ -+#include -+#include -+ -+#include "alloc_types.h" -+#include "super_types.h" -+#include "fifo.h" -+ -+struct journal_res; -+ -+/* -+ * We put two of these in struct journal; we used them for writes to the -+ * journal that are being staged or in flight. -+ */ -+struct journal_buf { -+ struct jset *data; -+ -+ BKEY_PADDED(key); -+ -+ struct closure_waitlist wait; -+ -+ unsigned buf_size; /* size in bytes of @data */ -+ unsigned sectors; /* maximum size for current entry */ -+ unsigned disk_sectors; /* maximum size entry could have been, if -+ buf_size was bigger */ -+ unsigned u64s_reserved; -+ /* bloom filter: */ -+ unsigned long has_inode[1024 / sizeof(unsigned long)]; -+}; -+ -+/* -+ * Something that makes a journal entry dirty - i.e. a btree node that has to be -+ * flushed: -+ */ -+ -+struct journal_entry_pin_list { -+ struct list_head list; -+ struct list_head flushed; -+ atomic_t count; -+ struct bch_devs_list devs; -+}; -+ -+struct journal; -+struct journal_entry_pin; -+typedef void (*journal_pin_flush_fn)(struct journal *j, -+ struct journal_entry_pin *, u64); -+ -+struct journal_entry_pin { -+ struct list_head list; -+ journal_pin_flush_fn flush; -+ u64 seq; -+}; -+ -+struct journal_res { -+ bool ref; -+ u8 idx; -+ u16 u64s; -+ u32 offset; -+ u64 seq; -+}; -+ -+/* -+ * For reserving space in the journal prior to getting a reservation on a -+ * particular journal entry: -+ */ -+struct journal_preres { -+ unsigned u64s; -+}; -+ -+union journal_res_state { -+ struct { -+ atomic64_t counter; -+ }; -+ -+ struct { -+ u64 v; -+ }; -+ -+ struct { -+ u64 cur_entry_offset:20, -+ idx:1, -+ prev_buf_unwritten:1, -+ buf0_count:21, -+ buf1_count:21; -+ }; -+}; -+ -+union journal_preres_state { -+ struct { -+ atomic64_t counter; -+ }; -+ -+ struct { -+ u64 v; -+ }; -+ -+ struct { -+ u32 reserved; -+ u32 remaining; -+ }; -+}; -+ -+/* bytes: */ -+#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */ -+#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */ -+ -+/* -+ * We stash some journal state as sentinal values in cur_entry_offset: -+ * note - cur_entry_offset is in units of u64s -+ */ -+#define JOURNAL_ENTRY_OFFSET_MAX ((1U << 20) - 1) -+ -+#define JOURNAL_ENTRY_CLOSED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 1) -+#define JOURNAL_ENTRY_ERROR_VAL (JOURNAL_ENTRY_OFFSET_MAX) -+ -+/* -+ * JOURNAL_NEED_WRITE - current (pending) journal entry should be written ASAP, -+ * either because something's waiting on the write to complete or because it's -+ * been dirty too long and the timer's expired. 
-+ */ -+ -+enum { -+ JOURNAL_REPLAY_DONE, -+ JOURNAL_STARTED, -+ JOURNAL_RECLAIM_STARTED, -+ JOURNAL_NEED_WRITE, -+ JOURNAL_NOT_EMPTY, -+ JOURNAL_MAY_GET_UNRESERVED, -+}; -+ -+/* Embedded in struct bch_fs */ -+struct journal { -+ /* Fastpath stuff up front: */ -+ -+ unsigned long flags; -+ -+ union journal_res_state reservations; -+ -+ /* Max size of current journal entry */ -+ unsigned cur_entry_u64s; -+ unsigned cur_entry_sectors; -+ -+ /* -+ * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if -+ * insufficient devices: -+ */ -+ int cur_entry_error; -+ -+ union journal_preres_state prereserved; -+ -+ /* Reserved space in journal entry to be used just prior to write */ -+ unsigned entry_u64s_reserved; -+ -+ unsigned buf_size_want; -+ -+ /* -+ * Two journal entries -- one is currently open for new entries, the -+ * other is possibly being written out. -+ */ -+ struct journal_buf buf[2]; -+ -+ spinlock_t lock; -+ -+ /* if nonzero, we may not open a new journal entry: */ -+ unsigned blocked; -+ -+ /* Used when waiting because the journal was full */ -+ wait_queue_head_t wait; -+ struct closure_waitlist async_wait; -+ struct closure_waitlist preres_wait; -+ -+ struct closure io; -+ struct delayed_work write_work; -+ -+ /* Sequence number of most recent journal entry (last entry in @pin) */ -+ atomic64_t seq; -+ -+ /* seq, last_seq from the most recent journal entry successfully written */ -+ u64 seq_ondisk; -+ u64 last_seq_ondisk; -+ -+ /* -+ * FIFO of journal entries whose btree updates have not yet been -+ * written out. -+ * -+ * Each entry is a reference count. The position in the FIFO is the -+ * entry's sequence number relative to @seq. -+ * -+ * The journal entry itself holds a reference count, put when the -+ * journal entry is written out. Each btree node modified by the journal -+ * entry also holds a reference count, put when the btree node is -+ * written. -+ * -+ * When a reference count reaches zero, the journal entry is no longer -+ * needed. When all journal entries in the oldest journal bucket are no -+ * longer needed, the bucket can be discarded and reused. -+ */ -+ struct { -+ u64 front, back, size, mask; -+ struct journal_entry_pin_list *data; -+ } pin; -+ -+ u64 replay_journal_seq; -+ u64 replay_journal_seq_end; -+ -+ struct write_point wp; -+ spinlock_t err_lock; -+ -+ struct delayed_work reclaim_work; -+ struct mutex reclaim_lock; -+ unsigned long last_flushed; -+ struct journal_entry_pin *flush_in_progress; -+ wait_queue_head_t pin_flush_wait; -+ -+ /* protects advancing ja->discard_idx: */ -+ struct mutex discard_lock; -+ bool can_discard; -+ -+ unsigned write_delay_ms; -+ unsigned reclaim_delay_ms; -+ -+ u64 res_get_blocked_start; -+ u64 need_write_time; -+ u64 write_start_time; -+ -+ struct time_stats *write_time; -+ struct time_stats *delay_time; -+ struct time_stats *blocked_time; -+ struct time_stats *flush_seq_time; -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map res_map; -+#endif -+}; -+ -+/* -+ * Embedded in struct bch_dev. First three fields refer to the array of journal -+ * buckets, in bch_sb. -+ */ -+struct journal_device { -+ /* -+ * For each journal bucket, contains the max sequence number of the -+ * journal writes it contains - so we know when a bucket can be reused. 
-+ */ -+ u64 *bucket_seq; -+ -+ unsigned sectors_free; -+ -+ /* -+ * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx: -+ */ -+ unsigned discard_idx; /* Next bucket to discard */ -+ unsigned dirty_idx_ondisk; -+ unsigned dirty_idx; -+ unsigned cur_idx; /* Journal bucket we're currently writing to */ -+ unsigned nr; -+ -+ u64 *buckets; -+ -+ /* Bio for journal reads/writes to this device */ -+ struct bio *bio; -+ -+ /* for bch_journal_read_device */ -+ struct closure read; -+}; -+ -+/* -+ * journal_entry_res - reserve space in every journal entry: -+ */ -+struct journal_entry_res { -+ unsigned u64s; -+}; -+ -+#endif /* _BCACHEFS_JOURNAL_TYPES_H */ -diff --git a/fs/bcachefs/keylist.c b/fs/bcachefs/keylist.c -new file mode 100644 -index 000000000000..864dfaa67b7a ---- /dev/null -+++ b/fs/bcachefs/keylist.c -@@ -0,0 +1,67 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "keylist.h" -+ -+int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s, -+ size_t nr_inline_u64s, size_t new_u64s) -+{ -+ size_t oldsize = bch2_keylist_u64s(l); -+ size_t newsize = oldsize + new_u64s; -+ u64 *old_buf = l->keys_p == inline_u64s ? NULL : l->keys_p; -+ u64 *new_keys; -+ -+ newsize = roundup_pow_of_two(newsize); -+ -+ if (newsize <= nr_inline_u64s || -+ (old_buf && roundup_pow_of_two(oldsize) == newsize)) -+ return 0; -+ -+ new_keys = krealloc(old_buf, sizeof(u64) * newsize, GFP_NOIO); -+ if (!new_keys) -+ return -ENOMEM; -+ -+ if (!old_buf) -+ memcpy_u64s(new_keys, inline_u64s, oldsize); -+ -+ l->keys_p = new_keys; -+ l->top_p = new_keys + oldsize; -+ -+ return 0; -+} -+ -+void bch2_keylist_add_in_order(struct keylist *l, struct bkey_i *insert) -+{ -+ struct bkey_i *where; -+ -+ for_each_keylist_key(l, where) -+ if (bkey_cmp(insert->k.p, where->k.p) < 0) -+ break; -+ -+ memmove_u64s_up((u64 *) where + insert->k.u64s, -+ where, -+ ((u64 *) l->top) - ((u64 *) where)); -+ -+ l->top_p += insert->k.u64s; -+ bkey_copy(where, insert); -+} -+ -+void bch2_keylist_pop_front(struct keylist *l) -+{ -+ l->top_p -= bch2_keylist_front(l)->k.u64s; -+ -+ memmove_u64s_down(l->keys, -+ bkey_next(l->keys), -+ bch2_keylist_u64s(l)); -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_verify_keylist_sorted(struct keylist *l) -+{ -+ struct bkey_i *k; -+ -+ for_each_keylist_key(l, k) -+ BUG_ON(bkey_next(k) != l->top && -+ bkey_cmp(k->k.p, bkey_next(k)->k.p) >= 0); -+} -+#endif -diff --git a/fs/bcachefs/keylist.h b/fs/bcachefs/keylist.h -new file mode 100644 -index 000000000000..195799bb20bc ---- /dev/null -+++ b/fs/bcachefs/keylist.h -@@ -0,0 +1,76 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_KEYLIST_H -+#define _BCACHEFS_KEYLIST_H -+ -+#include "keylist_types.h" -+ -+int bch2_keylist_realloc(struct keylist *, u64 *, size_t, size_t); -+void bch2_keylist_add_in_order(struct keylist *, struct bkey_i *); -+void bch2_keylist_pop_front(struct keylist *); -+ -+static inline void bch2_keylist_init(struct keylist *l, u64 *inline_keys) -+{ -+ l->top_p = l->keys_p = inline_keys; -+} -+ -+static inline void bch2_keylist_free(struct keylist *l, u64 *inline_keys) -+{ -+ if (l->keys_p != inline_keys) -+ kfree(l->keys_p); -+ bch2_keylist_init(l, inline_keys); -+} -+ -+static inline void bch2_keylist_push(struct keylist *l) -+{ -+ l->top = bkey_next(l->top); -+} -+ -+static inline void bch2_keylist_add(struct keylist *l, const struct bkey_i *k) -+{ -+ bkey_copy(l->top, k); -+ bch2_keylist_push(l); -+} -+ -+static inline bool bch2_keylist_empty(struct keylist *l) -+{ -+ return l->top == 
l->keys; -+} -+ -+static inline size_t bch2_keylist_u64s(struct keylist *l) -+{ -+ return l->top_p - l->keys_p; -+} -+ -+static inline size_t bch2_keylist_bytes(struct keylist *l) -+{ -+ return bch2_keylist_u64s(l) * sizeof(u64); -+} -+ -+static inline struct bkey_i *bch2_keylist_front(struct keylist *l) -+{ -+ return l->keys; -+} -+ -+#define for_each_keylist_key(_keylist, _k) \ -+ for (_k = (_keylist)->keys; \ -+ _k != (_keylist)->top; \ -+ _k = bkey_next(_k)) -+ -+static inline u64 keylist_sectors(struct keylist *keys) -+{ -+ struct bkey_i *k; -+ u64 ret = 0; -+ -+ for_each_keylist_key(keys, k) -+ ret += k->k.size; -+ -+ return ret; -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_verify_keylist_sorted(struct keylist *); -+#else -+static inline void bch2_verify_keylist_sorted(struct keylist *l) {} -+#endif -+ -+#endif /* _BCACHEFS_KEYLIST_H */ -diff --git a/fs/bcachefs/keylist_types.h b/fs/bcachefs/keylist_types.h -new file mode 100644 -index 000000000000..4b3ff7d8a875 ---- /dev/null -+++ b/fs/bcachefs/keylist_types.h -@@ -0,0 +1,16 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_KEYLIST_TYPES_H -+#define _BCACHEFS_KEYLIST_TYPES_H -+ -+struct keylist { -+ union { -+ struct bkey_i *keys; -+ u64 *keys_p; -+ }; -+ union { -+ struct bkey_i *top; -+ u64 *top_p; -+ }; -+}; -+ -+#endif /* _BCACHEFS_KEYLIST_TYPES_H */ -diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c -new file mode 100644 -index 000000000000..96c8690adc5b ---- /dev/null -+++ b/fs/bcachefs/migrate.c -@@ -0,0 +1,170 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Code for moving data off a device. -+ */ -+ -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "extents.h" -+#include "io.h" -+#include "journal.h" -+#include "keylist.h" -+#include "migrate.h" -+#include "move.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k, -+ unsigned dev_idx, int flags, bool metadata) -+{ -+ unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas; -+ unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST; -+ unsigned degraded = metadata ? 
BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED; -+ unsigned nr_good; -+ -+ bch2_bkey_drop_device(k, dev_idx); -+ -+ nr_good = bch2_bkey_durability(c, k.s_c); -+ if ((!nr_good && !(flags & lost)) || -+ (nr_good < replicas && !(flags & degraded))) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags, -+ enum btree_id btree_id) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_on_stack sk; -+ int ret = 0; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, -+ BTREE_ITER_PREFETCH); -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret = bkey_err(k))) { -+ if (!bch2_bkey_has_device(k, dev_idx)) { -+ bch2_btree_iter_next(iter); -+ continue; -+ } -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ -+ ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k), -+ dev_idx, flags, false); -+ if (ret) -+ break; -+ -+ /* -+ * If the new extent no longer has any pointers, bch2_extent_normalize() -+ * will do the appropriate thing with it (turning it into a -+ * KEY_TYPE_error key, or just a discard if it was a cached extent) -+ */ -+ bch2_extent_normalize(c, bkey_i_to_s(sk.k)); -+ -+ bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); -+ -+ bch2_trans_update(&trans, iter, sk.k, 0); -+ -+ ret = bch2_trans_commit(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+ -+ /* -+ * don't want to leave ret == -EINTR, since if we raced and -+ * something else overwrote the key we could spuriously return -+ * -EINTR below: -+ */ -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ break; -+ } -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ bkey_on_stack_exit(&sk, c); -+ -+ BUG_ON(ret == -EINTR); -+ -+ return ret; -+} -+ -+static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) -+{ -+ return __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_EXTENTS) ?: -+ __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_REFLINK); -+} -+ -+static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct closure cl; -+ struct btree *b; -+ unsigned id; -+ int ret; -+ -+ /* don't handle this yet: */ -+ if (flags & BCH_FORCE_IF_METADATA_LOST) -+ return -EINVAL; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ closure_init_stack(&cl); -+ -+ for (id = 0; id < BTREE_ID_NR; id++) { -+ for_each_btree_node(&trans, iter, id, POS_MIN, -+ BTREE_ITER_PREFETCH, b) { -+ __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; -+retry: -+ if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key), -+ dev_idx)) -+ continue; -+ -+ bkey_copy(&tmp.k, &b->key); -+ -+ ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.k), -+ dev_idx, flags, true); -+ if (ret) { -+ bch_err(c, "Cannot drop device without losing data"); -+ goto err; -+ } -+ -+ ret = bch2_btree_node_update_key(c, iter, b, &tmp.k); -+ if (ret == -EINTR) { -+ b = bch2_btree_iter_peek_node(iter); -+ goto retry; -+ } -+ if (ret) { -+ bch_err(c, "Error updating btree node key: %i", ret); -+ goto err; -+ } -+ } -+ bch2_trans_iter_free(&trans, iter); -+ } -+ -+ /* flush relevant btree updates */ -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+ -+ ret = 0; -+err: -+ ret = bch2_trans_exit(&trans) ?: ret; -+ -+ BUG_ON(ret == -EINTR); -+ -+ return ret; -+} -+ -+int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags) -+{ -+ return bch2_dev_usrdata_drop(c, 
dev_idx, flags) ?: -+ bch2_dev_metadata_drop(c, dev_idx, flags); -+} -diff --git a/fs/bcachefs/migrate.h b/fs/bcachefs/migrate.h -new file mode 100644 -index 000000000000..027efaa0d575 ---- /dev/null -+++ b/fs/bcachefs/migrate.h -@@ -0,0 +1,7 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MIGRATE_H -+#define _BCACHEFS_MIGRATE_H -+ -+int bch2_dev_data_drop(struct bch_fs *, unsigned, int); -+ -+#endif /* _BCACHEFS_MIGRATE_H */ -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -new file mode 100644 -index 000000000000..2f3be487ef65 ---- /dev/null -+++ b/fs/bcachefs/move.c -@@ -0,0 +1,819 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_on_stack.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "disk_groups.h" -+#include "inode.h" -+#include "io.h" -+#include "journal_reclaim.h" -+#include "move.h" -+#include "replicas.h" -+#include "super-io.h" -+#include "keylist.h" -+ -+#include -+#include -+ -+#include -+ -+#define SECTORS_IN_FLIGHT_PER_DEVICE 2048 -+ -+struct moving_io { -+ struct list_head list; -+ struct closure cl; -+ bool read_completed; -+ -+ unsigned read_sectors; -+ unsigned write_sectors; -+ -+ struct bch_read_bio rbio; -+ -+ struct migrate_write write; -+ /* Must be last since it is variable size */ -+ struct bio_vec bi_inline_vecs[0]; -+}; -+ -+struct moving_context { -+ /* Closure for waiting on all reads and writes to complete */ -+ struct closure cl; -+ -+ struct bch_move_stats *stats; -+ -+ struct list_head reads; -+ -+ /* in flight sectors: */ -+ atomic_t read_sectors; -+ atomic_t write_sectors; -+ -+ wait_queue_head_t wait; -+}; -+ -+static int bch2_migrate_index_update(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct migrate_write *m = -+ container_of(op, struct migrate_write, op); -+ struct keylist *keys = &op->insert_keys; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, m->btree_id, -+ bkey_start_pos(&bch2_keylist_front(keys)->k), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ -+ while (1) { -+ struct bkey_s_c k; -+ struct bkey_i *insert; -+ struct bkey_i_extent *new; -+ BKEY_PADDED(k) _new, _insert; -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ bool did_work = false; -+ int nr; -+ -+ bch2_trans_reset(&trans, 0); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) { -+ if (ret == -EINTR) -+ continue; -+ break; -+ } -+ -+ new = bkey_i_to_extent(bch2_keylist_front(keys)); -+ -+ if (bversion_cmp(k.k->version, new->k.version) || -+ !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset)) -+ goto nomatch; -+ -+ if (m->data_cmd == DATA_REWRITE && -+ !bch2_bkey_has_device(k, m->data_opts.rewrite_dev)) -+ goto nomatch; -+ -+ bkey_reassemble(&_insert.k, k); -+ insert = &_insert.k; -+ -+ bkey_copy(&_new.k, bch2_keylist_front(keys)); -+ new = bkey_i_to_extent(&_new.k); -+ bch2_cut_front(iter->pos, &new->k_i); -+ -+ bch2_cut_front(iter->pos, insert); -+ bch2_cut_back(new->k.p, insert); -+ bch2_cut_back(insert->k.p, &new->k_i); -+ -+ if (m->data_cmd == DATA_REWRITE) -+ bch2_bkey_drop_device(bkey_i_to_s(insert), -+ m->data_opts.rewrite_dev); -+ -+ extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) { -+ if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) { -+ /* -+ * raced with another move op? 
extent already -+ * has a pointer to the device we just wrote -+ * data to -+ */ -+ continue; -+ } -+ -+ bch2_extent_ptr_decoded_append(insert, &p); -+ did_work = true; -+ } -+ -+ if (!did_work) -+ goto nomatch; -+ -+ bch2_bkey_narrow_crcs(insert, -+ (struct bch_extent_crc_unpacked) { 0 }); -+ bch2_extent_normalize(c, bkey_i_to_s(insert)); -+ bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert), -+ op->opts.background_target, -+ op->opts.data_replicas); -+ -+ /* -+ * If we're not fully overwriting @k, and it's compressed, we -+ * need a reservation for all the pointers in @insert -+ */ -+ nr = bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(insert)) - -+ m->nr_ptrs_reserved; -+ -+ if (insert->k.size < k.k->size && -+ bch2_bkey_sectors_compressed(k) && -+ nr > 0) { -+ ret = bch2_disk_reservation_add(c, &op->res, -+ keylist_sectors(keys) * nr, 0); -+ if (ret) -+ goto out; -+ -+ m->nr_ptrs_reserved += nr; -+ goto next; -+ } -+ -+ bch2_trans_update(&trans, iter, insert, 0); -+ -+ ret = bch2_trans_commit(&trans, &op->res, -+ op_journal_seq(op), -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ m->data_opts.btree_insert_flags); -+ if (!ret) -+ atomic_long_inc(&c->extent_migrate_done); -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ break; -+next: -+ while (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) >= 0) { -+ bch2_keylist_pop_front(keys); -+ if (bch2_keylist_empty(keys)) -+ goto out; -+ } -+ continue; -+nomatch: -+ if (m->ctxt) { -+ BUG_ON(k.k->p.offset <= iter->pos.offset); -+ atomic64_inc(&m->ctxt->stats->keys_raced); -+ atomic64_add(k.k->p.offset - iter->pos.offset, -+ &m->ctxt->stats->sectors_raced); -+ } -+ atomic_long_inc(&c->extent_migrate_raced); -+ trace_move_race(&new->k); -+ bch2_btree_iter_next_slot(iter); -+ goto next; -+ } -+out: -+ bch2_trans_exit(&trans); -+ BUG_ON(ret == -EINTR); -+ return ret; -+} -+ -+void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio) -+{ -+ /* write bio must own pages: */ -+ BUG_ON(!m->op.wbio.bio.bi_vcnt); -+ -+ m->ptr = rbio->pick.ptr; -+ m->offset = rbio->pos.offset - rbio->pick.crc.offset; -+ m->op.devs_have = rbio->devs_have; -+ m->op.pos = rbio->pos; -+ m->op.version = rbio->version; -+ m->op.crc = rbio->pick.crc; -+ m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9; -+ -+ if (bch2_csum_type_is_encryption(m->op.crc.csum_type)) { -+ m->op.nonce = m->op.crc.nonce + m->op.crc.offset; -+ m->op.csum_type = m->op.crc.csum_type; -+ } -+ -+ if (m->data_cmd == DATA_REWRITE) -+ bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev); -+} -+ -+int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, -+ struct write_point_specifier wp, -+ struct bch_io_opts io_opts, -+ enum data_cmd data_cmd, -+ struct data_opts data_opts, -+ enum btree_id btree_id, -+ struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ int ret; -+ -+ m->btree_id = btree_id; -+ m->data_cmd = data_cmd; -+ m->data_opts = data_opts; -+ m->nr_ptrs_reserved = 0; -+ -+ bch2_write_op_init(&m->op, c, io_opts); -+ -+ if (!bch2_bkey_is_incompressible(k)) -+ m->op.compression_type = -+ bch2_compression_opt_to_type[io_opts.background_compression ?: -+ io_opts.compression]; -+ else -+ m->op.incompressible = true; -+ -+ m->op.target = data_opts.target, -+ m->op.write_point = wp; -+ -+ if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) { -+ m->op.alloc_reserve = RESERVE_MOVINGGC; -+ m->op.flags |= BCH_WRITE_ALLOC_NOWAIT; -+ } 
else { -+ /* XXX: this should probably be passed in */ -+ m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS; -+ } -+ -+ m->op.flags |= BCH_WRITE_PAGES_STABLE| -+ BCH_WRITE_PAGES_OWNED| -+ BCH_WRITE_DATA_ENCODED| -+ BCH_WRITE_FROM_INTERNAL; -+ -+ m->op.nr_replicas = 1; -+ m->op.nr_replicas_required = 1; -+ m->op.index_update_fn = bch2_migrate_index_update; -+ -+ switch (data_cmd) { -+ case DATA_ADD_REPLICAS: { -+ /* -+ * DATA_ADD_REPLICAS is used for moving data to a different -+ * device in the background, and due to compression the new copy -+ * might take up more space than the old copy: -+ */ -+#if 0 -+ int nr = (int) io_opts.data_replicas - -+ bch2_bkey_nr_ptrs_allocated(k); -+#endif -+ int nr = (int) io_opts.data_replicas; -+ -+ if (nr > 0) { -+ m->op.nr_replicas = m->nr_ptrs_reserved = nr; -+ -+ ret = bch2_disk_reservation_get(c, &m->op.res, -+ k.k->size, m->op.nr_replicas, 0); -+ if (ret) -+ return ret; -+ } -+ break; -+ } -+ case DATA_REWRITE: { -+ unsigned compressed_sectors = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && -+ crc_is_compressed(p.crc) && -+ bch2_dev_in_target(c, p.ptr.dev, data_opts.target)) -+ compressed_sectors += p.crc.compressed_size; -+ -+ if (compressed_sectors) { -+ ret = bch2_disk_reservation_add(c, &m->op.res, -+ compressed_sectors, -+ BCH_DISK_RESERVATION_NOFAIL); -+ if (ret) -+ return ret; -+ } -+ break; -+ } -+ case DATA_PROMOTE: -+ m->op.flags |= BCH_WRITE_ALLOC_NOWAIT; -+ m->op.flags |= BCH_WRITE_CACHED; -+ break; -+ default: -+ BUG(); -+ } -+ -+ return 0; -+} -+ -+static void move_free(struct closure *cl) -+{ -+ struct moving_io *io = container_of(cl, struct moving_io, cl); -+ struct moving_context *ctxt = io->write.ctxt; -+ struct bvec_iter_all iter; -+ struct bio_vec *bv; -+ -+ bch2_disk_reservation_put(io->write.op.c, &io->write.op.res); -+ -+ bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter) -+ if (bv->bv_page) -+ __free_page(bv->bv_page); -+ -+ wake_up(&ctxt->wait); -+ -+ kfree(io); -+} -+ -+static void move_write_done(struct closure *cl) -+{ -+ struct moving_io *io = container_of(cl, struct moving_io, cl); -+ -+ atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors); -+ closure_return_with_destructor(cl, move_free); -+} -+ -+static void move_write(struct closure *cl) -+{ -+ struct moving_io *io = container_of(cl, struct moving_io, cl); -+ -+ if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) { -+ closure_return_with_destructor(cl, move_free); -+ return; -+ } -+ -+ bch2_migrate_read_done(&io->write, &io->rbio); -+ -+ atomic_add(io->write_sectors, &io->write.ctxt->write_sectors); -+ closure_call(&io->write.op.cl, bch2_write, NULL, cl); -+ continue_at(cl, move_write_done, NULL); -+} -+ -+static inline struct moving_io *next_pending_write(struct moving_context *ctxt) -+{ -+ struct moving_io *io = -+ list_first_entry_or_null(&ctxt->reads, struct moving_io, list); -+ -+ return io && io->read_completed ? 
io : NULL; -+} -+ -+static void move_read_endio(struct bio *bio) -+{ -+ struct moving_io *io = container_of(bio, struct moving_io, rbio.bio); -+ struct moving_context *ctxt = io->write.ctxt; -+ -+ atomic_sub(io->read_sectors, &ctxt->read_sectors); -+ io->read_completed = true; -+ -+ if (next_pending_write(ctxt)) -+ wake_up(&ctxt->wait); -+ -+ closure_put(&ctxt->cl); -+} -+ -+static void do_pending_writes(struct moving_context *ctxt) -+{ -+ struct moving_io *io; -+ -+ while ((io = next_pending_write(ctxt))) { -+ list_del(&io->list); -+ closure_call(&io->cl, move_write, NULL, &ctxt->cl); -+ } -+} -+ -+#define move_ctxt_wait_event(_ctxt, _cond) \ -+do { \ -+ do_pending_writes(_ctxt); \ -+ \ -+ if (_cond) \ -+ break; \ -+ __wait_event((_ctxt)->wait, \ -+ next_pending_write(_ctxt) || (_cond)); \ -+} while (1) -+ -+static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt) -+{ -+ unsigned sectors_pending = atomic_read(&ctxt->write_sectors); -+ -+ move_ctxt_wait_event(ctxt, -+ !atomic_read(&ctxt->write_sectors) || -+ atomic_read(&ctxt->write_sectors) != sectors_pending); -+} -+ -+static int bch2_move_extent(struct bch_fs *c, -+ struct moving_context *ctxt, -+ struct write_point_specifier wp, -+ struct bch_io_opts io_opts, -+ enum btree_id btree_id, -+ struct bkey_s_c k, -+ enum data_cmd data_cmd, -+ struct data_opts data_opts) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ struct moving_io *io; -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned sectors = k.k->size, pages; -+ int ret = -ENOMEM; -+ -+ move_ctxt_wait_event(ctxt, -+ atomic_read(&ctxt->write_sectors) < -+ SECTORS_IN_FLIGHT_PER_DEVICE); -+ -+ move_ctxt_wait_event(ctxt, -+ atomic_read(&ctxt->read_sectors) < -+ SECTORS_IN_FLIGHT_PER_DEVICE); -+ -+ /* write path might have to decompress data: */ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ sectors = max_t(unsigned, sectors, p.crc.uncompressed_size); -+ -+ pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); -+ io = kzalloc(sizeof(struct moving_io) + -+ sizeof(struct bio_vec) * pages, GFP_KERNEL); -+ if (!io) -+ goto err; -+ -+ io->write.ctxt = ctxt; -+ io->read_sectors = k.k->size; -+ io->write_sectors = k.k->size; -+ -+ bio_init(&io->write.op.wbio.bio, io->bi_inline_vecs, pages); -+ bio_set_prio(&io->write.op.wbio.bio, -+ IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); -+ -+ if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9, -+ GFP_KERNEL)) -+ goto err_free; -+ -+ io->rbio.c = c; -+ io->rbio.opts = io_opts; -+ bio_init(&io->rbio.bio, io->bi_inline_vecs, pages); -+ io->rbio.bio.bi_vcnt = pages; -+ bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); -+ io->rbio.bio.bi_iter.bi_size = sectors << 9; -+ -+ bio_set_op_attrs(&io->rbio.bio, REQ_OP_READ, 0); -+ io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); -+ io->rbio.bio.bi_end_io = move_read_endio; -+ -+ ret = bch2_migrate_write_init(c, &io->write, wp, io_opts, -+ data_cmd, data_opts, btree_id, k); -+ if (ret) -+ goto err_free_pages; -+ -+ atomic64_inc(&ctxt->stats->keys_moved); -+ atomic64_add(k.k->size, &ctxt->stats->sectors_moved); -+ -+ trace_move_extent(k.k); -+ -+ atomic_add(io->read_sectors, &ctxt->read_sectors); -+ list_add_tail(&io->list, &ctxt->reads); -+ -+ /* -+ * dropped by move_read_endio() - guards against use after free of -+ * ctxt when doing wakeup -+ */ -+ closure_get(&ctxt->cl); -+ bch2_read_extent(c, &io->rbio, k, 0, -+ BCH_READ_NODECODE| -+ BCH_READ_LAST_FRAGMENT); -+ return 0; -+err_free_pages: -+ bio_free_pages(&io->write.op.wbio.bio); 
-+err_free: -+ kfree(io); -+err: -+ trace_move_alloc_fail(k.k); -+ return ret; -+} -+ -+static int __bch2_move_data(struct bch_fs *c, -+ struct moving_context *ctxt, -+ struct bch_ratelimit *rate, -+ struct write_point_specifier wp, -+ struct bpos start, -+ struct bpos end, -+ move_pred_fn pred, void *arg, -+ struct bch_move_stats *stats, -+ enum btree_id btree_id) -+{ -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); -+ struct bkey_on_stack sk; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct data_opts data_opts; -+ enum data_cmd data_cmd; -+ u64 delay, cur_inum = U64_MAX; -+ int ret = 0, ret2; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ stats->data_type = BCH_DATA_user; -+ stats->btree_id = btree_id; -+ stats->pos = POS_MIN; -+ -+ iter = bch2_trans_get_iter(&trans, btree_id, start, -+ BTREE_ITER_PREFETCH); -+ -+ if (rate) -+ bch2_ratelimit_reset(rate); -+ -+ while (1) { -+ do { -+ delay = rate ? bch2_ratelimit_delay(rate) : 0; -+ -+ if (delay) { -+ bch2_trans_unlock(&trans); -+ set_current_state(TASK_INTERRUPTIBLE); -+ } -+ -+ if (kthread && (ret = kthread_should_stop())) { -+ __set_current_state(TASK_RUNNING); -+ goto out; -+ } -+ -+ if (delay) -+ schedule_timeout(delay); -+ -+ if (unlikely(freezing(current))) { -+ bch2_trans_unlock(&trans); -+ move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); -+ try_to_freeze(); -+ } -+ } while (delay); -+peek: -+ k = bch2_btree_iter_peek(iter); -+ -+ stats->pos = iter->pos; -+ -+ if (!k.k) -+ break; -+ ret = bkey_err(k); -+ if (ret) -+ break; -+ if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) -+ break; -+ -+ if (!bkey_extent_is_direct_data(k.k)) -+ goto next_nondata; -+ -+ if (btree_id == BTREE_ID_EXTENTS && -+ cur_inum != k.k->p.inode) { -+ struct bch_inode_unpacked inode; -+ -+ /* don't hold btree locks while looking up inode: */ -+ bch2_trans_unlock(&trans); -+ -+ io_opts = bch2_opts_to_inode_opts(c->opts); -+ if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode)) -+ bch2_io_opts_apply(&io_opts, bch2_inode_opts_get(&inode)); -+ cur_inum = k.k->p.inode; -+ goto peek; -+ } -+ -+ switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) { -+ case DATA_SKIP: -+ goto next; -+ case DATA_SCRUB: -+ BUG(); -+ case DATA_ADD_REPLICAS: -+ case DATA_REWRITE: -+ case DATA_PROMOTE: -+ break; -+ default: -+ BUG(); -+ } -+ -+ /* unlock before doing IO: */ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ bch2_trans_unlock(&trans); -+ -+ ret2 = bch2_move_extent(c, ctxt, wp, io_opts, btree_id, k, -+ data_cmd, data_opts); -+ if (ret2) { -+ if (ret2 == -ENOMEM) { -+ /* memory allocation failure, wait for some IO to finish */ -+ bch2_move_ctxt_wait_for_io(ctxt); -+ continue; -+ } -+ -+ /* XXX signal failure */ -+ goto next; -+ } -+ -+ if (rate) -+ bch2_ratelimit_increment(rate, k.k->size); -+next: -+ atomic64_add(k.k->size * bch2_bkey_nr_ptrs_allocated(k), -+ &stats->sectors_seen); -+next_nondata: -+ bch2_btree_iter_next(iter); -+ bch2_trans_cond_resched(&trans); -+ } -+out: -+ ret = bch2_trans_exit(&trans) ?: ret; -+ bkey_on_stack_exit(&sk, c); -+ -+ return ret; -+} -+ -+int bch2_move_data(struct bch_fs *c, -+ struct bch_ratelimit *rate, -+ struct write_point_specifier wp, -+ struct bpos start, -+ struct bpos end, -+ move_pred_fn pred, void *arg, -+ struct bch_move_stats *stats) -+{ -+ struct moving_context ctxt = { .stats = stats }; -+ int ret; -+ -+ closure_init_stack(&ctxt.cl); -+ 
INIT_LIST_HEAD(&ctxt.reads); -+ init_waitqueue_head(&ctxt.wait); -+ -+ stats->data_type = BCH_DATA_user; -+ -+ ret = __bch2_move_data(c, &ctxt, rate, wp, start, end, -+ pred, arg, stats, BTREE_ID_EXTENTS) ?: -+ __bch2_move_data(c, &ctxt, rate, wp, start, end, -+ pred, arg, stats, BTREE_ID_REFLINK); -+ -+ move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); -+ closure_sync(&ctxt.cl); -+ -+ EBUG_ON(atomic_read(&ctxt.write_sectors)); -+ -+ trace_move_data(c, -+ atomic64_read(&stats->sectors_moved), -+ atomic64_read(&stats->keys_moved)); -+ -+ return ret; -+} -+ -+static int bch2_move_btree(struct bch_fs *c, -+ move_pred_fn pred, -+ void *arg, -+ struct bch_move_stats *stats) -+{ -+ struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ unsigned id; -+ struct data_opts data_opts; -+ enum data_cmd cmd; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ stats->data_type = BCH_DATA_btree; -+ -+ for (id = 0; id < BTREE_ID_NR; id++) { -+ stats->btree_id = id; -+ -+ for_each_btree_node(&trans, iter, id, POS_MIN, -+ BTREE_ITER_PREFETCH, b) { -+ stats->pos = iter->pos; -+ -+ switch ((cmd = pred(c, arg, -+ bkey_i_to_s_c(&b->key), -+ &io_opts, &data_opts))) { -+ case DATA_SKIP: -+ goto next; -+ case DATA_SCRUB: -+ BUG(); -+ case DATA_ADD_REPLICAS: -+ case DATA_REWRITE: -+ break; -+ default: -+ BUG(); -+ } -+ -+ ret = bch2_btree_node_rewrite(c, iter, -+ b->data->keys.seq, 0) ?: ret; -+next: -+ bch2_trans_cond_resched(&trans); -+ } -+ -+ ret = bch2_trans_iter_free(&trans, iter) ?: ret; -+ } -+ -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+#if 0 -+static enum data_cmd scrub_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ return DATA_SCRUB; -+} -+#endif -+ -+static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ unsigned nr_good = bch2_bkey_durability(c, k); -+ unsigned replicas = 0; -+ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: -+ replicas = c->opts.metadata_replicas; -+ break; -+ case KEY_TYPE_extent: -+ replicas = io_opts->data_replicas; -+ break; -+ } -+ -+ if (!nr_good || nr_good >= replicas) -+ return DATA_SKIP; -+ -+ data_opts->target = 0; -+ data_opts->btree_insert_flags = 0; -+ return DATA_ADD_REPLICAS; -+} -+ -+static enum data_cmd migrate_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ struct bch_ioctl_data *op = arg; -+ -+ if (!bch2_bkey_has_device(k, op->migrate.dev)) -+ return DATA_SKIP; -+ -+ data_opts->target = 0; -+ data_opts->btree_insert_flags = 0; -+ data_opts->rewrite_dev = op->migrate.dev; -+ return DATA_REWRITE; -+} -+ -+int bch2_data_job(struct bch_fs *c, -+ struct bch_move_stats *stats, -+ struct bch_ioctl_data op) -+{ -+ int ret = 0; -+ -+ switch (op.op) { -+ case BCH_DATA_OP_REREPLICATE: -+ stats->data_type = BCH_DATA_journal; -+ ret = bch2_journal_flush_device_pins(&c->journal, -1); -+ -+ ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret; -+ -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+ -+ ret = bch2_replicas_gc2(c) ?: ret; -+ -+ ret = bch2_move_data(c, NULL, -+ writepoint_hashed((unsigned long) current), -+ op.start, -+ op.end, -+ rereplicate_pred, c, stats) ?: ret; -+ ret = bch2_replicas_gc2(c) ?: ret; -+ break; -+ case 
BCH_DATA_OP_MIGRATE: -+ if (op.migrate.dev >= c->sb.nr_devices) -+ return -EINVAL; -+ -+ stats->data_type = BCH_DATA_journal; -+ ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); -+ -+ ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret; -+ ret = bch2_replicas_gc2(c) ?: ret; -+ -+ ret = bch2_move_data(c, NULL, -+ writepoint_hashed((unsigned long) current), -+ op.start, -+ op.end, -+ migrate_pred, &op, stats) ?: ret; -+ ret = bch2_replicas_gc2(c) ?: ret; -+ break; -+ default: -+ ret = -EINVAL; -+ } -+ -+ return ret; -+} -diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h -new file mode 100644 -index 000000000000..0acd1720d4f8 ---- /dev/null -+++ b/fs/bcachefs/move.h -@@ -0,0 +1,64 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MOVE_H -+#define _BCACHEFS_MOVE_H -+ -+#include "btree_iter.h" -+#include "buckets.h" -+#include "io_types.h" -+#include "move_types.h" -+ -+struct bch_read_bio; -+struct moving_context; -+ -+enum data_cmd { -+ DATA_SKIP, -+ DATA_SCRUB, -+ DATA_ADD_REPLICAS, -+ DATA_REWRITE, -+ DATA_PROMOTE, -+}; -+ -+struct data_opts { -+ u16 target; -+ unsigned rewrite_dev; -+ int btree_insert_flags; -+}; -+ -+struct migrate_write { -+ enum btree_id btree_id; -+ enum data_cmd data_cmd; -+ struct data_opts data_opts; -+ -+ unsigned nr_ptrs_reserved; -+ -+ struct moving_context *ctxt; -+ -+ /* what we read: */ -+ struct bch_extent_ptr ptr; -+ u64 offset; -+ -+ struct bch_write_op op; -+}; -+ -+void bch2_migrate_read_done(struct migrate_write *, struct bch_read_bio *); -+int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *, -+ struct write_point_specifier, -+ struct bch_io_opts, -+ enum data_cmd, struct data_opts, -+ enum btree_id, struct bkey_s_c); -+ -+typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *, -+ struct bkey_s_c, -+ struct bch_io_opts *, struct data_opts *); -+ -+int bch2_move_data(struct bch_fs *, struct bch_ratelimit *, -+ struct write_point_specifier, -+ struct bpos, struct bpos, -+ move_pred_fn, void *, -+ struct bch_move_stats *); -+ -+int bch2_data_job(struct bch_fs *, -+ struct bch_move_stats *, -+ struct bch_ioctl_data); -+ -+#endif /* _BCACHEFS_MOVE_H */ -diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h -new file mode 100644 -index 000000000000..fc0de165af9f ---- /dev/null -+++ b/fs/bcachefs/move_types.h -@@ -0,0 +1,17 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MOVE_TYPES_H -+#define _BCACHEFS_MOVE_TYPES_H -+ -+struct bch_move_stats { -+ enum bch_data_type data_type; -+ enum btree_id btree_id; -+ struct bpos pos; -+ -+ atomic64_t keys_moved; -+ atomic64_t keys_raced; -+ atomic64_t sectors_moved; -+ atomic64_t sectors_seen; -+ atomic64_t sectors_raced; -+}; -+ -+#endif /* _BCACHEFS_MOVE_TYPES_H */ -diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c -new file mode 100644 -index 000000000000..de0a7974ec9f ---- /dev/null -+++ b/fs/bcachefs/movinggc.c -@@ -0,0 +1,359 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Moving/copying garbage collector -+ * -+ * Copyright 2012 Google, Inc. 
-+ */ -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "clock.h" -+#include "disk_groups.h" -+#include "error.h" -+#include "extents.h" -+#include "eytzinger.h" -+#include "io.h" -+#include "keylist.h" -+#include "move.h" -+#include "movinggc.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * We can't use the entire copygc reserve in one iteration of copygc: we may -+ * need the buckets we're freeing up to go back into the copygc reserve to make -+ * forward progress, but if the copygc reserve is full they'll be available for -+ * any allocation - and it's possible that in a given iteration, we free up most -+ * of the buckets we're going to free before we allocate most of the buckets -+ * we're going to allocate. -+ * -+ * If we only use half of the reserve per iteration, then in steady state we'll -+ * always have room in the reserve for the buckets we're going to need in the -+ * next iteration: -+ */ -+#define COPYGC_BUCKETS_PER_ITER(ca) \ -+ ((ca)->free[RESERVE_MOVINGGC].size / 2) -+ -+static int bucket_offset_cmp(const void *_l, const void *_r, size_t size) -+{ -+ const struct copygc_heap_entry *l = _l; -+ const struct copygc_heap_entry *r = _r; -+ -+ return cmp_int(l->dev, r->dev) ?: -+ cmp_int(l->offset, r->offset); -+} -+ -+static int __copygc_pred(struct bch_fs *c, struct bkey_s_c k) -+{ -+ copygc_heap *h = &c->copygc_heap; -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct copygc_heap_entry search = { -+ .dev = ptr->dev, -+ .offset = ptr->offset -+ }; -+ -+ ssize_t i = eytzinger0_find_le(h->data, h->used, -+ sizeof(h->data[0]), -+ bucket_offset_cmp, &search); -+#if 0 -+ /* eytzinger search verify code: */ -+ ssize_t j = -1, k; -+ -+ for (k = 0; k < h->used; k++) -+ if (h->data[k].offset <= ptr->offset && -+ (j < 0 || h->data[k].offset > h->data[j].offset)) -+ j = k; -+ -+ BUG_ON(i != j); -+#endif -+ if (i >= 0 && -+ ptr->offset < h->data[i].offset + ca->mi.bucket_size && -+ ptr->gen == h->data[i].gen) -+ return ptr->dev; -+ } -+ -+ return -1; -+} -+ -+static enum data_cmd copygc_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ int dev_idx = __copygc_pred(c, k); -+ if (dev_idx < 0) -+ return DATA_SKIP; -+ -+ data_opts->target = io_opts->background_target; -+ data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE; -+ data_opts->rewrite_dev = dev_idx; -+ return DATA_REWRITE; -+} -+ -+static bool have_copygc_reserve(struct bch_dev *ca) -+{ -+ bool ret; -+ -+ spin_lock(&ca->fs->freelist_lock); -+ ret = fifo_full(&ca->free[RESERVE_MOVINGGC]) || -+ ca->allocator_state != ALLOCATOR_RUNNING; -+ spin_unlock(&ca->fs->freelist_lock); -+ -+ return ret; -+} -+ -+static inline int fragmentation_cmp(copygc_heap *heap, -+ struct copygc_heap_entry l, -+ struct copygc_heap_entry r) -+{ -+ return cmp_int(l.fragmentation, r.fragmentation); -+} -+ -+static int bch2_copygc(struct bch_fs *c) -+{ -+ copygc_heap *h = &c->copygc_heap; -+ struct copygc_heap_entry e, *i; -+ struct bucket_array *buckets; -+ struct bch_move_stats move_stats; -+ u64 sectors_to_move = 0, sectors_not_moved = 0; -+ u64 sectors_reserved = 0; -+ u64 buckets_to_move, buckets_not_moved = 0; -+ struct bch_dev *ca; -+ unsigned dev_idx; -+ size_t b, 
heap_size = 0; -+ int ret; -+ -+ memset(&move_stats, 0, sizeof(move_stats)); -+ /* -+ * Find buckets with lowest sector counts, skipping completely -+ * empty buckets, by building a maxheap sorted by sector count, -+ * and repeatedly replacing the maximum element until all -+ * buckets have been visited. -+ */ -+ h->used = 0; -+ -+ for_each_rw_member(ca, c, dev_idx) -+ heap_size += ca->mi.nbuckets >> 7; -+ -+ if (h->size < heap_size) { -+ free_heap(&c->copygc_heap); -+ if (!init_heap(&c->copygc_heap, heap_size, GFP_KERNEL)) { -+ bch_err(c, "error allocating copygc heap"); -+ return 0; -+ } -+ } -+ -+ for_each_rw_member(ca, c, dev_idx) { -+ closure_wait_event(&c->freelist_wait, have_copygc_reserve(ca)); -+ -+ spin_lock(&ca->fs->freelist_lock); -+ sectors_reserved += fifo_used(&ca->free[RESERVE_MOVINGGC]) * ca->mi.bucket_size; -+ spin_unlock(&ca->fs->freelist_lock); -+ -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for (b = buckets->first_bucket; b < buckets->nbuckets; b++) { -+ struct bucket_mark m = READ_ONCE(buckets->b[b].mark); -+ struct copygc_heap_entry e; -+ -+ if (m.owned_by_allocator || -+ m.data_type != BCH_DATA_user || -+ !bucket_sectors_used(m) || -+ bucket_sectors_used(m) >= ca->mi.bucket_size) -+ continue; -+ -+ e = (struct copygc_heap_entry) { -+ .dev = dev_idx, -+ .gen = m.gen, -+ .fragmentation = bucket_sectors_used(m) * (1U << 15) -+ / ca->mi.bucket_size, -+ .sectors = bucket_sectors_used(m), -+ .offset = bucket_to_sector(ca, b), -+ }; -+ heap_add_or_replace(h, e, -fragmentation_cmp, NULL); -+ } -+ up_read(&ca->bucket_lock); -+ } -+ -+ if (!sectors_reserved) { -+ bch2_fs_fatal_error(c, "stuck, ran out of copygc reserve!"); -+ return -1; -+ } -+ -+ for (i = h->data; i < h->data + h->used; i++) -+ sectors_to_move += i->sectors; -+ -+ while (sectors_to_move > sectors_reserved) { -+ BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL)); -+ sectors_to_move -= e.sectors; -+ } -+ -+ buckets_to_move = h->used; -+ -+ if (!buckets_to_move) -+ return 0; -+ -+ eytzinger0_sort(h->data, h->used, -+ sizeof(h->data[0]), -+ bucket_offset_cmp, NULL); -+ -+ ret = bch2_move_data(c, &c->copygc_pd.rate, -+ writepoint_ptr(&c->copygc_write_point), -+ POS_MIN, POS_MAX, -+ copygc_pred, NULL, -+ &move_stats); -+ -+ for_each_rw_member(ca, c, dev_idx) { -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ for (i = h->data; i < h->data + h->used; i++) { -+ struct bucket_mark m; -+ size_t b; -+ -+ if (i->dev != dev_idx) -+ continue; -+ -+ b = sector_to_bucket(ca, i->offset); -+ m = READ_ONCE(buckets->b[b].mark); -+ -+ if (i->gen == m.gen && -+ bucket_sectors_used(m)) { -+ sectors_not_moved += bucket_sectors_used(m); -+ buckets_not_moved++; -+ } -+ } -+ up_read(&ca->bucket_lock); -+ } -+ -+ if (sectors_not_moved && !ret) -+ bch_warn_ratelimited(c, -+ "copygc finished but %llu/%llu sectors, %llu/%llu buckets not moved (move stats: moved %llu sectors, raced %llu keys, %llu sectors)", -+ sectors_not_moved, sectors_to_move, -+ buckets_not_moved, buckets_to_move, -+ atomic64_read(&move_stats.sectors_moved), -+ atomic64_read(&move_stats.keys_raced), -+ atomic64_read(&move_stats.sectors_raced)); -+ -+ trace_copygc(c, -+ atomic64_read(&move_stats.sectors_moved), sectors_not_moved, -+ buckets_to_move, buckets_not_moved); -+ return 0; -+} -+ -+/* -+ * Copygc runs when the amount of fragmented data is above some arbitrary -+ * threshold: -+ * -+ * The threshold at the limit - when the device is full - is the amount of space -+ * we reserved in bch2_recalc_capacity; we can't have more 
than that amount of -+ * disk space stranded due to fragmentation and store everything we have -+ * promised to store. -+ * -+ * But we don't want to be running copygc unnecessarily when the device still -+ * has plenty of free space - rather, we want copygc to smoothly run every so -+ * often and continually reduce the amount of fragmented space as the device -+ * fills up. So, we increase the threshold by half the current free space. -+ */ -+unsigned long bch2_copygc_wait_amount(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned dev_idx; -+ u64 fragmented_allowed = c->copygc_threshold; -+ u64 fragmented = 0; -+ -+ for_each_rw_member(ca, c, dev_idx) { -+ struct bch_dev_usage usage = bch2_dev_usage_read(ca); -+ -+ fragmented_allowed += ((__dev_buckets_available(ca, usage) * -+ ca->mi.bucket_size) >> 1); -+ fragmented += usage.sectors_fragmented; -+ } -+ -+ return max_t(s64, 0, fragmented_allowed - fragmented); -+} -+ -+static int bch2_copygc_thread(void *arg) -+{ -+ struct bch_fs *c = arg; -+ struct io_clock *clock = &c->io_clock[WRITE]; -+ unsigned long last, wait; -+ -+ set_freezable(); -+ -+ while (!kthread_should_stop()) { -+ if (kthread_wait_freezable(c->copy_gc_enabled)) -+ break; -+ -+ last = atomic_long_read(&clock->now); -+ wait = bch2_copygc_wait_amount(c); -+ -+ if (wait > clock->max_slop) { -+ bch2_kthread_io_clock_wait(clock, last + wait, -+ MAX_SCHEDULE_TIMEOUT); -+ continue; -+ } -+ -+ if (bch2_copygc(c)) -+ break; -+ } -+ -+ return 0; -+} -+ -+void bch2_copygc_stop(struct bch_fs *c) -+{ -+ c->copygc_pd.rate.rate = UINT_MAX; -+ bch2_ratelimit_reset(&c->copygc_pd.rate); -+ -+ if (c->copygc_thread) { -+ kthread_stop(c->copygc_thread); -+ put_task_struct(c->copygc_thread); -+ } -+ c->copygc_thread = NULL; -+} -+ -+int bch2_copygc_start(struct bch_fs *c) -+{ -+ struct task_struct *t; -+ -+ if (c->copygc_thread) -+ return 0; -+ -+ if (c->opts.nochanges) -+ return 0; -+ -+ if (bch2_fs_init_fault("copygc_start")) -+ return -ENOMEM; -+ -+ t = kthread_create(bch2_copygc_thread, c, "bch_copygc"); -+ if (IS_ERR(t)) -+ return PTR_ERR(t); -+ -+ get_task_struct(t); -+ -+ c->copygc_thread = t; -+ wake_up_process(c->copygc_thread); -+ -+ return 0; -+} -+ -+void bch2_fs_copygc_init(struct bch_fs *c) -+{ -+ bch2_pd_controller_init(&c->copygc_pd); -+ c->copygc_pd.d_term = 0; -+} -diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h -new file mode 100644 -index 000000000000..922738247d03 ---- /dev/null -+++ b/fs/bcachefs/movinggc.h -@@ -0,0 +1,9 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MOVINGGC_H -+#define _BCACHEFS_MOVINGGC_H -+ -+void bch2_copygc_stop(struct bch_fs *); -+int bch2_copygc_start(struct bch_fs *); -+void bch2_fs_copygc_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_MOVINGGC_H */ -diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c -new file mode 100644 -index 000000000000..afe25cd26c06 ---- /dev/null -+++ b/fs/bcachefs/opts.c -@@ -0,0 +1,437 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include -+ -+#include "bcachefs.h" -+#include "compress.h" -+#include "disk_groups.h" -+#include "opts.h" -+#include "super-io.h" -+#include "util.h" -+ -+const char * const bch2_error_actions[] = { -+ "continue", -+ "remount-ro", -+ "panic", -+ NULL -+}; -+ -+const char * const bch2_sb_features[] = { -+#define x(f, n) #f, -+ BCH_SB_FEATURES() -+#undef x -+ NULL -+}; -+ -+const char * const bch2_csum_opts[] = { -+ "none", -+ "crc32c", -+ "crc64", -+ NULL -+}; -+ -+const char * const bch2_compression_opts[] = { -+#define x(t, n) #t, -+ BCH_COMPRESSION_OPTS() 
-+#undef x -+ NULL -+}; -+ -+const char * const bch2_str_hash_types[] = { -+ "crc32c", -+ "crc64", -+ "siphash", -+ NULL -+}; -+ -+const char * const bch2_data_types[] = { -+#define x(t, n) #t, -+ BCH_DATA_TYPES() -+#undef x -+ NULL -+}; -+ -+const char * const bch2_cache_replacement_policies[] = { -+ "lru", -+ "fifo", -+ "random", -+ NULL -+}; -+ -+/* Default is -1; we skip past it for struct cached_dev's cache mode */ -+const char * const bch2_cache_modes[] = { -+ "default", -+ "writethrough", -+ "writeback", -+ "writearound", -+ "none", -+ NULL -+}; -+ -+const char * const bch2_dev_state[] = { -+ "readwrite", -+ "readonly", -+ "failed", -+ "spare", -+ NULL -+}; -+ -+void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src) -+{ -+#define x(_name, ...) \ -+ if (opt_defined(src, _name)) \ -+ opt_set(*dst, _name, src._name); -+ -+ BCH_OPTS() -+#undef x -+} -+ -+bool bch2_opt_defined_by_id(const struct bch_opts *opts, enum bch_opt_id id) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Opt_##_name: \ -+ return opt_defined(*opts, _name); -+ BCH_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+u64 bch2_opt_get_by_id(const struct bch_opts *opts, enum bch_opt_id id) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Opt_##_name: \ -+ return opts->_name; -+ BCH_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_opt_set_by_id(struct bch_opts *opts, enum bch_opt_id id, u64 v) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Opt_##_name: \ -+ opt_set(*opts, _name, v); \ -+ break; -+ BCH_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+/* -+ * Initial options from superblock - here we don't want any options undefined, -+ * any options the superblock doesn't specify are set to 0: -+ */ -+struct bch_opts bch2_opts_from_sb(struct bch_sb *sb) -+{ -+ struct bch_opts opts = bch2_opts_empty(); -+ -+#define x(_name, _bits, _mode, _type, _sb_opt, ...) \ -+ if (_sb_opt != NO_SB_OPT) \ -+ opt_set(opts, _name, _sb_opt(sb)); -+ BCH_OPTS() -+#undef x -+ -+ return opts; -+} -+ -+const struct bch_option bch2_opt_table[] = { -+#define OPT_BOOL() .type = BCH_OPT_BOOL -+#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, .min = _min, .max = _max -+#define OPT_SECTORS(_min, _max) .type = BCH_OPT_SECTORS, .min = _min, .max = _max -+#define OPT_STR(_choices) .type = BCH_OPT_STR, .choices = _choices -+#define OPT_FN(_fn) .type = BCH_OPT_FN, \ -+ .parse = _fn##_parse, \ -+ .to_text = _fn##_to_text -+ -+#define x(_name, _bits, _mode, _type, _sb_opt, _default, _hint, _help) \ -+ [Opt_##_name] = { \ -+ .attr = { \ -+ .name = #_name, \ -+ .mode = (_mode) & OPT_RUNTIME ? 
0644 : 0444, \ -+ }, \ -+ .mode = _mode, \ -+ .hint = _hint, \ -+ .help = _help, \ -+ .set_sb = SET_##_sb_opt, \ -+ _type \ -+ }, -+ -+ BCH_OPTS() -+#undef x -+}; -+ -+int bch2_opt_lookup(const char *name) -+{ -+ const struct bch_option *i; -+ -+ for (i = bch2_opt_table; -+ i < bch2_opt_table + ARRAY_SIZE(bch2_opt_table); -+ i++) -+ if (!strcmp(name, i->attr.name)) -+ return i - bch2_opt_table; -+ -+ return -1; -+} -+ -+struct synonym { -+ const char *s1, *s2; -+}; -+ -+static const struct synonym bch_opt_synonyms[] = { -+ { "quota", "usrquota" }, -+}; -+ -+static int bch2_mount_opt_lookup(const char *name) -+{ -+ const struct synonym *i; -+ -+ for (i = bch_opt_synonyms; -+ i < bch_opt_synonyms + ARRAY_SIZE(bch_opt_synonyms); -+ i++) -+ if (!strcmp(name, i->s1)) -+ name = i->s2; -+ -+ return bch2_opt_lookup(name); -+} -+ -+int bch2_opt_parse(struct bch_fs *c, const struct bch_option *opt, -+ const char *val, u64 *res) -+{ -+ ssize_t ret; -+ -+ switch (opt->type) { -+ case BCH_OPT_BOOL: -+ ret = kstrtou64(val, 10, res); -+ if (ret < 0) -+ return ret; -+ -+ if (*res > 1) -+ return -ERANGE; -+ break; -+ case BCH_OPT_UINT: -+ ret = kstrtou64(val, 10, res); -+ if (ret < 0) -+ return ret; -+ -+ if (*res < opt->min || *res >= opt->max) -+ return -ERANGE; -+ break; -+ case BCH_OPT_SECTORS: -+ ret = bch2_strtou64_h(val, res); -+ if (ret < 0) -+ return ret; -+ -+ if (*res & 511) -+ return -EINVAL; -+ -+ *res >>= 9; -+ -+ if (*res < opt->min || *res >= opt->max) -+ return -ERANGE; -+ break; -+ case BCH_OPT_STR: -+ ret = match_string(opt->choices, -1, val); -+ if (ret < 0) -+ return ret; -+ -+ *res = ret; -+ break; -+ case BCH_OPT_FN: -+ if (!c) -+ return -EINVAL; -+ -+ return opt->parse(c, val, res); -+ } -+ -+ return 0; -+} -+ -+void bch2_opt_to_text(struct printbuf *out, struct bch_fs *c, -+ const struct bch_option *opt, u64 v, -+ unsigned flags) -+{ -+ if (flags & OPT_SHOW_MOUNT_STYLE) { -+ if (opt->type == BCH_OPT_BOOL) { -+ pr_buf(out, "%s%s", -+ v ? 
"" : "no", -+ opt->attr.name); -+ return; -+ } -+ -+ pr_buf(out, "%s=", opt->attr.name); -+ } -+ -+ switch (opt->type) { -+ case BCH_OPT_BOOL: -+ case BCH_OPT_UINT: -+ pr_buf(out, "%lli", v); -+ break; -+ case BCH_OPT_SECTORS: -+ bch2_hprint(out, v); -+ break; -+ case BCH_OPT_STR: -+ if (flags & OPT_SHOW_FULL_LIST) -+ bch2_string_opt_to_text(out, opt->choices, v); -+ else -+ pr_buf(out, opt->choices[v]); -+ break; -+ case BCH_OPT_FN: -+ opt->to_text(out, c, v); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v) -+{ -+ int ret = 0; -+ -+ switch (id) { -+ case Opt_compression: -+ case Opt_background_compression: -+ ret = bch2_check_set_has_compressed_data(c, v); -+ break; -+ case Opt_erasure_code: -+ if (v) -+ bch2_check_set_feature(c, BCH_FEATURE_ec); -+ break; -+ } -+ -+ return ret; -+} -+ -+int bch2_opts_check_may_set(struct bch_fs *c) -+{ -+ unsigned i; -+ int ret; -+ -+ for (i = 0; i < bch2_opts_nr; i++) { -+ ret = bch2_opt_check_may_set(c, i, -+ bch2_opt_get_by_id(&c->opts, i)); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+int bch2_parse_mount_opts(struct bch_opts *opts, char *options) -+{ -+ char *opt, *name, *val; -+ int ret, id; -+ u64 v; -+ -+ while ((opt = strsep(&options, ",")) != NULL) { -+ name = strsep(&opt, "="); -+ val = opt; -+ -+ if (val) { -+ id = bch2_mount_opt_lookup(name); -+ if (id < 0) -+ goto bad_opt; -+ -+ ret = bch2_opt_parse(NULL, &bch2_opt_table[id], val, &v); -+ if (ret < 0) -+ goto bad_val; -+ } else { -+ id = bch2_mount_opt_lookup(name); -+ v = 1; -+ -+ if (id < 0 && -+ !strncmp("no", name, 2)) { -+ id = bch2_mount_opt_lookup(name + 2); -+ v = 0; -+ } -+ -+ if (id < 0) -+ goto bad_opt; -+ -+ if (bch2_opt_table[id].type != BCH_OPT_BOOL) -+ goto no_val; -+ } -+ -+ if (!(bch2_opt_table[id].mode & OPT_MOUNT)) -+ goto bad_opt; -+ -+ if (id == Opt_acl && -+ !IS_ENABLED(CONFIG_BCACHEFS_POSIX_ACL)) -+ goto bad_opt; -+ -+ if ((id == Opt_usrquota || -+ id == Opt_grpquota) && -+ !IS_ENABLED(CONFIG_BCACHEFS_QUOTA)) -+ goto bad_opt; -+ -+ bch2_opt_set_by_id(opts, id, v); -+ } -+ -+ return 0; -+bad_opt: -+ pr_err("Bad mount option %s", name); -+ return -1; -+bad_val: -+ pr_err("Invalid value %s for mount option %s", val, name); -+ return -1; -+no_val: -+ pr_err("Mount option %s requires a value", name); -+ return -1; -+} -+ -+/* io opts: */ -+ -+struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts src) -+{ -+ struct bch_io_opts ret = { 0 }; -+#define x(_name, _bits) \ -+ if (opt_defined(src, _name)) \ -+ opt_set(ret, _name, src._name); -+ BCH_INODE_OPTS() -+#undef x -+ return ret; -+} -+ -+struct bch_opts bch2_inode_opts_to_opts(struct bch_io_opts src) -+{ -+ struct bch_opts ret = { 0 }; -+#define x(_name, _bits) \ -+ if (opt_defined(src, _name)) \ -+ opt_set(ret, _name, src._name); -+ BCH_INODE_OPTS() -+#undef x -+ return ret; -+} -+ -+void bch2_io_opts_apply(struct bch_io_opts *dst, struct bch_io_opts src) -+{ -+#define x(_name, _bits) \ -+ if (opt_defined(src, _name)) \ -+ opt_set(*dst, _name, src._name); -+ BCH_INODE_OPTS() -+#undef x -+} -+ -+bool bch2_opt_is_inode_opt(enum bch_opt_id id) -+{ -+ static const enum bch_opt_id inode_opt_list[] = { -+#define x(_name, _bits) Opt_##_name, -+ BCH_INODE_OPTS() -+#undef x -+ }; -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(inode_opt_list); i++) -+ if (inode_opt_list[i] == id) -+ return true; -+ -+ return false; -+} -diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -new file mode 100644 -index 000000000000..014c608ca0c6 ---- /dev/null -+++ 
b/fs/bcachefs/opts.h -@@ -0,0 +1,440 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_OPTS_H -+#define _BCACHEFS_OPTS_H -+ -+#include -+#include -+#include -+#include -+#include "bcachefs_format.h" -+ -+extern const char * const bch2_error_actions[]; -+extern const char * const bch2_sb_features[]; -+extern const char * const bch2_csum_opts[]; -+extern const char * const bch2_compression_opts[]; -+extern const char * const bch2_str_hash_types[]; -+extern const char * const bch2_data_types[]; -+extern const char * const bch2_cache_replacement_policies[]; -+extern const char * const bch2_cache_modes[]; -+extern const char * const bch2_dev_state[]; -+ -+/* -+ * Mount options; we also store defaults in the superblock. -+ * -+ * Also exposed via sysfs: if an option is writeable, and it's also stored in -+ * the superblock, changing it via sysfs (currently? might change this) also -+ * updates the superblock. -+ * -+ * We store options as signed integers, where -1 means undefined. This means we -+ * can pass the mount options to bch2_fs_alloc() as a whole struct, and then only -+ * apply the options from that struct that are defined. -+ */ -+ -+/* dummy option, for options that aren't stored in the superblock */ -+LE64_BITMASK(NO_SB_OPT, struct bch_sb, flags[0], 0, 0); -+ -+/* When can be set: */ -+enum opt_mode { -+ OPT_FORMAT = (1 << 0), -+ OPT_MOUNT = (1 << 1), -+ OPT_RUNTIME = (1 << 2), -+ OPT_INODE = (1 << 3), -+ OPT_DEVICE = (1 << 4), -+}; -+ -+enum opt_type { -+ BCH_OPT_BOOL, -+ BCH_OPT_UINT, -+ BCH_OPT_SECTORS, -+ BCH_OPT_STR, -+ BCH_OPT_FN, -+}; -+ -+/** -+ * x(name, shortopt, type, in mem type, mode, sb_opt) -+ * -+ * @name - name of mount option, sysfs attribute, and struct bch_opts -+ * member -+ * -+ * @mode - when opt may be set -+ * -+ * @sb_option - name of corresponding superblock option -+ * -+ * @type - one of OPT_BOOL, OPT_UINT, OPT_STR -+ */ -+ -+/* -+ * XXX: add fields for -+ * - default value -+ * - helptext -+ */ -+ -+#ifdef __KERNEL__ -+#define RATELIMIT_ERRORS true -+#else -+#define RATELIMIT_ERRORS false -+#endif -+ -+#define BCH_OPTS() \ -+ x(block_size, u16, \ -+ OPT_FORMAT, \ -+ OPT_SECTORS(1, 128), \ -+ BCH_SB_BLOCK_SIZE, 8, \ -+ "size", NULL) \ -+ x(btree_node_size, u16, \ -+ OPT_FORMAT, \ -+ OPT_SECTORS(1, 512), \ -+ BCH_SB_BTREE_NODE_SIZE, 512, \ -+ "size", "Btree node size, default 256k") \ -+ x(errors, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_STR(bch2_error_actions), \ -+ BCH_SB_ERROR_ACTION, BCH_ON_ERROR_RO, \ -+ NULL, "Action to take on filesystem error") \ -+ x(metadata_replicas, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_META_REPLICAS_WANT, 1, \ -+ "#", "Number of metadata replicas") \ -+ x(data_replicas, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_DATA_REPLICAS_WANT, 1, \ -+ "#", "Number of data replicas") \ -+ x(metadata_replicas_required, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_META_REPLICAS_REQ, 1, \ -+ "#", NULL) \ -+ x(data_replicas_required, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_DATA_REPLICAS_REQ, 1, \ -+ "#", NULL) \ -+ x(metadata_checksum, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_STR(bch2_csum_opts), \ -+ BCH_SB_META_CSUM_TYPE, BCH_CSUM_OPT_CRC32C, \ -+ NULL, NULL) \ -+ x(data_checksum, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_STR(bch2_csum_opts), \ -+ BCH_SB_DATA_CSUM_TYPE, BCH_CSUM_OPT_CRC32C, \ -+ NULL, 
NULL) \ -+ x(compression, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_STR(bch2_compression_opts), \ -+ BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_none, \ -+ NULL, NULL) \ -+ x(background_compression, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_STR(bch2_compression_opts), \ -+ BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_none, \ -+ NULL, NULL) \ -+ x(str_hash, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_STR(bch2_str_hash_types), \ -+ BCH_SB_STR_HASH_TYPE, BCH_STR_HASH_OPT_SIPHASH, \ -+ NULL, "Hash function for directory entries and xattrs")\ -+ x(foreground_target, u16, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_FN(bch2_opt_target), \ -+ BCH_SB_FOREGROUND_TARGET, 0, \ -+ "(target)", "Device or disk group for foreground writes") \ -+ x(background_target, u16, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_FN(bch2_opt_target), \ -+ BCH_SB_BACKGROUND_TARGET, 0, \ -+ "(target)", "Device or disk group to move data to in the background")\ -+ x(promote_target, u16, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_FN(bch2_opt_target), \ -+ BCH_SB_PROMOTE_TARGET, 0, \ -+ "(target)", "Device or disk group to promote data to on read")\ -+ x(erasure_code, u16, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_BOOL(), \ -+ BCH_SB_ERASURE_CODE, false, \ -+ NULL, "Enable erasure coding (DO NOT USE YET)") \ -+ x(inodes_32bit, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_INODE_32BIT, false, \ -+ NULL, "Constrain inode numbers to 32 bits") \ -+ x(gc_reserve_percent, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_UINT(5, 21), \ -+ BCH_SB_GC_RESERVE, 8, \ -+ "%", "Percentage of disk space to reserve for copygc")\ -+ x(gc_reserve_bytes, u64, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_SECTORS(0, U64_MAX), \ -+ BCH_SB_GC_RESERVE_BYTES, 0, \ -+ "%", "Amount of disk space to reserve for copygc\n" \ -+ "Takes precedence over gc_reserve_percent if set")\ -+ x(root_reserve_percent, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_UINT(0, 100), \ -+ BCH_SB_ROOT_RESERVE, 0, \ -+ "%", "Percentage of disk space to reserve for superuser")\ -+ x(wide_macs, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_128_BIT_MACS, false, \ -+ NULL, "Store full 128 bits of cryptographic MACs, instead of 80")\ -+ x(inline_data, u8, \ -+ OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Enable inline data extents") \ -+ x(acl, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_POSIX_ACL, true, \ -+ NULL, "Enable POSIX acls") \ -+ x(usrquota, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_USRQUOTA, false, \ -+ NULL, "Enable user quotas") \ -+ x(grpquota, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_GRPQUOTA, false, \ -+ NULL, "Enable group quotas") \ -+ x(prjquota, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_PRJQUOTA, false, \ -+ NULL, "Enable project quotas") \ -+ x(reflink, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_REFLINK, true, \ -+ NULL, "Enable reflink support") \ -+ x(degraded, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Allow mounting in degraded mode") \ -+ x(discard, u8, \ -+ OPT_MOUNT|OPT_DEVICE, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Enable discard/TRIM support") \ -+ x(verbose, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Extra debugging information during mount/recovery")\ -+ x(journal_flush_disabled, u8, \ 
-+ OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Disable journal flush on sync/fsync\n" \ -+ "If enabled, writes can be lost, but only since the\n"\ -+ "last journal write (default 1 second)") \ -+ x(fsck, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Run fsck on mount") \ -+ x(fix_errors, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Fix errors during fsck without asking") \ -+ x(ratelimit_errors, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, RATELIMIT_ERRORS, \ -+ NULL, "Ratelimit error messages during fsck") \ -+ x(nochanges, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Super read only mode - no writes at all will be issued,\n"\ -+ "even if we have to replay the journal") \ -+ x(norecovery, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Don't replay the journal") \ -+ x(rebuild_replicas, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Rebuild the superblock replicas section") \ -+ x(keep_journal, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Don't free journal entries/keys after startup")\ -+ x(read_entire_journal, u8, \ -+ 0, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Read all journal entries, not just dirty ones")\ -+ x(noexcl, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Don't open device in exclusive mode") \ -+ x(sb, u64, \ -+ OPT_MOUNT, \ -+ OPT_UINT(0, S64_MAX), \ -+ NO_SB_OPT, BCH_SB_SECTOR, \ -+ "offset", "Sector offset of superblock") \ -+ x(read_only, u8, \ -+ 0, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, NULL) \ -+ x(nostart, u8, \ -+ 0, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Don\'t start filesystem, only open devices") \ -+ x(reconstruct_alloc, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Reconstruct alloc btree") \ -+ x(version_upgrade, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Set superblock to latest version,\n" \ -+ "allowing any new features to be used") \ -+ x(project, u8, \ -+ OPT_INODE, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, NULL) \ -+ x(fs_size, u64, \ -+ OPT_DEVICE, \ -+ OPT_SECTORS(0, S64_MAX), \ -+ NO_SB_OPT, 0, \ -+ "size", "Size of filesystem on device") \ -+ x(bucket, u32, \ -+ OPT_DEVICE, \ -+ OPT_SECTORS(0, S64_MAX), \ -+ NO_SB_OPT, 0, \ -+ "size", "Size of filesystem on device") \ -+ x(durability, u8, \ -+ OPT_DEVICE, \ -+ OPT_UINT(0, BCH_REPLICAS_MAX), \ -+ NO_SB_OPT, 1, \ -+ "n", "Data written to this device will be considered\n"\ -+ "to have already been replicated n times") -+ -+struct bch_opts { -+#define x(_name, _bits, ...) unsigned _name##_defined:1; -+ BCH_OPTS() -+#undef x -+ -+#define x(_name, _bits, ...) _bits _name; -+ BCH_OPTS() -+#undef x -+}; -+ -+static const struct bch_opts bch2_opts_default = { -+#define x(_name, _bits, _mode, _type, _sb_opt, _default, ...) \ -+ ._name##_defined = true, \ -+ ._name = _default, \ -+ -+ BCH_OPTS() -+#undef x -+}; -+ -+#define opt_defined(_opts, _name) ((_opts)._name##_defined) -+ -+#define opt_get(_opts, _name) \ -+ (opt_defined(_opts, _name) ? 
(_opts)._name : bch2_opts_default._name) -+ -+#define opt_set(_opts, _name, _v) \ -+do { \ -+ (_opts)._name##_defined = true; \ -+ (_opts)._name = _v; \ -+} while (0) -+ -+static inline struct bch_opts bch2_opts_empty(void) -+{ -+ return (struct bch_opts) { 0 }; -+} -+ -+void bch2_opts_apply(struct bch_opts *, struct bch_opts); -+ -+enum bch_opt_id { -+#define x(_name, ...) Opt_##_name, -+ BCH_OPTS() -+#undef x -+ bch2_opts_nr -+}; -+ -+struct bch_fs; -+struct printbuf; -+ -+struct bch_option { -+ struct attribute attr; -+ void (*set_sb)(struct bch_sb *, u64); -+ enum opt_mode mode; -+ enum opt_type type; -+ -+ union { -+ struct { -+ u64 min, max; -+ }; -+ struct { -+ const char * const *choices; -+ }; -+ struct { -+ int (*parse)(struct bch_fs *, const char *, u64 *); -+ void (*to_text)(struct printbuf *, struct bch_fs *, u64); -+ }; -+ }; -+ -+ const char *hint; -+ const char *help; -+ -+}; -+ -+extern const struct bch_option bch2_opt_table[]; -+ -+bool bch2_opt_defined_by_id(const struct bch_opts *, enum bch_opt_id); -+u64 bch2_opt_get_by_id(const struct bch_opts *, enum bch_opt_id); -+void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64); -+ -+struct bch_opts bch2_opts_from_sb(struct bch_sb *); -+ -+int bch2_opt_lookup(const char *); -+int bch2_opt_parse(struct bch_fs *, const struct bch_option *, const char *, u64 *); -+ -+#define OPT_SHOW_FULL_LIST (1 << 0) -+#define OPT_SHOW_MOUNT_STYLE (1 << 1) -+ -+void bch2_opt_to_text(struct printbuf *, struct bch_fs *, -+ const struct bch_option *, u64, unsigned); -+ -+int bch2_opt_check_may_set(struct bch_fs *, int, u64); -+int bch2_opts_check_may_set(struct bch_fs *); -+int bch2_parse_mount_opts(struct bch_opts *, char *); -+ -+/* inode opts: */ -+ -+struct bch_io_opts { -+#define x(_name, _bits) unsigned _name##_defined:1; -+ BCH_INODE_OPTS() -+#undef x -+ -+#define x(_name, _bits) u##_bits _name; -+ BCH_INODE_OPTS() -+#undef x -+}; -+ -+struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts); -+struct bch_opts bch2_inode_opts_to_opts(struct bch_io_opts); -+void bch2_io_opts_apply(struct bch_io_opts *, struct bch_io_opts); -+bool bch2_opt_is_inode_opt(enum bch_opt_id); -+ -+#endif /* _BCACHEFS_OPTS_H */ -diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c -new file mode 100644 -index 000000000000..d3032a46e7f3 ---- /dev/null -+++ b/fs/bcachefs/quota.c -@@ -0,0 +1,783 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "btree_update.h" -+#include "inode.h" -+#include "quota.h" -+#include "super-io.h" -+ -+static const char *bch2_sb_validate_quota(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_quota *q = field_to_type(f, quota); -+ -+ if (vstruct_bytes(&q->field) != sizeof(*q)) -+ return "invalid field quota: wrong size"; -+ -+ return NULL; -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_quota = { -+ .validate = bch2_sb_validate_quota, -+}; -+ -+const char *bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (k.k->p.inode >= QTYP_NR) -+ return "invalid quota type"; -+ -+ if (bkey_val_bytes(k.k) != sizeof(struct bch_quota)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+static const char * const bch2_quota_counters[] = { -+ "space", -+ "inodes", -+}; -+ -+void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_quota dq = bkey_s_c_to_quota(k); -+ unsigned i; -+ -+ for (i = 0; i < Q_COUNTERS; i++) -+ pr_buf(out, "%s hardlimit %llu softlimit %llu", -+ bch2_quota_counters[i], -+ 
le64_to_cpu(dq.v->c[i].hardlimit), -+ le64_to_cpu(dq.v->c[i].softlimit)); -+} -+ -+#ifdef CONFIG_BCACHEFS_QUOTA -+ -+#include -+#include -+#include -+ -+static inline unsigned __next_qtype(unsigned i, unsigned qtypes) -+{ -+ qtypes >>= i; -+ return qtypes ? i + __ffs(qtypes) : QTYP_NR; -+} -+ -+#define for_each_set_qtype(_c, _i, _q, _qtypes) \ -+ for (_i = 0; \ -+ (_i = __next_qtype(_i, _qtypes), \ -+ _q = &(_c)->quotas[_i], \ -+ _i < QTYP_NR); \ -+ _i++) -+ -+static bool ignore_hardlimit(struct bch_memquota_type *q) -+{ -+ if (capable(CAP_SYS_RESOURCE)) -+ return true; -+#if 0 -+ struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; -+ -+ return capable(CAP_SYS_RESOURCE) && -+ (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || -+ !(info->dqi_flags & DQF_ROOT_SQUASH)); -+#endif -+ return false; -+} -+ -+enum quota_msg { -+ SOFTWARN, /* Softlimit reached */ -+ SOFTLONGWARN, /* Grace time expired */ -+ HARDWARN, /* Hardlimit reached */ -+ -+ HARDBELOW, /* Usage got below inode hardlimit */ -+ SOFTBELOW, /* Usage got below inode softlimit */ -+}; -+ -+static int quota_nl[][Q_COUNTERS] = { -+ [HARDWARN][Q_SPC] = QUOTA_NL_BHARDWARN, -+ [SOFTLONGWARN][Q_SPC] = QUOTA_NL_BSOFTLONGWARN, -+ [SOFTWARN][Q_SPC] = QUOTA_NL_BSOFTWARN, -+ [HARDBELOW][Q_SPC] = QUOTA_NL_BHARDBELOW, -+ [SOFTBELOW][Q_SPC] = QUOTA_NL_BSOFTBELOW, -+ -+ [HARDWARN][Q_INO] = QUOTA_NL_IHARDWARN, -+ [SOFTLONGWARN][Q_INO] = QUOTA_NL_ISOFTLONGWARN, -+ [SOFTWARN][Q_INO] = QUOTA_NL_ISOFTWARN, -+ [HARDBELOW][Q_INO] = QUOTA_NL_IHARDBELOW, -+ [SOFTBELOW][Q_INO] = QUOTA_NL_ISOFTBELOW, -+}; -+ -+struct quota_msgs { -+ u8 nr; -+ struct { -+ u8 qtype; -+ u8 msg; -+ } m[QTYP_NR * Q_COUNTERS]; -+}; -+ -+static void prepare_msg(unsigned qtype, -+ enum quota_counters counter, -+ struct quota_msgs *msgs, -+ enum quota_msg msg_type) -+{ -+ BUG_ON(msgs->nr >= ARRAY_SIZE(msgs->m)); -+ -+ msgs->m[msgs->nr].qtype = qtype; -+ msgs->m[msgs->nr].msg = quota_nl[msg_type][counter]; -+ msgs->nr++; -+} -+ -+static void prepare_warning(struct memquota_counter *qc, -+ unsigned qtype, -+ enum quota_counters counter, -+ struct quota_msgs *msgs, -+ enum quota_msg msg_type) -+{ -+ if (qc->warning_issued & (1 << msg_type)) -+ return; -+ -+ prepare_msg(qtype, counter, msgs, msg_type); -+} -+ -+static void flush_warnings(struct bch_qid qid, -+ struct super_block *sb, -+ struct quota_msgs *msgs) -+{ -+ unsigned i; -+ -+ for (i = 0; i < msgs->nr; i++) -+ quota_send_warning(make_kqid(&init_user_ns, msgs->m[i].qtype, qid.q[i]), -+ sb->s_dev, msgs->m[i].msg); -+} -+ -+static int bch2_quota_check_limit(struct bch_fs *c, -+ unsigned qtype, -+ struct bch_memquota *mq, -+ struct quota_msgs *msgs, -+ enum quota_counters counter, -+ s64 v, -+ enum quota_acct_mode mode) -+{ -+ struct bch_memquota_type *q = &c->quotas[qtype]; -+ struct memquota_counter *qc = &mq->c[counter]; -+ u64 n = qc->v + v; -+ -+ BUG_ON((s64) n < 0); -+ -+ if (mode == KEY_TYPE_QUOTA_NOCHECK) -+ return 0; -+ -+ if (v <= 0) { -+ if (n < qc->hardlimit && -+ (qc->warning_issued & (1 << HARDWARN))) { -+ qc->warning_issued &= ~(1 << HARDWARN); -+ prepare_msg(qtype, counter, msgs, HARDBELOW); -+ } -+ -+ if (n < qc->softlimit && -+ (qc->warning_issued & (1 << SOFTWARN))) { -+ qc->warning_issued &= ~(1 << SOFTWARN); -+ prepare_msg(qtype, counter, msgs, SOFTBELOW); -+ } -+ -+ qc->warning_issued = 0; -+ return 0; -+ } -+ -+ if (qc->hardlimit && -+ qc->hardlimit < n && -+ !ignore_hardlimit(q)) { -+ if (mode == KEY_TYPE_QUOTA_PREALLOC) -+ return -EDQUOT; -+ -+ prepare_warning(qc, qtype, counter, msgs, 
HARDWARN); -+ } -+ -+ if (qc->softlimit && -+ qc->softlimit < n && -+ qc->timer && -+ ktime_get_real_seconds() >= qc->timer && -+ !ignore_hardlimit(q)) { -+ if (mode == KEY_TYPE_QUOTA_PREALLOC) -+ return -EDQUOT; -+ -+ prepare_warning(qc, qtype, counter, msgs, SOFTLONGWARN); -+ } -+ -+ if (qc->softlimit && -+ qc->softlimit < n && -+ qc->timer == 0) { -+ if (mode == KEY_TYPE_QUOTA_PREALLOC) -+ return -EDQUOT; -+ -+ prepare_warning(qc, qtype, counter, msgs, SOFTWARN); -+ -+ /* XXX is this the right one? */ -+ qc->timer = ktime_get_real_seconds() + -+ q->limits[counter].warnlimit; -+ } -+ -+ return 0; -+} -+ -+int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid, -+ enum quota_counters counter, s64 v, -+ enum quota_acct_mode mode) -+{ -+ unsigned qtypes = enabled_qtypes(c); -+ struct bch_memquota_type *q; -+ struct bch_memquota *mq[QTYP_NR]; -+ struct quota_msgs msgs; -+ unsigned i; -+ int ret = 0; -+ -+ memset(&msgs, 0, sizeof(msgs)); -+ -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_lock_nested(&q->lock, i); -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ mq[i] = genradix_ptr_alloc(&q->table, qid.q[i], GFP_NOFS); -+ if (!mq[i]) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ ret = bch2_quota_check_limit(c, i, mq[i], &msgs, counter, v, mode); -+ if (ret) -+ goto err; -+ } -+ -+ for_each_set_qtype(c, i, q, qtypes) -+ mq[i]->c[counter].v += v; -+err: -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_unlock(&q->lock); -+ -+ flush_warnings(qid, c->vfs_sb, &msgs); -+ -+ return ret; -+} -+ -+static void __bch2_quota_transfer(struct bch_memquota *src_q, -+ struct bch_memquota *dst_q, -+ enum quota_counters counter, s64 v) -+{ -+ BUG_ON(v > src_q->c[counter].v); -+ BUG_ON(v + dst_q->c[counter].v < v); -+ -+ src_q->c[counter].v -= v; -+ dst_q->c[counter].v += v; -+} -+ -+int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes, -+ struct bch_qid dst, -+ struct bch_qid src, u64 space, -+ enum quota_acct_mode mode) -+{ -+ struct bch_memquota_type *q; -+ struct bch_memquota *src_q[3], *dst_q[3]; -+ struct quota_msgs msgs; -+ unsigned i; -+ int ret = 0; -+ -+ qtypes &= enabled_qtypes(c); -+ -+ memset(&msgs, 0, sizeof(msgs)); -+ -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_lock_nested(&q->lock, i); -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ src_q[i] = genradix_ptr_alloc(&q->table, src.q[i], GFP_NOFS); -+ dst_q[i] = genradix_ptr_alloc(&q->table, dst.q[i], GFP_NOFS); -+ -+ if (!src_q[i] || !dst_q[i]) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_SPC, -+ dst_q[i]->c[Q_SPC].v + space, -+ mode); -+ if (ret) -+ goto err; -+ -+ ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_INO, -+ dst_q[i]->c[Q_INO].v + 1, -+ mode); -+ if (ret) -+ goto err; -+ } -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ __bch2_quota_transfer(src_q[i], dst_q[i], Q_SPC, space); -+ __bch2_quota_transfer(src_q[i], dst_q[i], Q_INO, 1); -+ } -+ -+err: -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_unlock(&q->lock); -+ -+ flush_warnings(dst, c->vfs_sb, &msgs); -+ -+ return ret; -+} -+ -+static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_quota dq; -+ struct bch_memquota_type *q; -+ struct bch_memquota *mq; -+ unsigned i; -+ -+ BUG_ON(k.k->p.inode >= QTYP_NR); -+ -+ switch (k.k->type) { -+ case KEY_TYPE_quota: -+ dq = bkey_s_c_to_quota(k); -+ q = &c->quotas[k.k->p.inode]; -+ -+ mutex_lock(&q->lock); -+ mq = genradix_ptr_alloc(&q->table, k.k->p.offset, GFP_KERNEL); -+ if (!mq) { -+ mutex_unlock(&q->lock); -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i 
< Q_COUNTERS; i++) { -+ mq->c[i].hardlimit = le64_to_cpu(dq.v->c[i].hardlimit); -+ mq->c[i].softlimit = le64_to_cpu(dq.v->c[i].softlimit); -+ } -+ -+ mutex_unlock(&q->lock); -+ } -+ -+ return 0; -+} -+ -+static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_QUOTAS, POS(type, 0), -+ BTREE_ITER_PREFETCH, k, ret) { -+ if (k.k->p.inode != type) -+ break; -+ -+ ret = __bch2_quota_set(c, k); -+ if (ret) -+ break; -+ } -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+void bch2_fs_quota_exit(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(c->quotas); i++) -+ genradix_free(&c->quotas[i].table); -+} -+ -+void bch2_fs_quota_init(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(c->quotas); i++) -+ mutex_init(&c->quotas[i].lock); -+} -+ -+static void bch2_sb_quota_read(struct bch_fs *c) -+{ -+ struct bch_sb_field_quota *sb_quota; -+ unsigned i, j; -+ -+ sb_quota = bch2_sb_get_quota(c->disk_sb.sb); -+ if (!sb_quota) -+ return; -+ -+ for (i = 0; i < QTYP_NR; i++) { -+ struct bch_memquota_type *q = &c->quotas[i]; -+ -+ for (j = 0; j < Q_COUNTERS; j++) { -+ q->limits[j].timelimit = -+ le32_to_cpu(sb_quota->q[i].c[j].timelimit); -+ q->limits[j].warnlimit = -+ le32_to_cpu(sb_quota->q[i].c[j].warnlimit); -+ } -+ } -+} -+ -+int bch2_fs_quota_read(struct bch_fs *c) -+{ -+ unsigned i, qtypes = enabled_qtypes(c); -+ struct bch_memquota_type *q; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bch_inode_unpacked u; -+ struct bkey_s_c k; -+ int ret; -+ -+ mutex_lock(&c->sb_lock); -+ bch2_sb_quota_read(c); -+ mutex_unlock(&c->sb_lock); -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ ret = bch2_quota_init_type(c, i); -+ if (ret) -+ return ret; -+ } -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, -+ BTREE_ITER_PREFETCH, k, ret) { -+ switch (k.k->type) { -+ case KEY_TYPE_inode: -+ ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u); -+ if (ret) -+ return ret; -+ -+ bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors, -+ KEY_TYPE_QUOTA_NOCHECK); -+ bch2_quota_acct(c, bch_qid(&u), Q_INO, 1, -+ KEY_TYPE_QUOTA_NOCHECK); -+ } -+ } -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+/* Enable/disable/delete quotas for an entire filesystem: */ -+ -+static int bch2_quota_enable(struct super_block *sb, unsigned uflags) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ /* Accounting must be enabled at mount time: */ -+ if (uflags & (FS_QUOTA_UDQ_ACCT|FS_QUOTA_GDQ_ACCT|FS_QUOTA_PDQ_ACCT)) -+ return -EINVAL; -+ -+ /* Can't enable enforcement without accounting: */ -+ if ((uflags & FS_QUOTA_UDQ_ENFD) && !c->opts.usrquota) -+ return -EINVAL; -+ -+ if ((uflags & FS_QUOTA_GDQ_ENFD) && !c->opts.grpquota) -+ return -EINVAL; -+ -+ if (uflags & FS_QUOTA_PDQ_ENFD && !c->opts.prjquota) -+ return -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ if (uflags & FS_QUOTA_UDQ_ENFD) -+ SET_BCH_SB_USRQUOTA(c->disk_sb.sb, true); -+ -+ if (uflags & FS_QUOTA_GDQ_ENFD) -+ SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, true); -+ -+ if (uflags & FS_QUOTA_PDQ_ENFD) -+ SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, true); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+static int bch2_quota_disable(struct super_block *sb, unsigned uflags) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ 
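
The quota accounting path above distinguishes a hard limit, which refuses further preallocation outright once exceeded, from a soft limit, which only starts refusing after its grace timer has expired. A simplified standalone sketch of that decision follows — illustrative struct and function names with textbook grace-period behaviour, not the exact bch2_quota_check_limit() logic (which also handles warning modes and returns -EDQUOT):

	/* Sketch of a hard/soft quota check with a grace period. */
	#include <stdio.h>
	#include <time.h>

	struct quota_counter {
		long long used;
		long long softlimit;	 /* 0 = no limit */
		long long hardlimit;	 /* 0 = no limit */
		time_t    grace_expires; /* 0 = grace not started */
	};

	/* Return 0 if the allocation may proceed, -1 if it must be refused. */
	static int quota_check(struct quota_counter *q, long long v, time_t now)
	{
		long long n = q->used + v;

		if (q->hardlimit && n > q->hardlimit)
			return -1;

		if (q->softlimit && n > q->softlimit) {
			if (!q->grace_expires)
				q->grace_expires = now + 7 * 24 * 3600; /* start grace */
			else if (now >= q->grace_expires)
				return -1; /* grace period over */
		}
		return 0;
	}

	int main(void)
	{
		struct quota_counter q = { .used = 90, .softlimit = 100, .hardlimit = 120 };
		time_t now = time(NULL);

		printf("alloc 5:  %d\n", quota_check(&q, 5, now));  /* fits */
		printf("alloc 35: %d\n", quota_check(&q, 35, now)); /* over hard limit */
		return 0;
	}
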
if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ mutex_lock(&c->sb_lock); -+ if (uflags & FS_QUOTA_UDQ_ENFD) -+ SET_BCH_SB_USRQUOTA(c->disk_sb.sb, false); -+ -+ if (uflags & FS_QUOTA_GDQ_ENFD) -+ SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, false); -+ -+ if (uflags & FS_QUOTA_PDQ_ENFD) -+ SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, false); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+static int bch2_quota_remove(struct super_block *sb, unsigned uflags) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ int ret; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ if (uflags & FS_USER_QUOTA) { -+ if (c->opts.usrquota) -+ return -EINVAL; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, -+ POS(QTYP_USR, 0), -+ POS(QTYP_USR + 1, 0), -+ NULL); -+ if (ret) -+ return ret; -+ } -+ -+ if (uflags & FS_GROUP_QUOTA) { -+ if (c->opts.grpquota) -+ return -EINVAL; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, -+ POS(QTYP_GRP, 0), -+ POS(QTYP_GRP + 1, 0), -+ NULL); -+ if (ret) -+ return ret; -+ } -+ -+ if (uflags & FS_PROJ_QUOTA) { -+ if (c->opts.prjquota) -+ return -EINVAL; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, -+ POS(QTYP_PRJ, 0), -+ POS(QTYP_PRJ + 1, 0), -+ NULL); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Return quota status information, such as enforcements, quota file inode -+ * numbers etc. -+ */ -+static int bch2_quota_get_state(struct super_block *sb, struct qc_state *state) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ unsigned qtypes = enabled_qtypes(c); -+ unsigned i; -+ -+ memset(state, 0, sizeof(*state)); -+ -+ for (i = 0; i < QTYP_NR; i++) { -+ state->s_state[i].flags |= QCI_SYSFILE; -+ -+ if (!(qtypes & (1 << i))) -+ continue; -+ -+ state->s_state[i].flags |= QCI_ACCT_ENABLED; -+ -+ state->s_state[i].spc_timelimit = c->quotas[i].limits[Q_SPC].timelimit; -+ state->s_state[i].spc_warnlimit = c->quotas[i].limits[Q_SPC].warnlimit; -+ -+ state->s_state[i].ino_timelimit = c->quotas[i].limits[Q_INO].timelimit; -+ state->s_state[i].ino_warnlimit = c->quotas[i].limits[Q_INO].warnlimit; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Adjust quota timers & warnings -+ */ -+static int bch2_quota_set_info(struct super_block *sb, int type, -+ struct qc_info *info) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_sb_field_quota *sb_quota; -+ struct bch_memquota_type *q; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ if (type >= QTYP_NR) -+ return -EINVAL; -+ -+ if (!((1 << type) & enabled_qtypes(c))) -+ return -ESRCH; -+ -+ if (info->i_fieldmask & -+ ~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS)) -+ return -EINVAL; -+ -+ q = &c->quotas[type]; -+ -+ mutex_lock(&c->sb_lock); -+ sb_quota = bch2_sb_get_quota(c->disk_sb.sb); -+ if (!sb_quota) { -+ sb_quota = bch2_sb_resize_quota(&c->disk_sb, -+ sizeof(*sb_quota) / sizeof(u64)); -+ if (!sb_quota) -+ return -ENOSPC; -+ } -+ -+ if (info->i_fieldmask & QC_SPC_TIMER) -+ sb_quota->q[type].c[Q_SPC].timelimit = -+ cpu_to_le32(info->i_spc_timelimit); -+ -+ if (info->i_fieldmask & QC_SPC_WARNS) -+ sb_quota->q[type].c[Q_SPC].warnlimit = -+ cpu_to_le32(info->i_spc_warnlimit); -+ -+ if (info->i_fieldmask & QC_INO_TIMER) -+ sb_quota->q[type].c[Q_INO].timelimit = -+ cpu_to_le32(info->i_ino_timelimit); -+ -+ if (info->i_fieldmask & QC_INO_WARNS) -+ sb_quota->q[type].c[Q_INO].warnlimit = -+ cpu_to_le32(info->i_ino_warnlimit); -+ -+ bch2_sb_quota_read(c); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+/* Get/set individual quotas: */ -+ -+static void 
__bch2_quota_get(struct qc_dqblk *dst, struct bch_memquota *src) -+{ -+ dst->d_space = src->c[Q_SPC].v << 9; -+ dst->d_spc_hardlimit = src->c[Q_SPC].hardlimit << 9; -+ dst->d_spc_softlimit = src->c[Q_SPC].softlimit << 9; -+ dst->d_spc_timer = src->c[Q_SPC].timer; -+ dst->d_spc_warns = src->c[Q_SPC].warns; -+ -+ dst->d_ino_count = src->c[Q_INO].v; -+ dst->d_ino_hardlimit = src->c[Q_INO].hardlimit; -+ dst->d_ino_softlimit = src->c[Q_INO].softlimit; -+ dst->d_ino_timer = src->c[Q_INO].timer; -+ dst->d_ino_warns = src->c[Q_INO].warns; -+} -+ -+static int bch2_get_quota(struct super_block *sb, struct kqid kqid, -+ struct qc_dqblk *qdq) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_memquota_type *q = &c->quotas[kqid.type]; -+ qid_t qid = from_kqid(&init_user_ns, kqid); -+ struct bch_memquota *mq; -+ -+ memset(qdq, 0, sizeof(*qdq)); -+ -+ mutex_lock(&q->lock); -+ mq = genradix_ptr(&q->table, qid); -+ if (mq) -+ __bch2_quota_get(qdq, mq); -+ mutex_unlock(&q->lock); -+ -+ return 0; -+} -+ -+static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid, -+ struct qc_dqblk *qdq) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_memquota_type *q = &c->quotas[kqid->type]; -+ qid_t qid = from_kqid(&init_user_ns, *kqid); -+ struct genradix_iter iter; -+ struct bch_memquota *mq; -+ int ret = 0; -+ -+ mutex_lock(&q->lock); -+ -+ genradix_for_each_from(&q->table, iter, mq, qid) -+ if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) { -+ __bch2_quota_get(qdq, mq); -+ *kqid = make_kqid(current_user_ns(), kqid->type, iter.pos); -+ goto found; -+ } -+ -+ ret = -ENOENT; -+found: -+ mutex_unlock(&q->lock); -+ return ret; -+} -+ -+static int bch2_set_quota_trans(struct btree_trans *trans, -+ struct bkey_i_quota *new_quota, -+ struct qc_dqblk *qdq) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_QUOTAS, new_quota->k.p, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ k = bch2_btree_iter_peek_slot(iter); -+ -+ ret = bkey_err(k); -+ if (unlikely(ret)) -+ return ret; -+ -+ if (k.k->type == KEY_TYPE_quota) -+ new_quota->v = *bkey_s_c_to_quota(k).v; -+ -+ if (qdq->d_fieldmask & QC_SPC_SOFT) -+ new_quota->v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9); -+ if (qdq->d_fieldmask & QC_SPC_HARD) -+ new_quota->v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9); -+ -+ if (qdq->d_fieldmask & QC_INO_SOFT) -+ new_quota->v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit); -+ if (qdq->d_fieldmask & QC_INO_HARD) -+ new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit); -+ -+ return bch2_trans_update(trans, iter, &new_quota->k_i, 0); -+} -+ -+static int bch2_set_quota(struct super_block *sb, struct kqid qid, -+ struct qc_dqblk *qdq) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct btree_trans trans; -+ struct bkey_i_quota new_quota; -+ int ret; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ bkey_quota_init(&new_quota.k_i); -+ new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOUNLOCK, -+ bch2_set_quota_trans(&trans, &new_quota, qdq)) ?: -+ __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i)); -+ -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+const struct quotactl_ops bch2_quotactl_operations = { -+ .quota_enable = bch2_quota_enable, -+ .quota_disable = bch2_quota_disable, -+ .rm_xquota = bch2_quota_remove, -+ -+ .get_state = bch2_quota_get_state, -+ .set_info = 
bch2_quota_set_info, -+ -+ .get_dqblk = bch2_get_quota, -+ .get_nextdqblk = bch2_get_next_quota, -+ .set_dqblk = bch2_set_quota, -+}; -+ -+#endif /* CONFIG_BCACHEFS_QUOTA */ -diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h -new file mode 100644 -index 000000000000..51e4f9713ef0 ---- /dev/null -+++ b/fs/bcachefs/quota.h -@@ -0,0 +1,71 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_QUOTA_H -+#define _BCACHEFS_QUOTA_H -+ -+#include "inode.h" -+#include "quota_types.h" -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_quota; -+ -+const char *bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_quota (struct bkey_ops) { \ -+ .key_invalid = bch2_quota_invalid, \ -+ .val_to_text = bch2_quota_to_text, \ -+} -+ -+static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u) -+{ -+ return (struct bch_qid) { -+ .q[QTYP_USR] = u->bi_uid, -+ .q[QTYP_GRP] = u->bi_gid, -+ .q[QTYP_PRJ] = u->bi_project ? u->bi_project - 1 : 0, -+ }; -+} -+ -+static inline unsigned enabled_qtypes(struct bch_fs *c) -+{ -+ return ((c->opts.usrquota << QTYP_USR)| -+ (c->opts.grpquota << QTYP_GRP)| -+ (c->opts.prjquota << QTYP_PRJ)); -+} -+ -+#ifdef CONFIG_BCACHEFS_QUOTA -+ -+int bch2_quota_acct(struct bch_fs *, struct bch_qid, enum quota_counters, -+ s64, enum quota_acct_mode); -+ -+int bch2_quota_transfer(struct bch_fs *, unsigned, struct bch_qid, -+ struct bch_qid, u64, enum quota_acct_mode); -+ -+void bch2_fs_quota_exit(struct bch_fs *); -+void bch2_fs_quota_init(struct bch_fs *); -+int bch2_fs_quota_read(struct bch_fs *); -+ -+extern const struct quotactl_ops bch2_quotactl_operations; -+ -+#else -+ -+static inline int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid, -+ enum quota_counters counter, s64 v, -+ enum quota_acct_mode mode) -+{ -+ return 0; -+} -+ -+static inline int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes, -+ struct bch_qid dst, -+ struct bch_qid src, u64 space, -+ enum quota_acct_mode mode) -+{ -+ return 0; -+} -+ -+static inline void bch2_fs_quota_exit(struct bch_fs *c) {} -+static inline void bch2_fs_quota_init(struct bch_fs *c) {} -+static inline int bch2_fs_quota_read(struct bch_fs *c) { return 0; } -+ -+#endif -+ -+#endif /* _BCACHEFS_QUOTA_H */ -diff --git a/fs/bcachefs/quota_types.h b/fs/bcachefs/quota_types.h -new file mode 100644 -index 000000000000..6a136083d389 ---- /dev/null -+++ b/fs/bcachefs/quota_types.h -@@ -0,0 +1,43 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_QUOTA_TYPES_H -+#define _BCACHEFS_QUOTA_TYPES_H -+ -+#include -+ -+struct bch_qid { -+ u32 q[QTYP_NR]; -+}; -+ -+enum quota_acct_mode { -+ KEY_TYPE_QUOTA_PREALLOC, -+ KEY_TYPE_QUOTA_WARN, -+ KEY_TYPE_QUOTA_NOCHECK, -+}; -+ -+struct memquota_counter { -+ u64 v; -+ u64 hardlimit; -+ u64 softlimit; -+ s64 timer; -+ int warns; -+ int warning_issued; -+}; -+ -+struct bch_memquota { -+ struct memquota_counter c[Q_COUNTERS]; -+}; -+ -+typedef GENRADIX(struct bch_memquota) bch_memquota_table; -+ -+struct quota_limit { -+ u32 timelimit; -+ u32 warnlimit; -+}; -+ -+struct bch_memquota_type { -+ struct quota_limit limits[Q_COUNTERS]; -+ bch_memquota_table table; -+ struct mutex lock; -+}; -+ -+#endif /* _BCACHEFS_QUOTA_TYPES_H */ -diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -new file mode 100644 -index 000000000000..56a1f761271f ---- /dev/null -+++ b/fs/bcachefs/rebalance.c -@@ -0,0 +1,331 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include 
"bcachefs.h" -+#include "alloc_foreground.h" -+#include "btree_iter.h" -+#include "buckets.h" -+#include "clock.h" -+#include "disk_groups.h" -+#include "extents.h" -+#include "io.h" -+#include "move.h" -+#include "rebalance.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * Check if an extent should be moved: -+ * returns -1 if it should not be moved, or -+ * device of pointer that should be moved, if known, or INT_MAX if unknown -+ */ -+static int __bch2_rebalance_pred(struct bch_fs *c, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ if (io_opts->background_compression && -+ !bch2_bkey_is_incompressible(k)) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && -+ p.crc.compression_type != -+ bch2_compression_opt_to_type[io_opts->background_compression]) -+ return p.ptr.dev; -+ -+ if (io_opts->background_target) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && -+ !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target)) -+ return p.ptr.dev; -+ -+ return -1; -+} -+ -+void bch2_rebalance_add_key(struct bch_fs *c, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts) -+{ -+ atomic64_t *counter; -+ int dev; -+ -+ dev = __bch2_rebalance_pred(c, k, io_opts); -+ if (dev < 0) -+ return; -+ -+ counter = dev < INT_MAX -+ ? &bch_dev_bkey_exists(c, dev)->rebalance_work -+ : &c->rebalance.work_unknown_dev; -+ -+ if (atomic64_add_return(k.k->size, counter) == k.k->size) -+ rebalance_wakeup(c); -+} -+ -+static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ if (__bch2_rebalance_pred(c, k, io_opts) >= 0) { -+ data_opts->target = io_opts->background_target; -+ data_opts->btree_insert_flags = 0; -+ return DATA_ADD_REPLICAS; -+ } else { -+ return DATA_SKIP; -+ } -+} -+ -+void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors) -+{ -+ if (atomic64_add_return(sectors, &c->rebalance.work_unknown_dev) == -+ sectors) -+ rebalance_wakeup(c); -+} -+ -+struct rebalance_work { -+ int dev_most_full_idx; -+ unsigned dev_most_full_percent; -+ u64 dev_most_full_work; -+ u64 dev_most_full_capacity; -+ u64 total_work; -+}; -+ -+static void rebalance_work_accumulate(struct rebalance_work *w, -+ u64 dev_work, u64 unknown_dev, u64 capacity, int idx) -+{ -+ unsigned percent_full; -+ u64 work = dev_work + unknown_dev; -+ -+ if (work < dev_work || work < unknown_dev) -+ work = U64_MAX; -+ work = min(work, capacity); -+ -+ percent_full = div64_u64(work * 100, capacity); -+ -+ if (percent_full >= w->dev_most_full_percent) { -+ w->dev_most_full_idx = idx; -+ w->dev_most_full_percent = percent_full; -+ w->dev_most_full_work = work; -+ w->dev_most_full_capacity = capacity; -+ } -+ -+ if (w->total_work + dev_work >= w->total_work && -+ w->total_work + dev_work >= dev_work) -+ w->total_work += dev_work; -+} -+ -+static struct rebalance_work rebalance_work(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ struct rebalance_work ret = { .dev_most_full_idx = -1 }; -+ u64 unknown_dev = atomic64_read(&c->rebalance.work_unknown_dev); -+ unsigned i; -+ -+ for_each_online_member(ca, c, i) -+ rebalance_work_accumulate(&ret, -+ atomic64_read(&ca->rebalance_work), -+ unknown_dev, -+ bucket_to_sector(ca, ca->mi.nbuckets - -+ ca->mi.first_bucket), -+ i); -+ -+ rebalance_work_accumulate(&ret, -+ unknown_dev, 0, c->capacity, -1); 
-+ -+ return ret; -+} -+ -+static void rebalance_work_reset(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ for_each_online_member(ca, c, i) -+ atomic64_set(&ca->rebalance_work, 0); -+ -+ atomic64_set(&c->rebalance.work_unknown_dev, 0); -+} -+ -+static unsigned long curr_cputime(void) -+{ -+ u64 utime, stime; -+ -+ task_cputime_adjusted(current, &utime, &stime); -+ return nsecs_to_jiffies(utime + stime); -+} -+ -+static int bch2_rebalance_thread(void *arg) -+{ -+ struct bch_fs *c = arg; -+ struct bch_fs_rebalance *r = &c->rebalance; -+ struct io_clock *clock = &c->io_clock[WRITE]; -+ struct rebalance_work w, p; -+ unsigned long start, prev_start; -+ unsigned long prev_run_time, prev_run_cputime; -+ unsigned long cputime, prev_cputime; -+ unsigned long io_start; -+ long throttle; -+ -+ set_freezable(); -+ -+ io_start = atomic_long_read(&clock->now); -+ p = rebalance_work(c); -+ prev_start = jiffies; -+ prev_cputime = curr_cputime(); -+ -+ while (!kthread_wait_freezable(r->enabled)) { -+ cond_resched(); -+ -+ start = jiffies; -+ cputime = curr_cputime(); -+ -+ prev_run_time = start - prev_start; -+ prev_run_cputime = cputime - prev_cputime; -+ -+ w = rebalance_work(c); -+ BUG_ON(!w.dev_most_full_capacity); -+ -+ if (!w.total_work) { -+ r->state = REBALANCE_WAITING; -+ kthread_wait_freezable(rebalance_work(c).total_work); -+ continue; -+ } -+ -+ /* -+ * If there isn't much work to do, throttle cpu usage: -+ */ -+ throttle = prev_run_cputime * 100 / -+ max(1U, w.dev_most_full_percent) - -+ prev_run_time; -+ -+ if (w.dev_most_full_percent < 20 && throttle > 0) { -+ r->throttled_until_iotime = io_start + -+ div_u64(w.dev_most_full_capacity * -+ (20 - w.dev_most_full_percent), -+ 50); -+ -+ if (atomic_long_read(&clock->now) + clock->max_slop < -+ r->throttled_until_iotime) { -+ r->throttled_until_cputime = start + throttle; -+ r->state = REBALANCE_THROTTLED; -+ -+ bch2_kthread_io_clock_wait(clock, -+ r->throttled_until_iotime, -+ throttle); -+ continue; -+ } -+ } -+ -+ /* minimum 1 mb/sec: */ -+ r->pd.rate.rate = -+ max_t(u64, 1 << 11, -+ r->pd.rate.rate * -+ max(p.dev_most_full_percent, 1U) / -+ max(w.dev_most_full_percent, 1U)); -+ -+ io_start = atomic_long_read(&clock->now); -+ p = w; -+ prev_start = start; -+ prev_cputime = cputime; -+ -+ r->state = REBALANCE_RUNNING; -+ memset(&r->move_stats, 0, sizeof(r->move_stats)); -+ rebalance_work_reset(c); -+ -+ bch2_move_data(c, -+ /* ratelimiting disabled for now */ -+ NULL, /* &r->pd.rate, */ -+ writepoint_ptr(&c->rebalance_write_point), -+ POS_MIN, POS_MAX, -+ rebalance_pred, NULL, -+ &r->move_stats); -+ } -+ -+ return 0; -+} -+ -+void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct bch_fs_rebalance *r = &c->rebalance; -+ struct rebalance_work w = rebalance_work(c); -+ char h1[21], h2[21]; -+ -+ bch2_hprint(&PBUF(h1), w.dev_most_full_work << 9); -+ bch2_hprint(&PBUF(h2), w.dev_most_full_capacity << 9); -+ pr_buf(out, "fullest_dev (%i):\t%s/%s\n", -+ w.dev_most_full_idx, h1, h2); -+ -+ bch2_hprint(&PBUF(h1), w.total_work << 9); -+ bch2_hprint(&PBUF(h2), c->capacity << 9); -+ pr_buf(out, "total work:\t\t%s/%s\n", h1, h2); -+ -+ pr_buf(out, "rate:\t\t\t%u\n", r->pd.rate.rate); -+ -+ switch (r->state) { -+ case REBALANCE_WAITING: -+ pr_buf(out, "waiting\n"); -+ break; -+ case REBALANCE_THROTTLED: -+ bch2_hprint(&PBUF(h1), -+ (r->throttled_until_iotime - -+ atomic_long_read(&c->io_clock[WRITE].now)) << 9); -+ pr_buf(out, "throttled for %lu sec or %s io\n", -+ (r->throttled_until_cputime - jiffies) / 
HZ, -+ h1); -+ break; -+ case REBALANCE_RUNNING: -+ pr_buf(out, "running\n"); -+ pr_buf(out, "pos %llu:%llu\n", -+ r->move_stats.pos.inode, -+ r->move_stats.pos.offset); -+ break; -+ } -+} -+ -+void bch2_rebalance_stop(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ c->rebalance.pd.rate.rate = UINT_MAX; -+ bch2_ratelimit_reset(&c->rebalance.pd.rate); -+ -+ p = rcu_dereference_protected(c->rebalance.thread, 1); -+ c->rebalance.thread = NULL; -+ -+ if (p) { -+ /* for sychronizing with rebalance_wakeup() */ -+ synchronize_rcu(); -+ -+ kthread_stop(p); -+ put_task_struct(p); -+ } -+} -+ -+int bch2_rebalance_start(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ if (c->opts.nochanges) -+ return 0; -+ -+ p = kthread_create(bch2_rebalance_thread, c, "bch_rebalance"); -+ if (IS_ERR(p)) -+ return PTR_ERR(p); -+ -+ get_task_struct(p); -+ rcu_assign_pointer(c->rebalance.thread, p); -+ wake_up_process(p); -+ return 0; -+} -+ -+void bch2_fs_rebalance_init(struct bch_fs *c) -+{ -+ bch2_pd_controller_init(&c->rebalance.pd); -+ -+ atomic64_set(&c->rebalance.work_unknown_dev, S64_MAX); -+} -diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h -new file mode 100644 -index 000000000000..7ade0bb81cce ---- /dev/null -+++ b/fs/bcachefs/rebalance.h -@@ -0,0 +1,28 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REBALANCE_H -+#define _BCACHEFS_REBALANCE_H -+ -+#include "rebalance_types.h" -+ -+static inline void rebalance_wakeup(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ rcu_read_lock(); -+ p = rcu_dereference(c->rebalance.thread); -+ if (p) -+ wake_up_process(p); -+ rcu_read_unlock(); -+} -+ -+void bch2_rebalance_add_key(struct bch_fs *, struct bkey_s_c, -+ struct bch_io_opts *); -+void bch2_rebalance_add_work(struct bch_fs *, u64); -+ -+void bch2_rebalance_work_to_text(struct printbuf *, struct bch_fs *); -+ -+void bch2_rebalance_stop(struct bch_fs *); -+int bch2_rebalance_start(struct bch_fs *); -+void bch2_fs_rebalance_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_REBALANCE_H */ -diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h -new file mode 100644 -index 000000000000..192c6be20ced ---- /dev/null -+++ b/fs/bcachefs/rebalance_types.h -@@ -0,0 +1,27 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REBALANCE_TYPES_H -+#define _BCACHEFS_REBALANCE_TYPES_H -+ -+#include "move_types.h" -+ -+enum rebalance_state { -+ REBALANCE_WAITING, -+ REBALANCE_THROTTLED, -+ REBALANCE_RUNNING, -+}; -+ -+struct bch_fs_rebalance { -+ struct task_struct __rcu *thread; -+ struct bch_pd_controller pd; -+ -+ atomic64_t work_unknown_dev; -+ -+ enum rebalance_state state; -+ unsigned long throttled_until_iotime; -+ unsigned long throttled_until_cputime; -+ struct bch_move_stats move_stats; -+ -+ unsigned enabled:1; -+}; -+ -+#endif /* _BCACHEFS_REBALANCE_TYPES_H */ -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -new file mode 100644 -index 000000000000..6e829bf0a31f ---- /dev/null -+++ b/fs/bcachefs/recovery.c -@@ -0,0 +1,1330 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "buckets.h" -+#include "dirent.h" -+#include "ec.h" -+#include "error.h" -+#include "fs-common.h" -+#include "fsck.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "quota.h" -+#include "recovery.h" -+#include "replicas.h" -+#include 
"super-io.h" -+ -+#include -+#include -+ -+#define QSTR(n) { { { .len = strlen(n) } }, .name = n } -+ -+/* iterate over keys read from the journal: */ -+ -+static struct journal_key *journal_key_search(struct journal_keys *journal_keys, -+ enum btree_id id, unsigned level, -+ struct bpos pos) -+{ -+ size_t l = 0, r = journal_keys->nr, m; -+ -+ while (l < r) { -+ m = l + ((r - l) >> 1); -+ if ((cmp_int(id, journal_keys->d[m].btree_id) ?: -+ cmp_int(level, journal_keys->d[m].level) ?: -+ bkey_cmp(pos, journal_keys->d[m].k->k.p)) > 0) -+ l = m + 1; -+ else -+ r = m; -+ } -+ -+ BUG_ON(l < journal_keys->nr && -+ (cmp_int(id, journal_keys->d[l].btree_id) ?: -+ cmp_int(level, journal_keys->d[l].level) ?: -+ bkey_cmp(pos, journal_keys->d[l].k->k.p)) > 0); -+ -+ BUG_ON(l && -+ (cmp_int(id, journal_keys->d[l - 1].btree_id) ?: -+ cmp_int(level, journal_keys->d[l - 1].level) ?: -+ bkey_cmp(pos, journal_keys->d[l - 1].k->k.p)) <= 0); -+ -+ return l < journal_keys->nr ? journal_keys->d + l : NULL; -+} -+ -+static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter) -+{ -+ if (iter->k && -+ iter->k < iter->keys->d + iter->keys->nr && -+ iter->k->btree_id == iter->btree_id && -+ iter->k->level == iter->level) -+ return iter->k->k; -+ -+ iter->k = NULL; -+ return NULL; -+} -+ -+static void bch2_journal_iter_advance(struct journal_iter *iter) -+{ -+ if (iter->k) -+ iter->k++; -+} -+ -+static void bch2_journal_iter_init(struct journal_iter *iter, -+ struct journal_keys *journal_keys, -+ enum btree_id id, unsigned level, -+ struct bpos pos) -+{ -+ iter->btree_id = id; -+ iter->level = level; -+ iter->keys = journal_keys; -+ iter->k = journal_key_search(journal_keys, id, level, pos); -+} -+ -+static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter) -+{ -+ return iter->btree -+ ? bch2_btree_iter_peek(iter->btree) -+ : bch2_btree_node_iter_peek_unpack(&iter->node_iter, -+ iter->b, &iter->unpacked); -+} -+ -+static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter) -+{ -+ if (iter->btree) -+ bch2_btree_iter_next(iter->btree); -+ else -+ bch2_btree_node_iter_advance(&iter->node_iter, iter->b); -+} -+ -+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter) -+{ -+ switch (iter->last) { -+ case none: -+ break; -+ case btree: -+ bch2_journal_iter_advance_btree(iter); -+ break; -+ case journal: -+ bch2_journal_iter_advance(&iter->journal); -+ break; -+ } -+ -+ iter->last = none; -+} -+ -+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter) -+{ -+ struct bkey_s_c ret; -+ -+ while (1) { -+ struct bkey_s_c btree_k = -+ bch2_journal_iter_peek_btree(iter); -+ struct bkey_s_c journal_k = -+ bkey_i_to_s_c(bch2_journal_iter_peek(&iter->journal)); -+ -+ if (btree_k.k && journal_k.k) { -+ int cmp = bkey_cmp(btree_k.k->p, journal_k.k->p); -+ -+ if (!cmp) -+ bch2_journal_iter_advance_btree(iter); -+ -+ iter->last = cmp < 0 ? btree : journal; -+ } else if (btree_k.k) { -+ iter->last = btree; -+ } else if (journal_k.k) { -+ iter->last = journal; -+ } else { -+ iter->last = none; -+ return bkey_s_c_null; -+ } -+ -+ ret = iter->last == journal ? 
journal_k : btree_k; -+ -+ if (iter->b && -+ bkey_cmp(ret.k->p, iter->b->data->max_key) > 0) { -+ iter->journal.k = NULL; -+ iter->last = none; -+ return bkey_s_c_null; -+ } -+ -+ if (!bkey_deleted(ret.k)) -+ break; -+ -+ bch2_btree_and_journal_iter_advance(iter); -+ } -+ -+ return ret; -+} -+ -+struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *iter) -+{ -+ bch2_btree_and_journal_iter_advance(iter); -+ -+ return bch2_btree_and_journal_iter_peek(iter); -+} -+ -+void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *iter, -+ struct btree_trans *trans, -+ struct journal_keys *journal_keys, -+ enum btree_id id, struct bpos pos) -+{ -+ memset(iter, 0, sizeof(*iter)); -+ -+ iter->btree = bch2_trans_get_iter(trans, id, pos, 0); -+ bch2_journal_iter_init(&iter->journal, journal_keys, id, 0, pos); -+} -+ -+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter, -+ struct journal_keys *journal_keys, -+ struct btree *b) -+{ -+ memset(iter, 0, sizeof(*iter)); -+ -+ iter->b = b; -+ bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b); -+ bch2_journal_iter_init(&iter->journal, journal_keys, -+ b->c.btree_id, b->c.level, b->data->min_key); -+} -+ -+/* Walk btree, overlaying keys from the journal: */ -+ -+static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b, -+ struct journal_keys *journal_keys, -+ enum btree_id btree_id, -+ btree_walk_node_fn node_fn, -+ btree_walk_key_fn key_fn) -+{ -+ struct btree_and_journal_iter iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b); -+ -+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { -+ ret = key_fn(c, btree_id, b->c.level, k); -+ if (ret) -+ break; -+ -+ if (b->c.level) { -+ struct btree *child; -+ BKEY_PADDED(k) tmp; -+ -+ bkey_reassemble(&tmp.k, k); -+ k = bkey_i_to_s_c(&tmp.k); -+ -+ bch2_btree_and_journal_iter_advance(&iter); -+ -+ if (b->c.level > 0) { -+ child = bch2_btree_node_get_noiter(c, &tmp.k, -+ b->c.btree_id, b->c.level - 1); -+ ret = PTR_ERR_OR_ZERO(child); -+ if (ret) -+ break; -+ -+ ret = (node_fn ? node_fn(c, b) : 0) ?: -+ bch2_btree_and_journal_walk_recurse(c, child, -+ journal_keys, btree_id, node_fn, key_fn); -+ six_unlock_read(&child->c.lock); -+ -+ if (ret) -+ break; -+ } -+ } else { -+ bch2_btree_and_journal_iter_advance(&iter); -+ } -+ } -+ -+ return ret; -+} -+ -+int bch2_btree_and_journal_walk(struct bch_fs *c, struct journal_keys *journal_keys, -+ enum btree_id btree_id, -+ btree_walk_node_fn node_fn, -+ btree_walk_key_fn key_fn) -+{ -+ struct btree *b = c->btree_roots[btree_id].b; -+ int ret = 0; -+ -+ if (btree_node_fake(b)) -+ return 0; -+ -+ six_lock_read(&b->c.lock, NULL, NULL); -+ ret = (node_fn ? 
node_fn(c, b) : 0) ?: -+ bch2_btree_and_journal_walk_recurse(c, b, journal_keys, btree_id, -+ node_fn, key_fn) ?: -+ key_fn(c, btree_id, b->c.level + 1, bkey_i_to_s_c(&b->key)); -+ six_unlock_read(&b->c.lock); -+ -+ return ret; -+} -+ -+/* sort and dedup all keys in the journal: */ -+ -+void bch2_journal_entries_free(struct list_head *list) -+{ -+ -+ while (!list_empty(list)) { -+ struct journal_replay *i = -+ list_first_entry(list, struct journal_replay, list); -+ list_del(&i->list); -+ kvpfree(i, offsetof(struct journal_replay, j) + -+ vstruct_bytes(&i->j)); -+ } -+} -+ -+/* -+ * When keys compare equal, oldest compares first: -+ */ -+static int journal_sort_key_cmp(const void *_l, const void *_r) -+{ -+ const struct journal_key *l = _l; -+ const struct journal_key *r = _r; -+ -+ return cmp_int(l->btree_id, r->btree_id) ?: -+ cmp_int(l->level, r->level) ?: -+ bkey_cmp(l->k->k.p, r->k->k.p) ?: -+ cmp_int(l->journal_seq, r->journal_seq) ?: -+ cmp_int(l->journal_offset, r->journal_offset); -+} -+ -+void bch2_journal_keys_free(struct journal_keys *keys) -+{ -+ kvfree(keys->d); -+ keys->d = NULL; -+ keys->nr = 0; -+} -+ -+static struct journal_keys journal_keys_sort(struct list_head *journal_entries) -+{ -+ struct journal_replay *p; -+ struct jset_entry *entry; -+ struct bkey_i *k, *_n; -+ struct journal_keys keys = { NULL }; -+ struct journal_key *src, *dst; -+ size_t nr_keys = 0; -+ -+ if (list_empty(journal_entries)) -+ return keys; -+ -+ keys.journal_seq_base = -+ le64_to_cpu(list_last_entry(journal_entries, -+ struct journal_replay, list)->j.last_seq); -+ -+ list_for_each_entry(p, journal_entries, list) { -+ if (le64_to_cpu(p->j.seq) < keys.journal_seq_base) -+ continue; -+ -+ for_each_jset_key(k, _n, entry, &p->j) -+ nr_keys++; -+ } -+ -+ -+ keys.d = kvmalloc(sizeof(keys.d[0]) * nr_keys, GFP_KERNEL); -+ if (!keys.d) -+ goto err; -+ -+ list_for_each_entry(p, journal_entries, list) { -+ if (le64_to_cpu(p->j.seq) < keys.journal_seq_base) -+ continue; -+ -+ for_each_jset_key(k, _n, entry, &p->j) -+ keys.d[keys.nr++] = (struct journal_key) { -+ .btree_id = entry->btree_id, -+ .level = entry->level, -+ .k = k, -+ .journal_seq = le64_to_cpu(p->j.seq) - -+ keys.journal_seq_base, -+ .journal_offset = k->_data - p->j._data, -+ }; -+ } -+ -+ sort(keys.d, keys.nr, sizeof(keys.d[0]), journal_sort_key_cmp, NULL); -+ -+ src = dst = keys.d; -+ while (src < keys.d + keys.nr) { -+ while (src + 1 < keys.d + keys.nr && -+ src[0].btree_id == src[1].btree_id && -+ src[0].level == src[1].level && -+ !bkey_cmp(src[0].k->k.p, src[1].k->k.p)) -+ src++; -+ -+ *dst++ = *src++; -+ } -+ -+ keys.nr = dst - keys.d; -+err: -+ return keys; -+} -+ -+/* journal replay: */ -+ -+static void replay_now_at(struct journal *j, u64 seq) -+{ -+ BUG_ON(seq < j->replay_journal_seq); -+ BUG_ON(seq > j->replay_journal_seq_end); -+ -+ while (j->replay_journal_seq < seq) -+ bch2_journal_pin_put(j, j->replay_journal_seq++); -+} -+ -+static int bch2_extent_replay_key(struct bch_fs *c, enum btree_id btree_id, -+ struct bkey_i *k) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter, *split_iter; -+ /* -+ * We might cause compressed extents to be split, so we need to pass in -+ * a disk_reservation: -+ */ -+ struct disk_reservation disk_res = -+ bch2_disk_reservation_init(c, 0); -+ struct bkey_i *split; -+ struct bpos atomic_end; -+ /* -+ * Some extents aren't equivalent - w.r.t. 
what the triggers do -+ * - if they're split: -+ */ -+ bool remark_if_split = bch2_bkey_sectors_compressed(bkey_i_to_s_c(k)) || -+ k->k.type == KEY_TYPE_reflink_p; -+ bool remark = false; -+ int ret; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ iter = bch2_trans_get_iter(&trans, btree_id, -+ bkey_start_pos(&k->k), -+ BTREE_ITER_INTENT); -+ -+ do { -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ goto err; -+ -+ atomic_end = bpos_min(k->k.p, iter->l[0].b->key.k.p); -+ -+ split = bch2_trans_kmalloc(&trans, bkey_bytes(&k->k)); -+ ret = PTR_ERR_OR_ZERO(split); -+ if (ret) -+ goto err; -+ -+ if (!remark && -+ remark_if_split && -+ bkey_cmp(atomic_end, k->k.p) < 0) { -+ ret = bch2_disk_reservation_add(c, &disk_res, -+ k->k.size * -+ bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(k)), -+ BCH_DISK_RESERVATION_NOFAIL); -+ BUG_ON(ret); -+ -+ remark = true; -+ } -+ -+ bkey_copy(split, k); -+ bch2_cut_front(iter->pos, split); -+ bch2_cut_back(atomic_end, split); -+ -+ split_iter = bch2_trans_copy_iter(&trans, iter); -+ ret = PTR_ERR_OR_ZERO(split_iter); -+ if (ret) -+ goto err; -+ -+ /* -+ * It's important that we don't go through the -+ * extent_handle_overwrites() and extent_update_to_keys() path -+ * here: journal replay is supposed to treat extents like -+ * regular keys -+ */ -+ __bch2_btree_iter_set_pos(split_iter, split->k.p, false); -+ bch2_trans_update(&trans, split_iter, split, -+ BTREE_TRIGGER_NORUN); -+ -+ bch2_btree_iter_set_pos(iter, split->k.p); -+ -+ if (remark) { -+ ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(split), -+ 0, split->k.size, -+ BTREE_TRIGGER_INSERT); -+ if (ret) -+ goto err; -+ } -+ } while (bkey_cmp(iter->pos, k->k.p) < 0); -+ -+ if (remark) { -+ ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k), -+ 0, -((s64) k->k.size), -+ BTREE_TRIGGER_OVERWRITE); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_trans_commit(&trans, &disk_res, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_JOURNAL_REPLAY); -+err: -+ if (ret == -EINTR) -+ goto retry; -+ -+ bch2_disk_reservation_put(c, &disk_res); -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+static int __bch2_journal_replay_key(struct btree_trans *trans, -+ enum btree_id id, unsigned level, -+ struct bkey_i *k) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ iter = bch2_trans_get_node_iter(trans, id, k->k.p, -+ BTREE_MAX_DEPTH, level, -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ /* -+ * iter->flags & BTREE_ITER_IS_EXTENTS triggers the update path to run -+ * extent_handle_overwrites() and extent_update_to_keys() - but we don't -+ * want that here, journal replay is supposed to treat extents like -+ * regular keys: -+ */ -+ __bch2_btree_iter_set_pos(iter, k->k.p, false); -+ -+ ret = bch2_btree_iter_traverse(iter) ?: -+ bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN); -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static int bch2_journal_replay_key(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bkey_i *k) -+{ -+ return bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_JOURNAL_REPLAY, -+ __bch2_journal_replay_key(&trans, id, level, k)); -+} -+ -+static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, k->k.p, -+ BTREE_ITER_CACHED| -+ BTREE_ITER_CACHED_NOFILL| -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(iter) ?: -+ 
bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN); -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k) -+{ -+ return bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_JOURNAL_REPLAY, -+ __bch2_alloc_replay_key(&trans, k)); -+} -+ -+static int journal_sort_seq_cmp(const void *_l, const void *_r) -+{ -+ const struct journal_key *l = _l; -+ const struct journal_key *r = _r; -+ -+ return cmp_int(r->level, l->level) ?: -+ cmp_int(l->journal_seq, r->journal_seq) ?: -+ cmp_int(l->btree_id, r->btree_id) ?: -+ bkey_cmp(l->k->k.p, r->k->k.p); -+} -+ -+static int bch2_journal_replay(struct bch_fs *c, -+ struct journal_keys keys) -+{ -+ struct journal *j = &c->journal; -+ struct journal_key *i; -+ u64 seq; -+ int ret; -+ -+ sort(keys.d, keys.nr, sizeof(keys.d[0]), journal_sort_seq_cmp, NULL); -+ -+ if (keys.nr) -+ replay_now_at(j, keys.journal_seq_base); -+ -+ seq = j->replay_journal_seq; -+ -+ /* -+ * First replay updates to the alloc btree - these will only update the -+ * btree key cache: -+ */ -+ for_each_journal_key(keys, i) { -+ cond_resched(); -+ -+ if (!i->level && i->btree_id == BTREE_ID_ALLOC) { -+ j->replay_journal_seq = keys.journal_seq_base + i->journal_seq; -+ ret = bch2_alloc_replay_key(c, i->k); -+ if (ret) -+ goto err; -+ } -+ } -+ -+ /* -+ * Next replay updates to interior btree nodes: -+ */ -+ for_each_journal_key(keys, i) { -+ cond_resched(); -+ -+ if (i->level) { -+ j->replay_journal_seq = keys.journal_seq_base + i->journal_seq; -+ ret = bch2_journal_replay_key(c, i->btree_id, i->level, i->k); -+ if (ret) -+ goto err; -+ } -+ } -+ -+ /* -+ * Now that the btree is in a consistent state, we can start journal -+ * reclaim (which will be flushing entries from the btree key cache back -+ * to the btree: -+ */ -+ set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags); -+ set_bit(JOURNAL_RECLAIM_STARTED, &j->flags); -+ -+ j->replay_journal_seq = seq; -+ -+ /* -+ * Now replay leaf node updates: -+ */ -+ for_each_journal_key(keys, i) { -+ cond_resched(); -+ -+ if (i->level || i->btree_id == BTREE_ID_ALLOC) -+ continue; -+ -+ replay_now_at(j, keys.journal_seq_base + i->journal_seq); -+ -+ ret = i->k->k.size -+ ? bch2_extent_replay_key(c, i->btree_id, i->k) -+ : bch2_journal_replay_key(c, i->btree_id, i->level, i->k); -+ if (ret) -+ goto err; -+ } -+ -+ replay_now_at(j, j->replay_journal_seq_end); -+ j->replay_journal_seq = 0; -+ -+ bch2_journal_set_replay_done(j); -+ bch2_journal_flush_all_pins(j); -+ return bch2_journal_error(j); -+err: -+ bch_err(c, "journal replay: error %d while replaying key", ret); -+ return ret; -+} -+ -+static bool journal_empty(struct list_head *journal) -+{ -+ return list_empty(journal) || -+ journal_entry_empty(&list_last_entry(journal, -+ struct journal_replay, list)->j); -+} -+ -+static int -+verify_journal_entries_not_blacklisted_or_missing(struct bch_fs *c, -+ struct list_head *journal) -+{ -+ struct journal_replay *i = -+ list_last_entry(journal, struct journal_replay, list); -+ u64 start_seq = le64_to_cpu(i->j.last_seq); -+ u64 end_seq = le64_to_cpu(i->j.seq); -+ u64 seq = start_seq; -+ int ret = 0; -+ -+ list_for_each_entry(i, journal, list) { -+ if (le64_to_cpu(i->j.seq) < start_seq) -+ continue; -+ -+ fsck_err_on(seq != le64_to_cpu(i->j.seq), c, -+ "journal entries %llu-%llu missing! 
(replaying %llu-%llu)", -+ seq, le64_to_cpu(i->j.seq) - 1, -+ start_seq, end_seq); -+ -+ seq = le64_to_cpu(i->j.seq); -+ -+ fsck_err_on(bch2_journal_seq_is_blacklisted(c, seq, false), c, -+ "found blacklisted journal entry %llu", seq); -+ -+ do { -+ seq++; -+ } while (bch2_journal_seq_is_blacklisted(c, seq, false)); -+ } -+fsck_err: -+ return ret; -+} -+ -+/* journal replay early: */ -+ -+static int journal_replay_entry_early(struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ int ret = 0; -+ -+ switch (entry->type) { -+ case BCH_JSET_ENTRY_btree_root: { -+ struct btree_root *r; -+ -+ if (entry->btree_id >= BTREE_ID_NR) { -+ bch_err(c, "filesystem has unknown btree type %u", -+ entry->btree_id); -+ return -EINVAL; -+ } -+ -+ r = &c->btree_roots[entry->btree_id]; -+ -+ if (entry->u64s) { -+ r->level = entry->level; -+ bkey_copy(&r->key, &entry->start[0]); -+ r->error = 0; -+ } else { -+ r->error = -EIO; -+ } -+ r->alive = true; -+ break; -+ } -+ case BCH_JSET_ENTRY_usage: { -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ -+ switch (entry->btree_id) { -+ case FS_USAGE_RESERVED: -+ if (entry->level < BCH_REPLICAS_MAX) -+ c->usage_base->persistent_reserved[entry->level] = -+ le64_to_cpu(u->v); -+ break; -+ case FS_USAGE_INODES: -+ c->usage_base->nr_inodes = le64_to_cpu(u->v); -+ break; -+ case FS_USAGE_KEY_VERSION: -+ atomic64_set(&c->key_version, -+ le64_to_cpu(u->v)); -+ break; -+ } -+ -+ break; -+ } -+ case BCH_JSET_ENTRY_data_usage: { -+ struct jset_entry_data_usage *u = -+ container_of(entry, struct jset_entry_data_usage, entry); -+ ret = bch2_replicas_set_usage(c, &u->r, -+ le64_to_cpu(u->v)); -+ break; -+ } -+ case BCH_JSET_ENTRY_blacklist: { -+ struct jset_entry_blacklist *bl_entry = -+ container_of(entry, struct jset_entry_blacklist, entry); -+ -+ ret = bch2_journal_seq_blacklist_add(c, -+ le64_to_cpu(bl_entry->seq), -+ le64_to_cpu(bl_entry->seq) + 1); -+ break; -+ } -+ case BCH_JSET_ENTRY_blacklist_v2: { -+ struct jset_entry_blacklist_v2 *bl_entry = -+ container_of(entry, struct jset_entry_blacklist_v2, entry); -+ -+ ret = bch2_journal_seq_blacklist_add(c, -+ le64_to_cpu(bl_entry->start), -+ le64_to_cpu(bl_entry->end) + 1); -+ break; -+ } -+ } -+ -+ return ret; -+} -+ -+static int journal_replay_early(struct bch_fs *c, -+ struct bch_sb_field_clean *clean, -+ struct list_head *journal) -+{ -+ struct jset_entry *entry; -+ int ret; -+ -+ if (clean) { -+ c->bucket_clock[READ].hand = le16_to_cpu(clean->read_clock); -+ c->bucket_clock[WRITE].hand = le16_to_cpu(clean->write_clock); -+ -+ for (entry = clean->start; -+ entry != vstruct_end(&clean->field); -+ entry = vstruct_next(entry)) { -+ ret = journal_replay_entry_early(c, entry); -+ if (ret) -+ return ret; -+ } -+ } else { -+ struct journal_replay *i = -+ list_last_entry(journal, struct journal_replay, list); -+ -+ c->bucket_clock[READ].hand = le16_to_cpu(i->j.read_clock); -+ c->bucket_clock[WRITE].hand = le16_to_cpu(i->j.write_clock); -+ -+ list_for_each_entry(i, journal, list) -+ vstruct_for_each(&i->j, entry) { -+ ret = journal_replay_entry_early(c, entry); -+ if (ret) -+ return ret; -+ } -+ } -+ -+ bch2_fs_usage_initialize(c); -+ -+ return 0; -+} -+ -+/* sb clean section: */ -+ -+static struct bkey_i *btree_root_find(struct bch_fs *c, -+ struct bch_sb_field_clean *clean, -+ struct jset *j, -+ enum btree_id id, unsigned *level) -+{ -+ struct bkey_i *k; -+ struct jset_entry *entry, *start, *end; -+ -+ if (clean) { -+ start = clean->start; -+ end = vstruct_end(&clean->field); -+ } else { -+ 
start = j->start; -+ end = vstruct_last(j); -+ } -+ -+ for (entry = start; entry < end; entry = vstruct_next(entry)) -+ if (entry->type == BCH_JSET_ENTRY_btree_root && -+ entry->btree_id == id) -+ goto found; -+ -+ return NULL; -+found: -+ if (!entry->u64s) -+ return ERR_PTR(-EINVAL); -+ -+ k = entry->start; -+ *level = entry->level; -+ return k; -+} -+ -+static int verify_superblock_clean(struct bch_fs *c, -+ struct bch_sb_field_clean **cleanp, -+ struct jset *j) -+{ -+ unsigned i; -+ struct bch_sb_field_clean *clean = *cleanp; -+ int ret = 0; -+ -+ if (!c->sb.clean || !j) -+ return 0; -+ -+ if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, -+ "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown", -+ le64_to_cpu(clean->journal_seq), -+ le64_to_cpu(j->seq))) { -+ kfree(clean); -+ *cleanp = NULL; -+ return 0; -+ } -+ -+ mustfix_fsck_err_on(j->read_clock != clean->read_clock, c, -+ "superblock read clock doesn't match journal after clean shutdown"); -+ mustfix_fsck_err_on(j->write_clock != clean->write_clock, c, -+ "superblock read clock doesn't match journal after clean shutdown"); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ char buf1[200], buf2[200]; -+ struct bkey_i *k1, *k2; -+ unsigned l1 = 0, l2 = 0; -+ -+ k1 = btree_root_find(c, clean, NULL, i, &l1); -+ k2 = btree_root_find(c, NULL, j, i, &l2); -+ -+ if (!k1 && !k2) -+ continue; -+ -+ mustfix_fsck_err_on(!k1 || !k2 || -+ IS_ERR(k1) || -+ IS_ERR(k2) || -+ k1->k.u64s != k2->k.u64s || -+ memcmp(k1, k2, bkey_bytes(k1)) || -+ l1 != l2, c, -+ "superblock btree root %u doesn't match journal after clean shutdown\n" -+ "sb: l=%u %s\n" -+ "journal: l=%u %s\n", i, -+ l1, (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(k1)), buf1), -+ l2, (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(k2)), buf2)); -+ } -+fsck_err: -+ return ret; -+} -+ -+static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c) -+{ -+ struct bch_sb_field_clean *clean, *sb_clean; -+ int ret; -+ -+ mutex_lock(&c->sb_lock); -+ sb_clean = bch2_sb_get_clean(c->disk_sb.sb); -+ -+ if (fsck_err_on(!sb_clean, c, -+ "superblock marked clean but clean section not present")) { -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ c->sb.clean = false; -+ mutex_unlock(&c->sb_lock); -+ return NULL; -+ } -+ -+ clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field), -+ GFP_KERNEL); -+ if (!clean) { -+ mutex_unlock(&c->sb_lock); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ if (le16_to_cpu(c->disk_sb.sb->version) < -+ bcachefs_metadata_version_bkey_renumber) -+ bch2_sb_clean_renumber(clean, READ); -+ -+ mutex_unlock(&c->sb_lock); -+ -+ return clean; -+fsck_err: -+ mutex_unlock(&c->sb_lock); -+ return ERR_PTR(ret); -+} -+ -+static int read_btree_roots(struct bch_fs *c) -+{ -+ unsigned i; -+ int ret = 0; -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ struct btree_root *r = &c->btree_roots[i]; -+ -+ if (!r->alive) -+ continue; -+ -+ if (i == BTREE_ID_ALLOC && -+ c->opts.reconstruct_alloc) { -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); -+ continue; -+ } -+ -+ -+ if (r->error) { -+ __fsck_err(c, i == BTREE_ID_ALLOC -+ ? FSCK_CAN_IGNORE : 0, -+ "invalid btree root %s", -+ bch2_btree_ids[i]); -+ if (i == BTREE_ID_ALLOC) -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); -+ } -+ -+ ret = bch2_btree_root_read(c, i, &r->key, r->level); -+ if (ret) { -+ __fsck_err(c, i == BTREE_ID_ALLOC -+ ? 
FSCK_CAN_IGNORE : 0, -+ "error reading btree root %s", -+ bch2_btree_ids[i]); -+ if (i == BTREE_ID_ALLOC) -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); -+ } -+ } -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (!c->btree_roots[i].b) -+ bch2_btree_root_alloc(c, i); -+fsck_err: -+ return ret; -+} -+ -+int bch2_fs_recovery(struct bch_fs *c) -+{ -+ const char *err = "cannot allocate memory"; -+ struct bch_sb_field_clean *clean = NULL; -+ u64 journal_seq; -+ bool wrote = false, write_sb = false; -+ int ret; -+ -+ if (c->sb.clean) -+ clean = read_superblock_clean(c); -+ ret = PTR_ERR_OR_ZERO(clean); -+ if (ret) -+ goto err; -+ -+ if (c->sb.clean) -+ bch_info(c, "recovering from clean shutdown, journal seq %llu", -+ le64_to_cpu(clean->journal_seq)); -+ -+ if (!c->replicas.entries || -+ c->opts.rebuild_replicas) { -+ bch_info(c, "building replicas info"); -+ set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags); -+ } -+ -+ if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) { -+ struct jset *j; -+ -+ ret = bch2_journal_read(c, &c->journal_entries); -+ if (ret) -+ goto err; -+ -+ if (mustfix_fsck_err_on(c->sb.clean && !journal_empty(&c->journal_entries), c, -+ "filesystem marked clean but journal not empty")) { -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ c->sb.clean = false; -+ } -+ -+ if (!c->sb.clean && list_empty(&c->journal_entries)) { -+ bch_err(c, "no journal entries found"); -+ ret = BCH_FSCK_REPAIR_IMPOSSIBLE; -+ goto err; -+ } -+ -+ c->journal_keys = journal_keys_sort(&c->journal_entries); -+ if (!c->journal_keys.d) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ j = &list_last_entry(&c->journal_entries, -+ struct journal_replay, list)->j; -+ -+ ret = verify_superblock_clean(c, &clean, j); -+ if (ret) -+ goto err; -+ -+ journal_seq = le64_to_cpu(j->seq) + 1; -+ } else { -+ journal_seq = le64_to_cpu(clean->journal_seq) + 1; -+ } -+ -+ if (!c->sb.clean && -+ !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) { -+ bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ ret = journal_replay_early(c, clean, &c->journal_entries); -+ if (ret) -+ goto err; -+ -+ if (!c->sb.clean) { -+ ret = bch2_journal_seq_blacklist_add(c, -+ journal_seq, -+ journal_seq + 4); -+ if (ret) { -+ bch_err(c, "error creating new journal seq blacklist entry"); -+ goto err; -+ } -+ -+ journal_seq += 4; -+ -+ /* -+ * The superblock needs to be written before we do any btree -+ * node writes: it will be in the read_write() path -+ */ -+ } -+ -+ ret = bch2_blacklist_table_initialize(c); -+ -+ if (!list_empty(&c->journal_entries)) { -+ ret = verify_journal_entries_not_blacklisted_or_missing(c, -+ &c->journal_entries); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_fs_journal_start(&c->journal, journal_seq, -+ &c->journal_entries); -+ if (ret) -+ goto err; -+ -+ ret = read_btree_roots(c); -+ if (ret) -+ goto err; -+ -+ bch_verbose(c, "starting alloc read"); -+ err = "error reading allocation information"; -+ ret = bch2_alloc_read(c, &c->journal_keys); -+ if (ret) -+ goto err; -+ bch_verbose(c, "alloc read done"); -+ -+ bch_verbose(c, "starting stripes_read"); -+ err = "error reading stripes"; -+ ret = bch2_stripes_read(c, &c->journal_keys); -+ if (ret) -+ goto err; -+ bch_verbose(c, "stripes_read done"); -+ -+ set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); -+ -+ if ((c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) && -+ !(c->sb.compat & (1ULL << 
BCH_COMPAT_FEAT_ALLOC_METADATA))) { -+ /* -+ * interior btree node updates aren't consistent with the -+ * journal; after an unclean shutdown we have to walk all -+ * pointers to metadata: -+ */ -+ bch_info(c, "starting metadata mark and sweep"); -+ err = "error in mark and sweep"; -+ ret = bch2_gc(c, &c->journal_keys, true, true); -+ if (ret) -+ goto err; -+ bch_verbose(c, "mark and sweep done"); -+ } -+ -+ if (c->opts.fsck || -+ !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) || -+ test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) { -+ bch_info(c, "starting mark and sweep"); -+ err = "error in mark and sweep"; -+ ret = bch2_gc(c, &c->journal_keys, true, false); -+ if (ret) -+ goto err; -+ bch_verbose(c, "mark and sweep done"); -+ } -+ -+ clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags); -+ set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); -+ -+ /* -+ * Skip past versions that might have possibly been used (as nonces), -+ * but hadn't had their pointers written: -+ */ -+ if (c->sb.encryption_type && !c->sb.clean) -+ atomic64_add(1 << 16, &c->key_version); -+ -+ if (c->opts.norecovery) -+ goto out; -+ -+ bch_verbose(c, "starting journal replay"); -+ err = "journal replay failed"; -+ ret = bch2_journal_replay(c, c->journal_keys); -+ if (ret) -+ goto err; -+ bch_verbose(c, "journal replay done"); -+ -+ if (!c->opts.nochanges) { -+ /* -+ * note that even when filesystem was clean there might be work -+ * to do here, if we ran gc (because of fsck) which recalculated -+ * oldest_gen: -+ */ -+ bch_verbose(c, "writing allocation info"); -+ err = "error writing out alloc info"; -+ ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW, &wrote) ?: -+ bch2_alloc_write(c, BTREE_INSERT_LAZY_RW, &wrote); -+ if (ret) { -+ bch_err(c, "error writing alloc info"); -+ goto err; -+ } -+ bch_verbose(c, "alloc write done"); -+ -+ set_bit(BCH_FS_ALLOC_WRITTEN, &c->flags); -+ } -+ -+ if (!c->sb.clean) { -+ if (!(c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) { -+ bch_info(c, "checking inode link counts"); -+ err = "error in recovery"; -+ ret = bch2_fsck_inode_nlink(c); -+ if (ret) -+ goto err; -+ bch_verbose(c, "check inodes done"); -+ -+ } else { -+ bch_verbose(c, "checking for deleted inodes"); -+ err = "error in recovery"; -+ ret = bch2_fsck_walk_inodes_only(c); -+ if (ret) -+ goto err; -+ bch_verbose(c, "check inodes done"); -+ } -+ } -+ -+ if (c->opts.fsck) { -+ bch_info(c, "starting fsck"); -+ err = "error in fsck"; -+ ret = bch2_fsck_full(c); -+ if (ret) -+ goto err; -+ bch_verbose(c, "fsck done"); -+ } -+ -+ if (enabled_qtypes(c)) { -+ bch_verbose(c, "reading quotas"); -+ ret = bch2_fs_quota_read(c); -+ if (ret) -+ goto err; -+ bch_verbose(c, "quotas done"); -+ } -+ -+ mutex_lock(&c->sb_lock); -+ if (c->opts.version_upgrade) { -+ if (c->sb.version < bcachefs_metadata_version_new_versioning) -+ c->disk_sb.sb->version_min = -+ le16_to_cpu(bcachefs_metadata_version_min); -+ c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current); -+ c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL; -+ write_sb = true; -+ } -+ -+ if (!test_bit(BCH_FS_ERROR, &c->flags)) { -+ c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO; -+ write_sb = true; -+ } -+ -+ if (c->opts.fsck && -+ !test_bit(BCH_FS_ERROR, &c->flags)) { -+ c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink; -+ SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0); -+ write_sb = true; -+ } -+ -+ if (write_sb) -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ if (c->journal_seq_blacklist_table && -+ c->journal_seq_blacklist_table->nr > 
128) -+ queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work); -+out: -+ ret = 0; -+err: -+fsck_err: -+ set_bit(BCH_FS_FSCK_DONE, &c->flags); -+ bch2_flush_fsck_errs(c); -+ -+ if (!c->opts.keep_journal) { -+ bch2_journal_keys_free(&c->journal_keys); -+ bch2_journal_entries_free(&c->journal_entries); -+ } -+ kfree(clean); -+ if (ret) -+ bch_err(c, "Error in recovery: %s (%i)", err, ret); -+ else -+ bch_verbose(c, "ret %i", ret); -+ return ret; -+} -+ -+int bch2_fs_initialize(struct bch_fs *c) -+{ -+ struct bch_inode_unpacked root_inode, lostfound_inode; -+ struct bkey_inode_buf packed_inode; -+ struct qstr lostfound = QSTR("lost+found"); -+ const char *err = "cannot allocate memory"; -+ struct bch_dev *ca; -+ LIST_HEAD(journal); -+ unsigned i; -+ int ret; -+ -+ bch_notice(c, "initializing new filesystem"); -+ -+ mutex_lock(&c->sb_lock); -+ for_each_online_member(ca, c, i) -+ bch2_mark_dev_superblock(c, ca, 0); -+ mutex_unlock(&c->sb_lock); -+ -+ mutex_lock(&c->sb_lock); -+ c->disk_sb.sb->version = c->disk_sb.sb->version_min = -+ le16_to_cpu(bcachefs_metadata_version_current); -+ c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink; -+ c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL; -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); -+ set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ bch2_btree_root_alloc(c, i); -+ -+ set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags); -+ set_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags); -+ -+ err = "unable to allocate journal buckets"; -+ for_each_online_member(ca, c, i) { -+ ret = bch2_dev_journal_alloc(ca); -+ if (ret) { -+ percpu_ref_put(&ca->io_ref); -+ goto err; -+ } -+ } -+ -+ /* -+ * journal_res_get() will crash if called before this has -+ * set up the journal.pin FIFO and journal.cur pointer: -+ */ -+ bch2_fs_journal_start(&c->journal, 1, &journal); -+ bch2_journal_set_replay_done(&c->journal); -+ -+ bch2_inode_init(c, &root_inode, 0, 0, -+ S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL); -+ root_inode.bi_inum = BCACHEFS_ROOT_INO; -+ bch2_inode_pack(&packed_inode, &root_inode); -+ -+ err = "error creating root directory"; -+ ret = bch2_btree_insert(c, BTREE_ID_INODES, -+ &packed_inode.inode.k_i, -+ NULL, NULL, BTREE_INSERT_LAZY_RW); -+ if (ret) -+ goto err; -+ -+ bch2_inode_init_early(c, &lostfound_inode); -+ -+ err = "error creating lost+found"; -+ ret = bch2_trans_do(c, NULL, NULL, 0, -+ bch2_create_trans(&trans, BCACHEFS_ROOT_INO, -+ &root_inode, &lostfound_inode, -+ &lostfound, -+ 0, 0, S_IFDIR|0700, 0, -+ NULL, NULL)); -+ if (ret) -+ goto err; -+ -+ if (enabled_qtypes(c)) { -+ ret = bch2_fs_quota_read(c); -+ if (ret) -+ goto err; -+ } -+ -+ err = "error writing first journal entry"; -+ ret = bch2_journal_meta(&c->journal); -+ if (ret) -+ goto err; -+ -+ mutex_lock(&c->sb_lock); -+ SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+err: -+ pr_err("Error initializing new filesystem: %s (%i)", err, ret); -+ return ret; -+} -diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h -new file mode 100644 -index 000000000000..a66827c9addf ---- /dev/null -+++ b/fs/bcachefs/recovery.h -@@ -0,0 +1,60 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_RECOVERY_H -+#define _BCACHEFS_RECOVERY_H -+ -+#define for_each_journal_key(keys, i) \ -+ for (i = (keys).d; i < (keys).d + (keys).nr; (i)++) -+ -+struct journal_iter 
{ -+ enum btree_id btree_id; -+ unsigned level; -+ struct journal_keys *keys; -+ struct journal_key *k; -+}; -+ -+/* -+ * Iterate over keys in the btree, with keys from the journal overlaid on top: -+ */ -+ -+struct btree_and_journal_iter { -+ struct btree_iter *btree; -+ -+ struct btree *b; -+ struct btree_node_iter node_iter; -+ struct bkey unpacked; -+ -+ struct journal_iter journal; -+ -+ enum last_key_returned { -+ none, -+ btree, -+ journal, -+ } last; -+}; -+ -+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *); -+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *); -+struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *); -+ -+void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *, -+ struct btree_trans *, -+ struct journal_keys *, -+ enum btree_id, struct bpos); -+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *, -+ struct journal_keys *, -+ struct btree *); -+ -+typedef int (*btree_walk_node_fn)(struct bch_fs *c, struct btree *b); -+typedef int (*btree_walk_key_fn)(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bkey_s_c k); -+ -+int bch2_btree_and_journal_walk(struct bch_fs *, struct journal_keys *, enum btree_id, -+ btree_walk_node_fn, btree_walk_key_fn); -+ -+void bch2_journal_keys_free(struct journal_keys *); -+void bch2_journal_entries_free(struct list_head *); -+ -+int bch2_fs_recovery(struct bch_fs *); -+int bch2_fs_initialize(struct bch_fs *); -+ -+#endif /* _BCACHEFS_RECOVERY_H */ -diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c -new file mode 100644 -index 000000000000..3c473f1380a6 ---- /dev/null -+++ b/fs/bcachefs/reflink.c -@@ -0,0 +1,303 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "extents.h" -+#include "inode.h" -+#include "io.h" -+#include "reflink.h" -+ -+#include -+ -+/* reflink pointers */ -+ -+const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ -+ if (bkey_val_bytes(p.k) != sizeof(*p.v)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ -+ pr_buf(out, "idx %llu", le64_to_cpu(p.v->idx)); -+} -+ -+enum merge_result bch2_reflink_p_merge(struct bch_fs *c, -+ struct bkey_s _l, struct bkey_s _r) -+{ -+ struct bkey_s_reflink_p l = bkey_s_to_reflink_p(_l); -+ struct bkey_s_reflink_p r = bkey_s_to_reflink_p(_r); -+ -+ if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx)) -+ return BCH_MERGE_NOMERGE; -+ -+ if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) { -+ bch2_key_resize(l.k, KEY_SIZE_MAX); -+ bch2_cut_front_s(l.k->p, _r); -+ return BCH_MERGE_PARTIAL; -+ } -+ -+ bch2_key_resize(l.k, l.k->size + r.k->size); -+ -+ return BCH_MERGE_MERGE; -+} -+ -+/* indirect extents */ -+ -+const char *bch2_reflink_v_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -+ -+ if (bkey_val_bytes(r.k) < sizeof(*r.v)) -+ return "incorrect value size"; -+ -+ return bch2_bkey_ptrs_invalid(c, k); -+} -+ -+void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -+ -+ pr_buf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount)); -+ -+ 
bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+static int bch2_make_extent_indirect(struct btree_trans *trans, -+ struct btree_iter *extent_iter, -+ struct bkey_i_extent *e) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *reflink_iter; -+ struct bkey_s_c k; -+ struct bkey_i_reflink_v *r_v; -+ struct bkey_i_reflink_p *r_p; -+ int ret; -+ -+ for_each_btree_key(trans, reflink_iter, BTREE_ID_REFLINK, -+ POS(0, c->reflink_hint), -+ BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) { -+ if (reflink_iter->pos.inode) { -+ bch2_btree_iter_set_pos(reflink_iter, POS_MIN); -+ continue; -+ } -+ -+ if (bkey_deleted(k.k) && e->k.size <= k.k->size) -+ break; -+ } -+ -+ if (ret) -+ goto err; -+ -+ /* rewind iter to start of hole, if necessary: */ -+ bch2_btree_iter_set_pos(reflink_iter, bkey_start_pos(k.k)); -+ -+ r_v = bch2_trans_kmalloc(trans, sizeof(*r_v) + bkey_val_bytes(&e->k)); -+ ret = PTR_ERR_OR_ZERO(r_v); -+ if (ret) -+ goto err; -+ -+ bkey_reflink_v_init(&r_v->k_i); -+ r_v->k.p = reflink_iter->pos; -+ bch2_key_resize(&r_v->k, e->k.size); -+ r_v->k.version = e->k.version; -+ -+ set_bkey_val_u64s(&r_v->k, bkey_val_u64s(&r_v->k) + -+ bkey_val_u64s(&e->k)); -+ r_v->v.refcount = 0; -+ memcpy(r_v->v.start, e->v.start, bkey_val_bytes(&e->k)); -+ -+ bch2_trans_update(trans, reflink_iter, &r_v->k_i, 0); -+ -+ r_p = bch2_trans_kmalloc(trans, sizeof(*r_p)); -+ if (IS_ERR(r_p)) -+ return PTR_ERR(r_p); -+ -+ e->k.type = KEY_TYPE_reflink_p; -+ r_p = bkey_i_to_reflink_p(&e->k_i); -+ set_bkey_val_bytes(&r_p->k, sizeof(r_p->v)); -+ r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k)); -+ -+ bch2_trans_update(trans, extent_iter, &r_p->k_i, 0); -+err: -+ if (!IS_ERR(reflink_iter)) -+ c->reflink_hint = reflink_iter->pos.offset; -+ bch2_trans_iter_put(trans, reflink_iter); -+ -+ return ret; -+} -+ -+static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) -+{ -+ struct bkey_s_c k = bch2_btree_iter_peek(iter); -+ int ret; -+ -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ if (bkey_cmp(iter->pos, end) >= 0) -+ return bkey_s_c_null; -+ -+ if (k.k->type == KEY_TYPE_extent || -+ k.k->type == KEY_TYPE_reflink_p) -+ break; -+ } -+ -+ return k; -+} -+ -+s64 bch2_remap_range(struct bch_fs *c, -+ struct bpos dst_start, struct bpos src_start, -+ u64 remap_sectors, u64 *journal_seq, -+ u64 new_i_size, s64 *i_sectors_delta) -+{ -+ struct btree_trans trans; -+ struct btree_iter *dst_iter, *src_iter; -+ struct bkey_s_c src_k; -+ BKEY_PADDED(k) new_dst; -+ struct bkey_on_stack new_src; -+ struct bpos dst_end = dst_start, src_end = src_start; -+ struct bpos dst_want, src_want; -+ u64 src_done, dst_done; -+ int ret = 0, ret2 = 0; -+ -+ if (!c->opts.reflink) -+ return -EOPNOTSUPP; -+ -+ if (!percpu_ref_tryget(&c->writes)) -+ return -EROFS; -+ -+ bch2_check_set_feature(c, BCH_FEATURE_reflink); -+ -+ dst_end.offset += remap_sectors; -+ src_end.offset += remap_sectors; -+ -+ bkey_on_stack_init(&new_src); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096); -+ -+ src_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start, -+ BTREE_ITER_INTENT); -+ dst_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, dst_start, -+ BTREE_ITER_INTENT); -+ -+ while (1) { -+ bch2_trans_begin(&trans); -+ -+ trans.mem_top = 0; -+ -+ if (fatal_signal_pending(current)) { -+ ret = -EINTR; -+ goto err; -+ } -+ -+ src_k = get_next_src(src_iter, src_end); -+ ret = bkey_err(src_k); -+ if (ret) -+ goto btree_err; -+ -+ src_done = bpos_min(src_iter->pos, src_end).offset - -+ src_start.offset; -+ dst_want = POS(dst_start.inode, 
dst_start.offset + src_done); -+ -+ if (bkey_cmp(dst_iter->pos, dst_want) < 0) { -+ ret = bch2_fpunch_at(&trans, dst_iter, dst_want, -+ journal_seq, i_sectors_delta); -+ if (ret) -+ goto btree_err; -+ continue; -+ } -+ -+ BUG_ON(bkey_cmp(dst_iter->pos, dst_want)); -+ -+ if (!bkey_cmp(dst_iter->pos, dst_end)) -+ break; -+ -+ if (src_k.k->type == KEY_TYPE_extent) { -+ bkey_on_stack_reassemble(&new_src, c, src_k); -+ src_k = bkey_i_to_s_c(new_src.k); -+ -+ bch2_cut_front(src_iter->pos, new_src.k); -+ bch2_cut_back(src_end, new_src.k); -+ -+ ret = bch2_make_extent_indirect(&trans, src_iter, -+ bkey_i_to_extent(new_src.k)); -+ if (ret) -+ goto btree_err; -+ -+ BUG_ON(src_k.k->type != KEY_TYPE_reflink_p); -+ } -+ -+ if (src_k.k->type == KEY_TYPE_reflink_p) { -+ struct bkey_s_c_reflink_p src_p = -+ bkey_s_c_to_reflink_p(src_k); -+ struct bkey_i_reflink_p *dst_p = -+ bkey_reflink_p_init(&new_dst.k); -+ -+ u64 offset = le64_to_cpu(src_p.v->idx) + -+ (src_iter->pos.offset - -+ bkey_start_offset(src_k.k)); -+ -+ dst_p->v.idx = cpu_to_le64(offset); -+ } else { -+ BUG(); -+ } -+ -+ new_dst.k.k.p = dst_iter->pos; -+ bch2_key_resize(&new_dst.k.k, -+ min(src_k.k->p.offset - src_iter->pos.offset, -+ dst_end.offset - dst_iter->pos.offset)); -+ -+ ret = bch2_extent_update(&trans, dst_iter, &new_dst.k, -+ NULL, journal_seq, -+ new_i_size, i_sectors_delta); -+ if (ret) -+ goto btree_err; -+ -+ dst_done = dst_iter->pos.offset - dst_start.offset; -+ src_want = POS(src_start.inode, src_start.offset + dst_done); -+ bch2_btree_iter_set_pos(src_iter, src_want); -+btree_err: -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ goto err; -+ } -+ -+ BUG_ON(bkey_cmp(dst_iter->pos, dst_end)); -+err: -+ BUG_ON(bkey_cmp(dst_iter->pos, dst_end) > 0); -+ -+ dst_done = dst_iter->pos.offset - dst_start.offset; -+ new_i_size = min(dst_iter->pos.offset << 9, new_i_size); -+ -+ bch2_trans_begin(&trans); -+ -+ do { -+ struct bch_inode_unpacked inode_u; -+ struct btree_iter *inode_iter; -+ -+ inode_iter = bch2_inode_peek(&trans, &inode_u, -+ dst_start.inode, BTREE_ITER_INTENT); -+ ret2 = PTR_ERR_OR_ZERO(inode_iter); -+ -+ if (!ret2 && -+ inode_u.bi_size < new_i_size) { -+ inode_u.bi_size = new_i_size; -+ ret2 = bch2_inode_write(&trans, inode_iter, &inode_u) ?: -+ bch2_trans_commit(&trans, NULL, journal_seq, 0); -+ } -+ } while (ret2 == -EINTR); -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ bkey_on_stack_exit(&new_src, c); -+ -+ percpu_ref_put(&c->writes); -+ -+ return dst_done ?: ret ?: ret2; -+} -diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h -new file mode 100644 -index 000000000000..5445c1cf0797 ---- /dev/null -+++ b/fs/bcachefs/reflink.h -@@ -0,0 +1,31 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REFLINK_H -+#define _BCACHEFS_REFLINK_H -+ -+const char *bch2_reflink_p_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+enum merge_result bch2_reflink_p_merge(struct bch_fs *, -+ struct bkey_s, struct bkey_s); -+ -+#define bch2_bkey_ops_reflink_p (struct bkey_ops) { \ -+ .key_invalid = bch2_reflink_p_invalid, \ -+ .val_to_text = bch2_reflink_p_to_text, \ -+ .key_merge = bch2_reflink_p_merge, \ -+} -+ -+const char *bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+ -+#define bch2_bkey_ops_reflink_v (struct bkey_ops) { \ -+ .key_invalid = bch2_reflink_v_invalid, \ -+ .val_to_text = bch2_reflink_v_to_text, \ -+ .swab = 
bch2_ptr_swab, \ -+} -+ -+s64 bch2_remap_range(struct bch_fs *, struct bpos, struct bpos, -+ u64, u64 *, u64, s64 *); -+ -+#endif /* _BCACHEFS_REFLINK_H */ -diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c -new file mode 100644 -index 000000000000..6b6506c68609 ---- /dev/null -+++ b/fs/bcachefs/replicas.c -@@ -0,0 +1,1059 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "buckets.h" -+#include "journal.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, -+ struct bch_replicas_cpu *); -+ -+/* Replicas tracking - in memory: */ -+ -+static inline int u8_cmp(u8 l, u8 r) -+{ -+ return cmp_int(l, r); -+} -+ -+static void verify_replicas_entry(struct bch_replicas_entry *e) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ unsigned i; -+ -+ BUG_ON(e->data_type >= BCH_DATA_NR); -+ BUG_ON(!e->nr_devs); -+ BUG_ON(e->nr_required > 1 && -+ e->nr_required >= e->nr_devs); -+ -+ for (i = 0; i + 1 < e->nr_devs; i++) -+ BUG_ON(e->devs[i] >= e->devs[i + 1]); -+#endif -+} -+ -+static void replicas_entry_sort(struct bch_replicas_entry *e) -+{ -+ bubble_sort(e->devs, e->nr_devs, u8_cmp); -+} -+ -+static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) -+{ -+ eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL); -+} -+ -+void bch2_replicas_entry_to_text(struct printbuf *out, -+ struct bch_replicas_entry *e) -+{ -+ unsigned i; -+ -+ pr_buf(out, "%s: %u/%u [", -+ bch2_data_types[e->data_type], -+ e->nr_required, -+ e->nr_devs); -+ -+ for (i = 0; i < e->nr_devs; i++) -+ pr_buf(out, i ? " %u" : "%u", e->devs[i]); -+ pr_buf(out, "]"); -+} -+ -+void bch2_cpu_replicas_to_text(struct printbuf *out, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_replicas_entry *e; -+ bool first = true; -+ -+ for_each_cpu_replicas_entry(r, e) { -+ if (!first) -+ pr_buf(out, " "); -+ first = false; -+ -+ bch2_replicas_entry_to_text(out, e); -+ } -+} -+ -+static void extent_to_replicas(struct bkey_s_c k, -+ struct bch_replicas_entry *r) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ r->nr_required = 1; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ if (p.ptr.cached) -+ continue; -+ -+ if (!p.has_ec) -+ r->devs[r->nr_devs++] = p.ptr.dev; -+ else -+ r->nr_required = 0; -+ } -+} -+ -+static void stripe_to_replicas(struct bkey_s_c k, -+ struct bch_replicas_entry *r) -+{ -+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); -+ const struct bch_extent_ptr *ptr; -+ -+ r->nr_required = s.v->nr_blocks - s.v->nr_redundant; -+ -+ for (ptr = s.v->ptrs; -+ ptr < s.v->ptrs + s.v->nr_blocks; -+ ptr++) -+ r->devs[r->nr_devs++] = ptr->dev; -+} -+ -+void bch2_bkey_to_replicas(struct bch_replicas_entry *e, -+ struct bkey_s_c k) -+{ -+ e->nr_devs = 0; -+ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ e->data_type = BCH_DATA_btree; -+ extent_to_replicas(k, e); -+ break; -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ e->data_type = BCH_DATA_user; -+ extent_to_replicas(k, e); -+ break; -+ case KEY_TYPE_stripe: -+ e->data_type = BCH_DATA_user; -+ stripe_to_replicas(k, e); -+ break; -+ } -+ -+ replicas_entry_sort(e); -+} -+ -+void bch2_devlist_to_replicas(struct bch_replicas_entry *e, -+ enum bch_data_type data_type, -+ struct bch_devs_list devs) -+{ -+ unsigned i; -+ -+ BUG_ON(!data_type || -+ data_type == BCH_DATA_sb || -+ data_type >= BCH_DATA_NR); -+ -+ e->data_type = data_type; -+ e->nr_devs = 0; -+ 
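/*
-+ * Replicas entries are kept in a canonical form: replicas_entry_sort()
-+ * (called at the end of this helper) sorts the device list ascending with a
-+ * simple bubble_sort(), and verify_replicas_entry() asserts the list is
-+ * sorted and duplicate-free under CONFIG_BCACHEFS_DEBUG. Canonicalizing here
-+ * is what lets the lookup and dedup paths below compare whole entries with a
-+ * plain memcmp().
-+ */ -+ 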
e->nr_required = 1; -+ -+ for (i = 0; i < devs.nr; i++) -+ e->devs[e->nr_devs++] = devs.devs[i]; -+ -+ replicas_entry_sort(e); -+} -+ -+static struct bch_replicas_cpu -+cpu_replicas_add_entry(struct bch_replicas_cpu *old, -+ struct bch_replicas_entry *new_entry) -+{ -+ unsigned i; -+ struct bch_replicas_cpu new = { -+ .nr = old->nr + 1, -+ .entry_size = max_t(unsigned, old->entry_size, -+ replicas_entry_bytes(new_entry)), -+ }; -+ -+ BUG_ON(!new_entry->data_type); -+ verify_replicas_entry(new_entry); -+ -+ new.entries = kcalloc(new.nr, new.entry_size, GFP_NOIO); -+ if (!new.entries) -+ return new; -+ -+ for (i = 0; i < old->nr; i++) -+ memcpy(cpu_replicas_entry(&new, i), -+ cpu_replicas_entry(old, i), -+ old->entry_size); -+ -+ memcpy(cpu_replicas_entry(&new, old->nr), -+ new_entry, -+ replicas_entry_bytes(new_entry)); -+ -+ bch2_cpu_replicas_sort(&new); -+ return new; -+} -+ -+static inline int __replicas_entry_idx(struct bch_replicas_cpu *r, -+ struct bch_replicas_entry *search) -+{ -+ int idx, entry_size = replicas_entry_bytes(search); -+ -+ if (unlikely(entry_size > r->entry_size)) -+ return -1; -+ -+ verify_replicas_entry(search); -+ -+#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size) -+ idx = eytzinger0_find(r->entries, r->nr, r->entry_size, -+ entry_cmp, search); -+#undef entry_cmp -+ -+ return idx < r->nr ? idx : -1; -+} -+ -+int bch2_replicas_entry_idx(struct bch_fs *c, -+ struct bch_replicas_entry *search) -+{ -+ replicas_entry_sort(search); -+ -+ return __replicas_entry_idx(&c->replicas, search); -+} -+ -+static bool __replicas_has_entry(struct bch_replicas_cpu *r, -+ struct bch_replicas_entry *search) -+{ -+ return __replicas_entry_idx(r, search) >= 0; -+} -+ -+bool bch2_replicas_marked(struct bch_fs *c, -+ struct bch_replicas_entry *search) -+{ -+ bool marked; -+ -+ if (!search->nr_devs) -+ return true; -+ -+ verify_replicas_entry(search); -+ -+ percpu_down_read(&c->mark_lock); -+ marked = __replicas_has_entry(&c->replicas, search) && -+ (likely((!c->replicas_gc.entries)) || -+ __replicas_has_entry(&c->replicas_gc, search)); -+ percpu_up_read(&c->mark_lock); -+ -+ return marked; -+} -+ -+static void __replicas_table_update(struct bch_fs_usage *dst, -+ struct bch_replicas_cpu *dst_r, -+ struct bch_fs_usage *src, -+ struct bch_replicas_cpu *src_r) -+{ -+ int src_idx, dst_idx; -+ -+ *dst = *src; -+ -+ for (src_idx = 0; src_idx < src_r->nr; src_idx++) { -+ if (!src->replicas[src_idx]) -+ continue; -+ -+ dst_idx = __replicas_entry_idx(dst_r, -+ cpu_replicas_entry(src_r, src_idx)); -+ BUG_ON(dst_idx < 0); -+ -+ dst->replicas[dst_idx] = src->replicas[src_idx]; -+ } -+} -+ -+static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p, -+ struct bch_replicas_cpu *dst_r, -+ struct bch_fs_usage __percpu *src_p, -+ struct bch_replicas_cpu *src_r) -+{ -+ unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr; -+ struct bch_fs_usage *dst, *src = (void *) -+ bch2_acc_percpu_u64s((void *) src_p, src_nr); -+ -+ preempt_disable(); -+ dst = this_cpu_ptr(dst_p); -+ preempt_enable(); -+ -+ __replicas_table_update(dst, dst_r, src, src_r); -+} -+ -+/* -+ * Resize filesystem accounting: -+ */ -+static int replicas_table_update(struct bch_fs *c, -+ struct bch_replicas_cpu *new_r) -+{ -+ struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL }; -+ struct bch_fs_usage *new_scratch = NULL; -+ struct bch_fs_usage __percpu *new_gc = NULL; -+ struct bch_fs_usage *new_base = NULL; -+ unsigned bytes = sizeof(struct bch_fs_usage) + -+ sizeof(u64) * new_r->nr; -+ 
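/*
-+ * struct bch_fs_usage is followed by one u64 counter per replicas entry, so
-+ * each usage buffer (usage_base, the percpu copies in usage[0]/usage[1],
-+ * usage_gc and usage_scratch) has to be reallocated whenever the replicas
-+ * table grows; __replicas_table_update() then re-indexes the existing
-+ * counters into the new table.
-+ */ -+ 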
int ret = -ENOMEM; -+ -+ if (!(new_base = kzalloc(bytes, GFP_NOIO)) || -+ !(new_usage[0] = __alloc_percpu_gfp(bytes, sizeof(u64), -+ GFP_NOIO)) || -+ !(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64), -+ GFP_NOIO)) || -+ !(new_scratch = kmalloc(bytes, GFP_NOIO)) || -+ (c->usage_gc && -+ !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO)))) { -+ bch_err(c, "error updating replicas table: memory allocation failure"); -+ goto err; -+ } -+ -+ if (c->usage_base) -+ __replicas_table_update(new_base, new_r, -+ c->usage_base, &c->replicas); -+ if (c->usage[0]) -+ __replicas_table_update_pcpu(new_usage[0], new_r, -+ c->usage[0], &c->replicas); -+ if (c->usage[1]) -+ __replicas_table_update_pcpu(new_usage[1], new_r, -+ c->usage[1], &c->replicas); -+ if (c->usage_gc) -+ __replicas_table_update_pcpu(new_gc, new_r, -+ c->usage_gc, &c->replicas); -+ -+ swap(c->usage_base, new_base); -+ swap(c->usage[0], new_usage[0]); -+ swap(c->usage[1], new_usage[1]); -+ swap(c->usage_scratch, new_scratch); -+ swap(c->usage_gc, new_gc); -+ swap(c->replicas, *new_r); -+ ret = 0; -+err: -+ free_percpu(new_gc); -+ kfree(new_scratch); -+ free_percpu(new_usage[1]); -+ free_percpu(new_usage[0]); -+ kfree(new_base); -+ return ret; -+} -+ -+static unsigned reserve_journal_replicas(struct bch_fs *c, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_replicas_entry *e; -+ unsigned journal_res_u64s = 0; -+ -+ /* nr_inodes: */ -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)); -+ -+ /* key_version: */ -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)); -+ -+ /* persistent_reserved: */ -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) * -+ BCH_REPLICAS_MAX; -+ -+ for_each_cpu_replicas_entry(r, e) -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) + -+ e->nr_devs, sizeof(u64)); -+ return journal_res_u64s; -+} -+ -+noinline -+static int bch2_mark_replicas_slowpath(struct bch_fs *c, -+ struct bch_replicas_entry *new_entry) -+{ -+ struct bch_replicas_cpu new_r, new_gc; -+ int ret = 0; -+ -+ verify_replicas_entry(new_entry); -+ -+ memset(&new_r, 0, sizeof(new_r)); -+ memset(&new_gc, 0, sizeof(new_gc)); -+ -+ mutex_lock(&c->sb_lock); -+ -+ if (c->replicas_gc.entries && -+ !__replicas_has_entry(&c->replicas_gc, new_entry)) { -+ new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry); -+ if (!new_gc.entries) -+ goto err; -+ } -+ -+ if (!__replicas_has_entry(&c->replicas, new_entry)) { -+ new_r = cpu_replicas_add_entry(&c->replicas, new_entry); -+ if (!new_r.entries) -+ goto err; -+ -+ ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r); -+ if (ret) -+ goto err; -+ -+ bch2_journal_entry_res_resize(&c->journal, -+ &c->replicas_journal_res, -+ reserve_journal_replicas(c, &new_r)); -+ } -+ -+ if (!new_r.entries && -+ !new_gc.entries) -+ goto out; -+ -+ /* allocations done, now commit: */ -+ -+ if (new_r.entries) -+ bch2_write_super(c); -+ -+ /* don't update in memory replicas until changes are persistent */ -+ percpu_down_write(&c->mark_lock); -+ if (new_r.entries) -+ ret = replicas_table_update(c, &new_r); -+ if (new_gc.entries) -+ swap(new_gc, c->replicas_gc); -+ percpu_up_write(&c->mark_lock); -+out: -+ mutex_unlock(&c->sb_lock); -+ -+ kfree(new_r.entries); -+ kfree(new_gc.entries); -+ -+ return ret; -+err: -+ bch_err(c, "error adding replicas entry: memory allocation failure"); -+ ret = -ENOMEM; -+ goto out; -+} -+ -+static int __bch2_mark_replicas(struct bch_fs *c, -+ struct bch_replicas_entry *r, 
-+ bool check) -+{ -+ return likely(bch2_replicas_marked(c, r)) ? 0 -+ : check ? -1 -+ : bch2_mark_replicas_slowpath(c, r); -+} -+ -+int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry *r) -+{ -+ return __bch2_mark_replicas(c, r, false); -+} -+ -+static int __bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k, -+ bool check) -+{ -+ struct bch_replicas_padded search; -+ struct bch_devs_list cached = bch2_bkey_cached_devs(k); -+ unsigned i; -+ int ret; -+ -+ for (i = 0; i < cached.nr; i++) { -+ bch2_replicas_entry_cached(&search.e, cached.devs[i]); -+ -+ ret = __bch2_mark_replicas(c, &search.e, check); -+ if (ret) -+ return ret; -+ } -+ -+ bch2_bkey_to_replicas(&search.e, k); -+ -+ return __bch2_mark_replicas(c, &search.e, check); -+} -+ -+bool bch2_bkey_replicas_marked(struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ return __bch2_mark_bkey_replicas(c, k, true) == 0; -+} -+ -+int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) -+{ -+ return __bch2_mark_bkey_replicas(c, k, false); -+} -+ -+int bch2_replicas_gc_end(struct bch_fs *c, int ret) -+{ -+ unsigned i; -+ -+ lockdep_assert_held(&c->replicas_gc_lock); -+ -+ mutex_lock(&c->sb_lock); -+ percpu_down_write(&c->mark_lock); -+ -+ /* -+ * this is kind of crappy; the replicas gc mechanism needs to be ripped -+ * out -+ */ -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ struct bch_replicas_cpu n; -+ -+ if (!__replicas_has_entry(&c->replicas_gc, e) && -+ (c->usage_base->replicas[i] || -+ percpu_u64_get(&c->usage[0]->replicas[i]) || -+ percpu_u64_get(&c->usage[1]->replicas[i]))) { -+ n = cpu_replicas_add_entry(&c->replicas_gc, e); -+ if (!n.entries) { -+ ret = -ENOSPC; -+ goto err; -+ } -+ -+ swap(n, c->replicas_gc); -+ kfree(n.entries); -+ } -+ } -+ -+ if (bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc)) { -+ ret = -ENOSPC; -+ goto err; -+ } -+ -+ ret = replicas_table_update(c, &c->replicas_gc); -+err: -+ kfree(c->replicas_gc.entries); -+ c->replicas_gc.entries = NULL; -+ -+ percpu_up_write(&c->mark_lock); -+ -+ if (!ret) -+ bch2_write_super(c); -+ -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) -+{ -+ struct bch_replicas_entry *e; -+ unsigned i = 0; -+ -+ lockdep_assert_held(&c->replicas_gc_lock); -+ -+ mutex_lock(&c->sb_lock); -+ BUG_ON(c->replicas_gc.entries); -+ -+ c->replicas_gc.nr = 0; -+ c->replicas_gc.entry_size = 0; -+ -+ for_each_cpu_replicas_entry(&c->replicas, e) -+ if (!((1 << e->data_type) & typemask)) { -+ c->replicas_gc.nr++; -+ c->replicas_gc.entry_size = -+ max_t(unsigned, c->replicas_gc.entry_size, -+ replicas_entry_bytes(e)); -+ } -+ -+ c->replicas_gc.entries = kcalloc(c->replicas_gc.nr, -+ c->replicas_gc.entry_size, -+ GFP_NOIO); -+ if (!c->replicas_gc.entries) { -+ mutex_unlock(&c->sb_lock); -+ bch_err(c, "error allocating c->replicas_gc"); -+ return -ENOMEM; -+ } -+ -+ for_each_cpu_replicas_entry(&c->replicas, e) -+ if (!((1 << e->data_type) & typemask)) -+ memcpy(cpu_replicas_entry(&c->replicas_gc, i++), -+ e, c->replicas_gc.entry_size); -+ -+ bch2_cpu_replicas_sort(&c->replicas_gc); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+int bch2_replicas_gc2(struct bch_fs *c) -+{ -+ struct bch_replicas_cpu new = { 0 }; -+ unsigned i, nr; -+ int ret = 0; -+ -+ bch2_journal_meta(&c->journal); -+retry: -+ nr = READ_ONCE(c->replicas.nr); -+ new.entry_size = READ_ONCE(c->replicas.entry_size); -+ new.entries = kcalloc(nr, new.entry_size, 
GFP_KERNEL); -+ if (!new.entries) { -+ bch_err(c, "error allocating c->replicas_gc"); -+ return -ENOMEM; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ percpu_down_write(&c->mark_lock); -+ -+ if (nr != c->replicas.nr || -+ new.entry_size != c->replicas.entry_size) { -+ percpu_up_write(&c->mark_lock); -+ mutex_unlock(&c->sb_lock); -+ kfree(new.entries); -+ goto retry; -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ if (e->data_type == BCH_DATA_journal || -+ c->usage_base->replicas[i] || -+ percpu_u64_get(&c->usage[0]->replicas[i]) || -+ percpu_u64_get(&c->usage[1]->replicas[i])) -+ memcpy(cpu_replicas_entry(&new, new.nr++), -+ e, new.entry_size); -+ } -+ -+ bch2_cpu_replicas_sort(&new); -+ -+ if (bch2_cpu_replicas_to_sb_replicas(c, &new)) { -+ ret = -ENOSPC; -+ goto err; -+ } -+ -+ ret = replicas_table_update(c, &new); -+err: -+ kfree(new.entries); -+ -+ percpu_up_write(&c->mark_lock); -+ -+ if (!ret) -+ bch2_write_super(c); -+ -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_replicas_set_usage(struct bch_fs *c, -+ struct bch_replicas_entry *r, -+ u64 sectors) -+{ -+ int ret, idx = bch2_replicas_entry_idx(c, r); -+ -+ if (idx < 0) { -+ struct bch_replicas_cpu n; -+ -+ n = cpu_replicas_add_entry(&c->replicas, r); -+ if (!n.entries) -+ return -ENOMEM; -+ -+ ret = replicas_table_update(c, &n); -+ if (ret) -+ return ret; -+ -+ kfree(n.entries); -+ -+ idx = bch2_replicas_entry_idx(c, r); -+ BUG_ON(ret < 0); -+ } -+ -+ c->usage_base->replicas[idx] = sectors; -+ -+ return 0; -+} -+ -+/* Replicas tracking - superblock: */ -+ -+static int -+__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r, -+ struct bch_replicas_cpu *cpu_r) -+{ -+ struct bch_replicas_entry *e, *dst; -+ unsigned nr = 0, entry_size = 0, idx = 0; -+ -+ for_each_replicas_entry(sb_r, e) { -+ entry_size = max_t(unsigned, entry_size, -+ replicas_entry_bytes(e)); -+ nr++; -+ } -+ -+ cpu_r->entries = kcalloc(nr, entry_size, GFP_NOIO); -+ if (!cpu_r->entries) -+ return -ENOMEM; -+ -+ cpu_r->nr = nr; -+ cpu_r->entry_size = entry_size; -+ -+ for_each_replicas_entry(sb_r, e) { -+ dst = cpu_replicas_entry(cpu_r, idx++); -+ memcpy(dst, e, replicas_entry_bytes(e)); -+ replicas_entry_sort(dst); -+ } -+ -+ return 0; -+} -+ -+static int -+__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r, -+ struct bch_replicas_cpu *cpu_r) -+{ -+ struct bch_replicas_entry_v0 *e; -+ unsigned nr = 0, entry_size = 0, idx = 0; -+ -+ for_each_replicas_entry(sb_r, e) { -+ entry_size = max_t(unsigned, entry_size, -+ replicas_entry_bytes(e)); -+ nr++; -+ } -+ -+ entry_size += sizeof(struct bch_replicas_entry) - -+ sizeof(struct bch_replicas_entry_v0); -+ -+ cpu_r->entries = kcalloc(nr, entry_size, GFP_NOIO); -+ if (!cpu_r->entries) -+ return -ENOMEM; -+ -+ cpu_r->nr = nr; -+ cpu_r->entry_size = entry_size; -+ -+ for_each_replicas_entry(sb_r, e) { -+ struct bch_replicas_entry *dst = -+ cpu_replicas_entry(cpu_r, idx++); -+ -+ dst->data_type = e->data_type; -+ dst->nr_devs = e->nr_devs; -+ dst->nr_required = 1; -+ memcpy(dst->devs, e->devs, e->nr_devs); -+ replicas_entry_sort(dst); -+ } -+ -+ return 0; -+} -+ -+int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) -+{ -+ struct bch_sb_field_replicas *sb_v1; -+ struct bch_sb_field_replicas_v0 *sb_v0; -+ struct bch_replicas_cpu new_r = { 0, 0, NULL }; -+ int ret = 0; -+ -+ if ((sb_v1 = bch2_sb_get_replicas(c->disk_sb.sb))) -+ ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r); -+ else if 
((sb_v0 = bch2_sb_get_replicas_v0(c->disk_sb.sb))) -+ ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r); -+ -+ if (ret) -+ return -ENOMEM; -+ -+ bch2_cpu_replicas_sort(&new_r); -+ -+ percpu_down_write(&c->mark_lock); -+ -+ ret = replicas_table_update(c, &new_r); -+ percpu_up_write(&c->mark_lock); -+ -+ kfree(new_r.entries); -+ -+ return 0; -+} -+ -+static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_sb_field_replicas_v0 *sb_r; -+ struct bch_replicas_entry_v0 *dst; -+ struct bch_replicas_entry *src; -+ size_t bytes; -+ -+ bytes = sizeof(struct bch_sb_field_replicas); -+ -+ for_each_cpu_replicas_entry(r, src) -+ bytes += replicas_entry_bytes(src) - 1; -+ -+ sb_r = bch2_sb_resize_replicas_v0(&c->disk_sb, -+ DIV_ROUND_UP(bytes, sizeof(u64))); -+ if (!sb_r) -+ return -ENOSPC; -+ -+ bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas); -+ sb_r = bch2_sb_get_replicas_v0(c->disk_sb.sb); -+ -+ memset(&sb_r->entries, 0, -+ vstruct_end(&sb_r->field) - -+ (void *) &sb_r->entries); -+ -+ dst = sb_r->entries; -+ for_each_cpu_replicas_entry(r, src) { -+ dst->data_type = src->data_type; -+ dst->nr_devs = src->nr_devs; -+ memcpy(dst->devs, src->devs, src->nr_devs); -+ -+ dst = replicas_entry_next(dst); -+ -+ BUG_ON((void *) dst > vstruct_end(&sb_r->field)); -+ } -+ -+ return 0; -+} -+ -+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_sb_field_replicas *sb_r; -+ struct bch_replicas_entry *dst, *src; -+ bool need_v1 = false; -+ size_t bytes; -+ -+ bytes = sizeof(struct bch_sb_field_replicas); -+ -+ for_each_cpu_replicas_entry(r, src) { -+ bytes += replicas_entry_bytes(src); -+ if (src->nr_required != 1) -+ need_v1 = true; -+ } -+ -+ if (!need_v1) -+ return bch2_cpu_replicas_to_sb_replicas_v0(c, r); -+ -+ sb_r = bch2_sb_resize_replicas(&c->disk_sb, -+ DIV_ROUND_UP(bytes, sizeof(u64))); -+ if (!sb_r) -+ return -ENOSPC; -+ -+ bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0); -+ sb_r = bch2_sb_get_replicas(c->disk_sb.sb); -+ -+ memset(&sb_r->entries, 0, -+ vstruct_end(&sb_r->field) - -+ (void *) &sb_r->entries); -+ -+ dst = sb_r->entries; -+ for_each_cpu_replicas_entry(r, src) { -+ memcpy(dst, src, replicas_entry_bytes(src)); -+ -+ dst = replicas_entry_next(dst); -+ -+ BUG_ON((void *) dst > vstruct_end(&sb_r->field)); -+ } -+ -+ return 0; -+} -+ -+static const char *check_dup_replicas_entries(struct bch_replicas_cpu *cpu_r) -+{ -+ unsigned i; -+ -+ sort_cmp_size(cpu_r->entries, -+ cpu_r->nr, -+ cpu_r->entry_size, -+ memcmp, NULL); -+ -+ for (i = 0; i + 1 < cpu_r->nr; i++) { -+ struct bch_replicas_entry *l = -+ cpu_replicas_entry(cpu_r, i); -+ struct bch_replicas_entry *r = -+ cpu_replicas_entry(cpu_r, i + 1); -+ -+ BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0); -+ -+ if (!memcmp(l, r, cpu_r->entry_size)) -+ return "duplicate replicas entry"; -+ } -+ -+ return NULL; -+} -+ -+static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f) -+{ -+ struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas); -+ struct bch_sb_field_members *mi = bch2_sb_get_members(sb); -+ struct bch_replicas_cpu cpu_r = { .entries = NULL }; -+ struct bch_replicas_entry *e; -+ const char *err; -+ unsigned i; -+ -+ for_each_replicas_entry(sb_r, e) { -+ err = "invalid replicas entry: invalid data type"; -+ if (e->data_type >= BCH_DATA_NR) -+ goto err; -+ -+ err = "invalid replicas entry: no devices"; -+ if (!e->nr_devs) -+ goto err; -+ -+ err = "invalid replicas 
entry: bad nr_required"; -+ if (e->nr_required > 1 && -+ e->nr_required >= e->nr_devs) -+ goto err; -+ -+ err = "invalid replicas entry: invalid device"; -+ for (i = 0; i < e->nr_devs; i++) -+ if (!bch2_dev_exists(sb, mi, e->devs[i])) -+ goto err; -+ } -+ -+ err = "cannot allocate memory"; -+ if (__bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r)) -+ goto err; -+ -+ err = check_dup_replicas_entries(&cpu_r); -+err: -+ kfree(cpu_r.entries); -+ return err; -+} -+ -+static void bch2_sb_replicas_to_text(struct printbuf *out, -+ struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_replicas *r = field_to_type(f, replicas); -+ struct bch_replicas_entry *e; -+ bool first = true; -+ -+ for_each_replicas_entry(r, e) { -+ if (!first) -+ pr_buf(out, " "); -+ first = false; -+ -+ bch2_replicas_entry_to_text(out, e); -+ } -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_replicas = { -+ .validate = bch2_sb_validate_replicas, -+ .to_text = bch2_sb_replicas_to_text, -+}; -+ -+static const char *bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f) -+{ -+ struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0); -+ struct bch_sb_field_members *mi = bch2_sb_get_members(sb); -+ struct bch_replicas_cpu cpu_r = { .entries = NULL }; -+ struct bch_replicas_entry_v0 *e; -+ const char *err; -+ unsigned i; -+ -+ for_each_replicas_entry_v0(sb_r, e) { -+ err = "invalid replicas entry: invalid data type"; -+ if (e->data_type >= BCH_DATA_NR) -+ goto err; -+ -+ err = "invalid replicas entry: no devices"; -+ if (!e->nr_devs) -+ goto err; -+ -+ err = "invalid replicas entry: invalid device"; -+ for (i = 0; i < e->nr_devs; i++) -+ if (!bch2_dev_exists(sb, mi, e->devs[i])) -+ goto err; -+ } -+ -+ err = "cannot allocate memory"; -+ if (__bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r)) -+ goto err; -+ -+ err = check_dup_replicas_entries(&cpu_r); -+err: -+ kfree(cpu_r.entries); -+ return err; -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = { -+ .validate = bch2_sb_validate_replicas_v0, -+}; -+ -+/* Query replicas: */ -+ -+struct replicas_status __bch2_replicas_status(struct bch_fs *c, -+ struct bch_devs_mask online_devs) -+{ -+ struct bch_sb_field_members *mi; -+ struct bch_replicas_entry *e; -+ unsigned i, nr_online, nr_offline; -+ struct replicas_status ret; -+ -+ memset(&ret, 0, sizeof(ret)); -+ -+ for (i = 0; i < ARRAY_SIZE(ret.replicas); i++) -+ ret.replicas[i].redundancy = INT_MAX; -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ -+ percpu_down_read(&c->mark_lock); -+ -+ for_each_cpu_replicas_entry(&c->replicas, e) { -+ if (e->data_type >= ARRAY_SIZE(ret.replicas)) -+ panic("e %p data_type %u\n", e, e->data_type); -+ -+ nr_online = nr_offline = 0; -+ -+ for (i = 0; i < e->nr_devs; i++) { -+ BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi, -+ e->devs[i])); -+ -+ if (test_bit(e->devs[i], online_devs.d)) -+ nr_online++; -+ else -+ nr_offline++; -+ } -+ -+ ret.replicas[e->data_type].redundancy = -+ min(ret.replicas[e->data_type].redundancy, -+ (int) nr_online - (int) e->nr_required); -+ -+ ret.replicas[e->data_type].nr_offline = -+ max(ret.replicas[e->data_type].nr_offline, -+ nr_offline); -+ } -+ -+ percpu_up_read(&c->mark_lock); -+ -+ for (i = 0; i < ARRAY_SIZE(ret.replicas); i++) -+ if (ret.replicas[i].redundancy == INT_MAX) -+ ret.replicas[i].redundancy = 0; -+ -+ return ret; -+} -+ -+struct replicas_status bch2_replicas_status(struct bch_fs *c) -+{ -+ return __bch2_replicas_status(c, bch2_online_devs(c)); -+} -+ -+static bool 
have_enough_devs(struct replicas_status s, -+ enum bch_data_type type, -+ bool force_if_degraded, -+ bool force_if_lost) -+{ -+ return (!s.replicas[type].nr_offline || force_if_degraded) && -+ (s.replicas[type].redundancy >= 0 || force_if_lost); -+} -+ -+bool bch2_have_enough_devs(struct replicas_status s, unsigned flags) -+{ -+ return (have_enough_devs(s, BCH_DATA_journal, -+ flags & BCH_FORCE_IF_METADATA_DEGRADED, -+ flags & BCH_FORCE_IF_METADATA_LOST) && -+ have_enough_devs(s, BCH_DATA_btree, -+ flags & BCH_FORCE_IF_METADATA_DEGRADED, -+ flags & BCH_FORCE_IF_METADATA_LOST) && -+ have_enough_devs(s, BCH_DATA_user, -+ flags & BCH_FORCE_IF_DATA_DEGRADED, -+ flags & BCH_FORCE_IF_DATA_LOST)); -+} -+ -+int bch2_replicas_online(struct bch_fs *c, bool meta) -+{ -+ struct replicas_status s = bch2_replicas_status(c); -+ -+ return (meta -+ ? min(s.replicas[BCH_DATA_journal].redundancy, -+ s.replicas[BCH_DATA_btree].redundancy) -+ : s.replicas[BCH_DATA_user].redundancy) + 1; -+} -+ -+unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bch_replicas_entry *e; -+ unsigned i, ret = 0; -+ -+ percpu_down_read(&c->mark_lock); -+ -+ for_each_cpu_replicas_entry(&c->replicas, e) -+ for (i = 0; i < e->nr_devs; i++) -+ if (e->devs[i] == ca->dev_idx) -+ ret |= 1 << e->data_type; -+ -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+int bch2_fs_replicas_init(struct bch_fs *c) -+{ -+ c->journal.entry_u64s_reserved += -+ reserve_journal_replicas(c, &c->replicas); -+ -+ return replicas_table_update(c, &c->replicas); -+} -diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h -new file mode 100644 -index 000000000000..8b95164fbb56 ---- /dev/null -+++ b/fs/bcachefs/replicas.h -@@ -0,0 +1,91 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REPLICAS_H -+#define _BCACHEFS_REPLICAS_H -+ -+#include "eytzinger.h" -+#include "replicas_types.h" -+ -+void bch2_replicas_entry_to_text(struct printbuf *, -+ struct bch_replicas_entry *); -+void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *); -+ -+static inline struct bch_replicas_entry * -+cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i) -+{ -+ return (void *) r->entries + r->entry_size * i; -+} -+ -+int bch2_replicas_entry_idx(struct bch_fs *, -+ struct bch_replicas_entry *); -+ -+void bch2_devlist_to_replicas(struct bch_replicas_entry *, -+ enum bch_data_type, -+ struct bch_devs_list); -+bool bch2_replicas_marked(struct bch_fs *, struct bch_replicas_entry *); -+int bch2_mark_replicas(struct bch_fs *, -+ struct bch_replicas_entry *); -+ -+void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c); -+bool bch2_bkey_replicas_marked(struct bch_fs *, struct bkey_s_c); -+int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c); -+ -+static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e, -+ unsigned dev) -+{ -+ e->data_type = BCH_DATA_cached; -+ e->nr_devs = 1; -+ e->nr_required = 1; -+ e->devs[0] = dev; -+} -+ -+struct replicas_status { -+ struct { -+ int redundancy; -+ unsigned nr_offline; -+ } replicas[BCH_DATA_NR]; -+}; -+ -+struct replicas_status __bch2_replicas_status(struct bch_fs *, -+ struct bch_devs_mask); -+struct replicas_status bch2_replicas_status(struct bch_fs *); -+bool bch2_have_enough_devs(struct replicas_status, unsigned); -+ -+int bch2_replicas_online(struct bch_fs *, bool); -+unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *); -+ -+int bch2_replicas_gc_end(struct bch_fs *, int); -+int bch2_replicas_gc_start(struct bch_fs 
*, unsigned); -+int bch2_replicas_gc2(struct bch_fs *); -+ -+int bch2_replicas_set_usage(struct bch_fs *, -+ struct bch_replicas_entry *, -+ u64); -+ -+#define for_each_cpu_replicas_entry(_r, _i) \ -+ for (_i = (_r)->entries; \ -+ (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\ -+ _i = (void *) (_i) + (_r)->entry_size) -+ -+/* iterate over superblock replicas - used by userspace tools: */ -+ -+#define replicas_entry_next(_i) \ -+ ((typeof(_i)) ((void *) (_i) + replicas_entry_bytes(_i))) -+ -+#define for_each_replicas_entry(_r, _i) \ -+ for (_i = (_r)->entries; \ -+ (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\ -+ (_i) = replicas_entry_next(_i)) -+ -+#define for_each_replicas_entry_v0(_r, _i) \ -+ for (_i = (_r)->entries; \ -+ (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\ -+ (_i) = replicas_entry_next(_i)) -+ -+int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *); -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas; -+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0; -+ -+int bch2_fs_replicas_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_REPLICAS_H */ -diff --git a/fs/bcachefs/replicas_types.h b/fs/bcachefs/replicas_types.h -new file mode 100644 -index 000000000000..0535b1d3760e ---- /dev/null -+++ b/fs/bcachefs/replicas_types.h -@@ -0,0 +1,10 @@ -+#ifndef _BCACHEFS_REPLICAS_TYPES_H -+#define _BCACHEFS_REPLICAS_TYPES_H -+ -+struct bch_replicas_cpu { -+ unsigned nr; -+ unsigned entry_size; -+ struct bch_replicas_entry *entries; -+}; -+ -+#endif /* _BCACHEFS_REPLICAS_TYPES_H */ -diff --git a/fs/bcachefs/siphash.c b/fs/bcachefs/siphash.c -new file mode 100644 -index 000000000000..c062edb3fbc2 ---- /dev/null -+++ b/fs/bcachefs/siphash.c -@@ -0,0 +1,173 @@ -+// SPDX-License-Identifier: BSD-3-Clause -+/* $OpenBSD: siphash.c,v 1.3 2015/02/20 11:51:03 tedu Exp $ */ -+ -+/*- -+ * Copyright (c) 2013 Andre Oppermann -+ * All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. The name of the author may not be used to endorse or promote -+ * products derived from this software without specific prior written -+ * permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. 
-+ */ -+ -+/* -+ * SipHash is a family of PRFs SipHash-c-d where the integer parameters c and d -+ * are the number of compression rounds and the number of finalization rounds. -+ * A compression round is identical to a finalization round and this round -+ * function is called SipRound. Given a 128-bit key k and a (possibly empty) -+ * byte string m, SipHash-c-d returns a 64-bit value SipHash-c-d(k; m). -+ * -+ * Implemented from the paper "SipHash: a fast short-input PRF", 2012.09.18, -+ * by Jean-Philippe Aumasson and Daniel J. Bernstein, -+ * Permanent Document ID b9a943a805fbfc6fde808af9fc0ecdfa -+ * https://131002.net/siphash/siphash.pdf -+ * https://131002.net/siphash/ -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#include "siphash.h" -+ -+static void SipHash_Rounds(SIPHASH_CTX *ctx, int rounds) -+{ -+ while (rounds--) { -+ ctx->v[0] += ctx->v[1]; -+ ctx->v[2] += ctx->v[3]; -+ ctx->v[1] = rol64(ctx->v[1], 13); -+ ctx->v[3] = rol64(ctx->v[3], 16); -+ -+ ctx->v[1] ^= ctx->v[0]; -+ ctx->v[3] ^= ctx->v[2]; -+ ctx->v[0] = rol64(ctx->v[0], 32); -+ -+ ctx->v[2] += ctx->v[1]; -+ ctx->v[0] += ctx->v[3]; -+ ctx->v[1] = rol64(ctx->v[1], 17); -+ ctx->v[3] = rol64(ctx->v[3], 21); -+ -+ ctx->v[1] ^= ctx->v[2]; -+ ctx->v[3] ^= ctx->v[0]; -+ ctx->v[2] = rol64(ctx->v[2], 32); -+ } -+} -+ -+static void SipHash_CRounds(SIPHASH_CTX *ctx, const void *ptr, int rounds) -+{ -+ u64 m = get_unaligned_le64(ptr); -+ -+ ctx->v[3] ^= m; -+ SipHash_Rounds(ctx, rounds); -+ ctx->v[0] ^= m; -+} -+ -+void SipHash_Init(SIPHASH_CTX *ctx, const SIPHASH_KEY *key) -+{ -+ u64 k0, k1; -+ -+ k0 = le64_to_cpu(key->k0); -+ k1 = le64_to_cpu(key->k1); -+ -+ ctx->v[0] = 0x736f6d6570736575ULL ^ k0; -+ ctx->v[1] = 0x646f72616e646f6dULL ^ k1; -+ ctx->v[2] = 0x6c7967656e657261ULL ^ k0; -+ ctx->v[3] = 0x7465646279746573ULL ^ k1; -+ -+ memset(ctx->buf, 0, sizeof(ctx->buf)); -+ ctx->bytes = 0; -+} -+ -+void SipHash_Update(SIPHASH_CTX *ctx, int rc, int rf, -+ const void *src, size_t len) -+{ -+ const u8 *ptr = src; -+ size_t left, used; -+ -+ if (len == 0) -+ return; -+ -+ used = ctx->bytes % sizeof(ctx->buf); -+ ctx->bytes += len; -+ -+ if (used > 0) { -+ left = sizeof(ctx->buf) - used; -+ -+ if (len >= left) { -+ memcpy(&ctx->buf[used], ptr, left); -+ SipHash_CRounds(ctx, ctx->buf, rc); -+ len -= left; -+ ptr += left; -+ } else { -+ memcpy(&ctx->buf[used], ptr, len); -+ return; -+ } -+ } -+ -+ while (len >= sizeof(ctx->buf)) { -+ SipHash_CRounds(ctx, ptr, rc); -+ len -= sizeof(ctx->buf); -+ ptr += sizeof(ctx->buf); -+ } -+ -+ if (len > 0) -+ memcpy(&ctx->buf[used], ptr, len); -+} -+ -+void SipHash_Final(void *dst, SIPHASH_CTX *ctx, int rc, int rf) -+{ -+ u64 r; -+ -+ r = SipHash_End(ctx, rc, rf); -+ -+ *((__le64 *) dst) = cpu_to_le64(r); -+} -+ -+u64 SipHash_End(SIPHASH_CTX *ctx, int rc, int rf) -+{ -+ u64 r; -+ size_t left, used; -+ -+ used = ctx->bytes % sizeof(ctx->buf); -+ left = sizeof(ctx->buf) - used; -+ memset(&ctx->buf[used], 0, left - 1); -+ ctx->buf[7] = ctx->bytes; -+ -+ SipHash_CRounds(ctx, ctx->buf, rc); -+ ctx->v[2] ^= 0xff; -+ SipHash_Rounds(ctx, rf); -+ -+ r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]); -+ memset(ctx, 0, sizeof(*ctx)); -+ return (r); -+} -+ -+u64 SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len) -+{ -+ SIPHASH_CTX ctx; -+ -+ SipHash_Init(&ctx, key); -+ SipHash_Update(&ctx, rc, rf, src, len); -+ return SipHash_End(&ctx, rc, rf); -+} -diff --git a/fs/bcachefs/siphash.h b/fs/bcachefs/siphash.h -new file mode 100644 -index 000000000000..3dfaf34a43b2 ---- 
/dev/null -+++ b/fs/bcachefs/siphash.h -@@ -0,0 +1,87 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* $OpenBSD: siphash.h,v 1.5 2015/02/20 11:51:03 tedu Exp $ */ -+/*- -+ * Copyright (c) 2013 Andre Oppermann -+ * All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. The name of the author may not be used to endorse or promote -+ * products derived from this software without specific prior written -+ * permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * $FreeBSD$ -+ */ -+ -+/* -+ * SipHash is a family of pseudorandom functions (a.k.a. keyed hash functions) -+ * optimized for speed on short messages returning a 64bit hash/digest value. 
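-+ *
-+ * In this copy of the code the 128-bit key is passed directly to
-+ * SipHash_Init() / SipHash24_Init() (there is no separate SipHash_SetKey())
-+ * and the compression/finalization round counts are supplied by the
-+ * SipHash24_*() / SipHash48_*() wrapper macros below, so a typical use
-+ * (buf/len being the message to hash) looks like:
-+ *
-+ *     SIPHASH_CTX ctx;
-+ *     SIPHASH_KEY key = { .k0 = cpu_to_le64(0x0706050403020100ULL),
-+ *                         .k1 = cpu_to_le64(0x0f0e0d0c0b0a0908ULL) };
-+ *     u64 digest;
-+ *
-+ *     SipHash24_Init(&ctx, &key);
-+ *     SipHash24_Update(&ctx, buf, len);
-+ *     digest = SipHash24_End(&ctx);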
-+ * -+ * The number of rounds is defined during the initialization: -+ * SipHash24_Init() for the fast and resonable strong version -+ * SipHash48_Init() for the strong version (half as fast) -+ * -+ * struct SIPHASH_CTX ctx; -+ * SipHash24_Init(&ctx); -+ * SipHash_SetKey(&ctx, "16bytes long key"); -+ * SipHash_Update(&ctx, pointer_to_string, length_of_string); -+ * SipHash_Final(output, &ctx); -+ */ -+ -+#ifndef _SIPHASH_H_ -+#define _SIPHASH_H_ -+ -+#include -+ -+#define SIPHASH_BLOCK_LENGTH 8 -+#define SIPHASH_KEY_LENGTH 16 -+#define SIPHASH_DIGEST_LENGTH 8 -+ -+typedef struct _SIPHASH_CTX { -+ u64 v[4]; -+ u8 buf[SIPHASH_BLOCK_LENGTH]; -+ u32 bytes; -+} SIPHASH_CTX; -+ -+typedef struct { -+ __le64 k0; -+ __le64 k1; -+} SIPHASH_KEY; -+ -+void SipHash_Init(SIPHASH_CTX *, const SIPHASH_KEY *); -+void SipHash_Update(SIPHASH_CTX *, int, int, const void *, size_t); -+u64 SipHash_End(SIPHASH_CTX *, int, int); -+void SipHash_Final(void *, SIPHASH_CTX *, int, int); -+u64 SipHash(const SIPHASH_KEY *, int, int, const void *, size_t); -+ -+#define SipHash24_Init(_c, _k) SipHash_Init((_c), (_k)) -+#define SipHash24_Update(_c, _p, _l) SipHash_Update((_c), 2, 4, (_p), (_l)) -+#define SipHash24_End(_d) SipHash_End((_d), 2, 4) -+#define SipHash24_Final(_d, _c) SipHash_Final((_d), (_c), 2, 4) -+#define SipHash24(_k, _p, _l) SipHash((_k), 2, 4, (_p), (_l)) -+ -+#define SipHash48_Init(_c, _k) SipHash_Init((_c), (_k)) -+#define SipHash48_Update(_c, _p, _l) SipHash_Update((_c), 4, 8, (_p), (_l)) -+#define SipHash48_End(_d) SipHash_End((_d), 4, 8) -+#define SipHash48_Final(_d, _c) SipHash_Final((_d), (_c), 4, 8) -+#define SipHash48(_k, _p, _l) SipHash((_k), 4, 8, (_p), (_l)) -+ -+#endif /* _SIPHASH_H_ */ -diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h -new file mode 100644 -index 000000000000..dea9b7252b88 ---- /dev/null -+++ b/fs/bcachefs/str_hash.h -@@ -0,0 +1,336 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_STR_HASH_H -+#define _BCACHEFS_STR_HASH_H -+ -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "checksum.h" -+#include "error.h" -+#include "inode.h" -+#include "siphash.h" -+#include "super.h" -+ -+#include -+#include -+#include -+ -+static inline enum bch_str_hash_type -+bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) -+{ -+ switch (opt) { -+ case BCH_STR_HASH_OPT_CRC32C: -+ return BCH_STR_HASH_CRC32C; -+ case BCH_STR_HASH_OPT_CRC64: -+ return BCH_STR_HASH_CRC64; -+ case BCH_STR_HASH_OPT_SIPHASH: -+ return c->sb.features & (1ULL << BCH_FEATURE_new_siphash) -+ ? 
BCH_STR_HASH_SIPHASH -+ : BCH_STR_HASH_SIPHASH_OLD; -+ default: -+ BUG(); -+ } -+} -+ -+struct bch_hash_info { -+ u8 type; -+ union { -+ __le64 crc_key; -+ SIPHASH_KEY siphash_key; -+ }; -+}; -+ -+static inline struct bch_hash_info -+bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) -+{ -+ /* XXX ick */ -+ struct bch_hash_info info = { -+ .type = (bi->bi_flags >> INODE_STR_HASH_OFFSET) & -+ ~(~0U << INODE_STR_HASH_BITS), -+ .crc_key = bi->bi_hash_seed, -+ }; -+ -+ if (unlikely(info.type == BCH_STR_HASH_SIPHASH_OLD)) { -+ SHASH_DESC_ON_STACK(desc, c->sha256); -+ u8 digest[SHA256_DIGEST_SIZE]; -+ -+ desc->tfm = c->sha256; -+ -+ crypto_shash_digest(desc, (void *) &bi->bi_hash_seed, -+ sizeof(bi->bi_hash_seed), digest); -+ memcpy(&info.siphash_key, digest, sizeof(info.siphash_key)); -+ } -+ -+ return info; -+} -+ -+struct bch_str_hash_ctx { -+ union { -+ u32 crc32c; -+ u64 crc64; -+ SIPHASH_CTX siphash; -+ }; -+}; -+ -+static inline void bch2_str_hash_init(struct bch_str_hash_ctx *ctx, -+ const struct bch_hash_info *info) -+{ -+ switch (info->type) { -+ case BCH_STR_HASH_CRC32C: -+ ctx->crc32c = crc32c(~0, &info->crc_key, sizeof(info->crc_key)); -+ break; -+ case BCH_STR_HASH_CRC64: -+ ctx->crc64 = crc64_be(~0, &info->crc_key, sizeof(info->crc_key)); -+ break; -+ case BCH_STR_HASH_SIPHASH_OLD: -+ case BCH_STR_HASH_SIPHASH: -+ SipHash24_Init(&ctx->siphash, &info->siphash_key); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static inline void bch2_str_hash_update(struct bch_str_hash_ctx *ctx, -+ const struct bch_hash_info *info, -+ const void *data, size_t len) -+{ -+ switch (info->type) { -+ case BCH_STR_HASH_CRC32C: -+ ctx->crc32c = crc32c(ctx->crc32c, data, len); -+ break; -+ case BCH_STR_HASH_CRC64: -+ ctx->crc64 = crc64_be(ctx->crc64, data, len); -+ break; -+ case BCH_STR_HASH_SIPHASH_OLD: -+ case BCH_STR_HASH_SIPHASH: -+ SipHash24_Update(&ctx->siphash, data, len); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx, -+ const struct bch_hash_info *info) -+{ -+ switch (info->type) { -+ case BCH_STR_HASH_CRC32C: -+ return ctx->crc32c; -+ case BCH_STR_HASH_CRC64: -+ return ctx->crc64 >> 1; -+ case BCH_STR_HASH_SIPHASH_OLD: -+ case BCH_STR_HASH_SIPHASH: -+ return SipHash24_End(&ctx->siphash) >> 1; -+ default: -+ BUG(); -+ } -+} -+ -+struct bch_hash_desc { -+ enum btree_id btree_id; -+ u8 key_type; -+ -+ u64 (*hash_key)(const struct bch_hash_info *, const void *); -+ u64 (*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c); -+ bool (*cmp_key)(struct bkey_s_c, const void *); -+ bool (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c); -+}; -+ -+static __always_inline struct btree_iter * -+bch2_hash_lookup(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ u64 inode, const void *key, -+ unsigned flags) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ for_each_btree_key(trans, iter, desc.btree_id, -+ POS(inode, desc.hash_key(info, key)), -+ BTREE_ITER_SLOTS|flags, k, ret) { -+ if (iter->pos.inode != inode) -+ break; -+ -+ if (k.k->type == desc.key_type) { -+ if (!desc.cmp_key(k, key)) -+ return iter; -+ } else if (k.k->type == KEY_TYPE_whiteout) { -+ ; -+ } else { -+ /* hole, not found */ -+ break; -+ } -+ } -+ bch2_trans_iter_put(trans, iter); -+ -+ return ERR_PTR(ret ?: -ENOENT); -+} -+ -+static __always_inline struct btree_iter * -+bch2_hash_hole(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info 
*info, -+ u64 inode, const void *key) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ for_each_btree_key(trans, iter, desc.btree_id, -+ POS(inode, desc.hash_key(info, key)), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (iter->pos.inode != inode) -+ break; -+ -+ if (k.k->type != desc.key_type) -+ return iter; -+ } -+ -+ iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ bch2_trans_iter_put(trans, iter); -+ -+ return ERR_PTR(ret ?: -ENOSPC); -+} -+ -+static __always_inline -+int bch2_hash_needs_whiteout(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ struct btree_iter *start) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ iter = bch2_trans_copy_iter(trans, start); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ bch2_btree_iter_next_slot(iter); -+ -+ for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) { -+ if (k.k->type != desc.key_type && -+ k.k->type != KEY_TYPE_whiteout) -+ break; -+ -+ if (k.k->type == desc.key_type && -+ desc.hash_bkey(info, k) <= start->pos.offset) { -+ iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ ret = 1; -+ break; -+ } -+ } -+ -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static __always_inline -+int bch2_hash_set(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ u64 inode, struct bkey_i *insert, int flags) -+{ -+ struct btree_iter *iter, *slot = NULL; -+ struct bkey_s_c k; -+ bool found = false; -+ int ret; -+ -+ for_each_btree_key(trans, iter, desc.btree_id, -+ POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (iter->pos.inode != inode) -+ break; -+ -+ if (k.k->type == desc.key_type) { -+ if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert))) -+ goto found; -+ -+ /* hash collision: */ -+ continue; -+ } -+ -+ if (!slot && -+ !(flags & BCH_HASH_SET_MUST_REPLACE)) { -+ slot = bch2_trans_copy_iter(trans, iter); -+ if (IS_ERR(slot)) -+ return PTR_ERR(slot); -+ } -+ -+ if (k.k->type != KEY_TYPE_whiteout) -+ goto not_found; -+ } -+ -+ if (!ret) -+ ret = -ENOSPC; -+out: -+ bch2_trans_iter_put(trans, slot); -+ bch2_trans_iter_put(trans, iter); -+ -+ return ret; -+found: -+ found = true; -+not_found: -+ -+ if (!found && (flags & BCH_HASH_SET_MUST_REPLACE)) { -+ ret = -ENOENT; -+ } else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) { -+ ret = -EEXIST; -+ } else { -+ if (!found && slot) -+ swap(iter, slot); -+ -+ insert->k.p = iter->pos; -+ bch2_trans_update(trans, iter, insert, 0); -+ } -+ -+ goto out; -+} -+ -+static __always_inline -+int bch2_hash_delete_at(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ struct btree_iter *iter) -+{ -+ struct bkey_i *delete; -+ int ret; -+ -+ ret = bch2_hash_needs_whiteout(trans, desc, info, iter); -+ if (ret < 0) -+ return ret; -+ -+ delete = bch2_trans_kmalloc(trans, sizeof(*delete)); -+ if (IS_ERR(delete)) -+ return PTR_ERR(delete); -+ -+ bkey_init(&delete->k); -+ delete->k.p = iter->pos; -+ delete->k.type = ret ? 
KEY_TYPE_whiteout : KEY_TYPE_deleted; -+ -+ bch2_trans_update(trans, iter, delete, 0); -+ return 0; -+} -+ -+static __always_inline -+int bch2_hash_delete(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ u64 inode, const void *key) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ iter = bch2_hash_lookup(trans, desc, info, inode, key, -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ ret = bch2_hash_delete_at(trans, desc, info, iter); -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+#endif /* _BCACHEFS_STR_HASH_H */ -diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c -new file mode 100644 -index 000000000000..cee6cc938734 ---- /dev/null -+++ b/fs/bcachefs/super-io.c -@@ -0,0 +1,1158 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "io.h" -+#include "journal.h" -+#include "journal_seq_blacklist.h" -+#include "replicas.h" -+#include "quota.h" -+#include "super-io.h" -+#include "super.h" -+#include "vstructs.h" -+ -+#include -+#include -+ -+const char * const bch2_sb_fields[] = { -+#define x(name, nr) #name, -+ BCH_SB_FIELDS() -+#undef x -+ NULL -+}; -+ -+static const char *bch2_sb_field_validate(struct bch_sb *, -+ struct bch_sb_field *); -+ -+struct bch_sb_field *bch2_sb_field_get(struct bch_sb *sb, -+ enum bch_sb_field_type type) -+{ -+ struct bch_sb_field *f; -+ -+ /* XXX: need locking around superblock to access optional fields */ -+ -+ vstruct_for_each(sb, f) -+ if (le32_to_cpu(f->type) == type) -+ return f; -+ return NULL; -+} -+ -+static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb, -+ struct bch_sb_field *f, -+ unsigned u64s) -+{ -+ unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0; -+ unsigned sb_u64s = le32_to_cpu(sb->sb->u64s) + u64s - old_u64s; -+ -+ BUG_ON(get_order(__vstruct_bytes(struct bch_sb, sb_u64s)) > -+ sb->page_order); -+ -+ if (!f && !u64s) { -+ /* nothing to do: */ -+ } else if (!f) { -+ f = vstruct_last(sb->sb); -+ memset(f, 0, sizeof(u64) * u64s); -+ f->u64s = cpu_to_le32(u64s); -+ f->type = 0; -+ } else { -+ void *src, *dst; -+ -+ src = vstruct_end(f); -+ -+ if (u64s) { -+ f->u64s = cpu_to_le32(u64s); -+ dst = vstruct_end(f); -+ } else { -+ dst = f; -+ } -+ -+ memmove(dst, src, vstruct_end(sb->sb) - src); -+ -+ if (dst > src) -+ memset(src, 0, dst - src); -+ } -+ -+ sb->sb->u64s = cpu_to_le32(sb_u64s); -+ -+ return u64s ? 
f : NULL; -+} -+ -+void bch2_sb_field_delete(struct bch_sb_handle *sb, -+ enum bch_sb_field_type type) -+{ -+ struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type); -+ -+ if (f) -+ __bch2_sb_field_resize(sb, f, 0); -+} -+ -+/* Superblock realloc/free: */ -+ -+void bch2_free_super(struct bch_sb_handle *sb) -+{ -+ if (sb->bio) -+ bio_put(sb->bio); -+ if (!IS_ERR_OR_NULL(sb->bdev)) -+ blkdev_put(sb->bdev, sb->mode); -+ -+ free_pages((unsigned long) sb->sb, sb->page_order); -+ memset(sb, 0, sizeof(*sb)); -+} -+ -+int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) -+{ -+ size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s); -+ unsigned order = get_order(new_bytes); -+ struct bch_sb *new_sb; -+ struct bio *bio; -+ -+ if (sb->sb && sb->page_order >= order) -+ return 0; -+ -+ if (sb->have_layout) { -+ u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits; -+ -+ if (new_bytes > max_bytes) { -+ char buf[BDEVNAME_SIZE]; -+ -+ pr_err("%s: superblock too big: want %zu but have %llu", -+ bdevname(sb->bdev, buf), new_bytes, max_bytes); -+ return -ENOSPC; -+ } -+ } -+ -+ if (sb->page_order >= order && sb->sb) -+ return 0; -+ -+ if (dynamic_fault("bcachefs:add:super_realloc")) -+ return -ENOMEM; -+ -+ if (sb->have_bio) { -+ bio = bio_kmalloc(GFP_KERNEL, 1 << order); -+ if (!bio) -+ return -ENOMEM; -+ -+ if (sb->bio) -+ bio_put(sb->bio); -+ sb->bio = bio; -+ } -+ -+ new_sb = (void *) __get_free_pages(GFP_NOFS|__GFP_ZERO, order); -+ if (!new_sb) -+ return -ENOMEM; -+ -+ if (sb->sb) -+ memcpy(new_sb, sb->sb, PAGE_SIZE << sb->page_order); -+ -+ free_pages((unsigned long) sb->sb, sb->page_order); -+ sb->sb = new_sb; -+ -+ sb->page_order = order; -+ -+ return 0; -+} -+ -+struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb, -+ enum bch_sb_field_type type, -+ unsigned u64s) -+{ -+ struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type); -+ ssize_t old_u64s = f ? 
le32_to_cpu(f->u64s) : 0; -+ ssize_t d = -old_u64s + u64s; -+ -+ if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) -+ return NULL; -+ -+ if (sb->fs_sb) { -+ struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ /* XXX: we're not checking that offline device have enough space */ -+ -+ for_each_online_member(ca, c, i) { -+ struct bch_sb_handle *sb = &ca->disk_sb; -+ -+ if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) { -+ percpu_ref_put(&ca->ref); -+ return NULL; -+ } -+ } -+ } -+ -+ f = bch2_sb_field_get(sb->sb, type); -+ f = __bch2_sb_field_resize(sb, f, u64s); -+ if (f) -+ f->type = cpu_to_le32(type); -+ return f; -+} -+ -+/* Superblock validate: */ -+ -+static inline void __bch2_sb_layout_size_assert(void) -+{ -+ BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512); -+} -+ -+static const char *validate_sb_layout(struct bch_sb_layout *layout) -+{ -+ u64 offset, prev_offset, max_sectors; -+ unsigned i; -+ -+ if (uuid_le_cmp(layout->magic, BCACHE_MAGIC)) -+ return "Not a bcachefs superblock layout"; -+ -+ if (layout->layout_type != 0) -+ return "Invalid superblock layout type"; -+ -+ if (!layout->nr_superblocks) -+ return "Invalid superblock layout: no superblocks"; -+ -+ if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) -+ return "Invalid superblock layout: too many superblocks"; -+ -+ max_sectors = 1 << layout->sb_max_size_bits; -+ -+ prev_offset = le64_to_cpu(layout->sb_offset[0]); -+ -+ for (i = 1; i < layout->nr_superblocks; i++) { -+ offset = le64_to_cpu(layout->sb_offset[i]); -+ -+ if (offset < prev_offset + max_sectors) -+ return "Invalid superblock layout: superblocks overlap"; -+ prev_offset = offset; -+ } -+ -+ return NULL; -+} -+ -+const char *bch2_sb_validate(struct bch_sb_handle *disk_sb) -+{ -+ struct bch_sb *sb = disk_sb->sb; -+ struct bch_sb_field *f; -+ struct bch_sb_field_members *mi; -+ const char *err; -+ u32 version, version_min; -+ u16 block_size; -+ -+ version = le16_to_cpu(sb->version); -+ version_min = version >= bcachefs_metadata_version_new_versioning -+ ? 
le16_to_cpu(sb->version_min) -+ : version; -+ -+ if (version >= bcachefs_metadata_version_max || -+ version_min < bcachefs_metadata_version_min) -+ return "Unsupported superblock version"; -+ -+ if (version_min > version) -+ return "Bad minimum version"; -+ -+ if (sb->features[1] || -+ (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) -+ return "Filesystem has incompatible features"; -+ -+ block_size = le16_to_cpu(sb->block_size); -+ -+ if (!is_power_of_2(block_size) || -+ block_size > PAGE_SECTORS) -+ return "Bad block size"; -+ -+ if (bch2_is_zero(sb->user_uuid.b, sizeof(uuid_le))) -+ return "Bad user UUID"; -+ -+ if (bch2_is_zero(sb->uuid.b, sizeof(uuid_le))) -+ return "Bad internal UUID"; -+ -+ if (!sb->nr_devices || -+ sb->nr_devices <= sb->dev_idx || -+ sb->nr_devices > BCH_SB_MEMBERS_MAX) -+ return "Bad number of member devices"; -+ -+ if (!BCH_SB_META_REPLICAS_WANT(sb) || -+ BCH_SB_META_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX) -+ return "Invalid number of metadata replicas"; -+ -+ if (!BCH_SB_META_REPLICAS_REQ(sb) || -+ BCH_SB_META_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX) -+ return "Invalid number of metadata replicas"; -+ -+ if (!BCH_SB_DATA_REPLICAS_WANT(sb) || -+ BCH_SB_DATA_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX) -+ return "Invalid number of data replicas"; -+ -+ if (!BCH_SB_DATA_REPLICAS_REQ(sb) || -+ BCH_SB_DATA_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX) -+ return "Invalid number of data replicas"; -+ -+ if (BCH_SB_META_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR) -+ return "Invalid metadata checksum type"; -+ -+ if (BCH_SB_DATA_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR) -+ return "Invalid metadata checksum type"; -+ -+ if (BCH_SB_COMPRESSION_TYPE(sb) >= BCH_COMPRESSION_OPT_NR) -+ return "Invalid compression type"; -+ -+ if (!BCH_SB_BTREE_NODE_SIZE(sb)) -+ return "Btree node size not set"; -+ -+ if (!is_power_of_2(BCH_SB_BTREE_NODE_SIZE(sb))) -+ return "Btree node size not a power of two"; -+ -+ if (BCH_SB_GC_RESERVE(sb) < 5) -+ return "gc reserve percentage too small"; -+ -+ if (!sb->time_precision || -+ le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) -+ return "invalid time precision"; -+ -+ /* validate layout */ -+ err = validate_sb_layout(&sb->layout); -+ if (err) -+ return err; -+ -+ vstruct_for_each(sb, f) { -+ if (!f->u64s) -+ return "Invalid superblock: invalid optional field"; -+ -+ if (vstruct_next(f) > vstruct_last(sb)) -+ return "Invalid superblock: invalid optional field"; -+ } -+ -+ /* members must be validated first: */ -+ mi = bch2_sb_get_members(sb); -+ if (!mi) -+ return "Invalid superblock: member info area missing"; -+ -+ err = bch2_sb_field_validate(sb, &mi->field); -+ if (err) -+ return err; -+ -+ vstruct_for_each(sb, f) { -+ if (le32_to_cpu(f->type) == BCH_SB_FIELD_members) -+ continue; -+ -+ err = bch2_sb_field_validate(sb, f); -+ if (err) -+ return err; -+ } -+ -+ return NULL; -+} -+ -+/* device open: */ -+ -+static void bch2_sb_update(struct bch_fs *c) -+{ -+ struct bch_sb *src = c->disk_sb.sb; -+ struct bch_sb_field_members *mi = bch2_sb_get_members(src); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ c->sb.uuid = src->uuid; -+ c->sb.user_uuid = src->user_uuid; -+ c->sb.version = le16_to_cpu(src->version); -+ c->sb.nr_devices = src->nr_devices; -+ c->sb.clean = BCH_SB_CLEAN(src); -+ c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src); -+ c->sb.encoded_extent_max= 1 << BCH_SB_ENCODED_EXTENT_MAX_BITS(src); -+ c->sb.time_base_lo = le64_to_cpu(src->time_base_lo); -+ c->sb.time_base_hi = le32_to_cpu(src->time_base_hi); -+ 
c->sb.time_precision = le32_to_cpu(src->time_precision); -+ c->sb.features = le64_to_cpu(src->features[0]); -+ c->sb.compat = le64_to_cpu(src->compat[0]); -+ -+ for_each_member_device(ca, c, i) -+ ca->mi = bch2_mi_to_cpu(mi->members + i); -+} -+ -+/* doesn't copy member info */ -+static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) -+{ -+ struct bch_sb_field *src_f, *dst_f; -+ struct bch_sb *dst = dst_handle->sb; -+ unsigned i; -+ -+ dst->version = src->version; -+ dst->version_min = src->version_min; -+ dst->seq = src->seq; -+ dst->uuid = src->uuid; -+ dst->user_uuid = src->user_uuid; -+ memcpy(dst->label, src->label, sizeof(dst->label)); -+ -+ dst->block_size = src->block_size; -+ dst->nr_devices = src->nr_devices; -+ -+ dst->time_base_lo = src->time_base_lo; -+ dst->time_base_hi = src->time_base_hi; -+ dst->time_precision = src->time_precision; -+ -+ memcpy(dst->flags, src->flags, sizeof(dst->flags)); -+ memcpy(dst->features, src->features, sizeof(dst->features)); -+ memcpy(dst->compat, src->compat, sizeof(dst->compat)); -+ -+ for (i = 0; i < BCH_SB_FIELD_NR; i++) { -+ if (i == BCH_SB_FIELD_journal) -+ continue; -+ -+ src_f = bch2_sb_field_get(src, i); -+ dst_f = bch2_sb_field_get(dst, i); -+ dst_f = __bch2_sb_field_resize(dst_handle, dst_f, -+ src_f ? le32_to_cpu(src_f->u64s) : 0); -+ -+ if (src_f) -+ memcpy(dst_f, src_f, vstruct_bytes(src_f)); -+ } -+} -+ -+int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src) -+{ -+ struct bch_sb_field_journal *journal_buckets = -+ bch2_sb_get_journal(src); -+ unsigned journal_u64s = journal_buckets -+ ? le32_to_cpu(journal_buckets->field.u64s) -+ : 0; -+ int ret; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ ret = bch2_sb_realloc(&c->disk_sb, -+ le32_to_cpu(src->u64s) - journal_u64s); -+ if (ret) -+ return ret; -+ -+ __copy_super(&c->disk_sb, src); -+ -+ ret = bch2_sb_replicas_to_cpu_replicas(c); -+ if (ret) -+ return ret; -+ -+ ret = bch2_sb_disk_groups_to_cpu(c); -+ if (ret) -+ return ret; -+ -+ bch2_sb_update(c); -+ return 0; -+} -+ -+int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bch_sb *src = c->disk_sb.sb, *dst = ca->disk_sb.sb; -+ struct bch_sb_field_journal *journal_buckets = -+ bch2_sb_get_journal(dst); -+ unsigned journal_u64s = journal_buckets -+ ? 
le32_to_cpu(journal_buckets->field.u64s) -+ : 0; -+ unsigned u64s = le32_to_cpu(src->u64s) + journal_u64s; -+ int ret; -+ -+ ret = bch2_sb_realloc(&ca->disk_sb, u64s); -+ if (ret) -+ return ret; -+ -+ __copy_super(&ca->disk_sb, src); -+ return 0; -+} -+ -+/* read superblock: */ -+ -+static const char *read_one_super(struct bch_sb_handle *sb, u64 offset) -+{ -+ struct bch_csum csum; -+ size_t bytes; -+reread: -+ bio_reset(sb->bio); -+ bio_set_dev(sb->bio, sb->bdev); -+ sb->bio->bi_iter.bi_sector = offset; -+ bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META); -+ bch2_bio_map(sb->bio, sb->sb, PAGE_SIZE << sb->page_order); -+ -+ if (submit_bio_wait(sb->bio)) -+ return "IO error"; -+ -+ if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC)) -+ return "Not a bcachefs superblock"; -+ -+ if (le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_min || -+ le16_to_cpu(sb->sb->version) >= bcachefs_metadata_version_max) -+ return "Unsupported superblock version"; -+ -+ bytes = vstruct_bytes(sb->sb); -+ -+ if (bytes > 512 << sb->sb->layout.sb_max_size_bits) -+ return "Bad superblock: too big"; -+ -+ if (get_order(bytes) > sb->page_order) { -+ if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s))) -+ return "cannot allocate memory"; -+ goto reread; -+ } -+ -+ if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) -+ return "unknown csum type"; -+ -+ /* XXX: verify MACs */ -+ csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb), -+ null_nonce(), sb->sb); -+ -+ if (bch2_crc_cmp(csum, sb->sb->csum)) -+ return "bad checksum reading superblock"; -+ -+ sb->seq = le64_to_cpu(sb->sb->seq); -+ -+ return NULL; -+} -+ -+int bch2_read_super(const char *path, struct bch_opts *opts, -+ struct bch_sb_handle *sb) -+{ -+ u64 offset = opt_get(*opts, sb); -+ struct bch_sb_layout layout; -+ const char *err; -+ __le64 *i; -+ int ret; -+ -+ pr_verbose_init(*opts, ""); -+ -+ memset(sb, 0, sizeof(*sb)); -+ sb->mode = FMODE_READ; -+ sb->have_bio = true; -+ -+ if (!opt_get(*opts, noexcl)) -+ sb->mode |= FMODE_EXCL; -+ -+ if (!opt_get(*opts, nochanges)) -+ sb->mode |= FMODE_WRITE; -+ -+ sb->bdev = blkdev_get_by_path(path, sb->mode, sb); -+ if (IS_ERR(sb->bdev) && -+ PTR_ERR(sb->bdev) == -EACCES && -+ opt_get(*opts, read_only)) { -+ sb->mode &= ~FMODE_WRITE; -+ -+ sb->bdev = blkdev_get_by_path(path, sb->mode, sb); -+ if (!IS_ERR(sb->bdev)) -+ opt_set(*opts, nochanges, true); -+ } -+ -+ if (IS_ERR(sb->bdev)) { -+ ret = PTR_ERR(sb->bdev); -+ goto out; -+ } -+ -+ err = "cannot allocate memory"; -+ ret = bch2_sb_realloc(sb, 0); -+ if (ret) -+ goto err; -+ -+ ret = -EFAULT; -+ err = "dynamic fault"; -+ if (bch2_fs_init_fault("read_super")) -+ goto err; -+ -+ ret = -EINVAL; -+ err = read_one_super(sb, offset); -+ if (!err) -+ goto got_super; -+ -+ if (opt_defined(*opts, sb)) -+ goto err; -+ -+ pr_err("error reading default superblock: %s", err); -+ -+ /* -+ * Error reading primary superblock - read location of backup -+ * superblocks: -+ */ -+ bio_reset(sb->bio); -+ bio_set_dev(sb->bio, sb->bdev); -+ sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR; -+ bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META); -+ /* -+ * use sb buffer to read layout, since sb buffer is page aligned but -+ * layout won't be: -+ */ -+ bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout)); -+ -+ err = "IO error"; -+ if (submit_bio_wait(sb->bio)) -+ goto err; -+ -+ memcpy(&layout, sb->sb, sizeof(layout)); -+ err = validate_sb_layout(&layout); -+ if (err) -+ goto err; -+ -+ for (i = layout.sb_offset; -+ i < layout.sb_offset + layout.nr_superblocks; i++) { -+ 
offset = le64_to_cpu(*i); -+ -+ if (offset == opt_get(*opts, sb)) -+ continue; -+ -+ err = read_one_super(sb, offset); -+ if (!err) -+ goto got_super; -+ } -+ -+ ret = -EINVAL; -+ goto err; -+ -+got_super: -+ err = "Superblock block size smaller than device block size"; -+ ret = -EINVAL; -+ if (le16_to_cpu(sb->sb->block_size) << 9 < -+ bdev_logical_block_size(sb->bdev)) -+ goto err; -+ -+ if (sb->mode & FMODE_WRITE) -+ bdev_get_queue(sb->bdev)->backing_dev_info->capabilities -+ |= BDI_CAP_STABLE_WRITES; -+ ret = 0; -+ sb->have_layout = true; -+out: -+ pr_verbose_init(*opts, "ret %i", ret); -+ return ret; -+err: -+ bch2_free_super(sb); -+ pr_err("error reading superblock: %s", err); -+ goto out; -+} -+ -+/* write superblock: */ -+ -+static void write_super_endio(struct bio *bio) -+{ -+ struct bch_dev *ca = bio->bi_private; -+ -+ /* XXX: return errors directly */ -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write: %s", -+ bch2_blk_status_to_str(bio->bi_status))) -+ ca->sb_write_error = 1; -+ -+ closure_put(&ca->fs->sb_write); -+ percpu_ref_put(&ca->io_ref); -+} -+ -+static void read_back_super(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bch_sb *sb = ca->disk_sb.sb; -+ struct bio *bio = ca->disk_sb.bio; -+ -+ bio_reset(bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]); -+ bio->bi_end_io = write_super_endio; -+ bio->bi_private = ca; -+ bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC|REQ_META); -+ bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE); -+ -+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], -+ bio_sectors(bio)); -+ -+ percpu_ref_get(&ca->io_ref); -+ closure_bio_submit(bio, &c->sb_write); -+} -+ -+static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) -+{ -+ struct bch_sb *sb = ca->disk_sb.sb; -+ struct bio *bio = ca->disk_sb.bio; -+ -+ sb->offset = sb->layout.sb_offset[idx]; -+ -+ SET_BCH_SB_CSUM_TYPE(sb, c->opts.metadata_checksum); -+ sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb), -+ null_nonce(), sb); -+ -+ bio_reset(bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_iter.bi_sector = le64_to_cpu(sb->offset); -+ bio->bi_end_io = write_super_endio; -+ bio->bi_private = ca; -+ bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META); -+ bch2_bio_map(bio, sb, -+ roundup((size_t) vstruct_bytes(sb), -+ bdev_logical_block_size(ca->disk_sb.bdev))); -+ -+ this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb], -+ bio_sectors(bio)); -+ -+ percpu_ref_get(&ca->io_ref); -+ closure_bio_submit(bio, &c->sb_write); -+} -+ -+int bch2_write_super(struct bch_fs *c) -+{ -+ struct closure *cl = &c->sb_write; -+ struct bch_dev *ca; -+ unsigned i, sb = 0, nr_wrote; -+ const char *err; -+ struct bch_devs_mask sb_written; -+ bool wrote, can_mount_without_written, can_mount_with_written; -+ int ret = 0; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ closure_init_stack(cl); -+ memset(&sb_written, 0, sizeof(sb_written)); -+ -+ le64_add_cpu(&c->disk_sb.sb->seq, 1); -+ -+ if (test_bit(BCH_FS_ERROR, &c->flags)) -+ SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1); -+ -+ for_each_online_member(ca, c, i) -+ bch2_sb_from_fs(c, ca); -+ -+ for_each_online_member(ca, c, i) { -+ err = bch2_sb_validate(&ca->disk_sb); -+ if (err) { -+ bch2_fs_inconsistent(c, "sb invalid before write: %s", err); -+ ret = -1; -+ goto out; -+ } -+ } -+ -+ if (c->opts.nochanges) -+ goto out; -+ -+ for_each_online_member(ca, c, i) { -+ __set_bit(ca->dev_idx, sb_written.d); -+ ca->sb_write_error = 0; -+ } -+ -+ for_each_online_member(ca, c, i) 
-+ read_back_super(c, ca); -+ closure_sync(cl); -+ -+ for_each_online_member(ca, c, i) { -+ if (!ca->sb_write_error && -+ ca->disk_sb.seq != -+ le64_to_cpu(ca->sb_read_scratch->seq)) { -+ bch2_fs_fatal_error(c, -+ "Superblock modified by another process"); -+ percpu_ref_put(&ca->io_ref); -+ ret = -EROFS; -+ goto out; -+ } -+ } -+ -+ do { -+ wrote = false; -+ for_each_online_member(ca, c, i) -+ if (!ca->sb_write_error && -+ sb < ca->disk_sb.sb->layout.nr_superblocks) { -+ write_one_super(c, ca, sb); -+ wrote = true; -+ } -+ closure_sync(cl); -+ sb++; -+ } while (wrote); -+ -+ for_each_online_member(ca, c, i) { -+ if (ca->sb_write_error) -+ __clear_bit(ca->dev_idx, sb_written.d); -+ else -+ ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq); -+ } -+ -+ nr_wrote = dev_mask_nr(&sb_written); -+ -+ can_mount_with_written = -+ bch2_have_enough_devs(__bch2_replicas_status(c, sb_written), -+ BCH_FORCE_IF_DEGRADED); -+ -+ for (i = 0; i < ARRAY_SIZE(sb_written.d); i++) -+ sb_written.d[i] = ~sb_written.d[i]; -+ -+ can_mount_without_written = -+ bch2_have_enough_devs(__bch2_replicas_status(c, sb_written), -+ BCH_FORCE_IF_DEGRADED); -+ -+ /* -+ * If we would be able to mount _without_ the devices we successfully -+ * wrote superblocks to, we weren't able to write to enough devices: -+ * -+ * Exception: if we can mount without the successes because we haven't -+ * written anything (new filesystem), we continue if we'd be able to -+ * mount with the devices we did successfully write to: -+ */ -+ if (bch2_fs_fatal_err_on(!nr_wrote || -+ (can_mount_without_written && -+ !can_mount_with_written), c, -+ "Unable to write superblock to sufficient devices")) -+ ret = -1; -+out: -+ /* Make new options visible after they're persistent: */ -+ bch2_sb_update(c); -+ return ret; -+} -+ -+void __bch2_check_set_feature(struct bch_fs *c, unsigned feat) -+{ -+ mutex_lock(&c->sb_lock); -+ if (!(c->sb.features & (1ULL << feat))) { -+ c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat); -+ -+ bch2_write_super(c); -+ } -+ mutex_unlock(&c->sb_lock); -+} -+ -+/* BCH_SB_FIELD_journal: */ -+ -+static int u64_cmp(const void *_l, const void *_r) -+{ -+ u64 l = *((const u64 *) _l), r = *((const u64 *) _r); -+ -+ return l < r ? -1 : l > r ? 
1 : 0; -+} -+ -+static const char *bch2_sb_validate_journal(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_journal *journal = field_to_type(f, journal); -+ struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx; -+ const char *err; -+ unsigned nr; -+ unsigned i; -+ u64 *b; -+ -+ journal = bch2_sb_get_journal(sb); -+ if (!journal) -+ return NULL; -+ -+ nr = bch2_nr_journal_buckets(journal); -+ if (!nr) -+ return NULL; -+ -+ b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL); -+ if (!b) -+ return "cannot allocate memory"; -+ -+ for (i = 0; i < nr; i++) -+ b[i] = le64_to_cpu(journal->buckets[i]); -+ -+ sort(b, nr, sizeof(u64), u64_cmp, NULL); -+ -+ err = "journal bucket at sector 0"; -+ if (!b[0]) -+ goto err; -+ -+ err = "journal bucket before first bucket"; -+ if (m && b[0] < le16_to_cpu(m->first_bucket)) -+ goto err; -+ -+ err = "journal bucket past end of device"; -+ if (m && b[nr - 1] >= le64_to_cpu(m->nbuckets)) -+ goto err; -+ -+ err = "duplicate journal buckets"; -+ for (i = 0; i + 1 < nr; i++) -+ if (b[i] == b[i + 1]) -+ goto err; -+ -+ err = NULL; -+err: -+ kfree(b); -+ return err; -+} -+ -+static const struct bch_sb_field_ops bch_sb_field_ops_journal = { -+ .validate = bch2_sb_validate_journal, -+}; -+ -+/* BCH_SB_FIELD_members: */ -+ -+static const char *bch2_sb_validate_members(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_members *mi = field_to_type(f, members); -+ struct bch_member *m; -+ -+ if ((void *) (mi->members + sb->nr_devices) > -+ vstruct_end(&mi->field)) -+ return "Invalid superblock: bad member info"; -+ -+ for (m = mi->members; -+ m < mi->members + sb->nr_devices; -+ m++) { -+ if (!bch2_member_exists(m)) -+ continue; -+ -+ if (le64_to_cpu(m->nbuckets) > LONG_MAX) -+ return "Too many buckets"; -+ -+ if (le64_to_cpu(m->nbuckets) - -+ le16_to_cpu(m->first_bucket) < BCH_MIN_NR_NBUCKETS) -+ return "Not enough buckets"; -+ -+ if (le16_to_cpu(m->bucket_size) < -+ le16_to_cpu(sb->block_size)) -+ return "bucket size smaller than block size"; -+ -+ if (le16_to_cpu(m->bucket_size) < -+ BCH_SB_BTREE_NODE_SIZE(sb)) -+ return "bucket size smaller than btree node size"; -+ } -+ -+ return NULL; -+} -+ -+static const struct bch_sb_field_ops bch_sb_field_ops_members = { -+ .validate = bch2_sb_validate_members, -+}; -+ -+/* BCH_SB_FIELD_crypt: */ -+ -+static const char *bch2_sb_validate_crypt(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_crypt *crypt = field_to_type(f, crypt); -+ -+ if (vstruct_bytes(&crypt->field) != sizeof(*crypt)) -+ return "invalid field crypt: wrong size"; -+ -+ if (BCH_CRYPT_KDF_TYPE(crypt)) -+ return "invalid field crypt: bad kdf type"; -+ -+ return NULL; -+} -+ -+static const struct bch_sb_field_ops bch_sb_field_ops_crypt = { -+ .validate = bch2_sb_validate_crypt, -+}; -+ -+/* BCH_SB_FIELD_clean: */ -+ -+void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write) -+{ -+ struct jset_entry *entry; -+ -+ for (entry = clean->start; -+ entry < (struct jset_entry *) vstruct_end(&clean->field); -+ entry = vstruct_next(entry)) -+ bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write); -+} -+ -+int bch2_fs_mark_dirty(struct bch_fs *c) -+{ -+ int ret; -+ -+ /* -+ * Unconditionally write superblock, to verify it hasn't changed before -+ * we go rw: -+ */ -+ -+ mutex_lock(&c->sb_lock); -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite; -+ c->disk_sb.sb->features[0] |= 1ULL << 
BCH_FEATURE_extents_above_btree_updates; -+ c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_updates_journalled; -+ ret = bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+static void -+entry_init_u64s(struct jset_entry *entry, unsigned u64s) -+{ -+ memset(entry, 0, u64s * sizeof(u64)); -+ -+ /* -+ * The u64s field counts from the start of data, ignoring the shared -+ * fields. -+ */ -+ entry->u64s = u64s - 1; -+} -+ -+static void -+entry_init_size(struct jset_entry *entry, size_t size) -+{ -+ unsigned u64s = DIV_ROUND_UP(size, sizeof(u64)); -+ entry_init_u64s(entry, u64s); -+} -+ -+struct jset_entry * -+bch2_journal_super_entries_add_common(struct bch_fs *c, -+ struct jset_entry *entry, -+ u64 journal_seq) -+{ -+ unsigned i; -+ -+ percpu_down_write(&c->mark_lock); -+ -+ if (!journal_seq) { -+ bch2_fs_usage_acc_to_base(c, 0); -+ bch2_fs_usage_acc_to_base(c, 1); -+ } else { -+ bch2_fs_usage_acc_to_base(c, journal_seq & 1); -+ } -+ -+ { -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ -+ entry_init_size(entry, sizeof(*u)); -+ u->entry.type = BCH_JSET_ENTRY_usage; -+ u->entry.btree_id = FS_USAGE_INODES; -+ u->v = cpu_to_le64(c->usage_base->nr_inodes); -+ -+ entry = vstruct_next(entry); -+ } -+ -+ { -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ -+ entry_init_size(entry, sizeof(*u)); -+ u->entry.type = BCH_JSET_ENTRY_usage; -+ u->entry.btree_id = FS_USAGE_KEY_VERSION; -+ u->v = cpu_to_le64(atomic64_read(&c->key_version)); -+ -+ entry = vstruct_next(entry); -+ } -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) { -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ -+ entry_init_size(entry, sizeof(*u)); -+ u->entry.type = BCH_JSET_ENTRY_usage; -+ u->entry.btree_id = FS_USAGE_RESERVED; -+ u->entry.level = i; -+ u->v = cpu_to_le64(c->usage_base->persistent_reserved[i]); -+ -+ entry = vstruct_next(entry); -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ struct jset_entry_data_usage *u = -+ container_of(entry, struct jset_entry_data_usage, entry); -+ -+ entry_init_size(entry, sizeof(*u) + e->nr_devs); -+ u->entry.type = BCH_JSET_ENTRY_data_usage; -+ u->v = cpu_to_le64(c->usage_base->replicas[i]); -+ memcpy(&u->r, e, replicas_entry_bytes(e)); -+ -+ entry = vstruct_next(entry); -+ } -+ -+ percpu_up_write(&c->mark_lock); -+ -+ return entry; -+} -+ -+void bch2_fs_mark_clean(struct bch_fs *c) -+{ -+ struct bch_sb_field_clean *sb_clean; -+ struct jset_entry *entry; -+ unsigned u64s; -+ -+ mutex_lock(&c->sb_lock); -+ if (BCH_SB_CLEAN(c->disk_sb.sb)) -+ goto out; -+ -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, true); -+ -+ c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO; -+ c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA; -+ c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_extents_above_btree_updates); -+ c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_btree_updates_journalled); -+ -+ u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved; -+ -+ sb_clean = bch2_sb_resize_clean(&c->disk_sb, u64s); -+ if (!sb_clean) { -+ bch_err(c, "error resizing superblock while setting filesystem clean"); -+ goto out; -+ } -+ -+ sb_clean->flags = 0; -+ sb_clean->read_clock = cpu_to_le16(c->bucket_clock[READ].hand); -+ sb_clean->write_clock = cpu_to_le16(c->bucket_clock[WRITE].hand); -+ sb_clean->journal_seq = 
cpu_to_le64(journal_cur_seq(&c->journal) - 1); -+ -+ /* Trying to catch outstanding bug: */ -+ BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX); -+ -+ entry = sb_clean->start; -+ entry = bch2_journal_super_entries_add_common(c, entry, 0); -+ entry = bch2_btree_roots_to_journal_entries(c, entry, entry); -+ BUG_ON((void *) entry > vstruct_end(&sb_clean->field)); -+ -+ memset(entry, 0, -+ vstruct_end(&sb_clean->field) - (void *) entry); -+ -+ if (le16_to_cpu(c->disk_sb.sb->version) < -+ bcachefs_metadata_version_bkey_renumber) -+ bch2_sb_clean_renumber(sb_clean, WRITE); -+ -+ bch2_write_super(c); -+out: -+ mutex_unlock(&c->sb_lock); -+} -+ -+static const char *bch2_sb_validate_clean(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_clean *clean = field_to_type(f, clean); -+ -+ if (vstruct_bytes(&clean->field) < sizeof(*clean)) -+ return "invalid field crypt: wrong size"; -+ -+ return NULL; -+} -+ -+static const struct bch_sb_field_ops bch_sb_field_ops_clean = { -+ .validate = bch2_sb_validate_clean, -+}; -+ -+static const struct bch_sb_field_ops *bch2_sb_field_ops[] = { -+#define x(f, nr) \ -+ [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f, -+ BCH_SB_FIELDS() -+#undef x -+}; -+ -+static const char *bch2_sb_field_validate(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ unsigned type = le32_to_cpu(f->type); -+ -+ return type < BCH_SB_FIELD_NR -+ ? bch2_sb_field_ops[type]->validate(sb, f) -+ : NULL; -+} -+ -+void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ unsigned type = le32_to_cpu(f->type); -+ const struct bch_sb_field_ops *ops = type < BCH_SB_FIELD_NR -+ ? bch2_sb_field_ops[type] : NULL; -+ -+ if (ops) -+ pr_buf(out, "%s", bch2_sb_fields[type]); -+ else -+ pr_buf(out, "(unknown field %u)", type); -+ -+ pr_buf(out, " (size %llu):", vstruct_bytes(f)); -+ -+ if (ops && ops->to_text) -+ bch2_sb_field_ops[type]->to_text(out, sb, f); -+} -diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h -new file mode 100644 -index 000000000000..7a068158efca ---- /dev/null -+++ b/fs/bcachefs/super-io.h -@@ -0,0 +1,137 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SUPER_IO_H -+#define _BCACHEFS_SUPER_IO_H -+ -+#include "extents.h" -+#include "eytzinger.h" -+#include "super_types.h" -+#include "super.h" -+ -+#include -+ -+struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type); -+struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *, -+ enum bch_sb_field_type, unsigned); -+void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type); -+ -+#define field_to_type(_f, _name) \ -+ container_of_or_null(_f, struct bch_sb_field_##_name, field) -+ -+#define x(_name, _nr) \ -+static inline struct bch_sb_field_##_name * \ -+bch2_sb_get_##_name(struct bch_sb *sb) \ -+{ \ -+ return field_to_type(bch2_sb_field_get(sb, \ -+ BCH_SB_FIELD_##_name), _name); \ -+} \ -+ \ -+static inline struct bch_sb_field_##_name * \ -+bch2_sb_resize_##_name(struct bch_sb_handle *sb, unsigned u64s) \ -+{ \ -+ return field_to_type(bch2_sb_field_resize(sb, \ -+ BCH_SB_FIELD_##_name, u64s), _name); \ -+} -+ -+BCH_SB_FIELDS() -+#undef x -+ -+extern const char * const bch2_sb_fields[]; -+ -+struct bch_sb_field_ops { -+ const char * (*validate)(struct bch_sb *, struct bch_sb_field *); -+ void (*to_text)(struct printbuf *, struct bch_sb *, -+ struct bch_sb_field *); -+}; -+ -+static inline __le64 bch2_sb_magic(struct bch_fs *c) -+{ -+ __le64 ret; -+ memcpy(&ret, &c->sb.uuid, sizeof(ret)); -+ return 
ret; -+} -+ -+static inline __u64 jset_magic(struct bch_fs *c) -+{ -+ return __le64_to_cpu(bch2_sb_magic(c) ^ JSET_MAGIC); -+} -+ -+static inline __u64 bset_magic(struct bch_fs *c) -+{ -+ return __le64_to_cpu(bch2_sb_magic(c) ^ BSET_MAGIC); -+} -+ -+int bch2_sb_to_fs(struct bch_fs *, struct bch_sb *); -+int bch2_sb_from_fs(struct bch_fs *, struct bch_dev *); -+ -+void bch2_free_super(struct bch_sb_handle *); -+int bch2_sb_realloc(struct bch_sb_handle *, unsigned); -+ -+const char *bch2_sb_validate(struct bch_sb_handle *); -+ -+int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *); -+int bch2_write_super(struct bch_fs *); -+void __bch2_check_set_feature(struct bch_fs *, unsigned); -+ -+static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) -+{ -+ if (!(c->sb.features & (1ULL << feat))) -+ __bch2_check_set_feature(c, feat); -+} -+ -+/* BCH_SB_FIELD_journal: */ -+ -+static inline unsigned bch2_nr_journal_buckets(struct bch_sb_field_journal *j) -+{ -+ return j -+ ? (__le64 *) vstruct_end(&j->field) - j->buckets -+ : 0; -+} -+ -+/* BCH_SB_FIELD_members: */ -+ -+static inline bool bch2_member_exists(struct bch_member *m) -+{ -+ return !bch2_is_zero(m->uuid.b, sizeof(uuid_le)); -+} -+ -+static inline bool bch2_dev_exists(struct bch_sb *sb, -+ struct bch_sb_field_members *mi, -+ unsigned dev) -+{ -+ return dev < sb->nr_devices && -+ bch2_member_exists(&mi->members[dev]); -+} -+ -+static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) -+{ -+ return (struct bch_member_cpu) { -+ .nbuckets = le64_to_cpu(mi->nbuckets), -+ .first_bucket = le16_to_cpu(mi->first_bucket), -+ .bucket_size = le16_to_cpu(mi->bucket_size), -+ .group = BCH_MEMBER_GROUP(mi), -+ .state = BCH_MEMBER_STATE(mi), -+ .replacement = BCH_MEMBER_REPLACEMENT(mi), -+ .discard = BCH_MEMBER_DISCARD(mi), -+ .data_allowed = BCH_MEMBER_DATA_ALLOWED(mi), -+ .durability = BCH_MEMBER_DURABILITY(mi) -+ ? BCH_MEMBER_DURABILITY(mi) - 1 -+ : 1, -+ .valid = !bch2_is_zero(mi->uuid.b, sizeof(uuid_le)), -+ }; -+} -+ -+/* BCH_SB_FIELD_clean: */ -+ -+struct jset_entry * -+bch2_journal_super_entries_add_common(struct bch_fs *, -+ struct jset_entry *, u64); -+ -+void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int); -+ -+int bch2_fs_mark_dirty(struct bch_fs *); -+void bch2_fs_mark_clean(struct bch_fs *); -+ -+void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, -+ struct bch_sb_field *); -+ -+#endif /* _BCACHEFS_SUPER_IO_H */ -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -new file mode 100644 -index 000000000000..30be083b09bf ---- /dev/null -+++ b/fs/bcachefs/super.c -@@ -0,0 +1,2062 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * bcachefs setup/teardown code, and some metadata io - read a superblock and -+ * figure out what to do with it. -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. 
-+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "bkey_sort.h" -+#include "btree_cache.h" -+#include "btree_gc.h" -+#include "btree_key_cache.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "chardev.h" -+#include "checksum.h" -+#include "clock.h" -+#include "compress.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "fs.h" -+#include "fs-io.h" -+#include "fsck.h" -+#include "inode.h" -+#include "io.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "move.h" -+#include "migrate.h" -+#include "movinggc.h" -+#include "quota.h" -+#include "rebalance.h" -+#include "recovery.h" -+#include "replicas.h" -+#include "super.h" -+#include "super-io.h" -+#include "sysfs.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Kent Overstreet "); -+ -+#define KTYPE(type) \ -+struct kobj_type type ## _ktype = { \ -+ .release = type ## _release, \ -+ .sysfs_ops = &type ## _sysfs_ops, \ -+ .default_attrs = type ## _files \ -+} -+ -+static void bch2_fs_release(struct kobject *); -+static void bch2_dev_release(struct kobject *); -+ -+static void bch2_fs_internal_release(struct kobject *k) -+{ -+} -+ -+static void bch2_fs_opts_dir_release(struct kobject *k) -+{ -+} -+ -+static void bch2_fs_time_stats_release(struct kobject *k) -+{ -+} -+ -+static KTYPE(bch2_fs); -+static KTYPE(bch2_fs_internal); -+static KTYPE(bch2_fs_opts_dir); -+static KTYPE(bch2_fs_time_stats); -+static KTYPE(bch2_dev); -+ -+static struct kset *bcachefs_kset; -+static LIST_HEAD(bch_fs_list); -+static DEFINE_MUTEX(bch_fs_list_lock); -+ -+static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait); -+ -+static void bch2_dev_free(struct bch_dev *); -+static int bch2_dev_alloc(struct bch_fs *, unsigned); -+static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *); -+static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *); -+ -+struct bch_fs *bch2_bdev_to_fs(struct block_device *bdev) -+{ -+ struct bch_fs *c; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ mutex_lock(&bch_fs_list_lock); -+ rcu_read_lock(); -+ -+ list_for_each_entry(c, &bch_fs_list, list) -+ for_each_member_device_rcu(ca, c, i, NULL) -+ if (ca->disk_sb.bdev == bdev) { -+ closure_get(&c->cl); -+ goto found; -+ } -+ c = NULL; -+found: -+ rcu_read_unlock(); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ return c; -+} -+ -+static struct bch_fs *__bch2_uuid_to_fs(uuid_le uuid) -+{ -+ struct bch_fs *c; -+ -+ lockdep_assert_held(&bch_fs_list_lock); -+ -+ list_for_each_entry(c, &bch_fs_list, list) -+ if (!memcmp(&c->disk_sb.sb->uuid, &uuid, sizeof(uuid_le))) -+ return c; -+ -+ return NULL; -+} -+ -+struct bch_fs *bch2_uuid_to_fs(uuid_le uuid) -+{ -+ struct bch_fs *c; -+ -+ mutex_lock(&bch_fs_list_lock); -+ c = __bch2_uuid_to_fs(uuid); -+ if (c) -+ closure_get(&c->cl); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ return c; -+} -+ -+int bch2_congested(void *data, int bdi_bits) -+{ -+ struct bch_fs *c = data; -+ struct backing_dev_info *bdi; -+ struct bch_dev *ca; -+ unsigned i; -+ int ret = 0; -+ -+ rcu_read_lock(); -+ if (bdi_bits & (1 << WB_sync_congested)) { -+ /* Reads - check all devices: */ -+ for_each_readable_member(ca, c, i) { -+ bdi = ca->disk_sb.bdev->bd_bdi; -+ -+ if (bdi_congested(bdi, bdi_bits)) { -+ ret = 1; -+ break; -+ } -+ } -+ } else { -+ const 
struct bch_devs_mask *devs = -+ bch2_target_to_mask(c, c->opts.foreground_target) ?: -+ &c->rw_devs[BCH_DATA_user]; -+ -+ for_each_member_device_rcu(ca, c, i, devs) { -+ bdi = ca->disk_sb.bdev->bd_bdi; -+ -+ if (bdi_congested(bdi, bdi_bits)) { -+ ret = 1; -+ break; -+ } -+ } -+ } -+ rcu_read_unlock(); -+ -+ return ret; -+} -+ -+/* Filesystem RO/RW: */ -+ -+/* -+ * For startup/shutdown of RW stuff, the dependencies are: -+ * -+ * - foreground writes depend on copygc and rebalance (to free up space) -+ * -+ * - copygc and rebalance depend on mark and sweep gc (they actually probably -+ * don't because they either reserve ahead of time or don't block if -+ * allocations fail, but allocations can require mark and sweep gc to run -+ * because of generation number wraparound) -+ * -+ * - all of the above depends on the allocator threads -+ * -+ * - allocator depends on the journal (when it rewrites prios and gens) -+ */ -+ -+static void __bch2_fs_read_only(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ bool wrote = false; -+ unsigned i, clean_passes = 0; -+ int ret; -+ -+ bch2_rebalance_stop(c); -+ bch2_copygc_stop(c); -+ bch2_gc_thread_stop(c); -+ -+ /* -+ * Flush journal before stopping allocators, because flushing journal -+ * blacklist entries involves allocating new btree nodes: -+ */ -+ bch2_journal_flush_all_pins(&c->journal); -+ -+ /* -+ * If the allocator threads didn't all start up, the btree updates to -+ * write out alloc info aren't going to work: -+ */ -+ if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags)) -+ goto nowrote_alloc; -+ -+ bch_verbose(c, "writing alloc info"); -+ /* -+ * This should normally just be writing the bucket read/write clocks: -+ */ -+ ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?: -+ bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote); -+ bch_verbose(c, "writing alloc info complete"); -+ -+ if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) -+ bch2_fs_inconsistent(c, "error writing out alloc info %i", ret); -+ -+ if (ret) -+ goto nowrote_alloc; -+ -+ bch_verbose(c, "flushing journal and stopping allocators"); -+ -+ bch2_journal_flush_all_pins(&c->journal); -+ set_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags); -+ -+ do { -+ clean_passes++; -+ -+ if (bch2_journal_flush_all_pins(&c->journal)) -+ clean_passes = 0; -+ -+ /* -+ * In flight interior btree updates will generate more journal -+ * updates and btree updates (alloc btree): -+ */ -+ if (bch2_btree_interior_updates_nr_pending(c)) { -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+ clean_passes = 0; -+ } -+ flush_work(&c->btree_interior_update_work); -+ -+ if (bch2_journal_flush_all_pins(&c->journal)) -+ clean_passes = 0; -+ } while (clean_passes < 2); -+ bch_verbose(c, "flushing journal and stopping allocators complete"); -+ -+ set_bit(BCH_FS_ALLOC_CLEAN, &c->flags); -+nowrote_alloc: -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+ flush_work(&c->btree_interior_update_work); -+ -+ for_each_member_device(ca, c, i) -+ bch2_dev_allocator_stop(ca); -+ -+ clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); -+ clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags); -+ -+ bch2_fs_journal_stop(&c->journal); -+ -+ /* -+ * the journal kicks off btree writes via reclaim - wait for in flight -+ * writes after stopping journal: -+ */ -+ if (test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) -+ bch2_btree_flush_all_writes(c); -+ else -+ bch2_btree_verify_flushed(c); -+ -+ /* -+ * After stopping journal: -+ */ -+ 
for_each_member_device(ca, c, i) -+ bch2_dev_allocator_remove(c, ca); -+} -+ -+static void bch2_writes_disabled(struct percpu_ref *writes) -+{ -+ struct bch_fs *c = container_of(writes, struct bch_fs, writes); -+ -+ set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); -+ wake_up(&bch_read_only_wait); -+} -+ -+void bch2_fs_read_only(struct bch_fs *c) -+{ -+ if (!test_bit(BCH_FS_RW, &c->flags)) { -+ cancel_delayed_work_sync(&c->journal.reclaim_work); -+ return; -+ } -+ -+ BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); -+ -+ /* -+ * Block new foreground-end write operations from starting - any new -+ * writes will return -EROFS: -+ * -+ * (This is really blocking new _allocations_, writes to previously -+ * allocated space can still happen until stopping the allocator in -+ * bch2_dev_allocator_stop()). -+ */ -+ percpu_ref_kill(&c->writes); -+ -+ cancel_work_sync(&c->ec_stripe_delete_work); -+ cancel_delayed_work(&c->pd_controllers_update); -+ -+ /* -+ * If we're not doing an emergency shutdown, we want to wait on -+ * outstanding writes to complete so they don't see spurious errors due -+ * to shutting down the allocator: -+ * -+ * If we are doing an emergency shutdown outstanding writes may -+ * hang until we shutdown the allocator so we don't want to wait -+ * on outstanding writes before shutting everything down - but -+ * we do need to wait on them before returning and signalling -+ * that going RO is complete: -+ */ -+ wait_event(bch_read_only_wait, -+ test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) || -+ test_bit(BCH_FS_EMERGENCY_RO, &c->flags)); -+ -+ __bch2_fs_read_only(c); -+ -+ wait_event(bch_read_only_wait, -+ test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); -+ -+ clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); -+ -+ if (!bch2_journal_error(&c->journal) && -+ !test_bit(BCH_FS_ERROR, &c->flags) && -+ !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) && -+ test_bit(BCH_FS_STARTED, &c->flags) && -+ test_bit(BCH_FS_ALLOC_CLEAN, &c->flags) && -+ !c->opts.norecovery) { -+ bch_verbose(c, "marking filesystem clean"); -+ bch2_fs_mark_clean(c); -+ } -+ -+ clear_bit(BCH_FS_RW, &c->flags); -+} -+ -+static void bch2_fs_read_only_work(struct work_struct *work) -+{ -+ struct bch_fs *c = -+ container_of(work, struct bch_fs, read_only_work); -+ -+ down_write(&c->state_lock); -+ bch2_fs_read_only(c); -+ up_write(&c->state_lock); -+} -+ -+static void bch2_fs_read_only_async(struct bch_fs *c) -+{ -+ queue_work(system_long_wq, &c->read_only_work); -+} -+ -+bool bch2_fs_emergency_read_only(struct bch_fs *c) -+{ -+ bool ret = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags); -+ -+ bch2_journal_halt(&c->journal); -+ bch2_fs_read_only_async(c); -+ -+ wake_up(&bch_read_only_wait); -+ return ret; -+} -+ -+static int bch2_fs_read_write_late(struct bch_fs *c) -+{ -+ int ret; -+ -+ ret = bch2_gc_thread_start(c); -+ if (ret) { -+ bch_err(c, "error starting gc thread"); -+ return ret; -+ } -+ -+ ret = bch2_copygc_start(c); -+ if (ret) { -+ bch_err(c, "error starting copygc thread"); -+ return ret; -+ } -+ -+ ret = bch2_rebalance_start(c); -+ if (ret) { -+ bch_err(c, "error starting rebalance thread"); -+ return ret; -+ } -+ -+ schedule_delayed_work(&c->pd_controllers_update, 5 * HZ); -+ -+ schedule_work(&c->ec_stripe_delete_work); -+ -+ return 0; -+} -+ -+static int __bch2_fs_read_write(struct bch_fs *c, bool early) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ int ret; -+ -+ if (test_bit(BCH_FS_RW, &c->flags)) -+ return 0; -+ -+ /* -+ * nochanges is used for fsck -n mode - we have to allow going 
rw -+ * during recovery for that to work: -+ */ -+ if (c->opts.norecovery || -+ (c->opts.nochanges && -+ (!early || c->opts.read_only))) -+ return -EROFS; -+ -+ ret = bch2_fs_mark_dirty(c); -+ if (ret) -+ goto err; -+ -+ /* -+ * We need to write out a journal entry before we start doing btree -+ * updates, to ensure that on unclean shutdown new journal blacklist -+ * entries are created: -+ */ -+ bch2_journal_meta(&c->journal); -+ -+ clear_bit(BCH_FS_ALLOC_CLEAN, &c->flags); -+ -+ for_each_rw_member(ca, c, i) -+ bch2_dev_allocator_add(c, ca); -+ bch2_recalc_capacity(c); -+ -+ for_each_rw_member(ca, c, i) { -+ ret = bch2_dev_allocator_start(ca); -+ if (ret) { -+ bch_err(c, "error starting allocator threads"); -+ percpu_ref_put(&ca->io_ref); -+ goto err; -+ } -+ } -+ -+ set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); -+ -+ if (!early) { -+ ret = bch2_fs_read_write_late(c); -+ if (ret) -+ goto err; -+ } -+ -+ percpu_ref_reinit(&c->writes); -+ set_bit(BCH_FS_RW, &c->flags); -+ -+ queue_delayed_work(c->journal_reclaim_wq, -+ &c->journal.reclaim_work, 0); -+ return 0; -+err: -+ __bch2_fs_read_only(c); -+ return ret; -+} -+ -+int bch2_fs_read_write(struct bch_fs *c) -+{ -+ return __bch2_fs_read_write(c, false); -+} -+ -+int bch2_fs_read_write_early(struct bch_fs *c) -+{ -+ lockdep_assert_held(&c->state_lock); -+ -+ return __bch2_fs_read_write(c, true); -+} -+ -+/* Filesystem startup/shutdown: */ -+ -+static void bch2_fs_free(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ for (i = 0; i < BCH_TIME_STAT_NR; i++) -+ bch2_time_stats_exit(&c->times[i]); -+ -+ bch2_fs_quota_exit(c); -+ bch2_fs_fsio_exit(c); -+ bch2_fs_ec_exit(c); -+ bch2_fs_encryption_exit(c); -+ bch2_fs_io_exit(c); -+ bch2_fs_btree_interior_update_exit(c); -+ bch2_fs_btree_iter_exit(c); -+ bch2_fs_btree_key_cache_exit(&c->btree_key_cache); -+ bch2_fs_btree_cache_exit(c); -+ bch2_fs_journal_exit(&c->journal); -+ bch2_io_clock_exit(&c->io_clock[WRITE]); -+ bch2_io_clock_exit(&c->io_clock[READ]); -+ bch2_fs_compress_exit(c); -+ bch2_journal_keys_free(&c->journal_keys); -+ bch2_journal_entries_free(&c->journal_entries); -+ percpu_free_rwsem(&c->mark_lock); -+ kfree(c->usage_scratch); -+ free_percpu(c->usage[1]); -+ free_percpu(c->usage[0]); -+ kfree(c->usage_base); -+ free_percpu(c->pcpu); -+ mempool_exit(&c->large_bkey_pool); -+ mempool_exit(&c->btree_bounce_pool); -+ bioset_exit(&c->btree_bio); -+ mempool_exit(&c->fill_iter); -+ percpu_ref_exit(&c->writes); -+ kfree(c->replicas.entries); -+ kfree(c->replicas_gc.entries); -+ kfree(rcu_dereference_protected(c->disk_groups, 1)); -+ kfree(c->journal_seq_blacklist_table); -+ free_heap(&c->copygc_heap); -+ -+ if (c->journal_reclaim_wq) -+ destroy_workqueue(c->journal_reclaim_wq); -+ if (c->copygc_wq) -+ destroy_workqueue(c->copygc_wq); -+ if (c->wq) -+ destroy_workqueue(c->wq); -+ -+ free_pages((unsigned long) c->disk_sb.sb, -+ c->disk_sb.page_order); -+ kvpfree(c, sizeof(*c)); -+ module_put(THIS_MODULE); -+} -+ -+static void bch2_fs_release(struct kobject *kobj) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); -+ -+ bch2_fs_free(c); -+} -+ -+void bch2_fs_stop(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ bch_verbose(c, "shutting down"); -+ -+ set_bit(BCH_FS_STOPPING, &c->flags); -+ -+ cancel_work_sync(&c->journal_seq_blacklist_gc_work); -+ -+ down_write(&c->state_lock); -+ bch2_fs_read_only(c); -+ up_write(&c->state_lock); -+ -+ for_each_member_device(ca, c, i) -+ if (ca->kobj.state_in_sysfs && -+ ca->disk_sb.bdev) -+ 
sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, -+ "bcachefs"); -+ -+ if (c->kobj.state_in_sysfs) -+ kobject_del(&c->kobj); -+ -+ bch2_fs_debug_exit(c); -+ bch2_fs_chardev_exit(c); -+ -+ kobject_put(&c->time_stats); -+ kobject_put(&c->opts_dir); -+ kobject_put(&c->internal); -+ -+ mutex_lock(&bch_fs_list_lock); -+ list_del(&c->list); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ closure_sync(&c->cl); -+ closure_debug_destroy(&c->cl); -+ -+ /* btree prefetch might have kicked off reads in the background: */ -+ bch2_btree_flush_all_reads(c); -+ -+ for_each_member_device(ca, c, i) -+ cancel_work_sync(&ca->io_error_work); -+ -+ cancel_work_sync(&c->btree_write_error_work); -+ cancel_delayed_work_sync(&c->pd_controllers_update); -+ cancel_work_sync(&c->read_only_work); -+ -+ for (i = 0; i < c->sb.nr_devices; i++) -+ if (c->devs[i]) -+ bch2_dev_free(rcu_dereference_protected(c->devs[i], 1)); -+ -+ bch_verbose(c, "shutdown complete"); -+ -+ kobject_put(&c->kobj); -+} -+ -+static const char *bch2_fs_online(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ const char *err = NULL; -+ unsigned i; -+ int ret; -+ -+ lockdep_assert_held(&bch_fs_list_lock); -+ -+ if (!list_empty(&c->list)) -+ return NULL; -+ -+ if (__bch2_uuid_to_fs(c->sb.uuid)) -+ return "filesystem UUID already open"; -+ -+ ret = bch2_fs_chardev_init(c); -+ if (ret) -+ return "error creating character device"; -+ -+ bch2_fs_debug_init(c); -+ -+ if (kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) || -+ kobject_add(&c->internal, &c->kobj, "internal") || -+ kobject_add(&c->opts_dir, &c->kobj, "options") || -+ kobject_add(&c->time_stats, &c->kobj, "time_stats") || -+ bch2_opts_create_sysfs_files(&c->opts_dir)) -+ return "error creating sysfs objects"; -+ -+ down_write(&c->state_lock); -+ -+ err = "error creating sysfs objects"; -+ __for_each_member_device(ca, c, i, NULL) -+ if (bch2_dev_sysfs_online(c, ca)) -+ goto err; -+ -+ list_add(&c->list, &bch_fs_list); -+ err = NULL; -+err: -+ up_write(&c->state_lock); -+ return err; -+} -+ -+static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) -+{ -+ struct bch_sb_field_members *mi; -+ struct bch_fs *c; -+ unsigned i, iter_size; -+ const char *err; -+ -+ pr_verbose_init(opts, ""); -+ -+ c = kvpmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO); -+ if (!c) -+ goto out; -+ -+ __module_get(THIS_MODULE); -+ -+ c->minor = -1; -+ c->disk_sb.fs_sb = true; -+ -+ init_rwsem(&c->state_lock); -+ mutex_init(&c->sb_lock); -+ mutex_init(&c->replicas_gc_lock); -+ mutex_init(&c->btree_root_lock); -+ INIT_WORK(&c->read_only_work, bch2_fs_read_only_work); -+ -+ init_rwsem(&c->gc_lock); -+ -+ for (i = 0; i < BCH_TIME_STAT_NR; i++) -+ bch2_time_stats_init(&c->times[i]); -+ -+ bch2_fs_copygc_init(c); -+ bch2_fs_btree_key_cache_init_early(&c->btree_key_cache); -+ bch2_fs_allocator_background_init(c); -+ bch2_fs_allocator_foreground_init(c); -+ bch2_fs_rebalance_init(c); -+ bch2_fs_quota_init(c); -+ -+ INIT_LIST_HEAD(&c->list); -+ -+ mutex_init(&c->usage_scratch_lock); -+ -+ mutex_init(&c->bio_bounce_pages_lock); -+ -+ bio_list_init(&c->btree_write_error_list); -+ spin_lock_init(&c->btree_write_error_lock); -+ INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work); -+ -+ INIT_WORK(&c->journal_seq_blacklist_gc_work, -+ bch2_blacklist_entries_gc); -+ -+ INIT_LIST_HEAD(&c->journal_entries); -+ -+ INIT_LIST_HEAD(&c->fsck_errors); -+ mutex_init(&c->fsck_error_lock); -+ -+ INIT_LIST_HEAD(&c->ec_stripe_head_list); -+ mutex_init(&c->ec_stripe_head_lock); -+ -+ 
INIT_LIST_HEAD(&c->ec_stripe_new_list); -+ mutex_init(&c->ec_stripe_new_lock); -+ -+ spin_lock_init(&c->ec_stripes_heap_lock); -+ -+ seqcount_init(&c->gc_pos_lock); -+ -+ seqcount_init(&c->usage_lock); -+ -+ sema_init(&c->io_in_flight, 64); -+ -+ c->copy_gc_enabled = 1; -+ c->rebalance.enabled = 1; -+ c->promote_whole_extents = true; -+ -+ c->journal.write_time = &c->times[BCH_TIME_journal_write]; -+ c->journal.delay_time = &c->times[BCH_TIME_journal_delay]; -+ c->journal.blocked_time = &c->times[BCH_TIME_blocked_journal]; -+ c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq]; -+ -+ bch2_fs_btree_cache_init_early(&c->btree_cache); -+ -+ if (percpu_init_rwsem(&c->mark_lock)) -+ goto err; -+ -+ mutex_lock(&c->sb_lock); -+ -+ if (bch2_sb_to_fs(c, sb)) { -+ mutex_unlock(&c->sb_lock); -+ goto err; -+ } -+ -+ mutex_unlock(&c->sb_lock); -+ -+ scnprintf(c->name, sizeof(c->name), "%pU", &c->sb.user_uuid); -+ -+ c->opts = bch2_opts_default; -+ bch2_opts_apply(&c->opts, bch2_opts_from_sb(sb)); -+ bch2_opts_apply(&c->opts, opts); -+ -+ c->block_bits = ilog2(c->opts.block_size); -+ c->btree_foreground_merge_threshold = BTREE_FOREGROUND_MERGE_THRESHOLD(c); -+ -+ if (bch2_fs_init_fault("fs_alloc")) -+ goto err; -+ -+ iter_size = sizeof(struct sort_iter) + -+ (btree_blocks(c) + 1) * 2 * -+ sizeof(struct sort_iter_set); -+ -+ if (!(c->wq = alloc_workqueue("bcachefs", -+ WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || -+ !(c->copygc_wq = alloc_workqueue("bcache_copygc", -+ WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || -+ !(c->journal_reclaim_wq = alloc_workqueue("bcache_journal", -+ WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || -+ percpu_ref_init(&c->writes, bch2_writes_disabled, -+ PERCPU_REF_INIT_DEAD, GFP_KERNEL) || -+ mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) || -+ bioset_init(&c->btree_bio, 1, -+ max(offsetof(struct btree_read_bio, bio), -+ offsetof(struct btree_write_bio, wbio.bio)), -+ BIOSET_NEED_BVECS) || -+ !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) || -+ mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, -+ btree_bytes(c)) || -+ mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || -+ bch2_io_clock_init(&c->io_clock[READ]) || -+ bch2_io_clock_init(&c->io_clock[WRITE]) || -+ bch2_fs_journal_init(&c->journal) || -+ bch2_fs_replicas_init(c) || -+ bch2_fs_btree_cache_init(c) || -+ bch2_fs_btree_key_cache_init(&c->btree_key_cache) || -+ bch2_fs_btree_iter_init(c) || -+ bch2_fs_btree_interior_update_init(c) || -+ bch2_fs_io_init(c) || -+ bch2_fs_encryption_init(c) || -+ bch2_fs_compress_init(c) || -+ bch2_fs_ec_init(c) || -+ bch2_fs_fsio_init(c)) -+ goto err; -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ for (i = 0; i < c->sb.nr_devices; i++) -+ if (bch2_dev_exists(c->disk_sb.sb, mi, i) && -+ bch2_dev_alloc(c, i)) -+ goto err; -+ -+ /* -+ * Now that all allocations have succeeded, init various refcounty -+ * things that let us shutdown: -+ */ -+ closure_init(&c->cl, NULL); -+ -+ c->kobj.kset = bcachefs_kset; -+ kobject_init(&c->kobj, &bch2_fs_ktype); -+ kobject_init(&c->internal, &bch2_fs_internal_ktype); -+ kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype); -+ kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype); -+ -+ mutex_lock(&bch_fs_list_lock); -+ err = bch2_fs_online(c); -+ mutex_unlock(&bch_fs_list_lock); -+ if (err) { -+ bch_err(c, "bch2_fs_online() error: %s", err); -+ goto err; -+ } -+out: -+ pr_verbose_init(opts, "ret %i", c ? 
0 : -ENOMEM); -+ return c; -+err: -+ bch2_fs_free(c); -+ c = NULL; -+ goto out; -+} -+ -+noinline_for_stack -+static void print_mount_opts(struct bch_fs *c) -+{ -+ enum bch_opt_id i; -+ char buf[512]; -+ struct printbuf p = PBUF(buf); -+ bool first = true; -+ -+ strcpy(buf, "(null)"); -+ -+ if (c->opts.read_only) { -+ pr_buf(&p, "ro"); -+ first = false; -+ } -+ -+ for (i = 0; i < bch2_opts_nr; i++) { -+ const struct bch_option *opt = &bch2_opt_table[i]; -+ u64 v = bch2_opt_get_by_id(&c->opts, i); -+ -+ if (!(opt->mode & OPT_MOUNT)) -+ continue; -+ -+ if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) -+ continue; -+ -+ if (!first) -+ pr_buf(&p, ","); -+ first = false; -+ bch2_opt_to_text(&p, c, opt, v, OPT_SHOW_MOUNT_STYLE); -+ } -+ -+ bch_info(c, "mounted with opts: %s", buf); -+} -+ -+int bch2_fs_start(struct bch_fs *c) -+{ -+ const char *err = "cannot allocate memory"; -+ struct bch_sb_field_members *mi; -+ struct bch_dev *ca; -+ time64_t now = ktime_get_real_seconds(); -+ unsigned i; -+ int ret = -EINVAL; -+ -+ down_write(&c->state_lock); -+ -+ BUG_ON(test_bit(BCH_FS_STARTED, &c->flags)); -+ -+ mutex_lock(&c->sb_lock); -+ -+ for_each_online_member(ca, c, i) -+ bch2_sb_from_fs(c, ca); -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ for_each_online_member(ca, c, i) -+ mi->members[ca->dev_idx].last_mount = cpu_to_le64(now); -+ -+ mutex_unlock(&c->sb_lock); -+ -+ for_each_rw_member(ca, c, i) -+ bch2_dev_allocator_add(c, ca); -+ bch2_recalc_capacity(c); -+ -+ ret = BCH_SB_INITIALIZED(c->disk_sb.sb) -+ ? bch2_fs_recovery(c) -+ : bch2_fs_initialize(c); -+ if (ret) -+ goto err; -+ -+ ret = bch2_opts_check_may_set(c); -+ if (ret) -+ goto err; -+ -+ err = "dynamic fault"; -+ ret = -EINVAL; -+ if (bch2_fs_init_fault("fs_start")) -+ goto err; -+ -+ set_bit(BCH_FS_STARTED, &c->flags); -+ -+ if (c->opts.read_only || c->opts.nochanges) { -+ bch2_fs_read_only(c); -+ } else { -+ err = "error going read write"; -+ ret = !test_bit(BCH_FS_RW, &c->flags) -+ ? 
bch2_fs_read_write(c) -+ : bch2_fs_read_write_late(c); -+ if (ret) -+ goto err; -+ } -+ -+ print_mount_opts(c); -+ ret = 0; -+out: -+ up_write(&c->state_lock); -+ return ret; -+err: -+ switch (ret) { -+ case BCH_FSCK_ERRORS_NOT_FIXED: -+ bch_err(c, "filesystem contains errors: please report this to the developers"); -+ pr_cont("mount with -o fix_errors to repair\n"); -+ err = "fsck error"; -+ break; -+ case BCH_FSCK_REPAIR_UNIMPLEMENTED: -+ bch_err(c, "filesystem contains errors: please report this to the developers"); -+ pr_cont("repair unimplemented: inform the developers so that it can be added\n"); -+ err = "fsck error"; -+ break; -+ case BCH_FSCK_REPAIR_IMPOSSIBLE: -+ bch_err(c, "filesystem contains errors, but repair impossible"); -+ err = "fsck error"; -+ break; -+ case BCH_FSCK_UNKNOWN_VERSION: -+ err = "unknown metadata version";; -+ break; -+ case -ENOMEM: -+ err = "cannot allocate memory"; -+ break; -+ case -EIO: -+ err = "IO error"; -+ break; -+ } -+ -+ if (ret >= 0) -+ ret = -EIO; -+ goto out; -+} -+ -+static const char *bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) -+{ -+ struct bch_sb_field_members *sb_mi; -+ -+ sb_mi = bch2_sb_get_members(sb); -+ if (!sb_mi) -+ return "Invalid superblock: member info area missing"; -+ -+ if (le16_to_cpu(sb->block_size) != c->opts.block_size) -+ return "mismatched block size"; -+ -+ if (le16_to_cpu(sb_mi->members[sb->dev_idx].bucket_size) < -+ BCH_SB_BTREE_NODE_SIZE(c->disk_sb.sb)) -+ return "new cache bucket size is too small"; -+ -+ return NULL; -+} -+ -+static const char *bch2_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb) -+{ -+ struct bch_sb *newest = -+ le64_to_cpu(fs->seq) > le64_to_cpu(sb->seq) ? fs : sb; -+ struct bch_sb_field_members *mi = bch2_sb_get_members(newest); -+ -+ if (uuid_le_cmp(fs->uuid, sb->uuid)) -+ return "device not a member of filesystem"; -+ -+ if (!bch2_dev_exists(newest, mi, sb->dev_idx)) -+ return "device has been removed"; -+ -+ if (fs->block_size != sb->block_size) -+ return "mismatched block size"; -+ -+ return NULL; -+} -+ -+/* Device startup/shutdown: */ -+ -+static void bch2_dev_release(struct kobject *kobj) -+{ -+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); -+ -+ kfree(ca); -+} -+ -+static void bch2_dev_free(struct bch_dev *ca) -+{ -+ cancel_work_sync(&ca->io_error_work); -+ -+ if (ca->kobj.state_in_sysfs && -+ ca->disk_sb.bdev) -+ sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, -+ "bcachefs"); -+ -+ if (ca->kobj.state_in_sysfs) -+ kobject_del(&ca->kobj); -+ -+ bch2_free_super(&ca->disk_sb); -+ bch2_dev_journal_exit(ca); -+ -+ free_percpu(ca->io_done); -+ bioset_exit(&ca->replica_set); -+ bch2_dev_buckets_free(ca); -+ free_page((unsigned long) ca->sb_read_scratch); -+ -+ bch2_time_stats_exit(&ca->io_latency[WRITE]); -+ bch2_time_stats_exit(&ca->io_latency[READ]); -+ -+ percpu_ref_exit(&ca->io_ref); -+ percpu_ref_exit(&ca->ref); -+ kobject_put(&ca->kobj); -+} -+ -+static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca) -+{ -+ -+ lockdep_assert_held(&c->state_lock); -+ -+ if (percpu_ref_is_zero(&ca->io_ref)) -+ return; -+ -+ __bch2_dev_read_only(c, ca); -+ -+ reinit_completion(&ca->io_ref_completion); -+ percpu_ref_kill(&ca->io_ref); -+ wait_for_completion(&ca->io_ref_completion); -+ -+ if (ca->kobj.state_in_sysfs) { -+ struct kobject *block = -+ &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj; -+ -+ sysfs_remove_link(block, "bcachefs"); -+ sysfs_remove_link(&ca->kobj, "block"); -+ } -+ -+ bch2_free_super(&ca->disk_sb); -+ bch2_dev_journal_exit(ca); 
-+} -+ -+static void bch2_dev_ref_complete(struct percpu_ref *ref) -+{ -+ struct bch_dev *ca = container_of(ref, struct bch_dev, ref); -+ -+ complete(&ca->ref_completion); -+} -+ -+static void bch2_dev_io_ref_complete(struct percpu_ref *ref) -+{ -+ struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref); -+ -+ complete(&ca->io_ref_completion); -+} -+ -+static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca) -+{ -+ int ret; -+ -+ if (!c->kobj.state_in_sysfs) -+ return 0; -+ -+ if (!ca->kobj.state_in_sysfs) { -+ ret = kobject_add(&ca->kobj, &c->kobj, -+ "dev-%u", ca->dev_idx); -+ if (ret) -+ return ret; -+ } -+ -+ if (ca->disk_sb.bdev) { -+ struct kobject *block = -+ &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj; -+ -+ ret = sysfs_create_link(block, &ca->kobj, "bcachefs"); -+ if (ret) -+ return ret; -+ ret = sysfs_create_link(&ca->kobj, block, "block"); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, -+ struct bch_member *member) -+{ -+ struct bch_dev *ca; -+ -+ ca = kzalloc(sizeof(*ca), GFP_KERNEL); -+ if (!ca) -+ return NULL; -+ -+ kobject_init(&ca->kobj, &bch2_dev_ktype); -+ init_completion(&ca->ref_completion); -+ init_completion(&ca->io_ref_completion); -+ -+ init_rwsem(&ca->bucket_lock); -+ -+ INIT_WORK(&ca->io_error_work, bch2_io_error_work); -+ -+ bch2_time_stats_init(&ca->io_latency[READ]); -+ bch2_time_stats_init(&ca->io_latency[WRITE]); -+ -+ ca->mi = bch2_mi_to_cpu(member); -+ ca->uuid = member->uuid; -+ -+ if (opt_defined(c->opts, discard)) -+ ca->mi.discard = opt_get(c->opts, discard); -+ -+ if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete, -+ 0, GFP_KERNEL) || -+ percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete, -+ PERCPU_REF_INIT_DEAD, GFP_KERNEL) || -+ !(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) || -+ bch2_dev_buckets_alloc(c, ca) || -+ bioset_init(&ca->replica_set, 4, -+ offsetof(struct bch_write_bio, bio), 0) || -+ !(ca->io_done = alloc_percpu(*ca->io_done))) -+ goto err; -+ -+ return ca; -+err: -+ bch2_dev_free(ca); -+ return NULL; -+} -+ -+static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca, -+ unsigned dev_idx) -+{ -+ ca->dev_idx = dev_idx; -+ __set_bit(ca->dev_idx, ca->self.d); -+ scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx); -+ -+ ca->fs = c; -+ rcu_assign_pointer(c->devs[ca->dev_idx], ca); -+ -+ if (bch2_dev_sysfs_online(c, ca)) -+ pr_warn("error creating sysfs objects"); -+} -+ -+static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) -+{ -+ struct bch_member *member = -+ bch2_sb_get_members(c->disk_sb.sb)->members + dev_idx; -+ struct bch_dev *ca = NULL; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ if (bch2_fs_init_fault("dev_alloc")) -+ goto err; -+ -+ ca = __bch2_dev_alloc(c, member); -+ if (!ca) -+ goto err; -+ -+ bch2_dev_attach(c, ca, dev_idx); -+out: -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+err: -+ if (ca) -+ bch2_dev_free(ca); -+ ret = -ENOMEM; -+ goto out; -+} -+ -+static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) -+{ -+ unsigned ret; -+ -+ if (bch2_dev_is_online(ca)) { -+ bch_err(ca, "already have device online in slot %u", -+ sb->sb->dev_idx); -+ return -EINVAL; -+ } -+ -+ if (get_capacity(sb->bdev->bd_disk) < -+ ca->mi.bucket_size * ca->mi.nbuckets) { -+ bch_err(ca, "cannot online: device too small"); -+ return -EINVAL; -+ } -+ -+ BUG_ON(!percpu_ref_is_zero(&ca->io_ref)); -+ -+ if (get_capacity(sb->bdev->bd_disk) < -+ ca->mi.bucket_size * 
ca->mi.nbuckets) { -+ bch_err(ca, "device too small"); -+ return -EINVAL; -+ } -+ -+ ret = bch2_dev_journal_init(ca, sb->sb); -+ if (ret) -+ return ret; -+ -+ /* Commit: */ -+ ca->disk_sb = *sb; -+ if (sb->mode & FMODE_EXCL) -+ ca->disk_sb.bdev->bd_holder = ca; -+ memset(sb, 0, sizeof(*sb)); -+ -+ percpu_ref_reinit(&ca->io_ref); -+ -+ return 0; -+} -+ -+static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ lockdep_assert_held(&c->state_lock); -+ -+ if (le64_to_cpu(sb->sb->seq) > -+ le64_to_cpu(c->disk_sb.sb->seq)) -+ bch2_sb_to_fs(c, sb->sb); -+ -+ BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices || -+ !c->devs[sb->sb->dev_idx]); -+ -+ ca = bch_dev_locked(c, sb->sb->dev_idx); -+ -+ ret = __bch2_dev_attach_bdev(ca, sb); -+ if (ret) -+ return ret; -+ -+ if (test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags) && -+ !percpu_u64_get(&ca->usage[0]->buckets[BCH_DATA_sb])) { -+ mutex_lock(&c->sb_lock); -+ bch2_mark_dev_superblock(ca->fs, ca, 0); -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ bch2_dev_sysfs_online(c, ca); -+ -+ if (c->sb.nr_devices == 1) -+ bdevname(ca->disk_sb.bdev, c->name); -+ bdevname(ca->disk_sb.bdev, ca->name); -+ -+ rebalance_wakeup(c); -+ return 0; -+} -+ -+/* Device management: */ -+ -+/* -+ * Note: this function is also used by the error paths - when a particular -+ * device sees an error, we call it to determine whether we can just set the -+ * device RO, or - if this function returns false - we'll set the whole -+ * filesystem RO: -+ * -+ * XXX: maybe we should be more explicit about whether we're changing state -+ * because we got an error or what have you? -+ */ -+bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, -+ enum bch_member_state new_state, int flags) -+{ -+ struct bch_devs_mask new_online_devs; -+ struct replicas_status s; -+ struct bch_dev *ca2; -+ int i, nr_rw = 0, required; -+ -+ lockdep_assert_held(&c->state_lock); -+ -+ switch (new_state) { -+ case BCH_MEMBER_STATE_RW: -+ return true; -+ case BCH_MEMBER_STATE_RO: -+ if (ca->mi.state != BCH_MEMBER_STATE_RW) -+ return true; -+ -+ /* do we have enough devices to write to? */ -+ for_each_member_device(ca2, c, i) -+ if (ca2 != ca) -+ nr_rw += ca2->mi.state == BCH_MEMBER_STATE_RW; -+ -+ required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED) -+ ? c->opts.metadata_replicas -+ : c->opts.metadata_replicas_required, -+ !(flags & BCH_FORCE_IF_DATA_DEGRADED) -+ ? c->opts.data_replicas -+ : c->opts.data_replicas_required); -+ -+ return nr_rw >= required; -+ case BCH_MEMBER_STATE_FAILED: -+ case BCH_MEMBER_STATE_SPARE: -+ if (ca->mi.state != BCH_MEMBER_STATE_RW && -+ ca->mi.state != BCH_MEMBER_STATE_RO) -+ return true; -+ -+ /* do we have enough devices to read from? */ -+ new_online_devs = bch2_online_devs(c); -+ __clear_bit(ca->dev_idx, new_online_devs.d); -+ -+ s = __bch2_replicas_status(c, new_online_devs); -+ -+ return bch2_have_enough_devs(s, flags); -+ default: -+ BUG(); -+ } -+} -+ -+static bool bch2_fs_may_start(struct bch_fs *c) -+{ -+ struct replicas_status s; -+ struct bch_sb_field_members *mi; -+ struct bch_dev *ca; -+ unsigned i, flags = c->opts.degraded -+ ? 
BCH_FORCE_IF_DEGRADED -+ : 0; -+ -+ if (!c->opts.degraded) { -+ mutex_lock(&c->sb_lock); -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ -+ for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { -+ if (!bch2_dev_exists(c->disk_sb.sb, mi, i)) -+ continue; -+ -+ ca = bch_dev_locked(c, i); -+ -+ if (!bch2_dev_is_online(ca) && -+ (ca->mi.state == BCH_MEMBER_STATE_RW || -+ ca->mi.state == BCH_MEMBER_STATE_RO)) { -+ mutex_unlock(&c->sb_lock); -+ return false; -+ } -+ } -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ s = bch2_replicas_status(c); -+ -+ return bch2_have_enough_devs(s, flags); -+} -+ -+static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) -+{ -+ /* -+ * Device going read only means the copygc reserve get smaller, so we -+ * don't want that happening while copygc is in progress: -+ */ -+ bch2_copygc_stop(c); -+ -+ /* -+ * The allocator thread itself allocates btree nodes, so stop it first: -+ */ -+ bch2_dev_allocator_stop(ca); -+ bch2_dev_allocator_remove(c, ca); -+ bch2_dev_journal_stop(&c->journal, ca); -+ -+ bch2_copygc_start(c); -+} -+ -+static const char *__bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) -+{ -+ lockdep_assert_held(&c->state_lock); -+ -+ BUG_ON(ca->mi.state != BCH_MEMBER_STATE_RW); -+ -+ bch2_dev_allocator_add(c, ca); -+ bch2_recalc_capacity(c); -+ -+ if (bch2_dev_allocator_start(ca)) -+ return "error starting allocator thread"; -+ -+ return NULL; -+} -+ -+int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, -+ enum bch_member_state new_state, int flags) -+{ -+ struct bch_sb_field_members *mi; -+ int ret = 0; -+ -+ if (ca->mi.state == new_state) -+ return 0; -+ -+ if (!bch2_dev_state_allowed(c, ca, new_state, flags)) -+ return -EINVAL; -+ -+ if (new_state != BCH_MEMBER_STATE_RW) -+ __bch2_dev_read_only(c, ca); -+ -+ bch_notice(ca, "%s", bch2_dev_state[new_state]); -+ -+ mutex_lock(&c->sb_lock); -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx], new_state); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ if (new_state == BCH_MEMBER_STATE_RW && -+ __bch2_dev_read_write(c, ca)) -+ ret = -ENOMEM; -+ -+ rebalance_wakeup(c); -+ -+ return ret; -+} -+ -+int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, -+ enum bch_member_state new_state, int flags) -+{ -+ int ret; -+ -+ down_write(&c->state_lock); -+ ret = __bch2_dev_set_state(c, ca, new_state, flags); -+ up_write(&c->state_lock); -+ -+ return ret; -+} -+ -+/* Device add/removal: */ -+ -+int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct btree_trans trans; -+ size_t i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < ca->mi.nbuckets; i++) { -+ ret = bch2_btree_key_cache_flush(&trans, -+ BTREE_ID_ALLOC, POS(ca->dev_idx, i)); -+ if (ret) -+ break; -+ } -+ bch2_trans_exit(&trans); -+ -+ if (ret) -+ return ret; -+ -+ return bch2_btree_delete_range(c, BTREE_ID_ALLOC, -+ POS(ca->dev_idx, 0), -+ POS(ca->dev_idx + 1, 0), -+ NULL); -+} -+ -+int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) -+{ -+ struct bch_sb_field_members *mi; -+ unsigned dev_idx = ca->dev_idx, data; -+ int ret = -EINVAL; -+ -+ down_write(&c->state_lock); -+ -+ /* -+ * We consume a reference to ca->ref, regardless of whether we succeed -+ * or fail: -+ */ -+ percpu_ref_put(&ca->ref); -+ -+ if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) { -+ bch_err(ca, "Cannot remove without losing data"); -+ goto err; -+ } -+ -+ __bch2_dev_read_only(c, ca); -+ -+ ret = bch2_dev_data_drop(c, 
ca->dev_idx, flags); -+ if (ret) { -+ bch_err(ca, "Remove failed: error %i dropping data", ret); -+ goto err; -+ } -+ -+ ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx); -+ if (ret) { -+ bch_err(ca, "Remove failed: error %i flushing journal", ret); -+ goto err; -+ } -+ -+ ret = bch2_dev_remove_alloc(c, ca); -+ if (ret) { -+ bch_err(ca, "Remove failed, error deleting alloc info"); -+ goto err; -+ } -+ -+ /* -+ * must flush all existing journal entries, they might have -+ * (overwritten) keys that point to the device we're removing: -+ */ -+ bch2_journal_flush_all_pins(&c->journal); -+ /* -+ * hack to ensure bch2_replicas_gc2() clears out entries to this device -+ */ -+ bch2_journal_meta(&c->journal); -+ ret = bch2_journal_error(&c->journal); -+ if (ret) { -+ bch_err(ca, "Remove failed, journal error"); -+ goto err; -+ } -+ -+ ret = bch2_replicas_gc2(c); -+ if (ret) { -+ bch_err(ca, "Remove failed: error %i from replicas gc", ret); -+ goto err; -+ } -+ -+ data = bch2_dev_has_data(c, ca); -+ if (data) { -+ char data_has_str[100]; -+ -+ bch2_flags_to_text(&PBUF(data_has_str), -+ bch2_data_types, data); -+ bch_err(ca, "Remove failed, still has data (%s)", data_has_str); -+ ret = -EBUSY; -+ goto err; -+ } -+ -+ __bch2_dev_offline(c, ca); -+ -+ mutex_lock(&c->sb_lock); -+ rcu_assign_pointer(c->devs[ca->dev_idx], NULL); -+ mutex_unlock(&c->sb_lock); -+ -+ percpu_ref_kill(&ca->ref); -+ wait_for_completion(&ca->ref_completion); -+ -+ bch2_dev_free(ca); -+ -+ /* -+ * Free this device's slot in the bch_member array - all pointers to -+ * this device must be gone: -+ */ -+ mutex_lock(&c->sb_lock); -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ memset(&mi->members[dev_idx].uuid, 0, sizeof(mi->members[dev_idx].uuid)); -+ -+ bch2_write_super(c); -+ -+ mutex_unlock(&c->sb_lock); -+ up_write(&c->state_lock); -+ return 0; -+err: -+ if (ca->mi.state == BCH_MEMBER_STATE_RW && -+ !percpu_ref_is_zero(&ca->io_ref)) -+ __bch2_dev_read_write(c, ca); -+ up_write(&c->state_lock); -+ return ret; -+} -+ -+static void dev_usage_clear(struct bch_dev *ca) -+{ -+ struct bucket_array *buckets; -+ -+ percpu_memset(ca->usage[0], 0, sizeof(*ca->usage[0])); -+ -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ memset(buckets->b, 0, sizeof(buckets->b[0]) * buckets->nbuckets); -+ up_read(&ca->bucket_lock); -+} -+ -+/* Add new device to running filesystem: */ -+int bch2_dev_add(struct bch_fs *c, const char *path) -+{ -+ struct bch_opts opts = bch2_opts_empty(); -+ struct bch_sb_handle sb; -+ const char *err; -+ struct bch_dev *ca = NULL; -+ struct bch_sb_field_members *mi; -+ struct bch_member dev_mi; -+ unsigned dev_idx, nr_devices, u64s; -+ int ret; -+ -+ ret = bch2_read_super(path, &opts, &sb); -+ if (ret) -+ return ret; -+ -+ err = bch2_sb_validate(&sb); -+ if (err) -+ return -EINVAL; -+ -+ dev_mi = bch2_sb_get_members(sb.sb)->members[sb.sb->dev_idx]; -+ -+ err = bch2_dev_may_add(sb.sb, c); -+ if (err) -+ return -EINVAL; -+ -+ ca = __bch2_dev_alloc(c, &dev_mi); -+ if (!ca) { -+ bch2_free_super(&sb); -+ return -ENOMEM; -+ } -+ -+ ret = __bch2_dev_attach_bdev(ca, &sb); -+ if (ret) { -+ bch2_dev_free(ca); -+ return ret; -+ } -+ -+ /* -+ * We want to allocate journal on the new device before adding the new -+ * device to the filesystem because allocating after we attach requires -+ * spinning up the allocator thread, and the allocator thread requires -+ * doing btree writes, which if the existing devices are RO isn't going -+ * to work -+ * -+ * So we have to mark where the superblocks are, but 
marking allocated -+ * data normally updates the filesystem usage too, so we have to mark, -+ * allocate the journal, reset all the marks, then remark after we -+ * attach... -+ */ -+ bch2_mark_dev_superblock(ca->fs, ca, 0); -+ -+ err = "journal alloc failed"; -+ ret = bch2_dev_journal_alloc(ca); -+ if (ret) -+ goto err; -+ -+ dev_usage_clear(ca); -+ -+ down_write(&c->state_lock); -+ mutex_lock(&c->sb_lock); -+ -+ err = "insufficient space in new superblock"; -+ ret = bch2_sb_from_fs(c, ca); -+ if (ret) -+ goto err_unlock; -+ -+ mi = bch2_sb_get_members(ca->disk_sb.sb); -+ -+ if (!bch2_sb_resize_members(&ca->disk_sb, -+ le32_to_cpu(mi->field.u64s) + -+ sizeof(dev_mi) / sizeof(u64))) { -+ ret = -ENOSPC; -+ goto err_unlock; -+ } -+ -+ if (dynamic_fault("bcachefs:add:no_slot")) -+ goto no_slot; -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++) -+ if (!bch2_dev_exists(c->disk_sb.sb, mi, dev_idx)) -+ goto have_slot; -+no_slot: -+ err = "no slots available in superblock"; -+ ret = -ENOSPC; -+ goto err_unlock; -+ -+have_slot: -+ nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices); -+ u64s = (sizeof(struct bch_sb_field_members) + -+ sizeof(struct bch_member) * nr_devices) / sizeof(u64); -+ -+ err = "no space in superblock for member info"; -+ ret = -ENOSPC; -+ -+ mi = bch2_sb_resize_members(&c->disk_sb, u64s); -+ if (!mi) -+ goto err_unlock; -+ -+ /* success: */ -+ -+ mi->members[dev_idx] = dev_mi; -+ mi->members[dev_idx].last_mount = cpu_to_le64(ktime_get_real_seconds()); -+ c->disk_sb.sb->nr_devices = nr_devices; -+ -+ ca->disk_sb.sb->dev_idx = dev_idx; -+ bch2_dev_attach(c, ca, dev_idx); -+ -+ bch2_mark_dev_superblock(c, ca, 0); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ if (ca->mi.state == BCH_MEMBER_STATE_RW) { -+ err = __bch2_dev_read_write(c, ca); -+ if (err) -+ goto err_late; -+ } -+ -+ up_write(&c->state_lock); -+ return 0; -+ -+err_unlock: -+ mutex_unlock(&c->sb_lock); -+ up_write(&c->state_lock); -+err: -+ if (ca) -+ bch2_dev_free(ca); -+ bch2_free_super(&sb); -+ bch_err(c, "Unable to add device: %s", err); -+ return ret; -+err_late: -+ bch_err(c, "Error going rw after adding device: %s", err); -+ return -EINVAL; -+} -+ -+/* Hot add existing device to running filesystem: */ -+int bch2_dev_online(struct bch_fs *c, const char *path) -+{ -+ struct bch_opts opts = bch2_opts_empty(); -+ struct bch_sb_handle sb = { NULL }; -+ struct bch_sb_field_members *mi; -+ struct bch_dev *ca; -+ unsigned dev_idx; -+ const char *err; -+ int ret; -+ -+ down_write(&c->state_lock); -+ -+ ret = bch2_read_super(path, &opts, &sb); -+ if (ret) { -+ up_write(&c->state_lock); -+ return ret; -+ } -+ -+ dev_idx = sb.sb->dev_idx; -+ -+ err = bch2_dev_in_fs(c->disk_sb.sb, sb.sb); -+ if (err) -+ goto err; -+ -+ if (bch2_dev_attach_bdev(c, &sb)) { -+ err = "bch2_dev_attach_bdev() error"; -+ goto err; -+ } -+ -+ ca = bch_dev_locked(c, dev_idx); -+ if (ca->mi.state == BCH_MEMBER_STATE_RW) { -+ err = __bch2_dev_read_write(c, ca); -+ if (err) -+ goto err; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ -+ mi->members[ca->dev_idx].last_mount = -+ cpu_to_le64(ktime_get_real_seconds()); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ up_write(&c->state_lock); -+ return 0; -+err: -+ up_write(&c->state_lock); -+ bch2_free_super(&sb); -+ bch_err(c, "error bringing %s online: %s", path, err); -+ return -EINVAL; -+} -+ -+int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) -+{ -+ 
down_write(&c->state_lock); -+ -+ if (!bch2_dev_is_online(ca)) { -+ bch_err(ca, "Already offline"); -+ up_write(&c->state_lock); -+ return 0; -+ } -+ -+ if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) { -+ bch_err(ca, "Cannot offline required disk"); -+ up_write(&c->state_lock); -+ return -EINVAL; -+ } -+ -+ __bch2_dev_offline(c, ca); -+ -+ up_write(&c->state_lock); -+ return 0; -+} -+ -+int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) -+{ -+ struct bch_member *mi; -+ int ret = 0; -+ -+ down_write(&c->state_lock); -+ -+ if (nbuckets < ca->mi.nbuckets) { -+ bch_err(ca, "Cannot shrink yet"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ if (bch2_dev_is_online(ca) && -+ get_capacity(ca->disk_sb.bdev->bd_disk) < -+ ca->mi.bucket_size * nbuckets) { -+ bch_err(ca, "New size larger than device"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ ret = bch2_dev_buckets_resize(c, ca, nbuckets); -+ if (ret) { -+ bch_err(ca, "Resize error: %i", ret); -+ goto err; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; -+ mi->nbuckets = cpu_to_le64(nbuckets); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ bch2_recalc_capacity(c); -+err: -+ up_write(&c->state_lock); -+ return ret; -+} -+ -+/* return with ref on ca->ref: */ -+struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path) -+{ -+ struct block_device *bdev = lookup_bdev(path); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ if (IS_ERR(bdev)) -+ return ERR_CAST(bdev); -+ -+ for_each_member_device(ca, c, i) -+ if (ca->disk_sb.bdev == bdev) -+ goto found; -+ -+ ca = ERR_PTR(-ENOENT); -+found: -+ bdput(bdev); -+ return ca; -+} -+ -+/* Filesystem open: */ -+ -+struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, -+ struct bch_opts opts) -+{ -+ struct bch_sb_handle *sb = NULL; -+ struct bch_fs *c = NULL; -+ struct bch_sb_field_members *mi; -+ unsigned i, best_sb = 0; -+ const char *err; -+ int ret = -ENOMEM; -+ -+ pr_verbose_init(opts, ""); -+ -+ if (!nr_devices) { -+ c = ERR_PTR(-EINVAL); -+ goto out2; -+ } -+ -+ if (!try_module_get(THIS_MODULE)) { -+ c = ERR_PTR(-ENODEV); -+ goto out2; -+ } -+ -+ sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL); -+ if (!sb) -+ goto err; -+ -+ for (i = 0; i < nr_devices; i++) { -+ ret = bch2_read_super(devices[i], &opts, &sb[i]); -+ if (ret) -+ goto err; -+ -+ err = bch2_sb_validate(&sb[i]); -+ if (err) -+ goto err_print; -+ } -+ -+ for (i = 1; i < nr_devices; i++) -+ if (le64_to_cpu(sb[i].sb->seq) > -+ le64_to_cpu(sb[best_sb].sb->seq)) -+ best_sb = i; -+ -+ mi = bch2_sb_get_members(sb[best_sb].sb); -+ -+ i = 0; -+ while (i < nr_devices) { -+ if (i != best_sb && -+ !bch2_dev_exists(sb[best_sb].sb, mi, sb[i].sb->dev_idx)) { -+ char buf[BDEVNAME_SIZE]; -+ pr_info("%s has been removed, skipping", -+ bdevname(sb[i].bdev, buf)); -+ bch2_free_super(&sb[i]); -+ array_remove_item(sb, nr_devices, i); -+ continue; -+ } -+ -+ err = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb); -+ if (err) -+ goto err_print; -+ i++; -+ } -+ -+ ret = -ENOMEM; -+ c = bch2_fs_alloc(sb[best_sb].sb, opts); -+ if (!c) -+ goto err; -+ -+ err = "bch2_dev_online() error"; -+ down_write(&c->state_lock); -+ for (i = 0; i < nr_devices; i++) -+ if (bch2_dev_attach_bdev(c, &sb[i])) { -+ up_write(&c->state_lock); -+ goto err_print; -+ } -+ up_write(&c->state_lock); -+ -+ err = "insufficient devices"; -+ if (!bch2_fs_may_start(c)) -+ goto err_print; -+ -+ if (!c->opts.nostart) { -+ ret = bch2_fs_start(c); -+ if (ret) -+ goto err; -+ } -+out: -+ 
kfree(sb); -+ module_put(THIS_MODULE); -+out2: -+ pr_verbose_init(opts, "ret %i", PTR_ERR_OR_ZERO(c)); -+ return c; -+err_print: -+ pr_err("bch_fs_open err opening %s: %s", -+ devices[0], err); -+ ret = -EINVAL; -+err: -+ if (c) -+ bch2_fs_stop(c); -+ for (i = 0; i < nr_devices; i++) -+ bch2_free_super(&sb[i]); -+ c = ERR_PTR(ret); -+ goto out; -+} -+ -+static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb, -+ struct bch_opts opts) -+{ -+ const char *err; -+ struct bch_fs *c; -+ bool allocated_fs = false; -+ int ret; -+ -+ err = bch2_sb_validate(sb); -+ if (err) -+ return err; -+ -+ mutex_lock(&bch_fs_list_lock); -+ c = __bch2_uuid_to_fs(sb->sb->uuid); -+ if (c) { -+ closure_get(&c->cl); -+ -+ err = bch2_dev_in_fs(c->disk_sb.sb, sb->sb); -+ if (err) -+ goto err; -+ } else { -+ c = bch2_fs_alloc(sb->sb, opts); -+ err = "cannot allocate memory"; -+ if (!c) -+ goto err; -+ -+ allocated_fs = true; -+ } -+ -+ err = "bch2_dev_online() error"; -+ -+ mutex_lock(&c->sb_lock); -+ if (bch2_dev_attach_bdev(c, sb)) { -+ mutex_unlock(&c->sb_lock); -+ goto err; -+ } -+ mutex_unlock(&c->sb_lock); -+ -+ if (!c->opts.nostart && bch2_fs_may_start(c)) { -+ err = "error starting filesystem"; -+ ret = bch2_fs_start(c); -+ if (ret) -+ goto err; -+ } -+ -+ closure_put(&c->cl); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ return NULL; -+err: -+ mutex_unlock(&bch_fs_list_lock); -+ -+ if (allocated_fs) -+ bch2_fs_stop(c); -+ else if (c) -+ closure_put(&c->cl); -+ -+ return err; -+} -+ -+const char *bch2_fs_open_incremental(const char *path) -+{ -+ struct bch_sb_handle sb; -+ struct bch_opts opts = bch2_opts_empty(); -+ const char *err; -+ -+ if (bch2_read_super(path, &opts, &sb)) -+ return "error reading superblock"; -+ -+ err = __bch2_fs_open_incremental(&sb, opts); -+ bch2_free_super(&sb); -+ -+ return err; -+} -+ -+/* Global interfaces/init */ -+ -+static void bcachefs_exit(void) -+{ -+ bch2_debug_exit(); -+ bch2_vfs_exit(); -+ bch2_chardev_exit(); -+ if (bcachefs_kset) -+ kset_unregister(bcachefs_kset); -+} -+ -+static int __init bcachefs_init(void) -+{ -+ bch2_bkey_pack_test(); -+ bch2_inode_pack_test(); -+ -+ if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) || -+ bch2_chardev_init() || -+ bch2_vfs_init() || -+ bch2_debug_init()) -+ goto err; -+ -+ return 0; -+err: -+ bcachefs_exit(); -+ return -ENOMEM; -+} -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ bool bch2_##name; \ -+ module_param_named(name, bch2_##name, bool, 0644); \ -+ MODULE_PARM_DESC(name, description); -+BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+module_exit(bcachefs_exit); -+module_init(bcachefs_init); -diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h -new file mode 100644 -index 000000000000..fffee96726ce ---- /dev/null -+++ b/fs/bcachefs/super.h -@@ -0,0 +1,240 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SUPER_H -+#define _BCACHEFS_SUPER_H -+ -+#include "extents.h" -+ -+#include "bcachefs_ioctl.h" -+ -+#include -+ -+static inline size_t sector_to_bucket(const struct bch_dev *ca, sector_t s) -+{ -+ return div_u64(s, ca->mi.bucket_size); -+} -+ -+static inline sector_t bucket_to_sector(const struct bch_dev *ca, size_t b) -+{ -+ return ((sector_t) b) * ca->mi.bucket_size; -+} -+ -+static inline sector_t bucket_remainder(const struct bch_dev *ca, sector_t s) -+{ -+ u32 remainder; -+ -+ div_u64_rem(s, ca->mi.bucket_size, &remainder); -+ return remainder; -+} -+ -+static inline bool bch2_dev_is_online(struct bch_dev *ca) -+{ -+ return !percpu_ref_is_zero(&ca->io_ref); -+} -+ 
-+static inline bool bch2_dev_is_readable(struct bch_dev *ca) -+{ -+ return bch2_dev_is_online(ca) && -+ ca->mi.state != BCH_MEMBER_STATE_FAILED; -+} -+ -+static inline bool bch2_dev_get_ioref(struct bch_dev *ca, int rw) -+{ -+ if (!percpu_ref_tryget(&ca->io_ref)) -+ return false; -+ -+ if (ca->mi.state == BCH_MEMBER_STATE_RW || -+ (ca->mi.state == BCH_MEMBER_STATE_RO && rw == READ)) -+ return true; -+ -+ percpu_ref_put(&ca->io_ref); -+ return false; -+} -+ -+static inline unsigned dev_mask_nr(const struct bch_devs_mask *devs) -+{ -+ return bitmap_weight(devs->d, BCH_SB_MEMBERS_MAX); -+} -+ -+static inline bool bch2_dev_list_has_dev(struct bch_devs_list devs, -+ unsigned dev) -+{ -+ unsigned i; -+ -+ for (i = 0; i < devs.nr; i++) -+ if (devs.devs[i] == dev) -+ return true; -+ -+ return false; -+} -+ -+static inline void bch2_dev_list_drop_dev(struct bch_devs_list *devs, -+ unsigned dev) -+{ -+ unsigned i; -+ -+ for (i = 0; i < devs->nr; i++) -+ if (devs->devs[i] == dev) { -+ array_remove_item(devs->devs, devs->nr, i); -+ return; -+ } -+} -+ -+static inline void bch2_dev_list_add_dev(struct bch_devs_list *devs, -+ unsigned dev) -+{ -+ BUG_ON(bch2_dev_list_has_dev(*devs, dev)); -+ BUG_ON(devs->nr >= BCH_REPLICAS_MAX); -+ devs->devs[devs->nr++] = dev; -+} -+ -+static inline struct bch_devs_list bch2_dev_list_single(unsigned dev) -+{ -+ return (struct bch_devs_list) { .nr = 1, .devs[0] = dev }; -+} -+ -+static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter, -+ const struct bch_devs_mask *mask) -+{ -+ struct bch_dev *ca = NULL; -+ -+ while ((*iter = mask -+ ? find_next_bit(mask->d, c->sb.nr_devices, *iter) -+ : *iter) < c->sb.nr_devices && -+ !(ca = rcu_dereference_check(c->devs[*iter], -+ lockdep_is_held(&c->state_lock)))) -+ (*iter)++; -+ -+ return ca; -+} -+ -+#define __for_each_member_device(ca, c, iter, mask) \ -+ for ((iter) = 0; ((ca) = __bch2_next_dev((c), &(iter), mask)); (iter)++) -+ -+#define for_each_member_device_rcu(ca, c, iter, mask) \ -+ __for_each_member_device(ca, c, iter, mask) -+ -+static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, unsigned *iter) -+{ -+ struct bch_dev *ca; -+ -+ rcu_read_lock(); -+ if ((ca = __bch2_next_dev(c, iter, NULL))) -+ percpu_ref_get(&ca->ref); -+ rcu_read_unlock(); -+ -+ return ca; -+} -+ -+/* -+ * If you break early, you must drop your ref on the current device -+ */ -+#define for_each_member_device(ca, c, iter) \ -+ for ((iter) = 0; \ -+ (ca = bch2_get_next_dev(c, &(iter))); \ -+ percpu_ref_put(&ca->ref), (iter)++) -+ -+static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c, -+ unsigned *iter, -+ int state_mask) -+{ -+ struct bch_dev *ca; -+ -+ rcu_read_lock(); -+ while ((ca = __bch2_next_dev(c, iter, NULL)) && -+ (!((1 << ca->mi.state) & state_mask) || -+ !percpu_ref_tryget(&ca->io_ref))) -+ (*iter)++; -+ rcu_read_unlock(); -+ -+ return ca; -+} -+ -+#define __for_each_online_member(ca, c, iter, state_mask) \ -+ for ((iter) = 0; \ -+ (ca = bch2_get_next_online_dev(c, &(iter), state_mask)); \ -+ percpu_ref_put(&ca->io_ref), (iter)++) -+ -+#define for_each_online_member(ca, c, iter) \ -+ __for_each_online_member(ca, c, iter, ~0) -+ -+#define for_each_rw_member(ca, c, iter) \ -+ __for_each_online_member(ca, c, iter, 1 << BCH_MEMBER_STATE_RW) -+ -+#define for_each_readable_member(ca, c, iter) \ -+ __for_each_online_member(ca, c, iter, \ -+ (1 << BCH_MEMBER_STATE_RW)|(1 << BCH_MEMBER_STATE_RO)) -+ -+/* -+ * If a key exists that references a device, the device won't be going away and -+ * we can 
omit rcu_read_lock(): -+ */ -+static inline struct bch_dev *bch_dev_bkey_exists(const struct bch_fs *c, unsigned idx) -+{ -+ EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]); -+ -+ return rcu_dereference_check(c->devs[idx], 1); -+} -+ -+static inline struct bch_dev *bch_dev_locked(struct bch_fs *c, unsigned idx) -+{ -+ EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]); -+ -+ return rcu_dereference_protected(c->devs[idx], -+ lockdep_is_held(&c->sb_lock) || -+ lockdep_is_held(&c->state_lock)); -+} -+ -+/* XXX kill, move to struct bch_fs */ -+static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c) -+{ -+ struct bch_devs_mask devs; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ memset(&devs, 0, sizeof(devs)); -+ for_each_online_member(ca, c, i) -+ __set_bit(ca->dev_idx, devs.d); -+ return devs; -+} -+ -+struct bch_fs *bch2_bdev_to_fs(struct block_device *); -+struct bch_fs *bch2_uuid_to_fs(uuid_le); -+int bch2_congested(void *, int); -+ -+bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *, -+ enum bch_member_state, int); -+int __bch2_dev_set_state(struct bch_fs *, struct bch_dev *, -+ enum bch_member_state, int); -+int bch2_dev_set_state(struct bch_fs *, struct bch_dev *, -+ enum bch_member_state, int); -+ -+int bch2_dev_fail(struct bch_dev *, int); -+int bch2_dev_remove(struct bch_fs *, struct bch_dev *, int); -+int bch2_dev_add(struct bch_fs *, const char *); -+int bch2_dev_online(struct bch_fs *, const char *); -+int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int); -+int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64); -+struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); -+ -+bool bch2_fs_emergency_read_only(struct bch_fs *); -+void bch2_fs_read_only(struct bch_fs *); -+ -+int bch2_fs_read_write(struct bch_fs *); -+int bch2_fs_read_write_early(struct bch_fs *); -+ -+/* -+ * Only for use in the recovery/fsck path: -+ */ -+static inline void bch2_fs_lazy_rw(struct bch_fs *c) -+{ -+ if (percpu_ref_is_zero(&c->writes)) -+ bch2_fs_read_write_early(c); -+} -+ -+void bch2_fs_stop(struct bch_fs *); -+ -+int bch2_fs_start(struct bch_fs *); -+struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts); -+const char *bch2_fs_open_incremental(const char *path); -+ -+#endif /* _BCACHEFS_SUPER_H */ -diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h -new file mode 100644 -index 000000000000..20406ebd6f5b ---- /dev/null -+++ b/fs/bcachefs/super_types.h -@@ -0,0 +1,51 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SUPER_TYPES_H -+#define _BCACHEFS_SUPER_TYPES_H -+ -+struct bch_sb_handle { -+ struct bch_sb *sb; -+ struct block_device *bdev; -+ struct bio *bio; -+ unsigned page_order; -+ fmode_t mode; -+ unsigned have_layout:1; -+ unsigned have_bio:1; -+ unsigned fs_sb:1; -+ u64 seq; -+}; -+ -+struct bch_devs_mask { -+ unsigned long d[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)]; -+}; -+ -+struct bch_devs_list { -+ u8 nr; -+ u8 devs[BCH_REPLICAS_MAX + 1]; -+}; -+ -+struct bch_member_cpu { -+ u64 nbuckets; /* device size */ -+ u16 first_bucket; /* index of first bucket used */ -+ u16 bucket_size; /* sectors */ -+ u16 group; -+ u8 state; -+ u8 replacement; -+ u8 discard; -+ u8 data_allowed; -+ u8 durability; -+ u8 valid; -+}; -+ -+struct bch_disk_group_cpu { -+ bool deleted; -+ u16 parent; -+ struct bch_devs_mask devs; -+}; -+ -+struct bch_disk_groups_cpu { -+ struct rcu_head rcu; -+ unsigned nr; -+ struct bch_disk_group_cpu entries[]; -+}; -+ -+#endif /* _BCACHEFS_SUPER_TYPES_H */ -diff --git a/fs/bcachefs/sysfs.c 
b/fs/bcachefs/sysfs.c -new file mode 100644 -index 000000000000..0cb29f43d99d ---- /dev/null -+++ b/fs/bcachefs/sysfs.c -@@ -0,0 +1,1074 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * bcache sysfs interfaces -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#ifndef NO_BCACHEFS_SYSFS -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "sysfs.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_key_cache.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "clock.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "inode.h" -+#include "journal.h" -+#include "keylist.h" -+#include "move.h" -+#include "opts.h" -+#include "rebalance.h" -+#include "replicas.h" -+#include "super-io.h" -+#include "tests.h" -+ -+#include -+#include -+#include -+ -+#include "util.h" -+ -+#define SYSFS_OPS(type) \ -+struct sysfs_ops type ## _sysfs_ops = { \ -+ .show = type ## _show, \ -+ .store = type ## _store \ -+} -+ -+#define SHOW(fn) \ -+static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ -+ char *buf) \ -+ -+#define STORE(fn) \ -+static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\ -+ const char *buf, size_t size) \ -+ -+#define __sysfs_attribute(_name, _mode) \ -+ static struct attribute sysfs_##_name = \ -+ { .name = #_name, .mode = _mode } -+ -+#define write_attribute(n) __sysfs_attribute(n, S_IWUSR) -+#define read_attribute(n) __sysfs_attribute(n, S_IRUGO) -+#define rw_attribute(n) __sysfs_attribute(n, S_IRUGO|S_IWUSR) -+ -+#define sysfs_printf(file, fmt, ...) \ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return scnprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__);\ -+} while (0) -+ -+#define sysfs_print(file, var) \ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return snprint(buf, PAGE_SIZE, var); \ -+} while (0) -+ -+#define sysfs_hprint(file, val) \ -+do { \ -+ if (attr == &sysfs_ ## file) { \ -+ bch2_hprint(&out, val); \ -+ pr_buf(&out, "\n"); \ -+ return out.pos - buf; \ -+ } \ -+} while (0) -+ -+#define var_printf(_var, fmt) sysfs_printf(_var, fmt, var(_var)) -+#define var_print(_var) sysfs_print(_var, var(_var)) -+#define var_hprint(_var) sysfs_hprint(_var, var(_var)) -+ -+#define sysfs_strtoul(file, var) \ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return strtoul_safe(buf, var) ?: (ssize_t) size; \ -+} while (0) -+ -+#define sysfs_strtoul_clamp(file, var, min, max) \ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return strtoul_safe_clamp(buf, var, min, max) \ -+ ?: (ssize_t) size; \ -+} while (0) -+ -+#define strtoul_or_return(cp) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (_r) \ -+ return _r; \ -+ _v; \ -+}) -+ -+#define strtoul_restrict_or_return(cp, min, max) \ -+({ \ -+ unsigned long __v = 0; \ -+ int _r = strtoul_safe_restrict(cp, __v, min, max); \ -+ if (_r) \ -+ return _r; \ -+ __v; \ -+}) -+ -+#define strtoi_h_or_return(cp) \ -+({ \ -+ u64 _v; \ -+ int _r = strtoi_h(cp, &_v); \ -+ if (_r) \ -+ return _r; \ -+ _v; \ -+}) -+ -+#define sysfs_hatoi(file, var) \ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return strtoi_h(buf, &var) ?: (ssize_t) size; \ -+} while (0) -+ -+write_attribute(trigger_journal_flush); -+write_attribute(trigger_btree_coalesce); -+write_attribute(trigger_gc); -+write_attribute(prune_cache); -+rw_attribute(btree_gc_periodic); -+ -+read_attribute(uuid); -+read_attribute(minor); -+read_attribute(bucket_size); 
-+read_attribute(block_size); -+read_attribute(btree_node_size); -+read_attribute(first_bucket); -+read_attribute(nbuckets); -+read_attribute(durability); -+read_attribute(iodone); -+ -+read_attribute(io_latency_read); -+read_attribute(io_latency_write); -+read_attribute(io_latency_stats_read); -+read_attribute(io_latency_stats_write); -+read_attribute(congested); -+ -+read_attribute(bucket_quantiles_last_read); -+read_attribute(bucket_quantiles_last_write); -+read_attribute(bucket_quantiles_fragmentation); -+read_attribute(bucket_quantiles_oldest_gen); -+ -+read_attribute(reserve_stats); -+read_attribute(btree_cache_size); -+read_attribute(compression_stats); -+read_attribute(journal_debug); -+read_attribute(journal_pins); -+read_attribute(btree_updates); -+read_attribute(dirty_btree_nodes); -+read_attribute(btree_key_cache); -+read_attribute(btree_transactions); -+read_attribute(stripes_heap); -+ -+read_attribute(internal_uuid); -+ -+read_attribute(has_data); -+read_attribute(alloc_debug); -+write_attribute(wake_allocator); -+ -+read_attribute(read_realloc_races); -+read_attribute(extent_migrate_done); -+read_attribute(extent_migrate_raced); -+ -+rw_attribute(journal_write_delay_ms); -+rw_attribute(journal_reclaim_delay_ms); -+ -+rw_attribute(discard); -+rw_attribute(cache_replacement_policy); -+rw_attribute(label); -+ -+rw_attribute(copy_gc_enabled); -+sysfs_pd_controller_attribute(copy_gc); -+ -+rw_attribute(rebalance_enabled); -+sysfs_pd_controller_attribute(rebalance); -+read_attribute(rebalance_work); -+rw_attribute(promote_whole_extents); -+ -+read_attribute(new_stripes); -+ -+rw_attribute(pd_controllers_update_seconds); -+ -+read_attribute(meta_replicas_have); -+read_attribute(data_replicas_have); -+ -+read_attribute(io_timers_read); -+read_attribute(io_timers_write); -+ -+#ifdef CONFIG_BCACHEFS_TESTS -+write_attribute(perf_test); -+#endif /* CONFIG_BCACHEFS_TESTS */ -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ rw_attribute(name); -+ -+ BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+#define x(_name) \ -+ static struct attribute sysfs_time_stat_##_name = \ -+ { .name = #_name, .mode = S_IRUGO }; -+ BCH_TIME_STATS() -+#undef x -+ -+static struct attribute sysfs_state_rw = { -+ .name = "state", -+ .mode = S_IRUGO -+}; -+ -+static size_t bch2_btree_cache_size(struct bch_fs *c) -+{ -+ size_t ret = 0; -+ struct btree *b; -+ -+ mutex_lock(&c->btree_cache.lock); -+ list_for_each_entry(b, &c->btree_cache.live, list) -+ ret += btree_bytes(c); -+ -+ mutex_unlock(&c->btree_cache.lock); -+ return ret; -+} -+ -+static int fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c); -+ -+ if (!fs_usage) -+ return -ENOMEM; -+ -+ bch2_fs_usage_to_text(out, c, fs_usage); -+ -+ percpu_up_read(&c->mark_lock); -+ -+ kfree(fs_usage); -+ return 0; -+} -+ -+static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0, -+ nr_compressed_extents = 0, -+ compressed_sectors_compressed = 0, -+ compressed_sectors_uncompressed = 0; -+ int ret; -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EPERM; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k, ret) -+ if (k.k->type == KEY_TYPE_extent) { -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ const union bch_extent_entry *entry; -+ struct 
extent_ptr_decoded p; -+ -+ extent_for_each_ptr_decode(e, p, entry) { -+ if (!crc_is_compressed(p.crc)) { -+ nr_uncompressed_extents++; -+ uncompressed_sectors += e.k->size; -+ } else { -+ nr_compressed_extents++; -+ compressed_sectors_compressed += -+ p.crc.compressed_size; -+ compressed_sectors_uncompressed += -+ p.crc.uncompressed_size; -+ } -+ -+ /* only looking at the first ptr */ -+ break; -+ } -+ } -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ return ret; -+ -+ pr_buf(out, -+ "uncompressed data:\n" -+ " nr extents: %llu\n" -+ " size (bytes): %llu\n" -+ "compressed data:\n" -+ " nr extents: %llu\n" -+ " compressed size (bytes): %llu\n" -+ " uncompressed size (bytes): %llu\n", -+ nr_uncompressed_extents, -+ uncompressed_sectors << 9, -+ nr_compressed_extents, -+ compressed_sectors_compressed << 9, -+ compressed_sectors_uncompressed << 9); -+ return 0; -+} -+ -+SHOW(bch2_fs) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ -+ sysfs_print(minor, c->minor); -+ sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b); -+ -+ sysfs_print(journal_write_delay_ms, c->journal.write_delay_ms); -+ sysfs_print(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms); -+ -+ sysfs_print(block_size, block_bytes(c)); -+ sysfs_print(btree_node_size, btree_bytes(c)); -+ sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c)); -+ -+ sysfs_print(read_realloc_races, -+ atomic_long_read(&c->read_realloc_races)); -+ sysfs_print(extent_migrate_done, -+ atomic_long_read(&c->extent_migrate_done)); -+ sysfs_print(extent_migrate_raced, -+ atomic_long_read(&c->extent_migrate_raced)); -+ -+ sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic); -+ -+ sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); -+ -+ sysfs_print(pd_controllers_update_seconds, -+ c->pd_controllers_update_seconds); -+ -+ sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled); -+ sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */ -+ sysfs_pd_controller_show(copy_gc, &c->copygc_pd); -+ -+ if (attr == &sysfs_rebalance_work) { -+ bch2_rebalance_work_to_text(&out, c); -+ return out.pos - buf; -+ } -+ -+ sysfs_print(promote_whole_extents, c->promote_whole_extents); -+ -+ sysfs_printf(meta_replicas_have, "%i", bch2_replicas_online(c, true)); -+ sysfs_printf(data_replicas_have, "%i", bch2_replicas_online(c, false)); -+ -+ /* Debugging: */ -+ -+ if (attr == &sysfs_alloc_debug) -+ return fs_alloc_debug_to_text(&out, c) ?: out.pos - buf; -+ -+ if (attr == &sysfs_journal_debug) { -+ bch2_journal_debug_to_text(&out, &c->journal); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_journal_pins) { -+ bch2_journal_pins_to_text(&out, &c->journal); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_btree_updates) { -+ bch2_btree_updates_to_text(&out, c); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_dirty_btree_nodes) { -+ bch2_dirty_btree_nodes_to_text(&out, c); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_btree_key_cache) { -+ bch2_btree_key_cache_to_text(&out, &c->btree_key_cache); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_btree_transactions) { -+ bch2_btree_trans_to_text(&out, c); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_stripes_heap) { -+ bch2_stripes_heap_to_text(&out, c); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_compression_stats) { -+ bch2_compression_stats_to_text(&out, c); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_new_stripes) { -+ bch2_new_stripes_to_text(&out, 
c); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_io_timers_read) { -+ bch2_io_timers_to_text(&out, &c->io_clock[READ]); -+ return out.pos - buf; -+ } -+ if (attr == &sysfs_io_timers_write) { -+ bch2_io_timers_to_text(&out, &c->io_clock[WRITE]); -+ return out.pos - buf; -+ } -+ -+#define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name); -+ BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+ return 0; -+} -+ -+STORE(bch2_fs) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); -+ -+ sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms); -+ sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms); -+ -+ if (attr == &sysfs_btree_gc_periodic) { -+ ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic) -+ ?: (ssize_t) size; -+ -+ wake_up_process(c->gc_thread); -+ return ret; -+ } -+ -+ if (attr == &sysfs_copy_gc_enabled) { -+ ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled) -+ ?: (ssize_t) size; -+ -+ if (c->copygc_thread) -+ wake_up_process(c->copygc_thread); -+ return ret; -+ } -+ -+ if (attr == &sysfs_rebalance_enabled) { -+ ssize_t ret = strtoul_safe(buf, c->rebalance.enabled) -+ ?: (ssize_t) size; -+ -+ rebalance_wakeup(c); -+ return ret; -+ } -+ -+ sysfs_strtoul(pd_controllers_update_seconds, -+ c->pd_controllers_update_seconds); -+ sysfs_pd_controller_store(rebalance, &c->rebalance.pd); -+ sysfs_pd_controller_store(copy_gc, &c->copygc_pd); -+ -+ sysfs_strtoul(promote_whole_extents, c->promote_whole_extents); -+ -+ /* Debugging: */ -+ -+#define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name); -+ BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EPERM; -+ -+ /* Debugging: */ -+ -+ if (attr == &sysfs_trigger_journal_flush) -+ bch2_journal_meta_async(&c->journal, NULL); -+ -+ if (attr == &sysfs_trigger_btree_coalesce) -+ bch2_coalesce(c); -+ -+ if (attr == &sysfs_trigger_gc) { -+ /* -+ * Full gc is currently incompatible with btree key cache: -+ */ -+#if 0 -+ down_read(&c->state_lock); -+ bch2_gc(c, NULL, false, false); -+ up_read(&c->state_lock); -+#else -+ bch2_gc_gens(c); -+#endif -+ } -+ -+ if (attr == &sysfs_prune_cache) { -+ struct shrink_control sc; -+ -+ sc.gfp_mask = GFP_KERNEL; -+ sc.nr_to_scan = strtoul_or_return(buf); -+ c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc); -+ } -+ -+#ifdef CONFIG_BCACHEFS_TESTS -+ if (attr == &sysfs_perf_test) { -+ char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp; -+ char *test = strsep(&p, " \t\n"); -+ char *nr_str = strsep(&p, " \t\n"); -+ char *threads_str = strsep(&p, " \t\n"); -+ unsigned threads; -+ u64 nr; -+ int ret = -EINVAL; -+ -+ if (threads_str && -+ !(ret = kstrtouint(threads_str, 10, &threads)) && -+ !(ret = bch2_strtoull_h(nr_str, &nr))) -+ bch2_btree_perf_test(c, test, nr, threads); -+ else -+ size = ret; -+ kfree(tmp); -+ } -+#endif -+ return size; -+} -+SYSFS_OPS(bch2_fs); -+ -+struct attribute *bch2_fs_files[] = { -+ &sysfs_minor, -+ &sysfs_block_size, -+ &sysfs_btree_node_size, -+ &sysfs_btree_cache_size, -+ -+ &sysfs_meta_replicas_have, -+ &sysfs_data_replicas_have, -+ -+ &sysfs_journal_write_delay_ms, -+ &sysfs_journal_reclaim_delay_ms, -+ -+ &sysfs_promote_whole_extents, -+ -+ &sysfs_compression_stats, -+ -+#ifdef CONFIG_BCACHEFS_TESTS -+ &sysfs_perf_test, -+#endif -+ NULL -+}; -+ -+/* internal dir - just a wrapper */ -+ -+SHOW(bch2_fs_internal) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, internal); -+ return bch2_fs_show(&c->kobj, attr, buf); -+} -+ 
-+STORE(bch2_fs_internal) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, internal); -+ return bch2_fs_store(&c->kobj, attr, buf, size); -+} -+SYSFS_OPS(bch2_fs_internal); -+ -+struct attribute *bch2_fs_internal_files[] = { -+ &sysfs_alloc_debug, -+ &sysfs_journal_debug, -+ &sysfs_journal_pins, -+ &sysfs_btree_updates, -+ &sysfs_dirty_btree_nodes, -+ &sysfs_btree_key_cache, -+ &sysfs_btree_transactions, -+ &sysfs_stripes_heap, -+ -+ &sysfs_read_realloc_races, -+ &sysfs_extent_migrate_done, -+ &sysfs_extent_migrate_raced, -+ -+ &sysfs_trigger_journal_flush, -+ &sysfs_trigger_btree_coalesce, -+ &sysfs_trigger_gc, -+ &sysfs_prune_cache, -+ -+ &sysfs_copy_gc_enabled, -+ -+ &sysfs_rebalance_enabled, -+ &sysfs_rebalance_work, -+ sysfs_pd_controller_files(rebalance), -+ sysfs_pd_controller_files(copy_gc), -+ -+ &sysfs_new_stripes, -+ -+ &sysfs_io_timers_read, -+ &sysfs_io_timers_write, -+ -+ &sysfs_internal_uuid, -+ -+#define BCH_DEBUG_PARAM(name, description) &sysfs_##name, -+ BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+ NULL -+}; -+ -+/* options */ -+ -+SHOW(bch2_fs_opts_dir) -+{ -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); -+ const struct bch_option *opt = container_of(attr, struct bch_option, attr); -+ int id = opt - bch2_opt_table; -+ u64 v = bch2_opt_get_by_id(&c->opts, id); -+ -+ bch2_opt_to_text(&out, c, opt, v, OPT_SHOW_FULL_LIST); -+ pr_buf(&out, "\n"); -+ -+ return out.pos - buf; -+} -+ -+STORE(bch2_fs_opts_dir) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); -+ const struct bch_option *opt = container_of(attr, struct bch_option, attr); -+ int ret, id = opt - bch2_opt_table; -+ char *tmp; -+ u64 v; -+ -+ tmp = kstrdup(buf, GFP_KERNEL); -+ if (!tmp) -+ return -ENOMEM; -+ -+ ret = bch2_opt_parse(c, opt, strim(tmp), &v); -+ kfree(tmp); -+ -+ if (ret < 0) -+ return ret; -+ -+ ret = bch2_opt_check_may_set(c, id, v); -+ if (ret < 0) -+ return ret; -+ -+ if (opt->set_sb != SET_NO_SB_OPT) { -+ mutex_lock(&c->sb_lock); -+ opt->set_sb(c->disk_sb.sb, v); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ bch2_opt_set_by_id(&c->opts, id, v); -+ -+ if ((id == Opt_background_target || -+ id == Opt_background_compression) && v) { -+ bch2_rebalance_add_work(c, S64_MAX); -+ rebalance_wakeup(c); -+ } -+ -+ return size; -+} -+SYSFS_OPS(bch2_fs_opts_dir); -+ -+struct attribute *bch2_fs_opts_dir_files[] = { NULL }; -+ -+int bch2_opts_create_sysfs_files(struct kobject *kobj) -+{ -+ const struct bch_option *i; -+ int ret; -+ -+ for (i = bch2_opt_table; -+ i < bch2_opt_table + bch2_opts_nr; -+ i++) { -+ if (!(i->mode & (OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME))) -+ continue; -+ -+ ret = sysfs_create_file(kobj, &i->attr); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/* time stats */ -+ -+SHOW(bch2_fs_time_stats) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats); -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ -+#define x(name) \ -+ if (attr == &sysfs_time_stat_##name) { \ -+ bch2_time_stats_to_text(&out, &c->times[BCH_TIME_##name]);\ -+ return out.pos - buf; \ -+ } -+ BCH_TIME_STATS() -+#undef x -+ -+ return 0; -+} -+ -+STORE(bch2_fs_time_stats) -+{ -+ return size; -+} -+SYSFS_OPS(bch2_fs_time_stats); -+ -+struct attribute *bch2_fs_time_stats_files[] = { -+#define x(name) \ -+ &sysfs_time_stat_##name, -+ BCH_TIME_STATS() -+#undef x -+ NULL -+}; -+ -+typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *, -+ size_t, void *); -+ -+static unsigned 
bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, void *private) -+{ -+ int rw = (private ? 1 : 0); -+ -+ return bucket_last_io(c, bucket(ca, b), rw); -+} -+ -+static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, void *private) -+{ -+ struct bucket *g = bucket(ca, b); -+ return bucket_sectors_used(g->mark); -+} -+ -+static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, void *private) -+{ -+ return bucket_gc_gen(ca, b); -+} -+ -+static int unsigned_cmp(const void *_l, const void *_r) -+{ -+ const unsigned *l = _l; -+ const unsigned *r = _r; -+ -+ return cmp_int(*l, *r); -+} -+ -+static int quantiles_to_text(struct printbuf *out, -+ struct bch_fs *c, struct bch_dev *ca, -+ bucket_map_fn *fn, void *private) -+{ -+ size_t i, n; -+ /* Compute 31 quantiles */ -+ unsigned q[31], *p; -+ -+ down_read(&ca->bucket_lock); -+ n = ca->mi.nbuckets; -+ -+ p = vzalloc(n * sizeof(unsigned)); -+ if (!p) { -+ up_read(&ca->bucket_lock); -+ return -ENOMEM; -+ } -+ -+ for (i = ca->mi.first_bucket; i < n; i++) -+ p[i] = fn(c, ca, i, private); -+ -+ sort(p, n, sizeof(unsigned), unsigned_cmp, NULL); -+ up_read(&ca->bucket_lock); -+ -+ while (n && -+ !p[n - 1]) -+ --n; -+ -+ for (i = 0; i < ARRAY_SIZE(q); i++) -+ q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)]; -+ -+ vfree(p); -+ -+ for (i = 0; i < ARRAY_SIZE(q); i++) -+ pr_buf(out, "%u ", q[i]); -+ pr_buf(out, "\n"); -+ return 0; -+} -+ -+static void reserve_stats_to_text(struct printbuf *out, struct bch_dev *ca) -+{ -+ enum alloc_reserve i; -+ -+ spin_lock(&ca->fs->freelist_lock); -+ -+ pr_buf(out, "free_inc:\t%zu\t%zu\n", -+ fifo_used(&ca->free_inc), -+ ca->free_inc.size); -+ -+ for (i = 0; i < RESERVE_NR; i++) -+ pr_buf(out, "free[%u]:\t%zu\t%zu\n", i, -+ fifo_used(&ca->free[i]), -+ ca->free[i].size); -+ -+ spin_unlock(&ca->fs->freelist_lock); -+} -+ -+static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) -+{ -+ struct bch_fs *c = ca->fs; -+ struct bch_dev_usage stats = bch2_dev_usage_read(ca); -+ unsigned i, nr[BCH_DATA_NR]; -+ -+ memset(nr, 0, sizeof(nr)); -+ -+ for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++) -+ nr[c->open_buckets[i].type]++; -+ -+ pr_buf(out, -+ "free_inc: %zu/%zu\n" -+ "free[RESERVE_BTREE]: %zu/%zu\n" -+ "free[RESERVE_MOVINGGC]: %zu/%zu\n" -+ "free[RESERVE_NONE]: %zu/%zu\n" -+ "buckets:\n" -+ " capacity: %llu\n" -+ " alloc: %llu\n" -+ " sb: %llu\n" -+ " journal: %llu\n" -+ " meta: %llu\n" -+ " user: %llu\n" -+ " cached: %llu\n" -+ " erasure coded: %llu\n" -+ " available: %lli\n" -+ "sectors:\n" -+ " sb: %llu\n" -+ " journal: %llu\n" -+ " meta: %llu\n" -+ " user: %llu\n" -+ " cached: %llu\n" -+ " erasure coded: %llu\n" -+ " fragmented: %llu\n" -+ " copygc threshold: %llu\n" -+ "freelist_wait: %s\n" -+ "open buckets: %u/%u (reserved %u)\n" -+ "open_buckets_wait: %s\n" -+ "open_buckets_btree: %u\n" -+ "open_buckets_user: %u\n" -+ "btree reserve cache: %u\n", -+ fifo_used(&ca->free_inc), ca->free_inc.size, -+ fifo_used(&ca->free[RESERVE_BTREE]), ca->free[RESERVE_BTREE].size, -+ fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size, -+ fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size, -+ ca->mi.nbuckets - ca->mi.first_bucket, -+ stats.buckets_alloc, -+ stats.buckets[BCH_DATA_sb], -+ stats.buckets[BCH_DATA_journal], -+ stats.buckets[BCH_DATA_btree], -+ stats.buckets[BCH_DATA_user], -+ stats.buckets[BCH_DATA_cached], -+ stats.buckets_ec, -+ __dev_buckets_available(ca, stats), -+ stats.sectors[BCH_DATA_sb], 
-+ stats.sectors[BCH_DATA_journal], -+ stats.sectors[BCH_DATA_btree], -+ stats.sectors[BCH_DATA_user], -+ stats.sectors[BCH_DATA_cached], -+ stats.sectors_ec, -+ stats.sectors_fragmented, -+ c->copygc_threshold, -+ c->freelist_wait.list.first ? "waiting" : "empty", -+ c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, -+ BTREE_NODE_OPEN_BUCKET_RESERVE, -+ c->open_buckets_wait.list.first ? "waiting" : "empty", -+ nr[BCH_DATA_btree], -+ nr[BCH_DATA_user], -+ c->btree_reserve_cache_nr); -+} -+ -+static const char * const bch2_rw[] = { -+ "read", -+ "write", -+ NULL -+}; -+ -+static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca) -+{ -+ int rw, i; -+ -+ for (rw = 0; rw < 2; rw++) { -+ pr_buf(out, "%s:\n", bch2_rw[rw]); -+ -+ for (i = 1; i < BCH_DATA_NR; i++) -+ pr_buf(out, "%-12s:%12llu\n", -+ bch2_data_types[i], -+ percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9); -+ } -+} -+ -+SHOW(bch2_dev) -+{ -+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); -+ struct bch_fs *c = ca->fs; -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ -+ sysfs_printf(uuid, "%pU\n", ca->uuid.b); -+ -+ sysfs_print(bucket_size, bucket_bytes(ca)); -+ sysfs_print(block_size, block_bytes(c)); -+ sysfs_print(first_bucket, ca->mi.first_bucket); -+ sysfs_print(nbuckets, ca->mi.nbuckets); -+ sysfs_print(durability, ca->mi.durability); -+ sysfs_print(discard, ca->mi.discard); -+ -+ if (attr == &sysfs_label) { -+ if (ca->mi.group) { -+ mutex_lock(&c->sb_lock); -+ bch2_disk_path_to_text(&out, &c->disk_sb, -+ ca->mi.group - 1); -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ pr_buf(&out, "\n"); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_has_data) { -+ bch2_flags_to_text(&out, bch2_data_types, -+ bch2_dev_has_data(c, ca)); -+ pr_buf(&out, "\n"); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_cache_replacement_policy) { -+ bch2_string_opt_to_text(&out, -+ bch2_cache_replacement_policies, -+ ca->mi.replacement); -+ pr_buf(&out, "\n"); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_state_rw) { -+ bch2_string_opt_to_text(&out, bch2_dev_state, -+ ca->mi.state); -+ pr_buf(&out, "\n"); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_iodone) { -+ dev_iodone_to_text(&out, ca); -+ return out.pos - buf; -+ } -+ -+ sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ])); -+ sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE])); -+ -+ if (attr == &sysfs_io_latency_stats_read) { -+ bch2_time_stats_to_text(&out, &ca->io_latency[READ]); -+ return out.pos - buf; -+ } -+ if (attr == &sysfs_io_latency_stats_write) { -+ bch2_time_stats_to_text(&out, &ca->io_latency[WRITE]); -+ return out.pos - buf; -+ } -+ -+ sysfs_printf(congested, "%u%%", -+ clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX) -+ * 100 / CONGESTED_MAX); -+ -+ if (attr == &sysfs_bucket_quantiles_last_read) -+ return quantiles_to_text(&out, c, ca, bucket_last_io_fn, (void *) 0) ?: out.pos - buf; -+ if (attr == &sysfs_bucket_quantiles_last_write) -+ return quantiles_to_text(&out, c, ca, bucket_last_io_fn, (void *) 1) ?: out.pos - buf; -+ if (attr == &sysfs_bucket_quantiles_fragmentation) -+ return quantiles_to_text(&out, c, ca, bucket_sectors_used_fn, NULL) ?: out.pos - buf; -+ if (attr == &sysfs_bucket_quantiles_oldest_gen) -+ return quantiles_to_text(&out, c, ca, bucket_oldest_gen_fn, NULL) ?: out.pos - buf; -+ -+ if (attr == &sysfs_reserve_stats) { -+ reserve_stats_to_text(&out, ca); -+ return out.pos - buf; -+ } -+ if (attr == &sysfs_alloc_debug) { -+ dev_alloc_debug_to_text(&out, ca); -+ return 
out.pos - buf; -+ } -+ -+ return 0; -+} -+ -+STORE(bch2_dev) -+{ -+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); -+ struct bch_fs *c = ca->fs; -+ struct bch_member *mi; -+ -+ if (attr == &sysfs_discard) { -+ bool v = strtoul_or_return(buf); -+ -+ mutex_lock(&c->sb_lock); -+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; -+ -+ if (v != BCH_MEMBER_DISCARD(mi)) { -+ SET_BCH_MEMBER_DISCARD(mi, v); -+ bch2_write_super(c); -+ } -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ if (attr == &sysfs_cache_replacement_policy) { -+ ssize_t v = __sysfs_match_string(bch2_cache_replacement_policies, -1, buf); -+ -+ if (v < 0) -+ return v; -+ -+ mutex_lock(&c->sb_lock); -+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; -+ -+ if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) { -+ SET_BCH_MEMBER_REPLACEMENT(mi, v); -+ bch2_write_super(c); -+ } -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ if (attr == &sysfs_label) { -+ char *tmp; -+ int ret; -+ -+ tmp = kstrdup(buf, GFP_KERNEL); -+ if (!tmp) -+ return -ENOMEM; -+ -+ ret = bch2_dev_group_set(c, ca, strim(tmp)); -+ kfree(tmp); -+ if (ret) -+ return ret; -+ } -+ -+ if (attr == &sysfs_wake_allocator) -+ bch2_wake_allocator(ca); -+ -+ return size; -+} -+SYSFS_OPS(bch2_dev); -+ -+struct attribute *bch2_dev_files[] = { -+ &sysfs_uuid, -+ &sysfs_bucket_size, -+ &sysfs_block_size, -+ &sysfs_first_bucket, -+ &sysfs_nbuckets, -+ &sysfs_durability, -+ -+ /* settings: */ -+ &sysfs_discard, -+ &sysfs_cache_replacement_policy, -+ &sysfs_state_rw, -+ &sysfs_label, -+ -+ &sysfs_has_data, -+ &sysfs_iodone, -+ -+ &sysfs_io_latency_read, -+ &sysfs_io_latency_write, -+ &sysfs_io_latency_stats_read, -+ &sysfs_io_latency_stats_write, -+ &sysfs_congested, -+ -+ /* alloc info - other stats: */ -+ &sysfs_bucket_quantiles_last_read, -+ &sysfs_bucket_quantiles_last_write, -+ &sysfs_bucket_quantiles_fragmentation, -+ &sysfs_bucket_quantiles_oldest_gen, -+ -+ &sysfs_reserve_stats, -+ -+ /* debug: */ -+ &sysfs_alloc_debug, -+ &sysfs_wake_allocator, -+ NULL -+}; -+ -+#endif /* _BCACHEFS_SYSFS_H_ */ -diff --git a/fs/bcachefs/sysfs.h b/fs/bcachefs/sysfs.h -new file mode 100644 -index 000000000000..525fd05d91f7 ---- /dev/null -+++ b/fs/bcachefs/sysfs.h -@@ -0,0 +1,44 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SYSFS_H_ -+#define _BCACHEFS_SYSFS_H_ -+ -+#include -+ -+#ifndef NO_BCACHEFS_SYSFS -+ -+struct attribute; -+struct sysfs_ops; -+ -+extern struct attribute *bch2_fs_files[]; -+extern struct attribute *bch2_fs_internal_files[]; -+extern struct attribute *bch2_fs_opts_dir_files[]; -+extern struct attribute *bch2_fs_time_stats_files[]; -+extern struct attribute *bch2_dev_files[]; -+ -+extern struct sysfs_ops bch2_fs_sysfs_ops; -+extern struct sysfs_ops bch2_fs_internal_sysfs_ops; -+extern struct sysfs_ops bch2_fs_opts_dir_sysfs_ops; -+extern struct sysfs_ops bch2_fs_time_stats_sysfs_ops; -+extern struct sysfs_ops bch2_dev_sysfs_ops; -+ -+int bch2_opts_create_sysfs_files(struct kobject *); -+ -+#else -+ -+static struct attribute *bch2_fs_files[] = {}; -+static struct attribute *bch2_fs_internal_files[] = {}; -+static struct attribute *bch2_fs_opts_dir_files[] = {}; -+static struct attribute *bch2_fs_time_stats_files[] = {}; -+static struct attribute *bch2_dev_files[] = {}; -+ -+static const struct sysfs_ops bch2_fs_sysfs_ops; -+static const struct sysfs_ops bch2_fs_internal_sysfs_ops; -+static const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops; -+static const struct sysfs_ops bch2_fs_time_stats_sysfs_ops; -+static const struct sysfs_ops 
bch2_dev_sysfs_ops; -+ -+static inline int bch2_opts_create_sysfs_files(struct kobject *kobj) { return 0; } -+ -+#endif /* NO_BCACHEFS_SYSFS */ -+ -+#endif /* _BCACHEFS_SYSFS_H_ */ -diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c -new file mode 100644 -index 000000000000..4dcace650416 ---- /dev/null -+++ b/fs/bcachefs/tests.c -@@ -0,0 +1,725 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifdef CONFIG_BCACHEFS_TESTS -+ -+#include "bcachefs.h" -+#include "btree_update.h" -+#include "journal_reclaim.h" -+#include "tests.h" -+ -+#include "linux/kthread.h" -+#include "linux/random.h" -+ -+static void delete_test_keys(struct bch_fs *c) -+{ -+ int ret; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS, -+ POS(0, 0), POS(0, U64_MAX), -+ NULL); -+ BUG_ON(ret); -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_XATTRS, -+ POS(0, 0), POS(0, U64_MAX), -+ NULL); -+ BUG_ON(ret); -+} -+ -+/* unit tests */ -+ -+static void test_delete(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_i_cookie k; -+ int ret; -+ -+ bkey_cookie_init(&k.k_i); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, k.k.p, -+ BTREE_ITER_INTENT); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ BUG_ON(ret); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_trans_update(&trans, iter, &k.k_i, 0)); -+ BUG_ON(ret); -+ -+ pr_info("deleting once"); -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ BUG_ON(ret); -+ -+ pr_info("deleting twice"); -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ BUG_ON(ret); -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_delete_written(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_i_cookie k; -+ int ret; -+ -+ bkey_cookie_init(&k.k_i); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, k.k.p, -+ BTREE_ITER_INTENT); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ BUG_ON(ret); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_trans_update(&trans, iter, &k.k_i, 0)); -+ BUG_ON(ret); -+ -+ bch2_journal_flush_all_pins(&c->journal); -+ -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ BUG_ON(ret); -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_iterate(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test keys"); -+ -+ for (i = 0; i < nr; i++) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = i; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_XATTRS, &k.k_i, -+ NULL, NULL, 0); -+ BUG_ON(ret); -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, -+ POS_MIN, 0, k, ret) { -+ if (k.k->p.inode) -+ break; -+ -+ BUG_ON(k.k->p.offset != i++); -+ } -+ -+ BUG_ON(i != nr); -+ -+ pr_info("iterating backwards"); -+ -+ while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) -+ BUG_ON(k.k->p.offset != --i); -+ -+ BUG_ON(i); -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_iterate_extents(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test extents"); -+ -+ for (i = 0; i < nr; i += 8) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ 
k.k.p.offset = i + 8; -+ k.k.size = 8; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i, -+ NULL, NULL, 0); -+ BUG_ON(ret); -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, -+ POS_MIN, 0, k, ret) { -+ BUG_ON(bkey_start_offset(k.k) != i); -+ i = k.k->p.offset; -+ } -+ -+ BUG_ON(i != nr); -+ -+ pr_info("iterating backwards"); -+ -+ while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) { -+ BUG_ON(k.k->p.offset != i); -+ i = bkey_start_offset(k.k); -+ } -+ -+ BUG_ON(i); -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_iterate_slots(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test keys"); -+ -+ for (i = 0; i < nr; i++) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = i * 2; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_XATTRS, &k.k_i, -+ NULL, NULL, 0); -+ BUG_ON(ret); -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, -+ 0, k, ret) { -+ if (k.k->p.inode) -+ break; -+ -+ BUG_ON(k.k->p.offset != i); -+ i += 2; -+ } -+ bch2_trans_iter_free(&trans, iter); -+ -+ BUG_ON(i != nr * 2); -+ -+ pr_info("iterating forwards by slots"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, -+ BTREE_ITER_SLOTS, k, ret) { -+ BUG_ON(k.k->p.offset != i); -+ BUG_ON(bkey_deleted(k.k) != (i & 1)); -+ -+ i++; -+ if (i == nr * 2) -+ break; -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test keys"); -+ -+ for (i = 0; i < nr; i += 16) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = i + 16; -+ k.k.size = 8; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i, -+ NULL, NULL, 0); -+ BUG_ON(ret); -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, -+ 0, k, ret) { -+ BUG_ON(bkey_start_offset(k.k) != i + 8); -+ BUG_ON(k.k->size != 8); -+ i += 16; -+ } -+ bch2_trans_iter_free(&trans, iter); -+ -+ BUG_ON(i != nr); -+ -+ pr_info("iterating forwards by slots"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, -+ BTREE_ITER_SLOTS, k, ret) { -+ BUG_ON(bkey_deleted(k.k) != !(i % 16)); -+ -+ BUG_ON(bkey_start_offset(k.k) != i); -+ BUG_ON(k.k->size != 8); -+ i = k.k->p.offset; -+ -+ if (i == nr) -+ break; -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+/* -+ * XXX: we really want to make sure we've got a btree with depth > 0 for these -+ * tests -+ */ -+static void test_peek_end(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, POS_MIN, 0); -+ -+ k = bch2_btree_iter_peek(iter); -+ BUG_ON(k.k); -+ -+ k = bch2_btree_iter_peek(iter); -+ BUG_ON(k.k); -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_peek_end_extents(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, 
BTREE_ID_EXTENTS, POS_MIN, 0); -+ -+ k = bch2_btree_iter_peek(iter); -+ BUG_ON(k.k); -+ -+ k = bch2_btree_iter_peek(iter); -+ BUG_ON(k.k); -+ -+ bch2_trans_exit(&trans); -+} -+ -+/* extent unit tests */ -+ -+u64 test_version; -+ -+static void insert_test_extent(struct bch_fs *c, -+ u64 start, u64 end) -+{ -+ struct bkey_i_cookie k; -+ int ret; -+ -+ //pr_info("inserting %llu-%llu v %llu", start, end, test_version); -+ -+ bkey_cookie_init(&k.k_i); -+ k.k_i.k.p.offset = end; -+ k.k_i.k.size = end - start; -+ k.k_i.k.version.lo = test_version++; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i, -+ NULL, NULL, 0); -+ BUG_ON(ret); -+} -+ -+static void __test_extent_overwrite(struct bch_fs *c, -+ u64 e1_start, u64 e1_end, -+ u64 e2_start, u64 e2_end) -+{ -+ insert_test_extent(c, e1_start, e1_end); -+ insert_test_extent(c, e2_start, e2_end); -+ -+ delete_test_keys(c); -+} -+ -+static void test_extent_overwrite_front(struct bch_fs *c, u64 nr) -+{ -+ __test_extent_overwrite(c, 0, 64, 0, 32); -+ __test_extent_overwrite(c, 8, 64, 0, 32); -+} -+ -+static void test_extent_overwrite_back(struct bch_fs *c, u64 nr) -+{ -+ __test_extent_overwrite(c, 0, 64, 32, 64); -+ __test_extent_overwrite(c, 0, 64, 32, 72); -+} -+ -+static void test_extent_overwrite_middle(struct bch_fs *c, u64 nr) -+{ -+ __test_extent_overwrite(c, 0, 64, 32, 40); -+} -+ -+static void test_extent_overwrite_all(struct bch_fs *c, u64 nr) -+{ -+ __test_extent_overwrite(c, 32, 64, 0, 64); -+ __test_extent_overwrite(c, 32, 64, 0, 128); -+ __test_extent_overwrite(c, 32, 64, 32, 64); -+ __test_extent_overwrite(c, 32, 64, 32, 128); -+} -+ -+/* perf tests */ -+ -+static u64 test_rand(void) -+{ -+ u64 v; -+#if 0 -+ v = prandom_u32(); -+#else -+ prandom_bytes(&v, sizeof(v)); -+#endif -+ return v; -+} -+ -+static void rand_insert(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct bkey_i_cookie k; -+ int ret; -+ u64 i; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < nr; i++) { -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = test_rand(); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ __bch2_btree_insert(&trans, BTREE_ID_XATTRS, &k.k_i)); -+ -+ BUG_ON(ret); -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void rand_lookup(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < nr; i++) { -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, -+ POS(0, test_rand()), 0); -+ -+ k = bch2_btree_iter_peek(iter); -+ bch2_trans_iter_free(&trans, iter); -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void rand_mixed(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ u64 i; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < nr; i++) { -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, -+ POS(0, test_rand()), 0); -+ -+ k = bch2_btree_iter_peek(iter); -+ -+ if (!(i & 3) && k.k) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p = iter->pos; -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_trans_update(&trans, iter, &k.k_i, 0)); -+ -+ BUG_ON(ret); -+ } -+ -+ bch2_trans_iter_free(&trans, iter); -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static int __do_delete(struct btree_trans *trans, struct bpos pos) -+{ -+ struct btree_iter *iter; -+ struct bkey_i delete; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_XATTRS, pos, -+ 
BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(iter); -+ if (ret) -+ goto err; -+ -+ k = bch2_btree_iter_peek(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ bkey_init(&delete.k); -+ delete.k.p = k.k->p; -+ -+ bch2_trans_update(trans, iter, &delete, 0); -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static void rand_delete(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ int ret; -+ u64 i; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < nr; i++) { -+ struct bpos pos = POS(0, test_rand()); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ __do_delete(&trans, pos)); -+ BUG_ON(ret); -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void seq_insert(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_i_cookie insert; -+ int ret; -+ u64 i = 0; -+ -+ bkey_cookie_init(&insert.k_i); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ insert.k.p = iter->pos; -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_trans_update(&trans, iter, &insert.k_i, 0)); -+ -+ BUG_ON(ret); -+ -+ if (++i == nr) -+ break; -+ } -+ bch2_trans_exit(&trans); -+} -+ -+static void seq_lookup(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, 0, k, ret) -+ ; -+ bch2_trans_exit(&trans); -+} -+ -+static void seq_overwrite(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, -+ BTREE_ITER_INTENT, k, ret) { -+ struct bkey_i_cookie u; -+ -+ bkey_reassemble(&u.k_i, k); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_trans_update(&trans, iter, &u.k_i, 0)); -+ -+ BUG_ON(ret); -+ } -+ bch2_trans_exit(&trans); -+} -+ -+static void seq_delete(struct bch_fs *c, u64 nr) -+{ -+ int ret; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_XATTRS, -+ POS(0, 0), POS(0, U64_MAX), -+ NULL); -+ BUG_ON(ret); -+} -+ -+typedef void (*perf_test_fn)(struct bch_fs *, u64); -+ -+struct test_job { -+ struct bch_fs *c; -+ u64 nr; -+ unsigned nr_threads; -+ perf_test_fn fn; -+ -+ atomic_t ready; -+ wait_queue_head_t ready_wait; -+ -+ atomic_t done; -+ struct completion done_completion; -+ -+ u64 start; -+ u64 finish; -+}; -+ -+static int btree_perf_test_thread(void *data) -+{ -+ struct test_job *j = data; -+ -+ if (atomic_dec_and_test(&j->ready)) { -+ wake_up(&j->ready_wait); -+ j->start = sched_clock(); -+ } else { -+ wait_event(j->ready_wait, !atomic_read(&j->ready)); -+ } -+ -+ j->fn(j->c, j->nr / j->nr_threads); -+ -+ if (atomic_dec_and_test(&j->done)) { -+ j->finish = sched_clock(); -+ complete(&j->done_completion); -+ } -+ -+ return 0; -+} -+ -+void bch2_btree_perf_test(struct bch_fs *c, const char *testname, -+ u64 nr, unsigned nr_threads) -+{ -+ struct test_job j = { .c = c, .nr = nr, .nr_threads = nr_threads }; -+ char name_buf[20], nr_buf[20], per_sec_buf[20]; -+ unsigned i; -+ u64 time; -+ -+ atomic_set(&j.ready, nr_threads); -+ init_waitqueue_head(&j.ready_wait); -+ -+ atomic_set(&j.done, nr_threads); -+ init_completion(&j.done_completion); -+ -+#define perf_test(_test) \ -+ if (!strcmp(testname, #_test)) j.fn = _test -+ -+ perf_test(rand_insert); -+ 
perf_test(rand_lookup); -+ perf_test(rand_mixed); -+ perf_test(rand_delete); -+ -+ perf_test(seq_insert); -+ perf_test(seq_lookup); -+ perf_test(seq_overwrite); -+ perf_test(seq_delete); -+ -+ /* a unit test, not a perf test: */ -+ perf_test(test_delete); -+ perf_test(test_delete_written); -+ perf_test(test_iterate); -+ perf_test(test_iterate_extents); -+ perf_test(test_iterate_slots); -+ perf_test(test_iterate_slots_extents); -+ perf_test(test_peek_end); -+ perf_test(test_peek_end_extents); -+ -+ perf_test(test_extent_overwrite_front); -+ perf_test(test_extent_overwrite_back); -+ perf_test(test_extent_overwrite_middle); -+ perf_test(test_extent_overwrite_all); -+ -+ if (!j.fn) { -+ pr_err("unknown test %s", testname); -+ return; -+ } -+ -+ //pr_info("running test %s:", testname); -+ -+ if (nr_threads == 1) -+ btree_perf_test_thread(&j); -+ else -+ for (i = 0; i < nr_threads; i++) -+ kthread_run(btree_perf_test_thread, &j, -+ "bcachefs perf test[%u]", i); -+ -+ while (wait_for_completion_interruptible(&j.done_completion)) -+ ; -+ -+ time = j.finish - j.start; -+ -+ scnprintf(name_buf, sizeof(name_buf), "%s:", testname); -+ bch2_hprint(&PBUF(nr_buf), nr); -+ bch2_hprint(&PBUF(per_sec_buf), nr * NSEC_PER_SEC / time); -+ printk(KERN_INFO "%-12s %s with %u threads in %5llu sec, %5llu nsec per iter, %5s per sec\n", -+ name_buf, nr_buf, nr_threads, -+ time / NSEC_PER_SEC, -+ time * nr_threads / nr, -+ per_sec_buf); -+} -+ -+#endif /* CONFIG_BCACHEFS_TESTS */ -diff --git a/fs/bcachefs/tests.h b/fs/bcachefs/tests.h -new file mode 100644 -index 000000000000..551d0764225e ---- /dev/null -+++ b/fs/bcachefs/tests.h -@@ -0,0 +1,15 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_TEST_H -+#define _BCACHEFS_TEST_H -+ -+struct bch_fs; -+ -+#ifdef CONFIG_BCACHEFS_TESTS -+ -+void bch2_btree_perf_test(struct bch_fs *, const char *, u64, unsigned); -+ -+#else -+ -+#endif /* CONFIG_BCACHEFS_TESTS */ -+ -+#endif /* _BCACHEFS_TEST_H */ -diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c -new file mode 100644 -index 000000000000..59e8dfa3d245 ---- /dev/null -+++ b/fs/bcachefs/trace.c -@@ -0,0 +1,12 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "alloc_types.h" -+#include "buckets.h" -+#include "btree_types.h" -+#include "keylist.h" -+ -+#include -+#include "keylist.h" -+ -+#define CREATE_TRACE_POINTS -+#include -diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c -new file mode 100644 -index 000000000000..fd4044a6a08f ---- /dev/null -+++ b/fs/bcachefs/util.c -@@ -0,0 +1,907 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * random utiility code, for bcache but in theory not specific to bcache -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. 
-+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "eytzinger.h" -+#include "util.h" -+ -+static const char si_units[] = "?kMGTPEZY"; -+ -+static int __bch2_strtoh(const char *cp, u64 *res, -+ u64 t_max, bool t_signed) -+{ -+ bool positive = *cp != '-'; -+ unsigned u; -+ u64 v = 0; -+ -+ if (*cp == '+' || *cp == '-') -+ cp++; -+ -+ if (!isdigit(*cp)) -+ return -EINVAL; -+ -+ do { -+ if (v > U64_MAX / 10) -+ return -ERANGE; -+ v *= 10; -+ if (v > U64_MAX - (*cp - '0')) -+ return -ERANGE; -+ v += *cp - '0'; -+ cp++; -+ } while (isdigit(*cp)); -+ -+ for (u = 1; u < strlen(si_units); u++) -+ if (*cp == si_units[u]) { -+ cp++; -+ goto got_unit; -+ } -+ u = 0; -+got_unit: -+ if (*cp == '\n') -+ cp++; -+ if (*cp) -+ return -EINVAL; -+ -+ if (fls64(v) + u * 10 > 64) -+ return -ERANGE; -+ -+ v <<= u * 10; -+ -+ if (positive) { -+ if (v > t_max) -+ return -ERANGE; -+ } else { -+ if (v && !t_signed) -+ return -ERANGE; -+ -+ if (v > t_max + 1) -+ return -ERANGE; -+ v = -v; -+ } -+ -+ *res = v; -+ return 0; -+} -+ -+#define STRTO_H(name, type) \ -+int bch2_ ## name ## _h(const char *cp, type *res) \ -+{ \ -+ u64 v; \ -+ int ret = __bch2_strtoh(cp, &v, ANYSINT_MAX(type), \ -+ ANYSINT_MAX(type) != ((type) ~0ULL)); \ -+ *res = v; \ -+ return ret; \ -+} -+ -+STRTO_H(strtoint, int) -+STRTO_H(strtouint, unsigned int) -+STRTO_H(strtoll, long long) -+STRTO_H(strtoull, unsigned long long) -+STRTO_H(strtou64, u64) -+ -+void bch2_hprint(struct printbuf *buf, s64 v) -+{ -+ int u, t = 0; -+ -+ for (u = 0; v >= 1024 || v <= -1024; u++) { -+ t = v & ~(~0U << 10); -+ v >>= 10; -+ } -+ -+ pr_buf(buf, "%lli", v); -+ -+ /* -+ * 103 is magic: t is in the range [-1023, 1023] and we want -+ * to turn it into [-9, 9] -+ */ -+ if (u && v < 100 && v > -100) -+ pr_buf(buf, ".%i", t / 103); -+ if (u) -+ pr_buf(buf, "%c", si_units[u]); -+} -+ -+void bch2_string_opt_to_text(struct printbuf *out, -+ const char * const list[], -+ size_t selected) -+{ -+ size_t i; -+ -+ for (i = 0; list[i]; i++) -+ pr_buf(out, i == selected ? "[%s] " : "%s ", list[i]); -+} -+ -+void bch2_flags_to_text(struct printbuf *out, -+ const char * const list[], u64 flags) -+{ -+ unsigned bit, nr = 0; -+ bool first = true; -+ -+ if (out->pos != out->end) -+ *out->pos = '\0'; -+ -+ while (list[nr]) -+ nr++; -+ -+ while (flags && (bit = __ffs(flags)) < nr) { -+ if (!first) -+ pr_buf(out, ","); -+ first = false; -+ pr_buf(out, "%s", list[bit]); -+ flags ^= 1 << bit; -+ } -+} -+ -+u64 bch2_read_flag_list(char *opt, const char * const list[]) -+{ -+ u64 ret = 0; -+ char *p, *s, *d = kstrndup(opt, PAGE_SIZE - 1, GFP_KERNEL); -+ -+ if (!d) -+ return -ENOMEM; -+ -+ s = strim(d); -+ -+ while ((p = strsep(&s, ","))) { -+ int flag = match_string(list, -1, p); -+ if (flag < 0) { -+ ret = -1; -+ break; -+ } -+ -+ ret |= 1 << flag; -+ } -+ -+ kfree(d); -+ -+ return ret; -+} -+ -+bool bch2_is_zero(const void *_p, size_t n) -+{ -+ const char *p = _p; -+ size_t i; -+ -+ for (i = 0; i < n; i++) -+ if (p[i]) -+ return false; -+ return true; -+} -+ -+static void bch2_quantiles_update(struct quantiles *q, u64 v) -+{ -+ unsigned i = 0; -+ -+ while (i < ARRAY_SIZE(q->entries)) { -+ struct quantile_entry *e = q->entries + i; -+ -+ if (unlikely(!e->step)) { -+ e->m = v; -+ e->step = max_t(unsigned, v / 2, 1024); -+ } else if (e->m > v) { -+ e->m = e->m >= e->step -+ ? 
e->m - e->step -+ : 0; -+ } else if (e->m < v) { -+ e->m = e->m + e->step > e->m -+ ? e->m + e->step -+ : U32_MAX; -+ } -+ -+ if ((e->m > v ? e->m - v : v - e->m) < e->step) -+ e->step = max_t(unsigned, e->step / 2, 1); -+ -+ if (v >= e->m) -+ break; -+ -+ i = eytzinger0_child(i, v > e->m); -+ } -+} -+ -+/* time stats: */ -+ -+static void bch2_time_stats_update_one(struct time_stats *stats, -+ u64 start, u64 end) -+{ -+ u64 duration, freq; -+ -+ duration = time_after64(end, start) -+ ? end - start : 0; -+ freq = time_after64(end, stats->last_event) -+ ? end - stats->last_event : 0; -+ -+ stats->count++; -+ -+ stats->average_duration = stats->average_duration -+ ? ewma_add(stats->average_duration, duration, 6) -+ : duration; -+ -+ stats->average_frequency = stats->average_frequency -+ ? ewma_add(stats->average_frequency, freq, 6) -+ : freq; -+ -+ stats->max_duration = max(stats->max_duration, duration); -+ -+ stats->last_event = end; -+ -+ bch2_quantiles_update(&stats->quantiles, duration); -+} -+ -+void __bch2_time_stats_update(struct time_stats *stats, u64 start, u64 end) -+{ -+ unsigned long flags; -+ -+ if (!stats->buffer) { -+ spin_lock_irqsave(&stats->lock, flags); -+ bch2_time_stats_update_one(stats, start, end); -+ -+ if (stats->average_frequency < 32 && -+ stats->count > 1024) -+ stats->buffer = -+ alloc_percpu_gfp(struct time_stat_buffer, -+ GFP_ATOMIC); -+ spin_unlock_irqrestore(&stats->lock, flags); -+ } else { -+ struct time_stat_buffer_entry *i; -+ struct time_stat_buffer *b; -+ -+ preempt_disable(); -+ b = this_cpu_ptr(stats->buffer); -+ -+ BUG_ON(b->nr >= ARRAY_SIZE(b->entries)); -+ b->entries[b->nr++] = (struct time_stat_buffer_entry) { -+ .start = start, -+ .end = end -+ }; -+ -+ if (b->nr == ARRAY_SIZE(b->entries)) { -+ spin_lock_irqsave(&stats->lock, flags); -+ for (i = b->entries; -+ i < b->entries + ARRAY_SIZE(b->entries); -+ i++) -+ bch2_time_stats_update_one(stats, i->start, i->end); -+ spin_unlock_irqrestore(&stats->lock, flags); -+ -+ b->nr = 0; -+ } -+ -+ preempt_enable(); -+ } -+} -+ -+static const struct time_unit { -+ const char *name; -+ u32 nsecs; -+} time_units[] = { -+ { "ns", 1 }, -+ { "us", NSEC_PER_USEC }, -+ { "ms", NSEC_PER_MSEC }, -+ { "sec", NSEC_PER_SEC }, -+}; -+ -+static const struct time_unit *pick_time_units(u64 ns) -+{ -+ const struct time_unit *u; -+ -+ for (u = time_units; -+ u + 1 < time_units + ARRAY_SIZE(time_units) && -+ ns >= u[1].nsecs << 1; -+ u++) -+ ; -+ -+ return u; -+} -+ -+static void pr_time_units(struct printbuf *out, u64 ns) -+{ -+ const struct time_unit *u = pick_time_units(ns); -+ -+ pr_buf(out, "%llu %s", div_u64(ns, u->nsecs), u->name); -+} -+ -+void bch2_time_stats_to_text(struct printbuf *out, struct time_stats *stats) -+{ -+ const struct time_unit *u; -+ u64 freq = READ_ONCE(stats->average_frequency); -+ u64 q, last_q = 0; -+ int i; -+ -+ pr_buf(out, "count:\t\t%llu\n", -+ stats->count); -+ pr_buf(out, "rate:\t\t%llu/sec\n", -+ freq ? 
div64_u64(NSEC_PER_SEC, freq) : 0); -+ -+ pr_buf(out, "frequency:\t"); -+ pr_time_units(out, freq); -+ -+ pr_buf(out, "\navg duration:\t"); -+ pr_time_units(out, stats->average_duration); -+ -+ pr_buf(out, "\nmax duration:\t"); -+ pr_time_units(out, stats->max_duration); -+ -+ i = eytzinger0_first(NR_QUANTILES); -+ u = pick_time_units(stats->quantiles.entries[i].m); -+ -+ pr_buf(out, "\nquantiles (%s):\t", u->name); -+ eytzinger0_for_each(i, NR_QUANTILES) { -+ bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1; -+ -+ q = max(stats->quantiles.entries[i].m, last_q); -+ pr_buf(out, "%llu%s", -+ div_u64(q, u->nsecs), -+ is_last ? "\n" : " "); -+ last_q = q; -+ } -+} -+ -+void bch2_time_stats_exit(struct time_stats *stats) -+{ -+ free_percpu(stats->buffer); -+} -+ -+void bch2_time_stats_init(struct time_stats *stats) -+{ -+ memset(stats, 0, sizeof(*stats)); -+ spin_lock_init(&stats->lock); -+} -+ -+/* ratelimit: */ -+ -+/** -+ * bch2_ratelimit_delay() - return how long to delay until the next time to do -+ * some work -+ * -+ * @d - the struct bch_ratelimit to update -+ * -+ * Returns the amount of time to delay by, in jiffies -+ */ -+u64 bch2_ratelimit_delay(struct bch_ratelimit *d) -+{ -+ u64 now = local_clock(); -+ -+ return time_after64(d->next, now) -+ ? nsecs_to_jiffies(d->next - now) -+ : 0; -+} -+ -+/** -+ * bch2_ratelimit_increment() - increment @d by the amount of work done -+ * -+ * @d - the struct bch_ratelimit to update -+ * @done - the amount of work done, in arbitrary units -+ */ -+void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done) -+{ -+ u64 now = local_clock(); -+ -+ d->next += div_u64(done * NSEC_PER_SEC, d->rate); -+ -+ if (time_before64(now + NSEC_PER_SEC, d->next)) -+ d->next = now + NSEC_PER_SEC; -+ -+ if (time_after64(now - NSEC_PER_SEC * 2, d->next)) -+ d->next = now - NSEC_PER_SEC * 2; -+} -+ -+/* pd controller: */ -+ -+/* -+ * Updates pd_controller. Attempts to scale inputed values to units per second. -+ * @target: desired value -+ * @actual: current value -+ * -+ * @sign: 1 or -1; 1 if increasing the rate makes actual go up, -1 if increasing -+ * it makes actual go down. 
-+ */ -+void bch2_pd_controller_update(struct bch_pd_controller *pd, -+ s64 target, s64 actual, int sign) -+{ -+ s64 proportional, derivative, change; -+ -+ unsigned long seconds_since_update = (jiffies - pd->last_update) / HZ; -+ -+ if (seconds_since_update == 0) -+ return; -+ -+ pd->last_update = jiffies; -+ -+ proportional = actual - target; -+ proportional *= seconds_since_update; -+ proportional = div_s64(proportional, pd->p_term_inverse); -+ -+ derivative = actual - pd->last_actual; -+ derivative = div_s64(derivative, seconds_since_update); -+ derivative = ewma_add(pd->smoothed_derivative, derivative, -+ (pd->d_term / seconds_since_update) ?: 1); -+ derivative = derivative * pd->d_term; -+ derivative = div_s64(derivative, pd->p_term_inverse); -+ -+ change = proportional + derivative; -+ -+ /* Don't increase rate if not keeping up */ -+ if (change > 0 && -+ pd->backpressure && -+ time_after64(local_clock(), -+ pd->rate.next + NSEC_PER_MSEC)) -+ change = 0; -+ -+ change *= (sign * -1); -+ -+ pd->rate.rate = clamp_t(s64, (s64) pd->rate.rate + change, -+ 1, UINT_MAX); -+ -+ pd->last_actual = actual; -+ pd->last_derivative = derivative; -+ pd->last_proportional = proportional; -+ pd->last_change = change; -+ pd->last_target = target; -+} -+ -+void bch2_pd_controller_init(struct bch_pd_controller *pd) -+{ -+ pd->rate.rate = 1024; -+ pd->last_update = jiffies; -+ pd->p_term_inverse = 6000; -+ pd->d_term = 30; -+ pd->d_smooth = pd->d_term; -+ pd->backpressure = 1; -+} -+ -+size_t bch2_pd_controller_print_debug(struct bch_pd_controller *pd, char *buf) -+{ -+ /* 2^64 - 1 is 20 digits, plus null byte */ -+ char rate[21]; -+ char actual[21]; -+ char target[21]; -+ char proportional[21]; -+ char derivative[21]; -+ char change[21]; -+ s64 next_io; -+ -+ bch2_hprint(&PBUF(rate), pd->rate.rate); -+ bch2_hprint(&PBUF(actual), pd->last_actual); -+ bch2_hprint(&PBUF(target), pd->last_target); -+ bch2_hprint(&PBUF(proportional), pd->last_proportional); -+ bch2_hprint(&PBUF(derivative), pd->last_derivative); -+ bch2_hprint(&PBUF(change), pd->last_change); -+ -+ next_io = div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC); -+ -+ return sprintf(buf, -+ "rate:\t\t%s/sec\n" -+ "target:\t\t%s\n" -+ "actual:\t\t%s\n" -+ "proportional:\t%s\n" -+ "derivative:\t%s\n" -+ "change:\t\t%s/sec\n" -+ "next io:\t%llims\n", -+ rate, target, actual, proportional, -+ derivative, change, next_io); -+} -+ -+/* misc: */ -+ -+void bch2_bio_map(struct bio *bio, void *base, size_t size) -+{ -+ while (size) { -+ struct page *page = is_vmalloc_addr(base) -+ ? 
vmalloc_to_page(base) -+ : virt_to_page(base); -+ unsigned offset = offset_in_page(base); -+ unsigned len = min_t(size_t, PAGE_SIZE - offset, size); -+ -+ BUG_ON(!bio_add_page(bio, page, len, offset)); -+ size -= len; -+ base += len; -+ } -+} -+ -+int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) -+{ -+ while (size) { -+ struct page *page = alloc_page(gfp_mask); -+ unsigned len = min(PAGE_SIZE, size); -+ -+ if (!page) -+ return -ENOMEM; -+ -+ BUG_ON(!bio_add_page(bio, page, len, 0)); -+ size -= len; -+ } -+ -+ return 0; -+} -+ -+size_t bch2_rand_range(size_t max) -+{ -+ size_t rand; -+ -+ if (!max) -+ return 0; -+ -+ do { -+ rand = get_random_long(); -+ rand &= roundup_pow_of_two(max) - 1; -+ } while (rand >= max); -+ -+ return rand; -+} -+ -+void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ -+ __bio_for_each_segment(bv, dst, iter, dst_iter) { -+ void *dstp = kmap_atomic(bv.bv_page); -+ memcpy(dstp + bv.bv_offset, src, bv.bv_len); -+ kunmap_atomic(dstp); -+ -+ src += bv.bv_len; -+ } -+} -+ -+void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ -+ __bio_for_each_segment(bv, src, iter, src_iter) { -+ void *srcp = kmap_atomic(bv.bv_page); -+ memcpy(dst, srcp + bv.bv_offset, bv.bv_len); -+ kunmap_atomic(srcp); -+ -+ dst += bv.bv_len; -+ } -+} -+ -+void bch_scnmemcpy(struct printbuf *out, -+ const char *src, size_t len) -+{ -+ size_t n = printbuf_remaining(out); -+ -+ if (n) { -+ n = min(n - 1, len); -+ memcpy(out->pos, src, n); -+ out->pos += n; -+ *out->pos = '\0'; -+ } -+} -+ -+#include "eytzinger.h" -+ -+static int alignment_ok(const void *base, size_t align) -+{ -+ return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || -+ ((unsigned long)base & (align - 1)) == 0; -+} -+ -+static void u32_swap(void *a, void *b, size_t size) -+{ -+ u32 t = *(u32 *)a; -+ *(u32 *)a = *(u32 *)b; -+ *(u32 *)b = t; -+} -+ -+static void u64_swap(void *a, void *b, size_t size) -+{ -+ u64 t = *(u64 *)a; -+ *(u64 *)a = *(u64 *)b; -+ *(u64 *)b = t; -+} -+ -+static void generic_swap(void *a, void *b, size_t size) -+{ -+ char t; -+ -+ do { -+ t = *(char *)a; -+ *(char *)a++ = *(char *)b; -+ *(char *)b++ = t; -+ } while (--size > 0); -+} -+ -+static inline int do_cmp(void *base, size_t n, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ size_t l, size_t r) -+{ -+ return cmp_func(base + inorder_to_eytzinger0(l, n) * size, -+ base + inorder_to_eytzinger0(r, n) * size, -+ size); -+} -+ -+static inline void do_swap(void *base, size_t n, size_t size, -+ void (*swap_func)(void *, void *, size_t), -+ size_t l, size_t r) -+{ -+ swap_func(base + inorder_to_eytzinger0(l, n) * size, -+ base + inorder_to_eytzinger0(r, n) * size, -+ size); -+} -+ -+void eytzinger0_sort(void *base, size_t n, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t)) -+{ -+ int i, c, r; -+ -+ if (!swap_func) { -+ if (size == 4 && alignment_ok(base, 4)) -+ swap_func = u32_swap; -+ else if (size == 8 && alignment_ok(base, 8)) -+ swap_func = u64_swap; -+ else -+ swap_func = generic_swap; -+ } -+ -+ /* heapify */ -+ for (i = n / 2 - 1; i >= 0; --i) { -+ for (r = i; r * 2 + 1 < n; r = c) { -+ c = r * 2 + 1; -+ -+ if (c + 1 < n && -+ do_cmp(base, n, size, cmp_func, c, c + 1) < 0) -+ c++; -+ -+ if (do_cmp(base, n, size, cmp_func, r, c) >= 0) -+ break; -+ -+ do_swap(base, n, size, swap_func, r, 
c); -+ } -+ } -+ -+ /* sort */ -+ for (i = n - 1; i > 0; --i) { -+ do_swap(base, n, size, swap_func, 0, i); -+ -+ for (r = 0; r * 2 + 1 < i; r = c) { -+ c = r * 2 + 1; -+ -+ if (c + 1 < i && -+ do_cmp(base, n, size, cmp_func, c, c + 1) < 0) -+ c++; -+ -+ if (do_cmp(base, n, size, cmp_func, r, c) >= 0) -+ break; -+ -+ do_swap(base, n, size, swap_func, r, c); -+ } -+ } -+} -+ -+void sort_cmp_size(void *base, size_t num, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t size)) -+{ -+ /* pre-scale counters for performance */ -+ int i = (num/2 - 1) * size, n = num * size, c, r; -+ -+ if (!swap_func) { -+ if (size == 4 && alignment_ok(base, 4)) -+ swap_func = u32_swap; -+ else if (size == 8 && alignment_ok(base, 8)) -+ swap_func = u64_swap; -+ else -+ swap_func = generic_swap; -+ } -+ -+ /* heapify */ -+ for ( ; i >= 0; i -= size) { -+ for (r = i; r * 2 + size < n; r = c) { -+ c = r * 2 + size; -+ if (c < n - size && -+ cmp_func(base + c, base + c + size, size) < 0) -+ c += size; -+ if (cmp_func(base + r, base + c, size) >= 0) -+ break; -+ swap_func(base + r, base + c, size); -+ } -+ } -+ -+ /* sort */ -+ for (i = n - size; i > 0; i -= size) { -+ swap_func(base, base + i, size); -+ for (r = 0; r * 2 + size < i; r = c) { -+ c = r * 2 + size; -+ if (c < i - size && -+ cmp_func(base + c, base + c + size, size) < 0) -+ c += size; -+ if (cmp_func(base + r, base + c, size) >= 0) -+ break; -+ swap_func(base + r, base + c, size); -+ } -+ } -+} -+ -+static void mempool_free_vp(void *element, void *pool_data) -+{ -+ size_t size = (size_t) pool_data; -+ -+ vpfree(element, size); -+} -+ -+static void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data) -+{ -+ size_t size = (size_t) pool_data; -+ -+ return vpmalloc(size, gfp_mask); -+} -+ -+int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size) -+{ -+ return size < PAGE_SIZE -+ ? 
mempool_init_kmalloc_pool(pool, min_nr, size) -+ : mempool_init(pool, min_nr, mempool_alloc_vp, -+ mempool_free_vp, (void *) size); -+} -+ -+#if 0 -+void eytzinger1_test(void) -+{ -+ unsigned inorder, eytz, size; -+ -+ pr_info("1 based eytzinger test:"); -+ -+ for (size = 2; -+ size < 65536; -+ size++) { -+ unsigned extra = eytzinger1_extra(size); -+ -+ if (!(size % 4096)) -+ pr_info("tree size %u", size); -+ -+ BUG_ON(eytzinger1_prev(0, size) != eytzinger1_last(size)); -+ BUG_ON(eytzinger1_next(0, size) != eytzinger1_first(size)); -+ -+ BUG_ON(eytzinger1_prev(eytzinger1_first(size), size) != 0); -+ BUG_ON(eytzinger1_next(eytzinger1_last(size), size) != 0); -+ -+ inorder = 1; -+ eytzinger1_for_each(eytz, size) { -+ BUG_ON(__inorder_to_eytzinger1(inorder, size, extra) != eytz); -+ BUG_ON(__eytzinger1_to_inorder(eytz, size, extra) != inorder); -+ BUG_ON(eytz != eytzinger1_last(size) && -+ eytzinger1_prev(eytzinger1_next(eytz, size), size) != eytz); -+ -+ inorder++; -+ } -+ } -+} -+ -+void eytzinger0_test(void) -+{ -+ -+ unsigned inorder, eytz, size; -+ -+ pr_info("0 based eytzinger test:"); -+ -+ for (size = 1; -+ size < 65536; -+ size++) { -+ unsigned extra = eytzinger0_extra(size); -+ -+ if (!(size % 4096)) -+ pr_info("tree size %u", size); -+ -+ BUG_ON(eytzinger0_prev(-1, size) != eytzinger0_last(size)); -+ BUG_ON(eytzinger0_next(-1, size) != eytzinger0_first(size)); -+ -+ BUG_ON(eytzinger0_prev(eytzinger0_first(size), size) != -1); -+ BUG_ON(eytzinger0_next(eytzinger0_last(size), size) != -1); -+ -+ inorder = 0; -+ eytzinger0_for_each(eytz, size) { -+ BUG_ON(__inorder_to_eytzinger0(inorder, size, extra) != eytz); -+ BUG_ON(__eytzinger0_to_inorder(eytz, size, extra) != inorder); -+ BUG_ON(eytz != eytzinger0_last(size) && -+ eytzinger0_prev(eytzinger0_next(eytz, size), size) != eytz); -+ -+ inorder++; -+ } -+ } -+} -+ -+static inline int cmp_u16(const void *_l, const void *_r, size_t size) -+{ -+ const u16 *l = _l, *r = _r; -+ -+ return (*l > *r) - (*r - *l); -+} -+ -+static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search) -+{ -+ int i, c1 = -1, c2 = -1; -+ ssize_t r; -+ -+ r = eytzinger0_find_le(test_array, nr, -+ sizeof(test_array[0]), -+ cmp_u16, &search); -+ if (r >= 0) -+ c1 = test_array[r]; -+ -+ for (i = 0; i < nr; i++) -+ if (test_array[i] <= search && test_array[i] > c2) -+ c2 = test_array[i]; -+ -+ if (c1 != c2) { -+ eytzinger0_for_each(i, nr) -+ pr_info("[%3u] = %12u", i, test_array[i]); -+ pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i", -+ i, r, c1, c2); -+ } -+} -+ -+void eytzinger0_find_test(void) -+{ -+ unsigned i, nr, allocated = 1 << 12; -+ u16 *test_array = kmalloc_array(allocated, sizeof(test_array[0]), GFP_KERNEL); -+ -+ for (nr = 1; nr < allocated; nr++) { -+ pr_info("testing %u elems", nr); -+ -+ get_random_bytes(test_array, nr * sizeof(test_array[0])); -+ eytzinger0_sort(test_array, nr, sizeof(test_array[0]), cmp_u16, NULL); -+ -+ /* verify array is sorted correctly: */ -+ eytzinger0_for_each(i, nr) -+ BUG_ON(i != eytzinger0_last(nr) && -+ test_array[i] > test_array[eytzinger0_next(i, nr)]); -+ -+ for (i = 0; i < U16_MAX; i += 1 << 12) -+ eytzinger0_find_test_val(test_array, nr, i); -+ -+ for (i = 0; i < nr; i++) { -+ eytzinger0_find_test_val(test_array, nr, test_array[i] - 1); -+ eytzinger0_find_test_val(test_array, nr, test_array[i]); -+ eytzinger0_find_test_val(test_array, nr, test_array[i] + 1); -+ } -+ } -+ -+ kfree(test_array); -+} -+#endif -+ -+/* -+ * Accumulate percpu counters onto one cpu's copy - only valid when access -+ * 
against any percpu counter is guarded against -+ */ -+u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) -+{ -+ u64 *ret; -+ int cpu; -+ -+ preempt_disable(); -+ ret = this_cpu_ptr(p); -+ preempt_enable(); -+ -+ for_each_possible_cpu(cpu) { -+ u64 *i = per_cpu_ptr(p, cpu); -+ -+ if (i != ret) { -+ acc_u64s(ret, i, nr); -+ memset(i, 0, nr * sizeof(u64)); -+ } -+ } -+ -+ return ret; -+} -diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h -new file mode 100644 -index 000000000000..f48c6380684f ---- /dev/null -+++ b/fs/bcachefs/util.h -@@ -0,0 +1,761 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_UTIL_H -+#define _BCACHEFS_UTIL_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define PAGE_SECTOR_SHIFT (PAGE_SHIFT - 9) -+#define PAGE_SECTORS (1UL << PAGE_SECTOR_SHIFT) -+ -+struct closure; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+#define EBUG_ON(cond) BUG_ON(cond) -+#define atomic_dec_bug(v) BUG_ON(atomic_dec_return(v) < 0) -+#define atomic_inc_bug(v, i) BUG_ON(atomic_inc_return(v) <= i) -+#define atomic_sub_bug(i, v) BUG_ON(atomic_sub_return(i, v) < 0) -+#define atomic_add_bug(i, v) BUG_ON(atomic_add_return(i, v) < 0) -+#define atomic_long_dec_bug(v) BUG_ON(atomic_long_dec_return(v) < 0) -+#define atomic_long_sub_bug(i, v) BUG_ON(atomic_long_sub_return(i, v) < 0) -+#define atomic64_dec_bug(v) BUG_ON(atomic64_dec_return(v) < 0) -+#define atomic64_inc_bug(v, i) BUG_ON(atomic64_inc_return(v) <= i) -+#define atomic64_sub_bug(i, v) BUG_ON(atomic64_sub_return(i, v) < 0) -+#define atomic64_add_bug(i, v) BUG_ON(atomic64_add_return(i, v) < 0) -+ -+#define memcpy(dst, src, len) \ -+({ \ -+ void *_dst = (dst); \ -+ const void *_src = (src); \ -+ size_t _len = (len); \ -+ \ -+ BUG_ON(!((void *) (_dst) >= (void *) (_src) + (_len) || \ -+ (void *) (_dst) + (_len) <= (void *) (_src))); \ -+ memcpy(_dst, _src, _len); \ -+}) -+ -+#else /* DEBUG */ -+ -+#define EBUG_ON(cond) -+#define atomic_dec_bug(v) atomic_dec(v) -+#define atomic_inc_bug(v, i) atomic_inc(v) -+#define atomic_sub_bug(i, v) atomic_sub(i, v) -+#define atomic_add_bug(i, v) atomic_add(i, v) -+#define atomic_long_dec_bug(v) atomic_long_dec(v) -+#define atomic_long_sub_bug(i, v) atomic_long_sub(i, v) -+#define atomic64_dec_bug(v) atomic64_dec(v) -+#define atomic64_inc_bug(v, i) atomic64_inc(v) -+#define atomic64_sub_bug(i, v) atomic64_sub(i, v) -+#define atomic64_add_bug(i, v) atomic64_add(i, v) -+ -+#endif -+ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+#define CPU_BIG_ENDIAN 0 -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+#define CPU_BIG_ENDIAN 1 -+#endif -+ -+/* type hackery */ -+ -+#define type_is_exact(_val, _type) \ -+ __builtin_types_compatible_p(typeof(_val), _type) -+ -+#define type_is(_val, _type) \ -+ (__builtin_types_compatible_p(typeof(_val), _type) || \ -+ __builtin_types_compatible_p(typeof(_val), const _type)) -+ -+/* Userspace doesn't align allocations as nicely as the kernel allocators: */ -+static inline size_t buf_pages(void *p, size_t len) -+{ -+ return DIV_ROUND_UP(len + -+ ((unsigned long) p & (PAGE_SIZE - 1)), -+ PAGE_SIZE); -+} -+ -+static inline void vpfree(void *p, size_t size) -+{ -+ if (is_vmalloc_addr(p)) -+ vfree(p); -+ else -+ free_pages((unsigned long) p, get_order(size)); -+} -+ -+static inline void *vpmalloc(size_t size, gfp_t gfp_mask) -+{ -+ return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN, -+ get_order(size)) ?: -+ __vmalloc(size, gfp_mask); -+} -+ 
-+static inline void kvpfree(void *p, size_t size) -+{ -+ if (size < PAGE_SIZE) -+ kfree(p); -+ else -+ vpfree(p, size); -+} -+ -+static inline void *kvpmalloc(size_t size, gfp_t gfp_mask) -+{ -+ return size < PAGE_SIZE -+ ? kmalloc(size, gfp_mask) -+ : vpmalloc(size, gfp_mask); -+} -+ -+int mempool_init_kvpmalloc_pool(mempool_t *, int, size_t); -+ -+#define HEAP(type) \ -+struct { \ -+ size_t size, used; \ -+ type *data; \ -+} -+ -+#define DECLARE_HEAP(type, name) HEAP(type) name -+ -+#define init_heap(heap, _size, gfp) \ -+({ \ -+ (heap)->used = 0; \ -+ (heap)->size = (_size); \ -+ (heap)->data = kvpmalloc((heap)->size * sizeof((heap)->data[0]),\ -+ (gfp)); \ -+}) -+ -+#define free_heap(heap) \ -+do { \ -+ kvpfree((heap)->data, (heap)->size * sizeof((heap)->data[0])); \ -+ (heap)->data = NULL; \ -+} while (0) -+ -+#define heap_set_backpointer(h, i, _fn) \ -+do { \ -+ void (*fn)(typeof(h), size_t) = _fn; \ -+ if (fn) \ -+ fn(h, i); \ -+} while (0) -+ -+#define heap_swap(h, i, j, set_backpointer) \ -+do { \ -+ swap((h)->data[i], (h)->data[j]); \ -+ heap_set_backpointer(h, i, set_backpointer); \ -+ heap_set_backpointer(h, j, set_backpointer); \ -+} while (0) -+ -+#define heap_peek(h) \ -+({ \ -+ EBUG_ON(!(h)->used); \ -+ (h)->data[0]; \ -+}) -+ -+#define heap_full(h) ((h)->used == (h)->size) -+ -+#define heap_sift_down(h, i, cmp, set_backpointer) \ -+do { \ -+ size_t _c, _j = i; \ -+ \ -+ for (; _j * 2 + 1 < (h)->used; _j = _c) { \ -+ _c = _j * 2 + 1; \ -+ if (_c + 1 < (h)->used && \ -+ cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0) \ -+ _c++; \ -+ \ -+ if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \ -+ break; \ -+ heap_swap(h, _c, _j, set_backpointer); \ -+ } \ -+} while (0) -+ -+#define heap_sift_up(h, i, cmp, set_backpointer) \ -+do { \ -+ while (i) { \ -+ size_t p = (i - 1) / 2; \ -+ if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \ -+ break; \ -+ heap_swap(h, i, p, set_backpointer); \ -+ i = p; \ -+ } \ -+} while (0) -+ -+#define __heap_add(h, d, cmp, set_backpointer) \ -+({ \ -+ size_t _i = (h)->used++; \ -+ (h)->data[_i] = d; \ -+ heap_set_backpointer(h, _i, set_backpointer); \ -+ \ -+ heap_sift_up(h, _i, cmp, set_backpointer); \ -+ _i; \ -+}) -+ -+#define heap_add(h, d, cmp, set_backpointer) \ -+({ \ -+ bool _r = !heap_full(h); \ -+ if (_r) \ -+ __heap_add(h, d, cmp, set_backpointer); \ -+ _r; \ -+}) -+ -+#define heap_add_or_replace(h, new, cmp, set_backpointer) \ -+do { \ -+ if (!heap_add(h, new, cmp, set_backpointer) && \ -+ cmp(h, new, heap_peek(h)) >= 0) { \ -+ (h)->data[0] = new; \ -+ heap_set_backpointer(h, 0, set_backpointer); \ -+ heap_sift_down(h, 0, cmp, set_backpointer); \ -+ } \ -+} while (0) -+ -+#define heap_del(h, i, cmp, set_backpointer) \ -+do { \ -+ size_t _i = (i); \ -+ \ -+ BUG_ON(_i >= (h)->used); \ -+ (h)->used--; \ -+ heap_swap(h, _i, (h)->used, set_backpointer); \ -+ heap_sift_up(h, _i, cmp, set_backpointer); \ -+ heap_sift_down(h, _i, cmp, set_backpointer); \ -+} while (0) -+ -+#define heap_pop(h, d, cmp, set_backpointer) \ -+({ \ -+ bool _r = (h)->used; \ -+ if (_r) { \ -+ (d) = (h)->data[0]; \ -+ heap_del(h, 0, cmp, set_backpointer); \ -+ } \ -+ _r; \ -+}) -+ -+#define heap_resort(heap, cmp, set_backpointer) \ -+do { \ -+ ssize_t _i; \ -+ for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \ -+ heap_sift_down(heap, _i, cmp, set_backpointer); \ -+} while (0) -+ -+#define ANYSINT_MAX(t) \ -+ ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1) -+ -+struct printbuf { -+ char *pos; -+ char *end; -+}; -+ -+static inline size_t printbuf_remaining(struct 
printbuf *buf) -+{ -+ return buf->end - buf->pos; -+} -+ -+#define _PBUF(_buf, _len) \ -+ ((struct printbuf) { \ -+ .pos = _buf, \ -+ .end = _buf + _len, \ -+ }) -+ -+#define PBUF(_buf) _PBUF(_buf, sizeof(_buf)) -+ -+#define pr_buf(_out, ...) \ -+do { \ -+ (_out)->pos += scnprintf((_out)->pos, printbuf_remaining(_out), \ -+ __VA_ARGS__); \ -+} while (0) -+ -+void bch_scnmemcpy(struct printbuf *, const char *, size_t); -+ -+int bch2_strtoint_h(const char *, int *); -+int bch2_strtouint_h(const char *, unsigned int *); -+int bch2_strtoll_h(const char *, long long *); -+int bch2_strtoull_h(const char *, unsigned long long *); -+int bch2_strtou64_h(const char *, u64 *); -+ -+static inline int bch2_strtol_h(const char *cp, long *res) -+{ -+#if BITS_PER_LONG == 32 -+ return bch2_strtoint_h(cp, (int *) res); -+#else -+ return bch2_strtoll_h(cp, (long long *) res); -+#endif -+} -+ -+static inline int bch2_strtoul_h(const char *cp, long *res) -+{ -+#if BITS_PER_LONG == 32 -+ return bch2_strtouint_h(cp, (unsigned int *) res); -+#else -+ return bch2_strtoull_h(cp, (unsigned long long *) res); -+#endif -+} -+ -+#define strtoi_h(cp, res) \ -+ ( type_is(*res, int) ? bch2_strtoint_h(cp, (void *) res)\ -+ : type_is(*res, long) ? bch2_strtol_h(cp, (void *) res)\ -+ : type_is(*res, long long) ? bch2_strtoll_h(cp, (void *) res)\ -+ : type_is(*res, unsigned) ? bch2_strtouint_h(cp, (void *) res)\ -+ : type_is(*res, unsigned long) ? bch2_strtoul_h(cp, (void *) res)\ -+ : type_is(*res, unsigned long long) ? bch2_strtoull_h(cp, (void *) res)\ -+ : -EINVAL) -+ -+#define strtoul_safe(cp, var) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (!_r) \ -+ var = _v; \ -+ _r; \ -+}) -+ -+#define strtoul_safe_clamp(cp, var, min, max) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (!_r) \ -+ var = clamp_t(typeof(var), _v, min, max); \ -+ _r; \ -+}) -+ -+#define strtoul_safe_restrict(cp, var, min, max) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (!_r && _v >= min && _v <= max) \ -+ var = _v; \ -+ else \ -+ _r = -EINVAL; \ -+ _r; \ -+}) -+ -+#define snprint(buf, size, var) \ -+ snprintf(buf, size, \ -+ type_is(var, int) ? "%i\n" \ -+ : type_is(var, unsigned) ? "%u\n" \ -+ : type_is(var, long) ? "%li\n" \ -+ : type_is(var, unsigned long) ? "%lu\n" \ -+ : type_is(var, s64) ? "%lli\n" \ -+ : type_is(var, u64) ? "%llu\n" \ -+ : type_is(var, char *) ? 
"%s\n" \ -+ : "%i\n", var) -+ -+void bch2_hprint(struct printbuf *, s64); -+ -+bool bch2_is_zero(const void *, size_t); -+ -+void bch2_string_opt_to_text(struct printbuf *, -+ const char * const [], size_t); -+ -+void bch2_flags_to_text(struct printbuf *, const char * const[], u64); -+u64 bch2_read_flag_list(char *, const char * const[]); -+ -+#define NR_QUANTILES 15 -+#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES) -+#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES) -+#define QUANTILE_LAST eytzinger0_last(NR_QUANTILES) -+ -+struct quantiles { -+ struct quantile_entry { -+ u64 m; -+ u64 step; -+ } entries[NR_QUANTILES]; -+}; -+ -+struct time_stat_buffer { -+ unsigned nr; -+ struct time_stat_buffer_entry { -+ u64 start; -+ u64 end; -+ } entries[32]; -+}; -+ -+struct time_stats { -+ spinlock_t lock; -+ u64 count; -+ /* all fields are in nanoseconds */ -+ u64 average_duration; -+ u64 average_frequency; -+ u64 max_duration; -+ u64 last_event; -+ struct quantiles quantiles; -+ -+ struct time_stat_buffer __percpu *buffer; -+}; -+ -+void __bch2_time_stats_update(struct time_stats *stats, u64, u64); -+ -+static inline void bch2_time_stats_update(struct time_stats *stats, u64 start) -+{ -+ __bch2_time_stats_update(stats, start, local_clock()); -+} -+ -+void bch2_time_stats_to_text(struct printbuf *, struct time_stats *); -+ -+void bch2_time_stats_exit(struct time_stats *); -+void bch2_time_stats_init(struct time_stats *); -+ -+#define ewma_add(ewma, val, weight) \ -+({ \ -+ typeof(ewma) _ewma = (ewma); \ -+ typeof(weight) _weight = (weight); \ -+ \ -+ (((_ewma << _weight) - _ewma) + (val)) >> _weight; \ -+}) -+ -+struct bch_ratelimit { -+ /* Next time we want to do some work, in nanoseconds */ -+ u64 next; -+ -+ /* -+ * Rate at which we want to do work, in units per nanosecond -+ * The units here correspond to the units passed to -+ * bch2_ratelimit_increment() -+ */ -+ unsigned rate; -+}; -+ -+static inline void bch2_ratelimit_reset(struct bch_ratelimit *d) -+{ -+ d->next = local_clock(); -+} -+ -+u64 bch2_ratelimit_delay(struct bch_ratelimit *); -+void bch2_ratelimit_increment(struct bch_ratelimit *, u64); -+ -+struct bch_pd_controller { -+ struct bch_ratelimit rate; -+ unsigned long last_update; -+ -+ s64 last_actual; -+ s64 smoothed_derivative; -+ -+ unsigned p_term_inverse; -+ unsigned d_smooth; -+ unsigned d_term; -+ -+ /* for exporting to sysfs (no effect on behavior) */ -+ s64 last_derivative; -+ s64 last_proportional; -+ s64 last_change; -+ s64 last_target; -+ -+ /* If true, the rate will not increase if bch2_ratelimit_delay() -+ * is not being called often enough. 
*/ -+ bool backpressure; -+}; -+ -+void bch2_pd_controller_update(struct bch_pd_controller *, s64, s64, int); -+void bch2_pd_controller_init(struct bch_pd_controller *); -+size_t bch2_pd_controller_print_debug(struct bch_pd_controller *, char *); -+ -+#define sysfs_pd_controller_attribute(name) \ -+ rw_attribute(name##_rate); \ -+ rw_attribute(name##_rate_bytes); \ -+ rw_attribute(name##_rate_d_term); \ -+ rw_attribute(name##_rate_p_term_inverse); \ -+ read_attribute(name##_rate_debug) -+ -+#define sysfs_pd_controller_files(name) \ -+ &sysfs_##name##_rate, \ -+ &sysfs_##name##_rate_bytes, \ -+ &sysfs_##name##_rate_d_term, \ -+ &sysfs_##name##_rate_p_term_inverse, \ -+ &sysfs_##name##_rate_debug -+ -+#define sysfs_pd_controller_show(name, var) \ -+do { \ -+ sysfs_hprint(name##_rate, (var)->rate.rate); \ -+ sysfs_print(name##_rate_bytes, (var)->rate.rate); \ -+ sysfs_print(name##_rate_d_term, (var)->d_term); \ -+ sysfs_print(name##_rate_p_term_inverse, (var)->p_term_inverse); \ -+ \ -+ if (attr == &sysfs_##name##_rate_debug) \ -+ return bch2_pd_controller_print_debug(var, buf); \ -+} while (0) -+ -+#define sysfs_pd_controller_store(name, var) \ -+do { \ -+ sysfs_strtoul_clamp(name##_rate, \ -+ (var)->rate.rate, 1, UINT_MAX); \ -+ sysfs_strtoul_clamp(name##_rate_bytes, \ -+ (var)->rate.rate, 1, UINT_MAX); \ -+ sysfs_strtoul(name##_rate_d_term, (var)->d_term); \ -+ sysfs_strtoul_clamp(name##_rate_p_term_inverse, \ -+ (var)->p_term_inverse, 1, INT_MAX); \ -+} while (0) -+ -+#define container_of_or_null(ptr, type, member) \ -+({ \ -+ typeof(ptr) _ptr = ptr; \ -+ _ptr ? container_of(_ptr, type, member) : NULL; \ -+}) -+ -+/* Does linear interpolation between powers of two */ -+static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) -+{ -+ unsigned fract = x & ~(~0 << fract_bits); -+ -+ x >>= fract_bits; -+ x = 1 << x; -+ x += (x * fract) >> fract_bits; -+ -+ return x; -+} -+ -+void bch2_bio_map(struct bio *bio, void *base, size_t); -+int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t); -+ -+static inline sector_t bdev_sectors(struct block_device *bdev) -+{ -+ return bdev->bd_inode->i_size >> 9; -+} -+ -+#define closure_bio_submit(bio, cl) \ -+do { \ -+ closure_get(cl); \ -+ submit_bio(bio); \ -+} while (0) -+ -+#define kthread_wait_freezable(cond) \ -+({ \ -+ int _ret = 0; \ -+ while (1) { \ -+ set_current_state(TASK_INTERRUPTIBLE); \ -+ if (kthread_should_stop()) { \ -+ _ret = -1; \ -+ break; \ -+ } \ -+ \ -+ if (cond) \ -+ break; \ -+ \ -+ schedule(); \ -+ try_to_freeze(); \ -+ } \ -+ set_current_state(TASK_RUNNING); \ -+ _ret; \ -+}) -+ -+size_t bch2_rand_range(size_t); -+ -+void memcpy_to_bio(struct bio *, struct bvec_iter, const void *); -+void memcpy_from_bio(void *, struct bio *, struct bvec_iter); -+ -+static inline void memcpy_u64s_small(void *dst, const void *src, -+ unsigned u64s) -+{ -+ u64 *d = dst; -+ const u64 *s = src; -+ -+ while (u64s--) -+ *d++ = *s++; -+} -+ -+static inline void __memcpy_u64s(void *dst, const void *src, -+ unsigned u64s) -+{ -+#ifdef CONFIG_X86_64 -+ long d0, d1, d2; -+ asm volatile("rep ; movsq" -+ : "=&c" (d0), "=&D" (d1), "=&S" (d2) -+ : "0" (u64s), "1" (dst), "2" (src) -+ : "memory"); -+#else -+ u64 *d = dst; -+ const u64 *s = src; -+ -+ while (u64s--) -+ *d++ = *s++; -+#endif -+} -+ -+static inline void memcpy_u64s(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(!(dst >= src + u64s * sizeof(u64) || -+ dst + u64s * sizeof(u64) <= src)); -+ -+ __memcpy_u64s(dst, src, u64s); -+} -+ -+static inline void __memmove_u64s_down(void 
*dst, const void *src, -+ unsigned u64s) -+{ -+ __memcpy_u64s(dst, src, u64s); -+} -+ -+static inline void memmove_u64s_down(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(dst > src); -+ -+ __memmove_u64s_down(dst, src, u64s); -+} -+ -+static inline void __memmove_u64s_up_small(void *_dst, const void *_src, -+ unsigned u64s) -+{ -+ u64 *dst = (u64 *) _dst + u64s; -+ u64 *src = (u64 *) _src + u64s; -+ -+ while (u64s--) -+ *--dst = *--src; -+} -+ -+static inline void memmove_u64s_up_small(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(dst < src); -+ -+ __memmove_u64s_up_small(dst, src, u64s); -+} -+ -+static inline void __memmove_u64s_up(void *_dst, const void *_src, -+ unsigned u64s) -+{ -+ u64 *dst = (u64 *) _dst + u64s - 1; -+ u64 *src = (u64 *) _src + u64s - 1; -+ -+#ifdef CONFIG_X86_64 -+ long d0, d1, d2; -+ asm volatile("std ;\n" -+ "rep ; movsq\n" -+ "cld ;\n" -+ : "=&c" (d0), "=&D" (d1), "=&S" (d2) -+ : "0" (u64s), "1" (dst), "2" (src) -+ : "memory"); -+#else -+ while (u64s--) -+ *dst-- = *src--; -+#endif -+} -+ -+static inline void memmove_u64s_up(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(dst < src); -+ -+ __memmove_u64s_up(dst, src, u64s); -+} -+ -+static inline void memmove_u64s(void *dst, const void *src, -+ unsigned u64s) -+{ -+ if (dst < src) -+ __memmove_u64s_down(dst, src, u64s); -+ else -+ __memmove_u64s_up(dst, src, u64s); -+} -+ -+/* Set the last few bytes up to a u64 boundary given an offset into a buffer. */ -+static inline void memset_u64s_tail(void *s, int c, unsigned bytes) -+{ -+ unsigned rem = round_up(bytes, sizeof(u64)) - bytes; -+ -+ memset(s + bytes, c, rem); -+} -+ -+void sort_cmp_size(void *base, size_t num, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t)); -+ -+/* just the memmove, doesn't update @_nr */ -+#define __array_insert_item(_array, _nr, _pos) \ -+ memmove(&(_array)[(_pos) + 1], \ -+ &(_array)[(_pos)], \ -+ sizeof((_array)[0]) * ((_nr) - (_pos))) -+ -+#define array_insert_item(_array, _nr, _pos, _new_item) \ -+do { \ -+ __array_insert_item(_array, _nr, _pos); \ -+ (_nr)++; \ -+ (_array)[(_pos)] = (_new_item); \ -+} while (0) -+ -+#define array_remove_items(_array, _nr, _pos, _nr_to_remove) \ -+do { \ -+ (_nr) -= (_nr_to_remove); \ -+ memmove(&(_array)[(_pos)], \ -+ &(_array)[(_pos) + (_nr_to_remove)], \ -+ sizeof((_array)[0]) * ((_nr) - (_pos))); \ -+} while (0) -+ -+#define array_remove_item(_array, _nr, _pos) \ -+ array_remove_items(_array, _nr, _pos, 1) -+ -+#define bubble_sort(_base, _nr, _cmp) \ -+do { \ -+ ssize_t _i, _end; \ -+ bool _swapped = true; \ -+ \ -+ for (_end = (ssize_t) (_nr) - 1; _end > 0 && _swapped; --_end) {\ -+ _swapped = false; \ -+ for (_i = 0; _i < _end; _i++) \ -+ if (_cmp((_base)[_i], (_base)[_i + 1]) > 0) { \ -+ swap((_base)[_i], (_base)[_i + 1]); \ -+ _swapped = true; \ -+ } \ -+ } \ -+} while (0) -+ -+static inline u64 percpu_u64_get(u64 __percpu *src) -+{ -+ u64 ret = 0; -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ ret += *per_cpu_ptr(src, cpu); -+ return ret; -+} -+ -+static inline void percpu_u64_set(u64 __percpu *dst, u64 src) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ *per_cpu_ptr(dst, cpu) = 0; -+ -+ preempt_disable(); -+ *this_cpu_ptr(dst) = src; -+ preempt_enable(); -+} -+ -+static inline void acc_u64s(u64 *acc, const u64 *src, unsigned nr) -+{ -+ unsigned i; -+ -+ for (i = 0; i < nr; i++) -+ acc[i] += src[i]; -+} -+ -+static inline void acc_u64s_percpu(u64 *acc, const u64 __percpu *src, 
-+ unsigned nr) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ acc_u64s(acc, per_cpu_ptr(src, cpu), nr); -+} -+ -+static inline void percpu_memset(void __percpu *p, int c, size_t bytes) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ memset(per_cpu_ptr(p, cpu), c, bytes); -+} -+ -+u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned); -+ -+#define cmp_int(l, r) ((l > r) - (l < r)) -+ -+#endif /* _BCACHEFS_UTIL_H */ -diff --git a/fs/bcachefs/vstructs.h b/fs/bcachefs/vstructs.h -new file mode 100644 -index 000000000000..c099cdc0605f ---- /dev/null -+++ b/fs/bcachefs/vstructs.h -@@ -0,0 +1,63 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _VSTRUCTS_H -+#define _VSTRUCTS_H -+ -+#include "util.h" -+ -+/* -+ * NOTE: we can't differentiate between __le64 and u64 with type_is - this -+ * assumes u64 is little endian: -+ */ -+#define __vstruct_u64s(_s) \ -+({ \ -+ ( type_is((_s)->u64s, u64) ? le64_to_cpu((__force __le64) (_s)->u64s) \ -+ : type_is((_s)->u64s, u32) ? le32_to_cpu((__force __le32) (_s)->u64s) \ -+ : type_is((_s)->u64s, u16) ? le16_to_cpu((__force __le16) (_s)->u64s) \ -+ : ((__force u8) ((_s)->u64s))); \ -+}) -+ -+#define __vstruct_bytes(_type, _u64s) \ -+({ \ -+ BUILD_BUG_ON(offsetof(_type, _data) % sizeof(u64)); \ -+ \ -+ (offsetof(_type, _data) + (_u64s) * sizeof(u64)); \ -+}) -+ -+#define vstruct_bytes(_s) \ -+ __vstruct_bytes(typeof(*(_s)), __vstruct_u64s(_s)) -+ -+#define __vstruct_blocks(_type, _sector_block_bits, _u64s) \ -+ (round_up(__vstruct_bytes(_type, _u64s), \ -+ 512 << (_sector_block_bits)) >> (9 + (_sector_block_bits))) -+ -+#define vstruct_blocks(_s, _sector_block_bits) \ -+ __vstruct_blocks(typeof(*(_s)), _sector_block_bits, __vstruct_u64s(_s)) -+ -+#define vstruct_blocks_plus(_s, _sector_block_bits, _u64s) \ -+ __vstruct_blocks(typeof(*(_s)), _sector_block_bits, \ -+ __vstruct_u64s(_s) + (_u64s)) -+ -+#define vstruct_sectors(_s, _sector_block_bits) \ -+ (round_up(vstruct_bytes(_s), 512 << (_sector_block_bits)) >> 9) -+ -+#define vstruct_next(_s) \ -+ ((typeof(_s)) ((_s)->_data + __vstruct_u64s(_s))) -+#define vstruct_last(_s) \ -+ ((typeof(&(_s)->start[0])) ((_s)->_data + __vstruct_u64s(_s))) -+#define vstruct_end(_s) \ -+ ((void *) ((_s)->_data + __vstruct_u64s(_s))) -+ -+#define vstruct_for_each(_s, _i) \ -+ for (_i = (_s)->start; \ -+ _i < vstruct_last(_s); \ -+ _i = vstruct_next(_i)) -+ -+#define vstruct_for_each_safe(_s, _i, _t) \ -+ for (_i = (_s)->start; \ -+ _i < vstruct_last(_s) && (_t = vstruct_next(_i), true); \ -+ _i = _t) -+ -+#define vstruct_idx(_s, _idx) \ -+ ((typeof(&(_s)->start[0])) ((_s)->_data + (_idx))) -+ -+#endif /* _VSTRUCTS_H */ -diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c -new file mode 100644 -index 000000000000..21f64cb7e402 ---- /dev/null -+++ b/fs/bcachefs/xattr.c -@@ -0,0 +1,586 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_update.h" -+#include "extents.h" -+#include "fs.h" -+#include "rebalance.h" -+#include "str_hash.h" -+#include "xattr.h" -+ -+#include -+#include -+#include -+ -+static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned); -+ -+static u64 bch2_xattr_hash(const struct bch_hash_info *info, -+ const struct xattr_search_key *key) -+{ -+ struct bch_str_hash_ctx ctx; -+ -+ bch2_str_hash_init(&ctx, info); -+ bch2_str_hash_update(&ctx, info, &key->type, sizeof(key->type)); -+ bch2_str_hash_update(&ctx, info, key->name.name, key->name.len); -+ -+ return bch2_str_hash_end(&ctx, info); -+} -+ -+static u64 
xattr_hash_key(const struct bch_hash_info *info, const void *key) -+{ -+ return bch2_xattr_hash(info, key); -+} -+ -+static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) -+{ -+ struct bkey_s_c_xattr x = bkey_s_c_to_xattr(k); -+ -+ return bch2_xattr_hash(info, -+ &X_SEARCH(x.v->x_type, x.v->x_name, x.v->x_name_len)); -+} -+ -+static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r) -+{ -+ struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l); -+ const struct xattr_search_key *r = _r; -+ -+ return l.v->x_type != r->type || -+ l.v->x_name_len != r->name.len || -+ memcmp(l.v->x_name, r->name.name, r->name.len); -+} -+ -+static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) -+{ -+ struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l); -+ struct bkey_s_c_xattr r = bkey_s_c_to_xattr(_r); -+ -+ return l.v->x_type != r.v->x_type || -+ l.v->x_name_len != r.v->x_name_len || -+ memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len); -+} -+ -+const struct bch_hash_desc bch2_xattr_hash_desc = { -+ .btree_id = BTREE_ID_XATTRS, -+ .key_type = KEY_TYPE_xattr, -+ .hash_key = xattr_hash_key, -+ .hash_bkey = xattr_hash_bkey, -+ .cmp_key = xattr_cmp_key, -+ .cmp_bkey = xattr_cmp_bkey, -+}; -+ -+const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ const struct xattr_handler *handler; -+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); -+ -+ if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr)) -+ return "value too small"; -+ -+ if (bkey_val_u64s(k.k) < -+ xattr_val_u64s(xattr.v->x_name_len, -+ le16_to_cpu(xattr.v->x_val_len))) -+ return "value too small"; -+ -+ if (bkey_val_u64s(k.k) > -+ xattr_val_u64s(xattr.v->x_name_len, -+ le16_to_cpu(xattr.v->x_val_len) + 4)) -+ return "value too big"; -+ -+ handler = bch2_xattr_type_to_handler(xattr.v->x_type); -+ if (!handler) -+ return "invalid type"; -+ -+ if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len)) -+ return "xattr name has invalid characters"; -+ -+ return NULL; -+} -+ -+void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ const struct xattr_handler *handler; -+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); -+ -+ handler = bch2_xattr_type_to_handler(xattr.v->x_type); -+ if (handler && handler->prefix) -+ pr_buf(out, "%s", handler->prefix); -+ else if (handler) -+ pr_buf(out, "(type %u)", xattr.v->x_type); -+ else -+ pr_buf(out, "(unknown type %u)", xattr.v->x_type); -+ -+ bch_scnmemcpy(out, xattr.v->x_name, -+ xattr.v->x_name_len); -+ pr_buf(out, ":"); -+ bch_scnmemcpy(out, xattr_val(xattr.v), -+ le16_to_cpu(xattr.v->x_val_len)); -+} -+ -+int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, -+ const char *name, void *buffer, size_t size, int type) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c_xattr xattr; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, -+ &inode->ei_str_hash, inode->v.i_ino, -+ &X_SEARCH(type, name, strlen(name)), -+ 0); -+ if (IS_ERR(iter)) { -+ bch2_trans_exit(&trans); -+ BUG_ON(PTR_ERR(iter) == -EINTR); -+ -+ return PTR_ERR(iter) == -ENOENT ? 
-ENODATA : PTR_ERR(iter); -+ } -+ -+ xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); -+ ret = le16_to_cpu(xattr.v->x_val_len); -+ if (buffer) { -+ if (ret > size) -+ ret = -ERANGE; -+ else -+ memcpy(buffer, xattr_val(xattr.v), ret); -+ } -+ -+ bch2_trans_exit(&trans); -+ return ret; -+} -+ -+int bch2_xattr_set(struct btree_trans *trans, u64 inum, -+ const struct bch_hash_info *hash_info, -+ const char *name, const void *value, size_t size, -+ int type, int flags) -+{ -+ int ret; -+ -+ if (value) { -+ struct bkey_i_xattr *xattr; -+ unsigned namelen = strlen(name); -+ unsigned u64s = BKEY_U64s + -+ xattr_val_u64s(namelen, size); -+ -+ if (u64s > U8_MAX) -+ return -ERANGE; -+ -+ xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); -+ if (IS_ERR(xattr)) -+ return PTR_ERR(xattr); -+ -+ bkey_xattr_init(&xattr->k_i); -+ xattr->k.u64s = u64s; -+ xattr->v.x_type = type; -+ xattr->v.x_name_len = namelen; -+ xattr->v.x_val_len = cpu_to_le16(size); -+ memcpy(xattr->v.x_name, name, namelen); -+ memcpy(xattr_val(&xattr->v), value, size); -+ -+ ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info, -+ inum, &xattr->k_i, -+ (flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)| -+ (flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0)); -+ } else { -+ struct xattr_search_key search = -+ X_SEARCH(type, name, strlen(name)); -+ -+ ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, -+ hash_info, inum, &search); -+ } -+ -+ if (ret == -ENOENT) -+ ret = flags & XATTR_REPLACE ? -ENODATA : 0; -+ -+ return ret; -+} -+ -+struct xattr_buf { -+ char *buf; -+ size_t len; -+ size_t used; -+}; -+ -+static int __bch2_xattr_emit(const char *prefix, -+ const char *name, size_t name_len, -+ struct xattr_buf *buf) -+{ -+ const size_t prefix_len = strlen(prefix); -+ const size_t total_len = prefix_len + name_len + 1; -+ -+ if (buf->buf) { -+ if (buf->used + total_len > buf->len) -+ return -ERANGE; -+ -+ memcpy(buf->buf + buf->used, prefix, prefix_len); -+ memcpy(buf->buf + buf->used + prefix_len, -+ name, name_len); -+ buf->buf[buf->used + prefix_len + name_len] = '\0'; -+ } -+ -+ buf->used += total_len; -+ return 0; -+} -+ -+static int bch2_xattr_emit(struct dentry *dentry, -+ const struct bch_xattr *xattr, -+ struct xattr_buf *buf) -+{ -+ const struct xattr_handler *handler = -+ bch2_xattr_type_to_handler(xattr->x_type); -+ -+ return handler && (!handler->list || handler->list(dentry)) -+ ? __bch2_xattr_emit(handler->prefix ?: handler->name, -+ xattr->x_name, xattr->x_name_len, buf) -+ : 0; -+} -+ -+static int bch2_xattr_list_bcachefs(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct xattr_buf *buf, -+ bool all) -+{ -+ const char *prefix = all ? "bcachefs_effective." 
: "bcachefs."; -+ unsigned id; -+ int ret = 0; -+ u64 v; -+ -+ for (id = 0; id < Inode_opt_nr; id++) { -+ v = bch2_inode_opt_get(&inode->ei_inode, id); -+ if (!v) -+ continue; -+ -+ if (!all && -+ !(inode->ei_inode.bi_fields_set & (1 << id))) -+ continue; -+ -+ ret = __bch2_xattr_emit(prefix, bch2_inode_opts[id], -+ strlen(bch2_inode_opts[id]), buf); -+ if (ret) -+ break; -+ } -+ -+ return ret; -+} -+ -+ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) -+{ -+ struct bch_fs *c = dentry->d_sb->s_fs_info; -+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct xattr_buf buf = { .buf = buffer, .len = buffer_size }; -+ u64 inum = dentry->d_inode->i_ino; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, -+ POS(inum, 0), 0, k, ret) { -+ BUG_ON(k.k->p.inode < inum); -+ -+ if (k.k->p.inode > inum) -+ break; -+ -+ if (k.k->type != KEY_TYPE_xattr) -+ continue; -+ -+ ret = bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, &buf); -+ if (ret) -+ break; -+ } -+ ret = bch2_trans_exit(&trans) ?: ret; -+ -+ if (ret) -+ return ret; -+ -+ ret = bch2_xattr_list_bcachefs(c, inode, &buf, false); -+ if (ret) -+ return ret; -+ -+ ret = bch2_xattr_list_bcachefs(c, inode, &buf, true); -+ if (ret) -+ return ret; -+ -+ return buf.used; -+} -+ -+static int bch2_xattr_get_handler(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ return bch2_xattr_get(c, inode, name, buffer, size, handler->flags); -+} -+ -+static int bch2_xattr_set_handler(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, const void *value, -+ size_t size, int flags) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0, -+ bch2_xattr_set(&trans, inode->v.i_ino, -+ &inode->ei_str_hash, -+ name, value, size, -+ handler->flags, flags)); -+} -+ -+static const struct xattr_handler bch_xattr_user_handler = { -+ .prefix = XATTR_USER_PREFIX, -+ .get = bch2_xattr_get_handler, -+ .set = bch2_xattr_set_handler, -+ .flags = KEY_TYPE_XATTR_INDEX_USER, -+}; -+ -+static bool bch2_xattr_trusted_list(struct dentry *dentry) -+{ -+ return capable(CAP_SYS_ADMIN); -+} -+ -+static const struct xattr_handler bch_xattr_trusted_handler = { -+ .prefix = XATTR_TRUSTED_PREFIX, -+ .list = bch2_xattr_trusted_list, -+ .get = bch2_xattr_get_handler, -+ .set = bch2_xattr_set_handler, -+ .flags = KEY_TYPE_XATTR_INDEX_TRUSTED, -+}; -+ -+static const struct xattr_handler bch_xattr_security_handler = { -+ .prefix = XATTR_SECURITY_PREFIX, -+ .get = bch2_xattr_get_handler, -+ .set = bch2_xattr_set_handler, -+ .flags = KEY_TYPE_XATTR_INDEX_SECURITY, -+}; -+ -+#ifndef NO_BCACHEFS_FS -+ -+static int opt_to_inode_opt(int id) -+{ -+ switch (id) { -+#define x(name, ...) 
\ -+ case Opt_##name: return Inode_opt_##name; -+ BCH_INODE_OPTS() -+#undef x -+ default: -+ return -1; -+ } -+} -+ -+static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size, -+ bool all) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_opts opts = -+ bch2_inode_opts_to_opts(bch2_inode_opts_get(&inode->ei_inode)); -+ const struct bch_option *opt; -+ int id, inode_opt_id; -+ char buf[512]; -+ struct printbuf out = PBUF(buf); -+ unsigned val_len; -+ u64 v; -+ -+ id = bch2_opt_lookup(name); -+ if (id < 0 || !bch2_opt_is_inode_opt(id)) -+ return -EINVAL; -+ -+ inode_opt_id = opt_to_inode_opt(id); -+ if (inode_opt_id < 0) -+ return -EINVAL; -+ -+ opt = bch2_opt_table + id; -+ -+ if (!bch2_opt_defined_by_id(&opts, id)) -+ return -ENODATA; -+ -+ if (!all && -+ !(inode->ei_inode.bi_fields_set & (1 << inode_opt_id))) -+ return -ENODATA; -+ -+ v = bch2_opt_get_by_id(&opts, id); -+ bch2_opt_to_text(&out, c, opt, v, 0); -+ -+ val_len = out.pos - buf; -+ -+ if (buffer && val_len > size) -+ return -ERANGE; -+ -+ if (buffer) -+ memcpy(buffer, buf, val_len); -+ return val_len; -+} -+ -+static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size) -+{ -+ return __bch2_xattr_bcachefs_get(handler, dentry, vinode, -+ name, buffer, size, false); -+} -+ -+struct inode_opt_set { -+ int id; -+ u64 v; -+ bool defined; -+}; -+ -+static int inode_opt_set_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct inode_opt_set *s = p; -+ -+ if (s->defined) -+ bi->bi_fields_set |= 1U << s->id; -+ else -+ bi->bi_fields_set &= ~(1U << s->id); -+ -+ bch2_inode_opt_set(bi, s->id, s->v); -+ -+ return 0; -+} -+ -+static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, const void *value, -+ size_t size, int flags) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ const struct bch_option *opt; -+ char *buf; -+ struct inode_opt_set s; -+ int opt_id, inode_opt_id, ret; -+ -+ opt_id = bch2_opt_lookup(name); -+ if (opt_id < 0) -+ return -EINVAL; -+ -+ opt = bch2_opt_table + opt_id; -+ -+ inode_opt_id = opt_to_inode_opt(opt_id); -+ if (inode_opt_id < 0) -+ return -EINVAL; -+ -+ s.id = inode_opt_id; -+ -+ if (value) { -+ u64 v = 0; -+ -+ buf = kmalloc(size + 1, GFP_KERNEL); -+ if (!buf) -+ return -ENOMEM; -+ memcpy(buf, value, size); -+ buf[size] = '\0'; -+ -+ ret = bch2_opt_parse(c, opt, buf, &v); -+ kfree(buf); -+ -+ if (ret < 0) -+ return ret; -+ -+ ret = bch2_opt_check_may_set(c, opt_id, v); -+ if (ret < 0) -+ return ret; -+ -+ s.v = v + 1; -+ s.defined = true; -+ } else { -+ if (!IS_ROOT(dentry)) { -+ struct bch_inode_info *dir = -+ to_bch_ei(d_inode(dentry->d_parent)); -+ -+ s.v = bch2_inode_opt_get(&dir->ei_inode, inode_opt_id); -+ } else { -+ s.v = 0; -+ } -+ -+ s.defined = false; -+ } -+ -+ mutex_lock(&inode->ei_update_lock); -+ if (inode_opt_id == Inode_opt_project) { -+ /* -+ * inode fields accessible via the xattr interface are stored -+ * with a +1 bias, so that 0 means unset: -+ */ -+ ret = bch2_set_projid(c, inode, s.v ? 
s.v - 1 : 0); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); -+err: -+ mutex_unlock(&inode->ei_update_lock); -+ -+ if (value && -+ (opt_id == Opt_background_compression || -+ opt_id == Opt_background_target)) -+ bch2_rebalance_add_work(c, inode->v.i_blocks); -+ -+ return ret; -+} -+ -+static const struct xattr_handler bch_xattr_bcachefs_handler = { -+ .prefix = "bcachefs.", -+ .get = bch2_xattr_bcachefs_get, -+ .set = bch2_xattr_bcachefs_set, -+}; -+ -+static int bch2_xattr_bcachefs_get_effective( -+ const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size) -+{ -+ return __bch2_xattr_bcachefs_get(handler, dentry, vinode, -+ name, buffer, size, true); -+} -+ -+static const struct xattr_handler bch_xattr_bcachefs_effective_handler = { -+ .prefix = "bcachefs_effective.", -+ .get = bch2_xattr_bcachefs_get_effective, -+ .set = bch2_xattr_bcachefs_set, -+}; -+ -+#endif /* NO_BCACHEFS_FS */ -+ -+const struct xattr_handler *bch2_xattr_handlers[] = { -+ &bch_xattr_user_handler, -+ &posix_acl_access_xattr_handler, -+ &posix_acl_default_xattr_handler, -+ &bch_xattr_trusted_handler, -+ &bch_xattr_security_handler, -+#ifndef NO_BCACHEFS_FS -+ &bch_xattr_bcachefs_handler, -+ &bch_xattr_bcachefs_effective_handler, -+#endif -+ NULL -+}; -+ -+static const struct xattr_handler *bch_xattr_handler_map[] = { -+ [KEY_TYPE_XATTR_INDEX_USER] = &bch_xattr_user_handler, -+ [KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS] = -+ &posix_acl_access_xattr_handler, -+ [KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT] = -+ &posix_acl_default_xattr_handler, -+ [KEY_TYPE_XATTR_INDEX_TRUSTED] = &bch_xattr_trusted_handler, -+ [KEY_TYPE_XATTR_INDEX_SECURITY] = &bch_xattr_security_handler, -+}; -+ -+static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned type) -+{ -+ return type < ARRAY_SIZE(bch_xattr_handler_map) -+ ? 
bch_xattr_handler_map[type] -+ : NULL; -+} -diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h -new file mode 100644 -index 000000000000..4151065ab853 ---- /dev/null -+++ b/fs/bcachefs/xattr.h -@@ -0,0 +1,49 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_XATTR_H -+#define _BCACHEFS_XATTR_H -+ -+#include "str_hash.h" -+ -+extern const struct bch_hash_desc bch2_xattr_hash_desc; -+ -+const char *bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_xattr (struct bkey_ops) { \ -+ .key_invalid = bch2_xattr_invalid, \ -+ .val_to_text = bch2_xattr_to_text, \ -+} -+ -+static inline unsigned xattr_val_u64s(unsigned name_len, unsigned val_len) -+{ -+ return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name) + -+ name_len + val_len, sizeof(u64)); -+} -+ -+#define xattr_val(_xattr) \ -+ ((void *) (_xattr)->x_name + (_xattr)->x_name_len) -+ -+struct xattr_search_key { -+ u8 type; -+ struct qstr name; -+}; -+ -+#define X_SEARCH(_type, _name, _len) ((struct xattr_search_key) \ -+ { .type = _type, .name = QSTR_INIT(_name, _len) }) -+ -+struct dentry; -+struct xattr_handler; -+struct bch_hash_info; -+struct bch_inode_info; -+ -+int bch2_xattr_get(struct bch_fs *, struct bch_inode_info *, -+ const char *, void *, size_t, int); -+ -+int bch2_xattr_set(struct btree_trans *, u64, const struct bch_hash_info *, -+ const char *, const void *, size_t, int, int); -+ -+ssize_t bch2_xattr_list(struct dentry *, char *, size_t); -+ -+extern const struct xattr_handler *bch2_xattr_handlers[]; -+ -+#endif /* _BCACHEFS_XATTR_H */ -diff --git a/fs/cifs/file.c b/fs/cifs/file.c -index be46fab4c96d..a17a21181e18 100644 ---- a/fs/cifs/file.c -+++ b/fs/cifs/file.c -@@ -4296,20 +4296,12 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, - - page = lru_to_page(page_list); - -- /* -- * Lock the page and put it in the cache. Since no one else -- * should have access to this page, we're safe to simply set -- * PG_locked without checking it first. 
-- */ -- __SetPageLocked(page); -- rc = add_to_page_cache_locked(page, mapping, -- page->index, gfp); -+ rc = add_to_page_cache(page, mapping, -+ page->index, gfp); - - /* give up if we can't stick it in the cache */ -- if (rc) { -- __ClearPageLocked(page); -+ if (rc) - return rc; -- } - - /* move first page to the tmplist */ - *offset = (loff_t)page->index << PAGE_SHIFT; -@@ -4328,12 +4320,9 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, - if (*bytes + PAGE_SIZE > rsize) - break; - -- __SetPageLocked(page); -- rc = add_to_page_cache_locked(page, mapping, page->index, gfp); -- if (rc) { -- __ClearPageLocked(page); -+ rc = add_to_page_cache(page, mapping, page->index, gfp); -+ if (rc) - break; -- } - list_move_tail(&page->lru, tmplist); - (*bytes) += PAGE_SIZE; - expected_index++; -diff --git a/fs/dcache.c b/fs/dcache.c -index 361ea7ab30ea..6fbf68e60326 100644 ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -3132,9 +3132,8 @@ void d_genocide(struct dentry *parent) - - EXPORT_SYMBOL(d_genocide); - --void d_tmpfile(struct dentry *dentry, struct inode *inode) -+void d_mark_tmpfile(struct dentry *dentry, struct inode *inode) - { -- inode_dec_link_count(inode); - BUG_ON(dentry->d_name.name != dentry->d_iname || - !hlist_unhashed(&dentry->d_u.d_alias) || - !d_unlinked(dentry)); -@@ -3144,6 +3143,13 @@ void d_tmpfile(struct dentry *dentry, struct inode *inode) - (unsigned long long)inode->i_ino); - spin_unlock(&dentry->d_lock); - spin_unlock(&dentry->d_parent->d_lock); -+} -+EXPORT_SYMBOL(d_mark_tmpfile); -+ -+void d_tmpfile(struct dentry *dentry, struct inode *inode) -+{ -+ inode_dec_link_count(inode); -+ d_mark_tmpfile(dentry, inode); - d_instantiate(dentry, inode); - } - EXPORT_SYMBOL(d_tmpfile); -diff --git a/fs/inode.c b/fs/inode.c -index 72c4c347afb7..e70ad3d2d01c 100644 ---- a/fs/inode.c -+++ b/fs/inode.c -@@ -1578,6 +1578,46 @@ int insert_inode_locked(struct inode *inode) - } - EXPORT_SYMBOL(insert_inode_locked); - -+struct inode *insert_inode_locked2(struct inode *inode) -+{ -+ struct super_block *sb = inode->i_sb; -+ ino_t ino = inode->i_ino; -+ struct hlist_head *head = inode_hashtable + hash(sb, ino); -+ -+ while (1) { -+ struct inode *old = NULL; -+ spin_lock(&inode_hash_lock); -+ hlist_for_each_entry(old, head, i_hash) { -+ if (old->i_ino != ino) -+ continue; -+ if (old->i_sb != sb) -+ continue; -+ spin_lock(&old->i_lock); -+ if (old->i_state & (I_FREEING|I_WILL_FREE)) { -+ spin_unlock(&old->i_lock); -+ continue; -+ } -+ break; -+ } -+ if (likely(!old)) { -+ spin_lock(&inode->i_lock); -+ inode->i_state |= I_NEW | I_CREATING; -+ hlist_add_head(&inode->i_hash, head); -+ spin_unlock(&inode->i_lock); -+ spin_unlock(&inode_hash_lock); -+ return NULL; -+ } -+ __iget(old); -+ spin_unlock(&old->i_lock); -+ spin_unlock(&inode_hash_lock); -+ wait_on_inode(old); -+ if (unlikely(!inode_unhashed(old))) -+ return old; -+ iput(old); -+ } -+} -+EXPORT_SYMBOL(insert_inode_locked2); -+ - int insert_inode_locked4(struct inode *inode, unsigned long hashval, - int (*test)(struct inode *, void *), void *data) - { -diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index 57241417ff2f..e080ccb4fdf1 100644 ---- a/include/linux/blkdev.h -+++ b/include/linux/blkdev.h -@@ -908,6 +908,7 @@ extern const char *blk_op_str(unsigned int op); - - int blk_status_to_errno(blk_status_t status); - blk_status_t errno_to_blk_status(int errno); -+const char *blk_status_to_str(blk_status_t status); - - int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin); - -diff --git 
a/include/linux/closure.h b/include/linux/closure.h -new file mode 100644 -index 000000000000..36b4a83f9b77 ---- /dev/null -+++ b/include/linux/closure.h -@@ -0,0 +1,399 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _LINUX_CLOSURE_H -+#define _LINUX_CLOSURE_H -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * Closure is perhaps the most overused and abused term in computer science, but -+ * since I've been unable to come up with anything better you're stuck with it -+ * again. -+ * -+ * What are closures? -+ * -+ * They embed a refcount. The basic idea is they count "things that are in -+ * progress" - in flight bios, some other thread that's doing something else - -+ * anything you might want to wait on. -+ * -+ * The refcount may be manipulated with closure_get() and closure_put(). -+ * closure_put() is where many of the interesting things happen, when it causes -+ * the refcount to go to 0. -+ * -+ * Closures can be used to wait on things both synchronously and asynchronously, -+ * and synchronous and asynchronous use can be mixed without restriction. To -+ * wait synchronously, use closure_sync() - you will sleep until your closure's -+ * refcount hits 1. -+ * -+ * To wait asynchronously, use -+ * continue_at(cl, next_function, workqueue); -+ * -+ * passing it, as you might expect, the function to run when nothing is pending -+ * and the workqueue to run that function out of. -+ * -+ * continue_at() also, critically, requires a 'return' immediately following the -+ * location where this macro is referenced, to return to the calling function. -+ * There's good reason for this. -+ * -+ * To use safely closures asynchronously, they must always have a refcount while -+ * they are running owned by the thread that is running them. Otherwise, suppose -+ * you submit some bios and wish to have a function run when they all complete: -+ * -+ * foo_endio(struct bio *bio) -+ * { -+ * closure_put(cl); -+ * } -+ * -+ * closure_init(cl); -+ * -+ * do_stuff(); -+ * closure_get(cl); -+ * bio1->bi_endio = foo_endio; -+ * bio_submit(bio1); -+ * -+ * do_more_stuff(); -+ * closure_get(cl); -+ * bio2->bi_endio = foo_endio; -+ * bio_submit(bio2); -+ * -+ * continue_at(cl, complete_some_read, system_wq); -+ * -+ * If closure's refcount started at 0, complete_some_read() could run before the -+ * second bio was submitted - which is almost always not what you want! More -+ * importantly, it wouldn't be possible to say whether the original thread or -+ * complete_some_read()'s thread owned the closure - and whatever state it was -+ * associated with! -+ * -+ * So, closure_init() initializes a closure's refcount to 1 - and when a -+ * closure_fn is run, the refcount will be reset to 1 first. -+ * -+ * Then, the rule is - if you got the refcount with closure_get(), release it -+ * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount -+ * on a closure because you called closure_init() or you were run out of a -+ * closure - _always_ use continue_at(). Doing so consistently will help -+ * eliminate an entire class of particularly pernicious races. -+ * -+ * Lastly, you might have a wait list dedicated to a specific event, and have no -+ * need for specifying the condition - you just want to wait until someone runs -+ * closure_wake_up() on the appropriate wait list. In that case, just use -+ * closure_wait(). It will return either true or false, depending on whether the -+ * closure was already on a wait list or not - a closure can only be on one wait -+ * list at a time. 
-+ * -+ * Parents: -+ * -+ * closure_init() takes two arguments - it takes the closure to initialize, and -+ * a (possibly null) parent. -+ * -+ * If parent is non null, the new closure will have a refcount for its lifetime; -+ * a closure is considered to be "finished" when its refcount hits 0 and the -+ * function to run is null. Hence -+ * -+ * continue_at(cl, NULL, NULL); -+ * -+ * returns up the (spaghetti) stack of closures, precisely like normal return -+ * returns up the C stack. continue_at() with non null fn is better thought of -+ * as doing a tail call. -+ * -+ * All this implies that a closure should typically be embedded in a particular -+ * struct (which its refcount will normally control the lifetime of), and that -+ * struct can very much be thought of as a stack frame. -+ */ -+ -+struct closure; -+struct closure_syncer; -+typedef void (closure_fn) (struct closure *); -+extern struct dentry *bcache_debug; -+ -+struct closure_waitlist { -+ struct llist_head list; -+}; -+ -+enum closure_state { -+ /* -+ * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by -+ * the thread that owns the closure, and cleared by the thread that's -+ * waking up the closure. -+ * -+ * The rest are for debugging and don't affect behaviour: -+ * -+ * CLOSURE_RUNNING: Set when a closure is running (i.e. by -+ * closure_init() and when closure_put() runs then next function), and -+ * must be cleared before remaining hits 0. Primarily to help guard -+ * against incorrect usage and accidentally transferring references. -+ * continue_at() and closure_return() clear it for you, if you're doing -+ * something unusual you can use closure_set_dead() which also helps -+ * annotate where references are being transferred. -+ */ -+ -+ CLOSURE_BITS_START = (1U << 26), -+ CLOSURE_DESTRUCTOR = (1U << 26), -+ CLOSURE_WAITING = (1U << 28), -+ CLOSURE_RUNNING = (1U << 30), -+}; -+ -+#define CLOSURE_GUARD_MASK \ -+ ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1) -+ -+#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) -+#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) -+ -+struct closure { -+ union { -+ struct { -+ struct workqueue_struct *wq; -+ struct closure_syncer *s; -+ struct llist_node list; -+ closure_fn *fn; -+ }; -+ struct work_struct work; -+ }; -+ -+ struct closure *parent; -+ -+ atomic_t remaining; -+ -+#ifdef CONFIG_DEBUG_CLOSURES -+#define CLOSURE_MAGIC_DEAD 0xc054dead -+#define CLOSURE_MAGIC_ALIVE 0xc054a11e -+ -+ unsigned int magic; -+ struct list_head all; -+ unsigned long ip; -+ unsigned long waiting_on; -+#endif -+}; -+ -+void closure_sub(struct closure *cl, int v); -+void closure_put(struct closure *cl); -+void __closure_wake_up(struct closure_waitlist *list); -+bool closure_wait(struct closure_waitlist *list, struct closure *cl); -+void __closure_sync(struct closure *cl); -+ -+/** -+ * closure_sync - sleep until a closure a closure has nothing left to wait on -+ * -+ * Sleeps until the refcount hits 1 - the thread that's running the closure owns -+ * the last refcount. 
-+ */ -+static inline void closure_sync(struct closure *cl) -+{ -+ if ((atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK) != 1) -+ __closure_sync(cl); -+} -+ -+#ifdef CONFIG_DEBUG_CLOSURES -+ -+void closure_debug_create(struct closure *cl); -+void closure_debug_destroy(struct closure *cl); -+ -+#else -+ -+static inline void closure_debug_create(struct closure *cl) {} -+static inline void closure_debug_destroy(struct closure *cl) {} -+ -+#endif -+ -+static inline void closure_set_ip(struct closure *cl) -+{ -+#ifdef CONFIG_DEBUG_CLOSURES -+ cl->ip = _THIS_IP_; -+#endif -+} -+ -+static inline void closure_set_ret_ip(struct closure *cl) -+{ -+#ifdef CONFIG_DEBUG_CLOSURES -+ cl->ip = _RET_IP_; -+#endif -+} -+ -+static inline void closure_set_waiting(struct closure *cl, unsigned long f) -+{ -+#ifdef CONFIG_DEBUG_CLOSURES -+ cl->waiting_on = f; -+#endif -+} -+ -+static inline void closure_set_stopped(struct closure *cl) -+{ -+ atomic_sub(CLOSURE_RUNNING, &cl->remaining); -+} -+ -+static inline void set_closure_fn(struct closure *cl, closure_fn *fn, -+ struct workqueue_struct *wq) -+{ -+ closure_set_ip(cl); -+ cl->fn = fn; -+ cl->wq = wq; -+ /* between atomic_dec() in closure_put() */ -+ smp_mb__before_atomic(); -+} -+ -+static inline void closure_queue(struct closure *cl) -+{ -+ struct workqueue_struct *wq = cl->wq; -+ /** -+ * Changes made to closure, work_struct, or a couple of other structs -+ * may cause work.func not pointing to the right location. -+ */ -+ BUILD_BUG_ON(offsetof(struct closure, fn) -+ != offsetof(struct work_struct, func)); -+ -+ if (wq) { -+ INIT_WORK(&cl->work, cl->work.func); -+ BUG_ON(!queue_work(wq, &cl->work)); -+ } else -+ cl->fn(cl); -+} -+ -+/** -+ * closure_get - increment a closure's refcount -+ */ -+static inline void closure_get(struct closure *cl) -+{ -+#ifdef CONFIG_DEBUG_CLOSURES -+ BUG_ON((atomic_inc_return(&cl->remaining) & -+ CLOSURE_REMAINING_MASK) <= 1); -+#else -+ atomic_inc(&cl->remaining); -+#endif -+} -+ -+/** -+ * closure_init - Initialize a closure, setting the refcount to 1 -+ * @cl: closure to initialize -+ * @parent: parent of the new closure. cl will take a refcount on it for its -+ * lifetime; may be NULL. -+ */ -+static inline void closure_init(struct closure *cl, struct closure *parent) -+{ -+ cl->fn = NULL; -+ cl->parent = parent; -+ if (parent) -+ closure_get(parent); -+ -+ atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); -+ -+ closure_debug_create(cl); -+ closure_set_ip(cl); -+} -+ -+static inline void closure_init_stack(struct closure *cl) -+{ -+ memset(cl, 0, sizeof(struct closure)); -+ atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); -+} -+ -+/** -+ * closure_wake_up - wake up all closures on a wait list, -+ * with memory barrier -+ */ -+static inline void closure_wake_up(struct closure_waitlist *list) -+{ -+ /* Memory barrier for the wait list */ -+ smp_mb(); -+ __closure_wake_up(list); -+} -+ -+/** -+ * continue_at - jump to another function with barrier -+ * -+ * After @cl is no longer waiting on anything (i.e. all outstanding refs have -+ * been dropped with closure_put()), it will resume execution at @fn running out -+ * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). -+ * -+ * This is because after calling continue_at() you no longer have a ref on @cl, -+ * and whatever @cl owns may be freed out from under you - a running closure fn -+ * has a ref on its own closure which continue_at() drops. -+ * -+ * Note you are expected to immediately return after using this macro. 
-+ */ -+#define continue_at(_cl, _fn, _wq) \ -+do { \ -+ set_closure_fn(_cl, _fn, _wq); \ -+ closure_sub(_cl, CLOSURE_RUNNING + 1); \ -+} while (0) -+ -+/** -+ * closure_return - finish execution of a closure -+ * -+ * This is used to indicate that @cl is finished: when all outstanding refs on -+ * @cl have been dropped @cl's ref on its parent closure (as passed to -+ * closure_init()) will be dropped, if one was specified - thus this can be -+ * thought of as returning to the parent closure. -+ */ -+#define closure_return(_cl) continue_at((_cl), NULL, NULL) -+ -+/** -+ * continue_at_nobarrier - jump to another function without barrier -+ * -+ * Causes @fn to be executed out of @cl, in @wq context (or called directly if -+ * @wq is NULL). -+ * -+ * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, -+ * thus it's not safe to touch anything protected by @cl after a -+ * continue_at_nobarrier(). -+ */ -+#define continue_at_nobarrier(_cl, _fn, _wq) \ -+do { \ -+ set_closure_fn(_cl, _fn, _wq); \ -+ closure_queue(_cl); \ -+} while (0) -+ -+/** -+ * closure_return_with_destructor - finish execution of a closure, -+ * with destructor -+ * -+ * Works like closure_return(), except @destructor will be called when all -+ * outstanding refs on @cl have been dropped; @destructor may be used to safely -+ * free the memory occupied by @cl, and it is called with the ref on the parent -+ * closure still held - so @destructor could safely return an item to a -+ * freelist protected by @cl's parent. -+ */ -+#define closure_return_with_destructor(_cl, _destructor) \ -+do { \ -+ set_closure_fn(_cl, _destructor, NULL); \ -+ closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ -+} while (0) -+ -+/** -+ * closure_call - execute @fn out of a new, uninitialized closure -+ * -+ * Typically used when running out of one closure, and we want to run @fn -+ * asynchronously out of a new closure - @parent will then wait for @cl to -+ * finish. 
-+ */ -+static inline void closure_call(struct closure *cl, closure_fn fn, -+ struct workqueue_struct *wq, -+ struct closure *parent) -+{ -+ closure_init(cl, parent); -+ continue_at_nobarrier(cl, fn, wq); -+} -+ -+#define __closure_wait_event(waitlist, _cond) \ -+do { \ -+ struct closure cl; \ -+ \ -+ closure_init_stack(&cl); \ -+ \ -+ while (1) { \ -+ closure_wait(waitlist, &cl); \ -+ if (_cond) \ -+ break; \ -+ closure_sync(&cl); \ -+ } \ -+ closure_wake_up(waitlist); \ -+ closure_sync(&cl); \ -+} while (0) -+ -+#define closure_wait_event(waitlist, _cond) \ -+do { \ -+ if (!(_cond)) \ -+ __closure_wait_event(waitlist, _cond); \ -+} while (0) -+ -+#endif /* _LINUX_CLOSURE_H */ -diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h -index c8f03d2969df..6165f4f769b6 100644 ---- a/include/linux/compiler_attributes.h -+++ b/include/linux/compiler_attributes.h -@@ -271,4 +271,9 @@ - */ - #define __weak __attribute__((__weak__)) - -+/* -+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-flatten-function-attribute -+ */ -+#define __flatten __attribute__((flatten)) -+ - #endif /* __LINUX_COMPILER_ATTRIBUTES_H */ -diff --git a/include/linux/dcache.h b/include/linux/dcache.h -index a81f0c3cf352..053e33f5afd9 100644 ---- a/include/linux/dcache.h -+++ b/include/linux/dcache.h -@@ -256,6 +256,7 @@ extern struct dentry * d_make_root(struct inode *); - /* - the ramfs-type tree */ - extern void d_genocide(struct dentry *); - -+extern void d_mark_tmpfile(struct dentry *, struct inode *); - extern void d_tmpfile(struct dentry *, struct inode *); - - extern struct dentry *d_find_alias(struct inode *); -diff --git a/include/linux/fs.h b/include/linux/fs.h -index f5abba86107d..a0793e83b266 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -3088,6 +3088,7 @@ extern struct inode *find_inode_rcu(struct super_block *, unsigned long, - extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long); - extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); - extern int insert_inode_locked(struct inode *); -+extern struct inode *insert_inode_locked2(struct inode *); - #ifdef CONFIG_DEBUG_LOCK_ALLOC - extern void lockdep_annotate_inode_mutex_key(struct inode *inode); - #else -diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h -index cf2468da68e9..25cadac5e90d 100644 ---- a/include/linux/pagemap.h -+++ b/include/linux/pagemap.h -@@ -645,10 +645,15 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size) - return 0; - } - --int add_to_page_cache_locked(struct page *page, struct address_space *mapping, -- pgoff_t index, gfp_t gfp_mask); -+int add_to_page_cache(struct page *page, struct address_space *mapping, -+ pgoff_t index, gfp_t gfp_mask); - int add_to_page_cache_lru(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); -+int add_to_page_cache_lru_vec(struct address_space *mapping, -+ struct page **pages, -+ unsigned nr_pages, -+ pgoff_t offset, gfp_t gfp_mask); -+ - extern void delete_from_page_cache(struct page *page); - extern void __delete_from_page_cache(struct page *page, void *shadow); - int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); -@@ -666,22 +671,6 @@ void page_cache_readahead_unbounded(struct address_space *, struct file *, - pgoff_t index, unsigned long nr_to_read, - unsigned long lookahead_count); - --/* -- * Like add_to_page_cache_locked, but used to add 
newly allocated pages: -- * the page is new, so we can just run __SetPageLocked() against it. -- */ --static inline int add_to_page_cache(struct page *page, -- struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) --{ -- int error; -- -- __SetPageLocked(page); -- error = add_to_page_cache_locked(page, mapping, offset, gfp_mask); -- if (unlikely(error)) -- __ClearPageLocked(page); -- return error; --} -- - /** - * struct readahead_control - Describes a readahead request. - * -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 683372943093..6340de2990ff 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -739,6 +739,7 @@ struct task_struct { - - struct mm_struct *mm; - struct mm_struct *active_mm; -+ struct address_space *faults_disabled_mapping; - - /* Per-thread vma caching: */ - struct vmacache vmacache; -diff --git a/include/linux/six.h b/include/linux/six.h -new file mode 100644 -index 000000000000..a16e94f482e9 ---- /dev/null -+++ b/include/linux/six.h -@@ -0,0 +1,197 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+#ifndef _LINUX_SIX_H -+#define _LINUX_SIX_H -+ -+/* -+ * Shared/intent/exclusive locks: sleepable read/write locks, much like rw -+ * semaphores, except with a third intermediate state, intent. Basic operations -+ * are: -+ * -+ * six_lock_read(&foo->lock); -+ * six_unlock_read(&foo->lock); -+ * -+ * six_lock_intent(&foo->lock); -+ * six_unlock_intent(&foo->lock); -+ * -+ * six_lock_write(&foo->lock); -+ * six_unlock_write(&foo->lock); -+ * -+ * Intent locks block other intent locks, but do not block read locks, and you -+ * must have an intent lock held before taking a write lock, like so: -+ * -+ * six_lock_intent(&foo->lock); -+ * six_lock_write(&foo->lock); -+ * six_unlock_write(&foo->lock); -+ * six_unlock_intent(&foo->lock); -+ * -+ * Other operations: -+ * -+ * six_trylock_read() -+ * six_trylock_intent() -+ * six_trylock_write() -+ * -+ * six_lock_downgrade(): convert from intent to read -+ * six_lock_tryupgrade(): attempt to convert from read to intent -+ * -+ * Locks also embed a sequence number, which is incremented when the lock is -+ * locked or unlocked for write. The current sequence number can be grabbed -+ * while a lock is held from lock->state.seq; then, if you drop the lock you can -+ * use six_relock_(read|intent_write)(lock, seq) to attempt to retake the lock -+ * iff it hasn't been locked for write in the meantime. -+ * -+ * There are also operations that take the lock type as a parameter, where the -+ * type is one of SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write: -+ * -+ * six_lock_type(lock, type) -+ * six_unlock_type(lock, type) -+ * six_relock(lock, type, seq) -+ * six_trylock_type(lock, type) -+ * six_trylock_convert(lock, from, to) -+ * -+ * A lock may be held multiple types by the same thread (for read or intent, -+ * not write). However, the six locks code does _not_ implement the actual -+ * recursive checks itself though - rather, if your code (e.g. btree iterator -+ * code) knows that the current thread already has a lock held, and for the -+ * correct type, six_lock_increment() may be used to bump up the counter for -+ * that type - the only effect is that one more call to unlock will be required -+ * before the lock is unlocked. 
-+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define SIX_LOCK_SEPARATE_LOCKFNS -+ -+union six_lock_state { -+ struct { -+ atomic64_t counter; -+ }; -+ -+ struct { -+ u64 v; -+ }; -+ -+ struct { -+ /* for waitlist_bitnr() */ -+ unsigned long l; -+ }; -+ -+ struct { -+ unsigned read_lock:28; -+ unsigned intent_lock:1; -+ unsigned waiters:3; -+ /* -+ * seq works much like in seqlocks: it's incremented every time -+ * we lock and unlock for write. -+ * -+ * If it's odd write lock is held, even unlocked. -+ * -+ * Thus readers can unlock, and then lock again later iff it -+ * hasn't been modified in the meantime. -+ */ -+ u32 seq; -+ }; -+}; -+ -+enum six_lock_type { -+ SIX_LOCK_read, -+ SIX_LOCK_intent, -+ SIX_LOCK_write, -+}; -+ -+struct six_lock { -+ union six_lock_state state; -+ unsigned intent_lock_recurse; -+ struct task_struct *owner; -+ struct optimistic_spin_queue osq; -+ -+ raw_spinlock_t wait_lock; -+ struct list_head wait_list[2]; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+}; -+ -+typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *); -+ -+static __always_inline void __six_lock_init(struct six_lock *lock, -+ const char *name, -+ struct lock_class_key *key) -+{ -+ atomic64_set(&lock->state.counter, 0); -+ raw_spin_lock_init(&lock->wait_lock); -+ INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_read]); -+ INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_intent]); -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ debug_check_no_locks_freed((void *) lock, sizeof(*lock)); -+ lockdep_init_map(&lock->dep_map, name, key, 0); -+#endif -+} -+ -+#define six_lock_init(lock) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ __six_lock_init((lock), #lock, &__key); \ -+} while (0) -+ -+#define __SIX_VAL(field, _v) (((union six_lock_state) { .field = _v }).v) -+ -+#define __SIX_LOCK(type) \ -+bool six_trylock_##type(struct six_lock *); \ -+bool six_relock_##type(struct six_lock *, u32); \ -+int six_lock_##type(struct six_lock *, six_lock_should_sleep_fn, void *);\ -+void six_unlock_##type(struct six_lock *); -+ -+__SIX_LOCK(read) -+__SIX_LOCK(intent) -+__SIX_LOCK(write) -+#undef __SIX_LOCK -+ -+#define SIX_LOCK_DISPATCH(type, fn, ...) 
\ -+ switch (type) { \ -+ case SIX_LOCK_read: \ -+ return fn##_read(__VA_ARGS__); \ -+ case SIX_LOCK_intent: \ -+ return fn##_intent(__VA_ARGS__); \ -+ case SIX_LOCK_write: \ -+ return fn##_write(__VA_ARGS__); \ -+ default: \ -+ BUG(); \ -+ } -+ -+static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type) -+{ -+ SIX_LOCK_DISPATCH(type, six_trylock, lock); -+} -+ -+static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type, -+ unsigned seq) -+{ -+ SIX_LOCK_DISPATCH(type, six_relock, lock, seq); -+} -+ -+static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p) -+{ -+ SIX_LOCK_DISPATCH(type, six_lock, lock, should_sleep_fn, p); -+} -+ -+static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type) -+{ -+ SIX_LOCK_DISPATCH(type, six_unlock, lock); -+} -+ -+void six_lock_downgrade(struct six_lock *); -+bool six_lock_tryupgrade(struct six_lock *); -+bool six_trylock_convert(struct six_lock *, enum six_lock_type, -+ enum six_lock_type); -+ -+void six_lock_increment(struct six_lock *, enum six_lock_type); -+ -+void six_lock_wakeup_all(struct six_lock *); -+ -+#endif /* _LINUX_SIX_H */ -diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h -index 0221f852a7e1..f81f60d891ac 100644 ---- a/include/linux/vmalloc.h -+++ b/include/linux/vmalloc.h -@@ -106,6 +106,7 @@ extern void *vzalloc(unsigned long size); - extern void *vmalloc_user(unsigned long size); - extern void *vmalloc_node(unsigned long size, int node); - extern void *vzalloc_node(unsigned long size, int node); -+extern void *vmalloc_exec(unsigned long size, gfp_t gfp_mask); - extern void *vmalloc_32(unsigned long size); - extern void *vmalloc_32_user(unsigned long size); - extern void *__vmalloc(unsigned long size, gfp_t gfp_mask); -diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h -new file mode 100644 -index 000000000000..9b4e8295ed75 ---- /dev/null -+++ b/include/trace/events/bcachefs.h -@@ -0,0 +1,664 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#undef TRACE_SYSTEM -+#define TRACE_SYSTEM bcachefs -+ -+#if !defined(_TRACE_BCACHE_H) || defined(TRACE_HEADER_MULTI_READ) -+#define _TRACE_BCACHE_H -+ -+#include -+ -+DECLARE_EVENT_CLASS(bpos, -+ TP_PROTO(struct bpos *p), -+ TP_ARGS(p), -+ -+ TP_STRUCT__entry( -+ __field(u64, inode ) -+ __field(u64, offset ) -+ ), -+ -+ TP_fast_assign( -+ __entry->inode = p->inode; -+ __entry->offset = p->offset; -+ ), -+ -+ TP_printk("%llu:%llu", __entry->inode, __entry->offset) -+); -+ -+DECLARE_EVENT_CLASS(bkey, -+ TP_PROTO(const struct bkey *k), -+ TP_ARGS(k), -+ -+ TP_STRUCT__entry( -+ __field(u64, inode ) -+ __field(u64, offset ) -+ __field(u32, size ) -+ ), -+ -+ TP_fast_assign( -+ __entry->inode = k->p.inode; -+ __entry->offset = k->p.offset; -+ __entry->size = k->size; -+ ), -+ -+ TP_printk("%llu:%llu len %u", __entry->inode, -+ __entry->offset, __entry->size) -+); -+ -+DECLARE_EVENT_CLASS(bch_fs, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ ), -+ -+ TP_printk("%pU", __entry->uuid) -+); -+ -+DECLARE_EVENT_CLASS(bio, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio), -+ -+ TP_STRUCT__entry( -+ __field(dev_t, dev ) -+ __field(sector_t, sector ) -+ __field(unsigned int, nr_sector ) -+ __array(char, rwbs, 6 ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = bio->bi_disk ? 
bio_dev(bio) : 0; -+ __entry->sector = bio->bi_iter.bi_sector; -+ __entry->nr_sector = bio->bi_iter.bi_size >> 9; -+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); -+ ), -+ -+ TP_printk("%d,%d %s %llu + %u", -+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, -+ (unsigned long long)__entry->sector, __entry->nr_sector) -+); -+ -+/* io.c: */ -+ -+DEFINE_EVENT(bio, read_split, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+DEFINE_EVENT(bio, read_bounce, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+DEFINE_EVENT(bio, read_retry, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+DEFINE_EVENT(bio, promote, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+/* Journal */ -+ -+DEFINE_EVENT(bch_fs, journal_full, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, journal_entry_full, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bio, journal_write, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+/* bset.c: */ -+ -+DEFINE_EVENT(bpos, bkey_pack_pos_fail, -+ TP_PROTO(struct bpos *p), -+ TP_ARGS(p) -+); -+ -+/* Btree */ -+ -+DECLARE_EVENT_CLASS(btree_node, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(u8, level ) -+ __field(u8, id ) -+ __field(u64, inode ) -+ __field(u64, offset ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ __entry->level = b->c.level; -+ __entry->id = b->c.btree_id; -+ __entry->inode = b->key.k.p.inode; -+ __entry->offset = b->key.k.p.offset; -+ ), -+ -+ TP_printk("%pU %u id %u %llu:%llu", -+ __entry->uuid, __entry->level, __entry->id, -+ __entry->inode, __entry->offset) -+); -+ -+DEFINE_EVENT(btree_node, btree_read, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+TRACE_EVENT(btree_write, -+ TP_PROTO(struct btree *b, unsigned bytes, unsigned sectors), -+ TP_ARGS(b, bytes, sectors), -+ -+ TP_STRUCT__entry( -+ __field(enum btree_node_type, type) -+ __field(unsigned, bytes ) -+ __field(unsigned, sectors ) -+ ), -+ -+ TP_fast_assign( -+ __entry->type = btree_node_type(b); -+ __entry->bytes = bytes; -+ __entry->sectors = sectors; -+ ), -+ -+ TP_printk("bkey type %u bytes %u sectors %u", -+ __entry->type , __entry->bytes, __entry->sectors) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_alloc, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_free, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_reap, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DECLARE_EVENT_CLASS(btree_node_cannibalize_lock, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ ), -+ -+ TP_printk("%pU", __entry->uuid) -+); -+ -+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock_fail, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, btree_node_cannibalize_unlock, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+TRACE_EVENT(btree_reserve_get_fail, -+ TP_PROTO(struct bch_fs *c, size_t required, struct closure 
*cl), -+ TP_ARGS(c, required, cl), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(size_t, required ) -+ __field(struct closure *, cl ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ __entry->required = required; -+ __entry->cl = cl; -+ ), -+ -+ TP_printk("%pU required %zu by %p", __entry->uuid, -+ __entry->required, __entry->cl) -+); -+ -+TRACE_EVENT(btree_insert_key, -+ TP_PROTO(struct bch_fs *c, struct btree *b, struct bkey_i *k), -+ TP_ARGS(c, b, k), -+ -+ TP_STRUCT__entry( -+ __field(u8, id ) -+ __field(u64, inode ) -+ __field(u64, offset ) -+ __field(u32, size ) -+ ), -+ -+ TP_fast_assign( -+ __entry->id = b->c.btree_id; -+ __entry->inode = k->k.p.inode; -+ __entry->offset = k->k.p.offset; -+ __entry->size = k->k.size; -+ ), -+ -+ TP_printk("btree %u: %llu:%llu len %u", __entry->id, -+ __entry->inode, __entry->offset, __entry->size) -+); -+ -+DEFINE_EVENT(btree_node, btree_split, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_compact, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_merge, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_set_root, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+/* Garbage collection */ -+ -+DEFINE_EVENT(btree_node, btree_gc_coalesce, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+TRACE_EVENT(btree_gc_coalesce_fail, -+ TP_PROTO(struct bch_fs *c, int reason), -+ TP_ARGS(c, reason), -+ -+ TP_STRUCT__entry( -+ __field(u8, reason ) -+ __array(char, uuid, 16 ) -+ ), -+ -+ TP_fast_assign( -+ __entry->reason = reason; -+ memcpy(__entry->uuid, c->disk_sb.sb->user_uuid.b, 16); -+ ), -+ -+ TP_printk("%pU: %u", __entry->uuid, __entry->reason) -+); -+ -+DEFINE_EVENT(btree_node, btree_gc_rewrite_node, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_gc_rewrite_node_fail, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(bch_fs, gc_start, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, gc_end, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, gc_coalesce_start, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, gc_coalesce_end, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, gc_cannot_inc_gens, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+/* Allocator */ -+ -+TRACE_EVENT(alloc_batch, -+ TP_PROTO(struct bch_dev *ca, size_t free, size_t total), -+ TP_ARGS(ca, free, total), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(size_t, free ) -+ __field(size_t, total ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, ca->uuid.b, 16); -+ __entry->free = free; -+ __entry->total = total; -+ ), -+ -+ TP_printk("%pU free %zu total %zu", -+ __entry->uuid, __entry->free, __entry->total) -+); -+ -+TRACE_EVENT(invalidate, -+ TP_PROTO(struct bch_dev *ca, u64 offset, unsigned sectors), -+ TP_ARGS(ca, offset, sectors), -+ -+ TP_STRUCT__entry( -+ __field(unsigned, sectors ) -+ __field(dev_t, dev ) -+ __field(__u64, offset ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = ca->disk_sb.bdev->bd_dev; -+ __entry->offset = offset, -+ __entry->sectors = sectors; -+ ), -+ -+ TP_printk("invalidated %u sectors at %d,%d sector=%llu", -+ __entry->sectors, MAJOR(__entry->dev), -+ MINOR(__entry->dev), 
__entry->offset) -+); -+ -+DEFINE_EVENT(bch_fs, rescale_prios, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DECLARE_EVENT_CLASS(bucket_alloc, -+ TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), -+ TP_ARGS(ca, reserve), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16) -+ __field(enum alloc_reserve, reserve ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, ca->uuid.b, 16); -+ __entry->reserve = reserve; -+ ), -+ -+ TP_printk("%pU reserve %d", __entry->uuid, __entry->reserve) -+); -+ -+DEFINE_EVENT(bucket_alloc, bucket_alloc, -+ TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), -+ TP_ARGS(ca, reserve) -+); -+ -+DEFINE_EVENT(bucket_alloc, bucket_alloc_fail, -+ TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), -+ TP_ARGS(ca, reserve) -+); -+ -+DEFINE_EVENT(bucket_alloc, open_bucket_alloc_fail, -+ TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), -+ TP_ARGS(ca, reserve) -+); -+ -+/* Moving IO */ -+ -+DEFINE_EVENT(bkey, move_extent, -+ TP_PROTO(const struct bkey *k), -+ TP_ARGS(k) -+); -+ -+DEFINE_EVENT(bkey, move_alloc_fail, -+ TP_PROTO(const struct bkey *k), -+ TP_ARGS(k) -+); -+ -+DEFINE_EVENT(bkey, move_race, -+ TP_PROTO(const struct bkey *k), -+ TP_ARGS(k) -+); -+ -+TRACE_EVENT(move_data, -+ TP_PROTO(struct bch_fs *c, u64 sectors_moved, -+ u64 keys_moved), -+ TP_ARGS(c, sectors_moved, keys_moved), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(u64, sectors_moved ) -+ __field(u64, keys_moved ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ __entry->sectors_moved = sectors_moved; -+ __entry->keys_moved = keys_moved; -+ ), -+ -+ TP_printk("%pU sectors_moved %llu keys_moved %llu", -+ __entry->uuid, __entry->sectors_moved, __entry->keys_moved) -+); -+ -+TRACE_EVENT(copygc, -+ TP_PROTO(struct bch_fs *c, -+ u64 sectors_moved, u64 sectors_not_moved, -+ u64 buckets_moved, u64 buckets_not_moved), -+ TP_ARGS(c, -+ sectors_moved, sectors_not_moved, -+ buckets_moved, buckets_not_moved), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(u64, sectors_moved ) -+ __field(u64, sectors_not_moved ) -+ __field(u64, buckets_moved ) -+ __field(u64, buckets_not_moved ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ __entry->sectors_moved = sectors_moved; -+ __entry->sectors_not_moved = sectors_not_moved; -+ __entry->buckets_moved = buckets_moved; -+ __entry->buckets_not_moved = buckets_moved; -+ ), -+ -+ TP_printk("%pU sectors moved %llu remain %llu buckets moved %llu remain %llu", -+ __entry->uuid, -+ __entry->sectors_moved, __entry->sectors_not_moved, -+ __entry->buckets_moved, __entry->buckets_not_moved) -+); -+ -+TRACE_EVENT(transaction_restart_ip, -+ TP_PROTO(unsigned long caller, unsigned long ip), -+ TP_ARGS(caller, ip), -+ -+ TP_STRUCT__entry( -+ __field(unsigned long, caller ) -+ __field(unsigned long, ip ) -+ ), -+ -+ TP_fast_assign( -+ __entry->caller = caller; -+ __entry->ip = ip; -+ ), -+ -+ TP_printk("%pF %pF", (void *) __entry->caller, (void *) __entry->ip) -+); -+ -+DECLARE_EVENT_CLASS(transaction_restart, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip), -+ -+ TP_STRUCT__entry( -+ __field(unsigned long, ip ) -+ ), -+ -+ TP_fast_assign( -+ __entry->ip = ip; -+ ), -+ -+ TP_printk("%pf", (void *) __entry->ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_btree_node_reused, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_would_deadlock, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ 
-+TRACE_EVENT(trans_restart_iters_realloced, -+ TP_PROTO(unsigned long ip, unsigned nr), -+ TP_ARGS(ip, nr), -+ -+ TP_STRUCT__entry( -+ __field(unsigned long, ip ) -+ __field(unsigned, nr ) -+ ), -+ -+ TP_fast_assign( -+ __entry->ip = ip; -+ __entry->nr = nr; -+ ), -+ -+ TP_printk("%pf nr %u", (void *) __entry->ip, __entry->nr) -+); -+ -+TRACE_EVENT(trans_restart_mem_realloced, -+ TP_PROTO(unsigned long ip, unsigned long bytes), -+ TP_ARGS(ip, bytes), -+ -+ TP_STRUCT__entry( -+ __field(unsigned long, ip ) -+ __field(unsigned long, bytes ) -+ ), -+ -+ TP_fast_assign( -+ __entry->ip = ip; -+ __entry->bytes = bytes; -+ ), -+ -+ TP_printk("%pf bytes %lu", (void *) __entry->ip, __entry->bytes) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_journal_preres_get, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_mark_replicas, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_fault_inject, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_btree_node_split, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_mark, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_upgrade, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_iter_upgrade, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_traverse, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_atomic, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DECLARE_EVENT_CLASS(node_lock_fail, -+ TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq), -+ TP_ARGS(level, iter_seq, node, node_seq), -+ -+ TP_STRUCT__entry( -+ __field(u32, level) -+ __field(u32, iter_seq) -+ __field(u32, node) -+ __field(u32, node_seq) -+ ), -+ -+ TP_fast_assign( -+ __entry->level = level; -+ __entry->iter_seq = iter_seq; -+ __entry->node = node; -+ __entry->node_seq = node_seq; -+ ), -+ -+ TP_printk("level %u iter seq %u node %u node seq %u", -+ __entry->level, __entry->iter_seq, -+ __entry->node, __entry->node_seq) -+); -+ -+DEFINE_EVENT(node_lock_fail, node_upgrade_fail, -+ TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq), -+ TP_ARGS(level, iter_seq, node, node_seq) -+); -+ -+DEFINE_EVENT(node_lock_fail, node_relock_fail, -+ TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq), -+ TP_ARGS(level, iter_seq, node, node_seq) -+); -+ -+#endif /* _TRACE_BCACHE_H */ -+ -+/* This part must be outside protection */ -+#include -diff --git a/init/init_task.c b/init/init_task.c -index 15089d15010a..61d969e94569 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -83,6 +83,7 @@ struct task_struct init_task - .nr_cpus_allowed= NR_CPUS, - .mm = NULL, - .active_mm = &init_mm, -+ .faults_disabled_mapping = NULL, - .restart_block = { - .fn = do_no_restart_syscall, - }, -diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks -index 3de8fd11873b..ab8aa082ce56 100644 ---- a/kernel/Kconfig.locks -+++ b/kernel/Kconfig.locks -@@ -259,3 +259,6 @@ config ARCH_HAS_MMIOWB - config MMIOWB - def_bool y if ARCH_HAS_MMIOWB - depends on SMP -+ -+config SIXLOCKS -+ bool -diff --git 
a/kernel/locking/Makefile b/kernel/locking/Makefile -index 6d11cfb9b41f..4c13937e8f37 100644 ---- a/kernel/locking/Makefile -+++ b/kernel/locking/Makefile -@@ -32,3 +32,4 @@ obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o - obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o - obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o - obj-$(CONFIG_LOCK_EVENT_COUNTS) += lock_events.o -+obj-$(CONFIG_SIXLOCKS) += six.o -diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h -index baca699b94e9..4abb462d914d 100644 ---- a/kernel/locking/lockdep_internals.h -+++ b/kernel/locking/lockdep_internals.h -@@ -96,7 +96,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = - #else - #define MAX_LOCKDEP_ENTRIES 32768UL - --#define MAX_LOCKDEP_CHAINS_BITS 16 -+#define MAX_LOCKDEP_CHAINS_BITS 18 - - /* - * Stack-trace: tightly packed array of stack backtrace -@@ -114,7 +114,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = - - #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - --#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) -+#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*10) - - extern struct list_head all_lock_classes; - extern struct lock_chain lock_chains[]; -diff --git a/kernel/locking/six.c b/kernel/locking/six.c -new file mode 100644 -index 000000000000..49d46ed2e18e ---- /dev/null -+++ b/kernel/locking/six.c -@@ -0,0 +1,553 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef DEBUG -+#define EBUG_ON(cond) BUG_ON(cond) -+#else -+#define EBUG_ON(cond) do {} while (0) -+#endif -+ -+#define six_acquire(l, t) lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_) -+#define six_release(l) lock_release(l, _RET_IP_) -+ -+struct six_lock_vals { -+ /* Value we add to the lock in order to take the lock: */ -+ u64 lock_val; -+ -+ /* If the lock has this value (used as a mask), taking the lock fails: */ -+ u64 lock_fail; -+ -+ /* Value we add to the lock in order to release the lock: */ -+ u64 unlock_val; -+ -+ /* Mask that indicates lock is held for this type: */ -+ u64 held_mask; -+ -+ /* Waitlist we wakeup when releasing the lock: */ -+ enum six_lock_type unlock_wakeup; -+}; -+ -+#define __SIX_LOCK_HELD_read __SIX_VAL(read_lock, ~0) -+#define __SIX_LOCK_HELD_intent __SIX_VAL(intent_lock, ~0) -+#define __SIX_LOCK_HELD_write __SIX_VAL(seq, 1) -+ -+#define LOCK_VALS { \ -+ [SIX_LOCK_read] = { \ -+ .lock_val = __SIX_VAL(read_lock, 1), \ -+ .lock_fail = __SIX_LOCK_HELD_write, \ -+ .unlock_val = -__SIX_VAL(read_lock, 1), \ -+ .held_mask = __SIX_LOCK_HELD_read, \ -+ .unlock_wakeup = SIX_LOCK_write, \ -+ }, \ -+ [SIX_LOCK_intent] = { \ -+ .lock_val = __SIX_VAL(intent_lock, 1), \ -+ .lock_fail = __SIX_LOCK_HELD_intent, \ -+ .unlock_val = -__SIX_VAL(intent_lock, 1), \ -+ .held_mask = __SIX_LOCK_HELD_intent, \ -+ .unlock_wakeup = SIX_LOCK_intent, \ -+ }, \ -+ [SIX_LOCK_write] = { \ -+ .lock_val = __SIX_VAL(seq, 1), \ -+ .lock_fail = __SIX_LOCK_HELD_read, \ -+ .unlock_val = __SIX_VAL(seq, 1), \ -+ .held_mask = __SIX_LOCK_HELD_write, \ -+ .unlock_wakeup = SIX_LOCK_read, \ -+ }, \ -+} -+ -+static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type, -+ union six_lock_state old) -+{ -+ if (type != SIX_LOCK_intent) -+ return; -+ -+ if (!old.intent_lock) { -+ EBUG_ON(lock->owner); -+ lock->owner = current; -+ } else { -+ EBUG_ON(lock->owner != current); -+ } -+} -+ -+static __always_inline bool do_six_trylock_type(struct six_lock *lock, -+ enum six_lock_type type) -+{ -+ const struct 
six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state old; -+ u64 v = READ_ONCE(lock->state.v); -+ -+ EBUG_ON(type == SIX_LOCK_write && lock->owner != current); -+ -+ do { -+ old.v = v; -+ -+ EBUG_ON(type == SIX_LOCK_write && -+ ((old.v & __SIX_LOCK_HELD_write) || -+ !(old.v & __SIX_LOCK_HELD_intent))); -+ -+ if (old.v & l[type].lock_fail) -+ return false; -+ } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter, -+ old.v, -+ old.v + l[type].lock_val)) != old.v); -+ -+ six_set_owner(lock, type, old); -+ return true; -+} -+ -+__always_inline __flatten -+static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type) -+{ -+ if (!do_six_trylock_type(lock, type)) -+ return false; -+ -+ if (type != SIX_LOCK_write) -+ six_acquire(&lock->dep_map, 1); -+ return true; -+} -+ -+__always_inline __flatten -+static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type, -+ unsigned seq) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state old; -+ u64 v = READ_ONCE(lock->state.v); -+ -+ do { -+ old.v = v; -+ -+ if (old.seq != seq || old.v & l[type].lock_fail) -+ return false; -+ } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter, -+ old.v, -+ old.v + l[type].lock_val)) != old.v); -+ -+ six_set_owner(lock, type, old); -+ if (type != SIX_LOCK_write) -+ six_acquire(&lock->dep_map, 1); -+ return true; -+} -+ -+struct six_lock_waiter { -+ struct list_head list; -+ struct task_struct *task; -+}; -+ -+/* This is probably up there with the more evil things I've done */ -+#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l)) -+ -+#ifdef CONFIG_LOCK_SPIN_ON_OWNER -+ -+static inline int six_can_spin_on_owner(struct six_lock *lock) -+{ -+ struct task_struct *owner; -+ int retval = 1; -+ -+ if (need_resched()) -+ return 0; -+ -+ rcu_read_lock(); -+ owner = READ_ONCE(lock->owner); -+ if (owner) -+ retval = owner->on_cpu; -+ rcu_read_unlock(); -+ /* -+ * if lock->owner is not set, the mutex owner may have just acquired -+ * it and not set the owner yet or the mutex has been released. -+ */ -+ return retval; -+} -+ -+static inline bool six_spin_on_owner(struct six_lock *lock, -+ struct task_struct *owner) -+{ -+ bool ret = true; -+ -+ rcu_read_lock(); -+ while (lock->owner == owner) { -+ /* -+ * Ensure we emit the owner->on_cpu, dereference _after_ -+ * checking lock->owner still matches owner. If that fails, -+ * owner might point to freed memory. If it still matches, -+ * the rcu_read_lock() ensures the memory stays valid. -+ */ -+ barrier(); -+ -+ if (!owner->on_cpu || need_resched()) { -+ ret = false; -+ break; -+ } -+ -+ cpu_relax(); -+ } -+ rcu_read_unlock(); -+ -+ return ret; -+} -+ -+static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) -+{ -+ struct task_struct *task = current; -+ -+ if (type == SIX_LOCK_write) -+ return false; -+ -+ preempt_disable(); -+ if (!six_can_spin_on_owner(lock)) -+ goto fail; -+ -+ if (!osq_lock(&lock->osq)) -+ goto fail; -+ -+ while (1) { -+ struct task_struct *owner; -+ -+ /* -+ * If there's an owner, wait for it to either -+ * release the lock or go to sleep. -+ */ -+ owner = READ_ONCE(lock->owner); -+ if (owner && !six_spin_on_owner(lock, owner)) -+ break; -+ -+ if (do_six_trylock_type(lock, type)) { -+ osq_unlock(&lock->osq); -+ preempt_enable(); -+ return true; -+ } -+ -+ /* -+ * When there's no owner, we might have preempted between the -+ * owner acquiring the lock and setting the owner field. 
If -+ * we're an RT task that will live-lock because we won't let -+ * the owner complete. -+ */ -+ if (!owner && (need_resched() || rt_task(task))) -+ break; -+ -+ /* -+ * The cpu_relax() call is a compiler barrier which forces -+ * everything in this loop to be re-loaded. We don't need -+ * memory barriers as we'll eventually observe the right -+ * values at the cost of a few extra spins. -+ */ -+ cpu_relax(); -+ } -+ -+ osq_unlock(&lock->osq); -+fail: -+ preempt_enable(); -+ -+ /* -+ * If we fell out of the spin path because of need_resched(), -+ * reschedule now, before we try-lock again. This avoids getting -+ * scheduled out right after we obtained the lock. -+ */ -+ if (need_resched()) -+ schedule(); -+ -+ return false; -+} -+ -+#else /* CONFIG_LOCK_SPIN_ON_OWNER */ -+ -+static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) -+{ -+ return false; -+} -+ -+#endif -+ -+noinline -+static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state old, new; -+ struct six_lock_waiter wait; -+ int ret = 0; -+ u64 v; -+ -+ ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0; -+ if (ret) -+ return ret; -+ -+ if (six_optimistic_spin(lock, type)) -+ return 0; -+ -+ lock_contended(&lock->dep_map, _RET_IP_); -+ -+ INIT_LIST_HEAD(&wait.list); -+ wait.task = current; -+ -+ while (1) { -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ if (type == SIX_LOCK_write) -+ EBUG_ON(lock->owner != current); -+ else if (list_empty_careful(&wait.list)) { -+ raw_spin_lock(&lock->wait_lock); -+ list_add_tail(&wait.list, &lock->wait_list[type]); -+ raw_spin_unlock(&lock->wait_lock); -+ } -+ -+ ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0; -+ if (ret) -+ break; -+ -+ v = READ_ONCE(lock->state.v); -+ do { -+ new.v = old.v = v; -+ -+ if (!(old.v & l[type].lock_fail)) -+ new.v += l[type].lock_val; -+ else if (!(new.waiters & (1 << type))) -+ new.waiters |= 1 << type; -+ else -+ break; /* waiting bit already set */ -+ } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter, -+ old.v, new.v)) != old.v); -+ -+ if (!(old.v & l[type].lock_fail)) -+ break; -+ -+ schedule(); -+ } -+ -+ if (!ret) -+ six_set_owner(lock, type, old); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ if (!list_empty_careful(&wait.list)) { -+ raw_spin_lock(&lock->wait_lock); -+ list_del_init(&wait.list); -+ raw_spin_unlock(&lock->wait_lock); -+ } -+ -+ return ret; -+} -+ -+__always_inline -+static int __six_lock_type(struct six_lock *lock, enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p) -+{ -+ int ret; -+ -+ if (type != SIX_LOCK_write) -+ six_acquire(&lock->dep_map, 0); -+ -+ ret = do_six_trylock_type(lock, type) ? 
0 -+ : __six_lock_type_slowpath(lock, type, should_sleep_fn, p); -+ -+ if (ret && type != SIX_LOCK_write) -+ six_release(&lock->dep_map); -+ if (!ret) -+ lock_acquired(&lock->dep_map, _RET_IP_); -+ -+ return ret; -+} -+ -+static inline void six_lock_wakeup(struct six_lock *lock, -+ union six_lock_state state, -+ unsigned waitlist_id) -+{ -+ struct list_head *wait_list = &lock->wait_list[waitlist_id]; -+ struct six_lock_waiter *w, *next; -+ -+ if (waitlist_id == SIX_LOCK_write && state.read_lock) -+ return; -+ -+ if (!(state.waiters & (1 << waitlist_id))) -+ return; -+ -+ clear_bit(waitlist_bitnr(waitlist_id), -+ (unsigned long *) &lock->state.v); -+ -+ if (waitlist_id == SIX_LOCK_write) { -+ struct task_struct *p = READ_ONCE(lock->owner); -+ -+ if (p) -+ wake_up_process(p); -+ return; -+ } -+ -+ raw_spin_lock(&lock->wait_lock); -+ -+ list_for_each_entry_safe(w, next, wait_list, list) { -+ list_del_init(&w->list); -+ -+ if (wake_up_process(w->task) && -+ waitlist_id != SIX_LOCK_read) { -+ if (!list_empty(wait_list)) -+ set_bit(waitlist_bitnr(waitlist_id), -+ (unsigned long *) &lock->state.v); -+ break; -+ } -+ } -+ -+ raw_spin_unlock(&lock->wait_lock); -+} -+ -+__always_inline __flatten -+static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state state; -+ -+ EBUG_ON(!(lock->state.v & l[type].held_mask)); -+ EBUG_ON(type == SIX_LOCK_write && -+ !(lock->state.v & __SIX_LOCK_HELD_intent)); -+ -+ if (type != SIX_LOCK_write) -+ six_release(&lock->dep_map); -+ -+ if (type == SIX_LOCK_intent) { -+ EBUG_ON(lock->owner != current); -+ -+ if (lock->intent_lock_recurse) { -+ --lock->intent_lock_recurse; -+ return; -+ } -+ -+ lock->owner = NULL; -+ } -+ -+ state.v = atomic64_add_return_release(l[type].unlock_val, -+ &lock->state.counter); -+ six_lock_wakeup(lock, state, l[type].unlock_wakeup); -+} -+ -+#define __SIX_LOCK(type) \ -+bool six_trylock_##type(struct six_lock *lock) \ -+{ \ -+ return __six_trylock_type(lock, SIX_LOCK_##type); \ -+} \ -+EXPORT_SYMBOL_GPL(six_trylock_##type); \ -+ \ -+bool six_relock_##type(struct six_lock *lock, u32 seq) \ -+{ \ -+ return __six_relock_type(lock, SIX_LOCK_##type, seq); \ -+} \ -+EXPORT_SYMBOL_GPL(six_relock_##type); \ -+ \ -+int six_lock_##type(struct six_lock *lock, \ -+ six_lock_should_sleep_fn should_sleep_fn, void *p) \ -+{ \ -+ return __six_lock_type(lock, SIX_LOCK_##type, should_sleep_fn, p);\ -+} \ -+EXPORT_SYMBOL_GPL(six_lock_##type); \ -+ \ -+void six_unlock_##type(struct six_lock *lock) \ -+{ \ -+ __six_unlock_type(lock, SIX_LOCK_##type); \ -+} \ -+EXPORT_SYMBOL_GPL(six_unlock_##type); -+ -+__SIX_LOCK(read) -+__SIX_LOCK(intent) -+__SIX_LOCK(write) -+ -+#undef __SIX_LOCK -+ -+/* Convert from intent to read: */ -+void six_lock_downgrade(struct six_lock *lock) -+{ -+ six_lock_increment(lock, SIX_LOCK_read); -+ six_unlock_intent(lock); -+} -+EXPORT_SYMBOL_GPL(six_lock_downgrade); -+ -+bool six_lock_tryupgrade(struct six_lock *lock) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state old, new; -+ u64 v = READ_ONCE(lock->state.v); -+ -+ do { -+ new.v = old.v = v; -+ -+ EBUG_ON(!(old.v & l[SIX_LOCK_read].held_mask)); -+ -+ new.v += l[SIX_LOCK_read].unlock_val; -+ -+ if (new.v & l[SIX_LOCK_intent].lock_fail) -+ return false; -+ -+ new.v += l[SIX_LOCK_intent].lock_val; -+ } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter, -+ old.v, new.v)) != old.v); -+ -+ six_set_owner(lock, SIX_LOCK_intent, old); -+ six_lock_wakeup(lock, new, 
l[SIX_LOCK_read].unlock_wakeup); -+ -+ return true; -+} -+EXPORT_SYMBOL_GPL(six_lock_tryupgrade); -+ -+bool six_trylock_convert(struct six_lock *lock, -+ enum six_lock_type from, -+ enum six_lock_type to) -+{ -+ EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write); -+ -+ if (to == from) -+ return true; -+ -+ if (to == SIX_LOCK_read) { -+ six_lock_downgrade(lock); -+ return true; -+ } else { -+ return six_lock_tryupgrade(lock); -+ } -+} -+EXPORT_SYMBOL_GPL(six_trylock_convert); -+ -+/* -+ * Increment read/intent lock count, assuming we already have it read or intent -+ * locked: -+ */ -+void six_lock_increment(struct six_lock *lock, enum six_lock_type type) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ -+ EBUG_ON(type == SIX_LOCK_write); -+ six_acquire(&lock->dep_map, 0); -+ -+ /* XXX: assert already locked, and that we don't overflow: */ -+ -+ switch (type) { -+ case SIX_LOCK_read: -+ atomic64_add(l[type].lock_val, &lock->state.counter); -+ break; -+ case SIX_LOCK_intent: -+ lock->intent_lock_recurse++; -+ break; -+ case SIX_LOCK_write: -+ BUG(); -+ break; -+ } -+} -+EXPORT_SYMBOL_GPL(six_lock_increment); -+ -+void six_lock_wakeup_all(struct six_lock *lock) -+{ -+ struct six_lock_waiter *w; -+ -+ raw_spin_lock(&lock->wait_lock); -+ -+ list_for_each_entry(w, &lock->wait_list[0], list) -+ wake_up_process(w->task); -+ list_for_each_entry(w, &lock->wait_list[1], list) -+ wake_up_process(w->task); -+ -+ raw_spin_unlock(&lock->wait_lock); -+} -+EXPORT_SYMBOL_GPL(six_lock_wakeup_all); -diff --git a/kernel/module.c b/kernel/module.c -index aa183c9ac0a2..fdfe519a0393 100644 ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -2786,9 +2786,7 @@ static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug) - - void * __weak module_alloc(unsigned long size) - { -- return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, -- GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, -- NUMA_NO_NODE, __builtin_return_address(0)); -+ return vmalloc_exec(size, GFP_KERNEL); - } - - bool __weak module_init_section(const char *name) -diff --git a/lib/Kconfig b/lib/Kconfig -index df3f3da95990..086d332ab5c8 100644 ---- a/lib/Kconfig -+++ b/lib/Kconfig -@@ -457,6 +457,9 @@ config ASSOCIATIVE_ARRAY - - for more information. - -+config CLOSURES -+ bool -+ - config HAS_IOMEM - bool - depends on !NO_IOMEM -diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug -index 9ad9210d70a1..51558639ee91 100644 ---- a/lib/Kconfig.debug -+++ b/lib/Kconfig.debug -@@ -1466,6 +1466,15 @@ config DEBUG_CREDENTIALS - - source "kernel/rcu/Kconfig.debug" - -+config DEBUG_CLOSURES -+ bool "Debug closures (bcache async widgits)" -+ depends on CLOSURES -+ select DEBUG_FS -+ help -+ Keeps all active closures in a linked list and provides a debugfs -+ interface to list them, which makes it possible to see asynchronous -+ operations that get stuck. 
-+ - config DEBUG_WQ_FORCE_RR_CPU - bool "Force round-robin CPU selection for unbound work items" - depends on DEBUG_KERNEL -diff --git a/lib/Makefile b/lib/Makefile -index b1c42c10073b..7d6921a5c823 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -208,6 +208,8 @@ obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o - - obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o - -+obj-$(CONFIG_CLOSURES) += closure.o -+ - obj-$(CONFIG_DQL) += dynamic_queue_limits.o - - obj-$(CONFIG_GLOB) += glob.o -diff --git a/lib/closure.c b/lib/closure.c -new file mode 100644 -index 000000000000..3e6366c26209 ---- /dev/null -+++ b/lib/closure.c -@@ -0,0 +1,214 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Asynchronous refcounty things -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+static inline void closure_put_after_sub(struct closure *cl, int flags) -+{ -+ int r = flags & CLOSURE_REMAINING_MASK; -+ -+ BUG_ON(flags & CLOSURE_GUARD_MASK); -+ BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR)); -+ -+ if (!r) { -+ if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { -+ atomic_set(&cl->remaining, -+ CLOSURE_REMAINING_INITIALIZER); -+ closure_queue(cl); -+ } else { -+ struct closure *parent = cl->parent; -+ closure_fn *destructor = cl->fn; -+ -+ closure_debug_destroy(cl); -+ -+ if (destructor) -+ destructor(cl); -+ -+ if (parent) -+ closure_put(parent); -+ } -+ } -+} -+ -+/* For clearing flags with the same atomic op as a put */ -+void closure_sub(struct closure *cl, int v) -+{ -+ closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining)); -+} -+EXPORT_SYMBOL(closure_sub); -+ -+/* -+ * closure_put - decrement a closure's refcount -+ */ -+void closure_put(struct closure *cl) -+{ -+ closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); -+} -+EXPORT_SYMBOL(closure_put); -+ -+/* -+ * closure_wake_up - wake up all closures on a wait list, without memory barrier -+ */ -+void __closure_wake_up(struct closure_waitlist *wait_list) -+{ -+ struct llist_node *list; -+ struct closure *cl, *t; -+ struct llist_node *reverse = NULL; -+ -+ list = llist_del_all(&wait_list->list); -+ -+ /* We first reverse the list to preserve FIFO ordering and fairness */ -+ reverse = llist_reverse_order(list); -+ -+ /* Then do the wakeups */ -+ llist_for_each_entry_safe(cl, t, reverse, list) { -+ closure_set_waiting(cl, 0); -+ closure_sub(cl, CLOSURE_WAITING + 1); -+ } -+} -+EXPORT_SYMBOL(__closure_wake_up); -+ -+/** -+ * closure_wait - add a closure to a waitlist -+ * @waitlist: will own a ref on @cl, which will be released when -+ * closure_wake_up() is called on @waitlist. -+ * @cl: closure pointer. 
-+ * -+ */ -+bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) -+{ -+ if (atomic_read(&cl->remaining) & CLOSURE_WAITING) -+ return false; -+ -+ closure_set_waiting(cl, _RET_IP_); -+ atomic_add(CLOSURE_WAITING + 1, &cl->remaining); -+ llist_add(&cl->list, &waitlist->list); -+ -+ return true; -+} -+EXPORT_SYMBOL(closure_wait); -+ -+struct closure_syncer { -+ struct task_struct *task; -+ int done; -+}; -+ -+static void closure_sync_fn(struct closure *cl) -+{ -+ struct closure_syncer *s = cl->s; -+ struct task_struct *p; -+ -+ rcu_read_lock(); -+ p = READ_ONCE(s->task); -+ s->done = 1; -+ wake_up_process(p); -+ rcu_read_unlock(); -+} -+ -+void __sched __closure_sync(struct closure *cl) -+{ -+ struct closure_syncer s = { .task = current }; -+ -+ cl->s = &s; -+ continue_at(cl, closure_sync_fn, NULL); -+ -+ while (1) { -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ if (s.done) -+ break; -+ schedule(); -+ } -+ -+ __set_current_state(TASK_RUNNING); -+} -+EXPORT_SYMBOL(__closure_sync); -+ -+#ifdef CONFIG_DEBUG_CLOSURES -+ -+static LIST_HEAD(closure_list); -+static DEFINE_SPINLOCK(closure_list_lock); -+ -+void closure_debug_create(struct closure *cl) -+{ -+ unsigned long flags; -+ -+ BUG_ON(cl->magic == CLOSURE_MAGIC_ALIVE); -+ cl->magic = CLOSURE_MAGIC_ALIVE; -+ -+ spin_lock_irqsave(&closure_list_lock, flags); -+ list_add(&cl->all, &closure_list); -+ spin_unlock_irqrestore(&closure_list_lock, flags); -+} -+EXPORT_SYMBOL(closure_debug_create); -+ -+void closure_debug_destroy(struct closure *cl) -+{ -+ unsigned long flags; -+ -+ BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE); -+ cl->magic = CLOSURE_MAGIC_DEAD; -+ -+ spin_lock_irqsave(&closure_list_lock, flags); -+ list_del(&cl->all); -+ spin_unlock_irqrestore(&closure_list_lock, flags); -+} -+EXPORT_SYMBOL(closure_debug_destroy); -+ -+static int debug_seq_show(struct seq_file *f, void *data) -+{ -+ struct closure *cl; -+ -+ spin_lock_irq(&closure_list_lock); -+ -+ list_for_each_entry(cl, &closure_list, all) { -+ int r = atomic_read(&cl->remaining); -+ -+ seq_printf(f, "%p: %pS -> %pS p %p r %i ", -+ cl, (void *) cl->ip, cl->fn, cl->parent, -+ r & CLOSURE_REMAINING_MASK); -+ -+ seq_printf(f, "%s%s\n", -+ test_bit(WORK_STRUCT_PENDING_BIT, -+ work_data_bits(&cl->work)) ? "Q" : "", -+ r & CLOSURE_RUNNING ? 
"R" : ""); -+ -+ if (r & CLOSURE_WAITING) -+ seq_printf(f, " W %pS\n", -+ (void *) cl->waiting_on); -+ -+ seq_puts(f, "\n"); -+ } -+ -+ spin_unlock_irq(&closure_list_lock); -+ return 0; -+} -+ -+static int debug_seq_open(struct inode *inode, struct file *file) -+{ -+ return single_open(file, debug_seq_show, NULL); -+} -+ -+static const struct file_operations debug_ops = { -+ .owner = THIS_MODULE, -+ .open = debug_seq_open, -+ .read = seq_read, -+ .release = single_release -+}; -+ -+static int __init closure_debug_init(void) -+{ -+ debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops); -+ return 0; -+} -+late_initcall(closure_debug_init) -+ -+#endif -diff --git a/mm/filemap.c b/mm/filemap.c -index 385759c4ce4b..5ca0ff7b9357 100644 ---- a/mm/filemap.c -+++ b/mm/filemap.c -@@ -116,6 +116,69 @@ - * ->tasklist_lock (memory_failure, collect_procs_ao) - */ - -+static int page_cache_tree_insert_vec(struct page *pages[], -+ unsigned nr_pages, -+ struct address_space *mapping, -+ pgoff_t index, -+ gfp_t gfp_mask, -+ void *shadow[]) -+{ -+ XA_STATE(xas, &mapping->i_pages, index); -+ void *old; -+ int i = 0, error = 0; -+ -+ mapping_set_update(&xas, mapping); -+ -+ if (!nr_pages) -+ return 0; -+ -+ xa_lock_irq(&mapping->i_pages); -+ -+ while (1) { -+ old = xas_load(&xas); -+ if (old && !xa_is_value(old)) { -+ error = -EEXIST; -+ break; -+ } -+ -+ xas_store(&xas, pages[i]); -+ error = xas_error(&xas); -+ -+ if (error == -ENOMEM) { -+ xa_unlock_irq(&mapping->i_pages); -+ if (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK)) -+ error = 0; -+ xa_lock_irq(&mapping->i_pages); -+ -+ if (!error) -+ continue; -+ break; -+ } -+ -+ if (error) -+ break; -+ -+ if (shadow) -+ shadow[i] = old; -+ if (xa_is_value(old)) -+ mapping->nrexceptional--; -+ mapping->nrpages++; -+ -+ /* hugetlb pages do not participate in page cache accounting. 
*/ -+ if (!PageHuge(pages[i])) -+ __inc_lruvec_page_state(pages[i], NR_FILE_PAGES); -+ -+ if (++i == nr_pages) -+ break; -+ -+ xas_next(&xas); -+ } -+ -+ xa_unlock_irq(&mapping->i_pages); -+ -+ return i ?: error; -+} -+ - static void page_cache_delete(struct address_space *mapping, - struct page *page, void *shadow) - { -@@ -826,114 +889,147 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) - } - EXPORT_SYMBOL_GPL(replace_page_cache_page); - --static int __add_to_page_cache_locked(struct page *page, -- struct address_space *mapping, -- pgoff_t offset, gfp_t gfp_mask, -- void **shadowp) -+static int add_to_page_cache_vec(struct page **pages, unsigned nr_pages, -+ struct address_space *mapping, -+ pgoff_t index, gfp_t gfp_mask, -+ void *shadow[]) - { -- XA_STATE(xas, &mapping->i_pages, offset); -- int huge = PageHuge(page); -- int error; -- void *old; -+ int i, nr_added = 0, error = 0; - -- VM_BUG_ON_PAGE(!PageLocked(page), page); -- VM_BUG_ON_PAGE(PageSwapBacked(page), page); -- mapping_set_update(&xas, mapping); -+ for (i = 0; i < nr_pages; i++) { -+ struct page *page = pages[i]; - -- get_page(page); -- page->mapping = mapping; -- page->index = offset; -+ VM_BUG_ON_PAGE(PageSwapBacked(page), page); -+ VM_BUG_ON_PAGE(PageSwapCache(page), page); - -- if (!huge) { -- error = mem_cgroup_charge(page, current->mm, gfp_mask); -- if (error) -- goto error; -+ __SetPageLocked(page); -+ get_page(page); -+ page->mapping = mapping; -+ page->index = index + i; -+ -+ if (!PageHuge(page)) { -+ error = mem_cgroup_charge(page, current->mm, gfp_mask); -+ if (error) { -+ page->mapping = NULL; -+ /* Leave page->index set: truncation relies upon it */ -+ put_page(page); -+ __ClearPageLocked(page); -+ if (!i) -+ return error; -+ nr_pages = i; -+ break; -+ } -+ } - } - -- do { -- xas_lock_irq(&xas); -- old = xas_load(&xas); -- if (old && !xa_is_value(old)) -- xas_set_err(&xas, -EEXIST); -- xas_store(&xas, page); -- if (xas_error(&xas)) -- goto unlock; -+ error = page_cache_tree_insert_vec(pages, nr_pages, mapping, -+ index, gfp_mask, shadow); -+ if (error > 0) { -+ nr_added = error; -+ error = 0; -+ } - -- if (xa_is_value(old)) { -- mapping->nrexceptional--; -- if (shadowp) -- *shadowp = old; -- } -- mapping->nrpages++; -+ for (i = 0; i < nr_added; i++) -+ trace_mm_filemap_add_to_page_cache(pages[i]); - -- /* hugetlb pages do not participate in page cache accounting */ -- if (!huge) -- __inc_lruvec_page_state(page, NR_FILE_PAGES); --unlock: -- xas_unlock_irq(&xas); -- } while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK)); -+ for (i = nr_added; i < nr_pages; i++) { -+ struct page *page = pages[i]; - -- if (xas_error(&xas)) { -- error = xas_error(&xas); -- goto error; -+ /* Leave page->index set: truncation relies upon it */ -+ page->mapping = NULL; -+ put_page(page); -+ __ClearPageLocked(page); - } - -- trace_mm_filemap_add_to_page_cache(page); -- return 0; --error: -- page->mapping = NULL; -- /* Leave page->index set: truncation relies upon it */ -- put_page(page); -- return error; -+ return nr_added ?: error; - } --ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO); - - /** -- * add_to_page_cache_locked - add a locked page to the pagecache -+ * add_to_page_cache - add a newly allocated page to the pagecache - * @page: page to add - * @mapping: the page's address_space - * @offset: page index - * @gfp_mask: page allocation mode - * -- * This function is used to add a page to the pagecache. It must be locked. -- * This function does not add the page to the LRU. 
The caller must do that. -+ * This function is used to add a page to the pagecache. It must be newly -+ * allocated. This function does not add the page to the LRU. The caller must -+ * do that. - * - * Return: %0 on success, negative error code otherwise. - */ --int add_to_page_cache_locked(struct page *page, struct address_space *mapping, -- pgoff_t offset, gfp_t gfp_mask) -+int add_to_page_cache(struct page *page, struct address_space *mapping, -+ pgoff_t offset, gfp_t gfp_mask) - { -- return __add_to_page_cache_locked(page, mapping, offset, -- gfp_mask, NULL); -+ int ret = add_to_page_cache_vec(&page, 1, mapping, offset, -+ gfp_mask, NULL); -+ if (ret < 0) -+ return ret; -+ return 0; - } --EXPORT_SYMBOL(add_to_page_cache_locked); -+EXPORT_SYMBOL(add_to_page_cache); -+ALLOW_ERROR_INJECTION(add_to_page_cache, ERRNO); - --int add_to_page_cache_lru(struct page *page, struct address_space *mapping, -- pgoff_t offset, gfp_t gfp_mask) -+int add_to_page_cache_lru_vec(struct address_space *mapping, -+ struct page **pages, -+ unsigned nr_pages, -+ pgoff_t offset, gfp_t gfp_mask) - { -- void *shadow = NULL; -- int ret; -+ void *shadow_stack[8], **shadow = shadow_stack; -+ int i, ret = 0, err = 0, nr_added; -+ -+ if (nr_pages > ARRAY_SIZE(shadow_stack)) { -+ shadow = kmalloc_array(nr_pages, sizeof(void *), gfp_mask); -+ if (!shadow) -+ goto slowpath; -+ } -+ -+ for (i = 0; i < nr_pages; i++) -+ VM_BUG_ON_PAGE(PageActive(pages[i]), pages[i]); -+ -+ ret = add_to_page_cache_vec(pages, nr_pages, mapping, -+ offset, gfp_mask, shadow); -+ nr_added = ret > 0 ? ret : 0; -+ -+ /* -+ * The page might have been evicted from cache only recently, in which -+ * case it should be activated like any other repeatedly accessed page. -+ * The exception is pages getting rewritten; evicting other data from -+ * the working set, only to cache data that will get overwritten with -+ * something else, is a waste of memory. -+ */ -+ for (i = 0; i < nr_added; i++) { -+ struct page *page = pages[i]; -+ void *s = shadow[i]; - -- __SetPageLocked(page); -- ret = __add_to_page_cache_locked(page, mapping, offset, -- gfp_mask, &shadow); -- if (unlikely(ret)) -- __ClearPageLocked(page); -- else { -- /* -- * The page might have been evicted from cache only -- * recently, in which case it should be activated like -- * any other repeatedly accessed page. -- * The exception is pages getting rewritten; evicting other -- * data from the working set, only to cache data that will -- * get overwritten with something else, is a waste of memory. 
-- */ - WARN_ON_ONCE(PageActive(page)); -- if (!(gfp_mask & __GFP_WRITE) && shadow) -- workingset_refault(page, shadow); -+ if (!(gfp_mask & __GFP_WRITE) && s) -+ workingset_refault(page, s); - lru_cache_add(page); - } -+ -+ if (shadow != shadow_stack) -+ kfree(shadow); -+ - return ret; -+slowpath: -+ for (i = 0; i < nr_pages; i++) { -+ err = add_to_page_cache_lru(pages[i], mapping, -+ offset + i, gfp_mask); -+ if (err) -+ break; -+ } -+ -+ return i ?: err; -+} -+EXPORT_SYMBOL_GPL(add_to_page_cache_lru_vec); -+ -+int add_to_page_cache_lru(struct page *page, struct address_space *mapping, -+ pgoff_t offset, gfp_t gfp_mask) -+{ -+ int ret = add_to_page_cache_lru_vec(mapping, &page, 1, offset, gfp_mask); -+ if (ret < 0) -+ return ret; -+ return 0; - } - EXPORT_SYMBOL_GPL(add_to_page_cache_lru); - -@@ -1824,6 +1920,7 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, - - return ret; - } -+EXPORT_SYMBOL(find_get_pages_range); - - /** - * find_get_pages_contig - gang contiguous pagecache lookup -@@ -1972,6 +2069,244 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra) - ra->ra_pages /= 4; - } - -+static struct page * -+generic_file_buffered_read_readpage(struct file *filp, -+ struct address_space *mapping, -+ struct page *page) -+{ -+ struct file_ra_state *ra = &filp->f_ra; -+ int error; -+ -+ /* -+ * A previous I/O error may have been due to temporary -+ * failures, eg. multipath errors. -+ * PG_error will be set again if readpage fails. -+ */ -+ ClearPageError(page); -+ /* Start the actual read. The read will unlock the page. */ -+ error = mapping->a_ops->readpage(filp, page); -+ -+ if (unlikely(error)) { -+ put_page(page); -+ return error != AOP_TRUNCATED_PAGE ? ERR_PTR(error) : NULL; -+ } -+ -+ if (!PageUptodate(page)) { -+ error = lock_page_killable(page); -+ if (unlikely(error)) { -+ put_page(page); -+ return ERR_PTR(error); -+ } -+ if (!PageUptodate(page)) { -+ if (page->mapping == NULL) { -+ /* -+ * invalidate_mapping_pages got it -+ */ -+ unlock_page(page); -+ put_page(page); -+ return NULL; -+ } -+ unlock_page(page); -+ shrink_readahead_size_eio(ra); -+ put_page(page); -+ return ERR_PTR(-EIO); -+ } -+ unlock_page(page); -+ } -+ -+ return page; -+} -+ -+static struct page * -+generic_file_buffered_read_pagenotuptodate(struct kiocb *iocb, -+ struct file *filp, -+ struct iov_iter *iter, -+ struct page *page, -+ loff_t pos, loff_t count) -+{ -+ struct address_space *mapping = filp->f_mapping; -+ struct inode *inode = mapping->host; -+ int error; -+ -+ /* -+ * See comment in do_read_cache_page on why -+ * wait_on_page_locked is used to avoid unnecessarily -+ * serialisations and why it's safe. -+ */ -+ error = wait_on_page_locked_killable(page); -+ if (unlikely(error)) { -+ put_page(page); -+ return ERR_PTR(error); -+ } -+ -+ if (PageUptodate(page)) -+ return page; -+ -+ if (inode->i_blkbits == PAGE_SHIFT || -+ !mapping->a_ops->is_partially_uptodate) -+ goto page_not_up_to_date; -+ /* pipes can't handle partially uptodate pages */ -+ if (unlikely(iov_iter_is_pipe(iter))) -+ goto page_not_up_to_date; -+ if (!trylock_page(page)) -+ goto page_not_up_to_date; -+ /* Did it get truncated before we got the lock? */ -+ if (!page->mapping) -+ goto page_not_up_to_date_locked; -+ -+ if (!mapping->a_ops->is_partially_uptodate(page, -+ pos & ~PAGE_MASK, count)) -+ goto page_not_up_to_date_locked; -+ unlock_page(page); -+ return page; -+ -+page_not_up_to_date: -+ /* Get exclusive access to the page ... 
*/ -+ error = lock_page_killable(page); -+ if (unlikely(error)) { -+ put_page(page); -+ return ERR_PTR(error); -+ } -+ -+page_not_up_to_date_locked: -+ /* Did it get truncated before we got the lock? */ -+ if (!page->mapping) { -+ unlock_page(page); -+ put_page(page); -+ return NULL; -+ } -+ -+ /* Did somebody else fill it already? */ -+ if (PageUptodate(page)) { -+ unlock_page(page); -+ return page; -+ } -+ -+ if (iocb->ki_flags & IOCB_NOIO) { -+ unlock_page(page); -+ put_page(page); -+ return ERR_PTR(-EAGAIN); -+ } -+ -+ return generic_file_buffered_read_readpage(filp, mapping, page); -+} -+ -+static struct page * -+generic_file_buffered_read_no_cached_page(struct kiocb *iocb, -+ struct iov_iter *iter) -+{ -+ struct file *filp = iocb->ki_filp; -+ struct address_space *mapping = filp->f_mapping; -+ pgoff_t index = iocb->ki_pos >> PAGE_SHIFT; -+ struct page *page; -+ int error; -+ -+ if (iocb->ki_flags & IOCB_NOIO) -+ return ERR_PTR(-EAGAIN); -+ -+ /* -+ * Ok, it wasn't cached, so we need to create a new -+ * page.. -+ */ -+ page = page_cache_alloc(mapping); -+ if (!page) -+ return ERR_PTR(-ENOMEM); -+ -+ error = add_to_page_cache_lru(page, mapping, index, -+ mapping_gfp_constraint(mapping, GFP_KERNEL)); -+ if (error) { -+ put_page(page); -+ return error != -EEXIST ? ERR_PTR(error) : NULL; -+ } -+ -+ return generic_file_buffered_read_readpage(filp, mapping, page); -+} -+ -+static int generic_file_buffered_read_get_pages(struct kiocb *iocb, -+ struct iov_iter *iter, -+ struct page **pages, -+ unsigned int nr) -+{ -+ struct file *filp = iocb->ki_filp; -+ struct address_space *mapping = filp->f_mapping; -+ struct file_ra_state *ra = &filp->f_ra; -+ pgoff_t index = iocb->ki_pos >> PAGE_SHIFT; -+ pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; -+ int i, j, nr_got, err = 0; -+ -+ nr = min_t(unsigned long, last_index - index, nr); -+find_page: -+ if (fatal_signal_pending(current)) -+ return -EINTR; -+ -+ nr_got = find_get_pages_contig(mapping, index, nr, pages); -+ if (nr_got) -+ goto got_pages; -+ -+ if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO)) -+ return -EAGAIN; -+ -+ page_cache_sync_readahead(mapping, ra, filp, index, last_index - index); -+ -+ nr_got = find_get_pages_contig(mapping, index, nr, pages); -+ if (nr_got) -+ goto got_pages; -+ -+ pages[0] = generic_file_buffered_read_no_cached_page(iocb, iter); -+ err = PTR_ERR_OR_ZERO(pages[0]); -+ if (!IS_ERR_OR_NULL(pages[0])) -+ nr_got = 1; -+got_pages: -+ for (i = 0; i < nr_got; i++) { -+ struct page *page = pages[i]; -+ pgoff_t pg_index = index + i; -+ loff_t pg_pos = max(iocb->ki_pos, -+ (loff_t) pg_index << PAGE_SHIFT); -+ loff_t pg_count = iocb->ki_pos + iter->count - pg_pos; -+ -+ if (PageReadahead(page)) { -+ if (iocb->ki_flags & IOCB_NOIO) { -+ for (j = i; j < nr_got; j++) -+ put_page(pages[j]); -+ nr_got = i; -+ err = -EAGAIN; -+ break; -+ } -+ page_cache_async_readahead(mapping, ra, filp, page, -+ pg_index, last_index - pg_index); -+ } -+ -+ if (!PageUptodate(page)) { -+ if (iocb->ki_flags & IOCB_NOWAIT) { -+ for (j = i; j < nr_got; j++) -+ put_page(pages[j]); -+ nr_got = i; -+ err = -EAGAIN; -+ break; -+ } -+ -+ page = generic_file_buffered_read_pagenotuptodate(iocb, -+ filp, iter, page, pg_pos, pg_count); -+ if (IS_ERR_OR_NULL(page)) { -+ for (j = i + 1; j < nr_got; j++) -+ put_page(pages[j]); -+ nr_got = i; -+ err = PTR_ERR_OR_ZERO(page); -+ break; -+ } -+ } -+ } -+ -+ if (likely(nr_got)) -+ return nr_got; -+ if (err) -+ return err; -+ /* -+ * No pages and no error means we raced and should retry: 
-+ */ -+ goto find_page; -+} -+ - /** - * generic_file_buffered_read - generic file read routine - * @iocb: the iocb to read -@@ -1992,261 +2327,110 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb, - struct iov_iter *iter, ssize_t written) - { - struct file *filp = iocb->ki_filp; -+ struct file_ra_state *ra = &filp->f_ra; - struct address_space *mapping = filp->f_mapping; - struct inode *inode = mapping->host; -- struct file_ra_state *ra = &filp->f_ra; -- loff_t *ppos = &iocb->ki_pos; -- pgoff_t index; -- pgoff_t last_index; -- pgoff_t prev_index; -- unsigned long offset; /* offset into pagecache page */ -- unsigned int prev_offset; -- int error = 0; -- -- if (unlikely(*ppos >= inode->i_sb->s_maxbytes)) -+ size_t orig_count = iov_iter_count(iter); -+ struct page *pages_onstack[8], **pages = NULL; -+ unsigned int nr_pages = min_t(unsigned int, 512, -+ ((iocb->ki_pos + iter->count + PAGE_SIZE - 1) >> PAGE_SHIFT) - -+ (iocb->ki_pos >> PAGE_SHIFT)); -+ int i, pg_nr, error = 0; -+ bool writably_mapped; -+ loff_t isize, end_offset; -+ -+ if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes)) - return 0; - iov_iter_truncate(iter, inode->i_sb->s_maxbytes); - -- index = *ppos >> PAGE_SHIFT; -- prev_index = ra->prev_pos >> PAGE_SHIFT; -- prev_offset = ra->prev_pos & (PAGE_SIZE-1); -- last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; -- offset = *ppos & ~PAGE_MASK; -+ if (nr_pages > ARRAY_SIZE(pages_onstack)) -+ pages = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL); - -- for (;;) { -- struct page *page; -- pgoff_t end_index; -- loff_t isize; -- unsigned long nr, ret; -+ if (!pages) { -+ pages = pages_onstack; -+ nr_pages = min_t(unsigned int, nr_pages, ARRAY_SIZE(pages_onstack)); -+ } - -+ do { - cond_resched(); --find_page: -- if (fatal_signal_pending(current)) { -- error = -EINTR; -- goto out; -- } - -- page = find_get_page(mapping, index); -- if (!page) { -- if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO)) -- goto would_block; -- page_cache_sync_readahead(mapping, -- ra, filp, -- index, last_index - index); -- page = find_get_page(mapping, index); -- if (unlikely(page == NULL)) -- goto no_cached_page; -- } -- if (PageReadahead(page)) { -- if (iocb->ki_flags & IOCB_NOIO) { -- put_page(page); -- goto out; -- } -- page_cache_async_readahead(mapping, -- ra, filp, page, -- index, last_index - index); -+ i = 0; -+ pg_nr = generic_file_buffered_read_get_pages(iocb, iter, -+ pages, nr_pages); -+ if (pg_nr < 0) { -+ error = pg_nr; -+ break; - } -- if (!PageUptodate(page)) { -- if (iocb->ki_flags & IOCB_NOWAIT) { -- put_page(page); -- goto would_block; -- } - -- /* -- * See comment in do_read_cache_page on why -- * wait_on_page_locked is used to avoid unnecessarily -- * serialisations and why it's safe. -- */ -- error = wait_on_page_locked_killable(page); -- if (unlikely(error)) -- goto readpage_error; -- if (PageUptodate(page)) -- goto page_ok; -- -- if (inode->i_blkbits == PAGE_SHIFT || -- !mapping->a_ops->is_partially_uptodate) -- goto page_not_up_to_date; -- /* pipes can't handle partially uptodate pages */ -- if (unlikely(iov_iter_is_pipe(iter))) -- goto page_not_up_to_date; -- if (!trylock_page(page)) -- goto page_not_up_to_date; -- /* Did it get truncated before we got the lock? */ -- if (!page->mapping) -- goto page_not_up_to_date_locked; -- if (!mapping->a_ops->is_partially_uptodate(page, -- offset, iter->count)) -- goto page_not_up_to_date_locked; -- unlock_page(page); -- } --page_ok: - /* -- * i_size must be checked after we know the page is Uptodate. 
-+ * i_size must be checked after we know the pages are Uptodate. - * - * Checking i_size after the check allows us to calculate - * the correct value for "nr", which means the zero-filled - * part of the page is not copied back to userspace (unless - * another truncate extends the file - this is desired though). - */ -- - isize = i_size_read(inode); -- end_index = (isize - 1) >> PAGE_SHIFT; -- if (unlikely(!isize || index > end_index)) { -- put_page(page); -- goto out; -- } -+ if (unlikely(iocb->ki_pos >= isize)) -+ goto put_pages; - -- /* nr is the maximum number of bytes to copy from this page */ -- nr = PAGE_SIZE; -- if (index == end_index) { -- nr = ((isize - 1) & ~PAGE_MASK) + 1; -- if (nr <= offset) { -- put_page(page); -- goto out; -- } -- } -- nr = nr - offset; -+ end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count); - -- /* If users can be writing to this page using arbitrary -- * virtual addresses, take care about potential aliasing -- * before reading the page on the kernel side. -- */ -- if (mapping_writably_mapped(mapping)) -- flush_dcache_page(page); -+ while ((iocb->ki_pos >> PAGE_SHIFT) + pg_nr > -+ (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT) -+ put_page(pages[--pg_nr]); - - /* -- * When a sequential read accesses a page several times, -- * only mark it as accessed the first time. -+ * Once we start copying data, we don't want to be touching any -+ * cachelines that might be contended: - */ -- if (prev_index != index || offset != prev_offset) -- mark_page_accessed(page); -- prev_index = index; -+ writably_mapped = mapping_writably_mapped(mapping); - - /* -- * Ok, we have the page, and it's up-to-date, so -- * now we can copy it to user space... -+ * When a sequential read accesses a page several times, only -+ * mark it as accessed the first time. - */ -+ if (iocb->ki_pos >> PAGE_SHIFT != -+ ra->prev_pos >> PAGE_SHIFT) -+ mark_page_accessed(pages[0]); -+ for (i = 1; i < pg_nr; i++) -+ mark_page_accessed(pages[i]); -+ -+ for (i = 0; i < pg_nr; i++) { -+ unsigned int offset = iocb->ki_pos & ~PAGE_MASK; -+ unsigned int bytes = min_t(loff_t, end_offset - iocb->ki_pos, -+ PAGE_SIZE - offset); -+ unsigned int copied; - -- ret = copy_page_to_iter(page, offset, nr, iter); -- offset += ret; -- index += offset >> PAGE_SHIFT; -- offset &= ~PAGE_MASK; -- prev_offset = offset; -- -- put_page(page); -- written += ret; -- if (!iov_iter_count(iter)) -- goto out; -- if (ret < nr) { -- error = -EFAULT; -- goto out; -- } -- continue; -- --page_not_up_to_date: -- /* Get exclusive access to the page ... */ -- error = lock_page_killable(page); -- if (unlikely(error)) -- goto readpage_error; -- --page_not_up_to_date_locked: -- /* Did it get truncated before we got the lock? */ -- if (!page->mapping) { -- unlock_page(page); -- put_page(page); -- continue; -- } -- -- /* Did somebody else fill it already? */ -- if (PageUptodate(page)) { -- unlock_page(page); -- goto page_ok; -- } -+ /* -+ * If users can be writing to this page using arbitrary -+ * virtual addresses, take care about potential aliasing -+ * before reading the page on the kernel side. -+ */ -+ if (writably_mapped) -+ flush_dcache_page(pages[i]); - --readpage: -- if (iocb->ki_flags & IOCB_NOIO) { -- unlock_page(page); -- put_page(page); -- goto would_block; -- } -- /* -- * A previous I/O error may have been due to temporary -- * failures, eg. multipath errors. -- * PG_error will be set again if readpage fails. -- */ -- ClearPageError(page); -- /* Start the actual read. The read will unlock the page. 
*/ -- error = mapping->a_ops->readpage(filp, page); -+ copied = copy_page_to_iter(pages[i], offset, bytes, iter); - -- if (unlikely(error)) { -- if (error == AOP_TRUNCATED_PAGE) { -- put_page(page); -- error = 0; -- goto find_page; -- } -- goto readpage_error; -- } -+ iocb->ki_pos += copied; -+ ra->prev_pos = iocb->ki_pos; - -- if (!PageUptodate(page)) { -- error = lock_page_killable(page); -- if (unlikely(error)) -- goto readpage_error; -- if (!PageUptodate(page)) { -- if (page->mapping == NULL) { -- /* -- * invalidate_mapping_pages got it -- */ -- unlock_page(page); -- put_page(page); -- goto find_page; -- } -- unlock_page(page); -- shrink_readahead_size_eio(ra); -- error = -EIO; -- goto readpage_error; -+ if (copied < bytes) { -+ error = -EFAULT; -+ break; - } -- unlock_page(page); - } -+put_pages: -+ for (i = 0; i < pg_nr; i++) -+ put_page(pages[i]); -+ } while (iov_iter_count(iter) && iocb->ki_pos < isize && !error); - -- goto page_ok; -- --readpage_error: -- /* UHHUH! A synchronous read error occurred. Report it */ -- put_page(page); -- goto out; -- --no_cached_page: -- /* -- * Ok, it wasn't cached, so we need to create a new -- * page.. -- */ -- page = page_cache_alloc(mapping); -- if (!page) { -- error = -ENOMEM; -- goto out; -- } -- error = add_to_page_cache_lru(page, mapping, index, -- mapping_gfp_constraint(mapping, GFP_KERNEL)); -- if (error) { -- put_page(page); -- if (error == -EEXIST) { -- error = 0; -- goto find_page; -- } -- goto out; -- } -- goto readpage; -- } -+ file_accessed(filp); -+ written += orig_count - iov_iter_count(iter); - --would_block: -- error = -EAGAIN; --out: -- ra->prev_pos = prev_index; -- ra->prev_pos <<= PAGE_SHIFT; -- ra->prev_pos |= prev_offset; -+ if (pages != pages_onstack) -+ kfree(pages); - -- *ppos = ((loff_t)index << PAGE_SHIFT) + offset; -- file_accessed(filp); - return written ? written : error; - } - EXPORT_SYMBOL_GPL(generic_file_buffered_read); -diff --git a/mm/gup.c b/mm/gup.c -index 6f47697f8fb0..ccceb6d3e367 100644 ---- a/mm/gup.c -+++ b/mm/gup.c -@@ -1108,6 +1108,13 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - } - cond_resched(); - -+ if (current->faults_disabled_mapping && -+ vma->vm_file && -+ vma->vm_file->f_mapping == current->faults_disabled_mapping) { -+ ret = -EFAULT; -+ goto out; -+ } -+ - page = follow_page_mask(vma, start, foll_flags, &ctx); - if (!page) { - ret = faultin_page(tsk, vma, start, &foll_flags, -diff --git a/mm/nommu.c b/mm/nommu.c -index f32a69095d50..f714f339e19b 100644 ---- a/mm/nommu.c -+++ b/mm/nommu.c -@@ -290,6 +290,24 @@ void *vzalloc_node(unsigned long size, int node) - } - EXPORT_SYMBOL(vzalloc_node); - -+/** -+ * vmalloc_exec - allocate virtually contiguous, executable memory -+ * @size: allocation size -+ * -+ * Kernel-internal function to allocate enough pages to cover @size -+ * the page level allocator and map them into contiguous and -+ * executable kernel virtual space. -+ * -+ * For tight control over page level allocator and protection flags -+ * use __vmalloc() instead. 
-+ */ -+ -+void *vmalloc_exec(unsigned long size, gfp_t gfp_mask) -+{ -+ return __vmalloc(size, gfp_mask); -+} -+EXPORT_SYMBOL_GPL(vmalloc_exec); -+ - /** - * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) - * @size: allocation size -diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index 28b3e7a67565..2aa1e1e4c20b 100644 ---- a/mm/page-writeback.c -+++ b/mm/page-writeback.c -@@ -2477,20 +2477,19 @@ int __set_page_dirty_nobuffers(struct page *page) - lock_page_memcg(page); - if (!TestSetPageDirty(page)) { - struct address_space *mapping = page_mapping(page); -- unsigned long flags; - - if (!mapping) { - unlock_page_memcg(page); - return 1; - } - -- xa_lock_irqsave(&mapping->i_pages, flags); -+ xa_lock_irq(&mapping->i_pages); - BUG_ON(page_mapping(page) != mapping); - WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); - account_page_dirtied(page, mapping); - __xa_set_mark(&mapping->i_pages, page_index(page), - PAGECACHE_TAG_DIRTY); -- xa_unlock_irqrestore(&mapping->i_pages, flags); -+ xa_unlock_irq(&mapping->i_pages); - unlock_page_memcg(page); - - if (mapping->host) { -diff --git a/mm/vmalloc.c b/mm/vmalloc.c -index 5a2b55c8dd9a..f296b41e67f0 100644 ---- a/mm/vmalloc.c -+++ b/mm/vmalloc.c -@@ -2695,6 +2695,27 @@ void *vzalloc_node(unsigned long size, int node) - } - EXPORT_SYMBOL(vzalloc_node); - -+/** -+ * vmalloc_exec - allocate virtually contiguous, executable memory -+ * @size: allocation size -+ * -+ * Kernel-internal function to allocate enough pages to cover @size -+ * the page level allocator and map them into contiguous and -+ * executable kernel virtual space. -+ * -+ * For tight control over page level allocator and protection flags -+ * use __vmalloc() instead. -+ * -+ * Return: pointer to the allocated memory or %NULL on error -+ */ -+void *vmalloc_exec(unsigned long size, gfp_t gfp_mask) -+{ -+ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, -+ gfp_mask, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, -+ NUMA_NO_NODE, __builtin_return_address(0)); -+} -+EXPORT_SYMBOL_GPL(vmalloc_exec); -+ - #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) - #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL) - #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) diff --git a/linux58-tkg/linux58-tkg-patches/0009-glitched-bmq.patch b/linux58-tkg/linux58-tkg-patches/0009-glitched-bmq.patch deleted file mode 100644 index 38666e4..0000000 --- a/linux58-tkg/linux58-tkg-patches/0009-glitched-bmq.patch +++ /dev/null @@ -1,90 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - BMQ - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. 
-+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9270a4370d54..30d01e647417 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -159,7 +159,7 @@ struct scan_control { - /* - * From 0 .. 100. Higher means more swappy. - */ --int vm_swappiness = 60; -+int vm_swappiness = 20; - /* - * The total number of pages which are beyond the high watermark within all - * zones. diff --git a/linux58-tkg/linux58-tkg-patches/0009-glitched-ondemand-bmq.patch b/linux58-tkg/linux58-tkg-patches/0009-glitched-ondemand-bmq.patch deleted file mode 100644 index a926040..0000000 --- a/linux58-tkg/linux58-tkg-patches/0009-glitched-ondemand-bmq.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 6b423eebfd5d..61e3271675d6 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -21,10 +21,10 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) --#define DEF_SAMPLING_DOWN_FACTOR (1) -+#define DEF_FREQUENCY_UP_THRESHOLD (55) -+#define DEF_SAMPLING_DOWN_FACTOR (5) - #define MAX_SAMPLING_DOWN_FACTOR (100000) --#define MICRO_FREQUENCY_UP_THRESHOLD (95) -+#define MICRO_FREQUENCY_UP_THRESHOLD (63) - #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) - #define MIN_FREQUENCY_UP_THRESHOLD (1) - #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/linux58-tkg/linux58-tkg-patches/0009-prjc_v5.8-r3.patch b/linux58-tkg/linux58-tkg-patches/0009-prjc_v5.8-r3.patch deleted file mode 100644 index 01bffcd..0000000 --- a/linux58-tkg/linux58-tkg-patches/0009-prjc_v5.8-r3.patch +++ /dev/null @@ -1,8582 +0,0 @@ -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index fb95fad81c79..6e3f8233600e 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -4525,6 +4525,12 @@ - - sbni= [NET] Granch SBNI12 leased line adapter - -+ sched_timeslice= -+ [KNL] Time slice in us for BMQ/PDS scheduler. -+ Format: (must be >= 1000) -+ Default: 4000 -+ See Documentation/scheduler/sched-BMQ.txt -+ - sched_debug [KNL] Enables verbose scheduler debug messages. - - schedstats= [KNL,X86] Enable or disable scheduled statistics. 
-diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index 83acf5025488..313d2124e709 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -1428,3 +1428,13 @@ is 10 seconds. - - The softlockup threshold is (``2 * watchdog_thresh``). Setting this - tunable to zero will disable lockup detection altogether. -+ -+yield_type: -+=========== -+ -+BMQ/PDS CPU scheduler only. This determines what type of yield calls -+to sched_yield will perform. -+ -+ 0 - No yield. -+ 1 - Deboost and requeue task. (default) -+ 2 - Set run queue skip task. -diff --git a/Documentation/scheduler/sched-BMQ.txt b/Documentation/scheduler/sched-BMQ.txt -new file mode 100644 -index 000000000000..05c84eec0f31 ---- /dev/null -+++ b/Documentation/scheduler/sched-BMQ.txt -@@ -0,0 +1,110 @@ -+ BitMap queue CPU Scheduler -+ -------------------------- -+ -+CONTENT -+======== -+ -+ Background -+ Design -+ Overview -+ Task policy -+ Priority management -+ BitMap Queue -+ CPU Assignment and Migration -+ -+ -+Background -+========== -+ -+BitMap Queue CPU scheduler, referred to as BMQ from here on, is an evolution -+of previous Priority and Deadline based Skiplist multiple queue scheduler(PDS), -+and inspired by Zircon scheduler. The goal of it is to keep the scheduler code -+simple, while efficiency and scalable for interactive tasks, such as desktop, -+movie playback and gaming etc. -+ -+Design -+====== -+ -+Overview -+-------- -+ -+BMQ use per CPU run queue design, each CPU(logical) has it's own run queue, -+each CPU is responsible for scheduling the tasks that are putting into it's -+run queue. -+ -+The run queue is a set of priority queues. Note that these queues are fifo -+queue for non-rt tasks or priority queue for rt tasks in data structure. See -+BitMap Queue below for details. BMQ is optimized for non-rt tasks in the fact -+that most applications are non-rt tasks. No matter the queue is fifo or -+priority, In each queue is an ordered list of runnable tasks awaiting execution -+and the data structures are the same. When it is time for a new task to run, -+the scheduler simply looks the lowest numbered queueue that contains a task, -+and runs the first task from the head of that queue. And per CPU idle task is -+also in the run queue, so the scheduler can always find a task to run on from -+its run queue. -+ -+Each task will assigned the same timeslice(default 4ms) when it is picked to -+start running. Task will be reinserted at the end of the appropriate priority -+queue when it uses its whole timeslice. When the scheduler selects a new task -+from the priority queue it sets the CPU's preemption timer for the remainder of -+the previous timeslice. When that timer fires the scheduler will stop execution -+on that task, select another task and start over again. -+ -+If a task blocks waiting for a shared resource then it's taken out of its -+priority queue and is placed in a wait queue for the shared resource. When it -+is unblocked it will be reinserted in the appropriate priority queue of an -+eligible CPU. -+ -+Task policy -+----------- -+ -+BMQ supports DEADLINE, FIFO, RR, NORMAL, BATCH and IDLE task policy like the -+mainline CFS scheduler. But BMQ is heavy optimized for non-rt task, that's -+NORMAL/BATCH/IDLE policy tasks. Below is the implementation detail of each -+policy. -+ -+DEADLINE -+ It is squashed as priority 0 FIFO task. 
-+ -+FIFO/RR -+ All RT tasks share one single priority queue in BMQ run queue designed. The -+complexity of insert operation is O(n). BMQ is not designed for system runs -+with major rt policy tasks. -+ -+NORMAL/BATCH/IDLE -+ BATCH and IDLE tasks are treated as the same policy. They compete CPU with -+NORMAL policy tasks, but they just don't boost. To control the priority of -+NORMAL/BATCH/IDLE tasks, simply use nice level. -+ -+ISO -+ ISO policy is not supported in BMQ. Please use nice level -20 NORMAL policy -+task instead. -+ -+Priority management -+------------------- -+ -+RT tasks have priority from 0-99. For non-rt tasks, there are three different -+factors used to determine the effective priority of a task. The effective -+priority being what is used to determine which queue it will be in. -+ -+The first factor is simply the task’s static priority. Which is assigned from -+task's nice level, within [-20, 19] in userland's point of view and [0, 39] -+internally. -+ -+The second factor is the priority boost. This is a value bounded between -+[-MAX_PRIORITY_ADJ, MAX_PRIORITY_ADJ] used to offset the base priority, it is -+modified by the following cases: -+ -+*When a thread has used up its entire timeslice, always deboost its boost by -+increasing by one. -+*When a thread gives up cpu control(voluntary or non-voluntary) to reschedule, -+and its switch-in time(time after last switch and run) below the thredhold -+based on its priority boost, will boost its boost by decreasing by one buti is -+capped at 0 (won’t go negative). -+ -+The intent in this system is to ensure that interactive threads are serviced -+quickly. These are usually the threads that interact directly with the user -+and cause user-perceivable latency. These threads usually do little work and -+spend most of their time blocked awaiting another user event. So they get the -+priority boost from unblocking while background threads that do most of the -+processing receive the priority penalty for using their entire timeslice. 
-diff --git a/fs/proc/base.c b/fs/proc/base.c -index d86c0afc8a85..7f394a6fb9b6 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -479,7 +479,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, - seq_puts(m, "0 0 0\n"); - else - seq_printf(m, "%llu %llu %lu\n", -- (unsigned long long)task->se.sum_exec_runtime, -+ (unsigned long long)tsk_seruntime(task), - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); - -diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h -index 8874f681b056..59eb72bf7d5f 100644 ---- a/include/asm-generic/resource.h -+++ b/include/asm-generic/resource.h -@@ -23,7 +23,7 @@ - [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, \ - [RLIMIT_SIGPENDING] = { 0, 0 }, \ - [RLIMIT_MSGQUEUE] = { MQ_BYTES_MAX, MQ_BYTES_MAX }, \ -- [RLIMIT_NICE] = { 0, 0 }, \ -+ [RLIMIT_NICE] = { 30, 30 }, \ - [RLIMIT_RTPRIO] = { 0, 0 }, \ - [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \ - } -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 683372943093..d25f2501daf3 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -32,6 +32,7 @@ - #include - #include - #include -+#include - - /* task_struct member predeclarations (sorted alphabetically): */ - struct audit_context; -@@ -650,12 +651,18 @@ struct task_struct { - unsigned int ptrace; - - #ifdef CONFIG_SMP -- int on_cpu; - struct __call_single_node wake_entry; -+#endif -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) -+ int on_cpu; -+#endif -+ -+#ifdef CONFIG_SMP - #ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ - unsigned int cpu; - #endif -+#ifndef CONFIG_SCHED_ALT - unsigned int wakee_flips; - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; -@@ -669,6 +676,7 @@ struct task_struct { - */ - int recent_used_cpu; - int wake_cpu; -+#endif /* !CONFIG_SCHED_ALT */ - #endif - int on_rq; - -@@ -677,13 +685,33 @@ struct task_struct { - int normal_prio; - unsigned int rt_priority; - -+#ifdef CONFIG_SCHED_ALT -+ u64 last_ran; -+ s64 time_slice; -+#ifdef CONFIG_SCHED_BMQ -+ int boost_prio; -+ int bmq_idx; -+ struct list_head bmq_node; -+#endif /* CONFIG_SCHED_BMQ */ -+#ifdef CONFIG_SCHED_PDS -+ u64 deadline; -+ u64 priodl; -+ /* skip list level */ -+ int sl_level; -+ /* skip list node */ -+ struct skiplist_node sl_node; -+#endif /* CONFIG_SCHED_PDS */ -+ /* sched_clock time spent running */ -+ u64 sched_time; -+#else /* !CONFIG_SCHED_ALT */ - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; -+ struct sched_dl_entity dl; -+#endif - #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; - #endif -- struct sched_dl_entity dl; - - #ifdef CONFIG_UCLAMP_TASK - /* Clamp values requested for a scheduling entity */ -@@ -1326,6 +1354,15 @@ struct task_struct { - */ - }; - -+#ifdef CONFIG_SCHED_ALT -+#define tsk_seruntime(t) ((t)->sched_time) -+/* replace the uncertian rt_timeout with 0UL */ -+#define tsk_rttimeout(t) (0UL) -+#else /* CFS */ -+#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) -+#define tsk_rttimeout(t) ((t)->rt.timeout) -+#endif /* !CONFIG_SCHED_ALT */ -+ - static inline struct pid *task_pid(struct task_struct *task) - { - return task->thread_pid; -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 1aff00b65f3c..179d77c8360e 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -1,5 +1,24 @@ - /* SPDX-License-Identifier: GPL-2.0 */ - -+#ifdef CONFIG_SCHED_ALT -+ -+static 
inline int dl_task(struct task_struct *p) -+{ -+ return 0; -+} -+ -+#ifdef CONFIG_SCHED_BMQ -+#define __tsk_deadline(p) (0UL) -+#endif -+ -+#ifdef CONFIG_SCHED_PDS -+#define __tsk_deadline(p) ((p)->priodl) -+#endif -+ -+#else -+ -+#define __tsk_deadline(p) ((p)->dl.deadline) -+ - /* - * SCHED_DEADLINE tasks has negative priorities, reflecting - * the fact that any of them has higher prio than RT and -@@ -19,6 +38,7 @@ static inline int dl_task(struct task_struct *p) - { - return dl_prio(p->prio); - } -+#endif /* CONFIG_SCHED_ALT */ - - static inline bool dl_time_before(u64 a, u64 b) - { -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 7d64feafc408..42730d27ceb5 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -20,11 +20,20 @@ - */ - - #define MAX_USER_RT_PRIO 100 -+ - #define MAX_RT_PRIO MAX_USER_RT_PRIO - - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - -+/* +/- priority levels from the base priority */ -+#ifdef CONFIG_SCHED_BMQ -+#define MAX_PRIORITY_ADJ 7 -+#endif -+#ifdef CONFIG_SCHED_PDS -+#define MAX_PRIORITY_ADJ 0 -+#endif -+ - /* - * Convert user-nice values [ -20 ... 0 ... 19 ] - * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..0a7565d0d3cf 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return true; -+#ifndef CONFIG_SCHED_ALT - if (policy == SCHED_DEADLINE) - return true; -+#endif - return false; - } - -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -new file mode 100644 -index 000000000000..47ca955a451d ---- /dev/null -+++ b/include/linux/skip_list.h -@@ -0,0 +1,177 @@ -+/* -+ * Copyright (C) 2016 Alfred Chen. -+ * -+ * Code based on Con Kolivas's skip list implementation for BFS, and -+ * which is based on example originally by William Pugh. -+ * -+ * Skip Lists are a probabilistic alternative to balanced trees, as -+ * described in the June 1990 issue of CACM and were invented by -+ * William Pugh in 1987. -+ * -+ * A couple of comments about this implementation: -+ * -+ * This file only provides a infrastructure of skip list. -+ * -+ * skiplist_node is embedded into container data structure, to get rid -+ * the dependency of kmalloc/kfree operation in scheduler code. -+ * -+ * A customized search function should be defined using DEFINE_SKIPLIST_INSERT -+ * macro and be used for skip list insert operation. -+ * -+ * Random Level is also not defined in this file, instead, it should be -+ * customized implemented and set to node->level then pass to the customized -+ * skiplist_insert function. -+ * -+ * Levels start at zero and go up to (NUM_SKIPLIST_LEVEL -1) -+ * -+ * NUM_SKIPLIST_LEVEL in this implementation is 8 instead of origin 16, -+ * considering that there will be 256 entries to enable the top level when using -+ * random level p=0.5, and that number is more than enough for a run queue usage -+ * in a scheduler usage. And it also help to reduce the memory usage of the -+ * embedded skip list node in task_struct to about 50%. -+ * -+ * The insertion routine has been implemented so as to use the -+ * dirty hack described in the CACM paper: if a random level is -+ * generated that is more than the current maximum level, the -+ * current maximum level plus one is used instead. 
-+ * -+ * BFS Notes: In this implementation of skiplists, there are bidirectional -+ * next/prev pointers and the insert function returns a pointer to the actual -+ * node the value is stored. The key here is chosen by the scheduler so as to -+ * sort tasks according to the priority list requirements and is no longer used -+ * by the scheduler after insertion. The scheduler lookup, however, occurs in -+ * O(1) time because it is always the first item in the level 0 linked list. -+ * Since the task struct stores a copy of the node pointer upon skiplist_insert, -+ * it can also remove it much faster than the original implementation with the -+ * aid of prev<->next pointer manipulation and no searching. -+ */ -+#ifndef _LINUX_SKIP_LIST_H -+#define _LINUX_SKIP_LIST_H -+ -+#include -+ -+#define NUM_SKIPLIST_LEVEL (8) -+ -+struct skiplist_node { -+ int level; /* Levels in this node */ -+ struct skiplist_node *next[NUM_SKIPLIST_LEVEL]; -+ struct skiplist_node *prev[NUM_SKIPLIST_LEVEL]; -+}; -+ -+#define SKIPLIST_NODE_INIT(name) { 0,\ -+ {&name, &name, &name, &name,\ -+ &name, &name, &name, &name},\ -+ {&name, &name, &name, &name,\ -+ &name, &name, &name, &name},\ -+ } -+ -+static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) -+{ -+ /* only level 0 ->next matters in skiplist_empty() */ -+ WRITE_ONCE(node->next[0], node); -+} -+ -+/** -+ * FULL_INIT_SKIPLIST_NODE -- fully init a skiplist_node, expecially for header -+ * @node: the skip list node to be inited. -+ */ -+static inline void FULL_INIT_SKIPLIST_NODE(struct skiplist_node *node) -+{ -+ int i; -+ -+ node->level = 0; -+ for (i = 0; i < NUM_SKIPLIST_LEVEL; i++) { -+ WRITE_ONCE(node->next[i], node); -+ node->prev[i] = node; -+ } -+} -+ -+/** -+ * skiplist_empty - test whether a skip list is empty -+ * @head: the skip list to test. -+ */ -+static inline int skiplist_empty(const struct skiplist_node *head) -+{ -+ return READ_ONCE(head->next[0]) == head; -+} -+ -+/** -+ * skiplist_entry - get the struct for this entry -+ * @ptr: the &struct skiplist_node pointer. -+ * @type: the type of the struct this is embedded in. -+ * @member: the name of the skiplist_node within the struct. -+ */ -+#define skiplist_entry(ptr, type, member) \ -+ container_of(ptr, type, member) -+ -+/** -+ * DEFINE_SKIPLIST_INSERT_FUNC -- macro to define a customized skip list insert -+ * function, which takes two parameters, first one is the header node of the -+ * skip list, second one is the skip list node to be inserted -+ * @func_name: the customized skip list insert function name -+ * @search_func: the search function to be used, which takes two parameters, -+ * 1st one is the itrator of skiplist_node in the list, the 2nd is the skip list -+ * node to be inserted, the function should return true if search should be -+ * continued, otherwise return false. 
-+ * Returns 1 if @node is inserted as the first item of skip list at level zero, -+ * otherwise 0 -+ */ -+#define DEFINE_SKIPLIST_INSERT_FUNC(func_name, search_func)\ -+static inline int func_name(struct skiplist_node *head, struct skiplist_node *node)\ -+{\ -+ struct skiplist_node *update[NUM_SKIPLIST_LEVEL];\ -+ struct skiplist_node *p, *q;\ -+ int k = head->level;\ -+\ -+ p = head;\ -+ do {\ -+ while (q = p->next[k], q != head && search_func(q, node))\ -+ p = q;\ -+ update[k] = p;\ -+ } while (--k >= 0);\ -+\ -+ k = node->level;\ -+ if (unlikely(k > head->level)) {\ -+ node->level = k = ++head->level;\ -+ update[k] = head;\ -+ }\ -+\ -+ do {\ -+ p = update[k];\ -+ q = p->next[k];\ -+ node->next[k] = q;\ -+ p->next[k] = node;\ -+ node->prev[k] = p;\ -+ q->prev[k] = node;\ -+ } while (--k >= 0);\ -+\ -+ return (p == head);\ -+} -+ -+/** -+ * skiplist_del_init -- delete skip list node from a skip list and reset it's -+ * init state -+ * @head: the header node of the skip list to be deleted from. -+ * @node: the skip list node to be deleted, the caller need to ensure @node is -+ * in skip list which @head represent. -+ * Returns 1 if @node is the first item of skip level at level zero, otherwise 0 -+ */ -+static inline int -+skiplist_del_init(struct skiplist_node *head, struct skiplist_node *node) -+{ -+ int l, m = node->level; -+ -+ for (l = 0; l <= m; l++) { -+ node->prev[l]->next[l] = node->next[l]; -+ node->next[l]->prev[l] = node->prev[l]; -+ } -+ if (m == head->level && m > 0) { -+ while (head->next[m] == head && m > 0) -+ m--; -+ head->level = m; -+ } -+ INIT_SKIPLIST_NODE(node); -+ -+ return (node->prev[0] == head); -+} -+#endif /* _LINUX_SKIP_LIST_H */ -diff --git a/init/Kconfig b/init/Kconfig -index 0498af567f70..aaa7c434eedf 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -742,9 +742,39 @@ config GENERIC_SCHED_CLOCK - - menu "Scheduler features" - -+menuconfig SCHED_ALT -+ bool "Alternative CPU Schedulers" -+ default y -+ help -+ This feature enable alternative CPU scheduler" -+ -+if SCHED_ALT -+ -+choice -+ prompt "Alternative CPU Scheduler" -+ default SCHED_BMQ -+ -+config SCHED_BMQ -+ bool "BMQ CPU scheduler" -+ help -+ The BitMap Queue CPU scheduler for excellent interactivity and -+ responsiveness on the desktop and solid scalability on normal -+ hardware and commodity servers. -+ -+config SCHED_PDS -+ bool "PDS CPU scheduler" -+ help -+ The Priority and Deadline based Skip list multiple queue CPU -+ Scheduler. -+ -+endchoice -+ -+endif -+ - config UCLAMP_TASK - bool "Enable utilization clamping for RT/FAIR tasks" - depends on CPU_FREQ_GOV_SCHEDUTIL -+ depends on !SCHED_ALT - help - This feature enables the scheduler to track the clamped utilization - of each CPU based on RUNNABLE tasks scheduled on that CPU. -@@ -830,6 +860,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION -+ depends on !SCHED_ALT - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -916,7 +947,7 @@ menuconfig CGROUP_SCHED - bandwidth allocation to such task groups. It uses cgroups to group - tasks. 
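The skip_list.h API introduced just above only supplies the node type and the insert/delete primitives; a user embeds a skiplist_node in its own structure, supplies a comparison callback, and generates an insert function with DEFINE_SKIPLIST_INSERT_FUNC, while "pick next" stays O(1) because it is always head->next[0]. The kernel-style sketch below shows that usage pattern for a hypothetical deadline-ordered container; demo_item, demo_search and the demo_* helpers are illustrative names only (not code from this patch), and the fragment assumes the header plus the usual kernel helpers rather than being a standalone program.

/* Hypothetical container: one node per runnable entity, ordered by deadline. */
struct demo_item {
	u64			deadline;
	struct skiplist_node	sl_node;
};

/* search_func: return true while the iterator should keep advancing,
 * i.e. while the existing entry sorts before (or equal to) the new one. */
static inline bool demo_search(struct skiplist_node *it,
			       struct skiplist_node *node)
{
	return skiplist_entry(it, struct demo_item, sl_node)->deadline <=
	       skiplist_entry(node, struct demo_item, sl_node)->deadline;
}

DEFINE_SKIPLIST_INSERT_FUNC(demo_skiplist_insert, demo_search)

/* Header node is fully initialised once; each item gets a caller-chosen
 * random level before insertion. */
static struct skiplist_node demo_head = SKIPLIST_NODE_INIT(demo_head);

static void demo_enqueue(struct demo_item *item, int random_level)
{
	item->sl_node.level = random_level;	/* 0 .. NUM_SKIPLIST_LEVEL-1 */
	demo_skiplist_insert(&demo_head, &item->sl_node);
}

static struct demo_item *demo_peek_first(void)
{
	if (skiplist_empty(&demo_head))
		return NULL;
	/* O(1): the front of the level-0 list is always the best entry. */
	return skiplist_entry(demo_head.next[0], struct demo_item, sl_node);
}

static void demo_dequeue(struct demo_item *item)
{
	skiplist_del_init(&demo_head, &item->sl_node);
}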
- --if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_ALT - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED -@@ -1172,6 +1203,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -+ depends on !SCHED_ALT - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -diff --git a/init/init_task.c b/init/init_task.c -index 15089d15010a..6bc94553d79a 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -74,9 +74,15 @@ struct task_struct init_task - .stack = init_stack, - .usage = REFCOUNT_INIT(2), - .flags = PF_KTHREAD, -+#ifdef CONFIG_SCHED_ALT -+ .prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, -+ .static_prio = DEFAULT_PRIO, -+ .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, -+#else - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, - .normal_prio = MAX_PRIO - 20, -+#endif - .policy = SCHED_NORMAL, - .cpus_ptr = &init_task.cpus_mask, - .cpus_mask = CPU_MASK_ALL, -@@ -86,6 +92,19 @@ struct task_struct init_task - .restart_block = { - .fn = do_no_restart_syscall, - }, -+#ifdef CONFIG_SCHED_ALT -+#ifdef CONFIG_SCHED_BMQ -+ .boost_prio = 0, -+ .bmq_idx = 15, -+ .bmq_node = LIST_HEAD_INIT(init_task.bmq_node), -+#endif -+#ifdef CONFIG_SCHED_PDS -+ .deadline = 0, -+ .sl_level = 0, -+ .sl_node = SKIPLIST_NODE_INIT(init_task.sl_node), -+#endif -+ .time_slice = HZ, -+#else - .se = { - .group_node = LIST_HEAD_INIT(init_task.se.group_node), - }, -@@ -93,6 +112,7 @@ struct task_struct init_task - .run_list = LIST_HEAD_INIT(init_task.rt.run_list), - .time_slice = RR_TIMESLICE, - }, -+#endif - .tasks = LIST_HEAD_INIT(init_task.tasks), - #ifdef CONFIG_SMP - .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), -diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index 642415b8c3c9..7e0e1fe18035 100644 ---- a/kernel/cgroup/cpuset.c -+++ b/kernel/cgroup/cpuset.c -@@ -636,7 +636,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) - return ret; - } - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_ALT) - /* - * Helper routine for generate_sched_domains(). - * Do cpusets a, b have overlapping effective cpus_allowed masks? 
-@@ -1009,7 +1009,7 @@ static void rebuild_sched_domains_locked(void) - /* Have scheduler rebuild the domains */ - partition_and_rebuild_sched_domains(ndoms, doms, attr); - } --#else /* !CONFIG_SMP */ -+#else /* !CONFIG_SMP || CONFIG_SCHED_ALT */ - static void rebuild_sched_domains_locked(void) - { - } -diff --git a/kernel/delayacct.c b/kernel/delayacct.c -index 27725754ac99..769d773c7182 100644 ---- a/kernel/delayacct.c -+++ b/kernel/delayacct.c -@@ -106,7 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) - */ - t1 = tsk->sched_info.pcount; - t2 = tsk->sched_info.run_delay; -- t3 = tsk->se.sum_exec_runtime; -+ t3 = tsk_seruntime(tsk); - - d->cpu_count += t1; - -diff --git a/kernel/exit.c b/kernel/exit.c -index 727150f28103..23ddd91a3d29 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -121,7 +121,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->curr_target = next_thread(tsk); - } - -- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, -+ add_device_randomness((const void*) &tsk_seruntime(tsk), - sizeof(unsigned long long)); - - /* -@@ -142,7 +142,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->inblock += task_io_get_inblock(tsk); - sig->oublock += task_io_get_oublock(tsk); - task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; -+ sig->sum_sched_runtime += tsk_seruntime(tsk); - sig->nr_threads--; - __unhash_process(tsk, group_dead); - write_sequnlock(&sig->stats_lock); -diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c -index f6310f848f34..4176ad070bc9 100644 ---- a/kernel/livepatch/transition.c -+++ b/kernel/livepatch/transition.c -@@ -306,7 +306,11 @@ static bool klp_try_switch_task(struct task_struct *task) - */ - rq = task_rq_lock(task, &flags); - -+#ifdef CONFIG_SCHED_ALT -+ if (task_running(task) && task != current) { -+#else - if (task_running(rq, task) && task != current) { -+#endif - snprintf(err_buf, STACK_ERR_BUF_SIZE, - "%s: %s:%d is running\n", __func__, task->comm, - task->pid); -diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index cfdd5b93264d..84c284eb544a 100644 ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -227,15 +227,19 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, - * Only use with rt_mutex_waiter_{less,equal}() - */ - #define task_to_waiter(p) \ -- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } -+ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = __tsk_deadline(p) } - - static inline int - rt_mutex_waiter_less(struct rt_mutex_waiter *left, - struct rt_mutex_waiter *right) - { -+#ifdef CONFIG_SCHED_PDS -+ return (left->deadline < right->deadline); -+#else - if (left->prio < right->prio) - return 1; - -+#ifndef CONFIG_SCHED_BMQ - /* - * If both waiters have dl_prio(), we check the deadlines of the - * associated tasks. -@@ -244,17 +248,23 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left, - */ - if (dl_prio(left->prio)) - return dl_time_before(left->deadline, right->deadline); -+#endif - - return 0; -+#endif - } - - static inline int - rt_mutex_waiter_equal(struct rt_mutex_waiter *left, - struct rt_mutex_waiter *right) - { -+#ifdef CONFIG_SCHED_PDS -+ return (left->deadline == right->deadline); -+#else - if (left->prio != right->prio) - return 0; - -+#ifndef CONFIG_SCHED_BMQ - /* - * If both waiters have dl_prio(), we check the deadlines of the - * associated tasks. 
-@@ -263,8 +273,10 @@ rt_mutex_waiter_equal(struct rt_mutex_waiter *left, - */ - if (dl_prio(left->prio)) - return left->deadline == right->deadline; -+#endif - - return 1; -+#endif - } - - static void -@@ -678,7 +690,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, - * the values of the node being removed. - */ - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - rt_mutex_enqueue(lock, waiter); - -@@ -951,7 +963,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, - waiter->task = task; - waiter->lock = lock; - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - /* Get the top priority waiter on the lock */ - if (rt_mutex_has_waiters(lock)) -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 5fc9c9b70862..eb6d7d87779f 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -22,14 +22,20 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) - CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer - endif - --obj-y += core.o loadavg.o clock.o cputime.o --obj-y += idle.o fair.o rt.o deadline.o --obj-y += wait.o wait_bit.o swait.o completion.o -- --obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o -+ifdef CONFIG_SCHED_ALT -+obj-y += alt_core.o alt_debug.o -+else -+obj-y += core.o -+obj-y += fair.o rt.o deadline.o -+obj-$(CONFIG_SMP) += cpudeadline.o stop_task.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o --obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o -+endif -+obj-y += loadavg.o clock.o cputime.o -+obj-y += idle.o -+obj-y += wait.o wait_bit.o swait.o completion.o -+obj-$(CONFIG_SMP) += cpupri.o pelt.o topology.o -+obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -new file mode 100644 -index 000000000000..b469c9488d18 ---- /dev/null -+++ b/kernel/sched/alt_core.c -@@ -0,0 +1,6184 @@ -+/* -+ * kernel/sched/alt_core.c -+ * -+ * Core alternative kernel scheduler code and related syscalls -+ * -+ * Copyright (C) 1991-2002 Linus Torvalds -+ * -+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes -+ * a whole lot of those previous things. -+ * 2017-09-06 Priority and Deadline based Skip list multiple queue kernel -+ * scheduler by Alfred Chen. -+ * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. 
-+ */ -+#include "sched.h" -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+ -+#include "../workqueue_internal.h" -+#include "../../fs/io-wq.h" -+#include "../smpboot.h" -+ -+#include "pelt.h" -+#include "smp.h" -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+#define ALT_SCHED_VERSION "v5.8-r3" -+ -+/* rt_prio(prio) defined in include/linux/sched/rt.h */ -+#define rt_task(p) rt_prio((p)->prio) -+#define rt_policy(policy) ((policy) == SCHED_FIFO || (policy) == SCHED_RR) -+#define task_has_rt_policy(p) (rt_policy((p)->policy)) -+ -+#define STOP_PRIO (MAX_RT_PRIO - 1) -+ -+/* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ -+u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000); -+ -+static int __init sched_timeslice(char *str) -+{ -+ int timeslice_us; -+ -+ get_option(&str, ×lice_us); -+ if (timeslice_us >= 1000) -+ sched_timeslice_ns = timeslice_us * 1000; -+ -+ return 0; -+} -+early_param("sched_timeslice", sched_timeslice); -+ -+/* Reschedule if less than this many μs left */ -+#define RESCHED_NS (100 * 1000) -+ -+/** -+ * sched_yield_type - Choose what sort of yield sched_yield will perform. -+ * 0: No yield. -+ * 1: Deboost and requeue task. (default) -+ * 2: Set rq skip task. -+ */ -+int sched_yield_type __read_mostly = 1; -+ -+#ifdef CONFIG_SMP -+static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; -+ -+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); -+ -+#ifdef CONFIG_SCHED_SMT -+DEFINE_STATIC_KEY_FALSE(sched_smt_present); -+EXPORT_SYMBOL_GPL(sched_smt_present); -+#endif -+ -+/* -+ * Keep a unique ID per domain (we use the first CPUs number in the cpumask of -+ * the domain), this allows us to quickly tell if two cpus are in the same cache -+ * domain, see cpus_share_cache(). 
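Per the code above, sched_timeslice() takes the boot parameter in microseconds and only overrides the 4 ms default when the value is at least 1000 us; for example, sched_timeslice=2000 would give a 2 ms slice. A standalone userspace sketch of just that conversion follows (parse_timeslice and the surrounding names are illustrative, not kernel symbols):

#include <stdio.h>
#include <stdlib.h>

/* Default slice: 4 ms, expressed in nanoseconds, as in the patch. */
static unsigned long long timeslice_ns = 4ULL * 1000 * 1000;

static void parse_timeslice(const char *str)
{
	long us = strtol(str, NULL, 10);

	/* Values below 1000 us are ignored and the default is kept. */
	if (us >= 1000)
		timeslice_ns = (unsigned long long)us * 1000;
}

int main(void)
{
	parse_timeslice("2000");			/* 2 ms slice */
	printf("%llu ns\n", timeslice_ns);		/* prints 2000000 */
	parse_timeslice("500");				/* too short, ignored */
	printf("%llu ns\n", timeslice_ns);		/* still 2000000 */
	return 0;
}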
-+ */ -+DEFINE_PER_CPU(int, sd_llc_id); -+#endif /* CONFIG_SMP */ -+ -+static DEFINE_MUTEX(sched_hotcpu_mutex); -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+#define IDLE_WM (IDLE_TASK_SCHED_PRIO) -+ -+#ifdef CONFIG_SCHED_SMT -+static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; -+#endif -+static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; -+ -+#ifdef CONFIG_SCHED_BMQ -+#include "bmq_imp.h" -+#endif -+#ifdef CONFIG_SCHED_PDS -+#include "pds_imp.h" -+#endif -+ -+static inline void update_sched_rq_watermark(struct rq *rq) -+{ -+ unsigned long watermark = sched_queue_watermark(rq); -+ unsigned long last_wm = rq->watermark; -+ unsigned long i; -+ int cpu; -+ -+ /*printk(KERN_INFO "sched: watermark(%d) %d, last %d\n", -+ cpu_of(rq), watermark, last_wm);*/ -+ if (watermark == last_wm) -+ return; -+ -+ rq->watermark = watermark; -+ cpu = cpu_of(rq); -+ if (watermark < last_wm) { -+ for (i = watermark + 1; i <= last_wm; i++) -+ cpumask_andnot(&sched_rq_watermark[i], -+ &sched_rq_watermark[i], cpumask_of(cpu)); -+#ifdef CONFIG_SCHED_SMT -+ if (!static_branch_likely(&sched_smt_present)) -+ return; -+ if (IDLE_WM == last_wm) -+ cpumask_andnot(&sched_sg_idle_mask, -+ &sched_sg_idle_mask, cpu_smt_mask(cpu)); -+#endif -+ return; -+ } -+ /* last_wm < watermark */ -+ for (i = last_wm + 1; i <= watermark; i++) -+ cpumask_set_cpu(cpu, &sched_rq_watermark[i]); -+#ifdef CONFIG_SCHED_SMT -+ if (!static_branch_likely(&sched_smt_present)) -+ return; -+ if (IDLE_WM == watermark) { -+ cpumask_t tmp; -+ cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[IDLE_WM]); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), -+ &sched_sg_idle_mask); -+ } -+#endif -+} -+ -+static inline struct task_struct *rq_runnable_task(struct rq *rq) -+{ -+ struct task_struct *next = sched_rq_first_task(rq); -+ -+ if (unlikely(next == rq->skip)) -+ next = sched_rq_next_task(next, rq); -+ -+ return next; -+} -+ -+/* -+ * Context: p->pi_lock -+ */ -+static inline struct rq -+*__task_access_lock(struct task_struct *p, raw_spinlock_t **plock) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock(&rq->lock); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ *plock = NULL; -+ return rq; -+ } -+ } -+} -+ -+static inline void -+__task_access_unlock(struct task_struct *p, raw_spinlock_t *lock) -+{ -+ if (NULL != lock) -+ raw_spin_unlock(lock); -+} -+ -+static inline struct rq -+*task_access_lock_irqsave(struct task_struct *p, raw_spinlock_t **plock, -+ unsigned long *flags) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock_irqsave(&rq->lock, *flags); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&rq->lock, *flags); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ raw_spin_lock_irqsave(&p->pi_lock, *flags); 
-+ if (likely(!p->on_cpu && !p->on_rq && -+ rq == task_rq(p))) { -+ *plock = &p->pi_lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags); -+ } -+ } -+} -+ -+static inline void -+task_access_unlock_irqrestore(struct task_struct *p, raw_spinlock_t *lock, -+ unsigned long *flags) -+{ -+ raw_spin_unlock_irqrestore(lock, *flags); -+} -+ -+/* -+ * __task_rq_lock - lock the rq @p resides on. -+ */ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ for (;;) { -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) -+ return rq; -+ raw_spin_unlock(&rq->lock); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+/* -+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. -+ */ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ for (;;) { -+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ /* -+ * move_queued_task() task_rq_lock() -+ * -+ * ACQUIRE (rq->lock) -+ * [S] ->on_rq = MIGRATING [L] rq = task_rq() -+ * WMB (__set_task_cpu()) ACQUIRE (rq->lock); -+ * [S] ->cpu = new_cpu [L] task_rq() -+ * [L] ->on_rq -+ * RELEASE (rq->lock) -+ * -+ * If we observe the old CPU in task_rq_lock(), the acquire of -+ * the old rq->lock will fully serialize against the stores. -+ * -+ * If we observe the new CPU in task_rq_lock(), the address -+ * dependency headed by '[L] rq = task_rq()' and the acquire -+ * will pair with the WMB to ensure we then also see migrating. -+ */ -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+static inline void -+rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irqsave(&rq->lock, rf->flags); -+} -+ -+static inline void -+rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irqrestore(&rq->lock, rf->flags); -+} -+ -+/* -+ * RQ-clock updating methods: -+ */ -+ -+static void update_rq_clock_task(struct rq *rq, s64 delta) -+{ -+/* -+ * In theory, the compile should just see 0 here, and optimize out the call -+ * to sched_rt_avg_update. But I don't trust it... -+ */ -+ s64 __maybe_unused steal = 0, irq_delta = 0; -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; -+ -+ /* -+ * Since irq_time is only updated on {soft,}irq_exit, we might run into -+ * this case when a previous update_rq_clock() happened inside a -+ * {soft,}irq region. -+ * -+ * When this happens, we stop ->clock_task and only update the -+ * prev_irq_time stamp to account for the part that fit, so that a next -+ * update will consume the rest. This ensures ->clock_task is -+ * monotonic. -+ * -+ * It does however cause some slight miss-attribution of {soft,}irq -+ * time, a more accurate solution would be to update the irq_time using -+ * the current rq->clock timestamp, except that would require using -+ * atomic ops. 
-+ */ -+ if (irq_delta > delta) -+ irq_delta = delta; -+ -+ rq->prev_irq_time += irq_delta; -+ delta -= irq_delta; -+#endif -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ if (static_key_false((¶virt_steal_rq_enabled))) { -+ steal = paravirt_steal_clock(cpu_of(rq)); -+ steal -= rq->prev_steal_time_rq; -+ -+ if (unlikely(steal > delta)) -+ steal = delta; -+ -+ rq->prev_steal_time_rq += steal; -+ delta -= steal; -+ } -+#endif -+ -+ rq->clock_task += delta; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ if ((irq_delta + steal)) -+ update_irq_load_avg(rq, irq_delta + steal); -+#endif -+} -+ -+static inline void update_rq_clock(struct rq *rq) -+{ -+ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; -+ -+ if (unlikely(delta <= 0)) -+ return; -+ rq->clock += delta; -+ update_rq_clock_task(rq, delta); -+} -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out -+ * of nohz mode if necessary. -+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu; -+ -+ if (!tick_nohz_full_enabled()) -+ return; -+ -+ cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (rq->nr_running < 2) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+/* -+ * Add/Remove/Requeue task to/from the runqueue routines -+ * Context: rq->lock -+ */ -+static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ /*printk(KERN_INFO "sched: dequeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ -+ WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ __SCHED_DEQUEUE_TASK(p, rq, flags, update_sched_rq_watermark(rq)); -+ --rq->nr_running; -+#ifdef CONFIG_SMP -+ if (1 == rq->nr_running) -+ cpumask_clear_cpu(cpu_of(rq), &sched_rq_pending_mask); -+#endif -+ -+ sched_update_tick_dependency(rq); -+} -+ -+static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ /*printk(KERN_INFO "sched: enqueue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ -+ WARN_ONCE(task_rq(p) != rq, "sched: enqueue task reside on cpu%d to cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ __SCHED_ENQUEUE_TASK(p, rq, flags); -+ update_sched_rq_watermark(rq); -+ ++rq->nr_running; -+#ifdef CONFIG_SMP -+ if (2 == rq->nr_running) -+ cpumask_set_cpu(cpu_of(rq), &sched_rq_pending_mask); -+#endif -+ -+ sched_update_tick_dependency(rq); -+ -+ /* -+ * If in_iowait is set, the code below may not trigger any cpufreq -+ * utilization updates, so do it here explicitly with the IOWAIT flag -+ * passed. 
-+ */ -+ if (p->in_iowait) -+ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq) -+{ -+ lockdep_assert_held(&rq->lock); -+ /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ -+ WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", -+ cpu_of(rq), task_cpu(p)); -+ -+ __SCHED_REQUEUE_TASK(p, rq, update_sched_rq_watermark(rq)); -+} -+ -+/* -+ * cmpxchg based fetch_or, macro so it works for different integer types -+ */ -+#define fetch_or(ptr, mask) \ -+ ({ \ -+ typeof(ptr) _ptr = (ptr); \ -+ typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _old, _val = *_ptr; \ -+ \ -+ for (;;) { \ -+ _old = cmpxchg(_ptr, _val, _val | _mask); \ -+ if (_old == _val) \ -+ break; \ -+ _val = _old; \ -+ } \ -+ _old; \ -+}) -+ -+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) -+/* -+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * this avoids any races wrt polling state changes and thereby avoids -+ * spurious IPIs. -+ */ -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+} -+ -+/* -+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. -+ * -+ * If this returns true, then the idle task promises to call -+ * sched_ttwu_pending() and reschedule soon. -+ */ -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); -+ -+ for (;;) { -+ if (!(val & _TIF_POLLING_NRFLAG)) -+ return false; -+ if (val & _TIF_NEED_RESCHED) -+ return true; -+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); -+ if (old == val) -+ break; -+ val = old; -+ } -+ return true; -+} -+ -+#else -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ return false; -+} -+#endif -+#endif -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. -+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. -+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. 
-+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. -+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); -+ /* task can safely be re-inserted now: */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ -+/* -+ * resched_curr - mark rq's current task 'to be rescheduled now'. -+ * -+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. -+ */ -+void resched_curr(struct rq *rq) -+{ -+ struct task_struct *curr = rq->curr; -+ int cpu; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ if (test_tsk_need_resched(curr)) -+ return; -+ -+ cpu = cpu_of(rq); -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(curr); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(curr)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(cpu_rq(cpu)); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+} -+ -+#ifdef CONFIG_SMP -+#ifdef CONFIG_NO_HZ_COMMON -+void nohz_balance_enter_idle(int cpu) {} -+ -+void select_nohz_load_balancer(int stop_tick) {} -+ -+void set_cpu_sd_state_idle(void) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. -+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). 
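__wake_q_add() above claims a task for wakeup by atomically swinging its wake_q next pointer from NULL to the WAKE_Q_TAIL sentinel; a later caller sees a non-NULL pointer and backs off, so each task is queued at most once before wake_up_q() runs. The standalone C11 sketch below models only that claim step; wq_node, wq_try_claim and wq_tail are illustrative names, and the list linking plus task refcounting are left out.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct wq_node {
	_Atomic(struct wq_node *) next;
};

static struct wq_node wq_tail;		/* stands in for WAKE_Q_TAIL */

static bool wq_try_claim(struct wq_node *node)
{
	struct wq_node *expected = NULL;

	/* Succeeds only if node->next was still NULL (not yet queued). */
	return atomic_compare_exchange_strong(&node->next, &expected, &wq_tail);
}

int main(void)
{
	struct wq_node n;

	atomic_init(&n.next, NULL);
	printf("first add:  %s\n", wq_try_claim(&n) ? "queued" : "already queued");
	printf("second add: %s\n", wq_try_claim(&n) ? "queued" : "already queued");
	return 0;
}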
-+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(), default_cpu = -1; -+ struct cpumask *mask; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { -+ if (!idle_cpu(cpu)) -+ return cpu; -+ default_cpu = cpu; -+ } -+ -+ for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) -+ for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) -+ if (!idle_cpu(i)) -+ return i; -+ -+ if (default_cpu == -1) -+ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+ cpu = default_cpu; -+ -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. -+ */ -+static inline void wake_up_idle_cpu(int cpu) -+{ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ set_tsk_need_resched(cpu_rq(cpu)->idle); -+ smp_send_reschedule(cpu); -+} -+ -+static inline bool wake_up_full_nohz_cpu(int cpu) -+{ -+ /* -+ * We just need the target to call irq_exit() and re-evaluate -+ * the next tick. The nohz full kick at least implies that. -+ * If needed we can still optimize that later with an -+ * empty IRQ. -+ */ -+ if (tick_nohz_full_cpu(cpu)) { -+ if (cpu != smp_processor_id() || -+ tick_nohz_tick_stopped()) -+ tick_nohz_full_kick_cpu(cpu); -+ return true; -+ } -+ -+ return false; -+} -+ -+void wake_up_nohz_cpu(int cpu) -+{ -+ if (cpu_online(cpu) && !wake_up_full_nohz_cpu(cpu)) -+ wake_up_idle_cpu(cpu); -+} -+ -+static void nohz_csd_func(void *info) -+{ -+ struct rq *rq = info; -+ int cpu = cpu_of(rq); -+ unsigned int flags; -+ -+ /* -+ * Release the rq::nohz_csd. -+ */ -+ flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); -+ WARN_ON(!(flags & NOHZ_KICK_MASK)); -+ -+ rq->idle_balance = idle_cpu(cpu); -+ if (rq->idle_balance && !need_resched()) { -+ rq->nohz_idle_balance = flags; -+ raise_softirq_irqoff(SCHED_SOFTIRQ); -+ } -+} -+ -+#endif /* CONFIG_NO_HZ_COMMON */ -+#endif /* CONFIG_SMP */ -+ -+static inline void check_preempt_curr(struct rq *rq) -+{ -+ if (sched_rq_first_task(rq) != rq->curr) -+ resched_curr(rq); -+} -+ -+static inline void -+rq_csd_init(struct rq *rq, call_single_data_t *csd, smp_call_func_t func) -+{ -+ csd->flags = 0; -+ csd->func = func; -+ csd->info = rq; -+} -+ -+#ifdef CONFIG_SCHED_HRTICK -+/* -+ * Use HR-timers to deliver accurate preemption points. -+ */ -+ -+static void hrtick_clear(struct rq *rq) -+{ -+ if (hrtimer_active(&rq->hrtick_timer)) -+ hrtimer_cancel(&rq->hrtick_timer); -+} -+ -+/* -+ * High-resolution timer tick. -+ * Runs from hardirq context with interrupts disabled. 
-+ */ -+static enum hrtimer_restart hrtick(struct hrtimer *timer) -+{ -+ struct rq *rq = container_of(timer, struct rq, hrtick_timer); -+ struct task_struct *p; -+ -+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); -+ -+ raw_spin_lock(&rq->lock); -+ p = rq->curr; -+ p->time_slice = 0; -+ resched_curr(rq); -+ raw_spin_unlock(&rq->lock); -+ -+ return HRTIMER_NORESTART; -+} -+ -+/* -+ * Use hrtick when: -+ * - enabled by features -+ * - hrtimer is actually high res -+ */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ /** -+ * Alt schedule FW doesn't support sched_feat yet -+ if (!sched_feat(HRTICK)) -+ return 0; -+ */ -+ if (!cpu_active(cpu_of(rq))) -+ return 0; -+ return hrtimer_is_hres_active(&rq->hrtick_timer); -+} -+ -+#ifdef CONFIG_SMP -+ -+static void __hrtick_restart(struct rq *rq) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ -+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); -+} -+ -+/* -+ * called from hardirq (IPI) context -+ */ -+static void __hrtick_start(void *arg) -+{ -+ struct rq *rq = arg; -+ -+ raw_spin_lock(&rq->lock); -+ __hrtick_restart(rq); -+ raw_spin_unlock(&rq->lock); -+} -+ -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ ktime_t time; -+ s64 delta; -+ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense and can cause timer DoS. -+ */ -+ delta = max_t(s64, delay, 10000LL); -+ time = ktime_add_ns(timer->base->get_time(), delta); -+ -+ hrtimer_set_expires(timer, time); -+ -+ if (rq == this_rq()) -+ __hrtick_restart(rq); -+ else -+ smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); -+} -+ -+#else -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense. Rely on vruntime for fairness. -+ */ -+ delay = max_t(u64, delay, 10000LL); -+ hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), -+ HRTIMER_MODE_REL_PINNED_HARD); -+} -+#endif /* CONFIG_SMP */ -+ -+static void hrtick_rq_init(struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start); -+#endif -+ -+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); -+ rq->hrtick_timer.function = hrtick; -+} -+#else /* CONFIG_SCHED_HRTICK */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline void hrtick_clear(struct rq *rq) -+{ -+} -+ -+static inline void hrtick_rq_init(struct rq *rq) -+{ -+} -+#endif /* CONFIG_SCHED_HRTICK */ -+ -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (task_has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ -+ return p->static_prio + MAX_PRIORITY_ADJ; -+} -+ -+/* -+ * Calculate the current priority, i.e. the priority -+ * taken into account by the scheduler. This value might -+ * be boosted by RT tasks as it will be RT if the task got -+ * RT-boosted. If not then it returns p->normal_prio. -+ */ -+static int effective_prio(struct task_struct *p) -+{ -+ p->normal_prio = normal_prio(p); -+ /* -+ * If we are RT tasks or we were boosted to RT priority, -+ * keep the priority unchanged. Otherwise, update priority -+ * to the normal priority: -+ */ -+ if (!rt_prio(p->prio)) -+ return p->normal_prio; -+ return p->prio; -+} -+ -+/* -+ * activate_task - move a task to the runqueue. 
-+ * -+ * Context: rq->lock -+ */ -+static void activate_task(struct task_struct *p, struct rq *rq) -+{ -+ enqueue_task(p, rq, ENQUEUE_WAKEUP); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ cpufreq_update_util(rq, 0); -+} -+ -+/* -+ * deactivate_task - remove a task from the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static inline void deactivate_task(struct task_struct *p, struct rq *rq) -+{ -+ dequeue_task(p, rq, DEQUEUE_SLEEP); -+ p->on_rq = 0; -+ cpufreq_update_util(rq, 0); -+} -+ -+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->cpu is set up to a new value, task_access_lock(p, ...) can be -+ * successfully executed on another CPU. We must ensure that updates of -+ * per-task data have been completed by this moment. -+ */ -+ smp_wmb(); -+ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ WRITE_ONCE(p->cpu, cpu); -+#else -+ WRITE_ONCE(task_thread_info(p)->cpu, cpu); -+#endif -+#endif -+} -+ -+#ifdef CONFIG_SMP -+void set_task_cpu(struct task_struct *p, unsigned int new_cpu) -+{ -+#ifdef CONFIG_SCHED_DEBUG -+ /* -+ * We should never call set_task_cpu() on a blocked task, -+ * ttwu() will sort out the placement. -+ */ -+ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && -+ !p->on_rq); -+#ifdef CONFIG_LOCKDEP -+ /* -+ * The caller should hold either p->pi_lock or rq->lock, when changing -+ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. -+ * -+ * sched_move_task() holds both and thus holding either pins the cgroup, -+ * see task_group(). -+ */ -+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || -+ lockdep_is_held(&task_rq(p)->lock))); -+#endif -+ /* -+ * Clearly, migrating tasks to offline CPUs is a fairly daft thing. -+ */ -+ WARN_ON_ONCE(!cpu_online(new_cpu)); -+#endif -+ if (task_cpu(p) == new_cpu) -+ return; -+ trace_sched_migrate_task(p, new_cpu); -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ -+ __set_task_cpu(p, new_cpu); -+} -+ -+static inline bool is_per_cpu_kthread(struct task_struct *p) -+{ -+ return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); -+} -+ -+/* -+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see -+ * __set_cpus_allowed_ptr() and select_fallback_rq(). -+ */ -+static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -+{ -+ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ -+ if (is_per_cpu_kthread(p)) -+ return cpu_online(cpu); -+ -+ return cpu_active(cpu); -+} -+ -+/* -+ * This is how migration works: -+ * -+ * 1) we invoke migration_cpu_stop() on the target CPU using -+ * stop_one_cpu(). -+ * 2) stopper starts to run (implicitly forcing the migrated thread -+ * off the CPU) -+ * 3) it checks whether the migrated task is still in the wrong runqueue. -+ * 4) if it's in the wrong runqueue then the migration thread removes -+ * it and puts it into the right queue. -+ * 5) stopper completes and stop_one_cpu() returns and the migration -+ * is done. -+ */ -+ -+/* -+ * move_queued_task - move a queued task to new rq. -+ * -+ * Returns (locked) new rq. Old rq's lock is released. 
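is_cpu_allowed() above lets ordinary tasks run only on active CPUs from their affinity mask, while per-CPU kthreads may also run on CPUs that are online but not yet active during the hotplug window. A standalone sketch of that rule, with plain arrays standing in for cpumasks (the demo_* names and hard-coded masks are illustrative, not kernel code):

#include <stdbool.h>
#include <stdio.h>

#define DEMO_NR_CPUS 4

struct demo_task {
	bool allowed[DEMO_NR_CPUS];	/* the task's affinity mask (cpus_ptr) */
	bool per_cpu_kthread;		/* PF_KTHREAD bound to a single CPU */
};

static bool demo_cpu_online[DEMO_NR_CPUS] = { true, true, true, true };
static bool demo_cpu_active[DEMO_NR_CPUS] = { true, true, false, true };

static bool demo_is_cpu_allowed(const struct demo_task *p, int cpu)
{
	if (!p->allowed[cpu])
		return false;

	/* Per-CPU kthreads may use online && !active CPUs (hotplug window). */
	if (p->per_cpu_kthread)
		return demo_cpu_online[cpu];

	return demo_cpu_active[cpu];
}

int main(void)
{
	struct demo_task user    = { { true, true, true, true }, false };
	struct demo_task kworker = { { false, false, true, false }, true };

	printf("user task on cpu2: %d\n", demo_is_cpu_allowed(&user, 2));    /* 0: not active */
	printf("kworker on cpu2:   %d\n", demo_is_cpu_allowed(&kworker, 2)); /* 1: online */
	return 0;
}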
-+ */ -+static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int -+ new_cpu) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); -+ dequeue_task(p, rq, 0); -+ set_task_cpu(p, new_cpu); -+ raw_spin_unlock(&rq->lock); -+ -+ rq = cpu_rq(new_cpu); -+ -+ raw_spin_lock(&rq->lock); -+ BUG_ON(task_cpu(p) != new_cpu); -+ enqueue_task(p, rq, 0); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ check_preempt_curr(rq); -+ -+ return rq; -+} -+ -+struct migration_arg { -+ struct task_struct *task; -+ int dest_cpu; -+}; -+ -+/* -+ * Move (not current) task off this CPU, onto the destination CPU. We're doing -+ * this because either it can't run here any more (set_cpus_allowed() -+ * away from this CPU, or CPU going down), or because we're -+ * attempting to rebalance this task on exec (sched_exec). -+ * -+ * So we race with normal scheduler movements, but that's OK, as long -+ * as the task is no longer on this CPU. -+ */ -+static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int -+ dest_cpu) -+{ -+ /* Affinity changed (again). */ -+ if (!is_cpu_allowed(p, dest_cpu)) -+ return rq; -+ -+ update_rq_clock(rq); -+ return move_queued_task(rq, p, dest_cpu); -+} -+ -+/* -+ * migration_cpu_stop - this will be executed by a highprio stopper thread -+ * and performs thread migration by bumping thread off CPU then -+ * 'pushing' onto another runqueue. -+ */ -+static int migration_cpu_stop(void *data) -+{ -+ struct migration_arg *arg = data; -+ struct task_struct *p = arg->task; -+ struct rq *rq = this_rq(); -+ -+ /* -+ * The original target CPU might have gone down and we might -+ * be on another CPU but it doesn't matter. -+ */ -+ local_irq_disable(); -+ /* -+ * We need to explicitly wake pending tasks before running -+ * __migrate_task() such that we will not miss enforcing cpus_ptr -+ * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. -+ */ -+ flush_smp_call_function_from_idle(); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ /* -+ * If task_rq(p) != rq, it cannot be migrated here, because we're -+ * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because -+ * we're holding p->pi_lock. -+ */ -+ if (task_rq(p) == rq && task_on_rq_queued(p)) -+ rq = __migrate_task(rq, p, arg->dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_enable(); -+ return 0; -+} -+ -+static inline void -+set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ p->nr_cpus_allowed = cpumask_weight(new_mask); -+} -+ -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ set_cpus_allowed_common(p, new_mask); -+} -+#endif -+ -+/** -+ * task_curr - is this task currently executing on a CPU? -+ * @p: the task in question. -+ * -+ * Return: 1 if the task is currently executing. 0 otherwise. -+ */ -+inline int task_curr(const struct task_struct *p) -+{ -+ return cpu_curr(task_cpu(p)) == p; -+} -+ -+#ifdef CONFIG_SMP -+/* -+ * wait_task_inactive - wait for a thread to unschedule. -+ * -+ * If @match_state is nonzero, it's the @p->state value just checked and -+ * not expected to change. If it changes, i.e. @p might have woken up, -+ * then return zero. When we succeed in waiting for @p to be off its CPU, -+ * we return a positive number (its total switch count). If a second call -+ * a short while later returns the same number, the caller can be sure that -+ * @p has remained unscheduled the whole time. 
-+ * -+ * The caller must ensure that the task *will* unschedule sometime soon, -+ * else this function might spin for a *long* time. This function can't -+ * be called with interrupts off, or it may introduce deadlock with -+ * smp_call_function() if an IPI is sent by the same process we are -+ * waiting to become inactive. -+ */ -+unsigned long wait_task_inactive(struct task_struct *p, long match_state) -+{ -+ unsigned long flags; -+ bool running, on_rq; -+ unsigned long ncsw; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ for (;;) { -+ rq = task_rq(p); -+ -+ /* -+ * If the task is actively running on another CPU -+ * still, just relax and busy-wait without holding -+ * any locks. -+ * -+ * NOTE! Since we don't hold any locks, it's not -+ * even sure that "rq" stays as the right runqueue! -+ * But we don't care, since this will return false -+ * if the runqueue has changed and p is actually now -+ * running somewhere else! -+ */ -+ while (task_running(p) && p == rq->curr) { -+ if (match_state && unlikely(p->state != match_state)) -+ return 0; -+ cpu_relax(); -+ } -+ -+ /* -+ * Ok, time to look more closely! We need the rq -+ * lock now, to be *sure*. If we're wrong, we'll -+ * just go back and repeat. -+ */ -+ task_access_lock_irqsave(p, &lock, &flags); -+ trace_sched_wait_task(p); -+ running = task_running(p); -+ on_rq = p->on_rq; -+ ncsw = 0; -+ if (!match_state || p->state == match_state) -+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ /* -+ * If it changed from the expected state, bail out now. -+ */ -+ if (unlikely(!ncsw)) -+ break; -+ -+ /* -+ * Was it really running after all now that we -+ * checked with the proper locks actually held? -+ * -+ * Oops. Go back and try again.. -+ */ -+ if (unlikely(running)) { -+ cpu_relax(); -+ continue; -+ } -+ -+ /* -+ * It's not enough that it's not actively running, -+ * it must be off the runqueue _entirely_, and not -+ * preempted! -+ * -+ * So if it was still runnable (but just not actively -+ * running right now), it's preempted, and we should -+ * yield - it could be a while. -+ */ -+ if (unlikely(on_rq)) { -+ ktime_t to = NSEC_PER_SEC / HZ; -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ continue; -+ } -+ -+ /* -+ * Ahh, all good. It wasn't running, and it wasn't -+ * runnable, which means that it will never become -+ * running in the future either. We're all done! -+ */ -+ break; -+ } -+ -+ return ncsw; -+} -+ -+/*** -+ * kick_process - kick a running thread to enter/exit the kernel -+ * @p: the to-be-kicked thread -+ * -+ * Cause a process which is running on another CPU to enter -+ * kernel-mode, without any delay. (to get signals handled.) -+ * -+ * NOTE: this function doesn't have to take the runqueue lock, -+ * because all it wants to ensure is that the remote task enters -+ * the kernel. If the IPI races and the task has been migrated -+ * to another CPU then no harm is done and the purpose has been -+ * achieved as well. 
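wait_task_inactive() above returns p->nvcsw | LONG_MIN, so the caller gets a nonzero value even for a task that has never context-switched, while two snapshots of the same count still compare equal. A standalone sketch of that trick (names here are illustrative, not kernel symbols):

#include <limits.h>
#include <stdio.h>

static unsigned long snapshot_ncsw(unsigned long nvcsw)
{
	return nvcsw | LONG_MIN;	/* sets the MSB, never yields zero */
}

int main(void)
{
	unsigned long first = snapshot_ncsw(0);	/* nonzero despite nvcsw == 0 */
	unsigned long again = snapshot_ncsw(0);

	printf("first  = %#lx (nonzero: %d)\n", first, first != 0);
	printf("stable = %d\n", first == again);	/* same count => equal */
	return 0;
}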
-+ */ -+void kick_process(struct task_struct *p) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ cpu = task_cpu(p); -+ if ((cpu != smp_processor_id()) && task_curr(p)) -+ smp_send_reschedule(cpu); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(kick_process); -+ -+/* -+ * ->cpus_ptr is protected by both rq->lock and p->pi_lock -+ * -+ * A few notes on cpu_active vs cpu_online: -+ * -+ * - cpu_active must be a subset of cpu_online -+ * -+ * - on CPU-up we allow per-CPU kthreads on the online && !active CPU, -+ * see __set_cpus_allowed_ptr(). At this point the newly online -+ * CPU isn't yet part of the sched domains, and balancing will not -+ * see it. -+ * -+ * - on cpu-down we clear cpu_active() to mask the sched domains and -+ * avoid the load balancer to place new tasks on the to be removed -+ * CPU. Existing tasks will remain running there and will be taken -+ * off. -+ * -+ * This means that fallback selection must not select !active CPUs. -+ * And can assume that any active CPU must be online. Conversely -+ * select_task_rq() below may allow selection of !active CPUs in order -+ * to satisfy the above rules. -+ */ -+static int select_fallback_rq(int cpu, struct task_struct *p) -+{ -+ int nid = cpu_to_node(cpu); -+ const struct cpumask *nodemask = NULL; -+ enum { cpuset, possible, fail } state = cpuset; -+ int dest_cpu; -+ -+ /* -+ * If the node that the CPU is on has been offlined, cpu_to_node() -+ * will return -1. There is no CPU on the node, and we should -+ * select the CPU on the other node. -+ */ -+ if (nid != -1) { -+ nodemask = cpumask_of_node(nid); -+ -+ /* Look for allowed, online CPU in same node. */ -+ for_each_cpu(dest_cpu, nodemask) { -+ if (!cpu_active(dest_cpu)) -+ continue; -+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) -+ return dest_cpu; -+ } -+ } -+ -+ for (;;) { -+ /* Any allowed, online CPU? */ -+ for_each_cpu(dest_cpu, p->cpus_ptr) { -+ if (!is_cpu_allowed(p, dest_cpu)) -+ continue; -+ goto out; -+ } -+ -+ /* No more Mr. Nice Guy. */ -+ switch (state) { -+ case cpuset: -+ if (IS_ENABLED(CONFIG_CPUSETS)) { -+ cpuset_cpus_allowed_fallback(p); -+ state = possible; -+ break; -+ } -+ /* Fall-through */ -+ case possible: -+ do_set_cpus_allowed(p, cpu_possible_mask); -+ state = fail; -+ break; -+ -+ case fail: -+ BUG(); -+ break; -+ } -+ } -+ -+out: -+ if (state != cpuset) { -+ /* -+ * Don't tell them about moving exiting tasks or -+ * kernel threads (both mm NULL), since they never -+ * leave kernel. 
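select_fallback_rq() above performs a widening search: first an allowed, active CPU on the task's node, then any allowed usable CPU, then the affinity mask is relaxed via the cpuset fallback and finally to every possible CPU before giving up. A standalone toy version of that phase loop follows; the demo_* names, the hard-coded masks and the "cpuset adds CPU 1" step are illustrative assumptions, not kernel code.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define DEMO_NR_CPUS 4

enum demo_phase { DEMO_CPUSET, DEMO_POSSIBLE, DEMO_FAIL };

static int demo_pick(bool allowed[DEMO_NR_CPUS], const bool usable[DEMO_NR_CPUS])
{
	enum demo_phase phase = DEMO_CPUSET;

	for (;;) {
		for (int cpu = 0; cpu < DEMO_NR_CPUS; cpu++)
			if (allowed[cpu] && usable[cpu])
				return cpu;

		switch (phase) {
		case DEMO_CPUSET:	/* widen to the cpuset's full mask */
			allowed[1] = true;	/* pretend the cpuset adds CPU 1 */
			phase = DEMO_POSSIBLE;
			break;
		case DEMO_POSSIBLE:	/* last resort: every possible CPU */
			for (int cpu = 0; cpu < DEMO_NR_CPUS; cpu++)
				allowed[cpu] = true;
			phase = DEMO_FAIL;
			break;
		case DEMO_FAIL:		/* nothing usable at all */
			abort();
		}
	}
}

int main(void)
{
	bool allowed[DEMO_NR_CPUS] = { false, false, true, false };
	bool usable[DEMO_NR_CPUS]  = { true,  true,  false, true };

	printf("fallback CPU: %d\n", demo_pick(allowed, usable));	/* prints 1 */
	return 0;
}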
-+ */ -+ if (p->mm && printk_ratelimit()) { -+ printk_deferred("process %d (%s) no longer affine to cpu%d\n", -+ task_pid_nr(p), p->comm, cpu); -+ } -+ } -+ -+ return dest_cpu; -+} -+ -+static inline int select_task_rq(struct task_struct *p, struct rq *rq) -+{ -+ cpumask_t chk_mask, tmp; -+ -+ if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_online_mask))) -+ return select_fallback_rq(task_cpu(p), p); -+ -+ if ( -+#ifdef CONFIG_SCHED_SMT -+ cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || -+#endif -+ cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || -+ cpumask_and(&tmp, &chk_mask, -+ &sched_rq_watermark[task_sched_prio(p, rq) + 1])) -+ return best_mask_cpu(task_cpu(p), &tmp); -+ -+ return best_mask_cpu(task_cpu(p), &chk_mask); -+} -+ -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. -+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. -+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ int dest_cpu; -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. -+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (cpumask_equal(&p->cpus_mask, new_mask)) -+ goto out; -+ -+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ do_set_cpus_allowed(p, new_mask); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. -+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? 
If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(p) || p->state == TASK_WAKING) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ /* Need help from migration thread: drop lock and wait. */ -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -+ return 0; -+ } -+ if (task_on_rq_queued(p)) { -+ /* -+ * OK, since we're going to drop the lock immediately -+ * afterwards anyway. -+ */ -+ update_rq_clock(rq); -+ rq = move_queued_task(rq, p, dest_cpu); -+ lock = &rq->lock; -+ } -+ -+out: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ return ret; -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+#else /* CONFIG_SMP */ -+ -+static inline int select_task_rq(struct task_struct *p, struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline int -+__set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+ -+#endif /* CONFIG_SMP */ -+ -+static void -+ttwu_stat(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq; -+ -+ if (!schedstat_enabled()) -+ return; -+ -+ rq= this_rq(); -+ -+#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) -+ __schedstat_inc(rq->ttwu_local); -+ else { -+ /** Alt schedule FW ToDo: -+ * How to do ttwu_wake_remote -+ */ -+ } -+#endif /* CONFIG_SMP */ -+ -+ __schedstat_inc(rq->ttwu_count); -+} -+ -+/* -+ * Mark the task runnable and perform wakeup-preemption. -+ */ -+static inline void -+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ check_preempt_curr(rq); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+} -+ -+static inline void -+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ if (p->sched_contributes_to_load) -+ rq->nr_uninterruptible--; -+ -+ activate_task(p, rq); -+ ttwu_do_wakeup(rq, p, 0); -+} -+ -+static int ttwu_remote(struct task_struct *p, int wake_flags) -+{ -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ rq = __task_access_lock(p, &lock); -+ if (task_on_rq_queued(p)) { -+ /* check_preempt_curr() may use rq clock */ -+ update_rq_clock(rq); -+ ttwu_do_wakeup(rq, p, wake_flags); -+ ret = 1; -+ } -+ __task_access_unlock(p, lock); -+ -+ return ret; -+} -+ -+#ifdef CONFIG_SMP -+void sched_ttwu_pending(void *arg) -+{ -+ struct llist_node *llist = arg; -+ struct rq *rq = this_rq(); -+ struct task_struct *p, *t; -+ struct rq_flags rf; -+ -+ if (!llist) -+ return; -+ -+ /* -+ * rq::ttwu_pending racy indication of out-standing wakeups. -+ * Races such that false-negatives are possible, since they -+ * are shorter lived that false-positives would be. -+ */ -+ WRITE_ONCE(rq->ttwu_pending, 0); -+ -+ rq_lock_irqsave(rq, &rf); -+ update_rq_clock(rq); -+ -+ llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { -+ if (WARN_ON_ONCE(p->on_cpu)) -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq))) -+ set_task_cpu(p, cpu_of(rq)); -+ -+ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? 
WF_MIGRATED : 0); -+ } -+ -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+void send_call_function_single_ipi(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (!set_nr_if_polling(rq->idle)) -+ arch_send_call_function_single_ipi(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+/* -+ * Queue a task on the target CPUs wake_list and wake the CPU via IPI if -+ * necessary. The wakee CPU on receipt of the IPI will queue the task -+ * via sched_ttwu_wakeup() for activation so the wakee incurs the cost -+ * of the wakeup instead of the waker. -+ */ -+static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); -+ -+ WRITE_ONCE(rq->ttwu_pending, 1); -+ __smp_call_single_queue(cpu, &p->wake_entry.llist); -+} -+ -+static inline bool ttwu_queue_cond(int cpu, int wake_flags) -+{ -+ /* -+ * If the CPU does not share cache, then queue the task on the -+ * remote rqs wakelist to avoid accessing remote data. -+ */ -+ if (!cpus_share_cache(smp_processor_id(), cpu)) -+ return true; -+ -+ /* -+ * If the task is descheduling and the only running task on the -+ * CPU then use the wakelist to offload the task activation to -+ * the soon-to-be-idle CPU as the current CPU is likely busy. -+ * nr_running is checked to avoid unnecessary task stacking. -+ */ -+ if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) -+ return true; -+ -+ return false; -+} -+ -+static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ if (ttwu_queue_cond(cpu, wake_flags)) { -+ if (WARN_ON_ONCE(cpu == smp_processor_id())) -+ return false; -+ -+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ -+ __ttwu_queue_wakelist(p, cpu, wake_flags); -+ return true; -+ } -+ -+ return false; -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (is_idle_task(rq->curr)) -+ smp_send_reschedule(cpu); -+ /* Else CPU is not idle, do nothing here */ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); -+} -+#endif /* CONFIG_SMP */ -+ -+static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+#if defined(CONFIG_SMP) -+ if (ttwu_queue_wakelist(p, cpu, wake_flags)) -+ return; -+#endif -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ raw_spin_unlock(&rq->lock); -+} -+ -+/* -+ * Notes on Program-Order guarantees on SMP systems. -+ * -+ * MIGRATION -+ * -+ * The basic program-order guarantee on SMP systems is that when a task [t] -+ * migrates, all its activity on its old CPU [c0] happens-before any subsequent -+ * execution on its new CPU [c1]. -+ * -+ * For migration (of runnable tasks) this is provided by the following means: -+ * -+ * A) UNLOCK of the rq(c0)->lock scheduling out task t -+ * B) migration for t is required to synchronize *both* rq(c0)->lock and -+ * rq(c1)->lock (if not at the same time, then in that order). 
-+ * C) LOCK of the rq(c1)->lock scheduling in task -+ * -+ * Transitivity guarantees that B happens after A and C after B. -+ * Note: we only require RCpc transitivity. -+ * Note: the CPU doing B need not be c0 or c1 -+ * -+ * Example: -+ * -+ * CPU0 CPU1 CPU2 -+ * -+ * LOCK rq(0)->lock -+ * sched-out X -+ * sched-in Y -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(0)->lock // orders against CPU0 -+ * dequeue X -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(1)->lock -+ * enqueue X -+ * UNLOCK rq(1)->lock -+ * -+ * LOCK rq(1)->lock // orders against CPU2 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(1)->lock -+ * -+ * -+ * BLOCKING -- aka. SLEEP + WAKEUP -+ * -+ * For blocking we (obviously) need to provide the same guarantee as for -+ * migration. However the means are completely different as there is no lock -+ * chain to provide order. Instead we do: -+ * -+ * 1) smp_store_release(X->on_cpu, 0) -+ * 2) smp_cond_load_acquire(!X->on_cpu) -+ * -+ * Example: -+ * -+ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule) -+ * -+ * LOCK rq(0)->lock LOCK X->pi_lock -+ * dequeue X -+ * sched-out X -+ * smp_store_release(X->on_cpu, 0); -+ * -+ * smp_cond_load_acquire(&X->on_cpu, !VAL); -+ * X->state = WAKING -+ * set_task_cpu(X,2) -+ * -+ * LOCK rq(2)->lock -+ * enqueue X -+ * X->state = RUNNING -+ * UNLOCK rq(2)->lock -+ * -+ * LOCK rq(2)->lock // orders against CPU1 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(2)->lock -+ * -+ * UNLOCK X->pi_lock -+ * UNLOCK rq(0)->lock -+ * -+ * -+ * However; for wakeups there is a second guarantee we must provide, namely we -+ * must observe the state that lead to our wakeup. That is, not only must our -+ * task observe its own prior state, it must also observe the stores prior to -+ * its wakeup. -+ * -+ * This means that any means of doing remote wakeups must order the CPU doing -+ * the wakeup against the CPU the task is going to end up running on. This, -+ * however, is already required for the regular Program-Order guarantee above, -+ * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire). -+ * -+ */ -+ -+/*** -+ * try_to_wake_up - wake up a thread -+ * @p: the thread to be awakened -+ * @state: the mask of task states that can be woken -+ * @wake_flags: wake modifier flags (WF_*) -+ * -+ * Put it on the run-queue if it's not already there. The "current" -+ * thread is always on the run-queue (except when the actual -+ * re-schedule is in progress), and as such you're allowed to do -+ * the simpler "current->state = TASK_RUNNING" to mark yourself -+ * runnable without the overhead of this. -+ * -+ * Return: %true if @p was woken up, %false if it was already running. -+ * or @state didn't match @p's state. -+ */ -+static int try_to_wake_up(struct task_struct *p, unsigned int state, -+ int wake_flags) -+{ -+ unsigned long flags; -+ int cpu, success = 0; -+ -+ preempt_disable(); -+ if (p == current) { -+ /* -+ * We're waking current, this means 'p->on_rq' and 'task_cpu(p) -+ * == smp_processor_id()'. Together this means we can special -+ * case the whole 'p->on_rq && ttwu_remote()' case below -+ * without taking any locks. -+ * -+ * In particular: -+ * - we rely on Program-Order guarantees for all the ordering, -+ * - we're serialized against set_special_state() by virtue of -+ * it disabling IRQs (this allows not taking ->pi_lock). 
-+ */ -+ if (!(p->state & state)) -+ goto out; -+ -+ success = 1; -+ trace_sched_waking(p); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+ goto out; -+ } -+ -+ /* -+ * If we are going to wake up a thread waiting for CONDITION we -+ * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with mb() in -+ * set_current_state() the waiting thread does. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ smp_mb__after_spinlock(); -+ if (!(p->state & state)) -+ goto unlock; -+ -+ trace_sched_waking(p); -+ -+ /* We're going to change ->state: */ -+ success = 1; -+ -+ /* -+ * Ensure we load p->on_rq _after_ p->state, otherwise it would -+ * be possible to, falsely, observe p->on_rq == 0 and get stuck -+ * in smp_cond_load_acquire() below. -+ * -+ * sched_ttwu_pending() try_to_wake_up() -+ * STORE p->on_rq = 1 LOAD p->state -+ * UNLOCK rq->lock -+ * -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * UNLOCK rq->lock -+ * -+ * [task p] -+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ * -+ * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). -+ */ -+ smp_rmb(); -+ if (READ_ONCE(p->on_rq) && ttwu_remote(p, wake_flags)) -+ goto unlock; -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -+ * possible to, falsely, observe p->on_cpu == 0. -+ * -+ * One must be running (->on_cpu == 1) in order to remove oneself -+ * from the runqueue. -+ * -+ * __schedule() (switch to task 'p') try_to_wake_up() -+ * STORE p->on_cpu = 1 LOAD p->on_rq -+ * UNLOCK rq->lock -+ * -+ * __schedule() (put 'p' to sleep) -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * STORE p->on_rq = 0 LOAD p->on_cpu -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ * -+ * Form a control-dep-acquire with p->on_rq == 0 above, to ensure -+ * schedule()'s deactivate_task() has 'happened' and p will no longer -+ * care about it's own p->state. See the comment in __schedule(). -+ */ -+ smp_acquire__after_ctrl_dep(); -+ -+ /* -+ * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq -+ * == 0), which means we need to do an enqueue, change p->state to -+ * TASK_WAKING such that we can unlock p->pi_lock before doing the -+ * enqueue, such as ttwu_queue_wakelist(). -+ */ -+ p->state = TASK_WAKING; -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, considering queueing p on the remote CPUs wake_list -+ * which potentially sends an IPI instead of spinning on p->on_cpu to -+ * let the waker make forward progress. This is safe because IRQs are -+ * disabled and the IPI will deliver after on_cpu is cleared. -+ * -+ * Ensure we load task_cpu(p) after p->on_cpu: -+ * -+ * set_task_cpu(p, cpu); -+ * STORE p->cpu = @cpu -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock -+ * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu) -+ * STORE p->on_cpu = 1 LOAD p->cpu -+ * -+ * to ensure we observe the correct CPU on which the task is currently -+ * scheduling. 
-+ */ -+ if (smp_load_acquire(&p->on_cpu) && -+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) -+ goto unlock; -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until its done referencing the task. -+ * -+ * Pairs with the smp_store_release() in finish_task(). -+ * -+ * This ensures that tasks getting woken will be fully ordered against -+ * their previous state and preserve Program Order. -+ */ -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ sched_task_ttwu(p); -+ -+ cpu = select_task_rq(p, this_rq()); -+ -+ if (cpu != task_cpu(p)) { -+ wake_flags |= WF_MIGRATED; -+ psi_ttwu_dequeue(p); -+ set_task_cpu(p, cpu); -+ } -+#else -+ cpu = task_cpu(p); -+#endif /* CONFIG_SMP */ -+ -+ ttwu_queue(p, cpu, wake_flags); -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+out: -+ if (success) -+ ttwu_stat(p, task_cpu(p), wake_flags); -+ preempt_enable(); -+ -+ return success; -+} -+ -+/** -+ * try_invoke_on_locked_down_task - Invoke a function on task in fixed state -+ * @p: Process for which the function is to be invoked. -+ * @func: Function to invoke. -+ * @arg: Argument to function. -+ * -+ * If the specified task can be quickly locked into a definite state -+ * (either sleeping or on a given runqueue), arrange to keep it in that -+ * state while invoking @func(@arg). This function can use ->on_rq and -+ * task_curr() to work out what the state is, if required. Given that -+ * @func can be invoked with a runqueue lock held, it had better be quite -+ * lightweight. -+ * -+ * Returns: -+ * @false if the task slipped out from under the locks. -+ * @true if the task was locked onto a runqueue or is sleeping. -+ * However, @func can override this by returning @false. -+ */ -+bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) -+{ -+ bool ret = false; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ lockdep_assert_irqs_enabled(); -+ raw_spin_lock_irq(&p->pi_lock); -+ if (p->on_rq) { -+ rq = __task_rq_lock(p, &rf); -+ if (task_rq(p) == rq) -+ ret = func(p, arg); -+ __task_rq_unlock(rq, &rf); -+ } else { -+ switch (p->state) { -+ case TASK_RUNNING: -+ case TASK_WAKING: -+ break; -+ default: -+ smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). -+ if (!p->on_rq) -+ ret = func(p, arg); -+ } -+ } -+ raw_spin_unlock_irq(&p->pi_lock); -+ return ret; -+} -+ -+/** -+ * wake_up_process - Wake up a specific process -+ * @p: The process to be woken up. -+ * -+ * Attempt to wake up the nominated process and move it to the set of runnable -+ * processes. -+ * -+ * Return: 1 if the process was woken up, 0 if it was already running. -+ * -+ * This function executes a full memory barrier before accessing the task state. -+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+int wake_up_state(struct task_struct *p, unsigned int state) -+{ -+ return try_to_wake_up(p, state, 0); -+} -+ -+/* -+ * Perform scheduler related setup for a newly forked process p. -+ * p is forked by current. 
-+ * -+ * __sched_fork() is basic setup used by init_idle() too: -+ */ -+static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p) -+{ -+ p->on_rq = 0; -+ p->on_cpu = 0; -+ p->utime = 0; -+ p->stime = 0; -+ p->sched_time = 0; -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ INIT_HLIST_HEAD(&p->preempt_notifiers); -+#endif -+ -+#ifdef CONFIG_COMPACTION -+ p->capture_control = NULL; -+#endif -+#ifdef CONFIG_SMP -+ p->wake_entry.u_flags = CSD_TYPE_TTWU; -+#endif -+} -+ -+/* -+ * fork()/clone()-time setup: -+ */ -+int sched_fork(unsigned long clone_flags, struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ -+ __sched_fork(clone_flags, p); -+ /* -+ * We mark the process as NEW here. This guarantees that -+ * nobody will actually run it, and a signal or other external -+ * event cannot wake it up and insert it on the runqueue either. -+ */ -+ p->state = TASK_NEW; -+ -+ /* -+ * Make sure we do not leak PI boosting priority to the child. -+ */ -+ p->prio = current->normal_prio; -+ -+ /* -+ * Revert to default priority/policy on fork if requested. -+ */ -+ if (unlikely(p->sched_reset_on_fork)) { -+ if (task_has_rt_policy(p)) { -+ p->policy = SCHED_NORMAL; -+ p->static_prio = NICE_TO_PRIO(0); -+ p->rt_priority = 0; -+ } else if (PRIO_TO_NICE(p->static_prio) < 0) -+ p->static_prio = NICE_TO_PRIO(0); -+ -+ p->prio = p->normal_prio = normal_prio(p); -+ -+ /* -+ * We don't need the reset flag anymore after the fork. It has -+ * fulfilled its duty: -+ */ -+ p->sched_reset_on_fork = 0; -+ } -+ -+ /* -+ * The child is not yet in the pid-hash so no cgroup attach races, -+ * and the cgroup is pinned to this child due to cgroup_fork() -+ * is ran before sched_fork(). -+ * -+ * Silence PROVE_RCU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. -+ */ -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ -+ rq->curr->time_slice /= 2; -+ p->time_slice = rq->curr->time_slice; -+#ifdef CONFIG_SCHED_HRTICK -+ hrtick_start(rq, rq->curr->time_slice); -+#endif -+ -+ if (p->time_slice < RESCHED_NS) { -+ p->time_slice = sched_timeslice_ns; -+ resched_curr(rq); -+ } -+ sched_task_fork(p, rq); -+ raw_spin_unlock(&rq->lock); -+ -+ rseq_migrate(p); -+ /* -+ * We're setting the CPU for the first time, we don't migrate, -+ * so use __set_task_cpu(). -+ */ -+ __set_task_cpu(p, cpu_of(rq)); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ return 0; -+} -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+static bool __initdata __sched_schedstats = false; -+ -+static void set_schedstats(bool enabled) -+{ -+ if (enabled) -+ static_branch_enable(&sched_schedstats); -+ else -+ static_branch_disable(&sched_schedstats); -+} -+ -+void force_schedstat_enabled(void) -+{ -+ if (!schedstat_enabled()) { -+ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); -+ static_branch_enable(&sched_schedstats); -+ } -+} -+ -+static int __init setup_schedstats(char *str) -+{ -+ int ret = 0; -+ if (!str) -+ goto out; -+ -+ /* -+ * This code is called before jump labels have been set up, so we can't -+ * change the static branch directly just yet. 
Instead set a temporary -+ * variable so init_schedstats() can do it later. -+ */ -+ if (!strcmp(str, "enable")) { -+ __sched_schedstats = true; -+ ret = 1; -+ } else if (!strcmp(str, "disable")) { -+ __sched_schedstats = false; -+ ret = 1; -+ } -+out: -+ if (!ret) -+ pr_warn("Unable to parse schedstats=\n"); -+ -+ return ret; -+} -+__setup("schedstats=", setup_schedstats); -+ -+static void __init init_schedstats(void) -+{ -+ set_schedstats(__sched_schedstats); -+} -+ -+#ifdef CONFIG_PROC_SYSCTL -+int sysctl_schedstats(struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) -+{ -+ struct ctl_table t; -+ int err; -+ int state = static_branch_likely(&sched_schedstats); -+ -+ if (write && !capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ t = *table; -+ t.data = &state; -+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); -+ if (err < 0) -+ return err; -+ if (write) -+ set_schedstats(state); -+ return err; -+} -+#endif /* CONFIG_PROC_SYSCTL */ -+#else /* !CONFIG_SCHEDSTATS */ -+static inline void init_schedstats(void) {} -+#endif /* CONFIG_SCHEDSTATS */ -+ -+/* -+ * wake_up_new_task - wake up a newly created task for the first time. -+ * -+ * This function will do some initial scheduler statistics housekeeping -+ * that must be done for every newly created context, then puts the task -+ * on the runqueue and wakes it. -+ */ -+void wake_up_new_task(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ p->state = TASK_RUNNING; -+ -+ rq = cpu_rq(select_task_rq(p, this_rq())); -+#ifdef CONFIG_SMP -+ rseq_migrate(p); -+ /* -+ * Fork balancing, do it here and not earlier because: -+ * - cpus_ptr can change in the fork path -+ * - any previously selected CPU might disappear through hotplug -+ * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, -+ * as we're not fully set-up yet. -+ */ -+ __set_task_cpu(p, cpu_of(rq)); -+#endif -+ -+ raw_spin_lock(&rq->lock); -+ -+ update_rq_clock(rq); -+ activate_task(p, rq); -+ trace_sched_wakeup_new(p); -+ check_preempt_curr(rq); -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ -+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); -+ -+void preempt_notifier_inc(void) -+{ -+ static_branch_inc(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_inc); -+ -+void preempt_notifier_dec(void) -+{ -+ static_branch_dec(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_dec); -+ -+/** -+ * preempt_notifier_register - tell me when current is being preempted & rescheduled -+ * @notifier: notifier struct to register -+ */ -+void preempt_notifier_register(struct preempt_notifier *notifier) -+{ -+ if (!static_branch_unlikely(&preempt_notifier_key)) -+ WARN(1, "registering preempt_notifier while notifiers disabled\n"); -+ -+ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_register); -+ -+/** -+ * preempt_notifier_unregister - no longer interested in preemption notifications -+ * @notifier: notifier struct to unregister -+ * -+ * This is *not* safe to call from within a preemption notifier. 
-+ */ -+void preempt_notifier_unregister(struct preempt_notifier *notifier) -+{ -+ hlist_del(¬ifier->link); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_unregister); -+ -+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_in(notifier, raw_smp_processor_id()); -+} -+ -+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_in_preempt_notifiers(curr); -+} -+ -+static void -+__fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_out(notifier, next); -+} -+ -+static __always_inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_out_preempt_notifiers(curr, next); -+} -+ -+#else /* !CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+} -+ -+static inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+} -+ -+#endif /* CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void prepare_task(struct task_struct *next) -+{ -+ /* -+ * Claim the task as running, we do this before switching to it -+ * such that any running task will have this set. -+ */ -+ next->on_cpu = 1; -+} -+ -+static inline void finish_task(struct task_struct *prev) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->on_cpu is cleared, the task can be moved to a different CPU. -+ * We must ensure this doesn't happen until the switch is completely -+ * finished. -+ * -+ * In particular, the load of prev->state in finish_task_switch() must -+ * happen before this. -+ * -+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). -+ */ -+ smp_store_release(&prev->on_cpu, 0); -+#else -+ prev->on_cpu = 0; -+#endif -+} -+ -+static inline void -+prepare_lock_switch(struct rq *rq, struct task_struct *next) -+{ -+ /* -+ * Since the runqueue lock will be released by the next -+ * task (which is an invalid locking op but in the case -+ * of the scheduler it's an obvious special-case), so we -+ * do an early lockdep release here: -+ */ -+ spin_release(&rq->lock.dep_map, _THIS_IP_); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* this is a valid case when another task releases the spinlock */ -+ rq->lock.owner = next; -+#endif -+} -+ -+static inline void finish_lock_switch(struct rq *rq) -+{ -+ /* -+ * If we are tracking spinlock dependencies then we have to -+ * fix up the runqueue lock - which gets 'carried over' from -+ * prev into current: -+ */ -+ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+/** -+ * prepare_task_switch - prepare to switch tasks -+ * @rq: the runqueue preparing to switch -+ * @next: the task we are going to switch to. -+ * -+ * This is called with the rq lock held and interrupts off. It must -+ * be paired with a subsequent finish_task_switch after the context -+ * switch. -+ * -+ * prepare_task_switch sets up locking and calls architecture specific -+ * hooks. 
-+ */ -+static inline void -+prepare_task_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ kcov_prepare_switch(prev); -+ sched_info_switch(rq, prev, next); -+ perf_event_task_sched_out(prev, next); -+ rseq_preempt(prev); -+ fire_sched_out_preempt_notifiers(prev, next); -+ prepare_task(next); -+ prepare_arch_switch(next); -+} -+ -+/** -+ * finish_task_switch - clean up after a task-switch -+ * @rq: runqueue associated with task-switch -+ * @prev: the thread we just switched away from. -+ * -+ * finish_task_switch must be called after the context switch, paired -+ * with a prepare_task_switch call before the context switch. -+ * finish_task_switch will reconcile locking set up by prepare_task_switch, -+ * and do any other architecture-specific cleanup actions. -+ * -+ * Note that we may have delayed dropping an mm in context_switch(). If -+ * so, we finish that here outside of the runqueue lock. (Doing it -+ * with the lock held can cause deadlocks; see schedule() for -+ * details.) -+ * -+ * The context switch have flipped the stack from under us and restored the -+ * local variables which were saved when this task called schedule() in the -+ * past. prev == current is still correct but we need to recalculate this_rq -+ * because prev may have moved to another CPU. -+ */ -+static struct rq *finish_task_switch(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq = this_rq(); -+ struct mm_struct *mm = rq->prev_mm; -+ long prev_state; -+ -+ /* -+ * The previous task will have left us with a preempt_count of 2 -+ * because it left us after: -+ * -+ * schedule() -+ * preempt_disable(); // 1 -+ * __schedule() -+ * raw_spin_lock_irq(&rq->lock) // 2 -+ * -+ * Also, see FORK_PREEMPT_COUNT. -+ */ -+ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, -+ "corrupted preempt_count: %s/%d/0x%x\n", -+ current->comm, current->pid, preempt_count())) -+ preempt_count_set(FORK_PREEMPT_COUNT); -+ -+ rq->prev_mm = NULL; -+ -+ /* -+ * A task struct has one reference for the use as "current". -+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls -+ * schedule one last time. The schedule call will never return, and -+ * the scheduled task must drop that reference. -+ * -+ * We must observe prev->state before clearing prev->on_cpu (in -+ * finish_task), otherwise a concurrent wakeup can get prev -+ * running on another CPU and we could rave with its RUNNING -> DEAD -+ * transition, resulting in a double drop. -+ */ -+ prev_state = prev->state; -+ vtime_task_switch(prev); -+ perf_event_task_sched_in(prev, current); -+ finish_task(prev); -+ finish_lock_switch(rq); -+ finish_arch_post_lock_switch(); -+ kcov_finish_switch(current); -+ -+ fire_sched_in_preempt_notifiers(current); -+ /* -+ * When switching through a kernel thread, the loop in -+ * membarrier_{private,global}_expedited() may have observed that -+ * kernel thread and not issued an IPI. It is therefore possible to -+ * schedule between user->kernel->user threads without passing though -+ * switch_mm(). Membarrier requires a barrier after storing to -+ * rq->curr, before returning to userspace, so provide them here: -+ * -+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly -+ * provided by mmdrop(), -+ * - a sync_core for SYNC_CORE. 
-+ */ -+ if (mm) { -+ membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop(mm); -+ } -+ if (unlikely(prev_state == TASK_DEAD)) { -+ /* -+ * Remove function-return probe instances associated with this -+ * task and put them back on the free list. -+ */ -+ kprobe_flush_task(prev); -+ -+ /* Task is done with its stack. */ -+ put_task_stack(prev); -+ -+ put_task_struct_rcu_user(prev); -+ } -+ -+ tick_nohz_task_switch(); -+ return rq; -+} -+ -+/** -+ * schedule_tail - first thing a freshly forked thread must call. -+ * @prev: the thread we just switched away from. -+ */ -+asmlinkage __visible void schedule_tail(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq; -+ -+ /* -+ * New tasks start with FORK_PREEMPT_COUNT, see there and -+ * finish_task_switch() for details. -+ * -+ * finish_task_switch() will drop rq->lock() and lower preempt_count -+ * and the preempt_enable() will end up enabling preemption (on -+ * PREEMPT_COUNT kernels). -+ */ -+ -+ rq = finish_task_switch(prev); -+ preempt_enable(); -+ -+ if (current->set_child_tid) -+ put_user(task_pid_vnr(current), current->set_child_tid); -+ -+ calculate_sigpending(); -+} -+ -+/* -+ * context_switch - switch to the new MM and the new thread's register state. -+ */ -+static __always_inline struct rq * -+context_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ prepare_task_switch(rq, prev, next); -+ -+ /* -+ * For paravirt, this is coupled with an exit in switch_to to -+ * combine the page table reload and the switch backend into -+ * one hypercall. -+ */ -+ arch_start_context_switch(prev); -+ -+ /* -+ * kernel -> kernel lazy + transfer active -+ * user -> kernel lazy + mmgrab() active -+ * -+ * kernel -> user switch + mmdrop() active -+ * user -> user switch -+ */ -+ if (!next->mm) { // to kernel -+ enter_lazy_tlb(prev->active_mm, next); -+ -+ next->active_mm = prev->active_mm; -+ if (prev->mm) // from user -+ mmgrab(prev->active_mm); -+ else -+ prev->active_mm = NULL; -+ } else { // to user -+ membarrier_switch_mm(rq, prev->active_mm, next->mm); -+ /* -+ * sys_membarrier() requires an smp_mb() between setting -+ * rq->curr / membarrier_switch_mm() and returning to userspace. -+ * -+ * The below provides this either through switch_mm(), or in -+ * case 'prev->active_mm == next->mm' through -+ * finish_task_switch()'s mmdrop(). -+ */ -+ switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ -+ if (!prev->mm) { // from kernel -+ /* will mmdrop() in finish_task_switch(). */ -+ rq->prev_mm = prev->active_mm; -+ prev->active_mm = NULL; -+ } -+ } -+ -+ prepare_lock_switch(rq, next); -+ -+ /* Here we just switch the register state and the stack. */ -+ switch_to(prev, next, prev); -+ barrier(); -+ -+ return finish_task_switch(prev); -+} -+ -+/* -+ * nr_running, nr_uninterruptible and nr_context_switches: -+ * -+ * externally visible scheduler statistics: current number of runnable -+ * threads, total number of context switches performed since bootup. -+ */ -+unsigned long nr_running(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_running; -+ -+ return sum; -+} -+ -+/* -+ * Check if only the current task is running on the CPU. -+ * -+ * Caution: this function does not check that the caller has disabled -+ * preemption, thus the result might have a time-of-check-to-time-of-use -+ * race. 
The caller is responsible to use it correctly, for example: -+ * -+ * - from a non-preemptible section (of course) -+ * -+ * - from a thread that is bound to a single CPU -+ * -+ * - in a loop with very short iterations (e.g. a polling loop) -+ */ -+bool single_task_running(void) -+{ -+ return raw_rq()->nr_running == 1; -+} -+EXPORT_SYMBOL(single_task_running); -+ -+unsigned long long nr_context_switches(void) -+{ -+ int i; -+ unsigned long long sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += cpu_rq(i)->nr_switches; -+ -+ return sum; -+} -+ -+/* -+ * Consumers of these two interfaces, like for example the cpuidle menu -+ * governor, are using nonsensical data. Preferring shallow idle state selection -+ * for a CPU that has IO-wait which might not even end up running the task when -+ * it does become runnable. -+ */ -+ -+unsigned long nr_iowait_cpu(int cpu) -+{ -+ return atomic_read(&cpu_rq(cpu)->nr_iowait); -+} -+ -+/* -+ * IO-wait accounting, and how its mostly bollocks (on SMP). -+ * -+ * The idea behind IO-wait account is to account the idle time that we could -+ * have spend running if it were not for IO. That is, if we were to improve the -+ * storage performance, we'd have a proportional reduction in IO-wait time. -+ * -+ * This all works nicely on UP, where, when a task blocks on IO, we account -+ * idle time as IO-wait, because if the storage were faster, it could've been -+ * running and we'd not be idle. -+ * -+ * This has been extended to SMP, by doing the same for each CPU. This however -+ * is broken. -+ * -+ * Imagine for instance the case where two tasks block on one CPU, only the one -+ * CPU will have IO-wait accounted, while the other has regular idle. Even -+ * though, if the storage were faster, both could've ran at the same time, -+ * utilising both CPUs. -+ * -+ * This means, that when looking globally, the current IO-wait accounting on -+ * SMP is a lower bound, by reason of under accounting. -+ * -+ * Worse, since the numbers are provided per CPU, they are sometimes -+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly -+ * associated with any one particular CPU, it can wake to another CPU than it -+ * blocked on. This means the per CPU IO-wait number is meaningless. -+ * -+ * Task CPU affinities can make all that even more 'interesting'. -+ */ -+ -+unsigned long nr_iowait(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += nr_iowait_cpu(i); -+ -+ return sum; -+} -+ -+#ifdef CONFIG_SMP -+ -+/* -+ * sched_exec - execve() is a valuable balancing opportunity, because at -+ * this point the task has the smallest effective memory and cache -+ * footprint. 
-+ */ -+void sched_exec(void) -+{ -+ struct task_struct *p = current; -+ unsigned long flags; -+ int dest_cpu; -+ struct rq *rq; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = this_rq(); -+ -+ if (rq != task_rq(p) || rq->nr_running < 2) -+ goto unlock; -+ -+ dest_cpu = select_task_rq(p, task_rq(p)); -+ if (dest_cpu == smp_processor_id()) -+ goto unlock; -+ -+ if (likely(cpu_active(dest_cpu))) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); -+ return; -+ } -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#endif -+ -+DEFINE_PER_CPU(struct kernel_stat, kstat); -+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -+ -+EXPORT_PER_CPU_SYMBOL(kstat); -+EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -+ -+static inline void update_curr(struct rq *rq, struct task_struct *p) -+{ -+ s64 ns = rq->clock_task - p->last_ran; -+ -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ p->time_slice -= ns; -+ p->last_ran = rq->clock_task; -+} -+ -+/* -+ * Return accounted runtime for the task. -+ * Return separately the current's pending runtime that have not been -+ * accounted yet. -+ */ -+unsigned long long task_sched_runtime(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ u64 ns; -+ -+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) -+ /* -+ * 64-bit doesn't need locks to atomically read a 64-bit value. -+ * So we have a optimization chance when the task's delta_exec is 0. -+ * Reading ->on_cpu is racy, but this is ok. -+ * -+ * If we race with it leaving CPU, we'll take a lock. So we're correct. -+ * If we race with it entering CPU, unaccounted time is 0. This is -+ * indistinguishable from the read occurring a few cycles earlier. -+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has -+ * been accounted, so we're correct here as well. -+ */ -+ if (!p->on_cpu || !task_on_rq_queued(p)) -+ return tsk_seruntime(p); -+#endif -+ -+ rq = task_access_lock_irqsave(p, &lock, &flags); -+ /* -+ * Must be ->curr _and_ ->on_rq. If dequeued, we would -+ * project cycles that may never be accounted to this -+ * thread, breaking clock_gettime(). -+ */ -+ if (p == rq->curr && task_on_rq_queued(p)) { -+ update_rq_clock(rq); -+ update_curr(rq, p); -+ } -+ ns = tsk_seruntime(p); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ return ns; -+} -+ -+DEFINE_PER_CPU(unsigned long, thermal_pressure); -+ -+void arch_set_thermal_pressure(struct cpumask *cpus, -+ unsigned long th_pressure) -+{ -+ int cpu; -+ -+ for_each_cpu(cpu, cpus) -+ WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); -+} -+ -+/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static inline void scheduler_task_tick(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ if (is_idle_task(p)) -+ return; -+ -+ update_curr(rq, p); -+ cpufreq_update_util(rq, 0); -+ -+ /* -+ * Tasks have less than RESCHED_NS of time slice left they will be -+ * rescheduled. -+ */ -+ if (p->time_slice >= RESCHED_NS) -+ return; -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ -+/* -+ * This function gets called by the timer code, with HZ frequency. -+ * We call it with interrupts disabled. 
-+ */ -+void scheduler_tick(void) -+{ -+ int cpu __maybe_unused = smp_processor_id(); -+ struct rq *rq = cpu_rq(cpu); -+ -+ arch_scale_freq_tick(); -+ sched_clock_tick(); -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ scheduler_task_tick(rq); -+ calc_global_load_tick(rq); -+ psi_task_tick(rq); -+ -+ rq->last_tick = rq->clock; -+ raw_spin_unlock(&rq->lock); -+ -+ perf_event_task_tick(); -+} -+ -+#ifdef CONFIG_SCHED_SMT -+static inline int active_load_balance_cpu_stop(void *data) -+{ -+ struct rq *rq = this_rq(); -+ struct task_struct *p = data; -+ cpumask_t tmp; -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ rq->active_balance = 0; -+ /* _something_ may have changed the task, double check again */ -+ if (task_on_rq_queued(p) && task_rq(p) == rq && -+ cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) { -+ int cpu = cpu_of(rq); -+ int dcpu = __best_mask_cpu(cpu, &tmp, -+ per_cpu(sched_cpu_llc_mask, cpu)); -+ rq = move_queued_task(rq, p, dcpu); -+ } -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_restore(flags); -+ -+ return 0; -+} -+ -+/* sg_balance_trigger - trigger slibing group balance for @cpu */ -+static inline int sg_balance_trigger(const int cpu) -+{ -+ struct rq *rq= cpu_rq(cpu); -+ unsigned long flags; -+ struct task_struct *curr; -+ int res; -+ -+ if (!raw_spin_trylock_irqsave(&rq->lock, flags)) -+ return 0; -+ curr = rq->curr; -+ res = (!is_idle_task(curr)) && (1 == rq->nr_running) &&\ -+ cpumask_intersects(curr->cpus_ptr, &sched_sg_idle_mask) &&\ -+ (!rq->active_balance); -+ -+ if (res) -+ rq->active_balance = 1; -+ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ if (res) -+ stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop, -+ curr, &rq->active_balance_work); -+ return res; -+} -+ -+/* -+ * sg_balance_check - slibing group balance check for run queue @rq -+ */ -+static inline void sg_balance_check(struct rq *rq) -+{ -+ cpumask_t chk; -+ int cpu; -+ -+ /* exit when no sg in idle */ -+ if (cpumask_empty(&sched_sg_idle_mask)) -+ return; -+ -+ cpu = cpu_of(rq); -+ /* -+ * Only cpu in slibing idle group will do the checking and then -+ * find potential cpus which can migrate the current running task -+ */ -+ if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && -+ cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) && -+ cpumask_andnot(&chk, &chk, &sched_rq_watermark[IDLE_WM])) { -+ int i, tried = 0; -+ -+ for_each_cpu_wrap(i, &chk, cpu) { -+ if (cpumask_subset(cpu_smt_mask(i), &chk)) { -+ if (sg_balance_trigger(i)) -+ return; -+ if (tried) -+ return; -+ tried++; -+ } -+ } -+ } -+} -+#endif /* CONFIG_SCHED_SMT */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+ -+struct tick_work { -+ int cpu; -+ atomic_t state; -+ struct delayed_work work; -+}; -+/* Values for ->state, see diagram below. */ -+#define TICK_SCHED_REMOTE_OFFLINE 0 -+#define TICK_SCHED_REMOTE_OFFLINING 1 -+#define TICK_SCHED_REMOTE_RUNNING 2 -+ -+/* -+ * State diagram for ->state: -+ * -+ * -+ * TICK_SCHED_REMOTE_OFFLINE -+ * | ^ -+ * | | -+ * | | sched_tick_remote() -+ * | | -+ * | | -+ * +--TICK_SCHED_REMOTE_OFFLINING -+ * | ^ -+ * | | -+ * sched_tick_start() | | sched_tick_stop() -+ * | | -+ * V | -+ * TICK_SCHED_REMOTE_RUNNING -+ * -+ * -+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() -+ * and sched_tick_start() are happy to leave the state in RUNNING. 
-+ */ -+ -+static struct tick_work __percpu *tick_work_cpu; -+ -+static void sched_tick_remote(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct tick_work *twork = container_of(dwork, struct tick_work, work); -+ int cpu = twork->cpu; -+ struct rq *rq = cpu_rq(cpu); -+ struct task_struct *curr; -+ unsigned long flags; -+ u64 delta; -+ int os; -+ -+ /* -+ * Handle the tick only if it appears the remote CPU is running in full -+ * dynticks mode. The check is racy by nature, but missing a tick or -+ * having one too much is no big deal because the scheduler tick updates -+ * statistics and checks timeslices in a time-independent way, regardless -+ * of when exactly it is running. -+ */ -+ if (!tick_nohz_tick_stopped_cpu(cpu)) -+ goto out_requeue; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ curr = rq->curr; -+ if (cpu_is_offline(cpu)) -+ goto out_unlock; -+ -+ update_rq_clock(rq); -+ if (!is_idle_task(curr)) { -+ /* -+ * Make sure the next tick runs within a reasonable -+ * amount of time. -+ */ -+ delta = rq_clock_task(rq) - curr->last_ran; -+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); -+ } -+ scheduler_task_tick(rq); -+ -+ calc_load_nohz_remote(rq); -+out_unlock: -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+out_requeue: -+ /* -+ * Run the remote tick once per second (1Hz). This arbitrary -+ * frequency is large enough to avoid overload but short enough -+ * to keep scheduler internal stats reasonably up to date. But -+ * first update state to reflect hotplug activity if required. -+ */ -+ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); -+ if (os == TICK_SCHED_REMOTE_RUNNING) -+ queue_delayed_work(system_unbound_wq, dwork, HZ); -+} -+ -+static void sched_tick_start(int cpu) -+{ -+ int os; -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); -+ if (os == TICK_SCHED_REMOTE_OFFLINE) { -+ twork->cpu = cpu; -+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); -+ queue_delayed_work(system_unbound_wq, &twork->work, HZ); -+ } -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void sched_tick_stop(int cpu) -+{ -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ cancel_delayed_work_sync(&twork->work); -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+int __init sched_tick_offload_init(void) -+{ -+ tick_work_cpu = alloc_percpu(struct tick_work); -+ BUG_ON(!tick_work_cpu); -+ return 0; -+} -+ -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_tick_start(int cpu) { } -+static inline void sched_tick_stop(int cpu) { } -+#endif -+ -+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ -+ defined(CONFIG_PREEMPT_TRACER)) -+/* -+ * If the value passed in is equal to the current preempt count -+ * then we just disabled preemption. Start timing the latency. -+ */ -+static inline void preempt_latency_start(int val) -+{ -+ if (preempt_count() == val) { -+ unsigned long ip = get_lock_parent_ip(); -+#ifdef CONFIG_DEBUG_PREEMPT -+ current->preempt_disable_ip = ip; -+#endif -+ trace_preempt_off(CALLER_ADDR0, ip); -+ } -+} -+ -+void preempt_count_add(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? 
-+ */ -+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) -+ return; -+#endif -+ __preempt_count_add(val); -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Spinlock count overflowing soon? -+ */ -+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= -+ PREEMPT_MASK - 10); -+#endif -+ preempt_latency_start(val); -+} -+EXPORT_SYMBOL(preempt_count_add); -+NOKPROBE_SYMBOL(preempt_count_add); -+ -+/* -+ * If the value passed in equals to the current preempt count -+ * then we just enabled preemption. Stop timing the latency. -+ */ -+static inline void preempt_latency_stop(int val) -+{ -+ if (preempt_count() == val) -+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); -+} -+ -+void preempt_count_sub(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) -+ return; -+ /* -+ * Is the spinlock portion underflowing? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && -+ !(preempt_count() & PREEMPT_MASK))) -+ return; -+#endif -+ -+ preempt_latency_stop(val); -+ __preempt_count_sub(val); -+} -+EXPORT_SYMBOL(preempt_count_sub); -+NOKPROBE_SYMBOL(preempt_count_sub); -+ -+#else -+static inline void preempt_latency_start(int val) { } -+static inline void preempt_latency_stop(int val) { } -+#endif -+ -+static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ return p->preempt_disable_ip; -+#else -+ return 0; -+#endif -+} -+ -+/* -+ * Print scheduling while atomic bug: -+ */ -+static noinline void __schedule_bug(struct task_struct *prev) -+{ -+ /* Save this before calling printk(), since that will clobber it */ -+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ if (oops_in_progress) -+ return; -+ -+ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", -+ prev->comm, prev->pid, preempt_count()); -+ -+ debug_show_held_locks(prev); -+ print_modules(); -+ if (irqs_disabled()) -+ print_irqtrace_events(prev); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && in_atomic_preempt_off()) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); -+ } -+ if (panic_on_warn) -+ panic("scheduling while atomic\n"); -+ -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+ -+/* -+ * Various schedule()-time debugging checks and statistics: -+ */ -+static inline void schedule_debug(struct task_struct *prev, bool preempt) -+{ -+#ifdef CONFIG_SCHED_STACK_END_CHECK -+ if (task_stack_end_corrupted(prev)) -+ panic("corrupted stack end detected inside scheduler\n"); -+ -+ if (task_scs_end_corrupted(prev)) -+ panic("corrupted shadow stack detected inside scheduler\n"); -+#endif -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && prev->state && prev->non_block_count) { -+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", -+ prev->comm, prev->pid, prev->non_block_count); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+ } -+#endif -+ -+ if (unlikely(in_atomic_preempt_off())) { -+ __schedule_bug(prev); -+ preempt_count_set(PREEMPT_DISABLED); -+ } -+ rcu_sleep_check(); -+ -+ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -+ -+ schedstat_inc(this_rq()->sched_count); -+} -+ -+/* -+ * Compile time debug macro -+ * #define ALT_SCHED_DEBUG -+ */ -+ -+#ifdef ALT_SCHED_DEBUG -+void alt_sched_debug(void) -+{ -+ printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n", -+ sched_rq_pending_mask.bits[0], -+ sched_rq_watermark[IDLE_WM].bits[0], -+ sched_sg_idle_mask.bits[0]); -+} 
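The preempt_count_add()/preempt_count_sub() pair above only times the outermost disable/enable: preempt_latency_start() fires when the count has just left zero, and preempt_latency_stop() fires when the pending subtraction would return it to zero. A minimal userspace sketch of that bookkeeping, with hypothetical helper names standing in for the kernel primitives (not kernel code):

#include <stdio.h>
#include <time.h>

static int count;                 /* stands in for preempt_count()         */
static struct timespec t0;        /* start of the "disabled" window        */

static void latency_start(int val)
{
	if (count == val)         /* count went 0 -> val: outermost add    */
		clock_gettime(CLOCK_MONOTONIC, &t0);
}

static void latency_stop(int val)
{
	struct timespec t1;

	if (count == val) {       /* the coming sub returns the count to 0 */
		clock_gettime(CLOCK_MONOTONIC, &t1);
		printf("disabled for %ld ns\n",
		       (t1.tv_sec - t0.tv_sec) * 1000000000L +
		       (t1.tv_nsec - t0.tv_nsec));
	}
}

static void count_add(int val) { count += val; latency_start(val); }
static void count_sub(int val) { latency_stop(val); count -= val; }

int main(void)
{
	count_add(1);             /* outermost: starts timing              */
	count_add(1);             /* nested: no effect on timing           */
	count_sub(1);             /* nested: no effect on timing           */
	count_sub(1);             /* outermost: reports the latency        */
	return 0;
}

The same value-equality test is why nested disables cost nothing extra here: only the transition across zero is instrumented.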
-+#else -+inline void alt_sched_debug(void) {} -+#endif -+ -+#ifdef CONFIG_SMP -+ -+#define SCHED_RQ_NR_MIGRATION (32UL) -+/* -+ * Migrate pending tasks in @rq to @dest_cpu -+ * Will try to migrate mininal of half of @rq nr_running tasks and -+ * SCHED_RQ_NR_MIGRATION to @dest_cpu -+ */ -+static inline int -+migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) -+{ -+ struct task_struct *p, *skip = rq->curr; -+ int nr_migrated = 0; -+ int nr_tries = min(rq->nr_running / 2, SCHED_RQ_NR_MIGRATION); -+ -+ while (skip != rq->idle && nr_tries && -+ (p = sched_rq_next_task(skip, rq)) != rq->idle) { -+ skip = sched_rq_next_task(p, rq); -+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { -+ __SCHED_DEQUEUE_TASK(p, rq, 0, ); -+ set_task_cpu(p, dest_cpu); -+ __SCHED_ENQUEUE_TASK(p, dest_rq, 0); -+ nr_migrated++; -+ } -+ nr_tries--; -+ } -+ -+ return nr_migrated; -+} -+ -+static inline int take_other_rq_tasks(struct rq *rq, int cpu) -+{ -+ struct cpumask *affinity_mask, *end_mask; -+ -+ if (unlikely(!rq->online)) -+ return 0; -+ -+ if (cpumask_empty(&sched_rq_pending_mask)) -+ return 0; -+ -+ affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); -+ do { -+ int i; -+ for_each_cpu_and(i, &sched_rq_pending_mask, affinity_mask) { -+ int nr_migrated; -+ struct rq *src_rq; -+ -+ src_rq = cpu_rq(i); -+ if (!do_raw_spin_trylock(&src_rq->lock)) -+ continue; -+ spin_acquire(&src_rq->lock.dep_map, -+ SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ -+ if ((nr_migrated = migrate_pending_tasks(src_rq, rq, cpu))) { -+ src_rq->nr_running -= nr_migrated; -+#ifdef CONFIG_SMP -+ if (src_rq->nr_running < 2) -+ cpumask_clear_cpu(i, &sched_rq_pending_mask); -+#endif -+ rq->nr_running += nr_migrated; -+#ifdef CONFIG_SMP -+ if (rq->nr_running > 1) -+ cpumask_set_cpu(cpu, &sched_rq_pending_mask); -+#endif -+ update_sched_rq_watermark(rq); -+ cpufreq_update_util(rq, 0); -+ -+ spin_release(&src_rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ -+ return 1; -+ } -+ -+ spin_release(&src_rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ } -+ } while (++affinity_mask < end_mask); -+ -+ return 0; -+} -+#endif -+ -+/* -+ * Timeslices below RESCHED_NS are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. 
-+ */ -+static inline void check_curr(struct task_struct *p, struct rq *rq) -+{ -+ if (unlikely(rq->idle == p)) -+ return; -+ -+ update_curr(rq, p); -+ -+ if (p->time_slice < RESCHED_NS) -+ time_slice_expired(p, rq); -+} -+ -+static inline struct task_struct * -+choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) -+{ -+ struct task_struct *next; -+ -+ if (unlikely(rq->skip)) { -+ next = rq_runnable_task(rq); -+ if (next == rq->idle) { -+#ifdef CONFIG_SMP -+ if (!take_other_rq_tasks(rq, cpu)) { -+#endif -+ rq->skip = NULL; -+ schedstat_inc(rq->sched_goidle); -+ return next; -+#ifdef CONFIG_SMP -+ } -+ next = rq_runnable_task(rq); -+#endif -+ } -+ rq->skip = NULL; -+#ifdef CONFIG_HIGH_RES_TIMERS -+ hrtick_start(rq, next->time_slice); -+#endif -+ return next; -+ } -+ -+ next = sched_rq_first_task(rq); -+ if (next == rq->idle) { -+#ifdef CONFIG_SMP -+ if (!take_other_rq_tasks(rq, cpu)) { -+#endif -+ schedstat_inc(rq->sched_goidle); -+ /*printk(KERN_INFO "sched: choose_next_task(%d) idle %px\n", cpu, next);*/ -+ return next; -+#ifdef CONFIG_SMP -+ } -+ next = sched_rq_first_task(rq); -+#endif -+ } -+#ifdef CONFIG_HIGH_RES_TIMERS -+ hrtick_start(rq, next->time_slice); -+#endif -+ /*printk(KERN_INFO "sched: choose_next_task(%d) next %px\n", cpu, -+ * next);*/ -+ return next; -+} -+ -+/* -+ * schedule() is the main scheduler function. -+ * -+ * The main means of driving the scheduler and thus entering this function are: -+ * -+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. -+ * -+ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return -+ * paths. For example, see arch/x86/entry_64.S. -+ * -+ * To drive preemption between tasks, the scheduler sets the flag in timer -+ * interrupt handler scheduler_tick(). -+ * -+ * 3. Wakeups don't really cause entry into schedule(). They add a -+ * task to the run-queue and that's it. -+ * -+ * Now, if the new task added to the run-queue preempts the current -+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets -+ * called on the nearest possible occasion: -+ * -+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): -+ * -+ * - in syscall or exception context, at the next outmost -+ * preempt_enable(). (this might be as soon as the wake_up()'s -+ * spin_unlock()!) -+ * -+ * - in IRQ context, return from interrupt-handler to -+ * preemptible context -+ * -+ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) -+ * then at the next: -+ * -+ * - cond_resched() call -+ * - explicit schedule() call -+ * - return from syscall or exception to user-space -+ * - return from interrupt-handler to user-space -+ * -+ * WARNING: must be called with preemption disabled! 
-+ */ -+static void __sched notrace __schedule(bool preempt) -+{ -+ struct task_struct *prev, *next; -+ unsigned long *switch_count; -+ unsigned long prev_state; -+ struct rq *rq; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ rq = cpu_rq(cpu); -+ prev = rq->curr; -+ -+ schedule_debug(prev, preempt); -+ -+ /* by passing sched_feat(HRTICK) checking which Alt schedule FW doesn't support */ -+ hrtick_clear(rq); -+ -+ local_irq_disable(); -+ rcu_note_context_switch(preempt); -+ -+ /* -+ * Make sure that signal_pending_state()->signal_pending() below -+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(): -+ * -+ * __set_current_state(@state) signal_wake_up() -+ * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) -+ * wake_up_state(p, state) -+ * LOCK rq->lock LOCK p->pi_state -+ * smp_mb__after_spinlock() smp_mb__after_spinlock() -+ * if (signal_pending_state()) if (p->state & @state) -+ * -+ * Also, the membarrier system call requires a full memory barrier -+ * after coming from user-space, before storing to rq->curr. -+ */ -+ raw_spin_lock(&rq->lock); -+ smp_mb__after_spinlock(); -+ -+ update_rq_clock(rq); -+ -+ switch_count = &prev->nivcsw; -+ /* -+ * We must load prev->state once (task_struct::state is volatile), such -+ * that: -+ * -+ * - we form a control dependency vs deactivate_task() below. -+ * - ptrace_{,un}freeze_traced() can change ->state underneath us. -+ */ -+ prev_state = prev->state; -+ if (!preempt && prev_state && prev_state == prev->state) { -+ if (signal_pending_state(prev_state, prev)) { -+ prev->state = TASK_RUNNING; -+ } else { -+ prev->sched_contributes_to_load = -+ (prev_state & TASK_UNINTERRUPTIBLE) && -+ !(prev_state & TASK_NOLOAD) && -+ !(prev->flags & PF_FROZEN); -+ -+ if (prev->sched_contributes_to_load) -+ rq->nr_uninterruptible++; -+ -+ /* -+ * __schedule() ttwu() -+ * prev_state = prev->state; if (p->on_rq && ...) -+ * if (prev_state) goto out; -+ * p->on_rq = 0; smp_acquire__after_ctrl_dep(); -+ * p->state = TASK_WAKING -+ * -+ * Where __schedule() and ttwu() have matching control dependencies. -+ * -+ * After this, schedule() must not care about p->state any more. -+ */ -+ sched_task_deactivate(prev, rq); -+ deactivate_task(prev, rq); -+ -+ if (prev->in_iowait) { -+ atomic_inc(&rq->nr_iowait); -+ delayacct_blkio_start(); -+ } -+ } -+ switch_count = &prev->nvcsw; -+ } -+ -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ -+ check_curr(prev, rq); -+ -+ next = choose_next_task(rq, cpu, prev); -+ -+ if (likely(prev != next)) { -+ next->last_ran = rq->clock_task; -+ rq->last_ts_switch = rq->clock; -+ -+ rq->nr_switches++; -+ /* -+ * RCU users of rcu_dereference(rq->curr) may not see -+ * changes to task_struct made by pick_next_task(). -+ */ -+ RCU_INIT_POINTER(rq->curr, next); -+ /* -+ * The membarrier system call requires each architecture -+ * to have a full memory barrier after updating -+ * rq->curr, before returning to user-space. -+ * -+ * Here are the schemes providing that barrier on the -+ * various architectures: -+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. -+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
-+ * - finish_lock_switch() for weakly-ordered -+ * architectures where spin_unlock is a full barrier, -+ * - switch_to() for arm64 (weakly-ordered, spin_unlock -+ * is a RELEASE barrier), -+ */ -+ ++*switch_count; -+ -+ psi_sched_switch(prev, next, !task_on_rq_queued(prev)); -+ -+ trace_sched_switch(preempt, prev, next); -+ -+ /* Also unlocks the rq: */ -+ rq = context_switch(rq, prev, next); -+ } else -+ raw_spin_unlock_irq(&rq->lock); -+ -+#ifdef CONFIG_SCHED_SMT -+ sg_balance_check(rq); -+#endif -+} -+ -+void __noreturn do_task_dead(void) -+{ -+ /* Causes final put_task_struct in finish_task_switch(): */ -+ set_special_state(TASK_DEAD); -+ -+ /* Tell freezer to ignore us: */ -+ current->flags |= PF_NOFREEZE; -+ -+ __schedule(false); -+ BUG(); -+ -+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -+ for (;;) -+ cpu_relax(); -+} -+ -+static inline void sched_submit_work(struct task_struct *tsk) -+{ -+ if (!tsk->state) -+ return; -+ -+ /* -+ * If a worker went to sleep, notify and ask workqueue whether -+ * it wants to wake up a task to maintain concurrency. -+ * As this function is called inside the schedule() context, -+ * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker and because wq_worker_sleeping() -+ * requires it. -+ */ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ preempt_disable(); -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_sleeping(tsk); -+ else -+ io_wq_worker_sleeping(tsk); -+ preempt_enable_no_resched(); -+ } -+ -+ if (tsk_is_pi_blocked(tsk)) -+ return; -+ -+ /* -+ * If we are going to sleep and we have plugged IO queued, -+ * make sure to submit it to avoid deadlocks. -+ */ -+ if (blk_needs_flush_plug(tsk)) -+ blk_schedule_flush_plug(tsk); -+} -+ -+static void sched_update_worker(struct task_struct *tsk) -+{ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_running(tsk); -+ else -+ io_wq_worker_running(tsk); -+ } -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ struct task_struct *tsk = current; -+ -+ sched_submit_work(tsk); -+ do { -+ preempt_disable(); -+ __schedule(false); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ sched_update_worker(tsk); -+} -+EXPORT_SYMBOL(schedule); -+ -+/* -+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted -+ * state (have scheduled out non-voluntarily) by making sure that all -+ * tasks have either left the run queue or have gone into user space. -+ * As idle tasks do not do either, they must not ever be preempted -+ * (schedule out non-voluntarily). -+ * -+ * schedule_idle() is similar to schedule_preempt_disable() except that it -+ * never enables preemption because it does not call sched_submit_work(). -+ */ -+void __sched schedule_idle(void) -+{ -+ /* -+ * As this skips calling sched_submit_work(), which the idle task does -+ * regardless because that function is a nop when the task is in a -+ * TASK_RUNNING state, make sure this isn't used someplace that the -+ * current task can be in any other state. Note, idle is always in the -+ * TASK_RUNNING state. 
-+ */ -+ WARN_ON_ONCE(current->state); -+ do { -+ __schedule(false); -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_CONTEXT_TRACKING -+asmlinkage __visible void __sched schedule_user(void) -+{ -+ /* -+ * If we come here after a random call to set_need_resched(), -+ * or we have been woken up remotely but the IPI has not yet arrived, -+ * we haven't yet exited the RCU idle mode. Do it here manually until -+ * we find a better solution. -+ * -+ * NB: There are buggy callers of this function. Ideally we -+ * should warn if prev_state != CONTEXT_USER, but that will trigger -+ * too frequently to make sense yet. -+ */ -+ enum ctx_state prev_state = exception_enter(); -+ schedule(); -+ exception_exit(prev_state); -+} -+#endif -+ -+/** -+ * schedule_preempt_disabled - called with preemption disabled -+ * -+ * Returns with preemption disabled. Note: preempt_count must be 1 -+ */ -+void __sched schedule_preempt_disabled(void) -+{ -+ sched_preempt_enable_no_resched(); -+ schedule(); -+ preempt_disable(); -+} -+ -+static void __sched notrace preempt_schedule_common(void) -+{ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ __schedule(true); -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ -+ /* -+ * Check again in case we missed a preemption opportunity -+ * between schedule and now. -+ */ -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_PREEMPTION -+/* -+ * This is the entry point to schedule() from in-kernel preemption -+ * off of preempt_enable. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule(void) -+{ -+ /* -+ * If there is a non-zero preempt_count or interrupts are disabled, -+ * we do not want to preempt the current task. Just return.. -+ */ -+ if (likely(!preemptible())) -+ return; -+ -+ preempt_schedule_common(); -+} -+NOKPROBE_SYMBOL(preempt_schedule); -+EXPORT_SYMBOL(preempt_schedule); -+ -+/** -+ * preempt_schedule_notrace - preempt_schedule called by tracing -+ * -+ * The tracing infrastructure uses preempt_enable_notrace to prevent -+ * recursion and tracing preempt enabling caused by the tracing -+ * infrastructure itself. But as tracing can happen in areas coming -+ * from userspace or just about to enter userspace, a preempt enable -+ * can occur before user_exit() is called. This will cause the scheduler -+ * to be called when the system is still in usermode. -+ * -+ * To prevent this, the preempt_enable_notrace will use this function -+ * instead of preempt_schedule() to exit user context if needed before -+ * calling the scheduler. 
-+ */ -+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) -+{ -+ enum ctx_state prev_ctx; -+ -+ if (likely(!preemptible())) -+ return; -+ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ /* -+ * Needs preempt disabled in case user_exit() is traced -+ * and the tracer calls preempt_enable_notrace() causing -+ * an infinite recursion. -+ */ -+ prev_ctx = exception_enter(); -+ __schedule(true); -+ exception_exit(prev_ctx); -+ -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ } while (need_resched()); -+} -+EXPORT_SYMBOL_GPL(preempt_schedule_notrace); -+ -+#endif /* CONFIG_PREEMPTION */ -+ -+/* -+ * This is the entry point to schedule() from kernel preemption -+ * off of irq context. -+ * Note, that this is called and return with irqs disabled. This will -+ * protect us against recursive calling from irq. -+ */ -+asmlinkage __visible void __sched preempt_schedule_irq(void) -+{ -+ enum ctx_state prev_state; -+ -+ /* Catch callers which need to be fixed */ -+ BUG_ON(preempt_count() || !irqs_disabled()); -+ -+ prev_state = exception_enter(); -+ -+ do { -+ preempt_disable(); -+ local_irq_enable(); -+ __schedule(true); -+ local_irq_disable(); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ -+ exception_exit(prev_state); -+} -+ -+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, -+ void *key) -+{ -+ WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC); -+ return try_to_wake_up(curr->private, mode, wake_flags); -+} -+EXPORT_SYMBOL(default_wake_function); -+ -+static inline void check_task_changed(struct rq *rq, struct task_struct *p) -+{ -+ /* Trigger resched if task sched_prio has been modified. */ -+ if (task_on_rq_queued(p) && sched_task_need_requeue(p, rq)) { -+ requeue_task(p, rq); -+ check_preempt_curr(rq); -+ } -+} -+ -+#ifdef CONFIG_RT_MUTEXES -+ -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ -+/* -+ * rt_mutex_setprio - set the current priority of a task -+ * @p: task to boost -+ * @pi_task: donor task -+ * -+ * This function changes the 'effective' priority of a task. It does -+ * not touch ->normal_prio like __setscheduler(). -+ * -+ * Used by the rt_mutex code to implement priority inheritance -+ * logic. Call site only calls if the priority of the task changed. -+ */ -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) -+{ -+ int prio; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. 
-+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio) -+ return; -+ -+ rq = __task_access_lock(p, &lock); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio) -+ goto out_unlock; -+ -+ /* -+ * Idle task boosting is a nono in general. There is one -+ * exception, when PREEMPT_RT and NOHZ is active: -+ * -+ * The idle task calls get_next_timer_interrupt() and holds -+ * the timer wheel base->lock on the CPU and another CPU wants -+ * to access the timer (probably to cancel it). We can safely -+ * ignore the boosting request, as the idle CPU runs this code -+ * with interrupts disabled and will complete the lock -+ * protected section without being interrupted. So there is no -+ * real need to boost. -+ */ -+ if (unlikely(p == rq->idle)) { -+ WARN_ON(p != rq->curr); -+ WARN_ON(p->pi_blocked_on); -+ goto out_unlock; -+ } -+ -+ trace_sched_pi_setprio(p, pi_task); -+ p->prio = prio; -+ update_task_priodl(p); -+ -+ check_task_changed(rq, p); -+out_unlock: -+ __task_access_unlock(p, lock); -+} -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} -+#endif -+ -+void set_user_nice(struct task_struct *p, long nice) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) -+ return; -+ /* -+ * We have to be careful, if called from sys_setpriority(), -+ * the task might be in the middle of scheduling on another CPU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ p->static_prio = NICE_TO_PRIO(nice); -+ /* -+ * The RT priorities are set via sched_setscheduler(), but we still -+ * allow the 'normal' nice value to be set - but as expected -+ * it wont have any effect on scheduling until the task is -+ * not SCHED_NORMAL/SCHED_BATCH: -+ */ -+ if (task_has_rt_policy(p)) -+ goto out_unlock; -+ -+ p->prio = effective_prio(p); -+ update_task_priodl(p); -+ -+ check_task_changed(rq, p); -+out_unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+EXPORT_SYMBOL(set_user_nice); -+ -+/* -+ * can_nice - check if a task can reduce its nice value -+ * @p: task -+ * @nice: nice value -+ */ -+int can_nice(const struct task_struct *p, const int nice) -+{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); -+} -+ -+#ifdef __ARCH_WANT_SYS_NICE -+ -+/* -+ * sys_nice - change the priority of the current process. -+ * @increment: priority increment -+ * -+ * sys_setpriority is a more generic, but much slower function that -+ * does similar things. -+ */ -+SYSCALL_DEFINE1(nice, int, increment) -+{ -+ long nice, retval; -+ -+ /* -+ * Setpriority might change our priority at the same moment. -+ * We don't have to worry. Conceptually one call occurs first -+ * and we have a single winner. 
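The rt_mutex_setprio() path above is what user space observes as priority inheritance on PI futexes. A minimal user-space sketch of requesting that behaviour through POSIX threads (illustrative only, not part of this patch; assumes glibc with PTHREAD_PRIO_INHERIT support, compile with -pthread):

#include <pthread.h>
#include <stdio.h>

int main(void)
{
    pthread_mutexattr_t attr;
    pthread_mutex_t lock;

    pthread_mutexattr_init(&attr);
    /* A PI mutex: when a higher-priority waiter blocks on it, the kernel
     * boosts the current owner via rt_mutex_setprio() until unlock. */
    if (pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT) != 0) {
        fprintf(stderr, "PTHREAD_PRIO_INHERIT not supported\n");
        return 1;
    }
    pthread_mutex_init(&lock, &attr);

    pthread_mutex_lock(&lock);
    /* ... critical section: contending RT threads may boost us here ... */
    pthread_mutex_unlock(&lock);

    pthread_mutex_destroy(&lock);
    pthread_mutexattr_destroy(&attr);
    return 0;
}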
-+ */ -+ -+ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); -+ nice = task_nice(current) + increment; -+ -+ nice = clamp_val(nice, MIN_NICE, MAX_NICE); -+ if (increment < 0 && !can_nice(current, nice)) -+ return -EPERM; -+ -+ retval = security_task_setnice(current, nice); -+ if (retval) -+ return retval; -+ -+ set_user_nice(current, nice); -+ return 0; -+} -+ -+#endif -+ -+/** -+ * idle_cpu - is a given CPU idle currently? -+ * @cpu: the processor in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int idle_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (rq->curr != rq->idle) -+ return 0; -+ -+ if (rq->nr_running) -+ return 0; -+ -+#ifdef CONFIG_SMP -+ if (rq->ttwu_pending) -+ return 0; -+#endif -+ -+ return 1; -+} -+ -+/** -+ * idle_task - return the idle task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * Return: The idle task for the cpu @cpu. -+ */ -+struct task_struct *idle_task(int cpu) -+{ -+ return cpu_rq(cpu)->idle; -+} -+ -+/** -+ * find_process_by_pid - find a process with a matching PID value. -+ * @pid: the pid in question. -+ * -+ * The task of @pid, if found. %NULL otherwise. -+ */ -+static inline struct task_struct *find_process_by_pid(pid_t pid) -+{ -+ return pid ? find_task_by_vpid(pid) : current; -+} -+ -+/* -+ * sched_setparam() passes in -1 for its policy, to let the functions -+ * it calls know not to change it. -+ */ -+#define SETPARAM_POLICY -1 -+ -+static void __setscheduler_params(struct task_struct *p, -+ const struct sched_attr *attr) -+{ -+ int policy = attr->sched_policy; -+ -+ if (policy == SETPARAM_POLICY) -+ policy = p->policy; -+ -+ p->policy = policy; -+ -+ /* -+ * allow normal nice value to be set, but will not have any -+ * effect on scheduling until the task not SCHED_NORMAL/ -+ * SCHED_BATCH -+ */ -+ p->static_prio = NICE_TO_PRIO(attr->sched_nice); -+ -+ /* -+ * __sched_setscheduler() ensures attr->sched_priority == 0 when -+ * !rt_policy. Always setting this ensures that things like -+ * getparam()/getattr() don't report silly values for !rt tasks. -+ */ -+ p->rt_priority = attr->sched_priority; -+ p->normal_prio = normal_prio(p); -+} -+ -+/* Actually do priority change: must hold rq lock. */ -+static void __setscheduler(struct rq *rq, struct task_struct *p, -+ const struct sched_attr *attr, bool keep_boost) -+{ -+ __setscheduler_params(p, attr); -+ -+ /* -+ * Keep a potential priority boosting if called from -+ * sched_setscheduler(). 
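sys_nice() above clamps the requested increment and defers the permission check to can_nice(), which compares against RLIMIT_NICE. A hedged user-space illustration of the same interface through the usual libc wrappers (not part of the patch):

#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <sys/resource.h>

int main(void)
{
    /* Raising the nice value (lowering priority) is always permitted. */
    errno = 0;
    int val = nice(5);
    if (val == -1 && errno)
        perror("nice(+5)");
    else
        printf("nice value now: %d\n", val);

    /* Lowering it again needs CAP_SYS_NICE or RLIMIT_NICE headroom,
     * mirroring the can_nice() check above. */
    if (setpriority(PRIO_PROCESS, 0, val - 5) == -1)
        perror("setpriority(-5)");
    else
        printf("nice value now: %d\n", getpriority(PRIO_PROCESS, 0));
    return 0;
}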
-+ */ -+ p->prio = normal_prio(p); -+ if (keep_boost) -+ p->prio = rt_effective_prio(p, p->prio); -+ update_task_priodl(p); -+} -+ -+/* -+ * check the target process has a UID that matches the current process's -+ */ -+static bool check_same_owner(struct task_struct *p) -+{ -+ const struct cred *cred = current_cred(), *pcred; -+ bool match; -+ -+ rcu_read_lock(); -+ pcred = __task_cred(p); -+ match = (uid_eq(cred->euid, pcred->euid) || -+ uid_eq(cred->euid, pcred->uid)); -+ rcu_read_unlock(); -+ return match; -+} -+ -+static int __sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, -+ bool user, bool pi) -+{ -+ const struct sched_attr dl_squash_attr = { -+ .size = sizeof(struct sched_attr), -+ .sched_policy = SCHED_FIFO, -+ .sched_nice = 0, -+ .sched_priority = 99, -+ }; -+ int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ int retval, oldpolicy = -1; -+ int policy = attr->sched_policy; -+ unsigned long flags; -+ struct rq *rq; -+ int reset_on_fork; -+ raw_spinlock_t *lock; -+ -+ /* The pi code expects interrupts enabled */ -+ BUG_ON(pi && in_interrupt()); -+ -+ /* -+ * Alt schedule FW supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO -+ */ -+ if (unlikely(SCHED_DEADLINE == policy)) { -+ attr = &dl_squash_attr; -+ policy = attr->sched_policy; -+ newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ } -+recheck: -+ /* Double check policy once rq lock held */ -+ if (policy < 0) { -+ reset_on_fork = p->sched_reset_on_fork; -+ policy = oldpolicy = p->policy; -+ } else { -+ reset_on_fork = !!(attr->sched_flags & SCHED_RESET_ON_FORK); -+ -+ if (policy > SCHED_IDLE) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & ~(SCHED_FLAG_ALL)) -+ return -EINVAL; -+ -+ /* -+ * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * SCHED_BATCH and SCHED_IDLE is 0. -+ */ -+ if (attr->sched_priority < 0 || -+ (p->mm && attr->sched_priority > MAX_USER_RT_PRIO - 1) || -+ (!p->mm && attr->sched_priority > MAX_RT_PRIO - 1)) -+ return -EINVAL; -+ if ((SCHED_RR == policy || SCHED_FIFO == policy) != -+ (attr->sched_priority != 0)) -+ return -EINVAL; -+ -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (SCHED_FIFO == policy || SCHED_RR == policy) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (attr->sched_priority > p->rt_priority && -+ attr->sched_priority > rlim_rtprio) -+ return -EPERM; -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ -+ if (user) { -+ retval = security_task_setscheduler(p); -+ if (retval) -+ return retval; -+ } -+ -+ if (pi) -+ cpuset_read_lock(); -+ -+ /* -+ * Make sure no PI-waiters arrive (or leave) while we are -+ * changing the priority of the task: -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ /* -+ * To be able to change p->policy safely, task_access_lock() -+ * must be called. -+ * IF use task_access_lock() here: -+ * For the task p which is not running, reading rq->stop is -+ * racy but acceptable as ->stop doesn't change much. -+ * An enhancemnet can be made to read rq->stop saftly. 
-+ */ -+ rq = __task_access_lock(p, &lock); -+ -+ /* -+ * Changing the policy of the stop threads its a very bad idea -+ */ -+ if (p == rq->stop) { -+ retval = -EINVAL; -+ goto unlock; -+ } -+ -+ /* -+ * If not changing anything there's no need to proceed further: -+ */ -+ if (unlikely(policy == p->policy)) { -+ if (rt_policy(policy) && attr->sched_priority != p->rt_priority) -+ goto change; -+ if (!rt_policy(policy) && -+ NICE_TO_PRIO(attr->sched_nice) != p->static_prio) -+ goto change; -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ retval = 0; -+ goto unlock; -+ } -+change: -+ -+ /* Re-check policy now with rq lock held */ -+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { -+ policy = oldpolicy = -1; -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ goto recheck; -+ } -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ -+ if (pi) { -+ /* -+ * Take priority boosted tasks into account. If the new -+ * effective priority is unchanged, we just store the new -+ * normal parameters and do not touch the scheduler class and -+ * the runqueue. This will be done when the task deboost -+ * itself. -+ */ -+ if (rt_effective_prio(p, newprio) == p->prio) { -+ __setscheduler_params(p, attr); -+ retval = 0; -+ goto unlock; -+ } -+ } -+ -+ __setscheduler(rq, p, attr, pi); -+ -+ check_task_changed(rq, p); -+ -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ if (pi) { -+ cpuset_read_unlock(); -+ rt_mutex_adjust_pi(p); -+ } -+ -+ preempt_enable(); -+ -+ return 0; -+ -+unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ return retval; -+} -+ -+static int _sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param, bool check) -+{ -+ struct sched_attr attr = { -+ .sched_policy = policy, -+ .sched_priority = param->sched_priority, -+ .sched_nice = PRIO_TO_NICE(p->static_prio), -+ }; -+ -+ /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ -+ if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { -+ attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ policy &= ~SCHED_RESET_ON_FORK; -+ attr.sched_policy = policy; -+ } -+ -+ return __sched_setscheduler(p, &attr, check, true); -+} -+ -+/** -+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * -+ * NOTE that the task may be already dead. -+ */ -+int sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, true); -+} -+ -+EXPORT_SYMBOL_GPL(sched_setscheduler); -+ -+int sched_setattr(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, true, true); -+} -+EXPORT_SYMBOL_GPL(sched_setattr); -+ -+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, false, true); -+} -+ -+/** -+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. 
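sched_setscheduler() above is also the entry point for the classic POSIX policy-change API. A minimal user-space sketch of switching the calling process to SCHED_FIFO (illustrative, not part of the patch; the permission rules enforced by __sched_setscheduler() above still apply, so this needs CAP_SYS_NICE or an RLIMIT_RTPRIO allowance):

#include <sched.h>
#include <stdio.h>

int main(void)
{
    struct sched_param sp = { .sched_priority = 10 };

    /* pid 0 means the calling process; this reaches
     * sys_sched_setscheduler() -> __sched_setscheduler() shown above. */
    if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
        perror("sched_setscheduler(SCHED_FIFO)");
        return 1;
    }
    printf("policy: %d (SCHED_FIFO=%d)\n", sched_getscheduler(0), SCHED_FIFO);
    return 0;
}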
-+ * -+ * Just like sched_setscheduler, only don't bother checking if the -+ * current context has permission. For example, this is needed in -+ * stop_machine(): we create temporary high priority worker threads, -+ * but our caller might not have that capability. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+int sched_setscheduler_nocheck(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, false); -+} -+EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); -+ -+static int -+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) -+{ -+ struct sched_param lparam; -+ struct task_struct *p; -+ int retval; -+ -+ if (!param || pid < 0) -+ return -EINVAL; -+ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) -+ return -EFAULT; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setscheduler(p, policy, &lparam); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/* -+ * Mimics kernel/events/core.c perf_copy_attr(). -+ */ -+static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr) -+{ -+ u32 size; -+ int ret; -+ -+ /* Zero the full structure, so that a short copy will be nice: */ -+ memset(attr, 0, sizeof(*attr)); -+ -+ ret = get_user(size, &uattr->size); -+ if (ret) -+ return ret; -+ -+ /* ABI compatibility quirk: */ -+ if (!size) -+ size = SCHED_ATTR_SIZE_VER0; -+ -+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) -+ goto err_size; -+ -+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); -+ if (ret) { -+ if (ret == -E2BIG) -+ goto err_size; -+ return ret; -+ } -+ -+ /* -+ * XXX: Do we want to be lenient like existing syscalls; or do we want -+ * to be strict and return an error on out-of-bounds values? -+ */ -+ attr->sched_nice = clamp(attr->sched_nice, -20, 19); -+ -+ /* sched/core.c uses zero here but we already know ret is zero */ -+ return 0; -+ -+err_size: -+ put_user(sizeof(*attr), &uattr->size); -+ return -E2BIG; -+} -+ -+/** -+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority -+ * @pid: the pid in question. -+ * @policy: new policy. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * @param: structure containing the new RT priority. -+ */ -+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) -+{ -+ if (policy < 0) -+ return -EINVAL; -+ -+ return do_sched_setscheduler(pid, policy, param); -+} -+ -+/** -+ * sys_sched_setparam - set/change the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); -+} -+ -+/** -+ * sys_sched_setattr - same as above, but with extended sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. 
-+ */ -+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, flags) -+{ -+ struct sched_attr attr; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || flags) -+ return -EINVAL; -+ -+ retval = sched_copy_attr(uattr, &attr); -+ if (retval) -+ return retval; -+ -+ if ((int)attr.sched_policy < 0) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (p != NULL) -+ retval = sched_setattr(p, &attr); -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread -+ * @pid: the pid in question. -+ * -+ * Return: On success, the policy of the thread. Otherwise, a negative error -+ * code. -+ */ -+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) -+{ -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (pid < 0) -+ goto out_nounlock; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (p) { -+ retval = security_task_getscheduler(p); -+ if (!retval) -+ retval = p->policy; -+ } -+ rcu_read_unlock(); -+ -+out_nounlock: -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the RT priority. -+ * -+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error -+ * code. -+ */ -+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ struct sched_param lp = { .sched_priority = 0 }; -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (!param || pid < 0) -+ goto out_nounlock; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ if (task_has_rt_policy(p)) -+ lp.sched_priority = p->rt_priority; -+ rcu_read_unlock(); -+ -+ /* -+ * This one might sleep, we cannot do it with a spinlock held ... -+ */ -+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; -+ -+out_nounlock: -+ return retval; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/* -+ * Copy the kernel size attribute structure (which might be larger -+ * than what user-space knows about) to user-space. -+ * -+ * Note that all cases are valid: user-space buffer can be larger or -+ * smaller than the kernel-space buffer. The usual case is that both -+ * have the same size. -+ */ -+static int -+sched_attr_copy_to_user(struct sched_attr __user *uattr, -+ struct sched_attr *kattr, -+ unsigned int usize) -+{ -+ unsigned int ksize = sizeof(*kattr); -+ -+ if (!access_ok(uattr, usize)) -+ return -EFAULT; -+ -+ /* -+ * sched_getattr() ABI forwards and backwards compatibility: -+ * -+ * If usize == ksize then we just copy everything to user-space and all is good. -+ * -+ * If usize < ksize then we only copy as much as user-space has space for, -+ * this keeps ABI compatibility as well. We skip the rest. -+ * -+ * If usize > ksize then user-space is using a newer version of the ABI, -+ * which part the kernel doesn't know about. Just ignore it - tooling can -+ * detect the kernel's knowledge of attributes from the attr->size value -+ * which is set to ksize in this case. -+ */ -+ kattr->size = min(usize, ksize); -+ -+ if (copy_to_user(uattr, kattr, kattr->size)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr -+ * @pid: the pid in question. 
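sched_copy_attr() and sched_attr_copy_to_user() above implement the size-versioned sched_attr ABI, so older and newer user-space structures interoperate. Since glibc has historically not wrapped sched_setattr(), callers typically use the raw syscall; the sketch below assumes the VER0 uapi field layout and defines a local mirror of the structure (illustrative only, not part of the patch):

#define _GNU_SOURCE
#include <sched.h>          /* SCHED_OTHER (the kernel's SCHED_NORMAL) */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Local mirror of the uapi struct sched_attr (VER0 fields only, 48 bytes);
 * sched_copy_attr() above zero-fills anything a short copy leaves out. */
struct sched_attr_compat {
    uint32_t size;
    uint32_t sched_policy;
    uint64_t sched_flags;
    int32_t  sched_nice;
    uint32_t sched_priority;
    uint64_t sched_runtime;
    uint64_t sched_deadline;
    uint64_t sched_period;
};

int main(void)
{
    struct sched_attr_compat attr;

    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(attr);
    attr.sched_policy = SCHED_OTHER;   /* SCHED_NORMAL in kernel terms */
    attr.sched_nice = 5;

    if (syscall(SYS_sched_setattr, 0, &attr, 0) == -1) {
        perror("sched_setattr");
        return 1;
    }
    return 0;
}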
-+ * @uattr: structure containing the extended parameters. -+ * @usize: sizeof(attr) for fwd/bwd comp. -+ * @flags: for future extension. -+ */ -+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, usize, unsigned int, flags) -+{ -+ struct sched_attr kattr = { }; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || usize > PAGE_SIZE || -+ usize < SCHED_ATTR_SIZE_VER0 || flags) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ kattr.sched_policy = p->policy; -+ if (p->sched_reset_on_fork) -+ kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ if (task_has_rt_policy(p)) -+ kattr.sched_priority = p->rt_priority; -+ else -+ kattr.sched_nice = task_nice(p); -+ -+#ifdef CONFIG_UCLAMP_TASK -+ kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; -+ kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; -+#endif -+ -+ rcu_read_unlock(); -+ -+ return sched_attr_copy_to_user(uattr, &kattr, usize); -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ -+ cpumask_var_t cpus_allowed, new_mask; -+ struct task_struct *p; -+ int retval; -+ -+ get_online_cpus(); -+ rcu_read_lock(); -+ -+ p = find_process_by_pid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ put_online_cpus(); -+ return -ESRCH; -+ } -+ -+ /* Prevent p going away */ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->flags & PF_NO_SETAFFINITY) { -+ retval = -EINVAL; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ retval = -EPERM; -+ if (!check_same_owner(p)) { -+ rcu_read_lock(); -+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { -+ rcu_read_unlock(); -+ goto out_unlock; -+ } -+ rcu_read_unlock(); -+ } -+ -+ retval = security_task_setscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ cpuset_cpus_allowed(p, cpus_allowed); -+ cpumask_and(new_mask, in_mask, cpus_allowed); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ -+ if (!retval) { -+ cpuset_cpus_allowed(p, cpus_allowed); -+ if (!cpumask_subset(new_mask, cpus_allowed)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. Just reset the cpus_allowed to the -+ * cpuset's cpus_allowed -+ */ -+ cpumask_copy(new_mask, cpus_allowed); -+ goto again; -+ } -+ } -+out_unlock: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_allowed); -+out_put_task: -+ put_task_struct(p); -+ put_online_cpus(); -+ return retval; -+} -+ -+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, -+ struct cpumask *new_mask) -+{ -+ if (len < cpumask_size()) -+ cpumask_clear(new_mask); -+ else if (len > cpumask_size()) -+ len = cpumask_size(); -+ -+ return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; -+} -+ -+/** -+ * sys_sched_setaffinity - set the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to the new CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. 
-+ */ -+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ cpumask_var_t new_mask; -+ int retval; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); -+ if (retval == 0) -+ retval = sched_setaffinity(pid, new_mask); -+ free_cpumask_var(new_mask); -+ return retval; -+} -+ -+long sched_getaffinity(pid_t pid, cpumask_t *mask) -+{ -+ struct task_struct *p; -+ raw_spinlock_t *lock; -+ unsigned long flags; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ task_access_lock_irqsave(p, &lock, &flags); -+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+out_unlock: -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getaffinity - get the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to hold the current CPU mask -+ * -+ * Return: size of CPU mask copied to user_mask_ptr on success. An -+ * error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ int ret; -+ cpumask_var_t mask; -+ -+ if ((len * BITS_PER_BYTE) < nr_cpu_ids) -+ return -EINVAL; -+ if (len & (sizeof(unsigned long)-1)) -+ return -EINVAL; -+ -+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ ret = sched_getaffinity(pid, mask); -+ if (ret == 0) { -+ unsigned int retlen = min_t(size_t, len, cpumask_size()); -+ -+ if (copy_to_user(user_mask_ptr, mask, retlen)) -+ ret = -EFAULT; -+ else -+ ret = retlen; -+ } -+ free_cpumask_var(mask); -+ -+ return ret; -+} -+ -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. It does this by -+ * scheduling away the current task. If it still has the earliest deadline -+ * it will be scheduled again as the next task. -+ * -+ * Return: 0. -+ */ -+static void do_sched_yield(void) -+{ -+ struct rq *rq; -+ struct rq_flags rf; -+ -+ if (!sched_yield_type) -+ return; -+ -+ rq = this_rq_lock_irq(&rf); -+ -+ schedstat_inc(rq->yld_count); -+ -+ if (1 == sched_yield_type) { -+ if (!rt_task(current)) -+ do_sched_yield_type_1(current, rq); -+ } else if (2 == sched_yield_type) { -+ if (rq->nr_running > 1) -+ rq->skip = current; -+ } -+ -+ /* -+ * Since we are going to call schedule() anyway, there's -+ * no need to preempt or enable interrupts: -+ */ -+ preempt_disable(); -+ raw_spin_unlock(&rq->lock); -+ sched_preempt_enable_no_resched(); -+ -+ schedule(); -+} -+ -+SYSCALL_DEFINE0(sched_yield) -+{ -+ do_sched_yield(); -+ return 0; -+} -+ -+#ifndef CONFIG_PREEMPTION -+int __sched _cond_resched(void) -+{ -+ if (should_resched(0)) { -+ preempt_schedule_common(); -+ return 1; -+ } -+ rcu_all_qs(); -+ return 0; -+} -+EXPORT_SYMBOL(_cond_resched); -+#endif -+ -+/* -+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, -+ * call schedule, and on return reacquire the lock. -+ * -+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level -+ * operations here to prevent schedule() from being called twice (once via -+ * spin_unlock(), once by hand). 
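The affinity syscalls above round-trip a cpumask between user and kernel space, with sched_getaffinity() masking against cpu_active_mask. A small user-space sketch using the glibc wrappers and CPU_SET macros (illustrative, not part of the patch; requires _GNU_SOURCE):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
    cpu_set_t set;

    /* Pin the calling thread to CPU 0, then read the mask back. */
    CPU_ZERO(&set);
    CPU_SET(0, &set);
    if (sched_setaffinity(0, sizeof(set), &set) == -1) {
        perror("sched_setaffinity");
        return 1;
    }

    CPU_ZERO(&set);
    if (sched_getaffinity(0, sizeof(set), &set) == -1) {
        perror("sched_getaffinity");
        return 1;
    }
    printf("running on %d allowed CPU(s)\n", CPU_COUNT(&set));
    return 0;
}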
-+ */ -+int __cond_resched_lock(spinlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held(lock); -+ -+ if (spin_needbreak(lock) || resched) { -+ spin_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ spin_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_lock); -+ -+/** -+ * yield - yield the current processor to other threads. -+ * -+ * Do not ever use this function, there's a 99% chance you're doing it wrong. -+ * -+ * The scheduler is at all times free to pick the calling task as the most -+ * eligible task to run, if removing the yield() call from your code breaks -+ * it, its already broken. -+ * -+ * Typical broken usage is: -+ * -+ * while (!event) -+ * yield(); -+ * -+ * where one assumes that yield() will let 'the other' process run that will -+ * make event true. If the current task is a SCHED_FIFO task that will never -+ * happen. Never use yield() as a progress guarantee!! -+ * -+ * If you want to use yield() to wait for something, use wait_event(). -+ * If you want to use yield() to be 'nice' for others, use cond_resched(). -+ * If you still want to use yield(), do not! -+ */ -+void __sched yield(void) -+{ -+ set_current_state(TASK_RUNNING); -+ do_sched_yield(); -+} -+EXPORT_SYMBOL(yield); -+ -+/** -+ * yield_to - yield the current processor to another thread in -+ * your thread group, or accelerate that thread toward the -+ * processor it's on. -+ * @p: target task -+ * @preempt: whether task preemption is allowed or not -+ * -+ * It's the caller's job to ensure that the target task struct -+ * can't go away on us before we can do any checks. -+ * -+ * In Alt schedule FW, yield_to is not supported. -+ * -+ * Return: -+ * true (>0) if we indeed boosted the target task. -+ * false (0) if we failed to boost the target. -+ * -ESRCH if there's no task to yield to. -+ */ -+int __sched yield_to(struct task_struct *p, bool preempt) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(yield_to); -+ -+int io_schedule_prepare(void) -+{ -+ int old_iowait = current->in_iowait; -+ -+ current->in_iowait = 1; -+ blk_schedule_flush_plug(current); -+ -+ return old_iowait; -+} -+ -+void io_schedule_finish(int token) -+{ -+ current->in_iowait = token; -+} -+ -+/* -+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so -+ * that process accounting knows that this is a task in IO wait state. -+ * -+ * But don't do that if it is a deliberate, throttling IO wait (this task -+ * has set its backing_dev_info: the queue against which it should throttle) -+ */ -+ -+long __sched io_schedule_timeout(long timeout) -+{ -+ int token; -+ long ret; -+ -+ token = io_schedule_prepare(); -+ ret = schedule_timeout(timeout); -+ io_schedule_finish(token); -+ -+ return ret; -+} -+EXPORT_SYMBOL(io_schedule_timeout); -+ -+void __sched io_schedule(void) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ schedule(); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(io_schedule); -+ -+/** -+ * sys_sched_get_priority_max - return maximum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the maximum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. 
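The yield() comment above recommends wait_event() over polling with yield(). A kernel-style sketch of that replacement, assuming module context (a minimal illustration of the standard wait-queue pattern, not part of this patch):

#include <linux/wait.h>
#include <linux/sched.h>

static DECLARE_WAIT_QUEUE_HEAD(event_wq);
static bool event_done;

/* Instead of "while (!event_done) yield();" the consumer sleeps... */
static int consumer(void)
{
    return wait_event_interruptible(event_wq, event_done);
}

/* ...and the producer sets the condition and wakes it explicitly. */
static void producer(void)
{
    event_done = true;
    wake_up(&event_wq);
}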
-+ */ -+SYSCALL_DEFINE1(sched_get_priority_max, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = MAX_USER_RT_PRIO-1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * sys_sched_get_priority_min - return minimum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the minimum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_min, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) -+{ -+ struct task_struct *p; -+ int retval; -+ -+ alt_sched_debug(); -+ -+ if (pid < 0) -+ return -EINVAL; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ rcu_read_unlock(); -+ -+ *t = ns_to_timespec64(sched_timeslice_ns); -+ return 0; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/** -+ * sys_sched_rr_get_interval - return the default timeslice of a process. -+ * @pid: pid of the process. -+ * @interval: userspace pointer to the timeslice value. -+ * -+ * -+ * Return: On success, 0 and the timeslice is in @interval. Otherwise, -+ * an error code. -+ */ -+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, -+ struct __kernel_timespec __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_timespec64(&t, interval); -+ -+ return retval; -+} -+ -+#ifdef CONFIG_COMPAT_32BIT_TIME -+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, -+ struct old_timespec32 __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_old_timespec32(&t, interval); -+ return retval; -+} -+#endif -+ -+void sched_show_task(struct task_struct *p) -+{ -+ unsigned long free = 0; -+ int ppid; -+ -+ if (!try_get_task_stack(p)) -+ return; -+ -+ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); -+ -+ if (p->state == TASK_RUNNING) -+ printk(KERN_CONT " running task "); -+#ifdef CONFIG_DEBUG_STACK_USAGE -+ free = stack_not_used(p); -+#endif -+ ppid = 0; -+ rcu_read_lock(); -+ if (pid_alive(p)) -+ ppid = task_pid_nr(rcu_dereference(p->real_parent)); -+ rcu_read_unlock(); -+ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, -+ task_pid_nr(p), ppid, -+ (unsigned long)task_thread_info(p)->flags); -+ -+ print_worker_info(KERN_INFO, p); -+ show_stack(p, NULL, KERN_INFO); -+ put_task_stack(p); -+} -+EXPORT_SYMBOL_GPL(sched_show_task); -+ -+static inline bool -+state_filter_match(unsigned long state_filter, struct task_struct *p) -+{ -+ /* no filter, everything matches */ -+ if (!state_filter) -+ return true; -+ -+ /* filter, but doesn't match */ -+ if (!(p->state & state_filter)) -+ return false; -+ -+ /* -+ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows -+ * TASK_KILLABLE). 
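Note that sched_rr_get_interval() above reports the scheduler's global sched_timeslice_ns rather than a per-task slice. Probing both it and the priority ranges from user space with the standard POSIX wrappers (illustrative, not part of the patch):

#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
    struct timespec ts;

    printf("SCHED_FIFO priority range: %d..%d\n",
           sched_get_priority_min(SCHED_FIFO),
           sched_get_priority_max(SCHED_FIFO));

    /* Under this scheduler the reported slice is the fixed global timeslice. */
    if (sched_rr_get_interval(0, &ts) == 0)
        printf("timeslice: %ld.%09ld s\n", (long)ts.tv_sec, ts.tv_nsec);
    return 0;
}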
-+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) -+ return false; -+ -+ return true; -+} -+ -+ -+void show_state_filter(unsigned long state_filter) -+{ -+ struct task_struct *g, *p; -+ -+#if BITS_PER_LONG == 32 -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#else -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#endif -+ rcu_read_lock(); -+ for_each_process_thread(g, p) { -+ /* -+ * reset the NMI-timeout, listing all files on a slow -+ * console might take a lot of time: -+ * Also, reset softlockup watchdogs on all CPUs, because -+ * another CPU might be blocked waiting for us to process -+ * an IPI. -+ */ -+ touch_nmi_watchdog(); -+ touch_all_softlockup_watchdogs(); -+ if (state_filter_match(state_filter, p)) -+ sched_show_task(p); -+ } -+ -+#ifdef CONFIG_SCHED_DEBUG -+ /* TODO: Alt schedule FW should support this -+ if (!state_filter) -+ sysrq_sched_debug_show(); -+ */ -+#endif -+ rcu_read_unlock(); -+ /* -+ * Only show locks if all tasks are dumped: -+ */ -+ if (!state_filter) -+ debug_show_all_locks(); -+} -+ -+void dump_cpu_task(int cpu) -+{ -+ pr_info("Task dump for CPU %d:\n", cpu); -+ sched_show_task(cpu_curr(cpu)); -+} -+ -+/** -+ * init_idle - set up an idle thread for a given CPU -+ * @idle: task in question -+ * @cpu: CPU the idle task belongs to -+ * -+ * NOTE: this function does not set the idle thread's NEED_RESCHED -+ * flag, to make booting more robust. -+ */ -+void init_idle(struct task_struct *idle, int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ __sched_fork(0, idle); -+ -+ raw_spin_lock_irqsave(&idle->pi_lock, flags); -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ idle->last_ran = rq->clock_task; -+ idle->state = TASK_RUNNING; -+ idle->flags |= PF_IDLE; -+ sched_queue_init_idle(rq, idle); -+ -+ scs_task_reset(idle); -+ kasan_unpoison_task_stack(idle); -+ -+#ifdef CONFIG_SMP -+ /* -+ * It's possible that init_idle() gets called multiple times on a task, -+ * in that case do_set_cpus_allowed() will not do the right thing. -+ * -+ * And since this is boot we can forgo the serialisation. -+ */ -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); -+#endif -+ -+ /* Silence PROVE_RCU */ -+ rcu_read_lock(); -+ __set_task_cpu(idle, cpu); -+ rcu_read_unlock(); -+ -+ rq->idle = idle; -+ rcu_assign_pointer(rq->curr, idle); -+ idle->on_cpu = 1; -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); -+ -+ /* Set the preempt count _outside_ the spinlocks! */ -+ init_idle_preempt_count(idle, cpu); -+ -+ ftrace_graph_init_idle_task(idle, cpu); -+ vtime_init_idle(idle, cpu); -+#ifdef CONFIG_SMP -+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -+#endif -+} -+ -+#ifdef CONFIG_SMP -+ -+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, -+ const struct cpumask __maybe_unused *trial) -+{ -+ return 1; -+} -+ -+int task_can_attach(struct task_struct *p, -+ const struct cpumask *cs_cpus_allowed) -+{ -+ int ret = 0; -+ -+ /* -+ * Kthreads which disallow setaffinity shouldn't be moved -+ * to a new cpuset; we don't want to change their CPU -+ * affinity and isolating such threads by their set of -+ * allowed nodes is unnecessary. Thus, cpusets are not -+ * applicable for such threads. This prevents checking for -+ * success of set_cpus_allowed_ptr() on all attached tasks -+ * before cpus_mask may be changed. 
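show_state_filter() and sched_show_task() above back the magic-SysRq task dump. A small user-space sketch that triggers it through the standard procfs interface (illustrative, not part of the patch; needs root and the kernel.sysrq sysctl enabled):

#include <stdio.h>

/* Writing 't' to /proc/sysrq-trigger makes the kernel run show_state_filter(0),
 * i.e. sched_show_task() for every task, printing to the kernel log. */
int main(void)
{
    FILE *f = fopen("/proc/sysrq-trigger", "w");

    if (!f) {
        perror("/proc/sysrq-trigger");
        return 1;
    }
    fputc('t', f);
    fclose(f);
    return 0;
}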
-+ */ -+ if (p->flags & PF_NO_SETAFFINITY) -+ ret = -EINVAL; -+ -+ return ret; -+} -+ -+bool sched_smp_initialized __read_mostly; -+ -+#ifdef CONFIG_HOTPLUG_CPU -+/* -+ * Ensures that the idle task is using init_mm right before its CPU goes -+ * offline. -+ */ -+void idle_task_exit(void) -+{ -+ struct mm_struct *mm = current->active_mm; -+ -+ BUG_ON(current != this_rq()->idle); -+ -+ if (mm != &init_mm) { -+ switch_mm(mm, &init_mm, current); -+ finish_arch_post_lock_switch(); -+ } -+ -+ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ -+} -+ -+/* -+ * Migrate all tasks from the rq, sleeping tasks will be migrated by -+ * try_to_wake_up()->select_task_rq(). -+ * -+ * Called with rq->lock held even though we'er in stop_machine() and -+ * there's no concurrency possible, we hold the required locks anyway -+ * because of lock validation efforts. -+ */ -+static void migrate_tasks(struct rq *dead_rq) -+{ -+ struct rq *rq = dead_rq; -+ struct task_struct *p, *stop = rq->stop; -+ int count = 0; -+ -+ /* -+ * Fudge the rq selection such that the below task selection loop -+ * doesn't get stuck on the currently eligible stop task. -+ * -+ * We're currently inside stop_machine() and the rq is either stuck -+ * in the stop_machine_cpu_stop() loop, or we're executing this code, -+ * either way we should never end up calling schedule() until we're -+ * done here. -+ */ -+ rq->stop = NULL; -+ -+ p = sched_rq_first_task(rq); -+ while (p != rq->idle) { -+ int dest_cpu; -+ -+ /* skip the running task */ -+ if (task_running(p) || 1 == p->nr_cpus_allowed) { -+ p = sched_rq_next_task(p, rq); -+ continue; -+ } -+ -+ /* -+ * Rules for changing task_struct::cpus_allowed are holding -+ * both pi_lock and rq->lock, such that holding either -+ * stabilizes the mask. -+ * -+ * Drop rq->lock is not quite as disastrous as it usually is -+ * because !cpu_active at this point, which means load-balance -+ * will not interfere. Also, stop-machine. -+ */ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ /* -+ * Since we're inside stop-machine, _nothing_ should have -+ * changed the task, WARN if weird stuff happened, because in -+ * that case the above rq->lock drop is a fail too. -+ */ -+ if (WARN_ON(task_rq(p) != rq || !task_on_rq_queued(p))) { -+ raw_spin_unlock(&p->pi_lock); -+ p = sched_rq_next_task(p, rq); -+ continue; -+ } -+ -+ count++; -+ /* Find suitable destination for @next, with force if needed. */ -+ dest_cpu = select_fallback_rq(dead_rq->cpu, p); -+ rq = __migrate_task(rq, p, dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ rq = dead_rq; -+ raw_spin_lock(&rq->lock); -+ /* Check queued task all over from the header again */ -+ p = sched_rq_first_task(rq); -+ } -+ -+ rq->stop = stop; -+} -+ -+static void set_rq_offline(struct rq *rq) -+{ -+ if (rq->online) -+ rq->online = false; -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+static void set_rq_online(struct rq *rq) -+{ -+ if (!rq->online) -+ rq->online = true; -+} -+ -+/* -+ * used to mark begin/end of suspend/resume: -+ */ -+static int num_cpus_frozen; -+ -+/* -+ * Update cpusets according to cpu_active mask. If cpusets are -+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper -+ * around partition_sched_domains(). -+ * -+ * If we come here as part of a suspend/resume, don't touch cpusets because we -+ * want to restore it back to its original state upon resume anyway. 
-+ */ -+static void cpuset_cpu_active(void) -+{ -+ if (cpuhp_tasks_frozen) { -+ /* -+ * num_cpus_frozen tracks how many CPUs are involved in suspend -+ * resume sequence. As long as this is not the last online -+ * operation in the resume sequence, just build a single sched -+ * domain, ignoring cpusets. -+ */ -+ partition_sched_domains(1, NULL, NULL); -+ if (--num_cpus_frozen) -+ return; -+ /* -+ * This is the last CPU online operation. So fall through and -+ * restore the original sched domains by considering the -+ * cpuset configurations. -+ */ -+ cpuset_force_rebuild(); -+ } -+ -+ cpuset_update_active_cpus(); -+} -+ -+static int cpuset_cpu_inactive(unsigned int cpu) -+{ -+ if (!cpuhp_tasks_frozen) { -+ cpuset_update_active_cpus(); -+ } else { -+ num_cpus_frozen++; -+ partition_sched_domains(1, NULL, NULL); -+ } -+ return 0; -+} -+ -+int sched_cpu_activate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going up, increment the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_inc_cpuslocked(&sched_smt_present); -+#endif -+ set_cpu_active(cpu, true); -+ -+ if (sched_smp_initialized) -+ cpuset_cpu_active(); -+ -+ /* -+ * Put the rq online, if not already. This happens: -+ * -+ * 1) In the early boot process, because we build the real domains -+ * after all cpus have been brought up. -+ * -+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the -+ * domains. -+ */ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_online(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ int ret; -+ -+ set_cpu_active(cpu, false); -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. -+ */ -+ synchronize_rcu(); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going down, decrement the number of cores with SMT present. 
-+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) { -+ static_branch_dec_cpuslocked(&sched_smt_present); -+ if (!static_branch_likely(&sched_smt_present)) -+ cpumask_clear(&sched_sg_idle_mask); -+ } -+#endif -+ -+ if (!sched_smp_initialized) -+ return 0; -+ -+ ret = cpuset_cpu_inactive(cpu); -+ if (ret) { -+ set_cpu_active(cpu, true); -+ return ret; -+ } -+ return 0; -+} -+ -+static void sched_rq_cpu_starting(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ rq->calc_load_update = calc_load_update; -+} -+ -+int sched_cpu_starting(unsigned int cpu) -+{ -+ sched_rq_cpu_starting(cpu); -+ sched_tick_start(cpu); -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+int sched_cpu_dying(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ /* Handle pending wakeups and then migrate everything off */ -+ sched_tick_stop(cpu); -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_offline(rq); -+ migrate_tasks(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ hrtick_clear(rq); -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_SMP -+static void sched_init_topology_cpumask_early(void) -+{ -+ int cpu, level; -+ cpumask_t *tmp; -+ -+ for_each_possible_cpu(cpu) { -+ for (level = 0; level < NR_CPU_AFFINITY_CHK_LEVEL; level++) { -+ tmp = &(per_cpu(sched_cpu_affinity_masks, cpu)[level]); -+ cpumask_copy(tmp, cpu_possible_mask); -+ cpumask_clear_cpu(cpu, tmp); -+ } -+ per_cpu(sched_cpu_llc_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ per_cpu(sched_cpu_affinity_end_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); -+ /*per_cpu(sd_llc_id, cpu) = cpu;*/ -+ } -+} -+ -+#define TOPOLOGY_CPUMASK(name, mask, last) \ -+ if (cpumask_and(chk, chk, mask)) \ -+ printk(KERN_INFO "sched: cpu#%02d affinity mask: 0x%08lx - "#name,\ -+ cpu, (chk++)->bits[0]); \ -+ if (!last) \ -+ cpumask_complement(chk, mask) -+ -+static void sched_init_topology_cpumask(void) -+{ -+ int cpu; -+ cpumask_t *chk; -+ -+ for_each_online_cpu(cpu) { -+ /* take chance to reset time slice for idle tasks */ -+ cpu_rq(cpu)->idle->time_slice = sched_timeslice_ns; -+ -+ chk = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ -+ cpumask_complement(chk, cpumask_of(cpu)); -+#ifdef CONFIG_SCHED_SMT -+ TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); -+#endif -+ per_cpu(sd_llc_id, cpu) = cpumask_first(cpu_coregroup_mask(cpu)); -+ per_cpu(sched_cpu_llc_mask, cpu) = chk; -+ TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask(cpu), false); -+ -+ TOPOLOGY_CPUMASK(core, topology_core_cpumask(cpu), false); -+ -+ TOPOLOGY_CPUMASK(others, cpu_online_mask, true); -+ -+ per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; -+ printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", -+ cpu, per_cpu(sd_llc_id, cpu), -+ (int) (per_cpu(sched_cpu_llc_mask, cpu) - -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]))); -+ } -+} -+#endif -+ -+void __init sched_init_smp(void) -+{ -+ /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -+ BUG(); -+ -+ sched_init_topology_cpumask(); -+ -+ sched_smp_initialized = true; -+} -+#else -+void __init sched_init_smp(void) -+{ -+ cpu_rq(0)->idle->time_slice = sched_timeslice_ns; -+} -+#endif /* CONFIG_SMP */ -+ -+int in_sched_functions(unsigned long addr) -+{ -+ return in_lock_functions(addr) || -+ (addr >= (unsigned long)__sched_text_start -+ && addr < (unsigned long)__sched_text_end); -+} -+ -+#ifdef CONFIG_CGROUP_SCHED -+/* task group related information */ -+struct task_group { -+ 
struct cgroup_subsys_state css; -+ -+ struct rcu_head rcu; -+ struct list_head list; -+ -+ struct task_group *parent; -+ struct list_head siblings; -+ struct list_head children; -+}; -+ -+/* -+ * Default task group. -+ * Every task in system belongs to this group at bootup. -+ */ -+struct task_group root_task_group; -+LIST_HEAD(task_groups); -+ -+/* Cacheline aligned slab cache for task_group */ -+static struct kmem_cache *task_group_cache __read_mostly; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void __init sched_init(void) -+{ -+ int i; -+ struct rq *rq; -+ -+ printk(KERN_INFO ALT_SCHED_VERSION_MSG); -+ -+ wait_bit_init(); -+ -+#ifdef CONFIG_SMP -+ for (i = 0; i < SCHED_BITS; i++) -+ cpumask_copy(&sched_rq_watermark[i], cpu_present_mask); -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+ task_group_cache = KMEM_CACHE(task_group, 0); -+ -+ list_add(&root_task_group.list, &task_groups); -+ INIT_LIST_HEAD(&root_task_group.children); -+ INIT_LIST_HEAD(&root_task_group.siblings); -+#endif /* CONFIG_CGROUP_SCHED */ -+ for_each_possible_cpu(i) { -+ rq = cpu_rq(i); -+ -+ sched_queue_init(rq); -+ rq->watermark = IDLE_WM; -+ rq->skip = NULL; -+ -+ raw_spin_lock_init(&rq->lock); -+ rq->nr_running = rq->nr_uninterruptible = 0; -+ rq->calc_load_active = 0; -+ rq->calc_load_update = jiffies + LOAD_FREQ; -+#ifdef CONFIG_SMP -+ rq->online = false; -+ rq->cpu = i; -+ -+#ifdef CONFIG_SCHED_SMT -+ rq->active_balance = 0; -+#endif -+ -+#ifdef CONFIG_NO_HZ_COMMON -+ rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); -+#endif -+#endif /* CONFIG_SMP */ -+ rq->nr_switches = 0; -+ -+ hrtick_rq_init(rq); -+ atomic_set(&rq->nr_iowait, 0); -+ } -+#ifdef CONFIG_SMP -+ /* Set rq->online for cpu 0 */ -+ cpu_rq(0)->online = true; -+#endif -+ /* -+ * The boot idle thread does lazy MMU switching as well: -+ */ -+ mmgrab(&init_mm); -+ enter_lazy_tlb(&init_mm, current); -+ -+ /* -+ * Make us the idle thread. Technically, schedule() should not be -+ * called from this thread, however somewhere below it might be, -+ * but because we are the idle thread, we just pick up running again -+ * when this runqueue becomes "idle". -+ */ -+ init_idle(current, smp_processor_id()); -+ -+ calc_load_update = jiffies + LOAD_FREQ; -+ -+#ifdef CONFIG_SMP -+ idle_thread_set_boot_cpu(); -+ -+ sched_init_topology_cpumask_early(); -+#endif /* SMP */ -+ -+ init_schedstats(); -+ -+ psi_init(); -+} -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+static inline int preempt_count_equals(int preempt_offset) -+{ -+ int nested = preempt_count() + rcu_preempt_depth(); -+ -+ return (nested == preempt_offset); -+} -+ -+void __might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* -+ * Blocking primitives will set (and therefore destroy) current->state, -+ * since we will exit with TASK_RUNNING make sure we enter with it, -+ * otherwise we will destroy state. 
-+ */ -+ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, -+ "do not call blocking ops when !TASK_RUNNING; " -+ "state=%lx set at [<%p>] %pS\n", -+ current->state, -+ (void *)current->task_state_change, -+ (void *)current->task_state_change); -+ -+ ___might_sleep(file, line, preempt_offset); -+} -+EXPORT_SYMBOL(__might_sleep); -+ -+void ___might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* Ratelimiting timestamp: */ -+ static unsigned long prev_jiffy; -+ -+ unsigned long preempt_disable_ip; -+ -+ /* WARN_ON_ONCE() by default, no rate limit required: */ -+ rcu_sleep_check(); -+ -+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ !is_idle_task(current) && !current->non_block_count) || -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) -+ return; -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ /* Save this before calling printk(), since that will clobber it: */ -+ preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ printk(KERN_ERR -+ "BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ printk(KERN_ERR -+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); -+ -+ if (task_stack_end_corrupted(current)) -+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ -+ debug_show_held_locks(current); -+ if (irqs_disabled()) -+ print_irqtrace_events(current); -+#ifdef CONFIG_DEBUG_PREEMPT -+ if (!preempt_count_equals(preempt_offset)) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); -+ } -+#endif -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL(___might_sleep); -+ -+void __cant_sleep(const char *file, int line, int preempt_offset) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > preempt_offset) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); -+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_sleep); -+#endif -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+void normalize_rt_tasks(void) -+{ -+ struct task_struct *g, *p; -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ }; -+ -+ read_lock(&tasklist_lock); -+ for_each_process_thread(g, p) { -+ /* -+ * Only normalize user tasks: -+ */ -+ if (p->flags & PF_KTHREAD) -+ continue; -+ -+ if (!rt_task(p)) { -+ /* -+ * Renice negative nice level userspace -+ * tasks back to 0: -+ */ -+ if (task_nice(p) < 0) -+ set_user_nice(p, 0); -+ continue; -+ } -+ -+ __sched_setscheduler(p, &attr, false, false); -+ } -+ read_unlock(&tasklist_lock); -+} -+#endif /* CONFIG_MAGIC_SYSRQ */ -+ -+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) -+/* -+ * These functions are only useful for the IA64 MCA handling, or kdb. -+ * -+ * They can only be called when the whole system has been -+ * stopped - every CPU needs to be quiescent, and no scheduling -+ * activity can take place. 
Using them for anything else would -+ * be a serious bug, and as a result, they aren't even visible -+ * under any other configuration. -+ */ -+ -+/** -+ * curr_task - return the current task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ * -+ * Return: The current task for @cpu. -+ */ -+struct task_struct *curr_task(int cpu) -+{ -+ return cpu_curr(cpu); -+} -+ -+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ -+ -+#ifdef CONFIG_IA64 -+/** -+ * ia64_set_curr_task - set the current task for a given CPU. -+ * @cpu: the processor in question. -+ * @p: the task pointer to set. -+ * -+ * Description: This function must only be used when non-maskable interrupts -+ * are serviced on a separate stack. It allows the architecture to switch the -+ * notion of the current task on a CPU in a non-blocking manner. This function -+ * must be called with all CPU's synchronised, and interrupts disabled, the -+ * and caller must save the original value of the current task (see -+ * curr_task() above) and restore that value before reenabling interrupts and -+ * re-starting the system. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ */ -+void ia64_set_curr_task(int cpu, struct task_struct *p) -+{ -+ cpu_curr(cpu) = p; -+} -+ -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+static void sched_free_group(struct task_group *tg) -+{ -+ kmem_cache_free(task_group_cache, tg); -+} -+ -+/* allocate runqueue etc for a new task group */ -+struct task_group *sched_create_group(struct task_group *parent) -+{ -+ struct task_group *tg; -+ -+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); -+ if (!tg) -+ return ERR_PTR(-ENOMEM); -+ -+ return tg; -+} -+ -+void sched_online_group(struct task_group *tg, struct task_group *parent) -+{ -+} -+ -+/* rcu callback to free various structures associated with a task group */ -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ /* Now it should be safe to free those cfs_rqs */ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+void sched_destroy_group(struct task_group *tg) -+{ -+ /* Wait for possible concurrent references to cfs_rqs complete */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ -+void sched_offline_group(struct task_group *tg) -+{ -+} -+ -+static inline struct task_group *css_tg(struct cgroup_subsys_state *css) -+{ -+ return css ? container_of(css, struct task_group, css) : NULL; -+} -+ -+static struct cgroup_subsys_state * -+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -+{ -+ struct task_group *parent = css_tg(parent_css); -+ struct task_group *tg; -+ -+ if (!parent) { -+ /* This is early initialization for the top cgroup */ -+ return &root_task_group.css; -+ } -+ -+ tg = sched_create_group(parent); -+ if (IS_ERR(tg)) -+ return ERR_PTR(-ENOMEM); -+ return &tg->css; -+} -+ -+/* Expose task group only after completing cgroup initialization */ -+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ struct task_group *parent = css_tg(css->parent); -+ -+ if (parent) -+ sched_online_group(tg, parent); -+ return 0; -+} -+ -+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ sched_offline_group(tg); -+} -+ -+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ /* -+ * Relies on the RCU grace period between css_released() and this. 
-+ */ -+ sched_free_group(tg); -+} -+ -+static void cpu_cgroup_fork(struct task_struct *task) -+{ -+} -+ -+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) -+{ -+ return 0; -+} -+ -+static void cpu_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+ -+static struct cftype cpu_legacy_files[] = { -+ { } /* Terminate */ -+}; -+ -+ -+static struct cftype cpu_files[] = { -+ { } /* terminate */ -+}; -+ -+static int cpu_extra_stat_show(struct seq_file *sf, -+ struct cgroup_subsys_state *css) -+{ -+ return 0; -+} -+ -+struct cgroup_subsys cpu_cgrp_subsys = { -+ .css_alloc = cpu_cgroup_css_alloc, -+ .css_online = cpu_cgroup_css_online, -+ .css_released = cpu_cgroup_css_released, -+ .css_free = cpu_cgroup_css_free, -+ .css_extra_stat_show = cpu_extra_stat_show, -+ .fork = cpu_cgroup_fork, -+ .can_attach = cpu_cgroup_can_attach, -+ .attach = cpu_cgroup_attach, -+ .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, -+ .early_init = true, -+ .threaded = true, -+}; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+#undef CREATE_TRACE_POINTS -diff --git a/kernel/sched/alt_debug.c b/kernel/sched/alt_debug.c -new file mode 100644 -index 000000000000..1212a031700e ---- /dev/null -+++ b/kernel/sched/alt_debug.c -@@ -0,0 +1,31 @@ -+/* -+ * kernel/sched/alt_debug.c -+ * -+ * Print the alt scheduler debugging details -+ * -+ * Author: Alfred Chen -+ * Date : 2020 -+ */ -+#include "sched.h" -+ -+/* -+ * This allows printing both to /proc/sched_debug and -+ * to the console -+ */ -+#define SEQ_printf(m, x...) \ -+ do { \ -+ if (m) \ -+ seq_printf(m, x); \ -+ else \ -+ pr_cont(x); \ -+ } while (0) -+ -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{ -+ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -new file mode 100644 -index 000000000000..99be2c51c88d ---- /dev/null -+++ b/kernel/sched/alt_sched.h -@@ -0,0 +1,555 @@ -+#ifndef ALT_SCHED_H -+#define ALT_SCHED_H -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#ifdef CONFIG_PARAVIRT -+# include -+#endif -+ -+#include "cpupri.h" -+ -+#ifdef CONFIG_SCHED_BMQ -+#include "bmq.h" -+#endif -+#ifdef CONFIG_SCHED_PDS -+#include "pds.h" -+#endif -+ -+/* task_struct::on_rq states: */ -+#define TASK_ON_RQ_QUEUED 1 -+#define TASK_ON_RQ_MIGRATING 2 -+ -+static inline int task_on_rq_queued(struct task_struct *p) -+{ -+ return p->on_rq == TASK_ON_RQ_QUEUED; -+} -+ -+static inline int task_on_rq_migrating(struct task_struct *p) -+{ -+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+} -+ -+/* -+ * wake flags -+ */ -+#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -+#define WF_FORK 0x02 /* child wakeup after fork */ -+#define WF_MIGRATED 0x04 /* internal use, task got migrated */ -+#define WF_ON_CPU 0x08 /* Wakee is on_rq */ -+ -+/* -+ * This is the main, per-CPU runqueue data structure. -+ * This data should only be modified by the local cpu. 
-+ */ -+struct rq { -+ /* runqueue lock: */ -+ raw_spinlock_t lock; -+ -+ struct task_struct __rcu *curr; -+ struct task_struct *idle, *stop, *skip; -+ struct mm_struct *prev_mm; -+ -+#ifdef CONFIG_SCHED_BMQ -+ struct bmq queue; -+#endif -+#ifdef CONFIG_SCHED_PDS -+ struct skiplist_node sl_header; -+#endif -+ unsigned long watermark; -+ -+ /* switch count */ -+ u64 nr_switches; -+ -+ atomic_t nr_iowait; -+ -+#ifdef CONFIG_MEMBARRIER -+ int membarrier_state; -+#endif -+ -+#ifdef CONFIG_SMP -+ int cpu; /* cpu of this runqueue */ -+ bool online; -+ -+ unsigned int ttwu_pending; -+ unsigned char nohz_idle_balance; -+ unsigned char idle_balance; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ struct sched_avg avg_irq; -+#endif -+ -+#ifdef CONFIG_SCHED_SMT -+ int active_balance; -+ struct cpu_stop_work active_balance_work; -+#endif -+#endif /* CONFIG_SMP */ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ u64 prev_irq_time; -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+#ifdef CONFIG_PARAVIRT -+ u64 prev_steal_time; -+#endif /* CONFIG_PARAVIRT */ -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ u64 prev_steal_time_rq; -+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ -+ -+ /* calc_load related fields */ -+ unsigned long calc_load_update; -+ long calc_load_active; -+ -+ u64 clock, last_tick; -+ u64 last_ts_switch; -+ u64 clock_task; -+ -+ unsigned long nr_running; -+ unsigned long nr_uninterruptible; -+ -+#ifdef CONFIG_SCHED_HRTICK -+#ifdef CONFIG_SMP -+ call_single_data_t hrtick_csd; -+#endif -+ struct hrtimer hrtick_timer; -+#endif -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+ /* latency stats */ -+ struct sched_info rq_sched_info; -+ unsigned long long rq_cpu_time; -+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ -+ -+ /* sys_sched_yield() stats */ -+ unsigned int yld_count; -+ -+ /* schedule() stats */ -+ unsigned int sched_switch; -+ unsigned int sched_count; -+ unsigned int sched_goidle; -+ -+ /* try_to_wake_up() stats */ -+ unsigned int ttwu_count; -+ unsigned int ttwu_local; -+#endif /* CONFIG_SCHEDSTATS */ -+ -+#ifdef CONFIG_CPU_IDLE -+ /* Must be inspected within a rcu lock section */ -+ struct cpuidle_state *idle_state; -+#endif -+ -+#ifdef CONFIG_NO_HZ_COMMON -+#ifdef CONFIG_SMP -+ call_single_data_t nohz_csd; -+#endif -+ atomic_t nohz_flags; -+#endif /* CONFIG_NO_HZ_COMMON */ -+}; -+ -+extern unsigned long calc_load_update; -+extern atomic_long_t calc_load_tasks; -+ -+extern void calc_global_load_tick(struct rq *this_rq); -+extern long calc_load_fold_active(struct rq *this_rq, long adjust); -+ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) -+#define this_rq() this_cpu_ptr(&runqueues) -+#define task_rq(p) cpu_rq(task_cpu(p)) -+#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -+#define raw_rq() raw_cpu_ptr(&runqueues) -+ -+#ifdef CONFIG_SMP -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+void register_sched_domain_sysctl(void); -+void unregister_sched_domain_sysctl(void); -+#else -+static inline void register_sched_domain_sysctl(void) -+{ -+} -+static inline void unregister_sched_domain_sysctl(void) -+{ -+} -+#endif -+ -+extern bool sched_smp_initialized; -+ -+enum { -+ BASE_CPU_AFFINITY_CHK_LEVEL = 1, -+#ifdef CONFIG_SCHED_SMT -+ SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+#ifdef CONFIG_SCHED_MC -+ MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+ NR_CPU_AFFINITY_CHK_LEVEL -+}; -+ -+DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); -+ -+static inline int __best_mask_cpu(int cpu, const 
cpumask_t *cpumask, -+ const cpumask_t *mask) -+{ -+ while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) -+ mask++; -+ return cpu; -+} -+ -+static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) -+{ -+ return cpumask_test_cpu(cpu, cpumask)? cpu : -+ __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); -+} -+ -+extern void flush_smp_call_function_from_idle(void); -+ -+#else /* !CONFIG_SMP */ -+static inline void flush_smp_call_function_from_idle(void) { } -+#endif -+ -+#ifndef arch_scale_freq_tick -+static __always_inline -+void arch_scale_freq_tick(void) -+{ -+} -+#endif -+ -+#ifndef arch_scale_freq_capacity -+static __always_inline -+unsigned long arch_scale_freq_capacity(int cpu) -+{ -+ return SCHED_CAPACITY_SCALE; -+} -+#endif -+ -+static inline u64 __rq_clock_broken(struct rq *rq) -+{ -+ return READ_ONCE(rq->clock); -+} -+ -+static inline u64 rq_clock(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock; -+} -+ -+static inline u64 rq_clock_task(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock_task; -+} -+ -+/* -+ * {de,en}queue flags: -+ * -+ * DEQUEUE_SLEEP - task is no longer runnable -+ * ENQUEUE_WAKEUP - task just became runnable -+ * -+ */ -+ -+#define DEQUEUE_SLEEP 0x01 -+ -+#define ENQUEUE_WAKEUP 0x01 -+ -+ -+/* -+ * Below are scheduler API which using in other kernel code -+ * It use the dummy rq_flags -+ * ToDo : BMQ need to support these APIs for compatibility with mainline -+ * scheduler code. -+ */ -+struct rq_flags { -+ unsigned long flags; -+}; -+ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock); -+ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock); -+ -+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(&rq->lock); -+} -+ -+static inline void -+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) -+ __releases(rq->lock) -+ __releases(p->pi_lock) -+{ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+} -+ -+static inline void -+rq_unlock_irq(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+static inline struct rq * -+this_rq_lock_irq(struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ -+ return rq; -+} -+ -+static inline int task_current(struct rq *rq, struct task_struct *p) -+{ -+ return rq->curr == p; -+} -+ -+static inline bool task_running(struct task_struct *p) -+{ -+ return p->on_cpu; -+} -+ -+extern struct static_key_false sched_schedstats; -+ -+#ifdef CONFIG_CPU_IDLE -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+ rq->idle_state = idle_state; -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ WARN_ON(!rcu_read_lock_held()); -+ return rq->idle_state; -+} -+#else -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ return NULL; -+} -+#endif -+ -+static inline 
int cpu_of(const struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ return rq->cpu; -+#else -+ return 0; -+#endif -+} -+ -+#include "stats.h" -+ -+#ifdef CONFIG_NO_HZ_COMMON -+#define NOHZ_BALANCE_KICK_BIT 0 -+#define NOHZ_STATS_KICK_BIT 1 -+ -+#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) -+#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) -+ -+#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK) -+ -+#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) -+ -+/* TODO: needed? -+extern void nohz_balance_exit_idle(struct rq *rq); -+#else -+static inline void nohz_balance_exit_idle(struct rq *rq) { } -+*/ -+#endif -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+struct irqtime { -+ u64 total; -+ u64 tick_delta; -+ u64 irq_start_time; -+ struct u64_stats_sync sync; -+}; -+ -+DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -+ -+/* -+ * Returns the irqtime minus the softirq time computed by ksoftirqd. -+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime -+ * and never move forward. -+ */ -+static inline u64 irq_time_read(int cpu) -+{ -+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); -+ unsigned int seq; -+ u64 total; -+ -+ do { -+ seq = __u64_stats_fetch_begin(&irqtime->sync); -+ total = irqtime->total; -+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); -+ -+ return total; -+} -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+ -+#ifdef CONFIG_CPU_FREQ -+DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); -+ -+/** -+ * cpufreq_update_util - Take a note about CPU utilization changes. -+ * @rq: Runqueue to carry out the update for. -+ * @flags: Update reason flags. -+ * -+ * This function is called by the scheduler on the CPU whose utilization is -+ * being updated. -+ * -+ * It can only be called from RCU-sched read-side critical sections. -+ * -+ * The way cpufreq is currently arranged requires it to evaluate the CPU -+ * performance state (frequency/voltage) on a regular basis to prevent it from -+ * being stuck in a completely inadequate performance level for too long. -+ * That is not guaranteed to happen if the updates are only triggered from CFS -+ * and DL, though, because they may not be coming in if only RT tasks are -+ * active all the time (or there are RT tasks only). -+ * -+ * As a workaround for that issue, this function is called periodically by the -+ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, -+ * but that really is a band-aid. Going forward it should be replaced with -+ * solutions targeted more specifically at RT tasks. -+ */ -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+ data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); -+ if (data) -+ data->func(data, rq_clock(rq), flags); -+} -+#else -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} -+#endif /* CONFIG_CPU_FREQ */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+extern int __init sched_tick_offload_init(void); -+#else -+static inline int sched_tick_offload_init(void) { return 0; } -+#endif -+ -+#ifdef arch_scale_freq_capacity -+#ifndef arch_scale_freq_invariant -+#define arch_scale_freq_invariant() (true) -+#endif -+#else /* arch_scale_freq_capacity */ -+#define arch_scale_freq_invariant() (false) -+#endif -+ -+extern void schedule_idle(void); -+ -+/* -+ * !! For sched_setattr_nocheck() (kernel) only !! -+ * -+ * This is actually gross. 
:( -+ * -+ * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE -+ * tasks, but still be able to sleep. We need this on platforms that cannot -+ * atomically change clock frequency. Remove once fast switching will be -+ * available on such platforms. -+ * -+ * SUGOV stands for SchedUtil GOVernor. -+ */ -+#define SCHED_FLAG_SUGOV 0x10000000 -+ -+#ifdef CONFIG_MEMBARRIER -+/* -+ * The scheduler provides memory barriers required by membarrier between: -+ * - prior user-space memory accesses and store to rq->membarrier_state, -+ * - store to rq->membarrier_state and following user-space memory accesses. -+ * In the same way it provides those guarantees around store to rq->curr. -+ */ -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+ int membarrier_state; -+ -+ if (prev_mm == next_mm) -+ return; -+ -+ membarrier_state = atomic_read(&next_mm->membarrier_state); -+ if (READ_ONCE(rq->membarrier_state) == membarrier_state) -+ return; -+ -+ WRITE_ONCE(rq->membarrier_state, membarrier_state); -+} -+#else -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+} -+#endif -+ -+#ifdef CONFIG_NUMA -+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); -+#else -+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return nr_cpu_ids; -+} -+#endif -+ -+void swake_up_all_locked(struct swait_queue_head *q); -+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+#endif /* ALT_SCHED_H */ -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -new file mode 100644 -index 000000000000..aff0bb30a884 ---- /dev/null -+++ b/kernel/sched/bmq.h -@@ -0,0 +1,20 @@ -+#ifndef BMQ_H -+#define BMQ_H -+ -+/* bits: -+ * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ -+#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1) -+#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) -+ -+struct bmq { -+ DECLARE_BITMAP(bitmap, SCHED_BITS); -+ struct list_head heads[SCHED_BITS]; -+}; -+ -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); -+} -+ -+#endif -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -new file mode 100644 -index 000000000000..ad9a7c448da7 ---- /dev/null -+++ b/kernel/sched/bmq_imp.h -@@ -0,0 +1,185 @@ -+#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" -+ -+/* -+ * BMQ only routines -+ */ -+#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) -+#define boost_threshold(p) (sched_timeslice_ns >>\ -+ (15 - MAX_PRIORITY_ADJ - (p)->boost_prio)) -+ -+static inline void boost_task(struct task_struct *p) -+{ -+ int limit; -+ -+ switch (p->policy) { -+ case SCHED_NORMAL: -+ limit = -MAX_PRIORITY_ADJ; -+ break; -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ limit = 0; -+ break; -+ default: -+ return; -+ } -+ -+ if (p->boost_prio > limit) -+ p->boost_prio--; -+} -+ -+static inline void deboost_task(struct task_struct *p) -+{ -+ if (p->boost_prio < MAX_PRIORITY_ADJ) -+ p->boost_prio++; -+} -+ -+/* -+ * Common interfaces -+ */ -+static inline int task_sched_prio(struct task_struct *p, struct rq *rq) -+{ -+ return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq); -+ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ p->time_slice = sched_timeslice_ns; -+ -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { -+ if (SCHED_RR != p->policy) -+ deboost_task(p); -+ requeue_task(p, rq); -+ } -+} -+ -+static inline void update_task_priodl(struct task_struct *p) {} -+ -+static inline unsigned long sched_queue_watermark(struct rq *rq) -+{ -+ return find_first_bit(rq->queue.bitmap, SCHED_BITS); -+} -+ -+static inline void sched_queue_init(struct rq *rq) -+{ -+ struct bmq *q = &rq->queue; -+ int i; -+ -+ bitmap_zero(q->bitmap, SCHED_BITS); -+ for(i = 0; i < SCHED_BITS; i++) -+ INIT_LIST_HEAD(&q->heads[i]); -+} -+ -+static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) -+{ -+ struct bmq *q = &rq->queue; -+ -+ idle->bmq_idx = IDLE_TASK_SCHED_PRIO; -+ INIT_LIST_HEAD(&q->heads[idle->bmq_idx]); -+ list_add(&idle->bmq_node, &q->heads[idle->bmq_idx]); -+ set_bit(idle->bmq_idx, q->bitmap); -+} -+ -+/* -+ * This routine used in bmq scheduler only which assume the idle task in the bmq -+ */ -+static inline struct task_struct *sched_rq_first_task(struct rq *rq) -+{ -+ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); -+ const struct list_head *head = &rq->queue.heads[idx]; -+ -+ return list_first_entry(head, struct task_struct, bmq_node); -+} -+ -+static inline struct task_struct * -+sched_rq_next_task(struct task_struct *p, struct rq *rq) -+{ -+ unsigned long idx = p->bmq_idx; -+ struct list_head *head = &rq->queue.heads[idx]; -+ -+ if (list_is_last(&p->bmq_node, head)) { -+ idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); -+ head = &rq->queue.heads[idx]; -+ -+ return list_first_entry(head, struct task_struct, bmq_node); -+ } -+ -+ return list_next_entry(p, bmq_node); -+} -+ -+#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -+ sched_info_dequeued(rq, p); \ -+ \ -+ list_del(&p->bmq_node); \ -+ if (list_empty(&rq->queue.heads[p->bmq_idx])) { \ -+ clear_bit(p->bmq_idx, rq->queue.bitmap);\ -+ func; \ -+ } -+ -+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -+ sched_info_queued(rq, p); \ -+ psi_enqueue(p, flags); \ -+ \ -+ p->bmq_idx = task_sched_prio(p, rq); \ -+ list_add_tail(&p->bmq_node, &rq->queue.heads[p->bmq_idx]); \ -+ set_bit(p->bmq_idx, rq->queue.bitmap) -+ -+#define __SCHED_REQUEUE_TASK(p, rq, func) \ -+{ \ -+ int idx = task_sched_prio(p, rq); \ -+\ -+ list_del(&p->bmq_node); \ -+ list_add_tail(&p->bmq_node, &rq->queue.heads[idx]); \ -+ if (idx != p->bmq_idx) { \ -+ if (list_empty(&rq->queue.heads[p->bmq_idx])) \ -+ clear_bit(p->bmq_idx, rq->queue.bitmap); \ -+ p->bmq_idx = idx; \ -+ set_bit(p->bmq_idx, rq->queue.bitmap); \ -+ func; \ -+ } \ -+} -+ -+static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) -+{ -+ return (task_sched_prio(p, rq) != p->bmq_idx); -+} -+ -+static void sched_task_fork(struct task_struct *p, struct rq *rq) -+{ -+ p->boost_prio = (p->boost_prio < 0) ? -+ p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; -+} -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). 
-+ */ -+int task_prio(const struct task_struct *p) -+{ -+ if (p->prio < MAX_RT_PRIO) -+ return (p->prio - MAX_RT_PRIO); -+ return (p->prio - MAX_RT_PRIO + p->boost_prio); -+} -+ -+static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) -+{ -+ p->boost_prio = MAX_PRIORITY_ADJ; -+} -+ -+static void sched_task_ttwu(struct task_struct *p) -+{ -+ if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) -+ boost_task(p); -+} -+ -+static void sched_task_deactivate(struct task_struct *p, struct rq *rq) -+{ -+ if (rq_switch_time(rq) < boost_threshold(p)) -+ boost_task(p); -+} -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 7fbaee24c824..0d7ad05b84fe 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -183,6 +183,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - -+#ifndef CONFIG_SCHED_ALT - /* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. -@@ -300,6 +301,13 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) - - return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); - } -+#else /* CONFIG_SCHED_ALT */ -+static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) -+{ -+ sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); -+ return sg_cpu->max; -+} -+#endif - - /** - * sugov_iowait_reset() - Reset the IO boost status of a CPU. -@@ -443,7 +451,9 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } - */ - static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) - { -+#ifndef CONFIG_SCHED_ALT - if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) -+#endif - sg_policy->limits_changed = true; - } - -@@ -686,6 +696,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - } - - ret = sched_setattr_nocheck(thread, &attr); -+ - if (ret) { - kthread_stop(thread); - pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); -@@ -916,6 +927,7 @@ static int __init sugov_register(void) - core_initcall(sugov_register); - - #ifdef CONFIG_ENERGY_MODEL -+#ifndef CONFIG_SCHED_ALT - extern bool sched_energy_update; - extern struct mutex sched_energy_mutex; - -@@ -946,4 +958,10 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - } - - } -+#else /* CONFIG_SCHED_ALT */ -+void sched_cpufreq_governor_change(struct cpufreq_policy *policy, -+ struct cpufreq_governor *old_gov) -+{ -+} -+#endif - #endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index ff9435dee1df..0ee9967d2d74 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -122,7 +122,7 @@ void account_user_time(struct task_struct *p, u64 cputime) - p->utime += cputime; - account_group_user_time(p, cputime); - -- index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; -+ index = task_running_nice(p) ? CPUTIME_NICE : CPUTIME_USER; - - /* Add user time to cpustat. */ - task_group_account_field(p, index, cputime); -@@ -146,7 +146,7 @@ void account_guest_time(struct task_struct *p, u64 cputime) - p->gtime += cputime; - - /* Add guest time to cpustat. 
*/ -- if (task_nice(p) > 0) { -+ if (task_running_nice(p)) { - cpustat[CPUTIME_NICE] += cputime; - cpustat[CPUTIME_GUEST_NICE] += cputime; - } else { -@@ -269,7 +269,7 @@ static inline u64 account_other_time(u64 max) - #ifdef CONFIG_64BIT - static inline u64 read_sum_exec_runtime(struct task_struct *t) - { -- return t->se.sum_exec_runtime; -+ return tsk_seruntime(t); - } - #else - static u64 read_sum_exec_runtime(struct task_struct *t) -@@ -279,7 +279,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) - struct rq *rq; - - rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -+ ns = tsk_seruntime(t); - task_rq_unlock(rq, t, &rf); - - return ns; -@@ -658,7 +658,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - task_cputime(p, &cputime.utime, &cputime.stime); -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index 1ae95b9150d3..f5c3aa20d172 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -372,6 +372,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_ALT - /* - * idle-task scheduling class. - */ -@@ -492,3 +493,4 @@ const struct sched_class idle_sched_class = { - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -new file mode 100644 -index 000000000000..7fdeace7e8a5 ---- /dev/null -+++ b/kernel/sched/pds.h -@@ -0,0 +1,14 @@ -+#ifndef PDS_H -+#define PDS_H -+ -+/* bits: -+ * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ -+#define SCHED_BITS (MAX_RT_PRIO + 20 + 1) -+#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio > DEFAULT_PRIO); -+} -+ -+#endif -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -new file mode 100644 -index 000000000000..6baee5e961b9 ---- /dev/null -+++ b/kernel/sched/pds_imp.h -@@ -0,0 +1,257 @@ -+#define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" -+ -+static const u64 user_prio2deadline[NICE_WIDTH] = { -+/* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, -+/* -15 */ 6754965, 7430461, 8173507, 8990857, 9889942, -+/* -10 */ 10878936, 11966829, 13163511, 14479862, 15927848, -+/* -5 */ 17520632, 19272695, 21199964, 23319960, 25651956, -+/* 0 */ 28217151, 31038866, 34142752, 37557027, 41312729, -+/* 5 */ 45444001, 49988401, 54987241, 60485965, 66534561, -+/* 10 */ 73188017, 80506818, 88557499, 97413248, 107154572, -+/* 15 */ 117870029, 129657031, 142622734, 156885007, 172573507 -+}; -+ -+static const unsigned char dl_level_map[] = { -+/* 0 4 8 12 */ -+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, -+/* 16 20 24 28 */ -+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, -+/* 32 36 40 44 */ -+ 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, -+/* 48 52 56 60 */ -+ 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, -+/* 64 68 72 76 */ -+ 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 7, 6, 5, 4, 3, 2, -+/* 80 84 88 92 */ -+ 1, 0 -+}; -+ -+static inline int -+task_sched_prio(const struct task_struct *p, const struct rq *rq) -+{ -+ size_t delta; -+ -+ if (p == rq->idle) -+ return IDLE_TASK_SCHED_PRIO; -+ -+ if (p->prio < MAX_RT_PRIO) -+ return p->prio; -+ 
-+ delta = (rq->clock + user_prio2deadline[39] - p->deadline) >> 21; -+ delta = min((size_t)delta, ARRAY_SIZE(dl_level_map) - 1); -+ -+ return MAX_RT_PRIO + dl_level_map[delta]; -+} -+ -+static inline void update_task_priodl(struct task_struct *p) -+{ -+ p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq); -+ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ -+ p->time_slice = sched_timeslice_ns; -+ -+ if (p->prio >= MAX_RT_PRIO) -+ p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; -+ update_task_priodl(p); -+ -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) -+ requeue_task(p, rq); -+} -+ -+/* -+ * pds_skiplist_task_search -- search function used in PDS run queue skip list -+ * node insert operation. -+ * @it: iterator pointer to the node in the skip list -+ * @node: pointer to the skiplist_node to be inserted -+ * -+ * Returns true if key of @it is less or equal to key value of @node, otherwise -+ * false. -+ */ -+static inline bool -+pds_skiplist_task_search(struct skiplist_node *it, struct skiplist_node *node) -+{ -+ return (skiplist_entry(it, struct task_struct, sl_node)->priodl <= -+ skiplist_entry(node, struct task_struct, sl_node)->priodl); -+} -+ -+/* -+ * Define the skip list insert function for PDS -+ */ -+DEFINE_SKIPLIST_INSERT_FUNC(pds_skiplist_insert, pds_skiplist_task_search); -+ -+/* -+ * Init the queue structure in rq -+ */ -+static inline void sched_queue_init(struct rq *rq) -+{ -+ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); -+} -+ -+/* -+ * Init idle task and put into queue structure of rq -+ * IMPORTANT: may be called multiple times for a single cpu -+ */ -+static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) -+{ -+ /*printk(KERN_INFO "sched: init(%d) - %px\n", cpu_of(rq), idle);*/ -+ int default_prio = idle->prio; -+ -+ idle->prio = MAX_PRIO; -+ idle->deadline = 0ULL; -+ update_task_priodl(idle); -+ -+ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); -+ -+ idle->sl_node.level = idle->sl_level; -+ pds_skiplist_insert(&rq->sl_header, &idle->sl_node); -+ -+ idle->prio = default_prio; -+} -+ -+/* -+ * This routine assume that the idle task always in queue -+ */ -+static inline struct task_struct *sched_rq_first_task(struct rq *rq) -+{ -+ struct skiplist_node *node = rq->sl_header.next[0]; -+ -+ BUG_ON(node == &rq->sl_header); -+ return skiplist_entry(node, struct task_struct, sl_node); -+} -+ -+static inline struct task_struct * -+sched_rq_next_task(struct task_struct *p, struct rq *rq) -+{ -+ struct skiplist_node *next = p->sl_node.next[0]; -+ -+ BUG_ON(next == &rq->sl_header); -+ return skiplist_entry(next, struct task_struct, sl_node); -+} -+ -+static inline unsigned long sched_queue_watermark(struct rq *rq) -+{ -+ return task_sched_prio(sched_rq_first_task(rq), rq); -+} -+ -+#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -+ sched_info_dequeued(rq, p); \ -+ \ -+ if (skiplist_del_init(&rq->sl_header, &p->sl_node)) { \ -+ func; \ -+ } -+ -+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -+ sched_info_queued(rq, p); \ -+ psi_enqueue(p, flags); \ -+ \ -+ p->sl_node.level = p->sl_level; \ -+ pds_skiplist_insert(&rq->sl_header, &p->sl_node) -+ -+/* -+ * Requeue a task @p to @rq -+ */ -+#define __SCHED_REQUEUE_TASK(p, rq, func) \ -+{\ -+ bool b_first = skiplist_del_init(&rq->sl_header, &p->sl_node); 
\ -+\ -+ p->sl_node.level = p->sl_level; \ -+ if (pds_skiplist_insert(&rq->sl_header, &p->sl_node) || b_first) { \ -+ func; \ -+ } \ -+} -+ -+static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) -+{ -+ struct skiplist_node *node = p->sl_node.prev[0]; -+ -+ if (node != &rq->sl_header) { -+ struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); -+ -+ if (t->priodl > p->priodl) -+ return true; -+ } -+ -+ node = p->sl_node.next[0]; -+ if (node != &rq->sl_header) { -+ struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); -+ -+ if (t->priodl < p->priodl) -+ return true; -+ } -+ -+ return false; -+} -+ -+/* -+ * pds_skiplist_random_level -- Returns a pseudo-random level number for skip -+ * list node which is used in PDS run queue. -+ * -+ * In current implementation, based on testing, the first 8 bits in microseconds -+ * of niffies are suitable for random level population. -+ * find_first_bit() is used to satisfy p = 0.5 between each levels, and there -+ * should be platform hardware supported instruction(known as ctz/clz) to speed -+ * up this function. -+ * The skiplist level for a task is populated when task is created and doesn't -+ * change in task's life time. When task is being inserted into run queue, this -+ * skiplist level is set to task's sl_node->level, the skiplist insert function -+ * may change it based on current level of the skip lsit. -+ */ -+static inline int pds_skiplist_random_level(const struct task_struct *p) -+{ -+ long unsigned int randseed; -+ -+ /* -+ * 1. Some architectures don't have better than microsecond resolution -+ * so mask out ~microseconds as a factor of the random seed for skiplist -+ * insertion. -+ * 2. Use address of task structure pointer as another factor of the -+ * random seed for task burst forking scenario. -+ */ -+ randseed = (task_rq(p)->clock ^ (long unsigned int)p) >> 10; -+ -+ return find_first_bit(&randseed, NUM_SKIPLIST_LEVEL - 1); -+} -+ -+static void sched_task_fork(struct task_struct *p, struct rq *rq) -+{ -+ p->sl_level = pds_skiplist_random_level(p); -+ if (p->prio >= MAX_RT_PRIO) -+ p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; -+ update_task_priodl(p); -+} -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). 
-+ */ -+int task_prio(const struct task_struct *p) -+{ -+ int ret; -+ -+ if (p->prio < MAX_RT_PRIO) -+ return (p->prio - MAX_RT_PRIO); -+ -+ preempt_disable(); -+ ret = task_sched_prio(p, this_rq()) - MAX_RT_PRIO; -+ preempt_enable(); -+ -+ return ret; -+} -+ -+static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) -+{ -+ time_slice_expired(p, rq); -+} -+ -+static void sched_task_ttwu(struct task_struct *p) {} -+static void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} -diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c -index b4b1ff96642f..0ead9625081f 100644 ---- a/kernel/sched/pelt.c -+++ b/kernel/sched/pelt.c -@@ -274,6 +274,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load) - WRITE_ONCE(sa->util_avg, sa->util_sum / divider); - } - -+#ifndef CONFIG_SCHED_ALT - /* - * sched_entity: - * -@@ -391,8 +392,9 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - - return 0; - } -+#endif - --#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) - /* - * thermal: - * -diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h -index eb034d9f024d..7fefc89b377a 100644 ---- a/kernel/sched/pelt.h -+++ b/kernel/sched/pelt.h -@@ -1,13 +1,15 @@ - #ifdef CONFIG_SMP - #include "sched-pelt.h" - -+#ifndef CONFIG_SCHED_ALT - int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); - int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); - int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); - int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); - int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); -+#endif - --#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) - int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity); - - static inline u64 thermal_load_avg(struct rq *rq) -@@ -37,6 +39,7 @@ update_irq_load_avg(struct rq *rq, u64 running) - } - #endif - -+#ifndef CONFIG_SCHED_ALT - /* - * When a task is dequeued, its estimated utilization should not be update if - * its util_avg has not been updated at least once. 
-@@ -157,9 +160,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) - return rq_clock_pelt(rq_of(cfs_rq)); - } - #endif -+#endif /* CONFIG_SCHED_ALT */ - - #else - -+#ifndef CONFIG_SCHED_ALT - static inline int - update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) - { -@@ -177,6 +182,7 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - { - return 0; - } -+#endif - - static inline int - update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 877fb08eb1b0..da6a01b591a0 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2,6 +2,10 @@ - /* - * Scheduler internal types and methods: - */ -+#ifdef CONFIG_SCHED_ALT -+#include "alt_sched.h" -+#else -+ - #include - - #include -@@ -2542,3 +2546,9 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) - - void swake_up_all_locked(struct swait_queue_head *q); - void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (task_nice(p) > 0); -+} -+#endif /* !CONFIG_SCHED_ALT */ -diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c -index 750fb3c67eed..108422ebc7bf 100644 ---- a/kernel/sched/stats.c -+++ b/kernel/sched/stats.c -@@ -22,8 +22,10 @@ static int show_schedstat(struct seq_file *seq, void *v) - } else { - struct rq *rq; - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_ALT - struct sched_domain *sd; - int dcount = 0; -+#endif - #endif - cpu = (unsigned long)(v - 2); - rq = cpu_rq(cpu); -@@ -40,6 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - seq_printf(seq, "\n"); - - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_ALT - /* domain-specific stats */ - rcu_read_lock(); - for_each_domain(cpu, sd) { -@@ -68,6 +71,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - sd->ttwu_move_balance); - } - rcu_read_unlock(); -+#endif - #endif - } - return 0; -diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index ba81187bb7af..996b5786b058 100644 ---- a/kernel/sched/topology.c -+++ b/kernel/sched/topology.c -@@ -4,6 +4,7 @@ - */ - #include "sched.h" - -+#ifndef CONFIG_SCHED_ALT - DEFINE_MUTEX(sched_domains_mutex); - - /* Protected by sched_domains_mutex: */ -@@ -1180,8 +1181,10 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) - */ - - static int default_relax_domain_level = -1; -+#endif /* CONFIG_SCHED_ALT */ - int sched_domain_level_max; - -+#ifndef CONFIG_SCHED_ALT - static int __init setup_relax_domain_level(char *str) - { - if (kstrtoint(str, 0, &default_relax_domain_level)) -@@ -1413,6 +1416,7 @@ sd_init(struct sched_domain_topology_level *tl, - - return sd; - } -+#endif /* CONFIG_SCHED_ALT */ - - /* - * Topology list, bottom-up. 
-@@ -1442,6 +1446,7 @@ void set_sched_topology(struct sched_domain_topology_level *tl) - sched_domain_topology = tl; - } - -+#ifndef CONFIG_SCHED_ALT - #ifdef CONFIG_NUMA - - static const struct cpumask *sd_numa_mask(int cpu) -@@ -2316,3 +2321,17 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); - mutex_unlock(&sched_domains_mutex); - } -+#else /* CONFIG_SCHED_ALT */ -+void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], -+ struct sched_domain_attr *dattr_new) -+{} -+ -+#ifdef CONFIG_NUMA -+int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; -+ -+int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return best_mask_cpu(cpu, cpus); -+} -+#endif /* CONFIG_NUMA */ -+#endif -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index db1ce7af2563..4437a207d061 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -120,6 +120,10 @@ static unsigned long long_max = LONG_MAX; - static int one_hundred = 100; - static int two_hundred = 200; - static int one_thousand = 1000; -+#ifdef CONFIG_SCHED_ALT -+static int __maybe_unused zero = 0; -+extern int sched_yield_type; -+#endif - #ifdef CONFIG_PRINTK - static int ten_thousand = 10000; - #endif -@@ -184,7 +188,7 @@ static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT; - int sysctl_legacy_va_layout; - #endif - --#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_ALT) - static int min_sched_granularity_ns = 100000; /* 100 usecs */ - static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_wakeup_granularity_ns; /* 0 usecs */ -@@ -1653,6 +1657,7 @@ int proc_do_static_key(struct ctl_table *table, int write, - } - - static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_ALT - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, -@@ -1834,6 +1839,7 @@ static struct ctl_table kern_table[] = { - .extra2 = SYSCTL_ONE, - }, - #endif -+#endif /* !CONFIG_SCHED_ALT */ - #ifdef CONFIG_PROVE_LOCKING - { - .procname = "prove_locking", -@@ -2410,6 +2416,17 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_SCHED_ALT -+ { -+ .procname = "yield_type", -+ .data = &sched_yield_type, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &two, -+ }, -+#endif - #if defined(CONFIG_S390) && defined(CONFIG_SMP) - { - .procname = "spin_retry", -diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index d89da1c7e005..a73adff9f309 100644 ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -1923,8 +1923,10 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, - int ret = 0; - u64 slack; - -+#ifndef CONFIG_SCHED_ALT - slack = current->timer_slack_ns; - if (dl_task(current) || rt_task(current)) -+#endif - slack = 0; - - hrtimer_init_sleeper_on_stack(&t, clockid, mode); -diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c -index 165117996ea0..bd8718a51499 100644 ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -216,7 +216,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) - u64 stime, utime; - - task_cputime(p, &utime, &stime); -- store_samples(samples, stime, utime, p->se.sum_exec_runtime); -+ store_samples(samples, stime, utime, tsk_seruntime(p)); - } - - static void proc_sample_cputime_atomic(struct 
task_cputime_atomic *at, -@@ -789,6 +789,7 @@ static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, - } - } - -+#ifndef CONFIG_SCHED_ALT - static inline void check_dl_overrun(struct task_struct *tsk) - { - if (tsk->dl.dl_overrun) { -@@ -796,6 +797,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) - __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); - } - } -+#endif - - static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) - { -@@ -823,8 +825,10 @@ static void check_thread_timers(struct task_struct *tsk, - u64 samples[CPUCLOCK_MAX]; - unsigned long soft; - -+#ifndef CONFIG_SCHED_ALT - if (dl_task(tsk)) - check_dl_overrun(tsk); -+#endif - - if (expiry_cache_is_inactive(pct)) - return; -@@ -838,7 +842,7 @@ static void check_thread_timers(struct task_struct *tsk, - soft = task_rlimit(tsk, RLIMIT_RTTIME); - if (soft != RLIM_INFINITY) { - /* Task RT timeout is accounted in jiffies. RTTIME is usec */ -- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); -+ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); - unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - - /* At the hard limit, send SIGKILL. No further action. */ -@@ -1074,8 +1078,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) - return true; - } - -+#ifndef CONFIG_SCHED_ALT - if (dl_task(tsk) && tsk->dl.dl_overrun) - return true; -+#endif - - return false; - } -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index b5e3496cf803..65f60c77bc50 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) - { - /* Make this a -deadline thread */ - static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_ALT -+ /* No deadline on BMQ/PDS, use RR */ -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, - .sched_runtime = 100000ULL, - .sched_deadline = 10000000ULL, - .sched_period = 10000000ULL -+#endif - }; - struct wakeup_test_data *x = data; - diff --git a/linux58-tkg/linux58-tkg-patches/0011-ZFS-fix.patch b/linux58-tkg/linux58-tkg-patches/0011-ZFS-fix.patch deleted file mode 100644 index af71d04..0000000 --- a/linux58-tkg/linux58-tkg-patches/0011-ZFS-fix.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 1e010beda2896bdf3082fb37a3e49f8ce20e04d8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= -Date: Thu, 2 May 2019 05:28:08 +0100 -Subject: [PATCH] x86/fpu: Export kernel_fpu_{begin,end}() with - EXPORT_SYMBOL_GPL -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -We need these symbols in zfs as the fpu implementation breaks userspace: - -https://github.com/zfsonlinux/zfs/issues/9346 -Signed-off-by: Jörg Thalheim ---- - arch/x86/kernel/fpu/core.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c -index 12c70840980e..352538b3bb5d 100644 ---- a/arch/x86/kernel/fpu/core.c -+++ b/arch/x86/kernel/fpu/core.c -@@ -102,7 +102,7 @@ void kernel_fpu_begin(void) - } - __cpu_invalidate_fpregs_state(); - } --EXPORT_SYMBOL_GPL(kernel_fpu_begin); -+EXPORT_SYMBOL(kernel_fpu_begin); - - void kernel_fpu_end(void) - { -@@ -111,7 +111,7 @@ void kernel_fpu_end(void) - this_cpu_write(in_kernel_fpu, false); - preempt_enable(); - } --EXPORT_SYMBOL_GPL(kernel_fpu_end); -+EXPORT_SYMBOL(kernel_fpu_end); - - /* - * Save the FPU state (mark it for reload if necessary): --- -2.23.0 - - 
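The hunk above removes linux58-tkg/linux58-tkg-patches/0011-ZFS-fix.patch, which swaps EXPORT_SYMBOL_GPL for EXPORT_SYMBOL on kernel_fpu_begin()/kernel_fpu_end() so that non-GPL modules such as ZFS can keep using the in-kernel FPU. As a rough illustration only (not the project's actual logic, which is handled by the _tkg_srcprep step of the prepare script), applying such a numbered patch file by hand to an extracted kernel tree would look roughly like this, assuming the patch file and a linux-5.8 source directory sit side by side:

    # Dry run first: confirm the patch applies without rejects (GNU patch).
    patch -Np1 --dry-run -d linux-5.8 -i 0011-ZFS-fix.patch

    # Apply it for real once the dry run is clean.
    patch -Np1 -d linux-5.8 -i 0011-ZFS-fix.patch

The -p1 strip level matches the a/ and b/ path prefixes used in the patch headers above.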
diff --git a/linux58-tkg/linux58-tkg-patches/0012-misc-additions.patch b/linux58-tkg/linux58-tkg-patches/0012-misc-additions.patch deleted file mode 100644 index ae06419..0000000 --- a/linux58-tkg/linux58-tkg-patches/0012-misc-additions.patch +++ /dev/null @@ -1,54 +0,0 @@ -diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig -index 0840d27381ea..73aba9a31064 100644 ---- a/drivers/tty/Kconfig -+++ b/drivers/tty/Kconfig -@@ -75,6 +75,19 @@ config VT_CONSOLE_SLEEP - def_bool y - depends on VT_CONSOLE && PM_SLEEP - -+config NR_TTY_DEVICES -+ int "Maximum tty device number" -+ depends on VT -+ range 12 63 -+ default 63 -+ ---help--- -+ This option is used to change the number of tty devices in /dev. -+ The default value is 63. The lowest number you can set is 12, -+ 63 is also the upper limit so we don't overrun the serial -+ consoles. -+ -+ If unsure, say 63. -+ - config HW_CONSOLE - bool - depends on VT && !UML -diff --git a/include/uapi/linux/vt.h b/include/uapi/linux/vt.h -index e9d39c48520a..3bceead8da40 100644 ---- a/include/uapi/linux/vt.h -+++ b/include/uapi/linux/vt.h -@@ -3,12 +3,25 @@ - #define _UAPI_LINUX_VT_H - - -+/* -+ * We will make this definition solely for the purpose of making packages -+ * such as splashutils build, because they can not understand that -+ * NR_TTY_DEVICES is defined in the kernel configuration. -+ */ -+#ifndef CONFIG_NR_TTY_DEVICES -+#define CONFIG_NR_TTY_DEVICES 63 -+#endif -+ - /* - * These constants are also useful for user-level apps (e.g., VC - * resizing). - */ - #define MIN_NR_CONSOLES 1 /* must be at least 1 */ --#define MAX_NR_CONSOLES 63 /* serial lines start at 64 */ -+/* -+ * NR_TTY_DEVICES: -+ * Value MUST be at least 12 and must never be higher then 63 -+ */ -+#define MAX_NR_CONSOLES CONFIG_NR_TTY_DEVICES /* serial lines start above this */ - /* Note: the ioctl VT_GETSTATE does not work for - consoles 16 and higher (since it returns a short) */ \ No newline at end of file diff --git a/linux59-tkg/PKGBUILD b/linux59-tkg/PKGBUILD deleted file mode 100644 index d1939b1..0000000 --- a/linux59-tkg/PKGBUILD +++ /dev/null @@ -1,285 +0,0 @@ -# Based on the file created for Arch Linux by: -# Tobias Powalowski -# Thomas Baechler - -# Contributor: Tk-Glitch - -plain ' .---.` `.---.' -plain ' `/syhhhyso- -osyhhhys/`' -plain ' .syNMdhNNhss/``.---.``/sshNNhdMNys.' -plain ' +sdMh.`+MNsssssssssssssssNM+`.hMds+' -plain ' :syNNdhNNhssssssssssssssshNNhdNNys:' -plain ' /ssyhhhysssssssssssssssssyhhhyss/' -plain ' .ossssssssssssssssssssssssssssso.' 
-plain ' :sssssssssssssssssssssssssssssssss:' -plain ' /sssssssssssssssssssssssssssssssssss/' -plain ' :sssssssssssssoosssssssoosssssssssssss:' -plain ' osssssssssssssoosssssssoossssssssssssso' -plain ' osssssssssssyyyyhhhhhhhyyyyssssssssssso' -plain ' /yyyyyyhhdmmmmNNNNNNNNNNNmmmmdhhyyyyyy/' -plain ' smmmNNNNNNNNNNNNNNNNNNNNNNNNNNNNNmmms' -plain ' /dNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNd/' -plain ' `:sdNNNNNNNNNNNNNNNNNNNNNNNNNds:`' -plain ' `-+shdNNNNNNNNNNNNNNNdhs+-`' -plain ' `.-:///////:-.`' - -_where="$PWD" # track basedir as different Arch based distros are moving srcdir around - -source "$_where"/customization.cfg # load default configuration from file -source "$_where"/linux*-tkg-config/prepare - -if [[ "$_sub" = rc* ]]; then - _srcpath="linux-${_basekernel}-${_sub}" -else - _srcpath="linux-${_basekernel}" -fi - -_tkg_initscript - -_distro="Arch" - -if [ -n "$_custom_pkgbase" ]; then - pkgbase="${_custom_pkgbase}" -else - pkgbase=linux"${_basever}"-tkg-"${_cpusched}" -fi -pkgname=("${pkgbase}" "${pkgbase}-headers") -pkgver="${_basekernel}"."${_sub}" -pkgrel=8 -pkgdesc='Linux-tkg' -arch=('x86_64') # no i686 in here -url="http://www.kernel.org/" -license=('GPL2') -makedepends=('xmlto' 'docbook-xsl' 'kmod' 'inetutils' 'bc' 'libelf' 'pahole' 'patchutils' 'flex' 'python-sphinx' 'python-sphinx_rtd_theme' 'graphviz' 'imagemagick' 'git') -optdepends=('schedtool') -options=('!strip' 'docs') -source=("https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-${_basekernel}.tar.xz" - "https://cdn.kernel.org/pub/linux/kernel/v5.x/patch-${pkgver}.xz" - "https://raw.githubusercontent.com/graysky2/kernel_gcc_patch/master/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v5.8%2B.patch" - 'config.x86_64' # stock Arch config - #'config_hardened.x86_64' # hardened Arch config - 90-cleanup.hook - cleanup - # ARCH Patches - 0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch - # TkG - 0002-clear-patches.patch - 0003-glitched-base.patch - 0003-glitched-cfs.patch - 0004-glitched-ondemand-muqss.patch - 0004-glitched-muqss.patch - 0004-5.9-ck1.patch - #0005-undead-glitched-ondemand-pds.patch - #0005-undead-glitched-pds.patch - #0005-v5.8_undead-pds099o.patch - 0005-glitched-pds.patch - 0006-add-acs-overrides_iommu.patch - 0007-v5.9-fsync.patch - 0008-5.9-bcachefs.patch - 0009-glitched-ondemand-bmq.patch - 0009-glitched-bmq.patch - 0009-prjc_v5.9-r0.patch - 0011-ZFS-fix.patch - #0012-linux-hardened.patch - 0012-misc-additions.patch -) -sha256sums=('3239a4ee1250bf2048be988cc8cb46c487b2c8a0de5b1b032d38394d5c6b1a06' - '7edb7b9d06b02f9b88d868c74ab618baf899c94edb19a73291f640dbea55c312' - '5ab29eb64e57df83b395a29a6a4f89030d142feffbfbf73b3afc6d97a2a7fd12' - 'ca84d1966bf13570768a65015ddcbde198d866d2a5a44df21a581ed57860b887' - '1e15fc2ef3fa770217ecc63a220e5df2ddbcf3295eb4a021171e7edd4c6cc898' - '66a03c246037451a77b4d448565b1d7e9368270c7d02872fbd0b5d024ed0a997' - 'f6383abef027fd9a430fd33415355e0df492cdc3c90e9938bf2d98f4f63b32e6' - '35a7cde86fb94939c0f25a62b8c47f3de0dbd3c65f876f460b263181b3e92fc0' - 'b9ebe0ae69bc2b2091d6bfcf6c7875a87ea7969fcfa4e306c48d47a60f9ef4d6' - '7058e57fd68367b029adc77f2a82928f1433daaf02c8c279cb2d13556c8804d7' - 'c605f638d74c61861ebdc36ebd4cb8b6475eae2f6273e1ccb2bbb3e10a2ec3fe' - '2bbbac963b6ca44ef3f8a71ec7c5cad7d66df860869a73059087ee236775970a' - '45a9ab99215ab3313be6e66e073d29154aac55bc58975a4df2dad116c918d27c' - 'fca63d15ca4502aebd73e76d7499b243d2c03db71ff5ab0bf5cf268b2e576320' - '19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a' - 
'b302ba6c5bbe8ed19b20207505d513208fae1e678cf4d8e7ac0b154e5fe3f456' - '3956c324798f25bcf8e6c5f6d160551245304c5cfa3a2cba73e5b1e350c364ce' - '9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177' - 'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911' - '88c7e308e474c845e0cc09e09bd223fc39876eca757abf6d6c3b8321f49ce1f1' - '49262ce4a8089fa70275aad742fc914baa28d9c384f710c9a62f64796d13e104' - '433b919e6a0be26784fb4304c43b1811a28f12ad3de9e26c0af827f64c0c316e') - -export KBUILD_BUILD_HOST=archlinux -export KBUILD_BUILD_USER=$pkgbase -export KBUILD_BUILD_TIMESTAMP="$(date -Ru${SOURCE_DATE_EPOCH:+d @$SOURCE_DATE_EPOCH})" - -prepare() { - rm -rf $pkgdir # Nuke the entire pkg folder so it'll get regenerated clean on next build - - ln -s "${_where}/customization.cfg" "${srcdir}" # workaround - - cd "${srcdir}/${_srcpath}" - - _tkg_srcprep -} - -build() { - cd "${srcdir}/${_srcpath}" - - # Use custom compiler paths if defined - if [ -n "${CUSTOM_GCC_PATH}" ]; then - PATH=${CUSTOM_GCC_PATH}/bin:${CUSTOM_GCC_PATH}/lib:${CUSTOM_GCC_PATH}/include:${PATH} - fi - - if [ "$_force_all_threads" = "true" ]; then - _force_all_threads="-j$((`nproc`*2))" - else - _force_all_threads="${MAKEFLAGS}" - fi - - # ccache - if [ "$_noccache" != "true" ] && pacman -Qq ccache &> /dev/null; then - export PATH="/usr/lib/ccache/bin/:$PATH" - export CCACHE_SLOPPINESS="file_macro,locale,time_macros" - export CCACHE_NOHASHDIR="true" - msg2 'ccache was found and will be used' - fi - - # document the TkG variables, excluding "_", "_EXT_CONFIG_PATH", and "_where". - declare -p | cut -d ' ' -f 3 | grep -P '^_(?!=|EXT_CONFIG_PATH|where)' > "${srcdir}/customization-full.cfg" - - # build! - _runtime=$( time ( schedtool -B -n 1 -e ionice -n 1 make ${_force_all_threads} LOCALVERSION= bzImage modules 2>&1 ) 3>&1 1>&2 2>&3 ) || _runtime=$( time ( make ${_force_all_threads} LOCALVERSION= bzImage modules 2>&1 ) 3>&1 1>&2 2>&3 ) -} - -hackbase() { - pkgdesc="The $pkgdesc kernel and modules" - depends=('coreutils' 'kmod' 'initramfs') - optdepends=('linux-docs: Kernel hackers manual - HTML documentation that comes with the Linux kernel.' - 'crda: to set the correct wireless channels of your country.' - 'linux-firmware: Firmware files for Linux' - 'modprobed-db: Keeps track of EVERY kernel module that has ever been probed. Useful for make localmodconfig.' - 'nvidia-tkg: NVIDIA drivers for all installed kernels - non-dkms version.' - 'nvidia-dkms-tkg: NVIDIA drivers for all installed kernels - dkms version.' 
- 'update-grub: Simple wrapper around grub-mkconfig.') - provides=("linux=${pkgver}" "${pkgbase}" VIRTUALBOX-GUEST-MODULES WIREGUARD-MODULE) - replaces=(virtualbox-guest-modules-arch wireguard-arch) - - cd "${srcdir}/${_srcpath}" - - # get kernel version - local _kernver="$(\033[1;0m \033[1;1m$1\033[1;0m" >&2 -} - -error() { - echo -e " \033[1;31m==> ERROR: $1\033[1;0m" >&2 -} - -warning() { - echo -e " \033[1;33m==> WARNING: $1\033[1;0m" >&2 -} - -plain() { - echo "$1" >&2 -} - -# Stop the script at any encountered error -set -e - -_where=`pwd` -srcdir="$_where" - -source linux*-tkg-config/prepare - -_cpu_opt_patch_link="https://raw.githubusercontent.com/graysky2/kernel_gcc_patch/master/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v5.8%2B.patch" - -source customization.cfg - -if [ "$1" != "install" ] && [ "$1" != "config" ] && [ "$1" != "uninstall-help" ]; then - msg2 "Argument not recognised, options are: - - config : shallow clones the linux ${_basekernel}.x git tree into the folder linux-${_basekernel}, then applies the extra patches to it and prepares the .config file - by copying the one from the current linux system in /boot/config-`uname -r` and updating it. - - install : [RPM and DEB based distros only], does the config step, proceeds to compile, then prompts to install - - uninstall-help : [RPM and DEB based distros only], lists the installed kernels in this system, then gives a hint on how to uninstall them manually." - exit 0 -fi - -# Load external configuration file if present. Available variable values will overwrite customization.cfg ones. -if [ -e "$_EXT_CONFIG_PATH" ]; then - msg2 "External configuration file $_EXT_CONFIG_PATH will be used and will override customization.cfg values." - source "$_EXT_CONFIG_PATH" -fi - -_misc_adds="false" # We currently don't want this enabled on non-Arch - -if [ "$1" = "install" ] || [ "$1" = "config" ]; then - - if [ -z $_distro ] && [ "$1" = "install" ]; then - while true; do - echo "Which Linux distribution are you running?" - echo "If it's not on the list, choose the closest one to it: Fedora/Suse for RPM, Ubuntu/Debian for DEB" - echo " 1) Debian" - echo " 2) Fedora" - echo " 3) Suse" - echo " 4) Ubuntu" - read -p "[1-4]: " _distro_index - - if [ "$_distro_index" = "1" ]; then - _distro="Debian" - break - elif [ "$_distro_index" = "2" ]; then - _distro="Fedora" - break - elif [ "$_distro_index" = "3" ]; then - _distro="Suse" - break - elif [ "$_distro_index" = "4" ]; then - _distro="Ubuntu" - break - else - echo "Wrong index." - fi - done - fi - - if [[ $1 = "install" && "$_distro" != "Ubuntu" && "$_distro" != "Debian" && "$_distro" != "Fedora" && "$_distro" != "Suse" ]]; then - msg2 "Variable \"_distro\" in \"customization.cfg\" hasn't been set to \"Ubuntu\", \"Debian\", \"Fedora\" or \"Suse\"" - msg2 "This script can only install custom kernels for RPM and DEB based distros, and only those exact keywords are accepted. Exiting..."
- exit 0 - fi - - if [ "$_distro" = "Ubuntu" ] || [ "$_distro" = "Debian" ]; then - msg2 "Installing dependencies" - sudo apt install git build-essential kernel-package fakeroot libncurses5-dev libssl-dev ccache bison flex qtbase5-dev -y - elif [ "$_distro" = "Fedora" ]; then - msg2 "Installing dependencies" - sudo dnf install fedpkg fedora-packager rpmdevtools ncurses-devel pesign grubby qt5-devel libXi-devel gcc-c++ git ccache flex bison elfutils-libelf-devel openssl-devel dwarves rpm-build -y - elif [ "$_distro" = "Suse" ]; then - msg2 "Installing dependencies" - sudo zypper install -y rpmdevtools ncurses-devel pesign libXi-devel gcc-c++ git ccache flex bison elfutils libelf-devel openssl-devel dwarves make patch bc rpm-build libqt5-qtbase-common-devel libqt5-qtbase-devel lz4 - fi - - # Force prepare script to avoid Arch specific commands if the user is using `config` - if [ "$1" = "config" ]; then - _distro="" - fi - - if [ -d linux-${_basekernel}.orig ]; then - rm -rf linux-${_basekernel}.orig - fi - - if [ -d linux-${_basekernel} ]; then - msg2 "Resetting files in linux-$_basekernel to their original state and getting latest updates" - cd "$_where"/linux-${_basekernel} - git checkout --force linux-$_basekernel.y - git clean -f -d -x - git pull - msg2 "Done" - cd "$_where" - else - msg2 "Shallow git cloning linux $_basekernel" - git clone --branch linux-$_basekernel.y --single-branch --depth=1 https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git linux-${_basekernel} - msg2 "Done" - fi - - # Define current kernel subversion - if [ -z $_kernel_subver ]; then - cd "$_where"/linux-${_basekernel} - _kernelverstr=`git describe` - _kernel_subver=${_kernelverstr:5} - cd "$_where" - fi - - - # Run the init script that is also run in the PKGBUILD; it will define some env vars that we will use - _tkg_initscript - - cd "$_where" - msg2 "Downloading Graysky2's CPU optimisations patch" - wget "$_cpu_opt_patch_link" - - # Follow Ubuntu install instructions in https://wiki.ubuntu.com/KernelTeam/GitKernelBuild - - # cd into the linux folder, copy Ubuntu's current config file, update with new params - cd "$_where"/linux-${_basekernel} - - msg2 "Copying current kernel's config and running make oldconfig..." - cp /boot/config-`uname -r` .config - if [ "$_distro" = "Debian" ]; then # Help with Debian's cert problem. - sed -i -e 's#CONFIG_SYSTEM_TRUSTED_KEYS="debian/certs/test-signing-certs.pem"#CONFIG_SYSTEM_TRUSTED_KEYS=""#g' .config - sed -i -e 's#CONFIG_SYSTEM_TRUSTED_KEYS="debian/certs/debian-uefi-certs.pem"#CONFIG_SYSTEM_TRUSTED_KEYS=""#g' .config - fi - yes '' | make oldconfig - msg2 "Done" - - # apply linux-tkg patching script - _tkg_srcprep - - msg2 "Configuration done."
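For reference, the install.sh removed above is driven entirely by its first argument, as its own help text documents (config, install, uninstall-help). A minimal usage sketch of that flow, assuming the script is run from the folder that contains it:

    ./install.sh config          # clone or update the kernel tree, apply the tkg patches, prepare .config
    ./install.sh install         # config step, then compile and prompt to install (RPM/DEB based distros only)
    ./install.sh uninstall-help  # list installed tkg kernels and hint at how to remove them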
-fi - -if [ "$1" = "install" ]; then - - # Use custom compiler paths if defined - if [ -n "${CUSTOM_GCC_PATH}" ]; then - PATH=${CUSTOM_GCC_PATH}/bin:${CUSTOM_GCC_PATH}/lib:${CUSTOM_GCC_PATH}/include:${PATH} - fi - - if [ "$_force_all_threads" = "true" ]; then - _thread_num=`nproc` - else - _thread_num=`expr \`nproc\` / 4` - if [ "$_thread_num" = "0" ]; then - _thread_num=1 - fi - fi - - # ccache - if [ "$_noccache" != "true" ]; then - - if [ "$_distro" = "Ubuntu" ] || [ "$_distro" = "Debian" ]; then - export PATH="/usr/lib/ccache/bin/:$PATH" - elif [ "$_distro" = "Fedora" ] || [ "$_distro" = "Suse" ]; then - export PATH="/usr/lib64/ccache/:$PATH" - fi - - export CCACHE_SLOPPINESS="file_macro,locale,time_macros" - export CCACHE_NOHASHDIR="true" - msg2 'ccache was found and will be used' - - fi - - if [ -z $_kernel_localversion ]; then - _kernel_flavor="tkg-${_cpusched}" - else - _kernel_flavor="tkg-${_kernel_localversion}" - fi - - if [ "$_distro" = "Ubuntu" ] || [ "$_distro" = "Debian" ]; then - - if make -j ${_thread_num} deb-pkg LOCALVERSION=-${_kernel_flavor}; then - msg2 "Building successfully finished!" - - cd "$_where" - - # Create DEBS folder if it doesn't exist - mkdir -p DEBS - - # Move rpm files to RPMS folder inside the linux-tkg folder - mv "$_where"/*.deb "$_where"/DEBS/ - - read -p "Do you want to install the new Kernel ? y/[n]: " _install - if [[ $_install =~ [yY] ]] || [ $_install = "yes" ] || [ $_install = "Yes" ]; then - cd "$_where" - _kernelname=$_basekernel.$_kernel_subver-$_kernel_flavor - _headers_deb="linux-headers-${_kernelname}*.deb" - _image_deb="linux-image-${_kernelname}_*.deb" - _kernel_devel_deb="linux-libc-dev_${_kernelname}*.deb" - - cd DEBS - sudo dpkg -i $_headers_deb $_image_deb $_kernel_devel_deb - fi - fi - - elif [[ "$_distro" = "Fedora" || "$_distro" = "Suse" ]]; then - - # Replace dashes with underscores, it seems that it's being done by binrpm-pkg - # Se we can actually refer properly to the rpm files. - _kernel_flavor=${_kernel_flavor//-/_} - - if make -j ${_thread_num} rpm-pkg EXTRAVERSION="_${_kernel_flavor}"; then - msg2 "Building successfully finished!" - - cd "$_where" - - # Create RPMS folder if it doesn't exist - mkdir -p RPMS - - # Move rpm files to RPMS folder inside the linux-tkg folder - mv ~/rpmbuild/RPMS/x86_64/* "$_where"/RPMS/ - - #Clean up the original folder, unneeded and takes a lot of space - rm -rf ~/rpmbuild/ - - read -p "Do you want to install the new Kernel ? 
y/[n]: " _install - if [ "$_install" = "y" ] || [ "$_install" = "Y" ] || [ "$_install" = "yes" ] || [ "$_install" = "Yes" ]; then - - _kernelname=$_basekernel.${_kernel_subver}_$_kernel_flavor - _headers_rpm="kernel-headers-${_kernelname}*.rpm" - _kernel_rpm="kernel-${_kernelname}*.rpm" - _kernel_devel_rpm="kernel-devel-${_kernelname}*.rpm" - - cd RPMS - if [ "$_distro" = "Fedora" ]; then - sudo dnf install $_headers_rpm $_kernel_rpm $_kernel_devel_rpm - elif [ "$_distro" = "Suse" ]; then - msg2 "Some files from 'linux-glibc-devel' will be replaced by files from the custom kernel-hearders package" - msg2 "To revert back to the original kernel headers do 'sudo zypper install -f linux-glibc-devel'" - sudo zypper install --replacefiles --allow-unsigned-rpm $_headers_rpm $_kernel_rpm $_kernel_devel_rpm - fi - - msg2 "Install successful" - fi - fi - fi -fi - -if [ "$1" = "uninstall-help" ]; then - - cd "$_where" - msg2 "List of installed custom tkg kernels: " - - if [ "$_distro" = "Ubuntu" ]; then - dpkg -l "*tkg*" | grep "linux.*tkg" - dpkg -l "*linux-libc-dev*" | grep "linux.*tkg" - msg2 "To uninstall a version, you should remove the linux-image, linux-headers and linux-libc-dev associated to it (if installed), with: " - msg2 " sudo apt remove linux-image-VERSION linux-headers-VERSION linux-libc-dev-VERSION" - msg2 " where VERSION is displayed in the lists above, uninstall only versions that have \"tkg\" in its name" - elif [ "$_distro" = "Fedora" ]; then - dnf list --installed kernel* - msg2 "To uninstall a version, you should remove the kernel, kernel-headers and kernel-devel associated to it (if installed), with: " - msg2 " sudo dnf remove --noautoremove kernel-VERSION kernel-devel-VERSION kernel-headers-VERSION" - msg2 " where VERSION is displayed in the second column" - elif [ "$_distro" = "Suse" ]; then - zypper packages --installed-only | grep "kernel.*tkg" - msg2 "To uninstall a version, you should remove the kernel, kernel-headers and kernel-devel associated to it (if installed), with: " - msg2 " sudo zypper remove --no-clean-deps kernel-VERSION kernel-devel-VERSION kernel-headers-VERSION" - msg2 " where VERSION is displayed in the second to last column" - fi - -fi diff --git a/linux59-tkg/linux59-tkg-config/90-cleanup.hook b/linux59-tkg/linux59-tkg-config/90-cleanup.hook deleted file mode 100644 index 99f5221..0000000 --- a/linux59-tkg/linux59-tkg-config/90-cleanup.hook +++ /dev/null @@ -1,14 +0,0 @@ -[Trigger] -Type = File -Operation = Install -Operation = Upgrade -Operation = Remove -Target = usr/lib/modules/*/ -Target = !usr/lib/modules/*/?* - -[Action] -Description = Cleaning up... -When = PostTransaction -Exec = /usr/share/libalpm/scripts/cleanup -NeedsTargets - diff --git a/linux59-tkg/linux59-tkg-config/cleanup b/linux59-tkg/linux59-tkg-config/cleanup deleted file mode 100755 index c00c08d..0000000 --- a/linux59-tkg/linux59-tkg-config/cleanup +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -for _f in /usr/lib/modules/*tkg*; do - if [[ ! -e ${_f}/vmlinuz ]]; then - rm -rf "$_f" - fi -done - -# vim:set ft=sh sw=2 et: - diff --git a/linux59-tkg/linux59-tkg-config/config.x86_64 b/linux59-tkg/linux59-tkg-config/config.x86_64 deleted file mode 100644 index 9524eeb..0000000 --- a/linux59-tkg/linux59-tkg-config/config.x86_64 +++ /dev/null @@ -1,11076 +0,0 @@ -# -# Automatically generated file; DO NOT EDIT. 
-# Linux/x86 5.9.0 Kernel Configuration -# -CONFIG_CC_VERSION_TEXT="gcc (GCC) 10.2.0" -CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=100200 -CONFIG_LD_VERSION=235000000 -CONFIG_CLANG_VERSION=0 -CONFIG_CC_CAN_LINK=y -CONFIG_CC_CAN_LINK_STATIC=y -CONFIG_CC_HAS_ASM_GOTO=y -CONFIG_CC_HAS_ASM_INLINE=y -CONFIG_IRQ_WORK=y -CONFIG_BUILDTIME_TABLE_SORT=y -CONFIG_THREAD_INFO_IN_TASK=y - -# -# General setup -# -CONFIG_INIT_ENV_ARG_LIMIT=32 -# CONFIG_COMPILE_TEST is not set -CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y -CONFIG_BUILD_SALT="" -CONFIG_HAVE_KERNEL_GZIP=y -CONFIG_HAVE_KERNEL_BZIP2=y -CONFIG_HAVE_KERNEL_LZMA=y -CONFIG_HAVE_KERNEL_XZ=y -CONFIG_HAVE_KERNEL_LZO=y -CONFIG_HAVE_KERNEL_LZ4=y -CONFIG_HAVE_KERNEL_ZSTD=y -# CONFIG_KERNEL_GZIP is not set -# CONFIG_KERNEL_BZIP2 is not set -# CONFIG_KERNEL_LZMA is not set -CONFIG_KERNEL_XZ=y -# CONFIG_KERNEL_LZO is not set -# CONFIG_KERNEL_LZ4 is not set -# CONFIG_KERNEL_ZSTD is not set -CONFIG_DEFAULT_INIT="" -CONFIG_DEFAULT_HOSTNAME="archlinux" -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -CONFIG_SYSVIPC_SYSCTL=y -CONFIG_POSIX_MQUEUE=y -CONFIG_POSIX_MQUEUE_SYSCTL=y -CONFIG_WATCH_QUEUE=y -CONFIG_CROSS_MEMORY_ATTACH=y -# CONFIG_USELIB is not set -CONFIG_AUDIT=y -CONFIG_HAVE_ARCH_AUDITSYSCALL=y -CONFIG_AUDITSYSCALL=y - -# -# IRQ subsystem -# -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_GENERIC_IRQ_SHOW=y -CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y -CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_GENERIC_IRQ_MIGRATION=y -CONFIG_HARDIRQS_SW_RESEND=y -CONFIG_GENERIC_IRQ_CHIP=y -CONFIG_IRQ_DOMAIN=y -CONFIG_IRQ_SIM=y -CONFIG_IRQ_DOMAIN_HIERARCHY=y -CONFIG_GENERIC_MSI_IRQ=y -CONFIG_GENERIC_MSI_IRQ_DOMAIN=y -CONFIG_IRQ_MSI_IOMMU=y -CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y -CONFIG_GENERIC_IRQ_RESERVATION_MODE=y -CONFIG_IRQ_FORCED_THREADING=y -CONFIG_SPARSE_IRQ=y -# CONFIG_GENERIC_IRQ_DEBUGFS is not set -# end of IRQ subsystem - -CONFIG_CLOCKSOURCE_WATCHDOG=y -CONFIG_ARCH_CLOCKSOURCE_INIT=y -CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y -CONFIG_GENERIC_TIME_VSYSCALL=y -CONFIG_GENERIC_CLOCKEVENTS=y -CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y -CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y -CONFIG_GENERIC_CMOS_UPDATE=y -CONFIG_HAVE_POSIX_CPU_TIMERS_TASK_WORK=y -CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y - -# -# Timers subsystem -# -CONFIG_TICK_ONESHOT=y -CONFIG_NO_HZ_COMMON=y -# CONFIG_HZ_PERIODIC is not set -CONFIG_NO_HZ_IDLE=y -# CONFIG_NO_HZ_FULL is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -# end of Timers subsystem - -# CONFIG_PREEMPT_NONE is not set -# CONFIG_PREEMPT_VOLUNTARY is not set -CONFIG_PREEMPT=y -CONFIG_PREEMPT_COUNT=y -CONFIG_PREEMPTION=y - -# -# CPU/Task time and stats accounting -# -CONFIG_TICK_CPU_ACCOUNTING=y -# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set -CONFIG_IRQ_TIME_ACCOUNTING=y -CONFIG_HAVE_SCHED_AVG_IRQ=y -# CONFIG_SCHED_THERMAL_PRESSURE is not set -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BSD_PROCESS_ACCT_V3=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_PSI=y -# CONFIG_PSI_DEFAULT_DISABLED is not set -# end of CPU/Task time and stats accounting - -CONFIG_CPU_ISOLATION=y - -# -# RCU Subsystem -# -CONFIG_TREE_RCU=y -CONFIG_PREEMPT_RCU=y -CONFIG_RCU_EXPERT=y -CONFIG_SRCU=y -CONFIG_TREE_SRCU=y -CONFIG_TASKS_RCU_GENERIC=y -CONFIG_TASKS_RCU=y -CONFIG_TASKS_RUDE_RCU=y -CONFIG_RCU_STALL_COMMON=y -CONFIG_RCU_NEED_SEGCBLIST=y -CONFIG_RCU_FANOUT=64 -CONFIG_RCU_FANOUT_LEAF=16 -CONFIG_RCU_FAST_NO_HZ=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_DELAY=500 -# CONFIG_RCU_NOCB_CPU is not set -# CONFIG_TASKS_TRACE_RCU_READ_MB is not set -# end of RCU Subsystem 
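The config.x86_64 being deleted here carries the desktop-oriented defaults these packages shipped with (full preemption, NO_HZ_IDLE, RCU boosting, and so on). To see how it diverges from the configuration your distribution ships, the kernel tree's scripts/diffconfig helper can compare the two files; a minimal sketch, assuming a kernel source checkout is available and that the example paths below exist on your machine:

    cd linux-5.9    # any kernel source checkout that provides scripts/diffconfig
    scripts/diffconfig /boot/config-"$(uname -r)" \
        /path/to/linux59-tkg/linux59-tkg-config/config.x86_64 | grep -E 'PREEMPT|HZ|RCU_BOOST'

The second path is the pre-move location removed by this commit; point it at wherever your checkout keeps the file.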
- -CONFIG_BUILD_BIN2C=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -# CONFIG_IKHEADERS is not set -CONFIG_LOG_BUF_SHIFT=17 -CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 -CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 -CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y - -# -# Scheduler features -# -CONFIG_UCLAMP_TASK=y -CONFIG_UCLAMP_BUCKETS_COUNT=5 -# end of Scheduler features - -CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y -CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y -CONFIG_CC_HAS_INT128=y -CONFIG_ARCH_SUPPORTS_INT128=y -CONFIG_NUMA_BALANCING=y -CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y -CONFIG_CGROUPS=y -CONFIG_PAGE_COUNTER=y -CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y -CONFIG_MEMCG_KMEM=y -CONFIG_BLK_CGROUP=y -CONFIG_CGROUP_WRITEBACK=y -CONFIG_CGROUP_SCHED=y -CONFIG_FAIR_GROUP_SCHED=y -CONFIG_CFS_BANDWIDTH=y -# CONFIG_RT_GROUP_SCHED is not set -CONFIG_UCLAMP_TASK_GROUP=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_RDMA=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_HUGETLB=y -CONFIG_CPUSETS=y -CONFIG_PROC_PID_CPUSET=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_CGROUP_PERF=y -CONFIG_CGROUP_BPF=y -# CONFIG_CGROUP_DEBUG is not set -CONFIG_SOCK_CGROUP_DATA=y -CONFIG_NAMESPACES=y -CONFIG_UTS_NS=y -CONFIG_TIME_NS=y -CONFIG_IPC_NS=y -CONFIG_USER_NS=y -CONFIG_USER_NS_UNPRIVILEGED=y -CONFIG_PID_NS=y -CONFIG_NET_NS=y -CONFIG_CHECKPOINT_RESTORE=y -CONFIG_SCHED_AUTOGROUP=y -# CONFIG_SYSFS_DEPRECATED is not set -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -CONFIG_RD_GZIP=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_RD_XZ=y -CONFIG_RD_LZO=y -CONFIG_RD_LZ4=y -CONFIG_RD_ZSTD=y -CONFIG_BOOT_CONFIG=y -CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SYSCTL=y -CONFIG_HAVE_UID16=y -CONFIG_SYSCTL_EXCEPTION_TRACE=y -CONFIG_HAVE_PCSPKR_PLATFORM=y -CONFIG_BPF=y -CONFIG_EXPERT=y -# CONFIG_UID16 is not set -CONFIG_MULTIUSER=y -CONFIG_SGETMASK_SYSCALL=y -# CONFIG_SYSFS_SYSCALL is not set -CONFIG_FHANDLE=y -CONFIG_POSIX_TIMERS=y -CONFIG_PRINTK=y -CONFIG_PRINTK_NMI=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_PCSPKR_PLATFORM=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -CONFIG_FUTEX_PI=y -CONFIG_EPOLL=y -CONFIG_SIGNALFD=y -CONFIG_TIMERFD=y -CONFIG_EVENTFD=y -CONFIG_SHMEM=y -CONFIG_AIO=y -CONFIG_IO_URING=y -CONFIG_ADVISE_SYSCALLS=y -CONFIG_HAVE_ARCH_USERFAULTFD_WP=y -CONFIG_MEMBARRIER=y -CONFIG_KALLSYMS=y -CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y -CONFIG_KALLSYMS_BASE_RELATIVE=y -CONFIG_BPF_LSM=y -CONFIG_BPF_SYSCALL=y -CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y -CONFIG_BPF_JIT_ALWAYS_ON=y -CONFIG_BPF_JIT_DEFAULT_ON=y -CONFIG_USERFAULTFD=y -CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y -CONFIG_RSEQ=y -# CONFIG_DEBUG_RSEQ is not set -# CONFIG_EMBEDDED is not set -CONFIG_HAVE_PERF_EVENTS=y -# CONFIG_PC104 is not set - -# -# Kernel Performance Events And Counters -# -CONFIG_PERF_EVENTS=y -# CONFIG_DEBUG_PERF_USE_VMALLOC is not set -# end of Kernel Performance Events And Counters - -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLUB_DEBUG=y -# CONFIG_SLUB_MEMCG_SYSFS_ON is not set -# CONFIG_COMPAT_BRK is not set -# CONFIG_SLAB is not set -CONFIG_SLUB=y -# CONFIG_SLOB is not set -CONFIG_SLAB_MERGE_DEFAULT=y -CONFIG_SLAB_FREELIST_RANDOM=y -CONFIG_SLAB_FREELIST_HARDENED=y -CONFIG_SHUFFLE_PAGE_ALLOCATOR=y -CONFIG_SLUB_CPU_PARTIAL=y -CONFIG_SYSTEM_DATA_VERIFICATION=y -CONFIG_PROFILING=y -CONFIG_TRACEPOINTS=y -# end of General setup - -CONFIG_64BIT=y -CONFIG_X86_64=y -CONFIG_X86=y -CONFIG_INSTRUCTION_DECODER=y -CONFIG_OUTPUT_FORMAT="elf64-x86-64" -CONFIG_LOCKDEP_SUPPORT=y -CONFIG_STACKTRACE_SUPPORT=y -CONFIG_MMU=y 
-CONFIG_ARCH_MMAP_RND_BITS_MIN=28 -CONFIG_ARCH_MMAP_RND_BITS_MAX=32 -CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 -CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 -CONFIG_GENERIC_ISA_DMA=y -CONFIG_GENERIC_BUG=y -CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_ARCH_HAS_CPU_RELAX=y -CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y -CONFIG_ARCH_HAS_FILTER_PGPROT=y -CONFIG_HAVE_SETUP_PER_CPU_AREA=y -CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y -CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y -CONFIG_ARCH_HIBERNATION_POSSIBLE=y -CONFIG_ARCH_SUSPEND_POSSIBLE=y -CONFIG_ARCH_WANT_GENERAL_HUGETLB=y -CONFIG_ZONE_DMA32=y -CONFIG_AUDIT_ARCH=y -CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y -CONFIG_HAVE_INTEL_TXT=y -CONFIG_X86_64_SMP=y -CONFIG_ARCH_SUPPORTS_UPROBES=y -CONFIG_FIX_EARLYCON_MEM=y -CONFIG_DYNAMIC_PHYSICAL_MASK=y -CONFIG_PGTABLE_LEVELS=5 -CONFIG_CC_HAS_SANE_STACKPROTECTOR=y - -# -# Processor type and features -# -CONFIG_ZONE_DMA=y -CONFIG_SMP=y -CONFIG_X86_FEATURE_NAMES=y -CONFIG_X86_X2APIC=y -CONFIG_X86_MPPARSE=y -# CONFIG_GOLDFISH is not set -CONFIG_RETPOLINE=y -CONFIG_X86_CPU_RESCTRL=y -# CONFIG_X86_EXTENDED_PLATFORM is not set -CONFIG_X86_INTEL_LPSS=y -CONFIG_X86_AMD_PLATFORM_DEVICE=y -CONFIG_IOSF_MBI=y -# CONFIG_IOSF_MBI_DEBUG is not set -CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y -CONFIG_SCHED_OMIT_FRAME_POINTER=y -CONFIG_HYPERVISOR_GUEST=y -CONFIG_PARAVIRT=y -CONFIG_PARAVIRT_XXL=y -# CONFIG_PARAVIRT_DEBUG is not set -CONFIG_PARAVIRT_SPINLOCKS=y -CONFIG_X86_HV_CALLBACK_VECTOR=y -CONFIG_XEN=y -CONFIG_XEN_PV=y -CONFIG_XEN_PV_SMP=y -CONFIG_XEN_DOM0=y -CONFIG_XEN_PVHVM=y -CONFIG_XEN_PVHVM_SMP=y -CONFIG_XEN_512GB=y -CONFIG_XEN_SAVE_RESTORE=y -# CONFIG_XEN_DEBUG_FS is not set -CONFIG_XEN_PVH=y -CONFIG_KVM_GUEST=y -CONFIG_ARCH_CPUIDLE_HALTPOLL=y -CONFIG_PVH=y -CONFIG_PARAVIRT_TIME_ACCOUNTING=y -CONFIG_PARAVIRT_CLOCK=y -CONFIG_JAILHOUSE_GUEST=y -CONFIG_ACRN_GUEST=y -# CONFIG_MK8 is not set -# CONFIG_MPSC is not set -# CONFIG_MCORE2 is not set -# CONFIG_MATOM is not set -CONFIG_GENERIC_CPU=y -CONFIG_X86_INTERNODE_CACHE_SHIFT=6 -CONFIG_X86_L1_CACHE_SHIFT=6 -CONFIG_X86_TSC=y -CONFIG_X86_CMPXCHG64=y -CONFIG_X86_CMOV=y -CONFIG_X86_MINIMUM_CPU_FAMILY=64 -CONFIG_X86_DEBUGCTLMSR=y -CONFIG_IA32_FEAT_CTL=y -CONFIG_X86_VMX_FEATURE_NAMES=y -CONFIG_PROCESSOR_SELECT=y -CONFIG_CPU_SUP_INTEL=y -CONFIG_CPU_SUP_AMD=y -CONFIG_CPU_SUP_HYGON=y -CONFIG_CPU_SUP_CENTAUR=y -CONFIG_CPU_SUP_ZHAOXIN=y -CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y -CONFIG_DMI=y -CONFIG_GART_IOMMU=y -# CONFIG_MAXSMP is not set -CONFIG_NR_CPUS_RANGE_BEGIN=2 -CONFIG_NR_CPUS_RANGE_END=512 -CONFIG_NR_CPUS_DEFAULT=64 -CONFIG_NR_CPUS=320 -CONFIG_SCHED_SMT=y -CONFIG_SCHED_MC=y -CONFIG_SCHED_MC_PRIO=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y -CONFIG_X86_MCE=y -# CONFIG_X86_MCELOG_LEGACY is not set -CONFIG_X86_MCE_INTEL=y -CONFIG_X86_MCE_AMD=y -CONFIG_X86_MCE_THRESHOLD=y -CONFIG_X86_MCE_INJECT=m -CONFIG_X86_THERMAL_VECTOR=y - -# -# Performance monitoring -# -CONFIG_PERF_EVENTS_INTEL_UNCORE=m -CONFIG_PERF_EVENTS_INTEL_RAPL=m -CONFIG_PERF_EVENTS_INTEL_CSTATE=m -CONFIG_PERF_EVENTS_AMD_POWER=m -# end of Performance monitoring - -CONFIG_X86_16BIT=y -CONFIG_X86_ESPFIX64=y -CONFIG_X86_VSYSCALL_EMULATION=y -CONFIG_X86_IOPL_IOPERM=y -CONFIG_I8K=m -CONFIG_MICROCODE=y -CONFIG_MICROCODE_INTEL=y -CONFIG_MICROCODE_AMD=y -CONFIG_MICROCODE_OLD_INTERFACE=y -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -CONFIG_X86_5LEVEL=y -CONFIG_X86_DIRECT_GBPAGES=y -# CONFIG_X86_CPA_STATISTICS is not set -CONFIG_AMD_MEM_ENCRYPT=y 
-# CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT is not set -CONFIG_NUMA=y -CONFIG_AMD_NUMA=y -CONFIG_X86_64_ACPI_NUMA=y -# CONFIG_NUMA_EMU is not set -CONFIG_NODES_SHIFT=5 -CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_ARCH_SPARSEMEM_DEFAULT=y -CONFIG_ARCH_SELECT_MEMORY_MODEL=y -CONFIG_ARCH_MEMORY_PROBE=y -CONFIG_ARCH_PROC_KCORE_TEXT=y -CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 -CONFIG_X86_PMEM_LEGACY_DEVICE=y -CONFIG_X86_PMEM_LEGACY=m -CONFIG_X86_CHECK_BIOS_CORRUPTION=y -CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y -CONFIG_X86_RESERVE_LOW=64 -CONFIG_MTRR=y -CONFIG_MTRR_SANITIZER=y -CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=1 -CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=0 -CONFIG_X86_PAT=y -CONFIG_ARCH_USES_PG_UNCACHED=y -CONFIG_ARCH_RANDOM=y -CONFIG_X86_SMAP=y -CONFIG_X86_UMIP=y -CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y -# CONFIG_X86_INTEL_TSX_MODE_OFF is not set -# CONFIG_X86_INTEL_TSX_MODE_ON is not set -CONFIG_X86_INTEL_TSX_MODE_AUTO=y -CONFIG_EFI=y -CONFIG_EFI_STUB=y -CONFIG_EFI_MIXED=y -CONFIG_SECCOMP=y -# CONFIG_HZ_100 is not set -# CONFIG_HZ_250 is not set -CONFIG_HZ_300=y -# CONFIG_HZ_1000 is not set -CONFIG_HZ=300 -CONFIG_SCHED_HRTICK=y -CONFIG_KEXEC=y -CONFIG_KEXEC_FILE=y -CONFIG_ARCH_HAS_KEXEC_PURGATORY=y -# CONFIG_KEXEC_SIG is not set -CONFIG_CRASH_DUMP=y -CONFIG_KEXEC_JUMP=y -CONFIG_PHYSICAL_START=0x1000000 -CONFIG_RELOCATABLE=y -CONFIG_RANDOMIZE_BASE=y -CONFIG_X86_NEED_RELOCS=y -CONFIG_PHYSICAL_ALIGN=0x200000 -CONFIG_DYNAMIC_MEMORY_LAYOUT=y -CONFIG_RANDOMIZE_MEMORY=y -CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0x1 -CONFIG_HOTPLUG_CPU=y -# CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set -# CONFIG_DEBUG_HOTPLUG_CPU0 is not set -# CONFIG_COMPAT_VDSO is not set -# CONFIG_LEGACY_VSYSCALL_EMULATE is not set -CONFIG_LEGACY_VSYSCALL_XONLY=y -# CONFIG_LEGACY_VSYSCALL_NONE is not set -# CONFIG_CMDLINE_BOOL is not set -CONFIG_MODIFY_LDT_SYSCALL=y -CONFIG_HAVE_LIVEPATCH=y -# CONFIG_LIVEPATCH is not set -# end of Processor type and features - -CONFIG_ARCH_HAS_ADD_PAGES=y -CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y -CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y -CONFIG_USE_PERCPU_NUMA_NODE_ID=y -CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y -CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y -CONFIG_ARCH_ENABLE_THP_MIGRATION=y - -# -# Power management and ACPI options -# -CONFIG_ARCH_HIBERNATION_HEADER=y -CONFIG_SUSPEND=y -CONFIG_SUSPEND_FREEZER=y -# CONFIG_SUSPEND_SKIP_SYNC is not set -CONFIG_HIBERNATE_CALLBACKS=y -CONFIG_HIBERNATION=y -CONFIG_HIBERNATION_SNAPSHOT_DEV=y -CONFIG_PM_STD_PARTITION="" -CONFIG_PM_SLEEP=y -CONFIG_PM_SLEEP_SMP=y -CONFIG_PM_AUTOSLEEP=y -CONFIG_PM_WAKELOCKS=y -CONFIG_PM_WAKELOCKS_LIMIT=100 -CONFIG_PM_WAKELOCKS_GC=y -CONFIG_PM=y -CONFIG_PM_DEBUG=y -CONFIG_PM_ADVANCED_DEBUG=y -# CONFIG_PM_TEST_SUSPEND is not set -CONFIG_PM_SLEEP_DEBUG=y -# CONFIG_DPM_WATCHDOG is not set -CONFIG_PM_TRACE=y -CONFIG_PM_TRACE_RTC=y -CONFIG_PM_CLK=y -CONFIG_PM_GENERIC_DOMAINS=y -CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y -CONFIG_PM_GENERIC_DOMAINS_SLEEP=y -CONFIG_PM_GENERIC_DOMAINS_OF=y -CONFIG_ENERGY_MODEL=y -CONFIG_ARCH_SUPPORTS_ACPI=y -CONFIG_ACPI=y -CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y -CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y -CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y -# CONFIG_ACPI_DEBUGGER is not set -CONFIG_ACPI_SPCR_TABLE=y -CONFIG_ACPI_LPIT=y -CONFIG_ACPI_SLEEP=y -CONFIG_ACPI_REV_OVERRIDE_POSSIBLE=y -CONFIG_ACPI_EC_DEBUGFS=y -CONFIG_ACPI_AC=m -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=y -CONFIG_ACPI_VIDEO=y -CONFIG_ACPI_FAN=y -CONFIG_ACPI_TAD=m -CONFIG_ACPI_DOCK=y -CONFIG_ACPI_CPU_FREQ_PSS=y -CONFIG_ACPI_PROCESSOR_CSTATE=y 
-CONFIG_ACPI_PROCESSOR_IDLE=y -CONFIG_ACPI_CPPC_LIB=y -CONFIG_ACPI_PROCESSOR=y -CONFIG_ACPI_IPMI=m -CONFIG_ACPI_HOTPLUG_CPU=y -CONFIG_ACPI_PROCESSOR_AGGREGATOR=y -CONFIG_ACPI_THERMAL=y -CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y -CONFIG_ACPI_TABLE_UPGRADE=y -CONFIG_ACPI_DEBUG=y -CONFIG_ACPI_PCI_SLOT=y -CONFIG_ACPI_CONTAINER=y -CONFIG_ACPI_HOTPLUG_MEMORY=y -CONFIG_ACPI_HOTPLUG_IOAPIC=y -CONFIG_ACPI_SBS=m -CONFIG_ACPI_HED=y -CONFIG_ACPI_CUSTOM_METHOD=m -CONFIG_ACPI_BGRT=y -# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set -CONFIG_ACPI_NFIT=m -# CONFIG_NFIT_SECURITY_DEBUG is not set -CONFIG_ACPI_NUMA=y -CONFIG_ACPI_HMAT=y -CONFIG_HAVE_ACPI_APEI=y -CONFIG_HAVE_ACPI_APEI_NMI=y -CONFIG_ACPI_APEI=y -CONFIG_ACPI_APEI_GHES=y -CONFIG_ACPI_APEI_PCIEAER=y -CONFIG_ACPI_APEI_MEMORY_FAILURE=y -CONFIG_ACPI_APEI_EINJ=m -CONFIG_ACPI_APEI_ERST_DEBUG=m -CONFIG_DPTF_POWER=m -CONFIG_ACPI_WATCHDOG=y -CONFIG_ACPI_EXTLOG=m -CONFIG_ACPI_ADXL=y -CONFIG_PMIC_OPREGION=y -CONFIG_BYTCRC_PMIC_OPREGION=y -CONFIG_CHTCRC_PMIC_OPREGION=y -CONFIG_XPOWER_PMIC_OPREGION=y -CONFIG_BXT_WC_PMIC_OPREGION=y -CONFIG_CHT_WC_PMIC_OPREGION=y -CONFIG_CHT_DC_TI_PMIC_OPREGION=y -CONFIG_ACPI_CONFIGFS=m -CONFIG_TPS68470_PMIC_OPREGION=y -CONFIG_X86_PM_TIMER=y -CONFIG_SFI=y - -# -# CPU Frequency scaling -# -CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_GOV_ATTR_SET=y -CONFIG_CPU_FREQ_GOV_COMMON=y -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y -CONFIG_CPU_FREQ_GOV_PERFORMANCE=y -CONFIG_CPU_FREQ_GOV_POWERSAVE=m -CONFIG_CPU_FREQ_GOV_USERSPACE=m -CONFIG_CPU_FREQ_GOV_ONDEMAND=m -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m -CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y - -# -# CPU frequency scaling drivers -# -CONFIG_CPUFREQ_DT=m -CONFIG_CPUFREQ_DT_PLATDEV=y -CONFIG_X86_INTEL_PSTATE=y -CONFIG_X86_PCC_CPUFREQ=m -CONFIG_X86_ACPI_CPUFREQ=m -CONFIG_X86_ACPI_CPUFREQ_CPB=y -CONFIG_X86_POWERNOW_K8=m -CONFIG_X86_AMD_FREQ_SENSITIVITY=m -# CONFIG_X86_SPEEDSTEP_CENTRINO is not set -CONFIG_X86_P4_CLOCKMOD=m - -# -# shared options -# -CONFIG_X86_SPEEDSTEP_LIB=m -# end of CPU Frequency scaling - -# -# CPU Idle -# -CONFIG_CPU_IDLE=y -CONFIG_CPU_IDLE_GOV_LADDER=y -CONFIG_CPU_IDLE_GOV_MENU=y -CONFIG_CPU_IDLE_GOV_TEO=y -CONFIG_CPU_IDLE_GOV_HALTPOLL=y -CONFIG_HALTPOLL_CPUIDLE=m -# end of CPU Idle - -CONFIG_INTEL_IDLE=y -# end of Power management and ACPI options - -# -# Bus options (PCI etc.) -# -CONFIG_PCI_DIRECT=y -CONFIG_PCI_MMCONFIG=y -CONFIG_PCI_XEN=y -CONFIG_MMCONF_FAM10H=y -# CONFIG_PCI_CNB20LE_QUIRK is not set -# CONFIG_ISA_BUS is not set -CONFIG_ISA_DMA_API=y -CONFIG_AMD_NB=y -# CONFIG_X86_SYSFB is not set -# end of Bus options (PCI etc.) 
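Because this config sets CONFIG_IKCONFIG=y and CONFIG_IKCONFIG_PROC=y, a kernel built from it exposes its own configuration at /proc/config.gz, which is a quick way to confirm that the choices above (HZ=300, full preemption, schedutil as the default cpufreq governor) made it into the running image. A small sketch, assuming such a kernel is currently booted:

    zgrep -E '^CONFIG_HZ=|^CONFIG_PREEMPT=|^CONFIG_CPU_FREQ_DEFAULT_GOV' /proc/config.gz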
- -# -# Binary Emulations -# -CONFIG_IA32_EMULATION=y -# CONFIG_X86_X32 is not set -CONFIG_COMPAT_32=y -CONFIG_COMPAT=y -CONFIG_COMPAT_FOR_U64_ALIGNMENT=y -CONFIG_SYSVIPC_COMPAT=y -# end of Binary Emulations - -# -# Firmware Drivers -# -CONFIG_EDD=m -# CONFIG_EDD_OFF is not set -CONFIG_FIRMWARE_MEMMAP=y -CONFIG_DMIID=y -CONFIG_DMI_SYSFS=m -CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y -CONFIG_ISCSI_IBFT_FIND=y -CONFIG_ISCSI_IBFT=m -CONFIG_FW_CFG_SYSFS=m -# CONFIG_FW_CFG_SYSFS_CMDLINE is not set -CONFIG_GOOGLE_FIRMWARE=y -# CONFIG_GOOGLE_SMI is not set -CONFIG_GOOGLE_COREBOOT_TABLE=m -CONFIG_GOOGLE_MEMCONSOLE=m -# CONFIG_GOOGLE_MEMCONSOLE_X86_LEGACY is not set -CONFIG_GOOGLE_FRAMEBUFFER_COREBOOT=m -CONFIG_GOOGLE_MEMCONSOLE_COREBOOT=m -CONFIG_GOOGLE_VPD=m - -# -# EFI (Extensible Firmware Interface) Support -# -# CONFIG_EFI_VARS is not set -CONFIG_EFI_ESRT=y -CONFIG_EFI_RUNTIME_MAP=y -# CONFIG_EFI_FAKE_MEMMAP is not set -CONFIG_EFI_SOFT_RESERVE=y -CONFIG_EFI_RUNTIME_WRAPPERS=y -CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y -CONFIG_EFI_CAPSULE_LOADER=m -# CONFIG_EFI_TEST is not set -CONFIG_APPLE_PROPERTIES=y -# CONFIG_RESET_ATTACK_MITIGATION is not set -CONFIG_EFI_RCI2_TABLE=y -# CONFIG_EFI_DISABLE_PCI_DMA is not set -# end of EFI (Extensible Firmware Interface) Support - -CONFIG_EFI_EMBEDDED_FIRMWARE=y -CONFIG_UEFI_CPER=y -CONFIG_UEFI_CPER_X86=y -CONFIG_EFI_DEV_PATH_PARSER=y -CONFIG_EFI_EARLYCON=y - -# -# Tegra firmware driver -# -# end of Tegra firmware driver -# end of Firmware Drivers - -CONFIG_HAVE_KVM=y -CONFIG_HAVE_KVM_IRQCHIP=y -CONFIG_HAVE_KVM_IRQFD=y -CONFIG_HAVE_KVM_IRQ_ROUTING=y -CONFIG_HAVE_KVM_EVENTFD=y -CONFIG_KVM_MMIO=y -CONFIG_KVM_ASYNC_PF=y -CONFIG_HAVE_KVM_MSI=y -CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y -CONFIG_KVM_VFIO=y -CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y -CONFIG_KVM_COMPAT=y -CONFIG_HAVE_KVM_IRQ_BYPASS=y -CONFIG_HAVE_KVM_NO_POLL=y -CONFIG_KVM_XFER_TO_GUEST_WORK=y -CONFIG_VIRTUALIZATION=y -CONFIG_KVM=m -CONFIG_KVM_WERROR=y -CONFIG_KVM_INTEL=m -CONFIG_KVM_AMD=m -CONFIG_KVM_AMD_SEV=y -CONFIG_KVM_MMU_AUDIT=y -CONFIG_AS_AVX512=y -CONFIG_AS_SHA1_NI=y -CONFIG_AS_SHA256_NI=y -CONFIG_AS_TPAUSE=y - -# -# General architecture-dependent options -# -CONFIG_CRASH_CORE=y -CONFIG_KEXEC_CORE=y -CONFIG_HOTPLUG_SMT=y -CONFIG_GENERIC_ENTRY=y -CONFIG_OPROFILE=m -# CONFIG_OPROFILE_EVENT_MULTIPLEX is not set -CONFIG_HAVE_OPROFILE=y -CONFIG_OPROFILE_NMI_TIMER=y -CONFIG_KPROBES=y -CONFIG_JUMP_LABEL=y -# CONFIG_STATIC_KEYS_SELFTEST is not set -CONFIG_OPTPROBES=y -CONFIG_KPROBES_ON_FTRACE=y -CONFIG_UPROBES=y -CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y -CONFIG_ARCH_USE_BUILTIN_BSWAP=y -CONFIG_KRETPROBES=y -CONFIG_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_IOREMAP_PROT=y -CONFIG_HAVE_KPROBES=y -CONFIG_HAVE_KRETPROBES=y -CONFIG_HAVE_OPTPROBES=y -CONFIG_HAVE_KPROBES_ON_FTRACE=y -CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y -CONFIG_HAVE_NMI=y -CONFIG_HAVE_ARCH_TRACEHOOK=y -CONFIG_HAVE_DMA_CONTIGUOUS=y -CONFIG_GENERIC_SMP_IDLE_THREAD=y -CONFIG_ARCH_HAS_FORTIFY_SOURCE=y -CONFIG_ARCH_HAS_SET_MEMORY=y -CONFIG_ARCH_HAS_SET_DIRECT_MAP=y -CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y -CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y -CONFIG_HAVE_ASM_MODVERSIONS=y -CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y -CONFIG_HAVE_RSEQ=y -CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y -CONFIG_HAVE_HW_BREAKPOINT=y -CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y -CONFIG_HAVE_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_PERF_EVENTS_NMI=y -CONFIG_HAVE_HARDLOCKUP_DETECTOR_PERF=y -CONFIG_HAVE_PERF_REGS=y -CONFIG_HAVE_PERF_USER_STACK_DUMP=y -CONFIG_HAVE_ARCH_JUMP_LABEL=y 
-CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y -CONFIG_MMU_GATHER_TABLE_FREE=y -CONFIG_MMU_GATHER_RCU_TABLE_FREE=y -CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y -CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y -CONFIG_HAVE_CMPXCHG_LOCAL=y -CONFIG_HAVE_CMPXCHG_DOUBLE=y -CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y -CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y -CONFIG_HAVE_ARCH_SECCOMP_FILTER=y -CONFIG_SECCOMP_FILTER=y -CONFIG_HAVE_ARCH_STACKLEAK=y -CONFIG_HAVE_STACKPROTECTOR=y -CONFIG_STACKPROTECTOR=y -CONFIG_STACKPROTECTOR_STRONG=y -CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y -CONFIG_HAVE_CONTEXT_TRACKING=y -CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y -CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y -CONFIG_HAVE_MOVE_PMD=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y -CONFIG_HAVE_ARCH_HUGE_VMAP=y -CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y -CONFIG_HAVE_ARCH_SOFT_DIRTY=y -CONFIG_HAVE_MOD_ARCH_SPECIFIC=y -CONFIG_MODULES_USE_ELF_RELA=y -CONFIG_ARCH_HAS_ELF_RANDOMIZE=y -CONFIG_HAVE_ARCH_MMAP_RND_BITS=y -CONFIG_HAVE_EXIT_THREAD=y -CONFIG_ARCH_MMAP_RND_BITS=28 -CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y -CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 -CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES=y -CONFIG_HAVE_STACK_VALIDATION=y -CONFIG_HAVE_RELIABLE_STACKTRACE=y -CONFIG_ISA_BUS_API=y -CONFIG_OLD_SIGSUSPEND3=y -CONFIG_COMPAT_OLD_SIGACTION=y -CONFIG_COMPAT_32BIT_TIME=y -CONFIG_HAVE_ARCH_VMAP_STACK=y -CONFIG_VMAP_STACK=y -CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y -CONFIG_STRICT_KERNEL_RWX=y -CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y -CONFIG_STRICT_MODULE_RWX=y -CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y -CONFIG_ARCH_USE_MEMREMAP_PROT=y -CONFIG_LOCK_EVENT_COUNTS=y -CONFIG_ARCH_HAS_MEM_ENCRYPT=y - -# -# GCOV-based kernel profiling -# -# CONFIG_GCOV_KERNEL is not set -CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y -# end of GCOV-based kernel profiling - -CONFIG_HAVE_GCC_PLUGINS=y -CONFIG_GCC_PLUGINS=y -# CONFIG_GCC_PLUGIN_CYC_COMPLEXITY is not set -# CONFIG_GCC_PLUGIN_LATENT_ENTROPY is not set -# CONFIG_GCC_PLUGIN_RANDSTRUCT is not set -# end of General architecture-dependent options - -CONFIG_RT_MUTEXES=y -CONFIG_BASE_SMALL=0 -CONFIG_MODULE_SIG_FORMAT=y -CONFIG_MODULES=y -CONFIG_MODULE_FORCE_LOAD=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_MODVERSIONS is not set -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG=y -# CONFIG_MODULE_SIG_FORCE is not set -CONFIG_MODULE_SIG_ALL=y -# CONFIG_MODULE_SIG_SHA1 is not set -# CONFIG_MODULE_SIG_SHA224 is not set -# CONFIG_MODULE_SIG_SHA256 is not set -# CONFIG_MODULE_SIG_SHA384 is not set -CONFIG_MODULE_SIG_SHA512=y -CONFIG_MODULE_SIG_HASH="sha512" -CONFIG_MODULE_COMPRESS=y -# CONFIG_MODULE_COMPRESS_GZIP is not set -CONFIG_MODULE_COMPRESS_XZ=y -CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS=y -CONFIG_UNUSED_SYMBOLS=y -CONFIG_MODULES_TREE_LOOKUP=y -CONFIG_BLOCK=y -CONFIG_BLK_RQ_ALLOC_TIME=y -CONFIG_BLK_SCSI_REQUEST=y -CONFIG_BLK_CGROUP_RWSTAT=y -CONFIG_BLK_DEV_BSG=y -CONFIG_BLK_DEV_BSGLIB=y -CONFIG_BLK_DEV_INTEGRITY=y -CONFIG_BLK_DEV_INTEGRITY_T10=y -CONFIG_BLK_DEV_ZONED=y -CONFIG_BLK_DEV_THROTTLING=y -CONFIG_BLK_DEV_THROTTLING_LOW=y -# CONFIG_BLK_CMDLINE_PARSER is not set -CONFIG_BLK_WBT=y -CONFIG_BLK_CGROUP_IOLATENCY=y -CONFIG_BLK_CGROUP_IOCOST=y -CONFIG_BLK_WBT_MQ=y -CONFIG_BLK_DEBUG_FS=y -CONFIG_BLK_DEBUG_FS_ZONED=y -CONFIG_BLK_SED_OPAL=y -CONFIG_BLK_INLINE_ENCRYPTION=y -CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_AIX_PARTITION=y -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is 
not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -# CONFIG_UNIXWARE_DISKLABEL is not set -CONFIG_LDM_PARTITION=y -# CONFIG_LDM_DEBUG is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -CONFIG_KARMA_PARTITION=y -CONFIG_EFI_PARTITION=y -# CONFIG_SYSV68_PARTITION is not set -# CONFIG_CMDLINE_PARTITION is not set -# end of Partition Types - -CONFIG_BLOCK_COMPAT=y -CONFIG_BLK_MQ_PCI=y -CONFIG_BLK_MQ_VIRTIO=y -CONFIG_BLK_MQ_RDMA=y -CONFIG_BLK_PM=y - -# -# IO Schedulers -# -CONFIG_MQ_IOSCHED_DEADLINE=y -CONFIG_MQ_IOSCHED_KYBER=y -CONFIG_IOSCHED_BFQ=y -CONFIG_BFQ_GROUP_IOSCHED=y -# CONFIG_BFQ_CGROUP_DEBUG is not set -# end of IO Schedulers - -CONFIG_PREEMPT_NOTIFIERS=y -CONFIG_PADATA=y -CONFIG_ASN1=y -CONFIG_UNINLINE_SPIN_UNLOCK=y -CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y -CONFIG_MUTEX_SPIN_ON_OWNER=y -CONFIG_RWSEM_SPIN_ON_OWNER=y -CONFIG_LOCK_SPIN_ON_OWNER=y -CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y -CONFIG_QUEUED_SPINLOCKS=y -CONFIG_ARCH_USE_QUEUED_RWLOCKS=y -CONFIG_QUEUED_RWLOCKS=y -CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y -CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y -CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y -CONFIG_FREEZER=y - -# -# Executable file formats -# -CONFIG_BINFMT_ELF=y -CONFIG_COMPAT_BINFMT_ELF=y -CONFIG_ELFCORE=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y -CONFIG_BINFMT_SCRIPT=y -CONFIG_BINFMT_MISC=y -CONFIG_COREDUMP=y -# end of Executable file formats - -# -# Memory Management options -# -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_SPARSEMEM_MANUAL=y -CONFIG_SPARSEMEM=y -CONFIG_NEED_MULTIPLE_NODES=y -CONFIG_SPARSEMEM_EXTREME=y -CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y -CONFIG_SPARSEMEM_VMEMMAP=y -CONFIG_HAVE_FAST_GUP=y -CONFIG_NUMA_KEEP_MEMINFO=y -CONFIG_MEMORY_ISOLATION=y -CONFIG_HAVE_BOOTMEM_INFO_NODE=y -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTPLUG_SPARSE=y -CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y -CONFIG_MEMORY_HOTREMOVE=y -CONFIG_SPLIT_PTLOCK_CPUS=4 -CONFIG_MEMORY_BALLOON=y -CONFIG_BALLOON_COMPACTION=y -CONFIG_COMPACTION=y -CONFIG_PAGE_REPORTING=y -CONFIG_MIGRATION=y -CONFIG_CONTIG_ALLOC=y -CONFIG_PHYS_ADDR_T_64BIT=y -CONFIG_BOUNCE=y -CONFIG_VIRT_TO_BUS=y -CONFIG_MMU_NOTIFIER=y -CONFIG_KSM=y -CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 -CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y -CONFIG_MEMORY_FAILURE=y -CONFIG_HWPOISON_INJECT=m -CONFIG_TRANSPARENT_HUGEPAGE=y -# CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS is not set -CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y -CONFIG_ARCH_WANTS_THP_SWAP=y -CONFIG_THP_SWAP=y -CONFIG_CLEANCACHE=y -CONFIG_FRONTSWAP=y -# CONFIG_CMA is not set -CONFIG_MEM_SOFT_DIRTY=y -CONFIG_ZSWAP=y -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set -CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4=y -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4HC is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD is not set -CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lz4" -# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD is not set -CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD=y -# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set -CONFIG_ZSWAP_ZPOOL_DEFAULT="z3fold" -CONFIG_ZSWAP_DEFAULT_ON=y -CONFIG_ZPOOL=y -CONFIG_ZBUD=y -CONFIG_Z3FOLD=y -CONFIG_ZSMALLOC=y -# CONFIG_ZSMALLOC_PGTABLE_MAPPING is not set -# CONFIG_ZSMALLOC_STAT is not set -CONFIG_GENERIC_EARLY_IOREMAP=y -# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set -# CONFIG_IDLE_PAGE_TRACKING is not set -CONFIG_ARCH_HAS_PTE_DEVMAP=y -CONFIG_ZONE_DEVICE=y -CONFIG_DEV_PAGEMAP_OPS=y 
-CONFIG_HMM_MIRROR=y -CONFIG_DEVICE_PRIVATE=y -CONFIG_FRAME_VECTOR=y -CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y -CONFIG_ARCH_HAS_PKEYS=y -# CONFIG_PERCPU_STATS is not set -# CONFIG_GUP_BENCHMARK is not set -CONFIG_READ_ONLY_THP_FOR_FS=y -CONFIG_ARCH_HAS_PTE_SPECIAL=y -CONFIG_MAPPING_DIRTY_HELPERS=y -# end of Memory Management options - -CONFIG_NET=y -CONFIG_COMPAT_NETLINK_MESSAGES=y -CONFIG_NET_INGRESS=y -CONFIG_NET_EGRESS=y -CONFIG_NET_REDIRECT=y -CONFIG_SKB_EXTENSIONS=y - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_DIAG=y -CONFIG_UNIX=y -CONFIG_UNIX_SCM=y -CONFIG_UNIX_DIAG=y -CONFIG_TLS=m -CONFIG_TLS_DEVICE=y -# CONFIG_TLS_TOE is not set -CONFIG_XFRM=y -CONFIG_XFRM_OFFLOAD=y -CONFIG_XFRM_ALGO=m -CONFIG_XFRM_USER=m -CONFIG_XFRM_INTERFACE=m -CONFIG_XFRM_SUB_POLICY=y -CONFIG_XFRM_MIGRATE=y -CONFIG_XFRM_STATISTICS=y -CONFIG_XFRM_AH=m -CONFIG_XFRM_ESP=m -CONFIG_XFRM_IPCOMP=m -CONFIG_NET_KEY=m -CONFIG_NET_KEY_MIGRATE=y -CONFIG_XFRM_ESPINTCP=y -CONFIG_SMC=m -CONFIG_SMC_DIAG=m -CONFIG_XDP_SOCKETS=y -CONFIG_XDP_SOCKETS_DIAG=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -# CONFIG_IP_FIB_TRIE_STATS is not set -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_CLASSID=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE_DEMUX=m -CONFIG_NET_IP_TUNNEL=m -CONFIG_NET_IPGRE=m -# CONFIG_NET_IPGRE_BROADCAST is not set -CONFIG_IP_MROUTE_COMMON=y -CONFIG_IP_MROUTE=y -CONFIG_IP_MROUTE_MULTIPLE_TABLES=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -CONFIG_NET_IPVTI=m -CONFIG_NET_UDP_TUNNEL=m -CONFIG_NET_FOU=m -CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_ESP_OFFLOAD=m -CONFIG_INET_ESPINTCP=y -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_TUNNEL=m -CONFIG_INET_TUNNEL=m -CONFIG_INET_DIAG=m -CONFIG_INET_TCP_DIAG=m -CONFIG_INET_UDP_DIAG=m -CONFIG_INET_RAW_DIAG=m -CONFIG_INET_DIAG_DESTROY=y -CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_BIC=m -CONFIG_TCP_CONG_CUBIC=y -CONFIG_TCP_CONG_WESTWOOD=m -CONFIG_TCP_CONG_HTCP=m -CONFIG_TCP_CONG_HSTCP=m -CONFIG_TCP_CONG_HYBLA=m -CONFIG_TCP_CONG_VEGAS=m -CONFIG_TCP_CONG_NV=m -CONFIG_TCP_CONG_SCALABLE=m -CONFIG_TCP_CONG_LP=m -CONFIG_TCP_CONG_VENO=m -CONFIG_TCP_CONG_YEAH=m -CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_TCP_CONG_DCTCP=m -CONFIG_TCP_CONG_CDG=m -CONFIG_TCP_CONG_BBR=m -CONFIG_DEFAULT_CUBIC=y -# CONFIG_DEFAULT_RENO is not set -CONFIG_DEFAULT_TCP_CONG="cubic" -CONFIG_TCP_MD5SIG=y -CONFIG_IPV6=y -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_IPV6_ROUTE_INFO=y -CONFIG_IPV6_OPTIMISTIC_DAD=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_ESPINTCP=y -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_MIP6=m -CONFIG_IPV6_ILA=m -CONFIG_INET6_XFRM_TUNNEL=m -CONFIG_INET6_TUNNEL=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_SIT=m -CONFIG_IPV6_SIT_6RD=y -CONFIG_IPV6_NDISC_NODETYPE=y -CONFIG_IPV6_TUNNEL=m -CONFIG_IPV6_GRE=m -CONFIG_IPV6_FOU=m -CONFIG_IPV6_FOU_TUNNEL=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_IPV6_MROUTE=y -CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y -CONFIG_IPV6_PIMSM_V2=y -CONFIG_IPV6_SEG6_LWTUNNEL=y -CONFIG_IPV6_SEG6_HMAC=y -CONFIG_IPV6_SEG6_BPF=y -CONFIG_IPV6_RPL_LWTUNNEL=y -CONFIG_NETLABEL=y -CONFIG_MPTCP=y -CONFIG_INET_MPTCP_DIAG=m -CONFIG_MPTCP_IPV6=y -CONFIG_NETWORK_SECMARK=y -CONFIG_NET_PTP_CLASSIFY=y -CONFIG_NETWORK_PHY_TIMESTAMPING=y -CONFIG_NETFILTER=y -CONFIG_NETFILTER_ADVANCED=y -CONFIG_BRIDGE_NETFILTER=m - -# -# Core Netfilter Configuration -# -CONFIG_NETFILTER_INGRESS=y -CONFIG_NETFILTER_NETLINK=m 
-CONFIG_NETFILTER_FAMILY_BRIDGE=y -CONFIG_NETFILTER_FAMILY_ARP=y -CONFIG_NETFILTER_NETLINK_ACCT=m -CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_NETLINK_LOG=m -CONFIG_NETFILTER_NETLINK_OSF=m -CONFIG_NF_CONNTRACK=m -CONFIG_NF_LOG_COMMON=m -CONFIG_NF_LOG_NETDEV=m -CONFIG_NETFILTER_CONNCOUNT=m -CONFIG_NF_CONNTRACK_MARK=y -CONFIG_NF_CONNTRACK_SECMARK=y -CONFIG_NF_CONNTRACK_ZONES=y -CONFIG_NF_CONNTRACK_PROCFS=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_TIMEOUT=y -CONFIG_NF_CONNTRACK_TIMESTAMP=y -CONFIG_NF_CONNTRACK_LABELS=y -CONFIG_NF_CT_PROTO_DCCP=y -CONFIG_NF_CT_PROTO_GRE=y -CONFIG_NF_CT_PROTO_SCTP=y -CONFIG_NF_CT_PROTO_UDPLITE=y -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_BROADCAST=m -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_SNMP=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_NETLINK_HELPER=m -CONFIG_NETFILTER_NETLINK_GLUE_CT=y -CONFIG_NF_NAT=m -CONFIG_NF_NAT_AMANDA=m -CONFIG_NF_NAT_FTP=m -CONFIG_NF_NAT_IRC=m -CONFIG_NF_NAT_SIP=m -CONFIG_NF_NAT_TFTP=m -CONFIG_NF_NAT_REDIRECT=y -CONFIG_NF_NAT_MASQUERADE=y -CONFIG_NETFILTER_SYNPROXY=m -CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y -CONFIG_NF_TABLES_NETDEV=y -CONFIG_NFT_NUMGEN=m -CONFIG_NFT_CT=m -CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m -CONFIG_NFT_CONNLIMIT=m -CONFIG_NFT_LOG=m -CONFIG_NFT_LIMIT=m -CONFIG_NFT_MASQ=m -CONFIG_NFT_REDIR=m -CONFIG_NFT_NAT=m -CONFIG_NFT_TUNNEL=m -CONFIG_NFT_OBJREF=m -CONFIG_NFT_QUEUE=m -CONFIG_NFT_QUOTA=m -CONFIG_NFT_REJECT=m -CONFIG_NFT_REJECT_INET=m -CONFIG_NFT_COMPAT=m -CONFIG_NFT_HASH=m -CONFIG_NFT_FIB=m -CONFIG_NFT_FIB_INET=m -CONFIG_NFT_XFRM=m -CONFIG_NFT_SOCKET=m -CONFIG_NFT_OSF=m -CONFIG_NFT_TPROXY=m -CONFIG_NFT_SYNPROXY=m -CONFIG_NF_DUP_NETDEV=m -CONFIG_NFT_DUP_NETDEV=m -CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NF_FLOW_TABLE_INET=m -CONFIG_NF_FLOW_TABLE=m -CONFIG_NETFILTER_XTABLES=m - -# -# Xtables combined modules -# -CONFIG_NETFILTER_XT_MARK=m -CONFIG_NETFILTER_XT_CONNMARK=m -CONFIG_NETFILTER_XT_SET=m - -# -# Xtables targets -# -CONFIG_NETFILTER_XT_TARGET_AUDIT=m -CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m -CONFIG_NETFILTER_XT_TARGET_CT=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_HL=m -CONFIG_NETFILTER_XT_TARGET_HMARK=m -CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m -CONFIG_NETFILTER_XT_TARGET_LED=m -CONFIG_NETFILTER_XT_TARGET_LOG=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_NAT=m -CONFIG_NETFILTER_XT_TARGET_NETMAP=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m -CONFIG_NETFILTER_XT_TARGET_RATEEST=m -CONFIG_NETFILTER_XT_TARGET_REDIRECT=m -CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m -CONFIG_NETFILTER_XT_TARGET_TEE=m -CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m -CONFIG_NETFILTER_XT_TARGET_SECMARK=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m - -# -# Xtables matches -# -CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_CGROUP=m -CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m 
-CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_CPU=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ECN=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_HL=m -CONFIG_NETFILTER_XT_MATCH_IPCOMP=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_IPVS=m -CONFIG_NETFILTER_XT_MATCH_L2TP=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_NFACCT=m -CONFIG_NETFILTER_XT_MATCH_OSF=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_SOCKET=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -# end of Core Netfilter Configuration - -CONFIG_IP_SET=m -CONFIG_IP_SET_MAX=256 -CONFIG_IP_SET_BITMAP_IP=m -CONFIG_IP_SET_BITMAP_IPMAC=m -CONFIG_IP_SET_BITMAP_PORT=m -CONFIG_IP_SET_HASH_IP=m -CONFIG_IP_SET_HASH_IPMARK=m -CONFIG_IP_SET_HASH_IPPORT=m -CONFIG_IP_SET_HASH_IPPORTIP=m -CONFIG_IP_SET_HASH_IPPORTNET=m -CONFIG_IP_SET_HASH_IPMAC=m -CONFIG_IP_SET_HASH_MAC=m -CONFIG_IP_SET_HASH_NETPORTNET=m -CONFIG_IP_SET_HASH_NET=m -CONFIG_IP_SET_HASH_NETNET=m -CONFIG_IP_SET_HASH_NETPORT=m -CONFIG_IP_SET_HASH_NETIFACE=m -CONFIG_IP_SET_LIST_SET=m -CONFIG_IP_VS=m -CONFIG_IP_VS_IPV6=y -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=15 - -# -# IPVS transport protocol load balancing support -# -CONFIG_IP_VS_PROTO_TCP=y -CONFIG_IP_VS_PROTO_UDP=y -CONFIG_IP_VS_PROTO_AH_ESP=y -CONFIG_IP_VS_PROTO_ESP=y -CONFIG_IP_VS_PROTO_AH=y -CONFIG_IP_VS_PROTO_SCTP=y - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_FO=m -CONFIG_IP_VS_OVF=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_MH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m - -# -# IPVS SH scheduler -# -CONFIG_IP_VS_SH_TAB_BITS=8 - -# -# IPVS MH scheduler -# -CONFIG_IP_VS_MH_TAB_INDEX=12 - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -CONFIG_IP_VS_NFCT=y -CONFIG_IP_VS_PE_SIP=m - -# -# IP: Netfilter Configuration -# -CONFIG_NF_DEFRAG_IPV4=m -CONFIG_NF_SOCKET_IPV4=m -CONFIG_NF_TPROXY_IPV4=m -CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_REJECT_IPV4=m -CONFIG_NFT_DUP_IPV4=m -CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m -CONFIG_NF_DUP_IPV4=m -CONFIG_NF_LOG_ARP=m -CONFIG_NF_LOG_IPV4=m -CONFIG_NF_REJECT_IPV4=m -CONFIG_NF_NAT_SNMP_BASIC=m -CONFIG_NF_NAT_PPTP=m -CONFIG_NF_NAT_H323=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_SYNPROXY=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m 
-CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -# end of IP: Netfilter Configuration - -# -# IPv6: Netfilter Configuration -# -CONFIG_NF_SOCKET_IPV6=m -CONFIG_NF_TPROXY_IPV6=m -CONFIG_NF_TABLES_IPV6=y -CONFIG_NFT_REJECT_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m -CONFIG_NF_DUP_IPV6=m -CONFIG_NF_REJECT_IPV6=m -CONFIG_NF_LOG_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m -# end of IPv6: Netfilter Configuration - -CONFIG_NF_DEFRAG_IPV6=m -CONFIG_NF_TABLES_BRIDGE=m -CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m -CONFIG_NF_LOG_BRIDGE=m -CONFIG_NF_CONNTRACK_BRIDGE=m -CONFIG_BRIDGE_NF_EBTABLES=m -CONFIG_BRIDGE_EBT_BROUTE=m -CONFIG_BRIDGE_EBT_T_FILTER=m -CONFIG_BRIDGE_EBT_T_NAT=m -CONFIG_BRIDGE_EBT_802_3=m -CONFIG_BRIDGE_EBT_AMONG=m -CONFIG_BRIDGE_EBT_ARP=m -CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m -CONFIG_BRIDGE_EBT_LIMIT=m -CONFIG_BRIDGE_EBT_MARK=m -CONFIG_BRIDGE_EBT_PKTTYPE=m -CONFIG_BRIDGE_EBT_STP=m -CONFIG_BRIDGE_EBT_VLAN=m -CONFIG_BRIDGE_EBT_ARPREPLY=m -CONFIG_BRIDGE_EBT_DNAT=m -CONFIG_BRIDGE_EBT_MARK_T=m -CONFIG_BRIDGE_EBT_REDIRECT=m -CONFIG_BRIDGE_EBT_SNAT=m -CONFIG_BRIDGE_EBT_LOG=m -CONFIG_BRIDGE_EBT_NFLOG=m -# CONFIG_BPFILTER is not set -CONFIG_IP_DCCP=m -CONFIG_INET_DCCP_DIAG=m - -# -# DCCP CCIDs Configuration -# -# CONFIG_IP_DCCP_CCID2_DEBUG is not set -CONFIG_IP_DCCP_CCID3=y -# CONFIG_IP_DCCP_CCID3_DEBUG is not set -CONFIG_IP_DCCP_TFRC_LIB=y -# end of DCCP CCIDs Configuration - -# -# DCCP Kernel Hacking -# -# CONFIG_IP_DCCP_DEBUG is not set -# end of DCCP Kernel Hacking - -CONFIG_IP_SCTP=m -# CONFIG_SCTP_DBG_OBJCNT is not set -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5 is not set -CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_NONE is not set -CONFIG_SCTP_COOKIE_HMAC_MD5=y -CONFIG_SCTP_COOKIE_HMAC_SHA1=y -CONFIG_INET_SCTP_DIAG=m -CONFIG_RDS=m -CONFIG_RDS_RDMA=m -CONFIG_RDS_TCP=m -# CONFIG_RDS_DEBUG is not set -CONFIG_TIPC=m -CONFIG_TIPC_MEDIA_IB=y -CONFIG_TIPC_MEDIA_UDP=y -CONFIG_TIPC_CRYPTO=y -CONFIG_TIPC_DIAG=m -CONFIG_ATM=m -CONFIG_ATM_CLIP=m -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -# CONFIG_ATM_BR2684_IPFILTER is not set -CONFIG_L2TP=m -# CONFIG_L2TP_DEBUGFS is not set -CONFIG_L2TP_V3=y -CONFIG_L2TP_IP=m -CONFIG_L2TP_ETH=m -CONFIG_STP=m -CONFIG_GARP=m -CONFIG_MRP=m -CONFIG_BRIDGE=m -CONFIG_BRIDGE_IGMP_SNOOPING=y -CONFIG_BRIDGE_VLAN_FILTERING=y -CONFIG_BRIDGE_MRP=y -CONFIG_HAVE_NET_DSA=y -CONFIG_NET_DSA=m -CONFIG_NET_DSA_TAG_8021Q=m -CONFIG_NET_DSA_TAG_AR9331=m -CONFIG_NET_DSA_TAG_BRCM_COMMON=m -CONFIG_NET_DSA_TAG_BRCM=m -CONFIG_NET_DSA_TAG_BRCM_PREPEND=m -CONFIG_NET_DSA_TAG_GSWIP=m -CONFIG_NET_DSA_TAG_DSA=m -CONFIG_NET_DSA_TAG_EDSA=m -CONFIG_NET_DSA_TAG_MTK=m -CONFIG_NET_DSA_TAG_KSZ=m -CONFIG_NET_DSA_TAG_RTL4_A=m -CONFIG_NET_DSA_TAG_OCELOT=m -CONFIG_NET_DSA_TAG_QCA=m -CONFIG_NET_DSA_TAG_LAN9303=m 
-CONFIG_NET_DSA_TAG_SJA1105=m -CONFIG_NET_DSA_TAG_TRAILER=m -CONFIG_VLAN_8021Q=m -CONFIG_VLAN_8021Q_GVRP=y -CONFIG_VLAN_8021Q_MVRP=y -# CONFIG_DECNET is not set -CONFIG_LLC=m -CONFIG_LLC2=m -CONFIG_ATALK=m -CONFIG_DEV_APPLETALK=m -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -CONFIG_PHONET=m -CONFIG_6LOWPAN=m -# CONFIG_6LOWPAN_DEBUGFS is not set -CONFIG_6LOWPAN_NHC=m -CONFIG_6LOWPAN_NHC_DEST=m -CONFIG_6LOWPAN_NHC_FRAGMENT=m -CONFIG_6LOWPAN_NHC_HOP=m -CONFIG_6LOWPAN_NHC_IPV6=m -CONFIG_6LOWPAN_NHC_MOBILITY=m -CONFIG_6LOWPAN_NHC_ROUTING=m -CONFIG_6LOWPAN_NHC_UDP=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m -CONFIG_IEEE802154=m -CONFIG_IEEE802154_NL802154_EXPERIMENTAL=y -CONFIG_IEEE802154_SOCKET=m -CONFIG_IEEE802154_6LOWPAN=m -CONFIG_MAC802154=m -CONFIG_NET_SCHED=y - -# -# Queueing/Scheduling -# -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_HFSC=m -CONFIG_NET_SCH_ATM=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_MULTIQ=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFB=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_CBS=m -CONFIG_NET_SCH_ETF=m -CONFIG_NET_SCH_TAPRIO=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_MQPRIO=m -CONFIG_NET_SCH_SKBPRIO=m -CONFIG_NET_SCH_CHOKE=m -CONFIG_NET_SCH_QFQ=m -CONFIG_NET_SCH_CODEL=m -CONFIG_NET_SCH_FQ_CODEL=y -CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_FQ=m -CONFIG_NET_SCH_HHF=m -CONFIG_NET_SCH_PIE=m -CONFIG_NET_SCH_FQ_PIE=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_SCH_PLUG=m -CONFIG_NET_SCH_ETS=m -CONFIG_NET_SCH_DEFAULT=y -# CONFIG_DEFAULT_FQ is not set -# CONFIG_DEFAULT_CODEL is not set -CONFIG_DEFAULT_FQ_CODEL=y -# CONFIG_DEFAULT_FQ_PIE is not set -# CONFIG_DEFAULT_SFQ is not set -# CONFIG_DEFAULT_PFIFO_FAST is not set -CONFIG_DEFAULT_NET_SCH="fq_codel" - -# -# Classification -# -CONFIG_NET_CLS=y -CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_FLOW=m -CONFIG_NET_CLS_CGROUP=m -CONFIG_NET_CLS_BPF=m -CONFIG_NET_CLS_FLOWER=m -CONFIG_NET_CLS_MATCHALL=m -CONFIG_NET_EMATCH=y -CONFIG_NET_EMATCH_STACK=32 -CONFIG_NET_EMATCH_CMP=m -CONFIG_NET_EMATCH_NBYTE=m -CONFIG_NET_EMATCH_U32=m -CONFIG_NET_EMATCH_META=m -CONFIG_NET_EMATCH_TEXT=m -CONFIG_NET_EMATCH_CANID=m -CONFIG_NET_EMATCH_IPSET=m -CONFIG_NET_EMATCH_IPT=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=m -CONFIG_NET_ACT_GACT=m -CONFIG_GACT_PROB=y -CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_SAMPLE=m -CONFIG_NET_ACT_IPT=m -CONFIG_NET_ACT_NAT=m -CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_ACT_SIMP=m -CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_ACT_CSUM=m -CONFIG_NET_ACT_MPLS=m -CONFIG_NET_ACT_VLAN=m -CONFIG_NET_ACT_BPF=m -CONFIG_NET_ACT_CONNMARK=m -CONFIG_NET_ACT_CTINFO=m -CONFIG_NET_ACT_SKBMOD=m -CONFIG_NET_ACT_IFE=m -CONFIG_NET_ACT_TUNNEL_KEY=m -CONFIG_NET_ACT_CT=m -CONFIG_NET_ACT_GATE=m -CONFIG_NET_IFE_SKBMARK=m -CONFIG_NET_IFE_SKBPRIO=m -CONFIG_NET_IFE_SKBTCINDEX=m -CONFIG_NET_TC_SKB_EXT=y -CONFIG_NET_SCH_FIFO=y -CONFIG_DCB=y -CONFIG_DNS_RESOLVER=m -CONFIG_BATMAN_ADV=m -CONFIG_BATMAN_ADV_BATMAN_V=y -CONFIG_BATMAN_ADV_BLA=y -CONFIG_BATMAN_ADV_DAT=y -CONFIG_BATMAN_ADV_NC=y -CONFIG_BATMAN_ADV_MCAST=y -CONFIG_BATMAN_ADV_DEBUGFS=y -# CONFIG_BATMAN_ADV_DEBUG is not set 
-CONFIG_BATMAN_ADV_SYSFS=y -# CONFIG_BATMAN_ADV_TRACING is not set -CONFIG_OPENVSWITCH=m -CONFIG_OPENVSWITCH_GRE=m -CONFIG_OPENVSWITCH_VXLAN=m -CONFIG_OPENVSWITCH_GENEVE=m -CONFIG_VSOCKETS=m -CONFIG_VSOCKETS_DIAG=m -CONFIG_VSOCKETS_LOOPBACK=m -CONFIG_VMWARE_VMCI_VSOCKETS=m -CONFIG_VIRTIO_VSOCKETS=m -CONFIG_VIRTIO_VSOCKETS_COMMON=m -CONFIG_HYPERV_VSOCKETS=m -CONFIG_NETLINK_DIAG=m -CONFIG_MPLS=y -CONFIG_NET_MPLS_GSO=m -CONFIG_MPLS_ROUTING=m -CONFIG_MPLS_IPTUNNEL=m -CONFIG_NET_NSH=m -CONFIG_HSR=m -CONFIG_NET_SWITCHDEV=y -CONFIG_NET_L3_MASTER_DEV=y -CONFIG_QRTR=m -CONFIG_QRTR_SMD=m -CONFIG_QRTR_TUN=m -CONFIG_QRTR_MHI=m -CONFIG_NET_NCSI=y -CONFIG_NCSI_OEM_CMD_GET_MAC=y -CONFIG_RPS=y -CONFIG_RFS_ACCEL=y -CONFIG_XPS=y -CONFIG_CGROUP_NET_PRIO=y -CONFIG_CGROUP_NET_CLASSID=y -CONFIG_NET_RX_BUSY_POLL=y -CONFIG_BQL=y -CONFIG_BPF_JIT=y -CONFIG_BPF_STREAM_PARSER=y -CONFIG_NET_FLOW_LIMIT=y - -# -# Network testing -# -CONFIG_NET_PKTGEN=m -CONFIG_NET_DROP_MONITOR=y -# end of Network testing -# end of Networking options - -CONFIG_HAMRADIO=y - -# -# Packet Radio protocols -# -CONFIG_AX25=m -CONFIG_AX25_DAMA_SLAVE=y -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# -CONFIG_MKISS=m -CONFIG_6PACK=m -CONFIG_BPQETHER=m -CONFIG_BAYCOM_SER_FDX=m -CONFIG_BAYCOM_SER_HDX=m -CONFIG_BAYCOM_PAR=m -CONFIG_YAM=m -# end of AX.25 network device drivers - -CONFIG_CAN=m -CONFIG_CAN_RAW=m -CONFIG_CAN_BCM=m -CONFIG_CAN_GW=m -CONFIG_CAN_J1939=m - -# -# CAN Device Drivers -# -CONFIG_CAN_VCAN=m -CONFIG_CAN_VXCAN=m -CONFIG_CAN_SLCAN=m -CONFIG_CAN_DEV=m -CONFIG_CAN_CALC_BITTIMING=y -CONFIG_CAN_FLEXCAN=m -CONFIG_CAN_GRCAN=m -CONFIG_CAN_JANZ_ICAN3=m -CONFIG_CAN_KVASER_PCIEFD=m -CONFIG_CAN_C_CAN=m -CONFIG_CAN_C_CAN_PLATFORM=m -CONFIG_CAN_C_CAN_PCI=m -CONFIG_CAN_CC770=m -# CONFIG_CAN_CC770_ISA is not set -CONFIG_CAN_CC770_PLATFORM=m -CONFIG_CAN_IFI_CANFD=m -CONFIG_CAN_M_CAN=m -CONFIG_CAN_M_CAN_PLATFORM=m -CONFIG_CAN_M_CAN_TCAN4X5X=m -CONFIG_CAN_PEAK_PCIEFD=m -CONFIG_CAN_SJA1000=m -CONFIG_CAN_EMS_PCI=m -# CONFIG_CAN_EMS_PCMCIA is not set -CONFIG_CAN_F81601=m -CONFIG_CAN_KVASER_PCI=m -CONFIG_CAN_PEAK_PCI=m -CONFIG_CAN_PEAK_PCIEC=y -CONFIG_CAN_PEAK_PCMCIA=m -CONFIG_CAN_PLX_PCI=m -# CONFIG_CAN_SJA1000_ISA is not set -CONFIG_CAN_SJA1000_PLATFORM=m -CONFIG_CAN_SOFTING=m -CONFIG_CAN_SOFTING_CS=m - -# -# CAN SPI interfaces -# -CONFIG_CAN_HI311X=m -CONFIG_CAN_MCP251X=m -# end of CAN SPI interfaces - -# -# CAN USB interfaces -# -CONFIG_CAN_8DEV_USB=m -CONFIG_CAN_EMS_USB=m -CONFIG_CAN_ESD_USB2=m -CONFIG_CAN_GS_USB=m -CONFIG_CAN_KVASER_USB=m -CONFIG_CAN_MCBA_USB=m -CONFIG_CAN_PEAK_USB=m -CONFIG_CAN_UCAN=m -# end of CAN USB interfaces - -# CONFIG_CAN_DEBUG_DEVICES is not set -# end of CAN Device Drivers - -CONFIG_BT=m -CONFIG_BT_BREDR=y -CONFIG_BT_RFCOMM=m -CONFIG_BT_RFCOMM_TTY=y -CONFIG_BT_BNEP=m -CONFIG_BT_BNEP_MC_FILTER=y -CONFIG_BT_BNEP_PROTO_FILTER=y -CONFIG_BT_CMTP=m -CONFIG_BT_HIDP=m -CONFIG_BT_HS=y -CONFIG_BT_LE=y -CONFIG_BT_6LOWPAN=m -CONFIG_BT_LEDS=y -CONFIG_BT_MSFTEXT=y -CONFIG_BT_DEBUGFS=y -# CONFIG_BT_SELFTEST is not set - -# -# Bluetooth device drivers -# -CONFIG_BT_INTEL=m -CONFIG_BT_BCM=m -CONFIG_BT_RTL=m -CONFIG_BT_QCA=m -CONFIG_BT_HCIBTUSB=m -CONFIG_BT_HCIBTUSB_AUTOSUSPEND=y -CONFIG_BT_HCIBTUSB_BCM=y -CONFIG_BT_HCIBTUSB_MTK=y -CONFIG_BT_HCIBTUSB_RTL=y -CONFIG_BT_HCIBTSDIO=m -CONFIG_BT_HCIUART=m -CONFIG_BT_HCIUART_SERDEV=y -CONFIG_BT_HCIUART_H4=y -CONFIG_BT_HCIUART_NOKIA=m -CONFIG_BT_HCIUART_BCSP=y -CONFIG_BT_HCIUART_ATH3K=y -CONFIG_BT_HCIUART_LL=y -CONFIG_BT_HCIUART_3WIRE=y -CONFIG_BT_HCIUART_INTEL=y 
-CONFIG_BT_HCIUART_BCM=y -CONFIG_BT_HCIUART_RTL=y -CONFIG_BT_HCIUART_QCA=y -CONFIG_BT_HCIUART_AG6XX=y -CONFIG_BT_HCIUART_MRVL=y -CONFIG_BT_HCIBCM203X=m -CONFIG_BT_HCIBPA10X=m -CONFIG_BT_HCIBFUSB=m -CONFIG_BT_HCIDTL1=m -CONFIG_BT_HCIBT3C=m -CONFIG_BT_HCIBLUECARD=m -CONFIG_BT_HCIVHCI=m -CONFIG_BT_MRVL=m -CONFIG_BT_MRVL_SDIO=m -CONFIG_BT_ATH3K=m -CONFIG_BT_MTKSDIO=m -CONFIG_BT_MTKUART=m -CONFIG_BT_HCIRSI=m -# end of Bluetooth device drivers - -CONFIG_AF_RXRPC=m -CONFIG_AF_RXRPC_IPV6=y -# CONFIG_AF_RXRPC_INJECT_LOSS is not set -CONFIG_AF_RXRPC_DEBUG=y -CONFIG_RXKAD=y -CONFIG_AF_KCM=m -CONFIG_STREAM_PARSER=y -CONFIG_FIB_RULES=y -CONFIG_WIRELESS=y -CONFIG_WIRELESS_EXT=y -CONFIG_WEXT_CORE=y -CONFIG_WEXT_PROC=y -CONFIG_WEXT_SPY=y -CONFIG_WEXT_PRIV=y -CONFIG_CFG80211=m -# CONFIG_NL80211_TESTMODE is not set -# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set -# CONFIG_CFG80211_CERTIFICATION_ONUS is not set -CONFIG_CFG80211_REQUIRE_SIGNED_REGDB=y -CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS=y -CONFIG_CFG80211_DEFAULT_PS=y -CONFIG_CFG80211_DEBUGFS=y -CONFIG_CFG80211_CRDA_SUPPORT=y -CONFIG_CFG80211_WEXT=y -CONFIG_CFG80211_WEXT_EXPORT=y -CONFIG_LIB80211=m -CONFIG_LIB80211_CRYPT_WEP=m -CONFIG_LIB80211_CRYPT_CCMP=m -CONFIG_LIB80211_CRYPT_TKIP=m -# CONFIG_LIB80211_DEBUG is not set -CONFIG_MAC80211=m -CONFIG_MAC80211_HAS_RC=y -CONFIG_MAC80211_RC_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT="minstrel_ht" -CONFIG_MAC80211_MESH=y -CONFIG_MAC80211_LEDS=y -CONFIG_MAC80211_DEBUGFS=y -# CONFIG_MAC80211_MESSAGE_TRACING is not set -# CONFIG_MAC80211_DEBUG_MENU is not set -CONFIG_MAC80211_STA_HASH_MAX_SIZE=0 -CONFIG_WIMAX=m -CONFIG_WIMAX_DEBUG_LEVEL=8 -CONFIG_RFKILL=m -CONFIG_RFKILL_LEDS=y -CONFIG_RFKILL_INPUT=y -CONFIG_RFKILL_GPIO=m -CONFIG_NET_9P=m -CONFIG_NET_9P_VIRTIO=m -CONFIG_NET_9P_XEN=m -CONFIG_NET_9P_RDMA=m -# CONFIG_NET_9P_DEBUG is not set -CONFIG_CAIF=m -# CONFIG_CAIF_DEBUG is not set -CONFIG_CAIF_NETDEV=m -CONFIG_CAIF_USB=m -CONFIG_CEPH_LIB=m -CONFIG_CEPH_LIB_PRETTYDEBUG=y -CONFIG_CEPH_LIB_USE_DNS_RESOLVER=y -CONFIG_NFC=m -CONFIG_NFC_DIGITAL=m -CONFIG_NFC_NCI=m -CONFIG_NFC_NCI_SPI=m -CONFIG_NFC_NCI_UART=m -CONFIG_NFC_HCI=m -CONFIG_NFC_SHDLC=y - -# -# Near Field Communication (NFC) devices -# -CONFIG_NFC_TRF7970A=m -CONFIG_NFC_MEI_PHY=m -CONFIG_NFC_SIM=m -CONFIG_NFC_PORT100=m -CONFIG_NFC_FDP=m -CONFIG_NFC_FDP_I2C=m -CONFIG_NFC_PN544=m -CONFIG_NFC_PN544_I2C=m -CONFIG_NFC_PN544_MEI=m -CONFIG_NFC_PN533=m -CONFIG_NFC_PN533_USB=m -CONFIG_NFC_PN533_I2C=m -CONFIG_NFC_PN532_UART=m -CONFIG_NFC_MICROREAD=m -CONFIG_NFC_MICROREAD_I2C=m -CONFIG_NFC_MICROREAD_MEI=m -CONFIG_NFC_MRVL=m -CONFIG_NFC_MRVL_USB=m -CONFIG_NFC_MRVL_UART=m -CONFIG_NFC_MRVL_I2C=m -CONFIG_NFC_MRVL_SPI=m -CONFIG_NFC_ST21NFCA=m -CONFIG_NFC_ST21NFCA_I2C=m -CONFIG_NFC_ST_NCI=m -CONFIG_NFC_ST_NCI_I2C=m -CONFIG_NFC_ST_NCI_SPI=m -CONFIG_NFC_NXP_NCI=m -CONFIG_NFC_NXP_NCI_I2C=m -CONFIG_NFC_S3FWRN5=m -CONFIG_NFC_S3FWRN5_I2C=m -CONFIG_NFC_ST95HF=m -# end of Near Field Communication (NFC) devices - -CONFIG_PSAMPLE=m -CONFIG_NET_IFE=m -CONFIG_LWTUNNEL=y -CONFIG_LWTUNNEL_BPF=y -CONFIG_DST_CACHE=y -CONFIG_GRO_CELLS=y -CONFIG_SOCK_VALIDATE_XMIT=y -CONFIG_NET_SOCK_MSG=y -CONFIG_NET_DEVLINK=y -CONFIG_PAGE_POOL=y -CONFIG_FAILOVER=m -CONFIG_ETHTOOL_NETLINK=y -CONFIG_HAVE_EBPF_JIT=y - -# -# Device Drivers -# -CONFIG_HAVE_EISA=y -# CONFIG_EISA is not set -CONFIG_HAVE_PCI=y -CONFIG_PCI=y -CONFIG_PCI_DOMAINS=y -CONFIG_PCIEPORTBUS=y -CONFIG_HOTPLUG_PCI_PCIE=y -CONFIG_PCIEAER=y -# CONFIG_PCIEAER_INJECT is not set -CONFIG_PCIE_ECRC=y 
-CONFIG_PCIEASPM=y -CONFIG_PCIEASPM_DEFAULT=y -# CONFIG_PCIEASPM_POWERSAVE is not set -# CONFIG_PCIEASPM_POWER_SUPERSAVE is not set -# CONFIG_PCIEASPM_PERFORMANCE is not set -CONFIG_PCIE_PME=y -CONFIG_PCIE_DPC=y -CONFIG_PCIE_PTM=y -# CONFIG_PCIE_BW is not set -CONFIG_PCIE_EDR=y -CONFIG_PCI_MSI=y -CONFIG_PCI_MSI_IRQ_DOMAIN=y -CONFIG_PCI_QUIRKS=y -# CONFIG_PCI_DEBUG is not set -CONFIG_PCI_REALLOC_ENABLE_AUTO=y -CONFIG_PCI_STUB=y -CONFIG_PCI_PF_STUB=m -CONFIG_XEN_PCIDEV_FRONTEND=m -CONFIG_PCI_ATS=y -CONFIG_PCI_ECAM=y -CONFIG_PCI_LOCKLESS_CONFIG=y -CONFIG_PCI_IOV=y -CONFIG_PCI_PRI=y -CONFIG_PCI_PASID=y -CONFIG_PCI_P2PDMA=y -CONFIG_PCI_LABEL=y -CONFIG_PCI_HYPERV=m -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_ACPI=y -CONFIG_HOTPLUG_PCI_ACPI_IBM=m -CONFIG_HOTPLUG_PCI_CPCI=y -CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m -CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m -CONFIG_HOTPLUG_PCI_SHPC=y - -# -# PCI controller drivers -# -CONFIG_PCI_FTPCI100=y -CONFIG_PCI_HOST_COMMON=y -CONFIG_PCI_HOST_GENERIC=y -CONFIG_PCIE_XILINX=y -CONFIG_VMD=m -CONFIG_PCI_HYPERV_INTERFACE=m - -# -# DesignWare PCI Core Support -# -CONFIG_PCIE_DW=y -CONFIG_PCIE_DW_HOST=y -CONFIG_PCIE_DW_EP=y -CONFIG_PCIE_DW_PLAT=y -CONFIG_PCIE_DW_PLAT_HOST=y -CONFIG_PCIE_DW_PLAT_EP=y -CONFIG_PCIE_INTEL_GW=y -CONFIG_PCI_MESON=y -# end of DesignWare PCI Core Support - -# -# Mobiveil PCIe Core Support -# -# end of Mobiveil PCIe Core Support - -# -# Cadence PCIe controllers support -# -CONFIG_PCIE_CADENCE=y -CONFIG_PCIE_CADENCE_HOST=y -CONFIG_PCIE_CADENCE_EP=y -CONFIG_PCIE_CADENCE_PLAT=y -CONFIG_PCIE_CADENCE_PLAT_HOST=y -CONFIG_PCIE_CADENCE_PLAT_EP=y -# CONFIG_PCI_J721E_HOST is not set -# CONFIG_PCI_J721E_EP is not set -# end of Cadence PCIe controllers support -# end of PCI controller drivers - -# -# PCI Endpoint -# -CONFIG_PCI_ENDPOINT=y -CONFIG_PCI_ENDPOINT_CONFIGFS=y -# CONFIG_PCI_EPF_TEST is not set -# end of PCI Endpoint - -# -# PCI switch controller drivers -# -CONFIG_PCI_SW_SWITCHTEC=m -# end of PCI switch controller drivers - -CONFIG_PCCARD=m -CONFIG_PCMCIA=m -CONFIG_PCMCIA_LOAD_CIS=y -CONFIG_CARDBUS=y - -# -# PC-card bridges -# -CONFIG_YENTA=m -CONFIG_YENTA_O2=y -CONFIG_YENTA_RICOH=y -CONFIG_YENTA_TI=y -CONFIG_YENTA_ENE_TUNE=y -CONFIG_YENTA_TOSHIBA=y -CONFIG_PD6729=m -CONFIG_I82092=m -CONFIG_PCCARD_NONSTATIC=y -CONFIG_RAPIDIO=m -CONFIG_RAPIDIO_TSI721=m -CONFIG_RAPIDIO_DISC_TIMEOUT=30 -CONFIG_RAPIDIO_ENABLE_RX_TX_PORTS=y -CONFIG_RAPIDIO_DMA_ENGINE=y -# CONFIG_RAPIDIO_DEBUG is not set -CONFIG_RAPIDIO_ENUM_BASIC=m -CONFIG_RAPIDIO_CHMAN=m -CONFIG_RAPIDIO_MPORT_CDEV=m - -# -# RapidIO Switch drivers -# -CONFIG_RAPIDIO_TSI57X=m -CONFIG_RAPIDIO_CPS_XX=m -CONFIG_RAPIDIO_TSI568=m -CONFIG_RAPIDIO_CPS_GEN2=m -CONFIG_RAPIDIO_RXS_GEN3=m -# end of RapidIO Switch drivers - -# -# Generic Driver Options -# -# CONFIG_UEVENT_HELPER is not set -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y - -# -# Firmware loader -# -CONFIG_FW_LOADER=y -CONFIG_FW_LOADER_PAGED_BUF=y -CONFIG_EXTRA_FIRMWARE="" -# CONFIG_FW_LOADER_USER_HELPER is not set -CONFIG_FW_LOADER_COMPRESS=y -CONFIG_FW_CACHE=y -# end of Firmware loader - -CONFIG_WANT_DEV_COREDUMP=y -CONFIG_ALLOW_DEV_COREDUMP=y -CONFIG_DEV_COREDUMP=y -# CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set -# CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set -CONFIG_HMEM_REPORTING=y -# CONFIG_TEST_ASYNC_DRIVER_PROBE is not set -CONFIG_SYS_HYPERVISOR=y -CONFIG_GENERIC_CPU_AUTOPROBE=y -CONFIG_GENERIC_CPU_VULNERABILITIES=y -CONFIG_REGMAP=y -CONFIG_REGMAP_I2C=y -CONFIG_REGMAP_SLIMBUS=m 
-CONFIG_REGMAP_SPI=y -CONFIG_REGMAP_SPMI=m -CONFIG_REGMAP_W1=m -CONFIG_REGMAP_MMIO=y -CONFIG_REGMAP_IRQ=y -CONFIG_REGMAP_SOUNDWIRE=m -CONFIG_REGMAP_SCCB=m -CONFIG_REGMAP_I3C=m -CONFIG_DMA_SHARED_BUFFER=y -# CONFIG_DMA_FENCE_TRACE is not set -# end of Generic Driver Options - -# -# Bus devices -# -CONFIG_MOXTET=m -CONFIG_SIMPLE_PM_BUS=y -CONFIG_MHI_BUS=m -# end of Bus devices - -CONFIG_CONNECTOR=y -CONFIG_PROC_EVENTS=y -CONFIG_GNSS=m -CONFIG_GNSS_SERIAL=m -CONFIG_GNSS_MTK_SERIAL=m -CONFIG_GNSS_SIRF_SERIAL=m -CONFIG_GNSS_UBX_SERIAL=m -CONFIG_MTD=m -CONFIG_MTD_TESTS=m - -# -# Partition parsers -# -CONFIG_MTD_AR7_PARTS=m -CONFIG_MTD_CMDLINE_PARTS=m -CONFIG_MTD_OF_PARTS=m -CONFIG_MTD_REDBOOT_PARTS=m -CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1 -# CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED is not set -# CONFIG_MTD_REDBOOT_PARTS_READONLY is not set -# end of Partition parsers - -# -# User Modules And Translation Layers -# -CONFIG_MTD_BLKDEVS=m -CONFIG_MTD_BLOCK=m -CONFIG_MTD_BLOCK_RO=m -CONFIG_FTL=m -CONFIG_NFTL=m -CONFIG_NFTL_RW=y -CONFIG_INFTL=m -CONFIG_RFD_FTL=m -CONFIG_SSFDC=m -CONFIG_SM_FTL=m -CONFIG_MTD_OOPS=m -CONFIG_MTD_PSTORE=m -CONFIG_MTD_SWAP=m -CONFIG_MTD_PARTITIONED_MASTER=y - -# -# RAM/ROM/Flash chip drivers -# -CONFIG_MTD_CFI=m -CONFIG_MTD_JEDECPROBE=m -CONFIG_MTD_GEN_PROBE=m -# CONFIG_MTD_CFI_ADV_OPTIONS is not set -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y -CONFIG_MTD_CFI_INTELEXT=m -CONFIG_MTD_CFI_AMDSTD=m -CONFIG_MTD_CFI_STAA=m -CONFIG_MTD_CFI_UTIL=m -CONFIG_MTD_RAM=m -CONFIG_MTD_ROM=m -CONFIG_MTD_ABSENT=m -# end of RAM/ROM/Flash chip drivers - -# -# Mapping drivers for chip access -# -CONFIG_MTD_COMPLEX_MAPPINGS=y -CONFIG_MTD_PHYSMAP=m -# CONFIG_MTD_PHYSMAP_COMPAT is not set -CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_PHYSMAP_VERSATILE=y -CONFIG_MTD_PHYSMAP_GEMINI=y -CONFIG_MTD_PHYSMAP_GPIO_ADDR=y -CONFIG_MTD_SBC_GXX=m -CONFIG_MTD_AMD76XROM=m -CONFIG_MTD_ICHXROM=m -CONFIG_MTD_ESB2ROM=m -CONFIG_MTD_CK804XROM=m -CONFIG_MTD_SCB2_FLASH=m -CONFIG_MTD_NETtel=m -CONFIG_MTD_L440GX=m -CONFIG_MTD_PCI=m -CONFIG_MTD_PCMCIA=m -# CONFIG_MTD_PCMCIA_ANONYMOUS is not set -CONFIG_MTD_INTEL_VR_NOR=m -CONFIG_MTD_PLATRAM=m -# end of Mapping drivers for chip access - -# -# Self-contained MTD device drivers -# -CONFIG_MTD_PMC551=m -# CONFIG_MTD_PMC551_BUGFIX is not set -# CONFIG_MTD_PMC551_DEBUG is not set -CONFIG_MTD_DATAFLASH=m -# CONFIG_MTD_DATAFLASH_WRITE_VERIFY is not set -CONFIG_MTD_DATAFLASH_OTP=y -CONFIG_MTD_MCHP23K256=m -CONFIG_MTD_SST25L=m -CONFIG_MTD_SLRAM=m -CONFIG_MTD_PHRAM=m -CONFIG_MTD_MTDRAM=m -CONFIG_MTDRAM_TOTAL_SIZE=4096 -CONFIG_MTDRAM_ERASE_SIZE=128 -CONFIG_MTD_BLOCK2MTD=m - -# -# Disk-On-Chip Device Drivers -# -CONFIG_MTD_DOCG3=m -CONFIG_BCH_CONST_M=14 -CONFIG_BCH_CONST_T=4 -# end of Self-contained MTD device drivers - -# -# NAND -# -CONFIG_MTD_NAND_CORE=m -CONFIG_MTD_ONENAND=m -# CONFIG_MTD_ONENAND_VERIFY_WRITE is not set -CONFIG_MTD_ONENAND_GENERIC=m -CONFIG_MTD_ONENAND_OTP=y -CONFIG_MTD_ONENAND_2X_PROGRAM=y -CONFIG_MTD_NAND_ECC_SW_HAMMING=m -CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y -CONFIG_MTD_RAW_NAND=m -CONFIG_MTD_NAND_ECC_SW_BCH=y - -# -# Raw/parallel NAND flash controllers -# -CONFIG_MTD_NAND_DENALI=m -CONFIG_MTD_NAND_DENALI_PCI=m -CONFIG_MTD_NAND_DENALI_DT=m -CONFIG_MTD_NAND_CAFE=m -CONFIG_MTD_NAND_MXIC=m -CONFIG_MTD_NAND_GPIO=m -CONFIG_MTD_NAND_PLATFORM=m -CONFIG_MTD_NAND_CADENCE=m -CONFIG_MTD_NAND_ARASAN=m - -# -# Misc -# -CONFIG_MTD_SM_COMMON=m -CONFIG_MTD_NAND_NANDSIM=m -CONFIG_MTD_NAND_RICOH=m 
-CONFIG_MTD_NAND_DISKONCHIP=m -# CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADVANCED is not set -CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADDRESS=0 -CONFIG_MTD_NAND_DISKONCHIP_BBTWRITE=y -CONFIG_MTD_SPI_NAND=m -# end of NAND - -# -# LPDDR & LPDDR2 PCM memory drivers -# -CONFIG_MTD_LPDDR=m -CONFIG_MTD_QINFO_PROBE=m -# end of LPDDR & LPDDR2 PCM memory drivers - -CONFIG_MTD_SPI_NOR=m -CONFIG_MTD_SPI_NOR_USE_4K_SECTORS=y -CONFIG_SPI_INTEL_SPI=m -CONFIG_SPI_INTEL_SPI_PCI=m -CONFIG_SPI_INTEL_SPI_PLATFORM=m -CONFIG_MTD_UBI=m -CONFIG_MTD_UBI_WL_THRESHOLD=4096 -CONFIG_MTD_UBI_BEB_LIMIT=20 -CONFIG_MTD_UBI_FASTMAP=y -CONFIG_MTD_UBI_GLUEBI=m -CONFIG_MTD_UBI_BLOCK=y -CONFIG_MTD_HYPERBUS=m -CONFIG_DTC=y -CONFIG_OF=y -# CONFIG_OF_UNITTEST is not set -CONFIG_OF_FLATTREE=y -CONFIG_OF_EARLY_FLATTREE=y -CONFIG_OF_KOBJ=y -CONFIG_OF_DYNAMIC=y -CONFIG_OF_ADDRESS=y -CONFIG_OF_IRQ=y -CONFIG_OF_NET=y -CONFIG_OF_MDIO=m -CONFIG_OF_RESERVED_MEM=y -CONFIG_OF_RESOLVE=y -CONFIG_OF_OVERLAY=y -CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_SERIAL=m -CONFIG_PARPORT_PC_FIFO=y -CONFIG_PARPORT_PC_SUPERIO=y -CONFIG_PARPORT_PC_PCMCIA=m -CONFIG_PARPORT_AX88796=m -CONFIG_PARPORT_1284=y -CONFIG_PARPORT_NOT_PC=y -CONFIG_PNP=y -CONFIG_PNP_DEBUG_MESSAGES=y - -# -# Protocols -# -CONFIG_PNPACPI=y -CONFIG_BLK_DEV=y -# CONFIG_BLK_DEV_NULL_BLK is not set -CONFIG_BLK_DEV_FD=m -CONFIG_CDROM=m -# CONFIG_PARIDE is not set -CONFIG_BLK_DEV_PCIESSD_MTIP32XX=m -CONFIG_ZRAM=m -CONFIG_ZRAM_WRITEBACK=y -# CONFIG_ZRAM_MEMORY_TRACKING is not set -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_BLK_DEV_DRBD=m -# CONFIG_DRBD_FAULT_INJECTION is not set -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_SKD=m -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_RAM=m -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=16384 -CONFIG_CDROM_PKTCDVD=m -CONFIG_CDROM_PKTCDVD_BUFFERS=8 -# CONFIG_CDROM_PKTCDVD_WCACHE is not set -CONFIG_ATA_OVER_ETH=m -CONFIG_XEN_BLKDEV_FRONTEND=m -CONFIG_XEN_BLKDEV_BACKEND=m -CONFIG_VIRTIO_BLK=m -CONFIG_BLK_DEV_RBD=m -CONFIG_BLK_DEV_RSXX=m -CONFIG_BLK_DEV_RNBD=y -CONFIG_BLK_DEV_RNBD_CLIENT=m -CONFIG_BLK_DEV_RNBD_SERVER=m - -# -# NVME Support -# -CONFIG_NVME_CORE=y -CONFIG_BLK_DEV_NVME=y -CONFIG_NVME_MULTIPATH=y -CONFIG_NVME_HWMON=y -CONFIG_NVME_FABRICS=m -CONFIG_NVME_RDMA=m -CONFIG_NVME_FC=m -CONFIG_NVME_TCP=m -CONFIG_NVME_TARGET=m -# CONFIG_NVME_TARGET_PASSTHRU is not set -CONFIG_NVME_TARGET_LOOP=m -CONFIG_NVME_TARGET_RDMA=m -CONFIG_NVME_TARGET_FC=m -CONFIG_NVME_TARGET_FCLOOP=m -CONFIG_NVME_TARGET_TCP=m -# end of NVME Support - -# -# Misc devices -# -CONFIG_SENSORS_LIS3LV02D=m -CONFIG_AD525X_DPOT=m -CONFIG_AD525X_DPOT_I2C=m -CONFIG_AD525X_DPOT_SPI=m -# CONFIG_DUMMY_IRQ is not set -CONFIG_IBM_ASM=m -CONFIG_PHANTOM=m -CONFIG_TIFM_CORE=m -CONFIG_TIFM_7XX1=m -CONFIG_ICS932S401=m -CONFIG_ENCLOSURE_SERVICES=m -CONFIG_HP_ILO=m -CONFIG_APDS9802ALS=m -CONFIG_ISL29003=m -CONFIG_ISL29020=m -CONFIG_SENSORS_TSL2550=m -CONFIG_SENSORS_BH1770=m -CONFIG_SENSORS_APDS990X=m -CONFIG_HMC6352=m -CONFIG_DS1682=m -CONFIG_VMWARE_BALLOON=m -CONFIG_LATTICE_ECP3_CONFIG=m -# CONFIG_SRAM is not set -CONFIG_PCI_ENDPOINT_TEST=m -CONFIG_XILINX_SDFEC=m -CONFIG_MISC_RTSX=m -CONFIG_PVPANIC=m -CONFIG_C2PORT=m -CONFIG_C2PORT_DURAMAR_2150=m - -# -# EEPROM support -# -CONFIG_EEPROM_AT24=m -# CONFIG_EEPROM_AT25 is not set -CONFIG_EEPROM_LEGACY=m -CONFIG_EEPROM_MAX6875=m -CONFIG_EEPROM_93CX6=m -# CONFIG_EEPROM_93XX46 is not set -CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EEPROM_EE1004=m -# end of EEPROM support 
- -CONFIG_CB710_CORE=m -# CONFIG_CB710_DEBUG is not set -CONFIG_CB710_DEBUG_ASSUMPTIONS=y - -# -# Texas Instruments shared transport line discipline -# -CONFIG_TI_ST=m -# end of Texas Instruments shared transport line discipline - -CONFIG_SENSORS_LIS3_I2C=m -CONFIG_ALTERA_STAPL=m -CONFIG_INTEL_MEI=m -CONFIG_INTEL_MEI_ME=m -CONFIG_INTEL_MEI_TXE=m -CONFIG_INTEL_MEI_HDCP=m -CONFIG_VMWARE_VMCI=m - -# -# Intel MIC & related support -# -CONFIG_INTEL_MIC_BUS=m -CONFIG_SCIF_BUS=m -CONFIG_VOP_BUS=m -CONFIG_INTEL_MIC_HOST=m -CONFIG_INTEL_MIC_CARD=m -CONFIG_SCIF=m -CONFIG_MIC_COSM=m -CONFIG_VOP=m -# end of Intel MIC & related support - -CONFIG_GENWQE=m -CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY=0 -CONFIG_ECHO=m -CONFIG_MISC_ALCOR_PCI=m -CONFIG_MISC_RTSX_PCI=m -CONFIG_MISC_RTSX_USB=m -CONFIG_HABANA_AI=m -CONFIG_UACCE=m -# end of Misc devices - -CONFIG_HAVE_IDE=y -# CONFIG_IDE is not set - -# -# SCSI device support -# -CONFIG_SCSI_MOD=y -CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y -CONFIG_SCSI_DMA=y -CONFIG_SCSI_NETLINK=y -CONFIG_SCSI_PROC_FS=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=m -CONFIG_BLK_DEV_SR=m -CONFIG_CHR_DEV_SG=m -CONFIG_CHR_DEV_SCH=m -CONFIG_SCSI_ENCLOSURE=m -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SCAN_ASYNC=y - -# -# SCSI Transports -# -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=m -CONFIG_SCSI_ISCSI_ATTRS=m -CONFIG_SCSI_SAS_ATTRS=m -CONFIG_SCSI_SAS_LIBSAS=m -CONFIG_SCSI_SAS_ATA=y -CONFIG_SCSI_SAS_HOST_SMP=y -CONFIG_SCSI_SRP_ATTRS=m -# end of SCSI Transports - -CONFIG_SCSI_LOWLEVEL=y -CONFIG_ISCSI_TCP=m -CONFIG_ISCSI_BOOT_SYSFS=m -CONFIG_SCSI_CXGB3_ISCSI=m -CONFIG_SCSI_CXGB4_ISCSI=m -CONFIG_SCSI_BNX2_ISCSI=m -CONFIG_SCSI_BNX2X_FCOE=m -CONFIG_BE2ISCSI=m -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_HPSA=m -CONFIG_SCSI_3W_9XXX=m -CONFIG_SCSI_3W_SAS=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -CONFIG_AIC7XXX_DEBUG_ENABLE=y -CONFIG_AIC7XXX_DEBUG_MASK=0 -CONFIG_AIC7XXX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -CONFIG_AIC79XX_DEBUG_ENABLE=y -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_AIC79XX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC94XX=m -CONFIG_AIC94XX_DEBUG=y -CONFIG_SCSI_MVSAS=m -CONFIG_SCSI_MVSAS_DEBUG=y -CONFIG_SCSI_MVSAS_TASKLET=y -CONFIG_SCSI_MVUMI=m -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_ARCMSR=m -CONFIG_SCSI_ESAS2R=m -CONFIG_MEGARAID_NEWGEN=y -CONFIG_MEGARAID_MM=m -CONFIG_MEGARAID_MAILBOX=m -CONFIG_MEGARAID_LEGACY=m -CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_MPT3SAS=m -CONFIG_SCSI_MPT2SAS_MAX_SGE=128 -CONFIG_SCSI_MPT3SAS_MAX_SGE=128 -CONFIG_SCSI_MPT2SAS=m -CONFIG_SCSI_SMARTPQI=m -CONFIG_SCSI_UFSHCD=m -CONFIG_SCSI_UFSHCD_PCI=m -# CONFIG_SCSI_UFS_DWC_TC_PCI is not set -CONFIG_SCSI_UFSHCD_PLATFORM=m -CONFIG_SCSI_UFS_CDNS_PLATFORM=m -# CONFIG_SCSI_UFS_DWC_TC_PLATFORM is not set -CONFIG_SCSI_UFS_BSG=y -# CONFIG_SCSI_UFS_CRYPTO is not set -CONFIG_SCSI_HPTIOP=m -CONFIG_SCSI_BUSLOGIC=m -CONFIG_SCSI_FLASHPOINT=y -CONFIG_SCSI_MYRB=m -CONFIG_SCSI_MYRS=m -CONFIG_VMWARE_PVSCSI=m -CONFIG_XEN_SCSI_FRONTEND=m -CONFIG_HYPERV_STORAGE=m -CONFIG_LIBFC=m -CONFIG_LIBFCOE=m -CONFIG_FCOE=m -CONFIG_FCOE_FNIC=m -CONFIG_SCSI_SNIC=m -# CONFIG_SCSI_SNIC_DEBUG_FS is not set -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_FDOMAIN=m -CONFIG_SCSI_FDOMAIN_PCI=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_ISCI=m -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m 
-# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_SCSI_STEX=m -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -CONFIG_SCSI_SYM53C8XX_MMIO=y -CONFIG_SCSI_IPR=m -CONFIG_SCSI_IPR_TRACE=y -CONFIG_SCSI_IPR_DUMP=y -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_QLA_FC=m -CONFIG_TCM_QLA2XXX=m -# CONFIG_TCM_QLA2XXX_DEBUG is not set -CONFIG_SCSI_QLA_ISCSI=m -CONFIG_QEDI=m -CONFIG_QEDF=m -CONFIG_SCSI_LPFC=m -# CONFIG_SCSI_LPFC_DEBUG_FS is not set -CONFIG_SCSI_DC395x=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_WD719X=m -CONFIG_SCSI_DEBUG=m -CONFIG_SCSI_PMCRAID=m -CONFIG_SCSI_PM8001=m -CONFIG_SCSI_BFA_FC=m -CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_CHELSIO_FCOE=m -CONFIG_SCSI_LOWLEVEL_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_QLOGIC=m -CONFIG_PCMCIA_SYM53C500=m -CONFIG_SCSI_DH=y -CONFIG_SCSI_DH_RDAC=m -CONFIG_SCSI_DH_HP_SW=m -CONFIG_SCSI_DH_EMC=m -CONFIG_SCSI_DH_ALUA=m -# end of SCSI device support - -CONFIG_ATA=y -CONFIG_SATA_HOST=y -CONFIG_PATA_TIMINGS=y -CONFIG_ATA_VERBOSE_ERROR=y -CONFIG_ATA_FORCE=y -CONFIG_ATA_ACPI=y -CONFIG_SATA_ZPODD=y -CONFIG_SATA_PMP=y - -# -# Controllers with non-SFF native interface -# -CONFIG_SATA_AHCI=y -CONFIG_SATA_MOBILE_LPM_POLICY=3 -CONFIG_SATA_AHCI_PLATFORM=m -CONFIG_AHCI_CEVA=m -CONFIG_AHCI_QORIQ=m -CONFIG_SATA_INIC162X=m -CONFIG_SATA_ACARD_AHCI=m -CONFIG_SATA_SIL24=m -CONFIG_ATA_SFF=y - -# -# SFF controllers with custom DMA interface -# -CONFIG_PDC_ADMA=m -CONFIG_SATA_QSTOR=m -CONFIG_SATA_SX4=m -CONFIG_ATA_BMDMA=y - -# -# SATA SFF controllers with BMDMA -# -CONFIG_ATA_PIIX=m -CONFIG_SATA_DWC=m -# CONFIG_SATA_DWC_OLD_DMA is not set -# CONFIG_SATA_DWC_DEBUG is not set -CONFIG_SATA_MV=m -CONFIG_SATA_NV=m -CONFIG_SATA_PROMISE=m -CONFIG_SATA_SIL=m -CONFIG_SATA_SIS=m -CONFIG_SATA_SVW=m -CONFIG_SATA_ULI=m -CONFIG_SATA_VIA=m -CONFIG_SATA_VITESSE=m - -# -# PATA SFF controllers with BMDMA -# -CONFIG_PATA_ALI=m -CONFIG_PATA_AMD=m -CONFIG_PATA_ARTOP=m -CONFIG_PATA_ATIIXP=m -CONFIG_PATA_ATP867X=m -CONFIG_PATA_CMD64X=m -CONFIG_PATA_CYPRESS=m -CONFIG_PATA_EFAR=m -CONFIG_PATA_HPT366=m -CONFIG_PATA_HPT37X=m -CONFIG_PATA_HPT3X2N=m -CONFIG_PATA_HPT3X3=m -CONFIG_PATA_HPT3X3_DMA=y -CONFIG_PATA_IT8213=m -CONFIG_PATA_IT821X=m -CONFIG_PATA_JMICRON=m -CONFIG_PATA_MARVELL=m -CONFIG_PATA_NETCELL=m -CONFIG_PATA_NINJA32=m -CONFIG_PATA_NS87415=m -CONFIG_PATA_OLDPIIX=m -CONFIG_PATA_OPTIDMA=m -CONFIG_PATA_PDC2027X=m -CONFIG_PATA_PDC_OLD=m -CONFIG_PATA_RADISYS=m -CONFIG_PATA_RDC=m -CONFIG_PATA_SCH=m -CONFIG_PATA_SERVERWORKS=m -CONFIG_PATA_SIL680=m -CONFIG_PATA_SIS=m -CONFIG_PATA_TOSHIBA=m -CONFIG_PATA_TRIFLEX=m -CONFIG_PATA_VIA=m -CONFIG_PATA_WINBOND=m - -# -# PIO-only SFF controllers -# -CONFIG_PATA_CMD640_PCI=m -CONFIG_PATA_MPIIX=m -CONFIG_PATA_NS87410=m -CONFIG_PATA_OPTI=m -CONFIG_PATA_PCMCIA=m -# CONFIG_PATA_PLATFORM is not set -CONFIG_PATA_RZ1000=m - -# -# Generic fallback / legacy drivers -# -CONFIG_PATA_ACPI=m -CONFIG_ATA_GENERIC=m -CONFIG_PATA_LEGACY=m -CONFIG_MD=y -CONFIG_BLK_DEV_MD=m -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID10=m -CONFIG_MD_RAID456=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m -CONFIG_MD_CLUSTER=m -CONFIG_BCACHE=m -# CONFIG_BCACHE_DEBUG is not set -# CONFIG_BCACHE_CLOSURES_DEBUG is not set -CONFIG_BCACHE_ASYNC_REGISTRATION=y -CONFIG_BLK_DEV_DM_BUILTIN=y -CONFIG_BLK_DEV_DM=m -CONFIG_DM_DEBUG=y -CONFIG_DM_BUFIO=m -# CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING is not set -CONFIG_DM_BIO_PRISON=m 
-CONFIG_DM_PERSISTENT_DATA=m -CONFIG_DM_UNSTRIPED=m -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_CACHE=m -CONFIG_DM_CACHE_SMQ=m -CONFIG_DM_WRITECACHE=m -CONFIG_DM_EBS=m -CONFIG_DM_ERA=m -CONFIG_DM_CLONE=m -CONFIG_DM_MIRROR=m -CONFIG_DM_LOG_USERSPACE=m -CONFIG_DM_RAID=m -CONFIG_DM_ZERO=m -CONFIG_DM_MULTIPATH=m -CONFIG_DM_MULTIPATH_QL=m -CONFIG_DM_MULTIPATH_ST=m -CONFIG_DM_MULTIPATH_HST=m -CONFIG_DM_DELAY=m -CONFIG_DM_DUST=m -CONFIG_DM_UEVENT=y -CONFIG_DM_FLAKEY=m -CONFIG_DM_VERITY=m -CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y -CONFIG_DM_VERITY_FEC=y -CONFIG_DM_SWITCH=m -CONFIG_DM_LOG_WRITES=m -CONFIG_DM_INTEGRITY=m -CONFIG_DM_ZONED=m -CONFIG_TARGET_CORE=m -CONFIG_TCM_IBLOCK=m -CONFIG_TCM_FILEIO=m -CONFIG_TCM_PSCSI=m -CONFIG_TCM_USER2=m -CONFIG_LOOPBACK_TARGET=m -CONFIG_TCM_FC=m -CONFIG_ISCSI_TARGET=m -CONFIG_ISCSI_TARGET_CXGB4=m -CONFIG_SBP_TARGET=m -CONFIG_FUSION=y -CONFIG_FUSION_SPI=m -CONFIG_FUSION_FC=m -CONFIG_FUSION_SAS=m -CONFIG_FUSION_MAX_SGE=128 -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -# CONFIG_FUSION_LOGGING is not set - -# -# IEEE 1394 (FireWire) support -# -CONFIG_FIREWIRE=m -CONFIG_FIREWIRE_OHCI=m -CONFIG_FIREWIRE_SBP2=m -CONFIG_FIREWIRE_NET=m -CONFIG_FIREWIRE_NOSY=m -# end of IEEE 1394 (FireWire) support - -CONFIG_MACINTOSH_DRIVERS=y -CONFIG_MAC_EMUMOUSEBTN=m -CONFIG_NETDEVICES=y -CONFIG_MII=m -CONFIG_NET_CORE=y -CONFIG_BONDING=m -CONFIG_DUMMY=m -CONFIG_WIREGUARD=m -# CONFIG_WIREGUARD_DEBUG is not set -CONFIG_EQUALIZER=m -CONFIG_NET_FC=y -CONFIG_IFB=m -CONFIG_NET_TEAM=m -CONFIG_NET_TEAM_MODE_BROADCAST=m -CONFIG_NET_TEAM_MODE_ROUNDROBIN=m -CONFIG_NET_TEAM_MODE_RANDOM=m -CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m -CONFIG_NET_TEAM_MODE_LOADBALANCE=m -CONFIG_MACVLAN=m -CONFIG_MACVTAP=m -CONFIG_IPVLAN_L3S=y -CONFIG_IPVLAN=m -CONFIG_IPVTAP=m -CONFIG_VXLAN=m -CONFIG_GENEVE=m -CONFIG_BAREUDP=m -CONFIG_GTP=m -CONFIG_MACSEC=m -CONFIG_NETCONSOLE=m -CONFIG_NETCONSOLE_DYNAMIC=y -CONFIG_NETPOLL=y -CONFIG_NET_POLL_CONTROLLER=y -CONFIG_NTB_NETDEV=m -CONFIG_RIONET=m -CONFIG_RIONET_TX_SIZE=128 -CONFIG_RIONET_RX_SIZE=128 -CONFIG_TUN=m -CONFIG_TAP=m -# CONFIG_TUN_VNET_CROSS_LE is not set -CONFIG_VETH=m -CONFIG_VIRTIO_NET=m -CONFIG_NLMON=m -CONFIG_NET_VRF=m -CONFIG_VSOCKMON=m -CONFIG_SUNGEM_PHY=m -# CONFIG_ARCNET is not set -CONFIG_ATM_DRIVERS=y -# CONFIG_ATM_DUMMY is not set -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_NICSTAR=m -# CONFIG_ATM_NICSTAR_USE_SUNI is not set -# CONFIG_ATM_NICSTAR_USE_IDT77105 is not set -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -# CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E=m -CONFIG_ATM_FORE200E_USE_TASKLET=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_HE=m -CONFIG_ATM_HE_USE_SUNI=y -CONFIG_ATM_SOLOS=m -CONFIG_CAIF_DRIVERS=y -CONFIG_CAIF_TTY=m -CONFIG_CAIF_SPI_SLAVE=m -CONFIG_CAIF_SPI_SYNC=y -CONFIG_CAIF_HSI=m -CONFIG_CAIF_VIRTIO=m - -# -# Distributed Switch Architecture drivers -# -CONFIG_B53=m -# CONFIG_B53_SPI_DRIVER is not set -CONFIG_B53_MDIO_DRIVER=m -CONFIG_B53_MMAP_DRIVER=m -CONFIG_B53_SRAB_DRIVER=m -CONFIG_B53_SERDES=m -CONFIG_NET_DSA_BCM_SF2=m -CONFIG_NET_DSA_LOOP=m 
-CONFIG_NET_DSA_LANTIQ_GSWIP=m -CONFIG_NET_DSA_MT7530=m -CONFIG_NET_DSA_MV88E6060=m -CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON=m -CONFIG_NET_DSA_MICROCHIP_KSZ9477=m -CONFIG_NET_DSA_MICROCHIP_KSZ9477_I2C=m -CONFIG_NET_DSA_MICROCHIP_KSZ9477_SPI=m -CONFIG_NET_DSA_MICROCHIP_KSZ8795=m -CONFIG_NET_DSA_MICROCHIP_KSZ8795_SPI=m -CONFIG_NET_DSA_MV88E6XXX=m -CONFIG_NET_DSA_MV88E6XXX_GLOBAL2=y -CONFIG_NET_DSA_MV88E6XXX_PTP=y -CONFIG_NET_DSA_AR9331=m -CONFIG_NET_DSA_SJA1105=m -CONFIG_NET_DSA_SJA1105_PTP=y -CONFIG_NET_DSA_SJA1105_TAS=y -CONFIG_NET_DSA_SJA1105_VL=y -CONFIG_NET_DSA_QCA8K=m -CONFIG_NET_DSA_REALTEK_SMI=m -CONFIG_NET_DSA_SMSC_LAN9303=m -CONFIG_NET_DSA_SMSC_LAN9303_I2C=m -CONFIG_NET_DSA_SMSC_LAN9303_MDIO=m -CONFIG_NET_DSA_VITESSE_VSC73XX=m -CONFIG_NET_DSA_VITESSE_VSC73XX_SPI=m -CONFIG_NET_DSA_VITESSE_VSC73XX_PLATFORM=m -# end of Distributed Switch Architecture drivers - -CONFIG_ETHERNET=y -CONFIG_MDIO=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_3C589=m -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_NET_VENDOR_ADAPTEC=y -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_NET_VENDOR_AGERE=y -CONFIG_ET131X=m -CONFIG_NET_VENDOR_ALACRITECH=y -CONFIG_SLICOSS=m -CONFIG_NET_VENDOR_ALTEON=y -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_ALTERA_TSE=m -CONFIG_NET_VENDOR_AMAZON=y -CONFIG_ENA_ETHERNET=m -CONFIG_NET_VENDOR_AMD=y -CONFIG_AMD8111_ETH=m -CONFIG_PCNET32=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_AMD_XGBE=m -CONFIG_AMD_XGBE_DCB=y -CONFIG_AMD_XGBE_HAVE_ECC=y -CONFIG_NET_VENDOR_AQUANTIA=y -CONFIG_AQTION=m -CONFIG_NET_VENDOR_ARC=y -CONFIG_NET_VENDOR_ATHEROS=y -CONFIG_ATL2=m -CONFIG_ATL1=m -CONFIG_ATL1E=m -CONFIG_ATL1C=m -CONFIG_ALX=m -CONFIG_NET_VENDOR_AURORA=y -CONFIG_AURORA_NB8800=m -CONFIG_NET_VENDOR_BROADCOM=y -CONFIG_B44=m -CONFIG_B44_PCI_AUTOSELECT=y -CONFIG_B44_PCICORE_AUTOSELECT=y -CONFIG_B44_PCI=y -CONFIG_BCMGENET=m -CONFIG_BNX2=m -CONFIG_CNIC=m -CONFIG_TIGON3=m -CONFIG_TIGON3_HWMON=y -CONFIG_BNX2X=m -CONFIG_BNX2X_SRIOV=y -CONFIG_SYSTEMPORT=m -CONFIG_BNXT=m -CONFIG_BNXT_SRIOV=y -CONFIG_BNXT_FLOWER_OFFLOAD=y -CONFIG_BNXT_DCB=y -CONFIG_BNXT_HWMON=y -CONFIG_NET_VENDOR_BROCADE=y -CONFIG_BNA=m -CONFIG_NET_VENDOR_CADENCE=y -CONFIG_MACB=m -CONFIG_MACB_USE_HWSTAMP=y -CONFIG_MACB_PCI=m -CONFIG_NET_VENDOR_CAVIUM=y -CONFIG_THUNDER_NIC_PF=m -CONFIG_THUNDER_NIC_VF=m -CONFIG_THUNDER_NIC_BGX=m -CONFIG_THUNDER_NIC_RGX=m -CONFIG_CAVIUM_PTP=m -CONFIG_LIQUIDIO=m -CONFIG_LIQUIDIO_VF=m -CONFIG_NET_VENDOR_CHELSIO=y -CONFIG_CHELSIO_T1=m -CONFIG_CHELSIO_T1_1G=y -CONFIG_CHELSIO_T3=m -CONFIG_CHELSIO_T4=m -CONFIG_CHELSIO_T4_DCB=y -CONFIG_CHELSIO_T4_FCOE=y -CONFIG_CHELSIO_T4VF=m -CONFIG_CHELSIO_LIB=m -CONFIG_NET_VENDOR_CISCO=y -CONFIG_ENIC=m -CONFIG_NET_VENDOR_CORTINA=y -CONFIG_GEMINI_ETHERNET=m -CONFIG_CX_ECAT=m -CONFIG_DNET=m -CONFIG_NET_VENDOR_DEC=y -CONFIG_NET_TULIP=y -CONFIG_DE2104X=m -CONFIG_DE2104X_DSL=0 -CONFIG_TULIP=m -CONFIG_TULIP_MWI=y -CONFIG_TULIP_MMIO=y -CONFIG_TULIP_NAPI=y -CONFIG_TULIP_NAPI_HW_MITIGATION=y -CONFIG_DE4X5=m -CONFIG_WINBOND_840=m -CONFIG_DM9102=m -CONFIG_ULI526X=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_NET_VENDOR_DLINK=y -CONFIG_DL2K=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_NET_VENDOR_EMULEX=y -CONFIG_BE2NET=m -CONFIG_BE2NET_HWMON=y -CONFIG_BE2NET_BE2=y -CONFIG_BE2NET_BE3=y -CONFIG_BE2NET_LANCER=y -CONFIG_BE2NET_SKYHAWK=y -CONFIG_NET_VENDOR_EZCHIP=y -CONFIG_EZCHIP_NPS_MANAGEMENT_ENET=m -CONFIG_NET_VENDOR_FUJITSU=y -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_NET_VENDOR_GOOGLE=y -CONFIG_GVE=m -CONFIG_NET_VENDOR_HUAWEI=y -CONFIG_HINIC=m -CONFIG_NET_VENDOR_I825XX=y 
-CONFIG_NET_VENDOR_INTEL=y -CONFIG_E100=m -CONFIG_E1000=m -CONFIG_E1000E=m -CONFIG_E1000E_HWTS=y -CONFIG_IGB=m -CONFIG_IGB_HWMON=y -CONFIG_IGB_DCA=y -CONFIG_IGBVF=m -CONFIG_IXGB=m -CONFIG_IXGBE=m -CONFIG_IXGBE_HWMON=y -CONFIG_IXGBE_DCA=y -CONFIG_IXGBE_DCB=y -# CONFIG_IXGBE_IPSEC is not set -CONFIG_IXGBEVF=m -CONFIG_IXGBEVF_IPSEC=y -CONFIG_I40E=m -CONFIG_I40E_DCB=y -CONFIG_IAVF=m -CONFIG_I40EVF=m -CONFIG_ICE=m -CONFIG_FM10K=m -CONFIG_IGC=m -CONFIG_JME=m -CONFIG_NET_VENDOR_MARVELL=y -CONFIG_MVMDIO=m -CONFIG_SKGE=m -# CONFIG_SKGE_DEBUG is not set -CONFIG_SKGE_GENESIS=y -CONFIG_SKY2=m -# CONFIG_SKY2_DEBUG is not set -CONFIG_NET_VENDOR_MELLANOX=y -CONFIG_MLX4_EN=m -CONFIG_MLX4_EN_DCB=y -CONFIG_MLX4_CORE=m -CONFIG_MLX4_DEBUG=y -CONFIG_MLX4_CORE_GEN2=y -CONFIG_MLX5_CORE=m -CONFIG_MLX5_ACCEL=y -CONFIG_MLX5_FPGA=y -CONFIG_MLX5_CORE_EN=y -CONFIG_MLX5_EN_ARFS=y -CONFIG_MLX5_EN_RXNFC=y -CONFIG_MLX5_MPFS=y -CONFIG_MLX5_ESWITCH=y -CONFIG_MLX5_CLS_ACT=y -CONFIG_MLX5_TC_CT=y -CONFIG_MLX5_CORE_EN_DCB=y -CONFIG_MLX5_CORE_IPOIB=y -CONFIG_MLX5_FPGA_IPSEC=y -# CONFIG_MLX5_IPSEC is not set -CONFIG_MLX5_EN_IPSEC=y -CONFIG_MLX5_FPGA_TLS=y -CONFIG_MLX5_TLS=y -CONFIG_MLX5_EN_TLS=y -CONFIG_MLX5_SW_STEERING=y -CONFIG_MLXSW_CORE=m -CONFIG_MLXSW_CORE_HWMON=y -CONFIG_MLXSW_CORE_THERMAL=y -CONFIG_MLXSW_PCI=m -CONFIG_MLXSW_I2C=m -CONFIG_MLXSW_SWITCHIB=m -CONFIG_MLXSW_SWITCHX2=m -CONFIG_MLXSW_SPECTRUM=m -CONFIG_MLXSW_SPECTRUM_DCB=y -CONFIG_MLXSW_MINIMAL=m -CONFIG_MLXFW=m -CONFIG_NET_VENDOR_MICREL=y -CONFIG_KS8842=m -CONFIG_KS8851=m -CONFIG_KS8851_MLL=m -CONFIG_KSZ884X_PCI=m -CONFIG_NET_VENDOR_MICROCHIP=y -CONFIG_ENC28J60=m -# CONFIG_ENC28J60_WRITEVERIFY is not set -CONFIG_ENCX24J600=m -CONFIG_LAN743X=m -CONFIG_NET_VENDOR_MICROSEMI=y -CONFIG_MSCC_OCELOT_SWITCH_LIB=m -CONFIG_MSCC_OCELOT_SWITCH=m -CONFIG_NET_VENDOR_MYRI=y -CONFIG_MYRI10GE=m -CONFIG_MYRI10GE_DCA=y -CONFIG_FEALNX=m -CONFIG_NET_VENDOR_NATSEMI=y -CONFIG_NATSEMI=m -CONFIG_NS83820=m -CONFIG_NET_VENDOR_NETERION=y -CONFIG_S2IO=m -CONFIG_VXGE=m -# CONFIG_VXGE_DEBUG_TRACE_ALL is not set -CONFIG_NET_VENDOR_NETRONOME=y -CONFIG_NFP=m -CONFIG_NFP_APP_FLOWER=y -CONFIG_NFP_APP_ABM_NIC=y -# CONFIG_NFP_DEBUG is not set -CONFIG_NET_VENDOR_NI=y -CONFIG_NI_XGE_MANAGEMENT_ENET=m -CONFIG_NET_VENDOR_8390=y -CONFIG_PCMCIA_AXNET=m -CONFIG_NE2K_PCI=m -CONFIG_PCMCIA_PCNET=m -CONFIG_NET_VENDOR_NVIDIA=y -CONFIG_FORCEDETH=m -CONFIG_NET_VENDOR_OKI=y -CONFIG_ETHOC=m -CONFIG_NET_VENDOR_PACKET_ENGINES=y -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_NET_VENDOR_PENSANDO=y -CONFIG_IONIC=m -CONFIG_NET_VENDOR_QLOGIC=y -CONFIG_QLA3XXX=m -CONFIG_QLCNIC=m -CONFIG_QLCNIC_SRIOV=y -CONFIG_QLCNIC_DCB=y -CONFIG_QLCNIC_HWMON=y -CONFIG_NETXEN_NIC=m -CONFIG_QED=m -CONFIG_QED_LL2=y -CONFIG_QED_SRIOV=y -CONFIG_QEDE=m -CONFIG_QED_RDMA=y -CONFIG_QED_ISCSI=y -CONFIG_QED_FCOE=y -CONFIG_QED_OOO=y -CONFIG_NET_VENDOR_QUALCOMM=y -CONFIG_QCA7000=m -CONFIG_QCA7000_SPI=m -CONFIG_QCA7000_UART=m -CONFIG_QCOM_EMAC=m -CONFIG_RMNET=m -CONFIG_NET_VENDOR_RDC=y -CONFIG_R6040=m -CONFIG_NET_VENDOR_REALTEK=y -CONFIG_ATP=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -CONFIG_8139TOO_TUNE_TWISTER=y -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_R8169=m -CONFIG_NET_VENDOR_RENESAS=y -CONFIG_NET_VENDOR_ROCKER=y -CONFIG_ROCKER=m -CONFIG_NET_VENDOR_SAMSUNG=y -CONFIG_SXGBE_ETH=m -CONFIG_NET_VENDOR_SEEQ=y -CONFIG_NET_VENDOR_SOLARFLARE=y -CONFIG_SFC=m -CONFIG_SFC_MTD=y -CONFIG_SFC_MCDI_MON=y -CONFIG_SFC_SRIOV=y -CONFIG_SFC_MCDI_LOGGING=y -CONFIG_SFC_FALCON=m -CONFIG_SFC_FALCON_MTD=y 
-CONFIG_NET_VENDOR_SILAN=y -CONFIG_SC92031=m -CONFIG_NET_VENDOR_SIS=y -CONFIG_SIS900=m -CONFIG_SIS190=m -CONFIG_NET_VENDOR_SMSC=y -CONFIG_PCMCIA_SMC91C92=m -CONFIG_EPIC100=m -CONFIG_SMSC911X=m -CONFIG_SMSC9420=m -CONFIG_NET_VENDOR_SOCIONEXT=y -CONFIG_NET_VENDOR_STMICRO=y -CONFIG_STMMAC_ETH=m -# CONFIG_STMMAC_SELFTESTS is not set -CONFIG_STMMAC_PLATFORM=m -CONFIG_DWMAC_DWC_QOS_ETH=m -CONFIG_DWMAC_GENERIC=m -CONFIG_DWMAC_INTEL=m -CONFIG_STMMAC_PCI=m -CONFIG_NET_VENDOR_SUN=y -CONFIG_HAPPYMEAL=m -CONFIG_SUNGEM=m -CONFIG_CASSINI=m -CONFIG_NIU=m -CONFIG_NET_VENDOR_SYNOPSYS=y -CONFIG_DWC_XLGMAC=m -CONFIG_DWC_XLGMAC_PCI=m -CONFIG_NET_VENDOR_TEHUTI=y -CONFIG_TEHUTI=m -CONFIG_NET_VENDOR_TI=y -# CONFIG_TI_CPSW_PHY_SEL is not set -CONFIG_TLAN=m -CONFIG_NET_VENDOR_VIA=y -CONFIG_VIA_RHINE=m -CONFIG_VIA_RHINE_MMIO=y -CONFIG_VIA_VELOCITY=m -CONFIG_NET_VENDOR_WIZNET=y -CONFIG_WIZNET_W5100=m -CONFIG_WIZNET_W5300=m -# CONFIG_WIZNET_BUS_DIRECT is not set -# CONFIG_WIZNET_BUS_INDIRECT is not set -CONFIG_WIZNET_BUS_ANY=y -CONFIG_WIZNET_W5100_SPI=m -CONFIG_NET_VENDOR_XILINX=y -CONFIG_XILINX_AXI_EMAC=m -CONFIG_XILINX_LL_TEMAC=m -CONFIG_NET_VENDOR_XIRCOM=y -CONFIG_PCMCIA_XIRC2PS=m -CONFIG_FDDI=m -CONFIG_DEFXX=m -CONFIG_DEFXX_MMIO=y -CONFIG_SKFP=m -# CONFIG_HIPPI is not set -CONFIG_NET_SB1000=m -CONFIG_MDIO_DEVICE=m -CONFIG_MDIO_BUS=m -CONFIG_MDIO_DEVRES=m -CONFIG_MDIO_BCM_UNIMAC=m -CONFIG_MDIO_BITBANG=m -CONFIG_MDIO_BUS_MUX=m -CONFIG_MDIO_BUS_MUX_GPIO=m -CONFIG_MDIO_BUS_MUX_MMIOREG=m -CONFIG_MDIO_BUS_MUX_MULTIPLEXER=m -CONFIG_MDIO_CAVIUM=m -CONFIG_MDIO_GPIO=m -CONFIG_MDIO_HISI_FEMAC=m -CONFIG_MDIO_I2C=m -CONFIG_MDIO_IPQ4019=m -CONFIG_MDIO_IPQ8064=m -CONFIG_MDIO_MSCC_MIIM=m -CONFIG_MDIO_MVUSB=m -CONFIG_MDIO_OCTEON=m -CONFIG_MDIO_THUNDER=m -CONFIG_MDIO_XPCS=m -CONFIG_PHYLINK=m -CONFIG_PHYLIB=m -CONFIG_SWPHY=y -CONFIG_LED_TRIGGER_PHY=y - -# -# MII PHY device drivers -# -CONFIG_SFP=m -CONFIG_ADIN_PHY=m -CONFIG_AMD_PHY=m -CONFIG_AQUANTIA_PHY=m -CONFIG_AX88796B_PHY=m -CONFIG_BCM7XXX_PHY=m -CONFIG_BCM87XX_PHY=m -CONFIG_BCM_NET_PHYLIB=m -CONFIG_BROADCOM_PHY=m -CONFIG_BCM54140_PHY=m -CONFIG_BCM84881_PHY=m -CONFIG_CICADA_PHY=m -CONFIG_CORTINA_PHY=m -CONFIG_DAVICOM_PHY=m -CONFIG_DP83822_PHY=m -CONFIG_DP83TC811_PHY=m -CONFIG_DP83848_PHY=m -CONFIG_DP83867_PHY=m -CONFIG_DP83869_PHY=m -CONFIG_FIXED_PHY=m -CONFIG_ICPLUS_PHY=m -CONFIG_INTEL_XWAY_PHY=m -CONFIG_LSI_ET1011C_PHY=m -CONFIG_LXT_PHY=m -CONFIG_MARVELL_PHY=m -CONFIG_MARVELL_10G_PHY=m -CONFIG_MICREL_PHY=m -CONFIG_MICROCHIP_PHY=m -CONFIG_MICROCHIP_T1_PHY=m -CONFIG_MICROSEMI_PHY=m -CONFIG_NATIONAL_PHY=m -CONFIG_NXP_TJA11XX_PHY=m -CONFIG_AT803X_PHY=m -CONFIG_QSEMI_PHY=m -CONFIG_REALTEK_PHY=m -CONFIG_RENESAS_PHY=m -CONFIG_ROCKCHIP_PHY=m -CONFIG_SMSC_PHY=m -CONFIG_STE10XP=m -CONFIG_TERANETICS_PHY=m -CONFIG_VITESSE_PHY=m -CONFIG_XILINX_GMII2RGMII=m -CONFIG_MICREL_KS8995MA=m -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_MPPE=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPPOATM=m -CONFIG_PPPOE=m -CONFIG_PPTP=m -CONFIG_PPPOL2TP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_SLIP=m -CONFIG_SLHC=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y -CONFIG_USB_NET_DRIVERS=m -CONFIG_USB_CATC=m -CONFIG_USB_KAWETH=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_RTL8152=m -CONFIG_USB_LAN78XX=m -CONFIG_USB_USBNET=m -CONFIG_USB_NET_AX8817X=m -CONFIG_USB_NET_AX88179_178A=m -CONFIG_USB_NET_CDCETHER=m -CONFIG_USB_NET_CDC_EEM=m -CONFIG_USB_NET_CDC_NCM=m -CONFIG_USB_NET_HUAWEI_CDC_NCM=m 
-CONFIG_USB_NET_CDC_MBIM=m -CONFIG_USB_NET_DM9601=m -CONFIG_USB_NET_SR9700=m -CONFIG_USB_NET_SR9800=m -CONFIG_USB_NET_SMSC75XX=m -CONFIG_USB_NET_SMSC95XX=m -CONFIG_USB_NET_GL620A=m -CONFIG_USB_NET_NET1080=m -CONFIG_USB_NET_PLUSB=m -CONFIG_USB_NET_MCS7830=m -CONFIG_USB_NET_RNDIS_HOST=m -CONFIG_USB_NET_CDC_SUBSET_ENABLE=m -CONFIG_USB_NET_CDC_SUBSET=m -CONFIG_USB_ALI_M5632=y -CONFIG_USB_AN2720=y -CONFIG_USB_BELKIN=y -CONFIG_USB_ARMLINUX=y -CONFIG_USB_EPSON2888=y -CONFIG_USB_KC2190=y -CONFIG_USB_NET_ZAURUS=m -CONFIG_USB_NET_CX82310_ETH=m -CONFIG_USB_NET_KALMIA=m -CONFIG_USB_NET_QMI_WWAN=m -CONFIG_USB_HSO=m -CONFIG_USB_NET_INT51X1=m -CONFIG_USB_CDC_PHONET=m -CONFIG_USB_IPHETH=m -CONFIG_USB_SIERRA_NET=m -CONFIG_USB_VL600=m -CONFIG_USB_NET_CH9200=m -CONFIG_USB_NET_AQC111=m -CONFIG_WLAN=y -# CONFIG_WIRELESS_WDS is not set -CONFIG_WLAN_VENDOR_ADMTEK=y -CONFIG_ADM8211=m -CONFIG_ATH_COMMON=m -CONFIG_WLAN_VENDOR_ATH=y -# CONFIG_ATH_DEBUG is not set -CONFIG_ATH5K=m -CONFIG_ATH5K_DEBUG=y -CONFIG_ATH5K_TRACER=y -CONFIG_ATH5K_PCI=y -CONFIG_ATH9K_HW=m -CONFIG_ATH9K_COMMON=m -CONFIG_ATH9K_COMMON_DEBUG=y -CONFIG_ATH9K_BTCOEX_SUPPORT=y -CONFIG_ATH9K=m -CONFIG_ATH9K_PCI=y -CONFIG_ATH9K_AHB=y -CONFIG_ATH9K_DEBUGFS=y -CONFIG_ATH9K_STATION_STATISTICS=y -CONFIG_ATH9K_DYNACK=y -CONFIG_ATH9K_WOW=y -CONFIG_ATH9K_RFKILL=y -CONFIG_ATH9K_CHANNEL_CONTEXT=y -CONFIG_ATH9K_PCOEM=y -CONFIG_ATH9K_PCI_NO_EEPROM=m -CONFIG_ATH9K_HTC=m -CONFIG_ATH9K_HTC_DEBUGFS=y -CONFIG_ATH9K_HWRNG=y -CONFIG_ATH9K_COMMON_SPECTRAL=y -CONFIG_CARL9170=m -CONFIG_CARL9170_LEDS=y -CONFIG_CARL9170_DEBUGFS=y -CONFIG_CARL9170_WPC=y -# CONFIG_CARL9170_HWRNG is not set -CONFIG_ATH6KL=m -CONFIG_ATH6KL_SDIO=m -CONFIG_ATH6KL_USB=m -CONFIG_ATH6KL_DEBUG=y -CONFIG_ATH6KL_TRACING=y -CONFIG_AR5523=m -CONFIG_WIL6210=m -CONFIG_WIL6210_ISR_COR=y -CONFIG_WIL6210_TRACING=y -CONFIG_WIL6210_DEBUGFS=y -CONFIG_ATH10K=m -CONFIG_ATH10K_CE=y -CONFIG_ATH10K_PCI=m -CONFIG_ATH10K_AHB=y -CONFIG_ATH10K_SDIO=m -CONFIG_ATH10K_USB=m -CONFIG_ATH10K_DEBUG=y -CONFIG_ATH10K_DEBUGFS=y -CONFIG_ATH10K_SPECTRAL=y -CONFIG_ATH10K_TRACING=y -CONFIG_WCN36XX=m -CONFIG_WCN36XX_DEBUGFS=y -CONFIG_WLAN_VENDOR_ATMEL=y -CONFIG_ATMEL=m -CONFIG_PCI_ATMEL=m -CONFIG_PCMCIA_ATMEL=m -CONFIG_AT76C50X_USB=m -CONFIG_WLAN_VENDOR_BROADCOM=y -CONFIG_B43=m -CONFIG_B43_BCMA=y -CONFIG_B43_SSB=y -CONFIG_B43_BUSES_BCMA_AND_SSB=y -# CONFIG_B43_BUSES_BCMA is not set -# CONFIG_B43_BUSES_SSB is not set -CONFIG_B43_PCI_AUTOSELECT=y -CONFIG_B43_PCICORE_AUTOSELECT=y -CONFIG_B43_SDIO=y -CONFIG_B43_BCMA_PIO=y -CONFIG_B43_PIO=y -CONFIG_B43_PHY_G=y -CONFIG_B43_PHY_N=y -CONFIG_B43_PHY_LP=y -CONFIG_B43_PHY_HT=y -CONFIG_B43_LEDS=y -CONFIG_B43_HWRNG=y -# CONFIG_B43_DEBUG is not set -CONFIG_B43LEGACY=m -CONFIG_B43LEGACY_PCI_AUTOSELECT=y -CONFIG_B43LEGACY_PCICORE_AUTOSELECT=y -CONFIG_B43LEGACY_LEDS=y -CONFIG_B43LEGACY_HWRNG=y -CONFIG_B43LEGACY_DEBUG=y -CONFIG_B43LEGACY_DMA=y -CONFIG_B43LEGACY_PIO=y -CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y -# CONFIG_B43LEGACY_DMA_MODE is not set -# CONFIG_B43LEGACY_PIO_MODE is not set -CONFIG_BRCMUTIL=m -CONFIG_BRCMSMAC=m -CONFIG_BRCMFMAC=m -CONFIG_BRCMFMAC_PROTO_BCDC=y -CONFIG_BRCMFMAC_PROTO_MSGBUF=y -CONFIG_BRCMFMAC_SDIO=y -CONFIG_BRCMFMAC_USB=y -CONFIG_BRCMFMAC_PCIE=y -CONFIG_BRCM_TRACING=y -CONFIG_BRCMDBG=y -CONFIG_WLAN_VENDOR_CISCO=y -CONFIG_AIRO=m -CONFIG_AIRO_CS=m -CONFIG_WLAN_VENDOR_INTEL=y -CONFIG_IPW2100=m -CONFIG_IPW2100_MONITOR=y -# CONFIG_IPW2100_DEBUG is not set -CONFIG_IPW2200=m -CONFIG_IPW2200_MONITOR=y -CONFIG_IPW2200_RADIOTAP=y -CONFIG_IPW2200_PROMISCUOUS=y -CONFIG_IPW2200_QOS=y 
-# CONFIG_IPW2200_DEBUG is not set -CONFIG_LIBIPW=m -# CONFIG_LIBIPW_DEBUG is not set -CONFIG_IWLEGACY=m -CONFIG_IWL4965=m -CONFIG_IWL3945=m - -# -# iwl3945 / iwl4965 Debugging Options -# -CONFIG_IWLEGACY_DEBUG=y -CONFIG_IWLEGACY_DEBUGFS=y -# end of iwl3945 / iwl4965 Debugging Options - -CONFIG_IWLWIFI=m -CONFIG_IWLWIFI_LEDS=y -CONFIG_IWLDVM=m -CONFIG_IWLMVM=m -CONFIG_IWLWIFI_OPMODE_MODULAR=y -# CONFIG_IWLWIFI_BCAST_FILTERING is not set - -# -# Debugging Options -# -CONFIG_IWLWIFI_DEBUG=y -CONFIG_IWLWIFI_DEBUGFS=y -CONFIG_IWLWIFI_DEVICE_TRACING=y -# end of Debugging Options - -CONFIG_WLAN_VENDOR_INTERSIL=y -CONFIG_HOSTAP=m -CONFIG_HOSTAP_FIRMWARE=y -CONFIG_HOSTAP_FIRMWARE_NVRAM=y -CONFIG_HOSTAP_PLX=m -CONFIG_HOSTAP_PCI=m -CONFIG_HOSTAP_CS=m -CONFIG_HERMES=m -CONFIG_HERMES_PRISM=y -CONFIG_HERMES_CACHE_FW_ON_INIT=y -CONFIG_PLX_HERMES=m -CONFIG_TMD_HERMES=m -CONFIG_NORTEL_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_PCMCIA_SPECTRUM=m -CONFIG_ORINOCO_USB=m -CONFIG_P54_COMMON=m -CONFIG_P54_USB=m -CONFIG_P54_PCI=m -CONFIG_P54_SPI=m -# CONFIG_P54_SPI_DEFAULT_EEPROM is not set -CONFIG_P54_LEDS=y -CONFIG_PRISM54=m -CONFIG_WLAN_VENDOR_MARVELL=y -CONFIG_LIBERTAS=m -CONFIG_LIBERTAS_USB=m -CONFIG_LIBERTAS_CS=m -CONFIG_LIBERTAS_SDIO=m -CONFIG_LIBERTAS_SPI=m -# CONFIG_LIBERTAS_DEBUG is not set -CONFIG_LIBERTAS_MESH=y -CONFIG_LIBERTAS_THINFIRM=m -# CONFIG_LIBERTAS_THINFIRM_DEBUG is not set -CONFIG_LIBERTAS_THINFIRM_USB=m -CONFIG_MWIFIEX=m -CONFIG_MWIFIEX_SDIO=m -CONFIG_MWIFIEX_PCIE=m -CONFIG_MWIFIEX_USB=m -CONFIG_MWL8K=m -CONFIG_WLAN_VENDOR_MEDIATEK=y -CONFIG_MT7601U=m -CONFIG_MT76_CORE=m -CONFIG_MT76_LEDS=y -CONFIG_MT76_USB=m -CONFIG_MT76x02_LIB=m -CONFIG_MT76x02_USB=m -CONFIG_MT76x0_COMMON=m -CONFIG_MT76x0U=m -CONFIG_MT76x0E=m -CONFIG_MT76x2_COMMON=m -CONFIG_MT76x2E=m -CONFIG_MT76x2U=m -CONFIG_MT7603E=m -CONFIG_MT7615_COMMON=m -CONFIG_MT7615E=m -CONFIG_MT7663_USB_SDIO_COMMON=m -CONFIG_MT7663U=m -# CONFIG_MT7663S is not set -CONFIG_MT7915E=m -CONFIG_WLAN_VENDOR_MICROCHIP=y -CONFIG_WILC1000=m -CONFIG_WILC1000_SDIO=m -CONFIG_WILC1000_SPI=m -# CONFIG_WILC1000_HW_OOB_INTR is not set -CONFIG_WLAN_VENDOR_RALINK=y -CONFIG_RT2X00=m -CONFIG_RT2400PCI=m -CONFIG_RT2500PCI=m -CONFIG_RT61PCI=m -CONFIG_RT2800PCI=m -CONFIG_RT2800PCI_RT33XX=y -CONFIG_RT2800PCI_RT35XX=y -CONFIG_RT2800PCI_RT53XX=y -CONFIG_RT2800PCI_RT3290=y -CONFIG_RT2500USB=m -CONFIG_RT73USB=m -CONFIG_RT2800USB=m -CONFIG_RT2800USB_RT33XX=y -CONFIG_RT2800USB_RT35XX=y -CONFIG_RT2800USB_RT3573=y -CONFIG_RT2800USB_RT53XX=y -CONFIG_RT2800USB_RT55XX=y -CONFIG_RT2800USB_UNKNOWN=y -CONFIG_RT2800_LIB=m -CONFIG_RT2800_LIB_MMIO=m -CONFIG_RT2X00_LIB_MMIO=m -CONFIG_RT2X00_LIB_PCI=m -CONFIG_RT2X00_LIB_USB=m -CONFIG_RT2X00_LIB=m -CONFIG_RT2X00_LIB_FIRMWARE=y -CONFIG_RT2X00_LIB_CRYPTO=y -CONFIG_RT2X00_LIB_LEDS=y -CONFIG_RT2X00_LIB_DEBUGFS=y -# CONFIG_RT2X00_DEBUG is not set -CONFIG_WLAN_VENDOR_REALTEK=y -CONFIG_RTL8180=m -CONFIG_RTL8187=m -CONFIG_RTL8187_LEDS=y -CONFIG_RTL_CARDS=m -CONFIG_RTL8192CE=m -CONFIG_RTL8192SE=m -CONFIG_RTL8192DE=m -CONFIG_RTL8723AE=m -CONFIG_RTL8723BE=m -CONFIG_RTL8188EE=m -CONFIG_RTL8192EE=m -CONFIG_RTL8821AE=m -CONFIG_RTL8192CU=m -CONFIG_RTLWIFI=m -CONFIG_RTLWIFI_PCI=m -CONFIG_RTLWIFI_USB=m -CONFIG_RTLWIFI_DEBUG=y -CONFIG_RTL8192C_COMMON=m -CONFIG_RTL8723_COMMON=m -CONFIG_RTLBTCOEXIST=m -CONFIG_RTL8XXXU=m -CONFIG_RTL8XXXU_UNTESTED=y -CONFIG_RTW88=m -CONFIG_RTW88_CORE=m -CONFIG_RTW88_PCI=m -CONFIG_RTW88_8822B=m -CONFIG_RTW88_8822C=m -CONFIG_RTW88_8723D=m -CONFIG_RTW88_8821C=m -CONFIG_RTW88_8822BE=m -CONFIG_RTW88_8822CE=m 
-CONFIG_RTW88_8723DE=m -CONFIG_RTW88_8821CE=m -CONFIG_RTW88_DEBUG=y -CONFIG_RTW88_DEBUGFS=y -CONFIG_WLAN_VENDOR_RSI=y -CONFIG_RSI_91X=m -CONFIG_RSI_DEBUGFS=y -CONFIG_RSI_SDIO=m -CONFIG_RSI_USB=m -CONFIG_RSI_COEX=y -CONFIG_WLAN_VENDOR_ST=y -CONFIG_CW1200=m -CONFIG_CW1200_WLAN_SDIO=m -CONFIG_CW1200_WLAN_SPI=m -CONFIG_WLAN_VENDOR_TI=y -CONFIG_WL1251=m -CONFIG_WL1251_SPI=m -CONFIG_WL1251_SDIO=m -CONFIG_WL12XX=m -CONFIG_WL18XX=m -CONFIG_WLCORE=m -CONFIG_WLCORE_SPI=m -CONFIG_WLCORE_SDIO=m -CONFIG_WILINK_PLATFORM_DATA=y -CONFIG_WLAN_VENDOR_ZYDAS=y -CONFIG_USB_ZD1201=m -CONFIG_ZD1211RW=m -# CONFIG_ZD1211RW_DEBUG is not set -CONFIG_WLAN_VENDOR_QUANTENNA=y -CONFIG_QTNFMAC=m -CONFIG_QTNFMAC_PCIE=m -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_WL3501=m -CONFIG_MAC80211_HWSIM=m -CONFIG_USB_NET_RNDIS_WLAN=m -CONFIG_VIRT_WIFI=m - -# -# WiMAX Wireless Broadband devices -# -CONFIG_WIMAX_I2400M=m -CONFIG_WIMAX_I2400M_USB=m -CONFIG_WIMAX_I2400M_DEBUG_LEVEL=8 -# end of WiMAX Wireless Broadband devices - -# CONFIG_WAN is not set -CONFIG_IEEE802154_DRIVERS=m -CONFIG_IEEE802154_FAKELB=m -CONFIG_IEEE802154_AT86RF230=m -# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set -CONFIG_IEEE802154_MRF24J40=m -CONFIG_IEEE802154_CC2520=m -CONFIG_IEEE802154_ATUSB=m -CONFIG_IEEE802154_ADF7242=m -CONFIG_IEEE802154_CA8210=m -# CONFIG_IEEE802154_CA8210_DEBUGFS is not set -CONFIG_IEEE802154_MCR20A=m -CONFIG_IEEE802154_HWSIM=m -CONFIG_XEN_NETDEV_FRONTEND=m -CONFIG_XEN_NETDEV_BACKEND=m -CONFIG_VMXNET3=m -CONFIG_FUJITSU_ES=m -CONFIG_USB4_NET=m -CONFIG_HYPERV_NET=m -CONFIG_NETDEVSIM=m -CONFIG_NET_FAILOVER=m -CONFIG_ISDN=y -CONFIG_ISDN_CAPI=y -CONFIG_CAPI_TRACE=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_MISDN=m -CONFIG_MISDN_DSP=m -CONFIG_MISDN_L1OIP=m - -# -# mISDN hardware drivers -# -CONFIG_MISDN_HFCPCI=m -CONFIG_MISDN_HFCMULTI=m -CONFIG_MISDN_HFCUSB=m -CONFIG_MISDN_AVMFRITZ=m -CONFIG_MISDN_SPEEDFAX=m -CONFIG_MISDN_INFINEON=m -CONFIG_MISDN_W6692=m -CONFIG_MISDN_NETJET=m -CONFIG_MISDN_HDLC=m -CONFIG_MISDN_IPAC=m -CONFIG_MISDN_ISAR=m -CONFIG_NVM=y -CONFIG_NVM_PBLK=m -# CONFIG_NVM_PBLK_DEBUG is not set - -# -# Input device support -# -CONFIG_INPUT=y -CONFIG_INPUT_LEDS=m -CONFIG_INPUT_FF_MEMLESS=m -CONFIG_INPUT_POLLDEV=m -CONFIG_INPUT_SPARSEKMAP=m -CONFIG_INPUT_MATRIXKMAP=m - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_PSAUX=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m -# CONFIG_INPUT_EVBUG is not set - -# -# Input Device Drivers -# -CONFIG_INPUT_KEYBOARD=y -CONFIG_KEYBOARD_ADC=m -CONFIG_KEYBOARD_ADP5520=m -CONFIG_KEYBOARD_ADP5588=m -CONFIG_KEYBOARD_ADP5589=m -CONFIG_KEYBOARD_APPLESPI=m -CONFIG_KEYBOARD_ATKBD=m -CONFIG_KEYBOARD_QT1050=m -CONFIG_KEYBOARD_QT1070=m -CONFIG_KEYBOARD_QT2160=m -CONFIG_KEYBOARD_DLINK_DIR685=m -CONFIG_KEYBOARD_LKKBD=m -CONFIG_KEYBOARD_GPIO=m -CONFIG_KEYBOARD_GPIO_POLLED=m -CONFIG_KEYBOARD_TCA6416=m -CONFIG_KEYBOARD_TCA8418=m -CONFIG_KEYBOARD_MATRIX=m -CONFIG_KEYBOARD_LM8323=m -CONFIG_KEYBOARD_LM8333=m -CONFIG_KEYBOARD_MAX7359=m -CONFIG_KEYBOARD_MCS=m -CONFIG_KEYBOARD_MPR121=m -CONFIG_KEYBOARD_NEWTON=m -CONFIG_KEYBOARD_OPENCORES=m -CONFIG_KEYBOARD_SAMSUNG=m -CONFIG_KEYBOARD_STOWAWAY=m -CONFIG_KEYBOARD_SUNKBD=m -CONFIG_KEYBOARD_STMPE=m -CONFIG_KEYBOARD_IQS62X=m -CONFIG_KEYBOARD_OMAP4=m -CONFIG_KEYBOARD_TC3589X=m -CONFIG_KEYBOARD_TM2_TOUCHKEY=m -CONFIG_KEYBOARD_TWL4030=m -CONFIG_KEYBOARD_XTKBD=m -CONFIG_KEYBOARD_CROS_EC=m -CONFIG_KEYBOARD_CAP11XX=m -CONFIG_KEYBOARD_BCM=m -CONFIG_KEYBOARD_MTK_PMIC=m -CONFIG_INPUT_MOUSE=y 
-CONFIG_MOUSE_PS2=m -CONFIG_MOUSE_PS2_ALPS=y -CONFIG_MOUSE_PS2_BYD=y -CONFIG_MOUSE_PS2_LOGIPS2PP=y -CONFIG_MOUSE_PS2_SYNAPTICS=y -CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS=y -CONFIG_MOUSE_PS2_CYPRESS=y -CONFIG_MOUSE_PS2_LIFEBOOK=y -CONFIG_MOUSE_PS2_TRACKPOINT=y -CONFIG_MOUSE_PS2_ELANTECH=y -CONFIG_MOUSE_PS2_ELANTECH_SMBUS=y -CONFIG_MOUSE_PS2_SENTELIC=y -CONFIG_MOUSE_PS2_TOUCHKIT=y -CONFIG_MOUSE_PS2_FOCALTECH=y -CONFIG_MOUSE_PS2_VMMOUSE=y -CONFIG_MOUSE_PS2_SMBUS=y -CONFIG_MOUSE_SERIAL=m -CONFIG_MOUSE_APPLETOUCH=m -CONFIG_MOUSE_BCM5974=m -CONFIG_MOUSE_CYAPA=m -CONFIG_MOUSE_ELAN_I2C=m -CONFIG_MOUSE_ELAN_I2C_I2C=y -CONFIG_MOUSE_ELAN_I2C_SMBUS=y -CONFIG_MOUSE_VSXXXAA=m -CONFIG_MOUSE_GPIO=m -CONFIG_MOUSE_SYNAPTICS_I2C=m -CONFIG_MOUSE_SYNAPTICS_USB=m -CONFIG_INPUT_JOYSTICK=y -CONFIG_JOYSTICK_ANALOG=m -CONFIG_JOYSTICK_A3D=m -CONFIG_JOYSTICK_ADI=m -CONFIG_JOYSTICK_COBRA=m -CONFIG_JOYSTICK_GF2K=m -CONFIG_JOYSTICK_GRIP=m -CONFIG_JOYSTICK_GRIP_MP=m -CONFIG_JOYSTICK_GUILLEMOT=m -CONFIG_JOYSTICK_INTERACT=m -CONFIG_JOYSTICK_SIDEWINDER=m -CONFIG_JOYSTICK_TMDC=m -CONFIG_JOYSTICK_IFORCE=m -CONFIG_JOYSTICK_IFORCE_USB=m -CONFIG_JOYSTICK_IFORCE_232=m -CONFIG_JOYSTICK_WARRIOR=m -CONFIG_JOYSTICK_MAGELLAN=m -CONFIG_JOYSTICK_SPACEORB=m -CONFIG_JOYSTICK_SPACEBALL=m -CONFIG_JOYSTICK_STINGER=m -CONFIG_JOYSTICK_TWIDJOY=m -CONFIG_JOYSTICK_ZHENHUA=m -CONFIG_JOYSTICK_DB9=m -CONFIG_JOYSTICK_GAMECON=m -CONFIG_JOYSTICK_TURBOGRAFX=m -CONFIG_JOYSTICK_AS5011=m -CONFIG_JOYSTICK_JOYDUMP=m -CONFIG_JOYSTICK_XPAD=m -CONFIG_JOYSTICK_XPAD_FF=y -CONFIG_JOYSTICK_XPAD_LEDS=y -CONFIG_JOYSTICK_WALKERA0701=m -CONFIG_JOYSTICK_PSXPAD_SPI=m -CONFIG_JOYSTICK_PSXPAD_SPI_FF=y -CONFIG_JOYSTICK_PXRC=m -CONFIG_JOYSTICK_FSIA6B=m -CONFIG_INPUT_TABLET=y -CONFIG_TABLET_USB_ACECAD=m -CONFIG_TABLET_USB_AIPTEK=m -CONFIG_TABLET_USB_GTCO=m -CONFIG_TABLET_USB_HANWANG=m -CONFIG_TABLET_USB_KBTAB=m -CONFIG_TABLET_USB_PEGASUS=m -CONFIG_TABLET_SERIAL_WACOM4=m -CONFIG_INPUT_TOUCHSCREEN=y -CONFIG_TOUCHSCREEN_PROPERTIES=y -CONFIG_TOUCHSCREEN_88PM860X=m -CONFIG_TOUCHSCREEN_ADS7846=m -CONFIG_TOUCHSCREEN_AD7877=m -CONFIG_TOUCHSCREEN_AD7879=m -CONFIG_TOUCHSCREEN_AD7879_I2C=m -CONFIG_TOUCHSCREEN_AD7879_SPI=m -CONFIG_TOUCHSCREEN_ADC=m -CONFIG_TOUCHSCREEN_AR1021_I2C=m -CONFIG_TOUCHSCREEN_ATMEL_MXT=m -CONFIG_TOUCHSCREEN_ATMEL_MXT_T37=y -CONFIG_TOUCHSCREEN_AUO_PIXCIR=m -CONFIG_TOUCHSCREEN_BU21013=m -CONFIG_TOUCHSCREEN_BU21029=m -CONFIG_TOUCHSCREEN_CHIPONE_ICN8318=m -CONFIG_TOUCHSCREEN_CHIPONE_ICN8505=m -CONFIG_TOUCHSCREEN_CY8CTMA140=m -CONFIG_TOUCHSCREEN_CY8CTMG110=m -CONFIG_TOUCHSCREEN_CYTTSP_CORE=m -CONFIG_TOUCHSCREEN_CYTTSP_I2C=m -CONFIG_TOUCHSCREEN_CYTTSP_SPI=m -CONFIG_TOUCHSCREEN_CYTTSP4_CORE=m -CONFIG_TOUCHSCREEN_CYTTSP4_I2C=m -CONFIG_TOUCHSCREEN_CYTTSP4_SPI=m -CONFIG_TOUCHSCREEN_DA9034=m -CONFIG_TOUCHSCREEN_DA9052=m -CONFIG_TOUCHSCREEN_DYNAPRO=m -CONFIG_TOUCHSCREEN_HAMPSHIRE=m -CONFIG_TOUCHSCREEN_EETI=m -CONFIG_TOUCHSCREEN_EGALAX=m -CONFIG_TOUCHSCREEN_EGALAX_SERIAL=m -CONFIG_TOUCHSCREEN_EXC3000=m -CONFIG_TOUCHSCREEN_FUJITSU=m -CONFIG_TOUCHSCREEN_GOODIX=m -CONFIG_TOUCHSCREEN_HIDEEP=m -CONFIG_TOUCHSCREEN_ILI210X=m -CONFIG_TOUCHSCREEN_S6SY761=m -CONFIG_TOUCHSCREEN_GUNZE=m -CONFIG_TOUCHSCREEN_EKTF2127=m -CONFIG_TOUCHSCREEN_ELAN=m -CONFIG_TOUCHSCREEN_ELO=m -CONFIG_TOUCHSCREEN_WACOM_W8001=m -CONFIG_TOUCHSCREEN_WACOM_I2C=m -CONFIG_TOUCHSCREEN_MAX11801=m -CONFIG_TOUCHSCREEN_MCS5000=m -CONFIG_TOUCHSCREEN_MMS114=m -CONFIG_TOUCHSCREEN_MELFAS_MIP4=m -CONFIG_TOUCHSCREEN_MTOUCH=m -CONFIG_TOUCHSCREEN_IMX6UL_TSC=m -CONFIG_TOUCHSCREEN_INEXIO=m -CONFIG_TOUCHSCREEN_MK712=m 
-CONFIG_TOUCHSCREEN_PENMOUNT=m -CONFIG_TOUCHSCREEN_EDT_FT5X06=m -CONFIG_TOUCHSCREEN_TOUCHRIGHT=m -CONFIG_TOUCHSCREEN_TOUCHWIN=m -CONFIG_TOUCHSCREEN_TI_AM335X_TSC=m -CONFIG_TOUCHSCREEN_UCB1400=m -CONFIG_TOUCHSCREEN_PIXCIR=m -CONFIG_TOUCHSCREEN_WDT87XX_I2C=m -CONFIG_TOUCHSCREEN_WM831X=m -CONFIG_TOUCHSCREEN_WM97XX=m -CONFIG_TOUCHSCREEN_WM9705=y -CONFIG_TOUCHSCREEN_WM9712=y -CONFIG_TOUCHSCREEN_WM9713=y -CONFIG_TOUCHSCREEN_USB_COMPOSITE=m -CONFIG_TOUCHSCREEN_MC13783=m -CONFIG_TOUCHSCREEN_USB_EGALAX=y -CONFIG_TOUCHSCREEN_USB_PANJIT=y -CONFIG_TOUCHSCREEN_USB_3M=y -CONFIG_TOUCHSCREEN_USB_ITM=y -CONFIG_TOUCHSCREEN_USB_ETURBO=y -CONFIG_TOUCHSCREEN_USB_GUNZE=y -CONFIG_TOUCHSCREEN_USB_DMC_TSC10=y -CONFIG_TOUCHSCREEN_USB_IRTOUCH=y -CONFIG_TOUCHSCREEN_USB_IDEALTEK=y -CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH=y -CONFIG_TOUCHSCREEN_USB_GOTOP=y -CONFIG_TOUCHSCREEN_USB_JASTEC=y -CONFIG_TOUCHSCREEN_USB_ELO=y -CONFIG_TOUCHSCREEN_USB_E2I=y -CONFIG_TOUCHSCREEN_USB_ZYTRONIC=y -CONFIG_TOUCHSCREEN_USB_ETT_TC45USB=y -CONFIG_TOUCHSCREEN_USB_NEXIO=y -CONFIG_TOUCHSCREEN_USB_EASYTOUCH=y -CONFIG_TOUCHSCREEN_TOUCHIT213=m -CONFIG_TOUCHSCREEN_TSC_SERIO=m -CONFIG_TOUCHSCREEN_TSC200X_CORE=m -CONFIG_TOUCHSCREEN_TSC2004=m -CONFIG_TOUCHSCREEN_TSC2005=m -CONFIG_TOUCHSCREEN_TSC2007=m -CONFIG_TOUCHSCREEN_TSC2007_IIO=y -CONFIG_TOUCHSCREEN_PCAP=m -CONFIG_TOUCHSCREEN_RM_TS=m -CONFIG_TOUCHSCREEN_SILEAD=m -CONFIG_TOUCHSCREEN_SIS_I2C=m -CONFIG_TOUCHSCREEN_ST1232=m -CONFIG_TOUCHSCREEN_STMFTS=m -CONFIG_TOUCHSCREEN_STMPE=m -CONFIG_TOUCHSCREEN_SUR40=m -CONFIG_TOUCHSCREEN_SURFACE3_SPI=m -CONFIG_TOUCHSCREEN_SX8654=m -CONFIG_TOUCHSCREEN_TPS6507X=m -CONFIG_TOUCHSCREEN_ZET6223=m -CONFIG_TOUCHSCREEN_ZFORCE=m -CONFIG_TOUCHSCREEN_COLIBRI_VF50=m -CONFIG_TOUCHSCREEN_ROHM_BU21023=m -CONFIG_TOUCHSCREEN_IQS5XX=m -CONFIG_INPUT_MISC=y -CONFIG_INPUT_88PM860X_ONKEY=m -CONFIG_INPUT_88PM80X_ONKEY=m -CONFIG_INPUT_AD714X=m -CONFIG_INPUT_AD714X_I2C=m -CONFIG_INPUT_AD714X_SPI=m -CONFIG_INPUT_ARIZONA_HAPTICS=m -CONFIG_INPUT_ATMEL_CAPTOUCH=m -CONFIG_INPUT_BMA150=m -CONFIG_INPUT_E3X0_BUTTON=m -CONFIG_INPUT_PCSPKR=m -CONFIG_INPUT_MAX77650_ONKEY=m -CONFIG_INPUT_MAX77693_HAPTIC=m -CONFIG_INPUT_MAX8925_ONKEY=m -CONFIG_INPUT_MAX8997_HAPTIC=m -CONFIG_INPUT_MC13783_PWRBUTTON=m -CONFIG_INPUT_MMA8450=m -CONFIG_INPUT_APANEL=m -CONFIG_INPUT_GPIO_BEEPER=m -CONFIG_INPUT_GPIO_DECODER=m -CONFIG_INPUT_GPIO_VIBRA=m -CONFIG_INPUT_CPCAP_PWRBUTTON=m -CONFIG_INPUT_ATLAS_BTNS=m -CONFIG_INPUT_ATI_REMOTE2=m -CONFIG_INPUT_KEYSPAN_REMOTE=m -CONFIG_INPUT_KXTJ9=m -CONFIG_INPUT_POWERMATE=m -CONFIG_INPUT_YEALINK=m -CONFIG_INPUT_CM109=m -CONFIG_INPUT_REGULATOR_HAPTIC=m -CONFIG_INPUT_RETU_PWRBUTTON=m -CONFIG_INPUT_TPS65218_PWRBUTTON=m -CONFIG_INPUT_AXP20X_PEK=m -CONFIG_INPUT_TWL4030_PWRBUTTON=m -CONFIG_INPUT_TWL4030_VIBRA=m -CONFIG_INPUT_TWL6040_VIBRA=m -CONFIG_INPUT_UINPUT=m -CONFIG_INPUT_PALMAS_PWRBUTTON=m -CONFIG_INPUT_PCF50633_PMU=m -CONFIG_INPUT_PCF8574=m -CONFIG_INPUT_PWM_BEEPER=m -CONFIG_INPUT_PWM_VIBRA=m -CONFIG_INPUT_RK805_PWRKEY=m -CONFIG_INPUT_GPIO_ROTARY_ENCODER=m -CONFIG_INPUT_DA9052_ONKEY=m -CONFIG_INPUT_DA9055_ONKEY=m -CONFIG_INPUT_DA9063_ONKEY=m -CONFIG_INPUT_WM831X_ON=m -CONFIG_INPUT_PCAP=m -CONFIG_INPUT_ADXL34X=m -CONFIG_INPUT_ADXL34X_I2C=m -CONFIG_INPUT_ADXL34X_SPI=m -CONFIG_INPUT_IMS_PCU=m -CONFIG_INPUT_IQS269A=m -CONFIG_INPUT_CMA3000=m -CONFIG_INPUT_CMA3000_I2C=m -CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m -CONFIG_INPUT_IDEAPAD_SLIDEBAR=m -CONFIG_INPUT_SOC_BUTTON_ARRAY=m -CONFIG_INPUT_DRV260X_HAPTICS=m -CONFIG_INPUT_DRV2665_HAPTICS=m -CONFIG_INPUT_DRV2667_HAPTICS=m 
-CONFIG_INPUT_RAVE_SP_PWRBUTTON=m -CONFIG_INPUT_STPMIC1_ONKEY=m -CONFIG_RMI4_CORE=m -CONFIG_RMI4_I2C=m -CONFIG_RMI4_SPI=m -CONFIG_RMI4_SMB=m -CONFIG_RMI4_F03=y -CONFIG_RMI4_F03_SERIO=m -CONFIG_RMI4_2D_SENSOR=y -CONFIG_RMI4_F11=y -CONFIG_RMI4_F12=y -CONFIG_RMI4_F30=y -CONFIG_RMI4_F34=y -# CONFIG_RMI4_F54 is not set -CONFIG_RMI4_F55=y - -# -# Hardware I/O ports -# -CONFIG_SERIO=m -CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y -CONFIG_SERIO_I8042=m -CONFIG_SERIO_SERPORT=m -CONFIG_SERIO_CT82C710=m -CONFIG_SERIO_PARKBD=m -CONFIG_SERIO_PCIPS2=m -CONFIG_SERIO_LIBPS2=m -CONFIG_SERIO_RAW=m -CONFIG_SERIO_ALTERA_PS2=m -CONFIG_SERIO_PS2MULT=m -CONFIG_SERIO_ARC_PS2=m -# CONFIG_SERIO_APBPS2 is not set -CONFIG_HYPERV_KEYBOARD=m -CONFIG_SERIO_GPIO_PS2=m -CONFIG_USERIO=m -CONFIG_GAMEPORT=m -CONFIG_GAMEPORT_NS558=m -CONFIG_GAMEPORT_L4=m -CONFIG_GAMEPORT_EMU10K1=m -CONFIG_GAMEPORT_FM801=m -# end of Hardware I/O ports -# end of Input device support - -# -# Character devices -# -CONFIG_TTY=y -CONFIG_VT=y -CONFIG_CONSOLE_TRANSLATIONS=y -CONFIG_VT_CONSOLE=y -CONFIG_VT_CONSOLE_SLEEP=y -CONFIG_HW_CONSOLE=y -CONFIG_VT_HW_CONSOLE_BINDING=y -CONFIG_UNIX98_PTYS=y -# CONFIG_LEGACY_PTYS is not set -CONFIG_LDISC_AUTOLOAD=y - -# -# Serial drivers -# -CONFIG_SERIAL_EARLYCON=y -CONFIG_SERIAL_8250=y -# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set -CONFIG_SERIAL_8250_PNP=y -# CONFIG_SERIAL_8250_16550A_VARIANTS is not set -CONFIG_SERIAL_8250_FINTEK=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_DMA=y -CONFIG_SERIAL_8250_PCI=y -CONFIG_SERIAL_8250_EXAR=m -CONFIG_SERIAL_8250_CS=m -CONFIG_SERIAL_8250_MEN_MCB=m -CONFIG_SERIAL_8250_NR_UARTS=32 -CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -CONFIG_SERIAL_8250_EXTENDED=y -CONFIG_SERIAL_8250_MANY_PORTS=y -CONFIG_SERIAL_8250_ASPEED_VUART=m -CONFIG_SERIAL_8250_SHARE_IRQ=y -# CONFIG_SERIAL_8250_DETECT_IRQ is not set -CONFIG_SERIAL_8250_RSA=y -CONFIG_SERIAL_8250_DWLIB=y -CONFIG_SERIAL_8250_DW=m -CONFIG_SERIAL_8250_RT288X=y -CONFIG_SERIAL_8250_LPSS=y -CONFIG_SERIAL_8250_MID=y -CONFIG_SERIAL_OF_PLATFORM=m - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_MAX3100=m -CONFIG_SERIAL_MAX310X=m -CONFIG_SERIAL_UARTLITE=m -CONFIG_SERIAL_UARTLITE_NR_UARTS=1 -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_SERIAL_JSM=m -CONFIG_SERIAL_SIFIVE=m -CONFIG_SERIAL_LANTIQ=m -CONFIG_SERIAL_SCCNXP=m -CONFIG_SERIAL_SC16IS7XX_CORE=m -CONFIG_SERIAL_SC16IS7XX=m -CONFIG_SERIAL_SC16IS7XX_I2C=y -CONFIG_SERIAL_SC16IS7XX_SPI=y -CONFIG_SERIAL_ALTERA_JTAGUART=m -CONFIG_SERIAL_ALTERA_UART=m -CONFIG_SERIAL_ALTERA_UART_MAXPORTS=4 -CONFIG_SERIAL_ALTERA_UART_BAUDRATE=115200 -CONFIG_SERIAL_IFX6X60=m -CONFIG_SERIAL_XILINX_PS_UART=m -CONFIG_SERIAL_ARC=m -CONFIG_SERIAL_ARC_NR_PORTS=1 -CONFIG_SERIAL_RP2=m -CONFIG_SERIAL_RP2_NR_UARTS=32 -CONFIG_SERIAL_FSL_LPUART=m -CONFIG_SERIAL_FSL_LINFLEXUART=m -CONFIG_SERIAL_CONEXANT_DIGICOLOR=m -CONFIG_SERIAL_MEN_Z135=m -CONFIG_SERIAL_SPRD=m -# end of Serial drivers - -CONFIG_SERIAL_MCTRL_GPIO=y -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -CONFIG_CYZ_INTR=y -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_SYNCLINK=m -CONFIG_SYNCLINKMP=m -CONFIG_SYNCLINK_GT=m -CONFIG_ISI=m -CONFIG_N_HDLC=m -CONFIG_N_GSM=m -CONFIG_NOZOMI=m -CONFIG_NULL_TTY=m -CONFIG_TRACE_ROUTER=m -CONFIG_TRACE_SINK=m -CONFIG_HVC_DRIVER=y -CONFIG_HVC_IRQ=y -CONFIG_HVC_XEN=y -CONFIG_HVC_XEN_FRONTEND=y -CONFIG_SERIAL_DEV_BUS=y -CONFIG_SERIAL_DEV_CTRL_TTYPORT=y -# CONFIG_TTY_PRINTK is not set -CONFIG_PRINTER=m -# CONFIG_LP_CONSOLE is not set -CONFIG_PPDEV=m -CONFIG_VIRTIO_CONSOLE=m 
-CONFIG_IPMI_HANDLER=m -CONFIG_IPMI_DMI_DECODE=y -CONFIG_IPMI_PLAT_DATA=y -# CONFIG_IPMI_PANIC_EVENT is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_SI=m -CONFIG_IPMI_SSIF=m -CONFIG_IPMI_WATCHDOG=m -CONFIG_IPMI_POWEROFF=m -CONFIG_IPMB_DEVICE_INTERFACE=m -CONFIG_HW_RANDOM=m -CONFIG_HW_RANDOM_TIMERIOMEM=m -CONFIG_HW_RANDOM_INTEL=m -CONFIG_HW_RANDOM_AMD=m -# CONFIG_HW_RANDOM_BA431 is not set -CONFIG_HW_RANDOM_VIA=m -CONFIG_HW_RANDOM_VIRTIO=m -CONFIG_HW_RANDOM_CCTRNG=m -CONFIG_APPLICOM=m - -# -# PCMCIA character devices -# -CONFIG_SYNCLINK_CS=m -CONFIG_CARDMAN_4000=m -CONFIG_CARDMAN_4040=m -CONFIG_SCR24X=m -CONFIG_IPWIRELESS=m -# end of PCMCIA character devices - -CONFIG_MWAVE=m -CONFIG_DEVMEM=y -# CONFIG_DEVKMEM is not set -CONFIG_NVRAM=m -CONFIG_RAW_DRIVER=m -CONFIG_MAX_RAW_DEVS=256 -CONFIG_DEVPORT=y -CONFIG_HPET=y -CONFIG_HPET_MMAP=y -CONFIG_HPET_MMAP_DEFAULT=y -CONFIG_HANGCHECK_TIMER=m -CONFIG_TCG_TPM=m -CONFIG_HW_RANDOM_TPM=y -CONFIG_TCG_TIS_CORE=m -CONFIG_TCG_TIS=m -CONFIG_TCG_TIS_SPI=m -CONFIG_TCG_TIS_SPI_CR50=y -CONFIG_TCG_TIS_I2C_ATMEL=m -CONFIG_TCG_TIS_I2C_INFINEON=m -CONFIG_TCG_TIS_I2C_NUVOTON=m -CONFIG_TCG_NSC=m -CONFIG_TCG_ATMEL=m -CONFIG_TCG_INFINEON=m -CONFIG_TCG_XEN=m -CONFIG_TCG_CRB=m -CONFIG_TCG_VTPM_PROXY=m -CONFIG_TCG_TIS_ST33ZP24=m -CONFIG_TCG_TIS_ST33ZP24_I2C=m -CONFIG_TCG_TIS_ST33ZP24_SPI=m -CONFIG_TELCLOCK=m -CONFIG_XILLYBUS=m -CONFIG_XILLYBUS_PCIE=m -CONFIG_XILLYBUS_OF=m -# end of Character devices - -# CONFIG_RANDOM_TRUST_CPU is not set -# CONFIG_RANDOM_TRUST_BOOTLOADER is not set - -# -# I2C support -# -CONFIG_I2C=y -CONFIG_ACPI_I2C_OPREGION=y -CONFIG_I2C_BOARDINFO=y -CONFIG_I2C_COMPAT=y -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_MUX=m - -# -# Multiplexer I2C Chip support -# -CONFIG_I2C_ARB_GPIO_CHALLENGE=m -CONFIG_I2C_MUX_GPIO=m -CONFIG_I2C_MUX_GPMUX=m -CONFIG_I2C_MUX_LTC4306=m -CONFIG_I2C_MUX_PCA9541=m -CONFIG_I2C_MUX_PCA954x=m -CONFIG_I2C_MUX_PINCTRL=m -CONFIG_I2C_MUX_REG=m -CONFIG_I2C_DEMUX_PINCTRL=m -CONFIG_I2C_MUX_MLXCPLD=m -# end of Multiplexer I2C Chip support - -CONFIG_I2C_HELPER_AUTO=y -CONFIG_I2C_SMBUS=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_ALGOPCA=m - -# -# I2C Hardware Bus support -# - -# -# PC SMBus host controller drivers -# -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI1563=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_AMD756=m -CONFIG_I2C_AMD756_S4882=m -CONFIG_I2C_AMD8111=m -CONFIG_I2C_AMD_MP2=m -CONFIG_I2C_I801=m -CONFIG_I2C_ISCH=m -CONFIG_I2C_ISMT=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_CHT_WC=m -CONFIG_I2C_NFORCE2=m -CONFIG_I2C_NFORCE2_S4985=m -CONFIG_I2C_NVIDIA_GPU=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_SIS630=m -CONFIG_I2C_SIS96X=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m - -# -# ACPI drivers -# -CONFIG_I2C_SCMI=m - -# -# I2C system bus drivers (mostly embedded / system-on-chip) -# -CONFIG_I2C_CBUS_GPIO=m -CONFIG_I2C_DESIGNWARE_CORE=y -CONFIG_I2C_DESIGNWARE_SLAVE=y -CONFIG_I2C_DESIGNWARE_PLATFORM=y -CONFIG_I2C_DESIGNWARE_BAYTRAIL=y -CONFIG_I2C_DESIGNWARE_PCI=m -CONFIG_I2C_EMEV2=m -CONFIG_I2C_GPIO=m -# CONFIG_I2C_GPIO_FAULT_INJECTOR is not set -CONFIG_I2C_KEMPLD=m -CONFIG_I2C_OCORES=m -CONFIG_I2C_PCA_PLATFORM=m -CONFIG_I2C_RK3X=m -CONFIG_I2C_SIMTEC=m -CONFIG_I2C_XILINX=m - -# -# External I2C/SMBus adapter drivers -# -CONFIG_I2C_DIOLAN_U2C=m -CONFIG_I2C_DLN2=m -CONFIG_I2C_PARPORT=m -CONFIG_I2C_ROBOTFUZZ_OSIF=m -CONFIG_I2C_TAOS_EVM=m -CONFIG_I2C_TINY_USB=m -CONFIG_I2C_VIPERBOARD=m - -# -# Other I2C/SMBus bus drivers -# -CONFIG_I2C_MLXCPLD=m -CONFIG_I2C_CROS_EC_TUNNEL=m -CONFIG_I2C_FSI=m -# end of I2C Hardware Bus support - -CONFIG_I2C_STUB=m -CONFIG_I2C_SLAVE=y 
-CONFIG_I2C_SLAVE_EEPROM=m -# CONFIG_I2C_DEBUG_CORE is not set -# CONFIG_I2C_DEBUG_ALGO is not set -# CONFIG_I2C_DEBUG_BUS is not set -# end of I2C support - -CONFIG_I3C=m -CONFIG_CDNS_I3C_MASTER=m -CONFIG_DW_I3C_MASTER=m -CONFIG_SPI=y -# CONFIG_SPI_DEBUG is not set -CONFIG_SPI_MASTER=y -CONFIG_SPI_MEM=y - -# -# SPI Master Controller Drivers -# -CONFIG_SPI_ALTERA=m -CONFIG_SPI_AXI_SPI_ENGINE=m -CONFIG_SPI_BITBANG=m -CONFIG_SPI_BUTTERFLY=m -CONFIG_SPI_CADENCE=m -CONFIG_SPI_DESIGNWARE=m -CONFIG_SPI_DW_DMA=y -CONFIG_SPI_DW_PCI=m -CONFIG_SPI_DW_MMIO=m -CONFIG_SPI_DLN2=m -CONFIG_SPI_FSI=m -CONFIG_SPI_NXP_FLEXSPI=m -CONFIG_SPI_GPIO=m -CONFIG_SPI_LM70_LLP=m -CONFIG_SPI_FSL_LIB=m -CONFIG_SPI_FSL_SPI=m -# CONFIG_SPI_LANTIQ_SSC is not set -CONFIG_SPI_OC_TINY=m -CONFIG_SPI_PXA2XX=m -CONFIG_SPI_PXA2XX_PCI=m -CONFIG_SPI_ROCKCHIP=m -CONFIG_SPI_SC18IS602=m -CONFIG_SPI_SIFIVE=m -CONFIG_SPI_MXIC=m -CONFIG_SPI_XCOMM=m -CONFIG_SPI_XILINX=m -CONFIG_SPI_ZYNQMP_GQSPI=m -CONFIG_SPI_AMD=m - -# -# SPI Multiplexer support -# -CONFIG_SPI_MUX=m - -# -# SPI Protocol Masters -# -CONFIG_SPI_SPIDEV=m -CONFIG_SPI_LOOPBACK_TEST=m -CONFIG_SPI_TLE62X0=m -CONFIG_SPI_SLAVE=y -CONFIG_SPI_SLAVE_TIME=m -CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m -CONFIG_SPI_DYNAMIC=y -CONFIG_SPMI=m -CONFIG_HSI=m -CONFIG_HSI_BOARDINFO=y - -# -# HSI controllers -# - -# -# HSI clients -# -CONFIG_HSI_CHAR=m -CONFIG_PPS=y -# CONFIG_PPS_DEBUG is not set - -# -# PPS clients support -# -CONFIG_PPS_CLIENT_KTIMER=m -CONFIG_PPS_CLIENT_LDISC=m -CONFIG_PPS_CLIENT_PARPORT=m -CONFIG_PPS_CLIENT_GPIO=m - -# -# PPS generators support -# - -# -# PTP clock support -# -CONFIG_PTP_1588_CLOCK=y -CONFIG_DP83640_PHY=m -CONFIG_PTP_1588_CLOCK_INES=m -CONFIG_PTP_1588_CLOCK_KVM=m -CONFIG_PTP_1588_CLOCK_IDT82P33=m -CONFIG_PTP_1588_CLOCK_IDTCM=m -CONFIG_PTP_1588_CLOCK_VMW=m -# end of PTP clock support - -CONFIG_PINCTRL=y -CONFIG_GENERIC_PINCTRL_GROUPS=y -CONFIG_PINMUX=y -CONFIG_GENERIC_PINMUX_FUNCTIONS=y -CONFIG_PINCONF=y -CONFIG_GENERIC_PINCONF=y -# CONFIG_DEBUG_PINCTRL is not set -CONFIG_PINCTRL_AS3722=m -CONFIG_PINCTRL_AXP209=m -CONFIG_PINCTRL_AMD=m -CONFIG_PINCTRL_DA9062=m -CONFIG_PINCTRL_MCP23S08_I2C=m -CONFIG_PINCTRL_MCP23S08_SPI=m -CONFIG_PINCTRL_MCP23S08=m -CONFIG_PINCTRL_SINGLE=m -CONFIG_PINCTRL_SX150X=y -CONFIG_PINCTRL_STMFX=m -CONFIG_PINCTRL_MAX77620=m -CONFIG_PINCTRL_PALMAS=m -CONFIG_PINCTRL_RK805=m -CONFIG_PINCTRL_OCELOT=y -CONFIG_PINCTRL_BAYTRAIL=y -CONFIG_PINCTRL_CHERRYVIEW=y -CONFIG_PINCTRL_LYNXPOINT=y -CONFIG_PINCTRL_INTEL=y -CONFIG_PINCTRL_BROXTON=y -CONFIG_PINCTRL_CANNONLAKE=y -CONFIG_PINCTRL_CEDARFORK=y -CONFIG_PINCTRL_DENVERTON=y -# CONFIG_PINCTRL_EMMITSBURG is not set -CONFIG_PINCTRL_GEMINILAKE=y -CONFIG_PINCTRL_ICELAKE=y -CONFIG_PINCTRL_JASPERLAKE=y -CONFIG_PINCTRL_LEWISBURG=y -CONFIG_PINCTRL_SUNRISEPOINT=y -CONFIG_PINCTRL_TIGERLAKE=y -CONFIG_PINCTRL_LOCHNAGAR=m -CONFIG_PINCTRL_MADERA=m -CONFIG_PINCTRL_CS47L15=y -CONFIG_PINCTRL_CS47L35=y -CONFIG_PINCTRL_CS47L85=y -CONFIG_PINCTRL_CS47L90=y -CONFIG_PINCTRL_CS47L92=y -CONFIG_PINCTRL_EQUILIBRIUM=m -CONFIG_GPIOLIB=y -CONFIG_GPIOLIB_FASTPATH_LIMIT=512 -CONFIG_OF_GPIO=y -CONFIG_GPIO_ACPI=y -CONFIG_GPIOLIB_IRQCHIP=y -# CONFIG_DEBUG_GPIO is not set -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_GENERIC=y -CONFIG_GPIO_MAX730X=m - -# -# Memory mapped GPIO drivers -# -CONFIG_GPIO_74XX_MMIO=m -CONFIG_GPIO_ALTERA=m -CONFIG_GPIO_AMDPT=m -CONFIG_GPIO_CADENCE=m -CONFIG_GPIO_DWAPB=m -CONFIG_GPIO_EXAR=m -CONFIG_GPIO_FTGPIO010=y -CONFIG_GPIO_GENERIC_PLATFORM=m -CONFIG_GPIO_GRGPIO=m -CONFIG_GPIO_HLWD=m -CONFIG_GPIO_ICH=m -CONFIG_GPIO_LOGICVC=m 
-CONFIG_GPIO_MB86S7X=m -CONFIG_GPIO_MENZ127=m -CONFIG_GPIO_SAMA5D2_PIOBU=m -CONFIG_GPIO_SIFIVE=y -CONFIG_GPIO_SIOX=m -CONFIG_GPIO_SYSCON=m -CONFIG_GPIO_VX855=m -CONFIG_GPIO_WCD934X=m -CONFIG_GPIO_XILINX=m -CONFIG_GPIO_AMD_FCH=m -# end of Memory mapped GPIO drivers - -# -# Port-mapped I/O GPIO drivers -# -CONFIG_GPIO_F7188X=m -CONFIG_GPIO_IT87=m -CONFIG_GPIO_SCH=m -CONFIG_GPIO_SCH311X=m -CONFIG_GPIO_WINBOND=m -CONFIG_GPIO_WS16C48=m -# end of Port-mapped I/O GPIO drivers - -# -# I2C GPIO expanders -# -CONFIG_GPIO_ADP5588=m -CONFIG_GPIO_ADNP=m -CONFIG_GPIO_GW_PLD=m -CONFIG_GPIO_MAX7300=m -CONFIG_GPIO_MAX732X=m -CONFIG_GPIO_PCA953X=m -CONFIG_GPIO_PCA953X_IRQ=y -CONFIG_GPIO_PCA9570=m -CONFIG_GPIO_PCF857X=m -CONFIG_GPIO_TPIC2810=m -# end of I2C GPIO expanders - -# -# MFD GPIO expanders -# -CONFIG_GPIO_ADP5520=m -CONFIG_GPIO_ARIZONA=m -CONFIG_GPIO_BD70528=m -CONFIG_GPIO_BD71828=m -CONFIG_GPIO_BD9571MWV=m -CONFIG_GPIO_CRYSTAL_COVE=m -CONFIG_GPIO_DA9052=m -CONFIG_GPIO_DA9055=m -CONFIG_GPIO_DLN2=m -CONFIG_GPIO_JANZ_TTL=m -CONFIG_GPIO_KEMPLD=m -CONFIG_GPIO_LP3943=m -CONFIG_GPIO_LP873X=m -CONFIG_GPIO_LP87565=m -CONFIG_GPIO_MADERA=m -CONFIG_GPIO_MAX77620=m -CONFIG_GPIO_MAX77650=m -CONFIG_GPIO_MSIC=y -CONFIG_GPIO_PALMAS=y -CONFIG_GPIO_RC5T583=y -CONFIG_GPIO_STMPE=y -CONFIG_GPIO_TC3589X=y -CONFIG_GPIO_TPS65086=m -CONFIG_GPIO_TPS65218=m -CONFIG_GPIO_TPS6586X=y -CONFIG_GPIO_TPS65910=y -CONFIG_GPIO_TPS65912=m -CONFIG_GPIO_TPS68470=y -CONFIG_GPIO_TQMX86=m -CONFIG_GPIO_TWL4030=m -CONFIG_GPIO_TWL6040=m -CONFIG_GPIO_UCB1400=m -CONFIG_GPIO_WHISKEY_COVE=m -CONFIG_GPIO_WM831X=m -CONFIG_GPIO_WM8350=m -CONFIG_GPIO_WM8994=m -# end of MFD GPIO expanders - -# -# PCI GPIO expanders -# -CONFIG_GPIO_AMD8111=m -CONFIG_GPIO_ML_IOH=m -CONFIG_GPIO_PCI_IDIO_16=m -CONFIG_GPIO_PCIE_IDIO_24=m -CONFIG_GPIO_RDC321X=m -CONFIG_GPIO_SODAVILLE=y -# end of PCI GPIO expanders - -# -# SPI GPIO expanders -# -CONFIG_GPIO_74X164=m -CONFIG_GPIO_MAX3191X=m -CONFIG_GPIO_MAX7301=m -CONFIG_GPIO_MC33880=m -CONFIG_GPIO_PISOSR=m -CONFIG_GPIO_XRA1403=m -CONFIG_GPIO_MOXTET=m -# end of SPI GPIO expanders - -# -# USB GPIO expanders -# -CONFIG_GPIO_VIPERBOARD=m -# end of USB GPIO expanders - -CONFIG_GPIO_AGGREGATOR=m -CONFIG_GPIO_MOCKUP=m -CONFIG_W1=m -CONFIG_W1_CON=y - -# -# 1-wire Bus Masters -# -CONFIG_W1_MASTER_MATROX=m -CONFIG_W1_MASTER_DS2490=m -CONFIG_W1_MASTER_DS2482=m -CONFIG_W1_MASTER_DS1WM=m -CONFIG_W1_MASTER_GPIO=m -CONFIG_W1_MASTER_SGI=m -# end of 1-wire Bus Masters - -# -# 1-wire Slaves -# -CONFIG_W1_SLAVE_THERM=m -CONFIG_W1_SLAVE_SMEM=m -CONFIG_W1_SLAVE_DS2405=m -CONFIG_W1_SLAVE_DS2408=m -# CONFIG_W1_SLAVE_DS2408_READBACK is not set -CONFIG_W1_SLAVE_DS2413=m -CONFIG_W1_SLAVE_DS2406=m -CONFIG_W1_SLAVE_DS2423=m -CONFIG_W1_SLAVE_DS2805=m -CONFIG_W1_SLAVE_DS2430=m -CONFIG_W1_SLAVE_DS2431=m -CONFIG_W1_SLAVE_DS2433=m -# CONFIG_W1_SLAVE_DS2433_CRC is not set -CONFIG_W1_SLAVE_DS2438=m -CONFIG_W1_SLAVE_DS250X=m -CONFIG_W1_SLAVE_DS2780=m -CONFIG_W1_SLAVE_DS2781=m -CONFIG_W1_SLAVE_DS28E04=m -CONFIG_W1_SLAVE_DS28E17=m -# end of 1-wire Slaves - -CONFIG_POWER_AVS=y -CONFIG_QCOM_CPR=m -CONFIG_POWER_RESET=y -CONFIG_POWER_RESET_AS3722=y -CONFIG_POWER_RESET_GPIO=y -CONFIG_POWER_RESET_GPIO_RESTART=y -CONFIG_POWER_RESET_LTC2952=y -CONFIG_POWER_RESET_MT6323=y -CONFIG_POWER_RESET_RESTART=y -CONFIG_POWER_RESET_SYSCON=y -CONFIG_POWER_RESET_SYSCON_POWEROFF=y -CONFIG_REBOOT_MODE=m -CONFIG_SYSCON_REBOOT_MODE=m -CONFIG_NVMEM_REBOOT_MODE=m -CONFIG_POWER_SUPPLY=y -# CONFIG_POWER_SUPPLY_DEBUG is not set -CONFIG_POWER_SUPPLY_HWMON=y -CONFIG_PDA_POWER=m 
-CONFIG_GENERIC_ADC_BATTERY=m -CONFIG_MAX8925_POWER=m -CONFIG_WM831X_BACKUP=m -CONFIG_WM831X_POWER=m -CONFIG_WM8350_POWER=m -CONFIG_TEST_POWER=m -CONFIG_BATTERY_88PM860X=m -CONFIG_CHARGER_ADP5061=m -CONFIG_BATTERY_ACT8945A=m -CONFIG_BATTERY_CPCAP=m -CONFIG_BATTERY_CW2015=m -CONFIG_BATTERY_DS2760=m -CONFIG_BATTERY_DS2780=m -CONFIG_BATTERY_DS2781=m -CONFIG_BATTERY_DS2782=m -CONFIG_BATTERY_LEGO_EV3=m -CONFIG_BATTERY_SBS=m -CONFIG_CHARGER_SBS=m -CONFIG_MANAGER_SBS=m -CONFIG_BATTERY_BQ27XXX=m -CONFIG_BATTERY_BQ27XXX_I2C=m -CONFIG_BATTERY_BQ27XXX_HDQ=m -# CONFIG_BATTERY_BQ27XXX_DT_UPDATES_NVM is not set -CONFIG_BATTERY_DA9030=m -CONFIG_BATTERY_DA9052=m -CONFIG_CHARGER_DA9150=m -CONFIG_BATTERY_DA9150=m -CONFIG_CHARGER_AXP20X=m -CONFIG_BATTERY_AXP20X=m -CONFIG_AXP20X_POWER=m -CONFIG_AXP288_CHARGER=m -CONFIG_AXP288_FUEL_GAUGE=m -CONFIG_BATTERY_MAX17040=m -CONFIG_BATTERY_MAX17042=m -CONFIG_BATTERY_MAX1721X=m -CONFIG_BATTERY_TWL4030_MADC=m -CONFIG_CHARGER_88PM860X=m -CONFIG_CHARGER_PCF50633=m -CONFIG_BATTERY_RX51=m -CONFIG_CHARGER_ISP1704=m -CONFIG_CHARGER_MAX8903=m -CONFIG_CHARGER_TWL4030=m -CONFIG_CHARGER_LP8727=m -CONFIG_CHARGER_LP8788=m -CONFIG_CHARGER_GPIO=m -CONFIG_CHARGER_MANAGER=y -CONFIG_CHARGER_LT3651=m -CONFIG_CHARGER_MAX14577=m -CONFIG_CHARGER_DETECTOR_MAX14656=m -CONFIG_CHARGER_MAX77650=m -CONFIG_CHARGER_MAX77693=m -CONFIG_CHARGER_MAX8997=m -CONFIG_CHARGER_MAX8998=m -CONFIG_CHARGER_MP2629=m -CONFIG_CHARGER_BQ2415X=m -CONFIG_CHARGER_BQ24190=m -CONFIG_CHARGER_BQ24257=m -CONFIG_CHARGER_BQ24735=m -# CONFIG_CHARGER_BQ2515X is not set -CONFIG_CHARGER_BQ25890=m -CONFIG_CHARGER_SMB347=m -CONFIG_CHARGER_TPS65090=m -CONFIG_CHARGER_TPS65217=m -CONFIG_BATTERY_GAUGE_LTC2941=m -CONFIG_BATTERY_RT5033=m -CONFIG_CHARGER_RT9455=m -CONFIG_CHARGER_CROS_USBPD=m -CONFIG_CHARGER_UCS1002=m -CONFIG_CHARGER_BD70528=m -CONFIG_CHARGER_BD99954=m -CONFIG_CHARGER_WILCO=m -CONFIG_HWMON=y -CONFIG_HWMON_VID=m -# CONFIG_HWMON_DEBUG_CHIP is not set - -# -# Native drivers -# -CONFIG_SENSORS_ABITUGURU=m -CONFIG_SENSORS_ABITUGURU3=m -CONFIG_SENSORS_AD7314=m -CONFIG_SENSORS_AD7414=m -CONFIG_SENSORS_AD7418=m -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM1026=m -CONFIG_SENSORS_ADM1029=m -CONFIG_SENSORS_ADM1031=m -CONFIG_SENSORS_ADM1177=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_ADT7X10=m -CONFIG_SENSORS_ADT7310=m -CONFIG_SENSORS_ADT7410=m -CONFIG_SENSORS_ADT7411=m -CONFIG_SENSORS_ADT7462=m -CONFIG_SENSORS_ADT7470=m -CONFIG_SENSORS_ADT7475=m -CONFIG_SENSORS_AS370=m -CONFIG_SENSORS_ASC7621=m -CONFIG_SENSORS_AXI_FAN_CONTROL=m -CONFIG_SENSORS_K8TEMP=m -CONFIG_SENSORS_K10TEMP=m -CONFIG_SENSORS_FAM15H_POWER=m -CONFIG_SENSORS_AMD_ENERGY=m -CONFIG_SENSORS_APPLESMC=m -CONFIG_SENSORS_ASB100=m -CONFIG_SENSORS_ASPEED=m -CONFIG_SENSORS_ATXP1=m -CONFIG_SENSORS_CORSAIR_CPRO=m -CONFIG_SENSORS_DRIVETEMP=m -CONFIG_SENSORS_DS620=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_DELL_SMM=m -CONFIG_SENSORS_DA9052_ADC=m -CONFIG_SENSORS_DA9055=m -CONFIG_SENSORS_I5K_AMB=m -CONFIG_SENSORS_F71805F=m -CONFIG_SENSORS_F71882FG=m -CONFIG_SENSORS_F75375S=m -CONFIG_SENSORS_GSC=m -CONFIG_SENSORS_MC13783_ADC=m -CONFIG_SENSORS_FSCHMD=m -CONFIG_SENSORS_FTSTEUTATES=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_G760A=m -CONFIG_SENSORS_G762=m -CONFIG_SENSORS_GPIO_FAN=m -CONFIG_SENSORS_HIH6130=m -CONFIG_SENSORS_IBMAEM=m -CONFIG_SENSORS_IBMPEX=m -CONFIG_SENSORS_IIO_HWMON=m -CONFIG_SENSORS_I5500=m -CONFIG_SENSORS_CORETEMP=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_JC42=m -CONFIG_SENSORS_POWR1220=m -CONFIG_SENSORS_LINEAGE=m 
-CONFIG_SENSORS_LOCHNAGAR=m -CONFIG_SENSORS_LTC2945=m -CONFIG_SENSORS_LTC2947=m -CONFIG_SENSORS_LTC2947_I2C=m -CONFIG_SENSORS_LTC2947_SPI=m -CONFIG_SENSORS_LTC2990=m -CONFIG_SENSORS_LTC4151=m -CONFIG_SENSORS_LTC4215=m -CONFIG_SENSORS_LTC4222=m -CONFIG_SENSORS_LTC4245=m -CONFIG_SENSORS_LTC4260=m -CONFIG_SENSORS_LTC4261=m -CONFIG_SENSORS_MAX1111=m -CONFIG_SENSORS_MAX16065=m -CONFIG_SENSORS_MAX1619=m -CONFIG_SENSORS_MAX1668=m -CONFIG_SENSORS_MAX197=m -CONFIG_SENSORS_MAX31722=m -CONFIG_SENSORS_MAX31730=m -CONFIG_SENSORS_MAX6621=m -CONFIG_SENSORS_MAX6639=m -CONFIG_SENSORS_MAX6642=m -CONFIG_SENSORS_MAX6650=m -CONFIG_SENSORS_MAX6697=m -CONFIG_SENSORS_MAX31790=m -CONFIG_SENSORS_MCP3021=m -CONFIG_SENSORS_MLXREG_FAN=m -CONFIG_SENSORS_TC654=m -CONFIG_SENSORS_MENF21BMC_HWMON=m -CONFIG_SENSORS_ADCXX=m -CONFIG_SENSORS_LM63=m -CONFIG_SENSORS_LM70=m -CONFIG_SENSORS_LM73=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM77=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM83=m -CONFIG_SENSORS_LM85=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM90=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_LM93=m -CONFIG_SENSORS_LM95234=m -CONFIG_SENSORS_LM95241=m -CONFIG_SENSORS_LM95245=m -CONFIG_SENSORS_PC87360=m -CONFIG_SENSORS_PC87427=m -CONFIG_SENSORS_NTC_THERMISTOR=m -CONFIG_SENSORS_NCT6683=m -CONFIG_SENSORS_NCT6775=m -CONFIG_SENSORS_NCT7802=m -CONFIG_SENSORS_NCT7904=m -CONFIG_SENSORS_NPCM7XX=m -CONFIG_SENSORS_PCF8591=m -CONFIG_PMBUS=m -CONFIG_SENSORS_PMBUS=m -CONFIG_SENSORS_ADM1275=m -CONFIG_SENSORS_BEL_PFE=m -CONFIG_SENSORS_IBM_CFFPS=m -CONFIG_SENSORS_INSPUR_IPSPS=m -CONFIG_SENSORS_IR35221=m -CONFIG_SENSORS_IR38064=m -CONFIG_SENSORS_IRPS5401=m -CONFIG_SENSORS_ISL68137=m -CONFIG_SENSORS_LM25066=m -CONFIG_SENSORS_LTC2978=m -# CONFIG_SENSORS_LTC2978_REGULATOR is not set -CONFIG_SENSORS_LTC3815=m -CONFIG_SENSORS_MAX16064=m -CONFIG_SENSORS_MAX16601=m -CONFIG_SENSORS_MAX20730=m -CONFIG_SENSORS_MAX20751=m -CONFIG_SENSORS_MAX31785=m -CONFIG_SENSORS_MAX34440=m -CONFIG_SENSORS_MAX8688=m -CONFIG_SENSORS_PXE1610=m -CONFIG_SENSORS_TPS40422=m -CONFIG_SENSORS_TPS53679=m -CONFIG_SENSORS_UCD9000=m -CONFIG_SENSORS_UCD9200=m -CONFIG_SENSORS_XDPE122=m -CONFIG_SENSORS_ZL6100=m -CONFIG_SENSORS_PWM_FAN=m -CONFIG_SENSORS_SHT15=m -CONFIG_SENSORS_SHT21=m -CONFIG_SENSORS_SHT3x=m -CONFIG_SENSORS_SHTC1=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_DME1737=m -CONFIG_SENSORS_EMC1403=m -CONFIG_SENSORS_EMC2103=m -CONFIG_SENSORS_EMC6W201=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_SMSC47M192=m -CONFIG_SENSORS_SMSC47B397=m -CONFIG_SENSORS_SCH56XX_COMMON=m -CONFIG_SENSORS_SCH5627=m -CONFIG_SENSORS_SCH5636=m -CONFIG_SENSORS_STTS751=m -CONFIG_SENSORS_SMM665=m -CONFIG_SENSORS_ADC128D818=m -CONFIG_SENSORS_ADS7828=m -CONFIG_SENSORS_ADS7871=m -CONFIG_SENSORS_AMC6821=m -CONFIG_SENSORS_INA209=m -CONFIG_SENSORS_INA2XX=m -CONFIG_SENSORS_INA3221=m -CONFIG_SENSORS_TC74=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_TMP102=m -CONFIG_SENSORS_TMP103=m -CONFIG_SENSORS_TMP108=m -CONFIG_SENSORS_TMP401=m -CONFIG_SENSORS_TMP421=m -CONFIG_SENSORS_TMP513=m -CONFIG_SENSORS_VIA_CPUTEMP=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83773G=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_W83791D=m -CONFIG_SENSORS_W83792D=m -CONFIG_SENSORS_W83793=m -CONFIG_SENSORS_W83795=m -# CONFIG_SENSORS_W83795_FANCTRL is not set -CONFIG_SENSORS_W83L785TS=m -CONFIG_SENSORS_W83L786NG=m -CONFIG_SENSORS_W83627HF=m -CONFIG_SENSORS_W83627EHF=m -CONFIG_SENSORS_WM831X=m -CONFIG_SENSORS_WM8350=m -CONFIG_SENSORS_XGENE=m - -# -# ACPI drivers -# 
-CONFIG_SENSORS_ACPI_POWER=m -CONFIG_SENSORS_ATK0110=m -CONFIG_THERMAL=y -# CONFIG_THERMAL_NETLINK is not set -# CONFIG_THERMAL_STATISTICS is not set -CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=100 -CONFIG_THERMAL_HWMON=y -CONFIG_THERMAL_OF=y -CONFIG_THERMAL_WRITABLE_TRIPS=y -CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y -# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set -# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set -# CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR is not set -CONFIG_THERMAL_GOV_FAIR_SHARE=y -CONFIG_THERMAL_GOV_STEP_WISE=y -CONFIG_THERMAL_GOV_BANG_BANG=y -CONFIG_THERMAL_GOV_USER_SPACE=y -CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y -CONFIG_CPU_THERMAL=y -CONFIG_CPU_FREQ_THERMAL=y -CONFIG_CPU_IDLE_THERMAL=y -CONFIG_DEVFREQ_THERMAL=y -# CONFIG_THERMAL_EMULATION is not set -CONFIG_THERMAL_MMIO=m -CONFIG_MAX77620_THERMAL=m -CONFIG_DA9062_THERMAL=m - -# -# Intel thermal drivers -# -CONFIG_INTEL_POWERCLAMP=m -CONFIG_X86_PKG_TEMP_THERMAL=m -CONFIG_INTEL_SOC_DTS_IOSF_CORE=m -CONFIG_INTEL_SOC_DTS_THERMAL=m - -# -# ACPI INT340X thermal drivers -# -CONFIG_INT340X_THERMAL=m -CONFIG_ACPI_THERMAL_REL=m -CONFIG_INT3406_THERMAL=m -CONFIG_PROC_THERMAL_MMIO_RAPL=y -# end of ACPI INT340X thermal drivers - -CONFIG_INTEL_BXT_PMIC_THERMAL=m -CONFIG_INTEL_PCH_THERMAL=m -# end of Intel thermal drivers - -# CONFIG_TI_SOC_THERMAL is not set -CONFIG_GENERIC_ADC_THERMAL=m -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_CORE=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y -CONFIG_WATCHDOG_OPEN_TIMEOUT=0 -CONFIG_WATCHDOG_SYSFS=y - -# -# Watchdog Pretimeout Governors -# -CONFIG_WATCHDOG_PRETIMEOUT_GOV=y -CONFIG_WATCHDOG_PRETIMEOUT_GOV_SEL=m -CONFIG_WATCHDOG_PRETIMEOUT_GOV_NOOP=m -CONFIG_WATCHDOG_PRETIMEOUT_GOV_PANIC=y -# CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_NOOP is not set -CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_PANIC=y - -# -# Watchdog Device Drivers -# -CONFIG_SOFT_WATCHDOG=m -# CONFIG_SOFT_WATCHDOG_PRETIMEOUT is not set -CONFIG_BD70528_WATCHDOG=m -CONFIG_DA9052_WATCHDOG=m -CONFIG_DA9055_WATCHDOG=m -CONFIG_DA9063_WATCHDOG=m -CONFIG_DA9062_WATCHDOG=m -CONFIG_GPIO_WATCHDOG=m -CONFIG_MENF21BMC_WATCHDOG=m -CONFIG_MENZ069_WATCHDOG=m -CONFIG_WDAT_WDT=m -CONFIG_WM831X_WATCHDOG=m -CONFIG_WM8350_WATCHDOG=m -CONFIG_XILINX_WATCHDOG=m -CONFIG_ZIIRAVE_WATCHDOG=m -CONFIG_RAVE_SP_WATCHDOG=m -CONFIG_MLX_WDT=m -CONFIG_CADENCE_WATCHDOG=m -CONFIG_DW_WATCHDOG=m -CONFIG_RN5T618_WATCHDOG=m -CONFIG_TWL4030_WATCHDOG=m -CONFIG_MAX63XX_WATCHDOG=m -CONFIG_MAX77620_WATCHDOG=m -CONFIG_RETU_WATCHDOG=m -CONFIG_STPMIC1_WATCHDOG=m -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_EBC_C384_WDT=m -CONFIG_F71808E_WDT=m -CONFIG_SP5100_TCO=m -CONFIG_SBC_FITPC2_WATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_IBMASR=m -CONFIG_WAFER_WDT=m -CONFIG_I6300ESB_WDT=m -CONFIG_IE6XX_WDT=m -CONFIG_ITCO_WDT=m -CONFIG_ITCO_VENDOR_SUPPORT=y -CONFIG_IT8712F_WDT=m -CONFIG_IT87_WDT=m -CONFIG_HP_WATCHDOG=m -CONFIG_HPWDT_NMI_DECODING=y -CONFIG_KEMPLD_WDT=m -CONFIG_SC1200_WDT=m -CONFIG_PC87413_WDT=m -CONFIG_NV_TCO=m -CONFIG_60XX_WDT=m -CONFIG_CPU5_WDT=m -CONFIG_SMSC_SCH311X_WDT=m -CONFIG_SMSC37B787_WDT=m -CONFIG_TQMX86_WDT=m -CONFIG_VIA_WDT=m -CONFIG_W83627HF_WDT=m -CONFIG_W83877F_WDT=m -CONFIG_W83977F_WDT=m -CONFIG_MACHZ_WDT=m -CONFIG_SBC_EPX_C3_WATCHDOG=m -CONFIG_INTEL_MEI_WDT=m -CONFIG_NI903X_WDT=m -CONFIG_NIC7018_WDT=m -CONFIG_MEN_A21_WDT=m -CONFIG_XEN_WDT=m - -# -# PCI-based Watchdog Cards -# -CONFIG_PCIPCWATCHDOG=m -CONFIG_WDTPCI=m - -# -# USB-based Watchdog Cards -# 
-CONFIG_USBPCWATCHDOG=m -CONFIG_SSB_POSSIBLE=y -CONFIG_SSB=m -CONFIG_SSB_SPROM=y -CONFIG_SSB_BLOCKIO=y -CONFIG_SSB_PCIHOST_POSSIBLE=y -CONFIG_SSB_PCIHOST=y -CONFIG_SSB_B43_PCI_BRIDGE=y -CONFIG_SSB_PCMCIAHOST_POSSIBLE=y -CONFIG_SSB_PCMCIAHOST=y -CONFIG_SSB_SDIOHOST_POSSIBLE=y -CONFIG_SSB_SDIOHOST=y -CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y -CONFIG_SSB_DRIVER_PCICORE=y -CONFIG_SSB_DRIVER_GPIO=y -CONFIG_BCMA_POSSIBLE=y -CONFIG_BCMA=m -CONFIG_BCMA_BLOCKIO=y -CONFIG_BCMA_HOST_PCI_POSSIBLE=y -CONFIG_BCMA_HOST_PCI=y -# CONFIG_BCMA_HOST_SOC is not set -CONFIG_BCMA_DRIVER_PCI=y -CONFIG_BCMA_DRIVER_GMAC_CMN=y -CONFIG_BCMA_DRIVER_GPIO=y -# CONFIG_BCMA_DEBUG is not set - -# -# Multifunction device drivers -# -CONFIG_MFD_CORE=y -CONFIG_MFD_ACT8945A=m -CONFIG_MFD_AS3711=y -CONFIG_MFD_AS3722=m -CONFIG_PMIC_ADP5520=y -CONFIG_MFD_AAT2870_CORE=y -CONFIG_MFD_ATMEL_FLEXCOM=m -CONFIG_MFD_ATMEL_HLCDC=m -CONFIG_MFD_BCM590XX=m -CONFIG_MFD_BD9571MWV=m -CONFIG_MFD_AXP20X=m -CONFIG_MFD_AXP20X_I2C=m -CONFIG_MFD_CROS_EC_DEV=m -CONFIG_MFD_MADERA=m -CONFIG_MFD_MADERA_I2C=m -CONFIG_MFD_MADERA_SPI=m -CONFIG_MFD_CS47L15=y -CONFIG_MFD_CS47L35=y -CONFIG_MFD_CS47L85=y -CONFIG_MFD_CS47L90=y -CONFIG_MFD_CS47L92=y -CONFIG_PMIC_DA903X=y -CONFIG_PMIC_DA9052=y -CONFIG_MFD_DA9052_SPI=y -CONFIG_MFD_DA9052_I2C=y -CONFIG_MFD_DA9055=y -CONFIG_MFD_DA9062=m -CONFIG_MFD_DA9063=m -CONFIG_MFD_DA9150=m -CONFIG_MFD_DLN2=m -CONFIG_MFD_GATEWORKS_GSC=m -CONFIG_MFD_MC13XXX=m -CONFIG_MFD_MC13XXX_SPI=m -CONFIG_MFD_MC13XXX_I2C=m -CONFIG_MFD_MP2629=m -CONFIG_MFD_HI6421_PMIC=m -CONFIG_HTC_PASIC3=m -CONFIG_HTC_I2CPLD=y -CONFIG_MFD_INTEL_QUARK_I2C_GPIO=m -CONFIG_LPC_ICH=m -CONFIG_LPC_SCH=m -CONFIG_INTEL_SOC_PMIC=y -CONFIG_INTEL_SOC_PMIC_BXTWC=m -CONFIG_INTEL_SOC_PMIC_CHTWC=y -CONFIG_INTEL_SOC_PMIC_CHTDC_TI=m -CONFIG_INTEL_SOC_PMIC_MRFLD=m -CONFIG_MFD_INTEL_LPSS=m -CONFIG_MFD_INTEL_LPSS_ACPI=m -CONFIG_MFD_INTEL_LPSS_PCI=m -CONFIG_MFD_INTEL_MSIC=y -CONFIG_MFD_INTEL_PMC_BXT=m -CONFIG_MFD_IQS62X=m -CONFIG_MFD_JANZ_CMODIO=m -CONFIG_MFD_KEMPLD=m -CONFIG_MFD_88PM800=m -CONFIG_MFD_88PM805=m -CONFIG_MFD_88PM860X=y -CONFIG_MFD_MAX14577=m -CONFIG_MFD_MAX77620=y -CONFIG_MFD_MAX77650=m -CONFIG_MFD_MAX77686=m -CONFIG_MFD_MAX77693=m -CONFIG_MFD_MAX77843=y -CONFIG_MFD_MAX8907=m -CONFIG_MFD_MAX8925=y -CONFIG_MFD_MAX8997=y -CONFIG_MFD_MAX8998=y -CONFIG_MFD_MT6360=m -CONFIG_MFD_MT6397=m -CONFIG_MFD_MENF21BMC=m -CONFIG_EZX_PCAP=y -CONFIG_MFD_CPCAP=m -CONFIG_MFD_VIPERBOARD=m -CONFIG_MFD_RETU=m -CONFIG_MFD_PCF50633=m -CONFIG_PCF50633_ADC=m -CONFIG_PCF50633_GPIO=m -CONFIG_UCB1400_CORE=m -CONFIG_MFD_RDC321X=m -CONFIG_MFD_RT5033=m -CONFIG_MFD_RC5T583=y -CONFIG_MFD_RK808=m -CONFIG_MFD_RN5T618=m -CONFIG_MFD_SEC_CORE=y -CONFIG_MFD_SI476X_CORE=m -CONFIG_MFD_SM501=m -CONFIG_MFD_SM501_GPIO=y -CONFIG_MFD_SKY81452=m -CONFIG_ABX500_CORE=y -CONFIG_AB3100_CORE=y -CONFIG_AB3100_OTP=y -CONFIG_MFD_STMPE=y - -# -# STMicroelectronics STMPE Interface Drivers -# -CONFIG_STMPE_I2C=y -CONFIG_STMPE_SPI=y -# end of STMicroelectronics STMPE Interface Drivers - -CONFIG_MFD_SYSCON=y -CONFIG_MFD_TI_AM335X_TSCADC=m -CONFIG_MFD_LP3943=m -CONFIG_MFD_LP8788=y -CONFIG_MFD_TI_LMU=m -CONFIG_MFD_PALMAS=y -CONFIG_TPS6105X=m -CONFIG_TPS65010=m -CONFIG_TPS6507X=m -CONFIG_MFD_TPS65086=m -CONFIG_MFD_TPS65090=y -CONFIG_MFD_TPS65217=m -CONFIG_MFD_TPS68470=y -CONFIG_MFD_TI_LP873X=m -CONFIG_MFD_TI_LP87565=m -CONFIG_MFD_TPS65218=m -CONFIG_MFD_TPS6586X=y -CONFIG_MFD_TPS65910=y -CONFIG_MFD_TPS65912=m -CONFIG_MFD_TPS65912_I2C=m -CONFIG_MFD_TPS65912_SPI=m -CONFIG_MFD_TPS80031=y -CONFIG_TWL4030_CORE=y -CONFIG_MFD_TWL4030_AUDIO=y 
-CONFIG_TWL6040_CORE=y -CONFIG_MFD_WL1273_CORE=m -CONFIG_MFD_LM3533=m -CONFIG_MFD_TC3589X=y -CONFIG_MFD_TQMX86=m -CONFIG_MFD_VX855=m -CONFIG_MFD_LOCHNAGAR=y -CONFIG_MFD_ARIZONA=y -CONFIG_MFD_ARIZONA_I2C=m -CONFIG_MFD_ARIZONA_SPI=m -CONFIG_MFD_CS47L24=y -CONFIG_MFD_WM5102=y -CONFIG_MFD_WM5110=y -CONFIG_MFD_WM8997=y -CONFIG_MFD_WM8998=y -CONFIG_MFD_WM8400=y -CONFIG_MFD_WM831X=y -CONFIG_MFD_WM831X_I2C=y -CONFIG_MFD_WM831X_SPI=y -CONFIG_MFD_WM8350=y -CONFIG_MFD_WM8350_I2C=y -CONFIG_MFD_WM8994=m -CONFIG_MFD_ROHM_BD718XX=m -CONFIG_MFD_ROHM_BD70528=m -CONFIG_MFD_ROHM_BD71828=m -CONFIG_MFD_STPMIC1=m -CONFIG_MFD_STMFX=m -CONFIG_MFD_WCD934X=m -CONFIG_RAVE_SP_CORE=m -# end of Multifunction device drivers - -CONFIG_REGULATOR=y -# CONFIG_REGULATOR_DEBUG is not set -CONFIG_REGULATOR_FIXED_VOLTAGE=m -CONFIG_REGULATOR_VIRTUAL_CONSUMER=m -CONFIG_REGULATOR_USERSPACE_CONSUMER=m -CONFIG_REGULATOR_88PG86X=m -CONFIG_REGULATOR_88PM800=m -CONFIG_REGULATOR_88PM8607=m -CONFIG_REGULATOR_ACT8865=m -CONFIG_REGULATOR_ACT8945A=m -CONFIG_REGULATOR_AD5398=m -CONFIG_REGULATOR_AAT2870=m -CONFIG_REGULATOR_AB3100=m -CONFIG_REGULATOR_ARIZONA_LDO1=m -CONFIG_REGULATOR_ARIZONA_MICSUPP=m -CONFIG_REGULATOR_AS3711=m -CONFIG_REGULATOR_AS3722=m -CONFIG_REGULATOR_AXP20X=m -CONFIG_REGULATOR_BCM590XX=m -CONFIG_REGULATOR_BD70528=m -CONFIG_REGULATOR_BD71828=m -CONFIG_REGULATOR_BD718XX=m -CONFIG_REGULATOR_BD9571MWV=m -CONFIG_REGULATOR_CPCAP=m -CONFIG_REGULATOR_CROS_EC=m -CONFIG_REGULATOR_DA903X=m -CONFIG_REGULATOR_DA9052=m -CONFIG_REGULATOR_DA9055=m -CONFIG_REGULATOR_DA9062=m -CONFIG_REGULATOR_DA9063=m -CONFIG_REGULATOR_DA9210=m -CONFIG_REGULATOR_DA9211=m -CONFIG_REGULATOR_FAN53555=m -CONFIG_REGULATOR_FAN53880=m -CONFIG_REGULATOR_GPIO=m -CONFIG_REGULATOR_HI6421=m -CONFIG_REGULATOR_HI6421V530=m -CONFIG_REGULATOR_ISL9305=m -CONFIG_REGULATOR_ISL6271A=m -CONFIG_REGULATOR_LM363X=m -CONFIG_REGULATOR_LOCHNAGAR=m -CONFIG_REGULATOR_LP3971=m -CONFIG_REGULATOR_LP3972=m -CONFIG_REGULATOR_LP872X=m -CONFIG_REGULATOR_LP873X=m -CONFIG_REGULATOR_LP8755=m -CONFIG_REGULATOR_LP87565=m -CONFIG_REGULATOR_LP8788=m -CONFIG_REGULATOR_LTC3589=m -CONFIG_REGULATOR_LTC3676=m -CONFIG_REGULATOR_MAX14577=m -CONFIG_REGULATOR_MAX1586=m -CONFIG_REGULATOR_MAX77620=m -CONFIG_REGULATOR_MAX77650=m -CONFIG_REGULATOR_MAX8649=m -CONFIG_REGULATOR_MAX8660=m -CONFIG_REGULATOR_MAX8907=m -CONFIG_REGULATOR_MAX8925=m -CONFIG_REGULATOR_MAX8952=m -CONFIG_REGULATOR_MAX8973=m -CONFIG_REGULATOR_MAX8997=m -CONFIG_REGULATOR_MAX8998=m -CONFIG_REGULATOR_MAX77686=m -CONFIG_REGULATOR_MAX77693=m -CONFIG_REGULATOR_MAX77802=m -CONFIG_REGULATOR_MAX77826=m -CONFIG_REGULATOR_MC13XXX_CORE=m -CONFIG_REGULATOR_MC13783=m -CONFIG_REGULATOR_MC13892=m -CONFIG_REGULATOR_MCP16502=m -CONFIG_REGULATOR_MP5416=m -CONFIG_REGULATOR_MP8859=m -CONFIG_REGULATOR_MP886X=m -CONFIG_REGULATOR_MPQ7920=m -CONFIG_REGULATOR_MT6311=m -CONFIG_REGULATOR_MT6323=m -CONFIG_REGULATOR_MT6358=m -CONFIG_REGULATOR_MT6397=m -CONFIG_REGULATOR_PALMAS=m -CONFIG_REGULATOR_PCA9450=m -CONFIG_REGULATOR_PCAP=m -CONFIG_REGULATOR_PCF50633=m -CONFIG_REGULATOR_PFUZE100=m -CONFIG_REGULATOR_PV88060=m -CONFIG_REGULATOR_PV88080=m -CONFIG_REGULATOR_PV88090=m -CONFIG_REGULATOR_PWM=m -CONFIG_REGULATOR_QCOM_SPMI=m -CONFIG_REGULATOR_QCOM_USB_VBUS=m -CONFIG_REGULATOR_RC5T583=m -CONFIG_REGULATOR_RK808=m -CONFIG_REGULATOR_RN5T618=m -CONFIG_REGULATOR_ROHM=m -CONFIG_REGULATOR_RT5033=m -CONFIG_REGULATOR_S2MPA01=m -CONFIG_REGULATOR_S2MPS11=m -CONFIG_REGULATOR_S5M8767=m -CONFIG_REGULATOR_SKY81452=m -CONFIG_REGULATOR_SLG51000=m -CONFIG_REGULATOR_STPMIC1=m 
-CONFIG_REGULATOR_SY8106A=m -CONFIG_REGULATOR_SY8824X=m -CONFIG_REGULATOR_SY8827N=m -CONFIG_REGULATOR_TPS51632=m -CONFIG_REGULATOR_TPS6105X=m -CONFIG_REGULATOR_TPS62360=m -CONFIG_REGULATOR_TPS65023=m -CONFIG_REGULATOR_TPS6507X=m -CONFIG_REGULATOR_TPS65086=m -CONFIG_REGULATOR_TPS65090=m -CONFIG_REGULATOR_TPS65132=m -CONFIG_REGULATOR_TPS65217=m -CONFIG_REGULATOR_TPS65218=m -CONFIG_REGULATOR_TPS6524X=m -CONFIG_REGULATOR_TPS6586X=m -CONFIG_REGULATOR_TPS65910=m -CONFIG_REGULATOR_TPS65912=m -CONFIG_REGULATOR_TPS80031=m -CONFIG_REGULATOR_TWL4030=m -CONFIG_REGULATOR_VCTRL=m -CONFIG_REGULATOR_WM831X=m -CONFIG_REGULATOR_WM8350=m -CONFIG_REGULATOR_WM8400=m -CONFIG_REGULATOR_WM8994=m -CONFIG_REGULATOR_QCOM_LABIBB=m -CONFIG_RC_CORE=m -CONFIG_RC_MAP=m -CONFIG_LIRC=y -CONFIG_RC_DECODERS=y -CONFIG_IR_NEC_DECODER=m -CONFIG_IR_RC5_DECODER=m -CONFIG_IR_RC6_DECODER=m -CONFIG_IR_JVC_DECODER=m -CONFIG_IR_SONY_DECODER=m -CONFIG_IR_SANYO_DECODER=m -CONFIG_IR_SHARP_DECODER=m -CONFIG_IR_MCE_KBD_DECODER=m -CONFIG_IR_XMP_DECODER=m -CONFIG_IR_IMON_DECODER=m -CONFIG_IR_RCMM_DECODER=m -CONFIG_RC_DEVICES=y -CONFIG_RC_ATI_REMOTE=m -CONFIG_IR_ENE=m -CONFIG_IR_HIX5HD2=m -CONFIG_IR_IMON=m -CONFIG_IR_IMON_RAW=m -CONFIG_IR_MCEUSB=m -CONFIG_IR_ITE_CIR=m -CONFIG_IR_FINTEK=m -CONFIG_IR_NUVOTON=m -CONFIG_IR_REDRAT3=m -CONFIG_IR_SPI=m -CONFIG_IR_STREAMZAP=m -CONFIG_IR_WINBOND_CIR=m -CONFIG_IR_IGORPLUGUSB=m -CONFIG_IR_IGUANA=m -CONFIG_IR_TTUSBIR=m -CONFIG_RC_LOOPBACK=m -CONFIG_IR_GPIO_CIR=m -CONFIG_IR_GPIO_TX=m -CONFIG_IR_PWM_TX=m -CONFIG_IR_SERIAL=m -CONFIG_IR_SERIAL_TRANSMITTER=y -CONFIG_IR_SIR=m -CONFIG_RC_XBOX_DVD=m -CONFIG_IR_TOY=m -CONFIG_CEC_CORE=m -CONFIG_CEC_NOTIFIER=y -CONFIG_CEC_PIN=y -CONFIG_MEDIA_CEC_RC=y -# CONFIG_CEC_PIN_ERROR_INJ is not set -CONFIG_MEDIA_CEC_SUPPORT=y -CONFIG_CEC_CH7322=m -CONFIG_CEC_CROS_EC=m -CONFIG_CEC_GPIO=m -CONFIG_CEC_SECO=m -CONFIG_CEC_SECO_RC=y -CONFIG_USB_PULSE8_CEC=m -CONFIG_USB_RAINSHADOW_CEC=m -CONFIG_MEDIA_SUPPORT=m -# CONFIG_MEDIA_SUPPORT_FILTER is not set -CONFIG_MEDIA_SUBDRV_AUTOSELECT=y - -# -# Media device types -# -CONFIG_MEDIA_CAMERA_SUPPORT=y -CONFIG_MEDIA_ANALOG_TV_SUPPORT=y -CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y -CONFIG_MEDIA_RADIO_SUPPORT=y -CONFIG_MEDIA_SDR_SUPPORT=y -CONFIG_MEDIA_PLATFORM_SUPPORT=y -CONFIG_MEDIA_TEST_SUPPORT=y -# end of Media device types - -# -# Media core support -# -CONFIG_VIDEO_DEV=m -CONFIG_MEDIA_CONTROLLER=y -CONFIG_DVB_CORE=m -# end of Media core support - -# -# Video4Linux options -# -CONFIG_VIDEO_V4L2=m -CONFIG_VIDEO_V4L2_I2C=y -CONFIG_VIDEO_V4L2_SUBDEV_API=y -# CONFIG_VIDEO_ADV_DEBUG is not set -# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set -CONFIG_VIDEO_TUNER=m -CONFIG_V4L2_MEM2MEM_DEV=m -CONFIG_V4L2_FLASH_LED_CLASS=m -CONFIG_V4L2_FWNODE=m -CONFIG_VIDEOBUF_GEN=m -CONFIG_VIDEOBUF_DMA_SG=m -CONFIG_VIDEOBUF_VMALLOC=m -# end of Video4Linux options - -# -# Media controller options -# -CONFIG_MEDIA_CONTROLLER_DVB=y -CONFIG_MEDIA_CONTROLLER_REQUEST_API=y - -# -# Please notice that the enabled Media controller Request API is EXPERIMENTAL -# -# end of Media controller options - -# -# Digital TV options -# -CONFIG_DVB_MMAP=y -CONFIG_DVB_NET=y -CONFIG_DVB_MAX_ADAPTERS=16 -# CONFIG_DVB_DYNAMIC_MINORS is not set -# CONFIG_DVB_DEMUX_SECTION_LOSS_LOG is not set -# CONFIG_DVB_ULE_DEBUG is not set -# end of Digital TV options - -# -# Media drivers -# -CONFIG_TTPCI_EEPROM=m -CONFIG_MEDIA_USB_SUPPORT=y - -# -# Webcam devices -# -CONFIG_USB_VIDEO_CLASS=m -CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y -CONFIG_USB_GSPCA=m -CONFIG_USB_M5602=m -CONFIG_USB_STV06XX=m -CONFIG_USB_GL860=m 
-CONFIG_USB_GSPCA_BENQ=m -CONFIG_USB_GSPCA_CONEX=m -CONFIG_USB_GSPCA_CPIA1=m -CONFIG_USB_GSPCA_DTCS033=m -CONFIG_USB_GSPCA_ETOMS=m -CONFIG_USB_GSPCA_FINEPIX=m -CONFIG_USB_GSPCA_JEILINJ=m -CONFIG_USB_GSPCA_JL2005BCD=m -CONFIG_USB_GSPCA_KINECT=m -CONFIG_USB_GSPCA_KONICA=m -CONFIG_USB_GSPCA_MARS=m -CONFIG_USB_GSPCA_MR97310A=m -CONFIG_USB_GSPCA_NW80X=m -CONFIG_USB_GSPCA_OV519=m -CONFIG_USB_GSPCA_OV534=m -CONFIG_USB_GSPCA_OV534_9=m -CONFIG_USB_GSPCA_PAC207=m -CONFIG_USB_GSPCA_PAC7302=m -CONFIG_USB_GSPCA_PAC7311=m -CONFIG_USB_GSPCA_SE401=m -CONFIG_USB_GSPCA_SN9C2028=m -CONFIG_USB_GSPCA_SN9C20X=m -CONFIG_USB_GSPCA_SONIXB=m -CONFIG_USB_GSPCA_SONIXJ=m -CONFIG_USB_GSPCA_SPCA500=m -CONFIG_USB_GSPCA_SPCA501=m -CONFIG_USB_GSPCA_SPCA505=m -CONFIG_USB_GSPCA_SPCA506=m -CONFIG_USB_GSPCA_SPCA508=m -CONFIG_USB_GSPCA_SPCA561=m -CONFIG_USB_GSPCA_SPCA1528=m -CONFIG_USB_GSPCA_SQ905=m -CONFIG_USB_GSPCA_SQ905C=m -CONFIG_USB_GSPCA_SQ930X=m -CONFIG_USB_GSPCA_STK014=m -CONFIG_USB_GSPCA_STK1135=m -CONFIG_USB_GSPCA_STV0680=m -CONFIG_USB_GSPCA_SUNPLUS=m -CONFIG_USB_GSPCA_T613=m -CONFIG_USB_GSPCA_TOPRO=m -CONFIG_USB_GSPCA_TOUPTEK=m -CONFIG_USB_GSPCA_TV8532=m -CONFIG_USB_GSPCA_VC032X=m -CONFIG_USB_GSPCA_VICAM=m -CONFIG_USB_GSPCA_XIRLINK_CIT=m -CONFIG_USB_GSPCA_ZC3XX=m -CONFIG_USB_PWC=m -# CONFIG_USB_PWC_DEBUG is not set -CONFIG_USB_PWC_INPUT_EVDEV=y -CONFIG_VIDEO_CPIA2=m -CONFIG_USB_ZR364XX=m -CONFIG_USB_STKWEBCAM=m -CONFIG_USB_S2255=m -CONFIG_VIDEO_USBTV=m - -# -# Analog TV USB devices -# -CONFIG_VIDEO_PVRUSB2=m -CONFIG_VIDEO_PVRUSB2_SYSFS=y -CONFIG_VIDEO_PVRUSB2_DVB=y -# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set -CONFIG_VIDEO_HDPVR=m -CONFIG_VIDEO_STK1160_COMMON=m -CONFIG_VIDEO_STK1160=m -CONFIG_VIDEO_GO7007=m -CONFIG_VIDEO_GO7007_USB=m -CONFIG_VIDEO_GO7007_LOADER=m -CONFIG_VIDEO_GO7007_USB_S2250_BOARD=m - -# -# Analog/digital TV USB devices -# -CONFIG_VIDEO_AU0828=m -CONFIG_VIDEO_AU0828_V4L2=y -CONFIG_VIDEO_AU0828_RC=y -CONFIG_VIDEO_CX231XX=m -CONFIG_VIDEO_CX231XX_RC=y -CONFIG_VIDEO_CX231XX_ALSA=m -CONFIG_VIDEO_CX231XX_DVB=m -CONFIG_VIDEO_TM6000=m -CONFIG_VIDEO_TM6000_ALSA=m -CONFIG_VIDEO_TM6000_DVB=m - -# -# Digital TV USB devices -# -CONFIG_DVB_USB=m -# CONFIG_DVB_USB_DEBUG is not set -CONFIG_DVB_USB_DIB3000MC=m -CONFIG_DVB_USB_A800=m -CONFIG_DVB_USB_DIBUSB_MB=m -CONFIG_DVB_USB_DIBUSB_MB_FAULTY=y -CONFIG_DVB_USB_DIBUSB_MC=m -CONFIG_DVB_USB_DIB0700=m -CONFIG_DVB_USB_UMT_010=m -CONFIG_DVB_USB_CXUSB=m -CONFIG_DVB_USB_CXUSB_ANALOG=y -CONFIG_DVB_USB_M920X=m -CONFIG_DVB_USB_DIGITV=m -CONFIG_DVB_USB_VP7045=m -CONFIG_DVB_USB_VP702X=m -CONFIG_DVB_USB_GP8PSK=m -CONFIG_DVB_USB_NOVA_T_USB2=m -CONFIG_DVB_USB_TTUSB2=m -CONFIG_DVB_USB_DTT200U=m -CONFIG_DVB_USB_OPERA1=m -CONFIG_DVB_USB_AF9005=m -CONFIG_DVB_USB_AF9005_REMOTE=m -CONFIG_DVB_USB_PCTV452E=m -CONFIG_DVB_USB_DW2102=m -CONFIG_DVB_USB_CINERGY_T2=m -CONFIG_DVB_USB_DTV5100=m -CONFIG_DVB_USB_AZ6027=m -CONFIG_DVB_USB_TECHNISAT_USB2=m -CONFIG_DVB_USB_V2=m -CONFIG_DVB_USB_AF9015=m -CONFIG_DVB_USB_AF9035=m -CONFIG_DVB_USB_ANYSEE=m -CONFIG_DVB_USB_AU6610=m -CONFIG_DVB_USB_AZ6007=m -CONFIG_DVB_USB_CE6230=m -CONFIG_DVB_USB_EC168=m -CONFIG_DVB_USB_GL861=m -CONFIG_DVB_USB_LME2510=m -CONFIG_DVB_USB_MXL111SF=m -CONFIG_DVB_USB_RTL28XXU=m -CONFIG_DVB_USB_DVBSKY=m -CONFIG_DVB_USB_ZD1301=m -CONFIG_DVB_TTUSB_BUDGET=m -CONFIG_DVB_TTUSB_DEC=m -CONFIG_SMS_USB_DRV=m -CONFIG_DVB_B2C2_FLEXCOP_USB=m -# CONFIG_DVB_B2C2_FLEXCOP_USB_DEBUG is not set -CONFIG_DVB_AS102=m - -# -# Webcam, TV (analog/digital) USB devices -# -CONFIG_VIDEO_EM28XX=m -CONFIG_VIDEO_EM28XX_V4L2=m -CONFIG_VIDEO_EM28XX_ALSA=m 
-CONFIG_VIDEO_EM28XX_DVB=m -CONFIG_VIDEO_EM28XX_RC=m - -# -# Software defined radio USB devices -# -CONFIG_USB_AIRSPY=m -CONFIG_USB_HACKRF=m -CONFIG_USB_MSI2500=m -CONFIG_MEDIA_PCI_SUPPORT=y - -# -# Media capture support -# -CONFIG_VIDEO_MEYE=m -CONFIG_VIDEO_SOLO6X10=m -CONFIG_VIDEO_TW5864=m -CONFIG_VIDEO_TW68=m -CONFIG_VIDEO_TW686X=m - -# -# Media capture/analog TV support -# -CONFIG_VIDEO_IVTV=m -# CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS is not set -CONFIG_VIDEO_IVTV_ALSA=m -CONFIG_VIDEO_FB_IVTV=m -# CONFIG_VIDEO_FB_IVTV_FORCE_PAT is not set -CONFIG_VIDEO_HEXIUM_GEMINI=m -CONFIG_VIDEO_HEXIUM_ORION=m -CONFIG_VIDEO_MXB=m -CONFIG_VIDEO_DT3155=m - -# -# Media capture/analog/hybrid TV support -# -CONFIG_VIDEO_CX18=m -CONFIG_VIDEO_CX18_ALSA=m -CONFIG_VIDEO_CX23885=m -CONFIG_MEDIA_ALTERA_CI=m -CONFIG_VIDEO_CX25821=m -CONFIG_VIDEO_CX25821_ALSA=m -CONFIG_VIDEO_CX88=m -CONFIG_VIDEO_CX88_ALSA=m -CONFIG_VIDEO_CX88_BLACKBIRD=m -CONFIG_VIDEO_CX88_DVB=m -CONFIG_VIDEO_CX88_ENABLE_VP3054=y -CONFIG_VIDEO_CX88_VP3054=m -CONFIG_VIDEO_CX88_MPEG=m -CONFIG_VIDEO_BT848=m -CONFIG_DVB_BT8XX=m -CONFIG_VIDEO_SAA7134=m -CONFIG_VIDEO_SAA7134_ALSA=m -CONFIG_VIDEO_SAA7134_RC=y -CONFIG_VIDEO_SAA7134_DVB=m -CONFIG_VIDEO_SAA7134_GO7007=m -CONFIG_VIDEO_SAA7164=m - -# -# Media digital TV PCI Adapters -# -CONFIG_DVB_AV7110_IR=y -CONFIG_DVB_AV7110=m -CONFIG_DVB_AV7110_OSD=y -CONFIG_DVB_BUDGET_CORE=m -CONFIG_DVB_BUDGET=m -CONFIG_DVB_BUDGET_CI=m -CONFIG_DVB_BUDGET_AV=m -CONFIG_DVB_BUDGET_PATCH=m -CONFIG_DVB_B2C2_FLEXCOP_PCI=m -# CONFIG_DVB_B2C2_FLEXCOP_PCI_DEBUG is not set -CONFIG_DVB_PLUTO2=m -CONFIG_DVB_DM1105=m -CONFIG_DVB_PT1=m -CONFIG_DVB_PT3=m -CONFIG_MANTIS_CORE=m -CONFIG_DVB_MANTIS=m -CONFIG_DVB_HOPPER=m -CONFIG_DVB_NGENE=m -CONFIG_DVB_DDBRIDGE=m -# CONFIG_DVB_DDBRIDGE_MSIENABLE is not set -CONFIG_DVB_SMIPCIE=m -CONFIG_DVB_NETUP_UNIDVB=m -CONFIG_VIDEO_IPU3_CIO2=m -CONFIG_RADIO_ADAPTERS=y -CONFIG_RADIO_TEA575X=m -CONFIG_RADIO_SI470X=m -CONFIG_USB_SI470X=m -CONFIG_I2C_SI470X=m -CONFIG_RADIO_SI4713=m -CONFIG_USB_SI4713=m -CONFIG_PLATFORM_SI4713=m -CONFIG_I2C_SI4713=m -CONFIG_RADIO_SI476X=m -CONFIG_USB_MR800=m -CONFIG_USB_DSBR=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_SHARK=m -CONFIG_RADIO_SHARK2=m -CONFIG_USB_KEENE=m -CONFIG_USB_RAREMONO=m -CONFIG_USB_MA901=m -CONFIG_RADIO_TEA5764=m -CONFIG_RADIO_SAA7706H=m -CONFIG_RADIO_TEF6862=m -CONFIG_RADIO_WL1273=m -CONFIG_RADIO_WL128X=m -CONFIG_MEDIA_COMMON_OPTIONS=y - -# -# common driver options -# -CONFIG_VIDEO_CX2341X=m -CONFIG_VIDEO_TVEEPROM=m -CONFIG_CYPRESS_FIRMWARE=m -CONFIG_VIDEOBUF2_CORE=m -CONFIG_VIDEOBUF2_V4L2=m -CONFIG_VIDEOBUF2_MEMOPS=m -CONFIG_VIDEOBUF2_DMA_CONTIG=m -CONFIG_VIDEOBUF2_VMALLOC=m -CONFIG_VIDEOBUF2_DMA_SG=m -CONFIG_VIDEOBUF2_DVB=m -CONFIG_DVB_B2C2_FLEXCOP=m -CONFIG_VIDEO_SAA7146=m -CONFIG_VIDEO_SAA7146_VV=m -CONFIG_SMS_SIANO_MDTV=m -CONFIG_SMS_SIANO_RC=y -# CONFIG_SMS_SIANO_DEBUGFS is not set -CONFIG_VIDEO_V4L2_TPG=m -CONFIG_V4L_PLATFORM_DRIVERS=y -CONFIG_VIDEO_CAFE_CCIC=m -CONFIG_VIDEO_CADENCE=y -CONFIG_VIDEO_CADENCE_CSI2RX=m -CONFIG_VIDEO_CADENCE_CSI2TX=m -CONFIG_VIDEO_ASPEED=m -CONFIG_VIDEO_MUX=m -CONFIG_VIDEO_XILINX=m -# CONFIG_VIDEO_XILINX_CSI2RXSS is not set -CONFIG_VIDEO_XILINX_TPG=m -CONFIG_VIDEO_XILINX_VTC=m -CONFIG_V4L_MEM2MEM_DRIVERS=y -CONFIG_VIDEO_MEM2MEM_DEINTERLACE=m -CONFIG_DVB_PLATFORM_DRIVERS=y -CONFIG_SDR_PLATFORM_DRIVERS=y - -# -# MMC/SDIO DVB adapters -# -CONFIG_SMS_SDIO_DRV=m -CONFIG_V4L_TEST_DRIVERS=y -CONFIG_VIDEO_VIMC=m -CONFIG_VIDEO_VIVID=m -CONFIG_VIDEO_VIVID_CEC=y -CONFIG_VIDEO_VIVID_MAX_DEVS=64 -CONFIG_VIDEO_VIM2M=m 
-CONFIG_VIDEO_VICODEC=m - -# -# FireWire (IEEE 1394) Adapters -# -CONFIG_DVB_FIREDTV=m -CONFIG_DVB_FIREDTV_INPUT=y -# end of Media drivers - -# -# Media ancillary drivers -# -CONFIG_MEDIA_ATTACH=y - -# -# IR I2C driver auto-selected by 'Autoselect ancillary drivers' -# -CONFIG_VIDEO_IR_I2C=m - -# -# Audio decoders, processors and mixers -# -CONFIG_VIDEO_TVAUDIO=m -CONFIG_VIDEO_TDA7432=m -CONFIG_VIDEO_TDA9840=m -CONFIG_VIDEO_TDA1997X=m -CONFIG_VIDEO_TEA6415C=m -CONFIG_VIDEO_TEA6420=m -CONFIG_VIDEO_MSP3400=m -CONFIG_VIDEO_CS3308=m -CONFIG_VIDEO_CS5345=m -CONFIG_VIDEO_CS53L32A=m -CONFIG_VIDEO_TLV320AIC23B=m -CONFIG_VIDEO_UDA1342=m -CONFIG_VIDEO_WM8775=m -CONFIG_VIDEO_WM8739=m -CONFIG_VIDEO_VP27SMPX=m -CONFIG_VIDEO_SONY_BTF_MPX=m -# end of Audio decoders, processors and mixers - -# -# RDS decoders -# -CONFIG_VIDEO_SAA6588=m -# end of RDS decoders - -# -# Video decoders -# -CONFIG_VIDEO_ADV7180=m -CONFIG_VIDEO_ADV7183=m -CONFIG_VIDEO_ADV748X=m -CONFIG_VIDEO_ADV7604=m -CONFIG_VIDEO_ADV7604_CEC=y -CONFIG_VIDEO_ADV7842=m -CONFIG_VIDEO_ADV7842_CEC=y -CONFIG_VIDEO_BT819=m -CONFIG_VIDEO_BT856=m -CONFIG_VIDEO_BT866=m -CONFIG_VIDEO_KS0127=m -CONFIG_VIDEO_ML86V7667=m -CONFIG_VIDEO_SAA7110=m -CONFIG_VIDEO_SAA711X=m -CONFIG_VIDEO_TC358743=m -CONFIG_VIDEO_TC358743_CEC=y -CONFIG_VIDEO_TVP514X=m -CONFIG_VIDEO_TVP5150=m -CONFIG_VIDEO_TVP7002=m -CONFIG_VIDEO_TW2804=m -CONFIG_VIDEO_TW9903=m -CONFIG_VIDEO_TW9906=m -CONFIG_VIDEO_TW9910=m -CONFIG_VIDEO_VPX3220=m -# CONFIG_VIDEO_MAX9286 is not set - -# -# Video and audio decoders -# -CONFIG_VIDEO_SAA717X=m -CONFIG_VIDEO_CX25840=m -# end of Video decoders - -# -# Video encoders -# -CONFIG_VIDEO_SAA7127=m -CONFIG_VIDEO_SAA7185=m -CONFIG_VIDEO_ADV7170=m -CONFIG_VIDEO_ADV7175=m -CONFIG_VIDEO_ADV7343=m -CONFIG_VIDEO_ADV7393=m -CONFIG_VIDEO_AD9389B=m -CONFIG_VIDEO_AK881X=m -CONFIG_VIDEO_THS8200=m -# end of Video encoders - -# -# Video improvement chips -# -CONFIG_VIDEO_UPD64031A=m -CONFIG_VIDEO_UPD64083=m -# end of Video improvement chips - -# -# Audio/Video compression chips -# -CONFIG_VIDEO_SAA6752HS=m -# end of Audio/Video compression chips - -# -# SDR tuner chips -# -CONFIG_SDR_MAX2175=m -# end of SDR tuner chips - -# -# Miscellaneous helper chips -# -CONFIG_VIDEO_THS7303=m -CONFIG_VIDEO_M52790=m -CONFIG_VIDEO_I2C=m -CONFIG_VIDEO_ST_MIPID02=m -# end of Miscellaneous helper chips - -# -# Camera sensor devices -# -CONFIG_VIDEO_APTINA_PLL=m -CONFIG_VIDEO_SMIAPP_PLL=m -CONFIG_VIDEO_HI556=m -CONFIG_VIDEO_IMX214=m -CONFIG_VIDEO_IMX219=m -CONFIG_VIDEO_IMX258=m -CONFIG_VIDEO_IMX274=m -CONFIG_VIDEO_IMX290=m -CONFIG_VIDEO_IMX319=m -CONFIG_VIDEO_IMX355=m -CONFIG_VIDEO_OV2640=m -CONFIG_VIDEO_OV2659=m -CONFIG_VIDEO_OV2680=m -CONFIG_VIDEO_OV2685=m -CONFIG_VIDEO_OV2740=m -CONFIG_VIDEO_OV5640=m -CONFIG_VIDEO_OV5645=m -CONFIG_VIDEO_OV5647=m -CONFIG_VIDEO_OV6650=m -CONFIG_VIDEO_OV5670=m -CONFIG_VIDEO_OV5675=m -CONFIG_VIDEO_OV5695=m -CONFIG_VIDEO_OV7251=m -CONFIG_VIDEO_OV772X=m -CONFIG_VIDEO_OV7640=m -CONFIG_VIDEO_OV7670=m -CONFIG_VIDEO_OV7740=m -CONFIG_VIDEO_OV8856=m -CONFIG_VIDEO_OV9640=m -CONFIG_VIDEO_OV9650=m -CONFIG_VIDEO_OV13858=m -CONFIG_VIDEO_VS6624=m -CONFIG_VIDEO_MT9M001=m -CONFIG_VIDEO_MT9M032=m -CONFIG_VIDEO_MT9M111=m -CONFIG_VIDEO_MT9P031=m -CONFIG_VIDEO_MT9T001=m -CONFIG_VIDEO_MT9T112=m -CONFIG_VIDEO_MT9V011=m -CONFIG_VIDEO_MT9V032=m -CONFIG_VIDEO_MT9V111=m -CONFIG_VIDEO_SR030PC30=m -CONFIG_VIDEO_NOON010PC30=m -CONFIG_VIDEO_M5MOLS=m -# CONFIG_VIDEO_RDACM20 is not set -CONFIG_VIDEO_RJ54N1=m -CONFIG_VIDEO_S5K6AA=m -CONFIG_VIDEO_S5K6A3=m -CONFIG_VIDEO_S5K4ECGX=m 
-CONFIG_VIDEO_S5K5BAF=m -CONFIG_VIDEO_SMIAPP=m -CONFIG_VIDEO_ET8EK8=m -CONFIG_VIDEO_S5C73M3=m -# end of Camera sensor devices - -# -# Lens drivers -# -CONFIG_VIDEO_AD5820=m -CONFIG_VIDEO_AK7375=m -CONFIG_VIDEO_DW9714=m -CONFIG_VIDEO_DW9768=m -CONFIG_VIDEO_DW9807_VCM=m -# end of Lens drivers - -# -# Flash devices -# -CONFIG_VIDEO_ADP1653=m -CONFIG_VIDEO_LM3560=m -CONFIG_VIDEO_LM3646=m -# end of Flash devices - -# -# SPI helper chips -# -CONFIG_VIDEO_GS1662=m -# end of SPI helper chips - -# -# Media SPI Adapters -# -CONFIG_CXD2880_SPI_DRV=m -# end of Media SPI Adapters - -CONFIG_MEDIA_TUNER=m - -# -# Customize TV tuners -# -CONFIG_MEDIA_TUNER_SIMPLE=m -CONFIG_MEDIA_TUNER_TDA18250=m -CONFIG_MEDIA_TUNER_TDA8290=m -CONFIG_MEDIA_TUNER_TDA827X=m -CONFIG_MEDIA_TUNER_TDA18271=m -CONFIG_MEDIA_TUNER_TDA9887=m -CONFIG_MEDIA_TUNER_TEA5761=m -CONFIG_MEDIA_TUNER_TEA5767=m -CONFIG_MEDIA_TUNER_MSI001=m -CONFIG_MEDIA_TUNER_MT20XX=m -CONFIG_MEDIA_TUNER_MT2060=m -CONFIG_MEDIA_TUNER_MT2063=m -CONFIG_MEDIA_TUNER_MT2266=m -CONFIG_MEDIA_TUNER_MT2131=m -CONFIG_MEDIA_TUNER_QT1010=m -CONFIG_MEDIA_TUNER_XC2028=m -CONFIG_MEDIA_TUNER_XC5000=m -CONFIG_MEDIA_TUNER_XC4000=m -CONFIG_MEDIA_TUNER_MXL5005S=m -CONFIG_MEDIA_TUNER_MXL5007T=m -CONFIG_MEDIA_TUNER_MC44S803=m -CONFIG_MEDIA_TUNER_MAX2165=m -CONFIG_MEDIA_TUNER_TDA18218=m -CONFIG_MEDIA_TUNER_FC0011=m -CONFIG_MEDIA_TUNER_FC0012=m -CONFIG_MEDIA_TUNER_FC0013=m -CONFIG_MEDIA_TUNER_TDA18212=m -CONFIG_MEDIA_TUNER_E4000=m -CONFIG_MEDIA_TUNER_FC2580=m -CONFIG_MEDIA_TUNER_M88RS6000T=m -CONFIG_MEDIA_TUNER_TUA9001=m -CONFIG_MEDIA_TUNER_SI2157=m -CONFIG_MEDIA_TUNER_IT913X=m -CONFIG_MEDIA_TUNER_R820T=m -CONFIG_MEDIA_TUNER_MXL301RF=m -CONFIG_MEDIA_TUNER_QM1D1C0042=m -CONFIG_MEDIA_TUNER_QM1D1B0004=m -# end of Customize TV tuners - -# -# Customise DVB Frontends -# - -# -# Multistandard (satellite) frontends -# -CONFIG_DVB_STB0899=m -CONFIG_DVB_STB6100=m -CONFIG_DVB_STV090x=m -CONFIG_DVB_STV0910=m -CONFIG_DVB_STV6110x=m -CONFIG_DVB_STV6111=m -CONFIG_DVB_MXL5XX=m -CONFIG_DVB_M88DS3103=m - -# -# Multistandard (cable + terrestrial) frontends -# -CONFIG_DVB_DRXK=m -CONFIG_DVB_TDA18271C2DD=m -CONFIG_DVB_SI2165=m -CONFIG_DVB_MN88472=m -CONFIG_DVB_MN88473=m - -# -# DVB-S (satellite) frontends -# -CONFIG_DVB_CX24110=m -CONFIG_DVB_CX24123=m -CONFIG_DVB_MT312=m -CONFIG_DVB_ZL10036=m -CONFIG_DVB_ZL10039=m -CONFIG_DVB_S5H1420=m -CONFIG_DVB_STV0288=m -CONFIG_DVB_STB6000=m -CONFIG_DVB_STV0299=m -CONFIG_DVB_STV6110=m -CONFIG_DVB_STV0900=m -CONFIG_DVB_TDA8083=m -CONFIG_DVB_TDA10086=m -CONFIG_DVB_TDA8261=m -CONFIG_DVB_VES1X93=m -CONFIG_DVB_TUNER_ITD1000=m -CONFIG_DVB_TUNER_CX24113=m -CONFIG_DVB_TDA826X=m -CONFIG_DVB_TUA6100=m -CONFIG_DVB_CX24116=m -CONFIG_DVB_CX24117=m -CONFIG_DVB_CX24120=m -CONFIG_DVB_SI21XX=m -CONFIG_DVB_TS2020=m -CONFIG_DVB_DS3000=m -CONFIG_DVB_MB86A16=m -CONFIG_DVB_TDA10071=m - -# -# DVB-T (terrestrial) frontends -# -CONFIG_DVB_SP8870=m -CONFIG_DVB_SP887X=m -CONFIG_DVB_CX22700=m -CONFIG_DVB_CX22702=m -CONFIG_DVB_S5H1432=m -CONFIG_DVB_DRXD=m -CONFIG_DVB_L64781=m -CONFIG_DVB_TDA1004X=m -CONFIG_DVB_NXT6000=m -CONFIG_DVB_MT352=m -CONFIG_DVB_ZL10353=m -CONFIG_DVB_DIB3000MB=m -CONFIG_DVB_DIB3000MC=m -CONFIG_DVB_DIB7000M=m -CONFIG_DVB_DIB7000P=m -CONFIG_DVB_DIB9000=m -CONFIG_DVB_TDA10048=m -CONFIG_DVB_AF9013=m -CONFIG_DVB_EC100=m -CONFIG_DVB_STV0367=m -CONFIG_DVB_CXD2820R=m -CONFIG_DVB_CXD2841ER=m -CONFIG_DVB_RTL2830=m -CONFIG_DVB_RTL2832=m -CONFIG_DVB_RTL2832_SDR=m -CONFIG_DVB_SI2168=m -CONFIG_DVB_AS102_FE=m -CONFIG_DVB_ZD1301_DEMOD=m -CONFIG_DVB_GP8PSK_FE=m -CONFIG_DVB_CXD2880=m - -# -# 
DVB-C (cable) frontends -# -CONFIG_DVB_VES1820=m -CONFIG_DVB_TDA10021=m -CONFIG_DVB_TDA10023=m -CONFIG_DVB_STV0297=m - -# -# ATSC (North American/Korean Terrestrial/Cable DTV) frontends -# -CONFIG_DVB_NXT200X=m -CONFIG_DVB_OR51211=m -CONFIG_DVB_OR51132=m -CONFIG_DVB_BCM3510=m -CONFIG_DVB_LGDT330X=m -CONFIG_DVB_LGDT3305=m -CONFIG_DVB_LGDT3306A=m -CONFIG_DVB_LG2160=m -CONFIG_DVB_S5H1409=m -CONFIG_DVB_AU8522=m -CONFIG_DVB_AU8522_DTV=m -CONFIG_DVB_AU8522_V4L=m -CONFIG_DVB_S5H1411=m - -# -# ISDB-T (terrestrial) frontends -# -CONFIG_DVB_S921=m -CONFIG_DVB_DIB8000=m -CONFIG_DVB_MB86A20S=m - -# -# ISDB-S (satellite) & ISDB-T (terrestrial) frontends -# -CONFIG_DVB_TC90522=m -CONFIG_DVB_MN88443X=m - -# -# Digital terrestrial only tuners/PLL -# -CONFIG_DVB_PLL=m -CONFIG_DVB_TUNER_DIB0070=m -CONFIG_DVB_TUNER_DIB0090=m - -# -# SEC control devices for DVB-S -# -CONFIG_DVB_DRX39XYJ=m -CONFIG_DVB_LNBH25=m -CONFIG_DVB_LNBH29=m -CONFIG_DVB_LNBP21=m -CONFIG_DVB_LNBP22=m -CONFIG_DVB_ISL6405=m -CONFIG_DVB_ISL6421=m -CONFIG_DVB_ISL6423=m -CONFIG_DVB_A8293=m -CONFIG_DVB_LGS8GL5=m -CONFIG_DVB_LGS8GXX=m -CONFIG_DVB_ATBM8830=m -CONFIG_DVB_TDA665x=m -CONFIG_DVB_IX2505V=m -CONFIG_DVB_M88RS2000=m -CONFIG_DVB_AF9033=m -CONFIG_DVB_HORUS3A=m -CONFIG_DVB_ASCOT2E=m -CONFIG_DVB_HELENE=m - -# -# Common Interface (EN50221) controller drivers -# -CONFIG_DVB_CXD2099=m -CONFIG_DVB_SP2=m -# end of Customise DVB Frontends - -# -# Tools to develop new frontends -# -CONFIG_DVB_DUMMY_FE=m -# end of Media ancillary drivers - -# -# Graphics support -# -CONFIG_AGP=m -CONFIG_AGP_AMD64=m -CONFIG_AGP_INTEL=m -CONFIG_AGP_SIS=m -CONFIG_AGP_VIA=m -CONFIG_INTEL_GTT=m -CONFIG_VGA_ARB=y -CONFIG_VGA_ARB_MAX_GPUS=10 -CONFIG_VGA_SWITCHEROO=y -CONFIG_DRM=m -CONFIG_DRM_MIPI_DBI=m -CONFIG_DRM_MIPI_DSI=y -CONFIG_DRM_DP_AUX_CHARDEV=y -# CONFIG_DRM_DEBUG_SELFTEST is not set -CONFIG_DRM_KMS_HELPER=m -CONFIG_DRM_KMS_FB_HELPER=y -# CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS is not set -CONFIG_DRM_FBDEV_EMULATION=y -CONFIG_DRM_FBDEV_OVERALLOC=100 -# CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM is not set -CONFIG_DRM_LOAD_EDID_FIRMWARE=y -CONFIG_DRM_DP_CEC=y -CONFIG_DRM_TTM=m -CONFIG_DRM_TTM_DMA_PAGE_POOL=y -CONFIG_DRM_VRAM_HELPER=m -CONFIG_DRM_TTM_HELPER=m -CONFIG_DRM_GEM_CMA_HELPER=y -CONFIG_DRM_KMS_CMA_HELPER=y -CONFIG_DRM_GEM_SHMEM_HELPER=y -CONFIG_DRM_SCHED=m - -# -# I2C encoder or helper chips -# -CONFIG_DRM_I2C_CH7006=m -CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_I2C_NXP_TDA998X=m -CONFIG_DRM_I2C_NXP_TDA9950=m -# end of I2C encoder or helper chips - -# -# ARM devices -# -CONFIG_DRM_KOMEDA=m -# end of ARM devices - -CONFIG_DRM_RADEON=m -CONFIG_DRM_RADEON_USERPTR=y -CONFIG_DRM_AMDGPU=m -CONFIG_DRM_AMDGPU_SI=y -CONFIG_DRM_AMDGPU_CIK=y -CONFIG_DRM_AMDGPU_USERPTR=y -# CONFIG_DRM_AMDGPU_GART_DEBUGFS is not set - -# -# ACP (Audio CoProcessor) Configuration -# -CONFIG_DRM_AMD_ACP=y -# end of ACP (Audio CoProcessor) Configuration - -# -# Display Engine Configuration -# -CONFIG_DRM_AMD_DC=y -CONFIG_DRM_AMD_DC_DCN=y -CONFIG_DRM_AMD_DC_DCN3_0=y -CONFIG_DRM_AMD_DC_HDCP=y -# CONFIG_DEBUG_KERNEL_DC is not set -# end of Display Engine Configuration - -CONFIG_HSA_AMD=y -CONFIG_DRM_NOUVEAU=m -# CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT is not set -CONFIG_NOUVEAU_DEBUG=5 -CONFIG_NOUVEAU_DEBUG_DEFAULT=3 -# CONFIG_NOUVEAU_DEBUG_MMU is not set -# CONFIG_NOUVEAU_DEBUG_PUSH is not set -CONFIG_DRM_NOUVEAU_BACKLIGHT=y -CONFIG_DRM_NOUVEAU_SVM=y -CONFIG_DRM_I915=m -CONFIG_DRM_I915_FORCE_PROBE="*" -CONFIG_DRM_I915_CAPTURE_ERROR=y -CONFIG_DRM_I915_COMPRESS_ERROR=y -CONFIG_DRM_I915_USERPTR=y -CONFIG_DRM_I915_GVT=y 
-CONFIG_DRM_I915_GVT_KVMGT=m - -# -# drm/i915 Debugging -# -# CONFIG_DRM_I915_WERROR is not set -# CONFIG_DRM_I915_DEBUG is not set -# CONFIG_DRM_I915_DEBUG_MMIO is not set -# CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS is not set -# CONFIG_DRM_I915_SW_FENCE_CHECK_DAG is not set -# CONFIG_DRM_I915_DEBUG_GUC is not set -# CONFIG_DRM_I915_SELFTEST is not set -# CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS is not set -# CONFIG_DRM_I915_DEBUG_VBLANK_EVADE is not set -# CONFIG_DRM_I915_DEBUG_RUNTIME_PM is not set -# end of drm/i915 Debugging - -# -# drm/i915 Profile Guided Optimisation -# -CONFIG_DRM_I915_FENCE_TIMEOUT=10000 -CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND=250 -CONFIG_DRM_I915_HEARTBEAT_INTERVAL=2500 -CONFIG_DRM_I915_PREEMPT_TIMEOUT=640 -CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT=8000 -CONFIG_DRM_I915_STOP_TIMEOUT=100 -CONFIG_DRM_I915_TIMESLICE_DURATION=1 -# end of drm/i915 Profile Guided Optimisation - -CONFIG_DRM_VGEM=m -CONFIG_DRM_VKMS=m -CONFIG_DRM_VMWGFX=m -CONFIG_DRM_VMWGFX_FBCON=y -CONFIG_DRM_GMA500=m -CONFIG_DRM_GMA600=y -CONFIG_DRM_GMA3600=y -CONFIG_DRM_UDL=m -CONFIG_DRM_AST=m -CONFIG_DRM_MGAG200=m -CONFIG_DRM_RCAR_DW_HDMI=m -CONFIG_DRM_RCAR_LVDS=m -CONFIG_DRM_QXL=m -CONFIG_DRM_BOCHS=m -CONFIG_DRM_VIRTIO_GPU=m -CONFIG_DRM_PANEL=y - -# -# Display Panels -# -CONFIG_DRM_PANEL_ARM_VERSATILE=m -CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596=m -CONFIG_DRM_PANEL_BOE_HIMAX8279D=m -CONFIG_DRM_PANEL_BOE_TV101WUM_NL6=m -CONFIG_DRM_PANEL_LVDS=m -CONFIG_DRM_PANEL_SIMPLE=m -CONFIG_DRM_PANEL_ELIDA_KD35T133=m -CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02=m -CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m -CONFIG_DRM_PANEL_ILITEK_IL9322=m -CONFIG_DRM_PANEL_ILITEK_ILI9881C=m -CONFIG_DRM_PANEL_INNOLUX_P079ZCA=m -CONFIG_DRM_PANEL_JDI_LT070ME05000=m -CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04=m -CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W=m -CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829=m -CONFIG_DRM_PANEL_SAMSUNG_LD9040=m -CONFIG_DRM_PANEL_LG_LB035Q02=m -CONFIG_DRM_PANEL_LG_LG4573=m -CONFIG_DRM_PANEL_NEC_NL8048HL11=m -CONFIG_DRM_PANEL_NOVATEK_NT35510=m -CONFIG_DRM_PANEL_NOVATEK_NT39016=m -CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO=m -CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m -CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS=m -CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m -CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m -CONFIG_DRM_PANEL_RAYDIUM_RM67191=m -CONFIG_DRM_PANEL_RAYDIUM_RM68200=m -CONFIG_DRM_PANEL_RONBO_RB070D30=m -CONFIG_DRM_PANEL_SAMSUNG_S6D16D0=m -CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2=m -CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03=m -CONFIG_DRM_PANEL_SAMSUNG_S6E63M0=m -CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01=m -CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0=m -CONFIG_DRM_PANEL_SEIKO_43WVF1G=m -CONFIG_DRM_PANEL_SHARP_LQ101R1SX01=m -CONFIG_DRM_PANEL_SHARP_LS037V7DW01=m -CONFIG_DRM_PANEL_SHARP_LS043T1LE01=m -CONFIG_DRM_PANEL_SITRONIX_ST7701=m -# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set -CONFIG_DRM_PANEL_SITRONIX_ST7789V=m -CONFIG_DRM_PANEL_SONY_ACX424AKP=m -CONFIG_DRM_PANEL_SONY_ACX565AKM=m -CONFIG_DRM_PANEL_TPO_TD028TTEC1=m -CONFIG_DRM_PANEL_TPO_TD043MTEA1=m -CONFIG_DRM_PANEL_TPO_TPG110=m -CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA=m -CONFIG_DRM_PANEL_VISIONOX_RM69299=m -CONFIG_DRM_PANEL_XINPENG_XPP055C272=m -# end of Display Panels - -CONFIG_DRM_BRIDGE=y -CONFIG_DRM_PANEL_BRIDGE=y - -# -# Display Interface Bridges -# -CONFIG_DRM_CDNS_DSI=m -CONFIG_DRM_CHRONTEL_CH7033=m -CONFIG_DRM_DISPLAY_CONNECTOR=m -CONFIG_DRM_LVDS_CODEC=m -CONFIG_DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW=m -CONFIG_DRM_NWL_MIPI_DSI=m -CONFIG_DRM_NXP_PTN3460=m -CONFIG_DRM_PARADE_PS8622=m -CONFIG_DRM_PARADE_PS8640=m 
-CONFIG_DRM_SIL_SII8620=m -CONFIG_DRM_SII902X=m -CONFIG_DRM_SII9234=m -CONFIG_DRM_SIMPLE_BRIDGE=m -CONFIG_DRM_THINE_THC63LVD1024=m -CONFIG_DRM_TOSHIBA_TC358764=m -CONFIG_DRM_TOSHIBA_TC358767=m -CONFIG_DRM_TOSHIBA_TC358768=m -CONFIG_DRM_TI_TFP410=m -CONFIG_DRM_TI_SN65DSI86=m -CONFIG_DRM_TI_TPD12S015=m -CONFIG_DRM_ANALOGIX_ANX6345=m -CONFIG_DRM_ANALOGIX_ANX78XX=m -CONFIG_DRM_ANALOGIX_DP=m -CONFIG_DRM_I2C_ADV7511=m -CONFIG_DRM_I2C_ADV7511_AUDIO=y -CONFIG_DRM_I2C_ADV7511_CEC=y -CONFIG_DRM_DW_HDMI=m -CONFIG_DRM_DW_HDMI_AHB_AUDIO=m -CONFIG_DRM_DW_HDMI_I2S_AUDIO=m -CONFIG_DRM_DW_HDMI_CEC=m -# end of Display Interface Bridges - -# CONFIG_DRM_ETNAVIV is not set -CONFIG_DRM_ARCPGU=m -CONFIG_DRM_MXS=y -CONFIG_DRM_MXSFB=m -CONFIG_DRM_CIRRUS_QEMU=m -CONFIG_DRM_GM12U320=m -CONFIG_TINYDRM_HX8357D=m -CONFIG_TINYDRM_ILI9225=m -CONFIG_TINYDRM_ILI9341=m -CONFIG_TINYDRM_ILI9486=m -CONFIG_TINYDRM_MI0283QT=m -CONFIG_TINYDRM_REPAPER=m -CONFIG_TINYDRM_ST7586=m -CONFIG_TINYDRM_ST7735R=m -CONFIG_DRM_XEN=y -CONFIG_DRM_XEN_FRONTEND=m -CONFIG_DRM_VBOXVIDEO=m -# CONFIG_DRM_LEGACY is not set -CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y - -# -# Frame buffer Devices -# -CONFIG_FB_CMDLINE=y -CONFIG_FB_NOTIFY=y -CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y -CONFIG_FB_BOOT_VESA_SUPPORT=y -CONFIG_FB_CFB_FILLRECT=y -CONFIG_FB_CFB_COPYAREA=y -CONFIG_FB_CFB_IMAGEBLIT=y -CONFIG_FB_SYS_FILLRECT=m -CONFIG_FB_SYS_COPYAREA=m -CONFIG_FB_SYS_IMAGEBLIT=m -# CONFIG_FB_FOREIGN_ENDIAN is not set -CONFIG_FB_SYS_FOPS=m -CONFIG_FB_DEFERRED_IO=y -CONFIG_FB_BACKLIGHT=m -CONFIG_FB_MODE_HELPERS=y -CONFIG_FB_TILEBLITTING=y - -# -# Frame buffer hardware drivers -# -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ARC is not set -# CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_VGA16 is not set -# CONFIG_FB_UVESA is not set -CONFIG_FB_VESA=y -CONFIG_FB_EFI=y -# CONFIG_FB_N411 is not set -# CONFIG_FB_HGA is not set -# CONFIG_FB_OPENCORES is not set -# CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set -# CONFIG_FB_I740 is not set -# CONFIG_FB_LE80578 is not set -# CONFIG_FB_INTEL is not set -# CONFIG_FB_MATROX is not set -# CONFIG_FB_RADEON is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_S3 is not set -# CONFIG_FB_SAVAGE is not set -# CONFIG_FB_SIS is not set -# CONFIG_FB_VIA is not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VOODOO1 is not set -# CONFIG_FB_VT8623 is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_ARK is not set -# CONFIG_FB_PM3 is not set -# CONFIG_FB_CARMINE is not set -# CONFIG_FB_SM501 is not set -# CONFIG_FB_SMSCUFX is not set -# CONFIG_FB_UDL is not set -# CONFIG_FB_IBM_GXT4500 is not set -# CONFIG_FB_VIRTUAL is not set -CONFIG_XEN_FBDEV_FRONTEND=m -# CONFIG_FB_METRONOME is not set -# CONFIG_FB_MB862XX is not set -CONFIG_FB_HYPERV=m -CONFIG_FB_SIMPLE=y -# CONFIG_FB_SSD1307 is not set -# CONFIG_FB_SM712 is not set -# end of Frame buffer Devices - -# -# Backlight & LCD device support -# -CONFIG_LCD_CLASS_DEVICE=m -CONFIG_LCD_L4F00242T03=m -CONFIG_LCD_LMS283GF05=m -CONFIG_LCD_LTV350QV=m -CONFIG_LCD_ILI922X=m -CONFIG_LCD_ILI9320=m -CONFIG_LCD_TDO24M=m -CONFIG_LCD_VGG2432A4=m -CONFIG_LCD_PLATFORM=m -CONFIG_LCD_AMS369FG06=m -CONFIG_LCD_LMS501KF03=m -CONFIG_LCD_HX8357=m -CONFIG_LCD_OTM3225A=m -CONFIG_BACKLIGHT_CLASS_DEVICE=y -CONFIG_BACKLIGHT_LM3533=m -CONFIG_BACKLIGHT_PWM=m -CONFIG_BACKLIGHT_DA903X=m -CONFIG_BACKLIGHT_DA9052=m 
-CONFIG_BACKLIGHT_MAX8925=m -CONFIG_BACKLIGHT_APPLE=m -CONFIG_BACKLIGHT_QCOM_WLED=m -CONFIG_BACKLIGHT_SAHARA=m -CONFIG_BACKLIGHT_WM831X=m -CONFIG_BACKLIGHT_ADP5520=m -CONFIG_BACKLIGHT_ADP8860=m -CONFIG_BACKLIGHT_ADP8870=m -CONFIG_BACKLIGHT_88PM860X=m -CONFIG_BACKLIGHT_PCF50633=m -CONFIG_BACKLIGHT_AAT2870=m -CONFIG_BACKLIGHT_LM3630A=m -CONFIG_BACKLIGHT_LM3639=m -CONFIG_BACKLIGHT_LP855X=m -CONFIG_BACKLIGHT_LP8788=m -CONFIG_BACKLIGHT_PANDORA=m -CONFIG_BACKLIGHT_SKY81452=m -CONFIG_BACKLIGHT_TPS65217=m -CONFIG_BACKLIGHT_AS3711=m -CONFIG_BACKLIGHT_GPIO=m -CONFIG_BACKLIGHT_LV5207LP=m -CONFIG_BACKLIGHT_BD6107=m -CONFIG_BACKLIGHT_ARCXCNN=m -CONFIG_BACKLIGHT_RAVE_SP=m -CONFIG_BACKLIGHT_LED=m -# end of Backlight & LCD device support - -CONFIG_VIDEOMODE_HELPERS=y -CONFIG_HDMI=y - -# -# Console display driver support -# -CONFIG_VGA_CONSOLE=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_DUMMY_CONSOLE_COLUMNS=80 -CONFIG_DUMMY_CONSOLE_ROWS=25 -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y -CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER=y -# end of Console display driver support - -# CONFIG_LOGO is not set -# end of Graphics support - -CONFIG_SOUND=m -CONFIG_SOUND_OSS_CORE=y -# CONFIG_SOUND_OSS_CORE_PRECLAIM is not set -CONFIG_SND=m -CONFIG_SND_TIMER=m -CONFIG_SND_PCM=m -CONFIG_SND_PCM_ELD=y -CONFIG_SND_PCM_IEC958=y -CONFIG_SND_DMAENGINE_PCM=m -CONFIG_SND_HWDEP=m -CONFIG_SND_SEQ_DEVICE=m -CONFIG_SND_RAWMIDI=m -CONFIG_SND_COMPRESS_OFFLOAD=m -CONFIG_SND_JACK=y -CONFIG_SND_JACK_INPUT_DEV=y -CONFIG_SND_OSSEMUL=y -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_PCM_OSS_PLUGINS=y -CONFIG_SND_PCM_TIMER=y -CONFIG_SND_HRTIMER=m -CONFIG_SND_DYNAMIC_MINORS=y -CONFIG_SND_MAX_CARDS=32 -# CONFIG_SND_SUPPORT_OLD_API is not set -CONFIG_SND_PROC_FS=y -CONFIG_SND_VERBOSE_PROCFS=y -CONFIG_SND_VERBOSE_PRINTK=y -CONFIG_SND_DEBUG=y -# CONFIG_SND_DEBUG_VERBOSE is not set -# CONFIG_SND_PCM_XRUN_DEBUG is not set -# CONFIG_SND_CTL_VALIDATION is not set -CONFIG_SND_VMASTER=y -CONFIG_SND_DMA_SGBUF=y -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_SEQUENCER_OSS=m -CONFIG_SND_SEQ_HRTIMER_DEFAULT=y -CONFIG_SND_SEQ_MIDI_EVENT=m -CONFIG_SND_SEQ_MIDI=m -CONFIG_SND_SEQ_MIDI_EMUL=m -CONFIG_SND_SEQ_VIRMIDI=m -CONFIG_SND_MPU401_UART=m -CONFIG_SND_OPL3_LIB=m -CONFIG_SND_OPL3_LIB_SEQ=m -CONFIG_SND_VX_LIB=m -CONFIG_SND_AC97_CODEC=m -CONFIG_SND_DRIVERS=y -# CONFIG_SND_PCSP is not set -CONFIG_SND_DUMMY=m -CONFIG_SND_ALOOP=m -CONFIG_SND_VIRMIDI=m -CONFIG_SND_MTPAV=m -CONFIG_SND_MTS64=m -CONFIG_SND_SERIAL_U16550=m -CONFIG_SND_MPU401=m -CONFIG_SND_PORTMAN2X4=m -CONFIG_SND_AC97_POWER_SAVE=y -CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 -CONFIG_SND_SB_COMMON=m -CONFIG_SND_PCI=y -CONFIG_SND_AD1889=m -CONFIG_SND_ALS300=m -CONFIG_SND_ALS4000=m -CONFIG_SND_ALI5451=m -CONFIG_SND_ASIHPI=m -CONFIG_SND_ATIIXP=m -CONFIG_SND_ATIIXP_MODEM=m -CONFIG_SND_AU8810=m -CONFIG_SND_AU8820=m -CONFIG_SND_AU8830=m -CONFIG_SND_AW2=m -CONFIG_SND_AZT3328=m -CONFIG_SND_BT87X=m -# CONFIG_SND_BT87X_OVERCLOCK is not set -CONFIG_SND_CA0106=m -CONFIG_SND_CMIPCI=m -CONFIG_SND_OXYGEN_LIB=m -CONFIG_SND_OXYGEN=m -CONFIG_SND_CS4281=m -CONFIG_SND_CS46XX=m -CONFIG_SND_CS46XX_NEW_DSP=y -CONFIG_SND_CTXFI=m -CONFIG_SND_DARLA20=m -CONFIG_SND_GINA20=m -CONFIG_SND_LAYLA20=m -CONFIG_SND_DARLA24=m -CONFIG_SND_GINA24=m -CONFIG_SND_LAYLA24=m -CONFIG_SND_MONA=m -CONFIG_SND_MIA=m -CONFIG_SND_ECHO3G=m -CONFIG_SND_INDIGO=m -CONFIG_SND_INDIGOIO=m -CONFIG_SND_INDIGODJ=m -CONFIG_SND_INDIGOIOX=m -CONFIG_SND_INDIGODJX=m -CONFIG_SND_EMU10K1=m 
-CONFIG_SND_EMU10K1_SEQ=m -CONFIG_SND_EMU10K1X=m -CONFIG_SND_ENS1370=m -CONFIG_SND_ENS1371=m -CONFIG_SND_ES1938=m -CONFIG_SND_ES1968=m -CONFIG_SND_ES1968_INPUT=y -CONFIG_SND_ES1968_RADIO=y -CONFIG_SND_FM801=m -CONFIG_SND_FM801_TEA575X_BOOL=y -CONFIG_SND_HDSP=m -CONFIG_SND_HDSPM=m -CONFIG_SND_ICE1712=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_INTEL8X0M=m -CONFIG_SND_KORG1212=m -CONFIG_SND_LOLA=m -CONFIG_SND_LX6464ES=m -CONFIG_SND_MAESTRO3=m -CONFIG_SND_MAESTRO3_INPUT=y -CONFIG_SND_MIXART=m -CONFIG_SND_NM256=m -CONFIG_SND_PCXHR=m -CONFIG_SND_RIPTIDE=m -CONFIG_SND_RME32=m -CONFIG_SND_RME96=m -CONFIG_SND_RME9652=m -CONFIG_SND_SONICVIBES=m -CONFIG_SND_TRIDENT=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VIA82XX_MODEM=m -CONFIG_SND_VIRTUOSO=m -CONFIG_SND_VX222=m -CONFIG_SND_YMFPCI=m - -# -# HD-Audio -# -CONFIG_SND_HDA=m -CONFIG_SND_HDA_GENERIC_LEDS=y -CONFIG_SND_HDA_INTEL=m -CONFIG_SND_HDA_HWDEP=y -CONFIG_SND_HDA_RECONFIG=y -CONFIG_SND_HDA_INPUT_BEEP=y -CONFIG_SND_HDA_INPUT_BEEP_MODE=1 -CONFIG_SND_HDA_PATCH_LOADER=y -CONFIG_SND_HDA_CODEC_REALTEK=m -CONFIG_SND_HDA_CODEC_ANALOG=m -CONFIG_SND_HDA_CODEC_SIGMATEL=m -CONFIG_SND_HDA_CODEC_VIA=m -CONFIG_SND_HDA_CODEC_HDMI=m -CONFIG_SND_HDA_CODEC_CIRRUS=m -CONFIG_SND_HDA_CODEC_CONEXANT=m -CONFIG_SND_HDA_CODEC_CA0110=m -CONFIG_SND_HDA_CODEC_CA0132=m -CONFIG_SND_HDA_CODEC_CA0132_DSP=y -CONFIG_SND_HDA_CODEC_CMEDIA=m -CONFIG_SND_HDA_CODEC_SI3054=m -CONFIG_SND_HDA_GENERIC=m -CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 -# CONFIG_SND_HDA_INTEL_HDMI_SILENT_STREAM is not set -# end of HD-Audio - -CONFIG_SND_HDA_CORE=m -CONFIG_SND_HDA_DSP_LOADER=y -CONFIG_SND_HDA_COMPONENT=y -CONFIG_SND_HDA_I915=y -CONFIG_SND_HDA_EXT_CORE=m -CONFIG_SND_HDA_PREALLOC_SIZE=0 -CONFIG_SND_INTEL_NHLT=y -CONFIG_SND_INTEL_DSP_CONFIG=m -CONFIG_SND_SPI=y -CONFIG_SND_USB=y -CONFIG_SND_USB_AUDIO=m -CONFIG_SND_USB_AUDIO_USE_MEDIA_CONTROLLER=y -CONFIG_SND_USB_UA101=m -CONFIG_SND_USB_USX2Y=m -CONFIG_SND_USB_CAIAQ=m -CONFIG_SND_USB_CAIAQ_INPUT=y -CONFIG_SND_USB_US122L=m -CONFIG_SND_USB_6FIRE=m -CONFIG_SND_USB_HIFACE=m -CONFIG_SND_BCD2000=m -CONFIG_SND_USB_LINE6=m -CONFIG_SND_USB_POD=m -CONFIG_SND_USB_PODHD=m -CONFIG_SND_USB_TONEPORT=m -CONFIG_SND_USB_VARIAX=m -CONFIG_SND_FIREWIRE=y -CONFIG_SND_FIREWIRE_LIB=m -CONFIG_SND_DICE=m -CONFIG_SND_OXFW=m -CONFIG_SND_ISIGHT=m -CONFIG_SND_FIREWORKS=m -CONFIG_SND_BEBOB=m -CONFIG_SND_FIREWIRE_DIGI00X=m -CONFIG_SND_FIREWIRE_TASCAM=m -CONFIG_SND_FIREWIRE_MOTU=m -CONFIG_SND_FIREFACE=m -CONFIG_SND_PCMCIA=y -CONFIG_SND_VXPOCKET=m -CONFIG_SND_PDAUDIOCF=m -CONFIG_SND_SOC=m -CONFIG_SND_SOC_AC97_BUS=y -CONFIG_SND_SOC_GENERIC_DMAENGINE_PCM=y -CONFIG_SND_SOC_COMPRESS=y -CONFIG_SND_SOC_TOPOLOGY=y -CONFIG_SND_SOC_ACPI=m -CONFIG_SND_SOC_AMD_ACP=m -CONFIG_SND_SOC_AMD_CZ_DA7219MX98357_MACH=m -CONFIG_SND_SOC_AMD_CZ_RT5645_MACH=m -CONFIG_SND_SOC_AMD_ACP3x=m -CONFIG_SND_SOC_AMD_RV_RT5682_MACH=m -CONFIG_SND_SOC_AMD_RENOIR=m -CONFIG_SND_SOC_AMD_RENOIR_MACH=m -CONFIG_SND_ATMEL_SOC=m -CONFIG_SND_SOC_MIKROE_PROTO=m -CONFIG_SND_BCM63XX_I2S_WHISTLER=m -CONFIG_SND_DESIGNWARE_I2S=m -CONFIG_SND_DESIGNWARE_PCM=y - -# -# SoC Audio for Freescale CPUs -# - -# -# Common SoC Audio options for Freescale CPUs: -# -# CONFIG_SND_SOC_FSL_ASRC is not set -# CONFIG_SND_SOC_FSL_SAI is not set -# CONFIG_SND_SOC_FSL_AUDMIX is not set -# CONFIG_SND_SOC_FSL_SSI is not set -# CONFIG_SND_SOC_FSL_SPDIF is not set -# CONFIG_SND_SOC_FSL_ESAI is not set -# CONFIG_SND_SOC_FSL_MICFIL is not set -# CONFIG_SND_SOC_IMX_AUDMUX is not set -# end of SoC Audio for Freescale CPUs - -CONFIG_SND_I2S_HI6210_I2S=m 
-CONFIG_SND_SOC_IMG=y -CONFIG_SND_SOC_IMG_I2S_IN=m -CONFIG_SND_SOC_IMG_I2S_OUT=m -CONFIG_SND_SOC_IMG_PARALLEL_OUT=m -CONFIG_SND_SOC_IMG_SPDIF_IN=m -CONFIG_SND_SOC_IMG_SPDIF_OUT=m -CONFIG_SND_SOC_IMG_PISTACHIO_INTERNAL_DAC=m -CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y -CONFIG_SND_SST_IPC=m -CONFIG_SND_SST_IPC_PCI=m -CONFIG_SND_SST_IPC_ACPI=m -CONFIG_SND_SOC_INTEL_SST_ACPI=m -CONFIG_SND_SOC_INTEL_SST=m -CONFIG_SND_SOC_INTEL_SST_FIRMWARE=m -CONFIG_SND_SOC_INTEL_HASWELL=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_PCI=m -CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_ACPI=m -CONFIG_SND_SOC_INTEL_SKYLAKE=m -CONFIG_SND_SOC_INTEL_SKL=m -CONFIG_SND_SOC_INTEL_APL=m -CONFIG_SND_SOC_INTEL_KBL=m -CONFIG_SND_SOC_INTEL_GLK=m -CONFIG_SND_SOC_INTEL_CNL=m -CONFIG_SND_SOC_INTEL_CFL=m -CONFIG_SND_SOC_INTEL_CML_H=m -CONFIG_SND_SOC_INTEL_CML_LP=m -CONFIG_SND_SOC_INTEL_SKYLAKE_FAMILY=m -CONFIG_SND_SOC_INTEL_SKYLAKE_SSP_CLK=m -# CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC is not set -CONFIG_SND_SOC_INTEL_SKYLAKE_COMMON=m -CONFIG_SND_SOC_ACPI_INTEL_MATCH=m -CONFIG_SND_SOC_INTEL_MACH=y -# CONFIG_SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES is not set -CONFIG_SND_SOC_INTEL_HASWELL_MACH=m -CONFIG_SND_SOC_INTEL_BDW_RT5650_MACH=m -CONFIG_SND_SOC_INTEL_BDW_RT5677_MACH=m -CONFIG_SND_SOC_INTEL_BROADWELL_MACH=m -CONFIG_SND_SOC_INTEL_BYTCR_RT5640_MACH=m -CONFIG_SND_SOC_INTEL_BYTCR_RT5651_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_RT5672_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_RT5645_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH=m -CONFIG_SND_SOC_INTEL_CHT_BSW_NAU8824_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_CX2072X_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_DA7213_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_ES8316_MACH=m -# CONFIG_SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH is not set -CONFIG_SND_SOC_INTEL_SKL_RT286_MACH=m -CONFIG_SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH=m -CONFIG_SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_DA7219_MAX98357A_GENERIC=m -CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_COMMON=m -CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_BXT_RT298_MACH=m -CONFIG_SND_SOC_INTEL_SOF_WM8804_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98927_MACH=m -CONFIG_SND_SOC_INTEL_KBL_RT5660_MACH=m -CONFIG_SND_SOC_INTEL_GLK_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_GLK_RT5682_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_SKL_HDA_DSP_GENERIC_MACH=m -CONFIG_SND_SOC_INTEL_SOF_RT5682_MACH=m -CONFIG_SND_SOC_INTEL_SOF_PCM512x_MACH=m -CONFIG_SND_SOC_INTEL_CML_LP_DA7219_MAX98357A_MACH=m -CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m -CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m -CONFIG_SND_SOC_INTEL_EHL_RT5660_MACH=m -CONFIG_SND_SOC_MTK_BTCVSD=m -CONFIG_SND_SOC_SOF_TOPLEVEL=y -CONFIG_SND_SOC_SOF_PCI=m -CONFIG_SND_SOC_SOF_ACPI=m -CONFIG_SND_SOC_SOF_OF=m -# CONFIG_SND_SOC_SOF_DEBUG_PROBES is not set -# CONFIG_SND_SOC_SOF_DEVELOPER_SUPPORT is not set -CONFIG_SND_SOC_SOF=m -CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE=y -CONFIG_SND_SOC_SOF_INTEL_TOPLEVEL=y -CONFIG_SND_SOC_SOF_INTEL_ACPI=m -CONFIG_SND_SOC_SOF_INTEL_PCI=m -CONFIG_SND_SOC_SOF_INTEL_HIFI_EP_IPC=m -CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP=m -CONFIG_SND_SOC_SOF_INTEL_COMMON=m -CONFIG_SND_SOC_SOF_MERRIFIELD_SUPPORT=y -CONFIG_SND_SOC_SOF_MERRIFIELD=m -CONFIG_SND_SOC_SOF_APOLLOLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_APOLLOLAKE=m -CONFIG_SND_SOC_SOF_GEMINILAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_GEMINILAKE=m 
-CONFIG_SND_SOC_SOF_CANNONLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_CANNONLAKE=m -CONFIG_SND_SOC_SOF_COFFEELAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_COFFEELAKE=m -CONFIG_SND_SOC_SOF_ICELAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_ICELAKE=m -CONFIG_SND_SOC_SOF_COMETLAKE=m -CONFIG_SND_SOC_SOF_COMETLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_COMETLAKE_LP_SUPPORT=y -CONFIG_SND_SOC_SOF_TIGERLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_TIGERLAKE=m -CONFIG_SND_SOC_SOF_ELKHARTLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_ELKHARTLAKE=m -CONFIG_SND_SOC_SOF_JASPERLAKE_SUPPORT=y -CONFIG_SND_SOC_SOF_JASPERLAKE=m -CONFIG_SND_SOC_SOF_HDA_COMMON=m -CONFIG_SND_SOC_SOF_HDA_LINK=y -CONFIG_SND_SOC_SOF_HDA_AUDIO_CODEC=y -# CONFIG_SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1 is not set -CONFIG_SND_SOC_SOF_HDA_LINK_BASELINE=m -CONFIG_SND_SOC_SOF_HDA=m -CONFIG_SND_SOC_SOF_XTENSA=m - -# -# STMicroelectronics STM32 SOC audio support -# -# end of STMicroelectronics STM32 SOC audio support - -CONFIG_SND_SOC_XILINX_I2S=m -CONFIG_SND_SOC_XILINX_AUDIO_FORMATTER=m -CONFIG_SND_SOC_XILINX_SPDIF=m -CONFIG_SND_SOC_XTFPGA_I2S=m -CONFIG_ZX_TDM=m -CONFIG_SND_SOC_I2C_AND_SPI=m - -# -# CODEC drivers -# -CONFIG_SND_SOC_AC97_CODEC=m -CONFIG_SND_SOC_ADAU_UTILS=m -CONFIG_SND_SOC_ADAU1701=m -CONFIG_SND_SOC_ADAU17X1=m -CONFIG_SND_SOC_ADAU1761=m -CONFIG_SND_SOC_ADAU1761_I2C=m -CONFIG_SND_SOC_ADAU1761_SPI=m -CONFIG_SND_SOC_ADAU7002=m -CONFIG_SND_SOC_ADAU7118=m -CONFIG_SND_SOC_ADAU7118_HW=m -CONFIG_SND_SOC_ADAU7118_I2C=m -CONFIG_SND_SOC_AK4104=m -CONFIG_SND_SOC_AK4118=m -CONFIG_SND_SOC_AK4458=m -CONFIG_SND_SOC_AK4554=m -CONFIG_SND_SOC_AK4613=m -CONFIG_SND_SOC_AK4642=m -CONFIG_SND_SOC_AK5386=m -CONFIG_SND_SOC_AK5558=m -CONFIG_SND_SOC_ALC5623=m -CONFIG_SND_SOC_BD28623=m -# CONFIG_SND_SOC_BT_SCO is not set -CONFIG_SND_SOC_CPCAP=m -CONFIG_SND_SOC_CROS_EC_CODEC=m -CONFIG_SND_SOC_CS35L32=m -CONFIG_SND_SOC_CS35L33=m -CONFIG_SND_SOC_CS35L34=m -CONFIG_SND_SOC_CS35L35=m -CONFIG_SND_SOC_CS35L36=m -CONFIG_SND_SOC_CS42L42=m -CONFIG_SND_SOC_CS42L51=m -CONFIG_SND_SOC_CS42L51_I2C=m -CONFIG_SND_SOC_CS42L52=m -CONFIG_SND_SOC_CS42L56=m -CONFIG_SND_SOC_CS42L73=m -CONFIG_SND_SOC_CS4265=m -CONFIG_SND_SOC_CS4270=m -CONFIG_SND_SOC_CS4271=m -CONFIG_SND_SOC_CS4271_I2C=m -CONFIG_SND_SOC_CS4271_SPI=m -CONFIG_SND_SOC_CS42XX8=m -CONFIG_SND_SOC_CS42XX8_I2C=m -CONFIG_SND_SOC_CS43130=m -CONFIG_SND_SOC_CS4341=m -CONFIG_SND_SOC_CS4349=m -CONFIG_SND_SOC_CS53L30=m -CONFIG_SND_SOC_CX2072X=m -CONFIG_SND_SOC_DA7213=m -CONFIG_SND_SOC_DA7219=m -CONFIG_SND_SOC_DMIC=m -CONFIG_SND_SOC_HDMI_CODEC=m -CONFIG_SND_SOC_ES7134=m -CONFIG_SND_SOC_ES7241=m -CONFIG_SND_SOC_ES8316=m -CONFIG_SND_SOC_ES8328=m -CONFIG_SND_SOC_ES8328_I2C=m -CONFIG_SND_SOC_ES8328_SPI=m -CONFIG_SND_SOC_GTM601=m -CONFIG_SND_SOC_HDAC_HDMI=m -CONFIG_SND_SOC_HDAC_HDA=m -CONFIG_SND_SOC_INNO_RK3036=m -CONFIG_SND_SOC_LOCHNAGAR_SC=m -CONFIG_SND_SOC_MAX98088=m -CONFIG_SND_SOC_MAX98090=m -CONFIG_SND_SOC_MAX98357A=m -CONFIG_SND_SOC_MAX98504=m -CONFIG_SND_SOC_MAX9867=m -CONFIG_SND_SOC_MAX98927=m -CONFIG_SND_SOC_MAX98373=m -CONFIG_SND_SOC_MAX98373_I2C=m -# CONFIG_SND_SOC_MAX98373_SDW is not set -CONFIG_SND_SOC_MAX98390=m -CONFIG_SND_SOC_MAX9860=m -CONFIG_SND_SOC_MSM8916_WCD_ANALOG=m -CONFIG_SND_SOC_MSM8916_WCD_DIGITAL=m -CONFIG_SND_SOC_PCM1681=m -CONFIG_SND_SOC_PCM1789=m -CONFIG_SND_SOC_PCM1789_I2C=m -CONFIG_SND_SOC_PCM179X=m -CONFIG_SND_SOC_PCM179X_I2C=m -CONFIG_SND_SOC_PCM179X_SPI=m -CONFIG_SND_SOC_PCM186X=m -CONFIG_SND_SOC_PCM186X_I2C=m -CONFIG_SND_SOC_PCM186X_SPI=m -CONFIG_SND_SOC_PCM3060=m -CONFIG_SND_SOC_PCM3060_I2C=m -CONFIG_SND_SOC_PCM3060_SPI=m -CONFIG_SND_SOC_PCM3168A=m 
-CONFIG_SND_SOC_PCM3168A_I2C=m -CONFIG_SND_SOC_PCM3168A_SPI=m -CONFIG_SND_SOC_PCM512x=m -CONFIG_SND_SOC_PCM512x_I2C=m -CONFIG_SND_SOC_PCM512x_SPI=m -CONFIG_SND_SOC_RK3328=m -CONFIG_SND_SOC_RL6231=m -CONFIG_SND_SOC_RL6347A=m -CONFIG_SND_SOC_RT286=m -CONFIG_SND_SOC_RT298=m -CONFIG_SND_SOC_RT1011=m -CONFIG_SND_SOC_RT1015=m -CONFIG_SND_SOC_RT1308_SDW=m -CONFIG_SND_SOC_RT5514=m -CONFIG_SND_SOC_RT5514_SPI=m -CONFIG_SND_SOC_RT5616=m -CONFIG_SND_SOC_RT5631=m -CONFIG_SND_SOC_RT5640=m -CONFIG_SND_SOC_RT5645=m -CONFIG_SND_SOC_RT5651=m -CONFIG_SND_SOC_RT5660=m -CONFIG_SND_SOC_RT5663=m -CONFIG_SND_SOC_RT5670=m -CONFIG_SND_SOC_RT5677=m -CONFIG_SND_SOC_RT5677_SPI=m -CONFIG_SND_SOC_RT5682=m -CONFIG_SND_SOC_RT5682_I2C=m -CONFIG_SND_SOC_RT5682_SDW=m -CONFIG_SND_SOC_RT700=m -CONFIG_SND_SOC_RT700_SDW=m -CONFIG_SND_SOC_RT711=m -CONFIG_SND_SOC_RT711_SDW=m -CONFIG_SND_SOC_RT715=m -CONFIG_SND_SOC_RT715_SDW=m -CONFIG_SND_SOC_SGTL5000=m -CONFIG_SND_SOC_SI476X=m -CONFIG_SND_SOC_SIGMADSP=m -CONFIG_SND_SOC_SIGMADSP_I2C=m -CONFIG_SND_SOC_SIGMADSP_REGMAP=m -CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m -CONFIG_SND_SOC_SIRF_AUDIO_CODEC=m -CONFIG_SND_SOC_SPDIF=m -CONFIG_SND_SOC_SSM2305=m -CONFIG_SND_SOC_SSM2602=m -CONFIG_SND_SOC_SSM2602_SPI=m -CONFIG_SND_SOC_SSM2602_I2C=m -CONFIG_SND_SOC_SSM4567=m -CONFIG_SND_SOC_STA32X=m -CONFIG_SND_SOC_STA350=m -CONFIG_SND_SOC_STI_SAS=m -CONFIG_SND_SOC_TAS2552=m -CONFIG_SND_SOC_TAS2562=m -CONFIG_SND_SOC_TAS2770=m -CONFIG_SND_SOC_TAS5086=m -CONFIG_SND_SOC_TAS571X=m -CONFIG_SND_SOC_TAS5720=m -CONFIG_SND_SOC_TAS6424=m -CONFIG_SND_SOC_TDA7419=m -CONFIG_SND_SOC_TFA9879=m -CONFIG_SND_SOC_TLV320AIC23=m -CONFIG_SND_SOC_TLV320AIC23_I2C=m -CONFIG_SND_SOC_TLV320AIC23_SPI=m -CONFIG_SND_SOC_TLV320AIC31XX=m -CONFIG_SND_SOC_TLV320AIC32X4=m -CONFIG_SND_SOC_TLV320AIC32X4_I2C=m -CONFIG_SND_SOC_TLV320AIC32X4_SPI=m -CONFIG_SND_SOC_TLV320AIC3X=m -CONFIG_SND_SOC_TLV320ADCX140=m -CONFIG_SND_SOC_TS3A227E=m -CONFIG_SND_SOC_TSCS42XX=m -CONFIG_SND_SOC_TSCS454=m -CONFIG_SND_SOC_UDA1334=m -CONFIG_SND_SOC_WCD9335=m -CONFIG_SND_SOC_WCD934X=m -CONFIG_SND_SOC_WM8510=m -CONFIG_SND_SOC_WM8523=m -CONFIG_SND_SOC_WM8524=m -CONFIG_SND_SOC_WM8580=m -CONFIG_SND_SOC_WM8711=m -CONFIG_SND_SOC_WM8728=m -CONFIG_SND_SOC_WM8731=m -CONFIG_SND_SOC_WM8737=m -CONFIG_SND_SOC_WM8741=m -CONFIG_SND_SOC_WM8750=m -CONFIG_SND_SOC_WM8753=m -CONFIG_SND_SOC_WM8770=m -CONFIG_SND_SOC_WM8776=m -CONFIG_SND_SOC_WM8782=m -CONFIG_SND_SOC_WM8804=m -CONFIG_SND_SOC_WM8804_I2C=m -CONFIG_SND_SOC_WM8804_SPI=m -CONFIG_SND_SOC_WM8903=m -CONFIG_SND_SOC_WM8904=m -CONFIG_SND_SOC_WM8960=m -CONFIG_SND_SOC_WM8962=m -CONFIG_SND_SOC_WM8974=m -CONFIG_SND_SOC_WM8978=m -CONFIG_SND_SOC_WM8985=m -CONFIG_SND_SOC_WSA881X=m -CONFIG_SND_SOC_ZL38060=m -CONFIG_SND_SOC_ZX_AUD96P22=m -CONFIG_SND_SOC_MAX9759=m -CONFIG_SND_SOC_MT6351=m -CONFIG_SND_SOC_MT6358=m -CONFIG_SND_SOC_MT6660=m -CONFIG_SND_SOC_NAU8540=m -CONFIG_SND_SOC_NAU8810=m -CONFIG_SND_SOC_NAU8822=m -CONFIG_SND_SOC_NAU8824=m -CONFIG_SND_SOC_NAU8825=m -CONFIG_SND_SOC_TPA6130A2=m -# end of CODEC drivers - -CONFIG_SND_SIMPLE_CARD_UTILS=m -CONFIG_SND_SIMPLE_CARD=m -CONFIG_SND_AUDIO_GRAPH_CARD=m -CONFIG_SND_X86=y -CONFIG_HDMI_LPE_AUDIO=m -CONFIG_SND_SYNTH_EMUX=m -CONFIG_SND_XEN_FRONTEND=m -CONFIG_AC97_BUS=m - -# -# HID support -# -CONFIG_HID=m -CONFIG_HID_BATTERY_STRENGTH=y -CONFIG_HIDRAW=y -CONFIG_UHID=m -CONFIG_HID_GENERIC=m - -# -# Special HID drivers -# -CONFIG_HID_A4TECH=m -CONFIG_HID_ACCUTOUCH=m -CONFIG_HID_ACRUX=m -CONFIG_HID_ACRUX_FF=y -CONFIG_HID_APPLE=m -CONFIG_HID_APPLEIR=m -CONFIG_HID_ASUS=m -CONFIG_HID_AUREAL=m 
-CONFIG_HID_BELKIN=m -CONFIG_HID_BETOP_FF=m -CONFIG_HID_BIGBEN_FF=m -CONFIG_HID_CHERRY=m -CONFIG_HID_CHICONY=m -CONFIG_HID_CORSAIR=m -CONFIG_HID_COUGAR=m -CONFIG_HID_MACALLY=m -CONFIG_HID_PRODIKEYS=m -CONFIG_HID_CMEDIA=m -CONFIG_HID_CP2112=m -CONFIG_HID_CREATIVE_SB0540=m -CONFIG_HID_CYPRESS=m -CONFIG_HID_DRAGONRISE=m -CONFIG_DRAGONRISE_FF=y -CONFIG_HID_EMS_FF=m -CONFIG_HID_ELAN=m -CONFIG_HID_ELECOM=m -CONFIG_HID_ELO=m -CONFIG_HID_EZKEY=m -CONFIG_HID_GEMBIRD=m -CONFIG_HID_GFRM=m -CONFIG_HID_GLORIOUS=m -CONFIG_HID_HOLTEK=m -CONFIG_HOLTEK_FF=y -CONFIG_HID_GOOGLE_HAMMER=m -CONFIG_HID_GT683R=m -CONFIG_HID_KEYTOUCH=m -CONFIG_HID_KYE=m -CONFIG_HID_UCLOGIC=m -CONFIG_HID_WALTOP=m -CONFIG_HID_VIEWSONIC=m -CONFIG_HID_GYRATION=m -CONFIG_HID_ICADE=m -CONFIG_HID_ITE=m -CONFIG_HID_JABRA=m -CONFIG_HID_TWINHAN=m -CONFIG_HID_KENSINGTON=m -CONFIG_HID_LCPOWER=m -CONFIG_HID_LED=m -CONFIG_HID_LENOVO=m -CONFIG_HID_LOGITECH=m -CONFIG_HID_LOGITECH_DJ=m -CONFIG_HID_LOGITECH_HIDPP=m -CONFIG_LOGITECH_FF=y -CONFIG_LOGIRUMBLEPAD2_FF=y -CONFIG_LOGIG940_FF=y -CONFIG_LOGIWHEELS_FF=y -CONFIG_HID_MAGICMOUSE=m -CONFIG_HID_MALTRON=m -CONFIG_HID_MAYFLASH=m -CONFIG_HID_REDRAGON=m -CONFIG_HID_MICROSOFT=m -CONFIG_HID_MONTEREY=m -CONFIG_HID_MULTITOUCH=m -CONFIG_HID_NTI=m -CONFIG_HID_NTRIG=m -CONFIG_HID_ORTEK=m -CONFIG_HID_PANTHERLORD=m -CONFIG_PANTHERLORD_FF=y -CONFIG_HID_PENMOUNT=m -CONFIG_HID_PETALYNX=m -CONFIG_HID_PICOLCD=m -CONFIG_HID_PICOLCD_FB=y -CONFIG_HID_PICOLCD_BACKLIGHT=y -CONFIG_HID_PICOLCD_LCD=y -CONFIG_HID_PICOLCD_LEDS=y -CONFIG_HID_PICOLCD_CIR=y -CONFIG_HID_PLANTRONICS=m -CONFIG_HID_PRIMAX=m -CONFIG_HID_RETRODE=m -CONFIG_HID_ROCCAT=m -CONFIG_HID_SAITEK=m -CONFIG_HID_SAMSUNG=m -CONFIG_HID_SONY=m -CONFIG_SONY_FF=y -CONFIG_HID_SPEEDLINK=m -CONFIG_HID_STEAM=m -CONFIG_HID_STEELSERIES=m -CONFIG_HID_SUNPLUS=m -CONFIG_HID_RMI=m -CONFIG_HID_GREENASIA=m -CONFIG_GREENASIA_FF=y -CONFIG_HID_HYPERV_MOUSE=m -CONFIG_HID_SMARTJOYPLUS=m -CONFIG_SMARTJOYPLUS_FF=y -CONFIG_HID_TIVO=m -CONFIG_HID_TOPSEED=m -CONFIG_HID_THINGM=m -CONFIG_HID_THRUSTMASTER=m -CONFIG_THRUSTMASTER_FF=y -CONFIG_HID_UDRAW_PS3=m -CONFIG_HID_U2FZERO=m -CONFIG_HID_WACOM=m -CONFIG_HID_WIIMOTE=m -CONFIG_HID_XINMO=m -CONFIG_HID_ZEROPLUS=m -CONFIG_ZEROPLUS_FF=y -CONFIG_HID_ZYDACRON=m -CONFIG_HID_SENSOR_HUB=m -# CONFIG_HID_SENSOR_CUSTOM_SENSOR is not set -CONFIG_HID_ALPS=m -CONFIG_HID_MCP2221=m -# end of Special HID drivers - -# -# USB HID support -# -CONFIG_USB_HID=m -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y - -# -# USB HID Boot Protocol drivers -# -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -# end of USB HID Boot Protocol drivers -# end of USB HID support - -# -# I2C HID support -# -CONFIG_I2C_HID=m -# end of I2C HID support - -# -# Intel ISH HID support -# -CONFIG_INTEL_ISH_HID=m -CONFIG_INTEL_ISH_FIRMWARE_DOWNLOADER=m -# end of Intel ISH HID support -# end of HID support - -CONFIG_USB_OHCI_LITTLE_ENDIAN=y -CONFIG_USB_SUPPORT=y -CONFIG_USB_COMMON=y -CONFIG_USB_LED_TRIG=y -CONFIG_USB_ULPI_BUS=m -CONFIG_USB_CONN_GPIO=m -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB=y -CONFIG_USB_PCI=y -CONFIG_USB_ANNOUNCE_NEW_DEVICES=y - -# -# Miscellaneous USB options -# -CONFIG_USB_DEFAULT_PERSIST=y -CONFIG_USB_DYNAMIC_MINORS=y -# CONFIG_USB_OTG is not set -# CONFIG_USB_OTG_PRODUCTLIST is not set -# CONFIG_USB_OTG_DISABLE_EXTERNAL_HUB is not set -CONFIG_USB_LEDS_TRIGGER_USBPORT=m -CONFIG_USB_AUTOSUSPEND_DELAY=2 -CONFIG_USB_MON=m - -# -# USB Host Controller Drivers -# -CONFIG_USB_C67X00_HCD=m -CONFIG_USB_XHCI_HCD=m -# CONFIG_USB_XHCI_DBGCAP is not set -CONFIG_USB_XHCI_PCI=m 
-CONFIG_USB_XHCI_PCI_RENESAS=m -CONFIG_USB_XHCI_PLATFORM=m -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_EHCI_TT_NEWSCHED=y -CONFIG_USB_EHCI_PCI=m -CONFIG_USB_EHCI_FSL=m -CONFIG_USB_EHCI_HCD_PLATFORM=m -CONFIG_USB_OXU210HP_HCD=m -CONFIG_USB_ISP116X_HCD=m -CONFIG_USB_FOTG210_HCD=m -CONFIG_USB_MAX3421_HCD=m -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_OHCI_HCD_PCI=m -# CONFIG_USB_OHCI_HCD_SSB is not set -CONFIG_USB_OHCI_HCD_PLATFORM=m -CONFIG_USB_UHCI_HCD=m -CONFIG_USB_U132_HCD=m -CONFIG_USB_SL811_HCD=m -# CONFIG_USB_SL811_HCD_ISO is not set -CONFIG_USB_SL811_CS=m -CONFIG_USB_R8A66597_HCD=m -CONFIG_USB_HCD_BCMA=m -CONFIG_USB_HCD_SSB=m -# CONFIG_USB_HCD_TEST_MODE is not set - -# -# USB Device Class drivers -# -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_WDM=m -CONFIG_USB_TMC=m - -# -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may -# - -# -# also be needed; see USB_STORAGE Help for more info -# -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_REALTEK=m -CONFIG_REALTEK_AUTOPM=y -CONFIG_USB_STORAGE_DATAFAB=m -CONFIG_USB_STORAGE_FREECOM=m -CONFIG_USB_STORAGE_ISD200=m -CONFIG_USB_STORAGE_USBAT=m -CONFIG_USB_STORAGE_SDDR09=m -CONFIG_USB_STORAGE_SDDR55=m -CONFIG_USB_STORAGE_JUMPSHOT=m -CONFIG_USB_STORAGE_ALAUDA=m -CONFIG_USB_STORAGE_ONETOUCH=m -CONFIG_USB_STORAGE_KARMA=m -CONFIG_USB_STORAGE_CYPRESS_ATACB=m -CONFIG_USB_STORAGE_ENE_UB6250=m -CONFIG_USB_UAS=m - -# -# USB Imaging devices -# -CONFIG_USB_MDC800=m -CONFIG_USB_MICROTEK=m -CONFIG_USBIP_CORE=m -CONFIG_USBIP_VHCI_HCD=m -CONFIG_USBIP_VHCI_HC_PORTS=8 -CONFIG_USBIP_VHCI_NR_HCS=1 -CONFIG_USBIP_HOST=m -CONFIG_USBIP_VUDC=m -# CONFIG_USBIP_DEBUG is not set -CONFIG_USB_CDNS3=m -CONFIG_USB_CDNS3_GADGET=y -CONFIG_USB_CDNS3_HOST=y -CONFIG_USB_CDNS3_PCI_WRAP=m -CONFIG_USB_MUSB_HDRC=m -# CONFIG_USB_MUSB_HOST is not set -# CONFIG_USB_MUSB_GADGET is not set -CONFIG_USB_MUSB_DUAL_ROLE=y - -# -# Platform Glue Layer -# - -# -# MUSB DMA mode -# -# CONFIG_MUSB_PIO_ONLY is not set -CONFIG_USB_DWC3=m -CONFIG_USB_DWC3_ULPI=y -# CONFIG_USB_DWC3_HOST is not set -# CONFIG_USB_DWC3_GADGET is not set -CONFIG_USB_DWC3_DUAL_ROLE=y - -# -# Platform Glue Driver Support -# -CONFIG_USB_DWC3_PCI=m -CONFIG_USB_DWC3_HAPS=m -CONFIG_USB_DWC3_OF_SIMPLE=m -CONFIG_USB_DWC2=m -# CONFIG_USB_DWC2_HOST is not set - -# -# Gadget/Dual-role mode requires USB Gadget support to be enabled -# -# CONFIG_USB_DWC2_PERIPHERAL is not set -CONFIG_USB_DWC2_DUAL_ROLE=y -CONFIG_USB_DWC2_PCI=m -# CONFIG_USB_DWC2_DEBUG is not set -# CONFIG_USB_DWC2_TRACK_MISSED_SOFS is not set -CONFIG_USB_CHIPIDEA=m -CONFIG_USB_CHIPIDEA_UDC=y -CONFIG_USB_CHIPIDEA_HOST=y -CONFIG_USB_CHIPIDEA_PCI=m -CONFIG_USB_CHIPIDEA_MSM=m -CONFIG_USB_CHIPIDEA_IMX=m -CONFIG_USB_CHIPIDEA_GENERIC=m -CONFIG_USB_CHIPIDEA_TEGRA=m -CONFIG_USB_ISP1760=m -CONFIG_USB_ISP1760_HCD=y -CONFIG_USB_ISP1761_UDC=y -# CONFIG_USB_ISP1760_HOST_ROLE is not set -# CONFIG_USB_ISP1760_GADGET_ROLE is not set -CONFIG_USB_ISP1760_DUAL_ROLE=y - -# -# USB port drivers -# -CONFIG_USB_USS720=m -CONFIG_USB_SERIAL=y -CONFIG_USB_SERIAL_CONSOLE=y -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_SIMPLE=m -CONFIG_USB_SERIAL_AIRCABLE=m -CONFIG_USB_SERIAL_ARK3116=m -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_CH341=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_CP210X=m -CONFIG_USB_SERIAL_CYPRESS_M8=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m 
-CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_F81232=m -CONFIG_USB_SERIAL_F8153X=m -CONFIG_USB_SERIAL_GARMIN=m -CONFIG_USB_SERIAL_IPW=m -CONFIG_USB_SERIAL_IUU=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_METRO=m -CONFIG_USB_SERIAL_MOS7720=m -CONFIG_USB_SERIAL_MOS7715_PARPORT=y -CONFIG_USB_SERIAL_MOS7840=m -CONFIG_USB_SERIAL_MXUPORT=m -CONFIG_USB_SERIAL_NAVMAN=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_OTI6858=m -CONFIG_USB_SERIAL_QCAUX=m -CONFIG_USB_SERIAL_QUALCOMM=m -CONFIG_USB_SERIAL_SPCP8X5=m -CONFIG_USB_SERIAL_SAFE=m -# CONFIG_USB_SERIAL_SAFE_PADDED is not set -CONFIG_USB_SERIAL_SIERRAWIRELESS=m -CONFIG_USB_SERIAL_SYMBOL=m -CONFIG_USB_SERIAL_TI=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_WWAN=m -CONFIG_USB_SERIAL_OPTION=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_SERIAL_OPTICON=m -CONFIG_USB_SERIAL_XSENS_MT=m -CONFIG_USB_SERIAL_WISHBONE=m -CONFIG_USB_SERIAL_SSU100=m -CONFIG_USB_SERIAL_QT2=m -CONFIG_USB_SERIAL_UPD78F0730=m -CONFIG_USB_SERIAL_DEBUG=m - -# -# USB Miscellaneous drivers -# -CONFIG_USB_EMI62=m -CONFIG_USB_EMI26=m -CONFIG_USB_ADUTUX=m -CONFIG_USB_SEVSEG=m -CONFIG_USB_LEGOTOWER=m -CONFIG_USB_LCD=m -CONFIG_USB_CYPRESS_CY7C63=m -CONFIG_USB_CYTHERM=m -CONFIG_USB_IDMOUSE=m -CONFIG_USB_FTDI_ELAN=m -CONFIG_USB_APPLEDISPLAY=m -CONFIG_APPLE_MFI_FASTCHARGE=m -CONFIG_USB_SISUSBVGA=m -CONFIG_USB_SISUSBVGA_CON=y -CONFIG_USB_LD=m -CONFIG_USB_TRANCEVIBRATOR=m -CONFIG_USB_IOWARRIOR=m -CONFIG_USB_TEST=m -CONFIG_USB_EHSET_TEST_FIXTURE=m -CONFIG_USB_ISIGHTFW=m -CONFIG_USB_YUREX=m -CONFIG_USB_EZUSB_FX2=m -CONFIG_USB_HUB_USB251XB=m -CONFIG_USB_HSIC_USB3503=m -CONFIG_USB_HSIC_USB4604=m -CONFIG_USB_LINK_LAYER_TEST=m -CONFIG_USB_CHAOSKEY=m -CONFIG_USB_ATM=m -CONFIG_USB_SPEEDTOUCH=m -CONFIG_USB_CXACRU=m -CONFIG_USB_UEAGLEATM=m -CONFIG_USB_XUSBATM=m - -# -# USB Physical Layer drivers -# -CONFIG_USB_PHY=y -CONFIG_NOP_USB_XCEIV=m -CONFIG_USB_GPIO_VBUS=m -CONFIG_TAHVO_USB=m -# CONFIG_TAHVO_USB_HOST_BY_DEFAULT is not set -CONFIG_USB_ISP1301=m -# end of USB Physical Layer drivers - -CONFIG_USB_GADGET=m -# CONFIG_USB_GADGET_DEBUG is not set -# CONFIG_USB_GADGET_DEBUG_FILES is not set -# CONFIG_USB_GADGET_DEBUG_FS is not set -CONFIG_USB_GADGET_VBUS_DRAW=2 -CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 -CONFIG_U_SERIAL_CONSOLE=y - -# -# USB Peripheral Controller -# -CONFIG_USB_FOTG210_UDC=m -CONFIG_USB_GR_UDC=m -CONFIG_USB_R8A66597=m -CONFIG_USB_PXA27X=m -CONFIG_USB_MV_UDC=m -CONFIG_USB_MV_U3D=m -CONFIG_USB_SNP_CORE=m -CONFIG_USB_SNP_UDC_PLAT=m -CONFIG_USB_M66592=m -CONFIG_USB_BDC_UDC=m - -# -# Platform Support -# -CONFIG_USB_BDC_PCI=m -CONFIG_USB_AMD5536UDC=m -CONFIG_USB_NET2272=m -CONFIG_USB_NET2272_DMA=y -CONFIG_USB_NET2280=m -CONFIG_USB_GOKU=m -CONFIG_USB_EG20T=m -CONFIG_USB_GADGET_XILINX=m -CONFIG_USB_MAX3420_UDC=m -CONFIG_USB_DUMMY_HCD=m -# end of USB Peripheral Controller - -CONFIG_USB_LIBCOMPOSITE=m -CONFIG_USB_F_ACM=m -CONFIG_USB_F_SS_LB=m -CONFIG_USB_U_SERIAL=m -CONFIG_USB_U_ETHER=m -CONFIG_USB_U_AUDIO=m -CONFIG_USB_F_SERIAL=m -CONFIG_USB_F_OBEX=m -CONFIG_USB_F_NCM=m -CONFIG_USB_F_ECM=m -CONFIG_USB_F_PHONET=m -CONFIG_USB_F_EEM=m -CONFIG_USB_F_SUBSET=m -CONFIG_USB_F_RNDIS=m -CONFIG_USB_F_MASS_STORAGE=m -CONFIG_USB_F_FS=m -CONFIG_USB_F_UAC1=m -CONFIG_USB_F_UAC1_LEGACY=m -CONFIG_USB_F_UAC2=m -CONFIG_USB_F_UVC=m -CONFIG_USB_F_MIDI=m -CONFIG_USB_F_HID=m -CONFIG_USB_F_PRINTER=m -CONFIG_USB_F_TCM=m -CONFIG_USB_CONFIGFS=m 
-CONFIG_USB_CONFIGFS_SERIAL=y -CONFIG_USB_CONFIGFS_ACM=y -CONFIG_USB_CONFIGFS_OBEX=y -CONFIG_USB_CONFIGFS_NCM=y -CONFIG_USB_CONFIGFS_ECM=y -CONFIG_USB_CONFIGFS_ECM_SUBSET=y -CONFIG_USB_CONFIGFS_RNDIS=y -CONFIG_USB_CONFIGFS_EEM=y -CONFIG_USB_CONFIGFS_PHONET=y -CONFIG_USB_CONFIGFS_MASS_STORAGE=y -CONFIG_USB_CONFIGFS_F_LB_SS=y -CONFIG_USB_CONFIGFS_F_FS=y -CONFIG_USB_CONFIGFS_F_UAC1=y -CONFIG_USB_CONFIGFS_F_UAC1_LEGACY=y -CONFIG_USB_CONFIGFS_F_UAC2=y -CONFIG_USB_CONFIGFS_F_MIDI=y -CONFIG_USB_CONFIGFS_F_HID=y -CONFIG_USB_CONFIGFS_F_UVC=y -CONFIG_USB_CONFIGFS_F_PRINTER=y -CONFIG_USB_CONFIGFS_F_TCM=y - -# -# USB Gadget precomposed configurations -# -CONFIG_USB_ZERO=m -CONFIG_USB_AUDIO=m -# CONFIG_GADGET_UAC1 is not set -CONFIG_USB_ETH=m -CONFIG_USB_ETH_RNDIS=y -CONFIG_USB_ETH_EEM=y -CONFIG_USB_G_NCM=m -CONFIG_USB_GADGETFS=m -CONFIG_USB_FUNCTIONFS=m -CONFIG_USB_FUNCTIONFS_ETH=y -CONFIG_USB_FUNCTIONFS_RNDIS=y -CONFIG_USB_FUNCTIONFS_GENERIC=y -CONFIG_USB_MASS_STORAGE=m -CONFIG_USB_GADGET_TARGET=m -CONFIG_USB_G_SERIAL=m -CONFIG_USB_MIDI_GADGET=m -CONFIG_USB_G_PRINTER=m -CONFIG_USB_CDC_COMPOSITE=m -CONFIG_USB_G_NOKIA=m -CONFIG_USB_G_ACM_MS=m -CONFIG_USB_G_MULTI=m -CONFIG_USB_G_MULTI_RNDIS=y -CONFIG_USB_G_MULTI_CDC=y -CONFIG_USB_G_HID=m -CONFIG_USB_G_DBGP=m -# CONFIG_USB_G_DBGP_PRINTK is not set -CONFIG_USB_G_DBGP_SERIAL=y -CONFIG_USB_G_WEBCAM=m -CONFIG_USB_RAW_GADGET=m -# end of USB Gadget precomposed configurations - -CONFIG_TYPEC=m -CONFIG_TYPEC_TCPM=m -CONFIG_TYPEC_TCPCI=m -CONFIG_TYPEC_RT1711H=m -CONFIG_TYPEC_FUSB302=m -CONFIG_TYPEC_WCOVE=m -CONFIG_TYPEC_UCSI=m -CONFIG_UCSI_CCG=m -CONFIG_UCSI_ACPI=m -CONFIG_TYPEC_HD3SS3220=m -CONFIG_TYPEC_TPS6598X=m - -# -# USB Type-C Multiplexer/DeMultiplexer Switch support -# -CONFIG_TYPEC_MUX_PI3USB30532=m -CONFIG_TYPEC_MUX_INTEL_PMC=m -# end of USB Type-C Multiplexer/DeMultiplexer Switch support - -# -# USB Type-C Alternate Mode drivers -# -CONFIG_TYPEC_DP_ALTMODE=m -CONFIG_TYPEC_NVIDIA_ALTMODE=m -# end of USB Type-C Alternate Mode drivers - -CONFIG_USB_ROLE_SWITCH=m -CONFIG_USB_ROLES_INTEL_XHCI=m -CONFIG_MMC=m -CONFIG_PWRSEQ_EMMC=m -CONFIG_PWRSEQ_SD8787=m -CONFIG_PWRSEQ_SIMPLE=m -CONFIG_MMC_BLOCK=m -CONFIG_MMC_BLOCK_MINORS=8 -CONFIG_SDIO_UART=m -CONFIG_MMC_TEST=m - -# -# MMC/SD/SDIO Host Controller Drivers -# -# CONFIG_MMC_DEBUG is not set -CONFIG_MMC_SDHCI=m -CONFIG_MMC_SDHCI_IO_ACCESSORS=y -CONFIG_MMC_SDHCI_PCI=m -CONFIG_MMC_RICOH_MMC=y -CONFIG_MMC_SDHCI_ACPI=m -CONFIG_MMC_SDHCI_PLTFM=m -CONFIG_MMC_SDHCI_OF_ARASAN=m -CONFIG_MMC_SDHCI_OF_ASPEED=m -CONFIG_MMC_SDHCI_OF_AT91=m -CONFIG_MMC_SDHCI_OF_DWCMSHC=m -CONFIG_MMC_SDHCI_CADENCE=m -CONFIG_MMC_SDHCI_F_SDH30=m -CONFIG_MMC_SDHCI_MILBEAUT=m -CONFIG_MMC_WBSD=m -CONFIG_MMC_ALCOR=m -CONFIG_MMC_TIFM_SD=m -CONFIG_MMC_SPI=m -CONFIG_MMC_SDRICOH_CS=m -CONFIG_MMC_CB710=m -CONFIG_MMC_VIA_SDMMC=m -CONFIG_MMC_VUB300=m -CONFIG_MMC_USHC=m -CONFIG_MMC_USDHI6ROL0=m -CONFIG_MMC_REALTEK_PCI=m -CONFIG_MMC_REALTEK_USB=m -CONFIG_MMC_CQHCI=m -CONFIG_MMC_HSQ=m -CONFIG_MMC_TOSHIBA_PCI=m -CONFIG_MMC_MTK=m -CONFIG_MMC_SDHCI_XENON=m -CONFIG_MMC_SDHCI_OMAP=m -CONFIG_MMC_SDHCI_AM654=m -CONFIG_MMC_SDHCI_EXTERNAL_DMA=y -CONFIG_MEMSTICK=m -# CONFIG_MEMSTICK_DEBUG is not set - -# -# MemoryStick drivers -# -# CONFIG_MEMSTICK_UNSAFE_RESUME is not set -CONFIG_MSPRO_BLOCK=m -CONFIG_MS_BLOCK=m - -# -# MemoryStick Host Controller Drivers -# -CONFIG_MEMSTICK_TIFM_MS=m -CONFIG_MEMSTICK_JMICRON_38X=m -CONFIG_MEMSTICK_R592=m -CONFIG_MEMSTICK_REALTEK_PCI=m -CONFIG_MEMSTICK_REALTEK_USB=m -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_CLASS_FLASH=m 
-CONFIG_LEDS_CLASS_MULTICOLOR=m -CONFIG_LEDS_BRIGHTNESS_HW_CHANGED=y - -# -# LED drivers -# -CONFIG_LEDS_88PM860X=m -CONFIG_LEDS_AAT1290=m -CONFIG_LEDS_AN30259A=m -CONFIG_LEDS_APU=m -CONFIG_LEDS_AS3645A=m -CONFIG_LEDS_AW2013=m -CONFIG_LEDS_BCM6328=m -CONFIG_LEDS_BCM6358=m -CONFIG_LEDS_CPCAP=m -CONFIG_LEDS_CR0014114=m -CONFIG_LEDS_EL15203000=m -CONFIG_LEDS_LM3530=m -CONFIG_LEDS_LM3532=m -CONFIG_LEDS_LM3533=m -CONFIG_LEDS_LM3642=m -CONFIG_LEDS_LM3692X=m -CONFIG_LEDS_LM3601X=m -CONFIG_LEDS_MT6323=m -CONFIG_LEDS_PCA9532=m -CONFIG_LEDS_PCA9532_GPIO=y -CONFIG_LEDS_GPIO=m -CONFIG_LEDS_LP3944=m -CONFIG_LEDS_LP3952=m -# CONFIG_LEDS_LP55XX_COMMON is not set -CONFIG_LEDS_LP8788=m -CONFIG_LEDS_LP8860=m -CONFIG_LEDS_CLEVO_MAIL=m -CONFIG_LEDS_PCA955X=m -CONFIG_LEDS_PCA955X_GPIO=y -CONFIG_LEDS_PCA963X=m -CONFIG_LEDS_WM831X_STATUS=m -CONFIG_LEDS_WM8350=m -CONFIG_LEDS_DA903X=m -CONFIG_LEDS_DA9052=m -CONFIG_LEDS_DAC124S085=m -CONFIG_LEDS_PWM=m -CONFIG_LEDS_REGULATOR=m -CONFIG_LEDS_BD2802=m -CONFIG_LEDS_INTEL_SS4200=m -CONFIG_LEDS_LT3593=m -CONFIG_LEDS_ADP5520=m -CONFIG_LEDS_MC13783=m -CONFIG_LEDS_TCA6507=m -CONFIG_LEDS_TLC591XX=m -CONFIG_LEDS_MAX77650=m -CONFIG_LEDS_MAX77693=m -CONFIG_LEDS_MAX8997=m -CONFIG_LEDS_LM355x=m -CONFIG_LEDS_MENF21BMC=m -CONFIG_LEDS_KTD2692=m -CONFIG_LEDS_IS31FL319X=m -CONFIG_LEDS_IS31FL32XX=m - -# -# LED driver for blink(1) USB RGB LED is under Special HID drivers (HID_THINGM) -# -CONFIG_LEDS_BLINKM=m -CONFIG_LEDS_SYSCON=y -CONFIG_LEDS_MLXCPLD=m -CONFIG_LEDS_MLXREG=m -CONFIG_LEDS_USER=m -CONFIG_LEDS_NIC78BX=m -CONFIG_LEDS_SPI_BYTE=m -CONFIG_LEDS_TI_LMU_COMMON=m -CONFIG_LEDS_LM3697=m -CONFIG_LEDS_LM36274=m -CONFIG_LEDS_TPS6105X=m -CONFIG_LEDS_SGM3140=m - -# -# LED Triggers -# -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=m -CONFIG_LEDS_TRIGGER_ONESHOT=m -CONFIG_LEDS_TRIGGER_DISK=y -CONFIG_LEDS_TRIGGER_MTD=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=m -CONFIG_LEDS_TRIGGER_BACKLIGHT=m -CONFIG_LEDS_TRIGGER_CPU=y -CONFIG_LEDS_TRIGGER_ACTIVITY=m -CONFIG_LEDS_TRIGGER_GPIO=m -CONFIG_LEDS_TRIGGER_DEFAULT_ON=m - -# -# iptables trigger is under Netfilter config (LED target) -# -CONFIG_LEDS_TRIGGER_TRANSIENT=m -CONFIG_LEDS_TRIGGER_CAMERA=m -CONFIG_LEDS_TRIGGER_PANIC=y -CONFIG_LEDS_TRIGGER_NETDEV=m -CONFIG_LEDS_TRIGGER_PATTERN=m -CONFIG_LEDS_TRIGGER_AUDIO=m -CONFIG_ACCESSIBILITY=y -CONFIG_A11Y_BRAILLE_CONSOLE=y - -# -# Speakup console speech -# -CONFIG_SPEAKUP=m -CONFIG_SPEAKUP_SYNTH_ACNTSA=m -CONFIG_SPEAKUP_SYNTH_APOLLO=m -CONFIG_SPEAKUP_SYNTH_AUDPTR=m -CONFIG_SPEAKUP_SYNTH_BNS=m -CONFIG_SPEAKUP_SYNTH_DECTLK=m -CONFIG_SPEAKUP_SYNTH_DECEXT=m -CONFIG_SPEAKUP_SYNTH_LTLK=m -CONFIG_SPEAKUP_SYNTH_SOFT=m -CONFIG_SPEAKUP_SYNTH_SPKOUT=m -CONFIG_SPEAKUP_SYNTH_TXPRT=m -CONFIG_SPEAKUP_SYNTH_DUMMY=m -# end of Speakup console speech - -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_MAD=m -CONFIG_INFINIBAND_USER_ACCESS=m -CONFIG_INFINIBAND_USER_MEM=y -CONFIG_INFINIBAND_ON_DEMAND_PAGING=y -CONFIG_INFINIBAND_ADDR_TRANS=y -CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y -CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_MTHCA_DEBUG=y -CONFIG_INFINIBAND_QIB=m -CONFIG_INFINIBAND_QIB_DCA=y -CONFIG_INFINIBAND_CXGB4=m -CONFIG_INFINIBAND_EFA=m -CONFIG_INFINIBAND_I40IW=m -CONFIG_MLX4_INFINIBAND=m -CONFIG_MLX5_INFINIBAND=m -CONFIG_INFINIBAND_OCRDMA=m -CONFIG_INFINIBAND_VMWARE_PVRDMA=m -CONFIG_INFINIBAND_USNIC=m -CONFIG_INFINIBAND_BNXT_RE=m -CONFIG_INFINIBAND_HFI1=m -# CONFIG_HFI1_DEBUG_SDMA_ORDER is not set -# CONFIG_SDMA_VERBOSITY is not set -CONFIG_INFINIBAND_QEDR=m -CONFIG_INFINIBAND_RDMAVT=m -CONFIG_RDMA_RXE=m -CONFIG_RDMA_SIW=m 
-CONFIG_INFINIBAND_IPOIB=m -CONFIG_INFINIBAND_IPOIB_CM=y -CONFIG_INFINIBAND_IPOIB_DEBUG=y -# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set -CONFIG_INFINIBAND_SRP=m -CONFIG_INFINIBAND_SRPT=m -CONFIG_INFINIBAND_ISER=m -CONFIG_INFINIBAND_ISERT=m -CONFIG_INFINIBAND_RTRS=m -CONFIG_INFINIBAND_RTRS_CLIENT=m -CONFIG_INFINIBAND_RTRS_SERVER=m -CONFIG_INFINIBAND_OPA_VNIC=m -CONFIG_EDAC_ATOMIC_SCRUB=y -CONFIG_EDAC_SUPPORT=y -CONFIG_EDAC=y -CONFIG_EDAC_LEGACY_SYSFS=y -# CONFIG_EDAC_DEBUG is not set -CONFIG_EDAC_DECODE_MCE=m -CONFIG_EDAC_GHES=y -CONFIG_EDAC_AMD64=m -# CONFIG_EDAC_AMD64_ERROR_INJECTION is not set -CONFIG_EDAC_E752X=m -CONFIG_EDAC_I82975X=m -CONFIG_EDAC_I3000=m -CONFIG_EDAC_I3200=m -CONFIG_EDAC_IE31200=m -CONFIG_EDAC_X38=m -CONFIG_EDAC_I5400=m -CONFIG_EDAC_I7CORE=m -CONFIG_EDAC_I5000=m -CONFIG_EDAC_I5100=m -CONFIG_EDAC_I7300=m -CONFIG_EDAC_SBRIDGE=m -CONFIG_EDAC_SKX=m -CONFIG_EDAC_I10NM=m -CONFIG_EDAC_PND2=m -CONFIG_RTC_LIB=y -CONFIG_RTC_MC146818_LIB=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_HCTOSYS=y -CONFIG_RTC_HCTOSYS_DEVICE="rtc0" -CONFIG_RTC_SYSTOHC=y -CONFIG_RTC_SYSTOHC_DEVICE="rtc0" -# CONFIG_RTC_DEBUG is not set -CONFIG_RTC_NVMEM=y - -# -# RTC interfaces -# -CONFIG_RTC_INTF_SYSFS=y -CONFIG_RTC_INTF_PROC=y -CONFIG_RTC_INTF_DEV=y -CONFIG_RTC_INTF_DEV_UIE_EMUL=y -# CONFIG_RTC_DRV_TEST is not set - -# -# I2C RTC drivers -# -CONFIG_RTC_DRV_88PM860X=m -CONFIG_RTC_DRV_88PM80X=m -CONFIG_RTC_DRV_ABB5ZES3=m -CONFIG_RTC_DRV_ABEOZ9=m -CONFIG_RTC_DRV_ABX80X=m -CONFIG_RTC_DRV_AS3722=m -CONFIG_RTC_DRV_DS1307=m -CONFIG_RTC_DRV_DS1307_CENTURY=y -CONFIG_RTC_DRV_DS1374=m -CONFIG_RTC_DRV_DS1374_WDT=y -CONFIG_RTC_DRV_DS1672=m -CONFIG_RTC_DRV_HYM8563=m -CONFIG_RTC_DRV_LP8788=m -CONFIG_RTC_DRV_MAX6900=m -CONFIG_RTC_DRV_MAX8907=m -CONFIG_RTC_DRV_MAX8925=m -CONFIG_RTC_DRV_MAX8998=m -CONFIG_RTC_DRV_MAX8997=m -CONFIG_RTC_DRV_MAX77686=m -CONFIG_RTC_DRV_RK808=m -CONFIG_RTC_DRV_RS5C372=m -CONFIG_RTC_DRV_ISL1208=m -CONFIG_RTC_DRV_ISL12022=m -CONFIG_RTC_DRV_ISL12026=m -CONFIG_RTC_DRV_X1205=m -CONFIG_RTC_DRV_PCF8523=m -CONFIG_RTC_DRV_PCF85063=m -CONFIG_RTC_DRV_PCF85363=m -CONFIG_RTC_DRV_PCF8563=m -CONFIG_RTC_DRV_PCF8583=m -CONFIG_RTC_DRV_M41T80=m -CONFIG_RTC_DRV_M41T80_WDT=y -CONFIG_RTC_DRV_BD70528=m -CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_TWL4030=m -CONFIG_RTC_DRV_PALMAS=m -CONFIG_RTC_DRV_TPS6586X=m -CONFIG_RTC_DRV_TPS65910=m -CONFIG_RTC_DRV_TPS80031=m -CONFIG_RTC_DRV_RC5T583=m -CONFIG_RTC_DRV_RC5T619=m -CONFIG_RTC_DRV_S35390A=m -CONFIG_RTC_DRV_FM3130=m -CONFIG_RTC_DRV_RX8010=m -CONFIG_RTC_DRV_RX8581=m -CONFIG_RTC_DRV_RX8025=m -CONFIG_RTC_DRV_EM3027=m -CONFIG_RTC_DRV_RV3028=m -CONFIG_RTC_DRV_RV8803=m -CONFIG_RTC_DRV_S5M=m -CONFIG_RTC_DRV_SD3078=m - -# -# SPI RTC drivers -# -CONFIG_RTC_DRV_M41T93=m -CONFIG_RTC_DRV_M41T94=m -CONFIG_RTC_DRV_DS1302=m -CONFIG_RTC_DRV_DS1305=m -CONFIG_RTC_DRV_DS1343=m -CONFIG_RTC_DRV_DS1347=m -CONFIG_RTC_DRV_DS1390=m -CONFIG_RTC_DRV_MAX6916=m -CONFIG_RTC_DRV_R9701=m -CONFIG_RTC_DRV_RX4581=m -CONFIG_RTC_DRV_RX6110=m -CONFIG_RTC_DRV_RS5C348=m -CONFIG_RTC_DRV_MAX6902=m -CONFIG_RTC_DRV_PCF2123=m -CONFIG_RTC_DRV_MCP795=m -CONFIG_RTC_I2C_AND_SPI=y - -# -# SPI and I2C RTC drivers -# -CONFIG_RTC_DRV_DS3232=m -CONFIG_RTC_DRV_DS3232_HWMON=y -CONFIG_RTC_DRV_PCF2127=m -CONFIG_RTC_DRV_RV3029C2=m -CONFIG_RTC_DRV_RV3029_HWMON=y - -# -# Platform RTC drivers -# -CONFIG_RTC_DRV_CMOS=y -CONFIG_RTC_DRV_DS1286=m -CONFIG_RTC_DRV_DS1511=m -CONFIG_RTC_DRV_DS1553=m -CONFIG_RTC_DRV_DS1685_FAMILY=m -CONFIG_RTC_DRV_DS1685=y -# CONFIG_RTC_DRV_DS1689 is not set -# CONFIG_RTC_DRV_DS17285 is not set -# 
CONFIG_RTC_DRV_DS17485 is not set -# CONFIG_RTC_DRV_DS17885 is not set -CONFIG_RTC_DRV_DS1742=m -CONFIG_RTC_DRV_DS2404=m -CONFIG_RTC_DRV_DA9052=m -CONFIG_RTC_DRV_DA9055=m -CONFIG_RTC_DRV_DA9063=m -CONFIG_RTC_DRV_STK17TA8=m -CONFIG_RTC_DRV_M48T86=m -CONFIG_RTC_DRV_M48T35=m -CONFIG_RTC_DRV_M48T59=m -CONFIG_RTC_DRV_MSM6242=m -CONFIG_RTC_DRV_BQ4802=m -CONFIG_RTC_DRV_RP5C01=m -CONFIG_RTC_DRV_V3020=m -CONFIG_RTC_DRV_WM831X=m -CONFIG_RTC_DRV_WM8350=m -CONFIG_RTC_DRV_PCF50633=m -CONFIG_RTC_DRV_AB3100=m -CONFIG_RTC_DRV_ZYNQMP=m -CONFIG_RTC_DRV_CROS_EC=m - -# -# on-CPU RTC drivers -# -CONFIG_RTC_DRV_CADENCE=m -CONFIG_RTC_DRV_FTRTC010=m -CONFIG_RTC_DRV_PCAP=m -CONFIG_RTC_DRV_MC13XXX=m -CONFIG_RTC_DRV_MT6397=m -CONFIG_RTC_DRV_R7301=m -CONFIG_RTC_DRV_CPCAP=m - -# -# HID Sensor RTC drivers -# -CONFIG_RTC_DRV_HID_SENSOR_TIME=m -CONFIG_RTC_DRV_WILCO_EC=m -CONFIG_DMADEVICES=y -# CONFIG_DMADEVICES_DEBUG is not set - -# -# DMA Devices -# -CONFIG_DMA_ENGINE=y -CONFIG_DMA_VIRTUAL_CHANNELS=y -CONFIG_DMA_ACPI=y -CONFIG_DMA_OF=y -CONFIG_ALTERA_MSGDMA=m -CONFIG_DW_AXI_DMAC=m -CONFIG_FSL_EDMA=m -CONFIG_INTEL_IDMA64=m -CONFIG_INTEL_IDXD=m -CONFIG_INTEL_IOATDMA=m -CONFIG_INTEL_MIC_X100_DMA=m -CONFIG_PLX_DMA=m -# CONFIG_XILINX_ZYNQMP_DPDMA is not set -CONFIG_QCOM_HIDMA_MGMT=m -CONFIG_QCOM_HIDMA=m -CONFIG_DW_DMAC_CORE=y -CONFIG_DW_DMAC=y -CONFIG_DW_DMAC_PCI=y -CONFIG_DW_EDMA=m -CONFIG_DW_EDMA_PCIE=m -CONFIG_HSU_DMA=y -CONFIG_SF_PDMA=m - -# -# DMA Clients -# -CONFIG_ASYNC_TX_DMA=y -# CONFIG_DMATEST is not set -CONFIG_DMA_ENGINE_RAID=y - -# -# DMABUF options -# -CONFIG_SYNC_FILE=y -# CONFIG_SW_SYNC is not set -CONFIG_UDMABUF=y -# CONFIG_DMABUF_MOVE_NOTIFY is not set -# CONFIG_DMABUF_SELFTESTS is not set -CONFIG_DMABUF_HEAPS=y -CONFIG_DMABUF_HEAPS_SYSTEM=y -# end of DMABUF options - -CONFIG_DCA=m -CONFIG_AUXDISPLAY=y -CONFIG_HD44780=m -CONFIG_KS0108=m -CONFIG_KS0108_PORT=0x378 -CONFIG_KS0108_DELAY=2 -CONFIG_CFAG12864B=m -CONFIG_CFAG12864B_RATE=20 -CONFIG_IMG_ASCII_LCD=m -CONFIG_HT16K33=m -CONFIG_PARPORT_PANEL=m -CONFIG_PANEL_PARPORT=0 -CONFIG_PANEL_PROFILE=5 -# CONFIG_PANEL_CHANGE_MESSAGE is not set -# CONFIG_CHARLCD_BL_OFF is not set -# CONFIG_CHARLCD_BL_ON is not set -CONFIG_CHARLCD_BL_FLASH=y -CONFIG_PANEL=m -CONFIG_CHARLCD=m -CONFIG_UIO=m -CONFIG_UIO_CIF=m -CONFIG_UIO_PDRV_GENIRQ=m -CONFIG_UIO_DMEM_GENIRQ=m -CONFIG_UIO_AEC=m -CONFIG_UIO_SERCOS3=m -CONFIG_UIO_PCI_GENERIC=m -CONFIG_UIO_NETX=m -CONFIG_UIO_PRUSS=m -CONFIG_UIO_MF624=m -CONFIG_UIO_HV_GENERIC=m -CONFIG_VFIO_IOMMU_TYPE1=m -CONFIG_VFIO_VIRQFD=m -CONFIG_VFIO=m -# CONFIG_VFIO_NOIOMMU is not set -CONFIG_VFIO_PCI=m -CONFIG_VFIO_PCI_VGA=y -CONFIG_VFIO_PCI_MMAP=y -CONFIG_VFIO_PCI_INTX=y -CONFIG_VFIO_PCI_IGD=y -CONFIG_VFIO_MDEV=m -CONFIG_VFIO_MDEV_DEVICE=m -CONFIG_IRQ_BYPASS_MANAGER=m -CONFIG_VIRT_DRIVERS=y -CONFIG_VBOXGUEST=m -CONFIG_VIRTIO=y -CONFIG_VIRTIO_MENU=y -CONFIG_VIRTIO_PCI=m -CONFIG_VIRTIO_PCI_LEGACY=y -CONFIG_VIRTIO_VDPA=m -CONFIG_VIRTIO_PMEM=m -CONFIG_VIRTIO_BALLOON=m -CONFIG_VIRTIO_MEM=m -CONFIG_VIRTIO_INPUT=m -CONFIG_VIRTIO_MMIO=m -CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_VDPA=m -CONFIG_VDPA_SIM=m -CONFIG_IFCVF=m -CONFIG_MLX5_VDPA=y -CONFIG_MLX5_VDPA_NET=m -CONFIG_VHOST_IOTLB=m -CONFIG_VHOST_RING=m -CONFIG_VHOST=m -CONFIG_VHOST_MENU=y -CONFIG_VHOST_NET=m -CONFIG_VHOST_SCSI=m -CONFIG_VHOST_VSOCK=m -CONFIG_VHOST_VDPA=m -# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set - -# -# Microsoft Hyper-V guest support -# -CONFIG_HYPERV=m -CONFIG_HYPERV_TIMER=y -CONFIG_HYPERV_UTILS=m -CONFIG_HYPERV_BALLOON=m -# end of Microsoft Hyper-V guest support - -# -# Xen 
driver support -# -CONFIG_XEN_BALLOON=y -CONFIG_XEN_BALLOON_MEMORY_HOTPLUG=y -CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT=512 -CONFIG_XEN_SCRUB_PAGES_DEFAULT=y -CONFIG_XEN_DEV_EVTCHN=m -CONFIG_XEN_BACKEND=y -CONFIG_XENFS=m -CONFIG_XEN_COMPAT_XENFS=y -CONFIG_XEN_SYS_HYPERVISOR=y -CONFIG_XEN_XENBUS_FRONTEND=y -CONFIG_XEN_GNTDEV=m -CONFIG_XEN_GNTDEV_DMABUF=y -CONFIG_XEN_GRANT_DEV_ALLOC=m -CONFIG_XEN_GRANT_DMA_ALLOC=y -CONFIG_SWIOTLB_XEN=y -CONFIG_XEN_PCIDEV_BACKEND=m -CONFIG_XEN_PVCALLS_FRONTEND=m -CONFIG_XEN_PVCALLS_BACKEND=y -CONFIG_XEN_SCSI_BACKEND=m -CONFIG_XEN_PRIVCMD=m -CONFIG_XEN_ACPI_PROCESSOR=m -CONFIG_XEN_MCE_LOG=y -CONFIG_XEN_HAVE_PVMMU=y -CONFIG_XEN_EFI=y -CONFIG_XEN_AUTO_XLATE=y -CONFIG_XEN_ACPI=y -CONFIG_XEN_SYMS=y -CONFIG_XEN_HAVE_VPMU=y -CONFIG_XEN_FRONT_PGDIR_SHBUF=m -CONFIG_XEN_UNPOPULATED_ALLOC=y -# end of Xen driver support - -# CONFIG_GREYBUS is not set -CONFIG_STAGING=y -CONFIG_PRISM2_USB=m -CONFIG_COMEDI=m -# CONFIG_COMEDI_DEBUG is not set -CONFIG_COMEDI_DEFAULT_BUF_SIZE_KB=2048 -CONFIG_COMEDI_DEFAULT_BUF_MAXSIZE_KB=20480 -CONFIG_COMEDI_MISC_DRIVERS=y -CONFIG_COMEDI_BOND=m -CONFIG_COMEDI_TEST=m -CONFIG_COMEDI_PARPORT=m -# CONFIG_COMEDI_ISA_DRIVERS is not set -CONFIG_COMEDI_PCI_DRIVERS=m -CONFIG_COMEDI_8255_PCI=m -CONFIG_COMEDI_ADDI_WATCHDOG=m -CONFIG_COMEDI_ADDI_APCI_1032=m -CONFIG_COMEDI_ADDI_APCI_1500=m -CONFIG_COMEDI_ADDI_APCI_1516=m -CONFIG_COMEDI_ADDI_APCI_1564=m -CONFIG_COMEDI_ADDI_APCI_16XX=m -CONFIG_COMEDI_ADDI_APCI_2032=m -CONFIG_COMEDI_ADDI_APCI_2200=m -CONFIG_COMEDI_ADDI_APCI_3120=m -CONFIG_COMEDI_ADDI_APCI_3501=m -CONFIG_COMEDI_ADDI_APCI_3XXX=m -CONFIG_COMEDI_ADL_PCI6208=m -CONFIG_COMEDI_ADL_PCI7X3X=m -CONFIG_COMEDI_ADL_PCI8164=m -CONFIG_COMEDI_ADL_PCI9111=m -CONFIG_COMEDI_ADL_PCI9118=m -CONFIG_COMEDI_ADV_PCI1710=m -CONFIG_COMEDI_ADV_PCI1720=m -CONFIG_COMEDI_ADV_PCI1723=m -CONFIG_COMEDI_ADV_PCI1724=m -CONFIG_COMEDI_ADV_PCI1760=m -CONFIG_COMEDI_ADV_PCI_DIO=m -CONFIG_COMEDI_AMPLC_DIO200_PCI=m -CONFIG_COMEDI_AMPLC_PC236_PCI=m -CONFIG_COMEDI_AMPLC_PC263_PCI=m -CONFIG_COMEDI_AMPLC_PCI224=m -CONFIG_COMEDI_AMPLC_PCI230=m -CONFIG_COMEDI_CONTEC_PCI_DIO=m -CONFIG_COMEDI_DAS08_PCI=m -CONFIG_COMEDI_DT3000=m -CONFIG_COMEDI_DYNA_PCI10XX=m -CONFIG_COMEDI_GSC_HPDI=m -CONFIG_COMEDI_MF6X4=m -CONFIG_COMEDI_ICP_MULTI=m -CONFIG_COMEDI_DAQBOARD2000=m -CONFIG_COMEDI_JR3_PCI=m -CONFIG_COMEDI_KE_COUNTER=m -CONFIG_COMEDI_CB_PCIDAS64=m -CONFIG_COMEDI_CB_PCIDAS=m -CONFIG_COMEDI_CB_PCIDDA=m -CONFIG_COMEDI_CB_PCIMDAS=m -CONFIG_COMEDI_CB_PCIMDDA=m -CONFIG_COMEDI_ME4000=m -CONFIG_COMEDI_ME_DAQ=m -CONFIG_COMEDI_NI_6527=m -CONFIG_COMEDI_NI_65XX=m -CONFIG_COMEDI_NI_660X=m -CONFIG_COMEDI_NI_670X=m -CONFIG_COMEDI_NI_LABPC_PCI=m -CONFIG_COMEDI_NI_PCIDIO=m -CONFIG_COMEDI_NI_PCIMIO=m -CONFIG_COMEDI_RTD520=m -CONFIG_COMEDI_S626=m -CONFIG_COMEDI_MITE=m -CONFIG_COMEDI_NI_TIOCMD=m -CONFIG_COMEDI_PCMCIA_DRIVERS=m -CONFIG_COMEDI_CB_DAS16_CS=m -CONFIG_COMEDI_DAS08_CS=m -CONFIG_COMEDI_NI_DAQ_700_CS=m -CONFIG_COMEDI_NI_DAQ_DIO24_CS=m -CONFIG_COMEDI_NI_LABPC_CS=m -CONFIG_COMEDI_NI_MIO_CS=m -CONFIG_COMEDI_QUATECH_DAQP_CS=m -CONFIG_COMEDI_USB_DRIVERS=m -CONFIG_COMEDI_DT9812=m -CONFIG_COMEDI_NI_USB6501=m -CONFIG_COMEDI_USBDUX=m -CONFIG_COMEDI_USBDUXFAST=m -CONFIG_COMEDI_USBDUXSIGMA=m -CONFIG_COMEDI_VMK80XX=m -CONFIG_COMEDI_8254=m -CONFIG_COMEDI_8255=m -CONFIG_COMEDI_8255_SA=m -CONFIG_COMEDI_KCOMEDILIB=m -CONFIG_COMEDI_AMPLC_DIO200=m -CONFIG_COMEDI_AMPLC_PC236=m -CONFIG_COMEDI_DAS08=m -CONFIG_COMEDI_NI_LABPC=m -CONFIG_COMEDI_NI_TIO=m -CONFIG_COMEDI_NI_ROUTING=m -CONFIG_RTL8192U=m -CONFIG_RTLLIB=m 
-CONFIG_RTLLIB_CRYPTO_CCMP=m -CONFIG_RTLLIB_CRYPTO_TKIP=m -CONFIG_RTLLIB_CRYPTO_WEP=m -CONFIG_RTL8192E=m -CONFIG_RTL8723BS=m -CONFIG_R8712U=m -CONFIG_R8188EU=m -CONFIG_88EU_AP_MODE=y -CONFIG_RTS5208=m -CONFIG_VT6655=m -CONFIG_VT6656=m - -# -# IIO staging drivers -# - -# -# Accelerometers -# -CONFIG_ADIS16203=m -CONFIG_ADIS16240=m -# end of Accelerometers - -# -# Analog to digital converters -# -CONFIG_AD7816=m -CONFIG_AD7280=m -# end of Analog to digital converters - -# -# Analog digital bi-direction converters -# -CONFIG_ADT7316=m -CONFIG_ADT7316_SPI=m -CONFIG_ADT7316_I2C=m -# end of Analog digital bi-direction converters - -# -# Capacitance to digital converters -# -CONFIG_AD7150=m -CONFIG_AD7746=m -# end of Capacitance to digital converters - -# -# Direct Digital Synthesis -# -CONFIG_AD9832=m -CONFIG_AD9834=m -# end of Direct Digital Synthesis - -# -# Network Analyzer, Impedance Converters -# -CONFIG_AD5933=m -# end of Network Analyzer, Impedance Converters - -# -# Active energy metering IC -# -CONFIG_ADE7854=m -CONFIG_ADE7854_I2C=m -CONFIG_ADE7854_SPI=m -# end of Active energy metering IC - -# -# Resolver to digital converters -# -CONFIG_AD2S1210=m -# end of Resolver to digital converters -# end of IIO staging drivers - -# CONFIG_FB_SM750 is not set -CONFIG_STAGING_MEDIA=y -CONFIG_INTEL_ATOMISP=y -CONFIG_VIDEO_ATOMISP=m -CONFIG_VIDEO_ATOMISP_ISP2401=y -CONFIG_VIDEO_ATOMISP_OV5693=m -CONFIG_VIDEO_ATOMISP_OV2722=m -CONFIG_VIDEO_ATOMISP_GC2235=m -CONFIG_VIDEO_ATOMISP_MSRLIST_HELPER=m -CONFIG_VIDEO_ATOMISP_MT9M114=m -CONFIG_VIDEO_ATOMISP_GC0310=m -CONFIG_VIDEO_ATOMISP_OV2680=m -CONFIG_VIDEO_ATOMISP_LM3554=m -CONFIG_VIDEO_IPU3_IMGU=m -CONFIG_VIDEO_USBVISION=m - -# -# Android -# -# end of Android - -CONFIG_STAGING_BOARD=y -CONFIG_LTE_GDM724X=m -CONFIG_FIREWIRE_SERIAL=m -CONFIG_FWTTY_MAX_TOTAL_PORTS=64 -CONFIG_FWTTY_MAX_CARD_PORTS=32 -CONFIG_GS_FPGABOOT=m -CONFIG_UNISYSSPAR=y -CONFIG_UNISYS_VISORNIC=m -CONFIG_UNISYS_VISORINPUT=m -CONFIG_UNISYS_VISORHBA=m -# CONFIG_FB_TFT is not set -CONFIG_MOST_COMPONENTS=m -CONFIG_MOST_CDEV=m -CONFIG_MOST_NET=m -CONFIG_MOST_SOUND=m -CONFIG_MOST_VIDEO=m -CONFIG_MOST_DIM2=m -CONFIG_MOST_I2C=m -CONFIG_KS7010=m -CONFIG_PI433=m - -# -# Gasket devices -# -CONFIG_STAGING_GASKET_FRAMEWORK=m -CONFIG_STAGING_APEX_DRIVER=m -# end of Gasket devices - -CONFIG_XIL_AXIS_FIFO=m -CONFIG_FIELDBUS_DEV=m -CONFIG_HMS_ANYBUSS_BUS=m -CONFIG_ARCX_ANYBUS_CONTROLLER=m -CONFIG_HMS_PROFINET=m -CONFIG_KPC2000=y -CONFIG_KPC2000_CORE=m -CONFIG_KPC2000_SPI=m -CONFIG_KPC2000_I2C=m -CONFIG_KPC2000_DMA=m -CONFIG_QLGE=m -CONFIG_WFX=m -CONFIG_X86_PLATFORM_DEVICES=y -CONFIG_ACPI_WMI=m -CONFIG_WMI_BMOF=m -CONFIG_ALIENWARE_WMI=m -CONFIG_HUAWEI_WMI=m -CONFIG_INTEL_WMI_SBL_FW_UPDATE=m -CONFIG_INTEL_WMI_THUNDERBOLT=m -CONFIG_MXM_WMI=m -CONFIG_PEAQ_WMI=m -CONFIG_XIAOMI_WMI=m -CONFIG_ACERHDF=m -CONFIG_ACER_WIRELESS=m -CONFIG_ACER_WMI=m -CONFIG_APPLE_GMUX=m -CONFIG_ASUS_LAPTOP=m -CONFIG_ASUS_WIRELESS=m -CONFIG_ASUS_WMI=m -CONFIG_ASUS_NB_WMI=m -CONFIG_EEEPC_LAPTOP=m -CONFIG_EEEPC_WMI=m -CONFIG_DCDBAS=m -CONFIG_DELL_SMBIOS=m -CONFIG_DELL_SMBIOS_WMI=y -CONFIG_DELL_SMBIOS_SMM=y -CONFIG_DELL_LAPTOP=m -CONFIG_DELL_RBTN=m -# CONFIG_DELL_RBU is not set -CONFIG_DELL_SMO8800=m -CONFIG_DELL_WMI=m -CONFIG_DELL_WMI_DESCRIPTOR=m -CONFIG_DELL_WMI_AIO=m -CONFIG_DELL_WMI_LED=m -CONFIG_AMILO_RFKILL=m -CONFIG_FUJITSU_LAPTOP=m -CONFIG_FUJITSU_TABLET=m -CONFIG_GPD_POCKET_FAN=m -CONFIG_HP_ACCEL=m -CONFIG_HP_WIRELESS=m -CONFIG_HP_WMI=m -CONFIG_IBM_RTL=m -CONFIG_IDEAPAD_LAPTOP=m -CONFIG_SENSORS_HDAPS=m -CONFIG_THINKPAD_ACPI=m 
-CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y -# CONFIG_THINKPAD_ACPI_DEBUGFACILITIES is not set -# CONFIG_THINKPAD_ACPI_DEBUG is not set -# CONFIG_THINKPAD_ACPI_UNSAFE_LEDS is not set -CONFIG_THINKPAD_ACPI_VIDEO=y -CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y -CONFIG_INTEL_ATOMISP2_LED=m -CONFIG_INTEL_CHT_INT33FE=m -CONFIG_INTEL_HID_EVENT=m -CONFIG_INTEL_INT0002_VGPIO=m -CONFIG_INTEL_MENLOW=m -CONFIG_INTEL_OAKTRAIL=m -CONFIG_INTEL_VBTN=m -CONFIG_SURFACE3_WMI=m -CONFIG_SURFACE_3_BUTTON=m -CONFIG_SURFACE_3_POWER_OPREGION=m -CONFIG_SURFACE_PRO3_BUTTON=m -CONFIG_MSI_LAPTOP=m -CONFIG_MSI_WMI=m -CONFIG_PCENGINES_APU2=m -CONFIG_SAMSUNG_LAPTOP=m -CONFIG_SAMSUNG_Q10=m -CONFIG_ACPI_TOSHIBA=m -CONFIG_TOSHIBA_BT_RFKILL=m -CONFIG_TOSHIBA_HAPS=m -CONFIG_TOSHIBA_WMI=m -CONFIG_ACPI_CMPC=m -CONFIG_COMPAL_LAPTOP=m -CONFIG_LG_LAPTOP=m -CONFIG_PANASONIC_LAPTOP=m -CONFIG_SONY_LAPTOP=m -CONFIG_SONYPI_COMPAT=y -CONFIG_SYSTEM76_ACPI=m -CONFIG_TOPSTAR_LAPTOP=m -CONFIG_I2C_MULTI_INSTANTIATE=m -CONFIG_MLX_PLATFORM=m -CONFIG_TOUCHSCREEN_DMI=y -CONFIG_INTEL_IPS=m -CONFIG_INTEL_RST=m -CONFIG_INTEL_SMARTCONNECT=m - -# -# Intel Speed Select Technology interface support -# -CONFIG_INTEL_SPEED_SELECT_INTERFACE=m -# end of Intel Speed Select Technology interface support - -CONFIG_INTEL_TURBO_MAX_3=y -CONFIG_INTEL_UNCORE_FREQ_CONTROL=m -CONFIG_INTEL_BXTWC_PMIC_TMU=m -CONFIG_INTEL_CHTDC_TI_PWRBTN=m -CONFIG_INTEL_MFLD_THERMAL=m -CONFIG_INTEL_MID_POWER_BUTTON=m -CONFIG_INTEL_MRFLD_PWRBTN=m -CONFIG_INTEL_PMC_CORE=y -CONFIG_INTEL_PUNIT_IPC=m -CONFIG_INTEL_SCU_IPC=y -CONFIG_INTEL_SCU=y -CONFIG_INTEL_SCU_PCI=y -CONFIG_INTEL_SCU_PLATFORM=m -CONFIG_INTEL_SCU_IPC_UTIL=m -CONFIG_INTEL_TELEMETRY=m -CONFIG_PMC_ATOM=y -CONFIG_MFD_CROS_EC=m -CONFIG_CHROME_PLATFORMS=y -CONFIG_CHROMEOS_LAPTOP=m -CONFIG_CHROMEOS_PSTORE=m -CONFIG_CHROMEOS_TBMC=m -CONFIG_CROS_EC=m -CONFIG_CROS_EC_I2C=m -CONFIG_CROS_EC_RPMSG=m -CONFIG_CROS_EC_ISHTP=m -CONFIG_CROS_EC_SPI=m -CONFIG_CROS_EC_LPC=m -CONFIG_CROS_EC_PROTO=y -CONFIG_CROS_KBD_LED_BACKLIGHT=m -CONFIG_CROS_EC_CHARDEV=m -CONFIG_CROS_EC_LIGHTBAR=m -CONFIG_CROS_EC_VBC=m -CONFIG_CROS_EC_DEBUGFS=m -CONFIG_CROS_EC_SENSORHUB=m -CONFIG_CROS_EC_SYSFS=m -CONFIG_CROS_EC_TYPEC=m -CONFIG_CROS_USBPD_LOGGER=m -CONFIG_CROS_USBPD_NOTIFY=m -CONFIG_WILCO_EC=m -CONFIG_WILCO_EC_DEBUGFS=m -CONFIG_WILCO_EC_EVENTS=m -CONFIG_WILCO_EC_TELEMETRY=m -CONFIG_MELLANOX_PLATFORM=y -CONFIG_MLXREG_HOTPLUG=m -CONFIG_MLXREG_IO=m -CONFIG_HAVE_CLK=y -CONFIG_CLKDEV_LOOKUP=y -CONFIG_HAVE_CLK_PREPARE=y -CONFIG_COMMON_CLK=y -CONFIG_COMMON_CLK_WM831X=m -CONFIG_CLK_HSDK=y -CONFIG_COMMON_CLK_MAX77686=m -CONFIG_COMMON_CLK_MAX9485=m -CONFIG_COMMON_CLK_RK808=m -CONFIG_COMMON_CLK_SI5341=m -CONFIG_COMMON_CLK_SI5351=m -CONFIG_COMMON_CLK_SI514=m -CONFIG_COMMON_CLK_SI544=m -CONFIG_COMMON_CLK_SI570=m -CONFIG_COMMON_CLK_CDCE706=m -CONFIG_COMMON_CLK_CDCE925=m -CONFIG_COMMON_CLK_CS2000_CP=m -CONFIG_COMMON_CLK_S2MPS11=m -CONFIG_CLK_TWL6040=m -CONFIG_COMMON_CLK_LOCHNAGAR=m -CONFIG_COMMON_CLK_PALMAS=m -CONFIG_COMMON_CLK_PWM=m -CONFIG_COMMON_CLK_VC5=m -CONFIG_COMMON_CLK_BD718XX=m -CONFIG_COMMON_CLK_FIXED_MMIO=y -CONFIG_CLK_LGM_CGU=y -CONFIG_HWSPINLOCK=y - -# -# Clock Source drivers -# -CONFIG_TIMER_OF=y -CONFIG_TIMER_PROBE=y -CONFIG_CLKEVT_I8253=y -CONFIG_I8253_LOCK=y -CONFIG_CLKBLD_I8253=y -CONFIG_CLKSRC_MMIO=y -CONFIG_MICROCHIP_PIT64B=y -# end of Clock Source drivers - -CONFIG_MAILBOX=y -CONFIG_PLATFORM_MHU=m -CONFIG_PCC=y -CONFIG_ALTERA_MBOX=m -CONFIG_MAILBOX_TEST=m -CONFIG_IOMMU_IOVA=y -CONFIG_IOASID=y -CONFIG_IOMMU_API=y -CONFIG_IOMMU_SUPPORT=y - -# -# Generic IOMMU Pagetable 
Support -# -# end of Generic IOMMU Pagetable Support - -# CONFIG_IOMMU_DEBUGFS is not set -# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -CONFIG_OF_IOMMU=y -CONFIG_IOMMU_DMA=y -CONFIG_AMD_IOMMU=y -CONFIG_AMD_IOMMU_V2=y -CONFIG_DMAR_TABLE=y -CONFIG_INTEL_IOMMU=y -CONFIG_INTEL_IOMMU_SVM=y -# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set -CONFIG_INTEL_IOMMU_FLOPPY_WA=y -# CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON is not set -CONFIG_IRQ_REMAP=y -CONFIG_HYPERV_IOMMU=y - -# -# Remoteproc drivers -# -CONFIG_REMOTEPROC=y -# CONFIG_REMOTEPROC_CDEV is not set -# end of Remoteproc drivers - -# -# Rpmsg drivers -# -CONFIG_RPMSG=m -CONFIG_RPMSG_CHAR=m -CONFIG_RPMSG_QCOM_GLINK=m -CONFIG_RPMSG_QCOM_GLINK_RPM=m -CONFIG_RPMSG_VIRTIO=m -# end of Rpmsg drivers - -CONFIG_SOUNDWIRE=m - -# -# SoundWire Devices -# -CONFIG_SOUNDWIRE_CADENCE=m -CONFIG_SOUNDWIRE_INTEL=m -CONFIG_SOUNDWIRE_QCOM=m - -# -# SOC (System On Chip) specific Drivers -# - -# -# Amlogic SoC drivers -# -# end of Amlogic SoC drivers - -# -# Aspeed SoC drivers -# -# end of Aspeed SoC drivers - -# -# Broadcom SoC drivers -# -# end of Broadcom SoC drivers - -# -# NXP/Freescale QorIQ SoC drivers -# -# end of NXP/Freescale QorIQ SoC drivers - -# -# i.MX SoC drivers -# -# end of i.MX SoC drivers - -# -# Qualcomm SoC drivers -# -# end of Qualcomm SoC drivers - -CONFIG_SOC_TI=y - -# -# Xilinx SoC drivers -# -CONFIG_XILINX_VCU=m -# end of Xilinx SoC drivers -# end of SOC (System On Chip) specific Drivers - -CONFIG_PM_DEVFREQ=y - -# -# DEVFREQ Governors -# -CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=m -CONFIG_DEVFREQ_GOV_PERFORMANCE=m -CONFIG_DEVFREQ_GOV_POWERSAVE=m -CONFIG_DEVFREQ_GOV_USERSPACE=m -CONFIG_DEVFREQ_GOV_PASSIVE=m - -# -# DEVFREQ Drivers -# -CONFIG_PM_DEVFREQ_EVENT=y -CONFIG_EXTCON=y - -# -# Extcon Device Drivers -# -CONFIG_EXTCON_ADC_JACK=m -CONFIG_EXTCON_ARIZONA=m -CONFIG_EXTCON_AXP288=m -CONFIG_EXTCON_FSA9480=m -CONFIG_EXTCON_GPIO=m -CONFIG_EXTCON_INTEL_INT3496=m -CONFIG_EXTCON_INTEL_CHT_WC=m -CONFIG_EXTCON_INTEL_MRFLD=m -CONFIG_EXTCON_MAX14577=m -CONFIG_EXTCON_MAX3355=m -CONFIG_EXTCON_MAX77693=m -CONFIG_EXTCON_MAX77843=m -CONFIG_EXTCON_MAX8997=m -CONFIG_EXTCON_PALMAS=m -CONFIG_EXTCON_PTN5150=m -CONFIG_EXTCON_RT8973A=m -CONFIG_EXTCON_SM5502=m -CONFIG_EXTCON_USB_GPIO=m -CONFIG_EXTCON_USBC_CROS_EC=m -CONFIG_MEMORY=y -CONFIG_IIO=m -CONFIG_IIO_BUFFER=y -CONFIG_IIO_BUFFER_CB=m -CONFIG_IIO_BUFFER_DMA=m -CONFIG_IIO_BUFFER_DMAENGINE=m -CONFIG_IIO_BUFFER_HW_CONSUMER=m -CONFIG_IIO_KFIFO_BUF=m -CONFIG_IIO_TRIGGERED_BUFFER=m -CONFIG_IIO_CONFIGFS=m -CONFIG_IIO_TRIGGER=y -CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 -CONFIG_IIO_SW_DEVICE=m -CONFIG_IIO_SW_TRIGGER=m -CONFIG_IIO_TRIGGERED_EVENT=m - -# -# Accelerometers -# -CONFIG_ADIS16201=m -CONFIG_ADIS16209=m -CONFIG_ADXL372=m -CONFIG_ADXL372_SPI=m -CONFIG_ADXL372_I2C=m -CONFIG_BMA220=m -CONFIG_BMA400=m -CONFIG_BMA400_I2C=m -CONFIG_BMA400_SPI=m -CONFIG_BMC150_ACCEL=m -CONFIG_BMC150_ACCEL_I2C=m -CONFIG_BMC150_ACCEL_SPI=m -CONFIG_DA280=m -CONFIG_DA311=m -CONFIG_DMARD06=m -CONFIG_DMARD09=m -CONFIG_DMARD10=m -CONFIG_HID_SENSOR_ACCEL_3D=m -CONFIG_IIO_CROS_EC_ACCEL_LEGACY=m -CONFIG_IIO_ST_ACCEL_3AXIS=m -CONFIG_IIO_ST_ACCEL_I2C_3AXIS=m -CONFIG_IIO_ST_ACCEL_SPI_3AXIS=m -CONFIG_KXSD9=m -CONFIG_KXSD9_SPI=m -CONFIG_KXSD9_I2C=m -CONFIG_KXCJK1013=m -CONFIG_MC3230=m -CONFIG_MMA7455=m -CONFIG_MMA7455_I2C=m -CONFIG_MMA7455_SPI=m -CONFIG_MMA7660=m -CONFIG_MMA8452=m -CONFIG_MMA9551_CORE=m -CONFIG_MMA9551=m -CONFIG_MMA9553=m -CONFIG_MXC4005=m -CONFIG_MXC6255=m -CONFIG_SCA3000=m -CONFIG_STK8312=m -CONFIG_STK8BA50=m -# end of Accelerometers - -# 
-# Analog to digital converters -# -CONFIG_AD_SIGMA_DELTA=m -CONFIG_AD7091R5=m -CONFIG_AD7124=m -CONFIG_AD7192=m -CONFIG_AD7266=m -CONFIG_AD7291=m -CONFIG_AD7292=m -CONFIG_AD7298=m -CONFIG_AD7476=m -CONFIG_AD7606=m -CONFIG_AD7606_IFACE_PARALLEL=m -CONFIG_AD7606_IFACE_SPI=m -CONFIG_AD7766=m -CONFIG_AD7768_1=m -CONFIG_AD7780=m -CONFIG_AD7791=m -CONFIG_AD7793=m -CONFIG_AD7887=m -CONFIG_AD7923=m -CONFIG_AD7949=m -CONFIG_AD799X=m -CONFIG_AD9467=m -CONFIG_ADI_AXI_ADC=m -CONFIG_AXP20X_ADC=m -CONFIG_AXP288_ADC=m -CONFIG_CC10001_ADC=m -CONFIG_CPCAP_ADC=m -CONFIG_DA9150_GPADC=m -CONFIG_DLN2_ADC=m -CONFIG_ENVELOPE_DETECTOR=m -CONFIG_HI8435=m -CONFIG_HX711=m -CONFIG_INA2XX_ADC=m -CONFIG_INTEL_MRFLD_ADC=m -CONFIG_LP8788_ADC=m -CONFIG_LTC2471=m -CONFIG_LTC2485=m -CONFIG_LTC2496=m -CONFIG_LTC2497=m -CONFIG_MAX1027=m -CONFIG_MAX11100=m -CONFIG_MAX1118=m -CONFIG_MAX1241=m -CONFIG_MAX1363=m -CONFIG_MAX9611=m -CONFIG_MCP320X=m -CONFIG_MCP3422=m -CONFIG_MCP3911=m -CONFIG_MEN_Z188_ADC=m -CONFIG_MP2629_ADC=m -CONFIG_NAU7802=m -CONFIG_PALMAS_GPADC=m -CONFIG_QCOM_VADC_COMMON=m -CONFIG_QCOM_SPMI_IADC=m -CONFIG_QCOM_SPMI_VADC=m -CONFIG_QCOM_SPMI_ADC5=m -CONFIG_RN5T618_ADC=m -CONFIG_SD_ADC_MODULATOR=m -CONFIG_STMPE_ADC=m -CONFIG_TI_ADC081C=m -CONFIG_TI_ADC0832=m -CONFIG_TI_ADC084S021=m -CONFIG_TI_ADC12138=m -CONFIG_TI_ADC108S102=m -CONFIG_TI_ADC128S052=m -CONFIG_TI_ADC161S626=m -CONFIG_TI_ADS1015=m -CONFIG_TI_ADS7950=m -CONFIG_TI_ADS8344=m -CONFIG_TI_ADS8688=m -CONFIG_TI_ADS124S08=m -CONFIG_TI_AM335X_ADC=m -CONFIG_TI_TLC4541=m -CONFIG_TWL4030_MADC=m -CONFIG_TWL6030_GPADC=m -CONFIG_VF610_ADC=m -CONFIG_VIPERBOARD_ADC=m -CONFIG_XILINX_XADC=m -# end of Analog to digital converters - -# -# Analog Front Ends -# -CONFIG_IIO_RESCALE=m -# end of Analog Front Ends - -# -# Amplifiers -# -CONFIG_AD8366=m -CONFIG_HMC425=m -# end of Amplifiers - -# -# Chemical Sensors -# -CONFIG_ATLAS_PH_SENSOR=m -CONFIG_ATLAS_EZO_SENSOR=m -CONFIG_BME680=m -CONFIG_BME680_I2C=m -CONFIG_BME680_SPI=m -CONFIG_CCS811=m -CONFIG_IAQCORE=m -CONFIG_PMS7003=m -# CONFIG_SCD30_CORE is not set -CONFIG_SENSIRION_SGP30=m -CONFIG_SPS30=m -CONFIG_VZ89X=m -# end of Chemical Sensors - -CONFIG_IIO_CROS_EC_SENSORS_CORE=m -CONFIG_IIO_CROS_EC_SENSORS=m -CONFIG_IIO_CROS_EC_SENSORS_LID_ANGLE=m - -# -# Hid Sensor IIO Common -# -CONFIG_HID_SENSOR_IIO_COMMON=m -CONFIG_HID_SENSOR_IIO_TRIGGER=m -# end of Hid Sensor IIO Common - -CONFIG_IIO_MS_SENSORS_I2C=m - -# -# SSP Sensor Common -# -CONFIG_IIO_SSP_SENSORS_COMMONS=m -CONFIG_IIO_SSP_SENSORHUB=m -# end of SSP Sensor Common - -CONFIG_IIO_ST_SENSORS_I2C=m -CONFIG_IIO_ST_SENSORS_SPI=m -CONFIG_IIO_ST_SENSORS_CORE=m - -# -# Digital to analog converters -# -CONFIG_AD5064=m -CONFIG_AD5360=m -CONFIG_AD5380=m -CONFIG_AD5421=m -CONFIG_AD5446=m -CONFIG_AD5449=m -CONFIG_AD5592R_BASE=m -CONFIG_AD5592R=m -CONFIG_AD5593R=m -CONFIG_AD5504=m -CONFIG_AD5624R_SPI=m -CONFIG_AD5686=m -CONFIG_AD5686_SPI=m -CONFIG_AD5696_I2C=m -CONFIG_AD5755=m -CONFIG_AD5758=m -CONFIG_AD5761=m -CONFIG_AD5764=m -CONFIG_AD5770R=m -CONFIG_AD5791=m -CONFIG_AD7303=m -CONFIG_AD8801=m -CONFIG_DPOT_DAC=m -CONFIG_DS4424=m -CONFIG_LTC1660=m -CONFIG_LTC2632=m -CONFIG_M62332=m -CONFIG_MAX517=m -CONFIG_MAX5821=m -CONFIG_MCP4725=m -CONFIG_MCP4922=m -CONFIG_TI_DAC082S085=m -CONFIG_TI_DAC5571=m -CONFIG_TI_DAC7311=m -CONFIG_TI_DAC7612=m -CONFIG_VF610_DAC=m -# end of Digital to analog converters - -# -# IIO dummy driver -# -# CONFIG_IIO_SIMPLE_DUMMY is not set -# end of IIO dummy driver - -# -# Frequency Synthesizers DDS/PLL -# - -# -# Clock Generator/Distribution -# 
-CONFIG_AD9523=m -# end of Clock Generator/Distribution - -# -# Phase-Locked Loop (PLL) frequency synthesizers -# -CONFIG_ADF4350=m -CONFIG_ADF4371=m -# end of Phase-Locked Loop (PLL) frequency synthesizers -# end of Frequency Synthesizers DDS/PLL - -# -# Digital gyroscope sensors -# -CONFIG_ADIS16080=m -CONFIG_ADIS16130=m -CONFIG_ADIS16136=m -CONFIG_ADIS16260=m -CONFIG_ADXRS450=m -CONFIG_BMG160=m -CONFIG_BMG160_I2C=m -CONFIG_BMG160_SPI=m -CONFIG_FXAS21002C=m -CONFIG_FXAS21002C_I2C=m -CONFIG_FXAS21002C_SPI=m -CONFIG_HID_SENSOR_GYRO_3D=m -CONFIG_MPU3050=m -CONFIG_MPU3050_I2C=m -CONFIG_IIO_ST_GYRO_3AXIS=m -CONFIG_IIO_ST_GYRO_I2C_3AXIS=m -CONFIG_IIO_ST_GYRO_SPI_3AXIS=m -CONFIG_ITG3200=m -# end of Digital gyroscope sensors - -# -# Health Sensors -# - -# -# Heart Rate Monitors -# -CONFIG_AFE4403=m -CONFIG_AFE4404=m -CONFIG_MAX30100=m -CONFIG_MAX30102=m -# end of Heart Rate Monitors -# end of Health Sensors - -# -# Humidity sensors -# -CONFIG_AM2315=m -CONFIG_DHT11=m -CONFIG_HDC100X=m -CONFIG_HID_SENSOR_HUMIDITY=m -CONFIG_HTS221=m -CONFIG_HTS221_I2C=m -CONFIG_HTS221_SPI=m -CONFIG_HTU21=m -CONFIG_SI7005=m -CONFIG_SI7020=m -# end of Humidity sensors - -# -# Inertial measurement units -# -CONFIG_ADIS16400=m -CONFIG_ADIS16460=m -CONFIG_ADIS16475=m -CONFIG_ADIS16480=m -CONFIG_BMI160=m -CONFIG_BMI160_I2C=m -CONFIG_BMI160_SPI=m -CONFIG_FXOS8700=m -CONFIG_FXOS8700_I2C=m -CONFIG_FXOS8700_SPI=m -CONFIG_KMX61=m -CONFIG_INV_ICM42600=m -CONFIG_INV_ICM42600_I2C=m -CONFIG_INV_ICM42600_SPI=m -CONFIG_INV_MPU6050_IIO=m -CONFIG_INV_MPU6050_I2C=m -CONFIG_INV_MPU6050_SPI=m -CONFIG_IIO_ST_LSM6DSX=m -CONFIG_IIO_ST_LSM6DSX_I2C=m -CONFIG_IIO_ST_LSM6DSX_SPI=m -CONFIG_IIO_ST_LSM6DSX_I3C=m -# end of Inertial measurement units - -CONFIG_IIO_ADIS_LIB=m -CONFIG_IIO_ADIS_LIB_BUFFER=y - -# -# Light sensors -# -CONFIG_ACPI_ALS=m -CONFIG_ADJD_S311=m -CONFIG_ADUX1020=m -CONFIG_AL3010=m -CONFIG_AL3320A=m -CONFIG_APDS9300=m -CONFIG_APDS9960=m -CONFIG_BH1750=m -CONFIG_BH1780=m -CONFIG_CM32181=m -CONFIG_CM3232=m -CONFIG_CM3323=m -CONFIG_CM3605=m -CONFIG_CM36651=m -CONFIG_IIO_CROS_EC_LIGHT_PROX=m -CONFIG_GP2AP002=m -CONFIG_GP2AP020A00F=m -CONFIG_IQS621_ALS=m -CONFIG_SENSORS_ISL29018=m -CONFIG_SENSORS_ISL29028=m -CONFIG_ISL29125=m -CONFIG_HID_SENSOR_ALS=m -CONFIG_HID_SENSOR_PROX=m -CONFIG_JSA1212=m -CONFIG_RPR0521=m -CONFIG_SENSORS_LM3533=m -CONFIG_LTR501=m -CONFIG_LV0104CS=m -CONFIG_MAX44000=m -CONFIG_MAX44009=m -CONFIG_NOA1305=m -CONFIG_OPT3001=m -CONFIG_PA12203001=m -CONFIG_SI1133=m -CONFIG_SI1145=m -CONFIG_STK3310=m -CONFIG_ST_UVIS25=m -CONFIG_ST_UVIS25_I2C=m -CONFIG_ST_UVIS25_SPI=m -CONFIG_TCS3414=m -CONFIG_TCS3472=m -CONFIG_SENSORS_TSL2563=m -CONFIG_TSL2583=m -CONFIG_TSL2772=m -CONFIG_TSL4531=m -CONFIG_US5182D=m -CONFIG_VCNL4000=m -CONFIG_VCNL4035=m -CONFIG_VEML6030=m -CONFIG_VEML6070=m -CONFIG_VL6180=m -CONFIG_ZOPT2201=m -# end of Light sensors - -# -# Magnetometer sensors -# -CONFIG_AK8974=m -CONFIG_AK8975=m -CONFIG_AK09911=m -CONFIG_BMC150_MAGN=m -CONFIG_BMC150_MAGN_I2C=m -CONFIG_BMC150_MAGN_SPI=m -CONFIG_MAG3110=m -CONFIG_HID_SENSOR_MAGNETOMETER_3D=m -CONFIG_MMC35240=m -CONFIG_IIO_ST_MAGN_3AXIS=m -CONFIG_IIO_ST_MAGN_I2C_3AXIS=m -CONFIG_IIO_ST_MAGN_SPI_3AXIS=m -CONFIG_SENSORS_HMC5843=m -CONFIG_SENSORS_HMC5843_I2C=m -CONFIG_SENSORS_HMC5843_SPI=m -CONFIG_SENSORS_RM3100=m -CONFIG_SENSORS_RM3100_I2C=m -CONFIG_SENSORS_RM3100_SPI=m -# end of Magnetometer sensors - -# -# Multiplexers -# -CONFIG_IIO_MUX=m -# end of Multiplexers - -# -# Inclinometer sensors -# -CONFIG_HID_SENSOR_INCLINOMETER_3D=m -CONFIG_HID_SENSOR_DEVICE_ROTATION=m -# 
end of Inclinometer sensors - -# -# Triggers - standalone -# -CONFIG_IIO_HRTIMER_TRIGGER=m -CONFIG_IIO_INTERRUPT_TRIGGER=m -CONFIG_IIO_TIGHTLOOP_TRIGGER=m -CONFIG_IIO_SYSFS_TRIGGER=m -# end of Triggers - standalone - -# -# Linear and angular position sensors -# -CONFIG_IQS624_POS=m -# end of Linear and angular position sensors - -# -# Digital potentiometers -# -CONFIG_AD5272=m -CONFIG_DS1803=m -CONFIG_MAX5432=m -CONFIG_MAX5481=m -CONFIG_MAX5487=m -CONFIG_MCP4018=m -CONFIG_MCP4131=m -CONFIG_MCP4531=m -CONFIG_MCP41010=m -CONFIG_TPL0102=m -# end of Digital potentiometers - -# -# Digital potentiostats -# -CONFIG_LMP91000=m -# end of Digital potentiostats - -# -# Pressure sensors -# -CONFIG_ABP060MG=m -CONFIG_BMP280=m -CONFIG_BMP280_I2C=m -CONFIG_BMP280_SPI=m -CONFIG_IIO_CROS_EC_BARO=m -CONFIG_DLHL60D=m -CONFIG_DPS310=m -CONFIG_HID_SENSOR_PRESS=m -CONFIG_HP03=m -CONFIG_ICP10100=m -CONFIG_MPL115=m -CONFIG_MPL115_I2C=m -CONFIG_MPL115_SPI=m -CONFIG_MPL3115=m -CONFIG_MS5611=m -CONFIG_MS5611_I2C=m -CONFIG_MS5611_SPI=m -CONFIG_MS5637=m -CONFIG_IIO_ST_PRESS=m -CONFIG_IIO_ST_PRESS_I2C=m -CONFIG_IIO_ST_PRESS_SPI=m -CONFIG_T5403=m -CONFIG_HP206C=m -CONFIG_ZPA2326=m -CONFIG_ZPA2326_I2C=m -CONFIG_ZPA2326_SPI=m -# end of Pressure sensors - -# -# Lightning sensors -# -CONFIG_AS3935=m -# end of Lightning sensors - -# -# Proximity and distance sensors -# -CONFIG_ISL29501=m -CONFIG_LIDAR_LITE_V2=m -CONFIG_MB1232=m -CONFIG_PING=m -CONFIG_RFD77402=m -CONFIG_SRF04=m -CONFIG_SX9310=m -CONFIG_SX9500=m -CONFIG_SRF08=m -CONFIG_VCNL3020=m -CONFIG_VL53L0X_I2C=m -# end of Proximity and distance sensors - -# -# Resolver to digital converters -# -CONFIG_AD2S90=m -CONFIG_AD2S1200=m -# end of Resolver to digital converters - -# -# Temperature sensors -# -CONFIG_IQS620AT_TEMP=m -CONFIG_LTC2983=m -CONFIG_MAXIM_THERMOCOUPLE=m -CONFIG_HID_SENSOR_TEMP=m -CONFIG_MLX90614=m -CONFIG_MLX90632=m -CONFIG_TMP006=m -CONFIG_TMP007=m -CONFIG_TSYS01=m -CONFIG_TSYS02D=m -CONFIG_MAX31856=m -# end of Temperature sensors - -CONFIG_NTB=m -CONFIG_NTB_MSI=y -CONFIG_NTB_AMD=m -CONFIG_NTB_IDT=m -CONFIG_NTB_INTEL=m -CONFIG_NTB_SWITCHTEC=m -# CONFIG_NTB_PINGPONG is not set -# CONFIG_NTB_TOOL is not set -# CONFIG_NTB_PERF is not set -# CONFIG_NTB_MSI_TEST is not set -CONFIG_NTB_TRANSPORT=m -CONFIG_VME_BUS=y - -# -# VME Bridge Drivers -# -CONFIG_VME_CA91CX42=m -CONFIG_VME_TSI148=m -# CONFIG_VME_FAKE is not set - -# -# VME Board Drivers -# -CONFIG_VMIVME_7805=m - -# -# VME Device Drivers -# -CONFIG_VME_USER=m -CONFIG_PWM=y -CONFIG_PWM_SYSFS=y -# CONFIG_PWM_DEBUG is not set -CONFIG_PWM_ATMEL_HLCDC_PWM=m -CONFIG_PWM_CRC=y -CONFIG_PWM_CROS_EC=m -CONFIG_PWM_FSL_FTM=m -CONFIG_PWM_IQS620A=m -CONFIG_PWM_LP3943=m -CONFIG_PWM_LPSS=m -CONFIG_PWM_LPSS_PCI=m -CONFIG_PWM_LPSS_PLATFORM=m -CONFIG_PWM_PCA9685=m -CONFIG_PWM_STMPE=y -CONFIG_PWM_TWL=m -CONFIG_PWM_TWL_LED=m - -# -# IRQ chip support -# -CONFIG_IRQCHIP=y -CONFIG_AL_FIC=y -CONFIG_MADERA_IRQ=m -# end of IRQ chip support - -CONFIG_IPACK_BUS=m -CONFIG_BOARD_TPCI200=m -CONFIG_SERIAL_IPOCTAL=m -CONFIG_RESET_CONTROLLER=y -CONFIG_RESET_BRCMSTB_RESCAL=y -CONFIG_RESET_INTEL_GW=y -CONFIG_RESET_TI_SYSCON=m - -# -# PHY Subsystem -# -CONFIG_GENERIC_PHY=y -CONFIG_GENERIC_PHY_MIPI_DPHY=y -CONFIG_BCM_KONA_USB2_PHY=m -CONFIG_PHY_CADENCE_TORRENT=m -CONFIG_PHY_CADENCE_DPHY=m -CONFIG_PHY_CADENCE_SIERRA=m -CONFIG_PHY_CADENCE_SALVO=m -CONFIG_PHY_FSL_IMX8MQ_USB=m -CONFIG_PHY_MIXEL_MIPI_DPHY=m -CONFIG_PHY_PXA_28NM_HSIC=m -CONFIG_PHY_PXA_28NM_USB2=m -CONFIG_PHY_CPCAP_USB=m -CONFIG_PHY_MAPPHONE_MDM6600=m -CONFIG_PHY_OCELOT_SERDES=m 
-CONFIG_PHY_QCOM_USB_HS=m -CONFIG_PHY_QCOM_USB_HSIC=m -CONFIG_PHY_SAMSUNG_USB2=m -CONFIG_PHY_TUSB1210=m -CONFIG_PHY_INTEL_COMBO=y -CONFIG_PHY_INTEL_EMMC=m -# end of PHY Subsystem - -CONFIG_POWERCAP=y -CONFIG_INTEL_RAPL_CORE=m -CONFIG_INTEL_RAPL=m -CONFIG_IDLE_INJECT=y -CONFIG_MCB=m -CONFIG_MCB_PCI=m -CONFIG_MCB_LPC=m - -# -# Performance monitor support -# -# end of Performance monitor support - -CONFIG_RAS=y -CONFIG_RAS_CEC=y -# CONFIG_RAS_CEC_DEBUG is not set -CONFIG_USB4=m - -# -# Android -# -# CONFIG_ANDROID is not set -# end of Android - -CONFIG_LIBNVDIMM=y -CONFIG_BLK_DEV_PMEM=m -CONFIG_ND_BLK=m -CONFIG_ND_CLAIM=y -CONFIG_ND_BTT=m -CONFIG_BTT=y -CONFIG_ND_PFN=m -CONFIG_NVDIMM_PFN=y -CONFIG_NVDIMM_DAX=y -CONFIG_OF_PMEM=m -CONFIG_DAX_DRIVER=y -CONFIG_DAX=y -CONFIG_DEV_DAX=m -CONFIG_DEV_DAX_PMEM=m -CONFIG_DEV_DAX_HMEM=m -CONFIG_DEV_DAX_KMEM=m -CONFIG_DEV_DAX_PMEM_COMPAT=m -CONFIG_NVMEM=y -CONFIG_NVMEM_SYSFS=y -CONFIG_NVMEM_SPMI_SDAM=m -CONFIG_RAVE_SP_EEPROM=m - -# -# HW tracing support -# -CONFIG_STM=m -CONFIG_STM_PROTO_BASIC=m -CONFIG_STM_PROTO_SYS_T=m -# CONFIG_STM_DUMMY is not set -CONFIG_STM_SOURCE_CONSOLE=m -CONFIG_STM_SOURCE_HEARTBEAT=m -CONFIG_STM_SOURCE_FTRACE=m -CONFIG_INTEL_TH=m -CONFIG_INTEL_TH_PCI=m -CONFIG_INTEL_TH_ACPI=m -CONFIG_INTEL_TH_GTH=m -CONFIG_INTEL_TH_STH=m -CONFIG_INTEL_TH_MSU=m -CONFIG_INTEL_TH_PTI=m -# CONFIG_INTEL_TH_DEBUG is not set -# end of HW tracing support - -CONFIG_FPGA=m -CONFIG_ALTERA_PR_IP_CORE=m -CONFIG_ALTERA_PR_IP_CORE_PLAT=m -CONFIG_FPGA_MGR_ALTERA_PS_SPI=m -CONFIG_FPGA_MGR_ALTERA_CVP=m -CONFIG_FPGA_MGR_XILINX_SPI=m -CONFIG_FPGA_MGR_ICE40_SPI=m -CONFIG_FPGA_MGR_MACHXO2_SPI=m -CONFIG_FPGA_BRIDGE=m -CONFIG_ALTERA_FREEZE_BRIDGE=m -CONFIG_XILINX_PR_DECOUPLER=m -CONFIG_FPGA_REGION=m -CONFIG_OF_FPGA_REGION=m -CONFIG_FPGA_DFL=m -CONFIG_FPGA_DFL_FME=m -CONFIG_FPGA_DFL_FME_MGR=m -CONFIG_FPGA_DFL_FME_BRIDGE=m -CONFIG_FPGA_DFL_FME_REGION=m -CONFIG_FPGA_DFL_AFU=m -CONFIG_FPGA_DFL_PCI=m -CONFIG_FSI=m -CONFIG_FSI_NEW_DEV_NODE=y -CONFIG_FSI_MASTER_GPIO=m -CONFIG_FSI_MASTER_HUB=m -CONFIG_FSI_MASTER_ASPEED=m -CONFIG_FSI_SCOM=m -CONFIG_FSI_SBEFIFO=m -CONFIG_FSI_OCC=m -CONFIG_TEE=m - -# -# TEE drivers -# -CONFIG_AMDTEE=m -# end of TEE drivers - -CONFIG_MULTIPLEXER=m - -# -# Multiplexer drivers -# -CONFIG_MUX_ADG792A=m -CONFIG_MUX_ADGS1408=m -CONFIG_MUX_GPIO=m -CONFIG_MUX_MMIO=m -# end of Multiplexer drivers - -CONFIG_PM_OPP=y -CONFIG_UNISYS_VISORBUS=m -CONFIG_SIOX=m -CONFIG_SIOX_BUS_GPIO=m -CONFIG_SLIMBUS=m -CONFIG_SLIM_QCOM_CTRL=m -CONFIG_INTERCONNECT=y -CONFIG_COUNTER=m -CONFIG_FTM_QUADDEC=m -CONFIG_MICROCHIP_TCB_CAPTURE=m -CONFIG_MOST=m -# CONFIG_MOST_USB_HDM is not set -# end of Device Drivers - -# -# File systems -# -CONFIG_DCACHE_WORD_ACCESS=y -CONFIG_VALIDATE_FS_PARSER=y -CONFIG_FS_IOMAP=y -# CONFIG_EXT2_FS is not set -# CONFIG_EXT3_FS is not set -CONFIG_EXT4_FS=m -CONFIG_EXT4_USE_FOR_EXT2=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -# CONFIG_EXT4_DEBUG is not set -CONFIG_JBD2=m -# CONFIG_JBD2_DEBUG is not set -CONFIG_FS_MBCACHE=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -# CONFIG_JFS_DEBUG is not set -CONFIG_JFS_STATISTICS=y -CONFIG_XFS_FS=m -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_XFS_RT=y -CONFIG_XFS_ONLINE_SCRUB=y -CONFIG_XFS_ONLINE_REPAIR=y -# CONFIG_XFS_WARN is not set -# CONFIG_XFS_DEBUG is not set -CONFIG_GFS2_FS=m 
-CONFIG_GFS2_FS_LOCKING_DLM=y -CONFIG_OCFS2_FS=m -CONFIG_OCFS2_FS_O2CB=m -CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m -CONFIG_OCFS2_FS_STATS=y -CONFIG_OCFS2_DEBUG_MASKLOG=y -# CONFIG_OCFS2_DEBUG_FS is not set -CONFIG_BTRFS_FS=m -CONFIG_BTRFS_FS_POSIX_ACL=y -# CONFIG_BTRFS_FS_CHECK_INTEGRITY is not set -# CONFIG_BTRFS_FS_RUN_SANITY_TESTS is not set -# CONFIG_BTRFS_DEBUG is not set -# CONFIG_BTRFS_ASSERT is not set -# CONFIG_BTRFS_FS_REF_VERIFY is not set -CONFIG_NILFS2_FS=m -CONFIG_F2FS_FS=m -CONFIG_F2FS_STAT_FS=y -CONFIG_F2FS_FS_XATTR=y -CONFIG_F2FS_FS_POSIX_ACL=y -CONFIG_F2FS_FS_SECURITY=y -CONFIG_F2FS_CHECK_FS=y -# CONFIG_F2FS_IO_TRACE is not set -# CONFIG_F2FS_FAULT_INJECTION is not set -CONFIG_F2FS_FS_COMPRESSION=y -CONFIG_F2FS_FS_LZO=y -CONFIG_F2FS_FS_LZ4=y -CONFIG_F2FS_FS_ZSTD=y -CONFIG_F2FS_FS_LZORLE=y -CONFIG_ZONEFS_FS=m -CONFIG_FS_DAX=y -CONFIG_FS_DAX_PMD=y -CONFIG_FS_POSIX_ACL=y -CONFIG_EXPORTFS=y -CONFIG_EXPORTFS_BLOCK_OPS=y -CONFIG_FILE_LOCKING=y -# CONFIG_MANDATORY_FILE_LOCKING is not set -CONFIG_FS_ENCRYPTION=y -CONFIG_FS_ENCRYPTION_ALGS=m -# CONFIG_FS_ENCRYPTION_INLINE_CRYPT is not set -CONFIG_FS_VERITY=y -# CONFIG_FS_VERITY_DEBUG is not set -CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y -CONFIG_FSNOTIFY=y -CONFIG_DNOTIFY=y -CONFIG_INOTIFY_USER=y -CONFIG_FANOTIFY=y -CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y -CONFIG_QUOTA=y -CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set -# CONFIG_QUOTA_DEBUG is not set -CONFIG_QUOTA_TREE=m -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_QUOTACTL=y -CONFIG_QUOTACTL_COMPAT=y -CONFIG_AUTOFS4_FS=y -CONFIG_AUTOFS_FS=y -CONFIG_FUSE_FS=m -CONFIG_CUSE=m -CONFIG_VIRTIO_FS=m -CONFIG_OVERLAY_FS=m -CONFIG_OVERLAY_FS_REDIRECT_DIR=y -# CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW is not set -CONFIG_OVERLAY_FS_INDEX=y -CONFIG_OVERLAY_FS_XINO_AUTO=y -CONFIG_OVERLAY_FS_METACOPY=y - -# -# Caches -# -CONFIG_FSCACHE=m -CONFIG_FSCACHE_STATS=y -CONFIG_FSCACHE_HISTOGRAM=y -# CONFIG_FSCACHE_DEBUG is not set -# CONFIG_FSCACHE_OBJECT_LIST is not set -CONFIG_CACHEFILES=m -# CONFIG_CACHEFILES_DEBUG is not set -# CONFIG_CACHEFILES_HISTOGRAM is not set -# end of Caches - -# -# CD-ROM/DVD Filesystems -# -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -# end of CD-ROM/DVD Filesystems - -# -# DOS/FAT/EXFAT/NT Filesystems -# -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_CODEPAGE=437 -CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" -CONFIG_FAT_DEFAULT_UTF8=y -CONFIG_EXFAT_FS=m -CONFIG_EXFAT_DEFAULT_IOCHARSET="utf8" -CONFIG_NTFS_FS=m -# CONFIG_NTFS_DEBUG is not set -CONFIG_NTFS_RW=y -# end of DOS/FAT/EXFAT/NT Filesystems - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_KCORE=y -CONFIG_PROC_VMCORE=y -CONFIG_PROC_VMCORE_DEVICE_DUMP=y -CONFIG_PROC_SYSCTL=y -CONFIG_PROC_PAGE_MONITOR=y -CONFIG_PROC_CHILDREN=y -CONFIG_PROC_PID_ARCH_STATUS=y -CONFIG_PROC_CPU_RESCTRL=y -CONFIG_KERNFS=y -CONFIG_SYSFS=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_TMPFS_XATTR=y -# CONFIG_TMPFS_INODE64 is not set -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -CONFIG_MEMFD_CREATE=y -CONFIG_ARCH_HAS_GIGANTIC_PAGE=y -CONFIG_CONFIGFS_FS=y -CONFIG_EFIVAR_FS=y -# end of Pseudo filesystems - -CONFIG_MISC_FILESYSTEMS=y -CONFIG_ORANGEFS_FS=m -# CONFIG_ADFS_FS is not set -CONFIG_AFFS_FS=m -CONFIG_ECRYPT_FS=m -# CONFIG_ECRYPT_FS_MESSAGING is not set -CONFIG_HFS_FS=m -CONFIG_HFSPLUS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -CONFIG_JFFS2_FS=m -CONFIG_JFFS2_FS_DEBUG=0 
-CONFIG_JFFS2_FS_WRITEBUFFER=y -# CONFIG_JFFS2_FS_WBUF_VERIFY is not set -CONFIG_JFFS2_SUMMARY=y -CONFIG_JFFS2_FS_XATTR=y -CONFIG_JFFS2_FS_POSIX_ACL=y -CONFIG_JFFS2_FS_SECURITY=y -# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set -CONFIG_JFFS2_ZLIB=y -CONFIG_JFFS2_RTIME=y -CONFIG_UBIFS_FS=m -# CONFIG_UBIFS_FS_ADVANCED_COMPR is not set -CONFIG_UBIFS_FS_LZO=y -CONFIG_UBIFS_FS_ZLIB=y -CONFIG_UBIFS_FS_ZSTD=y -CONFIG_UBIFS_ATIME_SUPPORT=y -CONFIG_UBIFS_FS_XATTR=y -CONFIG_UBIFS_FS_SECURITY=y -CONFIG_UBIFS_FS_AUTHENTICATION=y -CONFIG_CRAMFS=m -CONFIG_CRAMFS_BLOCKDEV=y -CONFIG_CRAMFS_MTD=y -CONFIG_SQUASHFS=m -# CONFIG_SQUASHFS_FILE_CACHE is not set -CONFIG_SQUASHFS_FILE_DIRECT=y -# CONFIG_SQUASHFS_DECOMP_SINGLE is not set -CONFIG_SQUASHFS_DECOMP_MULTI=y -# CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set -CONFIG_SQUASHFS_XATTR=y -CONFIG_SQUASHFS_ZLIB=y -CONFIG_SQUASHFS_LZ4=y -CONFIG_SQUASHFS_LZO=y -CONFIG_SQUASHFS_XZ=y -CONFIG_SQUASHFS_ZSTD=y -# CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set -# CONFIG_SQUASHFS_EMBEDDED is not set -CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 -# CONFIG_VXFS_FS is not set -CONFIG_MINIX_FS=m -CONFIG_OMFS_FS=m -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX6FS_FS is not set -CONFIG_ROMFS_FS=m -CONFIG_ROMFS_BACKED_BY_BLOCK=y -# CONFIG_ROMFS_BACKED_BY_MTD is not set -# CONFIG_ROMFS_BACKED_BY_BOTH is not set -CONFIG_ROMFS_ON_BLOCK=y -CONFIG_PSTORE=y -CONFIG_PSTORE_DEFLATE_COMPRESS=m -CONFIG_PSTORE_LZO_COMPRESS=m -CONFIG_PSTORE_LZ4_COMPRESS=m -CONFIG_PSTORE_LZ4HC_COMPRESS=m -# CONFIG_PSTORE_842_COMPRESS is not set -CONFIG_PSTORE_ZSTD_COMPRESS=y -CONFIG_PSTORE_COMPRESS=y -# CONFIG_PSTORE_DEFLATE_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZO_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZ4_COMPRESS_DEFAULT is not set -# CONFIG_PSTORE_LZ4HC_COMPRESS_DEFAULT is not set -CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y -CONFIG_PSTORE_COMPRESS_DEFAULT="zstd" -# CONFIG_PSTORE_CONSOLE is not set -# CONFIG_PSTORE_PMSG is not set -# CONFIG_PSTORE_FTRACE is not set -CONFIG_PSTORE_RAM=y -CONFIG_PSTORE_ZONE=m -CONFIG_PSTORE_BLK=m -CONFIG_PSTORE_BLK_BLKDEV="" -CONFIG_PSTORE_BLK_KMSG_SIZE=64 -CONFIG_PSTORE_BLK_MAX_REASON=2 -# CONFIG_SYSV_FS is not set -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set -# CONFIG_UFS_DEBUG is not set -CONFIG_EROFS_FS=m -# CONFIG_EROFS_FS_DEBUG is not set -CONFIG_EROFS_FS_XATTR=y -CONFIG_EROFS_FS_POSIX_ACL=y -CONFIG_EROFS_FS_SECURITY=y -CONFIG_EROFS_FS_ZIP=y -CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT=2 -CONFIG_VBOXSF_FS=m -CONFIG_NETWORK_FILESYSTEMS=y -CONFIG_NFS_FS=m -CONFIG_NFS_V2=m -CONFIG_NFS_V3=m -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=m -CONFIG_NFS_SWAP=y -CONFIG_NFS_V4_1=y -CONFIG_NFS_V4_2=y -CONFIG_PNFS_FILE_LAYOUT=m -CONFIG_PNFS_BLOCK=m -CONFIG_PNFS_FLEXFILE_LAYOUT=m -CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" -CONFIG_NFS_V4_1_MIGRATION=y -CONFIG_NFS_V4_SECURITY_LABEL=y -CONFIG_NFS_FSCACHE=y -# CONFIG_NFS_USE_LEGACY_DNS is not set -CONFIG_NFS_USE_KERNEL_DNS=y -CONFIG_NFS_DEBUG=y -# CONFIG_NFS_DISABLE_UDP_SUPPORT is not set -CONFIG_NFSD=m -CONFIG_NFSD_V2_ACL=y -CONFIG_NFSD_V3=y -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V4=y -CONFIG_NFSD_PNFS=y -CONFIG_NFSD_BLOCKLAYOUT=y -CONFIG_NFSD_SCSILAYOUT=y -# CONFIG_NFSD_FLEXFILELAYOUT is not set -CONFIG_NFSD_V4_SECURITY_LABEL=y -CONFIG_GRACE_PERIOD=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_NFS_ACL_SUPPORT=m -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=m -CONFIG_SUNRPC_GSS=m -CONFIG_SUNRPC_BACKCHANNEL=y -CONFIG_SUNRPC_SWAP=y -CONFIG_RPCSEC_GSS_KRB5=m -CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES=y -CONFIG_SUNRPC_DEBUG=y 
-CONFIG_SUNRPC_XPRT_RDMA=m -CONFIG_CEPH_FS=m -CONFIG_CEPH_FSCACHE=y -CONFIG_CEPH_FS_POSIX_ACL=y -CONFIG_CEPH_FS_SECURITY_LABEL=y -CONFIG_CIFS=m -# CONFIG_CIFS_STATS2 is not set -CONFIG_CIFS_ALLOW_INSECURE_LEGACY=y -# CONFIG_CIFS_WEAK_PW_HASH is not set -CONFIG_CIFS_UPCALL=y -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -CONFIG_CIFS_DEBUG=y -# CONFIG_CIFS_DEBUG2 is not set -# CONFIG_CIFS_DEBUG_DUMP_KEYS is not set -CONFIG_CIFS_DFS_UPCALL=y -# CONFIG_CIFS_SMB_DIRECT is not set -CONFIG_CIFS_FSCACHE=y -CONFIG_CODA_FS=m -CONFIG_AFS_FS=m -# CONFIG_AFS_DEBUG is not set -CONFIG_AFS_FSCACHE=y -# CONFIG_AFS_DEBUG_CURSOR is not set -CONFIG_9P_FS=m -CONFIG_9P_FSCACHE=y -CONFIG_9P_FS_POSIX_ACL=y -CONFIG_9P_FS_SECURITY=y -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_MAC_ROMAN=m -CONFIG_NLS_MAC_CELTIC=m -CONFIG_NLS_MAC_CENTEURO=m -CONFIG_NLS_MAC_CROATIAN=m -CONFIG_NLS_MAC_CYRILLIC=m -CONFIG_NLS_MAC_GAELIC=m -CONFIG_NLS_MAC_GREEK=m -CONFIG_NLS_MAC_ICELAND=m -CONFIG_NLS_MAC_INUIT=m -CONFIG_NLS_MAC_ROMANIAN=m -CONFIG_NLS_MAC_TURKISH=m -CONFIG_NLS_UTF8=m -CONFIG_DLM=m -# CONFIG_DLM_DEBUG is not set -CONFIG_UNICODE=y -# CONFIG_UNICODE_NORMALIZATION_SELFTEST is not set -CONFIG_IO_WQ=y -# end of File systems - -# -# Security options -# -CONFIG_KEYS=y -CONFIG_KEYS_REQUEST_CACHE=y -CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_TRUSTED_KEYS=m -CONFIG_ENCRYPTED_KEYS=m -CONFIG_KEY_DH_OPERATIONS=y -CONFIG_KEY_NOTIFICATIONS=y -# CONFIG_SECURITY_DMESG_RESTRICT is not set -CONFIG_SECURITY=y -CONFIG_SECURITYFS=y -CONFIG_SECURITY_NETWORK=y -CONFIG_PAGE_TABLE_ISOLATION=y -CONFIG_SECURITY_INFINIBAND=y -CONFIG_SECURITY_NETWORK_XFRM=y -CONFIG_SECURITY_PATH=y -# CONFIG_INTEL_TXT is not set -CONFIG_LSM_MMAP_MIN_ADDR=65536 -CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y -CONFIG_HARDENED_USERCOPY=y -CONFIG_HARDENED_USERCOPY_FALLBACK=y -# CONFIG_HARDENED_USERCOPY_PAGESPAN is not set -CONFIG_FORTIFY_SOURCE=y -# CONFIG_STATIC_USERMODEHELPER is not set -CONFIG_SECURITY_SELINUX=y -CONFIG_SECURITY_SELINUX_BOOTPARAM=y -# CONFIG_SECURITY_SELINUX_DISABLE is not set -CONFIG_SECURITY_SELINUX_DEVELOP=y -CONFIG_SECURITY_SELINUX_AVC_STATS=y -CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=0 -CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9 -CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256 -CONFIG_SECURITY_SMACK=y -CONFIG_SECURITY_SMACK_BRINGUP=y -CONFIG_SECURITY_SMACK_NETFILTER=y -CONFIG_SECURITY_SMACK_APPEND_SIGNALS=y -CONFIG_SECURITY_TOMOYO=y -CONFIG_SECURITY_TOMOYO_MAX_ACCEPT_ENTRY=2048 -CONFIG_SECURITY_TOMOYO_MAX_AUDIT_LOG=1024 -# CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER is not set 
-CONFIG_SECURITY_TOMOYO_POLICY_LOADER="/sbin/tomoyo-init" -CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER="/sbin/init" -# CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING is not set -CONFIG_SECURITY_APPARMOR=y -CONFIG_SECURITY_APPARMOR_HASH=y -CONFIG_SECURITY_APPARMOR_HASH_DEFAULT=y -# CONFIG_SECURITY_APPARMOR_DEBUG is not set -# CONFIG_SECURITY_LOADPIN is not set -CONFIG_SECURITY_YAMA=y -CONFIG_SECURITY_SAFESETID=y -CONFIG_SECURITY_LOCKDOWN_LSM=y -# CONFIG_SECURITY_LOCKDOWN_LSM_EARLY is not set -CONFIG_LOCK_DOWN_KERNEL_FORCE_NONE=y -# CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY is not set -# CONFIG_LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY is not set -# CONFIG_INTEGRITY is not set -# CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT is not set -# CONFIG_DEFAULT_SECURITY_SELINUX is not set -# CONFIG_DEFAULT_SECURITY_SMACK is not set -# CONFIG_DEFAULT_SECURITY_TOMOYO is not set -# CONFIG_DEFAULT_SECURITY_APPARMOR is not set -CONFIG_DEFAULT_SECURITY_DAC=y -CONFIG_LSM="lockdown,yama" - -# -# Kernel hardening options -# -CONFIG_GCC_PLUGIN_STRUCTLEAK=y - -# -# Memory initialization -# -# CONFIG_INIT_STACK_NONE is not set -# CONFIG_GCC_PLUGIN_STRUCTLEAK_USER is not set -# CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF is not set -CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL=y -# CONFIG_GCC_PLUGIN_STRUCTLEAK_VERBOSE is not set -# CONFIG_GCC_PLUGIN_STACKLEAK is not set -CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y -# CONFIG_INIT_ON_FREE_DEFAULT_ON is not set -# end of Memory initialization -# end of Kernel hardening options -# end of Security options - -CONFIG_XOR_BLOCKS=m -CONFIG_ASYNC_CORE=m -CONFIG_ASYNC_MEMCPY=m -CONFIG_ASYNC_XOR=m -CONFIG_ASYNC_PQ=m -CONFIG_ASYNC_RAID6_RECOV=m -CONFIG_CRYPTO=y - -# -# Crypto core or helper -# -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_ALGAPI2=y -CONFIG_CRYPTO_AEAD=y -CONFIG_CRYPTO_AEAD2=y -CONFIG_CRYPTO_SKCIPHER=y -CONFIG_CRYPTO_SKCIPHER2=y -CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_HASH2=y -CONFIG_CRYPTO_RNG=y -CONFIG_CRYPTO_RNG2=y -CONFIG_CRYPTO_RNG_DEFAULT=y -CONFIG_CRYPTO_AKCIPHER2=y -CONFIG_CRYPTO_AKCIPHER=y -CONFIG_CRYPTO_KPP2=y -CONFIG_CRYPTO_KPP=y -CONFIG_CRYPTO_ACOMP2=y -CONFIG_CRYPTO_MANAGER=y -CONFIG_CRYPTO_MANAGER2=y -CONFIG_CRYPTO_USER=m -CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y -CONFIG_CRYPTO_GF128MUL=y -CONFIG_CRYPTO_NULL=y -CONFIG_CRYPTO_NULL2=y -CONFIG_CRYPTO_PCRYPT=m -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_AUTHENC=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_SIMD=m -CONFIG_CRYPTO_GLUE_HELPER_X86=m -CONFIG_CRYPTO_ENGINE=m - -# -# Public-key cryptography -# -CONFIG_CRYPTO_RSA=y -CONFIG_CRYPTO_DH=y -CONFIG_CRYPTO_ECC=m -CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m -CONFIG_CRYPTO_CURVE25519_X86=m - -# -# Authenticated Encryption with Associated Data -# -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=y -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128_AESNI_SSE2=m -CONFIG_CRYPTO_SEQIV=y -CONFIG_CRYPTO_ECHAINIV=m - -# -# Block modes -# -CONFIG_CRYPTO_CBC=m -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_CTR=y -CONFIG_CRYPTO_CTS=m -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_NHPOLY1305=m -CONFIG_CRYPTO_NHPOLY1305_SSE2=m -CONFIG_CRYPTO_NHPOLY1305_AVX2=m -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_ESSIV=m - -# -# Hash modes -# -CONFIG_CRYPTO_CMAC=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m - -# -# Digest -# -CONFIG_CRYPTO_CRC32C=m -CONFIG_CRYPTO_CRC32C_INTEL=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRC32_PCLMUL=m -CONFIG_CRYPTO_XXHASH=m -CONFIG_CRYPTO_BLAKE2B=m 
-CONFIG_CRYPTO_BLAKE2S=m -CONFIG_CRYPTO_BLAKE2S_X86=m -CONFIG_CRYPTO_CRCT10DIF=y -CONFIG_CRYPTO_CRCT10DIF_PCLMUL=m -CONFIG_CRYPTO_GHASH=y -CONFIG_CRYPTO_POLY1305=m -CONFIG_CRYPTO_POLY1305_X86_64=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA1_SSSE3=m -CONFIG_CRYPTO_SHA256_SSSE3=m -CONFIG_CRYPTO_SHA512_SSSE3=m -CONFIG_CRYPTO_SHA256=y -CONFIG_CRYPTO_SHA512=y -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL=m - -# -# Ciphers -# -CONFIG_CRYPTO_AES=y -CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_AES_NI_INTEL=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_BLOWFISH_COMMON=m -CONFIG_CRYPTO_BLOWFISH_X86_64=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_CAMELLIA_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64=m -CONFIG_CRYPTO_CAST_COMMON=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST5_AVX_X86_64=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_CAST6_AVX_X86_64=m -CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_DES3_EDE_X86_64=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_CHACHA20=m -CONFIG_CRYPTO_CHACHA20_X86_64=m -CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SERPENT_SSE2_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX2_X86_64=m -CONFIG_CRYPTO_SM4=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_TWOFISH_COMMON=m -CONFIG_CRYPTO_TWOFISH_X86_64=m -CONFIG_CRYPTO_TWOFISH_X86_64_3WAY=m -CONFIG_CRYPTO_TWOFISH_AVX_X86_64=m - -# -# Compression -# -CONFIG_CRYPTO_DEFLATE=m -CONFIG_CRYPTO_LZO=m -CONFIG_CRYPTO_842=m -CONFIG_CRYPTO_LZ4=y -CONFIG_CRYPTO_LZ4HC=m -CONFIG_CRYPTO_ZSTD=y - -# -# Random Number Generation -# -CONFIG_CRYPTO_ANSI_CPRNG=m -CONFIG_CRYPTO_DRBG_MENU=y -CONFIG_CRYPTO_DRBG_HMAC=y -CONFIG_CRYPTO_DRBG_HASH=y -CONFIG_CRYPTO_DRBG_CTR=y -CONFIG_CRYPTO_DRBG=y -CONFIG_CRYPTO_JITTERENTROPY=y -CONFIG_CRYPTO_USER_API=m -CONFIG_CRYPTO_USER_API_HASH=m -CONFIG_CRYPTO_USER_API_SKCIPHER=m -CONFIG_CRYPTO_USER_API_RNG=m -CONFIG_CRYPTO_USER_API_AEAD=m -# CONFIG_CRYPTO_STATS is not set -CONFIG_CRYPTO_HASH_INFO=y - -# -# Crypto library routines -# -CONFIG_CRYPTO_LIB_AES=y -CONFIG_CRYPTO_LIB_ARC4=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=m -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA=m -CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m -CONFIG_CRYPTO_LIB_CHACHA=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_DES=m -CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11 -CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m -CONFIG_CRYPTO_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRYPTO_LIB_SHA256=y -CONFIG_CRYPTO_HW=y -CONFIG_CRYPTO_DEV_PADLOCK=m -CONFIG_CRYPTO_DEV_PADLOCK_AES=m -CONFIG_CRYPTO_DEV_PADLOCK_SHA=m -CONFIG_CRYPTO_DEV_ATMEL_I2C=m -CONFIG_CRYPTO_DEV_ATMEL_ECC=m -CONFIG_CRYPTO_DEV_ATMEL_SHA204A=m -CONFIG_CRYPTO_DEV_CCP=y -CONFIG_CRYPTO_DEV_CCP_DD=m -CONFIG_CRYPTO_DEV_SP_CCP=y -CONFIG_CRYPTO_DEV_CCP_CRYPTO=m -CONFIG_CRYPTO_DEV_SP_PSP=y -CONFIG_CRYPTO_DEV_CCP_DEBUGFS=y -CONFIG_CRYPTO_DEV_QAT=m -CONFIG_CRYPTO_DEV_QAT_DH895xCC=m -CONFIG_CRYPTO_DEV_QAT_C3XXX=m -CONFIG_CRYPTO_DEV_QAT_C62X=m -CONFIG_CRYPTO_DEV_QAT_DH895xCCVF=m 
-CONFIG_CRYPTO_DEV_QAT_C3XXXVF=m -CONFIG_CRYPTO_DEV_QAT_C62XVF=m -CONFIG_CRYPTO_DEV_NITROX=m -CONFIG_CRYPTO_DEV_NITROX_CNN55XX=m -CONFIG_CRYPTO_DEV_CHELSIO=m -CONFIG_CHELSIO_IPSEC_INLINE=y -CONFIG_CHELSIO_TLS_DEVICE=y -CONFIG_CRYPTO_DEV_VIRTIO=m -CONFIG_CRYPTO_DEV_SAFEXCEL=m -CONFIG_CRYPTO_DEV_CCREE=m -CONFIG_CRYPTO_DEV_AMLOGIC_GXL=m -CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG=y -CONFIG_ASYMMETRIC_KEY_TYPE=y -CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y -CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m -CONFIG_X509_CERTIFICATE_PARSER=y -CONFIG_PKCS8_PRIVATE_KEY_PARSER=m -CONFIG_TPM_KEY_PARSER=m -CONFIG_PKCS7_MESSAGE_PARSER=y -# CONFIG_PKCS7_TEST_KEY is not set -CONFIG_SIGNED_PE_FILE_VERIFICATION=y - -# -# Certificates for signature checking -# -CONFIG_MODULE_SIG_KEY="certs/signing_key.pem" -CONFIG_SYSTEM_TRUSTED_KEYRING=y -CONFIG_SYSTEM_TRUSTED_KEYS="" -# CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set -CONFIG_SECONDARY_TRUSTED_KEYRING=y -CONFIG_SYSTEM_BLACKLIST_KEYRING=y -CONFIG_SYSTEM_BLACKLIST_HASH_LIST="" -# end of Certificates for signature checking - -CONFIG_BINARY_PRINTF=y - -# -# Library routines -# -CONFIG_RAID6_PQ=m -CONFIG_RAID6_PQ_BENCHMARK=y -CONFIG_LINEAR_RANGES=y -CONFIG_PACKING=y -CONFIG_BITREVERSE=y -CONFIG_GENERIC_STRNCPY_FROM_USER=y -CONFIG_GENERIC_STRNLEN_USER=y -CONFIG_GENERIC_NET_UTILS=y -CONFIG_GENERIC_FIND_FIRST_BIT=y -CONFIG_CORDIC=m -# CONFIG_PRIME_NUMBERS is not set -CONFIG_RATIONAL=y -CONFIG_GENERIC_PCI_IOMAP=y -CONFIG_GENERIC_IOMAP=y -CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y -CONFIG_ARCH_HAS_FAST_MULTIPLIER=y -CONFIG_ARCH_USE_SYM_ANNOTATIONS=y -CONFIG_CRC_CCITT=y -CONFIG_CRC16=m -CONFIG_CRC_T10DIF=y -CONFIG_CRC_ITU_T=m -CONFIG_CRC32=y -# CONFIG_CRC32_SELFTEST is not set -CONFIG_CRC32_SLICEBY8=y -# CONFIG_CRC32_SLICEBY4 is not set -# CONFIG_CRC32_SARWATE is not set -# CONFIG_CRC32_BIT is not set -CONFIG_CRC64=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_LIBCRC32C=m -CONFIG_CRC8=m -CONFIG_XXHASH=y -# CONFIG_RANDOM32_SELFTEST is not set -CONFIG_842_COMPRESS=m -CONFIG_842_DECOMPRESS=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_LZO_COMPRESS=y -CONFIG_LZO_DECOMPRESS=y -CONFIG_LZ4_COMPRESS=y -CONFIG_LZ4HC_COMPRESS=m -CONFIG_LZ4_DECOMPRESS=y -CONFIG_ZSTD_COMPRESS=y -CONFIG_ZSTD_DECOMPRESS=y -CONFIG_XZ_DEC=y -CONFIG_XZ_DEC_X86=y -CONFIG_XZ_DEC_POWERPC=y -CONFIG_XZ_DEC_IA64=y -CONFIG_XZ_DEC_ARM=y -CONFIG_XZ_DEC_ARMTHUMB=y -CONFIG_XZ_DEC_SPARC=y -CONFIG_XZ_DEC_BCJ=y -# CONFIG_XZ_DEC_TEST is not set -CONFIG_DECOMPRESS_GZIP=y -CONFIG_DECOMPRESS_BZIP2=y -CONFIG_DECOMPRESS_LZMA=y -CONFIG_DECOMPRESS_XZ=y -CONFIG_DECOMPRESS_LZO=y -CONFIG_DECOMPRESS_LZ4=y -CONFIG_DECOMPRESS_ZSTD=y -CONFIG_GENERIC_ALLOCATOR=y -CONFIG_REED_SOLOMON=y -CONFIG_REED_SOLOMON_ENC8=y -CONFIG_REED_SOLOMON_DEC8=y -CONFIG_REED_SOLOMON_DEC16=y -CONFIG_BCH=m -CONFIG_TEXTSEARCH=y -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m -CONFIG_BTREE=y -CONFIG_INTERVAL_TREE=y -CONFIG_XARRAY_MULTI=y -CONFIG_ASSOCIATIVE_ARRAY=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT_MAP=y -CONFIG_HAS_DMA=y -CONFIG_DMA_OPS=y -CONFIG_NEED_SG_DMA_LENGTH=y -CONFIG_NEED_DMA_MAP_STATE=y -CONFIG_ARCH_DMA_ADDR_T_64BIT=y -CONFIG_DMA_DECLARE_COHERENT=y -CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED=y -CONFIG_DMA_VIRT_OPS=y -CONFIG_SWIOTLB=y -CONFIG_DMA_COHERENT_POOL=y -# CONFIG_DMA_API_DEBUG is not set -CONFIG_SGL_ALLOC=y -CONFIG_IOMMU_HELPER=y -CONFIG_CHECK_SIGNATURE=y -CONFIG_CPU_RMAP=y -CONFIG_DQL=y -CONFIG_GLOB=y -# CONFIG_GLOB_SELFTEST is not set -CONFIG_NLATTR=y -CONFIG_LRU_CACHE=m -CONFIG_CLZ_TAB=y -CONFIG_IRQ_POLL=y -CONFIG_MPILIB=y -CONFIG_DIMLIB=y 
-CONFIG_LIBFDT=y -CONFIG_OID_REGISTRY=y -CONFIG_UCS2_STRING=y -CONFIG_HAVE_GENERIC_VDSO=y -CONFIG_GENERIC_GETTIMEOFDAY=y -CONFIG_GENERIC_VDSO_TIME_NS=y -CONFIG_FONT_SUPPORT=y -CONFIG_FONTS=y -# CONFIG_FONT_8x8 is not set -CONFIG_FONT_8x16=y -# CONFIG_FONT_6x11 is not set -# CONFIG_FONT_7x14 is not set -# CONFIG_FONT_PEARL_8x8 is not set -# CONFIG_FONT_ACORN_8x8 is not set -# CONFIG_FONT_MINI_4x6 is not set -# CONFIG_FONT_6x10 is not set -# CONFIG_FONT_10x18 is not set -# CONFIG_FONT_SUN8x16 is not set -# CONFIG_FONT_SUN12x22 is not set -CONFIG_FONT_TER16x32=y -CONFIG_SG_POOL=y -CONFIG_ARCH_HAS_PMEM_API=y -CONFIG_MEMREGION=y -CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y -CONFIG_ARCH_HAS_UACCESS_MCSAFE=y -CONFIG_ARCH_STACKWALK=y -CONFIG_SBITMAP=y -CONFIG_PARMAN=m -CONFIG_OBJAGG=m -# CONFIG_STRING_SELFTEST is not set -# end of Library routines - -CONFIG_PLDMFW=y - -# -# Kernel hacking -# - -# -# printk and dmesg options -# -CONFIG_PRINTK_TIME=y -# CONFIG_PRINTK_CALLER is not set -CONFIG_CONSOLE_LOGLEVEL_DEFAULT=4 -CONFIG_CONSOLE_LOGLEVEL_QUIET=1 -CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 -# CONFIG_BOOT_PRINTK_DELAY is not set -CONFIG_DYNAMIC_DEBUG=y -CONFIG_DYNAMIC_DEBUG_CORE=y -CONFIG_SYMBOLIC_ERRNAME=y -CONFIG_DEBUG_BUGVERBOSE=y -# end of printk and dmesg options - -# -# Compile-time checks and compiler options -# -CONFIG_DEBUG_INFO=y -# CONFIG_DEBUG_INFO_REDUCED is not set -# CONFIG_DEBUG_INFO_COMPRESSED is not set -# CONFIG_DEBUG_INFO_SPLIT is not set -CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y -# CONFIG_GDB_SCRIPTS is not set -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_FRAME_WARN=2048 -CONFIG_STRIP_ASM_SYMS=y -# CONFIG_READABLE_ASM is not set -# CONFIG_HEADERS_INSTALL is not set -# CONFIG_DEBUG_SECTION_MISMATCH is not set -CONFIG_SECTION_MISMATCH_WARN_ONLY=y -# CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B is not set -CONFIG_STACK_VALIDATION=y -# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set -# end of Compile-time checks and compiler options - -# -# Generic Kernel Debugging Instruments -# -CONFIG_MAGIC_SYSRQ=y -CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x0 -CONFIG_MAGIC_SYSRQ_SERIAL=y -CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE="" -CONFIG_DEBUG_FS=y -CONFIG_DEBUG_FS_ALLOW_ALL=y -# CONFIG_DEBUG_FS_DISALLOW_MOUNT is not set -# CONFIG_DEBUG_FS_ALLOW_NONE is not set -CONFIG_HAVE_ARCH_KGDB=y -# CONFIG_KGDB is not set -CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y -# CONFIG_UBSAN is not set -CONFIG_HAVE_ARCH_KCSAN=y -# end of Generic Kernel Debugging Instruments - -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_MISC=y - -# -# Memory Debugging -# -# CONFIG_PAGE_EXTENSION is not set -# CONFIG_DEBUG_PAGEALLOC is not set -# CONFIG_PAGE_OWNER is not set -CONFIG_PAGE_POISONING=y -CONFIG_PAGE_POISONING_NO_SANITY=y -CONFIG_PAGE_POISONING_ZERO=y -# CONFIG_DEBUG_PAGE_REF is not set -# CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_ARCH_HAS_DEBUG_WX=y -CONFIG_DEBUG_WX=y -CONFIG_GENERIC_PTDUMP=y -CONFIG_PTDUMP_CORE=y -# CONFIG_PTDUMP_DEBUGFS is not set -# CONFIG_DEBUG_OBJECTS is not set -# CONFIG_SLUB_DEBUG_ON is not set -# CONFIG_SLUB_STATS is not set -CONFIG_HAVE_DEBUG_KMEMLEAK=y -# CONFIG_DEBUG_KMEMLEAK is not set -# CONFIG_DEBUG_STACK_USAGE is not set -CONFIG_SCHED_STACK_END_CHECK=y -CONFIG_ARCH_HAS_DEBUG_VM_PGTABLE=y -# CONFIG_DEBUG_VM is not set -# CONFIG_DEBUG_VM_PGTABLE is not set -CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y -# CONFIG_DEBUG_VIRTUAL is not set -CONFIG_DEBUG_MEMORY_INIT=y -# CONFIG_DEBUG_PER_CPU_MAPS is not set -CONFIG_HAVE_ARCH_KASAN=y -CONFIG_HAVE_ARCH_KASAN_VMALLOC=y -CONFIG_CC_HAS_KASAN_GENERIC=y -CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y -# 
CONFIG_KASAN is not set -# end of Memory Debugging - -# CONFIG_DEBUG_SHIRQ is not set - -# -# Debug Oops, Lockups and Hangs -# -# CONFIG_PANIC_ON_OOPS is not set -CONFIG_PANIC_ON_OOPS_VALUE=0 -CONFIG_PANIC_TIMEOUT=0 -CONFIG_LOCKUP_DETECTOR=y -CONFIG_SOFTLOCKUP_DETECTOR=y -# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 -CONFIG_HARDLOCKUP_DETECTOR_PERF=y -CONFIG_HARDLOCKUP_CHECK_TIMESTAMP=y -CONFIG_HARDLOCKUP_DETECTOR=y -# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set -CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 -CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 -# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set -CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 -# CONFIG_WQ_WATCHDOG is not set -# CONFIG_TEST_LOCKUP is not set -# end of Debug Oops, Lockups and Hangs - -# -# Scheduler Debugging -# -CONFIG_SCHED_DEBUG=y -CONFIG_SCHED_INFO=y -CONFIG_SCHEDSTATS=y -# end of Scheduler Debugging - -# CONFIG_DEBUG_TIMEKEEPING is not set -CONFIG_DEBUG_PREEMPT=y - -# -# Lock Debugging (spinlocks, mutexes, etc...) -# -CONFIG_LOCK_DEBUGGING_SUPPORT=y -# CONFIG_PROVE_LOCKING is not set -# CONFIG_LOCK_STAT is not set -# CONFIG_DEBUG_RT_MUTEXES is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_MUTEXES is not set -# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set -# CONFIG_DEBUG_RWSEMS is not set -# CONFIG_DEBUG_LOCK_ALLOC is not set -# CONFIG_DEBUG_ATOMIC_SLEEP is not set -# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set -# CONFIG_LOCK_TORTURE_TEST is not set -# CONFIG_WW_MUTEX_SELFTEST is not set -# end of Lock Debugging (spinlocks, mutexes, etc...) - -CONFIG_STACKTRACE=y -# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set -# CONFIG_DEBUG_KOBJECT is not set - -# -# Debug kernel data structures -# -# CONFIG_DEBUG_LIST is not set -# CONFIG_DEBUG_PLIST is not set -# CONFIG_DEBUG_SG is not set -# CONFIG_DEBUG_NOTIFIERS is not set -# CONFIG_BUG_ON_DATA_CORRUPTION is not set -# end of Debug kernel data structures - -# CONFIG_DEBUG_CREDENTIALS is not set - -# -# RCU Debugging -# -# CONFIG_RCU_PERF_TEST is not set -# CONFIG_RCU_TORTURE_TEST is not set -# CONFIG_RCU_REF_SCALE_TEST is not set -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -# CONFIG_RCU_TRACE is not set -# CONFIG_RCU_EQS_DEBUG is not set -# end of RCU Debugging - -# CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set -# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set -# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set -CONFIG_LATENCYTOP=y -CONFIG_USER_STACKTRACE_SUPPORT=y -CONFIG_NOP_TRACER=y -CONFIG_HAVE_FUNCTION_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y -CONFIG_HAVE_DYNAMIC_FTRACE=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y -CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y -CONFIG_HAVE_SYSCALL_TRACEPOINTS=y -CONFIG_HAVE_FENTRY=y -CONFIG_HAVE_C_RECORDMCOUNT=y -CONFIG_TRACER_MAX_TRACE=y -CONFIG_TRACE_CLOCK=y -CONFIG_RING_BUFFER=y -CONFIG_EVENT_TRACING=y -CONFIG_CONTEXT_SWITCH_TRACER=y -CONFIG_RING_BUFFER_ALLOW_SWAP=y -CONFIG_TRACING=y -CONFIG_GENERIC_TRACER=y -CONFIG_TRACING_SUPPORT=y -CONFIG_FTRACE=y -# CONFIG_BOOTTIME_TRACING is not set -CONFIG_FUNCTION_TRACER=y -CONFIG_FUNCTION_GRAPH_TRACER=y -CONFIG_DYNAMIC_FTRACE=y -CONFIG_DYNAMIC_FTRACE_WITH_REGS=y -CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y -CONFIG_FUNCTION_PROFILER=y -CONFIG_STACK_TRACER=y -# CONFIG_IRQSOFF_TRACER is not set -# CONFIG_PREEMPT_TRACER is not set -CONFIG_SCHED_TRACER=y -CONFIG_HWLAT_TRACER=y -CONFIG_MMIOTRACE=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_TRACER_SNAPSHOT=y -# CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP is not set -CONFIG_BRANCH_PROFILE_NONE=y 
-# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_KPROBE_EVENTS=y -# CONFIG_KPROBE_EVENTS_ON_NOTRACE is not set -CONFIG_UPROBE_EVENTS=y -CONFIG_BPF_EVENTS=y -CONFIG_DYNAMIC_EVENTS=y -CONFIG_PROBE_EVENTS=y -CONFIG_BPF_KPROBE_OVERRIDE=y -CONFIG_FTRACE_MCOUNT_RECORD=y -CONFIG_TRACING_MAP=y -CONFIG_SYNTH_EVENTS=y -CONFIG_HIST_TRIGGERS=y -# CONFIG_TRACE_EVENT_INJECT is not set -# CONFIG_TRACEPOINT_BENCHMARK is not set -# CONFIG_RING_BUFFER_BENCHMARK is not set -# CONFIG_TRACE_EVAL_MAP_FILE is not set -# CONFIG_FTRACE_STARTUP_TEST is not set -# CONFIG_RING_BUFFER_STARTUP_TEST is not set -# CONFIG_MMIOTRACE_TEST is not set -# CONFIG_PREEMPTIRQ_DELAY_TEST is not set -# CONFIG_SYNTH_EVENT_GEN_TEST is not set -# CONFIG_KPROBE_EVENT_GEN_TEST is not set -# CONFIG_HIST_TRIGGERS_DEBUG is not set -# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set -# CONFIG_SAMPLES is not set -CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y -CONFIG_STRICT_DEVMEM=y -CONFIG_IO_STRICT_DEVMEM=y - -# -# x86 Debugging -# -CONFIG_TRACE_IRQFLAGS_SUPPORT=y -CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y -# CONFIG_X86_VERBOSE_BOOTUP is not set -CONFIG_EARLY_PRINTK=y -# CONFIG_EARLY_PRINTK_DBGP is not set -# CONFIG_EARLY_PRINTK_USB_XDBC is not set -# CONFIG_EFI_PGT_DUMP is not set -# CONFIG_DEBUG_TLBFLUSH is not set -# CONFIG_IOMMU_DEBUG is not set -CONFIG_HAVE_MMIOTRACE_SUPPORT=y -# CONFIG_X86_DECODER_SELFTEST is not set -CONFIG_IO_DELAY_0X80=y -# CONFIG_IO_DELAY_0XED is not set -# CONFIG_IO_DELAY_UDELAY is not set -# CONFIG_IO_DELAY_NONE is not set -CONFIG_DEBUG_BOOT_PARAMS=y -# CONFIG_CPA_DEBUG is not set -# CONFIG_DEBUG_ENTRY is not set -# CONFIG_DEBUG_NMI_SELFTEST is not set -# CONFIG_X86_DEBUG_FPU is not set -# CONFIG_PUNIT_ATOM_DEBUG is not set -CONFIG_UNWINDER_ORC=y -# CONFIG_UNWINDER_FRAME_POINTER is not set -# CONFIG_UNWINDER_GUESS is not set -# end of x86 Debugging - -# -# Kernel Testing and Coverage -# -# CONFIG_KUNIT is not set -# CONFIG_NOTIFIER_ERROR_INJECTION is not set -CONFIG_FUNCTION_ERROR_INJECTION=y -# CONFIG_FAULT_INJECTION is not set -CONFIG_ARCH_HAS_KCOV=y -CONFIG_CC_HAS_SANCOV_TRACE_PC=y -# CONFIG_KCOV is not set -CONFIG_RUNTIME_TESTING_MENU=y -CONFIG_LKDTM=m -# CONFIG_TEST_LIST_SORT is not set -# CONFIG_TEST_MIN_HEAP is not set -# CONFIG_TEST_SORT is not set -# CONFIG_KPROBES_SANITY_TEST is not set -# CONFIG_BACKTRACE_SELF_TEST is not set -# CONFIG_RBTREE_TEST is not set -# CONFIG_REED_SOLOMON_TEST is not set -# CONFIG_INTERVAL_TREE_TEST is not set -# CONFIG_PERCPU_TEST is not set -# CONFIG_ATOMIC64_SELFTEST is not set -# CONFIG_ASYNC_RAID6_TEST is not set -# CONFIG_TEST_HEXDUMP is not set -# CONFIG_TEST_STRING_HELPERS is not set -# CONFIG_TEST_STRSCPY is not set -# CONFIG_TEST_KSTRTOX is not set -# CONFIG_TEST_PRINTF is not set -# CONFIG_TEST_BITMAP is not set -# CONFIG_TEST_BITFIELD is not set -# CONFIG_TEST_UUID is not set -# CONFIG_TEST_XARRAY is not set -# CONFIG_TEST_OVERFLOW is not set -# CONFIG_TEST_RHASHTABLE is not set -# CONFIG_TEST_HASH is not set -# CONFIG_TEST_IDA is not set -# CONFIG_TEST_PARMAN is not set -# CONFIG_TEST_LKM is not set -# CONFIG_TEST_BITOPS is not set -# CONFIG_TEST_VMALLOC is not set -# CONFIG_TEST_USER_COPY is not set -# CONFIG_TEST_BPF is not set -# CONFIG_TEST_BLACKHOLE_DEV is not set -# CONFIG_FIND_BIT_BENCHMARK is not set -# CONFIG_TEST_FIRMWARE is not set -# CONFIG_TEST_SYSCTL is not set -# CONFIG_TEST_UDELAY is not set -# CONFIG_TEST_STATIC_KEYS is not set -# CONFIG_TEST_KMOD is not set -# CONFIG_TEST_MEMCAT_P is not set -# CONFIG_TEST_OBJAGG is not set -# 
CONFIG_TEST_STACKINIT is not set -# CONFIG_TEST_MEMINIT is not set -# CONFIG_TEST_HMM is not set -# CONFIG_TEST_FPU is not set -# CONFIG_MEMTEST is not set -# CONFIG_HYPERV_TESTING is not set -# end of Kernel Testing and Coverage -# end of Kernel hacking diff --git a/linux59-tkg/linux59-tkg-config/generic-desktop-profile.cfg b/linux59-tkg/linux59-tkg-config/generic-desktop-profile.cfg deleted file mode 100644 index 9f33a13..0000000 --- a/linux59-tkg/linux59-tkg-config/generic-desktop-profile.cfg +++ /dev/null @@ -1,35 +0,0 @@ -# linux59-TkG config file -# Generic Desktop - - -#### KERNEL OPTIONS #### - -# Disable some non-module debugging - See PKGBUILD for the list -_debugdisable="false" - -# LEAVE AN EMPTY VALUE TO BE PROMPTED ABOUT FOLLOWING OPTIONS AT BUILD TIME - -# Set to "true" to disable FUNCTION_TRACER/GRAPH_TRACER, lowering overhead but limiting debugging and analyzing of kernel functions - Kernel default is "false" -_ftracedisable="false" - -# Set to "true" to disable NUMA, lowering overhead, but breaking CUDA/NvEnc on Nvidia equipped systems - Kernel default is "false" -_numadisable="false" - -# Set to "true" to use explicit preemption points to lower latency at the cost of a small throughput loss - Can give a nice perf boost in VMs - Kernel default is "false" -_voluntary_preempt="false" - -# A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience (ZENIFY) - Default is "true" -_zenify="true" - -# compiler optimization level - 1. Optimize for performance (-O2); 2. Optimize harder (-O3); 3. Optimize for size (-Os) - Kernel default is "2" -_compileroptlevel="1" - -# Trust the CPU manufacturer to initialize Linux's CRNG (RANDOM_TRUST_CPU) - Kernel default is "false" -_random_trust_cpu="false" - -# CPU scheduler runqueue sharing - No sharing (RQ_NONE), SMT (hyperthread) siblings (RQ_SMT), Multicore siblings (RQ_MC), Symmetric Multi-Processing (RQ_SMP), NUMA (RQ_ALL) -# Valid values are "none", "smt", "mc", "mc-llc"(for zen), "smp", "all" - Kernel default is "mc" -_runqueue_sharing="mc" - -# Timer frequency - "500", "750" or "1000" - More options available in kernel config prompt when left empty depending on selected cpusched - Kernel default is "750" -_timer_freq="500" diff --git a/linux59-tkg/linux59-tkg-config/prepare b/linux59-tkg/linux59-tkg-config/prepare deleted file mode 100644 index dc2eaba..0000000 --- a/linux59-tkg/linux59-tkg-config/prepare +++ /dev/null @@ -1,991 +0,0 @@ -#!/bin/bash - -_basever=59 -_basekernel=5.9 -_sub=1 - -_tkg_initscript() { - - cp "$_where"/linux"$_basever"-tkg-patches/* "$_where" # copy patches inside the PKGBUILD's dir to preserve makepkg sourcing and md5sum checking - cp "$_where"/linux"$_basever"-tkg-config/* "$_where" # copy config files and hooks inside the PKGBUILD's dir to preserve makepkg sourcing and md5sum checking - - # Load external configuration file if present. Available variable values will overwrite customization.cfg ones. - if [ -e "$_EXT_CONFIG_PATH" ]; then - source "$_EXT_CONFIG_PATH" && msg2 "External configuration file $_EXT_CONFIG_PATH will be used to override customization.cfg values." && msg2 "" - fi - - if [ -z "$_OPTIPROFILE" ] && [ ! -e "$_where"/cpuschedset ]; then - # Prompt about optimized configurations. Available variable values will overwrite customization.cfg/external config ones. - plain "Do you want to use a predefined optimized profile?" 
- read -rp "`echo $' > 1.Custom\n 2.Ryzen Desktop (Performance)\n 3.Other Desktop (Performance)\nchoice[1-3?]: '`" _OPTIPROFILE; - fi - if [ "$_OPTIPROFILE" = "2" ]; then - source "$_where"/ryzen-desktop-profile.cfg && msg2 "Ryzen Desktop (Performance) profile will be used." && msg2 "" - elif [ "$_OPTIPROFILE" = "3" ]; then - source "$_where"/generic-desktop-profile.cfg && msg2 "Generic Desktop (Performance) profile will be used." && msg2 "" - fi - - # source cpuschedset early if present - if [ -e "$_where"/cpuschedset ]; then - source "$_where"/cpuschedset - fi - - # CPU SCHED selector - if [ -z "$_cpusched" ] && [ ! -e "$_where"/cpuschedset ]; then - plain "What CPU sched variant do you want to build/install?" - read -rp "`echo $' > 1.Project C / PDS\n 2.Project C / BMQ\n 3.MuQSS\n 4.CFS\nchoice[1-4?]: '`" CONDITION; - if [ "$CONDITION" = "2" ]; then - echo "_cpusched=\"bmq\"" > "$_where"/cpuschedset - elif [ "$CONDITION" = "3" ]; then - echo "_cpusched=\"MuQSS\"" > "$_where"/cpuschedset - elif [ "$CONDITION" = "4" ]; then - echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - else - echo "_cpusched=\"pds\"" > "$_where"/cpuschedset - fi - if [ -n "$_custom_pkgbase" ]; then - echo "_custom_pkgbase=\"${_custom_pkgbase}\"" >> "$_where"/cpuschedset - fi - elif [ "$_cpusched" = "muqss" ] || [ "$_cpusched" = "MuQSS" ]; then - echo "_cpusched=\"MuQSS\"" > "$_where"/cpuschedset - elif [ "$_cpusched" = "pds" ]; then - echo "_cpusched=\"pds\"" > "$_where"/cpuschedset - elif [ "$_cpusched" = "cfs" ]; then - echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - elif [ "$_cpusched" = "bmq" ]; then - echo "_cpusched=\"bmq\"" > "$_where"/cpuschedset - else - if [ "$_nofallback" != "true" ]; then - warning "Something is wrong with your cpusched selection. Do you want to fallback to CFS (default)?" - read -rp "`echo $' > N/y : '`" _fallback; - fi - if [[ "$_fallback" =~ [yY] ]] || [ "$_nofallback" = "true" ]; then - echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - else - error "Exiting..." - exit 1 - fi - fi - - source "$_where"/cpuschedset -} - -user_patcher() { - # To patch the user because all your base are belong to us - local _patches=("$_where"/*."${_userpatch_ext}revert") - if [ ${#_patches[@]} -ge 2 ] || [ -e "${_patches}" ]; then - if [ "$_user_patches_no_confirm" != "true" ]; then - msg2 "Found ${#_patches[@]} 'to revert' userpatches for ${_userpatch_target}:" - printf '%s\n' "${_patches[@]}" - read -rp "Do you want to install it/them? - Be careful with that ;)"$'\n> N/y : ' _CONDITION; - fi - if [[ "$_CONDITION" =~ [yY] ]] || [ "$_user_patches_no_confirm" = "true" ]; then - for _f in "${_patches[@]}"; do - if [ -e "${_f}" ]; then - msg2 "######################################################" - msg2 "" - msg2 "Reverting your own ${_userpatch_target} patch ${_f}" - msg2 "" - msg2 "######################################################" - patch -Np1 -R < "${_f}" - echo "Reverted your own patch ${_f}" >> "$_where"/last_build_config.log - fi - done - fi - fi - - _patches=("$_where"/*."${_userpatch_ext}patch") - if [ ${#_patches[@]} -ge 2 ] || [ -e "${_patches}" ]; then - if [ "$_user_patches_no_confirm" != "true" ]; then - msg2 "Found ${#_patches[@]} userpatches for ${_userpatch_target}:" - printf '%s\n' "${_patches[@]}" - read -rp "Do you want to install it/them? 
- Be careful with that ;)"$'\n> N/y : ' _CONDITION; - fi - if [[ "$_CONDITION" =~ [yY] ]] || [ "$_user_patches_no_confirm" = "true" ]; then - for _f in "${_patches[@]}"; do - if [ -e "${_f}" ]; then - msg2 "######################################################" - msg2 "" - msg2 "Applying your own ${_userpatch_target} patch ${_f}" - msg2 "" - msg2 "######################################################" - patch -Np1 < "${_f}" - echo "Applied your own patch ${_f}" >> "$_where"/last_build_config.log - fi - done - fi - fi -} - -_tkg_srcprep() { - - if [ "${_distro}" = "Arch" ]; then - msg2 "Setting version..." - scripts/setlocalversion --save-scmversion - echo "-$pkgrel-tkg-${_cpusched}" > localversion.10-pkgrel - echo "" > localversion.20-pkgname - - # add upstream patch - msg2 "Patching from $_basekernel to $pkgver" - patch -p1 -i "$srcdir"/patch-"${pkgver}" - - # ARCH Patches - if [ "${_configfile}" = "config_hardened.x86_64" ] && [ "${_cpusched}" = "cfs" ]; then - msg2 "Using linux hardened patchset" - patch -Np1 -i "$srcdir"/0012-linux-hardened.patch - else - patch -Np1 -i "$srcdir"/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch - fi - fi - - # graysky's cpu opts - https://github.com/graysky2/kernel_gcc_patch - msg2 "Applying graysky's cpu opts patch" - if [ "${_distro}" = "Arch" ]; then - patch -Np1 -i "$srcdir"/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v5.8%2B.patch - else - patch -Np1 -i "$srcdir"/enable_additional_cpu_optimizations_for_gcc_v10.1+_kernel_v5.8+.patch - fi - - # TkG - msg2 "Applying clear linux patches" - patch -Np1 -i "$srcdir"/0002-clear-patches.patch - - msg2 "Applying glitched base patch" - patch -Np1 -i "$srcdir"/0003-glitched-base.patch - - if [ -z $_misc_adds ]; then - plain "Enable misc additions ? May contain temporary fixes pending upstream or changes that can break on non-Arch. 
" - read -rp "`echo $' > [Y]/n : '`" _interactive_misc_adds; - if [ "$_interactive_misc_adds" != "n" ] && [ "$_interactive_misc_adds" != "N" ]; then - _misc_adds="true" - fi - fi - - if [ "$_misc_adds" = "true" ]; then - msg2 "Applying misc additions patch" - patch -Np1 -i "$srcdir"/0012-misc-additions.patch - fi - - if [ "${_cpusched}" = "MuQSS" ]; then - # MuQSS - msg2 "Applying MuQSS base patch" - patch -Np1 -i "$srcdir"/0004-5.9-ck1.patch - - if [ "${_aggressive_ondemand}" = "true" ]; then - msg2 "Applying MuQSS agressive ondemand governor patch" - patch -Np1 -i "$srcdir"/0004-glitched-ondemand-muqss.patch - fi - - msg2 "Applying Glitched MuQSS patch" - patch -Np1 -i "$srcdir"/0004-glitched-muqss.patch - - elif [ "${_cpusched}" = "pds" ]; then - # PDS-mq - msg2 "Applying PDS base patch" - patch -Np1 -i "$srcdir"/0009-prjc_v5.9-r0.patch - - if [ "${_aggressive_ondemand}" = "true" ]; then - msg2 "Applying PDS agressive ondemand governor patch" - patch -Np1 -i "$srcdir"/0009-glitched-ondemand-bmq.patch - fi - - msg2 "Applying Glitched PDS patch" - patch -Np1 -i "$srcdir"/0005-glitched-pds.patch - - elif [ "${_cpusched}" = "bmq" ]; then - # Project C / BMQ - msg2 "Applying Project C / BMQ base patch" - - patch -Np1 -i "$srcdir"/0009-prjc_v5.9-r0.patch - - if [ "${_aggressive_ondemand}" = "true" ]; then - msg2 "Applying BMQ agressive ondemand governor patch" - patch -Np1 -i "$srcdir"/0009-glitched-ondemand-bmq.patch - fi - - msg2 "Applying Glitched BMQ patch" - patch -Np1 -i "$srcdir"/0009-glitched-bmq.patch - - elif [ "${_cpusched}" = "cfs" ]; then - msg2 "Applying Glitched CFS patch" - patch -Np1 -i "$srcdir"/0003-glitched-cfs.patch - fi - - if [ "${_distro}" = "Arch" ]; then - if [ -z "${_configfile}" ]; then - _configfile="config.x86_64" - fi - - cat "${srcdir}/${_configfile}" > ./.config - fi - - - # Set some -tkg defaults - echo "# CONFIG_DYNAMIC_FAULT is not set" >> ./.config - sed -i -e 's/CONFIG_DEFAULT_FQ_CODEL=y/# CONFIG_DEFAULT_FQ_CODEL is not set/' ./.config - echo "CONFIG_DEFAULT_CAKE=y" >> ./.config - echo "CONFIG_NR_TTY_DEVICES=63" >> ./.config - echo "# CONFIG_NTP_PPS is not set" >> ./.config - sed -i -e 's/CONFIG_CRYPTO_LZ4=m/CONFIG_CRYPTO_LZ4=y/' ./.config - sed -i -e 's/CONFIG_CRYPTO_LZ4HC=m/CONFIG_CRYPTO_LZ4HC=y/' ./.config - sed -i -e 's/CONFIG_LZ4_COMPRESS=m/CONFIG_LZ4_COMPRESS=y/' ./.config - sed -i -e 's/CONFIG_LZ4HC_COMPRESS=m/CONFIG_LZ4HC_COMPRESS=y/' ./.config - sed -i -e 's/CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO=y/# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set/' ./.config - sed -i -e 's/# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4 is not set/CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4=y/' ./.config - sed -i -e 's/CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lzo"/CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lz4"/' ./.config - sed -i -e 's/CONFIG_RCU_BOOST_DELAY=500/CONFIG_RCU_BOOST_DELAY=0/' ./.config - sed -i -e 's/# CONFIG_CMDLINE_BOOL is not set/CONFIG_CMDLINE_BOOL=y/' ./.config - echo "CONFIG_CMDLINE=\"${_custom_commandline}\"" >> ./.config - echo "# CONFIG_CMDLINE_OVERRIDE is not set" >> ./.config - echo "# CONFIG_X86_P6_NOP is not set" >> ./.config - - # openrgb - echo "CONFIG_I2C_NCT6775=m" >> ./.config - - # ccache fix - if [ "$_noccache" != "true" ]; then - if { [ "$_distro" = "Arch" ] && pacman -Qq ccache &> /dev/null; } || { [ "$_distro" = "Ubuntu" ] && dpkg -l ccache > /dev/null; }; then - sed -i -e 's/CONFIG_GCC_PLUGINS=y/# CONFIG_GCC_PLUGINS is not set/' ./.config - fi - fi - # Skip dbg package creation on non-Arch - if [ "$_distro" != "Arch" ]; then - sed -i -e 
's/CONFIG_DEBUG_INFO.*/CONFIG_DEBUG_INFO=n/' ./.config - fi - - if [ "$_font_autoselect" != "false" ]; then - sed -i -e 's/CONFIG_FONT_TER16x32=y/# CONFIG_FONT_TER16x32 is not set\nCONFIG_FONT_AUTOSELECT=y/' ./.config - fi - - # Inject cpuopts options - echo "# CONFIG_MK8SSE3 is not set" >> ./.config - echo "# CONFIG_MK10 is not set" >> ./.config - echo "# CONFIG_MBARCELONA is not set" >> ./.config - echo "# CONFIG_MBOBCAT is not set" >> ./.config - echo "# CONFIG_MJAGUAR is not set" >> ./.config - echo "# CONFIG_MBULLDOZER is not set" >> ./.config - echo "# CONFIG_MPILEDRIVER is not set" >> ./.config - echo "# CONFIG_MSTEAMROLLER is not set" >> ./.config - echo "# CONFIG_MEXCAVATOR is not set" >> ./.config - echo "# CONFIG_MZEN is not set" >> ./.config - echo "# CONFIG_MZEN2 is not set" >> ./.config - echo "# CONFIG_MATOM is not set" >> ./.config - echo "# CONFIG_MNEHALEM is not set" >> ./.config - echo "# CONFIG_MWESTMERE is not set" >> ./.config - echo "# CONFIG_MSILVERMONT is not set" >> ./.config - echo "# CONFIG_MSANDYBRIDGE is not set" >> ./.config - echo "# CONFIG_MIVYBRIDGE is not set" >> ./.config - echo "# CONFIG_MHASWELL is not set" >> ./.config - echo "# CONFIG_MBROADWELL is not set" >> ./.config - echo "# CONFIG_MSKYLAKE is not set" >> ./.config - echo "# CONFIG_MSKYLAKEX is not set" >> ./.config - echo "# CONFIG_MCANNONLAKE is not set" >> ./.config - echo "# CONFIG_MICELAKE is not set" >> ./.config - echo "# CONFIG_MGOLDMONT is not set" >> ./.config - echo "# CONFIG_MGOLDMONTPLUS is not set" >> ./.config - echo "# CONFIG_MCASCADELAKE is not set" >> ./.config - echo "# CONFIG_MCOOPERLAKE is not set" >> ./.config - echo "# CONFIG_MTIGERLAKE is not set" >> ./.config - - # Disable some debugging - if [ "${_debugdisable}" = "true" ]; then - sed -i -e 's/CONFIG_SLUB_DEBUG=y/# CONFIG_SLUB_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_DEBUG=y/# CONFIG_PM_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_ADVANCED_DEBUG=y/# CONFIG_PM_ADVANCED_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_PM_SLEEP_DEBUG=y/# CONFIG_PM_SLEEP_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_ACPI_DEBUG=y/# CONFIG_ACPI_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_SCHED_DEBUG=y/# CONFIG_SCHED_DEBUG is not set/' ./.config - sed -i -e 's/CONFIG_LATENCYTOP=y/# CONFIG_LATENCYTOP is not set/' ./.config - sed -i -e 's/CONFIG_DEBUG_PREEMPT=y/# CONFIG_DEBUG_PREEMPT is not set/' ./.config - fi - - if [ "${_cpusched}" = "MuQSS" ]; then - # MuQSS default config - echo "CONFIG_SCHED_MUQSS=y" >> ./.config - elif [ "${_cpusched}" = "pds" ]; then - # PDS default config - echo "CONFIG_SCHED_ALT=y" >> ./.config - echo "CONFIG_SCHED_PDS=y" >> ./.config - echo "# CONFIG_SCHED_BMQ is not set" >> ./.config - elif [ "${_cpusched}" = "bmq" ]; then - # BMQ default config - echo "CONFIG_SCHED_ALT=y" >> ./.config - echo "CONFIG_SCHED_BMQ=y" >> ./.config - echo "# CONFIG_SCHED_PDS is not set" >> ./.config - fi - - if [ "${_cpusched}" = "MuQSS" ] || [ "${_cpusched}" = "pds" ] || [ "${_cpusched}" = "bmq" ]; then - # Disable CFS - sed -i -e 's/CONFIG_FAIR_GROUP_SCHED=y/# CONFIG_FAIR_GROUP_SCHED is not set/' ./.config - sed -i -e 's/CONFIG_CFS_BANDWIDTH=y/# CONFIG_CFS_BANDWIDTH is not set/' ./.config - # sched yield type - if [ -n "$_sched_yield_type" ]; then - CONDITION0="$_sched_yield_type" - else - plain "" - plain "CPU sched_yield_type - Choose what sort of yield sched_yield will perform." - plain "" - plain "For PDS and MuQSS:" - plain "0: No yield." - plain "1: Yield only to better priority/deadline tasks." 
- plain "2: Expire timeslice and recalculate deadline." - plain "" - plain "For BMQ (experimental) - No recommended value yet, so try for yourself x) :" - plain "0: No yield." - plain "1: Deboost and requeue task. (default)" - plain "2: Set rq skip task." - if [ "${_cpusched}" = "MuQSS" ]; then - read -rp "`echo $'\n 0. Supposedly best option for gaming performance - could lead to stability issues on some (AMD) platforms when combined with MuQSS\n > 1. Default and recommended option for MuQSS - could lead to stability issues on some (Intel) platforms\n 2. Can be a good option with low rr_interval on MuQSS\n [0-2?]: '`" CONDITION0; - else - read -rp "`echo $'\n > 0. Recommended option for gaming on PDS - "tkg" default\n 1. Default, but can lead to stability issues on some platforms\n 2. Can be a good option with low rr_interval on MuQSS\n [0-2?]: '`" CONDITION0; - fi - fi - if [ "$CONDITION0" = "0" ]; then - if [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/alt_core.c - else - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/"${_cpusched}".c - fi - elif [ "$CONDITION0" = "1" ]; then - msg2 "Using default CPU sched yield type (1)" - elif [ "$CONDITION0" = "2" ]; then - if [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 2;/' ./kernel/sched/alt_core.c - else - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 2;/' ./kernel/sched/"${_cpusched}".c - fi - else - if [ "${_cpusched}" = "MuQSS" ]; then - msg2 "Using default CPU sched yield type (1)" - elif [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/alt_core.c - else - sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/"${_cpusched}".c - fi - fi - fi - - # Round Robin interval - if [ "${_cpusched}" = "MuQSS" ] || [ "${_cpusched}" = "pds" ] || [ "${_cpusched}" = "bmq" ]; then - if [ -n "$_rr_interval" ]; then - CONDITION1="$_rr_interval" - else - plain "" - plain "Round Robin interval is the longest duration two tasks with the same nice level will" - plain "be delayed for. When CPU time is requested by a task, it receives a time slice equal" - plain "to the rr_interval in addition to a virtual deadline. When using yield_type 2, a low" - plain "value can help offset the disadvantages of rescheduling a process that has yielded." 
- plain "" - plain "MuQSS default: 6ms" - plain "PDS default: 4ms" - plain "BMQ default: 2ms" - read -rp "`echo $'\n > 0.Keep defaults\n 1.2ms\n 2.4ms\n 3.6ms\n 4.8ms\n [0-4?]: '`" CONDITION1; - fi - if [ "$CONDITION1" = "1" ]; then - msg2 "Using 2ms rr_interval" - _rrvalue="2" - elif [ "$CONDITION1" = "2" ]; then - msg2 "Using 4ms rr_interval" - _rrvalue="4" - elif [ "$CONDITION1" = "3" ]; then - msg2 "Using 6ms rr_interval" - _rrvalue="6" - elif [ "$CONDITION1" = "4" ]; then - msg2 "Using 8ms rr_interval" - _rrvalue="8" - else - msg2 "Using default rr_interval" - _rrvalue="default" - fi - if [ "$_rrvalue" != "default" ]; then - if [ "${_cpusched}" = "MuQSS" ]; then - sed -i -e "s/int rr_interval __read_mostly = 6;/int rr_interval __read_mostly = ${_rrvalue};/" ./kernel/sched/"${_cpusched}".c - elif [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e "s/u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000);/u64 sched_timeslice_ns __read_mostly = (${_rrvalue} * 1000 * 1000);/" ./kernel/sched/alt_core.c - fi - else - if [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then - sed -i -e "s/u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000);/u64 sched_timeslice_ns __read_mostly = (2 * 1000 * 1000);/" ./kernel/sched/alt_core.c - fi - fi - fi - - # zenify - if [ "$_zenify" = "true" ]; then - echo "CONFIG_ZENIFY=y" >> ./.config - elif [ "$_zenify" = "false" ]; then - echo "# CONFIG_ZENIFY is not set" >> ./.config - fi - - # compiler optimization level - if [ "$_compileroptlevel" = "1" ]; then - echo "# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 is not set" >> ./.config - elif [ "$_compileroptlevel" = "2" ]; then - sed -i -e 's/CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y/# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set/' ./.config - echo "CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y" >> ./.config - elif [ "$_compileroptlevel" = "3" ]; then - sed -i -e 's/CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y/# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set/' ./.config - sed -i -e 's/# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set/CONFIG_CC_OPTIMIZE_FOR_SIZE=y/' ./.config - echo "# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 is not set" >> ./.config - fi - - # cpu opt - if [ -n "$_processor_opt" ] && [ "$_processor_opt" != "native" ]; then - echo "# CONFIG_MNATIVE is not set" >> ./.config - fi - - if [ -n "$_processor_opt" ] && [ "$_processor_opt" != "generic" ]; then - sed -i -e 's/CONFIG_GENERIC_CPU=y/# CONFIG_GENERIC_CPU is not set/' ./.config - fi - - if [ "$_processor_opt" = "native" ]; then - echo "CONFIG_MNATIVE=y" >> ./.config - elif [ "$_processor_opt" = "k8" ]; then - sed -i -e 's/# CONFIG_MK8 is not set/CONFIG_MK8=y/' ./.config - elif [ "$_processor_opt" = "k8sse3" ]; then - sed -i -e 's/# CONFIG_MK8SSE3 is not set/CONFIG_MK8SSE3=y/' ./.config - elif [ "$_processor_opt" = "k10" ]; then - sed -i -e 's/# CONFIG_MK10 is not set/CONFIG_MK10=y/' ./.config - elif [ "$_processor_opt" = "barcelona" ]; then - sed -i -e 's/# CONFIG_MBARCELONA is not set/CONFIG_MBARCELONA=y/' ./.config - elif [ "$_processor_opt" = "bobcat" ]; then - sed -i -e 's/# CONFIG_MBOBCAT is not set/CONFIG_MBOBCAT=y/' ./.config - elif [ "$_processor_opt" = "jaguar" ]; then - sed -i -e 's/# CONFIG_MJAGUAR is not set/CONFIG_MJAGUAR=y/' ./.config - elif [ "$_processor_opt" = "bulldozer" ]; then - sed -i -e 's/# CONFIG_MBULLDOZER is not set/CONFIG_MBULLDOZER=y/' ./.config - elif [ "$_processor_opt" = "piledriver" ]; then - sed -i -e 's/# CONFIG_MPILEDRIVER is not set/CONFIG_MPILEDRIVER=y/' ./.config - elif [ "$_processor_opt" = 
"steamroller" ]; then - sed -i -e 's/# CONFIG_MSTEAMROLLER is not set/CONFIG_MSTEAMROLLER=y/' ./.config - elif [ "$_processor_opt" = "excavator" ]; then - sed -i -e 's/# CONFIG_MEXCAVATOR is not set/CONFIG_MEXCAVATOR=y/' ./.config - elif [ "$_processor_opt" = "zen" ]; then - sed -i -e 's/# CONFIG_MZEN is not set/CONFIG_MZEN=y/' ./.config - elif [ "$_processor_opt" = "zen2" ]; then - sed -i -e 's/# CONFIG_MZEN2 is not set/CONFIG_MZEN2=y/' ./.config - elif [ "$_processor_opt" = "mpsc" ]; then - sed -i -e 's/# CONFIG_MPSC is not set/CONFIG_MPSC=y/' ./.config - elif [ "$_processor_opt" = "atom" ]; then - sed -i -e 's/# CONFIG_MATOM is not set/CONFIG_MATOM=y/' ./.config - elif [ "$_processor_opt" = "core2" ]; then - sed -i -e 's/# CONFIG_MCORE2 is not set/CONFIG_MCORE2=y/' ./.config - elif [ "$_processor_opt" = "nehalem" ]; then - sed -i -e 's/# CONFIG_MNEHALEM is not set/CONFIG_MNEHALEM=y/' ./.config - elif [ "$_processor_opt" = "westmere" ]; then - sed -i -e 's/# CONFIG_MWESTMERE is not set/CONFIG_MWESTMERE=y/' ./.config - elif [ "$_processor_opt" = "silvermont" ]; then - sed -i -e 's/# CONFIG_MSILVERMONT is not set/CONFIG_MSILVERMONT=y/' ./.config - elif [ "$_processor_opt" = "sandybridge" ]; then - sed -i -e 's/# CONFIG_MSANDYBRIDGE is not set/CONFIG_MSANDYBRIDGE=y/' ./.config - elif [ "$_processor_opt" = "ivybridge" ]; then - sed -i -e 's/# CONFIG_MIVYBRIDGE is not set/CONFIG_MIVYBRIDGE=y/' ./.config - elif [ "$_processor_opt" = "haswell" ]; then - sed -i -e 's/# CONFIG_MHASWELL is not set/CONFIG_MHASWELL=y/' ./.config - elif [ "$_processor_opt" = "broadwell" ]; then - sed -i -e 's/# CONFIG_MBROADWELL is not set/CONFIG_MBROADWELL=y/' ./.config - elif [ "$_processor_opt" = "skylake" ]; then - sed -i -e 's/# CONFIG_MSKYLAKE is not set/CONFIG_MSKYLAKE=y/' ./.config - elif [ "$_processor_opt" = "skylakex" ]; then - sed -i -e 's/# CONFIG_MSKYLAKEX is not set/CONFIG_MSKYLAKEX=y/' ./.config - elif [ "$_processor_opt" = "cannonlake" ]; then - sed -i -e 's/# CONFIG_MCANNONLAKE is not set/CONFIG_MCANNONLAKE=y/' ./.config - elif [ "$_processor_opt" = "icelake" ]; then - sed -i -e 's/# CONFIG_MICELAKE is not set/CONFIG_MICELAKE=y/' ./.config - elif [ "$_processor_opt" = "goldmont" ]; then - sed -i -e 's/# CONFIG_MGOLDMONT is not set/CONFIG_MGOLDMONT=y/' ./.config - elif [ "$_processor_opt" = "goldmontplus" ]; then - sed -i -e 's/# CONFIG_MGOLDMONTPLUS is not set/CONFIG_MGOLDMONTPLUS=y/' ./.config - elif [ "$_processor_opt" = "cascadelake" ]; then - sed -i -e 's/# CONFIG_MCASCADELAKE is not set/CONFIG_MCASCADELAKE=y/' ./.config - elif [ "$_processor_opt" = "cooperlake" ]; then - sed -i -e 's/# CONFIG_MCOOPERLAKE is not set/CONFIG_MCOOPERLAKE=y/' ./.config - elif [ "$_processor_opt" = "tigerlake" ]; then - sed -i -e 's/# CONFIG_MTIGERLAKE is not set/CONFIG_MTIGERLAKE=y/' ./.config - fi - - # irq threading - if [ "$_irq_threading" = "true" ]; then - echo "CONFIG_FORCE_IRQ_THREADING=y" >> ./.config - elif [ "$_irq_threading" = "false" ]; then - echo "# CONFIG_FORCE_IRQ_THREADING is not set" >> ./.config - fi - - # smt nice - if [ "$_smt_nice" = "true" ]; then - echo "CONFIG_SMT_NICE=y" >> ./.config - elif [ "$_smt_nice" = "false" ]; then - echo "# CONFIG_SMT_NICE is not set" >> ./.config - fi - - # random trust cpu - if [ "$_random_trust_cpu" = "true" ]; then - sed -i -e 's/# CONFIG_RANDOM_TRUST_CPU is not set/CONFIG_RANDOM_TRUST_CPU=y/' ./.config - fi - - # rq sharing - if [ "$_runqueue_sharing" = "none" ]; then - echo -e "CONFIG_RQ_NONE=y\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# 
CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ -z "$_runqueue_sharing" ] || [ "$_runqueue_sharing" = "smt" ]; then - echo -e "# CONFIG_RQ_NONE is not set\nCONFIG_RQ_SMT=y\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" = "mc" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\nCONFIG_RQ_MC=y\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" = "smp" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\nCONFIG_RQ_SMP=y\n# CONFIG_RQ_ALL is not set" >> ./.config - elif [ "$_runqueue_sharing" = "all" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\n# CONFIG_RQ_MC_LLC is not set\n# CONFIG_RQ_SMP is not set\nCONFIG_RQ_ALL=y" >> ./.config - elif [ "$_runqueue_sharing" = "mc-llc" ]; then - echo -e "# CONFIG_RQ_NONE is not set\n# CONFIG_RQ_SMT is not set\n# CONFIG_RQ_MC is not set\nCONFIG_RQ_MC_LLC=y\n# CONFIG_RQ_SMP is not set\n# CONFIG_RQ_ALL is not set" >> ./.config - fi - - # timer freq - if [ -n "$_timer_freq" ] && [ "$_timer_freq" != "300" ]; then - sed -i -e 's/CONFIG_HZ_300=y/# CONFIG_HZ_300 is not set/' ./.config - sed -i -e 's/CONFIG_HZ_300_NODEF=y/# CONFIG_HZ_300_NODEF is not set/' ./.config - if [ "$_timer_freq" = "1000" ]; then - sed -i -e 's/# CONFIG_HZ_1000 is not set/CONFIG_HZ_1000=y/' ./.config - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=1000/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "CONFIG_HZ_1000_NODEF=y" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" = "750" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=750/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "CONFIG_HZ_750=y" >> ./.config - echo "CONFIG_HZ_750_NODEF=y" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" = "500" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=500/' ./.config - echo "CONFIG_HZ_500=y" >> ./.config - echo "CONFIG_HZ_500_NODEF=y" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - elif [ "$_timer_freq" = "100" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=100/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - echo "CONFIG_HZ_100=y" >> ./.config - echo "CONFIG_HZ_100_NODEF=y" >> ./.config - fi - elif [ "${_cpusched}" = "MuQSS" ] && [ -z 
"$_timer_freq" ]; then - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=100/' ./.config - echo "# CONFIG_HZ_500 is not set" >> ./.config - echo "# CONFIG_HZ_750 is not set" >> ./.config - echo "# CONFIG_HZ_1000_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_750_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_500_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - echo "CONFIG_HZ_100=y" >> ./.config - echo "CONFIG_HZ_100_NODEF=y" >> ./.config - else - sed -i -e 's/CONFIG_HZ_300=y/# CONFIG_HZ_300 is not set/' ./.config - sed -i -e 's/CONFIG_HZ_300_NODEF=y/# CONFIG_HZ_300_NODEF is not set/' ./.config - sed -i -e 's/CONFIG_HZ=300/CONFIG_HZ=500/' ./.config - echo "CONFIG_HZ_500=y" >> ./.config - echo "CONFIG_HZ_500_NODEF=y" >> ./.config - echo "# CONFIG_HZ_250_NODEF is not set" >> ./.config - echo "# CONFIG_HZ_300_NODEF is not set" >> ./.config - fi - - # default cpu gov - if [ "$_default_cpu_gov" = "performance" ]; then - sed -i -e 's/CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y/# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set/' ./.config - sed -i -e 's/# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set/CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y/' ./.config - elif [ "$_default_cpu_gov" = "ondemand" ]; then - sed -i -e 's/CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y/# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set/' ./.config - sed -i -e 's/# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set/CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y/' ./.config - fi - - # ACPI_CPUFREQ disablement - if [ "$_disable_acpi_cpufreq" = "true" ]; then - sed -i -e 's/CONFIG_X86_ACPI_CPUFREQ=m/# CONFIG_X86_ACPI_CPUFREQ is not set/' ./.config - fi - - # ftrace - if [ -z "$_ftracedisable" ]; then - plain "" - plain "Disable FUNCTION_TRACER/GRAPH_TRACER? Lowers overhead but limits debugging" - plain "and analyzing of kernel functions." - read -rp "`echo $' > N/y : '`" CONDITION2; - fi - if [[ "$CONDITION2" =~ [yY] ]] || [ "$_ftracedisable" = "true" ]; then - sed -i -e 's/CONFIG_FUNCTION_TRACER=y/# CONFIG_FUNCTION_TRACER is not set/' ./.config - sed -i -e 's/CONFIG_FUNCTION_GRAPH_TRACER=y/# CONFIG_FUNCTION_GRAPH_TRACER is not set/' ./.config - fi - - # disable numa - if [ -z "$_numadisable" ]; then - plain "" - plain "Disable NUMA? Lowers overhead, but breaks CUDA/NvEnc on Nvidia if disabled." - plain "https://bbs.archlinux.org/viewtopic.php?id=239174" - read -rp "`echo $' > N/y : '`" CONDITION3; - fi - if [[ "$CONDITION3" =~ [yY] ]] || [ "$_numadisable" = "true" ]; then - # disable NUMA since 99.9% of users do not have multiple CPUs but do have multiple cores in one CPU - sed -i -e 's/CONFIG_NUMA=y/# CONFIG_NUMA is not set/' \ - -i -e '/CONFIG_AMD_NUMA=y/d' \ - -i -e '/CONFIG_X86_64_ACPI_NUMA=y/d' \ - -i -e '/CONFIG_NODES_SPAN_OTHER_NODES=y/d' \ - -i -e '/# CONFIG_NUMA_EMU is not set/d' \ - -i -e '/CONFIG_NODES_SHIFT=6/d' \ - -i -e '/CONFIG_NEED_MULTIPLE_NODES=y/d' \ - -i -e '/CONFIG_USE_PERCPU_NUMA_NODE_ID=y/d' \ - -i -e '/CONFIG_ACPI_NUMA=y/d' ./.config - fi - - # tickless - if [ -z "$_tickless" ]; then - plain "" - plain "Use CattaRappa mode (Tickless/Dynticks) ?" - plain "Can give higher performances in many cases but lower consistency on some hardware." - plain "Just tickless idle can perform better with some platforms (mostly AMD) or CPU schedulers (mostly MuQSS)." 
- if [ "${_cpusched}" = "MuQSS" ]; then - read -rp "`echo $'\n 0.No, use periodic ticks\n 1.Yes, full tickless baby!\n > 2.Just tickless idle plz\n [0-2?]: '`" CONDITION4; - else - read -rp "`echo $'\n 0.No, use periodic ticks\n > 1.Yes, full tickless baby!\n 2.Just tickless idle plz\n [0-2?]: '`" CONDITION4; - fi - fi - if [ "$CONDITION4" = "0" ] || [ "$_tickless" = "0" ]; then - echo "# CONFIG_NO_HZ_FULL_NODEF is not set" >> ./.config - sed -i -e 's/# CONFIG_HZ_PERIODIC is not set/CONFIG_HZ_PERIODIC=y/' ./.config - sed -i -e 's/CONFIG_NO_HZ_IDLE=y/# CONFIG_NO_HZ_IDLE is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_FULL=y/# CONFIG_NO_HZ_FULL is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ=y/# CONFIG_NO_HZ is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_COMMON=y/# CONFIG_NO_HZ_COMMON is not set/' ./.config - elif [ "$CONDITION4" = "2" ] || [ "$_tickless" = "2" ]; then - echo "# CONFIG_NO_HZ_FULL_NODEF is not set" >> ./.config - sed -i -e 's/CONFIG_HZ_PERIODIC=y/# CONFIG_HZ_PERIODIC is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_IDLE is not set/CONFIG_NO_HZ_IDLE=y/' ./.config - sed -i -e 's/CONFIG_NO_HZ_FULL=y/# CONFIG_NO_HZ_FULL is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ is not set/CONFIG_NO_HZ=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_COMMON is not set/CONFIG_NO_HZ_COMMON=y/' ./.config - else - if [ "${_cpusched}" = "MuQSS" ]; then - echo "# CONFIG_NO_HZ_FULL_NODEF is not set" >> ./.config - sed -i -e 's/CONFIG_HZ_PERIODIC=y/# CONFIG_HZ_PERIODIC is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_IDLE is not set/CONFIG_NO_HZ_IDLE=y/' ./.config - sed -i -e 's/CONFIG_NO_HZ_FULL=y/# CONFIG_NO_HZ_FULL is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ is not set/CONFIG_NO_HZ=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_COMMON is not set/CONFIG_NO_HZ_COMMON=y/' ./.config - else - echo "CONFIG_NO_HZ_FULL_NODEF=y" >> ./.config - sed -i -e 's/CONFIG_HZ_PERIODIC=y/# CONFIG_HZ_PERIODIC is not set/' ./.config - sed -i -e 's/CONFIG_NO_HZ_IDLE=y/# CONFIG_NO_HZ_IDLE is not set/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_FULL is not set/CONFIG_NO_HZ_FULL=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ is not set/CONFIG_NO_HZ=y/' ./.config - sed -i -e 's/# CONFIG_NO_HZ_COMMON is not set/CONFIG_NO_HZ_COMMON=y/' ./.config - echo "CONFIG_CONTEXT_TRACKING=y" >> ./.config - echo "# CONFIG_CONTEXT_TRACKING_FORCE is not set" >> ./.config - fi - fi - - # voluntary preempt - if [ -z "$_voluntary_preempt" ]; then - plain "" - plain "Use explicit preemption points?" - plain "It can improve latency on PDS (at the cost of throughput)" - plain "and improve throughput on other schedulers (at the cost of latency)" - read -rp "`echo $' > N/y : '`" CONDITION5; - fi - if [[ "$CONDITION5" =~ [yY] ]] || [ "$_voluntary_preempt" = "true" ]; then - sed -i -e 's/CONFIG_PREEMPT=y/# CONFIG_PREEMPT is not set/' ./.config - sed -i -e 's/CONFIG_PREEMPT_LL=y/# CONFIG_PREEMPT_LL is not set/' ./.config - sed -i -e 's/# CONFIG_PREEMPT_VOLUNTARY is not set/CONFIG_PREEMPT_VOLUNTARY=y/' ./.config - fi - - # Open Firmware support - if [ -z "$_OFenable" ]; then - plain "" - plain "Enable Device Tree and Open Firmware support?" - read -rp "`echo $' > N/y : '`" CONDITION6; - fi - if [[ "$CONDITION6" =~ [yY] ]] || [ "$_OFenable" = "true" ]; then - sed -i -e 's/# CONFIG_OF is not set/CONFIG_OF=y/' ./.config - fi - - # acs override - if [ -z "$_acs_override" ]; then - plain "" - plain "Use ACS override patch?" 
- plain "https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29" - read -rp "`echo $' > N/y : '`" CONDITION7; - fi - if [[ "$CONDITION7" =~ [yY] ]] || [ "$_acs_override" = "true" ]; then - msg2 "Patching ACS override" - patch -Np1 -i "$srcdir"/0006-add-acs-overrides_iommu.patch - fi - - # bcachefs - if [ -z "$_bcachefs" ]; then - plain "" - plain "Add Bcache filesystem support? You'll have to install bcachefs-tools-git from AUR for utilities." - plain "https://bcachefs.org/" - read -rp "`echo $' > N/y : '`" CONDITION8; - fi - if [[ "$CONDITION8" =~ [yY] ]] || [ "$_bcachefs" = "true" ]; then - msg2 "Patching Bcache filesystem support override" - patch -Np1 -i "$srcdir"/0008-5.9-bcachefs.patch - echo "CONFIG_BCACHEFS_FS=m" >> ./.config - echo "CONFIG_BCACHEFS_QUOTA=y" >> ./.config - echo "CONFIG_BCACHEFS_POSIX_ACL=y" >> ./.config - echo "# CONFIG_BCACHEFS_DEBUG is not set" >> ./.config - echo "# CONFIG_BCACHEFS_TESTS is not set" >> ./.config - echo "# CONFIG_DEBUG_CLOSURES is not set" >> ./.config - fi - - # fsync support - if [ -z "$_fsync" ]; then - plain "" - plain "Enable support for fsync, an experimental replacement for esync in Valve Proton 4.11+" - plain "https://steamcommunity.com/games/221410/announcements/detail/2957094910196249305" - read -rp "`echo $' > N/y : '`" CONDITION9; - fi - if [[ "$CONDITION9" =~ [yY] ]] || [ "$_fsync" = "true" ]; then - msg2 "Patching Fsync support" - patch -Np1 -i "$srcdir"/0007-v5.9-fsync.patch - fi - - # ZFS fix - if [ -z "$_zfsfix" ]; then - plain "" - plain "Add back missing symbol for AES-NI/AVX support on ZFS" - plain "https://github.com/NixOS/nixpkgs/blob/master/pkgs/os-specific/linux/kernel/export_kernel_fpu_functions_5_3.patch" - read -rp "`echo $' > N/y : '`" CONDITION11; - fi - if [[ "$CONDITION11" =~ [yY] ]] || [ "$_zfsfix" = "true" ]; then - msg2 "Patching missing symbol for AES-NI/AVX support on ZFS" - patch -Np1 -i "$srcdir"/0011-ZFS-fix.patch - fi - - # Community patches - if [ -n "$_community_patches" ]; then - if [ ! -d "$_where/../../community-patches" ]; then - cd "$_where/../.." && git clone https://github.com/Frogging-Family/community-patches.git && cd "${srcdir}/${_srcpath}" - fi - _community_patches=($_community_patches) - for _p in ${_community_patches[@]}; do - ln -s "$_where"/../../community-patches/linux"$_basever"-tkg/$_p "$_where"/ - done - fi - - # userpatches - if [ "$_user_patches" = "true" ]; then - _userpatch_target="linux-${_basekernel}" - _userpatch_ext="my" - user_patcher - fi - - # Community patches removal - for _p in ${_community_patches[@]}; do - rm -f "$_where"/$_p - done - - if [ "$_distro" = "Arch" ]; then - # don't run depmod on 'make install'. We'll do this ourselves in packaging - sed -i '2iexit 0' scripts/depmod.sh - - # get kernel version - make prepare - fi - - # modprobed-db - if [ -z "$_modprobeddb" ]; then - plain "" - plain "Use modprobed db to clean config from unneeded modules?" - plain "Speeds up compilation considerably. Requires root." - plain "https://wiki.archlinux.org/index.php/Modprobed-db" - plain "!!!! Make sure to have a well populated db !!!!" 
- read -rp "`echo $' > N/y : '`" CONDITIONMPDB; - fi - if [[ "$CONDITIONMPDB" =~ [yY] ]] || [ "$_modprobeddb" = "true" ]; then - sudo modprobed-db recall - yes "" | make localmodconfig - fi - - if [ true = "$_config_fragments" ]; then - local fragments=() - mapfile -d '' -t fragments < <(find "$_where"/ -type f -name "*.myfrag" -print0) - - if [ true = "$_config_fragments_no_confirm" ]; then - printf 'Using config fragment %s\n' "${fragments[@]#$_where/}" - else - for i in "${!fragments[@]}"; do - while true; do - read -r -p 'Found config fragment '"${fragments[$i]#$_where/}"', apply it? [y/N] ' CONDITIONMPDB - CONDITIONMPDB="$(printf '%s' "$CONDITIONMPDB" | tr '[:upper:]' '[:lower:]')" - case "$CONDITIONMPDB" in - y|yes) - break;; - n|no|'') - unset fragments[$i] - break;; - *) - echo 'Please answer with yes or no' - esac - done - done - fi - - if [ 0 -lt "${#fragments[@]}" ]; then - scripts/kconfig/merge_config.sh -m .config "${fragments[@]}" - fi - fi - - # menuconfig / nconfig - if [ -z "$_menunconfig" ]; then - plain "" - plain "*Optional* For advanced users - Do you want to use make menuconfig or nconfig" - plain "to configure the kernel before building it?" - plain "If you do, make sure your terminal is currently" - plain "at least 19 lines by 80 columns large or you'll get an error :D" - read -rp "`echo $' > 0. nope\n 1. menuconfig\n 2. nconfig\n 3. xconfig\n choice[0-3?]: '`" CONDITIONMNC; - _menunconfig="$CONDITIONMNC" - fi - if [ 1 = "$_menunconfig" ]; then - cp .config .config.orig - make menuconfig - elif [ 2 = "$_menunconfig" ]; then - cp .config .config.orig - make nconfig - elif [ 3 = "$_menunconfig" ]; then - cp .config .config.orig - make xconfig - else - # rewrite configuration - yes "" | make config >/dev/null - fi - if [ 1 = "$_menunconfig" ] || [ 2 = "$_menunconfig" ] || [ 3 = "$_menunconfig" ]; then - if [ -z "${_diffconfig}" ]; then - while true; do - read -r -p 'Generate a config fragment from your changes? [y/N] ' CONDITIONF - CONDITIONF="$(printf '%s' "$CONDITIONF" | tr '[:upper:]' '[:lower:]')" - case "$CONDITIONF" in - y|yes) - _diffconfig=true - break;; - n|no|'') - _diffconfig=false - break;; - *) - echo 'Please answer with yes or no' - esac - done - fi - if [ true = "$_diffconfig" ]; then - if [ -z "$_diffconfig_name" ]; then - IFS= read -r -p 'Filename for the config fragment [leave empty to not generate fragment]: ' _diffconfig_name - fi - if [ -z "$_diffconfig_name" ]; then - echo 'No file name given, not generating config fragment.' 
- else ( - prev_pwd="${PWD:-$(pwd)}" - cd "$_where" - "${prev_pwd}/scripts/diffconfig" -m "${prev_pwd}/.config.orig" "${prev_pwd}/.config" > "$_diffconfig_name" - ) fi - fi - rm .config.orig - fi - - if [ "$_distro" = "Arch" ]; then - make -s kernelrelease > version - msg2 "Prepared %s version %s" "$pkgbase" "$( -From: Serge Hallyn -Date: Fri, 31 May 2013 19:12:12 +0100 -Subject: [PATCH] add sysctl to disallow unprivileged CLONE_NEWUSER by default - -Signed-off-by: Serge Hallyn -[bwh: Remove unneeded binary sysctl bits] -Signed-off-by: Daniel Micay ---- - kernel/fork.c | 15 +++++++++++++++ - kernel/sysctl.c | 12 ++++++++++++ - kernel/user_namespace.c | 3 +++ - 3 files changed, 30 insertions(+) - -diff --git a/kernel/fork.c b/kernel/fork.c -index 07cc743698d3668e..4011d68a8ff9305c 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -102,6 +102,11 @@ - - #define CREATE_TRACE_POINTS - #include -+#ifdef CONFIG_USER_NS -+extern int unprivileged_userns_clone; -+#else -+#define unprivileged_userns_clone 0 -+#endif - - /* - * Minimum number of threads to boot the kernel -@@ -1555,6 +1560,10 @@ static __latent_entropy struct task_struct *copy_process( - if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) - return ERR_PTR(-EINVAL); - -+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) -+ if (!capable(CAP_SYS_ADMIN)) -+ return ERR_PTR(-EPERM); -+ - /* - * Thread groups must share signals as well, and detached threads - * can only be started up within the thread group. -@@ -2348,6 +2357,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) - if (unshare_flags & CLONE_NEWNS) - unshare_flags |= CLONE_FS; - -+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { -+ err = -EPERM; -+ if (!capable(CAP_SYS_ADMIN)) -+ goto bad_unshare_out; -+ } -+ - err = check_unshare_flags(unshare_flags); - if (err) - goto bad_unshare_out; -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index b86520ed3fb60fbf..f7dab3760839f1a1 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -105,6 +105,9 @@ extern int core_uses_pid; - - #if defined(CONFIG_SYSCTL) - -+#ifdef CONFIG_USER_NS -+extern int unprivileged_userns_clone; -+#endif - /* Constants used for minimum and maximum */ - #ifdef CONFIG_LOCKUP_DETECTOR - static int sixty = 60; -@@ -513,6 +516,15 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_USER_NS -+ { -+ .procname = "unprivileged_userns_clone", -+ .data = &unprivileged_userns_clone, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+#endif - #ifdef CONFIG_PROC_SYSCTL - { - .procname = "tainted", -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index c490f1e4313b998a..dd03bd39d7bf194d 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -24,6 +24,9 @@ - #include - #include - -+/* sysctl */ -+int unprivileged_userns_clone; -+ - static struct kmem_cache *user_ns_cachep __read_mostly; - static DEFINE_MUTEX(userns_state_mutex); - --- -2.15.1 - -From b5202296055dd333db4425120d3f93ef4e6a0573 Mon Sep 17 00:00:00 2001 -From: "Jan Alexander Steffens (heftig)" -Date: Thu, 7 Dec 2017 13:50:48 +0100 -Subject: ZEN: Add CONFIG for unprivileged_userns_clone - -This way our default behavior continues to match the vanilla kernel. 
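Taken together, the two patches above expose a kernel.unprivileged_userns_clone sysctl, with its default chosen by the new CONFIG_USER_NS_UNPRIVILEGED option. A brief sketch of using the knob at runtime on a kernel that carries the patch; the drop-in file name is illustrative:

    #!/bin/bash
    # Only meaningful on kernels built with this patch and CONFIG_USER_NS=y.
    sysctl kernel.unprivileged_userns_clone              # 1 = unprivileged user namespaces allowed
    sudo sysctl -w kernel.unprivileged_userns_clone=0    # restrict CLONE_NEWUSER to CAP_SYS_ADMIN
    # Persist the setting across reboots:
    echo 'kernel.unprivileged_userns_clone = 0' | sudo tee /etc/sysctl.d/99-userns.conf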
---- - init/Kconfig | 16 ++++++++++++++++ - kernel/user_namespace.c | 4 ++++ - 2 files changed, 20 insertions(+) - -diff --git a/init/Kconfig b/init/Kconfig -index 4592bf7997c0..f3df02990aff 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1004,6 +1004,22 @@ config USER_NS - - If unsure, say N. - -+config USER_NS_UNPRIVILEGED -+ bool "Allow unprivileged users to create namespaces" -+ default y -+ depends on USER_NS -+ help -+ When disabled, unprivileged users will not be able to create -+ new namespaces. Allowing users to create their own namespaces -+ has been part of several recent local privilege escalation -+ exploits, so if you need user namespaces but are -+ paranoid^Wsecurity-conscious you want to disable this. -+ -+ This setting can be overridden at runtime via the -+ kernel.unprivileged_userns_clone sysctl. -+ -+ If unsure, say Y. -+ - config PID_NS - bool "PID Namespaces" - default y -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 6b9dbc257e34..107b17f0d528 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -27,7 +27,11 @@ - #include - - /* sysctl */ -+#ifdef CONFIG_USER_NS_UNPRIVILEGED -+int unprivileged_userns_clone = 1; -+#else - int unprivileged_userns_clone; -+#endif - - static struct kmem_cache *user_ns_cachep __read_mostly; - static DEFINE_MUTEX(userns_state_mutex); diff --git a/linux59-tkg/linux59-tkg-patches/0002-clear-patches.patch b/linux59-tkg/linux59-tkg-patches/0002-clear-patches.patch deleted file mode 100644 index 22a32f5..0000000 --- a/linux59-tkg/linux59-tkg-patches/0002-clear-patches.patch +++ /dev/null @@ -1,360 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Mon, 14 Mar 2016 11:10:58 -0600 -Subject: [PATCH] pci pme wakeups - -Reduce wakeups for PME checks, which are a workaround for miswired -boards (sadly, too many of them) in laptops. 
---- - drivers/pci/pci.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c -index c9338f9..6974fbf 100644 ---- a/drivers/pci/pci.c -+++ b/drivers/pci/pci.c -@@ -62,7 +62,7 @@ struct pci_pme_device { - struct pci_dev *dev; - }; - --#define PME_TIMEOUT 1000 /* How long between PME checks */ -+#define PME_TIMEOUT 4000 /* How long between PME checks */ - - static void pci_dev_d3_sleep(struct pci_dev *dev) - { --- -https://clearlinux.org - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Sat, 19 Mar 2016 21:32:19 -0400 -Subject: [PATCH] intel_idle: tweak cpuidle cstates - -Increase target_residency in cpuidle cstate - -Tune intel_idle to be a bit less agressive; -Clear linux is cleaner in hygiene (wakupes) than the average linux, -so we can afford changing these in a way that increases -performance while keeping power efficiency ---- - drivers/idle/intel_idle.c | 44 +++++++++++++++++++-------------------- - 1 file changed, 22 insertions(+), 22 deletions(-) - -diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c -index f449584..c994d24 100644 ---- a/drivers/idle/intel_idle.c -+++ b/drivers/idle/intel_idle.c -@@ -531,7 +531,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -539,7 +539,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 33, -- .target_residency = 100, -+ .target_residency = 900, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -547,7 +547,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 133, -- .target_residency = 400, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -555,7 +555,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x32", - .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 166, -- .target_residency = 500, -+ .target_residency = 1500, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -563,7 +563,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 300, -- .target_residency = 900, -+ .target_residency = 2000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -571,7 +571,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 600, -- .target_residency = 1800, -+ .target_residency = 5000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -579,7 +579,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { - .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 2600, -- .target_residency = 7700, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -599,7 +599,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, - 
.exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -607,7 +607,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 40, -- .target_residency = 100, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -615,7 +615,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 133, -- .target_residency = 400, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -623,7 +623,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x32", - .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 166, -- .target_residency = 500, -+ .target_residency = 2000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -631,7 +631,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 300, -- .target_residency = 900, -+ .target_residency = 4000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -639,7 +639,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 600, -- .target_residency = 1800, -+ .target_residency = 7000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -647,7 +647,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { - .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 2600, -- .target_residency = 7700, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -668,7 +668,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 120, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -676,7 +676,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x10", - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 70, -- .target_residency = 100, -+ .target_residency = 1000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -684,7 +684,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 85, -- .target_residency = 200, -+ .target_residency = 600, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -692,7 +692,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x33", - .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 124, -- .target_residency = 800, -+ .target_residency = 3000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -700,7 +700,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 200, -- .target_residency = 800, -+ .target_residency = 3200, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -708,7 +708,7 @@ static struct cpuidle_state 
skl_cstates[] __initdata = { - .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 480, -- .target_residency = 5000, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -716,7 +716,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { - .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 890, -- .target_residency = 5000, -+ .target_residency = 9000, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { -@@ -737,7 +737,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { - .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, - .exit_latency = 10, -- .target_residency = 20, -+ .target_residency = 300, - .enter = &intel_idle, - .enter_s2idle = intel_idle_s2idle, }, - { --- -https://clearlinux.org - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Fri, 6 Jan 2017 15:34:09 +0000 -Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little - bigger than default - ---- - net/ipv4/tcp.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index 30c1142..4345075 100644 ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -4201,8 +4201,8 @@ void __init tcp_init(void) - tcp_init_mem(); - /* Set per-socket limits to no more than 1/128 the pressure threshold */ - limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); -- max_wshare = min(4UL*1024*1024, limit); -- max_rshare = min(6UL*1024*1024, limit); -+ max_wshare = min(16UL*1024*1024, limit); -+ max_rshare = min(16UL*1024*1024, limit); - - init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; - init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; --- -https://clearlinux.org - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Sun, 18 Feb 2018 23:35:41 +0000 -Subject: [PATCH] locking: rwsem: spin faster - -tweak rwsem owner spinning a bit ---- - kernel/locking/rwsem.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c -index f11b9bd..1bbfcc1 100644 ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -717,6 +717,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) - struct task_struct *new, *owner; - unsigned long flags, new_flags; - enum owner_state state; -+ int i = 0; - - owner = rwsem_owner_flags(sem, &flags); - state = rwsem_owner_state(owner, flags, nonspinnable); -@@ -750,7 +751,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) - break; - } - -- cpu_relax(); -+ if (i++ > 1000) -+ cpu_relax(); - } - rcu_read_unlock(); - --- -https://clearlinux.org - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Thu, 2 Jun 2016 23:36:32 -0500 -Subject: [PATCH] initialize ata before graphics - -ATA init is the long pole in the boot process, and its asynchronous. 
-move the graphics init after it so that ata and graphics initialize -in parallel ---- - drivers/Makefile | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/drivers/Makefile b/drivers/Makefile -index c0cd1b9..af1e2fb 100644 ---- a/drivers/Makefile -+++ b/drivers/Makefile -@@ -59,15 +59,8 @@ obj-y += char/ - # iommu/ comes before gpu as gpu are using iommu controllers - obj-y += iommu/ - --# gpu/ comes after char for AGP vs DRM startup and after iommu --obj-y += gpu/ -- - obj-$(CONFIG_CONNECTOR) += connector/ - --# i810fb and intelfb depend on char/agp/ --obj-$(CONFIG_FB_I810) += video/fbdev/i810/ --obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ -- - obj-$(CONFIG_PARPORT) += parport/ - obj-$(CONFIG_NVM) += lightnvm/ - obj-y += base/ block/ misc/ mfd/ nfc/ -@@ -80,6 +73,14 @@ obj-$(CONFIG_IDE) += ide/ - obj-y += scsi/ - obj-y += nvme/ - obj-$(CONFIG_ATA) += ata/ -+ -+# gpu/ comes after char for AGP vs DRM startup and after iommu -+obj-y += gpu/ -+ -+# i810fb and intelfb depend on char/agp/ -+obj-$(CONFIG_FB_I810) += video/fbdev/i810/ -+obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ -+ - obj-$(CONFIG_TARGET_CORE) += target/ - obj-$(CONFIG_MTD) += mtd/ - obj-$(CONFIG_SPI) += spi/ --- -https://clearlinux.org - diff --git a/linux59-tkg/linux59-tkg-patches/0003-glitched-base.patch b/linux59-tkg/linux59-tkg-patches/0003-glitched-base.patch deleted file mode 100644 index fb09b35..0000000 --- a/linux59-tkg/linux59-tkg-patches/0003-glitched-base.patch +++ /dev/null @@ -1,708 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: [PATCH 01/17] glitched - ---- - scripts/mkcompile_h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h -index baf3ab8d9d49..854e32e6aec7 100755 ---- a/scripts/mkcompile_h -+++ b/scripts/mkcompile_h -@@ -41,8 +41,8 @@ else - fi - - UTS_VERSION="#$VERSION" --CONFIG_FLAGS="" --if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi -+CONFIG_FLAGS="TKG" -+if [ -n "$SMP" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS SMP"; fi - if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi - if [ -n "$PREEMPT_RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT_RT"; fi - --- -2.28.0 - - -From c304f43d14e98d4bf1215fc10bc5012f554bdd8a Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 29 Jan 2018 16:59:22 +0000 -Subject: [PATCH 02/17] dcache: cache_pressure = 50 decreases the rate at which - VFS caches are reclaimed - -Signed-off-by: Alexandre Frade ---- - fs/dcache.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/dcache.c b/fs/dcache.c -index 361ea7ab30ea..0c5cf69b241a 100644 ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -71,7 +71,7 @@ - * If no ancestor relationship: - * arbitrary, since it's serialized on rename_lock - */ --int sysctl_vfs_cache_pressure __read_mostly = 100; -+int sysctl_vfs_cache_pressure __read_mostly = 50; - EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); - - __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); --- -2.28.0 - - -From 28f32f59d9d55ac7ec3a20b79bdd02d2a0a5f7e1 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 29 Jan 2018 18:29:13 +0000 -Subject: [PATCH 03/17] sched/core: nr_migrate = 128 increases number of tasks - to iterate in a single balance run. 
- -Signed-off-by: Alexandre Frade ---- - kernel/sched/core.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index f788cd61df21..2bfbb4213707 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -59,7 +59,7 @@ const_debug unsigned int sysctl_sched_features = - * Number of tasks to iterate in a single balance run. - * Limited because this is done with IRQs disabled. - */ --const_debug unsigned int sysctl_sched_nr_migrate = 32; -+const_debug unsigned int sysctl_sched_nr_migrate = 128; - - /* - * period over which we measure -rt task CPU usage in us. -@@ -71,9 +71,9 @@ __read_mostly int scheduler_running; - - /* - * part of the period that we allow rt tasks to run in us. -- * default: 0.95s -+ * XanMod default: 0.98s - */ --int sysctl_sched_rt_runtime = 950000; -+int sysctl_sched_rt_runtime = 980000; - - /* - * __task_rq_lock - lock the rq @p resides on. --- -2.28.0 - - -From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 29 Jan 2018 17:41:29 +0000 -Subject: [PATCH 04/17] scripts: disable the localversion "+" tag of a git repo - -Signed-off-by: Alexandre Frade ---- - scripts/setlocalversion | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/scripts/setlocalversion b/scripts/setlocalversion -index 20f2efd57b11..0552d8b9f582 100755 ---- a/scripts/setlocalversion -+++ b/scripts/setlocalversion -@@ -54,7 +54,7 @@ scm_version() - # If only the short version is requested, don't bother - # running further git commands - if $short; then -- echo "+" -+ # echo "+" - return - fi - # If we are past a tagged commit (like --- -2.28.0 - - -From 61fcb33fb0de8bc0f060e0a1ada38ed149217f4d Mon Sep 17 00:00:00 2001 -From: Oleksandr Natalenko -Date: Wed, 11 Dec 2019 11:46:19 +0100 -Subject: [PATCH 05/17] init/Kconfig: enable -O3 for all arches - -Building a kernel with -O3 may help in hunting bugs like [1] and thus -using this switch should not be restricted to one specific arch only. - -With that, lets expose it for everyone. - -[1] https://lore.kernel.org/lkml/673b885183fb64f1cbb3ed2387524077@natalenko.name/ - -Signed-off-by: Oleksandr Natalenko ---- - init/Kconfig | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/init/Kconfig b/init/Kconfig -index 0498af567f70..3ae8678e1145 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1278,7 +1278,6 @@ config CC_OPTIMIZE_FOR_PERFORMANCE - - config CC_OPTIMIZE_FOR_PERFORMANCE_O3 - bool "Optimize more for performance (-O3)" -- depends on ARC - help - Choosing this option will pass "-O3" to your compiler to optimize - the kernel yet more for performance. 
--- -2.28.0 - - -From 360c6833e07cc9fdef5746f6bc45bdbc7212288d Mon Sep 17 00:00:00 2001 -From: "Jan Alexander Steffens (heftig)" -Date: Fri, 26 Oct 2018 11:22:33 +0100 -Subject: [PATCH 06/17] infiniband: Fix __read_overflow2 error with -O3 - inlining - ---- - drivers/infiniband/core/addr.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 3a98439bba83..6efc4f907f58 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -820,6 +820,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, - union { - struct sockaddr_in _sockaddr_in; - struct sockaddr_in6 _sockaddr_in6; -+ struct sockaddr_ib _sockaddr_ib; - } sgid_addr, dgid_addr; - int ret; - --- -2.28.0 - - -From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001 -From: Etienne Juvigny -Date: Mon, 3 Sep 2018 17:36:25 +0200 -Subject: [PATCH 07/17] Zenify & stuff - ---- - init/Kconfig | 32 ++++++++++++++++++++++++++++++++ - kernel/sched/fair.c | 25 +++++++++++++++++++++++++ - mm/page-writeback.c | 8 ++++++++ - 3 files changed, 65 insertions(+) - -diff --git a/init/Kconfig b/init/Kconfig -index 3ae8678e1145..da708eed0f1e 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -92,6 +92,38 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config ZENIFY -+ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience" -+ default y -+ help -+ Tunes the kernel for responsiveness at the cost of throughput and power usage. -+ -+ --- Virtual Memory Subsystem --------------------------- -+ -+ Mem dirty before bg writeback..: 10 % -> 20 % -+ Mem dirty before sync writeback: 20 % -> 50 % -+ -+ --- Block Layer ---------------------------------------- -+ -+ Queue depth...............: 128 -> 512 -+ Default MQ scheduler......: mq-deadline -> bfq -+ -+ --- CFS CPU Scheduler ---------------------------------- -+ -+ Scheduling latency.............: 6 -> 3 ms -+ Minimal granularity............: 0.75 -> 0.3 ms -+ Wakeup granularity.............: 1 -> 0.5 ms -+ CPU migration cost.............: 0.5 -> 0.25 ms -+ Bandwidth slice size...........: 5 -> 3 ms -+ Ondemand fine upscaling limit..: 95 % -> 85 % -+ -+ --- MuQSS CPU Scheduler -------------------------------- -+ -+ Scheduling interval............: 6 -> 3 ms -+ ISO task max realtime use......: 70 % -> 25 % -+ Ondemand coarse upscaling limit: 80 % -> 45 % -+ Ondemand fine upscaling limit..: 95 % -> 45 % -+ - config BROKEN - bool - -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 6b3b59cc51d6..2a0072192c3d 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -37,8 +37,13 @@ - * - * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_latency = 3000000ULL; -+static unsigned int normalized_sysctl_sched_latency = 3000000ULL; -+#else - unsigned int sysctl_sched_latency = 6000000ULL; - static unsigned int normalized_sysctl_sched_latency = 6000000ULL; -+#endif - - /* - * The initial- and re-scaling of tunables is configurable -@@ -58,13 +63,22 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L - * - * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_min_granularity = 300000ULL; -+static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL; -+#else - unsigned int sysctl_sched_min_granularity = 750000ULL; - static unsigned int 
normalized_sysctl_sched_min_granularity = 750000ULL; -+#endif - - /* - * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity - */ -+#ifdef CONFIG_ZENIFY -+static unsigned int sched_nr_latency = 10; -+#else - static unsigned int sched_nr_latency = 8; -+#endif - - /* - * After fork, child runs first. If set to 0 (default) then -@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; - * - * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_wakeup_granularity = 500000UL; -+static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL; -+ -+const_debug unsigned int sysctl_sched_migration_cost = 50000UL; -+#else - unsigned int sysctl_sched_wakeup_granularity = 1000000UL; - static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; - - const_debug unsigned int sysctl_sched_migration_cost = 500000UL; -+#endif - - int sched_thermal_decay_shift; - static int __init setup_sched_thermal_decay_shift(char *str) -@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu) - * - * (default: 5 msec, units: microseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; -+#else - unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; - #endif -+#endif - - static inline void update_load_add(struct load_weight *lw, unsigned long inc) - { -diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index 28b3e7a67565..01a1aef2b9b1 100644 ---- a/mm/page-writeback.c -+++ b/mm/page-writeback.c -@@ -71,7 +71,11 @@ static long ratelimit_pages = 32; - /* - * Start background writeback (via writeback threads) at this percentage - */ -+#ifdef CONFIG_ZENIFY -+int dirty_background_ratio = 20; -+#else - int dirty_background_ratio = 10; -+#endif - - /* - * dirty_background_bytes starts at 0 (disabled) so that it is a function of -@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable; - /* - * The generator of dirty data starts writeback at this percentage - */ -+#ifdef CONFIG_ZENIFY -+int vm_dirty_ratio = 50; -+#else - int vm_dirty_ratio = 20; -+#endif - - /* - * vm_dirty_bytes starts at 0 (disabled) so that it is a function of --- -2.28.0 - - -From e92e67143385cf285851e12aa8b7f083dd38dd24 Mon Sep 17 00:00:00 2001 -From: Steven Barrett -Date: Sun, 16 Jan 2011 18:57:32 -0600 -Subject: [PATCH 08/17] ZEN: Allow TCP YeAH as default congestion control - -4.4: In my tests YeAH dramatically slowed down transfers over a WLAN, - reducing throughput from ~65Mbps (CUBIC) to ~7MBps (YeAH) over 10 - seconds (netperf TCP_STREAM) including long stalls. - - Be careful when choosing this. 
~heftig ---- - net/ipv4/Kconfig | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig -index e64e59b536d3..bfb55ef7ebbe 100644 ---- a/net/ipv4/Kconfig -+++ b/net/ipv4/Kconfig -@@ -691,6 +691,9 @@ choice - config DEFAULT_VEGAS - bool "Vegas" if TCP_CONG_VEGAS=y - -+ config DEFAULT_YEAH -+ bool "YeAH" if TCP_CONG_YEAH=y -+ - config DEFAULT_VENO - bool "Veno" if TCP_CONG_VENO=y - -@@ -724,6 +727,7 @@ config DEFAULT_TCP_CONG - default "htcp" if DEFAULT_HTCP - default "hybla" if DEFAULT_HYBLA - default "vegas" if DEFAULT_VEGAS -+ default "yeah" if DEFAULT_YEAH - default "westwood" if DEFAULT_WESTWOOD - default "veno" if DEFAULT_VENO - default "reno" if DEFAULT_RENO --- -2.28.0 - - -From 76dbe7477bfde1b5e8bf29a71b5af7ab2be9b98e Mon Sep 17 00:00:00 2001 -From: Steven Barrett -Date: Wed, 28 Nov 2018 19:01:27 -0600 -Subject: [PATCH 09/17] zen: Use [defer+madvise] as default khugepaged defrag - strategy - -For some reason, the default strategy to respond to THP fault fallbacks -is still just madvise, meaning stall if the program wants transparent -hugepages, but don't trigger a background reclaim / compaction if THP -begins to fail allocations. This creates a snowball affect where we -still use the THP code paths, but we almost always fail once a system -has been active and busy for a while. - -The option "defer" was created for interactive systems where THP can -still improve performance. If we have to fallback to a regular page due -to an allocation failure or anything else, we will trigger a background -reclaim and compaction so future THP attempts succeed and previous -attempts eventually have their smaller pages combined without stalling -running applications. - -We still want madvise to stall applications that explicitely want THP, -so defer+madvise _does_ make a ton of sense. Make it the default for -interactive systems, especially if the kernel maintainer left -transparent hugepages on "always". - -Reasoning and details in the original patch: https://lwn.net/Articles/711248/ ---- - mm/huge_memory.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index 74300e337c3c..9277f22c10a7 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly = - #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE - (1< -Date: Wed, 24 Oct 2018 16:58:52 -0300 -Subject: [PATCH 10/17] net/sched: allow configuring cake qdisc as default - -Signed-off-by: Alexandre Frade ---- - net/sched/Kconfig | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/net/sched/Kconfig b/net/sched/Kconfig -index 84badf00647e..6a922bca9f39 100644 ---- a/net/sched/Kconfig -+++ b/net/sched/Kconfig -@@ -471,6 +471,9 @@ choice - config DEFAULT_SFQ - bool "Stochastic Fair Queue" if NET_SCH_SFQ - -+ config DEFAULT_CAKE -+ bool "Common Applications Kept Enhanced" if NET_SCH_CAKE -+ - config DEFAULT_PFIFO_FAST - bool "Priority FIFO Fast" - endchoice -@@ -481,6 +484,7 @@ config DEFAULT_NET_SCH - default "fq" if DEFAULT_FQ - default "fq_codel" if DEFAULT_FQ_CODEL - default "sfq" if DEFAULT_SFQ -+ default "cake" if DEFAULT_CAKE - default "pfifo_fast" - endif - --- -2.28.0 - - -From 816ee502759e954304693813bd03d94986b28dba Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Mon, 18 Feb 2019 17:40:57 +0100 -Subject: [PATCH 11/17] mm: Set watermark_scale_factor to 200 (from 10) - -Multiple users have reported it's helping reducing/eliminating stuttering -with DXVK. 
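The watermark change above, like the earlier defer+madvise THP default, can also be trialled at runtime before being baked into a build. A short sketch on a stock kernel; the numbers simply mirror what the patches hard-code:

    #!/bin/bash
    # Watermark scale factor: vanilla default is 10, the patch raises it to 200.
    sysctl vm.watermark_scale_factor
    sudo sysctl -w vm.watermark_scale_factor=200

    # THP defrag policy: the earlier zen patch makes "defer+madvise" the default.
    cat /sys/kernel/mm/transparent_hugepage/defrag
    echo defer+madvise | sudo tee /sys/kernel/mm/transparent_hugepage/defrag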
---- - mm/page_alloc.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 898ff44f2c7b..e72074034793 100644 ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -330,7 +330,7 @@ int watermark_boost_factor __read_mostly; - #else - int watermark_boost_factor __read_mostly = 15000; - #endif --int watermark_scale_factor = 10; -+int watermark_scale_factor = 200; - - static unsigned long nr_kernel_pages __initdata; - static unsigned long nr_all_pages __initdata; --- -2.28.0 - - -From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Fri, 19 Apr 2019 12:33:38 +0200 -Subject: [PATCH 12/17] Set vm.max_map_count to 262144 by default - -The value is still pretty low, and AMD64-ABI and ELF extended numbering -supports that, so we should be fine on modern x86 systems. - -This fixes crashes in some applications using more than 65535 vmas (also -affects some windows games running in wine, such as Star Citizen). ---- - include/linux/mm.h | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/include/linux/mm.h b/include/linux/mm.h -index bc05c3588aa3..b0cefe94920d 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -190,8 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page) - * not a hard limit any more. Although some userspace tools can be surprised by - * that. - */ --#define MAPCOUNT_ELF_CORE_MARGIN (5) --#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) -+#define DEFAULT_MAX_MAP_COUNT (262144) - - extern int sysctl_max_map_count; - --- -2.28.0 - - -From 3a34034dba5efe91bcec491efe8c66e8087f509b Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Mon, 27 Jul 2020 00:19:18 +0200 -Subject: [PATCH 13/17] mm: bump DEFAULT_MAX_MAP_COUNT - -Some games such as Detroit: Become Human tend to be very crash prone with -lower values. ---- - include/linux/mm.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/linux/mm.h b/include/linux/mm.h -index b0cefe94920d..890165099b07 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -190,7 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page) - * not a hard limit any more. Although some userspace tools can be surprised by - * that. - */ --#define DEFAULT_MAX_MAP_COUNT (262144) -+#define DEFAULT_MAX_MAP_COUNT (524288) - - extern int sysctl_max_map_count; - --- -2.28.0 - - -From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 25 Nov 2019 15:13:06 -0300 -Subject: [PATCH 14/17] elevator: set default scheduler to bfq for blk-mq - -Signed-off-by: Alexandre Frade ---- - block/elevator.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/block/elevator.c b/block/elevator.c -index 4eab3d70e880..79669aa39d79 100644 ---- a/block/elevator.c -+++ b/block/elevator.c -@@ -623,15 +623,15 @@ static inline bool elv_support_iosched(struct request_queue *q) - } - - /* -- * For single queue devices, default to using mq-deadline. If we have multiple -- * queues or mq-deadline is not available, default to "none". -+ * For single queue devices, default to using bfq. If we have multiple -+ * queues or bfq is not available, default to "none". 
- */ - static struct elevator_type *elevator_get_default(struct request_queue *q) - { - if (q->nr_hw_queues != 1) - return NULL; - -- return elevator_get(q, "mq-deadline", false); -+ return elevator_get(q, "bfq", false); - } - - /* --- -2.28.0 - - -From e2111bc5989131c675659d40e0cc4f214df2f990 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Fri, 10 May 2019 16:45:59 -0300 -Subject: [PATCH 15/17] block: set rq_affinity = 2 for full multithreading I/O - requests - -Signed-off-by: Alexandre Frade ---- - include/linux/blkdev.h | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index 28efe374a2e1..d4e5d35d2ece 100644 ---- a/include/linux/blkdev.h -+++ b/include/linux/blkdev.h -@@ -624,7 +624,8 @@ struct request_queue { - #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ - - #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ -- (1 << QUEUE_FLAG_SAME_COMP)) -+ (1 << QUEUE_FLAG_SAME_COMP) | \ -+ (1 << QUEUE_FLAG_SAME_FORCE)) - - void blk_queue_flag_set(unsigned int flag, struct request_queue *q); - void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); --- -2.28.0 - - -From 3c229f434aca65c4ca61772bc03c3e0370817b92 Mon Sep 17 00:00:00 2001 -From: Alexandre Frade -Date: Mon, 3 Aug 2020 17:05:04 +0000 -Subject: [PATCH 16/17] mm: set 2 megabytes for address_space-level file - read-ahead pages size - -Signed-off-by: Alexandre Frade ---- - include/linux/pagemap.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h -index cf2468da68e9..007dea784451 100644 ---- a/include/linux/pagemap.h -+++ b/include/linux/pagemap.h -@@ -655,7 +655,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); - void delete_from_page_cache_batch(struct address_space *mapping, - struct pagevec *pvec); - --#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) -+#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE) - - void page_cache_sync_readahead(struct address_space *, struct file_ra_state *, - struct file *, pgoff_t index, unsigned long req_count); --- -2.28.0 - - -From 716f41cf6631f3a85834dcb67b4ce99185b6387f Mon Sep 17 00:00:00 2001 -From: Steven Barrett -Date: Wed, 15 Jan 2020 20:43:56 -0600 -Subject: [PATCH 17/17] ZEN: intel-pstate: Implement "enable" parameter - -If intel-pstate is compiled into the kernel, it will preempt the loading -of acpi-cpufreq so you can take advantage of hardware p-states without -any friction. - -However, intel-pstate is not completely superior to cpufreq's ondemand -for one reason. There's no concept of an up_threshold property. - -In ondemand, up_threshold essentially reduces the maximum utilization to -compare against, allowing you to hit max frequencies and turbo boost -from a much lower core utilization. - -With intel-pstate, you have the concept of minimum and maximum -performance, but no tunable that lets you define, maximum frequency -means 50% core utilization. For just this oversight, there's reasons -you may want ondemand. - -Lets support setting "enable" in kernel boot parameters. This lets -kernel maintainers include "intel_pstate=disable" statically in the -static boot parameters, but let users of the kernel override this -selection. 
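With this patch applied, the override is simply passed on the kernel command line. A rough sketch of wiring that up with GRUB on an Arch-style setup; the file path and tooling are illustrative and distro-dependent:

    #!/bin/bash
    # Prepend intel_pstate=enable inside GRUB_CMDLINE_LINUX_DEFAULT="..."
    sudo sed -i 's/^GRUB_CMDLINE_LINUX_DEFAULT="/&intel_pstate=enable /' /etc/default/grub
    sudo grub-mkconfig -o /boot/grub/grub.cfg
    # After a reboot, confirm the parameter took effect:
    cat /proc/cmdline
    # Should report intel_pstate on supported CPUs:
    cat /sys/devices/system/cpu/cpufreq/policy0/scaling_driver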
---- - Documentation/admin-guide/kernel-parameters.txt | 3 +++ - drivers/cpufreq/intel_pstate.c | 2 ++ - 2 files changed, 5 insertions(+) - -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index fb95fad81c79..3e92fee81e33 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -1857,6 +1857,9 @@ - disable - Do not enable intel_pstate as the default - scaling driver for the supported processors -+ enable -+ Enable intel_pstate in-case "disable" was passed -+ previously in the kernel boot parameters - passive - Use intel_pstate as a scaling driver, but configure it - to work with generic cpufreq governors (instead of -diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index 36a469150ff9..aee891c9b78a 100644 ---- a/drivers/cpufreq/intel_pstate.c -+++ b/drivers/cpufreq/intel_pstate.c -@@ -2845,6 +2845,8 @@ static int __init intel_pstate_setup(char *str) - pr_info("HWP disabled\n"); - no_hwp = 1; - } -+ if (!strcmp(str, "enable")) -+ no_load = 0; - if (!strcmp(str, "force")) - force_load = 1; - if (!strcmp(str, "hwp_only")) --- -2.28.0 - diff --git a/linux59-tkg/linux59-tkg-patches/0003-glitched-cfs.patch b/linux59-tkg/linux59-tkg-patches/0003-glitched-cfs.patch deleted file mode 100644 index 06b7f02..0000000 --- a/linux59-tkg/linux59-tkg-patches/0003-glitched-cfs.patch +++ /dev/null @@ -1,72 +0,0 @@ -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. 
-+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - diff --git a/linux59-tkg/linux59-tkg-patches/0004-5.9-ck1.patch b/linux59-tkg/linux59-tkg-patches/0004-5.9-ck1.patch deleted file mode 100644 index 33e9da3..0000000 --- a/linux59-tkg/linux59-tkg-patches/0004-5.9-ck1.patch +++ /dev/null @@ -1,13384 +0,0 @@ -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index a1068742a6df..d2a8f1c637d2 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -4595,6 +4595,14 @@ - Memory area to be used by remote processor image, - managed by CMA. - -+ rqshare= [X86] Select the MuQSS scheduler runqueue sharing type. -+ Format: -+ smt -- Share SMT (hyperthread) sibling runqueues -+ mc -- Share MC (multicore) sibling runqueues -+ smp -- Share SMP runqueues -+ none -- So not share any runqueues -+ Default value is mc -+ - rw [KNL] Mount root device read-write on boot - - S [KNL] Run init in single mode -diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index d4b32cc32bb7..9e1e71fc66d0 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -436,6 +436,16 @@ this allows system administrators to override the - ``IA64_THREAD_UAC_NOPRINT`` ``prctl`` and avoid logs being flooded. - - -+iso_cpu: (MuQSS CPU scheduler only) -+=================================== -+ -+This sets the percentage cpu that the unprivileged SCHED_ISO tasks can -+run effectively at realtime priority, averaged over a rolling five -+seconds over the -whole- system, meaning all cpus. -+ -+Set to 70 (percent) by default. -+ -+ - kexec_load_disabled - =================== - -@@ -1077,6 +1087,20 @@ ROM/Flash boot loader. Maybe to tell it what to do after - rebooting. ??? - - -+rr_interval: (MuQSS CPU scheduler only) -+======================================= -+ -+This is the smallest duration that any cpu process scheduling unit -+will run for. Increasing this value can increase throughput of cpu -+bound tasks substantially but at the expense of increased latencies -+overall. Conversely decreasing it will decrease average and maximum -+latencies but at the expense of throughput. This value is in -+milliseconds and the default value chosen depends on the number of -+cpus available at scheduler initialisation with a minimum of 6. -+ -+Valid values are from 1-1000. -+ -+ - sched_energy_aware - ================== - -@@ -1515,3 +1539,13 @@ is 10 seconds. - - The softlockup threshold is (``2 * watchdog_thresh``). Setting this - tunable to zero will disable lockup detection altogether. -+ -+ -+yield_type: (MuQSS CPU scheduler only) -+====================================== -+ -+This determines what type of yield calls to sched_yield will perform. -+ -+ 0: No yield. -+ 1: Yield only to better priority/deadline tasks. (default) -+ 2: Expire timeslice and recalculate deadline. -diff --git a/Documentation/scheduler/sched-BFS.txt b/Documentation/scheduler/sched-BFS.txt -new file mode 100644 -index 000000000000..c0282002a079 ---- /dev/null -+++ b/Documentation/scheduler/sched-BFS.txt -@@ -0,0 +1,351 @@ -+BFS - The Brain Fuck Scheduler by Con Kolivas. -+ -+Goals. 
-+ -+The goal of the Brain Fuck Scheduler, referred to as BFS from here on, is to -+completely do away with the complex designs of the past for the cpu process -+scheduler and instead implement one that is very simple in basic design. -+The main focus of BFS is to achieve excellent desktop interactivity and -+responsiveness without heuristics and tuning knobs that are difficult to -+understand, impossible to model and predict the effect of, and when tuned to -+one workload cause massive detriment to another. -+ -+ -+Design summary. -+ -+BFS is best described as a single runqueue, O(n) lookup, earliest effective -+virtual deadline first design, loosely based on EEVDF (earliest eligible virtual -+deadline first) and my previous Staircase Deadline scheduler. Each component -+shall be described in order to understand the significance of, and reasoning for -+it. The codebase when the first stable version was released was approximately -+9000 lines less code than the existing mainline linux kernel scheduler (in -+2.6.31). This does not even take into account the removal of documentation and -+the cgroups code that is not used. -+ -+Design reasoning. -+ -+The single runqueue refers to the queued but not running processes for the -+entire system, regardless of the number of CPUs. The reason for going back to -+a single runqueue design is that once multiple runqueues are introduced, -+per-CPU or otherwise, there will be complex interactions as each runqueue will -+be responsible for the scheduling latency and fairness of the tasks only on its -+own runqueue, and to achieve fairness and low latency across multiple CPUs, any -+advantage in throughput of having CPU local tasks causes other disadvantages. -+This is due to requiring a very complex balancing system to at best achieve some -+semblance of fairness across CPUs and can only maintain relatively low latency -+for tasks bound to the same CPUs, not across them. To increase said fairness -+and latency across CPUs, the advantage of local runqueue locking, which makes -+for better scalability, is lost due to having to grab multiple locks. -+ -+A significant feature of BFS is that all accounting is done purely based on CPU -+used and nowhere is sleep time used in any way to determine entitlement or -+interactivity. Interactivity "estimators" that use some kind of sleep/run -+algorithm are doomed to fail to detect all interactive tasks, and to falsely tag -+tasks that aren't interactive as being so. The reason for this is that it is -+close to impossible to determine that when a task is sleeping, whether it is -+doing it voluntarily, as in a userspace application waiting for input in the -+form of a mouse click or otherwise, or involuntarily, because it is waiting for -+another thread, process, I/O, kernel activity or whatever. Thus, such an -+estimator will introduce corner cases, and more heuristics will be required to -+cope with those corner cases, introducing more corner cases and failed -+interactivity detection and so on. Interactivity in BFS is built into the design -+by virtue of the fact that tasks that are waking up have not used up their quota -+of CPU time, and have earlier effective deadlines, thereby making it very likely -+they will preempt any CPU bound task of equivalent nice level. See below for -+more information on the virtual deadline mechanism. 
Even if they do not preempt -+a running task, because the rr interval is guaranteed to have a bound upper -+limit on how long a task will wait for, it will be scheduled within a timeframe -+that will not cause visible interface jitter. -+ -+ -+Design details. -+ -+Task insertion. -+ -+BFS inserts tasks into each relevant queue as an O(1) insertion into a double -+linked list. On insertion, *every* running queue is checked to see if the newly -+queued task can run on any idle queue, or preempt the lowest running task on the -+system. This is how the cross-CPU scheduling of BFS achieves significantly lower -+latency per extra CPU the system has. In this case the lookup is, in the worst -+case scenario, O(n) where n is the number of CPUs on the system. -+ -+Data protection. -+ -+BFS has one single lock protecting the process local data of every task in the -+global queue. Thus every insertion, removal and modification of task data in the -+global runqueue needs to grab the global lock. However, once a task is taken by -+a CPU, the CPU has its own local data copy of the running process' accounting -+information which only that CPU accesses and modifies (such as during a -+timer tick) thus allowing the accounting data to be updated lockless. Once a -+CPU has taken a task to run, it removes it from the global queue. Thus the -+global queue only ever has, at most, -+ -+ (number of tasks requesting cpu time) - (number of logical CPUs) + 1 -+ -+tasks in the global queue. This value is relevant for the time taken to look up -+tasks during scheduling. This will increase if many tasks with CPU affinity set -+in their policy to limit which CPUs they're allowed to run on if they outnumber -+the number of CPUs. The +1 is because when rescheduling a task, the CPU's -+currently running task is put back on the queue. Lookup will be described after -+the virtual deadline mechanism is explained. -+ -+Virtual deadline. -+ -+The key to achieving low latency, scheduling fairness, and "nice level" -+distribution in BFS is entirely in the virtual deadline mechanism. The one -+tunable in BFS is the rr_interval, or "round robin interval". This is the -+maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy) -+tasks of the same nice level will be running for, or looking at it the other -+way around, the longest duration two tasks of the same nice level will be -+delayed for. When a task requests cpu time, it is given a quota (time_slice) -+equal to the rr_interval and a virtual deadline. The virtual deadline is -+offset from the current time in jiffies by this equation: -+ -+ jiffies + (prio_ratio * rr_interval) -+ -+The prio_ratio is determined as a ratio compared to the baseline of nice -20 -+and increases by 10% per nice level. The deadline is a virtual one only in that -+no guarantee is placed that a task will actually be scheduled by this time, but -+it is used to compare which task should go next. There are three components to -+how a task is next chosen. First is time_slice expiration. If a task runs out -+of its time_slice, it is descheduled, the time_slice is refilled, and the -+deadline reset to that formula above. Second is sleep, where a task no longer -+is requesting CPU for whatever reason. The time_slice and deadline are _not_ -+adjusted in this case and are just carried over for when the task is next -+scheduled. 
Third is preemption, and that is when a newly waking task is deemed -+higher priority than a currently running task on any cpu by virtue of the fact -+that it has an earlier virtual deadline than the currently running task. The -+earlier deadline is the key to which task is next chosen for the first and -+second cases. Once a task is descheduled, it is put back on the queue, and an -+O(n) lookup of all queued-but-not-running tasks is done to determine which has -+the earliest deadline and that task is chosen to receive CPU next. -+ -+The CPU proportion of different nice tasks works out to be approximately the -+ -+ (prio_ratio difference)^2 -+ -+The reason it is squared is that a task's deadline does not change while it is -+running unless it runs out of time_slice. Thus, even if the time actually -+passes the deadline of another task that is queued, it will not get CPU time -+unless the current running task deschedules, and the time "base" (jiffies) is -+constantly moving. -+ -+Task lookup. -+ -+BFS has 103 priority queues. 100 of these are dedicated to the static priority -+of realtime tasks, and the remaining 3 are, in order of best to worst priority, -+SCHED_ISO (isochronous), SCHED_NORMAL, and SCHED_IDLEPRIO (idle priority -+scheduling). When a task of these priorities is queued, a bitmap of running -+priorities is set showing which of these priorities has tasks waiting for CPU -+time. When a CPU is made to reschedule, the lookup for the next task to get -+CPU time is performed in the following way: -+ -+First the bitmap is checked to see what static priority tasks are queued. If -+any realtime priorities are found, the corresponding queue is checked and the -+first task listed there is taken (provided CPU affinity is suitable) and lookup -+is complete. If the priority corresponds to a SCHED_ISO task, they are also -+taken in FIFO order (as they behave like SCHED_RR). If the priority corresponds -+to either SCHED_NORMAL or SCHED_IDLEPRIO, then the lookup becomes O(n). At this -+stage, every task in the runlist that corresponds to that priority is checked -+to see which has the earliest set deadline, and (provided it has suitable CPU -+affinity) it is taken off the runqueue and given the CPU. If a task has an -+expired deadline, it is taken and the rest of the lookup aborted (as they are -+chosen in FIFO order). -+ -+Thus, the lookup is O(n) in the worst case only, where n is as described -+earlier, as tasks may be chosen before the whole task list is looked over. -+ -+ -+Scalability. -+ -+The major limitations of BFS will be that of scalability, as the separate -+runqueue designs will have less lock contention as the number of CPUs rises. -+However they do not scale linearly even with separate runqueues as multiple -+runqueues will need to be locked concurrently on such designs to be able to -+achieve fair CPU balancing, to try and achieve some sort of nice-level fairness -+across CPUs, and to achieve low enough latency for tasks on a busy CPU when -+other CPUs would be more suited. BFS has the advantage that it requires no -+balancing algorithm whatsoever, as balancing occurs by proxy simply because -+all CPUs draw off the global runqueue, in priority and deadline order. Despite -+the fact that scalability is _not_ the prime concern of BFS, it both shows very -+good scalability to smaller numbers of CPUs and is likely a more scalable design -+at these numbers of CPUs. 
-+ -+It also has some very low overhead scalability features built into the design -+when it has been deemed their overhead is so marginal that they're worth adding. -+The first is the local copy of the running process' data to the CPU it's running -+on to allow that data to be updated lockless where possible. Then there is -+deference paid to the last CPU a task was running on, by trying that CPU first -+when looking for an idle CPU to use the next time it's scheduled. Finally there -+is the notion of cache locality beyond the last running CPU. The sched_domains -+information is used to determine the relative virtual "cache distance" that -+other CPUs have from the last CPU a task was running on. CPUs with shared -+caches, such as SMT siblings, or multicore CPUs with shared caches, are treated -+as cache local. CPUs without shared caches are treated as not cache local, and -+CPUs on different NUMA nodes are treated as very distant. This "relative cache -+distance" is used by modifying the virtual deadline value when doing lookups. -+Effectively, the deadline is unaltered between "cache local" CPUs, doubled for -+"cache distant" CPUs, and quadrupled for "very distant" CPUs. The reasoning -+behind the doubling of deadlines is as follows. The real cost of migrating a -+task from one CPU to another is entirely dependant on the cache footprint of -+the task, how cache intensive the task is, how long it's been running on that -+CPU to take up the bulk of its cache, how big the CPU cache is, how fast and -+how layered the CPU cache is, how fast a context switch is... and so on. In -+other words, it's close to random in the real world where we do more than just -+one sole workload. The only thing we can be sure of is that it's not free. So -+BFS uses the principle that an idle CPU is a wasted CPU and utilising idle CPUs -+is more important than cache locality, and cache locality only plays a part -+after that. Doubling the effective deadline is based on the premise that the -+"cache local" CPUs will tend to work on the same tasks up to double the number -+of cache local CPUs, and once the workload is beyond that amount, it is likely -+that none of the tasks are cache warm anywhere anyway. The quadrupling for NUMA -+is a value I pulled out of my arse. -+ -+When choosing an idle CPU for a waking task, the cache locality is determined -+according to where the task last ran and then idle CPUs are ranked from best -+to worst to choose the most suitable idle CPU based on cache locality, NUMA -+node locality and hyperthread sibling business. They are chosen in the -+following preference (if idle): -+ -+* Same core, idle or busy cache, idle threads -+* Other core, same cache, idle or busy cache, idle threads. -+* Same node, other CPU, idle cache, idle threads. -+* Same node, other CPU, busy cache, idle threads. -+* Same core, busy threads. -+* Other core, same cache, busy threads. -+* Same node, other CPU, busy threads. -+* Other node, other CPU, idle cache, idle threads. -+* Other node, other CPU, busy cache, idle threads. -+* Other node, other CPU, busy threads. -+ -+This shows the SMT or "hyperthread" awareness in the design as well which will -+choose a real idle core first before a logical SMT sibling which already has -+tasks on the physical CPU. -+ -+Early benchmarking of BFS suggested scalability dropped off at the 16 CPU mark. 
-+However this benchmarking was performed on an earlier design that was far less -+scalable than the current one so it's hard to know how scalable it is in terms -+of both CPUs (due to the global runqueue) and heavily loaded machines (due to -+O(n) lookup) at this stage. Note that in terms of scalability, the number of -+_logical_ CPUs matters, not the number of _physical_ CPUs. Thus, a dual (2x) -+quad core (4X) hyperthreaded (2X) machine is effectively a 16X. Newer benchmark -+results are very promising indeed, without needing to tweak any knobs, features -+or options. Benchmark contributions are most welcome. -+ -+ -+Features -+ -+As the initial prime target audience for BFS was the average desktop user, it -+was designed to not need tweaking, tuning or have features set to obtain benefit -+from it. Thus the number of knobs and features has been kept to an absolute -+minimum and should not require extra user input for the vast majority of cases. -+There are precisely 2 tunables, and 2 extra scheduling policies. The rr_interval -+and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO policies. In addition -+to this, BFS also uses sub-tick accounting. What BFS does _not_ now feature is -+support for CGROUPS. The average user should neither need to know what these -+are, nor should they need to be using them to have good desktop behaviour. -+ -+rr_interval -+ -+There is only one "scheduler" tunable, the round robin interval. This can be -+accessed in -+ -+ /proc/sys/kernel/rr_interval -+ -+The value is in milliseconds, and the default value is set to 6 on a -+uniprocessor machine, and automatically set to a progressively higher value on -+multiprocessor machines. The reasoning behind increasing the value on more CPUs -+is that the effective latency is decreased by virtue of there being more CPUs on -+BFS (for reasons explained above), and increasing the value allows for less -+cache contention and more throughput. Valid values are from 1 to 1000 -+Decreasing the value will decrease latencies at the cost of decreasing -+throughput, while increasing it will improve throughput, but at the cost of -+worsening latencies. The accuracy of the rr interval is limited by HZ resolution -+of the kernel configuration. Thus, the worst case latencies are usually slightly -+higher than this actual value. The default value of 6 is not an arbitrary one. -+It is based on the fact that humans can detect jitter at approximately 7ms, so -+aiming for much lower latencies is pointless under most circumstances. It is -+worth noting this fact when comparing the latency performance of BFS to other -+schedulers. Worst case latencies being higher than 7ms are far worse than -+average latencies not being in the microsecond range. -+ -+Isochronous scheduling. -+ -+Isochronous scheduling is a unique scheduling policy designed to provide -+near-real-time performance to unprivileged (ie non-root) users without the -+ability to starve the machine indefinitely. Isochronous tasks (which means -+"same time") are set using, for example, the schedtool application like so: -+ -+ schedtool -I -e amarok -+ -+This will start the audio application "amarok" as SCHED_ISO. How SCHED_ISO works -+is that it has a priority level between true realtime tasks and SCHED_NORMAL -+which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie, -+if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval -+rate). 
However if ISO tasks run for more than a tunable finite amount of time, -+they are then demoted back to SCHED_NORMAL scheduling. This finite amount of -+time is the percentage of _total CPU_ available across the machine, configurable -+as a percentage in the following "resource handling" tunable (as opposed to a -+scheduler tunable): -+ -+ /proc/sys/kernel/iso_cpu -+ -+and is set to 70% by default. It is calculated over a rolling 5 second average -+Because it is the total CPU available, it means that on a multi CPU machine, it -+is possible to have an ISO task running as realtime scheduling indefinitely on -+just one CPU, as the other CPUs will be available. Setting this to 100 is the -+equivalent of giving all users SCHED_RR access and setting it to 0 removes the -+ability to run any pseudo-realtime tasks. -+ -+A feature of BFS is that it detects when an application tries to obtain a -+realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the -+appropriate privileges to use those policies. When it detects this, it will -+give the task SCHED_ISO policy instead. Thus it is transparent to the user. -+Because some applications constantly set their policy as well as their nice -+level, there is potential for them to undo the override specified by the user -+on the command line of setting the policy to SCHED_ISO. To counter this, once -+a task has been set to SCHED_ISO policy, it needs superuser privileges to set -+it back to SCHED_NORMAL. This will ensure the task remains ISO and all child -+processes and threads will also inherit the ISO policy. -+ -+Idleprio scheduling. -+ -+Idleprio scheduling is a scheduling policy designed to give out CPU to a task -+_only_ when the CPU would be otherwise idle. The idea behind this is to allow -+ultra low priority tasks to be run in the background that have virtually no -+effect on the foreground tasks. This is ideally suited to distributed computing -+clients (like setiathome, folding, mprime etc) but can also be used to start -+a video encode or so on without any slowdown of other tasks. To avoid this -+policy from grabbing shared resources and holding them indefinitely, if it -+detects a state where the task is waiting on I/O, the machine is about to -+suspend to ram and so on, it will transiently schedule them as SCHED_NORMAL. As -+per the Isochronous task management, once a task has been scheduled as IDLEPRIO, -+it cannot be put back to SCHED_NORMAL without superuser privileges. Tasks can -+be set to start as SCHED_IDLEPRIO with the schedtool command like so: -+ -+ schedtool -D -e ./mprime -+ -+Subtick accounting. -+ -+It is surprisingly difficult to get accurate CPU accounting, and in many cases, -+the accounting is done by simply determining what is happening at the precise -+moment a timer tick fires off. This becomes increasingly inaccurate as the -+timer tick frequency (HZ) is lowered. It is possible to create an application -+which uses almost 100% CPU, yet by being descheduled at the right time, records -+zero CPU usage. While the main problem with this is that there are possible -+security implications, it is also difficult to determine how much CPU a task -+really does use. BFS tries to use the sub-tick accounting from the TSC clock, -+where possible, to determine real CPU usage. This is not entirely reliable, but -+is far more likely to produce accurate CPU usage data than the existing designs -+and will not show tasks as consuming no CPU usage when they actually are. 
Thus, -+the amount of CPU reported as being used by BFS will more accurately represent -+how much CPU the task itself is using (as is shown for example by the 'time' -+application), so the reported values may be quite different to other schedulers. -+Values reported as the 'load' are more prone to problems with this design, but -+per process values are closer to real usage. When comparing throughput of BFS -+to other designs, it is important to compare the actual completed work in terms -+of total wall clock time taken and total work done, rather than the reported -+"cpu usage". -+ -+ -+Con Kolivas Fri Aug 27 2010 -diff --git a/Documentation/scheduler/sched-MuQSS.txt b/Documentation/scheduler/sched-MuQSS.txt -new file mode 100644 -index 000000000000..ae28b85c9995 ---- /dev/null -+++ b/Documentation/scheduler/sched-MuQSS.txt -@@ -0,0 +1,373 @@ -+MuQSS - The Multiple Queue Skiplist Scheduler by Con Kolivas. -+ -+MuQSS is a per-cpu runqueue variant of the original BFS scheduler with -+one 8 level skiplist per runqueue, and fine grained locking for much more -+scalability. -+ -+ -+Goals. -+ -+The goal of the Multiple Queue Skiplist Scheduler, referred to as MuQSS from -+here on (pronounced mux) is to completely do away with the complex designs of -+the past for the cpu process scheduler and instead implement one that is very -+simple in basic design. The main focus of MuQSS is to achieve excellent desktop -+interactivity and responsiveness without heuristics and tuning knobs that are -+difficult to understand, impossible to model and predict the effect of, and when -+tuned to one workload cause massive detriment to another, while still being -+scalable to many CPUs and processes. -+ -+ -+Design summary. -+ -+MuQSS is best described as per-cpu multiple runqueue, O(log n) insertion, O(1) -+lookup, earliest effective virtual deadline first tickless design, loosely based -+on EEVDF (earliest eligible virtual deadline first) and my previous Staircase -+Deadline scheduler, and evolved from the single runqueue O(n) BFS scheduler. -+Each component shall be described in order to understand the significance of, -+and reasoning for it. -+ -+ -+Design reasoning. -+ -+In BFS, the use of a single runqueue across all CPUs meant that each CPU would -+need to scan the entire runqueue looking for the process with the earliest -+deadline and schedule that next, regardless of which CPU it originally came -+from. This made BFS deterministic with respect to latency and provided -+guaranteed latencies dependent on number of processes and CPUs. The single -+runqueue, however, meant that all CPUs would compete for the single lock -+protecting it, which would lead to increasing lock contention as the number of -+CPUs rose and appeared to limit scalability of common workloads beyond 16 -+logical CPUs. Additionally, the O(n) lookup of the runqueue list obviously -+increased overhead proportionate to the number of queued proecesses and led to -+cache thrashing while iterating over the linked list. -+ -+MuQSS is an evolution of BFS, designed to maintain the same scheduling -+decision mechanism and be virtually deterministic without relying on the -+constrained design of the single runqueue by splitting out the single runqueue -+to be per-CPU and use skiplists instead of linked lists. 
-+ -+The original reason for going back to a single runqueue design for BFS was that -+once multiple runqueues are introduced, per-CPU or otherwise, there will be -+complex interactions as each runqueue will be responsible for the scheduling -+latency and fairness of the tasks only on its own runqueue, and to achieve -+fairness and low latency across multiple CPUs, any advantage in throughput of -+having CPU local tasks causes other disadvantages. This is due to requiring a -+very complex balancing system to at best achieve some semblance of fairness -+across CPUs and can only maintain relatively low latency for tasks bound to the -+same CPUs, not across them. To increase said fairness and latency across CPUs, -+the advantage of local runqueue locking, which makes for better scalability, is -+lost due to having to grab multiple locks. -+ -+MuQSS works around the problems inherent in multiple runqueue designs by -+making its skip lists priority ordered and through novel use of lockless -+examination of each other runqueue it can decide if it should take the earliest -+deadline task from another runqueue for latency reasons, or for CPU balancing -+reasons. It still does not have a balancing system, choosing to allow the -+next task scheduling decision and task wakeup CPU choice to allow balancing to -+happen by virtue of its choices. -+ -+As a further evolution of the design, MuQSS normally configures sharing of -+runqueues in a logical fashion for when CPU resources are shared for improved -+latency and throughput. By default it shares runqueues and locks between -+multicore siblings. Optionally it can be configured to run with sharing of -+SMT siblings only, all SMP packages or no sharing at all. Additionally it can -+be selected at boot time. -+ -+ -+Design details. -+ -+Custom skip list implementation: -+ -+To avoid the overhead of building up and tearing down skip list structures, -+the variant used by MuQSS has a number of optimisations making it specific for -+its use case in the scheduler. It uses static arrays of 8 'levels' instead of -+building up and tearing down structures dynamically. This makes each runqueue -+only scale O(log N) up to 64k tasks. However as there is one runqueue per CPU -+it means that it scales O(log N) up to 64k x number of logical CPUs which is -+far beyond the realistic task limits each CPU could handle. By being 8 levels -+it also makes the array exactly one cacheline in size. Additionally, each -+skip list node is bidirectional making insertion and removal amortised O(1), -+being O(k) where k is 1-8. Uniquely, we are only ever interested in the very -+first entry in each list at all times with MuQSS, so there is never a need to -+do a search and thus look up is always O(1). In interactive mode, the queues -+will be searched beyond their first entry if the first task is not suitable -+for affinity or SMT nice reasons. -+ -+Task insertion: -+ -+MuQSS inserts tasks into a per CPU runqueue as an O(log N) insertion into -+a custom skip list as described above (based on the original design by William -+Pugh). Insertion is ordered in such a way that there is never a need to do a -+search by ordering tasks according to static priority primarily, and then -+virtual deadline at the time of insertion. -+ -+Niffies: -+ -+Niffies are a monotonic forward moving timer not unlike the "jiffies" but are -+of nanosecond resolution. 
Niffies are calculated per-runqueue from the high -+resolution TSC timers, and in order to maintain fairness are synchronised -+between CPUs whenever both runqueues are locked concurrently. -+ -+Virtual deadline: -+ -+The key to achieving low latency, scheduling fairness, and "nice level" -+distribution in MuQSS is entirely in the virtual deadline mechanism. The one -+tunable in MuQSS is the rr_interval, or "round robin interval". This is the -+maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy) -+tasks of the same nice level will be running for, or looking at it the other -+way around, the longest duration two tasks of the same nice level will be -+delayed for. When a task requests cpu time, it is given a quota (time_slice) -+equal to the rr_interval and a virtual deadline. The virtual deadline is -+offset from the current time in niffies by this equation: -+ -+ niffies + (prio_ratio * rr_interval) -+ -+The prio_ratio is determined as a ratio compared to the baseline of nice -20 -+and increases by 10% per nice level. The deadline is a virtual one only in that -+no guarantee is placed that a task will actually be scheduled by this time, but -+it is used to compare which task should go next. There are three components to -+how a task is next chosen. First is time_slice expiration. If a task runs out -+of its time_slice, it is descheduled, the time_slice is refilled, and the -+deadline reset to that formula above. Second is sleep, where a task no longer -+is requesting CPU for whatever reason. The time_slice and deadline are _not_ -+adjusted in this case and are just carried over for when the task is next -+scheduled. Third is preemption, and that is when a newly waking task is deemed -+higher priority than a currently running task on any cpu by virtue of the fact -+that it has an earlier virtual deadline than the currently running task. The -+earlier deadline is the key to which task is next chosen for the first and -+second cases. -+ -+The CPU proportion of different nice tasks works out to be approximately the -+ -+ (prio_ratio difference)^2 -+ -+The reason it is squared is that a task's deadline does not change while it is -+running unless it runs out of time_slice. Thus, even if the time actually -+passes the deadline of another task that is queued, it will not get CPU time -+unless the current running task deschedules, and the time "base" (niffies) is -+constantly moving. -+ -+Task lookup: -+ -+As tasks are already pre-ordered according to anticipated scheduling order in -+the skip lists, lookup for the next suitable task per-runqueue is always a -+matter of simply selecting the first task in the 0th level skip list entry. -+In order to maintain optimal latency and fairness across CPUs, MuQSS does a -+novel examination of every other runqueue in cache locality order, choosing the -+best task across all runqueues. This provides near-determinism of how long any -+task across the entire system may wait before receiving CPU time. The other -+runqueues are first examine lockless and then trylocked to minimise the -+potential lock contention if they are likely to have a suitable better task. -+Each other runqueue lock is only held for as long as it takes to examine the -+entry for suitability. In "interactive" mode, the default setting, MuQSS will -+look for the best deadline task across all CPUs, while in !interactive mode, -+it will only select a better deadline task from another CPU if it is more -+heavily laden than the current one. 
-+ -+Lookup is therefore O(k) where k is number of CPUs. -+ -+ -+Latency. -+ -+Through the use of virtual deadlines to govern the scheduling order of normal -+tasks, queue-to-activation latency per runqueue is guaranteed to be bound by -+the rr_interval tunable which is set to 6ms by default. This means that the -+longest a CPU bound task will wait for more CPU is proportional to the number -+of running tasks and in the common case of 0-2 running tasks per CPU, will be -+under the 7ms threshold for human perception of jitter. Additionally, as newly -+woken tasks will have an early deadline from their previous runtime, the very -+tasks that are usually latency sensitive will have the shortest interval for -+activation, usually preempting any existing CPU bound tasks. -+ -+Tickless expiry: -+ -+A feature of MuQSS is that it is not tied to the resolution of the chosen tick -+rate in Hz, instead depending entirely on the high resolution timers where -+possible for sub-millisecond accuracy on timeouts regarless of the underlying -+tick rate. This allows MuQSS to be run with the low overhead of low Hz rates -+such as 100 by default, benefiting from the improved throughput and lower -+power usage it provides. Another advantage of this approach is that in -+combination with the Full No HZ option, which disables ticks on running task -+CPUs instead of just idle CPUs, the tick can be disabled at all times -+regardless of how many tasks are running instead of being limited to just one -+running task. Note that this option is NOT recommended for regular desktop -+users. -+ -+ -+Scalability and balancing. -+ -+Unlike traditional approaches where balancing is a combination of CPU selection -+at task wakeup and intermittent balancing based on a vast array of rules set -+according to architecture, busyness calculations and special case management, -+MuQSS indirectly balances on the fly at task wakeup and next task selection. -+During initialisation, MuQSS creates a cache coherency ordered list of CPUs for -+each logical CPU and uses this to aid task/CPU selection when CPUs are busy. -+Additionally it selects any idle CPUs, if they are available, at any time over -+busy CPUs according to the following preference: -+ -+ * Same thread, idle or busy cache, idle or busy threads -+ * Other core, same cache, idle or busy cache, idle threads. -+ * Same node, other CPU, idle cache, idle threads. -+ * Same node, other CPU, busy cache, idle threads. -+ * Other core, same cache, busy threads. -+ * Same node, other CPU, busy threads. -+ * Other node, other CPU, idle cache, idle threads. -+ * Other node, other CPU, busy cache, idle threads. -+ * Other node, other CPU, busy threads. -+ -+Mux is therefore SMT, MC and Numa aware without the need for extra -+intermittent balancing to maintain CPUs busy and make the most of cache -+coherency. -+ -+ -+Features -+ -+As the initial prime target audience for MuQSS was the average desktop user, it -+was designed to not need tweaking, tuning or have features set to obtain benefit -+from it. Thus the number of knobs and features has been kept to an absolute -+minimum and should not require extra user input for the vast majority of cases. -+There are 3 optional tunables, and 2 extra scheduling policies. The rr_interval, -+interactive, and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO -+policies. In addition to this, MuQSS also uses sub-tick accounting. What MuQSS -+does _not_ now feature is support for CGROUPS. 
The average user should neither -+need to know what these are, nor should they need to be using them to have good -+desktop behaviour. However since some applications refuse to work without -+cgroups, one can enable them with MuQSS as a stub and the filesystem will be -+created which will allow the applications to work. -+ -+rr_interval: -+ -+ /proc/sys/kernel/rr_interval -+ -+The value is in milliseconds, and the default value is set to 6. Valid values -+are from 1 to 1000 Decreasing the value will decrease latencies at the cost of -+decreasing throughput, while increasing it will improve throughput, but at the -+cost of worsening latencies. It is based on the fact that humans can detect -+jitter at approximately 7ms, so aiming for much lower latencies is pointless -+under most circumstances. It is worth noting this fact when comparing the -+latency performance of MuQSS to other schedulers. Worst case latencies being -+higher than 7ms are far worse than average latencies not being in the -+microsecond range. -+ -+interactive: -+ -+ /proc/sys/kernel/interactive -+ -+The value is a simple boolean of 1 for on and 0 for off and is set to on by -+default. Disabling this will disable the near-determinism of MuQSS when -+selecting the next task by not examining all CPUs for the earliest deadline -+task, or which CPU to wake to, instead prioritising CPU balancing for improved -+throughput. Latency will still be bound by rr_interval, but on a per-CPU basis -+instead of across the whole system. -+ -+Runqueue sharing. -+ -+By default MuQSS chooses to share runqueue resources (specifically the skip -+list and locking) between multicore siblings. It is configurable at build time -+to select between None, SMT, MC and SMP, corresponding to no sharing, sharing -+only between simultaneous mulithreading siblings, multicore siblings, or -+symmetric multiprocessing physical packages. Additionally it can be se at -+bootime with the use of the rqshare parameter. The reason for configurability -+is that some architectures have CPUs with many multicore siblings (>= 16) -+where it may be detrimental to throughput to share runqueues and another -+sharing option may be desirable. Additionally, more sharing than usual can -+improve latency on a system-wide level at the expense of throughput if desired. -+ -+The options are: -+none, smt, mc, smp -+ -+eg: -+ rqshare=mc -+ -+Isochronous scheduling: -+ -+Isochronous scheduling is a unique scheduling policy designed to provide -+near-real-time performance to unprivileged (ie non-root) users without the -+ability to starve the machine indefinitely. Isochronous tasks (which means -+"same time") are set using, for example, the schedtool application like so: -+ -+ schedtool -I -e amarok -+ -+This will start the audio application "amarok" as SCHED_ISO. How SCHED_ISO works -+is that it has a priority level between true realtime tasks and SCHED_NORMAL -+which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie, -+if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval -+rate). However if ISO tasks run for more than a tunable finite amount of time, -+they are then demoted back to SCHED_NORMAL scheduling. This finite amount of -+time is the percentage of CPU available per CPU, configurable as a percentage in -+the following "resource handling" tunable (as opposed to a scheduler tunable): -+ -+iso_cpu: -+ -+ /proc/sys/kernel/iso_cpu -+ -+and is set to 70% by default. 
It is calculated over a rolling 5 second average -+Because it is the total CPU available, it means that on a multi CPU machine, it -+is possible to have an ISO task running as realtime scheduling indefinitely on -+just one CPU, as the other CPUs will be available. Setting this to 100 is the -+equivalent of giving all users SCHED_RR access and setting it to 0 removes the -+ability to run any pseudo-realtime tasks. -+ -+A feature of MuQSS is that it detects when an application tries to obtain a -+realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the -+appropriate privileges to use those policies. When it detects this, it will -+give the task SCHED_ISO policy instead. Thus it is transparent to the user. -+ -+ -+Idleprio scheduling: -+ -+Idleprio scheduling is a scheduling policy designed to give out CPU to a task -+_only_ when the CPU would be otherwise idle. The idea behind this is to allow -+ultra low priority tasks to be run in the background that have virtually no -+effect on the foreground tasks. This is ideally suited to distributed computing -+clients (like setiathome, folding, mprime etc) but can also be used to start a -+video encode or so on without any slowdown of other tasks. To avoid this policy -+from grabbing shared resources and holding them indefinitely, if it detects a -+state where the task is waiting on I/O, the machine is about to suspend to ram -+and so on, it will transiently schedule them as SCHED_NORMAL. Once a task has -+been scheduled as IDLEPRIO, it cannot be put back to SCHED_NORMAL without -+superuser privileges since it is effectively a lower scheduling policy. Tasks -+can be set to start as SCHED_IDLEPRIO with the schedtool command like so: -+ -+schedtool -D -e ./mprime -+ -+Subtick accounting: -+ -+It is surprisingly difficult to get accurate CPU accounting, and in many cases, -+the accounting is done by simply determining what is happening at the precise -+moment a timer tick fires off. This becomes increasingly inaccurate as the timer -+tick frequency (HZ) is lowered. It is possible to create an application which -+uses almost 100% CPU, yet by being descheduled at the right time, records zero -+CPU usage. While the main problem with this is that there are possible security -+implications, it is also difficult to determine how much CPU a task really does -+use. Mux uses sub-tick accounting from the TSC clock to determine real CPU -+usage. Thus, the amount of CPU reported as being used by MuQSS will more -+accurately represent how much CPU the task itself is using (as is shown for -+example by the 'time' application), so the reported values may be quite -+different to other schedulers. When comparing throughput of MuQSS to other -+designs, it is important to compare the actual completed work in terms of total -+wall clock time taken and total work done, rather than the reported "cpu usage". -+ -+Symmetric MultiThreading (SMT) aware nice: -+ -+SMT, a.k.a. hyperthreading, is a very common feature on modern CPUs. While the -+logical CPU count rises by adding thread units to each CPU core, allowing more -+than one task to be run simultaneously on the same core, the disadvantage of it -+is that the CPU power is shared between the tasks, not summating to the power -+of two CPUs. The practical upshot of this is that two tasks running on -+separate threads of the same core run significantly slower than if they had one -+core each to run on. 
While smart CPU selection allows each task to have a core -+to itself whenever available (as is done on MuQSS), it cannot offset the -+slowdown that occurs when the cores are all loaded and only a thread is left. -+Most of the time this is harmless as the CPU is effectively overloaded at this -+point and the extra thread is of benefit. However when running a niced task in -+the presence of an un-niced task (say nice 19 v nice 0), the nice task gets -+precisely the same amount of CPU power as the unniced one. MuQSS has an -+optional configuration feature known as SMT-NICE which selectively idles the -+secondary niced thread for a period proportional to the nice difference, -+allowing CPU distribution according to nice level to be maintained, at the -+expense of a small amount of extra overhead. If this is configured in on a -+machine without SMT threads, the overhead is minimal. -+ -+ -+Con Kolivas Sat, 29th October 2016 -diff --git a/Makefile b/Makefile -index 51540b291738..ab8c480660a6 100644 ---- a/Makefile -+++ b/Makefile -@@ -18,6 +18,10 @@ $(if $(filter __%, $(MAKECMDGOALS)), \ - PHONY := __all - __all: - -+CKVERSION = -ck1 -+CKNAME = MuQSS Powered -+EXTRAVERSION := $(EXTRAVERSION)$(CKVERSION) -+ - # We are using a recursive build, so we need to do a little thinking - # to get the ordering right. - # -diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig -index 9c5f06e8eb9b..0d1069eee09c 100644 ---- a/arch/alpha/Kconfig -+++ b/arch/alpha/Kconfig -@@ -666,6 +666,8 @@ config HZ - default 1200 if HZ_1200 - default 1024 - -+source "kernel/Kconfig.MuQSS" -+ - config SRM_ENV - tristate "SRM environment through procfs" - depends on PROC_FS -diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig -index a12656ec0072..b46b6ddc7636 100644 ---- a/arch/arc/configs/tb10x_defconfig -+++ b/arch/arc/configs/tb10x_defconfig -@@ -29,7 +29,7 @@ CONFIG_ARC_PLAT_TB10X=y - CONFIG_ARC_CACHE_LINE_SHIFT=5 - CONFIG_HZ=250 - CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk" --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - # CONFIG_COMPACTION is not set - CONFIG_NET=y - CONFIG_PACKET=y -diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig -index e00d94b16658..efabbd09475a 100644 ---- a/arch/arm/Kconfig -+++ b/arch/arm/Kconfig -@@ -1236,6 +1236,8 @@ config SCHED_SMT - MultiThreading at a cost of slightly increased overhead in some - places. If unsure say N here. 
- -+source "kernel/Kconfig.MuQSS" -+ - config HAVE_ARM_SCU - bool - help -diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig -index 44ff9cd88d81..9c639c998015 100644 ---- a/arch/arm/configs/bcm2835_defconfig -+++ b/arch/arm/configs/bcm2835_defconfig -@@ -29,7 +29,7 @@ CONFIG_MODULE_UNLOAD=y - CONFIG_ARCH_MULTI_V6=y - CONFIG_ARCH_BCM=y - CONFIG_ARCH_BCM2835=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_AEABI=y - CONFIG_KSM=y - CONFIG_CLEANCACHE=y -diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig -index 82d3ffb18e70..bb05667427a6 100644 ---- a/arch/arm/configs/imx_v6_v7_defconfig -+++ b/arch/arm/configs/imx_v6_v7_defconfig -@@ -45,6 +45,7 @@ CONFIG_PCI_MSI=y - CONFIG_PCI_IMX6=y - CONFIG_SMP=y - CONFIG_ARM_PSCI=y -+CONFIG_PREEMPT=y - CONFIG_HIGHMEM=y - CONFIG_FORCE_MAX_ZONEORDER=14 - CONFIG_CMDLINE="noinitrd console=ttymxc0,115200" -diff --git a/arch/arm/configs/mps2_defconfig b/arch/arm/configs/mps2_defconfig -index 1d923dbb9928..9c1931f1fafd 100644 ---- a/arch/arm/configs/mps2_defconfig -+++ b/arch/arm/configs/mps2_defconfig -@@ -18,7 +18,7 @@ CONFIG_ARCH_MPS2=y - CONFIG_SET_MEM_PARAM=y - CONFIG_DRAM_BASE=0x21000000 - CONFIG_DRAM_SIZE=0x1000000 --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - # CONFIG_ATAGS is not set - CONFIG_ZBOOT_ROM_TEXT=0x0 - CONFIG_ZBOOT_ROM_BSS=0x0 -diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig -index a9c6f32a9b1c..870866aaa39d 100644 ---- a/arch/arm/configs/mxs_defconfig -+++ b/arch/arm/configs/mxs_defconfig -@@ -1,7 +1,7 @@ - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT_VOLUNTARY=n - CONFIG_TASKSTATS=y - CONFIG_TASK_DELAY_ACCT=y - CONFIG_TASK_XACCT=y -@@ -25,6 +25,13 @@ CONFIG_MODULE_UNLOAD=y - CONFIG_MODULE_FORCE_UNLOAD=y - CONFIG_MODVERSIONS=y - CONFIG_BLK_DEV_INTEGRITY=y -+# CONFIG_IOSCHED_DEADLINE is not set -+# CONFIG_IOSCHED_CFQ is not set -+# CONFIG_ARCH_MULTI_V7 is not set -+CONFIG_ARCH_MXS=y -+# CONFIG_ARM_THUMB is not set -+CONFIG_PREEMPT=y -+CONFIG_AEABI=y - CONFIG_NET=y - CONFIG_PACKET=y - CONFIG_UNIX=y -diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig -index 6d232837cbee..052cae73d674 100644 ---- a/arch/arm64/Kconfig -+++ b/arch/arm64/Kconfig -@@ -945,6 +945,8 @@ config SCHED_SMT - MultiThreading at a cost of slightly increased overhead in some - places. If unsure say N here. 
- -+source "kernel/Kconfig.MuQSS" -+ - config NR_CPUS - int "Maximum number of CPUs (2-4096)" - range 2 4096 -diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig -index 023b4e644b1c..013e630b96a6 100644 ---- a/arch/mips/configs/fuloong2e_defconfig -+++ b/arch/mips/configs/fuloong2e_defconfig -@@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y -diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig -index 9085f4d6c698..fb23111d45f6 100644 ---- a/arch/mips/configs/gpr_defconfig -+++ b/arch/mips/configs/gpr_defconfig -@@ -1,8 +1,8 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_BSD_PROCESS_ACCT_V3=y - CONFIG_RELAY=y -diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig -index 21a1168ae301..529a1b1007cf 100644 ---- a/arch/mips/configs/ip22_defconfig -+++ b/arch/mips/configs/ip22_defconfig -@@ -1,7 +1,7 @@ - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y - CONFIG_LOG_BUF_SHIFT=14 -diff --git a/arch/mips/configs/ip28_defconfig b/arch/mips/configs/ip28_defconfig -index 0921ef38e9fb..6da05cef46f8 100644 ---- a/arch/mips/configs/ip28_defconfig -+++ b/arch/mips/configs/ip28_defconfig -@@ -1,5 +1,5 @@ - CONFIG_SYSVIPC=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y - CONFIG_LOG_BUF_SHIFT=14 -diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig -index 8c223035921f..a3bf87450343 100644 ---- a/arch/mips/configs/jazz_defconfig -+++ b/arch/mips/configs/jazz_defconfig -@@ -1,8 +1,8 @@ -+CONFIG_PREEMPT=y - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_LOG_BUF_SHIFT=14 - CONFIG_RELAY=y -diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig -index 914af125a7fa..76a64290373f 100644 ---- a/arch/mips/configs/mtx1_defconfig -+++ b/arch/mips/configs/mtx1_defconfig -@@ -1,8 +1,8 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_AUDIT=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_BSD_PROCESS_ACCT_V3=y - CONFIG_RELAY=y -diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig -index 4ecb157e56d4..ea7309283b01 100644 ---- a/arch/mips/configs/nlm_xlr_defconfig -+++ b/arch/mips/configs/nlm_xlr_defconfig -@@ -1,10 +1,10 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y - CONFIG_AUDIT=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_BSD_PROCESS_ACCT_V3=y - CONFIG_TASKSTATS=y -diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig -index 63fe2da1b37f..7f08ee237345 100644 ---- a/arch/mips/configs/pic32mzda_defconfig -+++ b/arch/mips/configs/pic32mzda_defconfig -@@ -1,7 +1,7 @@ -+CONFIG_PREEMPT=y - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y - 
CONFIG_LOG_BUF_SHIFT=14 -diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig -index b9adf15ebbec..0025b56dc300 100644 ---- a/arch/mips/configs/pistachio_defconfig -+++ b/arch/mips/configs/pistachio_defconfig -@@ -1,9 +1,9 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - CONFIG_DEFAULT_HOSTNAME="localhost" - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_IKCONFIG=m - CONFIG_IKCONFIG_PROC=y - CONFIG_LOG_BUF_SHIFT=18 -diff --git a/arch/mips/configs/pnx8335_stb225_defconfig b/arch/mips/configs/pnx8335_stb225_defconfig -index d06db6b87959..fb2cd3234d95 100644 ---- a/arch/mips/configs/pnx8335_stb225_defconfig -+++ b/arch/mips/configs/pnx8335_stb225_defconfig -@@ -1,9 +1,9 @@ -+CONFIG_PREEMPT=y - # CONFIG_LOCALVERSION_AUTO is not set - # CONFIG_SWAP is not set - CONFIG_SYSVIPC=y - CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_LOG_BUF_SHIFT=14 - CONFIG_EXPERT=y - CONFIG_SLAB=y -diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig -index 30d7c3db884e..9e68acfa0d0e 100644 ---- a/arch/mips/configs/rm200_defconfig -+++ b/arch/mips/configs/rm200_defconfig -@@ -1,6 +1,6 @@ -+CONFIG_PREEMPT=y - CONFIG_SYSVIPC=y - CONFIG_POSIX_MQUEUE=y --CONFIG_PREEMPT_VOLUNTARY=y - CONFIG_BSD_PROCESS_ACCT=y - CONFIG_IKCONFIG=y - CONFIG_IKCONFIG_PROC=y -diff --git a/arch/parisc/configs/712_defconfig b/arch/parisc/configs/712_defconfig -new file mode 100644 -index 000000000000..578524f80cc4 ---- /dev/null -+++ b/arch/parisc/configs/712_defconfig -@@ -0,0 +1,181 @@ -+# CONFIG_LOCALVERSION_AUTO is not set -+CONFIG_SYSVIPC=y -+CONFIG_POSIX_MQUEUE=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=16 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_KALLSYMS_ALL=y -+CONFIG_SLAB=y -+CONFIG_PROFILING=y -+CONFIG_OPROFILE=m -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+CONFIG_MODULE_FORCE_UNLOAD=y -+CONFIG_PA7100LC=y -+CONFIG_PREEMPT=y -+CONFIG_GSC_LASI=y -+# CONFIG_PDC_CHASSIS is not set -+CONFIG_BINFMT_MISC=m -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_XFRM_USER=m -+CONFIG_NET_KEY=m -+CONFIG_INET=y -+CONFIG_IP_MULTICAST=y -+CONFIG_IP_PNP=y -+CONFIG_IP_PNP_DHCP=y -+CONFIG_IP_PNP_BOOTP=y -+CONFIG_INET_AH=m -+CONFIG_INET_ESP=m -+CONFIG_INET_DIAG=m -+# CONFIG_IPV6 is not set -+CONFIG_NETFILTER=y -+CONFIG_LLC2=m -+CONFIG_NET_PKTGEN=m -+CONFIG_DEVTMPFS=y -+CONFIG_DEVTMPFS_MOUNT=y -+# CONFIG_STANDALONE is not set -+# CONFIG_PREVENT_FIRMWARE_BUILD is not set -+CONFIG_PARPORT=y -+CONFIG_PARPORT_PC=m -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_CRYPTOLOOP=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=6144 -+CONFIG_ATA_OVER_ETH=m -+CONFIG_SCSI=y -+CONFIG_BLK_DEV_SD=y -+CONFIG_CHR_DEV_ST=y -+CONFIG_BLK_DEV_SR=y -+CONFIG_CHR_DEV_SG=y -+CONFIG_SCSI_ISCSI_ATTRS=m -+CONFIG_SCSI_LASI700=y -+CONFIG_SCSI_DEBUG=m -+CONFIG_MD=y -+CONFIG_BLK_DEV_MD=m -+CONFIG_MD_LINEAR=m -+CONFIG_MD_RAID0=m -+CONFIG_MD_RAID1=m -+CONFIG_NETDEVICES=y -+CONFIG_BONDING=m -+CONFIG_DUMMY=m -+CONFIG_TUN=m -+CONFIG_LASI_82596=y -+CONFIG_PPP=m -+CONFIG_PPP_BSDCOMP=m -+CONFIG_PPP_DEFLATE=m -+CONFIG_PPP_MPPE=m -+CONFIG_PPPOE=m -+CONFIG_PPP_ASYNC=m -+CONFIG_PPP_SYNC_TTY=m -+# CONFIG_KEYBOARD_HIL_OLD is not set -+CONFIG_MOUSE_SERIAL=m -+CONFIG_LEGACY_PTY_COUNT=64 -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_SERIAL_8250_NR_UARTS=17 -+CONFIG_SERIAL_8250_EXTENDED=y -+CONFIG_SERIAL_8250_MANY_PORTS=y -+CONFIG_SERIAL_8250_SHARE_IRQ=y -+# CONFIG_SERIAL_MUX is not set 
-+CONFIG_PDC_CONSOLE=y -+CONFIG_PRINTER=m -+CONFIG_PPDEV=m -+# CONFIG_HW_RANDOM is not set -+CONFIG_RAW_DRIVER=y -+# CONFIG_HWMON is not set -+CONFIG_FB=y -+CONFIG_FB_MODE_HELPERS=y -+CONFIG_FB_TILEBLITTING=y -+CONFIG_DUMMY_CONSOLE_COLUMNS=128 -+CONFIG_DUMMY_CONSOLE_ROWS=48 -+CONFIG_FRAMEBUFFER_CONSOLE=y -+CONFIG_LOGO=y -+# CONFIG_LOGO_LINUX_MONO is not set -+# CONFIG_LOGO_LINUX_VGA16 is not set -+# CONFIG_LOGO_LINUX_CLUT224 is not set -+CONFIG_SOUND=y -+CONFIG_SND=y -+CONFIG_SND_SEQUENCER=y -+CONFIG_SND_HARMONY=y -+CONFIG_EXT2_FS=y -+CONFIG_EXT3_FS=y -+CONFIG_JFS_FS=m -+CONFIG_XFS_FS=m -+CONFIG_AUTOFS4_FS=y -+CONFIG_ISO9660_FS=y -+CONFIG_JOLIET=y -+CONFIG_UDF_FS=m -+CONFIG_MSDOS_FS=m -+CONFIG_VFAT_FS=m -+CONFIG_PROC_KCORE=y -+CONFIG_TMPFS=y -+CONFIG_UFS_FS=m -+CONFIG_NFS_FS=y -+CONFIG_NFS_V4=y -+CONFIG_ROOT_NFS=y -+CONFIG_NFSD=m -+CONFIG_NFSD_V4=y -+CONFIG_CIFS=m -+CONFIG_NLS_CODEPAGE_437=m -+CONFIG_NLS_CODEPAGE_737=m -+CONFIG_NLS_CODEPAGE_775=m -+CONFIG_NLS_CODEPAGE_850=m -+CONFIG_NLS_CODEPAGE_852=m -+CONFIG_NLS_CODEPAGE_855=m -+CONFIG_NLS_CODEPAGE_857=m -+CONFIG_NLS_CODEPAGE_860=m -+CONFIG_NLS_CODEPAGE_861=m -+CONFIG_NLS_CODEPAGE_862=m -+CONFIG_NLS_CODEPAGE_863=m -+CONFIG_NLS_CODEPAGE_864=m -+CONFIG_NLS_CODEPAGE_865=m -+CONFIG_NLS_CODEPAGE_866=m -+CONFIG_NLS_CODEPAGE_869=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_CODEPAGE_950=m -+CONFIG_NLS_CODEPAGE_932=m -+CONFIG_NLS_CODEPAGE_949=m -+CONFIG_NLS_CODEPAGE_874=m -+CONFIG_NLS_ISO8859_8=m -+CONFIG_NLS_CODEPAGE_1250=m -+CONFIG_NLS_CODEPAGE_1251=m -+CONFIG_NLS_ASCII=m -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_ISO8859_2=m -+CONFIG_NLS_ISO8859_3=m -+CONFIG_NLS_ISO8859_4=m -+CONFIG_NLS_ISO8859_5=m -+CONFIG_NLS_ISO8859_6=m -+CONFIG_NLS_ISO8859_7=m -+CONFIG_NLS_ISO8859_9=m -+CONFIG_NLS_ISO8859_13=m -+CONFIG_NLS_ISO8859_14=m -+CONFIG_NLS_ISO8859_15=m -+CONFIG_NLS_KOI8_R=m -+CONFIG_NLS_KOI8_U=m -+CONFIG_NLS_UTF8=m -+CONFIG_DEBUG_FS=y -+CONFIG_MAGIC_SYSRQ=y -+CONFIG_DEBUG_KERNEL=y -+CONFIG_DEBUG_MUTEXES=y -+CONFIG_CRYPTO_TEST=m -+CONFIG_CRYPTO_HMAC=y -+CONFIG_CRYPTO_MICHAEL_MIC=m -+CONFIG_CRYPTO_SHA512=m -+CONFIG_CRYPTO_TGR192=m -+CONFIG_CRYPTO_WP512=m -+CONFIG_CRYPTO_ANUBIS=m -+CONFIG_CRYPTO_BLOWFISH=m -+CONFIG_CRYPTO_CAST6=m -+CONFIG_CRYPTO_KHAZAD=m -+CONFIG_CRYPTO_SERPENT=m -+CONFIG_CRYPTO_TEA=m -+CONFIG_CRYPTO_TWOFISH=m -+CONFIG_CRYPTO_DEFLATE=m -+# CONFIG_CRYPTO_HW is not set -+CONFIG_FONTS=y -+CONFIG_FONT_8x8=y -+CONFIG_FONT_8x16=y -diff --git a/arch/parisc/configs/c3000_defconfig b/arch/parisc/configs/c3000_defconfig -new file mode 100644 -index 000000000000..d1bdfad94048 ---- /dev/null -+++ b/arch/parisc/configs/c3000_defconfig -@@ -0,0 +1,151 @@ -+# CONFIG_LOCALVERSION_AUTO is not set -+CONFIG_SYSVIPC=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=16 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_EXPERT=y -+CONFIG_KALLSYMS_ALL=y -+CONFIG_SLAB=y -+CONFIG_PROFILING=y -+CONFIG_OPROFILE=m -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+CONFIG_MODULE_FORCE_UNLOAD=y -+CONFIG_PA8X00=y -+CONFIG_PREEMPT=y -+# CONFIG_GSC is not set -+CONFIG_PCI=y -+CONFIG_PCI_LBA=y -+# CONFIG_PDC_CHASSIS is not set -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_XFRM_USER=m -+CONFIG_NET_KEY=m -+CONFIG_INET=y -+CONFIG_IP_MULTICAST=y -+CONFIG_IP_PNP=y -+CONFIG_IP_PNP_BOOTP=y -+# CONFIG_INET_DIAG is not set -+CONFIG_INET6_IPCOMP=m -+CONFIG_IPV6_TUNNEL=m -+CONFIG_NETFILTER=y -+CONFIG_NET_PKTGEN=m -+CONFIG_DEVTMPFS=y -+CONFIG_DEVTMPFS_MOUNT=y -+# CONFIG_STANDALONE is not set -+# CONFIG_PREVENT_FIRMWARE_BUILD is not set -+CONFIG_BLK_DEV_UMEM=m 
-+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_CRYPTOLOOP=m -+CONFIG_IDE=y -+CONFIG_BLK_DEV_IDECD=y -+CONFIG_BLK_DEV_NS87415=y -+CONFIG_SCSI=y -+CONFIG_BLK_DEV_SD=y -+CONFIG_CHR_DEV_ST=y -+CONFIG_BLK_DEV_SR=y -+CONFIG_CHR_DEV_SG=y -+CONFIG_SCSI_ISCSI_ATTRS=m -+CONFIG_SCSI_SYM53C8XX_2=y -+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 -+CONFIG_SCSI_DEBUG=m -+CONFIG_MD=y -+CONFIG_BLK_DEV_MD=y -+CONFIG_MD_LINEAR=y -+CONFIG_MD_RAID0=y -+CONFIG_MD_RAID1=y -+CONFIG_BLK_DEV_DM=m -+CONFIG_DM_CRYPT=m -+CONFIG_DM_SNAPSHOT=m -+CONFIG_DM_MIRROR=m -+CONFIG_DM_ZERO=m -+CONFIG_DM_MULTIPATH=m -+CONFIG_FUSION=y -+CONFIG_FUSION_SPI=m -+CONFIG_FUSION_CTL=m -+CONFIG_NETDEVICES=y -+CONFIG_BONDING=m -+CONFIG_DUMMY=m -+CONFIG_TUN=m -+CONFIG_ACENIC=m -+CONFIG_TIGON3=m -+CONFIG_NET_TULIP=y -+CONFIG_DE2104X=m -+CONFIG_TULIP=y -+CONFIG_TULIP_MMIO=y -+CONFIG_E100=m -+CONFIG_E1000=m -+CONFIG_PPP=m -+CONFIG_PPP_BSDCOMP=m -+CONFIG_PPP_DEFLATE=m -+CONFIG_PPPOE=m -+CONFIG_PPP_ASYNC=m -+CONFIG_PPP_SYNC_TTY=m -+# CONFIG_KEYBOARD_ATKBD is not set -+# CONFIG_MOUSE_PS2 is not set -+CONFIG_SERIO=m -+CONFIG_SERIO_LIBPS2=m -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_SERIAL_8250_NR_UARTS=13 -+CONFIG_SERIAL_8250_EXTENDED=y -+CONFIG_SERIAL_8250_MANY_PORTS=y -+CONFIG_SERIAL_8250_SHARE_IRQ=y -+# CONFIG_HW_RANDOM is not set -+CONFIG_RAW_DRIVER=y -+# CONFIG_HWMON is not set -+CONFIG_FB=y -+CONFIG_FRAMEBUFFER_CONSOLE=y -+CONFIG_LOGO=y -+# CONFIG_LOGO_LINUX_MONO is not set -+# CONFIG_LOGO_LINUX_VGA16 is not set -+# CONFIG_LOGO_LINUX_CLUT224 is not set -+CONFIG_SOUND=y -+CONFIG_SND=y -+CONFIG_SND_SEQUENCER=y -+CONFIG_SND_AD1889=y -+CONFIG_USB_HIDDEV=y -+CONFIG_USB=y -+CONFIG_USB_OHCI_HCD=y -+CONFIG_USB_PRINTER=m -+CONFIG_USB_STORAGE=m -+CONFIG_USB_STORAGE_USBAT=m -+CONFIG_USB_STORAGE_SDDR09=m -+CONFIG_USB_STORAGE_SDDR55=m -+CONFIG_USB_STORAGE_JUMPSHOT=m -+CONFIG_USB_MDC800=m -+CONFIG_USB_MICROTEK=m -+CONFIG_USB_LEGOTOWER=m -+CONFIG_EXT2_FS=y -+CONFIG_EXT3_FS=y -+CONFIG_XFS_FS=m -+CONFIG_AUTOFS4_FS=y -+CONFIG_ISO9660_FS=y -+CONFIG_JOLIET=y -+CONFIG_MSDOS_FS=m -+CONFIG_VFAT_FS=m -+CONFIG_PROC_KCORE=y -+CONFIG_TMPFS=y -+CONFIG_NFS_FS=y -+CONFIG_ROOT_NFS=y -+CONFIG_NFSD=y -+CONFIG_NFSD_V3=y -+CONFIG_NLS_CODEPAGE_437=m -+CONFIG_NLS_CODEPAGE_850=m -+CONFIG_NLS_ASCII=m -+CONFIG_NLS_ISO8859_1=m -+CONFIG_NLS_ISO8859_15=m -+CONFIG_NLS_UTF8=m -+CONFIG_DEBUG_FS=y -+CONFIG_HEADERS_INSTALL=y -+CONFIG_HEADERS_CHECK=y -+CONFIG_MAGIC_SYSRQ=y -+CONFIG_DEBUG_MUTEXES=y -+# CONFIG_DEBUG_BUGVERBOSE is not set -+CONFIG_CRYPTO_TEST=m -+CONFIG_CRYPTO_MD5=m -+CONFIG_CRYPTO_BLOWFISH=m -+CONFIG_CRYPTO_DES=m -+# CONFIG_CRYPTO_HW is not set -diff --git a/arch/parisc/configs/defconfig b/arch/parisc/configs/defconfig -new file mode 100644 -index 000000000000..0d976614934c ---- /dev/null -+++ b/arch/parisc/configs/defconfig -@@ -0,0 +1,206 @@ -+# CONFIG_LOCALVERSION_AUTO is not set -+CONFIG_SYSVIPC=y -+CONFIG_POSIX_MQUEUE=y -+CONFIG_IKCONFIG=y -+CONFIG_IKCONFIG_PROC=y -+CONFIG_LOG_BUF_SHIFT=16 -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_KALLSYMS_ALL=y -+CONFIG_SLAB=y -+CONFIG_PROFILING=y -+CONFIG_OPROFILE=m -+CONFIG_MODULES=y -+CONFIG_MODULE_UNLOAD=y -+CONFIG_MODULE_FORCE_UNLOAD=y -+# CONFIG_BLK_DEV_BSG is not set -+CONFIG_PA7100LC=y -+CONFIG_PREEMPT=y -+CONFIG_IOMMU_CCIO=y -+CONFIG_GSC_LASI=y -+CONFIG_GSC_WAX=y -+CONFIG_EISA=y -+CONFIG_PCI=y -+CONFIG_GSC_DINO=y -+CONFIG_PCI_LBA=y -+CONFIG_PCCARD=y -+CONFIG_YENTA=y -+CONFIG_PD6729=y -+CONFIG_I82092=y -+CONFIG_BINFMT_MISC=m -+CONFIG_NET=y -+CONFIG_PACKET=y -+CONFIG_UNIX=y -+CONFIG_XFRM_USER=m -+CONFIG_NET_KEY=m 
-+CONFIG_INET=y -+CONFIG_IP_MULTICAST=y -+CONFIG_IP_PNP=y -+CONFIG_IP_PNP_DHCP=y -+CONFIG_IP_PNP_BOOTP=y -+CONFIG_INET_AH=m -+CONFIG_INET_ESP=m -+CONFIG_INET_DIAG=m -+CONFIG_INET6_AH=y -+CONFIG_INET6_ESP=y -+CONFIG_INET6_IPCOMP=y -+CONFIG_LLC2=m -+CONFIG_DEVTMPFS=y -+CONFIG_DEVTMPFS_MOUNT=y -+# CONFIG_STANDALONE is not set -+# CONFIG_PREVENT_FIRMWARE_BUILD is not set -+CONFIG_PARPORT=y -+CONFIG_PARPORT_PC=m -+CONFIG_PARPORT_PC_PCMCIA=m -+CONFIG_PARPORT_1284=y -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_CRYPTOLOOP=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=6144 -+CONFIG_IDE=y -+CONFIG_BLK_DEV_IDECS=y -+CONFIG_BLK_DEV_IDECD=y -+CONFIG_BLK_DEV_GENERIC=y -+CONFIG_BLK_DEV_NS87415=y -+CONFIG_SCSI=y -+CONFIG_BLK_DEV_SD=y -+CONFIG_CHR_DEV_ST=y -+CONFIG_BLK_DEV_SR=y -+CONFIG_CHR_DEV_SG=y -+CONFIG_SCSI_LASI700=y -+CONFIG_SCSI_SYM53C8XX_2=y -+CONFIG_SCSI_ZALON=y -+CONFIG_MD=y -+CONFIG_BLK_DEV_MD=y -+CONFIG_MD_LINEAR=y -+CONFIG_MD_RAID0=y -+CONFIG_MD_RAID1=y -+CONFIG_MD_RAID10=y -+CONFIG_BLK_DEV_DM=y -+CONFIG_NETDEVICES=y -+CONFIG_BONDING=m -+CONFIG_DUMMY=m -+CONFIG_TUN=m -+CONFIG_ACENIC=y -+CONFIG_TIGON3=y -+CONFIG_NET_TULIP=y -+CONFIG_TULIP=y -+CONFIG_LASI_82596=y -+CONFIG_PPP=m -+CONFIG_PPP_BSDCOMP=m -+CONFIG_PPP_DEFLATE=m -+CONFIG_PPPOE=m -+CONFIG_PPP_ASYNC=m -+CONFIG_PPP_SYNC_TTY=m -+# CONFIG_KEYBOARD_HIL_OLD is not set -+CONFIG_MOUSE_SERIAL=y -+CONFIG_LEGACY_PTY_COUNT=64 -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_SERIAL_8250_CS=y -+CONFIG_SERIAL_8250_NR_UARTS=17 -+CONFIG_SERIAL_8250_EXTENDED=y -+CONFIG_SERIAL_8250_MANY_PORTS=y -+CONFIG_SERIAL_8250_SHARE_IRQ=y -+CONFIG_PRINTER=m -+CONFIG_PPDEV=m -+# CONFIG_HW_RANDOM is not set -+# CONFIG_HWMON is not set -+CONFIG_FB=y -+CONFIG_FB_MODE_HELPERS=y -+CONFIG_FB_TILEBLITTING=y -+CONFIG_DUMMY_CONSOLE_COLUMNS=128 -+CONFIG_DUMMY_CONSOLE_ROWS=48 -+CONFIG_FRAMEBUFFER_CONSOLE=y -+CONFIG_LOGO=y -+# CONFIG_LOGO_LINUX_MONO is not set -+# CONFIG_LOGO_LINUX_VGA16 is not set -+# CONFIG_LOGO_LINUX_CLUT224 is not set -+CONFIG_SOUND=y -+CONFIG_SND=y -+CONFIG_SND_DYNAMIC_MINORS=y -+CONFIG_SND_SEQUENCER=y -+CONFIG_SND_AD1889=y -+CONFIG_SND_HARMONY=y -+CONFIG_HID_GYRATION=y -+CONFIG_HID_NTRIG=y -+CONFIG_HID_PANTHERLORD=y -+CONFIG_HID_PETALYNX=y -+CONFIG_HID_SAMSUNG=y -+CONFIG_HID_SUNPLUS=y -+CONFIG_HID_TOPSEED=y -+CONFIG_USB=y -+CONFIG_USB_MON=y -+CONFIG_USB_OHCI_HCD=y -+CONFIG_USB_UHCI_HCD=y -+CONFIG_EXT2_FS=y -+CONFIG_EXT3_FS=y -+CONFIG_ISO9660_FS=y -+CONFIG_JOLIET=y -+CONFIG_VFAT_FS=y -+CONFIG_PROC_KCORE=y -+CONFIG_TMPFS=y -+CONFIG_NFS_FS=y -+CONFIG_ROOT_NFS=y -+CONFIG_NFSD=y -+CONFIG_NFSD_V4=y -+CONFIG_CIFS=m -+CONFIG_NLS_CODEPAGE_437=y -+CONFIG_NLS_CODEPAGE_737=m -+CONFIG_NLS_CODEPAGE_775=m -+CONFIG_NLS_CODEPAGE_850=m -+CONFIG_NLS_CODEPAGE_852=m -+CONFIG_NLS_CODEPAGE_855=m -+CONFIG_NLS_CODEPAGE_857=m -+CONFIG_NLS_CODEPAGE_860=m -+CONFIG_NLS_CODEPAGE_861=m -+CONFIG_NLS_CODEPAGE_862=m -+CONFIG_NLS_CODEPAGE_863=m -+CONFIG_NLS_CODEPAGE_864=m -+CONFIG_NLS_CODEPAGE_865=m -+CONFIG_NLS_CODEPAGE_866=m -+CONFIG_NLS_CODEPAGE_869=m -+CONFIG_NLS_CODEPAGE_936=m -+CONFIG_NLS_CODEPAGE_950=m -+CONFIG_NLS_CODEPAGE_932=m -+CONFIG_NLS_CODEPAGE_949=m -+CONFIG_NLS_CODEPAGE_874=m -+CONFIG_NLS_ISO8859_8=m -+CONFIG_NLS_CODEPAGE_1250=y -+CONFIG_NLS_CODEPAGE_1251=m -+CONFIG_NLS_ASCII=m -+CONFIG_NLS_ISO8859_1=y -+CONFIG_NLS_ISO8859_2=m -+CONFIG_NLS_ISO8859_3=m -+CONFIG_NLS_ISO8859_4=m -+CONFIG_NLS_ISO8859_5=m -+CONFIG_NLS_ISO8859_6=m -+CONFIG_NLS_ISO8859_7=m -+CONFIG_NLS_ISO8859_9=m -+CONFIG_NLS_ISO8859_13=m -+CONFIG_NLS_ISO8859_14=m -+CONFIG_NLS_ISO8859_15=m 
-+CONFIG_NLS_KOI8_R=m -+CONFIG_NLS_KOI8_U=m -+CONFIG_NLS_UTF8=y -+CONFIG_DEBUG_FS=y -+CONFIG_HEADERS_INSTALL=y -+CONFIG_HEADERS_CHECK=y -+CONFIG_MAGIC_SYSRQ=y -+CONFIG_DEBUG_KERNEL=y -+CONFIG_DEBUG_MUTEXES=y -+CONFIG_KEYS=y -+CONFIG_CRYPTO_TEST=m -+CONFIG_CRYPTO_MICHAEL_MIC=m -+CONFIG_CRYPTO_SHA512=m -+CONFIG_CRYPTO_TGR192=m -+CONFIG_CRYPTO_WP512=m -+CONFIG_CRYPTO_ANUBIS=m -+CONFIG_CRYPTO_BLOWFISH=m -+CONFIG_CRYPTO_CAST6=m -+CONFIG_CRYPTO_KHAZAD=m -+CONFIG_CRYPTO_SERPENT=m -+CONFIG_CRYPTO_TEA=m -+CONFIG_CRYPTO_TWOFISH=m -+# CONFIG_CRYPTO_HW is not set -+CONFIG_LIBCRC32C=m -+CONFIG_FONTS=y -diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig -index 787e829b6f25..22914bbb4caa 100644 ---- a/arch/powerpc/Kconfig -+++ b/arch/powerpc/Kconfig -@@ -882,6 +882,8 @@ config SCHED_SMT - when dealing with POWER5 cpus at a cost of slightly increased - overhead in some places. If unsure say N here. - -+source "kernel/Kconfig.MuQSS" -+ - config PPC_DENORMALISATION - bool "PowerPC denormalisation exception handling" - depends on PPC_BOOK3S_64 -diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig -index 66e9a0fd64ff..c8531232efb7 100644 ---- a/arch/powerpc/configs/ppc6xx_defconfig -+++ b/arch/powerpc/configs/ppc6xx_defconfig -@@ -73,7 +73,7 @@ CONFIG_QE_GPIO=y - CONFIG_MCU_MPC8349EMITX=y - CONFIG_HIGHMEM=y - CONFIG_HZ_1000=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_BINFMT_MISC=y - CONFIG_HIBERNATION=y - CONFIG_PM_DEBUG=y -diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c -index f18d5067cd0f..fe489fc01c73 100644 ---- a/arch/powerpc/platforms/cell/spufs/sched.c -+++ b/arch/powerpc/platforms/cell/spufs/sched.c -@@ -51,11 +51,6 @@ static struct task_struct *spusched_task; - static struct timer_list spusched_timer; - static struct timer_list spuloadavg_timer; - --/* -- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). -- */ --#define NORMAL_PRIO 120 -- - /* - * Frequency of the spu scheduler tick. By default we do one SPU scheduler - * tick for every 10 CPU scheduler ticks. 
-diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig -index ee6d28ae08de..827e4693c5b2 100644 ---- a/arch/sh/configs/se7712_defconfig -+++ b/arch/sh/configs/se7712_defconfig -@@ -21,7 +21,7 @@ CONFIG_FLATMEM_MANUAL=y - CONFIG_SH_SOLUTION_ENGINE=y - CONFIG_SH_PCLK_FREQ=66666666 - CONFIG_HEARTBEAT=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_CMDLINE_OVERWRITE=y - CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda1" - CONFIG_NET=y -diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig -index bad921bc10f8..e8f42bc0d370 100644 ---- a/arch/sh/configs/se7721_defconfig -+++ b/arch/sh/configs/se7721_defconfig -@@ -21,7 +21,7 @@ CONFIG_FLATMEM_MANUAL=y - CONFIG_SH_7721_SOLUTION_ENGINE=y - CONFIG_SH_PCLK_FREQ=33333333 - CONFIG_HEARTBEAT=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_CMDLINE_OVERWRITE=y - CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda2" - CONFIG_NET=y -diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig -index ba887f1351be..4434e93b70bc 100644 ---- a/arch/sh/configs/titan_defconfig -+++ b/arch/sh/configs/titan_defconfig -@@ -19,7 +19,7 @@ CONFIG_SH_TITAN=y - CONFIG_SH_PCLK_FREQ=30000000 - CONFIG_SH_DMA=y - CONFIG_SH_DMA_API=y --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_CMDLINE_OVERWRITE=y - CONFIG_CMDLINE="console=ttySC1,38400N81 root=/dev/nfs ip=:::::eth1:autoconf rw" - CONFIG_PCI=y -diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig -index bde4d21a8ac8..c054ec82d91b 100644 ---- a/arch/sparc/configs/sparc64_defconfig -+++ b/arch/sparc/configs/sparc64_defconfig -@@ -22,7 +22,7 @@ CONFIG_NO_HZ=y - CONFIG_HIGH_RES_TIMERS=y - CONFIG_NUMA=y - CONFIG_DEFAULT_MMAP_MIN_ADDR=8192 --CONFIG_PREEMPT_VOLUNTARY=y -+CONFIG_PREEMPT=y - CONFIG_SUN_LDOMS=y - CONFIG_PCI=y - CONFIG_PCI_MSI=y -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 7101ac64bb20..6f56ad1894d1 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1005,6 +1005,22 @@ config NR_CPUS - config SCHED_SMT - def_bool y if SMP - -+config SMT_NICE -+ bool "SMT (Hyperthreading) aware nice priority and policy support" -+ depends on SCHED_MUQSS && SCHED_SMT -+ default y -+ help -+ Enabling Hyperthreading on Intel CPUs decreases the effectiveness -+ of the use of 'nice' levels and different scheduling policies -+ (e.g. realtime) due to sharing of CPU power between hyperthreads. -+ SMT nice support makes each logical CPU aware of what is running on -+ its hyperthread siblings, maintaining appropriate distribution of -+ CPU according to nice levels and scheduling policies at the expense -+ of slightly increased overhead. -+ -+ If unsure say Y here. -+ -+ - config SCHED_MC - def_bool y - prompt "Multi-core scheduler support" -@@ -1035,6 +1051,8 @@ config SCHED_MC_PRIO - - If unsure say Y here. - -+source "kernel/Kconfig.MuQSS" -+ - config UP_LATE_INIT - def_bool y - depends on !SMP && X86_LOCAL_APIC -@@ -1419,7 +1437,7 @@ config HIGHMEM64G - endchoice - - choice -- prompt "Memory split" if EXPERT -+ prompt "Memory split" - default VMSPLIT_3G - depends on X86_32 - help -@@ -1439,17 +1457,17 @@ choice - option alone! 
- - config VMSPLIT_3G -- bool "3G/1G user/kernel split" -+ bool "Default 896MB lowmem (3G/1G user/kernel split)" - config VMSPLIT_3G_OPT - depends on !X86_PAE -- bool "3G/1G user/kernel split (for full 1G low memory)" -+ bool "1GB lowmem (3G/1G user/kernel split)" - config VMSPLIT_2G -- bool "2G/2G user/kernel split" -+ bool "2GB lowmem (2G/2G user/kernel split)" - config VMSPLIT_2G_OPT - depends on !X86_PAE -- bool "2G/2G user/kernel split (for full 2G low memory)" -+ bool "2GB lowmem (2G/2G user/kernel split)" - config VMSPLIT_1G -- bool "1G/3G user/kernel split" -+ bool "3GB lowmem (1G/3G user/kernel split)" - endchoice - - config PAGE_OFFSET -diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig -index 78210793d357..0c4415b23002 100644 ---- a/arch/x86/configs/i386_defconfig -+++ b/arch/x86/configs/i386_defconfig -@@ -23,6 +23,8 @@ CONFIG_PROFILING=y - CONFIG_SMP=y - CONFIG_X86_GENERIC=y - CONFIG_HPET_TIMER=y -+CONFIG_SCHED_SMT=y -+CONFIG_PREEMPT=y - CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y - CONFIG_X86_REBOOTFIXUPS=y - CONFIG_MICROCODE_AMD=y -diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig -index 9936528e1939..328c7d0a38a1 100644 ---- a/arch/x86/configs/x86_64_defconfig -+++ b/arch/x86/configs/x86_64_defconfig -@@ -20,6 +20,9 @@ CONFIG_BLK_DEV_INITRD=y - # CONFIG_COMPAT_BRK is not set - CONFIG_PROFILING=y - CONFIG_SMP=y -+CONFIG_NR_CPUS=64 -+CONFIG_SCHED_SMT=y -+CONFIG_PREEMPT=y - CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y - CONFIG_MICROCODE_AMD=y - CONFIG_X86_MSR=y -diff --git a/drivers/accessibility/speakup/speakup_acntpc.c b/drivers/accessibility/speakup/speakup_acntpc.c -index c94328a5bd4a..6e7d4671aa69 100644 ---- a/drivers/accessibility/speakup/speakup_acntpc.c -+++ b/drivers/accessibility/speakup/speakup_acntpc.c -@@ -198,7 +198,7 @@ static void do_catch_up(struct spk_synth *synth) - full_time_val = full_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth_full()) { -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout((full_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -@@ -226,7 +226,7 @@ static void do_catch_up(struct spk_synth *synth) - jiffy_delta_val = jiffy_delta->u.n.value; - delay_time_val = delay_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - jiff_max = jiffies + jiffy_delta_val; - } - } -diff --git a/drivers/accessibility/speakup/speakup_apollo.c b/drivers/accessibility/speakup/speakup_apollo.c -index 0877b4044c28..627102d048c1 100644 ---- a/drivers/accessibility/speakup/speakup_apollo.c -+++ b/drivers/accessibility/speakup/speakup_apollo.c -@@ -165,7 +165,7 @@ static void do_catch_up(struct spk_synth *synth) - if (!synth->io_ops->synth_out(synth, ch)) { - synth->io_ops->tiocmset(0, UART_MCR_RTS); - synth->io_ops->tiocmset(UART_MCR_RTS, 0); -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout(full_time_val); - continue; - } - if (time_after_eq(jiffies, jiff_max) && (ch == SPACE)) { -diff --git a/drivers/accessibility/speakup/speakup_decext.c b/drivers/accessibility/speakup/speakup_decext.c -index 7408eb29cf38..938a0c35968f 100644 ---- a/drivers/accessibility/speakup/speakup_decext.c -+++ b/drivers/accessibility/speakup/speakup_decext.c -@@ -180,7 +180,7 @@ static void do_catch_up(struct spk_synth *synth) - if (ch == '\n') - ch = 0x0D; - if (synth_full() || 
!synth->io_ops->synth_out(synth, ch)) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - continue; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/accessibility/speakup/speakup_decpc.c b/drivers/accessibility/speakup/speakup_decpc.c -index 96f24c848cc5..1130dfe4da6c 100644 ---- a/drivers/accessibility/speakup/speakup_decpc.c -+++ b/drivers/accessibility/speakup/speakup_decpc.c -@@ -398,7 +398,7 @@ static void do_catch_up(struct spk_synth *synth) - if (ch == '\n') - ch = 0x0D; - if (dt_sendchar(ch)) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout((delay_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/accessibility/speakup/speakup_dectlk.c b/drivers/accessibility/speakup/speakup_dectlk.c -index 780214b5ca16..7b91594c57aa 100644 ---- a/drivers/accessibility/speakup/speakup_dectlk.c -+++ b/drivers/accessibility/speakup/speakup_dectlk.c -@@ -247,7 +247,7 @@ static void do_catch_up(struct spk_synth *synth) - if (ch == '\n') - ch = 0x0D; - if (synth_full_val || !synth->io_ops->synth_out(synth, ch)) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - continue; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/accessibility/speakup/speakup_dtlk.c b/drivers/accessibility/speakup/speakup_dtlk.c -index dbebed0eeeec..6d83c13ca4a6 100644 ---- a/drivers/accessibility/speakup/speakup_dtlk.c -+++ b/drivers/accessibility/speakup/speakup_dtlk.c -@@ -211,7 +211,7 @@ static void do_catch_up(struct spk_synth *synth) - delay_time_val = delay_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth_full()) { -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout((delay_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -@@ -227,7 +227,7 @@ static void do_catch_up(struct spk_synth *synth) - delay_time_val = delay_time->u.n.value; - jiffy_delta_val = jiffy_delta->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout((delay_time_val)); - jiff_max = jiffies + jiffy_delta_val; - } - } -diff --git a/drivers/accessibility/speakup/speakup_keypc.c b/drivers/accessibility/speakup/speakup_keypc.c -index 414827e888fc..cb31c9176daa 100644 ---- a/drivers/accessibility/speakup/speakup_keypc.c -+++ b/drivers/accessibility/speakup/speakup_keypc.c -@@ -199,7 +199,7 @@ static void do_catch_up(struct spk_synth *synth) - full_time_val = full_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth_full()) { -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout((full_time_val)); - continue; - } - set_current_state(TASK_RUNNING); -@@ -232,7 +232,7 @@ static void do_catch_up(struct spk_synth *synth) - jiffy_delta_val = jiffy_delta->u.n.value; - delay_time_val = delay_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); -- schedule_timeout(msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - jiff_max = jiffies + jiffy_delta_val; - } - } -diff --git a/drivers/accessibility/speakup/synth.c b/drivers/accessibility/speakup/synth.c -index ac47dbac7207..09f6ba829dfd 100644 ---- a/drivers/accessibility/speakup/synth.c -+++ b/drivers/accessibility/speakup/synth.c -@@ -93,12 +93,8 @@ static void _spk_do_catch_up(struct spk_synth *synth, int unicode) - 
spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (ch == '\n') - ch = synth->procspeech; -- if (unicode) -- ret = synth->io_ops->synth_out_unicode(synth, ch); -- else -- ret = synth->io_ops->synth_out(synth, ch); -- if (!ret) { -- schedule_timeout(msecs_to_jiffies(full_time_val)); -+ if (!synth->io_ops->synth_out(synth, ch)) { -+ schedule_msec_hrtimeout(full_time_val); - continue; - } - if (time_after_eq(jiffies, jiff_max) && (ch == SPACE)) { -@@ -108,11 +104,9 @@ static void _spk_do_catch_up(struct spk_synth *synth, int unicode) - full_time_val = full_time->u.n.value; - spin_unlock_irqrestore(&speakup_info.spinlock, flags); - if (synth->io_ops->synth_out(synth, synth->procspeech)) -- schedule_timeout( -- msecs_to_jiffies(delay_time_val)); -+ schedule_msec_hrtimeout(delay_time_val); - else -- schedule_timeout( -- msecs_to_jiffies(full_time_val)); -+ schedule_msec_hrtimeout(full_time_val); - jiff_max = jiffies + jiffy_delta_val; - } - set_current_state(TASK_RUNNING); -diff --git a/drivers/block/swim.c b/drivers/block/swim.c -index dd34504382e5..0caa1c7e9223 100644 ---- a/drivers/block/swim.c -+++ b/drivers/block/swim.c -@@ -328,7 +328,7 @@ static inline void swim_motor(struct swim __iomem *base, - if (swim_readbit(base, MOTOR_ON)) - break; - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - } else if (action == OFF) { - swim_action(base, MOTOR_OFF); -@@ -347,7 +347,7 @@ static inline void swim_eject(struct swim __iomem *base) - if (!swim_readbit(base, DISK_IN)) - break; - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - swim_select(base, RELAX); - } -@@ -372,6 +372,7 @@ static inline int swim_step(struct swim __iomem *base) - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); -+ schedule_min_hrtimeout(); - - swim_select(base, RELAX); - if (!swim_readbit(base, STEP)) -diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c -index 737c0b6b24ea..a3db1f42bb3b 100644 ---- a/drivers/char/ipmi/ipmi_msghandler.c -+++ b/drivers/char/ipmi/ipmi_msghandler.c -@@ -3542,7 +3542,7 @@ static void cleanup_smi_msgs(struct ipmi_smi *intf) - /* Current message first, to preserve order */ - while (intf->curr_msg && !list_empty(&intf->waiting_rcv_msgs)) { - /* Wait for the message to clear out. */ -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - - /* No need for locks, the interface is down. */ -diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c -index 0416b9c9d410..9ce5fae0f1cf 100644 ---- a/drivers/char/ipmi/ipmi_ssif.c -+++ b/drivers/char/ipmi/ipmi_ssif.c -@@ -1288,7 +1288,7 @@ static void shutdown_ssif(void *send_info) - - /* make sure the driver is not looking for flags any more. 
*/ - while (ssif_info->ssif_state != SSIF_NORMAL) -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - - ssif_info->stopping = true; - del_timer_sync(&ssif_info->watch_timer); -diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c -index a95156fc5db7..8f07c8900184 100644 ---- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c -+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c -@@ -235,7 +235,7 @@ static int vmw_fifo_wait_noirq(struct vmw_private *dev_priv, - DRM_ERROR("SVGA device lockup.\n"); - break; - } -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - if (interruptible && signal_pending(current)) { - ret = -ERESTARTSYS; - break; -diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c -index 75f3efee21a4..09b1932ce85b 100644 ---- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c -+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c -@@ -203,7 +203,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, - break; - } - if (lazy) -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - else if ((++count & 0x0F) == 0) { - /** - * FIXME: Use schedule_hr_timeout here for -diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c -index 29f5fed28c2a..974cb08c7aa7 100644 ---- a/drivers/hwmon/fam15h_power.c -+++ b/drivers/hwmon/fam15h_power.c -@@ -221,7 +221,7 @@ static ssize_t power1_average_show(struct device *dev, - prev_ptsc[cu] = data->cpu_sw_pwr_ptsc[cu]; - } - -- leftover = schedule_timeout_interruptible(msecs_to_jiffies(data->power_period)); -+ leftover = schedule_msec_hrtimeout_interruptible((data->power_period)); - if (leftover) - return 0; - -diff --git a/drivers/iio/light/tsl2563.c b/drivers/iio/light/tsl2563.c -index abc8d7db8dc1..baa9d6338a52 100644 ---- a/drivers/iio/light/tsl2563.c -+++ b/drivers/iio/light/tsl2563.c -@@ -269,11 +269,7 @@ static void tsl2563_wait_adc(struct tsl2563_chip *chip) - default: - delay = 402; - } -- /* -- * TODO: Make sure that we wait at least required delay but why we -- * have to extend it one tick more? 
-- */ -- schedule_timeout_interruptible(msecs_to_jiffies(delay) + 2); -+ schedule_msec_hrtimeout_interruptible(delay + 1); - } - - static int tsl2563_adjust_gainlevel(struct tsl2563_chip *chip, u16 adc) -diff --git a/drivers/media/i2c/msp3400-driver.c b/drivers/media/i2c/msp3400-driver.c -index 39530d43590e..a7caf2eb5771 100644 ---- a/drivers/media/i2c/msp3400-driver.c -+++ b/drivers/media/i2c/msp3400-driver.c -@@ -170,7 +170,7 @@ static int msp_read(struct i2c_client *client, int dev, int addr) - break; - dev_warn(&client->dev, "I/O error #%d (read 0x%02x/0x%02x)\n", err, - dev, addr); -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - } - if (err == 3) { - dev_warn(&client->dev, "resetting chip, sound will go off.\n"); -@@ -211,7 +211,7 @@ static int msp_write(struct i2c_client *client, int dev, int addr, int val) - break; - dev_warn(&client->dev, "I/O error #%d (write 0x%02x/0x%02x)\n", err, - dev, addr); -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - } - if (err == 3) { - dev_warn(&client->dev, "resetting chip, sound will go off.\n"); -diff --git a/drivers/media/pci/cx18/cx18-gpio.c b/drivers/media/pci/cx18/cx18-gpio.c -index cf7cfda94107..f63e17489547 100644 ---- a/drivers/media/pci/cx18/cx18-gpio.c -+++ b/drivers/media/pci/cx18/cx18-gpio.c -@@ -81,11 +81,11 @@ static void gpio_reset_seq(struct cx18 *cx, u32 active_lo, u32 active_hi, - - /* Assert */ - gpio_update(cx, mask, ~active_lo); -- schedule_timeout_uninterruptible(msecs_to_jiffies(assert_msecs)); -+ schedule_msec_hrtimeout_uninterruptible((assert_msecs)); - - /* Deassert */ - gpio_update(cx, mask, ~active_hi); -- schedule_timeout_uninterruptible(msecs_to_jiffies(recovery_msecs)); -+ schedule_msec_hrtimeout_uninterruptible((recovery_msecs)); - } - - /* -diff --git a/drivers/media/pci/ivtv/ivtv-gpio.c b/drivers/media/pci/ivtv/ivtv-gpio.c -index 856e7ab7f33e..766a26251337 100644 ---- a/drivers/media/pci/ivtv/ivtv-gpio.c -+++ b/drivers/media/pci/ivtv/ivtv-gpio.c -@@ -105,7 +105,7 @@ void ivtv_reset_ir_gpio(struct ivtv *itv) - curout = (curout & ~0xF) | 1; - write_reg(curout, IVTV_REG_GPIO_OUT); - /* We could use something else for smaller time */ -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible((1)); - curout |= 2; - write_reg(curout, IVTV_REG_GPIO_OUT); - curdir &= ~0x80; -@@ -125,11 +125,11 @@ int ivtv_reset_tuner_gpio(void *dev, int component, int cmd, int value) - curout = read_reg(IVTV_REG_GPIO_OUT); - curout &= ~(1 << itv->card->xceive_pin); - write_reg(curout, IVTV_REG_GPIO_OUT); -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible((1)); - - curout |= 1 << itv->card->xceive_pin; - write_reg(curout, IVTV_REG_GPIO_OUT); -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible((1)); - return 0; - } - -diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.c b/drivers/media/pci/ivtv/ivtv-ioctl.c -index 35dccb31174c..8181cd65e876 100644 ---- a/drivers/media/pci/ivtv/ivtv-ioctl.c -+++ b/drivers/media/pci/ivtv/ivtv-ioctl.c -@@ -1139,7 +1139,7 @@ void ivtv_s_std_dec(struct ivtv *itv, v4l2_std_id std) - TASK_UNINTERRUPTIBLE); - if ((read_reg(IVTV_REG_DEC_LINE_FIELD) >> 16) < 100) - break; -- schedule_timeout(msecs_to_jiffies(25)); -+ schedule_msec_hrtimeout((25)); - } - finish_wait(&itv->vsync_waitq, &wait); - mutex_lock(&itv->serialize_lock); -diff --git a/drivers/media/pci/ivtv/ivtv-streams.c 
b/drivers/media/pci/ivtv/ivtv-streams.c -index f04ee84bab5f..c4469b4b8f99 100644 ---- a/drivers/media/pci/ivtv/ivtv-streams.c -+++ b/drivers/media/pci/ivtv/ivtv-streams.c -@@ -849,7 +849,7 @@ int ivtv_stop_v4l2_encode_stream(struct ivtv_stream *s, int gop_end) - while (!test_bit(IVTV_F_I_EOS, &itv->i_flags) && - time_before(jiffies, - then + msecs_to_jiffies(2000))) { -- schedule_timeout(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout((10)); - } - - /* To convert jiffies to ms, we must multiply by 1000 -diff --git a/drivers/media/radio/radio-mr800.c b/drivers/media/radio/radio-mr800.c -index cb0437b4c331..163fffc0e1d4 100644 ---- a/drivers/media/radio/radio-mr800.c -+++ b/drivers/media/radio/radio-mr800.c -@@ -366,7 +366,7 @@ static int vidioc_s_hw_freq_seek(struct file *file, void *priv, - retval = -ENODATA; - break; - } -- if (schedule_timeout_interruptible(msecs_to_jiffies(10))) { -+ if (schedule_msec_hrtimeout_interruptible((10))) { - retval = -ERESTARTSYS; - break; - } -diff --git a/drivers/media/radio/radio-tea5777.c b/drivers/media/radio/radio-tea5777.c -index fb9de7bbcd19..e53cf45e7f3f 100644 ---- a/drivers/media/radio/radio-tea5777.c -+++ b/drivers/media/radio/radio-tea5777.c -@@ -235,7 +235,7 @@ static int radio_tea5777_update_read_reg(struct radio_tea5777 *tea, int wait) - } - - if (wait) { -- if (schedule_timeout_interruptible(msecs_to_jiffies(wait))) -+ if (schedule_msec_hrtimeout_interruptible((wait))) - return -ERESTARTSYS; - } - -diff --git a/drivers/media/radio/tea575x.c b/drivers/media/radio/tea575x.c -index c37315226c42..e73e6393403c 100644 ---- a/drivers/media/radio/tea575x.c -+++ b/drivers/media/radio/tea575x.c -@@ -401,7 +401,7 @@ int snd_tea575x_s_hw_freq_seek(struct file *file, struct snd_tea575x *tea, - for (;;) { - if (time_after(jiffies, timeout)) - break; -- if (schedule_timeout_interruptible(msecs_to_jiffies(10))) { -+ if (schedule_msec_hrtimeout_interruptible((10))) { - /* some signal arrived, stop search */ - tea->val &= ~TEA575X_BIT_SEARCH; - snd_tea575x_set_freq(tea); -diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c -index b690796d24d4..448b13da62b4 100644 ---- a/drivers/mfd/ucb1x00-core.c -+++ b/drivers/mfd/ucb1x00-core.c -@@ -250,7 +250,7 @@ unsigned int ucb1x00_adc_read(struct ucb1x00 *ucb, int adc_channel, int sync) - break; - /* yield to other processes */ - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } - - return UCB_ADC_DAT(val); -diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c -index 8e6607fc8a67..b9ab770bbdb5 100644 ---- a/drivers/misc/sgi-xp/xpc_channel.c -+++ b/drivers/misc/sgi-xp/xpc_channel.c -@@ -834,7 +834,7 @@ xpc_allocate_msg_wait(struct xpc_channel *ch) - - atomic_inc(&ch->n_on_msg_allocate_wq); - prepare_to_wait(&ch->msg_allocate_wq, &wait, TASK_INTERRUPTIBLE); -- ret = schedule_timeout(1); -+ ret = schedule_min_hrtimeout(); - finish_wait(&ch->msg_allocate_wq, &wait); - atomic_dec(&ch->n_on_msg_allocate_wq); - -diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c -index 4a33ec4fc089..da85f847ebb4 100644 ---- a/drivers/net/caif/caif_hsi.c -+++ b/drivers/net/caif/caif_hsi.c -@@ -939,7 +939,7 @@ static void cfhsi_wake_down(struct work_struct *work) - break; - - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - retry--; - } - -diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c -index 66d0198e7834..ce1c7bf9be87 100644 ---- 
a/drivers/net/can/usb/peak_usb/pcan_usb.c -+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c -@@ -242,7 +242,7 @@ static int pcan_usb_write_mode(struct peak_usb_device *dev, u8 onoff) - } else { - /* the PCAN-USB needs time to init */ - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(PCAN_USB_STARTUP_TIMEOUT)); -+ schedule_msec_hrtimeout((PCAN_USB_STARTUP_TIMEOUT)); - } - - return err; -diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c -index 65b315bc60ab..2b3f71086f5f 100644 ---- a/drivers/net/usb/lan78xx.c -+++ b/drivers/net/usb/lan78xx.c -@@ -2666,7 +2666,7 @@ static void lan78xx_terminate_urbs(struct lan78xx_net *dev) - while (!skb_queue_empty(&dev->rxq) && - !skb_queue_empty(&dev->txq) && - !skb_queue_empty(&dev->done)) { -- schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS)); -+ schedule_msec_hrtimeout((UNLINK_TIMEOUT_MS)); - set_current_state(TASK_UNINTERRUPTIBLE); - netif_dbg(dev, ifdown, dev->net, - "waited for %d urb completions\n", temp); -diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c -index 2b2a841cd938..1a4d27179db1 100644 ---- a/drivers/net/usb/usbnet.c -+++ b/drivers/net/usb/usbnet.c -@@ -767,7 +767,7 @@ static void wait_skb_queue_empty(struct sk_buff_head *q) - spin_lock_irqsave(&q->lock, flags); - while (!skb_queue_empty(q)) { - spin_unlock_irqrestore(&q->lock, flags); -- schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS)); -+ schedule_msec_hrtimeout((UNLINK_TIMEOUT_MS)); - set_current_state(TASK_UNINTERRUPTIBLE); - spin_lock_irqsave(&q->lock, flags); - } -diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c -index 461e955aa259..5ab8e7396ea4 100644 ---- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c -+++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c -@@ -816,7 +816,7 @@ static int ipw2100_hw_send_command(struct ipw2100_priv *priv, - * doesn't seem to have as many firmware restart cycles... - * - * As a test, we're sticking in a 1/100s delay here */ -- schedule_timeout_uninterruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_uninterruptible((10)); - - return 0; - -@@ -1267,7 +1267,7 @@ static int ipw2100_start_adapter(struct ipw2100_priv *priv) - IPW_DEBUG_FW("Waiting for f/w initialization to complete...\n"); - i = 5000; - do { -- schedule_timeout_uninterruptible(msecs_to_jiffies(40)); -+ schedule_msec_hrtimeout_uninterruptible((40)); - /* Todo... wait for sync command ... */ - - read_register(priv->net_dev, IPW_REG_INTA, &inta); -diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c -index 4547ac44c8d4..8fa1a7fdf12c 100644 ---- a/drivers/parport/ieee1284.c -+++ b/drivers/parport/ieee1284.c -@@ -202,7 +202,7 @@ int parport_wait_peripheral(struct parport *port, - /* parport_wait_event didn't time out, but the - * peripheral wasn't actually ready either. - * Wait for another 10ms. */ -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - } - } - -diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c -index 2c11bd3fe1fd..8cb6b61c0880 100644 ---- a/drivers/parport/ieee1284_ops.c -+++ b/drivers/parport/ieee1284_ops.c -@@ -520,7 +520,7 @@ size_t parport_ieee1284_ecp_read_data (struct parport *port, - /* Yield the port for a while. 
*/ - if (count && dev->port->irq != PARPORT_IRQ_NONE) { - parport_release (dev); -- schedule_timeout_interruptible(msecs_to_jiffies(40)); -+ schedule_msec_hrtimeout_interruptible((40)); - parport_claim_or_block (dev); - } - else -diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c -index bffe548187ee..c2918ee3e100 100644 ---- a/drivers/platform/x86/intel_ips.c -+++ b/drivers/platform/x86/intel_ips.c -@@ -798,7 +798,7 @@ static int ips_adjust(void *data) - ips_gpu_lower(ips); - - sleep: -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_ADJUST_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_ADJUST_PERIOD)); - } while (!kthread_should_stop()); - - dev_dbg(ips->dev, "ips-adjust thread stopped\n"); -@@ -974,7 +974,7 @@ static int ips_monitor(void *data) - seqno_timestamp = get_jiffies_64(); - - old_cpu_power = thm_readl(THM_CEC); -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_SAMPLE_PERIOD)); - - /* Collect an initial average */ - for (i = 0; i < IPS_SAMPLE_COUNT; i++) { -@@ -1001,7 +1001,7 @@ static int ips_monitor(void *data) - mchp_samples[i] = mchp; - } - -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_SAMPLE_PERIOD)); - if (kthread_should_stop()) - break; - } -@@ -1028,7 +1028,7 @@ static int ips_monitor(void *data) - * us to reduce the sample frequency if the CPU and GPU are idle. - */ - old_cpu_power = thm_readl(THM_CEC); -- schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); -+ schedule_msec_hrtimeout_interruptible((IPS_SAMPLE_PERIOD)); - last_sample_period = IPS_SAMPLE_PERIOD; - - timer_setup(&ips->timer, monitor_timeout, TIMER_DEFERRABLE); -diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c -index 2018614f258f..fc19b312c345 100644 ---- a/drivers/rtc/rtc-wm8350.c -+++ b/drivers/rtc/rtc-wm8350.c -@@ -114,7 +114,7 @@ static int wm8350_rtc_settime(struct device *dev, struct rtc_time *tm) - /* Wait until confirmation of stopping */ - do { - rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); -- schedule_timeout_uninterruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_uninterruptible((1)); - } while (--retries && !(rtc_ctrl & WM8350_RTC_STS)); - - if (!retries) { -@@ -197,7 +197,7 @@ static int wm8350_rtc_stop_alarm(struct wm8350 *wm8350) - /* Wait until confirmation of stopping */ - do { - rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); -- schedule_timeout_uninterruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_uninterruptible((1)); - } while (retries-- && !(rtc_ctrl & WM8350_RTC_ALMSTS)); - - if (!(rtc_ctrl & WM8350_RTC_ALMSTS)) -@@ -220,7 +220,7 @@ static int wm8350_rtc_start_alarm(struct wm8350 *wm8350) - /* Wait until confirmation */ - do { - rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); -- schedule_timeout_uninterruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_uninterruptible((1)); - } while (retries-- && rtc_ctrl & WM8350_RTC_ALMSTS); - - if (rtc_ctrl & WM8350_RTC_ALMSTS) -diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c -index 03b1805b106c..41ee54ff304a 100644 ---- a/drivers/scsi/fnic/fnic_scsi.c -+++ b/drivers/scsi/fnic/fnic_scsi.c -@@ -217,7 +217,7 @@ int fnic_fw_reset_handler(struct fnic *fnic) - - /* wait for io cmpl */ - while (atomic_read(&fnic->in_flight)) -- schedule_timeout(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout((1)); - - spin_lock_irqsave(&fnic->wq_copy_lock[0], 
flags); - -@@ -2278,7 +2278,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, - } - } - -- schedule_timeout(msecs_to_jiffies(2 * fnic->config.ed_tov)); -+ schedule_msec_hrtimeout((2 * fnic->config.ed_tov)); - - /* walk again to check, if IOs are still pending in fw */ - if (fnic_is_abts_pending(fnic, lr_sc)) -diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c -index 983eeb0e3d07..007966930f94 100644 ---- a/drivers/scsi/lpfc/lpfc_scsi.c -+++ b/drivers/scsi/lpfc/lpfc_scsi.c -@@ -5194,7 +5194,7 @@ lpfc_reset_flush_io_context(struct lpfc_vport *vport, uint16_t tgt_id, - tgt_id, lun_id, context); - later = msecs_to_jiffies(2 * vport->cfg_devloss_tmo * 1000) + jiffies; - while (time_after(later, jiffies) && cnt) { -- schedule_timeout_uninterruptible(msecs_to_jiffies(20)); -+ schedule_msec_hrtimeout_uninterruptible((20)); - cnt = lpfc_sli_sum_iocb(vport, tgt_id, lun_id, context); - } - if (cnt) { -diff --git a/drivers/scsi/snic/snic_scsi.c b/drivers/scsi/snic/snic_scsi.c -index b3650c989ed4..7ed1fb285754 100644 ---- a/drivers/scsi/snic/snic_scsi.c -+++ b/drivers/scsi/snic/snic_scsi.c -@@ -2353,7 +2353,7 @@ snic_reset(struct Scsi_Host *shost, struct scsi_cmnd *sc) - - /* Wait for all the IOs that are entered in Qcmd */ - while (atomic_read(&snic->ios_inflight)) -- schedule_timeout(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout((1)); - - ret = snic_issue_hba_reset(snic, sc); - if (ret) { -diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c -index 9266e13f6271..df5c53216d78 100644 ---- a/drivers/staging/comedi/drivers/ni_mio_common.c -+++ b/drivers/staging/comedi/drivers/ni_mio_common.c -@@ -4748,7 +4748,7 @@ static int cs5529_wait_for_idle(struct comedi_device *dev) - if ((status & NI67XX_CAL_STATUS_BUSY) == 0) - break; - set_current_state(TASK_INTERRUPTIBLE); -- if (schedule_timeout(1)) -+ if (schedule_min_hrtimeout()) - return -EIO; - } - if (i == timeout) { -diff --git a/drivers/staging/rts5208/rtsx.c b/drivers/staging/rts5208/rtsx.c -index 898add4d1fc8..0aa9dd467349 100644 ---- a/drivers/staging/rts5208/rtsx.c -+++ b/drivers/staging/rts5208/rtsx.c -@@ -477,7 +477,7 @@ static int rtsx_polling_thread(void *__dev) - - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(POLLING_INTERVAL)); -+ schedule_msec_hrtimeout((POLLING_INTERVAL)); - - /* lock the device pointers */ - mutex_lock(&dev->dev_mutex); -diff --git a/drivers/staging/unisys/visornic/visornic_main.c b/drivers/staging/unisys/visornic/visornic_main.c -index 0433536930a9..d8726f28843f 100644 ---- a/drivers/staging/unisys/visornic/visornic_main.c -+++ b/drivers/staging/unisys/visornic/visornic_main.c -@@ -549,7 +549,7 @@ static int visornic_disable_with_timeout(struct net_device *netdev, - } - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(&devdata->priv_lock, flags); -- wait += schedule_timeout(msecs_to_jiffies(10)); -+ wait += schedule_msec_hrtimeout((10)); - spin_lock_irqsave(&devdata->priv_lock, flags); - } - -@@ -560,7 +560,7 @@ static int visornic_disable_with_timeout(struct net_device *netdev, - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(&devdata->priv_lock, flags); -- schedule_timeout(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout((10)); - spin_lock_irqsave(&devdata->priv_lock, flags); - if (atomic_read(&devdata->usage)) - break; -@@ -714,7 +714,7 @@ static int visornic_enable_with_timeout(struct net_device *netdev, - } - 
set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(&devdata->priv_lock, flags); -- wait += schedule_timeout(msecs_to_jiffies(10)); -+ wait += schedule_msec_hrtimeout((10)); - spin_lock_irqsave(&devdata->priv_lock, flags); - } - -diff --git a/drivers/video/fbdev/omap/hwa742.c b/drivers/video/fbdev/omap/hwa742.c -index cfe63932f825..71c00ef772a3 100644 ---- a/drivers/video/fbdev/omap/hwa742.c -+++ b/drivers/video/fbdev/omap/hwa742.c -@@ -913,7 +913,7 @@ static void hwa742_resume(void) - if (hwa742_read_reg(HWA742_PLL_DIV_REG) & (1 << 7)) - break; - set_current_state(TASK_UNINTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(5)); -+ schedule_msec_hrtimeout((5)); - } - hwa742_set_update_mode(hwa742.update_mode_before_suspend); - } -diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c -index f1551e00eb12..f0f651e92504 100644 ---- a/drivers/video/fbdev/pxafb.c -+++ b/drivers/video/fbdev/pxafb.c -@@ -1287,7 +1287,7 @@ static int pxafb_smart_thread(void *arg) - mutex_unlock(&fbi->ctrlr_lock); - - set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(msecs_to_jiffies(30)); -+ schedule_msec_hrtimeout((30)); - } - - pr_debug("%s(): task ending\n", __func__); -diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c -index 76d2e43817ea..6ba0604e2162 100644 ---- a/fs/btrfs/inode-map.c -+++ b/fs/btrfs/inode-map.c -@@ -91,7 +91,7 @@ static int caching_kthread(void *data) - btrfs_release_path(path); - root->ino_cache_progress = last; - up_read(&fs_info->commit_root_sem); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - goto again; - } else - continue; -diff --git a/fs/proc/base.c b/fs/proc/base.c -index 617db4e0faa0..f85926764f9a 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -479,7 +479,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, - seq_puts(m, "0 0 0\n"); - else - seq_printf(m, "%llu %llu %lu\n", -- (unsigned long long)task->se.sum_exec_runtime, -+ (unsigned long long)tsk_seruntime(task), - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); - -diff --git a/include/linux/freezer.h b/include/linux/freezer.h -index 27828145ca09..504cc97bf475 100644 ---- a/include/linux/freezer.h -+++ b/include/linux/freezer.h -@@ -311,6 +311,7 @@ static inline void set_freezable(void) {} - #define wait_event_freezekillable_unsafe(wq, condition) \ - wait_event_killable(wq, condition) - -+#define pm_freezing (false) - #endif /* !CONFIG_FREEZER */ - - #endif /* FREEZER_H_INCLUDED */ -diff --git a/include/linux/init_task.h b/include/linux/init_task.h -index 2c620d7ac432..73417df5daa2 100644 ---- a/include/linux/init_task.h -+++ b/include/linux/init_task.h -@@ -36,7 +36,11 @@ extern struct cred init_cred; - #define INIT_PREV_CPUTIME(x) - #endif - -+#ifdef CONFIG_SCHED_MUQSS -+#define INIT_TASK_COMM "MuQSS" -+#else - #define INIT_TASK_COMM "swapper" -+#endif - - /* Attach to the init_task data structure for proper alignment */ - #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK -diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h -index e9bfe6972aed..16ba1c7e5bde 100644 ---- a/include/linux/ioprio.h -+++ b/include/linux/ioprio.h -@@ -53,6 +53,8 @@ enum { - */ - static inline int task_nice_ioprio(struct task_struct *task) - { -+ if (iso_task(task)) -+ return 0; - return (task_nice(task) + 20) / 5; - } - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index afe01e232935..139e4535fcc6 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -35,6 +35,10 @@ - #include - #include - -+#ifdef 
CONFIG_SCHED_MUQSS -+#include -+#endif -+ - /* task_struct member predeclarations (sorted alphabetically): */ - struct audit_context; - struct backing_dev_info; -@@ -213,13 +217,40 @@ struct task_group; - - extern void scheduler_tick(void); - --#define MAX_SCHEDULE_TIMEOUT LONG_MAX -- -+#define MAX_SCHEDULE_TIMEOUT LONG_MAX - extern long schedule_timeout(long timeout); - extern long schedule_timeout_interruptible(long timeout); - extern long schedule_timeout_killable(long timeout); - extern long schedule_timeout_uninterruptible(long timeout); - extern long schedule_timeout_idle(long timeout); -+ -+#ifdef CONFIG_HIGH_RES_TIMERS -+extern long schedule_msec_hrtimeout(long timeout); -+extern long schedule_min_hrtimeout(void); -+extern long schedule_msec_hrtimeout_interruptible(long timeout); -+extern long schedule_msec_hrtimeout_uninterruptible(long timeout); -+#else -+static inline long schedule_msec_hrtimeout(long timeout) -+{ -+ return schedule_timeout(msecs_to_jiffies(timeout)); -+} -+ -+static inline long schedule_min_hrtimeout(void) -+{ -+ return schedule_timeout(1); -+} -+ -+static inline long schedule_msec_hrtimeout_interruptible(long timeout) -+{ -+ return schedule_timeout_interruptible(msecs_to_jiffies(timeout)); -+} -+ -+static inline long schedule_msec_hrtimeout_uninterruptible(long timeout) -+{ -+ return schedule_timeout_uninterruptible(msecs_to_jiffies(timeout)); -+} -+#endif -+ - asmlinkage void schedule(void); - extern void schedule_preempt_disabled(void); - asmlinkage void preempt_schedule_irq(void); -@@ -651,8 +682,10 @@ struct task_struct { - unsigned int flags; - unsigned int ptrace; - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_MUQSS) - int on_cpu; -+#endif -+#ifdef CONFIG_SMP - struct __call_single_node wake_entry; - #ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ -@@ -678,10 +711,25 @@ struct task_struct { - int static_prio; - int normal_prio; - unsigned int rt_priority; -+#ifdef CONFIG_SCHED_MUQSS -+ int time_slice; -+ u64 deadline; -+ skiplist_node node; /* Skip list node */ -+ u64 last_ran; -+ u64 sched_time; /* sched_clock time spent running */ -+#ifdef CONFIG_SMT_NICE -+ int smt_bias; /* Policy/nice level bias across smt siblings */ -+#endif -+#ifdef CONFIG_HOTPLUG_CPU -+ bool zerobound; /* Bound to CPU0 for hotplug */ -+#endif -+ unsigned long rt_timeout; -+#else /* CONFIG_SCHED_MUQSS */ - - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; -+#endif - #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; - #endif -@@ -863,6 +911,10 @@ struct task_struct { - #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME - u64 utimescaled; - u64 stimescaled; -+#endif -+#ifdef CONFIG_SCHED_MUQSS -+ /* Unbanked cpu time */ -+ unsigned long utime_ns, stime_ns; - #endif - u64 gtime; - struct prev_cputime prev_cputime; -@@ -1332,6 +1384,40 @@ struct task_struct { - */ - }; - -+#ifdef CONFIG_SCHED_MUQSS -+#define tsk_seruntime(t) ((t)->sched_time) -+#define tsk_rttimeout(t) ((t)->rt_timeout) -+ -+static inline void tsk_cpus_current(struct task_struct *p) -+{ -+} -+ -+void print_scheduler_version(void); -+ -+static inline bool iso_task(struct task_struct *p) -+{ -+ return (p->policy == SCHED_ISO); -+} -+#else /* CFS */ -+#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) -+#define tsk_rttimeout(t) ((t)->rt.timeout) -+ -+static inline void tsk_cpus_current(struct task_struct *p) -+{ -+ p->nr_cpus_allowed = current->nr_cpus_allowed; -+} -+ -+static inline void print_scheduler_version(void) -+{ -+ 
printk(KERN_INFO "CFS CPU scheduler.\n"); -+} -+ -+static inline bool iso_task(struct task_struct *p) -+{ -+ return false; -+} -+#endif /* CONFIG_SCHED_MUQSS */ -+ - static inline struct pid *task_pid(struct task_struct *task) - { - return task->thread_pid; -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 1aff00b65f3c..73d6319a856a 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -28,7 +28,16 @@ static inline bool dl_time_before(u64 a, u64 b) - #ifdef CONFIG_SMP - - struct root_domain; -+#ifdef CONFIG_SCHED_MUQSS -+static inline void dl_clear_root_domain(struct root_domain *rd) -+{ -+} -+static inline void dl_add_task_root_domain(struct task_struct *p) -+{ -+} -+#else /* CONFIG_SCHED_MUQSS */ - extern void dl_add_task_root_domain(struct task_struct *p); - extern void dl_clear_root_domain(struct root_domain *rd); -+#endif /* CONFIG_SCHED_MUQSS */ - - #endif /* CONFIG_SMP */ -diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h -index 6d67e9a5af6b..101fe470aa8f 100644 ---- a/include/linux/sched/nohz.h -+++ b/include/linux/sched/nohz.h -@@ -13,7 +13,7 @@ extern int get_nohz_timer_target(void); - static inline void nohz_balance_enter_idle(int cpu) { } - #endif - --#ifdef CONFIG_NO_HZ_COMMON -+#if defined(CONFIG_NO_HZ_COMMON) && !defined(CONFIG_SCHED_MUQSS) - void calc_load_nohz_start(void); - void calc_load_nohz_remote(struct rq *rq); - void calc_load_nohz_stop(void); -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 7d64feafc408..43c9d9e50c09 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -20,8 +20,20 @@ - */ - - #define MAX_USER_RT_PRIO 100 -+ -+#ifdef CONFIG_SCHED_MUQSS -+/* Note different MAX_RT_PRIO */ -+#define MAX_RT_PRIO (MAX_USER_RT_PRIO + 1) -+ -+#define ISO_PRIO (MAX_RT_PRIO) -+#define NORMAL_PRIO (MAX_RT_PRIO + 1) -+#define IDLE_PRIO (MAX_RT_PRIO + 2) -+#define PRIO_LIMIT ((IDLE_PRIO) + 1) -+#else /* CONFIG_SCHED_MUQSS */ - #define MAX_RT_PRIO MAX_USER_RT_PRIO - -+#endif /* CONFIG_SCHED_MUQSS */ -+ - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..010b2244e0b6 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return true; -+#ifndef CONFIG_SCHED_MUQSS - if (policy == SCHED_DEADLINE) - return true; -+#endif - return false; - } - -diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h -index a98965007eef..743f67fd012e 100644 ---- a/include/linux/sched/task.h -+++ b/include/linux/sched/task.h -@@ -93,7 +93,7 @@ int kernel_wait(pid_t pid, int *stat); - extern void free_task(struct task_struct *tsk); - - /* sched_exec is called by processes performing an exec */ --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_MUQSS) - extern void sched_exec(void); - #else - #define sched_exec() {} -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -new file mode 100644 -index 000000000000..d4be84ba273b ---- /dev/null -+++ b/include/linux/skip_list.h -@@ -0,0 +1,33 @@ -+#ifndef _LINUX_SKIP_LISTS_H -+#define _LINUX_SKIP_LISTS_H -+typedef u64 keyType; -+typedef void *valueType; -+ -+typedef struct nodeStructure skiplist_node; -+ -+struct nodeStructure { -+ int level; /* Levels in this structure */ -+ keyType key; -+ 
valueType value; -+ skiplist_node *next[8]; -+ skiplist_node *prev[8]; -+}; -+ -+typedef struct listStructure { -+ int entries; -+ int level; /* Maximum level of the list -+ (1 more than the number of levels in the list) */ -+ skiplist_node *header; /* pointer to header */ -+} skiplist; -+ -+void skiplist_init(skiplist_node *slnode); -+skiplist *new_skiplist(skiplist_node *slnode); -+void free_skiplist(skiplist *l); -+void skiplist_node_init(skiplist_node *node); -+void skiplist_insert(skiplist *l, skiplist_node *node, keyType key, valueType value, unsigned int randseed); -+void skiplist_delete(skiplist *l, skiplist_node *node); -+ -+static inline bool skiplist_node_empty(skiplist_node *node) { -+ return (!node->next[0]); -+} -+#endif /* _LINUX_SKIP_LISTS_H */ -diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h -index 3bac0a8ceab2..f48c5c5da651 100644 ---- a/include/uapi/linux/sched.h -+++ b/include/uapi/linux/sched.h -@@ -115,9 +115,16 @@ struct clone_args { - #define SCHED_FIFO 1 - #define SCHED_RR 2 - #define SCHED_BATCH 3 --/* SCHED_ISO: reserved but not implemented yet */ -+/* SCHED_ISO: Implemented on MuQSS only */ - #define SCHED_IDLE 5 -+#ifdef CONFIG_SCHED_MUQSS -+#define SCHED_ISO 4 -+#define SCHED_IDLEPRIO SCHED_IDLE -+#define SCHED_MAX (SCHED_IDLEPRIO) -+#define SCHED_RANGE(policy) ((policy) <= SCHED_MAX) -+#else /* CONFIG_SCHED_MUQSS */ - #define SCHED_DEADLINE 6 -+#endif /* CONFIG_SCHED_MUQSS */ - - /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ - #define SCHED_RESET_ON_FORK 0x40000000 -diff --git a/init/Kconfig b/init/Kconfig -index d6a0b31b13dc..7e0eb99bd607 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -92,6 +92,18 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config SCHED_MUQSS -+ bool "MuQSS cpu scheduler" -+ select HIGH_RES_TIMERS -+ help -+ The Multiple Queue Skiplist Scheduler for excellent interactivity and -+ responsiveness on the desktop and highly scalable deterministic -+ low latency on any hardware. -+ -+ Say Y here. -+ default y -+ -+ - config BROKEN - bool - -@@ -510,6 +522,7 @@ config SCHED_THERMAL_PRESSURE - default y if ARM64 - depends on SMP - depends on CPU_FREQ_THERMAL -+ depends on !SCHED_MUQSS - help - Select this option to enable thermal pressure accounting in the - scheduler. Thermal pressure is the value conveyed to the scheduler -@@ -858,6 +871,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION -+ depends on !SCHED_MUQSS - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -942,9 +956,13 @@ menuconfig CGROUP_SCHED - help - This feature lets CPU scheduler recognize task groups and control CPU - bandwidth allocation to such task groups. It uses cgroups to group -- tasks. -+ tasks. In combination with MuQSS this is purely a STUB to create the -+ files associated with the CPU controller cgroup but most of the -+ controls do nothing. This is useful for working in environments and -+ with applications that will only work if this control group is -+ present. 
- --if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_MUQSS - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED -@@ -1073,6 +1091,7 @@ config CGROUP_DEVICE - - config CGROUP_CPUACCT - bool "Simple CPU accounting controller" -+ depends on !SCHED_MUQSS - help - Provides a simple controller for monitoring the - total CPU consumed by the tasks in a cgroup. -@@ -1200,6 +1219,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -+ depends on !SCHED_MUQSS - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -diff --git a/init/init_task.c b/init/init_task.c -index f6889fce64af..2557beb609c0 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -75,9 +75,17 @@ struct task_struct init_task - .stack = init_stack, - .usage = REFCOUNT_INIT(2), - .flags = PF_KTHREAD, -+#ifdef CONFIG_SCHED_MUQSS -+ .prio = NORMAL_PRIO, -+ .static_prio = MAX_PRIO - 20, -+ .normal_prio = NORMAL_PRIO, -+ .deadline = 0, -+ .time_slice = 1000000, -+#else - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, - .normal_prio = MAX_PRIO - 20, -+#endif - .policy = SCHED_NORMAL, - .cpus_ptr = &init_task.cpus_mask, - .cpus_mask = CPU_MASK_ALL, -@@ -87,6 +95,7 @@ struct task_struct init_task - .restart_block = { - .fn = do_no_restart_syscall, - }, -+#ifndef CONFIG_SCHED_MUQSS - .se = { - .group_node = LIST_HEAD_INIT(init_task.se.group_node), - }, -@@ -94,6 +103,7 @@ struct task_struct init_task - .run_list = LIST_HEAD_INIT(init_task.rt.run_list), - .time_slice = RR_TIMESLICE, - }, -+#endif - .tasks = LIST_HEAD_INIT(init_task.tasks), - #ifdef CONFIG_SMP - .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), -diff --git a/init/main.c b/init/main.c -index e880b4ecb314..fe0a705e83f2 100644 ---- a/init/main.c -+++ b/init/main.c -@@ -1421,6 +1421,8 @@ static int __ref kernel_init(void *unused) - - do_sysctl_args(); - -+ print_scheduler_version(); -+ - if (ramdisk_execute_command) { - ret = run_init_process(ramdisk_execute_command); - if (!ret) -diff --git a/kernel/Kconfig.MuQSS b/kernel/Kconfig.MuQSS -new file mode 100644 -index 000000000000..a6a58781ef91 ---- /dev/null -+++ b/kernel/Kconfig.MuQSS -@@ -0,0 +1,105 @@ -+choice -+ prompt "CPU scheduler runqueue sharing" -+ default RQ_MC if SCHED_MUQSS -+ default RQ_NONE -+ -+config RQ_NONE -+ bool "No sharing" -+ help -+ This is the default behaviour where the CPU scheduler has one runqueue -+ per CPU, whether it is a physical or logical CPU (hyperthread). -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=none -+ -+ If unsure, say N. -+ -+config RQ_SMT -+ bool "SMT (hyperthread) siblings" -+ depends on SCHED_SMT && SCHED_MUQSS -+ -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by SMT (hyperthread) siblings. As these logical cores share -+ one physical core, sharing the runqueue resource can lead to decreased -+ overhead, lower latency and higher throughput. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=smt -+ -+ If unsure, say N. -+ -+config RQ_MC -+ bool "Multicore siblings" -+ depends on SCHED_MC && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by multicore siblings in addition to any SMT siblings. -+ As these physical cores share caches, sharing the runqueue resource -+ will lead to lower latency, but its effects on overhead and throughput -+ are less predictable. 
As a general rule, 6 or fewer cores will likely -+ benefit from this, while larger CPUs will only derive a latency -+ benefit. If your workloads are primarily single threaded, this will -+ possibly worsen throughput. If you are only concerned about latency -+ then enable this regardless of how many cores you have. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=mc -+ -+ If unsure, say Y. -+ -+config RQ_MC_LLC -+ bool "Multicore siblings (LLC)" -+ depends on SCHED_MC && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will behave similarly as -+ with "Multicore siblings". -+ This option takes LLC cache into account when scheduling tasks. -+ Option may benefit CPUs with multiple LLC caches, such as Ryzen -+ and Xeon CPUs. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=llc -+ -+ If unsure, say N. -+ -+config RQ_SMP -+ bool "Symmetric Multi-Processing" -+ depends on SMP && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by all physical CPUs unless they are on separate NUMA nodes. -+ As physical CPUs usually do not share resources, sharing the runqueue -+ will normally worsen throughput but improve latency. If you only -+ care about latency enable this. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=smp -+ -+ If unsure, say N. -+ -+config RQ_ALL -+ bool "NUMA" -+ depends on SMP && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ regardless of the architecture configuration, including across NUMA -+ nodes. This can substantially decrease throughput in NUMA -+ configurations, but light NUMA designs will not be dramatically -+ affected. This option should only be chosen if latency is the prime -+ concern. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=all -+ -+ If unsure, say N. -+endchoice -+ -+config SHARERQ -+ int -+ default 0 if RQ_NONE -+ default 1 if RQ_SMT -+ default 2 if RQ_MC -+ default 3 if RQ_MC_LLC -+ default 4 if RQ_SMP -+ default 5 if RQ_ALL -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 38ef6d06888e..89ed751ac4e4 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -5,7 +5,8 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_100 if SCHED_MUQSS -+ default HZ_250_NODEF if !SCHED_MUQSS - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -20,11 +21,18 @@ choice - config HZ_100 - bool "100 HZ" - help -+ 100 Hz is a suitable choice in combination with MuQSS which does -+ not rely on ticks for rescheduling interrupts, and is not Hz limited -+ for timeouts and sleeps from both the kernel and userspace. -+ This allows us to benefit from the lower overhead and higher -+ throughput of fewer timer ticks. -+ -+ Non-MuQSS kernels: - 100 Hz is a typical choice for servers, SMP and NUMA systems - with lots of processors that may show reduced performance if - too many timer interrupts are occurring. - -- config HZ_250 -+ config HZ_250_NODEF - bool "250 HZ" - help - 250 Hz is a good compromise choice allowing server performance -@@ -32,7 +40,10 @@ choice - on SMP and NUMA systems. If you are going to be using NTSC video - or multimedia, selected 300Hz instead. - -- config HZ_300 -+ 250 Hz is the default choice for the mainline scheduler but not -+ advantageous in combination with MuQSS. 
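For quick reference (not part of the patch): the sketch below condenses the mapping that the kernel/Kconfig.MuQSS file above establishes between the RQ_* options, the CONFIG_SHARERQ integer, and the rqshare= boot parameter that set_rqshare() parses later in this patch. The table and helper (rqshare_levels, rqshare_level_from_string) are illustrative names, not code from the patch.

#include <linux/kernel.h>
#include <linux/string.h>

struct rqshare_level {
	const char *bootarg;	/* value accepted by the rqshare= boot parameter */
	int level;		/* matching CONFIG_SHARERQ / RQSHARE_* value */
};

static const struct rqshare_level rqshare_levels[] = {
	{ "none", 0 },	/* RQ_NONE:   one runqueue per logical CPU */
	{ "smt",  1 },	/* RQ_SMT:    shared by SMT (hyperthread) siblings */
	{ "mc",   2 },	/* RQ_MC:     shared by multicore siblings (Kconfig default with MuQSS) */
	{ "llc",  3 },	/* RQ_MC_LLC: like mc, but split per last-level cache */
	{ "smp",  4 },	/* RQ_SMP:    shared by all CPUs not on separate NUMA nodes */
	{ "all",  5 },	/* RQ_ALL:    a single runqueue even across NUMA nodes */
};

/* Returns the sharing level for a boot string, or -1 if unrecognised. */
static int rqshare_level_from_string(const char *str)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(rqshare_levels); i++)
		if (!strcmp(str, rqshare_levels[i].bootarg))
			return rqshare_levels[i].level;
	return -1;
}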
-+ -+ config HZ_300_NODEF - bool "300 HZ" - help - 300 Hz is a good compromise choice allowing server performance -@@ -40,7 +51,7 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -- config HZ_1000 -+ config HZ_1000_NODEF - bool "1000 HZ" - help - 1000 Hz is the preferred choice for desktop systems and other -@@ -51,9 +62,9 @@ endchoice - config HZ - int - default 100 if HZ_100 -- default 250 if HZ_250 -- default 300 if HZ_300 -- default 1000 if HZ_1000 -+ default 250 if HZ_250_NODEF -+ default 300 if HZ_300_NODEF -+ default 1000 if HZ_1000_NODEF - - config SCHED_HRTICK - def_bool HIGH_RES_TIMERS -diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt -index bf82259cff96..d9438eb6f91c 100644 ---- a/kernel/Kconfig.preempt -+++ b/kernel/Kconfig.preempt -@@ -2,7 +2,7 @@ - - choice - prompt "Preemption Model" -- default PREEMPT_NONE -+ default PREEMPT - - config PREEMPT_NONE - bool "No Forced Preemption (Server)" -@@ -18,7 +18,7 @@ config PREEMPT_NONE - latencies. - - config PREEMPT_VOLUNTARY -- bool "Voluntary Kernel Preemption (Desktop)" -+ bool "Voluntary Kernel Preemption (Nothing)" - depends on !ARCH_NO_PREEMPT - help - This option reduces the latency of the kernel by adding more -@@ -33,7 +33,8 @@ config PREEMPT_VOLUNTARY - applications to run more 'smoothly' even when the system is - under load. - -- Select this if you are building a kernel for a desktop system. -+ Select this for no system in particular (choose Preemptible -+ instead on a desktop if you know what's good for you). - - config PREEMPT - bool "Preemptible Kernel (Low-Latency Desktop)" -diff --git a/kernel/Makefile b/kernel/Makefile -index 9a20016d4900..a2640d78eadb 100644 ---- a/kernel/Makefile -+++ b/kernel/Makefile -@@ -10,7 +10,8 @@ obj-y = fork.o exec_domain.o panic.o \ - extable.o params.o \ - kthread.o sys_ni.o nsproxy.o \ - notifier.o ksysfs.o cred.o reboot.o \ -- async.o range.o smpboot.o ucount.o regset.o -+ async.o range.o smpboot.o ucount.o regset.o \ -+ skip_list.o - - obj-$(CONFIG_BPFILTER) += usermode_driver.o - obj-$(CONFIG_MODULES) += kmod.o -diff --git a/kernel/delayacct.c b/kernel/delayacct.c -index 27725754ac99..769d773c7182 100644 ---- a/kernel/delayacct.c -+++ b/kernel/delayacct.c -@@ -106,7 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) - */ - t1 = tsk->sched_info.pcount; - t2 = tsk->sched_info.run_delay; -- t3 = tsk->se.sum_exec_runtime; -+ t3 = tsk_seruntime(tsk); - - d->cpu_count += t1; - -diff --git a/kernel/exit.c b/kernel/exit.c -index 733e80f334e7..3f3506c851fd 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -121,7 +121,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->curr_target = next_thread(tsk); - } - -- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, -+ add_device_randomness((const void*) &tsk_seruntime(tsk), - sizeof(unsigned long long)); - - /* -@@ -142,7 +142,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->inblock += task_io_get_inblock(tsk); - sig->oublock += task_io_get_oublock(tsk); - task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; -+ sig->sum_sched_runtime += tsk_seruntime(tsk); - sig->nr_threads--; - __unhash_process(tsk, group_dead); - write_sequnlock(&sig->stats_lock); -diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig -index 10a5aff4eecc..ce3bcc66b48d 100644 ---- a/kernel/irq/Kconfig -+++ b/kernel/irq/Kconfig -@@ -112,6 +112,23 @@ config 
GENERIC_IRQ_RESERVATION_MODE - config IRQ_FORCED_THREADING - bool - -+config FORCE_IRQ_THREADING -+ bool "Make IRQ threading compulsory" -+ depends on IRQ_FORCED_THREADING -+ default n -+ help -+ -+ Make IRQ threading mandatory for any IRQ handlers that support it -+ instead of being optional and requiring the threadirqs kernel -+ parameter. Instead they can be optionally disabled with the -+ nothreadirqs kernel parameter. -+ -+ Enabling this may make some architectures not boot with runqueue -+ sharing and MuQSS. -+ -+ Enable if you are building for a desktop or low latency system, -+ otherwise say N. -+ - config SPARSE_IRQ - bool "Support sparse irq numbering" if MAY_HAVE_SPARSE_IRQ - help -diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c -index 5df903fccb60..17a0dd194582 100644 ---- a/kernel/irq/manage.c -+++ b/kernel/irq/manage.c -@@ -25,9 +25,20 @@ - #include "internals.h" - - #if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT) -+#ifdef CONFIG_FORCE_IRQ_THREADING -+__read_mostly bool force_irqthreads = true; -+#else - __read_mostly bool force_irqthreads; -+#endif - EXPORT_SYMBOL_GPL(force_irqthreads); - -+static int __init setup_noforced_irqthreads(char *arg) -+{ -+ force_irqthreads = false; -+ return 0; -+} -+early_param("nothreadirqs", setup_noforced_irqthreads); -+ - static int __init setup_forced_irqthreads(char *arg) - { - force_irqthreads = true; -diff --git a/kernel/kthread.c b/kernel/kthread.c -index 3edaa380dc7b..a1712699726b 100644 ---- a/kernel/kthread.c -+++ b/kernel/kthread.c -@@ -471,6 +471,34 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) - } - EXPORT_SYMBOL(kthread_bind); - -+#if defined(CONFIG_SCHED_MUQSS) && defined(CONFIG_SMP) -+extern void __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); -+ -+/* -+ * new_kthread_bind is a special variant of __kthread_bind_mask. -+ * For new threads to work on muqss we want to call do_set_cpus_allowed -+ * without the task_cpu being set and the task rescheduled until they're -+ * rescheduled on their own so we call __do_set_cpus_allowed directly which -+ * only changes the cpumask. This is particularly important for smpboot threads -+ * to work. -+ */ -+static void new_kthread_bind(struct task_struct *p, unsigned int cpu) -+{ -+ unsigned long flags; -+ -+ if (WARN_ON(!wait_task_inactive(p, TASK_UNINTERRUPTIBLE))) -+ return; -+ -+ /* It's safe because the task is inactive. */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ __do_set_cpus_allowed(p, cpumask_of(cpu)); -+ p->flags |= PF_NO_SETAFFINITY; -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+#else -+#define new_kthread_bind(p, cpu) kthread_bind(p, cpu) -+#endif -+ - /** - * kthread_create_on_cpu - Create a cpu bound kthread - * @threadfn: the function to run until signal_pending(current). -@@ -491,7 +519,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), - cpu); - if (IS_ERR(p)) - return p; -- kthread_bind(p, cpu); -+ new_kthread_bind(p, cpu); - /* CPU hotplug need to bind once again when unparking the thread. 
*/ - set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags); - to_kthread(p)->cpu = cpu; -diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c -index f6310f848f34..825f9b8e228f 100644 ---- a/kernel/livepatch/transition.c -+++ b/kernel/livepatch/transition.c -@@ -282,7 +282,7 @@ static bool klp_try_switch_task(struct task_struct *task) - { - static char err_buf[STACK_ERR_BUF_SIZE]; - struct rq *rq; -- struct rq_flags flags; -+ struct rq_flags rf; - int ret; - bool success = false; - -@@ -304,7 +304,7 @@ static bool klp_try_switch_task(struct task_struct *task) - * functions. If all goes well, switch the task to the target patch - * state. - */ -- rq = task_rq_lock(task, &flags); -+ rq = task_rq_lock(task, &rf); - - if (task_running(rq, task) && task != current) { - snprintf(err_buf, STACK_ERR_BUF_SIZE, -@@ -323,7 +323,7 @@ static bool klp_try_switch_task(struct task_struct *task) - task->patch_state = klp_target_state; - - done: -- task_rq_unlock(rq, task, &flags); -+ task_rq_unlock(rq, task, &rf); - - /* - * Due to console deadlock issues, pr_debug() can't be used while -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 5fc9c9b70862..1ff14a21193d 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -22,15 +22,23 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) - CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer - endif - -+ifdef CONFIG_SCHED_MUQSS -+obj-y += MuQSS.o clock.o cputime.o -+obj-y += idle.o -+obj-y += wait.o wait_bit.o swait.o completion.o -+ -+obj-$(CONFIG_SMP) += topology.o -+else - obj-y += core.o loadavg.o clock.o cputime.o - obj-y += idle.o fair.o rt.o deadline.o - obj-y += wait.o wait_bit.o swait.o completion.o - - obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o --obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o - obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o -+endif -+obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o - obj-$(CONFIG_MEMBARRIER) += membarrier.o -diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c -new file mode 100644 -index 000000000000..8da537d5226c ---- /dev/null -+++ b/kernel/sched/MuQSS.c -@@ -0,0 +1,7855 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * kernel/sched/MuQSS.c, was kernel/sched.c -+ * -+ * Kernel scheduler and related syscalls -+ * -+ * Copyright (C) 1991-2002 Linus Torvalds -+ * -+ * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and -+ * make semaphores SMP safe -+ * 1998-11-19 Implemented schedule_timeout() and related stuff -+ * by Andrea Arcangeli -+ * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar: -+ * hybrid priority-list and round-robin design with -+ * an array-switch method of distributing timeslices -+ * and per-CPU runqueues. Cleanups and useful suggestions -+ * by Davide Libenzi, preemptible kernel bits by Robert Love. -+ * 2003-09-03 Interactivity tuning by Con Kolivas. -+ * 2004-04-02 Scheduler domains code by Nick Piggin -+ * 2007-04-15 Work begun on replacing all interactivity tuning with a -+ * fair scheduling design by Con Kolivas. 
-+ * 2007-05-05 Load balancing (smp-nice) and other improvements -+ * by Peter Williams -+ * 2007-05-06 Interactivity improvements to CFS by Mike Galbraith -+ * 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri -+ * 2007-11-29 RT balancing improvements by Steven Rostedt, Gregory Haskins, -+ * Thomas Gleixner, Mike Kravetz -+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes -+ * a whole lot of those previous things. -+ * 2016-10-01 Multiple Queue Skiplist Scheduler scalable evolution of BFS -+ * scheduler by Con Kolivas. -+ * 2019-08-31 LLC bits by Eduards Bezverhijs -+ */ -+#define CREATE_TRACE_POINTS -+#include -+#undef CREATE_TRACE_POINTS -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "../workqueue_internal.h" -+#include "../../fs/io-wq.h" -+#include "../smpboot.h" -+ -+#include "MuQSS.h" -+#include "smp.h" -+ -+#define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO) -+#define rt_task(p) rt_prio((p)->prio) -+#define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) -+#define is_rt_policy(policy) ((policy) == SCHED_FIFO || \ -+ (policy) == SCHED_RR) -+#define has_rt_policy(p) unlikely(is_rt_policy((p)->policy)) -+ -+#define is_idle_policy(policy) ((policy) == SCHED_IDLEPRIO) -+#define idleprio_task(p) unlikely(is_idle_policy((p)->policy)) -+#define task_running_idle(p) unlikely((p)->prio == IDLE_PRIO) -+ -+#define is_iso_policy(policy) ((policy) == SCHED_ISO) -+#define iso_task(p) unlikely(is_iso_policy((p)->policy)) -+#define task_running_iso(p) unlikely((p)->prio == ISO_PRIO) -+ -+#define rq_idle(rq) ((rq)->rq_prio == PRIO_LIMIT) -+ -+#define ISO_PERIOD (5 * HZ) -+ -+#define STOP_PRIO (MAX_RT_PRIO - 1) -+ -+/* -+ * Some helpers for converting to/from various scales. Use shifts to get -+ * approximate multiples of ten for less overhead. 
-+ */ -+#define APPROX_NS_PS (1073741824) /* Approximate ns per second */ -+#define JIFFIES_TO_NS(TIME) ((TIME) * (APPROX_NS_PS / HZ)) -+#define JIFFY_NS (APPROX_NS_PS / HZ) -+#define JIFFY_US (1048576 / HZ) -+#define NS_TO_JIFFIES(TIME) ((TIME) / JIFFY_NS) -+#define HALF_JIFFY_NS (APPROX_NS_PS / HZ / 2) -+#define HALF_JIFFY_US (1048576 / HZ / 2) -+#define MS_TO_NS(TIME) ((TIME) << 20) -+#define MS_TO_US(TIME) ((TIME) << 10) -+#define NS_TO_MS(TIME) ((TIME) >> 20) -+#define NS_TO_US(TIME) ((TIME) >> 10) -+#define US_TO_NS(TIME) ((TIME) << 10) -+#define TICK_APPROX_NS ((APPROX_NS_PS+HZ/2)/HZ) -+ -+#define RESCHED_US (100) /* Reschedule if less than this many μs left */ -+ -+void print_scheduler_version(void) -+{ -+ printk(KERN_INFO "MuQSS CPU scheduler v0.204 by Con Kolivas.\n"); -+} -+ -+/* Define RQ share levels */ -+#define RQSHARE_NONE 0 -+#define RQSHARE_SMT 1 -+#define RQSHARE_MC 2 -+#define RQSHARE_MC_LLC 3 -+#define RQSHARE_SMP 4 -+#define RQSHARE_ALL 5 -+ -+/* Define locality levels */ -+#define LOCALITY_SAME 0 -+#define LOCALITY_SMT 1 -+#define LOCALITY_MC_LLC 2 -+#define LOCALITY_MC 3 -+#define LOCALITY_SMP 4 -+#define LOCALITY_DISTANT 5 -+ -+/* -+ * This determines what level of runqueue sharing will be done and is -+ * configurable at boot time with the bootparam rqshare = -+ */ -+static int rqshare __read_mostly = CONFIG_SHARERQ; /* Default RQSHARE_MC */ -+ -+static int __init set_rqshare(char *str) -+{ -+ if (!strncmp(str, "none", 4)) { -+ rqshare = RQSHARE_NONE; -+ return 0; -+ } -+ if (!strncmp(str, "smt", 3)) { -+ rqshare = RQSHARE_SMT; -+ return 0; -+ } -+ if (!strncmp(str, "mc", 2)) { -+ rqshare = RQSHARE_MC; -+ return 0; -+ } -+ if (!strncmp(str, "llc", 3)) { -+ rqshare = RQSHARE_MC_LLC; -+ return 0; -+ } -+ if (!strncmp(str, "smp", 3)) { -+ rqshare = RQSHARE_SMP; -+ return 0; -+ } -+ if (!strncmp(str, "all", 3)) { -+ rqshare = RQSHARE_ALL; -+ return 0; -+ } -+ return 1; -+} -+__setup("rqshare=", set_rqshare); -+ -+/* -+ * This is the time all tasks within the same priority round robin. -+ * Value is in ms and set to a minimum of 6ms. -+ * Tunable via /proc interface. -+ */ -+int rr_interval __read_mostly = 6; -+ -+/* -+ * Tunable to choose whether to prioritise latency or throughput, simple -+ * binary yes or no -+ */ -+int sched_interactive __read_mostly = 1; -+ -+/* -+ * sched_iso_cpu - sysctl which determines the cpu percentage SCHED_ISO tasks -+ * are allowed to run five seconds as real time tasks. This is the total over -+ * all online cpus. -+ */ -+int sched_iso_cpu __read_mostly = 70; -+ -+/* -+ * sched_yield_type - Choose what sort of yield sched_yield will perform. -+ * 0: No yield. -+ * 1: Yield only to better priority/deadline tasks. (default) -+ * 2: Expire timeslice and recalculate deadline. -+ */ -+int sched_yield_type __read_mostly = 1; -+ -+/* -+ * The relative length of deadline for each priority(nice) level. -+ */ -+static int prio_ratios[NICE_WIDTH] __read_mostly; -+ -+ -+/* -+ * The quota handed out to tasks of all priority levels when refilling their -+ * time_slice. -+ */ -+static inline int timeslice(void) -+{ -+ return MS_TO_US(rr_interval); -+} -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+ -+#ifdef CONFIG_SMP -+/* -+ * Total number of runqueues. Equals number of CPUs when there is no runqueue -+ * sharing but is usually less with SMT/MC sharing of runqueues. 
-+ */ -+static int total_runqueues __read_mostly = 1; -+ -+static cpumask_t cpu_idle_map ____cacheline_aligned_in_smp; -+ -+struct rq *cpu_rq(int cpu) -+{ -+ return &per_cpu(runqueues, (cpu)); -+} -+#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -+ -+/* -+ * For asym packing, by default the lower numbered cpu has higher priority. -+ */ -+int __weak arch_asym_cpu_priority(int cpu) -+{ -+ return -cpu; -+} -+ -+int __weak arch_sd_sibling_asym_packing(void) -+{ -+ return 0*SD_ASYM_PACKING; -+} -+ -+#ifdef CONFIG_SCHED_SMT -+DEFINE_STATIC_KEY_FALSE(sched_smt_present); -+EXPORT_SYMBOL_GPL(sched_smt_present); -+#endif -+ -+#else -+struct rq *uprq; -+#endif /* CONFIG_SMP */ -+ -+#include "stats.h" -+ -+/* -+ * All common locking functions performed on rq->lock. rq->clock is local to -+ * the CPU accessing it so it can be modified just with interrupts disabled -+ * when we're not updating niffies. -+ * Looking up task_rq must be done under rq->lock to be safe. -+ */ -+ -+/* -+ * RQ-clock updating methods: -+ */ -+ -+#ifdef HAVE_SCHED_AVG_IRQ -+static void update_irq_load_avg(struct rq *rq, long delta); -+#else -+static inline void update_irq_load_avg(struct rq *rq, long delta) {} -+#endif -+ -+static void update_rq_clock_task(struct rq *rq, s64 delta) -+{ -+/* -+ * In theory, the compile should just see 0 here, and optimize out the call -+ * to sched_rt_avg_update. But I don't trust it... -+ */ -+ s64 __maybe_unused steal = 0, irq_delta = 0; -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; -+ -+ /* -+ * Since irq_time is only updated on {soft,}irq_exit, we might run into -+ * this case when a previous update_rq_clock() happened inside a -+ * {soft,}irq region. -+ * -+ * When this happens, we stop ->clock_task and only update the -+ * prev_irq_time stamp to account for the part that fit, so that a next -+ * update will consume the rest. This ensures ->clock_task is -+ * monotonic. -+ * -+ * It does however cause some slight miss-attribution of {soft,}irq -+ * time, a more accurate solution would be to update the irq_time using -+ * the current rq->clock timestamp, except that would require using -+ * atomic ops. -+ */ -+ if (irq_delta > delta) -+ irq_delta = delta; -+ -+ rq->prev_irq_time += irq_delta; -+ delta -= irq_delta; -+#endif -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ if (static_key_false((¶virt_steal_rq_enabled))) { -+ steal = paravirt_steal_clock(cpu_of(rq)); -+ steal -= rq->prev_steal_time_rq; -+ -+ if (unlikely(steal > delta)) -+ steal = delta; -+ -+ rq->prev_steal_time_rq += steal; -+ delta -= steal; -+ } -+#endif -+ rq->clock_task += delta; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ if (irq_delta + steal) -+ update_irq_load_avg(rq, irq_delta + steal); -+#endif -+} -+ -+static inline void update_rq_clock(struct rq *rq) -+{ -+ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; -+ -+ if (unlikely(delta < 0)) -+ return; -+ rq->clock += delta; -+ update_rq_clock_task(rq, delta); -+} -+ -+/* -+ * Niffies are a globally increasing nanosecond counter. They're only used by -+ * update_load_avg and time_slice_expired, however deadlines are based on them -+ * across CPUs. Update them whenever we will call one of those functions, and -+ * synchronise them across CPUs whenever we hold both runqueue locks. 
-+ */ -+static inline void update_clocks(struct rq *rq) -+{ -+ s64 ndiff, minndiff; -+ long jdiff; -+ -+ update_rq_clock(rq); -+ ndiff = rq->clock - rq->old_clock; -+ rq->old_clock = rq->clock; -+ jdiff = jiffies - rq->last_jiffy; -+ -+ /* Subtract any niffies added by balancing with other rqs */ -+ ndiff -= rq->niffies - rq->last_niffy; -+ minndiff = JIFFIES_TO_NS(jdiff) - rq->niffies + rq->last_jiffy_niffies; -+ if (minndiff < 0) -+ minndiff = 0; -+ ndiff = max(ndiff, minndiff); -+ rq->niffies += ndiff; -+ rq->last_niffy = rq->niffies; -+ if (jdiff) { -+ rq->last_jiffy += jdiff; -+ rq->last_jiffy_niffies = rq->niffies; -+ } -+} -+ -+/* -+ * Any time we have two runqueues locked we use that as an opportunity to -+ * synchronise niffies to the highest value as idle ticks may have artificially -+ * kept niffies low on one CPU and the truth can only be later. -+ */ -+static inline void synchronise_niffies(struct rq *rq1, struct rq *rq2) -+{ -+ if (rq1->niffies > rq2->niffies) -+ rq2->niffies = rq1->niffies; -+ else -+ rq1->niffies = rq2->niffies; -+} -+ -+/* -+ * double_rq_lock - safely lock two runqueues -+ * -+ * Note this does not disable interrupts like task_rq_lock, -+ * you need to do so manually before calling. -+ */ -+ -+/* For when we know rq1 != rq2 */ -+static inline void __double_rq_lock(struct rq *rq1, struct rq *rq2) -+ __acquires(rq1->lock) -+ __acquires(rq2->lock) -+{ -+ if (rq1 < rq2) { -+ raw_spin_lock(rq1->lock); -+ raw_spin_lock_nested(rq2->lock, SINGLE_DEPTH_NESTING); -+ } else { -+ raw_spin_lock(rq2->lock); -+ raw_spin_lock_nested(rq1->lock, SINGLE_DEPTH_NESTING); -+ } -+} -+ -+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) -+ __acquires(rq1->lock) -+ __acquires(rq2->lock) -+{ -+ BUG_ON(!irqs_disabled()); -+ if (rq1->lock == rq2->lock) { -+ raw_spin_lock(rq1->lock); -+ __acquire(rq2->lock); /* Fake it out ;) */ -+ } else -+ __double_rq_lock(rq1, rq2); -+ synchronise_niffies(rq1, rq2); -+} -+ -+/* -+ * double_rq_unlock - safely unlock two runqueues -+ * -+ * Note this does not restore interrupts like task_rq_unlock, -+ * you need to do so manually after calling. 
-+ */ -+static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) -+ __releases(rq1->lock) -+ __releases(rq2->lock) -+{ -+ raw_spin_unlock(rq1->lock); -+ if (rq1->lock != rq2->lock) -+ raw_spin_unlock(rq2->lock); -+ else -+ __release(rq2->lock); -+} -+ -+static inline void lock_all_rqs(void) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ for_each_possible_cpu(cpu) { -+ struct rq *rq = cpu_rq(cpu); -+ -+ do_raw_spin_lock(rq->lock); -+ } -+} -+ -+static inline void unlock_all_rqs(void) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) { -+ struct rq *rq = cpu_rq(cpu); -+ -+ do_raw_spin_unlock(rq->lock); -+ } -+ preempt_enable(); -+} -+ -+/* Specially nest trylock an rq */ -+static inline bool trylock_rq(struct rq *this_rq, struct rq *rq) -+{ -+ if (unlikely(!do_raw_spin_trylock(rq->lock))) -+ return false; -+ spin_acquire(&rq->lock->dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ synchronise_niffies(this_rq, rq); -+ return true; -+} -+ -+/* Unlock a specially nested trylocked rq */ -+static inline void unlock_rq(struct rq *rq) -+{ -+ spin_release(&rq->lock->dep_map, _RET_IP_); -+ do_raw_spin_unlock(rq->lock); -+} -+ -+/* -+ * cmpxchg based fetch_or, macro so it works for different integer types -+ */ -+#define fetch_or(ptr, mask) \ -+ ({ \ -+ typeof(ptr) _ptr = (ptr); \ -+ typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _old, _val = *_ptr; \ -+ \ -+ for (;;) { \ -+ _old = cmpxchg(_ptr, _val, _val | _mask); \ -+ if (_old == _val) \ -+ break; \ -+ _val = _old; \ -+ } \ -+ _old; \ -+}) -+ -+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) -+/* -+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * this avoids any races wrt polling state changes and thereby avoids -+ * spurious IPIs. -+ */ -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+} -+ -+/* -+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. -+ * -+ * If this returns true, then the idle task promises to call -+ * sched_ttwu_pending() and reschedule soon. -+ */ -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); -+ -+ for (;;) { -+ if (!(val & _TIF_POLLING_NRFLAG)) -+ return false; -+ if (val & _TIF_NEED_RESCHED) -+ return true; -+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); -+ if (old == val) -+ break; -+ val = old; -+ } -+ return true; -+} -+ -+#else -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ return false; -+} -+#endif -+#endif -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. -+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. 
-+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. -+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); -+ /* Task can safely be re-inserted now */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ -+static inline void smp_sched_reschedule(int cpu) -+{ -+ if (likely(cpu_online(cpu))) -+ smp_send_reschedule(cpu); -+} -+ -+/* -+ * resched_task - mark a task 'to be rescheduled now'. -+ * -+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. -+ */ -+void resched_task(struct task_struct *p) -+{ -+ int cpu; -+#ifdef CONFIG_LOCKDEP -+ /* Kernel threads call this when creating workqueues while still -+ * inactive from __kthread_bind_mask, holding only the pi_lock */ -+ if (!(p->flags & PF_KTHREAD)) { -+ struct rq *rq = task_rq(p); -+ -+ lockdep_assert_held(rq->lock); -+ } -+#endif -+ if (test_tsk_need_resched(p)) -+ return; -+ -+ cpu = task_cpu(p); -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(p)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+/* -+ * A task that is not running or queued will not have a node set. -+ * A task that is queued but not running will have a node set. -+ * A task that is currently running will have ->on_cpu set but no node set. 
-+ */ -+static inline bool task_queued(struct task_struct *p) -+{ -+ return !skiplist_node_empty(&p->node); -+} -+ -+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags); -+static inline void resched_if_idle(struct rq *rq); -+ -+static inline bool deadline_before(u64 deadline, u64 time) -+{ -+ return (deadline < time); -+} -+ -+/* -+ * Deadline is "now" in niffies + (offset by priority). Setting the deadline -+ * is the key to everything. It distributes cpu fairly amongst tasks of the -+ * same nice value, it proportions cpu according to nice level, it means the -+ * task that last woke up the longest ago has the earliest deadline, thus -+ * ensuring that interactive tasks get low latency on wake up. The CPU -+ * proportion works out to the square of the virtual deadline difference, so -+ * this equation will give nice 19 3% CPU compared to nice 0. -+ */ -+static inline u64 prio_deadline_diff(int user_prio) -+{ -+ return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128)); -+} -+ -+static inline u64 task_deadline_diff(struct task_struct *p) -+{ -+ return prio_deadline_diff(TASK_USER_PRIO(p)); -+} -+ -+static inline u64 static_deadline_diff(int static_prio) -+{ -+ return prio_deadline_diff(USER_PRIO(static_prio)); -+} -+ -+static inline int longest_deadline_diff(void) -+{ -+ return prio_deadline_diff(39); -+} -+ -+static inline int ms_longest_deadline_diff(void) -+{ -+ return NS_TO_MS(longest_deadline_diff()); -+} -+ -+static inline bool rq_local(struct rq *rq); -+ -+#ifndef SCHED_CAPACITY_SCALE -+#define SCHED_CAPACITY_SCALE 1024 -+#endif -+ -+static inline int rq_load(struct rq *rq) -+{ -+ return rq->nr_running; -+} -+ -+/* -+ * Update the load average for feeding into cpu frequency governors. Use a -+ * rough estimate of a rolling average with ~ time constant of 32ms. -+ * 80/128 ~ 0.63. * 80 / 32768 / 128 == * 5 / 262144 -+ * Make sure a call to update_clocks has been made before calling this to get -+ * an updated rq->niffies. -+ */ -+static void update_load_avg(struct rq *rq, unsigned int flags) -+{ -+ long us_interval, load; -+ -+ us_interval = NS_TO_US(rq->niffies - rq->load_update); -+ if (unlikely(us_interval <= 0)) -+ return; -+ -+ load = rq->load_avg - (rq->load_avg * us_interval * 5 / 262144); -+ if (unlikely(load < 0)) -+ load = 0; -+ load += rq_load(rq) * SCHED_CAPACITY_SCALE * us_interval * 5 / 262144; -+ rq->load_avg = load; -+ -+ rq->load_update = rq->niffies; -+ update_irq_load_avg(rq, 0); -+ if (likely(rq_local(rq))) -+ cpufreq_trigger(rq, flags); -+} -+ -+#ifdef HAVE_SCHED_AVG_IRQ -+/* -+ * IRQ variant of update_load_avg below. delta is actually time in nanoseconds -+ * here so we scale curload to how long it's been since the last update. -+ */ -+static void update_irq_load_avg(struct rq *rq, long delta) -+{ -+ long us_interval, load; -+ -+ us_interval = NS_TO_US(rq->niffies - rq->irq_load_update); -+ if (unlikely(us_interval <= 0)) -+ return; -+ -+ load = rq->irq_load_avg - (rq->irq_load_avg * us_interval * 5 / 262144); -+ if (unlikely(load < 0)) -+ load = 0; -+ load += NS_TO_US(delta) * SCHED_CAPACITY_SCALE * 5 / 262144; -+ rq->irq_load_avg = load; -+ -+ rq->irq_load_update = rq->niffies; -+} -+#endif -+ -+/* -+ * Removing from the runqueue. Enter with rq locked. Deleting a task -+ * from the skip list is done via the stored node reference in the task struct -+ * and does not require a full look up. Thus it occurs in O(k) time where k -+ * is the "level" of the list the task was stored at - usually < 4, max 8. 
-+ */ -+static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ skiplist_delete(rq->sl, &p->node); -+ rq->best_key = rq->node->next[0]->key; -+ update_clocks(rq); -+ -+ if (!(flags & DEQUEUE_SAVE)) { -+ sched_info_dequeued(rq, p); -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); -+ } -+ rq->nr_running--; -+ if (rt_task(p)) -+ rq->rt_nr_running--; -+ update_load_avg(rq, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_RCU -+static bool rcu_read_critical(struct task_struct *p) -+{ -+ return p->rcu_read_unlock_special.b.blocked; -+} -+#else /* CONFIG_PREEMPT_RCU */ -+#define rcu_read_critical(p) (false) -+#endif /* CONFIG_PREEMPT_RCU */ -+ -+/* -+ * To determine if it's safe for a task of SCHED_IDLEPRIO to actually run as -+ * an idle task, we ensure none of the following conditions are met. -+ */ -+static bool idleprio_suitable(struct task_struct *p) -+{ -+ return (!(p->sched_contributes_to_load) && !(p->flags & (PF_EXITING)) && -+ !signal_pending(p) && !rcu_read_critical(p) && !freezing(p)); -+} -+ -+/* -+ * To determine if a task of SCHED_ISO can run in pseudo-realtime, we check -+ * that the iso_refractory flag is not set. -+ */ -+static inline bool isoprio_suitable(struct rq *rq) -+{ -+ return !rq->iso_refractory; -+} -+ -+static inline void inc_nr_running(struct rq *rq) -+{ -+ rq->nr_running++; -+ if (trace_sched_update_nr_running_tp_enabled()) { -+ call_trace_sched_update_nr_running(rq, 1); -+ } -+} -+ -+static inline void dec_nr_running(struct rq *rq) -+{ -+ rq->nr_running--; -+ if (trace_sched_update_nr_running_tp_enabled()) { -+ call_trace_sched_update_nr_running(rq, -1); -+ } -+} -+ -+/* -+ * Adding to the runqueue. Enter with rq locked. -+ */ -+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ unsigned int randseed, cflags = 0; -+ u64 sl_id; -+ -+ if (!rt_task(p)) { -+ /* Check it hasn't gotten rt from PI */ -+ if ((idleprio_task(p) && idleprio_suitable(p)) || -+ (iso_task(p) && isoprio_suitable(rq))) -+ p->prio = p->normal_prio; -+ else -+ p->prio = NORMAL_PRIO; -+ } else -+ rq->rt_nr_running++; -+ /* -+ * The sl_id key passed to the skiplist generates a sorted list. -+ * Realtime and sched iso tasks run FIFO so they only need be sorted -+ * according to priority. The skiplist will put tasks of the same -+ * key inserted later in FIFO order. Tasks of sched normal, batch -+ * and idleprio are sorted according to their deadlines. Idleprio -+ * tasks are offset by an impossibly large deadline value ensuring -+ * they get sorted into last positions, but still according to their -+ * own deadlines. This creates a "landscape" of skiplists running -+ * from priority 0 realtime in first place to the lowest priority -+ * idleprio tasks last. Skiplist insertion is an O(log n) process. -+ */ -+ if (p->prio <= ISO_PRIO) { -+ sl_id = p->prio; -+ } else { -+ sl_id = p->deadline; -+ if (idleprio_task(p)) { -+ if (p->prio == IDLE_PRIO) -+ sl_id |= 0xF000000000000000; -+ else -+ sl_id += longest_deadline_diff(); -+ } -+ } -+ /* -+ * Some architectures don't have better than microsecond resolution -+ * so mask out ~microseconds as the random seed for skiplist insertion. 
-+ */ -+ update_clocks(rq); -+ if (!(flags & ENQUEUE_RESTORE)) { -+ sched_info_queued(rq, p); -+ psi_enqueue(p, flags & ENQUEUE_WAKEUP); -+ } -+ -+ randseed = (rq->niffies >> 10) & 0xFFFFFFFF; -+ skiplist_insert(rq->sl, &p->node, sl_id, p, randseed); -+ rq->best_key = rq->node->next[0]->key; -+ if (p->in_iowait) -+ cflags |= SCHED_CPUFREQ_IOWAIT; -+ inc_nr_running(rq); -+ update_load_avg(rq, cflags); -+} -+ -+/* -+ * Returns the relative length of deadline all compared to the shortest -+ * deadline which is that of nice -20. -+ */ -+static inline int task_prio_ratio(struct task_struct *p) -+{ -+ return prio_ratios[TASK_USER_PRIO(p)]; -+} -+ -+/* -+ * task_timeslice - all tasks of all priorities get the exact same timeslice -+ * length. CPU distribution is handled by giving different deadlines to -+ * tasks of different priorities. Use 128 as the base value for fast shifts. -+ */ -+static inline int task_timeslice(struct task_struct *p) -+{ -+ return (rr_interval * task_prio_ratio(p) / 128); -+} -+ -+#ifdef CONFIG_SMP -+/* Entered with rq locked */ -+static inline void resched_if_idle(struct rq *rq) -+{ -+ if (rq_idle(rq)) -+ resched_task(rq->curr); -+} -+ -+static inline bool rq_local(struct rq *rq) -+{ -+ return (rq->cpu == smp_processor_id()); -+} -+#ifdef CONFIG_SMT_NICE -+static const cpumask_t *thread_cpumask(int cpu); -+ -+/* Find the best real time priority running on any SMT siblings of cpu and if -+ * none are running, the static priority of the best deadline task running. -+ * The lookups to the other runqueues is done lockless as the occasional wrong -+ * value would be harmless. */ -+static int best_smt_bias(struct rq *this_rq) -+{ -+ int other_cpu, best_bias = 0; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct rq *rq = cpu_rq(other_cpu); -+ -+ if (rq_idle(rq)) -+ continue; -+ if (unlikely(!rq->online)) -+ continue; -+ if (!rq->rq_mm) -+ continue; -+ if (likely(rq->rq_smt_bias > best_bias)) -+ best_bias = rq->rq_smt_bias; -+ } -+ return best_bias; -+} -+ -+static int task_prio_bias(struct task_struct *p) -+{ -+ if (rt_task(p)) -+ return 1 << 30; -+ else if (task_running_iso(p)) -+ return 1 << 29; -+ else if (task_running_idle(p)) -+ return 0; -+ return MAX_PRIO - p->static_prio; -+} -+ -+static bool smt_always_schedule(struct task_struct __maybe_unused *p, struct rq __maybe_unused *this_rq) -+{ -+ return true; -+} -+ -+static bool (*smt_schedule)(struct task_struct *p, struct rq *this_rq) = &smt_always_schedule; -+ -+/* We've already decided p can run on CPU, now test if it shouldn't for SMT -+ * nice reasons. 
*/ -+static bool smt_should_schedule(struct task_struct *p, struct rq *this_rq) -+{ -+ int best_bias, task_bias; -+ -+ /* Kernel threads always run */ -+ if (unlikely(!p->mm)) -+ return true; -+ if (rt_task(p)) -+ return true; -+ if (!idleprio_suitable(p)) -+ return true; -+ best_bias = best_smt_bias(this_rq); -+ /* The smt siblings are all idle or running IDLEPRIO */ -+ if (best_bias < 1) -+ return true; -+ task_bias = task_prio_bias(p); -+ if (task_bias < 1) -+ return false; -+ if (task_bias >= best_bias) -+ return true; -+ /* Dither 25% cpu of normal tasks regardless of nice difference */ -+ if (best_bias % 4 == 1) -+ return true; -+ /* Sorry, you lose */ -+ return false; -+} -+#else /* CONFIG_SMT_NICE */ -+#define smt_schedule(p, this_rq) (true) -+#endif /* CONFIG_SMT_NICE */ -+ -+static inline void atomic_set_cpu(int cpu, cpumask_t *cpumask) -+{ -+ set_bit(cpu, (volatile unsigned long *)cpumask); -+} -+ -+/* -+ * The cpu_idle_map stores a bitmap of all the CPUs currently idle to -+ * allow easy lookup of whether any suitable idle CPUs are available. -+ * It's cheaper to maintain a binary yes/no if there are any idle CPUs on the -+ * idle_cpus variable than to do a full bitmask check when we are busy. The -+ * bits are set atomically but read locklessly as occasional false positive / -+ * negative is harmless. -+ */ -+static inline void set_cpuidle_map(int cpu) -+{ -+ if (likely(cpu_online(cpu))) -+ atomic_set_cpu(cpu, &cpu_idle_map); -+} -+ -+static inline void atomic_clear_cpu(int cpu, cpumask_t *cpumask) -+{ -+ clear_bit(cpu, (volatile unsigned long *)cpumask); -+} -+ -+static inline void clear_cpuidle_map(int cpu) -+{ -+ atomic_clear_cpu(cpu, &cpu_idle_map); -+} -+ -+static bool suitable_idle_cpus(struct task_struct *p) -+{ -+ return (cpumask_intersects(p->cpus_ptr, &cpu_idle_map)); -+} -+ -+/* -+ * Resched current on rq. We don't know if rq is local to this CPU nor if it -+ * is locked so we do not use an intermediate variable for the task to avoid -+ * having it dereferenced. -+ */ -+static void resched_curr(struct rq *rq) -+{ -+ int cpu; -+ -+ if (test_tsk_need_resched(rq->curr)) -+ return; -+ -+ rq->preempt = rq->curr; -+ cpu = rq->cpu; -+ -+ /* We're doing this without holding the rq lock if it's not task_rq */ -+ -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(rq->curr); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(rq->curr)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+#define CPUIDLE_DIFF_THREAD (1) -+#define CPUIDLE_DIFF_CORE_LLC (2) -+#define CPUIDLE_DIFF_CORE (4) -+#define CPUIDLE_CACHE_BUSY (8) -+#define CPUIDLE_DIFF_CPU (16) -+#define CPUIDLE_THREAD_BUSY (32) -+#define CPUIDLE_DIFF_NODE (64) -+ -+/* -+ * The best idle CPU is chosen according to the CPUIDLE ranking above where the -+ * lowest value would give the most suitable CPU to schedule p onto next. The -+ * order works out to be the following: -+ * -+ * Same thread, idle or busy cache, idle or busy threads -+ * Other core, same cache, idle or busy cache, idle threads. -+ * Same node, other CPU, idle cache, idle threads. -+ * Same node, other CPU, busy cache, idle threads. -+ * Other core, same cache, busy threads. -+ * Same node, other CPU, busy threads. -+ * Other node, other CPU, idle cache, idle threads. -+ * Other node, other CPU, busy cache, idle threads. -+ * Other node, other CPU, busy threads. 
-+ */ -+static int best_mask_cpu(int best_cpu, struct rq *rq, cpumask_t *tmpmask) -+{ -+ int best_ranking = CPUIDLE_DIFF_NODE | CPUIDLE_THREAD_BUSY | -+ CPUIDLE_DIFF_CPU | CPUIDLE_CACHE_BUSY | CPUIDLE_DIFF_CORE | -+ CPUIDLE_DIFF_CORE_LLC | CPUIDLE_DIFF_THREAD; -+ int cpu_tmp; -+ -+ if (cpumask_test_cpu(best_cpu, tmpmask)) -+ goto out; -+ -+ for_each_cpu(cpu_tmp, tmpmask) { -+ int ranking, locality; -+ struct rq *tmp_rq; -+ -+ ranking = 0; -+ tmp_rq = cpu_rq(cpu_tmp); -+ -+ locality = rq->cpu_locality[cpu_tmp]; -+#ifdef CONFIG_NUMA -+ if (locality > LOCALITY_SMP) -+ ranking |= CPUIDLE_DIFF_NODE; -+ else -+#endif -+ if (locality > LOCALITY_MC) -+ ranking |= CPUIDLE_DIFF_CPU; -+#ifdef CONFIG_SCHED_MC -+ else if (locality == LOCALITY_MC_LLC) -+ ranking |= CPUIDLE_DIFF_CORE_LLC; -+ else if (locality == LOCALITY_MC) -+ ranking |= CPUIDLE_DIFF_CORE; -+ if (!(tmp_rq->cache_idle(tmp_rq))) -+ ranking |= CPUIDLE_CACHE_BUSY; -+#endif -+#ifdef CONFIG_SCHED_SMT -+ if (locality == LOCALITY_SMT) -+ ranking |= CPUIDLE_DIFF_THREAD; -+#endif -+ if (ranking < best_ranking -+#ifdef CONFIG_SCHED_SMT -+ || (ranking == best_ranking && (tmp_rq->siblings_idle(tmp_rq))) -+#endif -+ ) { -+ best_cpu = cpu_tmp; -+ best_ranking = ranking; -+ } -+ } -+out: -+ return best_cpu; -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ struct rq *this_rq = cpu_rq(this_cpu); -+ -+ return (this_rq->cpu_locality[that_cpu] < LOCALITY_SMP); -+} -+ -+/* As per resched_curr but only will resched idle task */ -+static inline void resched_idle(struct rq *rq) -+{ -+ if (test_tsk_need_resched(rq->idle)) -+ return; -+ -+ rq->preempt = rq->idle; -+ -+ set_tsk_need_resched(rq->idle); -+ -+ if (rq_local(rq)) { -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ smp_sched_reschedule(rq->cpu); -+} -+ -+DEFINE_PER_CPU(cpumask_t, idlemask); -+ -+static struct rq *resched_best_idle(struct task_struct *p, int cpu) -+{ -+ cpumask_t *tmpmask = &(per_cpu(idlemask, cpu)); -+ struct rq *rq; -+ int best_cpu; -+ -+ cpumask_and(tmpmask, p->cpus_ptr, &cpu_idle_map); -+ best_cpu = best_mask_cpu(cpu, task_rq(p), tmpmask); -+ rq = cpu_rq(best_cpu); -+ if (!smt_schedule(p, rq)) -+ return NULL; -+ rq->preempt = p; -+ resched_idle(rq); -+ return rq; -+} -+ -+static inline void resched_suitable_idle(struct task_struct *p) -+{ -+ if (suitable_idle_cpus(p)) -+ resched_best_idle(p, task_cpu(p)); -+} -+ -+static inline struct rq *rq_order(struct rq *rq, int cpu) -+{ -+ return rq->rq_order[cpu]; -+} -+#else /* CONFIG_SMP */ -+static inline void set_cpuidle_map(int cpu) -+{ -+} -+ -+static inline void clear_cpuidle_map(int cpu) -+{ -+} -+ -+static inline bool suitable_idle_cpus(struct task_struct *p) -+{ -+ return uprq->curr == uprq->idle; -+} -+ -+static inline void resched_suitable_idle(struct task_struct *p) -+{ -+} -+ -+static inline void resched_curr(struct rq *rq) -+{ -+ resched_task(rq->curr); -+} -+ -+static inline void resched_if_idle(struct rq *rq) -+{ -+} -+ -+static inline bool rq_local(struct rq *rq) -+{ -+ return true; -+} -+ -+static inline struct rq *rq_order(struct rq *rq, int cpu) -+{ -+ return rq; -+} -+ -+static inline bool smt_schedule(struct task_struct *p, struct rq *rq) -+{ -+ return true; -+} -+#endif /* CONFIG_SMP */ -+ -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ if (idleprio_task(p)) -+ return IDLE_PRIO; -+ if (iso_task(p)) -+ return ISO_PRIO; -+ return NORMAL_PRIO; -+} -+ -+/* -+ * Calculate the current priority, i.e. 
the priority -+ * taken into account by the scheduler. This value might -+ * be boosted by RT tasks as it will be RT if the task got -+ * RT-boosted. If not then it returns p->normal_prio. -+ */ -+static int effective_prio(struct task_struct *p) -+{ -+ p->normal_prio = normal_prio(p); -+ /* -+ * If we are RT tasks or we were boosted to RT priority, -+ * keep the priority unchanged. Otherwise, update priority -+ * to the normal priority: -+ */ -+ if (!rt_prio(p->prio)) -+ return p->normal_prio; -+ return p->prio; -+} -+ -+/* -+ * activate_task - move a task to the runqueue. Enter with rq locked. -+ */ -+static void activate_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ resched_if_idle(rq); -+ -+ /* -+ * Sleep time is in units of nanosecs, so shift by 20 to get a -+ * milliseconds-range estimation of the amount of time that the task -+ * spent sleeping: -+ */ -+ if (unlikely(prof_on == SLEEP_PROFILING)) { -+ if (p->state == TASK_UNINTERRUPTIBLE) -+ profile_hits(SLEEP_PROFILING, (void *)get_wchan(p), -+ (rq->niffies - p->last_ran) >> 20); -+ } -+ -+ p->prio = effective_prio(p); -+ enqueue_task(rq, p, flags); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+} -+ -+/* -+ * deactivate_task - If it's running, it's not on the runqueue and we can just -+ * decrement the nr_running. Enter with rq locked. -+ */ -+static inline void deactivate_task(struct task_struct *p, struct rq *rq) -+{ -+ p->on_rq = 0; -+ sched_info_dequeued(rq, p); -+ /* deactivate_task is always DEQUEUE_SLEEP in muqss */ -+ psi_dequeue(p, DEQUEUE_SLEEP); -+} -+ -+#ifdef CONFIG_SMP -+void set_task_cpu(struct task_struct *p, unsigned int new_cpu) -+{ -+ struct rq *rq; -+ -+ if (task_cpu(p) == new_cpu) -+ return; -+ -+ /* Do NOT call set_task_cpu on a currently queued task as we will not -+ * be reliably holding the rq lock after changing CPU. */ -+ BUG_ON(task_queued(p)); -+ rq = task_rq(p); -+ -+#ifdef CONFIG_LOCKDEP -+ /* -+ * The caller should hold either p->pi_lock or rq->lock, when changing -+ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. -+ * -+ * Furthermore, all task_rq users should acquire both locks, see -+ * task_rq_lock(). -+ */ -+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || -+ lockdep_is_held(rq->lock))); -+#endif -+ -+ trace_sched_migrate_task(p, new_cpu); -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ -+ /* -+ * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be -+ * successfully executed on another CPU. We must ensure that updates of -+ * per-task data have been completed by this moment. -+ */ -+ smp_wmb(); -+ -+ p->wake_cpu = new_cpu; -+ -+ if (task_running(rq, p)) { -+ /* -+ * We should only be calling this on a running task if we're -+ * holding rq lock. -+ */ -+ lockdep_assert_held(rq->lock); -+ -+ /* -+ * We can't change the task_thread_info CPU on a running task -+ * as p will still be protected by the rq lock of the CPU it -+ * is still running on so we only set the wake_cpu for it to be -+ * lazily updated once off the CPU. -+ */ -+ return; -+ } -+ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ WRITE_ONCE(p->cpu, new_cpu); -+#else -+ WRITE_ONCE(task_thread_info(p)->cpu, new_cpu); -+#endif -+ /* We're no longer protecting p after this point since we're holding -+ * the wrong runqueue lock. */ -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Move a task off the runqueue and take it to a cpu for it will -+ * become the running task. 
-+ */ -+static inline void take_task(struct rq *rq, int cpu, struct task_struct *p) -+{ -+ struct rq *p_rq = task_rq(p); -+ -+ dequeue_task(p_rq, p, DEQUEUE_SAVE); -+ if (p_rq != rq) { -+ sched_info_dequeued(p_rq, p); -+ sched_info_queued(rq, p); -+ } -+ set_task_cpu(p, cpu); -+} -+ -+/* -+ * Returns a descheduling task to the runqueue unless it is being -+ * deactivated. -+ */ -+static inline void return_task(struct task_struct *p, struct rq *rq, -+ int cpu, bool deactivate) -+{ -+ if (deactivate) -+ deactivate_task(p, rq); -+ else { -+#ifdef CONFIG_SMP -+ /* -+ * set_task_cpu was called on the running task that doesn't -+ * want to deactivate so it has to be enqueued to a different -+ * CPU and we need its lock. Tag it to be moved with as the -+ * lock is dropped in finish_lock_switch. -+ */ -+ if (unlikely(p->wake_cpu != cpu)) -+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); -+ else -+#endif -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ } -+} -+ -+/* Enter with rq lock held. We know p is on the local cpu */ -+static inline void __set_tsk_resched(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ -+/** -+ * task_curr - is this task currently executing on a CPU? -+ * @p: the task in question. -+ * -+ * Return: 1 if the task is currently executing. 0 otherwise. -+ */ -+inline int task_curr(const struct task_struct *p) -+{ -+ return cpu_curr(task_cpu(p)) == p; -+} -+ -+#ifdef CONFIG_SMP -+/* -+ * wait_task_inactive - wait for a thread to unschedule. -+ * -+ * If @match_state is nonzero, it's the @p->state value just checked and -+ * not expected to change. If it changes, i.e. @p might have woken up, -+ * then return zero. When we succeed in waiting for @p to be off its CPU, -+ * we return a positive number (its total switch count). If a second call -+ * a short while later returns the same number, the caller can be sure that -+ * @p has remained unscheduled the whole time. -+ * -+ * The caller must ensure that the task *will* unschedule sometime soon, -+ * else this function might spin for a *long* time. This function can't -+ * be called with interrupts off, or it may introduce deadlock with -+ * smp_call_function() if an IPI is sent by the same process we are -+ * waiting to become inactive. -+ */ -+unsigned long wait_task_inactive(struct task_struct *p, long match_state) -+{ -+ int running, queued; -+ struct rq_flags rf; -+ unsigned long ncsw; -+ struct rq *rq; -+ -+ for (;;) { -+ rq = task_rq(p); -+ -+ /* -+ * If the task is actively running on another CPU -+ * still, just relax and busy-wait without holding -+ * any locks. -+ * -+ * NOTE! Since we don't hold any locks, it's not -+ * even sure that "rq" stays as the right runqueue! -+ * But we don't care, since this will return false -+ * if the runqueue has changed and p is actually now -+ * running somewhere else! -+ */ -+ while (task_running(rq, p)) { -+ if (match_state && unlikely(p->state != match_state)) -+ return 0; -+ cpu_relax(); -+ } -+ -+ /* -+ * Ok, time to look more closely! We need the rq -+ * lock now, to be *sure*. If we're wrong, we'll -+ * just go back and repeat. -+ */ -+ rq = task_rq_lock(p, &rf); -+ trace_sched_wait_task(p); -+ running = task_running(rq, p); -+ queued = task_on_rq_queued(p); -+ ncsw = 0; -+ if (!match_state || p->state == match_state) -+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ -+ task_rq_unlock(rq, p, &rf); -+ -+ /* -+ * If it changed from the expected state, bail out now. 
-+ */ -+ if (unlikely(!ncsw)) -+ break; -+ -+ /* -+ * Was it really running after all now that we -+ * checked with the proper locks actually held? -+ * -+ * Oops. Go back and try again.. -+ */ -+ if (unlikely(running)) { -+ cpu_relax(); -+ continue; -+ } -+ -+ /* -+ * It's not enough that it's not actively running, -+ * it must be off the runqueue _entirely_, and not -+ * preempted! -+ * -+ * So if it was still runnable (but just not actively -+ * running right now), it's preempted, and we should -+ * yield - it could be a while. -+ */ -+ if (unlikely(queued)) { -+ ktime_t to = NSEC_PER_SEC / HZ; -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ continue; -+ } -+ -+ /* -+ * Ahh, all good. It wasn't running, and it wasn't -+ * runnable, which means that it will never become -+ * running in the future either. We're all done! -+ */ -+ break; -+ } -+ -+ return ncsw; -+} -+ -+/*** -+ * kick_process - kick a running thread to enter/exit the kernel -+ * @p: the to-be-kicked thread -+ * -+ * Cause a process which is running on another CPU to enter -+ * kernel-mode, without any delay. (to get signals handled.) -+ * -+ * NOTE: this function doesn't have to take the runqueue lock, -+ * because all it wants to ensure is that the remote task enters -+ * the kernel. If the IPI races and the task has been migrated -+ * to another CPU then no harm is done and the purpose has been -+ * achieved as well. -+ */ -+void kick_process(struct task_struct *p) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ cpu = task_cpu(p); -+ if ((cpu != smp_processor_id()) && task_curr(p)) -+ smp_sched_reschedule(cpu); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(kick_process); -+#endif -+ -+/* -+ * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the -+ * basis of earlier deadlines. SCHED_IDLEPRIO don't preempt anything else or -+ * between themselves, they cooperatively multitask. An idle rq scores as -+ * prio PRIO_LIMIT so it is always preempted. -+ */ -+static inline bool -+can_preempt(struct task_struct *p, int prio, u64 deadline) -+{ -+ /* Better static priority RT task or better policy preemption */ -+ if (p->prio < prio) -+ return true; -+ if (p->prio > prio) -+ return false; -+ if (p->policy == SCHED_BATCH) -+ return false; -+ /* SCHED_NORMAL and ISO will preempt based on deadline */ -+ if (!deadline_before(p->deadline, deadline)) -+ return false; -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+ -+/* -+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see -+ * __set_cpus_allowed_ptr(). -+ */ -+static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -+{ -+ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ -+ if (is_per_cpu_kthread(p)) -+ return cpu_online(cpu); -+ -+ return cpu_active(cpu); -+} -+ -+/* -+ * Check to see if p can run on cpu, and if not, whether there are any online -+ * CPUs it can run on instead. This only happens with the hotplug threads that -+ * bring up the CPUs. 
-+ */ -+static inline bool sched_other_cpu(struct task_struct *p, int cpu) -+{ -+ if (likely(cpumask_test_cpu(cpu, p->cpus_ptr))) -+ return false; -+ if (p->nr_cpus_allowed == 1) { -+ cpumask_t valid_mask; -+ -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_online_mask); -+ if (unlikely(cpumask_empty(&valid_mask))) -+ return false; -+ } -+ return true; -+} -+ -+static inline bool needs_other_cpu(struct task_struct *p, int cpu) -+{ -+ if (cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ return true; -+} -+ -+#define cpu_online_map (*(cpumask_t *)cpu_online_mask) -+ -+static void try_preempt(struct task_struct *p, struct rq *this_rq) -+{ -+ int i, this_entries = rq_load(this_rq); -+ cpumask_t tmp; -+ -+ if (suitable_idle_cpus(p) && resched_best_idle(p, task_cpu(p))) -+ return; -+ -+ /* IDLEPRIO tasks never preempt anything but idle */ -+ if (p->policy == SCHED_IDLEPRIO) -+ return; -+ -+ cpumask_and(&tmp, &cpu_online_map, p->cpus_ptr); -+ -+ for (i = 0; i < num_online_cpus(); i++) { -+ struct rq *rq = this_rq->cpu_order[i]; -+ -+ if (!cpumask_test_cpu(rq->cpu, &tmp)) -+ continue; -+ -+ if (!sched_interactive && rq != this_rq && rq_load(rq) <= this_entries) -+ continue; -+ if (smt_schedule(p, rq) && can_preempt(p, rq->rq_prio, rq->rq_deadline)) { -+ /* We set rq->preempting lockless, it's a hint only */ -+ rq->preempting = p; -+ resched_curr(rq); -+ return; -+ } -+ } -+} -+ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check); -+#else /* CONFIG_SMP */ -+static inline bool needs_other_cpu(struct task_struct *p, int cpu) -+{ -+ return false; -+} -+ -+static void try_preempt(struct task_struct *p, struct rq *this_rq) -+{ -+ if (p->policy == SCHED_IDLEPRIO) -+ return; -+ if (can_preempt(p, uprq->rq_prio, uprq->rq_deadline)) -+ resched_curr(uprq); -+} -+ -+static inline int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+#endif /* CONFIG_SMP */ -+ -+static void -+ttwu_stat(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq; -+ -+ if (!schedstat_enabled()) -+ return; -+ -+ rq = this_rq(); -+ -+#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) { -+ __schedstat_inc(rq->ttwu_local); -+ } else { -+ struct sched_domain *sd; -+ -+ rcu_read_lock(); -+ for_each_domain(rq->cpu, sd) { -+ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { -+ __schedstat_inc(sd->ttwu_wake_remote); -+ break; -+ } -+ } -+ rcu_read_unlock(); -+ } -+ -+#endif /* CONFIG_SMP */ -+ -+ __schedstat_inc(rq->ttwu_count); -+} -+ -+/* -+ * Mark the task runnable and perform wakeup-preemption. -+ */ -+static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ /* -+ * Sync wakeups (i.e. those types of wakeups where the waker -+ * has indicated that it will leave the CPU in short order) -+ * don't trigger a preemption if there are no idle cpus, -+ * instead waiting for current to deschedule. 
-+ */ -+ if (wake_flags & WF_SYNC) -+ resched_suitable_idle(p); -+ else -+ try_preempt(p, rq); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+} -+ -+static void -+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ int en_flags = ENQUEUE_WAKEUP; -+ -+ lockdep_assert_held(rq->lock); -+ -+ if (p->sched_contributes_to_load) -+ rq->nr_uninterruptible--; -+ -+#ifdef CONFIG_SMP -+ if (wake_flags & WF_MIGRATED) -+ en_flags |= ENQUEUE_MIGRATED; -+#endif -+ -+ activate_task(rq, p, en_flags); -+ ttwu_do_wakeup(rq, p, wake_flags); -+} -+ -+/* -+ * Consider @p being inside a wait loop: -+ * -+ * for (;;) { -+ * set_current_state(TASK_UNINTERRUPTIBLE); -+ * -+ * if (CONDITION) -+ * break; -+ * -+ * schedule(); -+ * } -+ * __set_current_state(TASK_RUNNING); -+ * -+ * between set_current_state() and schedule(). In this case @p is still -+ * runnable, so all that needs doing is change p->state back to TASK_RUNNING in -+ * an atomic manner. -+ * -+ * By taking task_rq(p)->lock we serialize against schedule(), if @p->on_rq -+ * then schedule() must still happen and p->state can be changed to -+ * TASK_RUNNING. Otherwise we lost the race, schedule() has happened, and we -+ * need to do a full wakeup with enqueue. -+ * -+ * Returns: %true when the wakeup is done, -+ * %false otherwise. -+ */ -+static int ttwu_runnable(struct task_struct *p, int wake_flags) -+{ -+ struct rq *rq; -+ int ret = 0; -+ -+ rq = __task_rq_lock(p, NULL); -+ if (likely(task_on_rq_queued(p))) { -+ ttwu_do_wakeup(rq, p, wake_flags); -+ ret = 1; -+ } -+ __task_rq_unlock(rq, NULL); -+ -+ return ret; -+} -+ -+#ifdef CONFIG_SMP -+void sched_ttwu_pending(void *arg) -+{ -+ struct llist_node *llist = arg; -+ struct rq *rq = this_rq(); -+ struct task_struct *p, *t; -+ struct rq_flags rf; -+ -+ if (!llist) -+ return; -+ -+ /* -+ * rq::ttwu_pending racy indication of out-standing wakeups. -+ * Races such that false-negatives are possible, since they -+ * are shorter lived that false-positives would be. -+ */ -+ WRITE_ONCE(rq->ttwu_pending, 0); -+ -+ rq_lock_irqsave(rq, &rf); -+ -+ llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { -+ if (WARN_ON_ONCE(p->on_cpu)) -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq))) -+ set_task_cpu(p, cpu_of(rq)); -+ -+ ttwu_do_activate(rq, p, 0); -+ } -+ -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+void send_call_function_single_ipi(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (!set_nr_if_polling(rq->idle)) -+ arch_send_call_function_single_ipi(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+/* -+ * Queue a task on the target CPUs wake_list and wake the CPU via IPI if -+ * necessary. The wakee CPU on receipt of the IPI will queue the task -+ * via sched_ttwu_wakeup() for activation so the wakee incurs the cost -+ * of the wakeup instead of the waker. 
-+ */ -+static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ WRITE_ONCE(rq->ttwu_pending, 1); -+ __smp_call_single_queue(cpu, &p->wake_entry.llist); -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ rq_lock_irqsave(rq, &rf); -+ if (likely(is_idle_task(rq->curr))) -+ smp_sched_reschedule(cpu); -+ /* Else cpu is not in idle, do nothing here */ -+ rq_unlock_irqrestore(rq, &rf); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+ -+static inline bool ttwu_queue_cond(int cpu, int wake_flags) -+{ -+ /* -+ * If the CPU does not share cache, then queue the task on the -+ * remote rqs wakelist to avoid accessing remote data. -+ */ -+ if (!cpus_share_cache(smp_processor_id(), cpu)) -+ return true; -+ -+ /* -+ * If the task is descheduling and the only running task on the -+ * CPU then use the wakelist to offload the task activation to -+ * the soon-to-be-idle CPU as the current CPU is likely busy. -+ * nr_running is checked to avoid unnecessary task stacking. -+ */ -+ if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) -+ return true; -+ -+ return false; -+} -+ -+static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) { -+ if (WARN_ON_ONCE(cpu == smp_processor_id())) -+ return false; -+ -+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ -+ __ttwu_queue_wakelist(p, cpu, wake_flags); -+ return true; -+ } -+ -+ return false; -+} -+ -+static int valid_task_cpu(struct task_struct *p) -+{ -+ cpumask_t valid_mask; -+ -+ if (p->flags & PF_KTHREAD) -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_all_mask); -+ else -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_active_mask); -+ -+ if (unlikely(!cpumask_weight(&valid_mask))) { -+ /* We shouldn't be hitting this any more */ -+ printk(KERN_WARNING "SCHED: No cpumask for %s/%d weight %d\n", p->comm, -+ p->pid, cpumask_weight(p->cpus_ptr)); -+ return cpumask_any(p->cpus_ptr); -+ } -+ return cpumask_any(&valid_mask); -+} -+ -+/* -+ * For a task that's just being woken up we have a valuable balancing -+ * opportunity so choose the nearest cache most lightly loaded runqueue. -+ * Entered with rq locked and returns with the chosen runqueue locked. 
-+ */ -+static inline int select_best_cpu(struct task_struct *p) -+{ -+ unsigned int idlest = ~0U; -+ struct rq *rq = NULL; -+ int i; -+ -+ if (suitable_idle_cpus(p)) { -+ int cpu = task_cpu(p); -+ -+ if (unlikely(needs_other_cpu(p, cpu))) -+ cpu = valid_task_cpu(p); -+ rq = resched_best_idle(p, cpu); -+ if (likely(rq)) -+ return rq->cpu; -+ } -+ -+ for (i = 0; i < num_online_cpus(); i++) { -+ struct rq *other_rq = task_rq(p)->cpu_order[i]; -+ int entries; -+ -+ if (!other_rq->online) -+ continue; -+ if (needs_other_cpu(p, other_rq->cpu)) -+ continue; -+ entries = rq_load(other_rq); -+ if (entries >= idlest) -+ continue; -+ idlest = entries; -+ rq = other_rq; -+ } -+ if (unlikely(!rq)) -+ return task_cpu(p); -+ return rq->cpu; -+} -+#else /* CONFIG_SMP */ -+ -+static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ return false; -+} -+ -+static int valid_task_cpu(struct task_struct *p) -+{ -+ return 0; -+} -+ -+static inline int select_best_cpu(struct task_struct *p) -+{ -+ return 0; -+} -+ -+static struct rq *resched_best_idle(struct task_struct *p, int cpu) -+{ -+ return NULL; -+} -+#endif /* CONFIG_SMP */ -+ -+static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (ttwu_queue_wakelist(p, cpu, wake_flags)) -+ return; -+ -+ rq_lock(rq); -+ update_rq_clock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ rq_unlock(rq); -+} -+ -+/*** -+ * try_to_wake_up - wake up a thread -+ * @p: the thread to be awakened -+ * @state: the mask of task states that can be woken -+ * @wake_flags: wake modifier flags (WF_*) -+ * -+ * Put it on the run-queue if it's not already there. The "current" -+ * thread is always on the run-queue (except when the actual -+ * re-schedule is in progress), and as such you're allowed to do -+ * the simpler "current->state = TASK_RUNNING" to mark yourself -+ * runnable without the overhead of this. -+ * -+ * Return: %true if @p was woken up, %false if it was already running. -+ * or @state didn't match @p's state. -+ */ -+static int -+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) -+{ -+ unsigned long flags; -+ int cpu, success = 0; -+ -+ preempt_disable(); -+ if (p == current) { -+ /* -+ * We're waking current, this means 'p->on_rq' and 'task_cpu(p) -+ * == smp_processor_id()'. Together this means we can special -+ * case the whole 'p->on_rq && ttwu_runnable()' case below -+ * without taking any locks. -+ * -+ * In particular: -+ * - we rely on Program-Order guarantees for all the ordering, -+ * - we're serialized against set_special_state() by virtue of -+ * it disabling IRQs (this allows not taking ->pi_lock). -+ */ -+ if (!(p->state & state)) -+ goto out; -+ -+ success = 1; -+ trace_sched_waking(p); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+ goto out; -+ } -+ -+ /* -+ * If we are going to wake up a thread waiting for CONDITION we -+ * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with smp_store_mb() -+ * in set_current_state() that the waiting thread does. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ smp_mb__after_spinlock(); -+ if (!(p->state & state)) -+ goto unlock; -+ -+ trace_sched_waking(p); -+ -+ /* We're going to change ->state: */ -+ success = 1; -+ -+ /* -+ * Ensure we load p->on_rq _after_ p->state, otherwise it would -+ * be possible to, falsely, observe p->on_rq == 0 and get stuck -+ * in smp_cond_load_acquire() below. 
-+ * -+ * sched_ttwu_pending() try_to_wake_up() -+ * STORE p->on_rq = 1 LOAD p->state -+ * UNLOCK rq->lock -+ * -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * UNLOCK rq->lock -+ * -+ * [task p] -+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) -+ goto unlock; -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -+ * possible to, falsely, observe p->on_cpu == 0. -+ * -+ * One must be running (->on_cpu == 1) in order to remove oneself -+ * from the runqueue. -+ * -+ * __schedule() (switch to task 'p') try_to_wake_up() -+ * STORE p->on_cpu = 1 LOAD p->on_rq -+ * UNLOCK rq->lock -+ * -+ * __schedule() (put 'p' to sleep) -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * STORE p->on_rq = 0 LOAD p->on_cpu -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ * -+ * Form a control-dep-acquire with p->on_rq == 0 above, to ensure -+ * schedule()'s deactivate_task() has 'happened' and p will no longer -+ * care about it's own p->state. See the comment in __schedule(). -+ */ -+ smp_acquire__after_ctrl_dep(); -+ -+ /* -+ * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq -+ * == 0), which means we need to do an enqueue, change p->state to -+ * TASK_WAKING such that we can unlock p->pi_lock before doing the -+ * enqueue, such as ttwu_queue_wakelist(). -+ */ -+ p->state = TASK_WAKING; -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, considering queueing p on the remote CPUs wake_list -+ * which potentially sends an IPI instead of spinning on p->on_cpu to -+ * let the waker make forward progress. This is safe because IRQs are -+ * disabled and the IPI will deliver after on_cpu is cleared. -+ * -+ * Ensure we load task_cpu(p) after p->on_cpu: -+ * -+ * set_task_cpu(p, cpu); -+ * STORE p->cpu = @cpu -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock -+ * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu) -+ * STORE p->on_cpu = 1 LOAD p->cpu -+ * -+ * to ensure we observe the correct CPU on which the task is currently -+ * scheduling. -+ */ -+ if (smp_load_acquire(&p->on_cpu) && -+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) -+ goto unlock; -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until its done referencing the task. -+ * -+ * Pairs with the smp_store_release() in finish_task(). -+ * -+ * This ensures that tasks getting woken will be fully ordered against -+ * their previous state and preserve Program Order. 
-+ */ -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ cpu = select_best_cpu(p); -+ if (task_cpu(p) != cpu) { -+ wake_flags |= WF_MIGRATED; -+ psi_ttwu_dequeue(p); -+ set_task_cpu(p, cpu); -+ } -+ -+#else -+ cpu = task_cpu(p); -+#endif /* CONFIG_SMP */ -+ -+ ttwu_queue(p, cpu, wake_flags); -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+out: -+ if (success) -+ ttwu_stat(p, task_cpu(p), wake_flags); -+ preempt_enable(); -+ -+ return success; -+} -+ -+/** -+ * try_invoke_on_locked_down_task - Invoke a function on task in fixed state -+ * @p: Process for which the function is to be invoked. -+ * @func: Function to invoke. -+ * @arg: Argument to function. -+ * -+ * If the specified task can be quickly locked into a definite state -+ * (either sleeping or on a given runqueue), arrange to keep it in that -+ * state while invoking @func(@arg). This function can use ->on_rq and -+ * task_curr() to work out what the state is, if required. Given that -+ * @func can be invoked with a runqueue lock held, it had better be quite -+ * lightweight. -+ * -+ * Returns: -+ * @false if the task slipped out from under the locks. -+ * @true if the task was locked onto a runqueue or is sleeping. -+ * However, @func can override this by returning @false. -+ */ -+bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) -+{ -+ bool ret = false; -+ struct rq *rq; -+ -+ lockdep_assert_irqs_enabled(); -+ raw_spin_lock_irq(&p->pi_lock); -+ if (p->on_rq) { -+ rq = __task_rq_lock(p, NULL); -+ if (task_rq(p) == rq) -+ ret = func(p, arg); -+ rq_unlock(rq); -+ } else { -+ switch (p->state) { -+ case TASK_RUNNING: -+ case TASK_WAKING: -+ break; -+ default: -+ smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). -+ if (!p->on_rq) -+ ret = func(p, arg); -+ } -+ } -+ raw_spin_unlock_irq(&p->pi_lock); -+ return ret; -+} -+ -+/** -+ * wake_up_process - Wake up a specific process -+ * @p: The process to be woken up. -+ * -+ * Attempt to wake up the nominated process and move it to the set of runnable -+ * processes. -+ * -+ * Return: 1 if the process was woken up, 0 if it was already running. -+ * -+ * This function executes a full memory barrier before accessing the task state. -+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+int wake_up_state(struct task_struct *p, unsigned int state) -+{ -+ return try_to_wake_up(p, state, 0); -+} -+ -+static void time_slice_expired(struct task_struct *p, struct rq *rq); -+ -+/* -+ * Perform scheduler related setup for a newly forked process p. -+ * p is forked by current. -+ */ -+int sched_fork(unsigned long __maybe_unused clone_flags, struct task_struct *p) -+{ -+ unsigned long flags; -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ INIT_HLIST_HEAD(&p->preempt_notifiers); -+#endif -+ -+#ifdef CONFIG_COMPACTION -+ p->capture_control = NULL; -+#endif -+ -+#ifdef CONFIG_SMP -+ p->wake_entry.u_flags = CSD_TYPE_TTWU; -+#endif -+ /* -+ * We mark the process as NEW here. This guarantees that -+ * nobody will actually run it, and a signal or other external -+ * event cannot wake it up and insert it on the runqueue either. -+ */ -+ p->state = TASK_NEW; -+ -+ /* -+ * The process state is set to the same value of the process executing -+ * do_fork() code. That is running. This guarantees that nobody will -+ * actually run it, and a signal or other external event cannot wake -+ * it up and insert it on the runqueue either. 
-+ */ -+ -+ /* Should be reset in fork.c but done here for ease of MuQSS patching */ -+ p->on_cpu = -+ p->on_rq = -+ p->utime = -+ p->stime = -+ p->sched_time = -+ p->stime_ns = -+ p->utime_ns = 0; -+ skiplist_node_init(&p->node); -+ -+ /* -+ * Revert to default priority/policy on fork if requested. -+ */ -+ if (unlikely(p->sched_reset_on_fork)) { -+ if (p->policy == SCHED_FIFO || p->policy == SCHED_RR || p-> policy == SCHED_ISO) { -+ p->policy = SCHED_NORMAL; -+ p->normal_prio = normal_prio(p); -+ } -+ -+ if (PRIO_TO_NICE(p->static_prio) < 0) { -+ p->static_prio = NICE_TO_PRIO(0); -+ p->normal_prio = p->static_prio; -+ } -+ -+ /* -+ * We don't need the reset flag anymore after the fork. It has -+ * fulfilled its duty: -+ */ -+ p->sched_reset_on_fork = 0; -+ } -+ -+ /* -+ * Silence PROVE_RCU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rseq_migrate(p); -+ set_task_cpu(p, smp_processor_id()); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ return 0; -+} -+ -+void sched_post_fork(struct task_struct *p) -+{ -+} -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+static bool __initdata __sched_schedstats = false; -+ -+static void set_schedstats(bool enabled) -+{ -+ if (enabled) -+ static_branch_enable(&sched_schedstats); -+ else -+ static_branch_disable(&sched_schedstats); -+} -+ -+void force_schedstat_enabled(void) -+{ -+ if (!schedstat_enabled()) { -+ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); -+ static_branch_enable(&sched_schedstats); -+ } -+} -+ -+static int __init setup_schedstats(char *str) -+{ -+ int ret = 0; -+ if (!str) -+ goto out; -+ -+ /* -+ * This code is called before jump labels have been set up, so we can't -+ * change the static branch directly just yet. Instead set a temporary -+ * variable so init_schedstats() can do it later. 
-+ */ -+ if (!strcmp(str, "enable")) { -+ __sched_schedstats = true; -+ ret = 1; -+ } else if (!strcmp(str, "disable")) { -+ __sched_schedstats = false; -+ ret = 1; -+ } -+out: -+ if (!ret) -+ pr_warn("Unable to parse schedstats=\n"); -+ -+ return ret; -+} -+__setup("schedstats=", setup_schedstats); -+ -+static void __init init_schedstats(void) -+{ -+ set_schedstats(__sched_schedstats); -+} -+ -+#ifdef CONFIG_PROC_SYSCTL -+int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, -+ size_t *lenp, loff_t *ppos) -+{ -+ struct ctl_table t; -+ int err; -+ int state = static_branch_likely(&sched_schedstats); -+ -+ if (write && !capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ t = *table; -+ t.data = &state; -+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); -+ if (err < 0) -+ return err; -+ if (write) -+ set_schedstats(state); -+ return err; -+} -+#endif /* CONFIG_PROC_SYSCTL */ -+#else /* !CONFIG_SCHEDSTATS */ -+static inline void init_schedstats(void) {} -+#endif /* CONFIG_SCHEDSTATS */ -+ -+static void update_cpu_clock_switch(struct rq *rq, struct task_struct *p); -+ -+static void account_task_cpu(struct rq *rq, struct task_struct *p) -+{ -+ update_clocks(rq); -+ /* This isn't really a context switch but accounting is the same */ -+ update_cpu_clock_switch(rq, p); -+ p->last_ran = rq->niffies; -+} -+ -+bool sched_smp_initialized __read_mostly; -+ -+static inline int hrexpiry_enabled(struct rq *rq) -+{ -+ if (unlikely(!cpu_active(cpu_of(rq)) || !sched_smp_initialized)) -+ return 0; -+ return hrtimer_is_hres_active(&rq->hrexpiry_timer); -+} -+ -+/* -+ * Use HR-timers to deliver accurate preemption points. -+ */ -+static inline void hrexpiry_clear(struct rq *rq) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ if (hrtimer_active(&rq->hrexpiry_timer)) -+ hrtimer_cancel(&rq->hrexpiry_timer); -+} -+ -+/* -+ * High-resolution time_slice expiry. -+ * Runs from hardirq context with interrupts disabled. -+ */ -+static enum hrtimer_restart hrexpiry(struct hrtimer *timer) -+{ -+ struct rq *rq = container_of(timer, struct rq, hrexpiry_timer); -+ struct task_struct *p; -+ -+ /* This can happen during CPU hotplug / resume */ -+ if (unlikely(cpu_of(rq) != smp_processor_id())) -+ goto out; -+ -+ /* -+ * We're doing this without the runqueue lock but this should always -+ * be run on the local CPU. Time slice should run out in __schedule -+ * but we set it to zero here in case niffies is slightly less. -+ */ -+ p = rq->curr; -+ p->time_slice = 0; -+ __set_tsk_resched(p); -+out: -+ return HRTIMER_NORESTART; -+} -+ -+/* -+ * Called to set the hrexpiry timer state. -+ * -+ * called with irqs disabled from the local CPU only -+ */ -+static void hrexpiry_start(struct rq *rq, u64 delay) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ -+ hrtimer_start(&rq->hrexpiry_timer, ns_to_ktime(delay), -+ HRTIMER_MODE_REL_PINNED); -+} -+ -+static void init_rq_hrexpiry(struct rq *rq) -+{ -+ hrtimer_init(&rq->hrexpiry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ rq->hrexpiry_timer.function = hrexpiry; -+} -+ -+static inline int rq_dither(struct rq *rq) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return HALF_JIFFY_US; -+ return 0; -+} -+ -+/* -+ * wake_up_new_task - wake up a newly created task for the first time. -+ * -+ * This function will do some initial scheduler statistics housekeeping -+ * that must be done for every newly created context, then puts the task -+ * on the runqueue and wakes it. 
-+ */ -+void wake_up_new_task(struct task_struct *p) -+{ -+ struct task_struct *parent, *rq_curr; -+ struct rq *rq, *new_rq; -+ unsigned long flags; -+ -+ parent = p->parent; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ p->state = TASK_RUNNING; -+ /* Task_rq can't change yet on a new task */ -+ new_rq = rq = task_rq(p); -+ if (unlikely(needs_other_cpu(p, task_cpu(p)))) { -+ set_task_cpu(p, valid_task_cpu(p)); -+ new_rq = task_rq(p); -+ } -+ -+ double_rq_lock(rq, new_rq); -+ rq_curr = rq->curr; -+ -+ /* -+ * Make sure we do not leak PI boosting priority to the child. -+ */ -+ p->prio = rq_curr->normal_prio; -+ -+ trace_sched_wakeup_new(p); -+ -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. If it's negative, it won't -+ * matter since that's the same as being 0. rq->rq_deadline is only -+ * modified within schedule() so it is always equal to -+ * current->deadline. -+ */ -+ account_task_cpu(rq, rq_curr); -+ p->last_ran = rq_curr->last_ran; -+ if (likely(rq_curr->policy != SCHED_FIFO)) { -+ rq_curr->time_slice /= 2; -+ if (rq_curr->time_slice < RESCHED_US) { -+ /* -+ * Forking task has run out of timeslice. Reschedule it and -+ * start its child with a new time slice and deadline. The -+ * child will end up running first because its deadline will -+ * be slightly earlier. -+ */ -+ __set_tsk_resched(rq_curr); -+ time_slice_expired(p, new_rq); -+ if (suitable_idle_cpus(p)) -+ resched_best_idle(p, task_cpu(p)); -+ else if (unlikely(rq != new_rq)) -+ try_preempt(p, new_rq); -+ } else { -+ p->time_slice = rq_curr->time_slice; -+ if (rq_curr == parent && rq == new_rq && !suitable_idle_cpus(p)) { -+ /* -+ * The VM isn't cloned, so we're in a good position to -+ * do child-runs-first in anticipation of an exec. This -+ * usually avoids a lot of COW overhead. -+ */ -+ __set_tsk_resched(rq_curr); -+ } else { -+ /* -+ * Adjust the hrexpiry since rq_curr will keep -+ * running and its timeslice has been shortened. -+ */ -+ hrexpiry_start(rq, US_TO_NS(rq_curr->time_slice)); -+ try_preempt(p, new_rq); -+ } -+ } -+ } else { -+ time_slice_expired(p, new_rq); -+ try_preempt(p, new_rq); -+ } -+ activate_task(new_rq, p, 0); -+ double_rq_unlock(rq, new_rq); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ -+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); -+ -+void preempt_notifier_inc(void) -+{ -+ static_branch_inc(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_inc); -+ -+void preempt_notifier_dec(void) -+{ -+ static_branch_dec(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_dec); -+ -+/** -+ * preempt_notifier_register - tell me when current is being preempted & rescheduled -+ * @notifier: notifier struct to register -+ */ -+void preempt_notifier_register(struct preempt_notifier *notifier) -+{ -+ if (!static_branch_unlikely(&preempt_notifier_key)) -+ WARN(1, "registering preempt_notifier while notifiers disabled\n"); -+ -+ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_register); -+ -+/** -+ * preempt_notifier_unregister - no longer interested in preemption notifications -+ * @notifier: notifier struct to unregister -+ * -+ * This is *not* safe to call from within a preemption notifier. 
-+ */ -+void preempt_notifier_unregister(struct preempt_notifier *notifier) -+{ -+ hlist_del(¬ifier->link); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_unregister); -+ -+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_in(notifier, raw_smp_processor_id()); -+} -+ -+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_in_preempt_notifiers(curr); -+} -+ -+static void -+__fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_out(notifier, next); -+} -+ -+static __always_inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_out_preempt_notifiers(curr, next); -+} -+ -+#else /* !CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+} -+ -+static inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+} -+ -+#endif /* CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void prepare_task(struct task_struct *next) -+{ -+ /* -+ * Claim the task as running, we do this before switching to it -+ * such that any running task will have this set. -+ */ -+ next->on_cpu = 1; -+} -+ -+static inline void finish_task(struct task_struct *prev) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * This must be the very last reference to @prev from this CPU. After -+ * p->on_cpu is cleared, the task can be moved to a different CPU. We -+ * must ensure this doesn't happen until the switch is completely -+ * finished. -+ * -+ * In particular, the load of prev->state in finish_task_switch() must -+ * happen before this. -+ * -+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). -+ */ -+ smp_store_release(&prev->on_cpu, 0); -+#endif -+} -+ -+static inline void -+prepare_lock_switch(struct rq *rq, struct task_struct *next) -+{ -+ /* -+ * Since the runqueue lock will be released by the next -+ * task (which is an invalid locking op but in the case -+ * of the scheduler it's an obvious special-case), so we -+ * do an early lockdep release here: -+ */ -+ spin_release(&rq->lock->dep_map, _THIS_IP_); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* this is a valid case when another task releases the spinlock */ -+ rq->lock->owner = next; -+#endif -+} -+ -+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) -+{ -+ /* -+ * If we are tracking spinlock dependencies then we have to -+ * fix up the runqueue lock - which gets 'carried over' from -+ * prev into current: -+ */ -+ spin_acquire(&rq->lock->dep_map, 0, 0, _THIS_IP_); -+ -+#ifdef CONFIG_SMP -+ /* -+ * If prev was marked as migrating to another CPU in return_task, drop -+ * the local runqueue lock but leave interrupts disabled and grab the -+ * remote lock we're migrating it to before enabling them. -+ */ -+ if (unlikely(task_on_rq_migrating(prev))) { -+ sched_info_dequeued(rq, prev); -+ /* -+ * We move the ownership of prev to the new cpu now. ttwu can't -+ * activate prev to the wrong cpu since it has to grab this -+ * runqueue in ttwu_remote. 
-+ */ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ prev->cpu = prev->wake_cpu; -+#else -+ task_thread_info(prev)->cpu = prev->wake_cpu; -+#endif -+ raw_spin_unlock(rq->lock); -+ -+ raw_spin_lock(&prev->pi_lock); -+ rq = __task_rq_lock(prev, NULL); -+ /* Check that someone else hasn't already queued prev */ -+ if (likely(!task_queued(prev))) { -+ enqueue_task(rq, prev, 0); -+ prev->on_rq = TASK_ON_RQ_QUEUED; -+ /* Wake up the CPU if it's not already running */ -+ resched_if_idle(rq); -+ } -+ raw_spin_unlock(&prev->pi_lock); -+ } -+#endif -+ rq_unlock(rq); -+ local_irq_enable(); -+} -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+#ifndef finish_arch_switch -+# define finish_arch_switch(prev) do { } while (0) -+#endif -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+/** -+ * prepare_task_switch - prepare to switch tasks -+ * @rq: the runqueue preparing to switch -+ * @next: the task we are going to switch to. -+ * -+ * This is called with the rq lock held and interrupts off. It must -+ * be paired with a subsequent finish_task_switch after the context -+ * switch. -+ * -+ * prepare_task_switch sets up locking and calls architecture specific -+ * hooks. -+ */ -+static inline void -+prepare_task_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ kcov_prepare_switch(prev); -+ sched_info_switch(rq, prev, next); -+ perf_event_task_sched_out(prev, next); -+ rseq_preempt(prev); -+ fire_sched_out_preempt_notifiers(prev, next); -+ prepare_task(next); -+ prepare_arch_switch(next); -+} -+ -+/** -+ * finish_task_switch - clean up after a task-switch -+ * @rq: runqueue associated with task-switch -+ * @prev: the thread we just switched away from. -+ * -+ * finish_task_switch must be called after the context switch, paired -+ * with a prepare_task_switch call before the context switch. -+ * finish_task_switch will reconcile locking set up by prepare_task_switch, -+ * and do any other architecture-specific cleanup actions. -+ * -+ * Note that we may have delayed dropping an mm in context_switch(). If -+ * so, we finish that here outside of the runqueue lock. (Doing it -+ * with the lock held can cause deadlocks; see schedule() for -+ * details.) -+ * -+ * The context switch have flipped the stack from under us and restored the -+ * local variables which were saved when this task called schedule() in the -+ * past. prev == current is still correct but we need to recalculate this_rq -+ * because prev may have moved to another CPU. -+ */ -+static void finish_task_switch(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq = this_rq(); -+ struct mm_struct *mm = rq->prev_mm; -+ long prev_state; -+ -+ /* -+ * The previous task will have left us with a preempt_count of 2 -+ * because it left us after: -+ * -+ * schedule() -+ * preempt_disable(); // 1 -+ * __schedule() -+ * raw_spin_lock_irq(rq->lock) // 2 -+ * -+ * Also, see FORK_PREEMPT_COUNT. -+ */ -+ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, -+ "corrupted preempt_count: %s/%d/0x%x\n", -+ current->comm, current->pid, preempt_count())) -+ preempt_count_set(FORK_PREEMPT_COUNT); -+ -+ rq->prev_mm = NULL; -+ -+ /* -+ * A task struct has one reference for the use as "current". -+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls -+ * schedule one last time. The schedule call will never return, and -+ * the scheduled task must drop that reference. 
-+ * -+ * We must observe prev->state before clearing prev->on_cpu (in -+ * finish_task), otherwise a concurrent wakeup can get prev -+ * running on another CPU and we could rave with its RUNNING -> DEAD -+ * transition, resulting in a double drop. -+ */ -+ prev_state = prev->state; -+ vtime_task_switch(prev); -+ perf_event_task_sched_in(prev, current); -+ finish_task(prev); -+ finish_lock_switch(rq, prev); -+ finish_arch_post_lock_switch(); -+ kcov_finish_switch(current); -+ -+ fire_sched_in_preempt_notifiers(current); -+ /* -+ * When switching through a kernel thread, the loop in -+ * membarrier_{private,global}_expedited() may have observed that -+ * kernel thread and not issued an IPI. It is therefore possible to -+ * schedule between user->kernel->user threads without passing though -+ * switch_mm(). Membarrier requires a barrier after storing to -+ * rq->curr, before returning to userspace, so provide them here: -+ * -+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly -+ * provided by mmdrop(), -+ * - a sync_core for SYNC_CORE. -+ */ -+ if (mm) { -+ membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop(mm); -+ } -+ if (unlikely(prev_state == TASK_DEAD)) { -+ /* -+ * Remove function-return probe instances associated with this -+ * task and put them back on the free list. -+ */ -+ kprobe_flush_task(prev); -+ -+ /* Task is done with its stack. */ -+ put_task_stack(prev); -+ -+ put_task_struct_rcu_user(prev); -+ } -+} -+ -+/** -+ * schedule_tail - first thing a freshly forked thread must call. -+ * @prev: the thread we just switched away from. -+ */ -+asmlinkage __visible void schedule_tail(struct task_struct *prev) -+{ -+ /* -+ * New tasks start with FORK_PREEMPT_COUNT, see there and -+ * finish_task_switch() for details. -+ * -+ * finish_task_switch() will drop rq->lock() and lower preempt_count -+ * and the preempt_enable() will end up enabling preemption (on -+ * PREEMPT_COUNT kernels). -+ */ -+ -+ finish_task_switch(prev); -+ preempt_enable(); -+ -+ if (current->set_child_tid) -+ put_user(task_pid_vnr(current), current->set_child_tid); -+ -+ calculate_sigpending(); -+} -+ -+/* -+ * context_switch - switch to the new MM and the new thread's register state. -+ */ -+static __always_inline void -+context_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ prepare_task_switch(rq, prev, next); -+ -+ /* -+ * For paravirt, this is coupled with an exit in switch_to to -+ * combine the page table reload and the switch backend into -+ * one hypercall. -+ */ -+ arch_start_context_switch(prev); -+ -+ /* -+ * kernel -> kernel lazy + transfer active -+ * user -> kernel lazy + mmgrab() active -+ * -+ * kernel -> user switch + mmdrop() active -+ * user -> user switch -+ */ -+ if (!next->mm) { // to kernel -+ enter_lazy_tlb(prev->active_mm, next); -+ -+ next->active_mm = prev->active_mm; -+ if (prev->mm) // from user -+ mmgrab(prev->active_mm); -+ else -+ prev->active_mm = NULL; -+ } else { // to user -+ membarrier_switch_mm(rq, prev->active_mm, next->mm); -+ /* -+ * sys_membarrier() requires an smp_mb() between setting -+ * rq->curr / membarrier_switch_mm() and returning to userspace. -+ * -+ * The below provides this either through switch_mm(), or in -+ * case 'prev->active_mm == next->mm' through -+ * finish_task_switch()'s mmdrop(). -+ */ -+ switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ -+ if (!prev->mm) { // from kernel -+ /* will mmdrop() in finish_task_switch(). 
*/ -+ rq->prev_mm = prev->active_mm; -+ prev->active_mm = NULL; -+ } -+ } -+ prepare_lock_switch(rq, next); -+ -+ /* Here we just switch the register state and the stack. */ -+ switch_to(prev, next, prev); -+ barrier(); -+ -+ finish_task_switch(prev); -+} -+ -+/* -+ * nr_running, nr_uninterruptible and nr_context_switches: -+ * -+ * externally visible scheduler statistics: current number of runnable -+ * threads, total number of context switches performed since bootup. -+ */ -+unsigned long nr_running(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_running; -+ -+ return sum; -+} -+ -+static unsigned long nr_uninterruptible(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_uninterruptible; -+ -+ return sum; -+} -+ -+/* -+ * Check if only the current task is running on the CPU. -+ * -+ * Caution: this function does not check that the caller has disabled -+ * preemption, thus the result might have a time-of-check-to-time-of-use -+ * race. The caller is responsible to use it correctly, for example: -+ * -+ * - from a non-preemptible section (of course) -+ * -+ * - from a thread that is bound to a single CPU -+ * -+ * - in a loop with very short iterations (e.g. a polling loop) -+ */ -+bool single_task_running(void) -+{ -+ if (rq_load(raw_rq()) == 1) -+ return true; -+ else -+ return false; -+} -+EXPORT_SYMBOL(single_task_running); -+ -+unsigned long long nr_context_switches(void) -+{ -+ int cpu; -+ unsigned long long sum = 0; -+ -+ for_each_possible_cpu(cpu) -+ sum += cpu_rq(cpu)->nr_switches; -+ -+ return sum; -+} -+ -+/* -+ * Consumers of these two interfaces, like for example the cpufreq menu -+ * governor are using nonsensical data. Boosting frequency for a CPU that has -+ * IO-wait which might not even end up running the task when it does become -+ * runnable. -+ */ -+ -+unsigned long nr_iowait_cpu(int cpu) -+{ -+ return atomic_read(&cpu_rq(cpu)->nr_iowait); -+} -+ -+/* -+ * IO-wait accounting, and how its mostly bollocks (on SMP). -+ * -+ * The idea behind IO-wait account is to account the idle time that we could -+ * have spend running if it were not for IO. That is, if we were to improve the -+ * storage performance, we'd have a proportional reduction in IO-wait time. -+ * -+ * This all works nicely on UP, where, when a task blocks on IO, we account -+ * idle time as IO-wait, because if the storage were faster, it could've been -+ * running and we'd not be idle. -+ * -+ * This has been extended to SMP, by doing the same for each CPU. This however -+ * is broken. -+ * -+ * Imagine for instance the case where two tasks block on one CPU, only the one -+ * CPU will have IO-wait accounted, while the other has regular idle. Even -+ * though, if the storage were faster, both could've ran at the same time, -+ * utilising both CPUs. -+ * -+ * This means, that when looking globally, the current IO-wait accounting on -+ * SMP is a lower bound, by reason of under accounting. -+ * -+ * Worse, since the numbers are provided per CPU, they are sometimes -+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly -+ * associated with any one particular CPU, it can wake to another CPU than it -+ * blocked on. This means the per CPU IO-wait number is meaningless. -+ * -+ * Task CPU affinities can make all that even more 'interesting'. 
-+ */ -+ -+unsigned long nr_iowait(void) -+{ -+ unsigned long cpu, sum = 0; -+ -+ for_each_possible_cpu(cpu) -+ sum += nr_iowait_cpu(cpu); -+ -+ return sum; -+} -+ -+unsigned long nr_active(void) -+{ -+ return nr_running() + nr_uninterruptible(); -+} -+ -+/* Variables and functions for calc_load */ -+static unsigned long calc_load_update; -+unsigned long avenrun[3]; -+EXPORT_SYMBOL(avenrun); -+ -+/** -+ * get_avenrun - get the load average array -+ * @loads: pointer to dest load array -+ * @offset: offset to add -+ * @shift: shift count to shift the result left -+ * -+ * These values are estimates at best, so no need for locking. -+ */ -+void get_avenrun(unsigned long *loads, unsigned long offset, int shift) -+{ -+ loads[0] = (avenrun[0] + offset) << shift; -+ loads[1] = (avenrun[1] + offset) << shift; -+ loads[2] = (avenrun[2] + offset) << shift; -+} -+ -+/* -+ * calc_load - update the avenrun load estimates every LOAD_FREQ seconds. -+ */ -+void calc_global_load(void) -+{ -+ long active; -+ -+ if (time_before(jiffies, READ_ONCE(calc_load_update))) -+ return; -+ active = nr_active() * FIXED_1; -+ -+ avenrun[0] = calc_load(avenrun[0], EXP_1, active); -+ avenrun[1] = calc_load(avenrun[1], EXP_5, active); -+ avenrun[2] = calc_load(avenrun[2], EXP_15, active); -+ -+ calc_load_update = jiffies + LOAD_FREQ; -+} -+ -+/** -+ * fixed_power_int - compute: x^n, in O(log n) time -+ * -+ * @x: base of the power -+ * @frac_bits: fractional bits of @x -+ * @n: power to raise @x to. -+ * -+ * By exploiting the relation between the definition of the natural power -+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and -+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i, -+ * (where: n_i \elem {0, 1}, the binary vector representing n), -+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is -+ * of course trivially computable in O(log_2 n), the length of our binary -+ * vector. -+ */ -+static unsigned long -+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) -+{ -+ unsigned long result = 1UL << frac_bits; -+ -+ if (n) { -+ for (;;) { -+ if (n & 1) { -+ result *= x; -+ result += 1UL << (frac_bits - 1); -+ result >>= frac_bits; -+ } -+ n >>= 1; -+ if (!n) -+ break; -+ x *= x; -+ x += 1UL << (frac_bits - 1); -+ x >>= frac_bits; -+ } -+ } -+ -+ return result; -+} -+ -+/* -+ * a1 = a0 * e + a * (1 - e) -+ * -+ * a2 = a1 * e + a * (1 - e) -+ * = (a0 * e + a * (1 - e)) * e + a * (1 - e) -+ * = a0 * e^2 + a * (1 - e) * (1 + e) -+ * -+ * a3 = a2 * e + a * (1 - e) -+ * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e) -+ * = a0 * e^3 + a * (1 - e) * (1 + e + e^2) -+ * -+ * ... -+ * -+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... 
+ e^n-1) [1] -+ * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e) -+ * = a0 * e^n + a * (1 - e^n) -+ * -+ * [1] application of the geometric series: -+ * -+ * n 1 - x^(n+1) -+ * S_n := \Sum x^i = ------------- -+ * i=0 1 - x -+ */ -+unsigned long -+calc_load_n(unsigned long load, unsigned long exp, -+ unsigned long active, unsigned int n) -+{ -+ return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); -+} -+ -+DEFINE_PER_CPU(struct kernel_stat, kstat); -+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -+ -+EXPORT_PER_CPU_SYMBOL(kstat); -+EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -+ -+#ifdef CONFIG_PARAVIRT -+static inline u64 steal_ticks(u64 steal) -+{ -+ if (unlikely(steal > NSEC_PER_SEC)) -+ return div_u64(steal, TICK_NSEC); -+ -+ return __iter_div_u64_rem(steal, TICK_NSEC, &steal); -+} -+#endif -+ -+#ifndef nsecs_to_cputime -+# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) -+#endif -+ -+/* -+ * On each tick, add the number of nanoseconds to the unbanked variables and -+ * once one tick's worth has accumulated, account it allowing for accurate -+ * sub-tick accounting and totals. Use the TICK_APPROX_NS to match the way we -+ * deduct nanoseconds. -+ */ -+static void pc_idle_time(struct rq *rq, struct task_struct *idle, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ if (atomic_read(&rq->nr_iowait) > 0) { -+ rq->iowait_ns += ns; -+ if (rq->iowait_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->iowait_ns); -+ cpustat[CPUTIME_IOWAIT] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->iowait_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->idle_ns += ns; -+ if (rq->idle_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->idle_ns); -+ cpustat[CPUTIME_IDLE] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->idle_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(idle); -+} -+ -+static void pc_system_time(struct rq *rq, struct task_struct *p, -+ int hardirq_offset, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ p->stime_ns += ns; -+ if (p->stime_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(p->stime_ns); -+ p->stime_ns %= JIFFY_NS; -+ p->stime += (__force u64)TICK_APPROX_NS * ticks; -+ account_group_system_time(p, TICK_APPROX_NS * ticks); -+ } -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ if (hardirq_count() - hardirq_offset) { -+ rq->irq_ns += ns; -+ if (rq->irq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->irq_ns); -+ cpustat[CPUTIME_IRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->irq_ns %= JIFFY_NS; -+ } -+ } else if (in_serving_softirq()) { -+ rq->softirq_ns += ns; -+ if (rq->softirq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->softirq_ns); -+ cpustat[CPUTIME_SOFTIRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->softirq_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->system_ns += ns; -+ if (rq->system_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->system_ns); -+ cpustat[CPUTIME_SYSTEM] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->system_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(p); -+} -+ -+static void pc_user_time(struct rq *rq, struct task_struct *p, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ p->utime_ns += ns; -+ if (p->utime_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(p->utime_ns); -+ p->utime_ns %= JIFFY_NS; -+ p->utime += (__force u64)TICK_APPROX_NS * ticks; -+ account_group_user_time(p, TICK_APPROX_NS * ticks); -+ } -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ if 
(this_cpu_ksoftirqd() == p) { -+ /* -+ * ksoftirqd time do not get accounted in cpu_softirq_time. -+ * So, we have to handle it separately here. -+ */ -+ rq->softirq_ns += ns; -+ if (rq->softirq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->softirq_ns); -+ cpustat[CPUTIME_SOFTIRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->softirq_ns %= JIFFY_NS; -+ } -+ } -+ -+ if (task_nice(p) > 0 || idleprio_task(p)) { -+ rq->nice_ns += ns; -+ if (rq->nice_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->nice_ns); -+ cpustat[CPUTIME_NICE] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->nice_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->user_ns += ns; -+ if (rq->user_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->user_ns); -+ cpustat[CPUTIME_USER] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->user_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(p); -+} -+ -+/* -+ * This is called on clock ticks. -+ * Bank in p->sched_time the ns elapsed since the last tick or switch. -+ * CPU scheduler quota accounting is also performed here in microseconds. -+ */ -+static void update_cpu_clock_tick(struct rq *rq, struct task_struct *p) -+{ -+ s64 account_ns = rq->niffies - p->last_ran; -+ struct task_struct *idle = rq->idle; -+ -+ /* Accurate tick timekeeping */ -+ if (user_mode(get_irq_regs())) -+ pc_user_time(rq, p, account_ns); -+ else if (p != idle || (irq_count() != HARDIRQ_OFFSET)) { -+ pc_system_time(rq, p, HARDIRQ_OFFSET, account_ns); -+ } else -+ pc_idle_time(rq, idle, account_ns); -+ -+ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ -+ if (p->policy != SCHED_FIFO && p != idle) -+ p->time_slice -= NS_TO_US(account_ns); -+ -+ p->last_ran = rq->niffies; -+} -+ -+/* -+ * This is called on context switches. -+ * Bank in p->sched_time the ns elapsed since the last tick or switch. -+ * CPU scheduler quota accounting is also performed here in microseconds. -+ */ -+static void update_cpu_clock_switch(struct rq *rq, struct task_struct *p) -+{ -+ s64 account_ns = rq->niffies - p->last_ran; -+ struct task_struct *idle = rq->idle; -+ -+ /* Accurate subtick timekeeping */ -+ if (p != idle) -+ pc_user_time(rq, p, account_ns); -+ else -+ pc_idle_time(rq, idle, account_ns); -+ -+ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ -+ if (p->policy != SCHED_FIFO && p != idle) -+ p->time_slice -= NS_TO_US(account_ns); -+} -+ -+/* -+ * Return any ns on the sched_clock that have not yet been accounted in -+ * @p in case that task is currently running. -+ * -+ * Called with task_rq_lock(p) held. -+ */ -+static inline u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) -+{ -+ u64 ns = 0; -+ -+ /* -+ * Must be ->curr _and_ ->on_rq. If dequeued, we would -+ * project cycles that may never be accounted to this -+ * thread, breaking clock_gettime(). -+ */ -+ if (p == rq->curr && task_on_rq_queued(p)) { -+ update_clocks(rq); -+ ns = rq->niffies - p->last_ran; -+ } -+ -+ return ns; -+} -+ -+/* -+ * Return accounted runtime for the task. -+ * Return separately the current's pending runtime that have not been -+ * accounted yet. -+ */ -+unsigned long long task_sched_runtime(struct task_struct *p) -+{ -+ struct rq_flags rf; -+ struct rq *rq; -+ u64 ns; -+ -+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) -+ /* -+ * 64-bit doesn't need locks to atomically read a 64-bit value. -+ * So we have a optimisation chance when the task's delta_exec is 0. -+ * Reading ->on_cpu is racy, but this is ok. -+ * -+ * If we race with it leaving CPU, we'll take a lock. So we're correct. 
-+ * If we race with it entering CPU, unaccounted time is 0. This is -+ * indistinguishable from the read occurring a few cycles earlier. -+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has -+ * been accounted, so we're correct here as well. -+ */ -+ if (!p->on_cpu || !task_on_rq_queued(p)) -+ return tsk_seruntime(p); -+#endif -+ -+ rq = task_rq_lock(p, &rf); -+ ns = p->sched_time + do_task_delta_exec(p, rq); -+ task_rq_unlock(rq, p, &rf); -+ -+ return ns; -+} -+ -+/* -+ * Functions to test for when SCHED_ISO tasks have used their allocated -+ * quota as real time scheduling and convert them back to SCHED_NORMAL. All -+ * data is modified only by the local runqueue during scheduler_tick with -+ * interrupts disabled. -+ */ -+ -+/* -+ * Test if SCHED_ISO tasks have run longer than their alloted period as RT -+ * tasks and set the refractory flag if necessary. There is 10% hysteresis -+ * for unsetting the flag. 115/128 is ~90/100 as a fast shift instead of a -+ * slow division. -+ */ -+static inline void iso_tick(struct rq *rq) -+{ -+ rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD; -+ rq->iso_ticks += 100; -+ if (rq->iso_ticks > ISO_PERIOD * sched_iso_cpu) { -+ rq->iso_refractory = true; -+ if (unlikely(rq->iso_ticks > ISO_PERIOD * 100)) -+ rq->iso_ticks = ISO_PERIOD * 100; -+ } -+} -+ -+/* No SCHED_ISO task was running so decrease rq->iso_ticks */ -+static inline void no_iso_tick(struct rq *rq, int ticks) -+{ -+ if (rq->iso_ticks > 0 || rq->iso_refractory) { -+ rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - ticks) / ISO_PERIOD; -+ if (rq->iso_ticks < ISO_PERIOD * (sched_iso_cpu * 115 / 128)) { -+ rq->iso_refractory = false; -+ if (unlikely(rq->iso_ticks < 0)) -+ rq->iso_ticks = 0; -+ } -+ } -+} -+ -+/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static void task_running_tick(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ /* -+ * If a SCHED_ISO task is running we increment the iso_ticks. In -+ * order to prevent SCHED_ISO tasks from causing starvation in the -+ * presence of true RT tasks we account those as iso_ticks as well. -+ */ -+ if (rt_task(p) || task_running_iso(p)) -+ iso_tick(rq); -+ else -+ no_iso_tick(rq, 1); -+ -+ /* SCHED_FIFO tasks never run out of timeslice. */ -+ if (p->policy == SCHED_FIFO) -+ return; -+ -+ if (iso_task(p)) { -+ if (task_running_iso(p)) { -+ if (rq->iso_refractory) { -+ /* -+ * SCHED_ISO task is running as RT and limit -+ * has been hit. Force it to reschedule as -+ * SCHED_NORMAL by zeroing its time_slice -+ */ -+ p->time_slice = 0; -+ } -+ } else if (!rq->iso_refractory) { -+ /* Can now run again ISO. Reschedule to pick up prio */ -+ goto out_resched; -+ } -+ } -+ -+ /* -+ * Tasks that were scheduled in the first half of a tick are not -+ * allowed to run into the 2nd half of the next tick if they will -+ * run out of time slice in the interim. Otherwise, if they have -+ * less than RESCHED_US μs of time slice left they will be rescheduled. -+ * Dither is used as a backup for when hrexpiry is disabled or high res -+ * timers not configured in. 
-+ */ -+ if (p->time_slice - rq->dither >= RESCHED_US) -+ return; -+out_resched: -+ rq_lock(rq); -+ __set_tsk_resched(p); -+ rq_unlock(rq); -+} -+ -+static inline void task_tick(struct rq *rq) -+{ -+ if (!rq_idle(rq)) -+ task_running_tick(rq); -+ else if (rq->last_jiffy > rq->last_scheduler_tick) -+ no_iso_tick(rq, rq->last_jiffy - rq->last_scheduler_tick); -+} -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * We can stop the timer tick any time highres timers are active since -+ * we rely entirely on highres timeouts for task expiry rescheduling. -+ */ -+static void sched_stop_tick(struct rq *rq, int cpu) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ if (!tick_nohz_full_enabled()) -+ return; -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+ -+static inline void sched_start_tick(struct rq *rq, int cpu) -+{ -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+ -+struct tick_work { -+ int cpu; -+ atomic_t state; -+ struct delayed_work work; -+}; -+/* Values for ->state, see diagram below. */ -+#define TICK_SCHED_REMOTE_OFFLINE 0 -+#define TICK_SCHED_REMOTE_OFFLINING 1 -+#define TICK_SCHED_REMOTE_RUNNING 2 -+ -+/* -+ * State diagram for ->state: -+ * -+ * -+ * TICK_SCHED_REMOTE_OFFLINE -+ * | ^ -+ * | | -+ * | | sched_tick_remote() -+ * | | -+ * | | -+ * +--TICK_SCHED_REMOTE_OFFLINING -+ * | ^ -+ * | | -+ * sched_tick_start() | | sched_tick_stop() -+ * | | -+ * V | -+ * TICK_SCHED_REMOTE_RUNNING -+ * -+ * -+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() -+ * and sched_tick_start() are happy to leave the state in RUNNING. -+ */ -+ -+static struct tick_work __percpu *tick_work_cpu; -+ -+static void sched_tick_remote(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct tick_work *twork = container_of(dwork, struct tick_work, work); -+ int cpu = twork->cpu; -+ struct rq *rq = cpu_rq(cpu); -+ struct task_struct *curr; -+ u64 delta; -+ int os; -+ -+ /* -+ * Handle the tick only if it appears the remote CPU is running in full -+ * dynticks mode. The check is racy by nature, but missing a tick or -+ * having one too much is no big deal because the scheduler tick updates -+ * statistics and checks timeslices in a time-independent way, regardless -+ * of when exactly it is running. -+ */ -+ if (!tick_nohz_tick_stopped_cpu(cpu)) -+ goto out_requeue; -+ -+ rq_lock_irq(rq); -+ if (cpu_is_offline(cpu)) -+ goto out_unlock; -+ -+ curr = rq->curr; -+ update_rq_clock(rq); -+ -+ if (!is_idle_task(curr)) { -+ /* -+ * Make sure the next tick runs within a reasonable -+ * amount of time. -+ */ -+ delta = rq_clock_task(rq) - curr->last_ran; -+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); -+ } -+ task_tick(rq); -+ -+out_unlock: -+ rq_unlock_irq(rq, NULL); -+ -+out_requeue: -+ -+ /* -+ * Run the remote tick once per second (1Hz). This arbitrary -+ * frequency is large enough to avoid overload but short enough -+ * to keep scheduler internal stats reasonably up to date. But -+ * first update state to reflect hotplug activity if required. 
-+ */ -+ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); -+ if (os == TICK_SCHED_REMOTE_RUNNING) -+ queue_delayed_work(system_unbound_wq, dwork, HZ); -+} -+ -+static void sched_tick_start(int cpu) -+{ -+ struct tick_work *twork; -+ int os; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); -+ if (os == TICK_SCHED_REMOTE_OFFLINE) { -+ twork->cpu = cpu; -+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); -+ queue_delayed_work(system_unbound_wq, &twork->work, HZ); -+ } -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void sched_tick_stop(int cpu) -+{ -+ struct tick_work *twork; -+ int os; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ /* There cannot be competing actions, but don't rely on stop-machine. */ -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_OFFLINING); -+ WARN_ON_ONCE(os != TICK_SCHED_REMOTE_RUNNING); -+ /* Don't cancel, as this would mess up the state machine. */ -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+int __init sched_tick_offload_init(void) -+{ -+ tick_work_cpu = alloc_percpu(struct tick_work); -+ BUG_ON(!tick_work_cpu); -+ return 0; -+} -+ -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_stop_tick(struct rq *rq, int cpu) {} -+static inline void sched_start_tick(struct rq *rq, int cpu) {} -+static inline void sched_tick_start(int cpu) { } -+static inline void sched_tick_stop(int cpu) { } -+#endif -+ -+/* -+ * This function gets called by the timer code, with HZ frequency. -+ * We call it with interrupts disabled. -+ */ -+void scheduler_tick(void) -+{ -+ int cpu __maybe_unused = smp_processor_id(); -+ struct rq *rq = cpu_rq(cpu); -+ -+ arch_scale_freq_tick(); -+ sched_clock_tick(); -+ update_clocks(rq); -+ update_load_avg(rq, 0); -+ update_cpu_clock_tick(rq, rq->curr); -+ task_tick(rq); -+ rq->last_scheduler_tick = rq->last_jiffy; -+ rq->last_tick = rq->clock; -+ psi_task_tick(rq); -+ perf_event_task_tick(); -+ sched_stop_tick(rq, cpu); -+} -+ -+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ -+ defined(CONFIG_TRACE_PREEMPT_TOGGLE)) -+/* -+ * If the value passed in is equal to the current preempt count -+ * then we just disabled preemption. Start timing the latency. -+ */ -+static inline void preempt_latency_start(int val) -+{ -+ if (preempt_count() == val) { -+ unsigned long ip = get_lock_parent_ip(); -+#ifdef CONFIG_DEBUG_PREEMPT -+ current->preempt_disable_ip = ip; -+#endif -+ trace_preempt_off(CALLER_ADDR0, ip); -+ } -+} -+ -+void preempt_count_add(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) -+ return; -+#endif -+ __preempt_count_add(val); -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Spinlock count overflowing soon? -+ */ -+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= -+ PREEMPT_MASK - 10); -+#endif -+ preempt_latency_start(val); -+} -+EXPORT_SYMBOL(preempt_count_add); -+NOKPROBE_SYMBOL(preempt_count_add); -+ -+/* -+ * If the value passed in equals to the current preempt count -+ * then we just enabled preemption. Stop timing the latency. 
-+ */ -+static inline void preempt_latency_stop(int val) -+{ -+ if (preempt_count() == val) -+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); -+} -+ -+void preempt_count_sub(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) -+ return; -+ /* -+ * Is the spinlock portion underflowing? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && -+ !(preempt_count() & PREEMPT_MASK))) -+ return; -+#endif -+ -+ preempt_latency_stop(val); -+ __preempt_count_sub(val); -+} -+EXPORT_SYMBOL(preempt_count_sub); -+NOKPROBE_SYMBOL(preempt_count_sub); -+ -+#else -+static inline void preempt_latency_start(int val) { } -+static inline void preempt_latency_stop(int val) { } -+#endif -+ -+static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ return p->preempt_disable_ip; -+#else -+ return 0; -+#endif -+} -+ -+/* -+ * The time_slice is only refilled when it is empty and that is when we set a -+ * new deadline. Make sure update_clocks has been called recently to update -+ * rq->niffies. -+ */ -+static void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ p->time_slice = timeslice(); -+ p->deadline = rq->niffies + task_deadline_diff(p); -+#ifdef CONFIG_SMT_NICE -+ if (!p->mm) -+ p->smt_bias = 0; -+ else if (rt_task(p)) -+ p->smt_bias = 1 << 30; -+ else if (task_running_iso(p)) -+ p->smt_bias = 1 << 29; -+ else if (idleprio_task(p)) { -+ if (task_running_idle(p)) -+ p->smt_bias = 0; -+ else -+ p->smt_bias = 1; -+ } else if (--p->smt_bias < 1) -+ p->smt_bias = MAX_PRIO - p->static_prio; -+#endif -+} -+ -+/* -+ * Timeslices below RESCHED_US are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. SCHED_BATCH tasks -+ * have been flagged be not latency sensitive and likely to be fully CPU -+ * bound so every time they're rescheduled they have their time_slice -+ * refilled, but get a new later deadline to have little effect on -+ * SCHED_NORMAL tasks. -+ -+ */ -+static inline void check_deadline(struct task_struct *p, struct rq *rq) -+{ -+ if (p->time_slice < RESCHED_US || batch_task(p)) -+ time_slice_expired(p, rq); -+} -+ -+/* -+ * Task selection with skiplists is a simple matter of picking off the first -+ * task in the sorted list, an O(1) operation. The lookup is amortised O(1) -+ * being bound to the number of processors. -+ * -+ * Runqueues are selectively locked based on their unlocked data and then -+ * unlocked if not needed. At most 3 locks will be held at any time and are -+ * released as soon as they're no longer needed. All balancing between CPUs -+ * is thus done here in an extremely simple first come best fit manner. -+ * -+ * This iterates over runqueues in cache locality order. In interactive mode -+ * it iterates over all CPUs and finds the task with the best key/deadline. -+ * In non-interactive mode it will only take a task if it's from the current -+ * runqueue or a runqueue with more tasks than the current one with a better -+ * key/deadline. 
-+ */ -+#ifdef CONFIG_SMP -+static inline struct task_struct -+*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle) -+{ -+ struct rq *locked = NULL, *chosen = NULL; -+ struct task_struct *edt = idle; -+ int i, best_entries = 0; -+ u64 best_key = ~0ULL; -+ -+ for (i = 0; i < total_runqueues; i++) { -+ struct rq *other_rq = rq_order(rq, i); -+ skiplist_node *next; -+ int entries; -+ -+ entries = other_rq->sl->entries; -+ /* -+ * Check for queued entres lockless first. The local runqueue -+ * is locked so entries will always be accurate. -+ */ -+ if (!sched_interactive) { -+ /* -+ * Don't reschedule balance across nodes unless the CPU -+ * is idle. -+ */ -+ if (edt != idle && rq->cpu_locality[other_rq->cpu] > LOCALITY_SMP) -+ break; -+ if (entries <= best_entries) -+ continue; -+ } else if (!entries) -+ continue; -+ -+ /* if (i) implies other_rq != rq */ -+ if (i) { -+ /* Check for best id queued lockless first */ -+ if (other_rq->best_key >= best_key) -+ continue; -+ -+ if (unlikely(!trylock_rq(rq, other_rq))) -+ continue; -+ -+ /* Need to reevaluate entries after locking */ -+ entries = other_rq->sl->entries; -+ if (unlikely(!entries)) { -+ unlock_rq(other_rq); -+ continue; -+ } -+ } -+ -+ next = other_rq->node; -+ /* -+ * In interactive mode we check beyond the best entry on other -+ * runqueues if we can't get the best for smt or affinity -+ * reasons. -+ */ -+ while ((next = next->next[0]) != other_rq->node) { -+ struct task_struct *p; -+ u64 key = next->key; -+ -+ /* Reevaluate key after locking */ -+ if (key >= best_key) -+ break; -+ -+ p = next->value; -+ if (!smt_schedule(p, rq)) { -+ if (i && !sched_interactive) -+ break; -+ continue; -+ } -+ -+ if (sched_other_cpu(p, cpu)) { -+ if (sched_interactive || !i) -+ continue; -+ break; -+ } -+ /* Make sure affinity is ok */ -+ if (i) { -+ /* From this point on p is the best so far */ -+ if (locked) -+ unlock_rq(locked); -+ chosen = locked = other_rq; -+ } -+ best_entries = entries; -+ best_key = key; -+ edt = p; -+ break; -+ } -+ /* rq->preempting is a hint only as the state may have changed -+ * since it was set with the resched call but if we have met -+ * the condition we can break out here. 
*/ -+ if (edt == rq->preempting) -+ break; -+ if (i && other_rq != chosen) -+ unlock_rq(other_rq); -+ } -+ -+ if (likely(edt != idle)) -+ take_task(rq, cpu, edt); -+ -+ if (locked) -+ unlock_rq(locked); -+ -+ rq->preempting = NULL; -+ -+ return edt; -+} -+#else /* CONFIG_SMP */ -+static inline struct task_struct -+*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle) -+{ -+ struct task_struct *edt; -+ -+ if (unlikely(!rq->sl->entries)) -+ return idle; -+ edt = rq->node->next[0]->value; -+ take_task(rq, cpu, edt); -+ return edt; -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Print scheduling while atomic bug: -+ */ -+static noinline void __schedule_bug(struct task_struct *prev) -+{ -+ /* Save this before calling printk(), since that will clobber it */ -+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ if (oops_in_progress) -+ return; -+ -+ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", -+ prev->comm, prev->pid, preempt_count()); -+ -+ debug_show_held_locks(prev); -+ print_modules(); -+ if (irqs_disabled()) -+ print_irqtrace_events(prev); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && in_atomic_preempt_off()) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); -+ } -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+ -+/* -+ * Various schedule()-time debugging checks and statistics: -+ */ -+static inline void schedule_debug(struct task_struct *prev, bool preempt) -+{ -+#ifdef CONFIG_SCHED_STACK_END_CHECK -+ if (task_stack_end_corrupted(prev)) -+ panic("corrupted stack end detected inside scheduler\n"); -+ -+ if (task_scs_end_corrupted(prev)) -+ panic("corrupted shadow stack detected inside scheduler\n"); -+#endif -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && prev->state && prev->non_block_count) { -+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", -+ prev->comm, prev->pid, prev->non_block_count); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+ } -+#endif -+ -+ if (unlikely(in_atomic_preempt_off())) { -+ __schedule_bug(prev); -+ preempt_count_set(PREEMPT_DISABLED); -+ } -+ rcu_sleep_check(); -+ -+ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -+ -+ schedstat_inc(this_rq()->sched_count); -+} -+ -+/* -+ * The currently running task's information is all stored in rq local data -+ * which is only modified by the local CPU. -+ */ -+static inline void set_rq_task(struct rq *rq, struct task_struct *p) -+{ -+ if (p == rq->idle || p->policy == SCHED_FIFO) -+ hrexpiry_clear(rq); -+ else -+ hrexpiry_start(rq, US_TO_NS(p->time_slice)); -+ if (rq->clock - rq->last_tick > HALF_JIFFY_NS) -+ rq->dither = 0; -+ else -+ rq->dither = rq_dither(rq); -+ -+ rq->rq_deadline = p->deadline; -+ rq->rq_prio = p->prio; -+#ifdef CONFIG_SMT_NICE -+ rq->rq_mm = p->mm; -+ rq->rq_smt_bias = p->smt_bias; -+#endif -+} -+ -+#ifdef CONFIG_SMT_NICE -+static void check_no_siblings(struct rq __maybe_unused *this_rq) {} -+static void wake_no_siblings(struct rq __maybe_unused *this_rq) {} -+static void (*check_siblings)(struct rq *this_rq) = &check_no_siblings; -+static void (*wake_siblings)(struct rq *this_rq) = &wake_no_siblings; -+ -+/* Iterate over smt siblings when we've scheduled a process on cpu and decide -+ * whether they should continue running or be descheduled. 
*/ -+static void check_smt_siblings(struct rq *this_rq) -+{ -+ int other_cpu; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct task_struct *p; -+ struct rq *rq; -+ -+ rq = cpu_rq(other_cpu); -+ if (rq_idle(rq)) -+ continue; -+ p = rq->curr; -+ if (!smt_schedule(p, this_rq)) -+ resched_curr(rq); -+ } -+} -+ -+static void wake_smt_siblings(struct rq *this_rq) -+{ -+ int other_cpu; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct rq *rq; -+ -+ rq = cpu_rq(other_cpu); -+ if (rq_idle(rq)) -+ resched_idle(rq); -+ } -+} -+#else -+static void check_siblings(struct rq __maybe_unused *this_rq) {} -+static void wake_siblings(struct rq __maybe_unused *this_rq) {} -+#endif -+ -+/* -+ * schedule() is the main scheduler function. -+ * -+ * The main means of driving the scheduler and thus entering this function are: -+ * -+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. -+ * -+ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return -+ * paths. For example, see arch/x86/entry_64.S. -+ * -+ * To drive preemption between tasks, the scheduler sets the flag in timer -+ * interrupt handler scheduler_tick(). -+ * -+ * 3. Wakeups don't really cause entry into schedule(). They add a -+ * task to the run-queue and that's it. -+ * -+ * Now, if the new task added to the run-queue preempts the current -+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets -+ * called on the nearest possible occasion: -+ * -+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): -+ * -+ * - in syscall or exception context, at the next outmost -+ * preempt_enable(). (this might be as soon as the wake_up()'s -+ * spin_unlock()!) -+ * -+ * - in IRQ context, return from interrupt-handler to -+ * preemptible context -+ * -+ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) -+ * then at the next: -+ * -+ * - cond_resched() call -+ * - explicit schedule() call -+ * - return from syscall or exception to user-space -+ * - return from interrupt-handler to user-space -+ * -+ * WARNING: must be called with preemption disabled! -+ */ -+static void __sched notrace __schedule(bool preempt) -+{ -+ struct task_struct *prev, *next, *idle; -+ unsigned long *switch_count; -+ unsigned long prev_state; -+ bool deactivate = false; -+ struct rq *rq; -+ u64 niffies; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ rq = cpu_rq(cpu); -+ prev = rq->curr; -+ idle = rq->idle; -+ -+ schedule_debug(prev, preempt); -+ -+ local_irq_disable(); -+ rcu_note_context_switch(preempt); -+ -+ /* -+ * Make sure that signal_pending_state()->signal_pending() below -+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(): -+ * -+ * __set_current_state(@state) signal_wake_up() -+ * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) -+ * wake_up_state(p, state) -+ * LOCK rq->lock LOCK p->pi_state -+ * smp_mb__after_spinlock() smp_mb__after_spinlock() -+ * if (signal_pending_state()) if (p->state & @state) -+ * -+ * Also, the membarrier system call requires a full memory barrier -+ * after coming from user-space, before storing to rq->curr. -+ */ -+ rq_lock(rq); -+ smp_mb__after_spinlock(); -+#ifdef CONFIG_SMP -+ if (rq->preempt) { -+ /* -+ * Make sure resched_curr hasn't triggered a preemption -+ * locklessly on a task that has since scheduled away. Spurious -+ * wakeup of idle is okay though. 
-+ */ -+ if (unlikely(preempt && prev != idle && !test_tsk_need_resched(prev))) { -+ rq->preempt = NULL; -+ clear_preempt_need_resched(); -+ rq_unlock_irq(rq, NULL); -+ return; -+ } -+ rq->preempt = NULL; -+ } -+#endif -+ -+ switch_count = &prev->nivcsw; -+ -+ /* -+ * We must load prev->state once (task_struct::state is volatile), such -+ * that: -+ * -+ * - we form a control dependency vs deactivate_task() below. -+ * - ptrace_{,un}freeze_traced() can change ->state underneath us. -+ */ -+ prev_state = prev->state; -+ if (!preempt && prev_state) { -+ if (signal_pending_state(prev_state, prev)) { -+ prev->state = TASK_RUNNING; -+ } else { -+ prev->sched_contributes_to_load = -+ (prev_state & TASK_UNINTERRUPTIBLE) && -+ !(prev_state & TASK_NOLOAD) && -+ !(prev->flags & PF_FROZEN); -+ -+ if (prev->sched_contributes_to_load) -+ rq->nr_uninterruptible++; -+ -+ /* -+ * __schedule() ttwu() -+ * prev_state = prev->state; if (p->on_rq && ...) -+ * if (prev_state) goto out; -+ * p->on_rq = 0; smp_acquire__after_ctrl_dep(); -+ * p->state = TASK_WAKING -+ * -+ * Where __schedule() and ttwu() have matching control dependencies. -+ * -+ * After this, schedule() must not care about p->state any more. -+ */ -+ deactivate = true; -+ -+ if (prev->in_iowait) { -+ atomic_inc(&rq->nr_iowait); -+ delayacct_blkio_start(); -+ } -+ } -+ switch_count = &prev->nvcsw; -+ } -+ -+ /* -+ * Store the niffy value here for use by the next task's last_ran -+ * below to avoid losing niffies due to update_clocks being called -+ * again after this point. -+ */ -+ update_clocks(rq); -+ niffies = rq->niffies; -+ update_cpu_clock_switch(rq, prev); -+ -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ -+ if (idle != prev) { -+ check_deadline(prev, rq); -+ return_task(prev, rq, cpu, deactivate); -+ } -+ -+ next = earliest_deadline_task(rq, cpu, idle); -+ if (likely(next->prio != PRIO_LIMIT)) -+ clear_cpuidle_map(cpu); -+ else { -+ set_cpuidle_map(cpu); -+ update_load_avg(rq, 0); -+ } -+ -+ set_rq_task(rq, next); -+ next->last_ran = niffies; -+ -+ if (likely(prev != next)) { -+ /* -+ * Don't reschedule an idle task or deactivated tasks -+ */ -+ if (prev == idle) { -+ inc_nr_running(rq); -+ if (rt_task(next)) -+ rq->rt_nr_running++; -+ } else if (!deactivate) -+ resched_suitable_idle(prev); -+ if (unlikely(next == idle)) { -+ dec_nr_running(rq); -+ if (rt_task(prev)) -+ rq->rt_nr_running--; -+ wake_siblings(rq); -+ } else -+ check_siblings(rq); -+ rq->nr_switches++; -+ /* -+ * RCU users of rcu_dereference(rq->curr) may not see -+ * changes to task_struct made by pick_next_task(). -+ */ -+ RCU_INIT_POINTER(rq->curr, next); -+ /* -+ * The membarrier system call requires each architecture -+ * to have a full memory barrier after updating -+ * rq->curr, before returning to user-space. -+ * -+ * Here are the schemes providing that barrier on the -+ * various architectures: -+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. -+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
-+ * - finish_lock_switch() for weakly-ordered -+ * architectures where spin_unlock is a full barrier, -+ * - switch_to() for arm64 (weakly-ordered, spin_unlock -+ * is a RELEASE barrier), -+ */ -+ ++*switch_count; -+ -+ psi_sched_switch(prev, next, !task_on_rq_queued(prev)); -+ -+ trace_sched_switch(preempt, prev, next); -+ context_switch(rq, prev, next); /* unlocks the rq */ -+ } else { -+ check_siblings(rq); -+ rq_unlock(rq); -+ local_irq_enable(); -+ } -+} -+ -+void __noreturn do_task_dead(void) -+{ -+ /* Causes final put_task_struct in finish_task_switch(). */ -+ set_special_state(TASK_DEAD); -+ -+ /* Tell freezer to ignore us: */ -+ current->flags |= PF_NOFREEZE; -+ __schedule(false); -+ BUG(); -+ -+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -+ for (;;) -+ cpu_relax(); -+} -+ -+static inline void sched_submit_work(struct task_struct *tsk) -+{ -+ if (!tsk->state) -+ return; -+ -+ /* -+ * If a worker went to sleep, notify and ask workqueue whether -+ * it wants to wake up a task to maintain concurrency. -+ * As this function is called inside the schedule() context, -+ * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker and because wq_worker_sleeping() -+ * requires it. -+ */ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ preempt_disable(); -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_sleeping(tsk); -+ else -+ io_wq_worker_sleeping(tsk); -+ preempt_enable_no_resched(); -+ } -+ -+ if (tsk_is_pi_blocked(tsk)) -+ return; -+ -+ /* -+ * If we are going to sleep and we have plugged IO queued, -+ * make sure to submit it to avoid deadlocks. -+ */ -+ if (blk_needs_flush_plug(tsk)) -+ blk_schedule_flush_plug(tsk); -+} -+ -+static inline void sched_update_worker(struct task_struct *tsk) -+{ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_running(tsk); -+ else -+ io_wq_worker_running(tsk); -+ } -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ struct task_struct *tsk = current; -+ -+ sched_submit_work(tsk); -+ do { -+ preempt_disable(); -+ __schedule(false); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ sched_update_worker(tsk); -+} -+ -+EXPORT_SYMBOL(schedule); -+ -+/* -+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted -+ * state (have scheduled out non-voluntarily) by making sure that all -+ * tasks have either left the run queue or have gone into user space. -+ * As idle tasks do not do either, they must not ever be preempted -+ * (schedule out non-voluntarily). -+ * -+ * schedule_idle() is similar to schedule_preempt_disable() except that it -+ * never enables preemption because it does not call sched_submit_work(). -+ */ -+void __sched schedule_idle(void) -+{ -+ /* -+ * As this skips calling sched_submit_work(), which the idle task does -+ * regardless because that function is a nop when the task is in a -+ * TASK_RUNNING state, make sure this isn't used someplace that the -+ * current task can be in any other state. Note, idle is always in the -+ * TASK_RUNNING state. -+ */ -+ WARN_ON_ONCE(current->state); -+ do { -+ __schedule(false); -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_CONTEXT_TRACKING -+asmlinkage __visible void __sched schedule_user(void) -+{ -+ /* -+ * If we come here after a random call to set_need_resched(), -+ * or we have been woken up remotely but the IPI has not yet arrived, -+ * we haven't yet exited the RCU idle mode. 
Do it here manually until -+ * we find a better solution. -+ * -+ * NB: There are buggy callers of this function. Ideally we -+ * should warn if prev_state != IN_USER, but that will trigger -+ * too frequently to make sense yet. -+ */ -+ enum ctx_state prev_state = exception_enter(); -+ schedule(); -+ exception_exit(prev_state); -+} -+#endif -+ -+/** -+ * schedule_preempt_disabled - called with preemption disabled -+ * -+ * Returns with preemption disabled. Note: preempt_count must be 1 -+ */ -+void __sched schedule_preempt_disabled(void) -+{ -+ sched_preempt_enable_no_resched(); -+ schedule(); -+ preempt_disable(); -+} -+ -+static void __sched notrace preempt_schedule_common(void) -+{ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ __schedule(true); -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ -+ /* -+ * Check again in case we missed a preemption opportunity -+ * between schedule and now. -+ */ -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_PREEMPTION -+/* -+ * This is the entry point to schedule() from in-kernel preemption -+ * off of preempt_enable. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule(void) -+{ -+ /* -+ * If there is a non-zero preempt_count or interrupts are disabled, -+ * we do not want to preempt the current task. Just return.. -+ */ -+ if (likely(!preemptible())) -+ return; -+ -+ preempt_schedule_common(); -+} -+NOKPROBE_SYMBOL(preempt_schedule); -+EXPORT_SYMBOL(preempt_schedule); -+ -+/** -+ * preempt_schedule_notrace - preempt_schedule called by tracing -+ * -+ * The tracing infrastructure uses preempt_enable_notrace to prevent -+ * recursion and tracing preempt enabling caused by the tracing -+ * infrastructure itself. But as tracing can happen in areas coming -+ * from userspace or just about to enter userspace, a preempt enable -+ * can occur before user_exit() is called. This will cause the scheduler -+ * to be called when the system is still in usermode. -+ * -+ * To prevent this, the preempt_enable_notrace will use this function -+ * instead of preempt_schedule() to exit user context if needed before -+ * calling the scheduler. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) -+{ -+ enum ctx_state prev_ctx; -+ -+ if (likely(!preemptible())) -+ return; -+ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. 
-+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ /* -+ * Needs preempt disabled in case user_exit() is traced -+ * and the tracer calls preempt_enable_notrace() causing -+ * an infinite recursion. -+ */ -+ prev_ctx = exception_enter(); -+ __schedule(true); -+ exception_exit(prev_ctx); -+ -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ } while (need_resched()); -+} -+EXPORT_SYMBOL_GPL(preempt_schedule_notrace); -+ -+#endif /* CONFIG_PREEMPTION */ -+ -+/* -+ * This is the entry point to schedule() from kernel preemption -+ * off of irq context. -+ * Note, that this is called and return with irqs disabled. This will -+ * protect us against recursive calling from irq. -+ */ -+asmlinkage __visible void __sched preempt_schedule_irq(void) -+{ -+ enum ctx_state prev_state; -+ -+ /* Catch callers which need to be fixed */ -+ BUG_ON(preempt_count() || !irqs_disabled()); -+ -+ prev_state = exception_enter(); -+ -+ do { -+ preempt_disable(); -+ local_irq_enable(); -+ __schedule(true); -+ local_irq_disable(); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ -+ exception_exit(prev_state); -+} -+ -+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, -+ void *key) -+{ -+ WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC); -+ return try_to_wake_up(curr->private, mode, wake_flags); -+} -+EXPORT_SYMBOL(default_wake_function); -+ -+#ifdef CONFIG_RT_MUTEXES -+ -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ -+/* -+ * rt_mutex_setprio - set the current priority of a task -+ * @p: task to boost -+ * @pi_task: donor task -+ * -+ * This function changes the 'effective' priority of a task. It does -+ * not touch ->normal_prio like __setscheduler(). -+ * -+ * Used by the rt_mutex code to implement priority inheritance -+ * logic. Call site only calls if the priority of the task changed. -+ */ -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) -+{ -+ int prio, oldprio; -+ struct rq *rq; -+ -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. -+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio) -+ return; -+ -+ rq = __task_rq_lock(p, NULL); -+ update_rq_clock(rq); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio) -+ goto out_unlock; -+ -+ /* -+ * Idle task boosting is a nono in general. 
There is one -+ * exception, when PREEMPT_RT and NOHZ is active: -+ * -+ * The idle task calls get_next_timer_interrupt() and holds -+ * the timer wheel base->lock on the CPU and another CPU wants -+ * to access the timer (probably to cancel it). We can safely -+ * ignore the boosting request, as the idle CPU runs this code -+ * with interrupts disabled and will complete the lock -+ * protected section without being interrupted. So there is no -+ * real need to boost. -+ */ -+ if (unlikely(p == rq->idle)) { -+ WARN_ON(p != rq->curr); -+ WARN_ON(p->pi_blocked_on); -+ goto out_unlock; -+ } -+ -+ trace_sched_pi_setprio(p, pi_task); -+ oldprio = p->prio; -+ p->prio = prio; -+ if (task_running(rq, p)){ -+ if (prio > oldprio) -+ resched_task(p); -+ } else if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (prio < oldprio) -+ try_preempt(p, rq); -+ } -+out_unlock: -+ __task_rq_unlock(rq, NULL); -+} -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} -+#endif -+ -+/* -+ * Adjust the deadline for when the priority is to change, before it's -+ * changed. -+ */ -+static inline void adjust_deadline(struct task_struct *p, int new_prio) -+{ -+ p->deadline += static_deadline_diff(new_prio) - task_deadline_diff(p); -+} -+ -+void set_user_nice(struct task_struct *p, long nice) -+{ -+ int new_static, old_static; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) -+ return; -+ new_static = NICE_TO_PRIO(nice); -+ /* -+ * We have to be careful, if called from sys_setpriority(), -+ * the task might be in the middle of scheduling on another CPU. -+ */ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ /* -+ * The RT priorities are set via sched_setscheduler(), but we still -+ * allow the 'normal' nice value to be set - but as expected -+ * it wont have any effect on scheduling until the task is -+ * not SCHED_NORMAL/SCHED_BATCH: -+ */ -+ if (has_rt_policy(p)) { -+ p->static_prio = new_static; -+ goto out_unlock; -+ } -+ -+ adjust_deadline(p, new_static); -+ old_static = p->static_prio; -+ p->static_prio = new_static; -+ p->prio = effective_prio(p); -+ -+ if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (new_static < old_static) -+ try_preempt(p, rq); -+ } else if (task_running(rq, p)) { -+ set_rq_task(rq, p); -+ if (old_static < new_static) -+ resched_task(p); -+ } -+out_unlock: -+ task_rq_unlock(rq, p, &rf); -+} -+EXPORT_SYMBOL(set_user_nice); -+ -+/* -+ * can_nice - check if a task can reduce its nice value -+ * @p: task -+ * @nice: nice value -+ */ -+int can_nice(const struct task_struct *p, const int nice) -+{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); -+} -+ -+#ifdef __ARCH_WANT_SYS_NICE -+ -+/* -+ * sys_nice - change the priority of the current process. -+ * @increment: priority increment -+ * -+ * sys_setpriority is a more generic, but much slower function that -+ * does similar things. -+ */ -+SYSCALL_DEFINE1(nice, int, increment) -+{ -+ long nice, retval; -+ -+ /* -+ * Setpriority might change our priority at the same moment. -+ * We don't have to worry. Conceptually one call occurs first -+ * and we have a single winner. 
-+ */ -+ -+ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); -+ nice = task_nice(current) + increment; -+ -+ nice = clamp_val(nice, MIN_NICE, MAX_NICE); -+ if (increment < 0 && !can_nice(current, nice)) -+ return -EPERM; -+ -+ retval = security_task_setnice(current, nice); -+ if (retval) -+ return retval; -+ -+ set_user_nice(current, nice); -+ return 0; -+} -+ -+#endif -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0 (SCHED_ISO) up to 82 (nice +19 SCHED_IDLEPRIO). -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ int delta, prio = p->prio - MAX_RT_PRIO; -+ -+ /* rt tasks and iso tasks */ -+ if (prio <= 0) -+ goto out; -+ -+ /* Convert to ms to avoid overflows */ -+ delta = NS_TO_MS(p->deadline - task_rq(p)->niffies); -+ if (unlikely(delta < 0)) -+ delta = 0; -+ delta = delta * 40 / ms_longest_deadline_diff(); -+ if (delta <= 80) -+ prio += delta; -+ if (idleprio_task(p)) -+ prio += 40; -+out: -+ return prio; -+} -+ -+/** -+ * idle_cpu - is a given CPU idle currently? -+ * @cpu: the processor in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int idle_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (rq->curr != rq->idle) -+ return 0; -+ -+ if (rq->nr_running) -+ return 0; -+ -+#ifdef CONFIG_SMP -+ if (rq->ttwu_pending) -+ return 0; -+#endif -+ -+ return 1; -+} -+ -+/** -+ * available_idle_cpu - is a given CPU idle for enqueuing work. -+ * @cpu: the CPU in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int available_idle_cpu(int cpu) -+{ -+ if (!idle_cpu(cpu)) -+ return 0; -+ -+ if (vcpu_is_preempted(cpu)) -+ return 0; -+ -+ return 1; -+} -+ -+/** -+ * idle_task - return the idle task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * Return: The idle task for the CPU @cpu. -+ */ -+struct task_struct *idle_task(int cpu) -+{ -+ return cpu_rq(cpu)->idle; -+} -+ -+/** -+ * find_process_by_pid - find a process with a matching PID value. -+ * @pid: the pid in question. -+ * -+ * The task of @pid, if found. %NULL otherwise. -+ */ -+static inline struct task_struct *find_process_by_pid(pid_t pid) -+{ -+ return pid ? find_task_by_vpid(pid) : current; -+} -+ -+/* Actually do priority change: must hold rq lock. */ -+static void __setscheduler(struct task_struct *p, struct rq *rq, int policy, -+ int prio, const struct sched_attr *attr, -+ bool keep_boost) -+{ -+ int oldrtprio, oldprio; -+ -+ /* -+ * If params can't change scheduling class changes aren't allowed -+ * either. -+ */ -+ if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS) -+ return; -+ -+ p->policy = policy; -+ oldrtprio = p->rt_priority; -+ p->rt_priority = prio; -+ p->normal_prio = normal_prio(p); -+ oldprio = p->prio; -+ /* -+ * Keep a potential priority boosting if called from -+ * sched_setscheduler(). 
-+ */ -+ p->prio = normal_prio(p); -+ if (keep_boost) -+ p->prio = rt_effective_prio(p, p->prio); -+ -+ if (task_running(rq, p)) { -+ set_rq_task(rq, p); -+ resched_task(p); -+ } else if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (p->prio < oldprio || p->rt_priority > oldrtprio) -+ try_preempt(p, rq); -+ } -+} -+ -+/* -+ * Check the target process has a UID that matches the current process's -+ */ -+static bool check_same_owner(struct task_struct *p) -+{ -+ const struct cred *cred = current_cred(), *pcred; -+ bool match; -+ -+ rcu_read_lock(); -+ pcred = __task_cred(p); -+ match = (uid_eq(cred->euid, pcred->euid) || -+ uid_eq(cred->euid, pcred->uid)); -+ rcu_read_unlock(); -+ return match; -+} -+ -+static int __sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, -+ bool user, bool pi) -+{ -+ int retval, policy = attr->sched_policy, oldpolicy = -1, priority = attr->sched_priority; -+ unsigned long rlim_rtprio = 0; -+ struct rq_flags rf; -+ int reset_on_fork; -+ struct rq *rq; -+ -+ /* The pi code expects interrupts enabled */ -+ BUG_ON(pi && in_interrupt()); -+ -+ if (is_rt_policy(policy) && !capable(CAP_SYS_NICE)) { -+ unsigned long lflags; -+ -+ if (!lock_task_sighand(p, &lflags)) -+ return -ESRCH; -+ rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO); -+ unlock_task_sighand(p, &lflags); -+ if (rlim_rtprio) -+ goto recheck; -+ /* -+ * If the caller requested an RT policy without having the -+ * necessary rights, we downgrade the policy to SCHED_ISO. -+ * We also set the parameter to zero to pass the checks. -+ */ -+ policy = SCHED_ISO; -+ priority = 0; -+ } -+recheck: -+ /* Double check policy once rq lock held */ -+ if (policy < 0) { -+ reset_on_fork = p->sched_reset_on_fork; -+ policy = oldpolicy = p->policy; -+ } else { -+ reset_on_fork = !!(policy & SCHED_RESET_ON_FORK); -+ policy &= ~SCHED_RESET_ON_FORK; -+ -+ if (!SCHED_RANGE(policy)) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV)) -+ return -EINVAL; -+ -+ /* -+ * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * SCHED_BATCH is 0. 
-+ */ -+ if (priority < 0 || -+ (p->mm && priority > MAX_USER_RT_PRIO - 1) || -+ (!p->mm && priority > MAX_RT_PRIO - 1)) -+ return -EINVAL; -+ if (is_rt_policy(policy) != (priority != 0)) -+ return -EINVAL; -+ -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (is_rt_policy(policy)) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (priority > p->rt_priority && -+ priority > rlim_rtprio) -+ return -EPERM; -+ } else { -+ switch (p->policy) { -+ /* -+ * Can only downgrade policies but not back to -+ * SCHED_NORMAL -+ */ -+ case SCHED_ISO: -+ if (policy == SCHED_ISO) -+ goto out; -+ if (policy != SCHED_NORMAL) -+ return -EPERM; -+ break; -+ case SCHED_BATCH: -+ if (policy == SCHED_BATCH) -+ goto out; -+ if (policy != SCHED_IDLEPRIO) -+ return -EPERM; -+ break; -+ case SCHED_IDLEPRIO: -+ if (policy == SCHED_IDLEPRIO) -+ goto out; -+ return -EPERM; -+ default: -+ break; -+ } -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag: */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ -+ if (user) { -+ retval = security_task_setscheduler(p); -+ if (retval) -+ return retval; -+ } -+ -+ if (pi) -+ cpuset_read_lock(); -+ -+ /* -+ * Make sure no PI-waiters arrive (or leave) while we are -+ * changing the priority of the task: -+ * -+ * To be able to change p->policy safely, the runqueue lock must be -+ * held. -+ */ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ /* -+ * Changing the policy of the stop threads its a very bad idea: -+ */ -+ if (p == rq->stop) { -+ retval = -EINVAL; -+ goto unlock; -+ } -+ -+ /* -+ * If not changing anything there's no need to proceed further: -+ */ -+ if (unlikely(policy == p->policy && (!is_rt_policy(policy) || -+ priority == p->rt_priority))) { -+ retval = 0; -+ goto unlock; -+ } -+ -+ /* Re-check policy now with rq lock held */ -+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { -+ policy = oldpolicy = -1; -+ task_rq_unlock(rq, p, &rf); -+ if (pi) -+ cpuset_read_unlock(); -+ goto recheck; -+ } -+ p->sched_reset_on_fork = reset_on_fork; -+ -+ __setscheduler(p, rq, policy, priority, attr, pi); -+ -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ task_rq_unlock(rq, p, &rf); -+ -+ if (pi) { -+ cpuset_read_unlock(); -+ rt_mutex_adjust_pi(p); -+ } -+ preempt_enable(); -+out: -+ return 0; -+ -+unlock: -+ task_rq_unlock(rq, p, &rf); -+ if (pi) -+ cpuset_read_unlock(); -+ return retval; -+} -+ -+static int _sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param, bool check) -+{ -+ struct sched_attr attr = { -+ .sched_policy = policy, -+ .sched_priority = param->sched_priority, -+ .sched_nice = PRIO_TO_NICE(p->static_prio), -+ }; -+ -+ return __sched_setscheduler(p, &attr, check, true); -+} -+/** -+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Use sched_set_fifo(), read its comment. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * -+ * NOTE that the task may be already dead. 
-+ */ -+int sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, true); -+} -+ -+ -+int sched_setattr(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, true, true); -+} -+ -+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, false, true); -+} -+ -+/** -+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Just like sched_setscheduler, only don't bother checking if the -+ * current context has permission. For example, this is needed in -+ * stop_machine(): we create temporary high priority worker threads, -+ * but our caller might not have that capability. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+int sched_setscheduler_nocheck(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, false); -+} -+ -+/* -+ * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally -+ * incapable of resource management, which is the one thing an OS really should -+ * be doing. -+ * -+ * This is of course the reason it is limited to privileged users only. -+ * -+ * Worse still; it is fundamentally impossible to compose static priority -+ * workloads. You cannot take two correctly working static prio workloads -+ * and smash them together and still expect them to work. -+ * -+ * For this reason 'all' FIFO tasks the kernel creates are basically at: -+ * -+ * MAX_RT_PRIO / 2 -+ * -+ * The administrator _MUST_ configure the system, the kernel simply doesn't -+ * know enough information to make a sensible choice. -+ */ -+void sched_set_fifo(struct task_struct *p) -+{ -+ struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 }; -+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_fifo); -+ -+/* -+ * For when you don't much care about FIFO, but want to be above SCHED_NORMAL. -+ */ -+void sched_set_fifo_low(struct task_struct *p) -+{ -+ struct sched_param sp = { .sched_priority = 1 }; -+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_fifo_low); -+ -+void sched_set_normal(struct task_struct *p, int nice) -+{ -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ .sched_nice = nice, -+ }; -+ WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_normal); -+ -+static int -+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) -+{ -+ struct sched_param lparam; -+ struct task_struct *p; -+ int retval; -+ -+ if (!param || pid < 0) -+ return -EINVAL; -+ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) -+ return -EFAULT; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setscheduler(p, policy, &lparam); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/* -+ * Mimics kernel/events/core.c perf_copy_attr(). 
-+ */ -+static int sched_copy_attr(struct sched_attr __user *uattr, -+ struct sched_attr *attr) -+{ -+ u32 size; -+ int ret; -+ -+ /* Zero the full structure, so that a short copy will be nice: */ -+ memset(attr, 0, sizeof(*attr)); -+ -+ ret = get_user(size, &uattr->size); -+ if (ret) -+ return ret; -+ -+ /* ABI compatibility quirk: */ -+ if (!size) -+ size = SCHED_ATTR_SIZE_VER0; -+ -+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) -+ goto err_size; -+ -+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); -+ if (ret) { -+ if (ret == -E2BIG) -+ goto err_size; -+ return ret; -+ } -+ -+ /* -+ * XXX: Do we want to be lenient like existing syscalls; or do we want -+ * to be strict and return an error on out-of-bounds values? -+ */ -+ attr->sched_nice = clamp(attr->sched_nice, -20, 19); -+ -+ /* sched/core.c uses zero here but we already know ret is zero */ -+ return 0; -+ -+err_size: -+ put_user(sizeof(*attr), &uattr->size); -+ return -E2BIG; -+} -+ -+/* -+ * sched_setparam() passes in -1 for its policy, to let the functions -+ * it calls know not to change it. -+ */ -+#define SETPARAM_POLICY -1 -+ -+/** -+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority -+ * @pid: the pid in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) -+{ -+ if (policy < 0) -+ return -EINVAL; -+ -+ return do_sched_setscheduler(pid, policy, param); -+} -+ -+/** -+ * sys_sched_setparam - set/change the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); -+} -+ -+/** -+ * sys_sched_setattr - same as above, but with extended sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ */ -+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, flags) -+{ -+ struct sched_attr attr; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || flags) -+ return -EINVAL; -+ -+ retval = sched_copy_attr(uattr, &attr); -+ if (retval) -+ return retval; -+ -+ if ((int)attr.sched_policy < 0) -+ return -EINVAL; -+ if (attr.sched_flags & SCHED_FLAG_KEEP_POLICY) -+ attr.sched_policy = SETPARAM_POLICY; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setattr(p, &attr); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread -+ * @pid: the pid in question. -+ * -+ * Return: On success, the policy of the thread. Otherwise, a negative error -+ * code. 
-+ */ -+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) -+{ -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (pid < 0) -+ goto out_nounlock; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (p) { -+ retval = security_task_getscheduler(p); -+ if (!retval) -+ retval = p->policy; -+ } -+ rcu_read_unlock(); -+ -+out_nounlock: -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the RT priority. -+ * -+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error -+ * code. -+ */ -+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ struct sched_param lp = { .sched_priority = 0 }; -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (!param || pid < 0) -+ goto out_nounlock; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ if (has_rt_policy(p)) -+ lp.sched_priority = p->rt_priority; -+ rcu_read_unlock(); -+ -+ /* -+ * This one might sleep, we cannot do it with a spinlock held ... -+ */ -+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; -+ -+out_nounlock: -+ return retval; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/* -+ * Copy the kernel size attribute structure (which might be larger -+ * than what user-space knows about) to user-space. -+ * -+ * Note that all cases are valid: user-space buffer can be larger or -+ * smaller than the kernel-space buffer. The usual case is that both -+ * have the same size. -+ */ -+static int -+sched_attr_copy_to_user(struct sched_attr __user *uattr, -+ struct sched_attr *kattr, -+ unsigned int usize) -+{ -+ unsigned int ksize = sizeof(*kattr); -+ -+ if (!access_ok(uattr, usize)) -+ return -EFAULT; -+ -+ /* -+ * sched_getattr() ABI forwards and backwards compatibility: -+ * -+ * If usize == ksize then we just copy everything to user-space and all is good. -+ * -+ * If usize < ksize then we only copy as much as user-space has space for, -+ * this keeps ABI compatibility as well. We skip the rest. -+ * -+ * If usize > ksize then user-space is using a newer version of the ABI, -+ * which part the kernel doesn't know about. Just ignore it - tooling can -+ * detect the kernel's knowledge of attributes from the attr->size value -+ * which is set to ksize in this case. -+ */ -+ kattr->size = min(usize, ksize); -+ -+ if (copy_to_user(uattr, kattr, kattr->size)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ * @usize: sizeof(attr) for fwd/bwd comp. -+ * @flags: for future extension. 
-+ */ -+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, usize, unsigned int, flags) -+{ -+ struct sched_attr kattr = { }; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || usize > PAGE_SIZE || -+ usize < SCHED_ATTR_SIZE_VER0 || flags) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ kattr.sched_policy = p->policy; -+ if (rt_task(p)) -+ kattr.sched_priority = p->rt_priority; -+ else -+ kattr.sched_nice = task_nice(p); -+ -+ rcu_read_unlock(); -+ -+ return sched_attr_copy_to_user(uattr, &kattr, usize); -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ -+ cpumask_var_t cpus_allowed, new_mask; -+ struct task_struct *p; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ p = find_process_by_pid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ return -ESRCH; -+ } -+ -+ /* Prevent p going away */ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->flags & PF_NO_SETAFFINITY) { -+ retval = -EINVAL; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ retval = -EPERM; -+ if (!check_same_owner(p)) { -+ rcu_read_lock(); -+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { -+ rcu_read_unlock(); -+ goto out_unlock; -+ } -+ rcu_read_unlock(); -+ } -+ -+ retval = security_task_setscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ cpuset_cpus_allowed(p, cpus_allowed); -+ cpumask_and(new_mask, in_mask, cpus_allowed); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ -+ if (!retval) { -+ cpuset_cpus_allowed(p, cpus_allowed); -+ if (!cpumask_subset(new_mask, cpus_allowed)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. Just reset the cpus_allowed to the -+ * cpuset's cpus_allowed -+ */ -+ cpumask_copy(new_mask, cpus_allowed); -+ goto again; -+ } -+ } -+out_unlock: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_allowed); -+out_put_task: -+ put_task_struct(p); -+ return retval; -+} -+ -+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, -+ cpumask_t *new_mask) -+{ -+ if (len < cpumask_size()) -+ cpumask_clear(new_mask); -+ else if (len > cpumask_size()) -+ len = cpumask_size(); -+ -+ return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; -+} -+ -+ -+/** -+ * sys_sched_setaffinity - set the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to the new CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. 
-+ */ -+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ cpumask_var_t new_mask; -+ int retval; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); -+ if (retval == 0) -+ retval = sched_setaffinity(pid, new_mask); -+ free_cpumask_var(new_mask); -+ return retval; -+} -+ -+long sched_getaffinity(pid_t pid, cpumask_t *mask) -+{ -+ struct task_struct *p; -+ unsigned long flags; -+ int retval; -+ -+ get_online_cpus(); -+ rcu_read_lock(); -+ -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+out_unlock: -+ rcu_read_unlock(); -+ put_online_cpus(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getaffinity - get the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to hold the current CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ int ret; -+ cpumask_var_t mask; -+ -+ if ((len * BITS_PER_BYTE) < nr_cpu_ids) -+ return -EINVAL; -+ if (len & (sizeof(unsigned long)-1)) -+ return -EINVAL; -+ -+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ ret = sched_getaffinity(pid, mask); -+ if (ret == 0) { -+ unsigned int retlen = min(len, cpumask_size()); -+ -+ if (copy_to_user(user_mask_ptr, mask, retlen)) -+ ret = -EFAULT; -+ else -+ ret = retlen; -+ } -+ free_cpumask_var(mask); -+ -+ return ret; -+} -+ -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. It does this by -+ * scheduling away the current task. If it still has the earliest deadline -+ * it will be scheduled again as the next task. -+ * -+ * Return: 0. -+ */ -+static void do_sched_yield(void) -+{ -+ struct rq *rq; -+ -+ if (!sched_yield_type) -+ return; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ rq_lock(rq); -+ -+ if (sched_yield_type > 1) -+ time_slice_expired(current, rq); -+ schedstat_inc(rq->yld_count); -+ -+ /* -+ * Since we are going to call schedule() anyway, there's -+ * no need to preempt or enable interrupts: -+ */ -+ preempt_disable(); -+ rq_unlock(rq); -+ sched_preempt_enable_no_resched(); -+ -+ schedule(); -+} -+ -+SYSCALL_DEFINE0(sched_yield) -+{ -+ do_sched_yield(); -+ return 0; -+} -+ -+#ifndef CONFIG_PREEMPTION -+int __sched _cond_resched(void) -+{ -+ if (should_resched(0)) { -+ preempt_schedule_common(); -+ return 1; -+ } -+ rcu_all_qs(); -+ return 0; -+} -+EXPORT_SYMBOL(_cond_resched); -+#endif -+ -+/* -+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, -+ * call schedule, and on return reacquire the lock. -+ * -+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level -+ * operations here to prevent schedule() from being called twice (once via -+ * spin_unlock(), once by hand). 
-+ */ -+int __cond_resched_lock(spinlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held(lock); -+ -+ if (spin_needbreak(lock) || resched) { -+ spin_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ spin_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_lock); -+ -+/** -+ * yield - yield the current processor to other threads. -+ * -+ * Do not ever use this function, there's a 99% chance you're doing it wrong. -+ * -+ * The scheduler is at all times free to pick the calling task as the most -+ * eligible task to run, if removing the yield() call from your code breaks -+ * it, its already broken. -+ * -+ * Typical broken usage is: -+ * -+ * while (!event) -+ * yield(); -+ * -+ * where one assumes that yield() will let 'the other' process run that will -+ * make event true. If the current task is a SCHED_FIFO task that will never -+ * happen. Never use yield() as a progress guarantee!! -+ * -+ * If you want to use yield() to wait for something, use wait_event(). -+ * If you want to use yield() to be 'nice' for others, use cond_resched(). -+ * If you still want to use yield(), do not! -+ */ -+void __sched yield(void) -+{ -+ set_current_state(TASK_RUNNING); -+ do_sched_yield(); -+} -+EXPORT_SYMBOL(yield); -+ -+/** -+ * yield_to - yield the current processor to another thread in -+ * your thread group, or accelerate that thread toward the -+ * processor it's on. -+ * @p: target task -+ * @preempt: whether task preemption is allowed or not -+ * -+ * It's the caller's job to ensure that the target task struct -+ * can't go away on us before we can do any checks. -+ * -+ * Return: -+ * true (>0) if we indeed boosted the target task. -+ * false (0) if we failed to boost the target. -+ * -ESRCH if there's no task to yield to. -+ */ -+int __sched yield_to(struct task_struct *p, bool preempt) -+{ -+ struct task_struct *rq_p; -+ struct rq *rq, *p_rq; -+ unsigned long flags; -+ int yielded = 0; -+ -+ local_irq_save(flags); -+ rq = this_rq(); -+ -+again: -+ p_rq = task_rq(p); -+ /* -+ * If we're the only runnable task on the rq and target rq also -+ * has only one task, there's absolutely no point in yielding. -+ */ -+ if (task_running(p_rq, p) || p->state) { -+ yielded = -ESRCH; -+ goto out_irq; -+ } -+ -+ double_rq_lock(rq, p_rq); -+ if (unlikely(task_rq(p) != p_rq)) { -+ double_rq_unlock(rq, p_rq); -+ goto again; -+ } -+ -+ yielded = 1; -+ schedstat_inc(rq->yld_count); -+ rq_p = rq->curr; -+ if (p->deadline > rq_p->deadline) -+ p->deadline = rq_p->deadline; -+ p->time_slice += rq_p->time_slice; -+ if (p->time_slice > timeslice()) -+ p->time_slice = timeslice(); -+ time_slice_expired(rq_p, rq); -+ if (preempt && rq != p_rq) -+ resched_task(p_rq->curr); -+ double_rq_unlock(rq, p_rq); -+out_irq: -+ local_irq_restore(flags); -+ -+ if (yielded > 0) -+ schedule(); -+ return yielded; -+} -+EXPORT_SYMBOL_GPL(yield_to); -+ -+int io_schedule_prepare(void) -+{ -+ int old_iowait = current->in_iowait; -+ -+ current->in_iowait = 1; -+ blk_schedule_flush_plug(current); -+ -+ return old_iowait; -+} -+ -+void io_schedule_finish(int token) -+{ -+ current->in_iowait = token; -+} -+ -+/* -+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so -+ * that process accounting knows that this is a task in IO wait state. 
-+ * -+ * But don't do that if it is a deliberate, throttling IO wait (this task -+ * has set its backing_dev_info: the queue against which it should throttle) -+ */ -+ -+long __sched io_schedule_timeout(long timeout) -+{ -+ int token; -+ long ret; -+ -+ token = io_schedule_prepare(); -+ ret = schedule_timeout(timeout); -+ io_schedule_finish(token); -+ -+ return ret; -+} -+EXPORT_SYMBOL(io_schedule_timeout); -+ -+void __sched io_schedule(void) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ schedule(); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(io_schedule); -+ -+/** -+ * sys_sched_get_priority_max - return maximum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the maximum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_max, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = MAX_USER_RT_PRIO-1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLEPRIO: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * sys_sched_get_priority_min - return minimum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the minimum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_min, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLEPRIO: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) -+{ -+ struct task_struct *p; -+ unsigned int time_slice; -+ struct rq_flags rf; -+ struct rq *rq; -+ int retval; -+ -+ if (pid < 0) -+ return -EINVAL; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ rq = task_rq_lock(p, &rf); -+ time_slice = p->policy == SCHED_FIFO ? 0 : MS_TO_NS(task_timeslice(p)); -+ task_rq_unlock(rq, p, &rf); -+ -+ rcu_read_unlock(); -+ *t = ns_to_timespec64(time_slice); -+ return 0; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/** -+ * sys_sched_rr_get_interval - return the default timeslice of a process. -+ * @pid: pid of the process. -+ * @interval: userspace pointer to the timeslice value. -+ * -+ * this syscall writes the default timeslice value of a given process -+ * into the user-space timespec buffer. A value of '0' means infinity. -+ * -+ * Return: On success, 0 and the timeslice is in @interval. Otherwise, -+ * an error code. 
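For context, the priority-range and timeslice syscalls above back the POSIX sched_get_priority_max()/sched_get_priority_min() interfaces and sched_rr_get_interval(2). A minimal query sketch (an illustration only, not part of the patch; it assumes a Linux host with glibc, and uses pid 0 to mean the calling process):

/* Illustrative userspace sketch (not from the patch): query the RT
 * priority range for SCHED_FIFO and the timeslice reported for the
 * calling process. Assumes Linux + glibc. */
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	printf("SCHED_FIFO priority range: %d..%d\n",
	       sched_get_priority_min(SCHED_FIFO),
	       sched_get_priority_max(SCHED_FIFO));

	if (sched_rr_get_interval(0, &ts) == 0)	/* pid 0 == caller */
		printf("timeslice: %ld.%09ld s\n",
		       (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}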
-+ */ -+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, -+ struct __kernel_timespec __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_timespec64(&t, interval); -+ -+ return retval; -+} -+ -+#ifdef CONFIG_COMPAT_32BIT_TIME -+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, -+ struct old_timespec32 __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_old_timespec32(&t, interval); -+ return retval; -+} -+#endif -+ -+void sched_show_task(struct task_struct *p) -+{ -+ unsigned long free = 0; -+ int ppid; -+ -+ if (!try_get_task_stack(p)) -+ return; -+ -+ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); -+ -+ if (p->state == TASK_RUNNING) -+ printk(KERN_CONT " running task "); -+#ifdef CONFIG_DEBUG_STACK_USAGE -+ free = stack_not_used(p); -+#endif -+ ppid = 0; -+ rcu_read_lock(); -+ if (pid_alive(p)) -+ ppid = task_pid_nr(rcu_dereference(p->real_parent)); -+ rcu_read_unlock(); -+ pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", -+ free, task_pid_nr(p), ppid, -+ (unsigned long)task_thread_info(p)->flags); -+ -+ print_worker_info(KERN_INFO, p); -+ show_stack(p, NULL, KERN_INFO); -+ put_task_stack(p); -+} -+EXPORT_SYMBOL_GPL(sched_show_task); -+ -+static inline bool -+state_filter_match(unsigned long state_filter, struct task_struct *p) -+{ -+ /* no filter, everything matches */ -+ if (!state_filter) -+ return true; -+ -+ /* filter, but doesn't match */ -+ if (!(p->state & state_filter)) -+ return false; -+ -+ /* -+ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows -+ * TASK_KILLABLE). -+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) -+ return false; -+ -+ return true; -+} -+ -+void show_state_filter(unsigned long state_filter) -+{ -+ struct task_struct *g, *p; -+ -+ rcu_read_lock(); -+ for_each_process_thread(g, p) { -+ /* -+ * reset the NMI-timeout, listing all files on a slow -+ * console might take a lot of time: -+ * Also, reset softlockup watchdogs on all CPUs, because -+ * another CPU might be blocked waiting for us to process -+ * an IPI. -+ */ -+ touch_nmi_watchdog(); -+ touch_all_softlockup_watchdogs(); -+ if (state_filter_match(state_filter, p)) -+ sched_show_task(p); -+ } -+ -+ rcu_read_unlock(); -+ /* -+ * Only show locks if all tasks are dumped: -+ */ -+ if (!state_filter) -+ debug_show_all_locks(); -+} -+ -+void dump_cpu_task(int cpu) -+{ -+ pr_info("Task dump for CPU %d:\n", cpu); -+ sched_show_task(cpu_curr(cpu)); -+} -+ -+#ifdef CONFIG_SMP -+void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ p->nr_cpus_allowed = cpumask_weight(new_mask); -+} -+ -+void __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ struct rq *rq = task_rq(p); -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ -+ if (task_queued(p)) { -+ /* -+ * Because __kthread_bind() calls this on blocked tasks without -+ * holding rq->lock. -+ */ -+ lockdep_assert_held(rq->lock); -+ } -+} -+ -+/* -+ * Calling do_set_cpus_allowed from outside the scheduler code should not be -+ * called on a running or queued task. We should be holding pi_lock. 
-+ */ -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ __do_set_cpus_allowed(p, new_mask); -+ if (needs_other_cpu(p, task_cpu(p))) { -+ struct rq *rq; -+ -+ rq = __task_rq_lock(p, NULL); -+ set_task_cpu(p, valid_task_cpu(p)); -+ resched_task(p); -+ __task_rq_unlock(rq, NULL); -+ } -+} -+#endif -+ -+/** -+ * init_idle - set up an idle thread for a given CPU -+ * @idle: task in question -+ * @cpu: cpu the idle task belongs to -+ * -+ * NOTE: this function does not set the idle thread's NEED_RESCHED -+ * flag, to make booting more robust. -+ */ -+void init_idle(struct task_struct *idle, int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&idle->pi_lock, flags); -+ raw_spin_lock(rq->lock); -+ idle->last_ran = rq->niffies; -+ time_slice_expired(idle, rq); -+ idle->state = TASK_RUNNING; -+ /* Setting prio to illegal value shouldn't matter when never queued */ -+ idle->prio = PRIO_LIMIT; -+ idle->flags |= PF_IDLE; -+ -+ scs_task_reset(idle); -+ kasan_unpoison_task_stack(idle); -+ -+#ifdef CONFIG_SMP -+ /* -+ * It's possible that init_idle() gets called multiple times on a task, -+ * in that case do_set_cpus_allowed() will not do the right thing. -+ * -+ * And since this is boot we can forgo the serialisation. -+ */ -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); -+#ifdef CONFIG_SMT_NICE -+ idle->smt_bias = 0; -+#endif -+#endif -+ set_rq_task(rq, idle); -+ -+ /* Silence PROVE_RCU */ -+ rcu_read_lock(); -+ set_task_cpu(idle, cpu); -+ rcu_read_unlock(); -+ -+ rq->idle = idle; -+ rcu_assign_pointer(rq->curr, idle); -+ idle->on_rq = TASK_ON_RQ_QUEUED; -+ raw_spin_unlock(rq->lock); -+ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); -+ -+ /* Set the preempt count _outside_ the spinlocks! */ -+ init_idle_preempt_count(idle, cpu); -+ -+ ftrace_graph_init_idle_task(idle, cpu); -+ vtime_init_idle(idle, cpu); -+#ifdef CONFIG_SMP -+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -+#endif -+} -+ -+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, -+ const struct cpumask __maybe_unused *trial) -+{ -+ return 1; -+} -+ -+int task_can_attach(struct task_struct *p, -+ const struct cpumask *cs_cpus_allowed) -+{ -+ int ret = 0; -+ -+ /* -+ * Kthreads which disallow setaffinity shouldn't be moved -+ * to a new cpuset; we don't want to change their CPU -+ * affinity and isolating such threads by their set of -+ * allowed nodes is unnecessary. Thus, cpusets are not -+ * applicable for such threads. This prevents checking for -+ * success of set_cpus_allowed_ptr() on all attached tasks -+ * before cpus_mask may be changed. -+ */ -+ if (p->flags & PF_NO_SETAFFINITY) -+ ret = -EINVAL; -+ -+ return ret; -+} -+ -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+ rq_lock_irqsave(rq, &rf); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(rq); -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+#ifdef CONFIG_SMP -+#ifdef CONFIG_NO_HZ_COMMON -+void select_nohz_load_balancer(int stop_tick) -+{ -+} -+ -+void set_cpu_sd_state_idle(void) {} -+void nohz_balance_enter_idle(int cpu) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. -+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). 
-+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(), default_cpu = -1; -+ struct sched_domain *sd; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { -+ if (!idle_cpu(cpu)) -+ return cpu; -+ default_cpu = cpu; -+ } -+ -+ rcu_read_lock(); -+ for_each_domain(cpu, sd) { -+ for_each_cpu_and(i, sched_domain_span(sd), -+ housekeeping_cpumask(HK_FLAG_TIMER)) { -+ if (cpu == i) -+ continue; -+ -+ if (!idle_cpu(i)) { -+ cpu = i; -+ goto unlock; -+ } -+ } -+ } -+ -+ if (default_cpu == -1) -+ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+ cpu = default_cpu; -+unlock: -+ rcu_read_unlock(); -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. -+ */ -+void wake_up_idle_cpu(int cpu) -+{ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ if (set_nr_and_not_polling(cpu_rq(cpu)->idle)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+static bool wake_up_full_nohz_cpu(int cpu) -+{ -+ /* -+ * We just need the target to call irq_exit() and re-evaluate -+ * the next tick. The nohz full kick at least implies that. -+ * If needed we can still optimize that later with an -+ * empty IRQ. -+ */ -+ if (cpu_is_offline(cpu)) -+ return true; /* Don't try to wake offline CPUs. */ -+ if (tick_nohz_full_cpu(cpu)) { -+ if (cpu != smp_processor_id() || -+ tick_nohz_tick_stopped()) -+ tick_nohz_full_kick_cpu(cpu); -+ return true; -+ } -+ -+ return false; -+} -+ -+/* -+ * Wake up the specified CPU. If the CPU is going offline, it is the -+ * caller's responsibility to deal with the lost wakeup, for example, -+ * by hooking into the CPU_DEAD notifier like timers and hrtimers do. -+ */ -+void wake_up_nohz_cpu(int cpu) -+{ -+ if (!wake_up_full_nohz_cpu(cpu)) -+ wake_up_idle_cpu(cpu); -+} -+#endif /* CONFIG_NO_HZ_COMMON */ -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ bool queued = false, running_wrong = false, kthread; -+ unsigned int dest_cpu; -+ struct rq_flags rf; -+ struct rq *rq; -+ int ret = 0; -+ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ kthread = !!(p->flags & PF_KTHREAD); -+ if (kthread) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. 
-+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (cpumask_equal(&p->cpus_mask, new_mask)) -+ goto out; -+ -+ /* -+ * Picking a ~random cpu helps in cases where we are changing affinity -+ * for groups of tasks (ie. cpuset), so that load balancing is not -+ * immediately required to distribute the tasks within their new mask. -+ */ -+ dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ queued = task_queued(p); -+ __do_set_cpus_allowed(p, new_mask); -+ -+ if (kthread) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. -+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(rq, p)) { -+ /* Task is running on the wrong cpu now, reschedule it. */ -+ if (rq == this_rq()) { -+ set_task_cpu(p, dest_cpu); -+ set_tsk_need_resched(p); -+ running_wrong = true; -+ } else -+ resched_task(p); -+ } else { -+ if (queued) { -+ /* -+ * Switch runqueue locks after dequeueing the task -+ * here while still holding the pi_lock to be holding -+ * the correct lock for enqueueing. -+ */ -+ dequeue_task(rq, p, 0); -+ rq_unlock(rq); -+ -+ rq = cpu_rq(dest_cpu); -+ rq_lock(rq); -+ } -+ set_task_cpu(p, dest_cpu); -+ if (queued) -+ enqueue_task(rq, p, 0); -+ } -+ if (queued) -+ try_preempt(p, rq); -+ if (running_wrong) -+ preempt_disable(); -+out: -+ task_rq_unlock(rq, p, &rf); -+ -+ if (running_wrong) { -+ __schedule(true); -+ preempt_enable(); -+ } -+ -+ return ret; -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+#ifdef CONFIG_HOTPLUG_CPU -+/* -+ * Run through task list and find tasks affined to the dead cpu, then remove -+ * that cpu from the list, enable cpu0 and set the zerobound flag. Must hold -+ * cpu 0 and src_cpu's runqueue locks. We should be holding both rq lock and -+ * pi_lock to change cpus_mask but it's not going to matter here. -+ */ -+static void bind_zero(int src_cpu) -+{ -+ struct task_struct *p, *t; -+ struct rq *rq0; -+ int bound = 0; -+ -+ if (src_cpu == 0) -+ return; -+ -+ rq0 = cpu_rq(0); -+ -+ do_each_thread(t, p) { -+ if (cpumask_test_cpu(src_cpu, p->cpus_ptr)) { -+ bool local = (task_cpu(p) == src_cpu); -+ struct rq *rq = task_rq(p); -+ -+ /* task_running is the cpu stopper thread */ -+ if (local && task_running(rq, p)) -+ continue; -+ atomic_clear_cpu(src_cpu, &p->cpus_mask); -+ atomic_set_cpu(0, &p->cpus_mask); -+ p->zerobound = true; -+ bound++; -+ if (local) { -+ bool queued = task_queued(p); -+ -+ if (queued) -+ dequeue_task(rq, p, 0); -+ set_task_cpu(p, 0); -+ if (queued) -+ enqueue_task(rq0, p, 0); -+ } -+ } -+ } while_each_thread(t, p); -+ -+ if (bound) { -+ printk(KERN_INFO "MuQSS removed affinity for %d processes to cpu %d\n", -+ bound, src_cpu); -+ } -+} -+ -+/* Find processes with the zerobound flag and reenable their affinity for the -+ * CPU coming alive. 
*/ -+static void unbind_zero(int src_cpu) -+{ -+ int unbound = 0, zerobound = 0; -+ struct task_struct *p, *t; -+ -+ if (src_cpu == 0) -+ return; -+ -+ do_each_thread(t, p) { -+ if (!p->mm) -+ p->zerobound = false; -+ if (p->zerobound) { -+ unbound++; -+ cpumask_set_cpu(src_cpu, &p->cpus_mask); -+ /* Once every CPU affinity has been re-enabled, remove -+ * the zerobound flag */ -+ if (cpumask_subset(cpu_possible_mask, p->cpus_ptr)) { -+ p->zerobound = false; -+ zerobound++; -+ } -+ } -+ } while_each_thread(t, p); -+ -+ if (unbound) { -+ printk(KERN_INFO "MuQSS added affinity for %d processes to cpu %d\n", -+ unbound, src_cpu); -+ } -+ if (zerobound) { -+ printk(KERN_INFO "MuQSS released forced binding to cpu0 for %d processes\n", -+ zerobound); -+ } -+} -+ -+/* -+ * Ensure that the idle task is using init_mm right before its cpu goes -+ * offline. -+ */ -+void idle_task_exit(void) -+{ -+ struct mm_struct *mm = current->active_mm; -+ -+ BUG_ON(cpu_online(smp_processor_id())); -+ BUG_ON(current != this_rq()->idle); -+ -+ if (mm != &init_mm) { -+ switch_mm(mm, &init_mm, current); -+ finish_arch_post_lock_switch(); -+ } -+ -+ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ -+} -+#else /* CONFIG_HOTPLUG_CPU */ -+static void unbind_zero(int src_cpu) {} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. -+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. -+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+ -+static struct ctl_table sd_ctl_dir[] = { -+ { -+ .procname = "sched_domain", -+ .mode = 0555, -+ }, -+ {} -+}; -+ -+static struct ctl_table sd_ctl_root[] = { -+ { -+ .procname = "kernel", -+ .mode = 0555, -+ .child = sd_ctl_dir, -+ }, -+ {} -+}; -+ -+static struct ctl_table *sd_alloc_ctl_entry(int n) -+{ -+ struct ctl_table *entry = -+ kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL); -+ -+ return entry; -+} -+ -+static void sd_free_ctl_entry(struct ctl_table **tablep) -+{ -+ struct ctl_table *entry; -+ -+ /* -+ * In the intermediate directories, both the child directory and -+ * procname are dynamically allocated and could fail but the mode -+ * will always be set. In the lowest directory the names are -+ * static strings and all have proc handlers. 
-+ */ -+ for (entry = *tablep; entry->mode; entry++) { -+ if (entry->child) -+ sd_free_ctl_entry(&entry->child); -+ if (entry->proc_handler == NULL) -+ kfree(entry->procname); -+ } -+ -+ kfree(*tablep); -+ *tablep = NULL; -+} -+ -+static void -+set_table_entry(struct ctl_table *entry, -+ const char *procname, void *data, int maxlen, -+ umode_t mode, proc_handler *proc_handler) -+{ -+ entry->procname = procname; -+ entry->data = data; -+ entry->maxlen = maxlen; -+ entry->mode = mode; -+ entry->proc_handler = proc_handler; -+} -+ -+static struct ctl_table * -+sd_alloc_ctl_domain_table(struct sched_domain *sd) -+{ -+ struct ctl_table *table = sd_alloc_ctl_entry(9); -+ -+ if (table == NULL) -+ return NULL; -+ -+ set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring); -+ /* &table[8] is terminator */ -+ -+ return table; -+} -+ -+static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu) -+{ -+ struct ctl_table *entry, *table; -+ struct sched_domain *sd; -+ int domain_num = 0, i; -+ char buf[32]; -+ -+ for_each_domain(cpu, sd) -+ domain_num++; -+ entry = table = sd_alloc_ctl_entry(domain_num + 1); -+ if (table == NULL) -+ return NULL; -+ -+ i = 0; -+ for_each_domain(cpu, sd) { -+ snprintf(buf, 32, "domain%d", i); -+ entry->procname = kstrdup(buf, GFP_KERNEL); -+ entry->mode = 0555; -+ entry->child = sd_alloc_ctl_domain_table(sd); -+ entry++; -+ i++; -+ } -+ return table; -+} -+ -+static cpumask_var_t sd_sysctl_cpus; -+static struct ctl_table_header *sd_sysctl_header; -+ -+void register_sched_domain_sysctl(void) -+{ -+ static struct ctl_table *cpu_entries; -+ static struct ctl_table **cpu_idx; -+ char buf[32]; -+ int i; -+ -+ if (!cpu_entries) { -+ cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1); -+ if (!cpu_entries) -+ return; -+ -+ WARN_ON(sd_ctl_dir[0].child); -+ sd_ctl_dir[0].child = cpu_entries; -+ } -+ -+ if (!cpu_idx) { -+ struct ctl_table *e = cpu_entries; -+ -+ cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL); -+ if (!cpu_idx) -+ return; -+ -+ /* deal with sparse possible map */ -+ for_each_possible_cpu(i) { -+ cpu_idx[i] = e; -+ e++; -+ } -+ } -+ -+ if (!cpumask_available(sd_sysctl_cpus)) { -+ if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL)) -+ return; -+ -+ /* init to possible to not have holes in @cpu_entries */ -+ cpumask_copy(sd_sysctl_cpus, cpu_possible_mask); -+ } -+ -+ for_each_cpu(i, sd_sysctl_cpus) { -+ struct ctl_table *e = cpu_idx[i]; -+ -+ if (e->child) -+ sd_free_ctl_entry(&e->child); -+ -+ if (!e->procname) { -+ snprintf(buf, 32, "cpu%d", i); -+ e->procname = kstrdup(buf, GFP_KERNEL); -+ } -+ e->mode = 0555; -+ e->child = sd_alloc_ctl_cpu_table(i); -+ -+ __cpumask_clear_cpu(i, sd_sysctl_cpus); -+ } -+ -+ WARN_ON(sd_sysctl_header); -+ sd_sysctl_header = 
register_sysctl_table(sd_ctl_root); -+} -+ -+void dirty_sched_domain_sysctl(int cpu) -+{ -+ if (cpumask_available(sd_sysctl_cpus)) -+ __cpumask_set_cpu(cpu, sd_sysctl_cpus); -+} -+ -+/* may be called multiple times per register */ -+void unregister_sched_domain_sysctl(void) -+{ -+ unregister_sysctl_table(sd_sysctl_header); -+ sd_sysctl_header = NULL; -+} -+#endif /* CONFIG_SYSCTL */ -+ -+void set_rq_online(struct rq *rq) -+{ -+ if (!rq->online) { -+ cpumask_set_cpu(cpu_of(rq), rq->rd->online); -+ rq->online = true; -+ } -+} -+ -+void set_rq_offline(struct rq *rq) -+{ -+ if (rq->online) { -+ int cpu = cpu_of(rq); -+ -+ cpumask_clear_cpu(cpu, rq->rd->online); -+ rq->online = false; -+ clear_cpuidle_map(cpu); -+ } -+} -+ -+/* -+ * used to mark begin/end of suspend/resume: -+ */ -+static int num_cpus_frozen; -+ -+/* -+ * Update cpusets according to cpu_active mask. If cpusets are -+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper -+ * around partition_sched_domains(). -+ * -+ * If we come here as part of a suspend/resume, don't touch cpusets because we -+ * want to restore it back to its original state upon resume anyway. -+ */ -+static void cpuset_cpu_active(void) -+{ -+ if (cpuhp_tasks_frozen) { -+ /* -+ * num_cpus_frozen tracks how many CPUs are involved in suspend -+ * resume sequence. As long as this is not the last online -+ * operation in the resume sequence, just build a single sched -+ * domain, ignoring cpusets. -+ */ -+ partition_sched_domains(1, NULL, NULL); -+ if (--num_cpus_frozen) -+ return; -+ /* -+ * This is the last CPU online operation. So fall through and -+ * restore the original sched domains by considering the -+ * cpuset configurations. -+ */ -+ cpuset_force_rebuild(); -+ } -+ -+ cpuset_update_active_cpus(); -+} -+ -+static int cpuset_cpu_inactive(unsigned int cpu) -+{ -+ if (!cpuhp_tasks_frozen) { -+ cpuset_update_active_cpus(); -+ } else { -+ num_cpus_frozen++; -+ partition_sched_domains(1, NULL, NULL); -+ } -+ return 0; -+} -+ -+int sched_cpu_activate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going up, increment the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_inc_cpuslocked(&sched_smt_present); -+#endif -+ set_cpu_active(cpu, true); -+ -+ if (sched_smp_initialized) { -+ sched_domains_numa_masks_set(cpu); -+ cpuset_cpu_active(); -+ } -+ -+ /* -+ * Put the rq online, if not already. This happens: -+ * -+ * 1) In the early boot process, because we build the real domains -+ * after all CPUs have been brought up. -+ * -+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the -+ * domains. -+ */ -+ rq_lock_irqsave(rq, &rf); -+ if (rq->rd) { -+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); -+ set_rq_online(rq); -+ } -+ unbind_zero(cpu); -+ rq_unlock_irqrestore(rq, &rf); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ int ret; -+ -+ set_cpu_active(cpu, false); -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. -+ */ -+ synchronize_rcu(); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going down, decrement the number of cores with SMT present. 
-+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_dec_cpuslocked(&sched_smt_present); -+#endif -+ -+ if (!sched_smp_initialized) -+ return 0; -+ -+ ret = cpuset_cpu_inactive(cpu); -+ if (ret) { -+ set_cpu_active(cpu, true); -+ return ret; -+ } -+ sched_domains_numa_masks_clear(cpu); -+ return 0; -+} -+ -+int sched_cpu_starting(unsigned int cpu) -+{ -+ sched_tick_start(cpu); -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+int sched_cpu_dying(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ /* Handle pending wakeups and then migrate everything off */ -+ sched_tick_stop(cpu); -+ -+ local_irq_save(flags); -+ double_rq_lock(rq, cpu_rq(0)); -+ if (rq->rd) { -+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); -+ set_rq_offline(rq); -+ } -+ bind_zero(cpu); -+ double_rq_unlock(rq, cpu_rq(0)); -+ sched_start_tick(rq, cpu); -+ hrexpiry_clear(rq); -+ local_irq_restore(flags); -+ -+ return 0; -+} -+#endif -+ -+#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC) -+/* -+ * Cheaper version of the below functions in case support for SMT and MC is -+ * compiled in but CPUs have no siblings. -+ */ -+static bool sole_cpu_idle(struct rq *rq) -+{ -+ return rq_idle(rq); -+} -+#endif -+#ifdef CONFIG_SCHED_SMT -+static const cpumask_t *thread_cpumask(int cpu) -+{ -+ return topology_sibling_cpumask(cpu); -+} -+/* All this CPU's SMT siblings are idle */ -+static bool siblings_cpu_idle(struct rq *rq) -+{ -+ return cpumask_subset(&rq->thread_mask, &cpu_idle_map); -+} -+#endif -+#ifdef CONFIG_SCHED_MC -+static const cpumask_t *core_cpumask(int cpu) -+{ -+ return topology_core_cpumask(cpu); -+} -+/* All this CPU's shared cache siblings are idle */ -+static bool cache_cpu_idle(struct rq *rq) -+{ -+ return cpumask_subset(&rq->core_mask, &cpu_idle_map); -+} -+/* MC siblings CPU mask which share the same LLC */ -+static const cpumask_t *llc_core_cpumask(int cpu) -+{ -+#ifdef CONFIG_X86 -+ return per_cpu(cpu_llc_shared_map, cpu); -+#else -+ return topology_core_cpumask(cpu); -+#endif -+} -+#endif -+ -+enum sched_domain_level { -+ SD_LV_NONE = 0, -+ SD_LV_SIBLING, -+ SD_LV_MC, -+ SD_LV_BOOK, -+ SD_LV_CPU, -+ SD_LV_NODE, -+ SD_LV_ALLNODES, -+ SD_LV_MAX -+}; -+ -+/* -+ * Set up the relative cache distance of each online cpu from each -+ * other in a simple array for quick lookup. Locality is determined -+ * by the closest sched_domain that CPUs are separated by. CPUs with -+ * shared cache in SMT and MC are treated as local. Separate CPUs -+ * (within the same package or physically) within the same node are -+ * treated as not local. CPUs not even in the same domain (different -+ * nodes) are treated as very distant. 
-+ */ -+static void __init select_leaders(void) -+{ -+ struct rq *rq, *other_rq, *leader; -+ struct sched_domain *sd; -+ int cpu, other_cpu; -+#ifdef CONFIG_SCHED_SMT -+ bool smt_threads = false; -+#endif -+ -+ for (cpu = 0; cpu < num_online_cpus(); cpu++) { -+ rq = cpu_rq(cpu); -+ leader = NULL; -+ /* First check if this cpu is in the same node */ -+ for_each_domain(cpu, sd) { -+ if (sd->level > SD_LV_MC) -+ continue; -+ if (rqshare != RQSHARE_ALL) -+ leader = NULL; -+ /* Set locality to local node if not already found lower */ -+ for_each_cpu(other_cpu, sched_domain_span(sd)) { -+ if (rqshare >= RQSHARE_SMP) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the smp_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ if (!other_rq->smp_leader) -+ other_rq->smp_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_SMP) -+ rq->cpu_locality[other_cpu] = LOCALITY_SMP; -+ } -+ } -+ -+ /* -+ * Each runqueue has its own function in case it doesn't have -+ * siblings of its own allowing mixed topologies. -+ */ -+#ifdef CONFIG_SCHED_MC -+ leader = NULL; -+ if (cpumask_weight(core_cpumask(cpu)) > 1) { -+ cpumask_copy(&rq->core_mask, llc_core_cpumask(cpu)); -+ cpumask_clear_cpu(cpu, &rq->core_mask); -+ for_each_cpu(other_cpu, core_cpumask(cpu)) { -+ if (rqshare == RQSHARE_MC || -+ (rqshare == RQSHARE_MC_LLC && cpumask_test_cpu(other_cpu, llc_core_cpumask(cpu)))) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the mc_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ if (!other_rq->mc_leader) -+ other_rq->mc_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_MC) { -+ /* this is to get LLC into play even in case LLC sharing is not used */ -+ if (cpumask_test_cpu(other_cpu, llc_core_cpumask(cpu))) -+ rq->cpu_locality[other_cpu] = LOCALITY_MC_LLC; -+ else -+ rq->cpu_locality[other_cpu] = LOCALITY_MC; -+ } -+ } -+ rq->cache_idle = cache_cpu_idle; -+ } -+#endif -+#ifdef CONFIG_SCHED_SMT -+ leader = NULL; -+ if (cpumask_weight(thread_cpumask(cpu)) > 1) { -+ cpumask_copy(&rq->thread_mask, thread_cpumask(cpu)); -+ cpumask_clear_cpu(cpu, &rq->thread_mask); -+ for_each_cpu(other_cpu, thread_cpumask(cpu)) { -+ if (rqshare == RQSHARE_SMT) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the smt_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ if (!other_rq->smt_leader) -+ other_rq->smt_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_SMT) -+ rq->cpu_locality[other_cpu] = LOCALITY_SMT; -+ } -+ rq->siblings_idle = siblings_cpu_idle; -+ smt_threads = true; -+ } -+#endif -+ } -+ -+#ifdef CONFIG_SMT_NICE -+ if (smt_threads) { -+ check_siblings = &check_smt_siblings; -+ wake_siblings = &wake_smt_siblings; -+ smt_schedule = &smt_should_schedule; -+ } -+#endif -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for_each_online_cpu(other_cpu) { -+ printk(KERN_DEBUG "MuQSS locality CPU %d to %d: %d\n", cpu, other_cpu, rq->cpu_locality[other_cpu]); -+ } -+ } -+} -+ -+/* FIXME freeing locked spinlock */ -+static void __init share_and_free_rq(struct rq *leader, struct rq *rq) -+{ -+ WARN_ON(rq->nr_running > 0); -+ -+ kfree(rq->node); -+ kfree(rq->sl); -+ kfree(rq->lock); -+ rq->node = leader->node; -+ rq->sl = leader->sl; -+ rq->lock = leader->lock; -+ rq->is_leader = false; -+ barrier(); -+ /* To make up for not unlocking the freed runlock */ -+ preempt_enable(); -+} -+ -+static void __init share_rqs(void) -+{ -+ struct rq *rq, *leader; -+ int cpu; -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ leader = rq->smp_leader; -+ -+ 
rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing SMP runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ share_and_free_rq(leader, rq); -+ } else -+ rq_unlock(rq); -+ } -+ -+#ifdef CONFIG_SCHED_MC -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ leader = rq->mc_leader; -+ -+ rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing MC runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ share_and_free_rq(leader, rq); -+ } else -+ rq_unlock(rq); -+ } -+#endif /* CONFIG_SCHED_MC */ -+ -+#ifdef CONFIG_SCHED_SMT -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ leader = rq->smt_leader; -+ -+ rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing SMT runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ share_and_free_rq(leader, rq); -+ } else -+ rq_unlock(rq); -+ } -+#endif /* CONFIG_SCHED_SMT */ -+} -+ -+static void __init setup_rq_orders(void) -+{ -+ int *selected_cpus, *ordered_cpus; -+ struct rq *rq, *other_rq; -+ int cpu, other_cpu, i; -+ -+ selected_cpus = kmalloc(sizeof(int) * NR_CPUS, GFP_ATOMIC); -+ ordered_cpus = kmalloc(sizeof(int) * NR_CPUS, GFP_ATOMIC); -+ -+ total_runqueues = 0; -+ for_each_online_cpu(cpu) { -+ int locality, total_rqs = 0, total_cpus = 0; -+ -+ rq = cpu_rq(cpu); -+ if (rq->is_leader) -+ total_runqueues++; -+ -+ for (locality = LOCALITY_SAME; locality <= LOCALITY_DISTANT; locality++) { -+ int selected_cpu_cnt, selected_cpu_idx, test_cpu_idx, cpu_idx, best_locality, test_cpu; -+ int ordered_cpus_idx; -+ -+ ordered_cpus_idx = -1; -+ selected_cpu_cnt = 0; -+ -+ for_each_online_cpu(test_cpu) { -+ if (cpu < num_online_cpus() / 2) -+ other_cpu = cpu + test_cpu; -+ else -+ other_cpu = cpu - test_cpu; -+ if (other_cpu < 0) -+ other_cpu += num_online_cpus(); -+ else -+ other_cpu %= num_online_cpus(); -+ /* gather CPUs of the same locality */ -+ if (rq->cpu_locality[other_cpu] == locality) { -+ selected_cpus[selected_cpu_cnt] = other_cpu; -+ selected_cpu_cnt++; -+ } -+ } -+ -+ /* reserve first CPU as starting point */ -+ if (selected_cpu_cnt > 0) { -+ ordered_cpus_idx++; -+ ordered_cpus[ordered_cpus_idx] = selected_cpus[ordered_cpus_idx]; -+ selected_cpus[ordered_cpus_idx] = -1; -+ } -+ -+ /* take each CPU and sort it within the same locality based on each inter-CPU localities */ -+ for (test_cpu_idx = 1; test_cpu_idx < selected_cpu_cnt; test_cpu_idx++) { -+ /* starting point with worst locality and current CPU */ -+ best_locality = LOCALITY_DISTANT; -+ selected_cpu_idx = test_cpu_idx; -+ -+ /* try to find the best locality within group */ -+ for (cpu_idx = 1; cpu_idx < selected_cpu_cnt; cpu_idx++) { -+ /* if CPU has not been used and locality is better */ -+ if (selected_cpus[cpu_idx] > -1) { -+ other_rq = cpu_rq(ordered_cpus[ordered_cpus_idx]); -+ if (best_locality > other_rq->cpu_locality[selected_cpus[cpu_idx]]) { -+ /* assign best locality and best CPU idx in array */ -+ best_locality = other_rq->cpu_locality[selected_cpus[cpu_idx]]; -+ selected_cpu_idx = cpu_idx; -+ } -+ } -+ } -+ -+ /* add our next best CPU to ordered list */ -+ ordered_cpus_idx++; -+ ordered_cpus[ordered_cpus_idx] = selected_cpus[selected_cpu_idx]; -+ /* mark this CPU as used */ -+ selected_cpus[selected_cpu_idx] = -1; -+ } -+ -+ /* set up RQ and CPU orders */ -+ for (test_cpu = 0; test_cpu <= ordered_cpus_idx; test_cpu++) { -+ other_rq = cpu_rq(ordered_cpus[test_cpu]); -+ /* set up cpu orders */ -+ rq->cpu_order[total_cpus++] = other_rq; -+ if (other_rq->is_leader) { -+ /* set 
up RQ orders */ -+ rq->rq_order[total_rqs++] = other_rq; -+ } -+ } -+ } -+ } -+ -+ kfree(selected_cpus); -+ kfree(ordered_cpus); -+ -+#ifdef CONFIG_X86 -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for (i = 0; i < total_runqueues; i++) { -+ printk(KERN_DEBUG "MuQSS CPU %d llc %d RQ order %d RQ %d llc %d\n", cpu, per_cpu(cpu_llc_id, cpu), i, -+ rq->rq_order[i]->cpu, per_cpu(cpu_llc_id, rq->rq_order[i]->cpu)); -+ } -+ } -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for (i = 0; i < num_online_cpus(); i++) { -+ printk(KERN_DEBUG "MuQSS CPU %d llc %d CPU order %d RQ %d llc %d\n", cpu, per_cpu(cpu_llc_id, cpu), i, -+ rq->cpu_order[i]->cpu, per_cpu(cpu_llc_id, rq->cpu_order[i]->cpu)); -+ } -+ } -+#endif -+} -+ -+void __init sched_init_smp(void) -+{ -+ sched_init_numa(); -+ -+ /* -+ * There's no userspace yet to cause hotplug operations; hence all the -+ * cpu masks are stable and all blatant races in the below code cannot -+ * happen. -+ */ -+ mutex_lock(&sched_domains_mutex); -+ sched_init_domains(cpu_active_mask); -+ mutex_unlock(&sched_domains_mutex); -+ -+ /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -+ BUG(); -+ -+ local_irq_disable(); -+ mutex_lock(&sched_domains_mutex); -+ lock_all_rqs(); -+ -+ printk(KERN_INFO "MuQSS possible/present/online CPUs: %d/%d/%d\n", -+ num_possible_cpus(), num_present_cpus(), num_online_cpus()); -+ -+ select_leaders(); -+ -+ unlock_all_rqs(); -+ mutex_unlock(&sched_domains_mutex); -+ -+ share_rqs(); -+ -+ local_irq_enable(); -+ -+ setup_rq_orders(); -+ -+ switch (rqshare) { -+ case RQSHARE_ALL: -+ /* This should only ever read 1 */ -+ printk(KERN_INFO "MuQSS runqueue share type ALL total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_SMP: -+ printk(KERN_INFO "MuQSS runqueue share type SMP total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_MC: -+ printk(KERN_INFO "MuQSS runqueue share type MC total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_MC_LLC: -+ printk(KERN_INFO "MuQSS runqueue share type LLC total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_SMT: -+ printk(KERN_INFO "MuQSS runqueue share type SMT total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_NONE: -+ printk(KERN_INFO "MuQSS runqueue share type NONE total runqueues: %d\n", -+ total_runqueues); -+ break; -+ } -+ -+ sched_smp_initialized = true; -+} -+#else -+void __init sched_init_smp(void) -+{ -+ sched_smp_initialized = true; -+} -+#endif /* CONFIG_SMP */ -+ -+int in_sched_functions(unsigned long addr) -+{ -+ return in_lock_functions(addr) || -+ (addr >= (unsigned long)__sched_text_start -+ && addr < (unsigned long)__sched_text_end); -+} -+ -+#ifdef CONFIG_CGROUP_SCHED -+/* task group related information */ -+struct task_group { -+ struct cgroup_subsys_state css; -+ -+ struct rcu_head rcu; -+ struct list_head list; -+ -+ struct task_group *parent; -+ struct list_head siblings; -+ struct list_head children; -+}; -+ -+/* -+ * Default task group. -+ * Every task in system belongs to this group at bootup. 
-+ */ -+struct task_group root_task_group; -+LIST_HEAD(task_groups); -+ -+/* Cacheline aligned slab cache for task_group */ -+static struct kmem_cache *task_group_cache __read_mostly; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void __init sched_init(void) -+{ -+#ifdef CONFIG_SMP -+ int cpu_ids; -+#endif -+ int i; -+ struct rq *rq; -+ -+ wait_bit_init(); -+ -+ prio_ratios[0] = 128; -+ for (i = 1 ; i < NICE_WIDTH ; i++) -+ prio_ratios[i] = prio_ratios[i - 1] * 11 / 10; -+ -+ skiplist_node_init(&init_task.node); -+ -+#ifdef CONFIG_SMP -+ init_defrootdomain(); -+ cpumask_clear(&cpu_idle_map); -+#else -+ uprq = &per_cpu(runqueues, 0); -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+ task_group_cache = KMEM_CACHE(task_group, 0); -+ -+ list_add(&root_task_group.list, &task_groups); -+ INIT_LIST_HEAD(&root_task_group.children); -+ INIT_LIST_HEAD(&root_task_group.siblings); -+#endif /* CONFIG_CGROUP_SCHED */ -+ for_each_possible_cpu(i) { -+ rq = cpu_rq(i); -+ rq->node = kmalloc(sizeof(skiplist_node), GFP_ATOMIC); -+ skiplist_init(rq->node); -+ rq->sl = new_skiplist(rq->node); -+ rq->lock = kmalloc(sizeof(raw_spinlock_t), GFP_ATOMIC); -+ raw_spin_lock_init(rq->lock); -+ rq->nr_running = 0; -+ rq->nr_uninterruptible = 0; -+ rq->nr_switches = 0; -+ rq->clock = rq->old_clock = rq->last_niffy = rq->niffies = 0; -+ rq->last_jiffy = jiffies; -+ rq->user_ns = rq->nice_ns = rq->softirq_ns = rq->system_ns = -+ rq->iowait_ns = rq->idle_ns = 0; -+ rq->dither = 0; -+ set_rq_task(rq, &init_task); -+ rq->iso_ticks = 0; -+ rq->iso_refractory = false; -+#ifdef CONFIG_SMP -+ rq->is_leader = true; -+ rq->smp_leader = NULL; -+#ifdef CONFIG_SCHED_MC -+ rq->mc_leader = NULL; -+#endif -+#ifdef CONFIG_SCHED_SMT -+ rq->smt_leader = NULL; -+#endif -+ rq->sd = NULL; -+ rq->rd = NULL; -+ rq->online = false; -+ rq->cpu = i; -+ rq_attach_root(rq, &def_root_domain); -+#endif -+ init_rq_hrexpiry(rq); -+ atomic_set(&rq->nr_iowait, 0); -+ } -+ -+#ifdef CONFIG_SMP -+ cpu_ids = i; -+ /* -+ * Set the base locality for cpu cache distance calculation to -+ * "distant" (3). Make sure the distance from a CPU to itself is 0. -+ */ -+ for_each_possible_cpu(i) { -+ int j; -+ -+ rq = cpu_rq(i); -+#ifdef CONFIG_SCHED_SMT -+ rq->siblings_idle = sole_cpu_idle; -+#endif -+#ifdef CONFIG_SCHED_MC -+ rq->cache_idle = sole_cpu_idle; -+#endif -+ rq->cpu_locality = kmalloc(cpu_ids * sizeof(int *), GFP_ATOMIC); -+ for_each_possible_cpu(j) { -+ if (i == j) -+ rq->cpu_locality[j] = LOCALITY_SAME; -+ else -+ rq->cpu_locality[j] = LOCALITY_DISTANT; -+ } -+ rq->rq_order = kmalloc(cpu_ids * sizeof(struct rq *), GFP_ATOMIC); -+ rq->cpu_order = kmalloc(cpu_ids * sizeof(struct rq *), GFP_ATOMIC); -+ rq->rq_order[0] = rq->cpu_order[0] = rq; -+ for (j = 1; j < cpu_ids; j++) -+ rq->rq_order[j] = rq->cpu_order[j] = cpu_rq(j); -+ } -+#endif -+ -+ /* -+ * The boot idle thread does lazy MMU switching as well: -+ */ -+ mmgrab(&init_mm); -+ enter_lazy_tlb(&init_mm, current); -+ -+ /* -+ * Make us the idle thread. Technically, schedule() should not be -+ * called from this thread, however somewhere below it might be, -+ * but because we are the idle thread, we just pick up running again -+ * when this runqueue becomes "idle". 
-+ */ -+ init_idle(current, smp_processor_id()); -+ -+#ifdef CONFIG_SMP -+ idle_thread_set_boot_cpu(); -+#endif /* SMP */ -+ -+ init_schedstats(); -+ -+ psi_init(); -+} -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+static inline int preempt_count_equals(int preempt_offset) -+{ -+ int nested = preempt_count() + rcu_preempt_depth(); -+ -+ return (nested == preempt_offset); -+} -+ -+void __might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* -+ * Blocking primitives will set (and therefore destroy) current->state, -+ * since we will exit with TASK_RUNNING make sure we enter with it, -+ * otherwise we will destroy state. -+ */ -+ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, -+ "do not call blocking ops when !TASK_RUNNING; " -+ "state=%lx set at [<%p>] %pS\n", -+ current->state, -+ (void *)current->task_state_change, -+ (void *)current->task_state_change); -+ -+ ___might_sleep(file, line, preempt_offset); -+} -+EXPORT_SYMBOL(__might_sleep); -+ -+void __cant_sleep(const char *file, int line, int preempt_offset) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > preempt_offset) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); -+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_sleep); -+ -+void ___might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* Ratelimiting timestamp: */ -+ static unsigned long prev_jiffy; -+ -+ unsigned long preempt_disable_ip; -+ -+ /* WARN_ON_ONCE() by default, no rate limit required: */ -+ rcu_sleep_check(); -+ -+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ !is_idle_task(current) && !current->non_block_count) || -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ /* Save this before calling printk(), since that will clobber it: */ -+ preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ printk(KERN_ERR -+ "BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ printk(KERN_ERR -+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); -+ -+ if (task_stack_end_corrupted(current)) -+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ -+ debug_show_held_locks(current); -+ if (irqs_disabled()) -+ print_irqtrace_events(current); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && !preempt_count_equals(preempt_offset)) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); -+ } -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL(___might_sleep); -+#endif -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+static inline void normalise_rt_tasks(void) -+{ -+ struct sched_attr attr = {}; -+ struct task_struct *g, *p; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ read_lock(&tasklist_lock); -+ for_each_process_thread(g, p) { -+ /* -+ * Only normalize user tasks: -+ */ -+ if (p->flags & 
PF_KTHREAD) -+ continue; -+ -+ if (!rt_task(p) && !iso_task(p)) -+ continue; -+ -+ rq = task_rq_lock(p, &rf); -+ __setscheduler(p, rq, SCHED_NORMAL, 0, &attr, false); -+ task_rq_unlock(rq, p, &rf); -+ } -+ read_unlock(&tasklist_lock); -+} -+ -+void normalize_rt_tasks(void) -+{ -+ normalise_rt_tasks(); -+} -+#endif /* CONFIG_MAGIC_SYSRQ */ -+ -+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) -+/* -+ * These functions are only useful for the IA64 MCA handling, or kdb. -+ * -+ * They can only be called when the whole system has been -+ * stopped - every CPU needs to be quiescent, and no scheduling -+ * activity can take place. Using them for anything else would -+ * be a serious bug, and as a result, they aren't even visible -+ * under any other configuration. -+ */ -+ -+/** -+ * curr_task - return the current task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ * -+ * Return: The current task for @cpu. -+ */ -+struct task_struct *curr_task(int cpu) -+{ -+ return cpu_curr(cpu); -+} -+ -+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ -+ -+#ifdef CONFIG_IA64 -+/** -+ * ia64_set_curr_task - set the current task for a given CPU. -+ * @cpu: the processor in question. -+ * @p: the task pointer to set. -+ * -+ * Description: This function must only be used when non-maskable interrupts -+ * are serviced on a separate stack. It allows the architecture to switch the -+ * notion of the current task on a CPU in a non-blocking manner. This function -+ * must be called with all CPU's synchronised, and interrupts disabled, the -+ * and caller must save the original value of the current task (see -+ * curr_task() above) and restore that value before reenabling interrupts and -+ * re-starting the system. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ */ -+void ia64_set_curr_task(int cpu, struct task_struct *p) -+{ -+ cpu_curr(cpu) = p; -+} -+ -+#endif -+ -+void init_idle_bootup_task(struct task_struct *idle) -+{} -+ -+#ifdef CONFIG_SCHED_DEBUG -+__read_mostly bool sched_debug_enabled; -+ -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{ -+ seq_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+static void sched_free_group(struct task_group *tg) -+{ -+ kmem_cache_free(task_group_cache, tg); -+} -+ -+/* allocate runqueue etc for a new task group */ -+struct task_group *sched_create_group(struct task_group *parent) -+{ -+ struct task_group *tg; -+ -+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); -+ if (!tg) -+ return ERR_PTR(-ENOMEM); -+ -+ return tg; -+} -+ -+void sched_online_group(struct task_group *tg, struct task_group *parent) -+{ -+} -+ -+/* rcu callback to free various structures associated with a task group */ -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ /* Now it should be safe to free those cfs_rqs */ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+void sched_destroy_group(struct task_group *tg) -+{ -+ /* Wait for possible concurrent references to cfs_rqs complete */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ -+void sched_offline_group(struct task_group *tg) -+{ -+} -+ -+static inline struct task_group *css_tg(struct cgroup_subsys_state *css) -+{ -+ return css ? 
container_of(css, struct task_group, css) : NULL; -+} -+ -+static struct cgroup_subsys_state * -+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -+{ -+ struct task_group *parent = css_tg(parent_css); -+ struct task_group *tg; -+ -+ if (!parent) { -+ /* This is early initialization for the top cgroup */ -+ return &root_task_group.css; -+ } -+ -+ tg = sched_create_group(parent); -+ if (IS_ERR(tg)) -+ return ERR_PTR(-ENOMEM); -+ return &tg->css; -+} -+ -+/* Expose task group only after completing cgroup initialization */ -+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ struct task_group *parent = css_tg(css->parent); -+ -+ if (parent) -+ sched_online_group(tg, parent); -+ return 0; -+} -+ -+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ sched_offline_group(tg); -+} -+ -+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ /* -+ * Relies on the RCU grace period between css_released() and this. -+ */ -+ sched_free_group(tg); -+} -+ -+static void cpu_cgroup_fork(struct task_struct *task) -+{ -+} -+ -+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) -+{ -+ return 0; -+} -+ -+static void cpu_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+ -+static struct cftype cpu_legacy_files[] = { -+ { } /* Terminate */ -+}; -+ -+static struct cftype cpu_files[] = { -+ { } /* terminate */ -+}; -+ -+static int cpu_extra_stat_show(struct seq_file *sf, -+ struct cgroup_subsys_state *css) -+{ -+ return 0; -+} -+ -+struct cgroup_subsys cpu_cgrp_subsys = { -+ .css_alloc = cpu_cgroup_css_alloc, -+ .css_online = cpu_cgroup_css_online, -+ .css_released = cpu_cgroup_css_released, -+ .css_free = cpu_cgroup_css_free, -+ .css_extra_stat_show = cpu_extra_stat_show, -+ .fork = cpu_cgroup_fork, -+ .can_attach = cpu_cgroup_can_attach, -+ .attach = cpu_cgroup_attach, -+ .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, -+ .early_init = true, -+ .threaded = true, -+}; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void call_trace_sched_update_nr_running(struct rq *rq, int count) -+{ -+ trace_sched_update_nr_running_tp(rq, count); -+} -+ -+/* CFS Compat */ -+#ifdef CONFIG_RCU_TORTURE_TEST -+int sysctl_sched_rt_runtime; -+#endif -diff --git a/kernel/sched/MuQSS.h b/kernel/sched/MuQSS.h -new file mode 100644 -index 000000000000..09a1f2fe64ba ---- /dev/null -+++ b/kernel/sched/MuQSS.h -@@ -0,0 +1,1070 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef MUQSS_SCHED_H -+#define MUQSS_SCHED_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_PARAVIRT -+#include -+#endif -+ -+#include "cpupri.h" -+ -+#include -+ -+#ifdef CONFIG_SCHED_DEBUG -+# define SCHED_WARN_ON(x) WARN_ONCE(x, #x) -+#else -+# define SCHED_WARN_ON(x) ((void)(x)) -+#endif -+ -+/* -+ * wake flags -+ */ -+#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -+#define WF_FORK 0x02 /* child wakeup after fork */ -+#define WF_MIGRATED 0x04 /* internal use, task got migrated */ 
-+#define WF_ON_CPU 0x08 /* Wakee is on_cpu */ -+ -+/* task_struct::on_rq states: */ -+#define TASK_ON_RQ_QUEUED 1 -+#define TASK_ON_RQ_MIGRATING 2 -+ -+extern void call_trace_sched_update_nr_running(struct rq *rq, int count); -+ -+struct rq; -+ -+#ifdef CONFIG_SMP -+ -+static inline bool sched_asym_prefer(int a, int b) -+{ -+ return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b); -+} -+ -+struct perf_domain { -+ struct em_perf_domain *em_pd; -+ struct perf_domain *next; -+ struct rcu_head rcu; -+}; -+ -+/* Scheduling group status flags */ -+#define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */ -+#define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */ -+ -+/* -+ * We add the notion of a root-domain which will be used to define per-domain -+ * variables. Each exclusive cpuset essentially defines an island domain by -+ * fully partitioning the member cpus from any other cpuset. Whenever a new -+ * exclusive cpuset is created, we also create and attach a new root-domain -+ * object. -+ * -+ */ -+struct root_domain { -+ atomic_t refcount; -+ atomic_t rto_count; -+ struct rcu_head rcu; -+ cpumask_var_t span; -+ cpumask_var_t online; -+ -+ /* -+ * Indicate pullable load on at least one CPU, e.g: -+ * - More than one runnable task -+ * - Running task is misfit -+ */ -+ int overload; -+ -+ /* Indicate one or more cpus over-utilized (tipping point) */ -+ int overutilized; -+ -+ /* -+ * The bit corresponding to a CPU gets set here if such CPU has more -+ * than one runnable -deadline task (as it is below for RT tasks). -+ */ -+ cpumask_var_t dlo_mask; -+ atomic_t dlo_count; -+ /* Replace unused CFS structures with void */ -+ //struct dl_bw dl_bw; -+ //struct cpudl cpudl; -+ void *dl_bw; -+ void *cpudl; -+ -+ /* -+ * The "RT overload" flag: it gets set if a CPU has more than -+ * one runnable RT task. -+ */ -+ cpumask_var_t rto_mask; -+ //struct cpupri cpupri; -+ void *cpupri; -+ -+ unsigned long max_cpu_capacity; -+ -+ /* -+ * NULL-terminated list of performance domains intersecting with the -+ * CPUs of the rd. Protected by RCU. -+ */ -+ struct perf_domain *pd; -+}; -+ -+extern void init_defrootdomain(void); -+extern int sched_init_domains(const struct cpumask *cpu_map); -+extern void rq_attach_root(struct rq *rq, struct root_domain *rd); -+ -+static inline void cpupri_cleanup(void __maybe_unused *cpupri) -+{ -+} -+ -+static inline void cpudl_cleanup(void __maybe_unused *cpudl) -+{ -+} -+ -+static inline void init_dl_bw(void __maybe_unused *dl_bw) -+{ -+} -+ -+static inline int cpudl_init(void __maybe_unused *dl_bw) -+{ -+ return 0; -+} -+ -+static inline int cpupri_init(void __maybe_unused *cpupri) -+{ -+ return 0; -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * This is the main, per-CPU runqueue data structure. -+ * This data should only be modified by the local cpu. -+ */ -+struct rq { -+ raw_spinlock_t *lock; -+ raw_spinlock_t *orig_lock; -+ -+ struct task_struct __rcu *curr; -+ struct task_struct *idle; -+ struct task_struct *stop; -+ struct mm_struct *prev_mm; -+ -+ unsigned int nr_running; -+ /* -+ * This is part of a global counter where only the total sum -+ * over all CPUs matters. A task can increase this counter on -+ * one CPU and if it got migrated afterwards it may decrease -+ * it on another CPU. 
Always updated under the runqueue lock: -+ */ -+ unsigned long nr_uninterruptible; -+#ifdef CONFIG_SMP -+ unsigned int ttwu_pending; -+#endif -+ u64 nr_switches; -+ -+ /* Stored data about rq->curr to work outside rq lock */ -+ u64 rq_deadline; -+ int rq_prio; -+ -+ /* Best queued id for use outside lock */ -+ u64 best_key; -+ -+ unsigned long last_scheduler_tick; /* Last jiffy this RQ ticked */ -+ unsigned long last_jiffy; /* Last jiffy this RQ updated rq clock */ -+ u64 niffies; /* Last time this RQ updated rq clock */ -+ u64 last_niffy; /* Last niffies as updated by local clock */ -+ u64 last_jiffy_niffies; /* Niffies @ last_jiffy */ -+ -+ u64 load_update; /* When we last updated load */ -+ unsigned long load_avg; /* Rolling load average */ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ u64 irq_load_update; /* When we last updated IRQ load */ -+ unsigned long irq_load_avg; /* Rolling IRQ load average */ -+#endif -+#ifdef CONFIG_SMT_NICE -+ struct mm_struct *rq_mm; -+ int rq_smt_bias; /* Policy/nice level bias across smt siblings */ -+#endif -+ /* Accurate timekeeping data */ -+ unsigned long user_ns, nice_ns, irq_ns, softirq_ns, system_ns, -+ iowait_ns, idle_ns; -+ atomic_t nr_iowait; -+ -+#ifdef CONFIG_MEMBARRIER -+ int membarrier_state; -+#endif -+ -+ skiplist_node *node; -+ skiplist *sl; -+#ifdef CONFIG_SMP -+ struct task_struct *preempt; /* Preempt triggered on this task */ -+ struct task_struct *preempting; /* Hint only, what task is preempting */ -+ -+ int cpu; /* cpu of this runqueue */ -+ bool online; -+ -+ struct root_domain *rd; -+ struct sched_domain *sd; -+ -+ unsigned long cpu_capacity_orig; -+ -+ int *cpu_locality; /* CPU relative cache distance */ -+ struct rq **rq_order; /* Shared RQs ordered by relative cache distance */ -+ struct rq **cpu_order; /* RQs of discrete CPUs ordered by distance */ -+ -+ bool is_leader; -+ struct rq *smp_leader; /* First physical CPU per node */ -+#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+ struct sched_avg avg_thermal; -+#endif /* CONFIG_SCHED_THERMAL_PRESSURE */ -+#ifdef CONFIG_SCHED_SMT -+ struct rq *smt_leader; /* First logical CPU in SMT siblings */ -+ cpumask_t thread_mask; -+ bool (*siblings_idle)(struct rq *rq); -+ /* See if all smt siblings are idle */ -+#endif /* CONFIG_SCHED_SMT */ -+#ifdef CONFIG_SCHED_MC -+ struct rq *mc_leader; /* First logical CPU in MC siblings */ -+ cpumask_t core_mask; -+ bool (*cache_idle)(struct rq *rq); -+ /* See if all cache siblings are idle */ -+#endif /* CONFIG_SCHED_MC */ -+#endif /* CONFIG_SMP */ -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ u64 prev_irq_time; -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+#ifdef CONFIG_PARAVIRT -+ u64 prev_steal_time; -+#endif /* CONFIG_PARAVIRT */ -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ u64 prev_steal_time_rq; -+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ -+ -+ u64 clock, old_clock, last_tick; -+ /* Ensure that all clocks are in the same cache line */ -+ u64 clock_task ____cacheline_aligned; -+ int dither; -+ -+ int iso_ticks; -+ bool iso_refractory; -+ -+#ifdef CONFIG_HIGH_RES_TIMERS -+ struct hrtimer hrexpiry_timer; -+#endif -+ -+ int rt_nr_running; /* Number real time tasks running */ -+#ifdef CONFIG_SCHEDSTATS -+ -+ /* latency stats */ -+ struct sched_info rq_sched_info; -+ unsigned long long rq_cpu_time; -+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? 
*/ -+ -+ /* sys_sched_yield() stats */ -+ unsigned int yld_count; -+ -+ /* schedule() stats */ -+ unsigned int sched_switch; -+ unsigned int sched_count; -+ unsigned int sched_goidle; -+ -+ /* try_to_wake_up() stats */ -+ unsigned int ttwu_count; -+ unsigned int ttwu_local; -+#endif /* CONFIG_SCHEDSTATS */ -+ -+#ifdef CONFIG_CPU_IDLE -+ /* Must be inspected within a rcu lock section */ -+ struct cpuidle_state *idle_state; -+#endif -+}; -+ -+static inline u64 __rq_clock_broken(struct rq *rq) -+{ -+ return READ_ONCE(rq->clock); -+} -+ -+static inline u64 rq_clock(struct rq *rq) -+{ -+ lockdep_assert_held(rq->lock); -+ -+ return rq->clock; -+} -+ -+static inline u64 rq_clock_task(struct rq *rq) -+{ -+ lockdep_assert_held(rq->lock); -+ -+ return rq->clock_task; -+} -+ -+/** -+ * By default the decay is the default pelt decay period. -+ * The decay shift can change the decay period in -+ * multiples of 32. -+ * Decay shift Decay period(ms) -+ * 0 32 -+ * 1 64 -+ * 2 128 -+ * 3 256 -+ * 4 512 -+ */ -+extern int sched_thermal_decay_shift; -+ -+static inline u64 rq_clock_thermal(struct rq *rq) -+{ -+ return rq_clock_task(rq) >> sched_thermal_decay_shift; -+} -+ -+struct rq_flags { -+ unsigned long flags; -+}; -+ -+#ifdef CONFIG_SMP -+struct rq *cpu_rq(int cpu); -+#endif -+ -+#ifndef CONFIG_SMP -+extern struct rq *uprq; -+#define cpu_rq(cpu) (uprq) -+#define this_rq() (uprq) -+#define raw_rq() (uprq) -+#define task_rq(p) (uprq) -+#define cpu_curr(cpu) ((uprq)->curr) -+#else /* CONFIG_SMP */ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+#define this_rq() this_cpu_ptr(&runqueues) -+#define raw_rq() raw_cpu_ptr(&runqueues) -+#define task_rq(p) cpu_rq(task_cpu(p)) -+#endif /* CONFIG_SMP */ -+ -+static inline int task_current(struct rq *rq, struct task_struct *p) -+{ -+ return rq->curr == p; -+} -+ -+static inline int task_running(struct rq *rq, struct task_struct *p) -+{ -+#ifdef CONFIG_SMP -+ return p->on_cpu; -+#else -+ return task_current(rq, p); -+#endif -+} -+ -+static inline int task_on_rq_queued(struct task_struct *p) -+{ -+ return p->on_rq == TASK_ON_RQ_QUEUED; -+} -+ -+static inline int task_on_rq_migrating(struct task_struct *p) -+{ -+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+} -+ -+static inline void rq_lock(struct rq *rq) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock(rq->lock); -+} -+ -+static inline void rq_unlock(struct rq *rq) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(rq->lock); -+} -+ -+static inline void rq_lock_irq(struct rq *rq) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irq(rq->lock); -+} -+ -+static inline void rq_unlock_irq(struct rq *rq, struct rq_flags __always_unused *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irq(rq->lock); -+} -+ -+static inline void rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irqsave(rq->lock, rf->flags); -+} -+ -+static inline void rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irqrestore(rq->lock, rf->flags); -+} -+ -+static inline struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ while (42) { -+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); -+ rq = task_rq(p); -+ raw_spin_lock(rq->lock); -+ if (likely(rq == task_rq(p))) -+ break; -+ raw_spin_unlock(rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+ } -+ return rq; -+} -+ -+static inline void task_rq_unlock(struct rq *rq, struct task_struct *p, 
struct rq_flags *rf) -+ __releases(rq->lock) -+ __releases(p->pi_lock) -+{ -+ rq_unlock(rq); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+} -+ -+static inline struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags __always_unused *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ while (42) { -+ rq = task_rq(p); -+ raw_spin_lock(rq->lock); -+ if (likely(rq == task_rq(p))) -+ break; -+ raw_spin_unlock(rq->lock); -+ } -+ return rq; -+} -+ -+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags __always_unused *rf) -+{ -+ rq_unlock(rq); -+} -+ -+static inline struct rq * -+this_rq_lock_irq(struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ rq_lock(rq); -+ return rq; -+} -+ -+/* -+ * {de,en}queue flags: Most not used on MuQSS. -+ * -+ * DEQUEUE_SLEEP - task is no longer runnable -+ * ENQUEUE_WAKEUP - task just became runnable -+ * -+ * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks -+ * are in a known state which allows modification. Such pairs -+ * should preserve as much state as possible. -+ * -+ * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location -+ * in the runqueue. -+ * -+ * ENQUEUE_HEAD - place at front of runqueue (tail if not specified) -+ * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline) -+ * ENQUEUE_MIGRATED - the task was migrated during wakeup -+ * -+ */ -+ -+#define DEQUEUE_SLEEP 0x01 -+#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */ -+ -+#define ENQUEUE_WAKEUP 0x01 -+#define ENQUEUE_RESTORE 0x02 -+ -+#ifdef CONFIG_SMP -+#define ENQUEUE_MIGRATED 0x40 -+#else -+#define ENQUEUE_MIGRATED 0x00 -+#endif -+ -+#ifdef CONFIG_NUMA -+enum numa_topology_type { -+ NUMA_DIRECT, -+ NUMA_GLUELESS_MESH, -+ NUMA_BACKPLANE, -+}; -+extern enum numa_topology_type sched_numa_topology_type; -+extern int sched_max_numa_distance; -+extern bool find_numa_distance(int distance); -+extern void sched_init_numa(void); -+extern void sched_domains_numa_masks_set(unsigned int cpu); -+extern void sched_domains_numa_masks_clear(unsigned int cpu); -+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); -+#else -+static inline void sched_init_numa(void) { } -+static inline void sched_domains_numa_masks_set(unsigned int cpu) { } -+static inline void sched_domains_numa_masks_clear(unsigned int cpu) { } -+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return nr_cpu_ids; -+} -+#endif -+ -+extern struct mutex sched_domains_mutex; -+extern struct static_key_false sched_schedstats; -+ -+#define rcu_dereference_check_sched_domain(p) \ -+ rcu_dereference_check((p), \ -+ lockdep_is_held(&sched_domains_mutex)) -+ -+#ifdef CONFIG_SMP -+ -+/* -+ * The domain tree (rq->sd) is protected by RCU's quiescent state transition. -+ * See destroy_sched_domains: call_rcu for details. -+ * -+ * The domain tree of any CPU may only be accessed from within -+ * preempt-disabled sections. -+ */ -+#define for_each_domain(cpu, __sd) \ -+ for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \ -+ __sd; __sd = __sd->parent) -+ -+/** -+ * highest_flag_domain - Return highest sched_domain containing flag. -+ * @cpu: The cpu whose highest level of sched domain is to -+ * be returned. -+ * @flag: The flag to check for the highest sched_domain -+ * for the given cpu. -+ * -+ * Returns the highest sched_domain of a cpu which contains the given flag. 
-+ */ -+static inline struct sched_domain *highest_flag_domain(int cpu, int flag) -+{ -+ struct sched_domain *sd, *hsd = NULL; -+ -+ for_each_domain(cpu, sd) { -+ if (!(sd->flags & flag)) -+ break; -+ hsd = sd; -+ } -+ -+ return hsd; -+} -+ -+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) -+{ -+ struct sched_domain *sd; -+ -+ for_each_domain(cpu, sd) { -+ if (sd->flags & flag) -+ break; -+ } -+ -+ return sd; -+} -+ -+DECLARE_PER_CPU(struct sched_domain *, sd_llc); -+DECLARE_PER_CPU(int, sd_llc_size); -+DECLARE_PER_CPU(int, sd_llc_id); -+DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); -+DECLARE_PER_CPU(struct sched_domain *, sd_numa); -+DECLARE_PER_CPU(struct sched_domain *, sd_asym_packing); -+DECLARE_PER_CPU(struct sched_domain *, sd_asym_cpucapacity); -+ -+struct sched_group_capacity { -+ atomic_t ref; -+ /* -+ * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity -+ * for a single CPU. -+ */ -+ unsigned long capacity; -+ unsigned long min_capacity; /* Min per-CPU capacity in group */ -+ unsigned long max_capacity; /* Max per-CPU capacity in group */ -+ unsigned long next_update; -+ int imbalance; /* XXX unrelated to capacity but shared group state */ -+ -+#ifdef CONFIG_SCHED_DEBUG -+ int id; -+#endif -+ -+ unsigned long cpumask[]; /* balance mask */ -+}; -+ -+struct sched_group { -+ struct sched_group *next; /* Must be a circular list */ -+ atomic_t ref; -+ -+ unsigned int group_weight; -+ struct sched_group_capacity *sgc; -+ int asym_prefer_cpu; /* cpu of highest priority in group */ -+ -+ /* -+ * The CPUs this group covers. -+ * -+ * NOTE: this field is variable length. (Allocated dynamically -+ * by attaching extra space to the end of the structure, -+ * depending on how many CPUs the kernel has booted up with) -+ */ -+ unsigned long cpumask[0]; -+}; -+ -+static inline struct cpumask *sched_group_span(struct sched_group *sg) -+{ -+ return to_cpumask(sg->cpumask); -+} -+ -+/* -+ * See build_balance_mask(). -+ */ -+static inline struct cpumask *group_balance_mask(struct sched_group *sg) -+{ -+ return to_cpumask(sg->sgc->cpumask); -+} -+ -+/** -+ * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. -+ * @group: The group whose first cpu is to be returned. 
-+ */ -+static inline unsigned int group_first_cpu(struct sched_group *group) -+{ -+ return cpumask_first(sched_group_span(group)); -+} -+ -+ -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+void register_sched_domain_sysctl(void); -+void dirty_sched_domain_sysctl(int cpu); -+void unregister_sched_domain_sysctl(void); -+#else -+static inline void register_sched_domain_sysctl(void) -+{ -+} -+static inline void dirty_sched_domain_sysctl(int cpu) -+{ -+} -+static inline void unregister_sched_domain_sysctl(void) -+{ -+} -+#endif -+ -+extern void flush_smp_call_function_from_idle(void); -+ -+extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask); -+extern void set_rq_online (struct rq *rq); -+extern void set_rq_offline(struct rq *rq); -+extern bool sched_smp_initialized; -+ -+static inline void update_group_capacity(struct sched_domain *sd, int cpu) -+{ -+} -+ -+static inline void trigger_load_balance(struct rq *rq) -+{ -+} -+ -+#define sched_feat(x) 0 -+ -+#else /* CONFIG_SMP */ -+ -+static inline void flush_smp_call_function_from_idle(void) { } -+ -+#endif /* CONFIG_SMP */ -+ -+#ifdef CONFIG_CPU_IDLE -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+ rq->idle_state = idle_state; -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ SCHED_WARN_ON(!rcu_read_lock_held()); -+ return rq->idle_state; -+} -+#else -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ return NULL; -+} -+#endif -+ -+#ifdef CONFIG_SCHED_DEBUG -+extern bool sched_debug_enabled; -+#endif -+ -+extern void schedule_idle(void); -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+struct irqtime { -+ u64 total; -+ u64 tick_delta; -+ u64 irq_start_time; -+ struct u64_stats_sync sync; -+}; -+ -+DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -+ -+/* -+ * Returns the irqtime minus the softirq time computed by ksoftirqd. -+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime -+ * and never move forward. 
-+ */ -+static inline u64 irq_time_read(int cpu) -+{ -+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); -+ unsigned int seq; -+ u64 total; -+ -+ do { -+ seq = __u64_stats_fetch_begin(&irqtime->sync); -+ total = irqtime->total; -+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); -+ -+ return total; -+} -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+ -+static inline bool sched_stop_runnable(struct rq *rq) -+{ -+ return rq->stop && task_on_rq_queued(rq->stop); -+} -+ -+#ifdef CONFIG_SMP -+static inline int cpu_of(struct rq *rq) -+{ -+ return rq->cpu; -+} -+#else /* CONFIG_SMP */ -+static inline int cpu_of(struct rq *rq) -+{ -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_CPU_FREQ -+DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); -+ -+static inline void cpufreq_trigger(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, -+ cpu_of(rq))); -+ -+ if (data) -+ data->func(data, rq->niffies, flags); -+} -+#else -+static inline void cpufreq_trigger(struct rq *rq, unsigned int flag) -+{ -+} -+#endif /* CONFIG_CPU_FREQ */ -+ -+static __always_inline -+unsigned int uclamp_rq_util_with(struct rq __maybe_unused *rq, unsigned int util, -+ struct task_struct __maybe_unused *p) -+{ -+ return util; -+} -+ -+static inline bool uclamp_is_used(void) -+{ -+ return false; -+} -+ -+#ifndef arch_scale_freq_tick -+static __always_inline -+void arch_scale_freq_tick(void) -+{ -+} -+#endif -+ -+#ifdef arch_scale_freq_capacity -+#ifndef arch_scale_freq_invariant -+#define arch_scale_freq_invariant() (true) -+#endif -+#else /* arch_scale_freq_capacity */ -+#define arch_scale_freq_invariant() (false) -+#endif -+ -+#ifdef CONFIG_64BIT -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ return tsk_seruntime(t); -+} -+#else -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ struct rq_flags rf; -+ u64 ns; -+ struct rq *rq; -+ -+ rq = task_rq_lock(t, &rf); -+ ns = tsk_seruntime(t); -+ task_rq_unlock(rq, t, &rf); -+ -+ return ns; -+} -+#endif -+ -+#ifndef arch_scale_freq_capacity -+/** -+ * arch_scale_freq_capacity - get the frequency scale factor of a given CPU. -+ * @cpu: the CPU in question. -+ * -+ * Return: the frequency scale factor normalized against SCHED_CAPACITY_SCALE, i.e. -+ * -+ * f_curr -+ * ------ * SCHED_CAPACITY_SCALE -+ * f_max -+ */ -+static __always_inline -+unsigned long arch_scale_freq_capacity(int cpu) -+{ -+ return SCHED_CAPACITY_SCALE; -+} -+#endif -+ -+#ifdef CONFIG_NO_HZ_FULL -+extern bool sched_can_stop_tick(struct rq *rq); -+extern int __init sched_tick_offload_init(void); -+ -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out of -+ * nohz mode if necessary. 
-+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (sched_can_stop_tick(rq)) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else -+static inline int sched_tick_offload_init(void) { return 0; } -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+#define SCHED_FLAG_SUGOV 0x10000000 -+ -+static inline bool rt_rq_is_runnable(struct rq *rt_rq) -+{ -+ return rt_rq->rt_nr_running; -+} -+ -+/** -+ * enum schedutil_type - CPU utilization type -+ * @FREQUENCY_UTIL: Utilization used to select frequency -+ * @ENERGY_UTIL: Utilization used during energy calculation -+ * -+ * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time -+ * need to be aggregated differently depending on the usage made of them. This -+ * enum is used within schedutil_freq_util() to differentiate the types of -+ * utilization expected by the callers, and adjust the aggregation accordingly. -+ */ -+enum schedutil_type { -+ FREQUENCY_UTIL, -+ ENERGY_UTIL, -+}; -+ -+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL -+ -+unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, -+ unsigned long max, enum schedutil_type type, -+ struct task_struct *p); -+ -+static inline unsigned long cpu_bw_dl(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline unsigned long cpu_util_dl(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline unsigned long cpu_util_cfs(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->load_avg); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+static inline unsigned long cpu_util_rt(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->rt_nr_running); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+static inline unsigned long cpu_util_irq(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->irq_load_avg); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+static inline -+unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) -+{ -+ util *= (max - irq); -+ util /= max; -+ -+ return util; -+ -+} -+#else -+static inline unsigned long cpu_util_irq(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline -+unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) -+{ -+ return util; -+} -+#endif -+#endif -+ -+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -+#define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus))) -+ -+DECLARE_STATIC_KEY_FALSE(sched_energy_present); -+ -+static inline bool sched_energy_enabled(void) -+{ -+ return static_branch_unlikely(&sched_energy_present); -+} -+ -+#else /* ! (CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */ -+ -+#define perf_domain_span(pd) NULL -+static inline bool sched_energy_enabled(void) { return false; } -+ -+#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */ -+ -+#ifdef CONFIG_MEMBARRIER -+/* -+ * The scheduler provides memory barriers required by membarrier between: -+ * - prior user-space memory accesses and store to rq->membarrier_state, -+ * - store to rq->membarrier_state and following user-space memory accesses. -+ * In the same way it provides those guarantees around store to rq->curr. 
-+ */ -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+ int membarrier_state; -+ -+ if (prev_mm == next_mm) -+ return; -+ -+ membarrier_state = atomic_read(&next_mm->membarrier_state); -+ if (READ_ONCE(rq->membarrier_state) == membarrier_state) -+ return; -+ -+ WRITE_ONCE(rq->membarrier_state, membarrier_state); -+} -+#else -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+} -+#endif -+ -+#ifdef CONFIG_SMP -+static inline bool is_per_cpu_kthread(struct task_struct *p) -+{ -+ if (!(p->flags & PF_KTHREAD)) -+ return false; -+ -+ if (p->nr_cpus_allowed != 1) -+ return false; -+ -+ return true; -+} -+#endif -+ -+void swake_up_all_locked(struct swait_queue_head *q); -+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+/* pelt.h compat CONFIG_SCHED_THERMAL_PRESSURE impossible with MUQSS */ -+static inline int -+update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) -+{ -+ return 0; -+} -+ -+static inline u64 thermal_load_avg(struct rq *rq) -+{ -+ return 0; -+} -+ -+#ifdef CONFIG_RCU_TORTURE_TEST -+extern int sysctl_sched_rt_runtime; -+#endif -+ -+#endif /* MUQSS_SCHED_H */ -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index e39008242cf4..146a3dfe626f 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -183,6 +183,12 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - -+#ifdef CONFIG_SCHED_MUQSS -+#define rt_rq_runnable(rq_rt) rt_rq_is_runnable(rq) -+#else -+#define rt_rq_runnable(rq_rt) rt_rq_is_runnable(&rq->rt) -+#endif -+ - /* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. 
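[Editor's sketch, not part of the patch.] The hunks above clamp MuQSS's rq->load_avg and rt_nr_running to SCHED_CAPACITY_SCALE so schedutil can keep using the linear relation f = u * f_max quoted in the context line. A minimal C illustration of that mapping, under assumed example values (a 1024 capacity scale standing in for SCHED_CAPACITY_SCALE, a 3 GHz policy maximum) and ignoring any extra headroom schedutil may add in get_next_freq():

/*
 * Illustrative only; example_target_freq() and the 3 GHz figure are
 * assumptions. The clamp mirrors cpu_util_cfs()/cpu_util_rt() above.
 */
#define EXAMPLE_CAPACITY_SCALE 1024UL	/* stands in for SCHED_CAPACITY_SCALE */

static unsigned long example_target_freq(unsigned long util, unsigned long f_max_khz)
{
	if (util > EXAMPLE_CAPACITY_SCALE)	/* same clamp as the helpers above */
		util = EXAMPLE_CAPACITY_SCALE;
	return f_max_khz * util / EXAMPLE_CAPACITY_SCALE;	/* f = u * f_max */
}

/* e.g. util = 512, f_max = 3000000 kHz -> 1500000 kHz (1.5 GHz), before any
 * headroom schedutil applies on top when picking the next frequency. */
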
-@@ -211,7 +217,7 @@ unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, - struct rq *rq = cpu_rq(cpu); - - if (!uclamp_is_used() && -- type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) { -+ type == FREQUENCY_UTIL && rt_rq_runnable(rq)) { - return max; - } - -@@ -656,7 +662,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - struct task_struct *thread; - struct sched_attr attr = { - .size = sizeof(struct sched_attr), -+#ifdef CONFIG_SCHED_MUQSS -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, -+#endif - .sched_flags = SCHED_FLAG_SUGOV, - .sched_nice = 0, - .sched_priority = 0, -diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h -index efbb492bb94c..f0288c32ab17 100644 ---- a/kernel/sched/cpupri.h -+++ b/kernel/sched/cpupri.h -@@ -17,6 +17,7 @@ struct cpupri { - int *cpu_to_pri; - }; - -+#ifndef CONFIG_SCHED_MUQSS - #ifdef CONFIG_SMP - int cpupri_find(struct cpupri *cp, struct task_struct *p, - struct cpumask *lowest_mask); -@@ -27,3 +28,4 @@ void cpupri_set(struct cpupri *cp, int cpu, int pri); - int cpupri_init(struct cpupri *cp); - void cpupri_cleanup(struct cpupri *cp); - #endif -+#endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index 5a55d2300452..283a580754a7 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -266,26 +266,6 @@ static inline u64 account_other_time(u64 max) - return accounted; - } - --#ifdef CONFIG_64BIT --static inline u64 read_sum_exec_runtime(struct task_struct *t) --{ -- return t->se.sum_exec_runtime; --} --#else --static u64 read_sum_exec_runtime(struct task_struct *t) --{ -- u64 ns; -- struct rq_flags rf; -- struct rq *rq; -- -- rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -- task_rq_unlock(rq, t, &rf); -- -- return ns; --} --#endif -- - /* - * Accumulate raw cputime values of dead tasks (sig->[us]time) and live - * tasks (sum on group iteration) belonging to @tsk's group. -@@ -614,7 +594,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - task_cputime(p, &cputime.utime, &cputime.stime); -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index f324dc36fc43..43ca13ed9ab0 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -369,6 +369,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_MUQSS - /* - * idle-task scheduling class. 
- */ -@@ -482,3 +483,4 @@ const struct sched_class idle_sched_class - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif /* CONFIG_SCHED_MUQSS */ -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 28709f6b0975..4478c11cb51a 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2,6 +2,19 @@ - /* - * Scheduler internal types and methods: - */ -+#ifdef CONFIG_SCHED_MUQSS -+#include "MuQSS.h" -+ -+/* Begin compatibility wrappers for MuQSS/CFS differences */ -+#define rq_rt_nr_running(rq) ((rq)->rt_nr_running) -+#define rq_h_nr_running(rq) ((rq)->nr_running) -+ -+#else /* CONFIG_SCHED_MUQSS */ -+ -+#define rq_rt_nr_running(rq) ((rq)->rt.rt_nr_running) -+#define rq_h_nr_running(rq) ((rq)->cfs.h_nr_running) -+ -+ - #include - - #include -@@ -2626,3 +2639,25 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) - - void swake_up_all_locked(struct swait_queue_head *q); - void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+/* MuQSS compatibility functions */ -+#ifdef CONFIG_64BIT -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ return t->se.sum_exec_runtime; -+} -+#else -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ u64 ns; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ rq = task_rq_lock(t, &rf); -+ ns = t->se.sum_exec_runtime; -+ task_rq_unlock(rq, t, &rf); -+ -+ return ns; -+} -+#endif -+#endif /* CONFIG_SCHED_MUQSS */ -diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 1bd7e3af904f..a1dc490c15e4 100644 ---- a/kernel/sched/topology.c -+++ b/kernel/sched/topology.c -@@ -440,7 +440,11 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) - struct root_domain *old_rd = NULL; - unsigned long flags; - -+#ifdef CONFIG_SCHED_MUQSS -+ raw_spin_lock_irqsave(rq->lock, flags); -+#else - raw_spin_lock_irqsave(&rq->lock, flags); -+#endif - - if (rq->rd) { - old_rd = rq->rd; -@@ -466,7 +470,11 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) - if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) - set_rq_online(rq); - -+#ifdef CONFIG_SCHED_MUQSS -+ raw_spin_unlock_irqrestore(rq->lock, flags); -+#else - raw_spin_unlock_irqrestore(&rq->lock, flags); -+#endif - - if (old_rd) - call_rcu(&old_rd->rcu, free_rootdomain); -diff --git a/kernel/skip_list.c b/kernel/skip_list.c -new file mode 100644 -index 000000000000..bf5c6e97e139 ---- /dev/null -+++ b/kernel/skip_list.c -@@ -0,0 +1,148 @@ -+/* -+ Copyright (C) 2011,2016 Con Kolivas. -+ -+ Code based on example originally by William Pugh. -+ -+Skip Lists are a probabilistic alternative to balanced trees, as -+described in the June 1990 issue of CACM and were invented by -+William Pugh in 1987. -+ -+A couple of comments about this implementation: -+The routine randomLevel has been hard-coded to generate random -+levels using p=0.25. It can be easily changed. -+ -+The insertion routine has been implemented so as to use the -+dirty hack described in the CACM paper: if a random level is -+generated that is more than the current maximum level, the -+current maximum level plus one is used instead. -+ -+Levels start at zero and go up to MaxLevel (which is equal to -+MaxNumberOfLevels-1). -+ -+The routines defined in this file are: -+ -+init: defines slnode -+ -+new_skiplist: returns a new, empty list -+ -+randomLevel: Returns a random level based on a u64 random seed passed to it. -+In MuQSS, the "niffy" time is used for this purpose. 
-+ -+insert(l,key, value): inserts the binding (key, value) into l. This operation -+occurs in O(log n) time. -+ -+delnode(slnode, l, node): deletes any binding of key from the l based on the -+actual node value. This operation occurs in O(k) time where k is the -+number of levels of the node in question (max 8). The original delete -+function occurred in O(log n) time and involved a search. -+ -+MuQSS Notes: In this implementation of skiplists, there are bidirectional -+next/prev pointers and the insert function returns a pointer to the actual -+node the value is stored. The key here is chosen by the scheduler so as to -+sort tasks according to the priority list requirements and is no longer used -+by the scheduler after insertion. The scheduler lookup, however, occurs in -+O(1) time because it is always the first item in the level 0 linked list. -+Since the task struct stores a copy of the node pointer upon skiplist_insert, -+it can also remove it much faster than the original implementation with the -+aid of prev<->next pointer manipulation and no searching. -+ -+*/ -+ -+#include -+#include -+ -+#define MaxNumberOfLevels 8 -+#define MaxLevel (MaxNumberOfLevels - 1) -+ -+void skiplist_init(skiplist_node *slnode) -+{ -+ int i; -+ -+ slnode->key = 0xFFFFFFFFFFFFFFFF; -+ slnode->level = 0; -+ slnode->value = NULL; -+ for (i = 0; i < MaxNumberOfLevels; i++) -+ slnode->next[i] = slnode->prev[i] = slnode; -+} -+ -+skiplist *new_skiplist(skiplist_node *slnode) -+{ -+ skiplist *l = kzalloc(sizeof(skiplist), GFP_ATOMIC); -+ -+ BUG_ON(!l); -+ l->header = slnode; -+ return l; -+} -+ -+void free_skiplist(skiplist *l) -+{ -+ skiplist_node *p, *q; -+ -+ p = l->header; -+ do { -+ q = p->next[0]; -+ p->next[0]->prev[0] = q->prev[0]; -+ skiplist_node_init(p); -+ p = q; -+ } while (p != l->header); -+ kfree(l); -+} -+ -+void skiplist_node_init(skiplist_node *node) -+{ -+ memset(node, 0, sizeof(skiplist_node)); -+} -+ -+static inline unsigned int randomLevel(const long unsigned int randseed) -+{ -+ return find_first_bit(&randseed, MaxLevel) / 2; -+} -+ -+void skiplist_insert(skiplist *l, skiplist_node *node, keyType key, valueType value, unsigned int randseed) -+{ -+ skiplist_node *update[MaxNumberOfLevels]; -+ skiplist_node *p, *q; -+ int k = l->level; -+ -+ p = l->header; -+ do { -+ while (q = p->next[k], q->key <= key) -+ p = q; -+ update[k] = p; -+ } while (--k >= 0); -+ -+ ++l->entries; -+ k = randomLevel(randseed); -+ if (k > l->level) { -+ k = ++l->level; -+ update[k] = l->header; -+ } -+ -+ node->level = k; -+ node->key = key; -+ node->value = value; -+ do { -+ p = update[k]; -+ node->next[k] = p->next[k]; -+ p->next[k] = node; -+ node->prev[k] = p; -+ node->next[k]->prev[k] = node; -+ } while (--k >= 0); -+} -+ -+void skiplist_delete(skiplist *l, skiplist_node *node) -+{ -+ int k, m = node->level; -+ -+ for (k = 0; k <= m; k++) { -+ node->prev[k]->next[k] = node->next[k]; -+ node->next[k]->prev[k] = node->prev[k]; -+ } -+ skiplist_node_init(node); -+ if (m == l->level) { -+ while (l->header->next[m] == l->header && l->header->prev[m] == l->header && m > 0) -+ m--; -+ l->level = m; -+ } -+ l->entries--; -+} -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index afad085960b8..d2e35cd54f94 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -120,7 +120,17 @@ static unsigned long long_max = LONG_MAX; - static int one_hundred = 100; - static int two_hundred = 200; - static int one_thousand = 1000; --#ifdef CONFIG_PRINTK -+static int zero = 0; -+static int one = 1; -+#ifdef CONFIG_SCHED_MUQSS 
-+extern int rr_interval; -+extern int sched_interactive; -+extern int sched_iso_cpu; -+extern int sched_yield_type; -+#endif -+extern int hrtimer_granularity_us; -+extern int hrtimeout_min_us; -+#if defined(CONFIG_PRINTK) || defined(CONFIG_SCHED_MUQSS) - static int ten_thousand = 10000; - #endif - #ifdef CONFIG_PERF_EVENTS -@@ -184,7 +194,7 @@ static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT; - int sysctl_legacy_va_layout; - #endif - --#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_MUQSS) - static int min_sched_granularity_ns = 100000; /* 100 usecs */ - static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_wakeup_granularity_ns; /* 0 usecs */ -@@ -193,7 +203,7 @@ static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; - static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; - #endif /* CONFIG_SMP */ --#endif /* CONFIG_SCHED_DEBUG */ -+#endif /* CONFIG_SCHED_DEBUG && !CONFIG_SCHED_MUQSS */ - - #ifdef CONFIG_COMPACTION - static int min_extfrag_threshold; -@@ -1652,6 +1662,7 @@ int proc_do_static_key(struct ctl_table *table, int write, - } - - static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_MUQSS - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, -@@ -1843,6 +1854,73 @@ static struct ctl_table kern_table[] = { - .extra1 = SYSCTL_ONE, - }, - #endif -+#elif defined(CONFIG_SCHED_MUQSS) -+ { -+ .procname = "rr_interval", -+ .data = &rr_interval, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &one, -+ .extra2 = &one_thousand, -+ }, -+ { -+ .procname = "interactive", -+ .data = &sched_interactive, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &one, -+ }, -+ { -+ .procname = "iso_cpu", -+ .data = &sched_iso_cpu, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &one_hundred, -+ }, -+ { -+ .procname = "yield_type", -+ .data = &sched_yield_type, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &two, -+ }, -+#if defined(CONFIG_SMP) && defined(CONFIG_SCHEDSTATS) -+ { -+ .procname = "sched_schedstats", -+ .data = NULL, -+ .maxlen = sizeof(unsigned int), -+ .mode = 0644, -+ .proc_handler = sysctl_schedstats, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+#endif /* CONFIG_SMP && CONFIG_SCHEDSTATS */ -+#endif /* CONFIG_SCHED_MUQSS */ -+ { -+ .procname = "hrtimer_granularity_us", -+ .data = &hrtimer_granularity_us, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &one, -+ .extra2 = &ten_thousand, -+ }, -+ { -+ .procname = "hrtimeout_min_us", -+ .data = &hrtimeout_min_us, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &one, -+ .extra2 = &ten_thousand, -+ }, - #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) - { - .procname = "sched_energy_aware", -diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig -index a09b1d61df6a..e7662101fcc3 100644 ---- a/kernel/time/Kconfig -+++ b/kernel/time/Kconfig -@@ -75,6 +75,9 @@ config NO_HZ_COMMON - depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - select TICK_ONESHOT - -+config NO_HZ_FULL -+ bool -+ - choice - prompt "Timer tick 
handling" - default NO_HZ_IDLE if NO_HZ -@@ -96,8 +99,9 @@ config NO_HZ_IDLE - - Most of the time you want to say Y here. - --config NO_HZ_FULL -+config NO_HZ_FULL_NODEF - bool "Full dynticks system (tickless)" -+ select NO_HZ_FULL - # NO_HZ_COMMON dependency - depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - # We need at least one periodic CPU for timekeeping -@@ -123,6 +127,8 @@ config NO_HZ_FULL - transitions: syscalls, exceptions and interrupts. Even when it's - dynamically off. - -+ Not recommended for desktops,laptops, or mobile devices. -+ - Say N. - - endchoice -@@ -132,7 +138,7 @@ config CONTEXT_TRACKING - - config CONTEXT_TRACKING_FORCE - bool "Force context tracking" -- depends on CONTEXT_TRACKING -+ depends on CONTEXT_TRACKING && !SCHED_MUQSS - default y if !NO_HZ_FULL - help - The major pre-requirement for full dynticks to work is to -diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c -index f5490222e134..544c58c29267 100644 ---- a/kernel/time/clockevents.c -+++ b/kernel/time/clockevents.c -@@ -190,8 +190,9 @@ int clockevents_tick_resume(struct clock_event_device *dev) - - #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST - --/* Limit min_delta to a jiffie */ --#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) -+int __read_mostly hrtimer_granularity_us = 100; -+/* Limit min_delta to 100us */ -+#define MIN_DELTA_LIMIT (hrtimer_granularity_us * NSEC_PER_USEC) - - /** - * clockevents_increase_min_delta - raise minimum delta of a clock event device -diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index 95b6a708b040..19918cf649b0 100644 ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -2223,3 +2223,113 @@ int __sched schedule_hrtimeout(ktime_t *expires, - return schedule_hrtimeout_range(expires, 0, mode); - } - EXPORT_SYMBOL_GPL(schedule_hrtimeout); -+ -+/* -+ * As per schedule_hrtimeout but taskes a millisecond value and returns how -+ * many milliseconds are left. -+ */ -+long __sched schedule_msec_hrtimeout(long timeout) -+{ -+ struct hrtimer_sleeper t; -+ int delta, jiffs; -+ ktime_t expires; -+ -+ if (!timeout) { -+ __set_current_state(TASK_RUNNING); -+ return 0; -+ } -+ -+ jiffs = msecs_to_jiffies(timeout); -+ /* -+ * If regular timer resolution is adequate or hrtimer resolution is not -+ * (yet) better than Hz, as would occur during startup, use regular -+ * timers. -+ */ -+ if (jiffs > 4 || hrtimer_resolution >= NSEC_PER_SEC / HZ || pm_freezing) -+ return schedule_timeout(jiffs); -+ -+ delta = (timeout % 1000) * NSEC_PER_MSEC; -+ expires = ktime_set(0, delta); -+ -+ hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ hrtimer_set_expires_range_ns(&t.timer, expires, delta); -+ -+ hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL); -+ -+ if (likely(t.task)) -+ schedule(); -+ -+ hrtimer_cancel(&t.timer); -+ destroy_hrtimer_on_stack(&t.timer); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ expires = hrtimer_expires_remaining(&t.timer); -+ timeout = ktime_to_ms(expires); -+ return timeout < 0 ? 
0 : timeout; -+} -+ -+EXPORT_SYMBOL(schedule_msec_hrtimeout); -+ -+#define USECS_PER_SEC 1000000 -+extern int hrtimer_granularity_us; -+ -+static inline long schedule_usec_hrtimeout(long timeout) -+{ -+ struct hrtimer_sleeper t; -+ ktime_t expires; -+ int delta; -+ -+ if (!timeout) { -+ __set_current_state(TASK_RUNNING); -+ return 0; -+ } -+ -+ if (hrtimer_resolution >= NSEC_PER_SEC / HZ) -+ return schedule_timeout(usecs_to_jiffies(timeout)); -+ -+ if (timeout < hrtimer_granularity_us) -+ timeout = hrtimer_granularity_us; -+ delta = (timeout % USECS_PER_SEC) * NSEC_PER_USEC; -+ expires = ktime_set(0, delta); -+ -+ hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ hrtimer_set_expires_range_ns(&t.timer, expires, delta); -+ -+ hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL); -+ -+ if (likely(t.task)) -+ schedule(); -+ -+ hrtimer_cancel(&t.timer); -+ destroy_hrtimer_on_stack(&t.timer); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ expires = hrtimer_expires_remaining(&t.timer); -+ timeout = ktime_to_us(expires); -+ return timeout < 0 ? 0 : timeout; -+} -+ -+int __read_mostly hrtimeout_min_us = 500; -+ -+long __sched schedule_min_hrtimeout(void) -+{ -+ return usecs_to_jiffies(schedule_usec_hrtimeout(hrtimeout_min_us)); -+} -+ -+EXPORT_SYMBOL(schedule_min_hrtimeout); -+ -+long __sched schedule_msec_hrtimeout_interruptible(long timeout) -+{ -+ __set_current_state(TASK_INTERRUPTIBLE); -+ return schedule_msec_hrtimeout(timeout); -+} -+EXPORT_SYMBOL(schedule_msec_hrtimeout_interruptible); -+ -+long __sched schedule_msec_hrtimeout_uninterruptible(long timeout) -+{ -+ __set_current_state(TASK_UNINTERRUPTIBLE); -+ return schedule_msec_hrtimeout(timeout); -+} -+EXPORT_SYMBOL(schedule_msec_hrtimeout_uninterruptible); -diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c -index a71758e34e45..ebb84a65d928 100644 ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -216,7 +216,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) - u64 stime, utime; - - task_cputime(p, &utime, &stime); -- store_samples(samples, stime, utime, p->se.sum_exec_runtime); -+ store_samples(samples, stime, utime, tsk_seruntime(p)); - } - - static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, -@@ -850,7 +850,7 @@ static void check_thread_timers(struct task_struct *tsk, - soft = task_rlimit(tsk, RLIMIT_RTTIME); - if (soft != RLIM_INFINITY) { - /* Task RT timeout is accounted in jiffies. RTTIME is usec */ -- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); -+ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); - unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - - /* At the hard limit, send SIGKILL. No further action. 
*/ -diff --git a/kernel/time/timer.c b/kernel/time/timer.c -index a50364df1054..a86e4530e530 100644 ---- a/kernel/time/timer.c -+++ b/kernel/time/timer.c -@@ -44,6 +44,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -1587,7 +1588,7 @@ static unsigned long __next_timer_interrupt(struct timer_base *base) - * Check, if the next hrtimer event is before the next timer wheel - * event: - */ --static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) -+static u64 cmp_next_hrtimer_event(struct timer_base *base, u64 basem, u64 expires) - { - u64 nextevt = hrtimer_get_next_event(); - -@@ -1605,6 +1606,9 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) - if (nextevt <= basem) - return basem; - -+ if (nextevt < expires && nextevt - basem <= TICK_NSEC) -+ base->is_idle = false; -+ - /* - * Round up to the next jiffie. High resolution timers are - * off, so the hrtimers are expired in the tick and we need to -@@ -1674,7 +1678,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) - } - raw_spin_unlock(&base->lock); - -- return cmp_next_hrtimer_event(basem, expires); -+ return cmp_next_hrtimer_event(base, basem, expires); - } - - /** -@@ -1873,6 +1877,18 @@ signed long __sched schedule_timeout(signed long timeout) - - expire = timeout + jiffies; - -+#ifdef CONFIG_HIGH_RES_TIMERS -+ if (timeout == 1 && hrtimer_resolution < NSEC_PER_SEC / HZ) { -+ /* -+ * Special case 1 as being a request for the minimum timeout -+ * and use highres timers to timeout after 1ms to workaround -+ * the granularity of low Hz tick timers. -+ */ -+ if (!schedule_min_hrtimeout()) -+ return 0; -+ goto out_timeout; -+ } -+#endif - timer.task = current; - timer_setup_on_stack(&timer.timer, process_timeout, 0); - __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING); -@@ -1881,10 +1897,10 @@ signed long __sched schedule_timeout(signed long timeout) - - /* Remove the timer from the object tracker */ - destroy_timer_on_stack(&timer.timer); -- -+out_timeout: - timeout = expire - jiffies; - -- out: -+out: - return timeout < 0 ? 0 : timeout; - } - EXPORT_SYMBOL(schedule_timeout); -@@ -2027,7 +2043,19 @@ void __init init_timers(void) - */ - void msleep(unsigned int msecs) - { -- unsigned long timeout = msecs_to_jiffies(msecs) + 1; -+ int jiffs = msecs_to_jiffies(msecs); -+ unsigned long timeout; -+ -+ /* -+ * Use high resolution timers where the resolution of tick based -+ * timers is inadequate. 
-+ */ -+ if (jiffs < 5 && hrtimer_resolution < NSEC_PER_SEC / HZ && !pm_freezing) { -+ while (msecs) -+ msecs = schedule_msec_hrtimeout_uninterruptible(msecs); -+ return; -+ } -+ timeout = jiffs + 1; - - while (timeout) - timeout = schedule_timeout_uninterruptible(timeout); -@@ -2041,7 +2069,15 @@ EXPORT_SYMBOL(msleep); - */ - unsigned long msleep_interruptible(unsigned int msecs) - { -- unsigned long timeout = msecs_to_jiffies(msecs) + 1; -+ int jiffs = msecs_to_jiffies(msecs); -+ unsigned long timeout; -+ -+ if (jiffs < 5 && hrtimer_resolution < NSEC_PER_SEC / HZ && !pm_freezing) { -+ while (msecs && !signal_pending(current)) -+ msecs = schedule_msec_hrtimeout_interruptible(msecs); -+ return msecs; -+ } -+ timeout = jiffs + 1; - - while (timeout && !signal_pending(current)) - timeout = schedule_timeout_interruptible(timeout); -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index b5e3496cf803..68930e7f4d28 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) - { - /* Make this a -deadline thread */ - static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_MUQSS -+ /* No deadline on MuQSS, use RR */ -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, - .sched_runtime = 100000ULL, - .sched_deadline = 10000000ULL, - .sched_period = 10000000ULL -+#endif - }; - struct wakeup_test_data *x = data; - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 466fc3144fff..27224c2d7674 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -169,7 +169,7 @@ struct scan_control { - /* - * From 0 .. 200. Higher means more swappy. - */ --int vm_swappiness = 60; -+int vm_swappiness = 33; - - static void set_task_reclaim_state(struct task_struct *task, - struct reclaim_state *rs) -diff --git a/net/core/pktgen.c b/net/core/pktgen.c -index 44fdbb9c6e53..ae0adfc677c2 100644 ---- a/net/core/pktgen.c -+++ b/net/core/pktgen.c -@@ -1894,7 +1894,7 @@ static void pktgen_mark_device(const struct pktgen_net *pn, const char *ifname) - mutex_unlock(&pktgen_thread_lock); - pr_debug("%s: waiting for %s to disappear....\n", - __func__, ifname); -- schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try)); -+ schedule_msec_hrtimeout_interruptible((msec_per_try)); - mutex_lock(&pktgen_thread_lock); - - if (++i >= max_tries) { -diff --git a/sound/pci/maestro3.c b/sound/pci/maestro3.c -index 40232a278b1a..d87fae1113aa 100644 ---- a/sound/pci/maestro3.c -+++ b/sound/pci/maestro3.c -@@ -1995,7 +1995,7 @@ static void snd_m3_ac97_reset(struct snd_m3 *chip) - outw(0, io + GPIO_DATA); - outw(dir | GPO_PRIMARY_AC97, io + GPIO_DIRECTION); - -- schedule_timeout_uninterruptible(msecs_to_jiffies(delay1)); -+ schedule_msec_hrtimeout_uninterruptible((delay1)); - - outw(GPO_PRIMARY_AC97, io + GPIO_DATA); - udelay(5); -@@ -2003,7 +2003,7 @@ static void snd_m3_ac97_reset(struct snd_m3 *chip) - outw(IO_SRAM_ENABLE | SERIAL_AC_LINK_ENABLE, io + RING_BUS_CTRL_A); - outw(~0, io + GPIO_MASK); - -- schedule_timeout_uninterruptible(msecs_to_jiffies(delay2)); -+ schedule_msec_hrtimeout_uninterruptible((delay2)); - - if (! 
snd_m3_try_read_vendor(chip)) - break; -diff --git a/sound/soc/codecs/rt5631.c b/sound/soc/codecs/rt5631.c -index 653da3eaf355..d77d12902594 100644 ---- a/sound/soc/codecs/rt5631.c -+++ b/sound/soc/codecs/rt5631.c -@@ -417,7 +417,7 @@ static void onebit_depop_mute_stage(struct snd_soc_component *component, int ena - hp_zc = snd_soc_component_read(component, RT5631_INT_ST_IRQ_CTRL_2); - snd_soc_component_write(component, RT5631_INT_ST_IRQ_CTRL_2, hp_zc & 0xf7ff); - if (enable) { -- schedule_timeout_uninterruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_uninterruptible((10)); - /* config one-bit depop parameter */ - rt5631_write_index(component, RT5631_SPK_INTL_CTRL, 0x307f); - snd_soc_component_update_bits(component, RT5631_HP_OUT_VOL, -@@ -529,7 +529,7 @@ static void depop_seq_mute_stage(struct snd_soc_component *component, int enable - hp_zc = snd_soc_component_read(component, RT5631_INT_ST_IRQ_CTRL_2); - snd_soc_component_write(component, RT5631_INT_ST_IRQ_CTRL_2, hp_zc & 0xf7ff); - if (enable) { -- schedule_timeout_uninterruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_uninterruptible((10)); - - /* config depop sequence parameter */ - rt5631_write_index(component, RT5631_SPK_INTL_CTRL, 0x302f); -diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c -index a6aa212fa0c8..8bfa549b38db 100644 ---- a/sound/soc/codecs/wm8350.c -+++ b/sound/soc/codecs/wm8350.c -@@ -233,10 +233,10 @@ static void wm8350_pga_work(struct work_struct *work) - out2->ramp == WM8350_RAMP_UP) { - /* delay is longer over 0dB as increases are larger */ - if (i >= WM8350_OUTn_0dB) -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (2)); - else -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (1)); - } else - udelay(50); /* doesn't matter if we delay longer */ -@@ -1120,7 +1120,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - (platform->dis_out4 << 6)); - - /* wait for discharge */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform-> - cap_discharge_msecs)); - -@@ -1136,7 +1136,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - WM8350_VBUFEN); - - /* wait for vmid */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform-> - vmid_charge_msecs)); - -@@ -1187,7 +1187,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, pm1); - - /* wait */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform-> - vmid_discharge_msecs)); - -@@ -1205,7 +1205,7 @@ static int wm8350_set_bias_level(struct snd_soc_component *component, - pm1 | WM8350_OUTPUT_DRAIN_EN); - - /* wait */ -- schedule_timeout_interruptible(msecs_to_jiffies -+ schedule_msec_hrtimeout_interruptible( - (platform->drain_msecs)); - - pm1 &= ~WM8350_BIASEN; -diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c -index a9a6d766a176..45bf31de6282 100644 ---- a/sound/soc/codecs/wm8900.c -+++ b/sound/soc/codecs/wm8900.c -@@ -1104,7 +1104,7 @@ static int wm8900_set_bias_level(struct snd_soc_component *component, - /* Need to let things settle before stopping the clock - * to ensure that restart works, see "Stopping the - * master clock" in the datasheet. 
*/ -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible(1); - snd_soc_component_write(component, WM8900_REG_POWER2, - WM8900_REG_POWER2_SYSCLK_ENA); - break; -diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c -index 7072ffacbdfd..e8414ec4759c 100644 ---- a/sound/soc/codecs/wm9713.c -+++ b/sound/soc/codecs/wm9713.c -@@ -199,7 +199,7 @@ static int wm9713_voice_shutdown(struct snd_soc_dapm_widget *w, - - /* Gracefully shut down the voice interface. */ - snd_soc_component_update_bits(component, AC97_HANDSET_RATE, 0x0f00, 0x0200); -- schedule_timeout_interruptible(msecs_to_jiffies(1)); -+ schedule_msec_hrtimeout_interruptible(1); - snd_soc_component_update_bits(component, AC97_HANDSET_RATE, 0x0f00, 0x0f00); - snd_soc_component_update_bits(component, AC97_EXTENDED_MID, 0x1000, 0x1000); - -@@ -868,7 +868,7 @@ static int wm9713_set_pll(struct snd_soc_component *component, - wm9713->pll_in = freq_in; - - /* wait 10ms AC97 link frames for the link to stabilise */ -- schedule_timeout_interruptible(msecs_to_jiffies(10)); -+ schedule_msec_hrtimeout_interruptible((10)); - return 0; - } - -diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c -index 3273161e2787..7fb9b4c6dd7b 100644 ---- a/sound/soc/soc-dapm.c -+++ b/sound/soc/soc-dapm.c -@@ -154,7 +154,7 @@ static void dapm_assert_locked(struct snd_soc_dapm_context *dapm) - static void pop_wait(u32 pop_time) - { - if (pop_time) -- schedule_timeout_uninterruptible(msecs_to_jiffies(pop_time)); -+ schedule_msec_hrtimeout_uninterruptible((pop_time)); - } - - __printf(3, 4) -diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c -index fdbdfb7bce92..fa8e8faf3eb3 100644 ---- a/sound/usb/line6/pcm.c -+++ b/sound/usb/line6/pcm.c -@@ -127,7 +127,7 @@ static void line6_wait_clear_audio_urbs(struct snd_line6_pcm *line6pcm, - if (!alive) - break; - set_current_state(TASK_UNINTERRUPTIBLE); -- schedule_timeout(1); -+ schedule_min_hrtimeout(); - } while (--timeout > 0); - if (alive) - dev_err(line6pcm->line6->ifcdev, diff --git a/linux59-tkg/linux59-tkg-patches/0004-glitched-muqss.patch b/linux59-tkg/linux59-tkg-patches/0004-glitched-muqss.patch deleted file mode 100644 index 46b094f..0000000 --- a/linux59-tkg/linux59-tkg-patches/0004-glitched-muqss.patch +++ /dev/null @@ -1,90 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - MuQSS - -diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c -index 84a1d08d68551..57c3036a68952 100644 ---- a/kernel/sched/MuQSS.c -+++ b/kernel/sched/MuQSS.c -@@ -163,7 +167,11 @@ int sched_interactive __read_mostly = 1; - * are allowed to run five seconds as real time tasks. This is the total over - * all online cpus. - */ -+#ifdef CONFIG_ZENIFY -+int sched_iso_cpu __read_mostly = 25; -+#else - int sched_iso_cpu __read_mostly = 70; -+#endif - - /* - * sched_yield_type - Choose what sort of yield sched_yield will perform. - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -5,7 +5,7 @@ - choice - prompt "Timer frequency" - default HZ_100 if SCHED_MUQSS -- default HZ_250_NODEF if !SCHED_MUQSS -+ default HZ_500_NODEF if !SCHED_MUQSS - help - Allows the configuration of the timer frequency. 
It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -50,6 +50,20 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500_NODEF -+ bool "500 HZ" -+ help -+ 500 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ -+ config HZ_750_NODEF -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ - config HZ_1000_NODEF - bool "1000 HZ" - help -@@ -63,6 +70,8 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250_NODEF - default 300 if HZ_300_NODEF -+ default 500 if HZ_500_NODEF -+ default 750 if HZ_750_NODEF - default 1000 if HZ_1000_NODEF - - config SCHED_HRTICK - -diff --git a/Makefile b/Makefile -index d4d36c61940b..4a9dfe471f1f 100644 ---- a/Makefile -+++ b/Makefile -@@ -15,7 +15,6 @@ NAME = Kleptomaniac Octopus - - CKVERSION = -ck1 - CKNAME = MuQSS Powered --EXTRAVERSION := $(EXTRAVERSION)$(CKVERSION) - - # We are using a recursive build, so we need to do a little thinking - # to get the ordering right. -diff --git a/scripts/headers_install.sh b/scripts/headers_install.sh -index dd554bd43..75030ad93 100755 ---- a/scripts/headers_install.sh -+++ b/scripts/headers_install.sh -@@ -89,6 +89,7 @@ include/uapi/linux/atmdev.h:CONFIG_COMPAT - include/uapi/linux/eventpoll.h:CONFIG_PM_SLEEP - include/uapi/linux/hw_breakpoint.h:CONFIG_HAVE_MIXED_BREAKPOINTS_REGS - include/uapi/linux/pktcdvd.h:CONFIG_CDROM_PKTCDVD_WCACHE -+include/uapi/linux/sched.h:CONFIG_SCHED_MUQSS - " - - for c in $configs diff --git a/linux59-tkg/linux59-tkg-patches/0004-glitched-ondemand-muqss.patch b/linux59-tkg/linux59-tkg-patches/0004-glitched-ondemand-muqss.patch deleted file mode 100644 index 02933e4..0000000 --- a/linux59-tkg/linux59-tkg-patches/0004-glitched-ondemand-muqss.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 6b423eebfd5d..61e3271675d6 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -21,10 +21,10 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) --#define DEF_SAMPLING_DOWN_FACTOR (1) -+#define DEF_FREQUENCY_UP_THRESHOLD (45) -+#define DEF_SAMPLING_DOWN_FACTOR (5) - #define MAX_SAMPLING_DOWN_FACTOR (100000) --#define MICRO_FREQUENCY_UP_THRESHOLD (95) -+#define MICRO_FREQUENCY_UP_THRESHOLD (45) - #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) - #define MIN_FREQUENCY_UP_THRESHOLD (1) - #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/linux59-tkg/linux59-tkg-patches/0005-glitched-pds.patch b/linux59-tkg/linux59-tkg-patches/0005-glitched-pds.patch deleted file mode 100644 index 08c9ef3..0000000 --- a/linux59-tkg/linux59-tkg-patches/0005-glitched-pds.patch +++ /dev/null @@ -1,90 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - PDS - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. 
It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9270a4370d54..30d01e647417 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -169,7 +169,7 @@ - /* - * From 0 .. 200. Higher means more swappy. - */ --int vm_swappiness = 60; -+int vm_swappiness = 20; - - static void set_task_reclaim_state(struct task_struct *task, - struct reclaim_state *rs) diff --git a/linux59-tkg/linux59-tkg-patches/0006-add-acs-overrides_iommu.patch b/linux59-tkg/linux59-tkg-patches/0006-add-acs-overrides_iommu.patch deleted file mode 100644 index d1303a5..0000000 --- a/linux59-tkg/linux59-tkg-patches/0006-add-acs-overrides_iommu.patch +++ /dev/null @@ -1,193 +0,0 @@ -From cdeab384f48dd9c88e2dff2e9ad8d57dca1a1b1c Mon Sep 17 00:00:00 2001 -From: Mark Weiman -Date: Sun, 12 Aug 2018 11:36:21 -0400 -Subject: [PATCH] pci: Enable overrides for missing ACS capabilities - -This an updated version of Alex Williamson's patch from: -https://lkml.org/lkml/2013/5/30/513 - -Original commit message follows: - -PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that -allows us to control whether transactions are allowed to be redirected -in various subnodes of a PCIe topology. For instance, if two -endpoints are below a root port or downsteam switch port, the -downstream port may optionally redirect transactions between the -devices, bypassing upstream devices. The same can happen internally -on multifunction devices. The transaction may never be visible to the -upstream devices. - -One upstream device that we particularly care about is the IOMMU. If -a redirection occurs in the topology below the IOMMU, then the IOMMU -cannot provide isolation between devices. This is why the PCIe spec -encourages topologies to include ACS support. Without it, we have to -assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation. - -Unfortunately, far too many topologies do not support ACS to make this -a steadfast requirement. 
Even the latest chipsets from Intel are only -sporadically supporting ACS. We have trouble getting interconnect -vendors to include the PCIe spec required PCIe capability, let alone -suggested features. - -Therefore, we need to add some flexibility. The pcie_acs_override= -boot option lets users opt-in specific devices or sets of devices to -assume ACS support. The "downstream" option assumes full ACS support -on root ports and downstream switch ports. The "multifunction" -option assumes the subset of ACS features available on multifunction -endpoints and upstream switch ports are supported. The "id:nnnn:nnnn" -option enables ACS support on devices matching the provided vendor -and device IDs, allowing more strategic ACS overrides. These options -may be combined in any order. A maximum of 16 id specific overrides -are available. It's suggested to use the most limited set of options -necessary to avoid completely disabling ACS across the topology. -Note to hardware vendors, we have facilities to permanently quirk -specific devices which enforce isolation but not provide an ACS -capability. Please contact me to have your devices added and save -your customers the hassle of this boot option. - -Signed-off-by: Mark Weiman ---- - .../admin-guide/kernel-parameters.txt | 9 ++ - drivers/pci/quirks.c | 101 ++++++++++++++++++ - 2 files changed, 110 insertions(+) - -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index aefd358a5ca3..173b3596fd9e 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -3190,6 +3190,15 @@ - nomsi [MSI] If the PCI_MSI kernel config parameter is - enabled, this kernel boot option can be used to - disable the use of MSI interrupts system-wide. -+ pcie_acs_override = -+ [PCIE] Override missing PCIe ACS support for: -+ downstream -+ All downstream ports - full ACS capabilities -+ multifunction -+ All multifunction devices - multifunction ACS subset -+ id:nnnn:nnnn -+ Specific device - full ACS capabilities -+ Specified as vid:did (vendor/device ID) in hex - noioapicquirk [APIC] Disable all boot interrupt quirks. - Safety option to keep boot IRQs enabled. This - should never be necessary. 
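As a concrete illustration of the boot-parameter syntax documented in the hunk above (an editorial sketch, not part of the deleted patch; the device ID below is a placeholder), the options may be combined in any order, comma-separated per the parser in the quirks.c hunk that follows:

    pcie_acs_override=downstream,multifunction,id:8086:1234

The "id:" form takes a vendor:device pair in hex, and the commit message above notes that only a limited number of such ID-specific overrides (16 slots) are available.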
-diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index 4700d24e5d55..8f7a3d7fd9c1 100644 ---- a/drivers/pci/quirks.c -+++ b/drivers/pci/quirks.c -@@ -3372,6 +3372,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) - dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; - } - -+static bool acs_on_downstream; -+static bool acs_on_multifunction; -+ -+#define NUM_ACS_IDS 16 -+struct acs_on_id { -+ unsigned short vendor; -+ unsigned short device; -+}; -+static struct acs_on_id acs_on_ids[NUM_ACS_IDS]; -+static u8 max_acs_id; -+ -+static __init int pcie_acs_override_setup(char *p) -+{ -+ if (!p) -+ return -EINVAL; -+ -+ while (*p) { -+ if (!strncmp(p, "downstream", 10)) -+ acs_on_downstream = true; -+ if (!strncmp(p, "multifunction", 13)) -+ acs_on_multifunction = true; -+ if (!strncmp(p, "id:", 3)) { -+ char opt[5]; -+ int ret; -+ long val; -+ -+ if (max_acs_id >= NUM_ACS_IDS - 1) { -+ pr_warn("Out of PCIe ACS override slots (%d)\n", -+ NUM_ACS_IDS); -+ goto next; -+ } -+ -+ p += 3; -+ snprintf(opt, 5, "%s", p); -+ ret = kstrtol(opt, 16, &val); -+ if (ret) { -+ pr_warn("PCIe ACS ID parse error %d\n", ret); -+ goto next; -+ } -+ acs_on_ids[max_acs_id].vendor = val; -+ -+ p += strcspn(p, ":"); -+ if (*p != ':') { -+ pr_warn("PCIe ACS invalid ID\n"); -+ goto next; -+ } -+ -+ p++; -+ snprintf(opt, 5, "%s", p); -+ ret = kstrtol(opt, 16, &val); -+ if (ret) { -+ pr_warn("PCIe ACS ID parse error %d\n", ret); -+ goto next; -+ } -+ acs_on_ids[max_acs_id].device = val; -+ max_acs_id++; -+ } -+next: -+ p += strcspn(p, ","); -+ if (*p == ',') -+ p++; -+ } -+ -+ if (acs_on_downstream || acs_on_multifunction || max_acs_id) -+ pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n"); -+ -+ return 0; -+} -+early_param("pcie_acs_override", pcie_acs_override_setup); -+ -+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags) -+{ -+ int i; -+ -+ /* Never override ACS for legacy devices or devices with ACS caps */ -+ if (!pci_is_pcie(dev) || -+ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS)) -+ return -ENOTTY; -+ -+ for (i = 0; i < max_acs_id; i++) -+ if (acs_on_ids[i].vendor == dev->vendor && -+ acs_on_ids[i].device == dev->device) -+ return 1; -+ -+ switch (pci_pcie_type(dev)) { -+ case PCI_EXP_TYPE_DOWNSTREAM: -+ case PCI_EXP_TYPE_ROOT_PORT: -+ if (acs_on_downstream) -+ return 1; -+ break; -+ case PCI_EXP_TYPE_ENDPOINT: -+ case PCI_EXP_TYPE_UPSTREAM: -+ case PCI_EXP_TYPE_LEG_END: -+ case PCI_EXP_TYPE_RC_END: -+ if (acs_on_multifunction && dev->multifunction) -+ return 1; -+ } -+ -+ return -ENOTTY; -+} - /* - * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset. 
- * The device will throw a Link Down error on AER-capable systems and -@@ -4513,6 +4613,7 @@ static const struct pci_dev_acs_enabled { - { PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs }, - /* Zhaoxin Root/Downstream Ports */ - { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, -+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides }, - { 0 } - }; - - diff --git a/linux59-tkg/linux59-tkg-patches/0007-v5.9-fsync.patch b/linux59-tkg/linux59-tkg-patches/0007-v5.9-fsync.patch deleted file mode 100644 index 47badbb..0000000 --- a/linux59-tkg/linux59-tkg-patches/0007-v5.9-fsync.patch +++ /dev/null @@ -1,597 +0,0 @@ -From 7b5df0248ce255ef5b7204d65a7b3783ebb76a3d Mon Sep 17 00:00:00 2001 -From: Gabriel Krisman Bertazi -Date: Fri, 13 Dec 2019 11:08:02 -0300 -Subject: [PATCH 1/2] futex: Implement mechanism to wait on any of several - futexes -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This is a new futex operation, called FUTEX_WAIT_MULTIPLE, which allows -a thread to wait on several futexes at the same time, and be awoken by -any of them. In a sense, it implements one of the features that was -supported by pooling on the old FUTEX_FD interface. - -The use case lies in the Wine implementation of the Windows NT interface -WaitMultipleObjects. This Windows API function allows a thread to sleep -waiting on the first of a set of event sources (mutexes, timers, signal, -console input, etc) to signal. Considering this is a primitive -synchronization operation for Windows applications, being able to quickly -signal events on the producer side, and quickly go to sleep on the -consumer side is essential for good performance of those running over Wine. - -Wine developers have an implementation that uses eventfd, but it suffers -from FD exhaustion (there is applications that go to the order of -multi-milion FDs), and higher CPU utilization than this new operation. - -The futex list is passed as an array of `struct futex_wait_block` -(pointer, value, bitset) to the kernel, which will enqueue all of them -and sleep if none was already triggered. It returns a hint of which -futex caused the wake up event to userspace, but the hint doesn't -guarantee that is the only futex triggered. Before calling the syscall -again, userspace should traverse the list, trying to re-acquire any of -the other futexes, to prevent an immediate -EWOULDBLOCK return code from -the kernel. - -This was tested using three mechanisms: - -1) By reimplementing FUTEX_WAIT in terms of FUTEX_WAIT_MULTIPLE and -running the unmodified tools/testing/selftests/futex and a full linux -distro on top of this kernel. - -2) By an example code that exercises the FUTEX_WAIT_MULTIPLE path on a -multi-threaded, event-handling setup. - -3) By running the Wine fsync implementation and executing multi-threaded -applications, in particular modern games, on top of this implementation. - -Changes were tested for the following ABIs: x86_64, i386 and x32. -Support for x32 applications is not implemented since it would -take a major rework adding a new entry point and splitting the current -futex 64 entry point in two and we can't change the current x32 syscall -number without breaking user space compatibility. - -CC: Steven Rostedt -Cc: Richard Yao -Cc: Thomas Gleixner -Cc: Peter Zijlstra -Co-developed-by: Zebediah Figura -Signed-off-by: Zebediah Figura -Co-developed-by: Steven Noonan -Signed-off-by: Steven Noonan -Co-developed-by: Pierre-Loup A. Griffais -Signed-off-by: Pierre-Loup A. 
Griffais -Signed-off-by: Gabriel Krisman Bertazi -[Added compatibility code] -Co-developed-by: André Almeida -Signed-off-by: André Almeida - -Adjusted for v5.9: Removed `put_futex_key` calls. ---- - include/uapi/linux/futex.h | 20 +++ - kernel/futex.c | 352 ++++++++++++++++++++++++++++++++++++- - 2 files changed, 370 insertions(+), 2 deletions(-) - -diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index a89eb0accd5e2..580001e89c6ca 100644 ---- a/include/uapi/linux/futex.h -+++ b/include/uapi/linux/futex.h -@@ -21,6 +21,7 @@ - #define FUTEX_WAKE_BITSET 10 - #define FUTEX_WAIT_REQUEUE_PI 11 - #define FUTEX_CMP_REQUEUE_PI 12 -+#define FUTEX_WAIT_MULTIPLE 13 - - #define FUTEX_PRIVATE_FLAG 128 - #define FUTEX_CLOCK_REALTIME 256 -@@ -40,6 +41,8 @@ - FUTEX_PRIVATE_FLAG) - #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ - FUTEX_PRIVATE_FLAG) -+#define FUTEX_WAIT_MULTIPLE_PRIVATE (FUTEX_WAIT_MULTIPLE | \ -+ FUTEX_PRIVATE_FLAG) - - /* - * Support for robust futexes: the kernel cleans up held futexes at -@@ -150,4 +153,21 @@ struct robust_list_head { - (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \ - | ((oparg & 0xfff) << 12) | (cmparg & 0xfff)) - -+/* -+ * Maximum number of multiple futexes to wait for -+ */ -+#define FUTEX_MULTIPLE_MAX_COUNT 128 -+ -+/** -+ * struct futex_wait_block - Block of futexes to be waited for -+ * @uaddr: User address of the futex -+ * @val: Futex value expected by userspace -+ * @bitset: Bitset for the optional bitmasked wakeup -+ */ -+struct futex_wait_block { -+ __u32 __user *uaddr; -+ __u32 val; -+ __u32 bitset; -+}; -+ - #endif /* _UAPI_LINUX_FUTEX_H */ -diff --git a/kernel/futex.c b/kernel/futex.c -index a5876694a60eb..6f4bea76df460 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -197,6 +197,8 @@ struct futex_pi_state { - * @rt_waiter: rt_waiter storage for use with requeue_pi - * @requeue_pi_key: the requeue_pi target futex key - * @bitset: bitset for the optional bitmasked wakeup -+ * @uaddr: userspace address of futex -+ * @uval: expected futex's value - * - * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so - * we can wake only the relevant ones (hashed queues may be shared). -@@ -219,6 +221,8 @@ struct futex_q { - struct rt_mutex_waiter *rt_waiter; - union futex_key *requeue_pi_key; - u32 bitset; -+ u32 __user *uaddr; -+ u32 uval; - } __randomize_layout; - - static const struct futex_q futex_q_init = { -@@ -2304,6 +2308,29 @@ static int unqueue_me(struct futex_q *q) - return ret; - } - -+/** -+ * unqueue_multiple() - Remove several futexes from their futex_hash_bucket -+ * @q: The list of futexes to unqueue -+ * @count: Number of futexes in the list -+ * -+ * Helper to unqueue a list of futexes. This can't fail. -+ * -+ * Return: -+ * - >=0 - Index of the last futex that was awoken; -+ * - -1 - If no futex was awoken -+ */ -+static int unqueue_multiple(struct futex_q *q, int count) -+{ -+ int ret = -1; -+ int i; -+ -+ for (i = 0; i < count; i++) { -+ if (!unqueue_me(&q[i])) -+ ret = i; -+ } -+ return ret; -+} -+ - /* - * PI futexes can not be requeued and must remove themself from the - * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry -@@ -2662,6 +2689,205 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, - return ret; - } - -+/** -+ * futex_wait_multiple_setup() - Prepare to wait and enqueue multiple futexes -+ * @qs: The corresponding futex list -+ * @count: The size of the lists -+ * @flags: Futex flags (FLAGS_SHARED, etc.) 
-+ * @awaken: Index of the last awoken futex -+ * -+ * Prepare multiple futexes in a single step and enqueue them. This may fail if -+ * the futex list is invalid or if any futex was already awoken. On success the -+ * task is ready to interruptible sleep. -+ * -+ * Return: -+ * - 1 - One of the futexes was awaken by another thread -+ * - 0 - Success -+ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL -+ */ -+static int futex_wait_multiple_setup(struct futex_q *qs, int count, -+ unsigned int flags, int *awaken) -+{ -+ struct futex_hash_bucket *hb; -+ int ret, i; -+ u32 uval; -+ -+ /* -+ * Enqueuing multiple futexes is tricky, because we need to -+ * enqueue each futex in the list before dealing with the next -+ * one to avoid deadlocking on the hash bucket. But, before -+ * enqueuing, we need to make sure that current->state is -+ * TASK_INTERRUPTIBLE, so we don't absorb any awake events, which -+ * cannot be done before the get_futex_key of the next key, -+ * because it calls get_user_pages, which can sleep. Thus, we -+ * fetch the list of futexes keys in two steps, by first pinning -+ * all the memory keys in the futex key, and only then we read -+ * each key and queue the corresponding futex. -+ */ -+retry: -+ for (i = 0; i < count; i++) { -+ qs[i].key = FUTEX_KEY_INIT; -+ ret = get_futex_key(qs[i].uaddr, flags & FLAGS_SHARED, -+ &qs[i].key, FUTEX_READ); -+ if (unlikely(ret)) { -+ return ret; -+ } -+ } -+ -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ for (i = 0; i < count; i++) { -+ struct futex_q *q = &qs[i]; -+ -+ hb = queue_lock(q); -+ -+ ret = get_futex_value_locked(&uval, q->uaddr); -+ if (ret) { -+ /* -+ * We need to try to handle the fault, which -+ * cannot be done without sleep, so we need to -+ * undo all the work already done, to make sure -+ * we don't miss any wake ups. Therefore, clean -+ * up, handle the fault and retry from the -+ * beginning. -+ */ -+ queue_unlock(hb); -+ -+ /* -+ * Keys 0..(i-1) are implicitly put -+ * on unqueue_multiple. -+ */ -+ *awaken = unqueue_multiple(qs, i); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ /* -+ * On a real fault, prioritize the error even if -+ * some other futex was awoken. Userspace gave -+ * us a bad address, -EFAULT them. -+ */ -+ ret = get_user(uval, q->uaddr); -+ if (ret) -+ return ret; -+ -+ /* -+ * Even if the page fault was handled, If -+ * something was already awaken, we can safely -+ * give up and succeed to give a hint for userspace to -+ * acquire the right futex faster. -+ */ -+ if (*awaken >= 0) -+ return 1; -+ -+ goto retry; -+ } -+ -+ if (uval != q->uval) { -+ queue_unlock(hb); -+ -+ /* -+ * If something was already awaken, we can -+ * safely ignore the error and succeed. -+ */ -+ *awaken = unqueue_multiple(qs, i); -+ __set_current_state(TASK_RUNNING); -+ if (*awaken >= 0) -+ return 1; -+ -+ return -EWOULDBLOCK; -+ } -+ -+ /* -+ * The bucket lock can't be held while dealing with the -+ * next futex. Queue each futex at this moment so hb can -+ * be unlocked. -+ */ -+ queue_me(&qs[i], hb); -+ } -+ return 0; -+} -+ -+/** -+ * futex_wait_multiple() - Prepare to wait on and enqueue several futexes -+ * @qs: The list of futexes to wait on -+ * @op: Operation code from futex's syscall -+ * @count: The number of objects -+ * @abs_time: Timeout before giving up and returning to userspace -+ * -+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function -+ * sleeps on a group of futexes and returns on the first futex that -+ * triggered, or after the timeout has elapsed. 
-+ * -+ * Return: -+ * - >=0 - Hint to the futex that was awoken -+ * - <0 - On error -+ */ -+static int futex_wait_multiple(struct futex_q *qs, int op, -+ u32 count, ktime_t *abs_time) -+{ -+ struct hrtimer_sleeper timeout, *to; -+ int ret, flags = 0, hint = 0; -+ unsigned int i; -+ -+ if (!(op & FUTEX_PRIVATE_FLAG)) -+ flags |= FLAGS_SHARED; -+ -+ if (op & FUTEX_CLOCK_REALTIME) -+ flags |= FLAGS_CLOCKRT; -+ -+ to = futex_setup_timer(abs_time, &timeout, flags, 0); -+ while (1) { -+ ret = futex_wait_multiple_setup(qs, count, flags, &hint); -+ if (ret) { -+ if (ret > 0) { -+ /* A futex was awaken during setup */ -+ ret = hint; -+ } -+ break; -+ } -+ -+ if (to) -+ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); -+ -+ /* -+ * Avoid sleeping if another thread already tried to -+ * wake us. -+ */ -+ for (i = 0; i < count; i++) { -+ if (plist_node_empty(&qs[i].list)) -+ break; -+ } -+ -+ if (i == count && (!to || to->task)) -+ freezable_schedule(); -+ -+ ret = unqueue_multiple(qs, count); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ if (ret >= 0) -+ break; -+ if (to && !to->task) { -+ ret = -ETIMEDOUT; -+ break; -+ } else if (signal_pending(current)) { -+ ret = -ERESTARTSYS; -+ break; -+ } -+ /* -+ * The final case is a spurious wakeup, for -+ * which just retry. -+ */ -+ } -+ -+ if (to) { -+ hrtimer_cancel(&to->timer); -+ destroy_hrtimer_on_stack(&to->timer); -+ } -+ -+ return ret; -+} -+ - static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, - ktime_t *abs_time, u32 bitset) - { -@@ -3774,6 +4000,43 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - return -ENOSYS; - } - -+/** -+ * futex_read_wait_block - Read an array of futex_wait_block from userspace -+ * @uaddr: Userspace address of the block -+ * @count: Number of blocks to be read -+ * -+ * This function creates and allocate an array of futex_q (we zero it to -+ * initialize the fields) and then, for each futex_wait_block element from -+ * userspace, fill a futex_q element with proper values. 
-+ */ -+inline struct futex_q *futex_read_wait_block(u32 __user *uaddr, u32 count) -+{ -+ unsigned int i; -+ struct futex_q *qs; -+ struct futex_wait_block fwb; -+ struct futex_wait_block __user *entry = -+ (struct futex_wait_block __user *)uaddr; -+ -+ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) -+ return ERR_PTR(-EINVAL); -+ -+ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); -+ if (!qs) -+ return ERR_PTR(-ENOMEM); -+ -+ for (i = 0; i < count; i++) { -+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { -+ kfree(qs); -+ return ERR_PTR(-EFAULT); -+ } -+ -+ qs[i].uaddr = fwb.uaddr; -+ qs[i].uval = fwb.val; -+ qs[i].bitset = fwb.bitset; -+ } -+ -+ return qs; -+} - - SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, -@@ -3786,7 +4049,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || -- cmd == FUTEX_WAIT_REQUEUE_PI)) { -+ cmd == FUTEX_WAIT_REQUEUE_PI || -+ cmd == FUTEX_WAIT_MULTIPLE)) { - if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) - return -EFAULT; - if (get_timespec64(&ts, utime)) -@@ -3807,6 +4071,25 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) - val2 = (u32) (unsigned long) utime; - -+ if (cmd == FUTEX_WAIT_MULTIPLE) { -+ int ret; -+ struct futex_q *qs; -+ -+#ifdef CONFIG_X86_X32 -+ if (unlikely(in_x32_syscall())) -+ return -ENOSYS; -+#endif -+ qs = futex_read_wait_block(uaddr, val); -+ -+ if (IS_ERR(qs)) -+ return PTR_ERR(qs); -+ -+ ret = futex_wait_multiple(qs, op, val, tp); -+ kfree(qs); -+ -+ return ret; -+ } -+ - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); - } - -@@ -3969,6 +4252,57 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, - #endif /* CONFIG_COMPAT */ - - #ifdef CONFIG_COMPAT_32BIT_TIME -+/** -+ * struct compat_futex_wait_block - Block of futexes to be waited for -+ * @uaddr: User address of the futex (compatible pointer) -+ * @val: Futex value expected by userspace -+ * @bitset: Bitset for the optional bitmasked wakeup -+ */ -+struct compat_futex_wait_block { -+ compat_uptr_t uaddr; -+ __u32 val; -+ __u32 bitset; -+}; -+ -+/** -+ * compat_futex_read_wait_block - Read an array of futex_wait_block from -+ * userspace -+ * @uaddr: Userspace address of the block -+ * @count: Number of blocks to be read -+ * -+ * This function does the same as futex_read_wait_block(), except that it -+ * converts the pointer to the futex from the compat version to the regular one. 
-+ */ -+inline struct futex_q *compat_futex_read_wait_block(u32 __user *uaddr, -+ u32 count) -+{ -+ unsigned int i; -+ struct futex_q *qs; -+ struct compat_futex_wait_block fwb; -+ struct compat_futex_wait_block __user *entry = -+ (struct compat_futex_wait_block __user *)uaddr; -+ -+ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) -+ return ERR_PTR(-EINVAL); -+ -+ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); -+ if (!qs) -+ return ERR_PTR(-ENOMEM); -+ -+ for (i = 0; i < count; i++) { -+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { -+ kfree(qs); -+ return ERR_PTR(-EFAULT); -+ } -+ -+ qs[i].uaddr = compat_ptr(fwb.uaddr); -+ qs[i].uval = fwb.val; -+ qs[i].bitset = fwb.bitset; -+ } -+ -+ return qs; -+} -+ - SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - struct old_timespec32 __user *, utime, u32 __user *, uaddr2, - u32, val3) -@@ -3980,7 +4314,8 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || -- cmd == FUTEX_WAIT_REQUEUE_PI)) { -+ cmd == FUTEX_WAIT_REQUEUE_PI || -+ cmd == FUTEX_WAIT_MULTIPLE)) { - if (get_old_timespec32(&ts, utime)) - return -EFAULT; - if (!timespec64_valid(&ts)) -@@ -3995,6 +4330,19 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) - val2 = (int) (unsigned long) utime; - -+ if (cmd == FUTEX_WAIT_MULTIPLE) { -+ int ret; -+ struct futex_q *qs = compat_futex_read_wait_block(uaddr, val); -+ -+ if (IS_ERR(qs)) -+ return PTR_ERR(qs); -+ -+ ret = futex_wait_multiple(qs, op, val, tp); -+ kfree(qs); -+ -+ return ret; -+ } -+ - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); - } - #endif /* CONFIG_COMPAT_32BIT_TIME */ - -From ccdddb50d330d2ee1a4d2cbfdd27bdd7fb10eec3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Andr=C3=A9=20Almeida?= -Date: Fri, 7 Feb 2020 23:28:02 -0300 -Subject: [PATCH 2/2] futex: Add Proton compatibility code - ---- - include/uapi/linux/futex.h | 2 +- - kernel/futex.c | 5 +++-- - 2 files changed, 4 insertions(+), 3 deletions(-) - -diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index 580001e89c6ca..a3e760886b8e7 100644 ---- a/include/uapi/linux/futex.h -+++ b/include/uapi/linux/futex.h -@@ -21,7 +21,7 @@ - #define FUTEX_WAKE_BITSET 10 - #define FUTEX_WAIT_REQUEUE_PI 11 - #define FUTEX_CMP_REQUEUE_PI 12 --#define FUTEX_WAIT_MULTIPLE 13 -+#define FUTEX_WAIT_MULTIPLE 31 - - #define FUTEX_PRIVATE_FLAG 128 - #define FUTEX_CLOCK_REALTIME 256 -diff --git a/kernel/futex.c b/kernel/futex.c -index 6f4bea76df460..03d89fe7b8392 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -4059,7 +4059,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - return -EINVAL; - - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } -@@ -4260,6 +4260,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, - */ - struct compat_futex_wait_block { - compat_uptr_t uaddr; -+ __u32 pad; - __u32 val; - __u32 bitset; - }; -@@ -4322,7 +4323,7 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - return -EINVAL; - - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } diff --git a/linux59-tkg/linux59-tkg-patches/0008-5.9-bcachefs.patch b/linux59-tkg/linux59-tkg-patches/0008-5.9-bcachefs.patch 
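The FUTEX_WAIT_MULTIPLE commit message above describes the userspace contract: an array of struct futex_wait_block is passed as uaddr, val carries the element count, utime is a relative timeout as with FUTEX_WAIT, and the return value is a hint to which futex woke the caller. A minimal, hypothetical userspace sketch of that contract (not part of the deleted patch; the wrapper name is invented, and the opcode is 13 in the base patch but 31 once the Proton compatibility patch above is applied) could look like:

#include <stdint.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

/* Mirrors the uapi struct futex_wait_block added by the deleted patch. */
struct futex_wait_block {
	uint32_t *uaddr;	/* user address of the futex */
	uint32_t val;		/* value expected by userspace */
	uint32_t bitset;	/* e.g. FUTEX_BITSET_MATCH_ANY (~0u) */
};

#ifndef FUTEX_WAIT_MULTIPLE
#define FUTEX_WAIT_MULTIPLE 31	/* 13 in patch 1/2; 31 with the Proton compatibility patch */
#endif

/*
 * Sleep until any of 'count' futexes is woken by FUTEX_WAKE, the timeout
 * expires, or a signal arrives. Returns a hint index (>= 0) of an awoken
 * futex, or -1 with errno set (ETIMEDOUT, EINTR, EWOULDBLOCK, EFAULT, EINVAL).
 */
static long futex_wait_any(struct futex_wait_block *blocks, uint32_t count,
			   const struct timespec *timeout)
{
	return syscall(SYS_futex, blocks, FUTEX_WAIT_MULTIPLE, count,
		       timeout, NULL, 0);
}

As the commit message notes, the hint does not guarantee that only one futex was triggered, so before calling again the caller is expected to walk the array and try to re-acquire the other futexes to avoid an immediate EWOULDBLOCK.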
deleted file mode 100644 index 5e81fb6..0000000 --- a/linux59-tkg/linux59-tkg-patches/0008-5.9-bcachefs.patch +++ /dev/null @@ -1,70821 +0,0 @@ -diff --git a/block/bio.c b/block/bio.c -index e865ea55b9f9..72a65c4113be 100644 ---- a/block/bio.c -+++ b/block/bio.c -@@ -1320,6 +1320,7 @@ void bio_set_pages_dirty(struct bio *bio) - set_page_dirty_lock(bvec->bv_page); - } - } -+EXPORT_SYMBOL_GPL(bio_set_pages_dirty); - - /* - * bio_check_pages_dirty() will check that all the BIO's pages are still dirty. -@@ -1379,6 +1380,7 @@ void bio_check_pages_dirty(struct bio *bio) - spin_unlock_irqrestore(&bio_dirty_lock, flags); - schedule_work(&bio_dirty_work); - } -+EXPORT_SYMBOL_GPL(bio_check_pages_dirty); - - static inline bool bio_remaining_done(struct bio *bio) - { -diff --git a/block/blk-core.c b/block/blk-core.c -index 10c08ac50697..d68f24a7ee48 100644 ---- a/block/blk-core.c -+++ b/block/blk-core.c -@@ -213,18 +213,23 @@ int blk_status_to_errno(blk_status_t status) - } - EXPORT_SYMBOL_GPL(blk_status_to_errno); - --static void print_req_error(struct request *req, blk_status_t status, -- const char *caller) -+const char *blk_status_to_str(blk_status_t status) - { - int idx = (__force int)status; - - if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors))) -- return; -+ return "(invalid error)"; -+ return blk_errors[idx].name; -+} -+EXPORT_SYMBOL_GPL(blk_status_to_str); - -+static void print_req_error(struct request *req, blk_status_t status, -+ const char *caller) -+{ - printk_ratelimited(KERN_ERR - "%s: %s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x " - "phys_seg %u prio class %u\n", -- caller, blk_errors[idx].name, -+ caller, blk_status_to_str(status), - req->rq_disk ? req->rq_disk->disk_name : "?", - blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)), - req->cmd_flags & ~REQ_OP_MASK, -diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig -index d1ca4d059c20..e63646b103c4 100644 ---- a/drivers/md/bcache/Kconfig -+++ b/drivers/md/bcache/Kconfig -@@ -3,6 +3,7 @@ - config BCACHE - tristate "Block device as cache" - select CRC64 -+ select CLOSURES - help - Allows a block device to be used as cache for other devices; uses - a btree for indexing and the layout is optimized for SSDs. -@@ -18,15 +19,6 @@ config BCACHE_DEBUG - Enables extra debugging tools, allows expensive runtime checks to be - turned on. - --config BCACHE_CLOSURES_DEBUG -- bool "Debug closures" -- depends on BCACHE -- select DEBUG_FS -- help -- Keeps all active closures in a linked list and provides a debugfs -- interface to list them, which makes it possible to see asynchronous -- operations that get stuck. 
-- - config BCACHE_ASYNC_REGISTRATION - bool "Asynchronous device registration (EXPERIMENTAL)" - depends on BCACHE -diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile -index 5b87e59676b8..054e8a33a7ab 100644 ---- a/drivers/md/bcache/Makefile -+++ b/drivers/md/bcache/Makefile -@@ -2,6 +2,6 @@ - - obj-$(CONFIG_BCACHE) += bcache.o - --bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\ -- io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ -+bcache-y := alloc.o bset.o btree.o debug.o extents.o io.o\ -+ journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ - util.o writeback.o features.o -diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h -index 4fd03d2496d8..498625095807 100644 ---- a/drivers/md/bcache/bcache.h -+++ b/drivers/md/bcache/bcache.h -@@ -180,6 +180,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -192,7 +193,6 @@ - - #include "bset.h" - #include "util.h" --#include "closure.h" - - struct bucket { - atomic_t pin; -diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c -deleted file mode 100644 -index 0164a1fe94a9..000000000000 ---- a/drivers/md/bcache/closure.c -+++ /dev/null -@@ -1,217 +0,0 @@ --// SPDX-License-Identifier: GPL-2.0 --/* -- * Asynchronous refcounty things -- * -- * Copyright 2010, 2011 Kent Overstreet -- * Copyright 2012 Google, Inc. -- */ -- --#include --#include --#include --#include -- --#include "closure.h" -- --static inline void closure_put_after_sub(struct closure *cl, int flags) --{ -- int r = flags & CLOSURE_REMAINING_MASK; -- -- BUG_ON(flags & CLOSURE_GUARD_MASK); -- BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR)); -- -- if (!r) { -- if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { -- atomic_set(&cl->remaining, -- CLOSURE_REMAINING_INITIALIZER); -- closure_queue(cl); -- } else { -- struct closure *parent = cl->parent; -- closure_fn *destructor = cl->fn; -- -- closure_debug_destroy(cl); -- -- if (destructor) -- destructor(cl); -- -- if (parent) -- closure_put(parent); -- } -- } --} -- --/* For clearing flags with the same atomic op as a put */ --void closure_sub(struct closure *cl, int v) --{ -- closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining)); --} -- --/* -- * closure_put - decrement a closure's refcount -- */ --void closure_put(struct closure *cl) --{ -- closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); --} -- --/* -- * closure_wake_up - wake up all closures on a wait list, without memory barrier -- */ --void __closure_wake_up(struct closure_waitlist *wait_list) --{ -- struct llist_node *list; -- struct closure *cl, *t; -- struct llist_node *reverse = NULL; -- -- list = llist_del_all(&wait_list->list); -- -- /* We first reverse the list to preserve FIFO ordering and fairness */ -- reverse = llist_reverse_order(list); -- -- /* Then do the wakeups */ -- llist_for_each_entry_safe(cl, t, reverse, list) { -- closure_set_waiting(cl, 0); -- closure_sub(cl, CLOSURE_WAITING + 1); -- } --} -- --/** -- * closure_wait - add a closure to a waitlist -- * @waitlist: will own a ref on @cl, which will be released when -- * closure_wake_up() is called on @waitlist. -- * @cl: closure pointer. 
-- * -- */ --bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) --{ -- if (atomic_read(&cl->remaining) & CLOSURE_WAITING) -- return false; -- -- closure_set_waiting(cl, _RET_IP_); -- atomic_add(CLOSURE_WAITING + 1, &cl->remaining); -- llist_add(&cl->list, &waitlist->list); -- -- return true; --} -- --struct closure_syncer { -- struct task_struct *task; -- int done; --}; -- --static void closure_sync_fn(struct closure *cl) --{ -- struct closure_syncer *s = cl->s; -- struct task_struct *p; -- -- rcu_read_lock(); -- p = READ_ONCE(s->task); -- s->done = 1; -- wake_up_process(p); -- rcu_read_unlock(); --} -- --void __sched __closure_sync(struct closure *cl) --{ -- struct closure_syncer s = { .task = current }; -- -- cl->s = &s; -- continue_at(cl, closure_sync_fn, NULL); -- -- while (1) { -- set_current_state(TASK_UNINTERRUPTIBLE); -- if (s.done) -- break; -- schedule(); -- } -- -- __set_current_state(TASK_RUNNING); --} -- --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- --static LIST_HEAD(closure_list); --static DEFINE_SPINLOCK(closure_list_lock); -- --void closure_debug_create(struct closure *cl) --{ -- unsigned long flags; -- -- BUG_ON(cl->magic == CLOSURE_MAGIC_ALIVE); -- cl->magic = CLOSURE_MAGIC_ALIVE; -- -- spin_lock_irqsave(&closure_list_lock, flags); -- list_add(&cl->all, &closure_list); -- spin_unlock_irqrestore(&closure_list_lock, flags); --} -- --void closure_debug_destroy(struct closure *cl) --{ -- unsigned long flags; -- -- BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE); -- cl->magic = CLOSURE_MAGIC_DEAD; -- -- spin_lock_irqsave(&closure_list_lock, flags); -- list_del(&cl->all); -- spin_unlock_irqrestore(&closure_list_lock, flags); --} -- --static struct dentry *closure_debug; -- --static int debug_seq_show(struct seq_file *f, void *data) --{ -- struct closure *cl; -- -- spin_lock_irq(&closure_list_lock); -- -- list_for_each_entry(cl, &closure_list, all) { -- int r = atomic_read(&cl->remaining); -- -- seq_printf(f, "%p: %pS -> %pS p %p r %i ", -- cl, (void *) cl->ip, cl->fn, cl->parent, -- r & CLOSURE_REMAINING_MASK); -- -- seq_printf(f, "%s%s\n", -- test_bit(WORK_STRUCT_PENDING_BIT, -- work_data_bits(&cl->work)) ? "Q" : "", -- r & CLOSURE_RUNNING ? "R" : ""); -- -- if (r & CLOSURE_WAITING) -- seq_printf(f, " W %pS\n", -- (void *) cl->waiting_on); -- -- seq_printf(f, "\n"); -- } -- -- spin_unlock_irq(&closure_list_lock); -- return 0; --} -- --static int debug_seq_open(struct inode *inode, struct file *file) --{ -- return single_open(file, debug_seq_show, NULL); --} -- --static const struct file_operations debug_ops = { -- .owner = THIS_MODULE, -- .open = debug_seq_open, -- .read = seq_read, -- .release = single_release --}; -- --void __init closure_debug_init(void) --{ -- if (!IS_ERR_OR_NULL(bcache_debug)) -- /* -- * it is unnecessary to check return value of -- * debugfs_create_file(), we should not care -- * about this. 
-- */ -- closure_debug = debugfs_create_file( -- "closures", 0400, bcache_debug, NULL, &debug_ops); --} --#endif -- --MODULE_AUTHOR("Kent Overstreet "); --MODULE_LICENSE("GPL"); -diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h -deleted file mode 100644 -index c88cdc4ae4ec..000000000000 ---- a/drivers/md/bcache/closure.h -+++ /dev/null -@@ -1,378 +0,0 @@ --/* SPDX-License-Identifier: GPL-2.0 */ --#ifndef _LINUX_CLOSURE_H --#define _LINUX_CLOSURE_H -- --#include --#include --#include --#include -- --/* -- * Closure is perhaps the most overused and abused term in computer science, but -- * since I've been unable to come up with anything better you're stuck with it -- * again. -- * -- * What are closures? -- * -- * They embed a refcount. The basic idea is they count "things that are in -- * progress" - in flight bios, some other thread that's doing something else - -- * anything you might want to wait on. -- * -- * The refcount may be manipulated with closure_get() and closure_put(). -- * closure_put() is where many of the interesting things happen, when it causes -- * the refcount to go to 0. -- * -- * Closures can be used to wait on things both synchronously and asynchronously, -- * and synchronous and asynchronous use can be mixed without restriction. To -- * wait synchronously, use closure_sync() - you will sleep until your closure's -- * refcount hits 1. -- * -- * To wait asynchronously, use -- * continue_at(cl, next_function, workqueue); -- * -- * passing it, as you might expect, the function to run when nothing is pending -- * and the workqueue to run that function out of. -- * -- * continue_at() also, critically, requires a 'return' immediately following the -- * location where this macro is referenced, to return to the calling function. -- * There's good reason for this. -- * -- * To use safely closures asynchronously, they must always have a refcount while -- * they are running owned by the thread that is running them. Otherwise, suppose -- * you submit some bios and wish to have a function run when they all complete: -- * -- * foo_endio(struct bio *bio) -- * { -- * closure_put(cl); -- * } -- * -- * closure_init(cl); -- * -- * do_stuff(); -- * closure_get(cl); -- * bio1->bi_endio = foo_endio; -- * bio_submit(bio1); -- * -- * do_more_stuff(); -- * closure_get(cl); -- * bio2->bi_endio = foo_endio; -- * bio_submit(bio2); -- * -- * continue_at(cl, complete_some_read, system_wq); -- * -- * If closure's refcount started at 0, complete_some_read() could run before the -- * second bio was submitted - which is almost always not what you want! More -- * importantly, it wouldn't be possible to say whether the original thread or -- * complete_some_read()'s thread owned the closure - and whatever state it was -- * associated with! -- * -- * So, closure_init() initializes a closure's refcount to 1 - and when a -- * closure_fn is run, the refcount will be reset to 1 first. -- * -- * Then, the rule is - if you got the refcount with closure_get(), release it -- * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount -- * on a closure because you called closure_init() or you were run out of a -- * closure - _always_ use continue_at(). Doing so consistently will help -- * eliminate an entire class of particularly pernicious races. -- * -- * Lastly, you might have a wait list dedicated to a specific event, and have no -- * need for specifying the condition - you just want to wait until someone runs -- * closure_wake_up() on the appropriate wait list. 
In that case, just use -- * closure_wait(). It will return either true or false, depending on whether the -- * closure was already on a wait list or not - a closure can only be on one wait -- * list at a time. -- * -- * Parents: -- * -- * closure_init() takes two arguments - it takes the closure to initialize, and -- * a (possibly null) parent. -- * -- * If parent is non null, the new closure will have a refcount for its lifetime; -- * a closure is considered to be "finished" when its refcount hits 0 and the -- * function to run is null. Hence -- * -- * continue_at(cl, NULL, NULL); -- * -- * returns up the (spaghetti) stack of closures, precisely like normal return -- * returns up the C stack. continue_at() with non null fn is better thought of -- * as doing a tail call. -- * -- * All this implies that a closure should typically be embedded in a particular -- * struct (which its refcount will normally control the lifetime of), and that -- * struct can very much be thought of as a stack frame. -- */ -- --struct closure; --struct closure_syncer; --typedef void (closure_fn) (struct closure *); --extern struct dentry *bcache_debug; -- --struct closure_waitlist { -- struct llist_head list; --}; -- --enum closure_state { -- /* -- * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by -- * the thread that owns the closure, and cleared by the thread that's -- * waking up the closure. -- * -- * The rest are for debugging and don't affect behaviour: -- * -- * CLOSURE_RUNNING: Set when a closure is running (i.e. by -- * closure_init() and when closure_put() runs then next function), and -- * must be cleared before remaining hits 0. Primarily to help guard -- * against incorrect usage and accidentally transferring references. -- * continue_at() and closure_return() clear it for you, if you're doing -- * something unusual you can use closure_set_dead() which also helps -- * annotate where references are being transferred. -- */ -- -- CLOSURE_BITS_START = (1U << 26), -- CLOSURE_DESTRUCTOR = (1U << 26), -- CLOSURE_WAITING = (1U << 28), -- CLOSURE_RUNNING = (1U << 30), --}; -- --#define CLOSURE_GUARD_MASK \ -- ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1) -- --#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) --#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) -- --struct closure { -- union { -- struct { -- struct workqueue_struct *wq; -- struct closure_syncer *s; -- struct llist_node list; -- closure_fn *fn; -- }; -- struct work_struct work; -- }; -- -- struct closure *parent; -- -- atomic_t remaining; -- --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG --#define CLOSURE_MAGIC_DEAD 0xc054dead --#define CLOSURE_MAGIC_ALIVE 0xc054a11e -- -- unsigned int magic; -- struct list_head all; -- unsigned long ip; -- unsigned long waiting_on; --#endif --}; -- --void closure_sub(struct closure *cl, int v); --void closure_put(struct closure *cl); --void __closure_wake_up(struct closure_waitlist *list); --bool closure_wait(struct closure_waitlist *list, struct closure *cl); --void __closure_sync(struct closure *cl); -- --/** -- * closure_sync - sleep until a closure a closure has nothing left to wait on -- * -- * Sleeps until the refcount hits 1 - the thread that's running the closure owns -- * the last refcount. 
-- */ --static inline void closure_sync(struct closure *cl) --{ -- if ((atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK) != 1) -- __closure_sync(cl); --} -- --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- --void closure_debug_init(void); --void closure_debug_create(struct closure *cl); --void closure_debug_destroy(struct closure *cl); -- --#else -- --static inline void closure_debug_init(void) {} --static inline void closure_debug_create(struct closure *cl) {} --static inline void closure_debug_destroy(struct closure *cl) {} -- --#endif -- --static inline void closure_set_ip(struct closure *cl) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- cl->ip = _THIS_IP_; --#endif --} -- --static inline void closure_set_ret_ip(struct closure *cl) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- cl->ip = _RET_IP_; --#endif --} -- --static inline void closure_set_waiting(struct closure *cl, unsigned long f) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- cl->waiting_on = f; --#endif --} -- --static inline void closure_set_stopped(struct closure *cl) --{ -- atomic_sub(CLOSURE_RUNNING, &cl->remaining); --} -- --static inline void set_closure_fn(struct closure *cl, closure_fn *fn, -- struct workqueue_struct *wq) --{ -- closure_set_ip(cl); -- cl->fn = fn; -- cl->wq = wq; -- /* between atomic_dec() in closure_put() */ -- smp_mb__before_atomic(); --} -- --static inline void closure_queue(struct closure *cl) --{ -- struct workqueue_struct *wq = cl->wq; -- /** -- * Changes made to closure, work_struct, or a couple of other structs -- * may cause work.func not pointing to the right location. -- */ -- BUILD_BUG_ON(offsetof(struct closure, fn) -- != offsetof(struct work_struct, func)); -- if (wq) { -- INIT_WORK(&cl->work, cl->work.func); -- BUG_ON(!queue_work(wq, &cl->work)); -- } else -- cl->fn(cl); --} -- --/** -- * closure_get - increment a closure's refcount -- */ --static inline void closure_get(struct closure *cl) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- BUG_ON((atomic_inc_return(&cl->remaining) & -- CLOSURE_REMAINING_MASK) <= 1); --#else -- atomic_inc(&cl->remaining); --#endif --} -- --/** -- * closure_init - Initialize a closure, setting the refcount to 1 -- * @cl: closure to initialize -- * @parent: parent of the new closure. cl will take a refcount on it for its -- * lifetime; may be NULL. -- */ --static inline void closure_init(struct closure *cl, struct closure *parent) --{ -- memset(cl, 0, sizeof(struct closure)); -- cl->parent = parent; -- if (parent) -- closure_get(parent); -- -- atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); -- -- closure_debug_create(cl); -- closure_set_ip(cl); --} -- --static inline void closure_init_stack(struct closure *cl) --{ -- memset(cl, 0, sizeof(struct closure)); -- atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); --} -- --/** -- * closure_wake_up - wake up all closures on a wait list, -- * with memory barrier -- */ --static inline void closure_wake_up(struct closure_waitlist *list) --{ -- /* Memory barrier for the wait list */ -- smp_mb(); -- __closure_wake_up(list); --} -- --/** -- * continue_at - jump to another function with barrier -- * -- * After @cl is no longer waiting on anything (i.e. all outstanding refs have -- * been dropped with closure_put()), it will resume execution at @fn running out -- * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). 
-- * -- * This is because after calling continue_at() you no longer have a ref on @cl, -- * and whatever @cl owns may be freed out from under you - a running closure fn -- * has a ref on its own closure which continue_at() drops. -- * -- * Note you are expected to immediately return after using this macro. -- */ --#define continue_at(_cl, _fn, _wq) \ --do { \ -- set_closure_fn(_cl, _fn, _wq); \ -- closure_sub(_cl, CLOSURE_RUNNING + 1); \ --} while (0) -- --/** -- * closure_return - finish execution of a closure -- * -- * This is used to indicate that @cl is finished: when all outstanding refs on -- * @cl have been dropped @cl's ref on its parent closure (as passed to -- * closure_init()) will be dropped, if one was specified - thus this can be -- * thought of as returning to the parent closure. -- */ --#define closure_return(_cl) continue_at((_cl), NULL, NULL) -- --/** -- * continue_at_nobarrier - jump to another function without barrier -- * -- * Causes @fn to be executed out of @cl, in @wq context (or called directly if -- * @wq is NULL). -- * -- * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, -- * thus it's not safe to touch anything protected by @cl after a -- * continue_at_nobarrier(). -- */ --#define continue_at_nobarrier(_cl, _fn, _wq) \ --do { \ -- set_closure_fn(_cl, _fn, _wq); \ -- closure_queue(_cl); \ --} while (0) -- --/** -- * closure_return_with_destructor - finish execution of a closure, -- * with destructor -- * -- * Works like closure_return(), except @destructor will be called when all -- * outstanding refs on @cl have been dropped; @destructor may be used to safely -- * free the memory occupied by @cl, and it is called with the ref on the parent -- * closure still held - so @destructor could safely return an item to a -- * freelist protected by @cl's parent. -- */ --#define closure_return_with_destructor(_cl, _destructor) \ --do { \ -- set_closure_fn(_cl, _destructor, NULL); \ -- closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ --} while (0) -- --/** -- * closure_call - execute @fn out of a new, uninitialized closure -- * -- * Typically used when running out of one closure, and we want to run @fn -- * asynchronously out of a new closure - @parent will then wait for @cl to -- * finish. 
-- */ --static inline void closure_call(struct closure *cl, closure_fn fn, -- struct workqueue_struct *wq, -- struct closure *parent) --{ -- closure_init(cl, parent); -- continue_at_nobarrier(cl, fn, wq); --} -- --#endif /* _LINUX_CLOSURE_H */ -diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index 1bbdc410ee3c..3b9e991ea475 100644 ---- a/drivers/md/bcache/super.c -+++ b/drivers/md/bcache/super.c -@@ -2925,7 +2925,6 @@ static int __init bcache_init(void) - goto err; - - bch_debug_init(); -- closure_debug_init(); - - bcache_is_reboot = false; - -diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h -index c029f7443190..59093f9f1793 100644 ---- a/drivers/md/bcache/util.h -+++ b/drivers/md/bcache/util.h -@@ -4,6 +4,7 @@ - #define _BCACHE_UTIL_H - - #include -+#include - #include - #include - #include -@@ -13,8 +14,6 @@ - #include - #include - --#include "closure.h" -- - #define PAGE_SECTORS (PAGE_SIZE / 512) - - struct closure; -diff --git a/fs/Kconfig b/fs/Kconfig -index aa4c12282301..88082e3663cb 100644 ---- a/fs/Kconfig -+++ b/fs/Kconfig -@@ -40,6 +40,7 @@ source "fs/ocfs2/Kconfig" - source "fs/btrfs/Kconfig" - source "fs/nilfs2/Kconfig" - source "fs/f2fs/Kconfig" -+source "fs/bcachefs/Kconfig" - source "fs/zonefs/Kconfig" - - config FS_DAX -diff --git a/fs/Makefile b/fs/Makefile -index 1c7b0e3f6daa..8afa8e3bc14f 100644 ---- a/fs/Makefile -+++ b/fs/Makefile -@@ -130,6 +130,7 @@ obj-$(CONFIG_OCFS2_FS) += ocfs2/ - obj-$(CONFIG_BTRFS_FS) += btrfs/ - obj-$(CONFIG_GFS2_FS) += gfs2/ - obj-$(CONFIG_F2FS_FS) += f2fs/ -+obj-$(CONFIG_BCACHEFS_FS) += bcachefs/ - obj-$(CONFIG_CEPH_FS) += ceph/ - obj-$(CONFIG_PSTORE) += pstore/ - obj-$(CONFIG_EFIVAR_FS) += efivarfs/ -diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig -new file mode 100644 -index 000000000000..5594af719b2a ---- /dev/null -+++ b/fs/bcachefs/Kconfig -@@ -0,0 +1,50 @@ -+ -+config BCACHEFS_FS -+ tristate "bcachefs filesystem support" -+ depends on BLOCK -+ select EXPORTFS -+ select CLOSURES -+ select LIBCRC32C -+ select CRC64 -+ select FS_POSIX_ACL -+ select LZ4_COMPRESS -+ select LZ4_DECOMPRESS -+ select ZLIB_DEFLATE -+ select ZLIB_INFLATE -+ select ZSTD_COMPRESS -+ select ZSTD_DECOMPRESS -+ select CRYPTO_SHA256 -+ select CRYPTO_CHACHA20 -+ select CRYPTO_POLY1305 -+ select KEYS -+ select SIXLOCKS -+ select RAID6_PQ -+ select XOR_BLOCKS -+ help -+ The bcachefs filesystem - a modern, copy on write filesystem, with -+ support for multiple devices, compression, checksumming, etc. -+ -+config BCACHEFS_QUOTA -+ bool "bcachefs quota support" -+ depends on BCACHEFS_FS -+ select QUOTACTL -+ -+config BCACHEFS_POSIX_ACL -+ bool "bcachefs POSIX ACL support" -+ depends on BCACHEFS_FS -+ select FS_POSIX_ACL -+ -+config BCACHEFS_DEBUG -+ bool "bcachefs debugging" -+ depends on BCACHEFS_FS -+ help -+ Enables many extra debugging checks and assertions. -+ -+ The resulting code will be significantly slower than normal; you -+ probably shouldn't select this option unless you're a developer. 
-+ -+config BCACHEFS_TESTS -+ bool "bcachefs unit and performance tests" -+ depends on BCACHEFS_FS -+ help -+ Include some unit and performance tests for the core btree code -diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile -new file mode 100644 -index 000000000000..d85ced62c0dd ---- /dev/null -+++ b/fs/bcachefs/Makefile -@@ -0,0 +1,59 @@ -+ -+obj-$(CONFIG_BCACHEFS_FS) += bcachefs.o -+ -+bcachefs-y := \ -+ acl.o \ -+ alloc_background.o \ -+ alloc_foreground.o \ -+ bkey.o \ -+ bkey_methods.o \ -+ bkey_sort.o \ -+ bset.o \ -+ btree_cache.o \ -+ btree_gc.o \ -+ btree_io.o \ -+ btree_iter.o \ -+ btree_key_cache.o \ -+ btree_update_interior.o \ -+ btree_update_leaf.o \ -+ buckets.o \ -+ chardev.o \ -+ checksum.o \ -+ clock.o \ -+ compress.o \ -+ debug.o \ -+ dirent.o \ -+ disk_groups.o \ -+ ec.o \ -+ error.o \ -+ extents.o \ -+ extent_update.o \ -+ fs.o \ -+ fs-common.o \ -+ fs-ioctl.o \ -+ fs-io.o \ -+ fsck.o \ -+ inode.o \ -+ io.o \ -+ journal.o \ -+ journal_io.o \ -+ journal_reclaim.o \ -+ journal_seq_blacklist.o \ -+ keylist.o \ -+ migrate.o \ -+ move.o \ -+ movinggc.o \ -+ opts.o \ -+ quota.o \ -+ rebalance.o \ -+ recovery.o \ -+ reflink.o \ -+ replicas.o \ -+ siphash.o \ -+ super.o \ -+ super-io.o \ -+ sysfs.o \ -+ tests.o \ -+ trace.o \ -+ util.o \ -+ xattr.o -diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c -new file mode 100644 -index 000000000000..76c98ddbf628 ---- /dev/null -+++ b/fs/bcachefs/acl.c -@@ -0,0 +1,388 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ -+#include "bcachefs.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "acl.h" -+#include "fs.h" -+#include "xattr.h" -+ -+static inline size_t bch2_acl_size(unsigned nr_short, unsigned nr_long) -+{ -+ return sizeof(bch_acl_header) + -+ sizeof(bch_acl_entry_short) * nr_short + -+ sizeof(bch_acl_entry) * nr_long; -+} -+ -+static inline int acl_to_xattr_type(int type) -+{ -+ switch (type) { -+ case ACL_TYPE_ACCESS: -+ return KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS; -+ case ACL_TYPE_DEFAULT: -+ return KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT; -+ default: -+ BUG(); -+ } -+} -+ -+/* -+ * Convert from filesystem to in-memory representation. 
-+ */ -+static struct posix_acl *bch2_acl_from_disk(const void *value, size_t size) -+{ -+ const void *p, *end = value + size; -+ struct posix_acl *acl; -+ struct posix_acl_entry *out; -+ unsigned count = 0; -+ -+ if (!value) -+ return NULL; -+ if (size < sizeof(bch_acl_header)) -+ goto invalid; -+ if (((bch_acl_header *)value)->a_version != -+ cpu_to_le32(BCH_ACL_VERSION)) -+ goto invalid; -+ -+ p = value + sizeof(bch_acl_header); -+ while (p < end) { -+ const bch_acl_entry *entry = p; -+ -+ if (p + sizeof(bch_acl_entry_short) > end) -+ goto invalid; -+ -+ switch (le16_to_cpu(entry->e_tag)) { -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ p += sizeof(bch_acl_entry_short); -+ break; -+ case ACL_USER: -+ case ACL_GROUP: -+ p += sizeof(bch_acl_entry); -+ break; -+ default: -+ goto invalid; -+ } -+ -+ count++; -+ } -+ -+ if (p > end) -+ goto invalid; -+ -+ if (!count) -+ return NULL; -+ -+ acl = posix_acl_alloc(count, GFP_KERNEL); -+ if (!acl) -+ return ERR_PTR(-ENOMEM); -+ -+ out = acl->a_entries; -+ -+ p = value + sizeof(bch_acl_header); -+ while (p < end) { -+ const bch_acl_entry *in = p; -+ -+ out->e_tag = le16_to_cpu(in->e_tag); -+ out->e_perm = le16_to_cpu(in->e_perm); -+ -+ switch (out->e_tag) { -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ p += sizeof(bch_acl_entry_short); -+ break; -+ case ACL_USER: -+ out->e_uid = make_kuid(&init_user_ns, -+ le32_to_cpu(in->e_id)); -+ p += sizeof(bch_acl_entry); -+ break; -+ case ACL_GROUP: -+ out->e_gid = make_kgid(&init_user_ns, -+ le32_to_cpu(in->e_id)); -+ p += sizeof(bch_acl_entry); -+ break; -+ } -+ -+ out++; -+ } -+ -+ BUG_ON(out != acl->a_entries + acl->a_count); -+ -+ return acl; -+invalid: -+ pr_err("invalid acl entry"); -+ return ERR_PTR(-EINVAL); -+} -+ -+#define acl_for_each_entry(acl, acl_e) \ -+ for (acl_e = acl->a_entries; \ -+ acl_e < acl->a_entries + acl->a_count; \ -+ acl_e++) -+ -+/* -+ * Convert from in-memory to filesystem representation. 
-+ */ -+static struct bkey_i_xattr * -+bch2_acl_to_xattr(struct btree_trans *trans, -+ const struct posix_acl *acl, -+ int type) -+{ -+ struct bkey_i_xattr *xattr; -+ bch_acl_header *acl_header; -+ const struct posix_acl_entry *acl_e; -+ void *outptr; -+ unsigned nr_short = 0, nr_long = 0, acl_len, u64s; -+ -+ acl_for_each_entry(acl, acl_e) { -+ switch (acl_e->e_tag) { -+ case ACL_USER: -+ case ACL_GROUP: -+ nr_long++; -+ break; -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ nr_short++; -+ break; -+ default: -+ return ERR_PTR(-EINVAL); -+ } -+ } -+ -+ acl_len = bch2_acl_size(nr_short, nr_long); -+ u64s = BKEY_U64s + xattr_val_u64s(0, acl_len); -+ -+ if (u64s > U8_MAX) -+ return ERR_PTR(-E2BIG); -+ -+ xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); -+ if (IS_ERR(xattr)) -+ return xattr; -+ -+ bkey_xattr_init(&xattr->k_i); -+ xattr->k.u64s = u64s; -+ xattr->v.x_type = acl_to_xattr_type(type); -+ xattr->v.x_name_len = 0, -+ xattr->v.x_val_len = cpu_to_le16(acl_len); -+ -+ acl_header = xattr_val(&xattr->v); -+ acl_header->a_version = cpu_to_le32(BCH_ACL_VERSION); -+ -+ outptr = (void *) acl_header + sizeof(*acl_header); -+ -+ acl_for_each_entry(acl, acl_e) { -+ bch_acl_entry *entry = outptr; -+ -+ entry->e_tag = cpu_to_le16(acl_e->e_tag); -+ entry->e_perm = cpu_to_le16(acl_e->e_perm); -+ switch (acl_e->e_tag) { -+ case ACL_USER: -+ entry->e_id = cpu_to_le32( -+ from_kuid(&init_user_ns, acl_e->e_uid)); -+ outptr += sizeof(bch_acl_entry); -+ break; -+ case ACL_GROUP: -+ entry->e_id = cpu_to_le32( -+ from_kgid(&init_user_ns, acl_e->e_gid)); -+ outptr += sizeof(bch_acl_entry); -+ break; -+ -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ outptr += sizeof(bch_acl_entry_short); -+ break; -+ } -+ } -+ -+ BUG_ON(outptr != xattr_val(&xattr->v) + acl_len); -+ -+ return xattr; -+} -+ -+struct posix_acl *bch2_get_acl(struct inode *vinode, int type) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c_xattr xattr; -+ struct posix_acl *acl = NULL; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, -+ &inode->ei_str_hash, inode->v.i_ino, -+ &X_SEARCH(acl_to_xattr_type(type), "", 0), -+ 0); -+ if (IS_ERR(iter)) { -+ if (PTR_ERR(iter) == -EINTR) -+ goto retry; -+ -+ if (PTR_ERR(iter) != -ENOENT) -+ acl = ERR_CAST(iter); -+ goto out; -+ } -+ -+ xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); -+ -+ acl = bch2_acl_from_disk(xattr_val(xattr.v), -+ le16_to_cpu(xattr.v->x_val_len)); -+ -+ if (!IS_ERR(acl)) -+ set_cached_acl(&inode->v, type, acl); -+out: -+ bch2_trans_exit(&trans); -+ return acl; -+} -+ -+int bch2_set_acl_trans(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode_u, -+ const struct bch_hash_info *hash_info, -+ struct posix_acl *acl, int type) -+{ -+ int ret; -+ -+ if (type == ACL_TYPE_DEFAULT && -+ !S_ISDIR(inode_u->bi_mode)) -+ return acl ? 
-EACCES : 0; -+ -+ if (acl) { -+ struct bkey_i_xattr *xattr = -+ bch2_acl_to_xattr(trans, acl, type); -+ if (IS_ERR(xattr)) -+ return PTR_ERR(xattr); -+ -+ ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info, -+ inode_u->bi_inum, &xattr->k_i, 0); -+ } else { -+ struct xattr_search_key search = -+ X_SEARCH(acl_to_xattr_type(type), "", 0); -+ -+ ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, hash_info, -+ inode_u->bi_inum, &search); -+ } -+ -+ return ret == -ENOENT ? 0 : ret; -+} -+ -+int bch2_set_acl(struct inode *vinode, struct posix_acl *_acl, int type) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *inode_iter; -+ struct bch_inode_unpacked inode_u; -+ struct posix_acl *acl; -+ umode_t mode; -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ acl = _acl; -+ -+ inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ if (ret) -+ goto btree_err; -+ -+ mode = inode_u.bi_mode; -+ -+ if (type == ACL_TYPE_ACCESS) { -+ ret = posix_acl_update_mode(&inode->v, &mode, &acl); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_set_acl_trans(&trans, &inode_u, -+ &inode->ei_str_hash, -+ acl, type); -+ if (ret) -+ goto btree_err; -+ -+ inode_u.bi_ctime = bch2_current_time(c); -+ inode_u.bi_mode = mode; -+ -+ ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: -+ bch2_trans_commit(&trans, NULL, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK); -+btree_err: -+ if (ret == -EINTR) -+ goto retry; -+ if (unlikely(ret)) -+ goto err; -+ -+ bch2_inode_update_after_write(c, inode, &inode_u, -+ ATTR_CTIME|ATTR_MODE); -+ -+ set_cached_acl(&inode->v, type, acl); -+err: -+ bch2_trans_exit(&trans); -+ mutex_unlock(&inode->ei_update_lock); -+ -+ return ret; -+} -+ -+int bch2_acl_chmod(struct btree_trans *trans, -+ struct bch_inode_info *inode, -+ umode_t mode, -+ struct posix_acl **new_acl) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c_xattr xattr; -+ struct bkey_i_xattr *new; -+ struct posix_acl *acl; -+ int ret = 0; -+ -+ iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, -+ &inode->ei_str_hash, inode->v.i_ino, -+ &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter) != -ENOENT ? 
PTR_ERR(iter) : 0; -+ -+ xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); -+ -+ acl = bch2_acl_from_disk(xattr_val(xattr.v), -+ le16_to_cpu(xattr.v->x_val_len)); -+ if (IS_ERR_OR_NULL(acl)) -+ return PTR_ERR(acl); -+ -+ ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode); -+ if (ret) -+ goto err; -+ -+ new = bch2_acl_to_xattr(trans, acl, ACL_TYPE_ACCESS); -+ if (IS_ERR(new)) { -+ ret = PTR_ERR(new); -+ goto err; -+ } -+ -+ new->k.p = iter->pos; -+ bch2_trans_update(trans, iter, &new->k_i, 0); -+ *new_acl = acl; -+ acl = NULL; -+err: -+ kfree(acl); -+ return ret; -+} -+ -+#endif /* CONFIG_BCACHEFS_POSIX_ACL */ -diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h -new file mode 100644 -index 000000000000..cb62d502a7ff ---- /dev/null -+++ b/fs/bcachefs/acl.h -@@ -0,0 +1,59 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ACL_H -+#define _BCACHEFS_ACL_H -+ -+struct bch_inode_unpacked; -+struct bch_hash_info; -+struct bch_inode_info; -+struct posix_acl; -+ -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ -+#define BCH_ACL_VERSION 0x0001 -+ -+typedef struct { -+ __le16 e_tag; -+ __le16 e_perm; -+ __le32 e_id; -+} bch_acl_entry; -+ -+typedef struct { -+ __le16 e_tag; -+ __le16 e_perm; -+} bch_acl_entry_short; -+ -+typedef struct { -+ __le32 a_version; -+} bch_acl_header; -+ -+struct posix_acl *bch2_get_acl(struct inode *, int); -+ -+int bch2_set_acl_trans(struct btree_trans *, -+ struct bch_inode_unpacked *, -+ const struct bch_hash_info *, -+ struct posix_acl *, int); -+int bch2_set_acl(struct inode *, struct posix_acl *, int); -+int bch2_acl_chmod(struct btree_trans *, struct bch_inode_info *, -+ umode_t, struct posix_acl **); -+ -+#else -+ -+static inline int bch2_set_acl_trans(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode_u, -+ const struct bch_hash_info *hash_info, -+ struct posix_acl *acl, int type) -+{ -+ return 0; -+} -+ -+static inline int bch2_acl_chmod(struct btree_trans *trans, -+ struct bch_inode_info *inode, -+ umode_t mode, -+ struct posix_acl **new_acl) -+{ -+ return 0; -+} -+ -+#endif /* CONFIG_BCACHEFS_POSIX_ACL */ -+ -+#endif /* _BCACHEFS_ACL_H */ -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -new file mode 100644 -index 000000000000..97508de9f721 ---- /dev/null -+++ b/fs/bcachefs/alloc_background.c -@@ -0,0 +1,1477 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_key_cache.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "clock.h" -+#include "debug.h" -+#include "ec.h" -+#include "error.h" -+#include "recovery.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static const char * const bch2_alloc_field_names[] = { -+#define x(name, bytes) #name, -+ BCH_ALLOC_FIELDS() -+#undef x -+ NULL -+}; -+ -+static void bch2_recalc_oldest_io(struct bch_fs *, struct bch_dev *, int); -+ -+/* Ratelimiting/PD controllers */ -+ -+static void pd_controllers_update(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(to_delayed_work(work), -+ struct bch_fs, -+ pd_controllers_update); -+ struct bch_dev *ca; -+ s64 free = 0, fragmented = 0; -+ unsigned i; -+ -+ for_each_member_device(ca, c, i) { -+ struct bch_dev_usage stats = bch2_dev_usage_read(ca); -+ -+ free += bucket_to_sector(ca, -+ __dev_buckets_free(ca, stats)) << 9; -+ /* -+ * Bytes of internal 
fragmentation, which can be -+ * reclaimed by copy GC -+ */ -+ fragmented += max_t(s64, 0, (bucket_to_sector(ca, -+ stats.buckets[BCH_DATA_user] + -+ stats.buckets[BCH_DATA_cached]) - -+ (stats.sectors[BCH_DATA_user] + -+ stats.sectors[BCH_DATA_cached])) << 9); -+ } -+ -+ bch2_pd_controller_update(&c->copygc_pd, free, fragmented, -1); -+ schedule_delayed_work(&c->pd_controllers_update, -+ c->pd_controllers_update_seconds * HZ); -+} -+ -+/* Persistent alloc info: */ -+ -+static inline u64 get_alloc_field(const struct bch_alloc *a, -+ const void **p, unsigned field) -+{ -+ unsigned bytes = BCH_ALLOC_FIELD_BYTES[field]; -+ u64 v; -+ -+ if (!(a->fields & (1 << field))) -+ return 0; -+ -+ switch (bytes) { -+ case 1: -+ v = *((const u8 *) *p); -+ break; -+ case 2: -+ v = le16_to_cpup(*p); -+ break; -+ case 4: -+ v = le32_to_cpup(*p); -+ break; -+ case 8: -+ v = le64_to_cpup(*p); -+ break; -+ default: -+ BUG(); -+ } -+ -+ *p += bytes; -+ return v; -+} -+ -+static inline void put_alloc_field(struct bkey_i_alloc *a, void **p, -+ unsigned field, u64 v) -+{ -+ unsigned bytes = BCH_ALLOC_FIELD_BYTES[field]; -+ -+ if (!v) -+ return; -+ -+ a->v.fields |= 1 << field; -+ -+ switch (bytes) { -+ case 1: -+ *((u8 *) *p) = v; -+ break; -+ case 2: -+ *((__le16 *) *p) = cpu_to_le16(v); -+ break; -+ case 4: -+ *((__le32 *) *p) = cpu_to_le32(v); -+ break; -+ case 8: -+ *((__le64 *) *p) = cpu_to_le64(v); -+ break; -+ default: -+ BUG(); -+ } -+ -+ *p += bytes; -+} -+ -+struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) -+{ -+ struct bkey_alloc_unpacked ret = { .gen = 0 }; -+ -+ if (k.k->type == KEY_TYPE_alloc) { -+ const struct bch_alloc *a = bkey_s_c_to_alloc(k).v; -+ const void *d = a->data; -+ unsigned idx = 0; -+ -+ ret.gen = a->gen; -+ -+#define x(_name, _bits) ret._name = get_alloc_field(a, &d, idx++); -+ BCH_ALLOC_FIELDS() -+#undef x -+ } -+ return ret; -+} -+ -+void bch2_alloc_pack(struct bkey_i_alloc *dst, -+ const struct bkey_alloc_unpacked src) -+{ -+ unsigned idx = 0; -+ void *d = dst->v.data; -+ unsigned bytes; -+ -+ dst->v.fields = 0; -+ dst->v.gen = src.gen; -+ -+#define x(_name, _bits) put_alloc_field(dst, &d, idx++, src._name); -+ BCH_ALLOC_FIELDS() -+#undef x -+ -+ bytes = (void *) d - (void *) &dst->v; -+ set_bkey_val_bytes(&dst->k, bytes); -+ memset_u64s_tail(&dst->v, 0, bytes); -+} -+ -+static unsigned bch_alloc_val_u64s(const struct bch_alloc *a) -+{ -+ unsigned i, bytes = offsetof(struct bch_alloc, data); -+ -+ for (i = 0; i < ARRAY_SIZE(BCH_ALLOC_FIELD_BYTES); i++) -+ if (a->fields & (1 << i)) -+ bytes += BCH_ALLOC_FIELD_BYTES[i]; -+ -+ return DIV_ROUND_UP(bytes, sizeof(u64)); -+} -+ -+const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); -+ -+ if (k.k->p.inode >= c->sb.nr_devices || -+ !c->devs[k.k->p.inode]) -+ return "invalid device"; -+ -+ /* allow for unknown fields */ -+ if (bkey_val_u64s(a.k) < bch_alloc_val_u64s(a.v)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); -+ const void *d = a.v->data; -+ unsigned i; -+ -+ pr_buf(out, "gen %u", a.v->gen); -+ -+ for (i = 0; i < BCH_ALLOC_FIELD_NR; i++) -+ if (a.v->fields & (1 << i)) -+ pr_buf(out, " %s %llu", -+ bch2_alloc_field_names[i], -+ get_alloc_field(a.v, &d, i)); -+} -+ -+static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bkey_s_c k) -+{ -+ struct bch_dev 
*ca; -+ struct bucket *g; -+ struct bkey_alloc_unpacked u; -+ -+ if (level || k.k->type != KEY_TYPE_alloc) -+ return 0; -+ -+ ca = bch_dev_bkey_exists(c, k.k->p.inode); -+ g = __bucket(ca, k.k->p.offset, 0); -+ u = bch2_alloc_unpack(k); -+ -+ g->_mark.gen = u.gen; -+ g->_mark.data_type = u.data_type; -+ g->_mark.dirty_sectors = u.dirty_sectors; -+ g->_mark.cached_sectors = u.cached_sectors; -+ g->io_time[READ] = u.read_time; -+ g->io_time[WRITE] = u.write_time; -+ g->oldest_gen = u.oldest_gen; -+ g->gen_valid = 1; -+ -+ return 0; -+} -+ -+int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ int ret = 0; -+ -+ down_read(&c->gc_lock); -+ ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_ALLOC, -+ NULL, bch2_alloc_read_fn); -+ up_read(&c->gc_lock); -+ -+ if (ret) { -+ bch_err(c, "error reading alloc info: %i", ret); -+ return ret; -+ } -+ -+ percpu_down_write(&c->mark_lock); -+ bch2_dev_usage_from_buckets(c); -+ percpu_up_write(&c->mark_lock); -+ -+ mutex_lock(&c->bucket_clock[READ].lock); -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ bch2_recalc_oldest_io(c, ca, READ); -+ up_read(&ca->bucket_lock); -+ } -+ mutex_unlock(&c->bucket_clock[READ].lock); -+ -+ mutex_lock(&c->bucket_clock[WRITE].lock); -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ bch2_recalc_oldest_io(c, ca, WRITE); -+ up_read(&ca->bucket_lock); -+ } -+ mutex_unlock(&c->bucket_clock[WRITE].lock); -+ -+ return 0; -+} -+ -+static int bch2_alloc_write_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c k; -+ struct bch_dev *ca; -+ struct bucket_array *ba; -+ struct bucket *g; -+ struct bucket_mark m; -+ struct bkey_alloc_unpacked old_u, new_u; -+ __BKEY_PADDED(k, 8) alloc_key; /* hack: */ -+ struct bkey_i_alloc *a; -+ int ret; -+retry: -+ bch2_trans_begin(trans); -+ -+ ret = bch2_btree_key_cache_flush(trans, -+ BTREE_ID_ALLOC, iter->pos); -+ if (ret) -+ goto err; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ old_u = bch2_alloc_unpack(k); -+ -+ percpu_down_read(&c->mark_lock); -+ ca = bch_dev_bkey_exists(c, iter->pos.inode); -+ ba = bucket_array(ca); -+ -+ g = &ba->b[iter->pos.offset]; -+ m = READ_ONCE(g->mark); -+ new_u = alloc_mem_to_key(g, m); -+ percpu_up_read(&c->mark_lock); -+ -+ if (!bkey_alloc_unpacked_cmp(old_u, new_u)) -+ return 0; -+ -+ a = bkey_alloc_init(&alloc_key.k); -+ a->k.p = iter->pos; -+ bch2_alloc_pack(a, new_u); -+ -+ bch2_trans_update(trans, iter, &a->k_i, -+ BTREE_TRIGGER_NORUN); -+ ret = bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ flags); -+err: -+ if (ret == -EINTR) -+ goto retry; -+ return ret; -+} -+ -+int bch2_dev_alloc_write(struct bch_fs *c, struct bch_dev *ca, unsigned flags) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ u64 first_bucket, nbuckets; -+ int ret = 0; -+ -+ percpu_down_read(&c->mark_lock); -+ first_bucket = bucket_array(ca)->first_bucket; -+ nbuckets = bucket_array(ca)->nbuckets; -+ percpu_up_read(&c->mark_lock); -+ -+ BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, -+ POS(ca->dev_idx, first_bucket), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ -+ while (iter->pos.offset < nbuckets) { -+ bch2_trans_cond_resched(&trans); -+ -+ ret = bch2_alloc_write_key(&trans, iter, flags); 
-+ if (ret) -+ break; -+ bch2_btree_iter_next_slot(iter); -+ } -+ -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+int bch2_alloc_write(struct bch_fs *c, unsigned flags) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ int ret = 0; -+ -+ for_each_rw_member(ca, c, i) { -+ bch2_dev_alloc_write(c, ca, flags); -+ if (ret) { -+ percpu_ref_put(&ca->io_ref); -+ break; -+ } -+ } -+ -+ return ret; -+} -+ -+/* Bucket IO clocks: */ -+ -+static void bch2_recalc_oldest_io(struct bch_fs *c, struct bch_dev *ca, int rw) -+{ -+ struct bucket_clock *clock = &c->bucket_clock[rw]; -+ struct bucket_array *buckets = bucket_array(ca); -+ struct bucket *g; -+ u16 max_last_io = 0; -+ unsigned i; -+ -+ lockdep_assert_held(&c->bucket_clock[rw].lock); -+ -+ /* Recalculate max_last_io for this device: */ -+ for_each_bucket(g, buckets) -+ max_last_io = max(max_last_io, bucket_last_io(c, g, rw)); -+ -+ ca->max_last_bucket_io[rw] = max_last_io; -+ -+ /* Recalculate global max_last_io: */ -+ max_last_io = 0; -+ -+ for_each_member_device(ca, c, i) -+ max_last_io = max(max_last_io, ca->max_last_bucket_io[rw]); -+ -+ clock->max_last_io = max_last_io; -+} -+ -+static void bch2_rescale_bucket_io_times(struct bch_fs *c, int rw) -+{ -+ struct bucket_clock *clock = &c->bucket_clock[rw]; -+ struct bucket_array *buckets; -+ struct bch_dev *ca; -+ struct bucket *g; -+ unsigned i; -+ -+ trace_rescale_prios(c); -+ -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) -+ g->io_time[rw] = clock->hand - -+ bucket_last_io(c, g, rw) / 2; -+ -+ bch2_recalc_oldest_io(c, ca, rw); -+ -+ up_read(&ca->bucket_lock); -+ } -+} -+ -+static inline u64 bucket_clock_freq(u64 capacity) -+{ -+ return max(capacity >> 10, 2028ULL); -+} -+ -+static void bch2_inc_clock_hand(struct io_timer *timer) -+{ -+ struct bucket_clock *clock = container_of(timer, -+ struct bucket_clock, rescale); -+ struct bch_fs *c = container_of(clock, -+ struct bch_fs, bucket_clock[clock->rw]); -+ struct bch_dev *ca; -+ u64 capacity; -+ unsigned i; -+ -+ mutex_lock(&clock->lock); -+ -+ /* if clock cannot be advanced more, rescale prio */ -+ if (clock->max_last_io >= U16_MAX - 2) -+ bch2_rescale_bucket_io_times(c, clock->rw); -+ -+ BUG_ON(clock->max_last_io >= U16_MAX - 2); -+ -+ for_each_member_device(ca, c, i) -+ ca->max_last_bucket_io[clock->rw]++; -+ clock->max_last_io++; -+ clock->hand++; -+ -+ mutex_unlock(&clock->lock); -+ -+ capacity = READ_ONCE(c->capacity); -+ -+ if (!capacity) -+ return; -+ -+ /* -+ * we only increment when 0.1% of the filesystem capacity has been read -+ * or written too, this determines if it's time -+ * -+ * XXX: we shouldn't really be going off of the capacity of devices in -+ * RW mode (that will be 0 when we're RO, yet we can still service -+ * reads) -+ */ -+ timer->expire += bucket_clock_freq(capacity); -+ -+ bch2_io_timer_add(&c->io_clock[clock->rw], timer); -+} -+ -+static void bch2_bucket_clock_init(struct bch_fs *c, int rw) -+{ -+ struct bucket_clock *clock = &c->bucket_clock[rw]; -+ -+ clock->hand = 1; -+ clock->rw = rw; -+ clock->rescale.fn = bch2_inc_clock_hand; -+ clock->rescale.expire = bucket_clock_freq(c->capacity); -+ mutex_init(&clock->lock); -+} -+ -+int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, -+ size_t bucket_nr, int rw) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, dev); -+ struct btree_iter *iter; -+ struct bucket *g; -+ struct bkey_i_alloc *a; -+ struct bkey_alloc_unpacked u; -+ u16 
*time; -+ int ret = 0; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, POS(dev, bucket_nr), -+ BTREE_ITER_CACHED| -+ BTREE_ITER_CACHED_NOFILL| -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8); -+ ret = PTR_ERR_OR_ZERO(a); -+ if (ret) -+ goto out; -+ -+ percpu_down_read(&c->mark_lock); -+ g = bucket(ca, bucket_nr); -+ u = alloc_mem_to_key(g, READ_ONCE(g->mark)); -+ percpu_up_read(&c->mark_lock); -+ -+ bkey_alloc_init(&a->k_i); -+ a->k.p = iter->pos; -+ -+ time = rw == READ ? &u.read_time : &u.write_time; -+ if (*time == c->bucket_clock[rw].hand) -+ goto out; -+ -+ *time = c->bucket_clock[rw].hand; -+ -+ bch2_alloc_pack(a, u); -+ -+ ret = bch2_trans_update(trans, iter, &a->k_i, 0) ?: -+ bch2_trans_commit(trans, NULL, NULL, 0); -+out: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+/* Background allocator thread: */ -+ -+/* -+ * Scans for buckets to be invalidated, invalidates them, rewrites prios/gens -+ * (marking them as invalidated on disk), then optionally issues discard -+ * commands to the newly free buckets, then puts them on the various freelists. -+ */ -+ -+/** -+ * wait_buckets_available - wait on reclaimable buckets -+ * -+ * If there aren't enough available buckets to fill up free_inc, wait until -+ * there are. -+ */ -+static int wait_buckets_available(struct bch_fs *c, struct bch_dev *ca) -+{ -+ unsigned long gc_count = c->gc_count; -+ u64 available; -+ int ret = 0; -+ -+ ca->allocator_state = ALLOCATOR_BLOCKED; -+ closure_wake_up(&c->freelist_wait); -+ -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ if (kthread_should_stop()) { -+ ret = 1; -+ break; -+ } -+ -+ if (gc_count != c->gc_count) -+ ca->inc_gen_really_needs_gc = 0; -+ -+ available = max_t(s64, 0, dev_buckets_available(ca) - -+ ca->inc_gen_really_needs_gc); -+ -+ if (available > fifo_free(&ca->free_inc) || -+ (available && -+ (!fifo_full(&ca->free[RESERVE_BTREE]) || -+ !fifo_full(&ca->free[RESERVE_MOVINGGC])))) -+ break; -+ -+ up_read(&c->gc_lock); -+ schedule(); -+ try_to_freeze(); -+ down_read(&c->gc_lock); -+ } -+ -+ __set_current_state(TASK_RUNNING); -+ ca->allocator_state = ALLOCATOR_RUNNING; -+ closure_wake_up(&c->freelist_wait); -+ -+ return ret; -+} -+ -+static bool bch2_can_invalidate_bucket(struct bch_dev *ca, -+ size_t bucket, -+ struct bucket_mark mark) -+{ -+ u8 gc_gen; -+ -+ if (!is_available_bucket(mark)) -+ return false; -+ -+ if (ca->buckets_nouse && -+ test_bit(bucket, ca->buckets_nouse)) -+ return false; -+ -+ gc_gen = bucket_gc_gen(ca, bucket); -+ -+ if (gc_gen >= BUCKET_GC_GEN_MAX / 2) -+ ca->inc_gen_needs_gc++; -+ -+ if (gc_gen >= BUCKET_GC_GEN_MAX) -+ ca->inc_gen_really_needs_gc++; -+ -+ return gc_gen < BUCKET_GC_GEN_MAX; -+} -+ -+/* -+ * Determines what order we're going to reuse buckets, smallest bucket_key() -+ * first. -+ * -+ * -+ * - We take into account the read prio of the bucket, which gives us an -+ * indication of how hot the data is -- we scale the prio so that the prio -+ * farthest from the clock is worth 1/8th of the closest. -+ * -+ * - The number of sectors of cached data in the bucket, which gives us an -+ * indication of the cost in cache misses this eviction will cause. -+ * -+ * - If hotness * sectors used compares equal, we pick the bucket with the -+ * smallest bucket_gc_gen() - since incrementing the same bucket's generation -+ * number repeatedly forces us to run mark and sweep gc to avoid generation -+ * number wraparound. 
-+ */ -+ -+static unsigned long bucket_sort_key(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, struct bucket_mark m) -+{ -+ unsigned last_io = bucket_last_io(c, bucket(ca, b), READ); -+ unsigned max_last_io = ca->max_last_bucket_io[READ]; -+ -+ /* -+ * Time since last read, scaled to [0, 8) where larger value indicates -+ * more recently read data: -+ */ -+ unsigned long hotness = (max_last_io - last_io) * 7 / max_last_io; -+ -+ /* How much we want to keep the data in this bucket: */ -+ unsigned long data_wantness = -+ (hotness + 1) * bucket_sectors_used(m); -+ -+ unsigned long needs_journal_commit = -+ bucket_needs_journal_commit(m, c->journal.last_seq_ondisk); -+ -+ return (data_wantness << 9) | -+ (needs_journal_commit << 8) | -+ (bucket_gc_gen(ca, b) / 16); -+} -+ -+static inline int bucket_alloc_cmp(alloc_heap *h, -+ struct alloc_heap_entry l, -+ struct alloc_heap_entry r) -+{ -+ return cmp_int(l.key, r.key) ?: -+ cmp_int(r.nr, l.nr) ?: -+ cmp_int(l.bucket, r.bucket); -+} -+ -+static inline int bucket_idx_cmp(const void *_l, const void *_r) -+{ -+ const struct alloc_heap_entry *l = _l, *r = _r; -+ -+ return cmp_int(l->bucket, r->bucket); -+} -+ -+static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bucket_array *buckets; -+ struct alloc_heap_entry e = { 0 }; -+ size_t b, i, nr = 0; -+ -+ ca->alloc_heap.used = 0; -+ -+ mutex_lock(&c->bucket_clock[READ].lock); -+ down_read(&ca->bucket_lock); -+ -+ buckets = bucket_array(ca); -+ -+ bch2_recalc_oldest_io(c, ca, READ); -+ -+ /* -+ * Find buckets with lowest read priority, by building a maxheap sorted -+ * by read priority and repeatedly replacing the maximum element until -+ * all buckets have been visited. -+ */ -+ for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++) { -+ struct bucket_mark m = READ_ONCE(buckets->b[b].mark); -+ unsigned long key = bucket_sort_key(c, ca, b, m); -+ -+ if (!bch2_can_invalidate_bucket(ca, b, m)) -+ continue; -+ -+ if (e.nr && e.bucket + e.nr == b && e.key == key) { -+ e.nr++; -+ } else { -+ if (e.nr) -+ heap_add_or_replace(&ca->alloc_heap, e, -+ -bucket_alloc_cmp, NULL); -+ -+ e = (struct alloc_heap_entry) { -+ .bucket = b, -+ .nr = 1, -+ .key = key, -+ }; -+ } -+ -+ cond_resched(); -+ } -+ -+ if (e.nr) -+ heap_add_or_replace(&ca->alloc_heap, e, -+ -bucket_alloc_cmp, NULL); -+ -+ for (i = 0; i < ca->alloc_heap.used; i++) -+ nr += ca->alloc_heap.data[i].nr; -+ -+ while (nr - ca->alloc_heap.data[0].nr >= ALLOC_SCAN_BATCH(ca)) { -+ nr -= ca->alloc_heap.data[0].nr; -+ heap_pop(&ca->alloc_heap, e, -bucket_alloc_cmp, NULL); -+ } -+ -+ up_read(&ca->bucket_lock); -+ mutex_unlock(&c->bucket_clock[READ].lock); -+} -+ -+static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bucket_array *buckets = bucket_array(ca); -+ struct bucket_mark m; -+ size_t b, start; -+ -+ if (ca->fifo_last_bucket < ca->mi.first_bucket || -+ ca->fifo_last_bucket >= ca->mi.nbuckets) -+ ca->fifo_last_bucket = ca->mi.first_bucket; -+ -+ start = ca->fifo_last_bucket; -+ -+ do { -+ ca->fifo_last_bucket++; -+ if (ca->fifo_last_bucket == ca->mi.nbuckets) -+ ca->fifo_last_bucket = ca->mi.first_bucket; -+ -+ b = ca->fifo_last_bucket; -+ m = READ_ONCE(buckets->b[b].mark); -+ -+ if (bch2_can_invalidate_bucket(ca, b, m)) { -+ struct alloc_heap_entry e = { .bucket = b, .nr = 1, }; -+ -+ heap_add(&ca->alloc_heap, e, bucket_alloc_cmp, NULL); -+ if (heap_full(&ca->alloc_heap)) -+ break; -+ } -+ -+ cond_resched(); -+ } while (ca->fifo_last_bucket != start); -+} -+ -+static 
void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bucket_array *buckets = bucket_array(ca); -+ struct bucket_mark m; -+ size_t checked, i; -+ -+ for (checked = 0; -+ checked < ca->mi.nbuckets / 2; -+ checked++) { -+ size_t b = bch2_rand_range(ca->mi.nbuckets - -+ ca->mi.first_bucket) + -+ ca->mi.first_bucket; -+ -+ m = READ_ONCE(buckets->b[b].mark); -+ -+ if (bch2_can_invalidate_bucket(ca, b, m)) { -+ struct alloc_heap_entry e = { .bucket = b, .nr = 1, }; -+ -+ heap_add(&ca->alloc_heap, e, bucket_alloc_cmp, NULL); -+ if (heap_full(&ca->alloc_heap)) -+ break; -+ } -+ -+ cond_resched(); -+ } -+ -+ sort(ca->alloc_heap.data, -+ ca->alloc_heap.used, -+ sizeof(ca->alloc_heap.data[0]), -+ bucket_idx_cmp, NULL); -+ -+ /* remove duplicates: */ -+ for (i = 0; i + 1 < ca->alloc_heap.used; i++) -+ if (ca->alloc_heap.data[i].bucket == -+ ca->alloc_heap.data[i + 1].bucket) -+ ca->alloc_heap.data[i].nr = 0; -+} -+ -+static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca) -+{ -+ size_t i, nr = 0; -+ -+ ca->inc_gen_needs_gc = 0; -+ -+ switch (ca->mi.replacement) { -+ case CACHE_REPLACEMENT_LRU: -+ find_reclaimable_buckets_lru(c, ca); -+ break; -+ case CACHE_REPLACEMENT_FIFO: -+ find_reclaimable_buckets_fifo(c, ca); -+ break; -+ case CACHE_REPLACEMENT_RANDOM: -+ find_reclaimable_buckets_random(c, ca); -+ break; -+ } -+ -+ heap_resort(&ca->alloc_heap, bucket_alloc_cmp, NULL); -+ -+ for (i = 0; i < ca->alloc_heap.used; i++) -+ nr += ca->alloc_heap.data[i].nr; -+ -+ return nr; -+} -+ -+static inline long next_alloc_bucket(struct bch_dev *ca) -+{ -+ struct alloc_heap_entry e, *top = ca->alloc_heap.data; -+ -+ while (ca->alloc_heap.used) { -+ if (top->nr) { -+ size_t b = top->bucket; -+ -+ top->bucket++; -+ top->nr--; -+ return b; -+ } -+ -+ heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL); -+ } -+ -+ return -1; -+} -+ -+/* -+ * returns sequence number of most recent journal entry that updated this -+ * bucket: -+ */ -+static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m) -+{ -+ if (m.journal_seq_valid) { -+ u64 journal_seq = atomic64_read(&c->journal.seq); -+ u64 bucket_seq = journal_seq; -+ -+ bucket_seq &= ~((u64) U16_MAX); -+ bucket_seq |= m.journal_seq; -+ -+ if (bucket_seq > journal_seq) -+ bucket_seq -= 1 << 16; -+ -+ return bucket_seq; -+ } else { -+ return 0; -+ } -+} -+ -+static int bch2_invalidate_one_bucket2(struct btree_trans *trans, -+ struct bch_dev *ca, -+ struct btree_iter *iter, -+ u64 *journal_seq, unsigned flags) -+{ -+#if 0 -+ __BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key; -+#else -+ /* hack: */ -+ __BKEY_PADDED(k, 8) alloc_key; -+#endif -+ struct bch_fs *c = trans->c; -+ struct bkey_i_alloc *a; -+ struct bkey_alloc_unpacked u; -+ struct bucket *g; -+ struct bucket_mark m; -+ bool invalidating_cached_data; -+ size_t b; -+ int ret = 0; -+ -+ BUG_ON(!ca->alloc_heap.used || -+ !ca->alloc_heap.data[0].nr); -+ b = ca->alloc_heap.data[0].bucket; -+ -+ /* first, put on free_inc and mark as owned by allocator: */ -+ percpu_down_read(&c->mark_lock); -+ spin_lock(&c->freelist_lock); -+ -+ verify_not_on_freelist(c, ca, b); -+ -+ BUG_ON(!fifo_push(&ca->free_inc, b)); -+ -+ g = bucket(ca, b); -+ m = READ_ONCE(g->mark); -+ -+ invalidating_cached_data = m.cached_sectors != 0; -+ -+ /* -+ * If we're not invalidating cached data, we only increment the bucket -+ * gen in memory here, the incremented gen will be updated in the btree -+ * by bch2_trans_mark_pointer(): -+ */ -+ -+ if (!invalidating_cached_data) -+ 
bch2_invalidate_bucket(c, ca, b, &m); -+ else -+ bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0); -+ -+ spin_unlock(&c->freelist_lock); -+ percpu_up_read(&c->mark_lock); -+ -+ if (!invalidating_cached_data) -+ goto out; -+ -+ /* -+ * If the read-only path is trying to shut down, we can't be generating -+ * new btree updates: -+ */ -+ if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags)) { -+ ret = 1; -+ goto out; -+ } -+ -+ BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); -+ -+ bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b)); -+retry: -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return ret; -+ -+ percpu_down_read(&c->mark_lock); -+ g = bucket(ca, iter->pos.offset); -+ m = READ_ONCE(g->mark); -+ u = alloc_mem_to_key(g, m); -+ -+ percpu_up_read(&c->mark_lock); -+ -+ invalidating_cached_data = u.cached_sectors != 0; -+ -+ u.gen++; -+ u.data_type = 0; -+ u.dirty_sectors = 0; -+ u.cached_sectors = 0; -+ u.read_time = c->bucket_clock[READ].hand; -+ u.write_time = c->bucket_clock[WRITE].hand; -+ -+ a = bkey_alloc_init(&alloc_key.k); -+ a->k.p = iter->pos; -+ bch2_alloc_pack(a, u); -+ -+ bch2_trans_update(trans, iter, &a->k_i, -+ BTREE_TRIGGER_BUCKET_INVALIDATE); -+ -+ /* -+ * XXX: -+ * when using deferred btree updates, we have journal reclaim doing -+ * btree updates and thus requiring the allocator to make forward -+ * progress, and here the allocator is requiring space in the journal - -+ * so we need a journal pre-reservation: -+ */ -+ ret = bch2_trans_commit(trans, NULL, -+ invalidating_cached_data ? journal_seq : NULL, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_USE_ALLOC_RESERVE| -+ flags); -+ if (ret == -EINTR) -+ goto retry; -+out: -+ if (!ret) { -+ /* remove from alloc_heap: */ -+ struct alloc_heap_entry e, *top = ca->alloc_heap.data; -+ -+ top->bucket++; -+ top->nr--; -+ -+ if (!top->nr) -+ heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL); -+ -+ /* -+ * Make sure we flush the last journal entry that updated this -+ * bucket (i.e. deleting the last reference) before writing to -+ * this bucket again: -+ */ -+ *journal_seq = max(*journal_seq, bucket_journal_seq(c, m)); -+ } else { -+ size_t b2; -+ -+ /* remove from free_inc: */ -+ percpu_down_read(&c->mark_lock); -+ spin_lock(&c->freelist_lock); -+ -+ bch2_mark_alloc_bucket(c, ca, b, false, -+ gc_pos_alloc(c, NULL), 0); -+ -+ BUG_ON(!fifo_pop_back(&ca->free_inc, b2)); -+ BUG_ON(b != b2); -+ -+ spin_unlock(&c->freelist_lock); -+ percpu_up_read(&c->mark_lock); -+ } -+ -+ return ret < 0 ? ret : 0; -+} -+ -+/* -+ * Pull buckets off ca->alloc_heap, invalidate them, move them to ca->free_inc: -+ */ -+static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ u64 journal_seq = 0; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, -+ POS(ca->dev_idx, 0), -+ BTREE_ITER_CACHED| -+ BTREE_ITER_CACHED_NOFILL| -+ BTREE_ITER_INTENT); -+ -+ /* Only use nowait if we've already invalidated at least one bucket: */ -+ while (!ret && -+ !fifo_full(&ca->free_inc) && -+ ca->alloc_heap.used) -+ ret = bch2_invalidate_one_bucket2(&trans, ca, iter, &journal_seq, -+ BTREE_INSERT_GC_LOCK_HELD| -+ (!fifo_empty(&ca->free_inc) -+ ? 
BTREE_INSERT_NOWAIT : 0)); -+ -+ bch2_trans_exit(&trans); -+ -+ /* If we used NOWAIT, don't return the error: */ -+ if (!fifo_empty(&ca->free_inc)) -+ ret = 0; -+ if (ret) { -+ bch_err(ca, "error invalidating buckets: %i", ret); -+ return ret; -+ } -+ -+ if (journal_seq) -+ ret = bch2_journal_flush_seq(&c->journal, journal_seq); -+ if (ret) { -+ bch_err(ca, "journal error: %i", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t bucket) -+{ -+ unsigned i; -+ int ret = 0; -+ -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ spin_lock(&c->freelist_lock); -+ for (i = 0; i < RESERVE_NR; i++) { -+ -+ /* -+ * Don't strand buckets on the copygc freelist until -+ * after recovery is finished: -+ */ -+ if (!test_bit(BCH_FS_STARTED, &c->flags) && -+ i == RESERVE_MOVINGGC) -+ continue; -+ -+ if (fifo_push(&ca->free[i], bucket)) { -+ fifo_pop(&ca->free_inc, bucket); -+ -+ closure_wake_up(&c->freelist_wait); -+ ca->allocator_state = ALLOCATOR_RUNNING; -+ -+ spin_unlock(&c->freelist_lock); -+ goto out; -+ } -+ } -+ -+ if (ca->allocator_state != ALLOCATOR_BLOCKED_FULL) { -+ ca->allocator_state = ALLOCATOR_BLOCKED_FULL; -+ closure_wake_up(&c->freelist_wait); -+ } -+ -+ spin_unlock(&c->freelist_lock); -+ -+ if ((current->flags & PF_KTHREAD) && -+ kthread_should_stop()) { -+ ret = 1; -+ break; -+ } -+ -+ schedule(); -+ try_to_freeze(); -+ } -+out: -+ __set_current_state(TASK_RUNNING); -+ return ret; -+} -+ -+/* -+ * Pulls buckets off free_inc, discards them (if enabled), then adds them to -+ * freelists, waiting until there's room if necessary: -+ */ -+static int discard_invalidated_buckets(struct bch_fs *c, struct bch_dev *ca) -+{ -+ while (!fifo_empty(&ca->free_inc)) { -+ size_t bucket = fifo_peek(&ca->free_inc); -+ -+ if (ca->mi.discard && -+ blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev))) -+ blkdev_issue_discard(ca->disk_sb.bdev, -+ bucket_to_sector(ca, bucket), -+ ca->mi.bucket_size, GFP_NOIO, 0); -+ -+ if (push_invalidated_bucket(c, ca, bucket)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/** -+ * bch_allocator_thread - move buckets from free_inc to reserves -+ * -+ * The free_inc FIFO is populated by find_reclaimable_buckets(), and -+ * the reserves are depleted by bucket allocation. When we run out -+ * of free_inc, try to invalidate some buckets and write out -+ * prios and gens. 
-+ */ -+static int bch2_allocator_thread(void *arg) -+{ -+ struct bch_dev *ca = arg; -+ struct bch_fs *c = ca->fs; -+ size_t nr; -+ int ret; -+ -+ set_freezable(); -+ ca->allocator_state = ALLOCATOR_RUNNING; -+ -+ while (1) { -+ cond_resched(); -+ if (kthread_should_stop()) -+ break; -+ -+ pr_debug("discarding %zu invalidated buckets", -+ fifo_used(&ca->free_inc)); -+ -+ ret = discard_invalidated_buckets(c, ca); -+ if (ret) -+ goto stop; -+ -+ down_read(&c->gc_lock); -+ -+ ret = bch2_invalidate_buckets(c, ca); -+ if (ret) { -+ up_read(&c->gc_lock); -+ goto stop; -+ } -+ -+ if (!fifo_empty(&ca->free_inc)) { -+ up_read(&c->gc_lock); -+ continue; -+ } -+ -+ pr_debug("free_inc now empty"); -+ -+ do { -+ /* -+ * Find some buckets that we can invalidate, either -+ * they're completely unused, or only contain clean data -+ * that's been written back to the backing device or -+ * another cache tier -+ */ -+ -+ pr_debug("scanning for reclaimable buckets"); -+ -+ nr = find_reclaimable_buckets(c, ca); -+ -+ pr_debug("found %zu buckets", nr); -+ -+ trace_alloc_batch(ca, nr, ca->alloc_heap.size); -+ -+ if ((ca->inc_gen_needs_gc >= ALLOC_SCAN_BATCH(ca) || -+ ca->inc_gen_really_needs_gc) && -+ c->gc_thread) { -+ atomic_inc(&c->kick_gc); -+ wake_up_process(c->gc_thread); -+ } -+ -+ /* -+ * If we found any buckets, we have to invalidate them -+ * before we scan for more - but if we didn't find very -+ * many we may want to wait on more buckets being -+ * available so we don't spin: -+ */ -+ if (!nr || -+ (nr < ALLOC_SCAN_BATCH(ca) && -+ !fifo_empty(&ca->free[RESERVE_NONE]))) { -+ ret = wait_buckets_available(c, ca); -+ if (ret) { -+ up_read(&c->gc_lock); -+ goto stop; -+ } -+ } -+ } while (!nr); -+ -+ up_read(&c->gc_lock); -+ -+ pr_debug("%zu buckets to invalidate", nr); -+ -+ /* -+ * alloc_heap is now full of newly-invalidated buckets: next, -+ * write out the new bucket gens: -+ */ -+ } -+ -+stop: -+ pr_debug("alloc thread stopping (ret %i)", ret); -+ ca->allocator_state = ALLOCATOR_STOPPED; -+ closure_wake_up(&c->freelist_wait); -+ return 0; -+} -+ -+/* Startup/shutdown (ro/rw): */ -+ -+void bch2_recalc_capacity(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ u64 capacity = 0, reserved_sectors = 0, gc_reserve, copygc_threshold = 0; -+ unsigned bucket_size_max = 0; -+ unsigned long ra_pages = 0; -+ unsigned i, j; -+ -+ lockdep_assert_held(&c->state_lock); -+ -+ for_each_online_member(ca, c, i) { -+ struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_bdi; -+ -+ ra_pages += bdi->ra_pages; -+ } -+ -+ bch2_set_ra_pages(c, ra_pages); -+ -+ for_each_rw_member(ca, c, i) { -+ u64 dev_reserve = 0; -+ -+ /* -+ * We need to reserve buckets (from the number -+ * of currently available buckets) against -+ * foreground writes so that mainly copygc can -+ * make forward progress. -+ * -+ * We need enough to refill the various reserves -+ * from scratch - copygc will use its entire -+ * reserve all at once, then run against when -+ * its reserve is refilled (from the formerly -+ * available buckets). -+ * -+ * This reserve is just used when considering if -+ * allocations for foreground writes must wait - -+ * not -ENOSPC calculations. 
-+ */ -+ for (j = 0; j < RESERVE_NONE; j++) -+ dev_reserve += ca->free[j].size; -+ -+ dev_reserve += 1; /* btree write point */ -+ dev_reserve += 1; /* copygc write point */ -+ dev_reserve += 1; /* rebalance write point */ -+ -+ dev_reserve *= ca->mi.bucket_size; -+ -+ copygc_threshold += dev_reserve; -+ -+ capacity += bucket_to_sector(ca, ca->mi.nbuckets - -+ ca->mi.first_bucket); -+ -+ reserved_sectors += dev_reserve * 2; -+ -+ bucket_size_max = max_t(unsigned, bucket_size_max, -+ ca->mi.bucket_size); -+ } -+ -+ gc_reserve = c->opts.gc_reserve_bytes -+ ? c->opts.gc_reserve_bytes >> 9 -+ : div64_u64(capacity * c->opts.gc_reserve_percent, 100); -+ -+ reserved_sectors = max(gc_reserve, reserved_sectors); -+ -+ reserved_sectors = min(reserved_sectors, capacity); -+ -+ c->copygc_threshold = copygc_threshold; -+ c->capacity = capacity - reserved_sectors; -+ -+ c->bucket_size_max = bucket_size_max; -+ -+ /* Wake up case someone was waiting for buckets */ -+ closure_wake_up(&c->freelist_wait); -+} -+ -+static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct open_bucket *ob; -+ bool ret = false; -+ -+ for (ob = c->open_buckets; -+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); -+ ob++) { -+ spin_lock(&ob->lock); -+ if (ob->valid && !ob->on_partial_list && -+ ob->ptr.dev == ca->dev_idx) -+ ret = true; -+ spin_unlock(&ob->lock); -+ } -+ -+ return ret; -+} -+ -+/* device goes ro: */ -+void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca) -+{ -+ unsigned i; -+ -+ BUG_ON(ca->alloc_thread); -+ -+ /* First, remove device from allocation groups: */ -+ -+ for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++) -+ clear_bit(ca->dev_idx, c->rw_devs[i].d); -+ -+ /* -+ * Capacity is calculated based off of devices in allocation groups: -+ */ -+ bch2_recalc_capacity(c); -+ -+ /* Next, close write points that point to this device... 
*/ -+ for (i = 0; i < ARRAY_SIZE(c->write_points); i++) -+ bch2_writepoint_stop(c, ca, &c->write_points[i]); -+ -+ bch2_writepoint_stop(c, ca, &c->copygc_write_point); -+ bch2_writepoint_stop(c, ca, &c->rebalance_write_point); -+ bch2_writepoint_stop(c, ca, &c->btree_write_point); -+ -+ mutex_lock(&c->btree_reserve_cache_lock); -+ while (c->btree_reserve_cache_nr) { -+ struct btree_alloc *a = -+ &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; -+ -+ bch2_open_buckets_put(c, &a->ob); -+ } -+ mutex_unlock(&c->btree_reserve_cache_lock); -+ -+ while (1) { -+ struct open_bucket *ob; -+ -+ spin_lock(&c->freelist_lock); -+ if (!ca->open_buckets_partial_nr) { -+ spin_unlock(&c->freelist_lock); -+ break; -+ } -+ ob = c->open_buckets + -+ ca->open_buckets_partial[--ca->open_buckets_partial_nr]; -+ ob->on_partial_list = false; -+ spin_unlock(&c->freelist_lock); -+ -+ bch2_open_bucket_put(c, ob); -+ } -+ -+ bch2_ec_stop_dev(c, ca); -+ -+ /* -+ * Wake up threads that were blocked on allocation, so they can notice -+ * the device can no longer be removed and the capacity has changed: -+ */ -+ closure_wake_up(&c->freelist_wait); -+ -+ /* -+ * journal_res_get() can block waiting for free space in the journal - -+ * it needs to notice there may not be devices to allocate from anymore: -+ */ -+ wake_up(&c->journal.wait); -+ -+ /* Now wait for any in flight writes: */ -+ -+ closure_wait_event(&c->open_buckets_wait, -+ !bch2_dev_has_open_write_point(c, ca)); -+} -+ -+/* device goes rw: */ -+void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++) -+ if (ca->mi.data_allowed & (1 << i)) -+ set_bit(ca->dev_idx, c->rw_devs[i].d); -+} -+ -+void bch2_dev_allocator_quiesce(struct bch_fs *c, struct bch_dev *ca) -+{ -+ if (ca->alloc_thread) -+ closure_wait_event(&c->freelist_wait, -+ ca->allocator_state != ALLOCATOR_RUNNING); -+} -+ -+/* stop allocator thread: */ -+void bch2_dev_allocator_stop(struct bch_dev *ca) -+{ -+ struct task_struct *p; -+ -+ p = rcu_dereference_protected(ca->alloc_thread, 1); -+ ca->alloc_thread = NULL; -+ -+ /* -+ * We need an rcu barrier between setting ca->alloc_thread = NULL and -+ * the thread shutting down to avoid bch2_wake_allocator() racing: -+ * -+ * XXX: it would be better to have the rcu barrier be asynchronous -+ * instead of blocking us here -+ */ -+ synchronize_rcu(); -+ -+ if (p) { -+ kthread_stop(p); -+ put_task_struct(p); -+ } -+} -+ -+/* start allocator thread: */ -+int bch2_dev_allocator_start(struct bch_dev *ca) -+{ -+ struct task_struct *p; -+ -+ /* -+ * allocator thread already started? 
-+ */ -+ if (ca->alloc_thread) -+ return 0; -+ -+ p = kthread_create(bch2_allocator_thread, ca, -+ "bch_alloc[%s]", ca->name); -+ if (IS_ERR(p)) -+ return PTR_ERR(p); -+ -+ get_task_struct(p); -+ rcu_assign_pointer(ca->alloc_thread, p); -+ wake_up_process(p); -+ return 0; -+} -+ -+void bch2_fs_allocator_background_init(struct bch_fs *c) -+{ -+ spin_lock_init(&c->freelist_lock); -+ bch2_bucket_clock_init(c, READ); -+ bch2_bucket_clock_init(c, WRITE); -+ -+ c->pd_controllers_update_seconds = 5; -+ INIT_DELAYED_WORK(&c->pd_controllers_update, pd_controllers_update); -+} -diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h -new file mode 100644 -index 000000000000..cbaff56f7473 ---- /dev/null -+++ b/fs/bcachefs/alloc_background.h -@@ -0,0 +1,105 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ALLOC_BACKGROUND_H -+#define _BCACHEFS_ALLOC_BACKGROUND_H -+ -+#include "bcachefs.h" -+#include "alloc_types.h" -+#include "debug.h" -+ -+struct bkey_alloc_unpacked { -+ u8 gen; -+#define x(_name, _bits) u##_bits _name; -+ BCH_ALLOC_FIELDS() -+#undef x -+}; -+ -+/* How out of date a pointer gen is allowed to be: */ -+#define BUCKET_GC_GEN_MAX 96U -+ -+/* returns true if not equal */ -+static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l, -+ struct bkey_alloc_unpacked r) -+{ -+ return l.gen != r.gen -+#define x(_name, _bits) || l._name != r._name -+ BCH_ALLOC_FIELDS() -+#undef x -+ ; -+} -+ -+struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c); -+void bch2_alloc_pack(struct bkey_i_alloc *, -+ const struct bkey_alloc_unpacked); -+ -+int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); -+ -+static inline struct bkey_alloc_unpacked -+alloc_mem_to_key(struct bucket *g, struct bucket_mark m) -+{ -+ return (struct bkey_alloc_unpacked) { -+ .gen = m.gen, -+ .oldest_gen = g->oldest_gen, -+ .data_type = m.data_type, -+ .dirty_sectors = m.dirty_sectors, -+ .cached_sectors = m.cached_sectors, -+ .read_time = g->io_time[READ], -+ .write_time = g->io_time[WRITE], -+ }; -+} -+ -+#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9) -+ -+const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_alloc (struct bkey_ops) { \ -+ .key_invalid = bch2_alloc_invalid, \ -+ .val_to_text = bch2_alloc_to_text, \ -+} -+ -+struct journal_keys; -+int bch2_alloc_read(struct bch_fs *, struct journal_keys *); -+ -+static inline void bch2_wake_allocator(struct bch_dev *ca) -+{ -+ struct task_struct *p; -+ -+ rcu_read_lock(); -+ p = rcu_dereference(ca->alloc_thread); -+ if (p) { -+ wake_up_process(p); -+ ca->allocator_state = ALLOCATOR_RUNNING; -+ } -+ rcu_read_unlock(); -+} -+ -+static inline void verify_not_on_freelist(struct bch_fs *c, struct bch_dev *ca, -+ size_t bucket) -+{ -+ if (expensive_debug_checks(c)) { -+ size_t iter; -+ long i; -+ unsigned j; -+ -+ for (j = 0; j < RESERVE_NR; j++) -+ fifo_for_each_entry(i, &ca->free[j], iter) -+ BUG_ON(i == bucket); -+ fifo_for_each_entry(i, &ca->free_inc, iter) -+ BUG_ON(i == bucket); -+ } -+} -+ -+void bch2_recalc_capacity(struct bch_fs *); -+ -+void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *); -+void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *); -+ -+void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *); -+void bch2_dev_allocator_stop(struct bch_dev *); -+int bch2_dev_allocator_start(struct bch_dev *); -+ -+int 
bch2_dev_alloc_write(struct bch_fs *, struct bch_dev *, unsigned); -+int bch2_alloc_write(struct bch_fs *, unsigned); -+void bch2_fs_allocator_background_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */ -diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c -new file mode 100644 -index 000000000000..7a92e3d53254 ---- /dev/null -+++ b/fs/bcachefs/alloc_foreground.c -@@ -0,0 +1,990 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Primary bucket allocation code -+ * -+ * Copyright 2012 Google, Inc. -+ * -+ * Allocation in bcache is done in terms of buckets: -+ * -+ * Each bucket has associated an 8 bit gen; this gen corresponds to the gen in -+ * btree pointers - they must match for the pointer to be considered valid. -+ * -+ * Thus (assuming a bucket has no dirty data or metadata in it) we can reuse a -+ * bucket simply by incrementing its gen. -+ * -+ * The gens (along with the priorities; it's really the gens are important but -+ * the code is named as if it's the priorities) are written in an arbitrary list -+ * of buckets on disk, with a pointer to them in the journal header. -+ * -+ * When we invalidate a bucket, we have to write its new gen to disk and wait -+ * for that write to complete before we use it - otherwise after a crash we -+ * could have pointers that appeared to be good but pointed to data that had -+ * been overwritten. -+ * -+ * Since the gens and priorities are all stored contiguously on disk, we can -+ * batch this up: We fill up the free_inc list with freshly invalidated buckets, -+ * call prio_write(), and when prio_write() finishes we pull buckets off the -+ * free_inc list and optionally discard them. -+ * -+ * free_inc isn't the only freelist - if it was, we'd often have to sleep while -+ * priorities and gens were being written before we could allocate. c->free is a -+ * smaller freelist, and buckets on that list are always ready to be used. -+ * -+ * If we've got discards enabled, that happens when a bucket moves from the -+ * free_inc list to the free list. -+ * -+ * It's important to ensure that gens don't wrap around - with respect to -+ * either the oldest gen in the btree or the gen on disk. This is quite -+ * difficult to do in practice, but we explicitly guard against it anyways - if -+ * a bucket is in danger of wrapping around we simply skip invalidating it that -+ * time around, and we garbage collect or rewrite the priorities sooner than we -+ * would have otherwise. -+ * -+ * bch2_bucket_alloc() allocates a single bucket from a specific device. -+ * -+ * bch2_bucket_alloc_set() allocates one or more buckets from different devices -+ * in a given filesystem. -+ * -+ * invalidate_buckets() drives all the processes described above. It's called -+ * from bch2_bucket_alloc() and a few other places that need to make sure free -+ * buckets are ready. -+ * -+ * invalidate_buckets_(lru|fifo)() find buckets that are available to be -+ * invalidated, and then invalidate them and stick them on the free_inc list - -+ * in either lru or fifo order. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "clock.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "io.h" -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * Open buckets represent a bucket that's currently being allocated from. 
They -+ * serve two purposes: -+ * -+ * - They track buckets that have been partially allocated, allowing for -+ * sub-bucket sized allocations - they're used by the sector allocator below -+ * -+ * - They provide a reference to the buckets they own that mark and sweep GC -+ * can find, until the new allocation has a pointer to it inserted into the -+ * btree -+ * -+ * When allocating some space with the sector allocator, the allocation comes -+ * with a reference to an open bucket - the caller is required to put that -+ * reference _after_ doing the index update that makes its allocation reachable. -+ */ -+ -+void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ -+ if (ob->ec) { -+ bch2_ec_bucket_written(c, ob); -+ return; -+ } -+ -+ percpu_down_read(&c->mark_lock); -+ spin_lock(&ob->lock); -+ -+ bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr), -+ false, gc_pos_alloc(c, ob), 0); -+ ob->valid = false; -+ ob->type = 0; -+ -+ spin_unlock(&ob->lock); -+ percpu_up_read(&c->mark_lock); -+ -+ spin_lock(&c->freelist_lock); -+ ob->freelist = c->open_buckets_freelist; -+ c->open_buckets_freelist = ob - c->open_buckets; -+ c->open_buckets_nr_free++; -+ spin_unlock(&c->freelist_lock); -+ -+ closure_wake_up(&c->open_buckets_wait); -+} -+ -+void bch2_open_bucket_write_error(struct bch_fs *c, -+ struct open_buckets *obs, -+ unsigned dev) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, obs, ob, i) -+ if (ob->ptr.dev == dev && -+ ob->ec) -+ bch2_ec_bucket_cancel(c, ob); -+} -+ -+static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c) -+{ -+ struct open_bucket *ob; -+ -+ BUG_ON(!c->open_buckets_freelist || !c->open_buckets_nr_free); -+ -+ ob = c->open_buckets + c->open_buckets_freelist; -+ c->open_buckets_freelist = ob->freelist; -+ atomic_set(&ob->pin, 1); -+ ob->type = 0; -+ -+ c->open_buckets_nr_free--; -+ return ob; -+} -+ -+static void open_bucket_free_unused(struct bch_fs *c, -+ struct write_point *wp, -+ struct open_bucket *ob) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ bool may_realloc = wp->type == BCH_DATA_user; -+ -+ BUG_ON(ca->open_buckets_partial_nr > -+ ARRAY_SIZE(ca->open_buckets_partial)); -+ -+ if (ca->open_buckets_partial_nr < -+ ARRAY_SIZE(ca->open_buckets_partial) && -+ may_realloc) { -+ spin_lock(&c->freelist_lock); -+ ob->on_partial_list = true; -+ ca->open_buckets_partial[ca->open_buckets_partial_nr++] = -+ ob - c->open_buckets; -+ spin_unlock(&c->freelist_lock); -+ -+ closure_wake_up(&c->open_buckets_wait); -+ closure_wake_up(&c->freelist_wait); -+ } else { -+ bch2_open_bucket_put(c, ob); -+ } -+} -+ -+static void verify_not_stale(struct bch_fs *c, const struct open_buckets *obs) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, obs, ob, i) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ -+ BUG_ON(ptr_stale(ca, &ob->ptr)); -+ } -+#endif -+} -+ -+/* _only_ for allocating the journal on a new device: */ -+long bch2_bucket_alloc_new_fs(struct bch_dev *ca) -+{ -+ struct bucket_array *buckets; -+ ssize_t b; -+ -+ rcu_read_lock(); -+ buckets = bucket_array(ca); -+ -+ for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++) -+ if (is_available_bucket(buckets->b[b].mark)) -+ goto success; -+ b = -1; -+success: -+ rcu_read_unlock(); -+ return b; -+} -+ -+static inline unsigned open_buckets_reserved(enum alloc_reserve reserve) -+{ -+ switch (reserve) { -+ case 
RESERVE_ALLOC: -+ return 0; -+ case RESERVE_BTREE: -+ return OPEN_BUCKETS_COUNT / 4; -+ default: -+ return OPEN_BUCKETS_COUNT / 2; -+ } -+} -+ -+/** -+ * bch_bucket_alloc - allocate a single bucket from a specific device -+ * -+ * Returns index of bucket on success, 0 on failure -+ * */ -+struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, -+ enum alloc_reserve reserve, -+ bool may_alloc_partial, -+ struct closure *cl) -+{ -+ struct bucket_array *buckets; -+ struct open_bucket *ob; -+ long bucket = 0; -+ -+ spin_lock(&c->freelist_lock); -+ -+ if (may_alloc_partial) { -+ int i; -+ -+ for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) { -+ ob = c->open_buckets + ca->open_buckets_partial[i]; -+ -+ if (reserve <= ob->alloc_reserve) { -+ array_remove_item(ca->open_buckets_partial, -+ ca->open_buckets_partial_nr, -+ i); -+ ob->on_partial_list = false; -+ ob->alloc_reserve = reserve; -+ spin_unlock(&c->freelist_lock); -+ return ob; -+ } -+ } -+ } -+ -+ if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) { -+ if (cl) -+ closure_wait(&c->open_buckets_wait, cl); -+ -+ if (!c->blocked_allocate_open_bucket) -+ c->blocked_allocate_open_bucket = local_clock(); -+ -+ spin_unlock(&c->freelist_lock); -+ trace_open_bucket_alloc_fail(ca, reserve); -+ return ERR_PTR(-OPEN_BUCKETS_EMPTY); -+ } -+ -+ if (likely(fifo_pop(&ca->free[RESERVE_NONE], bucket))) -+ goto out; -+ -+ switch (reserve) { -+ case RESERVE_ALLOC: -+ if (fifo_pop(&ca->free[RESERVE_BTREE], bucket)) -+ goto out; -+ break; -+ case RESERVE_BTREE: -+ if (fifo_used(&ca->free[RESERVE_BTREE]) * 2 >= -+ ca->free[RESERVE_BTREE].size && -+ fifo_pop(&ca->free[RESERVE_BTREE], bucket)) -+ goto out; -+ break; -+ case RESERVE_MOVINGGC: -+ if (fifo_pop(&ca->free[RESERVE_MOVINGGC], bucket)) -+ goto out; -+ break; -+ default: -+ break; -+ } -+ -+ if (cl) -+ closure_wait(&c->freelist_wait, cl); -+ -+ if (!c->blocked_allocate) -+ c->blocked_allocate = local_clock(); -+ -+ spin_unlock(&c->freelist_lock); -+ -+ trace_bucket_alloc_fail(ca, reserve); -+ return ERR_PTR(-FREELIST_EMPTY); -+out: -+ verify_not_on_freelist(c, ca, bucket); -+ -+ ob = bch2_open_bucket_alloc(c); -+ -+ spin_lock(&ob->lock); -+ buckets = bucket_array(ca); -+ -+ ob->valid = true; -+ ob->sectors_free = ca->mi.bucket_size; -+ ob->alloc_reserve = reserve; -+ ob->ptr = (struct bch_extent_ptr) { -+ .type = 1 << BCH_EXTENT_ENTRY_ptr, -+ .gen = buckets->b[bucket].mark.gen, -+ .offset = bucket_to_sector(ca, bucket), -+ .dev = ca->dev_idx, -+ }; -+ -+ spin_unlock(&ob->lock); -+ -+ if (c->blocked_allocate_open_bucket) { -+ bch2_time_stats_update( -+ &c->times[BCH_TIME_blocked_allocate_open_bucket], -+ c->blocked_allocate_open_bucket); -+ c->blocked_allocate_open_bucket = 0; -+ } -+ -+ if (c->blocked_allocate) { -+ bch2_time_stats_update( -+ &c->times[BCH_TIME_blocked_allocate], -+ c->blocked_allocate); -+ c->blocked_allocate = 0; -+ } -+ -+ spin_unlock(&c->freelist_lock); -+ -+ bch2_wake_allocator(ca); -+ -+ trace_bucket_alloc(ca, reserve); -+ return ob; -+} -+ -+static int __dev_stripe_cmp(struct dev_stripe_state *stripe, -+ unsigned l, unsigned r) -+{ -+ return ((stripe->next_alloc[l] > stripe->next_alloc[r]) - -+ (stripe->next_alloc[l] < stripe->next_alloc[r])); -+} -+ -+#define dev_stripe_cmp(l, r) __dev_stripe_cmp(stripe, l, r) -+ -+struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c, -+ struct dev_stripe_state *stripe, -+ struct bch_devs_mask *devs) -+{ -+ struct dev_alloc_list ret = { .nr = 0 }; -+ unsigned i; -+ -+ for_each_set_bit(i, devs->d, 
BCH_SB_MEMBERS_MAX) -+ ret.devs[ret.nr++] = i; -+ -+ bubble_sort(ret.devs, ret.nr, dev_stripe_cmp); -+ return ret; -+} -+ -+void bch2_dev_stripe_increment(struct bch_dev *ca, -+ struct dev_stripe_state *stripe) -+{ -+ u64 *v = stripe->next_alloc + ca->dev_idx; -+ u64 free_space = dev_buckets_free(ca); -+ u64 free_space_inv = free_space -+ ? div64_u64(1ULL << 48, free_space) -+ : 1ULL << 48; -+ u64 scale = *v / 4; -+ -+ if (*v + free_space_inv >= *v) -+ *v += free_space_inv; -+ else -+ *v = U64_MAX; -+ -+ for (v = stripe->next_alloc; -+ v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++) -+ *v = *v < scale ? 0 : *v - scale; -+} -+ -+#define BUCKET_MAY_ALLOC_PARTIAL (1 << 0) -+#define BUCKET_ALLOC_USE_DURABILITY (1 << 1) -+ -+static void add_new_bucket(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct bch_devs_mask *devs_may_alloc, -+ unsigned *nr_effective, -+ bool *have_cache, -+ unsigned flags, -+ struct open_bucket *ob) -+{ -+ unsigned durability = -+ bch_dev_bkey_exists(c, ob->ptr.dev)->mi.durability; -+ -+ __clear_bit(ob->ptr.dev, devs_may_alloc->d); -+ *nr_effective += (flags & BUCKET_ALLOC_USE_DURABILITY) -+ ? durability : 1; -+ *have_cache |= !durability; -+ -+ ob_push(c, ptrs, ob); -+} -+ -+enum bucket_alloc_ret -+bch2_bucket_alloc_set(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct dev_stripe_state *stripe, -+ struct bch_devs_mask *devs_may_alloc, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ enum alloc_reserve reserve, -+ unsigned flags, -+ struct closure *cl) -+{ -+ struct dev_alloc_list devs_sorted = -+ bch2_dev_alloc_list(c, stripe, devs_may_alloc); -+ struct bch_dev *ca; -+ enum bucket_alloc_ret ret = INSUFFICIENT_DEVICES; -+ unsigned i; -+ -+ BUG_ON(*nr_effective >= nr_replicas); -+ -+ for (i = 0; i < devs_sorted.nr; i++) { -+ struct open_bucket *ob; -+ -+ ca = rcu_dereference(c->devs[devs_sorted.devs[i]]); -+ if (!ca) -+ continue; -+ -+ if (!ca->mi.durability && *have_cache) -+ continue; -+ -+ ob = bch2_bucket_alloc(c, ca, reserve, -+ flags & BUCKET_MAY_ALLOC_PARTIAL, cl); -+ if (IS_ERR(ob)) { -+ ret = -PTR_ERR(ob); -+ -+ if (cl) -+ return ret; -+ continue; -+ } -+ -+ add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_effective, have_cache, flags, ob); -+ -+ bch2_dev_stripe_increment(ca, stripe); -+ -+ if (*nr_effective >= nr_replicas) -+ return ALLOC_SUCCESS; -+ } -+ -+ return ret; -+} -+ -+/* Allocate from stripes: */ -+ -+/* -+ * if we can't allocate a new stripe because there are already too many -+ * partially filled stripes, force allocating from an existing stripe even when -+ * it's to a device we don't want: -+ */ -+ -+static void bucket_alloc_from_stripe(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct write_point *wp, -+ struct bch_devs_mask *devs_may_alloc, -+ u16 target, -+ unsigned erasure_code, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ unsigned flags) -+{ -+ struct dev_alloc_list devs_sorted; -+ struct ec_stripe_head *h; -+ struct open_bucket *ob; -+ struct bch_dev *ca; -+ unsigned i, ec_idx; -+ -+ if (!erasure_code) -+ return; -+ -+ if (nr_replicas < 2) -+ return; -+ -+ if (ec_open_bucket(c, ptrs)) -+ return; -+ -+ h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1); -+ if (!h) -+ return; -+ -+ devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); -+ -+ for (i = 0; i < devs_sorted.nr; i++) -+ open_bucket_for_each(c, &h->s->blocks, ob, ec_idx) -+ if (ob->ptr.dev == devs_sorted.devs[i] && -+ !test_and_set_bit(h->s->data_block_idx[ec_idx], -+ 
h->s->blocks_allocated)) -+ goto got_bucket; -+ goto out_put_head; -+got_bucket: -+ ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ -+ ob->ec_idx = h->s->data_block_idx[ec_idx]; -+ ob->ec = h->s; -+ -+ add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_effective, have_cache, flags, ob); -+ atomic_inc(&h->s->pin); -+out_put_head: -+ bch2_ec_stripe_head_put(c, h); -+} -+ -+/* Sector allocator */ -+ -+static void get_buckets_from_writepoint(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct write_point *wp, -+ struct bch_devs_mask *devs_may_alloc, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ unsigned flags, -+ bool need_ec) -+{ -+ struct open_buckets ptrs_skip = { .nr = 0 }; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ -+ if (*nr_effective < nr_replicas && -+ test_bit(ob->ptr.dev, devs_may_alloc->d) && -+ (ca->mi.durability || -+ (wp->type == BCH_DATA_user && !*have_cache)) && -+ (ob->ec || !need_ec)) { -+ add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_effective, have_cache, -+ flags, ob); -+ } else { -+ ob_push(c, &ptrs_skip, ob); -+ } -+ } -+ wp->ptrs = ptrs_skip; -+} -+ -+static enum bucket_alloc_ret -+open_bucket_add_buckets(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct write_point *wp, -+ struct bch_devs_list *devs_have, -+ u16 target, -+ unsigned erasure_code, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ enum alloc_reserve reserve, -+ unsigned flags, -+ struct closure *_cl) -+{ -+ struct bch_devs_mask devs; -+ struct open_bucket *ob; -+ struct closure *cl = NULL; -+ enum bucket_alloc_ret ret; -+ unsigned i; -+ -+ rcu_read_lock(); -+ devs = target_rw_devs(c, wp->type, target); -+ rcu_read_unlock(); -+ -+ /* Don't allocate from devices we already have pointers to: */ -+ for (i = 0; i < devs_have->nr; i++) -+ __clear_bit(devs_have->devs[i], devs.d); -+ -+ open_bucket_for_each(c, ptrs, ob, i) -+ __clear_bit(ob->ptr.dev, devs.d); -+ -+ if (erasure_code) { -+ if (!ec_open_bucket(c, ptrs)) { -+ get_buckets_from_writepoint(c, ptrs, wp, &devs, -+ nr_replicas, nr_effective, -+ have_cache, flags, true); -+ if (*nr_effective >= nr_replicas) -+ return 0; -+ } -+ -+ if (!ec_open_bucket(c, ptrs)) { -+ bucket_alloc_from_stripe(c, ptrs, wp, &devs, -+ target, erasure_code, -+ nr_replicas, nr_effective, -+ have_cache, flags); -+ if (*nr_effective >= nr_replicas) -+ return 0; -+ } -+ } -+ -+ get_buckets_from_writepoint(c, ptrs, wp, &devs, -+ nr_replicas, nr_effective, -+ have_cache, flags, false); -+ if (*nr_effective >= nr_replicas) -+ return 0; -+ -+ percpu_down_read(&c->mark_lock); -+ rcu_read_lock(); -+ -+retry_blocking: -+ /* -+ * Try nonblocking first, so that if one device is full we'll try from -+ * other devices: -+ */ -+ ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs, -+ nr_replicas, nr_effective, have_cache, -+ reserve, flags, cl); -+ if (ret && ret != INSUFFICIENT_DEVICES && !cl && _cl) { -+ cl = _cl; -+ goto retry_blocking; -+ } -+ -+ rcu_read_unlock(); -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca, -+ struct open_buckets *obs) -+{ -+ struct open_buckets ptrs = { .nr = 0 }; -+ struct open_bucket *ob, *ob2; -+ unsigned i, j; -+ -+ open_bucket_for_each(c, obs, ob, i) { -+ bool drop = !ca || ob->ptr.dev == ca->dev_idx; -+ -+ if (!drop && ob->ec) { -+ mutex_lock(&ob->ec->lock); -+ open_bucket_for_each(c, &ob->ec->blocks, ob2, 
j) -+ drop |= ob2->ptr.dev == ca->dev_idx; -+ open_bucket_for_each(c, &ob->ec->parity, ob2, j) -+ drop |= ob2->ptr.dev == ca->dev_idx; -+ mutex_unlock(&ob->ec->lock); -+ } -+ -+ if (drop) -+ bch2_open_bucket_put(c, ob); -+ else -+ ob_push(c, &ptrs, ob); -+ } -+ -+ *obs = ptrs; -+} -+ -+void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca, -+ struct write_point *wp) -+{ -+ mutex_lock(&wp->lock); -+ bch2_open_buckets_stop_dev(c, ca, &wp->ptrs); -+ mutex_unlock(&wp->lock); -+} -+ -+static inline struct hlist_head *writepoint_hash(struct bch_fs *c, -+ unsigned long write_point) -+{ -+ unsigned hash = -+ hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash))); -+ -+ return &c->write_points_hash[hash]; -+} -+ -+static struct write_point *__writepoint_find(struct hlist_head *head, -+ unsigned long write_point) -+{ -+ struct write_point *wp; -+ -+ hlist_for_each_entry_rcu(wp, head, node) -+ if (wp->write_point == write_point) -+ return wp; -+ -+ return NULL; -+} -+ -+static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor) -+{ -+ u64 stranded = c->write_points_nr * c->bucket_size_max; -+ u64 free = bch2_fs_usage_read_short(c).free; -+ -+ return stranded * factor > free; -+} -+ -+static bool try_increase_writepoints(struct bch_fs *c) -+{ -+ struct write_point *wp; -+ -+ if (c->write_points_nr == ARRAY_SIZE(c->write_points) || -+ too_many_writepoints(c, 32)) -+ return false; -+ -+ wp = c->write_points + c->write_points_nr++; -+ hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point)); -+ return true; -+} -+ -+static bool try_decrease_writepoints(struct bch_fs *c, -+ unsigned old_nr) -+{ -+ struct write_point *wp; -+ -+ mutex_lock(&c->write_points_hash_lock); -+ if (c->write_points_nr < old_nr) { -+ mutex_unlock(&c->write_points_hash_lock); -+ return true; -+ } -+ -+ if (c->write_points_nr == 1 || -+ !too_many_writepoints(c, 8)) { -+ mutex_unlock(&c->write_points_hash_lock); -+ return false; -+ } -+ -+ wp = c->write_points + --c->write_points_nr; -+ -+ hlist_del_rcu(&wp->node); -+ mutex_unlock(&c->write_points_hash_lock); -+ -+ bch2_writepoint_stop(c, NULL, wp); -+ return true; -+} -+ -+static struct write_point *writepoint_find(struct bch_fs *c, -+ unsigned long write_point) -+{ -+ struct write_point *wp, *oldest; -+ struct hlist_head *head; -+ -+ if (!(write_point & 1UL)) { -+ wp = (struct write_point *) write_point; -+ mutex_lock(&wp->lock); -+ return wp; -+ } -+ -+ head = writepoint_hash(c, write_point); -+restart_find: -+ wp = __writepoint_find(head, write_point); -+ if (wp) { -+lock_wp: -+ mutex_lock(&wp->lock); -+ if (wp->write_point == write_point) -+ goto out; -+ mutex_unlock(&wp->lock); -+ goto restart_find; -+ } -+restart_find_oldest: -+ oldest = NULL; -+ for (wp = c->write_points; -+ wp < c->write_points + c->write_points_nr; wp++) -+ if (!oldest || time_before64(wp->last_used, oldest->last_used)) -+ oldest = wp; -+ -+ mutex_lock(&oldest->lock); -+ mutex_lock(&c->write_points_hash_lock); -+ if (oldest >= c->write_points + c->write_points_nr || -+ try_increase_writepoints(c)) { -+ mutex_unlock(&c->write_points_hash_lock); -+ mutex_unlock(&oldest->lock); -+ goto restart_find_oldest; -+ } -+ -+ wp = __writepoint_find(head, write_point); -+ if (wp && wp != oldest) { -+ mutex_unlock(&c->write_points_hash_lock); -+ mutex_unlock(&oldest->lock); -+ goto lock_wp; -+ } -+ -+ wp = oldest; -+ hlist_del_rcu(&wp->node); -+ wp->write_point = write_point; -+ hlist_add_head_rcu(&wp->node, head); -+ mutex_unlock(&c->write_points_hash_lock); -+out: -+ 
wp->last_used = sched_clock(); -+ return wp; -+} -+ -+/* -+ * Get us an open_bucket we can allocate from, return with it locked: -+ */ -+struct write_point *bch2_alloc_sectors_start(struct bch_fs *c, -+ unsigned target, -+ unsigned erasure_code, -+ struct write_point_specifier write_point, -+ struct bch_devs_list *devs_have, -+ unsigned nr_replicas, -+ unsigned nr_replicas_required, -+ enum alloc_reserve reserve, -+ unsigned flags, -+ struct closure *cl) -+{ -+ struct write_point *wp; -+ struct open_bucket *ob; -+ struct open_buckets ptrs; -+ unsigned nr_effective, write_points_nr; -+ unsigned ob_flags = 0; -+ bool have_cache; -+ enum bucket_alloc_ret ret; -+ int i; -+ -+ if (!(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) -+ ob_flags |= BUCKET_ALLOC_USE_DURABILITY; -+ -+ BUG_ON(!nr_replicas || !nr_replicas_required); -+retry: -+ ptrs.nr = 0; -+ nr_effective = 0; -+ write_points_nr = c->write_points_nr; -+ have_cache = false; -+ -+ wp = writepoint_find(c, write_point.v); -+ -+ if (wp->type == BCH_DATA_user) -+ ob_flags |= BUCKET_MAY_ALLOC_PARTIAL; -+ -+ /* metadata may not allocate on cache devices: */ -+ if (wp->type != BCH_DATA_user) -+ have_cache = true; -+ -+ if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) { -+ ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, -+ target, erasure_code, -+ nr_replicas, &nr_effective, -+ &have_cache, reserve, -+ ob_flags, cl); -+ } else { -+ ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, -+ target, erasure_code, -+ nr_replicas, &nr_effective, -+ &have_cache, reserve, -+ ob_flags, NULL); -+ if (!ret) -+ goto alloc_done; -+ -+ ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, -+ 0, erasure_code, -+ nr_replicas, &nr_effective, -+ &have_cache, reserve, -+ ob_flags, cl); -+ } -+alloc_done: -+ BUG_ON(!ret && nr_effective < nr_replicas); -+ -+ if (erasure_code && !ec_open_bucket(c, &ptrs)) -+ pr_debug("failed to get ec bucket: ret %u", ret); -+ -+ if (ret == INSUFFICIENT_DEVICES && -+ nr_effective >= nr_replicas_required) -+ ret = 0; -+ -+ if (ret) -+ goto err; -+ -+ /* Free buckets we didn't use: */ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ open_bucket_free_unused(c, wp, ob); -+ -+ wp->ptrs = ptrs; -+ -+ wp->sectors_free = UINT_MAX; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ wp->sectors_free = min(wp->sectors_free, ob->sectors_free); -+ -+ BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX); -+ -+ verify_not_stale(c, &wp->ptrs); -+ -+ return wp; -+err: -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ if (ptrs.nr < ARRAY_SIZE(ptrs.v)) -+ ob_push(c, &ptrs, ob); -+ else -+ open_bucket_free_unused(c, wp, ob); -+ wp->ptrs = ptrs; -+ -+ mutex_unlock(&wp->lock); -+ -+ if (ret == FREELIST_EMPTY && -+ try_decrease_writepoints(c, write_points_nr)) -+ goto retry; -+ -+ switch (ret) { -+ case OPEN_BUCKETS_EMPTY: -+ case FREELIST_EMPTY: -+ return cl ? 
ERR_PTR(-EAGAIN) : ERR_PTR(-ENOSPC); -+ case INSUFFICIENT_DEVICES: -+ return ERR_PTR(-EROFS); -+ default: -+ BUG(); -+ } -+} -+ -+/* -+ * Append pointers to the space we just allocated to @k, and mark @sectors space -+ * as allocated out of @ob -+ */ -+void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp, -+ struct bkey_i *k, unsigned sectors) -+ -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ BUG_ON(sectors > wp->sectors_free); -+ wp->sectors_free -= sectors; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ struct bch_extent_ptr tmp = ob->ptr; -+ -+ tmp.cached = !ca->mi.durability && -+ wp->type == BCH_DATA_user; -+ -+ tmp.offset += ca->mi.bucket_size - ob->sectors_free; -+ bch2_bkey_append_ptr(k, tmp); -+ -+ BUG_ON(sectors > ob->sectors_free); -+ ob->sectors_free -= sectors; -+ } -+} -+ -+/* -+ * Append pointers to the space we just allocated to @k, and mark @sectors space -+ * as allocated out of @ob -+ */ -+void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp) -+{ -+ struct open_buckets ptrs = { .nr = 0 }, keep = { .nr = 0 }; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob); -+ wp->ptrs = keep; -+ -+ mutex_unlock(&wp->lock); -+ -+ bch2_open_buckets_put(c, &ptrs); -+} -+ -+static inline void writepoint_init(struct write_point *wp, -+ enum bch_data_type type) -+{ -+ mutex_init(&wp->lock); -+ wp->type = type; -+} -+ -+void bch2_fs_allocator_foreground_init(struct bch_fs *c) -+{ -+ struct open_bucket *ob; -+ struct write_point *wp; -+ -+ mutex_init(&c->write_points_hash_lock); -+ c->write_points_nr = ARRAY_SIZE(c->write_points); -+ -+ /* open bucket 0 is a sentinal NULL: */ -+ spin_lock_init(&c->open_buckets[0].lock); -+ -+ for (ob = c->open_buckets + 1; -+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { -+ spin_lock_init(&ob->lock); -+ c->open_buckets_nr_free++; -+ -+ ob->freelist = c->open_buckets_freelist; -+ c->open_buckets_freelist = ob - c->open_buckets; -+ } -+ -+ writepoint_init(&c->btree_write_point, BCH_DATA_btree); -+ writepoint_init(&c->rebalance_write_point, BCH_DATA_user); -+ writepoint_init(&c->copygc_write_point, BCH_DATA_user); -+ -+ for (wp = c->write_points; -+ wp < c->write_points + c->write_points_nr; wp++) { -+ writepoint_init(wp, BCH_DATA_user); -+ -+ wp->last_used = sched_clock(); -+ wp->write_point = (unsigned long) wp; -+ hlist_add_head_rcu(&wp->node, -+ writepoint_hash(c, wp->write_point)); -+ } -+} -diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h -new file mode 100644 -index 000000000000..c658295cb8e0 ---- /dev/null -+++ b/fs/bcachefs/alloc_foreground.h -@@ -0,0 +1,138 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ALLOC_FOREGROUND_H -+#define _BCACHEFS_ALLOC_FOREGROUND_H -+ -+#include "bcachefs.h" -+#include "alloc_types.h" -+ -+#include -+ -+struct bkey; -+struct bch_dev; -+struct bch_fs; -+struct bch_devs_List; -+ -+enum bucket_alloc_ret { -+ ALLOC_SUCCESS, -+ OPEN_BUCKETS_EMPTY, -+ FREELIST_EMPTY, /* Allocator thread not keeping up */ -+ INSUFFICIENT_DEVICES, -+}; -+ -+struct dev_alloc_list { -+ unsigned nr; -+ u8 devs[BCH_SB_MEMBERS_MAX]; -+}; -+ -+struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *, -+ struct dev_stripe_state *, -+ struct bch_devs_mask *); -+void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *); -+ -+long bch2_bucket_alloc_new_fs(struct bch_dev *); -+ 
-+struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *, -+ enum alloc_reserve, bool, -+ struct closure *); -+ -+static inline void ob_push(struct bch_fs *c, struct open_buckets *obs, -+ struct open_bucket *ob) -+{ -+ BUG_ON(obs->nr >= ARRAY_SIZE(obs->v)); -+ -+ obs->v[obs->nr++] = ob - c->open_buckets; -+} -+ -+#define open_bucket_for_each(_c, _obs, _ob, _i) \ -+ for ((_i) = 0; \ -+ (_i) < (_obs)->nr && \ -+ ((_ob) = (_c)->open_buckets + (_obs)->v[_i], true); \ -+ (_i)++) -+ -+static inline struct open_bucket *ec_open_bucket(struct bch_fs *c, -+ struct open_buckets *obs) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, obs, ob, i) -+ if (ob->ec) -+ return ob; -+ -+ return NULL; -+} -+ -+void bch2_open_bucket_write_error(struct bch_fs *, -+ struct open_buckets *, unsigned); -+ -+void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *); -+ -+static inline void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) -+{ -+ if (atomic_dec_and_test(&ob->pin)) -+ __bch2_open_bucket_put(c, ob); -+} -+ -+static inline void bch2_open_buckets_put(struct bch_fs *c, -+ struct open_buckets *ptrs) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, ptrs, ob, i) -+ bch2_open_bucket_put(c, ob); -+ ptrs->nr = 0; -+} -+ -+static inline void bch2_open_bucket_get(struct bch_fs *c, -+ struct write_point *wp, -+ struct open_buckets *ptrs) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ ob->type = wp->type; -+ atomic_inc(&ob->pin); -+ ob_push(c, ptrs, ob); -+ } -+} -+ -+enum bucket_alloc_ret -+bch2_bucket_alloc_set(struct bch_fs *, struct open_buckets *, -+ struct dev_stripe_state *, struct bch_devs_mask *, -+ unsigned, unsigned *, bool *, enum alloc_reserve, -+ unsigned, struct closure *); -+ -+struct write_point *bch2_alloc_sectors_start(struct bch_fs *, -+ unsigned, unsigned, -+ struct write_point_specifier, -+ struct bch_devs_list *, -+ unsigned, unsigned, -+ enum alloc_reserve, -+ unsigned, -+ struct closure *); -+ -+void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *, -+ struct bkey_i *, unsigned); -+void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *); -+ -+void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *, -+ struct open_buckets *); -+ -+void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *, -+ struct write_point *); -+ -+static inline struct write_point_specifier writepoint_hashed(unsigned long v) -+{ -+ return (struct write_point_specifier) { .v = v | 1 }; -+} -+ -+static inline struct write_point_specifier writepoint_ptr(struct write_point *wp) -+{ -+ return (struct write_point_specifier) { .v = (unsigned long) wp }; -+} -+ -+void bch2_fs_allocator_foreground_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */ -diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h -new file mode 100644 -index 000000000000..20705460bb0a ---- /dev/null -+++ b/fs/bcachefs/alloc_types.h -@@ -0,0 +1,113 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ALLOC_TYPES_H -+#define _BCACHEFS_ALLOC_TYPES_H -+ -+#include -+#include -+ -+#include "clock_types.h" -+#include "fifo.h" -+ -+struct ec_bucket_buf; -+ -+/* There's two of these clocks, one for reads and one for writes: */ -+struct bucket_clock { -+ /* -+ * "now" in (read/write) IO time - incremented whenever we do X amount -+ * of reads or writes. 
-+ * -+ * Goes with the bucket read/write prios: when we read or write to a -+ * bucket we reset the bucket's prio to the current hand; thus hand - -+ * prio = time since bucket was last read/written. -+ * -+ * The units are some amount (bytes/sectors) of data read/written, and -+ * the units can change on the fly if we need to rescale to fit -+ * everything in a u16 - your only guarantee is that the units are -+ * consistent. -+ */ -+ u16 hand; -+ u16 max_last_io; -+ -+ int rw; -+ -+ struct io_timer rescale; -+ struct mutex lock; -+}; -+ -+/* There is one reserve for each type of btree, one for prios and gens -+ * and one for moving GC */ -+enum alloc_reserve { -+ RESERVE_ALLOC = -1, -+ RESERVE_BTREE = 0, -+ RESERVE_MOVINGGC = 1, -+ RESERVE_NONE = 2, -+ RESERVE_NR = 3, -+}; -+ -+typedef FIFO(long) alloc_fifo; -+ -+#define OPEN_BUCKETS_COUNT 1024 -+ -+#define WRITE_POINT_HASH_NR 32 -+#define WRITE_POINT_MAX 32 -+ -+typedef u16 open_bucket_idx_t; -+ -+struct open_bucket { -+ spinlock_t lock; -+ atomic_t pin; -+ open_bucket_idx_t freelist; -+ -+ /* -+ * When an open bucket has an ec_stripe attached, this is the index of -+ * the block in the stripe this open_bucket corresponds to: -+ */ -+ u8 ec_idx; -+ u8 type; -+ unsigned valid:1; -+ unsigned on_partial_list:1; -+ int alloc_reserve:3; -+ unsigned sectors_free; -+ struct bch_extent_ptr ptr; -+ struct ec_stripe_new *ec; -+}; -+ -+#define OPEN_BUCKET_LIST_MAX 15 -+ -+struct open_buckets { -+ open_bucket_idx_t nr; -+ open_bucket_idx_t v[OPEN_BUCKET_LIST_MAX]; -+}; -+ -+struct dev_stripe_state { -+ u64 next_alloc[BCH_SB_MEMBERS_MAX]; -+}; -+ -+struct write_point { -+ struct hlist_node node; -+ struct mutex lock; -+ u64 last_used; -+ unsigned long write_point; -+ enum bch_data_type type; -+ bool is_ec; -+ -+ /* calculated based on how many pointers we're actually going to use: */ -+ unsigned sectors_free; -+ -+ struct open_buckets ptrs; -+ struct dev_stripe_state stripe; -+}; -+ -+struct write_point_specifier { -+ unsigned long v; -+}; -+ -+struct alloc_heap_entry { -+ size_t bucket; -+ size_t nr; -+ unsigned long key; -+}; -+ -+typedef HEAP(struct alloc_heap_entry) alloc_heap; -+ -+#endif /* _BCACHEFS_ALLOC_TYPES_H */ -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -new file mode 100644 -index 000000000000..29f411635f29 ---- /dev/null -+++ b/fs/bcachefs/bcachefs.h -@@ -0,0 +1,882 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_H -+#define _BCACHEFS_H -+ -+/* -+ * SOME HIGH LEVEL CODE DOCUMENTATION: -+ * -+ * Bcache mostly works with cache sets, cache devices, and backing devices. -+ * -+ * Support for multiple cache devices hasn't quite been finished off yet, but -+ * it's about 95% plumbed through. A cache set and its cache devices is sort of -+ * like a md raid array and its component devices. Most of the code doesn't care -+ * about individual cache devices, the main abstraction is the cache set. -+ * -+ * Multiple cache devices is intended to give us the ability to mirror dirty -+ * cached data and metadata, without mirroring clean cached data. -+ * -+ * Backing devices are different, in that they have a lifetime independent of a -+ * cache set. When you register a newly formatted backing device it'll come up -+ * in passthrough mode, and then you can attach and detach a backing device from -+ * a cache set at runtime - while it's mounted and in use. Detaching implicitly -+ * invalidates any cached data for that backing device. -+ * -+ * A cache set can have multiple (many) backing devices attached to it. 
-+ * -+ * There's also flash only volumes - this is the reason for the distinction -+ * between struct cached_dev and struct bcache_device. A flash only volume -+ * works much like a bcache device that has a backing device, except the -+ * "cached" data is always dirty. The end result is that we get thin -+ * provisioning with very little additional code. -+ * -+ * Flash only volumes work but they're not production ready because the moving -+ * garbage collector needs more work. More on that later. -+ * -+ * BUCKETS/ALLOCATION: -+ * -+ * Bcache is primarily designed for caching, which means that in normal -+ * operation all of our available space will be allocated. Thus, we need an -+ * efficient way of deleting things from the cache so we can write new things to -+ * it. -+ * -+ * To do this, we first divide the cache device up into buckets. A bucket is the -+ * unit of allocation; they're typically around 1 mb - anywhere from 128k to 2M+ -+ * works efficiently. -+ * -+ * Each bucket has a 16 bit priority, and an 8 bit generation associated with -+ * it. The gens and priorities for all the buckets are stored contiguously and -+ * packed on disk (in a linked list of buckets - aside from the superblock, all -+ * of bcache's metadata is stored in buckets). -+ * -+ * The priority is used to implement an LRU. We reset a bucket's priority when -+ * we allocate it or on cache it, and every so often we decrement the priority -+ * of each bucket. It could be used to implement something more sophisticated, -+ * if anyone ever gets around to it. -+ * -+ * The generation is used for invalidating buckets. Each pointer also has an 8 -+ * bit generation embedded in it; for a pointer to be considered valid, its gen -+ * must match the gen of the bucket it points into. Thus, to reuse a bucket all -+ * we have to do is increment its gen (and write its new gen to disk; we batch -+ * this up). -+ * -+ * Bcache is entirely COW - we never write twice to a bucket, even buckets that -+ * contain metadata (including btree nodes). -+ * -+ * THE BTREE: -+ * -+ * Bcache is in large part design around the btree. -+ * -+ * At a high level, the btree is just an index of key -> ptr tuples. -+ * -+ * Keys represent extents, and thus have a size field. Keys also have a variable -+ * number of pointers attached to them (potentially zero, which is handy for -+ * invalidating the cache). -+ * -+ * The key itself is an inode:offset pair. The inode number corresponds to a -+ * backing device or a flash only volume. The offset is the ending offset of the -+ * extent within the inode - not the starting offset; this makes lookups -+ * slightly more convenient. -+ * -+ * Pointers contain the cache device id, the offset on that device, and an 8 bit -+ * generation number. More on the gen later. -+ * -+ * Index lookups are not fully abstracted - cache lookups in particular are -+ * still somewhat mixed in with the btree code, but things are headed in that -+ * direction. -+ * -+ * Updates are fairly well abstracted, though. There are two different ways of -+ * updating the btree; insert and replace. -+ * -+ * BTREE_INSERT will just take a list of keys and insert them into the btree - -+ * overwriting (possibly only partially) any extents they overlap with. This is -+ * used to update the index after a write. -+ * -+ * BTREE_REPLACE is really cmpxchg(); it inserts a key into the btree iff it is -+ * overwriting a key that matches another given key. 
This is used for inserting -+ * data into the cache after a cache miss, and for background writeback, and for -+ * the moving garbage collector. -+ * -+ * There is no "delete" operation; deleting things from the index is -+ * accomplished by either by invalidating pointers (by incrementing a bucket's -+ * gen) or by inserting a key with 0 pointers - which will overwrite anything -+ * previously present at that location in the index. -+ * -+ * This means that there are always stale/invalid keys in the btree. They're -+ * filtered out by the code that iterates through a btree node, and removed when -+ * a btree node is rewritten. -+ * -+ * BTREE NODES: -+ * -+ * Our unit of allocation is a bucket, and we we can't arbitrarily allocate and -+ * free smaller than a bucket - so, that's how big our btree nodes are. -+ * -+ * (If buckets are really big we'll only use part of the bucket for a btree node -+ * - no less than 1/4th - but a bucket still contains no more than a single -+ * btree node. I'd actually like to change this, but for now we rely on the -+ * bucket's gen for deleting btree nodes when we rewrite/split a node.) -+ * -+ * Anyways, btree nodes are big - big enough to be inefficient with a textbook -+ * btree implementation. -+ * -+ * The way this is solved is that btree nodes are internally log structured; we -+ * can append new keys to an existing btree node without rewriting it. This -+ * means each set of keys we write is sorted, but the node is not. -+ * -+ * We maintain this log structure in memory - keeping 1Mb of keys sorted would -+ * be expensive, and we have to distinguish between the keys we have written and -+ * the keys we haven't. So to do a lookup in a btree node, we have to search -+ * each sorted set. But we do merge written sets together lazily, so the cost of -+ * these extra searches is quite low (normally most of the keys in a btree node -+ * will be in one big set, and then there'll be one or two sets that are much -+ * smaller). -+ * -+ * This log structure makes bcache's btree more of a hybrid between a -+ * conventional btree and a compacting data structure, with some of the -+ * advantages of both. -+ * -+ * GARBAGE COLLECTION: -+ * -+ * We can't just invalidate any bucket - it might contain dirty data or -+ * metadata. If it once contained dirty data, other writes might overwrite it -+ * later, leaving no valid pointers into that bucket in the index. -+ * -+ * Thus, the primary purpose of garbage collection is to find buckets to reuse. -+ * It also counts how much valid data it each bucket currently contains, so that -+ * allocation can reuse buckets sooner when they've been mostly overwritten. -+ * -+ * It also does some things that are really internal to the btree -+ * implementation. If a btree node contains pointers that are stale by more than -+ * some threshold, it rewrites the btree node to avoid the bucket's generation -+ * wrapping around. It also merges adjacent btree nodes if they're empty enough. -+ * -+ * THE JOURNAL: -+ * -+ * Bcache's journal is not necessary for consistency; we always strictly -+ * order metadata writes so that the btree and everything else is consistent on -+ * disk in the event of an unclean shutdown, and in fact bcache had writeback -+ * caching (with recovery from unclean shutdown) before journalling was -+ * implemented. 
-+ * -+ * Rather, the journal is purely a performance optimization; we can't complete a -+ * write until we've updated the index on disk, otherwise the cache would be -+ * inconsistent in the event of an unclean shutdown. This means that without the -+ * journal, on random write workloads we constantly have to update all the leaf -+ * nodes in the btree, and those writes will be mostly empty (appending at most -+ * a few keys each) - highly inefficient in terms of amount of metadata writes, -+ * and it puts more strain on the various btree resorting/compacting code. -+ * -+ * The journal is just a log of keys we've inserted; on startup we just reinsert -+ * all the keys in the open journal entries. That means that when we're updating -+ * a node in the btree, we can wait until a 4k block of keys fills up before -+ * writing them out. -+ * -+ * For simplicity, we only journal updates to leaf nodes; updates to parent -+ * nodes are rare enough (since our leaf nodes are huge) that it wasn't worth -+ * the complexity to deal with journalling them (in particular, journal replay) -+ * - updates to non leaf nodes just happen synchronously (see btree_split()). -+ */ -+ -+#undef pr_fmt -+#define pr_fmt(fmt) "bcachefs: %s() " fmt "\n", __func__ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "bcachefs_format.h" -+#include "fifo.h" -+#include "opts.h" -+#include "util.h" -+ -+#define dynamic_fault(...) 0 -+#define race_fault(...) 0 -+ -+#define bch2_fs_init_fault(name) \ -+ dynamic_fault("bcachefs:bch_fs_init:" name) -+#define bch2_meta_read_fault(name) \ -+ dynamic_fault("bcachefs:meta:read:" name) -+#define bch2_meta_write_fault(name) \ -+ dynamic_fault("bcachefs:meta:write:" name) -+ -+#ifdef __KERNEL__ -+#define bch2_fmt(_c, fmt) "bcachefs (%s): " fmt "\n", ((_c)->name) -+#else -+#define bch2_fmt(_c, fmt) fmt "\n" -+#endif -+ -+#define bch_info(c, fmt, ...) \ -+ printk(KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_notice(c, fmt, ...) \ -+ printk(KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_warn(c, fmt, ...) \ -+ printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_warn_ratelimited(c, fmt, ...) \ -+ printk_ratelimited(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_err(c, fmt, ...) \ -+ printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_err_ratelimited(c, fmt, ...) \ -+ printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) -+ -+#define bch_verbose(c, fmt, ...) \ -+do { \ -+ if ((c)->opts.verbose) \ -+ bch_info(c, fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+#define pr_verbose_init(opts, fmt, ...) 
\ -+do { \ -+ if (opt_get(opts, verbose)) \ -+ pr_info(fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+/* Parameters that are useful for debugging, but should always be compiled in: */ -+#define BCH_DEBUG_PARAMS_ALWAYS() \ -+ BCH_DEBUG_PARAM(key_merging_disabled, \ -+ "Disables merging of extents") \ -+ BCH_DEBUG_PARAM(btree_gc_always_rewrite, \ -+ "Causes mark and sweep to compact and rewrite every " \ -+ "btree node it traverses") \ -+ BCH_DEBUG_PARAM(btree_gc_rewrite_disabled, \ -+ "Disables rewriting of btree nodes during mark and sweep")\ -+ BCH_DEBUG_PARAM(btree_shrinker_disabled, \ -+ "Disables the shrinker callback for the btree node cache") -+ -+/* Parameters that should only be compiled in in debug mode: */ -+#define BCH_DEBUG_PARAMS_DEBUG() \ -+ BCH_DEBUG_PARAM(expensive_debug_checks, \ -+ "Enables various runtime debugging checks that " \ -+ "significantly affect performance") \ -+ BCH_DEBUG_PARAM(debug_check_iterators, \ -+ "Enables extra verification for btree iterators") \ -+ BCH_DEBUG_PARAM(debug_check_bkeys, \ -+ "Run bkey_debugcheck (primarily checking GC/allocation "\ -+ "information) when iterating over keys") \ -+ BCH_DEBUG_PARAM(verify_btree_ondisk, \ -+ "Reread btree nodes at various points to verify the " \ -+ "mergesort in the read path against modifications " \ -+ "done in memory") \ -+ BCH_DEBUG_PARAM(journal_seq_verify, \ -+ "Store the journal sequence number in the version " \ -+ "number of every btree key, and verify that btree " \ -+ "update ordering is preserved during recovery") \ -+ BCH_DEBUG_PARAM(inject_invalid_keys, \ -+ "Store the journal sequence number in the version " \ -+ "number of every btree key, and verify that btree " \ -+ "update ordering is preserved during recovery") \ -+ BCH_DEBUG_PARAM(test_alloc_startup, \ -+ "Force allocator startup to use the slowpath where it" \ -+ "can't find enough free buckets without invalidating" \ -+ "cached data") \ -+ BCH_DEBUG_PARAM(force_reconstruct_read, \ -+ "Force reads to use the reconstruct path, when reading" \ -+ "from erasure coded extents") \ -+ BCH_DEBUG_PARAM(test_restart_gc, \ -+ "Test restarting mark and sweep gc when bucket gens change") -+ -+#define BCH_DEBUG_PARAMS_ALL() BCH_DEBUG_PARAMS_ALWAYS() BCH_DEBUG_PARAMS_DEBUG() -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALL() -+#else -+#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALWAYS() -+#endif -+ -+#define BCH_TIME_STATS() \ -+ x(btree_node_mem_alloc) \ -+ x(btree_node_split) \ -+ x(btree_node_sort) \ -+ x(btree_node_read) \ -+ x(btree_gc) \ -+ x(btree_lock_contended_read) \ -+ x(btree_lock_contended_intent) \ -+ x(btree_lock_contended_write) \ -+ x(data_write) \ -+ x(data_read) \ -+ x(data_promote) \ -+ x(journal_write) \ -+ x(journal_delay) \ -+ x(journal_flush_seq) \ -+ x(blocked_journal) \ -+ x(blocked_allocate) \ -+ x(blocked_allocate_open_bucket) -+ -+enum bch_time_stats { -+#define x(name) BCH_TIME_##name, -+ BCH_TIME_STATS() -+#undef x -+ BCH_TIME_STAT_NR -+}; -+ -+#include "alloc_types.h" -+#include "btree_types.h" -+#include "buckets_types.h" -+#include "clock_types.h" -+#include "ec_types.h" -+#include "journal_types.h" -+#include "keylist_types.h" -+#include "quota_types.h" -+#include "rebalance_types.h" -+#include "replicas_types.h" -+#include "super_types.h" -+ -+/* Number of nodes btree coalesce will try to coalesce at once */ -+#define GC_MERGE_NODES 4U -+ -+/* Maximum number of nodes we might need to allocate atomically: */ -+#define BTREE_RESERVE_MAX (BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1)) -+ 
-+/* Size of the freelist we allocate btree nodes from: */ -+#define BTREE_NODE_RESERVE (BTREE_RESERVE_MAX * 4) -+ -+#define BTREE_NODE_OPEN_BUCKET_RESERVE (BTREE_RESERVE_MAX * BCH_REPLICAS_MAX) -+ -+struct btree; -+ -+enum gc_phase { -+ GC_PHASE_NOT_RUNNING, -+ GC_PHASE_START, -+ GC_PHASE_SB, -+ -+ GC_PHASE_BTREE_EC, -+ GC_PHASE_BTREE_EXTENTS, -+ GC_PHASE_BTREE_INODES, -+ GC_PHASE_BTREE_DIRENTS, -+ GC_PHASE_BTREE_XATTRS, -+ GC_PHASE_BTREE_ALLOC, -+ GC_PHASE_BTREE_QUOTAS, -+ GC_PHASE_BTREE_REFLINK, -+ -+ GC_PHASE_PENDING_DELETE, -+ GC_PHASE_ALLOC, -+}; -+ -+struct gc_pos { -+ enum gc_phase phase; -+ struct bpos pos; -+ unsigned level; -+}; -+ -+struct io_count { -+ u64 sectors[2][BCH_DATA_NR]; -+}; -+ -+struct bch_dev { -+ struct kobject kobj; -+ struct percpu_ref ref; -+ struct completion ref_completion; -+ struct percpu_ref io_ref; -+ struct completion io_ref_completion; -+ -+ struct bch_fs *fs; -+ -+ u8 dev_idx; -+ /* -+ * Cached version of this device's member info from superblock -+ * Committed by bch2_write_super() -> bch_fs_mi_update() -+ */ -+ struct bch_member_cpu mi; -+ uuid_le uuid; -+ char name[BDEVNAME_SIZE]; -+ -+ struct bch_sb_handle disk_sb; -+ struct bch_sb *sb_read_scratch; -+ int sb_write_error; -+ -+ struct bch_devs_mask self; -+ -+ /* biosets used in cloned bios for writing multiple replicas */ -+ struct bio_set replica_set; -+ -+ /* -+ * Buckets: -+ * Per-bucket arrays are protected by c->mark_lock, bucket_lock and -+ * gc_lock, for device resize - holding any is sufficient for access: -+ * Or rcu_read_lock(), but only for ptr_stale(): -+ */ -+ struct bucket_array __rcu *buckets[2]; -+ unsigned long *buckets_nouse; -+ struct rw_semaphore bucket_lock; -+ -+ struct bch_dev_usage __percpu *usage[2]; -+ -+ /* Allocator: */ -+ struct task_struct __rcu *alloc_thread; -+ -+ /* -+ * free: Buckets that are ready to be used -+ * -+ * free_inc: Incoming buckets - these are buckets that currently have -+ * cached data in them, and we can't reuse them until after we write -+ * their new gen to disk. 
After prio_write() finishes writing the new -+ * gens/prios, they'll be moved to the free list (and possibly discarded -+ * in the process) -+ */ -+ alloc_fifo free[RESERVE_NR]; -+ alloc_fifo free_inc; -+ -+ open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT]; -+ open_bucket_idx_t open_buckets_partial_nr; -+ -+ size_t fifo_last_bucket; -+ -+ /* last calculated minimum prio */ -+ u16 max_last_bucket_io[2]; -+ -+ size_t inc_gen_needs_gc; -+ size_t inc_gen_really_needs_gc; -+ -+ /* -+ * XXX: this should be an enum for allocator state, so as to include -+ * error state -+ */ -+ enum { -+ ALLOCATOR_STOPPED, -+ ALLOCATOR_RUNNING, -+ ALLOCATOR_BLOCKED, -+ ALLOCATOR_BLOCKED_FULL, -+ } allocator_state; -+ -+ alloc_heap alloc_heap; -+ -+ atomic64_t rebalance_work; -+ -+ struct journal_device journal; -+ -+ struct work_struct io_error_work; -+ -+ /* The rest of this all shows up in sysfs */ -+ atomic64_t cur_latency[2]; -+ struct time_stats io_latency[2]; -+ -+#define CONGESTED_MAX 1024 -+ atomic_t congested; -+ u64 congested_last; -+ -+ struct io_count __percpu *io_done; -+}; -+ -+enum { -+ /* startup: */ -+ BCH_FS_ALLOC_READ_DONE, -+ BCH_FS_ALLOC_CLEAN, -+ BCH_FS_ALLOCATOR_RUNNING, -+ BCH_FS_ALLOCATOR_STOPPING, -+ BCH_FS_INITIAL_GC_DONE, -+ BCH_FS_BTREE_INTERIOR_REPLAY_DONE, -+ BCH_FS_FSCK_DONE, -+ BCH_FS_STARTED, -+ BCH_FS_RW, -+ -+ /* shutdown: */ -+ BCH_FS_STOPPING, -+ BCH_FS_EMERGENCY_RO, -+ BCH_FS_WRITE_DISABLE_COMPLETE, -+ -+ /* errors: */ -+ BCH_FS_ERROR, -+ BCH_FS_ERRORS_FIXED, -+ -+ /* misc: */ -+ BCH_FS_FIXED_GENS, -+ BCH_FS_ALLOC_WRITTEN, -+ BCH_FS_REBUILD_REPLICAS, -+ BCH_FS_HOLD_BTREE_WRITES, -+}; -+ -+struct btree_debug { -+ unsigned id; -+ struct dentry *btree; -+ struct dentry *btree_format; -+ struct dentry *failed; -+}; -+ -+struct bch_fs_pcpu { -+ u64 sectors_available; -+}; -+ -+struct journal_seq_blacklist_table { -+ size_t nr; -+ struct journal_seq_blacklist_table_entry { -+ u64 start; -+ u64 end; -+ bool dirty; -+ } entries[0]; -+}; -+ -+struct journal_keys { -+ struct journal_key { -+ enum btree_id btree_id:8; -+ unsigned level:8; -+ struct bkey_i *k; -+ u32 journal_seq; -+ u32 journal_offset; -+ } *d; -+ size_t nr; -+ u64 journal_seq_base; -+}; -+ -+struct bch_fs { -+ struct closure cl; -+ -+ struct list_head list; -+ struct kobject kobj; -+ struct kobject internal; -+ struct kobject opts_dir; -+ struct kobject time_stats; -+ unsigned long flags; -+ -+ int minor; -+ struct device *chardev; -+ struct super_block *vfs_sb; -+ char name[40]; -+ -+ /* ro/rw, add/remove/resize devices: */ -+ struct rw_semaphore state_lock; -+ -+ /* Counts outstanding writes, for clean transition to read-only */ -+ struct percpu_ref writes; -+ struct work_struct read_only_work; -+ -+ struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX]; -+ -+ struct bch_replicas_cpu replicas; -+ struct bch_replicas_cpu replicas_gc; -+ struct mutex replicas_gc_lock; -+ -+ struct journal_entry_res replicas_journal_res; -+ -+ struct bch_disk_groups_cpu __rcu *disk_groups; -+ -+ struct bch_opts opts; -+ -+ /* Updated by bch2_sb_update():*/ -+ struct { -+ uuid_le uuid; -+ uuid_le user_uuid; -+ -+ u16 version; -+ u16 encoded_extent_max; -+ -+ u8 nr_devices; -+ u8 clean; -+ -+ u8 encryption_type; -+ -+ u64 time_base_lo; -+ u32 time_base_hi; -+ u32 time_precision; -+ u64 features; -+ u64 compat; -+ } sb; -+ -+ struct bch_sb_handle disk_sb; -+ -+ unsigned short block_bits; /* ilog2(block_size) */ -+ -+ u16 btree_foreground_merge_threshold; -+ -+ struct closure sb_write; -+ struct mutex sb_lock; -+ -+ /* BTREE CACHE */ -+ 
struct bio_set btree_bio; -+ -+ struct btree_root btree_roots[BTREE_ID_NR]; -+ struct mutex btree_root_lock; -+ -+ struct btree_cache btree_cache; -+ -+ /* -+ * Cache of allocated btree nodes - if we allocate a btree node and -+ * don't use it, if we free it that space can't be reused until going -+ * _all_ the way through the allocator (which exposes us to a livelock -+ * when allocating btree reserves fail halfway through) - instead, we -+ * can stick them here: -+ */ -+ struct btree_alloc btree_reserve_cache[BTREE_NODE_RESERVE * 2]; -+ unsigned btree_reserve_cache_nr; -+ struct mutex btree_reserve_cache_lock; -+ -+ mempool_t btree_interior_update_pool; -+ struct list_head btree_interior_update_list; -+ struct list_head btree_interior_updates_unwritten; -+ struct mutex btree_interior_update_lock; -+ struct closure_waitlist btree_interior_update_wait; -+ -+ struct workqueue_struct *btree_interior_update_worker; -+ struct work_struct btree_interior_update_work; -+ -+ /* btree_iter.c: */ -+ struct mutex btree_trans_lock; -+ struct list_head btree_trans_list; -+ mempool_t btree_iters_pool; -+ -+ struct btree_key_cache btree_key_cache; -+ -+ struct workqueue_struct *wq; -+ /* copygc needs its own workqueue for index updates.. */ -+ struct workqueue_struct *copygc_wq; -+ struct workqueue_struct *journal_reclaim_wq; -+ -+ /* ALLOCATION */ -+ struct delayed_work pd_controllers_update; -+ unsigned pd_controllers_update_seconds; -+ -+ struct bch_devs_mask rw_devs[BCH_DATA_NR]; -+ -+ u64 capacity; /* sectors */ -+ -+ /* -+ * When capacity _decreases_ (due to a disk being removed), we -+ * increment capacity_gen - this invalidates outstanding reservations -+ * and forces them to be revalidated -+ */ -+ u32 capacity_gen; -+ unsigned bucket_size_max; -+ -+ atomic64_t sectors_available; -+ -+ struct bch_fs_pcpu __percpu *pcpu; -+ -+ struct percpu_rw_semaphore mark_lock; -+ -+ seqcount_t usage_lock; -+ struct bch_fs_usage *usage_base; -+ struct bch_fs_usage __percpu *usage[2]; -+ struct bch_fs_usage __percpu *usage_gc; -+ -+ /* single element mempool: */ -+ struct mutex usage_scratch_lock; -+ struct bch_fs_usage *usage_scratch; -+ -+ /* -+ * When we invalidate buckets, we use both the priority and the amount -+ * of good data to determine which buckets to reuse first - to weight -+ * those together consistently we keep track of the smallest nonzero -+ * priority of any bucket. -+ */ -+ struct bucket_clock bucket_clock[2]; -+ -+ struct io_clock io_clock[2]; -+ -+ /* JOURNAL SEQ BLACKLIST */ -+ struct journal_seq_blacklist_table * -+ journal_seq_blacklist_table; -+ struct work_struct journal_seq_blacklist_gc_work; -+ -+ /* ALLOCATOR */ -+ spinlock_t freelist_lock; -+ struct closure_waitlist freelist_wait; -+ u64 blocked_allocate; -+ u64 blocked_allocate_open_bucket; -+ open_bucket_idx_t open_buckets_freelist; -+ open_bucket_idx_t open_buckets_nr_free; -+ struct closure_waitlist open_buckets_wait; -+ struct open_bucket open_buckets[OPEN_BUCKETS_COUNT]; -+ -+ struct write_point btree_write_point; -+ struct write_point rebalance_write_point; -+ -+ struct write_point write_points[WRITE_POINT_MAX]; -+ struct hlist_head write_points_hash[WRITE_POINT_HASH_NR]; -+ struct mutex write_points_hash_lock; -+ unsigned write_points_nr; -+ -+ /* GARBAGE COLLECTION */ -+ struct task_struct *gc_thread; -+ atomic_t kick_gc; -+ unsigned long gc_count; -+ -+ /* -+ * Tracks GC's progress - everything in the range [ZERO_KEY..gc_cur_pos] -+ * has been marked by GC. 
-+ * -+ * gc_cur_phase is a superset of btree_ids (BTREE_ID_EXTENTS etc.) -+ * -+ * Protected by gc_pos_lock. Only written to by GC thread, so GC thread -+ * can read without a lock. -+ */ -+ seqcount_t gc_pos_lock; -+ struct gc_pos gc_pos; -+ -+ /* -+ * The allocation code needs gc_mark in struct bucket to be correct, but -+ * it's not while a gc is in progress. -+ */ -+ struct rw_semaphore gc_lock; -+ -+ /* IO PATH */ -+ struct semaphore io_in_flight; -+ struct bio_set bio_read; -+ struct bio_set bio_read_split; -+ struct bio_set bio_write; -+ struct mutex bio_bounce_pages_lock; -+ mempool_t bio_bounce_pages; -+ struct rhashtable promote_table; -+ -+ mempool_t compression_bounce[2]; -+ mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR]; -+ mempool_t decompress_workspace; -+ ZSTD_parameters zstd_params; -+ -+ struct crypto_shash *sha256; -+ struct crypto_sync_skcipher *chacha20; -+ struct crypto_shash *poly1305; -+ -+ atomic64_t key_version; -+ -+ mempool_t large_bkey_pool; -+ -+ /* REBALANCE */ -+ struct bch_fs_rebalance rebalance; -+ -+ /* COPYGC */ -+ struct task_struct *copygc_thread; -+ copygc_heap copygc_heap; -+ struct bch_pd_controller copygc_pd; -+ struct write_point copygc_write_point; -+ u64 copygc_threshold; -+ -+ /* STRIPES: */ -+ GENRADIX(struct stripe) stripes[2]; -+ -+ ec_stripes_heap ec_stripes_heap; -+ spinlock_t ec_stripes_heap_lock; -+ -+ /* ERASURE CODING */ -+ struct list_head ec_stripe_head_list; -+ struct mutex ec_stripe_head_lock; -+ -+ struct list_head ec_stripe_new_list; -+ struct mutex ec_stripe_new_lock; -+ -+ struct work_struct ec_stripe_create_work; -+ u64 ec_stripe_hint; -+ -+ struct bio_set ec_bioset; -+ -+ struct work_struct ec_stripe_delete_work; -+ struct llist_head ec_stripe_delete_list; -+ -+ /* REFLINK */ -+ u64 reflink_hint; -+ -+ /* VFS IO PATH - fs-io.c */ -+ struct bio_set writepage_bioset; -+ struct bio_set dio_write_bioset; -+ struct bio_set dio_read_bioset; -+ -+ struct bio_list btree_write_error_list; -+ struct work_struct btree_write_error_work; -+ spinlock_t btree_write_error_lock; -+ -+ /* ERRORS */ -+ struct list_head fsck_errors; -+ struct mutex fsck_error_lock; -+ bool fsck_alloc_err; -+ -+ /* QUOTAS */ -+ struct bch_memquota_type quotas[QTYP_NR]; -+ -+ /* DEBUG JUNK */ -+ struct dentry *debug; -+ struct btree_debug btree_debug[BTREE_ID_NR]; -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct btree *verify_data; -+ struct btree_node *verify_ondisk; -+ struct mutex verify_lock; -+#endif -+ -+ u64 unused_inode_hint; -+ -+ /* -+ * A btree node on disk could have too many bsets for an iterator to fit -+ * on the stack - have to dynamically allocate them -+ */ -+ mempool_t fill_iter; -+ -+ mempool_t btree_bounce_pool; -+ -+ struct journal journal; -+ struct list_head journal_entries; -+ struct journal_keys journal_keys; -+ -+ u64 last_bucket_seq_cleanup; -+ -+ /* The rest of this all shows up in sysfs */ -+ atomic_long_t read_realloc_races; -+ atomic_long_t extent_migrate_done; -+ atomic_long_t extent_migrate_raced; -+ -+ unsigned btree_gc_periodic:1; -+ unsigned copy_gc_enabled:1; -+ bool promote_whole_extents; -+ -+#define BCH_DEBUG_PARAM(name, description) bool name; -+ BCH_DEBUG_PARAMS_ALL() -+#undef BCH_DEBUG_PARAM -+ -+ struct time_stats times[BCH_TIME_STAT_NR]; -+}; -+ -+static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages) -+{ -+#ifndef NO_BCACHEFS_FS -+ if (c->vfs_sb) -+ c->vfs_sb->s_bdi->ra_pages = ra_pages; -+#endif -+} -+ -+static inline unsigned bucket_bytes(const struct bch_dev *ca) -+{ -+ return ca->mi.bucket_size 
<< 9; -+} -+ -+static inline unsigned block_bytes(const struct bch_fs *c) -+{ -+ return c->opts.block_size << 9; -+} -+ -+static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, u64 time) -+{ -+ return ns_to_timespec64(time * c->sb.time_precision + c->sb.time_base_lo); -+} -+ -+static inline s64 timespec_to_bch2_time(struct bch_fs *c, struct timespec64 ts) -+{ -+ s64 ns = timespec64_to_ns(&ts) - c->sb.time_base_lo; -+ -+ if (c->sb.time_precision == 1) -+ return ns; -+ -+ return div_s64(ns, c->sb.time_precision); -+} -+ -+static inline s64 bch2_current_time(struct bch_fs *c) -+{ -+ struct timespec64 now; -+ -+ ktime_get_coarse_real_ts64(&now); -+ return timespec_to_bch2_time(c, now); -+} -+ -+static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev) -+{ -+ return dev < c->sb.nr_devices && c->devs[dev]; -+} -+ -+#endif /* _BCACHEFS_H */ -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -new file mode 100644 -index 000000000000..d5a2230e403c ---- /dev/null -+++ b/fs/bcachefs/bcachefs_format.h -@@ -0,0 +1,1671 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FORMAT_H -+#define _BCACHEFS_FORMAT_H -+ -+/* -+ * bcachefs on disk data structures -+ * -+ * OVERVIEW: -+ * -+ * There are three main types of on disk data structures in bcachefs (this is -+ * reduced from 5 in bcache) -+ * -+ * - superblock -+ * - journal -+ * - btree -+ * -+ * The btree is the primary structure; most metadata exists as keys in the -+ * various btrees. There are only a small number of btrees, they're not -+ * sharded - we have one btree for extents, another for inodes, et cetera. -+ * -+ * SUPERBLOCK: -+ * -+ * The superblock contains the location of the journal, the list of devices in -+ * the filesystem, and in general any metadata we need in order to decide -+ * whether we can start a filesystem or prior to reading the journal/btree -+ * roots. -+ * -+ * The superblock is extensible, and most of the contents of the superblock are -+ * in variable length, type tagged fields; see struct bch_sb_field. -+ * -+ * Backup superblocks do not reside in a fixed location; also, superblocks do -+ * not have a fixed size. To locate backup superblocks we have struct -+ * bch_sb_layout; we store a copy of this inside every superblock, and also -+ * before the first superblock. -+ * -+ * JOURNAL: -+ * -+ * The journal primarily records btree updates in the order they occurred; -+ * journal replay consists of just iterating over all the keys in the open -+ * journal entries and re-inserting them into the btrees. -+ * -+ * The journal also contains entry types for the btree roots, and blacklisted -+ * journal sequence numbers (see journal_seq_blacklist.c). -+ * -+ * BTREE: -+ * -+ * bcachefs btrees are copy on write b+ trees, where nodes are big (typically -+ * 128k-256k) and log structured. We use struct btree_node for writing the first -+ * entry in a given node (offset 0), and struct btree_node_entry for all -+ * subsequent writes. -+ * -+ * After the header, btree node entries contain a list of keys in sorted order. -+ * Values are stored inline with the keys; since values are variable length (and -+ * keys effectively are variable length too, due to packing) we can't do random -+ * access without building up additional in memory tables in the btree node read -+ * path. 
-+ * -+ * BTREE KEYS (struct bkey): -+ * -+ * The various btrees share a common format for the key - so as to avoid -+ * switching in fastpath lookup/comparison code - but define their own -+ * structures for the key values. -+ * -+ * The size of a key/value pair is stored as a u8 in units of u64s, so the max -+ * size is just under 2k. The common part also contains a type tag for the -+ * value, and a format field indicating whether the key is packed or not (and -+ * also meant to allow adding new key fields in the future, if desired). -+ * -+ * bkeys, when stored within a btree node, may also be packed. In that case, the -+ * bkey_format in that node is used to unpack it. Packed bkeys mean that we can -+ * be generous with field sizes in the common part of the key format (64 bit -+ * inode number, 64 bit offset, 96 bit version field, etc.) for negligible cost. -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define LE_BITMASK(_bits, name, type, field, offset, end) \ -+static const unsigned name##_OFFSET = offset; \ -+static const unsigned name##_BITS = (end - offset); \ -+static const __u##_bits name##_MAX = (1ULL << (end - offset)) - 1; \ -+ \ -+static inline __u64 name(const type *k) \ -+{ \ -+ return (__le##_bits##_to_cpu(k->field) >> offset) & \ -+ ~(~0ULL << (end - offset)); \ -+} \ -+ \ -+static inline void SET_##name(type *k, __u64 v) \ -+{ \ -+ __u##_bits new = __le##_bits##_to_cpu(k->field); \ -+ \ -+ new &= ~(~(~0ULL << (end - offset)) << offset); \ -+ new |= (v & ~(~0ULL << (end - offset))) << offset; \ -+ k->field = __cpu_to_le##_bits(new); \ -+} -+ -+#define LE16_BITMASK(n, t, f, o, e) LE_BITMASK(16, n, t, f, o, e) -+#define LE32_BITMASK(n, t, f, o, e) LE_BITMASK(32, n, t, f, o, e) -+#define LE64_BITMASK(n, t, f, o, e) LE_BITMASK(64, n, t, f, o, e) -+ -+struct bkey_format { -+ __u8 key_u64s; -+ __u8 nr_fields; -+ /* One unused slot for now: */ -+ __u8 bits_per_field[6]; -+ __le64 field_offset[6]; -+}; -+ -+/* Btree keys - all units are in sectors */ -+ -+struct bpos { -+ /* -+ * Word order matches machine byte order - btree code treats a bpos as a -+ * single large integer, for search/comparison purposes -+ * -+ * Note that wherever a bpos is embedded in another on disk data -+ * structure, it has to be byte swabbed when reading in metadata that -+ * wasn't written in native endian order: -+ */ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ __u32 snapshot; -+ __u64 offset; -+ __u64 inode; -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ __u64 inode; -+ __u64 offset; /* Points to end of extent - sectors */ -+ __u32 snapshot; -+#else -+#error edit for your odd byteorder. 
-+#endif -+} __attribute__((packed, aligned(4))); -+ -+#define KEY_INODE_MAX ((__u64)~0ULL) -+#define KEY_OFFSET_MAX ((__u64)~0ULL) -+#define KEY_SNAPSHOT_MAX ((__u32)~0U) -+#define KEY_SIZE_MAX ((__u32)~0U) -+ -+static inline struct bpos POS(__u64 inode, __u64 offset) -+{ -+ struct bpos ret; -+ -+ ret.inode = inode; -+ ret.offset = offset; -+ ret.snapshot = 0; -+ -+ return ret; -+} -+ -+#define POS_MIN POS(0, 0) -+#define POS_MAX POS(KEY_INODE_MAX, KEY_OFFSET_MAX) -+ -+/* Empty placeholder struct, for container_of() */ -+struct bch_val { -+ __u64 __nothing[0]; -+}; -+ -+struct bversion { -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ __u64 lo; -+ __u32 hi; -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ __u32 hi; -+ __u64 lo; -+#endif -+} __attribute__((packed, aligned(4))); -+ -+struct bkey { -+ /* Size of combined key and value, in u64s */ -+ __u8 u64s; -+ -+ /* Format of key (0 for format local to btree node) */ -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u8 format:7, -+ needs_whiteout:1; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u8 needs_whiteout:1, -+ format:7; -+#else -+#error edit for your odd byteorder. -+#endif -+ -+ /* Type of the value */ -+ __u8 type; -+ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ __u8 pad[1]; -+ -+ struct bversion version; -+ __u32 size; /* extent size, in sectors */ -+ struct bpos p; -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ struct bpos p; -+ __u32 size; /* extent size, in sectors */ -+ struct bversion version; -+ -+ __u8 pad[1]; -+#endif -+} __attribute__((packed, aligned(8))); -+ -+struct bkey_packed { -+ __u64 _data[0]; -+ -+ /* Size of combined key and value, in u64s */ -+ __u8 u64s; -+ -+ /* Format of key (0 for format local to btree node) */ -+ -+ /* -+ * XXX: next incompat on disk format change, switch format and -+ * needs_whiteout - bkey_packed() will be cheaper if format is the high -+ * bits of the bitfield -+ */ -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u8 format:7, -+ needs_whiteout:1; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u8 needs_whiteout:1, -+ format:7; -+#endif -+ -+ /* Type of the value */ -+ __u8 type; -+ __u8 key_start[0]; -+ -+ /* -+ * We copy bkeys with struct assignment in various places, and while -+ * that shouldn't be done with packed bkeys we can't disallow it in C, -+ * and it's legal to cast a bkey to a bkey_packed - so padding it out -+ * to the same size as struct bkey should hopefully be safest. 
-+ */ -+ __u8 pad[sizeof(struct bkey) - 3]; -+} __attribute__((packed, aligned(8))); -+ -+#define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64)) -+#define BKEY_U64s_MAX U8_MAX -+#define BKEY_VAL_U64s_MAX (BKEY_U64s_MAX - BKEY_U64s) -+ -+#define KEY_PACKED_BITS_START 24 -+ -+#define KEY_FORMAT_LOCAL_BTREE 0 -+#define KEY_FORMAT_CURRENT 1 -+ -+enum bch_bkey_fields { -+ BKEY_FIELD_INODE, -+ BKEY_FIELD_OFFSET, -+ BKEY_FIELD_SNAPSHOT, -+ BKEY_FIELD_SIZE, -+ BKEY_FIELD_VERSION_HI, -+ BKEY_FIELD_VERSION_LO, -+ BKEY_NR_FIELDS, -+}; -+ -+#define bkey_format_field(name, field) \ -+ [BKEY_FIELD_##name] = (sizeof(((struct bkey *) NULL)->field) * 8) -+ -+#define BKEY_FORMAT_CURRENT \ -+((struct bkey_format) { \ -+ .key_u64s = BKEY_U64s, \ -+ .nr_fields = BKEY_NR_FIELDS, \ -+ .bits_per_field = { \ -+ bkey_format_field(INODE, p.inode), \ -+ bkey_format_field(OFFSET, p.offset), \ -+ bkey_format_field(SNAPSHOT, p.snapshot), \ -+ bkey_format_field(SIZE, size), \ -+ bkey_format_field(VERSION_HI, version.hi), \ -+ bkey_format_field(VERSION_LO, version.lo), \ -+ }, \ -+}) -+ -+/* bkey with inline value */ -+struct bkey_i { -+ __u64 _data[0]; -+ -+ union { -+ struct { -+ /* Size of combined key and value, in u64s */ -+ __u8 u64s; -+ }; -+ struct { -+ struct bkey k; -+ struct bch_val v; -+ }; -+ }; -+}; -+ -+#define KEY(_inode, _offset, _size) \ -+((struct bkey) { \ -+ .u64s = BKEY_U64s, \ -+ .format = KEY_FORMAT_CURRENT, \ -+ .p = POS(_inode, _offset), \ -+ .size = _size, \ -+}) -+ -+static inline void bkey_init(struct bkey *k) -+{ -+ *k = KEY(0, 0, 0); -+} -+ -+#define bkey_bytes(_k) ((_k)->u64s * sizeof(__u64)) -+ -+#define __BKEY_PADDED(key, pad) \ -+ struct { struct bkey_i key; __u64 key ## _pad[pad]; } -+ -+/* -+ * - DELETED keys are used internally to mark keys that should be ignored but -+ * override keys in composition order. Their version number is ignored. -+ * -+ * - DISCARDED keys indicate that the data is all 0s because it has been -+ * discarded. DISCARDs may have a version; if the version is nonzero the key -+ * will be persistent, otherwise the key will be dropped whenever the btree -+ * node is rewritten (like DELETED keys). -+ * -+ * - ERROR: any read of the data returns a read error, as the data was lost due -+ * to a failing device. Like DISCARDED keys, they can be removed (overridden) -+ * by new writes or cluster-wide GC. Node repair can also overwrite them with -+ * the same or a more recent version number, but not with an older version -+ * number. -+ * -+ * - WHITEOUT: for hash table btrees -+*/ -+#define BCH_BKEY_TYPES() \ -+ x(deleted, 0) \ -+ x(discard, 1) \ -+ x(error, 2) \ -+ x(cookie, 3) \ -+ x(whiteout, 4) \ -+ x(btree_ptr, 5) \ -+ x(extent, 6) \ -+ x(reservation, 7) \ -+ x(inode, 8) \ -+ x(inode_generation, 9) \ -+ x(dirent, 10) \ -+ x(xattr, 11) \ -+ x(alloc, 12) \ -+ x(quota, 13) \ -+ x(stripe, 14) \ -+ x(reflink_p, 15) \ -+ x(reflink_v, 16) \ -+ x(inline_data, 17) \ -+ x(btree_ptr_v2, 18) -+ -+enum bch_bkey_type { -+#define x(name, nr) KEY_TYPE_##name = nr, -+ BCH_BKEY_TYPES() -+#undef x -+ KEY_TYPE_MAX, -+}; -+ -+struct bch_cookie { -+ struct bch_val v; -+ __le64 cookie; -+}; -+ -+/* Extents */ -+ -+/* -+ * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally -+ * preceded by checksum/compression information (bch_extent_crc32 or -+ * bch_extent_crc64). 
-+ * -+ * One major determining factor in the format of extents is how we handle and -+ * represent extents that have been partially overwritten and thus trimmed: -+ * -+ * If an extent is not checksummed or compressed, when the extent is trimmed we -+ * don't have to remember the extent we originally allocated and wrote: we can -+ * merely adjust ptr->offset to point to the start of the data that is currently -+ * live. The size field in struct bkey records the current (live) size of the -+ * extent, and is also used to mean "size of region on disk that we point to" in -+ * this case. -+ * -+ * Thus an extent that is not checksummed or compressed will consist only of a -+ * list of bch_extent_ptrs, with none of the fields in -+ * bch_extent_crc32/bch_extent_crc64. -+ * -+ * When an extent is checksummed or compressed, it's not possible to read only -+ * the data that is currently live: we have to read the entire extent that was -+ * originally written, and then return only the part of the extent that is -+ * currently live. -+ * -+ * Thus, in addition to the current size of the extent in struct bkey, we need -+ * to store the size of the originally allocated space - this is the -+ * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also, -+ * when the extent is trimmed, instead of modifying the offset field of the -+ * pointer, we keep a second smaller offset field - "offset into the original -+ * extent of the currently live region". -+ * -+ * The other major determining factor is replication and data migration: -+ * -+ * Each pointer may have its own bch_extent_crc32/64. When doing a replicated -+ * write, we will initially write all the replicas in the same format, with the -+ * same checksum type and compression format - however, when copygc runs later (or -+ * tiering/cache promotion, anything that moves data), it is not in general -+ * going to rewrite all the pointers at once - one of the replicas may be in a -+ * bucket on one device that has very little fragmentation while another lives -+ * in a bucket that has become heavily fragmented, and thus is being rewritten -+ * sooner than the rest. -+ * -+ * Thus it will only move a subset of the pointers (or in the case of -+ * tiering/cache promotion perhaps add a single pointer without dropping any -+ * current pointers), and if the extent has been partially overwritten it must -+ * write only the currently live portion (or copygc would not be able to reduce -+ * fragmentation!) - which necessitates a different bch_extent_crc format for -+ * the new pointer. -+ * -+ * But in the interests of space efficiency, we don't want to store one -+ * bch_extent_crc for each pointer if we don't have to. -+ * -+ * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and -+ * bch_extent_ptrs appended arbitrarily one after the other. We determine the -+ * type of a given entry with a scheme similar to utf8 (except we're encoding a -+ * type, not a size), encoding the type in the position of the first set bit: -+ * -+ * bch_extent_crc32 - 0b1 -+ * bch_extent_ptr - 0b10 -+ * bch_extent_crc64 - 0b100 -+ * -+ * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and -+ * bch_extent_crc64 is the least constrained). -+ * -+ * Then, each bch_extent_crc32/64 applies to the pointers that follow after it, -+ * until the next bch_extent_crc32/64. -+ * -+ * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer -+ * is neither checksummed nor compressed. 
-+ */ -+ -+/* 128 bits, sufficient for cryptographic MACs: */ -+struct bch_csum { -+ __le64 lo; -+ __le64 hi; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_EXTENT_ENTRY_TYPES() \ -+ x(ptr, 0) \ -+ x(crc32, 1) \ -+ x(crc64, 2) \ -+ x(crc128, 3) \ -+ x(stripe_ptr, 4) -+#define BCH_EXTENT_ENTRY_MAX 5 -+ -+enum bch_extent_entry_type { -+#define x(f, n) BCH_EXTENT_ENTRY_##f = n, -+ BCH_EXTENT_ENTRY_TYPES() -+#undef x -+}; -+ -+/* Compressed/uncompressed size are stored biased by 1: */ -+struct bch_extent_crc32 { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u32 type:2, -+ _compressed_size:7, -+ _uncompressed_size:7, -+ offset:7, -+ _unused:1, -+ csum_type:4, -+ compression_type:4; -+ __u32 csum; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u32 csum; -+ __u32 compression_type:4, -+ csum_type:4, -+ _unused:1, -+ offset:7, -+ _uncompressed_size:7, -+ _compressed_size:7, -+ type:2; -+#endif -+} __attribute__((packed, aligned(8))); -+ -+#define CRC32_SIZE_MAX (1U << 7) -+#define CRC32_NONCE_MAX 0 -+ -+struct bch_extent_crc64 { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:3, -+ _compressed_size:9, -+ _uncompressed_size:9, -+ offset:9, -+ nonce:10, -+ csum_type:4, -+ compression_type:4, -+ csum_hi:16; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 csum_hi:16, -+ compression_type:4, -+ csum_type:4, -+ nonce:10, -+ offset:9, -+ _uncompressed_size:9, -+ _compressed_size:9, -+ type:3; -+#endif -+ __u64 csum_lo; -+} __attribute__((packed, aligned(8))); -+ -+#define CRC64_SIZE_MAX (1U << 9) -+#define CRC64_NONCE_MAX ((1U << 10) - 1) -+ -+struct bch_extent_crc128 { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:4, -+ _compressed_size:13, -+ _uncompressed_size:13, -+ offset:13, -+ nonce:13, -+ csum_type:4, -+ compression_type:4; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 compression_type:4, -+ csum_type:4, -+ nonce:13, -+ offset:13, -+ _uncompressed_size:13, -+ _compressed_size:13, -+ type:4; -+#endif -+ struct bch_csum csum; -+} __attribute__((packed, aligned(8))); -+ -+#define CRC128_SIZE_MAX (1U << 13) -+#define CRC128_NONCE_MAX ((1U << 13) - 1) -+ -+/* -+ * @reservation - pointer hasn't been written to, just reserved -+ */ -+struct bch_extent_ptr { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:1, -+ cached:1, -+ unused:1, -+ reservation:1, -+ offset:44, /* 8 petabytes */ -+ dev:8, -+ gen:8; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 gen:8, -+ dev:8, -+ offset:44, -+ reservation:1, -+ unused:1, -+ cached:1, -+ type:1; -+#endif -+} __attribute__((packed, aligned(8))); -+ -+struct bch_extent_stripe_ptr { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:5, -+ block:8, -+ idx:51; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 idx:51, -+ block:8, -+ type:5; -+#endif -+}; -+ -+struct bch_extent_reservation { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:6, -+ unused:22, -+ replicas:4, -+ generation:32; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 generation:32, -+ replicas:4, -+ unused:22, -+ type:6; -+#endif -+}; -+ -+union bch_extent_entry { -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 -+ unsigned long type; -+#elif __BITS_PER_LONG == 32 -+ struct { -+ unsigned long pad; -+ unsigned long type; -+ }; -+#else -+#error edit for your odd byteorder. 
-+#endif -+ -+#define x(f, n) struct bch_extent_##f f; -+ BCH_EXTENT_ENTRY_TYPES() -+#undef x -+}; -+ -+struct bch_btree_ptr { -+ struct bch_val v; -+ -+ struct bch_extent_ptr start[0]; -+ __u64 _data[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_btree_ptr_v2 { -+ struct bch_val v; -+ -+ __u64 mem_ptr; -+ __le64 seq; -+ __le16 sectors_written; -+ /* In case we ever decide to do variable size btree nodes: */ -+ __le16 sectors; -+ struct bpos min_key; -+ struct bch_extent_ptr start[0]; -+ __u64 _data[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_extent { -+ struct bch_val v; -+ -+ union bch_extent_entry start[0]; -+ __u64 _data[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_reservation { -+ struct bch_val v; -+ -+ __le32 generation; -+ __u8 nr_replicas; -+ __u8 pad[3]; -+} __attribute__((packed, aligned(8))); -+ -+/* Maximum size (in u64s) a single pointer could be: */ -+#define BKEY_EXTENT_PTR_U64s_MAX\ -+ ((sizeof(struct bch_extent_crc128) + \ -+ sizeof(struct bch_extent_ptr)) / sizeof(u64)) -+ -+/* Maximum possible size of an entire extent value: */ -+#define BKEY_EXTENT_VAL_U64s_MAX \ -+ (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1)) -+ -+#define BKEY_PADDED(key) __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX) -+ -+/* * Maximum possible size of an entire extent, key + value: */ -+#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) -+ -+/* Btree pointers don't carry around checksums: */ -+#define BKEY_BTREE_PTR_VAL_U64s_MAX \ -+ ((sizeof(struct bch_btree_ptr_v2) + \ -+ sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(u64)) -+#define BKEY_BTREE_PTR_U64s_MAX \ -+ (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX) -+ -+/* Inodes */ -+ -+#define BLOCKDEV_INODE_MAX 4096 -+ -+#define BCACHEFS_ROOT_INO 4096 -+ -+struct bch_inode { -+ struct bch_val v; -+ -+ __le64 bi_hash_seed; -+ __le32 bi_flags; -+ __le16 bi_mode; -+ __u8 fields[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_inode_generation { -+ struct bch_val v; -+ -+ __le32 bi_generation; -+ __le32 pad; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_INODE_FIELDS() \ -+ x(bi_atime, 64) \ -+ x(bi_ctime, 64) \ -+ x(bi_mtime, 64) \ -+ x(bi_otime, 64) \ -+ x(bi_size, 64) \ -+ x(bi_sectors, 64) \ -+ x(bi_uid, 32) \ -+ x(bi_gid, 32) \ -+ x(bi_nlink, 32) \ -+ x(bi_generation, 32) \ -+ x(bi_dev, 32) \ -+ x(bi_data_checksum, 8) \ -+ x(bi_compression, 8) \ -+ x(bi_project, 32) \ -+ x(bi_background_compression, 8) \ -+ x(bi_data_replicas, 8) \ -+ x(bi_promote_target, 16) \ -+ x(bi_foreground_target, 16) \ -+ x(bi_background_target, 16) \ -+ x(bi_erasure_code, 16) \ -+ x(bi_fields_set, 16) -+ -+/* subset of BCH_INODE_FIELDS */ -+#define BCH_INODE_OPTS() \ -+ x(data_checksum, 8) \ -+ x(compression, 8) \ -+ x(project, 32) \ -+ x(background_compression, 8) \ -+ x(data_replicas, 8) \ -+ x(promote_target, 16) \ -+ x(foreground_target, 16) \ -+ x(background_target, 16) \ -+ x(erasure_code, 16) -+ -+enum inode_opt_id { -+#define x(name, ...) 
\ -+ Inode_opt_##name, -+ BCH_INODE_OPTS() -+#undef x -+ Inode_opt_nr, -+}; -+ -+enum { -+ /* -+ * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL -+ * flags) -+ */ -+ __BCH_INODE_SYNC = 0, -+ __BCH_INODE_IMMUTABLE = 1, -+ __BCH_INODE_APPEND = 2, -+ __BCH_INODE_NODUMP = 3, -+ __BCH_INODE_NOATIME = 4, -+ -+ __BCH_INODE_I_SIZE_DIRTY= 5, -+ __BCH_INODE_I_SECTORS_DIRTY= 6, -+ __BCH_INODE_UNLINKED = 7, -+ -+ /* bits 20+ reserved for packed fields below: */ -+}; -+ -+#define BCH_INODE_SYNC (1 << __BCH_INODE_SYNC) -+#define BCH_INODE_IMMUTABLE (1 << __BCH_INODE_IMMUTABLE) -+#define BCH_INODE_APPEND (1 << __BCH_INODE_APPEND) -+#define BCH_INODE_NODUMP (1 << __BCH_INODE_NODUMP) -+#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME) -+#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY) -+#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY) -+#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED) -+ -+LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); -+LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 32); -+ -+/* Dirents */ -+ -+/* -+ * Dirents (and xattrs) have to implement string lookups; since our b-tree -+ * doesn't support arbitrary length strings for the key, we instead index by a -+ * 64 bit hash (currently truncated sha1) of the string, stored in the offset -+ * field of the key - using linear probing to resolve hash collisions. This also -+ * provides us with the readdir cookie posix requires. -+ * -+ * Linear probing requires us to use whiteouts for deletions, in the event of a -+ * collision: -+ */ -+ -+struct bch_dirent { -+ struct bch_val v; -+ -+ /* Target inode number: */ -+ __le64 d_inum; -+ -+ /* -+ * Copy of mode bits 12-15 from the target inode - so userspace can get -+ * the filetype without having to do a stat() -+ */ -+ __u8 d_type; -+ -+ __u8 d_name[]; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_NAME_MAX (U8_MAX * sizeof(u64) - \ -+ sizeof(struct bkey) - \ -+ offsetof(struct bch_dirent, d_name)) -+ -+ -+/* Xattrs */ -+ -+#define KEY_TYPE_XATTR_INDEX_USER 0 -+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1 -+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2 -+#define KEY_TYPE_XATTR_INDEX_TRUSTED 3 -+#define KEY_TYPE_XATTR_INDEX_SECURITY 4 -+ -+struct bch_xattr { -+ struct bch_val v; -+ __u8 x_type; -+ __u8 x_name_len; -+ __le16 x_val_len; -+ __u8 x_name[]; -+} __attribute__((packed, aligned(8))); -+ -+/* Bucket/allocation information: */ -+ -+struct bch_alloc { -+ struct bch_val v; -+ __u8 fields; -+ __u8 gen; -+ __u8 data[]; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_ALLOC_FIELDS() \ -+ x(read_time, 16) \ -+ x(write_time, 16) \ -+ x(data_type, 8) \ -+ x(dirty_sectors, 16) \ -+ x(cached_sectors, 16) \ -+ x(oldest_gen, 8) -+ -+enum { -+#define x(name, bytes) BCH_ALLOC_FIELD_##name, -+ BCH_ALLOC_FIELDS() -+#undef x -+ BCH_ALLOC_FIELD_NR -+}; -+ -+static const unsigned BCH_ALLOC_FIELD_BYTES[] = { -+#define x(name, bits) [BCH_ALLOC_FIELD_##name] = bits / 8, -+ BCH_ALLOC_FIELDS() -+#undef x -+}; -+ -+#define x(name, bits) + (bits / 8) -+static const unsigned BKEY_ALLOC_VAL_U64s_MAX = -+ DIV_ROUND_UP(offsetof(struct bch_alloc, data) -+ BCH_ALLOC_FIELDS(), sizeof(u64)); -+#undef x -+ -+#define BKEY_ALLOC_U64s_MAX (BKEY_U64s + BKEY_ALLOC_VAL_U64s_MAX) -+ -+/* Quotas: */ -+ -+enum quota_types { -+ QTYP_USR = 0, -+ QTYP_GRP = 1, -+ QTYP_PRJ = 2, -+ QTYP_NR = 3, -+}; -+ -+enum quota_counters { -+ Q_SPC = 0, -+ Q_INO = 1, -+ Q_COUNTERS = 2, -+}; -+ -+struct bch_quota_counter { -+ __le64 
hardlimit; -+ __le64 softlimit; -+}; -+ -+struct bch_quota { -+ struct bch_val v; -+ struct bch_quota_counter c[Q_COUNTERS]; -+} __attribute__((packed, aligned(8))); -+ -+/* Erasure coding */ -+ -+struct bch_stripe { -+ struct bch_val v; -+ __le16 sectors; -+ __u8 algorithm; -+ __u8 nr_blocks; -+ __u8 nr_redundant; -+ -+ __u8 csum_granularity_bits; -+ __u8 csum_type; -+ __u8 pad; -+ -+ struct bch_extent_ptr ptrs[0]; -+} __attribute__((packed, aligned(8))); -+ -+/* Reflink: */ -+ -+struct bch_reflink_p { -+ struct bch_val v; -+ __le64 idx; -+ -+ __le32 reservation_generation; -+ __u8 nr_replicas; -+ __u8 pad[3]; -+}; -+ -+struct bch_reflink_v { -+ struct bch_val v; -+ __le64 refcount; -+ union bch_extent_entry start[0]; -+ __u64 _data[0]; -+}; -+ -+/* Inline data */ -+ -+struct bch_inline_data { -+ struct bch_val v; -+ u8 data[0]; -+}; -+ -+/* Optional/variable size superblock sections: */ -+ -+struct bch_sb_field { -+ __u64 _data[0]; -+ __le32 u64s; -+ __le32 type; -+}; -+ -+#define BCH_SB_FIELDS() \ -+ x(journal, 0) \ -+ x(members, 1) \ -+ x(crypt, 2) \ -+ x(replicas_v0, 3) \ -+ x(quota, 4) \ -+ x(disk_groups, 5) \ -+ x(clean, 6) \ -+ x(replicas, 7) \ -+ x(journal_seq_blacklist, 8) -+ -+enum bch_sb_field_type { -+#define x(f, nr) BCH_SB_FIELD_##f = nr, -+ BCH_SB_FIELDS() -+#undef x -+ BCH_SB_FIELD_NR -+}; -+ -+/* BCH_SB_FIELD_journal: */ -+ -+struct bch_sb_field_journal { -+ struct bch_sb_field field; -+ __le64 buckets[0]; -+}; -+ -+/* BCH_SB_FIELD_members: */ -+ -+#define BCH_MIN_NR_NBUCKETS (1 << 6) -+ -+struct bch_member { -+ uuid_le uuid; -+ __le64 nbuckets; /* device size */ -+ __le16 first_bucket; /* index of first bucket used */ -+ __le16 bucket_size; /* sectors */ -+ __le32 pad; -+ __le64 last_mount; /* time_t */ -+ -+ __le64 flags[2]; -+}; -+ -+LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags[0], 0, 4) -+/* 4-10 unused, was TIER, HAS_(META)DATA */ -+LE64_BITMASK(BCH_MEMBER_REPLACEMENT, struct bch_member, flags[0], 10, 14) -+LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags[0], 14, 15) -+LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags[0], 15, 20) -+LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags[0], 20, 28) -+LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags[0], 28, 30) -+ -+#define BCH_TIER_MAX 4U -+ -+#if 0 -+LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20); -+LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40); -+#endif -+ -+enum bch_member_state { -+ BCH_MEMBER_STATE_RW = 0, -+ BCH_MEMBER_STATE_RO = 1, -+ BCH_MEMBER_STATE_FAILED = 2, -+ BCH_MEMBER_STATE_SPARE = 3, -+ BCH_MEMBER_STATE_NR = 4, -+}; -+ -+enum cache_replacement { -+ CACHE_REPLACEMENT_LRU = 0, -+ CACHE_REPLACEMENT_FIFO = 1, -+ CACHE_REPLACEMENT_RANDOM = 2, -+ CACHE_REPLACEMENT_NR = 3, -+}; -+ -+struct bch_sb_field_members { -+ struct bch_sb_field field; -+ struct bch_member members[0]; -+}; -+ -+/* BCH_SB_FIELD_crypt: */ -+ -+struct nonce { -+ __le32 d[4]; -+}; -+ -+struct bch_key { -+ __le64 key[4]; -+}; -+ -+#define BCH_KEY_MAGIC \ -+ (((u64) 'b' << 0)|((u64) 'c' << 8)| \ -+ ((u64) 'h' << 16)|((u64) '*' << 24)| \ -+ ((u64) '*' << 32)|((u64) 'k' << 40)| \ -+ ((u64) 'e' << 48)|((u64) 'y' << 56)) -+ -+struct bch_encrypted_key { -+ __le64 magic; -+ struct bch_key key; -+}; -+ -+/* -+ * If this field is present in the superblock, it stores an encryption key which -+ * is used encrypt all other data/metadata. 
The key will normally be encrypted -+ * with the key userspace provides, but if encryption has been turned off we'll -+ * just store the master key unencrypted in the superblock so we can access the -+ * previously encrypted data. -+ */ -+struct bch_sb_field_crypt { -+ struct bch_sb_field field; -+ -+ __le64 flags; -+ __le64 kdf_flags; -+ struct bch_encrypted_key key; -+}; -+ -+LE64_BITMASK(BCH_CRYPT_KDF_TYPE, struct bch_sb_field_crypt, flags, 0, 4); -+ -+enum bch_kdf_types { -+ BCH_KDF_SCRYPT = 0, -+ BCH_KDF_NR = 1, -+}; -+ -+/* stored as base 2 log of scrypt params: */ -+LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16); -+LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32); -+LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48); -+ -+/* BCH_SB_FIELD_replicas: */ -+ -+#define BCH_DATA_TYPES() \ -+ x(none, 0) \ -+ x(sb, 1) \ -+ x(journal, 2) \ -+ x(btree, 3) \ -+ x(user, 4) \ -+ x(cached, 5) -+ -+enum bch_data_type { -+#define x(t, n) BCH_DATA_##t, -+ BCH_DATA_TYPES() -+#undef x -+ BCH_DATA_NR -+}; -+ -+struct bch_replicas_entry_v0 { -+ __u8 data_type; -+ __u8 nr_devs; -+ __u8 devs[0]; -+} __attribute__((packed)); -+ -+struct bch_sb_field_replicas_v0 { -+ struct bch_sb_field field; -+ struct bch_replicas_entry_v0 entries[0]; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_replicas_entry { -+ __u8 data_type; -+ __u8 nr_devs; -+ __u8 nr_required; -+ __u8 devs[0]; -+} __attribute__((packed)); -+ -+#define replicas_entry_bytes(_i) \ -+ (offsetof(typeof(*(_i)), devs) + (_i)->nr_devs) -+ -+struct bch_sb_field_replicas { -+ struct bch_sb_field field; -+ struct bch_replicas_entry entries[0]; -+} __attribute__((packed, aligned(8))); -+ -+/* BCH_SB_FIELD_quota: */ -+ -+struct bch_sb_quota_counter { -+ __le32 timelimit; -+ __le32 warnlimit; -+}; -+ -+struct bch_sb_quota_type { -+ __le64 flags; -+ struct bch_sb_quota_counter c[Q_COUNTERS]; -+}; -+ -+struct bch_sb_field_quota { -+ struct bch_sb_field field; -+ struct bch_sb_quota_type q[QTYP_NR]; -+} __attribute__((packed, aligned(8))); -+ -+/* BCH_SB_FIELD_disk_groups: */ -+ -+#define BCH_SB_LABEL_SIZE 32 -+ -+struct bch_disk_group { -+ __u8 label[BCH_SB_LABEL_SIZE]; -+ __le64 flags[2]; -+} __attribute__((packed, aligned(8))); -+ -+LE64_BITMASK(BCH_GROUP_DELETED, struct bch_disk_group, flags[0], 0, 1) -+LE64_BITMASK(BCH_GROUP_DATA_ALLOWED, struct bch_disk_group, flags[0], 1, 6) -+LE64_BITMASK(BCH_GROUP_PARENT, struct bch_disk_group, flags[0], 6, 24) -+ -+struct bch_sb_field_disk_groups { -+ struct bch_sb_field field; -+ struct bch_disk_group entries[0]; -+} __attribute__((packed, aligned(8))); -+ -+/* -+ * On clean shutdown, store btree roots and current journal sequence number in -+ * the superblock: -+ */ -+struct jset_entry { -+ __le16 u64s; -+ __u8 btree_id; -+ __u8 level; -+ __u8 type; /* designates what this jset holds */ -+ __u8 pad[3]; -+ -+ union { -+ struct bkey_i start[0]; -+ __u64 _data[0]; -+ }; -+}; -+ -+struct bch_sb_field_clean { -+ struct bch_sb_field field; -+ -+ __le32 flags; -+ __le16 read_clock; -+ __le16 write_clock; -+ __le64 journal_seq; -+ -+ union { -+ struct jset_entry start[0]; -+ __u64 _data[0]; -+ }; -+}; -+ -+struct journal_seq_blacklist_entry { -+ __le64 start; -+ __le64 end; -+}; -+ -+struct bch_sb_field_journal_seq_blacklist { -+ struct bch_sb_field field; -+ -+ union { -+ struct journal_seq_blacklist_entry start[0]; -+ __u64 _data[0]; -+ }; -+}; -+ -+/* Superblock: */ -+ -+/* -+ * New versioning scheme: -+ * One common version number 
for all on disk data structures - superblock, btree -+ * nodes, journal entries -+ */ -+#define BCH_JSET_VERSION_OLD 2 -+#define BCH_BSET_VERSION_OLD 3 -+ -+enum bcachefs_metadata_version { -+ bcachefs_metadata_version_min = 9, -+ bcachefs_metadata_version_new_versioning = 10, -+ bcachefs_metadata_version_bkey_renumber = 10, -+ bcachefs_metadata_version_inode_btree_change = 11, -+ bcachefs_metadata_version_max = 12, -+}; -+ -+#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) -+ -+#define BCH_SB_SECTOR 8 -+#define BCH_SB_MEMBERS_MAX 64 /* XXX kill */ -+ -+struct bch_sb_layout { -+ uuid_le magic; /* bcachefs superblock UUID */ -+ __u8 layout_type; -+ __u8 sb_max_size_bits; /* base 2 of 512 byte sectors */ -+ __u8 nr_superblocks; -+ __u8 pad[5]; -+ __le64 sb_offset[61]; -+} __attribute__((packed, aligned(8))); -+ -+#define BCH_SB_LAYOUT_SECTOR 7 -+ -+/* -+ * @offset - sector where this sb was written -+ * @version - on disk format version -+ * @version_min - Oldest metadata version this filesystem contains; so we can -+ * safely drop compatibility code and refuse to mount filesystems -+ * we'd need it for -+ * @magic - identifies as a bcachefs superblock (BCACHE_MAGIC) -+ * @seq - incremented each time superblock is written -+ * @uuid - used for generating various magic numbers and identifying -+ * member devices, never changes -+ * @user_uuid - user visible UUID, may be changed -+ * @label - filesystem label -+ * @seq - identifies most recent superblock, incremented each time -+ * superblock is written -+ * @features - enabled incompatible features -+ */ -+struct bch_sb { -+ struct bch_csum csum; -+ __le16 version; -+ __le16 version_min; -+ __le16 pad[2]; -+ uuid_le magic; -+ uuid_le uuid; -+ uuid_le user_uuid; -+ __u8 label[BCH_SB_LABEL_SIZE]; -+ __le64 offset; -+ __le64 seq; -+ -+ __le16 block_size; -+ __u8 dev_idx; -+ __u8 nr_devices; -+ __le32 u64s; -+ -+ __le64 time_base_lo; -+ __le32 time_base_hi; -+ __le32 time_precision; -+ -+ __le64 flags[8]; -+ __le64 features[2]; -+ __le64 compat[2]; -+ -+ struct bch_sb_layout layout; -+ -+ union { -+ struct bch_sb_field start[0]; -+ __le64 _data[0]; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+/* -+ * Flags: -+ * BCH_SB_INITALIZED - set on first mount -+ * BCH_SB_CLEAN - did we shut down cleanly? Just a hint, doesn't affect -+ * behaviour of mount/recovery path: -+ * BCH_SB_INODE_32BIT - limit inode numbers to 32 bits -+ * BCH_SB_128_BIT_MACS - 128 bit macs instead of 80 -+ * BCH_SB_ENCRYPTION_TYPE - if nonzero encryption is enabled; overrides -+ * DATA/META_CSUM_TYPE. 
Also indicates encryption -+ * algorithm in use, if/when we get more than one -+ */ -+ -+LE16_BITMASK(BCH_SB_BLOCK_SIZE, struct bch_sb, block_size, 0, 16); -+ -+LE64_BITMASK(BCH_SB_INITIALIZED, struct bch_sb, flags[0], 0, 1); -+LE64_BITMASK(BCH_SB_CLEAN, struct bch_sb, flags[0], 1, 2); -+LE64_BITMASK(BCH_SB_CSUM_TYPE, struct bch_sb, flags[0], 2, 8); -+LE64_BITMASK(BCH_SB_ERROR_ACTION, struct bch_sb, flags[0], 8, 12); -+ -+LE64_BITMASK(BCH_SB_BTREE_NODE_SIZE, struct bch_sb, flags[0], 12, 28); -+ -+LE64_BITMASK(BCH_SB_GC_RESERVE, struct bch_sb, flags[0], 28, 33); -+LE64_BITMASK(BCH_SB_ROOT_RESERVE, struct bch_sb, flags[0], 33, 40); -+ -+LE64_BITMASK(BCH_SB_META_CSUM_TYPE, struct bch_sb, flags[0], 40, 44); -+LE64_BITMASK(BCH_SB_DATA_CSUM_TYPE, struct bch_sb, flags[0], 44, 48); -+ -+LE64_BITMASK(BCH_SB_META_REPLICAS_WANT, struct bch_sb, flags[0], 48, 52); -+LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT, struct bch_sb, flags[0], 52, 56); -+ -+LE64_BITMASK(BCH_SB_POSIX_ACL, struct bch_sb, flags[0], 56, 57); -+LE64_BITMASK(BCH_SB_USRQUOTA, struct bch_sb, flags[0], 57, 58); -+LE64_BITMASK(BCH_SB_GRPQUOTA, struct bch_sb, flags[0], 58, 59); -+LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60); -+ -+LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61); -+ -+LE64_BITMASK(BCH_SB_REFLINK, struct bch_sb, flags[0], 61, 62); -+ -+/* 61-64 unused */ -+ -+LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4); -+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE, struct bch_sb, flags[1], 4, 8); -+LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9); -+ -+LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10); -+LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14); -+ -+/* -+ * Max size of an extent that may require bouncing to read or write -+ * (checksummed, compressed): 64k -+ */ -+LE64_BITMASK(BCH_SB_ENCODED_EXTENT_MAX_BITS, -+ struct bch_sb, flags[1], 14, 20); -+ -+LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24); -+LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28); -+ -+LE64_BITMASK(BCH_SB_PROMOTE_TARGET, struct bch_sb, flags[1], 28, 40); -+LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52); -+LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64); -+ -+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE, -+ struct bch_sb, flags[2], 0, 4); -+LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64); -+ -+LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16); -+ -+/* -+ * Features: -+ * -+ * journal_seq_blacklist_v3: gates BCH_SB_FIELD_journal_seq_blacklist -+ * reflink: gates KEY_TYPE_reflink -+ * inline_data: gates KEY_TYPE_inline_data -+ * new_siphash: gates BCH_STR_HASH_SIPHASH -+ * new_extent_overwrite: gates BTREE_NODE_NEW_EXTENT_OVERWRITE -+ */ -+#define BCH_SB_FEATURES() \ -+ x(lz4, 0) \ -+ x(gzip, 1) \ -+ x(zstd, 2) \ -+ x(atomic_nlink, 3) \ -+ x(ec, 4) \ -+ x(journal_seq_blacklist_v3, 5) \ -+ x(reflink, 6) \ -+ x(new_siphash, 7) \ -+ x(inline_data, 8) \ -+ x(new_extent_overwrite, 9) \ -+ x(incompressible, 10) \ -+ x(btree_ptr_v2, 11) \ -+ x(extents_above_btree_updates, 12) \ -+ x(btree_updates_journalled, 13) -+ -+#define BCH_SB_FEATURES_ALL \ -+ ((1ULL << BCH_FEATURE_new_siphash)| \ -+ (1ULL << BCH_FEATURE_new_extent_overwrite)| \ -+ (1ULL << BCH_FEATURE_btree_ptr_v2)| \ -+ (1ULL << BCH_FEATURE_extents_above_btree_updates)) -+ -+enum bch_sb_feature { -+#define x(f, n) BCH_FEATURE_##f, -+ BCH_SB_FEATURES() -+#undef x -+ BCH_FEATURE_NR, -+}; 
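/*
 * Minimal, self-contained sketch of the x-macro pattern that
 * BCH_SB_FEATURES() (and BCH_BKEY_TYPES(), BCH_SB_FIELDS(), etc.) relies
 * on: one list macro is expanded several times with different definitions
 * of x() to generate an enum, a name table, and so on.  The SKETCH_*
 * identifiers below are illustrative only and are not part of bcachefs or
 * of the deleted header.
 */
#include <stdio.h>

#define SKETCH_SB_FEATURES()	\
	x(lz4,	0)		\
	x(gzip,	1)		\
	x(zstd,	2)

/* First expansion: enum constants, one per feature bit */
enum sketch_sb_feature {
#define x(f, n)	SKETCH_FEATURE_##f = n,
	SKETCH_SB_FEATURES()
#undef x
	SKETCH_FEATURE_NR,
};

/* Second expansion of the same list: a feature-name lookup table */
static const char * const sketch_feature_names[] = {
#define x(f, n)	[SKETCH_FEATURE_##f] = #f,
	SKETCH_SB_FEATURES()
#undef x
};

int main(void)
{
	/* A features word with only the zstd bit set, as a superblock might carry */
	unsigned long long features = 1ULL << SKETCH_FEATURE_zstd;

	for (unsigned i = 0; i < SKETCH_FEATURE_NR; i++)
		printf("%-6s %s\n", sketch_feature_names[i],
		       (features & (1ULL << i)) ? "set" : "unset");
	return 0;
}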
-+ -+enum bch_sb_compat { -+ BCH_COMPAT_FEAT_ALLOC_INFO = 0, -+ BCH_COMPAT_FEAT_ALLOC_METADATA = 1, -+}; -+ -+/* options: */ -+ -+#define BCH_REPLICAS_MAX 4U -+ -+enum bch_error_actions { -+ BCH_ON_ERROR_CONTINUE = 0, -+ BCH_ON_ERROR_RO = 1, -+ BCH_ON_ERROR_PANIC = 2, -+ BCH_NR_ERROR_ACTIONS = 3, -+}; -+ -+enum bch_str_hash_type { -+ BCH_STR_HASH_CRC32C = 0, -+ BCH_STR_HASH_CRC64 = 1, -+ BCH_STR_HASH_SIPHASH_OLD = 2, -+ BCH_STR_HASH_SIPHASH = 3, -+ BCH_STR_HASH_NR = 4, -+}; -+ -+enum bch_str_hash_opts { -+ BCH_STR_HASH_OPT_CRC32C = 0, -+ BCH_STR_HASH_OPT_CRC64 = 1, -+ BCH_STR_HASH_OPT_SIPHASH = 2, -+ BCH_STR_HASH_OPT_NR = 3, -+}; -+ -+enum bch_csum_type { -+ BCH_CSUM_NONE = 0, -+ BCH_CSUM_CRC32C_NONZERO = 1, -+ BCH_CSUM_CRC64_NONZERO = 2, -+ BCH_CSUM_CHACHA20_POLY1305_80 = 3, -+ BCH_CSUM_CHACHA20_POLY1305_128 = 4, -+ BCH_CSUM_CRC32C = 5, -+ BCH_CSUM_CRC64 = 6, -+ BCH_CSUM_NR = 7, -+}; -+ -+static const unsigned bch_crc_bytes[] = { -+ [BCH_CSUM_NONE] = 0, -+ [BCH_CSUM_CRC32C_NONZERO] = 4, -+ [BCH_CSUM_CRC32C] = 4, -+ [BCH_CSUM_CRC64_NONZERO] = 8, -+ [BCH_CSUM_CRC64] = 8, -+ [BCH_CSUM_CHACHA20_POLY1305_80] = 10, -+ [BCH_CSUM_CHACHA20_POLY1305_128] = 16, -+}; -+ -+static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type) -+{ -+ switch (type) { -+ case BCH_CSUM_CHACHA20_POLY1305_80: -+ case BCH_CSUM_CHACHA20_POLY1305_128: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+enum bch_csum_opts { -+ BCH_CSUM_OPT_NONE = 0, -+ BCH_CSUM_OPT_CRC32C = 1, -+ BCH_CSUM_OPT_CRC64 = 2, -+ BCH_CSUM_OPT_NR = 3, -+}; -+ -+#define BCH_COMPRESSION_TYPES() \ -+ x(none, 0) \ -+ x(lz4_old, 1) \ -+ x(gzip, 2) \ -+ x(lz4, 3) \ -+ x(zstd, 4) \ -+ x(incompressible, 5) -+ -+enum bch_compression_type { -+#define x(t, n) BCH_COMPRESSION_TYPE_##t, -+ BCH_COMPRESSION_TYPES() -+#undef x -+ BCH_COMPRESSION_TYPE_NR -+}; -+ -+#define BCH_COMPRESSION_OPTS() \ -+ x(none, 0) \ -+ x(lz4, 1) \ -+ x(gzip, 2) \ -+ x(zstd, 3) -+ -+enum bch_compression_opts { -+#define x(t, n) BCH_COMPRESSION_OPT_##t, -+ BCH_COMPRESSION_OPTS() -+#undef x -+ BCH_COMPRESSION_OPT_NR -+}; -+ -+/* -+ * Magic numbers -+ * -+ * The various other data structures have their own magic numbers, which are -+ * xored with the first part of the cache set's UUID -+ */ -+ -+#define BCACHE_MAGIC \ -+ UUID_LE(0xf67385c6, 0x1a4e, 0xca45, \ -+ 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81) -+ -+#define BCACHEFS_STATFS_MAGIC 0xca451a4e -+ -+#define JSET_MAGIC __cpu_to_le64(0x245235c1a3625032ULL) -+#define BSET_MAGIC __cpu_to_le64(0x90135c78b99e07f5ULL) -+ -+static inline __le64 __bch2_sb_magic(struct bch_sb *sb) -+{ -+ __le64 ret; -+ memcpy(&ret, &sb->uuid, sizeof(ret)); -+ return ret; -+} -+ -+static inline __u64 __jset_magic(struct bch_sb *sb) -+{ -+ return __le64_to_cpu(__bch2_sb_magic(sb) ^ JSET_MAGIC); -+} -+ -+static inline __u64 __bset_magic(struct bch_sb *sb) -+{ -+ return __le64_to_cpu(__bch2_sb_magic(sb) ^ BSET_MAGIC); -+} -+ -+/* Journal */ -+ -+#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64)) -+ -+#define BCH_JSET_ENTRY_TYPES() \ -+ x(btree_keys, 0) \ -+ x(btree_root, 1) \ -+ x(prio_ptrs, 2) \ -+ x(blacklist, 3) \ -+ x(blacklist_v2, 4) \ -+ x(usage, 5) \ -+ x(data_usage, 6) -+ -+enum { -+#define x(f, nr) BCH_JSET_ENTRY_##f = nr, -+ BCH_JSET_ENTRY_TYPES() -+#undef x -+ BCH_JSET_ENTRY_NR -+}; -+ -+/* -+ * Journal sequence numbers can be blacklisted: bsets record the max sequence -+ * number of all the journal entries they contain updates for, so that on -+ * recovery we can ignore those bsets that contain index updates 
newer that what -+ * made it into the journal. -+ * -+ * This means that we can't reuse that journal_seq - we have to skip it, and -+ * then record that we skipped it so that the next time we crash and recover we -+ * don't think there was a missing journal entry. -+ */ -+struct jset_entry_blacklist { -+ struct jset_entry entry; -+ __le64 seq; -+}; -+ -+struct jset_entry_blacklist_v2 { -+ struct jset_entry entry; -+ __le64 start; -+ __le64 end; -+}; -+ -+enum { -+ FS_USAGE_RESERVED = 0, -+ FS_USAGE_INODES = 1, -+ FS_USAGE_KEY_VERSION = 2, -+ FS_USAGE_NR = 3 -+}; -+ -+struct jset_entry_usage { -+ struct jset_entry entry; -+ __le64 v; -+} __attribute__((packed)); -+ -+struct jset_entry_data_usage { -+ struct jset_entry entry; -+ __le64 v; -+ struct bch_replicas_entry r; -+} __attribute__((packed)); -+ -+/* -+ * On disk format for a journal entry: -+ * seq is monotonically increasing; every journal entry has its own unique -+ * sequence number. -+ * -+ * last_seq is the oldest journal entry that still has keys the btree hasn't -+ * flushed to disk yet. -+ * -+ * version is for on disk format changes. -+ */ -+struct jset { -+ struct bch_csum csum; -+ -+ __le64 magic; -+ __le64 seq; -+ __le32 version; -+ __le32 flags; -+ -+ __le32 u64s; /* size of d[] in u64s */ -+ -+ __u8 encrypted_start[0]; -+ -+ __le16 read_clock; -+ __le16 write_clock; -+ -+ /* Sequence number of oldest dirty journal entry */ -+ __le64 last_seq; -+ -+ -+ union { -+ struct jset_entry start[0]; -+ __u64 _data[0]; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4); -+LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5); -+ -+#define BCH_JOURNAL_BUCKETS_MIN 8 -+ -+/* Btree: */ -+ -+#define BCH_BTREE_IDS() \ -+ x(EXTENTS, 0, "extents") \ -+ x(INODES, 1, "inodes") \ -+ x(DIRENTS, 2, "dirents") \ -+ x(XATTRS, 3, "xattrs") \ -+ x(ALLOC, 4, "alloc") \ -+ x(QUOTAS, 5, "quotas") \ -+ x(EC, 6, "stripes") \ -+ x(REFLINK, 7, "reflink") -+ -+enum btree_id { -+#define x(kwd, val, name) BTREE_ID_##kwd = val, -+ BCH_BTREE_IDS() -+#undef x -+ BTREE_ID_NR -+}; -+ -+#define BTREE_MAX_DEPTH 4U -+ -+/* Btree nodes */ -+ -+/* -+ * Btree nodes -+ * -+ * On disk a btree node is a list/log of these; within each set the keys are -+ * sorted -+ */ -+struct bset { -+ __le64 seq; -+ -+ /* -+ * Highest journal entry this bset contains keys for. -+ * If on recovery we don't see that journal entry, this bset is ignored: -+ * this allows us to preserve the order of all index updates after a -+ * crash, since the journal records a total order of all index updates -+ * and anything that didn't make it to the journal doesn't get used. 
-+ */ -+ __le64 journal_seq; -+ -+ __le32 flags; -+ __le16 version; -+ __le16 u64s; /* count of d[] in u64s */ -+ -+ union { -+ struct bkey_packed start[0]; -+ __u64 _data[0]; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4); -+ -+LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 4, 5); -+LE32_BITMASK(BSET_SEPARATE_WHITEOUTS, -+ struct bset, flags, 5, 6); -+ -+struct btree_node { -+ struct bch_csum csum; -+ __le64 magic; -+ -+ /* this flags field is encrypted, unlike bset->flags: */ -+ __le64 flags; -+ -+ /* Closed interval: */ -+ struct bpos min_key; -+ struct bpos max_key; -+ struct bch_extent_ptr ptr; -+ struct bkey_format format; -+ -+ union { -+ struct bset keys; -+ struct { -+ __u8 pad[22]; -+ __le16 u64s; -+ __u64 _data[0]; -+ -+ }; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+LE64_BITMASK(BTREE_NODE_ID, struct btree_node, flags, 0, 4); -+LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8); -+LE64_BITMASK(BTREE_NODE_NEW_EXTENT_OVERWRITE, -+ struct btree_node, flags, 8, 9); -+/* 9-32 unused */ -+LE64_BITMASK(BTREE_NODE_SEQ, struct btree_node, flags, 32, 64); -+ -+struct btree_node_entry { -+ struct bch_csum csum; -+ -+ union { -+ struct bset keys; -+ struct { -+ __u8 pad[22]; -+ __le16 u64s; -+ __u64 _data[0]; -+ -+ }; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+#endif /* _BCACHEFS_FORMAT_H */ -diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h -new file mode 100644 -index 000000000000..d71157a3e073 ---- /dev/null -+++ b/fs/bcachefs/bcachefs_ioctl.h -@@ -0,0 +1,332 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_IOCTL_H -+#define _BCACHEFS_IOCTL_H -+ -+#include -+#include -+#include "bcachefs_format.h" -+ -+/* -+ * Flags common to multiple ioctls: -+ */ -+#define BCH_FORCE_IF_DATA_LOST (1 << 0) -+#define BCH_FORCE_IF_METADATA_LOST (1 << 1) -+#define BCH_FORCE_IF_DATA_DEGRADED (1 << 2) -+#define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3) -+ -+#define BCH_FORCE_IF_DEGRADED \ -+ (BCH_FORCE_IF_DATA_DEGRADED| \ -+ BCH_FORCE_IF_METADATA_DEGRADED) -+ -+/* -+ * If cleared, ioctl that refer to a device pass it as a pointer to a pathname -+ * (e.g. 
/dev/sda1); if set, the dev field is the device's index within the -+ * filesystem: -+ */ -+#define BCH_BY_INDEX (1 << 4) -+ -+/* -+ * For BCH_IOCTL_READ_SUPER: get superblock of a specific device, not filesystem -+ * wide superblock: -+ */ -+#define BCH_READ_DEV (1 << 5) -+ -+/* global control dev: */ -+ -+/* These are currently broken, and probably unnecessary: */ -+#if 0 -+#define BCH_IOCTL_ASSEMBLE _IOW(0xbc, 1, struct bch_ioctl_assemble) -+#define BCH_IOCTL_INCREMENTAL _IOW(0xbc, 2, struct bch_ioctl_incremental) -+ -+struct bch_ioctl_assemble { -+ __u32 flags; -+ __u32 nr_devs; -+ __u64 pad; -+ __u64 devs[]; -+}; -+ -+struct bch_ioctl_incremental { -+ __u32 flags; -+ __u64 pad; -+ __u64 dev; -+}; -+#endif -+ -+/* filesystem ioctls: */ -+ -+#define BCH_IOCTL_QUERY_UUID _IOR(0xbc, 1, struct bch_ioctl_query_uuid) -+ -+/* These only make sense when we also have incremental assembly */ -+#if 0 -+#define BCH_IOCTL_START _IOW(0xbc, 2, struct bch_ioctl_start) -+#define BCH_IOCTL_STOP _IO(0xbc, 3) -+#endif -+ -+#define BCH_IOCTL_DISK_ADD _IOW(0xbc, 4, struct bch_ioctl_disk) -+#define BCH_IOCTL_DISK_REMOVE _IOW(0xbc, 5, struct bch_ioctl_disk) -+#define BCH_IOCTL_DISK_ONLINE _IOW(0xbc, 6, struct bch_ioctl_disk) -+#define BCH_IOCTL_DISK_OFFLINE _IOW(0xbc, 7, struct bch_ioctl_disk) -+#define BCH_IOCTL_DISK_SET_STATE _IOW(0xbc, 8, struct bch_ioctl_disk_set_state) -+#define BCH_IOCTL_DATA _IOW(0xbc, 10, struct bch_ioctl_data) -+#define BCH_IOCTL_FS_USAGE _IOWR(0xbc, 11, struct bch_ioctl_fs_usage) -+#define BCH_IOCTL_DEV_USAGE _IOWR(0xbc, 11, struct bch_ioctl_dev_usage) -+#define BCH_IOCTL_READ_SUPER _IOW(0xbc, 12, struct bch_ioctl_read_super) -+#define BCH_IOCTL_DISK_GET_IDX _IOW(0xbc, 13, struct bch_ioctl_disk_get_idx) -+#define BCH_IOCTL_DISK_RESIZE _IOW(0xbc, 14, struct bch_ioctl_disk_resize) -+ -+/* ioctl below act on a particular file, not the filesystem as a whole: */ -+ -+#define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *) -+ -+/* -+ * BCH_IOCTL_QUERY_UUID: get filesystem UUID -+ * -+ * Returns user visible UUID, not internal UUID (which may not ever be changed); -+ * the filesystem's sysfs directory may be found under /sys/fs/bcachefs with -+ * this UUID. -+ */ -+struct bch_ioctl_query_uuid { -+ uuid_le uuid; -+}; -+ -+#if 0 -+struct bch_ioctl_start { -+ __u32 flags; -+ __u32 pad; -+}; -+#endif -+ -+/* -+ * BCH_IOCTL_DISK_ADD: add a new device to an existing filesystem -+ * -+ * The specified device must not be open or in use. On success, the new device -+ * will be an online member of the filesystem just like any other member. -+ * -+ * The device must first be prepared by userspace by formatting with a bcachefs -+ * superblock, which is only used for passing in superblock options/parameters -+ * for that device (in struct bch_member). The new device's superblock should -+ * not claim to be a member of any existing filesystem - UUIDs on it will be -+ * ignored. -+ */ -+ -+/* -+ * BCH_IOCTL_DISK_REMOVE: permanently remove a member device from a filesystem -+ * -+ * Any data present on @dev will be permanently deleted, and @dev will be -+ * removed from its slot in the filesystem's list of member devices. The device -+ * may be either offline or offline. -+ * -+ * Will fail removing @dev would leave us with insufficient read write devices -+ * or degraded/unavailable data, unless the approprate BCH_FORCE_IF_* flags are -+ * set. -+ */ -+ -+/* -+ * BCH_IOCTL_DISK_ONLINE: given a disk that is already a member of a filesystem -+ * but is not open (e.g. 
because we started in degraded mode), bring it online -+ * -+ * all existing data on @dev will be available once the device is online, -+ * exactly as if @dev was present when the filesystem was first mounted -+ */ -+ -+/* -+ * BCH_IOCTL_DISK_OFFLINE: offline a disk, causing the kernel to close that -+ * block device, without removing it from the filesystem (so it can be brought -+ * back online later) -+ * -+ * Data present on @dev will be unavailable while @dev is offline (unless -+ * replicated), but will still be intact and untouched if @dev is brought back -+ * online -+ * -+ * Will fail (similarly to BCH_IOCTL_DISK_SET_STATE) if offlining @dev would -+ * leave us with insufficient read write devices or degraded/unavailable data, -+ * unless the approprate BCH_FORCE_IF_* flags are set. -+ */ -+ -+struct bch_ioctl_disk { -+ __u32 flags; -+ __u32 pad; -+ __u64 dev; -+}; -+ -+/* -+ * BCH_IOCTL_DISK_SET_STATE: modify state of a member device of a filesystem -+ * -+ * @new_state - one of the bch_member_state states (rw, ro, failed, -+ * spare) -+ * -+ * Will refuse to change member state if we would then have insufficient devices -+ * to write to, or if it would result in degraded data (when @new_state is -+ * failed or spare) unless the appropriate BCH_FORCE_IF_* flags are set. -+ */ -+struct bch_ioctl_disk_set_state { -+ __u32 flags; -+ __u8 new_state; -+ __u8 pad[3]; -+ __u64 dev; -+}; -+ -+enum bch_data_ops { -+ BCH_DATA_OP_SCRUB = 0, -+ BCH_DATA_OP_REREPLICATE = 1, -+ BCH_DATA_OP_MIGRATE = 2, -+ BCH_DATA_OP_NR = 3, -+}; -+ -+/* -+ * BCH_IOCTL_DATA: operations that walk and manipulate filesystem data (e.g. -+ * scrub, rereplicate, migrate). -+ * -+ * This ioctl kicks off a job in the background, and returns a file descriptor. -+ * Reading from the file descriptor returns a struct bch_ioctl_data_event, -+ * indicating current progress, and closing the file descriptor will stop the -+ * job. The file descriptor is O_CLOEXEC. -+ */ -+struct bch_ioctl_data { -+ __u32 op; -+ __u32 flags; -+ -+ struct bpos start; -+ struct bpos end; -+ -+ union { -+ struct { -+ __u32 dev; -+ __u32 pad; -+ } migrate; -+ struct { -+ __u64 pad[8]; -+ }; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+enum bch_data_event { -+ BCH_DATA_EVENT_PROGRESS = 0, -+ /* XXX: add an event for reporting errors */ -+ BCH_DATA_EVENT_NR = 1, -+}; -+ -+struct bch_ioctl_data_progress { -+ __u8 data_type; -+ __u8 btree_id; -+ __u8 pad[2]; -+ struct bpos pos; -+ -+ __u64 sectors_done; -+ __u64 sectors_total; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_ioctl_data_event { -+ __u8 type; -+ __u8 pad[7]; -+ union { -+ struct bch_ioctl_data_progress p; -+ __u64 pad2[15]; -+ }; -+} __attribute__((packed, aligned(8))); -+ -+struct bch_replicas_usage { -+ __u64 sectors; -+ struct bch_replicas_entry r; -+} __attribute__((packed)); -+ -+static inline struct bch_replicas_usage * -+replicas_usage_next(struct bch_replicas_usage *u) -+{ -+ return (void *) u + replicas_entry_bytes(&u->r) + 8; -+} -+ -+/* -+ * BCH_IOCTL_FS_USAGE: query filesystem disk space usage -+ * -+ * Returns disk space usage broken out by data type, number of replicas, and -+ * by component device -+ * -+ * @replica_entries_bytes - size, in bytes, allocated for replica usage entries -+ * -+ * On success, @replica_entries_bytes will be changed to indicate the number of -+ * bytes actually used. 
-+ * -+ * Returns -ERANGE if @replica_entries_bytes was too small -+ */ -+struct bch_ioctl_fs_usage { -+ __u64 capacity; -+ __u64 used; -+ __u64 online_reserved; -+ __u64 persistent_reserved[BCH_REPLICAS_MAX]; -+ -+ __u32 replica_entries_bytes; -+ __u32 pad; -+ -+ struct bch_replicas_usage replicas[0]; -+}; -+ -+/* -+ * BCH_IOCTL_DEV_USAGE: query device disk space usage -+ * -+ * Returns disk space usage broken out by data type - both by buckets and -+ * sectors. -+ */ -+struct bch_ioctl_dev_usage { -+ __u64 dev; -+ __u32 flags; -+ __u8 state; -+ __u8 pad[7]; -+ -+ __u32 bucket_size; -+ __u64 nr_buckets; -+ __u64 available_buckets; -+ -+ __u64 buckets[BCH_DATA_NR]; -+ __u64 sectors[BCH_DATA_NR]; -+ -+ __u64 ec_buckets; -+ __u64 ec_sectors; -+}; -+ -+/* -+ * BCH_IOCTL_READ_SUPER: read filesystem superblock -+ * -+ * Equivalent to reading the superblock directly from the block device, except -+ * avoids racing with the kernel writing the superblock or having to figure out -+ * which block device to read -+ * -+ * @sb - buffer to read into -+ * @size - size of userspace allocated buffer -+ * @dev - device to read superblock for, if BCH_READ_DEV flag is -+ * specified -+ * -+ * Returns -ERANGE if buffer provided is too small -+ */ -+struct bch_ioctl_read_super { -+ __u32 flags; -+ __u32 pad; -+ __u64 dev; -+ __u64 size; -+ __u64 sb; -+}; -+ -+/* -+ * BCH_IOCTL_DISK_GET_IDX: give a path to a block device, query filesystem to -+ * determine if disk is a (online) member - if so, returns device's index -+ * -+ * Returns -ENOENT if not found -+ */ -+struct bch_ioctl_disk_get_idx { -+ __u64 dev; -+}; -+ -+/* -+ * BCH_IOCTL_DISK_RESIZE: resize filesystem on a device -+ * -+ * @dev - member to resize -+ * @nbuckets - new number of buckets -+ */ -+struct bch_ioctl_disk_resize { -+ __u32 flags; -+ __u32 pad; -+ __u64 dev; -+ __u64 nbuckets; -+}; -+ -+#endif /* _BCACHEFS_IOCTL_H */ -diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c -new file mode 100644 -index 000000000000..4d0c9129cd4a ---- /dev/null -+++ b/fs/bcachefs/bkey.c -@@ -0,0 +1,1154 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey.h" -+#include "bkey_methods.h" -+#include "bset.h" -+#include "util.h" -+ -+#undef EBUG_ON -+ -+#ifdef DEBUG_BKEYS -+#define EBUG_ON(cond) BUG_ON(cond) -+#else -+#define EBUG_ON(cond) -+#endif -+ -+const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT; -+ -+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *, -+ const struct bkey_packed *); -+ -+void bch2_to_binary(char *out, const u64 *p, unsigned nr_bits) -+{ -+ unsigned bit = high_bit_offset, done = 0; -+ -+ while (1) { -+ while (bit < 64) { -+ if (done && !(done % 8)) -+ *out++ = ' '; -+ *out++ = *p & (1ULL << (63 - bit)) ? 
'1' : '0'; -+ bit++; -+ done++; -+ if (done == nr_bits) { -+ *out++ = '\0'; -+ return; -+ } -+ } -+ -+ p = next_word(p); -+ bit = 0; -+ } -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+static void bch2_bkey_pack_verify(const struct bkey_packed *packed, -+ const struct bkey *unpacked, -+ const struct bkey_format *format) -+{ -+ struct bkey tmp; -+ -+ BUG_ON(bkeyp_val_u64s(format, packed) != -+ bkey_val_u64s(unpacked)); -+ -+ BUG_ON(packed->u64s < bkeyp_key_u64s(format, packed)); -+ -+ tmp = __bch2_bkey_unpack_key(format, packed); -+ -+ if (memcmp(&tmp, unpacked, sizeof(struct bkey))) { -+ char buf1[160], buf2[160]; -+ char buf3[160], buf4[160]; -+ -+ bch2_bkey_to_text(&PBUF(buf1), unpacked); -+ bch2_bkey_to_text(&PBUF(buf2), &tmp); -+ bch2_to_binary(buf3, (void *) unpacked, 80); -+ bch2_to_binary(buf4, high_word(format, packed), 80); -+ -+ panic("keys differ: format u64s %u fields %u %u %u %u %u\n%s\n%s\n%s\n%s\n", -+ format->key_u64s, -+ format->bits_per_field[0], -+ format->bits_per_field[1], -+ format->bits_per_field[2], -+ format->bits_per_field[3], -+ format->bits_per_field[4], -+ buf1, buf2, buf3, buf4); -+ } -+} -+ -+#else -+static inline void bch2_bkey_pack_verify(const struct bkey_packed *packed, -+ const struct bkey *unpacked, -+ const struct bkey_format *format) {} -+#endif -+ -+struct pack_state { -+ const struct bkey_format *format; -+ unsigned bits; /* bits remaining in current word */ -+ u64 w; /* current word */ -+ u64 *p; /* pointer to next word */ -+}; -+ -+__always_inline -+static struct pack_state pack_state_init(const struct bkey_format *format, -+ struct bkey_packed *k) -+{ -+ u64 *p = high_word(format, k); -+ -+ return (struct pack_state) { -+ .format = format, -+ .bits = 64 - high_bit_offset, -+ .w = 0, -+ .p = p, -+ }; -+} -+ -+__always_inline -+static void pack_state_finish(struct pack_state *state, -+ struct bkey_packed *k) -+{ -+ EBUG_ON(state->p < k->_data); -+ EBUG_ON(state->p >= k->_data + state->format->key_u64s); -+ -+ *state->p = state->w; -+} -+ -+struct unpack_state { -+ const struct bkey_format *format; -+ unsigned bits; /* bits remaining in current word */ -+ u64 w; /* current word */ -+ const u64 *p; /* pointer to next word */ -+}; -+ -+__always_inline -+static struct unpack_state unpack_state_init(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ const u64 *p = high_word(format, k); -+ -+ return (struct unpack_state) { -+ .format = format, -+ .bits = 64 - high_bit_offset, -+ .w = *p << high_bit_offset, -+ .p = p, -+ }; -+} -+ -+__always_inline -+static u64 get_inc_field(struct unpack_state *state, unsigned field) -+{ -+ unsigned bits = state->format->bits_per_field[field]; -+ u64 v = 0, offset = le64_to_cpu(state->format->field_offset[field]); -+ -+ if (bits >= state->bits) { -+ v = state->w >> (64 - bits); -+ bits -= state->bits; -+ -+ state->p = next_word(state->p); -+ state->w = *state->p; -+ state->bits = 64; -+ } -+ -+ /* avoid shift by 64 if bits is 0 - bits is never 64 here: */ -+ v |= (state->w >> 1) >> (63 - bits); -+ state->w <<= bits; -+ state->bits -= bits; -+ -+ return v + offset; -+} -+ -+__always_inline -+static bool set_inc_field(struct pack_state *state, unsigned field, u64 v) -+{ -+ unsigned bits = state->format->bits_per_field[field]; -+ u64 offset = le64_to_cpu(state->format->field_offset[field]); -+ -+ if (v < offset) -+ return false; -+ -+ v -= offset; -+ -+ if (fls64(v) > bits) -+ return false; -+ -+ if (bits > state->bits) { -+ bits -= state->bits; -+ /* avoid shift by 64 if bits is 0 - bits is never 64 here: 
*/ -+ state->w |= (v >> 1) >> (bits - 1); -+ -+ *state->p = state->w; -+ state->p = next_word(state->p); -+ state->w = 0; -+ state->bits = 64; -+ } -+ -+ state->bits -= bits; -+ state->w |= v << state->bits; -+ -+ return true; -+} -+ -+/* -+ * Note: does NOT set out->format (we don't know what it should be here!) -+ * -+ * Also: doesn't work on extents - it doesn't preserve the invariant that -+ * if k is packed bkey_start_pos(k) will successfully pack -+ */ -+static bool bch2_bkey_transform_key(const struct bkey_format *out_f, -+ struct bkey_packed *out, -+ const struct bkey_format *in_f, -+ const struct bkey_packed *in) -+{ -+ struct pack_state out_s = pack_state_init(out_f, out); -+ struct unpack_state in_s = unpack_state_init(in_f, in); -+ unsigned i; -+ -+ out->_data[0] = 0; -+ -+ for (i = 0; i < BKEY_NR_FIELDS; i++) -+ if (!set_inc_field(&out_s, i, get_inc_field(&in_s, i))) -+ return false; -+ -+ /* Can't happen because the val would be too big to unpack: */ -+ EBUG_ON(in->u64s - in_f->key_u64s + out_f->key_u64s > U8_MAX); -+ -+ pack_state_finish(&out_s, out); -+ out->u64s = out_f->key_u64s + in->u64s - in_f->key_u64s; -+ out->needs_whiteout = in->needs_whiteout; -+ out->type = in->type; -+ -+ return true; -+} -+ -+bool bch2_bkey_transform(const struct bkey_format *out_f, -+ struct bkey_packed *out, -+ const struct bkey_format *in_f, -+ const struct bkey_packed *in) -+{ -+ if (!bch2_bkey_transform_key(out_f, out, in_f, in)) -+ return false; -+ -+ memcpy_u64s((u64 *) out + out_f->key_u64s, -+ (u64 *) in + in_f->key_u64s, -+ (in->u64s - in_f->key_u64s)); -+ return true; -+} -+ -+#define bkey_fields() \ -+ x(BKEY_FIELD_INODE, p.inode) \ -+ x(BKEY_FIELD_OFFSET, p.offset) \ -+ x(BKEY_FIELD_SNAPSHOT, p.snapshot) \ -+ x(BKEY_FIELD_SIZE, size) \ -+ x(BKEY_FIELD_VERSION_HI, version.hi) \ -+ x(BKEY_FIELD_VERSION_LO, version.lo) -+ -+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *format, -+ const struct bkey_packed *in) -+{ -+ struct unpack_state state = unpack_state_init(format, in); -+ struct bkey out; -+ -+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); -+ EBUG_ON(in->u64s < format->key_u64s); -+ EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE); -+ EBUG_ON(in->u64s - format->key_u64s + BKEY_U64s > U8_MAX); -+ -+ out.u64s = BKEY_U64s + in->u64s - format->key_u64s; -+ out.format = KEY_FORMAT_CURRENT; -+ out.needs_whiteout = in->needs_whiteout; -+ out.type = in->type; -+ out.pad[0] = 0; -+ -+#define x(id, field) out.field = get_inc_field(&state, id); -+ bkey_fields() -+#undef x -+ -+ return out; -+} -+ -+#ifndef HAVE_BCACHEFS_COMPILED_UNPACK -+struct bpos __bkey_unpack_pos(const struct bkey_format *format, -+ const struct bkey_packed *in) -+{ -+ struct unpack_state state = unpack_state_init(format, in); -+ struct bpos out; -+ -+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); -+ EBUG_ON(in->u64s < format->key_u64s); -+ EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE); -+ -+ out.inode = get_inc_field(&state, BKEY_FIELD_INODE); -+ out.offset = get_inc_field(&state, BKEY_FIELD_OFFSET); -+ out.snapshot = get_inc_field(&state, BKEY_FIELD_SNAPSHOT); -+ -+ return out; -+} -+#endif -+ -+/** -+ * bch2_bkey_pack_key -- pack just the key, not the value -+ */ -+bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in, -+ const struct bkey_format *format) -+{ -+ struct pack_state state = pack_state_init(format, out); -+ -+ EBUG_ON((void *) in == (void *) out); -+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); -+ EBUG_ON(in->format != KEY_FORMAT_CURRENT); -+ -+ out->_data[0] = 0; -+ 
-+#define x(id, field) if (!set_inc_field(&state, id, in->field)) return false; -+ bkey_fields() -+#undef x -+ -+ /* -+ * Extents - we have to guarantee that if an extent is packed, a trimmed -+ * version will also pack: -+ */ -+ if (bkey_start_offset(in) < -+ le64_to_cpu(format->field_offset[BKEY_FIELD_OFFSET])) -+ return false; -+ -+ pack_state_finish(&state, out); -+ out->u64s = format->key_u64s + in->u64s - BKEY_U64s; -+ out->format = KEY_FORMAT_LOCAL_BTREE; -+ out->needs_whiteout = in->needs_whiteout; -+ out->type = in->type; -+ -+ bch2_bkey_pack_verify(out, in, format); -+ return true; -+} -+ -+/** -+ * bch2_bkey_unpack -- unpack the key and the value -+ */ -+void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst, -+ const struct bkey_packed *src) -+{ -+ __bkey_unpack_key(b, &dst->k, src); -+ -+ memcpy_u64s(&dst->v, -+ bkeyp_val(&b->format, src), -+ bkeyp_val_u64s(&b->format, src)); -+} -+ -+/** -+ * bch2_bkey_pack -- pack the key and the value -+ */ -+bool bch2_bkey_pack(struct bkey_packed *out, const struct bkey_i *in, -+ const struct bkey_format *format) -+{ -+ struct bkey_packed tmp; -+ -+ if (!bch2_bkey_pack_key(&tmp, &in->k, format)) -+ return false; -+ -+ memmove_u64s((u64 *) out + format->key_u64s, -+ &in->v, -+ bkey_val_u64s(&in->k)); -+ memcpy_u64s(out, &tmp, format->key_u64s); -+ -+ return true; -+} -+ -+__always_inline -+static bool set_inc_field_lossy(struct pack_state *state, unsigned field, u64 v) -+{ -+ unsigned bits = state->format->bits_per_field[field]; -+ u64 offset = le64_to_cpu(state->format->field_offset[field]); -+ bool ret = true; -+ -+ EBUG_ON(v < offset); -+ v -= offset; -+ -+ if (fls64(v) > bits) { -+ v = ~(~0ULL << bits); -+ ret = false; -+ } -+ -+ if (bits > state->bits) { -+ bits -= state->bits; -+ state->w |= (v >> 1) >> (bits - 1); -+ -+ *state->p = state->w; -+ state->p = next_word(state->p); -+ state->w = 0; -+ state->bits = 64; -+ } -+ -+ state->bits -= bits; -+ state->w |= v << state->bits; -+ -+ return ret; -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+static bool bkey_packed_successor(struct bkey_packed *out, -+ const struct btree *b, -+ struct bkey_packed k) -+{ -+ const struct bkey_format *f = &b->format; -+ unsigned nr_key_bits = b->nr_key_bits; -+ unsigned first_bit, offset; -+ u64 *p; -+ -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f)); -+ -+ if (!nr_key_bits) -+ return false; -+ -+ *out = k; -+ -+ first_bit = high_bit_offset + nr_key_bits - 1; -+ p = nth_word(high_word(f, out), first_bit >> 6); -+ offset = 63 - (first_bit & 63); -+ -+ while (nr_key_bits) { -+ unsigned bits = min(64 - offset, nr_key_bits); -+ u64 mask = (~0ULL >> (64 - bits)) << offset; -+ -+ if ((*p & mask) != mask) { -+ *p += 1ULL << offset; -+ EBUG_ON(bkey_cmp_packed(b, out, &k) <= 0); -+ return true; -+ } -+ -+ *p &= ~mask; -+ p = prev_word(p); -+ nr_key_bits -= bits; -+ offset = 0; -+ } -+ -+ return false; -+} -+#endif -+ -+/* -+ * Returns a packed key that compares <= in -+ * -+ * This is used in bset_search_tree(), where we need a packed pos in order to be -+ * able to compare against the keys in the auxiliary search tree - and it's -+ * legal to use a packed pos that isn't equivalent to the original pos, -+ * _provided_ it compares <= to the original pos. 
-+ */ -+enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out, -+ struct bpos in, -+ const struct btree *b) -+{ -+ const struct bkey_format *f = &b->format; -+ struct pack_state state = pack_state_init(f, out); -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bpos orig = in; -+#endif -+ bool exact = true; -+ -+ out->_data[0] = 0; -+ -+ if (unlikely(in.snapshot < -+ le64_to_cpu(f->field_offset[BKEY_FIELD_SNAPSHOT]))) { -+ if (!in.offset-- && -+ !in.inode--) -+ return BKEY_PACK_POS_FAIL; -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (unlikely(in.offset < -+ le64_to_cpu(f->field_offset[BKEY_FIELD_OFFSET]))) { -+ if (!in.inode--) -+ return BKEY_PACK_POS_FAIL; -+ in.offset = KEY_OFFSET_MAX; -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (unlikely(in.inode < -+ le64_to_cpu(f->field_offset[BKEY_FIELD_INODE]))) -+ return BKEY_PACK_POS_FAIL; -+ -+ if (!set_inc_field_lossy(&state, BKEY_FIELD_INODE, in.inode)) { -+ in.offset = KEY_OFFSET_MAX; -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (!set_inc_field_lossy(&state, BKEY_FIELD_OFFSET, in.offset)) { -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (!set_inc_field_lossy(&state, BKEY_FIELD_SNAPSHOT, in.snapshot)) -+ exact = false; -+ -+ pack_state_finish(&state, out); -+ out->u64s = f->key_u64s; -+ out->format = KEY_FORMAT_LOCAL_BTREE; -+ out->type = KEY_TYPE_deleted; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ if (exact) { -+ BUG_ON(bkey_cmp_left_packed(b, out, &orig)); -+ } else { -+ struct bkey_packed successor; -+ -+ BUG_ON(bkey_cmp_left_packed(b, out, &orig) >= 0); -+ BUG_ON(bkey_packed_successor(&successor, b, *out) && -+ bkey_cmp_left_packed(b, &successor, &orig) < 0); -+ } -+#endif -+ -+ return exact ? BKEY_PACK_POS_EXACT : BKEY_PACK_POS_SMALLER; -+} -+ -+void bch2_bkey_format_init(struct bkey_format_state *s) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(s->field_min); i++) -+ s->field_min[i] = U64_MAX; -+ -+ for (i = 0; i < ARRAY_SIZE(s->field_max); i++) -+ s->field_max[i] = 0; -+ -+ /* Make sure we can store a size of 0: */ -+ s->field_min[BKEY_FIELD_SIZE] = 0; -+} -+ -+static void __bkey_format_add(struct bkey_format_state *s, -+ unsigned field, u64 v) -+{ -+ s->field_min[field] = min(s->field_min[field], v); -+ s->field_max[field] = max(s->field_max[field], v); -+} -+ -+/* -+ * Changes @format so that @k can be successfully packed with @format -+ */ -+void bch2_bkey_format_add_key(struct bkey_format_state *s, const struct bkey *k) -+{ -+#define x(id, field) __bkey_format_add(s, id, k->field); -+ bkey_fields() -+#undef x -+ __bkey_format_add(s, BKEY_FIELD_OFFSET, bkey_start_offset(k)); -+} -+ -+void bch2_bkey_format_add_pos(struct bkey_format_state *s, struct bpos p) -+{ -+ unsigned field = 0; -+ -+ __bkey_format_add(s, field++, p.inode); -+ __bkey_format_add(s, field++, p.offset); -+ __bkey_format_add(s, field++, p.snapshot); -+} -+ -+/* -+ * We don't want it to be possible for the packed format to represent fields -+ * bigger than a u64... that will cause confusion and issues (like with -+ * bkey_packed_successor()) -+ */ -+static void set_format_field(struct bkey_format *f, enum bch_bkey_fields i, -+ unsigned bits, u64 offset) -+{ -+ offset = bits == 64 ? 
0 : min(offset, U64_MAX - ((1ULL << bits) - 1)); -+ -+ f->bits_per_field[i] = bits; -+ f->field_offset[i] = cpu_to_le64(offset); -+} -+ -+struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s) -+{ -+ unsigned i, bits = KEY_PACKED_BITS_START; -+ struct bkey_format ret = { -+ .nr_fields = BKEY_NR_FIELDS, -+ }; -+ -+ for (i = 0; i < ARRAY_SIZE(s->field_min); i++) { -+ s->field_min[i] = min(s->field_min[i], s->field_max[i]); -+ -+ set_format_field(&ret, i, -+ fls64(s->field_max[i] - s->field_min[i]), -+ s->field_min[i]); -+ -+ bits += ret.bits_per_field[i]; -+ } -+ -+ /* allow for extent merging: */ -+ if (ret.bits_per_field[BKEY_FIELD_SIZE]) { -+ ret.bits_per_field[BKEY_FIELD_SIZE] += 4; -+ bits += 4; -+ } -+ -+ ret.key_u64s = DIV_ROUND_UP(bits, 64); -+ -+ /* if we have enough spare bits, round fields up to nearest byte */ -+ bits = ret.key_u64s * 64 - bits; -+ -+ for (i = 0; i < ARRAY_SIZE(ret.bits_per_field); i++) { -+ unsigned r = round_up(ret.bits_per_field[i], 8) - -+ ret.bits_per_field[i]; -+ -+ if (r <= bits) { -+ set_format_field(&ret, i, -+ ret.bits_per_field[i] + r, -+ le64_to_cpu(ret.field_offset[i])); -+ bits -= r; -+ } -+ } -+ -+ EBUG_ON(bch2_bkey_format_validate(&ret)); -+ return ret; -+} -+ -+const char *bch2_bkey_format_validate(struct bkey_format *f) -+{ -+ unsigned i, bits = KEY_PACKED_BITS_START; -+ -+ if (f->nr_fields != BKEY_NR_FIELDS) -+ return "incorrect number of fields"; -+ -+ for (i = 0; i < f->nr_fields; i++) { -+ u64 field_offset = le64_to_cpu(f->field_offset[i]); -+ -+ if (f->bits_per_field[i] > 64) -+ return "field too large"; -+ -+ if (field_offset && -+ (f->bits_per_field[i] == 64 || -+ (field_offset + ((1ULL << f->bits_per_field[i]) - 1) < -+ field_offset))) -+ return "offset + bits overflow"; -+ -+ bits += f->bits_per_field[i]; -+ } -+ -+ if (f->key_u64s != DIV_ROUND_UP(bits, 64)) -+ return "incorrect key_u64s"; -+ -+ return NULL; -+} -+ -+/* -+ * Most significant differing bit -+ * Bits are indexed from 0 - return is [0, nr_key_bits) -+ */ -+__pure -+unsigned bch2_bkey_greatest_differing_bit(const struct btree *b, -+ const struct bkey_packed *l_k, -+ const struct bkey_packed *r_k) -+{ -+ const u64 *l = high_word(&b->format, l_k); -+ const u64 *r = high_word(&b->format, r_k); -+ unsigned nr_key_bits = b->nr_key_bits; -+ unsigned word_bits = 64 - high_bit_offset; -+ u64 l_v, r_v; -+ -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format)); -+ -+ /* for big endian, skip past header */ -+ l_v = *l & (~0ULL >> high_bit_offset); -+ r_v = *r & (~0ULL >> high_bit_offset); -+ -+ while (nr_key_bits) { -+ if (nr_key_bits < word_bits) { -+ l_v >>= word_bits - nr_key_bits; -+ r_v >>= word_bits - nr_key_bits; -+ nr_key_bits = 0; -+ } else { -+ nr_key_bits -= word_bits; -+ } -+ -+ if (l_v != r_v) -+ return fls64(l_v ^ r_v) - 1 + nr_key_bits; -+ -+ l = next_word(l); -+ r = next_word(r); -+ -+ l_v = *l; -+ r_v = *r; -+ word_bits = 64; -+ } -+ -+ return 0; -+} -+ -+/* -+ * First set bit -+ * Bits are indexed from 0 - return is [0, nr_key_bits) -+ */ -+__pure -+unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k) -+{ -+ const u64 *p = high_word(&b->format, k); -+ unsigned nr_key_bits = b->nr_key_bits; -+ unsigned ret = 0, offset; -+ -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format)); -+ -+ offset = nr_key_bits; -+ while (offset > 64) { -+ p = next_word(p); -+ offset -= 64; -+ } -+ -+ offset = 64 - offset; -+ -+ while (nr_key_bits) { -+ unsigned bits = nr_key_bits + offset < 64 -+ ? 
nr_key_bits -+ : 64 - offset; -+ -+ u64 mask = (~0ULL >> (64 - bits)) << offset; -+ -+ if (*p & mask) -+ return ret + __ffs64(*p & mask) - offset; -+ -+ p = prev_word(p); -+ nr_key_bits -= bits; -+ ret += bits; -+ offset = 0; -+ } -+ -+ return 0; -+} -+ -+#ifdef CONFIG_X86_64 -+ -+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r, -+ unsigned nr_key_bits) -+{ -+ long d0, d1, d2, d3; -+ int cmp; -+ -+ /* we shouldn't need asm for this, but gcc is being retarded: */ -+ -+ asm(".intel_syntax noprefix;" -+ "xor eax, eax;" -+ "xor edx, edx;" -+ "1:;" -+ "mov r8, [rdi];" -+ "mov r9, [rsi];" -+ "sub ecx, 64;" -+ "jl 2f;" -+ -+ "cmp r8, r9;" -+ "jnz 3f;" -+ -+ "lea rdi, [rdi - 8];" -+ "lea rsi, [rsi - 8];" -+ "jmp 1b;" -+ -+ "2:;" -+ "not ecx;" -+ "shr r8, 1;" -+ "shr r9, 1;" -+ "shr r8, cl;" -+ "shr r9, cl;" -+ "cmp r8, r9;" -+ -+ "3:\n" -+ "seta al;" -+ "setb dl;" -+ "sub eax, edx;" -+ ".att_syntax prefix;" -+ : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp) -+ : "0" (l), "1" (r), "3" (nr_key_bits) -+ : "r8", "r9", "cc", "memory"); -+ -+ return cmp; -+} -+ -+#define I(_x) (*(out)++ = (_x)) -+#define I1(i0) I(i0) -+#define I2(i0, i1) (I1(i0), I(i1)) -+#define I3(i0, i1, i2) (I2(i0, i1), I(i2)) -+#define I4(i0, i1, i2, i3) (I3(i0, i1, i2), I(i3)) -+#define I5(i0, i1, i2, i3, i4) (I4(i0, i1, i2, i3), I(i4)) -+ -+static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out, -+ enum bch_bkey_fields field, -+ unsigned dst_offset, unsigned dst_size, -+ bool *eax_zeroed) -+{ -+ unsigned bits = format->bits_per_field[field]; -+ u64 offset = le64_to_cpu(format->field_offset[field]); -+ unsigned i, byte, bit_offset, align, shl, shr; -+ -+ if (!bits && !offset) { -+ if (!*eax_zeroed) { -+ /* xor eax, eax */ -+ I2(0x31, 0xc0); -+ } -+ -+ *eax_zeroed = true; -+ goto set_field; -+ } -+ -+ if (!bits) { -+ /* just return offset: */ -+ -+ switch (dst_size) { -+ case 8: -+ if (offset > S32_MAX) { -+ /* mov [rdi + dst_offset], offset */ -+ I3(0xc7, 0x47, dst_offset); -+ memcpy(out, &offset, 4); -+ out += 4; -+ -+ I3(0xc7, 0x47, dst_offset + 4); -+ memcpy(out, (void *) &offset + 4, 4); -+ out += 4; -+ } else { -+ /* mov [rdi + dst_offset], offset */ -+ /* sign extended */ -+ I4(0x48, 0xc7, 0x47, dst_offset); -+ memcpy(out, &offset, 4); -+ out += 4; -+ } -+ break; -+ case 4: -+ /* mov [rdi + dst_offset], offset */ -+ I3(0xc7, 0x47, dst_offset); -+ memcpy(out, &offset, 4); -+ out += 4; -+ break; -+ default: -+ BUG(); -+ } -+ -+ return out; -+ } -+ -+ bit_offset = format->key_u64s * 64; -+ for (i = 0; i <= field; i++) -+ bit_offset -= format->bits_per_field[i]; -+ -+ byte = bit_offset / 8; -+ bit_offset -= byte * 8; -+ -+ *eax_zeroed = false; -+ -+ if (bit_offset == 0 && bits == 8) { -+ /* movzx eax, BYTE PTR [rsi + imm8] */ -+ I4(0x0f, 0xb6, 0x46, byte); -+ } else if (bit_offset == 0 && bits == 16) { -+ /* movzx eax, WORD PTR [rsi + imm8] */ -+ I4(0x0f, 0xb7, 0x46, byte); -+ } else if (bit_offset + bits <= 32) { -+ align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3); -+ byte -= align; -+ bit_offset += align * 8; -+ -+ BUG_ON(bit_offset + bits > 32); -+ -+ /* mov eax, [rsi + imm8] */ -+ I3(0x8b, 0x46, byte); -+ -+ if (bit_offset) { -+ /* shr eax, imm8 */ -+ I3(0xc1, 0xe8, bit_offset); -+ } -+ -+ if (bit_offset + bits < 32) { -+ unsigned mask = ~0U >> (32 - bits); -+ -+ /* and eax, imm32 */ -+ I1(0x25); -+ memcpy(out, &mask, 4); -+ out += 4; -+ } -+ } else if (bit_offset + bits <= 64) { -+ align = min(8 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 7); -+ byte -= align; -+ 
bit_offset += align * 8; -+ -+ BUG_ON(bit_offset + bits > 64); -+ -+ /* mov rax, [rsi + imm8] */ -+ I4(0x48, 0x8b, 0x46, byte); -+ -+ shl = 64 - bit_offset - bits; -+ shr = bit_offset + shl; -+ -+ if (shl) { -+ /* shl rax, imm8 */ -+ I4(0x48, 0xc1, 0xe0, shl); -+ } -+ -+ if (shr) { -+ /* shr rax, imm8 */ -+ I4(0x48, 0xc1, 0xe8, shr); -+ } -+ } else { -+ align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3); -+ byte -= align; -+ bit_offset += align * 8; -+ -+ BUG_ON(bit_offset + bits > 96); -+ -+ /* mov rax, [rsi + byte] */ -+ I4(0x48, 0x8b, 0x46, byte); -+ -+ /* mov edx, [rsi + byte + 8] */ -+ I3(0x8b, 0x56, byte + 8); -+ -+ /* bits from next word: */ -+ shr = bit_offset + bits - 64; -+ BUG_ON(shr > bit_offset); -+ -+ /* shr rax, bit_offset */ -+ I4(0x48, 0xc1, 0xe8, shr); -+ -+ /* shl rdx, imm8 */ -+ I4(0x48, 0xc1, 0xe2, 64 - shr); -+ -+ /* or rax, rdx */ -+ I3(0x48, 0x09, 0xd0); -+ -+ shr = bit_offset - shr; -+ -+ if (shr) { -+ /* shr rax, imm8 */ -+ I4(0x48, 0xc1, 0xe8, shr); -+ } -+ } -+ -+ /* rax += offset: */ -+ if (offset > S32_MAX) { -+ /* mov rdx, imm64 */ -+ I2(0x48, 0xba); -+ memcpy(out, &offset, 8); -+ out += 8; -+ /* add %rdx, %rax */ -+ I3(0x48, 0x01, 0xd0); -+ } else if (offset + (~0ULL >> (64 - bits)) > U32_MAX) { -+ /* add rax, imm32 */ -+ I2(0x48, 0x05); -+ memcpy(out, &offset, 4); -+ out += 4; -+ } else if (offset) { -+ /* add eax, imm32 */ -+ I1(0x05); -+ memcpy(out, &offset, 4); -+ out += 4; -+ } -+set_field: -+ switch (dst_size) { -+ case 8: -+ /* mov [rdi + dst_offset], rax */ -+ I4(0x48, 0x89, 0x47, dst_offset); -+ break; -+ case 4: -+ /* mov [rdi + dst_offset], eax */ -+ I3(0x89, 0x47, dst_offset); -+ break; -+ default: -+ BUG(); -+ } -+ -+ return out; -+} -+ -+int bch2_compile_bkey_format(const struct bkey_format *format, void *_out) -+{ -+ bool eax_zeroed = false; -+ u8 *out = _out; -+ -+ /* -+ * rdi: dst - unpacked key -+ * rsi: src - packed key -+ */ -+ -+ /* k->u64s, k->format, k->type */ -+ -+ /* mov eax, [rsi] */ -+ I2(0x8b, 0x06); -+ -+ /* add eax, BKEY_U64s - format->key_u64s */ -+ I5(0x05, BKEY_U64s - format->key_u64s, KEY_FORMAT_CURRENT, 0, 0); -+ -+ /* and eax, imm32: mask out k->pad: */ -+ I5(0x25, 0xff, 0xff, 0xff, 0); -+ -+ /* mov [rdi], eax */ -+ I2(0x89, 0x07); -+ -+#define x(id, field) \ -+ out = compile_bkey_field(format, out, id, \ -+ offsetof(struct bkey, field), \ -+ sizeof(((struct bkey *) NULL)->field), \ -+ &eax_zeroed); -+ bkey_fields() -+#undef x -+ -+ /* retq */ -+ I1(0xc3); -+ -+ return (void *) out - _out; -+} -+ -+#else -+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r, -+ unsigned nr_key_bits) -+{ -+ u64 l_v, r_v; -+ -+ if (!nr_key_bits) -+ return 0; -+ -+ /* for big endian, skip past header */ -+ nr_key_bits += high_bit_offset; -+ l_v = *l & (~0ULL >> high_bit_offset); -+ r_v = *r & (~0ULL >> high_bit_offset); -+ -+ while (1) { -+ if (nr_key_bits < 64) { -+ l_v >>= 64 - nr_key_bits; -+ r_v >>= 64 - nr_key_bits; -+ nr_key_bits = 0; -+ } else { -+ nr_key_bits -= 64; -+ } -+ -+ if (!nr_key_bits || l_v != r_v) -+ break; -+ -+ l = next_word(l); -+ r = next_word(r); -+ -+ l_v = *l; -+ r_v = *r; -+ } -+ -+ return cmp_int(l_v, r_v); -+} -+#endif -+ -+__pure -+int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l, -+ const struct bkey_packed *r, -+ const struct btree *b) -+{ -+ const struct bkey_format *f = &b->format; -+ int ret; -+ -+ EBUG_ON(!bkey_packed(l) || !bkey_packed(r)); -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f)); -+ -+ ret = __bkey_cmp_bits(high_word(f, l), -+ high_word(f, r), -+ 
b->nr_key_bits); -+ -+ EBUG_ON(ret != bkey_cmp(bkey_unpack_pos(b, l), -+ bkey_unpack_pos(b, r))); -+ return ret; -+} -+ -+__pure __flatten -+int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bpos *r) -+{ -+ return bkey_cmp(bkey_unpack_pos_format_checked(b, l), *r); -+} -+ -+__pure __flatten -+int __bch2_bkey_cmp_packed(const struct bkey_packed *l, -+ const struct bkey_packed *r, -+ const struct btree *b) -+{ -+ struct bkey unpacked; -+ -+ if (likely(bkey_packed(l) && bkey_packed(r))) -+ return __bch2_bkey_cmp_packed_format_checked(l, r, b); -+ -+ if (bkey_packed(l)) { -+ __bkey_unpack_key_format_checked(b, &unpacked, l); -+ l = (void*) &unpacked; -+ } else if (bkey_packed(r)) { -+ __bkey_unpack_key_format_checked(b, &unpacked, r); -+ r = (void*) &unpacked; -+ } -+ -+ return bkey_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p); -+} -+ -+__pure __flatten -+int __bch2_bkey_cmp_left_packed(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bpos *r) -+{ -+ const struct bkey *l_unpacked; -+ -+ return unlikely(l_unpacked = packed_to_bkey_c(l)) -+ ? bkey_cmp(l_unpacked->p, *r) -+ : __bch2_bkey_cmp_left_packed_format_checked(b, l, r); -+} -+ -+void bch2_bpos_swab(struct bpos *p) -+{ -+ u8 *l = (u8 *) p; -+ u8 *h = ((u8 *) &p[1]) - 1; -+ -+ while (l < h) { -+ swap(*l, *h); -+ l++; -+ --h; -+ } -+} -+ -+void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k) -+{ -+ const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current; -+ u8 *l = k->key_start; -+ u8 *h = (u8 *) (k->_data + f->key_u64s) - 1; -+ -+ while (l < h) { -+ swap(*l, *h); -+ l++; -+ --h; -+ } -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_bkey_pack_test(void) -+{ -+ struct bkey t = KEY(4134ULL, 1250629070527416633ULL, 0); -+ struct bkey_packed p; -+ -+ struct bkey_format test_format = { -+ .key_u64s = 2, -+ .nr_fields = BKEY_NR_FIELDS, -+ .bits_per_field = { -+ 13, -+ 64, -+ }, -+ }; -+ -+ struct unpack_state in_s = -+ unpack_state_init(&bch2_bkey_format_current, (void *) &t); -+ struct pack_state out_s = pack_state_init(&test_format, &p); -+ unsigned i; -+ -+ for (i = 0; i < out_s.format->nr_fields; i++) { -+ u64 a, v = get_inc_field(&in_s, i); -+ -+ switch (i) { -+#define x(id, field) case id: a = t.field; break; -+ bkey_fields() -+#undef x -+ default: -+ BUG(); -+ } -+ -+ if (a != v) -+ panic("got %llu actual %llu i %u\n", v, a, i); -+ -+ if (!set_inc_field(&out_s, i, v)) -+ panic("failed at %u\n", i); -+ } -+ -+ BUG_ON(!bch2_bkey_pack_key(&p, &t, &test_format)); -+} -+#endif -diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h -new file mode 100644 -index 000000000000..cbcfbd26bc58 ---- /dev/null -+++ b/fs/bcachefs/bkey.h -@@ -0,0 +1,605 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BKEY_H -+#define _BCACHEFS_BKEY_H -+ -+#include -+#include "bcachefs_format.h" -+ -+#include "util.h" -+#include "vstructs.h" -+ -+#ifdef CONFIG_X86_64 -+#define HAVE_BCACHEFS_COMPILED_UNPACK 1 -+#endif -+ -+void bch2_to_binary(char *, const u64 *, unsigned); -+ -+/* bkey with split value, const */ -+struct bkey_s_c { -+ const struct bkey *k; -+ const struct bch_val *v; -+}; -+ -+/* bkey with split value */ -+struct bkey_s { -+ union { -+ struct { -+ struct bkey *k; -+ struct bch_val *v; -+ }; -+ struct bkey_s_c s_c; -+ }; -+}; -+ -+#define bkey_next(_k) vstruct_next(_k) -+ -+static inline struct bkey_packed *bkey_next_skip_noops(struct bkey_packed *k, -+ struct bkey_packed *end) -+{ -+ k = bkey_next(k); 
-+ -+ while (k != end && !k->u64s) -+ k = (void *) ((u64 *) k + 1); -+ return k; -+} -+ -+#define bkey_val_u64s(_k) ((_k)->u64s - BKEY_U64s) -+ -+static inline size_t bkey_val_bytes(const struct bkey *k) -+{ -+ return bkey_val_u64s(k) * sizeof(u64); -+} -+ -+static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s) -+{ -+ k->u64s = BKEY_U64s + val_u64s; -+} -+ -+static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes) -+{ -+ k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64)); -+} -+ -+#define bkey_val_end(_k) ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k))) -+ -+#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted) -+ -+#define bkey_whiteout(_k) \ -+ ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard) -+ -+#define bkey_packed_typecheck(_k) \ -+({ \ -+ BUILD_BUG_ON(!type_is(_k, struct bkey *) && \ -+ !type_is(_k, struct bkey_packed *)); \ -+ type_is(_k, struct bkey_packed *); \ -+}) -+ -+enum bkey_lr_packed { -+ BKEY_PACKED_BOTH, -+ BKEY_PACKED_RIGHT, -+ BKEY_PACKED_LEFT, -+ BKEY_PACKED_NONE, -+}; -+ -+#define bkey_lr_packed_typecheck(_l, _r) \ -+ (!bkey_packed_typecheck(_l) + ((!bkey_packed_typecheck(_r)) << 1)) -+ -+#define bkey_lr_packed(_l, _r) \ -+ ((_l)->format + ((_r)->format << 1)) -+ -+#define bkey_copy(_dst, _src) \ -+do { \ -+ BUILD_BUG_ON(!type_is(_dst, struct bkey_i *) && \ -+ !type_is(_dst, struct bkey_packed *)); \ -+ BUILD_BUG_ON(!type_is(_src, struct bkey_i *) && \ -+ !type_is(_src, struct bkey_packed *)); \ -+ EBUG_ON((u64 *) (_dst) > (u64 *) (_src) && \ -+ (u64 *) (_dst) < (u64 *) (_src) + \ -+ ((struct bkey *) (_src))->u64s); \ -+ \ -+ memcpy_u64s_small((_dst), (_src), \ -+ ((struct bkey *) (_src))->u64s); \ -+} while (0) -+ -+struct btree; -+ -+struct bkey_format_state { -+ u64 field_min[BKEY_NR_FIELDS]; -+ u64 field_max[BKEY_NR_FIELDS]; -+}; -+ -+void bch2_bkey_format_init(struct bkey_format_state *); -+void bch2_bkey_format_add_key(struct bkey_format_state *, const struct bkey *); -+void bch2_bkey_format_add_pos(struct bkey_format_state *, struct bpos); -+struct bkey_format bch2_bkey_format_done(struct bkey_format_state *); -+const char *bch2_bkey_format_validate(struct bkey_format *); -+ -+__pure -+unsigned bch2_bkey_greatest_differing_bit(const struct btree *, -+ const struct bkey_packed *, -+ const struct bkey_packed *); -+__pure -+unsigned bch2_bkey_ffs(const struct btree *, const struct bkey_packed *); -+ -+__pure -+int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *, -+ const struct bkey_packed *, -+ const struct btree *); -+ -+__pure -+int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *, -+ const struct bkey_packed *, -+ const struct bpos *); -+ -+__pure -+int __bch2_bkey_cmp_packed(const struct bkey_packed *, -+ const struct bkey_packed *, -+ const struct btree *); -+ -+__pure -+int __bch2_bkey_cmp_left_packed(const struct btree *, -+ const struct bkey_packed *, -+ const struct bpos *); -+ -+static inline __pure -+int bkey_cmp_left_packed(const struct btree *b, -+ const struct bkey_packed *l, const struct bpos *r) -+{ -+ return __bch2_bkey_cmp_left_packed(b, l, r); -+} -+ -+/* -+ * we prefer to pass bpos by ref, but it's often enough terribly convenient to -+ * pass it by by val... 
as much as I hate c++, const ref would be nice here: -+ */ -+__pure __flatten -+static inline int bkey_cmp_left_packed_byval(const struct btree *b, -+ const struct bkey_packed *l, -+ struct bpos r) -+{ -+ return bkey_cmp_left_packed(b, l, &r); -+} -+ -+/* -+ * If @_l or @_r are struct bkey * (not bkey_packed *), uses type information to -+ * skip dispatching on k->format: -+ */ -+#define bkey_cmp_packed(_b, _l, _r) \ -+({ \ -+ int _cmp; \ -+ \ -+ switch (bkey_lr_packed_typecheck(_l, _r)) { \ -+ case BKEY_PACKED_NONE: \ -+ _cmp = bkey_cmp(((struct bkey *) (_l))->p, \ -+ ((struct bkey *) (_r))->p); \ -+ break; \ -+ case BKEY_PACKED_LEFT: \ -+ _cmp = bkey_cmp_left_packed((_b), \ -+ (struct bkey_packed *) (_l), \ -+ &((struct bkey *) (_r))->p); \ -+ break; \ -+ case BKEY_PACKED_RIGHT: \ -+ _cmp = -bkey_cmp_left_packed((_b), \ -+ (struct bkey_packed *) (_r), \ -+ &((struct bkey *) (_l))->p); \ -+ break; \ -+ case BKEY_PACKED_BOTH: \ -+ _cmp = __bch2_bkey_cmp_packed((void *) (_l), \ -+ (void *) (_r), (_b)); \ -+ break; \ -+ } \ -+ _cmp; \ -+}) -+ -+#if 1 -+static __always_inline int bkey_cmp(struct bpos l, struct bpos r) -+{ -+ if (l.inode != r.inode) -+ return l.inode < r.inode ? -1 : 1; -+ if (l.offset != r.offset) -+ return l.offset < r.offset ? -1 : 1; -+ if (l.snapshot != r.snapshot) -+ return l.snapshot < r.snapshot ? -1 : 1; -+ return 0; -+} -+#else -+int bkey_cmp(struct bpos l, struct bpos r); -+#endif -+ -+static inline struct bpos bpos_min(struct bpos l, struct bpos r) -+{ -+ return bkey_cmp(l, r) < 0 ? l : r; -+} -+ -+void bch2_bpos_swab(struct bpos *); -+void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *); -+ -+static __always_inline int bversion_cmp(struct bversion l, struct bversion r) -+{ -+ return cmp_int(l.hi, r.hi) ?: -+ cmp_int(l.lo, r.lo); -+} -+ -+#define ZERO_VERSION ((struct bversion) { .hi = 0, .lo = 0 }) -+#define MAX_VERSION ((struct bversion) { .hi = ~0, .lo = ~0ULL }) -+ -+static __always_inline int bversion_zero(struct bversion v) -+{ -+ return !bversion_cmp(v, ZERO_VERSION); -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+/* statement expressions confusing unlikely()? */ -+#define bkey_packed(_k) \ -+ ({ EBUG_ON((_k)->format > KEY_FORMAT_CURRENT); \ -+ (_k)->format != KEY_FORMAT_CURRENT; }) -+#else -+#define bkey_packed(_k) ((_k)->format != KEY_FORMAT_CURRENT) -+#endif -+ -+/* -+ * It's safe to treat an unpacked bkey as a packed one, but not the reverse -+ */ -+static inline struct bkey_packed *bkey_to_packed(struct bkey_i *k) -+{ -+ return (struct bkey_packed *) k; -+} -+ -+static inline const struct bkey_packed *bkey_to_packed_c(const struct bkey_i *k) -+{ -+ return (const struct bkey_packed *) k; -+} -+ -+static inline struct bkey_i *packed_to_bkey(struct bkey_packed *k) -+{ -+ return bkey_packed(k) ? NULL : (struct bkey_i *) k; -+} -+ -+static inline const struct bkey *packed_to_bkey_c(const struct bkey_packed *k) -+{ -+ return bkey_packed(k) ? 
NULL : (const struct bkey *) k; -+} -+ -+static inline unsigned bkey_format_key_bits(const struct bkey_format *format) -+{ -+ return format->bits_per_field[BKEY_FIELD_INODE] + -+ format->bits_per_field[BKEY_FIELD_OFFSET] + -+ format->bits_per_field[BKEY_FIELD_SNAPSHOT]; -+} -+ -+static inline struct bpos bkey_successor(struct bpos p) -+{ -+ struct bpos ret = p; -+ -+ if (!++ret.offset) -+ BUG_ON(!++ret.inode); -+ -+ return ret; -+} -+ -+static inline struct bpos bkey_predecessor(struct bpos p) -+{ -+ struct bpos ret = p; -+ -+ if (!ret.offset--) -+ BUG_ON(!ret.inode--); -+ -+ return ret; -+} -+ -+static inline u64 bkey_start_offset(const struct bkey *k) -+{ -+ return k->p.offset - k->size; -+} -+ -+static inline struct bpos bkey_start_pos(const struct bkey *k) -+{ -+ return (struct bpos) { -+ .inode = k->p.inode, -+ .offset = bkey_start_offset(k), -+ .snapshot = k->p.snapshot, -+ }; -+} -+ -+/* Packed helpers */ -+ -+static inline unsigned bkeyp_key_u64s(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ unsigned ret = bkey_packed(k) ? format->key_u64s : BKEY_U64s; -+ -+ EBUG_ON(k->u64s < ret); -+ return ret; -+} -+ -+static inline unsigned bkeyp_key_bytes(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ return bkeyp_key_u64s(format, k) * sizeof(u64); -+} -+ -+static inline unsigned bkeyp_val_u64s(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ return k->u64s - bkeyp_key_u64s(format, k); -+} -+ -+static inline size_t bkeyp_val_bytes(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ return bkeyp_val_u64s(format, k) * sizeof(u64); -+} -+ -+static inline void set_bkeyp_val_u64s(const struct bkey_format *format, -+ struct bkey_packed *k, unsigned val_u64s) -+{ -+ k->u64s = bkeyp_key_u64s(format, k) + val_u64s; -+} -+ -+#define bkeyp_val(_format, _k) \ -+ ((struct bch_val *) ((_k)->_data + bkeyp_key_u64s(_format, _k))) -+ -+extern const struct bkey_format bch2_bkey_format_current; -+ -+bool bch2_bkey_transform(const struct bkey_format *, -+ struct bkey_packed *, -+ const struct bkey_format *, -+ const struct bkey_packed *); -+ -+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *, -+ const struct bkey_packed *); -+ -+#ifndef HAVE_BCACHEFS_COMPILED_UNPACK -+struct bpos __bkey_unpack_pos(const struct bkey_format *, -+ const struct bkey_packed *); -+#endif -+ -+bool bch2_bkey_pack_key(struct bkey_packed *, const struct bkey *, -+ const struct bkey_format *); -+ -+enum bkey_pack_pos_ret { -+ BKEY_PACK_POS_EXACT, -+ BKEY_PACK_POS_SMALLER, -+ BKEY_PACK_POS_FAIL, -+}; -+ -+enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *, struct bpos, -+ const struct btree *); -+ -+static inline bool bkey_pack_pos(struct bkey_packed *out, struct bpos in, -+ const struct btree *b) -+{ -+ return bch2_bkey_pack_pos_lossy(out, in, b) == BKEY_PACK_POS_EXACT; -+} -+ -+void bch2_bkey_unpack(const struct btree *, struct bkey_i *, -+ const struct bkey_packed *); -+bool bch2_bkey_pack(struct bkey_packed *, const struct bkey_i *, -+ const struct bkey_format *); -+ -+static inline u64 bkey_field_max(const struct bkey_format *f, -+ enum bch_bkey_fields nr) -+{ -+ return f->bits_per_field[nr] < 64 -+ ? 
(le64_to_cpu(f->field_offset[nr]) + -+ ~(~0ULL << f->bits_per_field[nr])) -+ : U64_MAX; -+} -+ -+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK -+ -+int bch2_compile_bkey_format(const struct bkey_format *, void *); -+ -+#else -+ -+static inline int bch2_compile_bkey_format(const struct bkey_format *format, -+ void *out) { return 0; } -+ -+#endif -+ -+static inline void bkey_reassemble(struct bkey_i *dst, -+ struct bkey_s_c src) -+{ -+ dst->k = *src.k; -+ memcpy_u64s_small(&dst->v, src.v, bkey_val_u64s(src.k)); -+} -+ -+#define bkey_s_null ((struct bkey_s) { .k = NULL }) -+#define bkey_s_c_null ((struct bkey_s_c) { .k = NULL }) -+ -+#define bkey_s_err(err) ((struct bkey_s) { .k = ERR_PTR(err) }) -+#define bkey_s_c_err(err) ((struct bkey_s_c) { .k = ERR_PTR(err) }) -+ -+static inline struct bkey_s bkey_to_s(struct bkey *k) -+{ -+ return (struct bkey_s) { .k = k, .v = NULL }; -+} -+ -+static inline struct bkey_s_c bkey_to_s_c(const struct bkey *k) -+{ -+ return (struct bkey_s_c) { .k = k, .v = NULL }; -+} -+ -+static inline struct bkey_s bkey_i_to_s(struct bkey_i *k) -+{ -+ return (struct bkey_s) { .k = &k->k, .v = &k->v }; -+} -+ -+static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k) -+{ -+ return (struct bkey_s_c) { .k = &k->k, .v = &k->v }; -+} -+ -+/* -+ * For a given type of value (e.g. struct bch_extent), generates the types for -+ * bkey + bch_extent - inline, split, split const - and also all the conversion -+ * functions, which also check that the value is of the correct type. -+ * -+ * We use anonymous unions for upcasting - e.g. converting from e.g. a -+ * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion -+ * functions. -+ */ -+#define BKEY_VAL_ACCESSORS(name) \ -+struct bkey_i_##name { \ -+ union { \ -+ struct bkey k; \ -+ struct bkey_i k_i; \ -+ }; \ -+ struct bch_##name v; \ -+}; \ -+ \ -+struct bkey_s_c_##name { \ -+ union { \ -+ struct { \ -+ const struct bkey *k; \ -+ const struct bch_##name *v; \ -+ }; \ -+ struct bkey_s_c s_c; \ -+ }; \ -+}; \ -+ \ -+struct bkey_s_##name { \ -+ union { \ -+ struct { \ -+ struct bkey *k; \ -+ struct bch_##name *v; \ -+ }; \ -+ struct bkey_s_c_##name c; \ -+ struct bkey_s s; \ -+ struct bkey_s_c s_c; \ -+ }; \ -+}; \ -+ \ -+static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \ -+{ \ -+ EBUG_ON(k->k.type != KEY_TYPE_##name); \ -+ return container_of(&k->k, struct bkey_i_##name, k); \ -+} \ -+ \ -+static inline const struct bkey_i_##name * \ -+bkey_i_to_##name##_c(const struct bkey_i *k) \ -+{ \ -+ EBUG_ON(k->k.type != KEY_TYPE_##name); \ -+ return container_of(&k->k, struct bkey_i_##name, k); \ -+} \ -+ \ -+static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \ -+{ \ -+ EBUG_ON(k.k->type != KEY_TYPE_##name); \ -+ return (struct bkey_s_##name) { \ -+ .k = k.k, \ -+ .v = container_of(k.v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\ -+{ \ -+ EBUG_ON(k.k->type != KEY_TYPE_##name); \ -+ return (struct bkey_s_c_##name) { \ -+ .k = k.k, \ -+ .v = container_of(k.v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_##name name##_i_to_s(struct bkey_i_##name *k)\ -+{ \ -+ return (struct bkey_s_##name) { \ -+ .k = &k->k, \ -+ .v = &k->v, \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_c_##name \ -+name##_i_to_s_c(const struct bkey_i_##name *k) \ -+{ \ -+ return (struct bkey_s_c_##name) { \ -+ .k = &k->k, \ -+ .v = &k->v, \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_##name 
bkey_i_to_s_##name(struct bkey_i *k) \ -+{ \ -+ EBUG_ON(k->k.type != KEY_TYPE_##name); \ -+ return (struct bkey_s_##name) { \ -+ .k = &k->k, \ -+ .v = container_of(&k->v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_c_##name \ -+bkey_i_to_s_c_##name(const struct bkey_i *k) \ -+{ \ -+ EBUG_ON(k->k.type != KEY_TYPE_##name); \ -+ return (struct bkey_s_c_##name) { \ -+ .k = &k->k, \ -+ .v = container_of(&k->v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\ -+{ \ -+ struct bkey_i_##name *k = \ -+ container_of(&_k->k, struct bkey_i_##name, k); \ -+ \ -+ bkey_init(&k->k); \ -+ memset(&k->v, 0, sizeof(k->v)); \ -+ k->k.type = KEY_TYPE_##name; \ -+ set_bkey_val_bytes(&k->k, sizeof(k->v)); \ -+ \ -+ return k; \ -+} -+ -+BKEY_VAL_ACCESSORS(cookie); -+BKEY_VAL_ACCESSORS(btree_ptr); -+BKEY_VAL_ACCESSORS(extent); -+BKEY_VAL_ACCESSORS(reservation); -+BKEY_VAL_ACCESSORS(inode); -+BKEY_VAL_ACCESSORS(inode_generation); -+BKEY_VAL_ACCESSORS(dirent); -+BKEY_VAL_ACCESSORS(xattr); -+BKEY_VAL_ACCESSORS(alloc); -+BKEY_VAL_ACCESSORS(quota); -+BKEY_VAL_ACCESSORS(stripe); -+BKEY_VAL_ACCESSORS(reflink_p); -+BKEY_VAL_ACCESSORS(reflink_v); -+BKEY_VAL_ACCESSORS(inline_data); -+BKEY_VAL_ACCESSORS(btree_ptr_v2); -+ -+/* byte order helpers */ -+ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ -+static inline unsigned high_word_offset(const struct bkey_format *f) -+{ -+ return f->key_u64s - 1; -+} -+ -+#define high_bit_offset 0 -+#define nth_word(p, n) ((p) - (n)) -+ -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ -+static inline unsigned high_word_offset(const struct bkey_format *f) -+{ -+ return 0; -+} -+ -+#define high_bit_offset KEY_PACKED_BITS_START -+#define nth_word(p, n) ((p) + (n)) -+ -+#else -+#error edit for your odd byteorder. 
-+#endif -+ -+#define high_word(f, k) ((k)->_data + high_word_offset(f)) -+#define next_word(p) nth_word(p, 1) -+#define prev_word(p) nth_word(p, -1) -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_bkey_pack_test(void); -+#else -+static inline void bch2_bkey_pack_test(void) {} -+#endif -+ -+#endif /* _BCACHEFS_BKEY_H */ -diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c -new file mode 100644 -index 000000000000..36e0c5152b47 ---- /dev/null -+++ b/fs/bcachefs/bkey_methods.c -@@ -0,0 +1,353 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_types.h" -+#include "alloc_background.h" -+#include "dirent.h" -+#include "ec.h" -+#include "error.h" -+#include "extents.h" -+#include "inode.h" -+#include "quota.h" -+#include "reflink.h" -+#include "xattr.h" -+ -+const char * const bch2_bkey_types[] = { -+#define x(name, nr) #name, -+ BCH_BKEY_TYPES() -+#undef x -+ NULL -+}; -+ -+static const char *deleted_key_invalid(const struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ return NULL; -+} -+ -+#define bch2_bkey_ops_deleted (struct bkey_ops) { \ -+ .key_invalid = deleted_key_invalid, \ -+} -+ -+#define bch2_bkey_ops_discard (struct bkey_ops) { \ -+ .key_invalid = deleted_key_invalid, \ -+} -+ -+static const char *empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (bkey_val_bytes(k.k)) -+ return "value size should be zero"; -+ -+ return NULL; -+} -+ -+#define bch2_bkey_ops_error (struct bkey_ops) { \ -+ .key_invalid = empty_val_key_invalid, \ -+} -+ -+static const char *key_type_cookie_invalid(const struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ if (bkey_val_bytes(k.k) != sizeof(struct bch_cookie)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+#define bch2_bkey_ops_cookie (struct bkey_ops) { \ -+ .key_invalid = key_type_cookie_invalid, \ -+} -+ -+#define bch2_bkey_ops_whiteout (struct bkey_ops) { \ -+ .key_invalid = empty_val_key_invalid, \ -+} -+ -+static const char *key_type_inline_data_invalid(const struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ return NULL; -+} -+ -+static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ pr_buf(out, "(%zu bytes)", bkey_val_bytes(k.k)); -+} -+ -+#define bch2_bkey_ops_inline_data (struct bkey_ops) { \ -+ .key_invalid = key_type_inline_data_invalid, \ -+ .val_to_text = key_type_inline_data_to_text, \ -+} -+ -+static const struct bkey_ops bch2_bkey_ops[] = { -+#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name, -+ BCH_BKEY_TYPES() -+#undef x -+}; -+ -+const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (k.k->type >= KEY_TYPE_MAX) -+ return "invalid type"; -+ -+ return bch2_bkey_ops[k.k->type].key_invalid(c, k); -+} -+ -+const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum btree_node_type type) -+{ -+ if (k.k->u64s < BKEY_U64s) -+ return "u64s too small"; -+ -+ if (type == BKEY_TYPE_BTREE && -+ bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) -+ return "value too big"; -+ -+ if (btree_node_type_is_extents(type)) { -+ if ((k.k->size == 0) != bkey_deleted(k.k)) -+ return "bad size field"; -+ -+ if (k.k->size > k.k->p.offset) -+ return "size greater than offset"; -+ } else { -+ if (k.k->size) -+ return "nonzero size field"; -+ } -+ -+ if (k.k->p.snapshot) -+ return "nonzero snapshot"; -+ -+ if (type != BKEY_TYPE_BTREE && -+ !bkey_cmp(k.k->p, POS_MAX)) -+ return "POS_MAX key"; -+ -+ return NULL; -+} -+ -+const char *bch2_bkey_invalid(struct 
bch_fs *c, struct bkey_s_c k, -+ enum btree_node_type type) -+{ -+ return __bch2_bkey_invalid(c, k, type) ?: -+ bch2_bkey_val_invalid(c, k); -+} -+ -+const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k) -+{ -+ if (bkey_cmp(k.k->p, b->data->min_key) < 0) -+ return "key before start of btree node"; -+ -+ if (bkey_cmp(k.k->p, b->data->max_key) > 0) -+ return "key past end of btree node"; -+ -+ return NULL; -+} -+ -+void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; -+ const char *invalid; -+ -+ BUG_ON(!k.k->u64s); -+ -+ invalid = bch2_bkey_invalid(c, k, btree_node_type(b)) ?: -+ bch2_bkey_in_btree_node(b, k); -+ if (invalid) { -+ char buf[160]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, k); -+ bch2_fs_inconsistent(c, "invalid bkey %s: %s", buf, invalid); -+ return; -+ } -+ -+ if (ops->key_debugcheck) -+ ops->key_debugcheck(c, k); -+} -+ -+void bch2_bpos_to_text(struct printbuf *out, struct bpos pos) -+{ -+ if (!bkey_cmp(pos, POS_MIN)) -+ pr_buf(out, "POS_MIN"); -+ else if (!bkey_cmp(pos, POS_MAX)) -+ pr_buf(out, "POS_MAX"); -+ else -+ pr_buf(out, "%llu:%llu", pos.inode, pos.offset); -+} -+ -+void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k) -+{ -+ if (k) { -+ pr_buf(out, "u64s %u type %s ", k->u64s, -+ bch2_bkey_types[k->type]); -+ -+ bch2_bpos_to_text(out, k->p); -+ -+ pr_buf(out, " snap %u len %u ver %llu", -+ k->p.snapshot, k->size, k->version.lo); -+ } else { -+ pr_buf(out, "(null)"); -+ } -+} -+ -+void bch2_val_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; -+ -+ if (likely(ops->val_to_text)) -+ ops->val_to_text(out, c, k); -+} -+ -+void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ bch2_bkey_to_text(out, k.k); -+ -+ if (k.k) { -+ pr_buf(out, ": "); -+ bch2_val_to_text(out, c, k); -+ } -+} -+ -+void bch2_bkey_swab_val(struct bkey_s k) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; -+ -+ if (ops->swab) -+ ops->swab(k); -+} -+ -+bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; -+ -+ return ops->key_normalize -+ ? 
ops->key_normalize(c, k) -+ : false; -+} -+ -+enum merge_result bch2_bkey_merge(struct bch_fs *c, -+ struct bkey_s l, struct bkey_s r) -+{ -+ const struct bkey_ops *ops = &bch2_bkey_ops[l.k->type]; -+ enum merge_result ret; -+ -+ if (key_merging_disabled(c) || -+ !ops->key_merge || -+ l.k->type != r.k->type || -+ bversion_cmp(l.k->version, r.k->version) || -+ bkey_cmp(l.k->p, bkey_start_pos(r.k))) -+ return BCH_MERGE_NOMERGE; -+ -+ ret = ops->key_merge(c, l, r); -+ -+ if (ret != BCH_MERGE_NOMERGE) -+ l.k->needs_whiteout |= r.k->needs_whiteout; -+ return ret; -+} -+ -+static const struct old_bkey_type { -+ u8 btree_node_type; -+ u8 old; -+ u8 new; -+} bkey_renumber_table[] = { -+ {BKEY_TYPE_BTREE, 128, KEY_TYPE_btree_ptr }, -+ {BKEY_TYPE_EXTENTS, 128, KEY_TYPE_extent }, -+ {BKEY_TYPE_EXTENTS, 129, KEY_TYPE_extent }, -+ {BKEY_TYPE_EXTENTS, 130, KEY_TYPE_reservation }, -+ {BKEY_TYPE_INODES, 128, KEY_TYPE_inode }, -+ {BKEY_TYPE_INODES, 130, KEY_TYPE_inode_generation }, -+ {BKEY_TYPE_DIRENTS, 128, KEY_TYPE_dirent }, -+ {BKEY_TYPE_DIRENTS, 129, KEY_TYPE_whiteout }, -+ {BKEY_TYPE_XATTRS, 128, KEY_TYPE_xattr }, -+ {BKEY_TYPE_XATTRS, 129, KEY_TYPE_whiteout }, -+ {BKEY_TYPE_ALLOC, 128, KEY_TYPE_alloc }, -+ {BKEY_TYPE_QUOTAS, 128, KEY_TYPE_quota }, -+}; -+ -+void bch2_bkey_renumber(enum btree_node_type btree_node_type, -+ struct bkey_packed *k, -+ int write) -+{ -+ const struct old_bkey_type *i; -+ -+ for (i = bkey_renumber_table; -+ i < bkey_renumber_table + ARRAY_SIZE(bkey_renumber_table); -+ i++) -+ if (btree_node_type == i->btree_node_type && -+ k->type == (write ? i->new : i->old)) { -+ k->type = write ? i->old : i->new; -+ break; -+ } -+} -+ -+void __bch2_bkey_compat(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, -+ struct bkey_format *f, -+ struct bkey_packed *k) -+{ -+ const struct bkey_ops *ops; -+ struct bkey uk; -+ struct bkey_s u; -+ int i; -+ -+ /* -+ * Do these operations in reverse order in the write path: -+ */ -+ -+ for (i = 0; i < 4; i++) -+ switch (!write ? 
i : 3 - i) { -+ case 0: -+ if (big_endian != CPU_BIG_ENDIAN) -+ bch2_bkey_swab_key(f, k); -+ break; -+ case 1: -+ if (version < bcachefs_metadata_version_bkey_renumber) -+ bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write); -+ break; -+ case 2: -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_id == BTREE_ID_INODES) { -+ if (!bkey_packed(k)) { -+ struct bkey_i *u = packed_to_bkey(k); -+ swap(u->k.p.inode, u->k.p.offset); -+ } else if (f->bits_per_field[BKEY_FIELD_INODE] && -+ f->bits_per_field[BKEY_FIELD_OFFSET]) { -+ struct bkey_format tmp = *f, *in = f, *out = &tmp; -+ -+ swap(tmp.bits_per_field[BKEY_FIELD_INODE], -+ tmp.bits_per_field[BKEY_FIELD_OFFSET]); -+ swap(tmp.field_offset[BKEY_FIELD_INODE], -+ tmp.field_offset[BKEY_FIELD_OFFSET]); -+ -+ if (!write) -+ swap(in, out); -+ -+ uk = __bch2_bkey_unpack_key(in, k); -+ swap(uk.p.inode, uk.p.offset); -+ BUG_ON(!bch2_bkey_pack_key(k, &uk, out)); -+ } -+ } -+ break; -+ case 3: -+ if (!bkey_packed(k)) { -+ u = bkey_i_to_s(packed_to_bkey(k)); -+ } else { -+ uk = __bch2_bkey_unpack_key(f, k); -+ u.k = &uk; -+ u.v = bkeyp_val(f, k); -+ } -+ -+ if (big_endian != CPU_BIG_ENDIAN) -+ bch2_bkey_swab_val(u); -+ -+ ops = &bch2_bkey_ops[k->type]; -+ -+ if (ops->compat) -+ ops->compat(btree_id, version, big_endian, write, u); -+ break; -+ default: -+ BUG(); -+ } -+} -diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h -new file mode 100644 -index 000000000000..0bca725ae3b8 ---- /dev/null -+++ b/fs/bcachefs/bkey_methods.h -@@ -0,0 +1,82 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BKEY_METHODS_H -+#define _BCACHEFS_BKEY_METHODS_H -+ -+#include "bkey.h" -+ -+struct bch_fs; -+struct btree; -+struct bkey; -+enum btree_node_type; -+ -+extern const char * const bch2_bkey_types[]; -+ -+enum merge_result { -+ BCH_MERGE_NOMERGE, -+ -+ /* -+ * The keys were mergeable, but would have overflowed size - so instead -+ * l was changed to the maximum size, and both keys were modified: -+ */ -+ BCH_MERGE_PARTIAL, -+ BCH_MERGE_MERGE, -+}; -+ -+struct bkey_ops { -+ /* Returns reason for being invalid if invalid, else NULL: */ -+ const char * (*key_invalid)(const struct bch_fs *, -+ struct bkey_s_c); -+ void (*key_debugcheck)(struct bch_fs *, struct bkey_s_c); -+ void (*val_to_text)(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ void (*swab)(struct bkey_s); -+ bool (*key_normalize)(struct bch_fs *, struct bkey_s); -+ enum merge_result (*key_merge)(struct bch_fs *, -+ struct bkey_s, struct bkey_s); -+ void (*compat)(enum btree_id id, unsigned version, -+ unsigned big_endian, int write, -+ struct bkey_s); -+}; -+ -+const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c); -+const char *__bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, -+ enum btree_node_type); -+const char *bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, -+ enum btree_node_type); -+const char *bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c); -+ -+void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); -+ -+void bch2_bpos_to_text(struct printbuf *, struct bpos); -+void bch2_bkey_to_text(struct printbuf *, const struct bkey *); -+void bch2_val_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+void bch2_bkey_swab_val(struct bkey_s); -+ -+bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s); -+ -+enum merge_result bch2_bkey_merge(struct bch_fs *, -+ struct bkey_s, struct 
bkey_s); -+ -+void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int); -+ -+void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned, -+ int, struct bkey_format *, struct bkey_packed *); -+ -+static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, -+ struct bkey_format *f, -+ struct bkey_packed *k) -+{ -+ if (version < bcachefs_metadata_version_current || -+ big_endian != CPU_BIG_ENDIAN) -+ __bch2_bkey_compat(level, btree_id, version, -+ big_endian, write, f, k); -+ -+} -+ -+#endif /* _BCACHEFS_BKEY_METHODS_H */ -diff --git a/fs/bcachefs/bkey_on_stack.h b/fs/bcachefs/bkey_on_stack.h -new file mode 100644 -index 000000000000..f607a0cb37ed ---- /dev/null -+++ b/fs/bcachefs/bkey_on_stack.h -@@ -0,0 +1,43 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BKEY_ON_STACK_H -+#define _BCACHEFS_BKEY_ON_STACK_H -+ -+#include "bcachefs.h" -+ -+struct bkey_on_stack { -+ struct bkey_i *k; -+ u64 onstack[12]; -+}; -+ -+static inline void bkey_on_stack_realloc(struct bkey_on_stack *s, -+ struct bch_fs *c, unsigned u64s) -+{ -+ if (s->k == (void *) s->onstack && -+ u64s > ARRAY_SIZE(s->onstack)) { -+ s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS); -+ memcpy(s->k, s->onstack, sizeof(s->onstack)); -+ } -+} -+ -+static inline void bkey_on_stack_reassemble(struct bkey_on_stack *s, -+ struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ bkey_on_stack_realloc(s, c, k.k->u64s); -+ bkey_reassemble(s->k, k); -+} -+ -+static inline void bkey_on_stack_init(struct bkey_on_stack *s) -+{ -+ s->k = (void *) s->onstack; -+} -+ -+static inline void bkey_on_stack_exit(struct bkey_on_stack *s, -+ struct bch_fs *c) -+{ -+ if (s->k != (void *) s->onstack) -+ mempool_free(s->k, &c->large_bkey_pool); -+ s->k = NULL; -+} -+ -+#endif /* _BCACHEFS_BKEY_ON_STACK_H */ -diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c -new file mode 100644 -index 000000000000..839e78d1dc35 ---- /dev/null -+++ b/fs/bcachefs/bkey_sort.c -@@ -0,0 +1,515 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "bkey_sort.h" -+#include "bset.h" -+#include "extents.h" -+ -+typedef int (*sort_cmp_fn)(struct btree *, -+ struct bkey_packed *, -+ struct bkey_packed *); -+ -+static inline bool sort_iter_end(struct sort_iter *iter) -+{ -+ return !iter->used; -+} -+ -+static inline void __sort_iter_sift(struct sort_iter *iter, -+ unsigned from, -+ sort_cmp_fn cmp) -+{ -+ unsigned i; -+ -+ for (i = from; -+ i + 1 < iter->used && -+ cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0; -+ i++) -+ swap(iter->data[i], iter->data[i + 1]); -+} -+ -+static inline void sort_iter_sift(struct sort_iter *iter, sort_cmp_fn cmp) -+{ -+ -+ __sort_iter_sift(iter, 0, cmp); -+} -+ -+static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp) -+{ -+ unsigned i = iter->used; -+ -+ while (i--) -+ __sort_iter_sift(iter, i, cmp); -+} -+ -+static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter) -+{ -+ return !sort_iter_end(iter) ? 
iter->data->k : NULL; -+} -+ -+static inline void __sort_iter_advance(struct sort_iter *iter, -+ unsigned idx, sort_cmp_fn cmp) -+{ -+ struct sort_iter_set *i = iter->data + idx; -+ -+ BUG_ON(idx >= iter->used); -+ -+ i->k = bkey_next_skip_noops(i->k, i->end); -+ -+ BUG_ON(i->k > i->end); -+ -+ if (i->k == i->end) -+ array_remove_item(iter->data, iter->used, idx); -+ else -+ __sort_iter_sift(iter, idx, cmp); -+} -+ -+static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp) -+{ -+ __sort_iter_advance(iter, 0, cmp); -+} -+ -+static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter, -+ sort_cmp_fn cmp) -+{ -+ struct bkey_packed *ret = sort_iter_peek(iter); -+ -+ if (ret) -+ sort_iter_advance(iter, cmp); -+ -+ return ret; -+} -+ -+/* -+ * If keys compare equal, compare by pointer order: -+ */ -+static inline int key_sort_fix_overlapping_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ return bkey_cmp_packed(b, l, r) ?: -+ cmp_int((unsigned long) l, (unsigned long) r); -+} -+ -+static inline bool should_drop_next_key(struct sort_iter *iter) -+{ -+ /* -+ * key_sort_cmp() ensures that when keys compare equal the older key -+ * comes first; so if l->k compares equal to r->k then l->k is older -+ * and should be dropped. -+ */ -+ return iter->used >= 2 && -+ !bkey_cmp_packed(iter->b, -+ iter->data[0].k, -+ iter->data[1].k); -+} -+ -+struct btree_nr_keys -+bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, -+ struct sort_iter *iter) -+{ -+ struct bkey_packed *out = dst->start; -+ struct bkey_packed *k; -+ struct btree_nr_keys nr; -+ -+ memset(&nr, 0, sizeof(nr)); -+ -+ sort_iter_sort(iter, key_sort_fix_overlapping_cmp); -+ -+ while ((k = sort_iter_peek(iter))) { -+ if (!bkey_whiteout(k) && -+ !should_drop_next_key(iter)) { -+ bkey_copy(out, k); -+ btree_keys_account_key_add(&nr, 0, out); -+ out = bkey_next(out); -+ } -+ -+ sort_iter_advance(iter, key_sort_fix_overlapping_cmp); -+ } -+ -+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data); -+ return nr; -+} -+ -+static void extent_sort_append(struct bch_fs *c, -+ struct bkey_format *f, -+ struct btree_nr_keys *nr, -+ struct bkey_packed **out, -+ struct bkey_s k) -+{ -+ if (!bkey_whiteout(k.k)) { -+ if (!bch2_bkey_pack_key(*out, k.k, f)) -+ memcpy_u64s_small(*out, k.k, BKEY_U64s); -+ -+ memcpy_u64s_small(bkeyp_val(f, *out), k.v, bkey_val_u64s(k.k)); -+ -+ btree_keys_account_key_add(nr, 0, *out); -+ *out = bkey_next(*out); -+ } -+} -+ -+/* Sort + repack in a new format: */ -+struct btree_nr_keys -+bch2_sort_repack(struct bset *dst, struct btree *src, -+ struct btree_node_iter *src_iter, -+ struct bkey_format *out_f, -+ bool filter_whiteouts) -+{ -+ struct bkey_format *in_f = &src->format; -+ struct bkey_packed *in, *out = vstruct_last(dst); -+ struct btree_nr_keys nr; -+ -+ memset(&nr, 0, sizeof(nr)); -+ -+ while ((in = bch2_btree_node_iter_next_all(src_iter, src))) { -+ if (filter_whiteouts && bkey_whiteout(in)) -+ continue; -+ -+ if (bch2_bkey_transform(out_f, out, bkey_packed(in) -+ ? 
in_f : &bch2_bkey_format_current, in)) -+ out->format = KEY_FORMAT_LOCAL_BTREE; -+ else -+ bch2_bkey_unpack(src, (void *) out, in); -+ -+ btree_keys_account_key_add(&nr, 0, out); -+ out = bkey_next(out); -+ } -+ -+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data); -+ return nr; -+} -+ -+/* Sort, repack, and call bch2_bkey_normalize() to drop stale pointers: */ -+struct btree_nr_keys -+bch2_sort_repack_merge(struct bch_fs *c, -+ struct bset *dst, struct btree *src, -+ struct btree_node_iter *iter, -+ struct bkey_format *out_f, -+ bool filter_whiteouts) -+{ -+ struct bkey_packed *out = vstruct_last(dst), *k_packed; -+ struct bkey_on_stack k; -+ struct btree_nr_keys nr; -+ -+ memset(&nr, 0, sizeof(nr)); -+ bkey_on_stack_init(&k); -+ -+ while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) { -+ if (filter_whiteouts && bkey_whiteout(k_packed)) -+ continue; -+ -+ /* -+ * NOTE: -+ * bch2_bkey_normalize may modify the key we pass it (dropping -+ * stale pointers) and we don't have a write lock on the src -+ * node; we have to make a copy of the entire key before calling -+ * normalize -+ */ -+ bkey_on_stack_realloc(&k, c, k_packed->u64s + BKEY_U64s); -+ bch2_bkey_unpack(src, k.k, k_packed); -+ -+ if (filter_whiteouts && -+ bch2_bkey_normalize(c, bkey_i_to_s(k.k))) -+ continue; -+ -+ extent_sort_append(c, out_f, &nr, &out, bkey_i_to_s(k.k)); -+ } -+ -+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data); -+ bkey_on_stack_exit(&k, c); -+ return nr; -+} -+ -+static inline int sort_keys_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ return bkey_cmp_packed(b, l, r) ?: -+ (int) bkey_deleted(r) - (int) bkey_deleted(l) ?: -+ (int) l->needs_whiteout - (int) r->needs_whiteout; -+} -+ -+unsigned bch2_sort_keys(struct bkey_packed *dst, -+ struct sort_iter *iter, -+ bool filter_whiteouts) -+{ -+ const struct bkey_format *f = &iter->b->format; -+ struct bkey_packed *in, *next, *out = dst; -+ -+ sort_iter_sort(iter, sort_keys_cmp); -+ -+ while ((in = sort_iter_next(iter, sort_keys_cmp))) { -+ bool needs_whiteout = false; -+ -+ if (bkey_whiteout(in) && -+ (filter_whiteouts || !in->needs_whiteout)) -+ continue; -+ -+ while ((next = sort_iter_peek(iter)) && -+ !bkey_cmp_packed(iter->b, in, next)) { -+ BUG_ON(in->needs_whiteout && -+ next->needs_whiteout); -+ needs_whiteout |= in->needs_whiteout; -+ in = sort_iter_next(iter, sort_keys_cmp); -+ } -+ -+ if (bkey_whiteout(in)) { -+ memcpy_u64s(out, in, bkeyp_key_u64s(f, in)); -+ set_bkeyp_val_u64s(f, out, 0); -+ } else { -+ bkey_copy(out, in); -+ } -+ out->needs_whiteout |= needs_whiteout; -+ out = bkey_next(out); -+ } -+ -+ return (u64 *) out - (u64 *) dst; -+} -+ -+/* Compat code for btree_node_old_extent_overwrite: */ -+ -+/* -+ * If keys compare equal, compare by pointer order: -+ * -+ * Necessary for sort_fix_overlapping() - if there are multiple keys that -+ * compare equal in different sets, we have to process them newest to oldest. 
-+ */ -+static inline int extent_sort_fix_overlapping_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ struct bkey ul = bkey_unpack_key(b, l); -+ struct bkey ur = bkey_unpack_key(b, r); -+ -+ return bkey_cmp(bkey_start_pos(&ul), -+ bkey_start_pos(&ur)) ?: -+ cmp_int((unsigned long) r, (unsigned long) l); -+} -+ -+/* -+ * The algorithm in extent_sort_fix_overlapping() relies on keys in the same -+ * bset being ordered by start offset - but 0 size whiteouts (which are always -+ * KEY_TYPE_deleted) break this ordering, so we need to skip over them: -+ */ -+static void extent_iter_advance(struct sort_iter *iter, unsigned idx) -+{ -+ struct sort_iter_set *i = iter->data + idx; -+ -+ do { -+ i->k = bkey_next_skip_noops(i->k, i->end); -+ } while (i->k != i->end && bkey_deleted(i->k)); -+ -+ if (i->k == i->end) -+ array_remove_item(iter->data, iter->used, idx); -+ else -+ __sort_iter_sift(iter, idx, extent_sort_fix_overlapping_cmp); -+} -+ -+struct btree_nr_keys -+bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, -+ struct sort_iter *iter) -+{ -+ struct btree *b = iter->b; -+ struct bkey_format *f = &b->format; -+ struct sort_iter_set *_l = iter->data, *_r = iter->data + 1; -+ struct bkey_packed *out = dst->start; -+ struct bkey l_unpacked, r_unpacked; -+ struct bkey_s l, r; -+ struct btree_nr_keys nr; -+ struct bkey_on_stack split; -+ unsigned i; -+ -+ memset(&nr, 0, sizeof(nr)); -+ bkey_on_stack_init(&split); -+ -+ sort_iter_sort(iter, extent_sort_fix_overlapping_cmp); -+ for (i = 0; i < iter->used;) { -+ if (bkey_deleted(iter->data[i].k)) -+ __sort_iter_advance(iter, i, -+ extent_sort_fix_overlapping_cmp); -+ else -+ i++; -+ } -+ -+ while (!sort_iter_end(iter)) { -+ l = __bkey_disassemble(b, _l->k, &l_unpacked); -+ -+ if (iter->used == 1) { -+ extent_sort_append(c, f, &nr, &out, l); -+ extent_iter_advance(iter, 0); -+ continue; -+ } -+ -+ r = __bkey_disassemble(b, _r->k, &r_unpacked); -+ -+ /* If current key and next key don't overlap, just append */ -+ if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) { -+ extent_sort_append(c, f, &nr, &out, l); -+ extent_iter_advance(iter, 0); -+ continue; -+ } -+ -+ /* Skip 0 size keys */ -+ if (!r.k->size) { -+ extent_iter_advance(iter, 1); -+ continue; -+ } -+ -+ /* -+ * overlap: keep the newer key and trim the older key so they -+ * don't overlap. comparing pointers tells us which one is -+ * newer, since the bsets are appended one after the other. 
-+ */ -+ -+ /* can't happen because of comparison func */ -+ BUG_ON(_l->k < _r->k && -+ !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k))); -+ -+ if (_l->k > _r->k) { -+ /* l wins, trim r */ -+ if (bkey_cmp(l.k->p, r.k->p) >= 0) { -+ extent_iter_advance(iter, 1); -+ } else { -+ bch2_cut_front_s(l.k->p, r); -+ extent_save(b, _r->k, r.k); -+ __sort_iter_sift(iter, 1, -+ extent_sort_fix_overlapping_cmp); -+ } -+ } else if (bkey_cmp(l.k->p, r.k->p) > 0) { -+ -+ /* -+ * r wins, but it overlaps in the middle of l - split l: -+ */ -+ bkey_on_stack_reassemble(&split, c, l.s_c); -+ bch2_cut_back(bkey_start_pos(r.k), split.k); -+ -+ bch2_cut_front_s(r.k->p, l); -+ extent_save(b, _l->k, l.k); -+ -+ __sort_iter_sift(iter, 0, -+ extent_sort_fix_overlapping_cmp); -+ -+ extent_sort_append(c, f, &nr, &out, -+ bkey_i_to_s(split.k)); -+ } else { -+ bch2_cut_back_s(bkey_start_pos(r.k), l); -+ extent_save(b, _l->k, l.k); -+ } -+ } -+ -+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data); -+ -+ bkey_on_stack_exit(&split, c); -+ return nr; -+} -+ -+static inline int sort_extents_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ return bkey_cmp_packed(b, l, r) ?: -+ (int) bkey_deleted(l) - (int) bkey_deleted(r); -+} -+ -+unsigned bch2_sort_extents(struct bkey_packed *dst, -+ struct sort_iter *iter, -+ bool filter_whiteouts) -+{ -+ struct bkey_packed *in, *out = dst; -+ -+ sort_iter_sort(iter, sort_extents_cmp); -+ -+ while ((in = sort_iter_next(iter, sort_extents_cmp))) { -+ if (bkey_deleted(in)) -+ continue; -+ -+ if (bkey_whiteout(in) && -+ (filter_whiteouts || !in->needs_whiteout)) -+ continue; -+ -+ bkey_copy(out, in); -+ out = bkey_next(out); -+ } -+ -+ return (u64 *) out - (u64 *) dst; -+} -+ -+static inline int sort_extent_whiteouts_cmp(struct btree *b, -+ struct bkey_packed *l, -+ struct bkey_packed *r) -+{ -+ struct bkey ul = bkey_unpack_key(b, l); -+ struct bkey ur = bkey_unpack_key(b, r); -+ -+ return bkey_cmp(bkey_start_pos(&ul), bkey_start_pos(&ur)); -+} -+ -+unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst, -+ struct sort_iter *iter) -+{ -+ const struct bkey_format *f = &iter->b->format; -+ struct bkey_packed *in, *out = dst; -+ struct bkey_i l, r; -+ bool prev = false, l_packed = false; -+ u64 max_packed_size = bkey_field_max(f, BKEY_FIELD_SIZE); -+ u64 max_packed_offset = bkey_field_max(f, BKEY_FIELD_OFFSET); -+ u64 new_size; -+ -+ max_packed_size = min_t(u64, max_packed_size, KEY_SIZE_MAX); -+ -+ sort_iter_sort(iter, sort_extent_whiteouts_cmp); -+ -+ while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) { -+ if (bkey_deleted(in)) -+ continue; -+ -+ EBUG_ON(bkeyp_val_u64s(f, in)); -+ EBUG_ON(in->type != KEY_TYPE_discard); -+ -+ r.k = bkey_unpack_key(iter->b, in); -+ -+ if (prev && -+ bkey_cmp(l.k.p, bkey_start_pos(&r.k)) >= 0) { -+ if (bkey_cmp(l.k.p, r.k.p) >= 0) -+ continue; -+ -+ new_size = l_packed -+ ? 
min(max_packed_size, max_packed_offset - -+ bkey_start_offset(&l.k)) -+ : KEY_SIZE_MAX; -+ -+ new_size = min(new_size, r.k.p.offset - -+ bkey_start_offset(&l.k)); -+ -+ BUG_ON(new_size < l.k.size); -+ -+ bch2_key_resize(&l.k, new_size); -+ -+ if (bkey_cmp(l.k.p, r.k.p) >= 0) -+ continue; -+ -+ bch2_cut_front(l.k.p, &r); -+ } -+ -+ if (prev) { -+ if (!bch2_bkey_pack(out, &l, f)) { -+ BUG_ON(l_packed); -+ bkey_copy(out, &l); -+ } -+ out = bkey_next(out); -+ } -+ -+ l = r; -+ prev = true; -+ l_packed = bkey_packed(in); -+ } -+ -+ if (prev) { -+ if (!bch2_bkey_pack(out, &l, f)) { -+ BUG_ON(l_packed); -+ bkey_copy(out, &l); -+ } -+ out = bkey_next(out); -+ } -+ -+ return (u64 *) out - (u64 *) dst; -+} -diff --git a/fs/bcachefs/bkey_sort.h b/fs/bcachefs/bkey_sort.h -new file mode 100644 -index 000000000000..458a051fdac5 ---- /dev/null -+++ b/fs/bcachefs/bkey_sort.h -@@ -0,0 +1,57 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BKEY_SORT_H -+#define _BCACHEFS_BKEY_SORT_H -+ -+struct sort_iter { -+ struct btree *b; -+ unsigned used; -+ unsigned size; -+ -+ struct sort_iter_set { -+ struct bkey_packed *k, *end; -+ } data[MAX_BSETS + 1]; -+}; -+ -+static inline void sort_iter_init(struct sort_iter *iter, struct btree *b) -+{ -+ iter->b = b; -+ iter->used = 0; -+ iter->size = ARRAY_SIZE(iter->data); -+} -+ -+static inline void sort_iter_add(struct sort_iter *iter, -+ struct bkey_packed *k, -+ struct bkey_packed *end) -+{ -+ BUG_ON(iter->used >= iter->size); -+ -+ if (k != end) -+ iter->data[iter->used++] = (struct sort_iter_set) { k, end }; -+} -+ -+struct btree_nr_keys -+bch2_key_sort_fix_overlapping(struct bch_fs *, struct bset *, -+ struct sort_iter *); -+struct btree_nr_keys -+bch2_extent_sort_fix_overlapping(struct bch_fs *, struct bset *, -+ struct sort_iter *); -+ -+struct btree_nr_keys -+bch2_sort_repack(struct bset *, struct btree *, -+ struct btree_node_iter *, -+ struct bkey_format *, bool); -+struct btree_nr_keys -+bch2_sort_repack_merge(struct bch_fs *, -+ struct bset *, struct btree *, -+ struct btree_node_iter *, -+ struct bkey_format *, bool); -+ -+unsigned bch2_sort_keys(struct bkey_packed *, -+ struct sort_iter *, bool); -+unsigned bch2_sort_extents(struct bkey_packed *, -+ struct sort_iter *, bool); -+ -+unsigned bch2_sort_extent_whiteouts(struct bkey_packed *, -+ struct sort_iter *); -+ -+#endif /* _BCACHEFS_BKEY_SORT_H */ -diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c -new file mode 100644 -index 000000000000..f7c2841ed8a7 ---- /dev/null -+++ b/fs/bcachefs/bset.c -@@ -0,0 +1,1742 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Code for working with individual keys, and sorted sets of keys with in a -+ * btree node -+ * -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "btree_cache.h" -+#include "bset.h" -+#include "eytzinger.h" -+#include "util.h" -+ -+#include -+#include -+#include -+#include -+ -+/* hack.. 
*/ -+#include "alloc_types.h" -+#include -+ -+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *, -+ struct btree *); -+ -+static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter) -+{ -+ unsigned n = ARRAY_SIZE(iter->data); -+ -+ while (n && __btree_node_iter_set_end(iter, n - 1)) -+ --n; -+ -+ return n; -+} -+ -+struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k) -+{ -+ unsigned offset = __btree_node_key_to_offset(b, k); -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) -+ if (offset <= t->end_offset) { -+ EBUG_ON(offset < btree_bkey_first_offset(t)); -+ return t; -+ } -+ -+ BUG(); -+} -+ -+/* -+ * There are never duplicate live keys in the btree - but including keys that -+ * have been flagged as deleted (and will be cleaned up later) we _will_ see -+ * duplicates. -+ * -+ * Thus the sort order is: usual key comparison first, but for keys that compare -+ * equal the deleted key(s) come first, and the (at most one) live version comes -+ * last. -+ * -+ * The main reason for this is insertion: to handle overwrites, we first iterate -+ * over keys that compare equal to our insert key, and then insert immediately -+ * prior to the first key greater than the key we're inserting - our insert -+ * position will be after all keys that compare equal to our insert key, which -+ * by the time we actually do the insert will all be deleted. -+ */ -+ -+void bch2_dump_bset(struct bch_fs *c, struct btree *b, -+ struct bset *i, unsigned set) -+{ -+ struct bkey_packed *_k, *_n; -+ struct bkey uk, n; -+ struct bkey_s_c k; -+ char buf[200]; -+ -+ if (!i->u64s) -+ return; -+ -+ for (_k = i->start; -+ _k < vstruct_last(i); -+ _k = _n) { -+ _n = bkey_next_skip_noops(_k, vstruct_last(i)); -+ -+ k = bkey_disassemble(b, _k, &uk); -+ if (c) -+ bch2_bkey_val_to_text(&PBUF(buf), c, k); -+ else -+ bch2_bkey_to_text(&PBUF(buf), k.k); -+ printk(KERN_ERR "block %u key %5zu: %s\n", set, -+ _k->_data - i->_data, buf); -+ -+ if (_n == vstruct_last(i)) -+ continue; -+ -+ n = bkey_unpack_key(b, _n); -+ -+ if (bkey_cmp(bkey_start_pos(&n), k.k->p) < 0) { -+ printk(KERN_ERR "Key skipped backwards\n"); -+ continue; -+ } -+ -+ if (!bkey_deleted(k.k) && -+ !bkey_cmp(n.p, k.k->p)) -+ printk(KERN_ERR "Duplicate keys\n"); -+ } -+} -+ -+void bch2_dump_btree_node(struct bch_fs *c, struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ console_lock(); -+ for_each_bset(b, t) -+ bch2_dump_bset(c, b, bset(b, t), t - b->set); -+ console_unlock(); -+} -+ -+void bch2_dump_btree_node_iter(struct btree *b, -+ struct btree_node_iter *iter) -+{ -+ struct btree_node_iter_set *set; -+ -+ printk(KERN_ERR "btree node iter with %u/%u sets:\n", -+ __btree_node_iter_used(iter), b->nsets); -+ -+ btree_node_iter_for_each(iter, set) { -+ struct bkey_packed *k = __btree_node_offset_to_key(b, set->k); -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ struct bkey uk = bkey_unpack_key(b, k); -+ char buf[100]; -+ -+ bch2_bkey_to_text(&PBUF(buf), &uk); -+ printk(KERN_ERR "set %zu key %u: %s\n", -+ t - b->set, set->k, buf); -+ } -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+void __bch2_verify_btree_nr_keys(struct btree *b) -+{ -+ struct bset_tree *t; -+ struct bkey_packed *k; -+ struct btree_nr_keys nr = { 0 }; -+ -+ for_each_bset(b, t) -+ bset_tree_for_each_key(b, t, k) -+ if (!bkey_whiteout(k)) -+ btree_keys_account_key_add(&nr, t - b->set, k); -+ -+ BUG_ON(memcmp(&nr, &b->nr, sizeof(nr))); -+} -+ -+static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter, -+ struct btree *b) -+{ -+ 
struct btree_node_iter iter = *_iter; -+ const struct bkey_packed *k, *n; -+ -+ k = bch2_btree_node_iter_peek_all(&iter, b); -+ __bch2_btree_node_iter_advance(&iter, b); -+ n = bch2_btree_node_iter_peek_all(&iter, b); -+ -+ bkey_unpack_key(b, k); -+ -+ if (n && -+ bkey_iter_cmp(b, k, n) > 0) { -+ struct btree_node_iter_set *set; -+ struct bkey ku = bkey_unpack_key(b, k); -+ struct bkey nu = bkey_unpack_key(b, n); -+ char buf1[80], buf2[80]; -+ -+ bch2_dump_btree_node(NULL, b); -+ bch2_bkey_to_text(&PBUF(buf1), &ku); -+ bch2_bkey_to_text(&PBUF(buf2), &nu); -+ printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n", -+ buf1, buf2); -+ printk(KERN_ERR "iter was:"); -+ -+ btree_node_iter_for_each(_iter, set) { -+ struct bkey_packed *k = __btree_node_offset_to_key(b, set->k); -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ printk(" [%zi %zi]", t - b->set, -+ k->_data - bset(b, t)->_data); -+ } -+ panic("\n"); -+ } -+} -+ -+void bch2_btree_node_iter_verify(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ struct btree_node_iter_set *set, *s2; -+ struct bkey_packed *k, *p; -+ struct bset_tree *t; -+ -+ if (bch2_btree_node_iter_end(iter)) -+ return; -+ -+ /* Verify no duplicates: */ -+ btree_node_iter_for_each(iter, set) -+ btree_node_iter_for_each(iter, s2) -+ BUG_ON(set != s2 && set->end == s2->end); -+ -+ /* Verify that set->end is correct: */ -+ btree_node_iter_for_each(iter, set) { -+ for_each_bset(b, t) -+ if (set->end == t->end_offset) -+ goto found; -+ BUG(); -+found: -+ BUG_ON(set->k < btree_bkey_first_offset(t) || -+ set->k >= t->end_offset); -+ } -+ -+ /* Verify iterator is sorted: */ -+ btree_node_iter_for_each(iter, set) -+ BUG_ON(set != iter->data && -+ btree_node_iter_cmp(b, set[-1], set[0]) > 0); -+ -+ k = bch2_btree_node_iter_peek_all(iter, b); -+ -+ for_each_bset(b, t) { -+ if (iter->data[0].end == t->end_offset) -+ continue; -+ -+ p = bch2_bkey_prev_all(b, t, -+ bch2_btree_node_iter_bset_pos(iter, b, t)); -+ -+ BUG_ON(p && bkey_iter_cmp(b, k, p) < 0); -+ } -+} -+ -+void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, -+ struct bkey_packed *insert, unsigned clobber_u64s) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, where); -+ struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where); -+ struct bkey_packed *next = (void *) (where->_data + clobber_u64s); -+#if 0 -+ BUG_ON(prev && -+ bkey_iter_cmp(b, prev, insert) > 0); -+#else -+ if (prev && -+ bkey_iter_cmp(b, prev, insert) > 0) { -+ struct bkey k1 = bkey_unpack_key(b, prev); -+ struct bkey k2 = bkey_unpack_key(b, insert); -+ char buf1[100]; -+ char buf2[100]; -+ -+ bch2_dump_btree_node(NULL, b); -+ bch2_bkey_to_text(&PBUF(buf1), &k1); -+ bch2_bkey_to_text(&PBUF(buf2), &k2); -+ -+ panic("prev > insert:\n" -+ "prev key %s\n" -+ "insert key %s\n", -+ buf1, buf2); -+ } -+#endif -+#if 0 -+ BUG_ON(next != btree_bkey_last(b, t) && -+ bkey_iter_cmp(b, insert, next) > 0); -+#else -+ if (next != btree_bkey_last(b, t) && -+ bkey_iter_cmp(b, insert, next) > 0) { -+ struct bkey k1 = bkey_unpack_key(b, insert); -+ struct bkey k2 = bkey_unpack_key(b, next); -+ char buf1[100]; -+ char buf2[100]; -+ -+ bch2_dump_btree_node(NULL, b); -+ bch2_bkey_to_text(&PBUF(buf1), &k1); -+ bch2_bkey_to_text(&PBUF(buf2), &k2); -+ -+ panic("insert > next:\n" -+ "insert key %s\n" -+ "next key %s\n", -+ buf1, buf2); -+ } -+#endif -+} -+ -+#else -+ -+static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter, -+ struct btree *b) {} -+ -+#endif -+ -+/* Auxiliary search trees */ -+ -+#define 
BFLOAT_FAILED_UNPACKED U8_MAX -+#define BFLOAT_FAILED U8_MAX -+ -+struct bkey_float { -+ u8 exponent; -+ u8 key_offset; -+ u16 mantissa; -+}; -+#define BKEY_MANTISSA_BITS 16 -+ -+static unsigned bkey_float_byte_offset(unsigned idx) -+{ -+ return idx * sizeof(struct bkey_float); -+} -+ -+struct ro_aux_tree { -+ struct bkey_float f[0]; -+}; -+ -+struct rw_aux_tree { -+ u16 offset; -+ struct bpos k; -+}; -+ -+static unsigned bset_aux_tree_buf_end(const struct bset_tree *t) -+{ -+ BUG_ON(t->aux_data_offset == U16_MAX); -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_NO_AUX_TREE: -+ return t->aux_data_offset; -+ case BSET_RO_AUX_TREE: -+ return t->aux_data_offset + -+ DIV_ROUND_UP(t->size * sizeof(struct bkey_float) + -+ t->size * sizeof(u8), 8); -+ case BSET_RW_AUX_TREE: -+ return t->aux_data_offset + -+ DIV_ROUND_UP(sizeof(struct rw_aux_tree) * t->size, 8); -+ default: -+ BUG(); -+ } -+} -+ -+static unsigned bset_aux_tree_buf_start(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ return t == b->set -+ ? DIV_ROUND_UP(b->unpack_fn_len, 8) -+ : bset_aux_tree_buf_end(t - 1); -+} -+ -+static void *__aux_tree_base(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ return b->aux_data + t->aux_data_offset * 8; -+} -+ -+static struct ro_aux_tree *ro_aux_tree_base(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE); -+ -+ return __aux_tree_base(b, t); -+} -+ -+static u8 *ro_aux_tree_prev(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE); -+ -+ return __aux_tree_base(b, t) + bkey_float_byte_offset(t->size); -+} -+ -+static struct bkey_float *bkey_float(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned idx) -+{ -+ return ro_aux_tree_base(b, t)->f + idx; -+} -+ -+static void bset_aux_tree_verify(struct btree *b) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) { -+ if (t->aux_data_offset == U16_MAX) -+ continue; -+ -+ BUG_ON(t != b->set && -+ t[-1].aux_data_offset == U16_MAX); -+ -+ BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t)); -+ BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b)); -+ BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b)); -+ } -+#endif -+} -+ -+void bch2_btree_keys_init(struct btree *b, bool *expensive_debug_checks) -+{ -+ unsigned i; -+ -+ b->nsets = 0; -+ memset(&b->nr, 0, sizeof(b->nr)); -+#ifdef CONFIG_BCACHEFS_DEBUG -+ b->expensive_debug_checks = expensive_debug_checks; -+#endif -+ for (i = 0; i < MAX_BSETS; i++) -+ b->set[i].data_offset = U16_MAX; -+ -+ bch2_bset_set_no_aux_tree(b, b->set); -+} -+ -+/* Binary tree stuff for auxiliary search trees */ -+ -+/* -+ * Cacheline/offset <-> bkey pointer arithmetic: -+ * -+ * t->tree is a binary search tree in an array; each node corresponds to a key -+ * in one cacheline in t->set (BSET_CACHELINE bytes). -+ * -+ * This means we don't have to store the full index of the key that a node in -+ * the binary tree points to; eytzinger1_to_inorder() gives us the cacheline, and -+ * then bkey_float->m gives us the offset within that cacheline, in units of 8 -+ * bytes. -+ * -+ * cacheline_to_bkey() and friends abstract out all the pointer arithmetic to -+ * make this work. -+ * -+ * To construct the bfloat for an arbitrary key we need to know what the key -+ * immediately preceding it is: we have to check if the two keys differ in the -+ * bits we're going to store in bkey_float->mantissa. 
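A rough standalone illustration of the mantissa idea described in the comment above, using plain 64-bit integers and hypothetical names rather than packed bkeys: each tree node keeps only a 16-bit slice of its key, sampled at the highest bit where the two keys bounding it differ, and a lookup falls back to a full comparison only when that slice ties. This is a sketch of the concept, not the bcachefs implementation, which works on packed keys, handles negative exponents, and flags failed nodes.

/*
 * Illustrative sketch only, not bcachefs code: compare keys through a
 * 16-bit "mantissa" sampled at the highest bit where the two bounding
 * keys differ, and treat a tie as "fall back to a full comparison".
 */
#include <stdint.h>
#include <stdio.h>

#define MANTISSA_BITS	16

struct bfloat {
	unsigned exponent;	/* lowest key bit kept in the mantissa */
	uint16_t mantissa;
};

static unsigned highest_bit(uint64_t v)
{
	unsigned bit = 0;

	while (v >>= 1)
		bit++;
	return bit;
}

static uint16_t mantissa_at(uint64_t key, unsigned exponent)
{
	return (uint16_t) (key >> exponent);
}

/* Build a bfloat for @key given its neighbouring keys; assumes lo != hi. */
static struct bfloat bfloat_make(uint64_t key, uint64_t lo, uint64_t hi)
{
	int high_bit = (int) highest_bit(lo ^ hi);	/* first differing bit */
	int exponent = high_bit - (MANTISSA_BITS - 1);
	struct bfloat f;

	/* the real code handles negative exponents; here we simply clamp */
	f.exponent = exponent > 0 ? exponent : 0;
	f.mantissa = mantissa_at(key, f.exponent);
	return f;
}

/* >0: node key > search, <0: node key < search, 0: tie, compare fully. */
static int bfloat_cmp(const struct bfloat *f, uint64_t search)
{
	uint16_t l = f->mantissa, r = mantissa_at(search, f->exponent);

	return (l > r) - (l < r);
}

int main(void)
{
	uint64_t lo  = 0x1122330000ULL;
	uint64_t key = 0x1122340000ULL;
	uint64_t hi  = 0x1122350000ULL;
	struct bfloat f = bfloat_make(key, lo, hi);

	printf("exponent %u mantissa 0x%04x\n", f.exponent, f.mantissa);
	printf("vs 0x1122337777: %d\n", bfloat_cmp(&f, 0x1122337777ULL)); /*  1 */
	printf("vs 0x1122348888: %d\n", bfloat_cmp(&f, 0x1122348888ULL)); /* -1 */
	return 0;
}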
t->prev[j] stores the size -+ * of the previous key so we can walk backwards to it from t->tree[j]'s key. -+ */ -+ -+static inline void *bset_cacheline(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned cacheline) -+{ -+ return (void *) round_down((unsigned long) btree_bkey_first(b, t), -+ L1_CACHE_BYTES) + -+ cacheline * BSET_CACHELINE; -+} -+ -+static struct bkey_packed *cacheline_to_bkey(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned cacheline, -+ unsigned offset) -+{ -+ return bset_cacheline(b, t, cacheline) + offset * 8; -+} -+ -+static unsigned bkey_to_cacheline(const struct btree *b, -+ const struct bset_tree *t, -+ const struct bkey_packed *k) -+{ -+ return ((void *) k - bset_cacheline(b, t, 0)) / BSET_CACHELINE; -+} -+ -+static ssize_t __bkey_to_cacheline_offset(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned cacheline, -+ const struct bkey_packed *k) -+{ -+ return (u64 *) k - (u64 *) bset_cacheline(b, t, cacheline); -+} -+ -+static unsigned bkey_to_cacheline_offset(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned cacheline, -+ const struct bkey_packed *k) -+{ -+ size_t m = __bkey_to_cacheline_offset(b, t, cacheline, k); -+ -+ EBUG_ON(m > U8_MAX); -+ return m; -+} -+ -+static inline struct bkey_packed *tree_to_bkey(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned j) -+{ -+ return cacheline_to_bkey(b, t, -+ __eytzinger1_to_inorder(j, t->size, t->extra), -+ bkey_float(b, t, j)->key_offset); -+} -+ -+static struct bkey_packed *tree_to_prev_bkey(const struct btree *b, -+ const struct bset_tree *t, -+ unsigned j) -+{ -+ unsigned prev_u64s = ro_aux_tree_prev(b, t)[j]; -+ -+ return (void *) (tree_to_bkey(b, t, j)->_data - prev_u64s); -+} -+ -+static struct rw_aux_tree *rw_aux_tree(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE); -+ -+ return __aux_tree_base(b, t); -+} -+ -+/* -+ * For the write set - the one we're currently inserting keys into - we don't -+ * maintain a full search tree, we just keep a simple lookup table in t->prev. 
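The "sparse index plus linear scan" shape of that write-set lookup table can be shown in a few lines of ordinary C. The sketch below indexes one entry per fixed-size chunk of a sorted int array instead of one entry per cacheline of packed keys; all names are illustrative, not bcachefs API.

/*
 * Illustrative sketch only, not bcachefs code: a sparse "one entry per
 * chunk" lookup table over a sorted array, searched with a binary search
 * on the table followed by a short linear scan.
 */
#include <stdio.h>
#include <stdlib.h>

#define CHUNK 4				/* stand-in for one cacheline of keys */

struct sparse_idx {
	size_t	*off;			/* index of the first key in each chunk */
	size_t	nr;
};

static void sparse_build(struct sparse_idx *t, size_t nr_keys)
{
	size_t i;

	t->nr  = (nr_keys + CHUNK - 1) / CHUNK;
	t->off = malloc(t->nr * sizeof(*t->off));
	for (i = 0; i < t->nr; i++)
		t->off[i] = i * CHUNK;
}

/* Returns the index of the first key >= @search, or @nr_keys if none. */
static size_t sparse_search(const struct sparse_idx *t, const int *keys,
			    size_t nr_keys, int search)
{
	size_t l = 0, r = t->nr, i;

	while (l + 1 < r) {		/* binary search over the sparse table */
		size_t m = (l + r) / 2;

		if (keys[t->off[m]] < search)
			l = m;
		else
			r = m;
	}

	for (i = t->off[l]; i < nr_keys; i++)	/* finish with a linear scan */
		if (keys[i] >= search)
			return i;
	return nr_keys;
}

int main(void)
{
	int keys[] = { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19 };
	size_t nr = sizeof(keys) / sizeof(keys[0]);
	struct sparse_idx t;

	sparse_build(&t, nr);
	printf("first key >= 8 is at index %zu\n", sparse_search(&t, keys, nr, 8));
	free(t.off);
	return 0;
}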
-+ */ -+static struct bkey_packed *rw_aux_to_bkey(const struct btree *b, -+ struct bset_tree *t, -+ unsigned j) -+{ -+ return __btree_node_offset_to_key(b, rw_aux_tree(b, t)[j].offset); -+} -+ -+static void rw_aux_tree_set(const struct btree *b, struct bset_tree *t, -+ unsigned j, struct bkey_packed *k) -+{ -+ EBUG_ON(k >= btree_bkey_last(b, t)); -+ -+ rw_aux_tree(b, t)[j] = (struct rw_aux_tree) { -+ .offset = __btree_node_key_to_offset(b, k), -+ .k = bkey_unpack_pos(b, k), -+ }; -+} -+ -+static void bch2_bset_verify_rw_aux_tree(struct btree *b, -+ struct bset_tree *t) -+{ -+ struct bkey_packed *k = btree_bkey_first(b, t); -+ unsigned j = 0; -+ -+ if (!btree_keys_expensive_checks(b)) -+ return; -+ -+ BUG_ON(bset_has_ro_aux_tree(t)); -+ -+ if (!bset_has_rw_aux_tree(t)) -+ return; -+ -+ BUG_ON(t->size < 1); -+ BUG_ON(rw_aux_to_bkey(b, t, j) != k); -+ -+ goto start; -+ while (1) { -+ if (rw_aux_to_bkey(b, t, j) == k) { -+ BUG_ON(bkey_cmp(rw_aux_tree(b, t)[j].k, -+ bkey_unpack_pos(b, k))); -+start: -+ if (++j == t->size) -+ break; -+ -+ BUG_ON(rw_aux_tree(b, t)[j].offset <= -+ rw_aux_tree(b, t)[j - 1].offset); -+ } -+ -+ k = bkey_next_skip_noops(k, btree_bkey_last(b, t)); -+ BUG_ON(k >= btree_bkey_last(b, t)); -+ } -+} -+ -+/* returns idx of first entry >= offset: */ -+static unsigned rw_aux_tree_bsearch(struct btree *b, -+ struct bset_tree *t, -+ unsigned offset) -+{ -+ unsigned bset_offs = offset - btree_bkey_first_offset(t); -+ unsigned bset_u64s = t->end_offset - btree_bkey_first_offset(t); -+ unsigned idx = bset_u64s ? bset_offs * t->size / bset_u64s : 0; -+ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE); -+ EBUG_ON(!t->size); -+ EBUG_ON(idx > t->size); -+ -+ while (idx < t->size && -+ rw_aux_tree(b, t)[idx].offset < offset) -+ idx++; -+ -+ while (idx && -+ rw_aux_tree(b, t)[idx - 1].offset >= offset) -+ idx--; -+ -+ EBUG_ON(idx < t->size && -+ rw_aux_tree(b, t)[idx].offset < offset); -+ EBUG_ON(idx && rw_aux_tree(b, t)[idx - 1].offset >= offset); -+ EBUG_ON(idx + 1 < t->size && -+ rw_aux_tree(b, t)[idx].offset == -+ rw_aux_tree(b, t)[idx + 1].offset); -+ -+ return idx; -+} -+ -+static inline unsigned bkey_mantissa(const struct bkey_packed *k, -+ const struct bkey_float *f, -+ unsigned idx) -+{ -+ u64 v; -+ -+ EBUG_ON(!bkey_packed(k)); -+ -+ v = get_unaligned((u64 *) (((u8 *) k->_data) + (f->exponent >> 3))); -+ -+ /* -+ * In little endian, we're shifting off low bits (and then the bits we -+ * want are at the low end), in big endian we're shifting off high bits -+ * (and then the bits we want are at the high end, so we shift them -+ * back down): -+ */ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ v >>= f->exponent & 7; -+#else -+ v >>= 64 - (f->exponent & 7) - BKEY_MANTISSA_BITS; -+#endif -+ return (u16) v; -+} -+ -+static void make_bfloat(struct btree *b, struct bset_tree *t, -+ unsigned j, -+ struct bkey_packed *min_key, -+ struct bkey_packed *max_key) -+{ -+ struct bkey_float *f = bkey_float(b, t, j); -+ struct bkey_packed *m = tree_to_bkey(b, t, j); -+ struct bkey_packed *l, *r; -+ unsigned mantissa; -+ int shift, exponent, high_bit; -+ -+ if (is_power_of_2(j)) { -+ l = min_key; -+ -+ if (!l->u64s) { -+ if (!bkey_pack_pos(l, b->data->min_key, b)) { -+ struct bkey_i tmp; -+ -+ bkey_init(&tmp.k); -+ tmp.k.p = b->data->min_key; -+ bkey_copy(l, &tmp); -+ } -+ } -+ } else { -+ l = tree_to_prev_bkey(b, t, j >> ffs(j)); -+ -+ EBUG_ON(m < l); -+ } -+ -+ if (is_power_of_2(j + 1)) { -+ r = max_key; -+ -+ if (!r->u64s) { -+ if (!bkey_pack_pos(r, t->max_key, b)) { -+ struct bkey_i 
tmp; -+ -+ bkey_init(&tmp.k); -+ tmp.k.p = t->max_key; -+ bkey_copy(r, &tmp); -+ } -+ } -+ } else { -+ r = tree_to_bkey(b, t, j >> (ffz(j) + 1)); -+ -+ EBUG_ON(m > r); -+ } -+ -+ /* -+ * for failed bfloats, the lookup code falls back to comparing against -+ * the original key. -+ */ -+ -+ if (!bkey_packed(l) || !bkey_packed(r) || !bkey_packed(m) || -+ !b->nr_key_bits) { -+ f->exponent = BFLOAT_FAILED_UNPACKED; -+ return; -+ } -+ -+ /* -+ * The greatest differing bit of l and r is the first bit we must -+ * include in the bfloat mantissa we're creating in order to do -+ * comparisons - that bit always becomes the high bit of -+ * bfloat->mantissa, and thus the exponent we're calculating here is -+ * the position of what will become the low bit in bfloat->mantissa: -+ * -+ * Note that this may be negative - we may be running off the low end -+ * of the key: we handle this later: -+ */ -+ high_bit = max(bch2_bkey_greatest_differing_bit(b, l, r), -+ min_t(unsigned, BKEY_MANTISSA_BITS, b->nr_key_bits) - 1); -+ exponent = high_bit - (BKEY_MANTISSA_BITS - 1); -+ -+ /* -+ * Then we calculate the actual shift value, from the start of the key -+ * (k->_data), to get the key bits starting at exponent: -+ */ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ shift = (int) (b->format.key_u64s * 64 - b->nr_key_bits) + exponent; -+ -+ EBUG_ON(shift + BKEY_MANTISSA_BITS > b->format.key_u64s * 64); -+#else -+ shift = high_bit_offset + -+ b->nr_key_bits - -+ exponent - -+ BKEY_MANTISSA_BITS; -+ -+ EBUG_ON(shift < KEY_PACKED_BITS_START); -+#endif -+ EBUG_ON(shift < 0 || shift >= BFLOAT_FAILED); -+ -+ f->exponent = shift; -+ mantissa = bkey_mantissa(m, f, j); -+ -+ /* -+ * If we've got garbage bits, set them to all 1s - it's legal for the -+ * bfloat to compare larger than the original key, but not smaller: -+ */ -+ if (exponent < 0) -+ mantissa |= ~(~0U << -exponent); -+ -+ f->mantissa = mantissa; -+} -+ -+/* bytes remaining - only valid for last bset: */ -+static unsigned __bset_tree_capacity(struct btree *b, struct bset_tree *t) -+{ -+ bset_aux_tree_verify(b); -+ -+ return btree_aux_data_bytes(b) - t->aux_data_offset * sizeof(u64); -+} -+ -+static unsigned bset_ro_tree_capacity(struct btree *b, struct bset_tree *t) -+{ -+ return __bset_tree_capacity(b, t) / -+ (sizeof(struct bkey_float) + sizeof(u8)); -+} -+ -+static unsigned bset_rw_tree_capacity(struct btree *b, struct bset_tree *t) -+{ -+ return __bset_tree_capacity(b, t) / sizeof(struct rw_aux_tree); -+} -+ -+static void __build_rw_aux_tree(struct btree *b, struct bset_tree *t) -+{ -+ struct bkey_packed *k; -+ -+ t->size = 1; -+ t->extra = BSET_RW_AUX_TREE_VAL; -+ rw_aux_tree(b, t)[0].offset = -+ __btree_node_key_to_offset(b, btree_bkey_first(b, t)); -+ -+ bset_tree_for_each_key(b, t, k) { -+ if (t->size == bset_rw_tree_capacity(b, t)) -+ break; -+ -+ if ((void *) k - (void *) rw_aux_to_bkey(b, t, t->size - 1) > -+ L1_CACHE_BYTES) -+ rw_aux_tree_set(b, t, t->size++, k); -+ } -+} -+ -+static void __build_ro_aux_tree(struct btree *b, struct bset_tree *t) -+{ -+ struct bkey_packed *prev = NULL, *k = btree_bkey_first(b, t); -+ struct bkey_packed min_key, max_key; -+ unsigned j, cacheline = 1; -+ -+ /* signal to make_bfloat() that they're uninitialized: */ -+ min_key.u64s = max_key.u64s = 0; -+ -+ t->size = min(bkey_to_cacheline(b, t, btree_bkey_last(b, t)), -+ bset_ro_tree_capacity(b, t)); -+retry: -+ if (t->size < 2) { -+ t->size = 0; -+ t->extra = BSET_NO_AUX_TREE_VAL; -+ return; -+ } -+ -+ t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1; 
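The read-only tree being built here is stored in breadth-first ("eytzinger", 1-based) array order, so a descent only ever touches indices 2j and 2j+1 and is cheap to prefetch. Below is a self-contained sketch of searching that layout over plain ints, with illustrative names and a conveniently complete tree of 15 keys; the real code handles arbitrary sizes via t->extra and stores packed keys rather than ints.

/*
 * Illustrative sketch only, not bcachefs code: a binary search tree kept
 * as an array in breadth-first ("eytzinger", 1-based) order.  Children of
 * node j live at 2j and 2j+1.
 */
#include <stdio.h>

#define NR 15				/* 2^4 - 1 keys keeps the tree complete */

static int sorted[NR + 1];		/* 1-based sorted input */
static int eytz[NR + 1];		/* 1-based eytzinger layout */

/* In-order traversal of the implicit tree hands out the sorted keys. */
static int eytz_build(int j, int i)
{
	if (j <= NR) {
		i = eytz_build(2 * j, i);
		eytz[j] = sorted[i++];
		i = eytz_build(2 * j + 1, i);
	}
	return i;
}

/* Returns the smallest key >= @search, or -1 if there is none. */
static int eytz_search(int search)
{
	int j = 1, best = -1;

	while (j <= NR) {
		if (eytz[j] >= search) {
			best = eytz[j];
			j = 2 * j;		/* left: maybe a smaller candidate */
		} else {
			j = 2 * j + 1;		/* right: need something bigger */
		}
	}
	return best;
}

int main(void)
{
	int i;

	for (i = 1; i <= NR; i++)
		sorted[i] = 2 * i;		/* 2, 4, ..., 30 */
	eytz_build(1, 1);			/* start at root, first sorted key */

	printf("first key >= 7 is %d\n", eytz_search(7));	/* 8 */
	printf("first key >= 31 is %d\n", eytz_search(31));	/* -1 */
	return 0;
}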
-+ -+ /* First we figure out where the first key in each cacheline is */ -+ eytzinger1_for_each(j, t->size) { -+ while (bkey_to_cacheline(b, t, k) < cacheline) -+ prev = k, k = bkey_next_skip_noops(k, btree_bkey_last(b, t)); -+ -+ if (k >= btree_bkey_last(b, t)) { -+ /* XXX: this path sucks */ -+ t->size--; -+ goto retry; -+ } -+ -+ ro_aux_tree_prev(b, t)[j] = prev->u64s; -+ bkey_float(b, t, j)->key_offset = -+ bkey_to_cacheline_offset(b, t, cacheline++, k); -+ -+ EBUG_ON(tree_to_prev_bkey(b, t, j) != prev); -+ EBUG_ON(tree_to_bkey(b, t, j) != k); -+ } -+ -+ while (k != btree_bkey_last(b, t)) -+ prev = k, k = bkey_next_skip_noops(k, btree_bkey_last(b, t)); -+ -+ t->max_key = bkey_unpack_pos(b, prev); -+ -+ /* Then we build the tree */ -+ eytzinger1_for_each(j, t->size) -+ make_bfloat(b, t, j, &min_key, &max_key); -+} -+ -+static void bset_alloc_tree(struct btree *b, struct bset_tree *t) -+{ -+ struct bset_tree *i; -+ -+ for (i = b->set; i != t; i++) -+ BUG_ON(bset_has_rw_aux_tree(i)); -+ -+ bch2_bset_set_no_aux_tree(b, t); -+ -+ /* round up to next cacheline: */ -+ t->aux_data_offset = round_up(bset_aux_tree_buf_start(b, t), -+ SMP_CACHE_BYTES / sizeof(u64)); -+ -+ bset_aux_tree_verify(b); -+} -+ -+void bch2_bset_build_aux_tree(struct btree *b, struct bset_tree *t, -+ bool writeable) -+{ -+ if (writeable -+ ? bset_has_rw_aux_tree(t) -+ : bset_has_ro_aux_tree(t)) -+ return; -+ -+ bset_alloc_tree(b, t); -+ -+ if (!__bset_tree_capacity(b, t)) -+ return; -+ -+ if (writeable) -+ __build_rw_aux_tree(b, t); -+ else -+ __build_ro_aux_tree(b, t); -+ -+ bset_aux_tree_verify(b); -+} -+ -+void bch2_bset_init_first(struct btree *b, struct bset *i) -+{ -+ struct bset_tree *t; -+ -+ BUG_ON(b->nsets); -+ -+ memset(i, 0, sizeof(*i)); -+ get_random_bytes(&i->seq, sizeof(i->seq)); -+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); -+ -+ t = &b->set[b->nsets++]; -+ set_btree_bset(b, t, i); -+} -+ -+void bch2_bset_init_next(struct bch_fs *c, struct btree *b, -+ struct btree_node_entry *bne) -+{ -+ struct bset *i = &bne->keys; -+ struct bset_tree *t; -+ -+ BUG_ON(bset_byte_offset(b, bne) >= btree_bytes(c)); -+ BUG_ON((void *) bne < (void *) btree_bkey_last(b, bset_tree_last(b))); -+ BUG_ON(b->nsets >= MAX_BSETS); -+ -+ memset(i, 0, sizeof(*i)); -+ i->seq = btree_bset_first(b)->seq; -+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); -+ -+ t = &b->set[b->nsets++]; -+ set_btree_bset(b, t, i); -+} -+ -+/* -+ * find _some_ key in the same bset as @k that precedes @k - not necessarily the -+ * immediate predecessor: -+ */ -+static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t, -+ struct bkey_packed *k) -+{ -+ struct bkey_packed *p; -+ unsigned offset; -+ int j; -+ -+ EBUG_ON(k < btree_bkey_first(b, t) || -+ k > btree_bkey_last(b, t)); -+ -+ if (k == btree_bkey_first(b, t)) -+ return NULL; -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_NO_AUX_TREE: -+ p = btree_bkey_first(b, t); -+ break; -+ case BSET_RO_AUX_TREE: -+ j = min_t(unsigned, t->size - 1, bkey_to_cacheline(b, t, k)); -+ -+ do { -+ p = j ? tree_to_bkey(b, t, -+ __inorder_to_eytzinger1(j--, -+ t->size, t->extra)) -+ : btree_bkey_first(b, t); -+ } while (p >= k); -+ break; -+ case BSET_RW_AUX_TREE: -+ offset = __btree_node_key_to_offset(b, k); -+ j = rw_aux_tree_bsearch(b, t, offset); -+ p = j ? 
rw_aux_to_bkey(b, t, j - 1) -+ : btree_bkey_first(b, t); -+ break; -+ } -+ -+ return p; -+} -+ -+struct bkey_packed *bch2_bkey_prev_filter(struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *k, -+ unsigned min_key_type) -+{ -+ struct bkey_packed *p, *i, *ret = NULL, *orig_k = k; -+ -+ while ((p = __bkey_prev(b, t, k)) && !ret) { -+ for (i = p; i != k; i = bkey_next_skip_noops(i, k)) -+ if (i->type >= min_key_type) -+ ret = i; -+ -+ k = p; -+ } -+ -+ if (btree_keys_expensive_checks(b)) { -+ BUG_ON(ret >= orig_k); -+ -+ for (i = ret -+ ? bkey_next_skip_noops(ret, orig_k) -+ : btree_bkey_first(b, t); -+ i != orig_k; -+ i = bkey_next_skip_noops(i, orig_k)) -+ BUG_ON(i->type >= min_key_type); -+ } -+ -+ return ret; -+} -+ -+/* Insert */ -+ -+static void rw_aux_tree_fix_invalidated_key(struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *k) -+{ -+ unsigned offset = __btree_node_key_to_offset(b, k); -+ unsigned j = rw_aux_tree_bsearch(b, t, offset); -+ -+ if (j < t->size && -+ rw_aux_tree(b, t)[j].offset == offset) -+ rw_aux_tree_set(b, t, j, k); -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+} -+ -+static void ro_aux_tree_fix_invalidated_key(struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *k) -+{ -+ struct bkey_packed min_key, max_key; -+ unsigned inorder, j; -+ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE); -+ -+ /* signal to make_bfloat() that they're uninitialized: */ -+ min_key.u64s = max_key.u64s = 0; -+ -+ if (bkey_next_skip_noops(k, btree_bkey_last(b, t)) == btree_bkey_last(b, t)) { -+ t->max_key = bkey_unpack_pos(b, k); -+ -+ for (j = 1; j < t->size; j = j * 2 + 1) -+ make_bfloat(b, t, j, &min_key, &max_key); -+ } -+ -+ inorder = bkey_to_cacheline(b, t, k); -+ -+ if (inorder && -+ inorder < t->size) { -+ j = __inorder_to_eytzinger1(inorder, t->size, t->extra); -+ -+ if (k == tree_to_bkey(b, t, j)) { -+ /* Fix the node this key corresponds to */ -+ make_bfloat(b, t, j, &min_key, &max_key); -+ -+ /* Children for which this key is the right boundary */ -+ for (j = eytzinger1_left_child(j); -+ j < t->size; -+ j = eytzinger1_right_child(j)) -+ make_bfloat(b, t, j, &min_key, &max_key); -+ } -+ } -+ -+ if (inorder + 1 < t->size) { -+ j = __inorder_to_eytzinger1(inorder + 1, t->size, t->extra); -+ -+ if (k == tree_to_prev_bkey(b, t, j)) { -+ make_bfloat(b, t, j, &min_key, &max_key); -+ -+ /* Children for which this key is the left boundary */ -+ for (j = eytzinger1_right_child(j); -+ j < t->size; -+ j = eytzinger1_left_child(j)) -+ make_bfloat(b, t, j, &min_key, &max_key); -+ } -+ } -+} -+ -+/** -+ * bch2_bset_fix_invalidated_key() - given an existing key @k that has been -+ * modified, fix any auxiliary search tree by remaking all the nodes in the -+ * auxiliary search tree that @k corresponds to -+ */ -+void bch2_bset_fix_invalidated_key(struct btree *b, struct bkey_packed *k) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_NO_AUX_TREE: -+ break; -+ case BSET_RO_AUX_TREE: -+ ro_aux_tree_fix_invalidated_key(b, t, k); -+ break; -+ case BSET_RW_AUX_TREE: -+ rw_aux_tree_fix_invalidated_key(b, t, k); -+ break; -+ } -+} -+ -+static void bch2_bset_fix_lookup_table(struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *_where, -+ unsigned clobber_u64s, -+ unsigned new_u64s) -+{ -+ int shift = new_u64s - clobber_u64s; -+ unsigned l, j, where = __btree_node_key_to_offset(b, _where); -+ -+ EBUG_ON(bset_has_ro_aux_tree(t)); -+ -+ if (!bset_has_rw_aux_tree(t)) -+ return; -+ -+ /* returns first 
entry >= where */ -+ l = rw_aux_tree_bsearch(b, t, where); -+ -+ if (!l) /* never delete first entry */ -+ l++; -+ else if (l < t->size && -+ where < t->end_offset && -+ rw_aux_tree(b, t)[l].offset == where) -+ rw_aux_tree_set(b, t, l++, _where); -+ -+ /* l now > where */ -+ -+ for (j = l; -+ j < t->size && -+ rw_aux_tree(b, t)[j].offset < where + clobber_u64s; -+ j++) -+ ; -+ -+ if (j < t->size && -+ rw_aux_tree(b, t)[j].offset + shift == -+ rw_aux_tree(b, t)[l - 1].offset) -+ j++; -+ -+ memmove(&rw_aux_tree(b, t)[l], -+ &rw_aux_tree(b, t)[j], -+ (void *) &rw_aux_tree(b, t)[t->size] - -+ (void *) &rw_aux_tree(b, t)[j]); -+ t->size -= j - l; -+ -+ for (j = l; j < t->size; j++) -+ rw_aux_tree(b, t)[j].offset += shift; -+ -+ EBUG_ON(l < t->size && -+ rw_aux_tree(b, t)[l].offset == -+ rw_aux_tree(b, t)[l - 1].offset); -+ -+ if (t->size < bset_rw_tree_capacity(b, t) && -+ (l < t->size -+ ? rw_aux_tree(b, t)[l].offset -+ : t->end_offset) - -+ rw_aux_tree(b, t)[l - 1].offset > -+ L1_CACHE_BYTES / sizeof(u64)) { -+ struct bkey_packed *start = rw_aux_to_bkey(b, t, l - 1); -+ struct bkey_packed *end = l < t->size -+ ? rw_aux_to_bkey(b, t, l) -+ : btree_bkey_last(b, t); -+ struct bkey_packed *k = start; -+ -+ while (1) { -+ k = bkey_next_skip_noops(k, end); -+ if (k == end) -+ break; -+ -+ if ((void *) k - (void *) start >= L1_CACHE_BYTES) { -+ memmove(&rw_aux_tree(b, t)[l + 1], -+ &rw_aux_tree(b, t)[l], -+ (void *) &rw_aux_tree(b, t)[t->size] - -+ (void *) &rw_aux_tree(b, t)[l]); -+ t->size++; -+ rw_aux_tree_set(b, t, l, k); -+ break; -+ } -+ } -+ } -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+ bset_aux_tree_verify(b); -+} -+ -+void bch2_bset_insert(struct btree *b, -+ struct btree_node_iter *iter, -+ struct bkey_packed *where, -+ struct bkey_i *insert, -+ unsigned clobber_u64s) -+{ -+ struct bkey_format *f = &b->format; -+ struct bset_tree *t = bset_tree_last(b); -+ struct bkey_packed packed, *src = bkey_to_packed(insert); -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+ bch2_verify_insert_pos(b, where, bkey_to_packed(insert), clobber_u64s); -+ -+ if (bch2_bkey_pack_key(&packed, &insert->k, f)) -+ src = &packed; -+ -+ if (!bkey_whiteout(&insert->k)) -+ btree_keys_account_key_add(&b->nr, t - b->set, src); -+ -+ if (src->u64s != clobber_u64s) { -+ u64 *src_p = where->_data + clobber_u64s; -+ u64 *dst_p = where->_data + src->u64s; -+ -+ EBUG_ON((int) le16_to_cpu(bset(b, t)->u64s) < -+ (int) clobber_u64s - src->u64s); -+ -+ memmove_u64s(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p); -+ le16_add_cpu(&bset(b, t)->u64s, src->u64s - clobber_u64s); -+ set_btree_bset_end(b, t); -+ } -+ -+ memcpy_u64s(where, src, -+ bkeyp_key_u64s(f, src)); -+ memcpy_u64s(bkeyp_val(f, where), &insert->v, -+ bkeyp_val_u64s(f, src)); -+ -+ if (src->u64s != clobber_u64s) -+ bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s); -+ -+ bch2_verify_btree_nr_keys(b); -+} -+ -+void bch2_bset_delete(struct btree *b, -+ struct bkey_packed *where, -+ unsigned clobber_u64s) -+{ -+ struct bset_tree *t = bset_tree_last(b); -+ u64 *src_p = where->_data + clobber_u64s; -+ u64 *dst_p = where->_data; -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+ -+ EBUG_ON(le16_to_cpu(bset(b, t)->u64s) < clobber_u64s); -+ -+ memmove_u64s_down(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p); -+ le16_add_cpu(&bset(b, t)->u64s, -clobber_u64s); -+ set_btree_bset_end(b, t); -+ -+ bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, 0); -+} -+ -+/* Lookup */ -+ -+__flatten -+static struct bkey_packed *bset_search_write_set(const struct btree *b, -+ 
struct bset_tree *t, -+ struct bpos *search, -+ const struct bkey_packed *packed_search) -+{ -+ unsigned l = 0, r = t->size; -+ -+ while (l + 1 != r) { -+ unsigned m = (l + r) >> 1; -+ -+ if (bkey_cmp(rw_aux_tree(b, t)[m].k, *search) < 0) -+ l = m; -+ else -+ r = m; -+ } -+ -+ return rw_aux_to_bkey(b, t, l); -+} -+ -+static inline void prefetch_four_cachelines(void *p) -+{ -+#ifdef CONFIG_X86_64 -+ asm(".intel_syntax noprefix;" -+ "prefetcht0 [%0 - 127 + 64 * 0];" -+ "prefetcht0 [%0 - 127 + 64 * 1];" -+ "prefetcht0 [%0 - 127 + 64 * 2];" -+ "prefetcht0 [%0 - 127 + 64 * 3];" -+ ".att_syntax prefix;" -+ : -+ : "r" (p + 127)); -+#else -+ prefetch(p + L1_CACHE_BYTES * 0); -+ prefetch(p + L1_CACHE_BYTES * 1); -+ prefetch(p + L1_CACHE_BYTES * 2); -+ prefetch(p + L1_CACHE_BYTES * 3); -+#endif -+} -+ -+static inline bool bkey_mantissa_bits_dropped(const struct btree *b, -+ const struct bkey_float *f, -+ unsigned idx) -+{ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ unsigned key_bits_start = b->format.key_u64s * 64 - b->nr_key_bits; -+ -+ return f->exponent > key_bits_start; -+#else -+ unsigned key_bits_end = high_bit_offset + b->nr_key_bits; -+ -+ return f->exponent + BKEY_MANTISSA_BITS < key_bits_end; -+#endif -+} -+ -+__flatten -+static struct bkey_packed *bset_search_tree(const struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search, -+ const struct bkey_packed *packed_search) -+{ -+ struct ro_aux_tree *base = ro_aux_tree_base(b, t); -+ struct bkey_float *f; -+ struct bkey_packed *k; -+ unsigned inorder, n = 1, l, r; -+ int cmp; -+ -+ do { -+ if (likely(n << 4 < t->size)) -+ prefetch(&base->f[n << 4]); -+ -+ f = &base->f[n]; -+ -+ if (!unlikely(packed_search)) -+ goto slowpath; -+ if (unlikely(f->exponent >= BFLOAT_FAILED)) -+ goto slowpath; -+ -+ l = f->mantissa; -+ r = bkey_mantissa(packed_search, f, n); -+ -+ if (unlikely(l == r) && bkey_mantissa_bits_dropped(b, f, n)) -+ goto slowpath; -+ -+ n = n * 2 + (l < r); -+ continue; -+slowpath: -+ k = tree_to_bkey(b, t, n); -+ cmp = bkey_cmp_p_or_unp(b, k, packed_search, search); -+ if (!cmp) -+ return k; -+ -+ n = n * 2 + (cmp < 0); -+ } while (n < t->size); -+ -+ inorder = __eytzinger1_to_inorder(n >> 1, t->size, t->extra); -+ -+ /* -+ * n would have been the node we recursed to - the low bit tells us if -+ * we recursed left or recursed right. -+ */ -+ if (likely(!(n & 1))) { -+ --inorder; -+ if (unlikely(!inorder)) -+ return btree_bkey_first(b, t); -+ -+ f = &base->f[eytzinger1_prev(n >> 1, t->size)]; -+ } -+ -+ return cacheline_to_bkey(b, t, inorder, f->key_offset); -+} -+ -+static __always_inline __flatten -+struct bkey_packed *__bch2_bset_search(struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search, -+ const struct bkey_packed *lossy_packed_search) -+{ -+ -+ /* -+ * First, we search for a cacheline, then lastly we do a linear search -+ * within that cacheline. -+ * -+ * To search for the cacheline, there's three different possibilities: -+ * * The set is too small to have a search tree, so we just do a linear -+ * search over the whole set. -+ * * The set is the one we're currently inserting into; keeping a full -+ * auxiliary search tree up to date would be too expensive, so we -+ * use a much simpler lookup table to do a binary search - -+ * bset_search_write_set(). 
-+ * * Or we use the auxiliary search tree we constructed earlier - -+ * bset_search_tree() -+ */ -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_NO_AUX_TREE: -+ return btree_bkey_first(b, t); -+ case BSET_RW_AUX_TREE: -+ return bset_search_write_set(b, t, search, lossy_packed_search); -+ case BSET_RO_AUX_TREE: -+ /* -+ * Each node in the auxiliary search tree covers a certain range -+ * of bits, and keys above and below the set it covers might -+ * differ outside those bits - so we have to special case the -+ * start and end - handle that here: -+ */ -+ -+ if (bkey_cmp(*search, t->max_key) > 0) -+ return btree_bkey_last(b, t); -+ -+ return bset_search_tree(b, t, search, lossy_packed_search); -+ default: -+ unreachable(); -+ } -+} -+ -+static __always_inline __flatten -+struct bkey_packed *bch2_bset_search_linear(struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search, -+ struct bkey_packed *packed_search, -+ const struct bkey_packed *lossy_packed_search, -+ struct bkey_packed *m) -+{ -+ if (lossy_packed_search) -+ while (m != btree_bkey_last(b, t) && -+ bkey_iter_cmp_p_or_unp(b, m, -+ lossy_packed_search, search) < 0) -+ m = bkey_next_skip_noops(m, btree_bkey_last(b, t)); -+ -+ if (!packed_search) -+ while (m != btree_bkey_last(b, t) && -+ bkey_iter_pos_cmp(b, m, search) < 0) -+ m = bkey_next_skip_noops(m, btree_bkey_last(b, t)); -+ -+ if (btree_keys_expensive_checks(b)) { -+ struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m); -+ -+ BUG_ON(prev && -+ bkey_iter_cmp_p_or_unp(b, prev, -+ packed_search, search) >= 0); -+ } -+ -+ return m; -+} -+ -+/* -+ * Returns the first key greater than or equal to @search -+ */ -+static __always_inline __flatten -+struct bkey_packed *bch2_bset_search(struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search, -+ struct bkey_packed *packed_search, -+ const struct bkey_packed *lossy_packed_search) -+{ -+ struct bkey_packed *m = __bch2_bset_search(b, t, search, -+ lossy_packed_search); -+ -+ return bch2_bset_search_linear(b, t, search, -+ packed_search, lossy_packed_search, m); -+} -+ -+/* Btree node iterator */ -+ -+static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter, -+ struct btree *b, -+ const struct bkey_packed *k, -+ const struct bkey_packed *end) -+{ -+ if (k != end) { -+ struct btree_node_iter_set *pos; -+ -+ btree_node_iter_for_each(iter, pos) -+ ; -+ -+ BUG_ON(pos >= iter->data + ARRAY_SIZE(iter->data)); -+ *pos = (struct btree_node_iter_set) { -+ __btree_node_key_to_offset(b, k), -+ __btree_node_key_to_offset(b, end) -+ }; -+ } -+} -+ -+void bch2_btree_node_iter_push(struct btree_node_iter *iter, -+ struct btree *b, -+ const struct bkey_packed *k, -+ const struct bkey_packed *end) -+{ -+ __bch2_btree_node_iter_push(iter, b, k, end); -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+noinline __flatten __attribute__((cold)) -+static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter, -+ struct btree *b, struct bpos *search) -+{ -+ struct bset_tree *t; -+ -+ trace_bkey_pack_pos_fail(search); -+ -+ for_each_bset(b, t) -+ __bch2_btree_node_iter_push(iter, b, -+ bch2_bset_search(b, t, search, NULL, NULL), -+ btree_bkey_last(b, t)); -+ -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+/** -+ * bch_btree_node_iter_init - initialize a btree node iterator, starting from a -+ * given position -+ * -+ * Main entry point to the lookup code for individual btree nodes: -+ * -+ * NOTE: -+ * -+ * When you don't filter out deleted keys, btree nodes _do_ contain duplicate -+ * keys. 
This doesn't matter for most code, but it does matter for lookups. -+ * -+ * Some adjacent keys with a string of equal keys: -+ * i j k k k k l m -+ * -+ * If you search for k, the lookup code isn't guaranteed to return you any -+ * specific k. The lookup code is conceptually doing a binary search and -+ * iterating backwards is very expensive so if the pivot happens to land at the -+ * last k that's what you'll get. -+ * -+ * This works out ok, but it's something to be aware of: -+ * -+ * - For non extents, we guarantee that the live key comes last - see -+ * btree_node_iter_cmp(), keys_out_of_order(). So the duplicates you don't -+ * see will only be deleted keys you don't care about. -+ * -+ * - For extents, deleted keys sort last (see the comment at the top of this -+ * file). But when you're searching for extents, you actually want the first -+ * key strictly greater than your search key - an extent that compares equal -+ * to the search key is going to have 0 sectors after the search key. -+ * -+ * But this does mean that we can't just search for -+ * bkey_successor(start_of_range) to get the first extent that overlaps with -+ * the range we want - if we're unlucky and there's an extent that ends -+ * exactly where we searched, then there could be a deleted key at the same -+ * position and we'd get that when we search instead of the preceding extent -+ * we needed. -+ * -+ * So we've got to search for start_of_range, then after the lookup iterate -+ * past any extents that compare equal to the position we searched for. -+ */ -+__flatten -+void bch2_btree_node_iter_init(struct btree_node_iter *iter, -+ struct btree *b, struct bpos *search) -+{ -+ struct bkey_packed p, *packed_search = NULL; -+ struct btree_node_iter_set *pos = iter->data; -+ struct bkey_packed *k[MAX_BSETS]; -+ unsigned i; -+ -+ EBUG_ON(bkey_cmp(*search, b->data->min_key) < 0); -+ bset_aux_tree_verify(b); -+ -+ memset(iter, 0, sizeof(*iter)); -+ -+ switch (bch2_bkey_pack_pos_lossy(&p, *search, b)) { -+ case BKEY_PACK_POS_EXACT: -+ packed_search = &p; -+ break; -+ case BKEY_PACK_POS_SMALLER: -+ packed_search = NULL; -+ break; -+ case BKEY_PACK_POS_FAIL: -+ btree_node_iter_init_pack_failed(iter, b, search); -+ return; -+ } -+ -+ for (i = 0; i < b->nsets; i++) { -+ k[i] = __bch2_bset_search(b, b->set + i, search, &p); -+ prefetch_four_cachelines(k[i]); -+ } -+ -+ for (i = 0; i < b->nsets; i++) { -+ struct bset_tree *t = b->set + i; -+ struct bkey_packed *end = btree_bkey_last(b, t); -+ -+ k[i] = bch2_bset_search_linear(b, t, search, -+ packed_search, &p, k[i]); -+ if (k[i] != end) -+ *pos++ = (struct btree_node_iter_set) { -+ __btree_node_key_to_offset(b, k[i]), -+ __btree_node_key_to_offset(b, end) -+ }; -+ } -+ -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+void bch2_btree_node_iter_init_from_start(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ memset(iter, 0, sizeof(*iter)); -+ -+ for_each_bset(b, t) -+ __bch2_btree_node_iter_push(iter, b, -+ btree_bkey_first(b, t), -+ btree_bkey_last(b, t)); -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *iter, -+ struct btree *b, -+ struct bset_tree *t) -+{ -+ struct btree_node_iter_set *set; -+ -+ btree_node_iter_for_each(iter, set) -+ if (set->end == t->end_offset) -+ return __btree_node_offset_to_key(b, set->k); -+ -+ return btree_bkey_last(b, t); -+} -+ -+static inline bool btree_node_iter_sort_two(struct btree_node_iter *iter, -+ struct btree *b, -+ unsigned first) 
-+{ -+ bool ret; -+ -+ if ((ret = (btree_node_iter_cmp(b, -+ iter->data[first], -+ iter->data[first + 1]) > 0))) -+ swap(iter->data[first], iter->data[first + 1]); -+ return ret; -+} -+ -+void bch2_btree_node_iter_sort(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ /* unrolled bubble sort: */ -+ -+ if (!__btree_node_iter_set_end(iter, 2)) { -+ btree_node_iter_sort_two(iter, b, 0); -+ btree_node_iter_sort_two(iter, b, 1); -+ } -+ -+ if (!__btree_node_iter_set_end(iter, 1)) -+ btree_node_iter_sort_two(iter, b, 0); -+} -+ -+void bch2_btree_node_iter_set_drop(struct btree_node_iter *iter, -+ struct btree_node_iter_set *set) -+{ -+ struct btree_node_iter_set *last = -+ iter->data + ARRAY_SIZE(iter->data) - 1; -+ -+ memmove(&set[0], &set[1], (void *) last - (void *) set); -+ *last = (struct btree_node_iter_set) { 0, 0 }; -+} -+ -+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ iter->data->k += __bch2_btree_node_iter_peek_all(iter, b)->u64s; -+ -+ EBUG_ON(iter->data->k > iter->data->end); -+ -+ while (!__btree_node_iter_set_end(iter, 0) && -+ !__bch2_btree_node_iter_peek_all(iter, b)->u64s) -+ iter->data->k++; -+ -+ if (unlikely(__btree_node_iter_set_end(iter, 0))) { -+ bch2_btree_node_iter_set_drop(iter, iter->data); -+ return; -+ } -+ -+ if (__btree_node_iter_set_end(iter, 1)) -+ return; -+ -+ if (!btree_node_iter_sort_two(iter, b, 0)) -+ return; -+ -+ if (__btree_node_iter_set_end(iter, 2)) -+ return; -+ -+ btree_node_iter_sort_two(iter, b, 1); -+} -+ -+void bch2_btree_node_iter_advance(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ if (btree_keys_expensive_checks(b)) { -+ bch2_btree_node_iter_verify(iter, b); -+ bch2_btree_node_iter_next_check(iter, b); -+ } -+ -+ __bch2_btree_node_iter_advance(iter, b); -+} -+ -+/* -+ * Expensive: -+ */ -+struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ struct bkey_packed *k, *prev = NULL; -+ struct btree_node_iter_set *set; -+ struct bset_tree *t; -+ unsigned end = 0; -+ -+ if (btree_keys_expensive_checks(b)) -+ bch2_btree_node_iter_verify(iter, b); -+ -+ for_each_bset(b, t) { -+ k = bch2_bkey_prev_all(b, t, -+ bch2_btree_node_iter_bset_pos(iter, b, t)); -+ if (k && -+ (!prev || bkey_iter_cmp(b, k, prev) > 0)) { -+ prev = k; -+ end = t->end_offset; -+ } -+ } -+ -+ if (!prev) -+ return NULL; -+ -+ /* -+ * We're manually memmoving instead of just calling sort() to ensure the -+ * prev we picked ends up in slot 0 - sort won't necessarily put it -+ * there because of duplicate deleted keys: -+ */ -+ btree_node_iter_for_each(iter, set) -+ if (set->end == end) -+ goto found; -+ -+ BUG_ON(set != &iter->data[__btree_node_iter_used(iter)]); -+found: -+ BUG_ON(set >= iter->data + ARRAY_SIZE(iter->data)); -+ -+ memmove(&iter->data[1], -+ &iter->data[0], -+ (void *) set - (void *) &iter->data[0]); -+ -+ iter->data[0].k = __btree_node_key_to_offset(b, prev); -+ iter->data[0].end = end; -+ -+ if (btree_keys_expensive_checks(b)) -+ bch2_btree_node_iter_verify(iter, b); -+ return prev; -+} -+ -+struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *iter, -+ struct btree *b, -+ unsigned min_key_type) -+{ -+ struct bkey_packed *prev; -+ -+ do { -+ prev = bch2_btree_node_iter_prev_all(iter, b); -+ } while (prev && prev->type < min_key_type); -+ -+ return prev; -+} -+ -+struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *iter, -+ struct btree *b, -+ struct bkey *u) -+{ -+ struct bkey_packed *k = 
bch2_btree_node_iter_peek(iter, b); -+ -+ return k ? bkey_disassemble(b, k, u) : bkey_s_c_null; -+} -+ -+/* Mergesort */ -+ -+void bch2_btree_keys_stats(struct btree *b, struct bset_stats *stats) -+{ -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) { -+ enum bset_aux_tree_type type = bset_aux_tree_type(t); -+ size_t j; -+ -+ stats->sets[type].nr++; -+ stats->sets[type].bytes += le16_to_cpu(bset(b, t)->u64s) * -+ sizeof(u64); -+ -+ if (bset_has_ro_aux_tree(t)) { -+ stats->floats += t->size - 1; -+ -+ for (j = 1; j < t->size; j++) -+ stats->failed += -+ bkey_float(b, t, j)->exponent == -+ BFLOAT_FAILED; -+ } -+ } -+} -+ -+void bch2_bfloat_to_text(struct printbuf *out, struct btree *b, -+ struct bkey_packed *k) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ struct bkey uk; -+ unsigned j, inorder; -+ -+ if (out->pos != out->end) -+ *out->pos = '\0'; -+ -+ if (!bset_has_ro_aux_tree(t)) -+ return; -+ -+ inorder = bkey_to_cacheline(b, t, k); -+ if (!inorder || inorder >= t->size) -+ return; -+ -+ j = __inorder_to_eytzinger1(inorder, t->size, t->extra); -+ if (k != tree_to_bkey(b, t, j)) -+ return; -+ -+ switch (bkey_float(b, t, j)->exponent) { -+ case BFLOAT_FAILED: -+ uk = bkey_unpack_key(b, k); -+ pr_buf(out, -+ " failed unpacked at depth %u\n" -+ "\t%llu:%llu\n", -+ ilog2(j), -+ uk.p.inode, uk.p.offset); -+ break; -+ } -+} -diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h -new file mode 100644 -index 000000000000..5921cf689105 ---- /dev/null -+++ b/fs/bcachefs/bset.h -@@ -0,0 +1,661 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BSET_H -+#define _BCACHEFS_BSET_H -+ -+#include -+#include -+ -+#include "bcachefs_format.h" -+#include "bkey.h" -+#include "bkey_methods.h" -+#include "btree_types.h" -+#include "util.h" /* for time_stats */ -+#include "vstructs.h" -+ -+/* -+ * BKEYS: -+ * -+ * A bkey contains a key, a size field, a variable number of pointers, and some -+ * ancillary flag bits. -+ * -+ * We use two different functions for validating bkeys, bkey_invalid and -+ * bkey_deleted(). -+ * -+ * The one exception to the rule that ptr_invalid() filters out invalid keys is -+ * that it also filters out keys of size 0 - these are keys that have been -+ * completely overwritten. It'd be safe to delete these in memory while leaving -+ * them on disk, just unnecessary work - so we filter them out when resorting -+ * instead. -+ * -+ * We can't filter out stale keys when we're resorting, because garbage -+ * collection needs to find them to ensure bucket gens don't wrap around - -+ * unless we're rewriting the btree node those stale keys still exist on disk. -+ * -+ * We also implement functions here for removing some number of sectors from the -+ * front or the back of a bkey - this is mainly used for fixing overlapping -+ * extents, by removing the overlapping sectors from the older key. -+ * -+ * BSETS: -+ * -+ * A bset is an array of bkeys laid out contiguously in memory in sorted order, -+ * along with a header. A btree node is made up of a number of these, written at -+ * different times. -+ * -+ * There could be many of them on disk, but we never allow there to be more than -+ * 4 in memory - we lazily resort as needed. -+ * -+ * We implement code here for creating and maintaining auxiliary search trees -+ * (described below) for searching an individial bset, and on top of that we -+ * implement a btree iterator. 
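The btree iterator mentioned here is, at heart, a k-way merge over the node's independently sorted sets. A minimal standalone sketch with plain int arrays and illustrative names (no packed keys, whiteouts, or deduplication):

/*
 * Illustrative sketch only, not bcachefs code: iterate over several sorted
 * arrays ("bsets") in one globally sorted pass by always advancing the
 * array whose next element is smallest.
 */
#include <stdio.h>

#define MAX_SETS 3

struct set_iter {
	const int *pos, *end;
};

struct node_iter {
	struct set_iter data[MAX_SETS];
	int used;
};

static void node_iter_push(struct node_iter *iter, const int *start, int nr)
{
	if (nr)
		iter->data[iter->used++] = (struct set_iter) { start, start + nr };
}

/* Stores the next smallest element in *out; returns 0 once all sets are done. */
static int node_iter_next(struct node_iter *iter, int *out)
{
	struct set_iter *best = NULL;
	int i;

	for (i = 0; i < iter->used; i++) {
		struct set_iter *s = &iter->data[i];

		if (s->pos != s->end && (!best || *s->pos < *best->pos))
			best = s;
	}

	if (!best)
		return 0;

	*out = *best->pos++;
	return 1;
}

int main(void)
{
	int a[] = { 1, 4, 9 }, b[] = { 2, 3, 10 }, c[] = { 5 };
	struct node_iter iter = { .used = 0 };
	int v;

	node_iter_push(&iter, a, 3);
	node_iter_push(&iter, b, 3);
	node_iter_push(&iter, c, 1);

	while (node_iter_next(&iter, &v))
		printf("%d ", v);		/* 1 2 3 4 5 9 10 */
	printf("\n");
	return 0;
}

The real iterator keeps its sets ordered by their current key with a small unrolled sort instead of scanning every set on each step, but the order it produces is the same.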
-+ * -+ * BTREE ITERATOR: -+ * -+ * Most of the code in bcache doesn't care about an individual bset - it needs -+ * to search entire btree nodes and iterate over them in sorted order. -+ * -+ * The btree iterator code serves both functions; it iterates through the keys -+ * in a btree node in sorted order, starting from either keys after a specific -+ * point (if you pass it a search key) or the start of the btree node. -+ * -+ * AUXILIARY SEARCH TREES: -+ * -+ * Since keys are variable length, we can't use a binary search on a bset - we -+ * wouldn't be able to find the start of the next key. But binary searches are -+ * slow anyways, due to terrible cache behaviour; bcache originally used binary -+ * searches and that code topped out at under 50k lookups/second. -+ * -+ * So we need to construct some sort of lookup table. Since we only insert keys -+ * into the last (unwritten) set, most of the keys within a given btree node are -+ * usually in sets that are mostly constant. We use two different types of -+ * lookup tables to take advantage of this. -+ * -+ * Both lookup tables share in common that they don't index every key in the -+ * set; they index one key every BSET_CACHELINE bytes, and then a linear search -+ * is used for the rest. -+ * -+ * For sets that have been written to disk and are no longer being inserted -+ * into, we construct a binary search tree in an array - traversing a binary -+ * search tree in an array gives excellent locality of reference and is very -+ * fast, since both children of any node are adjacent to each other in memory -+ * (and their grandchildren, and great grandchildren...) - this means -+ * prefetching can be used to great effect. -+ * -+ * It's quite useful performance wise to keep these nodes small - not just -+ * because they're more likely to be in L2, but also because we can prefetch -+ * more nodes on a single cacheline and thus prefetch more iterations in advance -+ * when traversing this tree. -+ * -+ * Nodes in the auxiliary search tree must contain both a key to compare against -+ * (we don't want to fetch the key from the set, that would defeat the purpose), -+ * and a pointer to the key. We use a few tricks to compress both of these. -+ * -+ * To compress the pointer, we take advantage of the fact that one node in the -+ * search tree corresponds to precisely BSET_CACHELINE bytes in the set. We have -+ * a function (to_inorder()) that takes the index of a node in a binary tree and -+ * returns what its index would be in an inorder traversal, so we only have to -+ * store the low bits of the offset. -+ * -+ * The key is 84 bits (KEY_DEV + key->key, the offset on the device). To -+ * compress that, we take advantage of the fact that when we're traversing the -+ * search tree at every iteration we know that both our search key and the key -+ * we're looking for lie within some range - bounded by our previous -+ * comparisons. (We special case the start of a search so that this is true even -+ * at the root of the tree). -+ * -+ * So we know the key we're looking for is between a and b, and a and b don't -+ * differ higher than bit 50, we don't need to check anything higher than bit -+ * 50. -+ * -+ * We don't usually need the rest of the bits, either; we only need enough bits -+ * to partition the key range we're currently checking. Consider key n - the -+ * key our auxiliary search tree node corresponds to, and key p, the key -+ * immediately preceding n. 
The lowest bit we need to store in the auxiliary -+ * search tree is the highest bit that differs between n and p. -+ * -+ * Note that this could be bit 0 - we might sometimes need all 80 bits to do the -+ * comparison. But we'd really like our nodes in the auxiliary search tree to be -+ * of fixed size. -+ * -+ * The solution is to make them fixed size, and when we're constructing a node -+ * check if p and n differed in the bits we needed them to. If they don't we -+ * flag that node, and when doing lookups we fallback to comparing against the -+ * real key. As long as this doesn't happen to often (and it seems to reliably -+ * happen a bit less than 1% of the time), we win - even on failures, that key -+ * is then more likely to be in cache than if we were doing binary searches all -+ * the way, since we're touching so much less memory. -+ * -+ * The keys in the auxiliary search tree are stored in (software) floating -+ * point, with an exponent and a mantissa. The exponent needs to be big enough -+ * to address all the bits in the original key, but the number of bits in the -+ * mantissa is somewhat arbitrary; more bits just gets us fewer failures. -+ * -+ * We need 7 bits for the exponent and 3 bits for the key's offset (since keys -+ * are 8 byte aligned); using 22 bits for the mantissa means a node is 4 bytes. -+ * We need one node per 128 bytes in the btree node, which means the auxiliary -+ * search trees take up 3% as much memory as the btree itself. -+ * -+ * Constructing these auxiliary search trees is moderately expensive, and we -+ * don't want to be constantly rebuilding the search tree for the last set -+ * whenever we insert another key into it. For the unwritten set, we use a much -+ * simpler lookup table - it's just a flat array, so index i in the lookup table -+ * corresponds to the i range of BSET_CACHELINE bytes in the set. Indexing -+ * within each byte range works the same as with the auxiliary search trees. -+ * -+ * These are much easier to keep up to date when we insert a key - we do it -+ * somewhat lazily; when we shift a key up we usually just increment the pointer -+ * to it, only when it would overflow do we go to the trouble of finding the -+ * first key in that range of bytes again. -+ */ -+ -+extern bool bch2_expensive_debug_checks; -+ -+static inline bool btree_keys_expensive_checks(const struct btree *b) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ return bch2_expensive_debug_checks || *b->expensive_debug_checks; -+#else -+ return false; -+#endif -+} -+ -+enum bset_aux_tree_type { -+ BSET_NO_AUX_TREE, -+ BSET_RO_AUX_TREE, -+ BSET_RW_AUX_TREE, -+}; -+ -+#define BSET_TREE_NR_TYPES 3 -+ -+#define BSET_NO_AUX_TREE_VAL (U16_MAX) -+#define BSET_RW_AUX_TREE_VAL (U16_MAX - 1) -+ -+static inline enum bset_aux_tree_type bset_aux_tree_type(const struct bset_tree *t) -+{ -+ switch (t->extra) { -+ case BSET_NO_AUX_TREE_VAL: -+ EBUG_ON(t->size); -+ return BSET_NO_AUX_TREE; -+ case BSET_RW_AUX_TREE_VAL: -+ EBUG_ON(!t->size); -+ return BSET_RW_AUX_TREE; -+ default: -+ EBUG_ON(!t->size); -+ return BSET_RO_AUX_TREE; -+ } -+} -+ -+/* -+ * BSET_CACHELINE was originally intended to match the hardware cacheline size - -+ * it used to be 64, but I realized the lookup code would touch slightly less -+ * memory if it was 128. -+ * -+ * It definites the number of bytes (in struct bset) per struct bkey_float in -+ * the auxiliar search tree - when we're done searching the bset_float tree we -+ * have this many bytes left that we do a linear search over. 
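A quick worked calculation of what that granularity costs, assuming a 16 KiB btree node (the node size is an assumption for the example, not something fixed by this code):

/*
 * Illustrative arithmetic only: rough aux search tree footprint for one
 * btree node at one bkey_float per BSET_CACHELINE bytes of keys.
 */
#include <stdio.h>

#define CACHELINE_DEMO	128		/* matches BSET_CACHELINE below */
#define NODE_BYTES	(16 * 1024)	/* assumed node size */
#define FLOAT_BYTES	4		/* u8 + u8 + u16 bkey_float above */

int main(void)
{
	unsigned nodes     = NODE_BYTES / CACHELINE_DEMO;	/* 128 */
	unsigned aux_bytes = nodes * FLOAT_BYTES;		/* 512 */

	/* the ro tree also keeps one prev-size byte per node, so a bit more */
	printf("%u aux nodes, %u bytes, %.1f%% of the node\n",
	       nodes, aux_bytes, aux_bytes * 100.0 / NODE_BYTES);
	return 0;
}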
-+ * -+ * Since (after level 5) every level of the bset_tree is on a new cacheline, -+ * we're touching one fewer cacheline in the bset tree in exchange for one more -+ * cacheline in the linear search - but the linear search might stop before it -+ * gets to the second cacheline. -+ */ -+ -+#define BSET_CACHELINE 128 -+ -+static inline size_t btree_keys_cachelines(struct btree *b) -+{ -+ return (1U << b->byte_order) / BSET_CACHELINE; -+} -+ -+static inline size_t btree_aux_data_bytes(struct btree *b) -+{ -+ return btree_keys_cachelines(b) * 8; -+} -+ -+static inline size_t btree_aux_data_u64s(struct btree *b) -+{ -+ return btree_aux_data_bytes(b) / sizeof(u64); -+} -+ -+typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *); -+ -+static inline void -+__bkey_unpack_key_format_checked(const struct btree *b, -+ struct bkey *dst, -+ const struct bkey_packed *src) -+{ -+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK -+ { -+ compiled_unpack_fn unpack_fn = b->aux_data; -+ unpack_fn(dst, src); -+ -+ if (btree_keys_expensive_checks(b)) { -+ struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src); -+ -+ BUG_ON(memcmp(dst, &dst2, sizeof(*dst))); -+ } -+ } -+#else -+ *dst = __bch2_bkey_unpack_key(&b->format, src); -+#endif -+} -+ -+static inline struct bkey -+bkey_unpack_key_format_checked(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+ struct bkey dst; -+ -+ __bkey_unpack_key_format_checked(b, &dst, src); -+ return dst; -+} -+ -+static inline void __bkey_unpack_key(const struct btree *b, -+ struct bkey *dst, -+ const struct bkey_packed *src) -+{ -+ if (likely(bkey_packed(src))) -+ __bkey_unpack_key_format_checked(b, dst, src); -+ else -+ *dst = *packed_to_bkey_c(src); -+} -+ -+/** -+ * bkey_unpack_key -- unpack just the key, not the value -+ */ -+static inline struct bkey bkey_unpack_key(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+ return likely(bkey_packed(src)) -+ ? bkey_unpack_key_format_checked(b, src) -+ : *packed_to_bkey_c(src); -+} -+ -+static inline struct bpos -+bkey_unpack_pos_format_checked(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK -+ return bkey_unpack_key_format_checked(b, src).p; -+#else -+ return __bkey_unpack_pos(&b->format, src); -+#endif -+} -+ -+static inline struct bpos bkey_unpack_pos(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+ return likely(bkey_packed(src)) -+ ? 
bkey_unpack_pos_format_checked(b, src) -+ : packed_to_bkey_c(src)->p; -+} -+ -+/* Disassembled bkeys */ -+ -+static inline struct bkey_s_c bkey_disassemble(struct btree *b, -+ const struct bkey_packed *k, -+ struct bkey *u) -+{ -+ __bkey_unpack_key(b, u, k); -+ -+ return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), }; -+} -+ -+/* non const version: */ -+static inline struct bkey_s __bkey_disassemble(struct btree *b, -+ struct bkey_packed *k, -+ struct bkey *u) -+{ -+ __bkey_unpack_key(b, u, k); -+ -+ return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), }; -+} -+ -+#define for_each_bset(_b, _t) \ -+ for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++) -+ -+#define bset_tree_for_each_key(_b, _t, _k) \ -+ for (_k = btree_bkey_first(_b, _t); \ -+ _k != btree_bkey_last(_b, _t); \ -+ _k = bkey_next_skip_noops(_k, btree_bkey_last(_b, _t))) -+ -+static inline bool bset_has_ro_aux_tree(struct bset_tree *t) -+{ -+ return bset_aux_tree_type(t) == BSET_RO_AUX_TREE; -+} -+ -+static inline bool bset_has_rw_aux_tree(struct bset_tree *t) -+{ -+ return bset_aux_tree_type(t) == BSET_RW_AUX_TREE; -+} -+ -+static inline void bch2_bset_set_no_aux_tree(struct btree *b, -+ struct bset_tree *t) -+{ -+ BUG_ON(t < b->set); -+ -+ for (; t < b->set + ARRAY_SIZE(b->set); t++) { -+ t->size = 0; -+ t->extra = BSET_NO_AUX_TREE_VAL; -+ t->aux_data_offset = U16_MAX; -+ } -+} -+ -+static inline void btree_node_set_format(struct btree *b, -+ struct bkey_format f) -+{ -+ int len; -+ -+ b->format = f; -+ b->nr_key_bits = bkey_format_key_bits(&f); -+ -+ len = bch2_compile_bkey_format(&b->format, b->aux_data); -+ BUG_ON(len < 0 || len > U8_MAX); -+ -+ b->unpack_fn_len = len; -+ -+ bch2_bset_set_no_aux_tree(b, b->set); -+} -+ -+static inline struct bset *bset_next_set(struct btree *b, -+ unsigned block_bytes) -+{ -+ struct bset *i = btree_bset_last(b); -+ -+ EBUG_ON(!is_power_of_2(block_bytes)); -+ -+ return ((void *) i) + round_up(vstruct_bytes(i), block_bytes); -+} -+ -+void bch2_btree_keys_init(struct btree *, bool *); -+ -+void bch2_bset_init_first(struct btree *, struct bset *); -+void bch2_bset_init_next(struct bch_fs *, struct btree *, -+ struct btree_node_entry *); -+void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool); -+void bch2_bset_fix_invalidated_key(struct btree *, struct bkey_packed *); -+ -+void bch2_bset_insert(struct btree *, struct btree_node_iter *, -+ struct bkey_packed *, struct bkey_i *, unsigned); -+void bch2_bset_delete(struct btree *, struct bkey_packed *, unsigned); -+ -+/* Bkey utility code */ -+ -+/* packed or unpacked */ -+static inline int bkey_cmp_p_or_unp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r_packed, -+ const struct bpos *r) -+{ -+ EBUG_ON(r_packed && !bkey_packed(r_packed)); -+ -+ if (unlikely(!bkey_packed(l))) -+ return bkey_cmp(packed_to_bkey_c(l)->p, *r); -+ -+ if (likely(r_packed)) -+ return __bch2_bkey_cmp_packed_format_checked(l, r_packed, b); -+ -+ return __bch2_bkey_cmp_left_packed_format_checked(b, l, r); -+} -+ -+struct bset_tree *bch2_bkey_to_bset(struct btree *, struct bkey_packed *); -+ -+struct bkey_packed *bch2_bkey_prev_filter(struct btree *, struct bset_tree *, -+ struct bkey_packed *, unsigned); -+ -+static inline struct bkey_packed * -+bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k) -+{ -+ return bch2_bkey_prev_filter(b, t, k, 0); -+} -+ -+static inline struct bkey_packed * -+bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k) -+{ -+ 
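	/*
	 * Same as bch2_bkey_prev_all() above, but passing
	 * KEY_TYPE_discard + 1 as the minimum key type filters out
	 * deleted/discard keys:
	 */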
return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_discard + 1); -+} -+ -+enum bch_extent_overlap { -+ BCH_EXTENT_OVERLAP_ALL = 0, -+ BCH_EXTENT_OVERLAP_BACK = 1, -+ BCH_EXTENT_OVERLAP_FRONT = 2, -+ BCH_EXTENT_OVERLAP_MIDDLE = 3, -+}; -+ -+/* Returns how k overlaps with m */ -+static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k, -+ const struct bkey *m) -+{ -+ int cmp1 = bkey_cmp(k->p, m->p) < 0; -+ int cmp2 = bkey_cmp(bkey_start_pos(k), -+ bkey_start_pos(m)) > 0; -+ -+ return (cmp1 << 1) + cmp2; -+} -+ -+/* Btree key iteration */ -+ -+void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *, -+ const struct bkey_packed *, -+ const struct bkey_packed *); -+void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *, -+ struct bpos *); -+void bch2_btree_node_iter_init_from_start(struct btree_node_iter *, -+ struct btree *); -+struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *, -+ struct btree *, -+ struct bset_tree *); -+ -+void bch2_btree_node_iter_sort(struct btree_node_iter *, struct btree *); -+void bch2_btree_node_iter_set_drop(struct btree_node_iter *, -+ struct btree_node_iter_set *); -+void bch2_btree_node_iter_advance(struct btree_node_iter *, struct btree *); -+ -+#define btree_node_iter_for_each(_iter, _set) \ -+ for (_set = (_iter)->data; \ -+ _set < (_iter)->data + ARRAY_SIZE((_iter)->data) && \ -+ (_set)->k != (_set)->end; \ -+ _set++) -+ -+static inline bool __btree_node_iter_set_end(struct btree_node_iter *iter, -+ unsigned i) -+{ -+ return iter->data[i].k == iter->data[i].end; -+} -+ -+static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter) -+{ -+ return __btree_node_iter_set_end(iter, 0); -+} -+ -+/* -+ * When keys compare equal, deleted keys compare first: -+ * -+ * XXX: only need to compare pointers for keys that are both within a -+ * btree_node_iterator - we need to break ties for prev() to work correctly -+ */ -+static inline int bkey_iter_cmp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r) -+{ -+ return bkey_cmp_packed(b, l, r) -+ ?: (int) bkey_deleted(r) - (int) bkey_deleted(l) -+ ?: cmp_int(l, r); -+} -+ -+static inline int btree_node_iter_cmp(const struct btree *b, -+ struct btree_node_iter_set l, -+ struct btree_node_iter_set r) -+{ -+ return bkey_iter_cmp(b, -+ __btree_node_offset_to_key(b, l.k), -+ __btree_node_offset_to_key(b, r.k)); -+} -+ -+/* These assume r (the search key) is not a deleted key: */ -+static inline int bkey_iter_pos_cmp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bpos *r) -+{ -+ return bkey_cmp_left_packed(b, l, r) -+ ?: -((int) bkey_deleted(l)); -+} -+ -+static inline int bkey_iter_cmp_p_or_unp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r_packed, -+ const struct bpos *r) -+{ -+ return bkey_cmp_p_or_unp(b, l, r_packed, r) -+ ?: -((int) bkey_deleted(l)); -+} -+ -+static inline struct bkey_packed * -+__bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ return __btree_node_offset_to_key(b, iter->data->k); -+} -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_peek_filter(struct btree_node_iter *iter, -+ struct btree *b, -+ unsigned min_key_type) -+{ -+ while (!bch2_btree_node_iter_end(iter)) { -+ struct bkey_packed *k = __bch2_btree_node_iter_peek_all(iter, b); -+ -+ if (k->type >= min_key_type) -+ return k; -+ -+ bch2_btree_node_iter_advance(iter, b); -+ } -+ -+ return NULL; -+} -+ -+static inline 
struct bkey_packed * -+bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ return bch2_btree_node_iter_peek_filter(iter, b, 0); -+} -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b) -+{ -+ return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_discard + 1); -+} -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_next_all(struct btree_node_iter *iter, struct btree *b) -+{ -+ struct bkey_packed *ret = bch2_btree_node_iter_peek_all(iter, b); -+ -+ if (ret) -+ bch2_btree_node_iter_advance(iter, b); -+ -+ return ret; -+} -+ -+struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *, -+ struct btree *); -+struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *, -+ struct btree *, unsigned); -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b) -+{ -+ return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_discard + 1); -+} -+ -+struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *, -+ struct btree *, -+ struct bkey *); -+ -+#define for_each_btree_node_key_unpack(b, k, iter, unpacked) \ -+ for (bch2_btree_node_iter_init_from_start((iter), (b)); \ -+ (k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\ -+ bch2_btree_node_iter_advance(iter, b)) -+ -+/* Accounting: */ -+ -+static inline void btree_keys_account_key(struct btree_nr_keys *n, -+ unsigned bset, -+ struct bkey_packed *k, -+ int sign) -+{ -+ n->live_u64s += k->u64s * sign; -+ n->bset_u64s[bset] += k->u64s * sign; -+ -+ if (bkey_packed(k)) -+ n->packed_keys += sign; -+ else -+ n->unpacked_keys += sign; -+} -+ -+static inline void btree_keys_account_val_delta(struct btree *b, -+ struct bkey_packed *k, -+ int delta) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ -+ b->nr.live_u64s += delta; -+ b->nr.bset_u64s[t - b->set] += delta; -+} -+ -+#define btree_keys_account_key_add(_nr, _bset_idx, _k) \ -+ btree_keys_account_key(_nr, _bset_idx, _k, 1) -+#define btree_keys_account_key_drop(_nr, _bset_idx, _k) \ -+ btree_keys_account_key(_nr, _bset_idx, _k, -1) -+ -+#define btree_account_key_add(_b, _k) \ -+ btree_keys_account_key(&(_b)->nr, \ -+ bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, 1) -+#define btree_account_key_drop(_b, _k) \ -+ btree_keys_account_key(&(_b)->nr, \ -+ bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, -1) -+ -+struct bset_stats { -+ struct { -+ size_t nr, bytes; -+ } sets[BSET_TREE_NR_TYPES]; -+ -+ size_t floats; -+ size_t failed; -+}; -+ -+void bch2_btree_keys_stats(struct btree *, struct bset_stats *); -+void bch2_bfloat_to_text(struct printbuf *, struct btree *, -+ struct bkey_packed *); -+ -+/* Debug stuff */ -+ -+void bch2_dump_bset(struct bch_fs *, struct btree *, struct bset *, unsigned); -+void bch2_dump_btree_node(struct bch_fs *, struct btree *); -+void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+void __bch2_verify_btree_nr_keys(struct btree *); -+void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *); -+void bch2_verify_insert_pos(struct btree *, struct bkey_packed *, -+ struct bkey_packed *, unsigned); -+ -+#else -+ -+static inline void __bch2_verify_btree_nr_keys(struct btree *b) {} -+static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter, -+ struct btree *b) {} -+static inline void bch2_verify_insert_pos(struct btree *b, -+ struct bkey_packed *where, -+ struct 
bkey_packed *insert, -+ unsigned clobber_u64s) {} -+#endif -+ -+static inline void bch2_verify_btree_nr_keys(struct btree *b) -+{ -+ if (btree_keys_expensive_checks(b)) -+ __bch2_verify_btree_nr_keys(b); -+} -+ -+#endif /* _BCACHEFS_BSET_H */ -diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c -new file mode 100644 -index 000000000000..bb94fa2341ee ---- /dev/null -+++ b/fs/bcachefs/btree_cache.c -@@ -0,0 +1,1063 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_locking.h" -+#include "debug.h" -+ -+#include -+#include -+#include -+ -+const char * const bch2_btree_ids[] = { -+#define x(kwd, val, name) name, -+ BCH_BTREE_IDS() -+#undef x -+ NULL -+}; -+ -+void bch2_recalc_btree_reserve(struct bch_fs *c) -+{ -+ unsigned i, reserve = 16; -+ -+ if (!c->btree_roots[0].b) -+ reserve += 8; -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (c->btree_roots[i].b) -+ reserve += min_t(unsigned, 1, -+ c->btree_roots[i].b->c.level) * 8; -+ -+ c->btree_cache.reserve = reserve; -+} -+ -+static inline unsigned btree_cache_can_free(struct btree_cache *bc) -+{ -+ return max_t(int, 0, bc->used - bc->reserve); -+} -+ -+static void __btree_node_data_free(struct bch_fs *c, struct btree *b) -+{ -+ EBUG_ON(btree_node_write_in_flight(b)); -+ -+ kvpfree(b->data, btree_bytes(c)); -+ b->data = NULL; -+ vfree(b->aux_data); -+ b->aux_data = NULL; -+} -+ -+static void btree_node_data_free(struct bch_fs *c, struct btree *b) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ -+ __btree_node_data_free(c, b); -+ bc->used--; -+ list_move(&b->list, &bc->freed); -+} -+ -+static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, -+ const void *obj) -+{ -+ const struct btree *b = obj; -+ const u64 *v = arg->key; -+ -+ return b->hash_val == *v ? 
0 : 1; -+} -+ -+static const struct rhashtable_params bch_btree_cache_params = { -+ .head_offset = offsetof(struct btree, hash), -+ .key_offset = offsetof(struct btree, hash_val), -+ .key_len = sizeof(u64), -+ .obj_cmpfn = bch2_btree_cache_cmp_fn, -+}; -+ -+static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) -+{ -+ BUG_ON(b->data || b->aux_data); -+ -+ b->data = kvpmalloc(btree_bytes(c), gfp); -+ if (!b->data) -+ return -ENOMEM; -+ -+ b->aux_data = vmalloc_exec(btree_aux_data_bytes(b), gfp); -+ if (!b->aux_data) { -+ kvpfree(b->data, btree_bytes(c)); -+ b->data = NULL; -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+static struct btree *__btree_node_mem_alloc(struct bch_fs *c) -+{ -+ struct btree *b = kzalloc(sizeof(struct btree), GFP_KERNEL); -+ if (!b) -+ return NULL; -+ -+ bkey_btree_ptr_init(&b->key); -+ six_lock_init(&b->c.lock); -+ INIT_LIST_HEAD(&b->list); -+ INIT_LIST_HEAD(&b->write_blocked); -+ b->byte_order = ilog2(btree_bytes(c)); -+ return b; -+} -+ -+static struct btree *btree_node_mem_alloc(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b = __btree_node_mem_alloc(c); -+ if (!b) -+ return NULL; -+ -+ if (btree_node_data_alloc(c, b, GFP_KERNEL)) { -+ kfree(b); -+ return NULL; -+ } -+ -+ bc->used++; -+ list_add(&b->list, &bc->freeable); -+ return b; -+} -+ -+/* Btree in memory cache - hash table */ -+ -+void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) -+{ -+ rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); -+ -+ /* Cause future lookups for this node to fail: */ -+ b->hash_val = 0; -+ -+ six_lock_wakeup_all(&b->c.lock); -+} -+ -+int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) -+{ -+ BUG_ON(b->hash_val); -+ b->hash_val = btree_ptr_hash_val(&b->key); -+ -+ return rhashtable_lookup_insert_fast(&bc->table, &b->hash, -+ bch_btree_cache_params); -+} -+ -+int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b, -+ unsigned level, enum btree_id id) -+{ -+ int ret; -+ -+ b->c.level = level; -+ b->c.btree_id = id; -+ -+ mutex_lock(&bc->lock); -+ ret = __bch2_btree_node_hash_insert(bc, b); -+ if (!ret) -+ list_add(&b->list, &bc->live); -+ mutex_unlock(&bc->lock); -+ -+ return ret; -+} -+ -+__flatten -+static inline struct btree *btree_cache_find(struct btree_cache *bc, -+ const struct bkey_i *k) -+{ -+ u64 v = btree_ptr_hash_val(k); -+ -+ return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params); -+} -+ -+/* -+ * this version is for btree nodes that have already been freed (we're not -+ * reaping a real btree node) -+ */ -+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ int ret = 0; -+ -+ lockdep_assert_held(&bc->lock); -+ -+ if (!six_trylock_intent(&b->c.lock)) -+ return -ENOMEM; -+ -+ if (!six_trylock_write(&b->c.lock)) -+ goto out_unlock_intent; -+ -+ if (btree_node_noevict(b)) -+ goto out_unlock; -+ -+ if (!btree_node_may_write(b)) -+ goto out_unlock; -+ -+ if (btree_node_dirty(b) && -+ test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) -+ goto out_unlock; -+ -+ if (btree_node_dirty(b) || -+ btree_node_write_in_flight(b) || -+ btree_node_read_in_flight(b)) { -+ if (!flush) -+ goto out_unlock; -+ -+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, -+ TASK_UNINTERRUPTIBLE); -+ -+ /* -+ * Using the underscore version because we don't want to compact -+ * bsets after the write, since this node is about to be evicted -+ * - unless btree verify 
mode is enabled, since it runs out of -+ * the post write cleanup: -+ */ -+ if (verify_btree_ondisk(c)) -+ bch2_btree_node_write(c, b, SIX_LOCK_intent); -+ else -+ __bch2_btree_node_write(c, b, SIX_LOCK_read); -+ -+ /* wait for any in flight btree write */ -+ btree_node_wait_on_io(b); -+ } -+out: -+ if (b->hash_val && !ret) -+ trace_btree_node_reap(c, b); -+ return ret; -+out_unlock: -+ six_unlock_write(&b->c.lock); -+out_unlock_intent: -+ six_unlock_intent(&b->c.lock); -+ ret = -ENOMEM; -+ goto out; -+} -+ -+static int btree_node_reclaim(struct bch_fs *c, struct btree *b) -+{ -+ return __btree_node_reclaim(c, b, false); -+} -+ -+static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b) -+{ -+ return __btree_node_reclaim(c, b, true); -+} -+ -+static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, -+ struct shrink_control *sc) -+{ -+ struct bch_fs *c = container_of(shrink, struct bch_fs, -+ btree_cache.shrink); -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b, *t; -+ unsigned long nr = sc->nr_to_scan; -+ unsigned long can_free; -+ unsigned long touched = 0; -+ unsigned long freed = 0; -+ unsigned i, flags; -+ -+ if (btree_shrinker_disabled(c)) -+ return SHRINK_STOP; -+ -+ /* Return -1 if we can't do anything right now */ -+ if (sc->gfp_mask & __GFP_FS) -+ mutex_lock(&bc->lock); -+ else if (!mutex_trylock(&bc->lock)) -+ return -1; -+ -+ flags = memalloc_nofs_save(); -+ -+ /* -+ * It's _really_ critical that we don't free too many btree nodes - we -+ * have to always leave ourselves a reserve. The reserve is how we -+ * guarantee that allocating memory for a new btree node can always -+ * succeed, so that inserting keys into the btree can always succeed and -+ * IO can always make forward progress: -+ */ -+ nr /= btree_pages(c); -+ can_free = btree_cache_can_free(bc); -+ nr = min_t(unsigned long, nr, can_free); -+ -+ i = 0; -+ list_for_each_entry_safe(b, t, &bc->freeable, list) { -+ touched++; -+ -+ if (freed >= nr) -+ break; -+ -+ if (++i > 3 && -+ !btree_node_reclaim(c, b)) { -+ btree_node_data_free(c, b); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ freed++; -+ } -+ } -+restart: -+ list_for_each_entry_safe(b, t, &bc->live, list) { -+ touched++; -+ -+ if (freed >= nr) { -+ /* Save position */ -+ if (&t->list != &bc->live) -+ list_move_tail(&bc->live, &t->list); -+ break; -+ } -+ -+ if (!btree_node_accessed(b) && -+ !btree_node_reclaim(c, b)) { -+ /* can't call bch2_btree_node_hash_remove under lock */ -+ freed++; -+ if (&t->list != &bc->live) -+ list_move_tail(&bc->live, &t->list); -+ -+ btree_node_data_free(c, b); -+ mutex_unlock(&bc->lock); -+ -+ bch2_btree_node_hash_remove(bc, b); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ -+ if (freed >= nr) -+ goto out; -+ -+ if (sc->gfp_mask & __GFP_FS) -+ mutex_lock(&bc->lock); -+ else if (!mutex_trylock(&bc->lock)) -+ goto out; -+ goto restart; -+ } else -+ clear_btree_node_accessed(b); -+ } -+ -+ memalloc_nofs_restore(flags); -+ mutex_unlock(&bc->lock); -+out: -+ return (unsigned long) freed * btree_pages(c); -+} -+ -+static unsigned long bch2_btree_cache_count(struct shrinker *shrink, -+ struct shrink_control *sc) -+{ -+ struct bch_fs *c = container_of(shrink, struct bch_fs, -+ btree_cache.shrink); -+ struct btree_cache *bc = &c->btree_cache; -+ -+ if (btree_shrinker_disabled(c)) -+ return 0; -+ -+ return btree_cache_can_free(bc) * btree_pages(c); -+} -+ -+void bch2_fs_btree_cache_exit(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; 
-+ struct btree *b; -+ unsigned i, flags; -+ -+ if (bc->shrink.list.next) -+ unregister_shrinker(&bc->shrink); -+ -+ /* vfree() can allocate memory: */ -+ flags = memalloc_nofs_save(); -+ mutex_lock(&bc->lock); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ if (c->verify_data) -+ list_move(&c->verify_data->list, &bc->live); -+ -+ kvpfree(c->verify_ondisk, btree_bytes(c)); -+#endif -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (c->btree_roots[i].b) -+ list_add(&c->btree_roots[i].b->list, &bc->live); -+ -+ list_splice(&bc->freeable, &bc->live); -+ -+ while (!list_empty(&bc->live)) { -+ b = list_first_entry(&bc->live, struct btree, list); -+ -+ BUG_ON(btree_node_read_in_flight(b) || -+ btree_node_write_in_flight(b)); -+ -+ if (btree_node_dirty(b)) -+ bch2_btree_complete_write(c, b, btree_current_write(b)); -+ clear_btree_node_dirty(b); -+ -+ btree_node_data_free(c, b); -+ } -+ -+ while (!list_empty(&bc->freed)) { -+ b = list_first_entry(&bc->freed, struct btree, list); -+ list_del(&b->list); -+ kfree(b); -+ } -+ -+ mutex_unlock(&bc->lock); -+ memalloc_nofs_restore(flags); -+ -+ if (bc->table_init_done) -+ rhashtable_destroy(&bc->table); -+} -+ -+int bch2_fs_btree_cache_init(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ unsigned i; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ ret = rhashtable_init(&bc->table, &bch_btree_cache_params); -+ if (ret) -+ goto out; -+ -+ bc->table_init_done = true; -+ -+ bch2_recalc_btree_reserve(c); -+ -+ for (i = 0; i < bc->reserve; i++) -+ if (!btree_node_mem_alloc(c)) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ list_splice_init(&bc->live, &bc->freeable); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ mutex_init(&c->verify_lock); -+ -+ c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL); -+ if (!c->verify_ondisk) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ c->verify_data = btree_node_mem_alloc(c); -+ if (!c->verify_data) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ list_del_init(&c->verify_data->list); -+#endif -+ -+ bc->shrink.count_objects = bch2_btree_cache_count; -+ bc->shrink.scan_objects = bch2_btree_cache_scan; -+ bc->shrink.seeks = 4; -+ bc->shrink.batch = btree_pages(c) * 2; -+ register_shrinker(&bc->shrink); -+out: -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -+ -+void bch2_fs_btree_cache_init_early(struct btree_cache *bc) -+{ -+ mutex_init(&bc->lock); -+ INIT_LIST_HEAD(&bc->live); -+ INIT_LIST_HEAD(&bc->freeable); -+ INIT_LIST_HEAD(&bc->freed); -+} -+ -+/* -+ * We can only have one thread cannibalizing other cached btree nodes at a time, -+ * or we'll deadlock. We use an open coded mutex to ensure that, which a -+ * cannibalize_bucket() will take. This means every time we unlock the root of -+ * the btree, we need to release this lock if we have it held. 
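 *
 * The rough calling pattern (illustrative, error handling elided) is:
 *
 *	bch2_btree_cache_cannibalize_lock(c, &cl);
 *	...
 *	b = bch2_btree_node_mem_alloc(c);
 *	...
 *	bch2_btree_cache_cannibalize_unlock(c);
 *
 * With the lock held, a failed allocation in bch2_btree_node_mem_alloc()
 * will evict ("cannibalize") another cached node instead of returning
 * -ENOMEM.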
-+ */ -+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ -+ if (bc->alloc_lock == current) { -+ trace_btree_node_cannibalize_unlock(c); -+ bc->alloc_lock = NULL; -+ closure_wake_up(&bc->alloc_wait); -+ } -+} -+ -+int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct task_struct *old; -+ -+ old = cmpxchg(&bc->alloc_lock, NULL, current); -+ if (old == NULL || old == current) -+ goto success; -+ -+ if (!cl) { -+ trace_btree_node_cannibalize_lock_fail(c); -+ return -ENOMEM; -+ } -+ -+ closure_wait(&bc->alloc_wait, cl); -+ -+ /* Try again, after adding ourselves to waitlist */ -+ old = cmpxchg(&bc->alloc_lock, NULL, current); -+ if (old == NULL || old == current) { -+ /* We raced */ -+ closure_wake_up(&bc->alloc_wait); -+ goto success; -+ } -+ -+ trace_btree_node_cannibalize_lock_fail(c); -+ return -EAGAIN; -+ -+success: -+ trace_btree_node_cannibalize_lock(c); -+ return 0; -+} -+ -+static struct btree *btree_node_cannibalize(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ -+ list_for_each_entry_reverse(b, &bc->live, list) -+ if (!btree_node_reclaim(c, b)) -+ return b; -+ -+ while (1) { -+ list_for_each_entry_reverse(b, &bc->live, list) -+ if (!btree_node_write_and_reclaim(c, b)) -+ return b; -+ -+ /* -+ * Rare case: all nodes were intent-locked. -+ * Just busy-wait. -+ */ -+ WARN_ONCE(1, "btree cache cannibalize failed\n"); -+ cond_resched(); -+ } -+} -+ -+struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ u64 start_time = local_clock(); -+ unsigned flags; -+ -+ flags = memalloc_nofs_save(); -+ mutex_lock(&bc->lock); -+ -+ /* -+ * btree_free() doesn't free memory; it sticks the node on the end of -+ * the list. Check if there's any freed nodes there: -+ */ -+ list_for_each_entry(b, &bc->freeable, list) -+ if (!btree_node_reclaim(c, b)) -+ goto got_node; -+ -+ /* -+ * We never free struct btree itself, just the memory that holds the on -+ * disk node. 
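 * (The freeable list holds nodes that still have their data buffers;
 * the freed list holds bare struct btree, so a node taken from it goes
 * through btree_node_data_alloc() below.)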
Check the freed list before allocating a new one: -+ */ -+ list_for_each_entry(b, &bc->freed, list) -+ if (!btree_node_reclaim(c, b)) -+ goto got_node; -+ -+ b = NULL; -+got_node: -+ if (b) -+ list_del_init(&b->list); -+ mutex_unlock(&bc->lock); -+ -+ if (!b) { -+ b = __btree_node_mem_alloc(c); -+ if (!b) -+ goto err; -+ -+ BUG_ON(!six_trylock_intent(&b->c.lock)); -+ BUG_ON(!six_trylock_write(&b->c.lock)); -+ } -+ -+ if (!b->data) { -+ if (btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_KERNEL)) -+ goto err; -+ -+ mutex_lock(&bc->lock); -+ bc->used++; -+ mutex_unlock(&bc->lock); -+ } -+ -+ BUG_ON(btree_node_hashed(b)); -+ BUG_ON(btree_node_write_in_flight(b)); -+out: -+ b->flags = 0; -+ b->written = 0; -+ b->nsets = 0; -+ b->sib_u64s[0] = 0; -+ b->sib_u64s[1] = 0; -+ b->whiteout_u64s = 0; -+ bch2_btree_keys_init(b, &c->expensive_debug_checks); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc], -+ start_time); -+ -+ memalloc_nofs_restore(flags); -+ return b; -+err: -+ mutex_lock(&bc->lock); -+ -+ if (b) { -+ list_add(&b->list, &bc->freed); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ } -+ -+ /* Try to cannibalize another cached btree node: */ -+ if (bc->alloc_lock == current) { -+ b = btree_node_cannibalize(c); -+ list_del_init(&b->list); -+ mutex_unlock(&bc->lock); -+ -+ bch2_btree_node_hash_remove(bc, b); -+ -+ trace_btree_node_cannibalize(c); -+ goto out; -+ } -+ -+ mutex_unlock(&bc->lock); -+ memalloc_nofs_restore(flags); -+ return ERR_PTR(-ENOMEM); -+} -+ -+/* Slowpath, don't want it inlined into btree_iter_traverse() */ -+static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, -+ struct btree_iter *iter, -+ const struct bkey_i *k, -+ enum btree_id btree_id, -+ unsigned level, -+ enum six_lock_type lock_type, -+ bool sync) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ -+ BUG_ON(level + 1 >= BTREE_MAX_DEPTH); -+ /* -+ * Parent node must be locked, else we could read in a btree node that's -+ * been freed: -+ */ -+ if (iter && !bch2_btree_node_relock(iter, level + 1)) -+ return ERR_PTR(-EINTR); -+ -+ b = bch2_btree_node_mem_alloc(c); -+ if (IS_ERR(b)) -+ return b; -+ -+ bkey_copy(&b->key, k); -+ if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) { -+ /* raced with another fill: */ -+ -+ /* mark as unhashed... */ -+ b->hash_val = 0; -+ -+ mutex_lock(&bc->lock); -+ list_add(&b->list, &bc->freeable); -+ mutex_unlock(&bc->lock); -+ -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ return NULL; -+ } -+ -+ /* -+ * Unlock before doing IO: -+ * -+ * XXX: ideally should be dropping all btree node locks here -+ */ -+ if (iter && btree_node_read_locked(iter, level + 1)) -+ btree_node_unlock(iter, level + 1); -+ -+ bch2_btree_node_read(c, b, sync); -+ -+ six_unlock_write(&b->c.lock); -+ -+ if (!sync) { -+ six_unlock_intent(&b->c.lock); -+ return NULL; -+ } -+ -+ if (lock_type == SIX_LOCK_read) -+ six_lock_downgrade(&b->c.lock); -+ -+ return b; -+} -+ -+static int lock_node_check_fn(struct six_lock *lock, void *p) -+{ -+ struct btree *b = container_of(lock, struct btree, c.lock); -+ const struct bkey_i *k = p; -+ -+ return b->hash_val == btree_ptr_hash_val(k) ? 0 : -1; -+} -+ -+/** -+ * bch_btree_node_get - find a btree node in the cache and lock it, reading it -+ * in from disk if necessary. -+ * -+ * If IO is necessary and running under generic_make_request, returns -EAGAIN. -+ * -+ * The btree node will have either a read or a write lock held, depending on -+ * the @write parameter. 
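 *
 * In this version the lock wanted is given by @lock_type (SIX_LOCK_read
 * or SIX_LOCK_intent).  The parent node must already be locked via
 * @iter; if a lock can't be taken, or the node was freed and reused
 * under us, we return ERR_PTR(-EINTR) so the caller can restart the
 * transaction.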
-+ */ -+struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter, -+ const struct bkey_i *k, unsigned level, -+ enum six_lock_type lock_type) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ struct bset_tree *t; -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ -+ b = btree_node_mem_ptr(k); -+ if (b) -+ goto lock_node; -+retry: -+ b = btree_cache_find(bc, k); -+ if (unlikely(!b)) { -+ /* -+ * We must have the parent locked to call bch2_btree_node_fill(), -+ * else we could read in a btree node from disk that's been -+ * freed: -+ */ -+ b = bch2_btree_node_fill(c, iter, k, iter->btree_id, -+ level, lock_type, true); -+ -+ /* We raced and found the btree node in the cache */ -+ if (!b) -+ goto retry; -+ -+ if (IS_ERR(b)) -+ return b; -+ } else { -+lock_node: -+ /* -+ * There's a potential deadlock with splits and insertions into -+ * interior nodes we have to avoid: -+ * -+ * The other thread might be holding an intent lock on the node -+ * we want, and they want to update its parent node so they're -+ * going to upgrade their intent lock on the parent node to a -+ * write lock. -+ * -+ * But if we're holding a read lock on the parent, and we're -+ * trying to get the intent lock they're holding, we deadlock. -+ * -+ * So to avoid this we drop the read locks on parent nodes when -+ * we're starting to take intent locks - and handle the race. -+ * -+ * The race is that they might be about to free the node we -+ * want, and dropping our read lock on the parent node lets them -+ * update the parent marking the node we want as freed, and then -+ * free it: -+ * -+ * To guard against this, btree nodes are evicted from the cache -+ * when they're freed - and b->hash_val is zeroed out, which we -+ * check for after we lock the node. 
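 *
 * (In code terms: drop the read lock on the parent, lock the child,
 * re-check b->hash_val and b->c.level under the child's lock, and retry
 * the cache lookup if the node got reused.)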
-+ * -+ * Then, bch2_btree_node_relock() on the parent will fail - because -+ * the parent was modified, when the pointer to the node we want -+ * was removed - and we'll bail out: -+ */ -+ if (btree_node_read_locked(iter, level + 1)) -+ btree_node_unlock(iter, level + 1); -+ -+ if (!btree_node_lock(b, k->k.p, level, iter, lock_type, -+ lock_node_check_fn, (void *) k)) { -+ if (b->hash_val != btree_ptr_hash_val(k)) -+ goto retry; -+ return ERR_PTR(-EINTR); -+ } -+ -+ if (unlikely(b->hash_val != btree_ptr_hash_val(k) || -+ b->c.level != level || -+ race_fault())) { -+ six_unlock_type(&b->c.lock, lock_type); -+ if (bch2_btree_node_relock(iter, level + 1)) -+ goto retry; -+ -+ trace_trans_restart_btree_node_reused(iter->trans->ip); -+ return ERR_PTR(-EINTR); -+ } -+ } -+ -+ /* XXX: waiting on IO with btree locks held: */ -+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, -+ TASK_UNINTERRUPTIBLE); -+ -+ prefetch(b->aux_data); -+ -+ for_each_bset(b, t) { -+ void *p = (u64 *) b->aux_data + t->aux_data_offset; -+ -+ prefetch(p + L1_CACHE_BYTES * 0); -+ prefetch(p + L1_CACHE_BYTES * 1); -+ prefetch(p + L1_CACHE_BYTES * 2); -+ } -+ -+ /* avoid atomic set bit if it's not needed: */ -+ if (!btree_node_accessed(b)) -+ set_btree_node_accessed(b); -+ -+ if (unlikely(btree_node_read_error(b))) { -+ six_unlock_type(&b->c.lock, lock_type); -+ return ERR_PTR(-EIO); -+ } -+ -+ EBUG_ON(b->c.btree_id != iter->btree_id || -+ BTREE_NODE_LEVEL(b->data) != level || -+ bkey_cmp(b->data->max_key, k->k.p)); -+ -+ return b; -+} -+ -+struct btree *bch2_btree_node_get_noiter(struct bch_fs *c, -+ const struct bkey_i *k, -+ enum btree_id btree_id, -+ unsigned level) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ struct bset_tree *t; -+ int ret; -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ -+ b = btree_node_mem_ptr(k); -+ if (b) -+ goto lock_node; -+retry: -+ b = btree_cache_find(bc, k); -+ if (unlikely(!b)) { -+ b = bch2_btree_node_fill(c, NULL, k, btree_id, -+ level, SIX_LOCK_read, true); -+ -+ /* We raced and found the btree node in the cache */ -+ if (!b) -+ goto retry; -+ -+ if (IS_ERR(b)) -+ return b; -+ } else { -+lock_node: -+ ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k); -+ if (ret) -+ goto retry; -+ -+ if (unlikely(b->hash_val != btree_ptr_hash_val(k) || -+ b->c.btree_id != btree_id || -+ b->c.level != level)) { -+ six_unlock_read(&b->c.lock); -+ goto retry; -+ } -+ } -+ -+ /* XXX: waiting on IO with btree locks held: */ -+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, -+ TASK_UNINTERRUPTIBLE); -+ -+ prefetch(b->aux_data); -+ -+ for_each_bset(b, t) { -+ void *p = (u64 *) b->aux_data + t->aux_data_offset; -+ -+ prefetch(p + L1_CACHE_BYTES * 0); -+ prefetch(p + L1_CACHE_BYTES * 1); -+ prefetch(p + L1_CACHE_BYTES * 2); -+ } -+ -+ /* avoid atomic set bit if it's not needed: */ -+ if (!btree_node_accessed(b)) -+ set_btree_node_accessed(b); -+ -+ if (unlikely(btree_node_read_error(b))) { -+ six_unlock_read(&b->c.lock); -+ return ERR_PTR(-EIO); -+ } -+ -+ EBUG_ON(b->c.btree_id != btree_id || -+ BTREE_NODE_LEVEL(b->data) != level || -+ bkey_cmp(b->data->max_key, k->k.p)); -+ -+ return b; -+} -+ -+struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, -+ struct btree_iter *iter, -+ struct btree *b, -+ enum btree_node_sibling sib) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree *parent; -+ struct btree_node_iter node_iter; -+ struct bkey_packed *k; -+ BKEY_PADDED(k) tmp; -+ struct btree *ret = NULL; -+ unsigned level = b->c.level; -+ -+ parent = 
btree_iter_node(iter, level + 1); -+ if (!parent) -+ return NULL; -+ -+ /* -+ * There's a corner case where a btree_iter might have a node locked -+ * that is just outside its current pos - when -+ * bch2_btree_iter_set_pos_same_leaf() gets to the end of the node. -+ * -+ * But the lock ordering checks in __bch2_btree_node_lock() go off of -+ * iter->pos, not the node's key: so if the iterator is marked as -+ * needing to be traversed, we risk deadlock if we don't bail out here: -+ */ -+ if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE) -+ return ERR_PTR(-EINTR); -+ -+ if (!bch2_btree_node_relock(iter, level + 1)) { -+ ret = ERR_PTR(-EINTR); -+ goto out; -+ } -+ -+ node_iter = iter->l[parent->c.level].iter; -+ -+ k = bch2_btree_node_iter_peek_all(&node_iter, parent); -+ BUG_ON(bkey_cmp_left_packed(parent, k, &b->key.k.p)); -+ -+ k = sib == btree_prev_sib -+ ? bch2_btree_node_iter_prev(&node_iter, parent) -+ : (bch2_btree_node_iter_advance(&node_iter, parent), -+ bch2_btree_node_iter_peek(&node_iter, parent)); -+ if (!k) -+ goto out; -+ -+ bch2_bkey_unpack(parent, &tmp.k, k); -+ -+ ret = bch2_btree_node_get(c, iter, &tmp.k, level, -+ SIX_LOCK_intent); -+ -+ if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) { -+ struct btree_iter *linked; -+ -+ if (!bch2_btree_node_relock(iter, level + 1)) -+ goto out; -+ -+ /* -+ * We might have got -EINTR because trylock failed, and we're -+ * holding other locks that would cause us to deadlock: -+ */ -+ trans_for_each_iter(trans, linked) -+ if (btree_iter_cmp(iter, linked) < 0) -+ __bch2_btree_iter_unlock(linked); -+ -+ if (sib == btree_prev_sib) -+ btree_node_unlock(iter, level); -+ -+ ret = bch2_btree_node_get(c, iter, &tmp.k, level, -+ SIX_LOCK_intent); -+ -+ /* -+ * before btree_iter_relock() calls btree_iter_verify_locks(): -+ */ -+ if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED) -+ btree_node_unlock(iter, level + 1); -+ -+ if (!bch2_btree_node_relock(iter, level)) { -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK); -+ -+ if (!IS_ERR(ret)) { -+ six_unlock_intent(&ret->c.lock); -+ ret = ERR_PTR(-EINTR); -+ } -+ } -+ -+ bch2_trans_relock(trans); -+ } -+out: -+ if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED) -+ btree_node_unlock(iter, level + 1); -+ -+ if (PTR_ERR_OR_ZERO(ret) == -EINTR) -+ bch2_btree_iter_upgrade(iter, level + 2); -+ -+ BUG_ON(!IS_ERR(ret) && !btree_node_locked(iter, level)); -+ -+ if (!IS_ERR_OR_NULL(ret)) { -+ struct btree *n1 = ret, *n2 = b; -+ -+ if (sib != btree_prev_sib) -+ swap(n1, n2); -+ -+ BUG_ON(bkey_cmp(bkey_successor(n1->key.k.p), -+ n2->data->min_key)); -+ } -+ -+ bch2_btree_trans_verify_locks(trans); -+ -+ return ret; -+} -+ -+void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, -+ const struct bkey_i *k, unsigned level) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ -+ BUG_ON(!btree_node_locked(iter, level + 1)); -+ BUG_ON(level >= BTREE_MAX_DEPTH); -+ -+ b = btree_cache_find(bc, k); -+ if (b) -+ return; -+ -+ bch2_btree_node_fill(c, iter, k, iter->btree_id, -+ level, SIX_LOCK_read, false); -+} -+ -+void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, -+ struct btree *b) -+{ -+ const struct bkey_format *f = &b->format; -+ struct bset_stats stats; -+ -+ memset(&stats, 0, sizeof(stats)); -+ -+ bch2_btree_keys_stats(b, &stats); -+ -+ pr_buf(out, -+ "l %u %llu:%llu - %llu:%llu:\n" -+ " ptrs: ", -+ b->c.level, -+ b->data->min_key.inode, -+ b->data->min_key.offset, -+ b->data->max_key.inode, -+ b->data->max_key.offset); -+ 
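	/*
	 * The header line is "l <level> <min inode>:<offset> - <max
	 * inode>:<offset>:", followed by the node's own key, i.e. its
	 * pointers:
	 */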
bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key)); -+ pr_buf(out, "\n" -+ " format: u64s %u fields %u %u %u %u %u\n" -+ " unpack fn len: %u\n" -+ " bytes used %zu/%zu (%zu%% full)\n" -+ " sib u64s: %u, %u (merge threshold %zu)\n" -+ " nr packed keys %u\n" -+ " nr unpacked keys %u\n" -+ " floats %zu\n" -+ " failed unpacked %zu\n", -+ f->key_u64s, -+ f->bits_per_field[0], -+ f->bits_per_field[1], -+ f->bits_per_field[2], -+ f->bits_per_field[3], -+ f->bits_per_field[4], -+ b->unpack_fn_len, -+ b->nr.live_u64s * sizeof(u64), -+ btree_bytes(c) - sizeof(struct btree_node), -+ b->nr.live_u64s * 100 / btree_max_u64s(c), -+ b->sib_u64s[0], -+ b->sib_u64s[1], -+ BTREE_FOREGROUND_MERGE_THRESHOLD(c), -+ b->nr.packed_keys, -+ b->nr.unpacked_keys, -+ stats.floats, -+ stats.failed); -+} -diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h -new file mode 100644 -index 000000000000..d0d3a85bb8be ---- /dev/null -+++ b/fs/bcachefs/btree_cache.h -@@ -0,0 +1,104 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_CACHE_H -+#define _BCACHEFS_BTREE_CACHE_H -+ -+#include "bcachefs.h" -+#include "btree_types.h" -+ -+struct btree_iter; -+ -+extern const char * const bch2_btree_ids[]; -+ -+void bch2_recalc_btree_reserve(struct bch_fs *); -+ -+void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *); -+int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *); -+int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *, -+ unsigned, enum btree_id); -+ -+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *); -+int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *); -+ -+struct btree *bch2_btree_node_mem_alloc(struct bch_fs *); -+ -+struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *, -+ const struct bkey_i *, unsigned, -+ enum six_lock_type); -+ -+struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *, -+ enum btree_id, unsigned); -+ -+struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *, -+ struct btree *, enum btree_node_sibling); -+ -+void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *, -+ const struct bkey_i *, unsigned); -+ -+void bch2_fs_btree_cache_exit(struct bch_fs *); -+int bch2_fs_btree_cache_init(struct bch_fs *); -+void bch2_fs_btree_cache_init_early(struct btree_cache *); -+ -+static inline u64 btree_ptr_hash_val(const struct bkey_i *k) -+{ -+ switch (k->k.type) { -+ case KEY_TYPE_btree_ptr: -+ return *((u64 *) bkey_i_to_btree_ptr_c(k)->v.start); -+ case KEY_TYPE_btree_ptr_v2: -+ return bkey_i_to_btree_ptr_v2_c(k)->v.seq; -+ default: -+ return 0; -+ } -+} -+ -+static inline struct btree *btree_node_mem_ptr(const struct bkey_i *k) -+{ -+ return k->k.type == KEY_TYPE_btree_ptr_v2 -+ ? (void *)(unsigned long)bkey_i_to_btree_ptr_v2_c(k)->v.mem_ptr -+ : NULL; -+} -+ -+/* is btree node in hash table? 
*/ -+static inline bool btree_node_hashed(struct btree *b) -+{ -+ return b->hash_val != 0; -+} -+ -+#define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \ -+ for ((_tbl) = rht_dereference_rcu((_c)->btree_cache.table.tbl, \ -+ &(_c)->btree_cache.table), \ -+ _iter = 0; _iter < (_tbl)->size; _iter++) \ -+ rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash) -+ -+static inline size_t btree_bytes(struct bch_fs *c) -+{ -+ return c->opts.btree_node_size << 9; -+} -+ -+static inline size_t btree_max_u64s(struct bch_fs *c) -+{ -+ return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64); -+} -+ -+static inline size_t btree_pages(struct bch_fs *c) -+{ -+ return btree_bytes(c) / PAGE_SIZE; -+} -+ -+static inline unsigned btree_blocks(struct bch_fs *c) -+{ -+ return c->opts.btree_node_size >> c->block_bits; -+} -+ -+#define BTREE_SPLIT_THRESHOLD(c) (btree_max_u64s(c) * 2 / 3) -+ -+#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3) -+#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \ -+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \ -+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) << 2)) -+ -+#define btree_node_root(_c, _b) ((_c)->btree_roots[(_b)->c.btree_id].b) -+ -+void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, -+ struct btree *); -+ -+#endif /* _BCACHEFS_BTREE_CACHE_H */ -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -new file mode 100644 -index 000000000000..e8c1e752a25d ---- /dev/null -+++ b/fs/bcachefs/btree_gc.c -@@ -0,0 +1,1438 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2010 Kent Overstreet -+ * Copyright (C) 2014 Datera Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "bkey_methods.h" -+#include "bkey_on_stack.h" -+#include "btree_locking.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "clock.h" -+#include "debug.h" -+#include "ec.h" -+#include "error.h" -+#include "extents.h" -+#include "journal.h" -+#include "keylist.h" -+#include "move.h" -+#include "recovery.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) -+{ -+ preempt_disable(); -+ write_seqcount_begin(&c->gc_pos_lock); -+ c->gc_pos = new_pos; -+ write_seqcount_end(&c->gc_pos_lock); -+ preempt_enable(); -+} -+ -+static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) -+{ -+ BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0); -+ __gc_pos_set(c, new_pos); -+} -+ -+static int bch2_gc_check_topology(struct bch_fs *c, -+ struct bkey_s_c k, -+ struct bpos *expected_start, -+ struct bpos expected_end, -+ bool is_last) -+{ -+ int ret = 0; -+ -+ if (k.k->type == KEY_TYPE_btree_ptr_v2) { -+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); -+ -+ if (fsck_err_on(bkey_cmp(*expected_start, bp.v->min_key), c, -+ "btree node with incorrect min_key: got %llu:%llu, should be %llu:%llu", -+ bp.v->min_key.inode, -+ bp.v->min_key.offset, -+ expected_start->inode, -+ expected_start->offset)) { -+ BUG(); -+ } -+ } -+ -+ *expected_start = bkey_cmp(k.k->p, POS_MAX) -+ ? 
bkey_successor(k.k->p) -+ : k.k->p; -+ -+ if (fsck_err_on(is_last && -+ bkey_cmp(k.k->p, expected_end), c, -+ "btree node with incorrect max_key: got %llu:%llu, should be %llu:%llu", -+ k.k->p.inode, -+ k.k->p.offset, -+ expected_end.inode, -+ expected_end.offset)) { -+ BUG(); -+ } -+fsck_err: -+ return ret; -+} -+ -+/* marking of btree keys/nodes: */ -+ -+static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k, -+ u8 *max_stale, bool initial) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ unsigned flags = -+ BTREE_TRIGGER_GC| -+ (initial ? BTREE_TRIGGER_NOATOMIC : 0); -+ int ret = 0; -+ -+ if (initial) { -+ BUG_ON(journal_seq_verify(c) && -+ k.k->version.lo > journal_cur_seq(&c->journal)); -+ -+ /* XXX change to fsck check */ -+ if (fsck_err_on(k.k->version.lo > atomic64_read(&c->key_version), c, -+ "key version number higher than recorded: %llu > %llu", -+ k.k->version.lo, -+ atomic64_read(&c->key_version))) -+ atomic64_set(&c->key_version, k.k->version.lo); -+ -+ if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) || -+ fsck_err_on(!bch2_bkey_replicas_marked(c, k), c, -+ "superblock not marked as containing replicas (type %u)", -+ k.k->type)) { -+ ret = bch2_mark_bkey_replicas(c, k); -+ if (ret) -+ return ret; -+ } -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bucket *g = PTR_BUCKET(ca, ptr, true); -+ struct bucket *g2 = PTR_BUCKET(ca, ptr, false); -+ -+ if (mustfix_fsck_err_on(!g->gen_valid, c, -+ "bucket %u:%zu data type %s ptr gen %u missing in alloc btree", -+ ptr->dev, PTR_BUCKET_NR(ca, ptr), -+ bch2_data_types[ptr_data_type(k.k, ptr)], -+ ptr->gen)) { -+ g2->_mark.gen = g->_mark.gen = ptr->gen; -+ g2->gen_valid = g->gen_valid = true; -+ } -+ -+ if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c, -+ "bucket %u:%zu data type %s ptr gen in the future: %u > %u", -+ ptr->dev, PTR_BUCKET_NR(ca, ptr), -+ bch2_data_types[ptr_data_type(k.k, ptr)], -+ ptr->gen, g->mark.gen)) { -+ g2->_mark.gen = g->_mark.gen = ptr->gen; -+ g2->gen_valid = g->gen_valid = true; -+ g2->_mark.data_type = 0; -+ g2->_mark.dirty_sectors = 0; -+ g2->_mark.cached_sectors = 0; -+ set_bit(BCH_FS_FIXED_GENS, &c->flags); -+ } -+ } -+ } -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bucket *g = PTR_BUCKET(ca, ptr, true); -+ -+ if (gen_after(g->oldest_gen, ptr->gen)) -+ g->oldest_gen = ptr->gen; -+ -+ *max_stale = max(*max_stale, ptr_stale(ca, ptr)); -+ } -+ -+ bch2_mark_key(c, k, 0, k.k->size, NULL, 0, flags); -+fsck_err: -+ return ret; -+} -+ -+static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, -+ bool initial) -+{ -+ struct bpos next_node_start = b->data->min_key; -+ struct btree_node_iter iter; -+ struct bkey unpacked; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ *max_stale = 0; -+ -+ if (!btree_node_type_needs_gc(btree_node_type(b))) -+ return 0; -+ -+ bch2_btree_node_iter_init_from_start(&iter, b); -+ -+ while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) { -+ bch2_bkey_debugcheck(c, b, k); -+ -+ ret = bch2_gc_mark_key(c, k, max_stale, initial); -+ if (ret) -+ break; -+ -+ bch2_btree_node_iter_advance(&iter, b); -+ -+ if (b->c.level) { -+ ret = bch2_gc_check_topology(c, k, -+ &next_node_start, -+ b->data->max_key, -+ bch2_btree_node_iter_end(&iter)); -+ if (ret) -+ break; -+ } -+ } -+ -+ return ret; -+} -+ -+static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, -+ bool initial, 
bool metadata_only) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ unsigned depth = metadata_only ? 1 -+ : expensive_debug_checks(c) ? 0 -+ : !btree_node_type_needs_gc(btree_id) ? 1 -+ : 0; -+ u8 max_stale = 0; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0)); -+ -+ __for_each_btree_node(&trans, iter, btree_id, POS_MIN, -+ 0, depth, BTREE_ITER_PREFETCH, b) { -+ bch2_verify_btree_nr_keys(b); -+ -+ gc_pos_set(c, gc_pos_btree_node(b)); -+ -+ ret = btree_gc_mark_node(c, b, &max_stale, initial); -+ if (ret) -+ break; -+ -+ if (!initial) { -+ if (max_stale > 64) -+ bch2_btree_node_rewrite(c, iter, -+ b->data->keys.seq, -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_NOWAIT| -+ BTREE_INSERT_GC_LOCK_HELD); -+ else if (!btree_gc_rewrite_disabled(c) && -+ (btree_gc_always_rewrite(c) || max_stale > 16)) -+ bch2_btree_node_rewrite(c, iter, -+ b->data->keys.seq, -+ BTREE_INSERT_NOWAIT| -+ BTREE_INSERT_GC_LOCK_HELD); -+ } -+ -+ bch2_trans_cond_resched(&trans); -+ } -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ return ret; -+ -+ mutex_lock(&c->btree_root_lock); -+ b = c->btree_roots[btree_id].b; -+ if (!btree_node_fake(b)) -+ ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key), -+ &max_stale, initial); -+ gc_pos_set(c, gc_pos_btree_root(b->c.btree_id)); -+ mutex_unlock(&c->btree_root_lock); -+ -+ return ret; -+} -+ -+static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, -+ struct journal_keys *journal_keys, -+ unsigned target_depth) -+{ -+ struct btree_and_journal_iter iter; -+ struct bkey_s_c k; -+ struct bpos next_node_start = b->data->min_key; -+ u8 max_stale = 0; -+ int ret = 0; -+ -+ bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b); -+ -+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { -+ bch2_bkey_debugcheck(c, b, k); -+ -+ BUG_ON(bkey_cmp(k.k->p, b->data->min_key) < 0); -+ BUG_ON(bkey_cmp(k.k->p, b->data->max_key) > 0); -+ -+ ret = bch2_gc_mark_key(c, k, &max_stale, true); -+ if (ret) -+ break; -+ -+ if (b->c.level) { -+ struct btree *child; -+ BKEY_PADDED(k) tmp; -+ -+ bkey_reassemble(&tmp.k, k); -+ k = bkey_i_to_s_c(&tmp.k); -+ -+ bch2_btree_and_journal_iter_advance(&iter); -+ -+ ret = bch2_gc_check_topology(c, k, -+ &next_node_start, -+ b->data->max_key, -+ !bch2_btree_and_journal_iter_peek(&iter).k); -+ if (ret) -+ break; -+ -+ if (b->c.level > target_depth) { -+ child = bch2_btree_node_get_noiter(c, &tmp.k, -+ b->c.btree_id, b->c.level - 1); -+ ret = PTR_ERR_OR_ZERO(child); -+ if (ret) -+ break; -+ -+ ret = bch2_gc_btree_init_recurse(c, child, -+ journal_keys, target_depth); -+ six_unlock_read(&child->c.lock); -+ -+ if (ret) -+ break; -+ } -+ } else { -+ bch2_btree_and_journal_iter_advance(&iter); -+ } -+ } -+ -+ return ret; -+} -+ -+static int bch2_gc_btree_init(struct bch_fs *c, -+ struct journal_keys *journal_keys, -+ enum btree_id btree_id, -+ bool metadata_only) -+{ -+ struct btree *b; -+ unsigned target_depth = metadata_only ? 1 -+ : expensive_debug_checks(c) ? 0 -+ : !btree_node_type_needs_gc(btree_id) ? 
1 -+ : 0; -+ u8 max_stale = 0; -+ int ret = 0; -+ -+ b = c->btree_roots[btree_id].b; -+ -+ if (btree_node_fake(b)) -+ return 0; -+ -+ six_lock_read(&b->c.lock, NULL, NULL); -+ if (fsck_err_on(bkey_cmp(b->data->min_key, POS_MIN), c, -+ "btree root with incorrect min_key: %llu:%llu", -+ b->data->min_key.inode, -+ b->data->min_key.offset)) { -+ BUG(); -+ } -+ -+ if (fsck_err_on(bkey_cmp(b->data->max_key, POS_MAX), c, -+ "btree root with incorrect min_key: %llu:%llu", -+ b->data->max_key.inode, -+ b->data->max_key.offset)) { -+ BUG(); -+ } -+ -+ if (b->c.level >= target_depth) -+ ret = bch2_gc_btree_init_recurse(c, b, -+ journal_keys, target_depth); -+ -+ if (!ret) -+ ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key), -+ &max_stale, true); -+fsck_err: -+ six_unlock_read(&b->c.lock); -+ -+ return ret; -+} -+ -+static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r) -+{ -+ return (int) btree_id_to_gc_phase(l) - -+ (int) btree_id_to_gc_phase(r); -+} -+ -+static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys, -+ bool initial, bool metadata_only) -+{ -+ enum btree_id ids[BTREE_ID_NR]; -+ unsigned i; -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ ids[i] = i; -+ bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ enum btree_id id = ids[i]; -+ int ret = initial -+ ? bch2_gc_btree_init(c, journal_keys, -+ id, metadata_only) -+ : bch2_gc_btree(c, id, initial, metadata_only); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca, -+ u64 start, u64 end, -+ enum bch_data_type type, -+ unsigned flags) -+{ -+ u64 b = sector_to_bucket(ca, start); -+ -+ do { -+ unsigned sectors = -+ min_t(u64, bucket_to_sector(ca, b + 1), end) - start; -+ -+ bch2_mark_metadata_bucket(c, ca, b, type, sectors, -+ gc_phase(GC_PHASE_SB), flags); -+ b++; -+ start += sectors; -+ } while (start < end); -+} -+ -+void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca, -+ unsigned flags) -+{ -+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; -+ unsigned i; -+ u64 b; -+ -+ /* -+ * This conditional is kind of gross, but we may be called from the -+ * device add path, before the new device has actually been added to the -+ * running filesystem: -+ */ -+ if (c) { -+ lockdep_assert_held(&c->sb_lock); -+ percpu_down_read(&c->mark_lock); -+ } -+ -+ for (i = 0; i < layout->nr_superblocks; i++) { -+ u64 offset = le64_to_cpu(layout->sb_offset[i]); -+ -+ if (offset == BCH_SB_SECTOR) -+ mark_metadata_sectors(c, ca, 0, BCH_SB_SECTOR, -+ BCH_DATA_sb, flags); -+ -+ mark_metadata_sectors(c, ca, offset, -+ offset + (1 << layout->sb_max_size_bits), -+ BCH_DATA_sb, flags); -+ } -+ -+ for (i = 0; i < ca->journal.nr; i++) { -+ b = ca->journal.buckets[i]; -+ bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_journal, -+ ca->mi.bucket_size, -+ gc_phase(GC_PHASE_SB), flags); -+ } -+ -+ if (c) -+ percpu_up_read(&c->mark_lock); -+} -+ -+static void bch2_mark_superblocks(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ mutex_lock(&c->sb_lock); -+ gc_pos_set(c, gc_phase(GC_PHASE_SB)); -+ -+ for_each_online_member(ca, c, i) -+ bch2_mark_dev_superblock(c, ca, BTREE_TRIGGER_GC); -+ mutex_unlock(&c->sb_lock); -+} -+ -+#if 0 -+/* Also see bch2_pending_btree_node_free_insert_done() */ -+static void bch2_mark_pending_btree_node_frees(struct bch_fs *c) -+{ -+ struct btree_update *as; -+ struct pending_btree_node_free *d; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ gc_pos_set(c, 
gc_phase(GC_PHASE_PENDING_DELETE)); -+ -+ for_each_pending_btree_node_free(c, as, d) -+ if (d->index_update_done) -+ bch2_mark_key(c, bkey_i_to_s_c(&d->key), -+ 0, 0, NULL, 0, -+ BTREE_TRIGGER_GC); -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+#endif -+ -+static void bch2_mark_allocator_buckets(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ struct open_bucket *ob; -+ size_t i, j, iter; -+ unsigned ci; -+ -+ percpu_down_read(&c->mark_lock); -+ -+ spin_lock(&c->freelist_lock); -+ gc_pos_set(c, gc_pos_alloc(c, NULL)); -+ -+ for_each_member_device(ca, c, ci) { -+ fifo_for_each_entry(i, &ca->free_inc, iter) -+ bch2_mark_alloc_bucket(c, ca, i, true, -+ gc_pos_alloc(c, NULL), -+ BTREE_TRIGGER_GC); -+ -+ -+ -+ for (j = 0; j < RESERVE_NR; j++) -+ fifo_for_each_entry(i, &ca->free[j], iter) -+ bch2_mark_alloc_bucket(c, ca, i, true, -+ gc_pos_alloc(c, NULL), -+ BTREE_TRIGGER_GC); -+ } -+ -+ spin_unlock(&c->freelist_lock); -+ -+ for (ob = c->open_buckets; -+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); -+ ob++) { -+ spin_lock(&ob->lock); -+ if (ob->valid) { -+ gc_pos_set(c, gc_pos_alloc(c, ob)); -+ ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr), true, -+ gc_pos_alloc(c, ob), -+ BTREE_TRIGGER_GC); -+ } -+ spin_unlock(&ob->lock); -+ } -+ -+ percpu_up_read(&c->mark_lock); -+} -+ -+static void bch2_gc_free(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ genradix_free(&c->stripes[1]); -+ -+ for_each_member_device(ca, c, i) { -+ kvpfree(rcu_dereference_protected(ca->buckets[1], 1), -+ sizeof(struct bucket_array) + -+ ca->mi.nbuckets * sizeof(struct bucket)); -+ ca->buckets[1] = NULL; -+ -+ free_percpu(ca->usage[1]); -+ ca->usage[1] = NULL; -+ } -+ -+ free_percpu(c->usage_gc); -+ c->usage_gc = NULL; -+} -+ -+static int bch2_gc_done(struct bch_fs *c, -+ bool initial, bool metadata_only) -+{ -+ struct bch_dev *ca; -+ bool verify = !metadata_only && -+ (!initial || -+ (c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))); -+ unsigned i; -+ int ret = 0; -+ -+#define copy_field(_f, _msg, ...) \ -+ if (dst->_f != src->_f) { \ -+ if (verify) \ -+ fsck_err(c, _msg ": got %llu, should be %llu" \ -+ , ##__VA_ARGS__, dst->_f, src->_f); \ -+ dst->_f = src->_f; \ -+ ret = 1; \ -+ } -+#define copy_stripe_field(_f, _msg, ...) \ -+ if (dst->_f != src->_f) { \ -+ if (verify) \ -+ fsck_err(c, "stripe %zu has wrong "_msg \ -+ ": got %u, should be %u", \ -+ dst_iter.pos, ##__VA_ARGS__, \ -+ dst->_f, src->_f); \ -+ dst->_f = src->_f; \ -+ dst->dirty = true; \ -+ ret = 1; \ -+ } -+#define copy_bucket_field(_f) \ -+ if (dst->b[b].mark._f != src->b[b].mark._f) { \ -+ if (verify) \ -+ fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f \ -+ ": got %u, should be %u", i, b, \ -+ dst->b[b].mark.gen, \ -+ bch2_data_types[dst->b[b].mark.data_type],\ -+ dst->b[b].mark._f, src->b[b].mark._f); \ -+ dst->b[b]._mark._f = src->b[b].mark._f; \ -+ ret = 1; \ -+ } -+#define copy_dev_field(_f, _msg, ...) \ -+ copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__) -+#define copy_fs_field(_f, _msg, ...) 
\ -+ copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__) -+ -+ if (!metadata_only) { -+ struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0); -+ struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0); -+ struct stripe *dst, *src; -+ unsigned i; -+ -+ c->ec_stripes_heap.used = 0; -+ -+ while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) && -+ (src = genradix_iter_peek(&src_iter, &c->stripes[1]))) { -+ BUG_ON(src_iter.pos != dst_iter.pos); -+ -+ copy_stripe_field(alive, "alive"); -+ copy_stripe_field(sectors, "sectors"); -+ copy_stripe_field(algorithm, "algorithm"); -+ copy_stripe_field(nr_blocks, "nr_blocks"); -+ copy_stripe_field(nr_redundant, "nr_redundant"); -+ copy_stripe_field(blocks_nonempty, -+ "blocks_nonempty"); -+ -+ for (i = 0; i < ARRAY_SIZE(dst->block_sectors); i++) -+ copy_stripe_field(block_sectors[i], -+ "block_sectors[%u]", i); -+ -+ if (dst->alive) { -+ spin_lock(&c->ec_stripes_heap_lock); -+ bch2_stripes_heap_insert(c, dst, dst_iter.pos); -+ spin_unlock(&c->ec_stripes_heap_lock); -+ } -+ -+ genradix_iter_advance(&dst_iter, &c->stripes[0]); -+ genradix_iter_advance(&src_iter, &c->stripes[1]); -+ } -+ } -+ -+ for_each_member_device(ca, c, i) { -+ struct bucket_array *dst = __bucket_array(ca, 0); -+ struct bucket_array *src = __bucket_array(ca, 1); -+ size_t b; -+ -+ for (b = 0; b < src->nbuckets; b++) { -+ copy_bucket_field(gen); -+ copy_bucket_field(data_type); -+ copy_bucket_field(owned_by_allocator); -+ copy_bucket_field(stripe); -+ copy_bucket_field(dirty_sectors); -+ copy_bucket_field(cached_sectors); -+ -+ dst->b[b].oldest_gen = src->b[b].oldest_gen; -+ } -+ }; -+ -+ bch2_fs_usage_acc_to_base(c, 0); -+ bch2_fs_usage_acc_to_base(c, 1); -+ -+ bch2_dev_usage_from_buckets(c); -+ -+ { -+ unsigned nr = fs_usage_u64s(c); -+ struct bch_fs_usage *dst = c->usage_base; -+ struct bch_fs_usage *src = (void *) -+ bch2_acc_percpu_u64s((void *) c->usage_gc, nr); -+ -+ copy_fs_field(hidden, "hidden"); -+ copy_fs_field(btree, "btree"); -+ -+ if (!metadata_only) { -+ copy_fs_field(data, "data"); -+ copy_fs_field(cached, "cached"); -+ copy_fs_field(reserved, "reserved"); -+ copy_fs_field(nr_inodes,"nr_inodes"); -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) -+ copy_fs_field(persistent_reserved[i], -+ "persistent_reserved[%i]", i); -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ char buf[80]; -+ -+ if (metadata_only && -+ (e->data_type == BCH_DATA_user || -+ e->data_type == BCH_DATA_cached)) -+ continue; -+ -+ bch2_replicas_entry_to_text(&PBUF(buf), e); -+ -+ copy_fs_field(replicas[i], "%s", buf); -+ } -+ } -+ -+#undef copy_fs_field -+#undef copy_dev_field -+#undef copy_bucket_field -+#undef copy_stripe_field -+#undef copy_field -+fsck_err: -+ return ret; -+} -+ -+static int bch2_gc_start(struct bch_fs *c, -+ bool metadata_only) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ int ret; -+ -+ BUG_ON(c->usage_gc); -+ -+ c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64), -+ sizeof(u64), GFP_KERNEL); -+ if (!c->usage_gc) { -+ bch_err(c, "error allocating c->usage_gc"); -+ return -ENOMEM; -+ } -+ -+ for_each_member_device(ca, c, i) { -+ BUG_ON(ca->buckets[1]); -+ BUG_ON(ca->usage[1]); -+ -+ ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) + -+ ca->mi.nbuckets * sizeof(struct bucket), -+ GFP_KERNEL|__GFP_ZERO); -+ if (!ca->buckets[1]) { -+ percpu_ref_put(&ca->ref); -+ bch_err(c, "error allocating ca->buckets[gc]"); -+ return -ENOMEM; -+ } -+ -+ ca->usage[1] = 
alloc_percpu(struct bch_dev_usage); -+ if (!ca->usage[1]) { -+ bch_err(c, "error allocating ca->usage[gc]"); -+ percpu_ref_put(&ca->ref); -+ return -ENOMEM; -+ } -+ } -+ -+ ret = bch2_ec_mem_alloc(c, true); -+ if (ret) { -+ bch_err(c, "error allocating ec gc mem"); -+ return ret; -+ } -+ -+ percpu_down_write(&c->mark_lock); -+ -+ /* -+ * indicate to stripe code that we need to allocate for the gc stripes -+ * radix tree, too -+ */ -+ gc_pos_set(c, gc_phase(GC_PHASE_START)); -+ -+ for_each_member_device(ca, c, i) { -+ struct bucket_array *dst = __bucket_array(ca, 1); -+ struct bucket_array *src = __bucket_array(ca, 0); -+ size_t b; -+ -+ dst->first_bucket = src->first_bucket; -+ dst->nbuckets = src->nbuckets; -+ -+ for (b = 0; b < src->nbuckets; b++) { -+ struct bucket *d = &dst->b[b]; -+ struct bucket *s = &src->b[b]; -+ -+ d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen; -+ d->gen_valid = s->gen_valid; -+ -+ if (metadata_only && -+ (s->mark.data_type == BCH_DATA_user || -+ s->mark.data_type == BCH_DATA_cached)) { -+ d->_mark = s->mark; -+ d->_mark.owned_by_allocator = 0; -+ } -+ } -+ }; -+ -+ percpu_up_write(&c->mark_lock); -+ -+ return 0; -+} -+ -+/** -+ * bch2_gc - walk _all_ references to buckets, and recompute them: -+ * -+ * Order matters here: -+ * - Concurrent GC relies on the fact that we have a total ordering for -+ * everything that GC walks - see gc_will_visit_node(), -+ * gc_will_visit_root() -+ * -+ * - also, references move around in the course of index updates and -+ * various other crap: everything needs to agree on the ordering -+ * references are allowed to move around in - e.g., we're allowed to -+ * start with a reference owned by an open_bucket (the allocator) and -+ * move it to the btree, but not the reverse. -+ * -+ * This is necessary to ensure that gc doesn't miss references that -+ * move around - if references move backwards in the ordering GC -+ * uses, GC could skip past them -+ */ -+int bch2_gc(struct bch_fs *c, struct journal_keys *journal_keys, -+ bool initial, bool metadata_only) -+{ -+ struct bch_dev *ca; -+ u64 start_time = local_clock(); -+ unsigned i, iter = 0; -+ int ret; -+ -+ lockdep_assert_held(&c->state_lock); -+ trace_gc_start(c); -+ -+ down_write(&c->gc_lock); -+ -+ /* flush interior btree updates: */ -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+again: -+ ret = bch2_gc_start(c, metadata_only); -+ if (ret) -+ goto out; -+ -+ bch2_mark_superblocks(c); -+ -+ ret = bch2_gc_btrees(c, journal_keys, initial, metadata_only); -+ if (ret) -+ goto out; -+ -+#if 0 -+ bch2_mark_pending_btree_node_frees(c); -+#endif -+ bch2_mark_allocator_buckets(c); -+ -+ c->gc_count++; -+out: -+ if (!ret && -+ (test_bit(BCH_FS_FIXED_GENS, &c->flags) || -+ (!iter && test_restart_gc(c)))) { -+ /* -+ * XXX: make sure gens we fixed got saved -+ */ -+ if (iter++ <= 2) { -+ bch_info(c, "Fixed gens, restarting mark and sweep:"); -+ clear_bit(BCH_FS_FIXED_GENS, &c->flags); -+ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); -+ -+ percpu_down_write(&c->mark_lock); -+ bch2_gc_free(c); -+ percpu_up_write(&c->mark_lock); -+ /* flush fsck errors, reset counters */ -+ bch2_flush_fsck_errs(c); -+ -+ goto again; -+ } -+ -+ bch_info(c, "Unable to fix bucket gens, looping"); -+ ret = -EINVAL; -+ } -+ -+ if (!ret) { -+ bch2_journal_block(&c->journal); -+ -+ percpu_down_write(&c->mark_lock); -+ ret = bch2_gc_done(c, initial, metadata_only); -+ -+ bch2_journal_unblock(&c->journal); -+ } else { -+ percpu_down_write(&c->mark_lock); -+ 
} -+ -+ /* Indicates that gc is no longer in progress: */ -+ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); -+ -+ bch2_gc_free(c); -+ percpu_up_write(&c->mark_lock); -+ -+ up_write(&c->gc_lock); -+ -+ trace_gc_end(c); -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time); -+ -+ /* -+ * Wake up allocator in case it was waiting for buckets -+ * because of not being able to inc gens -+ */ -+ for_each_member_device(ca, c, i) -+ bch2_wake_allocator(ca); -+ -+ /* -+ * At startup, allocations can happen directly instead of via the -+ * allocator thread - issue wakeup in case they blocked on gc_lock: -+ */ -+ closure_wake_up(&c->freelist_wait); -+ return ret; -+} -+ -+static bool gc_btree_gens_key(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ percpu_down_read(&c->mark_lock); -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bucket *g = PTR_BUCKET(ca, ptr, false); -+ -+ if (gen_after(g->mark.gen, ptr->gen) > 16) { -+ percpu_up_read(&c->mark_lock); -+ return true; -+ } -+ } -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bucket *g = PTR_BUCKET(ca, ptr, false); -+ -+ if (gen_after(g->gc_gen, ptr->gen)) -+ g->gc_gen = ptr->gen; -+ } -+ percpu_up_read(&c->mark_lock); -+ -+ return false; -+} -+ -+/* -+ * For recalculating oldest gen, we only need to walk keys in leaf nodes; btree -+ * node pointers currently never have cached pointers that can become stale: -+ */ -+static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_on_stack sk; -+ int ret = 0; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, -+ BTREE_ITER_PREFETCH); -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret = bkey_err(k))) { -+ if (gc_btree_gens_key(c, k)) { -+ bkey_on_stack_reassemble(&sk, c, k); -+ bch2_extent_normalize(c, bkey_i_to_s(sk.k)); -+ -+ bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); -+ -+ bch2_trans_update(&trans, iter, sk.k, 0); -+ -+ ret = bch2_trans_commit(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+ if (ret == -EINTR) -+ continue; -+ if (ret) { -+ break; -+ } -+ } -+ -+ bch2_btree_iter_next(iter); -+ } -+ -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ -+ return ret; -+} -+ -+int bch2_gc_gens(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ struct bucket_array *buckets; -+ struct bucket *g; -+ unsigned i; -+ int ret; -+ -+ /* -+ * Ideally we would be using state_lock and not gc_lock here, but that -+ * introduces a deadlock in the RO path - we currently take the state -+ * lock at the start of going RO, thus the gc thread may get stuck: -+ */ -+ down_read(&c->gc_lock); -+ -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) -+ g->gc_gen = g->mark.gen; -+ up_read(&ca->bucket_lock); -+ } -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (btree_node_type_needs_gc(i)) { -+ ret = bch2_gc_btree_gens(c, i); -+ if (ret) { -+ bch_err(c, "error recalculating oldest_gen: %i", ret); -+ goto err; -+ } -+ } -+ -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) -+ g->oldest_gen = g->gc_gen; -+ up_read(&ca->bucket_lock); -+ } -+ -+ c->gc_count++; 
-+err: -+ up_read(&c->gc_lock); -+ return ret; -+} -+ -+/* Btree coalescing */ -+ -+static void recalc_packed_keys(struct btree *b) -+{ -+ struct bset *i = btree_bset_first(b); -+ struct bkey_packed *k; -+ -+ memset(&b->nr, 0, sizeof(b->nr)); -+ -+ BUG_ON(b->nsets != 1); -+ -+ vstruct_for_each(i, k) -+ btree_keys_account_key_add(&b->nr, 0, k); -+} -+ -+static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter, -+ struct btree *old_nodes[GC_MERGE_NODES]) -+{ -+ struct btree *parent = btree_node_parent(iter, old_nodes[0]); -+ unsigned i, nr_old_nodes, nr_new_nodes, u64s = 0; -+ unsigned blocks = btree_blocks(c) * 2 / 3; -+ struct btree *new_nodes[GC_MERGE_NODES]; -+ struct btree_update *as; -+ struct keylist keylist; -+ struct bkey_format_state format_state; -+ struct bkey_format new_format; -+ -+ memset(new_nodes, 0, sizeof(new_nodes)); -+ bch2_keylist_init(&keylist, NULL); -+ -+ /* Count keys that are not deleted */ -+ for (i = 0; i < GC_MERGE_NODES && old_nodes[i]; i++) -+ u64s += old_nodes[i]->nr.live_u64s; -+ -+ nr_old_nodes = nr_new_nodes = i; -+ -+ /* Check if all keys in @old_nodes could fit in one fewer node */ -+ if (nr_old_nodes <= 1 || -+ __vstruct_blocks(struct btree_node, c->block_bits, -+ DIV_ROUND_UP(u64s, nr_old_nodes - 1)) > blocks) -+ return; -+ -+ /* Find a format that all keys in @old_nodes can pack into */ -+ bch2_bkey_format_init(&format_state); -+ -+ for (i = 0; i < nr_old_nodes; i++) -+ __bch2_btree_calc_format(&format_state, old_nodes[i]); -+ -+ new_format = bch2_bkey_format_done(&format_state); -+ -+ /* Check if repacking would make any nodes too big to fit */ -+ for (i = 0; i < nr_old_nodes; i++) -+ if (!bch2_btree_node_format_fits(c, old_nodes[i], &new_format)) { -+ trace_btree_gc_coalesce_fail(c, -+ BTREE_GC_COALESCE_FAIL_FORMAT_FITS); -+ return; -+ } -+ -+ if (bch2_keylist_realloc(&keylist, NULL, 0, -+ (BKEY_U64s + BKEY_EXTENT_U64s_MAX) * nr_old_nodes)) { -+ trace_btree_gc_coalesce_fail(c, -+ BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC); -+ return; -+ } -+ -+ as = bch2_btree_update_start(iter->trans, iter->btree_id, -+ btree_update_reserve_required(c, parent) + nr_old_nodes, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE, -+ NULL); -+ if (IS_ERR(as)) { -+ trace_btree_gc_coalesce_fail(c, -+ BTREE_GC_COALESCE_FAIL_RESERVE_GET); -+ bch2_keylist_free(&keylist, NULL); -+ return; -+ } -+ -+ trace_btree_gc_coalesce(c, old_nodes[0]); -+ -+ for (i = 0; i < nr_old_nodes; i++) -+ bch2_btree_interior_update_will_free_node(as, old_nodes[i]); -+ -+ /* Repack everything with @new_format and sort down to one bset */ -+ for (i = 0; i < nr_old_nodes; i++) -+ new_nodes[i] = -+ __bch2_btree_node_alloc_replacement(as, old_nodes[i], -+ new_format); -+ -+ /* -+ * Conceptually we concatenate the nodes together and slice them -+ * up at different boundaries. 
-+ */ -+ for (i = nr_new_nodes - 1; i > 0; --i) { -+ struct btree *n1 = new_nodes[i]; -+ struct btree *n2 = new_nodes[i - 1]; -+ -+ struct bset *s1 = btree_bset_first(n1); -+ struct bset *s2 = btree_bset_first(n2); -+ struct bkey_packed *k, *last = NULL; -+ -+ /* Calculate how many keys from @n2 we could fit inside @n1 */ -+ u64s = 0; -+ -+ for (k = s2->start; -+ k < vstruct_last(s2) && -+ vstruct_blocks_plus(n1->data, c->block_bits, -+ u64s + k->u64s) <= blocks; -+ k = bkey_next_skip_noops(k, vstruct_last(s2))) { -+ last = k; -+ u64s += k->u64s; -+ } -+ -+ if (u64s == le16_to_cpu(s2->u64s)) { -+ /* n2 fits entirely in n1 */ -+ n1->key.k.p = n1->data->max_key = n2->data->max_key; -+ -+ memcpy_u64s(vstruct_last(s1), -+ s2->start, -+ le16_to_cpu(s2->u64s)); -+ le16_add_cpu(&s1->u64s, le16_to_cpu(s2->u64s)); -+ -+ set_btree_bset_end(n1, n1->set); -+ -+ six_unlock_write(&n2->c.lock); -+ bch2_btree_node_free_never_inserted(c, n2); -+ six_unlock_intent(&n2->c.lock); -+ -+ memmove(new_nodes + i - 1, -+ new_nodes + i, -+ sizeof(new_nodes[0]) * (nr_new_nodes - i)); -+ new_nodes[--nr_new_nodes] = NULL; -+ } else if (u64s) { -+ /* move part of n2 into n1 */ -+ n1->key.k.p = n1->data->max_key = -+ bkey_unpack_pos(n1, last); -+ -+ n2->data->min_key = bkey_successor(n1->data->max_key); -+ -+ memcpy_u64s(vstruct_last(s1), -+ s2->start, u64s); -+ le16_add_cpu(&s1->u64s, u64s); -+ -+ memmove(s2->start, -+ vstruct_idx(s2, u64s), -+ (le16_to_cpu(s2->u64s) - u64s) * sizeof(u64)); -+ s2->u64s = cpu_to_le16(le16_to_cpu(s2->u64s) - u64s); -+ -+ set_btree_bset_end(n1, n1->set); -+ set_btree_bset_end(n2, n2->set); -+ } -+ } -+ -+ for (i = 0; i < nr_new_nodes; i++) { -+ struct btree *n = new_nodes[i]; -+ -+ recalc_packed_keys(n); -+ btree_node_reset_sib_u64s(n); -+ -+ bch2_btree_build_aux_trees(n); -+ -+ bch2_btree_update_add_new_node(as, n); -+ six_unlock_write(&n->c.lock); -+ -+ bch2_btree_node_write(c, n, SIX_LOCK_intent); -+ } -+ -+ /* -+ * The keys for the old nodes get deleted. We don't want to insert keys -+ * that compare equal to the keys for the new nodes we'll also be -+ * inserting - we can't because keys on a keylist must be strictly -+ * greater than the previous keys, and we also don't need to since the -+ * key for the new node will serve the same purpose (overwriting the key -+ * for the old node). 
-+ */ -+ for (i = 0; i < nr_old_nodes; i++) { -+ struct bkey_i delete; -+ unsigned j; -+ -+ for (j = 0; j < nr_new_nodes; j++) -+ if (!bkey_cmp(old_nodes[i]->key.k.p, -+ new_nodes[j]->key.k.p)) -+ goto next; -+ -+ bkey_init(&delete.k); -+ delete.k.p = old_nodes[i]->key.k.p; -+ bch2_keylist_add_in_order(&keylist, &delete); -+next: -+ i = i; -+ } -+ -+ /* -+ * Keys for the new nodes get inserted: bch2_btree_insert_keys() only -+ * does the lookup once and thus expects the keys to be in sorted order -+ * so we have to make sure the new keys are correctly ordered with -+ * respect to the deleted keys added in the previous loop -+ */ -+ for (i = 0; i < nr_new_nodes; i++) -+ bch2_keylist_add_in_order(&keylist, &new_nodes[i]->key); -+ -+ /* Insert the newly coalesced nodes */ -+ bch2_btree_insert_node(as, parent, iter, &keylist, 0); -+ -+ BUG_ON(!bch2_keylist_empty(&keylist)); -+ -+ BUG_ON(iter->l[old_nodes[0]->c.level].b != old_nodes[0]); -+ -+ bch2_btree_iter_node_replace(iter, new_nodes[0]); -+ -+ for (i = 0; i < nr_new_nodes; i++) -+ bch2_btree_update_get_open_buckets(as, new_nodes[i]); -+ -+ /* Free the old nodes and update our sliding window */ -+ for (i = 0; i < nr_old_nodes; i++) { -+ bch2_btree_node_free_inmem(c, old_nodes[i], iter); -+ -+ /* -+ * the index update might have triggered a split, in which case -+ * the nodes we coalesced - the new nodes we just created - -+ * might not be sibling nodes anymore - don't add them to the -+ * sliding window (except the first): -+ */ -+ if (!i) { -+ old_nodes[i] = new_nodes[i]; -+ } else { -+ old_nodes[i] = NULL; -+ } -+ } -+ -+ for (i = 0; i < nr_new_nodes; i++) -+ six_unlock_intent(&new_nodes[i]->c.lock); -+ -+ bch2_btree_update_done(as); -+ bch2_keylist_free(&keylist, NULL); -+} -+ -+static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ unsigned i; -+ -+ /* Sliding window of adjacent btree nodes */ -+ struct btree *merge[GC_MERGE_NODES]; -+ u32 lock_seq[GC_MERGE_NODES]; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ /* -+ * XXX: We don't have a good way of positively matching on sibling nodes -+ * that have the same parent - this code works by handling the cases -+ * where they might not have the same parent, and is thus fragile. Ugh. -+ * -+ * Perhaps redo this to use multiple linked iterators? 
-+ */ -+ memset(merge, 0, sizeof(merge)); -+ -+ __for_each_btree_node(&trans, iter, btree_id, POS_MIN, -+ BTREE_MAX_DEPTH, 0, -+ BTREE_ITER_PREFETCH, b) { -+ memmove(merge + 1, merge, -+ sizeof(merge) - sizeof(merge[0])); -+ memmove(lock_seq + 1, lock_seq, -+ sizeof(lock_seq) - sizeof(lock_seq[0])); -+ -+ merge[0] = b; -+ -+ for (i = 1; i < GC_MERGE_NODES; i++) { -+ if (!merge[i] || -+ !six_relock_intent(&merge[i]->c.lock, lock_seq[i])) -+ break; -+ -+ if (merge[i]->c.level != merge[0]->c.level) { -+ six_unlock_intent(&merge[i]->c.lock); -+ break; -+ } -+ } -+ memset(merge + i, 0, (GC_MERGE_NODES - i) * sizeof(merge[0])); -+ -+ bch2_coalesce_nodes(c, iter, merge); -+ -+ for (i = 1; i < GC_MERGE_NODES && merge[i]; i++) { -+ lock_seq[i] = merge[i]->c.lock.state.seq; -+ six_unlock_intent(&merge[i]->c.lock); -+ } -+ -+ lock_seq[0] = merge[0]->c.lock.state.seq; -+ -+ if (kthread && kthread_should_stop()) { -+ bch2_trans_exit(&trans); -+ return -ESHUTDOWN; -+ } -+ -+ bch2_trans_cond_resched(&trans); -+ -+ /* -+ * If the parent node wasn't relocked, it might have been split -+ * and the nodes in our sliding window might not have the same -+ * parent anymore - blow away the sliding window: -+ */ -+ if (btree_iter_node(iter, iter->level + 1) && -+ !btree_node_intent_locked(iter, iter->level + 1)) -+ memset(merge + 1, 0, -+ (GC_MERGE_NODES - 1) * sizeof(merge[0])); -+ } -+ return bch2_trans_exit(&trans); -+} -+ -+/** -+ * bch_coalesce - coalesce adjacent nodes with low occupancy -+ */ -+void bch2_coalesce(struct bch_fs *c) -+{ -+ enum btree_id id; -+ -+ down_read(&c->gc_lock); -+ trace_gc_coalesce_start(c); -+ -+ for (id = 0; id < BTREE_ID_NR; id++) { -+ int ret = c->btree_roots[id].b -+ ? bch2_coalesce_btree(c, id) -+ : 0; -+ -+ if (ret) { -+ if (ret != -ESHUTDOWN) -+ bch_err(c, "btree coalescing failed: %d", ret); -+ return; -+ } -+ } -+ -+ trace_gc_coalesce_end(c); -+ up_read(&c->gc_lock); -+} -+ -+static int bch2_gc_thread(void *arg) -+{ -+ struct bch_fs *c = arg; -+ struct io_clock *clock = &c->io_clock[WRITE]; -+ unsigned long last = atomic_long_read(&clock->now); -+ unsigned last_kick = atomic_read(&c->kick_gc); -+ int ret; -+ -+ set_freezable(); -+ -+ while (1) { -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ if (kthread_should_stop()) { -+ __set_current_state(TASK_RUNNING); -+ return 0; -+ } -+ -+ if (atomic_read(&c->kick_gc) != last_kick) -+ break; -+ -+ if (c->btree_gc_periodic) { -+ unsigned long next = last + c->capacity / 16; -+ -+ if (atomic_long_read(&clock->now) >= next) -+ break; -+ -+ bch2_io_clock_schedule_timeout(clock, next); -+ } else { -+ schedule(); -+ } -+ -+ try_to_freeze(); -+ } -+ __set_current_state(TASK_RUNNING); -+ -+ last = atomic_long_read(&clock->now); -+ last_kick = atomic_read(&c->kick_gc); -+ -+ /* -+ * Full gc is currently incompatible with btree key cache: -+ */ -+#if 0 -+ ret = bch2_gc(c, NULL, false, false); -+#else -+ ret = bch2_gc_gens(c); -+#endif -+ if (ret < 0) -+ bch_err(c, "btree gc failed: %i", ret); -+ -+ debug_check_no_locks_held(); -+ } -+ -+ return 0; -+} -+ -+void bch2_gc_thread_stop(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ p = c->gc_thread; -+ c->gc_thread = NULL; -+ -+ if (p) { -+ kthread_stop(p); -+ put_task_struct(p); -+ } -+} -+ -+int bch2_gc_thread_start(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ BUG_ON(c->gc_thread); -+ -+ p = kthread_create(bch2_gc_thread, c, "bch_gc"); -+ if (IS_ERR(p)) -+ return PTR_ERR(p); -+ -+ get_task_struct(p); -+ c->gc_thread = p; -+ wake_up_process(p); -+ return 0; -+} 
-diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h -new file mode 100644 -index 000000000000..3694a3df62a8 ---- /dev/null -+++ b/fs/bcachefs/btree_gc.h -@@ -0,0 +1,121 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_GC_H -+#define _BCACHEFS_BTREE_GC_H -+ -+#include "btree_types.h" -+ -+void bch2_coalesce(struct bch_fs *); -+ -+struct journal_keys; -+int bch2_gc(struct bch_fs *, struct journal_keys *, bool, bool); -+int bch2_gc_gens(struct bch_fs *); -+void bch2_gc_thread_stop(struct bch_fs *); -+int bch2_gc_thread_start(struct bch_fs *); -+void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned); -+ -+/* -+ * For concurrent mark and sweep (with other index updates), we define a total -+ * ordering of _all_ references GC walks: -+ * -+ * Note that some references will have the same GC position as others - e.g. -+ * everything within the same btree node; in those cases we're relying on -+ * whatever locking exists for where those references live, i.e. the write lock -+ * on a btree node. -+ * -+ * That locking is also required to ensure GC doesn't pass the updater in -+ * between the updater adding/removing the reference and updating the GC marks; -+ * without that, we would at best double count sometimes. -+ * -+ * That part is important - whenever calling bch2_mark_pointers(), a lock _must_ -+ * be held that prevents GC from passing the position the updater is at. -+ * -+ * (What about the start of gc, when we're clearing all the marks? GC clears the -+ * mark with the gc pos seqlock held, and bch_mark_bucket checks against the gc -+ * position inside its cmpxchg loop, so crap magically works). -+ */ -+ -+/* Position of (the start of) a gc phase: */ -+static inline struct gc_pos gc_phase(enum gc_phase phase) -+{ -+ return (struct gc_pos) { -+ .phase = phase, -+ .pos = POS_MIN, -+ .level = 0, -+ }; -+} -+ -+static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r) -+{ -+ if (l.phase != r.phase) -+ return l.phase < r.phase ? -1 : 1; -+ if (bkey_cmp(l.pos, r.pos)) -+ return bkey_cmp(l.pos, r.pos); -+ if (l.level != r.level) -+ return l.level < r.level ? -1 : 1; -+ return 0; -+} -+ -+static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id) -+{ -+ switch (id) { -+#define x(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n; -+ BCH_BTREE_IDS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+static inline struct gc_pos gc_pos_btree(enum btree_id id, -+ struct bpos pos, unsigned level) -+{ -+ return (struct gc_pos) { -+ .phase = btree_id_to_gc_phase(id), -+ .pos = pos, -+ .level = level, -+ }; -+} -+ -+/* -+ * GC position of the pointers within a btree node: note, _not_ for &b->key -+ * itself, that lives in the parent node: -+ */ -+static inline struct gc_pos gc_pos_btree_node(struct btree *b) -+{ -+ return gc_pos_btree(b->c.btree_id, b->key.k.p, b->c.level); -+} -+ -+/* -+ * GC position of the pointer to a btree root: we don't use -+ * gc_pos_pointer_to_btree_node() here to avoid a potential race with -+ * btree_split() increasing the tree depth - the new root will have level > the -+ * old root and thus have a greater gc position than the old root, but that -+ * would be incorrect since once gc has marked the root it's not coming back. 
-+ */ -+static inline struct gc_pos gc_pos_btree_root(enum btree_id id) -+{ -+ return gc_pos_btree(id, POS_MAX, BTREE_MAX_DEPTH); -+} -+ -+static inline struct gc_pos gc_pos_alloc(struct bch_fs *c, struct open_bucket *ob) -+{ -+ return (struct gc_pos) { -+ .phase = GC_PHASE_ALLOC, -+ .pos = POS(ob ? ob - c->open_buckets : 0, 0), -+ }; -+} -+ -+static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos) -+{ -+ unsigned seq; -+ bool ret; -+ -+ do { -+ seq = read_seqcount_begin(&c->gc_pos_lock); -+ ret = gc_pos_cmp(pos, c->gc_pos) <= 0; -+ } while (read_seqcount_retry(&c->gc_pos_lock, seq)); -+ -+ return ret; -+} -+ -+#endif /* _BCACHEFS_BTREE_GC_H */ -diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c -new file mode 100644 -index 000000000000..2f5097218f9c ---- /dev/null -+++ b/fs/bcachefs/btree_io.c -@@ -0,0 +1,1834 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "bkey_sort.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "debug.h" -+#include "error.h" -+#include "extents.h" -+#include "io.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "super-io.h" -+ -+#include -+#include -+ -+static void verify_no_dups(struct btree *b, -+ struct bkey_packed *start, -+ struct bkey_packed *end, -+ bool extents) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bkey_packed *k, *p; -+ -+ if (start == end) -+ return; -+ -+ for (p = start, k = bkey_next_skip_noops(start, end); -+ k != end; -+ p = k, k = bkey_next_skip_noops(k, end)) { -+ struct bkey l = bkey_unpack_key(b, p); -+ struct bkey r = bkey_unpack_key(b, k); -+ -+ BUG_ON(extents -+ ? bkey_cmp(l.p, bkey_start_pos(&r)) > 0 -+ : bkey_cmp(l.p, bkey_start_pos(&r)) >= 0); -+ //BUG_ON(bkey_cmp_packed(&b->format, p, k) >= 0); -+ } -+#endif -+} -+ -+static void set_needs_whiteout(struct bset *i, int v) -+{ -+ struct bkey_packed *k; -+ -+ for (k = i->start; -+ k != vstruct_last(i); -+ k = bkey_next_skip_noops(k, vstruct_last(i))) -+ k->needs_whiteout = v; -+} -+ -+static void btree_bounce_free(struct bch_fs *c, size_t size, -+ bool used_mempool, void *p) -+{ -+ if (used_mempool) -+ mempool_free(p, &c->btree_bounce_pool); -+ else -+ vpfree(p, size); -+} -+ -+static void *btree_bounce_alloc(struct bch_fs *c, size_t size, -+ bool *used_mempool) -+{ -+ unsigned flags = memalloc_nofs_save(); -+ void *p; -+ -+ BUG_ON(size > btree_bytes(c)); -+ -+ *used_mempool = false; -+ p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT); -+ if (!p) { -+ *used_mempool = true; -+ p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO); -+ } -+ memalloc_nofs_restore(flags); -+ return p; -+} -+ -+static void sort_bkey_ptrs(const struct btree *bt, -+ struct bkey_packed **ptrs, unsigned nr) -+{ -+ unsigned n = nr, a = nr / 2, b, c, d; -+ -+ if (!a) -+ return; -+ -+ /* Heap sort: see lib/sort.c: */ -+ while (1) { -+ if (a) -+ a--; -+ else if (--n) -+ swap(ptrs[0], ptrs[n]); -+ else -+ break; -+ -+ for (b = a; c = 2 * b + 1, (d = c + 1) < n;) -+ b = bkey_cmp_packed(bt, -+ ptrs[c], -+ ptrs[d]) >= 0 ? 
c : d; -+ if (d == n) -+ b = c; -+ -+ while (b != a && -+ bkey_cmp_packed(bt, -+ ptrs[a], -+ ptrs[b]) >= 0) -+ b = (b - 1) / 2; -+ c = b; -+ while (b != a) { -+ b = (b - 1) / 2; -+ swap(ptrs[b], ptrs[c]); -+ } -+ } -+} -+ -+static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) -+{ -+ struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k; -+ bool used_mempool = false; -+ size_t bytes = b->whiteout_u64s * sizeof(u64); -+ -+ if (!b->whiteout_u64s) -+ return; -+ -+ new_whiteouts = btree_bounce_alloc(c, bytes, &used_mempool); -+ -+ ptrs = ptrs_end = ((void *) new_whiteouts + bytes); -+ -+ for (k = unwritten_whiteouts_start(c, b); -+ k != unwritten_whiteouts_end(c, b); -+ k = bkey_next(k)) -+ *--ptrs = k; -+ -+ sort_bkey_ptrs(b, ptrs, ptrs_end - ptrs); -+ -+ k = new_whiteouts; -+ -+ while (ptrs != ptrs_end) { -+ bkey_copy(k, *ptrs); -+ k = bkey_next(k); -+ ptrs++; -+ } -+ -+ verify_no_dups(b, new_whiteouts, -+ (void *) ((u64 *) new_whiteouts + b->whiteout_u64s), -+ btree_node_old_extent_overwrite(b)); -+ -+ memcpy_u64s(unwritten_whiteouts_start(c, b), -+ new_whiteouts, b->whiteout_u64s); -+ -+ btree_bounce_free(c, bytes, used_mempool, new_whiteouts); -+} -+ -+static bool should_compact_bset(struct btree *b, struct bset_tree *t, -+ bool compacting, enum compact_mode mode) -+{ -+ if (!bset_dead_u64s(b, t)) -+ return false; -+ -+ switch (mode) { -+ case COMPACT_LAZY: -+ return should_compact_bset_lazy(b, t) || -+ (compacting && !bset_written(b, bset(b, t))); -+ case COMPACT_ALL: -+ return true; -+ default: -+ BUG(); -+ } -+} -+ -+static bool bch2_compact_extent_whiteouts(struct bch_fs *c, -+ struct btree *b, -+ enum compact_mode mode) -+{ -+ const struct bkey_format *f = &b->format; -+ struct bset_tree *t; -+ struct bkey_packed *whiteouts = NULL; -+ struct bkey_packed *u_start, *u_pos; -+ struct sort_iter sort_iter; -+ unsigned bytes, whiteout_u64s = 0, u64s; -+ bool used_mempool, compacting = false; -+ -+ BUG_ON(!btree_node_is_extents(b)); -+ -+ for_each_bset(b, t) -+ if (should_compact_bset(b, t, whiteout_u64s != 0, mode)) -+ whiteout_u64s += bset_dead_u64s(b, t); -+ -+ if (!whiteout_u64s) -+ return false; -+ -+ bch2_sort_whiteouts(c, b); -+ -+ sort_iter_init(&sort_iter, b); -+ -+ whiteout_u64s += b->whiteout_u64s; -+ bytes = whiteout_u64s * sizeof(u64); -+ -+ whiteouts = btree_bounce_alloc(c, bytes, &used_mempool); -+ u_start = u_pos = whiteouts; -+ -+ memcpy_u64s(u_pos, unwritten_whiteouts_start(c, b), -+ b->whiteout_u64s); -+ u_pos = (void *) u_pos + b->whiteout_u64s * sizeof(u64); -+ -+ sort_iter_add(&sort_iter, u_start, u_pos); -+ -+ for_each_bset(b, t) { -+ struct bset *i = bset(b, t); -+ struct bkey_packed *k, *n, *out, *start, *end; -+ struct btree_node_entry *src = NULL, *dst = NULL; -+ -+ if (t != b->set && !bset_written(b, i)) { -+ src = container_of(i, struct btree_node_entry, keys); -+ dst = max(write_block(b), -+ (void *) btree_bkey_last(b, t - 1)); -+ } -+ -+ if (src != dst) -+ compacting = true; -+ -+ if (!should_compact_bset(b, t, compacting, mode)) { -+ if (src != dst) { -+ memmove(dst, src, sizeof(*src) + -+ le16_to_cpu(src->keys.u64s) * -+ sizeof(u64)); -+ i = &dst->keys; -+ set_btree_bset(b, t, i); -+ } -+ continue; -+ } -+ -+ compacting = true; -+ u_start = u_pos; -+ start = i->start; -+ end = vstruct_last(i); -+ -+ if (src != dst) { -+ memmove(dst, src, sizeof(*src)); -+ i = &dst->keys; -+ set_btree_bset(b, t, i); -+ } -+ -+ out = i->start; -+ -+ for (k = start; k != end; k = n) { -+ n = bkey_next_skip_noops(k, end); -+ -+ if (bkey_deleted(k)) -+ continue; 
-+ -+ BUG_ON(bkey_whiteout(k) && -+ k->needs_whiteout && -+ bkey_written(b, k)); -+ -+ if (bkey_whiteout(k) && !k->needs_whiteout) -+ continue; -+ -+ if (bkey_whiteout(k)) { -+ memcpy_u64s(u_pos, k, bkeyp_key_u64s(f, k)); -+ set_bkeyp_val_u64s(f, u_pos, 0); -+ u_pos = bkey_next(u_pos); -+ } else { -+ bkey_copy(out, k); -+ out = bkey_next(out); -+ } -+ } -+ -+ sort_iter_add(&sort_iter, u_start, u_pos); -+ -+ i->u64s = cpu_to_le16((u64 *) out - i->_data); -+ set_btree_bset_end(b, t); -+ bch2_bset_set_no_aux_tree(b, t); -+ } -+ -+ b->whiteout_u64s = (u64 *) u_pos - (u64 *) whiteouts; -+ -+ BUG_ON((void *) unwritten_whiteouts_start(c, b) < -+ (void *) btree_bkey_last(b, bset_tree_last(b))); -+ -+ u64s = bch2_sort_extent_whiteouts(unwritten_whiteouts_start(c, b), -+ &sort_iter); -+ -+ BUG_ON(u64s > b->whiteout_u64s); -+ BUG_ON(u_pos != whiteouts && !u64s); -+ -+ if (u64s != b->whiteout_u64s) { -+ void *src = unwritten_whiteouts_start(c, b); -+ -+ b->whiteout_u64s = u64s; -+ memmove_u64s_up(unwritten_whiteouts_start(c, b), src, u64s); -+ } -+ -+ verify_no_dups(b, -+ unwritten_whiteouts_start(c, b), -+ unwritten_whiteouts_end(c, b), -+ true); -+ -+ btree_bounce_free(c, bytes, used_mempool, whiteouts); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ bch_btree_keys_u64s_remaining(c, b); -+ bch2_verify_btree_nr_keys(b); -+ -+ return true; -+} -+ -+static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode) -+{ -+ struct bset_tree *t; -+ bool ret = false; -+ -+ for_each_bset(b, t) { -+ struct bset *i = bset(b, t); -+ struct bkey_packed *k, *n, *out, *start, *end; -+ struct btree_node_entry *src = NULL, *dst = NULL; -+ -+ if (t != b->set && !bset_written(b, i)) { -+ src = container_of(i, struct btree_node_entry, keys); -+ dst = max(write_block(b), -+ (void *) btree_bkey_last(b, t - 1)); -+ } -+ -+ if (src != dst) -+ ret = true; -+ -+ if (!should_compact_bset(b, t, ret, mode)) { -+ if (src != dst) { -+ memmove(dst, src, sizeof(*src) + -+ le16_to_cpu(src->keys.u64s) * -+ sizeof(u64)); -+ i = &dst->keys; -+ set_btree_bset(b, t, i); -+ } -+ continue; -+ } -+ -+ start = btree_bkey_first(b, t); -+ end = btree_bkey_last(b, t); -+ -+ if (src != dst) { -+ memmove(dst, src, sizeof(*src)); -+ i = &dst->keys; -+ set_btree_bset(b, t, i); -+ } -+ -+ out = i->start; -+ -+ for (k = start; k != end; k = n) { -+ n = bkey_next_skip_noops(k, end); -+ -+ if (!bkey_whiteout(k)) { -+ bkey_copy(out, k); -+ out = bkey_next(out); -+ } else { -+ BUG_ON(k->needs_whiteout); -+ } -+ } -+ -+ i->u64s = cpu_to_le16((u64 *) out - i->_data); -+ set_btree_bset_end(b, t); -+ bch2_bset_set_no_aux_tree(b, t); -+ ret = true; -+ } -+ -+ bch2_verify_btree_nr_keys(b); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ return ret; -+} -+ -+bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, -+ enum compact_mode mode) -+{ -+ return !btree_node_old_extent_overwrite(b) -+ ? 
bch2_drop_whiteouts(b, mode) -+ : bch2_compact_extent_whiteouts(c, b, mode); -+} -+ -+static void btree_node_sort(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter, -+ unsigned start_idx, -+ unsigned end_idx, -+ bool filter_whiteouts) -+{ -+ struct btree_node *out; -+ struct sort_iter sort_iter; -+ struct bset_tree *t; -+ struct bset *start_bset = bset(b, &b->set[start_idx]); -+ bool used_mempool = false; -+ u64 start_time, seq = 0; -+ unsigned i, u64s = 0, bytes, shift = end_idx - start_idx - 1; -+ bool sorting_entire_node = start_idx == 0 && -+ end_idx == b->nsets; -+ -+ sort_iter_init(&sort_iter, b); -+ -+ for (t = b->set + start_idx; -+ t < b->set + end_idx; -+ t++) { -+ u64s += le16_to_cpu(bset(b, t)->u64s); -+ sort_iter_add(&sort_iter, -+ btree_bkey_first(b, t), -+ btree_bkey_last(b, t)); -+ } -+ -+ bytes = sorting_entire_node -+ ? btree_bytes(c) -+ : __vstruct_bytes(struct btree_node, u64s); -+ -+ out = btree_bounce_alloc(c, bytes, &used_mempool); -+ -+ start_time = local_clock(); -+ -+ if (btree_node_old_extent_overwrite(b)) -+ filter_whiteouts = bset_written(b, start_bset); -+ -+ u64s = (btree_node_old_extent_overwrite(b) -+ ? bch2_sort_extents -+ : bch2_sort_keys)(out->keys.start, -+ &sort_iter, -+ filter_whiteouts); -+ -+ out->keys.u64s = cpu_to_le16(u64s); -+ -+ BUG_ON(vstruct_end(&out->keys) > (void *) out + bytes); -+ -+ if (sorting_entire_node) -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort], -+ start_time); -+ -+ /* Make sure we preserve bset journal_seq: */ -+ for (t = b->set + start_idx; t < b->set + end_idx; t++) -+ seq = max(seq, le64_to_cpu(bset(b, t)->journal_seq)); -+ start_bset->journal_seq = cpu_to_le64(seq); -+ -+ if (sorting_entire_node) { -+ unsigned u64s = le16_to_cpu(out->keys.u64s); -+ -+ BUG_ON(bytes != btree_bytes(c)); -+ -+ /* -+ * Our temporary buffer is the same size as the btree node's -+ * buffer, we can just swap buffers instead of doing a big -+ * memcpy() -+ */ -+ *out = *b->data; -+ out->keys.u64s = cpu_to_le16(u64s); -+ swap(out, b->data); -+ set_btree_bset(b, b->set, &b->data->keys); -+ } else { -+ start_bset->u64s = out->keys.u64s; -+ memcpy_u64s(start_bset->start, -+ out->keys.start, -+ le16_to_cpu(out->keys.u64s)); -+ } -+ -+ for (i = start_idx + 1; i < end_idx; i++) -+ b->nr.bset_u64s[start_idx] += -+ b->nr.bset_u64s[i]; -+ -+ b->nsets -= shift; -+ -+ for (i = start_idx + 1; i < b->nsets; i++) { -+ b->nr.bset_u64s[i] = b->nr.bset_u64s[i + shift]; -+ b->set[i] = b->set[i + shift]; -+ } -+ -+ for (i = b->nsets; i < MAX_BSETS; i++) -+ b->nr.bset_u64s[i] = 0; -+ -+ set_btree_bset_end(b, &b->set[start_idx]); -+ bch2_bset_set_no_aux_tree(b, &b->set[start_idx]); -+ -+ btree_bounce_free(c, bytes, used_mempool, out); -+ -+ bch2_verify_btree_nr_keys(b); -+} -+ -+void bch2_btree_sort_into(struct bch_fs *c, -+ struct btree *dst, -+ struct btree *src) -+{ -+ struct btree_nr_keys nr; -+ struct btree_node_iter src_iter; -+ u64 start_time = local_clock(); -+ -+ BUG_ON(dst->nsets != 1); -+ -+ bch2_bset_set_no_aux_tree(dst, dst->set); -+ -+ bch2_btree_node_iter_init_from_start(&src_iter, src); -+ -+ if (btree_node_is_extents(src)) -+ nr = bch2_sort_repack_merge(c, btree_bset_first(dst), -+ src, &src_iter, -+ &dst->format, -+ true); -+ else -+ nr = bch2_sort_repack(btree_bset_first(dst), -+ src, &src_iter, -+ &dst->format, -+ true); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort], -+ start_time); -+ -+ set_btree_bset_end(dst, dst->set); -+ -+ dst->nr.live_u64s += nr.live_u64s; -+ dst->nr.bset_u64s[0] += 
nr.bset_u64s[0]; -+ dst->nr.packed_keys += nr.packed_keys; -+ dst->nr.unpacked_keys += nr.unpacked_keys; -+ -+ bch2_verify_btree_nr_keys(dst); -+} -+ -+#define SORT_CRIT (4096 / sizeof(u64)) -+ -+/* -+ * We're about to add another bset to the btree node, so if there's currently -+ * too many bsets - sort some of them together: -+ */ -+static bool btree_node_compact(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter) -+{ -+ unsigned unwritten_idx; -+ bool ret = false; -+ -+ for (unwritten_idx = 0; -+ unwritten_idx < b->nsets; -+ unwritten_idx++) -+ if (!bset_written(b, bset(b, &b->set[unwritten_idx]))) -+ break; -+ -+ if (b->nsets - unwritten_idx > 1) { -+ btree_node_sort(c, b, iter, unwritten_idx, -+ b->nsets, false); -+ ret = true; -+ } -+ -+ if (unwritten_idx > 1) { -+ btree_node_sort(c, b, iter, 0, unwritten_idx, false); -+ ret = true; -+ } -+ -+ return ret; -+} -+ -+void bch2_btree_build_aux_trees(struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) -+ bch2_bset_build_aux_tree(b, t, -+ !bset_written(b, bset(b, t)) && -+ t == bset_tree_last(b)); -+} -+ -+/* -+ * @bch_btree_init_next - initialize a new (unwritten) bset that can then be -+ * inserted into -+ * -+ * Safe to call if there already is an unwritten bset - will only add a new bset -+ * if @b doesn't already have one. -+ * -+ * Returns true if we sorted (i.e. invalidated iterators -+ */ -+void bch2_btree_init_next(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter) -+{ -+ struct btree_node_entry *bne; -+ bool did_sort; -+ -+ EBUG_ON(!(b->c.lock.state.seq & 1)); -+ EBUG_ON(iter && iter->l[b->c.level].b != b); -+ -+ did_sort = btree_node_compact(c, b, iter); -+ -+ bne = want_new_bset(c, b); -+ if (bne) -+ bch2_bset_init_next(c, b, bne); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ if (iter && did_sort) -+ bch2_btree_iter_reinit_node(iter, b); -+} -+ -+static void btree_err_msg(struct printbuf *out, struct bch_fs *c, -+ struct btree *b, struct bset *i, -+ unsigned offset, int write) -+{ -+ pr_buf(out, "error validating btree node %sat btree %u level %u/%u\n" -+ "pos ", -+ write ? "before write " : "", -+ b->c.btree_id, b->c.level, -+ c->btree_roots[b->c.btree_id].level); -+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); -+ -+ pr_buf(out, " node offset %u", b->written); -+ if (i) -+ pr_buf(out, " bset u64s %u", le16_to_cpu(i->u64s)); -+} -+ -+enum btree_err_type { -+ BTREE_ERR_FIXABLE, -+ BTREE_ERR_WANT_RETRY, -+ BTREE_ERR_MUST_RETRY, -+ BTREE_ERR_FATAL, -+}; -+ -+enum btree_validate_ret { -+ BTREE_RETRY_READ = 64, -+}; -+ -+#define btree_err(type, c, b, i, msg, ...) 
\ -+({ \ -+ __label__ out; \ -+ char _buf[300]; \ -+ struct printbuf out = PBUF(_buf); \ -+ \ -+ btree_err_msg(&out, c, b, i, b->written, write); \ -+ pr_buf(&out, ": " msg, ##__VA_ARGS__); \ -+ \ -+ if (type == BTREE_ERR_FIXABLE && \ -+ write == READ && \ -+ !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { \ -+ mustfix_fsck_err(c, "%s", _buf); \ -+ goto out; \ -+ } \ -+ \ -+ switch (write) { \ -+ case READ: \ -+ bch_err(c, "%s", _buf); \ -+ \ -+ switch (type) { \ -+ case BTREE_ERR_FIXABLE: \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ case BTREE_ERR_WANT_RETRY: \ -+ if (have_retry) { \ -+ ret = BTREE_RETRY_READ; \ -+ goto fsck_err; \ -+ } \ -+ break; \ -+ case BTREE_ERR_MUST_RETRY: \ -+ ret = BTREE_RETRY_READ; \ -+ goto fsck_err; \ -+ case BTREE_ERR_FATAL: \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ } \ -+ break; \ -+ case WRITE: \ -+ bch_err(c, "corrupt metadata before write: %s", _buf); \ -+ \ -+ if (bch2_fs_inconsistent(c)) { \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ } \ -+ break; \ -+ } \ -+out: \ -+ true; \ -+}) -+ -+#define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false) -+ -+static int validate_bset(struct bch_fs *c, struct btree *b, -+ struct bset *i, unsigned sectors, -+ int write, bool have_retry) -+{ -+ unsigned version = le16_to_cpu(i->version); -+ const char *err; -+ int ret = 0; -+ -+ btree_err_on((version != BCH_BSET_VERSION_OLD && -+ version < bcachefs_metadata_version_min) || -+ version >= bcachefs_metadata_version_max, -+ BTREE_ERR_FATAL, c, b, i, -+ "unsupported bset version"); -+ -+ if (btree_err_on(b->written + sectors > c->opts.btree_node_size, -+ BTREE_ERR_FIXABLE, c, b, i, -+ "bset past end of btree node")) { -+ i->u64s = 0; -+ return 0; -+ } -+ -+ btree_err_on(b->written && !i->u64s, -+ BTREE_ERR_FIXABLE, c, b, i, -+ "empty bset"); -+ -+ if (!b->written) { -+ struct btree_node *bn = -+ container_of(i, struct btree_node, keys); -+ /* These indicate that we read the wrong btree node: */ -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bch_btree_ptr_v2 *bp = -+ &bkey_i_to_btree_ptr_v2(&b->key)->v; -+ -+ /* XXX endianness */ -+ btree_err_on(bp->seq != bn->keys.seq, -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "incorrect sequence number (wrong btree node)"); -+ } -+ -+ btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id, -+ BTREE_ERR_MUST_RETRY, c, b, i, -+ "incorrect btree id"); -+ -+ btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level, -+ BTREE_ERR_MUST_RETRY, c, b, i, -+ "incorrect level"); -+ -+ if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) { -+ u64 *p = (u64 *) &bn->ptr; -+ -+ *p = swab64(*p); -+ } -+ -+ if (!write) -+ compat_btree_node(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, bn); -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bch_btree_ptr_v2 *bp = -+ &bkey_i_to_btree_ptr_v2(&b->key)->v; -+ -+ btree_err_on(bkey_cmp(b->data->min_key, bp->min_key), -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "incorrect min_key: got %llu:%llu should be %llu:%llu", -+ b->data->min_key.inode, -+ b->data->min_key.offset, -+ bp->min_key.inode, -+ bp->min_key.offset); -+ } -+ -+ btree_err_on(bkey_cmp(bn->max_key, b->key.k.p), -+ BTREE_ERR_MUST_RETRY, c, b, i, -+ "incorrect max key"); -+ -+ if (write) -+ compat_btree_node(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, bn); -+ -+ /* XXX: ideally we would be validating min_key too */ -+#if 0 -+ /* -+ * not correct anymore, due to btree node write error -+ * handling -+ * -+ * need to add bn->seq to btree keys and 
verify -+ * against that -+ */ -+ btree_err_on(!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key), -+ bn->ptr), -+ BTREE_ERR_FATAL, c, b, i, -+ "incorrect backpointer"); -+#endif -+ err = bch2_bkey_format_validate(&bn->format); -+ btree_err_on(err, -+ BTREE_ERR_FATAL, c, b, i, -+ "invalid bkey format: %s", err); -+ -+ compat_bformat(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, -+ &bn->format); -+ } -+fsck_err: -+ return ret; -+} -+ -+static int validate_bset_keys(struct bch_fs *c, struct btree *b, -+ struct bset *i, unsigned *whiteout_u64s, -+ int write, bool have_retry) -+{ -+ unsigned version = le16_to_cpu(i->version); -+ struct bkey_packed *k, *prev = NULL; -+ bool seen_non_whiteout = false; -+ int ret = 0; -+ -+ if (!BSET_SEPARATE_WHITEOUTS(i)) { -+ seen_non_whiteout = true; -+ *whiteout_u64s = 0; -+ } -+ -+ for (k = i->start; -+ k != vstruct_last(i);) { -+ struct bkey_s u; -+ struct bkey tmp; -+ const char *invalid; -+ -+ if (btree_err_on(bkey_next(k) > vstruct_last(i), -+ BTREE_ERR_FIXABLE, c, b, i, -+ "key extends past end of bset")) { -+ i->u64s = cpu_to_le16((u64 *) k - i->_data); -+ break; -+ } -+ -+ if (btree_err_on(k->format > KEY_FORMAT_CURRENT, -+ BTREE_ERR_FIXABLE, c, b, i, -+ "invalid bkey format %u", k->format)) { -+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); -+ memmove_u64s_down(k, bkey_next(k), -+ (u64 *) vstruct_end(i) - (u64 *) k); -+ continue; -+ } -+ -+ /* XXX: validate k->u64s */ -+ if (!write) -+ bch2_bkey_compat(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, -+ &b->format, k); -+ -+ u = __bkey_disassemble(b, k, &tmp); -+ -+ invalid = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b)) ?: -+ bch2_bkey_in_btree_node(b, u.s_c) ?: -+ (write ? bch2_bkey_val_invalid(c, u.s_c) : NULL); -+ if (invalid) { -+ char buf[160]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c); -+ btree_err(BTREE_ERR_FIXABLE, c, b, i, -+ "invalid bkey:\n%s\n%s", invalid, buf); -+ -+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); -+ memmove_u64s_down(k, bkey_next(k), -+ (u64 *) vstruct_end(i) - (u64 *) k); -+ continue; -+ } -+ -+ if (write) -+ bch2_bkey_compat(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, -+ &b->format, k); -+ -+ /* -+ * with the separate whiteouts thing (used for extents), the -+ * second set of keys actually can have whiteouts too, so we -+ * can't solely go off bkey_whiteout()... 
-+ */ -+ -+ if (!seen_non_whiteout && -+ (!bkey_whiteout(k) || -+ (prev && bkey_iter_cmp(b, prev, k) > 0))) { -+ *whiteout_u64s = k->_data - i->_data; -+ seen_non_whiteout = true; -+ } else if (prev && bkey_iter_cmp(b, prev, k) > 0) { -+ char buf1[80]; -+ char buf2[80]; -+ struct bkey up = bkey_unpack_key(b, prev); -+ -+ bch2_bkey_to_text(&PBUF(buf1), &up); -+ bch2_bkey_to_text(&PBUF(buf2), u.k); -+ -+ bch2_dump_bset(c, b, i, 0); -+ btree_err(BTREE_ERR_FATAL, c, b, i, -+ "keys out of order: %s > %s", -+ buf1, buf2); -+ /* XXX: repair this */ -+ } -+ -+ prev = k; -+ k = bkey_next_skip_noops(k, vstruct_last(i)); -+ } -+fsck_err: -+ return ret; -+} -+ -+int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry) -+{ -+ struct btree_node_entry *bne; -+ struct sort_iter *iter; -+ struct btree_node *sorted; -+ struct bkey_packed *k; -+ struct bch_extent_ptr *ptr; -+ struct bset *i; -+ bool used_mempool, blacklisted; -+ unsigned u64s; -+ int ret, retry_read = 0, write = READ; -+ -+ iter = mempool_alloc(&c->fill_iter, GFP_NOIO); -+ sort_iter_init(iter, b); -+ iter->size = (btree_blocks(c) + 1) * 2; -+ -+ if (bch2_meta_read_fault("btree")) -+ btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "dynamic fault"); -+ -+ btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c), -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "bad magic"); -+ -+ btree_err_on(!b->data->keys.seq, -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "bad btree header"); -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bch_btree_ptr_v2 *bp = -+ &bkey_i_to_btree_ptr_v2(&b->key)->v; -+ -+ btree_err_on(b->data->keys.seq != bp->seq, -+ BTREE_ERR_MUST_RETRY, c, b, NULL, -+ "got wrong btree node (seq %llx want %llx)", -+ b->data->keys.seq, bp->seq); -+ } -+ -+ while (b->written < c->opts.btree_node_size) { -+ unsigned sectors, whiteout_u64s = 0; -+ struct nonce nonce; -+ struct bch_csum csum; -+ bool first = !b->written; -+ -+ if (!b->written) { -+ i = &b->data->keys; -+ -+ btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), -+ BTREE_ERR_WANT_RETRY, c, b, i, -+ "unknown checksum type"); -+ -+ nonce = btree_nonce(i, b->written << 9); -+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); -+ -+ btree_err_on(bch2_crc_cmp(csum, b->data->csum), -+ BTREE_ERR_WANT_RETRY, c, b, i, -+ "invalid checksum"); -+ -+ bset_encrypt(c, i, b->written << 9); -+ -+ if (btree_node_is_extents(b) && -+ !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) { -+ set_btree_node_old_extent_overwrite(b); -+ set_btree_node_need_rewrite(b); -+ } -+ -+ sectors = vstruct_sectors(b->data, c->block_bits); -+ } else { -+ bne = write_block(b); -+ i = &bne->keys; -+ -+ if (i->seq != b->data->keys.seq) -+ break; -+ -+ btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), -+ BTREE_ERR_WANT_RETRY, c, b, i, -+ "unknown checksum type"); -+ -+ nonce = btree_nonce(i, b->written << 9); -+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); -+ -+ btree_err_on(bch2_crc_cmp(csum, bne->csum), -+ BTREE_ERR_WANT_RETRY, c, b, i, -+ "invalid checksum"); -+ -+ bset_encrypt(c, i, b->written << 9); -+ -+ sectors = vstruct_sectors(bne, c->block_bits); -+ } -+ -+ ret = validate_bset(c, b, i, sectors, -+ READ, have_retry); -+ if (ret) -+ goto fsck_err; -+ -+ if (!b->written) -+ btree_node_set_format(b, b->data->format); -+ -+ ret = validate_bset_keys(c, b, i, &whiteout_u64s, -+ READ, have_retry); -+ if (ret) -+ goto fsck_err; -+ -+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); -+ -+ b->written += sectors; -+ -+ blacklisted = bch2_journal_seq_is_blacklisted(c, -+ 
le64_to_cpu(i->journal_seq), -+ true); -+ -+ btree_err_on(blacklisted && first, -+ BTREE_ERR_FIXABLE, c, b, i, -+ "first btree node bset has blacklisted journal seq"); -+ if (blacklisted && !first) -+ continue; -+ -+ sort_iter_add(iter, i->start, -+ vstruct_idx(i, whiteout_u64s)); -+ -+ sort_iter_add(iter, -+ vstruct_idx(i, whiteout_u64s), -+ vstruct_last(i)); -+ } -+ -+ for (bne = write_block(b); -+ bset_byte_offset(b, bne) < btree_bytes(c); -+ bne = (void *) bne + block_bytes(c)) -+ btree_err_on(bne->keys.seq == b->data->keys.seq, -+ BTREE_ERR_WANT_RETRY, c, b, NULL, -+ "found bset signature after last bset"); -+ -+ sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool); -+ sorted->keys.u64s = 0; -+ -+ set_btree_bset(b, b->set, &b->data->keys); -+ -+ b->nr = (btree_node_old_extent_overwrite(b) -+ ? bch2_extent_sort_fix_overlapping -+ : bch2_key_sort_fix_overlapping)(c, &sorted->keys, iter); -+ -+ u64s = le16_to_cpu(sorted->keys.u64s); -+ *sorted = *b->data; -+ sorted->keys.u64s = cpu_to_le16(u64s); -+ swap(sorted, b->data); -+ set_btree_bset(b, b->set, &b->data->keys); -+ b->nsets = 1; -+ -+ BUG_ON(b->nr.live_u64s != u64s); -+ -+ btree_bounce_free(c, btree_bytes(c), used_mempool, sorted); -+ -+ i = &b->data->keys; -+ for (k = i->start; k != vstruct_last(i);) { -+ struct bkey tmp; -+ struct bkey_s u = __bkey_disassemble(b, k, &tmp); -+ const char *invalid = bch2_bkey_val_invalid(c, u.s_c); -+ -+ if (invalid || -+ (inject_invalid_keys(c) && -+ !bversion_cmp(u.k->version, MAX_VERSION))) { -+ char buf[160]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c); -+ btree_err(BTREE_ERR_FIXABLE, c, b, i, -+ "invalid bkey %s: %s", buf, invalid); -+ -+ btree_keys_account_key_drop(&b->nr, 0, k); -+ -+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); -+ memmove_u64s_down(k, bkey_next(k), -+ (u64 *) vstruct_end(i) - (u64 *) k); -+ set_btree_bset_end(b, b->set); -+ continue; -+ } -+ -+ if (u.k->type == KEY_TYPE_btree_ptr_v2) { -+ struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u); -+ -+ bp.v->mem_ptr = 0; -+ } -+ -+ k = bkey_next_skip_noops(k, vstruct_last(i)); -+ } -+ -+ bch2_bset_build_aux_tree(b, b->set, false); -+ -+ set_needs_whiteout(btree_bset_first(b), true); -+ -+ btree_node_reset_sib_u64s(b); -+ -+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ if (ca->mi.state != BCH_MEMBER_STATE_RW) -+ set_btree_node_need_rewrite(b); -+ } -+out: -+ mempool_free(iter, &c->fill_iter); -+ return retry_read; -+fsck_err: -+ if (ret == BTREE_RETRY_READ) { -+ retry_read = 1; -+ } else { -+ bch2_inconsistent_error(c); -+ set_btree_node_read_error(b); -+ } -+ goto out; -+} -+ -+static void btree_node_read_work(struct work_struct *work) -+{ -+ struct btree_read_bio *rb = -+ container_of(work, struct btree_read_bio, work); -+ struct bch_fs *c = rb->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); -+ struct btree *b = rb->bio.bi_private; -+ struct bio *bio = &rb->bio; -+ struct bch_io_failures failed = { .nr = 0 }; -+ bool can_retry; -+ -+ goto start; -+ while (1) { -+ bch_info(c, "retrying read"); -+ ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); -+ rb->have_ioref = bch2_dev_get_ioref(ca, READ); -+ bio_reset(bio); -+ bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META; -+ bio->bi_iter.bi_sector = rb->pick.ptr.offset; -+ bio->bi_iter.bi_size = btree_bytes(c); -+ -+ if (rb->have_ioref) { -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ submit_bio_wait(bio); -+ } else { -+ bio->bi_status = BLK_STS_REMOVED; -+ } -+start: 
-+ bch2_dev_io_err_on(bio->bi_status, ca, "btree read: %s", -+ bch2_blk_status_to_str(bio->bi_status)); -+ if (rb->have_ioref) -+ percpu_ref_put(&ca->io_ref); -+ rb->have_ioref = false; -+ -+ bch2_mark_io_failure(&failed, &rb->pick); -+ -+ can_retry = bch2_bkey_pick_read_device(c, -+ bkey_i_to_s_c(&b->key), -+ &failed, &rb->pick) > 0; -+ -+ if (!bio->bi_status && -+ !bch2_btree_node_read_done(c, b, can_retry)) -+ break; -+ -+ if (!can_retry) { -+ set_btree_node_read_error(b); -+ break; -+ } -+ } -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], -+ rb->start_time); -+ bio_put(&rb->bio); -+ clear_btree_node_read_in_flight(b); -+ wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); -+} -+ -+static void btree_node_read_endio(struct bio *bio) -+{ -+ struct btree_read_bio *rb = -+ container_of(bio, struct btree_read_bio, bio); -+ struct bch_fs *c = rb->c; -+ -+ if (rb->have_ioref) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); -+ bch2_latency_acct(ca, rb->start_time, READ); -+ } -+ -+ queue_work(system_unbound_wq, &rb->work); -+} -+ -+void bch2_btree_node_read(struct bch_fs *c, struct btree *b, -+ bool sync) -+{ -+ struct extent_ptr_decoded pick; -+ struct btree_read_bio *rb; -+ struct bch_dev *ca; -+ struct bio *bio; -+ int ret; -+ -+ trace_btree_read(c, b); -+ -+ ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), -+ NULL, &pick); -+ if (bch2_fs_fatal_err_on(ret <= 0, c, -+ "btree node read error: no device to read from")) { -+ set_btree_node_read_error(b); -+ return; -+ } -+ -+ ca = bch_dev_bkey_exists(c, pick.ptr.dev); -+ -+ bio = bio_alloc_bioset(GFP_NOIO, buf_pages(b->data, -+ btree_bytes(c)), -+ &c->btree_bio); -+ rb = container_of(bio, struct btree_read_bio, bio); -+ rb->c = c; -+ rb->start_time = local_clock(); -+ rb->have_ioref = bch2_dev_get_ioref(ca, READ); -+ rb->pick = pick; -+ INIT_WORK(&rb->work, btree_node_read_work); -+ bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META; -+ bio->bi_iter.bi_sector = pick.ptr.offset; -+ bio->bi_end_io = btree_node_read_endio; -+ bio->bi_private = b; -+ bch2_bio_map(bio, b->data, btree_bytes(c)); -+ -+ set_btree_node_read_in_flight(b); -+ -+ if (rb->have_ioref) { -+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree], -+ bio_sectors(bio)); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ -+ if (sync) { -+ submit_bio_wait(bio); -+ -+ bio->bi_private = b; -+ btree_node_read_work(&rb->work); -+ } else { -+ submit_bio(bio); -+ } -+ } else { -+ bio->bi_status = BLK_STS_REMOVED; -+ -+ if (sync) -+ btree_node_read_work(&rb->work); -+ else -+ queue_work(system_unbound_wq, &rb->work); -+ -+ } -+} -+ -+int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, -+ const struct bkey_i *k, unsigned level) -+{ -+ struct closure cl; -+ struct btree *b; -+ int ret; -+ -+ closure_init_stack(&cl); -+ -+ do { -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); -+ closure_sync(&cl); -+ } while (ret); -+ -+ b = bch2_btree_node_mem_alloc(c); -+ bch2_btree_cache_cannibalize_unlock(c); -+ -+ BUG_ON(IS_ERR(b)); -+ -+ bkey_copy(&b->key, k); -+ BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id)); -+ -+ bch2_btree_node_read(c, b, true); -+ -+ if (btree_node_read_error(b)) { -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ mutex_lock(&c->btree_cache.lock); -+ list_move(&b->list, &c->btree_cache.freeable); -+ mutex_unlock(&c->btree_cache.lock); -+ -+ ret = -EIO; -+ goto err; -+ } -+ -+ bch2_btree_set_root_for_read(c, b); -+err: -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ -+ return ret; -+} -+ 
-+void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, -+ struct btree_write *w) -+{ -+ unsigned long old, new, v = READ_ONCE(b->will_make_reachable); -+ -+ do { -+ old = new = v; -+ if (!(old & 1)) -+ break; -+ -+ new &= ~1UL; -+ } while ((v = cmpxchg(&b->will_make_reachable, old, new)) != old); -+ -+ if (old & 1) -+ closure_put(&((struct btree_update *) new)->cl); -+ -+ bch2_journal_pin_drop(&c->journal, &w->journal); -+} -+ -+static void btree_node_write_done(struct bch_fs *c, struct btree *b) -+{ -+ struct btree_write *w = btree_prev_write(b); -+ -+ bch2_btree_complete_write(c, b, w); -+ btree_node_io_unlock(b); -+} -+ -+static void bch2_btree_node_write_error(struct bch_fs *c, -+ struct btree_write_bio *wbio) -+{ -+ struct btree *b = wbio->wbio.bio.bi_private; -+ __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; -+ struct bch_extent_ptr *ptr; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_node_iter(&trans, b->c.btree_id, b->key.k.p, -+ BTREE_MAX_DEPTH, b->c.level, 0); -+retry: -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ goto err; -+ -+ /* has node been freed? */ -+ if (iter->l[b->c.level].b != b) { -+ /* node has been freed: */ -+ BUG_ON(!btree_node_dying(b)); -+ goto out; -+ } -+ -+ BUG_ON(!btree_node_hashed(b)); -+ -+ bkey_copy(&tmp.k, &b->key); -+ -+ bch2_bkey_drop_ptrs(bkey_i_to_s(&tmp.k), ptr, -+ bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev)); -+ -+ if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&tmp.k))) -+ goto err; -+ -+ ret = bch2_btree_node_update_key(c, iter, b, &tmp.k); -+ if (ret == -EINTR) -+ goto retry; -+ if (ret) -+ goto err; -+out: -+ bch2_trans_exit(&trans); -+ bio_put(&wbio->wbio.bio); -+ btree_node_write_done(c, b); -+ return; -+err: -+ set_btree_node_noevict(b); -+ bch2_fs_fatal_error(c, "fatal error writing btree node"); -+ goto out; -+} -+ -+void bch2_btree_write_error_work(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(work, struct bch_fs, -+ btree_write_error_work); -+ struct bio *bio; -+ -+ while (1) { -+ spin_lock_irq(&c->btree_write_error_lock); -+ bio = bio_list_pop(&c->btree_write_error_list); -+ spin_unlock_irq(&c->btree_write_error_lock); -+ -+ if (!bio) -+ break; -+ -+ bch2_btree_node_write_error(c, -+ container_of(bio, struct btree_write_bio, wbio.bio)); -+ } -+} -+ -+static void btree_node_write_work(struct work_struct *work) -+{ -+ struct btree_write_bio *wbio = -+ container_of(work, struct btree_write_bio, work); -+ struct bch_fs *c = wbio->wbio.c; -+ struct btree *b = wbio->wbio.bio.bi_private; -+ -+ btree_bounce_free(c, -+ wbio->bytes, -+ wbio->wbio.used_mempool, -+ wbio->data); -+ -+ if (wbio->wbio.failed.nr) { -+ unsigned long flags; -+ -+ spin_lock_irqsave(&c->btree_write_error_lock, flags); -+ bio_list_add(&c->btree_write_error_list, &wbio->wbio.bio); -+ spin_unlock_irqrestore(&c->btree_write_error_lock, flags); -+ -+ queue_work(c->wq, &c->btree_write_error_work); -+ return; -+ } -+ -+ bio_put(&wbio->wbio.bio); -+ btree_node_write_done(c, b); -+} -+ -+static void btree_node_write_endio(struct bio *bio) -+{ -+ struct bch_write_bio *wbio = to_wbio(bio); -+ struct bch_write_bio *parent = wbio->split ? 
wbio->parent : NULL; -+ struct bch_write_bio *orig = parent ?: wbio; -+ struct bch_fs *c = wbio->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev); -+ unsigned long flags; -+ -+ if (wbio->have_ioref) -+ bch2_latency_acct(ca, wbio->submit_time, WRITE); -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write: %s", -+ bch2_blk_status_to_str(bio->bi_status)) || -+ bch2_meta_write_fault("btree")) { -+ spin_lock_irqsave(&c->btree_write_error_lock, flags); -+ bch2_dev_list_add_dev(&orig->failed, wbio->dev); -+ spin_unlock_irqrestore(&c->btree_write_error_lock, flags); -+ } -+ -+ if (wbio->have_ioref) -+ percpu_ref_put(&ca->io_ref); -+ -+ if (parent) { -+ bio_put(bio); -+ bio_endio(&parent->bio); -+ } else { -+ struct btree_write_bio *wb = -+ container_of(orig, struct btree_write_bio, wbio); -+ -+ INIT_WORK(&wb->work, btree_node_write_work); -+ queue_work(system_unbound_wq, &wb->work); -+ } -+} -+ -+static int validate_bset_for_write(struct bch_fs *c, struct btree *b, -+ struct bset *i, unsigned sectors) -+{ -+ unsigned whiteout_u64s = 0; -+ int ret; -+ -+ if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE)) -+ return -1; -+ -+ ret = validate_bset(c, b, i, sectors, WRITE, false) ?: -+ validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false); -+ if (ret) -+ bch2_inconsistent_error(c); -+ -+ return ret; -+} -+ -+void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, -+ enum six_lock_type lock_type_held) -+{ -+ struct btree_write_bio *wbio; -+ struct bset_tree *t; -+ struct bset *i; -+ struct btree_node *bn = NULL; -+ struct btree_node_entry *bne = NULL; -+ BKEY_PADDED(key) k; -+ struct bch_extent_ptr *ptr; -+ struct sort_iter sort_iter; -+ struct nonce nonce; -+ unsigned bytes_to_write, sectors_to_write, bytes, u64s; -+ u64 seq = 0; -+ bool used_mempool; -+ unsigned long old, new; -+ bool validate_before_checksum = false; -+ void *data; -+ -+ if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) -+ return; -+ -+ /* -+ * We may only have a read lock on the btree node - the dirty bit is our -+ * "lock" against racing with other threads that may be trying to start -+ * a write, we do a write iff we clear the dirty bit. Since setting the -+ * dirty bit requires a write lock, we can't race with other threads -+ * redirtying it: -+ */ -+ do { -+ old = new = READ_ONCE(b->flags); -+ -+ if (!(old & (1 << BTREE_NODE_dirty))) -+ return; -+ -+ if (!btree_node_may_write(b)) -+ return; -+ -+ if (old & (1 << BTREE_NODE_write_in_flight)) { -+ btree_node_wait_on_io(b); -+ continue; -+ } -+ -+ new &= ~(1 << BTREE_NODE_dirty); -+ new &= ~(1 << BTREE_NODE_need_write); -+ new |= (1 << BTREE_NODE_write_in_flight); -+ new |= (1 << BTREE_NODE_just_written); -+ new ^= (1 << BTREE_NODE_write_idx); -+ } while (cmpxchg_acquire(&b->flags, old, new) != old); -+ -+ BUG_ON(btree_node_fake(b)); -+ BUG_ON((b->will_make_reachable != 0) != !b->written); -+ -+ BUG_ON(b->written >= c->opts.btree_node_size); -+ BUG_ON(b->written & (c->opts.block_size - 1)); -+ BUG_ON(bset_written(b, btree_bset_last(b))); -+ BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c)); -+ BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format))); -+ -+ bch2_sort_whiteouts(c, b); -+ -+ sort_iter_init(&sort_iter, b); -+ -+ bytes = !b->written -+ ? 
sizeof(struct btree_node) -+ : sizeof(struct btree_node_entry); -+ -+ bytes += b->whiteout_u64s * sizeof(u64); -+ -+ for_each_bset(b, t) { -+ i = bset(b, t); -+ -+ if (bset_written(b, i)) -+ continue; -+ -+ bytes += le16_to_cpu(i->u64s) * sizeof(u64); -+ sort_iter_add(&sort_iter, -+ btree_bkey_first(b, t), -+ btree_bkey_last(b, t)); -+ seq = max(seq, le64_to_cpu(i->journal_seq)); -+ } -+ -+ data = btree_bounce_alloc(c, bytes, &used_mempool); -+ -+ if (!b->written) { -+ bn = data; -+ *bn = *b->data; -+ i = &bn->keys; -+ } else { -+ bne = data; -+ bne->keys = b->data->keys; -+ i = &bne->keys; -+ } -+ -+ i->journal_seq = cpu_to_le64(seq); -+ i->u64s = 0; -+ -+ if (!btree_node_old_extent_overwrite(b)) { -+ sort_iter_add(&sort_iter, -+ unwritten_whiteouts_start(c, b), -+ unwritten_whiteouts_end(c, b)); -+ SET_BSET_SEPARATE_WHITEOUTS(i, false); -+ } else { -+ memcpy_u64s(i->start, -+ unwritten_whiteouts_start(c, b), -+ b->whiteout_u64s); -+ i->u64s = cpu_to_le16(b->whiteout_u64s); -+ SET_BSET_SEPARATE_WHITEOUTS(i, true); -+ } -+ -+ b->whiteout_u64s = 0; -+ -+ u64s = btree_node_old_extent_overwrite(b) -+ ? bch2_sort_extents(vstruct_last(i), &sort_iter, false) -+ : bch2_sort_keys(i->start, &sort_iter, false); -+ le16_add_cpu(&i->u64s, u64s); -+ -+ set_needs_whiteout(i, false); -+ -+ /* do we have data to write? */ -+ if (b->written && !i->u64s) -+ goto nowrite; -+ -+ bytes_to_write = vstruct_end(i) - data; -+ sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9; -+ -+ memset(data + bytes_to_write, 0, -+ (sectors_to_write << 9) - bytes_to_write); -+ -+ BUG_ON(b->written + sectors_to_write > c->opts.btree_node_size); -+ BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN); -+ BUG_ON(i->seq != b->data->keys.seq); -+ -+ i->version = c->sb.version < bcachefs_metadata_version_new_versioning -+ ? cpu_to_le16(BCH_BSET_VERSION_OLD) -+ : cpu_to_le16(c->sb.version); -+ SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c)); -+ -+ if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i))) -+ validate_before_checksum = true; -+ -+ /* validate_bset will be modifying: */ -+ if (le16_to_cpu(i->version) < bcachefs_metadata_version_max) -+ validate_before_checksum = true; -+ -+ /* if we're going to be encrypting, check metadata validity first: */ -+ if (validate_before_checksum && -+ validate_bset_for_write(c, b, i, sectors_to_write)) -+ goto err; -+ -+ bset_encrypt(c, i, b->written << 9); -+ -+ nonce = btree_nonce(i, b->written << 9); -+ -+ if (bn) -+ bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn); -+ else -+ bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); -+ -+ /* if we're not encrypting, check metadata after checksumming: */ -+ if (!validate_before_checksum && -+ validate_bset_for_write(c, b, i, sectors_to_write)) -+ goto err; -+ -+ /* -+ * We handle btree write errors by immediately halting the journal - -+ * after we've done that, we can't issue any subsequent btree writes -+ * because they might have pointers to new nodes that failed to write. 
-+ * -+ * Furthermore, there's no point in doing any more btree writes because -+ * with the journal stopped, we're never going to update the journal to -+ * reflect that those writes were done and the data flushed from the -+ * journal: -+ * -+ * Also on journal error, the pending write may have updates that were -+ * never journalled (interior nodes, see btree_update_nodes_written()) - -+ * it's critical that we don't do the write in that case otherwise we -+ * will have updates visible that weren't in the journal: -+ * -+ * Make sure to update b->written so bch2_btree_init_next() doesn't -+ * break: -+ */ -+ if (bch2_journal_error(&c->journal) || -+ c->opts.nochanges) -+ goto err; -+ -+ trace_btree_write(b, bytes_to_write, sectors_to_write); -+ -+ wbio = container_of(bio_alloc_bioset(GFP_NOIO, -+ buf_pages(data, sectors_to_write << 9), -+ &c->btree_bio), -+ struct btree_write_bio, wbio.bio); -+ wbio_init(&wbio->wbio.bio); -+ wbio->data = data; -+ wbio->bytes = bytes; -+ wbio->wbio.used_mempool = used_mempool; -+ wbio->wbio.bio.bi_opf = REQ_OP_WRITE|REQ_META; -+ wbio->wbio.bio.bi_end_io = btree_node_write_endio; -+ wbio->wbio.bio.bi_private = b; -+ -+ bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9); -+ -+ /* -+ * If we're appending to a leaf node, we don't technically need FUA - -+ * this write just needs to be persisted before the next journal write, -+ * which will be marked FLUSH|FUA. -+ * -+ * Similarly if we're writing a new btree root - the pointer is going to -+ * be in the next journal entry. -+ * -+ * But if we're writing a new btree node (that isn't a root) or -+ * appending to a non leaf btree node, we need either FUA or a flush -+ * when we write the parent with the new pointer. FUA is cheaper than a -+ * flush, and writes appending to leaf nodes aren't blocking anything so -+ * just make all btree node writes FUA to keep things sane. -+ */ -+ -+ bkey_copy(&k.key, &b->key); -+ -+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&k.key)), ptr) -+ ptr->offset += b->written; -+ -+ b->written += sectors_to_write; -+ -+ /* XXX: submitting IO with btree locks held: */ -+ bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, &k.key); -+ return; -+err: -+ set_btree_node_noevict(b); -+ b->written += sectors_to_write; -+nowrite: -+ btree_bounce_free(c, bytes, used_mempool, data); -+ btree_node_write_done(c, b); -+} -+ -+/* -+ * Work that must be done with write lock held: -+ */ -+bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b) -+{ -+ bool invalidated_iter = false; -+ struct btree_node_entry *bne; -+ struct bset_tree *t; -+ -+ if (!btree_node_just_written(b)) -+ return false; -+ -+ BUG_ON(b->whiteout_u64s); -+ -+ clear_btree_node_just_written(b); -+ -+ /* -+ * Note: immediately after write, bset_written() doesn't work - the -+ * amount of data we had to write after compaction might have been -+ * smaller than the offset of the last bset. 
-+ * -+ * However, we know that all bsets have been written here, as long as -+ * we're still holding the write lock: -+ */ -+ -+ /* -+ * XXX: decide if we really want to unconditionally sort down to a -+ * single bset: -+ */ -+ if (b->nsets > 1) { -+ btree_node_sort(c, b, NULL, 0, b->nsets, true); -+ invalidated_iter = true; -+ } else { -+ invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL); -+ } -+ -+ for_each_bset(b, t) -+ set_needs_whiteout(bset(b, t), true); -+ -+ bch2_btree_verify(c, b); -+ -+ /* -+ * If later we don't unconditionally sort down to a single bset, we have -+ * to ensure this is still true: -+ */ -+ BUG_ON((void *) btree_bkey_last(b, bset_tree_last(b)) > write_block(b)); -+ -+ bne = want_new_bset(c, b); -+ if (bne) -+ bch2_bset_init_next(c, b, bne); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ return invalidated_iter; -+} -+ -+/* -+ * Use this one if the node is intent locked: -+ */ -+void bch2_btree_node_write(struct bch_fs *c, struct btree *b, -+ enum six_lock_type lock_type_held) -+{ -+ BUG_ON(lock_type_held == SIX_LOCK_write); -+ -+ if (lock_type_held == SIX_LOCK_intent || -+ six_lock_tryupgrade(&b->c.lock)) { -+ __bch2_btree_node_write(c, b, SIX_LOCK_intent); -+ -+ /* don't cycle lock unnecessarily: */ -+ if (btree_node_just_written(b) && -+ six_trylock_write(&b->c.lock)) { -+ bch2_btree_post_write_cleanup(c, b); -+ six_unlock_write(&b->c.lock); -+ } -+ -+ if (lock_type_held == SIX_LOCK_read) -+ six_lock_downgrade(&b->c.lock); -+ } else { -+ __bch2_btree_node_write(c, b, SIX_LOCK_read); -+ } -+} -+ -+static void __bch2_btree_flush_all(struct bch_fs *c, unsigned flag) -+{ -+ struct bucket_table *tbl; -+ struct rhash_head *pos; -+ struct btree *b; -+ unsigned i; -+restart: -+ rcu_read_lock(); -+ for_each_cached_btree(b, c, tbl, i, pos) -+ if (test_bit(flag, &b->flags)) { -+ rcu_read_unlock(); -+ wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE); -+ goto restart; -+ -+ } -+ rcu_read_unlock(); -+} -+ -+void bch2_btree_flush_all_reads(struct bch_fs *c) -+{ -+ __bch2_btree_flush_all(c, BTREE_NODE_read_in_flight); -+} -+ -+void bch2_btree_flush_all_writes(struct bch_fs *c) -+{ -+ __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight); -+} -+ -+void bch2_btree_verify_flushed(struct bch_fs *c) -+{ -+ struct bucket_table *tbl; -+ struct rhash_head *pos; -+ struct btree *b; -+ unsigned i; -+ -+ rcu_read_lock(); -+ for_each_cached_btree(b, c, tbl, i, pos) { -+ unsigned long flags = READ_ONCE(b->flags); -+ -+ BUG_ON((flags & (1 << BTREE_NODE_dirty)) || -+ (flags & (1 << BTREE_NODE_write_in_flight))); -+ } -+ rcu_read_unlock(); -+} -+ -+void bch2_dirty_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct bucket_table *tbl; -+ struct rhash_head *pos; -+ struct btree *b; -+ unsigned i; -+ -+ rcu_read_lock(); -+ for_each_cached_btree(b, c, tbl, i, pos) { -+ unsigned long flags = READ_ONCE(b->flags); -+ -+ if (!(flags & (1 << BTREE_NODE_dirty))) -+ continue; -+ -+ pr_buf(out, "%p d %u n %u l %u w %u b %u r %u:%lu\n", -+ b, -+ (flags & (1 << BTREE_NODE_dirty)) != 0, -+ (flags & (1 << BTREE_NODE_need_write)) != 0, -+ b->c.level, -+ b->written, -+ !list_empty_careful(&b->write_blocked), -+ b->will_make_reachable != 0, -+ b->will_make_reachable & 1); -+ } -+ rcu_read_unlock(); -+} -diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h -new file mode 100644 -index 000000000000..626d0f071b70 ---- /dev/null -+++ b/fs/bcachefs/btree_io.h -@@ -0,0 +1,220 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_IO_H -+#define _BCACHEFS_BTREE_IO_H 
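/*
 * Editorial sketch (not part of the patch): the write path in btree_io.c
 * above drives a per-node state machine through flag bits in b->flags,
 * updated with a read/modify/cmpxchg loop (__bch2_btree_node_write()) and
 * later asserted clean by bch2_btree_verify_flushed(). This is a simplified
 * userspace analogue of that pattern using C11 atomics; the flag names and
 * helper are hypothetical stand-ins.
 */
#include <stdatomic.h>
#include <stdio.h>

#define NODE_dirty		(1ul << 0)
#define NODE_write_in_flight	(1ul << 1)

/* atomically claim a write: clear dirty and set write_in_flight in one CAS */
static int claim_write(atomic_ulong *flags)
{
	unsigned long old = atomic_load(flags), new;

	do {
		if (!(old & NODE_dirty))
			return 0;		/* nothing to write */
		if (old & NODE_write_in_flight)
			return 0;		/* another writer already claimed it */
		new = (old & ~NODE_dirty) | NODE_write_in_flight;
	} while (!atomic_compare_exchange_weak(flags, &old, new));

	return 1;
}

int main(void)
{
	atomic_ulong flags = NODE_dirty;

	printf("claimed: %d\n", claim_write(&flags));	/* 1 */
	printf("claimed: %d\n", claim_write(&flags));	/* 0: already in flight */
	return 0;
}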
-+ -+#include "bkey_methods.h" -+#include "bset.h" -+#include "btree_locking.h" -+#include "checksum.h" -+#include "extents.h" -+#include "io_types.h" -+ -+struct bch_fs; -+struct btree_write; -+struct btree; -+struct btree_iter; -+ -+struct btree_read_bio { -+ struct bch_fs *c; -+ u64 start_time; -+ unsigned have_ioref:1; -+ struct extent_ptr_decoded pick; -+ struct work_struct work; -+ struct bio bio; -+}; -+ -+struct btree_write_bio { -+ struct work_struct work; -+ void *data; -+ unsigned bytes; -+ struct bch_write_bio wbio; -+}; -+ -+static inline void btree_node_io_unlock(struct btree *b) -+{ -+ EBUG_ON(!btree_node_write_in_flight(b)); -+ clear_btree_node_write_in_flight(b); -+ wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); -+} -+ -+static inline void btree_node_io_lock(struct btree *b) -+{ -+ wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight, -+ TASK_UNINTERRUPTIBLE); -+} -+ -+static inline void btree_node_wait_on_io(struct btree *b) -+{ -+ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, -+ TASK_UNINTERRUPTIBLE); -+} -+ -+static inline bool btree_node_may_write(struct btree *b) -+{ -+ return list_empty_careful(&b->write_blocked) && -+ (!b->written || !b->will_make_reachable); -+} -+ -+enum compact_mode { -+ COMPACT_LAZY, -+ COMPACT_ALL, -+}; -+ -+bool bch2_compact_whiteouts(struct bch_fs *, struct btree *, -+ enum compact_mode); -+ -+static inline bool should_compact_bset_lazy(struct btree *b, -+ struct bset_tree *t) -+{ -+ unsigned total_u64s = bset_u64s(t); -+ unsigned dead_u64s = bset_dead_u64s(b, t); -+ -+ return dead_u64s > 64 && dead_u64s * 3 > total_u64s; -+} -+ -+static inline bool bch2_maybe_compact_whiteouts(struct bch_fs *c, struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) -+ if (should_compact_bset_lazy(b, t)) -+ return bch2_compact_whiteouts(c, b, COMPACT_LAZY); -+ -+ return false; -+} -+ -+static inline struct nonce btree_nonce(struct bset *i, unsigned offset) -+{ -+ return (struct nonce) {{ -+ [0] = cpu_to_le32(offset), -+ [1] = ((__le32 *) &i->seq)[0], -+ [2] = ((__le32 *) &i->seq)[1], -+ [3] = ((__le32 *) &i->journal_seq)[0]^BCH_NONCE_BTREE, -+ }}; -+} -+ -+static inline void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset) -+{ -+ struct nonce nonce = btree_nonce(i, offset); -+ -+ if (!offset) { -+ struct btree_node *bn = container_of(i, struct btree_node, keys); -+ unsigned bytes = (void *) &bn->keys - (void *) &bn->flags; -+ -+ bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags, -+ bytes); -+ -+ nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE)); -+ } -+ -+ bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data, -+ vstruct_end(i) - (void *) i->_data); -+} -+ -+void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *); -+ -+void bch2_btree_build_aux_trees(struct btree *); -+void bch2_btree_init_next(struct bch_fs *, struct btree *, -+ struct btree_iter *); -+ -+int bch2_btree_node_read_done(struct bch_fs *, struct btree *, bool); -+void bch2_btree_node_read(struct bch_fs *, struct btree *, bool); -+int bch2_btree_root_read(struct bch_fs *, enum btree_id, -+ const struct bkey_i *, unsigned); -+ -+void bch2_btree_complete_write(struct bch_fs *, struct btree *, -+ struct btree_write *); -+void bch2_btree_write_error_work(struct work_struct *); -+ -+void __bch2_btree_node_write(struct bch_fs *, struct btree *, -+ enum six_lock_type); -+bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *); -+ -+void bch2_btree_node_write(struct bch_fs *, struct btree *, -+ enum six_lock_type); 
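/*
 * Editorial sketch (not part of the patch): btree_nonce() above builds a
 * 16-byte encryption nonce from per-bset fields - word 0 is the byte offset
 * of the bset within the node, words 1-2 are the two 32-bit halves of
 * bset->seq, and word 3 is the low half of bset->journal_seq XORed with a
 * btree-specific constant. The standalone layout below mirrors that;
 * NONCE_BTREE's value and the helper name are hypothetical, and the
 * little-endian conversions of the on-disk fields are omitted for brevity.
 */
#include <stdint.h>
#include <stdio.h>

#define NONCE_BTREE	0x10000000u	/* hypothetical type constant */

struct nonce { uint32_t d[4]; };

static struct nonce make_btree_nonce(uint64_t seq, uint64_t journal_seq,
				     uint32_t offset)
{
	struct nonce n;

	n.d[0] = offset;
	n.d[1] = (uint32_t) seq;		/* low 32 bits of seq */
	n.d[2] = (uint32_t)(seq >> 32);		/* high 32 bits of seq */
	n.d[3] = (uint32_t) journal_seq ^ NONCE_BTREE;
	return n;
}

int main(void)
{
	struct nonce n = make_btree_nonce(0x1122334455667788ull, 42, 4096);

	for (int i = 0; i < 4; i++)
		printf("word %d: 0x%08x\n", i, n.d[i]);
	return 0;
}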
-+ -+static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b, -+ enum six_lock_type lock_held) -+{ -+ while (b->written && -+ btree_node_need_write(b) && -+ btree_node_may_write(b)) { -+ if (!btree_node_write_in_flight(b)) { -+ bch2_btree_node_write(c, b, lock_held); -+ break; -+ } -+ -+ six_unlock_type(&b->c.lock, lock_held); -+ btree_node_wait_on_io(b); -+ btree_node_lock_type(c, b, lock_held); -+ } -+} -+ -+#define bch2_btree_node_write_cond(_c, _b, cond) \ -+do { \ -+ unsigned long old, new, v = READ_ONCE((_b)->flags); \ -+ \ -+ do { \ -+ old = new = v; \ -+ \ -+ if (!(old & (1 << BTREE_NODE_dirty)) || !(cond)) \ -+ break; \ -+ \ -+ new |= (1 << BTREE_NODE_need_write); \ -+ } while ((v = cmpxchg(&(_b)->flags, old, new)) != old); \ -+ \ -+ btree_node_write_if_need(_c, _b, SIX_LOCK_read); \ -+} while (0) -+ -+void bch2_btree_flush_all_reads(struct bch_fs *); -+void bch2_btree_flush_all_writes(struct bch_fs *); -+void bch2_btree_verify_flushed(struct bch_fs *); -+void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *); -+ -+static inline void compat_bformat(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, struct bkey_format *f) -+{ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_id == BTREE_ID_INODES) { -+ swap(f->bits_per_field[BKEY_FIELD_INODE], -+ f->bits_per_field[BKEY_FIELD_OFFSET]); -+ swap(f->field_offset[BKEY_FIELD_INODE], -+ f->field_offset[BKEY_FIELD_OFFSET]); -+ } -+} -+ -+static inline void compat_bpos(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, struct bpos *p) -+{ -+ if (big_endian != CPU_BIG_ENDIAN) -+ bch2_bpos_swab(p); -+ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_id == BTREE_ID_INODES) -+ swap(p->inode, p->offset); -+} -+ -+static inline void compat_btree_node(unsigned level, enum btree_id btree_id, -+ unsigned version, unsigned big_endian, -+ int write, -+ struct btree_node *bn) -+{ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_node_type_is_extents(btree_id) && -+ bkey_cmp(bn->min_key, POS_MIN) && -+ write) -+ bn->min_key = bkey_predecessor(bn->min_key); -+ -+ compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key); -+ compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key); -+ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_node_type_is_extents(btree_id) && -+ bkey_cmp(bn->min_key, POS_MIN) && -+ !write) -+ bn->min_key = bkey_successor(bn->min_key); -+} -+ -+#endif /* _BCACHEFS_BTREE_IO_H */ -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -new file mode 100644 -index 000000000000..6fab76c3220c ---- /dev/null -+++ b/fs/bcachefs/btree_iter.c -@@ -0,0 +1,2445 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_cache.h" -+#include "btree_iter.h" -+#include "btree_key_cache.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+#include "debug.h" -+#include "extents.h" -+#include "journal.h" -+ -+#include -+#include -+ -+static inline bool is_btree_node(struct btree_iter *iter, unsigned l) -+{ -+ return l < BTREE_MAX_DEPTH && -+ (unsigned long) iter->l[l].b >= 128; -+} -+ -+static inline struct bpos btree_iter_search_key(struct btree_iter *iter) -+{ -+ struct bpos pos = iter->pos; -+ -+ if ((iter->flags & BTREE_ITER_IS_EXTENTS) && -+ bkey_cmp(pos, POS_MAX)) -+ pos = bkey_successor(pos); -+ return pos; -+} -+ -+static 
inline bool btree_iter_pos_before_node(struct btree_iter *iter, -+ struct btree *b) -+{ -+ return bkey_cmp(btree_iter_search_key(iter), b->data->min_key) < 0; -+} -+ -+static inline bool btree_iter_pos_after_node(struct btree_iter *iter, -+ struct btree *b) -+{ -+ return bkey_cmp(b->key.k.p, btree_iter_search_key(iter)) < 0; -+} -+ -+static inline bool btree_iter_pos_in_node(struct btree_iter *iter, -+ struct btree *b) -+{ -+ return iter->btree_id == b->c.btree_id && -+ !btree_iter_pos_before_node(iter, b) && -+ !btree_iter_pos_after_node(iter, b); -+} -+ -+/* Btree node locking: */ -+ -+void bch2_btree_node_unlock_write(struct btree *b, struct btree_iter *iter) -+{ -+ bch2_btree_node_unlock_write_inlined(b, iter); -+} -+ -+void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter) -+{ -+ struct btree_iter *linked; -+ unsigned readers = 0; -+ -+ EBUG_ON(!btree_node_intent_locked(iter, b->c.level)); -+ -+ trans_for_each_iter(iter->trans, linked) -+ if (linked->l[b->c.level].b == b && -+ btree_node_read_locked(linked, b->c.level)) -+ readers++; -+ -+ /* -+ * Must drop our read locks before calling six_lock_write() - -+ * six_unlock() won't do wakeups until the reader count -+ * goes to 0, and it's safe because we have the node intent -+ * locked: -+ */ -+ atomic64_sub(__SIX_VAL(read_lock, readers), -+ &b->c.lock.state.counter); -+ btree_node_lock_type(iter->trans->c, b, SIX_LOCK_write); -+ atomic64_add(__SIX_VAL(read_lock, readers), -+ &b->c.lock.state.counter); -+} -+ -+bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level) -+{ -+ struct btree *b = btree_iter_node(iter, level); -+ int want = __btree_lock_want(iter, level); -+ -+ if (!is_btree_node(iter, level)) -+ return false; -+ -+ if (race_fault()) -+ return false; -+ -+ if (six_relock_type(&b->c.lock, want, iter->l[level].lock_seq) || -+ (btree_node_lock_seq_matches(iter, b, level) && -+ btree_node_lock_increment(iter->trans, b, level, want))) { -+ mark_btree_node_locked(iter, level, want); -+ return true; -+ } else { -+ return false; -+ } -+} -+ -+static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level) -+{ -+ struct btree *b = iter->l[level].b; -+ -+ EBUG_ON(btree_lock_want(iter, level) != BTREE_NODE_INTENT_LOCKED); -+ -+ if (!is_btree_node(iter, level)) -+ return false; -+ -+ if (btree_node_intent_locked(iter, level)) -+ return true; -+ -+ if (race_fault()) -+ return false; -+ -+ if (btree_node_locked(iter, level) -+ ? six_lock_tryupgrade(&b->c.lock) -+ : six_relock_type(&b->c.lock, SIX_LOCK_intent, iter->l[level].lock_seq)) -+ goto success; -+ -+ if (btree_node_lock_seq_matches(iter, b, level) && -+ btree_node_lock_increment(iter->trans, b, level, BTREE_NODE_INTENT_LOCKED)) { -+ btree_node_unlock(iter, level); -+ goto success; -+ } -+ -+ return false; -+success: -+ mark_btree_node_intent_locked(iter, level); -+ return true; -+} -+ -+static inline bool btree_iter_get_locks(struct btree_iter *iter, -+ bool upgrade, bool trace) -+{ -+ unsigned l = iter->level; -+ int fail_idx = -1; -+ -+ do { -+ if (!btree_iter_node(iter, l)) -+ break; -+ -+ if (!(upgrade -+ ? bch2_btree_node_upgrade(iter, l) -+ : bch2_btree_node_relock(iter, l))) { -+ if (trace) -+ (upgrade -+ ? trace_node_upgrade_fail -+ : trace_node_relock_fail)(l, iter->l[l].lock_seq, -+ is_btree_node(iter, l) -+ ? 0 -+ : (unsigned long) iter->l[l].b, -+ is_btree_node(iter, l) -+ ? 
iter->l[l].b->c.lock.state.seq -+ : 0); -+ -+ fail_idx = l; -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+ } -+ -+ l++; -+ } while (l < iter->locks_want); -+ -+ /* -+ * When we fail to get a lock, we have to ensure that any child nodes -+ * can't be relocked so bch2_btree_iter_traverse has to walk back up to -+ * the node that we failed to relock: -+ */ -+ while (fail_idx >= 0) { -+ btree_node_unlock(iter, fail_idx); -+ iter->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS; -+ --fail_idx; -+ } -+ -+ if (iter->uptodate == BTREE_ITER_NEED_RELOCK) -+ iter->uptodate = BTREE_ITER_NEED_PEEK; -+ -+ bch2_btree_trans_verify_locks(iter->trans); -+ -+ return iter->uptodate < BTREE_ITER_NEED_RELOCK; -+} -+ -+static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b, -+ enum btree_iter_type type) -+{ -+ return type != BTREE_ITER_CACHED -+ ? container_of(_b, struct btree, c)->key.k.p -+ : container_of(_b, struct bkey_cached, c)->key.pos; -+} -+ -+/* Slowpath: */ -+bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, -+ unsigned level, struct btree_iter *iter, -+ enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, -+ void *p) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree_iter *linked; -+ u64 start_time = local_clock(); -+ bool ret = true; -+ -+ /* Check if it's safe to block: */ -+ trans_for_each_iter(trans, linked) { -+ if (!linked->nodes_locked) -+ continue; -+ -+ /* -+ * Can't block taking an intent lock if we have _any_ nodes read -+ * locked: -+ * -+ * - Our read lock blocks another thread with an intent lock on -+ * the same node from getting a write lock, and thus from -+ * dropping its intent lock -+ * -+ * - And the other thread may have multiple nodes intent locked: -+ * both the node we want to intent lock, and the node we -+ * already have read locked - deadlock: -+ */ -+ if (type == SIX_LOCK_intent && -+ linked->nodes_locked != linked->nodes_intent_locked) { -+ if (!(trans->nounlock)) { -+ linked->locks_want = max_t(unsigned, -+ linked->locks_want, -+ __fls(linked->nodes_locked) + 1); -+ if (!btree_iter_get_locks(linked, true, false)) -+ ret = false; -+ } else { -+ ret = false; -+ } -+ } -+ -+ /* -+ * Interior nodes must be locked before their descendants: if -+ * another iterator has possible descendants locked of the node -+ * we're about to lock, it must have the ancestors locked too: -+ */ -+ if (linked->btree_id == iter->btree_id && -+ level > __fls(linked->nodes_locked)) { -+ if (!(trans->nounlock)) { -+ linked->locks_want = -+ max(level + 1, max_t(unsigned, -+ linked->locks_want, -+ iter->locks_want)); -+ if (!btree_iter_get_locks(linked, true, false)) -+ ret = false; -+ } else { -+ ret = false; -+ } -+ } -+ -+ /* Must lock btree nodes in key order: */ -+ if ((cmp_int(iter->btree_id, linked->btree_id) ?: -+ -cmp_int(btree_iter_type(iter), btree_iter_type(linked))) < 0) -+ ret = false; -+ -+ if (iter->btree_id == linked->btree_id && -+ btree_node_locked(linked, level) && -+ bkey_cmp(pos, btree_node_pos((void *) linked->l[level].b, -+ btree_iter_type(linked))) <= 0) -+ ret = false; -+ -+ /* -+ * Recheck if this is a node we already have locked - since one -+ * of the get_locks() calls might've successfully -+ * upgraded/relocked it: -+ */ -+ if (linked->l[level].b == b && -+ btree_node_locked_type(linked, level) >= type) { -+ six_lock_increment(&b->c.lock, type); -+ return true; -+ } -+ } -+ -+ if (unlikely(!ret)) { -+ trace_trans_restart_would_deadlock(iter->trans->ip); -+ return false; -+ } -+ -+ if 
(six_trylock_type(&b->c.lock, type)) -+ return true; -+ -+ if (six_lock_type(&b->c.lock, type, should_sleep_fn, p)) -+ return false; -+ -+ bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)], -+ start_time); -+ return true; -+} -+ -+/* Btree iterator locking: */ -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+static void bch2_btree_iter_verify_locks(struct btree_iter *iter) -+{ -+ unsigned l; -+ -+ if (!(iter->trans->iters_linked & (1ULL << iter->idx))) { -+ BUG_ON(iter->nodes_locked); -+ return; -+ } -+ -+ for (l = 0; is_btree_node(iter, l); l++) { -+ if (iter->uptodate >= BTREE_ITER_NEED_RELOCK && -+ !btree_node_locked(iter, l)) -+ continue; -+ -+ BUG_ON(btree_lock_want(iter, l) != -+ btree_node_locked_type(iter, l)); -+ } -+} -+ -+void bch2_btree_trans_verify_locks(struct btree_trans *trans) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter_all(trans, iter) -+ bch2_btree_iter_verify_locks(iter); -+} -+#else -+static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {} -+#endif -+ -+__flatten -+bool bch2_btree_iter_relock(struct btree_iter *iter, bool trace) -+{ -+ return btree_iter_get_locks(iter, false, trace); -+} -+ -+bool __bch2_btree_iter_upgrade(struct btree_iter *iter, -+ unsigned new_locks_want) -+{ -+ struct btree_iter *linked; -+ -+ EBUG_ON(iter->locks_want >= new_locks_want); -+ -+ iter->locks_want = new_locks_want; -+ -+ if (btree_iter_get_locks(iter, true, true)) -+ return true; -+ -+ /* -+ * Ancestor nodes must be locked before child nodes, so set locks_want -+ * on iterators that might lock ancestors before us to avoid getting -+ * -EINTR later: -+ */ -+ trans_for_each_iter(iter->trans, linked) -+ if (linked != iter && -+ linked->btree_id == iter->btree_id && -+ linked->locks_want < new_locks_want) { -+ linked->locks_want = new_locks_want; -+ btree_iter_get_locks(linked, true, false); -+ } -+ -+ return false; -+} -+ -+bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *iter, -+ unsigned new_locks_want) -+{ -+ unsigned l = iter->level; -+ -+ EBUG_ON(iter->locks_want >= new_locks_want); -+ -+ iter->locks_want = new_locks_want; -+ -+ do { -+ if (!btree_iter_node(iter, l)) -+ break; -+ -+ if (!bch2_btree_node_upgrade(iter, l)) { -+ iter->locks_want = l; -+ return false; -+ } -+ -+ l++; -+ } while (l < iter->locks_want); -+ -+ return true; -+} -+ -+void __bch2_btree_iter_downgrade(struct btree_iter *iter, -+ unsigned downgrade_to) -+{ -+ unsigned l, new_locks_want = downgrade_to ?: -+ (iter->flags & BTREE_ITER_INTENT ? 
1 : 0); -+ -+ if (iter->locks_want < downgrade_to) { -+ iter->locks_want = new_locks_want; -+ -+ while (iter->nodes_locked && -+ (l = __fls(iter->nodes_locked)) >= iter->locks_want) { -+ if (l > iter->level) { -+ btree_node_unlock(iter, l); -+ } else { -+ if (btree_node_intent_locked(iter, l)) { -+ six_lock_downgrade(&iter->l[l].b->c.lock); -+ iter->nodes_intent_locked ^= 1 << l; -+ } -+ break; -+ } -+ } -+ } -+ -+ bch2_btree_trans_verify_locks(iter->trans); -+} -+ -+void bch2_trans_downgrade(struct btree_trans *trans) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) -+ bch2_btree_iter_downgrade(iter); -+} -+ -+/* Btree transaction locking: */ -+ -+bool bch2_trans_relock(struct btree_trans *trans) -+{ -+ struct btree_iter *iter; -+ bool ret = true; -+ -+ trans_for_each_iter(trans, iter) -+ if (iter->uptodate == BTREE_ITER_NEED_RELOCK) -+ ret &= bch2_btree_iter_relock(iter, true); -+ -+ return ret; -+} -+ -+void bch2_trans_unlock(struct btree_trans *trans) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) -+ __bch2_btree_iter_unlock(iter); -+} -+ -+/* Btree iterator: */ -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+static void bch2_btree_iter_verify_cached(struct btree_iter *iter) -+{ -+ struct bkey_cached *ck; -+ bool locked = btree_node_locked(iter, 0); -+ -+ if (!bch2_btree_node_relock(iter, 0)) -+ return; -+ -+ ck = (void *) iter->l[0].b; -+ BUG_ON(ck->key.btree_id != iter->btree_id || -+ bkey_cmp(ck->key.pos, iter->pos)); -+ -+ if (!locked) -+ btree_node_unlock(iter, 0); -+} -+ -+static void bch2_btree_iter_verify_level(struct btree_iter *iter, -+ unsigned level) -+{ -+ struct bpos pos = btree_iter_search_key(iter); -+ struct btree_iter_level *l = &iter->l[level]; -+ struct btree_node_iter tmp = l->iter; -+ bool locked = btree_node_locked(iter, level); -+ struct bkey_packed *p, *k; -+ char buf1[100], buf2[100]; -+ const char *msg; -+ -+ if (!debug_check_iterators(iter->trans->c)) -+ return; -+ -+ if (btree_iter_type(iter) == BTREE_ITER_CACHED) { -+ if (!level) -+ bch2_btree_iter_verify_cached(iter); -+ return; -+ } -+ -+ BUG_ON(iter->level < iter->min_depth); -+ -+ if (!btree_iter_node(iter, level)) -+ return; -+ -+ if (!bch2_btree_node_relock(iter, level)) -+ return; -+ -+ /* -+ * Ideally this invariant would always be true, and hopefully in the -+ * future it will be, but for now set_pos_same_leaf() breaks it: -+ */ -+ BUG_ON(iter->uptodate < BTREE_ITER_NEED_TRAVERSE && -+ !btree_iter_pos_in_node(iter, l->b)); -+ -+ /* -+ * node iterators don't use leaf node iterator: -+ */ -+ if (btree_iter_type(iter) == BTREE_ITER_NODES && -+ level <= iter->min_depth) -+ goto unlock; -+ -+ bch2_btree_node_iter_verify(&l->iter, l->b); -+ -+ /* -+ * For interior nodes, the iterator will have skipped past -+ * deleted keys: -+ * -+ * For extents, the iterator may have skipped past deleted keys (but not -+ * whiteouts) -+ */ -+ p = level || btree_node_type_is_extents(iter->btree_id) -+ ? 
bch2_btree_node_iter_prev_filter(&tmp, l->b, KEY_TYPE_discard) -+ : bch2_btree_node_iter_prev_all(&tmp, l->b); -+ k = bch2_btree_node_iter_peek_all(&l->iter, l->b); -+ -+ if (p && bkey_iter_pos_cmp(l->b, p, &pos) >= 0) { -+ msg = "before"; -+ goto err; -+ } -+ -+ if (k && bkey_iter_pos_cmp(l->b, k, &pos) < 0) { -+ msg = "after"; -+ goto err; -+ } -+unlock: -+ if (!locked) -+ btree_node_unlock(iter, level); -+ return; -+err: -+ strcpy(buf1, "(none)"); -+ strcpy(buf2, "(none)"); -+ -+ if (p) { -+ struct bkey uk = bkey_unpack_key(l->b, p); -+ bch2_bkey_to_text(&PBUF(buf1), &uk); -+ } -+ -+ if (k) { -+ struct bkey uk = bkey_unpack_key(l->b, k); -+ bch2_bkey_to_text(&PBUF(buf2), &uk); -+ } -+ -+ panic("iterator should be %s key at level %u:\n" -+ "iter pos %s %llu:%llu\n" -+ "prev key %s\n" -+ "cur key %s\n", -+ msg, level, -+ iter->flags & BTREE_ITER_IS_EXTENTS ? ">" : "=>", -+ iter->pos.inode, iter->pos.offset, -+ buf1, buf2); -+} -+ -+static void bch2_btree_iter_verify(struct btree_iter *iter) -+{ -+ unsigned i; -+ -+ bch2_btree_trans_verify_locks(iter->trans); -+ -+ for (i = 0; i < BTREE_MAX_DEPTH; i++) -+ bch2_btree_iter_verify_level(iter, i); -+} -+ -+void bch2_btree_trans_verify_iters(struct btree_trans *trans, struct btree *b) -+{ -+ struct btree_iter *iter; -+ -+ if (!debug_check_iterators(trans->c)) -+ return; -+ -+ trans_for_each_iter_with_node(trans, b, iter) -+ bch2_btree_iter_verify_level(iter, b->c.level); -+} -+ -+#else -+ -+static inline void bch2_btree_iter_verify_level(struct btree_iter *iter, unsigned l) {} -+static inline void bch2_btree_iter_verify(struct btree_iter *iter) {} -+ -+#endif -+ -+static void btree_node_iter_set_set_pos(struct btree_node_iter *iter, -+ struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *k) -+{ -+ struct btree_node_iter_set *set; -+ -+ btree_node_iter_for_each(iter, set) -+ if (set->end == t->end_offset) { -+ set->k = __btree_node_key_to_offset(b, k); -+ bch2_btree_node_iter_sort(iter, b); -+ return; -+ } -+ -+ bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t)); -+} -+ -+static void __bch2_btree_iter_fix_key_modified(struct btree_iter *iter, -+ struct btree *b, -+ struct bkey_packed *where) -+{ -+ struct btree_iter_level *l = &iter->l[b->c.level]; -+ struct bpos pos = btree_iter_search_key(iter); -+ -+ if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b)) -+ return; -+ -+ if (bkey_iter_pos_cmp(l->b, where, &pos) < 0) -+ bch2_btree_node_iter_advance(&l->iter, l->b); -+ -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+} -+ -+void bch2_btree_iter_fix_key_modified(struct btree_iter *iter, -+ struct btree *b, -+ struct bkey_packed *where) -+{ -+ struct btree_iter *linked; -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) { -+ __bch2_btree_iter_fix_key_modified(linked, b, where); -+ bch2_btree_iter_verify_level(linked, b->c.level); -+ } -+} -+ -+static void __bch2_btree_node_iter_fix(struct btree_iter *iter, -+ struct btree *b, -+ struct btree_node_iter *node_iter, -+ struct bset_tree *t, -+ struct bkey_packed *where, -+ unsigned clobber_u64s, -+ unsigned new_u64s) -+{ -+ const struct bkey_packed *end = btree_bkey_last(b, t); -+ struct btree_node_iter_set *set; -+ unsigned offset = __btree_node_key_to_offset(b, where); -+ int shift = new_u64s - clobber_u64s; -+ unsigned old_end = t->end_offset - shift; -+ unsigned orig_iter_pos = node_iter->data[0].k; -+ bool iter_current_key_modified = -+ orig_iter_pos >= offset && -+ orig_iter_pos <= offset + clobber_u64s; -+ struct bpos iter_pos = 
btree_iter_search_key(iter); -+ -+ btree_node_iter_for_each(node_iter, set) -+ if (set->end == old_end) -+ goto found; -+ -+ /* didn't find the bset in the iterator - might have to readd it: */ -+ if (new_u64s && -+ bkey_iter_pos_cmp(b, where, &iter_pos) >= 0) { -+ bch2_btree_node_iter_push(node_iter, b, where, end); -+ goto fixup_done; -+ } else { -+ /* Iterator is after key that changed */ -+ return; -+ } -+found: -+ set->end = t->end_offset; -+ -+ /* Iterator hasn't gotten to the key that changed yet: */ -+ if (set->k < offset) -+ return; -+ -+ if (new_u64s && -+ bkey_iter_pos_cmp(b, where, &iter_pos) >= 0) { -+ set->k = offset; -+ } else if (set->k < offset + clobber_u64s) { -+ set->k = offset + new_u64s; -+ if (set->k == set->end) -+ bch2_btree_node_iter_set_drop(node_iter, set); -+ } else { -+ /* Iterator is after key that changed */ -+ set->k = (int) set->k + shift; -+ return; -+ } -+ -+ bch2_btree_node_iter_sort(node_iter, b); -+fixup_done: -+ if (node_iter->data[0].k != orig_iter_pos) -+ iter_current_key_modified = true; -+ -+ /* -+ * When a new key is added, and the node iterator now points to that -+ * key, the iterator might have skipped past deleted keys that should -+ * come after the key the iterator now points to. We have to rewind to -+ * before those deleted keys - otherwise -+ * bch2_btree_node_iter_prev_all() breaks: -+ */ -+ if (!bch2_btree_node_iter_end(node_iter) && -+ iter_current_key_modified && -+ (b->c.level || -+ btree_node_type_is_extents(iter->btree_id))) { -+ struct bset_tree *t; -+ struct bkey_packed *k, *k2, *p; -+ -+ k = bch2_btree_node_iter_peek_all(node_iter, b); -+ -+ for_each_bset(b, t) { -+ bool set_pos = false; -+ -+ if (node_iter->data[0].end == t->end_offset) -+ continue; -+ -+ k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t); -+ -+ while ((p = bch2_bkey_prev_all(b, t, k2)) && -+ bkey_iter_cmp(b, k, p) < 0) { -+ k2 = p; -+ set_pos = true; -+ } -+ -+ if (set_pos) -+ btree_node_iter_set_set_pos(node_iter, -+ b, t, k2); -+ } -+ } -+ -+ if (!b->c.level && -+ node_iter == &iter->l[0].iter && -+ iter_current_key_modified) -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+} -+ -+void bch2_btree_node_iter_fix(struct btree_iter *iter, -+ struct btree *b, -+ struct btree_node_iter *node_iter, -+ struct bkey_packed *where, -+ unsigned clobber_u64s, -+ unsigned new_u64s) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, where); -+ struct btree_iter *linked; -+ -+ if (node_iter != &iter->l[b->c.level].iter) { -+ __bch2_btree_node_iter_fix(iter, b, node_iter, t, -+ where, clobber_u64s, new_u64s); -+ -+ if (debug_check_iterators(iter->trans->c)) -+ bch2_btree_node_iter_verify(node_iter, b); -+ } -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) { -+ __bch2_btree_node_iter_fix(linked, b, -+ &linked->l[b->c.level].iter, t, -+ where, clobber_u64s, new_u64s); -+ bch2_btree_iter_verify_level(linked, b->c.level); -+ } -+} -+ -+static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter, -+ struct btree_iter_level *l, -+ struct bkey *u, -+ struct bkey_packed *k) -+{ -+ struct bkey_s_c ret; -+ -+ if (unlikely(!k)) { -+ /* -+ * signal to bch2_btree_iter_peek_slot() that we're currently at -+ * a hole -+ */ -+ u->type = KEY_TYPE_deleted; -+ return bkey_s_c_null; -+ } -+ -+ ret = bkey_disassemble(l->b, k, u); -+ -+ if (debug_check_bkeys(iter->trans->c)) -+ bch2_bkey_debugcheck(iter->trans->c, l->b, ret); -+ -+ return ret; -+} -+ -+/* peek_all() doesn't skip deleted keys */ -+static inline struct bkey_s_c __btree_iter_peek_all(struct 
btree_iter *iter, -+ struct btree_iter_level *l, -+ struct bkey *u) -+{ -+ return __btree_iter_unpack(iter, l, u, -+ bch2_btree_node_iter_peek_all(&l->iter, l->b)); -+} -+ -+static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter, -+ struct btree_iter_level *l) -+{ -+ return __btree_iter_unpack(iter, l, &iter->k, -+ bch2_btree_node_iter_peek(&l->iter, l->b)); -+} -+ -+static inline struct bkey_s_c __btree_iter_prev(struct btree_iter *iter, -+ struct btree_iter_level *l) -+{ -+ return __btree_iter_unpack(iter, l, &iter->k, -+ bch2_btree_node_iter_prev(&l->iter, l->b)); -+} -+ -+static inline bool btree_iter_advance_to_pos(struct btree_iter *iter, -+ struct btree_iter_level *l, -+ int max_advance) -+{ -+ struct bpos pos = btree_iter_search_key(iter); -+ struct bkey_packed *k; -+ int nr_advanced = 0; -+ -+ while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) && -+ bkey_iter_pos_cmp(l->b, k, &pos) < 0) { -+ if (max_advance > 0 && nr_advanced >= max_advance) -+ return false; -+ -+ bch2_btree_node_iter_advance(&l->iter, l->b); -+ nr_advanced++; -+ } -+ -+ return true; -+} -+ -+/* -+ * Verify that iterator for parent node points to child node: -+ */ -+static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b) -+{ -+ struct btree_iter_level *l; -+ unsigned plevel; -+ bool parent_locked; -+ struct bkey_packed *k; -+ -+ if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) -+ return; -+ -+ plevel = b->c.level + 1; -+ if (!btree_iter_node(iter, plevel)) -+ return; -+ -+ parent_locked = btree_node_locked(iter, plevel); -+ -+ if (!bch2_btree_node_relock(iter, plevel)) -+ return; -+ -+ l = &iter->l[plevel]; -+ k = bch2_btree_node_iter_peek_all(&l->iter, l->b); -+ if (!k || -+ bkey_deleted(k) || -+ bkey_cmp_left_packed(l->b, k, &b->key.k.p)) { -+ char buf[100]; -+ struct bkey uk = bkey_unpack_key(b, k); -+ -+ bch2_bkey_to_text(&PBUF(buf), &uk); -+ panic("parent iter doesn't point to new node:\n%s\n%llu:%llu\n", -+ buf, b->key.k.p.inode, b->key.k.p.offset); -+ } -+ -+ if (!parent_locked) -+ btree_node_unlock(iter, b->c.level + 1); -+} -+ -+static inline void __btree_iter_init(struct btree_iter *iter, -+ unsigned level) -+{ -+ struct bpos pos = btree_iter_search_key(iter); -+ struct btree_iter_level *l = &iter->l[level]; -+ -+ bch2_btree_node_iter_init(&l->iter, l->b, &pos); -+ -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+} -+ -+static inline void btree_iter_node_set(struct btree_iter *iter, -+ struct btree *b) -+{ -+ BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED); -+ -+ btree_iter_verify_new_node(iter, b); -+ -+ EBUG_ON(!btree_iter_pos_in_node(iter, b)); -+ EBUG_ON(b->c.lock.state.seq & 1); -+ -+ iter->l[b->c.level].lock_seq = b->c.lock.state.seq; -+ iter->l[b->c.level].b = b; -+ __btree_iter_init(iter, b->c.level); -+} -+ -+/* -+ * A btree node is being replaced - update the iterator to point to the new -+ * node: -+ */ -+void bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b) -+{ -+ enum btree_node_locked_type t; -+ struct btree_iter *linked; -+ -+ trans_for_each_iter(iter->trans, linked) -+ if (btree_iter_type(linked) != BTREE_ITER_CACHED && -+ btree_iter_pos_in_node(linked, b)) { -+ /* -+ * bch2_btree_iter_node_drop() has already been called - -+ * the old node we're replacing has already been -+ * unlocked and the pointer invalidated -+ */ -+ BUG_ON(btree_node_locked(linked, b->c.level)); -+ -+ t = btree_lock_want(linked, b->c.level); -+ if (t != BTREE_NODE_UNLOCKED) { -+ six_lock_increment(&b->c.lock, t); -+ 
mark_btree_node_locked(linked, b->c.level, t); -+ } -+ -+ btree_iter_node_set(linked, b); -+ } -+} -+ -+void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b) -+{ -+ struct btree_iter *linked; -+ unsigned level = b->c.level; -+ -+ trans_for_each_iter(iter->trans, linked) -+ if (linked->l[level].b == b) { -+ __btree_node_unlock(linked, level); -+ linked->l[level].b = BTREE_ITER_NO_NODE_DROP; -+ } -+} -+ -+/* -+ * A btree node has been modified in such a way as to invalidate iterators - fix -+ * them: -+ */ -+void bch2_btree_iter_reinit_node(struct btree_iter *iter, struct btree *b) -+{ -+ struct btree_iter *linked; -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) -+ __btree_iter_init(linked, b->c.level); -+} -+ -+static int lock_root_check_fn(struct six_lock *lock, void *p) -+{ -+ struct btree *b = container_of(lock, struct btree, c.lock); -+ struct btree **rootp = p; -+ -+ return b == *rootp ? 0 : -1; -+} -+ -+static inline int btree_iter_lock_root(struct btree_iter *iter, -+ unsigned depth_want) -+{ -+ struct bch_fs *c = iter->trans->c; -+ struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b; -+ enum six_lock_type lock_type; -+ unsigned i; -+ -+ EBUG_ON(iter->nodes_locked); -+ -+ while (1) { -+ b = READ_ONCE(*rootp); -+ iter->level = READ_ONCE(b->c.level); -+ -+ if (unlikely(iter->level < depth_want)) { -+ /* -+ * the root is at a lower depth than the depth we want: -+ * got to the end of the btree, or we're walking nodes -+ * greater than some depth and there are no nodes >= -+ * that depth -+ */ -+ iter->level = depth_want; -+ for (i = iter->level; i < BTREE_MAX_DEPTH; i++) -+ iter->l[i].b = NULL; -+ return 1; -+ } -+ -+ lock_type = __btree_lock_want(iter, iter->level); -+ if (unlikely(!btree_node_lock(b, POS_MAX, iter->level, -+ iter, lock_type, -+ lock_root_check_fn, rootp))) -+ return -EINTR; -+ -+ if (likely(b == READ_ONCE(*rootp) && -+ b->c.level == iter->level && -+ !race_fault())) { -+ for (i = 0; i < iter->level; i++) -+ iter->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT; -+ iter->l[iter->level].b = b; -+ for (i = iter->level + 1; i < BTREE_MAX_DEPTH; i++) -+ iter->l[i].b = NULL; -+ -+ mark_btree_node_locked(iter, iter->level, lock_type); -+ btree_iter_node_set(iter, b); -+ return 0; -+ } -+ -+ six_unlock_type(&b->c.lock, lock_type); -+ } -+} -+ -+noinline -+static void btree_iter_prefetch(struct btree_iter *iter) -+{ -+ struct bch_fs *c = iter->trans->c; -+ struct btree_iter_level *l = &iter->l[iter->level]; -+ struct btree_node_iter node_iter = l->iter; -+ struct bkey_packed *k; -+ BKEY_PADDED(k) tmp; -+ unsigned nr = test_bit(BCH_FS_STARTED, &c->flags) -+ ? (iter->level > 1 ? 0 : 2) -+ : (iter->level > 1 ? 
1 : 16); -+ bool was_locked = btree_node_locked(iter, iter->level); -+ -+ while (nr) { -+ if (!bch2_btree_node_relock(iter, iter->level)) -+ return; -+ -+ bch2_btree_node_iter_advance(&node_iter, l->b); -+ k = bch2_btree_node_iter_peek(&node_iter, l->b); -+ if (!k) -+ break; -+ -+ bch2_bkey_unpack(l->b, &tmp.k, k); -+ bch2_btree_node_prefetch(c, iter, &tmp.k, iter->level - 1); -+ } -+ -+ if (!was_locked) -+ btree_node_unlock(iter, iter->level); -+} -+ -+static noinline void btree_node_mem_ptr_set(struct btree_iter *iter, -+ unsigned plevel, struct btree *b) -+{ -+ struct btree_iter_level *l = &iter->l[plevel]; -+ bool locked = btree_node_locked(iter, plevel); -+ struct bkey_packed *k; -+ struct bch_btree_ptr_v2 *bp; -+ -+ if (!bch2_btree_node_relock(iter, plevel)) -+ return; -+ -+ k = bch2_btree_node_iter_peek_all(&l->iter, l->b); -+ BUG_ON(k->type != KEY_TYPE_btree_ptr_v2); -+ -+ bp = (void *) bkeyp_val(&l->b->format, k); -+ bp->mem_ptr = (unsigned long)b; -+ -+ if (!locked) -+ btree_node_unlock(iter, plevel); -+} -+ -+static __always_inline int btree_iter_down(struct btree_iter *iter) -+{ -+ struct bch_fs *c = iter->trans->c; -+ struct btree_iter_level *l = &iter->l[iter->level]; -+ struct btree *b; -+ unsigned level = iter->level - 1; -+ enum six_lock_type lock_type = __btree_lock_want(iter, level); -+ BKEY_PADDED(k) tmp; -+ -+ EBUG_ON(!btree_node_locked(iter, iter->level)); -+ -+ bch2_bkey_unpack(l->b, &tmp.k, -+ bch2_btree_node_iter_peek(&l->iter, l->b)); -+ -+ b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type); -+ if (unlikely(IS_ERR(b))) -+ return PTR_ERR(b); -+ -+ mark_btree_node_locked(iter, level, lock_type); -+ btree_iter_node_set(iter, b); -+ -+ if (tmp.k.k.type == KEY_TYPE_btree_ptr_v2 && -+ unlikely(b != btree_node_mem_ptr(&tmp.k))) -+ btree_node_mem_ptr_set(iter, level + 1, b); -+ -+ if (iter->flags & BTREE_ITER_PREFETCH) -+ btree_iter_prefetch(iter); -+ -+ iter->level = level; -+ -+ return 0; -+} -+ -+static void btree_iter_up(struct btree_iter *iter) -+{ -+ btree_node_unlock(iter, iter->level++); -+} -+ -+static int btree_iter_traverse_one(struct btree_iter *); -+ -+static int __btree_iter_traverse_all(struct btree_trans *trans, int ret) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *iter; -+ u8 sorted[BTREE_ITER_MAX]; -+ unsigned i, nr_sorted = 0; -+ -+ if (trans->in_traverse_all) -+ return -EINTR; -+ -+ trans->in_traverse_all = true; -+retry_all: -+ nr_sorted = 0; -+ -+ trans_for_each_iter(trans, iter) -+ sorted[nr_sorted++] = iter->idx; -+ -+#define btree_iter_cmp_by_idx(_l, _r) \ -+ btree_iter_cmp(&trans->iters[_l], &trans->iters[_r]) -+ -+ bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx); -+#undef btree_iter_cmp_by_idx -+ bch2_trans_unlock(trans); -+ -+ if (unlikely(ret == -ENOMEM)) { -+ struct closure cl; -+ -+ closure_init_stack(&cl); -+ -+ do { -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); -+ closure_sync(&cl); -+ } while (ret); -+ } -+ -+ if (unlikely(ret == -EIO)) { -+ trans->error = true; -+ goto out; -+ } -+ -+ BUG_ON(ret && ret != -EINTR); -+ -+ /* Now, redo traversals in correct order: */ -+ for (i = 0; i < nr_sorted; i++) { -+ unsigned idx = sorted[i]; -+ -+ /* -+ * sucessfully traversing one iterator can cause another to be -+ * unlinked, in btree_key_cache_fill() -+ */ -+ if (!(trans->iters_linked & (1ULL << idx))) -+ continue; -+ -+ ret = btree_iter_traverse_one(&trans->iters[idx]); -+ if (ret) -+ goto retry_all; -+ } -+ -+ if (hweight64(trans->iters_live) > 1) -+ ret = -EINTR; -+ else -+ trans_for_each_iter(trans, iter) -+ 
if (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) { -+ ret = -EINTR; -+ break; -+ } -+out: -+ bch2_btree_cache_cannibalize_unlock(c); -+ -+ trans->in_traverse_all = false; -+ return ret; -+} -+ -+int bch2_btree_iter_traverse_all(struct btree_trans *trans) -+{ -+ return __btree_iter_traverse_all(trans, 0); -+} -+ -+static inline bool btree_iter_good_node(struct btree_iter *iter, -+ unsigned l, int check_pos) -+{ -+ if (!is_btree_node(iter, l) || -+ !bch2_btree_node_relock(iter, l)) -+ return false; -+ -+ if (check_pos <= 0 && btree_iter_pos_before_node(iter, iter->l[l].b)) -+ return false; -+ if (check_pos >= 0 && btree_iter_pos_after_node(iter, iter->l[l].b)) -+ return false; -+ return true; -+} -+ -+static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter, -+ int check_pos) -+{ -+ unsigned l = iter->level; -+ -+ while (btree_iter_node(iter, l) && -+ !btree_iter_good_node(iter, l, check_pos)) { -+ btree_node_unlock(iter, l); -+ iter->l[l].b = BTREE_ITER_NO_NODE_UP; -+ l++; -+ } -+ -+ return l; -+} -+ -+/* -+ * This is the main state machine for walking down the btree - walks down to a -+ * specified depth -+ * -+ * Returns 0 on success, -EIO on error (error reading in a btree node). -+ * -+ * On error, caller (peek_node()/peek_key()) must return NULL; the error is -+ * stashed in the iterator and returned from bch2_trans_exit(). -+ */ -+static int btree_iter_traverse_one(struct btree_iter *iter) -+{ -+ unsigned depth_want = iter->level; -+ -+ /* -+ * if we need interior nodes locked, call btree_iter_relock() to make -+ * sure we walk back up enough that we lock them: -+ */ -+ if (iter->uptodate == BTREE_ITER_NEED_RELOCK || -+ iter->locks_want > 1) -+ bch2_btree_iter_relock(iter, false); -+ -+ if (btree_iter_type(iter) == BTREE_ITER_CACHED) -+ return bch2_btree_iter_traverse_cached(iter); -+ -+ if (iter->uptodate < BTREE_ITER_NEED_RELOCK) -+ return 0; -+ -+ if (unlikely(iter->level >= BTREE_MAX_DEPTH)) -+ return 0; -+ -+ /* -+ * XXX: correctly using BTREE_ITER_UPTODATE should make using check_pos -+ * here unnecessary -+ */ -+ iter->level = btree_iter_up_until_good_node(iter, 0); -+ -+ /* -+ * If we've got a btree node locked (i.e. we aren't about to relock the -+ * root) - advance its node iterator if necessary: -+ * -+ * XXX correctly using BTREE_ITER_UPTODATE should make this unnecessary -+ */ -+ if (is_btree_node(iter, iter->level)) { -+ BUG_ON(!btree_iter_pos_in_node(iter, iter->l[iter->level].b)); -+ -+ btree_iter_advance_to_pos(iter, &iter->l[iter->level], -1); -+ } -+ -+ /* -+ * Note: iter->nodes[iter->level] may be temporarily NULL here - that -+ * would indicate to other code that we got to the end of the btree, -+ * here it indicates that relocking the root failed - it's critical that -+ * btree_iter_lock_root() comes next and that it can't fail -+ */ -+ while (iter->level > depth_want) { -+ int ret = btree_iter_node(iter, iter->level) -+ ? 
btree_iter_down(iter) -+ : btree_iter_lock_root(iter, depth_want); -+ if (unlikely(ret)) { -+ if (ret == 1) -+ return 0; -+ -+ iter->level = depth_want; -+ -+ if (ret == -EIO) { -+ iter->flags |= BTREE_ITER_ERROR; -+ iter->l[iter->level].b = -+ BTREE_ITER_NO_NODE_ERROR; -+ } else { -+ iter->l[iter->level].b = -+ BTREE_ITER_NO_NODE_DOWN; -+ } -+ return ret; -+ } -+ } -+ -+ iter->uptodate = BTREE_ITER_NEED_PEEK; -+ -+ bch2_btree_iter_verify(iter); -+ return 0; -+} -+ -+int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) -+{ -+ struct btree_trans *trans = iter->trans; -+ int ret; -+ -+ ret = bch2_trans_cond_resched(trans) ?: -+ btree_iter_traverse_one(iter); -+ if (unlikely(ret)) -+ ret = __btree_iter_traverse_all(trans, ret); -+ -+ return ret; -+} -+ -+static inline void bch2_btree_iter_checks(struct btree_iter *iter) -+{ -+ enum btree_iter_type type = btree_iter_type(iter); -+ -+ EBUG_ON(iter->btree_id >= BTREE_ID_NR); -+ -+ BUG_ON((type == BTREE_ITER_KEYS || -+ type == BTREE_ITER_CACHED) && -+ (bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 || -+ bkey_cmp(iter->pos, iter->k.p) > 0)); -+ -+ bch2_btree_iter_verify_locks(iter); -+ bch2_btree_iter_verify_level(iter, iter->level); -+} -+ -+/* Iterate across nodes (leaf and interior nodes) */ -+ -+struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) -+{ -+ struct btree *b; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES); -+ bch2_btree_iter_checks(iter); -+ -+ if (iter->uptodate == BTREE_ITER_UPTODATE) -+ return iter->l[iter->level].b; -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return NULL; -+ -+ b = btree_iter_node(iter, iter->level); -+ if (!b) -+ return NULL; -+ -+ BUG_ON(bkey_cmp(b->key.k.p, iter->pos) < 0); -+ -+ iter->pos = b->key.k.p; -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ -+ bch2_btree_iter_verify(iter); -+ -+ return b; -+} -+ -+struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) -+{ -+ struct btree *b; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES); -+ bch2_btree_iter_checks(iter); -+ -+ /* already got to end? */ -+ if (!btree_iter_node(iter, iter->level)) -+ return NULL; -+ -+ bch2_trans_cond_resched(iter->trans); -+ -+ btree_iter_up(iter); -+ -+ if (!bch2_btree_node_relock(iter, iter->level)) -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return NULL; -+ -+ /* got to end? 
*/ -+ b = btree_iter_node(iter, iter->level); -+ if (!b) -+ return NULL; -+ -+ if (bkey_cmp(iter->pos, b->key.k.p) < 0) { -+ /* -+ * Haven't gotten to the end of the parent node: go back down to -+ * the next child node -+ */ -+ -+ /* -+ * We don't really want to be unlocking here except we can't -+ * directly tell btree_iter_traverse() "traverse to this level" -+ * except by setting iter->level, so we have to unlock so we -+ * don't screw up our lock invariants: -+ */ -+ if (btree_node_read_locked(iter, iter->level)) -+ btree_node_unlock(iter, iter->level); -+ -+ iter->pos = bkey_successor(iter->pos); -+ iter->level = iter->min_depth; -+ -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return NULL; -+ -+ b = iter->l[iter->level].b; -+ } -+ -+ iter->pos = b->key.k.p; -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ -+ bch2_btree_iter_verify(iter); -+ -+ return b; -+} -+ -+/* Iterate across keys (in leaf nodes only) */ -+ -+void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_pos) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ -+ EBUG_ON(iter->level != 0); -+ EBUG_ON(bkey_cmp(new_pos, iter->pos) < 0); -+ EBUG_ON(!btree_node_locked(iter, 0)); -+ EBUG_ON(bkey_cmp(new_pos, l->b->key.k.p) > 0); -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = new_pos; -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+ -+ btree_iter_advance_to_pos(iter, l, -1); -+ -+ /* -+ * XXX: -+ * keeping a node locked that's outside (even just outside) iter->pos -+ * breaks __bch2_btree_node_lock(). This seems to only affect -+ * bch2_btree_node_get_sibling so for now it's fixed there, but we -+ * should try to get rid of this corner case. -+ * -+ * (this behaviour is currently needed for BTREE_INSERT_NOUNLOCK) -+ */ -+ -+ if (bch2_btree_node_iter_end(&l->iter) && -+ btree_iter_pos_after_node(iter, l->b)) -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+} -+ -+static void btree_iter_pos_changed(struct btree_iter *iter, int cmp) -+{ -+ unsigned l = iter->level; -+ -+ if (!cmp) -+ goto out; -+ -+ if (unlikely(btree_iter_type(iter) == BTREE_ITER_CACHED)) { -+ btree_node_unlock(iter, 0); -+ iter->l[0].b = BTREE_ITER_NO_NODE_UP; -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+ return; -+ } -+ -+ l = btree_iter_up_until_good_node(iter, cmp); -+ -+ if (btree_iter_node(iter, l)) { -+ /* -+ * We might have to skip over many keys, or just a few: try -+ * advancing the node iterator, and if we have to skip over too -+ * many keys just reinit it (or if we're rewinding, since that -+ * is expensive). -+ */ -+ if (cmp < 0 || -+ !btree_iter_advance_to_pos(iter, &iter->l[l], 8)) -+ __btree_iter_init(iter, l); -+ -+ /* Don't leave it locked if we're not supposed to: */ -+ if (btree_lock_want(iter, l) == BTREE_NODE_UNLOCKED) -+ btree_node_unlock(iter, l); -+ } -+out: -+ if (l != iter->level) -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -+ else -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -+} -+ -+void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos, -+ bool strictly_greater) -+{ -+ struct bpos old = btree_iter_search_key(iter); -+ int cmp; -+ -+ iter->flags &= ~BTREE_ITER_IS_EXTENTS; -+ iter->flags |= strictly_greater ? 
BTREE_ITER_IS_EXTENTS : 0; -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = new_pos; -+ -+ cmp = bkey_cmp(btree_iter_search_key(iter), old); -+ -+ btree_iter_pos_changed(iter, cmp); -+} -+ -+void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) -+{ -+ int cmp = bkey_cmp(new_pos, iter->pos); -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = new_pos; -+ -+ btree_iter_pos_changed(iter, cmp); -+} -+ -+static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ bool ret; -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = l->b->key.k.p; -+ -+ ret = bkey_cmp(iter->pos, POS_MAX) != 0; -+ if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) -+ iter->k.p = iter->pos = bkey_successor(iter->pos); -+ -+ btree_iter_pos_changed(iter, 1); -+ return ret; -+} -+ -+static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ bool ret; -+ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos = l->b->data->min_key; -+ iter->uptodate = BTREE_ITER_NEED_TRAVERSE; -+ -+ ret = bkey_cmp(iter->pos, POS_MIN) != 0; -+ if (ret) { -+ iter->k.p = iter->pos = bkey_predecessor(iter->pos); -+ -+ if (iter->flags & BTREE_ITER_IS_EXTENTS) -+ iter->k.p = iter->pos = bkey_predecessor(iter->pos); -+ } -+ -+ btree_iter_pos_changed(iter, -1); -+ return ret; -+} -+ -+/** -+ * btree_iter_peek_uptodate - given an iterator that is uptodate, return the key -+ * it currently points to -+ */ -+static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c ret = { .k = &iter->k }; -+ -+ if (!bkey_deleted(&iter->k)) { -+ struct bkey_packed *_k = -+ __bch2_btree_node_iter_peek_all(&l->iter, l->b); -+ -+ ret.v = bkeyp_val(&l->b->format, _k); -+ -+ if (debug_check_iterators(iter->trans->c)) { -+ struct bkey k = bkey_unpack_key(l->b, _k); -+ -+ BUG_ON(memcmp(&k, &iter->k, sizeof(k))); -+ } -+ -+ if (debug_check_bkeys(iter->trans->c)) -+ bch2_bkey_debugcheck(iter->trans->c, l->b, ret); -+ } -+ -+ return ret; -+} -+ -+/** -+ * bch2_btree_iter_peek: returns first key greater than or equal to iterator's -+ * current position -+ */ -+struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c k; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ if (iter->uptodate == BTREE_ITER_UPTODATE && -+ !bkey_deleted(&iter->k)) -+ return btree_iter_peek_uptodate(iter); -+ -+ while (1) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ k = __btree_iter_peek(iter, l); -+ if (likely(k.k)) -+ break; -+ -+ if (!btree_iter_set_pos_to_next_leaf(iter)) -+ return bkey_s_c_null; -+ } -+ -+ /* -+ * iter->pos should always be equal to the key we just -+ * returned - except extents can straddle iter->pos: -+ */ -+ if (!(iter->flags & BTREE_ITER_IS_EXTENTS) || -+ bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) -+ iter->pos = bkey_start_pos(k.k); -+ -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ -+ bch2_btree_iter_verify_level(iter, 0); -+ return k; -+} -+ -+/** -+ * bch2_btree_iter_next: returns first key greater than iterator's current -+ * position -+ */ -+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) -+{ -+ if (unlikely(!bkey_cmp(iter->k.p, POS_MAX))) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, -+ (iter->flags & 
BTREE_ITER_IS_EXTENTS) -+ ? iter->k.p -+ : bkey_successor(iter->k.p)); -+ -+ return bch2_btree_iter_peek(iter); -+} -+ -+static struct bkey_s_c __btree_trans_updates_peek(struct btree_iter *iter) -+{ -+ struct bpos pos = btree_iter_search_key(iter); -+ struct btree_trans *trans = iter->trans; -+ struct btree_insert_entry *i; -+ -+ trans_for_each_update2(trans, i) -+ if ((cmp_int(iter->btree_id, i->iter->btree_id) ?: -+ bkey_cmp(pos, i->k->k.p)) <= 0) -+ break; -+ -+ return i < trans->updates2 + trans->nr_updates2 && -+ iter->btree_id == i->iter->btree_id -+ ? bkey_i_to_s_c(i->k) -+ : bkey_s_c_null; -+} -+ -+static struct bkey_s_c __bch2_btree_iter_peek_with_updates(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c k = __btree_iter_peek(iter, l); -+ struct bkey_s_c u = __btree_trans_updates_peek(iter); -+ -+ if (k.k && (!u.k || bkey_cmp(k.k->p, u.k->p) < 0)) -+ return k; -+ if (u.k && bkey_cmp(u.k->p, l->b->key.k.p) <= 0) { -+ iter->k = *u.k; -+ return u; -+ } -+ return bkey_s_c_null; -+} -+ -+struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter) -+{ -+ struct bkey_s_c k; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ while (1) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ k = __bch2_btree_iter_peek_with_updates(iter); -+ -+ if (k.k && bkey_deleted(k.k)) { -+ bch2_btree_iter_set_pos(iter, -+ (iter->flags & BTREE_ITER_IS_EXTENTS) -+ ? iter->k.p -+ : bkey_successor(iter->k.p)); -+ continue; -+ } -+ -+ if (likely(k.k)) -+ break; -+ -+ if (!btree_iter_set_pos_to_next_leaf(iter)) -+ return bkey_s_c_null; -+ } -+ -+ /* -+ * iter->pos should always be equal to the key we just -+ * returned - except extents can straddle iter->pos: -+ */ -+ if (!(iter->flags & BTREE_ITER_IS_EXTENTS) || -+ bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) -+ iter->pos = bkey_start_pos(k.k); -+ -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ return k; -+} -+ -+struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter) -+{ -+ if (unlikely(!bkey_cmp(iter->k.p, POS_MAX))) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, -+ (iter->flags & BTREE_ITER_IS_EXTENTS) -+ ? 
iter->k.p -+ : bkey_successor(iter->k.p)); -+ -+ return bch2_btree_iter_peek_with_updates(iter); -+} -+ -+/** -+ * bch2_btree_iter_peek_prev: returns first key less than or equal to -+ * iterator's current position -+ */ -+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) -+{ -+ struct bpos pos = iter->pos; -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c k; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ if (iter->uptodate == BTREE_ITER_UPTODATE && -+ !bkey_deleted(&iter->k)) -+ return btree_iter_peek_uptodate(iter); -+ -+ while (1) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ k = __btree_iter_peek(iter, l); -+ if (!k.k || bkey_cmp(bkey_start_pos(k.k), pos) > 0) -+ k = __btree_iter_prev(iter, l); -+ -+ if (likely(k.k)) -+ break; -+ -+ if (!btree_iter_set_pos_to_prev_leaf(iter)) -+ return bkey_s_c_null; -+ } -+ -+ EBUG_ON(bkey_cmp(bkey_start_pos(k.k), pos) > 0); -+ iter->pos = bkey_start_pos(k.k); -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ return k; -+} -+ -+/** -+ * bch2_btree_iter_prev: returns first key less than iterator's current -+ * position -+ */ -+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter) -+{ -+ struct bpos pos = bkey_start_pos(&iter->k); -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ if (unlikely(!bkey_cmp(pos, POS_MIN))) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, bkey_predecessor(pos)); -+ -+ return bch2_btree_iter_peek_prev(iter); -+} -+ -+static inline struct bkey_s_c -+__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct btree_node_iter node_iter; -+ struct bkey_s_c k; -+ struct bkey n; -+ int ret; -+ -+ /* keys & holes can't span inode numbers: */ -+ if (iter->pos.offset == KEY_OFFSET_MAX) { -+ if (iter->pos.inode == KEY_INODE_MAX) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, bkey_successor(iter->pos)); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ } -+ -+ /* -+ * iterator is now at the correct position for inserting at iter->pos, -+ * but we need to keep iterating until we find the first non whiteout so -+ * we know how big a hole we have, if any: -+ */ -+ -+ node_iter = l->iter; -+ k = __btree_iter_unpack(iter, l, &iter->k, -+ bch2_btree_node_iter_peek(&node_iter, l->b)); -+ -+ if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) { -+ /* -+ * We're not setting iter->uptodate because the node iterator -+ * doesn't necessarily point at the key we're returning: -+ */ -+ -+ EBUG_ON(bkey_cmp(k.k->p, iter->pos) <= 0); -+ bch2_btree_iter_verify_level(iter, 0); -+ return k; -+ } -+ -+ /* hole */ -+ -+ if (!k.k) -+ k.k = &l->b->key.k; -+ -+ bkey_init(&n); -+ n.p = iter->pos; -+ bch2_key_resize(&n, -+ min_t(u64, KEY_SIZE_MAX, -+ (k.k->p.inode == n.p.inode -+ ? 
bkey_start_offset(k.k) -+ : KEY_OFFSET_MAX) - -+ n.p.offset)); -+ -+ EBUG_ON(!n.size); -+ -+ iter->k = n; -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ -+ bch2_btree_iter_verify_level(iter, 0); -+ return (struct bkey_s_c) { &iter->k, NULL }; -+} -+ -+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_s_c k; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); -+ bch2_btree_iter_checks(iter); -+ -+ if (iter->uptodate == BTREE_ITER_UPTODATE) -+ return btree_iter_peek_uptodate(iter); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ if (iter->flags & BTREE_ITER_IS_EXTENTS) -+ return __bch2_btree_iter_peek_slot_extents(iter); -+ -+ k = __btree_iter_peek_all(iter, l, &iter->k); -+ -+ EBUG_ON(k.k && bkey_deleted(k.k) && bkey_cmp(k.k->p, iter->pos) == 0); -+ -+ if (!k.k || bkey_cmp(iter->pos, k.k->p)) { -+ /* hole */ -+ bkey_init(&iter->k); -+ iter->k.p = iter->pos; -+ k = (struct bkey_s_c) { &iter->k, NULL }; -+ } -+ -+ iter->uptodate = BTREE_ITER_UPTODATE; -+ bch2_btree_iter_verify_level(iter, 0); -+ return k; -+} -+ -+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter) -+{ -+ if (unlikely(!bkey_cmp(iter->k.p, POS_MAX))) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, -+ (iter->flags & BTREE_ITER_IS_EXTENTS) -+ ? iter->k.p -+ : bkey_successor(iter->k.p)); -+ -+ return bch2_btree_iter_peek_slot(iter); -+} -+ -+struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter) -+{ -+ struct bkey_cached *ck; -+ int ret; -+ -+ EBUG_ON(btree_iter_type(iter) != BTREE_ITER_CACHED); -+ bch2_btree_iter_checks(iter); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); -+ -+ ck = (void *) iter->l[0].b; -+ -+ EBUG_ON(iter->btree_id != ck->key.btree_id || -+ bkey_cmp(iter->pos, ck->key.pos)); -+ BUG_ON(!ck->valid); -+ -+ return bkey_i_to_s_c(ck->k); -+} -+ -+static inline void bch2_btree_iter_init(struct btree_trans *trans, -+ struct btree_iter *iter, enum btree_id btree_id, -+ struct bpos pos, unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ unsigned i; -+ -+ if (btree_node_type_is_extents(btree_id) && -+ !(flags & BTREE_ITER_NODES)) -+ flags |= BTREE_ITER_IS_EXTENTS; -+ -+ iter->trans = trans; -+ iter->pos = pos; -+ bkey_init(&iter->k); -+ iter->k.p = pos; -+ iter->flags = flags; -+ iter->uptodate = BTREE_ITER_NEED_TRAVERSE; -+ iter->btree_id = btree_id; -+ iter->level = 0; -+ iter->min_depth = 0; -+ iter->locks_want = flags & BTREE_ITER_INTENT ? 
1 : 0; -+ iter->nodes_locked = 0; -+ iter->nodes_intent_locked = 0; -+ for (i = 0; i < ARRAY_SIZE(iter->l); i++) -+ iter->l[i].b = BTREE_ITER_NO_NODE_INIT; -+ -+ prefetch(c->btree_roots[btree_id].b); -+} -+ -+/* new transactional stuff: */ -+ -+static inline void __bch2_trans_iter_free(struct btree_trans *trans, -+ unsigned idx) -+{ -+ __bch2_btree_iter_unlock(&trans->iters[idx]); -+ trans->iters_linked &= ~(1ULL << idx); -+ trans->iters_live &= ~(1ULL << idx); -+ trans->iters_touched &= ~(1ULL << idx); -+} -+ -+int bch2_trans_iter_put(struct btree_trans *trans, -+ struct btree_iter *iter) -+{ -+ int ret; -+ -+ if (IS_ERR_OR_NULL(iter)) -+ return 0; -+ -+ BUG_ON(trans->iters + iter->idx != iter); -+ -+ ret = btree_iter_err(iter); -+ -+ if (!(trans->iters_touched & (1ULL << iter->idx)) && -+ !(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT)) -+ __bch2_trans_iter_free(trans, iter->idx); -+ -+ trans->iters_live &= ~(1ULL << iter->idx); -+ return ret; -+} -+ -+int bch2_trans_iter_free(struct btree_trans *trans, -+ struct btree_iter *iter) -+{ -+ if (IS_ERR_OR_NULL(iter)) -+ return 0; -+ -+ trans->iters_touched &= ~(1ULL << iter->idx); -+ -+ return bch2_trans_iter_put(trans, iter); -+} -+ -+static int bch2_trans_realloc_iters(struct btree_trans *trans, -+ unsigned new_size) -+{ -+ void *p, *new_iters, *new_updates, *new_updates2; -+ size_t iters_bytes; -+ size_t updates_bytes; -+ -+ new_size = roundup_pow_of_two(new_size); -+ -+ BUG_ON(new_size > BTREE_ITER_MAX); -+ -+ if (new_size <= trans->size) -+ return 0; -+ -+ BUG_ON(trans->used_mempool); -+ -+ bch2_trans_unlock(trans); -+ -+ iters_bytes = sizeof(struct btree_iter) * new_size; -+ updates_bytes = sizeof(struct btree_insert_entry) * new_size; -+ -+ p = kmalloc(iters_bytes + -+ updates_bytes + -+ updates_bytes, GFP_NOFS); -+ if (p) -+ goto success; -+ -+ p = mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS); -+ new_size = BTREE_ITER_MAX; -+ -+ trans->used_mempool = true; -+success: -+ new_iters = p; p += iters_bytes; -+ new_updates = p; p += updates_bytes; -+ new_updates2 = p; p += updates_bytes; -+ -+ memcpy(new_iters, trans->iters, -+ sizeof(struct btree_iter) * trans->nr_iters); -+ memcpy(new_updates, trans->updates, -+ sizeof(struct btree_insert_entry) * trans->nr_updates); -+ memcpy(new_updates2, trans->updates2, -+ sizeof(struct btree_insert_entry) * trans->nr_updates2); -+ -+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) -+ memset(trans->iters, POISON_FREE, -+ sizeof(struct btree_iter) * trans->nr_iters + -+ sizeof(struct btree_insert_entry) * trans->nr_iters); -+ -+ if (trans->iters != trans->iters_onstack) -+ kfree(trans->iters); -+ -+ trans->iters = new_iters; -+ trans->updates = new_updates; -+ trans->updates2 = new_updates2; -+ trans->size = new_size; -+ -+ if (trans->iters_live) { -+ trace_trans_restart_iters_realloced(trans->ip, trans->size); -+ return -EINTR; -+ } -+ -+ return 0; -+} -+ -+static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans) -+{ -+ unsigned idx = __ffs64(~trans->iters_linked); -+ -+ if (idx < trans->nr_iters) -+ goto got_slot; -+ -+ if (trans->nr_iters == trans->size) { -+ int ret; -+ -+ if (trans->nr_iters >= BTREE_ITER_MAX) { -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) { -+ pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps", -+ bch2_btree_ids[iter->btree_id], -+ iter->pos.inode, -+ iter->pos.offset, -+ (trans->iters_live & (1ULL << iter->idx)) ? " live" : "", -+ (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "", -+ iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? 
" keep" : "", -+ (void *) iter->ip_allocated); -+ } -+ -+ panic("trans iter oveflow\n"); -+ } -+ -+ ret = bch2_trans_realloc_iters(trans, trans->size * 2); -+ if (ret) -+ return ERR_PTR(ret); -+ } -+ -+ idx = trans->nr_iters++; -+ BUG_ON(trans->nr_iters > trans->size); -+ -+ trans->iters[idx].idx = idx; -+got_slot: -+ BUG_ON(trans->iters_linked & (1ULL << idx)); -+ trans->iters_linked |= 1ULL << idx; -+ trans->iters[idx].flags = 0; -+ return &trans->iters[idx]; -+} -+ -+static inline void btree_iter_copy(struct btree_iter *dst, -+ struct btree_iter *src) -+{ -+ unsigned i, idx = dst->idx; -+ -+ *dst = *src; -+ dst->idx = idx; -+ dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; -+ -+ for (i = 0; i < BTREE_MAX_DEPTH; i++) -+ if (btree_node_locked(dst, i)) -+ six_lock_increment(&dst->l[i].b->c.lock, -+ __btree_lock_want(dst, i)); -+ -+ dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; -+ dst->flags &= ~BTREE_ITER_SET_POS_AFTER_COMMIT; -+} -+ -+static inline struct bpos bpos_diff(struct bpos l, struct bpos r) -+{ -+ if (bkey_cmp(l, r) > 0) -+ swap(l, r); -+ -+ return POS(r.inode - l.inode, r.offset - l.offset); -+} -+ -+static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans, -+ unsigned btree_id, struct bpos pos, -+ unsigned flags) -+{ -+ struct btree_iter *iter, *best = NULL; -+ -+ BUG_ON(trans->nr_iters > BTREE_ITER_MAX); -+ -+ trans_for_each_iter(trans, iter) { -+ if (btree_iter_type(iter) != (flags & BTREE_ITER_TYPE)) -+ continue; -+ -+ if (iter->btree_id != btree_id) -+ continue; -+ -+ if (best && -+ bkey_cmp(bpos_diff(best->pos, pos), -+ bpos_diff(iter->pos, pos)) < 0) -+ continue; -+ -+ best = iter; -+ } -+ -+ if (!best) { -+ iter = btree_trans_iter_alloc(trans); -+ if (IS_ERR(iter)) -+ return iter; -+ -+ bch2_btree_iter_init(trans, iter, btree_id, pos, flags); -+ } else if ((trans->iters_live & (1ULL << best->idx)) || -+ (best->flags & BTREE_ITER_KEEP_UNTIL_COMMIT)) { -+ iter = btree_trans_iter_alloc(trans); -+ if (IS_ERR(iter)) -+ return iter; -+ -+ btree_iter_copy(iter, best); -+ } else { -+ iter = best; -+ } -+ -+ iter->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; -+ iter->flags &= ~BTREE_ITER_USER_FLAGS; -+ iter->flags |= flags & BTREE_ITER_USER_FLAGS; -+ -+ if (iter->flags & BTREE_ITER_INTENT) -+ bch2_btree_iter_upgrade(iter, 1); -+ else -+ bch2_btree_iter_downgrade(iter); -+ -+ BUG_ON(iter->btree_id != btree_id); -+ BUG_ON((iter->flags ^ flags) & BTREE_ITER_TYPE); -+ BUG_ON(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT); -+ BUG_ON(iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT); -+ BUG_ON(trans->iters_live & (1ULL << iter->idx)); -+ -+ trans->iters_live |= 1ULL << iter->idx; -+ trans->iters_touched |= 1ULL << iter->idx; -+ -+ return iter; -+} -+ -+struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, -+ enum btree_id btree_id, -+ struct bpos pos, unsigned flags) -+{ -+ struct btree_iter *iter = -+ __btree_trans_get_iter(trans, btree_id, pos, flags); -+ -+ if (!IS_ERR(iter)) -+ __bch2_btree_iter_set_pos(iter, pos, -+ btree_node_type_is_extents(btree_id)); -+ return iter; -+} -+ -+struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans, -+ enum btree_id btree_id, -+ struct bpos pos, -+ unsigned locks_want, -+ unsigned depth, -+ unsigned flags) -+{ -+ struct btree_iter *iter = -+ __btree_trans_get_iter(trans, btree_id, pos, -+ flags|BTREE_ITER_NODES); -+ unsigned i; -+ -+ BUG_ON(IS_ERR(iter)); -+ BUG_ON(bkey_cmp(iter->pos, pos)); -+ -+ iter->locks_want = locks_want; -+ iter->level = depth; -+ iter->min_depth = depth; -+ -+ for (i = 0; i < 
ARRAY_SIZE(iter->l); i++) -+ iter->l[i].b = NULL; -+ iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT; -+ -+ return iter; -+} -+ -+struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans, -+ struct btree_iter *src) -+{ -+ struct btree_iter *iter; -+ -+ iter = btree_trans_iter_alloc(trans); -+ if (IS_ERR(iter)) -+ return iter; -+ -+ btree_iter_copy(iter, src); -+ -+ trans->iters_live |= 1ULL << iter->idx; -+ /* -+ * We don't need to preserve this iter since it's cheap to copy it -+ * again - this will cause trans_iter_put() to free it right away: -+ */ -+ trans->iters_touched &= ~(1ULL << iter->idx); -+ -+ return iter; -+} -+ -+static int bch2_trans_preload_mem(struct btree_trans *trans, size_t size) -+{ -+ if (size > trans->mem_bytes) { -+ size_t old_bytes = trans->mem_bytes; -+ size_t new_bytes = roundup_pow_of_two(size); -+ void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS); -+ -+ if (!new_mem) -+ return -ENOMEM; -+ -+ trans->mem = new_mem; -+ trans->mem_bytes = new_bytes; -+ -+ if (old_bytes) { -+ trace_trans_restart_mem_realloced(trans->ip, new_bytes); -+ return -EINTR; -+ } -+ } -+ -+ return 0; -+} -+ -+void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) -+{ -+ void *p; -+ int ret; -+ -+ ret = bch2_trans_preload_mem(trans, trans->mem_top + size); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ p = trans->mem + trans->mem_top; -+ trans->mem_top += size; -+ return p; -+} -+ -+inline void bch2_trans_unlink_iters(struct btree_trans *trans) -+{ -+ u64 iters = trans->iters_linked & -+ ~trans->iters_touched & -+ ~trans->iters_live; -+ -+ while (iters) { -+ unsigned idx = __ffs64(iters); -+ -+ iters &= ~(1ULL << idx); -+ __bch2_trans_iter_free(trans, idx); -+ } -+} -+ -+void bch2_trans_reset(struct btree_trans *trans, unsigned flags) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) -+ iter->flags &= ~(BTREE_ITER_KEEP_UNTIL_COMMIT| -+ BTREE_ITER_SET_POS_AFTER_COMMIT); -+ -+ bch2_trans_unlink_iters(trans); -+ -+ trans->iters_touched &= trans->iters_live; -+ -+ trans->need_reset = 0; -+ trans->nr_updates = 0; -+ trans->nr_updates2 = 0; -+ trans->mem_top = 0; -+ -+ trans->extra_journal_entries = NULL; -+ trans->extra_journal_entry_u64s = 0; -+ -+ if (trans->fs_usage_deltas) { -+ trans->fs_usage_deltas->used = 0; -+ memset(&trans->fs_usage_deltas->memset_start, 0, -+ (void *) &trans->fs_usage_deltas->memset_end - -+ (void *) &trans->fs_usage_deltas->memset_start); -+ } -+ -+ if (!(flags & TRANS_RESET_NOTRAVERSE)) -+ bch2_btree_iter_traverse_all(trans); -+} -+ -+void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, -+ unsigned expected_nr_iters, -+ size_t expected_mem_bytes) -+{ -+ memset(trans, 0, offsetof(struct btree_trans, iters_onstack)); -+ -+ /* -+ * reallocating iterators currently completely breaks -+ * bch2_trans_iter_put(): -+ */ -+ expected_nr_iters = BTREE_ITER_MAX; -+ -+ trans->c = c; -+ trans->ip = _RET_IP_; -+ trans->size = ARRAY_SIZE(trans->iters_onstack); -+ trans->iters = trans->iters_onstack; -+ trans->updates = trans->updates_onstack; -+ trans->updates2 = trans->updates2_onstack; -+ trans->fs_usage_deltas = NULL; -+ -+ if (expected_nr_iters > trans->size) -+ bch2_trans_realloc_iters(trans, expected_nr_iters); -+ -+ if (expected_mem_bytes) -+ bch2_trans_preload_mem(trans, expected_mem_bytes); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ trans->pid = current->pid; -+ mutex_lock(&c->btree_trans_lock); -+ list_add(&trans->list, &c->btree_trans_list); -+ mutex_unlock(&c->btree_trans_lock); -+#endif -+} -+ -+int 
bch2_trans_exit(struct btree_trans *trans) -+{ -+ bch2_trans_unlock(trans); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ mutex_lock(&trans->c->btree_trans_lock); -+ list_del(&trans->list); -+ mutex_unlock(&trans->c->btree_trans_lock); -+#endif -+ -+ bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres); -+ -+ kfree(trans->fs_usage_deltas); -+ kfree(trans->mem); -+ if (trans->used_mempool) -+ mempool_free(trans->iters, &trans->c->btree_iters_pool); -+ else if (trans->iters != trans->iters_onstack) -+ kfree(trans->iters); -+ trans->mem = (void *) 0x1; -+ trans->iters = (void *) 0x1; -+ -+ return trans->error ? -EIO : 0; -+} -+ -+static void bch2_btree_iter_node_to_text(struct printbuf *out, -+ struct btree_bkey_cached_common *_b, -+ enum btree_iter_type type) -+{ -+ pr_buf(out, " %px l=%u %s:", -+ _b, _b->level, bch2_btree_ids[_b->btree_id]); -+ bch2_bpos_to_text(out, btree_node_pos(_b, type)); -+} -+ -+void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct btree_trans *trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ unsigned l; -+ -+ mutex_lock(&c->btree_trans_lock); -+ list_for_each_entry(trans, &c->btree_trans_list, list) { -+ pr_buf(out, "%i %px %ps\n", trans->pid, trans, (void *) trans->ip); -+ -+ trans_for_each_iter(trans, iter) { -+ if (!iter->nodes_locked) -+ continue; -+ -+ pr_buf(out, " iter %u %s:", -+ iter->idx, -+ bch2_btree_ids[iter->btree_id]); -+ bch2_bpos_to_text(out, iter->pos); -+ pr_buf(out, "\n"); -+ -+ for (l = 0; l < BTREE_MAX_DEPTH; l++) { -+ if (btree_node_locked(iter, l)) { -+ pr_buf(out, " %s l=%u ", -+ btree_node_intent_locked(iter, l) ? "i" : "r", l); -+ bch2_btree_iter_node_to_text(out, -+ (void *) iter->l[l].b, -+ btree_iter_type(iter)); -+ pr_buf(out, "\n"); -+ } -+ } -+ } -+ -+ b = READ_ONCE(trans->locking); -+ if (b) { -+ pr_buf(out, " locking iter %u l=%u %s:", -+ trans->locking_iter_idx, -+ trans->locking_level, -+ bch2_btree_ids[trans->locking_btree_id]); -+ bch2_bpos_to_text(out, trans->locking_pos); -+ -+ -+ pr_buf(out, " node "); -+ bch2_btree_iter_node_to_text(out, -+ (void *) b, -+ btree_iter_type(&trans->iters[trans->locking_iter_idx])); -+ pr_buf(out, "\n"); -+ } -+ } -+ mutex_unlock(&c->btree_trans_lock); -+#endif -+} -+ -+void bch2_fs_btree_iter_exit(struct bch_fs *c) -+{ -+ mempool_exit(&c->btree_iters_pool); -+} -+ -+int bch2_fs_btree_iter_init(struct bch_fs *c) -+{ -+ unsigned nr = BTREE_ITER_MAX; -+ -+ INIT_LIST_HEAD(&c->btree_trans_list); -+ mutex_init(&c->btree_trans_lock); -+ -+ return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1, -+ sizeof(struct btree_iter) * nr + -+ sizeof(struct btree_insert_entry) * nr + -+ sizeof(struct btree_insert_entry) * nr); -+} -diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h -new file mode 100644 -index 000000000000..bd9ec3ec9a92 ---- /dev/null -+++ b/fs/bcachefs/btree_iter.h -@@ -0,0 +1,314 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_ITER_H -+#define _BCACHEFS_BTREE_ITER_H -+ -+#include "bset.h" -+#include "btree_types.h" -+ -+static inline void btree_iter_set_dirty(struct btree_iter *iter, -+ enum btree_iter_uptodate u) -+{ -+ iter->uptodate = max_t(unsigned, iter->uptodate, u); -+} -+ -+static inline struct btree *btree_iter_node(struct btree_iter *iter, -+ unsigned level) -+{ -+ return level < BTREE_MAX_DEPTH ? 
iter->l[level].b : NULL; -+} -+ -+static inline bool btree_node_lock_seq_matches(const struct btree_iter *iter, -+ const struct btree *b, unsigned level) -+{ -+ /* -+ * We don't compare the low bits of the lock sequence numbers because -+ * @iter might have taken a write lock on @b, and we don't want to skip -+ * the linked iterator if the sequence numbers were equal before taking -+ * that write lock. The lock sequence number is incremented by taking -+ * and releasing write locks and is even when unlocked: -+ */ -+ return iter->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1; -+} -+ -+static inline struct btree *btree_node_parent(struct btree_iter *iter, -+ struct btree *b) -+{ -+ return btree_iter_node(iter, b->c.level + 1); -+} -+ -+static inline bool btree_trans_has_multiple_iters(const struct btree_trans *trans) -+{ -+ return hweight64(trans->iters_linked) > 1; -+} -+ -+static inline int btree_iter_err(const struct btree_iter *iter) -+{ -+ return iter->flags & BTREE_ITER_ERROR ? -EIO : 0; -+} -+ -+/* Iterate over iters within a transaction: */ -+ -+#define trans_for_each_iter_all(_trans, _iter) \ -+ for (_iter = (_trans)->iters; \ -+ _iter < (_trans)->iters + (_trans)->nr_iters; \ -+ _iter++) -+ -+static inline struct btree_iter * -+__trans_next_iter(struct btree_trans *trans, unsigned idx) -+{ -+ EBUG_ON(idx < trans->nr_iters && trans->iters[idx].idx != idx); -+ -+ for (; idx < trans->nr_iters; idx++) -+ if (trans->iters_linked & (1ULL << idx)) -+ return &trans->iters[idx]; -+ -+ return NULL; -+} -+ -+#define trans_for_each_iter(_trans, _iter) \ -+ for (_iter = __trans_next_iter((_trans), 0); \ -+ (_iter); \ -+ _iter = __trans_next_iter((_trans), (_iter)->idx + 1)) -+ -+static inline bool __iter_has_node(const struct btree_iter *iter, -+ const struct btree *b) -+{ -+ return iter->l[b->c.level].b == b && -+ btree_node_lock_seq_matches(iter, b, b->c.level); -+} -+ -+static inline struct btree_iter * -+__trans_next_iter_with_node(struct btree_trans *trans, struct btree *b, -+ unsigned idx) -+{ -+ struct btree_iter *iter = __trans_next_iter(trans, idx); -+ -+ while (iter && !__iter_has_node(iter, b)) -+ iter = __trans_next_iter(trans, iter->idx + 1); -+ -+ return iter; -+} -+ -+#define trans_for_each_iter_with_node(_trans, _b, _iter) \ -+ for (_iter = __trans_next_iter_with_node((_trans), (_b), 0); \ -+ (_iter); \ -+ _iter = __trans_next_iter_with_node((_trans), (_b), \ -+ (_iter)->idx + 1)) -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_btree_trans_verify_iters(struct btree_trans *, struct btree *); -+void bch2_btree_trans_verify_locks(struct btree_trans *); -+#else -+static inline void bch2_btree_trans_verify_iters(struct btree_trans *trans, -+ struct btree *b) {} -+static inline void bch2_btree_trans_verify_locks(struct btree_trans *iter) {} -+#endif -+ -+void bch2_btree_iter_fix_key_modified(struct btree_iter *, struct btree *, -+ struct bkey_packed *); -+void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *, -+ struct btree_node_iter *, struct bkey_packed *, -+ unsigned, unsigned); -+ -+bool bch2_btree_iter_relock(struct btree_iter *, bool); -+bool bch2_trans_relock(struct btree_trans *); -+void bch2_trans_unlock(struct btree_trans *); -+ -+bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned); -+bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned); -+ -+static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter, -+ unsigned new_locks_want) -+{ -+ new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH); -+ -+ return iter->locks_want < 
new_locks_want -+ ? (!iter->trans->nounlock -+ ? __bch2_btree_iter_upgrade(iter, new_locks_want) -+ : __bch2_btree_iter_upgrade_nounlock(iter, new_locks_want)) -+ : iter->uptodate <= BTREE_ITER_NEED_PEEK; -+} -+ -+void __bch2_btree_iter_downgrade(struct btree_iter *, unsigned); -+ -+static inline void bch2_btree_iter_downgrade(struct btree_iter *iter) -+{ -+ if (iter->locks_want > (iter->flags & BTREE_ITER_INTENT) ? 1 : 0) -+ __bch2_btree_iter_downgrade(iter, 0); -+} -+ -+void bch2_trans_downgrade(struct btree_trans *); -+ -+void bch2_btree_iter_node_replace(struct btree_iter *, struct btree *); -+void bch2_btree_iter_node_drop(struct btree_iter *, struct btree *); -+ -+void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *); -+ -+int __must_check __bch2_btree_iter_traverse(struct btree_iter *); -+ -+static inline int __must_check -+bch2_btree_iter_traverse(struct btree_iter *iter) -+{ -+ return iter->uptodate >= BTREE_ITER_NEED_RELOCK -+ ? __bch2_btree_iter_traverse(iter) -+ : 0; -+} -+ -+int bch2_btree_iter_traverse_all(struct btree_trans *); -+ -+struct btree *bch2_btree_iter_peek_node(struct btree_iter *); -+struct btree *bch2_btree_iter_next_node(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *); -+ -+void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos); -+void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool); -+void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos); -+ -+static inline int btree_iter_cmp(const struct btree_iter *l, -+ const struct btree_iter *r) -+{ -+ return cmp_int(l->btree_id, r->btree_id) ?: -+ -cmp_int(btree_iter_type(l), btree_iter_type(r)) ?: -+ bkey_cmp(l->pos, r->pos); -+} -+ -+/* -+ * Unlocks before scheduling -+ * Note: does not revalidate iterator -+ */ -+static inline int bch2_trans_cond_resched(struct btree_trans *trans) -+{ -+ if (need_resched() || race_fault()) { -+ bch2_trans_unlock(trans); -+ schedule(); -+ return bch2_trans_relock(trans) ? 0 : -EINTR; -+ } else { -+ return 0; -+ } -+} -+ -+#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ -+ _locks_want, _depth, _flags, _b) \ -+ for (iter = bch2_trans_get_node_iter((_trans), (_btree_id), \ -+ _start, _locks_want, _depth, _flags), \ -+ _b = bch2_btree_iter_peek_node(_iter); \ -+ (_b); \ -+ (_b) = bch2_btree_iter_next_node(_iter)) -+ -+#define for_each_btree_node(_trans, _iter, _btree_id, _start, \ -+ _flags, _b) \ -+ __for_each_btree_node(_trans, _iter, _btree_id, _start, \ -+ 0, 0, _flags, _b) -+ -+static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, -+ unsigned flags) -+{ -+ if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_CACHED) -+ return bch2_btree_iter_peek_cached(iter); -+ else -+ return flags & BTREE_ITER_SLOTS -+ ? bch2_btree_iter_peek_slot(iter) -+ : bch2_btree_iter_peek(iter); -+} -+ -+static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter, -+ unsigned flags) -+{ -+ return flags & BTREE_ITER_SLOTS -+ ? 
bch2_btree_iter_next_slot(iter) -+ : bch2_btree_iter_next(iter); -+} -+ -+static inline int bkey_err(struct bkey_s_c k) -+{ -+ return PTR_ERR_OR_ZERO(k.k); -+} -+ -+#define for_each_btree_key(_trans, _iter, _btree_id, \ -+ _start, _flags, _k, _ret) \ -+ for ((_ret) = PTR_ERR_OR_ZERO((_iter) = \ -+ bch2_trans_get_iter((_trans), (_btree_id), \ -+ (_start), (_flags))) ?: \ -+ PTR_ERR_OR_ZERO(((_k) = \ -+ __bch2_btree_iter_peek(_iter, _flags)).k); \ -+ !_ret && (_k).k; \ -+ (_ret) = PTR_ERR_OR_ZERO(((_k) = \ -+ __bch2_btree_iter_next(_iter, _flags)).k)) -+ -+#define for_each_btree_key_continue(_iter, _flags, _k, _ret) \ -+ for ((_k) = __bch2_btree_iter_peek(_iter, _flags); \ -+ !((_ret) = bkey_err(_k)) && (_k).k; \ -+ (_k) = __bch2_btree_iter_next(_iter, _flags)) -+ -+/* new multiple iterator interface: */ -+ -+int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *); -+int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *); -+ -+void bch2_trans_unlink_iters(struct btree_trans *); -+ -+struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id, -+ struct bpos, unsigned); -+ -+static inline struct btree_iter * -+bch2_trans_get_iter(struct btree_trans *trans, enum btree_id btree_id, -+ struct bpos pos, unsigned flags) -+{ -+ struct btree_iter *iter = -+ __bch2_trans_get_iter(trans, btree_id, pos, flags); -+ -+ if (!IS_ERR(iter)) -+ iter->ip_allocated = _THIS_IP_; -+ return iter; -+} -+ -+struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *, -+ struct btree_iter *); -+static inline struct btree_iter * -+bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *src) -+{ -+ struct btree_iter *iter = -+ __bch2_trans_copy_iter(trans, src); -+ -+ if (!IS_ERR(iter)) -+ iter->ip_allocated = _THIS_IP_; -+ return iter; -+ -+} -+ -+struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *, -+ enum btree_id, struct bpos, -+ unsigned, unsigned, unsigned); -+ -+#define TRANS_RESET_NOTRAVERSE (1 << 0) -+ -+void bch2_trans_reset(struct btree_trans *, unsigned); -+ -+static inline void bch2_trans_begin(struct btree_trans *trans) -+{ -+ return bch2_trans_reset(trans, 0); -+} -+ -+void *bch2_trans_kmalloc(struct btree_trans *, size_t); -+void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t); -+int bch2_trans_exit(struct btree_trans *); -+ -+void bch2_btree_trans_to_text(struct printbuf *, struct bch_fs *); -+ -+void bch2_fs_btree_iter_exit(struct bch_fs *); -+int bch2_fs_btree_iter_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_BTREE_ITER_H */ -diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c -new file mode 100644 -index 000000000000..61662750dfc0 ---- /dev/null -+++ b/fs/bcachefs/btree_key_cache.c -@@ -0,0 +1,519 @@ -+ -+#include "bcachefs.h" -+#include "btree_cache.h" -+#include "btree_iter.h" -+#include "btree_key_cache.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+#include "error.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+ -+#include -+ -+static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg, -+ const void *obj) -+{ -+ const struct bkey_cached *ck = obj; -+ const struct bkey_cached_key *key = arg->key; -+ -+ return cmp_int(ck->key.btree_id, key->btree_id) ?: -+ bkey_cmp(ck->key.pos, key->pos); -+} -+ -+static const struct rhashtable_params bch2_btree_key_cache_params = { -+ .head_offset = offsetof(struct bkey_cached, hash), -+ .key_offset = offsetof(struct bkey_cached, key), -+ .key_len = sizeof(struct bkey_cached_key), -+ .obj_cmpfn = 
bch2_btree_key_cache_cmp_fn, -+}; -+ -+__flatten -+static inline struct bkey_cached * -+btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos) -+{ -+ struct bkey_cached_key key = { -+ .btree_id = btree_id, -+ .pos = pos, -+ }; -+ -+ return rhashtable_lookup_fast(&c->btree_key_cache.table, &key, -+ bch2_btree_key_cache_params); -+} -+ -+static bool bkey_cached_lock_for_evict(struct bkey_cached *ck) -+{ -+ if (!six_trylock_intent(&ck->c.lock)) -+ return false; -+ -+ if (!six_trylock_write(&ck->c.lock)) { -+ six_unlock_intent(&ck->c.lock); -+ return false; -+ } -+ -+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ six_unlock_write(&ck->c.lock); -+ six_unlock_intent(&ck->c.lock); -+ return false; -+ } -+ -+ return true; -+} -+ -+static void bkey_cached_evict(struct btree_key_cache *c, -+ struct bkey_cached *ck) -+{ -+ BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash, -+ bch2_btree_key_cache_params)); -+ memset(&ck->key, ~0, sizeof(ck->key)); -+} -+ -+static void bkey_cached_free(struct btree_key_cache *c, -+ struct bkey_cached *ck) -+{ -+ list_move(&ck->list, &c->freed); -+ -+ kfree(ck->k); -+ ck->k = NULL; -+ ck->u64s = 0; -+ -+ six_unlock_write(&ck->c.lock); -+ six_unlock_intent(&ck->c.lock); -+} -+ -+static struct bkey_cached * -+bkey_cached_alloc(struct btree_key_cache *c) -+{ -+ struct bkey_cached *ck; -+ -+ list_for_each_entry(ck, &c->freed, list) -+ if (bkey_cached_lock_for_evict(ck)) -+ return ck; -+ -+ list_for_each_entry(ck, &c->clean, list) -+ if (bkey_cached_lock_for_evict(ck)) { -+ bkey_cached_evict(c, ck); -+ return ck; -+ } -+ -+ ck = kzalloc(sizeof(*ck), GFP_NOFS); -+ if (!ck) -+ return NULL; -+ -+ INIT_LIST_HEAD(&ck->list); -+ six_lock_init(&ck->c.lock); -+ BUG_ON(!six_trylock_intent(&ck->c.lock)); -+ BUG_ON(!six_trylock_write(&ck->c.lock)); -+ -+ return ck; -+} -+ -+static struct bkey_cached * -+btree_key_cache_create(struct btree_key_cache *c, -+ enum btree_id btree_id, -+ struct bpos pos) -+{ -+ struct bkey_cached *ck; -+ -+ ck = bkey_cached_alloc(c); -+ if (!ck) -+ return ERR_PTR(-ENOMEM); -+ -+ ck->c.level = 0; -+ ck->c.btree_id = btree_id; -+ ck->key.btree_id = btree_id; -+ ck->key.pos = pos; -+ ck->valid = false; -+ -+ BUG_ON(ck->flags); -+ -+ if (rhashtable_lookup_insert_fast(&c->table, -+ &ck->hash, -+ bch2_btree_key_cache_params)) { -+ /* We raced with another fill: */ -+ bkey_cached_free(c, ck); -+ return NULL; -+ } -+ -+ list_move(&ck->list, &c->clean); -+ six_unlock_write(&ck->c.lock); -+ -+ return ck; -+} -+ -+static int btree_key_cache_fill(struct btree_trans *trans, -+ struct btree_iter *ck_iter, -+ struct bkey_cached *ck) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ unsigned new_u64s = 0; -+ struct bkey_i *new_k = NULL; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, ck->key.btree_id, -+ ck->key.pos, BTREE_ITER_SLOTS); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) { -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+ } -+ -+ if (!bch2_btree_node_relock(ck_iter, 0)) { -+ bch2_trans_iter_put(trans, iter); -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ return -EINTR; -+ } -+ -+ if (k.k->u64s > ck->u64s) { -+ new_u64s = roundup_pow_of_two(k.k->u64s); -+ new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS); -+ if (!new_k) { -+ bch2_trans_iter_put(trans, iter); -+ return -ENOMEM; -+ } -+ } -+ -+ bch2_btree_node_lock_write(ck_iter->l[0].b, ck_iter); -+ if (new_k) { -+ kfree(ck->k); -+ ck->u64s = new_u64s; -+ ck->k = new_k; -+ } -+ -+ 
bkey_reassemble(ck->k, k); -+ ck->valid = true; -+ bch2_btree_node_unlock_write(ck_iter->l[0].b, ck_iter); -+ -+ /* We're not likely to need this iterator again: */ -+ bch2_trans_iter_free(trans, iter); -+ -+ return 0; -+} -+ -+static int bkey_cached_check_fn(struct six_lock *lock, void *p) -+{ -+ struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock); -+ const struct btree_iter *iter = p; -+ -+ return ck->key.btree_id == iter->btree_id && -+ !bkey_cmp(ck->key.pos, iter->pos) ? 0 : -1; -+} -+ -+int bch2_btree_iter_traverse_cached(struct btree_iter *iter) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct bch_fs *c = trans->c; -+ struct bkey_cached *ck; -+ int ret = 0; -+ -+ BUG_ON(iter->level); -+ -+ if (btree_node_locked(iter, 0)) { -+ ck = (void *) iter->l[0].b; -+ goto fill; -+ } -+retry: -+ ck = btree_key_cache_find(c, iter->btree_id, iter->pos); -+ if (!ck) { -+ if (iter->flags & BTREE_ITER_CACHED_NOCREATE) { -+ iter->l[0].b = NULL; -+ return 0; -+ } -+ -+ mutex_lock(&c->btree_key_cache.lock); -+ ck = btree_key_cache_create(&c->btree_key_cache, -+ iter->btree_id, iter->pos); -+ mutex_unlock(&c->btree_key_cache.lock); -+ -+ ret = PTR_ERR_OR_ZERO(ck); -+ if (ret) -+ goto err; -+ if (!ck) -+ goto retry; -+ -+ mark_btree_node_locked(iter, 0, SIX_LOCK_intent); -+ iter->locks_want = 1; -+ } else { -+ enum six_lock_type lock_want = __btree_lock_want(iter, 0); -+ -+ if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want, -+ bkey_cached_check_fn, iter)) { -+ if (ck->key.btree_id != iter->btree_id || -+ bkey_cmp(ck->key.pos, iter->pos)) { -+ goto retry; -+ } -+ -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ ret = -EINTR; -+ goto err; -+ } -+ -+ if (ck->key.btree_id != iter->btree_id || -+ bkey_cmp(ck->key.pos, iter->pos)) { -+ six_unlock_type(&ck->c.lock, lock_want); -+ goto retry; -+ } -+ -+ mark_btree_node_locked(iter, 0, lock_want); -+ } -+ -+ iter->l[0].lock_seq = ck->c.lock.state.seq; -+ iter->l[0].b = (void *) ck; -+fill: -+ if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) { -+ if (!btree_node_intent_locked(iter, 0)) -+ bch2_btree_iter_upgrade(iter, 1); -+ if (!btree_node_intent_locked(iter, 0)) { -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ ret = -EINTR; -+ goto err; -+ } -+ -+ ret = btree_key_cache_fill(trans, iter, ck); -+ if (ret) -+ goto err; -+ } -+ -+ iter->uptodate = BTREE_ITER_NEED_PEEK; -+ bch2_btree_iter_downgrade(iter); -+ return ret; -+err: -+ if (ret != -EINTR) { -+ btree_node_unlock(iter, 0); -+ iter->flags |= BTREE_ITER_ERROR; -+ iter->l[0].b = BTREE_ITER_NO_NODE_ERROR; -+ } -+ return ret; -+} -+ -+static int btree_key_cache_flush_pos(struct btree_trans *trans, -+ struct bkey_cached_key key, -+ u64 journal_seq, -+ bool evict) -+{ -+ struct bch_fs *c = trans->c; -+ struct journal *j = &c->journal; -+ struct btree_iter *c_iter = NULL, *b_iter = NULL; -+ struct bkey_cached *ck; -+ int ret; -+ -+ b_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos, -+ BTREE_ITER_SLOTS| -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(b_iter); -+ if (ret) -+ goto out; -+ -+ c_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos, -+ BTREE_ITER_CACHED| -+ BTREE_ITER_CACHED_NOFILL| -+ BTREE_ITER_CACHED_NOCREATE| -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(c_iter); -+ if (ret) -+ goto out; -+retry: -+ ret = bch2_btree_iter_traverse(c_iter); -+ if (ret) -+ goto err; -+ -+ ck = (void *) c_iter->l[0].b; -+ if (!ck || -+ (journal_seq && ck->journal.seq != journal_seq)) -+ goto out; -+ -+ if (!test_bit(BKEY_CACHED_DIRTY, 
&ck->flags)) { -+ if (!evict) -+ goto out; -+ goto evict; -+ } -+ -+ ret = bch2_btree_iter_traverse(b_iter) ?: -+ bch2_trans_update(trans, b_iter, ck->k, BTREE_TRIGGER_NORUN) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_USE_ALLOC_RESERVE| -+ BTREE_INSERT_JOURNAL_RESERVED| -+ BTREE_INSERT_JOURNAL_RECLAIM); -+err: -+ if (ret == -EINTR) -+ goto retry; -+ -+ BUG_ON(ret && !bch2_journal_error(j)); -+ -+ if (ret) -+ goto out; -+ -+ bch2_journal_pin_drop(j, &ck->journal); -+ bch2_journal_preres_put(j, &ck->res); -+ clear_bit(BKEY_CACHED_DIRTY, &ck->flags); -+ -+ if (!evict) { -+ mutex_lock(&c->btree_key_cache.lock); -+ list_move_tail(&ck->list, &c->btree_key_cache.clean); -+ mutex_unlock(&c->btree_key_cache.lock); -+ } else { -+evict: -+ BUG_ON(!btree_node_intent_locked(c_iter, 0)); -+ -+ mark_btree_node_unlocked(c_iter, 0); -+ c_iter->l[0].b = NULL; -+ -+ six_lock_write(&ck->c.lock, NULL, NULL); -+ -+ mutex_lock(&c->btree_key_cache.lock); -+ bkey_cached_evict(&c->btree_key_cache, ck); -+ bkey_cached_free(&c->btree_key_cache, ck); -+ mutex_unlock(&c->btree_key_cache.lock); -+ } -+out: -+ bch2_trans_iter_put(trans, b_iter); -+ bch2_trans_iter_put(trans, c_iter); -+ return ret; -+} -+ -+static void btree_key_cache_journal_flush(struct journal *j, -+ struct journal_entry_pin *pin, -+ u64 seq) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bkey_cached *ck = -+ container_of(pin, struct bkey_cached, journal); -+ struct bkey_cached_key key; -+ struct btree_trans trans; -+ -+ six_lock_read(&ck->c.lock, NULL, NULL); -+ key = ck->key; -+ -+ if (ck->journal.seq != seq || -+ !test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ six_unlock_read(&ck->c.lock); -+ return; -+ } -+ six_unlock_read(&ck->c.lock); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ btree_key_cache_flush_pos(&trans, key, seq, false); -+ bch2_trans_exit(&trans); -+} -+ -+/* -+ * Flush and evict a key from the key cache: -+ */ -+int bch2_btree_key_cache_flush(struct btree_trans *trans, -+ enum btree_id id, struct bpos pos) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_cached_key key = { id, pos }; -+ -+ /* Fastpath - assume it won't be found: */ -+ if (!btree_key_cache_find(c, id, pos)) -+ return 0; -+ -+ return btree_key_cache_flush_pos(trans, key, 0, true); -+} -+ -+bool bch2_btree_insert_key_cached(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_cached *ck = (void *) iter->l[0].b; -+ -+ BUG_ON(insert->u64s > ck->u64s); -+ -+ if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) { -+ int difference; -+ -+ BUG_ON(jset_u64s(insert->u64s) > trans->journal_preres.u64s); -+ -+ difference = jset_u64s(insert->u64s) - ck->res.u64s; -+ if (difference > 0) { -+ trans->journal_preres.u64s -= difference; -+ ck->res.u64s += difference; -+ } -+ } -+ -+ bkey_copy(ck->k, insert); -+ ck->valid = true; -+ -+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ mutex_lock(&c->btree_key_cache.lock); -+ list_del_init(&ck->list); -+ -+ set_bit(BKEY_CACHED_DIRTY, &ck->flags); -+ mutex_unlock(&c->btree_key_cache.lock); -+ } -+ -+ bch2_journal_pin_update(&c->journal, trans->journal_res.seq, -+ &ck->journal, btree_key_cache_journal_flush); -+ return true; -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_btree_key_cache_verify_clean(struct btree_trans *trans, -+ enum btree_id id, struct bpos pos) -+{ -+ 
BUG_ON(btree_key_cache_find(trans->c, id, pos)); -+} -+#endif -+ -+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *c) -+{ -+ struct bkey_cached *ck, *n; -+ -+ mutex_lock(&c->lock); -+ list_for_each_entry_safe(ck, n, &c->clean, list) { -+ kfree(ck->k); -+ kfree(ck); -+ } -+ list_for_each_entry_safe(ck, n, &c->freed, list) -+ kfree(ck); -+ mutex_unlock(&c->lock); -+ -+ rhashtable_destroy(&c->table); -+} -+ -+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c) -+{ -+ mutex_init(&c->lock); -+ INIT_LIST_HEAD(&c->freed); -+ INIT_LIST_HEAD(&c->clean); -+} -+ -+int bch2_fs_btree_key_cache_init(struct btree_key_cache *c) -+{ -+ return rhashtable_init(&c->table, &bch2_btree_key_cache_params); -+} -+ -+void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c) -+{ -+ struct bucket_table *tbl; -+ struct bkey_cached *ck; -+ struct rhash_head *pos; -+ size_t i; -+ -+ mutex_lock(&c->lock); -+ tbl = rht_dereference_rcu(c->table.tbl, &c->table); -+ -+ for (i = 0; i < tbl->size; i++) { -+ rht_for_each_entry_rcu(ck, pos, tbl, i, hash) { -+ pr_buf(out, "%s:", -+ bch2_btree_ids[ck->key.btree_id]); -+ bch2_bpos_to_text(out, ck->key.pos); -+ -+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) -+ pr_buf(out, " journal seq %llu", ck->journal.seq); -+ pr_buf(out, "\n"); -+ } -+ } -+ mutex_unlock(&c->lock); -+} -diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h -new file mode 100644 -index 000000000000..b1756c6c622c ---- /dev/null -+++ b/fs/bcachefs/btree_key_cache.h -@@ -0,0 +1,25 @@ -+#ifndef _BCACHEFS_BTREE_KEY_CACHE_H -+#define _BCACHEFS_BTREE_KEY_CACHE_H -+ -+int bch2_btree_iter_traverse_cached(struct btree_iter *); -+ -+bool bch2_btree_insert_key_cached(struct btree_trans *, -+ struct btree_iter *, struct bkey_i *); -+int bch2_btree_key_cache_flush(struct btree_trans *, -+ enum btree_id, struct bpos); -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_btree_key_cache_verify_clean(struct btree_trans *, -+ enum btree_id, struct bpos); -+#else -+static inline void -+bch2_btree_key_cache_verify_clean(struct btree_trans *trans, -+ enum btree_id id, struct bpos pos) {} -+#endif -+ -+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *); -+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *); -+int bch2_fs_btree_key_cache_init(struct btree_key_cache *); -+ -+void bch2_btree_key_cache_to_text(struct printbuf *, struct btree_key_cache *); -+ -+#endif /* _BCACHEFS_BTREE_KEY_CACHE_H */ -diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h -new file mode 100644 -index 000000000000..81fbf3e18647 ---- /dev/null -+++ b/fs/bcachefs/btree_locking.h -@@ -0,0 +1,257 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_LOCKING_H -+#define _BCACHEFS_BTREE_LOCKING_H -+ -+/* -+ * Only for internal btree use: -+ * -+ * The btree iterator tracks what locks it wants to take, and what locks it -+ * currently has - here we have wrappers for locking/unlocking btree nodes and -+ * updating the iterator state -+ */ -+ -+#include -+ -+#include "btree_iter.h" -+ -+/* matches six lock types */ -+enum btree_node_locked_type { -+ BTREE_NODE_UNLOCKED = -1, -+ BTREE_NODE_READ_LOCKED = SIX_LOCK_read, -+ BTREE_NODE_INTENT_LOCKED = SIX_LOCK_intent, -+}; -+ -+static inline int btree_node_locked_type(struct btree_iter *iter, -+ unsigned level) -+{ -+ /* -+ * We're relying on the fact that if nodes_intent_locked is set -+ * nodes_locked must be set as well, so that we can compute without -+ * branches: -+ */ -+ return BTREE_NODE_UNLOCKED + 
-+ ((iter->nodes_locked >> level) & 1) + -+ ((iter->nodes_intent_locked >> level) & 1); -+} -+ -+static inline bool btree_node_intent_locked(struct btree_iter *iter, -+ unsigned level) -+{ -+ return btree_node_locked_type(iter, level) == BTREE_NODE_INTENT_LOCKED; -+} -+ -+static inline bool btree_node_read_locked(struct btree_iter *iter, -+ unsigned level) -+{ -+ return btree_node_locked_type(iter, level) == BTREE_NODE_READ_LOCKED; -+} -+ -+static inline bool btree_node_locked(struct btree_iter *iter, unsigned level) -+{ -+ return iter->nodes_locked & (1 << level); -+} -+ -+static inline void mark_btree_node_unlocked(struct btree_iter *iter, -+ unsigned level) -+{ -+ iter->nodes_locked &= ~(1 << level); -+ iter->nodes_intent_locked &= ~(1 << level); -+} -+ -+static inline void mark_btree_node_locked(struct btree_iter *iter, -+ unsigned level, -+ enum six_lock_type type) -+{ -+ /* relying on this to avoid a branch */ -+ BUILD_BUG_ON(SIX_LOCK_read != 0); -+ BUILD_BUG_ON(SIX_LOCK_intent != 1); -+ -+ iter->nodes_locked |= 1 << level; -+ iter->nodes_intent_locked |= type << level; -+} -+ -+static inline void mark_btree_node_intent_locked(struct btree_iter *iter, -+ unsigned level) -+{ -+ mark_btree_node_locked(iter, level, SIX_LOCK_intent); -+} -+ -+static inline enum six_lock_type __btree_lock_want(struct btree_iter *iter, int level) -+{ -+ return level < iter->locks_want -+ ? SIX_LOCK_intent -+ : SIX_LOCK_read; -+} -+ -+static inline enum btree_node_locked_type -+btree_lock_want(struct btree_iter *iter, int level) -+{ -+ if (level < iter->level) -+ return BTREE_NODE_UNLOCKED; -+ if (level < iter->locks_want) -+ return BTREE_NODE_INTENT_LOCKED; -+ if (level == iter->level) -+ return BTREE_NODE_READ_LOCKED; -+ return BTREE_NODE_UNLOCKED; -+} -+ -+static inline void __btree_node_unlock(struct btree_iter *iter, unsigned level) -+{ -+ int lock_type = btree_node_locked_type(iter, level); -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ -+ if (lock_type != BTREE_NODE_UNLOCKED) -+ six_unlock_type(&iter->l[level].b->c.lock, lock_type); -+ mark_btree_node_unlocked(iter, level); -+} -+ -+static inline void btree_node_unlock(struct btree_iter *iter, unsigned level) -+{ -+ EBUG_ON(!level && iter->trans->nounlock); -+ -+ __btree_node_unlock(iter, level); -+} -+ -+static inline void __bch2_btree_iter_unlock(struct btree_iter *iter) -+{ -+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK); -+ -+ while (iter->nodes_locked) -+ btree_node_unlock(iter, __ffs(iter->nodes_locked)); -+} -+ -+static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type) -+{ -+ switch (type) { -+ case SIX_LOCK_read: -+ return BCH_TIME_btree_lock_contended_read; -+ case SIX_LOCK_intent: -+ return BCH_TIME_btree_lock_contended_intent; -+ case SIX_LOCK_write: -+ return BCH_TIME_btree_lock_contended_write; -+ default: -+ BUG(); -+ } -+} -+ -+/* -+ * wrapper around six locks that just traces lock contended time -+ */ -+static inline void __btree_node_lock_type(struct bch_fs *c, struct btree *b, -+ enum six_lock_type type) -+{ -+ u64 start_time = local_clock(); -+ -+ six_lock_type(&b->c.lock, type, NULL, NULL); -+ bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time); -+} -+ -+static inline void btree_node_lock_type(struct bch_fs *c, struct btree *b, -+ enum six_lock_type type) -+{ -+ if (!six_trylock_type(&b->c.lock, type)) -+ __btree_node_lock_type(c, b, type); -+} -+ -+/* -+ * Lock a btree node if we already have it locked on one of our linked -+ * iterators: -+ */ -+static inline bool 
btree_node_lock_increment(struct btree_trans *trans, -+ struct btree *b, unsigned level, -+ enum btree_node_locked_type want) -+{ -+ struct btree_iter *iter; -+ -+ trans_for_each_iter(trans, iter) -+ if (iter->l[level].b == b && -+ btree_node_locked_type(iter, level) >= want) { -+ six_lock_increment(&b->c.lock, want); -+ return true; -+ } -+ -+ return false; -+} -+ -+bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned, -+ struct btree_iter *, enum six_lock_type, -+ six_lock_should_sleep_fn, void *); -+ -+static inline bool btree_node_lock(struct btree *b, -+ struct bpos pos, unsigned level, -+ struct btree_iter *iter, -+ enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p) -+{ -+ struct btree_trans *trans = iter->trans; -+ bool ret; -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx))); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ trans->locking = b; -+ trans->locking_iter_idx = iter->idx; -+ trans->locking_pos = pos; -+ trans->locking_btree_id = iter->btree_id; -+ trans->locking_level = level; -+#endif -+ ret = likely(six_trylock_type(&b->c.lock, type)) || -+ btree_node_lock_increment(trans, b, level, type) || -+ __bch2_btree_node_lock(b, pos, level, iter, type, -+ should_sleep_fn, p); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ trans->locking = NULL; -+#endif -+ return ret; -+} -+ -+bool __bch2_btree_node_relock(struct btree_iter *, unsigned); -+ -+static inline bool bch2_btree_node_relock(struct btree_iter *iter, -+ unsigned level) -+{ -+ EBUG_ON(btree_node_locked(iter, level) && -+ btree_node_locked_type(iter, level) != -+ __btree_lock_want(iter, level)); -+ -+ return likely(btree_node_locked(iter, level)) || -+ __bch2_btree_node_relock(iter, level); -+} -+ -+/* -+ * Updates the saved lock sequence number, so that bch2_btree_node_relock() will -+ * succeed: -+ */ -+static inline void -+bch2_btree_node_unlock_write_inlined(struct btree *b, struct btree_iter *iter) -+{ -+ struct btree_iter *linked; -+ -+ EBUG_ON(iter->l[b->c.level].b != b); -+ EBUG_ON(iter->l[b->c.level].lock_seq + 1 != b->c.lock.state.seq); -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) -+ linked->l[b->c.level].lock_seq += 2; -+ -+ six_unlock_write(&b->c.lock); -+} -+ -+void bch2_btree_node_unlock_write(struct btree *, struct btree_iter *); -+ -+void __bch2_btree_node_lock_write(struct btree *, struct btree_iter *); -+ -+static inline void bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter) -+{ -+ EBUG_ON(iter->l[b->c.level].b != b); -+ EBUG_ON(iter->l[b->c.level].lock_seq != b->c.lock.state.seq); -+ -+ if (unlikely(!six_trylock_write(&b->c.lock))) -+ __bch2_btree_node_lock_write(b, iter); -+} -+ -+#endif /* _BCACHEFS_BTREE_LOCKING_H */ -+ -+ -diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h -new file mode 100644 -index 000000000000..cc01baeec138 ---- /dev/null -+++ b/fs/bcachefs/btree_types.h -@@ -0,0 +1,663 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_TYPES_H -+#define _BCACHEFS_BTREE_TYPES_H -+ -+#include -+#include -+#include -+ -+#include "bkey_methods.h" -+#include "buckets_types.h" -+#include "journal_types.h" -+ -+struct open_bucket; -+struct btree_update; -+struct btree_trans; -+ -+#define MAX_BSETS 3U -+ -+struct btree_nr_keys { -+ -+ /* -+ * Amount of live metadata (i.e. 
size of node after a compaction) in -+ * units of u64s -+ */ -+ u16 live_u64s; -+ u16 bset_u64s[MAX_BSETS]; -+ -+ /* live keys only: */ -+ u16 packed_keys; -+ u16 unpacked_keys; -+}; -+ -+struct bset_tree { -+ /* -+ * We construct a binary tree in an array as if the array -+ * started at 1, so that things line up on the same cachelines -+ * better: see comments in bset.c at cacheline_to_bkey() for -+ * details -+ */ -+ -+ /* size of the binary tree and prev array */ -+ u16 size; -+ -+ /* function of size - precalculated for to_inorder() */ -+ u16 extra; -+ -+ u16 data_offset; -+ u16 aux_data_offset; -+ u16 end_offset; -+ -+ struct bpos max_key; -+}; -+ -+struct btree_write { -+ struct journal_entry_pin journal; -+}; -+ -+struct btree_alloc { -+ struct open_buckets ob; -+ BKEY_PADDED(k); -+}; -+ -+struct btree_bkey_cached_common { -+ struct six_lock lock; -+ u8 level; -+ u8 btree_id; -+}; -+ -+struct btree { -+ struct btree_bkey_cached_common c; -+ -+ struct rhash_head hash; -+ u64 hash_val; -+ -+ unsigned long flags; -+ u16 written; -+ u8 nsets; -+ u8 nr_key_bits; -+ -+ struct bkey_format format; -+ -+ struct btree_node *data; -+ void *aux_data; -+ -+ /* -+ * Sets of sorted keys - the real btree node - plus a binary search tree -+ * -+ * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point -+ * to the memory we have allocated for this btree node. Additionally, -+ * set[0]->data points to the entire btree node as it exists on disk. -+ */ -+ struct bset_tree set[MAX_BSETS]; -+ -+ struct btree_nr_keys nr; -+ u16 sib_u64s[2]; -+ u16 whiteout_u64s; -+ u8 byte_order; -+ u8 unpack_fn_len; -+ -+ /* -+ * XXX: add a delete sequence number, so when bch2_btree_node_relock() -+ * fails because the lock sequence number has changed - i.e. the -+ * contents were modified - we can still relock the node if it's still -+ * the one we want, without redoing the traversal -+ */ -+ -+ /* -+ * For asynchronous splits/interior node updates: -+ * When we do a split, we allocate new child nodes and update the parent -+ * node to point to them: we update the parent in memory immediately, -+ * but then we must wait until the children have been written out before -+ * the update to the parent can be written - this is a list of the -+ * btree_updates that are blocking this node from being -+ * written: -+ */ -+ struct list_head write_blocked; -+ -+ /* -+ * Also for asynchronous splits/interior node updates: -+ * If a btree node isn't reachable yet, we don't want to kick off -+ * another write - because that write also won't yet be reachable and -+ * marking it as completed before it's reachable would be incorrect: -+ */ -+ unsigned long will_make_reachable; -+ -+ struct open_buckets ob; -+ -+ /* lru list */ -+ struct list_head list; -+ -+ struct btree_write writes[2]; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ bool *expensive_debug_checks; -+#endif -+ -+ /* Key/pointer for this btree node */ -+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX); -+}; -+ -+struct btree_cache { -+ struct rhashtable table; -+ bool table_init_done; -+ /* -+ * We never free a struct btree, except on shutdown - we just put it on -+ * the btree_cache_freed list and reuse it later. This simplifies the -+ * code, and it doesn't cost us much memory as the memory usage is -+ * dominated by buffers that hold the actual btree node data and those -+ * can be freed - and the number of struct btrees allocated is -+ * effectively bounded. 
-+ * -+ * btree_cache_freeable effectively is a small cache - we use it because -+ * high order page allocations can be rather expensive, and it's quite -+ * common to delete and allocate btree nodes in quick succession. It -+ * should never grow past ~2-3 nodes in practice. -+ */ -+ struct mutex lock; -+ struct list_head live; -+ struct list_head freeable; -+ struct list_head freed; -+ -+ /* Number of elements in live + freeable lists */ -+ unsigned used; -+ unsigned reserve; -+ struct shrinker shrink; -+ -+ /* -+ * If we need to allocate memory for a new btree node and that -+ * allocation fails, we can cannibalize another node in the btree cache -+ * to satisfy the allocation - lock to guarantee only one thread does -+ * this at a time: -+ */ -+ struct task_struct *alloc_lock; -+ struct closure_waitlist alloc_wait; -+}; -+ -+struct btree_node_iter { -+ struct btree_node_iter_set { -+ u16 k, end; -+ } data[MAX_BSETS]; -+}; -+ -+enum btree_iter_type { -+ BTREE_ITER_KEYS, -+ BTREE_ITER_NODES, -+ BTREE_ITER_CACHED, -+}; -+ -+#define BTREE_ITER_TYPE ((1 << 2) - 1) -+ -+/* -+ * Iterate over all possible positions, synthesizing deleted keys for holes: -+ */ -+#define BTREE_ITER_SLOTS (1 << 2) -+/* -+ * Indicates that intent locks should be taken on leaf nodes, because we expect -+ * to be doing updates: -+ */ -+#define BTREE_ITER_INTENT (1 << 3) -+/* -+ * Causes the btree iterator code to prefetch additional btree nodes from disk: -+ */ -+#define BTREE_ITER_PREFETCH (1 << 4) -+/* -+ * Indicates that this iterator should not be reused until transaction commit, -+ * either because a pending update references it or because the update depends -+ * on that particular key being locked (e.g. by the str_hash code, for hash -+ * table consistency) -+ */ -+#define BTREE_ITER_KEEP_UNTIL_COMMIT (1 << 5) -+/* -+ * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for -+ * @pos or the first key strictly greater than @pos -+ */ -+#define BTREE_ITER_IS_EXTENTS (1 << 6) -+#define BTREE_ITER_ERROR (1 << 7) -+#define BTREE_ITER_SET_POS_AFTER_COMMIT (1 << 8) -+#define BTREE_ITER_CACHED_NOFILL (1 << 9) -+#define BTREE_ITER_CACHED_NOCREATE (1 << 10) -+ -+#define BTREE_ITER_USER_FLAGS \ -+ (BTREE_ITER_SLOTS \ -+ |BTREE_ITER_INTENT \ -+ |BTREE_ITER_PREFETCH \ -+ |BTREE_ITER_CACHED_NOFILL \ -+ |BTREE_ITER_CACHED_NOCREATE) -+ -+enum btree_iter_uptodate { -+ BTREE_ITER_UPTODATE = 0, -+ BTREE_ITER_NEED_PEEK = 1, -+ BTREE_ITER_NEED_RELOCK = 2, -+ BTREE_ITER_NEED_TRAVERSE = 3, -+}; -+ -+#define BTREE_ITER_NO_NODE_GET_LOCKS ((struct btree *) 1) -+#define BTREE_ITER_NO_NODE_DROP ((struct btree *) 2) -+#define BTREE_ITER_NO_NODE_LOCK_ROOT ((struct btree *) 3) -+#define BTREE_ITER_NO_NODE_UP ((struct btree *) 4) -+#define BTREE_ITER_NO_NODE_DOWN ((struct btree *) 5) -+#define BTREE_ITER_NO_NODE_INIT ((struct btree *) 6) -+#define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7) -+ -+/* -+ * @pos - iterator's current position -+ * @level - current btree depth -+ * @locks_want - btree level below which we start taking intent locks -+ * @nodes_locked - bitmask indicating which nodes in @nodes are locked -+ * @nodes_intent_locked - bitmask indicating which locks are intent locks -+ */ -+struct btree_iter { -+ struct btree_trans *trans; -+ struct bpos pos; -+ struct bpos pos_after_commit; -+ -+ u16 flags; -+ u8 idx; -+ -+ enum btree_id btree_id:4; -+ enum btree_iter_uptodate uptodate:4; -+ unsigned level:4, -+ min_depth:4, -+ locks_want:4, -+ nodes_locked:4, -+ nodes_intent_locked:4; -+ -+ struct btree_iter_level 
{ -+ struct btree *b; -+ struct btree_node_iter iter; -+ u32 lock_seq; -+ } l[BTREE_MAX_DEPTH]; -+ -+ /* -+ * Current unpacked key - so that bch2_btree_iter_next()/ -+ * bch2_btree_iter_next_slot() can correctly advance pos. -+ */ -+ struct bkey k; -+ unsigned long ip_allocated; -+}; -+ -+static inline enum btree_iter_type -+btree_iter_type(const struct btree_iter *iter) -+{ -+ return iter->flags & BTREE_ITER_TYPE; -+} -+ -+static inline struct btree_iter_level *iter_l(struct btree_iter *iter) -+{ -+ return iter->l + iter->level; -+} -+ -+struct btree_key_cache { -+ struct mutex lock; -+ struct rhashtable table; -+ struct list_head freed; -+ struct list_head clean; -+}; -+ -+struct bkey_cached_key { -+ u32 btree_id; -+ struct bpos pos; -+} __attribute__((packed, aligned(4))); -+ -+#define BKEY_CACHED_DIRTY 0 -+ -+struct bkey_cached { -+ struct btree_bkey_cached_common c; -+ -+ unsigned long flags; -+ u8 u64s; -+ bool valid; -+ struct bkey_cached_key key; -+ -+ struct rhash_head hash; -+ struct list_head list; -+ -+ struct journal_preres res; -+ struct journal_entry_pin journal; -+ -+ struct bkey_i *k; -+}; -+ -+struct btree_insert_entry { -+ unsigned trigger_flags; -+ unsigned trans_triggers_run:1; -+ struct bkey_i *k; -+ struct btree_iter *iter; -+}; -+ -+#ifndef CONFIG_LOCKDEP -+#define BTREE_ITER_MAX 64 -+#else -+#define BTREE_ITER_MAX 32 -+#endif -+ -+struct btree_trans { -+ struct bch_fs *c; -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct list_head list; -+ struct btree *locking; -+ unsigned locking_iter_idx; -+ struct bpos locking_pos; -+ u8 locking_btree_id; -+ u8 locking_level; -+ pid_t pid; -+#endif -+ unsigned long ip; -+ -+ u64 iters_linked; -+ u64 iters_live; -+ u64 iters_touched; -+ -+ u8 nr_iters; -+ u8 nr_updates; -+ u8 nr_updates2; -+ u8 size; -+ unsigned used_mempool:1; -+ unsigned error:1; -+ unsigned nounlock:1; -+ unsigned need_reset:1; -+ unsigned in_traverse_all:1; -+ -+ unsigned mem_top; -+ unsigned mem_bytes; -+ void *mem; -+ -+ struct btree_iter *iters; -+ struct btree_insert_entry *updates; -+ struct btree_insert_entry *updates2; -+ -+ /* update path: */ -+ struct jset_entry *extra_journal_entries; -+ unsigned extra_journal_entry_u64s; -+ struct journal_entry_pin *journal_pin; -+ -+ struct journal_res journal_res; -+ struct journal_preres journal_preres; -+ u64 *journal_seq; -+ struct disk_reservation *disk_res; -+ unsigned flags; -+ unsigned journal_u64s; -+ unsigned journal_preres_u64s; -+ struct replicas_delta_list *fs_usage_deltas; -+ -+ struct btree_iter iters_onstack[2]; -+ struct btree_insert_entry updates_onstack[2]; -+ struct btree_insert_entry updates2_onstack[2]; -+}; -+ -+#define BTREE_FLAG(flag) \ -+static inline bool btree_node_ ## flag(struct btree *b) \ -+{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \ -+ \ -+static inline void set_btree_node_ ## flag(struct btree *b) \ -+{ set_bit(BTREE_NODE_ ## flag, &b->flags); } \ -+ \ -+static inline void clear_btree_node_ ## flag(struct btree *b) \ -+{ clear_bit(BTREE_NODE_ ## flag, &b->flags); } -+ -+enum btree_flags { -+ BTREE_NODE_read_in_flight, -+ BTREE_NODE_read_error, -+ BTREE_NODE_dirty, -+ BTREE_NODE_need_write, -+ BTREE_NODE_noevict, -+ BTREE_NODE_write_idx, -+ BTREE_NODE_accessed, -+ BTREE_NODE_write_in_flight, -+ BTREE_NODE_just_written, -+ BTREE_NODE_dying, -+ BTREE_NODE_fake, -+ BTREE_NODE_old_extent_overwrite, -+ BTREE_NODE_need_rewrite, -+}; -+ -+BTREE_FLAG(read_in_flight); -+BTREE_FLAG(read_error); -+BTREE_FLAG(dirty); -+BTREE_FLAG(need_write); -+BTREE_FLAG(noevict); -+BTREE_FLAG(write_idx); 
-+BTREE_FLAG(accessed); -+BTREE_FLAG(write_in_flight); -+BTREE_FLAG(just_written); -+BTREE_FLAG(dying); -+BTREE_FLAG(fake); -+BTREE_FLAG(old_extent_overwrite); -+BTREE_FLAG(need_rewrite); -+ -+static inline struct btree_write *btree_current_write(struct btree *b) -+{ -+ return b->writes + btree_node_write_idx(b); -+} -+ -+static inline struct btree_write *btree_prev_write(struct btree *b) -+{ -+ return b->writes + (btree_node_write_idx(b) ^ 1); -+} -+ -+static inline struct bset_tree *bset_tree_last(struct btree *b) -+{ -+ EBUG_ON(!b->nsets); -+ return b->set + b->nsets - 1; -+} -+ -+static inline void * -+__btree_node_offset_to_ptr(const struct btree *b, u16 offset) -+{ -+ return (void *) ((u64 *) b->data + 1 + offset); -+} -+ -+static inline u16 -+__btree_node_ptr_to_offset(const struct btree *b, const void *p) -+{ -+ u16 ret = (u64 *) p - 1 - (u64 *) b->data; -+ -+ EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p); -+ return ret; -+} -+ -+static inline struct bset *bset(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ return __btree_node_offset_to_ptr(b, t->data_offset); -+} -+ -+static inline void set_btree_bset_end(struct btree *b, struct bset_tree *t) -+{ -+ t->end_offset = -+ __btree_node_ptr_to_offset(b, vstruct_last(bset(b, t))); -+} -+ -+static inline void set_btree_bset(struct btree *b, struct bset_tree *t, -+ const struct bset *i) -+{ -+ t->data_offset = __btree_node_ptr_to_offset(b, i); -+ set_btree_bset_end(b, t); -+} -+ -+static inline struct bset *btree_bset_first(struct btree *b) -+{ -+ return bset(b, b->set); -+} -+ -+static inline struct bset *btree_bset_last(struct btree *b) -+{ -+ return bset(b, bset_tree_last(b)); -+} -+ -+static inline u16 -+__btree_node_key_to_offset(const struct btree *b, const struct bkey_packed *k) -+{ -+ return __btree_node_ptr_to_offset(b, k); -+} -+ -+static inline struct bkey_packed * -+__btree_node_offset_to_key(const struct btree *b, u16 k) -+{ -+ return __btree_node_offset_to_ptr(b, k); -+} -+ -+static inline unsigned btree_bkey_first_offset(const struct bset_tree *t) -+{ -+ return t->data_offset + offsetof(struct bset, _data) / sizeof(u64); -+} -+ -+#define btree_bkey_first(_b, _t) \ -+({ \ -+ EBUG_ON(bset(_b, _t)->start != \ -+ __btree_node_offset_to_key(_b, btree_bkey_first_offset(_t)));\ -+ \ -+ bset(_b, _t)->start; \ -+}) -+ -+#define btree_bkey_last(_b, _t) \ -+({ \ -+ EBUG_ON(__btree_node_offset_to_key(_b, (_t)->end_offset) != \ -+ vstruct_last(bset(_b, _t))); \ -+ \ -+ __btree_node_offset_to_key(_b, (_t)->end_offset); \ -+}) -+ -+static inline unsigned bset_u64s(struct bset_tree *t) -+{ -+ return t->end_offset - t->data_offset - -+ sizeof(struct bset) / sizeof(u64); -+} -+ -+static inline unsigned bset_dead_u64s(struct btree *b, struct bset_tree *t) -+{ -+ return bset_u64s(t) - b->nr.bset_u64s[t - b->set]; -+} -+ -+static inline unsigned bset_byte_offset(struct btree *b, void *i) -+{ -+ return i - (void *) b->data; -+} -+ -+enum btree_node_type { -+#define x(kwd, val, name) BKEY_TYPE_##kwd = val, -+ BCH_BTREE_IDS() -+#undef x -+ BKEY_TYPE_BTREE, -+}; -+ -+/* Type of a key in btree @id at level @level: */ -+static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id) -+{ -+ return level ? 
BKEY_TYPE_BTREE : (enum btree_node_type) id; -+} -+ -+/* Type of keys @b contains: */ -+static inline enum btree_node_type btree_node_type(struct btree *b) -+{ -+ return __btree_node_type(b->c.level, b->c.btree_id); -+} -+ -+static inline bool btree_node_type_is_extents(enum btree_node_type type) -+{ -+ switch (type) { -+ case BKEY_TYPE_EXTENTS: -+ case BKEY_TYPE_REFLINK: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline bool btree_node_is_extents(struct btree *b) -+{ -+ return btree_node_type_is_extents(btree_node_type(b)); -+} -+ -+static inline enum btree_node_type btree_iter_key_type(struct btree_iter *iter) -+{ -+ return __btree_node_type(iter->level, iter->btree_id); -+} -+ -+static inline bool btree_iter_is_extents(struct btree_iter *iter) -+{ -+ return btree_node_type_is_extents(btree_iter_key_type(iter)); -+} -+ -+#define BTREE_NODE_TYPE_HAS_TRIGGERS \ -+ ((1U << BKEY_TYPE_EXTENTS)| \ -+ (1U << BKEY_TYPE_ALLOC)| \ -+ (1U << BKEY_TYPE_INODES)| \ -+ (1U << BKEY_TYPE_REFLINK)| \ -+ (1U << BKEY_TYPE_EC)| \ -+ (1U << BKEY_TYPE_BTREE)) -+ -+#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ -+ ((1U << BKEY_TYPE_EXTENTS)| \ -+ (1U << BKEY_TYPE_INODES)| \ -+ (1U << BKEY_TYPE_EC)| \ -+ (1U << BKEY_TYPE_REFLINK)) -+ -+enum btree_trigger_flags { -+ __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */ -+ -+ __BTREE_TRIGGER_INSERT, -+ __BTREE_TRIGGER_OVERWRITE, -+ __BTREE_TRIGGER_OVERWRITE_SPLIT, -+ -+ __BTREE_TRIGGER_GC, -+ __BTREE_TRIGGER_BUCKET_INVALIDATE, -+ __BTREE_TRIGGER_NOATOMIC, -+}; -+ -+#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN) -+ -+#define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT) -+#define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE) -+#define BTREE_TRIGGER_OVERWRITE_SPLIT (1U << __BTREE_TRIGGER_OVERWRITE_SPLIT) -+ -+#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC) -+#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) -+#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC) -+ -+static inline bool btree_node_type_needs_gc(enum btree_node_type type) -+{ -+ return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type); -+} -+ -+struct btree_root { -+ struct btree *b; -+ -+ /* On disk root - see async splits: */ -+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX); -+ u8 level; -+ u8 alive; -+ s8 error; -+}; -+ -+/* -+ * Optional hook that will be called just prior to a btree node update, when -+ * we're holding the write lock and we know what key is about to be overwritten: -+ */ -+ -+enum btree_insert_ret { -+ BTREE_INSERT_OK, -+ /* leaf node needs to be split */ -+ BTREE_INSERT_BTREE_NODE_FULL, -+ BTREE_INSERT_ENOSPC, -+ BTREE_INSERT_NEED_MARK_REPLICAS, -+ BTREE_INSERT_NEED_JOURNAL_RES, -+}; -+ -+enum btree_gc_coalesce_fail_reason { -+ BTREE_GC_COALESCE_FAIL_RESERVE_GET, -+ BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC, -+ BTREE_GC_COALESCE_FAIL_FORMAT_FITS, -+}; -+ -+enum btree_node_sibling { -+ btree_prev_sib, -+ btree_next_sib, -+}; -+ -+typedef struct btree_nr_keys (*sort_fix_overlapping_fn)(struct bset *, -+ struct btree *, -+ struct btree_node_iter *); -+ -+#endif /* _BCACHEFS_BTREE_TYPES_H */ -diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h -new file mode 100644 -index 000000000000..e0b1bde37484 ---- /dev/null -+++ b/fs/bcachefs/btree_update.h -@@ -0,0 +1,144 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_UPDATE_H -+#define _BCACHEFS_BTREE_UPDATE_H -+ -+#include "btree_iter.h" -+#include "journal.h" -+ -+struct bch_fs; -+struct btree; -+ -+void 
bch2_btree_node_lock_for_insert(struct bch_fs *, struct btree *, -+ struct btree_iter *); -+bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *, -+ struct btree_node_iter *, struct bkey_i *); -+void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64); -+ -+enum btree_insert_flags { -+ __BTREE_INSERT_NOUNLOCK, -+ __BTREE_INSERT_NOFAIL, -+ __BTREE_INSERT_NOCHECK_RW, -+ __BTREE_INSERT_LAZY_RW, -+ __BTREE_INSERT_USE_RESERVE, -+ __BTREE_INSERT_USE_ALLOC_RESERVE, -+ __BTREE_INSERT_JOURNAL_REPLAY, -+ __BTREE_INSERT_JOURNAL_RESERVED, -+ __BTREE_INSERT_JOURNAL_RECLAIM, -+ __BTREE_INSERT_NOWAIT, -+ __BTREE_INSERT_GC_LOCK_HELD, -+ __BCH_HASH_SET_MUST_CREATE, -+ __BCH_HASH_SET_MUST_REPLACE, -+}; -+ -+/* -+ * Don't drop locks _after_ successfully updating btree: -+ */ -+#define BTREE_INSERT_NOUNLOCK (1 << __BTREE_INSERT_NOUNLOCK) -+ -+/* Don't check for -ENOSPC: */ -+#define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL) -+ -+#define BTREE_INSERT_NOCHECK_RW (1 << __BTREE_INSERT_NOCHECK_RW) -+#define BTREE_INSERT_LAZY_RW (1 << __BTREE_INSERT_LAZY_RW) -+ -+/* for copygc, or when merging btree nodes */ -+#define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE) -+#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE) -+ -+/* Insert is for journal replay - don't get journal reservations: */ -+#define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY) -+ -+/* Indicates that we have pre-reserved space in the journal: */ -+#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED) -+ -+/* Insert is being called from journal reclaim path: */ -+#define BTREE_INSERT_JOURNAL_RECLAIM (1 << __BTREE_INSERT_JOURNAL_RECLAIM) -+ -+/* Don't block on allocation failure (for new btree nodes: */ -+#define BTREE_INSERT_NOWAIT (1 << __BTREE_INSERT_NOWAIT) -+#define BTREE_INSERT_GC_LOCK_HELD (1 << __BTREE_INSERT_GC_LOCK_HELD) -+ -+#define BCH_HASH_SET_MUST_CREATE (1 << __BCH_HASH_SET_MUST_CREATE) -+#define BCH_HASH_SET_MUST_REPLACE (1 << __BCH_HASH_SET_MUST_REPLACE) -+ -+int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned); -+ -+int __bch2_btree_insert(struct btree_trans *, enum btree_id, struct bkey_i *); -+int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, -+ struct disk_reservation *, u64 *, int flags); -+ -+int bch2_btree_delete_at_range(struct btree_trans *, struct btree_iter *, -+ struct bpos, u64 *); -+int bch2_btree_delete_range(struct bch_fs *, enum btree_id, -+ struct bpos, struct bpos, u64 *); -+ -+int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *, -+ __le64, unsigned); -+int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *, -+ struct btree *, struct bkey_i *); -+ -+int bch2_trans_update(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *, enum btree_trigger_flags); -+int __bch2_trans_commit(struct btree_trans *); -+ -+/** -+ * bch2_trans_commit - insert keys at given iterator positions -+ * -+ * This is main entry point for btree updates. -+ * -+ * Return values: -+ * -EINTR: locking changed, this function should be called again. 
-+ * -EROFS: filesystem read only -+ * -EIO: journal or btree node IO error -+ */ -+static inline int bch2_trans_commit(struct btree_trans *trans, -+ struct disk_reservation *disk_res, -+ u64 *journal_seq, -+ unsigned flags) -+{ -+ trans->disk_res = disk_res; -+ trans->journal_seq = journal_seq; -+ trans->flags = flags; -+ -+ return __bch2_trans_commit(trans); -+} -+ -+#define __bch2_trans_do(_trans, _disk_res, _journal_seq, _flags, _do) \ -+({ \ -+ int _ret; \ -+ \ -+ while (1) { \ -+ _ret = (_do) ?: bch2_trans_commit(_trans, (_disk_res), \ -+ (_journal_seq), (_flags)); \ -+ if (_ret != -EINTR) \ -+ break; \ -+ bch2_trans_reset(_trans, 0); \ -+ } \ -+ \ -+ _ret; \ -+}) -+ -+#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \ -+({ \ -+ struct btree_trans trans; \ -+ int _ret, _ret2; \ -+ \ -+ bch2_trans_init(&trans, (_c), 0, 0); \ -+ _ret = __bch2_trans_do(&trans, _disk_res, _journal_seq, _flags, \ -+ _do); \ -+ _ret2 = bch2_trans_exit(&trans); \ -+ \ -+ _ret ?: _ret2; \ -+}) -+ -+#define trans_for_each_update(_trans, _i) \ -+ for ((_i) = (_trans)->updates; \ -+ (_i) < (_trans)->updates + (_trans)->nr_updates; \ -+ (_i)++) -+ -+#define trans_for_each_update2(_trans, _i) \ -+ for ((_i) = (_trans)->updates2; \ -+ (_i) < (_trans)->updates2 + (_trans)->nr_updates2; \ -+ (_i)++) -+ -+#endif /* _BCACHEFS_BTREE_UPDATE_H */ -diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -new file mode 100644 -index 000000000000..a2604b0ce2d8 ---- /dev/null -+++ b/fs/bcachefs/btree_update_interior.c -@@ -0,0 +1,2075 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_methods.h" -+#include "btree_cache.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_locking.h" -+#include "buckets.h" -+#include "extents.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+#include "keylist.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+#include -+#include -+ -+/* Debug code: */ -+ -+/* -+ * Verify that child nodes correctly span parent node's range: -+ */ -+static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bpos next_node = b->data->min_key; -+ struct btree_node_iter iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_btree_ptr_v2 bp; -+ struct bkey unpacked; -+ -+ BUG_ON(!b->c.level); -+ -+ if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags)) -+ return; -+ -+ bch2_btree_node_iter_init_from_start(&iter, b); -+ -+ while (1) { -+ k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked); -+ if (k.k->type != KEY_TYPE_btree_ptr_v2) -+ break; -+ bp = bkey_s_c_to_btree_ptr_v2(k); -+ -+ BUG_ON(bkey_cmp(next_node, bp.v->min_key)); -+ -+ bch2_btree_node_iter_advance(&iter, b); -+ -+ if (bch2_btree_node_iter_end(&iter)) { -+ BUG_ON(bkey_cmp(k.k->p, b->key.k.p)); -+ break; -+ } -+ -+ next_node = bkey_successor(k.k->p); -+ } -+#endif -+} -+ -+/* Calculate ideal packed bkey format for new btree nodes: */ -+ -+void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b) -+{ -+ struct bkey_packed *k; -+ struct bset_tree *t; -+ struct bkey uk; -+ -+ bch2_bkey_format_add_pos(s, b->data->min_key); -+ -+ for_each_bset(b, t) -+ bset_tree_for_each_key(b, t, k) -+ if (!bkey_whiteout(k)) { -+ uk = bkey_unpack_key(b, k); -+ bch2_bkey_format_add_key(s, &uk); -+ } -+} -+ -+static struct bkey_format bch2_btree_calc_format(struct 
btree *b) -+{ -+ struct bkey_format_state s; -+ -+ bch2_bkey_format_init(&s); -+ __bch2_btree_calc_format(&s, b); -+ -+ return bch2_bkey_format_done(&s); -+} -+ -+static size_t btree_node_u64s_with_format(struct btree *b, -+ struct bkey_format *new_f) -+{ -+ struct bkey_format *old_f = &b->format; -+ -+ /* stupid integer promotion rules */ -+ ssize_t delta = -+ (((int) new_f->key_u64s - old_f->key_u64s) * -+ (int) b->nr.packed_keys) + -+ (((int) new_f->key_u64s - BKEY_U64s) * -+ (int) b->nr.unpacked_keys); -+ -+ BUG_ON(delta + b->nr.live_u64s < 0); -+ -+ return b->nr.live_u64s + delta; -+} -+ -+/** -+ * btree_node_format_fits - check if we could rewrite node with a new format -+ * -+ * This assumes all keys can pack with the new format -- it just checks if -+ * the re-packed keys would fit inside the node itself. -+ */ -+bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b, -+ struct bkey_format *new_f) -+{ -+ size_t u64s = btree_node_u64s_with_format(b, new_f); -+ -+ return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c); -+} -+ -+/* Btree node freeing/allocation: */ -+ -+static void __btree_node_free(struct bch_fs *c, struct btree *b) -+{ -+ trace_btree_node_free(c, b); -+ -+ BUG_ON(btree_node_dirty(b)); -+ BUG_ON(btree_node_need_write(b)); -+ BUG_ON(b == btree_node_root(c, b)); -+ BUG_ON(b->ob.nr); -+ BUG_ON(!list_empty(&b->write_blocked)); -+ BUG_ON(b->will_make_reachable); -+ -+ clear_btree_node_noevict(b); -+ -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ mutex_lock(&c->btree_cache.lock); -+ list_move(&b->list, &c->btree_cache.freeable); -+ mutex_unlock(&c->btree_cache.lock); -+} -+ -+void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b) -+{ -+ struct open_buckets ob = b->ob; -+ -+ b->ob.nr = 0; -+ -+ clear_btree_node_dirty(b); -+ -+ btree_node_lock_type(c, b, SIX_LOCK_write); -+ __btree_node_free(c, b); -+ six_unlock_write(&b->c.lock); -+ -+ bch2_open_buckets_put(c, &ob); -+} -+ -+void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter) -+{ -+ struct btree_iter *linked; -+ -+ trans_for_each_iter(iter->trans, linked) -+ BUG_ON(linked->l[b->c.level].b == b); -+ -+ six_lock_write(&b->c.lock, NULL, NULL); -+ __btree_node_free(c, b); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+} -+ -+static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, -+ struct disk_reservation *res, -+ struct closure *cl, -+ unsigned flags) -+{ -+ struct write_point *wp; -+ struct btree *b; -+ BKEY_PADDED(k) tmp; -+ struct open_buckets ob = { .nr = 0 }; -+ struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; -+ unsigned nr_reserve; -+ enum alloc_reserve alloc_reserve; -+ -+ if (flags & BTREE_INSERT_USE_ALLOC_RESERVE) { -+ nr_reserve = 0; -+ alloc_reserve = RESERVE_ALLOC; -+ } else if (flags & BTREE_INSERT_USE_RESERVE) { -+ nr_reserve = BTREE_NODE_RESERVE / 2; -+ alloc_reserve = RESERVE_BTREE; -+ } else { -+ nr_reserve = BTREE_NODE_RESERVE; -+ alloc_reserve = RESERVE_NONE; -+ } -+ -+ mutex_lock(&c->btree_reserve_cache_lock); -+ if (c->btree_reserve_cache_nr > nr_reserve) { -+ struct btree_alloc *a = -+ &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; -+ -+ ob = a->ob; -+ bkey_copy(&tmp.k, &a->k); -+ mutex_unlock(&c->btree_reserve_cache_lock); -+ goto mem_alloc; -+ } -+ mutex_unlock(&c->btree_reserve_cache_lock); -+ -+retry: -+ wp = bch2_alloc_sectors_start(c, c->opts.foreground_target, 0, -+ writepoint_ptr(&c->btree_write_point), -+ &devs_have, -+ res->nr_replicas, -+ 
c->opts.metadata_replicas_required, -+ alloc_reserve, 0, cl); -+ if (IS_ERR(wp)) -+ return ERR_CAST(wp); -+ -+ if (wp->sectors_free < c->opts.btree_node_size) { -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ if (ob->sectors_free < c->opts.btree_node_size) -+ ob->sectors_free = 0; -+ -+ bch2_alloc_sectors_done(c, wp); -+ goto retry; -+ } -+ -+ if (c->sb.features & (1ULL << BCH_FEATURE_btree_ptr_v2)) -+ bkey_btree_ptr_v2_init(&tmp.k); -+ else -+ bkey_btree_ptr_init(&tmp.k); -+ -+ bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size); -+ -+ bch2_open_bucket_get(c, wp, &ob); -+ bch2_alloc_sectors_done(c, wp); -+mem_alloc: -+ b = bch2_btree_node_mem_alloc(c); -+ -+ /* we hold cannibalize_lock: */ -+ BUG_ON(IS_ERR(b)); -+ BUG_ON(b->ob.nr); -+ -+ bkey_copy(&b->key, &tmp.k); -+ b->ob = ob; -+ -+ return b; -+} -+ -+static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned level) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *b; -+ int ret; -+ -+ BUG_ON(level >= BTREE_MAX_DEPTH); -+ BUG_ON(!as->nr_prealloc_nodes); -+ -+ b = as->prealloc_nodes[--as->nr_prealloc_nodes]; -+ -+ set_btree_node_accessed(b); -+ set_btree_node_dirty(b); -+ set_btree_node_need_write(b); -+ -+ bch2_bset_init_first(b, &b->data->keys); -+ b->c.level = level; -+ b->c.btree_id = as->btree_id; -+ -+ memset(&b->nr, 0, sizeof(b->nr)); -+ b->data->magic = cpu_to_le64(bset_magic(c)); -+ b->data->flags = 0; -+ SET_BTREE_NODE_ID(b->data, as->btree_id); -+ SET_BTREE_NODE_LEVEL(b->data, level); -+ b->data->ptr = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)).start->ptr; -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key); -+ -+ bp->v.mem_ptr = 0; -+ bp->v.seq = b->data->keys.seq; -+ bp->v.sectors_written = 0; -+ bp->v.sectors = cpu_to_le16(c->opts.btree_node_size); -+ } -+ -+ if (c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite)) -+ SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true); -+ -+ if (btree_node_is_extents(b) && -+ !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) { -+ set_btree_node_old_extent_overwrite(b); -+ set_btree_node_need_rewrite(b); -+ } -+ -+ bch2_btree_build_aux_trees(b); -+ -+ ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id); -+ BUG_ON(ret); -+ -+ trace_btree_node_alloc(c, b); -+ return b; -+} -+ -+static void btree_set_min(struct btree *b, struct bpos pos) -+{ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) -+ bkey_i_to_btree_ptr_v2(&b->key)->v.min_key = pos; -+ b->data->min_key = pos; -+} -+ -+static void btree_set_max(struct btree *b, struct bpos pos) -+{ -+ b->key.k.p = pos; -+ b->data->max_key = pos; -+} -+ -+struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as, -+ struct btree *b, -+ struct bkey_format format) -+{ -+ struct btree *n; -+ -+ n = bch2_btree_node_alloc(as, b->c.level); -+ -+ SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1); -+ -+ btree_set_min(n, b->data->min_key); -+ btree_set_max(n, b->data->max_key); -+ -+ n->data->format = format; -+ btree_node_set_format(n, format); -+ -+ bch2_btree_sort_into(as->c, n, b); -+ -+ btree_node_reset_sib_u64s(n); -+ -+ n->key.k.p = b->key.k.p; -+ return n; -+} -+ -+static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as, -+ struct btree *b) -+{ -+ struct bkey_format new_f = bch2_btree_calc_format(b); -+ -+ /* -+ * The keys might expand with the new format - if they wouldn't fit in -+ * the btree node anymore, use the old format for now: -+ */ 
-+ if (!bch2_btree_node_format_fits(as->c, b, &new_f)) -+ new_f = b->format; -+ -+ return __bch2_btree_node_alloc_replacement(as, b, new_f); -+} -+ -+static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level) -+{ -+ struct btree *b = bch2_btree_node_alloc(as, level); -+ -+ btree_set_min(b, POS_MIN); -+ btree_set_max(b, POS_MAX); -+ b->data->format = bch2_btree_calc_format(b); -+ -+ btree_node_set_format(b, b->data->format); -+ bch2_btree_build_aux_trees(b); -+ -+ bch2_btree_update_add_new_node(as, b); -+ six_unlock_write(&b->c.lock); -+ -+ return b; -+} -+ -+static void bch2_btree_reserve_put(struct btree_update *as) -+{ -+ struct bch_fs *c = as->c; -+ -+ mutex_lock(&c->btree_reserve_cache_lock); -+ -+ while (as->nr_prealloc_nodes) { -+ struct btree *b = as->prealloc_nodes[--as->nr_prealloc_nodes]; -+ -+ six_unlock_write(&b->c.lock); -+ -+ if (c->btree_reserve_cache_nr < -+ ARRAY_SIZE(c->btree_reserve_cache)) { -+ struct btree_alloc *a = -+ &c->btree_reserve_cache[c->btree_reserve_cache_nr++]; -+ -+ a->ob = b->ob; -+ b->ob.nr = 0; -+ bkey_copy(&a->k, &b->key); -+ } else { -+ bch2_open_buckets_put(c, &b->ob); -+ } -+ -+ btree_node_lock_type(c, b, SIX_LOCK_write); -+ __btree_node_free(c, b); -+ six_unlock_write(&b->c.lock); -+ -+ six_unlock_intent(&b->c.lock); -+ } -+ -+ mutex_unlock(&c->btree_reserve_cache_lock); -+} -+ -+static int bch2_btree_reserve_get(struct btree_update *as, unsigned nr_nodes, -+ unsigned flags, struct closure *cl) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *b; -+ int ret; -+ -+ BUG_ON(nr_nodes > BTREE_RESERVE_MAX); -+ -+ /* -+ * Protects reaping from the btree node cache and using the btree node -+ * open bucket reserve: -+ */ -+ ret = bch2_btree_cache_cannibalize_lock(c, cl); -+ if (ret) -+ return ret; -+ -+ while (as->nr_prealloc_nodes < nr_nodes) { -+ b = __bch2_btree_node_alloc(c, &as->disk_res, -+ flags & BTREE_INSERT_NOWAIT -+ ? 
NULL : cl, flags); -+ if (IS_ERR(b)) { -+ ret = PTR_ERR(b); -+ goto err_free; -+ } -+ -+ ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); -+ if (ret) -+ goto err_free; -+ -+ as->prealloc_nodes[as->nr_prealloc_nodes++] = b; -+ } -+ -+ bch2_btree_cache_cannibalize_unlock(c); -+ return 0; -+err_free: -+ bch2_btree_cache_cannibalize_unlock(c); -+ trace_btree_reserve_get_fail(c, nr_nodes, cl); -+ return ret; -+} -+ -+/* Asynchronous interior node update machinery */ -+ -+static void bch2_btree_update_free(struct btree_update *as) -+{ -+ struct bch_fs *c = as->c; -+ -+ bch2_journal_preres_put(&c->journal, &as->journal_preres); -+ -+ bch2_journal_pin_drop(&c->journal, &as->journal); -+ bch2_journal_pin_flush(&c->journal, &as->journal); -+ bch2_disk_reservation_put(c, &as->disk_res); -+ bch2_btree_reserve_put(as); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_del(&as->unwritten_list); -+ list_del(&as->list); -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ closure_debug_destroy(&as->cl); -+ mempool_free(as, &c->btree_interior_update_pool); -+ -+ closure_wake_up(&c->btree_interior_update_wait); -+} -+ -+static void btree_update_will_delete_key(struct btree_update *as, -+ struct bkey_i *k) -+{ -+ BUG_ON(bch2_keylist_u64s(&as->old_keys) + k->k.u64s > -+ ARRAY_SIZE(as->_old_keys)); -+ bch2_keylist_add(&as->old_keys, k); -+} -+ -+static void btree_update_will_add_key(struct btree_update *as, -+ struct bkey_i *k) -+{ -+ BUG_ON(bch2_keylist_u64s(&as->new_keys) + k->k.u64s > -+ ARRAY_SIZE(as->_new_keys)); -+ bch2_keylist_add(&as->new_keys, k); -+} -+ -+/* -+ * The transactional part of an interior btree node update, where we journal the -+ * update we did to the interior node and update alloc info: -+ */ -+static int btree_update_nodes_written_trans(struct btree_trans *trans, -+ struct btree_update *as) -+{ -+ struct bkey_i *k; -+ int ret; -+ -+ trans->extra_journal_entries = (void *) &as->journal_entries[0]; -+ trans->extra_journal_entry_u64s = as->journal_u64s; -+ trans->journal_pin = &as->journal; -+ -+ for_each_keylist_key(&as->new_keys, k) { -+ ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k), -+ 0, 0, BTREE_TRIGGER_INSERT); -+ if (ret) -+ return ret; -+ } -+ -+ for_each_keylist_key(&as->old_keys, k) { -+ ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k), -+ 0, 0, BTREE_TRIGGER_OVERWRITE); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static void btree_update_nodes_written(struct btree_update *as) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *b = as->b; -+ u64 journal_seq = 0; -+ unsigned i; -+ int ret; -+ -+ /* -+ * We did an update to a parent node where the pointers we added pointed -+ * to child nodes that weren't written yet: now, the child nodes have -+ * been written so we can write out the update to the interior node. -+ */ -+ -+ /* -+ * We can't call into journal reclaim here: we'd block on the journal -+ * reclaim lock, but we may need to release the open buckets we have -+ * pinned in order for other btree updates to make forward progress, and -+ * journal reclaim does btree updates when flushing bkey_cached entries, -+ * which may require allocations as well. 
-+ */ -+ ret = bch2_trans_do(c, &as->disk_res, &journal_seq, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_USE_ALLOC_RESERVE| -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_JOURNAL_RECLAIM| -+ BTREE_INSERT_JOURNAL_RESERVED, -+ btree_update_nodes_written_trans(&trans, as)); -+ BUG_ON(ret && !bch2_journal_error(&c->journal)); -+ -+ if (b) { -+ /* -+ * @b is the node we did the final insert into: -+ * -+ * On failure to get a journal reservation, we still have to -+ * unblock the write and allow most of the write path to happen -+ * so that shutdown works, but the i->journal_seq mechanism -+ * won't work to prevent the btree write from being visible (we -+ * didn't get a journal sequence number) - instead -+ * __bch2_btree_node_write() doesn't do the actual write if -+ * we're in journal error state: -+ */ -+ -+ btree_node_lock_type(c, b, SIX_LOCK_intent); -+ btree_node_lock_type(c, b, SIX_LOCK_write); -+ mutex_lock(&c->btree_interior_update_lock); -+ -+ list_del(&as->write_blocked_list); -+ -+ if (!ret && as->b == b) { -+ struct bset *i = btree_bset_last(b); -+ -+ BUG_ON(!b->c.level); -+ BUG_ON(!btree_node_dirty(b)); -+ -+ i->journal_seq = cpu_to_le64( -+ max(journal_seq, -+ le64_to_cpu(i->journal_seq))); -+ -+ bch2_btree_add_journal_pin(c, b, journal_seq); -+ } -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+ six_unlock_write(&b->c.lock); -+ -+ btree_node_write_if_need(c, b, SIX_LOCK_intent); -+ six_unlock_intent(&b->c.lock); -+ } -+ -+ bch2_journal_pin_drop(&c->journal, &as->journal); -+ -+ bch2_journal_preres_put(&c->journal, &as->journal_preres); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ for (i = 0; i < as->nr_new_nodes; i++) { -+ b = as->new_nodes[i]; -+ -+ BUG_ON(b->will_make_reachable != (unsigned long) as); -+ b->will_make_reachable = 0; -+ } -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ for (i = 0; i < as->nr_new_nodes; i++) { -+ b = as->new_nodes[i]; -+ -+ btree_node_lock_type(c, b, SIX_LOCK_read); -+ btree_node_write_if_need(c, b, SIX_LOCK_read); -+ six_unlock_read(&b->c.lock); -+ } -+ -+ for (i = 0; i < as->nr_open_buckets; i++) -+ bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]); -+ -+ bch2_btree_update_free(as); -+} -+ -+static void btree_interior_update_work(struct work_struct *work) -+{ -+ struct bch_fs *c = -+ container_of(work, struct bch_fs, btree_interior_update_work); -+ struct btree_update *as; -+ -+ while (1) { -+ mutex_lock(&c->btree_interior_update_lock); -+ as = list_first_entry_or_null(&c->btree_interior_updates_unwritten, -+ struct btree_update, unwritten_list); -+ if (as && !as->nodes_written) -+ as = NULL; -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ if (!as) -+ break; -+ -+ btree_update_nodes_written(as); -+ } -+} -+ -+static void btree_update_set_nodes_written(struct closure *cl) -+{ -+ struct btree_update *as = container_of(cl, struct btree_update, cl); -+ struct bch_fs *c = as->c; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ as->nodes_written = true; -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work); -+} -+ -+/* -+ * We're updating @b with pointers to nodes that haven't finished writing yet: -+ * block @b from being written until @as completes -+ */ -+static void btree_update_updated_node(struct btree_update *as, struct btree *b) -+{ -+ struct bch_fs *c = as->c; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); -+ -+ 
BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); -+ BUG_ON(!btree_node_dirty(b)); -+ -+ as->mode = BTREE_INTERIOR_UPDATING_NODE; -+ as->b = b; -+ list_add(&as->write_blocked_list, &b->write_blocked); -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+ -+static void btree_update_reparent(struct btree_update *as, -+ struct btree_update *child) -+{ -+ struct bch_fs *c = as->c; -+ -+ lockdep_assert_held(&c->btree_interior_update_lock); -+ -+ child->b = NULL; -+ child->mode = BTREE_INTERIOR_UPDATING_AS; -+ -+ /* -+ * When we write a new btree root, we have to drop our journal pin -+ * _before_ the new nodes are technically reachable; see -+ * btree_update_nodes_written(). -+ * -+ * This goes for journal pins that are recursively blocked on us - so, -+ * just transfer the journal pin to the new interior update so -+ * btree_update_nodes_written() can drop it. -+ */ -+ bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, NULL); -+ bch2_journal_pin_drop(&c->journal, &child->journal); -+} -+ -+static void btree_update_updated_root(struct btree_update *as, struct btree *b) -+{ -+ struct bkey_i *insert = &b->key; -+ struct bch_fs *c = as->c; -+ -+ BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); -+ -+ BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) > -+ ARRAY_SIZE(as->journal_entries)); -+ -+ as->journal_u64s += -+ journal_entry_set((void *) &as->journal_entries[as->journal_u64s], -+ BCH_JSET_ENTRY_btree_root, -+ b->c.btree_id, b->c.level, -+ insert, insert->k.u64s); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); -+ -+ as->mode = BTREE_INTERIOR_UPDATING_ROOT; -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+ -+/* -+ * bch2_btree_update_add_new_node: -+ * -+ * This causes @as to wait on @b to be written, before it gets to -+ * bch2_btree_update_nodes_written -+ * -+ * Additionally, it sets b->will_make_reachable to prevent any additional writes -+ * to @b from happening besides the first until @b is reachable on disk -+ * -+ * And it adds @b to the list of @as's new nodes, so that we can update sector -+ * counts in bch2_btree_update_nodes_written: -+ */ -+void bch2_btree_update_add_new_node(struct btree_update *as, struct btree *b) -+{ -+ struct bch_fs *c = as->c; -+ -+ closure_get(&as->cl); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ BUG_ON(as->nr_new_nodes >= ARRAY_SIZE(as->new_nodes)); -+ BUG_ON(b->will_make_reachable); -+ -+ as->new_nodes[as->nr_new_nodes++] = b; -+ b->will_make_reachable = 1UL|(unsigned long) as; -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ btree_update_will_add_key(as, &b->key); -+} -+ -+/* -+ * returns true if @b was a new node -+ */ -+static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b) -+{ -+ struct btree_update *as; -+ unsigned long v; -+ unsigned i; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ /* -+ * When b->will_make_reachable != 0, it owns a ref on as->cl that's -+ * dropped when it gets written by bch2_btree_complete_write - the -+ * xchg() is for synchronization with bch2_btree_complete_write: -+ */ -+ v = xchg(&b->will_make_reachable, 0); -+ as = (struct btree_update *) (v & ~1UL); -+ -+ if (!as) { -+ mutex_unlock(&c->btree_interior_update_lock); -+ return; -+ } -+ -+ for (i = 0; i < as->nr_new_nodes; i++) -+ if (as->new_nodes[i] == b) -+ goto found; -+ -+ BUG(); -+found: -+ array_remove_item(as->new_nodes, as->nr_new_nodes, i); -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ if (v & 1) -+ 
closure_put(&as->cl); -+} -+ -+void bch2_btree_update_get_open_buckets(struct btree_update *as, struct btree *b) -+{ -+ while (b->ob.nr) -+ as->open_buckets[as->nr_open_buckets++] = -+ b->ob.v[--b->ob.nr]; -+} -+ -+/* -+ * @b is being split/rewritten: it may have pointers to not-yet-written btree -+ * nodes and thus outstanding btree_updates - redirect @b's -+ * btree_updates to point to this btree_update: -+ */ -+void bch2_btree_interior_update_will_free_node(struct btree_update *as, -+ struct btree *b) -+{ -+ struct bch_fs *c = as->c; -+ struct btree_update *p, *n; -+ struct btree_write *w; -+ -+ set_btree_node_dying(b); -+ -+ if (btree_node_fake(b)) -+ return; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ -+ /* -+ * Does this node have any btree_update operations preventing -+ * it from being written? -+ * -+ * If so, redirect them to point to this btree_update: we can -+ * write out our new nodes, but we won't make them visible until those -+ * operations complete -+ */ -+ list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) { -+ list_del_init(&p->write_blocked_list); -+ btree_update_reparent(as, p); -+ -+ /* -+ * for flush_held_btree_writes() waiting on updates to flush or -+ * nodes to be writeable: -+ */ -+ closure_wake_up(&c->btree_interior_update_wait); -+ } -+ -+ clear_btree_node_dirty(b); -+ clear_btree_node_need_write(b); -+ -+ /* -+ * Does this node have unwritten data that has a pin on the journal? -+ * -+ * If so, transfer that pin to the btree_update operation - -+ * note that if we're freeing multiple nodes, we only need to keep the -+ * oldest pin of any of the nodes we're freeing. We'll release the pin -+ * when the new nodes are persistent and reachable on disk: -+ */ -+ w = btree_current_write(b); -+ bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL); -+ bch2_journal_pin_drop(&c->journal, &w->journal); -+ -+ w = btree_prev_write(b); -+ bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL); -+ bch2_journal_pin_drop(&c->journal, &w->journal); -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ /* -+ * Is this a node that isn't reachable on disk yet? -+ * -+ * Nodes that aren't reachable yet have writes blocked until they're -+ * reachable - now that we've cancelled any pending writes and moved -+ * things waiting on that write to wait on this update, we can drop this -+ * node from the list of nodes that the other update is making -+ * reachable, prior to freeing it: -+ */ -+ btree_update_drop_new_node(c, b); -+ -+ btree_update_will_delete_key(as, &b->key); -+} -+ -+void bch2_btree_update_done(struct btree_update *as) -+{ -+ BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE); -+ -+ bch2_btree_reserve_put(as); -+ -+ continue_at(&as->cl, btree_update_set_nodes_written, system_freezable_wq); -+} -+ -+struct btree_update * -+bch2_btree_update_start(struct btree_trans *trans, enum btree_id id, -+ unsigned nr_nodes, unsigned flags, -+ struct closure *cl) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_update *as; -+ int disk_res_flags = (flags & BTREE_INSERT_NOFAIL) -+ ? BCH_DISK_RESERVATION_NOFAIL : 0; -+ int journal_flags = (flags & BTREE_INSERT_JOURNAL_RESERVED) -+ ? 
JOURNAL_RES_GET_RECLAIM : 0; -+ int ret = 0; -+ -+ /* -+ * This check isn't necessary for correctness - it's just to potentially -+ * prevent us from doing a lot of work that'll end up being wasted: -+ */ -+ ret = bch2_journal_error(&c->journal); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO); -+ memset(as, 0, sizeof(*as)); -+ closure_init(&as->cl, NULL); -+ as->c = c; -+ as->mode = BTREE_INTERIOR_NO_UPDATE; -+ as->btree_id = id; -+ INIT_LIST_HEAD(&as->list); -+ INIT_LIST_HEAD(&as->unwritten_list); -+ INIT_LIST_HEAD(&as->write_blocked_list); -+ bch2_keylist_init(&as->old_keys, as->_old_keys); -+ bch2_keylist_init(&as->new_keys, as->_new_keys); -+ bch2_keylist_init(&as->parent_keys, as->inline_keys); -+ -+ ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, -+ BTREE_UPDATE_JOURNAL_RES, -+ journal_flags|JOURNAL_RES_GET_NONBLOCK); -+ if (ret == -EAGAIN) { -+ if (flags & BTREE_INSERT_NOUNLOCK) -+ return ERR_PTR(-EINTR); -+ -+ bch2_trans_unlock(trans); -+ -+ ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, -+ BTREE_UPDATE_JOURNAL_RES, -+ journal_flags); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ if (!bch2_trans_relock(trans)) { -+ ret = -EINTR; -+ goto err; -+ } -+ } -+ -+ ret = bch2_disk_reservation_get(c, &as->disk_res, -+ nr_nodes * c->opts.btree_node_size, -+ c->opts.metadata_replicas, -+ disk_res_flags); -+ if (ret) -+ goto err; -+ -+ ret = bch2_btree_reserve_get(as, nr_nodes, flags, cl); -+ if (ret) -+ goto err; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_add_tail(&as->list, &c->btree_interior_update_list); -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ return as; -+err: -+ bch2_btree_update_free(as); -+ return ERR_PTR(ret); -+} -+ -+/* Btree root updates: */ -+ -+static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b) -+{ -+ /* Root nodes cannot be reaped */ -+ mutex_lock(&c->btree_cache.lock); -+ list_del_init(&b->list); -+ mutex_unlock(&c->btree_cache.lock); -+ -+ mutex_lock(&c->btree_root_lock); -+ BUG_ON(btree_node_root(c, b) && -+ (b->c.level < btree_node_root(c, b)->c.level || -+ !btree_node_dying(btree_node_root(c, b)))); -+ -+ btree_node_root(c, b) = b; -+ mutex_unlock(&c->btree_root_lock); -+ -+ bch2_recalc_btree_reserve(c); -+} -+ -+/** -+ * bch_btree_set_root - update the root in memory and on disk -+ * -+ * To ensure forward progress, the current task must not be holding any -+ * btree node write locks. However, you must hold an intent lock on the -+ * old root. -+ * -+ * Note: This allocates a journal entry but doesn't add any keys to -+ * it. All the btree roots are part of every journal write, so there -+ * is nothing new to be done. This just guarantees that there is a -+ * journal write. -+ */ -+static void bch2_btree_set_root(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *old; -+ -+ trace_btree_set_root(c, b); -+ BUG_ON(!b->written && -+ !test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)); -+ -+ old = btree_node_root(c, b); -+ -+ /* -+ * Ensure no one is using the old root while we switch to the -+ * new root: -+ */ -+ bch2_btree_node_lock_write(old, iter); -+ -+ bch2_btree_set_root_inmem(c, b); -+ -+ btree_update_updated_root(as, b); -+ -+ /* -+ * Unlock old root after new root is visible: -+ * -+ * The new root isn't persistent, but that's ok: we still have -+ * an intent lock on the new root, and any updates that would -+ * depend on the new root would have to update the new root. 
-+ */ -+ bch2_btree_node_unlock_write(old, iter); -+} -+ -+/* Interior node updates: */ -+ -+static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, -+ struct bkey_i *insert, -+ struct btree_node_iter *node_iter) -+{ -+ struct bkey_packed *k; -+ -+ BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) > -+ ARRAY_SIZE(as->journal_entries)); -+ -+ as->journal_u64s += -+ journal_entry_set((void *) &as->journal_entries[as->journal_u64s], -+ BCH_JSET_ENTRY_btree_keys, -+ b->c.btree_id, b->c.level, -+ insert, insert->k.u64s); -+ -+ while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) && -+ bkey_iter_pos_cmp(b, k, &insert->k.p) < 0) -+ bch2_btree_node_iter_advance(node_iter, b); -+ -+ bch2_btree_bset_insert_key(iter, b, node_iter, insert); -+ set_btree_node_dirty(b); -+ set_btree_node_need_write(b); -+} -+ -+/* -+ * Move keys from n1 (original replacement node, now lower node) to n2 (higher -+ * node) -+ */ -+static struct btree *__btree_split_node(struct btree_update *as, -+ struct btree *n1, -+ struct btree_iter *iter) -+{ -+ size_t nr_packed = 0, nr_unpacked = 0; -+ struct btree *n2; -+ struct bset *set1, *set2; -+ struct bkey_packed *k, *prev = NULL; -+ -+ n2 = bch2_btree_node_alloc(as, n1->c.level); -+ bch2_btree_update_add_new_node(as, n2); -+ -+ n2->data->max_key = n1->data->max_key; -+ n2->data->format = n1->format; -+ SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data)); -+ n2->key.k.p = n1->key.k.p; -+ -+ btree_node_set_format(n2, n2->data->format); -+ -+ set1 = btree_bset_first(n1); -+ set2 = btree_bset_first(n2); -+ -+ /* -+ * Has to be a linear search because we don't have an auxiliary -+ * search tree yet -+ */ -+ k = set1->start; -+ while (1) { -+ struct bkey_packed *n = bkey_next_skip_noops(k, vstruct_last(set1)); -+ -+ if (n == vstruct_last(set1)) -+ break; -+ if (k->_data - set1->_data >= (le16_to_cpu(set1->u64s) * 3) / 5) -+ break; -+ -+ if (bkey_packed(k)) -+ nr_packed++; -+ else -+ nr_unpacked++; -+ -+ prev = k; -+ k = n; -+ } -+ -+ BUG_ON(!prev); -+ -+ btree_set_max(n1, bkey_unpack_pos(n1, prev)); -+ btree_set_min(n2, bkey_successor(n1->key.k.p)); -+ -+ set2->u64s = cpu_to_le16((u64 *) vstruct_end(set1) - (u64 *) k); -+ set1->u64s = cpu_to_le16(le16_to_cpu(set1->u64s) - le16_to_cpu(set2->u64s)); -+ -+ set_btree_bset_end(n1, n1->set); -+ set_btree_bset_end(n2, n2->set); -+ -+ n2->nr.live_u64s = le16_to_cpu(set2->u64s); -+ n2->nr.bset_u64s[0] = le16_to_cpu(set2->u64s); -+ n2->nr.packed_keys = n1->nr.packed_keys - nr_packed; -+ n2->nr.unpacked_keys = n1->nr.unpacked_keys - nr_unpacked; -+ -+ n1->nr.live_u64s = le16_to_cpu(set1->u64s); -+ n1->nr.bset_u64s[0] = le16_to_cpu(set1->u64s); -+ n1->nr.packed_keys = nr_packed; -+ n1->nr.unpacked_keys = nr_unpacked; -+ -+ BUG_ON(!set1->u64s); -+ BUG_ON(!set2->u64s); -+ -+ memcpy_u64s(set2->start, -+ vstruct_end(set1), -+ le16_to_cpu(set2->u64s)); -+ -+ btree_node_reset_sib_u64s(n1); -+ btree_node_reset_sib_u64s(n2); -+ -+ bch2_verify_btree_nr_keys(n1); -+ bch2_verify_btree_nr_keys(n2); -+ -+ if (n1->c.level) { -+ btree_node_interior_verify(as->c, n1); -+ btree_node_interior_verify(as->c, n2); -+ } -+ -+ return n2; -+} -+ -+/* -+ * For updates to interior nodes, we've got to do the insert before we split -+ * because the stuff we're inserting has to be inserted atomically. Post split, -+ * the keys might have to go in different nodes and the split would no longer be -+ * atomic. 
-+ * -+ * Worse, if the insert is from btree node coalescing, if we do the insert after -+ * we do the split (and pick the pivot) - the pivot we pick might be between -+ * nodes that were coalesced, and thus in the middle of a child node post -+ * coalescing: -+ */ -+static void btree_split_insert_keys(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, -+ struct keylist *keys) -+{ -+ struct btree_node_iter node_iter; -+ struct bkey_i *k = bch2_keylist_front(keys); -+ struct bkey_packed *src, *dst, *n; -+ struct bset *i; -+ -+ BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE); -+ -+ bch2_btree_node_iter_init(&node_iter, b, &k->k.p); -+ -+ while (!bch2_keylist_empty(keys)) { -+ k = bch2_keylist_front(keys); -+ -+ bch2_insert_fixup_btree_ptr(as, b, iter, k, &node_iter); -+ bch2_keylist_pop_front(keys); -+ } -+ -+ /* -+ * We can't tolerate whiteouts here - with whiteouts there can be -+ * duplicate keys, and it would be rather bad if we picked a duplicate -+ * for the pivot: -+ */ -+ i = btree_bset_first(b); -+ src = dst = i->start; -+ while (src != vstruct_last(i)) { -+ n = bkey_next_skip_noops(src, vstruct_last(i)); -+ if (!bkey_deleted(src)) { -+ memmove_u64s_down(dst, src, src->u64s); -+ dst = bkey_next(dst); -+ } -+ src = n; -+ } -+ -+ i->u64s = cpu_to_le16((u64 *) dst - i->_data); -+ set_btree_bset_end(b, b->set); -+ -+ BUG_ON(b->nsets != 1 || -+ b->nr.live_u64s != le16_to_cpu(btree_bset_first(b)->u64s)); -+ -+ btree_node_interior_verify(as->c, b); -+} -+ -+static void btree_split(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, struct keylist *keys, -+ unsigned flags) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *parent = btree_node_parent(iter, b); -+ struct btree *n1, *n2 = NULL, *n3 = NULL; -+ u64 start_time = local_clock(); -+ -+ BUG_ON(!parent && (b != btree_node_root(c, b))); -+ BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level)); -+ -+ bch2_btree_interior_update_will_free_node(as, b); -+ -+ n1 = bch2_btree_node_alloc_replacement(as, b); -+ bch2_btree_update_add_new_node(as, n1); -+ -+ if (keys) -+ btree_split_insert_keys(as, n1, iter, keys); -+ -+ if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) { -+ trace_btree_split(c, b); -+ -+ n2 = __btree_split_node(as, n1, iter); -+ -+ bch2_btree_build_aux_trees(n2); -+ bch2_btree_build_aux_trees(n1); -+ six_unlock_write(&n2->c.lock); -+ six_unlock_write(&n1->c.lock); -+ -+ bch2_btree_node_write(c, n2, SIX_LOCK_intent); -+ -+ /* -+ * Note that on recursive parent_keys == keys, so we -+ * can't start adding new keys to parent_keys before emptying it -+ * out (which we did with btree_split_insert_keys() above) -+ */ -+ bch2_keylist_add(&as->parent_keys, &n1->key); -+ bch2_keylist_add(&as->parent_keys, &n2->key); -+ -+ if (!parent) { -+ /* Depth increases, make a new root */ -+ n3 = __btree_root_alloc(as, b->c.level + 1); -+ -+ n3->sib_u64s[0] = U16_MAX; -+ n3->sib_u64s[1] = U16_MAX; -+ -+ btree_split_insert_keys(as, n3, iter, &as->parent_keys); -+ -+ bch2_btree_node_write(c, n3, SIX_LOCK_intent); -+ } -+ } else { -+ trace_btree_compact(c, b); -+ -+ bch2_btree_build_aux_trees(n1); -+ six_unlock_write(&n1->c.lock); -+ -+ if (parent) -+ bch2_keylist_add(&as->parent_keys, &n1->key); -+ } -+ -+ bch2_btree_node_write(c, n1, SIX_LOCK_intent); -+ -+ /* New nodes all written, now make them visible: */ -+ -+ if (parent) { -+ /* Split a non root node */ -+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); -+ } else if (n3) { -+ bch2_btree_set_root(as, n3, iter); -+ } else 
{ -+ /* Root filled up but didn't need to be split */ -+ bch2_btree_set_root(as, n1, iter); -+ } -+ -+ bch2_btree_update_get_open_buckets(as, n1); -+ if (n2) -+ bch2_btree_update_get_open_buckets(as, n2); -+ if (n3) -+ bch2_btree_update_get_open_buckets(as, n3); -+ -+ /* Successful split, update the iterator to point to the new nodes: */ -+ -+ six_lock_increment(&b->c.lock, SIX_LOCK_intent); -+ bch2_btree_iter_node_drop(iter, b); -+ if (n3) -+ bch2_btree_iter_node_replace(iter, n3); -+ if (n2) -+ bch2_btree_iter_node_replace(iter, n2); -+ bch2_btree_iter_node_replace(iter, n1); -+ -+ /* -+ * The old node must be freed (in memory) _before_ unlocking the new -+ * nodes - else another thread could re-acquire a read lock on the old -+ * node after another thread has locked and updated the new node, thus -+ * seeing stale data: -+ */ -+ bch2_btree_node_free_inmem(c, b, iter); -+ -+ if (n3) -+ six_unlock_intent(&n3->c.lock); -+ if (n2) -+ six_unlock_intent(&n2->c.lock); -+ six_unlock_intent(&n1->c.lock); -+ -+ bch2_btree_trans_verify_locks(iter->trans); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split], -+ start_time); -+} -+ -+static void -+bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, struct keylist *keys) -+{ -+ struct btree_iter *linked; -+ struct btree_node_iter node_iter; -+ struct bkey_i *insert = bch2_keylist_front(keys); -+ struct bkey_packed *k; -+ -+ /* Don't screw up @iter's position: */ -+ node_iter = iter->l[b->c.level].iter; -+ -+ /* -+ * btree_split(), btree_gc_coalesce() will insert keys before -+ * the iterator's current position - they know the keys go in -+ * the node the iterator points to: -+ */ -+ while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) && -+ (bkey_cmp_packed(b, k, &insert->k) >= 0)) -+ ; -+ -+ for_each_keylist_key(keys, insert) -+ bch2_insert_fixup_btree_ptr(as, b, iter, insert, &node_iter); -+ -+ btree_update_updated_node(as, b); -+ -+ trans_for_each_iter_with_node(iter->trans, b, linked) -+ bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); -+ -+ bch2_btree_trans_verify_iters(iter->trans, b); -+} -+ -+/** -+ * bch_btree_insert_node - insert bkeys into a given btree node -+ * -+ * @iter: btree iterator -+ * @keys: list of keys to insert -+ * @hook: insert callback -+ * @persistent: if not null, @persistent will wait on journal write -+ * -+ * Inserts as many keys as it can into a given btree node, splitting it if full. -+ * If a split occurred, this function will return early. This can only happen -+ * for leaf nodes -- inserts into interior nodes have to be atomic. 
-+ */ -+void bch2_btree_insert_node(struct btree_update *as, struct btree *b, -+ struct btree_iter *iter, struct keylist *keys, -+ unsigned flags) -+{ -+ struct bch_fs *c = as->c; -+ int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s); -+ int old_live_u64s = b->nr.live_u64s; -+ int live_u64s_added, u64s_added; -+ -+ BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level)); -+ BUG_ON(!b->c.level); -+ BUG_ON(!as || as->b); -+ bch2_verify_keylist_sorted(keys); -+ -+ if (as->must_rewrite) -+ goto split; -+ -+ bch2_btree_node_lock_for_insert(c, b, iter); -+ -+ if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) { -+ bch2_btree_node_unlock_write(b, iter); -+ goto split; -+ } -+ -+ bch2_btree_insert_keys_interior(as, b, iter, keys); -+ -+ live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; -+ u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s; -+ -+ if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added); -+ if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added); -+ -+ if (u64s_added > live_u64s_added && -+ bch2_maybe_compact_whiteouts(c, b)) -+ bch2_btree_iter_reinit_node(iter, b); -+ -+ bch2_btree_node_unlock_write(b, iter); -+ -+ btree_node_interior_verify(c, b); -+ -+ /* -+ * when called from the btree_split path the new nodes aren't added to -+ * the btree iterator yet, so the merge path's unlock/wait/relock dance -+ * won't work: -+ */ -+ bch2_foreground_maybe_merge(c, iter, b->c.level, -+ flags|BTREE_INSERT_NOUNLOCK); -+ return; -+split: -+ btree_split(as, b, iter, keys, flags); -+} -+ -+int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, -+ unsigned flags) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree *b = iter_l(iter)->b; -+ struct btree_update *as; -+ struct closure cl; -+ int ret = 0; -+ struct btree_insert_entry *i; -+ -+ /* -+ * We already have a disk reservation and open buckets pinned; this -+ * allocation must not block: -+ */ -+ trans_for_each_update(trans, i) -+ if (btree_node_type_needs_gc(i->iter->btree_id)) -+ flags |= BTREE_INSERT_USE_RESERVE; -+ -+ closure_init_stack(&cl); -+ -+ /* Hack, because gc and splitting nodes doesn't mix yet: */ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD) && -+ !down_read_trylock(&c->gc_lock)) { -+ if (flags & BTREE_INSERT_NOUNLOCK) { -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ return -EINTR; -+ } -+ -+ bch2_trans_unlock(trans); -+ down_read(&c->gc_lock); -+ -+ if (!bch2_trans_relock(trans)) -+ ret = -EINTR; -+ } -+ -+ /* -+ * XXX: figure out how far we might need to split, -+ * instead of locking/reserving all the way to the root: -+ */ -+ if (!bch2_btree_iter_upgrade(iter, U8_MAX)) { -+ trace_trans_restart_iter_upgrade(trans->ip); -+ ret = -EINTR; -+ goto out; -+ } -+ -+ as = bch2_btree_update_start(trans, iter->btree_id, -+ btree_update_reserve_required(c, b), flags, -+ !(flags & BTREE_INSERT_NOUNLOCK) ? 
&cl : NULL); -+ if (IS_ERR(as)) { -+ ret = PTR_ERR(as); -+ if (ret == -EAGAIN) { -+ BUG_ON(flags & BTREE_INSERT_NOUNLOCK); -+ bch2_trans_unlock(trans); -+ ret = -EINTR; -+ -+ trace_transaction_restart_ip(trans->ip, _THIS_IP_); -+ } -+ goto out; -+ } -+ -+ btree_split(as, b, iter, NULL, flags); -+ bch2_btree_update_done(as); -+ -+ /* -+ * We haven't successfully inserted yet, so don't downgrade all the way -+ * back to read locks; -+ */ -+ __bch2_btree_iter_downgrade(iter, 1); -+out: -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) -+ up_read(&c->gc_lock); -+ closure_sync(&cl); -+ return ret; -+} -+ -+void __bch2_foreground_maybe_merge(struct bch_fs *c, -+ struct btree_iter *iter, -+ unsigned level, -+ unsigned flags, -+ enum btree_node_sibling sib) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree_update *as; -+ struct bkey_format_state new_s; -+ struct bkey_format new_f; -+ struct bkey_i delete; -+ struct btree *b, *m, *n, *prev, *next, *parent; -+ struct closure cl; -+ size_t sib_u64s; -+ int ret = 0; -+ -+ BUG_ON(!btree_node_locked(iter, level)); -+ -+ closure_init_stack(&cl); -+retry: -+ BUG_ON(!btree_node_locked(iter, level)); -+ -+ b = iter->l[level].b; -+ -+ parent = btree_node_parent(iter, b); -+ if (!parent) -+ goto out; -+ -+ if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) -+ goto out; -+ -+ /* XXX: can't be holding read locks */ -+ m = bch2_btree_node_get_sibling(c, iter, b, sib); -+ if (IS_ERR(m)) { -+ ret = PTR_ERR(m); -+ goto err; -+ } -+ -+ /* NULL means no sibling: */ -+ if (!m) { -+ b->sib_u64s[sib] = U16_MAX; -+ goto out; -+ } -+ -+ if (sib == btree_prev_sib) { -+ prev = m; -+ next = b; -+ } else { -+ prev = b; -+ next = m; -+ } -+ -+ bch2_bkey_format_init(&new_s); -+ __bch2_btree_calc_format(&new_s, b); -+ __bch2_btree_calc_format(&new_s, m); -+ new_f = bch2_bkey_format_done(&new_s); -+ -+ sib_u64s = btree_node_u64s_with_format(b, &new_f) + -+ btree_node_u64s_with_format(m, &new_f); -+ -+ if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) { -+ sib_u64s -= BTREE_FOREGROUND_MERGE_HYSTERESIS(c); -+ sib_u64s /= 2; -+ sib_u64s += BTREE_FOREGROUND_MERGE_HYSTERESIS(c); -+ } -+ -+ sib_u64s = min(sib_u64s, btree_max_u64s(c)); -+ b->sib_u64s[sib] = sib_u64s; -+ -+ if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) { -+ six_unlock_intent(&m->c.lock); -+ goto out; -+ } -+ -+ /* We're changing btree topology, doesn't mix with gc: */ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD) && -+ !down_read_trylock(&c->gc_lock)) -+ goto err_cycle_gc_lock; -+ -+ if (!bch2_btree_iter_upgrade(iter, U8_MAX)) { -+ ret = -EINTR; -+ goto err_unlock; -+ } -+ -+ as = bch2_btree_update_start(trans, iter->btree_id, -+ btree_update_reserve_required(c, parent) + 1, -+ flags| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE, -+ !(flags & BTREE_INSERT_NOUNLOCK) ? 
&cl : NULL); -+ if (IS_ERR(as)) { -+ ret = PTR_ERR(as); -+ goto err_unlock; -+ } -+ -+ trace_btree_merge(c, b); -+ -+ bch2_btree_interior_update_will_free_node(as, b); -+ bch2_btree_interior_update_will_free_node(as, m); -+ -+ n = bch2_btree_node_alloc(as, b->c.level); -+ bch2_btree_update_add_new_node(as, n); -+ -+ btree_set_min(n, prev->data->min_key); -+ btree_set_max(n, next->data->max_key); -+ n->data->format = new_f; -+ -+ btree_node_set_format(n, new_f); -+ -+ bch2_btree_sort_into(c, n, prev); -+ bch2_btree_sort_into(c, n, next); -+ -+ bch2_btree_build_aux_trees(n); -+ six_unlock_write(&n->c.lock); -+ -+ bkey_init(&delete.k); -+ delete.k.p = prev->key.k.p; -+ bch2_keylist_add(&as->parent_keys, &delete); -+ bch2_keylist_add(&as->parent_keys, &n->key); -+ -+ bch2_btree_node_write(c, n, SIX_LOCK_intent); -+ -+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); -+ -+ bch2_btree_update_get_open_buckets(as, n); -+ -+ six_lock_increment(&b->c.lock, SIX_LOCK_intent); -+ bch2_btree_iter_node_drop(iter, b); -+ bch2_btree_iter_node_drop(iter, m); -+ -+ bch2_btree_iter_node_replace(iter, n); -+ -+ bch2_btree_trans_verify_iters(trans, n); -+ -+ bch2_btree_node_free_inmem(c, b, iter); -+ bch2_btree_node_free_inmem(c, m, iter); -+ -+ six_unlock_intent(&n->c.lock); -+ -+ bch2_btree_update_done(as); -+ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) -+ up_read(&c->gc_lock); -+out: -+ bch2_btree_trans_verify_locks(trans); -+ -+ /* -+ * Don't downgrade locks here: we're called after successful insert, -+ * and the caller will downgrade locks after a successful insert -+ * anyways (in case e.g. a split was required first) -+ * -+ * And we're also called when inserting into interior nodes in the -+ * split path, and downgrading to read locks in there is potentially -+ * confusing: -+ */ -+ closure_sync(&cl); -+ return; -+ -+err_cycle_gc_lock: -+ six_unlock_intent(&m->c.lock); -+ -+ if (flags & BTREE_INSERT_NOUNLOCK) -+ goto out; -+ -+ bch2_trans_unlock(trans); -+ -+ down_read(&c->gc_lock); -+ up_read(&c->gc_lock); -+ ret = -EINTR; -+ goto err; -+ -+err_unlock: -+ six_unlock_intent(&m->c.lock); -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) -+ up_read(&c->gc_lock); -+err: -+ BUG_ON(ret == -EAGAIN && (flags & BTREE_INSERT_NOUNLOCK)); -+ -+ if ((ret == -EAGAIN || ret == -EINTR) && -+ !(flags & BTREE_INSERT_NOUNLOCK)) { -+ bch2_trans_unlock(trans); -+ closure_sync(&cl); -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ goto out; -+ -+ goto retry; -+ } -+ -+ goto out; -+} -+ -+static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, -+ struct btree *b, unsigned flags, -+ struct closure *cl) -+{ -+ struct btree *n, *parent = btree_node_parent(iter, b); -+ struct btree_update *as; -+ -+ as = bch2_btree_update_start(iter->trans, iter->btree_id, -+ (parent -+ ? 
btree_update_reserve_required(c, parent) -+ : 0) + 1, -+ flags, cl); -+ if (IS_ERR(as)) { -+ trace_btree_gc_rewrite_node_fail(c, b); -+ return PTR_ERR(as); -+ } -+ -+ bch2_btree_interior_update_will_free_node(as, b); -+ -+ n = bch2_btree_node_alloc_replacement(as, b); -+ bch2_btree_update_add_new_node(as, n); -+ -+ bch2_btree_build_aux_trees(n); -+ six_unlock_write(&n->c.lock); -+ -+ trace_btree_gc_rewrite_node(c, b); -+ -+ bch2_btree_node_write(c, n, SIX_LOCK_intent); -+ -+ if (parent) { -+ bch2_keylist_add(&as->parent_keys, &n->key); -+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); -+ } else { -+ bch2_btree_set_root(as, n, iter); -+ } -+ -+ bch2_btree_update_get_open_buckets(as, n); -+ -+ six_lock_increment(&b->c.lock, SIX_LOCK_intent); -+ bch2_btree_iter_node_drop(iter, b); -+ bch2_btree_iter_node_replace(iter, n); -+ bch2_btree_node_free_inmem(c, b, iter); -+ six_unlock_intent(&n->c.lock); -+ -+ bch2_btree_update_done(as); -+ return 0; -+} -+ -+/** -+ * bch_btree_node_rewrite - Rewrite/move a btree node -+ * -+ * Returns 0 on success, -EINTR or -EAGAIN on failure (i.e. -+ * btree_check_reserve() has to wait) -+ */ -+int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, -+ __le64 seq, unsigned flags) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct closure cl; -+ struct btree *b; -+ int ret; -+ -+ flags |= BTREE_INSERT_NOFAIL; -+ -+ closure_init_stack(&cl); -+ -+ bch2_btree_iter_upgrade(iter, U8_MAX); -+ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) { -+ if (!down_read_trylock(&c->gc_lock)) { -+ bch2_trans_unlock(trans); -+ down_read(&c->gc_lock); -+ } -+ } -+ -+ while (1) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ break; -+ -+ b = bch2_btree_iter_peek_node(iter); -+ if (!b || b->data->keys.seq != seq) -+ break; -+ -+ ret = __btree_node_rewrite(c, iter, b, flags, &cl); -+ if (ret != -EAGAIN && -+ ret != -EINTR) -+ break; -+ -+ bch2_trans_unlock(trans); -+ closure_sync(&cl); -+ } -+ -+ bch2_btree_iter_downgrade(iter); -+ -+ if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) -+ up_read(&c->gc_lock); -+ -+ closure_sync(&cl); -+ return ret; -+} -+ -+static void __bch2_btree_node_update_key(struct bch_fs *c, -+ struct btree_update *as, -+ struct btree_iter *iter, -+ struct btree *b, struct btree *new_hash, -+ struct bkey_i *new_key) -+{ -+ struct btree *parent; -+ int ret; -+ -+ btree_update_will_delete_key(as, &b->key); -+ btree_update_will_add_key(as, new_key); -+ -+ parent = btree_node_parent(iter, b); -+ if (parent) { -+ if (new_hash) { -+ bkey_copy(&new_hash->key, new_key); -+ ret = bch2_btree_node_hash_insert(&c->btree_cache, -+ new_hash, b->c.level, b->c.btree_id); -+ BUG_ON(ret); -+ } -+ -+ bch2_keylist_add(&as->parent_keys, new_key); -+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, 0); -+ -+ if (new_hash) { -+ mutex_lock(&c->btree_cache.lock); -+ bch2_btree_node_hash_remove(&c->btree_cache, new_hash); -+ -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ bkey_copy(&b->key, new_key); -+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); -+ BUG_ON(ret); -+ mutex_unlock(&c->btree_cache.lock); -+ } else { -+ bkey_copy(&b->key, new_key); -+ } -+ } else { -+ BUG_ON(btree_node_root(c, b) != b); -+ -+ bch2_btree_node_lock_write(b, iter); -+ bkey_copy(&b->key, new_key); -+ -+ if (btree_ptr_hash_val(&b->key) != b->hash_val) { -+ mutex_lock(&c->btree_cache.lock); -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); -+ BUG_ON(ret); -+ 
mutex_unlock(&c->btree_cache.lock); -+ } -+ -+ btree_update_updated_root(as, b); -+ bch2_btree_node_unlock_write(b, iter); -+ } -+ -+ bch2_btree_update_done(as); -+} -+ -+int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, -+ struct btree *b, -+ struct bkey_i *new_key) -+{ -+ struct btree *parent = btree_node_parent(iter, b); -+ struct btree_update *as = NULL; -+ struct btree *new_hash = NULL; -+ struct closure cl; -+ int ret; -+ -+ closure_init_stack(&cl); -+ -+ if (!bch2_btree_iter_upgrade(iter, U8_MAX)) -+ return -EINTR; -+ -+ if (!down_read_trylock(&c->gc_lock)) { -+ bch2_trans_unlock(iter->trans); -+ down_read(&c->gc_lock); -+ -+ if (!bch2_trans_relock(iter->trans)) { -+ ret = -EINTR; -+ goto err; -+ } -+ } -+ -+ /* -+ * check btree_ptr_hash_val() after @b is locked by -+ * btree_iter_traverse(): -+ */ -+ if (btree_ptr_hash_val(new_key) != b->hash_val) { -+ /* bch2_btree_reserve_get will unlock */ -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); -+ if (ret) { -+ bch2_trans_unlock(iter->trans); -+ up_read(&c->gc_lock); -+ closure_sync(&cl); -+ down_read(&c->gc_lock); -+ -+ if (!bch2_trans_relock(iter->trans)) { -+ ret = -EINTR; -+ goto err; -+ } -+ } -+ -+ new_hash = bch2_btree_node_mem_alloc(c); -+ } -+retry: -+ as = bch2_btree_update_start(iter->trans, iter->btree_id, -+ parent ? btree_update_reserve_required(c, parent) : 0, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_USE_ALLOC_RESERVE, -+ &cl); -+ -+ if (IS_ERR(as)) { -+ ret = PTR_ERR(as); -+ if (ret == -EAGAIN) -+ ret = -EINTR; -+ -+ if (ret == -EINTR) { -+ bch2_trans_unlock(iter->trans); -+ up_read(&c->gc_lock); -+ closure_sync(&cl); -+ down_read(&c->gc_lock); -+ -+ if (bch2_trans_relock(iter->trans)) -+ goto retry; -+ } -+ -+ goto err; -+ } -+ -+ ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(new_key)); -+ if (ret) -+ goto err_free_update; -+ -+ __bch2_btree_node_update_key(c, as, iter, b, new_hash, new_key); -+ -+ bch2_btree_iter_downgrade(iter); -+err: -+ if (new_hash) { -+ mutex_lock(&c->btree_cache.lock); -+ list_move(&new_hash->list, &c->btree_cache.freeable); -+ mutex_unlock(&c->btree_cache.lock); -+ -+ six_unlock_write(&new_hash->c.lock); -+ six_unlock_intent(&new_hash->c.lock); -+ } -+ up_read(&c->gc_lock); -+ closure_sync(&cl); -+ return ret; -+err_free_update: -+ bch2_btree_update_free(as); -+ goto err; -+} -+ -+/* Init code: */ -+ -+/* -+ * Only for filesystem bringup, when first reading the btree roots or allocating -+ * btree roots when initializing a new filesystem: -+ */ -+void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b) -+{ -+ BUG_ON(btree_node_root(c, b)); -+ -+ bch2_btree_set_root_inmem(c, b); -+} -+ -+void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id) -+{ -+ struct closure cl; -+ struct btree *b; -+ int ret; -+ -+ closure_init_stack(&cl); -+ -+ do { -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); -+ closure_sync(&cl); -+ } while (ret); -+ -+ b = bch2_btree_node_mem_alloc(c); -+ bch2_btree_cache_cannibalize_unlock(c); -+ -+ set_btree_node_fake(b); -+ set_btree_node_need_rewrite(b); -+ b->c.level = 0; -+ b->c.btree_id = id; -+ -+ bkey_btree_ptr_init(&b->key); -+ b->key.k.p = POS_MAX; -+ *((u64 *) bkey_i_to_btree_ptr(&b->key)->v.start) = U64_MAX - id; -+ -+ bch2_bset_init_first(b, &b->data->keys); -+ bch2_btree_build_aux_trees(b); -+ -+ b->data->flags = 0; -+ btree_set_min(b, POS_MIN); -+ btree_set_max(b, POS_MAX); -+ b->data->format = bch2_btree_calc_format(b); -+ btree_node_set_format(b, b->data->format); -+ -+ ret = 
bch2_btree_node_hash_insert(&c->btree_cache, b, -+ b->c.level, b->c.btree_id); -+ BUG_ON(ret); -+ -+ bch2_btree_set_root_inmem(c, b); -+ -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+} -+ -+void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct btree_update *as; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_for_each_entry(as, &c->btree_interior_update_list, list) -+ pr_buf(out, "%p m %u w %u r %u j %llu\n", -+ as, -+ as->mode, -+ as->nodes_written, -+ atomic_read(&as->cl.remaining) & CLOSURE_REMAINING_MASK, -+ as->journal.seq); -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+ -+size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *c) -+{ -+ size_t ret = 0; -+ struct list_head *i; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_for_each(i, &c->btree_interior_update_list) -+ ret++; -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ return ret; -+} -+ -+void bch2_journal_entries_to_btree_roots(struct bch_fs *c, struct jset *jset) -+{ -+ struct btree_root *r; -+ struct jset_entry *entry; -+ -+ mutex_lock(&c->btree_root_lock); -+ -+ vstruct_for_each(jset, entry) -+ if (entry->type == BCH_JSET_ENTRY_btree_root) { -+ r = &c->btree_roots[entry->btree_id]; -+ r->level = entry->level; -+ r->alive = true; -+ bkey_copy(&r->key, &entry->start[0]); -+ } -+ -+ mutex_unlock(&c->btree_root_lock); -+} -+ -+struct jset_entry * -+bch2_btree_roots_to_journal_entries(struct bch_fs *c, -+ struct jset_entry *start, -+ struct jset_entry *end) -+{ -+ struct jset_entry *entry; -+ unsigned long have = 0; -+ unsigned i; -+ -+ for (entry = start; entry < end; entry = vstruct_next(entry)) -+ if (entry->type == BCH_JSET_ENTRY_btree_root) -+ __set_bit(entry->btree_id, &have); -+ -+ mutex_lock(&c->btree_root_lock); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (c->btree_roots[i].alive && !test_bit(i, &have)) { -+ journal_entry_set(end, -+ BCH_JSET_ENTRY_btree_root, -+ i, c->btree_roots[i].level, -+ &c->btree_roots[i].key, -+ c->btree_roots[i].key.u64s); -+ end = vstruct_next(end); -+ } -+ -+ mutex_unlock(&c->btree_root_lock); -+ -+ return end; -+} -+ -+void bch2_fs_btree_interior_update_exit(struct bch_fs *c) -+{ -+ if (c->btree_interior_update_worker) -+ destroy_workqueue(c->btree_interior_update_worker); -+ mempool_exit(&c->btree_interior_update_pool); -+} -+ -+int bch2_fs_btree_interior_update_init(struct bch_fs *c) -+{ -+ mutex_init(&c->btree_reserve_cache_lock); -+ INIT_LIST_HEAD(&c->btree_interior_update_list); -+ INIT_LIST_HEAD(&c->btree_interior_updates_unwritten); -+ mutex_init(&c->btree_interior_update_lock); -+ INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work); -+ -+ c->btree_interior_update_worker = -+ alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 1); -+ if (!c->btree_interior_update_worker) -+ return -ENOMEM; -+ -+ return mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1, -+ sizeof(struct btree_update)); -+} -diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h -new file mode 100644 -index 000000000000..7668225e72c6 ---- /dev/null -+++ b/fs/bcachefs/btree_update_interior.h -@@ -0,0 +1,331 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H -+#define _BCACHEFS_BTREE_UPDATE_INTERIOR_H -+ -+#include "btree_cache.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+ -+void __bch2_btree_calc_format(struct bkey_format_state *, struct btree *); -+bool bch2_btree_node_format_fits(struct bch_fs *c, struct 
btree *, -+ struct bkey_format *); -+ -+#define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES) -+ -+#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1)) -+ -+/* -+ * Tracks an in progress split/rewrite of a btree node and the update to the -+ * parent node: -+ * -+ * When we split/rewrite a node, we do all the updates in memory without -+ * waiting for any writes to complete - we allocate the new node(s) and update -+ * the parent node, possibly recursively up to the root. -+ * -+ * The end result is that we have one or more new nodes being written - -+ * possibly several, if there were multiple splits - and then a write (updating -+ * an interior node) which will make all these new nodes visible. -+ * -+ * Additionally, as we split/rewrite nodes we free the old nodes - but the old -+ * nodes can't be freed (their space on disk can't be reclaimed) until the -+ * update to the interior node that makes the new node visible completes - -+ * until then, the old nodes are still reachable on disk. -+ * -+ */ -+struct btree_update { -+ struct closure cl; -+ struct bch_fs *c; -+ -+ struct list_head list; -+ struct list_head unwritten_list; -+ -+ /* What kind of update are we doing? */ -+ enum { -+ BTREE_INTERIOR_NO_UPDATE, -+ BTREE_INTERIOR_UPDATING_NODE, -+ BTREE_INTERIOR_UPDATING_ROOT, -+ BTREE_INTERIOR_UPDATING_AS, -+ } mode; -+ -+ unsigned must_rewrite:1; -+ unsigned nodes_written:1; -+ -+ enum btree_id btree_id; -+ -+ struct disk_reservation disk_res; -+ struct journal_preres journal_preres; -+ -+ /* -+ * BTREE_INTERIOR_UPDATING_NODE: -+ * The update that made the new nodes visible was a regular update to an -+ * existing interior node - @b. We can't write out the update to @b -+ * until the new nodes we created are finished writing, so we block @b -+ * from writing by putting this btree_interior update on the -+ * @b->write_blocked list with @write_blocked_list: -+ */ -+ struct btree *b; -+ struct list_head write_blocked_list; -+ -+ /* -+ * We may be freeing nodes that were dirty, and thus had journal entries -+ * pinned: we need to transfer the oldest of those pins to the -+ * btree_update operation, and release it when the new node(s) -+ * are all persistent and reachable: -+ */ -+ struct journal_entry_pin journal; -+ -+ /* Preallocated nodes we reserve when we start the update: */ -+ struct btree *prealloc_nodes[BTREE_UPDATE_NODES_MAX]; -+ unsigned nr_prealloc_nodes; -+ -+ /* Nodes being freed: */ -+ struct keylist old_keys; -+ u64 _old_keys[BTREE_UPDATE_NODES_MAX * -+ BKEY_BTREE_PTR_VAL_U64s_MAX]; -+ -+ /* Nodes being added: */ -+ struct keylist new_keys; -+ u64 _new_keys[BTREE_UPDATE_NODES_MAX * -+ BKEY_BTREE_PTR_VAL_U64s_MAX]; -+ -+ /* New nodes, that will be made reachable by this update: */ -+ struct btree *new_nodes[BTREE_UPDATE_NODES_MAX]; -+ unsigned nr_new_nodes; -+ -+ open_bucket_idx_t open_buckets[BTREE_UPDATE_NODES_MAX * -+ BCH_REPLICAS_MAX]; -+ open_bucket_idx_t nr_open_buckets; -+ -+ unsigned journal_u64s; -+ u64 journal_entries[BTREE_UPDATE_JOURNAL_RES]; -+ -+ /* Only here to reduce stack usage on recursive splits: */ -+ struct keylist parent_keys; -+ /* -+ * Enough room for btree_split's keys without realloc - btree node -+ * pointers never have crc/compression info, so we only need to acount -+ * for the pointers for three keys -+ */ -+ u64 inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3]; -+}; -+ -+void bch2_btree_node_free_inmem(struct bch_fs *, struct btree *, -+ struct btree_iter *); -+void 
bch2_btree_node_free_never_inserted(struct bch_fs *, struct btree *); -+ -+void bch2_btree_update_get_open_buckets(struct btree_update *, struct btree *); -+ -+struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *, -+ struct btree *, -+ struct bkey_format); -+ -+void bch2_btree_update_done(struct btree_update *); -+struct btree_update * -+bch2_btree_update_start(struct btree_trans *, enum btree_id, unsigned, -+ unsigned, struct closure *); -+ -+void bch2_btree_interior_update_will_free_node(struct btree_update *, -+ struct btree *); -+void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); -+ -+void bch2_btree_insert_node(struct btree_update *, struct btree *, -+ struct btree_iter *, struct keylist *, -+ unsigned); -+int bch2_btree_split_leaf(struct bch_fs *, struct btree_iter *, unsigned); -+ -+void __bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *, -+ unsigned, unsigned, enum btree_node_sibling); -+ -+static inline void bch2_foreground_maybe_merge_sibling(struct bch_fs *c, -+ struct btree_iter *iter, -+ unsigned level, unsigned flags, -+ enum btree_node_sibling sib) -+{ -+ struct btree *b; -+ -+ if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE) -+ return; -+ -+ if (!bch2_btree_node_relock(iter, level)) -+ return; -+ -+ b = iter->l[level].b; -+ if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold) -+ return; -+ -+ __bch2_foreground_maybe_merge(c, iter, level, flags, sib); -+} -+ -+static inline void bch2_foreground_maybe_merge(struct bch_fs *c, -+ struct btree_iter *iter, -+ unsigned level, -+ unsigned flags) -+{ -+ bch2_foreground_maybe_merge_sibling(c, iter, level, flags, -+ btree_prev_sib); -+ bch2_foreground_maybe_merge_sibling(c, iter, level, flags, -+ btree_next_sib); -+} -+ -+void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *); -+void bch2_btree_root_alloc(struct bch_fs *, enum btree_id); -+ -+static inline unsigned btree_update_reserve_required(struct bch_fs *c, -+ struct btree *b) -+{ -+ unsigned depth = btree_node_root(c, b)->c.level + 1; -+ -+ /* -+ * Number of nodes we might have to allocate in a worst case btree -+ * split operation - we split all the way up to the root, then allocate -+ * a new root, unless we're already at max depth: -+ */ -+ if (depth < BTREE_MAX_DEPTH) -+ return (depth - b->c.level) * 2 + 1; -+ else -+ return (depth - b->c.level) * 2 - 1; -+} -+ -+static inline void btree_node_reset_sib_u64s(struct btree *b) -+{ -+ b->sib_u64s[0] = b->nr.live_u64s; -+ b->sib_u64s[1] = b->nr.live_u64s; -+} -+ -+static inline void *btree_data_end(struct bch_fs *c, struct btree *b) -+{ -+ return (void *) b->data + btree_bytes(c); -+} -+ -+static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c, -+ struct btree *b) -+{ -+ return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s); -+} -+ -+static inline struct bkey_packed *unwritten_whiteouts_end(struct bch_fs *c, -+ struct btree *b) -+{ -+ return btree_data_end(c, b); -+} -+ -+static inline void *write_block(struct btree *b) -+{ -+ return (void *) b->data + (b->written << 9); -+} -+ -+static inline bool __btree_addr_written(struct btree *b, void *p) -+{ -+ return p < write_block(b); -+} -+ -+static inline bool bset_written(struct btree *b, struct bset *i) -+{ -+ return __btree_addr_written(b, i); -+} -+ -+static inline bool bkey_written(struct btree *b, struct bkey_packed *k) -+{ -+ return __btree_addr_written(b, k); -+} -+ -+static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c, -+ struct btree *b, -+ void *end) 
-+{ -+ ssize_t used = bset_byte_offset(b, end) / sizeof(u64) + -+ b->whiteout_u64s; -+ ssize_t total = c->opts.btree_node_size << 6; -+ -+ return total - used; -+} -+ -+static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c, -+ struct btree *b) -+{ -+ ssize_t remaining = __bch_btree_u64s_remaining(c, b, -+ btree_bkey_last(b, bset_tree_last(b))); -+ -+ BUG_ON(remaining < 0); -+ -+ if (bset_written(b, btree_bset_last(b))) -+ return 0; -+ -+ return remaining; -+} -+ -+static inline unsigned btree_write_set_buffer(struct btree *b) -+{ -+ /* -+ * Could buffer up larger amounts of keys for btrees with larger keys, -+ * pending benchmarking: -+ */ -+ return 4 << 10; -+} -+ -+static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, -+ struct btree *b) -+{ -+ struct bset_tree *t = bset_tree_last(b); -+ struct btree_node_entry *bne = max(write_block(b), -+ (void *) btree_bkey_last(b, bset_tree_last(b))); -+ ssize_t remaining_space = -+ __bch_btree_u64s_remaining(c, b, &bne->keys.start[0]); -+ -+ if (unlikely(bset_written(b, bset(b, t)))) { -+ if (remaining_space > (ssize_t) (block_bytes(c) >> 3)) -+ return bne; -+ } else { -+ if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) && -+ remaining_space > (ssize_t) (btree_write_set_buffer(b) >> 3)) -+ return bne; -+ } -+ -+ return NULL; -+} -+ -+static inline void push_whiteout(struct bch_fs *c, struct btree *b, -+ struct bpos pos) -+{ -+ struct bkey_packed k; -+ -+ BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s); -+ -+ if (!bkey_pack_pos(&k, pos, b)) { -+ struct bkey *u = (void *) &k; -+ -+ bkey_init(u); -+ u->p = pos; -+ } -+ -+ k.needs_whiteout = true; -+ -+ b->whiteout_u64s += k.u64s; -+ bkey_copy(unwritten_whiteouts_start(c, b), &k); -+} -+ -+/* -+ * write lock must be held on @b (else the dirty bset that we were going to -+ * insert into could be written out from under us) -+ */ -+static inline bool bch2_btree_node_insert_fits(struct bch_fs *c, -+ struct btree *b, unsigned u64s) -+{ -+ if (unlikely(btree_node_need_rewrite(b))) -+ return false; -+ -+ return u64s <= bch_btree_keys_u64s_remaining(c, b); -+} -+ -+void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *); -+ -+size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *); -+ -+void bch2_journal_entries_to_btree_roots(struct bch_fs *, struct jset *); -+struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *, -+ struct jset_entry *, struct jset_entry *); -+ -+void bch2_fs_btree_interior_update_exit(struct bch_fs *); -+int bch2_fs_btree_interior_update_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */ -diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c -new file mode 100644 -index 000000000000..49995cd00c16 ---- /dev/null -+++ b/fs/bcachefs/btree_update_leaf.c -@@ -0,0 +1,1172 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_gc.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_key_cache.h" -+#include "btree_locking.h" -+#include "buckets.h" -+#include "debug.h" -+#include "error.h" -+#include "extent_update.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+#include "keylist.h" -+#include "replicas.h" -+ -+#include -+#include -+#include -+ -+static inline bool same_leaf_as_prev(struct btree_trans *trans, -+ struct btree_insert_entry *i) -+{ -+ return i != trans->updates2 && -+ iter_l(i[0].iter)->b == iter_l(i[-1].iter)->b; -+} -+ 
-+inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b, -+ struct btree_iter *iter) -+{ -+ bch2_btree_node_lock_write(b, iter); -+ -+ if (btree_iter_type(iter) == BTREE_ITER_CACHED) -+ return; -+ -+ if (unlikely(btree_node_just_written(b)) && -+ bch2_btree_post_write_cleanup(c, b)) -+ bch2_btree_iter_reinit_node(iter, b); -+ -+ /* -+ * If the last bset has been written, or if it's gotten too big - start -+ * a new bset to insert into: -+ */ -+ if (want_new_bset(c, b)) -+ bch2_btree_init_next(c, b, iter); -+} -+ -+/* Inserting into a given leaf node (last stage of insert): */ -+ -+/* Handle overwrites and do insert, for non extents: */ -+bool bch2_btree_bset_insert_key(struct btree_iter *iter, -+ struct btree *b, -+ struct btree_node_iter *node_iter, -+ struct bkey_i *insert) -+{ -+ struct bkey_packed *k; -+ unsigned clobber_u64s = 0, new_u64s = 0; -+ -+ EBUG_ON(btree_node_just_written(b)); -+ EBUG_ON(bset_written(b, btree_bset_last(b))); -+ EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k)); -+ EBUG_ON(bkey_cmp(b->data->min_key, POS_MIN) && -+ bkey_cmp(bkey_start_pos(&insert->k), -+ bkey_predecessor(b->data->min_key)) < 0); -+ EBUG_ON(bkey_cmp(insert->k.p, b->data->min_key) < 0); -+ EBUG_ON(bkey_cmp(insert->k.p, b->data->max_key) > 0); -+ EBUG_ON(insert->k.u64s > -+ bch_btree_keys_u64s_remaining(iter->trans->c, b)); -+ EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); -+ -+ k = bch2_btree_node_iter_peek_all(node_iter, b); -+ if (k && bkey_cmp_packed(b, k, &insert->k)) -+ k = NULL; -+ -+ /* @k is the key being overwritten/deleted, if any: */ -+ EBUG_ON(k && bkey_whiteout(k)); -+ -+ /* Deleting, but not found? nothing to do: */ -+ if (bkey_whiteout(&insert->k) && !k) -+ return false; -+ -+ if (bkey_whiteout(&insert->k)) { -+ /* Deleting: */ -+ btree_account_key_drop(b, k); -+ k->type = KEY_TYPE_deleted; -+ -+ if (k->needs_whiteout) -+ push_whiteout(iter->trans->c, b, insert->k.p); -+ k->needs_whiteout = false; -+ -+ if (k >= btree_bset_last(b)->start) { -+ clobber_u64s = k->u64s; -+ bch2_bset_delete(b, k, clobber_u64s); -+ goto fix_iter; -+ } else { -+ bch2_btree_iter_fix_key_modified(iter, b, k); -+ } -+ -+ return true; -+ } -+ -+ if (k) { -+ /* Overwriting: */ -+ btree_account_key_drop(b, k); -+ k->type = KEY_TYPE_deleted; -+ -+ insert->k.needs_whiteout = k->needs_whiteout; -+ k->needs_whiteout = false; -+ -+ if (k >= btree_bset_last(b)->start) { -+ clobber_u64s = k->u64s; -+ goto overwrite; -+ } else { -+ bch2_btree_iter_fix_key_modified(iter, b, k); -+ } -+ } -+ -+ k = bch2_btree_node_iter_bset_pos(node_iter, b, bset_tree_last(b)); -+overwrite: -+ bch2_bset_insert(b, node_iter, k, insert, clobber_u64s); -+ new_u64s = k->u64s; -+fix_iter: -+ if (clobber_u64s != new_u64s) -+ bch2_btree_node_iter_fix(iter, b, node_iter, k, -+ clobber_u64s, new_u64s); -+ return true; -+} -+ -+static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, -+ unsigned i, u64 seq) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct btree_write *w = container_of(pin, struct btree_write, journal); -+ struct btree *b = container_of(w, struct btree, writes[i]); -+ -+ btree_node_lock_type(c, b, SIX_LOCK_read); -+ bch2_btree_node_write_cond(c, b, -+ (btree_current_write(b) == w && w->journal.seq == seq)); -+ six_unlock_read(&b->c.lock); -+} -+ -+static void btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq) -+{ -+ return __btree_node_flush(j, pin, 0, seq); -+} -+ -+static void btree_node_flush1(struct journal *j, 
struct journal_entry_pin *pin, u64 seq) -+{ -+ return __btree_node_flush(j, pin, 1, seq); -+} -+ -+inline void bch2_btree_add_journal_pin(struct bch_fs *c, -+ struct btree *b, u64 seq) -+{ -+ struct btree_write *w = btree_current_write(b); -+ -+ bch2_journal_pin_add(&c->journal, seq, &w->journal, -+ btree_node_write_idx(b) == 0 -+ ? btree_node_flush0 -+ : btree_node_flush1); -+} -+ -+/** -+ * btree_insert_key - insert a key one key into a leaf node -+ */ -+static bool btree_insert_key_leaf(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree *b = iter_l(iter)->b; -+ struct bset_tree *t = bset_tree_last(b); -+ struct bset *i = bset(b, t); -+ int old_u64s = bset_u64s(t); -+ int old_live_u64s = b->nr.live_u64s; -+ int live_u64s_added, u64s_added; -+ -+ EBUG_ON(!iter->level && -+ !test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags)); -+ -+ if (unlikely(!bch2_btree_bset_insert_key(iter, b, -+ &iter_l(iter)->iter, insert))) -+ return false; -+ -+ i->journal_seq = cpu_to_le64(max(trans->journal_res.seq, -+ le64_to_cpu(i->journal_seq))); -+ -+ bch2_btree_add_journal_pin(c, b, trans->journal_res.seq); -+ -+ if (unlikely(!btree_node_dirty(b))) -+ set_btree_node_dirty(b); -+ -+ live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; -+ u64s_added = (int) bset_u64s(t) - old_u64s; -+ -+ if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added); -+ if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added); -+ -+ if (u64s_added > live_u64s_added && -+ bch2_maybe_compact_whiteouts(c, b)) -+ bch2_btree_iter_reinit_node(iter, b); -+ -+ trace_btree_insert_key(c, b, insert); -+ return true; -+} -+ -+/* Cached btree updates: */ -+ -+/* Normal update interface: */ -+ -+static inline void btree_insert_entry_checks(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct bch_fs *c = trans->c; -+ -+ BUG_ON(bkey_cmp(insert->k.p, iter->pos)); -+ BUG_ON(debug_check_bkeys(c) && -+ bch2_bkey_invalid(c, bkey_i_to_s_c(insert), -+ __btree_node_type(iter->level, iter->btree_id))); -+} -+ -+static noinline int -+bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s) -+{ -+ struct bch_fs *c = trans->c; -+ int ret; -+ -+ bch2_trans_unlock(trans); -+ -+ ret = bch2_journal_preres_get(&c->journal, -+ &trans->journal_preres, u64s, 0); -+ if (ret) -+ return ret; -+ -+ if (!bch2_trans_relock(trans)) { -+ trace_trans_restart_journal_preres_get(trans->ip); -+ return -EINTR; -+ } -+ -+ return 0; -+} -+ -+static inline int bch2_trans_journal_res_get(struct btree_trans *trans, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ int ret; -+ -+ if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED) -+ flags |= JOURNAL_RES_GET_RESERVED; -+ -+ ret = bch2_journal_res_get(&c->journal, &trans->journal_res, -+ trans->journal_u64s, flags); -+ -+ return ret == -EAGAIN ? 
BTREE_INSERT_NEED_JOURNAL_RES : ret; -+} -+ -+static enum btree_insert_ret -+btree_key_can_insert(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned u64s) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree *b = iter_l(iter)->b; -+ -+ if (!bch2_btree_node_insert_fits(c, b, u64s)) -+ return BTREE_INSERT_BTREE_NODE_FULL; -+ -+ return BTREE_INSERT_OK; -+} -+ -+static enum btree_insert_ret -+btree_key_can_insert_cached(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned u64s) -+{ -+ struct bkey_cached *ck = (void *) iter->l[0].b; -+ unsigned new_u64s; -+ struct bkey_i *new_k; -+ -+ BUG_ON(iter->level); -+ -+ if (u64s <= ck->u64s) -+ return BTREE_INSERT_OK; -+ -+ new_u64s = roundup_pow_of_two(u64s); -+ new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS); -+ if (!new_k) -+ return -ENOMEM; -+ -+ ck->u64s = new_u64s; -+ ck->k = new_k; -+ return BTREE_INSERT_OK; -+} -+ -+static inline void do_btree_insert_one(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct bch_fs *c = trans->c; -+ struct journal *j = &c->journal; -+ bool did_work; -+ -+ EBUG_ON(trans->journal_res.ref != -+ !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)); -+ -+ insert->k.needs_whiteout = false; -+ -+ did_work = (btree_iter_type(iter) != BTREE_ITER_CACHED) -+ ? btree_insert_key_leaf(trans, iter, insert) -+ : bch2_btree_insert_key_cached(trans, iter, insert); -+ if (!did_work) -+ return; -+ -+ if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) { -+ bch2_journal_add_keys(j, &trans->journal_res, -+ iter->btree_id, insert); -+ -+ bch2_journal_set_has_inode(j, &trans->journal_res, -+ insert->k.p.inode); -+ -+ if (trans->journal_seq) -+ *trans->journal_seq = trans->journal_res.seq; -+ } -+} -+ -+static inline bool iter_has_trans_triggers(struct btree_iter *iter) -+{ -+ return BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << iter->btree_id); -+} -+ -+static inline bool iter_has_nontrans_triggers(struct btree_iter *iter) -+{ -+ return (((BTREE_NODE_TYPE_HAS_TRIGGERS & -+ ~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS)) | -+ (1U << BTREE_ID_EC)) & -+ (1U << iter->btree_id); -+} -+ -+static noinline void bch2_btree_iter_unlock_noinline(struct btree_iter *iter) -+{ -+ __bch2_btree_iter_unlock(iter); -+} -+ -+static noinline void bch2_trans_mark_gc(struct btree_trans *trans) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_insert_entry *i; -+ -+ trans_for_each_update(trans, i) { -+ /* -+ * XXX: synchronization of cached update triggers with gc -+ */ -+ BUG_ON(btree_iter_type(i->iter) == BTREE_ITER_CACHED); -+ -+ if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b))) -+ bch2_mark_update(trans, i->iter, i->k, NULL, -+ i->trigger_flags|BTREE_TRIGGER_GC); -+ } -+} -+ -+static inline int -+bch2_trans_commit_write_locked(struct btree_trans *trans, -+ struct btree_insert_entry **stopped_at) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_fs_usage *fs_usage = NULL; -+ struct btree_insert_entry *i; -+ unsigned u64s = 0; -+ bool marking = false; -+ int ret; -+ -+ if (race_fault()) { -+ trace_trans_restart_fault_inject(trans->ip); -+ return -EINTR; -+ } -+ -+ /* -+ * Check if the insert will fit in the leaf node with the write lock -+ * held, otherwise another thread could write the node changing the -+ * amount of space available: -+ */ -+ -+ prefetch(&trans->c->journal.flags); -+ -+ trans_for_each_update2(trans, i) { -+ /* Multiple inserts might go to same leaf: */ -+ if (!same_leaf_as_prev(trans, i)) -+ u64s = 0; -+ -+ u64s += i->k->k.u64s; -+ ret = btree_iter_type(i->iter) 
!= BTREE_ITER_CACHED -+ ? btree_key_can_insert(trans, i->iter, u64s) -+ : btree_key_can_insert_cached(trans, i->iter, u64s); -+ if (ret) { -+ *stopped_at = i; -+ return ret; -+ } -+ -+ if (btree_node_type_needs_gc(i->iter->btree_id)) -+ marking = true; -+ } -+ -+ if (marking) { -+ percpu_down_read(&c->mark_lock); -+ fs_usage = bch2_fs_usage_scratch_get(c); -+ } -+ -+ /* -+ * Don't get journal reservation until after we know insert will -+ * succeed: -+ */ -+ if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) { -+ ret = bch2_trans_journal_res_get(trans, -+ JOURNAL_RES_GET_NONBLOCK); -+ if (ret) -+ goto err; -+ } else { -+ trans->journal_res.seq = c->journal.replay_journal_seq; -+ } -+ -+ if (unlikely(trans->extra_journal_entry_u64s)) { -+ memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res), -+ trans->extra_journal_entries, -+ trans->extra_journal_entry_u64s); -+ -+ trans->journal_res.offset += trans->extra_journal_entry_u64s; -+ trans->journal_res.u64s -= trans->extra_journal_entry_u64s; -+ } -+ -+ /* -+ * Not allowed to fail after we've gotten our journal reservation - we -+ * have to use it: -+ */ -+ -+ if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) { -+ if (journal_seq_verify(c)) -+ trans_for_each_update2(trans, i) -+ i->k->k.version.lo = trans->journal_res.seq; -+ else if (inject_invalid_keys(c)) -+ trans_for_each_update2(trans, i) -+ i->k->k.version = MAX_VERSION; -+ } -+ -+ /* Must be called under mark_lock: */ -+ if (marking && trans->fs_usage_deltas && -+ bch2_replicas_delta_list_apply(c, fs_usage, -+ trans->fs_usage_deltas)) { -+ ret = BTREE_INSERT_NEED_MARK_REPLICAS; -+ goto err; -+ } -+ -+ trans_for_each_update(trans, i) -+ if (iter_has_nontrans_triggers(i->iter)) -+ bch2_mark_update(trans, i->iter, i->k, -+ fs_usage, i->trigger_flags); -+ -+ if (marking) -+ bch2_trans_fs_usage_apply(trans, fs_usage); -+ -+ if (unlikely(c->gc_pos.phase)) -+ bch2_trans_mark_gc(trans); -+ -+ trans_for_each_update2(trans, i) -+ do_btree_insert_one(trans, i->iter, i->k); -+err: -+ if (marking) { -+ bch2_fs_usage_scratch_put(c, fs_usage); -+ percpu_up_read(&c->mark_lock); -+ } -+ -+ return ret; -+} -+ -+/* -+ * Get journal reservation, take write locks, and attempt to do btree update(s): -+ */ -+static inline int do_bch2_trans_commit(struct btree_trans *trans, -+ struct btree_insert_entry **stopped_at) -+{ -+ struct btree_insert_entry *i; -+ struct btree_iter *iter; -+ int ret; -+ -+ trans_for_each_update2(trans, i) -+ BUG_ON(!btree_node_intent_locked(i->iter, i->iter->level)); -+ -+ ret = bch2_journal_preres_get(&trans->c->journal, -+ &trans->journal_preres, trans->journal_preres_u64s, -+ JOURNAL_RES_GET_NONBLOCK| -+ ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) -+ ? 
JOURNAL_RES_GET_RECLAIM : 0)); -+ if (unlikely(ret == -EAGAIN)) -+ ret = bch2_trans_journal_preres_get_cold(trans, -+ trans->journal_preres_u64s); -+ if (unlikely(ret)) -+ return ret; -+ -+ /* -+ * Can't be holding any read locks when we go to take write locks: -+ * -+ * note - this must be done after bch2_trans_journal_preres_get_cold() -+ * or anything else that might call bch2_trans_relock(), since that -+ * would just retake the read locks: -+ */ -+ trans_for_each_iter(trans, iter) { -+ if (iter->nodes_locked != iter->nodes_intent_locked) { -+ EBUG_ON(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT); -+ EBUG_ON(trans->iters_live & (1ULL << iter->idx)); -+ bch2_btree_iter_unlock_noinline(iter); -+ } -+ } -+ -+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) -+ trans_for_each_update2(trans, i) -+ btree_insert_entry_checks(trans, i->iter, i->k); -+ bch2_btree_trans_verify_locks(trans); -+ -+ trans_for_each_update2(trans, i) -+ if (!same_leaf_as_prev(trans, i)) -+ bch2_btree_node_lock_for_insert(trans->c, -+ iter_l(i->iter)->b, i->iter); -+ -+ ret = bch2_trans_commit_write_locked(trans, stopped_at); -+ -+ trans_for_each_update2(trans, i) -+ if (!same_leaf_as_prev(trans, i)) -+ bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b, -+ i->iter); -+ -+ if (!ret && trans->journal_pin) -+ bch2_journal_pin_add(&trans->c->journal, trans->journal_res.seq, -+ trans->journal_pin, NULL); -+ -+ /* -+ * Drop journal reservation after dropping write locks, since dropping -+ * the journal reservation may kick off a journal write: -+ */ -+ bch2_journal_res_put(&trans->c->journal, &trans->journal_res); -+ -+ if (unlikely(ret)) -+ return ret; -+ -+ if (trans->flags & BTREE_INSERT_NOUNLOCK) -+ trans->nounlock = true; -+ -+ trans_for_each_update2(trans, i) -+ if (btree_iter_type(i->iter) != BTREE_ITER_CACHED && -+ !same_leaf_as_prev(trans, i)) -+ bch2_foreground_maybe_merge(trans->c, i->iter, -+ 0, trans->flags); -+ -+ trans->nounlock = false; -+ -+ bch2_trans_downgrade(trans); -+ -+ return 0; -+} -+ -+static noinline -+int bch2_trans_commit_error(struct btree_trans *trans, -+ struct btree_insert_entry *i, -+ int ret) -+{ -+ struct bch_fs *c = trans->c; -+ unsigned flags = trans->flags; -+ -+ /* -+ * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree -+ * update; if we haven't done anything yet it doesn't apply -+ */ -+ flags &= ~BTREE_INSERT_NOUNLOCK; -+ -+ switch (ret) { -+ case BTREE_INSERT_BTREE_NODE_FULL: -+ ret = bch2_btree_split_leaf(c, i->iter, flags); -+ -+ /* -+ * if the split succeeded without dropping locks the insert will -+ * still be atomic (what the caller peeked() and is overwriting -+ * won't have changed) -+ */ -+#if 0 -+ /* -+ * XXX: -+ * split -> btree node merging (of parent node) might still drop -+ * locks when we're not passing it BTREE_INSERT_NOUNLOCK -+ * -+ * we don't want to pass BTREE_INSERT_NOUNLOCK to split as that -+ * will inhibit merging - but we don't have a reliable way yet -+ * (do we?) 
of checking if we dropped locks in this path -+ */ -+ if (!ret) -+ goto retry; -+#endif -+ -+ /* -+ * don't care if we got ENOSPC because we told split it -+ * couldn't block: -+ */ -+ if (!ret || -+ ret == -EINTR || -+ (flags & BTREE_INSERT_NOUNLOCK)) { -+ trace_trans_restart_btree_node_split(trans->ip); -+ ret = -EINTR; -+ } -+ break; -+ case BTREE_INSERT_ENOSPC: -+ ret = -ENOSPC; -+ break; -+ case BTREE_INSERT_NEED_MARK_REPLICAS: -+ bch2_trans_unlock(trans); -+ -+ trans_for_each_update(trans, i) { -+ ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k)); -+ if (ret) -+ return ret; -+ } -+ -+ if (bch2_trans_relock(trans)) -+ return 0; -+ -+ trace_trans_restart_mark_replicas(trans->ip); -+ ret = -EINTR; -+ break; -+ case BTREE_INSERT_NEED_JOURNAL_RES: -+ bch2_trans_unlock(trans); -+ -+ ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK); -+ if (ret) -+ return ret; -+ -+ if (bch2_trans_relock(trans)) -+ return 0; -+ -+ trace_trans_restart_journal_res_get(trans->ip); -+ ret = -EINTR; -+ break; -+ default: -+ BUG_ON(ret >= 0); -+ break; -+ } -+ -+ if (ret == -EINTR) { -+ int ret2 = bch2_btree_iter_traverse_all(trans); -+ -+ if (ret2) { -+ trace_trans_restart_traverse(trans->ip); -+ return ret2; -+ } -+ -+ trace_trans_restart_atomic(trans->ip); -+ } -+ -+ return ret; -+} -+ -+static noinline int -+bch2_trans_commit_get_rw_cold(struct btree_trans *trans) -+{ -+ struct bch_fs *c = trans->c; -+ int ret; -+ -+ if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW))) -+ return -EROFS; -+ -+ bch2_trans_unlock(trans); -+ -+ ret = bch2_fs_read_write_early(c); -+ if (ret) -+ return ret; -+ -+ percpu_ref_get(&c->writes); -+ return 0; -+} -+ -+static void bch2_trans_update2(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct btree_insert_entry *i, n = (struct btree_insert_entry) { -+ .iter = iter, .k = insert -+ }; -+ -+ btree_insert_entry_checks(trans, n.iter, n.k); -+ -+ BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); -+ -+ EBUG_ON(trans->nr_updates2 >= trans->nr_iters); -+ -+ iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ -+ trans_for_each_update2(trans, i) { -+ if (btree_iter_cmp(n.iter, i->iter) == 0) { -+ *i = n; -+ return; -+ } -+ -+ if (btree_iter_cmp(n.iter, i->iter) <= 0) -+ break; -+ } -+ -+ array_insert_item(trans->updates2, trans->nr_updates2, -+ i - trans->updates2, n); -+} -+ -+static int extent_update_to_keys(struct btree_trans *trans, -+ struct btree_iter *orig_iter, -+ struct bkey_i *insert) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ ret = bch2_extent_can_insert(trans, orig_iter, insert); -+ if (ret) -+ return ret; -+ -+ if (bkey_deleted(&insert->k)) -+ return 0; -+ -+ iter = bch2_trans_copy_iter(trans, orig_iter); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ iter->flags |= BTREE_ITER_INTENT; -+ __bch2_btree_iter_set_pos(iter, insert->k.p, false); -+ bch2_trans_update2(trans, iter, insert); -+ bch2_trans_iter_put(trans, iter); -+ return 0; -+} -+ -+static int extent_handle_overwrites(struct btree_trans *trans, -+ enum btree_id btree_id, -+ struct bpos start, struct bpos end) -+{ -+ struct btree_iter *iter = NULL, *update_iter; -+ struct bkey_i *update; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ iter = bch2_trans_get_iter(trans, btree_id, start, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(iter); -+ if (ret) -+ return ret; -+ -+ k = bch2_btree_iter_peek_with_updates(iter); -+ -+ while (k.k && !(ret = bkey_err(k))) { -+ if (bkey_cmp(end, bkey_start_pos(k.k)) <= 0) -+ break; -+ -+ if (bkey_cmp(bkey_start_pos(k.k), start) < 0) 
{ -+ update_iter = bch2_trans_copy_iter(trans, iter); -+ if ((ret = PTR_ERR_OR_ZERO(update_iter))) -+ goto err; -+ -+ update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ if ((ret = PTR_ERR_OR_ZERO(update))) -+ goto err; -+ -+ bkey_reassemble(update, k); -+ bch2_cut_back(start, update); -+ -+ __bch2_btree_iter_set_pos(update_iter, update->k.p, false); -+ bch2_trans_update2(trans, update_iter, update); -+ bch2_trans_iter_put(trans, update_iter); -+ } -+ -+ if (bkey_cmp(k.k->p, end) > 0) { -+ update_iter = bch2_trans_copy_iter(trans, iter); -+ if ((ret = PTR_ERR_OR_ZERO(update_iter))) -+ goto err; -+ -+ update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ if ((ret = PTR_ERR_OR_ZERO(update))) -+ goto err; -+ -+ bkey_reassemble(update, k); -+ bch2_cut_front(end, update); -+ -+ __bch2_btree_iter_set_pos(update_iter, update->k.p, false); -+ bch2_trans_update2(trans, update_iter, update); -+ bch2_trans_iter_put(trans, update_iter); -+ } else { -+ update_iter = bch2_trans_copy_iter(trans, iter); -+ if ((ret = PTR_ERR_OR_ZERO(update_iter))) -+ goto err; -+ -+ update = bch2_trans_kmalloc(trans, sizeof(struct bkey)); -+ if ((ret = PTR_ERR_OR_ZERO(update))) -+ goto err; -+ -+ update->k = *k.k; -+ set_bkey_val_u64s(&update->k, 0); -+ update->k.type = KEY_TYPE_deleted; -+ update->k.size = 0; -+ -+ __bch2_btree_iter_set_pos(update_iter, update->k.p, false); -+ bch2_trans_update2(trans, update_iter, update); -+ bch2_trans_iter_put(trans, update_iter); -+ } -+ -+ k = bch2_btree_iter_next_with_updates(iter); -+ } -+err: -+ if (!IS_ERR_OR_NULL(iter)) -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+int __bch2_trans_commit(struct btree_trans *trans) -+{ -+ struct btree_insert_entry *i = NULL; -+ struct btree_iter *iter; -+ bool trans_trigger_run; -+ unsigned u64s; -+ int ret = 0; -+ -+ BUG_ON(trans->need_reset); -+ -+ if (!trans->nr_updates) -+ goto out_noupdates; -+ -+ if (trans->flags & BTREE_INSERT_GC_LOCK_HELD) -+ lockdep_assert_held(&trans->c->gc_lock); -+ -+ memset(&trans->journal_preres, 0, sizeof(trans->journal_preres)); -+ -+ trans->journal_u64s = trans->extra_journal_entry_u64s; -+ trans->journal_preres_u64s = 0; -+ -+ if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) && -+ unlikely(!percpu_ref_tryget(&trans->c->writes))) { -+ ret = bch2_trans_commit_get_rw_cold(trans); -+ if (ret) -+ return ret; -+ } -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ trans_for_each_update(trans, i) -+ if (btree_iter_type(i->iter) != BTREE_ITER_CACHED && -+ !(i->trigger_flags & BTREE_TRIGGER_NORUN)) -+ bch2_btree_key_cache_verify_clean(trans, -+ i->iter->btree_id, i->iter->pos); -+#endif -+ -+ /* -+ * Running triggers will append more updates to the list of updates as -+ * we're walking it: -+ */ -+ do { -+ trans_trigger_run = false; -+ -+ trans_for_each_update(trans, i) { -+ if (unlikely(i->iter->uptodate > BTREE_ITER_NEED_PEEK && -+ (ret = bch2_btree_iter_traverse(i->iter)))) { -+ trace_trans_restart_traverse(trans->ip); -+ goto out; -+ } -+ -+ /* -+ * We're not using bch2_btree_iter_upgrade here because -+ * we know trans->nounlock can't be set: -+ */ -+ if (unlikely(i->iter->locks_want < 1 && -+ !__bch2_btree_iter_upgrade(i->iter, 1))) { -+ trace_trans_restart_upgrade(trans->ip); -+ ret = -EINTR; -+ goto out; -+ } -+ -+ if (iter_has_trans_triggers(i->iter) && -+ !i->trans_triggers_run) { -+ i->trans_triggers_run = true; -+ trans_trigger_run = true; -+ -+ ret = bch2_trans_mark_update(trans, i->iter, i->k, -+ i->trigger_flags); -+ if (unlikely(ret)) { -+ if (ret == -EINTR) -+ trace_trans_restart_mark(trans->ip); 
-+ goto out; -+ } -+ } -+ } -+ } while (trans_trigger_run); -+ -+ /* Turn extents updates into keys: */ -+ trans_for_each_update(trans, i) -+ if (i->iter->flags & BTREE_ITER_IS_EXTENTS) { -+ struct bpos start = bkey_start_pos(&i->k->k); -+ -+ while (i + 1 < trans->updates + trans->nr_updates && -+ i[0].iter->btree_id == i[1].iter->btree_id && -+ !bkey_cmp(i[0].k->k.p, bkey_start_pos(&i[1].k->k))) -+ i++; -+ -+ ret = extent_handle_overwrites(trans, i->iter->btree_id, -+ start, i->k->k.p); -+ if (ret) -+ goto out; -+ } -+ -+ trans_for_each_update(trans, i) { -+ if (i->iter->flags & BTREE_ITER_IS_EXTENTS) { -+ ret = extent_update_to_keys(trans, i->iter, i->k); -+ if (ret) -+ goto out; -+ } else { -+ bch2_trans_update2(trans, i->iter, i->k); -+ } -+ } -+ -+ trans_for_each_update2(trans, i) { -+ BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK); -+ BUG_ON(i->iter->locks_want < 1); -+ -+ u64s = jset_u64s(i->k->k.u64s); -+ if (btree_iter_type(i->iter) == BTREE_ITER_CACHED && -+ likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) -+ trans->journal_preres_u64s += u64s; -+ trans->journal_u64s += u64s; -+ } -+retry: -+ memset(&trans->journal_res, 0, sizeof(trans->journal_res)); -+ -+ ret = do_bch2_trans_commit(trans, &i); -+ -+ /* make sure we didn't drop or screw up locks: */ -+ bch2_btree_trans_verify_locks(trans); -+ -+ if (ret) -+ goto err; -+ -+ trans_for_each_iter(trans, iter) -+ if ((trans->iters_live & (1ULL << iter->idx)) && -+ (iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT)) { -+ if (trans->flags & BTREE_INSERT_NOUNLOCK) -+ bch2_btree_iter_set_pos_same_leaf(iter, iter->pos_after_commit); -+ else -+ bch2_btree_iter_set_pos(iter, iter->pos_after_commit); -+ } -+out: -+ bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres); -+ -+ if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW))) -+ percpu_ref_put(&trans->c->writes); -+out_noupdates: -+ bch2_trans_reset(trans, !ret ? TRANS_RESET_NOTRAVERSE : 0); -+ -+ return ret; -+err: -+ ret = bch2_trans_commit_error(trans, i, ret); -+ if (ret) -+ goto out; -+ -+ goto retry; -+} -+ -+int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, -+ struct bkey_i *k, enum btree_trigger_flags flags) -+{ -+ struct btree_insert_entry *i, n = (struct btree_insert_entry) { -+ .trigger_flags = flags, .iter = iter, .k = k -+ }; -+ -+ EBUG_ON(bkey_cmp(iter->pos, -+ (iter->flags & BTREE_ITER_IS_EXTENTS) -+ ? 
bkey_start_pos(&k->k) -+ : k->k.p)); -+ -+ iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ -+ if (btree_node_type_is_extents(iter->btree_id)) { -+ iter->pos_after_commit = k->k.p; -+ iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT; -+ } -+ -+ /* -+ * Pending updates are kept sorted: first, find position of new update: -+ */ -+ trans_for_each_update(trans, i) -+ if (btree_iter_cmp(iter, i->iter) <= 0) -+ break; -+ -+ /* -+ * Now delete/trim any updates the new update overwrites: -+ */ -+ if (i > trans->updates && -+ i[-1].iter->btree_id == iter->btree_id && -+ bkey_cmp(iter->pos, i[-1].k->k.p) < 0) -+ bch2_cut_back(n.iter->pos, i[-1].k); -+ -+ while (i < trans->updates + trans->nr_updates && -+ iter->btree_id == i->iter->btree_id && -+ bkey_cmp(n.k->k.p, i->k->k.p) >= 0) -+ array_remove_item(trans->updates, trans->nr_updates, -+ i - trans->updates); -+ -+ if (i < trans->updates + trans->nr_updates && -+ iter->btree_id == i->iter->btree_id && -+ bkey_cmp(n.k->k.p, i->iter->pos) > 0) { -+ /* -+ * When we have an extent that overwrites the start of another -+ * update, trimming that extent will mean the iterator's -+ * position has to change since the iterator position has to -+ * match the extent's start pos - but we don't want to change -+ * the iterator pos if some other code is using it, so we may -+ * need to clone it: -+ */ -+ if (trans->iters_live & (1ULL << i->iter->idx)) { -+ i->iter = bch2_trans_copy_iter(trans, i->iter); -+ if (IS_ERR(i->iter)) { -+ trans->need_reset = true; -+ return PTR_ERR(i->iter); -+ } -+ -+ i->iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ bch2_trans_iter_put(trans, i->iter); -+ } -+ -+ bch2_cut_front(n.k->k.p, i->k); -+ bch2_btree_iter_set_pos(i->iter, n.k->k.p); -+ } -+ -+ EBUG_ON(trans->nr_updates >= trans->nr_iters); -+ -+ array_insert_item(trans->updates, trans->nr_updates, -+ i - trans->updates, n); -+ return 0; -+} -+ -+int __bch2_btree_insert(struct btree_trans *trans, -+ enum btree_id id, struct bkey_i *k) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, id, bkey_start_pos(&k->k), -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ ret = bch2_btree_iter_traverse(iter) ?: -+ bch2_trans_update(trans, iter, k, 0); -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+/** -+ * bch2_btree_insert - insert keys into the extent btree -+ * @c: pointer to struct bch_fs -+ * @id: btree to insert into -+ * @insert_keys: list of keys to insert -+ * @hook: insert callback -+ */ -+int bch2_btree_insert(struct bch_fs *c, enum btree_id id, -+ struct bkey_i *k, -+ struct disk_reservation *disk_res, -+ u64 *journal_seq, int flags) -+{ -+ return bch2_trans_do(c, disk_res, journal_seq, flags, -+ __bch2_btree_insert(&trans, id, k)); -+} -+ -+int bch2_btree_delete_at_range(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bpos end, -+ u64 *journal_seq) -+{ -+ struct bkey_s_c k; -+ int ret = 0; -+retry: -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret = bkey_err(k)) && -+ bkey_cmp(iter->pos, end) < 0) { -+ struct bkey_i delete; -+ -+ bch2_trans_begin(trans); -+ -+ bkey_init(&delete.k); -+ -+ /* -+ * For extents, iter.pos won't necessarily be the same as -+ * bkey_start_pos(k.k) (for non extents they always will be the -+ * same). It's important that we delete starting from iter.pos -+ * because the range we want to delete could start in the middle -+ * of k. -+ * -+ * (bch2_btree_iter_peek() does guarantee that iter.pos >= -+ * bkey_start_pos(k.k)). 
-+ */ -+ delete.k.p = iter->pos; -+ -+ if (btree_node_type_is_extents(iter->btree_id)) { -+ unsigned max_sectors = -+ KEY_SIZE_MAX & (~0 << trans->c->block_bits); -+ -+ /* create the biggest key we can */ -+ bch2_key_resize(&delete.k, max_sectors); -+ bch2_cut_back(end, &delete); -+ -+ ret = bch2_extent_trim_atomic(&delete, iter); -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_update(trans, iter, &delete, 0); -+ ret = bch2_trans_commit(trans, NULL, journal_seq, -+ BTREE_INSERT_NOFAIL); -+ if (ret) -+ break; -+ -+ bch2_trans_cond_resched(trans); -+ } -+ -+ if (ret == -EINTR) { -+ ret = 0; -+ goto retry; -+ } -+ -+ return ret; -+ -+} -+ -+int bch2_btree_delete_at(struct btree_trans *trans, -+ struct btree_iter *iter, unsigned flags) -+{ -+ struct bkey_i k; -+ -+ bkey_init(&k.k); -+ k.k.p = iter->pos; -+ -+ bch2_trans_update(trans, iter, &k, 0); -+ return bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE|flags); -+} -+ -+/* -+ * bch_btree_delete_range - delete everything within a given range -+ * -+ * Range is a half open interval - [start, end) -+ */ -+int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, -+ struct bpos start, struct bpos end, -+ u64 *journal_seq) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ int ret = 0; -+ -+ /* -+ * XXX: whether we need mem/more iters depends on whether this btree id -+ * has triggers -+ */ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512); -+ -+ iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT); -+ -+ ret = bch2_btree_delete_at_range(&trans, iter, end, journal_seq); -+ ret = bch2_trans_exit(&trans) ?: ret; -+ -+ BUG_ON(ret == -EINTR); -+ return ret; -+} -diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -new file mode 100644 -index 000000000000..2a3b95968a86 ---- /dev/null -+++ b/fs/bcachefs/buckets.c -@@ -0,0 +1,2230 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Code for manipulating bucket marks for garbage collection. -+ * -+ * Copyright 2014 Datera, Inc. -+ * -+ * Bucket states: -+ * - free bucket: mark == 0 -+ * The bucket contains no data and will not be read -+ * -+ * - allocator bucket: owned_by_allocator == 1 -+ * The bucket is on a free list, or it is an open bucket -+ * -+ * - cached bucket: owned_by_allocator == 0 && -+ * dirty_sectors == 0 && -+ * cached_sectors > 0 -+ * The bucket contains data but may be safely discarded as there are -+ * enough replicas of the data on other cache devices, or it has been -+ * written back to the backing device -+ * -+ * - dirty bucket: owned_by_allocator == 0 && -+ * dirty_sectors > 0 -+ * The bucket contains data that we must not discard (either only copy, -+ * or one of the 'main copies' for data requiring multiple replicas) -+ * -+ * - metadata bucket: owned_by_allocator == 0 && is_metadata == 1 -+ * This is a btree node, journal or gen/prio bucket -+ * -+ * Lifecycle: -+ * -+ * bucket invalidated => bucket on freelist => open bucket => -+ * [dirty bucket =>] cached bucket => bucket invalidated => ... -+ * -+ * Note that cache promotion can skip the dirty bucket step, as data -+ * is copied from a deeper tier to a shallower tier, onto a cached -+ * bucket. -+ * Note also that a cached bucket can spontaneously become dirty -- -+ * see below. -+ * -+ * Only a traversal of the key space can determine whether a bucket is -+ * truly dirty or cached. 
-+ * -+ * Transitions: -+ * -+ * - free => allocator: bucket was invalidated -+ * - cached => allocator: bucket was invalidated -+ * -+ * - allocator => dirty: open bucket was filled up -+ * - allocator => cached: open bucket was filled up -+ * - allocator => metadata: metadata was allocated -+ * -+ * - dirty => cached: dirty sectors were copied to a deeper tier -+ * - dirty => free: dirty sectors were overwritten or moved (copy gc) -+ * - cached => free: cached sectors were overwritten -+ * -+ * - metadata => free: metadata was freed -+ * -+ * Oddities: -+ * - cached => dirty: a device was removed so formerly replicated data -+ * is no longer sufficiently replicated -+ * - free => cached: cannot happen -+ * - free => dirty: cannot happen -+ * - free => metadata: cannot happen -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "bset.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "ec.h" -+#include "error.h" -+#include "movinggc.h" -+#include "replicas.h" -+ -+#include <linux/preempt.h> -+#include <trace/events/bcachefs.h> -+ -+/* -+ * Clear journal_seq_valid for buckets for which it's not needed, to prevent -+ * wraparound: -+ */ -+void bch2_bucket_seq_cleanup(struct bch_fs *c) -+{ -+ u64 journal_seq = atomic64_read(&c->journal.seq); -+ u16 last_seq_ondisk = c->journal.last_seq_ondisk; -+ struct bch_dev *ca; -+ struct bucket_array *buckets; -+ struct bucket *g; -+ struct bucket_mark m; -+ unsigned i; -+ -+ if (journal_seq - c->last_bucket_seq_cleanup < -+ (1U << (BUCKET_JOURNAL_SEQ_BITS - 2))) -+ return; -+ -+ c->last_bucket_seq_cleanup = journal_seq; -+ -+ for_each_member_device(ca, c, i) { -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) { -+ bucket_cmpxchg(g, m, ({ -+ if (!m.journal_seq_valid || -+ bucket_needs_journal_commit(m, last_seq_ondisk)) -+ break; -+ -+ m.journal_seq_valid = 0; -+ })); -+ } -+ up_read(&ca->bucket_lock); -+ } -+} -+ -+void bch2_fs_usage_initialize(struct bch_fs *c) -+{ -+ struct bch_fs_usage *usage; -+ unsigned i; -+ -+ percpu_down_write(&c->mark_lock); -+ usage = c->usage_base; -+ -+ bch2_fs_usage_acc_to_base(c, 0); -+ bch2_fs_usage_acc_to_base(c, 1); -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) -+ usage->reserved += usage->persistent_reserved[i]; -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ switch (e->data_type) { -+ case BCH_DATA_btree: -+ usage->btree += usage->replicas[i]; -+ break; -+ case BCH_DATA_user: -+ usage->data += usage->replicas[i]; -+ break; -+ case BCH_DATA_cached: -+ usage->cached += usage->replicas[i]; -+ break; -+ } -+ } -+ -+ percpu_up_write(&c->mark_lock); -+} -+ -+void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage) -+{ -+ if (fs_usage == c->usage_scratch) -+ mutex_unlock(&c->usage_scratch_lock); -+ else -+ kfree(fs_usage); -+} -+ -+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c) -+{ -+ struct bch_fs_usage *ret; -+ unsigned bytes = fs_usage_u64s(c) * sizeof(u64); -+ -+ ret = kzalloc(bytes, GFP_NOWAIT|__GFP_NOWARN); -+ if (ret) -+ return ret; -+ -+ if (mutex_trylock(&c->usage_scratch_lock)) -+ goto out_pool; -+ -+ ret = kzalloc(bytes, GFP_NOFS); -+ if (ret) -+ return ret; -+ -+ mutex_lock(&c->usage_scratch_lock); -+out_pool: -+ ret = c->usage_scratch; -+ memset(ret, 0, bytes); -+ return ret; -+} -+ -+struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca) -+{ -+ struct bch_dev_usage ret; -+ -+ memset(&ret, 0, sizeof(ret)); -+ 
acc_u64s_percpu((u64 *) &ret, -+ (u64 __percpu *) ca->usage[0], -+ sizeof(ret) / sizeof(u64)); -+ -+ return ret; -+} -+ -+static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, -+ unsigned journal_seq, -+ bool gc) -+{ -+ return this_cpu_ptr(gc -+ ? c->usage_gc -+ : c->usage[journal_seq & 1]); -+} -+ -+u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v) -+{ -+ ssize_t offset = v - (u64 *) c->usage_base; -+ unsigned seq; -+ u64 ret; -+ -+ BUG_ON(offset < 0 || offset >= fs_usage_u64s(c)); -+ percpu_rwsem_assert_held(&c->mark_lock); -+ -+ do { -+ seq = read_seqcount_begin(&c->usage_lock); -+ ret = *v + -+ percpu_u64_get((u64 __percpu *) c->usage[0] + offset) + -+ percpu_u64_get((u64 __percpu *) c->usage[1] + offset); -+ } while (read_seqcount_retry(&c->usage_lock, seq)); -+ -+ return ret; -+} -+ -+struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c) -+{ -+ struct bch_fs_usage *ret; -+ unsigned seq, v, u64s = fs_usage_u64s(c); -+retry: -+ ret = kmalloc(u64s * sizeof(u64), GFP_NOFS); -+ if (unlikely(!ret)) -+ return NULL; -+ -+ percpu_down_read(&c->mark_lock); -+ -+ v = fs_usage_u64s(c); -+ if (unlikely(u64s != v)) { -+ u64s = v; -+ percpu_up_read(&c->mark_lock); -+ kfree(ret); -+ goto retry; -+ } -+ -+ do { -+ seq = read_seqcount_begin(&c->usage_lock); -+ memcpy(ret, c->usage_base, u64s * sizeof(u64)); -+ acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[0], u64s); -+ acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[1], u64s); -+ } while (read_seqcount_retry(&c->usage_lock, seq)); -+ -+ return ret; -+} -+ -+void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx) -+{ -+ unsigned u64s = fs_usage_u64s(c); -+ -+ BUG_ON(idx >= 2); -+ -+ preempt_disable(); -+ write_seqcount_begin(&c->usage_lock); -+ -+ acc_u64s_percpu((u64 *) c->usage_base, -+ (u64 __percpu *) c->usage[idx], u64s); -+ percpu_memset(c->usage[idx], 0, u64s * sizeof(u64)); -+ -+ write_seqcount_end(&c->usage_lock); -+ preempt_enable(); -+} -+ -+void bch2_fs_usage_to_text(struct printbuf *out, -+ struct bch_fs *c, -+ struct bch_fs_usage *fs_usage) -+{ -+ unsigned i; -+ -+ pr_buf(out, "capacity:\t\t\t%llu\n", c->capacity); -+ -+ pr_buf(out, "hidden:\t\t\t\t%llu\n", -+ fs_usage->hidden); -+ pr_buf(out, "data:\t\t\t\t%llu\n", -+ fs_usage->data); -+ pr_buf(out, "cached:\t\t\t\t%llu\n", -+ fs_usage->cached); -+ pr_buf(out, "reserved:\t\t\t%llu\n", -+ fs_usage->reserved); -+ pr_buf(out, "nr_inodes:\t\t\t%llu\n", -+ fs_usage->nr_inodes); -+ pr_buf(out, "online reserved:\t\t%llu\n", -+ fs_usage->online_reserved); -+ -+ for (i = 0; -+ i < ARRAY_SIZE(fs_usage->persistent_reserved); -+ i++) { -+ pr_buf(out, "%u replicas:\n", i + 1); -+ pr_buf(out, "\treserved:\t\t%llu\n", -+ fs_usage->persistent_reserved[i]); -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ pr_buf(out, "\t"); -+ bch2_replicas_entry_to_text(out, e); -+ pr_buf(out, ":\t%llu\n", fs_usage->replicas[i]); -+ } -+} -+ -+#define RESERVE_FACTOR 6 -+ -+static u64 reserve_factor(u64 r) -+{ -+ return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR); -+} -+ -+static u64 avail_factor(u64 r) -+{ -+ return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1); -+} -+ -+u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage) -+{ -+ return min(fs_usage->hidden + -+ fs_usage->btree + -+ fs_usage->data + -+ reserve_factor(fs_usage->reserved + -+ fs_usage->online_reserved), -+ c->capacity); -+} -+ -+static struct bch_fs_usage_short 
-+__bch2_fs_usage_read_short(struct bch_fs *c) -+{ -+ struct bch_fs_usage_short ret; -+ u64 data, reserved; -+ -+ ret.capacity = c->capacity - -+ bch2_fs_usage_read_one(c, &c->usage_base->hidden); -+ -+ data = bch2_fs_usage_read_one(c, &c->usage_base->data) + -+ bch2_fs_usage_read_one(c, &c->usage_base->btree); -+ reserved = bch2_fs_usage_read_one(c, &c->usage_base->reserved) + -+ bch2_fs_usage_read_one(c, &c->usage_base->online_reserved); -+ -+ ret.used = min(ret.capacity, data + reserve_factor(reserved)); -+ ret.free = ret.capacity - ret.used; -+ -+ ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->nr_inodes); -+ -+ return ret; -+} -+ -+struct bch_fs_usage_short -+bch2_fs_usage_read_short(struct bch_fs *c) -+{ -+ struct bch_fs_usage_short ret; -+ -+ percpu_down_read(&c->mark_lock); -+ ret = __bch2_fs_usage_read_short(c); -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+static inline int is_unavailable_bucket(struct bucket_mark m) -+{ -+ return !is_available_bucket(m); -+} -+ -+static inline int is_fragmented_bucket(struct bucket_mark m, -+ struct bch_dev *ca) -+{ -+ if (!m.owned_by_allocator && -+ m.data_type == BCH_DATA_user && -+ bucket_sectors_used(m)) -+ return max_t(int, 0, (int) ca->mi.bucket_size - -+ bucket_sectors_used(m)); -+ return 0; -+} -+ -+static inline int bucket_stripe_sectors(struct bucket_mark m) -+{ -+ return m.stripe ? m.dirty_sectors : 0; -+} -+ -+static inline enum bch_data_type bucket_type(struct bucket_mark m) -+{ -+ return m.cached_sectors && !m.dirty_sectors -+ ? BCH_DATA_cached -+ : m.data_type; -+} -+ -+static bool bucket_became_unavailable(struct bucket_mark old, -+ struct bucket_mark new) -+{ -+ return is_available_bucket(old) && -+ !is_available_bucket(new); -+} -+ -+int bch2_fs_usage_apply(struct bch_fs *c, -+ struct bch_fs_usage *fs_usage, -+ struct disk_reservation *disk_res, -+ unsigned journal_seq) -+{ -+ s64 added = fs_usage->data + fs_usage->reserved; -+ s64 should_not_have_added; -+ int ret = 0; -+ -+ percpu_rwsem_assert_held(&c->mark_lock); -+ -+ /* -+ * Not allowed to reduce sectors_available except by getting a -+ * reservation: -+ */ -+ should_not_have_added = added - (s64) (disk_res ? 
disk_res->sectors : 0); -+ if (WARN_ONCE(should_not_have_added > 0, -+ "disk usage increased by %lli without a reservation", -+ should_not_have_added)) { -+ atomic64_sub(should_not_have_added, &c->sectors_available); -+ added -= should_not_have_added; -+ ret = -1; -+ } -+ -+ if (added > 0) { -+ disk_res->sectors -= added; -+ fs_usage->online_reserved -= added; -+ } -+ -+ preempt_disable(); -+ acc_u64s((u64 *) fs_usage_ptr(c, journal_seq, false), -+ (u64 *) fs_usage, fs_usage_u64s(c)); -+ preempt_enable(); -+ -+ return ret; -+} -+ -+static inline void account_bucket(struct bch_fs_usage *fs_usage, -+ struct bch_dev_usage *dev_usage, -+ enum bch_data_type type, -+ int nr, s64 size) -+{ -+ if (type == BCH_DATA_sb || type == BCH_DATA_journal) -+ fs_usage->hidden += size; -+ -+ dev_usage->buckets[type] += nr; -+} -+ -+static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, -+ struct bch_fs_usage *fs_usage, -+ struct bucket_mark old, struct bucket_mark new, -+ bool gc) -+{ -+ struct bch_dev_usage *u; -+ -+ percpu_rwsem_assert_held(&c->mark_lock); -+ -+ preempt_disable(); -+ u = this_cpu_ptr(ca->usage[gc]); -+ -+ if (bucket_type(old)) -+ account_bucket(fs_usage, u, bucket_type(old), -+ -1, -ca->mi.bucket_size); -+ -+ if (bucket_type(new)) -+ account_bucket(fs_usage, u, bucket_type(new), -+ 1, ca->mi.bucket_size); -+ -+ u->buckets_alloc += -+ (int) new.owned_by_allocator - (int) old.owned_by_allocator; -+ u->buckets_unavailable += -+ is_unavailable_bucket(new) - is_unavailable_bucket(old); -+ -+ u->buckets_ec += (int) new.stripe - (int) old.stripe; -+ u->sectors_ec += bucket_stripe_sectors(new) - -+ bucket_stripe_sectors(old); -+ -+ u->sectors[old.data_type] -= old.dirty_sectors; -+ u->sectors[new.data_type] += new.dirty_sectors; -+ u->sectors[BCH_DATA_cached] += -+ (int) new.cached_sectors - (int) old.cached_sectors; -+ u->sectors_fragmented += -+ is_fragmented_bucket(new, ca) - is_fragmented_bucket(old, ca); -+ preempt_enable(); -+ -+ if (!is_available_bucket(old) && is_available_bucket(new)) -+ bch2_wake_allocator(ca); -+} -+ -+__flatten -+void bch2_dev_usage_from_buckets(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ struct bucket_mark old = { .v.counter = 0 }; -+ struct bucket_array *buckets; -+ struct bucket *g; -+ unsigned i; -+ int cpu; -+ -+ c->usage_base->hidden = 0; -+ -+ for_each_member_device(ca, c, i) { -+ for_each_possible_cpu(cpu) -+ memset(per_cpu_ptr(ca->usage[0], cpu), 0, -+ sizeof(*ca->usage[0])); -+ -+ buckets = bucket_array(ca); -+ -+ for_each_bucket(g, buckets) -+ bch2_dev_usage_update(c, ca, c->usage_base, -+ old, g->mark, false); -+ } -+} -+ -+static inline int update_replicas(struct bch_fs *c, -+ struct bch_fs_usage *fs_usage, -+ struct bch_replicas_entry *r, -+ s64 sectors) -+{ -+ int idx = bch2_replicas_entry_idx(c, r); -+ -+ if (idx < 0) -+ return -1; -+ -+ if (!fs_usage) -+ return 0; -+ -+ switch (r->data_type) { -+ case BCH_DATA_btree: -+ fs_usage->btree += sectors; -+ break; -+ case BCH_DATA_user: -+ fs_usage->data += sectors; -+ break; -+ case BCH_DATA_cached: -+ fs_usage->cached += sectors; -+ break; -+ } -+ fs_usage->replicas[idx] += sectors; -+ return 0; -+} -+ -+static inline void update_cached_sectors(struct bch_fs *c, -+ struct bch_fs_usage *fs_usage, -+ unsigned dev, s64 sectors) -+{ -+ struct bch_replicas_padded r; -+ -+ bch2_replicas_entry_cached(&r.e, dev); -+ -+ update_replicas(c, fs_usage, &r.e, sectors); -+} -+ -+static struct replicas_delta_list * -+replicas_deltas_realloc(struct btree_trans *trans, unsigned more) -+{ -+ struct 
replicas_delta_list *d = trans->fs_usage_deltas; -+ unsigned new_size = d ? (d->size + more) * 2 : 128; -+ -+ if (!d || d->used + more > d->size) { -+ d = krealloc(d, sizeof(*d) + new_size, GFP_NOIO|__GFP_ZERO); -+ BUG_ON(!d); -+ -+ d->size = new_size; -+ trans->fs_usage_deltas = d; -+ } -+ return d; -+} -+ -+static inline void update_replicas_list(struct btree_trans *trans, -+ struct bch_replicas_entry *r, -+ s64 sectors) -+{ -+ struct replicas_delta_list *d; -+ struct replicas_delta *n; -+ unsigned b; -+ -+ if (!sectors) -+ return; -+ -+ b = replicas_entry_bytes(r) + 8; -+ d = replicas_deltas_realloc(trans, b); -+ -+ n = (void *) d->d + d->used; -+ n->delta = sectors; -+ memcpy(&n->r, r, replicas_entry_bytes(r)); -+ d->used += b; -+} -+ -+static inline void update_cached_sectors_list(struct btree_trans *trans, -+ unsigned dev, s64 sectors) -+{ -+ struct bch_replicas_padded r; -+ -+ bch2_replicas_entry_cached(&r.e, dev); -+ -+ update_replicas_list(trans, &r.e, sectors); -+} -+ -+static inline struct replicas_delta * -+replicas_delta_next(struct replicas_delta *d) -+{ -+ return (void *) d + replicas_entry_bytes(&d->r) + 8; -+} -+ -+int bch2_replicas_delta_list_apply(struct bch_fs *c, -+ struct bch_fs_usage *fs_usage, -+ struct replicas_delta_list *r) -+{ -+ struct replicas_delta *d = r->d; -+ struct replicas_delta *top = (void *) r->d + r->used; -+ unsigned i; -+ -+ for (d = r->d; d != top; d = replicas_delta_next(d)) -+ if (update_replicas(c, fs_usage, &d->r, d->delta)) { -+ top = d; -+ goto unwind; -+ } -+ -+ if (!fs_usage) -+ return 0; -+ -+ fs_usage->nr_inodes += r->nr_inodes; -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) { -+ fs_usage->reserved += r->persistent_reserved[i]; -+ fs_usage->persistent_reserved[i] += r->persistent_reserved[i]; -+ } -+ -+ return 0; -+unwind: -+ for (d = r->d; d != top; d = replicas_delta_next(d)) -+ update_replicas(c, fs_usage, &d->r, -d->delta); -+ return -1; -+} -+ -+#define do_mark_fn(fn, c, pos, flags, ...) 
\ -+({ \ -+ int gc, ret = 0; \ -+ \ -+ percpu_rwsem_assert_held(&c->mark_lock); \ -+ \ -+ for (gc = 0; gc < 2 && !ret; gc++) \ -+ if (!gc == !(flags & BTREE_TRIGGER_GC) || \ -+ (gc && gc_visited(c, pos))) \ -+ ret = fn(c, __VA_ARGS__, gc); \ -+ ret; \ -+}) -+ -+static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, struct bucket_mark *ret, -+ bool gc) -+{ -+ struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc); -+ struct bucket *g = __bucket(ca, b, gc); -+ struct bucket_mark old, new; -+ -+ old = bucket_cmpxchg(g, new, ({ -+ BUG_ON(!is_available_bucket(new)); -+ -+ new.owned_by_allocator = true; -+ new.data_type = 0; -+ new.cached_sectors = 0; -+ new.dirty_sectors = 0; -+ new.gen++; -+ })); -+ -+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); -+ -+ if (old.cached_sectors) -+ update_cached_sectors(c, fs_usage, ca->dev_idx, -+ -((s64) old.cached_sectors)); -+ -+ if (!gc) -+ *ret = old; -+ return 0; -+} -+ -+void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, struct bucket_mark *old) -+{ -+ do_mark_fn(__bch2_invalidate_bucket, c, gc_phase(GC_PHASE_START), 0, -+ ca, b, old); -+ -+ if (!old->owned_by_allocator && old->cached_sectors) -+ trace_invalidate(ca, bucket_to_sector(ca, b), -+ old->cached_sectors); -+} -+ -+static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, bool owned_by_allocator, -+ bool gc) -+{ -+ struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc); -+ struct bucket *g = __bucket(ca, b, gc); -+ struct bucket_mark old, new; -+ -+ old = bucket_cmpxchg(g, new, ({ -+ new.owned_by_allocator = owned_by_allocator; -+ })); -+ -+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); -+ -+ BUG_ON(!gc && -+ !owned_by_allocator && !old.owned_by_allocator); -+ -+ return 0; -+} -+ -+void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, bool owned_by_allocator, -+ struct gc_pos pos, unsigned flags) -+{ -+ preempt_disable(); -+ -+ do_mark_fn(__bch2_mark_alloc_bucket, c, pos, flags, -+ ca, b, owned_by_allocator); -+ -+ preempt_enable(); -+} -+ -+static int bch2_mark_alloc(struct bch_fs *c, -+ struct bkey_s_c old, struct bkey_s_c new, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, unsigned flags) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ struct bkey_alloc_unpacked u; -+ struct bch_dev *ca; -+ struct bucket *g; -+ struct bucket_mark old_m, m; -+ -+ /* We don't do anything for deletions - do we?: */ -+ if (new.k->type != KEY_TYPE_alloc) -+ return 0; -+ -+ /* -+ * alloc btree is read in by bch2_alloc_read, not gc: -+ */ -+ if ((flags & BTREE_TRIGGER_GC) && -+ !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) -+ return 0; -+ -+ ca = bch_dev_bkey_exists(c, new.k->p.inode); -+ -+ if (new.k->p.offset >= ca->mi.nbuckets) -+ return 0; -+ -+ g = __bucket(ca, new.k->p.offset, gc); -+ u = bch2_alloc_unpack(new); -+ -+ old_m = bucket_cmpxchg(g, m, ({ -+ m.gen = u.gen; -+ m.data_type = u.data_type; -+ m.dirty_sectors = u.dirty_sectors; -+ m.cached_sectors = u.cached_sectors; -+ -+ if (journal_seq) { -+ m.journal_seq_valid = 1; -+ m.journal_seq = journal_seq; -+ } -+ })); -+ -+ bch2_dev_usage_update(c, ca, fs_usage, old_m, m, gc); -+ -+ g->io_time[READ] = u.read_time; -+ g->io_time[WRITE] = u.write_time; -+ g->oldest_gen = u.oldest_gen; -+ g->gen_valid = 1; -+ -+ /* -+ * need to know if we're getting called from the invalidate path or -+ * not: -+ */ -+ -+ if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) && -+ old_m.cached_sectors) { -+ update_cached_sectors(c, fs_usage, ca->dev_idx, 
-+ -old_m.cached_sectors); -+ trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset), -+ old_m.cached_sectors); -+ } -+ -+ return 0; -+} -+ -+#define checked_add(a, b) \ -+({ \ -+ unsigned _res = (unsigned) (a) + (b); \ -+ bool overflow = _res > U16_MAX; \ -+ if (overflow) \ -+ _res = U16_MAX; \ -+ (a) = _res; \ -+ overflow; \ -+}) -+ -+static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, enum bch_data_type data_type, -+ unsigned sectors, bool gc) -+{ -+ struct bucket *g = __bucket(ca, b, gc); -+ struct bucket_mark old, new; -+ bool overflow; -+ -+ BUG_ON(data_type != BCH_DATA_sb && -+ data_type != BCH_DATA_journal); -+ -+ old = bucket_cmpxchg(g, new, ({ -+ new.data_type = data_type; -+ overflow = checked_add(new.dirty_sectors, sectors); -+ })); -+ -+ bch2_fs_inconsistent_on(old.data_type && -+ old.data_type != data_type, c, -+ "different types of data in same bucket: %s, %s", -+ bch2_data_types[old.data_type], -+ bch2_data_types[data_type]); -+ -+ bch2_fs_inconsistent_on(overflow, c, -+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > U16_MAX", -+ ca->dev_idx, b, new.gen, -+ bch2_data_types[old.data_type ?: data_type], -+ old.dirty_sectors, sectors); -+ -+ if (c) -+ bch2_dev_usage_update(c, ca, fs_usage_ptr(c, 0, gc), -+ old, new, gc); -+ -+ return 0; -+} -+ -+void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, enum bch_data_type type, -+ unsigned sectors, struct gc_pos pos, -+ unsigned flags) -+{ -+ BUG_ON(type != BCH_DATA_sb && -+ type != BCH_DATA_journal); -+ -+ preempt_disable(); -+ -+ if (likely(c)) { -+ do_mark_fn(__bch2_mark_metadata_bucket, c, pos, flags, -+ ca, b, type, sectors); -+ } else { -+ __bch2_mark_metadata_bucket(c, ca, b, type, sectors, 0); -+ } -+ -+ preempt_enable(); -+} -+ -+static s64 disk_sectors_scaled(unsigned n, unsigned d, unsigned sectors) -+{ -+ return DIV_ROUND_UP(sectors * n, d); -+} -+ -+static s64 __ptr_disk_sectors_delta(unsigned old_size, -+ unsigned offset, s64 delta, -+ unsigned flags, -+ unsigned n, unsigned d) -+{ -+ BUG_ON(!n || !d); -+ -+ if (flags & BTREE_TRIGGER_OVERWRITE_SPLIT) { -+ BUG_ON(offset + -delta > old_size); -+ -+ return -disk_sectors_scaled(n, d, old_size) + -+ disk_sectors_scaled(n, d, offset) + -+ disk_sectors_scaled(n, d, old_size - offset + delta); -+ } else if (flags & BTREE_TRIGGER_OVERWRITE) { -+ BUG_ON(offset + -delta > old_size); -+ -+ return -disk_sectors_scaled(n, d, old_size) + -+ disk_sectors_scaled(n, d, old_size + delta); -+ } else { -+ return disk_sectors_scaled(n, d, delta); -+ } -+} -+ -+static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p, -+ unsigned offset, s64 delta, -+ unsigned flags) -+{ -+ return __ptr_disk_sectors_delta(p.crc.live_size, -+ offset, delta, flags, -+ p.crc.compressed_size, -+ p.crc.uncompressed_size); -+} -+ -+static int check_bucket_ref(struct bch_fs *c, struct bkey_s_c k, -+ const struct bch_extent_ptr *ptr, -+ s64 sectors, enum bch_data_type ptr_data_type, -+ u8 bucket_gen, u8 bucket_data_type, -+ u16 dirty_sectors, u16 cached_sectors) -+{ -+ size_t bucket_nr = PTR_BUCKET_NR(bch_dev_bkey_exists(c, ptr->dev), ptr); -+ u16 bucket_sectors = !ptr->cached -+ ? 
dirty_sectors -+ : cached_sectors; -+ char buf[200]; -+ -+ if (gen_after(ptr->gen, bucket_gen)) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" -+ "while marking %s", -+ ptr->dev, bucket_nr, bucket_gen, -+ bch2_data_types[bucket_data_type ?: ptr_data_type], -+ ptr->gen, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ if (gen_cmp(bucket_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" -+ "while marking %s", -+ ptr->dev, bucket_nr, bucket_gen, -+ bch2_data_types[bucket_data_type ?: ptr_data_type], -+ ptr->gen, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ if (bucket_gen != ptr->gen && !ptr->cached) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u data type %s: stale dirty ptr (gen %u)\n" -+ "while marking %s", -+ ptr->dev, bucket_nr, bucket_gen, -+ bch2_data_types[bucket_data_type ?: ptr_data_type], -+ ptr->gen, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ if (bucket_gen != ptr->gen) -+ return 1; -+ -+ if (bucket_data_type && ptr_data_type && -+ bucket_data_type != ptr_data_type) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" -+ "while marking %s", -+ ptr->dev, bucket_nr, bucket_gen, -+ bch2_data_types[bucket_data_type], -+ bch2_data_types[ptr_data_type], -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ if ((unsigned) (bucket_sectors + sectors) > U16_MAX) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n" -+ "while marking %s", -+ ptr->dev, bucket_nr, bucket_gen, -+ bch2_data_types[bucket_data_type ?: ptr_data_type], -+ bucket_sectors, sectors, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ return -EIO; -+ } -+ -+ return 0; -+} -+ -+static int bucket_set_stripe(struct bch_fs *c, struct bkey_s_c k, -+ const struct bch_extent_ptr *ptr, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, -+ unsigned flags, -+ bool enabled) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bucket *g = PTR_BUCKET(ca, ptr, gc); -+ struct bucket_mark new, old; -+ char buf[200]; -+ int ret; -+ -+ old = bucket_cmpxchg(g, new, ({ -+ ret = check_bucket_ref(c, k, ptr, 0, 0, new.gen, new.data_type, -+ new.dirty_sectors, new.cached_sectors); -+ if (ret) -+ return ret; -+ -+ if (new.stripe && enabled) -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s", -+ ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ -+ if (!new.stripe && !enabled) -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ "bucket %u:%zu gen %u: deleting stripe but not marked\n%s", -+ ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); -+ -+ new.stripe = enabled; -+ if (journal_seq) { -+ new.journal_seq_valid = 1; -+ new.journal_seq = journal_seq; -+ } -+ })); -+ -+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); -+ return 0; -+} -+ -+static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k, -+ const struct bch_extent_ptr *ptr, -+ s64 sectors, enum bch_data_type ptr_data_type, -+ u8 bucket_gen, u8 *bucket_data_type, -+ 
u16 *dirty_sectors, u16 *cached_sectors) -+{ -+ u16 *dst_sectors = !ptr->cached -+ ? dirty_sectors -+ : cached_sectors; -+ int ret = check_bucket_ref(c, k, ptr, sectors, ptr_data_type, -+ bucket_gen, *bucket_data_type, -+ *dirty_sectors, *cached_sectors); -+ -+ if (ret) -+ return ret; -+ -+ *dst_sectors += sectors; -+ *bucket_data_type = *dirty_sectors || *cached_sectors -+ ? ptr_data_type : 0; -+ return 0; -+} -+ -+static int bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c k, -+ struct extent_ptr_decoded p, -+ s64 sectors, enum bch_data_type data_type, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, unsigned flags) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ struct bucket_mark old, new; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ struct bucket *g = PTR_BUCKET(ca, &p.ptr, gc); -+ u8 bucket_data_type; -+ u64 v; -+ int ret; -+ -+ v = atomic64_read(&g->_mark.v); -+ do { -+ new.v.counter = old.v.counter = v; -+ bucket_data_type = new.data_type; -+ -+ ret = __mark_pointer(c, k, &p.ptr, sectors, data_type, new.gen, -+ &bucket_data_type, -+ &new.dirty_sectors, -+ &new.cached_sectors); -+ if (ret) -+ return ret; -+ -+ new.data_type = bucket_data_type; -+ -+ if (journal_seq) { -+ new.journal_seq_valid = 1; -+ new.journal_seq = journal_seq; -+ } -+ -+ if (flags & BTREE_TRIGGER_NOATOMIC) { -+ g->_mark = new; -+ break; -+ } -+ } while ((v = atomic64_cmpxchg(&g->_mark.v, -+ old.v.counter, -+ new.v.counter)) != old.v.counter); -+ -+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); -+ -+ BUG_ON(!gc && bucket_became_unavailable(old, new)); -+ -+ return 0; -+} -+ -+static int bch2_mark_stripe_ptr(struct bch_fs *c, -+ struct bch_extent_stripe_ptr p, -+ enum bch_data_type data_type, -+ struct bch_fs_usage *fs_usage, -+ s64 sectors, unsigned flags, -+ struct bch_replicas_padded *r, -+ unsigned *nr_data, -+ unsigned *nr_parity) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ struct stripe *m; -+ unsigned i, blocks_nonempty = 0; -+ -+ m = genradix_ptr(&c->stripes[gc], p.idx); -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ -+ if (!m || !m->alive) { -+ spin_unlock(&c->ec_stripes_heap_lock); -+ bch_err_ratelimited(c, "pointer to nonexistent stripe %llu", -+ (u64) p.idx); -+ return -EIO; -+ } -+ -+ BUG_ON(m->r.e.data_type != data_type); -+ -+ *nr_data = m->nr_blocks - m->nr_redundant; -+ *nr_parity = m->nr_redundant; -+ *r = m->r; -+ -+ m->block_sectors[p.block] += sectors; -+ -+ for (i = 0; i < m->nr_blocks; i++) -+ blocks_nonempty += m->block_sectors[i] != 0; -+ -+ if (m->blocks_nonempty != blocks_nonempty) { -+ m->blocks_nonempty = blocks_nonempty; -+ if (!gc) -+ bch2_stripes_heap_update(c, m, p.idx); -+ } -+ -+ spin_unlock(&c->ec_stripes_heap_lock); -+ -+ return 0; -+} -+ -+static int bch2_mark_extent(struct bch_fs *c, -+ struct bkey_s_c old, struct bkey_s_c new, -+ unsigned offset, s64 sectors, -+ enum bch_data_type data_type, -+ struct bch_fs_usage *fs_usage, -+ unsigned journal_seq, unsigned flags) -+{ -+ struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old; -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ struct bch_replicas_padded r; -+ s64 dirty_sectors = 0; -+ bool stale; -+ int ret; -+ -+ r.e.data_type = data_type; -+ r.e.nr_devs = 0; -+ r.e.nr_required = 1; -+ -+ BUG_ON(!sectors); -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ s64 disk_sectors = data_type == BCH_DATA_btree -+ ? 
sectors -+ : ptr_disk_sectors_delta(p, offset, sectors, flags); -+ -+ ret = bch2_mark_pointer(c, k, p, disk_sectors, data_type, -+ fs_usage, journal_seq, flags); -+ if (ret < 0) -+ return ret; -+ -+ stale = ret > 0; -+ -+ if (p.ptr.cached) { -+ if (!stale) -+ update_cached_sectors(c, fs_usage, p.ptr.dev, -+ disk_sectors); -+ } else if (!p.has_ec) { -+ dirty_sectors += disk_sectors; -+ r.e.devs[r.e.nr_devs++] = p.ptr.dev; -+ } else { -+ struct bch_replicas_padded ec_r; -+ unsigned nr_data, nr_parity; -+ s64 parity_sectors; -+ -+ ret = bch2_mark_stripe_ptr(c, p.ec, data_type, -+ fs_usage, disk_sectors, flags, -+ &ec_r, &nr_data, &nr_parity); -+ if (ret) -+ return ret; -+ -+ parity_sectors = -+ __ptr_disk_sectors_delta(p.crc.live_size, -+ offset, sectors, flags, -+ p.crc.compressed_size * nr_parity, -+ p.crc.uncompressed_size * nr_data); -+ -+ update_replicas(c, fs_usage, &ec_r.e, -+ disk_sectors + parity_sectors); -+ -+ /* -+ * There may be other dirty pointers in this extent, but -+ * if so they're not required for mounting if we have an -+ * erasure coded pointer in this extent: -+ */ -+ r.e.nr_required = 0; -+ } -+ } -+ -+ if (r.e.nr_devs) -+ update_replicas(c, fs_usage, &r.e, dirty_sectors); -+ -+ return 0; -+} -+ -+static int bch2_mark_stripe(struct bch_fs *c, -+ struct bkey_s_c old, struct bkey_s_c new, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, unsigned flags) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ size_t idx = new.k->p.offset; -+ const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe -+ ? bkey_s_c_to_stripe(old).v : NULL; -+ const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe -+ ? bkey_s_c_to_stripe(new).v : NULL; -+ struct stripe *m = genradix_ptr(&c->stripes[gc], idx); -+ unsigned i; -+ int ret; -+ -+ if (!m || (old_s && !m->alive)) { -+ bch_err_ratelimited(c, "error marking nonexistent stripe %zu", -+ idx); -+ return -1; -+ } -+ -+ if (!new_s) { -+ /* Deleting: */ -+ for (i = 0; i < old_s->nr_blocks; i++) { -+ ret = bucket_set_stripe(c, old, old_s->ptrs + i, fs_usage, -+ journal_seq, flags, false); -+ if (ret) -+ return ret; -+ } -+ -+ if (!gc && m->on_heap) { -+ spin_lock(&c->ec_stripes_heap_lock); -+ bch2_stripes_heap_del(c, m, idx); -+ spin_unlock(&c->ec_stripes_heap_lock); -+ } -+ -+ memset(m, 0, sizeof(*m)); -+ } else { -+ BUG_ON(old_s && new_s->nr_blocks != old_s->nr_blocks); -+ BUG_ON(old_s && new_s->nr_redundant != old_s->nr_redundant); -+ -+ for (i = 0; i < new_s->nr_blocks; i++) { -+ if (!old_s || -+ memcmp(new_s->ptrs + i, -+ old_s->ptrs + i, -+ sizeof(struct bch_extent_ptr))) { -+ -+ if (old_s) { -+ bucket_set_stripe(c, old, old_s->ptrs + i, fs_usage, -+ journal_seq, flags, false); -+ if (ret) -+ return ret; -+ } -+ ret = bucket_set_stripe(c, new, new_s->ptrs + i, fs_usage, -+ journal_seq, flags, true); -+ if (ret) -+ return ret; -+ } -+ } -+ -+ m->alive = true; -+ m->sectors = le16_to_cpu(new_s->sectors); -+ m->algorithm = new_s->algorithm; -+ m->nr_blocks = new_s->nr_blocks; -+ m->nr_redundant = new_s->nr_redundant; -+ -+ bch2_bkey_to_replicas(&m->r.e, new); -+ -+ /* gc recalculates these fields: */ -+ if (!(flags & BTREE_TRIGGER_GC)) { -+ m->blocks_nonempty = 0; -+ -+ for (i = 0; i < new_s->nr_blocks; i++) { -+ m->block_sectors[i] = -+ stripe_blockcount_get(new_s, i); -+ m->blocks_nonempty += !!m->block_sectors[i]; -+ } -+ } -+ -+ if (!gc) { -+ spin_lock(&c->ec_stripes_heap_lock); -+ bch2_stripes_heap_update(c, m, idx); -+ spin_unlock(&c->ec_stripes_heap_lock); -+ } -+ } -+ -+ return 0; -+} -+ -+static int 
bch2_mark_key_locked(struct bch_fs *c, -+ struct bkey_s_c old, -+ struct bkey_s_c new, -+ unsigned offset, s64 sectors, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, unsigned flags) -+{ -+ struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old; -+ int ret = 0; -+ -+ BUG_ON(!(flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE))); -+ -+ preempt_disable(); -+ -+ if (!fs_usage || (flags & BTREE_TRIGGER_GC)) -+ fs_usage = fs_usage_ptr(c, journal_seq, -+ flags & BTREE_TRIGGER_GC); -+ -+ switch (k.k->type) { -+ case KEY_TYPE_alloc: -+ ret = bch2_mark_alloc(c, old, new, fs_usage, journal_seq, flags); -+ break; -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ sectors = !(flags & BTREE_TRIGGER_OVERWRITE) -+ ? c->opts.btree_node_size -+ : -c->opts.btree_node_size; -+ -+ ret = bch2_mark_extent(c, old, new, offset, sectors, -+ BCH_DATA_btree, fs_usage, journal_seq, flags); -+ break; -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ ret = bch2_mark_extent(c, old, new, offset, sectors, -+ BCH_DATA_user, fs_usage, journal_seq, flags); -+ break; -+ case KEY_TYPE_stripe: -+ ret = bch2_mark_stripe(c, old, new, fs_usage, journal_seq, flags); -+ break; -+ case KEY_TYPE_inode: -+ if (!(flags & BTREE_TRIGGER_OVERWRITE)) -+ fs_usage->nr_inodes++; -+ else -+ fs_usage->nr_inodes--; -+ break; -+ case KEY_TYPE_reservation: { -+ unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; -+ -+ sectors *= replicas; -+ replicas = clamp_t(unsigned, replicas, 1, -+ ARRAY_SIZE(fs_usage->persistent_reserved)); -+ -+ fs_usage->reserved += sectors; -+ fs_usage->persistent_reserved[replicas - 1] += sectors; -+ break; -+ } -+ } -+ -+ preempt_enable(); -+ -+ return ret; -+} -+ -+int bch2_mark_key(struct bch_fs *c, struct bkey_s_c new, -+ unsigned offset, s64 sectors, -+ struct bch_fs_usage *fs_usage, -+ u64 journal_seq, unsigned flags) -+{ -+ struct bkey deleted; -+ struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL }; -+ int ret; -+ -+ bkey_init(&deleted); -+ -+ percpu_down_read(&c->mark_lock); -+ ret = bch2_mark_key_locked(c, old, new, offset, sectors, -+ fs_usage, journal_seq, -+ BTREE_TRIGGER_INSERT|flags); -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+int bch2_mark_update(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *new, -+ struct bch_fs_usage *fs_usage, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree *b = iter_l(iter)->b; -+ struct btree_node_iter node_iter = iter_l(iter)->iter; -+ struct bkey_packed *_old; -+ struct bkey_s_c old; -+ struct bkey unpacked; -+ int ret = 0; -+ -+ if (unlikely(flags & BTREE_TRIGGER_NORUN)) -+ return 0; -+ -+ if (!btree_node_type_needs_gc(iter->btree_id)) -+ return 0; -+ -+ bkey_init(&unpacked); -+ old = (struct bkey_s_c) { &unpacked, NULL }; -+ -+ if (!btree_node_type_is_extents(iter->btree_id)) { -+ if (btree_iter_type(iter) != BTREE_ITER_CACHED) { -+ _old = bch2_btree_node_iter_peek(&node_iter, b); -+ if (_old) -+ old = bkey_disassemble(b, _old, &unpacked); -+ } else { -+ struct bkey_cached *ck = (void *) iter->l[0].b; -+ -+ if (ck->valid) -+ old = bkey_i_to_s_c(ck->k); -+ } -+ -+ if (old.k->type == new->k.type) { -+ bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0, -+ fs_usage, trans->journal_res.seq, -+ BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); -+ -+ } else { -+ bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0, -+ fs_usage, trans->journal_res.seq, -+ BTREE_TRIGGER_INSERT|flags); -+ bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0, -+ fs_usage, 
trans->journal_res.seq, -+ BTREE_TRIGGER_OVERWRITE|flags); -+ } -+ } else { -+ BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED); -+ bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), -+ 0, new->k.size, -+ fs_usage, trans->journal_res.seq, -+ BTREE_TRIGGER_INSERT|flags); -+ -+ while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) { -+ unsigned offset = 0; -+ s64 sectors; -+ -+ old = bkey_disassemble(b, _old, &unpacked); -+ sectors = -((s64) old.k->size); -+ -+ flags |= BTREE_TRIGGER_OVERWRITE; -+ -+ if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0) -+ return 0; -+ -+ switch (bch2_extent_overlap(&new->k, old.k)) { -+ case BCH_EXTENT_OVERLAP_ALL: -+ offset = 0; -+ sectors = -((s64) old.k->size); -+ break; -+ case BCH_EXTENT_OVERLAP_BACK: -+ offset = bkey_start_offset(&new->k) - -+ bkey_start_offset(old.k); -+ sectors = bkey_start_offset(&new->k) - -+ old.k->p.offset; -+ break; -+ case BCH_EXTENT_OVERLAP_FRONT: -+ offset = 0; -+ sectors = bkey_start_offset(old.k) - -+ new->k.p.offset; -+ break; -+ case BCH_EXTENT_OVERLAP_MIDDLE: -+ offset = bkey_start_offset(&new->k) - -+ bkey_start_offset(old.k); -+ sectors = -((s64) new->k.size); -+ flags |= BTREE_TRIGGER_OVERWRITE_SPLIT; -+ break; -+ } -+ -+ BUG_ON(sectors >= 0); -+ -+ ret = bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), -+ offset, sectors, fs_usage, -+ trans->journal_res.seq, flags) ?: 1; -+ if (ret <= 0) -+ break; -+ -+ bch2_btree_node_iter_advance(&node_iter, b); -+ } -+ } -+ -+ return ret; -+} -+ -+void bch2_trans_fs_usage_apply(struct btree_trans *trans, -+ struct bch_fs_usage *fs_usage) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_insert_entry *i; -+ static int warned_disk_usage = 0; -+ u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; -+ char buf[200]; -+ -+ if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res, -+ trans->journal_res.seq) || -+ warned_disk_usage || -+ xchg(&warned_disk_usage, 1)) -+ return; -+ -+ bch_err(c, "disk usage increased more than %llu sectors reserved", -+ disk_res_sectors); -+ -+ trans_for_each_update(trans, i) { -+ pr_err("while inserting"); -+ bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k)); -+ pr_err("%s", buf); -+ pr_err("overlapping with"); -+ -+ if (btree_iter_type(i->iter) != BTREE_ITER_CACHED) { -+ struct btree *b = iter_l(i->iter)->b; -+ struct btree_node_iter node_iter = iter_l(i->iter)->iter; -+ struct bkey_packed *_k; -+ -+ while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) { -+ struct bkey unpacked; -+ struct bkey_s_c k; -+ -+ pr_info("_k %px format %u", _k, _k->format); -+ k = bkey_disassemble(b, _k, &unpacked); -+ -+ if (btree_node_is_extents(b) -+ ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0 -+ : bkey_cmp(i->k->k.p, k.k->p)) -+ break; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, k); -+ pr_err("%s", buf); -+ -+ bch2_btree_node_iter_advance(&node_iter, b); -+ } -+ } else { -+ struct bkey_cached *ck = (void *) i->iter->l[0].b; -+ -+ if (ck->valid) { -+ bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(ck->k)); -+ pr_err("%s", buf); -+ } -+ } -+ } -+} -+ -+/* trans_mark: */ -+ -+static struct btree_iter *trans_get_update(struct btree_trans *trans, -+ enum btree_id btree_id, struct bpos pos, -+ struct bkey_s_c *k) -+{ -+ struct btree_insert_entry *i; -+ -+ trans_for_each_update(trans, i) -+ if (i->iter->btree_id == btree_id && -+ (btree_node_type_is_extents(btree_id) -+ ? 
bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 && -+ bkey_cmp(pos, i->k->k.p) < 0 -+ : !bkey_cmp(pos, i->iter->pos))) { -+ *k = bkey_i_to_s_c(i->k); -+ return i->iter; -+ } -+ -+ return NULL; -+} -+ -+static int trans_get_key(struct btree_trans *trans, -+ enum btree_id btree_id, struct bpos pos, -+ struct btree_iter **iter, -+ struct bkey_s_c *k) -+{ -+ unsigned flags = btree_id != BTREE_ID_ALLOC -+ ? BTREE_ITER_SLOTS -+ : BTREE_ITER_CACHED; -+ int ret; -+ -+ *iter = trans_get_update(trans, btree_id, pos, k); -+ if (*iter) -+ return 1; -+ -+ *iter = bch2_trans_get_iter(trans, btree_id, pos, -+ flags|BTREE_ITER_INTENT); -+ if (IS_ERR(*iter)) -+ return PTR_ERR(*iter); -+ -+ *k = __bch2_btree_iter_peek(*iter, flags); -+ ret = bkey_err(*k); -+ if (ret) -+ bch2_trans_iter_put(trans, *iter); -+ return ret; -+} -+ -+static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_iter, -+ const struct bch_extent_ptr *ptr, -+ struct bkey_alloc_unpacked *u) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bpos pos = POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)); -+ struct bucket *g; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k); -+ if (iter) { -+ *u = bch2_alloc_unpack(k); -+ } else { -+ iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, pos, -+ BTREE_ITER_CACHED| -+ BTREE_ITER_CACHED_NOFILL| -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) { -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+ } -+ -+ percpu_down_read(&c->mark_lock); -+ g = bucket(ca, pos.offset); -+ *u = alloc_mem_to_key(g, READ_ONCE(g->mark)); -+ percpu_up_read(&c->mark_lock); -+ } -+ -+ *_iter = iter; -+ return 0; -+} -+ -+static int bch2_trans_mark_pointer(struct btree_trans *trans, -+ struct bkey_s_c k, struct extent_ptr_decoded p, -+ s64 sectors, enum bch_data_type data_type) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *iter; -+ struct bkey_alloc_unpacked u; -+ struct bkey_i_alloc *a; -+ int ret; -+ -+ ret = bch2_trans_start_alloc_update(trans, &iter, &p.ptr, &u); -+ if (ret) -+ return ret; -+ -+ ret = __mark_pointer(c, k, &p.ptr, sectors, data_type, u.gen, &u.data_type, -+ &u.dirty_sectors, &u.cached_sectors); -+ if (ret) -+ goto out; -+ -+ a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8); -+ ret = PTR_ERR_OR_ZERO(a); -+ if (ret) -+ goto out; -+ -+ bkey_alloc_init(&a->k_i); -+ a->k.p = iter->pos; -+ bch2_alloc_pack(a, u); -+ bch2_trans_update(trans, iter, &a->k_i, 0); -+out: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, -+ struct bch_extent_stripe_ptr p, -+ s64 sectors, enum bch_data_type data_type, -+ struct bch_replicas_padded *r, -+ unsigned *nr_data, -+ unsigned *nr_parity) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_i_stripe *s; -+ int ret = 0; -+ -+ ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx), &iter, &k); -+ if (ret < 0) -+ return ret; -+ -+ if (k.k->type != KEY_TYPE_stripe) { -+ bch2_fs_inconsistent(c, -+ "pointer to nonexistent stripe %llu", -+ (u64) p.idx); -+ ret = -EIO; -+ goto out; -+ } -+ -+ s = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ ret = PTR_ERR_OR_ZERO(s); -+ if (ret) -+ goto out; -+ -+ bkey_reassemble(&s->k_i, k); -+ -+ stripe_blockcount_set(&s->v, p.block, -+ stripe_blockcount_get(&s->v, p.block) + -+ sectors); -+ 
-+ *nr_data = s->v.nr_blocks - s->v.nr_redundant; -+ *nr_parity = s->v.nr_redundant; -+ bch2_bkey_to_replicas(&r->e, bkey_i_to_s_c(&s->k_i)); -+ bch2_trans_update(trans, iter, &s->k_i, 0); -+out: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static int bch2_trans_mark_extent(struct btree_trans *trans, -+ struct bkey_s_c k, unsigned offset, -+ s64 sectors, unsigned flags, -+ enum bch_data_type data_type) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ struct bch_replicas_padded r; -+ s64 dirty_sectors = 0; -+ bool stale; -+ int ret; -+ -+ r.e.data_type = data_type; -+ r.e.nr_devs = 0; -+ r.e.nr_required = 1; -+ -+ BUG_ON(!sectors); -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ s64 disk_sectors = data_type == BCH_DATA_btree -+ ? sectors -+ : ptr_disk_sectors_delta(p, offset, sectors, flags); -+ -+ ret = bch2_trans_mark_pointer(trans, k, p, disk_sectors, -+ data_type); -+ if (ret < 0) -+ return ret; -+ -+ stale = ret > 0; -+ -+ if (p.ptr.cached) { -+ if (!stale) -+ update_cached_sectors_list(trans, p.ptr.dev, -+ disk_sectors); -+ } else if (!p.has_ec) { -+ dirty_sectors += disk_sectors; -+ r.e.devs[r.e.nr_devs++] = p.ptr.dev; -+ } else { -+ struct bch_replicas_padded ec_r; -+ unsigned nr_data, nr_parity; -+ s64 parity_sectors; -+ -+ ret = bch2_trans_mark_stripe_ptr(trans, p.ec, -+ disk_sectors, data_type, -+ &ec_r, &nr_data, &nr_parity); -+ if (ret) -+ return ret; -+ -+ parity_sectors = -+ __ptr_disk_sectors_delta(p.crc.live_size, -+ offset, sectors, flags, -+ p.crc.compressed_size * nr_parity, -+ p.crc.uncompressed_size * nr_data); -+ -+ update_replicas_list(trans, &ec_r.e, -+ disk_sectors + parity_sectors); -+ -+ r.e.nr_required = 0; -+ } -+ } -+ -+ if (r.e.nr_devs) -+ update_replicas_list(trans, &r.e, dirty_sectors); -+ -+ return 0; -+} -+ -+static int bch2_trans_mark_stripe(struct btree_trans *trans, -+ struct bkey_s_c k) -+{ -+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; -+ struct bkey_alloc_unpacked u; -+ struct bkey_i_alloc *a; -+ struct btree_iter *iter; -+ unsigned i; -+ int ret = 0; -+ -+ /* -+ * The allocator code doesn't necessarily update bucket gens in the -+ * btree when incrementing them, right before handing out new buckets - -+ * we just need to persist those updates here along with the new stripe: -+ */ -+ -+ for (i = 0; i < s->nr_blocks && !ret; i++) { -+ ret = bch2_trans_start_alloc_update(trans, &iter, -+ &s->ptrs[i], &u); -+ if (ret) -+ break; -+ -+ a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8); -+ ret = PTR_ERR_OR_ZERO(a); -+ if (ret) -+ goto put_iter; -+ -+ bkey_alloc_init(&a->k_i); -+ a->k.p = iter->pos; -+ bch2_alloc_pack(a, u); -+ bch2_trans_update(trans, iter, &a->k_i, 0); -+put_iter: -+ bch2_trans_iter_put(trans, iter); -+ } -+ -+ return ret; -+} -+ -+static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, -+ struct bkey_s_c_reflink_p p, -+ u64 idx, unsigned sectors, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_i_reflink_v *r_v; -+ s64 ret; -+ -+ ret = trans_get_key(trans, BTREE_ID_REFLINK, -+ POS(0, idx), &iter, &k); -+ if (ret < 0) -+ return ret; -+ -+ if (k.k->type != KEY_TYPE_reflink_v) { -+ bch2_fs_inconsistent(c, -+ "%llu:%llu len %u points to nonexistent indirect extent %llu", -+ p.k->p.inode, p.k->p.offset, p.k->size, idx); -+ ret = -EIO; -+ goto err; -+ } -+ -+ if ((flags & BTREE_TRIGGER_OVERWRITE) && -+ (bkey_start_offset(k.k) < idx || -+ 
k.k->p.offset > idx + sectors)) -+ goto out; -+ -+ sectors = k.k->p.offset - idx; -+ -+ r_v = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ ret = PTR_ERR_OR_ZERO(r_v); -+ if (ret) -+ goto err; -+ -+ bkey_reassemble(&r_v->k_i, k); -+ -+ le64_add_cpu(&r_v->v.refcount, -+ !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1); -+ -+ if (!r_v->v.refcount) { -+ r_v->k.type = KEY_TYPE_deleted; -+ set_bkey_val_u64s(&r_v->k, 0); -+ } -+ -+ bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k)); -+ BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); -+ -+ bch2_trans_update(trans, iter, &r_v->k_i, 0); -+out: -+ ret = sectors; -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static int bch2_trans_mark_reflink_p(struct btree_trans *trans, -+ struct bkey_s_c_reflink_p p, unsigned offset, -+ s64 sectors, unsigned flags) -+{ -+ u64 idx = le64_to_cpu(p.v->idx) + offset; -+ s64 ret = 0; -+ -+ sectors = abs(sectors); -+ BUG_ON(offset + sectors > p.k->size); -+ -+ while (sectors) { -+ ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors, flags); -+ if (ret < 0) -+ break; -+ -+ idx += ret; -+ sectors = max_t(s64, 0LL, sectors - ret); -+ ret = 0; -+ } -+ -+ return ret; -+} -+ -+int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k, -+ unsigned offset, s64 sectors, unsigned flags) -+{ -+ struct replicas_delta_list *d; -+ struct bch_fs *c = trans->c; -+ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ sectors = !(flags & BTREE_TRIGGER_OVERWRITE) -+ ? c->opts.btree_node_size -+ : -c->opts.btree_node_size; -+ -+ return bch2_trans_mark_extent(trans, k, offset, sectors, -+ flags, BCH_DATA_btree); -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ return bch2_trans_mark_extent(trans, k, offset, sectors, -+ flags, BCH_DATA_user); -+ case KEY_TYPE_stripe: -+ return bch2_trans_mark_stripe(trans, k); -+ case KEY_TYPE_inode: -+ d = replicas_deltas_realloc(trans, 0); -+ -+ if (!(flags & BTREE_TRIGGER_OVERWRITE)) -+ d->nr_inodes++; -+ else -+ d->nr_inodes--; -+ return 0; -+ case KEY_TYPE_reservation: { -+ unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; -+ -+ d = replicas_deltas_realloc(trans, 0); -+ -+ sectors *= replicas; -+ replicas = clamp_t(unsigned, replicas, 1, -+ ARRAY_SIZE(d->persistent_reserved)); -+ -+ d->persistent_reserved[replicas - 1] += sectors; -+ return 0; -+ } -+ case KEY_TYPE_reflink_p: -+ return bch2_trans_mark_reflink_p(trans, -+ bkey_s_c_to_reflink_p(k), -+ offset, sectors, flags); -+ default: -+ return 0; -+ } -+} -+ -+int bch2_trans_mark_update(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert, -+ unsigned flags) -+{ -+ struct btree *b = iter_l(iter)->b; -+ struct btree_node_iter node_iter = iter_l(iter)->iter; -+ struct bkey_packed *_k; -+ int ret; -+ -+ if (unlikely(flags & BTREE_TRIGGER_NORUN)) -+ return 0; -+ -+ if (!btree_node_type_needs_gc(iter->btree_id)) -+ return 0; -+ -+ ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(insert), -+ 0, insert->k.size, BTREE_TRIGGER_INSERT); -+ if (ret) -+ return ret; -+ -+ if (btree_iter_type(iter) == BTREE_ITER_CACHED) { -+ struct bkey_cached *ck = (void *) iter->l[0].b; -+ -+ return bch2_trans_mark_key(trans, bkey_i_to_s_c(ck->k), -+ 0, 0, BTREE_TRIGGER_OVERWRITE); -+ } -+ -+ while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) { -+ struct bkey unpacked; -+ struct bkey_s_c k; -+ unsigned offset = 0; -+ s64 sectors = 0; -+ unsigned flags = BTREE_TRIGGER_OVERWRITE; -+ -+ k = bkey_disassemble(b, _k, &unpacked); -+ -+ if (btree_node_is_extents(b) -+ ? 
bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0 -+ : bkey_cmp(insert->k.p, k.k->p)) -+ break; -+ -+ if (btree_node_is_extents(b)) { -+ switch (bch2_extent_overlap(&insert->k, k.k)) { -+ case BCH_EXTENT_OVERLAP_ALL: -+ offset = 0; -+ sectors = -((s64) k.k->size); -+ break; -+ case BCH_EXTENT_OVERLAP_BACK: -+ offset = bkey_start_offset(&insert->k) - -+ bkey_start_offset(k.k); -+ sectors = bkey_start_offset(&insert->k) - -+ k.k->p.offset; -+ break; -+ case BCH_EXTENT_OVERLAP_FRONT: -+ offset = 0; -+ sectors = bkey_start_offset(k.k) - -+ insert->k.p.offset; -+ break; -+ case BCH_EXTENT_OVERLAP_MIDDLE: -+ offset = bkey_start_offset(&insert->k) - -+ bkey_start_offset(k.k); -+ sectors = -((s64) insert->k.size); -+ flags |= BTREE_TRIGGER_OVERWRITE_SPLIT; -+ break; -+ } -+ -+ BUG_ON(sectors >= 0); -+ } -+ -+ ret = bch2_trans_mark_key(trans, k, offset, sectors, flags); -+ if (ret) -+ return ret; -+ -+ bch2_btree_node_iter_advance(&node_iter, b); -+ } -+ -+ return 0; -+} -+ -+/* Disk reservations: */ -+ -+static u64 bch2_recalc_sectors_available(struct bch_fs *c) -+{ -+ percpu_u64_set(&c->pcpu->sectors_available, 0); -+ -+ return avail_factor(__bch2_fs_usage_read_short(c).free); -+} -+ -+void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res) -+{ -+ percpu_down_read(&c->mark_lock); -+ this_cpu_sub(c->usage[0]->online_reserved, -+ res->sectors); -+ percpu_up_read(&c->mark_lock); -+ -+ res->sectors = 0; -+} -+ -+#define SECTORS_CACHE 1024 -+ -+int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, -+ unsigned sectors, int flags) -+{ -+ struct bch_fs_pcpu *pcpu; -+ u64 old, v, get; -+ s64 sectors_available; -+ int ret; -+ -+ percpu_down_read(&c->mark_lock); -+ preempt_disable(); -+ pcpu = this_cpu_ptr(c->pcpu); -+ -+ if (sectors <= pcpu->sectors_available) -+ goto out; -+ -+ v = atomic64_read(&c->sectors_available); -+ do { -+ old = v; -+ get = min((u64) sectors + SECTORS_CACHE, old); -+ -+ if (get < sectors) { -+ preempt_enable(); -+ percpu_up_read(&c->mark_lock); -+ goto recalculate; -+ } -+ } while ((v = atomic64_cmpxchg(&c->sectors_available, -+ old, old - get)) != old); -+ -+ pcpu->sectors_available += get; -+ -+out: -+ pcpu->sectors_available -= sectors; -+ this_cpu_add(c->usage[0]->online_reserved, sectors); -+ res->sectors += sectors; -+ -+ preempt_enable(); -+ percpu_up_read(&c->mark_lock); -+ return 0; -+ -+recalculate: -+ percpu_down_write(&c->mark_lock); -+ -+ sectors_available = bch2_recalc_sectors_available(c); -+ -+ if (sectors <= sectors_available || -+ (flags & BCH_DISK_RESERVATION_NOFAIL)) { -+ atomic64_set(&c->sectors_available, -+ max_t(s64, 0, sectors_available - sectors)); -+ this_cpu_add(c->usage[0]->online_reserved, sectors); -+ res->sectors += sectors; -+ ret = 0; -+ } else { -+ atomic64_set(&c->sectors_available, sectors_available); -+ ret = -ENOSPC; -+ } -+ -+ percpu_up_write(&c->mark_lock); -+ -+ return ret; -+} -+ -+/* Startup/shutdown: */ -+ -+static void buckets_free_rcu(struct rcu_head *rcu) -+{ -+ struct bucket_array *buckets = -+ container_of(rcu, struct bucket_array, rcu); -+ -+ kvpfree(buckets, -+ sizeof(struct bucket_array) + -+ buckets->nbuckets * sizeof(struct bucket)); -+} -+ -+int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) -+{ -+ struct bucket_array *buckets = NULL, *old_buckets = NULL; -+ unsigned long *buckets_nouse = NULL; -+ alloc_fifo free[RESERVE_NR]; -+ alloc_fifo free_inc; -+ alloc_heap alloc_heap; -+ -+ size_t btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE, -+ 
ca->mi.bucket_size / c->opts.btree_node_size); -+ /* XXX: these should be tunable */ -+ size_t reserve_none = max_t(size_t, 1, nbuckets >> 9); -+ size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 7); -+ size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12), -+ btree_reserve * 2); -+ bool resize = ca->buckets[0] != NULL; -+ int ret = -ENOMEM; -+ unsigned i; -+ -+ memset(&free, 0, sizeof(free)); -+ memset(&free_inc, 0, sizeof(free_inc)); -+ memset(&alloc_heap, 0, sizeof(alloc_heap)); -+ -+ if (!(buckets = kvpmalloc(sizeof(struct bucket_array) + -+ nbuckets * sizeof(struct bucket), -+ GFP_KERNEL|__GFP_ZERO)) || -+ !(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) * -+ sizeof(unsigned long), -+ GFP_KERNEL|__GFP_ZERO)) || -+ !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) || -+ !init_fifo(&free[RESERVE_MOVINGGC], -+ copygc_reserve, GFP_KERNEL) || -+ !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) || -+ !init_fifo(&free_inc, free_inc_nr, GFP_KERNEL) || -+ !init_heap(&alloc_heap, ALLOC_SCAN_BATCH(ca) << 1, GFP_KERNEL)) -+ goto err; -+ -+ buckets->first_bucket = ca->mi.first_bucket; -+ buckets->nbuckets = nbuckets; -+ -+ bch2_copygc_stop(c); -+ -+ if (resize) { -+ down_write(&c->gc_lock); -+ down_write(&ca->bucket_lock); -+ percpu_down_write(&c->mark_lock); -+ } -+ -+ old_buckets = bucket_array(ca); -+ -+ if (resize) { -+ size_t n = min(buckets->nbuckets, old_buckets->nbuckets); -+ -+ memcpy(buckets->b, -+ old_buckets->b, -+ n * sizeof(struct bucket)); -+ memcpy(buckets_nouse, -+ ca->buckets_nouse, -+ BITS_TO_LONGS(n) * sizeof(unsigned long)); -+ } -+ -+ rcu_assign_pointer(ca->buckets[0], buckets); -+ buckets = old_buckets; -+ -+ swap(ca->buckets_nouse, buckets_nouse); -+ -+ if (resize) { -+ percpu_up_write(&c->mark_lock); -+ up_write(&c->gc_lock); -+ } -+ -+ spin_lock(&c->freelist_lock); -+ for (i = 0; i < RESERVE_NR; i++) { -+ fifo_move(&free[i], &ca->free[i]); -+ swap(ca->free[i], free[i]); -+ } -+ fifo_move(&free_inc, &ca->free_inc); -+ swap(ca->free_inc, free_inc); -+ spin_unlock(&c->freelist_lock); -+ -+ /* with gc lock held, alloc_heap can't be in use: */ -+ swap(ca->alloc_heap, alloc_heap); -+ -+ nbuckets = ca->mi.nbuckets; -+ -+ if (resize) -+ up_write(&ca->bucket_lock); -+ -+ ret = 0; -+err: -+ free_heap(&alloc_heap); -+ free_fifo(&free_inc); -+ for (i = 0; i < RESERVE_NR; i++) -+ free_fifo(&free[i]); -+ kvpfree(buckets_nouse, -+ BITS_TO_LONGS(nbuckets) * sizeof(unsigned long)); -+ if (buckets) -+ call_rcu(&old_buckets->rcu, buckets_free_rcu); -+ -+ return ret; -+} -+ -+void bch2_dev_buckets_free(struct bch_dev *ca) -+{ -+ unsigned i; -+ -+ free_heap(&ca->alloc_heap); -+ free_fifo(&ca->free_inc); -+ for (i = 0; i < RESERVE_NR; i++) -+ free_fifo(&ca->free[i]); -+ kvpfree(ca->buckets_nouse, -+ BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long)); -+ kvpfree(rcu_dereference_protected(ca->buckets[0], 1), -+ sizeof(struct bucket_array) + -+ ca->mi.nbuckets * sizeof(struct bucket)); -+ -+ free_percpu(ca->usage[0]); -+} -+ -+int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) -+{ -+ if (!(ca->usage[0] = alloc_percpu(struct bch_dev_usage))) -+ return -ENOMEM; -+ -+ return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);; -+} -diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h -new file mode 100644 -index 000000000000..a3873becbb70 ---- /dev/null -+++ b/fs/bcachefs/buckets.h -@@ -0,0 +1,318 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Code for manipulating bucket marks for garbage collection. -+ * -+ * Copyright 2014 Datera, Inc. 
-+ */ -+ -+#ifndef _BUCKETS_H -+#define _BUCKETS_H -+ -+#include "buckets_types.h" -+#include "super.h" -+ -+#define for_each_bucket(_b, _buckets) \ -+ for (_b = (_buckets)->b + (_buckets)->first_bucket; \ -+ _b < (_buckets)->b + (_buckets)->nbuckets; _b++) -+ -+#define bucket_cmpxchg(g, new, expr) \ -+({ \ -+ struct bucket *_g = g; \ -+ u64 _v = atomic64_read(&(g)->_mark.v); \ -+ struct bucket_mark _old; \ -+ \ -+ do { \ -+ (new).v.counter = _old.v.counter = _v; \ -+ expr; \ -+ } while ((_v = atomic64_cmpxchg(&(_g)->_mark.v, \ -+ _old.v.counter, \ -+ (new).v.counter)) != _old.v.counter);\ -+ _old; \ -+}) -+ -+static inline struct bucket_array *__bucket_array(struct bch_dev *ca, -+ bool gc) -+{ -+ return rcu_dereference_check(ca->buckets[gc], -+ !ca->fs || -+ percpu_rwsem_is_held(&ca->fs->mark_lock) || -+ lockdep_is_held(&ca->fs->gc_lock) || -+ lockdep_is_held(&ca->bucket_lock)); -+} -+ -+static inline struct bucket_array *bucket_array(struct bch_dev *ca) -+{ -+ return __bucket_array(ca, false); -+} -+ -+static inline struct bucket *__bucket(struct bch_dev *ca, size_t b, bool gc) -+{ -+ struct bucket_array *buckets = __bucket_array(ca, gc); -+ -+ BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets); -+ return buckets->b + b; -+} -+ -+static inline struct bucket *bucket(struct bch_dev *ca, size_t b) -+{ -+ return __bucket(ca, b, false); -+} -+ -+static inline u16 bucket_last_io(struct bch_fs *c, struct bucket *g, int rw) -+{ -+ return c->bucket_clock[rw].hand - g->io_time[rw]; -+} -+ -+/* -+ * bucket_gc_gen() returns the difference between the bucket's current gen and -+ * the oldest gen of any pointer into that bucket in the btree. -+ */ -+ -+static inline u8 bucket_gc_gen(struct bch_dev *ca, size_t b) -+{ -+ struct bucket *g = bucket(ca, b); -+ -+ return g->mark.gen - g->oldest_gen; -+} -+ -+static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr) -+{ -+ return sector_to_bucket(ca, ptr->offset); -+} -+ -+static inline struct bucket *PTR_BUCKET(struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr, -+ bool gc) -+{ -+ return __bucket(ca, PTR_BUCKET_NR(ca, ptr), gc); -+} -+ -+static inline enum bch_data_type ptr_data_type(const struct bkey *k, -+ const struct bch_extent_ptr *ptr) -+{ -+ if (k->type == KEY_TYPE_btree_ptr || -+ k->type == KEY_TYPE_btree_ptr_v2) -+ return BCH_DATA_btree; -+ -+ return ptr->cached ? BCH_DATA_cached : BCH_DATA_user; -+} -+ -+static inline struct bucket_mark ptr_bucket_mark(struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr) -+{ -+ struct bucket_mark m; -+ -+ rcu_read_lock(); -+ m = READ_ONCE(PTR_BUCKET(ca, ptr, 0)->mark); -+ rcu_read_unlock(); -+ -+ return m; -+} -+ -+static inline int gen_cmp(u8 a, u8 b) -+{ -+ return (s8) (a - b); -+} -+ -+static inline int gen_after(u8 a, u8 b) -+{ -+ int r = gen_cmp(a, b); -+ -+ return r > 0 ? r : 0; -+} -+ -+/** -+ * ptr_stale() - check if a pointer points into a bucket that has been -+ * invalidated. -+ */ -+static inline u8 ptr_stale(struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr) -+{ -+ return gen_after(ptr_bucket_mark(ca, ptr).gen, ptr->gen); -+} -+ -+static inline s64 __ptr_disk_sectors(struct extent_ptr_decoded p, -+ unsigned live_size) -+{ -+ return live_size && p.crc.compression_type -+ ? 
max(1U, DIV_ROUND_UP(live_size * p.crc.compressed_size, -+ p.crc.uncompressed_size)) -+ : live_size; -+} -+ -+static inline s64 ptr_disk_sectors(struct extent_ptr_decoded p) -+{ -+ return __ptr_disk_sectors(p, p.crc.live_size); -+} -+ -+/* bucket gc marks */ -+ -+static inline unsigned bucket_sectors_used(struct bucket_mark mark) -+{ -+ return mark.dirty_sectors + mark.cached_sectors; -+} -+ -+static inline bool bucket_unused(struct bucket_mark mark) -+{ -+ return !mark.owned_by_allocator && -+ !mark.data_type && -+ !bucket_sectors_used(mark); -+} -+ -+static inline bool is_available_bucket(struct bucket_mark mark) -+{ -+ return (!mark.owned_by_allocator && -+ !mark.dirty_sectors && -+ !mark.stripe); -+} -+ -+static inline bool bucket_needs_journal_commit(struct bucket_mark m, -+ u16 last_seq_ondisk) -+{ -+ return m.journal_seq_valid && -+ ((s16) m.journal_seq - (s16) last_seq_ondisk > 0); -+} -+ -+/* Device usage: */ -+ -+struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *); -+ -+void bch2_dev_usage_from_buckets(struct bch_fs *); -+ -+static inline u64 __dev_buckets_available(struct bch_dev *ca, -+ struct bch_dev_usage stats) -+{ -+ u64 total = ca->mi.nbuckets - ca->mi.first_bucket; -+ -+ if (WARN_ONCE(stats.buckets_unavailable > total, -+ "buckets_unavailable overflow (%llu > %llu)\n", -+ stats.buckets_unavailable, total)) -+ return 0; -+ -+ return total - stats.buckets_unavailable; -+} -+ -+/* -+ * Number of reclaimable buckets - only for use by the allocator thread: -+ */ -+static inline u64 dev_buckets_available(struct bch_dev *ca) -+{ -+ return __dev_buckets_available(ca, bch2_dev_usage_read(ca)); -+} -+ -+static inline u64 __dev_buckets_free(struct bch_dev *ca, -+ struct bch_dev_usage stats) -+{ -+ return __dev_buckets_available(ca, stats) + -+ fifo_used(&ca->free[RESERVE_NONE]) + -+ fifo_used(&ca->free_inc); -+} -+ -+static inline u64 dev_buckets_free(struct bch_dev *ca) -+{ -+ return __dev_buckets_free(ca, bch2_dev_usage_read(ca)); -+} -+ -+/* Filesystem usage: */ -+ -+static inline unsigned fs_usage_u64s(struct bch_fs *c) -+{ -+ -+ return sizeof(struct bch_fs_usage) / sizeof(u64) + -+ READ_ONCE(c->replicas.nr); -+} -+ -+void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *); -+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *); -+ -+u64 bch2_fs_usage_read_one(struct bch_fs *, u64 *); -+ -+struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *); -+ -+void bch2_fs_usage_acc_to_base(struct bch_fs *, unsigned); -+ -+void bch2_fs_usage_to_text(struct printbuf *, -+ struct bch_fs *, struct bch_fs_usage *); -+ -+u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage *); -+ -+struct bch_fs_usage_short -+bch2_fs_usage_read_short(struct bch_fs *); -+ -+/* key/bucket marking: */ -+ -+void bch2_bucket_seq_cleanup(struct bch_fs *); -+void bch2_fs_usage_initialize(struct bch_fs *); -+ -+void bch2_invalidate_bucket(struct bch_fs *, struct bch_dev *, -+ size_t, struct bucket_mark *); -+void bch2_mark_alloc_bucket(struct bch_fs *, struct bch_dev *, -+ size_t, bool, struct gc_pos, unsigned); -+void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, -+ size_t, enum bch_data_type, unsigned, -+ struct gc_pos, unsigned); -+ -+int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned, -+ s64, struct bch_fs_usage *, u64, unsigned); -+int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *, -+ struct disk_reservation *, unsigned); -+ -+int bch2_mark_update(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *, struct bch_fs_usage *, 
unsigned); -+ -+int bch2_replicas_delta_list_apply(struct bch_fs *, -+ struct bch_fs_usage *, -+ struct replicas_delta_list *); -+int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, -+ unsigned, s64, unsigned); -+int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter, -+ struct bkey_i *insert, unsigned); -+void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *); -+ -+/* disk reservations: */ -+ -+void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *); -+ -+static inline void bch2_disk_reservation_put(struct bch_fs *c, -+ struct disk_reservation *res) -+{ -+ if (res->sectors) -+ __bch2_disk_reservation_put(c, res); -+} -+ -+#define BCH_DISK_RESERVATION_NOFAIL (1 << 0) -+ -+int bch2_disk_reservation_add(struct bch_fs *, -+ struct disk_reservation *, -+ unsigned, int); -+ -+static inline struct disk_reservation -+bch2_disk_reservation_init(struct bch_fs *c, unsigned nr_replicas) -+{ -+ return (struct disk_reservation) { -+ .sectors = 0, -+#if 0 -+ /* not used yet: */ -+ .gen = c->capacity_gen, -+#endif -+ .nr_replicas = nr_replicas, -+ }; -+} -+ -+static inline int bch2_disk_reservation_get(struct bch_fs *c, -+ struct disk_reservation *res, -+ unsigned sectors, -+ unsigned nr_replicas, -+ int flags) -+{ -+ *res = bch2_disk_reservation_init(c, nr_replicas); -+ -+ return bch2_disk_reservation_add(c, res, sectors * nr_replicas, flags); -+} -+ -+int bch2_dev_buckets_resize(struct bch_fs *, struct bch_dev *, u64); -+void bch2_dev_buckets_free(struct bch_dev *); -+int bch2_dev_buckets_alloc(struct bch_fs *, struct bch_dev *); -+ -+#endif /* _BUCKETS_H */ -diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h -new file mode 100644 -index 000000000000..d5215b14d7d9 ---- /dev/null -+++ b/fs/bcachefs/buckets_types.h -@@ -0,0 +1,135 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BUCKETS_TYPES_H -+#define _BUCKETS_TYPES_H -+ -+#include "bcachefs_format.h" -+#include "util.h" -+ -+#define BUCKET_JOURNAL_SEQ_BITS 16 -+ -+struct bucket_mark { -+ union { -+ atomic64_t v; -+ -+ struct { -+ u8 gen; -+ u8 data_type:3, -+ owned_by_allocator:1, -+ journal_seq_valid:1, -+ stripe:1; -+ u16 dirty_sectors; -+ u16 cached_sectors; -+ -+ /* -+ * low bits of journal sequence number when this bucket was most -+ * recently modified: if journal_seq_valid is set, this bucket can't be -+ * reused until the journal sequence number written to disk is >= the -+ * bucket's journal sequence number: -+ */ -+ u16 journal_seq; -+ }; -+ }; -+}; -+ -+struct bucket { -+ union { -+ struct bucket_mark _mark; -+ const struct bucket_mark mark; -+ }; -+ -+ u16 io_time[2]; -+ u8 oldest_gen; -+ u8 gc_gen; -+ unsigned gen_valid:1; -+}; -+ -+struct bucket_array { -+ struct rcu_head rcu; -+ u16 first_bucket; -+ size_t nbuckets; -+ struct bucket b[]; -+}; -+ -+struct bch_dev_usage { -+ u64 buckets[BCH_DATA_NR]; -+ u64 buckets_alloc; -+ u64 buckets_unavailable; -+ -+ /* _compressed_ sectors: */ -+ u64 sectors[BCH_DATA_NR]; -+ u64 sectors_fragmented; -+ -+ u64 buckets_ec; -+ u64 sectors_ec; -+}; -+ -+struct bch_fs_usage { -+ /* all fields are in units of 512 byte sectors: */ -+ -+ u64 online_reserved; -+ -+ /* fields after online_reserved are cleared/recalculated by gc: */ -+ u64 gc_start[0]; -+ -+ u64 hidden; -+ u64 btree; -+ u64 data; -+ u64 cached; -+ u64 reserved; -+ u64 nr_inodes; -+ -+ /* XXX: add stats for compression ratio */ -+#if 0 -+ u64 uncompressed; -+ u64 compressed; -+#endif -+ -+ /* broken out: */ -+ -+ u64 
persistent_reserved[BCH_REPLICAS_MAX]; -+ u64 replicas[]; -+}; -+ -+struct bch_fs_usage_short { -+ u64 capacity; -+ u64 used; -+ u64 free; -+ u64 nr_inodes; -+}; -+ -+struct replicas_delta { -+ s64 delta; -+ struct bch_replicas_entry r; -+} __packed; -+ -+struct replicas_delta_list { -+ unsigned size; -+ unsigned used; -+ -+ struct {} memset_start; -+ u64 nr_inodes; -+ u64 persistent_reserved[BCH_REPLICAS_MAX]; -+ struct {} memset_end; -+ struct replicas_delta d[0]; -+}; -+ -+/* -+ * A reservation for space on disk: -+ */ -+struct disk_reservation { -+ u64 sectors; -+ u32 gen; -+ unsigned nr_replicas; -+}; -+ -+struct copygc_heap_entry { -+ u8 dev; -+ u8 gen; -+ u16 fragmentation; -+ u32 sectors; -+ u64 offset; -+}; -+ -+typedef HEAP(struct copygc_heap_entry) copygc_heap; -+ -+#endif /* _BUCKETS_TYPES_H */ -diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c -new file mode 100644 -index 000000000000..0377f9018d27 ---- /dev/null -+++ b/fs/bcachefs/chardev.c -@@ -0,0 +1,704 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_CHARDEV -+ -+#include "bcachefs.h" -+#include "bcachefs_ioctl.h" -+#include "buckets.h" -+#include "chardev.h" -+#include "move.h" -+#include "replicas.h" -+#include "super.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* returns with ref on ca->ref */ -+static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev, -+ unsigned flags) -+{ -+ struct bch_dev *ca; -+ -+ if (flags & BCH_BY_INDEX) { -+ if (dev >= c->sb.nr_devices) -+ return ERR_PTR(-EINVAL); -+ -+ rcu_read_lock(); -+ ca = rcu_dereference(c->devs[dev]); -+ if (ca) -+ percpu_ref_get(&ca->ref); -+ rcu_read_unlock(); -+ -+ if (!ca) -+ return ERR_PTR(-EINVAL); -+ } else { -+ char *path; -+ -+ path = strndup_user((const char __user *) -+ (unsigned long) dev, PATH_MAX); -+ if (IS_ERR(path)) -+ return ERR_CAST(path); -+ -+ ca = bch2_dev_lookup(c, path); -+ kfree(path); -+ } -+ -+ return ca; -+} -+ -+#if 0 -+static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg) -+{ -+ struct bch_ioctl_assemble arg; -+ struct bch_fs *c; -+ u64 *user_devs = NULL; -+ char **devs = NULL; -+ unsigned i; -+ int ret = -EFAULT; -+ -+ if (copy_from_user(&arg, user_arg, sizeof(arg))) -+ return -EFAULT; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ user_devs = kmalloc_array(arg.nr_devs, sizeof(u64), GFP_KERNEL); -+ if (!user_devs) -+ return -ENOMEM; -+ -+ devs = kcalloc(arg.nr_devs, sizeof(char *), GFP_KERNEL); -+ -+ if (copy_from_user(user_devs, user_arg->devs, -+ sizeof(u64) * arg.nr_devs)) -+ goto err; -+ -+ for (i = 0; i < arg.nr_devs; i++) { -+ devs[i] = strndup_user((const char __user *)(unsigned long) -+ user_devs[i], -+ PATH_MAX); -+ if (!devs[i]) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ } -+ -+ c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty()); -+ ret = PTR_ERR_OR_ZERO(c); -+ if (!ret) -+ closure_put(&c->cl); -+err: -+ if (devs) -+ for (i = 0; i < arg.nr_devs; i++) -+ kfree(devs[i]); -+ kfree(devs); -+ return ret; -+} -+ -+static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg) -+{ -+ struct bch_ioctl_incremental arg; -+ const char *err; -+ char *path; -+ -+ if (copy_from_user(&arg, user_arg, sizeof(arg))) -+ return -EFAULT; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); -+ if (!path) -+ return -ENOMEM; -+ -+ err = bch2_fs_open_incremental(path); -+ 
kfree(path); -+ -+ if (err) { -+ pr_err("Could not register bcachefs devices: %s", err); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+#endif -+ -+static long bch2_global_ioctl(unsigned cmd, void __user *arg) -+{ -+ switch (cmd) { -+#if 0 -+ case BCH_IOCTL_ASSEMBLE: -+ return bch2_ioctl_assemble(arg); -+ case BCH_IOCTL_INCREMENTAL: -+ return bch2_ioctl_incremental(arg); -+#endif -+ default: -+ return -ENOTTY; -+ } -+} -+ -+static long bch2_ioctl_query_uuid(struct bch_fs *c, -+ struct bch_ioctl_query_uuid __user *user_arg) -+{ -+ return copy_to_user(&user_arg->uuid, -+ &c->sb.user_uuid, -+ sizeof(c->sb.user_uuid)); -+} -+ -+#if 0 -+static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg) -+{ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ return bch2_fs_start(c); -+} -+ -+static long bch2_ioctl_stop(struct bch_fs *c) -+{ -+ bch2_fs_stop(c); -+ return 0; -+} -+#endif -+ -+static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ char *path; -+ int ret; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); -+ if (!path) -+ return -ENOMEM; -+ -+ ret = bch2_dev_add(c, path); -+ kfree(path); -+ -+ return ret; -+} -+ -+static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ struct bch_dev *ca; -+ -+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| -+ BCH_FORCE_IF_METADATA_LOST| -+ BCH_FORCE_IF_DEGRADED| -+ BCH_BY_INDEX)) || -+ arg.pad) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ return bch2_dev_remove(c, ca, arg.flags); -+} -+ -+static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ char *path; -+ int ret; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); -+ if (!path) -+ return -ENOMEM; -+ -+ ret = bch2_dev_online(c, path); -+ kfree(path); -+ return ret; -+} -+ -+static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| -+ BCH_FORCE_IF_METADATA_LOST| -+ BCH_FORCE_IF_DEGRADED| -+ BCH_BY_INDEX)) || -+ arg.pad) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ ret = bch2_dev_offline(c, ca, arg.flags); -+ percpu_ref_put(&ca->ref); -+ return ret; -+} -+ -+static long bch2_ioctl_disk_set_state(struct bch_fs *c, -+ struct bch_ioctl_disk_set_state arg) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| -+ BCH_FORCE_IF_METADATA_LOST| -+ BCH_FORCE_IF_DEGRADED| -+ BCH_BY_INDEX)) || -+ arg.pad[0] || arg.pad[1] || arg.pad[2]) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags); -+ -+ percpu_ref_put(&ca->ref); -+ return ret; -+} -+ -+struct bch_data_ctx { -+ struct bch_fs *c; -+ struct bch_ioctl_data arg; -+ struct bch_move_stats stats; -+ -+ int ret; -+ -+ struct task_struct *thread; -+}; -+ -+static int bch2_data_thread(void *arg) -+{ -+ struct bch_data_ctx *ctx = arg; -+ -+ ctx->ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg); -+ -+ ctx->stats.data_type = U8_MAX; -+ return 0; -+} -+ -+static int bch2_data_job_release(struct inode *inode, struct file *file) -+{ -+ struct bch_data_ctx *ctx = file->private_data; -+ -+ 
kthread_stop(ctx->thread); -+ put_task_struct(ctx->thread); -+ kfree(ctx); -+ return 0; -+} -+ -+static ssize_t bch2_data_job_read(struct file *file, char __user *buf, -+ size_t len, loff_t *ppos) -+{ -+ struct bch_data_ctx *ctx = file->private_data; -+ struct bch_fs *c = ctx->c; -+ struct bch_ioctl_data_event e = { -+ .type = BCH_DATA_EVENT_PROGRESS, -+ .p.data_type = ctx->stats.data_type, -+ .p.btree_id = ctx->stats.btree_id, -+ .p.pos = ctx->stats.pos, -+ .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen), -+ .p.sectors_total = bch2_fs_usage_read_short(c).used, -+ }; -+ -+ if (len < sizeof(e)) -+ return -EINVAL; -+ -+ return copy_to_user(buf, &e, sizeof(e)) ?: sizeof(e); -+} -+ -+static const struct file_operations bcachefs_data_ops = { -+ .release = bch2_data_job_release, -+ .read = bch2_data_job_read, -+ .llseek = no_llseek, -+}; -+ -+static long bch2_ioctl_data(struct bch_fs *c, -+ struct bch_ioctl_data arg) -+{ -+ struct bch_data_ctx *ctx = NULL; -+ struct file *file = NULL; -+ unsigned flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK; -+ int ret, fd = -1; -+ -+ if (arg.op >= BCH_DATA_OP_NR || arg.flags) -+ return -EINVAL; -+ -+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); -+ if (!ctx) -+ return -ENOMEM; -+ -+ ctx->c = c; -+ ctx->arg = arg; -+ -+ ctx->thread = kthread_create(bch2_data_thread, ctx, "[bcachefs]"); -+ if (IS_ERR(ctx->thread)) { -+ ret = PTR_ERR(ctx->thread); -+ goto err; -+ } -+ -+ ret = get_unused_fd_flags(flags); -+ if (ret < 0) -+ goto err; -+ fd = ret; -+ -+ file = anon_inode_getfile("[bcachefs]", &bcachefs_data_ops, ctx, flags); -+ if (IS_ERR(file)) { -+ ret = PTR_ERR(file); -+ goto err; -+ } -+ -+ fd_install(fd, file); -+ -+ get_task_struct(ctx->thread); -+ wake_up_process(ctx->thread); -+ -+ return fd; -+err: -+ if (fd >= 0) -+ put_unused_fd(fd); -+ if (!IS_ERR_OR_NULL(ctx->thread)) -+ kthread_stop(ctx->thread); -+ kfree(ctx); -+ return ret; -+} -+ -+static long bch2_ioctl_fs_usage(struct bch_fs *c, -+ struct bch_ioctl_fs_usage __user *user_arg) -+{ -+ struct bch_ioctl_fs_usage *arg = NULL; -+ struct bch_replicas_usage *dst_e, *dst_end; -+ struct bch_fs_usage *src; -+ u32 replica_entries_bytes; -+ unsigned i; -+ int ret = 0; -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EINVAL; -+ -+ if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes)) -+ return -EFAULT; -+ -+ arg = kzalloc(sizeof(*arg) + replica_entries_bytes, GFP_KERNEL); -+ if (!arg) -+ return -ENOMEM; -+ -+ src = bch2_fs_usage_read(c); -+ if (!src) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ arg->capacity = c->capacity; -+ arg->used = bch2_fs_sectors_used(c, src); -+ arg->online_reserved = src->online_reserved; -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) -+ arg->persistent_reserved[i] = src->persistent_reserved[i]; -+ -+ dst_e = arg->replicas; -+ dst_end = (void *) arg->replicas + replica_entries_bytes; -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *src_e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ if (replicas_usage_next(dst_e) > dst_end) { -+ ret = -ERANGE; -+ break; -+ } -+ -+ dst_e->sectors = src->replicas[i]; -+ dst_e->r = *src_e; -+ -+ /* recheck after setting nr_devs: */ -+ if (replicas_usage_next(dst_e) > dst_end) { -+ ret = -ERANGE; -+ break; -+ } -+ -+ memcpy(dst_e->r.devs, src_e->devs, src_e->nr_devs); -+ -+ dst_e = replicas_usage_next(dst_e); -+ } -+ -+ arg->replica_entries_bytes = (void *) dst_e - (void *) arg->replicas; -+ -+ percpu_up_read(&c->mark_lock); -+ kfree(src); -+ -+ if (!ret) -+ ret = copy_to_user(user_arg, arg, -+ sizeof(*arg) 
+ arg->replica_entries_bytes); -+err: -+ kfree(arg); -+ return ret; -+} -+ -+static long bch2_ioctl_dev_usage(struct bch_fs *c, -+ struct bch_ioctl_dev_usage __user *user_arg) -+{ -+ struct bch_ioctl_dev_usage arg; -+ struct bch_dev_usage src; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EINVAL; -+ -+ if (copy_from_user(&arg, user_arg, sizeof(arg))) -+ return -EFAULT; -+ -+ if ((arg.flags & ~BCH_BY_INDEX) || -+ arg.pad[0] || -+ arg.pad[1] || -+ arg.pad[2]) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ src = bch2_dev_usage_read(ca); -+ -+ arg.state = ca->mi.state; -+ arg.bucket_size = ca->mi.bucket_size; -+ arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; -+ arg.available_buckets = arg.nr_buckets - src.buckets_unavailable; -+ arg.ec_buckets = src.buckets_ec; -+ arg.ec_sectors = src.sectors_ec; -+ -+ for (i = 0; i < BCH_DATA_NR; i++) { -+ arg.buckets[i] = src.buckets[i]; -+ arg.sectors[i] = src.sectors[i]; -+ } -+ -+ percpu_ref_put(&ca->ref); -+ -+ return copy_to_user(user_arg, &arg, sizeof(arg)); -+} -+ -+static long bch2_ioctl_read_super(struct bch_fs *c, -+ struct bch_ioctl_read_super arg) -+{ -+ struct bch_dev *ca = NULL; -+ struct bch_sb *sb; -+ int ret = 0; -+ -+ if ((arg.flags & ~(BCH_BY_INDEX|BCH_READ_DEV)) || -+ arg.pad) -+ return -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ -+ if (arg.flags & BCH_READ_DEV) { -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ -+ if (IS_ERR(ca)) { -+ ret = PTR_ERR(ca); -+ goto err; -+ } -+ -+ sb = ca->disk_sb.sb; -+ } else { -+ sb = c->disk_sb.sb; -+ } -+ -+ if (vstruct_bytes(sb) > arg.size) { -+ ret = -ERANGE; -+ goto err; -+ } -+ -+ ret = copy_to_user((void __user *)(unsigned long)arg.sb, -+ sb, vstruct_bytes(sb)); -+err: -+ if (ca) -+ percpu_ref_put(&ca->ref); -+ mutex_unlock(&c->sb_lock); -+ return ret; -+} -+ -+static long bch2_ioctl_disk_get_idx(struct bch_fs *c, -+ struct bch_ioctl_disk_get_idx arg) -+{ -+ dev_t dev = huge_decode_dev(arg.dev); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ for_each_online_member(ca, c, i) -+ if (ca->disk_sb.bdev->bd_dev == dev) { -+ percpu_ref_put(&ca->io_ref); -+ return i; -+ } -+ -+ return -ENOENT; -+} -+ -+static long bch2_ioctl_disk_resize(struct bch_fs *c, -+ struct bch_ioctl_disk_resize arg) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ if ((arg.flags & ~BCH_BY_INDEX) || -+ arg.pad) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ ret = bch2_dev_resize(c, ca, arg.nbuckets); -+ -+ percpu_ref_put(&ca->ref); -+ return ret; -+} -+ -+#define BCH_IOCTL(_name, _argtype) \ -+do { \ -+ _argtype i; \ -+ \ -+ if (copy_from_user(&i, arg, sizeof(i))) \ -+ return -EFAULT; \ -+ return bch2_ioctl_##_name(c, i); \ -+} while (0) -+ -+long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) -+{ -+ /* ioctls that don't require admin cap: */ -+ switch (cmd) { -+ case BCH_IOCTL_QUERY_UUID: -+ return bch2_ioctl_query_uuid(c, arg); -+ case BCH_IOCTL_FS_USAGE: -+ return bch2_ioctl_fs_usage(c, arg); -+ case BCH_IOCTL_DEV_USAGE: -+ return bch2_ioctl_dev_usage(c, arg); -+ } -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ switch (cmd) { -+#if 0 -+ case BCH_IOCTL_START: -+ BCH_IOCTL(start, struct bch_ioctl_start); -+ case BCH_IOCTL_STOP: -+ return bch2_ioctl_stop(c); -+#endif -+ case BCH_IOCTL_READ_SUPER: -+ BCH_IOCTL(read_super, struct bch_ioctl_read_super); -+ case BCH_IOCTL_DISK_GET_IDX: -+ 
BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx); -+ } -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EINVAL; -+ -+ /* ioctls that do require admin cap: */ -+ switch (cmd) { -+ case BCH_IOCTL_DISK_ADD: -+ BCH_IOCTL(disk_add, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_REMOVE: -+ BCH_IOCTL(disk_remove, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_ONLINE: -+ BCH_IOCTL(disk_online, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_OFFLINE: -+ BCH_IOCTL(disk_offline, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_SET_STATE: -+ BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state); -+ case BCH_IOCTL_DATA: -+ BCH_IOCTL(data, struct bch_ioctl_data); -+ case BCH_IOCTL_DISK_RESIZE: -+ BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize); -+ -+ default: -+ return -ENOTTY; -+ } -+} -+ -+static DEFINE_IDR(bch_chardev_minor); -+ -+static long bch2_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v) -+{ -+ unsigned minor = iminor(file_inode(filp)); -+ struct bch_fs *c = minor < U8_MAX ? idr_find(&bch_chardev_minor, minor) : NULL; -+ void __user *arg = (void __user *) v; -+ -+ return c -+ ? bch2_fs_ioctl(c, cmd, arg) -+ : bch2_global_ioctl(cmd, arg); -+} -+ -+static const struct file_operations bch_chardev_fops = { -+ .owner = THIS_MODULE, -+ .unlocked_ioctl = bch2_chardev_ioctl, -+ .open = nonseekable_open, -+}; -+ -+static int bch_chardev_major; -+static struct class *bch_chardev_class; -+static struct device *bch_chardev; -+ -+void bch2_fs_chardev_exit(struct bch_fs *c) -+{ -+ if (!IS_ERR_OR_NULL(c->chardev)) -+ device_unregister(c->chardev); -+ if (c->minor >= 0) -+ idr_remove(&bch_chardev_minor, c->minor); -+} -+ -+int bch2_fs_chardev_init(struct bch_fs *c) -+{ -+ c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL); -+ if (c->minor < 0) -+ return c->minor; -+ -+ c->chardev = device_create(bch_chardev_class, NULL, -+ MKDEV(bch_chardev_major, c->minor), c, -+ "bcachefs%u-ctl", c->minor); -+ if (IS_ERR(c->chardev)) -+ return PTR_ERR(c->chardev); -+ -+ return 0; -+} -+ -+void bch2_chardev_exit(void) -+{ -+ if (!IS_ERR_OR_NULL(bch_chardev_class)) -+ device_destroy(bch_chardev_class, -+ MKDEV(bch_chardev_major, U8_MAX)); -+ if (!IS_ERR_OR_NULL(bch_chardev_class)) -+ class_destroy(bch_chardev_class); -+ if (bch_chardev_major > 0) -+ unregister_chrdev(bch_chardev_major, "bcachefs"); -+} -+ -+int __init bch2_chardev_init(void) -+{ -+ bch_chardev_major = register_chrdev(0, "bcachefs-ctl", &bch_chardev_fops); -+ if (bch_chardev_major < 0) -+ return bch_chardev_major; -+ -+ bch_chardev_class = class_create(THIS_MODULE, "bcachefs"); -+ if (IS_ERR(bch_chardev_class)) -+ return PTR_ERR(bch_chardev_class); -+ -+ bch_chardev = device_create(bch_chardev_class, NULL, -+ MKDEV(bch_chardev_major, U8_MAX), -+ NULL, "bcachefs-ctl"); -+ if (IS_ERR(bch_chardev)) -+ return PTR_ERR(bch_chardev); -+ -+ return 0; -+} -+ -+#endif /* NO_BCACHEFS_CHARDEV */ -diff --git a/fs/bcachefs/chardev.h b/fs/bcachefs/chardev.h -new file mode 100644 -index 000000000000..3a4890d39ff9 ---- /dev/null -+++ b/fs/bcachefs/chardev.h -@@ -0,0 +1,31 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CHARDEV_H -+#define _BCACHEFS_CHARDEV_H -+ -+#ifndef NO_BCACHEFS_FS -+ -+long bch2_fs_ioctl(struct bch_fs *, unsigned, void __user *); -+ -+void bch2_fs_chardev_exit(struct bch_fs *); -+int bch2_fs_chardev_init(struct bch_fs *); -+ -+void bch2_chardev_exit(void); -+int __init bch2_chardev_init(void); -+ -+#else -+ -+static inline long bch2_fs_ioctl(struct bch_fs *c, -+ unsigned cmd, void __user * 
arg) -+{ -+ return -ENOSYS; -+} -+ -+static inline void bch2_fs_chardev_exit(struct bch_fs *c) {} -+static inline int bch2_fs_chardev_init(struct bch_fs *c) { return 0; } -+ -+static inline void bch2_chardev_exit(void) {} -+static inline int __init bch2_chardev_init(void) { return 0; } -+ -+#endif /* NO_BCACHEFS_FS */ -+ -+#endif /* _BCACHEFS_CHARDEV_H */ -diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c -new file mode 100644 -index 000000000000..3d88719ba86c ---- /dev/null -+++ b/fs/bcachefs/checksum.c -@@ -0,0 +1,618 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "checksum.h" -+#include "super.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static u64 bch2_checksum_init(unsigned type) -+{ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ return 0; -+ case BCH_CSUM_CRC32C_NONZERO: -+ return U32_MAX; -+ case BCH_CSUM_CRC64_NONZERO: -+ return U64_MAX; -+ case BCH_CSUM_CRC32C: -+ return 0; -+ case BCH_CSUM_CRC64: -+ return 0; -+ default: -+ BUG(); -+ } -+} -+ -+static u64 bch2_checksum_final(unsigned type, u64 crc) -+{ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ return 0; -+ case BCH_CSUM_CRC32C_NONZERO: -+ return crc ^ U32_MAX; -+ case BCH_CSUM_CRC64_NONZERO: -+ return crc ^ U64_MAX; -+ case BCH_CSUM_CRC32C: -+ return crc; -+ case BCH_CSUM_CRC64: -+ return crc; -+ default: -+ BUG(); -+ } -+} -+ -+static u64 bch2_checksum_update(unsigned type, u64 crc, const void *data, size_t len) -+{ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ return 0; -+ case BCH_CSUM_CRC32C_NONZERO: -+ case BCH_CSUM_CRC32C: -+ return crc32c(crc, data, len); -+ case BCH_CSUM_CRC64_NONZERO: -+ case BCH_CSUM_CRC64: -+ return crc64_be(crc, data, len); -+ default: -+ BUG(); -+ } -+} -+ -+static inline void do_encrypt_sg(struct crypto_sync_skcipher *tfm, -+ struct nonce nonce, -+ struct scatterlist *sg, size_t len) -+{ -+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); -+ int ret; -+ -+ skcipher_request_set_sync_tfm(req, tfm); -+ skcipher_request_set_crypt(req, sg, sg, len, nonce.d); -+ -+ ret = crypto_skcipher_encrypt(req); -+ BUG_ON(ret); -+} -+ -+static inline void do_encrypt(struct crypto_sync_skcipher *tfm, -+ struct nonce nonce, -+ void *buf, size_t len) -+{ -+ struct scatterlist sg; -+ -+ sg_init_one(&sg, buf, len); -+ do_encrypt_sg(tfm, nonce, &sg, len); -+} -+ -+int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, -+ void *buf, size_t len) -+{ -+ struct crypto_sync_skcipher *chacha20 = -+ crypto_alloc_sync_skcipher("chacha20", 0, 0); -+ int ret; -+ -+ if (!chacha20) { -+ pr_err("error requesting chacha20 module: %li", PTR_ERR(chacha20)); -+ return PTR_ERR(chacha20); -+ } -+ -+ ret = crypto_skcipher_setkey(&chacha20->base, -+ (void *) key, sizeof(*key)); -+ if (ret) { -+ pr_err("crypto_skcipher_setkey() error: %i", ret); -+ goto err; -+ } -+ -+ do_encrypt(chacha20, nonce, buf, len); -+err: -+ crypto_free_sync_skcipher(chacha20); -+ return ret; -+} -+ -+static void gen_poly_key(struct bch_fs *c, struct shash_desc *desc, -+ struct nonce nonce) -+{ -+ u8 key[POLY1305_KEY_SIZE]; -+ -+ nonce.d[3] ^= BCH_NONCE_POLY; -+ -+ memset(key, 0, sizeof(key)); -+ do_encrypt(c->chacha20, nonce, key, sizeof(key)); -+ -+ desc->tfm = c->poly1305; -+ crypto_shash_init(desc); -+ crypto_shash_update(desc, key, sizeof(key)); -+} -+ -+struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type, -+ struct nonce nonce, const void *data, size_t len) -+{ -+ switch (type) { -+ case 
BCH_CSUM_NONE: -+ case BCH_CSUM_CRC32C_NONZERO: -+ case BCH_CSUM_CRC64_NONZERO: -+ case BCH_CSUM_CRC32C: -+ case BCH_CSUM_CRC64: { -+ u64 crc = bch2_checksum_init(type); -+ -+ crc = bch2_checksum_update(type, crc, data, len); -+ crc = bch2_checksum_final(type, crc); -+ -+ return (struct bch_csum) { .lo = cpu_to_le64(crc) }; -+ } -+ -+ case BCH_CSUM_CHACHA20_POLY1305_80: -+ case BCH_CSUM_CHACHA20_POLY1305_128: { -+ SHASH_DESC_ON_STACK(desc, c->poly1305); -+ u8 digest[POLY1305_DIGEST_SIZE]; -+ struct bch_csum ret = { 0 }; -+ -+ gen_poly_key(c, desc, nonce); -+ -+ crypto_shash_update(desc, data, len); -+ crypto_shash_final(desc, digest); -+ -+ memcpy(&ret, digest, bch_crc_bytes[type]); -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_encrypt(struct bch_fs *c, unsigned type, -+ struct nonce nonce, void *data, size_t len) -+{ -+ if (!bch2_csum_type_is_encryption(type)) -+ return; -+ -+ do_encrypt(c->chacha20, nonce, data, len); -+} -+ -+static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type, -+ struct nonce nonce, struct bio *bio, -+ struct bvec_iter *iter) -+{ -+ struct bio_vec bv; -+ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ return (struct bch_csum) { 0 }; -+ case BCH_CSUM_CRC32C_NONZERO: -+ case BCH_CSUM_CRC64_NONZERO: -+ case BCH_CSUM_CRC32C: -+ case BCH_CSUM_CRC64: { -+ u64 crc = bch2_checksum_init(type); -+ -+#ifdef CONFIG_HIGHMEM -+ __bio_for_each_segment(bv, bio, *iter, *iter) { -+ void *p = kmap_atomic(bv.bv_page) + bv.bv_offset; -+ crc = bch2_checksum_update(type, -+ crc, p, bv.bv_len); -+ kunmap_atomic(p); -+ } -+#else -+ __bio_for_each_bvec(bv, bio, *iter, *iter) -+ crc = bch2_checksum_update(type, crc, -+ page_address(bv.bv_page) + bv.bv_offset, -+ bv.bv_len); -+#endif -+ crc = bch2_checksum_final(type, crc); -+ return (struct bch_csum) { .lo = cpu_to_le64(crc) }; -+ } -+ -+ case BCH_CSUM_CHACHA20_POLY1305_80: -+ case BCH_CSUM_CHACHA20_POLY1305_128: { -+ SHASH_DESC_ON_STACK(desc, c->poly1305); -+ u8 digest[POLY1305_DIGEST_SIZE]; -+ struct bch_csum ret = { 0 }; -+ -+ gen_poly_key(c, desc, nonce); -+ -+#ifdef CONFIG_HIGHMEM -+ __bio_for_each_segment(bv, bio, *iter, *iter) { -+ void *p = kmap_atomic(bv.bv_page) + bv.bv_offset; -+ -+ crypto_shash_update(desc, p, bv.bv_len); -+ kunmap_atomic(p); -+ } -+#else -+ __bio_for_each_bvec(bv, bio, *iter, *iter) -+ crypto_shash_update(desc, -+ page_address(bv.bv_page) + bv.bv_offset, -+ bv.bv_len); -+#endif -+ crypto_shash_final(desc, digest); -+ -+ memcpy(&ret, digest, bch_crc_bytes[type]); -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+struct bch_csum bch2_checksum_bio(struct bch_fs *c, unsigned type, -+ struct nonce nonce, struct bio *bio) -+{ -+ struct bvec_iter iter = bio->bi_iter; -+ -+ return __bch2_checksum_bio(c, type, nonce, bio, &iter); -+} -+ -+void bch2_encrypt_bio(struct bch_fs *c, unsigned type, -+ struct nonce nonce, struct bio *bio) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ struct scatterlist sgl[16], *sg = sgl; -+ size_t bytes = 0; -+ -+ if (!bch2_csum_type_is_encryption(type)) -+ return; -+ -+ sg_init_table(sgl, ARRAY_SIZE(sgl)); -+ -+ bio_for_each_segment(bv, bio, iter) { -+ if (sg == sgl + ARRAY_SIZE(sgl)) { -+ sg_mark_end(sg - 1); -+ do_encrypt_sg(c->chacha20, nonce, sgl, bytes); -+ -+ nonce = nonce_add(nonce, bytes); -+ bytes = 0; -+ -+ sg_init_table(sgl, ARRAY_SIZE(sgl)); -+ sg = sgl; -+ } -+ -+ sg_set_page(sg++, bv.bv_page, bv.bv_len, bv.bv_offset); -+ bytes += bv.bv_len; -+ } -+ -+ sg_mark_end(sg - 1); -+ do_encrypt_sg(c->chacha20, nonce, sgl, bytes); -+} -+ 
-+}
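A minimal userspace sketch (not part of the patch; the helper name crc32c_raw is invented for the demo) of the identity that bch2_checksum_merge() below relies on: because the plain CRC checksum types are seeded with 0 and not post-inverted, the checksum of A followed by B can be obtained by running A's checksum over len(B) zero bytes and XORing in B's checksum, without re-reading the data:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* raw reflected CRC-32C register update: seed 0, no final inversion */
static uint32_t crc32c_raw(uint32_t crc, const void *data, size_t len)
{
	const uint8_t *p = data;

	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return crc;
}

int main(void)
{
	const char a[] = "hello ", b[] = "bcachefs";
	uint8_t zeroes[sizeof(b) - 1] = { 0 };
	char ab[32];

	uint32_t crc_a = crc32c_raw(0, a, strlen(a));
	uint32_t crc_b = crc32c_raw(0, b, strlen(b));

	/* "merge": extend crc_a over strlen(b) zero bytes, then xor in crc_b */
	uint32_t merged = crc32c_raw(crc_a, zeroes, strlen(b)) ^ crc_b;

	/* checksum of the concatenation, computed in one pass for comparison */
	snprintf(ab, sizeof(ab), "%s%s", a, b);
	uint32_t direct = crc32c_raw(0, ab, strlen(ab));

	printf("merged=%08x direct=%08x equal=%d\n",
	       (unsigned) merged, (unsigned) direct, merged == direct);
	return 0;
}

bch2_checksum_merge() applies the same idea a page at a time against ZERO_PAGE(0), which is why only the checksum types listed in bch2_checksum_mergeable() qualify.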
-+struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a, -+ struct bch_csum b, size_t b_len) -+{ -+ BUG_ON(!bch2_checksum_mergeable(type)); -+ -+ while (b_len) { -+ unsigned b = min_t(unsigned, b_len, PAGE_SIZE); -+ -+ a.lo = bch2_checksum_update(type, a.lo, -+ page_address(ZERO_PAGE(0)), b); -+ b_len -= b; -+ } -+ -+ a.lo ^= b.lo; -+ a.hi ^= b.hi; -+ return a; -+} -+ -+int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, -+ struct bversion version, -+ struct bch_extent_crc_unpacked crc_old, -+ struct bch_extent_crc_unpacked *crc_a, -+ struct bch_extent_crc_unpacked *crc_b, -+ unsigned len_a, unsigned len_b, -+ unsigned new_csum_type) -+{ -+ struct bvec_iter iter = bio->bi_iter; -+ struct nonce nonce = extent_nonce(version, crc_old); -+ struct bch_csum merged = { 0 }; -+ struct crc_split { -+ struct bch_extent_crc_unpacked *crc; -+ unsigned len; -+ unsigned csum_type; -+ struct bch_csum csum; -+ } splits[3] = { -+ { crc_a, len_a, new_csum_type }, -+ { crc_b, len_b, new_csum_type }, -+ { NULL, bio_sectors(bio) - len_a - len_b, new_csum_type }, -+ }, *i; -+ bool mergeable = crc_old.csum_type == new_csum_type && -+ bch2_checksum_mergeable(new_csum_type); -+ unsigned crc_nonce = crc_old.nonce; -+ -+ BUG_ON(len_a + len_b > bio_sectors(bio)); -+ BUG_ON(crc_old.uncompressed_size != bio_sectors(bio)); -+ BUG_ON(crc_is_compressed(crc_old)); -+ BUG_ON(bch2_csum_type_is_encryption(crc_old.csum_type) != -+ bch2_csum_type_is_encryption(new_csum_type)); -+ -+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) { -+ iter.bi_size = i->len << 9; -+ if (mergeable || i->crc) -+ i->csum = __bch2_checksum_bio(c, i->csum_type, -+ nonce, bio, &iter); -+ else -+ bio_advance_iter(bio, &iter, i->len << 9); -+ nonce = nonce_add(nonce, i->len << 9); -+ } -+ -+ if (mergeable) -+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) -+ merged = bch2_checksum_merge(new_csum_type, merged, -+ i->csum, i->len << 9); -+ else -+ merged = bch2_checksum_bio(c, crc_old.csum_type, -+ extent_nonce(version, crc_old), bio); -+ -+ if (bch2_crc_cmp(merged, crc_old.csum)) -+ return -EIO; -+ -+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) { -+ if (i->crc) -+ *i->crc = (struct bch_extent_crc_unpacked) { -+ .csum_type = i->csum_type, -+ .compression_type = crc_old.compression_type, -+ .compressed_size = i->len, -+ .uncompressed_size = i->len, -+ .offset = 0, -+ .live_size = i->len, -+ .nonce = crc_nonce, -+ .csum = i->csum, -+ }; -+ -+ if (bch2_csum_type_is_encryption(new_csum_type)) -+ crc_nonce += i->len; -+ } -+ -+ return 0; -+} -+ -+#ifdef __KERNEL__ -+int bch2_request_key(struct bch_sb *sb, struct bch_key *key) -+{ -+ char key_description[60]; -+ struct key *keyring_key; -+ const struct user_key_payload *ukp; -+ int ret; -+ -+ snprintf(key_description, sizeof(key_description), -+ "bcachefs:%pUb", &sb->user_uuid); -+ -+ keyring_key = request_key(&key_type_logon, key_description, NULL); -+ if (IS_ERR(keyring_key)) -+ return PTR_ERR(keyring_key); -+ -+ down_read(&keyring_key->sem); -+ ukp = dereference_key_locked(keyring_key); -+ if (ukp->datalen == sizeof(*key)) { -+ memcpy(key, ukp->data, ukp->datalen); -+ ret = 0; -+ } else { -+ ret = -EINVAL; -+ } -+ up_read(&keyring_key->sem); -+ key_put(keyring_key); -+ -+ return ret; -+} -+#else -+#include -+#include -+ -+int bch2_request_key(struct bch_sb *sb, struct bch_key *key) -+{ -+ key_serial_t key_id; -+ char key_description[60]; -+ char uuid[40]; -+ -+ uuid_unparse_lower(sb->user_uuid.b, uuid); -+ sprintf(key_description, "bcachefs:%s", uuid); -+ -+ 
key_id = request_key("user", key_description, NULL, -+ KEY_SPEC_USER_KEYRING); -+ if (key_id < 0) -+ return -errno; -+ -+ if (keyctl_read(key_id, (void *) key, sizeof(*key)) != sizeof(*key)) -+ return -1; -+ -+ return 0; -+} -+#endif -+ -+int bch2_decrypt_sb_key(struct bch_fs *c, -+ struct bch_sb_field_crypt *crypt, -+ struct bch_key *key) -+{ -+ struct bch_encrypted_key sb_key = crypt->key; -+ struct bch_key user_key; -+ int ret = 0; -+ -+ /* is key encrypted? */ -+ if (!bch2_key_is_encrypted(&sb_key)) -+ goto out; -+ -+ ret = bch2_request_key(c->disk_sb.sb, &user_key); -+ if (ret) { -+ bch_err(c, "error requesting encryption key: %i", ret); -+ goto err; -+ } -+ -+ /* decrypt real key: */ -+ ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c), -+ &sb_key, sizeof(sb_key)); -+ if (ret) -+ goto err; -+ -+ if (bch2_key_is_encrypted(&sb_key)) { -+ bch_err(c, "incorrect encryption key"); -+ ret = -EINVAL; -+ goto err; -+ } -+out: -+ *key = sb_key.key; -+err: -+ memzero_explicit(&sb_key, sizeof(sb_key)); -+ memzero_explicit(&user_key, sizeof(user_key)); -+ return ret; -+} -+ -+static int bch2_alloc_ciphers(struct bch_fs *c) -+{ -+ if (!c->chacha20) -+ c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0); -+ if (IS_ERR(c->chacha20)) { -+ bch_err(c, "error requesting chacha20 module: %li", -+ PTR_ERR(c->chacha20)); -+ return PTR_ERR(c->chacha20); -+ } -+ -+ if (!c->poly1305) -+ c->poly1305 = crypto_alloc_shash("poly1305", 0, 0); -+ if (IS_ERR(c->poly1305)) { -+ bch_err(c, "error requesting poly1305 module: %li", -+ PTR_ERR(c->poly1305)); -+ return PTR_ERR(c->poly1305); -+ } -+ -+ return 0; -+} -+ -+int bch2_disable_encryption(struct bch_fs *c) -+{ -+ struct bch_sb_field_crypt *crypt; -+ struct bch_key key; -+ int ret = -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ -+ crypt = bch2_sb_get_crypt(c->disk_sb.sb); -+ if (!crypt) -+ goto out; -+ -+ /* is key encrypted? */ -+ ret = 0; -+ if (bch2_key_is_encrypted(&crypt->key)) -+ goto out; -+ -+ ret = bch2_decrypt_sb_key(c, crypt, &key); -+ if (ret) -+ goto out; -+ -+ crypt->key.magic = BCH_KEY_MAGIC; -+ crypt->key.key = key; -+ -+ SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 0); -+ bch2_write_super(c); -+out: -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_enable_encryption(struct bch_fs *c, bool keyed) -+{ -+ struct bch_encrypted_key key; -+ struct bch_key user_key; -+ struct bch_sb_field_crypt *crypt; -+ int ret = -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ -+ /* Do we already have an encryption key? 
*/ -+ if (bch2_sb_get_crypt(c->disk_sb.sb)) -+ goto err; -+ -+ ret = bch2_alloc_ciphers(c); -+ if (ret) -+ goto err; -+ -+ key.magic = BCH_KEY_MAGIC; -+ get_random_bytes(&key.key, sizeof(key.key)); -+ -+ if (keyed) { -+ ret = bch2_request_key(c->disk_sb.sb, &user_key); -+ if (ret) { -+ bch_err(c, "error requesting encryption key: %i", ret); -+ goto err; -+ } -+ -+ ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c), -+ &key, sizeof(key)); -+ if (ret) -+ goto err; -+ } -+ -+ ret = crypto_skcipher_setkey(&c->chacha20->base, -+ (void *) &key.key, sizeof(key.key)); -+ if (ret) -+ goto err; -+ -+ crypt = bch2_sb_resize_crypt(&c->disk_sb, sizeof(*crypt) / sizeof(u64)); -+ if (!crypt) { -+ ret = -ENOMEM; /* XXX this technically could be -ENOSPC */ -+ goto err; -+ } -+ -+ crypt->key = key; -+ -+ /* write superblock */ -+ SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 1); -+ bch2_write_super(c); -+err: -+ mutex_unlock(&c->sb_lock); -+ memzero_explicit(&user_key, sizeof(user_key)); -+ memzero_explicit(&key, sizeof(key)); -+ return ret; -+} -+ -+void bch2_fs_encryption_exit(struct bch_fs *c) -+{ -+ if (!IS_ERR_OR_NULL(c->poly1305)) -+ crypto_free_shash(c->poly1305); -+ if (!IS_ERR_OR_NULL(c->chacha20)) -+ crypto_free_sync_skcipher(c->chacha20); -+ if (!IS_ERR_OR_NULL(c->sha256)) -+ crypto_free_shash(c->sha256); -+} -+ -+int bch2_fs_encryption_init(struct bch_fs *c) -+{ -+ struct bch_sb_field_crypt *crypt; -+ struct bch_key key; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ c->sha256 = crypto_alloc_shash("sha256", 0, 0); -+ if (IS_ERR(c->sha256)) { -+ bch_err(c, "error requesting sha256 module"); -+ ret = PTR_ERR(c->sha256); -+ goto out; -+ } -+ -+ crypt = bch2_sb_get_crypt(c->disk_sb.sb); -+ if (!crypt) -+ goto out; -+ -+ ret = bch2_alloc_ciphers(c); -+ if (ret) -+ goto out; -+ -+ ret = bch2_decrypt_sb_key(c, crypt, &key); -+ if (ret) -+ goto out; -+ -+ ret = crypto_skcipher_setkey(&c->chacha20->base, -+ (void *) &key.key, sizeof(key.key)); -+ if (ret) -+ goto out; -+out: -+ memzero_explicit(&key, sizeof(key)); -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h -new file mode 100644 -index 000000000000..24dee8039d57 ---- /dev/null -+++ b/fs/bcachefs/checksum.h -@@ -0,0 +1,202 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CHECKSUM_H -+#define _BCACHEFS_CHECKSUM_H -+ -+#include "bcachefs.h" -+#include "extents_types.h" -+#include "super-io.h" -+ -+#include -+#include -+ -+static inline bool bch2_checksum_mergeable(unsigned type) -+{ -+ -+ switch (type) { -+ case BCH_CSUM_NONE: -+ case BCH_CSUM_CRC32C: -+ case BCH_CSUM_CRC64: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+struct bch_csum bch2_checksum_merge(unsigned, struct bch_csum, -+ struct bch_csum, size_t); -+ -+#define BCH_NONCE_EXTENT cpu_to_le32(1 << 28) -+#define BCH_NONCE_BTREE cpu_to_le32(2 << 28) -+#define BCH_NONCE_JOURNAL cpu_to_le32(3 << 28) -+#define BCH_NONCE_PRIO cpu_to_le32(4 << 28) -+#define BCH_NONCE_POLY cpu_to_le32(1 << 31) -+ -+struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce, -+ const void *, size_t); -+ -+/* -+ * This is used for various on disk data structures - bch_sb, prio_set, bset, -+ * jset: The checksum is _always_ the first field of these structs -+ */ -+#define csum_vstruct(_c, _type, _nonce, _i) \ -+({ \ -+ const void *start = ((const void *) (_i)) + sizeof((_i)->csum); \ -+ const void *end = vstruct_end(_i); \ -+ \ -+ bch2_checksum(_c, _type, _nonce, start, end - start); \ -+}) 
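As a rough illustration of the csum_vstruct() convention above (the checksum is always the first field of the on-disk struct, and the checksummed range starts immediately after it), here is a self-contained userspace toy. struct toy_vstruct, toy_csum() and crc32c_raw() are invented for the demo, the checksum type is fixed to CRC-32C, and a fixed sizeof() stands in for vstruct_end(), which the real macro needs because bch_sb, jset and bset are variable length:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_vstruct {
	uint32_t csum;		/* must stay first, like the real structs */
	uint32_t version;
	uint64_t seq;
	uint8_t  data[16];
};

static uint32_t crc32c_raw(uint32_t crc, const void *p, size_t len)
{
	const uint8_t *d = p;

	while (len--) {
		crc ^= *d++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return crc;
}

static uint32_t toy_csum(const struct toy_vstruct *v)
{
	/* checksum everything after the csum field, as csum_vstruct() does */
	const char *start = (const char *) v + sizeof(v->csum);
	const char *end   = (const char *) v + sizeof(*v);

	return crc32c_raw(0, start, end - start);
}

int main(void)
{
	struct toy_vstruct v = { .version = 1, .seq = 42 };

	memcpy(v.data, "hello", 5);
	v.csum = toy_csum(&v);				/* write side */

	printf("valid=%d\n", v.csum == toy_csum(&v));	/* read side */
	return 0;
}

The write side fills in csum last, over exactly the range the read side will recompute; a mismatch on re-read means the structure is corrupt.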
-+ -+int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t); -+int bch2_request_key(struct bch_sb *, struct bch_key *); -+ -+void bch2_encrypt(struct bch_fs *, unsigned, struct nonce, -+ void *data, size_t); -+ -+struct bch_csum bch2_checksum_bio(struct bch_fs *, unsigned, -+ struct nonce, struct bio *); -+ -+int bch2_rechecksum_bio(struct bch_fs *, struct bio *, struct bversion, -+ struct bch_extent_crc_unpacked, -+ struct bch_extent_crc_unpacked *, -+ struct bch_extent_crc_unpacked *, -+ unsigned, unsigned, unsigned); -+ -+void bch2_encrypt_bio(struct bch_fs *, unsigned, -+ struct nonce, struct bio *); -+ -+int bch2_decrypt_sb_key(struct bch_fs *, struct bch_sb_field_crypt *, -+ struct bch_key *); -+ -+int bch2_disable_encryption(struct bch_fs *); -+int bch2_enable_encryption(struct bch_fs *, bool); -+ -+void bch2_fs_encryption_exit(struct bch_fs *); -+int bch2_fs_encryption_init(struct bch_fs *); -+ -+static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opts type, -+ bool data) -+{ -+ switch (type) { -+ case BCH_CSUM_OPT_NONE: -+ return BCH_CSUM_NONE; -+ case BCH_CSUM_OPT_CRC32C: -+ return data ? BCH_CSUM_CRC32C : BCH_CSUM_CRC32C_NONZERO; -+ case BCH_CSUM_OPT_CRC64: -+ return data ? BCH_CSUM_CRC64 : BCH_CSUM_CRC64_NONZERO; -+ default: -+ BUG(); -+ } -+} -+ -+static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c, -+ unsigned opt) -+{ -+ if (c->sb.encryption_type) -+ return c->opts.wide_macs -+ ? BCH_CSUM_CHACHA20_POLY1305_128 -+ : BCH_CSUM_CHACHA20_POLY1305_80; -+ -+ return bch2_csum_opt_to_type(opt, true); -+} -+ -+static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c) -+{ -+ if (c->sb.encryption_type) -+ return BCH_CSUM_CHACHA20_POLY1305_128; -+ -+ return bch2_csum_opt_to_type(c->opts.metadata_checksum, false); -+} -+ -+static const unsigned bch2_compression_opt_to_type[] = { -+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t, -+ BCH_COMPRESSION_OPTS() -+#undef x -+}; -+ -+static inline bool bch2_checksum_type_valid(const struct bch_fs *c, -+ unsigned type) -+{ -+ if (type >= BCH_CSUM_NR) -+ return false; -+ -+ if (bch2_csum_type_is_encryption(type) && !c->chacha20) -+ return false; -+ -+ return true; -+} -+ -+/* returns true if not equal */ -+static inline bool bch2_crc_cmp(struct bch_csum l, struct bch_csum r) -+{ -+ /* -+ * XXX: need some way of preventing the compiler from optimizing this -+ * into a form that isn't constant time.. -+ */ -+ return ((l.lo ^ r.lo) | (l.hi ^ r.hi)) != 0; -+} -+ -+/* for skipping ahead and encrypting/decrypting at an offset: */ -+static inline struct nonce nonce_add(struct nonce nonce, unsigned offset) -+{ -+ EBUG_ON(offset & (CHACHA_BLOCK_SIZE - 1)); -+ -+ le32_add_cpu(&nonce.d[0], offset / CHACHA_BLOCK_SIZE); -+ return nonce; -+} -+ -+static inline struct nonce null_nonce(void) -+{ -+ struct nonce ret; -+ -+ memset(&ret, 0, sizeof(ret)); -+ return ret; -+} -+ -+static inline struct nonce extent_nonce(struct bversion version, -+ struct bch_extent_crc_unpacked crc) -+{ -+ unsigned compression_type = crc_is_compressed(crc) -+ ? crc.compression_type -+ : 0; -+ unsigned size = compression_type ? 
crc.uncompressed_size : 0; -+ struct nonce nonce = (struct nonce) {{ -+ [0] = cpu_to_le32(size << 22), -+ [1] = cpu_to_le32(version.lo), -+ [2] = cpu_to_le32(version.lo >> 32), -+ [3] = cpu_to_le32(version.hi| -+ (compression_type << 24))^BCH_NONCE_EXTENT, -+ }}; -+ -+ return nonce_add(nonce, crc.nonce << 9); -+} -+ -+static inline bool bch2_key_is_encrypted(struct bch_encrypted_key *key) -+{ -+ return le64_to_cpu(key->magic) != BCH_KEY_MAGIC; -+} -+ -+static inline struct nonce __bch2_sb_key_nonce(struct bch_sb *sb) -+{ -+ __le64 magic = __bch2_sb_magic(sb); -+ -+ return (struct nonce) {{ -+ [0] = 0, -+ [1] = 0, -+ [2] = ((__le32 *) &magic)[0], -+ [3] = ((__le32 *) &magic)[1], -+ }}; -+} -+ -+static inline struct nonce bch2_sb_key_nonce(struct bch_fs *c) -+{ -+ __le64 magic = bch2_sb_magic(c); -+ -+ return (struct nonce) {{ -+ [0] = 0, -+ [1] = 0, -+ [2] = ((__le32 *) &magic)[0], -+ [3] = ((__le32 *) &magic)[1], -+ }}; -+} -+ -+#endif /* _BCACHEFS_CHECKSUM_H */ -diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c -new file mode 100644 -index 000000000000..1d1590de55e8 ---- /dev/null -+++ b/fs/bcachefs/clock.c -@@ -0,0 +1,191 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "clock.h" -+ -+#include -+#include -+#include -+ -+static inline long io_timer_cmp(io_timer_heap *h, -+ struct io_timer *l, -+ struct io_timer *r) -+{ -+ return l->expire - r->expire; -+} -+ -+void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer) -+{ -+ size_t i; -+ -+ spin_lock(&clock->timer_lock); -+ -+ if (time_after_eq((unsigned long) atomic_long_read(&clock->now), -+ timer->expire)) { -+ spin_unlock(&clock->timer_lock); -+ timer->fn(timer); -+ return; -+ } -+ -+ for (i = 0; i < clock->timers.used; i++) -+ if (clock->timers.data[i] == timer) -+ goto out; -+ -+ BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL)); -+out: -+ spin_unlock(&clock->timer_lock); -+} -+ -+void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer) -+{ -+ size_t i; -+ -+ spin_lock(&clock->timer_lock); -+ -+ for (i = 0; i < clock->timers.used; i++) -+ if (clock->timers.data[i] == timer) { -+ heap_del(&clock->timers, i, io_timer_cmp, NULL); -+ break; -+ } -+ -+ spin_unlock(&clock->timer_lock); -+} -+ -+struct io_clock_wait { -+ struct io_timer io_timer; -+ struct timer_list cpu_timer; -+ struct task_struct *task; -+ int expired; -+}; -+ -+static void io_clock_wait_fn(struct io_timer *timer) -+{ -+ struct io_clock_wait *wait = container_of(timer, -+ struct io_clock_wait, io_timer); -+ -+ wait->expired = 1; -+ wake_up_process(wait->task); -+} -+ -+static void io_clock_cpu_timeout(struct timer_list *timer) -+{ -+ struct io_clock_wait *wait = container_of(timer, -+ struct io_clock_wait, cpu_timer); -+ -+ wait->expired = 1; -+ wake_up_process(wait->task); -+} -+ -+void bch2_io_clock_schedule_timeout(struct io_clock *clock, unsigned long until) -+{ -+ struct io_clock_wait wait; -+ -+ /* XXX: calculate sleep time rigorously */ -+ wait.io_timer.expire = until; -+ wait.io_timer.fn = io_clock_wait_fn; -+ wait.task = current; -+ wait.expired = 0; -+ bch2_io_timer_add(clock, &wait.io_timer); -+ -+ schedule(); -+ -+ bch2_io_timer_del(clock, &wait.io_timer); -+} -+ -+void bch2_kthread_io_clock_wait(struct io_clock *clock, -+ unsigned long io_until, -+ unsigned long cpu_timeout) -+{ -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ struct io_clock_wait wait; -+ -+ wait.io_timer.expire = io_until; -+ wait.io_timer.fn = io_clock_wait_fn; -+ wait.task = current; -+ wait.expired = 0; -+ 
bch2_io_timer_add(clock, &wait.io_timer); -+ -+ timer_setup_on_stack(&wait.cpu_timer, io_clock_cpu_timeout, 0); -+ -+ if (cpu_timeout != MAX_SCHEDULE_TIMEOUT) -+ mod_timer(&wait.cpu_timer, cpu_timeout + jiffies); -+ -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ if (kthread && kthread_should_stop()) -+ break; -+ -+ if (wait.expired) -+ break; -+ -+ schedule(); -+ try_to_freeze(); -+ } -+ -+ __set_current_state(TASK_RUNNING); -+ del_singleshot_timer_sync(&wait.cpu_timer); -+ destroy_timer_on_stack(&wait.cpu_timer); -+ bch2_io_timer_del(clock, &wait.io_timer); -+} -+ -+static struct io_timer *get_expired_timer(struct io_clock *clock, -+ unsigned long now) -+{ -+ struct io_timer *ret = NULL; -+ -+ spin_lock(&clock->timer_lock); -+ -+ if (clock->timers.used && -+ time_after_eq(now, clock->timers.data[0]->expire)) -+ heap_pop(&clock->timers, ret, io_timer_cmp, NULL); -+ -+ spin_unlock(&clock->timer_lock); -+ -+ return ret; -+} -+ -+void __bch2_increment_clock(struct io_clock *clock, unsigned sectors) -+{ -+ struct io_timer *timer; -+ unsigned long now = atomic_long_add_return(sectors, &clock->now); -+ -+ while ((timer = get_expired_timer(clock, now))) -+ timer->fn(timer); -+} -+ -+void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) -+{ -+ unsigned long now; -+ unsigned i; -+ -+ spin_lock(&clock->timer_lock); -+ now = atomic_long_read(&clock->now); -+ -+ for (i = 0; i < clock->timers.used; i++) -+ pr_buf(out, "%ps:\t%li\n", -+ clock->timers.data[i]->fn, -+ clock->timers.data[i]->expire - now); -+ spin_unlock(&clock->timer_lock); -+} -+ -+void bch2_io_clock_exit(struct io_clock *clock) -+{ -+ free_heap(&clock->timers); -+ free_percpu(clock->pcpu_buf); -+} -+ -+int bch2_io_clock_init(struct io_clock *clock) -+{ -+ atomic_long_set(&clock->now, 0); -+ spin_lock_init(&clock->timer_lock); -+ -+ clock->max_slop = IO_CLOCK_PCPU_SECTORS * num_possible_cpus(); -+ -+ clock->pcpu_buf = alloc_percpu(*clock->pcpu_buf); -+ if (!clock->pcpu_buf) -+ return -ENOMEM; -+ -+ if (!init_heap(&clock->timers, NR_IO_TIMERS, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ return 0; -+} -diff --git a/fs/bcachefs/clock.h b/fs/bcachefs/clock.h -new file mode 100644 -index 000000000000..70a0f7436c84 ---- /dev/null -+++ b/fs/bcachefs/clock.h -@@ -0,0 +1,38 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CLOCK_H -+#define _BCACHEFS_CLOCK_H -+ -+void bch2_io_timer_add(struct io_clock *, struct io_timer *); -+void bch2_io_timer_del(struct io_clock *, struct io_timer *); -+void bch2_kthread_io_clock_wait(struct io_clock *, unsigned long, -+ unsigned long); -+ -+void __bch2_increment_clock(struct io_clock *, unsigned); -+ -+static inline void bch2_increment_clock(struct bch_fs *c, unsigned sectors, -+ int rw) -+{ -+ struct io_clock *clock = &c->io_clock[rw]; -+ -+ if (unlikely(this_cpu_add_return(*clock->pcpu_buf, sectors) >= -+ IO_CLOCK_PCPU_SECTORS)) -+ __bch2_increment_clock(clock, this_cpu_xchg(*clock->pcpu_buf, 0)); -+} -+ -+void bch2_io_clock_schedule_timeout(struct io_clock *, unsigned long); -+ -+#define bch2_kthread_wait_event_ioclock_timeout(condition, clock, timeout)\ -+({ \ -+ long __ret = timeout; \ -+ might_sleep(); \ -+ if (!___wait_cond_timeout(condition)) \ -+ __ret = __wait_event_timeout(wq, condition, timeout); \ -+ __ret; \ -+}) -+ -+void bch2_io_timers_to_text(struct printbuf *, struct io_clock *); -+ -+void bch2_io_clock_exit(struct io_clock *); -+int bch2_io_clock_init(struct io_clock *); -+ -+#endif /* _BCACHEFS_CLOCK_H */ -diff --git a/fs/bcachefs/clock_types.h 
b/fs/bcachefs/clock_types.h -new file mode 100644 -index 000000000000..92c740a47565 ---- /dev/null -+++ b/fs/bcachefs/clock_types.h -@@ -0,0 +1,37 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CLOCK_TYPES_H -+#define _BCACHEFS_CLOCK_TYPES_H -+ -+#include "util.h" -+ -+#define NR_IO_TIMERS (BCH_SB_MEMBERS_MAX * 3) -+ -+/* -+ * Clocks/timers in units of sectors of IO: -+ * -+ * Note - they use percpu batching, so they're only approximate. -+ */ -+ -+struct io_timer; -+typedef void (*io_timer_fn)(struct io_timer *); -+ -+struct io_timer { -+ io_timer_fn fn; -+ unsigned long expire; -+}; -+ -+/* Amount to buffer up on a percpu counter */ -+#define IO_CLOCK_PCPU_SECTORS 128 -+ -+typedef HEAP(struct io_timer *) io_timer_heap; -+ -+struct io_clock { -+ atomic_long_t now; -+ u16 __percpu *pcpu_buf; -+ unsigned max_slop; -+ -+ spinlock_t timer_lock; -+ io_timer_heap timers; -+}; -+ -+#endif /* _BCACHEFS_CLOCK_TYPES_H */ -diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c -new file mode 100644 -index 000000000000..b50d2b0d5fd3 ---- /dev/null -+++ b/fs/bcachefs/compress.c -@@ -0,0 +1,629 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "checksum.h" -+#include "compress.h" -+#include "extents.h" -+#include "io.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+ -+/* Bounce buffer: */ -+struct bbuf { -+ void *b; -+ enum { -+ BB_NONE, -+ BB_VMAP, -+ BB_KMALLOC, -+ BB_MEMPOOL, -+ } type; -+ int rw; -+}; -+ -+static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw) -+{ -+ void *b; -+ -+ BUG_ON(size > c->sb.encoded_extent_max << 9); -+ -+ b = kmalloc(size, GFP_NOIO|__GFP_NOWARN); -+ if (b) -+ return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw }; -+ -+ b = mempool_alloc(&c->compression_bounce[rw], GFP_NOIO); -+ if (b) -+ return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw }; -+ -+ BUG(); -+} -+ -+static bool bio_phys_contig(struct bio *bio, struct bvec_iter start) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ void *expected_start = NULL; -+ -+ __bio_for_each_bvec(bv, bio, iter, start) { -+ if (expected_start && -+ expected_start != page_address(bv.bv_page) + bv.bv_offset) -+ return false; -+ -+ expected_start = page_address(bv.bv_page) + -+ bv.bv_offset + bv.bv_len; -+ } -+ -+ return true; -+} -+ -+static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio, -+ struct bvec_iter start, int rw) -+{ -+ struct bbuf ret; -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ unsigned nr_pages = 0; -+ struct page *stack_pages[16]; -+ struct page **pages = NULL; -+ void *data; -+ -+ BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max); -+ -+ if (!IS_ENABLED(CONFIG_HIGHMEM) && -+ bio_phys_contig(bio, start)) -+ return (struct bbuf) { -+ .b = page_address(bio_iter_page(bio, start)) + -+ bio_iter_offset(bio, start), -+ .type = BB_NONE, .rw = rw -+ }; -+ -+ /* check if we can map the pages contiguously: */ -+ __bio_for_each_segment(bv, bio, iter, start) { -+ if (iter.bi_size != start.bi_size && -+ bv.bv_offset) -+ goto bounce; -+ -+ if (bv.bv_len < iter.bi_size && -+ bv.bv_offset + bv.bv_len < PAGE_SIZE) -+ goto bounce; -+ -+ nr_pages++; -+ } -+ -+ BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages); -+ -+ pages = nr_pages > ARRAY_SIZE(stack_pages) -+ ? 
kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOIO) -+ : stack_pages; -+ if (!pages) -+ goto bounce; -+ -+ nr_pages = 0; -+ __bio_for_each_segment(bv, bio, iter, start) -+ pages[nr_pages++] = bv.bv_page; -+ -+ data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); -+ if (pages != stack_pages) -+ kfree(pages); -+ -+ if (data) -+ return (struct bbuf) { -+ .b = data + bio_iter_offset(bio, start), -+ .type = BB_VMAP, .rw = rw -+ }; -+bounce: -+ ret = __bounce_alloc(c, start.bi_size, rw); -+ -+ if (rw == READ) -+ memcpy_from_bio(ret.b, bio, start); -+ -+ return ret; -+} -+ -+static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw) -+{ -+ return __bio_map_or_bounce(c, bio, bio->bi_iter, rw); -+} -+ -+static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf) -+{ -+ switch (buf.type) { -+ case BB_NONE: -+ break; -+ case BB_VMAP: -+ vunmap((void *) ((unsigned long) buf.b & PAGE_MASK)); -+ break; -+ case BB_KMALLOC: -+ kfree(buf.b); -+ break; -+ case BB_MEMPOOL: -+ mempool_free(buf.b, &c->compression_bounce[buf.rw]); -+ break; -+ } -+} -+ -+static inline void zlib_set_workspace(z_stream *strm, void *workspace) -+{ -+#ifdef __KERNEL__ -+ strm->workspace = workspace; -+#endif -+} -+ -+static int __bio_uncompress(struct bch_fs *c, struct bio *src, -+ void *dst_data, struct bch_extent_crc_unpacked crc) -+{ -+ struct bbuf src_data = { NULL }; -+ size_t src_len = src->bi_iter.bi_size; -+ size_t dst_len = crc.uncompressed_size << 9; -+ void *workspace; -+ int ret; -+ -+ src_data = bio_map_or_bounce(c, src, READ); -+ -+ switch (crc.compression_type) { -+ case BCH_COMPRESSION_TYPE_lz4_old: -+ case BCH_COMPRESSION_TYPE_lz4: -+ ret = LZ4_decompress_safe_partial(src_data.b, dst_data, -+ src_len, dst_len, dst_len); -+ if (ret != dst_len) -+ goto err; -+ break; -+ case BCH_COMPRESSION_TYPE_gzip: { -+ z_stream strm = { -+ .next_in = src_data.b, -+ .avail_in = src_len, -+ .next_out = dst_data, -+ .avail_out = dst_len, -+ }; -+ -+ workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO); -+ -+ zlib_set_workspace(&strm, workspace); -+ zlib_inflateInit2(&strm, -MAX_WBITS); -+ ret = zlib_inflate(&strm, Z_FINISH); -+ -+ mempool_free(workspace, &c->decompress_workspace); -+ -+ if (ret != Z_STREAM_END) -+ goto err; -+ break; -+ } -+ case BCH_COMPRESSION_TYPE_zstd: { -+ ZSTD_DCtx *ctx; -+ size_t real_src_len = le32_to_cpup(src_data.b); -+ -+ if (real_src_len > src_len - 4) -+ goto err; -+ -+ workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO); -+ ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound()); -+ -+ ret = ZSTD_decompressDCtx(ctx, -+ dst_data, dst_len, -+ src_data.b + 4, real_src_len); -+ -+ mempool_free(workspace, &c->decompress_workspace); -+ -+ if (ret != dst_len) -+ goto err; -+ break; -+ } -+ default: -+ BUG(); -+ } -+ ret = 0; -+out: -+ bio_unmap_or_unbounce(c, src_data); -+ return ret; -+err: -+ ret = -EIO; -+ goto out; -+} -+ -+int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio, -+ struct bch_extent_crc_unpacked *crc) -+{ -+ struct bbuf data = { NULL }; -+ size_t dst_len = crc->uncompressed_size << 9; -+ -+ /* bio must own its pages: */ -+ BUG_ON(!bio->bi_vcnt); -+ BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs); -+ -+ if (crc->uncompressed_size > c->sb.encoded_extent_max || -+ crc->compressed_size > c->sb.encoded_extent_max) { -+ bch_err(c, "error rewriting existing data: extent too big"); -+ return -EIO; -+ } -+ -+ data = __bounce_alloc(c, dst_len, WRITE); -+ -+ if (__bio_uncompress(c, bio, data.b, *crc)) { -+ 
bch_err(c, "error rewriting existing data: decompression error"); -+ bio_unmap_or_unbounce(c, data); -+ return -EIO; -+ } -+ -+ /* -+ * XXX: don't have a good way to assert that the bio was allocated with -+ * enough space, we depend on bch2_move_extent doing the right thing -+ */ -+ bio->bi_iter.bi_size = crc->live_size << 9; -+ -+ memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9)); -+ -+ crc->csum_type = 0; -+ crc->compression_type = 0; -+ crc->compressed_size = crc->live_size; -+ crc->uncompressed_size = crc->live_size; -+ crc->offset = 0; -+ crc->csum = (struct bch_csum) { 0, 0 }; -+ -+ bio_unmap_or_unbounce(c, data); -+ return 0; -+} -+ -+int bch2_bio_uncompress(struct bch_fs *c, struct bio *src, -+ struct bio *dst, struct bvec_iter dst_iter, -+ struct bch_extent_crc_unpacked crc) -+{ -+ struct bbuf dst_data = { NULL }; -+ size_t dst_len = crc.uncompressed_size << 9; -+ int ret = -ENOMEM; -+ -+ if (crc.uncompressed_size > c->sb.encoded_extent_max || -+ crc.compressed_size > c->sb.encoded_extent_max) -+ return -EIO; -+ -+ dst_data = dst_len == dst_iter.bi_size -+ ? __bio_map_or_bounce(c, dst, dst_iter, WRITE) -+ : __bounce_alloc(c, dst_len, WRITE); -+ -+ ret = __bio_uncompress(c, src, dst_data.b, crc); -+ if (ret) -+ goto err; -+ -+ if (dst_data.type != BB_NONE && -+ dst_data.type != BB_VMAP) -+ memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9)); -+err: -+ bio_unmap_or_unbounce(c, dst_data); -+ return ret; -+} -+ -+static int attempt_compress(struct bch_fs *c, -+ void *workspace, -+ void *dst, size_t dst_len, -+ void *src, size_t src_len, -+ enum bch_compression_type compression_type) -+{ -+ switch (compression_type) { -+ case BCH_COMPRESSION_TYPE_lz4: { -+ int len = src_len; -+ int ret = LZ4_compress_destSize( -+ src, dst, -+ &len, dst_len, -+ workspace); -+ -+ if (len < src_len) -+ return -len; -+ -+ return ret; -+ } -+ case BCH_COMPRESSION_TYPE_gzip: { -+ z_stream strm = { -+ .next_in = src, -+ .avail_in = src_len, -+ .next_out = dst, -+ .avail_out = dst_len, -+ }; -+ -+ zlib_set_workspace(&strm, workspace); -+ zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION, -+ Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, -+ Z_DEFAULT_STRATEGY); -+ -+ if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END) -+ return 0; -+ -+ if (zlib_deflateEnd(&strm) != Z_OK) -+ return 0; -+ -+ return strm.total_out; -+ } -+ case BCH_COMPRESSION_TYPE_zstd: { -+ ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace, -+ ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams)); -+ -+ size_t len = ZSTD_compressCCtx(ctx, -+ dst + 4, dst_len - 4, -+ src, src_len, -+ c->zstd_params); -+ if (ZSTD_isError(len)) -+ return 0; -+ -+ *((__le32 *) dst) = cpu_to_le32(len); -+ return len + 4; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+static unsigned __bio_compress(struct bch_fs *c, -+ struct bio *dst, size_t *dst_len, -+ struct bio *src, size_t *src_len, -+ enum bch_compression_type compression_type) -+{ -+ struct bbuf src_data = { NULL }, dst_data = { NULL }; -+ void *workspace; -+ unsigned pad; -+ int ret = 0; -+ -+ BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR); -+ BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type])); -+ -+ /* If it's only one block, don't bother trying to compress: */ -+ if (bio_sectors(src) <= c->opts.block_size) -+ return 0; -+ -+ dst_data = bio_map_or_bounce(c, dst, WRITE); -+ src_data = bio_map_or_bounce(c, src, READ); -+ -+ workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOIO); -+ -+ *src_len = src->bi_iter.bi_size; -+ *dst_len = dst->bi_iter.bi_size; -+ -+ /* -+ * XXX: 
this algorithm sucks when the compression code doesn't tell us -+ * how much would fit, like LZ4 does: -+ */ -+ while (1) { -+ if (*src_len <= block_bytes(c)) { -+ ret = -1; -+ break; -+ } -+ -+ ret = attempt_compress(c, workspace, -+ dst_data.b, *dst_len, -+ src_data.b, *src_len, -+ compression_type); -+ if (ret > 0) { -+ *dst_len = ret; -+ ret = 0; -+ break; -+ } -+ -+ /* Didn't fit: should we retry with a smaller amount? */ -+ if (*src_len <= *dst_len) { -+ ret = -1; -+ break; -+ } -+ -+ /* -+ * If ret is negative, it's a hint as to how much data would fit -+ */ -+ BUG_ON(-ret >= *src_len); -+ -+ if (ret < 0) -+ *src_len = -ret; -+ else -+ *src_len -= (*src_len - *dst_len) / 2; -+ *src_len = round_down(*src_len, block_bytes(c)); -+ } -+ -+ mempool_free(workspace, &c->compress_workspace[compression_type]); -+ -+ if (ret) -+ goto err; -+ -+ /* Didn't get smaller: */ -+ if (round_up(*dst_len, block_bytes(c)) >= *src_len) -+ goto err; -+ -+ pad = round_up(*dst_len, block_bytes(c)) - *dst_len; -+ -+ memset(dst_data.b + *dst_len, 0, pad); -+ *dst_len += pad; -+ -+ if (dst_data.type != BB_NONE && -+ dst_data.type != BB_VMAP) -+ memcpy_to_bio(dst, dst->bi_iter, dst_data.b); -+ -+ BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size); -+ BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size); -+ BUG_ON(*dst_len & (block_bytes(c) - 1)); -+ BUG_ON(*src_len & (block_bytes(c) - 1)); -+out: -+ bio_unmap_or_unbounce(c, src_data); -+ bio_unmap_or_unbounce(c, dst_data); -+ return compression_type; -+err: -+ compression_type = BCH_COMPRESSION_TYPE_incompressible; -+ goto out; -+} -+ -+unsigned bch2_bio_compress(struct bch_fs *c, -+ struct bio *dst, size_t *dst_len, -+ struct bio *src, size_t *src_len, -+ unsigned compression_type) -+{ -+ unsigned orig_dst = dst->bi_iter.bi_size; -+ unsigned orig_src = src->bi_iter.bi_size; -+ -+ /* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */ -+ src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size, -+ c->sb.encoded_extent_max << 9); -+ /* Don't generate a bigger output than input: */ -+ dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size); -+ -+ if (compression_type == BCH_COMPRESSION_TYPE_lz4_old) -+ compression_type = BCH_COMPRESSION_TYPE_lz4; -+ -+ compression_type = -+ __bio_compress(c, dst, dst_len, src, src_len, compression_type); -+ -+ dst->bi_iter.bi_size = orig_dst; -+ src->bi_iter.bi_size = orig_src; -+ return compression_type; -+} -+ -+static int __bch2_fs_compress_init(struct bch_fs *, u64); -+ -+#define BCH_FEATURE_none 0 -+ -+static const unsigned bch2_compression_opt_to_feature[] = { -+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t, -+ BCH_COMPRESSION_OPTS() -+#undef x -+}; -+ -+#undef BCH_FEATURE_none -+ -+static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f) -+{ -+ int ret = 0; -+ -+ if ((c->sb.features & f) == f) -+ return 0; -+ -+ mutex_lock(&c->sb_lock); -+ -+ if ((c->sb.features & f) == f) { -+ mutex_unlock(&c->sb_lock); -+ return 0; -+ } -+ -+ ret = __bch2_fs_compress_init(c, c->sb.features|f); -+ if (ret) { -+ mutex_unlock(&c->sb_lock); -+ return ret; -+ } -+ -+ c->disk_sb.sb->features[0] |= cpu_to_le64(f); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+int bch2_check_set_has_compressed_data(struct bch_fs *c, -+ unsigned compression_type) -+{ -+ BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature)); -+ -+ return compression_type -+ ? 
__bch2_check_set_has_compressed_data(c, -+ 1ULL << bch2_compression_opt_to_feature[compression_type]) -+ : 0; -+} -+ -+void bch2_fs_compress_exit(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ mempool_exit(&c->decompress_workspace); -+ for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++) -+ mempool_exit(&c->compress_workspace[i]); -+ mempool_exit(&c->compression_bounce[WRITE]); -+ mempool_exit(&c->compression_bounce[READ]); -+} -+ -+static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) -+{ -+ size_t max_extent = c->sb.encoded_extent_max << 9; -+ size_t decompress_workspace_size = 0; -+ bool decompress_workspace_needed; -+ ZSTD_parameters params = ZSTD_getParams(0, max_extent, 0); -+ struct { -+ unsigned feature; -+ unsigned type; -+ size_t compress_workspace; -+ size_t decompress_workspace; -+ } compression_types[] = { -+ { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, LZ4_MEM_COMPRESS, 0 }, -+ { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip, -+ zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), -+ zlib_inflate_workspacesize(), }, -+ { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd, -+ ZSTD_CCtxWorkspaceBound(params.cParams), -+ ZSTD_DCtxWorkspaceBound() }, -+ }, *i; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ c->zstd_params = params; -+ -+ for (i = compression_types; -+ i < compression_types + ARRAY_SIZE(compression_types); -+ i++) -+ if (features & (1 << i->feature)) -+ goto have_compressed; -+ -+ goto out; -+have_compressed: -+ -+ if (!mempool_initialized(&c->compression_bounce[READ])) { -+ ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[READ], -+ 1, max_extent); -+ if (ret) -+ goto out; -+ } -+ -+ if (!mempool_initialized(&c->compression_bounce[WRITE])) { -+ ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE], -+ 1, max_extent); -+ if (ret) -+ goto out; -+ } -+ -+ for (i = compression_types; -+ i < compression_types + ARRAY_SIZE(compression_types); -+ i++) { -+ decompress_workspace_size = -+ max(decompress_workspace_size, i->decompress_workspace); -+ -+ if (!(features & (1 << i->feature))) -+ continue; -+ -+ if (i->decompress_workspace) -+ decompress_workspace_needed = true; -+ -+ if (mempool_initialized(&c->compress_workspace[i->type])) -+ continue; -+ -+ ret = mempool_init_kvpmalloc_pool( -+ &c->compress_workspace[i->type], -+ 1, i->compress_workspace); -+ if (ret) -+ goto out; -+ } -+ -+ if (!mempool_initialized(&c->decompress_workspace)) { -+ ret = mempool_init_kvpmalloc_pool( -+ &c->decompress_workspace, -+ 1, decompress_workspace_size); -+ if (ret) -+ goto out; -+ } -+out: -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -+ -+int bch2_fs_compress_init(struct bch_fs *c) -+{ -+ u64 f = c->sb.features; -+ -+ if (c->opts.compression) -+ f |= 1ULL << bch2_compression_opt_to_feature[c->opts.compression]; -+ -+ if (c->opts.background_compression) -+ f |= 1ULL << bch2_compression_opt_to_feature[c->opts.background_compression]; -+ -+ return __bch2_fs_compress_init(c, f); -+ -+} -diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h -new file mode 100644 -index 000000000000..4bab1f61b3b5 ---- /dev/null -+++ b/fs/bcachefs/compress.h -@@ -0,0 +1,18 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_COMPRESS_H -+#define _BCACHEFS_COMPRESS_H -+ -+#include "extents_types.h" -+ -+int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *, -+ struct bch_extent_crc_unpacked *); -+int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *, -+ struct bvec_iter, struct bch_extent_crc_unpacked); -+unsigned 
bch2_bio_compress(struct bch_fs *, struct bio *, size_t *, -+ struct bio *, size_t *, unsigned); -+ -+int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned); -+void bch2_fs_compress_exit(struct bch_fs *); -+int bch2_fs_compress_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_COMPRESS_H */ -diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c -new file mode 100644 -index 000000000000..aa10591a3b1a ---- /dev/null -+++ b/fs/bcachefs/debug.c -@@ -0,0 +1,432 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Assorted bcachefs debug code -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "debug.h" -+#include "error.h" -+#include "extents.h" -+#include "fsck.h" -+#include "inode.h" -+#include "io.h" -+#include "super.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+static struct dentry *bch_debug; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+void __bch2_btree_verify(struct bch_fs *c, struct btree *b) -+{ -+ struct btree *v = c->verify_data; -+ struct btree_node *n_ondisk, *n_sorted, *n_inmemory; -+ struct bset *sorted, *inmemory; -+ struct extent_ptr_decoded pick; -+ struct bch_dev *ca; -+ struct bio *bio; -+ -+ if (c->opts.nochanges) -+ return; -+ -+ btree_node_io_lock(b); -+ mutex_lock(&c->verify_lock); -+ -+ n_ondisk = c->verify_ondisk; -+ n_sorted = c->verify_data->data; -+ n_inmemory = b->data; -+ -+ bkey_copy(&v->key, &b->key); -+ v->written = 0; -+ v->c.level = b->c.level; -+ v->c.btree_id = b->c.btree_id; -+ bch2_btree_keys_init(v, &c->expensive_debug_checks); -+ -+ if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), -+ NULL, &pick) <= 0) -+ return; -+ -+ ca = bch_dev_bkey_exists(c, pick.ptr.dev); -+ if (!bch2_dev_get_ioref(ca, READ)) -+ return; -+ -+ bio = bio_alloc_bioset(GFP_NOIO, -+ buf_pages(n_sorted, btree_bytes(c)), -+ &c->btree_bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_opf = REQ_OP_READ|REQ_META; -+ bio->bi_iter.bi_sector = pick.ptr.offset; -+ bch2_bio_map(bio, n_sorted, btree_bytes(c)); -+ -+ submit_bio_wait(bio); -+ -+ bio_put(bio); -+ percpu_ref_put(&ca->io_ref); -+ -+ memcpy(n_ondisk, n_sorted, btree_bytes(c)); -+ -+ if (bch2_btree_node_read_done(c, v, false)) -+ goto out; -+ -+ n_sorted = c->verify_data->data; -+ sorted = &n_sorted->keys; -+ inmemory = &n_inmemory->keys; -+ -+ if (inmemory->u64s != sorted->u64s || -+ memcmp(inmemory->start, -+ sorted->start, -+ vstruct_end(inmemory) - (void *) inmemory->start)) { -+ unsigned offset = 0, sectors; -+ struct bset *i; -+ unsigned j; -+ -+ console_lock(); -+ -+ printk(KERN_ERR "*** in memory:\n"); -+ bch2_dump_bset(c, b, inmemory, 0); -+ -+ printk(KERN_ERR "*** read back in:\n"); -+ bch2_dump_bset(c, v, sorted, 0); -+ -+ while (offset < b->written) { -+ if (!offset ) { -+ i = &n_ondisk->keys; -+ sectors = vstruct_blocks(n_ondisk, c->block_bits) << -+ c->block_bits; -+ } else { -+ struct btree_node_entry *bne = -+ (void *) n_ondisk + (offset << 9); -+ i = &bne->keys; -+ -+ sectors = vstruct_blocks(bne, c->block_bits) << -+ c->block_bits; -+ } -+ -+ printk(KERN_ERR "*** on disk block %u:\n", offset); -+ bch2_dump_bset(c, b, i, offset); -+ -+ offset += sectors; -+ } -+ -+ printk(KERN_ERR "*** block %u/%u not written\n", -+ offset >> c->block_bits, btree_blocks(c)); -+ -+ for (j = 0; j < le16_to_cpu(inmemory->u64s); j++) -+ if (inmemory->_data[j] != sorted->_data[j]) 
-+ break; -+ -+ printk(KERN_ERR "b->written %u\n", b->written); -+ -+ console_unlock(); -+ panic("verify failed at %u\n", j); -+ } -+out: -+ mutex_unlock(&c->verify_lock); -+ btree_node_io_unlock(b); -+} -+ -+#endif -+ -+#ifdef CONFIG_DEBUG_FS -+ -+/* XXX: bch_fs refcounting */ -+ -+struct dump_iter { -+ struct bpos from; -+ struct bch_fs *c; -+ enum btree_id id; -+ -+ char buf[PAGE_SIZE]; -+ size_t bytes; /* what's currently in buf */ -+ -+ char __user *ubuf; /* destination user buffer */ -+ size_t size; /* size of requested read */ -+ ssize_t ret; /* bytes read so far */ -+}; -+ -+static int flush_buf(struct dump_iter *i) -+{ -+ if (i->bytes) { -+ size_t bytes = min(i->bytes, i->size); -+ int err = copy_to_user(i->ubuf, i->buf, bytes); -+ -+ if (err) -+ return err; -+ -+ i->ret += bytes; -+ i->ubuf += bytes; -+ i->size -= bytes; -+ i->bytes -= bytes; -+ memmove(i->buf, i->buf + bytes, i->bytes); -+ } -+ -+ return 0; -+} -+ -+static int bch2_dump_open(struct inode *inode, struct file *file) -+{ -+ struct btree_debug *bd = inode->i_private; -+ struct dump_iter *i; -+ -+ i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL); -+ if (!i) -+ return -ENOMEM; -+ -+ file->private_data = i; -+ i->from = POS_MIN; -+ i->c = container_of(bd, struct bch_fs, btree_debug[bd->id]); -+ i->id = bd->id; -+ -+ return 0; -+} -+ -+static int bch2_dump_release(struct inode *inode, struct file *file) -+{ -+ kfree(file->private_data); -+ return 0; -+} -+ -+static ssize_t bch2_read_btree(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int err; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ err = flush_buf(i); -+ if (err) -+ return err; -+ -+ if (!i->size) -+ return i->ret; -+ -+ bch2_trans_init(&trans, i->c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); -+ k = bch2_btree_iter_peek(iter); -+ -+ while (k.k && !(err = bkey_err(k))) { -+ bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k); -+ i->bytes = strlen(i->buf); -+ BUG_ON(i->bytes >= PAGE_SIZE); -+ i->buf[i->bytes] = '\n'; -+ i->bytes++; -+ -+ k = bch2_btree_iter_next(iter); -+ i->from = iter->pos; -+ -+ err = flush_buf(i); -+ if (err) -+ break; -+ -+ if (!i->size) -+ break; -+ } -+ bch2_trans_exit(&trans); -+ -+ return err < 0 ? err : i->ret; -+} -+ -+static const struct file_operations btree_debug_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_read_btree, -+}; -+ -+static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ int err; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ err = flush_buf(i); -+ if (err) -+ return err; -+ -+ if (!i->size || !bkey_cmp(POS_MAX, i->from)) -+ return i->ret; -+ -+ bch2_trans_init(&trans, i->c, 0, 0); -+ -+ for_each_btree_node(&trans, iter, i->id, i->from, 0, b) { -+ bch2_btree_node_to_text(&PBUF(i->buf), i->c, b); -+ i->bytes = strlen(i->buf); -+ err = flush_buf(i); -+ if (err) -+ break; -+ -+ /* -+ * can't easily correctly restart a btree node traversal across -+ * all nodes, meh -+ */ -+ i->from = bkey_cmp(POS_MAX, b->key.k.p) -+ ? bkey_successor(b->key.k.p) -+ : b->key.k.p; -+ -+ if (!i->size) -+ break; -+ } -+ bch2_trans_exit(&trans); -+ -+ return err < 0 ? 
err : i->ret; -+} -+ -+static const struct file_operations btree_format_debug_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_read_btree_formats, -+}; -+ -+static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct btree *prev_node = NULL; -+ int err; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ err = flush_buf(i); -+ if (err) -+ return err; -+ -+ if (!i->size) -+ return i->ret; -+ -+ bch2_trans_init(&trans, i->c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(err = bkey_err(k))) { -+ struct btree_iter_level *l = &iter->l[0]; -+ struct bkey_packed *_k = -+ bch2_btree_node_iter_peek(&l->iter, l->b); -+ -+ if (l->b != prev_node) { -+ bch2_btree_node_to_text(&PBUF(i->buf), i->c, l->b); -+ i->bytes = strlen(i->buf); -+ err = flush_buf(i); -+ if (err) -+ break; -+ } -+ prev_node = l->b; -+ -+ bch2_bfloat_to_text(&PBUF(i->buf), l->b, _k); -+ i->bytes = strlen(i->buf); -+ err = flush_buf(i); -+ if (err) -+ break; -+ -+ bch2_btree_iter_next(iter); -+ i->from = iter->pos; -+ -+ err = flush_buf(i); -+ if (err) -+ break; -+ -+ if (!i->size) -+ break; -+ } -+ bch2_trans_exit(&trans); -+ -+ return err < 0 ? err : i->ret; -+} -+ -+static const struct file_operations bfloat_failed_debug_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_read_bfloat_failed, -+}; -+ -+void bch2_fs_debug_exit(struct bch_fs *c) -+{ -+ if (!IS_ERR_OR_NULL(c->debug)) -+ debugfs_remove_recursive(c->debug); -+} -+ -+void bch2_fs_debug_init(struct bch_fs *c) -+{ -+ struct btree_debug *bd; -+ char name[100]; -+ -+ if (IS_ERR_OR_NULL(bch_debug)) -+ return; -+ -+ snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b); -+ c->debug = debugfs_create_dir(name, bch_debug); -+ if (IS_ERR_OR_NULL(c->debug)) -+ return; -+ -+ for (bd = c->btree_debug; -+ bd < c->btree_debug + ARRAY_SIZE(c->btree_debug); -+ bd++) { -+ bd->id = bd - c->btree_debug; -+ bd->btree = debugfs_create_file(bch2_btree_ids[bd->id], -+ 0400, c->debug, bd, -+ &btree_debug_ops); -+ -+ snprintf(name, sizeof(name), "%s-formats", -+ bch2_btree_ids[bd->id]); -+ -+ bd->btree_format = debugfs_create_file(name, 0400, c->debug, bd, -+ &btree_format_debug_ops); -+ -+ snprintf(name, sizeof(name), "%s-bfloat-failed", -+ bch2_btree_ids[bd->id]); -+ -+ bd->failed = debugfs_create_file(name, 0400, c->debug, bd, -+ &bfloat_failed_debug_ops); -+ } -+} -+ -+#endif -+ -+void bch2_debug_exit(void) -+{ -+ if (!IS_ERR_OR_NULL(bch_debug)) -+ debugfs_remove_recursive(bch_debug); -+} -+ -+int __init bch2_debug_init(void) -+{ -+ int ret = 0; -+ -+ bch_debug = debugfs_create_dir("bcachefs", NULL); -+ return ret; -+} -diff --git a/fs/bcachefs/debug.h b/fs/bcachefs/debug.h -new file mode 100644 -index 000000000000..56c2d1ab5f63 ---- /dev/null -+++ b/fs/bcachefs/debug.h -@@ -0,0 +1,63 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_DEBUG_H -+#define _BCACHEFS_DEBUG_H -+ -+#include "bcachefs.h" -+ -+struct bio; -+struct btree; -+struct bch_fs; -+ -+#define BCH_DEBUG_PARAM(name, description) extern bool bch2_##name; -+BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ static inline bool name(struct bch_fs *c) \ -+ { return bch2_##name 
|| c->name; } -+BCH_DEBUG_PARAMS_ALWAYS() -+#undef BCH_DEBUG_PARAM -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ static inline bool name(struct bch_fs *c) \ -+ { return bch2_##name || c->name; } -+BCH_DEBUG_PARAMS_DEBUG() -+#undef BCH_DEBUG_PARAM -+ -+void __bch2_btree_verify(struct bch_fs *, struct btree *); -+ -+#define bypass_torture_test(d) ((d)->bypass_torture_test) -+ -+#else /* DEBUG */ -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ static inline bool name(struct bch_fs *c) { return false; } -+BCH_DEBUG_PARAMS_DEBUG() -+#undef BCH_DEBUG_PARAM -+ -+static inline void __bch2_btree_verify(struct bch_fs *c, struct btree *b) {} -+ -+#define bypass_torture_test(d) 0 -+ -+#endif -+ -+static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b) -+{ -+ if (verify_btree_ondisk(c)) -+ __bch2_btree_verify(c, b); -+} -+ -+#ifdef CONFIG_DEBUG_FS -+void bch2_fs_debug_exit(struct bch_fs *); -+void bch2_fs_debug_init(struct bch_fs *); -+#else -+static inline void bch2_fs_debug_exit(struct bch_fs *c) {} -+static inline void bch2_fs_debug_init(struct bch_fs *c) {} -+#endif -+ -+void bch2_debug_exit(void); -+int bch2_debug_init(void); -+ -+#endif /* _BCACHEFS_DEBUG_H */ -diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c -new file mode 100644 -index 000000000000..f34bfda8ab0d ---- /dev/null -+++ b/fs/bcachefs/dirent.c -@@ -0,0 +1,385 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_update.h" -+#include "extents.h" -+#include "dirent.h" -+#include "fs.h" -+#include "keylist.h" -+#include "str_hash.h" -+ -+#include -+ -+unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) -+{ -+ unsigned len = bkey_val_bytes(d.k) - -+ offsetof(struct bch_dirent, d_name); -+ -+ return strnlen(d.v->d_name, len); -+} -+ -+static u64 bch2_dirent_hash(const struct bch_hash_info *info, -+ const struct qstr *name) -+{ -+ struct bch_str_hash_ctx ctx; -+ -+ bch2_str_hash_init(&ctx, info); -+ bch2_str_hash_update(&ctx, info, name->name, name->len); -+ -+ /* [0,2) reserved for dots */ -+ return max_t(u64, bch2_str_hash_end(&ctx, info), 2); -+} -+ -+static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key) -+{ -+ return bch2_dirent_hash(info, key); -+} -+ -+static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) -+{ -+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); -+ struct qstr name = QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d)); -+ -+ return bch2_dirent_hash(info, &name); -+} -+ -+static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r) -+{ -+ struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); -+ int len = bch2_dirent_name_bytes(l); -+ const struct qstr *r = _r; -+ -+ return len - r->len ?: memcmp(l.v->d_name, r->name, len); -+} -+ -+static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) -+{ -+ struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); -+ struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r); -+ int l_len = bch2_dirent_name_bytes(l); -+ int r_len = bch2_dirent_name_bytes(r); -+ -+ return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len); -+} -+ -+const struct bch_hash_desc bch2_dirent_hash_desc = { -+ .btree_id = BTREE_ID_DIRENTS, -+ .key_type = KEY_TYPE_dirent, -+ .hash_key = dirent_hash_key, -+ .hash_bkey = dirent_hash_bkey, -+ .cmp_key = dirent_cmp_key, -+ .cmp_bkey = dirent_cmp_bkey, -+}; -+ -+const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_dirent d = 
bkey_s_c_to_dirent(k); -+ unsigned len; -+ -+ if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent)) -+ return "value too small"; -+ -+ len = bch2_dirent_name_bytes(d); -+ if (!len) -+ return "empty name"; -+ -+ /* -+ * older versions of bcachefs were buggy and creating dirent -+ * keys that were bigger than necessary: -+ */ -+ if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7)) -+ return "value too big"; -+ -+ if (len > BCH_NAME_MAX) -+ return "dirent name too big"; -+ -+ return NULL; -+} -+ -+void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); -+ -+ bch_scnmemcpy(out, d.v->d_name, -+ bch2_dirent_name_bytes(d)); -+ pr_buf(out, " -> %llu type %u", d.v->d_inum, d.v->d_type); -+} -+ -+static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, -+ u8 type, const struct qstr *name, u64 dst) -+{ -+ struct bkey_i_dirent *dirent; -+ unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len); -+ -+ if (name->len > BCH_NAME_MAX) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ BUG_ON(u64s > U8_MAX); -+ -+ dirent = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); -+ if (IS_ERR(dirent)) -+ return dirent; -+ -+ bkey_dirent_init(&dirent->k_i); -+ dirent->k.u64s = u64s; -+ dirent->v.d_inum = cpu_to_le64(dst); -+ dirent->v.d_type = type; -+ -+ memcpy(dirent->v.d_name, name->name, name->len); -+ memset(dirent->v.d_name + name->len, 0, -+ bkey_val_bytes(&dirent->k) - -+ offsetof(struct bch_dirent, d_name) - -+ name->len); -+ -+ EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len); -+ -+ return dirent; -+} -+ -+int bch2_dirent_create(struct btree_trans *trans, -+ u64 dir_inum, const struct bch_hash_info *hash_info, -+ u8 type, const struct qstr *name, u64 dst_inum, -+ int flags) -+{ -+ struct bkey_i_dirent *dirent; -+ int ret; -+ -+ dirent = dirent_create_key(trans, type, name, dst_inum); -+ ret = PTR_ERR_OR_ZERO(dirent); -+ if (ret) -+ return ret; -+ -+ return bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info, -+ dir_inum, &dirent->k_i, flags); -+} -+ -+static void dirent_copy_target(struct bkey_i_dirent *dst, -+ struct bkey_s_c_dirent src) -+{ -+ dst->v.d_inum = src.v->d_inum; -+ dst->v.d_type = src.v->d_type; -+} -+ -+int bch2_dirent_rename(struct btree_trans *trans, -+ u64 src_dir, struct bch_hash_info *src_hash, -+ u64 dst_dir, struct bch_hash_info *dst_hash, -+ const struct qstr *src_name, u64 *src_inum, -+ const struct qstr *dst_name, u64 *dst_inum, -+ enum bch_rename_mode mode) -+{ -+ struct btree_iter *src_iter = NULL, *dst_iter = NULL; -+ struct bkey_s_c old_src, old_dst; -+ struct bkey_i_dirent *new_src = NULL, *new_dst = NULL; -+ struct bpos dst_pos = -+ POS(dst_dir, bch2_dirent_hash(dst_hash, dst_name)); -+ int ret = 0; -+ -+ *src_inum = *dst_inum = 0; -+ -+ /* -+ * Lookup dst: -+ * -+ * Note that in BCH_RENAME mode, we're _not_ checking if -+ * the target already exists - we're relying on the VFS -+ * to do that check for us for correctness: -+ */ -+ dst_iter = mode == BCH_RENAME -+ ? 
bch2_hash_hole(trans, bch2_dirent_hash_desc, -+ dst_hash, dst_dir, dst_name) -+ : bch2_hash_lookup(trans, bch2_dirent_hash_desc, -+ dst_hash, dst_dir, dst_name, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dst_iter); -+ if (ret) -+ goto out; -+ -+ old_dst = bch2_btree_iter_peek_slot(dst_iter); -+ -+ if (mode != BCH_RENAME) -+ *dst_inum = le64_to_cpu(bkey_s_c_to_dirent(old_dst).v->d_inum); -+ -+ /* Lookup src: */ -+ src_iter = bch2_hash_lookup(trans, bch2_dirent_hash_desc, -+ src_hash, src_dir, src_name, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(src_iter); -+ if (ret) -+ goto out; -+ -+ old_src = bch2_btree_iter_peek_slot(src_iter); -+ *src_inum = le64_to_cpu(bkey_s_c_to_dirent(old_src).v->d_inum); -+ -+ /* Create new dst key: */ -+ new_dst = dirent_create_key(trans, 0, dst_name, 0); -+ ret = PTR_ERR_OR_ZERO(new_dst); -+ if (ret) -+ goto out; -+ -+ dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src)); -+ new_dst->k.p = dst_iter->pos; -+ -+ /* Create new src key: */ -+ if (mode == BCH_RENAME_EXCHANGE) { -+ new_src = dirent_create_key(trans, 0, src_name, 0); -+ ret = PTR_ERR_OR_ZERO(new_src); -+ if (ret) -+ goto out; -+ -+ dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst)); -+ new_src->k.p = src_iter->pos; -+ } else { -+ new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); -+ ret = PTR_ERR_OR_ZERO(new_src); -+ if (ret) -+ goto out; -+ -+ bkey_init(&new_src->k); -+ new_src->k.p = src_iter->pos; -+ -+ if (bkey_cmp(dst_pos, src_iter->pos) <= 0 && -+ bkey_cmp(src_iter->pos, dst_iter->pos) < 0) { -+ /* -+ * We have a hash collision for the new dst key, -+ * and new_src - the key we're deleting - is between -+ * new_dst's hashed slot and the slot we're going to be -+ * inserting it into - oops. This will break the hash -+ * table if we don't deal with it: -+ */ -+ if (mode == BCH_RENAME) { -+ /* -+ * If we're not overwriting, we can just insert -+ * new_dst at the src position: -+ */ -+ new_dst->k.p = src_iter->pos; -+ bch2_trans_update(trans, src_iter, -+ &new_dst->k_i, 0); -+ goto out; -+ } else { -+ /* If we're overwriting, we can't insert new_dst -+ * at a different slot because it has to -+ * overwrite old_dst - just make sure to use a -+ * whiteout when deleting src: -+ */ -+ new_src->k.type = KEY_TYPE_whiteout; -+ } -+ } else { -+ /* Check if we need a whiteout to delete src: */ -+ ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc, -+ src_hash, src_iter); -+ if (ret < 0) -+ goto out; -+ -+ if (ret) -+ new_src->k.type = KEY_TYPE_whiteout; -+ } -+ } -+ -+ bch2_trans_update(trans, src_iter, &new_src->k_i, 0); -+ bch2_trans_update(trans, dst_iter, &new_dst->k_i, 0); -+out: -+ bch2_trans_iter_put(trans, src_iter); -+ bch2_trans_iter_put(trans, dst_iter); -+ return ret; -+} -+ -+int bch2_dirent_delete_at(struct btree_trans *trans, -+ const struct bch_hash_info *hash_info, -+ struct btree_iter *iter) -+{ -+ return bch2_hash_delete_at(trans, bch2_dirent_hash_desc, -+ hash_info, iter); -+} -+ -+struct btree_iter * -+__bch2_dirent_lookup_trans(struct btree_trans *trans, u64 dir_inum, -+ const struct bch_hash_info *hash_info, -+ const struct qstr *name, unsigned flags) -+{ -+ return bch2_hash_lookup(trans, bch2_dirent_hash_desc, -+ hash_info, dir_inum, name, flags); -+} -+ -+u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum, -+ const struct bch_hash_info *hash_info, -+ const struct qstr *name) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 inum = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = 
__bch2_dirent_lookup_trans(&trans, dir_inum, -+ hash_info, name, 0); -+ if (IS_ERR(iter)) { -+ BUG_ON(PTR_ERR(iter) == -EINTR); -+ goto out; -+ } -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); -+out: -+ bch2_trans_exit(&trans); -+ return inum; -+} -+ -+int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_DIRENTS, -+ POS(dir_inum, 0), 0, k, ret) { -+ if (k.k->p.inode > dir_inum) -+ break; -+ -+ if (k.k->type == KEY_TYPE_dirent) { -+ ret = -ENOTEMPTY; -+ break; -+ } -+ } -+ bch2_trans_iter_put(trans, iter); -+ -+ return ret; -+} -+ -+int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_dirent dirent; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, -+ POS(inum, ctx->pos), 0, k, ret) { -+ if (k.k->p.inode > inum) -+ break; -+ -+ if (k.k->type != KEY_TYPE_dirent) -+ continue; -+ -+ dirent = bkey_s_c_to_dirent(k); -+ -+ /* -+ * XXX: dir_emit() can fault and block, while we're holding -+ * locks -+ */ -+ ctx->pos = dirent.k->p.offset; -+ if (!dir_emit(ctx, dirent.v->d_name, -+ bch2_dirent_name_bytes(dirent), -+ le64_to_cpu(dirent.v->d_inum), -+ dirent.v->d_type)) -+ break; -+ ctx->pos = dirent.k->p.offset + 1; -+ } -+ ret = bch2_trans_exit(&trans) ?: ret; -+ -+ return ret; -+} -diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h -new file mode 100644 -index 000000000000..34769371dd13 ---- /dev/null -+++ b/fs/bcachefs/dirent.h -@@ -0,0 +1,63 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_DIRENT_H -+#define _BCACHEFS_DIRENT_H -+ -+#include "str_hash.h" -+ -+extern const struct bch_hash_desc bch2_dirent_hash_desc; -+ -+const char *bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_dirent (struct bkey_ops) { \ -+ .key_invalid = bch2_dirent_invalid, \ -+ .val_to_text = bch2_dirent_to_text, \ -+} -+ -+struct qstr; -+struct file; -+struct dir_context; -+struct bch_fs; -+struct bch_hash_info; -+struct bch_inode_info; -+ -+unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent); -+ -+static inline unsigned dirent_val_u64s(unsigned len) -+{ -+ return DIV_ROUND_UP(offsetof(struct bch_dirent, d_name) + len, -+ sizeof(u64)); -+} -+ -+int bch2_dirent_create(struct btree_trans *, u64, -+ const struct bch_hash_info *, u8, -+ const struct qstr *, u64, int); -+ -+int bch2_dirent_delete_at(struct btree_trans *, -+ const struct bch_hash_info *, -+ struct btree_iter *); -+ -+enum bch_rename_mode { -+ BCH_RENAME, -+ BCH_RENAME_OVERWRITE, -+ BCH_RENAME_EXCHANGE, -+}; -+ -+int bch2_dirent_rename(struct btree_trans *, -+ u64, struct bch_hash_info *, -+ u64, struct bch_hash_info *, -+ const struct qstr *, u64 *, -+ const struct qstr *, u64 *, -+ enum bch_rename_mode); -+ -+struct btree_iter * -+__bch2_dirent_lookup_trans(struct btree_trans *, u64, -+ const struct bch_hash_info *, -+ const struct qstr *, unsigned); -+u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *, -+ const struct qstr *); -+ -+int bch2_empty_dir_trans(struct btree_trans *, u64); -+int bch2_readdir(struct bch_fs *, u64, struct dir_context *); -+ -+#endif /* _BCACHEFS_DIRENT_H */ -diff --git a/fs/bcachefs/disk_groups.c 
b/fs/bcachefs/disk_groups.c -new file mode 100644 -index 000000000000..c52b6faac9b4 ---- /dev/null -+++ b/fs/bcachefs/disk_groups.c -@@ -0,0 +1,486 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "disk_groups.h" -+#include "super-io.h" -+ -+#include -+ -+static int group_cmp(const void *_l, const void *_r) -+{ -+ const struct bch_disk_group *l = _l; -+ const struct bch_disk_group *r = _r; -+ -+ return ((BCH_GROUP_DELETED(l) > BCH_GROUP_DELETED(r)) - -+ (BCH_GROUP_DELETED(l) < BCH_GROUP_DELETED(r))) ?: -+ ((BCH_GROUP_PARENT(l) > BCH_GROUP_PARENT(r)) - -+ (BCH_GROUP_PARENT(l) < BCH_GROUP_PARENT(r))) ?: -+ strncmp(l->label, r->label, sizeof(l->label)); -+} -+ -+static const char *bch2_sb_disk_groups_validate(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ field_to_type(f, disk_groups); -+ struct bch_disk_group *g, *sorted = NULL; -+ struct bch_sb_field_members *mi; -+ struct bch_member *m; -+ unsigned i, nr_groups, len; -+ const char *err = NULL; -+ -+ mi = bch2_sb_get_members(sb); -+ groups = bch2_sb_get_disk_groups(sb); -+ nr_groups = disk_groups_nr(groups); -+ -+ for (m = mi->members; -+ m < mi->members + sb->nr_devices; -+ m++) { -+ unsigned g; -+ -+ if (!BCH_MEMBER_GROUP(m)) -+ continue; -+ -+ g = BCH_MEMBER_GROUP(m) - 1; -+ -+ if (g >= nr_groups || -+ BCH_GROUP_DELETED(&groups->entries[g])) -+ return "disk has invalid group"; -+ } -+ -+ if (!nr_groups) -+ return NULL; -+ -+ for (g = groups->entries; -+ g < groups->entries + nr_groups; -+ g++) { -+ if (BCH_GROUP_DELETED(g)) -+ continue; -+ -+ len = strnlen(g->label, sizeof(g->label)); -+ if (!len) { -+ err = "group with empty label"; -+ goto err; -+ } -+ } -+ -+ sorted = kmalloc_array(nr_groups, sizeof(*sorted), GFP_KERNEL); -+ if (!sorted) -+ return "cannot allocate memory"; -+ -+ memcpy(sorted, groups->entries, nr_groups * sizeof(*sorted)); -+ sort(sorted, nr_groups, sizeof(*sorted), group_cmp, NULL); -+ -+ for (i = 0; i + 1 < nr_groups; i++) -+ if (!BCH_GROUP_DELETED(sorted + i) && -+ !group_cmp(sorted + i, sorted + i + 1)) { -+ err = "duplicate groups"; -+ goto err; -+ } -+ -+ err = NULL; -+err: -+ kfree(sorted); -+ return err; -+} -+ -+static void bch2_sb_disk_groups_to_text(struct printbuf *out, -+ struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ field_to_type(f, disk_groups); -+ struct bch_disk_group *g; -+ unsigned nr_groups = disk_groups_nr(groups); -+ -+ for (g = groups->entries; -+ g < groups->entries + nr_groups; -+ g++) { -+ if (g != groups->entries) -+ pr_buf(out, " "); -+ -+ if (BCH_GROUP_DELETED(g)) -+ pr_buf(out, "[deleted]"); -+ else -+ pr_buf(out, "[parent %llu name %s]", -+ BCH_GROUP_PARENT(g), g->label); -+ } -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_disk_groups = { -+ .validate = bch2_sb_disk_groups_validate, -+ .to_text = bch2_sb_disk_groups_to_text -+}; -+ -+int bch2_sb_disk_groups_to_cpu(struct bch_fs *c) -+{ -+ struct bch_sb_field_members *mi; -+ struct bch_sb_field_disk_groups *groups; -+ struct bch_disk_groups_cpu *cpu_g, *old_g; -+ unsigned i, g, nr_groups; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ groups = bch2_sb_get_disk_groups(c->disk_sb.sb); -+ nr_groups = disk_groups_nr(groups); -+ -+ if (!groups) -+ return 0; -+ -+ cpu_g = kzalloc(sizeof(*cpu_g) + -+ sizeof(cpu_g->entries[0]) * nr_groups, GFP_KERNEL); -+ if (!cpu_g) -+ return -ENOMEM; -+ -+ cpu_g->nr = nr_groups; -+ -+ for (i = 0; i < nr_groups; i++) { -+ struct 
bch_disk_group *src = &groups->entries[i]; -+ struct bch_disk_group_cpu *dst = &cpu_g->entries[i]; -+ -+ dst->deleted = BCH_GROUP_DELETED(src); -+ dst->parent = BCH_GROUP_PARENT(src); -+ } -+ -+ for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { -+ struct bch_member *m = mi->members + i; -+ struct bch_disk_group_cpu *dst = -+ &cpu_g->entries[BCH_MEMBER_GROUP(m)]; -+ -+ if (!bch2_member_exists(m)) -+ continue; -+ -+ g = BCH_MEMBER_GROUP(m); -+ while (g) { -+ dst = &cpu_g->entries[g - 1]; -+ __set_bit(i, dst->devs.d); -+ g = dst->parent; -+ } -+ } -+ -+ old_g = rcu_dereference_protected(c->disk_groups, -+ lockdep_is_held(&c->sb_lock)); -+ rcu_assign_pointer(c->disk_groups, cpu_g); -+ if (old_g) -+ kfree_rcu(old_g, rcu); -+ -+ return 0; -+} -+ -+const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *c, unsigned target) -+{ -+ struct target t = target_decode(target); -+ -+ switch (t.type) { -+ case TARGET_NULL: -+ return NULL; -+ case TARGET_DEV: { -+ struct bch_dev *ca = t.dev < c->sb.nr_devices -+ ? rcu_dereference(c->devs[t.dev]) -+ : NULL; -+ return ca ? &ca->self : NULL; -+ } -+ case TARGET_GROUP: { -+ struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups); -+ -+ return g && t.group < g->nr && !g->entries[t.group].deleted -+ ? &g->entries[t.group].devs -+ : NULL; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target) -+{ -+ struct target t = target_decode(target); -+ -+ switch (t.type) { -+ case TARGET_NULL: -+ return false; -+ case TARGET_DEV: -+ return dev == t.dev; -+ case TARGET_GROUP: { -+ struct bch_disk_groups_cpu *g; -+ const struct bch_devs_mask *m; -+ bool ret; -+ -+ rcu_read_lock(); -+ g = rcu_dereference(c->disk_groups); -+ m = g && t.group < g->nr && !g->entries[t.group].deleted -+ ? &g->entries[t.group].devs -+ : NULL; -+ -+ ret = m ? 
test_bit(dev, m->d) : false; -+ rcu_read_unlock(); -+ -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+static int __bch2_disk_group_find(struct bch_sb_field_disk_groups *groups, -+ unsigned parent, -+ const char *name, unsigned namelen) -+{ -+ unsigned i, nr_groups = disk_groups_nr(groups); -+ -+ if (!namelen || namelen > BCH_SB_LABEL_SIZE) -+ return -EINVAL; -+ -+ for (i = 0; i < nr_groups; i++) { -+ struct bch_disk_group *g = groups->entries + i; -+ -+ if (BCH_GROUP_DELETED(g)) -+ continue; -+ -+ if (!BCH_GROUP_DELETED(g) && -+ BCH_GROUP_PARENT(g) == parent && -+ strnlen(g->label, sizeof(g->label)) == namelen && -+ !memcmp(name, g->label, namelen)) -+ return i; -+ } -+ -+ return -1; -+} -+ -+static int __bch2_disk_group_add(struct bch_sb_handle *sb, unsigned parent, -+ const char *name, unsigned namelen) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ bch2_sb_get_disk_groups(sb->sb); -+ unsigned i, nr_groups = disk_groups_nr(groups); -+ struct bch_disk_group *g; -+ -+ if (!namelen || namelen > BCH_SB_LABEL_SIZE) -+ return -EINVAL; -+ -+ for (i = 0; -+ i < nr_groups && !BCH_GROUP_DELETED(&groups->entries[i]); -+ i++) -+ ; -+ -+ if (i == nr_groups) { -+ unsigned u64s = -+ (sizeof(struct bch_sb_field_disk_groups) + -+ sizeof(struct bch_disk_group) * (nr_groups + 1)) / -+ sizeof(u64); -+ -+ groups = bch2_sb_resize_disk_groups(sb, u64s); -+ if (!groups) -+ return -ENOSPC; -+ -+ nr_groups = disk_groups_nr(groups); -+ } -+ -+ BUG_ON(i >= nr_groups); -+ -+ g = &groups->entries[i]; -+ -+ memcpy(g->label, name, namelen); -+ if (namelen < sizeof(g->label)) -+ g->label[namelen] = '\0'; -+ SET_BCH_GROUP_DELETED(g, 0); -+ SET_BCH_GROUP_PARENT(g, parent); -+ SET_BCH_GROUP_DATA_ALLOWED(g, ~0); -+ -+ return i; -+} -+ -+int bch2_disk_path_find(struct bch_sb_handle *sb, const char *name) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ bch2_sb_get_disk_groups(sb->sb); -+ int v = -1; -+ -+ do { -+ const char *next = strchrnul(name, '.'); -+ unsigned len = next - name; -+ -+ if (*next == '.') -+ next++; -+ -+ v = __bch2_disk_group_find(groups, v + 1, name, len); -+ name = next; -+ } while (*name && v >= 0); -+ -+ return v; -+} -+ -+int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name) -+{ -+ struct bch_sb_field_disk_groups *groups; -+ unsigned parent = 0; -+ int v = -1; -+ -+ do { -+ const char *next = strchrnul(name, '.'); -+ unsigned len = next - name; -+ -+ if (*next == '.') -+ next++; -+ -+ groups = bch2_sb_get_disk_groups(sb->sb); -+ -+ v = __bch2_disk_group_find(groups, parent, name, len); -+ if (v < 0) -+ v = __bch2_disk_group_add(sb, parent, name, len); -+ if (v < 0) -+ return v; -+ -+ parent = v + 1; -+ name = next; -+ } while (*name && v >= 0); -+ -+ return v; -+} -+ -+void bch2_disk_path_to_text(struct printbuf *out, -+ struct bch_sb_handle *sb, -+ unsigned v) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ bch2_sb_get_disk_groups(sb->sb); -+ struct bch_disk_group *g; -+ unsigned nr = 0; -+ u16 path[32]; -+ -+ while (1) { -+ if (nr == ARRAY_SIZE(path)) -+ goto inval; -+ -+ if (v >= disk_groups_nr(groups)) -+ goto inval; -+ -+ g = groups->entries + v; -+ -+ if (BCH_GROUP_DELETED(g)) -+ goto inval; -+ -+ path[nr++] = v; -+ -+ if (!BCH_GROUP_PARENT(g)) -+ break; -+ -+ v = BCH_GROUP_PARENT(g) - 1; -+ } -+ -+ while (nr) { -+ v = path[--nr]; -+ g = groups->entries + v; -+ -+ bch_scnmemcpy(out, g->label, -+ strnlen(g->label, sizeof(g->label))); -+ -+ if (nr) -+ pr_buf(out, "."); -+ } -+ return; -+inval: -+ pr_buf(out, "invalid group %u", v); -+} -+ -+int 
bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) -+{ -+ struct bch_member *mi; -+ int v = -1; -+ int ret = 0; -+ -+ mutex_lock(&c->sb_lock); -+ -+ if (!strlen(name) || !strcmp(name, "none")) -+ goto write_sb; -+ -+ v = bch2_disk_path_find_or_create(&c->disk_sb, name); -+ if (v < 0) { -+ mutex_unlock(&c->sb_lock); -+ return v; -+ } -+ -+ ret = bch2_sb_disk_groups_to_cpu(c); -+ if (ret) -+ goto unlock; -+write_sb: -+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; -+ SET_BCH_MEMBER_GROUP(mi, v + 1); -+ -+ bch2_write_super(c); -+unlock: -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_opt_target_parse(struct bch_fs *c, const char *buf, u64 *v) -+{ -+ struct bch_dev *ca; -+ int g; -+ -+ if (!strlen(buf) || !strcmp(buf, "none")) { -+ *v = 0; -+ return 0; -+ } -+ -+ /* Is it a device? */ -+ ca = bch2_dev_lookup(c, buf); -+ if (!IS_ERR(ca)) { -+ *v = dev_to_target(ca->dev_idx); -+ percpu_ref_put(&ca->ref); -+ return 0; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ g = bch2_disk_path_find(&c->disk_sb, buf); -+ mutex_unlock(&c->sb_lock); -+ -+ if (g >= 0) { -+ *v = group_to_target(g); -+ return 0; -+ } -+ -+ return -EINVAL; -+} -+ -+void bch2_opt_target_to_text(struct printbuf *out, struct bch_fs *c, u64 v) -+{ -+ struct target t = target_decode(v); -+ -+ switch (t.type) { -+ case TARGET_NULL: -+ pr_buf(out, "none"); -+ break; -+ case TARGET_DEV: { -+ struct bch_dev *ca; -+ -+ rcu_read_lock(); -+ ca = t.dev < c->sb.nr_devices -+ ? rcu_dereference(c->devs[t.dev]) -+ : NULL; -+ -+ if (ca && percpu_ref_tryget(&ca->io_ref)) { -+ char b[BDEVNAME_SIZE]; -+ -+ pr_buf(out, "/dev/%s", -+ bdevname(ca->disk_sb.bdev, b)); -+ percpu_ref_put(&ca->io_ref); -+ } else if (ca) { -+ pr_buf(out, "offline device %u", t.dev); -+ } else { -+ pr_buf(out, "invalid device %u", t.dev); -+ } -+ -+ rcu_read_unlock(); -+ break; -+ } -+ case TARGET_GROUP: -+ mutex_lock(&c->sb_lock); -+ bch2_disk_path_to_text(out, &c->disk_sb, t.group); -+ mutex_unlock(&c->sb_lock); -+ break; -+ default: -+ BUG(); -+ } -+} -diff --git a/fs/bcachefs/disk_groups.h b/fs/bcachefs/disk_groups.h -new file mode 100644 -index 000000000000..3d84f23c34ed ---- /dev/null -+++ b/fs/bcachefs/disk_groups.h -@@ -0,0 +1,91 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_DISK_GROUPS_H -+#define _BCACHEFS_DISK_GROUPS_H -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_disk_groups; -+ -+static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups) -+{ -+ return groups -+ ? 
(vstruct_end(&groups->field) - -+ (void *) &groups->entries[0]) / sizeof(struct bch_disk_group) -+ : 0; -+} -+ -+struct target { -+ enum { -+ TARGET_NULL, -+ TARGET_DEV, -+ TARGET_GROUP, -+ } type; -+ union { -+ unsigned dev; -+ unsigned group; -+ }; -+}; -+ -+#define TARGET_DEV_START 1 -+#define TARGET_GROUP_START (256 + TARGET_DEV_START) -+ -+static inline u16 dev_to_target(unsigned dev) -+{ -+ return TARGET_DEV_START + dev; -+} -+ -+static inline u16 group_to_target(unsigned group) -+{ -+ return TARGET_GROUP_START + group; -+} -+ -+static inline struct target target_decode(unsigned target) -+{ -+ if (target >= TARGET_GROUP_START) -+ return (struct target) { -+ .type = TARGET_GROUP, -+ .group = target - TARGET_GROUP_START -+ }; -+ -+ if (target >= TARGET_DEV_START) -+ return (struct target) { -+ .type = TARGET_DEV, -+ .group = target - TARGET_DEV_START -+ }; -+ -+ return (struct target) { .type = TARGET_NULL }; -+} -+ -+const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *, unsigned); -+ -+static inline struct bch_devs_mask target_rw_devs(struct bch_fs *c, -+ enum bch_data_type data_type, -+ u16 target) -+{ -+ struct bch_devs_mask devs = c->rw_devs[data_type]; -+ const struct bch_devs_mask *t = bch2_target_to_mask(c, target); -+ -+ if (t) -+ bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX); -+ return devs; -+} -+ -+bool bch2_dev_in_target(struct bch_fs *, unsigned, unsigned); -+ -+int bch2_disk_path_find(struct bch_sb_handle *, const char *); -+ -+/* Exported for userspace bcachefs-tools: */ -+int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *); -+ -+void bch2_disk_path_to_text(struct printbuf *, struct bch_sb_handle *, -+ unsigned); -+ -+int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *); -+void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, u64); -+ -+int bch2_sb_disk_groups_to_cpu(struct bch_fs *); -+ -+int bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *); -+ -+const char *bch2_sb_validate_disk_groups(struct bch_sb *, -+ struct bch_sb_field *); -+ -+#endif /* _BCACHEFS_DISK_GROUPS_H */ -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -new file mode 100644 -index 000000000000..eac750ad2240 ---- /dev/null -+++ b/fs/bcachefs/ec.c -@@ -0,0 +1,1636 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+/* erasure coding */ -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_on_stack.h" -+#include "bset.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "io.h" -+#include "keylist.h" -+#include "recovery.h" -+#include "super-io.h" -+#include "util.h" -+ -+#include -+ -+#ifdef __KERNEL__ -+ -+#include -+#include -+ -+static void raid5_recov(unsigned disks, unsigned failed_idx, -+ size_t size, void **data) -+{ -+ unsigned i = 2, nr; -+ -+ BUG_ON(failed_idx >= disks); -+ -+ swap(data[0], data[failed_idx]); -+ memcpy(data[0], data[1], size); -+ -+ while (i < disks) { -+ nr = min_t(unsigned, disks - i, MAX_XOR_BLOCKS); -+ xor_blocks(nr, size, data[0], data + i); -+ i += nr; -+ } -+ -+ swap(data[0], data[failed_idx]); -+} -+ -+static void raid_gen(int nd, int np, size_t size, void **v) -+{ -+ if (np >= 1) -+ raid5_recov(nd + np, nd, size, v); -+ if (np >= 2) -+ raid6_call.gen_syndrome(nd + np, size, v); -+ BUG_ON(np > 2); -+} -+ -+static void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v) -+{ -+ switch (nr) { -+ case 0: -+ break; -+ case 1: -+ if (ir[0] < nd + 1) -+ raid5_recov(nd + 
1, ir[0], size, v); -+ else -+ raid6_call.gen_syndrome(nd + np, size, v); -+ break; -+ case 2: -+ if (ir[1] < nd) { -+ /* data+data failure. */ -+ raid6_2data_recov(nd + np, size, ir[0], ir[1], v); -+ } else if (ir[0] < nd) { -+ /* data + p/q failure */ -+ -+ if (ir[1] == nd) /* data + p failure */ -+ raid6_datap_recov(nd + np, size, ir[0], v); -+ else { /* data + q failure */ -+ raid5_recov(nd + 1, ir[0], size, v); -+ raid6_call.gen_syndrome(nd + np, size, v); -+ } -+ } else { -+ raid_gen(nd, np, size, v); -+ } -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+#else -+ -+#include -+ -+#endif -+ -+struct ec_bio { -+ struct bch_dev *ca; -+ struct ec_stripe_buf *buf; -+ size_t idx; -+ struct bio bio; -+}; -+ -+/* Stripes btree keys: */ -+ -+const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; -+ -+ if (k.k->p.inode) -+ return "invalid stripe key"; -+ -+ if (bkey_val_bytes(k.k) < sizeof(*s)) -+ return "incorrect value size"; -+ -+ if (bkey_val_bytes(k.k) < sizeof(*s) || -+ bkey_val_u64s(k.k) < stripe_val_u64s(s)) -+ return "incorrect value size"; -+ -+ return bch2_bkey_ptrs_invalid(c, k); -+} -+ -+void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; -+ unsigned i; -+ -+ pr_buf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u", -+ s->algorithm, -+ le16_to_cpu(s->sectors), -+ s->nr_blocks - s->nr_redundant, -+ s->nr_redundant, -+ s->csum_type, -+ 1U << s->csum_granularity_bits); -+ -+ for (i = 0; i < s->nr_blocks; i++) -+ pr_buf(out, " %u:%llu:%u", s->ptrs[i].dev, -+ (u64) s->ptrs[i].offset, -+ stripe_blockcount_get(s, i)); -+} -+ -+static int ptr_matches_stripe(struct bch_fs *c, -+ struct bch_stripe *v, -+ const struct bch_extent_ptr *ptr) -+{ -+ unsigned i; -+ -+ for (i = 0; i < v->nr_blocks - v->nr_redundant; i++) { -+ const struct bch_extent_ptr *ptr2 = v->ptrs + i; -+ -+ if (ptr->dev == ptr2->dev && -+ ptr->gen == ptr2->gen && -+ ptr->offset >= ptr2->offset && -+ ptr->offset < ptr2->offset + le16_to_cpu(v->sectors)) -+ return i; -+ } -+ -+ return -1; -+} -+ -+static int extent_matches_stripe(struct bch_fs *c, -+ struct bch_stripe *v, -+ struct bkey_s_c k) -+{ -+ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: { -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ const struct bch_extent_ptr *ptr; -+ int idx; -+ -+ extent_for_each_ptr(e, ptr) { -+ idx = ptr_matches_stripe(c, v, ptr); -+ if (idx >= 0) -+ return idx; -+ } -+ break; -+ } -+ } -+ -+ return -1; -+} -+ -+static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: { -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ const union bch_extent_entry *entry; -+ -+ extent_for_each_entry(e, entry) -+ if (extent_entry_type(entry) == -+ BCH_EXTENT_ENTRY_stripe_ptr && -+ entry->stripe_ptr.idx == idx) -+ return true; -+ -+ break; -+ } -+ } -+ -+ return false; -+} -+ -+/* Checksumming: */ -+ -+static void ec_generate_checksums(struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned csum_granularity = 1 << v->csum_granularity_bits; -+ unsigned csums_per_device = stripe_csums_per_device(v); -+ unsigned csum_bytes = bch_crc_bytes[v->csum_type]; -+ unsigned i, j; -+ -+ if (!csum_bytes) -+ return; -+ -+ BUG_ON(buf->offset); -+ BUG_ON(buf->size != le16_to_cpu(v->sectors)); -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ for (j = 0; j < csums_per_device; j++) { -+ unsigned offset = j << 
v->csum_granularity_bits; -+ unsigned len = min(csum_granularity, buf->size - offset); -+ -+ struct bch_csum csum = -+ bch2_checksum(NULL, v->csum_type, -+ null_nonce(), -+ buf->data[i] + (offset << 9), -+ len << 9); -+ -+ memcpy(stripe_csum(v, i, j), &csum, csum_bytes); -+ } -+ } -+} -+ -+static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned csum_granularity = 1 << v->csum_granularity_bits; -+ unsigned csum_bytes = bch_crc_bytes[v->csum_type]; -+ unsigned i; -+ -+ if (!csum_bytes) -+ return; -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ unsigned offset = buf->offset; -+ unsigned end = buf->offset + buf->size; -+ -+ if (!test_bit(i, buf->valid)) -+ continue; -+ -+ while (offset < end) { -+ unsigned j = offset >> v->csum_granularity_bits; -+ unsigned len = min(csum_granularity, end - offset); -+ struct bch_csum csum; -+ -+ BUG_ON(offset & (csum_granularity - 1)); -+ BUG_ON(offset + len != le16_to_cpu(v->sectors) && -+ ((offset + len) & (csum_granularity - 1))); -+ -+ csum = bch2_checksum(NULL, v->csum_type, -+ null_nonce(), -+ buf->data[i] + ((offset - buf->offset) << 9), -+ len << 9); -+ -+ if (memcmp(stripe_csum(v, i, j), &csum, csum_bytes)) { -+ __bcache_io_error(c, -+ "checksum error while doing reconstruct read (%u:%u)", -+ i, j); -+ clear_bit(i, buf->valid); -+ break; -+ } -+ -+ offset += len; -+ } -+ } -+} -+ -+/* Erasure coding: */ -+ -+static void ec_generate_ec(struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned nr_data = v->nr_blocks - v->nr_redundant; -+ unsigned bytes = le16_to_cpu(v->sectors) << 9; -+ -+ raid_gen(nr_data, v->nr_redundant, bytes, buf->data); -+} -+ -+static unsigned __ec_nr_failed(struct ec_stripe_buf *buf, unsigned nr) -+{ -+ return nr - bitmap_weight(buf->valid, nr); -+} -+ -+static unsigned ec_nr_failed(struct ec_stripe_buf *buf) -+{ -+ return __ec_nr_failed(buf, buf->key.v.nr_blocks); -+} -+ -+static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned i, failed[EC_STRIPE_MAX], nr_failed = 0; -+ unsigned nr_data = v->nr_blocks - v->nr_redundant; -+ unsigned bytes = buf->size << 9; -+ -+ if (ec_nr_failed(buf) > v->nr_redundant) { -+ __bcache_io_error(c, -+ "error doing reconstruct read: unable to read enough blocks"); -+ return -1; -+ } -+ -+ for (i = 0; i < nr_data; i++) -+ if (!test_bit(i, buf->valid)) -+ failed[nr_failed++] = i; -+ -+ raid_rec(nr_failed, failed, nr_data, v->nr_redundant, bytes, buf->data); -+ return 0; -+} -+ -+/* IO: */ -+ -+static void ec_block_endio(struct bio *bio) -+{ -+ struct ec_bio *ec_bio = container_of(bio, struct ec_bio, bio); -+ struct bch_dev *ca = ec_bio->ca; -+ struct closure *cl = bio->bi_private; -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s: %s", -+ bio_data_dir(bio) ? 
"write" : "read", -+ bch2_blk_status_to_str(bio->bi_status))) -+ clear_bit(ec_bio->idx, ec_bio->buf->valid); -+ -+ bio_put(&ec_bio->bio); -+ percpu_ref_put(&ca->io_ref); -+ closure_put(cl); -+} -+ -+static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, -+ unsigned rw, unsigned idx, struct closure *cl) -+{ -+ struct bch_stripe *v = &buf->key.v; -+ unsigned offset = 0, bytes = buf->size << 9; -+ struct bch_extent_ptr *ptr = &v->ptrs[idx]; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ if (!bch2_dev_get_ioref(ca, rw)) { -+ clear_bit(idx, buf->valid); -+ return; -+ } -+ -+ while (offset < bytes) { -+ unsigned nr_iovecs = min_t(size_t, BIO_MAX_PAGES, -+ DIV_ROUND_UP(bytes, PAGE_SIZE)); -+ unsigned b = min_t(size_t, bytes - offset, -+ nr_iovecs << PAGE_SHIFT); -+ struct ec_bio *ec_bio; -+ -+ ec_bio = container_of(bio_alloc_bioset(GFP_KERNEL, nr_iovecs, -+ &c->ec_bioset), -+ struct ec_bio, bio); -+ -+ ec_bio->ca = ca; -+ ec_bio->buf = buf; -+ ec_bio->idx = idx; -+ -+ bio_set_dev(&ec_bio->bio, ca->disk_sb.bdev); -+ bio_set_op_attrs(&ec_bio->bio, rw, 0); -+ -+ ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9); -+ ec_bio->bio.bi_end_io = ec_block_endio; -+ ec_bio->bio.bi_private = cl; -+ -+ bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b); -+ -+ closure_get(cl); -+ percpu_ref_get(&ca->io_ref); -+ -+ submit_bio(&ec_bio->bio); -+ -+ offset += b; -+ } -+ -+ percpu_ref_put(&ca->io_ref); -+} -+ -+/* recovery read path: */ -+int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct ec_stripe_buf *buf; -+ struct closure cl; -+ struct bkey_s_c k; -+ struct bch_stripe *v; -+ unsigned stripe_idx; -+ unsigned offset, end; -+ unsigned i, nr_data, csum_granularity; -+ int ret = 0, idx; -+ -+ closure_init_stack(&cl); -+ -+ BUG_ON(!rbio->pick.has_ec); -+ -+ stripe_idx = rbio->pick.ec.idx; -+ -+ buf = kzalloc(sizeof(*buf), GFP_NOIO); -+ if (!buf) -+ return -ENOMEM; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, -+ POS(0, stripe_idx), -+ BTREE_ITER_SLOTS); -+ k = bch2_btree_iter_peek_slot(iter); -+ if (bkey_err(k) || k.k->type != KEY_TYPE_stripe) { -+ __bcache_io_error(c, -+ "error doing reconstruct read: stripe not found"); -+ kfree(buf); -+ return bch2_trans_exit(&trans) ?: -EIO; -+ } -+ -+ bkey_reassemble(&buf->key.k_i, k); -+ bch2_trans_exit(&trans); -+ -+ v = &buf->key.v; -+ -+ nr_data = v->nr_blocks - v->nr_redundant; -+ -+ idx = ptr_matches_stripe(c, v, &rbio->pick.ptr); -+ BUG_ON(idx < 0); -+ -+ csum_granularity = 1U << v->csum_granularity_bits; -+ -+ offset = rbio->bio.bi_iter.bi_sector - v->ptrs[idx].offset; -+ end = offset + bio_sectors(&rbio->bio); -+ -+ BUG_ON(end > le16_to_cpu(v->sectors)); -+ -+ buf->offset = round_down(offset, csum_granularity); -+ buf->size = min_t(unsigned, le16_to_cpu(v->sectors), -+ round_up(end, csum_granularity)) - buf->offset; -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ buf->data[i] = kmalloc(buf->size << 9, GFP_NOIO); -+ if (!buf->data[i]) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ } -+ -+ memset(buf->valid, 0xFF, sizeof(buf->valid)); -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ struct bch_extent_ptr *ptr = v->ptrs + i; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ if (ptr_stale(ca, ptr)) { -+ __bcache_io_error(c, -+ "error doing reconstruct read: stale pointer"); -+ clear_bit(i, buf->valid); -+ continue; -+ } -+ -+ ec_block_io(c, buf, REQ_OP_READ, i, &cl); -+ } -+ -+ 
closure_sync(&cl); -+ -+ if (ec_nr_failed(buf) > v->nr_redundant) { -+ __bcache_io_error(c, -+ "error doing reconstruct read: unable to read enough blocks"); -+ ret = -EIO; -+ goto err; -+ } -+ -+ ec_validate_checksums(c, buf); -+ -+ ret = ec_do_recov(c, buf); -+ if (ret) -+ goto err; -+ -+ memcpy_to_bio(&rbio->bio, rbio->bio.bi_iter, -+ buf->data[idx] + ((offset - buf->offset) << 9)); -+err: -+ for (i = 0; i < v->nr_blocks; i++) -+ kfree(buf->data[i]); -+ kfree(buf); -+ return ret; -+} -+ -+/* stripe bucket accounting: */ -+ -+static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) -+{ -+ ec_stripes_heap n, *h = &c->ec_stripes_heap; -+ -+ if (idx >= h->size) { -+ if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), gfp)) -+ return -ENOMEM; -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ if (n.size > h->size) { -+ memcpy(n.data, h->data, h->used * sizeof(h->data[0])); -+ n.used = h->used; -+ swap(*h, n); -+ } -+ spin_unlock(&c->ec_stripes_heap_lock); -+ -+ free_heap(&n); -+ } -+ -+ if (!genradix_ptr_alloc(&c->stripes[0], idx, gfp)) -+ return -ENOMEM; -+ -+ if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING && -+ !genradix_ptr_alloc(&c->stripes[1], idx, gfp)) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+static int ec_stripe_mem_alloc(struct bch_fs *c, -+ struct btree_iter *iter) -+{ -+ size_t idx = iter->pos.offset; -+ int ret = 0; -+ -+ if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT|__GFP_NOWARN)) -+ return ret; -+ -+ bch2_trans_unlock(iter->trans); -+ ret = -EINTR; -+ -+ if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL)) -+ return ret; -+ -+ return -ENOMEM; -+} -+ -+static ssize_t stripe_idx_to_delete(struct bch_fs *c) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ -+ return h->used && h->data[0].blocks_nonempty == 0 -+ ? h->data[0].idx : -1; -+} -+ -+static inline int ec_stripes_heap_cmp(ec_stripes_heap *h, -+ struct ec_stripe_heap_entry l, -+ struct ec_stripe_heap_entry r) -+{ -+ return ((l.blocks_nonempty > r.blocks_nonempty) - -+ (l.blocks_nonempty < r.blocks_nonempty)); -+} -+ -+static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h, -+ size_t i) -+{ -+ struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap); -+ -+ genradix_ptr(&c->stripes[0], h->data[i].idx)->heap_idx = i; -+} -+ -+static void heap_verify_backpointer(struct bch_fs *c, size_t idx) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ struct stripe *m = genradix_ptr(&c->stripes[0], idx); -+ -+ BUG_ON(!m->alive); -+ BUG_ON(m->heap_idx >= h->used); -+ BUG_ON(h->data[m->heap_idx].idx != idx); -+} -+ -+void bch2_stripes_heap_del(struct bch_fs *c, -+ struct stripe *m, size_t idx) -+{ -+ if (!m->on_heap) -+ return; -+ -+ m->on_heap = false; -+ -+ heap_verify_backpointer(c, idx); -+ -+ heap_del(&c->ec_stripes_heap, m->heap_idx, -+ ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+} -+ -+void bch2_stripes_heap_insert(struct bch_fs *c, -+ struct stripe *m, size_t idx) -+{ -+ if (m->on_heap) -+ return; -+ -+ BUG_ON(heap_full(&c->ec_stripes_heap)); -+ -+ m->on_heap = true; -+ -+ heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) { -+ .idx = idx, -+ .blocks_nonempty = m->blocks_nonempty, -+ }), -+ ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+ -+ heap_verify_backpointer(c, idx); -+} -+ -+void bch2_stripes_heap_update(struct bch_fs *c, -+ struct stripe *m, size_t idx) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ size_t i; -+ -+ if (!m->on_heap) -+ return; -+ -+ heap_verify_backpointer(c, idx); -+ -+ h->data[m->heap_idx].blocks_nonempty = 
m->blocks_nonempty; -+ -+ i = m->heap_idx; -+ heap_sift_up(h, i, ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+ heap_sift_down(h, i, ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+ -+ heap_verify_backpointer(c, idx); -+ -+ if (stripe_idx_to_delete(c) >= 0 && -+ !percpu_ref_is_dying(&c->writes)) -+ schedule_work(&c->ec_stripe_delete_work); -+} -+ -+/* stripe deletion */ -+ -+static int ec_stripe_delete(struct bch_fs *c, size_t idx) -+{ -+ //pr_info("deleting stripe %zu", idx); -+ return bch2_btree_delete_range(c, BTREE_ID_EC, -+ POS(0, idx), -+ POS(0, idx + 1), -+ NULL); -+} -+ -+static void ec_stripe_delete_work(struct work_struct *work) -+{ -+ struct bch_fs *c = -+ container_of(work, struct bch_fs, ec_stripe_delete_work); -+ ssize_t idx; -+ -+ while (1) { -+ spin_lock(&c->ec_stripes_heap_lock); -+ idx = stripe_idx_to_delete(c); -+ if (idx < 0) { -+ spin_unlock(&c->ec_stripes_heap_lock); -+ break; -+ } -+ -+ bch2_stripes_heap_del(c, genradix_ptr(&c->stripes[0], idx), idx); -+ spin_unlock(&c->ec_stripes_heap_lock); -+ -+ if (ec_stripe_delete(c, idx)) -+ break; -+ } -+} -+ -+/* stripe creation: */ -+ -+static int ec_stripe_bkey_insert(struct bch_fs *c, -+ struct bkey_i_stripe *stripe) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bpos start_pos = POS(0, c->ec_stripe_hint); -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EC, start_pos, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) { -+ if (start_pos.offset) { -+ start_pos = POS_MIN; -+ bch2_btree_iter_set_pos(iter, start_pos); -+ continue; -+ } -+ -+ ret = -ENOSPC; -+ break; -+ } -+ -+ if (bkey_deleted(k.k)) -+ goto found_slot; -+ } -+ -+ goto err; -+found_slot: -+ start_pos = iter->pos; -+ -+ ret = ec_stripe_mem_alloc(c, iter); -+ if (ret) -+ goto err; -+ -+ stripe->k.p = iter->pos; -+ -+ bch2_trans_update(&trans, iter, &stripe->k_i, 0); -+ -+ ret = bch2_trans_commit(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+err: -+ bch2_trans_iter_put(&trans, iter); -+ -+ if (ret == -EINTR) -+ goto retry; -+ -+ c->ec_stripe_hint = ret ? 
start_pos.offset : start_pos.offset + 1; -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+static void extent_stripe_ptr_add(struct bkey_s_extent e, -+ struct ec_stripe_buf *s, -+ struct bch_extent_ptr *ptr, -+ unsigned block) -+{ -+ struct bch_extent_stripe_ptr *dst = (void *) ptr; -+ union bch_extent_entry *end = extent_entry_last(e); -+ -+ memmove_u64s_up(dst + 1, dst, (u64 *) end - (u64 *) dst); -+ e.k->u64s += sizeof(*dst) / sizeof(u64); -+ -+ *dst = (struct bch_extent_stripe_ptr) { -+ .type = 1 << BCH_EXTENT_ENTRY_stripe_ptr, -+ .block = block, -+ .idx = s->key.k.p.offset, -+ }; -+} -+ -+static int ec_stripe_update_ptrs(struct bch_fs *c, -+ struct ec_stripe_buf *s, -+ struct bkey *pos) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_extent e; -+ struct bkey_on_stack sk; -+ int ret = 0, dev, idx; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ /* XXX this doesn't support the reflink btree */ -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ bkey_start_pos(pos), -+ BTREE_ITER_INTENT); -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret = bkey_err(k)) && -+ bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) { -+ struct bch_extent_ptr *ptr, *ec_ptr = NULL; -+ -+ if (extent_has_stripe_ptr(k, s->key.k.p.offset)) { -+ bch2_btree_iter_next(iter); -+ continue; -+ } -+ -+ idx = extent_matches_stripe(c, &s->key.v, k); -+ if (idx < 0) { -+ bch2_btree_iter_next(iter); -+ continue; -+ } -+ -+ dev = s->key.v.ptrs[idx].dev; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ e = bkey_i_to_s_extent(sk.k); -+ -+ bch2_bkey_drop_ptrs(e.s, ptr, ptr->dev != dev); -+ ec_ptr = (void *) bch2_bkey_has_device(e.s_c, dev); -+ BUG_ON(!ec_ptr); -+ -+ extent_stripe_ptr_add(e, s, ec_ptr, idx); -+ -+ bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); -+ bch2_trans_update(&trans, iter, sk.k, 0); -+ -+ ret = bch2_trans_commit(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE); -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ -+ return ret; -+} -+ -+/* -+ * data buckets of new stripe all written: create the stripe -+ */ -+static void ec_stripe_create(struct ec_stripe_new *s) -+{ -+ struct bch_fs *c = s->c; -+ struct open_bucket *ob; -+ struct bkey_i *k; -+ struct stripe *m; -+ struct bch_stripe *v = &s->stripe.key.v; -+ unsigned i, nr_data = v->nr_blocks - v->nr_redundant; -+ struct closure cl; -+ int ret; -+ -+ BUG_ON(s->h->s == s); -+ -+ closure_init_stack(&cl); -+ -+ if (s->err) { -+ if (s->err != -EROFS) -+ bch_err(c, "error creating stripe: error writing data buckets"); -+ goto err; -+ } -+ -+ BUG_ON(!s->allocated); -+ -+ if (!percpu_ref_tryget(&c->writes)) -+ goto err; -+ -+ BUG_ON(bitmap_weight(s->blocks_allocated, -+ s->blocks.nr) != s->blocks.nr); -+ -+ ec_generate_ec(&s->stripe); -+ -+ ec_generate_checksums(&s->stripe); -+ -+ /* write p/q: */ -+ for (i = nr_data; i < v->nr_blocks; i++) -+ ec_block_io(c, &s->stripe, REQ_OP_WRITE, i, &cl); -+ -+ closure_sync(&cl); -+ -+ for (i = nr_data; i < v->nr_blocks; i++) -+ if (!test_bit(i, s->stripe.valid)) { -+ bch_err(c, "error creating stripe: error writing redundancy buckets"); -+ goto err_put_writes; -+ } -+ -+ ret = s->existing_stripe -+ ? 
bch2_btree_insert(c, BTREE_ID_EC, &s->stripe.key.k_i, -+ NULL, NULL, BTREE_INSERT_NOFAIL) -+ : ec_stripe_bkey_insert(c, &s->stripe.key); -+ if (ret) { -+ bch_err(c, "error creating stripe: error creating stripe key"); -+ goto err_put_writes; -+ } -+ -+ for_each_keylist_key(&s->keys, k) { -+ ret = ec_stripe_update_ptrs(c, &s->stripe, &k->k); -+ if (ret) { -+ bch_err(c, "error creating stripe: error updating pointers"); -+ break; -+ } -+ } -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ m = genradix_ptr(&c->stripes[0], s->stripe.key.k.p.offset); -+#if 0 -+ pr_info("created a %s stripe %llu", -+ s->existing_stripe ? "existing" : "new", -+ s->stripe.key.k.p.offset); -+#endif -+ BUG_ON(m->on_heap); -+ bch2_stripes_heap_insert(c, m, s->stripe.key.k.p.offset); -+ spin_unlock(&c->ec_stripes_heap_lock); -+err_put_writes: -+ percpu_ref_put(&c->writes); -+err: -+ open_bucket_for_each(c, &s->blocks, ob, i) { -+ ob->ec = NULL; -+ __bch2_open_bucket_put(c, ob); -+ } -+ -+ bch2_open_buckets_put(c, &s->parity); -+ -+ bch2_keylist_free(&s->keys, s->inline_keys); -+ -+ for (i = 0; i < s->stripe.key.v.nr_blocks; i++) -+ kvpfree(s->stripe.data[i], s->stripe.size << 9); -+ kfree(s); -+} -+ -+static void ec_stripe_create_work(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(work, -+ struct bch_fs, ec_stripe_create_work); -+ struct ec_stripe_new *s, *n; -+restart: -+ mutex_lock(&c->ec_stripe_new_lock); -+ list_for_each_entry_safe(s, n, &c->ec_stripe_new_list, list) -+ if (!atomic_read(&s->pin)) { -+ list_del(&s->list); -+ mutex_unlock(&c->ec_stripe_new_lock); -+ ec_stripe_create(s); -+ goto restart; -+ } -+ mutex_unlock(&c->ec_stripe_new_lock); -+} -+ -+static void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s) -+{ -+ BUG_ON(atomic_read(&s->pin) <= 0); -+ -+ if (atomic_dec_and_test(&s->pin)) { -+ BUG_ON(!s->pending); -+ queue_work(system_long_wq, &c->ec_stripe_create_work); -+ } -+} -+ -+static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h) -+{ -+ struct ec_stripe_new *s = h->s; -+ -+ BUG_ON(!s->allocated && !s->err); -+ -+ h->s = NULL; -+ s->pending = true; -+ -+ mutex_lock(&c->ec_stripe_new_lock); -+ list_add(&s->list, &c->ec_stripe_new_list); -+ mutex_unlock(&c->ec_stripe_new_lock); -+ -+ ec_stripe_new_put(c, s); -+} -+ -+/* have a full bucket - hand it off to be erasure coded: */ -+void bch2_ec_bucket_written(struct bch_fs *c, struct open_bucket *ob) -+{ -+ struct ec_stripe_new *s = ob->ec; -+ -+ if (ob->sectors_free) -+ s->err = -1; -+ -+ ec_stripe_new_put(c, s); -+} -+ -+void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob) -+{ -+ struct ec_stripe_new *s = ob->ec; -+ -+ s->err = -EIO; -+} -+ -+void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp) -+{ -+ struct open_bucket *ob = ec_open_bucket(c, &wp->ptrs); -+ struct bch_dev *ca; -+ unsigned offset; -+ -+ if (!ob) -+ return NULL; -+ -+ ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ offset = ca->mi.bucket_size - ob->sectors_free; -+ -+ return ob->ec->stripe.data[ob->ec_idx] + (offset << 9); -+} -+ -+void bch2_ec_add_backpointer(struct bch_fs *c, struct write_point *wp, -+ struct bpos pos, unsigned sectors) -+{ -+ struct open_bucket *ob = ec_open_bucket(c, &wp->ptrs); -+ struct ec_stripe_new *ec; -+ -+ if (!ob) -+ return; -+ -+ //pr_info("adding backpointer at %llu:%llu", pos.inode, pos.offset); -+ -+ ec = ob->ec; -+ mutex_lock(&ec->lock); -+ -+ if (bch2_keylist_realloc(&ec->keys, ec->inline_keys, -+ ARRAY_SIZE(ec->inline_keys), -+ BKEY_U64s)) { -+ BUG(); -+ } -+ -+ 
bkey_init(&ec->keys.top->k); -+ ec->keys.top->k.p = pos; -+ bch2_key_resize(&ec->keys.top->k, sectors); -+ bch2_keylist_push(&ec->keys); -+ -+ mutex_unlock(&ec->lock); -+} -+ -+static int unsigned_cmp(const void *_l, const void *_r) -+{ -+ unsigned l = *((const unsigned *) _l); -+ unsigned r = *((const unsigned *) _r); -+ -+ return cmp_int(l, r); -+} -+ -+/* pick most common bucket size: */ -+static unsigned pick_blocksize(struct bch_fs *c, -+ struct bch_devs_mask *devs) -+{ -+ struct bch_dev *ca; -+ unsigned i, nr = 0, sizes[BCH_SB_MEMBERS_MAX]; -+ struct { -+ unsigned nr, size; -+ } cur = { 0, 0 }, best = { 0, 0 }; -+ -+ for_each_member_device_rcu(ca, c, i, devs) -+ sizes[nr++] = ca->mi.bucket_size; -+ -+ sort(sizes, nr, sizeof(unsigned), unsigned_cmp, NULL); -+ -+ for (i = 0; i < nr; i++) { -+ if (sizes[i] != cur.size) { -+ if (cur.nr > best.nr) -+ best = cur; -+ -+ cur.nr = 0; -+ cur.size = sizes[i]; -+ } -+ -+ cur.nr++; -+ } -+ -+ if (cur.nr > best.nr) -+ best = cur; -+ -+ return best.size; -+} -+ -+static bool may_create_new_stripe(struct bch_fs *c) -+{ -+ return false; -+} -+ -+static void ec_stripe_key_init(struct bch_fs *c, -+ struct bkey_i_stripe *s, -+ unsigned nr_data, -+ unsigned nr_parity, -+ unsigned stripe_size) -+{ -+ unsigned u64s; -+ -+ bkey_stripe_init(&s->k_i); -+ s->v.sectors = cpu_to_le16(stripe_size); -+ s->v.algorithm = 0; -+ s->v.nr_blocks = nr_data + nr_parity; -+ s->v.nr_redundant = nr_parity; -+ s->v.csum_granularity_bits = ilog2(c->sb.encoded_extent_max); -+ s->v.csum_type = BCH_CSUM_CRC32C; -+ s->v.pad = 0; -+ -+ while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) { -+ BUG_ON(1 << s->v.csum_granularity_bits >= -+ le16_to_cpu(s->v.sectors) || -+ s->v.csum_granularity_bits == U8_MAX); -+ s->v.csum_granularity_bits++; -+ } -+ -+ set_bkey_val_u64s(&s->k, u64s); -+} -+ -+static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) -+{ -+ struct ec_stripe_new *s; -+ unsigned i; -+ -+ lockdep_assert_held(&h->lock); -+ -+ s = kzalloc(sizeof(*s), GFP_KERNEL); -+ if (!s) -+ return -ENOMEM; -+ -+ mutex_init(&s->lock); -+ atomic_set(&s->pin, 1); -+ s->c = c; -+ s->h = h; -+ s->nr_data = min_t(unsigned, h->nr_active_devs, -+ EC_STRIPE_MAX) - h->redundancy; -+ s->nr_parity = h->redundancy; -+ -+ bch2_keylist_init(&s->keys, s->inline_keys); -+ -+ s->stripe.offset = 0; -+ s->stripe.size = h->blocksize; -+ memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid)); -+ -+ ec_stripe_key_init(c, &s->stripe.key, s->nr_data, -+ s->nr_parity, h->blocksize); -+ -+ for (i = 0; i < s->stripe.key.v.nr_blocks; i++) { -+ s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL); -+ if (!s->stripe.data[i]) -+ goto err; -+ } -+ -+ h->s = s; -+ -+ return 0; -+err: -+ for (i = 0; i < s->stripe.key.v.nr_blocks; i++) -+ kvpfree(s->stripe.data[i], s->stripe.size << 9); -+ kfree(s); -+ return -ENOMEM; -+} -+ -+static struct ec_stripe_head * -+ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target, -+ unsigned algo, unsigned redundancy) -+{ -+ struct ec_stripe_head *h; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ h = kzalloc(sizeof(*h), GFP_KERNEL); -+ if (!h) -+ return NULL; -+ -+ mutex_init(&h->lock); -+ mutex_lock(&h->lock); -+ -+ h->target = target; -+ h->algo = algo; -+ h->redundancy = redundancy; -+ -+ rcu_read_lock(); -+ h->devs = target_rw_devs(c, BCH_DATA_user, target); -+ -+ for_each_member_device_rcu(ca, c, i, &h->devs) -+ if (!ca->mi.durability) -+ __clear_bit(i, h->devs.d); -+ -+ h->blocksize = pick_blocksize(c, &h->devs); -+ -+ 
for_each_member_device_rcu(ca, c, i, &h->devs) -+ if (ca->mi.bucket_size == h->blocksize) -+ h->nr_active_devs++; -+ -+ rcu_read_unlock(); -+ list_add(&h->list, &c->ec_stripe_head_list); -+ return h; -+} -+ -+void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h) -+{ -+ if (h->s && -+ h->s->allocated && -+ bitmap_weight(h->s->blocks_allocated, -+ h->s->blocks.nr) == h->s->blocks.nr) -+ ec_stripe_set_pending(c, h); -+ -+ mutex_unlock(&h->lock); -+} -+ -+struct ec_stripe_head *__bch2_ec_stripe_head_get(struct bch_fs *c, -+ unsigned target, -+ unsigned algo, -+ unsigned redundancy) -+{ -+ struct ec_stripe_head *h; -+ -+ if (!redundancy) -+ return NULL; -+ -+ mutex_lock(&c->ec_stripe_head_lock); -+ list_for_each_entry(h, &c->ec_stripe_head_list, list) -+ if (h->target == target && -+ h->algo == algo && -+ h->redundancy == redundancy) { -+ mutex_lock(&h->lock); -+ goto found; -+ } -+ -+ h = ec_new_stripe_head_alloc(c, target, algo, redundancy); -+found: -+ mutex_unlock(&c->ec_stripe_head_lock); -+ return h; -+} -+ -+/* -+ * XXX: use a higher watermark for allocating open buckets here: -+ */ -+static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h) -+{ -+ struct bch_devs_mask devs; -+ struct open_bucket *ob; -+ unsigned i, nr_have, nr_data = -+ min_t(unsigned, h->nr_active_devs, -+ EC_STRIPE_MAX) - h->redundancy; -+ bool have_cache = true; -+ int ret = 0; -+ -+ devs = h->devs; -+ -+ for_each_set_bit(i, h->s->blocks_allocated, EC_STRIPE_MAX) { -+ __clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d); -+ --nr_data; -+ } -+ -+ BUG_ON(h->s->blocks.nr > nr_data); -+ BUG_ON(h->s->parity.nr > h->redundancy); -+ -+ open_bucket_for_each(c, &h->s->parity, ob, i) -+ __clear_bit(ob->ptr.dev, devs.d); -+ open_bucket_for_each(c, &h->s->blocks, ob, i) -+ __clear_bit(ob->ptr.dev, devs.d); -+ -+ percpu_down_read(&c->mark_lock); -+ rcu_read_lock(); -+ -+ if (h->s->parity.nr < h->redundancy) { -+ nr_have = h->s->parity.nr; -+ -+ ret = bch2_bucket_alloc_set(c, &h->s->parity, -+ &h->parity_stripe, -+ &devs, -+ h->redundancy, -+ &nr_have, -+ &have_cache, -+ RESERVE_NONE, -+ 0, -+ NULL); -+ if (ret) -+ goto err; -+ } -+ -+ if (h->s->blocks.nr < nr_data) { -+ nr_have = h->s->blocks.nr; -+ -+ ret = bch2_bucket_alloc_set(c, &h->s->blocks, -+ &h->block_stripe, -+ &devs, -+ nr_data, -+ &nr_have, -+ &have_cache, -+ RESERVE_NONE, -+ 0, -+ NULL); -+ if (ret) -+ goto err; -+ } -+err: -+ rcu_read_unlock(); -+ percpu_up_read(&c->mark_lock); -+ return ret; -+} -+ -+/* XXX: doesn't obey target: */ -+static s64 get_existing_stripe(struct bch_fs *c, -+ unsigned target, -+ unsigned algo, -+ unsigned redundancy) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ struct stripe *m; -+ size_t heap_idx; -+ u64 stripe_idx; -+ -+ if (may_create_new_stripe(c)) -+ return -1; -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ for (heap_idx = 0; heap_idx < h->used; heap_idx++) { -+ if (!h->data[heap_idx].blocks_nonempty) -+ continue; -+ -+ stripe_idx = h->data[heap_idx].idx; -+ m = genradix_ptr(&c->stripes[0], stripe_idx); -+ -+ if (m->algorithm == algo && -+ m->nr_redundant == redundancy && -+ m->blocks_nonempty < m->nr_blocks - m->nr_redundant) { -+ bch2_stripes_heap_del(c, m, stripe_idx); -+ spin_unlock(&c->ec_stripes_heap_lock); -+ return stripe_idx; -+ } -+ } -+ -+ spin_unlock(&c->ec_stripes_heap_lock); -+ return -1; -+} -+ -+static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ 
-+ bch2_trans_init(&trans, c, 0, 0); -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, idx), BTREE_ITER_SLOTS); -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (!ret) -+ bkey_reassemble(&stripe->key.k_i, k); -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, -+ unsigned target, -+ unsigned algo, -+ unsigned redundancy) -+{ -+ struct closure cl; -+ struct ec_stripe_head *h; -+ struct open_bucket *ob; -+ unsigned i, data_idx = 0; -+ s64 idx; -+ -+ closure_init_stack(&cl); -+ -+ h = __bch2_ec_stripe_head_get(c, target, algo, redundancy); -+ if (!h) -+ return NULL; -+ -+ if (!h->s && ec_new_stripe_alloc(c, h)) { -+ bch2_ec_stripe_head_put(c, h); -+ return NULL; -+ } -+ -+ if (!h->s->allocated) { -+ if (!h->s->existing_stripe && -+ (idx = get_existing_stripe(c, target, algo, redundancy)) >= 0) { -+ //pr_info("got existing stripe %llu", idx); -+ -+ h->s->existing_stripe = true; -+ h->s->existing_stripe_idx = idx; -+ if (get_stripe_key(c, idx, &h->s->stripe)) { -+ /* btree error */ -+ BUG(); -+ } -+ -+ for (i = 0; i < h->s->stripe.key.v.nr_blocks; i++) -+ if (stripe_blockcount_get(&h->s->stripe.key.v, i)) { -+ __set_bit(i, h->s->blocks_allocated); -+ ec_block_io(c, &h->s->stripe, READ, i, &cl); -+ } -+ } -+ -+ if (new_stripe_alloc_buckets(c, h)) { -+ bch2_ec_stripe_head_put(c, h); -+ h = NULL; -+ goto out; -+ } -+ -+ open_bucket_for_each(c, &h->s->blocks, ob, i) { -+ data_idx = find_next_zero_bit(h->s->blocks_allocated, -+ h->s->nr_data, data_idx); -+ BUG_ON(data_idx >= h->s->nr_data); -+ -+ h->s->stripe.key.v.ptrs[data_idx] = ob->ptr; -+ h->s->data_block_idx[i] = data_idx; -+ data_idx++; -+ } -+ -+ open_bucket_for_each(c, &h->s->parity, ob, i) -+ h->s->stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr; -+ -+ //pr_info("new stripe, blocks_allocated %lx", h->s->blocks_allocated[0]); -+ h->s->allocated = true; -+ } -+out: -+ closure_sync(&cl); -+ return h; -+} -+ -+void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct ec_stripe_head *h; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ mutex_lock(&c->ec_stripe_head_lock); -+ list_for_each_entry(h, &c->ec_stripe_head_list, list) { -+ -+ mutex_lock(&h->lock); -+ if (!h->s) -+ goto unlock; -+ -+ open_bucket_for_each(c, &h->s->blocks, ob, i) -+ if (ob->ptr.dev == ca->dev_idx) -+ goto found; -+ open_bucket_for_each(c, &h->s->parity, ob, i) -+ if (ob->ptr.dev == ca->dev_idx) -+ goto found; -+ goto unlock; -+found: -+ h->s->err = -EROFS; -+ ec_stripe_set_pending(c, h); -+unlock: -+ mutex_unlock(&h->lock); -+ } -+ mutex_unlock(&c->ec_stripe_head_lock); -+} -+ -+static int __bch2_stripe_write_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct stripe *m, -+ size_t idx, -+ struct bkey_i_stripe *new_key) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c k; -+ unsigned i; -+ int ret; -+ -+ bch2_btree_iter_set_pos(iter, POS(0, idx)); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ return ret; -+ -+ if (k.k->type != KEY_TYPE_stripe) -+ return -EIO; -+ -+ bkey_reassemble(&new_key->k_i, k); -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ -+ for (i = 0; i < new_key->v.nr_blocks; i++) -+ stripe_blockcount_set(&new_key->v, i, -+ m->block_sectors[i]); -+ m->dirty = false; -+ -+ spin_unlock(&c->ec_stripes_heap_lock); -+ -+ bch2_trans_update(trans, iter, &new_key->k_i, 0); -+ return 0; -+} -+ -+int bch2_stripes_write(struct bch_fs *c, unsigned flags) -+{ -+ struct btree_trans trans; -+ struct btree_iter 
*iter; -+ struct genradix_iter giter; -+ struct bkey_i_stripe *new_key; -+ struct stripe *m; -+ int ret = 0; -+ -+ new_key = kmalloc(255 * sizeof(u64), GFP_KERNEL); -+ BUG_ON(!new_key); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ -+ genradix_for_each(&c->stripes[0], giter, m) { -+ if (!m->dirty) -+ continue; -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL|flags, -+ __bch2_stripe_write_key(&trans, iter, m, -+ giter.pos, new_key)); -+ -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_exit(&trans); -+ -+ kfree(new_key); -+ -+ return ret; -+} -+ -+static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bkey_s_c k) -+{ -+ int ret = 0; -+ -+ if (k.k->type == KEY_TYPE_stripe) { -+ struct stripe *m; -+ -+ ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?: -+ bch2_mark_key(c, k, 0, 0, NULL, 0, -+ BTREE_TRIGGER_NOATOMIC); -+ if (ret) -+ return ret; -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ m = genradix_ptr(&c->stripes[0], k.k->p.offset); -+ bch2_stripes_heap_insert(c, m, k.k->p.offset); -+ spin_unlock(&c->ec_stripes_heap_lock); -+ } -+ -+ return ret; -+} -+ -+int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys) -+{ -+ int ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_EC, -+ NULL, bch2_stripes_read_fn); -+ if (ret) -+ bch_err(c, "error reading stripes: %i", ret); -+ -+ return ret; -+} -+ -+int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ size_t i, idx = 0; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, U64_MAX), 0); -+ -+ k = bch2_btree_iter_prev(iter); -+ if (!IS_ERR_OR_NULL(k.k)) -+ idx = k.k->p.offset + 1; -+ ret = bch2_trans_exit(&trans); -+ if (ret) -+ return ret; -+ -+ if (!idx) -+ return 0; -+ -+ if (!gc && -+ !init_heap(&c->ec_stripes_heap, roundup_pow_of_two(idx), -+ GFP_KERNEL)) -+ return -ENOMEM; -+#if 0 -+ ret = genradix_prealloc(&c->stripes[gc], idx, GFP_KERNEL); -+#else -+ for (i = 0; i < idx; i++) -+ if (!genradix_ptr_alloc(&c->stripes[gc], i, GFP_KERNEL)) -+ return -ENOMEM; -+#endif -+ return 0; -+} -+ -+void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ struct stripe *m; -+ size_t i; -+ -+ spin_lock(&c->ec_stripes_heap_lock); -+ for (i = 0; i < min(h->used, 20UL); i++) { -+ m = genradix_ptr(&c->stripes[0], h->data[i].idx); -+ -+ pr_buf(out, "%zu %u/%u+%u\n", h->data[i].idx, -+ h->data[i].blocks_nonempty, -+ m->nr_blocks - m->nr_redundant, -+ m->nr_redundant); -+ } -+ spin_unlock(&c->ec_stripes_heap_lock); -+} -+ -+void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct ec_stripe_head *h; -+ struct ec_stripe_new *s; -+ -+ mutex_lock(&c->ec_stripe_head_lock); -+ list_for_each_entry(h, &c->ec_stripe_head_list, list) { -+ pr_buf(out, "target %u algo %u redundancy %u:\n", -+ h->target, h->algo, h->redundancy); -+ -+ if (h->s) -+ pr_buf(out, "\tpending: blocks %u allocated %u\n", -+ h->s->blocks.nr, -+ bitmap_weight(h->s->blocks_allocated, -+ h->s->blocks.nr)); -+ } -+ mutex_unlock(&c->ec_stripe_head_lock); -+ -+ mutex_lock(&c->ec_stripe_new_lock); -+ list_for_each_entry(s, &c->ec_stripe_new_list, list) { -+ pr_buf(out, "\tin flight: blocks %u allocated %u pin %u\n", -+ s->blocks.nr, -+ bitmap_weight(s->blocks_allocated, -+ 
s->blocks.nr), -+ atomic_read(&s->pin)); -+ } -+ mutex_unlock(&c->ec_stripe_new_lock); -+} -+ -+void bch2_fs_ec_exit(struct bch_fs *c) -+{ -+ struct ec_stripe_head *h; -+ -+ while (1) { -+ mutex_lock(&c->ec_stripe_head_lock); -+ h = list_first_entry_or_null(&c->ec_stripe_head_list, -+ struct ec_stripe_head, list); -+ if (h) -+ list_del(&h->list); -+ mutex_unlock(&c->ec_stripe_head_lock); -+ if (!h) -+ break; -+ -+ BUG_ON(h->s); -+ kfree(h); -+ } -+ -+ BUG_ON(!list_empty(&c->ec_stripe_new_list)); -+ -+ free_heap(&c->ec_stripes_heap); -+ genradix_free(&c->stripes[0]); -+ bioset_exit(&c->ec_bioset); -+} -+ -+int bch2_fs_ec_init(struct bch_fs *c) -+{ -+ INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work); -+ INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work); -+ -+ return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio), -+ BIOSET_NEED_BVECS); -+} -diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h -new file mode 100644 -index 000000000000..6db16cf768da ---- /dev/null -+++ b/fs/bcachefs/ec.h -@@ -0,0 +1,169 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EC_H -+#define _BCACHEFS_EC_H -+ -+#include "ec_types.h" -+#include "keylist_types.h" -+ -+const char *bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+#define bch2_bkey_ops_stripe (struct bkey_ops) { \ -+ .key_invalid = bch2_stripe_invalid, \ -+ .val_to_text = bch2_stripe_to_text, \ -+ .swab = bch2_ptr_swab, \ -+} -+ -+static inline unsigned stripe_csums_per_device(const struct bch_stripe *s) -+{ -+ return DIV_ROUND_UP(le16_to_cpu(s->sectors), -+ 1 << s->csum_granularity_bits); -+} -+ -+static inline unsigned stripe_csum_offset(const struct bch_stripe *s, -+ unsigned dev, unsigned csum_idx) -+{ -+ unsigned csum_bytes = bch_crc_bytes[s->csum_type]; -+ -+ return sizeof(struct bch_stripe) + -+ sizeof(struct bch_extent_ptr) * s->nr_blocks + -+ (dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes; -+} -+ -+static inline unsigned stripe_blockcount_offset(const struct bch_stripe *s, -+ unsigned idx) -+{ -+ return stripe_csum_offset(s, s->nr_blocks, 0) + -+ sizeof(u16) * idx; -+} -+ -+static inline unsigned stripe_blockcount_get(const struct bch_stripe *s, -+ unsigned idx) -+{ -+ return le16_to_cpup((void *) s + stripe_blockcount_offset(s, idx)); -+} -+ -+static inline void stripe_blockcount_set(struct bch_stripe *s, -+ unsigned idx, unsigned v) -+{ -+ __le16 *p = (void *) s + stripe_blockcount_offset(s, idx); -+ -+ *p = cpu_to_le16(v); -+} -+ -+static inline unsigned stripe_val_u64s(const struct bch_stripe *s) -+{ -+ return DIV_ROUND_UP(stripe_blockcount_offset(s, s->nr_blocks), -+ sizeof(u64)); -+} -+ -+static inline void *stripe_csum(struct bch_stripe *s, -+ unsigned dev, unsigned csum_idx) -+{ -+ return (void *) s + stripe_csum_offset(s, dev, csum_idx); -+} -+ -+struct bch_read_bio; -+ -+struct ec_stripe_buf { -+ /* might not be buffering the entire stripe: */ -+ unsigned offset; -+ unsigned size; -+ unsigned long valid[BITS_TO_LONGS(EC_STRIPE_MAX)]; -+ -+ void *data[EC_STRIPE_MAX]; -+ -+ union { -+ struct bkey_i_stripe key; -+ u64 pad[255]; -+ }; -+}; -+ -+struct ec_stripe_head; -+ -+struct ec_stripe_new { -+ struct bch_fs *c; -+ struct ec_stripe_head *h; -+ struct mutex lock; -+ struct list_head list; -+ -+ /* counts in flight writes, stripe is created when pin == 0 */ -+ atomic_t pin; -+ -+ int err; -+ -+ u8 nr_data; -+ u8 nr_parity; -+ bool allocated; -+ bool pending; -+ bool existing_stripe; -+ 
u64 existing_stripe_idx; -+ -+ unsigned long blocks_allocated[BITS_TO_LONGS(EC_STRIPE_MAX)]; -+ -+ struct open_buckets blocks; -+ u8 data_block_idx[EC_STRIPE_MAX]; -+ struct open_buckets parity; -+ -+ struct keylist keys; -+ u64 inline_keys[BKEY_U64s * 8]; -+ -+ struct ec_stripe_buf stripe; -+}; -+ -+struct ec_stripe_head { -+ struct list_head list; -+ struct mutex lock; -+ -+ unsigned target; -+ unsigned algo; -+ unsigned redundancy; -+ -+ struct bch_devs_mask devs; -+ unsigned nr_active_devs; -+ -+ unsigned blocksize; -+ -+ struct dev_stripe_state block_stripe; -+ struct dev_stripe_state parity_stripe; -+ -+ struct ec_stripe_new *s; -+}; -+ -+int bch2_ec_read_extent(struct bch_fs *, struct bch_read_bio *); -+ -+void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *); -+void bch2_ec_add_backpointer(struct bch_fs *, struct write_point *, -+ struct bpos, unsigned); -+ -+void bch2_ec_bucket_written(struct bch_fs *, struct open_bucket *); -+void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *); -+ -+int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *); -+ -+void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *); -+struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *, unsigned, -+ unsigned, unsigned); -+ -+void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t); -+void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t); -+void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t); -+ -+void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); -+ -+void bch2_ec_flush_new_stripes(struct bch_fs *); -+ -+struct journal_keys; -+int bch2_stripes_read(struct bch_fs *, struct journal_keys *); -+int bch2_stripes_write(struct bch_fs *, unsigned); -+ -+int bch2_ec_mem_alloc(struct bch_fs *, bool); -+ -+void bch2_stripes_heap_to_text(struct printbuf *, struct bch_fs *); -+void bch2_new_stripes_to_text(struct printbuf *, struct bch_fs *); -+ -+void bch2_fs_ec_exit(struct bch_fs *); -+int bch2_fs_ec_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_EC_H */ -diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h -new file mode 100644 -index 000000000000..e4d633fca5bf ---- /dev/null -+++ b/fs/bcachefs/ec_types.h -@@ -0,0 +1,39 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EC_TYPES_H -+#define _BCACHEFS_EC_TYPES_H -+ -+#include -+ -+#define EC_STRIPE_MAX 16 -+ -+struct bch_replicas_padded { -+ struct bch_replicas_entry e; -+ u8 pad[EC_STRIPE_MAX]; -+}; -+ -+struct stripe { -+ size_t heap_idx; -+ -+ u16 sectors; -+ u8 algorithm; -+ -+ u8 nr_blocks; -+ u8 nr_redundant; -+ -+ unsigned alive:1; -+ unsigned dirty:1; -+ unsigned on_heap:1; -+ u8 blocks_nonempty; -+ u16 block_sectors[EC_STRIPE_MAX]; -+ -+ struct bch_replicas_padded r; -+}; -+ -+struct ec_stripe_heap_entry { -+ size_t idx; -+ unsigned blocks_nonempty; -+}; -+ -+typedef HEAP(struct ec_stripe_heap_entry) ec_stripes_heap; -+ -+#endif /* _BCACHEFS_EC_TYPES_H */ -diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -new file mode 100644 -index 000000000000..cd46706fb6f5 ---- /dev/null -+++ b/fs/bcachefs/error.c -@@ -0,0 +1,172 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "error.h" -+#include "io.h" -+#include "super.h" -+ -+#define FSCK_ERR_RATELIMIT_NR 10 -+ -+bool bch2_inconsistent_error(struct bch_fs *c) -+{ -+ set_bit(BCH_FS_ERROR, &c->flags); -+ -+ switch (c->opts.errors) { -+ case BCH_ON_ERROR_CONTINUE: -+ return false; -+ case BCH_ON_ERROR_RO: -+ if (bch2_fs_emergency_read_only(c)) -+ 
bch_err(c, "emergency read only"); -+ return true; -+ case BCH_ON_ERROR_PANIC: -+ panic(bch2_fmt(c, "panic after error")); -+ return true; -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_fatal_error(struct bch_fs *c) -+{ -+ if (bch2_fs_emergency_read_only(c)) -+ bch_err(c, "emergency read only"); -+} -+ -+void bch2_io_error_work(struct work_struct *work) -+{ -+ struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); -+ struct bch_fs *c = ca->fs; -+ bool dev; -+ -+ down_write(&c->state_lock); -+ dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO, -+ BCH_FORCE_IF_DEGRADED); -+ if (dev -+ ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, -+ BCH_FORCE_IF_DEGRADED) -+ : bch2_fs_emergency_read_only(c)) -+ bch_err(ca, -+ "too many IO errors, setting %s RO", -+ dev ? "device" : "filesystem"); -+ up_write(&c->state_lock); -+} -+ -+void bch2_io_error(struct bch_dev *ca) -+{ -+ //queue_work(system_long_wq, &ca->io_error_work); -+} -+ -+#ifdef __KERNEL__ -+#define ask_yn() false -+#else -+#include "tools-util.h" -+#endif -+ -+enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags, -+ const char *fmt, ...) -+{ -+ struct fsck_err_state *s = NULL; -+ va_list args; -+ bool fix = false, print = true, suppressing = false; -+ char _buf[sizeof(s->buf)], *buf = _buf; -+ -+ if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) { -+ va_start(args, fmt); -+ vprintk(fmt, args); -+ va_end(args); -+ -+ return bch2_inconsistent_error(c) -+ ? FSCK_ERR_EXIT -+ : FSCK_ERR_FIX; -+ } -+ -+ mutex_lock(&c->fsck_error_lock); -+ -+ list_for_each_entry(s, &c->fsck_errors, list) -+ if (s->fmt == fmt) -+ goto found; -+ -+ s = kzalloc(sizeof(*s), GFP_NOFS); -+ if (!s) { -+ if (!c->fsck_alloc_err) -+ bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); -+ c->fsck_alloc_err = true; -+ buf = _buf; -+ goto print; -+ } -+ -+ INIT_LIST_HEAD(&s->list); -+ s->fmt = fmt; -+found: -+ list_move(&s->list, &c->fsck_errors); -+ s->nr++; -+ if (c->opts.ratelimit_errors && -+ s->nr >= FSCK_ERR_RATELIMIT_NR) { -+ if (s->nr == FSCK_ERR_RATELIMIT_NR) -+ suppressing = true; -+ else -+ print = false; -+ } -+ buf = s->buf; -+print: -+ va_start(args, fmt); -+ vscnprintf(buf, sizeof(_buf), fmt, args); -+ va_end(args); -+ -+ if (c->opts.fix_errors == FSCK_OPT_EXIT) { -+ bch_err(c, "%s, exiting", buf); -+ } else if (flags & FSCK_CAN_FIX) { -+ if (c->opts.fix_errors == FSCK_OPT_ASK) { -+ printk(KERN_ERR "%s: fix?", buf); -+ fix = ask_yn(); -+ } else if (c->opts.fix_errors == FSCK_OPT_YES || -+ (c->opts.nochanges && -+ !(flags & FSCK_CAN_IGNORE))) { -+ if (print) -+ bch_err(c, "%s, fixing", buf); -+ fix = true; -+ } else { -+ if (print) -+ bch_err(c, "%s, not fixing", buf); -+ fix = false; -+ } -+ } else if (flags & FSCK_NEED_FSCK) { -+ if (print) -+ bch_err(c, "%s (run fsck to correct)", buf); -+ } else { -+ if (print) -+ bch_err(c, "%s (repair unimplemented)", buf); -+ } -+ -+ if (suppressing) -+ bch_err(c, "Ratelimiting new instances of previous error"); -+ -+ mutex_unlock(&c->fsck_error_lock); -+ -+ if (fix) { -+ set_bit(BCH_FS_ERRORS_FIXED, &c->flags); -+ return FSCK_ERR_FIX; -+ } else { -+ set_bit(BCH_FS_ERROR, &c->flags); -+ return c->opts.fix_errors == FSCK_OPT_EXIT || -+ !(flags & FSCK_CAN_IGNORE) -+ ? 
FSCK_ERR_EXIT -+ : FSCK_ERR_IGNORE; -+ } -+} -+ -+void bch2_flush_fsck_errs(struct bch_fs *c) -+{ -+ struct fsck_err_state *s, *n; -+ -+ mutex_lock(&c->fsck_error_lock); -+ -+ list_for_each_entry_safe(s, n, &c->fsck_errors, list) { -+ if (s->ratelimited) -+ bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->buf); -+ -+ list_del(&s->list); -+ kfree(s); -+ } -+ -+ mutex_unlock(&c->fsck_error_lock); -+} -diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h -new file mode 100644 -index 000000000000..94b53312fbbd ---- /dev/null -+++ b/fs/bcachefs/error.h -@@ -0,0 +1,211 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ERROR_H -+#define _BCACHEFS_ERROR_H -+ -+#include -+#include -+ -+struct bch_dev; -+struct bch_fs; -+struct work_struct; -+ -+/* -+ * XXX: separate out errors that indicate on disk data is inconsistent, and flag -+ * superblock as such -+ */ -+ -+/* Error messages: */ -+ -+/* -+ * Inconsistency errors: The on disk data is inconsistent. If these occur during -+ * initial recovery, they don't indicate a bug in the running code - we walk all -+ * the metadata before modifying anything. If they occur at runtime, they -+ * indicate either a bug in the running code or (less likely) data is being -+ * silently corrupted under us. -+ * -+ * XXX: audit all inconsistent errors and make sure they're all recoverable, in -+ * BCH_ON_ERROR_CONTINUE mode -+ */ -+ -+bool bch2_inconsistent_error(struct bch_fs *); -+ -+#define bch2_fs_inconsistent(c, ...) \ -+({ \ -+ bch_err(c, __VA_ARGS__); \ -+ bch2_inconsistent_error(c); \ -+}) -+ -+#define bch2_fs_inconsistent_on(cond, c, ...) \ -+({ \ -+ int _ret = !!(cond); \ -+ \ -+ if (_ret) \ -+ bch2_fs_inconsistent(c, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* -+ * Later we might want to mark only the particular device inconsistent, not the -+ * entire filesystem: -+ */ -+ -+#define bch2_dev_inconsistent(ca, ...) \ -+do { \ -+ bch_err(ca, __VA_ARGS__); \ -+ bch2_inconsistent_error((ca)->fs); \ -+} while (0) -+ -+#define bch2_dev_inconsistent_on(cond, ca, ...) \ -+({ \ -+ int _ret = !!(cond); \ -+ \ -+ if (_ret) \ -+ bch2_dev_inconsistent(ca, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* -+ * Fsck errors: inconsistency errors we detect at mount time, and should ideally -+ * be able to repair: -+ */ -+ -+enum { -+ BCH_FSCK_OK = 0, -+ BCH_FSCK_ERRORS_NOT_FIXED = 1, -+ BCH_FSCK_REPAIR_UNIMPLEMENTED = 2, -+ BCH_FSCK_REPAIR_IMPOSSIBLE = 3, -+ BCH_FSCK_UNKNOWN_VERSION = 4, -+}; -+ -+enum fsck_err_opts { -+ FSCK_OPT_EXIT, -+ FSCK_OPT_YES, -+ FSCK_OPT_NO, -+ FSCK_OPT_ASK, -+}; -+ -+enum fsck_err_ret { -+ FSCK_ERR_IGNORE = 0, -+ FSCK_ERR_FIX = 1, -+ FSCK_ERR_EXIT = 2, -+}; -+ -+struct fsck_err_state { -+ struct list_head list; -+ const char *fmt; -+ u64 nr; -+ bool ratelimited; -+ char buf[512]; -+}; -+ -+#define FSCK_CAN_FIX (1 << 0) -+#define FSCK_CAN_IGNORE (1 << 1) -+#define FSCK_NEED_FSCK (1 << 2) -+ -+__printf(3, 4) __cold -+enum fsck_err_ret bch2_fsck_err(struct bch_fs *, -+ unsigned, const char *, ...); -+void bch2_flush_fsck_errs(struct bch_fs *); -+ -+#define __fsck_err(c, _flags, msg, ...) \ -+({ \ -+ int _fix = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__);\ -+ \ -+ if (_fix == FSCK_ERR_EXIT) { \ -+ bch_err(c, "Unable to continue, halting"); \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ } \ -+ \ -+ _fix; \ -+}) -+ -+/* These macros return true if error should be fixed: */ -+ -+/* XXX: mark in superblock that filesystem contains errors, if we ignore: */ -+ -+#define __fsck_err_on(cond, c, _flags, ...) \ -+ ((cond) ? 
__fsck_err(c, _flags, ##__VA_ARGS__) : false) -+ -+#define need_fsck_err_on(cond, c, ...) \ -+ __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) -+ -+#define need_fsck_err(c, ...) \ -+ __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) -+ -+#define mustfix_fsck_err(c, ...) \ -+ __fsck_err(c, FSCK_CAN_FIX, ##__VA_ARGS__) -+ -+#define mustfix_fsck_err_on(cond, c, ...) \ -+ __fsck_err_on(cond, c, FSCK_CAN_FIX, ##__VA_ARGS__) -+ -+#define fsck_err(c, ...) \ -+ __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) -+ -+#define fsck_err_on(cond, c, ...) \ -+ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) -+ -+/* -+ * Fatal errors: these don't indicate a bug, but we can't continue running in RW -+ * mode - pretty much just due to metadata IO errors: -+ */ -+ -+void bch2_fatal_error(struct bch_fs *); -+ -+#define bch2_fs_fatal_error(c, ...) \ -+do { \ -+ bch_err(c, __VA_ARGS__); \ -+ bch2_fatal_error(c); \ -+} while (0) -+ -+#define bch2_fs_fatal_err_on(cond, c, ...) \ -+({ \ -+ int _ret = !!(cond); \ -+ \ -+ if (_ret) \ -+ bch2_fs_fatal_error(c, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* -+ * IO errors: either recoverable metadata IO (because we have replicas), or data -+ * IO - we need to log it and print out a message, but we don't (necessarily) -+ * want to shut down the fs: -+ */ -+ -+void bch2_io_error_work(struct work_struct *); -+ -+/* Does the error handling without logging a message */ -+void bch2_io_error(struct bch_dev *); -+ -+/* Logs message and handles the error: */ -+#define bch2_dev_io_error(ca, fmt, ...) \ -+do { \ -+ printk_ratelimited(KERN_ERR bch2_fmt((ca)->fs, \ -+ "IO error on %s for " fmt), \ -+ (ca)->name, ##__VA_ARGS__); \ -+ bch2_io_error(ca); \ -+} while (0) -+ -+#define bch2_dev_io_err_on(cond, ca, ...) \ -+({ \ -+ bool _ret = (cond); \ -+ \ -+ if (_ret) \ -+ bch2_dev_io_error(ca, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* kill? */ -+ -+#define __bcache_io_error(c, fmt, ...) \ -+ printk_ratelimited(KERN_ERR bch2_fmt(c, \ -+ "IO error: " fmt), ##__VA_ARGS__) -+ -+#define bcache_io_error(c, bio, fmt, ...) 
\ -+do { \ -+ __bcache_io_error(c, fmt, ##__VA_ARGS__); \ -+ (bio)->bi_status = BLK_STS_IOERR; \ -+} while (0) -+ -+#endif /* _BCACHEFS_ERROR_H */ -diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c -new file mode 100644 -index 000000000000..fd011df3cb99 ---- /dev/null -+++ b/fs/bcachefs/extent_update.c -@@ -0,0 +1,229 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "debug.h" -+#include "extents.h" -+#include "extent_update.h" -+ -+/* -+ * This counts the number of iterators to the alloc & ec btrees we'll need -+ * inserting/removing this extent: -+ */ -+static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ unsigned ret = 0; -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ switch (__extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ ret++; -+ } -+ } -+ -+ return ret; -+} -+ -+static int count_iters_for_insert(struct btree_trans *trans, -+ struct bkey_s_c k, -+ unsigned offset, -+ struct bpos *end, -+ unsigned *nr_iters, -+ unsigned max_iters) -+{ -+ int ret = 0, ret2 = 0; -+ -+ if (*nr_iters >= max_iters) { -+ *end = bpos_min(*end, k.k->p); -+ ret = 1; -+ } -+ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ *nr_iters += bch2_bkey_nr_alloc_ptrs(k); -+ -+ if (*nr_iters >= max_iters) { -+ *end = bpos_min(*end, k.k->p); -+ ret = 1; -+ } -+ -+ break; -+ case KEY_TYPE_reflink_p: { -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ u64 idx = le64_to_cpu(p.v->idx); -+ unsigned sectors = bpos_min(*end, p.k->p).offset - -+ bkey_start_offset(p.k); -+ struct btree_iter *iter; -+ struct bkey_s_c r_k; -+ -+ for_each_btree_key(trans, iter, -+ BTREE_ID_REFLINK, POS(0, idx + offset), -+ BTREE_ITER_SLOTS, r_k, ret2) { -+ if (bkey_cmp(bkey_start_pos(r_k.k), -+ POS(0, idx + sectors)) >= 0) -+ break; -+ -+ /* extent_update_to_keys(), for the reflink_v update */ -+ *nr_iters += 1; -+ -+ *nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k); -+ -+ if (*nr_iters >= max_iters) { -+ struct bpos pos = bkey_start_pos(k.k); -+ pos.offset += min_t(u64, k.k->size, -+ r_k.k->p.offset - idx); -+ -+ *end = bpos_min(*end, pos); -+ ret = 1; -+ break; -+ } -+ } -+ -+ bch2_trans_iter_put(trans, iter); -+ break; -+ } -+ } -+ -+ return ret2 ?: ret; -+} -+ -+#define EXTENT_ITERS_MAX (BTREE_ITER_MAX / 3) -+ -+int bch2_extent_atomic_end(struct btree_iter *iter, -+ struct bkey_i *insert, -+ struct bpos *end) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct btree *b; -+ struct btree_node_iter node_iter; -+ struct bkey_packed *_k; -+ unsigned nr_iters = 0; -+ int ret; -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return ret; -+ -+ b = iter->l[0].b; -+ node_iter = iter->l[0].iter; -+ -+ BUG_ON(bkey_cmp(b->data->min_key, POS_MIN) && -+ bkey_cmp(bkey_start_pos(&insert->k), -+ bkey_predecessor(b->data->min_key)) < 0); -+ -+ *end = bpos_min(insert->k.p, b->key.k.p); -+ -+ /* extent_update_to_keys(): */ -+ nr_iters += 1; -+ -+ ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end, -+ &nr_iters, EXTENT_ITERS_MAX / 2); -+ if (ret < 0) -+ return ret; -+ -+ while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) { -+ struct bkey unpacked; -+ struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked); -+ unsigned offset = 0; -+ -+ if (bkey_cmp(bkey_start_pos(k.k), *end) 
>= 0) -+ break; -+ -+ if (bkey_cmp(bkey_start_pos(&insert->k), -+ bkey_start_pos(k.k)) > 0) -+ offset = bkey_start_offset(&insert->k) - -+ bkey_start_offset(k.k); -+ -+ /* extent_handle_overwrites(): */ -+ switch (bch2_extent_overlap(&insert->k, k.k)) { -+ case BCH_EXTENT_OVERLAP_ALL: -+ case BCH_EXTENT_OVERLAP_FRONT: -+ nr_iters += 1; -+ break; -+ case BCH_EXTENT_OVERLAP_BACK: -+ case BCH_EXTENT_OVERLAP_MIDDLE: -+ nr_iters += 2; -+ break; -+ } -+ -+ ret = count_iters_for_insert(trans, k, offset, end, -+ &nr_iters, EXTENT_ITERS_MAX); -+ if (ret) -+ break; -+ -+ bch2_btree_node_iter_advance(&node_iter, b); -+ } -+ -+ return ret < 0 ? ret : 0; -+} -+ -+int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter) -+{ -+ struct bpos end; -+ int ret; -+ -+ ret = bch2_extent_atomic_end(iter, k, &end); -+ if (ret) -+ return ret; -+ -+ bch2_cut_back(end, k); -+ return 0; -+} -+ -+int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter) -+{ -+ struct bpos end; -+ int ret; -+ -+ ret = bch2_extent_atomic_end(iter, k, &end); -+ if (ret) -+ return ret; -+ -+ return !bkey_cmp(end, k->k.p); -+} -+ -+enum btree_insert_ret -+bch2_extent_can_insert(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert) -+{ -+ struct btree_iter_level *l = &iter->l[0]; -+ struct btree_node_iter node_iter = l->iter; -+ struct bkey_packed *_k; -+ struct bkey_s_c k; -+ struct bkey unpacked; -+ int sectors; -+ -+ _k = bch2_btree_node_iter_peek(&node_iter, l->b); -+ if (!_k) -+ return BTREE_INSERT_OK; -+ -+ k = bkey_disassemble(l->b, _k, &unpacked); -+ -+ /* Check if we're splitting a compressed extent: */ -+ -+ if (bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k)) > 0 && -+ bkey_cmp(insert->k.p, k.k->p) < 0 && -+ (sectors = bch2_bkey_sectors_compressed(k))) { -+ int flags = trans->flags & BTREE_INSERT_NOFAIL -+ ? BCH_DISK_RESERVATION_NOFAIL : 0; -+ -+ switch (bch2_disk_reservation_add(trans->c, trans->disk_res, -+ sectors, flags)) { -+ case 0: -+ break; -+ case -ENOSPC: -+ return BTREE_INSERT_ENOSPC; -+ default: -+ BUG(); -+ } -+ } -+ -+ return BTREE_INSERT_OK; -+} -diff --git a/fs/bcachefs/extent_update.h b/fs/bcachefs/extent_update.h -new file mode 100644 -index 000000000000..38dc084627d2 ---- /dev/null -+++ b/fs/bcachefs/extent_update.h -@@ -0,0 +1,16 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EXTENT_UPDATE_H -+#define _BCACHEFS_EXTENT_UPDATE_H -+ -+#include "bcachefs.h" -+ -+int bch2_extent_atomic_end(struct btree_iter *, struct bkey_i *, -+ struct bpos *); -+int bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *); -+int bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *); -+ -+enum btree_insert_ret -+bch2_extent_can_insert(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *); -+ -+#endif /* _BCACHEFS_EXTENT_UPDATE_H */ -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -new file mode 100644 -index 000000000000..568f039edcff ---- /dev/null -+++ b/fs/bcachefs/extents.c -@@ -0,0 +1,1258 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2010 Kent Overstreet -+ * -+ * Code for managing the extent btree and dynamically updating the writeback -+ * dirty sector count. 
-+ */ -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_gc.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "error.h" -+#include "extents.h" -+#include "inode.h" -+#include "journal.h" -+#include "replicas.h" -+#include "super.h" -+#include "super-io.h" -+#include "util.h" -+ -+#include -+ -+static unsigned bch2_crc_field_size_max[] = { -+ [BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX, -+ [BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX, -+ [BCH_EXTENT_ENTRY_crc128] = CRC128_SIZE_MAX, -+}; -+ -+static void bch2_extent_crc_pack(union bch_extent_crc *, -+ struct bch_extent_crc_unpacked, -+ enum bch_extent_entry_type); -+ -+static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f, -+ unsigned dev) -+{ -+ struct bch_dev_io_failures *i; -+ -+ for (i = f->devs; i < f->devs + f->nr; i++) -+ if (i->dev == dev) -+ return i; -+ -+ return NULL; -+} -+ -+void bch2_mark_io_failure(struct bch_io_failures *failed, -+ struct extent_ptr_decoded *p) -+{ -+ struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev); -+ -+ if (!f) { -+ BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs)); -+ -+ f = &failed->devs[failed->nr++]; -+ f->dev = p->ptr.dev; -+ f->idx = p->idx; -+ f->nr_failed = 1; -+ f->nr_retries = 0; -+ } else if (p->idx != f->idx) { -+ f->idx = p->idx; -+ f->nr_failed = 1; -+ f->nr_retries = 0; -+ } else { -+ f->nr_failed++; -+ } -+} -+ -+/* -+ * returns true if p1 is better than p2: -+ */ -+static inline bool ptr_better(struct bch_fs *c, -+ const struct extent_ptr_decoded p1, -+ const struct extent_ptr_decoded p2) -+{ -+ if (likely(!p1.idx && !p2.idx)) { -+ struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev); -+ struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev); -+ -+ u64 l1 = atomic64_read(&dev1->cur_latency[READ]); -+ u64 l2 = atomic64_read(&dev2->cur_latency[READ]); -+ -+ /* Pick at random, biased in favor of the faster device: */ -+ -+ return bch2_rand_range(l1 + l2) > l1; -+ } -+ -+ if (force_reconstruct_read(c)) -+ return p1.idx > p2.idx; -+ -+ return p1.idx < p2.idx; -+} -+ -+/* -+ * This picks a non-stale pointer, preferably from a device other than @avoid. -+ * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to -+ * other devices, it will still pick a pointer from avoid. -+ */ -+int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, -+ struct bch_io_failures *failed, -+ struct extent_ptr_decoded *pick) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ struct bch_dev_io_failures *f; -+ struct bch_dev *ca; -+ int ret = 0; -+ -+ if (k.k->type == KEY_TYPE_error) -+ return -EIO; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ -+ /* -+ * If there are any dirty pointers it's an error if we can't -+ * read: -+ */ -+ if (!ret && !p.ptr.cached) -+ ret = -EIO; -+ -+ if (p.ptr.cached && ptr_stale(ca, &p.ptr)) -+ continue; -+ -+ f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL; -+ if (f) -+ p.idx = f->nr_failed < f->nr_retries -+ ? 
f->idx -+ : f->idx + 1; -+ -+ if (!p.idx && -+ !bch2_dev_is_readable(ca)) -+ p.idx++; -+ -+ if (force_reconstruct_read(c) && -+ !p.idx && p.has_ec) -+ p.idx++; -+ -+ if (p.idx >= (unsigned) p.has_ec + 1) -+ continue; -+ -+ if (ret > 0 && !ptr_better(c, p, *pick)) -+ continue; -+ -+ *pick = p; -+ ret = 1; -+ } -+ -+ return ret; -+} -+ -+/* KEY_TYPE_btree_ptr: */ -+ -+const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) -+ return "value too big"; -+ -+ return bch2_bkey_ptrs_invalid(c, k); -+} -+ -+void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ const char *err; -+ char buf[160]; -+ struct bucket_mark mark; -+ struct bch_dev *ca; -+ -+ if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) -+ return; -+ -+ if (!percpu_down_read_trylock(&c->mark_lock)) -+ return; -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ mark = ptr_bucket_mark(ca, ptr); -+ -+ err = "stale"; -+ if (gen_after(mark.gen, ptr->gen)) -+ goto err; -+ -+ err = "inconsistent"; -+ if (mark.data_type != BCH_DATA_btree || -+ mark.dirty_sectors < c->opts.btree_node_size) -+ goto err; -+ } -+out: -+ percpu_up_read(&c->mark_lock); -+ return; -+err: -+ bch2_fs_inconsistent(c, "%s btree pointer %s: bucket %zi gen %i mark %08x", -+ err, (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf), -+ PTR_BUCKET_NR(ca, ptr), -+ mark.gen, (unsigned) mark.v.counter); -+ goto out; -+} -+ -+void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); -+ -+ pr_buf(out, "seq %llx sectors %u written %u min_key ", -+ le64_to_cpu(bp.v->seq), -+ le16_to_cpu(bp.v->sectors), -+ le16_to_cpu(bp.v->sectors_written)); -+ -+ bch2_bpos_to_text(out, bp.v->min_key); -+ pr_buf(out, " "); -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version, -+ unsigned big_endian, int write, -+ struct bkey_s k) -+{ -+ struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(k); -+ -+ compat_bpos(0, btree_id, version, big_endian, write, &bp.v->min_key); -+ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_node_type_is_extents(btree_id) && -+ bkey_cmp(bp.v->min_key, POS_MIN)) -+ bp.v->min_key = write -+ ? bkey_predecessor(bp.v->min_key) -+ : bkey_successor(bp.v->min_key); -+} -+ -+/* KEY_TYPE_extent: */ -+ -+const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ return bch2_bkey_ptrs_invalid(c, k); -+} -+ -+void bch2_extent_debugcheck(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ char buf[160]; -+ -+ if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) || -+ !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) -+ return; -+ -+ if (!percpu_down_read_trylock(&c->mark_lock)) -+ return; -+ -+ extent_for_each_ptr_decode(e, p, entry) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ struct bucket_mark mark = ptr_bucket_mark(ca, &p.ptr); -+ unsigned stale = gen_after(mark.gen, p.ptr.gen); -+ unsigned disk_sectors = ptr_disk_sectors(p); -+ unsigned mark_sectors = p.ptr.cached -+ ? 
mark.cached_sectors -+ : mark.dirty_sectors; -+ -+ bch2_fs_inconsistent_on(stale && !p.ptr.cached, c, -+ "stale dirty pointer (ptr gen %u bucket %u", -+ p.ptr.gen, mark.gen); -+ -+ bch2_fs_inconsistent_on(stale > 96, c, -+ "key too stale: %i", stale); -+ -+ bch2_fs_inconsistent_on(!stale && -+ (mark.data_type != BCH_DATA_user || -+ mark_sectors < disk_sectors), c, -+ "extent pointer not marked: %s:\n" -+ "type %u sectors %u < %u", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c), buf), -+ mark.data_type, -+ mark_sectors, disk_sectors); -+ } -+ -+ percpu_up_read(&c->mark_lock); -+} -+ -+void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+enum merge_result bch2_extent_merge(struct bch_fs *c, -+ struct bkey_s _l, struct bkey_s _r) -+{ -+ struct bkey_s_extent l = bkey_s_to_extent(_l); -+ struct bkey_s_extent r = bkey_s_to_extent(_r); -+ union bch_extent_entry *en_l = l.v->start; -+ union bch_extent_entry *en_r = r.v->start; -+ struct bch_extent_crc_unpacked crc_l, crc_r; -+ -+ if (bkey_val_u64s(l.k) != bkey_val_u64s(r.k)) -+ return BCH_MERGE_NOMERGE; -+ -+ crc_l = bch2_extent_crc_unpack(l.k, NULL); -+ -+ extent_for_each_entry(l, en_l) { -+ en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data); -+ -+ if (extent_entry_type(en_l) != extent_entry_type(en_r)) -+ return BCH_MERGE_NOMERGE; -+ -+ switch (extent_entry_type(en_l)) { -+ case BCH_EXTENT_ENTRY_ptr: { -+ const struct bch_extent_ptr *lp = &en_l->ptr; -+ const struct bch_extent_ptr *rp = &en_r->ptr; -+ struct bch_dev *ca; -+ -+ if (lp->offset + crc_l.compressed_size != rp->offset || -+ lp->dev != rp->dev || -+ lp->gen != rp->gen) -+ return BCH_MERGE_NOMERGE; -+ -+ /* We don't allow extents to straddle buckets: */ -+ ca = bch_dev_bkey_exists(c, lp->dev); -+ -+ if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp)) -+ return BCH_MERGE_NOMERGE; -+ -+ break; -+ } -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ if (en_l->stripe_ptr.block != en_r->stripe_ptr.block || -+ en_l->stripe_ptr.idx != en_r->stripe_ptr.idx) -+ return BCH_MERGE_NOMERGE; -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ case BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: -+ crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l)); -+ crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r)); -+ -+ if (crc_l.csum_type != crc_r.csum_type || -+ crc_l.compression_type != crc_r.compression_type || -+ crc_l.nonce != crc_r.nonce) -+ return BCH_MERGE_NOMERGE; -+ -+ if (crc_l.offset + crc_l.live_size != crc_l.compressed_size || -+ crc_r.offset) -+ return BCH_MERGE_NOMERGE; -+ -+ if (!bch2_checksum_mergeable(crc_l.csum_type)) -+ return BCH_MERGE_NOMERGE; -+ -+ if (crc_is_compressed(crc_l)) -+ return BCH_MERGE_NOMERGE; -+ -+ if (crc_l.csum_type && -+ crc_l.uncompressed_size + -+ crc_r.uncompressed_size > c->sb.encoded_extent_max) -+ return BCH_MERGE_NOMERGE; -+ -+ if (crc_l.uncompressed_size + crc_r.uncompressed_size > -+ bch2_crc_field_size_max[extent_entry_type(en_l)]) -+ return BCH_MERGE_NOMERGE; -+ -+ break; -+ default: -+ return BCH_MERGE_NOMERGE; -+ } -+ } -+ -+ extent_for_each_entry(l, en_l) { -+ struct bch_extent_crc_unpacked crc_l, crc_r; -+ -+ en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data); -+ -+ if (!extent_entry_is_crc(en_l)) -+ continue; -+ -+ crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l)); -+ crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r)); -+ -+ crc_l.csum = bch2_checksum_merge(crc_l.csum_type, -+ crc_l.csum, -+ crc_r.csum, -+ crc_r.uncompressed_size << 9); -+ -+ 
crc_l.uncompressed_size += crc_r.uncompressed_size; -+ crc_l.compressed_size += crc_r.compressed_size; -+ -+ bch2_extent_crc_pack(entry_to_crc(en_l), crc_l, -+ extent_entry_type(en_l)); -+ } -+ -+ bch2_key_resize(l.k, l.k->size + r.k->size); -+ -+ return BCH_MERGE_MERGE; -+} -+ -+/* KEY_TYPE_reservation: */ -+ -+const char *bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); -+ -+ if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation)) -+ return "incorrect value size"; -+ -+ if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) -+ return "invalid nr_replicas"; -+ -+ return NULL; -+} -+ -+void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); -+ -+ pr_buf(out, "generation %u replicas %u", -+ le32_to_cpu(r.v->generation), -+ r.v->nr_replicas); -+} -+ -+enum merge_result bch2_reservation_merge(struct bch_fs *c, -+ struct bkey_s _l, struct bkey_s _r) -+{ -+ struct bkey_s_reservation l = bkey_s_to_reservation(_l); -+ struct bkey_s_reservation r = bkey_s_to_reservation(_r); -+ -+ if (l.v->generation != r.v->generation || -+ l.v->nr_replicas != r.v->nr_replicas) -+ return BCH_MERGE_NOMERGE; -+ -+ if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) { -+ bch2_key_resize(l.k, KEY_SIZE_MAX); -+ bch2_cut_front_s(l.k->p, r.s); -+ return BCH_MERGE_PARTIAL; -+ } -+ -+ bch2_key_resize(l.k, l.k->size + r.k->size); -+ -+ return BCH_MERGE_MERGE; -+} -+ -+/* Extent checksum entries: */ -+ -+/* returns true if not equal */ -+static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l, -+ struct bch_extent_crc_unpacked r) -+{ -+ return (l.csum_type != r.csum_type || -+ l.compression_type != r.compression_type || -+ l.compressed_size != r.compressed_size || -+ l.uncompressed_size != r.uncompressed_size || -+ l.offset != r.offset || -+ l.live_size != r.live_size || -+ l.nonce != r.nonce || -+ bch2_crc_cmp(l.csum, r.csum)); -+} -+ -+static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u, -+ struct bch_extent_crc_unpacked n) -+{ -+ return !crc_is_compressed(u) && -+ u.csum_type && -+ u.uncompressed_size > u.live_size && -+ bch2_csum_type_is_encryption(u.csum_type) == -+ bch2_csum_type_is_encryption(n.csum_type); -+} -+ -+bool bch2_can_narrow_extent_crcs(struct bkey_s_c k, -+ struct bch_extent_crc_unpacked n) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ struct bch_extent_crc_unpacked crc; -+ const union bch_extent_entry *i; -+ -+ if (!n.csum_type) -+ return false; -+ -+ bkey_for_each_crc(k.k, ptrs, crc, i) -+ if (can_narrow_crc(crc, n)) -+ return true; -+ -+ return false; -+} -+ -+/* -+ * We're writing another replica for this extent, so while we've got the data in -+ * memory we'll be computing a new checksum for the currently live data. 
-+ * -+ * If there are other replicas we aren't moving, and they are checksummed but -+ * not compressed, we can modify them to point to only the data that is -+ * currently live (so that readers won't have to bounce) while we've got the -+ * checksum we need: -+ */ -+bool bch2_bkey_narrow_crcs(struct bkey_i *k, struct bch_extent_crc_unpacked n) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ struct bch_extent_crc_unpacked u; -+ struct extent_ptr_decoded p; -+ union bch_extent_entry *i; -+ bool ret = false; -+ -+ /* Find a checksum entry that covers only live data: */ -+ if (!n.csum_type) { -+ bkey_for_each_crc(&k->k, ptrs, u, i) -+ if (!crc_is_compressed(u) && -+ u.csum_type && -+ u.live_size == u.uncompressed_size) { -+ n = u; -+ goto found; -+ } -+ return false; -+ } -+found: -+ BUG_ON(crc_is_compressed(n)); -+ BUG_ON(n.offset); -+ BUG_ON(n.live_size != k->k.size); -+ -+restart_narrow_pointers: -+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ -+ bkey_for_each_ptr_decode(&k->k, ptrs, p, i) -+ if (can_narrow_crc(p.crc, n)) { -+ bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr); -+ p.ptr.offset += p.crc.offset; -+ p.crc = n; -+ bch2_extent_ptr_decoded_append(k, &p); -+ ret = true; -+ goto restart_narrow_pointers; -+ } -+ -+ return ret; -+} -+ -+static void bch2_extent_crc_pack(union bch_extent_crc *dst, -+ struct bch_extent_crc_unpacked src, -+ enum bch_extent_entry_type type) -+{ -+#define set_common_fields(_dst, _src) \ -+ _dst.type = 1 << type; \ -+ _dst.csum_type = _src.csum_type, \ -+ _dst.compression_type = _src.compression_type, \ -+ _dst._compressed_size = _src.compressed_size - 1, \ -+ _dst._uncompressed_size = _src.uncompressed_size - 1, \ -+ _dst.offset = _src.offset -+ -+ switch (type) { -+ case BCH_EXTENT_ENTRY_crc32: -+ set_common_fields(dst->crc32, src); -+ dst->crc32.csum = *((__le32 *) &src.csum.lo); -+ break; -+ case BCH_EXTENT_ENTRY_crc64: -+ set_common_fields(dst->crc64, src); -+ dst->crc64.nonce = src.nonce; -+ dst->crc64.csum_lo = src.csum.lo; -+ dst->crc64.csum_hi = *((__le16 *) &src.csum.hi); -+ break; -+ case BCH_EXTENT_ENTRY_crc128: -+ set_common_fields(dst->crc128, src); -+ dst->crc128.nonce = src.nonce; -+ dst->crc128.csum = src.csum; -+ break; -+ default: -+ BUG(); -+ } -+#undef set_common_fields -+} -+ -+void bch2_extent_crc_append(struct bkey_i *k, -+ struct bch_extent_crc_unpacked new) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ union bch_extent_crc *crc = (void *) ptrs.end; -+ enum bch_extent_entry_type type; -+ -+ if (bch_crc_bytes[new.csum_type] <= 4 && -+ new.uncompressed_size <= CRC32_SIZE_MAX && -+ new.nonce <= CRC32_NONCE_MAX) -+ type = BCH_EXTENT_ENTRY_crc32; -+ else if (bch_crc_bytes[new.csum_type] <= 10 && -+ new.uncompressed_size <= CRC64_SIZE_MAX && -+ new.nonce <= CRC64_NONCE_MAX) -+ type = BCH_EXTENT_ENTRY_crc64; -+ else if (bch_crc_bytes[new.csum_type] <= 16 && -+ new.uncompressed_size <= CRC128_SIZE_MAX && -+ new.nonce <= CRC128_NONCE_MAX) -+ type = BCH_EXTENT_ENTRY_crc128; -+ else -+ BUG(); -+ -+ bch2_extent_crc_pack(crc, new, type); -+ -+ k->k.u64s += extent_entry_u64s(ptrs.end); -+ -+ EBUG_ON(bkey_val_u64s(&k->k) > BKEY_EXTENT_VAL_U64s_MAX); -+} -+ -+/* Generic code for keys with pointers: */ -+ -+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k) -+{ -+ return bch2_bkey_devs(k).nr; -+} -+ -+unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c k) -+{ -+ return k.k->type == KEY_TYPE_reservation -+ ? 
bkey_s_c_to_reservation(k).v->nr_replicas -+ : bch2_bkey_dirty_devs(k).nr; -+} -+ -+unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c k) -+{ -+ unsigned ret = 0; -+ -+ if (k.k->type == KEY_TYPE_reservation) { -+ ret = bkey_s_c_to_reservation(k).v->nr_replicas; -+ } else { -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ ret += !p.ptr.cached && !crc_is_compressed(p.crc); -+ } -+ -+ return ret; -+} -+ -+unsigned bch2_bkey_sectors_compressed(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned ret = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && crc_is_compressed(p.crc)) -+ ret += p.crc.compressed_size; -+ -+ return ret; -+} -+ -+bool bch2_bkey_is_incompressible(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct bch_extent_crc_unpacked crc; -+ -+ bkey_for_each_crc(k.k, ptrs, crc, entry) -+ if (crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) -+ return true; -+ return false; -+} -+ -+bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, -+ unsigned nr_replicas) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bpos end = pos; -+ struct bkey_s_c k; -+ bool ret = true; -+ int err; -+ -+ end.offset += size; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos, -+ BTREE_ITER_SLOTS, k, err) { -+ if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) -+ break; -+ -+ if (nr_replicas > bch2_bkey_nr_ptrs_fully_allocated(k)) { -+ ret = false; -+ break; -+ } -+ } -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+static unsigned bch2_extent_ptr_durability(struct bch_fs *c, -+ struct extent_ptr_decoded p) -+{ -+ unsigned durability = 0; -+ struct bch_dev *ca; -+ -+ if (p.ptr.cached) -+ return 0; -+ -+ ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ -+ if (ca->mi.state != BCH_MEMBER_STATE_FAILED) -+ durability = max_t(unsigned, durability, ca->mi.durability); -+ -+ if (p.has_ec) { -+ struct stripe *s = -+ genradix_ptr(&c->stripes[0], p.ec.idx); -+ -+ if (WARN_ON(!s)) -+ goto out; -+ -+ durability += s->nr_redundant; -+ } -+out: -+ return durability; -+} -+ -+unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned durability = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ durability += bch2_extent_ptr_durability(c, p); -+ -+ return durability; -+} -+ -+void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k, -+ unsigned target, -+ unsigned nr_desired_replicas) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ int extra = bch2_bkey_durability(c, k.s_c) - nr_desired_replicas; -+ -+ if (target && extra > 0) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ int n = bch2_extent_ptr_durability(c, p); -+ -+ if (n && n <= extra && -+ !bch2_dev_in_target(c, p.ptr.dev, target)) { -+ entry->ptr.cached = true; -+ extra -= n; -+ } -+ } -+ -+ if (extra > 0) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ int n = bch2_extent_ptr_durability(c, p); -+ -+ if (n && n <= extra) { -+ entry->ptr.cached = true; -+ extra -= n; -+ } -+ } -+} -+ -+void 
bch2_bkey_append_ptr(struct bkey_i *k, -+ struct bch_extent_ptr ptr) -+{ -+ EBUG_ON(bch2_bkey_has_device(bkey_i_to_s_c(k), ptr.dev)); -+ -+ switch (k->k.type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ case KEY_TYPE_extent: -+ EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX); -+ -+ ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; -+ -+ memcpy((void *) &k->v + bkey_val_bytes(&k->k), -+ &ptr, -+ sizeof(ptr)); -+ k->u64s++; -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static inline void __extent_entry_insert(struct bkey_i *k, -+ union bch_extent_entry *dst, -+ union bch_extent_entry *new) -+{ -+ union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k)); -+ -+ memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new), -+ dst, (u64 *) end - (u64 *) dst); -+ k->k.u64s += extent_entry_u64s(new); -+ memcpy(dst, new, extent_entry_bytes(new)); -+} -+ -+void bch2_extent_ptr_decoded_append(struct bkey_i *k, -+ struct extent_ptr_decoded *p) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ struct bch_extent_crc_unpacked crc = -+ bch2_extent_crc_unpack(&k->k, NULL); -+ union bch_extent_entry *pos; -+ -+ if (!bch2_crc_unpacked_cmp(crc, p->crc)) { -+ pos = ptrs.start; -+ goto found; -+ } -+ -+ bkey_for_each_crc(&k->k, ptrs, crc, pos) -+ if (!bch2_crc_unpacked_cmp(crc, p->crc)) { -+ pos = extent_entry_next(pos); -+ goto found; -+ } -+ -+ bch2_extent_crc_append(k, p->crc); -+ pos = bkey_val_end(bkey_i_to_s(k)); -+found: -+ p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; -+ __extent_entry_insert(k, pos, to_entry(&p->ptr)); -+ -+ if (p->has_ec) { -+ p->ec.type = 1 << BCH_EXTENT_ENTRY_stripe_ptr; -+ __extent_entry_insert(k, pos, to_entry(&p->ec)); -+ } -+} -+ -+static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs, -+ union bch_extent_entry *entry) -+{ -+ union bch_extent_entry *i = ptrs.start; -+ -+ if (i == entry) -+ return NULL; -+ -+ while (extent_entry_next(i) != entry) -+ i = extent_entry_next(i); -+ return i; -+} -+ -+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k, -+ struct bch_extent_ptr *ptr) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *dst, *src, *prev; -+ bool drop_crc = true; -+ -+ EBUG_ON(ptr < &ptrs.start->ptr || -+ ptr >= &ptrs.end->ptr); -+ EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr); -+ -+ src = extent_entry_next(to_entry(ptr)); -+ if (src != ptrs.end && -+ !extent_entry_is_crc(src)) -+ drop_crc = false; -+ -+ dst = to_entry(ptr); -+ while ((prev = extent_entry_prev(ptrs, dst))) { -+ if (extent_entry_is_ptr(prev)) -+ break; -+ -+ if (extent_entry_is_crc(prev)) { -+ if (drop_crc) -+ dst = prev; -+ break; -+ } -+ -+ dst = prev; -+ } -+ -+ memmove_u64s_down(dst, src, -+ (u64 *) ptrs.end - (u64 *) src); -+ k.k->u64s -= (u64 *) src - (u64 *) dst; -+ -+ return dst; -+} -+ -+void bch2_bkey_drop_device(struct bkey_s k, unsigned dev) -+{ -+ struct bch_extent_ptr *ptr; -+ -+ bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev); -+} -+ -+const struct bch_extent_ptr * -+bch2_bkey_has_device(struct bkey_s_c k, unsigned dev) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(ptrs, ptr) -+ if (ptr->dev == dev) -+ return ptr; -+ -+ return NULL; -+} -+ -+bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(ptrs, ptr) -+ if (bch2_dev_in_target(c, ptr->dev, target) && -+ (!ptr->cached || -+ 
!ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr))) -+ return true; -+ -+ return false; -+} -+ -+bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k, -+ struct bch_extent_ptr m, u64 offset) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (p.ptr.dev == m.dev && -+ p.ptr.gen == m.gen && -+ (s64) p.ptr.offset + p.crc.offset - bkey_start_offset(k.k) == -+ (s64) m.offset - offset) -+ return true; -+ -+ return false; -+} -+ -+/* -+ * bch_extent_normalize - clean up an extent, dropping stale pointers etc. -+ * -+ * Returns true if @k should be dropped entirely -+ * -+ * For existing keys, only called when btree nodes are being rewritten, not when -+ * they're merely being compacted/resorted in memory. -+ */ -+bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) -+{ -+ struct bch_extent_ptr *ptr; -+ -+ bch2_bkey_drop_ptrs(k, ptr, -+ ptr->cached && -+ ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)); -+ -+ /* will only happen if all pointers were cached: */ -+ if (!bch2_bkey_nr_ptrs(k.s_c)) -+ k.k->type = KEY_TYPE_discard; -+ -+ return bkey_whiteout(k.k); -+} -+ -+void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct bch_extent_crc_unpacked crc; -+ const struct bch_extent_ptr *ptr; -+ const struct bch_extent_stripe_ptr *ec; -+ struct bch_dev *ca; -+ bool first = true; -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ if (!first) -+ pr_buf(out, " "); -+ -+ switch (__extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ ptr = entry_to_ptr(entry); -+ ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] -+ ? bch_dev_bkey_exists(c, ptr->dev) -+ : NULL; -+ -+ pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev, -+ (u64) ptr->offset, ptr->gen, -+ ptr->cached ? " cached" : "", -+ ca && ptr_stale(ca, ptr) -+ ? 
" stale" : ""); -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ case BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: -+ crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); -+ -+ pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u", -+ crc.compressed_size, -+ crc.uncompressed_size, -+ crc.offset, crc.nonce, -+ crc.csum_type, -+ crc.compression_type); -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ ec = &entry->stripe_ptr; -+ -+ pr_buf(out, "ec: idx %llu block %u", -+ (u64) ec->idx, ec->block); -+ break; -+ default: -+ pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); -+ return; -+ } -+ -+ first = false; -+ } -+} -+ -+static const char *extent_ptr_invalid(const struct bch_fs *c, -+ struct bkey_s_c k, -+ const struct bch_extent_ptr *ptr, -+ unsigned size_ondisk, -+ bool metadata) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr2; -+ struct bch_dev *ca; -+ -+ if (!bch2_dev_exists2(c, ptr->dev)) -+ return "pointer to invalid device"; -+ -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ if (!ca) -+ return "pointer to invalid device"; -+ -+ bkey_for_each_ptr(ptrs, ptr2) -+ if (ptr != ptr2 && ptr->dev == ptr2->dev) -+ return "multiple pointers to same device"; -+ -+ if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets)) -+ return "offset past end of device"; -+ -+ if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) -+ return "offset before first bucket"; -+ -+ if (bucket_remainder(ca, ptr->offset) + -+ size_ondisk > ca->mi.bucket_size) -+ return "spans multiple buckets"; -+ -+ return NULL; -+} -+ -+const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct bch_extent_crc_unpacked crc; -+ unsigned size_ondisk = k.k->size; -+ const char *reason; -+ unsigned nonce = UINT_MAX; -+ -+ if (k.k->type == KEY_TYPE_btree_ptr) -+ size_ondisk = c->opts.btree_node_size; -+ if (k.k->type == KEY_TYPE_btree_ptr_v2) -+ size_ondisk = le16_to_cpu(bkey_s_c_to_btree_ptr_v2(k).v->sectors); -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) -+ return "invalid extent entry type"; -+ -+ if (k.k->type == KEY_TYPE_btree_ptr && -+ !extent_entry_is_ptr(entry)) -+ return "has non ptr field"; -+ -+ switch (extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ reason = extent_ptr_invalid(c, k, &entry->ptr, -+ size_ondisk, false); -+ if (reason) -+ return reason; -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ case BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: -+ crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); -+ -+ if (crc.offset + crc.live_size > -+ crc.uncompressed_size) -+ return "checksum offset + key size > uncompressed size"; -+ -+ size_ondisk = crc.compressed_size; -+ -+ if (!bch2_checksum_type_valid(c, crc.csum_type)) -+ return "invalid checksum type"; -+ -+ if (crc.compression_type >= BCH_COMPRESSION_TYPE_NR) -+ return "invalid compression type"; -+ -+ if (bch2_csum_type_is_encryption(crc.csum_type)) { -+ if (nonce == UINT_MAX) -+ nonce = crc.offset + crc.nonce; -+ else if (nonce != crc.offset + crc.nonce) -+ return "incorrect nonce"; -+ } -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ break; -+ } -+ } -+ -+ return NULL; -+} -+ -+void bch2_ptr_swab(struct bkey_s k) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *entry; -+ u64 *d; -+ -+ for (d = (u64 *) ptrs.start; -+ d != (u64 *) 
ptrs.end; -+ d++) -+ *d = swab64(*d); -+ -+ for (entry = ptrs.start; -+ entry < ptrs.end; -+ entry = extent_entry_next(entry)) { -+ switch (extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ entry->crc32.csum = swab32(entry->crc32.csum); -+ break; -+ case BCH_EXTENT_ENTRY_crc64: -+ entry->crc64.csum_hi = swab16(entry->crc64.csum_hi); -+ entry->crc64.csum_lo = swab64(entry->crc64.csum_lo); -+ break; -+ case BCH_EXTENT_ENTRY_crc128: -+ entry->crc128.csum.hi = (__force __le64) -+ swab64((__force u64) entry->crc128.csum.hi); -+ entry->crc128.csum.lo = (__force __le64) -+ swab64((__force u64) entry->crc128.csum.lo); -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ break; -+ } -+ } -+} -+ -+/* Generic extent code: */ -+ -+int bch2_cut_front_s(struct bpos where, struct bkey_s k) -+{ -+ unsigned new_val_u64s = bkey_val_u64s(k.k); -+ int val_u64s_delta; -+ u64 sub; -+ -+ if (bkey_cmp(where, bkey_start_pos(k.k)) <= 0) -+ return 0; -+ -+ EBUG_ON(bkey_cmp(where, k.k->p) > 0); -+ -+ sub = where.offset - bkey_start_offset(k.k); -+ -+ k.k->size -= sub; -+ -+ if (!k.k->size) { -+ k.k->type = KEY_TYPE_deleted; -+ new_val_u64s = 0; -+ } -+ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: { -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *entry; -+ bool seen_crc = false; -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ switch (extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ if (!seen_crc) -+ entry->ptr.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ entry->crc32.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_crc64: -+ entry->crc64.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_crc128: -+ entry->crc128.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ break; -+ } -+ -+ if (extent_entry_is_crc(entry)) -+ seen_crc = true; -+ } -+ -+ break; -+ } -+ case KEY_TYPE_reflink_p: { -+ struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k); -+ -+ le64_add_cpu(&p.v->idx, sub); -+ break; -+ } -+ case KEY_TYPE_inline_data: { -+ struct bkey_s_inline_data d = bkey_s_to_inline_data(k); -+ -+ sub = min_t(u64, sub << 9, bkey_val_bytes(d.k)); -+ -+ memmove(d.v->data, -+ d.v->data + sub, -+ bkey_val_bytes(d.k) - sub); -+ -+ new_val_u64s -= sub >> 3; -+ break; -+ } -+ } -+ -+ val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s; -+ BUG_ON(val_u64s_delta < 0); -+ -+ set_bkey_val_u64s(k.k, new_val_u64s); -+ memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64)); -+ return -val_u64s_delta; -+} -+ -+int bch2_cut_back_s(struct bpos where, struct bkey_s k) -+{ -+ unsigned new_val_u64s = bkey_val_u64s(k.k); -+ int val_u64s_delta; -+ u64 len = 0; -+ -+ if (bkey_cmp(where, k.k->p) >= 0) -+ return 0; -+ -+ EBUG_ON(bkey_cmp(where, bkey_start_pos(k.k)) < 0); -+ -+ len = where.offset - bkey_start_offset(k.k); -+ -+ k.k->p = where; -+ k.k->size = len; -+ -+ if (!len) { -+ k.k->type = KEY_TYPE_deleted; -+ new_val_u64s = 0; -+ } -+ -+ switch (k.k->type) { -+ case KEY_TYPE_inline_data: -+ new_val_u64s = min(new_val_u64s, k.k->size << 6); -+ break; -+ } -+ -+ val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s; -+ BUG_ON(val_u64s_delta < 0); -+ -+ set_bkey_val_u64s(k.k, new_val_u64s); -+ memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64)); -+ return -val_u64s_delta; -+} -diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h -new file mode 100644 -index 000000000000..29b15365d19c ---- /dev/null -+++ b/fs/bcachefs/extents.h -@@ -0,0 +1,603 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef 
_BCACHEFS_EXTENTS_H -+#define _BCACHEFS_EXTENTS_H -+ -+#include "bcachefs.h" -+#include "bkey.h" -+#include "extents_types.h" -+ -+struct bch_fs; -+struct btree_trans; -+ -+/* extent entries: */ -+ -+#define extent_entry_last(_e) \ -+ ((typeof(&(_e).v->start[0])) bkey_val_end(_e)) -+ -+#define entry_to_ptr(_entry) \ -+({ \ -+ EBUG_ON((_entry) && !extent_entry_is_ptr(_entry)); \ -+ \ -+ __builtin_choose_expr( \ -+ type_is_exact(_entry, const union bch_extent_entry *), \ -+ (const struct bch_extent_ptr *) (_entry), \ -+ (struct bch_extent_ptr *) (_entry)); \ -+}) -+ -+/* downcast, preserves const */ -+#define to_entry(_entry) \ -+({ \ -+ BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \ -+ !type_is(_entry, struct bch_extent_ptr *) && \ -+ !type_is(_entry, struct bch_extent_stripe_ptr *)); \ -+ \ -+ __builtin_choose_expr( \ -+ (type_is_exact(_entry, const union bch_extent_crc *) || \ -+ type_is_exact(_entry, const struct bch_extent_ptr *) ||\ -+ type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\ -+ (const union bch_extent_entry *) (_entry), \ -+ (union bch_extent_entry *) (_entry)); \ -+}) -+ -+#define extent_entry_next(_entry) \ -+ ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry))) -+ -+static inline unsigned -+__extent_entry_type(const union bch_extent_entry *e) -+{ -+ return e->type ? __ffs(e->type) : BCH_EXTENT_ENTRY_MAX; -+} -+ -+static inline enum bch_extent_entry_type -+extent_entry_type(const union bch_extent_entry *e) -+{ -+ int ret = __ffs(e->type); -+ -+ EBUG_ON(ret < 0 || ret >= BCH_EXTENT_ENTRY_MAX); -+ -+ return ret; -+} -+ -+static inline size_t extent_entry_bytes(const union bch_extent_entry *entry) -+{ -+ switch (extent_entry_type(entry)) { -+#define x(f, n) \ -+ case BCH_EXTENT_ENTRY_##f: \ -+ return sizeof(struct bch_extent_##f); -+ BCH_EXTENT_ENTRY_TYPES() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+static inline size_t extent_entry_u64s(const union bch_extent_entry *entry) -+{ -+ return extent_entry_bytes(entry) / sizeof(u64); -+} -+ -+static inline bool extent_entry_is_ptr(const union bch_extent_entry *e) -+{ -+ switch (extent_entry_type(e)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline bool extent_entry_is_crc(const union bch_extent_entry *e) -+{ -+ switch (extent_entry_type(e)) { -+ case BCH_EXTENT_ENTRY_crc32: -+ case BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+union bch_extent_crc { -+ u8 type; -+ struct bch_extent_crc32 crc32; -+ struct bch_extent_crc64 crc64; -+ struct bch_extent_crc128 crc128; -+}; -+ -+#define __entry_to_crc(_entry) \ -+ __builtin_choose_expr( \ -+ type_is_exact(_entry, const union bch_extent_entry *), \ -+ (const union bch_extent_crc *) (_entry), \ -+ (union bch_extent_crc *) (_entry)) -+ -+#define entry_to_crc(_entry) \ -+({ \ -+ EBUG_ON((_entry) && !extent_entry_is_crc(_entry)); \ -+ \ -+ __entry_to_crc(_entry); \ -+}) -+ -+static inline struct bch_extent_crc_unpacked -+bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) -+{ -+#define common_fields(_crc) \ -+ .csum_type = _crc.csum_type, \ -+ .compression_type = _crc.compression_type, \ -+ .compressed_size = _crc._compressed_size + 1, \ -+ .uncompressed_size = _crc._uncompressed_size + 1, \ -+ .offset = _crc.offset, \ -+ .live_size = k->size -+ -+ if (!crc) -+ return (struct bch_extent_crc_unpacked) { -+ .compressed_size = k->size, -+ .uncompressed_size = k->size, -+ .live_size = k->size, -+ }; -+ 
-+ switch (extent_entry_type(to_entry(crc))) { -+ case BCH_EXTENT_ENTRY_crc32: { -+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) { -+ common_fields(crc->crc32), -+ }; -+ -+ *((__le32 *) &ret.csum.lo) = crc->crc32.csum; -+ -+ memcpy(&ret.csum.lo, &crc->crc32.csum, -+ sizeof(crc->crc32.csum)); -+ -+ return ret; -+ } -+ case BCH_EXTENT_ENTRY_crc64: { -+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) { -+ common_fields(crc->crc64), -+ .nonce = crc->crc64.nonce, -+ .csum.lo = (__force __le64) crc->crc64.csum_lo, -+ }; -+ -+ *((__le16 *) &ret.csum.hi) = crc->crc64.csum_hi; -+ -+ return ret; -+ } -+ case BCH_EXTENT_ENTRY_crc128: { -+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) { -+ common_fields(crc->crc128), -+ .nonce = crc->crc128.nonce, -+ .csum = crc->crc128.csum, -+ }; -+ -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+#undef common_fields -+} -+ -+static inline bool crc_is_compressed(struct bch_extent_crc_unpacked crc) -+{ -+ return (crc.compression_type != BCH_COMPRESSION_TYPE_none && -+ crc.compression_type != BCH_COMPRESSION_TYPE_incompressible); -+} -+ -+/* bkey_ptrs: generically over any key type that has ptrs */ -+ -+struct bkey_ptrs_c { -+ const union bch_extent_entry *start; -+ const union bch_extent_entry *end; -+}; -+ -+struct bkey_ptrs { -+ union bch_extent_entry *start; -+ union bch_extent_entry *end; -+}; -+ -+static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: { -+ struct bkey_s_c_btree_ptr e = bkey_s_c_to_btree_ptr(k); -+ return (struct bkey_ptrs_c) { -+ to_entry(&e.v->start[0]), -+ to_entry(extent_entry_last(e)) -+ }; -+ } -+ case KEY_TYPE_extent: { -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ return (struct bkey_ptrs_c) { -+ e.v->start, -+ extent_entry_last(e) -+ }; -+ } -+ case KEY_TYPE_stripe: { -+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); -+ return (struct bkey_ptrs_c) { -+ to_entry(&s.v->ptrs[0]), -+ to_entry(&s.v->ptrs[s.v->nr_blocks]), -+ }; -+ } -+ case KEY_TYPE_reflink_v: { -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -+ -+ return (struct bkey_ptrs_c) { -+ r.v->start, -+ bkey_val_end(r), -+ }; -+ } -+ case KEY_TYPE_btree_ptr_v2: { -+ struct bkey_s_c_btree_ptr_v2 e = bkey_s_c_to_btree_ptr_v2(k); -+ return (struct bkey_ptrs_c) { -+ to_entry(&e.v->start[0]), -+ to_entry(extent_entry_last(e)) -+ }; -+ } -+ default: -+ return (struct bkey_ptrs_c) { NULL, NULL }; -+ } -+} -+ -+static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k) -+{ -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k.s_c); -+ -+ return (struct bkey_ptrs) { -+ (void *) p.start, -+ (void *) p.end -+ }; -+} -+ -+#define __bkey_extent_entry_for_each_from(_start, _end, _entry) \ -+ for ((_entry) = (_start); \ -+ (_entry) < (_end); \ -+ (_entry) = extent_entry_next(_entry)) -+ -+#define __bkey_ptr_next(_ptr, _end) \ -+({ \ -+ typeof(_end) _entry; \ -+ \ -+ __bkey_extent_entry_for_each_from(to_entry(_ptr), _end, _entry) \ -+ if (extent_entry_is_ptr(_entry)) \ -+ break; \ -+ \ -+ _entry < (_end) ? 
entry_to_ptr(_entry) : NULL; \ -+}) -+ -+#define bkey_extent_entry_for_each_from(_p, _entry, _start) \ -+ __bkey_extent_entry_for_each_from(_start, (_p).end, _entry) -+ -+#define bkey_extent_entry_for_each(_p, _entry) \ -+ bkey_extent_entry_for_each_from(_p, _entry, _p.start) -+ -+#define __bkey_for_each_ptr(_start, _end, _ptr) \ -+ for ((_ptr) = (_start); \ -+ ((_ptr) = __bkey_ptr_next(_ptr, _end)); \ -+ (_ptr)++) -+ -+#define bkey_ptr_next(_p, _ptr) \ -+ __bkey_ptr_next(_ptr, (_p).end) -+ -+#define bkey_for_each_ptr(_p, _ptr) \ -+ __bkey_for_each_ptr(&(_p).start->ptr, (_p).end, _ptr) -+ -+#define __bkey_ptr_next_decode(_k, _end, _ptr, _entry) \ -+({ \ -+ __label__ out; \ -+ \ -+ (_ptr).idx = 0; \ -+ (_ptr).has_ec = false; \ -+ \ -+ __bkey_extent_entry_for_each_from(_entry, _end, _entry) \ -+ switch (extent_entry_type(_entry)) { \ -+ case BCH_EXTENT_ENTRY_ptr: \ -+ (_ptr).ptr = _entry->ptr; \ -+ goto out; \ -+ case BCH_EXTENT_ENTRY_crc32: \ -+ case BCH_EXTENT_ENTRY_crc64: \ -+ case BCH_EXTENT_ENTRY_crc128: \ -+ (_ptr).crc = bch2_extent_crc_unpack(_k, \ -+ entry_to_crc(_entry)); \ -+ break; \ -+ case BCH_EXTENT_ENTRY_stripe_ptr: \ -+ (_ptr).ec = _entry->stripe_ptr; \ -+ (_ptr).has_ec = true; \ -+ break; \ -+ } \ -+out: \ -+ _entry < (_end); \ -+}) -+ -+#define __bkey_for_each_ptr_decode(_k, _start, _end, _ptr, _entry) \ -+ for ((_ptr).crc = bch2_extent_crc_unpack(_k, NULL), \ -+ (_entry) = _start; \ -+ __bkey_ptr_next_decode(_k, _end, _ptr, _entry); \ -+ (_entry) = extent_entry_next(_entry)) -+ -+#define bkey_for_each_ptr_decode(_k, _p, _ptr, _entry) \ -+ __bkey_for_each_ptr_decode(_k, (_p).start, (_p).end, \ -+ _ptr, _entry) -+ -+#define bkey_crc_next(_k, _start, _end, _crc, _iter) \ -+({ \ -+ __bkey_extent_entry_for_each_from(_iter, _end, _iter) \ -+ if (extent_entry_is_crc(_iter)) { \ -+ (_crc) = bch2_extent_crc_unpack(_k, \ -+ entry_to_crc(_iter)); \ -+ break; \ -+ } \ -+ \ -+ (_iter) < (_end); \ -+}) -+ -+#define __bkey_for_each_crc(_k, _start, _end, _crc, _iter) \ -+ for ((_crc) = bch2_extent_crc_unpack(_k, NULL), \ -+ (_iter) = (_start); \ -+ bkey_crc_next(_k, _start, _end, _crc, _iter); \ -+ (_iter) = extent_entry_next(_iter)) -+ -+#define bkey_for_each_crc(_k, _p, _crc, _iter) \ -+ __bkey_for_each_crc(_k, (_p).start, (_p).end, _crc, _iter) -+ -+/* Iterate over pointers in KEY_TYPE_extent: */ -+ -+#define extent_for_each_entry_from(_e, _entry, _start) \ -+ __bkey_extent_entry_for_each_from(_start, \ -+ extent_entry_last(_e),_entry) -+ -+#define extent_for_each_entry(_e, _entry) \ -+ extent_for_each_entry_from(_e, _entry, (_e).v->start) -+ -+#define extent_ptr_next(_e, _ptr) \ -+ __bkey_ptr_next(_ptr, extent_entry_last(_e)) -+ -+#define extent_for_each_ptr(_e, _ptr) \ -+ __bkey_for_each_ptr(&(_e).v->start->ptr, extent_entry_last(_e), _ptr) -+ -+#define extent_for_each_ptr_decode(_e, _ptr, _entry) \ -+ __bkey_for_each_ptr_decode((_e).k, (_e).v->start, \ -+ extent_entry_last(_e), _ptr, _entry) -+ -+/* utility code common to all keys with pointers: */ -+ -+void bch2_mark_io_failure(struct bch_io_failures *, -+ struct extent_ptr_decoded *); -+int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, -+ struct bch_io_failures *, -+ struct extent_ptr_decoded *); -+ -+/* KEY_TYPE_btree_ptr: */ -+ -+const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_btree_ptr_debugcheck(struct bch_fs *, struct bkey_s_c); -+void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+void bch2_btree_ptr_v2_to_text(struct printbuf *, 
struct bch_fs *, -+ struct bkey_s_c); -+void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, -+ int, struct bkey_s); -+ -+#define bch2_bkey_ops_btree_ptr (struct bkey_ops) { \ -+ .key_invalid = bch2_btree_ptr_invalid, \ -+ .key_debugcheck = bch2_btree_ptr_debugcheck, \ -+ .val_to_text = bch2_btree_ptr_to_text, \ -+ .swab = bch2_ptr_swab, \ -+} -+ -+#define bch2_bkey_ops_btree_ptr_v2 (struct bkey_ops) { \ -+ .key_invalid = bch2_btree_ptr_invalid, \ -+ .key_debugcheck = bch2_btree_ptr_debugcheck, \ -+ .val_to_text = bch2_btree_ptr_v2_to_text, \ -+ .swab = bch2_ptr_swab, \ -+ .compat = bch2_btree_ptr_v2_compat, \ -+} -+ -+/* KEY_TYPE_extent: */ -+ -+const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_extent_debugcheck(struct bch_fs *, struct bkey_s_c); -+void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+enum merge_result bch2_extent_merge(struct bch_fs *, -+ struct bkey_s, struct bkey_s); -+ -+#define bch2_bkey_ops_extent (struct bkey_ops) { \ -+ .key_invalid = bch2_extent_invalid, \ -+ .key_debugcheck = bch2_extent_debugcheck, \ -+ .val_to_text = bch2_extent_to_text, \ -+ .swab = bch2_ptr_swab, \ -+ .key_normalize = bch2_extent_normalize, \ -+ .key_merge = bch2_extent_merge, \ -+} -+ -+/* KEY_TYPE_reservation: */ -+ -+const char *bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+enum merge_result bch2_reservation_merge(struct bch_fs *, -+ struct bkey_s, struct bkey_s); -+ -+#define bch2_bkey_ops_reservation (struct bkey_ops) { \ -+ .key_invalid = bch2_reservation_invalid, \ -+ .val_to_text = bch2_reservation_to_text, \ -+ .key_merge = bch2_reservation_merge, \ -+} -+ -+/* Extent checksum entries: */ -+ -+bool bch2_can_narrow_extent_crcs(struct bkey_s_c, -+ struct bch_extent_crc_unpacked); -+bool bch2_bkey_narrow_crcs(struct bkey_i *, struct bch_extent_crc_unpacked); -+void bch2_extent_crc_append(struct bkey_i *, -+ struct bch_extent_crc_unpacked); -+ -+/* Generic code for keys with pointers: */ -+ -+static inline bool bkey_extent_is_direct_data(const struct bkey *k) -+{ -+ switch (k->type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline bool bkey_extent_is_data(const struct bkey *k) -+{ -+ return bkey_extent_is_direct_data(k) || -+ k->type == KEY_TYPE_inline_data || -+ k->type == KEY_TYPE_reflink_p; -+} -+ -+/* -+ * Should extent be counted under inode->i_sectors? 
-+ */ -+static inline bool bkey_extent_is_allocation(const struct bkey *k) -+{ -+ switch (k->type) { -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reservation: -+ case KEY_TYPE_reflink_p: -+ case KEY_TYPE_reflink_v: -+ case KEY_TYPE_inline_data: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k) -+{ -+ struct bch_devs_list ret = (struct bch_devs_list) { 0 }; -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(p, ptr) -+ ret.devs[ret.nr++] = ptr->dev; -+ -+ return ret; -+} -+ -+static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k) -+{ -+ struct bch_devs_list ret = (struct bch_devs_list) { 0 }; -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(p, ptr) -+ if (!ptr->cached) -+ ret.devs[ret.nr++] = ptr->dev; -+ -+ return ret; -+} -+ -+static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) -+{ -+ struct bch_devs_list ret = (struct bch_devs_list) { 0 }; -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(p, ptr) -+ if (ptr->cached) -+ ret.devs[ret.nr++] = ptr->dev; -+ -+ return ret; -+} -+ -+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c); -+unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c); -+unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c); -+bool bch2_bkey_is_incompressible(struct bkey_s_c); -+unsigned bch2_bkey_sectors_compressed(struct bkey_s_c); -+bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned); -+unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c); -+ -+void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s, -+ unsigned, unsigned); -+ -+void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr); -+void bch2_extent_ptr_decoded_append(struct bkey_i *, -+ struct extent_ptr_decoded *); -+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s, -+ struct bch_extent_ptr *); -+ -+#define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \ -+do { \ -+ struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \ -+ \ -+ _ptr = &_ptrs.start->ptr; \ -+ \ -+ while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \ -+ if (_cond) { \ -+ _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \ -+ _ptrs = bch2_bkey_ptrs(_k); \ -+ continue; \ -+ } \ -+ \ -+ (_ptr)++; \ -+ } \ -+} while (0) -+ -+void bch2_bkey_drop_device(struct bkey_s, unsigned); -+const struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s_c, unsigned); -+bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned); -+ -+bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c, -+ struct bch_extent_ptr, u64); -+ -+bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); -+void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+const char *bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c); -+ -+void bch2_ptr_swab(struct bkey_s); -+ -+/* Generic extent code: */ -+ -+int bch2_cut_front_s(struct bpos, struct bkey_s); -+int bch2_cut_back_s(struct bpos, struct bkey_s); -+ -+static inline void bch2_cut_front(struct bpos where, struct bkey_i *k) -+{ -+ bch2_cut_front_s(where, bkey_i_to_s(k)); -+} -+ -+static inline void bch2_cut_back(struct bpos where, struct bkey_i *k) -+{ -+ bch2_cut_back_s(where, bkey_i_to_s(k)); -+} -+ -+/** -+ * bch_key_resize - adjust size of @k -+ * -+ * bkey_start_offset(k) will be preserved, modifies where the extent ends -+ */ -+static inline void 
bch2_key_resize(struct bkey *k, unsigned new_size) -+{ -+ k->p.offset -= k->size; -+ k->p.offset += new_size; -+ k->size = new_size; -+} -+ -+/* -+ * In extent_sort_fix_overlapping(), insert_fixup_extent(), -+ * extent_merge_inline() - we're modifying keys in place that are packed. To do -+ * that we have to unpack the key, modify the unpacked key - then this -+ * copies/repacks the unpacked to the original as necessary. -+ */ -+static inline void extent_save(struct btree *b, struct bkey_packed *dst, -+ struct bkey *src) -+{ -+ struct bkey_format *f = &b->format; -+ struct bkey_i *dst_unpacked; -+ -+ if ((dst_unpacked = packed_to_bkey(dst))) -+ dst_unpacked->k = *src; -+ else -+ BUG_ON(!bch2_bkey_pack_key(dst, src, f)); -+} -+ -+#endif /* _BCACHEFS_EXTENTS_H */ -diff --git a/fs/bcachefs/extents_types.h b/fs/bcachefs/extents_types.h -new file mode 100644 -index 000000000000..43d6c341ecca ---- /dev/null -+++ b/fs/bcachefs/extents_types.h -@@ -0,0 +1,40 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EXTENTS_TYPES_H -+#define _BCACHEFS_EXTENTS_TYPES_H -+ -+#include "bcachefs_format.h" -+ -+struct bch_extent_crc_unpacked { -+ u32 compressed_size; -+ u32 uncompressed_size; -+ u32 live_size; -+ -+ u8 csum_type; -+ u8 compression_type; -+ -+ u16 offset; -+ -+ u16 nonce; -+ -+ struct bch_csum csum; -+}; -+ -+struct extent_ptr_decoded { -+ unsigned idx; -+ bool has_ec; -+ struct bch_extent_crc_unpacked crc; -+ struct bch_extent_ptr ptr; -+ struct bch_extent_stripe_ptr ec; -+}; -+ -+struct bch_io_failures { -+ u8 nr; -+ struct bch_dev_io_failures { -+ u8 dev; -+ u8 idx; -+ u8 nr_failed; -+ u8 nr_retries; -+ } devs[BCH_REPLICAS_MAX]; -+}; -+ -+#endif /* _BCACHEFS_EXTENTS_TYPES_H */ -diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h -new file mode 100644 -index 000000000000..26d5cad7e6a5 ---- /dev/null -+++ b/fs/bcachefs/eytzinger.h -@@ -0,0 +1,285 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _EYTZINGER_H -+#define _EYTZINGER_H -+ -+#include -+#include -+ -+#include "util.h" -+ -+/* -+ * Traversal for trees in eytzinger layout - a full binary tree layed out in an -+ * array -+ */ -+ -+/* -+ * One based indexing version: -+ * -+ * With one based indexing each level of the tree starts at a power of two - -+ * good for cacheline alignment: -+ * -+ * Size parameter is treated as if we were using 0 based indexing, however: -+ * valid nodes, and inorder indices, are in the range [1..size) - that is, there -+ * are actually size - 1 elements -+ */ -+ -+static inline unsigned eytzinger1_child(unsigned i, unsigned child) -+{ -+ EBUG_ON(child > 1); -+ -+ return (i << 1) + child; -+} -+ -+static inline unsigned eytzinger1_left_child(unsigned i) -+{ -+ return eytzinger1_child(i, 0); -+} -+ -+static inline unsigned eytzinger1_right_child(unsigned i) -+{ -+ return eytzinger1_child(i, 1); -+} -+ -+static inline unsigned eytzinger1_first(unsigned size) -+{ -+ return rounddown_pow_of_two(size - 1); -+} -+ -+static inline unsigned eytzinger1_last(unsigned size) -+{ -+ return rounddown_pow_of_two(size) - 1; -+} -+ -+/* -+ * eytzinger1_next() and eytzinger1_prev() have the nice properties that -+ * -+ * eytzinger1_next(0) == eytzinger1_first()) -+ * eytzinger1_prev(0) == eytzinger1_last()) -+ * -+ * eytzinger1_prev(eytzinger1_first()) == 0 -+ * eytzinger1_next(eytzinger1_last()) == 0 -+ */ -+ -+static inline unsigned eytzinger1_next(unsigned i, unsigned size) -+{ -+ EBUG_ON(i >= size); -+ -+ if (eytzinger1_right_child(i) < size) { -+ i = eytzinger1_right_child(i); -+ -+ i <<= 
__fls(size) - __fls(i); -+ i >>= i >= size; -+ } else { -+ i >>= ffz(i) + 1; -+ } -+ -+ return i; -+} -+ -+static inline unsigned eytzinger1_prev(unsigned i, unsigned size) -+{ -+ EBUG_ON(i >= size); -+ -+ if (eytzinger1_left_child(i) < size) { -+ i = eytzinger1_left_child(i) + 1; -+ -+ i <<= __fls(size) - __fls(i); -+ i -= 1; -+ i >>= i >= size; -+ } else { -+ i >>= __ffs(i) + 1; -+ } -+ -+ return i; -+} -+ -+static inline unsigned eytzinger1_extra(unsigned size) -+{ -+ return (size - rounddown_pow_of_two(size - 1)) << 1; -+} -+ -+static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ unsigned b = __fls(i); -+ unsigned shift = __fls(size - 1) - b; -+ int s; -+ -+ EBUG_ON(!i || i >= size); -+ -+ i ^= 1U << b; -+ i <<= 1; -+ i |= 1; -+ i <<= shift; -+ -+ /* -+ * sign bit trick: -+ * -+ * if (i > extra) -+ * i -= (i - extra) >> 1; -+ */ -+ s = extra - i; -+ i += (s >> 1) & (s >> 31); -+ -+ return i; -+} -+ -+static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ unsigned shift; -+ int s; -+ -+ EBUG_ON(!i || i >= size); -+ -+ /* -+ * sign bit trick: -+ * -+ * if (i > extra) -+ * i += i - extra; -+ */ -+ s = extra - i; -+ i -= s & (s >> 31); -+ -+ shift = __ffs(i); -+ -+ i >>= shift + 1; -+ i |= 1U << (__fls(size - 1) - shift); -+ -+ return i; -+} -+ -+static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size) -+{ -+ return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size)); -+} -+ -+static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size) -+{ -+ return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size)); -+} -+ -+#define eytzinger1_for_each(_i, _size) \ -+ for ((_i) = eytzinger1_first((_size)); \ -+ (_i) != 0; \ -+ (_i) = eytzinger1_next((_i), (_size))) -+ -+/* Zero based indexing version: */ -+ -+static inline unsigned eytzinger0_child(unsigned i, unsigned child) -+{ -+ EBUG_ON(child > 1); -+ -+ return (i << 1) + 1 + child; -+} -+ -+static inline unsigned eytzinger0_left_child(unsigned i) -+{ -+ return eytzinger0_child(i, 0); -+} -+ -+static inline unsigned eytzinger0_right_child(unsigned i) -+{ -+ return eytzinger0_child(i, 1); -+} -+ -+static inline unsigned eytzinger0_first(unsigned size) -+{ -+ return eytzinger1_first(size + 1) - 1; -+} -+ -+static inline unsigned eytzinger0_last(unsigned size) -+{ -+ return eytzinger1_last(size + 1) - 1; -+} -+ -+static inline unsigned eytzinger0_next(unsigned i, unsigned size) -+{ -+ return eytzinger1_next(i + 1, size + 1) - 1; -+} -+ -+static inline unsigned eytzinger0_prev(unsigned i, unsigned size) -+{ -+ return eytzinger1_prev(i + 1, size + 1) - 1; -+} -+ -+static inline unsigned eytzinger0_extra(unsigned size) -+{ -+ return eytzinger1_extra(size + 1); -+} -+ -+static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ return __eytzinger1_to_inorder(i + 1, size + 1, extra) - 1; -+} -+ -+static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ return __inorder_to_eytzinger1(i + 1, size + 1, extra) - 1; -+} -+ -+static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size) -+{ -+ return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size)); -+} -+ -+static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) -+{ -+ return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size)); -+} -+ -+#define eytzinger0_for_each(_i, _size) \ -+ for ((_i) = eytzinger0_first((_size)); \ -+ (_i) != -1; \ -+ (_i) = 
eytzinger0_next((_i), (_size))) -+ -+typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size); -+ -+/* return greatest node <= @search, or -1 if not found */ -+static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, -+ eytzinger_cmp_fn cmp, const void *search) -+{ -+ unsigned i, n = 0; -+ -+ if (!nr) -+ return -1; -+ -+ do { -+ i = n; -+ n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0); -+ } while (n < nr); -+ -+ if (n & 1) { -+ /* @i was greater than @search, return previous node: */ -+ -+ if (i == eytzinger0_first(nr)) -+ return -1; -+ -+ return eytzinger0_prev(i, nr); -+ } else { -+ return i; -+ } -+} -+ -+#define eytzinger0_find(base, nr, size, _cmp, search) \ -+({ \ -+ void *_base = (base); \ -+ void *_search = (search); \ -+ size_t _nr = (nr); \ -+ size_t _size = (size); \ -+ size_t _i = 0; \ -+ int _res; \ -+ \ -+ while (_i < _nr && \ -+ (_res = _cmp(_search, _base + _i * _size, _size))) \ -+ _i = eytzinger0_child(_i, _res > 0); \ -+ _i; \ -+}) -+ -+void eytzinger0_sort(void *, size_t, size_t, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t)); -+ -+#endif /* _EYTZINGER_H */ -diff --git a/fs/bcachefs/fifo.h b/fs/bcachefs/fifo.h -new file mode 100644 -index 000000000000..cdb272708a4b ---- /dev/null -+++ b/fs/bcachefs/fifo.h -@@ -0,0 +1,127 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FIFO_H -+#define _BCACHEFS_FIFO_H -+ -+#include "util.h" -+ -+#define FIFO(type) \ -+struct { \ -+ size_t front, back, size, mask; \ -+ type *data; \ -+} -+ -+#define DECLARE_FIFO(type, name) FIFO(type) name -+ -+#define fifo_buf_size(fifo) \ -+ ((fifo)->size \ -+ ? roundup_pow_of_two((fifo)->size) * sizeof((fifo)->data[0]) \ -+ : 0) -+ -+#define init_fifo(fifo, _size, _gfp) \ -+({ \ -+ (fifo)->front = (fifo)->back = 0; \ -+ (fifo)->size = (_size); \ -+ (fifo)->mask = (fifo)->size \ -+ ? roundup_pow_of_two((fifo)->size) - 1 \ -+ : 0; \ -+ (fifo)->data = kvpmalloc(fifo_buf_size(fifo), (_gfp)); \ -+}) -+ -+#define free_fifo(fifo) \ -+do { \ -+ kvpfree((fifo)->data, fifo_buf_size(fifo)); \ -+ (fifo)->data = NULL; \ -+} while (0) -+ -+#define fifo_swap(l, r) \ -+do { \ -+ swap((l)->front, (r)->front); \ -+ swap((l)->back, (r)->back); \ -+ swap((l)->size, (r)->size); \ -+ swap((l)->mask, (r)->mask); \ -+ swap((l)->data, (r)->data); \ -+} while (0) -+ -+#define fifo_move(dest, src) \ -+do { \ -+ typeof(*((dest)->data)) _t; \ -+ while (!fifo_full(dest) && \ -+ fifo_pop(src, _t)) \ -+ fifo_push(dest, _t); \ -+} while (0) -+ -+#define fifo_used(fifo) (((fifo)->back - (fifo)->front)) -+#define fifo_free(fifo) ((fifo)->size - fifo_used(fifo)) -+ -+#define fifo_empty(fifo) ((fifo)->front == (fifo)->back) -+#define fifo_full(fifo) (fifo_used(fifo) == (fifo)->size) -+ -+#define fifo_peek_front(fifo) ((fifo)->data[(fifo)->front & (fifo)->mask]) -+#define fifo_peek_back(fifo) ((fifo)->data[((fifo)->back - 1) & (fifo)->mask]) -+ -+#define fifo_entry_idx_abs(fifo, p) \ -+ ((((p) >= &fifo_peek_front(fifo) \ -+ ? (fifo)->front : (fifo)->back) & ~(fifo)->mask) + \ -+ (((p) - (fifo)->data))) -+ -+#define fifo_entry_idx(fifo, p) (((p) - &fifo_peek_front(fifo)) & (fifo)->mask) -+#define fifo_idx_entry(fifo, i) (fifo)->data[((fifo)->front + (i)) & (fifo)->mask] -+ -+#define fifo_push_back_ref(f) \ -+ (fifo_full((f)) ? NULL : &(f)->data[(f)->back++ & (f)->mask]) -+ -+#define fifo_push_front_ref(f) \ -+ (fifo_full((f)) ? 
NULL : &(f)->data[--(f)->front & (f)->mask]) -+ -+#define fifo_push_back(fifo, new) \ -+({ \ -+ typeof((fifo)->data) _r = fifo_push_back_ref(fifo); \ -+ if (_r) \ -+ *_r = (new); \ -+ _r != NULL; \ -+}) -+ -+#define fifo_push_front(fifo, new) \ -+({ \ -+ typeof((fifo)->data) _r = fifo_push_front_ref(fifo); \ -+ if (_r) \ -+ *_r = (new); \ -+ _r != NULL; \ -+}) -+ -+#define fifo_pop_front(fifo, i) \ -+({ \ -+ bool _r = !fifo_empty((fifo)); \ -+ if (_r) \ -+ (i) = (fifo)->data[(fifo)->front++ & (fifo)->mask]; \ -+ _r; \ -+}) -+ -+#define fifo_pop_back(fifo, i) \ -+({ \ -+ bool _r = !fifo_empty((fifo)); \ -+ if (_r) \ -+ (i) = (fifo)->data[--(fifo)->back & (fifo)->mask]; \ -+ _r; \ -+}) -+ -+#define fifo_push_ref(fifo) fifo_push_back_ref(fifo) -+#define fifo_push(fifo, i) fifo_push_back(fifo, (i)) -+#define fifo_pop(fifo, i) fifo_pop_front(fifo, (i)) -+#define fifo_peek(fifo) fifo_peek_front(fifo) -+ -+#define fifo_for_each_entry(_entry, _fifo, _iter) \ -+ for (typecheck(typeof((_fifo)->front), _iter), \ -+ (_iter) = (_fifo)->front; \ -+ ((_iter != (_fifo)->back) && \ -+ (_entry = (_fifo)->data[(_iter) & (_fifo)->mask], true)); \ -+ (_iter)++) -+ -+#define fifo_for_each_entry_ptr(_ptr, _fifo, _iter) \ -+ for (typecheck(typeof((_fifo)->front), _iter), \ -+ (_iter) = (_fifo)->front; \ -+ ((_iter != (_fifo)->back) && \ -+ (_ptr = &(_fifo)->data[(_iter) & (_fifo)->mask], true)); \ -+ (_iter)++) -+ -+#endif /* _BCACHEFS_FIFO_H */ -diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c -new file mode 100644 -index 000000000000..878419d40992 ---- /dev/null -+++ b/fs/bcachefs/fs-common.c -@@ -0,0 +1,317 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "acl.h" -+#include "btree_update.h" -+#include "dirent.h" -+#include "fs-common.h" -+#include "inode.h" -+#include "xattr.h" -+ -+#include -+ -+int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, -+ struct bch_inode_unpacked *dir_u, -+ struct bch_inode_unpacked *new_inode, -+ const struct qstr *name, -+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev, -+ struct posix_acl *default_acl, -+ struct posix_acl *acl) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *dir_iter = NULL; -+ struct bch_hash_info hash = bch2_hash_info_init(c, new_inode); -+ u64 now = bch2_current_time(trans->c); -+ int ret; -+ -+ dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dir_iter); -+ if (ret) -+ goto err; -+ -+ bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); -+ -+ if (!name) -+ new_inode->bi_flags |= BCH_INODE_UNLINKED; -+ -+ ret = bch2_inode_create(trans, new_inode, -+ BLOCKDEV_INODE_MAX, 0, -+ &c->unused_inode_hint); -+ if (ret) -+ goto err; -+ -+ if (default_acl) { -+ ret = bch2_set_acl_trans(trans, new_inode, &hash, -+ default_acl, ACL_TYPE_DEFAULT); -+ if (ret) -+ goto err; -+ } -+ -+ if (acl) { -+ ret = bch2_set_acl_trans(trans, new_inode, &hash, -+ acl, ACL_TYPE_ACCESS); -+ if (ret) -+ goto err; -+ } -+ -+ if (name) { -+ struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u); -+ dir_u->bi_mtime = dir_u->bi_ctime = now; -+ -+ if (S_ISDIR(new_inode->bi_mode)) -+ dir_u->bi_nlink++; -+ -+ ret = bch2_inode_write(trans, dir_iter, dir_u); -+ if (ret) -+ goto err; -+ -+ ret = bch2_dirent_create(trans, dir_inum, &dir_hash, -+ mode_to_type(new_inode->bi_mode), -+ name, new_inode->bi_inum, -+ BCH_HASH_SET_MUST_CREATE); -+ if (ret) -+ goto err; -+ } -+err: -+ bch2_trans_iter_put(trans, dir_iter); -+ return ret; -+} -+ -+int bch2_link_trans(struct 
btree_trans *trans, u64 dir_inum, -+ u64 inum, struct bch_inode_unpacked *dir_u, -+ struct bch_inode_unpacked *inode_u, const struct qstr *name) -+{ -+ struct btree_iter *dir_iter = NULL, *inode_iter = NULL; -+ struct bch_hash_info dir_hash; -+ u64 now = bch2_current_time(trans->c); -+ int ret; -+ -+ inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ if (ret) -+ goto err; -+ -+ inode_u->bi_ctime = now; -+ bch2_inode_nlink_inc(inode_u); -+ -+ dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, 0); -+ ret = PTR_ERR_OR_ZERO(dir_iter); -+ if (ret) -+ goto err; -+ -+ dir_u->bi_mtime = dir_u->bi_ctime = now; -+ -+ dir_hash = bch2_hash_info_init(trans->c, dir_u); -+ -+ ret = bch2_dirent_create(trans, dir_inum, &dir_hash, -+ mode_to_type(inode_u->bi_mode), -+ name, inum, BCH_HASH_SET_MUST_CREATE) ?: -+ bch2_inode_write(trans, dir_iter, dir_u) ?: -+ bch2_inode_write(trans, inode_iter, inode_u); -+err: -+ bch2_trans_iter_put(trans, dir_iter); -+ bch2_trans_iter_put(trans, inode_iter); -+ return ret; -+} -+ -+int bch2_unlink_trans(struct btree_trans *trans, -+ u64 dir_inum, struct bch_inode_unpacked *dir_u, -+ struct bch_inode_unpacked *inode_u, -+ const struct qstr *name) -+{ -+ struct btree_iter *dir_iter = NULL, *dirent_iter = NULL, -+ *inode_iter = NULL; -+ struct bch_hash_info dir_hash; -+ u64 inum, now = bch2_current_time(trans->c); -+ struct bkey_s_c k; -+ int ret; -+ -+ dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dir_iter); -+ if (ret) -+ goto err; -+ -+ dir_hash = bch2_hash_info_init(trans->c, dir_u); -+ -+ dirent_iter = __bch2_dirent_lookup_trans(trans, dir_inum, &dir_hash, -+ name, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dirent_iter); -+ if (ret) -+ goto err; -+ -+ k = bch2_btree_iter_peek_slot(dirent_iter); -+ inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); -+ -+ inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ if (ret) -+ goto err; -+ -+ dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now; -+ dir_u->bi_nlink -= S_ISDIR(inode_u->bi_mode); -+ bch2_inode_nlink_dec(inode_u); -+ -+ ret = (S_ISDIR(inode_u->bi_mode) -+ ? 
bch2_empty_dir_trans(trans, inum) -+ : 0) ?: -+ bch2_dirent_delete_at(trans, &dir_hash, dirent_iter) ?: -+ bch2_inode_write(trans, dir_iter, dir_u) ?: -+ bch2_inode_write(trans, inode_iter, inode_u); -+err: -+ bch2_trans_iter_put(trans, inode_iter); -+ bch2_trans_iter_put(trans, dirent_iter); -+ bch2_trans_iter_put(trans, dir_iter); -+ return ret; -+} -+ -+bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, -+ struct bch_inode_unpacked *src_u) -+{ -+ u64 src, dst; -+ unsigned id; -+ bool ret = false; -+ -+ for (id = 0; id < Inode_opt_nr; id++) { -+ if (dst_u->bi_fields_set & (1 << id)) -+ continue; -+ -+ src = bch2_inode_opt_get(src_u, id); -+ dst = bch2_inode_opt_get(dst_u, id); -+ -+ if (src == dst) -+ continue; -+ -+ bch2_inode_opt_set(dst_u, id, src); -+ ret = true; -+ } -+ -+ return ret; -+} -+ -+int bch2_rename_trans(struct btree_trans *trans, -+ u64 src_dir, struct bch_inode_unpacked *src_dir_u, -+ u64 dst_dir, struct bch_inode_unpacked *dst_dir_u, -+ struct bch_inode_unpacked *src_inode_u, -+ struct bch_inode_unpacked *dst_inode_u, -+ const struct qstr *src_name, -+ const struct qstr *dst_name, -+ enum bch_rename_mode mode) -+{ -+ struct btree_iter *src_dir_iter = NULL, *dst_dir_iter = NULL; -+ struct btree_iter *src_inode_iter = NULL, *dst_inode_iter = NULL; -+ struct bch_hash_info src_hash, dst_hash; -+ u64 src_inode, dst_inode, now = bch2_current_time(trans->c); -+ int ret; -+ -+ src_dir_iter = bch2_inode_peek(trans, src_dir_u, src_dir, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(src_dir_iter); -+ if (ret) -+ goto err; -+ -+ src_hash = bch2_hash_info_init(trans->c, src_dir_u); -+ -+ if (dst_dir != src_dir) { -+ dst_dir_iter = bch2_inode_peek(trans, dst_dir_u, dst_dir, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dst_dir_iter); -+ if (ret) -+ goto err; -+ -+ dst_hash = bch2_hash_info_init(trans->c, dst_dir_u); -+ } else { -+ dst_dir_u = src_dir_u; -+ dst_hash = src_hash; -+ } -+ -+ ret = bch2_dirent_rename(trans, -+ src_dir, &src_hash, -+ dst_dir, &dst_hash, -+ src_name, &src_inode, -+ dst_name, &dst_inode, -+ mode); -+ if (ret) -+ goto err; -+ -+ src_inode_iter = bch2_inode_peek(trans, src_inode_u, src_inode, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(src_inode_iter); -+ if (ret) -+ goto err; -+ -+ if (dst_inode) { -+ dst_inode_iter = bch2_inode_peek(trans, dst_inode_u, dst_inode, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(dst_inode_iter); -+ if (ret) -+ goto err; -+ } -+ -+ if (mode == BCH_RENAME_OVERWRITE) { -+ if (S_ISDIR(src_inode_u->bi_mode) != -+ S_ISDIR(dst_inode_u->bi_mode)) { -+ ret = -ENOTDIR; -+ goto err; -+ } -+ -+ if (S_ISDIR(dst_inode_u->bi_mode) && -+ bch2_empty_dir_trans(trans, dst_inode)) { -+ ret = -ENOTEMPTY; -+ goto err; -+ } -+ } -+ -+ if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) && -+ S_ISDIR(src_inode_u->bi_mode)) { -+ ret = -EXDEV; -+ goto err; -+ } -+ -+ if (mode == BCH_RENAME_EXCHANGE && -+ bch2_reinherit_attrs(dst_inode_u, src_dir_u) && -+ S_ISDIR(dst_inode_u->bi_mode)) { -+ ret = -EXDEV; -+ goto err; -+ } -+ -+ if (S_ISDIR(src_inode_u->bi_mode)) { -+ src_dir_u->bi_nlink--; -+ dst_dir_u->bi_nlink++; -+ } -+ -+ if (dst_inode && S_ISDIR(dst_inode_u->bi_mode)) { -+ dst_dir_u->bi_nlink--; -+ src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE; -+ } -+ -+ if (mode == BCH_RENAME_OVERWRITE) -+ bch2_inode_nlink_dec(dst_inode_u); -+ -+ src_dir_u->bi_mtime = now; -+ src_dir_u->bi_ctime = now; -+ -+ if (src_dir != dst_dir) { -+ dst_dir_u->bi_mtime = now; -+ dst_dir_u->bi_ctime = now; -+ } -+ -+ src_inode_u->bi_ctime = now; -+ -+ if 
(dst_inode) -+ dst_inode_u->bi_ctime = now; -+ -+ ret = bch2_inode_write(trans, src_dir_iter, src_dir_u) ?: -+ (src_dir != dst_dir -+ ? bch2_inode_write(trans, dst_dir_iter, dst_dir_u) -+ : 0 ) ?: -+ bch2_inode_write(trans, src_inode_iter, src_inode_u) ?: -+ (dst_inode -+ ? bch2_inode_write(trans, dst_inode_iter, dst_inode_u) -+ : 0 ); -+err: -+ bch2_trans_iter_put(trans, dst_inode_iter); -+ bch2_trans_iter_put(trans, src_inode_iter); -+ bch2_trans_iter_put(trans, dst_dir_iter); -+ bch2_trans_iter_put(trans, src_dir_iter); -+ return ret; -+} -diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h -new file mode 100644 -index 000000000000..2273b7961c9b ---- /dev/null -+++ b/fs/bcachefs/fs-common.h -@@ -0,0 +1,37 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_COMMON_H -+#define _BCACHEFS_FS_COMMON_H -+ -+struct posix_acl; -+ -+int bch2_create_trans(struct btree_trans *, u64, -+ struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *, -+ uid_t, gid_t, umode_t, dev_t, -+ struct posix_acl *, -+ struct posix_acl *); -+ -+int bch2_link_trans(struct btree_trans *, u64, -+ u64, struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *); -+ -+int bch2_unlink_trans(struct btree_trans *, -+ u64, struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *); -+ -+int bch2_rename_trans(struct btree_trans *, -+ u64, struct bch_inode_unpacked *, -+ u64, struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *, -+ const struct qstr *, -+ enum bch_rename_mode); -+ -+bool bch2_reinherit_attrs(struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *); -+ -+#endif /* _BCACHEFS_FS_COMMON_H */ -diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c -new file mode 100644 -index 000000000000..4ceeafcfa33c ---- /dev/null -+++ b/fs/bcachefs/fs-io.c -@@ -0,0 +1,3140 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_FS -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "clock.h" -+#include "error.h" -+#include "extents.h" -+#include "extent_update.h" -+#include "fs.h" -+#include "fs-io.h" -+#include "fsck.h" -+#include "inode.h" -+#include "journal.h" -+#include "io.h" -+#include "keylist.h" -+#include "quota.h" -+#include "reflink.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+struct quota_res { -+ u64 sectors; -+}; -+ -+struct bch_writepage_io { -+ struct closure cl; -+ struct bch_inode_info *inode; -+ -+ /* must be last: */ -+ struct bch_write_op op; -+}; -+ -+struct dio_write { -+ struct completion done; -+ struct kiocb *req; -+ struct mm_struct *mm; -+ unsigned loop:1, -+ sync:1, -+ free_iov:1; -+ struct quota_res quota_res; -+ u64 written; -+ -+ struct iov_iter iter; -+ struct iovec inline_vecs[2]; -+ -+ /* must be last: */ -+ struct bch_write_op op; -+}; -+ -+struct dio_read { -+ struct closure cl; -+ struct kiocb *req; -+ long ret; -+ struct bch_read_bio rbio; -+}; -+ -+/* pagecache_block must be held */ -+static int write_invalidate_inode_pages_range(struct address_space *mapping, -+ loff_t start, loff_t end) -+{ -+ int ret; -+ -+ /* -+ * XXX: the way this is currently implemented, we can spin if a process -+ * is continually redirtying a specific page -+ */ -+ do { -+ if (!mapping->nrpages && -+ !mapping->nrexceptional) -+ 
return 0; -+ -+ ret = filemap_write_and_wait_range(mapping, start, end); -+ if (ret) -+ break; -+ -+ if (!mapping->nrpages) -+ return 0; -+ -+ ret = invalidate_inode_pages2_range(mapping, -+ start >> PAGE_SHIFT, -+ end >> PAGE_SHIFT); -+ } while (ret == -EBUSY); -+ -+ return ret; -+} -+ -+/* quotas */ -+ -+#ifdef CONFIG_BCACHEFS_QUOTA -+ -+static void bch2_quota_reservation_put(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res) -+{ -+ if (!res->sectors) -+ return; -+ -+ mutex_lock(&inode->ei_quota_lock); -+ BUG_ON(res->sectors > inode->ei_quota_reserved); -+ -+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, -+ -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC); -+ inode->ei_quota_reserved -= res->sectors; -+ mutex_unlock(&inode->ei_quota_lock); -+ -+ res->sectors = 0; -+} -+ -+static int bch2_quota_reservation_add(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res, -+ unsigned sectors, -+ bool check_enospc) -+{ -+ int ret; -+ -+ mutex_lock(&inode->ei_quota_lock); -+ ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, -+ check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); -+ if (likely(!ret)) { -+ inode->ei_quota_reserved += sectors; -+ res->sectors += sectors; -+ } -+ mutex_unlock(&inode->ei_quota_lock); -+ -+ return ret; -+} -+ -+#else -+ -+static void bch2_quota_reservation_put(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res) -+{ -+} -+ -+static int bch2_quota_reservation_add(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res, -+ unsigned sectors, -+ bool check_enospc) -+{ -+ return 0; -+} -+ -+#endif -+ -+/* i_size updates: */ -+ -+struct inode_new_size { -+ loff_t new_size; -+ u64 now; -+ unsigned fields; -+}; -+ -+static int inode_set_size(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct inode_new_size *s = p; -+ -+ bi->bi_size = s->new_size; -+ if (s->fields & ATTR_ATIME) -+ bi->bi_atime = s->now; -+ if (s->fields & ATTR_MTIME) -+ bi->bi_mtime = s->now; -+ if (s->fields & ATTR_CTIME) -+ bi->bi_ctime = s->now; -+ -+ return 0; -+} -+ -+int __must_check bch2_write_inode_size(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ loff_t new_size, unsigned fields) -+{ -+ struct inode_new_size s = { -+ .new_size = new_size, -+ .now = bch2_current_time(c), -+ .fields = fields, -+ }; -+ -+ return bch2_write_inode(c, inode, inode_set_size, &s, fields); -+} -+ -+static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, -+ struct quota_res *quota_res, s64 sectors) -+{ -+ if (!sectors) -+ return; -+ -+ mutex_lock(&inode->ei_quota_lock); -+#ifdef CONFIG_BCACHEFS_QUOTA -+ if (quota_res && sectors > 0) { -+ BUG_ON(sectors > quota_res->sectors); -+ BUG_ON(sectors > inode->ei_quota_reserved); -+ -+ quota_res->sectors -= sectors; -+ inode->ei_quota_reserved -= sectors; -+ } else { -+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN); -+ } -+#endif -+ inode->v.i_blocks += sectors; -+ mutex_unlock(&inode->ei_quota_lock); -+} -+ -+/* page state: */ -+ -+/* stored in page->private: */ -+ -+struct bch_page_sector { -+ /* Uncompressed, fully allocated replicas: */ -+ unsigned nr_replicas:3; -+ -+ /* Owns PAGE_SECTORS * replicas_reserved sized reservation: */ -+ unsigned replicas_reserved:3; -+ -+ /* i_sectors: */ -+ enum { -+ SECTOR_UNALLOCATED, -+ SECTOR_RESERVED, -+ SECTOR_DIRTY, -+ SECTOR_ALLOCATED, -+ } state:2; -+}; -+ -+struct bch_page_state { -+ spinlock_t lock; -+ atomic_t write_count; -+ struct bch_page_sector 
s[PAGE_SECTORS]; -+}; -+ -+static inline struct bch_page_state *__bch2_page_state(struct page *page) -+{ -+ return page_has_private(page) -+ ? (struct bch_page_state *) page_private(page) -+ : NULL; -+} -+ -+static inline struct bch_page_state *bch2_page_state(struct page *page) -+{ -+ EBUG_ON(!PageLocked(page)); -+ -+ return __bch2_page_state(page); -+} -+ -+/* for newly allocated pages: */ -+static void __bch2_page_state_release(struct page *page) -+{ -+ struct bch_page_state *s = __bch2_page_state(page); -+ -+ if (!s) -+ return; -+ -+ ClearPagePrivate(page); -+ set_page_private(page, 0); -+ put_page(page); -+ kfree(s); -+} -+ -+static void bch2_page_state_release(struct page *page) -+{ -+ struct bch_page_state *s = bch2_page_state(page); -+ -+ if (!s) -+ return; -+ -+ ClearPagePrivate(page); -+ set_page_private(page, 0); -+ put_page(page); -+ kfree(s); -+} -+ -+/* for newly allocated pages: */ -+static struct bch_page_state *__bch2_page_state_create(struct page *page, -+ gfp_t gfp) -+{ -+ struct bch_page_state *s; -+ -+ s = kzalloc(sizeof(*s), GFP_NOFS|gfp); -+ if (!s) -+ return NULL; -+ -+ spin_lock_init(&s->lock); -+ /* -+ * migrate_page_move_mapping() assumes that pages with private data -+ * have their count elevated by 1. -+ */ -+ get_page(page); -+ set_page_private(page, (unsigned long) s); -+ SetPagePrivate(page); -+ return s; -+} -+ -+static struct bch_page_state *bch2_page_state_create(struct page *page, -+ gfp_t gfp) -+{ -+ return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp); -+} -+ -+static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode) -+{ -+ /* XXX: this should not be open coded */ -+ return inode->ei_inode.bi_data_replicas -+ ? inode->ei_inode.bi_data_replicas - 1 -+ : c->opts.data_replicas; -+} -+ -+static inline unsigned sectors_to_reserve(struct bch_page_sector *s, -+ unsigned nr_replicas) -+{ -+ return max(0, (int) nr_replicas - -+ s->nr_replicas - -+ s->replicas_reserved); -+} -+ -+static int bch2_get_page_disk_reservation(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct page *page, bool check_enospc) -+{ -+ struct bch_page_state *s = bch2_page_state_create(page, 0); -+ unsigned nr_replicas = inode_nr_replicas(c, inode); -+ struct disk_reservation disk_res = { 0 }; -+ unsigned i, disk_res_sectors = 0; -+ int ret; -+ -+ if (!s) -+ return -ENOMEM; -+ -+ for (i = 0; i < ARRAY_SIZE(s->s); i++) -+ disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas); -+ -+ if (!disk_res_sectors) -+ return 0; -+ -+ ret = bch2_disk_reservation_get(c, &disk_res, -+ disk_res_sectors, 1, -+ !check_enospc -+ ? 
BCH_DISK_RESERVATION_NOFAIL -+ : 0); -+ if (unlikely(ret)) -+ return ret; -+ -+ for (i = 0; i < ARRAY_SIZE(s->s); i++) -+ s->s[i].replicas_reserved += -+ sectors_to_reserve(&s->s[i], nr_replicas); -+ -+ return 0; -+} -+ -+struct bch2_page_reservation { -+ struct disk_reservation disk; -+ struct quota_res quota; -+}; -+ -+static void bch2_page_reservation_init(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch2_page_reservation *res) -+{ -+ memset(res, 0, sizeof(*res)); -+ -+ res->disk.nr_replicas = inode_nr_replicas(c, inode); -+} -+ -+static void bch2_page_reservation_put(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch2_page_reservation *res) -+{ -+ bch2_disk_reservation_put(c, &res->disk); -+ bch2_quota_reservation_put(c, inode, &res->quota); -+} -+ -+static int bch2_page_reservation_get(struct bch_fs *c, -+ struct bch_inode_info *inode, struct page *page, -+ struct bch2_page_reservation *res, -+ unsigned offset, unsigned len, bool check_enospc) -+{ -+ struct bch_page_state *s = bch2_page_state_create(page, 0); -+ unsigned i, disk_sectors = 0, quota_sectors = 0; -+ int ret; -+ -+ if (!s) -+ return -ENOMEM; -+ -+ for (i = round_down(offset, block_bytes(c)) >> 9; -+ i < round_up(offset + len, block_bytes(c)) >> 9; -+ i++) { -+ disk_sectors += sectors_to_reserve(&s->s[i], -+ res->disk.nr_replicas); -+ quota_sectors += s->s[i].state == SECTOR_UNALLOCATED; -+ } -+ -+ if (disk_sectors) { -+ ret = bch2_disk_reservation_add(c, &res->disk, -+ disk_sectors, -+ !check_enospc -+ ? BCH_DISK_RESERVATION_NOFAIL -+ : 0); -+ if (unlikely(ret)) -+ return ret; -+ } -+ -+ if (quota_sectors) { -+ ret = bch2_quota_reservation_add(c, inode, &res->quota, -+ quota_sectors, -+ check_enospc); -+ if (unlikely(ret)) { -+ struct disk_reservation tmp = { -+ .sectors = disk_sectors -+ }; -+ -+ bch2_disk_reservation_put(c, &tmp); -+ res->disk.sectors -= disk_sectors; -+ return ret; -+ } -+ } -+ -+ return 0; -+} -+ -+static void bch2_clear_page_bits(struct page *page) -+{ -+ struct bch_inode_info *inode = to_bch_ei(page->mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_page_state *s = bch2_page_state(page); -+ struct disk_reservation disk_res = { 0 }; -+ int i, dirty_sectors = 0; -+ -+ if (!s) -+ return; -+ -+ EBUG_ON(!PageLocked(page)); -+ EBUG_ON(PageWriteback(page)); -+ -+ for (i = 0; i < ARRAY_SIZE(s->s); i++) { -+ disk_res.sectors += s->s[i].replicas_reserved; -+ s->s[i].replicas_reserved = 0; -+ -+ if (s->s[i].state == SECTOR_DIRTY) { -+ dirty_sectors++; -+ s->s[i].state = SECTOR_UNALLOCATED; -+ } -+ } -+ -+ bch2_disk_reservation_put(c, &disk_res); -+ -+ if (dirty_sectors) -+ i_sectors_acct(c, inode, NULL, -dirty_sectors); -+ -+ bch2_page_state_release(page); -+} -+ -+static void bch2_set_page_dirty(struct bch_fs *c, -+ struct bch_inode_info *inode, struct page *page, -+ struct bch2_page_reservation *res, -+ unsigned offset, unsigned len) -+{ -+ struct bch_page_state *s = bch2_page_state(page); -+ unsigned i, dirty_sectors = 0; -+ -+ WARN_ON((u64) page_offset(page) + offset + len > -+ round_up((u64) i_size_read(&inode->v), block_bytes(c))); -+ -+ spin_lock(&s->lock); -+ -+ for (i = round_down(offset, block_bytes(c)) >> 9; -+ i < round_up(offset + len, block_bytes(c)) >> 9; -+ i++) { -+ unsigned sectors = sectors_to_reserve(&s->s[i], -+ res->disk.nr_replicas); -+ -+ /* -+ * This can happen if we race with the error path in -+ * bch2_writepage_io_done(): -+ */ -+ sectors = min_t(unsigned, sectors, res->disk.sectors); -+ -+ s->s[i].replicas_reserved += 
sectors; -+ res->disk.sectors -= sectors; -+ -+ if (s->s[i].state == SECTOR_UNALLOCATED) -+ dirty_sectors++; -+ -+ s->s[i].state = max_t(unsigned, s->s[i].state, SECTOR_DIRTY); -+ } -+ -+ spin_unlock(&s->lock); -+ -+ if (dirty_sectors) -+ i_sectors_acct(c, inode, &res->quota, dirty_sectors); -+ -+ if (!PageDirty(page)) -+ __set_page_dirty_nobuffers(page); -+} -+ -+vm_fault_t bch2_page_fault(struct vm_fault *vmf) -+{ -+ struct file *file = vmf->vma->vm_file; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ int ret; -+ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ ret = filemap_fault(vmf); -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ -+ return ret; -+} -+ -+vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) -+{ -+ struct page *page = vmf->page; -+ struct file *file = vmf->vma->vm_file; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct address_space *mapping = file->f_mapping; -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch2_page_reservation res; -+ unsigned len; -+ loff_t isize; -+ int ret = VM_FAULT_LOCKED; -+ -+ bch2_page_reservation_init(c, inode, &res); -+ -+ sb_start_pagefault(inode->v.i_sb); -+ file_update_time(file); -+ -+ /* -+ * Not strictly necessary, but helps avoid dio writes livelocking in -+ * write_invalidate_inode_pages_range() - can drop this if/when we get -+ * a write_invalidate_inode_pages_range() that works without dropping -+ * page lock before invalidating page -+ */ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ -+ lock_page(page); -+ isize = i_size_read(&inode->v); -+ -+ if (page->mapping != mapping || page_offset(page) >= isize) { -+ unlock_page(page); -+ ret = VM_FAULT_NOPAGE; -+ goto out; -+ } -+ -+ len = min_t(loff_t, PAGE_SIZE, isize - page_offset(page)); -+ -+ if (bch2_page_reservation_get(c, inode, page, &res, 0, len, true)) { -+ unlock_page(page); -+ ret = VM_FAULT_SIGBUS; -+ goto out; -+ } -+ -+ bch2_set_page_dirty(c, inode, page, &res, 0, len); -+ bch2_page_reservation_put(c, inode, &res); -+ -+ wait_for_stable_page(page); -+out: -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ sb_end_pagefault(inode->v.i_sb); -+ -+ return ret; -+} -+ -+void bch2_invalidatepage(struct page *page, unsigned int offset, -+ unsigned int length) -+{ -+ if (offset || length < PAGE_SIZE) -+ return; -+ -+ bch2_clear_page_bits(page); -+} -+ -+int bch2_releasepage(struct page *page, gfp_t gfp_mask) -+{ -+ if (PageDirty(page)) -+ return 0; -+ -+ bch2_clear_page_bits(page); -+ return 1; -+} -+ -+#ifdef CONFIG_MIGRATION -+int bch2_migrate_page(struct address_space *mapping, struct page *newpage, -+ struct page *page, enum migrate_mode mode) -+{ -+ int ret; -+ -+ EBUG_ON(!PageLocked(page)); -+ EBUG_ON(!PageLocked(newpage)); -+ -+ ret = migrate_page_move_mapping(mapping, newpage, page, 0); -+ if (ret != MIGRATEPAGE_SUCCESS) -+ return ret; -+ -+ if (PagePrivate(page)) { -+ ClearPagePrivate(page); -+ get_page(newpage); -+ set_page_private(newpage, page_private(page)); -+ set_page_private(page, 0); -+ put_page(page); -+ SetPagePrivate(newpage); -+ } -+ -+ if (mode != MIGRATE_SYNC_NO_COPY) -+ migrate_page_copy(newpage, page); -+ else -+ migrate_page_states(newpage, page); -+ return MIGRATEPAGE_SUCCESS; -+} -+#endif -+ -+/* readpage(s): */ -+ -+static void bch2_readpages_end_io(struct bio *bio) -+{ -+ struct bvec_iter_all iter; -+ struct bio_vec *bv; -+ -+ bio_for_each_segment_all(bv, bio, iter) { -+ struct page *page = bv->bv_page; -+ -+ if (!bio->bi_status) { -+ SetPageUptodate(page); -+ } else { -+ 
ClearPageUptodate(page); -+ SetPageError(page); -+ } -+ unlock_page(page); -+ } -+ -+ bio_put(bio); -+} -+ -+static inline void page_state_init_for_read(struct page *page) -+{ -+ SetPagePrivate(page); -+ page->private = 0; -+} -+ -+struct readpages_iter { -+ struct address_space *mapping; -+ struct page **pages; -+ unsigned nr_pages; -+ unsigned nr_added; -+ unsigned idx; -+ pgoff_t offset; -+}; -+ -+static int readpages_iter_init(struct readpages_iter *iter, -+ struct address_space *mapping, -+ struct list_head *pages, unsigned nr_pages) -+{ -+ memset(iter, 0, sizeof(*iter)); -+ -+ iter->mapping = mapping; -+ iter->offset = list_last_entry(pages, struct page, lru)->index; -+ -+ iter->pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS); -+ if (!iter->pages) -+ return -ENOMEM; -+ -+ while (!list_empty(pages)) { -+ struct page *page = list_last_entry(pages, struct page, lru); -+ -+ __bch2_page_state_create(page, __GFP_NOFAIL); -+ -+ iter->pages[iter->nr_pages++] = page; -+ list_del(&page->lru); -+ } -+ -+ return 0; -+} -+ -+static inline struct page *readpage_iter_next(struct readpages_iter *iter) -+{ -+ struct page *page; -+ unsigned i; -+ int ret; -+ -+ BUG_ON(iter->idx > iter->nr_added); -+ BUG_ON(iter->nr_added > iter->nr_pages); -+ -+ if (iter->idx < iter->nr_added) -+ goto out; -+ -+ while (1) { -+ if (iter->idx == iter->nr_pages) -+ return NULL; -+ -+ ret = add_to_page_cache_lru_vec(iter->mapping, -+ iter->pages + iter->nr_added, -+ iter->nr_pages - iter->nr_added, -+ iter->offset + iter->nr_added, -+ GFP_NOFS); -+ if (ret > 0) -+ break; -+ -+ page = iter->pages[iter->nr_added]; -+ iter->idx++; -+ iter->nr_added++; -+ -+ __bch2_page_state_release(page); -+ put_page(page); -+ } -+ -+ iter->nr_added += ret; -+ -+ for (i = iter->idx; i < iter->nr_added; i++) -+ put_page(iter->pages[i]); -+out: -+ EBUG_ON(iter->pages[iter->idx]->index != iter->offset + iter->idx); -+ -+ return iter->pages[iter->idx]; -+} -+ -+static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k) -+{ -+ struct bvec_iter iter; -+ struct bio_vec bv; -+ unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v -+ ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k); -+ unsigned state = k.k->type == KEY_TYPE_reservation -+ ? 
SECTOR_RESERVED -+ : SECTOR_ALLOCATED; -+ -+ bio_for_each_segment(bv, bio, iter) { -+ struct bch_page_state *s = bch2_page_state(bv.bv_page); -+ unsigned i; -+ -+ for (i = bv.bv_offset >> 9; -+ i < (bv.bv_offset + bv.bv_len) >> 9; -+ i++) { -+ s->s[i].nr_replicas = nr_ptrs; -+ s->s[i].state = state; -+ } -+ } -+} -+ -+static bool extent_partial_reads_expensive(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ struct bch_extent_crc_unpacked crc; -+ const union bch_extent_entry *i; -+ -+ bkey_for_each_crc(k.k, ptrs, crc, i) -+ if (crc.csum_type || crc.compression_type) -+ return true; -+ return false; -+} -+ -+static void readpage_bio_extend(struct readpages_iter *iter, -+ struct bio *bio, -+ unsigned sectors_this_extent, -+ bool get_more) -+{ -+ while (bio_sectors(bio) < sectors_this_extent && -+ bio->bi_vcnt < bio->bi_max_vecs) { -+ pgoff_t page_offset = bio_end_sector(bio) >> PAGE_SECTOR_SHIFT; -+ struct page *page = readpage_iter_next(iter); -+ int ret; -+ -+ if (page) { -+ if (iter->offset + iter->idx != page_offset) -+ break; -+ -+ iter->idx++; -+ } else { -+ if (!get_more) -+ break; -+ -+ page = xa_load(&iter->mapping->i_pages, page_offset); -+ if (page && !xa_is_value(page)) -+ break; -+ -+ page = __page_cache_alloc(readahead_gfp_mask(iter->mapping)); -+ if (!page) -+ break; -+ -+ if (!__bch2_page_state_create(page, 0)) { -+ put_page(page); -+ break; -+ } -+ -+ ret = add_to_page_cache_lru(page, iter->mapping, -+ page_offset, GFP_NOFS); -+ if (ret) { -+ __bch2_page_state_release(page); -+ put_page(page); -+ break; -+ } -+ -+ put_page(page); -+ } -+ -+ BUG_ON(!bio_add_page(bio, page, PAGE_SIZE, 0)); -+ } -+} -+ -+static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, -+ struct bch_read_bio *rbio, u64 inum, -+ struct readpages_iter *readpages_iter) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_on_stack sk; -+ int flags = BCH_READ_RETRY_IF_STALE| -+ BCH_READ_MAY_PROMOTE; -+ int ret = 0; -+ -+ rbio->c = c; -+ rbio->start_time = local_clock(); -+ -+ bkey_on_stack_init(&sk); -+retry: -+ while (1) { -+ struct bkey_s_c k; -+ unsigned bytes, sectors, offset_into_extent; -+ -+ bch2_btree_iter_set_pos(iter, -+ POS(inum, rbio->bio.bi_iter.bi_sector)); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ break; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ -+ offset_into_extent = iter->pos.offset - -+ bkey_start_offset(k.k); -+ sectors = k.k->size - offset_into_extent; -+ -+ ret = bch2_read_indirect_extent(trans, -+ &offset_into_extent, &sk); -+ if (ret) -+ break; -+ -+ sectors = min(sectors, k.k->size - offset_into_extent); -+ -+ bch2_trans_unlock(trans); -+ -+ if (readpages_iter) -+ readpage_bio_extend(readpages_iter, &rbio->bio, sectors, -+ extent_partial_reads_expensive(k)); -+ -+ bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; -+ swap(rbio->bio.bi_iter.bi_size, bytes); -+ -+ if (rbio->bio.bi_iter.bi_size == bytes) -+ flags |= BCH_READ_LAST_FRAGMENT; -+ -+ if (bkey_extent_is_allocation(k.k)) -+ bch2_add_page_sectors(&rbio->bio, k); -+ -+ bch2_read_extent(trans, rbio, k, offset_into_extent, flags); -+ -+ if (flags & BCH_READ_LAST_FRAGMENT) -+ break; -+ -+ swap(rbio->bio.bi_iter.bi_size, bytes); -+ bio_advance(&rbio->bio, bytes); -+ } -+ -+ if (ret == -EINTR) -+ goto retry; -+ -+ if (ret) { -+ bcache_io_error(c, &rbio->bio, "btree IO error %i", ret); -+ bio_endio(&rbio->bio); -+ } -+ -+ bkey_on_stack_exit(&sk, c); -+} -+ -+int bch2_readpages(struct file *file, struct address_space 
*mapping, -+ struct list_head *pages, unsigned nr_pages) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_io_opts opts = io_opts(c, &inode->ei_inode); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct page *page; -+ struct readpages_iter readpages_iter; -+ int ret; -+ -+ ret = readpages_iter_init(&readpages_iter, mapping, pages, nr_pages); -+ BUG_ON(ret); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, -+ BTREE_ITER_SLOTS); -+ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ -+ while ((page = readpage_iter_next(&readpages_iter))) { -+ pgoff_t index = readpages_iter.offset + readpages_iter.idx; -+ unsigned n = min_t(unsigned, -+ readpages_iter.nr_pages - -+ readpages_iter.idx, -+ BIO_MAX_PAGES); -+ struct bch_read_bio *rbio = -+ rbio_init(bio_alloc_bioset(GFP_NOFS, n, &c->bio_read), -+ opts); -+ -+ readpages_iter.idx++; -+ -+ bio_set_op_attrs(&rbio->bio, REQ_OP_READ, 0); -+ rbio->bio.bi_iter.bi_sector = (sector_t) index << PAGE_SECTOR_SHIFT; -+ rbio->bio.bi_end_io = bch2_readpages_end_io; -+ BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0)); -+ -+ bchfs_read(&trans, iter, rbio, inode->v.i_ino, -+ &readpages_iter); -+ } -+ -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ -+ bch2_trans_exit(&trans); -+ kfree(readpages_iter.pages); -+ -+ return 0; -+} -+ -+static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, -+ u64 inum, struct page *page) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ -+ bch2_page_state_create(page, __GFP_NOFAIL); -+ -+ bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC); -+ rbio->bio.bi_iter.bi_sector = -+ (sector_t) page->index << PAGE_SECTOR_SHIFT; -+ BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0)); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, -+ BTREE_ITER_SLOTS); -+ -+ bchfs_read(&trans, iter, rbio, inum, NULL); -+ -+ bch2_trans_exit(&trans); -+} -+ -+int bch2_readpage(struct file *file, struct page *page) -+{ -+ struct bch_inode_info *inode = to_bch_ei(page->mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_io_opts opts = io_opts(c, &inode->ei_inode); -+ struct bch_read_bio *rbio; -+ -+ rbio = rbio_init(bio_alloc_bioset(GFP_NOFS, 1, &c->bio_read), opts); -+ rbio->bio.bi_end_io = bch2_readpages_end_io; -+ -+ __bchfs_readpage(c, rbio, inode->v.i_ino, page); -+ return 0; -+} -+ -+static void bch2_read_single_page_end_io(struct bio *bio) -+{ -+ complete(bio->bi_private); -+} -+ -+static int bch2_read_single_page(struct page *page, -+ struct address_space *mapping) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_read_bio *rbio; -+ int ret; -+ DECLARE_COMPLETION_ONSTACK(done); -+ -+ rbio = rbio_init(bio_alloc_bioset(GFP_NOFS, 1, &c->bio_read), -+ io_opts(c, &inode->ei_inode)); -+ rbio->bio.bi_private = &done; -+ rbio->bio.bi_end_io = bch2_read_single_page_end_io; -+ -+ __bchfs_readpage(c, rbio, inode->v.i_ino, page); -+ wait_for_completion(&done); -+ -+ ret = blk_status_to_errno(rbio->bio.bi_status); -+ bio_put(&rbio->bio); -+ -+ if (ret < 0) -+ return ret; -+ -+ SetPageUptodate(page); -+ return 0; -+} -+ -+/* writepages: */ -+ -+struct bch_writepage_state { -+ struct bch_writepage_io *io; -+ struct bch_io_opts opts; -+}; -+ -+static inline struct bch_writepage_state bch_writepage_state_init(struct 
bch_fs *c, -+ struct bch_inode_info *inode) -+{ -+ return (struct bch_writepage_state) { -+ .opts = io_opts(c, &inode->ei_inode) -+ }; -+} -+ -+static void bch2_writepage_io_free(struct closure *cl) -+{ -+ struct bch_writepage_io *io = container_of(cl, -+ struct bch_writepage_io, cl); -+ -+ bio_put(&io->op.wbio.bio); -+} -+ -+static void bch2_writepage_io_done(struct closure *cl) -+{ -+ struct bch_writepage_io *io = container_of(cl, -+ struct bch_writepage_io, cl); -+ struct bch_fs *c = io->op.c; -+ struct bio *bio = &io->op.wbio.bio; -+ struct bvec_iter_all iter; -+ struct bio_vec *bvec; -+ unsigned i; -+ -+ if (io->op.error) { -+ bio_for_each_segment_all(bvec, bio, iter) { -+ struct bch_page_state *s; -+ -+ SetPageError(bvec->bv_page); -+ mapping_set_error(bvec->bv_page->mapping, -EIO); -+ -+ s = __bch2_page_state(bvec->bv_page); -+ spin_lock(&s->lock); -+ for (i = 0; i < PAGE_SECTORS; i++) -+ s->s[i].nr_replicas = 0; -+ spin_unlock(&s->lock); -+ } -+ } -+ -+ if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) { -+ bio_for_each_segment_all(bvec, bio, iter) { -+ struct bch_page_state *s; -+ -+ s = __bch2_page_state(bvec->bv_page); -+ spin_lock(&s->lock); -+ for (i = 0; i < PAGE_SECTORS; i++) -+ s->s[i].nr_replicas = 0; -+ spin_unlock(&s->lock); -+ } -+ } -+ -+ /* -+ * racing with fallocate can cause us to add fewer sectors than -+ * expected - but we shouldn't add more sectors than expected: -+ */ -+ BUG_ON(io->op.i_sectors_delta > 0); -+ -+ /* -+ * (error (due to going RO) halfway through a page can screw that up -+ * slightly) -+ * XXX wtf? -+ BUG_ON(io->op.op.i_sectors_delta >= PAGE_SECTORS); -+ */ -+ -+ /* -+ * PageWriteback is effectively our ref on the inode - fixup i_blocks -+ * before calling end_page_writeback: -+ */ -+ i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta); -+ -+ bio_for_each_segment_all(bvec, bio, iter) { -+ struct bch_page_state *s = __bch2_page_state(bvec->bv_page); -+ -+ if (atomic_dec_and_test(&s->write_count)) -+ end_page_writeback(bvec->bv_page); -+ } -+ -+ closure_return_with_destructor(&io->cl, bch2_writepage_io_free); -+} -+ -+static void bch2_writepage_do_io(struct bch_writepage_state *w) -+{ -+ struct bch_writepage_io *io = w->io; -+ -+ w->io = NULL; -+ closure_call(&io->op.cl, bch2_write, NULL, &io->cl); -+ continue_at(&io->cl, bch2_writepage_io_done, NULL); -+} -+ -+/* -+ * Get a bch_writepage_io and add @page to it - appending to an existing one if -+ * possible, else allocating a new one: -+ */ -+static void bch2_writepage_io_alloc(struct bch_fs *c, -+ struct writeback_control *wbc, -+ struct bch_writepage_state *w, -+ struct bch_inode_info *inode, -+ u64 sector, -+ unsigned nr_replicas) -+{ -+ struct bch_write_op *op; -+ -+ w->io = container_of(bio_alloc_bioset(GFP_NOFS, -+ BIO_MAX_PAGES, -+ &c->writepage_bioset), -+ struct bch_writepage_io, op.wbio.bio); -+ -+ closure_init(&w->io->cl, NULL); -+ w->io->inode = inode; -+ -+ op = &w->io->op; -+ bch2_write_op_init(op, c, w->opts); -+ op->target = w->opts.foreground_target; -+ op_journal_seq_set(op, &inode->ei_journal_seq); -+ op->nr_replicas = nr_replicas; -+ op->res.nr_replicas = nr_replicas; -+ op->write_point = writepoint_hashed(inode->ei_last_dirtied); -+ op->pos = POS(inode->v.i_ino, sector); -+ op->wbio.bio.bi_iter.bi_sector = sector; -+ op->wbio.bio.bi_opf = wbc_to_write_flags(wbc); -+} -+ -+static int __bch2_writepage(struct page *page, -+ struct writeback_control *wbc, -+ void *data) -+{ -+ struct bch_inode_info *inode = to_bch_ei(page->mapping->host); -+ struct bch_fs *c = 
inode->v.i_sb->s_fs_info; -+ struct bch_writepage_state *w = data; -+ struct bch_page_state *s, orig; -+ unsigned i, offset, nr_replicas_this_write = U32_MAX; -+ loff_t i_size = i_size_read(&inode->v); -+ pgoff_t end_index = i_size >> PAGE_SHIFT; -+ int ret; -+ -+ EBUG_ON(!PageUptodate(page)); -+ -+ /* Is the page fully inside i_size? */ -+ if (page->index < end_index) -+ goto do_io; -+ -+ /* Is the page fully outside i_size? (truncate in progress) */ -+ offset = i_size & (PAGE_SIZE - 1); -+ if (page->index > end_index || !offset) { -+ unlock_page(page); -+ return 0; -+ } -+ -+ /* -+ * The page straddles i_size. It must be zeroed out on each and every -+ * writepage invocation because it may be mmapped. "A file is mapped -+ * in multiples of the page size. For a file that is not a multiple of -+ * the page size, the remaining memory is zeroed when mapped, and -+ * writes to that region are not written out to the file." -+ */ -+ zero_user_segment(page, offset, PAGE_SIZE); -+do_io: -+ s = bch2_page_state_create(page, __GFP_NOFAIL); -+ -+ ret = bch2_get_page_disk_reservation(c, inode, page, true); -+ if (ret) { -+ SetPageError(page); -+ mapping_set_error(page->mapping, ret); -+ unlock_page(page); -+ return 0; -+ } -+ -+ /* Before unlocking the page, get copy of reservations: */ -+ orig = *s; -+ -+ for (i = 0; i < PAGE_SECTORS; i++) { -+ if (s->s[i].state < SECTOR_DIRTY) -+ continue; -+ -+ nr_replicas_this_write = -+ min_t(unsigned, nr_replicas_this_write, -+ s->s[i].nr_replicas + -+ s->s[i].replicas_reserved); -+ } -+ -+ for (i = 0; i < PAGE_SECTORS; i++) { -+ if (s->s[i].state < SECTOR_DIRTY) -+ continue; -+ -+ s->s[i].nr_replicas = w->opts.compression -+ ? 0 : nr_replicas_this_write; -+ -+ s->s[i].replicas_reserved = 0; -+ s->s[i].state = SECTOR_ALLOCATED; -+ } -+ -+ BUG_ON(atomic_read(&s->write_count)); -+ atomic_set(&s->write_count, 1); -+ -+ BUG_ON(PageWriteback(page)); -+ set_page_writeback(page); -+ -+ unlock_page(page); -+ -+ offset = 0; -+ while (1) { -+ unsigned sectors = 1, dirty_sectors = 0, reserved_sectors = 0; -+ u64 sector; -+ -+ while (offset < PAGE_SECTORS && -+ orig.s[offset].state < SECTOR_DIRTY) -+ offset++; -+ -+ if (offset == PAGE_SECTORS) -+ break; -+ -+ sector = ((u64) page->index << PAGE_SECTOR_SHIFT) + offset; -+ -+ while (offset + sectors < PAGE_SECTORS && -+ orig.s[offset + sectors].state >= SECTOR_DIRTY) -+ sectors++; -+ -+ for (i = offset; i < offset + sectors; i++) { -+ reserved_sectors += orig.s[i].replicas_reserved; -+ dirty_sectors += orig.s[i].state == SECTOR_DIRTY; -+ } -+ -+ if (w->io && -+ (w->io->op.res.nr_replicas != nr_replicas_this_write || -+ bio_full(&w->io->op.wbio.bio, PAGE_SIZE) || -+ w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >= -+ (BIO_MAX_PAGES * PAGE_SIZE) || -+ bio_end_sector(&w->io->op.wbio.bio) != sector)) -+ bch2_writepage_do_io(w); -+ -+ if (!w->io) -+ bch2_writepage_io_alloc(c, wbc, w, inode, sector, -+ nr_replicas_this_write); -+ -+ atomic_inc(&s->write_count); -+ -+ BUG_ON(inode != w->io->inode); -+ BUG_ON(!bio_add_page(&w->io->op.wbio.bio, page, -+ sectors << 9, offset << 9)); -+ -+ /* Check for writing past i_size: */ -+ WARN_ON((bio_end_sector(&w->io->op.wbio.bio) << 9) > -+ round_up(i_size, block_bytes(c))); -+ -+ w->io->op.res.sectors += reserved_sectors; -+ w->io->op.i_sectors_delta -= dirty_sectors; -+ w->io->op.new_i_size = i_size; -+ -+ offset += sectors; -+ } -+ -+ if (atomic_dec_and_test(&s->write_count)) -+ end_page_writeback(page); -+ -+ return 0; -+} -+ -+int bch2_writepages(struct address_space *mapping, 
struct writeback_control *wbc) -+{ -+ struct bch_fs *c = mapping->host->i_sb->s_fs_info; -+ struct bch_writepage_state w = -+ bch_writepage_state_init(c, to_bch_ei(mapping->host)); -+ struct blk_plug plug; -+ int ret; -+ -+ blk_start_plug(&plug); -+ ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w); -+ if (w.io) -+ bch2_writepage_do_io(&w); -+ blk_finish_plug(&plug); -+ return ret; -+} -+ -+int bch2_writepage(struct page *page, struct writeback_control *wbc) -+{ -+ struct bch_fs *c = page->mapping->host->i_sb->s_fs_info; -+ struct bch_writepage_state w = -+ bch_writepage_state_init(c, to_bch_ei(page->mapping->host)); -+ int ret; -+ -+ ret = __bch2_writepage(page, wbc, &w); -+ if (w.io) -+ bch2_writepage_do_io(&w); -+ -+ return ret; -+} -+ -+/* buffered writes: */ -+ -+int bch2_write_begin(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned flags, -+ struct page **pagep, void **fsdata) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch2_page_reservation *res; -+ pgoff_t index = pos >> PAGE_SHIFT; -+ unsigned offset = pos & (PAGE_SIZE - 1); -+ struct page *page; -+ int ret = -ENOMEM; -+ -+ res = kmalloc(sizeof(*res), GFP_KERNEL); -+ if (!res) -+ return -ENOMEM; -+ -+ bch2_page_reservation_init(c, inode, res); -+ *fsdata = res; -+ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ -+ page = grab_cache_page_write_begin(mapping, index, flags); -+ if (!page) -+ goto err_unlock; -+ -+ if (PageUptodate(page)) -+ goto out; -+ -+ /* If we're writing entire page, don't need to read it in first: */ -+ if (len == PAGE_SIZE) -+ goto out; -+ -+ if (!offset && pos + len >= inode->v.i_size) { -+ zero_user_segment(page, len, PAGE_SIZE); -+ flush_dcache_page(page); -+ goto out; -+ } -+ -+ if (index > inode->v.i_size >> PAGE_SHIFT) { -+ zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE); -+ flush_dcache_page(page); -+ goto out; -+ } -+readpage: -+ ret = bch2_read_single_page(page, mapping); -+ if (ret) -+ goto err; -+out: -+ ret = bch2_page_reservation_get(c, inode, page, res, -+ offset, len, true); -+ if (ret) { -+ if (!PageUptodate(page)) { -+ /* -+ * If the page hasn't been read in, we won't know if we -+ * actually need a reservation - we don't actually need -+ * to read here, we just need to check if the page is -+ * fully backed by uncompressed data: -+ */ -+ goto readpage; -+ } -+ -+ goto err; -+ } -+ -+ *pagep = page; -+ return 0; -+err: -+ unlock_page(page); -+ put_page(page); -+ *pagep = NULL; -+err_unlock: -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ kfree(res); -+ *fsdata = NULL; -+ return ret; -+} -+ -+int bch2_write_end(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned copied, -+ struct page *page, void *fsdata) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch2_page_reservation *res = fsdata; -+ unsigned offset = pos & (PAGE_SIZE - 1); -+ -+ lockdep_assert_held(&inode->v.i_rwsem); -+ -+ if (unlikely(copied < len && !PageUptodate(page))) { -+ /* -+ * The page needs to be read in, but that would destroy -+ * our partial write - simplest thing is to just force -+ * userspace to redo the write: -+ */ -+ zero_user(page, 0, PAGE_SIZE); -+ flush_dcache_page(page); -+ copied = 0; -+ } -+ -+ spin_lock(&inode->v.i_lock); -+ if (pos + copied > inode->v.i_size) -+ i_size_write(&inode->v, pos + copied); -+ spin_unlock(&inode->v.i_lock); -+ -+ if 
(copied) { -+ if (!PageUptodate(page)) -+ SetPageUptodate(page); -+ -+ bch2_set_page_dirty(c, inode, page, res, offset, copied); -+ -+ inode->ei_last_dirtied = (unsigned long) current; -+ } -+ -+ unlock_page(page); -+ put_page(page); -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ -+ bch2_page_reservation_put(c, inode, res); -+ kfree(res); -+ -+ return copied; -+} -+ -+#define WRITE_BATCH_PAGES 32 -+ -+static int __bch2_buffered_write(struct bch_inode_info *inode, -+ struct address_space *mapping, -+ struct iov_iter *iter, -+ loff_t pos, unsigned len) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct page *pages[WRITE_BATCH_PAGES]; -+ struct bch2_page_reservation res; -+ unsigned long index = pos >> PAGE_SHIFT; -+ unsigned offset = pos & (PAGE_SIZE - 1); -+ unsigned nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); -+ unsigned i, reserved = 0, set_dirty = 0; -+ unsigned copied = 0, nr_pages_copied = 0; -+ int ret = 0; -+ -+ BUG_ON(!len); -+ BUG_ON(nr_pages > ARRAY_SIZE(pages)); -+ -+ bch2_page_reservation_init(c, inode, &res); -+ -+ for (i = 0; i < nr_pages; i++) { -+ pages[i] = grab_cache_page_write_begin(mapping, index + i, 0); -+ if (!pages[i]) { -+ nr_pages = i; -+ if (!i) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ len = min_t(unsigned, len, -+ nr_pages * PAGE_SIZE - offset); -+ break; -+ } -+ } -+ -+ if (offset && !PageUptodate(pages[0])) { -+ ret = bch2_read_single_page(pages[0], mapping); -+ if (ret) -+ goto out; -+ } -+ -+ if ((pos + len) & (PAGE_SIZE - 1) && -+ !PageUptodate(pages[nr_pages - 1])) { -+ if ((index + nr_pages - 1) << PAGE_SHIFT >= inode->v.i_size) { -+ zero_user(pages[nr_pages - 1], 0, PAGE_SIZE); -+ } else { -+ ret = bch2_read_single_page(pages[nr_pages - 1], mapping); -+ if (ret) -+ goto out; -+ } -+ } -+ -+ while (reserved < len) { -+ struct page *page = pages[(offset + reserved) >> PAGE_SHIFT]; -+ unsigned pg_offset = (offset + reserved) & (PAGE_SIZE - 1); -+ unsigned pg_len = min_t(unsigned, len - reserved, -+ PAGE_SIZE - pg_offset); -+retry_reservation: -+ ret = bch2_page_reservation_get(c, inode, page, &res, -+ pg_offset, pg_len, true); -+ -+ if (ret && !PageUptodate(page)) { -+ ret = bch2_read_single_page(page, mapping); -+ if (!ret) -+ goto retry_reservation; -+ } -+ -+ if (ret) -+ goto out; -+ -+ reserved += pg_len; -+ } -+ -+ if (mapping_writably_mapped(mapping)) -+ for (i = 0; i < nr_pages; i++) -+ flush_dcache_page(pages[i]); -+ -+ while (copied < len) { -+ struct page *page = pages[(offset + copied) >> PAGE_SHIFT]; -+ unsigned pg_offset = (offset + copied) & (PAGE_SIZE - 1); -+ unsigned pg_len = min_t(unsigned, len - copied, -+ PAGE_SIZE - pg_offset); -+ unsigned pg_copied = iov_iter_copy_from_user_atomic(page, -+ iter, pg_offset, pg_len); -+ -+ if (!pg_copied) -+ break; -+ -+ if (!PageUptodate(page) && -+ pg_copied != PAGE_SIZE && -+ pos + copied + pg_copied < inode->v.i_size) { -+ zero_user(page, 0, PAGE_SIZE); -+ break; -+ } -+ -+ flush_dcache_page(page); -+ iov_iter_advance(iter, pg_copied); -+ copied += pg_copied; -+ -+ if (pg_copied != pg_len) -+ break; -+ } -+ -+ if (!copied) -+ goto out; -+ -+ spin_lock(&inode->v.i_lock); -+ if (pos + copied > inode->v.i_size) -+ i_size_write(&inode->v, pos + copied); -+ spin_unlock(&inode->v.i_lock); -+ -+ while (set_dirty < copied) { -+ struct page *page = pages[(offset + set_dirty) >> PAGE_SHIFT]; -+ unsigned pg_offset = (offset + set_dirty) & (PAGE_SIZE - 1); -+ unsigned pg_len = min_t(unsigned, copied - set_dirty, -+ PAGE_SIZE - pg_offset); -+ -+ if (!PageUptodate(page)) -+ 
SetPageUptodate(page); -+ -+ bch2_set_page_dirty(c, inode, page, &res, pg_offset, pg_len); -+ unlock_page(page); -+ put_page(page); -+ -+ set_dirty += pg_len; -+ } -+ -+ nr_pages_copied = DIV_ROUND_UP(offset + copied, PAGE_SIZE); -+ inode->ei_last_dirtied = (unsigned long) current; -+out: -+ for (i = nr_pages_copied; i < nr_pages; i++) { -+ unlock_page(pages[i]); -+ put_page(pages[i]); -+ } -+ -+ bch2_page_reservation_put(c, inode, &res); -+ -+ return copied ?: ret; -+} -+ -+static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter) -+{ -+ struct file *file = iocb->ki_filp; -+ struct address_space *mapping = file->f_mapping; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ loff_t pos = iocb->ki_pos; -+ ssize_t written = 0; -+ int ret = 0; -+ -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ -+ do { -+ unsigned offset = pos & (PAGE_SIZE - 1); -+ unsigned bytes = min_t(unsigned long, iov_iter_count(iter), -+ PAGE_SIZE * WRITE_BATCH_PAGES - offset); -+again: -+ /* -+ * Bring in the user page that we will copy from _first_. -+ * Otherwise there's a nasty deadlock on copying from the -+ * same page as we're writing to, without it being marked -+ * up-to-date. -+ * -+ * Not only is this an optimisation, but it is also required -+ * to check that the address is actually valid, when atomic -+ * usercopies are used, below. -+ */ -+ if (unlikely(iov_iter_fault_in_readable(iter, bytes))) { -+ bytes = min_t(unsigned long, iov_iter_count(iter), -+ PAGE_SIZE - offset); -+ -+ if (unlikely(iov_iter_fault_in_readable(iter, bytes))) { -+ ret = -EFAULT; -+ break; -+ } -+ } -+ -+ if (unlikely(fatal_signal_pending(current))) { -+ ret = -EINTR; -+ break; -+ } -+ -+ ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes); -+ if (unlikely(ret < 0)) -+ break; -+ -+ cond_resched(); -+ -+ if (unlikely(ret == 0)) { -+ /* -+ * If we were unable to copy any data at all, we must -+ * fall back to a single segment length write. -+ * -+ * If we didn't fallback here, we could livelock -+ * because not all segments in the iov can be copied at -+ * once without a pagefault. -+ */ -+ bytes = min_t(unsigned long, PAGE_SIZE - offset, -+ iov_iter_single_seg_count(iter)); -+ goto again; -+ } -+ pos += ret; -+ written += ret; -+ ret = 0; -+ -+ balance_dirty_pages_ratelimited(mapping); -+ } while (iov_iter_count(iter)); -+ -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ -+ return written ? 
written : ret; -+} -+ -+/* O_DIRECT reads */ -+ -+static void bch2_dio_read_complete(struct closure *cl) -+{ -+ struct dio_read *dio = container_of(cl, struct dio_read, cl); -+ -+ dio->req->ki_complete(dio->req, dio->ret, 0); -+ bio_check_pages_dirty(&dio->rbio.bio); /* transfers ownership */ -+} -+ -+static void bch2_direct_IO_read_endio(struct bio *bio) -+{ -+ struct dio_read *dio = bio->bi_private; -+ -+ if (bio->bi_status) -+ dio->ret = blk_status_to_errno(bio->bi_status); -+ -+ closure_put(&dio->cl); -+} -+ -+static void bch2_direct_IO_read_split_endio(struct bio *bio) -+{ -+ bch2_direct_IO_read_endio(bio); -+ bio_check_pages_dirty(bio); /* transfers ownership */ -+} -+ -+static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) -+{ -+ struct file *file = req->ki_filp; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_io_opts opts = io_opts(c, &inode->ei_inode); -+ struct dio_read *dio; -+ struct bio *bio; -+ loff_t offset = req->ki_pos; -+ bool sync = is_sync_kiocb(req); -+ size_t shorten; -+ ssize_t ret; -+ -+ if ((offset|iter->count) & (block_bytes(c) - 1)) -+ return -EINVAL; -+ -+ ret = min_t(loff_t, iter->count, -+ max_t(loff_t, 0, i_size_read(&inode->v) - offset)); -+ -+ if (!ret) -+ return ret; -+ -+ shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c)); -+ iter->count -= shorten; -+ -+ bio = bio_alloc_bioset(GFP_KERNEL, -+ iov_iter_npages(iter, BIO_MAX_PAGES), -+ &c->dio_read_bioset); -+ -+ bio->bi_end_io = bch2_direct_IO_read_endio; -+ -+ dio = container_of(bio, struct dio_read, rbio.bio); -+ closure_init(&dio->cl, NULL); -+ -+ /* -+ * this is a _really_ horrible hack just to avoid an atomic sub at the -+ * end: -+ */ -+ if (!sync) { -+ set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL); -+ atomic_set(&dio->cl.remaining, -+ CLOSURE_REMAINING_INITIALIZER - -+ CLOSURE_RUNNING + -+ CLOSURE_DESTRUCTOR); -+ } else { -+ atomic_set(&dio->cl.remaining, -+ CLOSURE_REMAINING_INITIALIZER + 1); -+ } -+ -+ dio->req = req; -+ dio->ret = ret; -+ -+ goto start; -+ while (iter->count) { -+ bio = bio_alloc_bioset(GFP_KERNEL, -+ iov_iter_npages(iter, BIO_MAX_PAGES), -+ &c->bio_read); -+ bio->bi_end_io = bch2_direct_IO_read_split_endio; -+start: -+ bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC); -+ bio->bi_iter.bi_sector = offset >> 9; -+ bio->bi_private = dio; -+ -+ ret = bio_iov_iter_get_pages(bio, iter); -+ if (ret < 0) { -+ /* XXX: fault inject this path */ -+ bio->bi_status = BLK_STS_RESOURCE; -+ bio_endio(bio); -+ break; -+ } -+ -+ offset += bio->bi_iter.bi_size; -+ bio_set_pages_dirty(bio); -+ -+ if (iter->count) -+ closure_get(&dio->cl); -+ -+ bch2_read(c, rbio_init(bio, opts), inode->v.i_ino); -+ } -+ -+ iter->count += shorten; -+ -+ if (sync) { -+ closure_sync(&dio->cl); -+ closure_debug_destroy(&dio->cl); -+ ret = dio->ret; -+ bio_check_pages_dirty(&dio->rbio.bio); /* transfers ownership */ -+ return ret; -+ } else { -+ return -EIOCBQUEUED; -+ } -+} -+ -+ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) -+{ -+ struct file *file = iocb->ki_filp; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct address_space *mapping = file->f_mapping; -+ size_t count = iov_iter_count(iter); -+ ssize_t ret; -+ -+ if (!count) -+ return 0; /* skip atime */ -+ -+ if (iocb->ki_flags & IOCB_DIRECT) { -+ struct blk_plug plug; -+ -+ ret = filemap_write_and_wait_range(mapping, -+ iocb->ki_pos, -+ iocb->ki_pos + count - 1); -+ if (ret < 0) -+ return ret; -+ -+ file_accessed(file); -+ -+ 
blk_start_plug(&plug); -+ ret = bch2_direct_IO_read(iocb, iter); -+ blk_finish_plug(&plug); -+ -+ if (ret >= 0) -+ iocb->ki_pos += ret; -+ } else { -+ bch2_pagecache_add_get(&inode->ei_pagecache_lock); -+ ret = generic_file_read_iter(iocb, iter); -+ bch2_pagecache_add_put(&inode->ei_pagecache_lock); -+ } -+ -+ return ret; -+} -+ -+/* O_DIRECT writes */ -+ -+static void bch2_dio_write_loop_async(struct bch_write_op *); -+ -+static long bch2_dio_write_loop(struct dio_write *dio) -+{ -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ struct kiocb *req = dio->req; -+ struct address_space *mapping = req->ki_filp->f_mapping; -+ struct bch_inode_info *inode = file_bch_inode(req->ki_filp); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bio *bio = &dio->op.wbio.bio; -+ struct bvec_iter_all iter; -+ struct bio_vec *bv; -+ unsigned unaligned; -+ bool sync = dio->sync; -+ long ret; -+ -+ if (dio->loop) -+ goto loop; -+ -+ while (1) { -+ if (kthread) -+ kthread_use_mm(dio->mm); -+ BUG_ON(current->faults_disabled_mapping); -+ current->faults_disabled_mapping = mapping; -+ -+ ret = bio_iov_iter_get_pages(bio, &dio->iter); -+ -+ current->faults_disabled_mapping = NULL; -+ if (kthread) -+ kthread_unuse_mm(dio->mm); -+ -+ if (unlikely(ret < 0)) -+ goto err; -+ -+ unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1); -+ bio->bi_iter.bi_size -= unaligned; -+ iov_iter_revert(&dio->iter, unaligned); -+ -+ if (!bio->bi_iter.bi_size) { -+ /* -+ * bio_iov_iter_get_pages was only able to get < -+ * blocksize worth of pages: -+ */ -+ bio_for_each_segment_all(bv, bio, iter) -+ put_page(bv->bv_page); -+ ret = -EFAULT; -+ goto err; -+ } -+ -+ bch2_write_op_init(&dio->op, c, io_opts(c, &inode->ei_inode)); -+ dio->op.end_io = bch2_dio_write_loop_async; -+ dio->op.target = dio->op.opts.foreground_target; -+ op_journal_seq_set(&dio->op, &inode->ei_journal_seq); -+ dio->op.write_point = writepoint_hashed((unsigned long) current); -+ dio->op.nr_replicas = dio->op.opts.data_replicas; -+ dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9); -+ -+ if ((req->ki_flags & IOCB_DSYNC) && -+ !c->opts.journal_flush_disabled) -+ dio->op.flags |= BCH_WRITE_FLUSH; -+ -+ ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio), -+ dio->op.opts.data_replicas, 0); -+ if (unlikely(ret) && -+ !bch2_check_range_allocated(c, dio->op.pos, -+ bio_sectors(bio), dio->op.opts.data_replicas)) -+ goto err; -+ -+ task_io_account_write(bio->bi_iter.bi_size); -+ -+ if (!dio->sync && !dio->loop && dio->iter.count) { -+ struct iovec *iov = dio->inline_vecs; -+ -+ if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { -+ iov = kmalloc(dio->iter.nr_segs * sizeof(*iov), -+ GFP_KERNEL); -+ if (unlikely(!iov)) { -+ dio->sync = sync = true; -+ goto do_io; -+ } -+ -+ dio->free_iov = true; -+ } -+ -+ memcpy(iov, dio->iter.iov, dio->iter.nr_segs * sizeof(*iov)); -+ dio->iter.iov = iov; -+ } -+do_io: -+ dio->loop = true; -+ closure_call(&dio->op.cl, bch2_write, NULL, NULL); -+ -+ if (sync) -+ wait_for_completion(&dio->done); -+ else -+ return -EIOCBQUEUED; -+loop: -+ i_sectors_acct(c, inode, &dio->quota_res, -+ dio->op.i_sectors_delta); -+ req->ki_pos += (u64) dio->op.written << 9; -+ dio->written += dio->op.written; -+ -+ spin_lock(&inode->v.i_lock); -+ if (req->ki_pos > inode->v.i_size) -+ i_size_write(&inode->v, req->ki_pos); -+ spin_unlock(&inode->v.i_lock); -+ -+ bio_for_each_segment_all(bv, bio, iter) -+ put_page(bv->bv_page); -+ if (!dio->iter.count || dio->op.error) -+ break; -+ -+ bio_reset(bio); -+ 
reinit_completion(&dio->done); -+ } -+ -+ ret = dio->op.error ?: ((long) dio->written << 9); -+err: -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ bch2_quota_reservation_put(c, inode, &dio->quota_res); -+ -+ if (dio->free_iov) -+ kfree(dio->iter.iov); -+ -+ bio_put(bio); -+ -+ /* inode->i_dio_count is our ref on inode and thus bch_fs */ -+ inode_dio_end(&inode->v); -+ -+ if (!sync) { -+ req->ki_complete(req, ret, 0); -+ ret = -EIOCBQUEUED; -+ } -+ return ret; -+} -+ -+static void bch2_dio_write_loop_async(struct bch_write_op *op) -+{ -+ struct dio_write *dio = container_of(op, struct dio_write, op); -+ -+ if (dio->sync) -+ complete(&dio->done); -+ else -+ bch2_dio_write_loop(dio); -+} -+ -+static noinline -+ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) -+{ -+ struct file *file = req->ki_filp; -+ struct address_space *mapping = file->f_mapping; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct dio_write *dio; -+ struct bio *bio; -+ bool locked = true, extending; -+ ssize_t ret; -+ -+ prefetch(&c->opts); -+ prefetch((void *) &c->opts + 64); -+ prefetch(&inode->ei_inode); -+ prefetch((void *) &inode->ei_inode + 64); -+ -+ inode_lock(&inode->v); -+ -+ ret = generic_write_checks(req, iter); -+ if (unlikely(ret <= 0)) -+ goto err; -+ -+ ret = file_remove_privs(file); -+ if (unlikely(ret)) -+ goto err; -+ -+ ret = file_update_time(file); -+ if (unlikely(ret)) -+ goto err; -+ -+ if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) -+ goto err; -+ -+ inode_dio_begin(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ extending = req->ki_pos + iter->count > inode->v.i_size; -+ if (!extending) { -+ inode_unlock(&inode->v); -+ locked = false; -+ } -+ -+ bio = bio_alloc_bioset(GFP_KERNEL, -+ iov_iter_npages(iter, BIO_MAX_PAGES), -+ &c->dio_write_bioset); -+ dio = container_of(bio, struct dio_write, op.wbio.bio); -+ init_completion(&dio->done); -+ dio->req = req; -+ dio->mm = current->mm; -+ dio->loop = false; -+ dio->sync = is_sync_kiocb(req) || extending; -+ dio->free_iov = false; -+ dio->quota_res.sectors = 0; -+ dio->written = 0; -+ dio->iter = *iter; -+ -+ ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, -+ iter->count >> 9, true); -+ if (unlikely(ret)) -+ goto err_put_bio; -+ -+ ret = write_invalidate_inode_pages_range(mapping, -+ req->ki_pos, -+ req->ki_pos + iter->count - 1); -+ if (unlikely(ret)) -+ goto err_put_bio; -+ -+ ret = bch2_dio_write_loop(dio); -+err: -+ if (locked) -+ inode_unlock(&inode->v); -+ return ret; -+err_put_bio: -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ bch2_quota_reservation_put(c, inode, &dio->quota_res); -+ bio_put(bio); -+ inode_dio_end(&inode->v); -+ goto err; -+} -+ -+ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) -+{ -+ struct file *file = iocb->ki_filp; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ ssize_t ret; -+ -+ if (iocb->ki_flags & IOCB_DIRECT) -+ return bch2_direct_write(iocb, from); -+ -+ /* We can write back this queue in page reclaim */ -+ current->backing_dev_info = inode_to_bdi(&inode->v); -+ inode_lock(&inode->v); -+ -+ ret = generic_write_checks(iocb, from); -+ if (ret <= 0) -+ goto unlock; -+ -+ ret = file_remove_privs(file); -+ if (ret) -+ goto unlock; -+ -+ ret = file_update_time(file); -+ if (ret) -+ goto unlock; -+ -+ ret = bch2_buffered_write(iocb, from); -+ if (likely(ret > 0)) -+ iocb->ki_pos += ret; -+unlock: -+ inode_unlock(&inode->v); -+ 
current->backing_dev_info = NULL; -+ -+ if (ret > 0) -+ ret = generic_write_sync(iocb, ret); -+ -+ return ret; -+} -+ -+/* fsync: */ -+ -+int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ int ret, ret2; -+ -+ ret = file_write_and_wait_range(file, start, end); -+ if (ret) -+ return ret; -+ -+ if (datasync && !(inode->v.i_state & I_DIRTY_DATASYNC)) -+ goto out; -+ -+ ret = sync_inode_metadata(&inode->v, 1); -+ if (ret) -+ return ret; -+out: -+ if (!c->opts.journal_flush_disabled) -+ ret = bch2_journal_flush_seq(&c->journal, -+ inode->ei_journal_seq); -+ ret2 = file_check_and_advance_wb_err(file); -+ -+ return ret ?: ret2; -+} -+ -+/* truncate: */ -+ -+static inline int range_has_data(struct bch_fs *c, -+ struct bpos start, -+ struct bpos end) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, start, 0, k, ret) { -+ if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) -+ break; -+ -+ if (bkey_extent_is_data(k.k)) { -+ ret = 1; -+ break; -+ } -+ } -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+static int __bch2_truncate_page(struct bch_inode_info *inode, -+ pgoff_t index, loff_t start, loff_t end) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct address_space *mapping = inode->v.i_mapping; -+ struct bch_page_state *s; -+ unsigned start_offset = start & (PAGE_SIZE - 1); -+ unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1; -+ unsigned i; -+ struct page *page; -+ int ret = 0; -+ -+ /* Page boundary? Nothing to do */ -+ if (!((index == start >> PAGE_SHIFT && start_offset) || -+ (index == end >> PAGE_SHIFT && end_offset != PAGE_SIZE))) -+ return 0; -+ -+ /* Above i_size? */ -+ if (index << PAGE_SHIFT >= inode->v.i_size) -+ return 0; -+ -+ page = find_lock_page(mapping, index); -+ if (!page) { -+ /* -+ * XXX: we're doing two index lookups when we end up reading the -+ * page -+ */ -+ ret = range_has_data(c, -+ POS(inode->v.i_ino, index << PAGE_SECTOR_SHIFT), -+ POS(inode->v.i_ino, (index + 1) << PAGE_SECTOR_SHIFT)); -+ if (ret <= 0) -+ return ret; -+ -+ page = find_or_create_page(mapping, index, GFP_KERNEL); -+ if (unlikely(!page)) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ } -+ -+ s = bch2_page_state_create(page, 0); -+ if (!s) { -+ ret = -ENOMEM; -+ goto unlock; -+ } -+ -+ if (!PageUptodate(page)) { -+ ret = bch2_read_single_page(page, mapping); -+ if (ret) -+ goto unlock; -+ } -+ -+ if (index != start >> PAGE_SHIFT) -+ start_offset = 0; -+ if (index != end >> PAGE_SHIFT) -+ end_offset = PAGE_SIZE; -+ -+ for (i = round_up(start_offset, block_bytes(c)) >> 9; -+ i < round_down(end_offset, block_bytes(c)) >> 9; -+ i++) { -+ s->s[i].nr_replicas = 0; -+ s->s[i].state = SECTOR_UNALLOCATED; -+ } -+ -+ zero_user_segment(page, start_offset, end_offset); -+ -+ /* -+ * Bit of a hack - we don't want truncate to fail due to -ENOSPC. -+ * -+ * XXX: because we aren't currently tracking whether the page has actual -+ * data in it (vs. just 0s, or only partially written) this wrong. ick. 
-+ */ -+ ret = bch2_get_page_disk_reservation(c, inode, page, false); -+ BUG_ON(ret); -+ -+ /* -+ * This removes any writeable userspace mappings; we need to force -+ * .page_mkwrite to be called again before any mmapped writes, to -+ * redirty the full page: -+ */ -+ page_mkclean(page); -+ __set_page_dirty_nobuffers(page); -+unlock: -+ unlock_page(page); -+ put_page(page); -+out: -+ return ret; -+} -+ -+static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from) -+{ -+ return __bch2_truncate_page(inode, from >> PAGE_SHIFT, -+ from, round_up(from, PAGE_SIZE)); -+} -+ -+static int bch2_extend(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *inode_u, -+ struct iattr *iattr) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct address_space *mapping = inode->v.i_mapping; -+ int ret; -+ -+ /* -+ * sync appends: -+ * -+ * this has to be done _before_ extending i_size: -+ */ -+ ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX); -+ if (ret) -+ return ret; -+ -+ truncate_setsize(&inode->v, iattr->ia_size); -+ setattr_copy(&inode->v, iattr); -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode_size(c, inode, inode->v.i_size, -+ ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ -+ return ret; -+} -+ -+static int bch2_truncate_finish_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; -+ bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); -+ return 0; -+} -+ -+static int bch2_truncate_start_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, void *p) -+{ -+ u64 *new_i_size = p; -+ -+ bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY; -+ bi->bi_size = *new_i_size; -+ return 0; -+} -+ -+int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct address_space *mapping = inode->v.i_mapping; -+ struct bch_inode_unpacked inode_u; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ u64 new_i_size = iattr->ia_size; -+ s64 i_sectors_delta = 0; -+ int ret = 0; -+ -+ inode_dio_wait(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ /* -+ * fetch current on disk i_size: inode is locked, i_size can only -+ * increase underneath us: -+ */ -+ bch2_trans_init(&trans, c, 0, 0); -+ iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, 0); -+ ret = PTR_ERR_OR_ZERO(iter); -+ bch2_trans_exit(&trans); -+ -+ if (ret) -+ goto err; -+ -+ /* -+ * check this before next assertion; on filesystem error our normal -+ * invariants are a bit broken (truncate has to truncate the page cache -+ * before the inode). -+ */ -+ ret = bch2_journal_error(&c->journal); -+ if (ret) -+ goto err; -+ -+ BUG_ON(inode->v.i_size < inode_u.bi_size); -+ -+ if (iattr->ia_size > inode->v.i_size) { -+ ret = bch2_extend(inode, &inode_u, iattr); -+ goto err; -+ } -+ -+ ret = bch2_truncate_page(inode, iattr->ia_size); -+ if (unlikely(ret)) -+ goto err; -+ -+ /* -+ * When extending, we're going to write the new i_size to disk -+ * immediately so we need to flush anything above the current on disk -+ * i_size first: -+ * -+ * Also, when extending we need to flush the page that i_size currently -+ * straddles - if it's mapped to userspace, we need to ensure that -+ * userspace has to redirty it and call .mkwrite -> set_page_dirty -+ * again to allocate the part of the page that was extended. 
-+ */ -+ if (iattr->ia_size > inode_u.bi_size) -+ ret = filemap_write_and_wait_range(mapping, -+ inode_u.bi_size, -+ iattr->ia_size - 1); -+ else if (iattr->ia_size & (PAGE_SIZE - 1)) -+ ret = filemap_write_and_wait_range(mapping, -+ round_down(iattr->ia_size, PAGE_SIZE), -+ iattr->ia_size - 1); -+ if (ret) -+ goto err; -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode(c, inode, bch2_truncate_start_fn, -+ &new_i_size, 0); -+ mutex_unlock(&inode->ei_update_lock); -+ -+ if (unlikely(ret)) -+ goto err; -+ -+ truncate_setsize(&inode->v, iattr->ia_size); -+ -+ ret = bch2_fpunch(c, inode->v.i_ino, -+ round_up(iattr->ia_size, block_bytes(c)) >> 9, -+ U64_MAX, &inode->ei_journal_seq, &i_sectors_delta); -+ i_sectors_acct(c, inode, NULL, i_sectors_delta); -+ -+ if (unlikely(ret)) -+ goto err; -+ -+ setattr_copy(&inode->v, iattr); -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, -+ ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+err: -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ return ret; -+} -+ -+/* fallocate: */ -+ -+static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ u64 discard_start = round_up(offset, block_bytes(c)) >> 9; -+ u64 discard_end = round_down(offset + len, block_bytes(c)) >> 9; -+ int ret = 0; -+ -+ inode_lock(&inode->v); -+ inode_dio_wait(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ ret = __bch2_truncate_page(inode, -+ offset >> PAGE_SHIFT, -+ offset, offset + len); -+ if (unlikely(ret)) -+ goto err; -+ -+ if (offset >> PAGE_SHIFT != -+ (offset + len) >> PAGE_SHIFT) { -+ ret = __bch2_truncate_page(inode, -+ (offset + len) >> PAGE_SHIFT, -+ offset, offset + len); -+ if (unlikely(ret)) -+ goto err; -+ } -+ -+ truncate_pagecache_range(&inode->v, offset, offset + len - 1); -+ -+ if (discard_start < discard_end) { -+ s64 i_sectors_delta = 0; -+ -+ ret = bch2_fpunch(c, inode->v.i_ino, -+ discard_start, discard_end, -+ &inode->ei_journal_seq, -+ &i_sectors_delta); -+ i_sectors_acct(c, inode, NULL, i_sectors_delta); -+ } -+err: -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ inode_unlock(&inode->v); -+ -+ return ret; -+} -+ -+static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, -+ loff_t offset, loff_t len, -+ bool insert) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct address_space *mapping = inode->v.i_mapping; -+ struct bkey_on_stack copy; -+ struct btree_trans trans; -+ struct btree_iter *src, *dst; -+ loff_t shift, new_size; -+ u64 src_start; -+ int ret; -+ -+ if ((offset | len) & (block_bytes(c) - 1)) -+ return -EINVAL; -+ -+ bkey_on_stack_init(&copy); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); -+ -+ /* -+ * We need i_mutex to keep the page cache consistent with the extents -+ * btree, and the btree consistent with i_size - we don't need outside -+ * locking for the extents btree itself, because we're using linked -+ * iterators -+ */ -+ inode_lock(&inode->v); -+ inode_dio_wait(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ if (insert) { -+ ret = -EFBIG; -+ if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len) -+ goto err; -+ -+ ret = -EINVAL; -+ if (offset >= inode->v.i_size) -+ goto err; -+ -+ src_start = U64_MAX; -+ shift = len; -+ } else { -+ ret = -EINVAL; -+ if (offset + len >= inode->v.i_size) -+ goto err; -+ -+ src_start = offset + len; -+ shift = -len; -+ } -+ -+ new_size = 
inode->v.i_size + shift; -+ -+ ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); -+ if (ret) -+ goto err; -+ -+ if (insert) { -+ i_size_write(&inode->v, new_size); -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode_size(c, inode, new_size, -+ ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ } else { -+ s64 i_sectors_delta = 0; -+ -+ ret = bch2_fpunch(c, inode->v.i_ino, -+ offset >> 9, (offset + len) >> 9, -+ &inode->ei_journal_seq, -+ &i_sectors_delta); -+ i_sectors_acct(c, inode, NULL, i_sectors_delta); -+ -+ if (ret) -+ goto err; -+ } -+ -+ src = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(inode->v.i_ino, src_start >> 9), -+ BTREE_ITER_INTENT); -+ BUG_ON(IS_ERR_OR_NULL(src)); -+ -+ dst = bch2_trans_copy_iter(&trans, src); -+ BUG_ON(IS_ERR_OR_NULL(dst)); -+ -+ while (1) { -+ struct disk_reservation disk_res = -+ bch2_disk_reservation_init(c, 0); -+ struct bkey_i delete; -+ struct bkey_s_c k; -+ struct bpos next_pos; -+ struct bpos move_pos = POS(inode->v.i_ino, offset >> 9); -+ struct bpos atomic_end; -+ unsigned trigger_flags = 0; -+ -+ k = insert -+ ? bch2_btree_iter_peek_prev(src) -+ : bch2_btree_iter_peek(src); -+ if ((ret = bkey_err(k))) -+ goto bkey_err; -+ -+ if (!k.k || k.k->p.inode != inode->v.i_ino) -+ break; -+ -+ BUG_ON(bkey_cmp(src->pos, bkey_start_pos(k.k))); -+ -+ if (insert && -+ bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0) -+ break; -+reassemble: -+ bkey_on_stack_reassemble(&copy, c, k); -+ -+ if (insert && -+ bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) -+ bch2_cut_front(move_pos, copy.k); -+ -+ copy.k->k.p.offset += shift >> 9; -+ bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k->k)); -+ -+ ret = bch2_extent_atomic_end(dst, copy.k, &atomic_end); -+ if (ret) -+ goto bkey_err; -+ -+ if (bkey_cmp(atomic_end, copy.k->k.p)) { -+ if (insert) { -+ move_pos = atomic_end; -+ move_pos.offset -= shift >> 9; -+ goto reassemble; -+ } else { -+ bch2_cut_back(atomic_end, copy.k); -+ } -+ } -+ -+ bkey_init(&delete.k); -+ delete.k.p = copy.k->k.p; -+ delete.k.size = copy.k->k.size; -+ delete.k.p.offset -= shift >> 9; -+ -+ next_pos = insert ? 
bkey_start_pos(&delete.k) : delete.k.p; -+ -+ if (copy.k->k.size == k.k->size) { -+ /* -+ * If we're moving the entire extent, we can skip -+ * running triggers: -+ */ -+ trigger_flags |= BTREE_TRIGGER_NORUN; -+ } else { -+ /* We might end up splitting compressed extents: */ -+ unsigned nr_ptrs = -+ bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k)); -+ -+ ret = bch2_disk_reservation_get(c, &disk_res, -+ copy.k->k.size, nr_ptrs, -+ BCH_DISK_RESERVATION_NOFAIL); -+ BUG_ON(ret); -+ } -+ -+ bch2_btree_iter_set_pos(src, bkey_start_pos(&delete.k)); -+ -+ ret = bch2_trans_update(&trans, src, &delete, trigger_flags) ?: -+ bch2_trans_update(&trans, dst, copy.k, trigger_flags) ?: -+ bch2_trans_commit(&trans, &disk_res, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOFAIL); -+ bch2_disk_reservation_put(c, &disk_res); -+bkey_err: -+ if (!ret) -+ bch2_btree_iter_set_pos(src, next_pos); -+ -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ goto err; -+ -+ bch2_trans_cond_resched(&trans); -+ } -+ bch2_trans_unlock(&trans); -+ -+ if (!insert) { -+ i_size_write(&inode->v, new_size); -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode_size(c, inode, new_size, -+ ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ } -+err: -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&copy, c); -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ inode_unlock(&inode->v); -+ return ret; -+} -+ -+static long bchfs_fallocate(struct bch_inode_info *inode, int mode, -+ loff_t offset, loff_t len) -+{ -+ struct address_space *mapping = inode->v.i_mapping; -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bpos end_pos; -+ loff_t end = offset + len; -+ loff_t block_start = round_down(offset, block_bytes(c)); -+ loff_t block_end = round_up(end, block_bytes(c)); -+ unsigned sectors; -+ unsigned replicas = io_opts(c, &inode->ei_inode).data_replicas; -+ int ret; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ inode_lock(&inode->v); -+ inode_dio_wait(&inode->v); -+ bch2_pagecache_block_get(&inode->ei_pagecache_lock); -+ -+ if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) { -+ ret = inode_newsize_ok(&inode->v, end); -+ if (ret) -+ goto err; -+ } -+ -+ if (mode & FALLOC_FL_ZERO_RANGE) { -+ ret = __bch2_truncate_page(inode, -+ offset >> PAGE_SHIFT, -+ offset, end); -+ -+ if (!ret && -+ offset >> PAGE_SHIFT != end >> PAGE_SHIFT) -+ ret = __bch2_truncate_page(inode, -+ end >> PAGE_SHIFT, -+ offset, end); -+ -+ if (unlikely(ret)) -+ goto err; -+ -+ truncate_pagecache_range(&inode->v, offset, end - 1); -+ } -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(inode->v.i_ino, block_start >> 9), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ end_pos = POS(inode->v.i_ino, block_end >> 9); -+ -+ while (bkey_cmp(iter->pos, end_pos) < 0) { -+ s64 i_sectors_delta = 0; -+ struct disk_reservation disk_res = { 0 }; -+ struct quota_res quota_res = { 0 }; -+ struct bkey_i_reservation reservation; -+ struct bkey_s_c k; -+ -+ bch2_trans_begin(&trans); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ if ((ret = bkey_err(k))) -+ goto bkey_err; -+ -+ /* already reserved */ -+ if (k.k->type == KEY_TYPE_reservation && -+ bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) { -+ bch2_btree_iter_next_slot(iter); -+ continue; -+ } -+ -+ if (bkey_extent_is_data(k.k) && -+ !(mode & FALLOC_FL_ZERO_RANGE)) { -+ bch2_btree_iter_next_slot(iter); -+ continue; -+ } -+ -+ bkey_reservation_init(&reservation.k_i); -+ reservation.k.type = 
KEY_TYPE_reservation; -+ reservation.k.p = k.k->p; -+ reservation.k.size = k.k->size; -+ -+ bch2_cut_front(iter->pos, &reservation.k_i); -+ bch2_cut_back(end_pos, &reservation.k_i); -+ -+ sectors = reservation.k.size; -+ reservation.v.nr_replicas = bch2_bkey_nr_ptrs_allocated(k); -+ -+ if (!bkey_extent_is_allocation(k.k)) { -+ ret = bch2_quota_reservation_add(c, inode, -+ &quota_res, -+ sectors, true); -+ if (unlikely(ret)) -+ goto bkey_err; -+ } -+ -+ if (reservation.v.nr_replicas < replicas || -+ bch2_bkey_sectors_compressed(k)) { -+ ret = bch2_disk_reservation_get(c, &disk_res, sectors, -+ replicas, 0); -+ if (unlikely(ret)) -+ goto bkey_err; -+ -+ reservation.v.nr_replicas = disk_res.nr_replicas; -+ } -+ -+ ret = bch2_extent_update(&trans, iter, &reservation.k_i, -+ &disk_res, &inode->ei_journal_seq, -+ 0, &i_sectors_delta); -+ i_sectors_acct(c, inode, &quota_res, i_sectors_delta); -+bkey_err: -+ bch2_quota_reservation_put(c, inode, &quota_res); -+ bch2_disk_reservation_put(c, &disk_res); -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ goto err; -+ } -+ -+ /* -+ * Do we need to extend the file? -+ * -+ * If we zeroed up to the end of the file, we dropped whatever writes -+ * were going to write out the current i_size, so we have to extend -+ * manually even if FL_KEEP_SIZE was set: -+ */ -+ if (end >= inode->v.i_size && -+ (!(mode & FALLOC_FL_KEEP_SIZE) || -+ (mode & FALLOC_FL_ZERO_RANGE))) { -+ struct btree_iter *inode_iter; -+ struct bch_inode_unpacked inode_u; -+ -+ do { -+ bch2_trans_begin(&trans); -+ inode_iter = bch2_inode_peek(&trans, &inode_u, -+ inode->v.i_ino, 0); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ } while (ret == -EINTR); -+ -+ bch2_trans_unlock(&trans); -+ -+ if (ret) -+ goto err; -+ -+ /* -+ * Sync existing appends before extending i_size, -+ * as in bch2_extend(): -+ */ -+ ret = filemap_write_and_wait_range(mapping, -+ inode_u.bi_size, S64_MAX); -+ if (ret) -+ goto err; -+ -+ if (mode & FALLOC_FL_KEEP_SIZE) -+ end = inode->v.i_size; -+ else -+ i_size_write(&inode->v, end); -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode_size(c, inode, end, 0); -+ mutex_unlock(&inode->ei_update_lock); -+ } -+err: -+ bch2_trans_exit(&trans); -+ bch2_pagecache_block_put(&inode->ei_pagecache_lock); -+ inode_unlock(&inode->v); -+ return ret; -+} -+ -+long bch2_fallocate_dispatch(struct file *file, int mode, -+ loff_t offset, loff_t len) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ long ret; -+ -+ if (!percpu_ref_tryget(&c->writes)) -+ return -EROFS; -+ -+ if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE))) -+ ret = bchfs_fallocate(inode, mode, offset, len); -+ else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE)) -+ ret = bchfs_fpunch(inode, offset, len); -+ else if (mode == FALLOC_FL_INSERT_RANGE) -+ ret = bchfs_fcollapse_finsert(inode, offset, len, true); -+ else if (mode == FALLOC_FL_COLLAPSE_RANGE) -+ ret = bchfs_fcollapse_finsert(inode, offset, len, false); -+ else -+ ret = -EOPNOTSUPP; -+ -+ percpu_ref_put(&c->writes); -+ -+ return ret; -+} -+ -+static void mark_range_unallocated(struct bch_inode_info *inode, -+ loff_t start, loff_t end) -+{ -+ pgoff_t index = start >> PAGE_SHIFT; -+ pgoff_t end_index = (end - 1) >> PAGE_SHIFT; -+ struct pagevec pvec; -+ -+ pagevec_init(&pvec); -+ -+ do { -+ unsigned nr_pages, i, j; -+ -+ nr_pages = pagevec_lookup_range(&pvec, inode->v.i_mapping, -+ &index, end_index); -+ if (nr_pages == 0) -+ break; -+ -+ for (i = 0; i < nr_pages; i++) { -+ struct page *page = 
pvec.pages[i]; -+ struct bch_page_state *s; -+ -+ lock_page(page); -+ s = bch2_page_state(page); -+ -+ if (s) { -+ spin_lock(&s->lock); -+ for (j = 0; j < PAGE_SECTORS; j++) -+ s->s[j].nr_replicas = 0; -+ spin_unlock(&s->lock); -+ } -+ -+ unlock_page(page); -+ } -+ pagevec_release(&pvec); -+ } while (index <= end_index); -+} -+ -+loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, -+ struct file *file_dst, loff_t pos_dst, -+ loff_t len, unsigned remap_flags) -+{ -+ struct bch_inode_info *src = file_bch_inode(file_src); -+ struct bch_inode_info *dst = file_bch_inode(file_dst); -+ struct bch_fs *c = src->v.i_sb->s_fs_info; -+ s64 i_sectors_delta = 0; -+ u64 aligned_len; -+ loff_t ret = 0; -+ -+ if (!c->opts.reflink) -+ return -EOPNOTSUPP; -+ -+ if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY)) -+ return -EINVAL; -+ -+ if (remap_flags & REMAP_FILE_DEDUP) -+ return -EOPNOTSUPP; -+ -+ if ((pos_src & (block_bytes(c) - 1)) || -+ (pos_dst & (block_bytes(c) - 1))) -+ return -EINVAL; -+ -+ if (src == dst && -+ abs(pos_src - pos_dst) < len) -+ return -EINVAL; -+ -+ bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); -+ -+ file_update_time(file_dst); -+ -+ inode_dio_wait(&src->v); -+ inode_dio_wait(&dst->v); -+ -+ ret = generic_remap_file_range_prep(file_src, pos_src, -+ file_dst, pos_dst, -+ &len, remap_flags); -+ if (ret < 0 || len == 0) -+ goto err; -+ -+ aligned_len = round_up((u64) len, block_bytes(c)); -+ -+ ret = write_invalidate_inode_pages_range(dst->v.i_mapping, -+ pos_dst, pos_dst + len - 1); -+ if (ret) -+ goto err; -+ -+ mark_range_unallocated(src, pos_src, pos_src + aligned_len); -+ -+ ret = bch2_remap_range(c, -+ POS(dst->v.i_ino, pos_dst >> 9), -+ POS(src->v.i_ino, pos_src >> 9), -+ aligned_len >> 9, -+ &dst->ei_journal_seq, -+ pos_dst + len, &i_sectors_delta); -+ if (ret < 0) -+ goto err; -+ -+ /* -+ * due to alignment, we might have remapped slightly more than requsted -+ */ -+ ret = min((u64) ret << 9, (u64) len); -+ -+ /* XXX get a quota reservation */ -+ i_sectors_acct(c, dst, NULL, i_sectors_delta); -+ -+ spin_lock(&dst->v.i_lock); -+ if (pos_dst + ret > dst->v.i_size) -+ i_size_write(&dst->v, pos_dst + ret); -+ spin_unlock(&dst->v.i_lock); -+err: -+ bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); -+ -+ return ret; -+} -+ -+/* fseek: */ -+ -+static int page_data_offset(struct page *page, unsigned offset) -+{ -+ struct bch_page_state *s = bch2_page_state(page); -+ unsigned i; -+ -+ if (s) -+ for (i = offset >> 9; i < PAGE_SECTORS; i++) -+ if (s->s[i].state >= SECTOR_DIRTY) -+ return i << 9; -+ -+ return -1; -+} -+ -+static loff_t bch2_seek_pagecache_data(struct inode *vinode, -+ loff_t start_offset, -+ loff_t end_offset) -+{ -+ struct address_space *mapping = vinode->i_mapping; -+ struct page *page; -+ pgoff_t start_index = start_offset >> PAGE_SHIFT; -+ pgoff_t end_index = end_offset >> PAGE_SHIFT; -+ pgoff_t index = start_index; -+ loff_t ret; -+ int offset; -+ -+ while (index <= end_index) { -+ if (find_get_pages_range(mapping, &index, end_index, 1, &page)) { -+ lock_page(page); -+ -+ offset = page_data_offset(page, -+ page->index == start_index -+ ? 
start_offset & (PAGE_SIZE - 1) -+ : 0); -+ if (offset >= 0) { -+ ret = clamp(((loff_t) page->index << PAGE_SHIFT) + -+ offset, -+ start_offset, end_offset); -+ unlock_page(page); -+ put_page(page); -+ return ret; -+ } -+ -+ unlock_page(page); -+ put_page(page); -+ } else { -+ break; -+ } -+ } -+ -+ return end_offset; -+} -+ -+static loff_t bch2_seek_data(struct file *file, u64 offset) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 isize, next_data = MAX_LFS_FILESIZE; -+ int ret; -+ -+ isize = i_size_read(&inode->v); -+ if (offset >= isize) -+ return -ENXIO; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, -+ POS(inode->v.i_ino, offset >> 9), 0, k, ret) { -+ if (k.k->p.inode != inode->v.i_ino) { -+ break; -+ } else if (bkey_extent_is_data(k.k)) { -+ next_data = max(offset, bkey_start_offset(k.k) << 9); -+ break; -+ } else if (k.k->p.offset >> 9 > isize) -+ break; -+ } -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ return ret; -+ -+ if (next_data > offset) -+ next_data = bch2_seek_pagecache_data(&inode->v, -+ offset, next_data); -+ -+ if (next_data >= isize) -+ return -ENXIO; -+ -+ return vfs_setpos(file, next_data, MAX_LFS_FILESIZE); -+} -+ -+static int __page_hole_offset(struct page *page, unsigned offset) -+{ -+ struct bch_page_state *s = bch2_page_state(page); -+ unsigned i; -+ -+ if (!s) -+ return 0; -+ -+ for (i = offset >> 9; i < PAGE_SECTORS; i++) -+ if (s->s[i].state < SECTOR_DIRTY) -+ return i << 9; -+ -+ return -1; -+} -+ -+static loff_t page_hole_offset(struct address_space *mapping, loff_t offset) -+{ -+ pgoff_t index = offset >> PAGE_SHIFT; -+ struct page *page; -+ int pg_offset; -+ loff_t ret = -1; -+ -+ page = find_lock_entry(mapping, index); -+ if (!page || xa_is_value(page)) -+ return offset; -+ -+ pg_offset = __page_hole_offset(page, offset & (PAGE_SIZE - 1)); -+ if (pg_offset >= 0) -+ ret = ((loff_t) index << PAGE_SHIFT) + pg_offset; -+ -+ unlock_page(page); -+ -+ return ret; -+} -+ -+static loff_t bch2_seek_pagecache_hole(struct inode *vinode, -+ loff_t start_offset, -+ loff_t end_offset) -+{ -+ struct address_space *mapping = vinode->i_mapping; -+ loff_t offset = start_offset, hole; -+ -+ while (offset < end_offset) { -+ hole = page_hole_offset(mapping, offset); -+ if (hole >= 0 && hole <= end_offset) -+ return max(start_offset, hole); -+ -+ offset += PAGE_SIZE; -+ offset &= PAGE_MASK; -+ } -+ -+ return end_offset; -+} -+ -+static loff_t bch2_seek_hole(struct file *file, u64 offset) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 isize, next_hole = MAX_LFS_FILESIZE; -+ int ret; -+ -+ isize = i_size_read(&inode->v); -+ if (offset >= isize) -+ return -ENXIO; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, -+ POS(inode->v.i_ino, offset >> 9), -+ BTREE_ITER_SLOTS, k, ret) { -+ if (k.k->p.inode != inode->v.i_ino) { -+ next_hole = bch2_seek_pagecache_hole(&inode->v, -+ offset, MAX_LFS_FILESIZE); -+ break; -+ } else if (!bkey_extent_is_data(k.k)) { -+ next_hole = bch2_seek_pagecache_hole(&inode->v, -+ max(offset, bkey_start_offset(k.k) << 9), -+ k.k->p.offset << 9); -+ -+ if (next_hole < k.k->p.offset << 9) -+ break; -+ } else { -+ offset = max(offset, bkey_start_offset(k.k) << 9); 
-+ } -+ } -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ return ret; -+ -+ if (next_hole > isize) -+ next_hole = isize; -+ -+ return vfs_setpos(file, next_hole, MAX_LFS_FILESIZE); -+} -+ -+loff_t bch2_llseek(struct file *file, loff_t offset, int whence) -+{ -+ switch (whence) { -+ case SEEK_SET: -+ case SEEK_CUR: -+ case SEEK_END: -+ return generic_file_llseek(file, offset, whence); -+ case SEEK_DATA: -+ return bch2_seek_data(file, offset); -+ case SEEK_HOLE: -+ return bch2_seek_hole(file, offset); -+ } -+ -+ return -EINVAL; -+} -+ -+void bch2_fs_fsio_exit(struct bch_fs *c) -+{ -+ bioset_exit(&c->dio_write_bioset); -+ bioset_exit(&c->dio_read_bioset); -+ bioset_exit(&c->writepage_bioset); -+} -+ -+int bch2_fs_fsio_init(struct bch_fs *c) -+{ -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ if (bioset_init(&c->writepage_bioset, -+ 4, offsetof(struct bch_writepage_io, op.wbio.bio), -+ BIOSET_NEED_BVECS) || -+ bioset_init(&c->dio_read_bioset, -+ 4, offsetof(struct dio_read, rbio.bio), -+ BIOSET_NEED_BVECS) || -+ bioset_init(&c->dio_write_bioset, -+ 4, offsetof(struct dio_write, op.wbio.bio), -+ BIOSET_NEED_BVECS)) -+ ret = -ENOMEM; -+ -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -+ -+#endif /* NO_BCACHEFS_FS */ -diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h -new file mode 100644 -index 000000000000..7063556d289b ---- /dev/null -+++ b/fs/bcachefs/fs-io.h -@@ -0,0 +1,57 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_IO_H -+#define _BCACHEFS_FS_IO_H -+ -+#ifndef NO_BCACHEFS_FS -+ -+#include "buckets.h" -+#include "io_types.h" -+ -+#include -+ -+struct quota_res; -+ -+int __must_check bch2_write_inode_size(struct bch_fs *, -+ struct bch_inode_info *, -+ loff_t, unsigned); -+ -+int bch2_writepage(struct page *, struct writeback_control *); -+int bch2_readpage(struct file *, struct page *); -+ -+int bch2_writepages(struct address_space *, struct writeback_control *); -+int bch2_readpages(struct file *, struct address_space *, -+ struct list_head *, unsigned); -+ -+int bch2_write_begin(struct file *, struct address_space *, loff_t, -+ unsigned, unsigned, struct page **, void **); -+int bch2_write_end(struct file *, struct address_space *, loff_t, -+ unsigned, unsigned, struct page *, void *); -+ -+ssize_t bch2_read_iter(struct kiocb *, struct iov_iter *); -+ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *); -+ -+int bch2_fsync(struct file *, loff_t, loff_t, int); -+ -+int bch2_truncate(struct bch_inode_info *, struct iattr *); -+long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t); -+ -+loff_t bch2_remap_file_range(struct file *, loff_t, struct file *, -+ loff_t, loff_t, unsigned); -+ -+loff_t bch2_llseek(struct file *, loff_t, int); -+ -+vm_fault_t bch2_page_fault(struct vm_fault *); -+vm_fault_t bch2_page_mkwrite(struct vm_fault *); -+void bch2_invalidatepage(struct page *, unsigned int, unsigned int); -+int bch2_releasepage(struct page *, gfp_t); -+int bch2_migrate_page(struct address_space *, struct page *, -+ struct page *, enum migrate_mode); -+ -+void bch2_fs_fsio_exit(struct bch_fs *); -+int bch2_fs_fsio_init(struct bch_fs *); -+#else -+static inline void bch2_fs_fsio_exit(struct bch_fs *c) {} -+static inline int bch2_fs_fsio_init(struct bch_fs *c) { return 0; } -+#endif -+ -+#endif /* _BCACHEFS_FS_IO_H */ -diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c -new file mode 100644 -index 000000000000..0873d2f0928c ---- /dev/null -+++ b/fs/bcachefs/fs-ioctl.c -@@ -0,0 +1,312 @@ -+// 
SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_FS -+ -+#include "bcachefs.h" -+#include "chardev.h" -+#include "dirent.h" -+#include "fs.h" -+#include "fs-common.h" -+#include "fs-ioctl.h" -+#include "quota.h" -+ -+#include -+#include -+ -+#define FS_IOC_GOINGDOWN _IOR('X', 125, __u32) -+ -+struct flags_set { -+ unsigned mask; -+ unsigned flags; -+ -+ unsigned projid; -+}; -+ -+static int bch2_inode_flags_set(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ /* -+ * We're relying on btree locking here for exclusion with other ioctl -+ * calls - use the flags in the btree (@bi), not inode->i_flags: -+ */ -+ struct flags_set *s = p; -+ unsigned newflags = s->flags; -+ unsigned oldflags = bi->bi_flags & s->mask; -+ -+ if (((newflags ^ oldflags) & (BCH_INODE_APPEND|BCH_INODE_IMMUTABLE)) && -+ !capable(CAP_LINUX_IMMUTABLE)) -+ return -EPERM; -+ -+ if (!S_ISREG(bi->bi_mode) && -+ !S_ISDIR(bi->bi_mode) && -+ (newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags) -+ return -EINVAL; -+ -+ bi->bi_flags &= ~s->mask; -+ bi->bi_flags |= newflags; -+ -+ bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); -+ return 0; -+} -+ -+static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg) -+{ -+ unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags); -+ -+ return put_user(flags, arg); -+} -+ -+static int bch2_ioc_setflags(struct bch_fs *c, -+ struct file *file, -+ struct bch_inode_info *inode, -+ void __user *arg) -+{ -+ struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) }; -+ unsigned uflags; -+ int ret; -+ -+ if (get_user(uflags, (int __user *) arg)) -+ return -EFAULT; -+ -+ s.flags = map_flags_rev(bch_flags_to_uflags, uflags); -+ if (uflags) -+ return -EOPNOTSUPP; -+ -+ ret = mnt_want_write_file(file); -+ if (ret) -+ return ret; -+ -+ inode_lock(&inode->v); -+ if (!inode_owner_or_capable(&inode->v)) { -+ ret = -EACCES; -+ goto setflags_out; -+ } -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s, -+ ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ -+setflags_out: -+ inode_unlock(&inode->v); -+ mnt_drop_write_file(file); -+ return ret; -+} -+ -+static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode, -+ struct fsxattr __user *arg) -+{ -+ struct fsxattr fa = { 0 }; -+ -+ fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags); -+ fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ]; -+ -+ return copy_to_user(arg, &fa, sizeof(fa)); -+} -+ -+static int fssetxattr_inode_update_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct flags_set *s = p; -+ -+ if (s->projid != bi->bi_project) { -+ bi->bi_fields_set |= 1U << Inode_opt_project; -+ bi->bi_project = s->projid; -+ } -+ -+ return bch2_inode_flags_set(inode, bi, p); -+} -+ -+static int bch2_ioc_fssetxattr(struct bch_fs *c, -+ struct file *file, -+ struct bch_inode_info *inode, -+ struct fsxattr __user *arg) -+{ -+ struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) }; -+ struct fsxattr fa; -+ int ret; -+ -+ if (copy_from_user(&fa, arg, sizeof(fa))) -+ return -EFAULT; -+ -+ s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags); -+ if (fa.fsx_xflags) -+ return -EOPNOTSUPP; -+ -+ if (fa.fsx_projid >= U32_MAX) -+ return -EINVAL; -+ -+ /* -+ * inode fields accessible via the xattr interface are stored with a +1 -+ * bias, so that 0 means unset: -+ */ -+ s.projid = fa.fsx_projid + 
1; -+ -+ ret = mnt_want_write_file(file); -+ if (ret) -+ return ret; -+ -+ inode_lock(&inode->v); -+ if (!inode_owner_or_capable(&inode->v)) { -+ ret = -EACCES; -+ goto err; -+ } -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_set_projid(c, inode, fa.fsx_projid); -+ if (ret) -+ goto err_unlock; -+ -+ ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, -+ ATTR_CTIME); -+err_unlock: -+ mutex_unlock(&inode->ei_update_lock); -+err: -+ inode_unlock(&inode->v); -+ mnt_drop_write_file(file); -+ return ret; -+} -+ -+static int bch2_reinherit_attrs_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_inode_info *dir = p; -+ -+ return !bch2_reinherit_attrs(bi, &dir->ei_inode); -+} -+ -+static int bch2_ioc_reinherit_attrs(struct bch_fs *c, -+ struct file *file, -+ struct bch_inode_info *src, -+ const char __user *name) -+{ -+ struct bch_inode_info *dst; -+ struct inode *vinode = NULL; -+ char *kname = NULL; -+ struct qstr qstr; -+ int ret = 0; -+ u64 inum; -+ -+ kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL); -+ if (!kname) -+ return -ENOMEM; -+ -+ ret = strncpy_from_user(kname, name, BCH_NAME_MAX); -+ if (unlikely(ret < 0)) -+ goto err1; -+ -+ qstr.len = ret; -+ qstr.name = kname; -+ -+ ret = -ENOENT; -+ inum = bch2_dirent_lookup(c, src->v.i_ino, -+ &src->ei_str_hash, -+ &qstr); -+ if (!inum) -+ goto err1; -+ -+ vinode = bch2_vfs_inode_get(c, inum); -+ ret = PTR_ERR_OR_ZERO(vinode); -+ if (ret) -+ goto err1; -+ -+ dst = to_bch_ei(vinode); -+ -+ ret = mnt_want_write_file(file); -+ if (ret) -+ goto err2; -+ -+ bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst); -+ -+ if (inode_attr_changing(src, dst, Inode_opt_project)) { -+ ret = bch2_fs_quota_transfer(c, dst, -+ src->ei_qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err3; -+ } -+ -+ ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0); -+err3: -+ bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst); -+ -+ /* return true if we did work */ -+ if (ret >= 0) -+ ret = !ret; -+ -+ mnt_drop_write_file(file); -+err2: -+ iput(vinode); -+err1: -+ kfree(kname); -+ -+ return ret; -+} -+ -+long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct super_block *sb = inode->v.i_sb; -+ struct bch_fs *c = sb->s_fs_info; -+ -+ switch (cmd) { -+ case FS_IOC_GETFLAGS: -+ return bch2_ioc_getflags(inode, (int __user *) arg); -+ -+ case FS_IOC_SETFLAGS: -+ return bch2_ioc_setflags(c, file, inode, (int __user *) arg); -+ -+ case FS_IOC_FSGETXATTR: -+ return bch2_ioc_fsgetxattr(inode, (void __user *) arg); -+ case FS_IOC_FSSETXATTR: -+ return bch2_ioc_fssetxattr(c, file, inode, -+ (void __user *) arg); -+ -+ case BCHFS_IOC_REINHERIT_ATTRS: -+ return bch2_ioc_reinherit_attrs(c, file, inode, -+ (void __user *) arg); -+ -+ case FS_IOC_GETVERSION: -+ return -ENOTTY; -+ case FS_IOC_SETVERSION: -+ return -ENOTTY; -+ -+ case FS_IOC_GOINGDOWN: -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ down_write(&sb->s_umount); -+ sb->s_flags |= SB_RDONLY; -+ if (bch2_fs_emergency_read_only(c)) -+ bch_err(c, "emergency read only due to ioctl"); -+ up_write(&sb->s_umount); -+ return 0; -+ -+ default: -+ return bch2_fs_ioctl(c, cmd, (void __user *) arg); -+ } -+} -+ -+#ifdef CONFIG_COMPAT -+long bch2_compat_fs_ioctl(struct file *file, unsigned cmd, unsigned long arg) -+{ -+ /* These are just misnamed, they actually get/put from/to user an int */ -+ switch (cmd) { -+ case FS_IOC_GETFLAGS: -+ cmd = FS_IOC_GETFLAGS; 
-+ break; -+ case FS_IOC32_SETFLAGS: -+ cmd = FS_IOC_SETFLAGS; -+ break; -+ default: -+ return -ENOIOCTLCMD; -+ } -+ return bch2_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); -+} -+#endif -+ -+#endif /* NO_BCACHEFS_FS */ -diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h -new file mode 100644 -index 000000000000..f201980ef2c3 ---- /dev/null -+++ b/fs/bcachefs/fs-ioctl.h -@@ -0,0 +1,81 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_IOCTL_H -+#define _BCACHEFS_FS_IOCTL_H -+ -+/* Inode flags: */ -+ -+/* bcachefs inode flags -> vfs inode flags: */ -+static const unsigned bch_flags_to_vfs[] = { -+ [__BCH_INODE_SYNC] = S_SYNC, -+ [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE, -+ [__BCH_INODE_APPEND] = S_APPEND, -+ [__BCH_INODE_NOATIME] = S_NOATIME, -+}; -+ -+/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ -+static const unsigned bch_flags_to_uflags[] = { -+ [__BCH_INODE_SYNC] = FS_SYNC_FL, -+ [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL, -+ [__BCH_INODE_APPEND] = FS_APPEND_FL, -+ [__BCH_INODE_NODUMP] = FS_NODUMP_FL, -+ [__BCH_INODE_NOATIME] = FS_NOATIME_FL, -+}; -+ -+/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ -+static const unsigned bch_flags_to_xflags[] = { -+ [__BCH_INODE_SYNC] = FS_XFLAG_SYNC, -+ [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE, -+ [__BCH_INODE_APPEND] = FS_XFLAG_APPEND, -+ [__BCH_INODE_NODUMP] = FS_XFLAG_NODUMP, -+ [__BCH_INODE_NOATIME] = FS_XFLAG_NOATIME, -+ //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT; -+}; -+ -+#define set_flags(_map, _in, _out) \ -+do { \ -+ unsigned _i; \ -+ \ -+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ -+ if ((_in) & (1 << _i)) \ -+ (_out) |= _map[_i]; \ -+ else \ -+ (_out) &= ~_map[_i]; \ -+} while (0) -+ -+#define map_flags(_map, _in) \ -+({ \ -+ unsigned _out = 0; \ -+ \ -+ set_flags(_map, _in, _out); \ -+ _out; \ -+}) -+ -+#define map_flags_rev(_map, _in) \ -+({ \ -+ unsigned _i, _out = 0; \ -+ \ -+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ -+ if ((_in) & _map[_i]) { \ -+ (_out) |= 1 << _i; \ -+ (_in) &= ~_map[_i]; \ -+ } \ -+ (_out); \ -+}) -+ -+#define map_defined(_map) \ -+({ \ -+ unsigned _in = ~0; \ -+ \ -+ map_flags_rev(_map, _in); \ -+}) -+ -+/* Set VFS inode flags from bcachefs inode: */ -+static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) -+{ -+ set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); -+} -+ -+long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long); -+long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long); -+ -+#endif /* _BCACHEFS_FS_IOCTL_H */ -diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -new file mode 100644 -index 000000000000..6a9820e83db7 ---- /dev/null -+++ b/fs/bcachefs/fs.c -@@ -0,0 +1,1614 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_FS -+ -+#include "bcachefs.h" -+#include "acl.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "chardev.h" -+#include "dirent.h" -+#include "extents.h" -+#include "fs.h" -+#include "fs-common.h" -+#include "fs-io.h" -+#include "fs-ioctl.h" -+#include "fsck.h" -+#include "inode.h" -+#include "io.h" -+#include "journal.h" -+#include "keylist.h" -+#include "quota.h" -+#include "super.h" -+#include "xattr.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static struct kmem_cache *bch2_inode_cache; -+ -+static void bch2_vfs_inode_init(struct bch_fs *, -+ struct bch_inode_info *, -+ struct bch_inode_unpacked *); -+ -+static void journal_seq_copy(struct bch_fs *c, 
-+ struct bch_inode_info *dst, -+ u64 journal_seq) -+{ -+ u64 old, v = READ_ONCE(dst->ei_journal_seq); -+ -+ do { -+ old = v; -+ -+ if (old >= journal_seq) -+ break; -+ } while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old); -+ -+ bch2_journal_set_has_inum(&c->journal, dst->v.i_ino, journal_seq); -+} -+ -+static void __pagecache_lock_put(struct pagecache_lock *lock, long i) -+{ -+ BUG_ON(atomic_long_read(&lock->v) == 0); -+ -+ if (atomic_long_sub_return_release(i, &lock->v) == 0) -+ wake_up_all(&lock->wait); -+} -+ -+static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i) -+{ -+ long v = atomic_long_read(&lock->v), old; -+ -+ do { -+ old = v; -+ -+ if (i > 0 ? v < 0 : v > 0) -+ return false; -+ } while ((v = atomic_long_cmpxchg_acquire(&lock->v, -+ old, old + i)) != old); -+ return true; -+} -+ -+static void __pagecache_lock_get(struct pagecache_lock *lock, long i) -+{ -+ wait_event(lock->wait, __pagecache_lock_tryget(lock, i)); -+} -+ -+void bch2_pagecache_add_put(struct pagecache_lock *lock) -+{ -+ __pagecache_lock_put(lock, 1); -+} -+ -+void bch2_pagecache_add_get(struct pagecache_lock *lock) -+{ -+ __pagecache_lock_get(lock, 1); -+} -+ -+void bch2_pagecache_block_put(struct pagecache_lock *lock) -+{ -+ __pagecache_lock_put(lock, -1); -+} -+ -+void bch2_pagecache_block_get(struct pagecache_lock *lock) -+{ -+ __pagecache_lock_get(lock, -1); -+} -+ -+void bch2_inode_update_after_write(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ unsigned fields) -+{ -+ set_nlink(&inode->v, bch2_inode_nlink_get(bi)); -+ i_uid_write(&inode->v, bi->bi_uid); -+ i_gid_write(&inode->v, bi->bi_gid); -+ inode->v.i_mode = bi->bi_mode; -+ -+ if (fields & ATTR_ATIME) -+ inode->v.i_atime = bch2_time_to_timespec(c, bi->bi_atime); -+ if (fields & ATTR_MTIME) -+ inode->v.i_mtime = bch2_time_to_timespec(c, bi->bi_mtime); -+ if (fields & ATTR_CTIME) -+ inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime); -+ -+ inode->ei_inode = *bi; -+ -+ bch2_inode_flags_to_vfs(inode); -+} -+ -+int __must_check bch2_write_inode(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ inode_set_fn set, -+ void *p, unsigned fields) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bch_inode_unpacked inode_u; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(iter) ?: -+ (set ? set(inode, &inode_u, p) : 0) ?: -+ bch2_inode_write(&trans, iter, &inode_u) ?: -+ bch2_trans_commit(&trans, NULL, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOFAIL); -+ -+ /* -+ * the btree node lock protects inode->ei_inode, not ei_update_lock; -+ * this is important for inode updates via bchfs_write_index_update -+ */ -+ if (!ret) -+ bch2_inode_update_after_write(c, inode, &inode_u, fields); -+ -+ bch2_trans_iter_put(&trans, iter); -+ -+ if (ret == -EINTR) -+ goto retry; -+ -+ bch2_trans_exit(&trans); -+ return ret < 0 ? 
ret : 0; -+} -+ -+int bch2_fs_quota_transfer(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch_qid new_qid, -+ unsigned qtypes, -+ enum quota_acct_mode mode) -+{ -+ unsigned i; -+ int ret; -+ -+ qtypes &= enabled_qtypes(c); -+ -+ for (i = 0; i < QTYP_NR; i++) -+ if (new_qid.q[i] == inode->ei_qid.q[i]) -+ qtypes &= ~(1U << i); -+ -+ if (!qtypes) -+ return 0; -+ -+ mutex_lock(&inode->ei_quota_lock); -+ -+ ret = bch2_quota_transfer(c, qtypes, new_qid, -+ inode->ei_qid, -+ inode->v.i_blocks + -+ inode->ei_quota_reserved, -+ mode); -+ if (!ret) -+ for (i = 0; i < QTYP_NR; i++) -+ if (qtypes & (1 << i)) -+ inode->ei_qid.q[i] = new_qid.q[i]; -+ -+ mutex_unlock(&inode->ei_quota_lock); -+ -+ return ret; -+} -+ -+struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) -+{ -+ struct bch_inode_unpacked inode_u; -+ struct bch_inode_info *inode; -+ int ret; -+ -+ inode = to_bch_ei(iget_locked(c->vfs_sb, inum)); -+ if (unlikely(!inode)) -+ return ERR_PTR(-ENOMEM); -+ if (!(inode->v.i_state & I_NEW)) -+ return &inode->v; -+ -+ ret = bch2_inode_find_by_inum(c, inum, &inode_u); -+ if (ret) { -+ iget_failed(&inode->v); -+ return ERR_PTR(ret); -+ } -+ -+ bch2_vfs_inode_init(c, inode, &inode_u); -+ -+ inode->ei_journal_seq = bch2_inode_journal_seq(&c->journal, inum); -+ -+ unlock_new_inode(&inode->v); -+ -+ return &inode->v; -+} -+ -+static struct bch_inode_info * -+__bch2_create(struct bch_inode_info *dir, struct dentry *dentry, -+ umode_t mode, dev_t rdev, bool tmpfile) -+{ -+ struct bch_fs *c = dir->v.i_sb->s_fs_info; -+ struct user_namespace *ns = dir->v.i_sb->s_user_ns; -+ struct btree_trans trans; -+ struct bch_inode_unpacked dir_u; -+ struct bch_inode_info *inode, *old; -+ struct bch_inode_unpacked inode_u; -+ struct posix_acl *default_acl = NULL, *acl = NULL; -+ u64 journal_seq = 0; -+ int ret; -+ -+ /* -+ * preallocate acls + vfs inode before btree transaction, so that -+ * nothing can fail after the transaction succeeds: -+ */ -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl); -+ if (ret) -+ return ERR_PTR(ret); -+#endif -+ inode = to_bch_ei(new_inode(c->vfs_sb)); -+ if (unlikely(!inode)) { -+ inode = ERR_PTR(-ENOMEM); -+ goto err; -+ } -+ -+ bch2_inode_init_early(c, &inode_u); -+ -+ if (!tmpfile) -+ mutex_lock(&dir->ei_update_lock); -+ -+ bch2_trans_init(&trans, c, 8, 1024); -+retry: -+ bch2_trans_begin(&trans); -+ -+ ret = bch2_create_trans(&trans, dir->v.i_ino, &dir_u, &inode_u, -+ !tmpfile ? 
&dentry->d_name : NULL, -+ from_kuid(ns, current_fsuid()), -+ from_kgid(ns, current_fsgid()), -+ mode, rdev, -+ default_acl, acl) ?: -+ bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (unlikely(ret)) -+ goto err_before_quota; -+ -+ ret = bch2_trans_commit(&trans, NULL, &journal_seq, -+ BTREE_INSERT_NOUNLOCK); -+ if (unlikely(ret)) { -+ bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, -+ KEY_TYPE_QUOTA_WARN); -+err_before_quota: -+ if (ret == -EINTR) -+ goto retry; -+ goto err_trans; -+ } -+ -+ if (!tmpfile) { -+ bch2_inode_update_after_write(c, dir, &dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ journal_seq_copy(c, dir, journal_seq); -+ mutex_unlock(&dir->ei_update_lock); -+ } -+ -+ bch2_vfs_inode_init(c, inode, &inode_u); -+ journal_seq_copy(c, inode, journal_seq); -+ -+ set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); -+ set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); -+ -+ /* -+ * we must insert the new inode into the inode cache before calling -+ * bch2_trans_exit() and dropping locks, else we could race with another -+ * thread pulling the inode in and modifying it: -+ */ -+ -+ old = to_bch_ei(insert_inode_locked2(&inode->v)); -+ if (unlikely(old)) { -+ /* -+ * We raced, another process pulled the new inode into cache -+ * before us: -+ */ -+ journal_seq_copy(c, old, journal_seq); -+ make_bad_inode(&inode->v); -+ iput(&inode->v); -+ -+ inode = old; -+ } else { -+ /* -+ * we really don't want insert_inode_locked2() to be setting -+ * I_NEW... -+ */ -+ unlock_new_inode(&inode->v); -+ } -+ -+ bch2_trans_exit(&trans); -+err: -+ posix_acl_release(default_acl); -+ posix_acl_release(acl); -+ return inode; -+err_trans: -+ if (!tmpfile) -+ mutex_unlock(&dir->ei_update_lock); -+ -+ bch2_trans_exit(&trans); -+ make_bad_inode(&inode->v); -+ iput(&inode->v); -+ inode = ERR_PTR(ret); -+ goto err; -+} -+ -+/* methods */ -+ -+static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, -+ unsigned int flags) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir); -+ struct inode *vinode = NULL; -+ u64 inum; -+ -+ inum = bch2_dirent_lookup(c, dir->v.i_ino, -+ &dir->ei_str_hash, -+ &dentry->d_name); -+ -+ if (inum) -+ vinode = bch2_vfs_inode_get(c, inum); -+ -+ return d_splice_alias(vinode, dentry); -+} -+ -+static int bch2_mknod(struct inode *vdir, struct dentry *dentry, -+ umode_t mode, dev_t rdev) -+{ -+ struct bch_inode_info *inode = -+ __bch2_create(to_bch_ei(vdir), dentry, mode, rdev, false); -+ -+ if (IS_ERR(inode)) -+ return PTR_ERR(inode); -+ -+ d_instantiate(dentry, &inode->v); -+ return 0; -+} -+ -+static int bch2_create(struct inode *vdir, struct dentry *dentry, -+ umode_t mode, bool excl) -+{ -+ return bch2_mknod(vdir, dentry, mode|S_IFREG, 0); -+} -+ -+static int __bch2_link(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch_inode_info *dir, -+ struct dentry *dentry) -+{ -+ struct btree_trans trans; -+ struct bch_inode_unpacked dir_u, inode_u; -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ bch2_trans_init(&trans, c, 4, 1024); -+ -+ do { -+ bch2_trans_begin(&trans); -+ ret = bch2_link_trans(&trans, -+ dir->v.i_ino, -+ inode->v.i_ino, &dir_u, &inode_u, -+ &dentry->d_name) ?: -+ bch2_trans_commit(&trans, NULL, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK); -+ } while (ret == -EINTR); -+ -+ if (likely(!ret)) { -+ BUG_ON(inode_u.bi_inum != inode->v.i_ino); -+ -+ journal_seq_copy(c, inode, dir->ei_journal_seq); -+ bch2_inode_update_after_write(c, dir, &dir_u, -+ 
ATTR_MTIME|ATTR_CTIME); -+ bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME); -+ } -+ -+ bch2_trans_exit(&trans); -+ mutex_unlock(&inode->ei_update_lock); -+ return ret; -+} -+ -+static int bch2_link(struct dentry *old_dentry, struct inode *vdir, -+ struct dentry *dentry) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir); -+ struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode); -+ int ret; -+ -+ lockdep_assert_held(&inode->v.i_rwsem); -+ -+ ret = __bch2_link(c, inode, dir, dentry); -+ if (unlikely(ret)) -+ return ret; -+ -+ ihold(&inode->v); -+ d_instantiate(dentry, &inode->v); -+ return 0; -+} -+ -+static int bch2_unlink(struct inode *vdir, struct dentry *dentry) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir); -+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); -+ struct bch_inode_unpacked dir_u, inode_u; -+ struct btree_trans trans; -+ int ret; -+ -+ bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); -+ bch2_trans_init(&trans, c, 4, 1024); -+ -+ do { -+ bch2_trans_begin(&trans); -+ -+ ret = bch2_unlink_trans(&trans, -+ dir->v.i_ino, &dir_u, -+ &inode_u, &dentry->d_name) ?: -+ bch2_trans_commit(&trans, NULL, -+ &dir->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOFAIL); -+ } while (ret == -EINTR); -+ -+ if (likely(!ret)) { -+ BUG_ON(inode_u.bi_inum != inode->v.i_ino); -+ -+ journal_seq_copy(c, inode, dir->ei_journal_seq); -+ bch2_inode_update_after_write(c, dir, &dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ bch2_inode_update_after_write(c, inode, &inode_u, -+ ATTR_MTIME); -+ } -+ -+ bch2_trans_exit(&trans); -+ bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); -+ -+ return ret; -+} -+ -+static int bch2_symlink(struct inode *vdir, struct dentry *dentry, -+ const char *symname) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir), *inode; -+ int ret; -+ -+ inode = __bch2_create(dir, dentry, S_IFLNK|S_IRWXUGO, 0, true); -+ if (unlikely(IS_ERR(inode))) -+ return PTR_ERR(inode); -+ -+ inode_lock(&inode->v); -+ ret = page_symlink(&inode->v, symname, strlen(symname) + 1); -+ inode_unlock(&inode->v); -+ -+ if (unlikely(ret)) -+ goto err; -+ -+ ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX); -+ if (unlikely(ret)) -+ goto err; -+ -+ journal_seq_copy(c, dir, inode->ei_journal_seq); -+ -+ ret = __bch2_link(c, inode, dir, dentry); -+ if (unlikely(ret)) -+ goto err; -+ -+ d_instantiate(dentry, &inode->v); -+ return 0; -+err: -+ iput(&inode->v); -+ return ret; -+} -+ -+static int bch2_mkdir(struct inode *vdir, struct dentry *dentry, umode_t mode) -+{ -+ return bch2_mknod(vdir, dentry, mode|S_IFDIR, 0); -+} -+ -+static int bch2_rename2(struct inode *src_vdir, struct dentry *src_dentry, -+ struct inode *dst_vdir, struct dentry *dst_dentry, -+ unsigned flags) -+{ -+ struct bch_fs *c = src_vdir->i_sb->s_fs_info; -+ struct bch_inode_info *src_dir = to_bch_ei(src_vdir); -+ struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir); -+ struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode); -+ struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode); -+ struct bch_inode_unpacked dst_dir_u, src_dir_u; -+ struct bch_inode_unpacked src_inode_u, dst_inode_u; -+ struct btree_trans trans; -+ enum bch_rename_mode mode = flags & RENAME_EXCHANGE -+ ? BCH_RENAME_EXCHANGE -+ : dst_dentry->d_inode -+ ? 
BCH_RENAME_OVERWRITE : BCH_RENAME; -+ u64 journal_seq = 0; -+ int ret; -+ -+ if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE)) -+ return -EINVAL; -+ -+ if (mode == BCH_RENAME_OVERWRITE) { -+ ret = filemap_write_and_wait_range(src_inode->v.i_mapping, -+ 0, LLONG_MAX); -+ if (ret) -+ return ret; -+ } -+ -+ bch2_trans_init(&trans, c, 8, 2048); -+ -+ bch2_lock_inodes(INODE_UPDATE_LOCK, -+ src_dir, -+ dst_dir, -+ src_inode, -+ dst_inode); -+ -+ if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) { -+ ret = bch2_fs_quota_transfer(c, src_inode, -+ dst_dir->ei_qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err; -+ } -+ -+ if (mode == BCH_RENAME_EXCHANGE && -+ inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) { -+ ret = bch2_fs_quota_transfer(c, dst_inode, -+ src_dir->ei_qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err; -+ } -+ -+retry: -+ bch2_trans_begin(&trans); -+ ret = bch2_rename_trans(&trans, -+ src_dir->v.i_ino, &src_dir_u, -+ dst_dir->v.i_ino, &dst_dir_u, -+ &src_inode_u, -+ &dst_inode_u, -+ &src_dentry->d_name, -+ &dst_dentry->d_name, -+ mode) ?: -+ bch2_trans_commit(&trans, NULL, -+ &journal_seq, -+ BTREE_INSERT_NOUNLOCK); -+ if (ret == -EINTR) -+ goto retry; -+ if (unlikely(ret)) -+ goto err; -+ -+ BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum); -+ BUG_ON(dst_inode && -+ dst_inode->v.i_ino != dst_inode_u.bi_inum); -+ -+ bch2_inode_update_after_write(c, src_dir, &src_dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ journal_seq_copy(c, src_dir, journal_seq); -+ -+ if (src_dir != dst_dir) { -+ bch2_inode_update_after_write(c, dst_dir, &dst_dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ journal_seq_copy(c, dst_dir, journal_seq); -+ } -+ -+ bch2_inode_update_after_write(c, src_inode, &src_inode_u, -+ ATTR_CTIME); -+ journal_seq_copy(c, src_inode, journal_seq); -+ -+ if (dst_inode) { -+ bch2_inode_update_after_write(c, dst_inode, &dst_inode_u, -+ ATTR_CTIME); -+ journal_seq_copy(c, dst_inode, journal_seq); -+ } -+err: -+ bch2_trans_exit(&trans); -+ -+ bch2_fs_quota_transfer(c, src_inode, -+ bch_qid(&src_inode->ei_inode), -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_NOCHECK); -+ if (dst_inode) -+ bch2_fs_quota_transfer(c, dst_inode, -+ bch_qid(&dst_inode->ei_inode), -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_NOCHECK); -+ -+ bch2_unlock_inodes(INODE_UPDATE_LOCK, -+ src_dir, -+ dst_dir, -+ src_inode, -+ dst_inode); -+ -+ return ret; -+} -+ -+void bch2_setattr_copy(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ struct iattr *attr) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ unsigned int ia_valid = attr->ia_valid; -+ -+ if (ia_valid & ATTR_UID) -+ bi->bi_uid = from_kuid(c->vfs_sb->s_user_ns, attr->ia_uid); -+ if (ia_valid & ATTR_GID) -+ bi->bi_gid = from_kgid(c->vfs_sb->s_user_ns, attr->ia_gid); -+ -+ if (ia_valid & ATTR_ATIME) -+ bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime); -+ if (ia_valid & ATTR_MTIME) -+ bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime); -+ if (ia_valid & ATTR_CTIME) -+ bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime); -+ -+ if (ia_valid & ATTR_MODE) { -+ umode_t mode = attr->ia_mode; -+ kgid_t gid = ia_valid & ATTR_GID -+ ? 
attr->ia_gid -+ : inode->v.i_gid; -+ -+ if (!in_group_p(gid) && -+ !capable_wrt_inode_uidgid(&inode->v, CAP_FSETID)) -+ mode &= ~S_ISGID; -+ bi->bi_mode = mode; -+ } -+} -+ -+static int bch2_setattr_nonsize(struct bch_inode_info *inode, -+ struct iattr *attr) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_qid qid; -+ struct btree_trans trans; -+ struct btree_iter *inode_iter; -+ struct bch_inode_unpacked inode_u; -+ struct posix_acl *acl = NULL; -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ -+ qid = inode->ei_qid; -+ -+ if (attr->ia_valid & ATTR_UID) -+ qid.q[QTYP_USR] = from_kuid(&init_user_ns, attr->ia_uid); -+ -+ if (attr->ia_valid & ATTR_GID) -+ qid.q[QTYP_GRP] = from_kgid(&init_user_ns, attr->ia_gid); -+ -+ ret = bch2_fs_quota_transfer(c, inode, qid, ~0, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ kfree(acl); -+ acl = NULL; -+ -+ inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(inode_iter); -+ if (ret) -+ goto btree_err; -+ -+ bch2_setattr_copy(inode, &inode_u, attr); -+ -+ if (attr->ia_valid & ATTR_MODE) { -+ ret = bch2_acl_chmod(&trans, inode, inode_u.bi_mode, &acl); -+ if (ret) -+ goto btree_err; -+ } -+ -+ ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: -+ bch2_trans_commit(&trans, NULL, -+ &inode->ei_journal_seq, -+ BTREE_INSERT_NOUNLOCK| -+ BTREE_INSERT_NOFAIL); -+btree_err: -+ if (ret == -EINTR) -+ goto retry; -+ if (unlikely(ret)) -+ goto err_trans; -+ -+ bch2_inode_update_after_write(c, inode, &inode_u, attr->ia_valid); -+ -+ if (acl) -+ set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); -+err_trans: -+ bch2_trans_exit(&trans); -+err: -+ mutex_unlock(&inode->ei_update_lock); -+ -+ return ret; -+} -+ -+static int bch2_getattr(const struct path *path, struct kstat *stat, -+ u32 request_mask, unsigned query_flags) -+{ -+ struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry)); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ stat->dev = inode->v.i_sb->s_dev; -+ stat->ino = inode->v.i_ino; -+ stat->mode = inode->v.i_mode; -+ stat->nlink = inode->v.i_nlink; -+ stat->uid = inode->v.i_uid; -+ stat->gid = inode->v.i_gid; -+ stat->rdev = inode->v.i_rdev; -+ stat->size = i_size_read(&inode->v); -+ stat->atime = inode->v.i_atime; -+ stat->mtime = inode->v.i_mtime; -+ stat->ctime = inode->v.i_ctime; -+ stat->blksize = block_bytes(c); -+ stat->blocks = inode->v.i_blocks; -+ -+ if (request_mask & STATX_BTIME) { -+ stat->result_mask |= STATX_BTIME; -+ stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime); -+ } -+ -+ if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE) -+ stat->attributes |= STATX_ATTR_IMMUTABLE; -+ stat->attributes_mask |= STATX_ATTR_IMMUTABLE; -+ -+ if (inode->ei_inode.bi_flags & BCH_INODE_APPEND) -+ stat->attributes |= STATX_ATTR_APPEND; -+ stat->attributes_mask |= STATX_ATTR_APPEND; -+ -+ if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP) -+ stat->attributes |= STATX_ATTR_NODUMP; -+ stat->attributes_mask |= STATX_ATTR_NODUMP; -+ -+ return 0; -+} -+ -+static int bch2_setattr(struct dentry *dentry, struct iattr *iattr) -+{ -+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); -+ int ret; -+ -+ lockdep_assert_held(&inode->v.i_rwsem); -+ -+ ret = setattr_prepare(dentry, iattr); -+ if (ret) -+ return ret; -+ -+ return iattr->ia_valid & ATTR_SIZE -+ ? 
bch2_truncate(inode, iattr) -+ : bch2_setattr_nonsize(inode, iattr); -+} -+ -+static int bch2_tmpfile(struct inode *vdir, struct dentry *dentry, umode_t mode) -+{ -+ struct bch_inode_info *inode = -+ __bch2_create(to_bch_ei(vdir), dentry, mode, 0, true); -+ -+ if (IS_ERR(inode)) -+ return PTR_ERR(inode); -+ -+ d_mark_tmpfile(dentry, &inode->v); -+ d_instantiate(dentry, &inode->v); -+ return 0; -+} -+ -+static int bch2_fill_extent(struct bch_fs *c, -+ struct fiemap_extent_info *info, -+ struct bkey_s_c k, unsigned flags) -+{ -+ if (bkey_extent_is_data(k.k)) { -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ int ret; -+ -+ if (k.k->type == KEY_TYPE_reflink_v) -+ flags |= FIEMAP_EXTENT_SHARED; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ int flags2 = 0; -+ u64 offset = p.ptr.offset; -+ -+ if (p.crc.compression_type) -+ flags2 |= FIEMAP_EXTENT_ENCODED; -+ else -+ offset += p.crc.offset; -+ -+ if ((offset & (c->opts.block_size - 1)) || -+ (k.k->size & (c->opts.block_size - 1))) -+ flags2 |= FIEMAP_EXTENT_NOT_ALIGNED; -+ -+ ret = fiemap_fill_next_extent(info, -+ bkey_start_offset(k.k) << 9, -+ offset << 9, -+ k.k->size << 9, flags|flags2); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+ } else if (k.k->type == KEY_TYPE_reservation) { -+ return fiemap_fill_next_extent(info, -+ bkey_start_offset(k.k) << 9, -+ 0, k.k->size << 9, -+ flags| -+ FIEMAP_EXTENT_DELALLOC| -+ FIEMAP_EXTENT_UNWRITTEN); -+ } else { -+ BUG(); -+ } -+} -+ -+static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, -+ u64 start, u64 len) -+{ -+ struct bch_fs *c = vinode->i_sb->s_fs_info; -+ struct bch_inode_info *ei = to_bch_ei(vinode); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_on_stack cur, prev; -+ struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); -+ unsigned offset_into_extent, sectors; -+ bool have_extent = false; -+ int ret = 0; -+ -+ ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC); -+ if (ret) -+ return ret; -+ -+ if (start + len < start) -+ return -EINVAL; -+ -+ bkey_on_stack_init(&cur); -+ bkey_on_stack_init(&prev); -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(ei->v.i_ino, start >> 9), 0); -+retry: -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret = bkey_err(k)) && -+ bkey_cmp(iter->pos, end) < 0) { -+ if (!bkey_extent_is_data(k.k) && -+ k.k->type != KEY_TYPE_reservation) { -+ bch2_btree_iter_next(iter); -+ continue; -+ } -+ -+ bkey_on_stack_realloc(&cur, c, k.k->u64s); -+ bkey_on_stack_realloc(&prev, c, k.k->u64s); -+ bkey_reassemble(cur.k, k); -+ k = bkey_i_to_s_c(cur.k); -+ -+ offset_into_extent = iter->pos.offset - -+ bkey_start_offset(k.k); -+ sectors = k.k->size - offset_into_extent; -+ -+ ret = bch2_read_indirect_extent(&trans, -+ &offset_into_extent, &cur); -+ if (ret) -+ break; -+ -+ sectors = min(sectors, k.k->size - offset_into_extent); -+ -+ if (offset_into_extent) -+ bch2_cut_front(POS(k.k->p.inode, -+ bkey_start_offset(k.k) + -+ offset_into_extent), -+ cur.k); -+ bch2_key_resize(&cur.k->k, sectors); -+ cur.k->k.p = iter->pos; -+ cur.k->k.p.offset += cur.k->k.size; -+ -+ if (have_extent) { -+ ret = bch2_fill_extent(c, info, -+ bkey_i_to_s_c(prev.k), 0); -+ if (ret) -+ break; -+ } -+ -+ bkey_copy(prev.k, cur.k); -+ have_extent = true; -+ -+ if (k.k->type == KEY_TYPE_reflink_v) -+ bch2_btree_iter_set_pos(iter, k.k->p); -+ else -+ bch2_btree_iter_next(iter); -+ } -+ 
-+ if (ret == -EINTR) -+ goto retry; -+ -+ if (!ret && have_extent) -+ ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), -+ FIEMAP_EXTENT_LAST); -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ bkey_on_stack_exit(&cur, c); -+ bkey_on_stack_exit(&prev, c); -+ return ret < 0 ? ret : 0; -+} -+ -+static const struct vm_operations_struct bch_vm_ops = { -+ .fault = bch2_page_fault, -+ .map_pages = filemap_map_pages, -+ .page_mkwrite = bch2_page_mkwrite, -+}; -+ -+static int bch2_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ file_accessed(file); -+ -+ vma->vm_ops = &bch_vm_ops; -+ return 0; -+} -+ -+/* Directories: */ -+ -+static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence) -+{ -+ return generic_file_llseek_size(file, offset, whence, -+ S64_MAX, S64_MAX); -+} -+ -+static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ if (!dir_emit_dots(file, ctx)) -+ return 0; -+ -+ return bch2_readdir(c, inode->v.i_ino, ctx); -+} -+ -+static const struct file_operations bch_file_operations = { -+ .llseek = bch2_llseek, -+ .read_iter = bch2_read_iter, -+ .write_iter = bch2_write_iter, -+ .mmap = bch2_mmap, -+ .open = generic_file_open, -+ .fsync = bch2_fsync, -+ .splice_read = generic_file_splice_read, -+ /* -+ * Broken, on v5.3: -+ .splice_write = iter_file_splice_write, -+ */ -+ .fallocate = bch2_fallocate_dispatch, -+ .unlocked_ioctl = bch2_fs_file_ioctl, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = bch2_compat_fs_ioctl, -+#endif -+ .remap_file_range = bch2_remap_file_range, -+}; -+ -+static const struct inode_operations bch_file_inode_operations = { -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .fiemap = bch2_fiemap, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct inode_operations bch_dir_inode_operations = { -+ .lookup = bch2_lookup, -+ .create = bch2_create, -+ .link = bch2_link, -+ .unlink = bch2_unlink, -+ .symlink = bch2_symlink, -+ .mkdir = bch2_mkdir, -+ .rmdir = bch2_unlink, -+ .mknod = bch2_mknod, -+ .rename = bch2_rename2, -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .tmpfile = bch2_tmpfile, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct file_operations bch_dir_file_operations = { -+ .llseek = bch2_dir_llseek, -+ .read = generic_read_dir, -+ .iterate_shared = bch2_vfs_readdir, -+ .fsync = bch2_fsync, -+ .unlocked_ioctl = bch2_fs_file_ioctl, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = bch2_compat_fs_ioctl, -+#endif -+}; -+ -+static const struct inode_operations bch_symlink_inode_operations = { -+ .get_link = page_get_link, -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct inode_operations bch_special_inode_operations = { -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct address_space_operations bch_address_space_operations = { -+ .writepage = bch2_writepage, -+ .readpage = bch2_readpage, -+ .writepages = bch2_writepages, -+ .readpages = bch2_readpages, 
-+ .set_page_dirty = __set_page_dirty_nobuffers, -+ .write_begin = bch2_write_begin, -+ .write_end = bch2_write_end, -+ .invalidatepage = bch2_invalidatepage, -+ .releasepage = bch2_releasepage, -+ .direct_IO = noop_direct_IO, -+#ifdef CONFIG_MIGRATION -+ .migratepage = bch2_migrate_page, -+#endif -+ .error_remove_page = generic_error_remove_page, -+}; -+ -+static struct inode *bch2_nfs_get_inode(struct super_block *sb, -+ u64 ino, u32 generation) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct inode *vinode; -+ -+ if (ino < BCACHEFS_ROOT_INO) -+ return ERR_PTR(-ESTALE); -+ -+ vinode = bch2_vfs_inode_get(c, ino); -+ if (IS_ERR(vinode)) -+ return ERR_CAST(vinode); -+ if (generation && vinode->i_generation != generation) { -+ /* we didn't find the right inode.. */ -+ iput(vinode); -+ return ERR_PTR(-ESTALE); -+ } -+ return vinode; -+} -+ -+static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *fid, -+ int fh_len, int fh_type) -+{ -+ return generic_fh_to_dentry(sb, fid, fh_len, fh_type, -+ bch2_nfs_get_inode); -+} -+ -+static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *fid, -+ int fh_len, int fh_type) -+{ -+ return generic_fh_to_parent(sb, fid, fh_len, fh_type, -+ bch2_nfs_get_inode); -+} -+ -+static const struct export_operations bch_export_ops = { -+ .fh_to_dentry = bch2_fh_to_dentry, -+ .fh_to_parent = bch2_fh_to_parent, -+ //.get_parent = bch2_get_parent, -+}; -+ -+static void bch2_vfs_inode_init(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi) -+{ -+ bch2_inode_update_after_write(c, inode, bi, ~0); -+ -+ inode->v.i_blocks = bi->bi_sectors; -+ inode->v.i_ino = bi->bi_inum; -+ inode->v.i_rdev = bi->bi_dev; -+ inode->v.i_generation = bi->bi_generation; -+ inode->v.i_size = bi->bi_size; -+ -+ inode->ei_journal_seq = 0; -+ inode->ei_quota_reserved = 0; -+ inode->ei_str_hash = bch2_hash_info_init(c, bi); -+ inode->ei_qid = bch_qid(bi); -+ -+ inode->v.i_mapping->a_ops = &bch_address_space_operations; -+ -+ switch (inode->v.i_mode & S_IFMT) { -+ case S_IFREG: -+ inode->v.i_op = &bch_file_inode_operations; -+ inode->v.i_fop = &bch_file_operations; -+ break; -+ case S_IFDIR: -+ inode->v.i_op = &bch_dir_inode_operations; -+ inode->v.i_fop = &bch_dir_file_operations; -+ break; -+ case S_IFLNK: -+ inode_nohighmem(&inode->v); -+ inode->v.i_op = &bch_symlink_inode_operations; -+ break; -+ default: -+ init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev); -+ inode->v.i_op = &bch_special_inode_operations; -+ break; -+ } -+} -+ -+static struct inode *bch2_alloc_inode(struct super_block *sb) -+{ -+ struct bch_inode_info *inode; -+ -+ inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS); -+ if (!inode) -+ return NULL; -+ -+ inode_init_once(&inode->v); -+ mutex_init(&inode->ei_update_lock); -+ pagecache_lock_init(&inode->ei_pagecache_lock); -+ mutex_init(&inode->ei_quota_lock); -+ inode->ei_journal_seq = 0; -+ -+ return &inode->v; -+} -+ -+static void bch2_i_callback(struct rcu_head *head) -+{ -+ struct inode *vinode = container_of(head, struct inode, i_rcu); -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ -+ kmem_cache_free(bch2_inode_cache, inode); -+} -+ -+static void bch2_destroy_inode(struct inode *vinode) -+{ -+ call_rcu(&vinode->i_rcu, bch2_i_callback); -+} -+ -+static int inode_update_times_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ bi->bi_atime = timespec_to_bch2_time(c, inode->v.i_atime); -+ bi->bi_mtime 
= timespec_to_bch2_time(c, inode->v.i_mtime); -+ bi->bi_ctime = timespec_to_bch2_time(c, inode->v.i_ctime); -+ -+ return 0; -+} -+ -+static int bch2_vfs_write_inode(struct inode *vinode, -+ struct writeback_control *wbc) -+{ -+ struct bch_fs *c = vinode->i_sb->s_fs_info; -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, -+ ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ -+ return ret; -+} -+ -+static void bch2_evict_inode(struct inode *vinode) -+{ -+ struct bch_fs *c = vinode->i_sb->s_fs_info; -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ -+ truncate_inode_pages_final(&inode->v.i_data); -+ -+ clear_inode(&inode->v); -+ -+ BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved); -+ -+ if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { -+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks), -+ KEY_TYPE_QUOTA_WARN); -+ bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, -+ KEY_TYPE_QUOTA_WARN); -+ bch2_inode_rm(c, inode->v.i_ino); -+ } -+} -+ -+static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) -+{ -+ struct super_block *sb = dentry->d_sb; -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); -+ unsigned shift = sb->s_blocksize_bits - 9; -+ u64 fsid; -+ -+ buf->f_type = BCACHEFS_STATFS_MAGIC; -+ buf->f_bsize = sb->s_blocksize; -+ buf->f_blocks = usage.capacity >> shift; -+ buf->f_bfree = (usage.capacity - usage.used) >> shift; -+ buf->f_bavail = buf->f_bfree; -+ buf->f_files = 0; -+ buf->f_ffree = 0; -+ -+ fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^ -+ le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64)); -+ buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; -+ buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; -+ buf->f_namelen = BCH_NAME_MAX; -+ -+ return 0; -+} -+ -+static int bch2_sync_fs(struct super_block *sb, int wait) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ if (c->opts.journal_flush_disabled) -+ return 0; -+ -+ if (!wait) { -+ bch2_journal_flush_async(&c->journal, NULL); -+ return 0; -+ } -+ -+ return bch2_journal_flush(&c->journal); -+} -+ -+static struct bch_fs *bch2_path_to_fs(const char *dev) -+{ -+ struct bch_fs *c; -+ struct block_device *bdev = lookup_bdev(dev); -+ -+ if (IS_ERR(bdev)) -+ return ERR_CAST(bdev); -+ -+ c = bch2_bdev_to_fs(bdev); -+ bdput(bdev); -+ if (c) -+ closure_put(&c->cl); -+ return c ?: ERR_PTR(-ENOENT); -+} -+ -+static char **split_devs(const char *_dev_name, unsigned *nr) -+{ -+ char *dev_name = NULL, **devs = NULL, *s; -+ size_t i, nr_devs = 0; -+ -+ dev_name = kstrdup(_dev_name, GFP_KERNEL); -+ if (!dev_name) -+ return NULL; -+ -+ for (s = dev_name; s; s = strchr(s + 1, ':')) -+ nr_devs++; -+ -+ devs = kcalloc(nr_devs + 1, sizeof(const char *), GFP_KERNEL); -+ if (!devs) { -+ kfree(dev_name); -+ return NULL; -+ } -+ -+ for (i = 0, s = dev_name; -+ s; -+ (s = strchr(s, ':')) && (*s++ = '\0')) -+ devs[i++] = s; -+ -+ *nr = nr_devs; -+ return devs; -+} -+ -+static int bch2_remount(struct super_block *sb, int *flags, char *data) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_opts opts = bch2_opts_empty(); -+ int ret; -+ -+ opt_set(opts, read_only, (*flags & SB_RDONLY) != 0); -+ -+ ret = bch2_parse_mount_opts(&opts, data); -+ if (ret) -+ return ret; -+ -+ if (opts.read_only != c->opts.read_only) { -+ down_write(&c->state_lock); -+ -+ if (opts.read_only) { -+ bch2_fs_read_only(c); -+ -+ sb->s_flags |= 
SB_RDONLY; -+ } else { -+ ret = bch2_fs_read_write(c); -+ if (ret) { -+ bch_err(c, "error going rw: %i", ret); -+ up_write(&c->state_lock); -+ return -EINVAL; -+ } -+ -+ sb->s_flags &= ~SB_RDONLY; -+ } -+ -+ c->opts.read_only = opts.read_only; -+ -+ up_write(&c->state_lock); -+ } -+ -+ if (opts.errors >= 0) -+ c->opts.errors = opts.errors; -+ -+ return ret; -+} -+ -+static int bch2_show_devname(struct seq_file *seq, struct dentry *root) -+{ -+ struct bch_fs *c = root->d_sb->s_fs_info; -+ struct bch_dev *ca; -+ unsigned i; -+ bool first = true; -+ -+ for_each_online_member(ca, c, i) { -+ if (!first) -+ seq_putc(seq, ':'); -+ first = false; -+ seq_puts(seq, "/dev/"); -+ seq_puts(seq, ca->name); -+ } -+ -+ return 0; -+} -+ -+static int bch2_show_options(struct seq_file *seq, struct dentry *root) -+{ -+ struct bch_fs *c = root->d_sb->s_fs_info; -+ enum bch_opt_id i; -+ char buf[512]; -+ -+ for (i = 0; i < bch2_opts_nr; i++) { -+ const struct bch_option *opt = &bch2_opt_table[i]; -+ u64 v = bch2_opt_get_by_id(&c->opts, i); -+ -+ if (!(opt->mode & OPT_MOUNT)) -+ continue; -+ -+ if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) -+ continue; -+ -+ bch2_opt_to_text(&PBUF(buf), c, opt, v, -+ OPT_SHOW_MOUNT_STYLE); -+ seq_putc(seq, ','); -+ seq_puts(seq, buf); -+ } -+ -+ return 0; -+} -+ -+static void bch2_put_super(struct super_block *sb) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ __bch2_fs_stop(c); -+} -+ -+static const struct super_operations bch_super_operations = { -+ .alloc_inode = bch2_alloc_inode, -+ .destroy_inode = bch2_destroy_inode, -+ .write_inode = bch2_vfs_write_inode, -+ .evict_inode = bch2_evict_inode, -+ .sync_fs = bch2_sync_fs, -+ .statfs = bch2_statfs, -+ .show_devname = bch2_show_devname, -+ .show_options = bch2_show_options, -+ .remount_fs = bch2_remount, -+ .put_super = bch2_put_super, -+#if 0 -+ .freeze_fs = bch2_freeze, -+ .unfreeze_fs = bch2_unfreeze, -+#endif -+}; -+ -+static int bch2_set_super(struct super_block *s, void *data) -+{ -+ s->s_fs_info = data; -+ return 0; -+} -+ -+static int bch2_noset_super(struct super_block *s, void *data) -+{ -+ return -EBUSY; -+} -+ -+static int bch2_test_super(struct super_block *s, void *data) -+{ -+ struct bch_fs *c = s->s_fs_info; -+ struct bch_fs **devs = data; -+ unsigned i; -+ -+ if (!c) -+ return false; -+ -+ for (i = 0; devs[i]; i++) -+ if (c != devs[i]) -+ return false; -+ return true; -+} -+ -+static struct dentry *bch2_mount(struct file_system_type *fs_type, -+ int flags, const char *dev_name, void *data) -+{ -+ struct bch_fs *c; -+ struct bch_dev *ca; -+ struct super_block *sb; -+ struct inode *vinode; -+ struct bch_opts opts = bch2_opts_empty(); -+ char **devs; -+ struct bch_fs **devs_to_fs = NULL; -+ unsigned i, nr_devs; -+ int ret; -+ -+ opt_set(opts, read_only, (flags & SB_RDONLY) != 0); -+ -+ ret = bch2_parse_mount_opts(&opts, data); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ devs = split_devs(dev_name, &nr_devs); -+ if (!devs) -+ return ERR_PTR(-ENOMEM); -+ -+ devs_to_fs = kcalloc(nr_devs + 1, sizeof(void *), GFP_KERNEL); -+ if (!devs_to_fs) { -+ sb = ERR_PTR(-ENOMEM); -+ goto got_sb; -+ } -+ -+ for (i = 0; i < nr_devs; i++) -+ devs_to_fs[i] = bch2_path_to_fs(devs[i]); -+ -+ sb = sget(fs_type, bch2_test_super, bch2_noset_super, -+ flags|SB_NOSEC, devs_to_fs); -+ if (!IS_ERR(sb)) -+ goto got_sb; -+ -+ c = bch2_fs_open(devs, nr_devs, opts); -+ -+ if (!IS_ERR(c)) -+ sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c); -+ else -+ sb = ERR_CAST(c); -+got_sb: -+ kfree(devs_to_fs); -+ kfree(devs[0]); -+ 
kfree(devs); -+ -+ if (IS_ERR(sb)) -+ return ERR_CAST(sb); -+ -+ c = sb->s_fs_info; -+ -+ if (sb->s_root) { -+ if ((flags ^ sb->s_flags) & SB_RDONLY) { -+ ret = -EBUSY; -+ goto err_put_super; -+ } -+ goto out; -+ } -+ -+ sb->s_blocksize = block_bytes(c); -+ sb->s_blocksize_bits = ilog2(block_bytes(c)); -+ sb->s_maxbytes = MAX_LFS_FILESIZE; -+ sb->s_op = &bch_super_operations; -+ sb->s_export_op = &bch_export_ops; -+#ifdef CONFIG_BCACHEFS_QUOTA -+ sb->s_qcop = &bch2_quotactl_operations; -+ sb->s_quota_types = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ; -+#endif -+ sb->s_xattr = bch2_xattr_handlers; -+ sb->s_magic = BCACHEFS_STATFS_MAGIC; -+ sb->s_time_gran = c->sb.time_precision; -+ c->vfs_sb = sb; -+ strlcpy(sb->s_id, c->name, sizeof(sb->s_id)); -+ -+ ret = super_setup_bdi(sb); -+ if (ret) -+ goto err_put_super; -+ -+ sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; -+ -+ for_each_online_member(ca, c, i) { -+ struct block_device *bdev = ca->disk_sb.bdev; -+ -+ /* XXX: create an anonymous device for multi device filesystems */ -+ sb->s_bdev = bdev; -+ sb->s_dev = bdev->bd_dev; -+ percpu_ref_put(&ca->io_ref); -+ break; -+ } -+ -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ if (c->opts.acl) -+ sb->s_flags |= SB_POSIXACL; -+#endif -+ -+ vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_INO); -+ if (IS_ERR(vinode)) { -+ bch_err(c, "error mounting: error getting root inode %i", -+ (int) PTR_ERR(vinode)); -+ ret = PTR_ERR(vinode); -+ goto err_put_super; -+ } -+ -+ sb->s_root = d_make_root(vinode); -+ if (!sb->s_root) { -+ bch_err(c, "error mounting: error allocating root dentry"); -+ ret = -ENOMEM; -+ goto err_put_super; -+ } -+ -+ sb->s_flags |= SB_ACTIVE; -+out: -+ return dget(sb->s_root); -+ -+err_put_super: -+ deactivate_locked_super(sb); -+ return ERR_PTR(ret); -+} -+ -+static void bch2_kill_sb(struct super_block *sb) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ generic_shutdown_super(sb); -+ bch2_fs_free(c); -+} -+ -+static struct file_system_type bcache_fs_type = { -+ .owner = THIS_MODULE, -+ .name = "bcachefs", -+ .mount = bch2_mount, -+ .kill_sb = bch2_kill_sb, -+ .fs_flags = FS_REQUIRES_DEV, -+}; -+ -+MODULE_ALIAS_FS("bcachefs"); -+ -+void bch2_vfs_exit(void) -+{ -+ unregister_filesystem(&bcache_fs_type); -+ if (bch2_inode_cache) -+ kmem_cache_destroy(bch2_inode_cache); -+} -+ -+int __init bch2_vfs_init(void) -+{ -+ int ret = -ENOMEM; -+ -+ bch2_inode_cache = KMEM_CACHE(bch_inode_info, 0); -+ if (!bch2_inode_cache) -+ goto err; -+ -+ ret = register_filesystem(&bcache_fs_type); -+ if (ret) -+ goto err; -+ -+ return 0; -+err: -+ bch2_vfs_exit(); -+ return ret; -+} -+ -+#endif /* NO_BCACHEFS_FS */ -diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h -new file mode 100644 -index 000000000000..eda903a45325 ---- /dev/null -+++ b/fs/bcachefs/fs.h -@@ -0,0 +1,174 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_H -+#define _BCACHEFS_FS_H -+ -+#include "inode.h" -+#include "opts.h" -+#include "str_hash.h" -+#include "quota_types.h" -+ -+#include -+#include -+ -+/* -+ * Two-state lock - can be taken for add or block - both states are shared, -+ * like read side of rwsem, but conflict with other state: -+ */ -+struct pagecache_lock { -+ atomic_long_t v; -+ wait_queue_head_t wait; -+}; -+ -+static inline void pagecache_lock_init(struct pagecache_lock *lock) -+{ -+ atomic_long_set(&lock->v, 0); -+ init_waitqueue_head(&lock->wait); -+} -+ -+void bch2_pagecache_add_put(struct pagecache_lock *); -+void bch2_pagecache_add_get(struct pagecache_lock *); -+void bch2_pagecache_block_put(struct pagecache_lock 
*); -+void bch2_pagecache_block_get(struct pagecache_lock *); -+ -+struct bch_inode_info { -+ struct inode v; -+ -+ struct mutex ei_update_lock; -+ u64 ei_journal_seq; -+ u64 ei_quota_reserved; -+ unsigned long ei_last_dirtied; -+ -+ struct pagecache_lock ei_pagecache_lock; -+ -+ struct mutex ei_quota_lock; -+ struct bch_qid ei_qid; -+ -+ struct bch_hash_info ei_str_hash; -+ -+ /* copy of inode in btree: */ -+ struct bch_inode_unpacked ei_inode; -+}; -+ -+#define to_bch_ei(_inode) \ -+ container_of_or_null(_inode, struct bch_inode_info, v) -+ -+static inline int ptrcmp(void *l, void *r) -+{ -+ return cmp_int(l, r); -+} -+ -+enum bch_inode_lock_op { -+ INODE_LOCK = (1U << 0), -+ INODE_PAGECACHE_BLOCK = (1U << 1), -+ INODE_UPDATE_LOCK = (1U << 2), -+}; -+ -+#define bch2_lock_inodes(_locks, ...) \ -+do { \ -+ struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \ -+ unsigned i; \ -+ \ -+ bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \ -+ \ -+ for (i = 1; i < ARRAY_SIZE(a); i++) \ -+ if (a[i] != a[i - 1]) { \ -+ if ((_locks) & INODE_LOCK) \ -+ down_write_nested(&a[i]->v.i_rwsem, i); \ -+ if ((_locks) & INODE_PAGECACHE_BLOCK) \ -+ bch2_pagecache_block_get(&a[i]->ei_pagecache_lock);\ -+ if ((_locks) & INODE_UPDATE_LOCK) \ -+ mutex_lock_nested(&a[i]->ei_update_lock, i);\ -+ } \ -+} while (0) -+ -+#define bch2_unlock_inodes(_locks, ...) \ -+do { \ -+ struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \ -+ unsigned i; \ -+ \ -+ bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \ -+ \ -+ for (i = 1; i < ARRAY_SIZE(a); i++) \ -+ if (a[i] != a[i - 1]) { \ -+ if ((_locks) & INODE_LOCK) \ -+ up_write(&a[i]->v.i_rwsem); \ -+ if ((_locks) & INODE_PAGECACHE_BLOCK) \ -+ bch2_pagecache_block_put(&a[i]->ei_pagecache_lock);\ -+ if ((_locks) & INODE_UPDATE_LOCK) \ -+ mutex_unlock(&a[i]->ei_update_lock); \ -+ } \ -+} while (0) -+ -+static inline struct bch_inode_info *file_bch_inode(struct file *file) -+{ -+ return to_bch_ei(file_inode(file)); -+} -+ -+static inline bool inode_attr_changing(struct bch_inode_info *dir, -+ struct bch_inode_info *inode, -+ enum inode_opt_id id) -+{ -+ return !(inode->ei_inode.bi_fields_set & (1 << id)) && -+ bch2_inode_opt_get(&dir->ei_inode, id) != -+ bch2_inode_opt_get(&inode->ei_inode, id); -+} -+ -+static inline bool inode_attrs_changing(struct bch_inode_info *dir, -+ struct bch_inode_info *inode) -+{ -+ unsigned id; -+ -+ for (id = 0; id < Inode_opt_nr; id++) -+ if (inode_attr_changing(dir, inode, id)) -+ return true; -+ -+ return false; -+} -+ -+struct bch_inode_unpacked; -+ -+#ifndef NO_BCACHEFS_FS -+ -+int bch2_fs_quota_transfer(struct bch_fs *, -+ struct bch_inode_info *, -+ struct bch_qid, -+ unsigned, -+ enum quota_acct_mode); -+ -+static inline int bch2_set_projid(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ u32 projid) -+{ -+ struct bch_qid qid = inode->ei_qid; -+ -+ qid.q[QTYP_PRJ] = projid; -+ -+ return bch2_fs_quota_transfer(c, inode, qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+} -+ -+struct inode *bch2_vfs_inode_get(struct bch_fs *, u64); -+ -+/* returns 0 if we want to do the update, or error is passed up */ -+typedef int (*inode_set_fn)(struct bch_inode_info *, -+ struct bch_inode_unpacked *, void *); -+ -+void bch2_inode_update_after_write(struct bch_fs *, -+ struct bch_inode_info *, -+ struct bch_inode_unpacked *, -+ unsigned); -+int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, -+ inode_set_fn, void *, unsigned); -+ -+void bch2_vfs_exit(void); -+int bch2_vfs_init(void); -+ -+#else -+ -+static inline void 
bch2_vfs_exit(void) {} -+static inline int bch2_vfs_init(void) { return 0; } -+ -+#endif /* NO_BCACHEFS_FS */ -+ -+#endif /* _BCACHEFS_FS_H */ -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -new file mode 100644 -index 000000000000..5a6df3d1973a ---- /dev/null -+++ b/fs/bcachefs/fsck.c -@@ -0,0 +1,1502 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "dirent.h" -+#include "error.h" -+#include "fs-common.h" -+#include "fsck.h" -+#include "inode.h" -+#include "keylist.h" -+#include "super.h" -+#include "xattr.h" -+ -+#include /* struct qstr */ -+#include -+ -+#define QSTR(n) { { { .len = strlen(n) } }, .name = n } -+ -+static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 sectors = 0; -+ int ret; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_EXTENTS, -+ POS(inum, 0), 0, k, ret) { -+ if (k.k->p.inode != inum) -+ break; -+ -+ if (bkey_extent_is_allocation(k.k)) -+ sectors += k.k->size; -+ } -+ -+ bch2_trans_iter_free(trans, iter); -+ -+ return ret ?: sectors; -+} -+ -+static int __remove_dirent(struct btree_trans *trans, -+ struct bkey_s_c_dirent dirent) -+{ -+ struct bch_fs *c = trans->c; -+ struct qstr name; -+ struct bch_inode_unpacked dir_inode; -+ struct bch_hash_info dir_hash_info; -+ u64 dir_inum = dirent.k->p.inode; -+ int ret; -+ char *buf; -+ -+ name.len = bch2_dirent_name_bytes(dirent); -+ buf = bch2_trans_kmalloc(trans, name.len + 1); -+ if (IS_ERR(buf)) -+ return PTR_ERR(buf); -+ -+ memcpy(buf, dirent.v->d_name, name.len); -+ buf[name.len] = '\0'; -+ name.name = buf; -+ -+ ret = bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode); -+ if (ret && ret != -EINTR) -+ bch_err(c, "remove_dirent: err %i looking up directory inode", ret); -+ if (ret) -+ return ret; -+ -+ dir_hash_info = bch2_hash_info_init(c, &dir_inode); -+ -+ ret = bch2_hash_delete(trans, bch2_dirent_hash_desc, -+ &dir_hash_info, dir_inum, &name); -+ if (ret && ret != -EINTR) -+ bch_err(c, "remove_dirent: err %i deleting dirent", ret); -+ if (ret) -+ return ret; -+ -+ return 0; -+} -+ -+static int remove_dirent(struct btree_trans *trans, -+ struct bkey_s_c_dirent dirent) -+{ -+ return __bch2_trans_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ __remove_dirent(trans, dirent)); -+} -+ -+static int reattach_inode(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode, -+ u64 inum) -+{ -+ struct bch_inode_unpacked dir_u, inode_u; -+ char name_buf[20]; -+ struct qstr name; -+ int ret; -+ -+ snprintf(name_buf, sizeof(name_buf), "%llu", inum); -+ name = (struct qstr) QSTR(name_buf); -+ -+ ret = bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_LAZY_RW, -+ bch2_link_trans(&trans, lostfound_inode->bi_inum, -+ inum, &dir_u, &inode_u, &name)); -+ if (ret) -+ bch_err(c, "error %i reattaching inode %llu", ret, inum); -+ -+ return ret; -+} -+ -+struct inode_walker { -+ bool first_this_inode; -+ bool have_inode; -+ u64 cur_inum; -+ struct bch_inode_unpacked inode; -+}; -+ -+static struct inode_walker inode_walker_init(void) -+{ -+ return (struct inode_walker) { -+ .cur_inum = -1, -+ .have_inode = false, -+ }; -+} -+ -+static int walk_inode(struct btree_trans *trans, -+ struct inode_walker *w, u64 inum) -+{ -+ if (inum != w->cur_inum) { -+ int ret = bch2_inode_find_by_inum_trans(trans, inum, -+ &w->inode); -+ -+ if (ret && ret != -ENOENT) -+ return ret; -+ -+ w->have_inode = !ret; -+ w->cur_inum = inum; -+ w->first_this_inode = 
true; -+ } else { -+ w->first_this_inode = false; -+ } -+ -+ return 0; -+} -+ -+struct hash_check { -+ struct bch_hash_info info; -+ -+ /* start of current chain of hash collisions: */ -+ struct btree_iter *chain; -+ -+ /* next offset in current chain of hash collisions: */ -+ u64 chain_end; -+}; -+ -+static void hash_check_init(struct hash_check *h) -+{ -+ h->chain = NULL; -+ h->chain_end = 0; -+} -+ -+static void hash_stop_chain(struct btree_trans *trans, -+ struct hash_check *h) -+{ -+ if (h->chain) -+ bch2_trans_iter_free(trans, h->chain); -+ h->chain = NULL; -+} -+ -+static void hash_check_set_inode(struct btree_trans *trans, -+ struct hash_check *h, -+ const struct bch_inode_unpacked *bi) -+{ -+ h->info = bch2_hash_info_init(trans->c, bi); -+ hash_stop_chain(trans, h); -+} -+ -+static int hash_redo_key(const struct bch_hash_desc desc, -+ struct btree_trans *trans, struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k, -+ u64 hashed) -+{ -+ struct bkey_i delete; -+ struct bkey_i *tmp; -+ -+ tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ if (IS_ERR(tmp)) -+ return PTR_ERR(tmp); -+ -+ bkey_reassemble(tmp, k); -+ -+ bkey_init(&delete.k); -+ delete.k.p = k_iter->pos; -+ bch2_trans_update(trans, k_iter, &delete, 0); -+ -+ return bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode, -+ tmp, BCH_HASH_SET_MUST_CREATE); -+} -+ -+static int fsck_hash_delete_at(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ struct bch_hash_info *info, -+ struct btree_iter *iter) -+{ -+ int ret; -+retry: -+ ret = bch2_hash_delete_at(trans, desc, info, iter) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW); -+ if (ret == -EINTR) { -+ ret = bch2_btree_iter_traverse(iter); -+ if (!ret) -+ goto retry; -+ } -+ -+ return ret; -+} -+ -+static int hash_check_duplicates(struct btree_trans *trans, -+ const struct bch_hash_desc desc, struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *iter; -+ struct bkey_s_c k2; -+ char buf[200]; -+ int ret = 0; -+ -+ if (!bkey_cmp(h->chain->pos, k_iter->pos)) -+ return 0; -+ -+ iter = bch2_trans_copy_iter(trans, h->chain); -+ BUG_ON(IS_ERR(iter)); -+ -+ for_each_btree_key_continue(iter, 0, k2, ret) { -+ if (bkey_cmp(k2.k->p, k.k->p) >= 0) -+ break; -+ -+ if (fsck_err_on(k2.k->type == desc.key_type && -+ !desc.cmp_bkey(k, k2), c, -+ "duplicate hash table keys:\n%s", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ ret = fsck_hash_delete_at(trans, desc, &h->info, k_iter); -+ if (ret) -+ return ret; -+ ret = 1; -+ break; -+ } -+ } -+fsck_err: -+ bch2_trans_iter_free(trans, iter); -+ return ret; -+} -+ -+static void hash_set_chain_start(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k) -+{ -+ bool hole = (k.k->type != KEY_TYPE_whiteout && -+ k.k->type != desc.key_type); -+ -+ if (hole || k.k->p.offset > h->chain_end + 1) -+ hash_stop_chain(trans, h); -+ -+ if (!hole) { -+ if (!h->chain) { -+ h->chain = bch2_trans_copy_iter(trans, k_iter); -+ BUG_ON(IS_ERR(h->chain)); -+ } -+ -+ h->chain_end = k.k->p.offset; -+ } -+} -+ -+static bool key_has_correct_hash(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k) -+{ -+ u64 hash; -+ -+ hash_set_chain_start(trans, desc, h, k_iter, k); -+ -+ if (k.k->type != desc.key_type) -+ return true; -+ -+ hash = 
desc.hash_bkey(&h->info, k); -+ -+ return hash >= h->chain->pos.offset && -+ hash <= k.k->p.offset; -+} -+ -+static int hash_check_key(struct btree_trans *trans, -+ const struct bch_hash_desc desc, struct hash_check *h, -+ struct btree_iter *k_iter, struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ char buf[200]; -+ u64 hashed; -+ int ret = 0; -+ -+ hash_set_chain_start(trans, desc, h, k_iter, k); -+ -+ if (k.k->type != desc.key_type) -+ return 0; -+ -+ hashed = desc.hash_bkey(&h->info, k); -+ -+ if (fsck_err_on(hashed < h->chain->pos.offset || -+ hashed > k.k->p.offset, c, -+ "hash table key at wrong offset: btree %u, %llu, " -+ "hashed to %llu chain starts at %llu\n%s", -+ desc.btree_id, k.k->p.offset, -+ hashed, h->chain->pos.offset, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) { -+ ret = __bch2_trans_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, -+ hash_redo_key(desc, trans, h, k_iter, k, hashed)); -+ if (ret) { -+ bch_err(c, "hash_redo_key err %i", ret); -+ return ret; -+ } -+ return 1; -+ } -+ -+ ret = hash_check_duplicates(trans, desc, h, k_iter, k); -+fsck_err: -+ return ret; -+} -+ -+static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h, -+ struct btree_iter *iter, struct bkey_s_c *k) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_i_dirent *d = NULL; -+ int ret = -EINVAL; -+ char buf[200]; -+ unsigned len; -+ u64 hash; -+ -+ if (key_has_correct_hash(trans, bch2_dirent_hash_desc, h, iter, *k)) -+ return 0; -+ -+ len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k)); -+ BUG_ON(!len); -+ -+ memcpy(buf, bkey_s_c_to_dirent(*k).v->d_name, len); -+ buf[len] = '\0'; -+ -+ d = kmalloc(bkey_bytes(k->k), GFP_KERNEL); -+ if (!d) { -+ bch_err(c, "memory allocation failure"); -+ return -ENOMEM; -+ } -+ -+ bkey_reassemble(&d->k_i, *k); -+ -+ do { -+ --len; -+ if (!len) -+ goto err_redo; -+ -+ d->k.u64s = BKEY_U64s + dirent_val_u64s(len); -+ -+ BUG_ON(bkey_val_bytes(&d->k) < -+ offsetof(struct bch_dirent, d_name) + len); -+ -+ memset(d->v.d_name + len, 0, -+ bkey_val_bytes(&d->k) - -+ offsetof(struct bch_dirent, d_name) - len); -+ -+ hash = bch2_dirent_hash_desc.hash_bkey(&h->info, -+ bkey_i_to_s_c(&d->k_i)); -+ } while (hash < h->chain->pos.offset || -+ hash > k->k->p.offset); -+ -+ if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)", -+ buf, strlen(buf), d->v.d_name, len)) { -+ ret = __bch2_trans_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ (bch2_trans_update(trans, iter, &d->k_i, 0), 0)); -+ if (ret) -+ goto err; -+ -+ *k = bch2_btree_iter_peek(iter); -+ -+ BUG_ON(k->k->type != KEY_TYPE_dirent); -+ } -+err: -+fsck_err: -+ kfree(d); -+ return ret; -+err_redo: -+ hash = bch2_dirent_hash_desc.hash_bkey(&h->info, *k); -+ -+ if (fsck_err(c, "cannot fix dirent by removing trailing garbage %s (%zu)\n" -+ "hash table key at wrong offset: btree %u, offset %llu, " -+ "hashed to %llu chain starts at %llu\n%s", -+ buf, strlen(buf), BTREE_ID_DIRENTS, -+ k->k->p.offset, hash, h->chain->pos.offset, -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ *k), buf))) { -+ ret = __bch2_trans_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, -+ hash_redo_key(bch2_dirent_hash_desc, trans, -+ h, iter, *k, hash)); -+ if (ret) -+ bch_err(c, "hash_redo_key err %i", ret); -+ else -+ ret = 1; -+ } -+ -+ goto err; -+} -+ -+static int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size) -+{ -+ return bch2_btree_delete_range(c, BTREE_ID_EXTENTS, -+ POS(inode_nr, round_up(new_size, block_bytes(c)) >> 
9), -+ POS(inode_nr + 1, 0), NULL); -+} -+ -+static int bch2_fix_overlapping_extent(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, struct bpos cut_at) -+{ -+ struct btree_iter *u_iter; -+ struct bkey_i *u; -+ int ret; -+ -+ u = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ return ret; -+ -+ bkey_reassemble(u, k); -+ bch2_cut_front(cut_at, u); -+ -+ u_iter = bch2_trans_copy_iter(trans, iter); -+ ret = PTR_ERR_OR_ZERO(u_iter); -+ if (ret) -+ return ret; -+ -+ /* -+ * We don't want to go through the -+ * extent_handle_overwrites path: -+ */ -+ __bch2_btree_iter_set_pos(u_iter, u->k.p, false); -+ -+ /* -+ * XXX: this is going to leave disk space -+ * accounting slightly wrong -+ */ -+ ret = bch2_trans_update(trans, u_iter, u, 0); -+ bch2_trans_iter_put(trans, u_iter); -+ return ret; -+} -+ -+/* -+ * Walk extents: verify that extents have a corresponding S_ISREG inode, and -+ * that i_size an i_sectors are consistent -+ */ -+noinline_for_stack -+static int check_extents(struct bch_fs *c) -+{ -+ struct inode_walker w = inode_walker_init(); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_on_stack prev; -+ u64 i_sectors; -+ int ret = 0; -+ -+ bkey_on_stack_init(&prev); -+ prev.k->k = KEY(0, 0, 0); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ bch_verbose(c, "checking extents"); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(BCACHEFS_ROOT_INO, 0), -+ BTREE_ITER_INTENT); -+retry: -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) { -+ char buf1[200]; -+ char buf2[200]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k)); -+ bch2_bkey_val_to_text(&PBUF(buf2), c, k); -+ -+ if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) { -+ ret = __bch2_trans_do(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ bch2_fix_overlapping_extent(&trans, -+ iter, k, prev.k->k.p)); -+ if (ret) -+ goto err; -+ } -+ } -+ bkey_on_stack_reassemble(&prev, c, k); -+ -+ ret = walk_inode(&trans, &w, k.k->p.inode); -+ if (ret) -+ break; -+ -+ if (fsck_err_on(!w.have_inode, c, -+ "extent type %u for missing inode %llu", -+ k.k->type, k.k->p.inode) || -+ fsck_err_on(w.have_inode && -+ !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c, -+ "extent type %u for non regular file, inode %llu mode %o", -+ k.k->type, k.k->p.inode, w.inode.bi_mode)) { -+ bch2_trans_unlock(&trans); -+ -+ ret = bch2_inode_truncate(c, k.k->p.inode, 0); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (fsck_err_on(w.first_this_inode && -+ w.have_inode && -+ !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) && -+ w.inode.bi_sectors != -+ (i_sectors = bch2_count_inode_sectors(&trans, w.cur_inum)), -+ c, "inode %llu has incorrect i_sectors: got %llu, should be %llu", -+ w.inode.bi_inum, -+ w.inode.bi_sectors, i_sectors)) { -+ struct bkey_inode_buf p; -+ -+ w.inode.bi_sectors = i_sectors; -+ -+ bch2_trans_unlock(&trans); -+ -+ bch2_inode_pack(&p, &w.inode); -+ -+ ret = bch2_btree_insert(c, BTREE_ID_INODES, -+ &p.inode.k_i, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW); -+ if (ret) { -+ bch_err(c, "error in fsck: error %i updating inode", ret); -+ goto err; -+ } -+ -+ /* revalidate iterator: */ -+ k = bch2_btree_iter_peek(iter); -+ } -+ -+ if (fsck_err_on(w.have_inode && -+ !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && -+ k.k->type != KEY_TYPE_reservation && -+ k.k->p.offset > 
round_up(w.inode.bi_size, block_bytes(c)) >> 9, c, -+ "extent type %u offset %llu past end of inode %llu, i_size %llu", -+ k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { -+ bch2_trans_unlock(&trans); -+ -+ ret = bch2_inode_truncate(c, k.k->p.inode, -+ w.inode.bi_size); -+ if (ret) -+ goto err; -+ continue; -+ } -+ } -+err: -+fsck_err: -+ if (ret == -EINTR) -+ goto retry; -+ bkey_on_stack_exit(&prev, c); -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+/* -+ * Walk dirents: verify that they all have a corresponding S_ISDIR inode, -+ * validate d_type -+ */ -+noinline_for_stack -+static int check_dirents(struct bch_fs *c) -+{ -+ struct inode_walker w = inode_walker_init(); -+ struct hash_check h; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ unsigned name_len; -+ char buf[200]; -+ int ret = 0; -+ -+ bch_verbose(c, "checking dirents"); -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ hash_check_init(&h); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, -+ POS(BCACHEFS_ROOT_INO, 0), 0); -+retry: -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ struct bkey_s_c_dirent d; -+ struct bch_inode_unpacked target; -+ bool have_target; -+ u64 d_inum; -+ -+ ret = walk_inode(&trans, &w, k.k->p.inode); -+ if (ret) -+ break; -+ -+ if (fsck_err_on(!w.have_inode, c, -+ "dirent in nonexisting directory:\n%s", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf)) || -+ fsck_err_on(!S_ISDIR(w.inode.bi_mode), c, -+ "dirent in non directory inode type %u:\n%s", -+ mode_to_type(w.inode.bi_mode), -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (w.first_this_inode && w.have_inode) -+ hash_check_set_inode(&trans, &h, &w.inode); -+ -+ ret = check_dirent_hash(&trans, &h, iter, &k); -+ if (ret > 0) { -+ ret = 0; -+ continue; -+ } -+ if (ret) -+ goto fsck_err; -+ -+ if (ret) -+ goto fsck_err; -+ -+ if (k.k->type != KEY_TYPE_dirent) -+ continue; -+ -+ d = bkey_s_c_to_dirent(k); -+ d_inum = le64_to_cpu(d.v->d_inum); -+ -+ name_len = bch2_dirent_name_bytes(d); -+ -+ if (fsck_err_on(!name_len, c, "empty dirent") || -+ fsck_err_on(name_len == 1 && -+ !memcmp(d.v->d_name, ".", 1), c, -+ ". dirent") || -+ fsck_err_on(name_len == 2 && -+ !memcmp(d.v->d_name, "..", 2), c, -+ ".. dirent") || -+ fsck_err_on(name_len == 2 && -+ !memcmp(d.v->d_name, "..", 2), c, -+ ".. 
dirent") || -+ fsck_err_on(memchr(d.v->d_name, '/', name_len), c, -+ "dirent name has invalid chars")) { -+ ret = remove_dirent(&trans, d); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (fsck_err_on(d_inum == d.k->p.inode, c, -+ "dirent points to own directory:\n%s", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ ret = remove_dirent(&trans, d); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ ret = bch2_inode_find_by_inum_trans(&trans, d_inum, &target); -+ if (ret && ret != -ENOENT) -+ break; -+ -+ have_target = !ret; -+ ret = 0; -+ -+ if (fsck_err_on(!have_target, c, -+ "dirent points to missing inode:\n%s", -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ ret = remove_dirent(&trans, d); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (fsck_err_on(have_target && -+ d.v->d_type != -+ mode_to_type(target.bi_mode), c, -+ "incorrect d_type: should be %u:\n%s", -+ mode_to_type(target.bi_mode), -+ (bch2_bkey_val_to_text(&PBUF(buf), c, -+ k), buf))) { -+ struct bkey_i_dirent *n; -+ -+ n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); -+ if (!n) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ bkey_reassemble(&n->k_i, d.s_c); -+ n->v.d_type = mode_to_type(target.bi_mode); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ (bch2_trans_update(&trans, iter, &n->k_i, 0), 0)); -+ kfree(n); -+ if (ret) -+ goto err; -+ -+ } -+ } -+ -+ hash_stop_chain(&trans, &h); -+err: -+fsck_err: -+ if (ret == -EINTR) -+ goto retry; -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+/* -+ * Walk xattrs: verify that they all have a corresponding inode -+ */ -+noinline_for_stack -+static int check_xattrs(struct bch_fs *c) -+{ -+ struct inode_walker w = inode_walker_init(); -+ struct hash_check h; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch_verbose(c, "checking xattrs"); -+ -+ hash_check_init(&h); -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, -+ POS(BCACHEFS_ROOT_INO, 0), 0); -+retry: -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ ret = walk_inode(&trans, &w, k.k->p.inode); -+ if (ret) -+ break; -+ -+ if (fsck_err_on(!w.have_inode, c, -+ "xattr for missing inode %llu", -+ k.k->p.inode)) { -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ if (w.first_this_inode && w.have_inode) -+ hash_check_set_inode(&trans, &h, &w.inode); -+ -+ ret = hash_check_key(&trans, bch2_xattr_hash_desc, -+ &h, iter, k); -+ if (ret) -+ goto fsck_err; -+ } -+err: -+fsck_err: -+ if (ret == -EINTR) -+ goto retry; -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+/* Get root directory, create if it doesn't exist: */ -+static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode) -+{ -+ struct bkey_inode_buf packed; -+ int ret; -+ -+ bch_verbose(c, "checking root directory"); -+ -+ ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode); -+ if (ret && ret != -ENOENT) -+ return ret; -+ -+ if (fsck_err_on(ret, c, "root directory missing")) -+ goto create_root; -+ -+ if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c, -+ "root inode not a directory")) -+ goto create_root; -+ -+ return 0; -+fsck_err: -+ return ret; -+create_root: -+ bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|0755, -+ 0, NULL); -+ root_inode->bi_inum = BCACHEFS_ROOT_INO; -+ -+ bch2_inode_pack(&packed, root_inode); -+ -+ return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i, -+ NULL, NULL, -+ 
BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW); -+} -+ -+/* Get lost+found, create if it doesn't exist: */ -+static int check_lostfound(struct bch_fs *c, -+ struct bch_inode_unpacked *root_inode, -+ struct bch_inode_unpacked *lostfound_inode) -+{ -+ struct qstr lostfound = QSTR("lost+found"); -+ struct bch_hash_info root_hash_info = -+ bch2_hash_info_init(c, root_inode); -+ u64 inum; -+ int ret; -+ -+ bch_verbose(c, "checking lost+found"); -+ -+ inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info, -+ &lostfound); -+ if (!inum) { -+ bch_notice(c, "creating lost+found"); -+ goto create_lostfound; -+ } -+ -+ ret = bch2_inode_find_by_inum(c, inum, lostfound_inode); -+ if (ret && ret != -ENOENT) -+ return ret; -+ -+ if (fsck_err_on(ret, c, "lost+found missing")) -+ goto create_lostfound; -+ -+ if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c, -+ "lost+found inode not a directory")) -+ goto create_lostfound; -+ -+ return 0; -+fsck_err: -+ return ret; -+create_lostfound: -+ bch2_inode_init_early(c, lostfound_inode); -+ -+ ret = bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ bch2_create_trans(&trans, -+ BCACHEFS_ROOT_INO, root_inode, -+ lostfound_inode, &lostfound, -+ 0, 0, S_IFDIR|0700, 0, NULL, NULL)); -+ if (ret) -+ bch_err(c, "error creating lost+found: %i", ret); -+ -+ return ret; -+} -+ -+struct inode_bitmap { -+ unsigned long *bits; -+ size_t size; -+}; -+ -+static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr) -+{ -+ return nr < b->size ? test_bit(nr, b->bits) : false; -+} -+ -+static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr) -+{ -+ if (nr >= b->size) { -+ size_t new_size = max_t(size_t, max_t(size_t, -+ PAGE_SIZE * 8, -+ b->size * 2), -+ nr + 1); -+ void *n; -+ -+ new_size = roundup_pow_of_two(new_size); -+ n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO); -+ if (!n) { -+ return -ENOMEM; -+ } -+ -+ b->bits = n; -+ b->size = new_size; -+ } -+ -+ __set_bit(nr, b->bits); -+ return 0; -+} -+ -+struct pathbuf { -+ size_t nr; -+ size_t size; -+ -+ struct pathbuf_entry { -+ u64 inum; -+ u64 offset; -+ } *entries; -+}; -+ -+static int path_down(struct pathbuf *p, u64 inum) -+{ -+ if (p->nr == p->size) { -+ size_t new_size = max_t(size_t, 256UL, p->size * 2); -+ void *n = krealloc(p->entries, -+ new_size * sizeof(p->entries[0]), -+ GFP_KERNEL); -+ if (!n) -+ return -ENOMEM; -+ -+ p->entries = n; -+ p->size = new_size; -+ }; -+ -+ p->entries[p->nr++] = (struct pathbuf_entry) { -+ .inum = inum, -+ .offset = 0, -+ }; -+ return 0; -+} -+ -+noinline_for_stack -+static int check_directory_structure(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode) -+{ -+ struct inode_bitmap dirs_done = { NULL, 0 }; -+ struct pathbuf path = { 0, 0, NULL }; -+ struct pathbuf_entry *e; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_dirent dirent; -+ bool had_unreachable; -+ u64 d_inum; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ bch_verbose(c, "checking directory structure"); -+ -+ /* DFS: */ -+restart_dfs: -+ had_unreachable = false; -+ -+ ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO); -+ if (ret) { -+ bch_err(c, "memory allocation failure in inode_bitmap_set()"); -+ goto err; -+ } -+ -+ ret = path_down(&path, BCACHEFS_ROOT_INO); -+ if (ret) -+ goto err; -+ -+ while (path.nr) { -+next: -+ e = &path.entries[path.nr - 1]; -+ -+ if (e->offset == U64_MAX) -+ goto up; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, -+ 
POS(e->inum, e->offset + 1), 0, k, ret) { -+ if (k.k->p.inode != e->inum) -+ break; -+ -+ e->offset = k.k->p.offset; -+ -+ if (k.k->type != KEY_TYPE_dirent) -+ continue; -+ -+ dirent = bkey_s_c_to_dirent(k); -+ -+ if (dirent.v->d_type != DT_DIR) -+ continue; -+ -+ d_inum = le64_to_cpu(dirent.v->d_inum); -+ -+ if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c, -+ "directory %llu has multiple hardlinks", -+ d_inum)) { -+ ret = remove_dirent(&trans, dirent); -+ if (ret) -+ goto err; -+ continue; -+ } -+ -+ ret = inode_bitmap_set(&dirs_done, d_inum); -+ if (ret) { -+ bch_err(c, "memory allocation failure in inode_bitmap_set()"); -+ goto err; -+ } -+ -+ ret = path_down(&path, d_inum); -+ if (ret) { -+ goto err; -+ } -+ -+ ret = bch2_trans_iter_free(&trans, iter); -+ if (ret) { -+ bch_err(c, "btree error %i in fsck", ret); -+ goto err; -+ } -+ goto next; -+ } -+ ret = bch2_trans_iter_free(&trans, iter) ?: ret; -+ if (ret) { -+ bch_err(c, "btree error %i in fsck", ret); -+ goto err; -+ } -+up: -+ path.nr--; -+ } -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS_MIN, 0); -+retry: -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ if (k.k->type != KEY_TYPE_inode) -+ continue; -+ -+ if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode))) -+ continue; -+ -+ ret = bch2_empty_dir_trans(&trans, k.k->p.inode); -+ if (ret == -EINTR) -+ goto retry; -+ if (!ret) -+ continue; -+ -+ if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.offset), c, -+ "unreachable directory found (inum %llu)", -+ k.k->p.offset)) { -+ bch2_trans_unlock(&trans); -+ -+ ret = reattach_inode(c, lostfound_inode, k.k->p.offset); -+ if (ret) { -+ goto err; -+ } -+ -+ had_unreachable = true; -+ } -+ } -+ bch2_trans_iter_free(&trans, iter); -+ if (ret) -+ goto err; -+ -+ if (had_unreachable) { -+ bch_info(c, "reattached unreachable directories, restarting pass to check for loops"); -+ kfree(dirs_done.bits); -+ kfree(path.entries); -+ memset(&dirs_done, 0, sizeof(dirs_done)); -+ memset(&path, 0, sizeof(path)); -+ goto restart_dfs; -+ } -+err: -+fsck_err: -+ ret = bch2_trans_exit(&trans) ?: ret; -+ kfree(dirs_done.bits); -+ kfree(path.entries); -+ return ret; -+} -+ -+struct nlink { -+ u32 count; -+ u32 dir_count; -+}; -+ -+typedef GENRADIX(struct nlink) nlink_table; -+ -+static void inc_link(struct bch_fs *c, nlink_table *links, -+ u64 range_start, u64 *range_end, -+ u64 inum, bool dir) -+{ -+ struct nlink *link; -+ -+ if (inum < range_start || inum >= *range_end) -+ return; -+ -+ link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL); -+ if (!link) { -+ bch_verbose(c, "allocation failed during fsck - will need another pass"); -+ *range_end = inum; -+ return; -+ } -+ -+ if (dir) -+ link->dir_count++; -+ else -+ link->count++; -+} -+ -+noinline_for_stack -+static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, -+ u64 range_start, u64 *range_end) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_dirent d; -+ u64 d_inum; -+ int ret; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k, ret) { -+ switch (k.k->type) { -+ case KEY_TYPE_dirent: -+ d = bkey_s_c_to_dirent(k); -+ d_inum = le64_to_cpu(d.v->d_inum); -+ -+ if (d.v->d_type == DT_DIR) -+ inc_link(c, links, range_start, range_end, -+ d.k->p.inode, true); -+ -+ inc_link(c, links, range_start, range_end, -+ d_inum, false); -+ -+ break; -+ } -+ 
-+ bch2_trans_cond_resched(&trans); -+ } -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ bch_err(c, "error in fsck: btree error %i while walking dirents", ret); -+ -+ return ret; -+} -+ -+static int check_inode_nlink(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode, -+ struct bch_inode_unpacked *u, -+ struct nlink *link, -+ bool *do_update) -+{ -+ u32 i_nlink = bch2_inode_nlink_get(u); -+ u32 real_i_nlink = -+ link->count * nlink_bias(u->bi_mode) + -+ link->dir_count; -+ int ret = 0; -+ -+ /* -+ * These should have been caught/fixed by earlier passes, we don't -+ * repair them here: -+ */ -+ if (S_ISDIR(u->bi_mode) && link->count > 1) { -+ need_fsck_err(c, "directory %llu with multiple hardlinks: %u", -+ u->bi_inum, link->count); -+ return 0; -+ } -+ -+ if (S_ISDIR(u->bi_mode) && !link->count) { -+ need_fsck_err(c, "unreachable directory found (inum %llu)", -+ u->bi_inum); -+ return 0; -+ } -+ -+ if (!S_ISDIR(u->bi_mode) && link->dir_count) { -+ need_fsck_err(c, "non directory with subdirectories (inum %llu)", -+ u->bi_inum); -+ return 0; -+ } -+ -+ if (!link->count && -+ !(u->bi_flags & BCH_INODE_UNLINKED) && -+ (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) { -+ if (fsck_err(c, "unreachable inode %llu not marked as unlinked (type %u)", -+ u->bi_inum, mode_to_type(u->bi_mode)) == -+ FSCK_ERR_IGNORE) -+ return 0; -+ -+ ret = reattach_inode(c, lostfound_inode, u->bi_inum); -+ if (ret) -+ return ret; -+ -+ link->count = 1; -+ real_i_nlink = nlink_bias(u->bi_mode) + link->dir_count; -+ goto set_i_nlink; -+ } -+ -+ if (i_nlink < link->count) { -+ if (fsck_err(c, "inode %llu i_link too small (%u < %u, type %i)", -+ u->bi_inum, i_nlink, link->count, -+ mode_to_type(u->bi_mode)) == FSCK_ERR_IGNORE) -+ return 0; -+ goto set_i_nlink; -+ } -+ -+ if (i_nlink != real_i_nlink && -+ c->sb.clean) { -+ if (fsck_err(c, "filesystem marked clean, " -+ "but inode %llu has wrong i_nlink " -+ "(type %u i_nlink %u, should be %u)", -+ u->bi_inum, mode_to_type(u->bi_mode), -+ i_nlink, real_i_nlink) == FSCK_ERR_IGNORE) -+ return 0; -+ goto set_i_nlink; -+ } -+ -+ if (i_nlink != real_i_nlink && -+ (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) { -+ if (fsck_err(c, "inode %llu has wrong i_nlink " -+ "(type %u i_nlink %u, should be %u)", -+ u->bi_inum, mode_to_type(u->bi_mode), -+ i_nlink, real_i_nlink) == FSCK_ERR_IGNORE) -+ return 0; -+ goto set_i_nlink; -+ } -+ -+ if (real_i_nlink && i_nlink != real_i_nlink) -+ bch_verbose(c, "setting inode %llu nlink from %u to %u", -+ u->bi_inum, i_nlink, real_i_nlink); -+set_i_nlink: -+ if (i_nlink != real_i_nlink) { -+ bch2_inode_nlink_set(u, real_i_nlink); -+ *do_update = true; -+ } -+fsck_err: -+ return ret; -+} -+ -+static int check_inode(struct btree_trans *trans, -+ struct bch_inode_unpacked *lostfound_inode, -+ struct btree_iter *iter, -+ struct bkey_s_c_inode inode, -+ struct nlink *link) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_inode_unpacked u; -+ bool do_update = false; -+ int ret = 0; -+ -+ ret = bch2_inode_unpack(inode, &u); -+ -+ bch2_trans_unlock(trans); -+ -+ if (bch2_fs_inconsistent_on(ret, c, -+ "error unpacking inode %llu in fsck", -+ inode.k->p.inode)) -+ return ret; -+ -+ if (link) { -+ ret = check_inode_nlink(c, lostfound_inode, &u, link, -+ &do_update); -+ if (ret) -+ return ret; -+ } -+ -+ if (u.bi_flags & BCH_INODE_UNLINKED && -+ (!c->sb.clean || -+ fsck_err(c, "filesystem marked clean, but inode %llu unlinked", -+ u.bi_inum))) { -+ bch_verbose(c, "deleting inode %llu", u.bi_inum); -+ -+ bch2_fs_lazy_rw(c); -+ -+ 
ret = bch2_inode_rm(c, u.bi_inum); -+ if (ret) -+ bch_err(c, "error in fsck: error %i while deleting inode", ret); -+ return ret; -+ } -+ -+ if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY && -+ (!c->sb.clean || -+ fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty", -+ u.bi_inum))) { -+ bch_verbose(c, "truncating inode %llu", u.bi_inum); -+ -+ bch2_fs_lazy_rw(c); -+ -+ /* -+ * XXX: need to truncate partial blocks too here - or ideally -+ * just switch units to bytes and that issue goes away -+ */ -+ -+ ret = bch2_inode_truncate(c, u.bi_inum, u.bi_size); -+ if (ret) { -+ bch_err(c, "error in fsck: error %i truncating inode", ret); -+ return ret; -+ } -+ -+ /* -+ * We truncated without our normal sector accounting hook, just -+ * make sure we recalculate it: -+ */ -+ u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY; -+ -+ u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; -+ do_update = true; -+ } -+ -+ if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY && -+ (!c->sb.clean || -+ fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty", -+ u.bi_inum))) { -+ s64 sectors; -+ -+ bch_verbose(c, "recounting sectors for inode %llu", -+ u.bi_inum); -+ -+ sectors = bch2_count_inode_sectors(trans, u.bi_inum); -+ if (sectors < 0) { -+ bch_err(c, "error in fsck: error %i recounting inode sectors", -+ (int) sectors); -+ return sectors; -+ } -+ -+ u.bi_sectors = sectors; -+ u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY; -+ do_update = true; -+ } -+ -+ if (do_update) { -+ struct bkey_inode_buf p; -+ -+ bch2_inode_pack(&p, &u); -+ -+ ret = __bch2_trans_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ (bch2_trans_update(trans, iter, &p.inode.k_i, 0), 0)); -+ if (ret) -+ bch_err(c, "error in fsck: error %i " -+ "updating inode", ret); -+ } -+fsck_err: -+ return ret; -+} -+ -+noinline_for_stack -+static int bch2_gc_walk_inodes(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode, -+ nlink_table *links, -+ u64 range_start, u64 range_end) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct nlink *link, zero_links = { 0, 0 }; -+ struct genradix_iter nlinks_iter; -+ int ret = 0, ret2 = 0; -+ u64 nlinks_pos; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, -+ POS(0, range_start), 0); -+ nlinks_iter = genradix_iter_init(links, 0); -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret2 = bkey_err(k))) { -+peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); -+ -+ if (!link && (!k.k || iter->pos.offset >= range_end)) -+ break; -+ -+ nlinks_pos = range_start + nlinks_iter.pos; -+ if (iter->pos.offset > nlinks_pos) { -+ /* Should have been caught by dirents pass: */ -+ need_fsck_err_on(link && link->count, c, -+ "missing inode %llu (nlink %u)", -+ nlinks_pos, link->count); -+ genradix_iter_advance(&nlinks_iter, links); -+ goto peek_nlinks; -+ } -+ -+ if (iter->pos.offset < nlinks_pos || !link) -+ link = &zero_links; -+ -+ if (k.k && k.k->type == KEY_TYPE_inode) { -+ ret = check_inode(&trans, lostfound_inode, iter, -+ bkey_s_c_to_inode(k), link); -+ BUG_ON(ret == -EINTR); -+ if (ret) -+ break; -+ } else { -+ /* Should have been caught by dirents pass: */ -+ need_fsck_err_on(link->count, c, -+ "missing inode %llu (nlink %u)", -+ nlinks_pos, link->count); -+ } -+ -+ if (nlinks_pos == iter->pos.offset) -+ genradix_iter_advance(&nlinks_iter, links); -+ -+ bch2_btree_iter_next(iter); -+ bch2_trans_cond_resched(&trans); -+ } -+fsck_err: -+ bch2_trans_exit(&trans); -+ -+ if (ret2) 
-+ bch_err(c, "error in fsck: btree error %i while walking inodes", ret2); -+ -+ return ret ?: ret2; -+} -+ -+noinline_for_stack -+static int check_inode_nlinks(struct bch_fs *c, -+ struct bch_inode_unpacked *lostfound_inode) -+{ -+ nlink_table links; -+ u64 this_iter_range_start, next_iter_range_start = 0; -+ int ret = 0; -+ -+ bch_verbose(c, "checking inode nlinks"); -+ -+ genradix_init(&links); -+ -+ do { -+ this_iter_range_start = next_iter_range_start; -+ next_iter_range_start = U64_MAX; -+ -+ ret = bch2_gc_walk_dirents(c, &links, -+ this_iter_range_start, -+ &next_iter_range_start); -+ if (ret) -+ break; -+ -+ ret = bch2_gc_walk_inodes(c, lostfound_inode, &links, -+ this_iter_range_start, -+ next_iter_range_start); -+ if (ret) -+ break; -+ -+ genradix_free(&links); -+ } while (next_iter_range_start != U64_MAX); -+ -+ genradix_free(&links); -+ -+ return ret; -+} -+ -+/* -+ * Checks for inconsistencies that shouldn't happen, unless we have a bug. -+ * Doesn't fix them yet, mainly because they haven't yet been observed: -+ */ -+int bch2_fsck_full(struct bch_fs *c) -+{ -+ struct bch_inode_unpacked root_inode, lostfound_inode; -+ -+ return check_extents(c) ?: -+ check_dirents(c) ?: -+ check_xattrs(c) ?: -+ check_root(c, &root_inode) ?: -+ check_lostfound(c, &root_inode, &lostfound_inode) ?: -+ check_directory_structure(c, &lostfound_inode) ?: -+ check_inode_nlinks(c, &lostfound_inode); -+} -+ -+int bch2_fsck_inode_nlink(struct bch_fs *c) -+{ -+ struct bch_inode_unpacked root_inode, lostfound_inode; -+ -+ return check_root(c, &root_inode) ?: -+ check_lostfound(c, &root_inode, &lostfound_inode) ?: -+ check_inode_nlinks(c, &lostfound_inode); -+} -+ -+int bch2_fsck_walk_inodes_only(struct bch_fs *c) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_inode inode; -+ int ret; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k, ret) { -+ if (k.k->type != KEY_TYPE_inode) -+ continue; -+ -+ inode = bkey_s_c_to_inode(k); -+ -+ if (inode.v->bi_flags & -+ (BCH_INODE_I_SIZE_DIRTY| -+ BCH_INODE_I_SECTORS_DIRTY| -+ BCH_INODE_UNLINKED)) { -+ ret = check_inode(&trans, NULL, iter, inode, NULL); -+ BUG_ON(ret == -EINTR); -+ if (ret) -+ break; -+ } -+ } -+ BUG_ON(ret == -EINTR); -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h -new file mode 100644 -index 000000000000..9e4af02bde1e ---- /dev/null -+++ b/fs/bcachefs/fsck.h -@@ -0,0 +1,9 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FSCK_H -+#define _BCACHEFS_FSCK_H -+ -+int bch2_fsck_full(struct bch_fs *); -+int bch2_fsck_inode_nlink(struct bch_fs *); -+int bch2_fsck_walk_inodes_only(struct bch_fs *); -+ -+#endif /* _BCACHEFS_FSCK_H */ -diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c -new file mode 100644 -index 000000000000..7d20f082ad45 ---- /dev/null -+++ b/fs/bcachefs/inode.c -@@ -0,0 +1,554 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_update.h" -+#include "error.h" -+#include "extents.h" -+#include "inode.h" -+#include "str_hash.h" -+ -+#include -+ -+#include -+ -+const char * const bch2_inode_opts[] = { -+#define x(name, ...) 
#name, -+ BCH_INODE_OPTS() -+#undef x -+ NULL, -+}; -+ -+static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 }; -+static const u8 bits_table[8] = { -+ 1 * 8 - 1, -+ 2 * 8 - 2, -+ 3 * 8 - 3, -+ 4 * 8 - 4, -+ 6 * 8 - 5, -+ 8 * 8 - 6, -+ 10 * 8 - 7, -+ 13 * 8 - 8, -+}; -+ -+static int inode_encode_field(u8 *out, u8 *end, u64 hi, u64 lo) -+{ -+ __be64 in[2] = { cpu_to_be64(hi), cpu_to_be64(lo), }; -+ unsigned shift, bytes, bits = likely(!hi) -+ ? fls64(lo) -+ : fls64(hi) + 64; -+ -+ for (shift = 1; shift <= 8; shift++) -+ if (bits < bits_table[shift - 1]) -+ goto got_shift; -+ -+ BUG(); -+got_shift: -+ bytes = byte_table[shift - 1]; -+ -+ BUG_ON(out + bytes > end); -+ -+ memcpy(out, (u8 *) in + 16 - bytes, bytes); -+ *out |= (1 << 8) >> shift; -+ -+ return bytes; -+} -+ -+static int inode_decode_field(const u8 *in, const u8 *end, -+ u64 out[2], unsigned *out_bits) -+{ -+ __be64 be[2] = { 0, 0 }; -+ unsigned bytes, shift; -+ u8 *p; -+ -+ if (in >= end) -+ return -1; -+ -+ if (!*in) -+ return -1; -+ -+ /* -+ * position of highest set bit indicates number of bytes: -+ * shift = number of bits to remove in high byte: -+ */ -+ shift = 8 - __fls(*in); /* 1 <= shift <= 8 */ -+ bytes = byte_table[shift - 1]; -+ -+ if (in + bytes > end) -+ return -1; -+ -+ p = (u8 *) be + 16 - bytes; -+ memcpy(p, in, bytes); -+ *p ^= (1 << 8) >> shift; -+ -+ out[0] = be64_to_cpu(be[0]); -+ out[1] = be64_to_cpu(be[1]); -+ *out_bits = out[0] ? 64 + fls64(out[0]) : fls64(out[1]); -+ -+ return bytes; -+} -+ -+void bch2_inode_pack(struct bkey_inode_buf *packed, -+ const struct bch_inode_unpacked *inode) -+{ -+ u8 *out = packed->inode.v.fields; -+ u8 *end = (void *) &packed[1]; -+ u8 *last_nonzero_field = out; -+ unsigned nr_fields = 0, last_nonzero_fieldnr = 0; -+ unsigned bytes; -+ -+ bkey_inode_init(&packed->inode.k_i); -+ packed->inode.k.p.offset = inode->bi_inum; -+ packed->inode.v.bi_hash_seed = inode->bi_hash_seed; -+ packed->inode.v.bi_flags = cpu_to_le32(inode->bi_flags); -+ packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode); -+ -+#define x(_name, _bits) \ -+ out += inode_encode_field(out, end, 0, inode->_name); \ -+ nr_fields++; \ -+ \ -+ if (inode->_name) { \ -+ last_nonzero_field = out; \ -+ last_nonzero_fieldnr = nr_fields; \ -+ } -+ -+ BCH_INODE_FIELDS() -+#undef x -+ -+ out = last_nonzero_field; -+ nr_fields = last_nonzero_fieldnr; -+ -+ bytes = out - (u8 *) &packed->inode.v; -+ set_bkey_val_bytes(&packed->inode.k, bytes); -+ memset_u64s_tail(&packed->inode.v, 0, bytes); -+ -+ SET_INODE_NR_FIELDS(&packed->inode.v, nr_fields); -+ -+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { -+ struct bch_inode_unpacked unpacked; -+ -+ int ret = bch2_inode_unpack(inode_i_to_s_c(&packed->inode), -+ &unpacked); -+ BUG_ON(ret); -+ BUG_ON(unpacked.bi_inum != inode->bi_inum); -+ BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed); -+ BUG_ON(unpacked.bi_mode != inode->bi_mode); -+ -+#define x(_name, _bits) BUG_ON(unpacked._name != inode->_name); -+ BCH_INODE_FIELDS() -+#undef x -+ } -+} -+ -+int bch2_inode_unpack(struct bkey_s_c_inode inode, -+ struct bch_inode_unpacked *unpacked) -+{ -+ const u8 *in = inode.v->fields; -+ const u8 *end = (void *) inode.v + bkey_val_bytes(inode.k); -+ u64 field[2]; -+ unsigned fieldnr = 0, field_bits; -+ int ret; -+ -+ unpacked->bi_inum = inode.k->p.offset; -+ unpacked->bi_hash_seed = inode.v->bi_hash_seed; -+ unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags); -+ unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode); -+ -+#define x(_name, _bits) \ -+ if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { 
\ -+ memset(&unpacked->_name, 0, \ -+ sizeof(*unpacked) - \ -+ offsetof(struct bch_inode_unpacked, _name)); \ -+ return 0; \ -+ } \ -+ \ -+ ret = inode_decode_field(in, end, field, &field_bits); \ -+ if (ret < 0) \ -+ return ret; \ -+ \ -+ if (field_bits > sizeof(unpacked->_name) * 8) \ -+ return -1; \ -+ \ -+ unpacked->_name = field[1]; \ -+ in += ret; -+ -+ BCH_INODE_FIELDS() -+#undef x -+ -+ /* XXX: signal if there were more fields than expected? */ -+ -+ return 0; -+} -+ -+struct btree_iter *bch2_inode_peek(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode, -+ u64 inum, unsigned flags) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, inum), -+ BTREE_ITER_SLOTS|flags); -+ if (IS_ERR(iter)) -+ return iter; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ ret = k.k->type == KEY_TYPE_inode ? 0 : -EIO; -+ if (ret) -+ goto err; -+ -+ ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode); -+ if (ret) -+ goto err; -+ -+ return iter; -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ERR_PTR(ret); -+} -+ -+int bch2_inode_write(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bch_inode_unpacked *inode) -+{ -+ struct bkey_inode_buf *inode_p; -+ -+ inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); -+ if (IS_ERR(inode_p)) -+ return PTR_ERR(inode_p); -+ -+ bch2_inode_pack(inode_p, inode); -+ bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); -+ return 0; -+} -+ -+const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); -+ struct bch_inode_unpacked unpacked; -+ -+ if (k.k->p.inode) -+ return "nonzero k.p.inode"; -+ -+ if (bkey_val_bytes(k.k) < sizeof(struct bch_inode)) -+ return "incorrect value size"; -+ -+ if (k.k->p.offset < BLOCKDEV_INODE_MAX) -+ return "fs inode in blockdev range"; -+ -+ if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) -+ return "invalid str hash type"; -+ -+ if (bch2_inode_unpack(inode, &unpacked)) -+ return "invalid variable length fields"; -+ -+ if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) -+ return "invalid data checksum type"; -+ -+ if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) -+ return "invalid data checksum type"; -+ -+ if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && -+ unpacked.bi_nlink != 0) -+ return "flagged as unlinked but bi_nlink != 0"; -+ -+ return NULL; -+} -+ -+void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); -+ struct bch_inode_unpacked unpacked; -+ -+ if (bch2_inode_unpack(inode, &unpacked)) { -+ pr_buf(out, "(unpack error)"); -+ return; -+ } -+ -+#define x(_name, _bits) \ -+ pr_buf(out, #_name ": %llu ", (u64) unpacked._name); -+ BCH_INODE_FIELDS() -+#undef x -+} -+ -+const char *bch2_inode_generation_invalid(const struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ if (k.k->p.inode) -+ return "nonzero k.p.inode"; -+ -+ if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_inode_generation gen = bkey_s_c_to_inode_generation(k); -+ -+ pr_buf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation)); -+} -+ -+void bch2_inode_init_early(struct bch_fs *c, -+ struct bch_inode_unpacked *inode_u) -+{ -+ enum bch_str_hash_type 
str_hash = -+ bch2_str_hash_opt_to_type(c, c->opts.str_hash); -+ -+ memset(inode_u, 0, sizeof(*inode_u)); -+ -+ /* ick */ -+ inode_u->bi_flags |= str_hash << INODE_STR_HASH_OFFSET; -+ get_random_bytes(&inode_u->bi_hash_seed, -+ sizeof(inode_u->bi_hash_seed)); -+} -+ -+void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now, -+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev, -+ struct bch_inode_unpacked *parent) -+{ -+ inode_u->bi_mode = mode; -+ inode_u->bi_uid = uid; -+ inode_u->bi_gid = gid; -+ inode_u->bi_dev = rdev; -+ inode_u->bi_atime = now; -+ inode_u->bi_mtime = now; -+ inode_u->bi_ctime = now; -+ inode_u->bi_otime = now; -+ -+ if (parent && parent->bi_mode & S_ISGID) { -+ inode_u->bi_gid = parent->bi_gid; -+ if (S_ISDIR(mode)) -+ inode_u->bi_mode |= S_ISGID; -+ } -+ -+ if (parent) { -+#define x(_name, ...) inode_u->bi_##_name = parent->bi_##_name; -+ BCH_INODE_OPTS() -+#undef x -+ } -+} -+ -+void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, -+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev, -+ struct bch_inode_unpacked *parent) -+{ -+ bch2_inode_init_early(c, inode_u); -+ bch2_inode_init_late(inode_u, bch2_current_time(c), -+ uid, gid, mode, rdev, parent); -+} -+ -+static inline u32 bkey_generation(struct bkey_s_c k) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_inode: -+ BUG(); -+ case KEY_TYPE_inode_generation: -+ return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation); -+ default: -+ return 0; -+ } -+} -+ -+int bch2_inode_create(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode_u, -+ u64 min, u64 max, u64 *hint) -+{ -+ struct bkey_inode_buf *inode_p; -+ struct btree_iter *iter = NULL; -+ struct bkey_s_c k; -+ u64 start; -+ int ret; -+ -+ if (!max) -+ max = ULLONG_MAX; -+ -+ if (trans->c->opts.inodes_32bit) -+ max = min_t(u64, max, U32_MAX); -+ -+ start = READ_ONCE(*hint); -+ -+ if (start >= max || start < min) -+ start = min; -+ -+ inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); -+ if (IS_ERR(inode_p)) -+ return PTR_ERR(inode_p); -+again: -+ for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(0, start), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (bkey_cmp(iter->pos, POS(0, max)) > 0) -+ break; -+ -+ if (k.k->type != KEY_TYPE_inode) -+ goto found_slot; -+ } -+ -+ bch2_trans_iter_put(trans, iter); -+ -+ if (ret) -+ return ret; -+ -+ if (start != min) { -+ /* Retry from start */ -+ start = min; -+ goto again; -+ } -+ -+ return -ENOSPC; -+found_slot: -+ *hint = k.k->p.offset; -+ inode_u->bi_inum = k.k->p.offset; -+ inode_u->bi_generation = bkey_generation(k); -+ -+ bch2_inode_pack(inode_p, inode_u); -+ bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); -+ bch2_trans_iter_put(trans, iter); -+ return 0; -+} -+ -+int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_i_inode_generation delete; -+ struct bpos start = POS(inode_nr, 0); -+ struct bpos end = POS(inode_nr + 1, 0); -+ int ret; -+ -+ /* -+ * If this was a directory, there shouldn't be any real dirents left - -+ * but there could be whiteouts (from hash collisions) that we should -+ * delete: -+ * -+ * XXX: the dirent could ideally would delete whiteouts when they're no -+ * longer needed -+ */ -+ ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS, -+ start, end, NULL) ?: -+ bch2_btree_delete_range(c, BTREE_ID_XATTRS, -+ start, end, NULL) ?: -+ bch2_btree_delete_range(c, BTREE_ID_DIRENTS, -+ start, end, NULL); -+ if (ret) -+ return ret; -+ -+ bch2_trans_init(&trans, c, 
0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ do { -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); -+ u32 bi_generation = 0; -+ -+ ret = bkey_err(k); -+ if (ret) -+ break; -+ -+ bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c, -+ "inode %llu not found when deleting", -+ inode_nr); -+ -+ switch (k.k->type) { -+ case KEY_TYPE_inode: { -+ struct bch_inode_unpacked inode_u; -+ -+ if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u)) -+ bi_generation = inode_u.bi_generation + 1; -+ break; -+ } -+ case KEY_TYPE_inode_generation: { -+ struct bkey_s_c_inode_generation g = -+ bkey_s_c_to_inode_generation(k); -+ bi_generation = le32_to_cpu(g.v->bi_generation); -+ break; -+ } -+ } -+ -+ if (!bi_generation) { -+ bkey_init(&delete.k); -+ delete.k.p.offset = inode_nr; -+ } else { -+ bkey_inode_generation_init(&delete.k_i); -+ delete.k.p.offset = inode_nr; -+ delete.v.bi_generation = cpu_to_le32(bi_generation); -+ } -+ -+ bch2_trans_update(&trans, iter, &delete.k_i, 0); -+ -+ ret = bch2_trans_commit(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+ } while (ret == -EINTR); -+ -+ bch2_trans_exit(&trans); -+ return ret; -+} -+ -+int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr, -+ struct bch_inode_unpacked *inode) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, -+ POS(0, inode_nr), BTREE_ITER_SLOTS); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ ret = k.k->type == KEY_TYPE_inode -+ ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode) -+ : -ENOENT; -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, -+ struct bch_inode_unpacked *inode) -+{ -+ return bch2_trans_do(c, NULL, NULL, 0, -+ bch2_inode_find_by_inum_trans(&trans, inode_nr, inode)); -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_inode_pack_test(void) -+{ -+ struct bch_inode_unpacked *u, test_inodes[] = { -+ { -+ .bi_atime = U64_MAX, -+ .bi_ctime = U64_MAX, -+ .bi_mtime = U64_MAX, -+ .bi_otime = U64_MAX, -+ .bi_size = U64_MAX, -+ .bi_sectors = U64_MAX, -+ .bi_uid = U32_MAX, -+ .bi_gid = U32_MAX, -+ .bi_nlink = U32_MAX, -+ .bi_generation = U32_MAX, -+ .bi_dev = U32_MAX, -+ }, -+ }; -+ -+ for (u = test_inodes; -+ u < test_inodes + ARRAY_SIZE(test_inodes); -+ u++) { -+ struct bkey_inode_buf p; -+ -+ bch2_inode_pack(&p, u); -+ } -+} -+#endif -diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h -new file mode 100644 -index 000000000000..bb759a46dc41 ---- /dev/null -+++ b/fs/bcachefs/inode.h -@@ -0,0 +1,177 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_INODE_H -+#define _BCACHEFS_INODE_H -+ -+#include "opts.h" -+ -+extern const char * const bch2_inode_opts[]; -+ -+const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_inode (struct bkey_ops) { \ -+ .key_invalid = bch2_inode_invalid, \ -+ .val_to_text = bch2_inode_to_text, \ -+} -+ -+const char *bch2_inode_generation_invalid(const struct bch_fs *, -+ struct bkey_s_c); -+void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+#define bch2_bkey_ops_inode_generation (struct bkey_ops) { \ -+ .key_invalid = bch2_inode_generation_invalid, \ -+ .val_to_text = 
bch2_inode_generation_to_text, \ -+} -+ -+struct bch_inode_unpacked { -+ u64 bi_inum; -+ __le64 bi_hash_seed; -+ u32 bi_flags; -+ u16 bi_mode; -+ -+#define x(_name, _bits) u##_bits _name; -+ BCH_INODE_FIELDS() -+#undef x -+}; -+ -+struct bkey_inode_buf { -+ struct bkey_i_inode inode; -+ -+#define x(_name, _bits) + 8 + _bits / 8 -+ u8 _pad[0 + BCH_INODE_FIELDS()]; -+#undef x -+} __attribute__((packed, aligned(8))); -+ -+void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *); -+int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *); -+ -+struct btree_iter *bch2_inode_peek(struct btree_trans *, -+ struct bch_inode_unpacked *, u64, unsigned); -+int bch2_inode_write(struct btree_trans *, struct btree_iter *, -+ struct bch_inode_unpacked *); -+ -+void bch2_inode_init_early(struct bch_fs *, -+ struct bch_inode_unpacked *); -+void bch2_inode_init_late(struct bch_inode_unpacked *, u64, -+ uid_t, gid_t, umode_t, dev_t, -+ struct bch_inode_unpacked *); -+void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *, -+ uid_t, gid_t, umode_t, dev_t, -+ struct bch_inode_unpacked *); -+ -+int bch2_inode_create(struct btree_trans *, -+ struct bch_inode_unpacked *, -+ u64, u64, u64 *); -+ -+int bch2_inode_rm(struct bch_fs *, u64); -+ -+int bch2_inode_find_by_inum_trans(struct btree_trans *, u64, -+ struct bch_inode_unpacked *); -+int bch2_inode_find_by_inum(struct bch_fs *, u64, struct bch_inode_unpacked *); -+ -+static inline struct bch_io_opts bch2_inode_opts_get(struct bch_inode_unpacked *inode) -+{ -+ struct bch_io_opts ret = { 0 }; -+ -+#define x(_name, _bits) \ -+ if (inode->bi_##_name) \ -+ opt_set(ret, _name, inode->bi_##_name - 1); -+ BCH_INODE_OPTS() -+#undef x -+ return ret; -+} -+ -+static inline void bch2_inode_opt_set(struct bch_inode_unpacked *inode, -+ enum inode_opt_id id, u64 v) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Inode_opt_##_name: \ -+ inode->bi_##_name = v; \ -+ break; -+ BCH_INODE_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+static inline u64 bch2_inode_opt_get(struct bch_inode_unpacked *inode, -+ enum inode_opt_id id) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Inode_opt_##_name: \ -+ return inode->bi_##_name; -+ BCH_INODE_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+static inline struct bch_io_opts -+io_opts(struct bch_fs *c, struct bch_inode_unpacked *inode) -+{ -+ struct bch_io_opts opts = bch2_opts_to_inode_opts(c->opts); -+ -+ bch2_io_opts_apply(&opts, bch2_inode_opts_get(inode)); -+ return opts; -+} -+ -+static inline u8 mode_to_type(umode_t mode) -+{ -+ return (mode >> 12) & 15; -+} -+ -+/* i_nlink: */ -+ -+static inline unsigned nlink_bias(umode_t mode) -+{ -+ return S_ISDIR(mode) ? 2 : 1; -+} -+ -+static inline void bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) -+{ -+ if (bi->bi_flags & BCH_INODE_UNLINKED) -+ bi->bi_flags &= ~BCH_INODE_UNLINKED; -+ else -+ bi->bi_nlink++; -+} -+ -+static inline void bch2_inode_nlink_dec(struct bch_inode_unpacked *bi) -+{ -+ BUG_ON(bi->bi_flags & BCH_INODE_UNLINKED); -+ if (bi->bi_nlink) -+ bi->bi_nlink--; -+ else -+ bi->bi_flags |= BCH_INODE_UNLINKED; -+} -+ -+static inline unsigned bch2_inode_nlink_get(struct bch_inode_unpacked *bi) -+{ -+ return bi->bi_flags & BCH_INODE_UNLINKED -+ ? 
0 -+ : bi->bi_nlink + nlink_bias(bi->bi_mode); -+} -+ -+static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi, -+ unsigned nlink) -+{ -+ if (nlink) { -+ bi->bi_nlink = nlink - nlink_bias(bi->bi_mode); -+ bi->bi_flags &= ~BCH_INODE_UNLINKED; -+ } else { -+ bi->bi_nlink = 0; -+ bi->bi_flags |= BCH_INODE_UNLINKED; -+ } -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_inode_pack_test(void); -+#else -+static inline void bch2_inode_pack_test(void) {} -+#endif -+ -+#endif /* _BCACHEFS_INODE_H */ -diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c -new file mode 100644 -index 000000000000..0a4b4eed465c ---- /dev/null -+++ b/fs/bcachefs/io.c -@@ -0,0 +1,2389 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Some low level IO code, and hacks for various block layer limitations -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "bkey_on_stack.h" -+#include "bset.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "compress.h" -+#include "clock.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "extent_update.h" -+#include "inode.h" -+#include "io.h" -+#include "journal.h" -+#include "keylist.h" -+#include "move.h" -+#include "rebalance.h" -+#include "super.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+ -+#include -+ -+const char *bch2_blk_status_to_str(blk_status_t status) -+{ -+ if (status == BLK_STS_REMOVED) -+ return "device removed"; -+ return blk_status_to_str(status); -+} -+ -+static bool bch2_target_congested(struct bch_fs *c, u16 target) -+{ -+ const struct bch_devs_mask *devs; -+ unsigned d, nr = 0, total = 0; -+ u64 now = local_clock(), last; -+ s64 congested; -+ struct bch_dev *ca; -+ -+ if (!target) -+ return false; -+ -+ rcu_read_lock(); -+ devs = bch2_target_to_mask(c, target) ?: -+ &c->rw_devs[BCH_DATA_user]; -+ -+ for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) { -+ ca = rcu_dereference(c->devs[d]); -+ if (!ca) -+ continue; -+ -+ congested = atomic_read(&ca->congested); -+ last = READ_ONCE(ca->congested_last); -+ if (time_after64(now, last)) -+ congested -= (now - last) >> 12; -+ -+ total += max(congested, 0LL); -+ nr++; -+ } -+ rcu_read_unlock(); -+ -+ return bch2_rand_range(nr * CONGESTED_MAX) < total; -+} -+ -+static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency, -+ u64 now, int rw) -+{ -+ u64 latency_capable = -+ ca->io_latency[rw].quantiles.entries[QUANTILE_IDX(1)].m; -+ /* ideally we'd be taking into account the device's variance here: */ -+ u64 latency_threshold = latency_capable << (rw == READ ? 2 : 3); -+ s64 latency_over = io_latency - latency_threshold; -+ -+ if (latency_threshold && latency_over > 0) { -+ /* -+ * bump up congested by approximately latency_over * 4 / -+ * latency_threshold - we don't need much accuracy here so don't -+ * bother with the divide: -+ */ -+ if (atomic_read(&ca->congested) < CONGESTED_MAX) -+ atomic_add(latency_over >> -+ max_t(int, ilog2(latency_threshold) - 2, 0), -+ &ca->congested); -+ -+ ca->congested_last = now; -+ } else if (atomic_read(&ca->congested) > 0) { -+ atomic_dec(&ca->congested); -+ } -+} -+ -+void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) -+{ -+ atomic64_t *latency = &ca->cur_latency[rw]; -+ u64 now = local_clock(); -+ u64 io_latency = time_after64(now, submit_time) -+ ? 
now - submit_time -+ : 0; -+ u64 old, new, v = atomic64_read(latency); -+ -+ do { -+ old = v; -+ -+ /* -+ * If the io latency was reasonably close to the current -+ * latency, skip doing the update and atomic operation - most of -+ * the time: -+ */ -+ if (abs((int) (old - io_latency)) < (old >> 1) && -+ now & ~(~0 << 5)) -+ break; -+ -+ new = ewma_add(old, io_latency, 5); -+ } while ((v = atomic64_cmpxchg(latency, old, new)) != old); -+ -+ bch2_congested_acct(ca, io_latency, now, rw); -+ -+ __bch2_time_stats_update(&ca->io_latency[rw], submit_time, now); -+} -+ -+/* Allocate, free from mempool: */ -+ -+void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio) -+{ -+ struct bvec_iter_all iter; -+ struct bio_vec *bv; -+ -+ bio_for_each_segment_all(bv, bio, iter) -+ if (bv->bv_page != ZERO_PAGE(0)) -+ mempool_free(bv->bv_page, &c->bio_bounce_pages); -+ bio->bi_vcnt = 0; -+} -+ -+static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool) -+{ -+ struct page *page; -+ -+ if (likely(!*using_mempool)) { -+ page = alloc_page(GFP_NOIO); -+ if (unlikely(!page)) { -+ mutex_lock(&c->bio_bounce_pages_lock); -+ *using_mempool = true; -+ goto pool_alloc; -+ -+ } -+ } else { -+pool_alloc: -+ page = mempool_alloc(&c->bio_bounce_pages, GFP_NOIO); -+ } -+ -+ return page; -+} -+ -+void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio, -+ size_t size) -+{ -+ bool using_mempool = false; -+ -+ while (size) { -+ struct page *page = __bio_alloc_page_pool(c, &using_mempool); -+ unsigned len = min(PAGE_SIZE, size); -+ -+ BUG_ON(!bio_add_page(bio, page, len, 0)); -+ size -= len; -+ } -+ -+ if (using_mempool) -+ mutex_unlock(&c->bio_bounce_pages_lock); -+} -+ -+/* Extent update path: */ -+ -+static int sum_sector_overwrites(struct btree_trans *trans, -+ struct btree_iter *extent_iter, -+ struct bkey_i *new, -+ bool may_allocate, -+ bool *maybe_extending, -+ s64 *delta) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c old; -+ int ret = 0; -+ -+ *maybe_extending = true; -+ *delta = 0; -+ -+ iter = bch2_trans_copy_iter(trans, extent_iter); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) { -+ if (!may_allocate && -+ bch2_bkey_nr_ptrs_fully_allocated(old) < -+ bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new))) { -+ ret = -ENOSPC; -+ break; -+ } -+ -+ *delta += (min(new->k.p.offset, -+ old.k->p.offset) - -+ max(bkey_start_offset(&new->k), -+ bkey_start_offset(old.k))) * -+ (bkey_extent_is_allocation(&new->k) - -+ bkey_extent_is_allocation(old.k)); -+ -+ if (bkey_cmp(old.k->p, new->k.p) >= 0) { -+ /* -+ * Check if there's already data above where we're -+ * going to be writing to - this means we're definitely -+ * not extending the file: -+ * -+ * Note that it's not sufficient to check if there's -+ * data up to the sector offset we're going to be -+ * writing to, because i_size could be up to one block -+ * less: -+ */ -+ if (!bkey_cmp(old.k->p, new->k.p)) -+ old = bch2_btree_iter_next(iter); -+ -+ if (old.k && !bkey_err(old) && -+ old.k->p.inode == extent_iter->pos.inode && -+ bkey_extent_is_data(old.k)) -+ *maybe_extending = false; -+ -+ break; -+ } -+ } -+ -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+int bch2_extent_update(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *k, -+ struct disk_reservation *disk_res, -+ u64 *journal_seq, -+ u64 new_i_size, -+ s64 *i_sectors_delta) -+{ -+ /* this must live until after bch2_trans_commit(): */ -+ struct bkey_inode_buf inode_p; -+ bool 
extending = false; -+ s64 delta = 0; -+ int ret; -+ -+ ret = bch2_extent_trim_atomic(k, iter); -+ if (ret) -+ return ret; -+ -+ ret = sum_sector_overwrites(trans, iter, k, -+ disk_res && disk_res->sectors != 0, -+ &extending, &delta); -+ if (ret) -+ return ret; -+ -+ new_i_size = extending -+ ? min(k->k.p.offset << 9, new_i_size) -+ : 0; -+ -+ if (delta || new_i_size) { -+ struct btree_iter *inode_iter; -+ struct bch_inode_unpacked inode_u; -+ -+ inode_iter = bch2_inode_peek(trans, &inode_u, -+ k->k.p.inode, BTREE_ITER_INTENT); -+ if (IS_ERR(inode_iter)) -+ return PTR_ERR(inode_iter); -+ -+ /* -+ * XXX: -+ * writeback can race a bit with truncate, because truncate -+ * first updates the inode then truncates the pagecache. This is -+ * ugly, but lets us preserve the invariant that the in memory -+ * i_size is always >= the on disk i_size. -+ * -+ BUG_ON(new_i_size > inode_u.bi_size && -+ (inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY)); -+ */ -+ BUG_ON(new_i_size > inode_u.bi_size && !extending); -+ -+ if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && -+ new_i_size > inode_u.bi_size) -+ inode_u.bi_size = new_i_size; -+ else -+ new_i_size = 0; -+ -+ inode_u.bi_sectors += delta; -+ -+ if (delta || new_i_size) { -+ bch2_inode_pack(&inode_p, &inode_u); -+ bch2_trans_update(trans, inode_iter, -+ &inode_p.inode.k_i, 0); -+ } -+ -+ bch2_trans_iter_put(trans, inode_iter); -+ } -+ -+ bch2_trans_update(trans, iter, k, 0); -+ -+ ret = bch2_trans_commit(trans, disk_res, journal_seq, -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE); -+ if (!ret && i_sectors_delta) -+ *i_sectors_delta += delta; -+ -+ return ret; -+} -+ -+int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, -+ struct bpos end, u64 *journal_seq, -+ s64 *i_sectors_delta) -+{ -+ struct bch_fs *c = trans->c; -+ unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); -+ struct bkey_s_c k; -+ int ret = 0, ret2 = 0; -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ bkey_cmp(iter->pos, end) < 0) { -+ struct disk_reservation disk_res = -+ bch2_disk_reservation_init(c, 0); -+ struct bkey_i delete; -+ -+ bch2_trans_begin(trans); -+ -+ ret = bkey_err(k); -+ if (ret) -+ goto btree_err; -+ -+ bkey_init(&delete.k); -+ delete.k.p = iter->pos; -+ -+ /* create the biggest key we can */ -+ bch2_key_resize(&delete.k, max_sectors); -+ bch2_cut_back(end, &delete); -+ -+ ret = bch2_extent_update(trans, iter, &delete, -+ &disk_res, journal_seq, -+ 0, i_sectors_delta); -+ bch2_disk_reservation_put(c, &disk_res); -+btree_err: -+ if (ret == -EINTR) { -+ ret2 = ret; -+ ret = 0; -+ } -+ if (ret) -+ break; -+ } -+ -+ if (bkey_cmp(iter->pos, end) > 0) { -+ bch2_btree_iter_set_pos(iter, end); -+ ret = bch2_btree_iter_traverse(iter); -+ } -+ -+ return ret ?: ret2; -+} -+ -+int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end, -+ u64 *journal_seq, s64 *i_sectors_delta) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(inum, start), -+ BTREE_ITER_INTENT); -+ -+ ret = bch2_fpunch_at(&trans, iter, POS(inum, end), -+ journal_seq, i_sectors_delta); -+ bch2_trans_exit(&trans); -+ -+ if (ret == -EINTR) -+ ret = 0; -+ -+ return ret; -+} -+ -+int bch2_write_index_default(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct bkey_on_stack sk; -+ struct keylist *keys = &op->insert_keys; -+ struct bkey_i *k = bch2_keylist_front(keys); -+ struct btree_trans 
trans; -+ struct btree_iter *iter; -+ int ret; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ bkey_start_pos(&k->k), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ -+ do { -+ bch2_trans_begin(&trans); -+ -+ k = bch2_keylist_front(keys); -+ -+ bkey_on_stack_realloc(&sk, c, k->k.u64s); -+ bkey_copy(sk.k, k); -+ bch2_cut_front(iter->pos, sk.k); -+ -+ ret = bch2_extent_update(&trans, iter, sk.k, -+ &op->res, op_journal_seq(op), -+ op->new_i_size, &op->i_sectors_delta); -+ if (ret == -EINTR) -+ continue; -+ if (ret) -+ break; -+ -+ if (bkey_cmp(iter->pos, k->k.p) >= 0) -+ bch2_keylist_pop_front(keys); -+ } while (!bch2_keylist_empty(keys)); -+ -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ -+ return ret; -+} -+ -+/* Writes */ -+ -+void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, -+ enum bch_data_type type, -+ const struct bkey_i *k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); -+ const struct bch_extent_ptr *ptr; -+ struct bch_write_bio *n; -+ struct bch_dev *ca; -+ -+ BUG_ON(c->opts.nochanges); -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX || -+ !c->devs[ptr->dev]); -+ -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ if (to_entry(ptr + 1) < ptrs.end) { -+ n = to_wbio(bio_clone_fast(&wbio->bio, GFP_NOIO, -+ &ca->replica_set)); -+ -+ n->bio.bi_end_io = wbio->bio.bi_end_io; -+ n->bio.bi_private = wbio->bio.bi_private; -+ n->parent = wbio; -+ n->split = true; -+ n->bounce = false; -+ n->put_bio = true; -+ n->bio.bi_opf = wbio->bio.bi_opf; -+ bio_inc_remaining(&wbio->bio); -+ } else { -+ n = wbio; -+ n->split = false; -+ } -+ -+ n->c = c; -+ n->dev = ptr->dev; -+ n->have_ioref = bch2_dev_get_ioref(ca, -+ type == BCH_DATA_btree ? 
READ : WRITE); -+ n->submit_time = local_clock(); -+ n->bio.bi_iter.bi_sector = ptr->offset; -+ -+ if (!journal_flushes_device(ca)) -+ n->bio.bi_opf |= REQ_FUA; -+ -+ if (likely(n->have_ioref)) { -+ this_cpu_add(ca->io_done->sectors[WRITE][type], -+ bio_sectors(&n->bio)); -+ -+ bio_set_dev(&n->bio, ca->disk_sb.bdev); -+ submit_bio(&n->bio); -+ } else { -+ n->bio.bi_status = BLK_STS_REMOVED; -+ bio_endio(&n->bio); -+ } -+ } -+} -+ -+static void __bch2_write(struct closure *); -+ -+static void bch2_write_done(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bch_fs *c = op->c; -+ -+ if (!op->error && (op->flags & BCH_WRITE_FLUSH)) -+ op->error = bch2_journal_error(&c->journal); -+ -+ bch2_disk_reservation_put(c, &op->res); -+ percpu_ref_put(&c->writes); -+ bch2_keylist_free(&op->insert_keys, op->inline_keys); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time); -+ -+ if (!(op->flags & BCH_WRITE_FROM_INTERNAL)) -+ up(&c->io_in_flight); -+ -+ if (op->end_io) { -+ EBUG_ON(cl->parent); -+ closure_debug_destroy(cl); -+ op->end_io(op); -+ } else { -+ closure_return(cl); -+ } -+} -+ -+/** -+ * bch_write_index - after a write, update index to point to new data -+ */ -+static void __bch2_write_index(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct keylist *keys = &op->insert_keys; -+ struct bch_extent_ptr *ptr; -+ struct bkey_i *src, *dst = keys->keys, *n, *k; -+ unsigned dev; -+ int ret; -+ -+ for (src = keys->keys; src != keys->top; src = n) { -+ n = bkey_next(src); -+ -+ if (bkey_extent_is_direct_data(&src->k)) { -+ bch2_bkey_drop_ptrs(bkey_i_to_s(src), ptr, -+ test_bit(ptr->dev, op->failed.d)); -+ -+ if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src))) { -+ ret = -EIO; -+ goto err; -+ } -+ } -+ -+ if (dst != src) -+ memmove_u64s_down(dst, src, src->u64s); -+ dst = bkey_next(dst); -+ } -+ -+ keys->top = dst; -+ -+ /* -+ * probably not the ideal place to hook this in, but I don't -+ * particularly want to plumb io_opts all the way through the btree -+ * update stack right now -+ */ -+ for_each_keylist_key(keys, k) { -+ bch2_rebalance_add_key(c, bkey_i_to_s_c(k), &op->opts); -+ -+ if (bch2_bkey_is_incompressible(bkey_i_to_s_c(k))) -+ bch2_check_set_feature(op->c, BCH_FEATURE_incompressible); -+ -+ } -+ -+ if (!bch2_keylist_empty(keys)) { -+ u64 sectors_start = keylist_sectors(keys); -+ int ret = op->index_update_fn(op); -+ -+ BUG_ON(ret == -EINTR); -+ BUG_ON(keylist_sectors(keys) && !ret); -+ -+ op->written += sectors_start - keylist_sectors(keys); -+ -+ if (ret) { -+ __bcache_io_error(c, "btree IO error %i", ret); -+ op->error = ret; -+ } -+ } -+out: -+ /* If some a bucket wasn't written, we can't erasure code it: */ -+ for_each_set_bit(dev, op->failed.d, BCH_SB_MEMBERS_MAX) -+ bch2_open_bucket_write_error(c, &op->open_buckets, dev); -+ -+ bch2_open_buckets_put(c, &op->open_buckets); -+ return; -+err: -+ keys->top = keys->keys; -+ op->error = ret; -+ goto out; -+} -+ -+static void bch2_write_index(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bch_fs *c = op->c; -+ -+ __bch2_write_index(op); -+ -+ if (!(op->flags & BCH_WRITE_DONE)) { -+ continue_at(cl, __bch2_write, index_update_wq(op)); -+ } else if (!op->error && (op->flags & BCH_WRITE_FLUSH)) { -+ bch2_journal_flush_seq_async(&c->journal, -+ *op_journal_seq(op), -+ cl); -+ continue_at(cl, bch2_write_done, index_update_wq(op)); -+ } else { -+ continue_at_nobarrier(cl, bch2_write_done, NULL); -+ } 
-+} -+ -+static void bch2_write_endio(struct bio *bio) -+{ -+ struct closure *cl = bio->bi_private; -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bch_write_bio *wbio = to_wbio(bio); -+ struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL; -+ struct bch_fs *c = wbio->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev); -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "data write: %s", -+ bch2_blk_status_to_str(bio->bi_status))) -+ set_bit(wbio->dev, op->failed.d); -+ -+ if (wbio->have_ioref) { -+ bch2_latency_acct(ca, wbio->submit_time, WRITE); -+ percpu_ref_put(&ca->io_ref); -+ } -+ -+ if (wbio->bounce) -+ bch2_bio_free_pages_pool(c, bio); -+ -+ if (wbio->put_bio) -+ bio_put(bio); -+ -+ if (parent) -+ bio_endio(&parent->bio); -+ else if (!(op->flags & BCH_WRITE_SKIP_CLOSURE_PUT)) -+ closure_put(cl); -+ else -+ continue_at_nobarrier(cl, bch2_write_index, index_update_wq(op)); -+} -+ -+static void init_append_extent(struct bch_write_op *op, -+ struct write_point *wp, -+ struct bversion version, -+ struct bch_extent_crc_unpacked crc) -+{ -+ struct bch_fs *c = op->c; -+ struct bkey_i_extent *e; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ BUG_ON(crc.compressed_size > wp->sectors_free); -+ wp->sectors_free -= crc.compressed_size; -+ op->pos.offset += crc.uncompressed_size; -+ -+ e = bkey_extent_init(op->insert_keys.top); -+ e->k.p = op->pos; -+ e->k.size = crc.uncompressed_size; -+ e->k.version = version; -+ -+ if (crc.csum_type || -+ crc.compression_type || -+ crc.nonce) -+ bch2_extent_crc_append(&e->k_i, crc); -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); -+ union bch_extent_entry *end = -+ bkey_val_end(bkey_i_to_s(&e->k_i)); -+ -+ end->ptr = ob->ptr; -+ end->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; -+ end->ptr.cached = !ca->mi.durability || -+ (op->flags & BCH_WRITE_CACHED) != 0; -+ end->ptr.offset += ca->mi.bucket_size - ob->sectors_free; -+ -+ e->k.u64s++; -+ -+ BUG_ON(crc.compressed_size > ob->sectors_free); -+ ob->sectors_free -= crc.compressed_size; -+ } -+ -+ bch2_keylist_push(&op->insert_keys); -+} -+ -+static struct bio *bch2_write_bio_alloc(struct bch_fs *c, -+ struct write_point *wp, -+ struct bio *src, -+ bool *page_alloc_failed, -+ void *buf) -+{ -+ struct bch_write_bio *wbio; -+ struct bio *bio; -+ unsigned output_available = -+ min(wp->sectors_free << 9, src->bi_iter.bi_size); -+ unsigned pages = DIV_ROUND_UP(output_available + -+ (buf -+ ? 
((unsigned long) buf & (PAGE_SIZE - 1)) -+ : 0), PAGE_SIZE); -+ -+ bio = bio_alloc_bioset(GFP_NOIO, pages, &c->bio_write); -+ wbio = wbio_init(bio); -+ wbio->put_bio = true; -+ /* copy WRITE_SYNC flag */ -+ wbio->bio.bi_opf = src->bi_opf; -+ -+ if (buf) { -+ bch2_bio_map(bio, buf, output_available); -+ return bio; -+ } -+ -+ wbio->bounce = true; -+ -+ /* -+ * We can't use mempool for more than c->sb.encoded_extent_max -+ * worth of pages, but we'd like to allocate more if we can: -+ */ -+ bch2_bio_alloc_pages_pool(c, bio, -+ min_t(unsigned, output_available, -+ c->sb.encoded_extent_max << 9)); -+ -+ if (bio->bi_iter.bi_size < output_available) -+ *page_alloc_failed = -+ bch2_bio_alloc_pages(bio, -+ output_available - -+ bio->bi_iter.bi_size, -+ GFP_NOFS) != 0; -+ -+ return bio; -+} -+ -+static int bch2_write_rechecksum(struct bch_fs *c, -+ struct bch_write_op *op, -+ unsigned new_csum_type) -+{ -+ struct bio *bio = &op->wbio.bio; -+ struct bch_extent_crc_unpacked new_crc; -+ int ret; -+ -+ /* bch2_rechecksum_bio() can't encrypt or decrypt data: */ -+ -+ if (bch2_csum_type_is_encryption(op->crc.csum_type) != -+ bch2_csum_type_is_encryption(new_csum_type)) -+ new_csum_type = op->crc.csum_type; -+ -+ ret = bch2_rechecksum_bio(c, bio, op->version, op->crc, -+ NULL, &new_crc, -+ op->crc.offset, op->crc.live_size, -+ new_csum_type); -+ if (ret) -+ return ret; -+ -+ bio_advance(bio, op->crc.offset << 9); -+ bio->bi_iter.bi_size = op->crc.live_size << 9; -+ op->crc = new_crc; -+ return 0; -+} -+ -+static int bch2_write_decrypt(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct nonce nonce = extent_nonce(op->version, op->crc); -+ struct bch_csum csum; -+ -+ if (!bch2_csum_type_is_encryption(op->crc.csum_type)) -+ return 0; -+ -+ /* -+ * If we need to decrypt data in the write path, we'll no longer be able -+ * to verify the existing checksum (poly1305 mac, in this case) after -+ * it's decrypted - this is the last point we'll be able to reverify the -+ * checksum: -+ */ -+ csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); -+ if (bch2_crc_cmp(op->crc.csum, csum)) -+ return -EIO; -+ -+ bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); -+ op->crc.csum_type = 0; -+ op->crc.csum = (struct bch_csum) { 0, 0 }; -+ return 0; -+} -+ -+static enum prep_encoded_ret { -+ PREP_ENCODED_OK, -+ PREP_ENCODED_ERR, -+ PREP_ENCODED_CHECKSUM_ERR, -+ PREP_ENCODED_DO_WRITE, -+} bch2_write_prep_encoded_data(struct bch_write_op *op, struct write_point *wp) -+{ -+ struct bch_fs *c = op->c; -+ struct bio *bio = &op->wbio.bio; -+ -+ if (!(op->flags & BCH_WRITE_DATA_ENCODED)) -+ return PREP_ENCODED_OK; -+ -+ BUG_ON(bio_sectors(bio) != op->crc.compressed_size); -+ -+ /* Can we just write the entire extent as is? 
*/ -+ if (op->crc.uncompressed_size == op->crc.live_size && -+ op->crc.compressed_size <= wp->sectors_free && -+ (op->crc.compression_type == op->compression_type || -+ op->incompressible)) { -+ if (!crc_is_compressed(op->crc) && -+ op->csum_type != op->crc.csum_type && -+ bch2_write_rechecksum(c, op, op->csum_type)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ return PREP_ENCODED_DO_WRITE; -+ } -+ -+ /* -+ * If the data is compressed and we couldn't write the entire extent as -+ * is, we have to decompress it: -+ */ -+ if (crc_is_compressed(op->crc)) { -+ struct bch_csum csum; -+ -+ if (bch2_write_decrypt(op)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ /* Last point we can still verify checksum: */ -+ csum = bch2_checksum_bio(c, op->crc.csum_type, -+ extent_nonce(op->version, op->crc), -+ bio); -+ if (bch2_crc_cmp(op->crc.csum, csum)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ if (bch2_bio_uncompress_inplace(c, bio, &op->crc)) -+ return PREP_ENCODED_ERR; -+ } -+ -+ /* -+ * No longer have compressed data after this point - data might be -+ * encrypted: -+ */ -+ -+ /* -+ * If the data is checksummed and we're only writing a subset, -+ * rechecksum and adjust bio to point to currently live data: -+ */ -+ if ((op->crc.live_size != op->crc.uncompressed_size || -+ op->crc.csum_type != op->csum_type) && -+ bch2_write_rechecksum(c, op, op->csum_type)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ /* -+ * If we want to compress the data, it has to be decrypted: -+ */ -+ if ((op->compression_type || -+ bch2_csum_type_is_encryption(op->crc.csum_type) != -+ bch2_csum_type_is_encryption(op->csum_type)) && -+ bch2_write_decrypt(op)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ return PREP_ENCODED_OK; -+} -+ -+static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, -+ struct bio **_dst) -+{ -+ struct bch_fs *c = op->c; -+ struct bio *src = &op->wbio.bio, *dst = src; -+ struct bvec_iter saved_iter; -+ void *ec_buf; -+ struct bpos ec_pos = op->pos; -+ unsigned total_output = 0, total_input = 0; -+ bool bounce = false; -+ bool page_alloc_failed = false; -+ int ret, more = 0; -+ -+ BUG_ON(!bio_sectors(src)); -+ -+ ec_buf = bch2_writepoint_ec_buf(c, wp); -+ -+ switch (bch2_write_prep_encoded_data(op, wp)) { -+ case PREP_ENCODED_OK: -+ break; -+ case PREP_ENCODED_ERR: -+ ret = -EIO; -+ goto err; -+ case PREP_ENCODED_CHECKSUM_ERR: -+ BUG(); -+ goto csum_err; -+ case PREP_ENCODED_DO_WRITE: -+ /* XXX look for bug here */ -+ if (ec_buf) { -+ dst = bch2_write_bio_alloc(c, wp, src, -+ &page_alloc_failed, -+ ec_buf); -+ bio_copy_data(dst, src); -+ bounce = true; -+ } -+ init_append_extent(op, wp, op->version, op->crc); -+ goto do_write; -+ } -+ -+ if (ec_buf || -+ op->compression_type || -+ (op->csum_type && -+ !(op->flags & BCH_WRITE_PAGES_STABLE)) || -+ (bch2_csum_type_is_encryption(op->csum_type) && -+ !(op->flags & BCH_WRITE_PAGES_OWNED))) { -+ dst = bch2_write_bio_alloc(c, wp, src, -+ &page_alloc_failed, -+ ec_buf); -+ bounce = true; -+ } -+ -+ saved_iter = dst->bi_iter; -+ -+ do { -+ struct bch_extent_crc_unpacked crc = -+ (struct bch_extent_crc_unpacked) { 0 }; -+ struct bversion version = op->version; -+ size_t dst_len, src_len; -+ -+ if (page_alloc_failed && -+ bio_sectors(dst) < wp->sectors_free && -+ bio_sectors(dst) < c->sb.encoded_extent_max) -+ break; -+ -+ BUG_ON(op->compression_type && -+ (op->flags & BCH_WRITE_DATA_ENCODED) && -+ bch2_csum_type_is_encryption(op->crc.csum_type)); -+ BUG_ON(op->compression_type && !bounce); -+ -+ crc.compression_type = op->incompressible -+ ? 
BCH_COMPRESSION_TYPE_incompressible -+ : op->compression_type -+ ? bch2_bio_compress(c, dst, &dst_len, src, &src_len, -+ op->compression_type) -+ : 0; -+ if (!crc_is_compressed(crc)) { -+ dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size); -+ dst_len = min_t(unsigned, dst_len, wp->sectors_free << 9); -+ -+ if (op->csum_type) -+ dst_len = min_t(unsigned, dst_len, -+ c->sb.encoded_extent_max << 9); -+ -+ if (bounce) { -+ swap(dst->bi_iter.bi_size, dst_len); -+ bio_copy_data(dst, src); -+ swap(dst->bi_iter.bi_size, dst_len); -+ } -+ -+ src_len = dst_len; -+ } -+ -+ BUG_ON(!src_len || !dst_len); -+ -+ if (bch2_csum_type_is_encryption(op->csum_type)) { -+ if (bversion_zero(version)) { -+ version.lo = atomic64_inc_return(&c->key_version); -+ } else { -+ crc.nonce = op->nonce; -+ op->nonce += src_len >> 9; -+ } -+ } -+ -+ if ((op->flags & BCH_WRITE_DATA_ENCODED) && -+ !crc_is_compressed(crc) && -+ bch2_csum_type_is_encryption(op->crc.csum_type) == -+ bch2_csum_type_is_encryption(op->csum_type)) { -+ /* -+ * Note: when we're using rechecksum(), we need to be -+ * checksumming @src because it has all the data our -+ * existing checksum covers - if we bounced (because we -+ * were trying to compress), @dst will only have the -+ * part of the data the new checksum will cover. -+ * -+ * But normally we want to be checksumming post bounce, -+ * because part of the reason for bouncing is so the -+ * data can't be modified (by userspace) while it's in -+ * flight. -+ */ -+ if (bch2_rechecksum_bio(c, src, version, op->crc, -+ &crc, &op->crc, -+ src_len >> 9, -+ bio_sectors(src) - (src_len >> 9), -+ op->csum_type)) -+ goto csum_err; -+ } else { -+ if ((op->flags & BCH_WRITE_DATA_ENCODED) && -+ bch2_rechecksum_bio(c, src, version, op->crc, -+ NULL, &op->crc, -+ src_len >> 9, -+ bio_sectors(src) - (src_len >> 9), -+ op->crc.csum_type)) -+ goto csum_err; -+ -+ crc.compressed_size = dst_len >> 9; -+ crc.uncompressed_size = src_len >> 9; -+ crc.live_size = src_len >> 9; -+ -+ swap(dst->bi_iter.bi_size, dst_len); -+ bch2_encrypt_bio(c, op->csum_type, -+ extent_nonce(version, crc), dst); -+ crc.csum = bch2_checksum_bio(c, op->csum_type, -+ extent_nonce(version, crc), dst); -+ crc.csum_type = op->csum_type; -+ swap(dst->bi_iter.bi_size, dst_len); -+ } -+ -+ init_append_extent(op, wp, version, crc); -+ -+ if (dst != src) -+ bio_advance(dst, dst_len); -+ bio_advance(src, src_len); -+ total_output += dst_len; -+ total_input += src_len; -+ } while (dst->bi_iter.bi_size && -+ src->bi_iter.bi_size && -+ wp->sectors_free && -+ !bch2_keylist_realloc(&op->insert_keys, -+ op->inline_keys, -+ ARRAY_SIZE(op->inline_keys), -+ BKEY_EXTENT_U64s_MAX)); -+ -+ more = src->bi_iter.bi_size != 0; -+ -+ dst->bi_iter = saved_iter; -+ -+ if (dst == src && more) { -+ BUG_ON(total_output != total_input); -+ -+ dst = bio_split(src, total_input >> 9, -+ GFP_NOIO, &c->bio_write); -+ wbio_init(dst)->put_bio = true; -+ /* copy WRITE_SYNC flag */ -+ dst->bi_opf = src->bi_opf; -+ } -+ -+ dst->bi_iter.bi_size = total_output; -+do_write: -+ /* might have done a realloc... 
*/ -+ bch2_ec_add_backpointer(c, wp, ec_pos, total_input >> 9); -+ -+ *_dst = dst; -+ return more; -+csum_err: -+ bch_err(c, "error verifying existing checksum while " -+ "rewriting existing data (memory corruption?)"); -+ ret = -EIO; -+err: -+ if (to_wbio(dst)->bounce) -+ bch2_bio_free_pages_pool(c, dst); -+ if (to_wbio(dst)->put_bio) -+ bio_put(dst); -+ -+ return ret; -+} -+ -+static void __bch2_write(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bch_fs *c = op->c; -+ struct write_point *wp; -+ struct bio *bio; -+ bool skip_put = true; -+ unsigned nofs_flags; -+ int ret; -+ -+ nofs_flags = memalloc_nofs_save(); -+again: -+ memset(&op->failed, 0, sizeof(op->failed)); -+ -+ do { -+ struct bkey_i *key_to_write; -+ unsigned key_to_write_offset = op->insert_keys.top_p - -+ op->insert_keys.keys_p; -+ -+ /* +1 for possible cache device: */ -+ if (op->open_buckets.nr + op->nr_replicas + 1 > -+ ARRAY_SIZE(op->open_buckets.v)) -+ goto flush_io; -+ -+ if (bch2_keylist_realloc(&op->insert_keys, -+ op->inline_keys, -+ ARRAY_SIZE(op->inline_keys), -+ BKEY_EXTENT_U64s_MAX)) -+ goto flush_io; -+ -+ if ((op->flags & BCH_WRITE_FROM_INTERNAL) && -+ percpu_ref_is_dying(&c->writes)) { -+ ret = -EROFS; -+ goto err; -+ } -+ -+ /* -+ * The copygc thread is now global, which means it's no longer -+ * freeing up space on specific disks, which means that -+ * allocations for specific disks may hang arbitrarily long: -+ */ -+ wp = bch2_alloc_sectors_start(c, -+ op->target, -+ op->opts.erasure_code, -+ op->write_point, -+ &op->devs_have, -+ op->nr_replicas, -+ op->nr_replicas_required, -+ op->alloc_reserve, -+ op->flags, -+ (op->flags & (BCH_WRITE_ALLOC_NOWAIT| -+ BCH_WRITE_ONLY_SPECIFIED_DEVS)) ? NULL : cl); -+ EBUG_ON(!wp); -+ -+ if (unlikely(IS_ERR(wp))) { -+ if (unlikely(PTR_ERR(wp) != -EAGAIN)) { -+ ret = PTR_ERR(wp); -+ goto err; -+ } -+ -+ goto flush_io; -+ } -+ -+ /* -+ * It's possible for the allocator to fail, put us on the -+ * freelist waitlist, and then succeed in one of various retry -+ * paths: if that happens, we need to disable the skip_put -+ * optimization because otherwise there won't necessarily be a -+ * barrier before we free the bch_write_op: -+ */ -+ if (atomic_read(&cl->remaining) & CLOSURE_WAITING) -+ skip_put = false; -+ -+ bch2_open_bucket_get(c, wp, &op->open_buckets); -+ ret = bch2_write_extent(op, wp, &bio); -+ bch2_alloc_sectors_done(c, wp); -+ -+ if (ret < 0) -+ goto err; -+ -+ if (ret) { -+ skip_put = false; -+ } else { -+ /* -+ * for the skip_put optimization this has to be set -+ * before we submit the bio: -+ */ -+ op->flags |= BCH_WRITE_DONE; -+ } -+ -+ bio->bi_end_io = bch2_write_endio; -+ bio->bi_private = &op->cl; -+ bio->bi_opf |= REQ_OP_WRITE; -+ -+ if (!skip_put) -+ closure_get(bio->bi_private); -+ else -+ op->flags |= BCH_WRITE_SKIP_CLOSURE_PUT; -+ -+ key_to_write = (void *) (op->insert_keys.keys_p + -+ key_to_write_offset); -+ -+ bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user, -+ key_to_write); -+ } while (ret); -+ -+ if (!skip_put) -+ continue_at(cl, bch2_write_index, index_update_wq(op)); -+out: -+ memalloc_nofs_restore(nofs_flags); -+ return; -+err: -+ op->error = ret; -+ op->flags |= BCH_WRITE_DONE; -+ -+ continue_at(cl, bch2_write_index, index_update_wq(op)); -+ goto out; -+flush_io: -+ /* -+ * If the write can't all be submitted at once, we generally want to -+ * block synchronously as that signals backpressure to the caller. 
-+ * -+ * However, if we're running out of a workqueue, we can't block here -+ * because we'll be blocking other work items from completing: -+ */ -+ if (current->flags & PF_WQ_WORKER) { -+ continue_at(cl, bch2_write_index, index_update_wq(op)); -+ goto out; -+ } -+ -+ closure_sync(cl); -+ -+ if (!bch2_keylist_empty(&op->insert_keys)) { -+ __bch2_write_index(op); -+ -+ if (op->error) { -+ op->flags |= BCH_WRITE_DONE; -+ continue_at_nobarrier(cl, bch2_write_done, NULL); -+ goto out; -+ } -+ } -+ -+ goto again; -+} -+ -+static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len) -+{ -+ struct closure *cl = &op->cl; -+ struct bio *bio = &op->wbio.bio; -+ struct bvec_iter iter; -+ struct bkey_i_inline_data *id; -+ unsigned sectors; -+ int ret; -+ -+ bch2_check_set_feature(op->c, BCH_FEATURE_inline_data); -+ -+ ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys, -+ ARRAY_SIZE(op->inline_keys), -+ BKEY_U64s + DIV_ROUND_UP(data_len, 8)); -+ if (ret) { -+ op->error = ret; -+ goto err; -+ } -+ -+ sectors = bio_sectors(bio); -+ op->pos.offset += sectors; -+ -+ id = bkey_inline_data_init(op->insert_keys.top); -+ id->k.p = op->pos; -+ id->k.version = op->version; -+ id->k.size = sectors; -+ -+ iter = bio->bi_iter; -+ iter.bi_size = data_len; -+ memcpy_from_bio(id->v.data, bio, iter); -+ -+ while (data_len & 7) -+ id->v.data[data_len++] = '\0'; -+ set_bkey_val_bytes(&id->k, data_len); -+ bch2_keylist_push(&op->insert_keys); -+ -+ op->flags |= BCH_WRITE_WROTE_DATA_INLINE; -+ op->flags |= BCH_WRITE_DONE; -+ -+ continue_at_nobarrier(cl, bch2_write_index, NULL); -+ return; -+err: -+ bch2_write_done(&op->cl); -+} -+ -+/** -+ * bch_write - handle a write to a cache device or flash only volume -+ * -+ * This is the starting point for any data to end up in a cache device; it could -+ * be from a normal write, or a writeback write, or a write to a flash only -+ * volume - it's also used by the moving garbage collector to compact data in -+ * mostly empty buckets. -+ * -+ * It first writes the data to the cache, creating a list of keys to be inserted -+ * (if the data won't fit in a single open bucket, there will be multiple keys); -+ * after the data is written it calls bch_journal, and after the keys have been -+ * added to the next journal write they're inserted into the btree. -+ * -+ * If op->discard is true, instead of inserting the data it invalidates the -+ * region of the cache represented by op->bio and op->inode. 
-+ */ -+void bch2_write(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bio *bio = &op->wbio.bio; -+ struct bch_fs *c = op->c; -+ unsigned data_len; -+ -+ BUG_ON(!op->nr_replicas); -+ BUG_ON(!op->write_point.v); -+ BUG_ON(!bkey_cmp(op->pos, POS_MAX)); -+ -+ op->start_time = local_clock(); -+ bch2_keylist_init(&op->insert_keys, op->inline_keys); -+ wbio_init(bio)->put_bio = false; -+ -+ if (bio_sectors(bio) & (c->opts.block_size - 1)) { -+ __bcache_io_error(c, "misaligned write"); -+ op->error = -EIO; -+ goto err; -+ } -+ -+ if (c->opts.nochanges || -+ !percpu_ref_tryget(&c->writes)) { -+ if (!(op->flags & BCH_WRITE_FROM_INTERNAL)) -+ __bcache_io_error(c, "read only"); -+ op->error = -EROFS; -+ goto err; -+ } -+ -+ /* -+ * Can't ratelimit copygc - we'd deadlock: -+ */ -+ if (!(op->flags & BCH_WRITE_FROM_INTERNAL)) -+ down(&c->io_in_flight); -+ -+ bch2_increment_clock(c, bio_sectors(bio), WRITE); -+ -+ data_len = min_t(u64, bio->bi_iter.bi_size, -+ op->new_i_size - (op->pos.offset << 9)); -+ -+ if (c->opts.inline_data && -+ data_len <= min(block_bytes(c) / 2, 1024U)) { -+ bch2_write_data_inline(op, data_len); -+ return; -+ } -+ -+ continue_at_nobarrier(cl, __bch2_write, NULL); -+ return; -+err: -+ bch2_disk_reservation_put(c, &op->res); -+ -+ if (op->end_io) { -+ EBUG_ON(cl->parent); -+ closure_debug_destroy(cl); -+ op->end_io(op); -+ } else { -+ closure_return(cl); -+ } -+} -+ -+/* Cache promotion on read */ -+ -+struct promote_op { -+ struct closure cl; -+ struct rcu_head rcu; -+ u64 start_time; -+ -+ struct rhash_head hash; -+ struct bpos pos; -+ -+ struct migrate_write write; -+ struct bio_vec bi_inline_vecs[0]; /* must be last */ -+}; -+ -+static const struct rhashtable_params bch_promote_params = { -+ .head_offset = offsetof(struct promote_op, hash), -+ .key_offset = offsetof(struct promote_op, pos), -+ .key_len = sizeof(struct bpos), -+}; -+ -+static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k, -+ struct bpos pos, -+ struct bch_io_opts opts, -+ unsigned flags) -+{ -+ if (!(flags & BCH_READ_MAY_PROMOTE)) -+ return false; -+ -+ if (!opts.promote_target) -+ return false; -+ -+ if (bch2_bkey_has_target(c, k, opts.promote_target)) -+ return false; -+ -+ if (bch2_target_congested(c, opts.promote_target)) { -+ /* XXX trace this */ -+ return false; -+ } -+ -+ if (rhashtable_lookup_fast(&c->promote_table, &pos, -+ bch_promote_params)) -+ return false; -+ -+ return true; -+} -+ -+static void promote_free(struct bch_fs *c, struct promote_op *op) -+{ -+ int ret; -+ -+ ret = rhashtable_remove_fast(&c->promote_table, &op->hash, -+ bch_promote_params); -+ BUG_ON(ret); -+ percpu_ref_put(&c->writes); -+ kfree_rcu(op, rcu); -+} -+ -+static void promote_done(struct closure *cl) -+{ -+ struct promote_op *op = -+ container_of(cl, struct promote_op, cl); -+ struct bch_fs *c = op->write.op.c; -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_data_promote], -+ op->start_time); -+ -+ bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio); -+ promote_free(c, op); -+} -+ -+static void promote_start(struct promote_op *op, struct bch_read_bio *rbio) -+{ -+ struct bch_fs *c = rbio->c; -+ struct closure *cl = &op->cl; -+ struct bio *bio = &op->write.op.wbio.bio; -+ -+ trace_promote(&rbio->bio); -+ -+ /* we now own pages: */ -+ BUG_ON(!rbio->bounce); -+ BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs); -+ -+ memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec, -+ sizeof(struct bio_vec) * rbio->bio.bi_vcnt); -+ swap(bio->bi_vcnt, rbio->bio.bi_vcnt); -+ -+ 
bch2_migrate_read_done(&op->write, rbio); -+ -+ closure_init(cl, NULL); -+ closure_call(&op->write.op.cl, bch2_write, c->wq, cl); -+ closure_return_with_destructor(cl, promote_done); -+} -+ -+static struct promote_op *__promote_alloc(struct bch_fs *c, -+ enum btree_id btree_id, -+ struct bkey_s_c k, -+ struct bpos pos, -+ struct extent_ptr_decoded *pick, -+ struct bch_io_opts opts, -+ unsigned sectors, -+ struct bch_read_bio **rbio) -+{ -+ struct promote_op *op = NULL; -+ struct bio *bio; -+ unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); -+ int ret; -+ -+ if (!percpu_ref_tryget(&c->writes)) -+ return NULL; -+ -+ op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO); -+ if (!op) -+ goto err; -+ -+ op->start_time = local_clock(); -+ op->pos = pos; -+ -+ /* -+ * We don't use the mempool here because extents that aren't -+ * checksummed or compressed can be too big for the mempool: -+ */ -+ *rbio = kzalloc(sizeof(struct bch_read_bio) + -+ sizeof(struct bio_vec) * pages, -+ GFP_NOIO); -+ if (!*rbio) -+ goto err; -+ -+ rbio_init(&(*rbio)->bio, opts); -+ bio_init(&(*rbio)->bio, (*rbio)->bio.bi_inline_vecs, pages); -+ -+ if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, -+ GFP_NOIO)) -+ goto err; -+ -+ (*rbio)->bounce = true; -+ (*rbio)->split = true; -+ (*rbio)->kmalloc = true; -+ -+ if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash, -+ bch_promote_params)) -+ goto err; -+ -+ bio = &op->write.op.wbio.bio; -+ bio_init(bio, bio->bi_inline_vecs, pages); -+ -+ ret = bch2_migrate_write_init(c, &op->write, -+ writepoint_hashed((unsigned long) current), -+ opts, -+ DATA_PROMOTE, -+ (struct data_opts) { -+ .target = opts.promote_target -+ }, -+ btree_id, k); -+ BUG_ON(ret); -+ -+ return op; -+err: -+ if (*rbio) -+ bio_free_pages(&(*rbio)->bio); -+ kfree(*rbio); -+ *rbio = NULL; -+ kfree(op); -+ percpu_ref_put(&c->writes); -+ return NULL; -+} -+ -+noinline -+static struct promote_op *promote_alloc(struct bch_fs *c, -+ struct bvec_iter iter, -+ struct bkey_s_c k, -+ struct extent_ptr_decoded *pick, -+ struct bch_io_opts opts, -+ unsigned flags, -+ struct bch_read_bio **rbio, -+ bool *bounce, -+ bool *read_full) -+{ -+ bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents); -+ /* data might have to be decompressed in the write path: */ -+ unsigned sectors = promote_full -+ ? max(pick->crc.compressed_size, pick->crc.live_size) -+ : bvec_iter_sectors(iter); -+ struct bpos pos = promote_full -+ ? bkey_start_pos(k.k) -+ : POS(k.k->p.inode, iter.bi_sector); -+ struct promote_op *promote; -+ -+ if (!should_promote(c, k, pos, opts, flags)) -+ return NULL; -+ -+ promote = __promote_alloc(c, -+ k.k->type == KEY_TYPE_reflink_v -+ ? BTREE_ID_REFLINK -+ : BTREE_ID_EXTENTS, -+ k, pos, pick, opts, sectors, rbio); -+ if (!promote) -+ return NULL; -+ -+ *bounce = true; -+ *read_full = promote_full; -+ return promote; -+} -+ -+/* Read */ -+ -+#define READ_RETRY_AVOID 1 -+#define READ_RETRY 2 -+#define READ_ERR 3 -+ -+enum rbio_context { -+ RBIO_CONTEXT_NULL, -+ RBIO_CONTEXT_HIGHPRI, -+ RBIO_CONTEXT_UNBOUND, -+}; -+ -+static inline struct bch_read_bio * -+bch2_rbio_parent(struct bch_read_bio *rbio) -+{ -+ return rbio->split ? 
rbio->parent : rbio; -+} -+ -+__always_inline -+static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn, -+ enum rbio_context context, -+ struct workqueue_struct *wq) -+{ -+ if (context <= rbio->context) { -+ fn(&rbio->work); -+ } else { -+ rbio->work.func = fn; -+ rbio->context = context; -+ queue_work(wq, &rbio->work); -+ } -+} -+ -+static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) -+{ -+ BUG_ON(rbio->bounce && !rbio->split); -+ -+ if (rbio->promote) -+ promote_free(rbio->c, rbio->promote); -+ rbio->promote = NULL; -+ -+ if (rbio->bounce) -+ bch2_bio_free_pages_pool(rbio->c, &rbio->bio); -+ -+ if (rbio->split) { -+ struct bch_read_bio *parent = rbio->parent; -+ -+ if (rbio->kmalloc) -+ kfree(rbio); -+ else -+ bio_put(&rbio->bio); -+ -+ rbio = parent; -+ } -+ -+ return rbio; -+} -+ -+/* -+ * Only called on a top level bch_read_bio to complete an entire read request, -+ * not a split: -+ */ -+static void bch2_rbio_done(struct bch_read_bio *rbio) -+{ -+ if (rbio->start_time) -+ bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read], -+ rbio->start_time); -+ bio_endio(&rbio->bio); -+} -+ -+static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio, -+ struct bvec_iter bvec_iter, u64 inode, -+ struct bch_io_failures *failed, -+ unsigned flags) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_on_stack sk; -+ struct bkey_s_c k; -+ int ret; -+ -+ flags &= ~BCH_READ_LAST_FRAGMENT; -+ flags |= BCH_READ_MUST_CLONE; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ rbio->pos, BTREE_ITER_SLOTS); -+retry: -+ rbio->bio.bi_status = 0; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ if (bkey_err(k)) -+ goto err; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ bch2_trans_unlock(&trans); -+ -+ if (!bch2_bkey_matches_ptr(c, k, -+ rbio->pick.ptr, -+ rbio->pos.offset - -+ rbio->pick.crc.offset)) { -+ /* extent we wanted to read no longer exists: */ -+ rbio->hole = true; -+ goto out; -+ } -+ -+ ret = __bch2_read_extent(&trans, rbio, bvec_iter, k, 0, failed, flags); -+ if (ret == READ_RETRY) -+ goto retry; -+ if (ret) -+ goto err; -+out: -+ bch2_rbio_done(rbio); -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ return; -+err: -+ rbio->bio.bi_status = BLK_STS_IOERR; -+ goto out; -+} -+ -+static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, -+ struct bvec_iter bvec_iter, u64 inode, -+ struct bch_io_failures *failed, unsigned flags) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_on_stack sk; -+ struct bkey_s_c k; -+ int ret; -+ -+ flags &= ~BCH_READ_LAST_FRAGMENT; -+ flags |= BCH_READ_MUST_CLONE; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, -+ POS(inode, bvec_iter.bi_sector), -+ BTREE_ITER_SLOTS, k, ret) { -+ unsigned bytes, sectors, offset_into_extent; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ -+ offset_into_extent = iter->pos.offset - -+ bkey_start_offset(k.k); -+ sectors = k.k->size - offset_into_extent; -+ -+ ret = bch2_read_indirect_extent(&trans, -+ &offset_into_extent, &sk); -+ if (ret) -+ break; -+ -+ sectors = min(sectors, k.k->size - offset_into_extent); -+ -+ bch2_trans_unlock(&trans); -+ -+ bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; -+ swap(bvec_iter.bi_size, bytes); -+ -+ ret = 
__bch2_read_extent(&trans, rbio, bvec_iter, k, -+ offset_into_extent, failed, flags); -+ switch (ret) { -+ case READ_RETRY: -+ goto retry; -+ case READ_ERR: -+ goto err; -+ }; -+ -+ if (bytes == bvec_iter.bi_size) -+ goto out; -+ -+ swap(bvec_iter.bi_size, bytes); -+ bio_advance_iter(&rbio->bio, &bvec_iter, bytes); -+ } -+ -+ if (ret == -EINTR) -+ goto retry; -+ /* -+ * If we get here, it better have been because there was an error -+ * reading a btree node -+ */ -+ BUG_ON(!ret); -+ __bcache_io_error(c, "btree IO error: %i", ret); -+err: -+ rbio->bio.bi_status = BLK_STS_IOERR; -+out: -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ bch2_rbio_done(rbio); -+} -+ -+static void bch2_rbio_retry(struct work_struct *work) -+{ -+ struct bch_read_bio *rbio = -+ container_of(work, struct bch_read_bio, work); -+ struct bch_fs *c = rbio->c; -+ struct bvec_iter iter = rbio->bvec_iter; -+ unsigned flags = rbio->flags; -+ u64 inode = rbio->pos.inode; -+ struct bch_io_failures failed = { .nr = 0 }; -+ -+ trace_read_retry(&rbio->bio); -+ -+ if (rbio->retry == READ_RETRY_AVOID) -+ bch2_mark_io_failure(&failed, &rbio->pick); -+ -+ rbio->bio.bi_status = 0; -+ -+ rbio = bch2_rbio_free(rbio); -+ -+ flags |= BCH_READ_IN_RETRY; -+ flags &= ~BCH_READ_MAY_PROMOTE; -+ -+ if (flags & BCH_READ_NODECODE) -+ bch2_read_retry_nodecode(c, rbio, iter, inode, &failed, flags); -+ else -+ bch2_read_retry(c, rbio, iter, inode, &failed, flags); -+} -+ -+static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, -+ blk_status_t error) -+{ -+ rbio->retry = retry; -+ -+ if (rbio->flags & BCH_READ_IN_RETRY) -+ return; -+ -+ if (retry == READ_ERR) { -+ rbio = bch2_rbio_free(rbio); -+ -+ rbio->bio.bi_status = error; -+ bch2_rbio_done(rbio); -+ } else { -+ bch2_rbio_punt(rbio, bch2_rbio_retry, -+ RBIO_CONTEXT_UNBOUND, system_unbound_wq); -+ } -+} -+ -+static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, -+ struct bch_read_bio *rbio) -+{ -+ struct bch_fs *c = rbio->c; -+ u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset; -+ struct bch_extent_crc_unpacked new_crc; -+ struct btree_iter *iter = NULL; -+ struct bkey_i *new; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ if (crc_is_compressed(rbio->pick.crc)) -+ return 0; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_EXTENTS, rbio->pos, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ if ((ret = PTR_ERR_OR_ZERO(iter))) -+ goto out; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ if ((ret = bkey_err(k))) -+ goto out; -+ -+ /* -+ * going to be temporarily appending another checksum entry: -+ */ -+ new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + -+ BKEY_EXTENT_U64s_MAX * 8); -+ if ((ret = PTR_ERR_OR_ZERO(new))) -+ goto out; -+ -+ bkey_reassemble(new, k); -+ k = bkey_i_to_s_c(new); -+ -+ if (bversion_cmp(k.k->version, rbio->version) || -+ !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) -+ goto out; -+ -+ /* Extent was merged? 
*/ -+ if (bkey_start_offset(k.k) < data_offset || -+ k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size) -+ goto out; -+ -+ if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version, -+ rbio->pick.crc, NULL, &new_crc, -+ bkey_start_offset(k.k) - data_offset, k.k->size, -+ rbio->pick.crc.csum_type)) { -+ bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)"); -+ ret = 0; -+ goto out; -+ } -+ -+ if (!bch2_bkey_narrow_crcs(new, new_crc)) -+ goto out; -+ -+ bch2_trans_update(trans, iter, new, 0); -+out: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) -+{ -+ bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL, -+ __bch2_rbio_narrow_crcs(&trans, rbio)); -+} -+ -+/* Inner part that may run in process context */ -+static void __bch2_read_endio(struct work_struct *work) -+{ -+ struct bch_read_bio *rbio = -+ container_of(work, struct bch_read_bio, work); -+ struct bch_fs *c = rbio->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev); -+ struct bio *src = &rbio->bio; -+ struct bio *dst = &bch2_rbio_parent(rbio)->bio; -+ struct bvec_iter dst_iter = rbio->bvec_iter; -+ struct bch_extent_crc_unpacked crc = rbio->pick.crc; -+ struct nonce nonce = extent_nonce(rbio->version, crc); -+ struct bch_csum csum; -+ -+ /* Reset iterator for checksumming and copying bounced data: */ -+ if (rbio->bounce) { -+ src->bi_iter.bi_size = crc.compressed_size << 9; -+ src->bi_iter.bi_idx = 0; -+ src->bi_iter.bi_bvec_done = 0; -+ } else { -+ src->bi_iter = rbio->bvec_iter; -+ } -+ -+ csum = bch2_checksum_bio(c, crc.csum_type, nonce, src); -+ if (bch2_crc_cmp(csum, rbio->pick.crc.csum)) -+ goto csum_err; -+ -+ if (unlikely(rbio->narrow_crcs)) -+ bch2_rbio_narrow_crcs(rbio); -+ -+ if (rbio->flags & BCH_READ_NODECODE) -+ goto nodecode; -+ -+ /* Adjust crc to point to subset of data we want: */ -+ crc.offset += rbio->offset_into_extent; -+ crc.live_size = bvec_iter_sectors(rbio->bvec_iter); -+ -+ if (crc_is_compressed(crc)) { -+ bch2_encrypt_bio(c, crc.csum_type, nonce, src); -+ if (bch2_bio_uncompress(c, src, dst, dst_iter, crc)) -+ goto decompression_err; -+ } else { -+ /* don't need to decrypt the entire bio: */ -+ nonce = nonce_add(nonce, crc.offset << 9); -+ bio_advance(src, crc.offset << 9); -+ -+ BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size); -+ src->bi_iter.bi_size = dst_iter.bi_size; -+ -+ bch2_encrypt_bio(c, crc.csum_type, nonce, src); -+ -+ if (rbio->bounce) { -+ struct bvec_iter src_iter = src->bi_iter; -+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter); -+ } -+ } -+ -+ if (rbio->promote) { -+ /* -+ * Re encrypt data we decrypted, so it's consistent with -+ * rbio->crc: -+ */ -+ bch2_encrypt_bio(c, crc.csum_type, nonce, src); -+ promote_start(rbio->promote, rbio); -+ rbio->promote = NULL; -+ } -+nodecode: -+ if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) { -+ rbio = bch2_rbio_free(rbio); -+ bch2_rbio_done(rbio); -+ } -+ return; -+csum_err: -+ /* -+ * Checksum error: if the bio wasn't bounced, we may have been -+ * reading into buffers owned by userspace (that userspace can -+ * scribble over) - retry the read, bouncing it this time: -+ */ -+ if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) { -+ rbio->flags |= BCH_READ_MUST_BOUNCE; -+ bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR); -+ return; -+ } -+ -+ bch2_dev_io_error(ca, -+ "data checksum error, inode %llu offset %llu: expected %0llx:%0llx got %0llx:%0llx (type %u)", -+ rbio->pos.inode, (u64) 
rbio->bvec_iter.bi_sector, -+ rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, -+ csum.hi, csum.lo, crc.csum_type); -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); -+ return; -+decompression_err: -+ __bcache_io_error(c, "decompression error, inode %llu offset %llu", -+ rbio->pos.inode, -+ (u64) rbio->bvec_iter.bi_sector); -+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); -+ return; -+} -+ -+static void bch2_read_endio(struct bio *bio) -+{ -+ struct bch_read_bio *rbio = -+ container_of(bio, struct bch_read_bio, bio); -+ struct bch_fs *c = rbio->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev); -+ struct workqueue_struct *wq = NULL; -+ enum rbio_context context = RBIO_CONTEXT_NULL; -+ -+ if (rbio->have_ioref) { -+ bch2_latency_acct(ca, rbio->submit_time, READ); -+ percpu_ref_put(&ca->io_ref); -+ } -+ -+ if (!rbio->split) -+ rbio->bio.bi_end_io = rbio->end_io; -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "data read; %s", -+ bch2_blk_status_to_str(bio->bi_status))) { -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); -+ return; -+ } -+ -+ if (rbio->pick.ptr.cached && -+ (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) || -+ ptr_stale(ca, &rbio->pick.ptr))) { -+ atomic_long_inc(&c->read_realloc_races); -+ -+ if (rbio->flags & BCH_READ_RETRY_IF_STALE) -+ bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN); -+ else -+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN); -+ return; -+ } -+ -+ if (rbio->narrow_crcs || -+ crc_is_compressed(rbio->pick.crc) || -+ bch2_csum_type_is_encryption(rbio->pick.crc.csum_type)) -+ context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq; -+ else if (rbio->pick.crc.csum_type) -+ context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq; -+ -+ bch2_rbio_punt(rbio, __bch2_read_endio, context, wq); -+} -+ -+int __bch2_read_indirect_extent(struct btree_trans *trans, -+ unsigned *offset_into_extent, -+ struct bkey_on_stack *orig_k) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 reflink_offset; -+ int ret; -+ -+ reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) + -+ *offset_into_extent; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_REFLINK, -+ POS(0, reflink_offset), -+ BTREE_ITER_SLOTS); -+ ret = PTR_ERR_OR_ZERO(iter); -+ if (ret) -+ return ret; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (k.k->type != KEY_TYPE_reflink_v) { -+ __bcache_io_error(trans->c, -+ "pointer to nonexistent indirect extent"); -+ ret = -EIO; -+ goto err; -+ } -+ -+ *offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); -+ bkey_on_stack_reassemble(orig_k, trans->c, k); -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, -+ struct bvec_iter iter, struct bkey_s_c k, -+ unsigned offset_into_extent, -+ struct bch_io_failures *failed, unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct extent_ptr_decoded pick; -+ struct bch_read_bio *rbio = NULL; -+ struct bch_dev *ca; -+ struct promote_op *promote = NULL; -+ bool bounce = false, read_full = false, narrow_crcs = false; -+ struct bpos pos = bkey_start_pos(k.k); -+ int pick_ret; -+ -+ if (k.k->type == KEY_TYPE_inline_data) { -+ struct bkey_s_c_inline_data d = bkey_s_c_to_inline_data(k); -+ unsigned bytes = min_t(unsigned, iter.bi_size, -+ bkey_val_bytes(d.k)); -+ -+ swap(iter.bi_size, bytes); -+ memcpy_to_bio(&orig->bio, iter, d.v->data); -+ swap(iter.bi_size, bytes); -+ bio_advance_iter(&orig->bio, &iter, 
bytes); -+ zero_fill_bio_iter(&orig->bio, iter); -+ goto out_read_done; -+ } -+ -+ pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick); -+ -+ /* hole or reservation - just zero fill: */ -+ if (!pick_ret) -+ goto hole; -+ -+ if (pick_ret < 0) { -+ __bcache_io_error(c, "no device to read from"); -+ goto err; -+ } -+ -+ if (pick_ret > 0) -+ ca = bch_dev_bkey_exists(c, pick.ptr.dev); -+ -+ if (flags & BCH_READ_NODECODE) { -+ /* -+ * can happen if we retry, and the extent we were going to read -+ * has been merged in the meantime: -+ */ -+ if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS) -+ goto hole; -+ -+ iter.bi_size = pick.crc.compressed_size << 9; -+ goto get_bio; -+ } -+ -+ if (!(flags & BCH_READ_LAST_FRAGMENT) || -+ bio_flagged(&orig->bio, BIO_CHAIN)) -+ flags |= BCH_READ_MUST_CLONE; -+ -+ narrow_crcs = !(flags & BCH_READ_IN_RETRY) && -+ bch2_can_narrow_extent_crcs(k, pick.crc); -+ -+ if (narrow_crcs && (flags & BCH_READ_USER_MAPPED)) -+ flags |= BCH_READ_MUST_BOUNCE; -+ -+ EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size); -+ -+ if (crc_is_compressed(pick.crc) || -+ (pick.crc.csum_type != BCH_CSUM_NONE && -+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || -+ (bch2_csum_type_is_encryption(pick.crc.csum_type) && -+ (flags & BCH_READ_USER_MAPPED)) || -+ (flags & BCH_READ_MUST_BOUNCE)))) { -+ read_full = true; -+ bounce = true; -+ } -+ -+ if (orig->opts.promote_target) -+ promote = promote_alloc(c, iter, k, &pick, orig->opts, flags, -+ &rbio, &bounce, &read_full); -+ -+ if (!read_full) { -+ EBUG_ON(crc_is_compressed(pick.crc)); -+ EBUG_ON(pick.crc.csum_type && -+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || -+ bvec_iter_sectors(iter) != pick.crc.live_size || -+ pick.crc.offset || -+ offset_into_extent)); -+ -+ pos.offset += offset_into_extent; -+ pick.ptr.offset += pick.crc.offset + -+ offset_into_extent; -+ offset_into_extent = 0; -+ pick.crc.compressed_size = bvec_iter_sectors(iter); -+ pick.crc.uncompressed_size = bvec_iter_sectors(iter); -+ pick.crc.offset = 0; -+ pick.crc.live_size = bvec_iter_sectors(iter); -+ offset_into_extent = 0; -+ } -+get_bio: -+ if (rbio) { -+ /* -+ * promote already allocated bounce rbio: -+ * promote needs to allocate a bio big enough for uncompressing -+ * data in the write path, but we're not going to use it all -+ * here: -+ */ -+ EBUG_ON(rbio->bio.bi_iter.bi_size < -+ pick.crc.compressed_size << 9); -+ rbio->bio.bi_iter.bi_size = -+ pick.crc.compressed_size << 9; -+ } else if (bounce) { -+ unsigned sectors = pick.crc.compressed_size; -+ -+ rbio = rbio_init(bio_alloc_bioset(GFP_NOIO, -+ DIV_ROUND_UP(sectors, PAGE_SECTORS), -+ &c->bio_read_split), -+ orig->opts); -+ -+ bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9); -+ rbio->bounce = true; -+ rbio->split = true; -+ } else if (flags & BCH_READ_MUST_CLONE) { -+ /* -+ * Have to clone if there were any splits, due to error -+ * reporting issues (if a split errored, and retrying didn't -+ * work, when it reports the error to its parent (us) we don't -+ * know if the error was from our bio, and we should retry, or -+ * from the whole bio, in which case we don't want to retry and -+ * lose the error) -+ */ -+ rbio = rbio_init(bio_clone_fast(&orig->bio, GFP_NOIO, -+ &c->bio_read_split), -+ orig->opts); -+ rbio->bio.bi_iter = iter; -+ rbio->split = true; -+ } else { -+ rbio = orig; -+ rbio->bio.bi_iter = iter; -+ EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN)); -+ } -+ -+ EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size); -+ -+ rbio->c = c; 
-+ rbio->submit_time = local_clock(); -+ if (rbio->split) -+ rbio->parent = orig; -+ else -+ rbio->end_io = orig->bio.bi_end_io; -+ rbio->bvec_iter = iter; -+ rbio->offset_into_extent= offset_into_extent; -+ rbio->flags = flags; -+ rbio->have_ioref = pick_ret > 0 && bch2_dev_get_ioref(ca, READ); -+ rbio->narrow_crcs = narrow_crcs; -+ rbio->hole = 0; -+ rbio->retry = 0; -+ rbio->context = 0; -+ /* XXX: only initialize this if needed */ -+ rbio->devs_have = bch2_bkey_devs(k); -+ rbio->pick = pick; -+ rbio->pos = pos; -+ rbio->version = k.k->version; -+ rbio->promote = promote; -+ INIT_WORK(&rbio->work, NULL); -+ -+ rbio->bio.bi_opf = orig->bio.bi_opf; -+ rbio->bio.bi_iter.bi_sector = pick.ptr.offset; -+ rbio->bio.bi_end_io = bch2_read_endio; -+ -+ if (rbio->bounce) -+ trace_read_bounce(&rbio->bio); -+ -+ bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); -+ -+ if (pick.ptr.cached) -+ bch2_bucket_io_time_reset(trans, pick.ptr.dev, -+ PTR_BUCKET_NR(ca, &pick.ptr), READ); -+ -+ if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) { -+ bio_inc_remaining(&orig->bio); -+ trace_read_split(&orig->bio); -+ } -+ -+ if (!rbio->pick.idx) { -+ if (!rbio->have_ioref) { -+ __bcache_io_error(c, "no device to read from"); -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); -+ goto out; -+ } -+ -+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user], -+ bio_sectors(&rbio->bio)); -+ bio_set_dev(&rbio->bio, ca->disk_sb.bdev); -+ -+ if (likely(!(flags & BCH_READ_IN_RETRY))) -+ submit_bio(&rbio->bio); -+ else -+ submit_bio_wait(&rbio->bio); -+ } else { -+ /* Attempting reconstruct read: */ -+ if (bch2_ec_read_extent(c, rbio)) { -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); -+ goto out; -+ } -+ -+ if (likely(!(flags & BCH_READ_IN_RETRY))) -+ bio_endio(&rbio->bio); -+ } -+out: -+ if (likely(!(flags & BCH_READ_IN_RETRY))) { -+ return 0; -+ } else { -+ int ret; -+ -+ rbio->context = RBIO_CONTEXT_UNBOUND; -+ bch2_read_endio(&rbio->bio); -+ -+ ret = rbio->retry; -+ rbio = bch2_rbio_free(rbio); -+ -+ if (ret == READ_RETRY_AVOID) { -+ bch2_mark_io_failure(failed, &pick); -+ ret = READ_RETRY; -+ } -+ -+ return ret; -+ } -+ -+err: -+ if (flags & BCH_READ_IN_RETRY) -+ return READ_ERR; -+ -+ orig->bio.bi_status = BLK_STS_IOERR; -+ goto out_read_done; -+ -+hole: -+ /* -+ * won't normally happen in the BCH_READ_NODECODE -+ * (bch2_move_extent()) path, but if we retry and the extent we wanted -+ * to read no longer exists we have to signal that: -+ */ -+ if (flags & BCH_READ_NODECODE) -+ orig->hole = true; -+ -+ zero_fill_bio_iter(&orig->bio, iter); -+out_read_done: -+ if (flags & BCH_READ_LAST_FRAGMENT) -+ bch2_rbio_done(orig); -+ return 0; -+} -+ -+void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_on_stack sk; -+ struct bkey_s_c k; -+ unsigned flags = BCH_READ_RETRY_IF_STALE| -+ BCH_READ_MAY_PROMOTE| -+ BCH_READ_USER_MAPPED; -+ int ret; -+ -+ BUG_ON(rbio->_state); -+ BUG_ON(flags & BCH_READ_NODECODE); -+ BUG_ON(flags & BCH_READ_IN_RETRY); -+ -+ rbio->c = c; -+ rbio->start_time = local_clock(); -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, 0, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, -+ POS(inode, rbio->bio.bi_iter.bi_sector), -+ BTREE_ITER_SLOTS); -+ while (1) { -+ unsigned bytes, sectors, offset_into_extent; -+ -+ bch2_btree_iter_set_pos(iter, -+ POS(inode, rbio->bio.bi_iter.bi_sector)); -+ -+ k = 
bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ offset_into_extent = iter->pos.offset - -+ bkey_start_offset(k.k); -+ sectors = k.k->size - offset_into_extent; -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ -+ ret = bch2_read_indirect_extent(&trans, -+ &offset_into_extent, &sk); -+ if (ret) -+ goto err; -+ -+ /* -+ * With indirect extents, the amount of data to read is the min -+ * of the original extent and the indirect extent: -+ */ -+ sectors = min(sectors, k.k->size - offset_into_extent); -+ -+ /* -+ * Unlock the iterator while the btree node's lock is still in -+ * cache, before doing the IO: -+ */ -+ bch2_trans_unlock(&trans); -+ -+ bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; -+ swap(rbio->bio.bi_iter.bi_size, bytes); -+ -+ if (rbio->bio.bi_iter.bi_size == bytes) -+ flags |= BCH_READ_LAST_FRAGMENT; -+ -+ bch2_read_extent(&trans, rbio, k, offset_into_extent, flags); -+ -+ if (flags & BCH_READ_LAST_FRAGMENT) -+ break; -+ -+ swap(rbio->bio.bi_iter.bi_size, bytes); -+ bio_advance(&rbio->bio, bytes); -+ } -+out: -+ bch2_trans_exit(&trans); -+ bkey_on_stack_exit(&sk, c); -+ return; -+err: -+ if (ret == -EINTR) -+ goto retry; -+ -+ bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret); -+ bch2_rbio_done(rbio); -+ goto out; -+} -+ -+void bch2_fs_io_exit(struct bch_fs *c) -+{ -+ if (c->promote_table.tbl) -+ rhashtable_destroy(&c->promote_table); -+ mempool_exit(&c->bio_bounce_pages); -+ bioset_exit(&c->bio_write); -+ bioset_exit(&c->bio_read_split); -+ bioset_exit(&c->bio_read); -+} -+ -+int bch2_fs_io_init(struct bch_fs *c) -+{ -+ if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio), -+ BIOSET_NEED_BVECS) || -+ bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio), -+ BIOSET_NEED_BVECS) || -+ bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio), -+ BIOSET_NEED_BVECS) || -+ mempool_init_page_pool(&c->bio_bounce_pages, -+ max_t(unsigned, -+ c->opts.btree_node_size, -+ c->sb.encoded_extent_max) / -+ PAGE_SECTORS, 0) || -+ rhashtable_init(&c->promote_table, &bch_promote_params)) -+ return -ENOMEM; -+ -+ return 0; -+} -diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h -new file mode 100644 -index 000000000000..e6aac594f3e6 ---- /dev/null -+++ b/fs/bcachefs/io.h -@@ -0,0 +1,169 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_IO_H -+#define _BCACHEFS_IO_H -+ -+#include "checksum.h" -+#include "bkey_on_stack.h" -+#include "io_types.h" -+ -+#define to_wbio(_bio) \ -+ container_of((_bio), struct bch_write_bio, bio) -+ -+#define to_rbio(_bio) \ -+ container_of((_bio), struct bch_read_bio, bio) -+ -+void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *); -+void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t); -+ -+void bch2_latency_acct(struct bch_dev *, u64, int); -+ -+void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, -+ enum bch_data_type, const struct bkey_i *); -+ -+#define BLK_STS_REMOVED ((__force blk_status_t)128) -+ -+const char *bch2_blk_status_to_str(blk_status_t); -+ -+enum bch_write_flags { -+ BCH_WRITE_ALLOC_NOWAIT = (1 << 0), -+ BCH_WRITE_CACHED = (1 << 1), -+ BCH_WRITE_FLUSH = (1 << 2), -+ BCH_WRITE_DATA_ENCODED = (1 << 3), -+ BCH_WRITE_PAGES_STABLE = (1 << 4), -+ BCH_WRITE_PAGES_OWNED = (1 << 5), -+ BCH_WRITE_ONLY_SPECIFIED_DEVS = (1 << 6), -+ BCH_WRITE_WROTE_DATA_INLINE = (1 << 7), -+ BCH_WRITE_FROM_INTERNAL = (1 << 8), -+ -+ /* Internal: */ -+ BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 9), -+ 
BCH_WRITE_SKIP_CLOSURE_PUT = (1 << 10), -+ BCH_WRITE_DONE = (1 << 11), -+}; -+ -+static inline u64 *op_journal_seq(struct bch_write_op *op) -+{ -+ return (op->flags & BCH_WRITE_JOURNAL_SEQ_PTR) -+ ? op->journal_seq_p : &op->journal_seq; -+} -+ -+static inline void op_journal_seq_set(struct bch_write_op *op, u64 *journal_seq) -+{ -+ op->journal_seq_p = journal_seq; -+ op->flags |= BCH_WRITE_JOURNAL_SEQ_PTR; -+} -+ -+static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op) -+{ -+ return op->alloc_reserve == RESERVE_MOVINGGC -+ ? op->c->copygc_wq -+ : op->c->wq; -+} -+ -+int bch2_extent_update(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *, struct disk_reservation *, -+ u64 *, u64, s64 *); -+int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, -+ struct bpos, u64 *, s64 *); -+int bch2_fpunch(struct bch_fs *c, u64, u64, u64, u64 *, s64 *); -+ -+int bch2_write_index_default(struct bch_write_op *); -+ -+static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, -+ struct bch_io_opts opts) -+{ -+ op->c = c; -+ op->end_io = NULL; -+ op->flags = 0; -+ op->written = 0; -+ op->error = 0; -+ op->csum_type = bch2_data_checksum_type(c, opts.data_checksum); -+ op->compression_type = bch2_compression_opt_to_type[opts.compression]; -+ op->nr_replicas = 0; -+ op->nr_replicas_required = c->opts.data_replicas_required; -+ op->alloc_reserve = RESERVE_NONE; -+ op->incompressible = 0; -+ op->open_buckets.nr = 0; -+ op->devs_have.nr = 0; -+ op->target = 0; -+ op->opts = opts; -+ op->pos = POS_MAX; -+ op->version = ZERO_VERSION; -+ op->write_point = (struct write_point_specifier) { 0 }; -+ op->res = (struct disk_reservation) { 0 }; -+ op->journal_seq = 0; -+ op->new_i_size = U64_MAX; -+ op->i_sectors_delta = 0; -+ op->index_update_fn = bch2_write_index_default; -+} -+ -+void bch2_write(struct closure *); -+ -+static inline struct bch_write_bio *wbio_init(struct bio *bio) -+{ -+ struct bch_write_bio *wbio = to_wbio(bio); -+ -+ memset(wbio, 0, offsetof(struct bch_write_bio, bio)); -+ return wbio; -+} -+ -+struct bch_devs_mask; -+struct cache_promote_op; -+struct extent_ptr_decoded; -+ -+int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, -+ struct bkey_on_stack *); -+ -+static inline int bch2_read_indirect_extent(struct btree_trans *trans, -+ unsigned *offset_into_extent, -+ struct bkey_on_stack *k) -+{ -+ return k->k->k.type == KEY_TYPE_reflink_p -+ ? 
__bch2_read_indirect_extent(trans, offset_into_extent, k) -+ : 0; -+} -+ -+enum bch_read_flags { -+ BCH_READ_RETRY_IF_STALE = 1 << 0, -+ BCH_READ_MAY_PROMOTE = 1 << 1, -+ BCH_READ_USER_MAPPED = 1 << 2, -+ BCH_READ_NODECODE = 1 << 3, -+ BCH_READ_LAST_FRAGMENT = 1 << 4, -+ -+ /* internal: */ -+ BCH_READ_MUST_BOUNCE = 1 << 5, -+ BCH_READ_MUST_CLONE = 1 << 6, -+ BCH_READ_IN_RETRY = 1 << 7, -+}; -+ -+int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *, -+ struct bvec_iter, struct bkey_s_c, unsigned, -+ struct bch_io_failures *, unsigned); -+ -+static inline void bch2_read_extent(struct btree_trans *trans, -+ struct bch_read_bio *rbio, -+ struct bkey_s_c k, -+ unsigned offset_into_extent, -+ unsigned flags) -+{ -+ __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, k, -+ offset_into_extent, NULL, flags); -+} -+ -+void bch2_read(struct bch_fs *, struct bch_read_bio *, u64); -+ -+static inline struct bch_read_bio *rbio_init(struct bio *bio, -+ struct bch_io_opts opts) -+{ -+ struct bch_read_bio *rbio = to_rbio(bio); -+ -+ rbio->_state = 0; -+ rbio->promote = NULL; -+ rbio->opts = opts; -+ return rbio; -+} -+ -+void bch2_fs_io_exit(struct bch_fs *); -+int bch2_fs_io_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_IO_H */ -diff --git a/fs/bcachefs/io_types.h b/fs/bcachefs/io_types.h -new file mode 100644 -index 000000000000..b23727d212b9 ---- /dev/null -+++ b/fs/bcachefs/io_types.h -@@ -0,0 +1,148 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_IO_TYPES_H -+#define _BCACHEFS_IO_TYPES_H -+ -+#include "alloc_types.h" -+#include "btree_types.h" -+#include "buckets_types.h" -+#include "extents_types.h" -+#include "keylist_types.h" -+#include "opts.h" -+#include "super_types.h" -+ -+#include -+#include -+ -+struct bch_read_bio { -+ struct bch_fs *c; -+ u64 start_time; -+ u64 submit_time; -+ -+ /* -+ * Reads will often have to be split, and if the extent being read from -+ * was checksummed or compressed we'll also have to allocate bounce -+ * buffers and copy the data back into the original bio. -+ * -+ * If we didn't have to split, we have to save and restore the original -+ * bi_end_io - @split below indicates which: -+ */ -+ union { -+ struct bch_read_bio *parent; -+ bio_end_io_t *end_io; -+ }; -+ -+ /* -+ * Saved copy of bio->bi_iter, from submission time - allows us to -+ * resubmit on IO error, and also to copy data back to the original bio -+ * when we're bouncing: -+ */ -+ struct bvec_iter bvec_iter; -+ -+ unsigned offset_into_extent; -+ -+ u16 flags; -+ union { -+ struct { -+ u16 bounce:1, -+ split:1, -+ kmalloc:1, -+ have_ioref:1, -+ narrow_crcs:1, -+ hole:1, -+ retry:2, -+ context:2; -+ }; -+ u16 _state; -+ }; -+ -+ struct bch_devs_list devs_have; -+ -+ struct extent_ptr_decoded pick; -+ /* start pos of data we read (may not be pos of data we want) */ -+ struct bpos pos; -+ struct bversion version; -+ -+ struct promote_op *promote; -+ -+ struct bch_io_opts opts; -+ -+ struct work_struct work; -+ -+ struct bio bio; -+}; -+ -+struct bch_write_bio { -+ struct bch_fs *c; -+ struct bch_write_bio *parent; -+ -+ u64 submit_time; -+ -+ struct bch_devs_list failed; -+ u8 dev; -+ -+ unsigned split:1, -+ bounce:1, -+ put_bio:1, -+ have_ioref:1, -+ used_mempool:1; -+ -+ struct bio bio; -+}; -+ -+struct bch_write_op { -+ struct closure cl; -+ struct bch_fs *c; -+ void (*end_io)(struct bch_write_op *); -+ u64 start_time; -+ -+ unsigned written; /* sectors */ -+ u16 flags; -+ s16 error; /* dio write path expects it to hold -ERESTARTSYS... 
*/ -+ -+ unsigned csum_type:4; -+ unsigned compression_type:4; -+ unsigned nr_replicas:4; -+ unsigned nr_replicas_required:4; -+ unsigned alloc_reserve:3; -+ unsigned incompressible:1; -+ -+ struct bch_devs_list devs_have; -+ u16 target; -+ u16 nonce; -+ struct bch_io_opts opts; -+ -+ struct bpos pos; -+ struct bversion version; -+ -+ /* For BCH_WRITE_DATA_ENCODED: */ -+ struct bch_extent_crc_unpacked crc; -+ -+ struct write_point_specifier write_point; -+ -+ struct disk_reservation res; -+ -+ struct open_buckets open_buckets; -+ -+ /* -+ * If caller wants to flush but hasn't passed us a journal_seq ptr, we -+ * still need to stash the journal_seq somewhere: -+ */ -+ union { -+ u64 *journal_seq_p; -+ u64 journal_seq; -+ }; -+ u64 new_i_size; -+ s64 i_sectors_delta; -+ -+ int (*index_update_fn)(struct bch_write_op *); -+ -+ struct bch_devs_mask failed; -+ -+ struct keylist insert_keys; -+ u64 inline_keys[BKEY_EXTENT_U64s_MAX * 2]; -+ -+ /* Must be last: */ -+ struct bch_write_bio wbio; -+}; -+ -+#endif /* _BCACHEFS_IO_TYPES_H */ -diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c -new file mode 100644 -index 000000000000..b8b719902c63 ---- /dev/null -+++ b/fs/bcachefs/journal.c -@@ -0,0 +1,1263 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * bcachefs journalling code, for btree insertions -+ * -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_methods.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "journal.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "super-io.h" -+ -+#include -+ -+static inline struct journal_buf *journal_seq_to_buf(struct journal *, u64); -+ -+static bool __journal_entry_is_open(union journal_res_state state) -+{ -+ return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL; -+} -+ -+static bool journal_entry_is_open(struct journal *j) -+{ -+ return __journal_entry_is_open(j->reservations); -+} -+ -+static void journal_pin_new_entry(struct journal *j, int count) -+{ -+ struct journal_entry_pin_list *p; -+ -+ /* -+ * The fifo_push() needs to happen at the same time as j->seq is -+ * incremented for journal_last_seq() to be calculated correctly -+ */ -+ atomic64_inc(&j->seq); -+ p = fifo_push_ref(&j->pin); -+ -+ INIT_LIST_HEAD(&p->list); -+ INIT_LIST_HEAD(&p->flushed); -+ atomic_set(&p->count, count); -+ p->devs.nr = 0; -+} -+ -+static void bch2_journal_buf_init(struct journal *j) -+{ -+ struct journal_buf *buf = journal_cur_buf(j); -+ -+ memset(buf->has_inode, 0, sizeof(buf->has_inode)); -+ -+ memset(buf->data, 0, sizeof(*buf->data)); -+ buf->data->seq = cpu_to_le64(journal_cur_seq(j)); -+ buf->data->u64s = 0; -+} -+ -+void bch2_journal_halt(struct journal *j) -+{ -+ union journal_res_state old, new; -+ u64 v = atomic64_read(&j->reservations.counter); -+ -+ do { -+ old.v = new.v = v; -+ if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) -+ return; -+ -+ new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL; -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ journal_wake(j); -+ closure_wake_up(&journal_cur_buf(j)->wait); -+} -+ -+/* journal entry close/open: */ -+ -+void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set) -+{ -+ if (!need_write_just_set && -+ test_bit(JOURNAL_NEED_WRITE, &j->flags)) -+ bch2_time_stats_update(j->delay_time, -+ j->need_write_time); -+ -+ clear_bit(JOURNAL_NEED_WRITE, &j->flags); -+ -+ closure_call(&j->io, bch2_journal_write, 
system_highpri_wq, NULL); -+} -+ -+/* -+ * Returns true if journal entry is now closed: -+ */ -+static bool __journal_entry_close(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_buf *buf = journal_cur_buf(j); -+ union journal_res_state old, new; -+ u64 v = atomic64_read(&j->reservations.counter); -+ bool set_need_write = false; -+ unsigned sectors; -+ -+ lockdep_assert_held(&j->lock); -+ -+ do { -+ old.v = new.v = v; -+ if (old.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL) -+ return true; -+ -+ if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) { -+ /* this entry will never be written: */ -+ closure_wake_up(&buf->wait); -+ return true; -+ } -+ -+ if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) { -+ set_bit(JOURNAL_NEED_WRITE, &j->flags); -+ j->need_write_time = local_clock(); -+ set_need_write = true; -+ } -+ -+ if (new.prev_buf_unwritten) -+ return false; -+ -+ new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL; -+ new.idx++; -+ new.prev_buf_unwritten = 1; -+ -+ BUG_ON(journal_state_count(new, new.idx)); -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ buf->data->u64s = cpu_to_le32(old.cur_entry_offset); -+ -+ sectors = vstruct_blocks_plus(buf->data, c->block_bits, -+ buf->u64s_reserved) << c->block_bits; -+ BUG_ON(sectors > buf->sectors); -+ buf->sectors = sectors; -+ -+ bkey_extent_init(&buf->key); -+ -+ /* -+ * We have to set last_seq here, _before_ opening a new journal entry: -+ * -+ * A threads may replace an old pin with a new pin on their current -+ * journal reservation - the expectation being that the journal will -+ * contain either what the old pin protected or what the new pin -+ * protects. -+ * -+ * After the old pin is dropped journal_last_seq() won't include the old -+ * pin, so we can only write the updated last_seq on the entry that -+ * contains whatever the new pin protects. -+ * -+ * Restated, we can _not_ update last_seq for a given entry if there -+ * could be a newer entry open with reservations/pins that have been -+ * taken against it. 
-+ * -+ * Hence, we want update/set last_seq on the current journal entry right -+ * before we open a new one: -+ */ -+ buf->data->last_seq = cpu_to_le64(journal_last_seq(j)); -+ -+ if (journal_entry_empty(buf->data)) -+ clear_bit(JOURNAL_NOT_EMPTY, &j->flags); -+ else -+ set_bit(JOURNAL_NOT_EMPTY, &j->flags); -+ -+ journal_pin_new_entry(j, 1); -+ -+ bch2_journal_buf_init(j); -+ -+ cancel_delayed_work(&j->write_work); -+ -+ bch2_journal_space_available(j); -+ -+ bch2_journal_buf_put(j, old.idx, set_need_write); -+ return true; -+} -+ -+static bool journal_entry_close(struct journal *j) -+{ -+ bool ret; -+ -+ spin_lock(&j->lock); -+ ret = __journal_entry_close(j); -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+/* -+ * should _only_ called from journal_res_get() - when we actually want a -+ * journal reservation - journal entry is open means journal is dirty: -+ * -+ * returns: -+ * 0: success -+ * -ENOSPC: journal currently full, must invoke reclaim -+ * -EAGAIN: journal blocked, must wait -+ * -EROFS: insufficient rw devices or journal error -+ */ -+static int journal_entry_open(struct journal *j) -+{ -+ struct journal_buf *buf = journal_cur_buf(j); -+ union journal_res_state old, new; -+ int u64s; -+ u64 v; -+ -+ lockdep_assert_held(&j->lock); -+ BUG_ON(journal_entry_is_open(j)); -+ -+ if (j->blocked) -+ return -EAGAIN; -+ -+ if (j->cur_entry_error) -+ return j->cur_entry_error; -+ -+ BUG_ON(!j->cur_entry_sectors); -+ -+ buf->u64s_reserved = j->entry_u64s_reserved; -+ buf->disk_sectors = j->cur_entry_sectors; -+ buf->sectors = min(buf->disk_sectors, buf->buf_size >> 9); -+ -+ u64s = (int) (buf->sectors << 9) / sizeof(u64) - -+ journal_entry_overhead(j); -+ u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1); -+ -+ if (u64s <= le32_to_cpu(buf->data->u64s)) -+ return -ENOSPC; -+ -+ /* -+ * Must be set before marking the journal entry as open: -+ */ -+ j->cur_entry_u64s = u64s; -+ -+ v = atomic64_read(&j->reservations.counter); -+ do { -+ old.v = new.v = v; -+ -+ if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) -+ return -EROFS; -+ -+ /* Handle any already added entries */ -+ new.cur_entry_offset = le32_to_cpu(buf->data->u64s); -+ -+ EBUG_ON(journal_state_count(new, new.idx)); -+ journal_state_inc(&new); -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ if (j->res_get_blocked_start) -+ bch2_time_stats_update(j->blocked_time, -+ j->res_get_blocked_start); -+ j->res_get_blocked_start = 0; -+ -+ mod_delayed_work(system_freezable_wq, -+ &j->write_work, -+ msecs_to_jiffies(j->write_delay_ms)); -+ journal_wake(j); -+ return 0; -+} -+ -+static bool journal_quiesced(struct journal *j) -+{ -+ union journal_res_state state = READ_ONCE(j->reservations); -+ bool ret = !state.prev_buf_unwritten && !__journal_entry_is_open(state); -+ -+ if (!ret) -+ journal_entry_close(j); -+ return ret; -+} -+ -+static void journal_quiesce(struct journal *j) -+{ -+ wait_event(j->wait, journal_quiesced(j)); -+} -+ -+static void journal_write_work(struct work_struct *work) -+{ -+ struct journal *j = container_of(work, struct journal, write_work.work); -+ -+ journal_entry_close(j); -+} -+ -+/* -+ * Given an inode number, if that inode number has data in the journal that -+ * hasn't yet been flushed, return the journal sequence number that needs to be -+ * flushed: -+ */ -+u64 bch2_inode_journal_seq(struct journal *j, u64 inode) -+{ -+ size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8)); -+ u64 seq = 0; -+ -+ if (!test_bit(h, j->buf[0].has_inode) && 
-+ !test_bit(h, j->buf[1].has_inode)) -+ return 0; -+ -+ spin_lock(&j->lock); -+ if (test_bit(h, journal_cur_buf(j)->has_inode)) -+ seq = journal_cur_seq(j); -+ else if (test_bit(h, journal_prev_buf(j)->has_inode)) -+ seq = journal_cur_seq(j) - 1; -+ spin_unlock(&j->lock); -+ -+ return seq; -+} -+ -+void bch2_journal_set_has_inum(struct journal *j, u64 inode, u64 seq) -+{ -+ size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8)); -+ struct journal_buf *buf; -+ -+ spin_lock(&j->lock); -+ -+ if ((buf = journal_seq_to_buf(j, seq))) -+ set_bit(h, buf->has_inode); -+ -+ spin_unlock(&j->lock); -+} -+ -+static int __journal_res_get(struct journal *j, struct journal_res *res, -+ unsigned flags) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_buf *buf; -+ bool can_discard; -+ int ret; -+retry: -+ if (journal_res_get_fast(j, res, flags)) -+ return 0; -+ -+ if (bch2_journal_error(j)) -+ return -EROFS; -+ -+ spin_lock(&j->lock); -+ -+ /* -+ * Recheck after taking the lock, so we don't race with another thread -+ * that just did journal_entry_open() and call journal_entry_close() -+ * unnecessarily -+ */ -+ if (journal_res_get_fast(j, res, flags)) { -+ spin_unlock(&j->lock); -+ return 0; -+ } -+ -+ if (!(flags & JOURNAL_RES_GET_RESERVED) && -+ !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) { -+ /* -+ * Don't want to close current journal entry, just need to -+ * invoke reclaim: -+ */ -+ ret = -ENOSPC; -+ goto unlock; -+ } -+ -+ /* -+ * If we couldn't get a reservation because the current buf filled up, -+ * and we had room for a bigger entry on disk, signal that we want to -+ * realloc the journal bufs: -+ */ -+ buf = journal_cur_buf(j); -+ if (journal_entry_is_open(j) && -+ buf->buf_size >> 9 < buf->disk_sectors && -+ buf->buf_size < JOURNAL_ENTRY_SIZE_MAX) -+ j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1); -+ -+ if (journal_entry_is_open(j) && -+ !__journal_entry_close(j)) { -+ /* -+ * We failed to get a reservation on the current open journal -+ * entry because it's full, and we can't close it because -+ * there's still a previous one in flight: -+ */ -+ trace_journal_entry_full(c); -+ ret = -EAGAIN; -+ } else { -+ ret = journal_entry_open(j); -+ } -+unlock: -+ if ((ret == -EAGAIN || ret == -ENOSPC) && -+ !j->res_get_blocked_start) -+ j->res_get_blocked_start = local_clock() ?: 1; -+ -+ can_discard = j->can_discard; -+ spin_unlock(&j->lock); -+ -+ if (!ret) -+ goto retry; -+ -+ if (ret == -ENOSPC) { -+ WARN_ONCE(!can_discard && (flags & JOURNAL_RES_GET_RESERVED), -+ "JOURNAL_RES_GET_RESERVED set but journal full"); -+ -+ /* -+ * Journal is full - can't rely on reclaim from work item due to -+ * freezing: -+ */ -+ trace_journal_full(c); -+ -+ if (!(flags & JOURNAL_RES_GET_NONBLOCK)) { -+ if (can_discard) { -+ bch2_journal_do_discards(j); -+ goto retry; -+ } -+ -+ if (mutex_trylock(&j->reclaim_lock)) { -+ bch2_journal_reclaim(j); -+ mutex_unlock(&j->reclaim_lock); -+ } -+ } -+ -+ ret = -EAGAIN; -+ } -+ -+ return ret; -+} -+ -+/* -+ * Essentially the entry function to the journaling code. When bcachefs is doing -+ * a btree insert, it calls this function to get the current journal write. -+ * Journal write is the structure used set up journal writes. The calling -+ * function will then add its keys to the structure, queuing them for the next -+ * write. -+ * -+ * To ensure forward progress, the current task must not be holding any -+ * btree node write locks. 
-+ */ -+int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, -+ unsigned flags) -+{ -+ int ret; -+ -+ closure_wait_event(&j->async_wait, -+ (ret = __journal_res_get(j, res, flags)) != -EAGAIN || -+ (flags & JOURNAL_RES_GET_NONBLOCK)); -+ return ret; -+} -+ -+/* journal_preres: */ -+ -+static bool journal_preres_available(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags); -+ -+ if (!ret) -+ bch2_journal_reclaim_work(&j->reclaim_work.work); -+ -+ return ret; -+} -+ -+int __bch2_journal_preres_get(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ int ret; -+ -+ closure_wait_event(&j->preres_wait, -+ (ret = bch2_journal_error(j)) || -+ journal_preres_available(j, res, new_u64s, flags)); -+ return ret; -+} -+ -+/* journal_entry_res: */ -+ -+void bch2_journal_entry_res_resize(struct journal *j, -+ struct journal_entry_res *res, -+ unsigned new_u64s) -+{ -+ union journal_res_state state; -+ int d = new_u64s - res->u64s; -+ -+ spin_lock(&j->lock); -+ -+ j->entry_u64s_reserved += d; -+ if (d <= 0) -+ goto out; -+ -+ j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d); -+ smp_mb(); -+ state = READ_ONCE(j->reservations); -+ -+ if (state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL && -+ state.cur_entry_offset > j->cur_entry_u64s) { -+ j->cur_entry_u64s += d; -+ /* -+ * Not enough room in current journal entry, have to flush it: -+ */ -+ __journal_entry_close(j); -+ } else { -+ journal_cur_buf(j)->u64s_reserved += d; -+ } -+out: -+ spin_unlock(&j->lock); -+ res->u64s += d; -+} -+ -+/* journal flushing: */ -+ -+u64 bch2_journal_last_unwritten_seq(struct journal *j) -+{ -+ u64 seq; -+ -+ spin_lock(&j->lock); -+ seq = journal_cur_seq(j); -+ if (j->reservations.prev_buf_unwritten) -+ seq--; -+ spin_unlock(&j->lock); -+ -+ return seq; -+} -+ -+/** -+ * bch2_journal_open_seq_async - try to open a new journal entry if @seq isn't -+ * open yet, or wait if we cannot -+ * -+ * used by the btree interior update machinery, when it needs to write a new -+ * btree root - every journal entry contains the roots of all the btrees, so it -+ * doesn't need to bother with getting a journal reservation -+ */ -+int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ int ret; -+ -+ spin_lock(&j->lock); -+ -+ /* -+ * Can't try to open more than one sequence number ahead: -+ */ -+ BUG_ON(journal_cur_seq(j) < seq && !journal_entry_is_open(j)); -+ -+ if (journal_cur_seq(j) > seq || -+ journal_entry_is_open(j)) { -+ spin_unlock(&j->lock); -+ return 0; -+ } -+ -+ if (journal_cur_seq(j) < seq && -+ !__journal_entry_close(j)) { -+ /* haven't finished writing out the previous one: */ -+ trace_journal_entry_full(c); -+ ret = -EAGAIN; -+ } else { -+ BUG_ON(journal_cur_seq(j) != seq); -+ -+ ret = journal_entry_open(j); -+ } -+ -+ if ((ret == -EAGAIN || ret == -ENOSPC) && -+ !j->res_get_blocked_start) -+ j->res_get_blocked_start = local_clock() ?: 1; -+ -+ if (ret == -EAGAIN || ret == -ENOSPC) -+ closure_wait(&j->async_wait, cl); -+ -+ spin_unlock(&j->lock); -+ -+ if (ret == -ENOSPC) { -+ trace_journal_full(c); -+ bch2_journal_reclaim_work(&j->reclaim_work.work); -+ ret = -EAGAIN; -+ } -+ -+ return ret; -+} -+ -+static int journal_seq_error(struct journal *j, u64 seq) -+{ -+ union journal_res_state state = READ_ONCE(j->reservations); -+ -+ if (seq == 
journal_cur_seq(j)) -+ return bch2_journal_error(j); -+ -+ if (seq + 1 == journal_cur_seq(j) && -+ !state.prev_buf_unwritten && -+ seq > j->seq_ondisk) -+ return -EIO; -+ -+ return 0; -+} -+ -+static inline struct journal_buf * -+journal_seq_to_buf(struct journal *j, u64 seq) -+{ -+ /* seq should be for a journal entry that has been opened: */ -+ BUG_ON(seq > journal_cur_seq(j)); -+ BUG_ON(seq == journal_cur_seq(j) && -+ j->reservations.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL); -+ -+ if (seq == journal_cur_seq(j)) -+ return journal_cur_buf(j); -+ if (seq + 1 == journal_cur_seq(j) && -+ j->reservations.prev_buf_unwritten) -+ return journal_prev_buf(j); -+ return NULL; -+} -+ -+/** -+ * bch2_journal_wait_on_seq - wait for a journal entry to be written -+ * -+ * does _not_ cause @seq to be written immediately - if there is no other -+ * activity to cause the relevant journal entry to be filled up or flushed it -+ * can wait for an arbitrary amount of time (up to @j->write_delay_ms, which is -+ * configurable). -+ */ -+void bch2_journal_wait_on_seq(struct journal *j, u64 seq, -+ struct closure *parent) -+{ -+ struct journal_buf *buf; -+ -+ spin_lock(&j->lock); -+ -+ if ((buf = journal_seq_to_buf(j, seq))) { -+ if (!closure_wait(&buf->wait, parent)) -+ BUG(); -+ -+ if (seq == journal_cur_seq(j)) { -+ smp_mb(); -+ if (bch2_journal_error(j)) -+ closure_wake_up(&buf->wait); -+ } -+ } -+ -+ spin_unlock(&j->lock); -+} -+ -+/** -+ * bch2_journal_flush_seq_async - wait for a journal entry to be written -+ * -+ * like bch2_journal_wait_on_seq, except that it triggers a write immediately if -+ * necessary -+ */ -+void bch2_journal_flush_seq_async(struct journal *j, u64 seq, -+ struct closure *parent) -+{ -+ struct journal_buf *buf; -+ -+ spin_lock(&j->lock); -+ -+ if (parent && -+ (buf = journal_seq_to_buf(j, seq))) -+ if (!closure_wait(&buf->wait, parent)) -+ BUG(); -+ -+ if (seq == journal_cur_seq(j)) -+ __journal_entry_close(j); -+ spin_unlock(&j->lock); -+} -+ -+static int journal_seq_flushed(struct journal *j, u64 seq) -+{ -+ int ret; -+ -+ spin_lock(&j->lock); -+ ret = seq <= j->seq_ondisk ? 1 : journal_seq_error(j, seq); -+ -+ if (seq == journal_cur_seq(j)) -+ __journal_entry_close(j); -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+int bch2_journal_flush_seq(struct journal *j, u64 seq) -+{ -+ u64 start_time = local_clock(); -+ int ret, ret2; -+ -+ ret = wait_event_killable(j->wait, (ret2 = journal_seq_flushed(j, seq))); -+ -+ bch2_time_stats_update(j->flush_seq_time, start_time); -+ -+ return ret ?: ret2 < 0 ? 
ret2 : 0; -+} -+ -+/** -+ * bch2_journal_meta_async - force a journal entry to be written -+ */ -+void bch2_journal_meta_async(struct journal *j, struct closure *parent) -+{ -+ struct journal_res res; -+ -+ memset(&res, 0, sizeof(res)); -+ -+ bch2_journal_res_get(j, &res, jset_u64s(0), 0); -+ bch2_journal_res_put(j, &res); -+ -+ bch2_journal_flush_seq_async(j, res.seq, parent); -+} -+ -+int bch2_journal_meta(struct journal *j) -+{ -+ struct journal_res res; -+ int ret; -+ -+ memset(&res, 0, sizeof(res)); -+ -+ ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); -+ if (ret) -+ return ret; -+ -+ bch2_journal_res_put(j, &res); -+ -+ return bch2_journal_flush_seq(j, res.seq); -+} -+ -+/* -+ * bch2_journal_flush_async - if there is an open journal entry, or a journal -+ * still being written, write it and wait for the write to complete -+ */ -+void bch2_journal_flush_async(struct journal *j, struct closure *parent) -+{ -+ u64 seq, journal_seq; -+ -+ spin_lock(&j->lock); -+ journal_seq = journal_cur_seq(j); -+ -+ if (journal_entry_is_open(j)) { -+ seq = journal_seq; -+ } else if (journal_seq) { -+ seq = journal_seq - 1; -+ } else { -+ spin_unlock(&j->lock); -+ return; -+ } -+ spin_unlock(&j->lock); -+ -+ bch2_journal_flush_seq_async(j, seq, parent); -+} -+ -+int bch2_journal_flush(struct journal *j) -+{ -+ u64 seq, journal_seq; -+ -+ spin_lock(&j->lock); -+ journal_seq = journal_cur_seq(j); -+ -+ if (journal_entry_is_open(j)) { -+ seq = journal_seq; -+ } else if (journal_seq) { -+ seq = journal_seq - 1; -+ } else { -+ spin_unlock(&j->lock); -+ return 0; -+ } -+ spin_unlock(&j->lock); -+ -+ return bch2_journal_flush_seq(j, seq); -+} -+ -+/* block/unlock the journal: */ -+ -+void bch2_journal_unblock(struct journal *j) -+{ -+ spin_lock(&j->lock); -+ j->blocked--; -+ spin_unlock(&j->lock); -+ -+ journal_wake(j); -+} -+ -+void bch2_journal_block(struct journal *j) -+{ -+ spin_lock(&j->lock); -+ j->blocked++; -+ spin_unlock(&j->lock); -+ -+ journal_quiesce(j); -+} -+ -+/* allocate journal on a device: */ -+ -+static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, -+ bool new_fs, struct closure *cl) -+{ -+ struct bch_fs *c = ca->fs; -+ struct journal_device *ja = &ca->journal; -+ struct bch_sb_field_journal *journal_buckets; -+ u64 *new_bucket_seq = NULL, *new_buckets = NULL; -+ int ret = 0; -+ -+ /* don't handle reducing nr of buckets yet: */ -+ if (nr <= ja->nr) -+ return 0; -+ -+ ret = -ENOMEM; -+ new_buckets = kzalloc(nr * sizeof(u64), GFP_KERNEL); -+ new_bucket_seq = kzalloc(nr * sizeof(u64), GFP_KERNEL); -+ if (!new_buckets || !new_bucket_seq) -+ goto err; -+ -+ journal_buckets = bch2_sb_resize_journal(&ca->disk_sb, -+ nr + sizeof(*journal_buckets) / sizeof(u64)); -+ if (!journal_buckets) -+ goto err; -+ -+ /* -+ * We may be called from the device add path, before the new device has -+ * actually been added to the running filesystem: -+ */ -+ if (c) -+ spin_lock(&c->journal.lock); -+ -+ memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64)); -+ memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64)); -+ swap(new_buckets, ja->buckets); -+ swap(new_bucket_seq, ja->bucket_seq); -+ -+ if (c) -+ spin_unlock(&c->journal.lock); -+ -+ while (ja->nr < nr) { -+ struct open_bucket *ob = NULL; -+ unsigned pos; -+ long bucket; -+ -+ if (new_fs) { -+ bucket = bch2_bucket_alloc_new_fs(ca); -+ if (bucket < 0) { -+ ret = -ENOSPC; -+ goto err; -+ } -+ } else { -+ ob = bch2_bucket_alloc(c, ca, RESERVE_ALLOC, -+ false, cl); -+ if (IS_ERR(ob)) { -+ ret = cl ? 
-EAGAIN : -ENOSPC; -+ goto err; -+ } -+ -+ bucket = sector_to_bucket(ca, ob->ptr.offset); -+ } -+ -+ if (c) { -+ percpu_down_read(&c->mark_lock); -+ spin_lock(&c->journal.lock); -+ } -+ -+ pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0; -+ __array_insert_item(ja->buckets, ja->nr, pos); -+ __array_insert_item(ja->bucket_seq, ja->nr, pos); -+ __array_insert_item(journal_buckets->buckets, ja->nr, pos); -+ ja->nr++; -+ -+ ja->buckets[pos] = bucket; -+ ja->bucket_seq[pos] = 0; -+ journal_buckets->buckets[pos] = cpu_to_le64(bucket); -+ -+ if (pos <= ja->discard_idx) -+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr; -+ if (pos <= ja->dirty_idx_ondisk) -+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr; -+ if (pos <= ja->dirty_idx) -+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr; -+ if (pos <= ja->cur_idx) -+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr; -+ -+ bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_journal, -+ ca->mi.bucket_size, -+ gc_phase(GC_PHASE_SB), -+ 0); -+ -+ if (c) { -+ spin_unlock(&c->journal.lock); -+ percpu_up_read(&c->mark_lock); -+ } -+ -+ if (!new_fs) -+ bch2_open_bucket_put(c, ob); -+ } -+ -+ ret = 0; -+err: -+ kfree(new_bucket_seq); -+ kfree(new_buckets); -+ -+ return ret; -+} -+ -+/* -+ * Allocate more journal space at runtime - not currently making use if it, but -+ * the code works: -+ */ -+int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, -+ unsigned nr) -+{ -+ struct journal_device *ja = &ca->journal; -+ struct closure cl; -+ unsigned current_nr; -+ int ret; -+ -+ closure_init_stack(&cl); -+ -+ do { -+ struct disk_reservation disk_res = { 0, 0 }; -+ -+ closure_sync(&cl); -+ -+ mutex_lock(&c->sb_lock); -+ current_nr = ja->nr; -+ -+ /* -+ * note: journal buckets aren't really counted as _sectors_ used yet, so -+ * we don't need the disk reservation to avoid the BUG_ON() in buckets.c -+ * when space used goes up without a reservation - but we do need the -+ * reservation to ensure we'll actually be able to allocate: -+ */ -+ -+ if (bch2_disk_reservation_get(c, &disk_res, -+ bucket_to_sector(ca, nr - ja->nr), 1, 0)) { -+ mutex_unlock(&c->sb_lock); -+ return -ENOSPC; -+ } -+ -+ ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl); -+ -+ bch2_disk_reservation_put(c, &disk_res); -+ -+ if (ja->nr != current_nr) -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ } while (ret == -EAGAIN); -+ -+ return ret; -+} -+ -+int bch2_dev_journal_alloc(struct bch_dev *ca) -+{ -+ unsigned nr; -+ -+ if (dynamic_fault("bcachefs:add:journal_alloc")) -+ return -ENOMEM; -+ -+ /* -+ * clamp journal size to 1024 buckets or 512MB (in sectors), whichever -+ * is smaller: -+ */ -+ nr = clamp_t(unsigned, ca->mi.nbuckets >> 8, -+ BCH_JOURNAL_BUCKETS_MIN, -+ min(1 << 10, -+ (1 << 20) / ca->mi.bucket_size)); -+ -+ return __bch2_set_nr_journal_buckets(ca, nr, true, NULL); -+} -+ -+/* startup/shutdown: */ -+ -+static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx) -+{ -+ union journal_res_state state; -+ struct journal_buf *w; -+ bool ret; -+ -+ spin_lock(&j->lock); -+ state = READ_ONCE(j->reservations); -+ w = j->buf + !state.idx; -+ -+ ret = state.prev_buf_unwritten && -+ bch2_bkey_has_device(bkey_i_to_s_c(&w->key), dev_idx); -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca) -+{ -+ wait_event(j->wait, !bch2_journal_writing_to_device(j, ca->dev_idx)); -+} -+ -+void bch2_fs_journal_stop(struct journal *j) -+{ -+ bch2_journal_flush_all_pins(j); -+ -+ wait_event(j->wait, 
journal_entry_close(j)); -+ -+ /* do we need to write another journal entry? */ -+ if (test_bit(JOURNAL_NOT_EMPTY, &j->flags)) -+ bch2_journal_meta(j); -+ -+ journal_quiesce(j); -+ -+ BUG_ON(!bch2_journal_error(j) && -+ test_bit(JOURNAL_NOT_EMPTY, &j->flags)); -+ -+ cancel_delayed_work_sync(&j->write_work); -+ cancel_delayed_work_sync(&j->reclaim_work); -+} -+ -+int bch2_fs_journal_start(struct journal *j, u64 cur_seq, -+ struct list_head *journal_entries) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_entry_pin_list *p; -+ struct journal_replay *i; -+ u64 last_seq = cur_seq, nr, seq; -+ -+ if (!list_empty(journal_entries)) -+ last_seq = le64_to_cpu(list_last_entry(journal_entries, -+ struct journal_replay, list)->j.last_seq); -+ -+ nr = cur_seq - last_seq; -+ -+ if (nr + 1 > j->pin.size) { -+ free_fifo(&j->pin); -+ init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL); -+ if (!j->pin.data) { -+ bch_err(c, "error reallocating journal fifo (%llu open entries)", nr); -+ return -ENOMEM; -+ } -+ } -+ -+ j->replay_journal_seq = last_seq; -+ j->replay_journal_seq_end = cur_seq; -+ j->last_seq_ondisk = last_seq; -+ j->pin.front = last_seq; -+ j->pin.back = cur_seq; -+ atomic64_set(&j->seq, cur_seq - 1); -+ -+ fifo_for_each_entry_ptr(p, &j->pin, seq) { -+ INIT_LIST_HEAD(&p->list); -+ INIT_LIST_HEAD(&p->flushed); -+ atomic_set(&p->count, 1); -+ p->devs.nr = 0; -+ } -+ -+ list_for_each_entry(i, journal_entries, list) { -+ seq = le64_to_cpu(i->j.seq); -+ BUG_ON(seq >= cur_seq); -+ -+ if (seq < last_seq) -+ continue; -+ -+ journal_seq_pin(j, seq)->devs = i->devs; -+ } -+ -+ spin_lock(&j->lock); -+ -+ set_bit(JOURNAL_STARTED, &j->flags); -+ -+ journal_pin_new_entry(j, 1); -+ bch2_journal_buf_init(j); -+ -+ c->last_bucket_seq_cleanup = journal_cur_seq(j); -+ -+ bch2_journal_space_available(j); -+ spin_unlock(&j->lock); -+ -+ return 0; -+} -+ -+/* init/exit: */ -+ -+void bch2_dev_journal_exit(struct bch_dev *ca) -+{ -+ kfree(ca->journal.bio); -+ kfree(ca->journal.buckets); -+ kfree(ca->journal.bucket_seq); -+ -+ ca->journal.bio = NULL; -+ ca->journal.buckets = NULL; -+ ca->journal.bucket_seq = NULL; -+} -+ -+int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) -+{ -+ struct journal_device *ja = &ca->journal; -+ struct bch_sb_field_journal *journal_buckets = -+ bch2_sb_get_journal(sb); -+ unsigned i; -+ -+ ja->nr = bch2_nr_journal_buckets(journal_buckets); -+ -+ ja->bucket_seq = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); -+ if (!ja->bucket_seq) -+ return -ENOMEM; -+ -+ ca->journal.bio = bio_kmalloc(GFP_KERNEL, -+ DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE)); -+ if (!ca->journal.bio) -+ return -ENOMEM; -+ -+ ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); -+ if (!ja->buckets) -+ return -ENOMEM; -+ -+ for (i = 0; i < ja->nr; i++) -+ ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]); -+ -+ return 0; -+} -+ -+void bch2_fs_journal_exit(struct journal *j) -+{ -+ kvpfree(j->buf[1].data, j->buf[1].buf_size); -+ kvpfree(j->buf[0].data, j->buf[0].buf_size); -+ free_fifo(&j->pin); -+} -+ -+int bch2_fs_journal_init(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ static struct lock_class_key res_key; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ spin_lock_init(&j->lock); -+ spin_lock_init(&j->err_lock); -+ init_waitqueue_head(&j->wait); -+ INIT_DELAYED_WORK(&j->write_work, journal_write_work); -+ INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work); -+ 
init_waitqueue_head(&j->pin_flush_wait); -+ mutex_init(&j->reclaim_lock); -+ mutex_init(&j->discard_lock); -+ -+ lockdep_init_map(&j->res_map, "journal res", &res_key, 0); -+ -+ j->buf[0].buf_size = JOURNAL_ENTRY_SIZE_MIN; -+ j->buf[1].buf_size = JOURNAL_ENTRY_SIZE_MIN; -+ j->write_delay_ms = 1000; -+ j->reclaim_delay_ms = 100; -+ -+ /* Btree roots: */ -+ j->entry_u64s_reserved += -+ BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX); -+ -+ atomic64_set(&j->reservations.counter, -+ ((union journal_res_state) -+ { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); -+ -+ if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) || -+ !(j->buf[0].data = kvpmalloc(j->buf[0].buf_size, GFP_KERNEL)) || -+ !(j->buf[1].data = kvpmalloc(j->buf[1].buf_size, GFP_KERNEL))) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ j->pin.front = j->pin.back = 1; -+out: -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+} -+ -+/* debug: */ -+ -+void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ union journal_res_state s; -+ struct bch_dev *ca; -+ unsigned iter; -+ -+ rcu_read_lock(); -+ spin_lock(&j->lock); -+ s = READ_ONCE(j->reservations); -+ -+ pr_buf(out, -+ "active journal entries:\t%llu\n" -+ "seq:\t\t\t%llu\n" -+ "last_seq:\t\t%llu\n" -+ "last_seq_ondisk:\t%llu\n" -+ "prereserved:\t\t%u/%u\n" -+ "current entry sectors:\t%u\n" -+ "current entry:\t\t", -+ fifo_used(&j->pin), -+ journal_cur_seq(j), -+ journal_last_seq(j), -+ j->last_seq_ondisk, -+ j->prereserved.reserved, -+ j->prereserved.remaining, -+ j->cur_entry_sectors); -+ -+ switch (s.cur_entry_offset) { -+ case JOURNAL_ENTRY_ERROR_VAL: -+ pr_buf(out, "error\n"); -+ break; -+ case JOURNAL_ENTRY_CLOSED_VAL: -+ pr_buf(out, "closed\n"); -+ break; -+ default: -+ pr_buf(out, "%u/%u\n", -+ s.cur_entry_offset, -+ j->cur_entry_u64s); -+ break; -+ } -+ -+ pr_buf(out, -+ "current entry refs:\t%u\n" -+ "prev entry unwritten:\t", -+ journal_state_count(s, s.idx)); -+ -+ if (s.prev_buf_unwritten) -+ pr_buf(out, "yes, ref %u sectors %u\n", -+ journal_state_count(s, !s.idx), -+ journal_prev_buf(j)->sectors); -+ else -+ pr_buf(out, "no\n"); -+ -+ pr_buf(out, -+ "need write:\t\t%i\n" -+ "replay done:\t\t%i\n", -+ test_bit(JOURNAL_NEED_WRITE, &j->flags), -+ test_bit(JOURNAL_REPLAY_DONE, &j->flags)); -+ -+ for_each_member_device_rcu(ca, c, iter, -+ &c->rw_devs[BCH_DATA_journal]) { -+ struct journal_device *ja = &ca->journal; -+ -+ if (!ja->nr) -+ continue; -+ -+ pr_buf(out, -+ "dev %u:\n" -+ "\tnr\t\t%u\n" -+ "\tavailable\t%u:%u\n" -+ "\tdiscard_idx\t\t%u\n" -+ "\tdirty_idx_ondisk\t%u (seq %llu)\n" -+ "\tdirty_idx\t\t%u (seq %llu)\n" -+ "\tcur_idx\t\t%u (seq %llu)\n", -+ iter, ja->nr, -+ bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), -+ ja->sectors_free, -+ ja->discard_idx, -+ ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk], -+ ja->dirty_idx, ja->bucket_seq[ja->dirty_idx], -+ ja->cur_idx, ja->bucket_seq[ja->cur_idx]); -+ } -+ -+ spin_unlock(&j->lock); -+ rcu_read_unlock(); -+} -+ -+void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j) -+{ -+ struct journal_entry_pin_list *pin_list; -+ struct journal_entry_pin *pin; -+ u64 i; -+ -+ spin_lock(&j->lock); -+ fifo_for_each_entry_ptr(pin_list, &j->pin, i) { -+ pr_buf(out, "%llu: count %u\n", -+ i, atomic_read(&pin_list->count)); -+ -+ list_for_each_entry(pin, &pin_list->list, list) -+ pr_buf(out, "\t%px %ps\n", -+ pin, pin->flush); -+ -+ if (!list_empty(&pin_list->flushed)) -+ 
pr_buf(out, "flushed:\n"); -+ -+ list_for_each_entry(pin, &pin_list->flushed, list) -+ pr_buf(out, "\t%px %ps\n", -+ pin, pin->flush); -+ } -+ spin_unlock(&j->lock); -+} -diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h -new file mode 100644 -index 000000000000..f60bc964ee1f ---- /dev/null -+++ b/fs/bcachefs/journal.h -@@ -0,0 +1,520 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_H -+#define _BCACHEFS_JOURNAL_H -+ -+/* -+ * THE JOURNAL: -+ * -+ * The primary purpose of the journal is to log updates (insertions) to the -+ * b-tree, to avoid having to do synchronous updates to the b-tree on disk. -+ * -+ * Without the journal, the b-tree is always internally consistent on -+ * disk - and in fact, in the earliest incarnations bcache didn't have a journal -+ * but did handle unclean shutdowns by doing all index updates synchronously -+ * (with coalescing). -+ * -+ * Updates to interior nodes still happen synchronously and without the journal -+ * (for simplicity) - this may change eventually but updates to interior nodes -+ * are rare enough it's not a huge priority. -+ * -+ * This means the journal is relatively separate from the b-tree; it consists of -+ * just a list of keys and journal replay consists of just redoing those -+ * insertions in same order that they appear in the journal. -+ * -+ * PERSISTENCE: -+ * -+ * For synchronous updates (where we're waiting on the index update to hit -+ * disk), the journal entry will be written out immediately (or as soon as -+ * possible, if the write for the previous journal entry was still in flight). -+ * -+ * Synchronous updates are specified by passing a closure (@flush_cl) to -+ * bch2_btree_insert() or bch_btree_insert_node(), which then pass that parameter -+ * down to the journalling code. That closure will will wait on the journal -+ * write to complete (via closure_wait()). -+ * -+ * If the index update wasn't synchronous, the journal entry will be -+ * written out after 10 ms have elapsed, by default (the delay_ms field -+ * in struct journal). -+ * -+ * JOURNAL ENTRIES: -+ * -+ * A journal entry is variable size (struct jset), it's got a fixed length -+ * header and then a variable number of struct jset_entry entries. -+ * -+ * Journal entries are identified by monotonically increasing 64 bit sequence -+ * numbers - jset->seq; other places in the code refer to this sequence number. -+ * -+ * A jset_entry entry contains one or more bkeys (which is what gets inserted -+ * into the b-tree). We need a container to indicate which b-tree the key is -+ * for; also, the roots of the various b-trees are stored in jset_entry entries -+ * (one for each b-tree) - this lets us add new b-tree types without changing -+ * the on disk format. -+ * -+ * We also keep some things in the journal header that are logically part of the -+ * superblock - all the things that are frequently updated. This is for future -+ * bcache on raw flash support; the superblock (which will become another -+ * journal) can't be moved or wear leveled, so it contains just enough -+ * information to find the main journal, and the superblock only has to be -+ * rewritten when we want to move/wear level the main journal. 
-+ * -+ * JOURNAL LAYOUT ON DISK: -+ * -+ * The journal is written to a ringbuffer of buckets (which is kept in the -+ * superblock); the individual buckets are not necessarily contiguous on disk -+ * which means that journal entries are not allowed to span buckets, but also -+ * that we can resize the journal at runtime if desired (unimplemented). -+ * -+ * The journal buckets exist in the same pool as all the other buckets that are -+ * managed by the allocator and garbage collection - garbage collection marks -+ * the journal buckets as metadata buckets. -+ * -+ * OPEN/DIRTY JOURNAL ENTRIES: -+ * -+ * Open/dirty journal entries are journal entries that contain b-tree updates -+ * that have not yet been written out to the b-tree on disk. We have to track -+ * which journal entries are dirty, and we also have to avoid wrapping around -+ * the journal and overwriting old but still dirty journal entries with new -+ * journal entries. -+ * -+ * On disk, this is represented with the "last_seq" field of struct jset; -+ * last_seq is the first sequence number that journal replay has to replay. -+ * -+ * To avoid overwriting dirty journal entries on disk, we keep a mapping (in -+ * journal_device->seq) from each journal bucket to the highest sequence number -+ * of any journal entry it contains. Then, by comparing that against last_seq we -+ * can determine whether that journal bucket contains dirty journal entries or -+ * not. -+ * -+ * To track which journal entries are dirty, we maintain a fifo of refcounts -+ * (where each entry corresponds to a specific sequence number) - when a ref -+ * goes to 0, that journal entry is no longer dirty. -+ * -+ * Journalling of index updates is done at the same time as the b-tree itself is -+ * being modified (see btree_insert_key()); when we add the key to the journal -+ * the pending b-tree write takes a ref on the journal entry the key was added -+ * to. If a pending b-tree write would need to take refs on multiple dirty -+ * journal entries, it only keeps the ref on the oldest one (since a newer -+ * journal entry will still be replayed if an older entry was dirty). -+ * -+ * JOURNAL FILLING UP: -+ * -+ * There are two ways the journal could fill up; either we could run out of -+ * space to write to, or we could have too many open journal entries and run out -+ * of room in the fifo of refcounts. Since those refcounts are decremented -+ * without any locking we can't safely resize that fifo, so we handle it the -+ * same way as running out of space. -+ * -+ * If the journal fills up, we start flushing dirty btree nodes until we can -+ * allocate space for a journal write again - preferentially flushing btree -+ * nodes that are pinning the oldest journal entries first.
-+ */ -+ -+#include -+ -+#include "journal_types.h" -+ -+struct bch_fs; -+ -+static inline void journal_wake(struct journal *j) -+{ -+ wake_up(&j->wait); -+ closure_wake_up(&j->async_wait); -+ closure_wake_up(&j->preres_wait); -+} -+ -+static inline struct journal_buf *journal_cur_buf(struct journal *j) -+{ -+ return j->buf + j->reservations.idx; -+} -+ -+static inline struct journal_buf *journal_prev_buf(struct journal *j) -+{ -+ return j->buf + !j->reservations.idx; -+} -+ -+/* Sequence number of oldest dirty journal entry */ -+ -+static inline u64 journal_last_seq(struct journal *j) -+{ -+ return j->pin.front; -+} -+ -+static inline u64 journal_cur_seq(struct journal *j) -+{ -+ BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq)); -+ -+ return j->pin.back - 1; -+} -+ -+u64 bch2_inode_journal_seq(struct journal *, u64); -+void bch2_journal_set_has_inum(struct journal *, u64, u64); -+ -+static inline int journal_state_count(union journal_res_state s, int idx) -+{ -+ return idx == 0 ? s.buf0_count : s.buf1_count; -+} -+ -+static inline void journal_state_inc(union journal_res_state *s) -+{ -+ s->buf0_count += s->idx == 0; -+ s->buf1_count += s->idx == 1; -+} -+ -+static inline void bch2_journal_set_has_inode(struct journal *j, -+ struct journal_res *res, -+ u64 inum) -+{ -+ struct journal_buf *buf = &j->buf[res->idx]; -+ unsigned long bit = hash_64(inum, ilog2(sizeof(buf->has_inode) * 8)); -+ -+ /* avoid atomic op if possible */ -+ if (unlikely(!test_bit(bit, buf->has_inode))) -+ set_bit(bit, buf->has_inode); -+} -+ -+/* -+ * Amount of space that will be taken up by some keys in the journal (i.e. -+ * including the jset header) -+ */ -+static inline unsigned jset_u64s(unsigned u64s) -+{ -+ return u64s + sizeof(struct jset_entry) / sizeof(u64); -+} -+ -+static inline int journal_entry_overhead(struct journal *j) -+{ -+ return sizeof(struct jset) / sizeof(u64) + j->entry_u64s_reserved; -+} -+ -+static inline struct jset_entry * -+bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s) -+{ -+ struct jset *jset = buf->data; -+ struct jset_entry *entry = vstruct_idx(jset, le32_to_cpu(jset->u64s)); -+ -+ memset(entry, 0, sizeof(*entry)); -+ entry->u64s = cpu_to_le16(u64s); -+ -+ le32_add_cpu(&jset->u64s, jset_u64s(u64s)); -+ -+ return entry; -+} -+ -+static inline struct jset_entry * -+journal_res_entry(struct journal *j, struct journal_res *res) -+{ -+ return vstruct_idx(j->buf[res->idx].data, res->offset); -+} -+ -+static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type, -+ enum btree_id id, unsigned level, -+ const void *data, unsigned u64s) -+{ -+ memset(entry, 0, sizeof(*entry)); -+ entry->u64s = cpu_to_le16(u64s); -+ entry->type = type; -+ entry->btree_id = id; -+ entry->level = level; -+ memcpy_u64s_small(entry->_data, data, u64s); -+ -+ return jset_u64s(u64s); -+} -+ -+static inline void bch2_journal_add_entry(struct journal *j, struct journal_res *res, -+ unsigned type, enum btree_id id, -+ unsigned level, -+ const void *data, unsigned u64s) -+{ -+ unsigned actual = journal_entry_set(journal_res_entry(j, res), -+ type, id, level, data, u64s); -+ -+ EBUG_ON(!res->ref); -+ EBUG_ON(actual > res->u64s); -+ -+ res->offset += actual; -+ res->u64s -= actual; -+} -+ -+static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *res, -+ enum btree_id id, const struct bkey_i *k) -+{ -+ bch2_journal_add_entry(j, res, BCH_JSET_ENTRY_btree_keys, -+ id, 0, k, k->k.u64s); -+} -+ -+static inline bool journal_entry_empty(struct jset *j) -+{ -+ 
struct jset_entry *i; -+ -+ if (j->seq != j->last_seq) -+ return false; -+ -+ vstruct_for_each(j, i) -+ if (i->type == BCH_JSET_ENTRY_btree_keys && i->u64s) -+ return false; -+ return true; -+} -+ -+void __bch2_journal_buf_put(struct journal *, bool); -+ -+static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, -+ bool need_write_just_set) -+{ -+ union journal_res_state s; -+ -+ s.v = atomic64_sub_return(((union journal_res_state) { -+ .buf0_count = idx == 0, -+ .buf1_count = idx == 1, -+ }).v, &j->reservations.counter); -+ if (!journal_state_count(s, idx)) { -+ EBUG_ON(s.idx == idx || !s.prev_buf_unwritten); -+ __bch2_journal_buf_put(j, need_write_just_set); -+ } -+} -+ -+/* -+ * This function releases the journal write structure so other threads can -+ * then proceed to add their keys as well. -+ */ -+static inline void bch2_journal_res_put(struct journal *j, -+ struct journal_res *res) -+{ -+ if (!res->ref) -+ return; -+ -+ lock_release(&j->res_map, _THIS_IP_); -+ -+ while (res->u64s) -+ bch2_journal_add_entry(j, res, -+ BCH_JSET_ENTRY_btree_keys, -+ 0, 0, NULL, 0); -+ -+ bch2_journal_buf_put(j, res->idx, false); -+ -+ res->ref = 0; -+} -+ -+int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *, -+ unsigned); -+ -+#define JOURNAL_RES_GET_NONBLOCK (1 << 0) -+#define JOURNAL_RES_GET_CHECK (1 << 1) -+#define JOURNAL_RES_GET_RESERVED (1 << 2) -+#define JOURNAL_RES_GET_RECLAIM (1 << 3) -+ -+static inline int journal_res_get_fast(struct journal *j, -+ struct journal_res *res, -+ unsigned flags) -+{ -+ union journal_res_state old, new; -+ u64 v = atomic64_read(&j->reservations.counter); -+ -+ do { -+ old.v = new.v = v; -+ -+ /* -+ * Check if there is still room in the current journal -+ * entry: -+ */ -+ if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s) -+ return 0; -+ -+ EBUG_ON(!journal_state_count(new, new.idx)); -+ -+ if (!(flags & JOURNAL_RES_GET_RESERVED) && -+ !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) -+ return 0; -+ -+ if (flags & JOURNAL_RES_GET_CHECK) -+ return 1; -+ -+ new.cur_entry_offset += res->u64s; -+ journal_state_inc(&new); -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ res->ref = true; -+ res->idx = old.idx; -+ res->offset = old.cur_entry_offset; -+ res->seq = le64_to_cpu(j->buf[old.idx].data->seq); -+ return 1; -+} -+ -+static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res, -+ unsigned u64s, unsigned flags) -+{ -+ int ret; -+ -+ EBUG_ON(res->ref); -+ EBUG_ON(!test_bit(JOURNAL_STARTED, &j->flags)); -+ -+ res->u64s = u64s; -+ -+ if (journal_res_get_fast(j, res, flags)) -+ goto out; -+ -+ ret = bch2_journal_res_get_slowpath(j, res, flags); -+ if (ret) -+ return ret; -+out: -+ if (!(flags & JOURNAL_RES_GET_CHECK)) { -+ lock_acquire_shared(&j->res_map, 0, -+ (flags & JOURNAL_RES_GET_NONBLOCK) != 0, -+ NULL, _THIS_IP_); -+ EBUG_ON(!res->ref); -+ } -+ return 0; -+} -+ -+/* journal_preres: */ -+ -+static inline bool journal_check_may_get_unreserved(struct journal *j) -+{ -+ union journal_preres_state s = READ_ONCE(j->prereserved); -+ bool ret = s.reserved <= s.remaining && -+ fifo_free(&j->pin) > 8; -+ -+ lockdep_assert_held(&j->lock); -+ -+ if (ret != test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) { -+ if (ret) { -+ set_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags); -+ journal_wake(j); -+ } else { -+ clear_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags); -+ } -+ } -+ return ret; -+} -+ -+static inline void bch2_journal_preres_put(struct journal *j, -+ struct 
journal_preres *res) -+{ -+ union journal_preres_state s = { .reserved = res->u64s }; -+ -+ if (!res->u64s) -+ return; -+ -+ s.v = atomic64_sub_return(s.v, &j->prereserved.counter); -+ res->u64s = 0; -+ closure_wake_up(&j->preres_wait); -+ -+ if (s.reserved <= s.remaining && -+ !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) { -+ spin_lock(&j->lock); -+ journal_check_may_get_unreserved(j); -+ spin_unlock(&j->lock); -+ } -+} -+ -+int __bch2_journal_preres_get(struct journal *, -+ struct journal_preres *, unsigned, unsigned); -+ -+static inline int bch2_journal_preres_get_fast(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ int d = new_u64s - res->u64s; -+ union journal_preres_state old, new; -+ u64 v = atomic64_read(&j->prereserved.counter); -+ -+ do { -+ old.v = new.v = v; -+ -+ new.reserved += d; -+ -+ /* -+ * If we're being called from the journal reclaim path, we have -+ * to unconditionally give out the pre-reservation, there's -+ * nothing else sensible we can do - otherwise we'd recurse back -+ * into the reclaim path and deadlock: -+ */ -+ -+ if (!(flags & JOURNAL_RES_GET_RECLAIM) && -+ new.reserved > new.remaining) -+ return 0; -+ } while ((v = atomic64_cmpxchg(&j->prereserved.counter, -+ old.v, new.v)) != old.v); -+ -+ res->u64s += d; -+ return 1; -+} -+ -+static inline int bch2_journal_preres_get(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ if (new_u64s <= res->u64s) -+ return 0; -+ -+ if (bch2_journal_preres_get_fast(j, res, new_u64s, flags)) -+ return 0; -+ -+ if (flags & JOURNAL_RES_GET_NONBLOCK) -+ return -EAGAIN; -+ -+ return __bch2_journal_preres_get(j, res, new_u64s, flags); -+} -+ -+/* journal_entry_res: */ -+ -+void bch2_journal_entry_res_resize(struct journal *, -+ struct journal_entry_res *, -+ unsigned); -+ -+u64 bch2_journal_last_unwritten_seq(struct journal *); -+int bch2_journal_open_seq_async(struct journal *, u64, struct closure *); -+ -+void bch2_journal_wait_on_seq(struct journal *, u64, struct closure *); -+void bch2_journal_flush_seq_async(struct journal *, u64, struct closure *); -+void bch2_journal_flush_async(struct journal *, struct closure *); -+void bch2_journal_meta_async(struct journal *, struct closure *); -+ -+int bch2_journal_flush_seq(struct journal *, u64); -+int bch2_journal_flush(struct journal *); -+int bch2_journal_meta(struct journal *); -+ -+void bch2_journal_halt(struct journal *); -+ -+static inline int bch2_journal_error(struct journal *j) -+{ -+ return j->reservations.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL -+ ? 
-EIO : 0; -+} -+ -+struct bch_dev; -+ -+static inline bool journal_flushes_device(struct bch_dev *ca) -+{ -+ return true; -+} -+ -+static inline void bch2_journal_set_replay_done(struct journal *j) -+{ -+ BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags)); -+ set_bit(JOURNAL_REPLAY_DONE, &j->flags); -+} -+ -+void bch2_journal_unblock(struct journal *); -+void bch2_journal_block(struct journal *); -+ -+void bch2_journal_debug_to_text(struct printbuf *, struct journal *); -+void bch2_journal_pins_to_text(struct printbuf *, struct journal *); -+ -+int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, -+ unsigned nr); -+int bch2_dev_journal_alloc(struct bch_dev *); -+ -+void bch2_dev_journal_stop(struct journal *, struct bch_dev *); -+ -+void bch2_fs_journal_stop(struct journal *); -+int bch2_fs_journal_start(struct journal *, u64, struct list_head *); -+ -+void bch2_dev_journal_exit(struct bch_dev *); -+int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *); -+void bch2_fs_journal_exit(struct journal *); -+int bch2_fs_journal_init(struct journal *); -+ -+#endif /* _BCACHEFS_JOURNAL_H */ -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -new file mode 100644 -index 000000000000..bd0e6b371701 ---- /dev/null -+++ b/fs/bcachefs/journal_io.c -@@ -0,0 +1,1183 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "btree_io.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "error.h" -+#include "io.h" -+#include "journal.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "replicas.h" -+ -+#include -+ -+struct journal_list { -+ struct closure cl; -+ struct mutex lock; -+ struct list_head *head; -+ int ret; -+}; -+ -+#define JOURNAL_ENTRY_ADD_OK 0 -+#define JOURNAL_ENTRY_ADD_OUT_OF_RANGE 5 -+ -+/* -+ * Given a journal entry we just read, add it to the list of journal entries to -+ * be replayed: -+ */ -+static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, -+ struct journal_list *jlist, struct jset *j, -+ bool bad) -+{ -+ struct journal_replay *i, *pos; -+ struct bch_devs_list devs = { .nr = 0 }; -+ struct list_head *where; -+ size_t bytes = vstruct_bytes(j); -+ __le64 last_seq; -+ int ret; -+ -+ last_seq = !list_empty(jlist->head) -+ ? list_last_entry(jlist->head, struct journal_replay, -+ list)->j.last_seq -+ : 0; -+ -+ if (!c->opts.read_entire_journal) { -+ /* Is this entry older than the range we need? */ -+ if (le64_to_cpu(j->seq) < le64_to_cpu(last_seq)) { -+ ret = JOURNAL_ENTRY_ADD_OUT_OF_RANGE; -+ goto out; -+ } -+ -+ /* Drop entries we don't need anymore */ -+ list_for_each_entry_safe(i, pos, jlist->head, list) { -+ if (le64_to_cpu(i->j.seq) >= le64_to_cpu(j->last_seq)) -+ break; -+ list_del(&i->list); -+ kvpfree(i, offsetof(struct journal_replay, j) + -+ vstruct_bytes(&i->j)); -+ } -+ } -+ -+ list_for_each_entry_reverse(i, jlist->head, list) { -+ if (le64_to_cpu(j->seq) > le64_to_cpu(i->j.seq)) { -+ where = &i->list; -+ goto add; -+ } -+ } -+ -+ where = jlist->head; -+add: -+ i = where->next != jlist->head -+ ? container_of(where->next, struct journal_replay, list) -+ : NULL; -+ -+ /* -+ * Duplicate journal entries? 
If so we want the one that didn't have a -+ * checksum error: -+ */ -+ if (i && le64_to_cpu(j->seq) == le64_to_cpu(i->j.seq)) { -+ if (i->bad) { -+ devs = i->devs; -+ list_del(&i->list); -+ kvpfree(i, offsetof(struct journal_replay, j) + -+ vstruct_bytes(&i->j)); -+ } else if (bad) { -+ goto found; -+ } else { -+ fsck_err_on(bytes != vstruct_bytes(&i->j) || -+ memcmp(j, &i->j, bytes), c, -+ "found duplicate but non identical journal entries (seq %llu)", -+ le64_to_cpu(j->seq)); -+ goto found; -+ } -+ -+ } -+ -+ i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL); -+ if (!i) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ list_add(&i->list, where); -+ i->devs = devs; -+ i->bad = bad; -+ memcpy(&i->j, j, bytes); -+found: -+ if (!bch2_dev_list_has_dev(i->devs, ca->dev_idx)) -+ bch2_dev_list_add_dev(&i->devs, ca->dev_idx); -+ else -+ fsck_err_on(1, c, "duplicate journal entries on same device"); -+ ret = JOURNAL_ENTRY_ADD_OK; -+out: -+fsck_err: -+ return ret; -+} -+ -+static struct nonce journal_nonce(const struct jset *jset) -+{ -+ return (struct nonce) {{ -+ [0] = 0, -+ [1] = ((__le32 *) &jset->seq)[0], -+ [2] = ((__le32 *) &jset->seq)[1], -+ [3] = BCH_NONCE_JOURNAL, -+ }}; -+} -+ -+/* this fills in a range with empty jset_entries: */ -+static void journal_entry_null_range(void *start, void *end) -+{ -+ struct jset_entry *entry; -+ -+ for (entry = start; entry != end; entry = vstruct_next(entry)) -+ memset(entry, 0, sizeof(*entry)); -+} -+ -+#define JOURNAL_ENTRY_REREAD 5 -+#define JOURNAL_ENTRY_NONE 6 -+#define JOURNAL_ENTRY_BAD 7 -+ -+#define journal_entry_err(c, msg, ...) \ -+({ \ -+ switch (write) { \ -+ case READ: \ -+ mustfix_fsck_err(c, msg, ##__VA_ARGS__); \ -+ break; \ -+ case WRITE: \ -+ bch_err(c, "corrupt metadata before write:\n" \ -+ msg, ##__VA_ARGS__); \ -+ if (bch2_fs_inconsistent(c)) { \ -+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ -+ goto fsck_err; \ -+ } \ -+ break; \ -+ } \ -+ true; \ -+}) -+ -+#define journal_entry_err_on(cond, c, msg, ...) \ -+ ((cond) ? 
journal_entry_err(c, msg, ##__VA_ARGS__) : false) -+ -+static int journal_validate_key(struct bch_fs *c, struct jset *jset, -+ struct jset_entry *entry, -+ unsigned level, enum btree_id btree_id, -+ struct bkey_i *k, -+ const char *type, int write) -+{ -+ void *next = vstruct_next(entry); -+ const char *invalid; -+ unsigned version = le32_to_cpu(jset->version); -+ int ret = 0; -+ -+ if (journal_entry_err_on(!k->k.u64s, c, -+ "invalid %s in journal: k->u64s 0", type)) { -+ entry->u64s = cpu_to_le16((u64 *) k - entry->_data); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ if (journal_entry_err_on((void *) bkey_next(k) > -+ (void *) vstruct_next(entry), c, -+ "invalid %s in journal: extends past end of journal entry", -+ type)) { -+ entry->u64s = cpu_to_le16((u64 *) k - entry->_data); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, c, -+ "invalid %s in journal: bad format %u", -+ type, k->k.format)) { -+ le16_add_cpu(&entry->u64s, -k->k.u64s); -+ memmove(k, bkey_next(k), next - (void *) bkey_next(k)); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ if (!write) -+ bch2_bkey_compat(level, btree_id, version, -+ JSET_BIG_ENDIAN(jset), write, -+ NULL, bkey_to_packed(k)); -+ -+ invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k), -+ __btree_node_type(level, btree_id)); -+ if (invalid) { -+ char buf[160]; -+ -+ bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k)); -+ mustfix_fsck_err(c, "invalid %s in journal: %s\n%s", -+ type, invalid, buf); -+ -+ le16_add_cpu(&entry->u64s, -k->k.u64s); -+ memmove(k, bkey_next(k), next - (void *) bkey_next(k)); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ if (write) -+ bch2_bkey_compat(level, btree_id, version, -+ JSET_BIG_ENDIAN(jset), write, -+ NULL, bkey_to_packed(k)); -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_btree_keys(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct bkey_i *k; -+ -+ vstruct_for_each(entry, k) { -+ int ret = journal_validate_key(c, jset, entry, -+ entry->level, -+ entry->btree_id, -+ k, "key", write); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int journal_entry_validate_btree_root(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct bkey_i *k = entry->start; -+ int ret = 0; -+ -+ if (journal_entry_err_on(!entry->u64s || -+ le16_to_cpu(entry->u64s) != k->k.u64s, c, -+ "invalid btree root journal entry: wrong number of keys")) { -+ void *next = vstruct_next(entry); -+ /* -+ * we don't want to null out this jset_entry, -+ * just the contents, so that later we can tell -+ * we were _supposed_ to have a btree root -+ */ -+ entry->u64s = 0; -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ return journal_validate_key(c, jset, entry, 1, entry->btree_id, k, -+ "btree root", write); -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_prio_ptrs(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ /* obsolete, don't care: */ -+ return 0; -+} -+ -+static int journal_entry_validate_blacklist(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ int ret = 0; -+ -+ if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1, c, -+ "invalid journal seq blacklist entry: bad size")) { -+ journal_entry_null_range(entry, 
vstruct_next(entry)); -+ } -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_blacklist_v2(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct jset_entry_blacklist_v2 *bl_entry; -+ int ret = 0; -+ -+ if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2, c, -+ "invalid journal seq blacklist entry: bad size")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ goto out; -+ } -+ -+ bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry); -+ -+ if (journal_entry_err_on(le64_to_cpu(bl_entry->start) > -+ le64_to_cpu(bl_entry->end), c, -+ "invalid journal seq blacklist entry: start > end")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ } -+out: -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_usage(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); -+ int ret = 0; -+ -+ if (journal_entry_err_on(bytes < sizeof(*u), -+ c, -+ "invalid journal entry usage: bad size")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ return ret; -+ } -+ -+fsck_err: -+ return ret; -+} -+ -+static int journal_entry_validate_data_usage(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ int write) -+{ -+ struct jset_entry_data_usage *u = -+ container_of(entry, struct jset_entry_data_usage, entry); -+ unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); -+ int ret = 0; -+ -+ if (journal_entry_err_on(bytes < sizeof(*u) || -+ bytes < sizeof(*u) + u->r.nr_devs, -+ c, -+ "invalid journal entry usage: bad size")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ return ret; -+ } -+ -+fsck_err: -+ return ret; -+} -+ -+struct jset_entry_ops { -+ int (*validate)(struct bch_fs *, struct jset *, -+ struct jset_entry *, int); -+}; -+ -+static const struct jset_entry_ops bch2_jset_entry_ops[] = { -+#define x(f, nr) \ -+ [BCH_JSET_ENTRY_##f] = (struct jset_entry_ops) { \ -+ .validate = journal_entry_validate_##f, \ -+ }, -+ BCH_JSET_ENTRY_TYPES() -+#undef x -+}; -+ -+static int journal_entry_validate(struct bch_fs *c, struct jset *jset, -+ struct jset_entry *entry, int write) -+{ -+ return entry->type < BCH_JSET_ENTRY_NR -+ ? 
bch2_jset_entry_ops[entry->type].validate(c, jset, -+ entry, write) -+ : 0; -+} -+ -+static int jset_validate_entries(struct bch_fs *c, struct jset *jset, -+ int write) -+{ -+ struct jset_entry *entry; -+ int ret = 0; -+ -+ vstruct_for_each(jset, entry) { -+ if (journal_entry_err_on(vstruct_next(entry) > -+ vstruct_last(jset), c, -+ "journal entry extends past end of jset")) { -+ jset->u64s = cpu_to_le32((u64 *) entry - jset->_data); -+ break; -+ } -+ -+ ret = journal_entry_validate(c, jset, entry, write); -+ if (ret) -+ break; -+ } -+fsck_err: -+ return ret; -+} -+ -+static int jset_validate(struct bch_fs *c, -+ struct bch_dev *ca, -+ struct jset *jset, u64 sector, -+ unsigned bucket_sectors_left, -+ unsigned sectors_read, -+ int write) -+{ -+ size_t bytes = vstruct_bytes(jset); -+ struct bch_csum csum; -+ unsigned version; -+ int ret = 0; -+ -+ if (le64_to_cpu(jset->magic) != jset_magic(c)) -+ return JOURNAL_ENTRY_NONE; -+ -+ version = le32_to_cpu(jset->version); -+ if (journal_entry_err_on((version != BCH_JSET_VERSION_OLD && -+ version < bcachefs_metadata_version_min) || -+ version >= bcachefs_metadata_version_max, c, -+ "%s sector %llu seq %llu: unknown journal entry version %u", -+ ca->name, sector, le64_to_cpu(jset->seq), -+ version)) { -+ /* XXX: note we might have missing journal entries */ -+ return JOURNAL_ENTRY_BAD; -+ } -+ -+ if (journal_entry_err_on(bytes > bucket_sectors_left << 9, c, -+ "%s sector %llu seq %llu: journal entry too big (%zu bytes)", -+ ca->name, sector, le64_to_cpu(jset->seq), bytes)) { -+ /* XXX: note we might have missing journal entries */ -+ return JOURNAL_ENTRY_BAD; -+ } -+ -+ if (bytes > sectors_read << 9) -+ return JOURNAL_ENTRY_REREAD; -+ -+ if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c, -+ "%s sector %llu seq %llu: journal entry with unknown csum type %llu", -+ ca->name, sector, le64_to_cpu(jset->seq), -+ JSET_CSUM_TYPE(jset))) -+ return JOURNAL_ENTRY_BAD; -+ -+ csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset); -+ if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum), c, -+ "%s sector %llu seq %llu: journal checksum bad", -+ ca->name, sector, le64_to_cpu(jset->seq))) { -+ /* XXX: retry IO, when we start retrying checksum errors */ -+ /* XXX: note we might have missing journal entries */ -+ return JOURNAL_ENTRY_BAD; -+ } -+ -+ bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), -+ jset->encrypted_start, -+ vstruct_end(jset) - (void *) jset->encrypted_start); -+ -+ if (journal_entry_err_on(le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c, -+ "invalid journal entry: last_seq > seq")) { -+ jset->last_seq = jset->seq; -+ return JOURNAL_ENTRY_BAD; -+ } -+ -+ return 0; -+fsck_err: -+ return ret; -+} -+ -+struct journal_read_buf { -+ void *data; -+ size_t size; -+}; -+ -+static int journal_read_buf_realloc(struct journal_read_buf *b, -+ size_t new_size) -+{ -+ void *n; -+ -+ /* the bios are sized for this many pages, max: */ -+ if (new_size > JOURNAL_ENTRY_SIZE_MAX) -+ return -ENOMEM; -+ -+ new_size = roundup_pow_of_two(new_size); -+ n = kvpmalloc(new_size, GFP_KERNEL); -+ if (!n) -+ return -ENOMEM; -+ -+ kvpfree(b->data, b->size); -+ b->data = n; -+ b->size = new_size; -+ return 0; -+} -+ -+static int journal_read_bucket(struct bch_dev *ca, -+ struct journal_read_buf *buf, -+ struct journal_list *jlist, -+ unsigned bucket) -+{ -+ struct bch_fs *c = ca->fs; -+ struct journal_device *ja = &ca->journal; -+ struct jset *j = NULL; -+ unsigned sectors, sectors_read = 0; -+ u64 offset = 
bucket_to_sector(ca, ja->buckets[bucket]), -+ end = offset + ca->mi.bucket_size; -+ bool saw_bad = false; -+ int ret = 0; -+ -+ pr_debug("reading %u", bucket); -+ -+ while (offset < end) { -+ if (!sectors_read) { -+ struct bio *bio; -+reread: -+ sectors_read = min_t(unsigned, -+ end - offset, buf->size >> 9); -+ -+ bio = bio_kmalloc(GFP_KERNEL, -+ buf_pages(buf->data, -+ sectors_read << 9)); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_iter.bi_sector = offset; -+ bio_set_op_attrs(bio, REQ_OP_READ, 0); -+ bch2_bio_map(bio, buf->data, sectors_read << 9); -+ -+ ret = submit_bio_wait(bio); -+ bio_put(bio); -+ -+ if (bch2_dev_io_err_on(ret, ca, -+ "journal read from sector %llu", -+ offset) || -+ bch2_meta_read_fault("journal")) -+ return -EIO; -+ -+ j = buf->data; -+ } -+ -+ ret = jset_validate(c, ca, j, offset, -+ end - offset, sectors_read, -+ READ); -+ switch (ret) { -+ case BCH_FSCK_OK: -+ sectors = vstruct_sectors(j, c->block_bits); -+ break; -+ case JOURNAL_ENTRY_REREAD: -+ if (vstruct_bytes(j) > buf->size) { -+ ret = journal_read_buf_realloc(buf, -+ vstruct_bytes(j)); -+ if (ret) -+ return ret; -+ } -+ goto reread; -+ case JOURNAL_ENTRY_NONE: -+ if (!saw_bad) -+ return 0; -+ sectors = c->opts.block_size; -+ goto next_block; -+ case JOURNAL_ENTRY_BAD: -+ saw_bad = true; -+ /* -+ * On checksum error we don't really trust the size -+ * field of the journal entry we read, so try reading -+ * again at next block boundary: -+ */ -+ sectors = c->opts.block_size; -+ break; -+ default: -+ return ret; -+ } -+ -+ /* -+ * This happens sometimes if we don't have discards on - -+ * when we've partially overwritten a bucket with new -+ * journal entries. We don't need the rest of the -+ * bucket: -+ */ -+ if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket]) -+ return 0; -+ -+ ja->bucket_seq[bucket] = le64_to_cpu(j->seq); -+ -+ mutex_lock(&jlist->lock); -+ ret = journal_entry_add(c, ca, jlist, j, ret != 0); -+ mutex_unlock(&jlist->lock); -+ -+ switch (ret) { -+ case JOURNAL_ENTRY_ADD_OK: -+ break; -+ case JOURNAL_ENTRY_ADD_OUT_OF_RANGE: -+ break; -+ default: -+ return ret; -+ } -+next_block: -+ pr_debug("next"); -+ offset += sectors; -+ sectors_read -= sectors; -+ j = ((void *) j) + (sectors << 9); -+ } -+ -+ return 0; -+} -+ -+static void bch2_journal_read_device(struct closure *cl) -+{ -+ struct journal_device *ja = -+ container_of(cl, struct journal_device, read); -+ struct bch_dev *ca = container_of(ja, struct bch_dev, journal); -+ struct journal_list *jlist = -+ container_of(cl->parent, struct journal_list, cl); -+ struct journal_read_buf buf = { NULL, 0 }; -+ u64 min_seq = U64_MAX; -+ unsigned i; -+ int ret; -+ -+ if (!ja->nr) -+ goto out; -+ -+ ret = journal_read_buf_realloc(&buf, PAGE_SIZE); -+ if (ret) -+ goto err; -+ -+ pr_debug("%u journal buckets", ja->nr); -+ -+ for (i = 0; i < ja->nr; i++) { -+ ret = journal_read_bucket(ca, &buf, jlist, i); -+ if (ret) -+ goto err; -+ } -+ -+ /* Find the journal bucket with the highest sequence number: */ -+ for (i = 0; i < ja->nr; i++) { -+ if (ja->bucket_seq[i] > ja->bucket_seq[ja->cur_idx]) -+ ja->cur_idx = i; -+ -+ min_seq = min(ja->bucket_seq[i], min_seq); -+ } -+ -+ /* -+ * If there's duplicate journal entries in multiple buckets (which -+ * definitely isn't supposed to happen, but...) 
- make sure to start -+ * cur_idx at the last of those buckets, so we don't deadlock trying to -+ * allocate -+ */ -+ while (ja->bucket_seq[ja->cur_idx] > min_seq && -+ ja->bucket_seq[ja->cur_idx] > -+ ja->bucket_seq[(ja->cur_idx + 1) % ja->nr]) -+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr; -+ -+ ja->sectors_free = 0; -+ -+ /* -+ * Set dirty_idx to indicate the entire journal is full and needs to be -+ * reclaimed - journal reclaim will immediately reclaim whatever isn't -+ * pinned when it first runs: -+ */ -+ ja->discard_idx = ja->dirty_idx_ondisk = -+ ja->dirty_idx = (ja->cur_idx + 1) % ja->nr; -+out: -+ kvpfree(buf.data, buf.size); -+ percpu_ref_put(&ca->io_ref); -+ closure_return(cl); -+ return; -+err: -+ mutex_lock(&jlist->lock); -+ jlist->ret = ret; -+ mutex_unlock(&jlist->lock); -+ goto out; -+} -+ -+int bch2_journal_read(struct bch_fs *c, struct list_head *list) -+{ -+ struct journal_list jlist; -+ struct journal_replay *i; -+ struct bch_dev *ca; -+ unsigned iter; -+ size_t keys = 0, entries = 0; -+ bool degraded = false; -+ int ret = 0; -+ -+ closure_init_stack(&jlist.cl); -+ mutex_init(&jlist.lock); -+ jlist.head = list; -+ jlist.ret = 0; -+ -+ for_each_member_device(ca, c, iter) { -+ if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && -+ !(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_journal))) -+ continue; -+ -+ if ((ca->mi.state == BCH_MEMBER_STATE_RW || -+ ca->mi.state == BCH_MEMBER_STATE_RO) && -+ percpu_ref_tryget(&ca->io_ref)) -+ closure_call(&ca->journal.read, -+ bch2_journal_read_device, -+ system_unbound_wq, -+ &jlist.cl); -+ else -+ degraded = true; -+ } -+ -+ closure_sync(&jlist.cl); -+ -+ if (jlist.ret) -+ return jlist.ret; -+ -+ list_for_each_entry(i, list, list) { -+ struct jset_entry *entry; -+ struct bkey_i *k, *_n; -+ struct bch_replicas_padded replicas; -+ char buf[80]; -+ -+ ret = jset_validate_entries(c, &i->j, READ); -+ if (ret) -+ goto fsck_err; -+ -+ /* -+ * If we're mounting in degraded mode - if we didn't read all -+ * the devices - this is wrong: -+ */ -+ -+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, i->devs); -+ -+ if (!degraded && -+ (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) || -+ fsck_err_on(!bch2_replicas_marked(c, &replicas.e), c, -+ "superblock not marked as containing replicas %s", -+ (bch2_replicas_entry_to_text(&PBUF(buf), -+ &replicas.e), buf)))) { -+ ret = bch2_mark_replicas(c, &replicas.e); -+ if (ret) -+ return ret; -+ } -+ -+ for_each_jset_key(k, _n, entry, &i->j) -+ keys++; -+ entries++; -+ } -+ -+ if (!list_empty(list)) { -+ i = list_last_entry(list, struct journal_replay, list); -+ -+ bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu", -+ keys, entries, le64_to_cpu(i->j.seq)); -+ } -+fsck_err: -+ return ret; -+} -+ -+/* journal write: */ -+ -+static void __journal_write_alloc(struct journal *j, -+ struct journal_buf *w, -+ struct dev_alloc_list *devs_sorted, -+ unsigned sectors, -+ unsigned *replicas, -+ unsigned replicas_want) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_device *ja; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ if (*replicas >= replicas_want) -+ return; -+ -+ for (i = 0; i < devs_sorted->nr; i++) { -+ ca = rcu_dereference(c->devs[devs_sorted->devs[i]]); -+ if (!ca) -+ continue; -+ -+ ja = &ca->journal; -+ -+ /* -+ * Check that we can use this device, and aren't already using -+ * it: -+ */ -+ if (!ca->mi.durability || -+ ca->mi.state != BCH_MEMBER_STATE_RW || -+ !ja->nr || -+ bch2_bkey_has_device(bkey_i_to_s_c(&w->key), -+ ca->dev_idx) || -+ sectors > 
ja->sectors_free) -+ continue; -+ -+ bch2_dev_stripe_increment(ca, &j->wp.stripe); -+ -+ bch2_bkey_append_ptr(&w->key, -+ (struct bch_extent_ptr) { -+ .offset = bucket_to_sector(ca, -+ ja->buckets[ja->cur_idx]) + -+ ca->mi.bucket_size - -+ ja->sectors_free, -+ .dev = ca->dev_idx, -+ }); -+ -+ ja->sectors_free -= sectors; -+ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq); -+ -+ *replicas += ca->mi.durability; -+ -+ if (*replicas >= replicas_want) -+ break; -+ } -+} -+ -+/** -+ * journal_next_bucket - move on to the next journal bucket if possible -+ */ -+static int journal_write_alloc(struct journal *j, struct journal_buf *w, -+ unsigned sectors) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_device *ja; -+ struct bch_dev *ca; -+ struct dev_alloc_list devs_sorted; -+ unsigned i, replicas = 0, replicas_want = -+ READ_ONCE(c->opts.metadata_replicas); -+ -+ rcu_read_lock(); -+ -+ devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, -+ &c->rw_devs[BCH_DATA_journal]); -+ -+ __journal_write_alloc(j, w, &devs_sorted, -+ sectors, &replicas, replicas_want); -+ -+ if (replicas >= replicas_want) -+ goto done; -+ -+ for (i = 0; i < devs_sorted.nr; i++) { -+ ca = rcu_dereference(c->devs[devs_sorted.devs[i]]); -+ if (!ca) -+ continue; -+ -+ ja = &ca->journal; -+ -+ if (sectors > ja->sectors_free && -+ sectors <= ca->mi.bucket_size && -+ bch2_journal_dev_buckets_available(j, ja, -+ journal_space_discarded)) { -+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr; -+ ja->sectors_free = ca->mi.bucket_size; -+ -+ /* -+ * ja->bucket_seq[ja->cur_idx] must always have -+ * something sensible: -+ */ -+ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq); -+ } -+ } -+ -+ __journal_write_alloc(j, w, &devs_sorted, -+ sectors, &replicas, replicas_want); -+done: -+ rcu_read_unlock(); -+ -+ return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS; -+} -+ -+static void journal_write_compact(struct jset *jset) -+{ -+ struct jset_entry *i, *next, *prev = NULL; -+ -+ /* -+ * Simple compaction, dropping empty jset_entries (from journal -+ * reservations that weren't fully used) and merging jset_entries that -+ * can be. -+ * -+ * If we wanted to be really fancy here, we could sort all the keys in -+ * the jset and drop keys that were overwritten - probably not worth it: -+ */ -+ vstruct_for_each_safe(jset, i, next) { -+ unsigned u64s = le16_to_cpu(i->u64s); -+ -+ /* Empty entry: */ -+ if (!u64s) -+ continue; -+ -+ /* Can we merge with previous entry? */ -+ if (prev && -+ i->btree_id == prev->btree_id && -+ i->level == prev->level && -+ i->type == prev->type && -+ i->type == BCH_JSET_ENTRY_btree_keys && -+ le16_to_cpu(prev->u64s) + u64s <= U16_MAX) { -+ memmove_u64s_down(vstruct_next(prev), -+ i->_data, -+ u64s); -+ le16_add_cpu(&prev->u64s, u64s); -+ continue; -+ } -+ -+ /* Couldn't merge, move i into new position (after prev): */ -+ prev = prev ? vstruct_next(prev) : jset->start; -+ if (i != prev) -+ memmove_u64s_down(prev, i, jset_u64s(u64s)); -+ } -+ -+ prev = prev ? 
vstruct_next(prev) : jset->start; -+ jset->u64s = cpu_to_le32((u64 *) prev - jset->_data); -+} -+ -+static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) -+{ -+ /* we aren't holding j->lock: */ -+ unsigned new_size = READ_ONCE(j->buf_size_want); -+ void *new_buf; -+ -+ if (buf->buf_size >= new_size) -+ return; -+ -+ new_buf = kvpmalloc(new_size, GFP_NOIO|__GFP_NOWARN); -+ if (!new_buf) -+ return; -+ -+ memcpy(new_buf, buf->data, buf->buf_size); -+ kvpfree(buf->data, buf->buf_size); -+ buf->data = new_buf; -+ buf->buf_size = new_size; -+} -+ -+static void journal_write_done(struct closure *cl) -+{ -+ struct journal *j = container_of(cl, struct journal, io); -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_buf *w = journal_prev_buf(j); -+ struct bch_devs_list devs = -+ bch2_bkey_devs(bkey_i_to_s_c(&w->key)); -+ struct bch_replicas_padded replicas; -+ u64 seq = le64_to_cpu(w->data->seq); -+ u64 last_seq = le64_to_cpu(w->data->last_seq); -+ -+ bch2_time_stats_update(j->write_time, j->write_start_time); -+ -+ if (!devs.nr) { -+ bch_err(c, "unable to write journal to sufficient devices"); -+ goto err; -+ } -+ -+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, devs); -+ -+ if (bch2_mark_replicas(c, &replicas.e)) -+ goto err; -+ -+ spin_lock(&j->lock); -+ if (seq >= j->pin.front) -+ journal_seq_pin(j, seq)->devs = devs; -+ -+ j->seq_ondisk = seq; -+ j->last_seq_ondisk = last_seq; -+ bch2_journal_space_available(j); -+ -+ /* -+ * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard -+ * more buckets: -+ * -+ * Must come before signaling write completion, for -+ * bch2_fs_journal_stop(): -+ */ -+ mod_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0); -+out: -+ /* also must come before signalling write completion: */ -+ closure_debug_destroy(cl); -+ -+ BUG_ON(!j->reservations.prev_buf_unwritten); -+ atomic64_sub(((union journal_res_state) { .prev_buf_unwritten = 1 }).v, -+ &j->reservations.counter); -+ -+ closure_wake_up(&w->wait); -+ journal_wake(j); -+ -+ if (test_bit(JOURNAL_NEED_WRITE, &j->flags)) -+ mod_delayed_work(system_freezable_wq, &j->write_work, 0); -+ spin_unlock(&j->lock); -+ return; -+err: -+ bch2_fatal_error(c); -+ spin_lock(&j->lock); -+ goto out; -+} -+ -+static void journal_write_endio(struct bio *bio) -+{ -+ struct bch_dev *ca = bio->bi_private; -+ struct journal *j = &ca->fs->journal; -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write: %s", -+ bch2_blk_status_to_str(bio->bi_status)) || -+ bch2_meta_write_fault("journal")) { -+ struct journal_buf *w = journal_prev_buf(j); -+ unsigned long flags; -+ -+ spin_lock_irqsave(&j->err_lock, flags); -+ bch2_bkey_drop_device(bkey_i_to_s(&w->key), ca->dev_idx); -+ spin_unlock_irqrestore(&j->err_lock, flags); -+ } -+ -+ closure_put(&j->io); -+ percpu_ref_put(&ca->io_ref); -+} -+ -+void bch2_journal_write(struct closure *cl) -+{ -+ struct journal *j = container_of(cl, struct journal, io); -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ struct journal_buf *w = journal_prev_buf(j); -+ struct jset_entry *start, *end; -+ struct jset *jset; -+ struct bio *bio; -+ struct bch_extent_ptr *ptr; -+ bool validate_before_checksum = false; -+ unsigned i, sectors, bytes, u64s; -+ int ret; -+ -+ bch2_journal_pin_put(j, le64_to_cpu(w->data->seq)); -+ -+ journal_buf_realloc(j, w); -+ jset = w->data; -+ -+ j->write_start_time = local_clock(); -+ -+ /* -+ * New btree roots are set by journalling them; when the journal entry 
-+ * gets written we have to propagate them to c->btree_roots -+ * -+ * But, every journal entry we write has to contain all the btree roots -+ * (at least for now); so after we copy btree roots to c->btree_roots we -+ * have to get any missing btree roots and add them to this journal -+ * entry: -+ */ -+ -+ bch2_journal_entries_to_btree_roots(c, jset); -+ -+ start = end = vstruct_last(jset); -+ -+ end = bch2_btree_roots_to_journal_entries(c, jset->start, end); -+ -+ end = bch2_journal_super_entries_add_common(c, end, -+ le64_to_cpu(jset->seq)); -+ u64s = (u64 *) end - (u64 *) start; -+ BUG_ON(u64s > j->entry_u64s_reserved); -+ -+ le32_add_cpu(&jset->u64s, u64s); -+ BUG_ON(vstruct_sectors(jset, c->block_bits) > w->sectors); -+ -+ journal_write_compact(jset); -+ -+ jset->read_clock = cpu_to_le16(c->bucket_clock[READ].hand); -+ jset->write_clock = cpu_to_le16(c->bucket_clock[WRITE].hand); -+ jset->magic = cpu_to_le64(jset_magic(c)); -+ -+ jset->version = c->sb.version < bcachefs_metadata_version_new_versioning -+ ? cpu_to_le32(BCH_JSET_VERSION_OLD) -+ : cpu_to_le32(c->sb.version); -+ -+ SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN); -+ SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c)); -+ -+ if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset))) -+ validate_before_checksum = true; -+ -+ if (le32_to_cpu(jset->version) < bcachefs_metadata_version_max) -+ validate_before_checksum = true; -+ -+ if (validate_before_checksum && -+ jset_validate_entries(c, jset, WRITE)) -+ goto err; -+ -+ bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), -+ jset->encrypted_start, -+ vstruct_end(jset) - (void *) jset->encrypted_start); -+ -+ jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), -+ journal_nonce(jset), jset); -+ -+ if (!validate_before_checksum && -+ jset_validate_entries(c, jset, WRITE)) -+ goto err; -+ -+ sectors = vstruct_sectors(jset, c->block_bits); -+ BUG_ON(sectors > w->sectors); -+ -+ bytes = vstruct_bytes(jset); -+ memset((void *) jset + bytes, 0, (sectors << 9) - bytes); -+ -+retry_alloc: -+ spin_lock(&j->lock); -+ ret = journal_write_alloc(j, w, sectors); -+ -+ if (ret && j->can_discard) { -+ spin_unlock(&j->lock); -+ bch2_journal_do_discards(j); -+ goto retry_alloc; -+ } -+ -+ /* -+ * write is allocated, no longer need to account for it in -+ * bch2_journal_space_available(): -+ */ -+ w->sectors = 0; -+ -+ /* -+ * journal entry has been compacted and allocated, recalculate space -+ * available: -+ */ -+ bch2_journal_space_available(j); -+ spin_unlock(&j->lock); -+ -+ if (ret) { -+ bch_err(c, "Unable to allocate journal write"); -+ bch2_fatal_error(c); -+ continue_at(cl, journal_write_done, system_highpri_wq); -+ return; -+ } -+ -+ /* -+ * XXX: we really should just disable the entire journal in nochanges -+ * mode -+ */ -+ if (c->opts.nochanges) -+ goto no_io; -+ -+ extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) { -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ if (!percpu_ref_tryget(&ca->io_ref)) { -+ /* XXX: fix this */ -+ bch_err(c, "missing device for journal write\n"); -+ continue; -+ } -+ -+ this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal], -+ sectors); -+ -+ bio = ca->journal.bio; -+ bio_reset(bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_iter.bi_sector = ptr->offset; -+ bio->bi_end_io = journal_write_endio; -+ bio->bi_private = ca; -+ bio_set_op_attrs(bio, REQ_OP_WRITE, -+ REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA); -+ bch2_bio_map(bio, jset, sectors << 9); -+ -+ trace_journal_write(bio); -+ closure_bio_submit(bio, cl); -+ -+ 
ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(jset->seq); -+ } -+ -+ for_each_rw_member(ca, c, i) -+ if (journal_flushes_device(ca) && -+ !bch2_bkey_has_device(bkey_i_to_s_c(&w->key), i)) { -+ percpu_ref_get(&ca->io_ref); -+ -+ bio = ca->journal.bio; -+ bio_reset(bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_opf = REQ_OP_FLUSH; -+ bio->bi_end_io = journal_write_endio; -+ bio->bi_private = ca; -+ closure_bio_submit(bio, cl); -+ } -+ -+no_io: -+ bch2_bucket_seq_cleanup(c); -+ -+ continue_at(cl, journal_write_done, system_highpri_wq); -+ return; -+err: -+ bch2_inconsistent_error(c); -+ continue_at(cl, journal_write_done, system_highpri_wq); -+} -diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h -new file mode 100644 -index 000000000000..6958ee0f8cf2 ---- /dev/null -+++ b/fs/bcachefs/journal_io.h -@@ -0,0 +1,44 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_IO_H -+#define _BCACHEFS_JOURNAL_IO_H -+ -+/* -+ * Only used for holding the journal entries we read in btree_journal_read() -+ * during cache_registration -+ */ -+struct journal_replay { -+ struct list_head list; -+ struct bch_devs_list devs; -+ /* checksum error, but we may want to try using it anyways: */ -+ bool bad; -+ /* must be last: */ -+ struct jset j; -+}; -+ -+static inline struct jset_entry *__jset_entry_type_next(struct jset *jset, -+ struct jset_entry *entry, unsigned type) -+{ -+ while (entry < vstruct_last(jset)) { -+ if (entry->type == type) -+ return entry; -+ -+ entry = vstruct_next(entry); -+ } -+ -+ return NULL; -+} -+ -+#define for_each_jset_entry_type(entry, jset, type) \ -+ for (entry = (jset)->start; \ -+ (entry = __jset_entry_type_next(jset, entry, type)); \ -+ entry = vstruct_next(entry)) -+ -+#define for_each_jset_key(k, _n, entry, jset) \ -+ for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys) \ -+ vstruct_for_each_safe(entry, k, _n) -+ -+int bch2_journal_read(struct bch_fs *, struct list_head *); -+ -+void bch2_journal_write(struct closure *); -+ -+#endif /* _BCACHEFS_JOURNAL_IO_H */ -diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c -new file mode 100644 -index 000000000000..57591983eebd ---- /dev/null -+++ b/fs/bcachefs/journal_reclaim.c -@@ -0,0 +1,644 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "journal.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "replicas.h" -+#include "super.h" -+ -+/* Free space calculations: */ -+ -+static unsigned journal_space_from(struct journal_device *ja, -+ enum journal_space_from from) -+{ -+ switch (from) { -+ case journal_space_discarded: -+ return ja->discard_idx; -+ case journal_space_clean_ondisk: -+ return ja->dirty_idx_ondisk; -+ case journal_space_clean: -+ return ja->dirty_idx; -+ default: -+ BUG(); -+ } -+} -+ -+unsigned bch2_journal_dev_buckets_available(struct journal *j, -+ struct journal_device *ja, -+ enum journal_space_from from) -+{ -+ unsigned available = (journal_space_from(ja, from) - -+ ja->cur_idx - 1 + ja->nr) % ja->nr; -+ -+ /* -+ * Don't use the last bucket unless writing the new last_seq -+ * will make another bucket available: -+ */ -+ if (available && ja->dirty_idx_ondisk == ja->dirty_idx) -+ --available; -+ -+ return available; -+} -+ -+static void journal_set_remaining(struct journal *j, unsigned u64s_remaining) -+{ -+ union journal_preres_state old, new; -+ u64 v = atomic64_read(&j->prereserved.counter); -+ -+ do { -+ old.v = new.v = v; -+ new.remaining = u64s_remaining; -+ } while ((v = 
atomic64_cmpxchg(&j->prereserved.counter, -+ old.v, new.v)) != old.v); -+} -+ -+static struct journal_space { -+ unsigned next_entry; -+ unsigned remaining; -+} __journal_space_available(struct journal *j, unsigned nr_devs_want, -+ enum journal_space_from from) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ unsigned sectors_next_entry = UINT_MAX; -+ unsigned sectors_total = UINT_MAX; -+ unsigned i, nr_devs = 0; -+ unsigned unwritten_sectors = j->reservations.prev_buf_unwritten -+ ? journal_prev_buf(j)->sectors -+ : 0; -+ -+ rcu_read_lock(); -+ for_each_member_device_rcu(ca, c, i, -+ &c->rw_devs[BCH_DATA_journal]) { -+ struct journal_device *ja = &ca->journal; -+ unsigned buckets_this_device, sectors_this_device; -+ -+ if (!ja->nr) -+ continue; -+ -+ buckets_this_device = bch2_journal_dev_buckets_available(j, ja, from); -+ sectors_this_device = ja->sectors_free; -+ -+ /* -+ * We that we don't allocate the space for a journal entry -+ * until we write it out - thus, account for it here: -+ */ -+ if (unwritten_sectors >= sectors_this_device) { -+ if (!buckets_this_device) -+ continue; -+ -+ buckets_this_device--; -+ sectors_this_device = ca->mi.bucket_size; -+ } -+ -+ sectors_this_device -= unwritten_sectors; -+ -+ if (sectors_this_device < ca->mi.bucket_size && -+ buckets_this_device) { -+ buckets_this_device--; -+ sectors_this_device = ca->mi.bucket_size; -+ } -+ -+ if (!sectors_this_device) -+ continue; -+ -+ sectors_next_entry = min(sectors_next_entry, -+ sectors_this_device); -+ -+ sectors_total = min(sectors_total, -+ buckets_this_device * ca->mi.bucket_size + -+ sectors_this_device); -+ -+ nr_devs++; -+ } -+ rcu_read_unlock(); -+ -+ if (nr_devs < nr_devs_want) -+ return (struct journal_space) { 0, 0 }; -+ -+ return (struct journal_space) { -+ .next_entry = sectors_next_entry, -+ .remaining = max_t(int, 0, sectors_total - sectors_next_entry), -+ }; -+} -+ -+void bch2_journal_space_available(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ struct journal_space discarded, clean_ondisk, clean; -+ unsigned overhead, u64s_remaining = 0; -+ unsigned max_entry_size = min(j->buf[0].buf_size >> 9, -+ j->buf[1].buf_size >> 9); -+ unsigned i, nr_online = 0, nr_devs_want; -+ bool can_discard = false; -+ int ret = 0; -+ -+ lockdep_assert_held(&j->lock); -+ -+ rcu_read_lock(); -+ for_each_member_device_rcu(ca, c, i, -+ &c->rw_devs[BCH_DATA_journal]) { -+ struct journal_device *ja = &ca->journal; -+ -+ if (!ja->nr) -+ continue; -+ -+ while (ja->dirty_idx != ja->cur_idx && -+ ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j)) -+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr; -+ -+ while (ja->dirty_idx_ondisk != ja->dirty_idx && -+ ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk) -+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr; -+ -+ if (ja->discard_idx != ja->dirty_idx_ondisk) -+ can_discard = true; -+ -+ max_entry_size = min_t(unsigned, max_entry_size, ca->mi.bucket_size); -+ nr_online++; -+ } -+ rcu_read_unlock(); -+ -+ j->can_discard = can_discard; -+ -+ if (nr_online < c->opts.metadata_replicas_required) { -+ ret = -EROFS; -+ goto out; -+ } -+ -+ if (!fifo_free(&j->pin)) { -+ ret = -ENOSPC; -+ goto out; -+ } -+ -+ nr_devs_want = min_t(unsigned, nr_online, c->opts.metadata_replicas); -+ -+ discarded = __journal_space_available(j, nr_devs_want, journal_space_discarded); -+ clean_ondisk = __journal_space_available(j, nr_devs_want, journal_space_clean_ondisk); -+ clean = 
__journal_space_available(j, nr_devs_want, journal_space_clean); -+ -+ if (!discarded.next_entry) -+ ret = -ENOSPC; -+ -+ overhead = DIV_ROUND_UP(clean.remaining, max_entry_size) * -+ journal_entry_overhead(j); -+ u64s_remaining = clean.remaining << 6; -+ u64s_remaining = max_t(int, 0, u64s_remaining - overhead); -+ u64s_remaining /= 4; -+out: -+ j->cur_entry_sectors = !ret ? discarded.next_entry : 0; -+ j->cur_entry_error = ret; -+ journal_set_remaining(j, u64s_remaining); -+ journal_check_may_get_unreserved(j); -+ -+ if (!ret) -+ journal_wake(j); -+} -+ -+/* Discards - last part of journal reclaim: */ -+ -+static bool should_discard_bucket(struct journal *j, struct journal_device *ja) -+{ -+ bool ret; -+ -+ spin_lock(&j->lock); -+ ret = ja->discard_idx != ja->dirty_idx_ondisk; -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+/* -+ * Advance ja->discard_idx as long as it points to buckets that are no longer -+ * dirty, issuing discards if necessary: -+ */ -+void bch2_journal_do_discards(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ unsigned iter; -+ -+ mutex_lock(&j->discard_lock); -+ -+ for_each_rw_member(ca, c, iter) { -+ struct journal_device *ja = &ca->journal; -+ -+ while (should_discard_bucket(j, ja)) { -+ if (ca->mi.discard && -+ blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev))) -+ blkdev_issue_discard(ca->disk_sb.bdev, -+ bucket_to_sector(ca, -+ ja->buckets[ja->discard_idx]), -+ ca->mi.bucket_size, GFP_NOIO, 0); -+ -+ spin_lock(&j->lock); -+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr; -+ -+ bch2_journal_space_available(j); -+ spin_unlock(&j->lock); -+ } -+ } -+ -+ mutex_unlock(&j->discard_lock); -+} -+ -+/* -+ * Journal entry pinning - machinery for holding a reference on a given journal -+ * entry, holding it open to ensure it gets replayed during recovery: -+ */ -+ -+static void bch2_journal_reclaim_fast(struct journal *j) -+{ -+ struct journal_entry_pin_list temp; -+ bool popped = false; -+ -+ lockdep_assert_held(&j->lock); -+ -+ /* -+ * Unpin journal entries whose reference counts reached zero, meaning -+ * all btree nodes got written out -+ */ -+ while (!fifo_empty(&j->pin) && -+ !atomic_read(&fifo_peek_front(&j->pin).count)) { -+ BUG_ON(!list_empty(&fifo_peek_front(&j->pin).list)); -+ BUG_ON(!fifo_pop(&j->pin, temp)); -+ popped = true; -+ } -+ -+ if (popped) -+ bch2_journal_space_available(j); -+} -+ -+void bch2_journal_pin_put(struct journal *j, u64 seq) -+{ -+ struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq); -+ -+ if (atomic_dec_and_test(&pin_list->count)) { -+ spin_lock(&j->lock); -+ bch2_journal_reclaim_fast(j); -+ spin_unlock(&j->lock); -+ } -+} -+ -+static inline void __journal_pin_drop(struct journal *j, -+ struct journal_entry_pin *pin) -+{ -+ struct journal_entry_pin_list *pin_list; -+ -+ if (!journal_pin_active(pin)) -+ return; -+ -+ pin_list = journal_seq_pin(j, pin->seq); -+ pin->seq = 0; -+ list_del_init(&pin->list); -+ -+ /* -+ * Unpinning a journal entry make make journal_next_bucket() succeed, if -+ * writing a new last_seq will now make another bucket available: -+ */ -+ if (atomic_dec_and_test(&pin_list->count) && -+ pin_list == &fifo_peek_front(&j->pin)) -+ bch2_journal_reclaim_fast(j); -+ else if (fifo_used(&j->pin) == 1 && -+ atomic_read(&pin_list->count) == 1) -+ journal_wake(j); -+} -+ -+void bch2_journal_pin_drop(struct journal *j, -+ struct journal_entry_pin *pin) -+{ -+ spin_lock(&j->lock); -+ __journal_pin_drop(j, pin); -+ spin_unlock(&j->lock); -+} -+ 
-+static void bch2_journal_pin_add_locked(struct journal *j, u64 seq, -+ struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq); -+ -+ __journal_pin_drop(j, pin); -+ -+ BUG_ON(!atomic_read(&pin_list->count) && seq == journal_last_seq(j)); -+ -+ atomic_inc(&pin_list->count); -+ pin->seq = seq; -+ pin->flush = flush_fn; -+ -+ list_add(&pin->list, flush_fn ? &pin_list->list : &pin_list->flushed); -+} -+ -+void __bch2_journal_pin_add(struct journal *j, u64 seq, -+ struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ spin_lock(&j->lock); -+ bch2_journal_pin_add_locked(j, seq, pin, flush_fn); -+ spin_unlock(&j->lock); -+ -+ /* -+ * If the journal is currently full, we might want to call flush_fn -+ * immediately: -+ */ -+ journal_wake(j); -+} -+ -+void bch2_journal_pin_update(struct journal *j, u64 seq, -+ struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ if (journal_pin_active(pin) && pin->seq < seq) -+ return; -+ -+ spin_lock(&j->lock); -+ -+ if (pin->seq != seq) { -+ bch2_journal_pin_add_locked(j, seq, pin, flush_fn); -+ } else { -+ struct journal_entry_pin_list *pin_list = -+ journal_seq_pin(j, seq); -+ -+ /* -+ * If the pin is already pinning the right sequence number, it -+ * still might've already been flushed: -+ */ -+ list_move(&pin->list, &pin_list->list); -+ } -+ -+ spin_unlock(&j->lock); -+ -+ /* -+ * If the journal is currently full, we might want to call flush_fn -+ * immediately: -+ */ -+ journal_wake(j); -+} -+ -+void bch2_journal_pin_copy(struct journal *j, -+ struct journal_entry_pin *dst, -+ struct journal_entry_pin *src, -+ journal_pin_flush_fn flush_fn) -+{ -+ spin_lock(&j->lock); -+ -+ if (journal_pin_active(src) && -+ (!journal_pin_active(dst) || src->seq < dst->seq)) -+ bch2_journal_pin_add_locked(j, src->seq, dst, flush_fn); -+ -+ spin_unlock(&j->lock); -+} -+ -+/** -+ * bch2_journal_pin_flush: ensure journal pin callback is no longer running -+ */ -+void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin) -+{ -+ BUG_ON(journal_pin_active(pin)); -+ -+ wait_event(j->pin_flush_wait, j->flush_in_progress != pin); -+} -+ -+/* -+ * Journal reclaim: flush references to open journal entries to reclaim space in -+ * the journal -+ * -+ * May be done by the journal code in the background as needed to free up space -+ * for more journal entries, or as part of doing a clean shutdown, or to migrate -+ * data off of a specific device: -+ */ -+ -+static struct journal_entry_pin * -+journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq) -+{ -+ struct journal_entry_pin_list *pin_list; -+ struct journal_entry_pin *ret = NULL; -+ -+ if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags)) -+ return NULL; -+ -+ spin_lock(&j->lock); -+ -+ fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) -+ if (*seq > max_seq || -+ (ret = list_first_entry_or_null(&pin_list->list, -+ struct journal_entry_pin, list))) -+ break; -+ -+ if (ret) { -+ list_move(&ret->list, &pin_list->flushed); -+ BUG_ON(j->flush_in_progress); -+ j->flush_in_progress = ret; -+ j->last_flushed = jiffies; -+ } -+ -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+/* returns true if we did work */ -+static bool journal_flush_pins(struct journal *j, u64 seq_to_flush, -+ unsigned min_nr) -+{ -+ struct journal_entry_pin *pin; -+ bool ret = false; -+ u64 seq; -+ -+ lockdep_assert_held(&j->reclaim_lock); -+ -+ while ((pin = journal_get_next_pin(j, min_nr -+ ? 
U64_MAX : seq_to_flush, &seq))) { -+ if (min_nr) -+ min_nr--; -+ -+ pin->flush(j, pin, seq); -+ -+ BUG_ON(j->flush_in_progress != pin); -+ j->flush_in_progress = NULL; -+ wake_up(&j->pin_flush_wait); -+ ret = true; -+ } -+ -+ return ret; -+} -+ -+/** -+ * bch2_journal_reclaim - free up journal buckets -+ * -+ * Background journal reclaim writes out btree nodes. It should be run -+ * early enough so that we never completely run out of journal buckets. -+ * -+ * High watermarks for triggering background reclaim: -+ * - FIFO has fewer than 512 entries left -+ * - fewer than 25% journal buckets free -+ * -+ * Background reclaim runs until low watermarks are reached: -+ * - FIFO has more than 1024 entries left -+ * - more than 50% journal buckets free -+ * -+ * As long as a reclaim can complete in the time it takes to fill up -+ * 512 journal entries or 25% of all journal buckets, then -+ * journal_next_bucket() should not stall. -+ */ -+void bch2_journal_reclaim(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ unsigned iter, min_nr = 0; -+ u64 seq_to_flush = 0; -+ -+ lockdep_assert_held(&j->reclaim_lock); -+ -+ bch2_journal_do_discards(j); -+ -+ spin_lock(&j->lock); -+ -+ for_each_rw_member(ca, c, iter) { -+ struct journal_device *ja = &ca->journal; -+ unsigned nr_buckets, bucket_to_flush; -+ -+ if (!ja->nr) -+ continue; -+ -+ /* Try to keep the journal at most half full: */ -+ nr_buckets = ja->nr / 2; -+ -+ /* And include pre-reservations: */ -+ nr_buckets += DIV_ROUND_UP(j->prereserved.reserved, -+ (ca->mi.bucket_size << 6) - -+ journal_entry_overhead(j)); -+ -+ nr_buckets = min(nr_buckets, ja->nr); -+ -+ bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr; -+ seq_to_flush = max(seq_to_flush, -+ ja->bucket_seq[bucket_to_flush]); -+ } -+ -+ /* Also flush if the pin fifo is more than half full */ -+ seq_to_flush = max_t(s64, seq_to_flush, -+ (s64) journal_cur_seq(j) - -+ (j->pin.size >> 1)); -+ spin_unlock(&j->lock); -+ -+ /* -+ * If it's been longer than j->reclaim_delay_ms since we last flushed, -+ * make sure to flush at least one journal pin: -+ */ -+ if (time_after(jiffies, j->last_flushed + -+ msecs_to_jiffies(j->reclaim_delay_ms))) -+ min_nr = 1; -+ -+ if (j->prereserved.reserved * 2 > j->prereserved.remaining) { -+ seq_to_flush = max(seq_to_flush, journal_last_seq(j)); -+ min_nr = 1; -+ } -+ -+ journal_flush_pins(j, seq_to_flush, min_nr); -+ -+ if (!bch2_journal_error(j)) -+ queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, -+ msecs_to_jiffies(j->reclaim_delay_ms)); -+} -+ -+void bch2_journal_reclaim_work(struct work_struct *work) -+{ -+ struct journal *j = container_of(to_delayed_work(work), -+ struct journal, reclaim_work); -+ -+ mutex_lock(&j->reclaim_lock); -+ bch2_journal_reclaim(j); -+ mutex_unlock(&j->reclaim_lock); -+} -+ -+static int journal_flush_done(struct journal *j, u64 seq_to_flush, -+ bool *did_work) -+{ -+ int ret; -+ -+ ret = bch2_journal_error(j); -+ if (ret) -+ return ret; -+ -+ mutex_lock(&j->reclaim_lock); -+ -+ *did_work = journal_flush_pins(j, seq_to_flush, 0); -+ -+ spin_lock(&j->lock); -+ /* -+ * If journal replay hasn't completed, the unreplayed journal entries -+ * hold refs on their corresponding sequence numbers -+ */ -+ ret = !test_bit(JOURNAL_REPLAY_DONE, &j->flags) || -+ journal_last_seq(j) > seq_to_flush || -+ (fifo_used(&j->pin) == 1 && -+ atomic_read(&fifo_peek_front(&j->pin).count) == 1); -+ -+ spin_unlock(&j->lock); -+ mutex_unlock(&j->reclaim_lock); -+ -+ return ret; -+} -+ 
-+bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush) -+{ -+ bool did_work = false; -+ -+ if (!test_bit(JOURNAL_STARTED, &j->flags)) -+ return false; -+ -+ closure_wait_event(&j->async_wait, -+ journal_flush_done(j, seq_to_flush, &did_work)); -+ -+ return did_work; -+} -+ -+int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_entry_pin_list *p; -+ u64 iter, seq = 0; -+ int ret = 0; -+ -+ spin_lock(&j->lock); -+ fifo_for_each_entry_ptr(p, &j->pin, iter) -+ if (dev_idx >= 0 -+ ? bch2_dev_list_has_dev(p->devs, dev_idx) -+ : p->devs.nr < c->opts.metadata_replicas) -+ seq = iter; -+ spin_unlock(&j->lock); -+ -+ bch2_journal_flush_pins(j, seq); -+ -+ ret = bch2_journal_error(j); -+ if (ret) -+ return ret; -+ -+ mutex_lock(&c->replicas_gc_lock); -+ bch2_replicas_gc_start(c, 1 << BCH_DATA_journal); -+ -+ seq = 0; -+ -+ spin_lock(&j->lock); -+ while (!ret && seq < j->pin.back) { -+ struct bch_replicas_padded replicas; -+ -+ seq = max(seq, journal_last_seq(j)); -+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, -+ journal_seq_pin(j, seq)->devs); -+ seq++; -+ -+ spin_unlock(&j->lock); -+ ret = bch2_mark_replicas(c, &replicas.e); -+ spin_lock(&j->lock); -+ } -+ spin_unlock(&j->lock); -+ -+ ret = bch2_replicas_gc_end(c, ret); -+ mutex_unlock(&c->replicas_gc_lock); -+ -+ return ret; -+} -diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h -new file mode 100644 -index 000000000000..8128907a7623 ---- /dev/null -+++ b/fs/bcachefs/journal_reclaim.h -@@ -0,0 +1,69 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_RECLAIM_H -+#define _BCACHEFS_JOURNAL_RECLAIM_H -+ -+#define JOURNAL_PIN (32 * 1024) -+ -+enum journal_space_from { -+ journal_space_discarded, -+ journal_space_clean_ondisk, -+ journal_space_clean, -+}; -+ -+unsigned bch2_journal_dev_buckets_available(struct journal *, -+ struct journal_device *, -+ enum journal_space_from); -+void bch2_journal_space_available(struct journal *); -+ -+static inline bool journal_pin_active(struct journal_entry_pin *pin) -+{ -+ return pin->seq != 0; -+} -+ -+static inline struct journal_entry_pin_list * -+journal_seq_pin(struct journal *j, u64 seq) -+{ -+ EBUG_ON(seq < j->pin.front || seq >= j->pin.back); -+ -+ return &j->pin.data[seq & j->pin.mask]; -+} -+ -+void bch2_journal_pin_put(struct journal *, u64); -+void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *); -+ -+void __bch2_journal_pin_add(struct journal *, u64, struct journal_entry_pin *, -+ journal_pin_flush_fn); -+ -+static inline void bch2_journal_pin_add(struct journal *j, u64 seq, -+ struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ if (unlikely(!journal_pin_active(pin) || pin->seq > seq)) -+ __bch2_journal_pin_add(j, seq, pin, flush_fn); -+} -+ -+void bch2_journal_pin_update(struct journal *, u64, -+ struct journal_entry_pin *, -+ journal_pin_flush_fn); -+ -+void bch2_journal_pin_copy(struct journal *, -+ struct journal_entry_pin *, -+ struct journal_entry_pin *, -+ journal_pin_flush_fn); -+ -+void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *); -+ -+void bch2_journal_do_discards(struct journal *); -+void bch2_journal_reclaim(struct journal *); -+void bch2_journal_reclaim_work(struct work_struct *); -+ -+bool bch2_journal_flush_pins(struct journal *, u64); -+ -+static inline bool bch2_journal_flush_all_pins(struct journal *j) -+{ -+ return bch2_journal_flush_pins(j, U64_MAX); -+} 
-+ -+int bch2_journal_flush_device_pins(struct journal *, int); -+ -+#endif /* _BCACHEFS_JOURNAL_RECLAIM_H */ -diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c -new file mode 100644 -index 000000000000..d0f1bbf8f6a7 ---- /dev/null -+++ b/fs/bcachefs/journal_seq_blacklist.c -@@ -0,0 +1,309 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_iter.h" -+#include "eytzinger.h" -+#include "journal_seq_blacklist.h" -+#include "super-io.h" -+ -+/* -+ * journal_seq_blacklist machinery: -+ * -+ * To guarantee order of btree updates after a crash, we need to detect when a -+ * btree node entry (bset) is newer than the newest journal entry that was -+ * successfully written, and ignore it - effectively ignoring any btree updates -+ * that didn't make it into the journal. -+ * -+ * If we didn't do this, we might have two btree nodes, a and b, both with -+ * updates that weren't written to the journal yet: if b was updated after a, -+ * but b was flushed and not a - oops; on recovery we'll find that the updates -+ * to b happened, but not the updates to a that happened before it. -+ * -+ * Ignoring bsets that are newer than the newest journal entry is always safe, -+ * because everything they contain will also have been journalled - and must -+ * still be present in the journal on disk until a journal entry has been -+ * written _after_ that bset was written. -+ * -+ * To accomplish this, bsets record the newest journal sequence number they -+ * contain updates for; then, on startup, the btree code queries the journal -+ * code to ask "Is this sequence number newer than the newest journal entry? If -+ * so, ignore it." -+ * -+ * When this happens, we must blacklist that journal sequence number: the -+ * journal must not write any entries with that sequence number, and it must -+ * record that it was blacklisted so that a) on recovery we don't think we have -+ * missing journal entries and b) so that the btree code continues to ignore -+ * that bset, until that btree node is rewritten. 
-+ */ -+ -+static unsigned sb_blacklist_u64s(unsigned nr) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl; -+ -+ return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64); -+} -+ -+static struct bch_sb_field_journal_seq_blacklist * -+blacklist_entry_try_merge(struct bch_fs *c, -+ struct bch_sb_field_journal_seq_blacklist *bl, -+ unsigned i) -+{ -+ unsigned nr = blacklist_nr_entries(bl); -+ -+ if (le64_to_cpu(bl->start[i].end) >= -+ le64_to_cpu(bl->start[i + 1].start)) { -+ bl->start[i].end = bl->start[i + 1].end; -+ --nr; -+ memmove(&bl->start[i], -+ &bl->start[i + 1], -+ sizeof(bl->start[0]) * (nr - i)); -+ -+ bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, -+ sb_blacklist_u64s(nr)); -+ BUG_ON(!bl); -+ } -+ -+ return bl; -+} -+ -+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl; -+ unsigned i, nr; -+ int ret = 0; -+ -+ mutex_lock(&c->sb_lock); -+ bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); -+ nr = blacklist_nr_entries(bl); -+ -+ if (bl) { -+ for (i = 0; i < nr; i++) { -+ struct journal_seq_blacklist_entry *e = -+ bl->start + i; -+ -+ if (start == le64_to_cpu(e->start) && -+ end == le64_to_cpu(e->end)) -+ goto out; -+ -+ if (start <= le64_to_cpu(e->start) && -+ end >= le64_to_cpu(e->end)) { -+ e->start = cpu_to_le64(start); -+ e->end = cpu_to_le64(end); -+ -+ if (i + 1 < nr) -+ bl = blacklist_entry_try_merge(c, -+ bl, i); -+ if (i) -+ bl = blacklist_entry_try_merge(c, -+ bl, i - 1); -+ goto out_write_sb; -+ } -+ } -+ } -+ -+ bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, -+ sb_blacklist_u64s(nr + 1)); -+ if (!bl) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ bl->start[nr].start = cpu_to_le64(start); -+ bl->start[nr].end = cpu_to_le64(end); -+out_write_sb: -+ c->disk_sb.sb->features[0] |= -+ 1ULL << BCH_FEATURE_journal_seq_blacklist_v3; -+ -+ ret = bch2_write_super(c); -+out: -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+static int journal_seq_blacklist_table_cmp(const void *_l, -+ const void *_r, size_t size) -+{ -+ const struct journal_seq_blacklist_table_entry *l = _l; -+ const struct journal_seq_blacklist_table_entry *r = _r; -+ -+ return cmp_int(l->start, r->start); -+} -+ -+bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq, -+ bool dirty) -+{ -+ struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table; -+ struct journal_seq_blacklist_table_entry search = { .start = seq }; -+ int idx; -+ -+ if (!t) -+ return false; -+ -+ idx = eytzinger0_find_le(t->entries, t->nr, -+ sizeof(t->entries[0]), -+ journal_seq_blacklist_table_cmp, -+ &search); -+ if (idx < 0) -+ return false; -+ -+ BUG_ON(t->entries[idx].start > seq); -+ -+ if (seq >= t->entries[idx].end) -+ return false; -+ -+ if (dirty) -+ t->entries[idx].dirty = true; -+ return true; -+} -+ -+int bch2_blacklist_table_initialize(struct bch_fs *c) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl = -+ bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); -+ struct journal_seq_blacklist_table *t; -+ unsigned i, nr = blacklist_nr_entries(bl); -+ -+ BUG_ON(c->journal_seq_blacklist_table); -+ -+ if (!bl) -+ return 0; -+ -+ t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr, -+ GFP_KERNEL); -+ if (!t) -+ return -ENOMEM; -+ -+ t->nr = nr; -+ -+ for (i = 0; i < nr; i++) { -+ t->entries[i].start = le64_to_cpu(bl->start[i].start); -+ t->entries[i].end = le64_to_cpu(bl->start[i].end); -+ } -+ -+ eytzinger0_sort(t->entries, -+ t->nr, -+ sizeof(t->entries[0]), -+ journal_seq_blacklist_table_cmp, -+ NULL); 
-+ -+ c->journal_seq_blacklist_table = t; -+ return 0; -+} -+ -+static const char * -+bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl = -+ field_to_type(f, journal_seq_blacklist); -+ struct journal_seq_blacklist_entry *i; -+ unsigned nr = blacklist_nr_entries(bl); -+ -+ for (i = bl->start; i < bl->start + nr; i++) { -+ if (le64_to_cpu(i->start) >= -+ le64_to_cpu(i->end)) -+ return "entry start >= end"; -+ -+ if (i + 1 < bl->start + nr && -+ le64_to_cpu(i[0].end) > -+ le64_to_cpu(i[1].start)) -+ return "entries out of order"; -+ } -+ -+ return NULL; -+} -+ -+static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out, -+ struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl = -+ field_to_type(f, journal_seq_blacklist); -+ struct journal_seq_blacklist_entry *i; -+ unsigned nr = blacklist_nr_entries(bl); -+ -+ for (i = bl->start; i < bl->start + nr; i++) { -+ if (i != bl->start) -+ pr_buf(out, " "); -+ -+ pr_buf(out, "%llu-%llu", -+ le64_to_cpu(i->start), -+ le64_to_cpu(i->end)); -+ } -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = { -+ .validate = bch2_sb_journal_seq_blacklist_validate, -+ .to_text = bch2_sb_journal_seq_blacklist_to_text -+}; -+ -+void bch2_blacklist_entries_gc(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(work, struct bch_fs, -+ journal_seq_blacklist_gc_work); -+ struct journal_seq_blacklist_table *t; -+ struct bch_sb_field_journal_seq_blacklist *bl; -+ struct journal_seq_blacklist_entry *src, *dst; -+ struct btree_trans trans; -+ unsigned i, nr, new_nr; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ struct btree_iter *iter; -+ struct btree *b; -+ -+ for_each_btree_node(&trans, iter, i, POS_MIN, -+ BTREE_ITER_PREFETCH, b) -+ if (test_bit(BCH_FS_STOPPING, &c->flags)) { -+ bch2_trans_exit(&trans); -+ return; -+ } -+ bch2_trans_iter_free(&trans, iter); -+ } -+ -+ ret = bch2_trans_exit(&trans); -+ if (ret) -+ return; -+ -+ mutex_lock(&c->sb_lock); -+ bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); -+ if (!bl) -+ goto out; -+ -+ nr = blacklist_nr_entries(bl); -+ dst = bl->start; -+ -+ t = c->journal_seq_blacklist_table; -+ BUG_ON(nr != t->nr); -+ -+ for (src = bl->start, i = eytzinger0_first(t->nr); -+ src < bl->start + nr; -+ src++, i = eytzinger0_next(i, nr)) { -+ BUG_ON(t->entries[i].start != le64_to_cpu(src->start)); -+ BUG_ON(t->entries[i].end != le64_to_cpu(src->end)); -+ -+ if (t->entries[i].dirty) -+ *dst++ = *src; -+ } -+ -+ new_nr = dst - bl->start; -+ -+ bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr); -+ -+ if (new_nr != nr) { -+ bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, -+ new_nr ? sb_blacklist_u64s(new_nr) : 0); -+ BUG_ON(new_nr && !bl); -+ -+ if (!new_nr) -+ c->disk_sb.sb->features[0] &= -+ ~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3); -+ -+ bch2_write_super(c); -+ } -+out: -+ mutex_unlock(&c->sb_lock); -+} -diff --git a/fs/bcachefs/journal_seq_blacklist.h b/fs/bcachefs/journal_seq_blacklist.h -new file mode 100644 -index 000000000000..afb886ec8e25 ---- /dev/null -+++ b/fs/bcachefs/journal_seq_blacklist.h -@@ -0,0 +1,22 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H -+#define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H -+ -+static inline unsigned -+blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl) -+{ -+ return bl -+ ? 
((vstruct_end(&bl->field) - (void *) &bl->start[0]) / -+ sizeof(struct journal_seq_blacklist_entry)) -+ : 0; -+} -+ -+bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool); -+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64); -+int bch2_blacklist_table_initialize(struct bch_fs *); -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist; -+ -+void bch2_blacklist_entries_gc(struct work_struct *); -+ -+#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */ -diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h -new file mode 100644 -index 000000000000..154b51b891d3 ---- /dev/null -+++ b/fs/bcachefs/journal_types.h -@@ -0,0 +1,277 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_TYPES_H -+#define _BCACHEFS_JOURNAL_TYPES_H -+ -+#include -+#include -+ -+#include "alloc_types.h" -+#include "super_types.h" -+#include "fifo.h" -+ -+struct journal_res; -+ -+/* -+ * We put two of these in struct journal; we used them for writes to the -+ * journal that are being staged or in flight. -+ */ -+struct journal_buf { -+ struct jset *data; -+ -+ BKEY_PADDED(key); -+ -+ struct closure_waitlist wait; -+ -+ unsigned buf_size; /* size in bytes of @data */ -+ unsigned sectors; /* maximum size for current entry */ -+ unsigned disk_sectors; /* maximum size entry could have been, if -+ buf_size was bigger */ -+ unsigned u64s_reserved; -+ /* bloom filter: */ -+ unsigned long has_inode[1024 / sizeof(unsigned long)]; -+}; -+ -+/* -+ * Something that makes a journal entry dirty - i.e. a btree node that has to be -+ * flushed: -+ */ -+ -+struct journal_entry_pin_list { -+ struct list_head list; -+ struct list_head flushed; -+ atomic_t count; -+ struct bch_devs_list devs; -+}; -+ -+struct journal; -+struct journal_entry_pin; -+typedef void (*journal_pin_flush_fn)(struct journal *j, -+ struct journal_entry_pin *, u64); -+ -+struct journal_entry_pin { -+ struct list_head list; -+ journal_pin_flush_fn flush; -+ u64 seq; -+}; -+ -+struct journal_res { -+ bool ref; -+ u8 idx; -+ u16 u64s; -+ u32 offset; -+ u64 seq; -+}; -+ -+/* -+ * For reserving space in the journal prior to getting a reservation on a -+ * particular journal entry: -+ */ -+struct journal_preres { -+ unsigned u64s; -+}; -+ -+union journal_res_state { -+ struct { -+ atomic64_t counter; -+ }; -+ -+ struct { -+ u64 v; -+ }; -+ -+ struct { -+ u64 cur_entry_offset:20, -+ idx:1, -+ prev_buf_unwritten:1, -+ buf0_count:21, -+ buf1_count:21; -+ }; -+}; -+ -+union journal_preres_state { -+ struct { -+ atomic64_t counter; -+ }; -+ -+ struct { -+ u64 v; -+ }; -+ -+ struct { -+ u32 reserved; -+ u32 remaining; -+ }; -+}; -+ -+/* bytes: */ -+#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */ -+#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */ -+ -+/* -+ * We stash some journal state as sentinal values in cur_entry_offset: -+ * note - cur_entry_offset is in units of u64s -+ */ -+#define JOURNAL_ENTRY_OFFSET_MAX ((1U << 20) - 1) -+ -+#define JOURNAL_ENTRY_CLOSED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 1) -+#define JOURNAL_ENTRY_ERROR_VAL (JOURNAL_ENTRY_OFFSET_MAX) -+ -+/* -+ * JOURNAL_NEED_WRITE - current (pending) journal entry should be written ASAP, -+ * either because something's waiting on the write to complete or because it's -+ * been dirty too long and the timer's expired. 
-+ */ -+ -+enum { -+ JOURNAL_REPLAY_DONE, -+ JOURNAL_STARTED, -+ JOURNAL_RECLAIM_STARTED, -+ JOURNAL_NEED_WRITE, -+ JOURNAL_NOT_EMPTY, -+ JOURNAL_MAY_GET_UNRESERVED, -+}; -+ -+/* Embedded in struct bch_fs */ -+struct journal { -+ /* Fastpath stuff up front: */ -+ -+ unsigned long flags; -+ -+ union journal_res_state reservations; -+ -+ /* Max size of current journal entry */ -+ unsigned cur_entry_u64s; -+ unsigned cur_entry_sectors; -+ -+ /* -+ * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if -+ * insufficient devices: -+ */ -+ int cur_entry_error; -+ -+ union journal_preres_state prereserved; -+ -+ /* Reserved space in journal entry to be used just prior to write */ -+ unsigned entry_u64s_reserved; -+ -+ unsigned buf_size_want; -+ -+ /* -+ * Two journal entries -- one is currently open for new entries, the -+ * other is possibly being written out. -+ */ -+ struct journal_buf buf[2]; -+ -+ spinlock_t lock; -+ -+ /* if nonzero, we may not open a new journal entry: */ -+ unsigned blocked; -+ -+ /* Used when waiting because the journal was full */ -+ wait_queue_head_t wait; -+ struct closure_waitlist async_wait; -+ struct closure_waitlist preres_wait; -+ -+ struct closure io; -+ struct delayed_work write_work; -+ -+ /* Sequence number of most recent journal entry (last entry in @pin) */ -+ atomic64_t seq; -+ -+ /* seq, last_seq from the most recent journal entry successfully written */ -+ u64 seq_ondisk; -+ u64 last_seq_ondisk; -+ -+ /* -+ * FIFO of journal entries whose btree updates have not yet been -+ * written out. -+ * -+ * Each entry is a reference count. The position in the FIFO is the -+ * entry's sequence number relative to @seq. -+ * -+ * The journal entry itself holds a reference count, put when the -+ * journal entry is written out. Each btree node modified by the journal -+ * entry also holds a reference count, put when the btree node is -+ * written. -+ * -+ * When a reference count reaches zero, the journal entry is no longer -+ * needed. When all journal entries in the oldest journal bucket are no -+ * longer needed, the bucket can be discarded and reused. -+ */ -+ struct { -+ u64 front, back, size, mask; -+ struct journal_entry_pin_list *data; -+ } pin; -+ -+ u64 replay_journal_seq; -+ u64 replay_journal_seq_end; -+ -+ struct write_point wp; -+ spinlock_t err_lock; -+ -+ struct delayed_work reclaim_work; -+ struct mutex reclaim_lock; -+ unsigned long last_flushed; -+ struct journal_entry_pin *flush_in_progress; -+ wait_queue_head_t pin_flush_wait; -+ -+ /* protects advancing ja->discard_idx: */ -+ struct mutex discard_lock; -+ bool can_discard; -+ -+ unsigned write_delay_ms; -+ unsigned reclaim_delay_ms; -+ -+ u64 res_get_blocked_start; -+ u64 need_write_time; -+ u64 write_start_time; -+ -+ struct time_stats *write_time; -+ struct time_stats *delay_time; -+ struct time_stats *blocked_time; -+ struct time_stats *flush_seq_time; -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map res_map; -+#endif -+}; -+ -+/* -+ * Embedded in struct bch_dev. First three fields refer to the array of journal -+ * buckets, in bch_sb. -+ */ -+struct journal_device { -+ /* -+ * For each journal bucket, contains the max sequence number of the -+ * journal writes it contains - so we know when a bucket can be reused. 
-+ */ -+ u64 *bucket_seq; -+ -+ unsigned sectors_free; -+ -+ /* -+ * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx: -+ */ -+ unsigned discard_idx; /* Next bucket to discard */ -+ unsigned dirty_idx_ondisk; -+ unsigned dirty_idx; -+ unsigned cur_idx; /* Journal bucket we're currently writing to */ -+ unsigned nr; -+ -+ u64 *buckets; -+ -+ /* Bio for journal reads/writes to this device */ -+ struct bio *bio; -+ -+ /* for bch_journal_read_device */ -+ struct closure read; -+}; -+ -+/* -+ * journal_entry_res - reserve space in every journal entry: -+ */ -+struct journal_entry_res { -+ unsigned u64s; -+}; -+ -+#endif /* _BCACHEFS_JOURNAL_TYPES_H */ -diff --git a/fs/bcachefs/keylist.c b/fs/bcachefs/keylist.c -new file mode 100644 -index 000000000000..864dfaa67b7a ---- /dev/null -+++ b/fs/bcachefs/keylist.c -@@ -0,0 +1,67 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "keylist.h" -+ -+int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s, -+ size_t nr_inline_u64s, size_t new_u64s) -+{ -+ size_t oldsize = bch2_keylist_u64s(l); -+ size_t newsize = oldsize + new_u64s; -+ u64 *old_buf = l->keys_p == inline_u64s ? NULL : l->keys_p; -+ u64 *new_keys; -+ -+ newsize = roundup_pow_of_two(newsize); -+ -+ if (newsize <= nr_inline_u64s || -+ (old_buf && roundup_pow_of_two(oldsize) == newsize)) -+ return 0; -+ -+ new_keys = krealloc(old_buf, sizeof(u64) * newsize, GFP_NOIO); -+ if (!new_keys) -+ return -ENOMEM; -+ -+ if (!old_buf) -+ memcpy_u64s(new_keys, inline_u64s, oldsize); -+ -+ l->keys_p = new_keys; -+ l->top_p = new_keys + oldsize; -+ -+ return 0; -+} -+ -+void bch2_keylist_add_in_order(struct keylist *l, struct bkey_i *insert) -+{ -+ struct bkey_i *where; -+ -+ for_each_keylist_key(l, where) -+ if (bkey_cmp(insert->k.p, where->k.p) < 0) -+ break; -+ -+ memmove_u64s_up((u64 *) where + insert->k.u64s, -+ where, -+ ((u64 *) l->top) - ((u64 *) where)); -+ -+ l->top_p += insert->k.u64s; -+ bkey_copy(where, insert); -+} -+ -+void bch2_keylist_pop_front(struct keylist *l) -+{ -+ l->top_p -= bch2_keylist_front(l)->k.u64s; -+ -+ memmove_u64s_down(l->keys, -+ bkey_next(l->keys), -+ bch2_keylist_u64s(l)); -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_verify_keylist_sorted(struct keylist *l) -+{ -+ struct bkey_i *k; -+ -+ for_each_keylist_key(l, k) -+ BUG_ON(bkey_next(k) != l->top && -+ bkey_cmp(k->k.p, bkey_next(k)->k.p) >= 0); -+} -+#endif -diff --git a/fs/bcachefs/keylist.h b/fs/bcachefs/keylist.h -new file mode 100644 -index 000000000000..195799bb20bc ---- /dev/null -+++ b/fs/bcachefs/keylist.h -@@ -0,0 +1,76 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_KEYLIST_H -+#define _BCACHEFS_KEYLIST_H -+ -+#include "keylist_types.h" -+ -+int bch2_keylist_realloc(struct keylist *, u64 *, size_t, size_t); -+void bch2_keylist_add_in_order(struct keylist *, struct bkey_i *); -+void bch2_keylist_pop_front(struct keylist *); -+ -+static inline void bch2_keylist_init(struct keylist *l, u64 *inline_keys) -+{ -+ l->top_p = l->keys_p = inline_keys; -+} -+ -+static inline void bch2_keylist_free(struct keylist *l, u64 *inline_keys) -+{ -+ if (l->keys_p != inline_keys) -+ kfree(l->keys_p); -+ bch2_keylist_init(l, inline_keys); -+} -+ -+static inline void bch2_keylist_push(struct keylist *l) -+{ -+ l->top = bkey_next(l->top); -+} -+ -+static inline void bch2_keylist_add(struct keylist *l, const struct bkey_i *k) -+{ -+ bkey_copy(l->top, k); -+ bch2_keylist_push(l); -+} -+ -+static inline bool bch2_keylist_empty(struct keylist *l) -+{ -+ return l->top == 
l->keys; -+} -+ -+static inline size_t bch2_keylist_u64s(struct keylist *l) -+{ -+ return l->top_p - l->keys_p; -+} -+ -+static inline size_t bch2_keylist_bytes(struct keylist *l) -+{ -+ return bch2_keylist_u64s(l) * sizeof(u64); -+} -+ -+static inline struct bkey_i *bch2_keylist_front(struct keylist *l) -+{ -+ return l->keys; -+} -+ -+#define for_each_keylist_key(_keylist, _k) \ -+ for (_k = (_keylist)->keys; \ -+ _k != (_keylist)->top; \ -+ _k = bkey_next(_k)) -+ -+static inline u64 keylist_sectors(struct keylist *keys) -+{ -+ struct bkey_i *k; -+ u64 ret = 0; -+ -+ for_each_keylist_key(keys, k) -+ ret += k->k.size; -+ -+ return ret; -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_verify_keylist_sorted(struct keylist *); -+#else -+static inline void bch2_verify_keylist_sorted(struct keylist *l) {} -+#endif -+ -+#endif /* _BCACHEFS_KEYLIST_H */ -diff --git a/fs/bcachefs/keylist_types.h b/fs/bcachefs/keylist_types.h -new file mode 100644 -index 000000000000..4b3ff7d8a875 ---- /dev/null -+++ b/fs/bcachefs/keylist_types.h -@@ -0,0 +1,16 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_KEYLIST_TYPES_H -+#define _BCACHEFS_KEYLIST_TYPES_H -+ -+struct keylist { -+ union { -+ struct bkey_i *keys; -+ u64 *keys_p; -+ }; -+ union { -+ struct bkey_i *top; -+ u64 *top_p; -+ }; -+}; -+ -+#endif /* _BCACHEFS_KEYLIST_TYPES_H */ -diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c -new file mode 100644 -index 000000000000..96c8690adc5b ---- /dev/null -+++ b/fs/bcachefs/migrate.c -@@ -0,0 +1,170 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Code for moving data off a device. -+ */ -+ -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "extents.h" -+#include "io.h" -+#include "journal.h" -+#include "keylist.h" -+#include "migrate.h" -+#include "move.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k, -+ unsigned dev_idx, int flags, bool metadata) -+{ -+ unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas; -+ unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST; -+ unsigned degraded = metadata ? 
BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED; -+ unsigned nr_good; -+ -+ bch2_bkey_drop_device(k, dev_idx); -+ -+ nr_good = bch2_bkey_durability(c, k.s_c); -+ if ((!nr_good && !(flags & lost)) || -+ (nr_good < replicas && !(flags & degraded))) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags, -+ enum btree_id btree_id) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_on_stack sk; -+ int ret = 0; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, -+ BTREE_ITER_PREFETCH); -+ -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret = bkey_err(k))) { -+ if (!bch2_bkey_has_device(k, dev_idx)) { -+ bch2_btree_iter_next(iter); -+ continue; -+ } -+ -+ bkey_on_stack_reassemble(&sk, c, k); -+ -+ ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k), -+ dev_idx, flags, false); -+ if (ret) -+ break; -+ -+ /* -+ * If the new extent no longer has any pointers, bch2_extent_normalize() -+ * will do the appropriate thing with it (turning it into a -+ * KEY_TYPE_error key, or just a discard if it was a cached extent) -+ */ -+ bch2_extent_normalize(c, bkey_i_to_s(sk.k)); -+ -+ bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); -+ -+ bch2_trans_update(&trans, iter, sk.k, 0); -+ -+ ret = bch2_trans_commit(&trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+ -+ /* -+ * don't want to leave ret == -EINTR, since if we raced and -+ * something else overwrote the key we could spuriously return -+ * -EINTR below: -+ */ -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ break; -+ } -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ bkey_on_stack_exit(&sk, c); -+ -+ BUG_ON(ret == -EINTR); -+ -+ return ret; -+} -+ -+static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) -+{ -+ return __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_EXTENTS) ?: -+ __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_REFLINK); -+} -+ -+static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct closure cl; -+ struct btree *b; -+ unsigned id; -+ int ret; -+ -+ /* don't handle this yet: */ -+ if (flags & BCH_FORCE_IF_METADATA_LOST) -+ return -EINVAL; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ closure_init_stack(&cl); -+ -+ for (id = 0; id < BTREE_ID_NR; id++) { -+ for_each_btree_node(&trans, iter, id, POS_MIN, -+ BTREE_ITER_PREFETCH, b) { -+ __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; -+retry: -+ if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key), -+ dev_idx)) -+ continue; -+ -+ bkey_copy(&tmp.k, &b->key); -+ -+ ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.k), -+ dev_idx, flags, true); -+ if (ret) { -+ bch_err(c, "Cannot drop device without losing data"); -+ goto err; -+ } -+ -+ ret = bch2_btree_node_update_key(c, iter, b, &tmp.k); -+ if (ret == -EINTR) { -+ b = bch2_btree_iter_peek_node(iter); -+ goto retry; -+ } -+ if (ret) { -+ bch_err(c, "Error updating btree node key: %i", ret); -+ goto err; -+ } -+ } -+ bch2_trans_iter_free(&trans, iter); -+ } -+ -+ /* flush relevant btree updates */ -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+ -+ ret = 0; -+err: -+ ret = bch2_trans_exit(&trans) ?: ret; -+ -+ BUG_ON(ret == -EINTR); -+ -+ return ret; -+} -+ -+int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags) -+{ -+ return bch2_dev_usrdata_drop(c, 
dev_idx, flags) ?: -+ bch2_dev_metadata_drop(c, dev_idx, flags); -+} -diff --git a/fs/bcachefs/migrate.h b/fs/bcachefs/migrate.h -new file mode 100644 -index 000000000000..027efaa0d575 ---- /dev/null -+++ b/fs/bcachefs/migrate.h -@@ -0,0 +1,7 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MIGRATE_H -+#define _BCACHEFS_MIGRATE_H -+ -+int bch2_dev_data_drop(struct bch_fs *, unsigned, int); -+ -+#endif /* _BCACHEFS_MIGRATE_H */ -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -new file mode 100644 -index 000000000000..62dcac79ed06 ---- /dev/null -+++ b/fs/bcachefs/move.c -@@ -0,0 +1,826 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_on_stack.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "disk_groups.h" -+#include "inode.h" -+#include "io.h" -+#include "journal_reclaim.h" -+#include "move.h" -+#include "replicas.h" -+#include "super-io.h" -+#include "keylist.h" -+ -+#include -+#include -+ -+#include -+ -+#define SECTORS_IN_FLIGHT_PER_DEVICE 2048 -+ -+struct moving_io { -+ struct list_head list; -+ struct closure cl; -+ bool read_completed; -+ -+ unsigned read_sectors; -+ unsigned write_sectors; -+ -+ struct bch_read_bio rbio; -+ -+ struct migrate_write write; -+ /* Must be last since it is variable size */ -+ struct bio_vec bi_inline_vecs[0]; -+}; -+ -+struct moving_context { -+ /* Closure for waiting on all reads and writes to complete */ -+ struct closure cl; -+ -+ struct bch_move_stats *stats; -+ -+ struct list_head reads; -+ -+ /* in flight sectors: */ -+ atomic_t read_sectors; -+ atomic_t write_sectors; -+ -+ wait_queue_head_t wait; -+}; -+ -+static int bch2_migrate_index_update(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct migrate_write *m = -+ container_of(op, struct migrate_write, op); -+ struct keylist *keys = &op->insert_keys; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+ -+ iter = bch2_trans_get_iter(&trans, m->btree_id, -+ bkey_start_pos(&bch2_keylist_front(keys)->k), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ -+ while (1) { -+ struct bkey_s_c k; -+ struct bkey_i *insert; -+ struct bkey_i_extent *new; -+ BKEY_PADDED(k) _new, _insert; -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ bool did_work = false; -+ int nr; -+ -+ bch2_trans_reset(&trans, 0); -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) { -+ if (ret == -EINTR) -+ continue; -+ break; -+ } -+ -+ new = bkey_i_to_extent(bch2_keylist_front(keys)); -+ -+ if (bversion_cmp(k.k->version, new->k.version) || -+ !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset)) -+ goto nomatch; -+ -+ bkey_reassemble(&_insert.k, k); -+ insert = &_insert.k; -+ -+ bkey_copy(&_new.k, bch2_keylist_front(keys)); -+ new = bkey_i_to_extent(&_new.k); -+ bch2_cut_front(iter->pos, &new->k_i); -+ -+ bch2_cut_front(iter->pos, insert); -+ bch2_cut_back(new->k.p, insert); -+ bch2_cut_back(insert->k.p, &new->k_i); -+ -+ if (m->data_cmd == DATA_REWRITE) { -+ struct bch_extent_ptr *new_ptr, *old_ptr = (void *) -+ bch2_bkey_has_device(bkey_i_to_s_c(insert), -+ m->data_opts.rewrite_dev); -+ if (!old_ptr) -+ goto nomatch; -+ -+ if (old_ptr->cached) -+ extent_for_each_ptr(extent_i_to_s(new), new_ptr) -+ new_ptr->cached = true; -+ -+ bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr); -+ } -+ -+ extent_for_each_ptr_decode(extent_i_to_s(new), p, 
entry) { -+ if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) { -+ /* -+ * raced with another move op? extent already -+ * has a pointer to the device we just wrote -+ * data to -+ */ -+ continue; -+ } -+ -+ bch2_extent_ptr_decoded_append(insert, &p); -+ did_work = true; -+ } -+ -+ if (!did_work) -+ goto nomatch; -+ -+ bch2_bkey_narrow_crcs(insert, -+ (struct bch_extent_crc_unpacked) { 0 }); -+ bch2_extent_normalize(c, bkey_i_to_s(insert)); -+ bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert), -+ op->opts.background_target, -+ op->opts.data_replicas); -+ -+ /* -+ * If we're not fully overwriting @k, and it's compressed, we -+ * need a reservation for all the pointers in @insert -+ */ -+ nr = bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(insert)) - -+ m->nr_ptrs_reserved; -+ -+ if (insert->k.size < k.k->size && -+ bch2_bkey_sectors_compressed(k) && -+ nr > 0) { -+ ret = bch2_disk_reservation_add(c, &op->res, -+ keylist_sectors(keys) * nr, 0); -+ if (ret) -+ goto out; -+ -+ m->nr_ptrs_reserved += nr; -+ goto next; -+ } -+ -+ bch2_trans_update(&trans, iter, insert, 0); -+ -+ ret = bch2_trans_commit(&trans, &op->res, -+ op_journal_seq(op), -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ m->data_opts.btree_insert_flags); -+ if (!ret) -+ atomic_long_inc(&c->extent_migrate_done); -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ break; -+next: -+ while (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) >= 0) { -+ bch2_keylist_pop_front(keys); -+ if (bch2_keylist_empty(keys)) -+ goto out; -+ } -+ continue; -+nomatch: -+ if (m->ctxt) { -+ BUG_ON(k.k->p.offset <= iter->pos.offset); -+ atomic64_inc(&m->ctxt->stats->keys_raced); -+ atomic64_add(k.k->p.offset - iter->pos.offset, -+ &m->ctxt->stats->sectors_raced); -+ } -+ atomic_long_inc(&c->extent_migrate_raced); -+ trace_move_race(&new->k); -+ bch2_btree_iter_next_slot(iter); -+ goto next; -+ } -+out: -+ bch2_trans_exit(&trans); -+ BUG_ON(ret == -EINTR); -+ return ret; -+} -+ -+void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio) -+{ -+ /* write bio must own pages: */ -+ BUG_ON(!m->op.wbio.bio.bi_vcnt); -+ -+ m->ptr = rbio->pick.ptr; -+ m->offset = rbio->pos.offset - rbio->pick.crc.offset; -+ m->op.devs_have = rbio->devs_have; -+ m->op.pos = rbio->pos; -+ m->op.version = rbio->version; -+ m->op.crc = rbio->pick.crc; -+ m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9; -+ -+ if (bch2_csum_type_is_encryption(m->op.crc.csum_type)) { -+ m->op.nonce = m->op.crc.nonce + m->op.crc.offset; -+ m->op.csum_type = m->op.crc.csum_type; -+ } -+ -+ if (m->data_cmd == DATA_REWRITE) -+ bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev); -+} -+ -+int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, -+ struct write_point_specifier wp, -+ struct bch_io_opts io_opts, -+ enum data_cmd data_cmd, -+ struct data_opts data_opts, -+ enum btree_id btree_id, -+ struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ int ret; -+ -+ m->btree_id = btree_id; -+ m->data_cmd = data_cmd; -+ m->data_opts = data_opts; -+ m->nr_ptrs_reserved = 0; -+ -+ bch2_write_op_init(&m->op, c, io_opts); -+ -+ if (!bch2_bkey_is_incompressible(k)) -+ m->op.compression_type = -+ bch2_compression_opt_to_type[io_opts.background_compression ?: -+ io_opts.compression]; -+ else -+ m->op.incompressible = true; -+ -+ m->op.target = data_opts.target, -+ m->op.write_point = wp; -+ -+ if (m->data_opts.btree_insert_flags & 
BTREE_INSERT_USE_RESERVE) { -+ m->op.alloc_reserve = RESERVE_MOVINGGC; -+ m->op.flags |= BCH_WRITE_ALLOC_NOWAIT; -+ } else { -+ /* XXX: this should probably be passed in */ -+ m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS; -+ } -+ -+ m->op.flags |= BCH_WRITE_PAGES_STABLE| -+ BCH_WRITE_PAGES_OWNED| -+ BCH_WRITE_DATA_ENCODED| -+ BCH_WRITE_FROM_INTERNAL; -+ -+ m->op.nr_replicas = 1; -+ m->op.nr_replicas_required = 1; -+ m->op.index_update_fn = bch2_migrate_index_update; -+ -+ switch (data_cmd) { -+ case DATA_ADD_REPLICAS: { -+ /* -+ * DATA_ADD_REPLICAS is used for moving data to a different -+ * device in the background, and due to compression the new copy -+ * might take up more space than the old copy: -+ */ -+#if 0 -+ int nr = (int) io_opts.data_replicas - -+ bch2_bkey_nr_ptrs_allocated(k); -+#endif -+ int nr = (int) io_opts.data_replicas; -+ -+ if (nr > 0) { -+ m->op.nr_replicas = m->nr_ptrs_reserved = nr; -+ -+ ret = bch2_disk_reservation_get(c, &m->op.res, -+ k.k->size, m->op.nr_replicas, 0); -+ if (ret) -+ return ret; -+ } -+ break; -+ } -+ case DATA_REWRITE: { -+ unsigned compressed_sectors = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (p.ptr.dev == data_opts.rewrite_dev && -+ !p.ptr.cached && -+ crc_is_compressed(p.crc)) -+ compressed_sectors += p.crc.compressed_size; -+ -+ if (compressed_sectors) { -+ ret = bch2_disk_reservation_add(c, &m->op.res, -+ k.k->size * m->op.nr_replicas, -+ BCH_DISK_RESERVATION_NOFAIL); -+ if (ret) -+ return ret; -+ } -+ break; -+ } -+ case DATA_PROMOTE: -+ m->op.flags |= BCH_WRITE_ALLOC_NOWAIT; -+ m->op.flags |= BCH_WRITE_CACHED; -+ break; -+ default: -+ BUG(); -+ } -+ -+ return 0; -+} -+ -+static void move_free(struct closure *cl) -+{ -+ struct moving_io *io = container_of(cl, struct moving_io, cl); -+ struct moving_context *ctxt = io->write.ctxt; -+ struct bvec_iter_all iter; -+ struct bio_vec *bv; -+ -+ bch2_disk_reservation_put(io->write.op.c, &io->write.op.res); -+ -+ bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter) -+ if (bv->bv_page) -+ __free_page(bv->bv_page); -+ -+ wake_up(&ctxt->wait); -+ -+ kfree(io); -+} -+ -+static void move_write_done(struct closure *cl) -+{ -+ struct moving_io *io = container_of(cl, struct moving_io, cl); -+ -+ atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors); -+ closure_return_with_destructor(cl, move_free); -+} -+ -+static void move_write(struct closure *cl) -+{ -+ struct moving_io *io = container_of(cl, struct moving_io, cl); -+ -+ if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) { -+ closure_return_with_destructor(cl, move_free); -+ return; -+ } -+ -+ bch2_migrate_read_done(&io->write, &io->rbio); -+ -+ atomic_add(io->write_sectors, &io->write.ctxt->write_sectors); -+ closure_call(&io->write.op.cl, bch2_write, NULL, cl); -+ continue_at(cl, move_write_done, NULL); -+} -+ -+static inline struct moving_io *next_pending_write(struct moving_context *ctxt) -+{ -+ struct moving_io *io = -+ list_first_entry_or_null(&ctxt->reads, struct moving_io, list); -+ -+ return io && io->read_completed ? 
io : NULL; -+} -+ -+static void move_read_endio(struct bio *bio) -+{ -+ struct moving_io *io = container_of(bio, struct moving_io, rbio.bio); -+ struct moving_context *ctxt = io->write.ctxt; -+ -+ atomic_sub(io->read_sectors, &ctxt->read_sectors); -+ io->read_completed = true; -+ -+ if (next_pending_write(ctxt)) -+ wake_up(&ctxt->wait); -+ -+ closure_put(&ctxt->cl); -+} -+ -+static void do_pending_writes(struct moving_context *ctxt) -+{ -+ struct moving_io *io; -+ -+ while ((io = next_pending_write(ctxt))) { -+ list_del(&io->list); -+ closure_call(&io->cl, move_write, NULL, &ctxt->cl); -+ } -+} -+ -+#define move_ctxt_wait_event(_ctxt, _cond) \ -+do { \ -+ do_pending_writes(_ctxt); \ -+ \ -+ if (_cond) \ -+ break; \ -+ __wait_event((_ctxt)->wait, \ -+ next_pending_write(_ctxt) || (_cond)); \ -+} while (1) -+ -+static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt) -+{ -+ unsigned sectors_pending = atomic_read(&ctxt->write_sectors); -+ -+ move_ctxt_wait_event(ctxt, -+ !atomic_read(&ctxt->write_sectors) || -+ atomic_read(&ctxt->write_sectors) != sectors_pending); -+} -+ -+static int bch2_move_extent(struct btree_trans *trans, -+ struct moving_context *ctxt, -+ struct write_point_specifier wp, -+ struct bch_io_opts io_opts, -+ enum btree_id btree_id, -+ struct bkey_s_c k, -+ enum data_cmd data_cmd, -+ struct data_opts data_opts) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ struct moving_io *io; -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned sectors = k.k->size, pages; -+ int ret = -ENOMEM; -+ -+ move_ctxt_wait_event(ctxt, -+ atomic_read(&ctxt->write_sectors) < -+ SECTORS_IN_FLIGHT_PER_DEVICE); -+ -+ move_ctxt_wait_event(ctxt, -+ atomic_read(&ctxt->read_sectors) < -+ SECTORS_IN_FLIGHT_PER_DEVICE); -+ -+ /* write path might have to decompress data: */ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ sectors = max_t(unsigned, sectors, p.crc.uncompressed_size); -+ -+ pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); -+ io = kzalloc(sizeof(struct moving_io) + -+ sizeof(struct bio_vec) * pages, GFP_KERNEL); -+ if (!io) -+ goto err; -+ -+ io->write.ctxt = ctxt; -+ io->read_sectors = k.k->size; -+ io->write_sectors = k.k->size; -+ -+ bio_init(&io->write.op.wbio.bio, io->bi_inline_vecs, pages); -+ bio_set_prio(&io->write.op.wbio.bio, -+ IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); -+ -+ if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9, -+ GFP_KERNEL)) -+ goto err_free; -+ -+ io->rbio.c = c; -+ io->rbio.opts = io_opts; -+ bio_init(&io->rbio.bio, io->bi_inline_vecs, pages); -+ io->rbio.bio.bi_vcnt = pages; -+ bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); -+ io->rbio.bio.bi_iter.bi_size = sectors << 9; -+ -+ bio_set_op_attrs(&io->rbio.bio, REQ_OP_READ, 0); -+ io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); -+ io->rbio.bio.bi_end_io = move_read_endio; -+ -+ ret = bch2_migrate_write_init(c, &io->write, wp, io_opts, -+ data_cmd, data_opts, btree_id, k); -+ if (ret) -+ goto err_free_pages; -+ -+ atomic64_inc(&ctxt->stats->keys_moved); -+ atomic64_add(k.k->size, &ctxt->stats->sectors_moved); -+ -+ trace_move_extent(k.k); -+ -+ atomic_add(io->read_sectors, &ctxt->read_sectors); -+ list_add_tail(&io->list, &ctxt->reads); -+ -+ /* -+ * dropped by move_read_endio() - guards against use after free of -+ * ctxt when doing wakeup -+ */ -+ closure_get(&ctxt->cl); -+ bch2_read_extent(trans, &io->rbio, k, 0, -+ BCH_READ_NODECODE| -+ BCH_READ_LAST_FRAGMENT); -+ return 0; -+err_free_pages: 
-+ bio_free_pages(&io->write.op.wbio.bio); -+err_free: -+ kfree(io); -+err: -+ trace_move_alloc_fail(k.k); -+ return ret; -+} -+ -+static int __bch2_move_data(struct bch_fs *c, -+ struct moving_context *ctxt, -+ struct bch_ratelimit *rate, -+ struct write_point_specifier wp, -+ struct bpos start, -+ struct bpos end, -+ move_pred_fn pred, void *arg, -+ struct bch_move_stats *stats, -+ enum btree_id btree_id) -+{ -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); -+ struct bkey_on_stack sk; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct data_opts data_opts; -+ enum data_cmd data_cmd; -+ u64 delay, cur_inum = U64_MAX; -+ int ret = 0, ret2; -+ -+ bkey_on_stack_init(&sk); -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ stats->data_type = BCH_DATA_user; -+ stats->btree_id = btree_id; -+ stats->pos = POS_MIN; -+ -+ iter = bch2_trans_get_iter(&trans, btree_id, start, -+ BTREE_ITER_PREFETCH); -+ -+ if (rate) -+ bch2_ratelimit_reset(rate); -+ -+ while (1) { -+ do { -+ delay = rate ? bch2_ratelimit_delay(rate) : 0; -+ -+ if (delay) { -+ bch2_trans_unlock(&trans); -+ set_current_state(TASK_INTERRUPTIBLE); -+ } -+ -+ if (kthread && (ret = kthread_should_stop())) { -+ __set_current_state(TASK_RUNNING); -+ goto out; -+ } -+ -+ if (delay) -+ schedule_timeout(delay); -+ -+ if (unlikely(freezing(current))) { -+ bch2_trans_unlock(&trans); -+ move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); -+ try_to_freeze(); -+ } -+ } while (delay); -+peek: -+ k = bch2_btree_iter_peek(iter); -+ -+ stats->pos = iter->pos; -+ -+ if (!k.k) -+ break; -+ ret = bkey_err(k); -+ if (ret) -+ break; -+ if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) -+ break; -+ -+ if (!bkey_extent_is_direct_data(k.k)) -+ goto next_nondata; -+ -+ if (btree_id == BTREE_ID_EXTENTS && -+ cur_inum != k.k->p.inode) { -+ struct bch_inode_unpacked inode; -+ -+ /* don't hold btree locks while looking up inode: */ -+ bch2_trans_unlock(&trans); -+ -+ io_opts = bch2_opts_to_inode_opts(c->opts); -+ if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode)) -+ bch2_io_opts_apply(&io_opts, bch2_inode_opts_get(&inode)); -+ cur_inum = k.k->p.inode; -+ goto peek; -+ } -+ -+ switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) { -+ case DATA_SKIP: -+ goto next; -+ case DATA_SCRUB: -+ BUG(); -+ case DATA_ADD_REPLICAS: -+ case DATA_REWRITE: -+ case DATA_PROMOTE: -+ break; -+ default: -+ BUG(); -+ } -+ -+ /* unlock before doing IO: */ -+ bkey_on_stack_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ bch2_trans_unlock(&trans); -+ -+ ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k, -+ data_cmd, data_opts); -+ if (ret2) { -+ if (ret2 == -ENOMEM) { -+ /* memory allocation failure, wait for some IO to finish */ -+ bch2_move_ctxt_wait_for_io(ctxt); -+ continue; -+ } -+ -+ /* XXX signal failure */ -+ goto next; -+ } -+ -+ if (rate) -+ bch2_ratelimit_increment(rate, k.k->size); -+next: -+ atomic64_add(k.k->size * bch2_bkey_nr_ptrs_allocated(k), -+ &stats->sectors_seen); -+next_nondata: -+ bch2_btree_iter_next(iter); -+ bch2_trans_cond_resched(&trans); -+ } -+out: -+ ret = bch2_trans_exit(&trans) ?: ret; -+ bkey_on_stack_exit(&sk, c); -+ -+ return ret; -+} -+ -+int bch2_move_data(struct bch_fs *c, -+ struct bch_ratelimit *rate, -+ struct write_point_specifier wp, -+ struct bpos start, -+ struct bpos end, -+ move_pred_fn pred, void *arg, -+ struct bch_move_stats *stats) -+{ -+ struct moving_context ctxt = { .stats = stats }; -+ int ret; -+ -+ 
closure_init_stack(&ctxt.cl); -+ INIT_LIST_HEAD(&ctxt.reads); -+ init_waitqueue_head(&ctxt.wait); -+ -+ stats->data_type = BCH_DATA_user; -+ -+ ret = __bch2_move_data(c, &ctxt, rate, wp, start, end, -+ pred, arg, stats, BTREE_ID_EXTENTS) ?: -+ __bch2_move_data(c, &ctxt, rate, wp, start, end, -+ pred, arg, stats, BTREE_ID_REFLINK); -+ -+ move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); -+ closure_sync(&ctxt.cl); -+ -+ EBUG_ON(atomic_read(&ctxt.write_sectors)); -+ -+ trace_move_data(c, -+ atomic64_read(&stats->sectors_moved), -+ atomic64_read(&stats->keys_moved)); -+ -+ return ret; -+} -+ -+static int bch2_move_btree(struct bch_fs *c, -+ move_pred_fn pred, -+ void *arg, -+ struct bch_move_stats *stats) -+{ -+ struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct btree *b; -+ unsigned id; -+ struct data_opts data_opts; -+ enum data_cmd cmd; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ stats->data_type = BCH_DATA_btree; -+ -+ for (id = 0; id < BTREE_ID_NR; id++) { -+ stats->btree_id = id; -+ -+ for_each_btree_node(&trans, iter, id, POS_MIN, -+ BTREE_ITER_PREFETCH, b) { -+ stats->pos = iter->pos; -+ -+ switch ((cmd = pred(c, arg, -+ bkey_i_to_s_c(&b->key), -+ &io_opts, &data_opts))) { -+ case DATA_SKIP: -+ goto next; -+ case DATA_SCRUB: -+ BUG(); -+ case DATA_ADD_REPLICAS: -+ case DATA_REWRITE: -+ break; -+ default: -+ BUG(); -+ } -+ -+ ret = bch2_btree_node_rewrite(c, iter, -+ b->data->keys.seq, 0) ?: ret; -+next: -+ bch2_trans_cond_resched(&trans); -+ } -+ -+ ret = bch2_trans_iter_free(&trans, iter) ?: ret; -+ } -+ -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+#if 0 -+static enum data_cmd scrub_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ return DATA_SCRUB; -+} -+#endif -+ -+static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ unsigned nr_good = bch2_bkey_durability(c, k); -+ unsigned replicas = 0; -+ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: -+ replicas = c->opts.metadata_replicas; -+ break; -+ case KEY_TYPE_extent: -+ replicas = io_opts->data_replicas; -+ break; -+ } -+ -+ if (!nr_good || nr_good >= replicas) -+ return DATA_SKIP; -+ -+ data_opts->target = 0; -+ data_opts->btree_insert_flags = 0; -+ return DATA_ADD_REPLICAS; -+} -+ -+static enum data_cmd migrate_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ struct bch_ioctl_data *op = arg; -+ -+ if (!bch2_bkey_has_device(k, op->migrate.dev)) -+ return DATA_SKIP; -+ -+ data_opts->target = 0; -+ data_opts->btree_insert_flags = 0; -+ data_opts->rewrite_dev = op->migrate.dev; -+ return DATA_REWRITE; -+} -+ -+int bch2_data_job(struct bch_fs *c, -+ struct bch_move_stats *stats, -+ struct bch_ioctl_data op) -+{ -+ int ret = 0; -+ -+ switch (op.op) { -+ case BCH_DATA_OP_REREPLICATE: -+ stats->data_type = BCH_DATA_journal; -+ ret = bch2_journal_flush_device_pins(&c->journal, -1); -+ -+ ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret; -+ -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+ -+ ret = bch2_replicas_gc2(c) ?: ret; -+ -+ ret = bch2_move_data(c, NULL, -+ writepoint_hashed((unsigned long) current), -+ op.start, -+ op.end, -+ rereplicate_pred, c, stats) ?: ret; -+ ret = bch2_replicas_gc2(c) ?: ret; 
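-+	/* btree nodes and user data have both been rereplicated at this point */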
-+ break; -+ case BCH_DATA_OP_MIGRATE: -+ if (op.migrate.dev >= c->sb.nr_devices) -+ return -EINVAL; -+ -+ stats->data_type = BCH_DATA_journal; -+ ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); -+ -+ ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret; -+ ret = bch2_replicas_gc2(c) ?: ret; -+ -+ ret = bch2_move_data(c, NULL, -+ writepoint_hashed((unsigned long) current), -+ op.start, -+ op.end, -+ migrate_pred, &op, stats) ?: ret; -+ ret = bch2_replicas_gc2(c) ?: ret; -+ break; -+ default: -+ ret = -EINVAL; -+ } -+ -+ return ret; -+} -diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h -new file mode 100644 -index 000000000000..0acd1720d4f8 ---- /dev/null -+++ b/fs/bcachefs/move.h -@@ -0,0 +1,64 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MOVE_H -+#define _BCACHEFS_MOVE_H -+ -+#include "btree_iter.h" -+#include "buckets.h" -+#include "io_types.h" -+#include "move_types.h" -+ -+struct bch_read_bio; -+struct moving_context; -+ -+enum data_cmd { -+ DATA_SKIP, -+ DATA_SCRUB, -+ DATA_ADD_REPLICAS, -+ DATA_REWRITE, -+ DATA_PROMOTE, -+}; -+ -+struct data_opts { -+ u16 target; -+ unsigned rewrite_dev; -+ int btree_insert_flags; -+}; -+ -+struct migrate_write { -+ enum btree_id btree_id; -+ enum data_cmd data_cmd; -+ struct data_opts data_opts; -+ -+ unsigned nr_ptrs_reserved; -+ -+ struct moving_context *ctxt; -+ -+ /* what we read: */ -+ struct bch_extent_ptr ptr; -+ u64 offset; -+ -+ struct bch_write_op op; -+}; -+ -+void bch2_migrate_read_done(struct migrate_write *, struct bch_read_bio *); -+int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *, -+ struct write_point_specifier, -+ struct bch_io_opts, -+ enum data_cmd, struct data_opts, -+ enum btree_id, struct bkey_s_c); -+ -+typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *, -+ struct bkey_s_c, -+ struct bch_io_opts *, struct data_opts *); -+ -+int bch2_move_data(struct bch_fs *, struct bch_ratelimit *, -+ struct write_point_specifier, -+ struct bpos, struct bpos, -+ move_pred_fn, void *, -+ struct bch_move_stats *); -+ -+int bch2_data_job(struct bch_fs *, -+ struct bch_move_stats *, -+ struct bch_ioctl_data); -+ -+#endif /* _BCACHEFS_MOVE_H */ -diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h -new file mode 100644 -index 000000000000..fc0de165af9f ---- /dev/null -+++ b/fs/bcachefs/move_types.h -@@ -0,0 +1,17 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MOVE_TYPES_H -+#define _BCACHEFS_MOVE_TYPES_H -+ -+struct bch_move_stats { -+ enum bch_data_type data_type; -+ enum btree_id btree_id; -+ struct bpos pos; -+ -+ atomic64_t keys_moved; -+ atomic64_t keys_raced; -+ atomic64_t sectors_moved; -+ atomic64_t sectors_seen; -+ atomic64_t sectors_raced; -+}; -+ -+#endif /* _BCACHEFS_MOVE_TYPES_H */ -diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c -new file mode 100644 -index 000000000000..de0a7974ec9f ---- /dev/null -+++ b/fs/bcachefs/movinggc.c -@@ -0,0 +1,359 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Moving/copying garbage collector -+ * -+ * Copyright 2012 Google, Inc. 
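-+ *
-+ * Finds buckets that are mostly empty, rewrites the live data they still
-+ * contain to new buckets, and frees the old buckets up for reuse.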
-+ */ -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "clock.h" -+#include "disk_groups.h" -+#include "error.h" -+#include "extents.h" -+#include "eytzinger.h" -+#include "io.h" -+#include "keylist.h" -+#include "move.h" -+#include "movinggc.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * We can't use the entire copygc reserve in one iteration of copygc: we may -+ * need the buckets we're freeing up to go back into the copygc reserve to make -+ * forward progress, but if the copygc reserve is full they'll be available for -+ * any allocation - and it's possible that in a given iteration, we free up most -+ * of the buckets we're going to free before we allocate most of the buckets -+ * we're going to allocate. -+ * -+ * If we only use half of the reserve per iteration, then in steady state we'll -+ * always have room in the reserve for the buckets we're going to need in the -+ * next iteration: -+ */ -+#define COPYGC_BUCKETS_PER_ITER(ca) \ -+ ((ca)->free[RESERVE_MOVINGGC].size / 2) -+ -+static int bucket_offset_cmp(const void *_l, const void *_r, size_t size) -+{ -+ const struct copygc_heap_entry *l = _l; -+ const struct copygc_heap_entry *r = _r; -+ -+ return cmp_int(l->dev, r->dev) ?: -+ cmp_int(l->offset, r->offset); -+} -+ -+static int __copygc_pred(struct bch_fs *c, struct bkey_s_c k) -+{ -+ copygc_heap *h = &c->copygc_heap; -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct copygc_heap_entry search = { -+ .dev = ptr->dev, -+ .offset = ptr->offset -+ }; -+ -+ ssize_t i = eytzinger0_find_le(h->data, h->used, -+ sizeof(h->data[0]), -+ bucket_offset_cmp, &search); -+#if 0 -+ /* eytzinger search verify code: */ -+ ssize_t j = -1, k; -+ -+ for (k = 0; k < h->used; k++) -+ if (h->data[k].offset <= ptr->offset && -+ (j < 0 || h->data[k].offset > h->data[j].offset)) -+ j = k; -+ -+ BUG_ON(i != j); -+#endif -+ if (i >= 0 && -+ ptr->offset < h->data[i].offset + ca->mi.bucket_size && -+ ptr->gen == h->data[i].gen) -+ return ptr->dev; -+ } -+ -+ return -1; -+} -+ -+static enum data_cmd copygc_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ int dev_idx = __copygc_pred(c, k); -+ if (dev_idx < 0) -+ return DATA_SKIP; -+ -+ data_opts->target = io_opts->background_target; -+ data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE; -+ data_opts->rewrite_dev = dev_idx; -+ return DATA_REWRITE; -+} -+ -+static bool have_copygc_reserve(struct bch_dev *ca) -+{ -+ bool ret; -+ -+ spin_lock(&ca->fs->freelist_lock); -+ ret = fifo_full(&ca->free[RESERVE_MOVINGGC]) || -+ ca->allocator_state != ALLOCATOR_RUNNING; -+ spin_unlock(&ca->fs->freelist_lock); -+ -+ return ret; -+} -+ -+static inline int fragmentation_cmp(copygc_heap *heap, -+ struct copygc_heap_entry l, -+ struct copygc_heap_entry r) -+{ -+ return cmp_int(l.fragmentation, r.fragmentation); -+} -+ -+static int bch2_copygc(struct bch_fs *c) -+{ -+ copygc_heap *h = &c->copygc_heap; -+ struct copygc_heap_entry e, *i; -+ struct bucket_array *buckets; -+ struct bch_move_stats move_stats; -+ u64 sectors_to_move = 0, sectors_not_moved = 0; -+ u64 sectors_reserved = 0; -+ u64 buckets_to_move, buckets_not_moved = 0; -+ struct bch_dev *ca; -+ unsigned dev_idx; -+ size_t b, 
heap_size = 0; -+ int ret; -+ -+ memset(&move_stats, 0, sizeof(move_stats)); -+ /* -+ * Find buckets with lowest sector counts, skipping completely -+ * empty buckets, by building a maxheap sorted by sector count, -+ * and repeatedly replacing the maximum element until all -+ * buckets have been visited. -+ */ -+ h->used = 0; -+ -+ for_each_rw_member(ca, c, dev_idx) -+ heap_size += ca->mi.nbuckets >> 7; -+ -+ if (h->size < heap_size) { -+ free_heap(&c->copygc_heap); -+ if (!init_heap(&c->copygc_heap, heap_size, GFP_KERNEL)) { -+ bch_err(c, "error allocating copygc heap"); -+ return 0; -+ } -+ } -+ -+ for_each_rw_member(ca, c, dev_idx) { -+ closure_wait_event(&c->freelist_wait, have_copygc_reserve(ca)); -+ -+ spin_lock(&ca->fs->freelist_lock); -+ sectors_reserved += fifo_used(&ca->free[RESERVE_MOVINGGC]) * ca->mi.bucket_size; -+ spin_unlock(&ca->fs->freelist_lock); -+ -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ for (b = buckets->first_bucket; b < buckets->nbuckets; b++) { -+ struct bucket_mark m = READ_ONCE(buckets->b[b].mark); -+ struct copygc_heap_entry e; -+ -+ if (m.owned_by_allocator || -+ m.data_type != BCH_DATA_user || -+ !bucket_sectors_used(m) || -+ bucket_sectors_used(m) >= ca->mi.bucket_size) -+ continue; -+ -+ e = (struct copygc_heap_entry) { -+ .dev = dev_idx, -+ .gen = m.gen, -+ .fragmentation = bucket_sectors_used(m) * (1U << 15) -+ / ca->mi.bucket_size, -+ .sectors = bucket_sectors_used(m), -+ .offset = bucket_to_sector(ca, b), -+ }; -+ heap_add_or_replace(h, e, -fragmentation_cmp, NULL); -+ } -+ up_read(&ca->bucket_lock); -+ } -+ -+ if (!sectors_reserved) { -+ bch2_fs_fatal_error(c, "stuck, ran out of copygc reserve!"); -+ return -1; -+ } -+ -+ for (i = h->data; i < h->data + h->used; i++) -+ sectors_to_move += i->sectors; -+ -+ while (sectors_to_move > sectors_reserved) { -+ BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL)); -+ sectors_to_move -= e.sectors; -+ } -+ -+ buckets_to_move = h->used; -+ -+ if (!buckets_to_move) -+ return 0; -+ -+ eytzinger0_sort(h->data, h->used, -+ sizeof(h->data[0]), -+ bucket_offset_cmp, NULL); -+ -+ ret = bch2_move_data(c, &c->copygc_pd.rate, -+ writepoint_ptr(&c->copygc_write_point), -+ POS_MIN, POS_MAX, -+ copygc_pred, NULL, -+ &move_stats); -+ -+ for_each_rw_member(ca, c, dev_idx) { -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ for (i = h->data; i < h->data + h->used; i++) { -+ struct bucket_mark m; -+ size_t b; -+ -+ if (i->dev != dev_idx) -+ continue; -+ -+ b = sector_to_bucket(ca, i->offset); -+ m = READ_ONCE(buckets->b[b].mark); -+ -+ if (i->gen == m.gen && -+ bucket_sectors_used(m)) { -+ sectors_not_moved += bucket_sectors_used(m); -+ buckets_not_moved++; -+ } -+ } -+ up_read(&ca->bucket_lock); -+ } -+ -+ if (sectors_not_moved && !ret) -+ bch_warn_ratelimited(c, -+ "copygc finished but %llu/%llu sectors, %llu/%llu buckets not moved (move stats: moved %llu sectors, raced %llu keys, %llu sectors)", -+ sectors_not_moved, sectors_to_move, -+ buckets_not_moved, buckets_to_move, -+ atomic64_read(&move_stats.sectors_moved), -+ atomic64_read(&move_stats.keys_raced), -+ atomic64_read(&move_stats.sectors_raced)); -+ -+ trace_copygc(c, -+ atomic64_read(&move_stats.sectors_moved), sectors_not_moved, -+ buckets_to_move, buckets_not_moved); -+ return 0; -+} -+ -+/* -+ * Copygc runs when the amount of fragmented data is above some arbitrary -+ * threshold: -+ * -+ * The threshold at the limit - when the device is full - is the amount of space -+ * we reserved in bch2_recalc_capacity; we can't have more 
than that amount of -+ * disk space stranded due to fragmentation and store everything we have -+ * promised to store. -+ * -+ * But we don't want to be running copygc unnecessarily when the device still -+ * has plenty of free space - rather, we want copygc to smoothly run every so -+ * often and continually reduce the amount of fragmented space as the device -+ * fills up. So, we increase the threshold by half the current free space. -+ */ -+unsigned long bch2_copygc_wait_amount(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned dev_idx; -+ u64 fragmented_allowed = c->copygc_threshold; -+ u64 fragmented = 0; -+ -+ for_each_rw_member(ca, c, dev_idx) { -+ struct bch_dev_usage usage = bch2_dev_usage_read(ca); -+ -+ fragmented_allowed += ((__dev_buckets_available(ca, usage) * -+ ca->mi.bucket_size) >> 1); -+ fragmented += usage.sectors_fragmented; -+ } -+ -+ return max_t(s64, 0, fragmented_allowed - fragmented); -+} -+ -+static int bch2_copygc_thread(void *arg) -+{ -+ struct bch_fs *c = arg; -+ struct io_clock *clock = &c->io_clock[WRITE]; -+ unsigned long last, wait; -+ -+ set_freezable(); -+ -+ while (!kthread_should_stop()) { -+ if (kthread_wait_freezable(c->copy_gc_enabled)) -+ break; -+ -+ last = atomic_long_read(&clock->now); -+ wait = bch2_copygc_wait_amount(c); -+ -+ if (wait > clock->max_slop) { -+ bch2_kthread_io_clock_wait(clock, last + wait, -+ MAX_SCHEDULE_TIMEOUT); -+ continue; -+ } -+ -+ if (bch2_copygc(c)) -+ break; -+ } -+ -+ return 0; -+} -+ -+void bch2_copygc_stop(struct bch_fs *c) -+{ -+ c->copygc_pd.rate.rate = UINT_MAX; -+ bch2_ratelimit_reset(&c->copygc_pd.rate); -+ -+ if (c->copygc_thread) { -+ kthread_stop(c->copygc_thread); -+ put_task_struct(c->copygc_thread); -+ } -+ c->copygc_thread = NULL; -+} -+ -+int bch2_copygc_start(struct bch_fs *c) -+{ -+ struct task_struct *t; -+ -+ if (c->copygc_thread) -+ return 0; -+ -+ if (c->opts.nochanges) -+ return 0; -+ -+ if (bch2_fs_init_fault("copygc_start")) -+ return -ENOMEM; -+ -+ t = kthread_create(bch2_copygc_thread, c, "bch_copygc"); -+ if (IS_ERR(t)) -+ return PTR_ERR(t); -+ -+ get_task_struct(t); -+ -+ c->copygc_thread = t; -+ wake_up_process(c->copygc_thread); -+ -+ return 0; -+} -+ -+void bch2_fs_copygc_init(struct bch_fs *c) -+{ -+ bch2_pd_controller_init(&c->copygc_pd); -+ c->copygc_pd.d_term = 0; -+} -diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h -new file mode 100644 -index 000000000000..922738247d03 ---- /dev/null -+++ b/fs/bcachefs/movinggc.h -@@ -0,0 +1,9 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MOVINGGC_H -+#define _BCACHEFS_MOVINGGC_H -+ -+void bch2_copygc_stop(struct bch_fs *); -+int bch2_copygc_start(struct bch_fs *); -+void bch2_fs_copygc_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_MOVINGGC_H */ -diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c -new file mode 100644 -index 000000000000..afe25cd26c06 ---- /dev/null -+++ b/fs/bcachefs/opts.c -@@ -0,0 +1,437 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include -+ -+#include "bcachefs.h" -+#include "compress.h" -+#include "disk_groups.h" -+#include "opts.h" -+#include "super-io.h" -+#include "util.h" -+ -+const char * const bch2_error_actions[] = { -+ "continue", -+ "remount-ro", -+ "panic", -+ NULL -+}; -+ -+const char * const bch2_sb_features[] = { -+#define x(f, n) #f, -+ BCH_SB_FEATURES() -+#undef x -+ NULL -+}; -+ -+const char * const bch2_csum_opts[] = { -+ "none", -+ "crc32c", -+ "crc64", -+ NULL -+}; -+ -+const char * const bch2_compression_opts[] = { -+#define x(t, n) #t, -+ BCH_COMPRESSION_OPTS() 
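-+	/* x() stringifies each BCH_COMPRESSION_OPTS() entry into this table */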
-+#undef x -+ NULL -+}; -+ -+const char * const bch2_str_hash_types[] = { -+ "crc32c", -+ "crc64", -+ "siphash", -+ NULL -+}; -+ -+const char * const bch2_data_types[] = { -+#define x(t, n) #t, -+ BCH_DATA_TYPES() -+#undef x -+ NULL -+}; -+ -+const char * const bch2_cache_replacement_policies[] = { -+ "lru", -+ "fifo", -+ "random", -+ NULL -+}; -+ -+/* Default is -1; we skip past it for struct cached_dev's cache mode */ -+const char * const bch2_cache_modes[] = { -+ "default", -+ "writethrough", -+ "writeback", -+ "writearound", -+ "none", -+ NULL -+}; -+ -+const char * const bch2_dev_state[] = { -+ "readwrite", -+ "readonly", -+ "failed", -+ "spare", -+ NULL -+}; -+ -+void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src) -+{ -+#define x(_name, ...) \ -+ if (opt_defined(src, _name)) \ -+ opt_set(*dst, _name, src._name); -+ -+ BCH_OPTS() -+#undef x -+} -+ -+bool bch2_opt_defined_by_id(const struct bch_opts *opts, enum bch_opt_id id) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Opt_##_name: \ -+ return opt_defined(*opts, _name); -+ BCH_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+u64 bch2_opt_get_by_id(const struct bch_opts *opts, enum bch_opt_id id) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Opt_##_name: \ -+ return opts->_name; -+ BCH_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_opt_set_by_id(struct bch_opts *opts, enum bch_opt_id id, u64 v) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Opt_##_name: \ -+ opt_set(*opts, _name, v); \ -+ break; -+ BCH_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+/* -+ * Initial options from superblock - here we don't want any options undefined, -+ * any options the superblock doesn't specify are set to 0: -+ */ -+struct bch_opts bch2_opts_from_sb(struct bch_sb *sb) -+{ -+ struct bch_opts opts = bch2_opts_empty(); -+ -+#define x(_name, _bits, _mode, _type, _sb_opt, ...) \ -+ if (_sb_opt != NO_SB_OPT) \ -+ opt_set(opts, _name, _sb_opt(sb)); -+ BCH_OPTS() -+#undef x -+ -+ return opts; -+} -+ -+const struct bch_option bch2_opt_table[] = { -+#define OPT_BOOL() .type = BCH_OPT_BOOL -+#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, .min = _min, .max = _max -+#define OPT_SECTORS(_min, _max) .type = BCH_OPT_SECTORS, .min = _min, .max = _max -+#define OPT_STR(_choices) .type = BCH_OPT_STR, .choices = _choices -+#define OPT_FN(_fn) .type = BCH_OPT_FN, \ -+ .parse = _fn##_parse, \ -+ .to_text = _fn##_to_text -+ -+#define x(_name, _bits, _mode, _type, _sb_opt, _default, _hint, _help) \ -+ [Opt_##_name] = { \ -+ .attr = { \ -+ .name = #_name, \ -+ .mode = (_mode) & OPT_RUNTIME ? 
0644 : 0444, \ -+ }, \ -+ .mode = _mode, \ -+ .hint = _hint, \ -+ .help = _help, \ -+ .set_sb = SET_##_sb_opt, \ -+ _type \ -+ }, -+ -+ BCH_OPTS() -+#undef x -+}; -+ -+int bch2_opt_lookup(const char *name) -+{ -+ const struct bch_option *i; -+ -+ for (i = bch2_opt_table; -+ i < bch2_opt_table + ARRAY_SIZE(bch2_opt_table); -+ i++) -+ if (!strcmp(name, i->attr.name)) -+ return i - bch2_opt_table; -+ -+ return -1; -+} -+ -+struct synonym { -+ const char *s1, *s2; -+}; -+ -+static const struct synonym bch_opt_synonyms[] = { -+ { "quota", "usrquota" }, -+}; -+ -+static int bch2_mount_opt_lookup(const char *name) -+{ -+ const struct synonym *i; -+ -+ for (i = bch_opt_synonyms; -+ i < bch_opt_synonyms + ARRAY_SIZE(bch_opt_synonyms); -+ i++) -+ if (!strcmp(name, i->s1)) -+ name = i->s2; -+ -+ return bch2_opt_lookup(name); -+} -+ -+int bch2_opt_parse(struct bch_fs *c, const struct bch_option *opt, -+ const char *val, u64 *res) -+{ -+ ssize_t ret; -+ -+ switch (opt->type) { -+ case BCH_OPT_BOOL: -+ ret = kstrtou64(val, 10, res); -+ if (ret < 0) -+ return ret; -+ -+ if (*res > 1) -+ return -ERANGE; -+ break; -+ case BCH_OPT_UINT: -+ ret = kstrtou64(val, 10, res); -+ if (ret < 0) -+ return ret; -+ -+ if (*res < opt->min || *res >= opt->max) -+ return -ERANGE; -+ break; -+ case BCH_OPT_SECTORS: -+ ret = bch2_strtou64_h(val, res); -+ if (ret < 0) -+ return ret; -+ -+ if (*res & 511) -+ return -EINVAL; -+ -+ *res >>= 9; -+ -+ if (*res < opt->min || *res >= opt->max) -+ return -ERANGE; -+ break; -+ case BCH_OPT_STR: -+ ret = match_string(opt->choices, -1, val); -+ if (ret < 0) -+ return ret; -+ -+ *res = ret; -+ break; -+ case BCH_OPT_FN: -+ if (!c) -+ return -EINVAL; -+ -+ return opt->parse(c, val, res); -+ } -+ -+ return 0; -+} -+ -+void bch2_opt_to_text(struct printbuf *out, struct bch_fs *c, -+ const struct bch_option *opt, u64 v, -+ unsigned flags) -+{ -+ if (flags & OPT_SHOW_MOUNT_STYLE) { -+ if (opt->type == BCH_OPT_BOOL) { -+ pr_buf(out, "%s%s", -+ v ? 
"" : "no", -+ opt->attr.name); -+ return; -+ } -+ -+ pr_buf(out, "%s=", opt->attr.name); -+ } -+ -+ switch (opt->type) { -+ case BCH_OPT_BOOL: -+ case BCH_OPT_UINT: -+ pr_buf(out, "%lli", v); -+ break; -+ case BCH_OPT_SECTORS: -+ bch2_hprint(out, v); -+ break; -+ case BCH_OPT_STR: -+ if (flags & OPT_SHOW_FULL_LIST) -+ bch2_string_opt_to_text(out, opt->choices, v); -+ else -+ pr_buf(out, opt->choices[v]); -+ break; -+ case BCH_OPT_FN: -+ opt->to_text(out, c, v); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v) -+{ -+ int ret = 0; -+ -+ switch (id) { -+ case Opt_compression: -+ case Opt_background_compression: -+ ret = bch2_check_set_has_compressed_data(c, v); -+ break; -+ case Opt_erasure_code: -+ if (v) -+ bch2_check_set_feature(c, BCH_FEATURE_ec); -+ break; -+ } -+ -+ return ret; -+} -+ -+int bch2_opts_check_may_set(struct bch_fs *c) -+{ -+ unsigned i; -+ int ret; -+ -+ for (i = 0; i < bch2_opts_nr; i++) { -+ ret = bch2_opt_check_may_set(c, i, -+ bch2_opt_get_by_id(&c->opts, i)); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+int bch2_parse_mount_opts(struct bch_opts *opts, char *options) -+{ -+ char *opt, *name, *val; -+ int ret, id; -+ u64 v; -+ -+ while ((opt = strsep(&options, ",")) != NULL) { -+ name = strsep(&opt, "="); -+ val = opt; -+ -+ if (val) { -+ id = bch2_mount_opt_lookup(name); -+ if (id < 0) -+ goto bad_opt; -+ -+ ret = bch2_opt_parse(NULL, &bch2_opt_table[id], val, &v); -+ if (ret < 0) -+ goto bad_val; -+ } else { -+ id = bch2_mount_opt_lookup(name); -+ v = 1; -+ -+ if (id < 0 && -+ !strncmp("no", name, 2)) { -+ id = bch2_mount_opt_lookup(name + 2); -+ v = 0; -+ } -+ -+ if (id < 0) -+ goto bad_opt; -+ -+ if (bch2_opt_table[id].type != BCH_OPT_BOOL) -+ goto no_val; -+ } -+ -+ if (!(bch2_opt_table[id].mode & OPT_MOUNT)) -+ goto bad_opt; -+ -+ if (id == Opt_acl && -+ !IS_ENABLED(CONFIG_BCACHEFS_POSIX_ACL)) -+ goto bad_opt; -+ -+ if ((id == Opt_usrquota || -+ id == Opt_grpquota) && -+ !IS_ENABLED(CONFIG_BCACHEFS_QUOTA)) -+ goto bad_opt; -+ -+ bch2_opt_set_by_id(opts, id, v); -+ } -+ -+ return 0; -+bad_opt: -+ pr_err("Bad mount option %s", name); -+ return -1; -+bad_val: -+ pr_err("Invalid value %s for mount option %s", val, name); -+ return -1; -+no_val: -+ pr_err("Mount option %s requires a value", name); -+ return -1; -+} -+ -+/* io opts: */ -+ -+struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts src) -+{ -+ struct bch_io_opts ret = { 0 }; -+#define x(_name, _bits) \ -+ if (opt_defined(src, _name)) \ -+ opt_set(ret, _name, src._name); -+ BCH_INODE_OPTS() -+#undef x -+ return ret; -+} -+ -+struct bch_opts bch2_inode_opts_to_opts(struct bch_io_opts src) -+{ -+ struct bch_opts ret = { 0 }; -+#define x(_name, _bits) \ -+ if (opt_defined(src, _name)) \ -+ opt_set(ret, _name, src._name); -+ BCH_INODE_OPTS() -+#undef x -+ return ret; -+} -+ -+void bch2_io_opts_apply(struct bch_io_opts *dst, struct bch_io_opts src) -+{ -+#define x(_name, _bits) \ -+ if (opt_defined(src, _name)) \ -+ opt_set(*dst, _name, src._name); -+ BCH_INODE_OPTS() -+#undef x -+} -+ -+bool bch2_opt_is_inode_opt(enum bch_opt_id id) -+{ -+ static const enum bch_opt_id inode_opt_list[] = { -+#define x(_name, _bits) Opt_##_name, -+ BCH_INODE_OPTS() -+#undef x -+ }; -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(inode_opt_list); i++) -+ if (inode_opt_list[i] == id) -+ return true; -+ -+ return false; -+} -diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -new file mode 100644 -index 000000000000..014c608ca0c6 ---- /dev/null -+++ 
b/fs/bcachefs/opts.h -@@ -0,0 +1,440 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_OPTS_H -+#define _BCACHEFS_OPTS_H -+ -+#include -+#include -+#include -+#include -+#include "bcachefs_format.h" -+ -+extern const char * const bch2_error_actions[]; -+extern const char * const bch2_sb_features[]; -+extern const char * const bch2_csum_opts[]; -+extern const char * const bch2_compression_opts[]; -+extern const char * const bch2_str_hash_types[]; -+extern const char * const bch2_data_types[]; -+extern const char * const bch2_cache_replacement_policies[]; -+extern const char * const bch2_cache_modes[]; -+extern const char * const bch2_dev_state[]; -+ -+/* -+ * Mount options; we also store defaults in the superblock. -+ * -+ * Also exposed via sysfs: if an option is writeable, and it's also stored in -+ * the superblock, changing it via sysfs (currently? might change this) also -+ * updates the superblock. -+ * -+ * We store options as signed integers, where -1 means undefined. This means we -+ * can pass the mount options to bch2_fs_alloc() as a whole struct, and then only -+ * apply the options from that struct that are defined. -+ */ -+ -+/* dummy option, for options that aren't stored in the superblock */ -+LE64_BITMASK(NO_SB_OPT, struct bch_sb, flags[0], 0, 0); -+ -+/* When can be set: */ -+enum opt_mode { -+ OPT_FORMAT = (1 << 0), -+ OPT_MOUNT = (1 << 1), -+ OPT_RUNTIME = (1 << 2), -+ OPT_INODE = (1 << 3), -+ OPT_DEVICE = (1 << 4), -+}; -+ -+enum opt_type { -+ BCH_OPT_BOOL, -+ BCH_OPT_UINT, -+ BCH_OPT_SECTORS, -+ BCH_OPT_STR, -+ BCH_OPT_FN, -+}; -+ -+/** -+ * x(name, shortopt, type, in mem type, mode, sb_opt) -+ * -+ * @name - name of mount option, sysfs attribute, and struct bch_opts -+ * member -+ * -+ * @mode - when opt may be set -+ * -+ * @sb_option - name of corresponding superblock option -+ * -+ * @type - one of OPT_BOOL, OPT_UINT, OPT_STR -+ */ -+ -+/* -+ * XXX: add fields for -+ * - default value -+ * - helptext -+ */ -+ -+#ifdef __KERNEL__ -+#define RATELIMIT_ERRORS true -+#else -+#define RATELIMIT_ERRORS false -+#endif -+ -+#define BCH_OPTS() \ -+ x(block_size, u16, \ -+ OPT_FORMAT, \ -+ OPT_SECTORS(1, 128), \ -+ BCH_SB_BLOCK_SIZE, 8, \ -+ "size", NULL) \ -+ x(btree_node_size, u16, \ -+ OPT_FORMAT, \ -+ OPT_SECTORS(1, 512), \ -+ BCH_SB_BTREE_NODE_SIZE, 512, \ -+ "size", "Btree node size, default 256k") \ -+ x(errors, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_STR(bch2_error_actions), \ -+ BCH_SB_ERROR_ACTION, BCH_ON_ERROR_RO, \ -+ NULL, "Action to take on filesystem error") \ -+ x(metadata_replicas, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_META_REPLICAS_WANT, 1, \ -+ "#", "Number of metadata replicas") \ -+ x(data_replicas, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_DATA_REPLICAS_WANT, 1, \ -+ "#", "Number of data replicas") \ -+ x(metadata_replicas_required, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_META_REPLICAS_REQ, 1, \ -+ "#", NULL) \ -+ x(data_replicas_required, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_DATA_REPLICAS_REQ, 1, \ -+ "#", NULL) \ -+ x(metadata_checksum, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_STR(bch2_csum_opts), \ -+ BCH_SB_META_CSUM_TYPE, BCH_CSUM_OPT_CRC32C, \ -+ NULL, NULL) \ -+ x(data_checksum, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_STR(bch2_csum_opts), \ -+ BCH_SB_DATA_CSUM_TYPE, BCH_CSUM_OPT_CRC32C, \ -+ NULL, 
NULL) \ -+ x(compression, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_STR(bch2_compression_opts), \ -+ BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_none, \ -+ NULL, NULL) \ -+ x(background_compression, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_STR(bch2_compression_opts), \ -+ BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_none, \ -+ NULL, NULL) \ -+ x(str_hash, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_STR(bch2_str_hash_types), \ -+ BCH_SB_STR_HASH_TYPE, BCH_STR_HASH_OPT_SIPHASH, \ -+ NULL, "Hash function for directory entries and xattrs")\ -+ x(foreground_target, u16, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_FN(bch2_opt_target), \ -+ BCH_SB_FOREGROUND_TARGET, 0, \ -+ "(target)", "Device or disk group for foreground writes") \ -+ x(background_target, u16, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_FN(bch2_opt_target), \ -+ BCH_SB_BACKGROUND_TARGET, 0, \ -+ "(target)", "Device or disk group to move data to in the background")\ -+ x(promote_target, u16, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_FN(bch2_opt_target), \ -+ BCH_SB_PROMOTE_TARGET, 0, \ -+ "(target)", "Device or disk group to promote data to on read")\ -+ x(erasure_code, u16, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_BOOL(), \ -+ BCH_SB_ERASURE_CODE, false, \ -+ NULL, "Enable erasure coding (DO NOT USE YET)") \ -+ x(inodes_32bit, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_INODE_32BIT, false, \ -+ NULL, "Constrain inode numbers to 32 bits") \ -+ x(gc_reserve_percent, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_UINT(5, 21), \ -+ BCH_SB_GC_RESERVE, 8, \ -+ "%", "Percentage of disk space to reserve for copygc")\ -+ x(gc_reserve_bytes, u64, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_SECTORS(0, U64_MAX), \ -+ BCH_SB_GC_RESERVE_BYTES, 0, \ -+ "%", "Amount of disk space to reserve for copygc\n" \ -+ "Takes precedence over gc_reserve_percent if set")\ -+ x(root_reserve_percent, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_UINT(0, 100), \ -+ BCH_SB_ROOT_RESERVE, 0, \ -+ "%", "Percentage of disk space to reserve for superuser")\ -+ x(wide_macs, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_128_BIT_MACS, false, \ -+ NULL, "Store full 128 bits of cryptographic MACs, instead of 80")\ -+ x(inline_data, u8, \ -+ OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Enable inline data extents") \ -+ x(acl, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_POSIX_ACL, true, \ -+ NULL, "Enable POSIX acls") \ -+ x(usrquota, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_USRQUOTA, false, \ -+ NULL, "Enable user quotas") \ -+ x(grpquota, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_GRPQUOTA, false, \ -+ NULL, "Enable group quotas") \ -+ x(prjquota, u8, \ -+ OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_PRJQUOTA, false, \ -+ NULL, "Enable project quotas") \ -+ x(reflink, u8, \ -+ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_REFLINK, true, \ -+ NULL, "Enable reflink support") \ -+ x(degraded, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Allow mounting in degraded mode") \ -+ x(discard, u8, \ -+ OPT_MOUNT|OPT_DEVICE, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Enable discard/TRIM support") \ -+ x(verbose, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Extra debugging information during mount/recovery")\ -+ x(journal_flush_disabled, u8, \ 
-+ OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Disable journal flush on sync/fsync\n" \ -+ "If enabled, writes can be lost, but only since the\n"\ -+ "last journal write (default 1 second)") \ -+ x(fsck, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Run fsck on mount") \ -+ x(fix_errors, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Fix errors during fsck without asking") \ -+ x(ratelimit_errors, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, RATELIMIT_ERRORS, \ -+ NULL, "Ratelimit error messages during fsck") \ -+ x(nochanges, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Super read only mode - no writes at all will be issued,\n"\ -+ "even if we have to replay the journal") \ -+ x(norecovery, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Don't replay the journal") \ -+ x(rebuild_replicas, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Rebuild the superblock replicas section") \ -+ x(keep_journal, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Don't free journal entries/keys after startup")\ -+ x(read_entire_journal, u8, \ -+ 0, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Read all journal entries, not just dirty ones")\ -+ x(noexcl, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Don't open device in exclusive mode") \ -+ x(sb, u64, \ -+ OPT_MOUNT, \ -+ OPT_UINT(0, S64_MAX), \ -+ NO_SB_OPT, BCH_SB_SECTOR, \ -+ "offset", "Sector offset of superblock") \ -+ x(read_only, u8, \ -+ 0, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, NULL) \ -+ x(nostart, u8, \ -+ 0, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Don\'t start filesystem, only open devices") \ -+ x(reconstruct_alloc, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Reconstruct alloc btree") \ -+ x(version_upgrade, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, "Set superblock to latest version,\n" \ -+ "allowing any new features to be used") \ -+ x(project, u8, \ -+ OPT_INODE, \ -+ OPT_BOOL(), \ -+ NO_SB_OPT, false, \ -+ NULL, NULL) \ -+ x(fs_size, u64, \ -+ OPT_DEVICE, \ -+ OPT_SECTORS(0, S64_MAX), \ -+ NO_SB_OPT, 0, \ -+ "size", "Size of filesystem on device") \ -+ x(bucket, u32, \ -+ OPT_DEVICE, \ -+ OPT_SECTORS(0, S64_MAX), \ -+ NO_SB_OPT, 0, \ -+ "size", "Size of filesystem on device") \ -+ x(durability, u8, \ -+ OPT_DEVICE, \ -+ OPT_UINT(0, BCH_REPLICAS_MAX), \ -+ NO_SB_OPT, 1, \ -+ "n", "Data written to this device will be considered\n"\ -+ "to have already been replicated n times") -+ -+struct bch_opts { -+#define x(_name, _bits, ...) unsigned _name##_defined:1; -+ BCH_OPTS() -+#undef x -+ -+#define x(_name, _bits, ...) _bits _name; -+ BCH_OPTS() -+#undef x -+}; -+ -+static const struct bch_opts bch2_opts_default = { -+#define x(_name, _bits, _mode, _type, _sb_opt, _default, ...) \ -+ ._name##_defined = true, \ -+ ._name = _default, \ -+ -+ BCH_OPTS() -+#undef x -+}; -+ -+#define opt_defined(_opts, _name) ((_opts)._name##_defined) -+ -+#define opt_get(_opts, _name) \ -+ (opt_defined(_opts, _name) ? 
(_opts)._name : bch2_opts_default._name) -+ -+#define opt_set(_opts, _name, _v) \ -+do { \ -+ (_opts)._name##_defined = true; \ -+ (_opts)._name = _v; \ -+} while (0) -+ -+static inline struct bch_opts bch2_opts_empty(void) -+{ -+ return (struct bch_opts) { 0 }; -+} -+ -+void bch2_opts_apply(struct bch_opts *, struct bch_opts); -+ -+enum bch_opt_id { -+#define x(_name, ...) Opt_##_name, -+ BCH_OPTS() -+#undef x -+ bch2_opts_nr -+}; -+ -+struct bch_fs; -+struct printbuf; -+ -+struct bch_option { -+ struct attribute attr; -+ void (*set_sb)(struct bch_sb *, u64); -+ enum opt_mode mode; -+ enum opt_type type; -+ -+ union { -+ struct { -+ u64 min, max; -+ }; -+ struct { -+ const char * const *choices; -+ }; -+ struct { -+ int (*parse)(struct bch_fs *, const char *, u64 *); -+ void (*to_text)(struct printbuf *, struct bch_fs *, u64); -+ }; -+ }; -+ -+ const char *hint; -+ const char *help; -+ -+}; -+ -+extern const struct bch_option bch2_opt_table[]; -+ -+bool bch2_opt_defined_by_id(const struct bch_opts *, enum bch_opt_id); -+u64 bch2_opt_get_by_id(const struct bch_opts *, enum bch_opt_id); -+void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64); -+ -+struct bch_opts bch2_opts_from_sb(struct bch_sb *); -+ -+int bch2_opt_lookup(const char *); -+int bch2_opt_parse(struct bch_fs *, const struct bch_option *, const char *, u64 *); -+ -+#define OPT_SHOW_FULL_LIST (1 << 0) -+#define OPT_SHOW_MOUNT_STYLE (1 << 1) -+ -+void bch2_opt_to_text(struct printbuf *, struct bch_fs *, -+ const struct bch_option *, u64, unsigned); -+ -+int bch2_opt_check_may_set(struct bch_fs *, int, u64); -+int bch2_opts_check_may_set(struct bch_fs *); -+int bch2_parse_mount_opts(struct bch_opts *, char *); -+ -+/* inode opts: */ -+ -+struct bch_io_opts { -+#define x(_name, _bits) unsigned _name##_defined:1; -+ BCH_INODE_OPTS() -+#undef x -+ -+#define x(_name, _bits) u##_bits _name; -+ BCH_INODE_OPTS() -+#undef x -+}; -+ -+struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts); -+struct bch_opts bch2_inode_opts_to_opts(struct bch_io_opts); -+void bch2_io_opts_apply(struct bch_io_opts *, struct bch_io_opts); -+bool bch2_opt_is_inode_opt(enum bch_opt_id); -+ -+#endif /* _BCACHEFS_OPTS_H */ -diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c -new file mode 100644 -index 000000000000..d3032a46e7f3 ---- /dev/null -+++ b/fs/bcachefs/quota.c -@@ -0,0 +1,783 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "btree_update.h" -+#include "inode.h" -+#include "quota.h" -+#include "super-io.h" -+ -+static const char *bch2_sb_validate_quota(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_quota *q = field_to_type(f, quota); -+ -+ if (vstruct_bytes(&q->field) != sizeof(*q)) -+ return "invalid field quota: wrong size"; -+ -+ return NULL; -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_quota = { -+ .validate = bch2_sb_validate_quota, -+}; -+ -+const char *bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (k.k->p.inode >= QTYP_NR) -+ return "invalid quota type"; -+ -+ if (bkey_val_bytes(k.k) != sizeof(struct bch_quota)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+static const char * const bch2_quota_counters[] = { -+ "space", -+ "inodes", -+}; -+ -+void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_quota dq = bkey_s_c_to_quota(k); -+ unsigned i; -+ -+ for (i = 0; i < Q_COUNTERS; i++) -+ pr_buf(out, "%s hardlimit %llu softlimit %llu", -+ bch2_quota_counters[i], -+ 
le64_to_cpu(dq.v->c[i].hardlimit), -+ le64_to_cpu(dq.v->c[i].softlimit)); -+} -+ -+#ifdef CONFIG_BCACHEFS_QUOTA -+ -+#include -+#include -+#include -+ -+static inline unsigned __next_qtype(unsigned i, unsigned qtypes) -+{ -+ qtypes >>= i; -+ return qtypes ? i + __ffs(qtypes) : QTYP_NR; -+} -+ -+#define for_each_set_qtype(_c, _i, _q, _qtypes) \ -+ for (_i = 0; \ -+ (_i = __next_qtype(_i, _qtypes), \ -+ _q = &(_c)->quotas[_i], \ -+ _i < QTYP_NR); \ -+ _i++) -+ -+static bool ignore_hardlimit(struct bch_memquota_type *q) -+{ -+ if (capable(CAP_SYS_RESOURCE)) -+ return true; -+#if 0 -+ struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; -+ -+ return capable(CAP_SYS_RESOURCE) && -+ (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || -+ !(info->dqi_flags & DQF_ROOT_SQUASH)); -+#endif -+ return false; -+} -+ -+enum quota_msg { -+ SOFTWARN, /* Softlimit reached */ -+ SOFTLONGWARN, /* Grace time expired */ -+ HARDWARN, /* Hardlimit reached */ -+ -+ HARDBELOW, /* Usage got below inode hardlimit */ -+ SOFTBELOW, /* Usage got below inode softlimit */ -+}; -+ -+static int quota_nl[][Q_COUNTERS] = { -+ [HARDWARN][Q_SPC] = QUOTA_NL_BHARDWARN, -+ [SOFTLONGWARN][Q_SPC] = QUOTA_NL_BSOFTLONGWARN, -+ [SOFTWARN][Q_SPC] = QUOTA_NL_BSOFTWARN, -+ [HARDBELOW][Q_SPC] = QUOTA_NL_BHARDBELOW, -+ [SOFTBELOW][Q_SPC] = QUOTA_NL_BSOFTBELOW, -+ -+ [HARDWARN][Q_INO] = QUOTA_NL_IHARDWARN, -+ [SOFTLONGWARN][Q_INO] = QUOTA_NL_ISOFTLONGWARN, -+ [SOFTWARN][Q_INO] = QUOTA_NL_ISOFTWARN, -+ [HARDBELOW][Q_INO] = QUOTA_NL_IHARDBELOW, -+ [SOFTBELOW][Q_INO] = QUOTA_NL_ISOFTBELOW, -+}; -+ -+struct quota_msgs { -+ u8 nr; -+ struct { -+ u8 qtype; -+ u8 msg; -+ } m[QTYP_NR * Q_COUNTERS]; -+}; -+ -+static void prepare_msg(unsigned qtype, -+ enum quota_counters counter, -+ struct quota_msgs *msgs, -+ enum quota_msg msg_type) -+{ -+ BUG_ON(msgs->nr >= ARRAY_SIZE(msgs->m)); -+ -+ msgs->m[msgs->nr].qtype = qtype; -+ msgs->m[msgs->nr].msg = quota_nl[msg_type][counter]; -+ msgs->nr++; -+} -+ -+static void prepare_warning(struct memquota_counter *qc, -+ unsigned qtype, -+ enum quota_counters counter, -+ struct quota_msgs *msgs, -+ enum quota_msg msg_type) -+{ -+ if (qc->warning_issued & (1 << msg_type)) -+ return; -+ -+ prepare_msg(qtype, counter, msgs, msg_type); -+} -+ -+static void flush_warnings(struct bch_qid qid, -+ struct super_block *sb, -+ struct quota_msgs *msgs) -+{ -+ unsigned i; -+ -+ for (i = 0; i < msgs->nr; i++) -+ quota_send_warning(make_kqid(&init_user_ns, msgs->m[i].qtype, qid.q[i]), -+ sb->s_dev, msgs->m[i].msg); -+} -+ -+static int bch2_quota_check_limit(struct bch_fs *c, -+ unsigned qtype, -+ struct bch_memquota *mq, -+ struct quota_msgs *msgs, -+ enum quota_counters counter, -+ s64 v, -+ enum quota_acct_mode mode) -+{ -+ struct bch_memquota_type *q = &c->quotas[qtype]; -+ struct memquota_counter *qc = &mq->c[counter]; -+ u64 n = qc->v + v; -+ -+ BUG_ON((s64) n < 0); -+ -+ if (mode == KEY_TYPE_QUOTA_NOCHECK) -+ return 0; -+ -+ if (v <= 0) { -+ if (n < qc->hardlimit && -+ (qc->warning_issued & (1 << HARDWARN))) { -+ qc->warning_issued &= ~(1 << HARDWARN); -+ prepare_msg(qtype, counter, msgs, HARDBELOW); -+ } -+ -+ if (n < qc->softlimit && -+ (qc->warning_issued & (1 << SOFTWARN))) { -+ qc->warning_issued &= ~(1 << SOFTWARN); -+ prepare_msg(qtype, counter, msgs, SOFTBELOW); -+ } -+ -+ qc->warning_issued = 0; -+ return 0; -+ } -+ -+ if (qc->hardlimit && -+ qc->hardlimit < n && -+ !ignore_hardlimit(q)) { -+ if (mode == KEY_TYPE_QUOTA_PREALLOC) -+ return -EDQUOT; -+ -+ prepare_warning(qc, qtype, counter, msgs, 
HARDWARN); -+ } -+ -+ if (qc->softlimit && -+ qc->softlimit < n && -+ qc->timer && -+ ktime_get_real_seconds() >= qc->timer && -+ !ignore_hardlimit(q)) { -+ if (mode == KEY_TYPE_QUOTA_PREALLOC) -+ return -EDQUOT; -+ -+ prepare_warning(qc, qtype, counter, msgs, SOFTLONGWARN); -+ } -+ -+ if (qc->softlimit && -+ qc->softlimit < n && -+ qc->timer == 0) { -+ if (mode == KEY_TYPE_QUOTA_PREALLOC) -+ return -EDQUOT; -+ -+ prepare_warning(qc, qtype, counter, msgs, SOFTWARN); -+ -+ /* XXX is this the right one? */ -+ qc->timer = ktime_get_real_seconds() + -+ q->limits[counter].warnlimit; -+ } -+ -+ return 0; -+} -+ -+int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid, -+ enum quota_counters counter, s64 v, -+ enum quota_acct_mode mode) -+{ -+ unsigned qtypes = enabled_qtypes(c); -+ struct bch_memquota_type *q; -+ struct bch_memquota *mq[QTYP_NR]; -+ struct quota_msgs msgs; -+ unsigned i; -+ int ret = 0; -+ -+ memset(&msgs, 0, sizeof(msgs)); -+ -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_lock_nested(&q->lock, i); -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ mq[i] = genradix_ptr_alloc(&q->table, qid.q[i], GFP_NOFS); -+ if (!mq[i]) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ ret = bch2_quota_check_limit(c, i, mq[i], &msgs, counter, v, mode); -+ if (ret) -+ goto err; -+ } -+ -+ for_each_set_qtype(c, i, q, qtypes) -+ mq[i]->c[counter].v += v; -+err: -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_unlock(&q->lock); -+ -+ flush_warnings(qid, c->vfs_sb, &msgs); -+ -+ return ret; -+} -+ -+static void __bch2_quota_transfer(struct bch_memquota *src_q, -+ struct bch_memquota *dst_q, -+ enum quota_counters counter, s64 v) -+{ -+ BUG_ON(v > src_q->c[counter].v); -+ BUG_ON(v + dst_q->c[counter].v < v); -+ -+ src_q->c[counter].v -= v; -+ dst_q->c[counter].v += v; -+} -+ -+int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes, -+ struct bch_qid dst, -+ struct bch_qid src, u64 space, -+ enum quota_acct_mode mode) -+{ -+ struct bch_memquota_type *q; -+ struct bch_memquota *src_q[3], *dst_q[3]; -+ struct quota_msgs msgs; -+ unsigned i; -+ int ret = 0; -+ -+ qtypes &= enabled_qtypes(c); -+ -+ memset(&msgs, 0, sizeof(msgs)); -+ -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_lock_nested(&q->lock, i); -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ src_q[i] = genradix_ptr_alloc(&q->table, src.q[i], GFP_NOFS); -+ dst_q[i] = genradix_ptr_alloc(&q->table, dst.q[i], GFP_NOFS); -+ -+ if (!src_q[i] || !dst_q[i]) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_SPC, -+ dst_q[i]->c[Q_SPC].v + space, -+ mode); -+ if (ret) -+ goto err; -+ -+ ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_INO, -+ dst_q[i]->c[Q_INO].v + 1, -+ mode); -+ if (ret) -+ goto err; -+ } -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ __bch2_quota_transfer(src_q[i], dst_q[i], Q_SPC, space); -+ __bch2_quota_transfer(src_q[i], dst_q[i], Q_INO, 1); -+ } -+ -+err: -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_unlock(&q->lock); -+ -+ flush_warnings(dst, c->vfs_sb, &msgs); -+ -+ return ret; -+} -+ -+static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_quota dq; -+ struct bch_memquota_type *q; -+ struct bch_memquota *mq; -+ unsigned i; -+ -+ BUG_ON(k.k->p.inode >= QTYP_NR); -+ -+ switch (k.k->type) { -+ case KEY_TYPE_quota: -+ dq = bkey_s_c_to_quota(k); -+ q = &c->quotas[k.k->p.inode]; -+ -+ mutex_lock(&q->lock); -+ mq = genradix_ptr_alloc(&q->table, k.k->p.offset, GFP_KERNEL); -+ if (!mq) { -+ mutex_unlock(&q->lock); -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i 
< Q_COUNTERS; i++) { -+ mq->c[i].hardlimit = le64_to_cpu(dq.v->c[i].hardlimit); -+ mq->c[i].softlimit = le64_to_cpu(dq.v->c[i].softlimit); -+ } -+ -+ mutex_unlock(&q->lock); -+ } -+ -+ return 0; -+} -+ -+static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_QUOTAS, POS(type, 0), -+ BTREE_ITER_PREFETCH, k, ret) { -+ if (k.k->p.inode != type) -+ break; -+ -+ ret = __bch2_quota_set(c, k); -+ if (ret) -+ break; -+ } -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+void bch2_fs_quota_exit(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(c->quotas); i++) -+ genradix_free(&c->quotas[i].table); -+} -+ -+void bch2_fs_quota_init(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(c->quotas); i++) -+ mutex_init(&c->quotas[i].lock); -+} -+ -+static void bch2_sb_quota_read(struct bch_fs *c) -+{ -+ struct bch_sb_field_quota *sb_quota; -+ unsigned i, j; -+ -+ sb_quota = bch2_sb_get_quota(c->disk_sb.sb); -+ if (!sb_quota) -+ return; -+ -+ for (i = 0; i < QTYP_NR; i++) { -+ struct bch_memquota_type *q = &c->quotas[i]; -+ -+ for (j = 0; j < Q_COUNTERS; j++) { -+ q->limits[j].timelimit = -+ le32_to_cpu(sb_quota->q[i].c[j].timelimit); -+ q->limits[j].warnlimit = -+ le32_to_cpu(sb_quota->q[i].c[j].warnlimit); -+ } -+ } -+} -+ -+int bch2_fs_quota_read(struct bch_fs *c) -+{ -+ unsigned i, qtypes = enabled_qtypes(c); -+ struct bch_memquota_type *q; -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bch_inode_unpacked u; -+ struct bkey_s_c k; -+ int ret; -+ -+ mutex_lock(&c->sb_lock); -+ bch2_sb_quota_read(c); -+ mutex_unlock(&c->sb_lock); -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ ret = bch2_quota_init_type(c, i); -+ if (ret) -+ return ret; -+ } -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, -+ BTREE_ITER_PREFETCH, k, ret) { -+ switch (k.k->type) { -+ case KEY_TYPE_inode: -+ ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u); -+ if (ret) -+ return ret; -+ -+ bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors, -+ KEY_TYPE_QUOTA_NOCHECK); -+ bch2_quota_acct(c, bch_qid(&u), Q_INO, 1, -+ KEY_TYPE_QUOTA_NOCHECK); -+ } -+ } -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+/* Enable/disable/delete quotas for an entire filesystem: */ -+ -+static int bch2_quota_enable(struct super_block *sb, unsigned uflags) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ /* Accounting must be enabled at mount time: */ -+ if (uflags & (FS_QUOTA_UDQ_ACCT|FS_QUOTA_GDQ_ACCT|FS_QUOTA_PDQ_ACCT)) -+ return -EINVAL; -+ -+ /* Can't enable enforcement without accounting: */ -+ if ((uflags & FS_QUOTA_UDQ_ENFD) && !c->opts.usrquota) -+ return -EINVAL; -+ -+ if ((uflags & FS_QUOTA_GDQ_ENFD) && !c->opts.grpquota) -+ return -EINVAL; -+ -+ if (uflags & FS_QUOTA_PDQ_ENFD && !c->opts.prjquota) -+ return -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ if (uflags & FS_QUOTA_UDQ_ENFD) -+ SET_BCH_SB_USRQUOTA(c->disk_sb.sb, true); -+ -+ if (uflags & FS_QUOTA_GDQ_ENFD) -+ SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, true); -+ -+ if (uflags & FS_QUOTA_PDQ_ENFD) -+ SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, true); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+static int bch2_quota_disable(struct super_block *sb, unsigned uflags) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ 
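-+	/* clears the corresponding quota flags in the superblock: */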
if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ mutex_lock(&c->sb_lock); -+ if (uflags & FS_QUOTA_UDQ_ENFD) -+ SET_BCH_SB_USRQUOTA(c->disk_sb.sb, false); -+ -+ if (uflags & FS_QUOTA_GDQ_ENFD) -+ SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, false); -+ -+ if (uflags & FS_QUOTA_PDQ_ENFD) -+ SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, false); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+static int bch2_quota_remove(struct super_block *sb, unsigned uflags) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ int ret; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ if (uflags & FS_USER_QUOTA) { -+ if (c->opts.usrquota) -+ return -EINVAL; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, -+ POS(QTYP_USR, 0), -+ POS(QTYP_USR + 1, 0), -+ NULL); -+ if (ret) -+ return ret; -+ } -+ -+ if (uflags & FS_GROUP_QUOTA) { -+ if (c->opts.grpquota) -+ return -EINVAL; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, -+ POS(QTYP_GRP, 0), -+ POS(QTYP_GRP + 1, 0), -+ NULL); -+ if (ret) -+ return ret; -+ } -+ -+ if (uflags & FS_PROJ_QUOTA) { -+ if (c->opts.prjquota) -+ return -EINVAL; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, -+ POS(QTYP_PRJ, 0), -+ POS(QTYP_PRJ + 1, 0), -+ NULL); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Return quota status information, such as enforcements, quota file inode -+ * numbers etc. -+ */ -+static int bch2_quota_get_state(struct super_block *sb, struct qc_state *state) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ unsigned qtypes = enabled_qtypes(c); -+ unsigned i; -+ -+ memset(state, 0, sizeof(*state)); -+ -+ for (i = 0; i < QTYP_NR; i++) { -+ state->s_state[i].flags |= QCI_SYSFILE; -+ -+ if (!(qtypes & (1 << i))) -+ continue; -+ -+ state->s_state[i].flags |= QCI_ACCT_ENABLED; -+ -+ state->s_state[i].spc_timelimit = c->quotas[i].limits[Q_SPC].timelimit; -+ state->s_state[i].spc_warnlimit = c->quotas[i].limits[Q_SPC].warnlimit; -+ -+ state->s_state[i].ino_timelimit = c->quotas[i].limits[Q_INO].timelimit; -+ state->s_state[i].ino_warnlimit = c->quotas[i].limits[Q_INO].warnlimit; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Adjust quota timers & warnings -+ */ -+static int bch2_quota_set_info(struct super_block *sb, int type, -+ struct qc_info *info) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_sb_field_quota *sb_quota; -+ struct bch_memquota_type *q; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ if (type >= QTYP_NR) -+ return -EINVAL; -+ -+ if (!((1 << type) & enabled_qtypes(c))) -+ return -ESRCH; -+ -+ if (info->i_fieldmask & -+ ~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS)) -+ return -EINVAL; -+ -+ q = &c->quotas[type]; -+ -+ mutex_lock(&c->sb_lock); -+ sb_quota = bch2_sb_get_quota(c->disk_sb.sb); -+ if (!sb_quota) { -+ sb_quota = bch2_sb_resize_quota(&c->disk_sb, -+ sizeof(*sb_quota) / sizeof(u64)); -+ if (!sb_quota) -+ return -ENOSPC; -+ } -+ -+ if (info->i_fieldmask & QC_SPC_TIMER) -+ sb_quota->q[type].c[Q_SPC].timelimit = -+ cpu_to_le32(info->i_spc_timelimit); -+ -+ if (info->i_fieldmask & QC_SPC_WARNS) -+ sb_quota->q[type].c[Q_SPC].warnlimit = -+ cpu_to_le32(info->i_spc_warnlimit); -+ -+ if (info->i_fieldmask & QC_INO_TIMER) -+ sb_quota->q[type].c[Q_INO].timelimit = -+ cpu_to_le32(info->i_ino_timelimit); -+ -+ if (info->i_fieldmask & QC_INO_WARNS) -+ sb_quota->q[type].c[Q_INO].warnlimit = -+ cpu_to_le32(info->i_ino_warnlimit); -+ -+ bch2_sb_quota_read(c); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+/* Get/set individual quotas: */ -+ -+static void 
__bch2_quota_get(struct qc_dqblk *dst, struct bch_memquota *src) -+{ -+ dst->d_space = src->c[Q_SPC].v << 9; -+ dst->d_spc_hardlimit = src->c[Q_SPC].hardlimit << 9; -+ dst->d_spc_softlimit = src->c[Q_SPC].softlimit << 9; -+ dst->d_spc_timer = src->c[Q_SPC].timer; -+ dst->d_spc_warns = src->c[Q_SPC].warns; -+ -+ dst->d_ino_count = src->c[Q_INO].v; -+ dst->d_ino_hardlimit = src->c[Q_INO].hardlimit; -+ dst->d_ino_softlimit = src->c[Q_INO].softlimit; -+ dst->d_ino_timer = src->c[Q_INO].timer; -+ dst->d_ino_warns = src->c[Q_INO].warns; -+} -+ -+static int bch2_get_quota(struct super_block *sb, struct kqid kqid, -+ struct qc_dqblk *qdq) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_memquota_type *q = &c->quotas[kqid.type]; -+ qid_t qid = from_kqid(&init_user_ns, kqid); -+ struct bch_memquota *mq; -+ -+ memset(qdq, 0, sizeof(*qdq)); -+ -+ mutex_lock(&q->lock); -+ mq = genradix_ptr(&q->table, qid); -+ if (mq) -+ __bch2_quota_get(qdq, mq); -+ mutex_unlock(&q->lock); -+ -+ return 0; -+} -+ -+static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid, -+ struct qc_dqblk *qdq) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_memquota_type *q = &c->quotas[kqid->type]; -+ qid_t qid = from_kqid(&init_user_ns, *kqid); -+ struct genradix_iter iter; -+ struct bch_memquota *mq; -+ int ret = 0; -+ -+ mutex_lock(&q->lock); -+ -+ genradix_for_each_from(&q->table, iter, mq, qid) -+ if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) { -+ __bch2_quota_get(qdq, mq); -+ *kqid = make_kqid(current_user_ns(), kqid->type, iter.pos); -+ goto found; -+ } -+ -+ ret = -ENOENT; -+found: -+ mutex_unlock(&q->lock); -+ return ret; -+} -+ -+static int bch2_set_quota_trans(struct btree_trans *trans, -+ struct bkey_i_quota *new_quota, -+ struct qc_dqblk *qdq) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_QUOTAS, new_quota->k.p, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ k = bch2_btree_iter_peek_slot(iter); -+ -+ ret = bkey_err(k); -+ if (unlikely(ret)) -+ return ret; -+ -+ if (k.k->type == KEY_TYPE_quota) -+ new_quota->v = *bkey_s_c_to_quota(k).v; -+ -+ if (qdq->d_fieldmask & QC_SPC_SOFT) -+ new_quota->v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9); -+ if (qdq->d_fieldmask & QC_SPC_HARD) -+ new_quota->v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9); -+ -+ if (qdq->d_fieldmask & QC_INO_SOFT) -+ new_quota->v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit); -+ if (qdq->d_fieldmask & QC_INO_HARD) -+ new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit); -+ -+ return bch2_trans_update(trans, iter, &new_quota->k_i, 0); -+} -+ -+static int bch2_set_quota(struct super_block *sb, struct kqid qid, -+ struct qc_dqblk *qdq) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct btree_trans trans; -+ struct bkey_i_quota new_quota; -+ int ret; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ bkey_quota_init(&new_quota.k_i); -+ new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOUNLOCK, -+ bch2_set_quota_trans(&trans, &new_quota, qdq)) ?: -+ __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i)); -+ -+ bch2_trans_exit(&trans); -+ -+ return ret; -+} -+ -+const struct quotactl_ops bch2_quotactl_operations = { -+ .quota_enable = bch2_quota_enable, -+ .quota_disable = bch2_quota_disable, -+ .rm_xquota = bch2_quota_remove, -+ -+ .get_state = bch2_quota_get_state, -+ .set_info = 
bch2_quota_set_info, -+ -+ .get_dqblk = bch2_get_quota, -+ .get_nextdqblk = bch2_get_next_quota, -+ .set_dqblk = bch2_set_quota, -+}; -+ -+#endif /* CONFIG_BCACHEFS_QUOTA */ -diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h -new file mode 100644 -index 000000000000..51e4f9713ef0 ---- /dev/null -+++ b/fs/bcachefs/quota.h -@@ -0,0 +1,71 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_QUOTA_H -+#define _BCACHEFS_QUOTA_H -+ -+#include "inode.h" -+#include "quota_types.h" -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_quota; -+ -+const char *bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_quota (struct bkey_ops) { \ -+ .key_invalid = bch2_quota_invalid, \ -+ .val_to_text = bch2_quota_to_text, \ -+} -+ -+static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u) -+{ -+ return (struct bch_qid) { -+ .q[QTYP_USR] = u->bi_uid, -+ .q[QTYP_GRP] = u->bi_gid, -+ .q[QTYP_PRJ] = u->bi_project ? u->bi_project - 1 : 0, -+ }; -+} -+ -+static inline unsigned enabled_qtypes(struct bch_fs *c) -+{ -+ return ((c->opts.usrquota << QTYP_USR)| -+ (c->opts.grpquota << QTYP_GRP)| -+ (c->opts.prjquota << QTYP_PRJ)); -+} -+ -+#ifdef CONFIG_BCACHEFS_QUOTA -+ -+int bch2_quota_acct(struct bch_fs *, struct bch_qid, enum quota_counters, -+ s64, enum quota_acct_mode); -+ -+int bch2_quota_transfer(struct bch_fs *, unsigned, struct bch_qid, -+ struct bch_qid, u64, enum quota_acct_mode); -+ -+void bch2_fs_quota_exit(struct bch_fs *); -+void bch2_fs_quota_init(struct bch_fs *); -+int bch2_fs_quota_read(struct bch_fs *); -+ -+extern const struct quotactl_ops bch2_quotactl_operations; -+ -+#else -+ -+static inline int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid, -+ enum quota_counters counter, s64 v, -+ enum quota_acct_mode mode) -+{ -+ return 0; -+} -+ -+static inline int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes, -+ struct bch_qid dst, -+ struct bch_qid src, u64 space, -+ enum quota_acct_mode mode) -+{ -+ return 0; -+} -+ -+static inline void bch2_fs_quota_exit(struct bch_fs *c) {} -+static inline void bch2_fs_quota_init(struct bch_fs *c) {} -+static inline int bch2_fs_quota_read(struct bch_fs *c) { return 0; } -+ -+#endif -+ -+#endif /* _BCACHEFS_QUOTA_H */ -diff --git a/fs/bcachefs/quota_types.h b/fs/bcachefs/quota_types.h -new file mode 100644 -index 000000000000..6a136083d389 ---- /dev/null -+++ b/fs/bcachefs/quota_types.h -@@ -0,0 +1,43 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_QUOTA_TYPES_H -+#define _BCACHEFS_QUOTA_TYPES_H -+ -+#include -+ -+struct bch_qid { -+ u32 q[QTYP_NR]; -+}; -+ -+enum quota_acct_mode { -+ KEY_TYPE_QUOTA_PREALLOC, -+ KEY_TYPE_QUOTA_WARN, -+ KEY_TYPE_QUOTA_NOCHECK, -+}; -+ -+struct memquota_counter { -+ u64 v; -+ u64 hardlimit; -+ u64 softlimit; -+ s64 timer; -+ int warns; -+ int warning_issued; -+}; -+ -+struct bch_memquota { -+ struct memquota_counter c[Q_COUNTERS]; -+}; -+ -+typedef GENRADIX(struct bch_memquota) bch_memquota_table; -+ -+struct quota_limit { -+ u32 timelimit; -+ u32 warnlimit; -+}; -+ -+struct bch_memquota_type { -+ struct quota_limit limits[Q_COUNTERS]; -+ bch_memquota_table table; -+ struct mutex lock; -+}; -+ -+#endif /* _BCACHEFS_QUOTA_TYPES_H */ -diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -new file mode 100644 -index 000000000000..56a1f761271f ---- /dev/null -+++ b/fs/bcachefs/rebalance.c -@@ -0,0 +1,331 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include 
"bcachefs.h" -+#include "alloc_foreground.h" -+#include "btree_iter.h" -+#include "buckets.h" -+#include "clock.h" -+#include "disk_groups.h" -+#include "extents.h" -+#include "io.h" -+#include "move.h" -+#include "rebalance.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * Check if an extent should be moved: -+ * returns -1 if it should not be moved, or -+ * device of pointer that should be moved, if known, or INT_MAX if unknown -+ */ -+static int __bch2_rebalance_pred(struct bch_fs *c, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ if (io_opts->background_compression && -+ !bch2_bkey_is_incompressible(k)) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && -+ p.crc.compression_type != -+ bch2_compression_opt_to_type[io_opts->background_compression]) -+ return p.ptr.dev; -+ -+ if (io_opts->background_target) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && -+ !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target)) -+ return p.ptr.dev; -+ -+ return -1; -+} -+ -+void bch2_rebalance_add_key(struct bch_fs *c, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts) -+{ -+ atomic64_t *counter; -+ int dev; -+ -+ dev = __bch2_rebalance_pred(c, k, io_opts); -+ if (dev < 0) -+ return; -+ -+ counter = dev < INT_MAX -+ ? &bch_dev_bkey_exists(c, dev)->rebalance_work -+ : &c->rebalance.work_unknown_dev; -+ -+ if (atomic64_add_return(k.k->size, counter) == k.k->size) -+ rebalance_wakeup(c); -+} -+ -+static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ if (__bch2_rebalance_pred(c, k, io_opts) >= 0) { -+ data_opts->target = io_opts->background_target; -+ data_opts->btree_insert_flags = 0; -+ return DATA_ADD_REPLICAS; -+ } else { -+ return DATA_SKIP; -+ } -+} -+ -+void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors) -+{ -+ if (atomic64_add_return(sectors, &c->rebalance.work_unknown_dev) == -+ sectors) -+ rebalance_wakeup(c); -+} -+ -+struct rebalance_work { -+ int dev_most_full_idx; -+ unsigned dev_most_full_percent; -+ u64 dev_most_full_work; -+ u64 dev_most_full_capacity; -+ u64 total_work; -+}; -+ -+static void rebalance_work_accumulate(struct rebalance_work *w, -+ u64 dev_work, u64 unknown_dev, u64 capacity, int idx) -+{ -+ unsigned percent_full; -+ u64 work = dev_work + unknown_dev; -+ -+ if (work < dev_work || work < unknown_dev) -+ work = U64_MAX; -+ work = min(work, capacity); -+ -+ percent_full = div64_u64(work * 100, capacity); -+ -+ if (percent_full >= w->dev_most_full_percent) { -+ w->dev_most_full_idx = idx; -+ w->dev_most_full_percent = percent_full; -+ w->dev_most_full_work = work; -+ w->dev_most_full_capacity = capacity; -+ } -+ -+ if (w->total_work + dev_work >= w->total_work && -+ w->total_work + dev_work >= dev_work) -+ w->total_work += dev_work; -+} -+ -+static struct rebalance_work rebalance_work(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ struct rebalance_work ret = { .dev_most_full_idx = -1 }; -+ u64 unknown_dev = atomic64_read(&c->rebalance.work_unknown_dev); -+ unsigned i; -+ -+ for_each_online_member(ca, c, i) -+ rebalance_work_accumulate(&ret, -+ atomic64_read(&ca->rebalance_work), -+ unknown_dev, -+ bucket_to_sector(ca, ca->mi.nbuckets - -+ ca->mi.first_bucket), -+ i); -+ -+ rebalance_work_accumulate(&ret, -+ unknown_dev, 0, c->capacity, -1); 
-+ -+ return ret; -+} -+ -+static void rebalance_work_reset(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ for_each_online_member(ca, c, i) -+ atomic64_set(&ca->rebalance_work, 0); -+ -+ atomic64_set(&c->rebalance.work_unknown_dev, 0); -+} -+ -+static unsigned long curr_cputime(void) -+{ -+ u64 utime, stime; -+ -+ task_cputime_adjusted(current, &utime, &stime); -+ return nsecs_to_jiffies(utime + stime); -+} -+ -+static int bch2_rebalance_thread(void *arg) -+{ -+ struct bch_fs *c = arg; -+ struct bch_fs_rebalance *r = &c->rebalance; -+ struct io_clock *clock = &c->io_clock[WRITE]; -+ struct rebalance_work w, p; -+ unsigned long start, prev_start; -+ unsigned long prev_run_time, prev_run_cputime; -+ unsigned long cputime, prev_cputime; -+ unsigned long io_start; -+ long throttle; -+ -+ set_freezable(); -+ -+ io_start = atomic_long_read(&clock->now); -+ p = rebalance_work(c); -+ prev_start = jiffies; -+ prev_cputime = curr_cputime(); -+ -+ while (!kthread_wait_freezable(r->enabled)) { -+ cond_resched(); -+ -+ start = jiffies; -+ cputime = curr_cputime(); -+ -+ prev_run_time = start - prev_start; -+ prev_run_cputime = cputime - prev_cputime; -+ -+ w = rebalance_work(c); -+ BUG_ON(!w.dev_most_full_capacity); -+ -+ if (!w.total_work) { -+ r->state = REBALANCE_WAITING; -+ kthread_wait_freezable(rebalance_work(c).total_work); -+ continue; -+ } -+ -+ /* -+ * If there isn't much work to do, throttle cpu usage: -+ */ -+ throttle = prev_run_cputime * 100 / -+ max(1U, w.dev_most_full_percent) - -+ prev_run_time; -+ -+ if (w.dev_most_full_percent < 20 && throttle > 0) { -+ r->throttled_until_iotime = io_start + -+ div_u64(w.dev_most_full_capacity * -+ (20 - w.dev_most_full_percent), -+ 50); -+ -+ if (atomic_long_read(&clock->now) + clock->max_slop < -+ r->throttled_until_iotime) { -+ r->throttled_until_cputime = start + throttle; -+ r->state = REBALANCE_THROTTLED; -+ -+ bch2_kthread_io_clock_wait(clock, -+ r->throttled_until_iotime, -+ throttle); -+ continue; -+ } -+ } -+ -+ /* minimum 1 mb/sec: */ -+ r->pd.rate.rate = -+ max_t(u64, 1 << 11, -+ r->pd.rate.rate * -+ max(p.dev_most_full_percent, 1U) / -+ max(w.dev_most_full_percent, 1U)); -+ -+ io_start = atomic_long_read(&clock->now); -+ p = w; -+ prev_start = start; -+ prev_cputime = cputime; -+ -+ r->state = REBALANCE_RUNNING; -+ memset(&r->move_stats, 0, sizeof(r->move_stats)); -+ rebalance_work_reset(c); -+ -+ bch2_move_data(c, -+ /* ratelimiting disabled for now */ -+ NULL, /* &r->pd.rate, */ -+ writepoint_ptr(&c->rebalance_write_point), -+ POS_MIN, POS_MAX, -+ rebalance_pred, NULL, -+ &r->move_stats); -+ } -+ -+ return 0; -+} -+ -+void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct bch_fs_rebalance *r = &c->rebalance; -+ struct rebalance_work w = rebalance_work(c); -+ char h1[21], h2[21]; -+ -+ bch2_hprint(&PBUF(h1), w.dev_most_full_work << 9); -+ bch2_hprint(&PBUF(h2), w.dev_most_full_capacity << 9); -+ pr_buf(out, "fullest_dev (%i):\t%s/%s\n", -+ w.dev_most_full_idx, h1, h2); -+ -+ bch2_hprint(&PBUF(h1), w.total_work << 9); -+ bch2_hprint(&PBUF(h2), c->capacity << 9); -+ pr_buf(out, "total work:\t\t%s/%s\n", h1, h2); -+ -+ pr_buf(out, "rate:\t\t\t%u\n", r->pd.rate.rate); -+ -+ switch (r->state) { -+ case REBALANCE_WAITING: -+ pr_buf(out, "waiting\n"); -+ break; -+ case REBALANCE_THROTTLED: -+ bch2_hprint(&PBUF(h1), -+ (r->throttled_until_iotime - -+ atomic_long_read(&c->io_clock[WRITE].now)) << 9); -+ pr_buf(out, "throttled for %lu sec or %s io\n", -+ (r->throttled_until_cputime - jiffies) / 
HZ, -+ h1); -+ break; -+ case REBALANCE_RUNNING: -+ pr_buf(out, "running\n"); -+ pr_buf(out, "pos %llu:%llu\n", -+ r->move_stats.pos.inode, -+ r->move_stats.pos.offset); -+ break; -+ } -+} -+ -+void bch2_rebalance_stop(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ c->rebalance.pd.rate.rate = UINT_MAX; -+ bch2_ratelimit_reset(&c->rebalance.pd.rate); -+ -+ p = rcu_dereference_protected(c->rebalance.thread, 1); -+ c->rebalance.thread = NULL; -+ -+ if (p) { -+ /* for sychronizing with rebalance_wakeup() */ -+ synchronize_rcu(); -+ -+ kthread_stop(p); -+ put_task_struct(p); -+ } -+} -+ -+int bch2_rebalance_start(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ if (c->opts.nochanges) -+ return 0; -+ -+ p = kthread_create(bch2_rebalance_thread, c, "bch_rebalance"); -+ if (IS_ERR(p)) -+ return PTR_ERR(p); -+ -+ get_task_struct(p); -+ rcu_assign_pointer(c->rebalance.thread, p); -+ wake_up_process(p); -+ return 0; -+} -+ -+void bch2_fs_rebalance_init(struct bch_fs *c) -+{ -+ bch2_pd_controller_init(&c->rebalance.pd); -+ -+ atomic64_set(&c->rebalance.work_unknown_dev, S64_MAX); -+} -diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h -new file mode 100644 -index 000000000000..7ade0bb81cce ---- /dev/null -+++ b/fs/bcachefs/rebalance.h -@@ -0,0 +1,28 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REBALANCE_H -+#define _BCACHEFS_REBALANCE_H -+ -+#include "rebalance_types.h" -+ -+static inline void rebalance_wakeup(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ rcu_read_lock(); -+ p = rcu_dereference(c->rebalance.thread); -+ if (p) -+ wake_up_process(p); -+ rcu_read_unlock(); -+} -+ -+void bch2_rebalance_add_key(struct bch_fs *, struct bkey_s_c, -+ struct bch_io_opts *); -+void bch2_rebalance_add_work(struct bch_fs *, u64); -+ -+void bch2_rebalance_work_to_text(struct printbuf *, struct bch_fs *); -+ -+void bch2_rebalance_stop(struct bch_fs *); -+int bch2_rebalance_start(struct bch_fs *); -+void bch2_fs_rebalance_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_REBALANCE_H */ -diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h -new file mode 100644 -index 000000000000..192c6be20ced ---- /dev/null -+++ b/fs/bcachefs/rebalance_types.h -@@ -0,0 +1,27 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REBALANCE_TYPES_H -+#define _BCACHEFS_REBALANCE_TYPES_H -+ -+#include "move_types.h" -+ -+enum rebalance_state { -+ REBALANCE_WAITING, -+ REBALANCE_THROTTLED, -+ REBALANCE_RUNNING, -+}; -+ -+struct bch_fs_rebalance { -+ struct task_struct __rcu *thread; -+ struct bch_pd_controller pd; -+ -+ atomic64_t work_unknown_dev; -+ -+ enum rebalance_state state; -+ unsigned long throttled_until_iotime; -+ unsigned long throttled_until_cputime; -+ struct bch_move_stats move_stats; -+ -+ unsigned enabled:1; -+}; -+ -+#endif /* _BCACHEFS_REBALANCE_TYPES_H */ -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -new file mode 100644 -index 000000000000..d70fa968db50 ---- /dev/null -+++ b/fs/bcachefs/recovery.c -@@ -0,0 +1,1350 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "buckets.h" -+#include "dirent.h" -+#include "ec.h" -+#include "error.h" -+#include "fs-common.h" -+#include "fsck.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "quota.h" -+#include "recovery.h" -+#include "replicas.h" -+#include 
"super-io.h" -+ -+#include -+#include -+ -+#define QSTR(n) { { { .len = strlen(n) } }, .name = n } -+ -+/* iterate over keys read from the journal: */ -+ -+static struct journal_key *journal_key_search(struct journal_keys *journal_keys, -+ enum btree_id id, unsigned level, -+ struct bpos pos) -+{ -+ size_t l = 0, r = journal_keys->nr, m; -+ -+ while (l < r) { -+ m = l + ((r - l) >> 1); -+ if ((cmp_int(id, journal_keys->d[m].btree_id) ?: -+ cmp_int(level, journal_keys->d[m].level) ?: -+ bkey_cmp(pos, journal_keys->d[m].k->k.p)) > 0) -+ l = m + 1; -+ else -+ r = m; -+ } -+ -+ BUG_ON(l < journal_keys->nr && -+ (cmp_int(id, journal_keys->d[l].btree_id) ?: -+ cmp_int(level, journal_keys->d[l].level) ?: -+ bkey_cmp(pos, journal_keys->d[l].k->k.p)) > 0); -+ -+ BUG_ON(l && -+ (cmp_int(id, journal_keys->d[l - 1].btree_id) ?: -+ cmp_int(level, journal_keys->d[l - 1].level) ?: -+ bkey_cmp(pos, journal_keys->d[l - 1].k->k.p)) <= 0); -+ -+ return l < journal_keys->nr ? journal_keys->d + l : NULL; -+} -+ -+static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter) -+{ -+ if (iter->k && -+ iter->k < iter->keys->d + iter->keys->nr && -+ iter->k->btree_id == iter->btree_id && -+ iter->k->level == iter->level) -+ return iter->k->k; -+ -+ iter->k = NULL; -+ return NULL; -+} -+ -+static void bch2_journal_iter_advance(struct journal_iter *iter) -+{ -+ if (iter->k) -+ iter->k++; -+} -+ -+static void bch2_journal_iter_init(struct journal_iter *iter, -+ struct journal_keys *journal_keys, -+ enum btree_id id, unsigned level, -+ struct bpos pos) -+{ -+ iter->btree_id = id; -+ iter->level = level; -+ iter->keys = journal_keys; -+ iter->k = journal_key_search(journal_keys, id, level, pos); -+} -+ -+static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter) -+{ -+ return iter->btree -+ ? bch2_btree_iter_peek(iter->btree) -+ : bch2_btree_node_iter_peek_unpack(&iter->node_iter, -+ iter->b, &iter->unpacked); -+} -+ -+static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter) -+{ -+ if (iter->btree) -+ bch2_btree_iter_next(iter->btree); -+ else -+ bch2_btree_node_iter_advance(&iter->node_iter, iter->b); -+} -+ -+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter) -+{ -+ switch (iter->last) { -+ case none: -+ break; -+ case btree: -+ bch2_journal_iter_advance_btree(iter); -+ break; -+ case journal: -+ bch2_journal_iter_advance(&iter->journal); -+ break; -+ } -+ -+ iter->last = none; -+} -+ -+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter) -+{ -+ struct bkey_s_c ret; -+ -+ while (1) { -+ struct bkey_s_c btree_k = -+ bch2_journal_iter_peek_btree(iter); -+ struct bkey_s_c journal_k = -+ bkey_i_to_s_c(bch2_journal_iter_peek(&iter->journal)); -+ -+ if (btree_k.k && journal_k.k) { -+ int cmp = bkey_cmp(btree_k.k->p, journal_k.k->p); -+ -+ if (!cmp) -+ bch2_journal_iter_advance_btree(iter); -+ -+ iter->last = cmp < 0 ? btree : journal; -+ } else if (btree_k.k) { -+ iter->last = btree; -+ } else if (journal_k.k) { -+ iter->last = journal; -+ } else { -+ iter->last = none; -+ return bkey_s_c_null; -+ } -+ -+ ret = iter->last == journal ? 
journal_k : btree_k; -+ -+ if (iter->b && -+ bkey_cmp(ret.k->p, iter->b->data->max_key) > 0) { -+ iter->journal.k = NULL; -+ iter->last = none; -+ return bkey_s_c_null; -+ } -+ -+ if (!bkey_deleted(ret.k)) -+ break; -+ -+ bch2_btree_and_journal_iter_advance(iter); -+ } -+ -+ return ret; -+} -+ -+struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *iter) -+{ -+ bch2_btree_and_journal_iter_advance(iter); -+ -+ return bch2_btree_and_journal_iter_peek(iter); -+} -+ -+void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *iter, -+ struct btree_trans *trans, -+ struct journal_keys *journal_keys, -+ enum btree_id id, struct bpos pos) -+{ -+ memset(iter, 0, sizeof(*iter)); -+ -+ iter->btree = bch2_trans_get_iter(trans, id, pos, 0); -+ bch2_journal_iter_init(&iter->journal, journal_keys, id, 0, pos); -+} -+ -+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter, -+ struct journal_keys *journal_keys, -+ struct btree *b) -+{ -+ memset(iter, 0, sizeof(*iter)); -+ -+ iter->b = b; -+ bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b); -+ bch2_journal_iter_init(&iter->journal, journal_keys, -+ b->c.btree_id, b->c.level, b->data->min_key); -+} -+ -+/* Walk btree, overlaying keys from the journal: */ -+ -+static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b, -+ struct journal_keys *journal_keys, -+ enum btree_id btree_id, -+ btree_walk_node_fn node_fn, -+ btree_walk_key_fn key_fn) -+{ -+ struct btree_and_journal_iter iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b); -+ -+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { -+ ret = key_fn(c, btree_id, b->c.level, k); -+ if (ret) -+ break; -+ -+ if (b->c.level) { -+ struct btree *child; -+ BKEY_PADDED(k) tmp; -+ -+ bkey_reassemble(&tmp.k, k); -+ k = bkey_i_to_s_c(&tmp.k); -+ -+ bch2_btree_and_journal_iter_advance(&iter); -+ -+ if (b->c.level > 0) { -+ child = bch2_btree_node_get_noiter(c, &tmp.k, -+ b->c.btree_id, b->c.level - 1); -+ ret = PTR_ERR_OR_ZERO(child); -+ if (ret) -+ break; -+ -+ ret = (node_fn ? node_fn(c, b) : 0) ?: -+ bch2_btree_and_journal_walk_recurse(c, child, -+ journal_keys, btree_id, node_fn, key_fn); -+ six_unlock_read(&child->c.lock); -+ -+ if (ret) -+ break; -+ } -+ } else { -+ bch2_btree_and_journal_iter_advance(&iter); -+ } -+ } -+ -+ return ret; -+} -+ -+int bch2_btree_and_journal_walk(struct bch_fs *c, struct journal_keys *journal_keys, -+ enum btree_id btree_id, -+ btree_walk_node_fn node_fn, -+ btree_walk_key_fn key_fn) -+{ -+ struct btree *b = c->btree_roots[btree_id].b; -+ int ret = 0; -+ -+ if (btree_node_fake(b)) -+ return 0; -+ -+ six_lock_read(&b->c.lock, NULL, NULL); -+ ret = (node_fn ? 
node_fn(c, b) : 0) ?: -+ bch2_btree_and_journal_walk_recurse(c, b, journal_keys, btree_id, -+ node_fn, key_fn) ?: -+ key_fn(c, btree_id, b->c.level + 1, bkey_i_to_s_c(&b->key)); -+ six_unlock_read(&b->c.lock); -+ -+ return ret; -+} -+ -+/* sort and dedup all keys in the journal: */ -+ -+void bch2_journal_entries_free(struct list_head *list) -+{ -+ -+ while (!list_empty(list)) { -+ struct journal_replay *i = -+ list_first_entry(list, struct journal_replay, list); -+ list_del(&i->list); -+ kvpfree(i, offsetof(struct journal_replay, j) + -+ vstruct_bytes(&i->j)); -+ } -+} -+ -+/* -+ * When keys compare equal, oldest compares first: -+ */ -+static int journal_sort_key_cmp(const void *_l, const void *_r) -+{ -+ const struct journal_key *l = _l; -+ const struct journal_key *r = _r; -+ -+ return cmp_int(l->btree_id, r->btree_id) ?: -+ cmp_int(l->level, r->level) ?: -+ bkey_cmp(l->k->k.p, r->k->k.p) ?: -+ cmp_int(l->journal_seq, r->journal_seq) ?: -+ cmp_int(l->journal_offset, r->journal_offset); -+} -+ -+void bch2_journal_keys_free(struct journal_keys *keys) -+{ -+ kvfree(keys->d); -+ keys->d = NULL; -+ keys->nr = 0; -+} -+ -+static struct journal_keys journal_keys_sort(struct list_head *journal_entries) -+{ -+ struct journal_replay *p; -+ struct jset_entry *entry; -+ struct bkey_i *k, *_n; -+ struct journal_keys keys = { NULL }; -+ struct journal_key *src, *dst; -+ size_t nr_keys = 0; -+ -+ if (list_empty(journal_entries)) -+ return keys; -+ -+ keys.journal_seq_base = -+ le64_to_cpu(list_last_entry(journal_entries, -+ struct journal_replay, list)->j.last_seq); -+ -+ list_for_each_entry(p, journal_entries, list) { -+ if (le64_to_cpu(p->j.seq) < keys.journal_seq_base) -+ continue; -+ -+ for_each_jset_key(k, _n, entry, &p->j) -+ nr_keys++; -+ } -+ -+ -+ keys.d = kvmalloc(sizeof(keys.d[0]) * nr_keys, GFP_KERNEL); -+ if (!keys.d) -+ goto err; -+ -+ list_for_each_entry(p, journal_entries, list) { -+ if (le64_to_cpu(p->j.seq) < keys.journal_seq_base) -+ continue; -+ -+ for_each_jset_key(k, _n, entry, &p->j) -+ keys.d[keys.nr++] = (struct journal_key) { -+ .btree_id = entry->btree_id, -+ .level = entry->level, -+ .k = k, -+ .journal_seq = le64_to_cpu(p->j.seq) - -+ keys.journal_seq_base, -+ .journal_offset = k->_data - p->j._data, -+ }; -+ } -+ -+ sort(keys.d, keys.nr, sizeof(keys.d[0]), journal_sort_key_cmp, NULL); -+ -+ src = dst = keys.d; -+ while (src < keys.d + keys.nr) { -+ while (src + 1 < keys.d + keys.nr && -+ src[0].btree_id == src[1].btree_id && -+ src[0].level == src[1].level && -+ !bkey_cmp(src[0].k->k.p, src[1].k->k.p)) -+ src++; -+ -+ *dst++ = *src++; -+ } -+ -+ keys.nr = dst - keys.d; -+err: -+ return keys; -+} -+ -+/* journal replay: */ -+ -+static void replay_now_at(struct journal *j, u64 seq) -+{ -+ BUG_ON(seq < j->replay_journal_seq); -+ BUG_ON(seq > j->replay_journal_seq_end); -+ -+ while (j->replay_journal_seq < seq) -+ bch2_journal_pin_put(j, j->replay_journal_seq++); -+} -+ -+static int bch2_extent_replay_key(struct bch_fs *c, enum btree_id btree_id, -+ struct bkey_i *k) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter, *split_iter; -+ /* -+ * We might cause compressed extents to be split, so we need to pass in -+ * a disk_reservation: -+ */ -+ struct disk_reservation disk_res = -+ bch2_disk_reservation_init(c, 0); -+ struct bkey_i *split; -+ struct bpos atomic_end; -+ /* -+ * Some extents aren't equivalent - w.r.t. 
what the triggers do -+ * - if they're split: -+ */ -+ bool remark_if_split = bch2_bkey_sectors_compressed(bkey_i_to_s_c(k)) || -+ k->k.type == KEY_TYPE_reflink_p; -+ bool remark = false; -+ int ret; -+ -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -+retry: -+ bch2_trans_begin(&trans); -+ -+ iter = bch2_trans_get_iter(&trans, btree_id, -+ bkey_start_pos(&k->k), -+ BTREE_ITER_INTENT); -+ -+ do { -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ goto err; -+ -+ atomic_end = bpos_min(k->k.p, iter->l[0].b->key.k.p); -+ -+ split = bch2_trans_kmalloc(&trans, bkey_bytes(&k->k)); -+ ret = PTR_ERR_OR_ZERO(split); -+ if (ret) -+ goto err; -+ -+ if (!remark && -+ remark_if_split && -+ bkey_cmp(atomic_end, k->k.p) < 0) { -+ ret = bch2_disk_reservation_add(c, &disk_res, -+ k->k.size * -+ bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(k)), -+ BCH_DISK_RESERVATION_NOFAIL); -+ BUG_ON(ret); -+ -+ remark = true; -+ } -+ -+ bkey_copy(split, k); -+ bch2_cut_front(iter->pos, split); -+ bch2_cut_back(atomic_end, split); -+ -+ split_iter = bch2_trans_copy_iter(&trans, iter); -+ ret = PTR_ERR_OR_ZERO(split_iter); -+ if (ret) -+ goto err; -+ -+ /* -+ * It's important that we don't go through the -+ * extent_handle_overwrites() and extent_update_to_keys() path -+ * here: journal replay is supposed to treat extents like -+ * regular keys -+ */ -+ __bch2_btree_iter_set_pos(split_iter, split->k.p, false); -+ bch2_trans_update(&trans, split_iter, split, -+ BTREE_TRIGGER_NORUN); -+ -+ bch2_btree_iter_set_pos(iter, split->k.p); -+ -+ if (remark) { -+ ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(split), -+ 0, split->k.size, -+ BTREE_TRIGGER_INSERT); -+ if (ret) -+ goto err; -+ } -+ } while (bkey_cmp(iter->pos, k->k.p) < 0); -+ -+ if (remark) { -+ ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k), -+ 0, -((s64) k->k.size), -+ BTREE_TRIGGER_OVERWRITE); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_trans_commit(&trans, &disk_res, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_JOURNAL_REPLAY); -+err: -+ if (ret == -EINTR) -+ goto retry; -+ -+ bch2_disk_reservation_put(c, &disk_res); -+ -+ return bch2_trans_exit(&trans) ?: ret; -+} -+ -+static int __bch2_journal_replay_key(struct btree_trans *trans, -+ enum btree_id id, unsigned level, -+ struct bkey_i *k) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ iter = bch2_trans_get_node_iter(trans, id, k->k.p, -+ BTREE_MAX_DEPTH, level, -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ /* -+ * iter->flags & BTREE_ITER_IS_EXTENTS triggers the update path to run -+ * extent_handle_overwrites() and extent_update_to_keys() - but we don't -+ * want that here, journal replay is supposed to treat extents like -+ * regular keys: -+ */ -+ __bch2_btree_iter_set_pos(iter, k->k.p, false); -+ -+ ret = bch2_btree_iter_traverse(iter) ?: -+ bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN); -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static int bch2_journal_replay_key(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bkey_i *k) -+{ -+ return bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_JOURNAL_REPLAY, -+ __bch2_journal_replay_key(&trans, id, level, k)); -+} -+ -+static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, k->k.p, -+ BTREE_ITER_CACHED| -+ BTREE_ITER_CACHED_NOFILL| -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(iter) ?: -+ 
bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN); -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k) -+{ -+ return bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_USE_RESERVE| -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_JOURNAL_REPLAY, -+ __bch2_alloc_replay_key(&trans, k)); -+} -+ -+static int journal_sort_seq_cmp(const void *_l, const void *_r) -+{ -+ const struct journal_key *l = _l; -+ const struct journal_key *r = _r; -+ -+ return cmp_int(r->level, l->level) ?: -+ cmp_int(l->journal_seq, r->journal_seq) ?: -+ cmp_int(l->btree_id, r->btree_id) ?: -+ bkey_cmp(l->k->k.p, r->k->k.p); -+} -+ -+static int bch2_journal_replay(struct bch_fs *c, -+ struct journal_keys keys) -+{ -+ struct journal *j = &c->journal; -+ struct journal_key *i; -+ u64 seq; -+ int ret; -+ -+ sort(keys.d, keys.nr, sizeof(keys.d[0]), journal_sort_seq_cmp, NULL); -+ -+ if (keys.nr) -+ replay_now_at(j, keys.journal_seq_base); -+ -+ seq = j->replay_journal_seq; -+ -+ /* -+ * First replay updates to the alloc btree - these will only update the -+ * btree key cache: -+ */ -+ for_each_journal_key(keys, i) { -+ cond_resched(); -+ -+ if (!i->level && i->btree_id == BTREE_ID_ALLOC) { -+ j->replay_journal_seq = keys.journal_seq_base + i->journal_seq; -+ ret = bch2_alloc_replay_key(c, i->k); -+ if (ret) -+ goto err; -+ } -+ } -+ -+ /* -+ * Next replay updates to interior btree nodes: -+ */ -+ for_each_journal_key(keys, i) { -+ cond_resched(); -+ -+ if (i->level) { -+ j->replay_journal_seq = keys.journal_seq_base + i->journal_seq; -+ ret = bch2_journal_replay_key(c, i->btree_id, i->level, i->k); -+ if (ret) -+ goto err; -+ } -+ } -+ -+ /* -+ * Now that the btree is in a consistent state, we can start journal -+ * reclaim (which will be flushing entries from the btree key cache back -+ * to the btree: -+ */ -+ set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags); -+ set_bit(JOURNAL_RECLAIM_STARTED, &j->flags); -+ -+ j->replay_journal_seq = seq; -+ -+ /* -+ * Now replay leaf node updates: -+ */ -+ for_each_journal_key(keys, i) { -+ cond_resched(); -+ -+ if (i->level || i->btree_id == BTREE_ID_ALLOC) -+ continue; -+ -+ replay_now_at(j, keys.journal_seq_base + i->journal_seq); -+ -+ ret = i->k->k.size -+ ? bch2_extent_replay_key(c, i->btree_id, i->k) -+ : bch2_journal_replay_key(c, i->btree_id, i->level, i->k); -+ if (ret) -+ goto err; -+ } -+ -+ replay_now_at(j, j->replay_journal_seq_end); -+ j->replay_journal_seq = 0; -+ -+ bch2_journal_set_replay_done(j); -+ bch2_journal_flush_all_pins(j); -+ return bch2_journal_error(j); -+err: -+ bch_err(c, "journal replay: error %d while replaying key", ret); -+ return ret; -+} -+ -+static bool journal_empty(struct list_head *journal) -+{ -+ return list_empty(journal) || -+ journal_entry_empty(&list_last_entry(journal, -+ struct journal_replay, list)->j); -+} -+ -+static int -+verify_journal_entries_not_blacklisted_or_missing(struct bch_fs *c, -+ struct list_head *journal) -+{ -+ struct journal_replay *i = -+ list_last_entry(journal, struct journal_replay, list); -+ u64 start_seq = le64_to_cpu(i->j.last_seq); -+ u64 end_seq = le64_to_cpu(i->j.seq); -+ u64 seq = start_seq; -+ int ret = 0; -+ -+ list_for_each_entry(i, journal, list) { -+ if (le64_to_cpu(i->j.seq) < start_seq) -+ continue; -+ -+ fsck_err_on(seq != le64_to_cpu(i->j.seq), c, -+ "journal entries %llu-%llu missing! 
(replaying %llu-%llu)", -+ seq, le64_to_cpu(i->j.seq) - 1, -+ start_seq, end_seq); -+ -+ seq = le64_to_cpu(i->j.seq); -+ -+ fsck_err_on(bch2_journal_seq_is_blacklisted(c, seq, false), c, -+ "found blacklisted journal entry %llu", seq); -+ -+ do { -+ seq++; -+ } while (bch2_journal_seq_is_blacklisted(c, seq, false)); -+ } -+fsck_err: -+ return ret; -+} -+ -+/* journal replay early: */ -+ -+static int journal_replay_entry_early(struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ int ret = 0; -+ -+ switch (entry->type) { -+ case BCH_JSET_ENTRY_btree_root: { -+ struct btree_root *r; -+ -+ if (entry->btree_id >= BTREE_ID_NR) { -+ bch_err(c, "filesystem has unknown btree type %u", -+ entry->btree_id); -+ return -EINVAL; -+ } -+ -+ r = &c->btree_roots[entry->btree_id]; -+ -+ if (entry->u64s) { -+ r->level = entry->level; -+ bkey_copy(&r->key, &entry->start[0]); -+ r->error = 0; -+ } else { -+ r->error = -EIO; -+ } -+ r->alive = true; -+ break; -+ } -+ case BCH_JSET_ENTRY_usage: { -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ -+ switch (entry->btree_id) { -+ case FS_USAGE_RESERVED: -+ if (entry->level < BCH_REPLICAS_MAX) -+ c->usage_base->persistent_reserved[entry->level] = -+ le64_to_cpu(u->v); -+ break; -+ case FS_USAGE_INODES: -+ c->usage_base->nr_inodes = le64_to_cpu(u->v); -+ break; -+ case FS_USAGE_KEY_VERSION: -+ atomic64_set(&c->key_version, -+ le64_to_cpu(u->v)); -+ break; -+ } -+ -+ break; -+ } -+ case BCH_JSET_ENTRY_data_usage: { -+ struct jset_entry_data_usage *u = -+ container_of(entry, struct jset_entry_data_usage, entry); -+ ret = bch2_replicas_set_usage(c, &u->r, -+ le64_to_cpu(u->v)); -+ break; -+ } -+ case BCH_JSET_ENTRY_blacklist: { -+ struct jset_entry_blacklist *bl_entry = -+ container_of(entry, struct jset_entry_blacklist, entry); -+ -+ ret = bch2_journal_seq_blacklist_add(c, -+ le64_to_cpu(bl_entry->seq), -+ le64_to_cpu(bl_entry->seq) + 1); -+ break; -+ } -+ case BCH_JSET_ENTRY_blacklist_v2: { -+ struct jset_entry_blacklist_v2 *bl_entry = -+ container_of(entry, struct jset_entry_blacklist_v2, entry); -+ -+ ret = bch2_journal_seq_blacklist_add(c, -+ le64_to_cpu(bl_entry->start), -+ le64_to_cpu(bl_entry->end) + 1); -+ break; -+ } -+ } -+ -+ return ret; -+} -+ -+static int journal_replay_early(struct bch_fs *c, -+ struct bch_sb_field_clean *clean, -+ struct list_head *journal) -+{ -+ struct jset_entry *entry; -+ int ret; -+ -+ if (clean) { -+ c->bucket_clock[READ].hand = le16_to_cpu(clean->read_clock); -+ c->bucket_clock[WRITE].hand = le16_to_cpu(clean->write_clock); -+ -+ for (entry = clean->start; -+ entry != vstruct_end(&clean->field); -+ entry = vstruct_next(entry)) { -+ ret = journal_replay_entry_early(c, entry); -+ if (ret) -+ return ret; -+ } -+ } else { -+ struct journal_replay *i = -+ list_last_entry(journal, struct journal_replay, list); -+ -+ c->bucket_clock[READ].hand = le16_to_cpu(i->j.read_clock); -+ c->bucket_clock[WRITE].hand = le16_to_cpu(i->j.write_clock); -+ -+ list_for_each_entry(i, journal, list) -+ vstruct_for_each(&i->j, entry) { -+ ret = journal_replay_entry_early(c, entry); -+ if (ret) -+ return ret; -+ } -+ } -+ -+ bch2_fs_usage_initialize(c); -+ -+ return 0; -+} -+ -+/* sb clean section: */ -+ -+static struct bkey_i *btree_root_find(struct bch_fs *c, -+ struct bch_sb_field_clean *clean, -+ struct jset *j, -+ enum btree_id id, unsigned *level) -+{ -+ struct bkey_i *k; -+ struct jset_entry *entry, *start, *end; -+ -+ if (clean) { -+ start = clean->start; -+ end = vstruct_end(&clean->field); -+ } else { -+ 
start = j->start; -+ end = vstruct_last(j); -+ } -+ -+ for (entry = start; entry < end; entry = vstruct_next(entry)) -+ if (entry->type == BCH_JSET_ENTRY_btree_root && -+ entry->btree_id == id) -+ goto found; -+ -+ return NULL; -+found: -+ if (!entry->u64s) -+ return ERR_PTR(-EINVAL); -+ -+ k = entry->start; -+ *level = entry->level; -+ return k; -+} -+ -+static int verify_superblock_clean(struct bch_fs *c, -+ struct bch_sb_field_clean **cleanp, -+ struct jset *j) -+{ -+ unsigned i; -+ struct bch_sb_field_clean *clean = *cleanp; -+ int ret = 0; -+ -+ if (!c->sb.clean || !j) -+ return 0; -+ -+ if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, -+ "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown", -+ le64_to_cpu(clean->journal_seq), -+ le64_to_cpu(j->seq))) { -+ kfree(clean); -+ *cleanp = NULL; -+ return 0; -+ } -+ -+ mustfix_fsck_err_on(j->read_clock != clean->read_clock, c, -+ "superblock read clock %u doesn't match journal %u after clean shutdown", -+ clean->read_clock, j->read_clock); -+ mustfix_fsck_err_on(j->write_clock != clean->write_clock, c, -+ "superblock write clock %u doesn't match journal %u after clean shutdown", -+ clean->write_clock, j->write_clock); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ char buf1[200], buf2[200]; -+ struct bkey_i *k1, *k2; -+ unsigned l1 = 0, l2 = 0; -+ -+ k1 = btree_root_find(c, clean, NULL, i, &l1); -+ k2 = btree_root_find(c, NULL, j, i, &l2); -+ -+ if (!k1 && !k2) -+ continue; -+ -+ mustfix_fsck_err_on(!k1 || !k2 || -+ IS_ERR(k1) || -+ IS_ERR(k2) || -+ k1->k.u64s != k2->k.u64s || -+ memcmp(k1, k2, bkey_bytes(k1)) || -+ l1 != l2, c, -+ "superblock btree root %u doesn't match journal after clean shutdown\n" -+ "sb: l=%u %s\n" -+ "journal: l=%u %s\n", i, -+ l1, (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(k1)), buf1), -+ l2, (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(k2)), buf2)); -+ } -+fsck_err: -+ return ret; -+} -+ -+static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c) -+{ -+ struct bch_sb_field_clean *clean, *sb_clean; -+ int ret; -+ -+ mutex_lock(&c->sb_lock); -+ sb_clean = bch2_sb_get_clean(c->disk_sb.sb); -+ -+ if (fsck_err_on(!sb_clean, c, -+ "superblock marked clean but clean section not present")) { -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ c->sb.clean = false; -+ mutex_unlock(&c->sb_lock); -+ return NULL; -+ } -+ -+ clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field), -+ GFP_KERNEL); -+ if (!clean) { -+ mutex_unlock(&c->sb_lock); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ if (le16_to_cpu(c->disk_sb.sb->version) < -+ bcachefs_metadata_version_bkey_renumber) -+ bch2_sb_clean_renumber(clean, READ); -+ -+ mutex_unlock(&c->sb_lock); -+ -+ return clean; -+fsck_err: -+ mutex_unlock(&c->sb_lock); -+ return ERR_PTR(ret); -+} -+ -+static int read_btree_roots(struct bch_fs *c) -+{ -+ unsigned i; -+ int ret = 0; -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ struct btree_root *r = &c->btree_roots[i]; -+ -+ if (!r->alive) -+ continue; -+ -+ if (i == BTREE_ID_ALLOC && -+ c->opts.reconstruct_alloc) { -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); -+ continue; -+ } -+ -+ -+ if (r->error) { -+ __fsck_err(c, i == BTREE_ID_ALLOC -+ ? FSCK_CAN_IGNORE : 0, -+ "invalid btree root %s", -+ bch2_btree_ids[i]); -+ if (i == BTREE_ID_ALLOC) -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); -+ } -+ -+ ret = bch2_btree_root_read(c, i, &r->key, r->level); -+ if (ret) { -+ __fsck_err(c, i == BTREE_ID_ALLOC -+ ? 
FSCK_CAN_IGNORE : 0, -+ "error reading btree root %s", -+ bch2_btree_ids[i]); -+ if (i == BTREE_ID_ALLOC) -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); -+ } -+ } -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (!c->btree_roots[i].b) -+ bch2_btree_root_alloc(c, i); -+fsck_err: -+ return ret; -+} -+ -+int bch2_fs_recovery(struct bch_fs *c) -+{ -+ const char *err = "cannot allocate memory"; -+ struct bch_sb_field_clean *clean = NULL; -+ u64 journal_seq; -+ bool write_sb = false, need_write_alloc = false; -+ int ret; -+ -+ if (c->sb.clean) -+ clean = read_superblock_clean(c); -+ ret = PTR_ERR_OR_ZERO(clean); -+ if (ret) -+ goto err; -+ -+ if (c->sb.clean) -+ bch_info(c, "recovering from clean shutdown, journal seq %llu", -+ le64_to_cpu(clean->journal_seq)); -+ -+ if (!c->replicas.entries || -+ c->opts.rebuild_replicas) { -+ bch_info(c, "building replicas info"); -+ set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags); -+ } -+ -+ if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) { -+ struct jset *j; -+ -+ ret = bch2_journal_read(c, &c->journal_entries); -+ if (ret) -+ goto err; -+ -+ if (mustfix_fsck_err_on(c->sb.clean && !journal_empty(&c->journal_entries), c, -+ "filesystem marked clean but journal not empty")) { -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ c->sb.clean = false; -+ } -+ -+ if (!c->sb.clean && list_empty(&c->journal_entries)) { -+ bch_err(c, "no journal entries found"); -+ ret = BCH_FSCK_REPAIR_IMPOSSIBLE; -+ goto err; -+ } -+ -+ c->journal_keys = journal_keys_sort(&c->journal_entries); -+ if (!c->journal_keys.d) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ j = &list_last_entry(&c->journal_entries, -+ struct journal_replay, list)->j; -+ -+ ret = verify_superblock_clean(c, &clean, j); -+ if (ret) -+ goto err; -+ -+ journal_seq = le64_to_cpu(j->seq) + 1; -+ } else { -+ journal_seq = le64_to_cpu(clean->journal_seq) + 1; -+ } -+ -+ if (!c->sb.clean && -+ !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) { -+ bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ ret = journal_replay_early(c, clean, &c->journal_entries); -+ if (ret) -+ goto err; -+ -+ if (!c->sb.clean) { -+ ret = bch2_journal_seq_blacklist_add(c, -+ journal_seq, -+ journal_seq + 4); -+ if (ret) { -+ bch_err(c, "error creating new journal seq blacklist entry"); -+ goto err; -+ } -+ -+ journal_seq += 4; -+ -+ /* -+ * The superblock needs to be written before we do any btree -+ * node writes: it will be in the read_write() path -+ */ -+ } -+ -+ ret = bch2_blacklist_table_initialize(c); -+ -+ if (!list_empty(&c->journal_entries)) { -+ ret = verify_journal_entries_not_blacklisted_or_missing(c, -+ &c->journal_entries); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_fs_journal_start(&c->journal, journal_seq, -+ &c->journal_entries); -+ if (ret) -+ goto err; -+ -+ ret = read_btree_roots(c); -+ if (ret) -+ goto err; -+ -+ bch_verbose(c, "starting alloc read"); -+ err = "error reading allocation information"; -+ ret = bch2_alloc_read(c, &c->journal_keys); -+ if (ret) -+ goto err; -+ bch_verbose(c, "alloc read done"); -+ -+ bch_verbose(c, "starting stripes_read"); -+ err = "error reading stripes"; -+ ret = bch2_stripes_read(c, &c->journal_keys); -+ if (ret) -+ goto err; -+ bch_verbose(c, "stripes_read done"); -+ -+ set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); -+ -+ if ((c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) && -+ !(c->sb.compat & (1ULL << 
BCH_COMPAT_FEAT_ALLOC_METADATA))) { -+ /* -+ * interior btree node updates aren't consistent with the -+ * journal; after an unclean shutdown we have to walk all -+ * pointers to metadata: -+ */ -+ bch_info(c, "starting metadata mark and sweep"); -+ err = "error in mark and sweep"; -+ ret = bch2_gc(c, &c->journal_keys, true, true); -+ if (ret < 0) -+ goto err; -+ if (ret) -+ need_write_alloc = true; -+ bch_verbose(c, "mark and sweep done"); -+ } -+ -+ if (c->opts.fsck || -+ !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) || -+ test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) { -+ bch_info(c, "starting mark and sweep"); -+ err = "error in mark and sweep"; -+ ret = bch2_gc(c, &c->journal_keys, true, false); -+ if (ret < 0) -+ goto err; -+ if (ret) -+ need_write_alloc = true; -+ bch_verbose(c, "mark and sweep done"); -+ } -+ -+ clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags); -+ set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); -+ -+ /* -+ * Skip past versions that might have possibly been used (as nonces), -+ * but hadn't had their pointers written: -+ */ -+ if (c->sb.encryption_type && !c->sb.clean) -+ atomic64_add(1 << 16, &c->key_version); -+ -+ if (c->opts.norecovery) -+ goto out; -+ -+ bch_verbose(c, "starting journal replay"); -+ err = "journal replay failed"; -+ ret = bch2_journal_replay(c, c->journal_keys); -+ if (ret) -+ goto err; -+ bch_verbose(c, "journal replay done"); -+ -+ if (need_write_alloc && !c->opts.nochanges) { -+ /* -+ * note that even when filesystem was clean there might be work -+ * to do here, if we ran gc (because of fsck) which recalculated -+ * oldest_gen: -+ */ -+ bch_verbose(c, "writing allocation info"); -+ err = "error writing out alloc info"; -+ ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW) ?: -+ bch2_alloc_write(c, BTREE_INSERT_LAZY_RW); -+ if (ret) { -+ bch_err(c, "error writing alloc info"); -+ goto err; -+ } -+ bch_verbose(c, "alloc write done"); -+ -+ set_bit(BCH_FS_ALLOC_WRITTEN, &c->flags); -+ } -+ -+ if (!c->sb.clean) { -+ if (!(c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) { -+ bch_info(c, "checking inode link counts"); -+ err = "error in recovery"; -+ ret = bch2_fsck_inode_nlink(c); -+ if (ret) -+ goto err; -+ bch_verbose(c, "check inodes done"); -+ -+ } else { -+ bch_verbose(c, "checking for deleted inodes"); -+ err = "error in recovery"; -+ ret = bch2_fsck_walk_inodes_only(c); -+ if (ret) -+ goto err; -+ bch_verbose(c, "check inodes done"); -+ } -+ } -+ -+ if (c->opts.fsck) { -+ bch_info(c, "starting fsck"); -+ err = "error in fsck"; -+ ret = bch2_fsck_full(c); -+ if (ret) -+ goto err; -+ bch_verbose(c, "fsck done"); -+ } -+ -+ if (enabled_qtypes(c)) { -+ bch_verbose(c, "reading quotas"); -+ ret = bch2_fs_quota_read(c); -+ if (ret) -+ goto err; -+ bch_verbose(c, "quotas done"); -+ } -+ -+ mutex_lock(&c->sb_lock); -+ if (c->opts.version_upgrade) { -+ if (c->sb.version < bcachefs_metadata_version_new_versioning) -+ c->disk_sb.sb->version_min = -+ le16_to_cpu(bcachefs_metadata_version_min); -+ c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current); -+ c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL; -+ write_sb = true; -+ } -+ -+ if (!test_bit(BCH_FS_ERROR, &c->flags)) { -+ c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO; -+ write_sb = true; -+ } -+ -+ if (c->opts.fsck && -+ !test_bit(BCH_FS_ERROR, &c->flags)) { -+ c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink; -+ SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0); -+ write_sb = true; -+ } -+ -+ if (write_sb) -+ bch2_write_super(c); -+ 
mutex_unlock(&c->sb_lock); -+ -+ if (c->journal_seq_blacklist_table && -+ c->journal_seq_blacklist_table->nr > 128) -+ queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work); -+out: -+ ret = 0; -+err: -+fsck_err: -+ set_bit(BCH_FS_FSCK_DONE, &c->flags); -+ bch2_flush_fsck_errs(c); -+ -+ if (!c->opts.keep_journal) { -+ bch2_journal_keys_free(&c->journal_keys); -+ bch2_journal_entries_free(&c->journal_entries); -+ } -+ kfree(clean); -+ if (ret) -+ bch_err(c, "Error in recovery: %s (%i)", err, ret); -+ else -+ bch_verbose(c, "ret %i", ret); -+ return ret; -+} -+ -+int bch2_fs_initialize(struct bch_fs *c) -+{ -+ struct bch_inode_unpacked root_inode, lostfound_inode; -+ struct bkey_inode_buf packed_inode; -+ struct qstr lostfound = QSTR("lost+found"); -+ const char *err = "cannot allocate memory"; -+ struct bch_dev *ca; -+ LIST_HEAD(journal); -+ unsigned i; -+ int ret; -+ -+ bch_notice(c, "initializing new filesystem"); -+ -+ mutex_lock(&c->sb_lock); -+ for_each_online_member(ca, c, i) -+ bch2_mark_dev_superblock(c, ca, 0); -+ mutex_unlock(&c->sb_lock); -+ -+ mutex_lock(&c->sb_lock); -+ c->disk_sb.sb->version = c->disk_sb.sb->version_min = -+ le16_to_cpu(bcachefs_metadata_version_current); -+ c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink; -+ c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL; -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); -+ set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ bch2_btree_root_alloc(c, i); -+ -+ set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags); -+ set_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags); -+ -+ err = "unable to allocate journal buckets"; -+ for_each_online_member(ca, c, i) { -+ ret = bch2_dev_journal_alloc(ca); -+ if (ret) { -+ percpu_ref_put(&ca->io_ref); -+ goto err; -+ } -+ } -+ -+ /* -+ * journal_res_get() will crash if called before this has -+ * set up the journal.pin FIFO and journal.cur pointer: -+ */ -+ bch2_fs_journal_start(&c->journal, 1, &journal); -+ bch2_journal_set_replay_done(&c->journal); -+ -+ err = "error going read-write"; -+ ret = bch2_fs_read_write_early(c); -+ if (ret) -+ goto err; -+ -+ /* -+ * Write out the superblock and journal buckets, now that we can do -+ * btree updates -+ */ -+ err = "error writing alloc info"; -+ ret = bch2_alloc_write(c, 0); -+ if (ret) -+ goto err; -+ -+ bch2_inode_init(c, &root_inode, 0, 0, -+ S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL); -+ root_inode.bi_inum = BCACHEFS_ROOT_INO; -+ bch2_inode_pack(&packed_inode, &root_inode); -+ -+ err = "error creating root directory"; -+ ret = bch2_btree_insert(c, BTREE_ID_INODES, -+ &packed_inode.inode.k_i, -+ NULL, NULL, 0); -+ if (ret) -+ goto err; -+ -+ bch2_inode_init_early(c, &lostfound_inode); -+ -+ err = "error creating lost+found"; -+ ret = bch2_trans_do(c, NULL, NULL, 0, -+ bch2_create_trans(&trans, BCACHEFS_ROOT_INO, -+ &root_inode, &lostfound_inode, -+ &lostfound, -+ 0, 0, S_IFDIR|0700, 0, -+ NULL, NULL)); -+ if (ret) -+ goto err; -+ -+ if (enabled_qtypes(c)) { -+ ret = bch2_fs_quota_read(c); -+ if (ret) -+ goto err; -+ } -+ -+ err = "error writing first journal entry"; -+ ret = bch2_journal_meta(&c->journal); -+ if (ret) -+ goto err; -+ -+ mutex_lock(&c->sb_lock); -+ SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+err: -+ pr_err("Error initializing new filesystem: %s (%i)", err, ret); -+ return ret; -+} -diff --git 
a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h -new file mode 100644 -index 000000000000..a66827c9addf ---- /dev/null -+++ b/fs/bcachefs/recovery.h -@@ -0,0 +1,60 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_RECOVERY_H -+#define _BCACHEFS_RECOVERY_H -+ -+#define for_each_journal_key(keys, i) \ -+ for (i = (keys).d; i < (keys).d + (keys).nr; (i)++) -+ -+struct journal_iter { -+ enum btree_id btree_id; -+ unsigned level; -+ struct journal_keys *keys; -+ struct journal_key *k; -+}; -+ -+/* -+ * Iterate over keys in the btree, with keys from the journal overlaid on top: -+ */ -+ -+struct btree_and_journal_iter { -+ struct btree_iter *btree; -+ -+ struct btree *b; -+ struct btree_node_iter node_iter; -+ struct bkey unpacked; -+ -+ struct journal_iter journal; -+ -+ enum last_key_returned { -+ none, -+ btree, -+ journal, -+ } last; -+}; -+ -+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *); -+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *); -+struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *); -+ -+void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *, -+ struct btree_trans *, -+ struct journal_keys *, -+ enum btree_id, struct bpos); -+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *, -+ struct journal_keys *, -+ struct btree *); -+ -+typedef int (*btree_walk_node_fn)(struct bch_fs *c, struct btree *b); -+typedef int (*btree_walk_key_fn)(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bkey_s_c k); -+ -+int bch2_btree_and_journal_walk(struct bch_fs *, struct journal_keys *, enum btree_id, -+ btree_walk_node_fn, btree_walk_key_fn); -+ -+void bch2_journal_keys_free(struct journal_keys *); -+void bch2_journal_entries_free(struct list_head *); -+ -+int bch2_fs_recovery(struct bch_fs *); -+int bch2_fs_initialize(struct bch_fs *); -+ -+#endif /* _BCACHEFS_RECOVERY_H */ -diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c -new file mode 100644 -index 000000000000..3c473f1380a6 ---- /dev/null -+++ b/fs/bcachefs/reflink.c -@@ -0,0 +1,303 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "bkey_on_stack.h" -+#include "btree_update.h" -+#include "extents.h" -+#include "inode.h" -+#include "io.h" -+#include "reflink.h" -+ -+#include -+ -+/* reflink pointers */ -+ -+const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ -+ if (bkey_val_bytes(p.k) != sizeof(*p.v)) -+ return "incorrect value size"; -+ -+ return NULL; -+} -+ -+void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ -+ pr_buf(out, "idx %llu", le64_to_cpu(p.v->idx)); -+} -+ -+enum merge_result bch2_reflink_p_merge(struct bch_fs *c, -+ struct bkey_s _l, struct bkey_s _r) -+{ -+ struct bkey_s_reflink_p l = bkey_s_to_reflink_p(_l); -+ struct bkey_s_reflink_p r = bkey_s_to_reflink_p(_r); -+ -+ if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx)) -+ return BCH_MERGE_NOMERGE; -+ -+ if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) { -+ bch2_key_resize(l.k, KEY_SIZE_MAX); -+ bch2_cut_front_s(l.k->p, _r); -+ return BCH_MERGE_PARTIAL; -+ } -+ -+ bch2_key_resize(l.k, l.k->size + r.k->size); -+ -+ return BCH_MERGE_MERGE; -+} -+ -+/* indirect extents */ -+ -+const char *bch2_reflink_v_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct 
bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -+ -+ if (bkey_val_bytes(r.k) < sizeof(*r.v)) -+ return "incorrect value size"; -+ -+ return bch2_bkey_ptrs_invalid(c, k); -+} -+ -+void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -+ -+ pr_buf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount)); -+ -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+static int bch2_make_extent_indirect(struct btree_trans *trans, -+ struct btree_iter *extent_iter, -+ struct bkey_i_extent *e) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter *reflink_iter; -+ struct bkey_s_c k; -+ struct bkey_i_reflink_v *r_v; -+ struct bkey_i_reflink_p *r_p; -+ int ret; -+ -+ for_each_btree_key(trans, reflink_iter, BTREE_ID_REFLINK, -+ POS(0, c->reflink_hint), -+ BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) { -+ if (reflink_iter->pos.inode) { -+ bch2_btree_iter_set_pos(reflink_iter, POS_MIN); -+ continue; -+ } -+ -+ if (bkey_deleted(k.k) && e->k.size <= k.k->size) -+ break; -+ } -+ -+ if (ret) -+ goto err; -+ -+ /* rewind iter to start of hole, if necessary: */ -+ bch2_btree_iter_set_pos(reflink_iter, bkey_start_pos(k.k)); -+ -+ r_v = bch2_trans_kmalloc(trans, sizeof(*r_v) + bkey_val_bytes(&e->k)); -+ ret = PTR_ERR_OR_ZERO(r_v); -+ if (ret) -+ goto err; -+ -+ bkey_reflink_v_init(&r_v->k_i); -+ r_v->k.p = reflink_iter->pos; -+ bch2_key_resize(&r_v->k, e->k.size); -+ r_v->k.version = e->k.version; -+ -+ set_bkey_val_u64s(&r_v->k, bkey_val_u64s(&r_v->k) + -+ bkey_val_u64s(&e->k)); -+ r_v->v.refcount = 0; -+ memcpy(r_v->v.start, e->v.start, bkey_val_bytes(&e->k)); -+ -+ bch2_trans_update(trans, reflink_iter, &r_v->k_i, 0); -+ -+ r_p = bch2_trans_kmalloc(trans, sizeof(*r_p)); -+ if (IS_ERR(r_p)) -+ return PTR_ERR(r_p); -+ -+ e->k.type = KEY_TYPE_reflink_p; -+ r_p = bkey_i_to_reflink_p(&e->k_i); -+ set_bkey_val_bytes(&r_p->k, sizeof(r_p->v)); -+ r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k)); -+ -+ bch2_trans_update(trans, extent_iter, &r_p->k_i, 0); -+err: -+ if (!IS_ERR(reflink_iter)) -+ c->reflink_hint = reflink_iter->pos.offset; -+ bch2_trans_iter_put(trans, reflink_iter); -+ -+ return ret; -+} -+ -+static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) -+{ -+ struct bkey_s_c k = bch2_btree_iter_peek(iter); -+ int ret; -+ -+ for_each_btree_key_continue(iter, 0, k, ret) { -+ if (bkey_cmp(iter->pos, end) >= 0) -+ return bkey_s_c_null; -+ -+ if (k.k->type == KEY_TYPE_extent || -+ k.k->type == KEY_TYPE_reflink_p) -+ break; -+ } -+ -+ return k; -+} -+ -+s64 bch2_remap_range(struct bch_fs *c, -+ struct bpos dst_start, struct bpos src_start, -+ u64 remap_sectors, u64 *journal_seq, -+ u64 new_i_size, s64 *i_sectors_delta) -+{ -+ struct btree_trans trans; -+ struct btree_iter *dst_iter, *src_iter; -+ struct bkey_s_c src_k; -+ BKEY_PADDED(k) new_dst; -+ struct bkey_on_stack new_src; -+ struct bpos dst_end = dst_start, src_end = src_start; -+ struct bpos dst_want, src_want; -+ u64 src_done, dst_done; -+ int ret = 0, ret2 = 0; -+ -+ if (!c->opts.reflink) -+ return -EOPNOTSUPP; -+ -+ if (!percpu_ref_tryget(&c->writes)) -+ return -EROFS; -+ -+ bch2_check_set_feature(c, BCH_FEATURE_reflink); -+ -+ dst_end.offset += remap_sectors; -+ src_end.offset += remap_sectors; -+ -+ bkey_on_stack_init(&new_src); -+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096); -+ -+ src_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start, -+ BTREE_ITER_INTENT); -+ dst_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, 
dst_start, -+ BTREE_ITER_INTENT); -+ -+ while (1) { -+ bch2_trans_begin(&trans); -+ -+ trans.mem_top = 0; -+ -+ if (fatal_signal_pending(current)) { -+ ret = -EINTR; -+ goto err; -+ } -+ -+ src_k = get_next_src(src_iter, src_end); -+ ret = bkey_err(src_k); -+ if (ret) -+ goto btree_err; -+ -+ src_done = bpos_min(src_iter->pos, src_end).offset - -+ src_start.offset; -+ dst_want = POS(dst_start.inode, dst_start.offset + src_done); -+ -+ if (bkey_cmp(dst_iter->pos, dst_want) < 0) { -+ ret = bch2_fpunch_at(&trans, dst_iter, dst_want, -+ journal_seq, i_sectors_delta); -+ if (ret) -+ goto btree_err; -+ continue; -+ } -+ -+ BUG_ON(bkey_cmp(dst_iter->pos, dst_want)); -+ -+ if (!bkey_cmp(dst_iter->pos, dst_end)) -+ break; -+ -+ if (src_k.k->type == KEY_TYPE_extent) { -+ bkey_on_stack_reassemble(&new_src, c, src_k); -+ src_k = bkey_i_to_s_c(new_src.k); -+ -+ bch2_cut_front(src_iter->pos, new_src.k); -+ bch2_cut_back(src_end, new_src.k); -+ -+ ret = bch2_make_extent_indirect(&trans, src_iter, -+ bkey_i_to_extent(new_src.k)); -+ if (ret) -+ goto btree_err; -+ -+ BUG_ON(src_k.k->type != KEY_TYPE_reflink_p); -+ } -+ -+ if (src_k.k->type == KEY_TYPE_reflink_p) { -+ struct bkey_s_c_reflink_p src_p = -+ bkey_s_c_to_reflink_p(src_k); -+ struct bkey_i_reflink_p *dst_p = -+ bkey_reflink_p_init(&new_dst.k); -+ -+ u64 offset = le64_to_cpu(src_p.v->idx) + -+ (src_iter->pos.offset - -+ bkey_start_offset(src_k.k)); -+ -+ dst_p->v.idx = cpu_to_le64(offset); -+ } else { -+ BUG(); -+ } -+ -+ new_dst.k.k.p = dst_iter->pos; -+ bch2_key_resize(&new_dst.k.k, -+ min(src_k.k->p.offset - src_iter->pos.offset, -+ dst_end.offset - dst_iter->pos.offset)); -+ -+ ret = bch2_extent_update(&trans, dst_iter, &new_dst.k, -+ NULL, journal_seq, -+ new_i_size, i_sectors_delta); -+ if (ret) -+ goto btree_err; -+ -+ dst_done = dst_iter->pos.offset - dst_start.offset; -+ src_want = POS(src_start.inode, src_start.offset + dst_done); -+ bch2_btree_iter_set_pos(src_iter, src_want); -+btree_err: -+ if (ret == -EINTR) -+ ret = 0; -+ if (ret) -+ goto err; -+ } -+ -+ BUG_ON(bkey_cmp(dst_iter->pos, dst_end)); -+err: -+ BUG_ON(bkey_cmp(dst_iter->pos, dst_end) > 0); -+ -+ dst_done = dst_iter->pos.offset - dst_start.offset; -+ new_i_size = min(dst_iter->pos.offset << 9, new_i_size); -+ -+ bch2_trans_begin(&trans); -+ -+ do { -+ struct bch_inode_unpacked inode_u; -+ struct btree_iter *inode_iter; -+ -+ inode_iter = bch2_inode_peek(&trans, &inode_u, -+ dst_start.inode, BTREE_ITER_INTENT); -+ ret2 = PTR_ERR_OR_ZERO(inode_iter); -+ -+ if (!ret2 && -+ inode_u.bi_size < new_i_size) { -+ inode_u.bi_size = new_i_size; -+ ret2 = bch2_inode_write(&trans, inode_iter, &inode_u) ?: -+ bch2_trans_commit(&trans, NULL, journal_seq, 0); -+ } -+ } while (ret2 == -EINTR); -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ bkey_on_stack_exit(&new_src, c); -+ -+ percpu_ref_put(&c->writes); -+ -+ return dst_done ?: ret ?: ret2; -+} -diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h -new file mode 100644 -index 000000000000..5445c1cf0797 ---- /dev/null -+++ b/fs/bcachefs/reflink.h -@@ -0,0 +1,31 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REFLINK_H -+#define _BCACHEFS_REFLINK_H -+ -+const char *bch2_reflink_p_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+enum merge_result bch2_reflink_p_merge(struct bch_fs *, -+ struct bkey_s, struct bkey_s); -+ -+#define bch2_bkey_ops_reflink_p (struct bkey_ops) { \ -+ .key_invalid = bch2_reflink_p_invalid, \ -+ 
.val_to_text = bch2_reflink_p_to_text, \ -+ .key_merge = bch2_reflink_p_merge, \ -+} -+ -+const char *bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+ -+#define bch2_bkey_ops_reflink_v (struct bkey_ops) { \ -+ .key_invalid = bch2_reflink_v_invalid, \ -+ .val_to_text = bch2_reflink_v_to_text, \ -+ .swab = bch2_ptr_swab, \ -+} -+ -+s64 bch2_remap_range(struct bch_fs *, struct bpos, struct bpos, -+ u64, u64 *, u64, s64 *); -+ -+#endif /* _BCACHEFS_REFLINK_H */ -diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c -new file mode 100644 -index 000000000000..6b6506c68609 ---- /dev/null -+++ b/fs/bcachefs/replicas.c -@@ -0,0 +1,1059 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "buckets.h" -+#include "journal.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, -+ struct bch_replicas_cpu *); -+ -+/* Replicas tracking - in memory: */ -+ -+static inline int u8_cmp(u8 l, u8 r) -+{ -+ return cmp_int(l, r); -+} -+ -+static void verify_replicas_entry(struct bch_replicas_entry *e) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ unsigned i; -+ -+ BUG_ON(e->data_type >= BCH_DATA_NR); -+ BUG_ON(!e->nr_devs); -+ BUG_ON(e->nr_required > 1 && -+ e->nr_required >= e->nr_devs); -+ -+ for (i = 0; i + 1 < e->nr_devs; i++) -+ BUG_ON(e->devs[i] >= e->devs[i + 1]); -+#endif -+} -+ -+static void replicas_entry_sort(struct bch_replicas_entry *e) -+{ -+ bubble_sort(e->devs, e->nr_devs, u8_cmp); -+} -+ -+static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) -+{ -+ eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL); -+} -+ -+void bch2_replicas_entry_to_text(struct printbuf *out, -+ struct bch_replicas_entry *e) -+{ -+ unsigned i; -+ -+ pr_buf(out, "%s: %u/%u [", -+ bch2_data_types[e->data_type], -+ e->nr_required, -+ e->nr_devs); -+ -+ for (i = 0; i < e->nr_devs; i++) -+ pr_buf(out, i ? 
" %u" : "%u", e->devs[i]); -+ pr_buf(out, "]"); -+} -+ -+void bch2_cpu_replicas_to_text(struct printbuf *out, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_replicas_entry *e; -+ bool first = true; -+ -+ for_each_cpu_replicas_entry(r, e) { -+ if (!first) -+ pr_buf(out, " "); -+ first = false; -+ -+ bch2_replicas_entry_to_text(out, e); -+ } -+} -+ -+static void extent_to_replicas(struct bkey_s_c k, -+ struct bch_replicas_entry *r) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ r->nr_required = 1; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ if (p.ptr.cached) -+ continue; -+ -+ if (!p.has_ec) -+ r->devs[r->nr_devs++] = p.ptr.dev; -+ else -+ r->nr_required = 0; -+ } -+} -+ -+static void stripe_to_replicas(struct bkey_s_c k, -+ struct bch_replicas_entry *r) -+{ -+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); -+ const struct bch_extent_ptr *ptr; -+ -+ r->nr_required = s.v->nr_blocks - s.v->nr_redundant; -+ -+ for (ptr = s.v->ptrs; -+ ptr < s.v->ptrs + s.v->nr_blocks; -+ ptr++) -+ r->devs[r->nr_devs++] = ptr->dev; -+} -+ -+void bch2_bkey_to_replicas(struct bch_replicas_entry *e, -+ struct bkey_s_c k) -+{ -+ e->nr_devs = 0; -+ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ e->data_type = BCH_DATA_btree; -+ extent_to_replicas(k, e); -+ break; -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ e->data_type = BCH_DATA_user; -+ extent_to_replicas(k, e); -+ break; -+ case KEY_TYPE_stripe: -+ e->data_type = BCH_DATA_user; -+ stripe_to_replicas(k, e); -+ break; -+ } -+ -+ replicas_entry_sort(e); -+} -+ -+void bch2_devlist_to_replicas(struct bch_replicas_entry *e, -+ enum bch_data_type data_type, -+ struct bch_devs_list devs) -+{ -+ unsigned i; -+ -+ BUG_ON(!data_type || -+ data_type == BCH_DATA_sb || -+ data_type >= BCH_DATA_NR); -+ -+ e->data_type = data_type; -+ e->nr_devs = 0; -+ e->nr_required = 1; -+ -+ for (i = 0; i < devs.nr; i++) -+ e->devs[e->nr_devs++] = devs.devs[i]; -+ -+ replicas_entry_sort(e); -+} -+ -+static struct bch_replicas_cpu -+cpu_replicas_add_entry(struct bch_replicas_cpu *old, -+ struct bch_replicas_entry *new_entry) -+{ -+ unsigned i; -+ struct bch_replicas_cpu new = { -+ .nr = old->nr + 1, -+ .entry_size = max_t(unsigned, old->entry_size, -+ replicas_entry_bytes(new_entry)), -+ }; -+ -+ BUG_ON(!new_entry->data_type); -+ verify_replicas_entry(new_entry); -+ -+ new.entries = kcalloc(new.nr, new.entry_size, GFP_NOIO); -+ if (!new.entries) -+ return new; -+ -+ for (i = 0; i < old->nr; i++) -+ memcpy(cpu_replicas_entry(&new, i), -+ cpu_replicas_entry(old, i), -+ old->entry_size); -+ -+ memcpy(cpu_replicas_entry(&new, old->nr), -+ new_entry, -+ replicas_entry_bytes(new_entry)); -+ -+ bch2_cpu_replicas_sort(&new); -+ return new; -+} -+ -+static inline int __replicas_entry_idx(struct bch_replicas_cpu *r, -+ struct bch_replicas_entry *search) -+{ -+ int idx, entry_size = replicas_entry_bytes(search); -+ -+ if (unlikely(entry_size > r->entry_size)) -+ return -1; -+ -+ verify_replicas_entry(search); -+ -+#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size) -+ idx = eytzinger0_find(r->entries, r->nr, r->entry_size, -+ entry_cmp, search); -+#undef entry_cmp -+ -+ return idx < r->nr ? 
idx : -1; -+} -+ -+int bch2_replicas_entry_idx(struct bch_fs *c, -+ struct bch_replicas_entry *search) -+{ -+ replicas_entry_sort(search); -+ -+ return __replicas_entry_idx(&c->replicas, search); -+} -+ -+static bool __replicas_has_entry(struct bch_replicas_cpu *r, -+ struct bch_replicas_entry *search) -+{ -+ return __replicas_entry_idx(r, search) >= 0; -+} -+ -+bool bch2_replicas_marked(struct bch_fs *c, -+ struct bch_replicas_entry *search) -+{ -+ bool marked; -+ -+ if (!search->nr_devs) -+ return true; -+ -+ verify_replicas_entry(search); -+ -+ percpu_down_read(&c->mark_lock); -+ marked = __replicas_has_entry(&c->replicas, search) && -+ (likely((!c->replicas_gc.entries)) || -+ __replicas_has_entry(&c->replicas_gc, search)); -+ percpu_up_read(&c->mark_lock); -+ -+ return marked; -+} -+ -+static void __replicas_table_update(struct bch_fs_usage *dst, -+ struct bch_replicas_cpu *dst_r, -+ struct bch_fs_usage *src, -+ struct bch_replicas_cpu *src_r) -+{ -+ int src_idx, dst_idx; -+ -+ *dst = *src; -+ -+ for (src_idx = 0; src_idx < src_r->nr; src_idx++) { -+ if (!src->replicas[src_idx]) -+ continue; -+ -+ dst_idx = __replicas_entry_idx(dst_r, -+ cpu_replicas_entry(src_r, src_idx)); -+ BUG_ON(dst_idx < 0); -+ -+ dst->replicas[dst_idx] = src->replicas[src_idx]; -+ } -+} -+ -+static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p, -+ struct bch_replicas_cpu *dst_r, -+ struct bch_fs_usage __percpu *src_p, -+ struct bch_replicas_cpu *src_r) -+{ -+ unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr; -+ struct bch_fs_usage *dst, *src = (void *) -+ bch2_acc_percpu_u64s((void *) src_p, src_nr); -+ -+ preempt_disable(); -+ dst = this_cpu_ptr(dst_p); -+ preempt_enable(); -+ -+ __replicas_table_update(dst, dst_r, src, src_r); -+} -+ -+/* -+ * Resize filesystem accounting: -+ */ -+static int replicas_table_update(struct bch_fs *c, -+ struct bch_replicas_cpu *new_r) -+{ -+ struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL }; -+ struct bch_fs_usage *new_scratch = NULL; -+ struct bch_fs_usage __percpu *new_gc = NULL; -+ struct bch_fs_usage *new_base = NULL; -+ unsigned bytes = sizeof(struct bch_fs_usage) + -+ sizeof(u64) * new_r->nr; -+ int ret = -ENOMEM; -+ -+ if (!(new_base = kzalloc(bytes, GFP_NOIO)) || -+ !(new_usage[0] = __alloc_percpu_gfp(bytes, sizeof(u64), -+ GFP_NOIO)) || -+ !(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64), -+ GFP_NOIO)) || -+ !(new_scratch = kmalloc(bytes, GFP_NOIO)) || -+ (c->usage_gc && -+ !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO)))) { -+ bch_err(c, "error updating replicas table: memory allocation failure"); -+ goto err; -+ } -+ -+ if (c->usage_base) -+ __replicas_table_update(new_base, new_r, -+ c->usage_base, &c->replicas); -+ if (c->usage[0]) -+ __replicas_table_update_pcpu(new_usage[0], new_r, -+ c->usage[0], &c->replicas); -+ if (c->usage[1]) -+ __replicas_table_update_pcpu(new_usage[1], new_r, -+ c->usage[1], &c->replicas); -+ if (c->usage_gc) -+ __replicas_table_update_pcpu(new_gc, new_r, -+ c->usage_gc, &c->replicas); -+ -+ swap(c->usage_base, new_base); -+ swap(c->usage[0], new_usage[0]); -+ swap(c->usage[1], new_usage[1]); -+ swap(c->usage_scratch, new_scratch); -+ swap(c->usage_gc, new_gc); -+ swap(c->replicas, *new_r); -+ ret = 0; -+err: -+ free_percpu(new_gc); -+ kfree(new_scratch); -+ free_percpu(new_usage[1]); -+ free_percpu(new_usage[0]); -+ kfree(new_base); -+ return ret; -+} -+ -+static unsigned reserve_journal_replicas(struct bch_fs *c, -+ struct bch_replicas_cpu *r) -+{ -+ struct 
bch_replicas_entry *e; -+ unsigned journal_res_u64s = 0; -+ -+ /* nr_inodes: */ -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)); -+ -+ /* key_version: */ -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)); -+ -+ /* persistent_reserved: */ -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) * -+ BCH_REPLICAS_MAX; -+ -+ for_each_cpu_replicas_entry(r, e) -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) + -+ e->nr_devs, sizeof(u64)); -+ return journal_res_u64s; -+} -+ -+noinline -+static int bch2_mark_replicas_slowpath(struct bch_fs *c, -+ struct bch_replicas_entry *new_entry) -+{ -+ struct bch_replicas_cpu new_r, new_gc; -+ int ret = 0; -+ -+ verify_replicas_entry(new_entry); -+ -+ memset(&new_r, 0, sizeof(new_r)); -+ memset(&new_gc, 0, sizeof(new_gc)); -+ -+ mutex_lock(&c->sb_lock); -+ -+ if (c->replicas_gc.entries && -+ !__replicas_has_entry(&c->replicas_gc, new_entry)) { -+ new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry); -+ if (!new_gc.entries) -+ goto err; -+ } -+ -+ if (!__replicas_has_entry(&c->replicas, new_entry)) { -+ new_r = cpu_replicas_add_entry(&c->replicas, new_entry); -+ if (!new_r.entries) -+ goto err; -+ -+ ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r); -+ if (ret) -+ goto err; -+ -+ bch2_journal_entry_res_resize(&c->journal, -+ &c->replicas_journal_res, -+ reserve_journal_replicas(c, &new_r)); -+ } -+ -+ if (!new_r.entries && -+ !new_gc.entries) -+ goto out; -+ -+ /* allocations done, now commit: */ -+ -+ if (new_r.entries) -+ bch2_write_super(c); -+ -+ /* don't update in memory replicas until changes are persistent */ -+ percpu_down_write(&c->mark_lock); -+ if (new_r.entries) -+ ret = replicas_table_update(c, &new_r); -+ if (new_gc.entries) -+ swap(new_gc, c->replicas_gc); -+ percpu_up_write(&c->mark_lock); -+out: -+ mutex_unlock(&c->sb_lock); -+ -+ kfree(new_r.entries); -+ kfree(new_gc.entries); -+ -+ return ret; -+err: -+ bch_err(c, "error adding replicas entry: memory allocation failure"); -+ ret = -ENOMEM; -+ goto out; -+} -+ -+static int __bch2_mark_replicas(struct bch_fs *c, -+ struct bch_replicas_entry *r, -+ bool check) -+{ -+ return likely(bch2_replicas_marked(c, r)) ? 0 -+ : check ? 
-1 -+ : bch2_mark_replicas_slowpath(c, r); -+} -+ -+int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry *r) -+{ -+ return __bch2_mark_replicas(c, r, false); -+} -+ -+static int __bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k, -+ bool check) -+{ -+ struct bch_replicas_padded search; -+ struct bch_devs_list cached = bch2_bkey_cached_devs(k); -+ unsigned i; -+ int ret; -+ -+ for (i = 0; i < cached.nr; i++) { -+ bch2_replicas_entry_cached(&search.e, cached.devs[i]); -+ -+ ret = __bch2_mark_replicas(c, &search.e, check); -+ if (ret) -+ return ret; -+ } -+ -+ bch2_bkey_to_replicas(&search.e, k); -+ -+ return __bch2_mark_replicas(c, &search.e, check); -+} -+ -+bool bch2_bkey_replicas_marked(struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ return __bch2_mark_bkey_replicas(c, k, true) == 0; -+} -+ -+int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) -+{ -+ return __bch2_mark_bkey_replicas(c, k, false); -+} -+ -+int bch2_replicas_gc_end(struct bch_fs *c, int ret) -+{ -+ unsigned i; -+ -+ lockdep_assert_held(&c->replicas_gc_lock); -+ -+ mutex_lock(&c->sb_lock); -+ percpu_down_write(&c->mark_lock); -+ -+ /* -+ * this is kind of crappy; the replicas gc mechanism needs to be ripped -+ * out -+ */ -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ struct bch_replicas_cpu n; -+ -+ if (!__replicas_has_entry(&c->replicas_gc, e) && -+ (c->usage_base->replicas[i] || -+ percpu_u64_get(&c->usage[0]->replicas[i]) || -+ percpu_u64_get(&c->usage[1]->replicas[i]))) { -+ n = cpu_replicas_add_entry(&c->replicas_gc, e); -+ if (!n.entries) { -+ ret = -ENOSPC; -+ goto err; -+ } -+ -+ swap(n, c->replicas_gc); -+ kfree(n.entries); -+ } -+ } -+ -+ if (bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc)) { -+ ret = -ENOSPC; -+ goto err; -+ } -+ -+ ret = replicas_table_update(c, &c->replicas_gc); -+err: -+ kfree(c->replicas_gc.entries); -+ c->replicas_gc.entries = NULL; -+ -+ percpu_up_write(&c->mark_lock); -+ -+ if (!ret) -+ bch2_write_super(c); -+ -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) -+{ -+ struct bch_replicas_entry *e; -+ unsigned i = 0; -+ -+ lockdep_assert_held(&c->replicas_gc_lock); -+ -+ mutex_lock(&c->sb_lock); -+ BUG_ON(c->replicas_gc.entries); -+ -+ c->replicas_gc.nr = 0; -+ c->replicas_gc.entry_size = 0; -+ -+ for_each_cpu_replicas_entry(&c->replicas, e) -+ if (!((1 << e->data_type) & typemask)) { -+ c->replicas_gc.nr++; -+ c->replicas_gc.entry_size = -+ max_t(unsigned, c->replicas_gc.entry_size, -+ replicas_entry_bytes(e)); -+ } -+ -+ c->replicas_gc.entries = kcalloc(c->replicas_gc.nr, -+ c->replicas_gc.entry_size, -+ GFP_NOIO); -+ if (!c->replicas_gc.entries) { -+ mutex_unlock(&c->sb_lock); -+ bch_err(c, "error allocating c->replicas_gc"); -+ return -ENOMEM; -+ } -+ -+ for_each_cpu_replicas_entry(&c->replicas, e) -+ if (!((1 << e->data_type) & typemask)) -+ memcpy(cpu_replicas_entry(&c->replicas_gc, i++), -+ e, c->replicas_gc.entry_size); -+ -+ bch2_cpu_replicas_sort(&c->replicas_gc); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+int bch2_replicas_gc2(struct bch_fs *c) -+{ -+ struct bch_replicas_cpu new = { 0 }; -+ unsigned i, nr; -+ int ret = 0; -+ -+ bch2_journal_meta(&c->journal); -+retry: -+ nr = READ_ONCE(c->replicas.nr); -+ new.entry_size = READ_ONCE(c->replicas.entry_size); -+ new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL); -+ if (!new.entries) { -+ bch_err(c, "error allocating c->replicas_gc"); 
-+ return -ENOMEM; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ percpu_down_write(&c->mark_lock); -+ -+ if (nr != c->replicas.nr || -+ new.entry_size != c->replicas.entry_size) { -+ percpu_up_write(&c->mark_lock); -+ mutex_unlock(&c->sb_lock); -+ kfree(new.entries); -+ goto retry; -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ if (e->data_type == BCH_DATA_journal || -+ c->usage_base->replicas[i] || -+ percpu_u64_get(&c->usage[0]->replicas[i]) || -+ percpu_u64_get(&c->usage[1]->replicas[i])) -+ memcpy(cpu_replicas_entry(&new, new.nr++), -+ e, new.entry_size); -+ } -+ -+ bch2_cpu_replicas_sort(&new); -+ -+ if (bch2_cpu_replicas_to_sb_replicas(c, &new)) { -+ ret = -ENOSPC; -+ goto err; -+ } -+ -+ ret = replicas_table_update(c, &new); -+err: -+ kfree(new.entries); -+ -+ percpu_up_write(&c->mark_lock); -+ -+ if (!ret) -+ bch2_write_super(c); -+ -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_replicas_set_usage(struct bch_fs *c, -+ struct bch_replicas_entry *r, -+ u64 sectors) -+{ -+ int ret, idx = bch2_replicas_entry_idx(c, r); -+ -+ if (idx < 0) { -+ struct bch_replicas_cpu n; -+ -+ n = cpu_replicas_add_entry(&c->replicas, r); -+ if (!n.entries) -+ return -ENOMEM; -+ -+ ret = replicas_table_update(c, &n); -+ if (ret) -+ return ret; -+ -+ kfree(n.entries); -+ -+ idx = bch2_replicas_entry_idx(c, r); -+ BUG_ON(ret < 0); -+ } -+ -+ c->usage_base->replicas[idx] = sectors; -+ -+ return 0; -+} -+ -+/* Replicas tracking - superblock: */ -+ -+static int -+__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r, -+ struct bch_replicas_cpu *cpu_r) -+{ -+ struct bch_replicas_entry *e, *dst; -+ unsigned nr = 0, entry_size = 0, idx = 0; -+ -+ for_each_replicas_entry(sb_r, e) { -+ entry_size = max_t(unsigned, entry_size, -+ replicas_entry_bytes(e)); -+ nr++; -+ } -+ -+ cpu_r->entries = kcalloc(nr, entry_size, GFP_NOIO); -+ if (!cpu_r->entries) -+ return -ENOMEM; -+ -+ cpu_r->nr = nr; -+ cpu_r->entry_size = entry_size; -+ -+ for_each_replicas_entry(sb_r, e) { -+ dst = cpu_replicas_entry(cpu_r, idx++); -+ memcpy(dst, e, replicas_entry_bytes(e)); -+ replicas_entry_sort(dst); -+ } -+ -+ return 0; -+} -+ -+static int -+__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r, -+ struct bch_replicas_cpu *cpu_r) -+{ -+ struct bch_replicas_entry_v0 *e; -+ unsigned nr = 0, entry_size = 0, idx = 0; -+ -+ for_each_replicas_entry(sb_r, e) { -+ entry_size = max_t(unsigned, entry_size, -+ replicas_entry_bytes(e)); -+ nr++; -+ } -+ -+ entry_size += sizeof(struct bch_replicas_entry) - -+ sizeof(struct bch_replicas_entry_v0); -+ -+ cpu_r->entries = kcalloc(nr, entry_size, GFP_NOIO); -+ if (!cpu_r->entries) -+ return -ENOMEM; -+ -+ cpu_r->nr = nr; -+ cpu_r->entry_size = entry_size; -+ -+ for_each_replicas_entry(sb_r, e) { -+ struct bch_replicas_entry *dst = -+ cpu_replicas_entry(cpu_r, idx++); -+ -+ dst->data_type = e->data_type; -+ dst->nr_devs = e->nr_devs; -+ dst->nr_required = 1; -+ memcpy(dst->devs, e->devs, e->nr_devs); -+ replicas_entry_sort(dst); -+ } -+ -+ return 0; -+} -+ -+int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) -+{ -+ struct bch_sb_field_replicas *sb_v1; -+ struct bch_sb_field_replicas_v0 *sb_v0; -+ struct bch_replicas_cpu new_r = { 0, 0, NULL }; -+ int ret = 0; -+ -+ if ((sb_v1 = bch2_sb_get_replicas(c->disk_sb.sb))) -+ ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r); -+ else if ((sb_v0 = bch2_sb_get_replicas_v0(c->disk_sb.sb))) -+ ret = 
__bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r); -+ -+ if (ret) -+ return -ENOMEM; -+ -+ bch2_cpu_replicas_sort(&new_r); -+ -+ percpu_down_write(&c->mark_lock); -+ -+ ret = replicas_table_update(c, &new_r); -+ percpu_up_write(&c->mark_lock); -+ -+ kfree(new_r.entries); -+ -+ return 0; -+} -+ -+static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_sb_field_replicas_v0 *sb_r; -+ struct bch_replicas_entry_v0 *dst; -+ struct bch_replicas_entry *src; -+ size_t bytes; -+ -+ bytes = sizeof(struct bch_sb_field_replicas); -+ -+ for_each_cpu_replicas_entry(r, src) -+ bytes += replicas_entry_bytes(src) - 1; -+ -+ sb_r = bch2_sb_resize_replicas_v0(&c->disk_sb, -+ DIV_ROUND_UP(bytes, sizeof(u64))); -+ if (!sb_r) -+ return -ENOSPC; -+ -+ bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas); -+ sb_r = bch2_sb_get_replicas_v0(c->disk_sb.sb); -+ -+ memset(&sb_r->entries, 0, -+ vstruct_end(&sb_r->field) - -+ (void *) &sb_r->entries); -+ -+ dst = sb_r->entries; -+ for_each_cpu_replicas_entry(r, src) { -+ dst->data_type = src->data_type; -+ dst->nr_devs = src->nr_devs; -+ memcpy(dst->devs, src->devs, src->nr_devs); -+ -+ dst = replicas_entry_next(dst); -+ -+ BUG_ON((void *) dst > vstruct_end(&sb_r->field)); -+ } -+ -+ return 0; -+} -+ -+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_sb_field_replicas *sb_r; -+ struct bch_replicas_entry *dst, *src; -+ bool need_v1 = false; -+ size_t bytes; -+ -+ bytes = sizeof(struct bch_sb_field_replicas); -+ -+ for_each_cpu_replicas_entry(r, src) { -+ bytes += replicas_entry_bytes(src); -+ if (src->nr_required != 1) -+ need_v1 = true; -+ } -+ -+ if (!need_v1) -+ return bch2_cpu_replicas_to_sb_replicas_v0(c, r); -+ -+ sb_r = bch2_sb_resize_replicas(&c->disk_sb, -+ DIV_ROUND_UP(bytes, sizeof(u64))); -+ if (!sb_r) -+ return -ENOSPC; -+ -+ bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0); -+ sb_r = bch2_sb_get_replicas(c->disk_sb.sb); -+ -+ memset(&sb_r->entries, 0, -+ vstruct_end(&sb_r->field) - -+ (void *) &sb_r->entries); -+ -+ dst = sb_r->entries; -+ for_each_cpu_replicas_entry(r, src) { -+ memcpy(dst, src, replicas_entry_bytes(src)); -+ -+ dst = replicas_entry_next(dst); -+ -+ BUG_ON((void *) dst > vstruct_end(&sb_r->field)); -+ } -+ -+ return 0; -+} -+ -+static const char *check_dup_replicas_entries(struct bch_replicas_cpu *cpu_r) -+{ -+ unsigned i; -+ -+ sort_cmp_size(cpu_r->entries, -+ cpu_r->nr, -+ cpu_r->entry_size, -+ memcmp, NULL); -+ -+ for (i = 0; i + 1 < cpu_r->nr; i++) { -+ struct bch_replicas_entry *l = -+ cpu_replicas_entry(cpu_r, i); -+ struct bch_replicas_entry *r = -+ cpu_replicas_entry(cpu_r, i + 1); -+ -+ BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0); -+ -+ if (!memcmp(l, r, cpu_r->entry_size)) -+ return "duplicate replicas entry"; -+ } -+ -+ return NULL; -+} -+ -+static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f) -+{ -+ struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas); -+ struct bch_sb_field_members *mi = bch2_sb_get_members(sb); -+ struct bch_replicas_cpu cpu_r = { .entries = NULL }; -+ struct bch_replicas_entry *e; -+ const char *err; -+ unsigned i; -+ -+ for_each_replicas_entry(sb_r, e) { -+ err = "invalid replicas entry: invalid data type"; -+ if (e->data_type >= BCH_DATA_NR) -+ goto err; -+ -+ err = "invalid replicas entry: no devices"; -+ if (!e->nr_devs) -+ goto err; -+ -+ err = "invalid replicas entry: bad nr_required"; -+ if (e->nr_required > 1 && -+ 
e->nr_required >= e->nr_devs) -+ goto err; -+ -+ err = "invalid replicas entry: invalid device"; -+ for (i = 0; i < e->nr_devs; i++) -+ if (!bch2_dev_exists(sb, mi, e->devs[i])) -+ goto err; -+ } -+ -+ err = "cannot allocate memory"; -+ if (__bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r)) -+ goto err; -+ -+ err = check_dup_replicas_entries(&cpu_r); -+err: -+ kfree(cpu_r.entries); -+ return err; -+} -+ -+static void bch2_sb_replicas_to_text(struct printbuf *out, -+ struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_replicas *r = field_to_type(f, replicas); -+ struct bch_replicas_entry *e; -+ bool first = true; -+ -+ for_each_replicas_entry(r, e) { -+ if (!first) -+ pr_buf(out, " "); -+ first = false; -+ -+ bch2_replicas_entry_to_text(out, e); -+ } -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_replicas = { -+ .validate = bch2_sb_validate_replicas, -+ .to_text = bch2_sb_replicas_to_text, -+}; -+ -+static const char *bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f) -+{ -+ struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0); -+ struct bch_sb_field_members *mi = bch2_sb_get_members(sb); -+ struct bch_replicas_cpu cpu_r = { .entries = NULL }; -+ struct bch_replicas_entry_v0 *e; -+ const char *err; -+ unsigned i; -+ -+ for_each_replicas_entry_v0(sb_r, e) { -+ err = "invalid replicas entry: invalid data type"; -+ if (e->data_type >= BCH_DATA_NR) -+ goto err; -+ -+ err = "invalid replicas entry: no devices"; -+ if (!e->nr_devs) -+ goto err; -+ -+ err = "invalid replicas entry: invalid device"; -+ for (i = 0; i < e->nr_devs; i++) -+ if (!bch2_dev_exists(sb, mi, e->devs[i])) -+ goto err; -+ } -+ -+ err = "cannot allocate memory"; -+ if (__bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r)) -+ goto err; -+ -+ err = check_dup_replicas_entries(&cpu_r); -+err: -+ kfree(cpu_r.entries); -+ return err; -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = { -+ .validate = bch2_sb_validate_replicas_v0, -+}; -+ -+/* Query replicas: */ -+ -+struct replicas_status __bch2_replicas_status(struct bch_fs *c, -+ struct bch_devs_mask online_devs) -+{ -+ struct bch_sb_field_members *mi; -+ struct bch_replicas_entry *e; -+ unsigned i, nr_online, nr_offline; -+ struct replicas_status ret; -+ -+ memset(&ret, 0, sizeof(ret)); -+ -+ for (i = 0; i < ARRAY_SIZE(ret.replicas); i++) -+ ret.replicas[i].redundancy = INT_MAX; -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ -+ percpu_down_read(&c->mark_lock); -+ -+ for_each_cpu_replicas_entry(&c->replicas, e) { -+ if (e->data_type >= ARRAY_SIZE(ret.replicas)) -+ panic("e %p data_type %u\n", e, e->data_type); -+ -+ nr_online = nr_offline = 0; -+ -+ for (i = 0; i < e->nr_devs; i++) { -+ BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi, -+ e->devs[i])); -+ -+ if (test_bit(e->devs[i], online_devs.d)) -+ nr_online++; -+ else -+ nr_offline++; -+ } -+ -+ ret.replicas[e->data_type].redundancy = -+ min(ret.replicas[e->data_type].redundancy, -+ (int) nr_online - (int) e->nr_required); -+ -+ ret.replicas[e->data_type].nr_offline = -+ max(ret.replicas[e->data_type].nr_offline, -+ nr_offline); -+ } -+ -+ percpu_up_read(&c->mark_lock); -+ -+ for (i = 0; i < ARRAY_SIZE(ret.replicas); i++) -+ if (ret.replicas[i].redundancy == INT_MAX) -+ ret.replicas[i].redundancy = 0; -+ -+ return ret; -+} -+ -+struct replicas_status bch2_replicas_status(struct bch_fs *c) -+{ -+ return __bch2_replicas_status(c, bch2_online_devs(c)); -+} -+ -+static bool have_enough_devs(struct replicas_status s, -+ enum bch_data_type type, -+ bool 
force_if_degraded, -+ bool force_if_lost) -+{ -+ return (!s.replicas[type].nr_offline || force_if_degraded) && -+ (s.replicas[type].redundancy >= 0 || force_if_lost); -+} -+ -+bool bch2_have_enough_devs(struct replicas_status s, unsigned flags) -+{ -+ return (have_enough_devs(s, BCH_DATA_journal, -+ flags & BCH_FORCE_IF_METADATA_DEGRADED, -+ flags & BCH_FORCE_IF_METADATA_LOST) && -+ have_enough_devs(s, BCH_DATA_btree, -+ flags & BCH_FORCE_IF_METADATA_DEGRADED, -+ flags & BCH_FORCE_IF_METADATA_LOST) && -+ have_enough_devs(s, BCH_DATA_user, -+ flags & BCH_FORCE_IF_DATA_DEGRADED, -+ flags & BCH_FORCE_IF_DATA_LOST)); -+} -+ -+int bch2_replicas_online(struct bch_fs *c, bool meta) -+{ -+ struct replicas_status s = bch2_replicas_status(c); -+ -+ return (meta -+ ? min(s.replicas[BCH_DATA_journal].redundancy, -+ s.replicas[BCH_DATA_btree].redundancy) -+ : s.replicas[BCH_DATA_user].redundancy) + 1; -+} -+ -+unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bch_replicas_entry *e; -+ unsigned i, ret = 0; -+ -+ percpu_down_read(&c->mark_lock); -+ -+ for_each_cpu_replicas_entry(&c->replicas, e) -+ for (i = 0; i < e->nr_devs; i++) -+ if (e->devs[i] == ca->dev_idx) -+ ret |= 1 << e->data_type; -+ -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+int bch2_fs_replicas_init(struct bch_fs *c) -+{ -+ c->journal.entry_u64s_reserved += -+ reserve_journal_replicas(c, &c->replicas); -+ -+ return replicas_table_update(c, &c->replicas); -+} -diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h -new file mode 100644 -index 000000000000..8b95164fbb56 ---- /dev/null -+++ b/fs/bcachefs/replicas.h -@@ -0,0 +1,91 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REPLICAS_H -+#define _BCACHEFS_REPLICAS_H -+ -+#include "eytzinger.h" -+#include "replicas_types.h" -+ -+void bch2_replicas_entry_to_text(struct printbuf *, -+ struct bch_replicas_entry *); -+void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *); -+ -+static inline struct bch_replicas_entry * -+cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i) -+{ -+ return (void *) r->entries + r->entry_size * i; -+} -+ -+int bch2_replicas_entry_idx(struct bch_fs *, -+ struct bch_replicas_entry *); -+ -+void bch2_devlist_to_replicas(struct bch_replicas_entry *, -+ enum bch_data_type, -+ struct bch_devs_list); -+bool bch2_replicas_marked(struct bch_fs *, struct bch_replicas_entry *); -+int bch2_mark_replicas(struct bch_fs *, -+ struct bch_replicas_entry *); -+ -+void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c); -+bool bch2_bkey_replicas_marked(struct bch_fs *, struct bkey_s_c); -+int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c); -+ -+static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e, -+ unsigned dev) -+{ -+ e->data_type = BCH_DATA_cached; -+ e->nr_devs = 1; -+ e->nr_required = 1; -+ e->devs[0] = dev; -+} -+ -+struct replicas_status { -+ struct { -+ int redundancy; -+ unsigned nr_offline; -+ } replicas[BCH_DATA_NR]; -+}; -+ -+struct replicas_status __bch2_replicas_status(struct bch_fs *, -+ struct bch_devs_mask); -+struct replicas_status bch2_replicas_status(struct bch_fs *); -+bool bch2_have_enough_devs(struct replicas_status, unsigned); -+ -+int bch2_replicas_online(struct bch_fs *, bool); -+unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *); -+ -+int bch2_replicas_gc_end(struct bch_fs *, int); -+int bch2_replicas_gc_start(struct bch_fs *, unsigned); -+int bch2_replicas_gc2(struct bch_fs *); -+ -+int 
bch2_replicas_set_usage(struct bch_fs *, -+ struct bch_replicas_entry *, -+ u64); -+ -+#define for_each_cpu_replicas_entry(_r, _i) \ -+ for (_i = (_r)->entries; \ -+ (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\ -+ _i = (void *) (_i) + (_r)->entry_size) -+ -+/* iterate over superblock replicas - used by userspace tools: */ -+ -+#define replicas_entry_next(_i) \ -+ ((typeof(_i)) ((void *) (_i) + replicas_entry_bytes(_i))) -+ -+#define for_each_replicas_entry(_r, _i) \ -+ for (_i = (_r)->entries; \ -+ (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\ -+ (_i) = replicas_entry_next(_i)) -+ -+#define for_each_replicas_entry_v0(_r, _i) \ -+ for (_i = (_r)->entries; \ -+ (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\ -+ (_i) = replicas_entry_next(_i)) -+ -+int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *); -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas; -+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0; -+ -+int bch2_fs_replicas_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_REPLICAS_H */ -diff --git a/fs/bcachefs/replicas_types.h b/fs/bcachefs/replicas_types.h -new file mode 100644 -index 000000000000..0535b1d3760e ---- /dev/null -+++ b/fs/bcachefs/replicas_types.h -@@ -0,0 +1,10 @@ -+#ifndef _BCACHEFS_REPLICAS_TYPES_H -+#define _BCACHEFS_REPLICAS_TYPES_H -+ -+struct bch_replicas_cpu { -+ unsigned nr; -+ unsigned entry_size; -+ struct bch_replicas_entry *entries; -+}; -+ -+#endif /* _BCACHEFS_REPLICAS_TYPES_H */ -diff --git a/fs/bcachefs/siphash.c b/fs/bcachefs/siphash.c -new file mode 100644 -index 000000000000..c062edb3fbc2 ---- /dev/null -+++ b/fs/bcachefs/siphash.c -@@ -0,0 +1,173 @@ -+// SPDX-License-Identifier: BSD-3-Clause -+/* $OpenBSD: siphash.c,v 1.3 2015/02/20 11:51:03 tedu Exp $ */ -+ -+/*- -+ * Copyright (c) 2013 Andre Oppermann -+ * All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. The name of the author may not be used to endorse or promote -+ * products derived from this software without specific prior written -+ * permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ */ -+ -+/* -+ * SipHash is a family of PRFs SipHash-c-d where the integer parameters c and d -+ * are the number of compression rounds and the number of finalization rounds. 
-+ * A compression round is identical to a finalization round and this round -+ * function is called SipRound. Given a 128-bit key k and a (possibly empty) -+ * byte string m, SipHash-c-d returns a 64-bit value SipHash-c-d(k; m). -+ * -+ * Implemented from the paper "SipHash: a fast short-input PRF", 2012.09.18, -+ * by Jean-Philippe Aumasson and Daniel J. Bernstein, -+ * Permanent Document ID b9a943a805fbfc6fde808af9fc0ecdfa -+ * https://131002.net/siphash/siphash.pdf -+ * https://131002.net/siphash/ -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#include "siphash.h" -+ -+static void SipHash_Rounds(SIPHASH_CTX *ctx, int rounds) -+{ -+ while (rounds--) { -+ ctx->v[0] += ctx->v[1]; -+ ctx->v[2] += ctx->v[3]; -+ ctx->v[1] = rol64(ctx->v[1], 13); -+ ctx->v[3] = rol64(ctx->v[3], 16); -+ -+ ctx->v[1] ^= ctx->v[0]; -+ ctx->v[3] ^= ctx->v[2]; -+ ctx->v[0] = rol64(ctx->v[0], 32); -+ -+ ctx->v[2] += ctx->v[1]; -+ ctx->v[0] += ctx->v[3]; -+ ctx->v[1] = rol64(ctx->v[1], 17); -+ ctx->v[3] = rol64(ctx->v[3], 21); -+ -+ ctx->v[1] ^= ctx->v[2]; -+ ctx->v[3] ^= ctx->v[0]; -+ ctx->v[2] = rol64(ctx->v[2], 32); -+ } -+} -+ -+static void SipHash_CRounds(SIPHASH_CTX *ctx, const void *ptr, int rounds) -+{ -+ u64 m = get_unaligned_le64(ptr); -+ -+ ctx->v[3] ^= m; -+ SipHash_Rounds(ctx, rounds); -+ ctx->v[0] ^= m; -+} -+ -+void SipHash_Init(SIPHASH_CTX *ctx, const SIPHASH_KEY *key) -+{ -+ u64 k0, k1; -+ -+ k0 = le64_to_cpu(key->k0); -+ k1 = le64_to_cpu(key->k1); -+ -+ ctx->v[0] = 0x736f6d6570736575ULL ^ k0; -+ ctx->v[1] = 0x646f72616e646f6dULL ^ k1; -+ ctx->v[2] = 0x6c7967656e657261ULL ^ k0; -+ ctx->v[3] = 0x7465646279746573ULL ^ k1; -+ -+ memset(ctx->buf, 0, sizeof(ctx->buf)); -+ ctx->bytes = 0; -+} -+ -+void SipHash_Update(SIPHASH_CTX *ctx, int rc, int rf, -+ const void *src, size_t len) -+{ -+ const u8 *ptr = src; -+ size_t left, used; -+ -+ if (len == 0) -+ return; -+ -+ used = ctx->bytes % sizeof(ctx->buf); -+ ctx->bytes += len; -+ -+ if (used > 0) { -+ left = sizeof(ctx->buf) - used; -+ -+ if (len >= left) { -+ memcpy(&ctx->buf[used], ptr, left); -+ SipHash_CRounds(ctx, ctx->buf, rc); -+ len -= left; -+ ptr += left; -+ } else { -+ memcpy(&ctx->buf[used], ptr, len); -+ return; -+ } -+ } -+ -+ while (len >= sizeof(ctx->buf)) { -+ SipHash_CRounds(ctx, ptr, rc); -+ len -= sizeof(ctx->buf); -+ ptr += sizeof(ctx->buf); -+ } -+ -+ if (len > 0) -+ memcpy(&ctx->buf[used], ptr, len); -+} -+ -+void SipHash_Final(void *dst, SIPHASH_CTX *ctx, int rc, int rf) -+{ -+ u64 r; -+ -+ r = SipHash_End(ctx, rc, rf); -+ -+ *((__le64 *) dst) = cpu_to_le64(r); -+} -+ -+u64 SipHash_End(SIPHASH_CTX *ctx, int rc, int rf) -+{ -+ u64 r; -+ size_t left, used; -+ -+ used = ctx->bytes % sizeof(ctx->buf); -+ left = sizeof(ctx->buf) - used; -+ memset(&ctx->buf[used], 0, left - 1); -+ ctx->buf[7] = ctx->bytes; -+ -+ SipHash_CRounds(ctx, ctx->buf, rc); -+ ctx->v[2] ^= 0xff; -+ SipHash_Rounds(ctx, rf); -+ -+ r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]); -+ memset(ctx, 0, sizeof(*ctx)); -+ return (r); -+} -+ -+u64 SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len) -+{ -+ SIPHASH_CTX ctx; -+ -+ SipHash_Init(&ctx, key); -+ SipHash_Update(&ctx, rc, rf, src, len); -+ return SipHash_End(&ctx, rc, rf); -+} -diff --git a/fs/bcachefs/siphash.h b/fs/bcachefs/siphash.h -new file mode 100644 -index 000000000000..3dfaf34a43b2 ---- /dev/null -+++ b/fs/bcachefs/siphash.h -@@ -0,0 +1,87 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* $OpenBSD: siphash.h,v 1.5 2015/02/20 11:51:03 tedu Exp $ */ -+/*- -+ * 
Copyright (c) 2013 Andre Oppermann -+ * All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. The name of the author may not be used to endorse or promote -+ * products derived from this software without specific prior written -+ * permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * $FreeBSD$ -+ */ -+ -+/* -+ * SipHash is a family of pseudorandom functions (a.k.a. keyed hash functions) -+ * optimized for speed on short messages returning a 64bit hash/digest value. -+ * -+ * The number of rounds is defined during the initialization: -+ * SipHash24_Init() for the fast and resonable strong version -+ * SipHash48_Init() for the strong version (half as fast) -+ * -+ * struct SIPHASH_CTX ctx; -+ * SipHash24_Init(&ctx); -+ * SipHash_SetKey(&ctx, "16bytes long key"); -+ * SipHash_Update(&ctx, pointer_to_string, length_of_string); -+ * SipHash_Final(output, &ctx); -+ */ -+ -+#ifndef _SIPHASH_H_ -+#define _SIPHASH_H_ -+ -+#include -+ -+#define SIPHASH_BLOCK_LENGTH 8 -+#define SIPHASH_KEY_LENGTH 16 -+#define SIPHASH_DIGEST_LENGTH 8 -+ -+typedef struct _SIPHASH_CTX { -+ u64 v[4]; -+ u8 buf[SIPHASH_BLOCK_LENGTH]; -+ u32 bytes; -+} SIPHASH_CTX; -+ -+typedef struct { -+ __le64 k0; -+ __le64 k1; -+} SIPHASH_KEY; -+ -+void SipHash_Init(SIPHASH_CTX *, const SIPHASH_KEY *); -+void SipHash_Update(SIPHASH_CTX *, int, int, const void *, size_t); -+u64 SipHash_End(SIPHASH_CTX *, int, int); -+void SipHash_Final(void *, SIPHASH_CTX *, int, int); -+u64 SipHash(const SIPHASH_KEY *, int, int, const void *, size_t); -+ -+#define SipHash24_Init(_c, _k) SipHash_Init((_c), (_k)) -+#define SipHash24_Update(_c, _p, _l) SipHash_Update((_c), 2, 4, (_p), (_l)) -+#define SipHash24_End(_d) SipHash_End((_d), 2, 4) -+#define SipHash24_Final(_d, _c) SipHash_Final((_d), (_c), 2, 4) -+#define SipHash24(_k, _p, _l) SipHash((_k), 2, 4, (_p), (_l)) -+ -+#define SipHash48_Init(_c, _k) SipHash_Init((_c), (_k)) -+#define SipHash48_Update(_c, _p, _l) SipHash_Update((_c), 4, 8, (_p), (_l)) -+#define SipHash48_End(_d) SipHash_End((_d), 4, 8) -+#define SipHash48_Final(_d, _c) SipHash_Final((_d), (_c), 4, 8) -+#define SipHash48(_k, _p, _l) SipHash((_k), 4, 8, (_p), (_l)) -+ -+#endif /* _SIPHASH_H_ */ -diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h -new file mode 100644 -index 
000000000000..dea9b7252b88 ---- /dev/null -+++ b/fs/bcachefs/str_hash.h -@@ -0,0 +1,336 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_STR_HASH_H -+#define _BCACHEFS_STR_HASH_H -+ -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "checksum.h" -+#include "error.h" -+#include "inode.h" -+#include "siphash.h" -+#include "super.h" -+ -+#include -+#include -+#include -+ -+static inline enum bch_str_hash_type -+bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) -+{ -+ switch (opt) { -+ case BCH_STR_HASH_OPT_CRC32C: -+ return BCH_STR_HASH_CRC32C; -+ case BCH_STR_HASH_OPT_CRC64: -+ return BCH_STR_HASH_CRC64; -+ case BCH_STR_HASH_OPT_SIPHASH: -+ return c->sb.features & (1ULL << BCH_FEATURE_new_siphash) -+ ? BCH_STR_HASH_SIPHASH -+ : BCH_STR_HASH_SIPHASH_OLD; -+ default: -+ BUG(); -+ } -+} -+ -+struct bch_hash_info { -+ u8 type; -+ union { -+ __le64 crc_key; -+ SIPHASH_KEY siphash_key; -+ }; -+}; -+ -+static inline struct bch_hash_info -+bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) -+{ -+ /* XXX ick */ -+ struct bch_hash_info info = { -+ .type = (bi->bi_flags >> INODE_STR_HASH_OFFSET) & -+ ~(~0U << INODE_STR_HASH_BITS), -+ .crc_key = bi->bi_hash_seed, -+ }; -+ -+ if (unlikely(info.type == BCH_STR_HASH_SIPHASH_OLD)) { -+ SHASH_DESC_ON_STACK(desc, c->sha256); -+ u8 digest[SHA256_DIGEST_SIZE]; -+ -+ desc->tfm = c->sha256; -+ -+ crypto_shash_digest(desc, (void *) &bi->bi_hash_seed, -+ sizeof(bi->bi_hash_seed), digest); -+ memcpy(&info.siphash_key, digest, sizeof(info.siphash_key)); -+ } -+ -+ return info; -+} -+ -+struct bch_str_hash_ctx { -+ union { -+ u32 crc32c; -+ u64 crc64; -+ SIPHASH_CTX siphash; -+ }; -+}; -+ -+static inline void bch2_str_hash_init(struct bch_str_hash_ctx *ctx, -+ const struct bch_hash_info *info) -+{ -+ switch (info->type) { -+ case BCH_STR_HASH_CRC32C: -+ ctx->crc32c = crc32c(~0, &info->crc_key, sizeof(info->crc_key)); -+ break; -+ case BCH_STR_HASH_CRC64: -+ ctx->crc64 = crc64_be(~0, &info->crc_key, sizeof(info->crc_key)); -+ break; -+ case BCH_STR_HASH_SIPHASH_OLD: -+ case BCH_STR_HASH_SIPHASH: -+ SipHash24_Init(&ctx->siphash, &info->siphash_key); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static inline void bch2_str_hash_update(struct bch_str_hash_ctx *ctx, -+ const struct bch_hash_info *info, -+ const void *data, size_t len) -+{ -+ switch (info->type) { -+ case BCH_STR_HASH_CRC32C: -+ ctx->crc32c = crc32c(ctx->crc32c, data, len); -+ break; -+ case BCH_STR_HASH_CRC64: -+ ctx->crc64 = crc64_be(ctx->crc64, data, len); -+ break; -+ case BCH_STR_HASH_SIPHASH_OLD: -+ case BCH_STR_HASH_SIPHASH: -+ SipHash24_Update(&ctx->siphash, data, len); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx, -+ const struct bch_hash_info *info) -+{ -+ switch (info->type) { -+ case BCH_STR_HASH_CRC32C: -+ return ctx->crc32c; -+ case BCH_STR_HASH_CRC64: -+ return ctx->crc64 >> 1; -+ case BCH_STR_HASH_SIPHASH_OLD: -+ case BCH_STR_HASH_SIPHASH: -+ return SipHash24_End(&ctx->siphash) >> 1; -+ default: -+ BUG(); -+ } -+} -+ -+struct bch_hash_desc { -+ enum btree_id btree_id; -+ u8 key_type; -+ -+ u64 (*hash_key)(const struct bch_hash_info *, const void *); -+ u64 (*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c); -+ bool (*cmp_key)(struct bkey_s_c, const void *); -+ bool (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c); -+}; -+ -+static __always_inline struct btree_iter * -+bch2_hash_lookup(struct btree_trans *trans, -+ const struct bch_hash_desc desc, 
-+ const struct bch_hash_info *info, -+ u64 inode, const void *key, -+ unsigned flags) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ for_each_btree_key(trans, iter, desc.btree_id, -+ POS(inode, desc.hash_key(info, key)), -+ BTREE_ITER_SLOTS|flags, k, ret) { -+ if (iter->pos.inode != inode) -+ break; -+ -+ if (k.k->type == desc.key_type) { -+ if (!desc.cmp_key(k, key)) -+ return iter; -+ } else if (k.k->type == KEY_TYPE_whiteout) { -+ ; -+ } else { -+ /* hole, not found */ -+ break; -+ } -+ } -+ bch2_trans_iter_put(trans, iter); -+ -+ return ERR_PTR(ret ?: -ENOENT); -+} -+ -+static __always_inline struct btree_iter * -+bch2_hash_hole(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ u64 inode, const void *key) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ for_each_btree_key(trans, iter, desc.btree_id, -+ POS(inode, desc.hash_key(info, key)), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (iter->pos.inode != inode) -+ break; -+ -+ if (k.k->type != desc.key_type) -+ return iter; -+ } -+ -+ iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ bch2_trans_iter_put(trans, iter); -+ -+ return ERR_PTR(ret ?: -ENOSPC); -+} -+ -+static __always_inline -+int bch2_hash_needs_whiteout(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ struct btree_iter *start) -+{ -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ iter = bch2_trans_copy_iter(trans, start); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ bch2_btree_iter_next_slot(iter); -+ -+ for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) { -+ if (k.k->type != desc.key_type && -+ k.k->type != KEY_TYPE_whiteout) -+ break; -+ -+ if (k.k->type == desc.key_type && -+ desc.hash_bkey(info, k) <= start->pos.offset) { -+ iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; -+ ret = 1; -+ break; -+ } -+ } -+ -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static __always_inline -+int bch2_hash_set(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ u64 inode, struct bkey_i *insert, int flags) -+{ -+ struct btree_iter *iter, *slot = NULL; -+ struct bkey_s_c k; -+ bool found = false; -+ int ret; -+ -+ for_each_btree_key(trans, iter, desc.btree_id, -+ POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (iter->pos.inode != inode) -+ break; -+ -+ if (k.k->type == desc.key_type) { -+ if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert))) -+ goto found; -+ -+ /* hash collision: */ -+ continue; -+ } -+ -+ if (!slot && -+ !(flags & BCH_HASH_SET_MUST_REPLACE)) { -+ slot = bch2_trans_copy_iter(trans, iter); -+ if (IS_ERR(slot)) -+ return PTR_ERR(slot); -+ } -+ -+ if (k.k->type != KEY_TYPE_whiteout) -+ goto not_found; -+ } -+ -+ if (!ret) -+ ret = -ENOSPC; -+out: -+ bch2_trans_iter_put(trans, slot); -+ bch2_trans_iter_put(trans, iter); -+ -+ return ret; -+found: -+ found = true; -+not_found: -+ -+ if (!found && (flags & BCH_HASH_SET_MUST_REPLACE)) { -+ ret = -ENOENT; -+ } else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) { -+ ret = -EEXIST; -+ } else { -+ if (!found && slot) -+ swap(iter, slot); -+ -+ insert->k.p = iter->pos; -+ bch2_trans_update(trans, iter, insert, 0); -+ } -+ -+ goto out; -+} -+ -+static __always_inline -+int bch2_hash_delete_at(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ struct btree_iter *iter) 
-+{ -+ struct bkey_i *delete; -+ int ret; -+ -+ ret = bch2_hash_needs_whiteout(trans, desc, info, iter); -+ if (ret < 0) -+ return ret; -+ -+ delete = bch2_trans_kmalloc(trans, sizeof(*delete)); -+ if (IS_ERR(delete)) -+ return PTR_ERR(delete); -+ -+ bkey_init(&delete->k); -+ delete->k.p = iter->pos; -+ delete->k.type = ret ? KEY_TYPE_whiteout : KEY_TYPE_deleted; -+ -+ bch2_trans_update(trans, iter, delete, 0); -+ return 0; -+} -+ -+static __always_inline -+int bch2_hash_delete(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ u64 inode, const void *key) -+{ -+ struct btree_iter *iter; -+ int ret; -+ -+ iter = bch2_hash_lookup(trans, desc, info, inode, key, -+ BTREE_ITER_INTENT); -+ if (IS_ERR(iter)) -+ return PTR_ERR(iter); -+ -+ ret = bch2_hash_delete_at(trans, desc, info, iter); -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+#endif /* _BCACHEFS_STR_HASH_H */ -diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c -new file mode 100644 -index 000000000000..cee6cc938734 ---- /dev/null -+++ b/fs/bcachefs/super-io.c -@@ -0,0 +1,1158 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "io.h" -+#include "journal.h" -+#include "journal_seq_blacklist.h" -+#include "replicas.h" -+#include "quota.h" -+#include "super-io.h" -+#include "super.h" -+#include "vstructs.h" -+ -+#include -+#include -+ -+const char * const bch2_sb_fields[] = { -+#define x(name, nr) #name, -+ BCH_SB_FIELDS() -+#undef x -+ NULL -+}; -+ -+static const char *bch2_sb_field_validate(struct bch_sb *, -+ struct bch_sb_field *); -+ -+struct bch_sb_field *bch2_sb_field_get(struct bch_sb *sb, -+ enum bch_sb_field_type type) -+{ -+ struct bch_sb_field *f; -+ -+ /* XXX: need locking around superblock to access optional fields */ -+ -+ vstruct_for_each(sb, f) -+ if (le32_to_cpu(f->type) == type) -+ return f; -+ return NULL; -+} -+ -+static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb, -+ struct bch_sb_field *f, -+ unsigned u64s) -+{ -+ unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0; -+ unsigned sb_u64s = le32_to_cpu(sb->sb->u64s) + u64s - old_u64s; -+ -+ BUG_ON(get_order(__vstruct_bytes(struct bch_sb, sb_u64s)) > -+ sb->page_order); -+ -+ if (!f && !u64s) { -+ /* nothing to do: */ -+ } else if (!f) { -+ f = vstruct_last(sb->sb); -+ memset(f, 0, sizeof(u64) * u64s); -+ f->u64s = cpu_to_le32(u64s); -+ f->type = 0; -+ } else { -+ void *src, *dst; -+ -+ src = vstruct_end(f); -+ -+ if (u64s) { -+ f->u64s = cpu_to_le32(u64s); -+ dst = vstruct_end(f); -+ } else { -+ dst = f; -+ } -+ -+ memmove(dst, src, vstruct_end(sb->sb) - src); -+ -+ if (dst > src) -+ memset(src, 0, dst - src); -+ } -+ -+ sb->sb->u64s = cpu_to_le32(sb_u64s); -+ -+ return u64s ? 
f : NULL; -+} -+ -+void bch2_sb_field_delete(struct bch_sb_handle *sb, -+ enum bch_sb_field_type type) -+{ -+ struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type); -+ -+ if (f) -+ __bch2_sb_field_resize(sb, f, 0); -+} -+ -+/* Superblock realloc/free: */ -+ -+void bch2_free_super(struct bch_sb_handle *sb) -+{ -+ if (sb->bio) -+ bio_put(sb->bio); -+ if (!IS_ERR_OR_NULL(sb->bdev)) -+ blkdev_put(sb->bdev, sb->mode); -+ -+ free_pages((unsigned long) sb->sb, sb->page_order); -+ memset(sb, 0, sizeof(*sb)); -+} -+ -+int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) -+{ -+ size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s); -+ unsigned order = get_order(new_bytes); -+ struct bch_sb *new_sb; -+ struct bio *bio; -+ -+ if (sb->sb && sb->page_order >= order) -+ return 0; -+ -+ if (sb->have_layout) { -+ u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits; -+ -+ if (new_bytes > max_bytes) { -+ char buf[BDEVNAME_SIZE]; -+ -+ pr_err("%s: superblock too big: want %zu but have %llu", -+ bdevname(sb->bdev, buf), new_bytes, max_bytes); -+ return -ENOSPC; -+ } -+ } -+ -+ if (sb->page_order >= order && sb->sb) -+ return 0; -+ -+ if (dynamic_fault("bcachefs:add:super_realloc")) -+ return -ENOMEM; -+ -+ if (sb->have_bio) { -+ bio = bio_kmalloc(GFP_KERNEL, 1 << order); -+ if (!bio) -+ return -ENOMEM; -+ -+ if (sb->bio) -+ bio_put(sb->bio); -+ sb->bio = bio; -+ } -+ -+ new_sb = (void *) __get_free_pages(GFP_NOFS|__GFP_ZERO, order); -+ if (!new_sb) -+ return -ENOMEM; -+ -+ if (sb->sb) -+ memcpy(new_sb, sb->sb, PAGE_SIZE << sb->page_order); -+ -+ free_pages((unsigned long) sb->sb, sb->page_order); -+ sb->sb = new_sb; -+ -+ sb->page_order = order; -+ -+ return 0; -+} -+ -+struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb, -+ enum bch_sb_field_type type, -+ unsigned u64s) -+{ -+ struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type); -+ ssize_t old_u64s = f ? 
le32_to_cpu(f->u64s) : 0; -+ ssize_t d = -old_u64s + u64s; -+ -+ if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) -+ return NULL; -+ -+ if (sb->fs_sb) { -+ struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ /* XXX: we're not checking that offline device have enough space */ -+ -+ for_each_online_member(ca, c, i) { -+ struct bch_sb_handle *sb = &ca->disk_sb; -+ -+ if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) { -+ percpu_ref_put(&ca->ref); -+ return NULL; -+ } -+ } -+ } -+ -+ f = bch2_sb_field_get(sb->sb, type); -+ f = __bch2_sb_field_resize(sb, f, u64s); -+ if (f) -+ f->type = cpu_to_le32(type); -+ return f; -+} -+ -+/* Superblock validate: */ -+ -+static inline void __bch2_sb_layout_size_assert(void) -+{ -+ BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512); -+} -+ -+static const char *validate_sb_layout(struct bch_sb_layout *layout) -+{ -+ u64 offset, prev_offset, max_sectors; -+ unsigned i; -+ -+ if (uuid_le_cmp(layout->magic, BCACHE_MAGIC)) -+ return "Not a bcachefs superblock layout"; -+ -+ if (layout->layout_type != 0) -+ return "Invalid superblock layout type"; -+ -+ if (!layout->nr_superblocks) -+ return "Invalid superblock layout: no superblocks"; -+ -+ if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) -+ return "Invalid superblock layout: too many superblocks"; -+ -+ max_sectors = 1 << layout->sb_max_size_bits; -+ -+ prev_offset = le64_to_cpu(layout->sb_offset[0]); -+ -+ for (i = 1; i < layout->nr_superblocks; i++) { -+ offset = le64_to_cpu(layout->sb_offset[i]); -+ -+ if (offset < prev_offset + max_sectors) -+ return "Invalid superblock layout: superblocks overlap"; -+ prev_offset = offset; -+ } -+ -+ return NULL; -+} -+ -+const char *bch2_sb_validate(struct bch_sb_handle *disk_sb) -+{ -+ struct bch_sb *sb = disk_sb->sb; -+ struct bch_sb_field *f; -+ struct bch_sb_field_members *mi; -+ const char *err; -+ u32 version, version_min; -+ u16 block_size; -+ -+ version = le16_to_cpu(sb->version); -+ version_min = version >= bcachefs_metadata_version_new_versioning -+ ? 
le16_to_cpu(sb->version_min) -+ : version; -+ -+ if (version >= bcachefs_metadata_version_max || -+ version_min < bcachefs_metadata_version_min) -+ return "Unsupported superblock version"; -+ -+ if (version_min > version) -+ return "Bad minimum version"; -+ -+ if (sb->features[1] || -+ (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) -+ return "Filesystem has incompatible features"; -+ -+ block_size = le16_to_cpu(sb->block_size); -+ -+ if (!is_power_of_2(block_size) || -+ block_size > PAGE_SECTORS) -+ return "Bad block size"; -+ -+ if (bch2_is_zero(sb->user_uuid.b, sizeof(uuid_le))) -+ return "Bad user UUID"; -+ -+ if (bch2_is_zero(sb->uuid.b, sizeof(uuid_le))) -+ return "Bad internal UUID"; -+ -+ if (!sb->nr_devices || -+ sb->nr_devices <= sb->dev_idx || -+ sb->nr_devices > BCH_SB_MEMBERS_MAX) -+ return "Bad number of member devices"; -+ -+ if (!BCH_SB_META_REPLICAS_WANT(sb) || -+ BCH_SB_META_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX) -+ return "Invalid number of metadata replicas"; -+ -+ if (!BCH_SB_META_REPLICAS_REQ(sb) || -+ BCH_SB_META_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX) -+ return "Invalid number of metadata replicas"; -+ -+ if (!BCH_SB_DATA_REPLICAS_WANT(sb) || -+ BCH_SB_DATA_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX) -+ return "Invalid number of data replicas"; -+ -+ if (!BCH_SB_DATA_REPLICAS_REQ(sb) || -+ BCH_SB_DATA_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX) -+ return "Invalid number of data replicas"; -+ -+ if (BCH_SB_META_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR) -+ return "Invalid metadata checksum type"; -+ -+ if (BCH_SB_DATA_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR) -+ return "Invalid metadata checksum type"; -+ -+ if (BCH_SB_COMPRESSION_TYPE(sb) >= BCH_COMPRESSION_OPT_NR) -+ return "Invalid compression type"; -+ -+ if (!BCH_SB_BTREE_NODE_SIZE(sb)) -+ return "Btree node size not set"; -+ -+ if (!is_power_of_2(BCH_SB_BTREE_NODE_SIZE(sb))) -+ return "Btree node size not a power of two"; -+ -+ if (BCH_SB_GC_RESERVE(sb) < 5) -+ return "gc reserve percentage too small"; -+ -+ if (!sb->time_precision || -+ le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) -+ return "invalid time precision"; -+ -+ /* validate layout */ -+ err = validate_sb_layout(&sb->layout); -+ if (err) -+ return err; -+ -+ vstruct_for_each(sb, f) { -+ if (!f->u64s) -+ return "Invalid superblock: invalid optional field"; -+ -+ if (vstruct_next(f) > vstruct_last(sb)) -+ return "Invalid superblock: invalid optional field"; -+ } -+ -+ /* members must be validated first: */ -+ mi = bch2_sb_get_members(sb); -+ if (!mi) -+ return "Invalid superblock: member info area missing"; -+ -+ err = bch2_sb_field_validate(sb, &mi->field); -+ if (err) -+ return err; -+ -+ vstruct_for_each(sb, f) { -+ if (le32_to_cpu(f->type) == BCH_SB_FIELD_members) -+ continue; -+ -+ err = bch2_sb_field_validate(sb, f); -+ if (err) -+ return err; -+ } -+ -+ return NULL; -+} -+ -+/* device open: */ -+ -+static void bch2_sb_update(struct bch_fs *c) -+{ -+ struct bch_sb *src = c->disk_sb.sb; -+ struct bch_sb_field_members *mi = bch2_sb_get_members(src); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ c->sb.uuid = src->uuid; -+ c->sb.user_uuid = src->user_uuid; -+ c->sb.version = le16_to_cpu(src->version); -+ c->sb.nr_devices = src->nr_devices; -+ c->sb.clean = BCH_SB_CLEAN(src); -+ c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src); -+ c->sb.encoded_extent_max= 1 << BCH_SB_ENCODED_EXTENT_MAX_BITS(src); -+ c->sb.time_base_lo = le64_to_cpu(src->time_base_lo); -+ c->sb.time_base_hi = le32_to_cpu(src->time_base_hi); -+ 
c->sb.time_precision = le32_to_cpu(src->time_precision); -+ c->sb.features = le64_to_cpu(src->features[0]); -+ c->sb.compat = le64_to_cpu(src->compat[0]); -+ -+ for_each_member_device(ca, c, i) -+ ca->mi = bch2_mi_to_cpu(mi->members + i); -+} -+ -+/* doesn't copy member info */ -+static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) -+{ -+ struct bch_sb_field *src_f, *dst_f; -+ struct bch_sb *dst = dst_handle->sb; -+ unsigned i; -+ -+ dst->version = src->version; -+ dst->version_min = src->version_min; -+ dst->seq = src->seq; -+ dst->uuid = src->uuid; -+ dst->user_uuid = src->user_uuid; -+ memcpy(dst->label, src->label, sizeof(dst->label)); -+ -+ dst->block_size = src->block_size; -+ dst->nr_devices = src->nr_devices; -+ -+ dst->time_base_lo = src->time_base_lo; -+ dst->time_base_hi = src->time_base_hi; -+ dst->time_precision = src->time_precision; -+ -+ memcpy(dst->flags, src->flags, sizeof(dst->flags)); -+ memcpy(dst->features, src->features, sizeof(dst->features)); -+ memcpy(dst->compat, src->compat, sizeof(dst->compat)); -+ -+ for (i = 0; i < BCH_SB_FIELD_NR; i++) { -+ if (i == BCH_SB_FIELD_journal) -+ continue; -+ -+ src_f = bch2_sb_field_get(src, i); -+ dst_f = bch2_sb_field_get(dst, i); -+ dst_f = __bch2_sb_field_resize(dst_handle, dst_f, -+ src_f ? le32_to_cpu(src_f->u64s) : 0); -+ -+ if (src_f) -+ memcpy(dst_f, src_f, vstruct_bytes(src_f)); -+ } -+} -+ -+int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src) -+{ -+ struct bch_sb_field_journal *journal_buckets = -+ bch2_sb_get_journal(src); -+ unsigned journal_u64s = journal_buckets -+ ? le32_to_cpu(journal_buckets->field.u64s) -+ : 0; -+ int ret; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ ret = bch2_sb_realloc(&c->disk_sb, -+ le32_to_cpu(src->u64s) - journal_u64s); -+ if (ret) -+ return ret; -+ -+ __copy_super(&c->disk_sb, src); -+ -+ ret = bch2_sb_replicas_to_cpu_replicas(c); -+ if (ret) -+ return ret; -+ -+ ret = bch2_sb_disk_groups_to_cpu(c); -+ if (ret) -+ return ret; -+ -+ bch2_sb_update(c); -+ return 0; -+} -+ -+int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bch_sb *src = c->disk_sb.sb, *dst = ca->disk_sb.sb; -+ struct bch_sb_field_journal *journal_buckets = -+ bch2_sb_get_journal(dst); -+ unsigned journal_u64s = journal_buckets -+ ? 
le32_to_cpu(journal_buckets->field.u64s) -+ : 0; -+ unsigned u64s = le32_to_cpu(src->u64s) + journal_u64s; -+ int ret; -+ -+ ret = bch2_sb_realloc(&ca->disk_sb, u64s); -+ if (ret) -+ return ret; -+ -+ __copy_super(&ca->disk_sb, src); -+ return 0; -+} -+ -+/* read superblock: */ -+ -+static const char *read_one_super(struct bch_sb_handle *sb, u64 offset) -+{ -+ struct bch_csum csum; -+ size_t bytes; -+reread: -+ bio_reset(sb->bio); -+ bio_set_dev(sb->bio, sb->bdev); -+ sb->bio->bi_iter.bi_sector = offset; -+ bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META); -+ bch2_bio_map(sb->bio, sb->sb, PAGE_SIZE << sb->page_order); -+ -+ if (submit_bio_wait(sb->bio)) -+ return "IO error"; -+ -+ if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC)) -+ return "Not a bcachefs superblock"; -+ -+ if (le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_min || -+ le16_to_cpu(sb->sb->version) >= bcachefs_metadata_version_max) -+ return "Unsupported superblock version"; -+ -+ bytes = vstruct_bytes(sb->sb); -+ -+ if (bytes > 512 << sb->sb->layout.sb_max_size_bits) -+ return "Bad superblock: too big"; -+ -+ if (get_order(bytes) > sb->page_order) { -+ if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s))) -+ return "cannot allocate memory"; -+ goto reread; -+ } -+ -+ if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) -+ return "unknown csum type"; -+ -+ /* XXX: verify MACs */ -+ csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb), -+ null_nonce(), sb->sb); -+ -+ if (bch2_crc_cmp(csum, sb->sb->csum)) -+ return "bad checksum reading superblock"; -+ -+ sb->seq = le64_to_cpu(sb->sb->seq); -+ -+ return NULL; -+} -+ -+int bch2_read_super(const char *path, struct bch_opts *opts, -+ struct bch_sb_handle *sb) -+{ -+ u64 offset = opt_get(*opts, sb); -+ struct bch_sb_layout layout; -+ const char *err; -+ __le64 *i; -+ int ret; -+ -+ pr_verbose_init(*opts, ""); -+ -+ memset(sb, 0, sizeof(*sb)); -+ sb->mode = FMODE_READ; -+ sb->have_bio = true; -+ -+ if (!opt_get(*opts, noexcl)) -+ sb->mode |= FMODE_EXCL; -+ -+ if (!opt_get(*opts, nochanges)) -+ sb->mode |= FMODE_WRITE; -+ -+ sb->bdev = blkdev_get_by_path(path, sb->mode, sb); -+ if (IS_ERR(sb->bdev) && -+ PTR_ERR(sb->bdev) == -EACCES && -+ opt_get(*opts, read_only)) { -+ sb->mode &= ~FMODE_WRITE; -+ -+ sb->bdev = blkdev_get_by_path(path, sb->mode, sb); -+ if (!IS_ERR(sb->bdev)) -+ opt_set(*opts, nochanges, true); -+ } -+ -+ if (IS_ERR(sb->bdev)) { -+ ret = PTR_ERR(sb->bdev); -+ goto out; -+ } -+ -+ err = "cannot allocate memory"; -+ ret = bch2_sb_realloc(sb, 0); -+ if (ret) -+ goto err; -+ -+ ret = -EFAULT; -+ err = "dynamic fault"; -+ if (bch2_fs_init_fault("read_super")) -+ goto err; -+ -+ ret = -EINVAL; -+ err = read_one_super(sb, offset); -+ if (!err) -+ goto got_super; -+ -+ if (opt_defined(*opts, sb)) -+ goto err; -+ -+ pr_err("error reading default superblock: %s", err); -+ -+ /* -+ * Error reading primary superblock - read location of backup -+ * superblocks: -+ */ -+ bio_reset(sb->bio); -+ bio_set_dev(sb->bio, sb->bdev); -+ sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR; -+ bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META); -+ /* -+ * use sb buffer to read layout, since sb buffer is page aligned but -+ * layout won't be: -+ */ -+ bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout)); -+ -+ err = "IO error"; -+ if (submit_bio_wait(sb->bio)) -+ goto err; -+ -+ memcpy(&layout, sb->sb, sizeof(layout)); -+ err = validate_sb_layout(&layout); -+ if (err) -+ goto err; -+ -+ for (i = layout.sb_offset; -+ i < layout.sb_offset + layout.nr_superblocks; i++) { -+ 
offset = le64_to_cpu(*i); -+ -+ if (offset == opt_get(*opts, sb)) -+ continue; -+ -+ err = read_one_super(sb, offset); -+ if (!err) -+ goto got_super; -+ } -+ -+ ret = -EINVAL; -+ goto err; -+ -+got_super: -+ err = "Superblock block size smaller than device block size"; -+ ret = -EINVAL; -+ if (le16_to_cpu(sb->sb->block_size) << 9 < -+ bdev_logical_block_size(sb->bdev)) -+ goto err; -+ -+ if (sb->mode & FMODE_WRITE) -+ bdev_get_queue(sb->bdev)->backing_dev_info->capabilities -+ |= BDI_CAP_STABLE_WRITES; -+ ret = 0; -+ sb->have_layout = true; -+out: -+ pr_verbose_init(*opts, "ret %i", ret); -+ return ret; -+err: -+ bch2_free_super(sb); -+ pr_err("error reading superblock: %s", err); -+ goto out; -+} -+ -+/* write superblock: */ -+ -+static void write_super_endio(struct bio *bio) -+{ -+ struct bch_dev *ca = bio->bi_private; -+ -+ /* XXX: return errors directly */ -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write: %s", -+ bch2_blk_status_to_str(bio->bi_status))) -+ ca->sb_write_error = 1; -+ -+ closure_put(&ca->fs->sb_write); -+ percpu_ref_put(&ca->io_ref); -+} -+ -+static void read_back_super(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bch_sb *sb = ca->disk_sb.sb; -+ struct bio *bio = ca->disk_sb.bio; -+ -+ bio_reset(bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]); -+ bio->bi_end_io = write_super_endio; -+ bio->bi_private = ca; -+ bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC|REQ_META); -+ bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE); -+ -+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], -+ bio_sectors(bio)); -+ -+ percpu_ref_get(&ca->io_ref); -+ closure_bio_submit(bio, &c->sb_write); -+} -+ -+static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) -+{ -+ struct bch_sb *sb = ca->disk_sb.sb; -+ struct bio *bio = ca->disk_sb.bio; -+ -+ sb->offset = sb->layout.sb_offset[idx]; -+ -+ SET_BCH_SB_CSUM_TYPE(sb, c->opts.metadata_checksum); -+ sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb), -+ null_nonce(), sb); -+ -+ bio_reset(bio); -+ bio_set_dev(bio, ca->disk_sb.bdev); -+ bio->bi_iter.bi_sector = le64_to_cpu(sb->offset); -+ bio->bi_end_io = write_super_endio; -+ bio->bi_private = ca; -+ bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META); -+ bch2_bio_map(bio, sb, -+ roundup((size_t) vstruct_bytes(sb), -+ bdev_logical_block_size(ca->disk_sb.bdev))); -+ -+ this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb], -+ bio_sectors(bio)); -+ -+ percpu_ref_get(&ca->io_ref); -+ closure_bio_submit(bio, &c->sb_write); -+} -+ -+int bch2_write_super(struct bch_fs *c) -+{ -+ struct closure *cl = &c->sb_write; -+ struct bch_dev *ca; -+ unsigned i, sb = 0, nr_wrote; -+ const char *err; -+ struct bch_devs_mask sb_written; -+ bool wrote, can_mount_without_written, can_mount_with_written; -+ int ret = 0; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ closure_init_stack(cl); -+ memset(&sb_written, 0, sizeof(sb_written)); -+ -+ le64_add_cpu(&c->disk_sb.sb->seq, 1); -+ -+ if (test_bit(BCH_FS_ERROR, &c->flags)) -+ SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1); -+ -+ for_each_online_member(ca, c, i) -+ bch2_sb_from_fs(c, ca); -+ -+ for_each_online_member(ca, c, i) { -+ err = bch2_sb_validate(&ca->disk_sb); -+ if (err) { -+ bch2_fs_inconsistent(c, "sb invalid before write: %s", err); -+ ret = -1; -+ goto out; -+ } -+ } -+ -+ if (c->opts.nochanges) -+ goto out; -+ -+ for_each_online_member(ca, c, i) { -+ __set_bit(ca->dev_idx, sb_written.d); -+ ca->sb_write_error = 0; -+ } -+ -+ for_each_online_member(ca, c, i) 
-+ read_back_super(c, ca); -+ closure_sync(cl); -+ -+ for_each_online_member(ca, c, i) { -+ if (!ca->sb_write_error && -+ ca->disk_sb.seq != -+ le64_to_cpu(ca->sb_read_scratch->seq)) { -+ bch2_fs_fatal_error(c, -+ "Superblock modified by another process"); -+ percpu_ref_put(&ca->io_ref); -+ ret = -EROFS; -+ goto out; -+ } -+ } -+ -+ do { -+ wrote = false; -+ for_each_online_member(ca, c, i) -+ if (!ca->sb_write_error && -+ sb < ca->disk_sb.sb->layout.nr_superblocks) { -+ write_one_super(c, ca, sb); -+ wrote = true; -+ } -+ closure_sync(cl); -+ sb++; -+ } while (wrote); -+ -+ for_each_online_member(ca, c, i) { -+ if (ca->sb_write_error) -+ __clear_bit(ca->dev_idx, sb_written.d); -+ else -+ ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq); -+ } -+ -+ nr_wrote = dev_mask_nr(&sb_written); -+ -+ can_mount_with_written = -+ bch2_have_enough_devs(__bch2_replicas_status(c, sb_written), -+ BCH_FORCE_IF_DEGRADED); -+ -+ for (i = 0; i < ARRAY_SIZE(sb_written.d); i++) -+ sb_written.d[i] = ~sb_written.d[i]; -+ -+ can_mount_without_written = -+ bch2_have_enough_devs(__bch2_replicas_status(c, sb_written), -+ BCH_FORCE_IF_DEGRADED); -+ -+ /* -+ * If we would be able to mount _without_ the devices we successfully -+ * wrote superblocks to, we weren't able to write to enough devices: -+ * -+ * Exception: if we can mount without the successes because we haven't -+ * written anything (new filesystem), we continue if we'd be able to -+ * mount with the devices we did successfully write to: -+ */ -+ if (bch2_fs_fatal_err_on(!nr_wrote || -+ (can_mount_without_written && -+ !can_mount_with_written), c, -+ "Unable to write superblock to sufficient devices")) -+ ret = -1; -+out: -+ /* Make new options visible after they're persistent: */ -+ bch2_sb_update(c); -+ return ret; -+} -+ -+void __bch2_check_set_feature(struct bch_fs *c, unsigned feat) -+{ -+ mutex_lock(&c->sb_lock); -+ if (!(c->sb.features & (1ULL << feat))) { -+ c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat); -+ -+ bch2_write_super(c); -+ } -+ mutex_unlock(&c->sb_lock); -+} -+ -+/* BCH_SB_FIELD_journal: */ -+ -+static int u64_cmp(const void *_l, const void *_r) -+{ -+ u64 l = *((const u64 *) _l), r = *((const u64 *) _r); -+ -+ return l < r ? -1 : l > r ? 
1 : 0; -+} -+ -+static const char *bch2_sb_validate_journal(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_journal *journal = field_to_type(f, journal); -+ struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx; -+ const char *err; -+ unsigned nr; -+ unsigned i; -+ u64 *b; -+ -+ journal = bch2_sb_get_journal(sb); -+ if (!journal) -+ return NULL; -+ -+ nr = bch2_nr_journal_buckets(journal); -+ if (!nr) -+ return NULL; -+ -+ b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL); -+ if (!b) -+ return "cannot allocate memory"; -+ -+ for (i = 0; i < nr; i++) -+ b[i] = le64_to_cpu(journal->buckets[i]); -+ -+ sort(b, nr, sizeof(u64), u64_cmp, NULL); -+ -+ err = "journal bucket at sector 0"; -+ if (!b[0]) -+ goto err; -+ -+ err = "journal bucket before first bucket"; -+ if (m && b[0] < le16_to_cpu(m->first_bucket)) -+ goto err; -+ -+ err = "journal bucket past end of device"; -+ if (m && b[nr - 1] >= le64_to_cpu(m->nbuckets)) -+ goto err; -+ -+ err = "duplicate journal buckets"; -+ for (i = 0; i + 1 < nr; i++) -+ if (b[i] == b[i + 1]) -+ goto err; -+ -+ err = NULL; -+err: -+ kfree(b); -+ return err; -+} -+ -+static const struct bch_sb_field_ops bch_sb_field_ops_journal = { -+ .validate = bch2_sb_validate_journal, -+}; -+ -+/* BCH_SB_FIELD_members: */ -+ -+static const char *bch2_sb_validate_members(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_members *mi = field_to_type(f, members); -+ struct bch_member *m; -+ -+ if ((void *) (mi->members + sb->nr_devices) > -+ vstruct_end(&mi->field)) -+ return "Invalid superblock: bad member info"; -+ -+ for (m = mi->members; -+ m < mi->members + sb->nr_devices; -+ m++) { -+ if (!bch2_member_exists(m)) -+ continue; -+ -+ if (le64_to_cpu(m->nbuckets) > LONG_MAX) -+ return "Too many buckets"; -+ -+ if (le64_to_cpu(m->nbuckets) - -+ le16_to_cpu(m->first_bucket) < BCH_MIN_NR_NBUCKETS) -+ return "Not enough buckets"; -+ -+ if (le16_to_cpu(m->bucket_size) < -+ le16_to_cpu(sb->block_size)) -+ return "bucket size smaller than block size"; -+ -+ if (le16_to_cpu(m->bucket_size) < -+ BCH_SB_BTREE_NODE_SIZE(sb)) -+ return "bucket size smaller than btree node size"; -+ } -+ -+ return NULL; -+} -+ -+static const struct bch_sb_field_ops bch_sb_field_ops_members = { -+ .validate = bch2_sb_validate_members, -+}; -+ -+/* BCH_SB_FIELD_crypt: */ -+ -+static const char *bch2_sb_validate_crypt(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_crypt *crypt = field_to_type(f, crypt); -+ -+ if (vstruct_bytes(&crypt->field) != sizeof(*crypt)) -+ return "invalid field crypt: wrong size"; -+ -+ if (BCH_CRYPT_KDF_TYPE(crypt)) -+ return "invalid field crypt: bad kdf type"; -+ -+ return NULL; -+} -+ -+static const struct bch_sb_field_ops bch_sb_field_ops_crypt = { -+ .validate = bch2_sb_validate_crypt, -+}; -+ -+/* BCH_SB_FIELD_clean: */ -+ -+void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write) -+{ -+ struct jset_entry *entry; -+ -+ for (entry = clean->start; -+ entry < (struct jset_entry *) vstruct_end(&clean->field); -+ entry = vstruct_next(entry)) -+ bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write); -+} -+ -+int bch2_fs_mark_dirty(struct bch_fs *c) -+{ -+ int ret; -+ -+ /* -+ * Unconditionally write superblock, to verify it hasn't changed before -+ * we go rw: -+ */ -+ -+ mutex_lock(&c->sb_lock); -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite; -+ c->disk_sb.sb->features[0] |= 1ULL << 
BCH_FEATURE_extents_above_btree_updates; -+ c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_updates_journalled; -+ ret = bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+static void -+entry_init_u64s(struct jset_entry *entry, unsigned u64s) -+{ -+ memset(entry, 0, u64s * sizeof(u64)); -+ -+ /* -+ * The u64s field counts from the start of data, ignoring the shared -+ * fields. -+ */ -+ entry->u64s = u64s - 1; -+} -+ -+static void -+entry_init_size(struct jset_entry *entry, size_t size) -+{ -+ unsigned u64s = DIV_ROUND_UP(size, sizeof(u64)); -+ entry_init_u64s(entry, u64s); -+} -+ -+struct jset_entry * -+bch2_journal_super_entries_add_common(struct bch_fs *c, -+ struct jset_entry *entry, -+ u64 journal_seq) -+{ -+ unsigned i; -+ -+ percpu_down_write(&c->mark_lock); -+ -+ if (!journal_seq) { -+ bch2_fs_usage_acc_to_base(c, 0); -+ bch2_fs_usage_acc_to_base(c, 1); -+ } else { -+ bch2_fs_usage_acc_to_base(c, journal_seq & 1); -+ } -+ -+ { -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ -+ entry_init_size(entry, sizeof(*u)); -+ u->entry.type = BCH_JSET_ENTRY_usage; -+ u->entry.btree_id = FS_USAGE_INODES; -+ u->v = cpu_to_le64(c->usage_base->nr_inodes); -+ -+ entry = vstruct_next(entry); -+ } -+ -+ { -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ -+ entry_init_size(entry, sizeof(*u)); -+ u->entry.type = BCH_JSET_ENTRY_usage; -+ u->entry.btree_id = FS_USAGE_KEY_VERSION; -+ u->v = cpu_to_le64(atomic64_read(&c->key_version)); -+ -+ entry = vstruct_next(entry); -+ } -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) { -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ -+ entry_init_size(entry, sizeof(*u)); -+ u->entry.type = BCH_JSET_ENTRY_usage; -+ u->entry.btree_id = FS_USAGE_RESERVED; -+ u->entry.level = i; -+ u->v = cpu_to_le64(c->usage_base->persistent_reserved[i]); -+ -+ entry = vstruct_next(entry); -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ struct jset_entry_data_usage *u = -+ container_of(entry, struct jset_entry_data_usage, entry); -+ -+ entry_init_size(entry, sizeof(*u) + e->nr_devs); -+ u->entry.type = BCH_JSET_ENTRY_data_usage; -+ u->v = cpu_to_le64(c->usage_base->replicas[i]); -+ memcpy(&u->r, e, replicas_entry_bytes(e)); -+ -+ entry = vstruct_next(entry); -+ } -+ -+ percpu_up_write(&c->mark_lock); -+ -+ return entry; -+} -+ -+void bch2_fs_mark_clean(struct bch_fs *c) -+{ -+ struct bch_sb_field_clean *sb_clean; -+ struct jset_entry *entry; -+ unsigned u64s; -+ -+ mutex_lock(&c->sb_lock); -+ if (BCH_SB_CLEAN(c->disk_sb.sb)) -+ goto out; -+ -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, true); -+ -+ c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO; -+ c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA; -+ c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_extents_above_btree_updates); -+ c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_btree_updates_journalled); -+ -+ u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved; -+ -+ sb_clean = bch2_sb_resize_clean(&c->disk_sb, u64s); -+ if (!sb_clean) { -+ bch_err(c, "error resizing superblock while setting filesystem clean"); -+ goto out; -+ } -+ -+ sb_clean->flags = 0; -+ sb_clean->read_clock = cpu_to_le16(c->bucket_clock[READ].hand); -+ sb_clean->write_clock = cpu_to_le16(c->bucket_clock[WRITE].hand); -+ sb_clean->journal_seq = 
cpu_to_le64(journal_cur_seq(&c->journal) - 1); -+ -+ /* Trying to catch outstanding bug: */ -+ BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX); -+ -+ entry = sb_clean->start; -+ entry = bch2_journal_super_entries_add_common(c, entry, 0); -+ entry = bch2_btree_roots_to_journal_entries(c, entry, entry); -+ BUG_ON((void *) entry > vstruct_end(&sb_clean->field)); -+ -+ memset(entry, 0, -+ vstruct_end(&sb_clean->field) - (void *) entry); -+ -+ if (le16_to_cpu(c->disk_sb.sb->version) < -+ bcachefs_metadata_version_bkey_renumber) -+ bch2_sb_clean_renumber(sb_clean, WRITE); -+ -+ bch2_write_super(c); -+out: -+ mutex_unlock(&c->sb_lock); -+} -+ -+static const char *bch2_sb_validate_clean(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_clean *clean = field_to_type(f, clean); -+ -+ if (vstruct_bytes(&clean->field) < sizeof(*clean)) -+ return "invalid field crypt: wrong size"; -+ -+ return NULL; -+} -+ -+static const struct bch_sb_field_ops bch_sb_field_ops_clean = { -+ .validate = bch2_sb_validate_clean, -+}; -+ -+static const struct bch_sb_field_ops *bch2_sb_field_ops[] = { -+#define x(f, nr) \ -+ [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f, -+ BCH_SB_FIELDS() -+#undef x -+}; -+ -+static const char *bch2_sb_field_validate(struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ unsigned type = le32_to_cpu(f->type); -+ -+ return type < BCH_SB_FIELD_NR -+ ? bch2_sb_field_ops[type]->validate(sb, f) -+ : NULL; -+} -+ -+void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ unsigned type = le32_to_cpu(f->type); -+ const struct bch_sb_field_ops *ops = type < BCH_SB_FIELD_NR -+ ? bch2_sb_field_ops[type] : NULL; -+ -+ if (ops) -+ pr_buf(out, "%s", bch2_sb_fields[type]); -+ else -+ pr_buf(out, "(unknown field %u)", type); -+ -+ pr_buf(out, " (size %llu):", vstruct_bytes(f)); -+ -+ if (ops && ops->to_text) -+ bch2_sb_field_ops[type]->to_text(out, sb, f); -+} -diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h -new file mode 100644 -index 000000000000..7a068158efca ---- /dev/null -+++ b/fs/bcachefs/super-io.h -@@ -0,0 +1,137 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SUPER_IO_H -+#define _BCACHEFS_SUPER_IO_H -+ -+#include "extents.h" -+#include "eytzinger.h" -+#include "super_types.h" -+#include "super.h" -+ -+#include -+ -+struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type); -+struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *, -+ enum bch_sb_field_type, unsigned); -+void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type); -+ -+#define field_to_type(_f, _name) \ -+ container_of_or_null(_f, struct bch_sb_field_##_name, field) -+ -+#define x(_name, _nr) \ -+static inline struct bch_sb_field_##_name * \ -+bch2_sb_get_##_name(struct bch_sb *sb) \ -+{ \ -+ return field_to_type(bch2_sb_field_get(sb, \ -+ BCH_SB_FIELD_##_name), _name); \ -+} \ -+ \ -+static inline struct bch_sb_field_##_name * \ -+bch2_sb_resize_##_name(struct bch_sb_handle *sb, unsigned u64s) \ -+{ \ -+ return field_to_type(bch2_sb_field_resize(sb, \ -+ BCH_SB_FIELD_##_name, u64s), _name); \ -+} -+ -+BCH_SB_FIELDS() -+#undef x -+ -+extern const char * const bch2_sb_fields[]; -+ -+struct bch_sb_field_ops { -+ const char * (*validate)(struct bch_sb *, struct bch_sb_field *); -+ void (*to_text)(struct printbuf *, struct bch_sb *, -+ struct bch_sb_field *); -+}; -+ -+static inline __le64 bch2_sb_magic(struct bch_fs *c) -+{ -+ __le64 ret; -+ memcpy(&ret, &c->sb.uuid, sizeof(ret)); -+ return 
ret; -+} -+ -+static inline __u64 jset_magic(struct bch_fs *c) -+{ -+ return __le64_to_cpu(bch2_sb_magic(c) ^ JSET_MAGIC); -+} -+ -+static inline __u64 bset_magic(struct bch_fs *c) -+{ -+ return __le64_to_cpu(bch2_sb_magic(c) ^ BSET_MAGIC); -+} -+ -+int bch2_sb_to_fs(struct bch_fs *, struct bch_sb *); -+int bch2_sb_from_fs(struct bch_fs *, struct bch_dev *); -+ -+void bch2_free_super(struct bch_sb_handle *); -+int bch2_sb_realloc(struct bch_sb_handle *, unsigned); -+ -+const char *bch2_sb_validate(struct bch_sb_handle *); -+ -+int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *); -+int bch2_write_super(struct bch_fs *); -+void __bch2_check_set_feature(struct bch_fs *, unsigned); -+ -+static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) -+{ -+ if (!(c->sb.features & (1ULL << feat))) -+ __bch2_check_set_feature(c, feat); -+} -+ -+/* BCH_SB_FIELD_journal: */ -+ -+static inline unsigned bch2_nr_journal_buckets(struct bch_sb_field_journal *j) -+{ -+ return j -+ ? (__le64 *) vstruct_end(&j->field) - j->buckets -+ : 0; -+} -+ -+/* BCH_SB_FIELD_members: */ -+ -+static inline bool bch2_member_exists(struct bch_member *m) -+{ -+ return !bch2_is_zero(m->uuid.b, sizeof(uuid_le)); -+} -+ -+static inline bool bch2_dev_exists(struct bch_sb *sb, -+ struct bch_sb_field_members *mi, -+ unsigned dev) -+{ -+ return dev < sb->nr_devices && -+ bch2_member_exists(&mi->members[dev]); -+} -+ -+static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) -+{ -+ return (struct bch_member_cpu) { -+ .nbuckets = le64_to_cpu(mi->nbuckets), -+ .first_bucket = le16_to_cpu(mi->first_bucket), -+ .bucket_size = le16_to_cpu(mi->bucket_size), -+ .group = BCH_MEMBER_GROUP(mi), -+ .state = BCH_MEMBER_STATE(mi), -+ .replacement = BCH_MEMBER_REPLACEMENT(mi), -+ .discard = BCH_MEMBER_DISCARD(mi), -+ .data_allowed = BCH_MEMBER_DATA_ALLOWED(mi), -+ .durability = BCH_MEMBER_DURABILITY(mi) -+ ? BCH_MEMBER_DURABILITY(mi) - 1 -+ : 1, -+ .valid = !bch2_is_zero(mi->uuid.b, sizeof(uuid_le)), -+ }; -+} -+ -+/* BCH_SB_FIELD_clean: */ -+ -+struct jset_entry * -+bch2_journal_super_entries_add_common(struct bch_fs *, -+ struct jset_entry *, u64); -+ -+void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int); -+ -+int bch2_fs_mark_dirty(struct bch_fs *); -+void bch2_fs_mark_clean(struct bch_fs *); -+ -+void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, -+ struct bch_sb_field *); -+ -+#endif /* _BCACHEFS_SUPER_IO_H */ -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -new file mode 100644 -index 000000000000..015bbd9f21fd ---- /dev/null -+++ b/fs/bcachefs/super.c -@@ -0,0 +1,2037 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * bcachefs setup/teardown code, and some metadata io - read a superblock and -+ * figure out what to do with it. -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. 
-+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "bkey_sort.h" -+#include "btree_cache.h" -+#include "btree_gc.h" -+#include "btree_key_cache.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "chardev.h" -+#include "checksum.h" -+#include "clock.h" -+#include "compress.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "fs.h" -+#include "fs-io.h" -+#include "fsck.h" -+#include "inode.h" -+#include "io.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "move.h" -+#include "migrate.h" -+#include "movinggc.h" -+#include "quota.h" -+#include "rebalance.h" -+#include "recovery.h" -+#include "replicas.h" -+#include "super.h" -+#include "super-io.h" -+#include "sysfs.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Kent Overstreet "); -+ -+#define KTYPE(type) \ -+struct kobj_type type ## _ktype = { \ -+ .release = type ## _release, \ -+ .sysfs_ops = &type ## _sysfs_ops, \ -+ .default_attrs = type ## _files \ -+} -+ -+static void bch2_fs_release(struct kobject *); -+static void bch2_dev_release(struct kobject *); -+ -+static void bch2_fs_internal_release(struct kobject *k) -+{ -+} -+ -+static void bch2_fs_opts_dir_release(struct kobject *k) -+{ -+} -+ -+static void bch2_fs_time_stats_release(struct kobject *k) -+{ -+} -+ -+static KTYPE(bch2_fs); -+static KTYPE(bch2_fs_internal); -+static KTYPE(bch2_fs_opts_dir); -+static KTYPE(bch2_fs_time_stats); -+static KTYPE(bch2_dev); -+ -+static struct kset *bcachefs_kset; -+static LIST_HEAD(bch_fs_list); -+static DEFINE_MUTEX(bch_fs_list_lock); -+ -+static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait); -+ -+static void bch2_dev_free(struct bch_dev *); -+static int bch2_dev_alloc(struct bch_fs *, unsigned); -+static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *); -+static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *); -+ -+struct bch_fs *bch2_bdev_to_fs(struct block_device *bdev) -+{ -+ struct bch_fs *c; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ mutex_lock(&bch_fs_list_lock); -+ rcu_read_lock(); -+ -+ list_for_each_entry(c, &bch_fs_list, list) -+ for_each_member_device_rcu(ca, c, i, NULL) -+ if (ca->disk_sb.bdev == bdev) { -+ closure_get(&c->cl); -+ goto found; -+ } -+ c = NULL; -+found: -+ rcu_read_unlock(); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ return c; -+} -+ -+static struct bch_fs *__bch2_uuid_to_fs(uuid_le uuid) -+{ -+ struct bch_fs *c; -+ -+ lockdep_assert_held(&bch_fs_list_lock); -+ -+ list_for_each_entry(c, &bch_fs_list, list) -+ if (!memcmp(&c->disk_sb.sb->uuid, &uuid, sizeof(uuid_le))) -+ return c; -+ -+ return NULL; -+} -+ -+struct bch_fs *bch2_uuid_to_fs(uuid_le uuid) -+{ -+ struct bch_fs *c; -+ -+ mutex_lock(&bch_fs_list_lock); -+ c = __bch2_uuid_to_fs(uuid); -+ if (c) -+ closure_get(&c->cl); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ return c; -+} -+ -+/* Filesystem RO/RW: */ -+ -+/* -+ * For startup/shutdown of RW stuff, the dependencies are: -+ * -+ * - foreground writes depend on copygc and rebalance (to free up space) -+ * -+ * - copygc and rebalance depend on mark and sweep gc (they actually probably -+ * don't because they either reserve ahead of time or don't block if -+ * allocations fail, but allocations can require mark and sweep gc to run -+ * because of generation number 
wraparound) -+ * -+ * - all of the above depends on the allocator threads -+ * -+ * - allocator depends on the journal (when it rewrites prios and gens) -+ */ -+ -+static void __bch2_fs_read_only(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i, clean_passes = 0; -+ -+ bch2_rebalance_stop(c); -+ bch2_copygc_stop(c); -+ bch2_gc_thread_stop(c); -+ -+ bch2_io_timer_del(&c->io_clock[READ], &c->bucket_clock[READ].rescale); -+ bch2_io_timer_del(&c->io_clock[WRITE], &c->bucket_clock[WRITE].rescale); -+ -+ /* -+ * Flush journal before stopping allocators, because flushing journal -+ * blacklist entries involves allocating new btree nodes: -+ */ -+ bch2_journal_flush_all_pins(&c->journal); -+ -+ /* -+ * If the allocator threads didn't all start up, the btree updates to -+ * write out alloc info aren't going to work: -+ */ -+ if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags)) -+ goto nowrote_alloc; -+ -+ bch_verbose(c, "flushing journal and stopping allocators"); -+ -+ bch2_journal_flush_all_pins(&c->journal); -+ set_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags); -+ -+ do { -+ clean_passes++; -+ -+ if (bch2_journal_flush_all_pins(&c->journal)) -+ clean_passes = 0; -+ -+ /* -+ * In flight interior btree updates will generate more journal -+ * updates and btree updates (alloc btree): -+ */ -+ if (bch2_btree_interior_updates_nr_pending(c)) { -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+ clean_passes = 0; -+ } -+ flush_work(&c->btree_interior_update_work); -+ -+ if (bch2_journal_flush_all_pins(&c->journal)) -+ clean_passes = 0; -+ } while (clean_passes < 2); -+ bch_verbose(c, "flushing journal and stopping allocators complete"); -+ -+ set_bit(BCH_FS_ALLOC_CLEAN, &c->flags); -+nowrote_alloc: -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_nr_pending(c)); -+ flush_work(&c->btree_interior_update_work); -+ -+ for_each_member_device(ca, c, i) -+ bch2_dev_allocator_stop(ca); -+ -+ clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); -+ clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags); -+ -+ bch2_fs_journal_stop(&c->journal); -+ -+ /* -+ * the journal kicks off btree writes via reclaim - wait for in flight -+ * writes after stopping journal: -+ */ -+ if (test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) -+ bch2_btree_flush_all_writes(c); -+ else -+ bch2_btree_verify_flushed(c); -+ -+ /* -+ * After stopping journal: -+ */ -+ for_each_member_device(ca, c, i) -+ bch2_dev_allocator_remove(c, ca); -+} -+ -+static void bch2_writes_disabled(struct percpu_ref *writes) -+{ -+ struct bch_fs *c = container_of(writes, struct bch_fs, writes); -+ -+ set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); -+ wake_up(&bch_read_only_wait); -+} -+ -+void bch2_fs_read_only(struct bch_fs *c) -+{ -+ if (!test_bit(BCH_FS_RW, &c->flags)) { -+ cancel_delayed_work_sync(&c->journal.reclaim_work); -+ return; -+ } -+ -+ BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); -+ -+ /* -+ * Block new foreground-end write operations from starting - any new -+ * writes will return -EROFS: -+ * -+ * (This is really blocking new _allocations_, writes to previously -+ * allocated space can still happen until stopping the allocator in -+ * bch2_dev_allocator_stop()). 
-+ */ -+ percpu_ref_kill(&c->writes); -+ -+ cancel_work_sync(&c->ec_stripe_delete_work); -+ cancel_delayed_work(&c->pd_controllers_update); -+ -+ /* -+ * If we're not doing an emergency shutdown, we want to wait on -+ * outstanding writes to complete so they don't see spurious errors due -+ * to shutting down the allocator: -+ * -+ * If we are doing an emergency shutdown outstanding writes may -+ * hang until we shutdown the allocator so we don't want to wait -+ * on outstanding writes before shutting everything down - but -+ * we do need to wait on them before returning and signalling -+ * that going RO is complete: -+ */ -+ wait_event(bch_read_only_wait, -+ test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) || -+ test_bit(BCH_FS_EMERGENCY_RO, &c->flags)); -+ -+ __bch2_fs_read_only(c); -+ -+ wait_event(bch_read_only_wait, -+ test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); -+ -+ clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); -+ -+ if (!bch2_journal_error(&c->journal) && -+ !test_bit(BCH_FS_ERROR, &c->flags) && -+ !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) && -+ test_bit(BCH_FS_STARTED, &c->flags) && -+ test_bit(BCH_FS_ALLOC_CLEAN, &c->flags) && -+ !c->opts.norecovery) { -+ bch_verbose(c, "marking filesystem clean"); -+ bch2_fs_mark_clean(c); -+ } -+ -+ clear_bit(BCH_FS_RW, &c->flags); -+} -+ -+static void bch2_fs_read_only_work(struct work_struct *work) -+{ -+ struct bch_fs *c = -+ container_of(work, struct bch_fs, read_only_work); -+ -+ down_write(&c->state_lock); -+ bch2_fs_read_only(c); -+ up_write(&c->state_lock); -+} -+ -+static void bch2_fs_read_only_async(struct bch_fs *c) -+{ -+ queue_work(system_long_wq, &c->read_only_work); -+} -+ -+bool bch2_fs_emergency_read_only(struct bch_fs *c) -+{ -+ bool ret = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags); -+ -+ bch2_journal_halt(&c->journal); -+ bch2_fs_read_only_async(c); -+ -+ wake_up(&bch_read_only_wait); -+ return ret; -+} -+ -+static int bch2_fs_read_write_late(struct bch_fs *c) -+{ -+ int ret; -+ -+ ret = bch2_gc_thread_start(c); -+ if (ret) { -+ bch_err(c, "error starting gc thread"); -+ return ret; -+ } -+ -+ ret = bch2_copygc_start(c); -+ if (ret) { -+ bch_err(c, "error starting copygc thread"); -+ return ret; -+ } -+ -+ ret = bch2_rebalance_start(c); -+ if (ret) { -+ bch_err(c, "error starting rebalance thread"); -+ return ret; -+ } -+ -+ schedule_delayed_work(&c->pd_controllers_update, 5 * HZ); -+ -+ schedule_work(&c->ec_stripe_delete_work); -+ -+ return 0; -+} -+ -+static int __bch2_fs_read_write(struct bch_fs *c, bool early) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ int ret; -+ -+ if (test_bit(BCH_FS_RW, &c->flags)) -+ return 0; -+ -+ /* -+ * nochanges is used for fsck -n mode - we have to allow going rw -+ * during recovery for that to work: -+ */ -+ if (c->opts.norecovery || -+ (c->opts.nochanges && -+ (!early || c->opts.read_only))) -+ return -EROFS; -+ -+ ret = bch2_fs_mark_dirty(c); -+ if (ret) -+ goto err; -+ -+ /* -+ * We need to write out a journal entry before we start doing btree -+ * updates, to ensure that on unclean shutdown new journal blacklist -+ * entries are created: -+ */ -+ bch2_journal_meta(&c->journal); -+ -+ clear_bit(BCH_FS_ALLOC_CLEAN, &c->flags); -+ -+ for_each_rw_member(ca, c, i) -+ bch2_dev_allocator_add(c, ca); -+ bch2_recalc_capacity(c); -+ -+ bch2_io_timer_add(&c->io_clock[READ], &c->bucket_clock[READ].rescale); -+ bch2_io_timer_add(&c->io_clock[WRITE], &c->bucket_clock[WRITE].rescale); -+ -+ for_each_rw_member(ca, c, i) { -+ ret = bch2_dev_allocator_start(ca); -+ if (ret) { -+ 
bch_err(c, "error starting allocator threads"); -+ percpu_ref_put(&ca->io_ref); -+ goto err; -+ } -+ } -+ -+ set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); -+ -+ if (!early) { -+ ret = bch2_fs_read_write_late(c); -+ if (ret) -+ goto err; -+ } -+ -+ percpu_ref_reinit(&c->writes); -+ set_bit(BCH_FS_RW, &c->flags); -+ -+ queue_delayed_work(c->journal_reclaim_wq, -+ &c->journal.reclaim_work, 0); -+ return 0; -+err: -+ __bch2_fs_read_only(c); -+ return ret; -+} -+ -+int bch2_fs_read_write(struct bch_fs *c) -+{ -+ return __bch2_fs_read_write(c, false); -+} -+ -+int bch2_fs_read_write_early(struct bch_fs *c) -+{ -+ lockdep_assert_held(&c->state_lock); -+ -+ return __bch2_fs_read_write(c, true); -+} -+ -+/* Filesystem startup/shutdown: */ -+ -+static void __bch2_fs_free(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ for (i = 0; i < BCH_TIME_STAT_NR; i++) -+ bch2_time_stats_exit(&c->times[i]); -+ -+ bch2_fs_quota_exit(c); -+ bch2_fs_fsio_exit(c); -+ bch2_fs_ec_exit(c); -+ bch2_fs_encryption_exit(c); -+ bch2_fs_io_exit(c); -+ bch2_fs_btree_interior_update_exit(c); -+ bch2_fs_btree_iter_exit(c); -+ bch2_fs_btree_key_cache_exit(&c->btree_key_cache); -+ bch2_fs_btree_cache_exit(c); -+ bch2_fs_journal_exit(&c->journal); -+ bch2_io_clock_exit(&c->io_clock[WRITE]); -+ bch2_io_clock_exit(&c->io_clock[READ]); -+ bch2_fs_compress_exit(c); -+ bch2_journal_keys_free(&c->journal_keys); -+ bch2_journal_entries_free(&c->journal_entries); -+ percpu_free_rwsem(&c->mark_lock); -+ kfree(c->usage_scratch); -+ free_percpu(c->usage[1]); -+ free_percpu(c->usage[0]); -+ kfree(c->usage_base); -+ free_percpu(c->pcpu); -+ mempool_exit(&c->large_bkey_pool); -+ mempool_exit(&c->btree_bounce_pool); -+ bioset_exit(&c->btree_bio); -+ mempool_exit(&c->fill_iter); -+ percpu_ref_exit(&c->writes); -+ kfree(c->replicas.entries); -+ kfree(c->replicas_gc.entries); -+ kfree(rcu_dereference_protected(c->disk_groups, 1)); -+ kfree(c->journal_seq_blacklist_table); -+ free_heap(&c->copygc_heap); -+ -+ if (c->journal_reclaim_wq) -+ destroy_workqueue(c->journal_reclaim_wq); -+ if (c->copygc_wq) -+ destroy_workqueue(c->copygc_wq); -+ if (c->wq) -+ destroy_workqueue(c->wq); -+ -+ free_pages((unsigned long) c->disk_sb.sb, -+ c->disk_sb.page_order); -+ kvpfree(c, sizeof(*c)); -+ module_put(THIS_MODULE); -+} -+ -+static void bch2_fs_release(struct kobject *kobj) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); -+ -+ __bch2_fs_free(c); -+} -+ -+void __bch2_fs_stop(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ bch_verbose(c, "shutting down"); -+ -+ set_bit(BCH_FS_STOPPING, &c->flags); -+ -+ cancel_work_sync(&c->journal_seq_blacklist_gc_work); -+ -+ down_write(&c->state_lock); -+ bch2_fs_read_only(c); -+ up_write(&c->state_lock); -+ -+ for_each_member_device(ca, c, i) -+ if (ca->kobj.state_in_sysfs && -+ ca->disk_sb.bdev) -+ sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, -+ "bcachefs"); -+ -+ if (c->kobj.state_in_sysfs) -+ kobject_del(&c->kobj); -+ -+ bch2_fs_debug_exit(c); -+ bch2_fs_chardev_exit(c); -+ -+ kobject_put(&c->time_stats); -+ kobject_put(&c->opts_dir); -+ kobject_put(&c->internal); -+ -+ /* btree prefetch might have kicked off reads in the background: */ -+ bch2_btree_flush_all_reads(c); -+ -+ for_each_member_device(ca, c, i) -+ cancel_work_sync(&ca->io_error_work); -+ -+ cancel_work_sync(&c->btree_write_error_work); -+ cancel_delayed_work_sync(&c->pd_controllers_update); -+ cancel_work_sync(&c->read_only_work); -+ -+ for (i = 0; i < c->sb.nr_devices; i++) -+ if (c->devs[i]) -+ 
bch2_free_super(&c->devs[i]->disk_sb); -+} -+ -+void bch2_fs_free(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ mutex_lock(&bch_fs_list_lock); -+ list_del(&c->list); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ closure_sync(&c->cl); -+ closure_debug_destroy(&c->cl); -+ -+ for (i = 0; i < c->sb.nr_devices; i++) -+ if (c->devs[i]) -+ bch2_dev_free(rcu_dereference_protected(c->devs[i], 1)); -+ -+ bch_verbose(c, "shutdown complete"); -+ -+ kobject_put(&c->kobj); -+} -+ -+void bch2_fs_stop(struct bch_fs *c) -+{ -+ __bch2_fs_stop(c); -+ bch2_fs_free(c); -+} -+ -+static const char *bch2_fs_online(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ const char *err = NULL; -+ unsigned i; -+ int ret; -+ -+ lockdep_assert_held(&bch_fs_list_lock); -+ -+ if (!list_empty(&c->list)) -+ return NULL; -+ -+ if (__bch2_uuid_to_fs(c->sb.uuid)) -+ return "filesystem UUID already open"; -+ -+ ret = bch2_fs_chardev_init(c); -+ if (ret) -+ return "error creating character device"; -+ -+ bch2_fs_debug_init(c); -+ -+ if (kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) || -+ kobject_add(&c->internal, &c->kobj, "internal") || -+ kobject_add(&c->opts_dir, &c->kobj, "options") || -+ kobject_add(&c->time_stats, &c->kobj, "time_stats") || -+ bch2_opts_create_sysfs_files(&c->opts_dir)) -+ return "error creating sysfs objects"; -+ -+ down_write(&c->state_lock); -+ -+ err = "error creating sysfs objects"; -+ __for_each_member_device(ca, c, i, NULL) -+ if (bch2_dev_sysfs_online(c, ca)) -+ goto err; -+ -+ list_add(&c->list, &bch_fs_list); -+ err = NULL; -+err: -+ up_write(&c->state_lock); -+ return err; -+} -+ -+static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) -+{ -+ struct bch_sb_field_members *mi; -+ struct bch_fs *c; -+ unsigned i, iter_size; -+ const char *err; -+ -+ pr_verbose_init(opts, ""); -+ -+ c = kvpmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO); -+ if (!c) -+ goto out; -+ -+ __module_get(THIS_MODULE); -+ -+ closure_init(&c->cl, NULL); -+ -+ c->kobj.kset = bcachefs_kset; -+ kobject_init(&c->kobj, &bch2_fs_ktype); -+ kobject_init(&c->internal, &bch2_fs_internal_ktype); -+ kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype); -+ kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype); -+ -+ c->minor = -1; -+ c->disk_sb.fs_sb = true; -+ -+ init_rwsem(&c->state_lock); -+ mutex_init(&c->sb_lock); -+ mutex_init(&c->replicas_gc_lock); -+ mutex_init(&c->btree_root_lock); -+ INIT_WORK(&c->read_only_work, bch2_fs_read_only_work); -+ -+ init_rwsem(&c->gc_lock); -+ -+ for (i = 0; i < BCH_TIME_STAT_NR; i++) -+ bch2_time_stats_init(&c->times[i]); -+ -+ bch2_fs_copygc_init(c); -+ bch2_fs_btree_key_cache_init_early(&c->btree_key_cache); -+ bch2_fs_allocator_background_init(c); -+ bch2_fs_allocator_foreground_init(c); -+ bch2_fs_rebalance_init(c); -+ bch2_fs_quota_init(c); -+ -+ INIT_LIST_HEAD(&c->list); -+ -+ mutex_init(&c->usage_scratch_lock); -+ -+ mutex_init(&c->bio_bounce_pages_lock); -+ -+ bio_list_init(&c->btree_write_error_list); -+ spin_lock_init(&c->btree_write_error_lock); -+ INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work); -+ -+ INIT_WORK(&c->journal_seq_blacklist_gc_work, -+ bch2_blacklist_entries_gc); -+ -+ INIT_LIST_HEAD(&c->journal_entries); -+ -+ INIT_LIST_HEAD(&c->fsck_errors); -+ mutex_init(&c->fsck_error_lock); -+ -+ INIT_LIST_HEAD(&c->ec_stripe_head_list); -+ mutex_init(&c->ec_stripe_head_lock); -+ -+ INIT_LIST_HEAD(&c->ec_stripe_new_list); -+ mutex_init(&c->ec_stripe_new_lock); -+ -+ spin_lock_init(&c->ec_stripes_heap_lock); -+ -+ seqcount_init(&c->gc_pos_lock); 
-+ -+ seqcount_init(&c->usage_lock); -+ -+ sema_init(&c->io_in_flight, 64); -+ -+ c->copy_gc_enabled = 1; -+ c->rebalance.enabled = 1; -+ c->promote_whole_extents = true; -+ -+ c->journal.write_time = &c->times[BCH_TIME_journal_write]; -+ c->journal.delay_time = &c->times[BCH_TIME_journal_delay]; -+ c->journal.blocked_time = &c->times[BCH_TIME_blocked_journal]; -+ c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq]; -+ -+ bch2_fs_btree_cache_init_early(&c->btree_cache); -+ -+ if (percpu_init_rwsem(&c->mark_lock)) -+ goto err; -+ -+ mutex_lock(&c->sb_lock); -+ -+ if (bch2_sb_to_fs(c, sb)) { -+ mutex_unlock(&c->sb_lock); -+ goto err; -+ } -+ -+ mutex_unlock(&c->sb_lock); -+ -+ scnprintf(c->name, sizeof(c->name), "%pU", &c->sb.user_uuid); -+ -+ c->opts = bch2_opts_default; -+ bch2_opts_apply(&c->opts, bch2_opts_from_sb(sb)); -+ bch2_opts_apply(&c->opts, opts); -+ -+ c->block_bits = ilog2(c->opts.block_size); -+ c->btree_foreground_merge_threshold = BTREE_FOREGROUND_MERGE_THRESHOLD(c); -+ -+ if (bch2_fs_init_fault("fs_alloc")) -+ goto err; -+ -+ iter_size = sizeof(struct sort_iter) + -+ (btree_blocks(c) + 1) * 2 * -+ sizeof(struct sort_iter_set); -+ -+ if (!(c->wq = alloc_workqueue("bcachefs", -+ WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || -+ !(c->copygc_wq = alloc_workqueue("bcache_copygc", -+ WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || -+ !(c->journal_reclaim_wq = alloc_workqueue("bcache_journal", -+ WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || -+ percpu_ref_init(&c->writes, bch2_writes_disabled, -+ PERCPU_REF_INIT_DEAD, GFP_KERNEL) || -+ mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) || -+ bioset_init(&c->btree_bio, 1, -+ max(offsetof(struct btree_read_bio, bio), -+ offsetof(struct btree_write_bio, wbio.bio)), -+ BIOSET_NEED_BVECS) || -+ !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) || -+ mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, -+ btree_bytes(c)) || -+ mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || -+ bch2_io_clock_init(&c->io_clock[READ]) || -+ bch2_io_clock_init(&c->io_clock[WRITE]) || -+ bch2_fs_journal_init(&c->journal) || -+ bch2_fs_replicas_init(c) || -+ bch2_fs_btree_cache_init(c) || -+ bch2_fs_btree_key_cache_init(&c->btree_key_cache) || -+ bch2_fs_btree_iter_init(c) || -+ bch2_fs_btree_interior_update_init(c) || -+ bch2_fs_io_init(c) || -+ bch2_fs_encryption_init(c) || -+ bch2_fs_compress_init(c) || -+ bch2_fs_ec_init(c) || -+ bch2_fs_fsio_init(c)) -+ goto err; -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ for (i = 0; i < c->sb.nr_devices; i++) -+ if (bch2_dev_exists(c->disk_sb.sb, mi, i) && -+ bch2_dev_alloc(c, i)) -+ goto err; -+ -+ mutex_lock(&bch_fs_list_lock); -+ err = bch2_fs_online(c); -+ mutex_unlock(&bch_fs_list_lock); -+ if (err) { -+ bch_err(c, "bch2_fs_online() error: %s", err); -+ goto err; -+ } -+out: -+ pr_verbose_init(opts, "ret %i", c ? 
0 : -ENOMEM); -+ return c; -+err: -+ bch2_fs_free(c); -+ c = NULL; -+ goto out; -+} -+ -+noinline_for_stack -+static void print_mount_opts(struct bch_fs *c) -+{ -+ enum bch_opt_id i; -+ char buf[512]; -+ struct printbuf p = PBUF(buf); -+ bool first = true; -+ -+ strcpy(buf, "(null)"); -+ -+ if (c->opts.read_only) { -+ pr_buf(&p, "ro"); -+ first = false; -+ } -+ -+ for (i = 0; i < bch2_opts_nr; i++) { -+ const struct bch_option *opt = &bch2_opt_table[i]; -+ u64 v = bch2_opt_get_by_id(&c->opts, i); -+ -+ if (!(opt->mode & OPT_MOUNT)) -+ continue; -+ -+ if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) -+ continue; -+ -+ if (!first) -+ pr_buf(&p, ","); -+ first = false; -+ bch2_opt_to_text(&p, c, opt, v, OPT_SHOW_MOUNT_STYLE); -+ } -+ -+ bch_info(c, "mounted with opts: %s", buf); -+} -+ -+int bch2_fs_start(struct bch_fs *c) -+{ -+ const char *err = "cannot allocate memory"; -+ struct bch_sb_field_members *mi; -+ struct bch_dev *ca; -+ time64_t now = ktime_get_real_seconds(); -+ unsigned i; -+ int ret = -EINVAL; -+ -+ down_write(&c->state_lock); -+ -+ BUG_ON(test_bit(BCH_FS_STARTED, &c->flags)); -+ -+ mutex_lock(&c->sb_lock); -+ -+ for_each_online_member(ca, c, i) -+ bch2_sb_from_fs(c, ca); -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ for_each_online_member(ca, c, i) -+ mi->members[ca->dev_idx].last_mount = cpu_to_le64(now); -+ -+ mutex_unlock(&c->sb_lock); -+ -+ for_each_rw_member(ca, c, i) -+ bch2_dev_allocator_add(c, ca); -+ bch2_recalc_capacity(c); -+ -+ ret = BCH_SB_INITIALIZED(c->disk_sb.sb) -+ ? bch2_fs_recovery(c) -+ : bch2_fs_initialize(c); -+ if (ret) -+ goto err; -+ -+ ret = bch2_opts_check_may_set(c); -+ if (ret) -+ goto err; -+ -+ err = "dynamic fault"; -+ ret = -EINVAL; -+ if (bch2_fs_init_fault("fs_start")) -+ goto err; -+ -+ set_bit(BCH_FS_STARTED, &c->flags); -+ -+ /* -+ * Allocator threads don't start filling copygc reserve until after we -+ * set BCH_FS_STARTED - wake them now: -+ */ -+ for_each_online_member(ca, c, i) -+ bch2_wake_allocator(ca); -+ -+ if (c->opts.read_only || c->opts.nochanges) { -+ bch2_fs_read_only(c); -+ } else { -+ err = "error going read write"; -+ ret = !test_bit(BCH_FS_RW, &c->flags) -+ ? 
bch2_fs_read_write(c) -+ : bch2_fs_read_write_late(c); -+ if (ret) -+ goto err; -+ } -+ -+ print_mount_opts(c); -+ ret = 0; -+out: -+ up_write(&c->state_lock); -+ return ret; -+err: -+ switch (ret) { -+ case BCH_FSCK_ERRORS_NOT_FIXED: -+ bch_err(c, "filesystem contains errors: please report this to the developers"); -+ pr_cont("mount with -o fix_errors to repair\n"); -+ err = "fsck error"; -+ break; -+ case BCH_FSCK_REPAIR_UNIMPLEMENTED: -+ bch_err(c, "filesystem contains errors: please report this to the developers"); -+ pr_cont("repair unimplemented: inform the developers so that it can be added\n"); -+ err = "fsck error"; -+ break; -+ case BCH_FSCK_REPAIR_IMPOSSIBLE: -+ bch_err(c, "filesystem contains errors, but repair impossible"); -+ err = "fsck error"; -+ break; -+ case BCH_FSCK_UNKNOWN_VERSION: -+ err = "unknown metadata version";; -+ break; -+ case -ENOMEM: -+ err = "cannot allocate memory"; -+ break; -+ case -EIO: -+ err = "IO error"; -+ break; -+ } -+ -+ if (ret >= 0) -+ ret = -EIO; -+ goto out; -+} -+ -+static const char *bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) -+{ -+ struct bch_sb_field_members *sb_mi; -+ -+ sb_mi = bch2_sb_get_members(sb); -+ if (!sb_mi) -+ return "Invalid superblock: member info area missing"; -+ -+ if (le16_to_cpu(sb->block_size) != c->opts.block_size) -+ return "mismatched block size"; -+ -+ if (le16_to_cpu(sb_mi->members[sb->dev_idx].bucket_size) < -+ BCH_SB_BTREE_NODE_SIZE(c->disk_sb.sb)) -+ return "new cache bucket size is too small"; -+ -+ return NULL; -+} -+ -+static const char *bch2_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb) -+{ -+ struct bch_sb *newest = -+ le64_to_cpu(fs->seq) > le64_to_cpu(sb->seq) ? fs : sb; -+ struct bch_sb_field_members *mi = bch2_sb_get_members(newest); -+ -+ if (uuid_le_cmp(fs->uuid, sb->uuid)) -+ return "device not a member of filesystem"; -+ -+ if (!bch2_dev_exists(newest, mi, sb->dev_idx)) -+ return "device has been removed"; -+ -+ if (fs->block_size != sb->block_size) -+ return "mismatched block size"; -+ -+ return NULL; -+} -+ -+/* Device startup/shutdown: */ -+ -+static void bch2_dev_release(struct kobject *kobj) -+{ -+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); -+ -+ kfree(ca); -+} -+ -+static void bch2_dev_free(struct bch_dev *ca) -+{ -+ cancel_work_sync(&ca->io_error_work); -+ -+ if (ca->kobj.state_in_sysfs && -+ ca->disk_sb.bdev) -+ sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, -+ "bcachefs"); -+ -+ if (ca->kobj.state_in_sysfs) -+ kobject_del(&ca->kobj); -+ -+ bch2_free_super(&ca->disk_sb); -+ bch2_dev_journal_exit(ca); -+ -+ free_percpu(ca->io_done); -+ bioset_exit(&ca->replica_set); -+ bch2_dev_buckets_free(ca); -+ free_page((unsigned long) ca->sb_read_scratch); -+ -+ bch2_time_stats_exit(&ca->io_latency[WRITE]); -+ bch2_time_stats_exit(&ca->io_latency[READ]); -+ -+ percpu_ref_exit(&ca->io_ref); -+ percpu_ref_exit(&ca->ref); -+ kobject_put(&ca->kobj); -+} -+ -+static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca) -+{ -+ -+ lockdep_assert_held(&c->state_lock); -+ -+ if (percpu_ref_is_zero(&ca->io_ref)) -+ return; -+ -+ __bch2_dev_read_only(c, ca); -+ -+ reinit_completion(&ca->io_ref_completion); -+ percpu_ref_kill(&ca->io_ref); -+ wait_for_completion(&ca->io_ref_completion); -+ -+ if (ca->kobj.state_in_sysfs) { -+ struct kobject *block = -+ &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj; -+ -+ sysfs_remove_link(block, "bcachefs"); -+ sysfs_remove_link(&ca->kobj, "block"); -+ } -+ -+ bch2_free_super(&ca->disk_sb); -+ bch2_dev_journal_exit(ca); 
-+} -+ -+static void bch2_dev_ref_complete(struct percpu_ref *ref) -+{ -+ struct bch_dev *ca = container_of(ref, struct bch_dev, ref); -+ -+ complete(&ca->ref_completion); -+} -+ -+static void bch2_dev_io_ref_complete(struct percpu_ref *ref) -+{ -+ struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref); -+ -+ complete(&ca->io_ref_completion); -+} -+ -+static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca) -+{ -+ int ret; -+ -+ if (!c->kobj.state_in_sysfs) -+ return 0; -+ -+ if (!ca->kobj.state_in_sysfs) { -+ ret = kobject_add(&ca->kobj, &c->kobj, -+ "dev-%u", ca->dev_idx); -+ if (ret) -+ return ret; -+ } -+ -+ if (ca->disk_sb.bdev) { -+ struct kobject *block = -+ &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj; -+ -+ ret = sysfs_create_link(block, &ca->kobj, "bcachefs"); -+ if (ret) -+ return ret; -+ ret = sysfs_create_link(&ca->kobj, block, "block"); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, -+ struct bch_member *member) -+{ -+ struct bch_dev *ca; -+ -+ ca = kzalloc(sizeof(*ca), GFP_KERNEL); -+ if (!ca) -+ return NULL; -+ -+ kobject_init(&ca->kobj, &bch2_dev_ktype); -+ init_completion(&ca->ref_completion); -+ init_completion(&ca->io_ref_completion); -+ -+ init_rwsem(&ca->bucket_lock); -+ -+ INIT_WORK(&ca->io_error_work, bch2_io_error_work); -+ -+ bch2_time_stats_init(&ca->io_latency[READ]); -+ bch2_time_stats_init(&ca->io_latency[WRITE]); -+ -+ ca->mi = bch2_mi_to_cpu(member); -+ ca->uuid = member->uuid; -+ -+ if (opt_defined(c->opts, discard)) -+ ca->mi.discard = opt_get(c->opts, discard); -+ -+ if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete, -+ 0, GFP_KERNEL) || -+ percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete, -+ PERCPU_REF_INIT_DEAD, GFP_KERNEL) || -+ !(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) || -+ bch2_dev_buckets_alloc(c, ca) || -+ bioset_init(&ca->replica_set, 4, -+ offsetof(struct bch_write_bio, bio), 0) || -+ !(ca->io_done = alloc_percpu(*ca->io_done))) -+ goto err; -+ -+ return ca; -+err: -+ bch2_dev_free(ca); -+ return NULL; -+} -+ -+static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca, -+ unsigned dev_idx) -+{ -+ ca->dev_idx = dev_idx; -+ __set_bit(ca->dev_idx, ca->self.d); -+ scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx); -+ -+ ca->fs = c; -+ rcu_assign_pointer(c->devs[ca->dev_idx], ca); -+ -+ if (bch2_dev_sysfs_online(c, ca)) -+ pr_warn("error creating sysfs objects"); -+} -+ -+static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) -+{ -+ struct bch_member *member = -+ bch2_sb_get_members(c->disk_sb.sb)->members + dev_idx; -+ struct bch_dev *ca = NULL; -+ int ret = 0; -+ -+ pr_verbose_init(c->opts, ""); -+ -+ if (bch2_fs_init_fault("dev_alloc")) -+ goto err; -+ -+ ca = __bch2_dev_alloc(c, member); -+ if (!ca) -+ goto err; -+ -+ bch2_dev_attach(c, ca, dev_idx); -+out: -+ pr_verbose_init(c->opts, "ret %i", ret); -+ return ret; -+err: -+ if (ca) -+ bch2_dev_free(ca); -+ ret = -ENOMEM; -+ goto out; -+} -+ -+static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) -+{ -+ unsigned ret; -+ -+ if (bch2_dev_is_online(ca)) { -+ bch_err(ca, "already have device online in slot %u", -+ sb->sb->dev_idx); -+ return -EINVAL; -+ } -+ -+ if (get_capacity(sb->bdev->bd_disk) < -+ ca->mi.bucket_size * ca->mi.nbuckets) { -+ bch_err(ca, "cannot online: device too small"); -+ return -EINVAL; -+ } -+ -+ BUG_ON(!percpu_ref_is_zero(&ca->io_ref)); -+ -+ if (get_capacity(sb->bdev->bd_disk) < -+ ca->mi.bucket_size * 
ca->mi.nbuckets) { -+ bch_err(ca, "device too small"); -+ return -EINVAL; -+ } -+ -+ ret = bch2_dev_journal_init(ca, sb->sb); -+ if (ret) -+ return ret; -+ -+ /* Commit: */ -+ ca->disk_sb = *sb; -+ if (sb->mode & FMODE_EXCL) -+ ca->disk_sb.bdev->bd_holder = ca; -+ memset(sb, 0, sizeof(*sb)); -+ -+ percpu_ref_reinit(&ca->io_ref); -+ -+ return 0; -+} -+ -+static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ lockdep_assert_held(&c->state_lock); -+ -+ if (le64_to_cpu(sb->sb->seq) > -+ le64_to_cpu(c->disk_sb.sb->seq)) -+ bch2_sb_to_fs(c, sb->sb); -+ -+ BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices || -+ !c->devs[sb->sb->dev_idx]); -+ -+ ca = bch_dev_locked(c, sb->sb->dev_idx); -+ -+ ret = __bch2_dev_attach_bdev(ca, sb); -+ if (ret) -+ return ret; -+ -+ if (test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags) && -+ !percpu_u64_get(&ca->usage[0]->buckets[BCH_DATA_sb])) { -+ mutex_lock(&c->sb_lock); -+ bch2_mark_dev_superblock(ca->fs, ca, 0); -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ bch2_dev_sysfs_online(c, ca); -+ -+ if (c->sb.nr_devices == 1) -+ bdevname(ca->disk_sb.bdev, c->name); -+ bdevname(ca->disk_sb.bdev, ca->name); -+ -+ rebalance_wakeup(c); -+ return 0; -+} -+ -+/* Device management: */ -+ -+/* -+ * Note: this function is also used by the error paths - when a particular -+ * device sees an error, we call it to determine whether we can just set the -+ * device RO, or - if this function returns false - we'll set the whole -+ * filesystem RO: -+ * -+ * XXX: maybe we should be more explicit about whether we're changing state -+ * because we got an error or what have you? -+ */ -+bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, -+ enum bch_member_state new_state, int flags) -+{ -+ struct bch_devs_mask new_online_devs; -+ struct replicas_status s; -+ struct bch_dev *ca2; -+ int i, nr_rw = 0, required; -+ -+ lockdep_assert_held(&c->state_lock); -+ -+ switch (new_state) { -+ case BCH_MEMBER_STATE_RW: -+ return true; -+ case BCH_MEMBER_STATE_RO: -+ if (ca->mi.state != BCH_MEMBER_STATE_RW) -+ return true; -+ -+ /* do we have enough devices to write to? */ -+ for_each_member_device(ca2, c, i) -+ if (ca2 != ca) -+ nr_rw += ca2->mi.state == BCH_MEMBER_STATE_RW; -+ -+ required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED) -+ ? c->opts.metadata_replicas -+ : c->opts.metadata_replicas_required, -+ !(flags & BCH_FORCE_IF_DATA_DEGRADED) -+ ? c->opts.data_replicas -+ : c->opts.data_replicas_required); -+ -+ return nr_rw >= required; -+ case BCH_MEMBER_STATE_FAILED: -+ case BCH_MEMBER_STATE_SPARE: -+ if (ca->mi.state != BCH_MEMBER_STATE_RW && -+ ca->mi.state != BCH_MEMBER_STATE_RO) -+ return true; -+ -+ /* do we have enough devices to read from? */ -+ new_online_devs = bch2_online_devs(c); -+ __clear_bit(ca->dev_idx, new_online_devs.d); -+ -+ s = __bch2_replicas_status(c, new_online_devs); -+ -+ return bch2_have_enough_devs(s, flags); -+ default: -+ BUG(); -+ } -+} -+ -+static bool bch2_fs_may_start(struct bch_fs *c) -+{ -+ struct replicas_status s; -+ struct bch_sb_field_members *mi; -+ struct bch_dev *ca; -+ unsigned i, flags = c->opts.degraded -+ ? 
BCH_FORCE_IF_DEGRADED -+ : 0; -+ -+ if (!c->opts.degraded) { -+ mutex_lock(&c->sb_lock); -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ -+ for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { -+ if (!bch2_dev_exists(c->disk_sb.sb, mi, i)) -+ continue; -+ -+ ca = bch_dev_locked(c, i); -+ -+ if (!bch2_dev_is_online(ca) && -+ (ca->mi.state == BCH_MEMBER_STATE_RW || -+ ca->mi.state == BCH_MEMBER_STATE_RO)) { -+ mutex_unlock(&c->sb_lock); -+ return false; -+ } -+ } -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ s = bch2_replicas_status(c); -+ -+ return bch2_have_enough_devs(s, flags); -+} -+ -+static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) -+{ -+ /* -+ * Device going read only means the copygc reserve get smaller, so we -+ * don't want that happening while copygc is in progress: -+ */ -+ bch2_copygc_stop(c); -+ -+ /* -+ * The allocator thread itself allocates btree nodes, so stop it first: -+ */ -+ bch2_dev_allocator_stop(ca); -+ bch2_dev_allocator_remove(c, ca); -+ bch2_dev_journal_stop(&c->journal, ca); -+ -+ bch2_copygc_start(c); -+} -+ -+static const char *__bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) -+{ -+ lockdep_assert_held(&c->state_lock); -+ -+ BUG_ON(ca->mi.state != BCH_MEMBER_STATE_RW); -+ -+ bch2_dev_allocator_add(c, ca); -+ bch2_recalc_capacity(c); -+ -+ if (bch2_dev_allocator_start(ca)) -+ return "error starting allocator thread"; -+ -+ return NULL; -+} -+ -+int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, -+ enum bch_member_state new_state, int flags) -+{ -+ struct bch_sb_field_members *mi; -+ int ret = 0; -+ -+ if (ca->mi.state == new_state) -+ return 0; -+ -+ if (!bch2_dev_state_allowed(c, ca, new_state, flags)) -+ return -EINVAL; -+ -+ if (new_state != BCH_MEMBER_STATE_RW) -+ __bch2_dev_read_only(c, ca); -+ -+ bch_notice(ca, "%s", bch2_dev_state[new_state]); -+ -+ mutex_lock(&c->sb_lock); -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx], new_state); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ if (new_state == BCH_MEMBER_STATE_RW && -+ __bch2_dev_read_write(c, ca)) -+ ret = -ENOMEM; -+ -+ rebalance_wakeup(c); -+ -+ return ret; -+} -+ -+int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, -+ enum bch_member_state new_state, int flags) -+{ -+ int ret; -+ -+ down_write(&c->state_lock); -+ ret = __bch2_dev_set_state(c, ca, new_state, flags); -+ up_write(&c->state_lock); -+ -+ return ret; -+} -+ -+/* Device add/removal: */ -+ -+int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct btree_trans trans; -+ size_t i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < ca->mi.nbuckets; i++) { -+ ret = bch2_btree_key_cache_flush(&trans, -+ BTREE_ID_ALLOC, POS(ca->dev_idx, i)); -+ if (ret) -+ break; -+ } -+ bch2_trans_exit(&trans); -+ -+ if (ret) -+ return ret; -+ -+ return bch2_btree_delete_range(c, BTREE_ID_ALLOC, -+ POS(ca->dev_idx, 0), -+ POS(ca->dev_idx + 1, 0), -+ NULL); -+} -+ -+int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) -+{ -+ struct bch_sb_field_members *mi; -+ unsigned dev_idx = ca->dev_idx, data; -+ int ret = -EINVAL; -+ -+ down_write(&c->state_lock); -+ -+ /* -+ * We consume a reference to ca->ref, regardless of whether we succeed -+ * or fail: -+ */ -+ percpu_ref_put(&ca->ref); -+ -+ if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) { -+ bch_err(ca, "Cannot remove without losing data"); -+ goto err; -+ } -+ -+ __bch2_dev_read_only(c, ca); -+ -+ ret = bch2_dev_data_drop(c, 
ca->dev_idx, flags); -+ if (ret) { -+ bch_err(ca, "Remove failed: error %i dropping data", ret); -+ goto err; -+ } -+ -+ ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx); -+ if (ret) { -+ bch_err(ca, "Remove failed: error %i flushing journal", ret); -+ goto err; -+ } -+ -+ ret = bch2_dev_remove_alloc(c, ca); -+ if (ret) { -+ bch_err(ca, "Remove failed, error deleting alloc info"); -+ goto err; -+ } -+ -+ /* -+ * must flush all existing journal entries, they might have -+ * (overwritten) keys that point to the device we're removing: -+ */ -+ bch2_journal_flush_all_pins(&c->journal); -+ /* -+ * hack to ensure bch2_replicas_gc2() clears out entries to this device -+ */ -+ bch2_journal_meta(&c->journal); -+ ret = bch2_journal_error(&c->journal); -+ if (ret) { -+ bch_err(ca, "Remove failed, journal error"); -+ goto err; -+ } -+ -+ ret = bch2_replicas_gc2(c); -+ if (ret) { -+ bch_err(ca, "Remove failed: error %i from replicas gc", ret); -+ goto err; -+ } -+ -+ data = bch2_dev_has_data(c, ca); -+ if (data) { -+ char data_has_str[100]; -+ -+ bch2_flags_to_text(&PBUF(data_has_str), -+ bch2_data_types, data); -+ bch_err(ca, "Remove failed, still has data (%s)", data_has_str); -+ ret = -EBUSY; -+ goto err; -+ } -+ -+ __bch2_dev_offline(c, ca); -+ -+ mutex_lock(&c->sb_lock); -+ rcu_assign_pointer(c->devs[ca->dev_idx], NULL); -+ mutex_unlock(&c->sb_lock); -+ -+ percpu_ref_kill(&ca->ref); -+ wait_for_completion(&ca->ref_completion); -+ -+ bch2_dev_free(ca); -+ -+ /* -+ * Free this device's slot in the bch_member array - all pointers to -+ * this device must be gone: -+ */ -+ mutex_lock(&c->sb_lock); -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ memset(&mi->members[dev_idx].uuid, 0, sizeof(mi->members[dev_idx].uuid)); -+ -+ bch2_write_super(c); -+ -+ mutex_unlock(&c->sb_lock); -+ up_write(&c->state_lock); -+ return 0; -+err: -+ if (ca->mi.state == BCH_MEMBER_STATE_RW && -+ !percpu_ref_is_zero(&ca->io_ref)) -+ __bch2_dev_read_write(c, ca); -+ up_write(&c->state_lock); -+ return ret; -+} -+ -+static void dev_usage_clear(struct bch_dev *ca) -+{ -+ struct bucket_array *buckets; -+ -+ percpu_memset(ca->usage[0], 0, sizeof(*ca->usage[0])); -+ -+ down_read(&ca->bucket_lock); -+ buckets = bucket_array(ca); -+ -+ memset(buckets->b, 0, sizeof(buckets->b[0]) * buckets->nbuckets); -+ up_read(&ca->bucket_lock); -+} -+ -+/* Add new device to running filesystem: */ -+int bch2_dev_add(struct bch_fs *c, const char *path) -+{ -+ struct bch_opts opts = bch2_opts_empty(); -+ struct bch_sb_handle sb; -+ const char *err; -+ struct bch_dev *ca = NULL; -+ struct bch_sb_field_members *mi; -+ struct bch_member dev_mi; -+ unsigned dev_idx, nr_devices, u64s; -+ int ret; -+ -+ ret = bch2_read_super(path, &opts, &sb); -+ if (ret) -+ return ret; -+ -+ err = bch2_sb_validate(&sb); -+ if (err) -+ return -EINVAL; -+ -+ dev_mi = bch2_sb_get_members(sb.sb)->members[sb.sb->dev_idx]; -+ -+ err = bch2_dev_may_add(sb.sb, c); -+ if (err) -+ return -EINVAL; -+ -+ ca = __bch2_dev_alloc(c, &dev_mi); -+ if (!ca) { -+ bch2_free_super(&sb); -+ return -ENOMEM; -+ } -+ -+ ret = __bch2_dev_attach_bdev(ca, &sb); -+ if (ret) { -+ bch2_dev_free(ca); -+ return ret; -+ } -+ -+ /* -+ * We want to allocate journal on the new device before adding the new -+ * device to the filesystem because allocating after we attach requires -+ * spinning up the allocator thread, and the allocator thread requires -+ * doing btree writes, which if the existing devices are RO isn't going -+ * to work -+ * -+ * So we have to mark where the superblocks are, but 
marking allocated -+ * data normally updates the filesystem usage too, so we have to mark, -+ * allocate the journal, reset all the marks, then remark after we -+ * attach... -+ */ -+ bch2_mark_dev_superblock(ca->fs, ca, 0); -+ -+ err = "journal alloc failed"; -+ ret = bch2_dev_journal_alloc(ca); -+ if (ret) -+ goto err; -+ -+ dev_usage_clear(ca); -+ -+ down_write(&c->state_lock); -+ mutex_lock(&c->sb_lock); -+ -+ err = "insufficient space in new superblock"; -+ ret = bch2_sb_from_fs(c, ca); -+ if (ret) -+ goto err_unlock; -+ -+ mi = bch2_sb_get_members(ca->disk_sb.sb); -+ -+ if (!bch2_sb_resize_members(&ca->disk_sb, -+ le32_to_cpu(mi->field.u64s) + -+ sizeof(dev_mi) / sizeof(u64))) { -+ ret = -ENOSPC; -+ goto err_unlock; -+ } -+ -+ if (dynamic_fault("bcachefs:add:no_slot")) -+ goto no_slot; -+ -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++) -+ if (!bch2_dev_exists(c->disk_sb.sb, mi, dev_idx)) -+ goto have_slot; -+no_slot: -+ err = "no slots available in superblock"; -+ ret = -ENOSPC; -+ goto err_unlock; -+ -+have_slot: -+ nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices); -+ u64s = (sizeof(struct bch_sb_field_members) + -+ sizeof(struct bch_member) * nr_devices) / sizeof(u64); -+ -+ err = "no space in superblock for member info"; -+ ret = -ENOSPC; -+ -+ mi = bch2_sb_resize_members(&c->disk_sb, u64s); -+ if (!mi) -+ goto err_unlock; -+ -+ /* success: */ -+ -+ mi->members[dev_idx] = dev_mi; -+ mi->members[dev_idx].last_mount = cpu_to_le64(ktime_get_real_seconds()); -+ c->disk_sb.sb->nr_devices = nr_devices; -+ -+ ca->disk_sb.sb->dev_idx = dev_idx; -+ bch2_dev_attach(c, ca, dev_idx); -+ -+ bch2_mark_dev_superblock(c, ca, 0); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ err = "alloc write failed"; -+ ret = bch2_dev_alloc_write(c, ca, 0); -+ if (ret) -+ goto err; -+ -+ if (ca->mi.state == BCH_MEMBER_STATE_RW) { -+ err = __bch2_dev_read_write(c, ca); -+ if (err) -+ goto err_late; -+ } -+ -+ up_write(&c->state_lock); -+ return 0; -+ -+err_unlock: -+ mutex_unlock(&c->sb_lock); -+ up_write(&c->state_lock); -+err: -+ if (ca) -+ bch2_dev_free(ca); -+ bch2_free_super(&sb); -+ bch_err(c, "Unable to add device: %s", err); -+ return ret; -+err_late: -+ bch_err(c, "Error going rw after adding device: %s", err); -+ return -EINVAL; -+} -+ -+/* Hot add existing device to running filesystem: */ -+int bch2_dev_online(struct bch_fs *c, const char *path) -+{ -+ struct bch_opts opts = bch2_opts_empty(); -+ struct bch_sb_handle sb = { NULL }; -+ struct bch_sb_field_members *mi; -+ struct bch_dev *ca; -+ unsigned dev_idx; -+ const char *err; -+ int ret; -+ -+ down_write(&c->state_lock); -+ -+ ret = bch2_read_super(path, &opts, &sb); -+ if (ret) { -+ up_write(&c->state_lock); -+ return ret; -+ } -+ -+ dev_idx = sb.sb->dev_idx; -+ -+ err = bch2_dev_in_fs(c->disk_sb.sb, sb.sb); -+ if (err) -+ goto err; -+ -+ if (bch2_dev_attach_bdev(c, &sb)) { -+ err = "bch2_dev_attach_bdev() error"; -+ goto err; -+ } -+ -+ ca = bch_dev_locked(c, dev_idx); -+ if (ca->mi.state == BCH_MEMBER_STATE_RW) { -+ err = __bch2_dev_read_write(c, ca); -+ if (err) -+ goto err; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ mi = bch2_sb_get_members(c->disk_sb.sb); -+ -+ mi->members[ca->dev_idx].last_mount = -+ cpu_to_le64(ktime_get_real_seconds()); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ up_write(&c->state_lock); -+ return 0; -+err: -+ up_write(&c->state_lock); -+ bch2_free_super(&sb); -+ bch_err(c, "error bringing %s online: %s", path, err); -+ 
return -EINVAL; -+} -+ -+int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) -+{ -+ down_write(&c->state_lock); -+ -+ if (!bch2_dev_is_online(ca)) { -+ bch_err(ca, "Already offline"); -+ up_write(&c->state_lock); -+ return 0; -+ } -+ -+ if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) { -+ bch_err(ca, "Cannot offline required disk"); -+ up_write(&c->state_lock); -+ return -EINVAL; -+ } -+ -+ __bch2_dev_offline(c, ca); -+ -+ up_write(&c->state_lock); -+ return 0; -+} -+ -+int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) -+{ -+ struct bch_member *mi; -+ int ret = 0; -+ -+ down_write(&c->state_lock); -+ -+ if (nbuckets < ca->mi.nbuckets) { -+ bch_err(ca, "Cannot shrink yet"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ if (bch2_dev_is_online(ca) && -+ get_capacity(ca->disk_sb.bdev->bd_disk) < -+ ca->mi.bucket_size * nbuckets) { -+ bch_err(ca, "New size larger than device"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ ret = bch2_dev_buckets_resize(c, ca, nbuckets); -+ if (ret) { -+ bch_err(ca, "Resize error: %i", ret); -+ goto err; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; -+ mi->nbuckets = cpu_to_le64(nbuckets); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ bch2_recalc_capacity(c); -+err: -+ up_write(&c->state_lock); -+ return ret; -+} -+ -+/* return with ref on ca->ref: */ -+struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path) -+{ -+ struct block_device *bdev = lookup_bdev(path); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ if (IS_ERR(bdev)) -+ return ERR_CAST(bdev); -+ -+ for_each_member_device(ca, c, i) -+ if (ca->disk_sb.bdev == bdev) -+ goto found; -+ -+ ca = ERR_PTR(-ENOENT); -+found: -+ bdput(bdev); -+ return ca; -+} -+ -+/* Filesystem open: */ -+ -+struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, -+ struct bch_opts opts) -+{ -+ struct bch_sb_handle *sb = NULL; -+ struct bch_fs *c = NULL; -+ struct bch_sb_field_members *mi; -+ unsigned i, best_sb = 0; -+ const char *err; -+ int ret = -ENOMEM; -+ -+ pr_verbose_init(opts, ""); -+ -+ if (!nr_devices) { -+ c = ERR_PTR(-EINVAL); -+ goto out2; -+ } -+ -+ if (!try_module_get(THIS_MODULE)) { -+ c = ERR_PTR(-ENODEV); -+ goto out2; -+ } -+ -+ sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL); -+ if (!sb) -+ goto err; -+ -+ for (i = 0; i < nr_devices; i++) { -+ ret = bch2_read_super(devices[i], &opts, &sb[i]); -+ if (ret) -+ goto err; -+ -+ err = bch2_sb_validate(&sb[i]); -+ if (err) -+ goto err_print; -+ } -+ -+ for (i = 1; i < nr_devices; i++) -+ if (le64_to_cpu(sb[i].sb->seq) > -+ le64_to_cpu(sb[best_sb].sb->seq)) -+ best_sb = i; -+ -+ mi = bch2_sb_get_members(sb[best_sb].sb); -+ -+ i = 0; -+ while (i < nr_devices) { -+ if (i != best_sb && -+ !bch2_dev_exists(sb[best_sb].sb, mi, sb[i].sb->dev_idx)) { -+ char buf[BDEVNAME_SIZE]; -+ pr_info("%s has been removed, skipping", -+ bdevname(sb[i].bdev, buf)); -+ bch2_free_super(&sb[i]); -+ array_remove_item(sb, nr_devices, i); -+ continue; -+ } -+ -+ err = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb); -+ if (err) -+ goto err_print; -+ i++; -+ } -+ -+ ret = -ENOMEM; -+ c = bch2_fs_alloc(sb[best_sb].sb, opts); -+ if (!c) -+ goto err; -+ -+ err = "bch2_dev_online() error"; -+ down_write(&c->state_lock); -+ for (i = 0; i < nr_devices; i++) -+ if (bch2_dev_attach_bdev(c, &sb[i])) { -+ up_write(&c->state_lock); -+ goto err_print; -+ } -+ up_write(&c->state_lock); -+ -+ err = "insufficient devices"; -+ if (!bch2_fs_may_start(c)) -+ goto 
err_print; -+ -+ if (!c->opts.nostart) { -+ ret = bch2_fs_start(c); -+ if (ret) -+ goto err; -+ } -+out: -+ kfree(sb); -+ module_put(THIS_MODULE); -+out2: -+ pr_verbose_init(opts, "ret %i", PTR_ERR_OR_ZERO(c)); -+ return c; -+err_print: -+ pr_err("bch_fs_open err opening %s: %s", -+ devices[0], err); -+ ret = -EINVAL; -+err: -+ if (c) -+ bch2_fs_stop(c); -+ for (i = 0; i < nr_devices; i++) -+ bch2_free_super(&sb[i]); -+ c = ERR_PTR(ret); -+ goto out; -+} -+ -+static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb, -+ struct bch_opts opts) -+{ -+ const char *err; -+ struct bch_fs *c; -+ bool allocated_fs = false; -+ int ret; -+ -+ err = bch2_sb_validate(sb); -+ if (err) -+ return err; -+ -+ mutex_lock(&bch_fs_list_lock); -+ c = __bch2_uuid_to_fs(sb->sb->uuid); -+ if (c) { -+ closure_get(&c->cl); -+ -+ err = bch2_dev_in_fs(c->disk_sb.sb, sb->sb); -+ if (err) -+ goto err; -+ } else { -+ c = bch2_fs_alloc(sb->sb, opts); -+ err = "cannot allocate memory"; -+ if (!c) -+ goto err; -+ -+ allocated_fs = true; -+ } -+ -+ err = "bch2_dev_online() error"; -+ -+ mutex_lock(&c->sb_lock); -+ if (bch2_dev_attach_bdev(c, sb)) { -+ mutex_unlock(&c->sb_lock); -+ goto err; -+ } -+ mutex_unlock(&c->sb_lock); -+ -+ if (!c->opts.nostart && bch2_fs_may_start(c)) { -+ err = "error starting filesystem"; -+ ret = bch2_fs_start(c); -+ if (ret) -+ goto err; -+ } -+ -+ closure_put(&c->cl); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ return NULL; -+err: -+ mutex_unlock(&bch_fs_list_lock); -+ -+ if (allocated_fs) -+ bch2_fs_stop(c); -+ else if (c) -+ closure_put(&c->cl); -+ -+ return err; -+} -+ -+const char *bch2_fs_open_incremental(const char *path) -+{ -+ struct bch_sb_handle sb; -+ struct bch_opts opts = bch2_opts_empty(); -+ const char *err; -+ -+ if (bch2_read_super(path, &opts, &sb)) -+ return "error reading superblock"; -+ -+ err = __bch2_fs_open_incremental(&sb, opts); -+ bch2_free_super(&sb); -+ -+ return err; -+} -+ -+/* Global interfaces/init */ -+ -+static void bcachefs_exit(void) -+{ -+ bch2_debug_exit(); -+ bch2_vfs_exit(); -+ bch2_chardev_exit(); -+ if (bcachefs_kset) -+ kset_unregister(bcachefs_kset); -+} -+ -+static int __init bcachefs_init(void) -+{ -+ bch2_bkey_pack_test(); -+ bch2_inode_pack_test(); -+ -+ if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) || -+ bch2_chardev_init() || -+ bch2_vfs_init() || -+ bch2_debug_init()) -+ goto err; -+ -+ return 0; -+err: -+ bcachefs_exit(); -+ return -ENOMEM; -+} -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ bool bch2_##name; \ -+ module_param_named(name, bch2_##name, bool, 0644); \ -+ MODULE_PARM_DESC(name, description); -+BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+module_exit(bcachefs_exit); -+module_init(bcachefs_init); -diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h -new file mode 100644 -index 000000000000..02c81f3555c3 ---- /dev/null -+++ b/fs/bcachefs/super.h -@@ -0,0 +1,241 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SUPER_H -+#define _BCACHEFS_SUPER_H -+ -+#include "extents.h" -+ -+#include "bcachefs_ioctl.h" -+ -+#include -+ -+static inline size_t sector_to_bucket(const struct bch_dev *ca, sector_t s) -+{ -+ return div_u64(s, ca->mi.bucket_size); -+} -+ -+static inline sector_t bucket_to_sector(const struct bch_dev *ca, size_t b) -+{ -+ return ((sector_t) b) * ca->mi.bucket_size; -+} -+ -+static inline sector_t bucket_remainder(const struct bch_dev *ca, sector_t s) -+{ -+ u32 remainder; -+ -+ div_u64_rem(s, ca->mi.bucket_size, &remainder); -+ return remainder; -+} -+ -+static 
inline bool bch2_dev_is_online(struct bch_dev *ca) -+{ -+ return !percpu_ref_is_zero(&ca->io_ref); -+} -+ -+static inline bool bch2_dev_is_readable(struct bch_dev *ca) -+{ -+ return bch2_dev_is_online(ca) && -+ ca->mi.state != BCH_MEMBER_STATE_FAILED; -+} -+ -+static inline bool bch2_dev_get_ioref(struct bch_dev *ca, int rw) -+{ -+ if (!percpu_ref_tryget(&ca->io_ref)) -+ return false; -+ -+ if (ca->mi.state == BCH_MEMBER_STATE_RW || -+ (ca->mi.state == BCH_MEMBER_STATE_RO && rw == READ)) -+ return true; -+ -+ percpu_ref_put(&ca->io_ref); -+ return false; -+} -+ -+static inline unsigned dev_mask_nr(const struct bch_devs_mask *devs) -+{ -+ return bitmap_weight(devs->d, BCH_SB_MEMBERS_MAX); -+} -+ -+static inline bool bch2_dev_list_has_dev(struct bch_devs_list devs, -+ unsigned dev) -+{ -+ unsigned i; -+ -+ for (i = 0; i < devs.nr; i++) -+ if (devs.devs[i] == dev) -+ return true; -+ -+ return false; -+} -+ -+static inline void bch2_dev_list_drop_dev(struct bch_devs_list *devs, -+ unsigned dev) -+{ -+ unsigned i; -+ -+ for (i = 0; i < devs->nr; i++) -+ if (devs->devs[i] == dev) { -+ array_remove_item(devs->devs, devs->nr, i); -+ return; -+ } -+} -+ -+static inline void bch2_dev_list_add_dev(struct bch_devs_list *devs, -+ unsigned dev) -+{ -+ BUG_ON(bch2_dev_list_has_dev(*devs, dev)); -+ BUG_ON(devs->nr >= BCH_REPLICAS_MAX); -+ devs->devs[devs->nr++] = dev; -+} -+ -+static inline struct bch_devs_list bch2_dev_list_single(unsigned dev) -+{ -+ return (struct bch_devs_list) { .nr = 1, .devs[0] = dev }; -+} -+ -+static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter, -+ const struct bch_devs_mask *mask) -+{ -+ struct bch_dev *ca = NULL; -+ -+ while ((*iter = mask -+ ? find_next_bit(mask->d, c->sb.nr_devices, *iter) -+ : *iter) < c->sb.nr_devices && -+ !(ca = rcu_dereference_check(c->devs[*iter], -+ lockdep_is_held(&c->state_lock)))) -+ (*iter)++; -+ -+ return ca; -+} -+ -+#define __for_each_member_device(ca, c, iter, mask) \ -+ for ((iter) = 0; ((ca) = __bch2_next_dev((c), &(iter), mask)); (iter)++) -+ -+#define for_each_member_device_rcu(ca, c, iter, mask) \ -+ __for_each_member_device(ca, c, iter, mask) -+ -+static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, unsigned *iter) -+{ -+ struct bch_dev *ca; -+ -+ rcu_read_lock(); -+ if ((ca = __bch2_next_dev(c, iter, NULL))) -+ percpu_ref_get(&ca->ref); -+ rcu_read_unlock(); -+ -+ return ca; -+} -+ -+/* -+ * If you break early, you must drop your ref on the current device -+ */ -+#define for_each_member_device(ca, c, iter) \ -+ for ((iter) = 0; \ -+ (ca = bch2_get_next_dev(c, &(iter))); \ -+ percpu_ref_put(&ca->ref), (iter)++) -+ -+static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c, -+ unsigned *iter, -+ int state_mask) -+{ -+ struct bch_dev *ca; -+ -+ rcu_read_lock(); -+ while ((ca = __bch2_next_dev(c, iter, NULL)) && -+ (!((1 << ca->mi.state) & state_mask) || -+ !percpu_ref_tryget(&ca->io_ref))) -+ (*iter)++; -+ rcu_read_unlock(); -+ -+ return ca; -+} -+ -+#define __for_each_online_member(ca, c, iter, state_mask) \ -+ for ((iter) = 0; \ -+ (ca = bch2_get_next_online_dev(c, &(iter), state_mask)); \ -+ percpu_ref_put(&ca->io_ref), (iter)++) -+ -+#define for_each_online_member(ca, c, iter) \ -+ __for_each_online_member(ca, c, iter, ~0) -+ -+#define for_each_rw_member(ca, c, iter) \ -+ __for_each_online_member(ca, c, iter, 1 << BCH_MEMBER_STATE_RW) -+ -+#define for_each_readable_member(ca, c, iter) \ -+ __for_each_online_member(ca, c, iter, \ -+ (1 << BCH_MEMBER_STATE_RW)|(1 << 
BCH_MEMBER_STATE_RO)) -+ -+/* -+ * If a key exists that references a device, the device won't be going away and -+ * we can omit rcu_read_lock(): -+ */ -+static inline struct bch_dev *bch_dev_bkey_exists(const struct bch_fs *c, unsigned idx) -+{ -+ EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]); -+ -+ return rcu_dereference_check(c->devs[idx], 1); -+} -+ -+static inline struct bch_dev *bch_dev_locked(struct bch_fs *c, unsigned idx) -+{ -+ EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]); -+ -+ return rcu_dereference_protected(c->devs[idx], -+ lockdep_is_held(&c->sb_lock) || -+ lockdep_is_held(&c->state_lock)); -+} -+ -+/* XXX kill, move to struct bch_fs */ -+static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c) -+{ -+ struct bch_devs_mask devs; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ memset(&devs, 0, sizeof(devs)); -+ for_each_online_member(ca, c, i) -+ __set_bit(ca->dev_idx, devs.d); -+ return devs; -+} -+ -+struct bch_fs *bch2_bdev_to_fs(struct block_device *); -+struct bch_fs *bch2_uuid_to_fs(uuid_le); -+ -+bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *, -+ enum bch_member_state, int); -+int __bch2_dev_set_state(struct bch_fs *, struct bch_dev *, -+ enum bch_member_state, int); -+int bch2_dev_set_state(struct bch_fs *, struct bch_dev *, -+ enum bch_member_state, int); -+ -+int bch2_dev_fail(struct bch_dev *, int); -+int bch2_dev_remove(struct bch_fs *, struct bch_dev *, int); -+int bch2_dev_add(struct bch_fs *, const char *); -+int bch2_dev_online(struct bch_fs *, const char *); -+int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int); -+int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64); -+struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); -+ -+bool bch2_fs_emergency_read_only(struct bch_fs *); -+void bch2_fs_read_only(struct bch_fs *); -+ -+int bch2_fs_read_write(struct bch_fs *); -+int bch2_fs_read_write_early(struct bch_fs *); -+ -+/* -+ * Only for use in the recovery/fsck path: -+ */ -+static inline void bch2_fs_lazy_rw(struct bch_fs *c) -+{ -+ if (percpu_ref_is_zero(&c->writes)) -+ bch2_fs_read_write_early(c); -+} -+ -+void __bch2_fs_stop(struct bch_fs *); -+void bch2_fs_free(struct bch_fs *); -+void bch2_fs_stop(struct bch_fs *); -+ -+int bch2_fs_start(struct bch_fs *); -+struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts); -+const char *bch2_fs_open_incremental(const char *path); -+ -+#endif /* _BCACHEFS_SUPER_H */ -diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h -new file mode 100644 -index 000000000000..20406ebd6f5b ---- /dev/null -+++ b/fs/bcachefs/super_types.h -@@ -0,0 +1,51 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SUPER_TYPES_H -+#define _BCACHEFS_SUPER_TYPES_H -+ -+struct bch_sb_handle { -+ struct bch_sb *sb; -+ struct block_device *bdev; -+ struct bio *bio; -+ unsigned page_order; -+ fmode_t mode; -+ unsigned have_layout:1; -+ unsigned have_bio:1; -+ unsigned fs_sb:1; -+ u64 seq; -+}; -+ -+struct bch_devs_mask { -+ unsigned long d[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)]; -+}; -+ -+struct bch_devs_list { -+ u8 nr; -+ u8 devs[BCH_REPLICAS_MAX + 1]; -+}; -+ -+struct bch_member_cpu { -+ u64 nbuckets; /* device size */ -+ u16 first_bucket; /* index of first bucket used */ -+ u16 bucket_size; /* sectors */ -+ u16 group; -+ u8 state; -+ u8 replacement; -+ u8 discard; -+ u8 data_allowed; -+ u8 durability; -+ u8 valid; -+}; -+ -+struct bch_disk_group_cpu { -+ bool deleted; -+ u16 parent; -+ struct bch_devs_mask devs; -+}; -+ -+struct bch_disk_groups_cpu { -+ 
struct rcu_head rcu; -+ unsigned nr; -+ struct bch_disk_group_cpu entries[]; -+}; -+ -+#endif /* _BCACHEFS_SUPER_TYPES_H */ -diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c -new file mode 100644 -index 000000000000..0cb29f43d99d ---- /dev/null -+++ b/fs/bcachefs/sysfs.c -@@ -0,0 +1,1074 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * bcache sysfs interfaces -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#ifndef NO_BCACHEFS_SYSFS -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "sysfs.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_key_cache.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "clock.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "inode.h" -+#include "journal.h" -+#include "keylist.h" -+#include "move.h" -+#include "opts.h" -+#include "rebalance.h" -+#include "replicas.h" -+#include "super-io.h" -+#include "tests.h" -+ -+#include -+#include -+#include -+ -+#include "util.h" -+ -+#define SYSFS_OPS(type) \ -+struct sysfs_ops type ## _sysfs_ops = { \ -+ .show = type ## _show, \ -+ .store = type ## _store \ -+} -+ -+#define SHOW(fn) \ -+static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ -+ char *buf) \ -+ -+#define STORE(fn) \ -+static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\ -+ const char *buf, size_t size) \ -+ -+#define __sysfs_attribute(_name, _mode) \ -+ static struct attribute sysfs_##_name = \ -+ { .name = #_name, .mode = _mode } -+ -+#define write_attribute(n) __sysfs_attribute(n, S_IWUSR) -+#define read_attribute(n) __sysfs_attribute(n, S_IRUGO) -+#define rw_attribute(n) __sysfs_attribute(n, S_IRUGO|S_IWUSR) -+ -+#define sysfs_printf(file, fmt, ...) 
\ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return scnprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__);\ -+} while (0) -+ -+#define sysfs_print(file, var) \ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return snprint(buf, PAGE_SIZE, var); \ -+} while (0) -+ -+#define sysfs_hprint(file, val) \ -+do { \ -+ if (attr == &sysfs_ ## file) { \ -+ bch2_hprint(&out, val); \ -+ pr_buf(&out, "\n"); \ -+ return out.pos - buf; \ -+ } \ -+} while (0) -+ -+#define var_printf(_var, fmt) sysfs_printf(_var, fmt, var(_var)) -+#define var_print(_var) sysfs_print(_var, var(_var)) -+#define var_hprint(_var) sysfs_hprint(_var, var(_var)) -+ -+#define sysfs_strtoul(file, var) \ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return strtoul_safe(buf, var) ?: (ssize_t) size; \ -+} while (0) -+ -+#define sysfs_strtoul_clamp(file, var, min, max) \ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return strtoul_safe_clamp(buf, var, min, max) \ -+ ?: (ssize_t) size; \ -+} while (0) -+ -+#define strtoul_or_return(cp) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (_r) \ -+ return _r; \ -+ _v; \ -+}) -+ -+#define strtoul_restrict_or_return(cp, min, max) \ -+({ \ -+ unsigned long __v = 0; \ -+ int _r = strtoul_safe_restrict(cp, __v, min, max); \ -+ if (_r) \ -+ return _r; \ -+ __v; \ -+}) -+ -+#define strtoi_h_or_return(cp) \ -+({ \ -+ u64 _v; \ -+ int _r = strtoi_h(cp, &_v); \ -+ if (_r) \ -+ return _r; \ -+ _v; \ -+}) -+ -+#define sysfs_hatoi(file, var) \ -+do { \ -+ if (attr == &sysfs_ ## file) \ -+ return strtoi_h(buf, &var) ?: (ssize_t) size; \ -+} while (0) -+ -+write_attribute(trigger_journal_flush); -+write_attribute(trigger_btree_coalesce); -+write_attribute(trigger_gc); -+write_attribute(prune_cache); -+rw_attribute(btree_gc_periodic); -+ -+read_attribute(uuid); -+read_attribute(minor); -+read_attribute(bucket_size); -+read_attribute(block_size); -+read_attribute(btree_node_size); -+read_attribute(first_bucket); -+read_attribute(nbuckets); -+read_attribute(durability); -+read_attribute(iodone); -+ -+read_attribute(io_latency_read); -+read_attribute(io_latency_write); -+read_attribute(io_latency_stats_read); -+read_attribute(io_latency_stats_write); -+read_attribute(congested); -+ -+read_attribute(bucket_quantiles_last_read); -+read_attribute(bucket_quantiles_last_write); -+read_attribute(bucket_quantiles_fragmentation); -+read_attribute(bucket_quantiles_oldest_gen); -+ -+read_attribute(reserve_stats); -+read_attribute(btree_cache_size); -+read_attribute(compression_stats); -+read_attribute(journal_debug); -+read_attribute(journal_pins); -+read_attribute(btree_updates); -+read_attribute(dirty_btree_nodes); -+read_attribute(btree_key_cache); -+read_attribute(btree_transactions); -+read_attribute(stripes_heap); -+ -+read_attribute(internal_uuid); -+ -+read_attribute(has_data); -+read_attribute(alloc_debug); -+write_attribute(wake_allocator); -+ -+read_attribute(read_realloc_races); -+read_attribute(extent_migrate_done); -+read_attribute(extent_migrate_raced); -+ -+rw_attribute(journal_write_delay_ms); -+rw_attribute(journal_reclaim_delay_ms); -+ -+rw_attribute(discard); -+rw_attribute(cache_replacement_policy); -+rw_attribute(label); -+ -+rw_attribute(copy_gc_enabled); -+sysfs_pd_controller_attribute(copy_gc); -+ -+rw_attribute(rebalance_enabled); -+sysfs_pd_controller_attribute(rebalance); -+read_attribute(rebalance_work); -+rw_attribute(promote_whole_extents); -+ -+read_attribute(new_stripes); -+ -+rw_attribute(pd_controllers_update_seconds); -+ -+read_attribute(meta_replicas_have); 
-+read_attribute(data_replicas_have); -+ -+read_attribute(io_timers_read); -+read_attribute(io_timers_write); -+ -+#ifdef CONFIG_BCACHEFS_TESTS -+write_attribute(perf_test); -+#endif /* CONFIG_BCACHEFS_TESTS */ -+ -+#define BCH_DEBUG_PARAM(name, description) \ -+ rw_attribute(name); -+ -+ BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+#define x(_name) \ -+ static struct attribute sysfs_time_stat_##_name = \ -+ { .name = #_name, .mode = S_IRUGO }; -+ BCH_TIME_STATS() -+#undef x -+ -+static struct attribute sysfs_state_rw = { -+ .name = "state", -+ .mode = S_IRUGO -+}; -+ -+static size_t bch2_btree_cache_size(struct bch_fs *c) -+{ -+ size_t ret = 0; -+ struct btree *b; -+ -+ mutex_lock(&c->btree_cache.lock); -+ list_for_each_entry(b, &c->btree_cache.live, list) -+ ret += btree_bytes(c); -+ -+ mutex_unlock(&c->btree_cache.lock); -+ return ret; -+} -+ -+static int fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c); -+ -+ if (!fs_usage) -+ return -ENOMEM; -+ -+ bch2_fs_usage_to_text(out, c, fs_usage); -+ -+ percpu_up_read(&c->mark_lock); -+ -+ kfree(fs_usage); -+ return 0; -+} -+ -+static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0, -+ nr_compressed_extents = 0, -+ compressed_sectors_compressed = 0, -+ compressed_sectors_uncompressed = 0; -+ int ret; -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EPERM; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k, ret) -+ if (k.k->type == KEY_TYPE_extent) { -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ extent_for_each_ptr_decode(e, p, entry) { -+ if (!crc_is_compressed(p.crc)) { -+ nr_uncompressed_extents++; -+ uncompressed_sectors += e.k->size; -+ } else { -+ nr_compressed_extents++; -+ compressed_sectors_compressed += -+ p.crc.compressed_size; -+ compressed_sectors_uncompressed += -+ p.crc.uncompressed_size; -+ } -+ -+ /* only looking at the first ptr */ -+ break; -+ } -+ } -+ -+ ret = bch2_trans_exit(&trans) ?: ret; -+ if (ret) -+ return ret; -+ -+ pr_buf(out, -+ "uncompressed data:\n" -+ " nr extents: %llu\n" -+ " size (bytes): %llu\n" -+ "compressed data:\n" -+ " nr extents: %llu\n" -+ " compressed size (bytes): %llu\n" -+ " uncompressed size (bytes): %llu\n", -+ nr_uncompressed_extents, -+ uncompressed_sectors << 9, -+ nr_compressed_extents, -+ compressed_sectors_compressed << 9, -+ compressed_sectors_uncompressed << 9); -+ return 0; -+} -+ -+SHOW(bch2_fs) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ -+ sysfs_print(minor, c->minor); -+ sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b); -+ -+ sysfs_print(journal_write_delay_ms, c->journal.write_delay_ms); -+ sysfs_print(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms); -+ -+ sysfs_print(block_size, block_bytes(c)); -+ sysfs_print(btree_node_size, btree_bytes(c)); -+ sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c)); -+ -+ sysfs_print(read_realloc_races, -+ atomic_long_read(&c->read_realloc_races)); -+ sysfs_print(extent_migrate_done, -+ atomic_long_read(&c->extent_migrate_done)); -+ sysfs_print(extent_migrate_raced, -+ atomic_long_read(&c->extent_migrate_raced)); -+ -+ sysfs_printf(btree_gc_periodic, "%u", (int) 
c->btree_gc_periodic); -+ -+ sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); -+ -+ sysfs_print(pd_controllers_update_seconds, -+ c->pd_controllers_update_seconds); -+ -+ sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled); -+ sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */ -+ sysfs_pd_controller_show(copy_gc, &c->copygc_pd); -+ -+ if (attr == &sysfs_rebalance_work) { -+ bch2_rebalance_work_to_text(&out, c); -+ return out.pos - buf; -+ } -+ -+ sysfs_print(promote_whole_extents, c->promote_whole_extents); -+ -+ sysfs_printf(meta_replicas_have, "%i", bch2_replicas_online(c, true)); -+ sysfs_printf(data_replicas_have, "%i", bch2_replicas_online(c, false)); -+ -+ /* Debugging: */ -+ -+ if (attr == &sysfs_alloc_debug) -+ return fs_alloc_debug_to_text(&out, c) ?: out.pos - buf; -+ -+ if (attr == &sysfs_journal_debug) { -+ bch2_journal_debug_to_text(&out, &c->journal); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_journal_pins) { -+ bch2_journal_pins_to_text(&out, &c->journal); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_btree_updates) { -+ bch2_btree_updates_to_text(&out, c); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_dirty_btree_nodes) { -+ bch2_dirty_btree_nodes_to_text(&out, c); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_btree_key_cache) { -+ bch2_btree_key_cache_to_text(&out, &c->btree_key_cache); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_btree_transactions) { -+ bch2_btree_trans_to_text(&out, c); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_stripes_heap) { -+ bch2_stripes_heap_to_text(&out, c); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_compression_stats) { -+ bch2_compression_stats_to_text(&out, c); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_new_stripes) { -+ bch2_new_stripes_to_text(&out, c); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_io_timers_read) { -+ bch2_io_timers_to_text(&out, &c->io_clock[READ]); -+ return out.pos - buf; -+ } -+ if (attr == &sysfs_io_timers_write) { -+ bch2_io_timers_to_text(&out, &c->io_clock[WRITE]); -+ return out.pos - buf; -+ } -+ -+#define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name); -+ BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+ return 0; -+} -+ -+STORE(bch2_fs) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); -+ -+ sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms); -+ sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms); -+ -+ if (attr == &sysfs_btree_gc_periodic) { -+ ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic) -+ ?: (ssize_t) size; -+ -+ wake_up_process(c->gc_thread); -+ return ret; -+ } -+ -+ if (attr == &sysfs_copy_gc_enabled) { -+ ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled) -+ ?: (ssize_t) size; -+ -+ if (c->copygc_thread) -+ wake_up_process(c->copygc_thread); -+ return ret; -+ } -+ -+ if (attr == &sysfs_rebalance_enabled) { -+ ssize_t ret = strtoul_safe(buf, c->rebalance.enabled) -+ ?: (ssize_t) size; -+ -+ rebalance_wakeup(c); -+ return ret; -+ } -+ -+ sysfs_strtoul(pd_controllers_update_seconds, -+ c->pd_controllers_update_seconds); -+ sysfs_pd_controller_store(rebalance, &c->rebalance.pd); -+ sysfs_pd_controller_store(copy_gc, &c->copygc_pd); -+ -+ sysfs_strtoul(promote_whole_extents, c->promote_whole_extents); -+ -+ /* Debugging: */ -+ -+#define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name); -+ BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EPERM; -+ 
-+ /* Debugging: */ -+ -+ if (attr == &sysfs_trigger_journal_flush) -+ bch2_journal_meta_async(&c->journal, NULL); -+ -+ if (attr == &sysfs_trigger_btree_coalesce) -+ bch2_coalesce(c); -+ -+ if (attr == &sysfs_trigger_gc) { -+ /* -+ * Full gc is currently incompatible with btree key cache: -+ */ -+#if 0 -+ down_read(&c->state_lock); -+ bch2_gc(c, NULL, false, false); -+ up_read(&c->state_lock); -+#else -+ bch2_gc_gens(c); -+#endif -+ } -+ -+ if (attr == &sysfs_prune_cache) { -+ struct shrink_control sc; -+ -+ sc.gfp_mask = GFP_KERNEL; -+ sc.nr_to_scan = strtoul_or_return(buf); -+ c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc); -+ } -+ -+#ifdef CONFIG_BCACHEFS_TESTS -+ if (attr == &sysfs_perf_test) { -+ char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp; -+ char *test = strsep(&p, " \t\n"); -+ char *nr_str = strsep(&p, " \t\n"); -+ char *threads_str = strsep(&p, " \t\n"); -+ unsigned threads; -+ u64 nr; -+ int ret = -EINVAL; -+ -+ if (threads_str && -+ !(ret = kstrtouint(threads_str, 10, &threads)) && -+ !(ret = bch2_strtoull_h(nr_str, &nr))) -+ bch2_btree_perf_test(c, test, nr, threads); -+ else -+ size = ret; -+ kfree(tmp); -+ } -+#endif -+ return size; -+} -+SYSFS_OPS(bch2_fs); -+ -+struct attribute *bch2_fs_files[] = { -+ &sysfs_minor, -+ &sysfs_block_size, -+ &sysfs_btree_node_size, -+ &sysfs_btree_cache_size, -+ -+ &sysfs_meta_replicas_have, -+ &sysfs_data_replicas_have, -+ -+ &sysfs_journal_write_delay_ms, -+ &sysfs_journal_reclaim_delay_ms, -+ -+ &sysfs_promote_whole_extents, -+ -+ &sysfs_compression_stats, -+ -+#ifdef CONFIG_BCACHEFS_TESTS -+ &sysfs_perf_test, -+#endif -+ NULL -+}; -+ -+/* internal dir - just a wrapper */ -+ -+SHOW(bch2_fs_internal) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, internal); -+ return bch2_fs_show(&c->kobj, attr, buf); -+} -+ -+STORE(bch2_fs_internal) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, internal); -+ return bch2_fs_store(&c->kobj, attr, buf, size); -+} -+SYSFS_OPS(bch2_fs_internal); -+ -+struct attribute *bch2_fs_internal_files[] = { -+ &sysfs_alloc_debug, -+ &sysfs_journal_debug, -+ &sysfs_journal_pins, -+ &sysfs_btree_updates, -+ &sysfs_dirty_btree_nodes, -+ &sysfs_btree_key_cache, -+ &sysfs_btree_transactions, -+ &sysfs_stripes_heap, -+ -+ &sysfs_read_realloc_races, -+ &sysfs_extent_migrate_done, -+ &sysfs_extent_migrate_raced, -+ -+ &sysfs_trigger_journal_flush, -+ &sysfs_trigger_btree_coalesce, -+ &sysfs_trigger_gc, -+ &sysfs_prune_cache, -+ -+ &sysfs_copy_gc_enabled, -+ -+ &sysfs_rebalance_enabled, -+ &sysfs_rebalance_work, -+ sysfs_pd_controller_files(rebalance), -+ sysfs_pd_controller_files(copy_gc), -+ -+ &sysfs_new_stripes, -+ -+ &sysfs_io_timers_read, -+ &sysfs_io_timers_write, -+ -+ &sysfs_internal_uuid, -+ -+#define BCH_DEBUG_PARAM(name, description) &sysfs_##name, -+ BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+ NULL -+}; -+ -+/* options */ -+ -+SHOW(bch2_fs_opts_dir) -+{ -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); -+ const struct bch_option *opt = container_of(attr, struct bch_option, attr); -+ int id = opt - bch2_opt_table; -+ u64 v = bch2_opt_get_by_id(&c->opts, id); -+ -+ bch2_opt_to_text(&out, c, opt, v, OPT_SHOW_FULL_LIST); -+ pr_buf(&out, "\n"); -+ -+ return out.pos - buf; -+} -+ -+STORE(bch2_fs_opts_dir) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); -+ const struct bch_option *opt = container_of(attr, struct bch_option, attr); -+ int ret, id = opt - bch2_opt_table; -+ char *tmp; -+ 
u64 v; -+ -+ tmp = kstrdup(buf, GFP_KERNEL); -+ if (!tmp) -+ return -ENOMEM; -+ -+ ret = bch2_opt_parse(c, opt, strim(tmp), &v); -+ kfree(tmp); -+ -+ if (ret < 0) -+ return ret; -+ -+ ret = bch2_opt_check_may_set(c, id, v); -+ if (ret < 0) -+ return ret; -+ -+ if (opt->set_sb != SET_NO_SB_OPT) { -+ mutex_lock(&c->sb_lock); -+ opt->set_sb(c->disk_sb.sb, v); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ bch2_opt_set_by_id(&c->opts, id, v); -+ -+ if ((id == Opt_background_target || -+ id == Opt_background_compression) && v) { -+ bch2_rebalance_add_work(c, S64_MAX); -+ rebalance_wakeup(c); -+ } -+ -+ return size; -+} -+SYSFS_OPS(bch2_fs_opts_dir); -+ -+struct attribute *bch2_fs_opts_dir_files[] = { NULL }; -+ -+int bch2_opts_create_sysfs_files(struct kobject *kobj) -+{ -+ const struct bch_option *i; -+ int ret; -+ -+ for (i = bch2_opt_table; -+ i < bch2_opt_table + bch2_opts_nr; -+ i++) { -+ if (!(i->mode & (OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME))) -+ continue; -+ -+ ret = sysfs_create_file(kobj, &i->attr); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/* time stats */ -+ -+SHOW(bch2_fs_time_stats) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats); -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ -+#define x(name) \ -+ if (attr == &sysfs_time_stat_##name) { \ -+ bch2_time_stats_to_text(&out, &c->times[BCH_TIME_##name]);\ -+ return out.pos - buf; \ -+ } -+ BCH_TIME_STATS() -+#undef x -+ -+ return 0; -+} -+ -+STORE(bch2_fs_time_stats) -+{ -+ return size; -+} -+SYSFS_OPS(bch2_fs_time_stats); -+ -+struct attribute *bch2_fs_time_stats_files[] = { -+#define x(name) \ -+ &sysfs_time_stat_##name, -+ BCH_TIME_STATS() -+#undef x -+ NULL -+}; -+ -+typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *, -+ size_t, void *); -+ -+static unsigned bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, void *private) -+{ -+ int rw = (private ? 
1 : 0); -+ -+ return bucket_last_io(c, bucket(ca, b), rw); -+} -+ -+static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, void *private) -+{ -+ struct bucket *g = bucket(ca, b); -+ return bucket_sectors_used(g->mark); -+} -+ -+static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, void *private) -+{ -+ return bucket_gc_gen(ca, b); -+} -+ -+static int unsigned_cmp(const void *_l, const void *_r) -+{ -+ const unsigned *l = _l; -+ const unsigned *r = _r; -+ -+ return cmp_int(*l, *r); -+} -+ -+static int quantiles_to_text(struct printbuf *out, -+ struct bch_fs *c, struct bch_dev *ca, -+ bucket_map_fn *fn, void *private) -+{ -+ size_t i, n; -+ /* Compute 31 quantiles */ -+ unsigned q[31], *p; -+ -+ down_read(&ca->bucket_lock); -+ n = ca->mi.nbuckets; -+ -+ p = vzalloc(n * sizeof(unsigned)); -+ if (!p) { -+ up_read(&ca->bucket_lock); -+ return -ENOMEM; -+ } -+ -+ for (i = ca->mi.first_bucket; i < n; i++) -+ p[i] = fn(c, ca, i, private); -+ -+ sort(p, n, sizeof(unsigned), unsigned_cmp, NULL); -+ up_read(&ca->bucket_lock); -+ -+ while (n && -+ !p[n - 1]) -+ --n; -+ -+ for (i = 0; i < ARRAY_SIZE(q); i++) -+ q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)]; -+ -+ vfree(p); -+ -+ for (i = 0; i < ARRAY_SIZE(q); i++) -+ pr_buf(out, "%u ", q[i]); -+ pr_buf(out, "\n"); -+ return 0; -+} -+ -+static void reserve_stats_to_text(struct printbuf *out, struct bch_dev *ca) -+{ -+ enum alloc_reserve i; -+ -+ spin_lock(&ca->fs->freelist_lock); -+ -+ pr_buf(out, "free_inc:\t%zu\t%zu\n", -+ fifo_used(&ca->free_inc), -+ ca->free_inc.size); -+ -+ for (i = 0; i < RESERVE_NR; i++) -+ pr_buf(out, "free[%u]:\t%zu\t%zu\n", i, -+ fifo_used(&ca->free[i]), -+ ca->free[i].size); -+ -+ spin_unlock(&ca->fs->freelist_lock); -+} -+ -+static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) -+{ -+ struct bch_fs *c = ca->fs; -+ struct bch_dev_usage stats = bch2_dev_usage_read(ca); -+ unsigned i, nr[BCH_DATA_NR]; -+ -+ memset(nr, 0, sizeof(nr)); -+ -+ for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++) -+ nr[c->open_buckets[i].type]++; -+ -+ pr_buf(out, -+ "free_inc: %zu/%zu\n" -+ "free[RESERVE_BTREE]: %zu/%zu\n" -+ "free[RESERVE_MOVINGGC]: %zu/%zu\n" -+ "free[RESERVE_NONE]: %zu/%zu\n" -+ "buckets:\n" -+ " capacity: %llu\n" -+ " alloc: %llu\n" -+ " sb: %llu\n" -+ " journal: %llu\n" -+ " meta: %llu\n" -+ " user: %llu\n" -+ " cached: %llu\n" -+ " erasure coded: %llu\n" -+ " available: %lli\n" -+ "sectors:\n" -+ " sb: %llu\n" -+ " journal: %llu\n" -+ " meta: %llu\n" -+ " user: %llu\n" -+ " cached: %llu\n" -+ " erasure coded: %llu\n" -+ " fragmented: %llu\n" -+ " copygc threshold: %llu\n" -+ "freelist_wait: %s\n" -+ "open buckets: %u/%u (reserved %u)\n" -+ "open_buckets_wait: %s\n" -+ "open_buckets_btree: %u\n" -+ "open_buckets_user: %u\n" -+ "btree reserve cache: %u\n", -+ fifo_used(&ca->free_inc), ca->free_inc.size, -+ fifo_used(&ca->free[RESERVE_BTREE]), ca->free[RESERVE_BTREE].size, -+ fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size, -+ fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size, -+ ca->mi.nbuckets - ca->mi.first_bucket, -+ stats.buckets_alloc, -+ stats.buckets[BCH_DATA_sb], -+ stats.buckets[BCH_DATA_journal], -+ stats.buckets[BCH_DATA_btree], -+ stats.buckets[BCH_DATA_user], -+ stats.buckets[BCH_DATA_cached], -+ stats.buckets_ec, -+ __dev_buckets_available(ca, stats), -+ stats.sectors[BCH_DATA_sb], -+ stats.sectors[BCH_DATA_journal], -+ stats.sectors[BCH_DATA_btree], -+ stats.sectors[BCH_DATA_user], -+ 
stats.sectors[BCH_DATA_cached], -+ stats.sectors_ec, -+ stats.sectors_fragmented, -+ c->copygc_threshold, -+ c->freelist_wait.list.first ? "waiting" : "empty", -+ c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, -+ BTREE_NODE_OPEN_BUCKET_RESERVE, -+ c->open_buckets_wait.list.first ? "waiting" : "empty", -+ nr[BCH_DATA_btree], -+ nr[BCH_DATA_user], -+ c->btree_reserve_cache_nr); -+} -+ -+static const char * const bch2_rw[] = { -+ "read", -+ "write", -+ NULL -+}; -+ -+static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca) -+{ -+ int rw, i; -+ -+ for (rw = 0; rw < 2; rw++) { -+ pr_buf(out, "%s:\n", bch2_rw[rw]); -+ -+ for (i = 1; i < BCH_DATA_NR; i++) -+ pr_buf(out, "%-12s:%12llu\n", -+ bch2_data_types[i], -+ percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9); -+ } -+} -+ -+SHOW(bch2_dev) -+{ -+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); -+ struct bch_fs *c = ca->fs; -+ struct printbuf out = _PBUF(buf, PAGE_SIZE); -+ -+ sysfs_printf(uuid, "%pU\n", ca->uuid.b); -+ -+ sysfs_print(bucket_size, bucket_bytes(ca)); -+ sysfs_print(block_size, block_bytes(c)); -+ sysfs_print(first_bucket, ca->mi.first_bucket); -+ sysfs_print(nbuckets, ca->mi.nbuckets); -+ sysfs_print(durability, ca->mi.durability); -+ sysfs_print(discard, ca->mi.discard); -+ -+ if (attr == &sysfs_label) { -+ if (ca->mi.group) { -+ mutex_lock(&c->sb_lock); -+ bch2_disk_path_to_text(&out, &c->disk_sb, -+ ca->mi.group - 1); -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ pr_buf(&out, "\n"); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_has_data) { -+ bch2_flags_to_text(&out, bch2_data_types, -+ bch2_dev_has_data(c, ca)); -+ pr_buf(&out, "\n"); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_cache_replacement_policy) { -+ bch2_string_opt_to_text(&out, -+ bch2_cache_replacement_policies, -+ ca->mi.replacement); -+ pr_buf(&out, "\n"); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_state_rw) { -+ bch2_string_opt_to_text(&out, bch2_dev_state, -+ ca->mi.state); -+ pr_buf(&out, "\n"); -+ return out.pos - buf; -+ } -+ -+ if (attr == &sysfs_iodone) { -+ dev_iodone_to_text(&out, ca); -+ return out.pos - buf; -+ } -+ -+ sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ])); -+ sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE])); -+ -+ if (attr == &sysfs_io_latency_stats_read) { -+ bch2_time_stats_to_text(&out, &ca->io_latency[READ]); -+ return out.pos - buf; -+ } -+ if (attr == &sysfs_io_latency_stats_write) { -+ bch2_time_stats_to_text(&out, &ca->io_latency[WRITE]); -+ return out.pos - buf; -+ } -+ -+ sysfs_printf(congested, "%u%%", -+ clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX) -+ * 100 / CONGESTED_MAX); -+ -+ if (attr == &sysfs_bucket_quantiles_last_read) -+ return quantiles_to_text(&out, c, ca, bucket_last_io_fn, (void *) 0) ?: out.pos - buf; -+ if (attr == &sysfs_bucket_quantiles_last_write) -+ return quantiles_to_text(&out, c, ca, bucket_last_io_fn, (void *) 1) ?: out.pos - buf; -+ if (attr == &sysfs_bucket_quantiles_fragmentation) -+ return quantiles_to_text(&out, c, ca, bucket_sectors_used_fn, NULL) ?: out.pos - buf; -+ if (attr == &sysfs_bucket_quantiles_oldest_gen) -+ return quantiles_to_text(&out, c, ca, bucket_oldest_gen_fn, NULL) ?: out.pos - buf; -+ -+ if (attr == &sysfs_reserve_stats) { -+ reserve_stats_to_text(&out, ca); -+ return out.pos - buf; -+ } -+ if (attr == &sysfs_alloc_debug) { -+ dev_alloc_debug_to_text(&out, ca); -+ return out.pos - buf; -+ } -+ -+ return 0; -+} -+ -+STORE(bch2_dev) -+{ -+ struct bch_dev *ca = container_of(kobj, 
struct bch_dev, kobj); -+ struct bch_fs *c = ca->fs; -+ struct bch_member *mi; -+ -+ if (attr == &sysfs_discard) { -+ bool v = strtoul_or_return(buf); -+ -+ mutex_lock(&c->sb_lock); -+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; -+ -+ if (v != BCH_MEMBER_DISCARD(mi)) { -+ SET_BCH_MEMBER_DISCARD(mi, v); -+ bch2_write_super(c); -+ } -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ if (attr == &sysfs_cache_replacement_policy) { -+ ssize_t v = __sysfs_match_string(bch2_cache_replacement_policies, -1, buf); -+ -+ if (v < 0) -+ return v; -+ -+ mutex_lock(&c->sb_lock); -+ mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; -+ -+ if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) { -+ SET_BCH_MEMBER_REPLACEMENT(mi, v); -+ bch2_write_super(c); -+ } -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ if (attr == &sysfs_label) { -+ char *tmp; -+ int ret; -+ -+ tmp = kstrdup(buf, GFP_KERNEL); -+ if (!tmp) -+ return -ENOMEM; -+ -+ ret = bch2_dev_group_set(c, ca, strim(tmp)); -+ kfree(tmp); -+ if (ret) -+ return ret; -+ } -+ -+ if (attr == &sysfs_wake_allocator) -+ bch2_wake_allocator(ca); -+ -+ return size; -+} -+SYSFS_OPS(bch2_dev); -+ -+struct attribute *bch2_dev_files[] = { -+ &sysfs_uuid, -+ &sysfs_bucket_size, -+ &sysfs_block_size, -+ &sysfs_first_bucket, -+ &sysfs_nbuckets, -+ &sysfs_durability, -+ -+ /* settings: */ -+ &sysfs_discard, -+ &sysfs_cache_replacement_policy, -+ &sysfs_state_rw, -+ &sysfs_label, -+ -+ &sysfs_has_data, -+ &sysfs_iodone, -+ -+ &sysfs_io_latency_read, -+ &sysfs_io_latency_write, -+ &sysfs_io_latency_stats_read, -+ &sysfs_io_latency_stats_write, -+ &sysfs_congested, -+ -+ /* alloc info - other stats: */ -+ &sysfs_bucket_quantiles_last_read, -+ &sysfs_bucket_quantiles_last_write, -+ &sysfs_bucket_quantiles_fragmentation, -+ &sysfs_bucket_quantiles_oldest_gen, -+ -+ &sysfs_reserve_stats, -+ -+ /* debug: */ -+ &sysfs_alloc_debug, -+ &sysfs_wake_allocator, -+ NULL -+}; -+ -+#endif /* _BCACHEFS_SYSFS_H_ */ -diff --git a/fs/bcachefs/sysfs.h b/fs/bcachefs/sysfs.h -new file mode 100644 -index 000000000000..525fd05d91f7 ---- /dev/null -+++ b/fs/bcachefs/sysfs.h -@@ -0,0 +1,44 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SYSFS_H_ -+#define _BCACHEFS_SYSFS_H_ -+ -+#include -+ -+#ifndef NO_BCACHEFS_SYSFS -+ -+struct attribute; -+struct sysfs_ops; -+ -+extern struct attribute *bch2_fs_files[]; -+extern struct attribute *bch2_fs_internal_files[]; -+extern struct attribute *bch2_fs_opts_dir_files[]; -+extern struct attribute *bch2_fs_time_stats_files[]; -+extern struct attribute *bch2_dev_files[]; -+ -+extern struct sysfs_ops bch2_fs_sysfs_ops; -+extern struct sysfs_ops bch2_fs_internal_sysfs_ops; -+extern struct sysfs_ops bch2_fs_opts_dir_sysfs_ops; -+extern struct sysfs_ops bch2_fs_time_stats_sysfs_ops; -+extern struct sysfs_ops bch2_dev_sysfs_ops; -+ -+int bch2_opts_create_sysfs_files(struct kobject *); -+ -+#else -+ -+static struct attribute *bch2_fs_files[] = {}; -+static struct attribute *bch2_fs_internal_files[] = {}; -+static struct attribute *bch2_fs_opts_dir_files[] = {}; -+static struct attribute *bch2_fs_time_stats_files[] = {}; -+static struct attribute *bch2_dev_files[] = {}; -+ -+static const struct sysfs_ops bch2_fs_sysfs_ops; -+static const struct sysfs_ops bch2_fs_internal_sysfs_ops; -+static const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops; -+static const struct sysfs_ops bch2_fs_time_stats_sysfs_ops; -+static const struct sysfs_ops bch2_dev_sysfs_ops; -+ -+static inline int bch2_opts_create_sysfs_files(struct kobject *kobj) { return 0; } 
-+ -+#endif /* NO_BCACHEFS_SYSFS */ -+ -+#endif /* _BCACHEFS_SYSFS_H_ */ -diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c -new file mode 100644 -index 000000000000..4dcace650416 ---- /dev/null -+++ b/fs/bcachefs/tests.c -@@ -0,0 +1,725 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifdef CONFIG_BCACHEFS_TESTS -+ -+#include "bcachefs.h" -+#include "btree_update.h" -+#include "journal_reclaim.h" -+#include "tests.h" -+ -+#include "linux/kthread.h" -+#include "linux/random.h" -+ -+static void delete_test_keys(struct bch_fs *c) -+{ -+ int ret; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS, -+ POS(0, 0), POS(0, U64_MAX), -+ NULL); -+ BUG_ON(ret); -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_XATTRS, -+ POS(0, 0), POS(0, U64_MAX), -+ NULL); -+ BUG_ON(ret); -+} -+ -+/* unit tests */ -+ -+static void test_delete(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_i_cookie k; -+ int ret; -+ -+ bkey_cookie_init(&k.k_i); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, k.k.p, -+ BTREE_ITER_INTENT); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ BUG_ON(ret); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_trans_update(&trans, iter, &k.k_i, 0)); -+ BUG_ON(ret); -+ -+ pr_info("deleting once"); -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ BUG_ON(ret); -+ -+ pr_info("deleting twice"); -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ BUG_ON(ret); -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_delete_written(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_i_cookie k; -+ int ret; -+ -+ bkey_cookie_init(&k.k_i); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, k.k.p, -+ BTREE_ITER_INTENT); -+ -+ ret = bch2_btree_iter_traverse(iter); -+ BUG_ON(ret); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_trans_update(&trans, iter, &k.k_i, 0)); -+ BUG_ON(ret); -+ -+ bch2_journal_flush_all_pins(&c->journal); -+ -+ ret = bch2_btree_delete_at(&trans, iter, 0); -+ BUG_ON(ret); -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_iterate(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test keys"); -+ -+ for (i = 0; i < nr; i++) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = i; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_XATTRS, &k.k_i, -+ NULL, NULL, 0); -+ BUG_ON(ret); -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, -+ POS_MIN, 0, k, ret) { -+ if (k.k->p.inode) -+ break; -+ -+ BUG_ON(k.k->p.offset != i++); -+ } -+ -+ BUG_ON(i != nr); -+ -+ pr_info("iterating backwards"); -+ -+ while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) -+ BUG_ON(k.k->p.offset != --i); -+ -+ BUG_ON(i); -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_iterate_extents(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test extents"); -+ -+ for (i = 0; i < nr; i += 8) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = i + 8; -+ k.k.size = 8; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i, -+ NULL, NULL, 0); 
-+ BUG_ON(ret); -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, -+ POS_MIN, 0, k, ret) { -+ BUG_ON(bkey_start_offset(k.k) != i); -+ i = k.k->p.offset; -+ } -+ -+ BUG_ON(i != nr); -+ -+ pr_info("iterating backwards"); -+ -+ while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) { -+ BUG_ON(k.k->p.offset != i); -+ i = bkey_start_offset(k.k); -+ } -+ -+ BUG_ON(i); -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_iterate_slots(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test keys"); -+ -+ for (i = 0; i < nr; i++) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = i * 2; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_XATTRS, &k.k_i, -+ NULL, NULL, 0); -+ BUG_ON(ret); -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, -+ 0, k, ret) { -+ if (k.k->p.inode) -+ break; -+ -+ BUG_ON(k.k->p.offset != i); -+ i += 2; -+ } -+ bch2_trans_iter_free(&trans, iter); -+ -+ BUG_ON(i != nr * 2); -+ -+ pr_info("iterating forwards by slots"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, -+ BTREE_ITER_SLOTS, k, ret) { -+ BUG_ON(k.k->p.offset != i); -+ BUG_ON(bkey_deleted(k.k) != (i & 1)); -+ -+ i++; -+ if (i == nr * 2) -+ break; -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test keys"); -+ -+ for (i = 0; i < nr; i += 16) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = i + 16; -+ k.k.size = 8; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i, -+ NULL, NULL, 0); -+ BUG_ON(ret); -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, -+ 0, k, ret) { -+ BUG_ON(bkey_start_offset(k.k) != i + 8); -+ BUG_ON(k.k->size != 8); -+ i += 16; -+ } -+ bch2_trans_iter_free(&trans, iter); -+ -+ BUG_ON(i != nr); -+ -+ pr_info("iterating forwards by slots"); -+ -+ i = 0; -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, -+ BTREE_ITER_SLOTS, k, ret) { -+ BUG_ON(bkey_deleted(k.k) != !(i % 16)); -+ -+ BUG_ON(bkey_start_offset(k.k) != i); -+ BUG_ON(k.k->size != 8); -+ i = k.k->p.offset; -+ -+ if (i == nr) -+ break; -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+/* -+ * XXX: we really want to make sure we've got a btree with depth > 0 for these -+ * tests -+ */ -+static void test_peek_end(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, POS_MIN, 0); -+ -+ k = bch2_btree_iter_peek(iter); -+ BUG_ON(k.k); -+ -+ k = bch2_btree_iter_peek(iter); -+ BUG_ON(k.k); -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void test_peek_end_extents(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, 0); -+ -+ k = bch2_btree_iter_peek(iter); -+ BUG_ON(k.k); -+ -+ k = bch2_btree_iter_peek(iter); -+ 
BUG_ON(k.k); -+ -+ bch2_trans_exit(&trans); -+} -+ -+/* extent unit tests */ -+ -+u64 test_version; -+ -+static void insert_test_extent(struct bch_fs *c, -+ u64 start, u64 end) -+{ -+ struct bkey_i_cookie k; -+ int ret; -+ -+ //pr_info("inserting %llu-%llu v %llu", start, end, test_version); -+ -+ bkey_cookie_init(&k.k_i); -+ k.k_i.k.p.offset = end; -+ k.k_i.k.size = end - start; -+ k.k_i.k.version.lo = test_version++; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i, -+ NULL, NULL, 0); -+ BUG_ON(ret); -+} -+ -+static void __test_extent_overwrite(struct bch_fs *c, -+ u64 e1_start, u64 e1_end, -+ u64 e2_start, u64 e2_end) -+{ -+ insert_test_extent(c, e1_start, e1_end); -+ insert_test_extent(c, e2_start, e2_end); -+ -+ delete_test_keys(c); -+} -+ -+static void test_extent_overwrite_front(struct bch_fs *c, u64 nr) -+{ -+ __test_extent_overwrite(c, 0, 64, 0, 32); -+ __test_extent_overwrite(c, 8, 64, 0, 32); -+} -+ -+static void test_extent_overwrite_back(struct bch_fs *c, u64 nr) -+{ -+ __test_extent_overwrite(c, 0, 64, 32, 64); -+ __test_extent_overwrite(c, 0, 64, 32, 72); -+} -+ -+static void test_extent_overwrite_middle(struct bch_fs *c, u64 nr) -+{ -+ __test_extent_overwrite(c, 0, 64, 32, 40); -+} -+ -+static void test_extent_overwrite_all(struct bch_fs *c, u64 nr) -+{ -+ __test_extent_overwrite(c, 32, 64, 0, 64); -+ __test_extent_overwrite(c, 32, 64, 0, 128); -+ __test_extent_overwrite(c, 32, 64, 32, 64); -+ __test_extent_overwrite(c, 32, 64, 32, 128); -+} -+ -+/* perf tests */ -+ -+static u64 test_rand(void) -+{ -+ u64 v; -+#if 0 -+ v = prandom_u32(); -+#else -+ prandom_bytes(&v, sizeof(v)); -+#endif -+ return v; -+} -+ -+static void rand_insert(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct bkey_i_cookie k; -+ int ret; -+ u64 i; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < nr; i++) { -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = test_rand(); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ __bch2_btree_insert(&trans, BTREE_ID_XATTRS, &k.k_i)); -+ -+ BUG_ON(ret); -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void rand_lookup(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ u64 i; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < nr; i++) { -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, -+ POS(0, test_rand()), 0); -+ -+ k = bch2_btree_iter_peek(iter); -+ bch2_trans_iter_free(&trans, iter); -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void rand_mixed(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ u64 i; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < nr; i++) { -+ iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, -+ POS(0, test_rand()), 0); -+ -+ k = bch2_btree_iter_peek(iter); -+ -+ if (!(i & 3) && k.k) { -+ struct bkey_i_cookie k; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p = iter->pos; -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_trans_update(&trans, iter, &k.k_i, 0)); -+ -+ BUG_ON(ret); -+ } -+ -+ bch2_trans_iter_free(&trans, iter); -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static int __do_delete(struct btree_trans *trans, struct bpos pos) -+{ -+ struct btree_iter *iter; -+ struct bkey_i delete; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ iter = bch2_trans_get_iter(trans, BTREE_ID_XATTRS, pos, -+ BTREE_ITER_INTENT); -+ ret = PTR_ERR_OR_ZERO(iter); -+ if (ret) -+ goto err; -+ -+ k = bch2_btree_iter_peek(iter); -+ ret = bkey_err(k); -+ 
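__do_delete() continues just below by initializing an empty key at the position it found and queueing it as an ordinary update, so deletion rides the same transaction path as insertion. A toy, userspace-only model of that idea, with made-up toy_* names rather than the real bkey/btree_trans API:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Toy model: "deleted" is itself a key type, and updates never special-case it. */
enum toy_type { TOY_DELETED, TOY_COOKIE };

struct toy_key {
	enum toy_type	type;
	uint64_t	value;
};

#define NR_SLOTS 8
static struct toy_key slots[NR_SLOTS];	/* zero-initialized: all TOY_DELETED */

static void toy_update(uint64_t pos, struct toy_key k)
{
	slots[pos % NR_SLOTS] = k;	/* insert and delete share one path */
}

static bool toy_lookup(uint64_t pos, uint64_t *value)
{
	struct toy_key *k = &slots[pos % NR_SLOTS];

	if (k->type == TOY_DELETED)
		return false;
	*value = k->value;
	return true;
}

int main(void)
{
	uint64_t v;

	toy_update(3, (struct toy_key) { .type = TOY_COOKIE, .value = 42 });
	assert(toy_lookup(3, &v) && v == 42);

	/* the bkey_init() step below: a delete is an update with an empty key */
	toy_update(3, (struct toy_key) { .type = TOY_DELETED });
	assert(!toy_lookup(3, &v));
	return 0;
}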
if (ret) -+ goto err; -+ -+ bkey_init(&delete.k); -+ delete.k.p = k.k->p; -+ -+ bch2_trans_update(trans, iter, &delete, 0); -+err: -+ bch2_trans_iter_put(trans, iter); -+ return ret; -+} -+ -+static void rand_delete(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ int ret; -+ u64 i; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for (i = 0; i < nr; i++) { -+ struct bpos pos = POS(0, test_rand()); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ __do_delete(&trans, pos)); -+ BUG_ON(ret); -+ } -+ -+ bch2_trans_exit(&trans); -+} -+ -+static void seq_insert(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct bkey_i_cookie insert; -+ int ret; -+ u64 i = 0; -+ -+ bkey_cookie_init(&insert.k_i); -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ insert.k.p = iter->pos; -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_trans_update(&trans, iter, &insert.k_i, 0)); -+ -+ BUG_ON(ret); -+ -+ if (++i == nr) -+ break; -+ } -+ bch2_trans_exit(&trans); -+} -+ -+static void seq_lookup(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, 0, k, ret) -+ ; -+ bch2_trans_exit(&trans); -+} -+ -+static void seq_overwrite(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, -+ BTREE_ITER_INTENT, k, ret) { -+ struct bkey_i_cookie u; -+ -+ bkey_reassemble(&u.k_i, k); -+ -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_trans_update(&trans, iter, &u.k_i, 0)); -+ -+ BUG_ON(ret); -+ } -+ bch2_trans_exit(&trans); -+} -+ -+static void seq_delete(struct bch_fs *c, u64 nr) -+{ -+ int ret; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_XATTRS, -+ POS(0, 0), POS(0, U64_MAX), -+ NULL); -+ BUG_ON(ret); -+} -+ -+typedef void (*perf_test_fn)(struct bch_fs *, u64); -+ -+struct test_job { -+ struct bch_fs *c; -+ u64 nr; -+ unsigned nr_threads; -+ perf_test_fn fn; -+ -+ atomic_t ready; -+ wait_queue_head_t ready_wait; -+ -+ atomic_t done; -+ struct completion done_completion; -+ -+ u64 start; -+ u64 finish; -+}; -+ -+static int btree_perf_test_thread(void *data) -+{ -+ struct test_job *j = data; -+ -+ if (atomic_dec_and_test(&j->ready)) { -+ wake_up(&j->ready_wait); -+ j->start = sched_clock(); -+ } else { -+ wait_event(j->ready_wait, !atomic_read(&j->ready)); -+ } -+ -+ j->fn(j->c, j->nr / j->nr_threads); -+ -+ if (atomic_dec_and_test(&j->done)) { -+ j->finish = sched_clock(); -+ complete(&j->done_completion); -+ } -+ -+ return 0; -+} -+ -+void bch2_btree_perf_test(struct bch_fs *c, const char *testname, -+ u64 nr, unsigned nr_threads) -+{ -+ struct test_job j = { .c = c, .nr = nr, .nr_threads = nr_threads }; -+ char name_buf[20], nr_buf[20], per_sec_buf[20]; -+ unsigned i; -+ u64 time; -+ -+ atomic_set(&j.ready, nr_threads); -+ init_waitqueue_head(&j.ready_wait); -+ -+ atomic_set(&j.done, nr_threads); -+ init_completion(&j.done_completion); -+ -+#define perf_test(_test) \ -+ if (!strcmp(testname, #_test)) j.fn = _test -+ -+ perf_test(rand_insert); -+ perf_test(rand_lookup); -+ perf_test(rand_mixed); -+ perf_test(rand_delete); -+ -+ perf_test(seq_insert); -+ perf_test(seq_lookup); -+ 
perf_test(seq_overwrite); -+ perf_test(seq_delete); -+ -+ /* a unit test, not a perf test: */ -+ perf_test(test_delete); -+ perf_test(test_delete_written); -+ perf_test(test_iterate); -+ perf_test(test_iterate_extents); -+ perf_test(test_iterate_slots); -+ perf_test(test_iterate_slots_extents); -+ perf_test(test_peek_end); -+ perf_test(test_peek_end_extents); -+ -+ perf_test(test_extent_overwrite_front); -+ perf_test(test_extent_overwrite_back); -+ perf_test(test_extent_overwrite_middle); -+ perf_test(test_extent_overwrite_all); -+ -+ if (!j.fn) { -+ pr_err("unknown test %s", testname); -+ return; -+ } -+ -+ //pr_info("running test %s:", testname); -+ -+ if (nr_threads == 1) -+ btree_perf_test_thread(&j); -+ else -+ for (i = 0; i < nr_threads; i++) -+ kthread_run(btree_perf_test_thread, &j, -+ "bcachefs perf test[%u]", i); -+ -+ while (wait_for_completion_interruptible(&j.done_completion)) -+ ; -+ -+ time = j.finish - j.start; -+ -+ scnprintf(name_buf, sizeof(name_buf), "%s:", testname); -+ bch2_hprint(&PBUF(nr_buf), nr); -+ bch2_hprint(&PBUF(per_sec_buf), nr * NSEC_PER_SEC / time); -+ printk(KERN_INFO "%-12s %s with %u threads in %5llu sec, %5llu nsec per iter, %5s per sec\n", -+ name_buf, nr_buf, nr_threads, -+ time / NSEC_PER_SEC, -+ time * nr_threads / nr, -+ per_sec_buf); -+} -+ -+#endif /* CONFIG_BCACHEFS_TESTS */ -diff --git a/fs/bcachefs/tests.h b/fs/bcachefs/tests.h -new file mode 100644 -index 000000000000..551d0764225e ---- /dev/null -+++ b/fs/bcachefs/tests.h -@@ -0,0 +1,15 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_TEST_H -+#define _BCACHEFS_TEST_H -+ -+struct bch_fs; -+ -+#ifdef CONFIG_BCACHEFS_TESTS -+ -+void bch2_btree_perf_test(struct bch_fs *, const char *, u64, unsigned); -+ -+#else -+ -+#endif /* CONFIG_BCACHEFS_TESTS */ -+ -+#endif /* _BCACHEFS_TEST_H */ -diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c -new file mode 100644 -index 000000000000..59e8dfa3d245 ---- /dev/null -+++ b/fs/bcachefs/trace.c -@@ -0,0 +1,12 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "alloc_types.h" -+#include "buckets.h" -+#include "btree_types.h" -+#include "keylist.h" -+ -+#include -+#include "keylist.h" -+ -+#define CREATE_TRACE_POINTS -+#include -diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c -new file mode 100644 -index 000000000000..fd4044a6a08f ---- /dev/null -+++ b/fs/bcachefs/util.c -@@ -0,0 +1,907 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * random utiility code, for bcache but in theory not specific to bcache -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. 
-+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "eytzinger.h" -+#include "util.h" -+ -+static const char si_units[] = "?kMGTPEZY"; -+ -+static int __bch2_strtoh(const char *cp, u64 *res, -+ u64 t_max, bool t_signed) -+{ -+ bool positive = *cp != '-'; -+ unsigned u; -+ u64 v = 0; -+ -+ if (*cp == '+' || *cp == '-') -+ cp++; -+ -+ if (!isdigit(*cp)) -+ return -EINVAL; -+ -+ do { -+ if (v > U64_MAX / 10) -+ return -ERANGE; -+ v *= 10; -+ if (v > U64_MAX - (*cp - '0')) -+ return -ERANGE; -+ v += *cp - '0'; -+ cp++; -+ } while (isdigit(*cp)); -+ -+ for (u = 1; u < strlen(si_units); u++) -+ if (*cp == si_units[u]) { -+ cp++; -+ goto got_unit; -+ } -+ u = 0; -+got_unit: -+ if (*cp == '\n') -+ cp++; -+ if (*cp) -+ return -EINVAL; -+ -+ if (fls64(v) + u * 10 > 64) -+ return -ERANGE; -+ -+ v <<= u * 10; -+ -+ if (positive) { -+ if (v > t_max) -+ return -ERANGE; -+ } else { -+ if (v && !t_signed) -+ return -ERANGE; -+ -+ if (v > t_max + 1) -+ return -ERANGE; -+ v = -v; -+ } -+ -+ *res = v; -+ return 0; -+} -+ -+#define STRTO_H(name, type) \ -+int bch2_ ## name ## _h(const char *cp, type *res) \ -+{ \ -+ u64 v; \ -+ int ret = __bch2_strtoh(cp, &v, ANYSINT_MAX(type), \ -+ ANYSINT_MAX(type) != ((type) ~0ULL)); \ -+ *res = v; \ -+ return ret; \ -+} -+ -+STRTO_H(strtoint, int) -+STRTO_H(strtouint, unsigned int) -+STRTO_H(strtoll, long long) -+STRTO_H(strtoull, unsigned long long) -+STRTO_H(strtou64, u64) -+ -+void bch2_hprint(struct printbuf *buf, s64 v) -+{ -+ int u, t = 0; -+ -+ for (u = 0; v >= 1024 || v <= -1024; u++) { -+ t = v & ~(~0U << 10); -+ v >>= 10; -+ } -+ -+ pr_buf(buf, "%lli", v); -+ -+ /* -+ * 103 is magic: t is in the range [-1023, 1023] and we want -+ * to turn it into [-9, 9] -+ */ -+ if (u && v < 100 && v > -100) -+ pr_buf(buf, ".%i", t / 103); -+ if (u) -+ pr_buf(buf, "%c", si_units[u]); -+} -+ -+void bch2_string_opt_to_text(struct printbuf *out, -+ const char * const list[], -+ size_t selected) -+{ -+ size_t i; -+ -+ for (i = 0; list[i]; i++) -+ pr_buf(out, i == selected ? "[%s] " : "%s ", list[i]); -+} -+ -+void bch2_flags_to_text(struct printbuf *out, -+ const char * const list[], u64 flags) -+{ -+ unsigned bit, nr = 0; -+ bool first = true; -+ -+ if (out->pos != out->end) -+ *out->pos = '\0'; -+ -+ while (list[nr]) -+ nr++; -+ -+ while (flags && (bit = __ffs(flags)) < nr) { -+ if (!first) -+ pr_buf(out, ","); -+ first = false; -+ pr_buf(out, "%s", list[bit]); -+ flags ^= 1 << bit; -+ } -+} -+ -+u64 bch2_read_flag_list(char *opt, const char * const list[]) -+{ -+ u64 ret = 0; -+ char *p, *s, *d = kstrndup(opt, PAGE_SIZE - 1, GFP_KERNEL); -+ -+ if (!d) -+ return -ENOMEM; -+ -+ s = strim(d); -+ -+ while ((p = strsep(&s, ","))) { -+ int flag = match_string(list, -1, p); -+ if (flag < 0) { -+ ret = -1; -+ break; -+ } -+ -+ ret |= 1 << flag; -+ } -+ -+ kfree(d); -+ -+ return ret; -+} -+ -+bool bch2_is_zero(const void *_p, size_t n) -+{ -+ const char *p = _p; -+ size_t i; -+ -+ for (i = 0; i < n; i++) -+ if (p[i]) -+ return false; -+ return true; -+} -+ -+static void bch2_quantiles_update(struct quantiles *q, u64 v) -+{ -+ unsigned i = 0; -+ -+ while (i < ARRAY_SIZE(q->entries)) { -+ struct quantile_entry *e = q->entries + i; -+ -+ if (unlikely(!e->step)) { -+ e->m = v; -+ e->step = max_t(unsigned, v / 2, 1024); -+ } else if (e->m > v) { -+ e->m = e->m >= e->step -+ ? 
e->m - e->step -+ : 0; -+ } else if (e->m < v) { -+ e->m = e->m + e->step > e->m -+ ? e->m + e->step -+ : U32_MAX; -+ } -+ -+ if ((e->m > v ? e->m - v : v - e->m) < e->step) -+ e->step = max_t(unsigned, e->step / 2, 1); -+ -+ if (v >= e->m) -+ break; -+ -+ i = eytzinger0_child(i, v > e->m); -+ } -+} -+ -+/* time stats: */ -+ -+static void bch2_time_stats_update_one(struct time_stats *stats, -+ u64 start, u64 end) -+{ -+ u64 duration, freq; -+ -+ duration = time_after64(end, start) -+ ? end - start : 0; -+ freq = time_after64(end, stats->last_event) -+ ? end - stats->last_event : 0; -+ -+ stats->count++; -+ -+ stats->average_duration = stats->average_duration -+ ? ewma_add(stats->average_duration, duration, 6) -+ : duration; -+ -+ stats->average_frequency = stats->average_frequency -+ ? ewma_add(stats->average_frequency, freq, 6) -+ : freq; -+ -+ stats->max_duration = max(stats->max_duration, duration); -+ -+ stats->last_event = end; -+ -+ bch2_quantiles_update(&stats->quantiles, duration); -+} -+ -+void __bch2_time_stats_update(struct time_stats *stats, u64 start, u64 end) -+{ -+ unsigned long flags; -+ -+ if (!stats->buffer) { -+ spin_lock_irqsave(&stats->lock, flags); -+ bch2_time_stats_update_one(stats, start, end); -+ -+ if (stats->average_frequency < 32 && -+ stats->count > 1024) -+ stats->buffer = -+ alloc_percpu_gfp(struct time_stat_buffer, -+ GFP_ATOMIC); -+ spin_unlock_irqrestore(&stats->lock, flags); -+ } else { -+ struct time_stat_buffer_entry *i; -+ struct time_stat_buffer *b; -+ -+ preempt_disable(); -+ b = this_cpu_ptr(stats->buffer); -+ -+ BUG_ON(b->nr >= ARRAY_SIZE(b->entries)); -+ b->entries[b->nr++] = (struct time_stat_buffer_entry) { -+ .start = start, -+ .end = end -+ }; -+ -+ if (b->nr == ARRAY_SIZE(b->entries)) { -+ spin_lock_irqsave(&stats->lock, flags); -+ for (i = b->entries; -+ i < b->entries + ARRAY_SIZE(b->entries); -+ i++) -+ bch2_time_stats_update_one(stats, i->start, i->end); -+ spin_unlock_irqrestore(&stats->lock, flags); -+ -+ b->nr = 0; -+ } -+ -+ preempt_enable(); -+ } -+} -+ -+static const struct time_unit { -+ const char *name; -+ u32 nsecs; -+} time_units[] = { -+ { "ns", 1 }, -+ { "us", NSEC_PER_USEC }, -+ { "ms", NSEC_PER_MSEC }, -+ { "sec", NSEC_PER_SEC }, -+}; -+ -+static const struct time_unit *pick_time_units(u64 ns) -+{ -+ const struct time_unit *u; -+ -+ for (u = time_units; -+ u + 1 < time_units + ARRAY_SIZE(time_units) && -+ ns >= u[1].nsecs << 1; -+ u++) -+ ; -+ -+ return u; -+} -+ -+static void pr_time_units(struct printbuf *out, u64 ns) -+{ -+ const struct time_unit *u = pick_time_units(ns); -+ -+ pr_buf(out, "%llu %s", div_u64(ns, u->nsecs), u->name); -+} -+ -+void bch2_time_stats_to_text(struct printbuf *out, struct time_stats *stats) -+{ -+ const struct time_unit *u; -+ u64 freq = READ_ONCE(stats->average_frequency); -+ u64 q, last_q = 0; -+ int i; -+ -+ pr_buf(out, "count:\t\t%llu\n", -+ stats->count); -+ pr_buf(out, "rate:\t\t%llu/sec\n", -+ freq ? 
div64_u64(NSEC_PER_SEC, freq) : 0); -+ -+ pr_buf(out, "frequency:\t"); -+ pr_time_units(out, freq); -+ -+ pr_buf(out, "\navg duration:\t"); -+ pr_time_units(out, stats->average_duration); -+ -+ pr_buf(out, "\nmax duration:\t"); -+ pr_time_units(out, stats->max_duration); -+ -+ i = eytzinger0_first(NR_QUANTILES); -+ u = pick_time_units(stats->quantiles.entries[i].m); -+ -+ pr_buf(out, "\nquantiles (%s):\t", u->name); -+ eytzinger0_for_each(i, NR_QUANTILES) { -+ bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1; -+ -+ q = max(stats->quantiles.entries[i].m, last_q); -+ pr_buf(out, "%llu%s", -+ div_u64(q, u->nsecs), -+ is_last ? "\n" : " "); -+ last_q = q; -+ } -+} -+ -+void bch2_time_stats_exit(struct time_stats *stats) -+{ -+ free_percpu(stats->buffer); -+} -+ -+void bch2_time_stats_init(struct time_stats *stats) -+{ -+ memset(stats, 0, sizeof(*stats)); -+ spin_lock_init(&stats->lock); -+} -+ -+/* ratelimit: */ -+ -+/** -+ * bch2_ratelimit_delay() - return how long to delay until the next time to do -+ * some work -+ * -+ * @d - the struct bch_ratelimit to update -+ * -+ * Returns the amount of time to delay by, in jiffies -+ */ -+u64 bch2_ratelimit_delay(struct bch_ratelimit *d) -+{ -+ u64 now = local_clock(); -+ -+ return time_after64(d->next, now) -+ ? nsecs_to_jiffies(d->next - now) -+ : 0; -+} -+ -+/** -+ * bch2_ratelimit_increment() - increment @d by the amount of work done -+ * -+ * @d - the struct bch_ratelimit to update -+ * @done - the amount of work done, in arbitrary units -+ */ -+void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done) -+{ -+ u64 now = local_clock(); -+ -+ d->next += div_u64(done * NSEC_PER_SEC, d->rate); -+ -+ if (time_before64(now + NSEC_PER_SEC, d->next)) -+ d->next = now + NSEC_PER_SEC; -+ -+ if (time_after64(now - NSEC_PER_SEC * 2, d->next)) -+ d->next = now - NSEC_PER_SEC * 2; -+} -+ -+/* pd controller: */ -+ -+/* -+ * Updates pd_controller. Attempts to scale inputed values to units per second. -+ * @target: desired value -+ * @actual: current value -+ * -+ * @sign: 1 or -1; 1 if increasing the rate makes actual go up, -1 if increasing -+ * it makes actual go down. 
-+ */ -+void bch2_pd_controller_update(struct bch_pd_controller *pd, -+ s64 target, s64 actual, int sign) -+{ -+ s64 proportional, derivative, change; -+ -+ unsigned long seconds_since_update = (jiffies - pd->last_update) / HZ; -+ -+ if (seconds_since_update == 0) -+ return; -+ -+ pd->last_update = jiffies; -+ -+ proportional = actual - target; -+ proportional *= seconds_since_update; -+ proportional = div_s64(proportional, pd->p_term_inverse); -+ -+ derivative = actual - pd->last_actual; -+ derivative = div_s64(derivative, seconds_since_update); -+ derivative = ewma_add(pd->smoothed_derivative, derivative, -+ (pd->d_term / seconds_since_update) ?: 1); -+ derivative = derivative * pd->d_term; -+ derivative = div_s64(derivative, pd->p_term_inverse); -+ -+ change = proportional + derivative; -+ -+ /* Don't increase rate if not keeping up */ -+ if (change > 0 && -+ pd->backpressure && -+ time_after64(local_clock(), -+ pd->rate.next + NSEC_PER_MSEC)) -+ change = 0; -+ -+ change *= (sign * -1); -+ -+ pd->rate.rate = clamp_t(s64, (s64) pd->rate.rate + change, -+ 1, UINT_MAX); -+ -+ pd->last_actual = actual; -+ pd->last_derivative = derivative; -+ pd->last_proportional = proportional; -+ pd->last_change = change; -+ pd->last_target = target; -+} -+ -+void bch2_pd_controller_init(struct bch_pd_controller *pd) -+{ -+ pd->rate.rate = 1024; -+ pd->last_update = jiffies; -+ pd->p_term_inverse = 6000; -+ pd->d_term = 30; -+ pd->d_smooth = pd->d_term; -+ pd->backpressure = 1; -+} -+ -+size_t bch2_pd_controller_print_debug(struct bch_pd_controller *pd, char *buf) -+{ -+ /* 2^64 - 1 is 20 digits, plus null byte */ -+ char rate[21]; -+ char actual[21]; -+ char target[21]; -+ char proportional[21]; -+ char derivative[21]; -+ char change[21]; -+ s64 next_io; -+ -+ bch2_hprint(&PBUF(rate), pd->rate.rate); -+ bch2_hprint(&PBUF(actual), pd->last_actual); -+ bch2_hprint(&PBUF(target), pd->last_target); -+ bch2_hprint(&PBUF(proportional), pd->last_proportional); -+ bch2_hprint(&PBUF(derivative), pd->last_derivative); -+ bch2_hprint(&PBUF(change), pd->last_change); -+ -+ next_io = div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC); -+ -+ return sprintf(buf, -+ "rate:\t\t%s/sec\n" -+ "target:\t\t%s\n" -+ "actual:\t\t%s\n" -+ "proportional:\t%s\n" -+ "derivative:\t%s\n" -+ "change:\t\t%s/sec\n" -+ "next io:\t%llims\n", -+ rate, target, actual, proportional, -+ derivative, change, next_io); -+} -+ -+/* misc: */ -+ -+void bch2_bio_map(struct bio *bio, void *base, size_t size) -+{ -+ while (size) { -+ struct page *page = is_vmalloc_addr(base) -+ ? 
vmalloc_to_page(base) -+ : virt_to_page(base); -+ unsigned offset = offset_in_page(base); -+ unsigned len = min_t(size_t, PAGE_SIZE - offset, size); -+ -+ BUG_ON(!bio_add_page(bio, page, len, offset)); -+ size -= len; -+ base += len; -+ } -+} -+ -+int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) -+{ -+ while (size) { -+ struct page *page = alloc_page(gfp_mask); -+ unsigned len = min(PAGE_SIZE, size); -+ -+ if (!page) -+ return -ENOMEM; -+ -+ BUG_ON(!bio_add_page(bio, page, len, 0)); -+ size -= len; -+ } -+ -+ return 0; -+} -+ -+size_t bch2_rand_range(size_t max) -+{ -+ size_t rand; -+ -+ if (!max) -+ return 0; -+ -+ do { -+ rand = get_random_long(); -+ rand &= roundup_pow_of_two(max) - 1; -+ } while (rand >= max); -+ -+ return rand; -+} -+ -+void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ -+ __bio_for_each_segment(bv, dst, iter, dst_iter) { -+ void *dstp = kmap_atomic(bv.bv_page); -+ memcpy(dstp + bv.bv_offset, src, bv.bv_len); -+ kunmap_atomic(dstp); -+ -+ src += bv.bv_len; -+ } -+} -+ -+void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ -+ __bio_for_each_segment(bv, src, iter, src_iter) { -+ void *srcp = kmap_atomic(bv.bv_page); -+ memcpy(dst, srcp + bv.bv_offset, bv.bv_len); -+ kunmap_atomic(srcp); -+ -+ dst += bv.bv_len; -+ } -+} -+ -+void bch_scnmemcpy(struct printbuf *out, -+ const char *src, size_t len) -+{ -+ size_t n = printbuf_remaining(out); -+ -+ if (n) { -+ n = min(n - 1, len); -+ memcpy(out->pos, src, n); -+ out->pos += n; -+ *out->pos = '\0'; -+ } -+} -+ -+#include "eytzinger.h" -+ -+static int alignment_ok(const void *base, size_t align) -+{ -+ return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || -+ ((unsigned long)base & (align - 1)) == 0; -+} -+ -+static void u32_swap(void *a, void *b, size_t size) -+{ -+ u32 t = *(u32 *)a; -+ *(u32 *)a = *(u32 *)b; -+ *(u32 *)b = t; -+} -+ -+static void u64_swap(void *a, void *b, size_t size) -+{ -+ u64 t = *(u64 *)a; -+ *(u64 *)a = *(u64 *)b; -+ *(u64 *)b = t; -+} -+ -+static void generic_swap(void *a, void *b, size_t size) -+{ -+ char t; -+ -+ do { -+ t = *(char *)a; -+ *(char *)a++ = *(char *)b; -+ *(char *)b++ = t; -+ } while (--size > 0); -+} -+ -+static inline int do_cmp(void *base, size_t n, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ size_t l, size_t r) -+{ -+ return cmp_func(base + inorder_to_eytzinger0(l, n) * size, -+ base + inorder_to_eytzinger0(r, n) * size, -+ size); -+} -+ -+static inline void do_swap(void *base, size_t n, size_t size, -+ void (*swap_func)(void *, void *, size_t), -+ size_t l, size_t r) -+{ -+ swap_func(base + inorder_to_eytzinger0(l, n) * size, -+ base + inorder_to_eytzinger0(r, n) * size, -+ size); -+} -+ -+void eytzinger0_sort(void *base, size_t n, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t)) -+{ -+ int i, c, r; -+ -+ if (!swap_func) { -+ if (size == 4 && alignment_ok(base, 4)) -+ swap_func = u32_swap; -+ else if (size == 8 && alignment_ok(base, 8)) -+ swap_func = u64_swap; -+ else -+ swap_func = generic_swap; -+ } -+ -+ /* heapify */ -+ for (i = n / 2 - 1; i >= 0; --i) { -+ for (r = i; r * 2 + 1 < n; r = c) { -+ c = r * 2 + 1; -+ -+ if (c + 1 < n && -+ do_cmp(base, n, size, cmp_func, c, c + 1) < 0) -+ c++; -+ -+ if (do_cmp(base, n, size, cmp_func, r, c) >= 0) -+ break; -+ -+ do_swap(base, n, size, swap_func, r, 
c); -+ } -+ } -+ -+ /* sort */ -+ for (i = n - 1; i > 0; --i) { -+ do_swap(base, n, size, swap_func, 0, i); -+ -+ for (r = 0; r * 2 + 1 < i; r = c) { -+ c = r * 2 + 1; -+ -+ if (c + 1 < i && -+ do_cmp(base, n, size, cmp_func, c, c + 1) < 0) -+ c++; -+ -+ if (do_cmp(base, n, size, cmp_func, r, c) >= 0) -+ break; -+ -+ do_swap(base, n, size, swap_func, r, c); -+ } -+ } -+} -+ -+void sort_cmp_size(void *base, size_t num, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t size)) -+{ -+ /* pre-scale counters for performance */ -+ int i = (num/2 - 1) * size, n = num * size, c, r; -+ -+ if (!swap_func) { -+ if (size == 4 && alignment_ok(base, 4)) -+ swap_func = u32_swap; -+ else if (size == 8 && alignment_ok(base, 8)) -+ swap_func = u64_swap; -+ else -+ swap_func = generic_swap; -+ } -+ -+ /* heapify */ -+ for ( ; i >= 0; i -= size) { -+ for (r = i; r * 2 + size < n; r = c) { -+ c = r * 2 + size; -+ if (c < n - size && -+ cmp_func(base + c, base + c + size, size) < 0) -+ c += size; -+ if (cmp_func(base + r, base + c, size) >= 0) -+ break; -+ swap_func(base + r, base + c, size); -+ } -+ } -+ -+ /* sort */ -+ for (i = n - size; i > 0; i -= size) { -+ swap_func(base, base + i, size); -+ for (r = 0; r * 2 + size < i; r = c) { -+ c = r * 2 + size; -+ if (c < i - size && -+ cmp_func(base + c, base + c + size, size) < 0) -+ c += size; -+ if (cmp_func(base + r, base + c, size) >= 0) -+ break; -+ swap_func(base + r, base + c, size); -+ } -+ } -+} -+ -+static void mempool_free_vp(void *element, void *pool_data) -+{ -+ size_t size = (size_t) pool_data; -+ -+ vpfree(element, size); -+} -+ -+static void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data) -+{ -+ size_t size = (size_t) pool_data; -+ -+ return vpmalloc(size, gfp_mask); -+} -+ -+int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size) -+{ -+ return size < PAGE_SIZE -+ ? 
mempool_init_kmalloc_pool(pool, min_nr, size) -+ : mempool_init(pool, min_nr, mempool_alloc_vp, -+ mempool_free_vp, (void *) size); -+} -+ -+#if 0 -+void eytzinger1_test(void) -+{ -+ unsigned inorder, eytz, size; -+ -+ pr_info("1 based eytzinger test:"); -+ -+ for (size = 2; -+ size < 65536; -+ size++) { -+ unsigned extra = eytzinger1_extra(size); -+ -+ if (!(size % 4096)) -+ pr_info("tree size %u", size); -+ -+ BUG_ON(eytzinger1_prev(0, size) != eytzinger1_last(size)); -+ BUG_ON(eytzinger1_next(0, size) != eytzinger1_first(size)); -+ -+ BUG_ON(eytzinger1_prev(eytzinger1_first(size), size) != 0); -+ BUG_ON(eytzinger1_next(eytzinger1_last(size), size) != 0); -+ -+ inorder = 1; -+ eytzinger1_for_each(eytz, size) { -+ BUG_ON(__inorder_to_eytzinger1(inorder, size, extra) != eytz); -+ BUG_ON(__eytzinger1_to_inorder(eytz, size, extra) != inorder); -+ BUG_ON(eytz != eytzinger1_last(size) && -+ eytzinger1_prev(eytzinger1_next(eytz, size), size) != eytz); -+ -+ inorder++; -+ } -+ } -+} -+ -+void eytzinger0_test(void) -+{ -+ -+ unsigned inorder, eytz, size; -+ -+ pr_info("0 based eytzinger test:"); -+ -+ for (size = 1; -+ size < 65536; -+ size++) { -+ unsigned extra = eytzinger0_extra(size); -+ -+ if (!(size % 4096)) -+ pr_info("tree size %u", size); -+ -+ BUG_ON(eytzinger0_prev(-1, size) != eytzinger0_last(size)); -+ BUG_ON(eytzinger0_next(-1, size) != eytzinger0_first(size)); -+ -+ BUG_ON(eytzinger0_prev(eytzinger0_first(size), size) != -1); -+ BUG_ON(eytzinger0_next(eytzinger0_last(size), size) != -1); -+ -+ inorder = 0; -+ eytzinger0_for_each(eytz, size) { -+ BUG_ON(__inorder_to_eytzinger0(inorder, size, extra) != eytz); -+ BUG_ON(__eytzinger0_to_inorder(eytz, size, extra) != inorder); -+ BUG_ON(eytz != eytzinger0_last(size) && -+ eytzinger0_prev(eytzinger0_next(eytz, size), size) != eytz); -+ -+ inorder++; -+ } -+ } -+} -+ -+static inline int cmp_u16(const void *_l, const void *_r, size_t size) -+{ -+ const u16 *l = _l, *r = _r; -+ -+ return (*l > *r) - (*r - *l); -+} -+ -+static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search) -+{ -+ int i, c1 = -1, c2 = -1; -+ ssize_t r; -+ -+ r = eytzinger0_find_le(test_array, nr, -+ sizeof(test_array[0]), -+ cmp_u16, &search); -+ if (r >= 0) -+ c1 = test_array[r]; -+ -+ for (i = 0; i < nr; i++) -+ if (test_array[i] <= search && test_array[i] > c2) -+ c2 = test_array[i]; -+ -+ if (c1 != c2) { -+ eytzinger0_for_each(i, nr) -+ pr_info("[%3u] = %12u", i, test_array[i]); -+ pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i", -+ i, r, c1, c2); -+ } -+} -+ -+void eytzinger0_find_test(void) -+{ -+ unsigned i, nr, allocated = 1 << 12; -+ u16 *test_array = kmalloc_array(allocated, sizeof(test_array[0]), GFP_KERNEL); -+ -+ for (nr = 1; nr < allocated; nr++) { -+ pr_info("testing %u elems", nr); -+ -+ get_random_bytes(test_array, nr * sizeof(test_array[0])); -+ eytzinger0_sort(test_array, nr, sizeof(test_array[0]), cmp_u16, NULL); -+ -+ /* verify array is sorted correctly: */ -+ eytzinger0_for_each(i, nr) -+ BUG_ON(i != eytzinger0_last(nr) && -+ test_array[i] > test_array[eytzinger0_next(i, nr)]); -+ -+ for (i = 0; i < U16_MAX; i += 1 << 12) -+ eytzinger0_find_test_val(test_array, nr, i); -+ -+ for (i = 0; i < nr; i++) { -+ eytzinger0_find_test_val(test_array, nr, test_array[i] - 1); -+ eytzinger0_find_test_val(test_array, nr, test_array[i]); -+ eytzinger0_find_test_val(test_array, nr, test_array[i] + 1); -+ } -+ } -+ -+ kfree(test_array); -+} -+#endif -+ -+/* -+ * Accumulate percpu counters onto one cpu's copy - only valid when access -+ * 
against any percpu counter is guarded against -+ */ -+u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) -+{ -+ u64 *ret; -+ int cpu; -+ -+ preempt_disable(); -+ ret = this_cpu_ptr(p); -+ preempt_enable(); -+ -+ for_each_possible_cpu(cpu) { -+ u64 *i = per_cpu_ptr(p, cpu); -+ -+ if (i != ret) { -+ acc_u64s(ret, i, nr); -+ memset(i, 0, nr * sizeof(u64)); -+ } -+ } -+ -+ return ret; -+} -diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h -new file mode 100644 -index 000000000000..f48c6380684f ---- /dev/null -+++ b/fs/bcachefs/util.h -@@ -0,0 +1,761 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_UTIL_H -+#define _BCACHEFS_UTIL_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define PAGE_SECTOR_SHIFT (PAGE_SHIFT - 9) -+#define PAGE_SECTORS (1UL << PAGE_SECTOR_SHIFT) -+ -+struct closure; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+#define EBUG_ON(cond) BUG_ON(cond) -+#define atomic_dec_bug(v) BUG_ON(atomic_dec_return(v) < 0) -+#define atomic_inc_bug(v, i) BUG_ON(atomic_inc_return(v) <= i) -+#define atomic_sub_bug(i, v) BUG_ON(atomic_sub_return(i, v) < 0) -+#define atomic_add_bug(i, v) BUG_ON(atomic_add_return(i, v) < 0) -+#define atomic_long_dec_bug(v) BUG_ON(atomic_long_dec_return(v) < 0) -+#define atomic_long_sub_bug(i, v) BUG_ON(atomic_long_sub_return(i, v) < 0) -+#define atomic64_dec_bug(v) BUG_ON(atomic64_dec_return(v) < 0) -+#define atomic64_inc_bug(v, i) BUG_ON(atomic64_inc_return(v) <= i) -+#define atomic64_sub_bug(i, v) BUG_ON(atomic64_sub_return(i, v) < 0) -+#define atomic64_add_bug(i, v) BUG_ON(atomic64_add_return(i, v) < 0) -+ -+#define memcpy(dst, src, len) \ -+({ \ -+ void *_dst = (dst); \ -+ const void *_src = (src); \ -+ size_t _len = (len); \ -+ \ -+ BUG_ON(!((void *) (_dst) >= (void *) (_src) + (_len) || \ -+ (void *) (_dst) + (_len) <= (void *) (_src))); \ -+ memcpy(_dst, _src, _len); \ -+}) -+ -+#else /* DEBUG */ -+ -+#define EBUG_ON(cond) -+#define atomic_dec_bug(v) atomic_dec(v) -+#define atomic_inc_bug(v, i) atomic_inc(v) -+#define atomic_sub_bug(i, v) atomic_sub(i, v) -+#define atomic_add_bug(i, v) atomic_add(i, v) -+#define atomic_long_dec_bug(v) atomic_long_dec(v) -+#define atomic_long_sub_bug(i, v) atomic_long_sub(i, v) -+#define atomic64_dec_bug(v) atomic64_dec(v) -+#define atomic64_inc_bug(v, i) atomic64_inc(v) -+#define atomic64_sub_bug(i, v) atomic64_sub(i, v) -+#define atomic64_add_bug(i, v) atomic64_add(i, v) -+ -+#endif -+ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+#define CPU_BIG_ENDIAN 0 -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+#define CPU_BIG_ENDIAN 1 -+#endif -+ -+/* type hackery */ -+ -+#define type_is_exact(_val, _type) \ -+ __builtin_types_compatible_p(typeof(_val), _type) -+ -+#define type_is(_val, _type) \ -+ (__builtin_types_compatible_p(typeof(_val), _type) || \ -+ __builtin_types_compatible_p(typeof(_val), const _type)) -+ -+/* Userspace doesn't align allocations as nicely as the kernel allocators: */ -+static inline size_t buf_pages(void *p, size_t len) -+{ -+ return DIV_ROUND_UP(len + -+ ((unsigned long) p & (PAGE_SIZE - 1)), -+ PAGE_SIZE); -+} -+ -+static inline void vpfree(void *p, size_t size) -+{ -+ if (is_vmalloc_addr(p)) -+ vfree(p); -+ else -+ free_pages((unsigned long) p, get_order(size)); -+} -+ -+static inline void *vpmalloc(size_t size, gfp_t gfp_mask) -+{ -+ return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN, -+ get_order(size)) ?: -+ __vmalloc(size, gfp_mask); -+} -+ 
-+static inline void kvpfree(void *p, size_t size) -+{ -+ if (size < PAGE_SIZE) -+ kfree(p); -+ else -+ vpfree(p, size); -+} -+ -+static inline void *kvpmalloc(size_t size, gfp_t gfp_mask) -+{ -+ return size < PAGE_SIZE -+ ? kmalloc(size, gfp_mask) -+ : vpmalloc(size, gfp_mask); -+} -+ -+int mempool_init_kvpmalloc_pool(mempool_t *, int, size_t); -+ -+#define HEAP(type) \ -+struct { \ -+ size_t size, used; \ -+ type *data; \ -+} -+ -+#define DECLARE_HEAP(type, name) HEAP(type) name -+ -+#define init_heap(heap, _size, gfp) \ -+({ \ -+ (heap)->used = 0; \ -+ (heap)->size = (_size); \ -+ (heap)->data = kvpmalloc((heap)->size * sizeof((heap)->data[0]),\ -+ (gfp)); \ -+}) -+ -+#define free_heap(heap) \ -+do { \ -+ kvpfree((heap)->data, (heap)->size * sizeof((heap)->data[0])); \ -+ (heap)->data = NULL; \ -+} while (0) -+ -+#define heap_set_backpointer(h, i, _fn) \ -+do { \ -+ void (*fn)(typeof(h), size_t) = _fn; \ -+ if (fn) \ -+ fn(h, i); \ -+} while (0) -+ -+#define heap_swap(h, i, j, set_backpointer) \ -+do { \ -+ swap((h)->data[i], (h)->data[j]); \ -+ heap_set_backpointer(h, i, set_backpointer); \ -+ heap_set_backpointer(h, j, set_backpointer); \ -+} while (0) -+ -+#define heap_peek(h) \ -+({ \ -+ EBUG_ON(!(h)->used); \ -+ (h)->data[0]; \ -+}) -+ -+#define heap_full(h) ((h)->used == (h)->size) -+ -+#define heap_sift_down(h, i, cmp, set_backpointer) \ -+do { \ -+ size_t _c, _j = i; \ -+ \ -+ for (; _j * 2 + 1 < (h)->used; _j = _c) { \ -+ _c = _j * 2 + 1; \ -+ if (_c + 1 < (h)->used && \ -+ cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0) \ -+ _c++; \ -+ \ -+ if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \ -+ break; \ -+ heap_swap(h, _c, _j, set_backpointer); \ -+ } \ -+} while (0) -+ -+#define heap_sift_up(h, i, cmp, set_backpointer) \ -+do { \ -+ while (i) { \ -+ size_t p = (i - 1) / 2; \ -+ if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \ -+ break; \ -+ heap_swap(h, i, p, set_backpointer); \ -+ i = p; \ -+ } \ -+} while (0) -+ -+#define __heap_add(h, d, cmp, set_backpointer) \ -+({ \ -+ size_t _i = (h)->used++; \ -+ (h)->data[_i] = d; \ -+ heap_set_backpointer(h, _i, set_backpointer); \ -+ \ -+ heap_sift_up(h, _i, cmp, set_backpointer); \ -+ _i; \ -+}) -+ -+#define heap_add(h, d, cmp, set_backpointer) \ -+({ \ -+ bool _r = !heap_full(h); \ -+ if (_r) \ -+ __heap_add(h, d, cmp, set_backpointer); \ -+ _r; \ -+}) -+ -+#define heap_add_or_replace(h, new, cmp, set_backpointer) \ -+do { \ -+ if (!heap_add(h, new, cmp, set_backpointer) && \ -+ cmp(h, new, heap_peek(h)) >= 0) { \ -+ (h)->data[0] = new; \ -+ heap_set_backpointer(h, 0, set_backpointer); \ -+ heap_sift_down(h, 0, cmp, set_backpointer); \ -+ } \ -+} while (0) -+ -+#define heap_del(h, i, cmp, set_backpointer) \ -+do { \ -+ size_t _i = (i); \ -+ \ -+ BUG_ON(_i >= (h)->used); \ -+ (h)->used--; \ -+ heap_swap(h, _i, (h)->used, set_backpointer); \ -+ heap_sift_up(h, _i, cmp, set_backpointer); \ -+ heap_sift_down(h, _i, cmp, set_backpointer); \ -+} while (0) -+ -+#define heap_pop(h, d, cmp, set_backpointer) \ -+({ \ -+ bool _r = (h)->used; \ -+ if (_r) { \ -+ (d) = (h)->data[0]; \ -+ heap_del(h, 0, cmp, set_backpointer); \ -+ } \ -+ _r; \ -+}) -+ -+#define heap_resort(heap, cmp, set_backpointer) \ -+do { \ -+ ssize_t _i; \ -+ for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \ -+ heap_sift_down(heap, _i, cmp, set_backpointer); \ -+} while (0) -+ -+#define ANYSINT_MAX(t) \ -+ ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1) -+ -+struct printbuf { -+ char *pos; -+ char *end; -+}; -+ -+static inline size_t printbuf_remaining(struct 
printbuf *buf) -+{ -+ return buf->end - buf->pos; -+} -+ -+#define _PBUF(_buf, _len) \ -+ ((struct printbuf) { \ -+ .pos = _buf, \ -+ .end = _buf + _len, \ -+ }) -+ -+#define PBUF(_buf) _PBUF(_buf, sizeof(_buf)) -+ -+#define pr_buf(_out, ...) \ -+do { \ -+ (_out)->pos += scnprintf((_out)->pos, printbuf_remaining(_out), \ -+ __VA_ARGS__); \ -+} while (0) -+ -+void bch_scnmemcpy(struct printbuf *, const char *, size_t); -+ -+int bch2_strtoint_h(const char *, int *); -+int bch2_strtouint_h(const char *, unsigned int *); -+int bch2_strtoll_h(const char *, long long *); -+int bch2_strtoull_h(const char *, unsigned long long *); -+int bch2_strtou64_h(const char *, u64 *); -+ -+static inline int bch2_strtol_h(const char *cp, long *res) -+{ -+#if BITS_PER_LONG == 32 -+ return bch2_strtoint_h(cp, (int *) res); -+#else -+ return bch2_strtoll_h(cp, (long long *) res); -+#endif -+} -+ -+static inline int bch2_strtoul_h(const char *cp, long *res) -+{ -+#if BITS_PER_LONG == 32 -+ return bch2_strtouint_h(cp, (unsigned int *) res); -+#else -+ return bch2_strtoull_h(cp, (unsigned long long *) res); -+#endif -+} -+ -+#define strtoi_h(cp, res) \ -+ ( type_is(*res, int) ? bch2_strtoint_h(cp, (void *) res)\ -+ : type_is(*res, long) ? bch2_strtol_h(cp, (void *) res)\ -+ : type_is(*res, long long) ? bch2_strtoll_h(cp, (void *) res)\ -+ : type_is(*res, unsigned) ? bch2_strtouint_h(cp, (void *) res)\ -+ : type_is(*res, unsigned long) ? bch2_strtoul_h(cp, (void *) res)\ -+ : type_is(*res, unsigned long long) ? bch2_strtoull_h(cp, (void *) res)\ -+ : -EINVAL) -+ -+#define strtoul_safe(cp, var) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (!_r) \ -+ var = _v; \ -+ _r; \ -+}) -+ -+#define strtoul_safe_clamp(cp, var, min, max) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (!_r) \ -+ var = clamp_t(typeof(var), _v, min, max); \ -+ _r; \ -+}) -+ -+#define strtoul_safe_restrict(cp, var, min, max) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (!_r && _v >= min && _v <= max) \ -+ var = _v; \ -+ else \ -+ _r = -EINVAL; \ -+ _r; \ -+}) -+ -+#define snprint(buf, size, var) \ -+ snprintf(buf, size, \ -+ type_is(var, int) ? "%i\n" \ -+ : type_is(var, unsigned) ? "%u\n" \ -+ : type_is(var, long) ? "%li\n" \ -+ : type_is(var, unsigned long) ? "%lu\n" \ -+ : type_is(var, s64) ? "%lli\n" \ -+ : type_is(var, u64) ? "%llu\n" \ -+ : type_is(var, char *) ? 
"%s\n" \ -+ : "%i\n", var) -+ -+void bch2_hprint(struct printbuf *, s64); -+ -+bool bch2_is_zero(const void *, size_t); -+ -+void bch2_string_opt_to_text(struct printbuf *, -+ const char * const [], size_t); -+ -+void bch2_flags_to_text(struct printbuf *, const char * const[], u64); -+u64 bch2_read_flag_list(char *, const char * const[]); -+ -+#define NR_QUANTILES 15 -+#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES) -+#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES) -+#define QUANTILE_LAST eytzinger0_last(NR_QUANTILES) -+ -+struct quantiles { -+ struct quantile_entry { -+ u64 m; -+ u64 step; -+ } entries[NR_QUANTILES]; -+}; -+ -+struct time_stat_buffer { -+ unsigned nr; -+ struct time_stat_buffer_entry { -+ u64 start; -+ u64 end; -+ } entries[32]; -+}; -+ -+struct time_stats { -+ spinlock_t lock; -+ u64 count; -+ /* all fields are in nanoseconds */ -+ u64 average_duration; -+ u64 average_frequency; -+ u64 max_duration; -+ u64 last_event; -+ struct quantiles quantiles; -+ -+ struct time_stat_buffer __percpu *buffer; -+}; -+ -+void __bch2_time_stats_update(struct time_stats *stats, u64, u64); -+ -+static inline void bch2_time_stats_update(struct time_stats *stats, u64 start) -+{ -+ __bch2_time_stats_update(stats, start, local_clock()); -+} -+ -+void bch2_time_stats_to_text(struct printbuf *, struct time_stats *); -+ -+void bch2_time_stats_exit(struct time_stats *); -+void bch2_time_stats_init(struct time_stats *); -+ -+#define ewma_add(ewma, val, weight) \ -+({ \ -+ typeof(ewma) _ewma = (ewma); \ -+ typeof(weight) _weight = (weight); \ -+ \ -+ (((_ewma << _weight) - _ewma) + (val)) >> _weight; \ -+}) -+ -+struct bch_ratelimit { -+ /* Next time we want to do some work, in nanoseconds */ -+ u64 next; -+ -+ /* -+ * Rate at which we want to do work, in units per nanosecond -+ * The units here correspond to the units passed to -+ * bch2_ratelimit_increment() -+ */ -+ unsigned rate; -+}; -+ -+static inline void bch2_ratelimit_reset(struct bch_ratelimit *d) -+{ -+ d->next = local_clock(); -+} -+ -+u64 bch2_ratelimit_delay(struct bch_ratelimit *); -+void bch2_ratelimit_increment(struct bch_ratelimit *, u64); -+ -+struct bch_pd_controller { -+ struct bch_ratelimit rate; -+ unsigned long last_update; -+ -+ s64 last_actual; -+ s64 smoothed_derivative; -+ -+ unsigned p_term_inverse; -+ unsigned d_smooth; -+ unsigned d_term; -+ -+ /* for exporting to sysfs (no effect on behavior) */ -+ s64 last_derivative; -+ s64 last_proportional; -+ s64 last_change; -+ s64 last_target; -+ -+ /* If true, the rate will not increase if bch2_ratelimit_delay() -+ * is not being called often enough. 
*/ -+ bool backpressure; -+}; -+ -+void bch2_pd_controller_update(struct bch_pd_controller *, s64, s64, int); -+void bch2_pd_controller_init(struct bch_pd_controller *); -+size_t bch2_pd_controller_print_debug(struct bch_pd_controller *, char *); -+ -+#define sysfs_pd_controller_attribute(name) \ -+ rw_attribute(name##_rate); \ -+ rw_attribute(name##_rate_bytes); \ -+ rw_attribute(name##_rate_d_term); \ -+ rw_attribute(name##_rate_p_term_inverse); \ -+ read_attribute(name##_rate_debug) -+ -+#define sysfs_pd_controller_files(name) \ -+ &sysfs_##name##_rate, \ -+ &sysfs_##name##_rate_bytes, \ -+ &sysfs_##name##_rate_d_term, \ -+ &sysfs_##name##_rate_p_term_inverse, \ -+ &sysfs_##name##_rate_debug -+ -+#define sysfs_pd_controller_show(name, var) \ -+do { \ -+ sysfs_hprint(name##_rate, (var)->rate.rate); \ -+ sysfs_print(name##_rate_bytes, (var)->rate.rate); \ -+ sysfs_print(name##_rate_d_term, (var)->d_term); \ -+ sysfs_print(name##_rate_p_term_inverse, (var)->p_term_inverse); \ -+ \ -+ if (attr == &sysfs_##name##_rate_debug) \ -+ return bch2_pd_controller_print_debug(var, buf); \ -+} while (0) -+ -+#define sysfs_pd_controller_store(name, var) \ -+do { \ -+ sysfs_strtoul_clamp(name##_rate, \ -+ (var)->rate.rate, 1, UINT_MAX); \ -+ sysfs_strtoul_clamp(name##_rate_bytes, \ -+ (var)->rate.rate, 1, UINT_MAX); \ -+ sysfs_strtoul(name##_rate_d_term, (var)->d_term); \ -+ sysfs_strtoul_clamp(name##_rate_p_term_inverse, \ -+ (var)->p_term_inverse, 1, INT_MAX); \ -+} while (0) -+ -+#define container_of_or_null(ptr, type, member) \ -+({ \ -+ typeof(ptr) _ptr = ptr; \ -+ _ptr ? container_of(_ptr, type, member) : NULL; \ -+}) -+ -+/* Does linear interpolation between powers of two */ -+static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) -+{ -+ unsigned fract = x & ~(~0 << fract_bits); -+ -+ x >>= fract_bits; -+ x = 1 << x; -+ x += (x * fract) >> fract_bits; -+ -+ return x; -+} -+ -+void bch2_bio_map(struct bio *bio, void *base, size_t); -+int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t); -+ -+static inline sector_t bdev_sectors(struct block_device *bdev) -+{ -+ return bdev->bd_inode->i_size >> 9; -+} -+ -+#define closure_bio_submit(bio, cl) \ -+do { \ -+ closure_get(cl); \ -+ submit_bio(bio); \ -+} while (0) -+ -+#define kthread_wait_freezable(cond) \ -+({ \ -+ int _ret = 0; \ -+ while (1) { \ -+ set_current_state(TASK_INTERRUPTIBLE); \ -+ if (kthread_should_stop()) { \ -+ _ret = -1; \ -+ break; \ -+ } \ -+ \ -+ if (cond) \ -+ break; \ -+ \ -+ schedule(); \ -+ try_to_freeze(); \ -+ } \ -+ set_current_state(TASK_RUNNING); \ -+ _ret; \ -+}) -+ -+size_t bch2_rand_range(size_t); -+ -+void memcpy_to_bio(struct bio *, struct bvec_iter, const void *); -+void memcpy_from_bio(void *, struct bio *, struct bvec_iter); -+ -+static inline void memcpy_u64s_small(void *dst, const void *src, -+ unsigned u64s) -+{ -+ u64 *d = dst; -+ const u64 *s = src; -+ -+ while (u64s--) -+ *d++ = *s++; -+} -+ -+static inline void __memcpy_u64s(void *dst, const void *src, -+ unsigned u64s) -+{ -+#ifdef CONFIG_X86_64 -+ long d0, d1, d2; -+ asm volatile("rep ; movsq" -+ : "=&c" (d0), "=&D" (d1), "=&S" (d2) -+ : "0" (u64s), "1" (dst), "2" (src) -+ : "memory"); -+#else -+ u64 *d = dst; -+ const u64 *s = src; -+ -+ while (u64s--) -+ *d++ = *s++; -+#endif -+} -+ -+static inline void memcpy_u64s(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(!(dst >= src + u64s * sizeof(u64) || -+ dst + u64s * sizeof(u64) <= src)); -+ -+ __memcpy_u64s(dst, src, u64s); -+} -+ -+static inline void __memmove_u64s_down(void 
*dst, const void *src, -+ unsigned u64s) -+{ -+ __memcpy_u64s(dst, src, u64s); -+} -+ -+static inline void memmove_u64s_down(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(dst > src); -+ -+ __memmove_u64s_down(dst, src, u64s); -+} -+ -+static inline void __memmove_u64s_up_small(void *_dst, const void *_src, -+ unsigned u64s) -+{ -+ u64 *dst = (u64 *) _dst + u64s; -+ u64 *src = (u64 *) _src + u64s; -+ -+ while (u64s--) -+ *--dst = *--src; -+} -+ -+static inline void memmove_u64s_up_small(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(dst < src); -+ -+ __memmove_u64s_up_small(dst, src, u64s); -+} -+ -+static inline void __memmove_u64s_up(void *_dst, const void *_src, -+ unsigned u64s) -+{ -+ u64 *dst = (u64 *) _dst + u64s - 1; -+ u64 *src = (u64 *) _src + u64s - 1; -+ -+#ifdef CONFIG_X86_64 -+ long d0, d1, d2; -+ asm volatile("std ;\n" -+ "rep ; movsq\n" -+ "cld ;\n" -+ : "=&c" (d0), "=&D" (d1), "=&S" (d2) -+ : "0" (u64s), "1" (dst), "2" (src) -+ : "memory"); -+#else -+ while (u64s--) -+ *dst-- = *src--; -+#endif -+} -+ -+static inline void memmove_u64s_up(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(dst < src); -+ -+ __memmove_u64s_up(dst, src, u64s); -+} -+ -+static inline void memmove_u64s(void *dst, const void *src, -+ unsigned u64s) -+{ -+ if (dst < src) -+ __memmove_u64s_down(dst, src, u64s); -+ else -+ __memmove_u64s_up(dst, src, u64s); -+} -+ -+/* Set the last few bytes up to a u64 boundary given an offset into a buffer. */ -+static inline void memset_u64s_tail(void *s, int c, unsigned bytes) -+{ -+ unsigned rem = round_up(bytes, sizeof(u64)) - bytes; -+ -+ memset(s + bytes, c, rem); -+} -+ -+void sort_cmp_size(void *base, size_t num, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t)); -+ -+/* just the memmove, doesn't update @_nr */ -+#define __array_insert_item(_array, _nr, _pos) \ -+ memmove(&(_array)[(_pos) + 1], \ -+ &(_array)[(_pos)], \ -+ sizeof((_array)[0]) * ((_nr) - (_pos))) -+ -+#define array_insert_item(_array, _nr, _pos, _new_item) \ -+do { \ -+ __array_insert_item(_array, _nr, _pos); \ -+ (_nr)++; \ -+ (_array)[(_pos)] = (_new_item); \ -+} while (0) -+ -+#define array_remove_items(_array, _nr, _pos, _nr_to_remove) \ -+do { \ -+ (_nr) -= (_nr_to_remove); \ -+ memmove(&(_array)[(_pos)], \ -+ &(_array)[(_pos) + (_nr_to_remove)], \ -+ sizeof((_array)[0]) * ((_nr) - (_pos))); \ -+} while (0) -+ -+#define array_remove_item(_array, _nr, _pos) \ -+ array_remove_items(_array, _nr, _pos, 1) -+ -+#define bubble_sort(_base, _nr, _cmp) \ -+do { \ -+ ssize_t _i, _end; \ -+ bool _swapped = true; \ -+ \ -+ for (_end = (ssize_t) (_nr) - 1; _end > 0 && _swapped; --_end) {\ -+ _swapped = false; \ -+ for (_i = 0; _i < _end; _i++) \ -+ if (_cmp((_base)[_i], (_base)[_i + 1]) > 0) { \ -+ swap((_base)[_i], (_base)[_i + 1]); \ -+ _swapped = true; \ -+ } \ -+ } \ -+} while (0) -+ -+static inline u64 percpu_u64_get(u64 __percpu *src) -+{ -+ u64 ret = 0; -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ ret += *per_cpu_ptr(src, cpu); -+ return ret; -+} -+ -+static inline void percpu_u64_set(u64 __percpu *dst, u64 src) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ *per_cpu_ptr(dst, cpu) = 0; -+ -+ preempt_disable(); -+ *this_cpu_ptr(dst) = src; -+ preempt_enable(); -+} -+ -+static inline void acc_u64s(u64 *acc, const u64 *src, unsigned nr) -+{ -+ unsigned i; -+ -+ for (i = 0; i < nr; i++) -+ acc[i] += src[i]; -+} -+ -+static inline void acc_u64s_percpu(u64 *acc, const u64 __percpu *src, 
-+ unsigned nr) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ acc_u64s(acc, per_cpu_ptr(src, cpu), nr); -+} -+ -+static inline void percpu_memset(void __percpu *p, int c, size_t bytes) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ memset(per_cpu_ptr(p, cpu), c, bytes); -+} -+ -+u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned); -+ -+#define cmp_int(l, r) ((l > r) - (l < r)) -+ -+#endif /* _BCACHEFS_UTIL_H */ -diff --git a/fs/bcachefs/vstructs.h b/fs/bcachefs/vstructs.h -new file mode 100644 -index 000000000000..c099cdc0605f ---- /dev/null -+++ b/fs/bcachefs/vstructs.h -@@ -0,0 +1,63 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _VSTRUCTS_H -+#define _VSTRUCTS_H -+ -+#include "util.h" -+ -+/* -+ * NOTE: we can't differentiate between __le64 and u64 with type_is - this -+ * assumes u64 is little endian: -+ */ -+#define __vstruct_u64s(_s) \ -+({ \ -+ ( type_is((_s)->u64s, u64) ? le64_to_cpu((__force __le64) (_s)->u64s) \ -+ : type_is((_s)->u64s, u32) ? le32_to_cpu((__force __le32) (_s)->u64s) \ -+ : type_is((_s)->u64s, u16) ? le16_to_cpu((__force __le16) (_s)->u64s) \ -+ : ((__force u8) ((_s)->u64s))); \ -+}) -+ -+#define __vstruct_bytes(_type, _u64s) \ -+({ \ -+ BUILD_BUG_ON(offsetof(_type, _data) % sizeof(u64)); \ -+ \ -+ (offsetof(_type, _data) + (_u64s) * sizeof(u64)); \ -+}) -+ -+#define vstruct_bytes(_s) \ -+ __vstruct_bytes(typeof(*(_s)), __vstruct_u64s(_s)) -+ -+#define __vstruct_blocks(_type, _sector_block_bits, _u64s) \ -+ (round_up(__vstruct_bytes(_type, _u64s), \ -+ 512 << (_sector_block_bits)) >> (9 + (_sector_block_bits))) -+ -+#define vstruct_blocks(_s, _sector_block_bits) \ -+ __vstruct_blocks(typeof(*(_s)), _sector_block_bits, __vstruct_u64s(_s)) -+ -+#define vstruct_blocks_plus(_s, _sector_block_bits, _u64s) \ -+ __vstruct_blocks(typeof(*(_s)), _sector_block_bits, \ -+ __vstruct_u64s(_s) + (_u64s)) -+ -+#define vstruct_sectors(_s, _sector_block_bits) \ -+ (round_up(vstruct_bytes(_s), 512 << (_sector_block_bits)) >> 9) -+ -+#define vstruct_next(_s) \ -+ ((typeof(_s)) ((_s)->_data + __vstruct_u64s(_s))) -+#define vstruct_last(_s) \ -+ ((typeof(&(_s)->start[0])) ((_s)->_data + __vstruct_u64s(_s))) -+#define vstruct_end(_s) \ -+ ((void *) ((_s)->_data + __vstruct_u64s(_s))) -+ -+#define vstruct_for_each(_s, _i) \ -+ for (_i = (_s)->start; \ -+ _i < vstruct_last(_s); \ -+ _i = vstruct_next(_i)) -+ -+#define vstruct_for_each_safe(_s, _i, _t) \ -+ for (_i = (_s)->start; \ -+ _i < vstruct_last(_s) && (_t = vstruct_next(_i), true); \ -+ _i = _t) -+ -+#define vstruct_idx(_s, _idx) \ -+ ((typeof(&(_s)->start[0])) ((_s)->_data + (_idx))) -+ -+#endif /* _VSTRUCTS_H */ -diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c -new file mode 100644 -index 000000000000..21f64cb7e402 ---- /dev/null -+++ b/fs/bcachefs/xattr.c -@@ -0,0 +1,586 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_update.h" -+#include "extents.h" -+#include "fs.h" -+#include "rebalance.h" -+#include "str_hash.h" -+#include "xattr.h" -+ -+#include -+#include -+#include -+ -+static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned); -+ -+static u64 bch2_xattr_hash(const struct bch_hash_info *info, -+ const struct xattr_search_key *key) -+{ -+ struct bch_str_hash_ctx ctx; -+ -+ bch2_str_hash_init(&ctx, info); -+ bch2_str_hash_update(&ctx, info, &key->type, sizeof(key->type)); -+ bch2_str_hash_update(&ctx, info, key->name.name, key->name.len); -+ -+ return bch2_str_hash_end(&ctx, info); -+} -+ -+static u64 
xattr_hash_key(const struct bch_hash_info *info, const void *key) -+{ -+ return bch2_xattr_hash(info, key); -+} -+ -+static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) -+{ -+ struct bkey_s_c_xattr x = bkey_s_c_to_xattr(k); -+ -+ return bch2_xattr_hash(info, -+ &X_SEARCH(x.v->x_type, x.v->x_name, x.v->x_name_len)); -+} -+ -+static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r) -+{ -+ struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l); -+ const struct xattr_search_key *r = _r; -+ -+ return l.v->x_type != r->type || -+ l.v->x_name_len != r->name.len || -+ memcmp(l.v->x_name, r->name.name, r->name.len); -+} -+ -+static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) -+{ -+ struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l); -+ struct bkey_s_c_xattr r = bkey_s_c_to_xattr(_r); -+ -+ return l.v->x_type != r.v->x_type || -+ l.v->x_name_len != r.v->x_name_len || -+ memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len); -+} -+ -+const struct bch_hash_desc bch2_xattr_hash_desc = { -+ .btree_id = BTREE_ID_XATTRS, -+ .key_type = KEY_TYPE_xattr, -+ .hash_key = xattr_hash_key, -+ .hash_bkey = xattr_hash_bkey, -+ .cmp_key = xattr_cmp_key, -+ .cmp_bkey = xattr_cmp_bkey, -+}; -+ -+const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k) -+{ -+ const struct xattr_handler *handler; -+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); -+ -+ if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr)) -+ return "value too small"; -+ -+ if (bkey_val_u64s(k.k) < -+ xattr_val_u64s(xattr.v->x_name_len, -+ le16_to_cpu(xattr.v->x_val_len))) -+ return "value too small"; -+ -+ if (bkey_val_u64s(k.k) > -+ xattr_val_u64s(xattr.v->x_name_len, -+ le16_to_cpu(xattr.v->x_val_len) + 4)) -+ return "value too big"; -+ -+ handler = bch2_xattr_type_to_handler(xattr.v->x_type); -+ if (!handler) -+ return "invalid type"; -+ -+ if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len)) -+ return "xattr name has invalid characters"; -+ -+ return NULL; -+} -+ -+void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ const struct xattr_handler *handler; -+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); -+ -+ handler = bch2_xattr_type_to_handler(xattr.v->x_type); -+ if (handler && handler->prefix) -+ pr_buf(out, "%s", handler->prefix); -+ else if (handler) -+ pr_buf(out, "(type %u)", xattr.v->x_type); -+ else -+ pr_buf(out, "(unknown type %u)", xattr.v->x_type); -+ -+ bch_scnmemcpy(out, xattr.v->x_name, -+ xattr.v->x_name_len); -+ pr_buf(out, ":"); -+ bch_scnmemcpy(out, xattr_val(xattr.v), -+ le16_to_cpu(xattr.v->x_val_len)); -+} -+ -+int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, -+ const char *name, void *buffer, size_t size, int type) -+{ -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c_xattr xattr; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, -+ &inode->ei_str_hash, inode->v.i_ino, -+ &X_SEARCH(type, name, strlen(name)), -+ 0); -+ if (IS_ERR(iter)) { -+ bch2_trans_exit(&trans); -+ BUG_ON(PTR_ERR(iter) == -EINTR); -+ -+ return PTR_ERR(iter) == -ENOENT ? 
-ENODATA : PTR_ERR(iter); -+ } -+ -+ xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); -+ ret = le16_to_cpu(xattr.v->x_val_len); -+ if (buffer) { -+ if (ret > size) -+ ret = -ERANGE; -+ else -+ memcpy(buffer, xattr_val(xattr.v), ret); -+ } -+ -+ bch2_trans_exit(&trans); -+ return ret; -+} -+ -+int bch2_xattr_set(struct btree_trans *trans, u64 inum, -+ const struct bch_hash_info *hash_info, -+ const char *name, const void *value, size_t size, -+ int type, int flags) -+{ -+ int ret; -+ -+ if (value) { -+ struct bkey_i_xattr *xattr; -+ unsigned namelen = strlen(name); -+ unsigned u64s = BKEY_U64s + -+ xattr_val_u64s(namelen, size); -+ -+ if (u64s > U8_MAX) -+ return -ERANGE; -+ -+ xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); -+ if (IS_ERR(xattr)) -+ return PTR_ERR(xattr); -+ -+ bkey_xattr_init(&xattr->k_i); -+ xattr->k.u64s = u64s; -+ xattr->v.x_type = type; -+ xattr->v.x_name_len = namelen; -+ xattr->v.x_val_len = cpu_to_le16(size); -+ memcpy(xattr->v.x_name, name, namelen); -+ memcpy(xattr_val(&xattr->v), value, size); -+ -+ ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info, -+ inum, &xattr->k_i, -+ (flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)| -+ (flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0)); -+ } else { -+ struct xattr_search_key search = -+ X_SEARCH(type, name, strlen(name)); -+ -+ ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, -+ hash_info, inum, &search); -+ } -+ -+ if (ret == -ENOENT) -+ ret = flags & XATTR_REPLACE ? -ENODATA : 0; -+ -+ return ret; -+} -+ -+struct xattr_buf { -+ char *buf; -+ size_t len; -+ size_t used; -+}; -+ -+static int __bch2_xattr_emit(const char *prefix, -+ const char *name, size_t name_len, -+ struct xattr_buf *buf) -+{ -+ const size_t prefix_len = strlen(prefix); -+ const size_t total_len = prefix_len + name_len + 1; -+ -+ if (buf->buf) { -+ if (buf->used + total_len > buf->len) -+ return -ERANGE; -+ -+ memcpy(buf->buf + buf->used, prefix, prefix_len); -+ memcpy(buf->buf + buf->used + prefix_len, -+ name, name_len); -+ buf->buf[buf->used + prefix_len + name_len] = '\0'; -+ } -+ -+ buf->used += total_len; -+ return 0; -+} -+ -+static int bch2_xattr_emit(struct dentry *dentry, -+ const struct bch_xattr *xattr, -+ struct xattr_buf *buf) -+{ -+ const struct xattr_handler *handler = -+ bch2_xattr_type_to_handler(xattr->x_type); -+ -+ return handler && (!handler->list || handler->list(dentry)) -+ ? __bch2_xattr_emit(handler->prefix ?: handler->name, -+ xattr->x_name, xattr->x_name_len, buf) -+ : 0; -+} -+ -+static int bch2_xattr_list_bcachefs(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct xattr_buf *buf, -+ bool all) -+{ -+ const char *prefix = all ? "bcachefs_effective." 
: "bcachefs."; -+ unsigned id; -+ int ret = 0; -+ u64 v; -+ -+ for (id = 0; id < Inode_opt_nr; id++) { -+ v = bch2_inode_opt_get(&inode->ei_inode, id); -+ if (!v) -+ continue; -+ -+ if (!all && -+ !(inode->ei_inode.bi_fields_set & (1 << id))) -+ continue; -+ -+ ret = __bch2_xattr_emit(prefix, bch2_inode_opts[id], -+ strlen(bch2_inode_opts[id]), buf); -+ if (ret) -+ break; -+ } -+ -+ return ret; -+} -+ -+ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) -+{ -+ struct bch_fs *c = dentry->d_sb->s_fs_info; -+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); -+ struct btree_trans trans; -+ struct btree_iter *iter; -+ struct bkey_s_c k; -+ struct xattr_buf buf = { .buf = buffer, .len = buffer_size }; -+ u64 inum = dentry->d_inode->i_ino; -+ int ret; -+ -+ bch2_trans_init(&trans, c, 0, 0); -+ -+ for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, -+ POS(inum, 0), 0, k, ret) { -+ BUG_ON(k.k->p.inode < inum); -+ -+ if (k.k->p.inode > inum) -+ break; -+ -+ if (k.k->type != KEY_TYPE_xattr) -+ continue; -+ -+ ret = bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, &buf); -+ if (ret) -+ break; -+ } -+ ret = bch2_trans_exit(&trans) ?: ret; -+ -+ if (ret) -+ return ret; -+ -+ ret = bch2_xattr_list_bcachefs(c, inode, &buf, false); -+ if (ret) -+ return ret; -+ -+ ret = bch2_xattr_list_bcachefs(c, inode, &buf, true); -+ if (ret) -+ return ret; -+ -+ return buf.used; -+} -+ -+static int bch2_xattr_get_handler(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ return bch2_xattr_get(c, inode, name, buffer, size, handler->flags); -+} -+ -+static int bch2_xattr_set_handler(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, const void *value, -+ size_t size, int flags) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0, -+ bch2_xattr_set(&trans, inode->v.i_ino, -+ &inode->ei_str_hash, -+ name, value, size, -+ handler->flags, flags)); -+} -+ -+static const struct xattr_handler bch_xattr_user_handler = { -+ .prefix = XATTR_USER_PREFIX, -+ .get = bch2_xattr_get_handler, -+ .set = bch2_xattr_set_handler, -+ .flags = KEY_TYPE_XATTR_INDEX_USER, -+}; -+ -+static bool bch2_xattr_trusted_list(struct dentry *dentry) -+{ -+ return capable(CAP_SYS_ADMIN); -+} -+ -+static const struct xattr_handler bch_xattr_trusted_handler = { -+ .prefix = XATTR_TRUSTED_PREFIX, -+ .list = bch2_xattr_trusted_list, -+ .get = bch2_xattr_get_handler, -+ .set = bch2_xattr_set_handler, -+ .flags = KEY_TYPE_XATTR_INDEX_TRUSTED, -+}; -+ -+static const struct xattr_handler bch_xattr_security_handler = { -+ .prefix = XATTR_SECURITY_PREFIX, -+ .get = bch2_xattr_get_handler, -+ .set = bch2_xattr_set_handler, -+ .flags = KEY_TYPE_XATTR_INDEX_SECURITY, -+}; -+ -+#ifndef NO_BCACHEFS_FS -+ -+static int opt_to_inode_opt(int id) -+{ -+ switch (id) { -+#define x(name, ...) 
\ -+ case Opt_##name: return Inode_opt_##name; -+ BCH_INODE_OPTS() -+#undef x -+ default: -+ return -1; -+ } -+} -+ -+static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size, -+ bool all) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_opts opts = -+ bch2_inode_opts_to_opts(bch2_inode_opts_get(&inode->ei_inode)); -+ const struct bch_option *opt; -+ int id, inode_opt_id; -+ char buf[512]; -+ struct printbuf out = PBUF(buf); -+ unsigned val_len; -+ u64 v; -+ -+ id = bch2_opt_lookup(name); -+ if (id < 0 || !bch2_opt_is_inode_opt(id)) -+ return -EINVAL; -+ -+ inode_opt_id = opt_to_inode_opt(id); -+ if (inode_opt_id < 0) -+ return -EINVAL; -+ -+ opt = bch2_opt_table + id; -+ -+ if (!bch2_opt_defined_by_id(&opts, id)) -+ return -ENODATA; -+ -+ if (!all && -+ !(inode->ei_inode.bi_fields_set & (1 << inode_opt_id))) -+ return -ENODATA; -+ -+ v = bch2_opt_get_by_id(&opts, id); -+ bch2_opt_to_text(&out, c, opt, v, 0); -+ -+ val_len = out.pos - buf; -+ -+ if (buffer && val_len > size) -+ return -ERANGE; -+ -+ if (buffer) -+ memcpy(buffer, buf, val_len); -+ return val_len; -+} -+ -+static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size) -+{ -+ return __bch2_xattr_bcachefs_get(handler, dentry, vinode, -+ name, buffer, size, false); -+} -+ -+struct inode_opt_set { -+ int id; -+ u64 v; -+ bool defined; -+}; -+ -+static int inode_opt_set_fn(struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct inode_opt_set *s = p; -+ -+ if (s->defined) -+ bi->bi_fields_set |= 1U << s->id; -+ else -+ bi->bi_fields_set &= ~(1U << s->id); -+ -+ bch2_inode_opt_set(bi, s->id, s->v); -+ -+ return 0; -+} -+ -+static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, const void *value, -+ size_t size, int flags) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ const struct bch_option *opt; -+ char *buf; -+ struct inode_opt_set s; -+ int opt_id, inode_opt_id, ret; -+ -+ opt_id = bch2_opt_lookup(name); -+ if (opt_id < 0) -+ return -EINVAL; -+ -+ opt = bch2_opt_table + opt_id; -+ -+ inode_opt_id = opt_to_inode_opt(opt_id); -+ if (inode_opt_id < 0) -+ return -EINVAL; -+ -+ s.id = inode_opt_id; -+ -+ if (value) { -+ u64 v = 0; -+ -+ buf = kmalloc(size + 1, GFP_KERNEL); -+ if (!buf) -+ return -ENOMEM; -+ memcpy(buf, value, size); -+ buf[size] = '\0'; -+ -+ ret = bch2_opt_parse(c, opt, buf, &v); -+ kfree(buf); -+ -+ if (ret < 0) -+ return ret; -+ -+ ret = bch2_opt_check_may_set(c, opt_id, v); -+ if (ret < 0) -+ return ret; -+ -+ s.v = v + 1; -+ s.defined = true; -+ } else { -+ if (!IS_ROOT(dentry)) { -+ struct bch_inode_info *dir = -+ to_bch_ei(d_inode(dentry->d_parent)); -+ -+ s.v = bch2_inode_opt_get(&dir->ei_inode, inode_opt_id); -+ } else { -+ s.v = 0; -+ } -+ -+ s.defined = false; -+ } -+ -+ mutex_lock(&inode->ei_update_lock); -+ if (inode_opt_id == Inode_opt_project) { -+ /* -+ * inode fields accessible via the xattr interface are stored -+ * with a +1 bias, so that 0 means unset: -+ */ -+ ret = bch2_set_projid(c, inode, s.v ? 
s.v - 1 : 0); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); -+err: -+ mutex_unlock(&inode->ei_update_lock); -+ -+ if (value && -+ (opt_id == Opt_background_compression || -+ opt_id == Opt_background_target)) -+ bch2_rebalance_add_work(c, inode->v.i_blocks); -+ -+ return ret; -+} -+ -+static const struct xattr_handler bch_xattr_bcachefs_handler = { -+ .prefix = "bcachefs.", -+ .get = bch2_xattr_bcachefs_get, -+ .set = bch2_xattr_bcachefs_set, -+}; -+ -+static int bch2_xattr_bcachefs_get_effective( -+ const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size) -+{ -+ return __bch2_xattr_bcachefs_get(handler, dentry, vinode, -+ name, buffer, size, true); -+} -+ -+static const struct xattr_handler bch_xattr_bcachefs_effective_handler = { -+ .prefix = "bcachefs_effective.", -+ .get = bch2_xattr_bcachefs_get_effective, -+ .set = bch2_xattr_bcachefs_set, -+}; -+ -+#endif /* NO_BCACHEFS_FS */ -+ -+const struct xattr_handler *bch2_xattr_handlers[] = { -+ &bch_xattr_user_handler, -+ &posix_acl_access_xattr_handler, -+ &posix_acl_default_xattr_handler, -+ &bch_xattr_trusted_handler, -+ &bch_xattr_security_handler, -+#ifndef NO_BCACHEFS_FS -+ &bch_xattr_bcachefs_handler, -+ &bch_xattr_bcachefs_effective_handler, -+#endif -+ NULL -+}; -+ -+static const struct xattr_handler *bch_xattr_handler_map[] = { -+ [KEY_TYPE_XATTR_INDEX_USER] = &bch_xattr_user_handler, -+ [KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS] = -+ &posix_acl_access_xattr_handler, -+ [KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT] = -+ &posix_acl_default_xattr_handler, -+ [KEY_TYPE_XATTR_INDEX_TRUSTED] = &bch_xattr_trusted_handler, -+ [KEY_TYPE_XATTR_INDEX_SECURITY] = &bch_xattr_security_handler, -+}; -+ -+static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned type) -+{ -+ return type < ARRAY_SIZE(bch_xattr_handler_map) -+ ? 
bch_xattr_handler_map[type] -+ : NULL; -+} -diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h -new file mode 100644 -index 000000000000..4151065ab853 ---- /dev/null -+++ b/fs/bcachefs/xattr.h -@@ -0,0 +1,49 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_XATTR_H -+#define _BCACHEFS_XATTR_H -+ -+#include "str_hash.h" -+ -+extern const struct bch_hash_desc bch2_xattr_hash_desc; -+ -+const char *bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c); -+void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_xattr (struct bkey_ops) { \ -+ .key_invalid = bch2_xattr_invalid, \ -+ .val_to_text = bch2_xattr_to_text, \ -+} -+ -+static inline unsigned xattr_val_u64s(unsigned name_len, unsigned val_len) -+{ -+ return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name) + -+ name_len + val_len, sizeof(u64)); -+} -+ -+#define xattr_val(_xattr) \ -+ ((void *) (_xattr)->x_name + (_xattr)->x_name_len) -+ -+struct xattr_search_key { -+ u8 type; -+ struct qstr name; -+}; -+ -+#define X_SEARCH(_type, _name, _len) ((struct xattr_search_key) \ -+ { .type = _type, .name = QSTR_INIT(_name, _len) }) -+ -+struct dentry; -+struct xattr_handler; -+struct bch_hash_info; -+struct bch_inode_info; -+ -+int bch2_xattr_get(struct bch_fs *, struct bch_inode_info *, -+ const char *, void *, size_t, int); -+ -+int bch2_xattr_set(struct btree_trans *, u64, const struct bch_hash_info *, -+ const char *, const void *, size_t, int, int); -+ -+ssize_t bch2_xattr_list(struct dentry *, char *, size_t); -+ -+extern const struct xattr_handler *bch2_xattr_handlers[]; -+ -+#endif /* _BCACHEFS_XATTR_H */ -diff --git a/fs/cifs/file.c b/fs/cifs/file.c -index be46fab4c96d..a17a21181e18 100644 ---- a/fs/cifs/file.c -+++ b/fs/cifs/file.c -@@ -4296,20 +4296,12 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, - - page = lru_to_page(page_list); - -- /* -- * Lock the page and put it in the cache. Since no one else -- * should have access to this page, we're safe to simply set -- * PG_locked without checking it first. 
-- */ -- __SetPageLocked(page); -- rc = add_to_page_cache_locked(page, mapping, -- page->index, gfp); -+ rc = add_to_page_cache(page, mapping, -+ page->index, gfp); - - /* give up if we can't stick it in the cache */ -- if (rc) { -- __ClearPageLocked(page); -+ if (rc) - return rc; -- } - - /* move first page to the tmplist */ - *offset = (loff_t)page->index << PAGE_SHIFT; -@@ -4328,12 +4320,9 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, - if (*bytes + PAGE_SIZE > rsize) - break; - -- __SetPageLocked(page); -- rc = add_to_page_cache_locked(page, mapping, page->index, gfp); -- if (rc) { -- __ClearPageLocked(page); -+ rc = add_to_page_cache(page, mapping, page->index, gfp); -+ if (rc) - break; -- } - list_move_tail(&page->lru, tmplist); - (*bytes) += PAGE_SIZE; - expected_index++; -diff --git a/fs/dcache.c b/fs/dcache.c -index ea0485861d93..b4d6e3e86285 100644 ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -3132,9 +3132,8 @@ void d_genocide(struct dentry *parent) - - EXPORT_SYMBOL(d_genocide); - --void d_tmpfile(struct dentry *dentry, struct inode *inode) -+void d_mark_tmpfile(struct dentry *dentry, struct inode *inode) - { -- inode_dec_link_count(inode); - BUG_ON(dentry->d_name.name != dentry->d_iname || - !hlist_unhashed(&dentry->d_u.d_alias) || - !d_unlinked(dentry)); -@@ -3144,6 +3143,13 @@ void d_tmpfile(struct dentry *dentry, struct inode *inode) - (unsigned long long)inode->i_ino); - spin_unlock(&dentry->d_lock); - spin_unlock(&dentry->d_parent->d_lock); -+} -+EXPORT_SYMBOL(d_mark_tmpfile); -+ -+void d_tmpfile(struct dentry *dentry, struct inode *inode) -+{ -+ inode_dec_link_count(inode); -+ d_mark_tmpfile(dentry, inode); - d_instantiate(dentry, inode); - } - EXPORT_SYMBOL(d_tmpfile); -diff --git a/fs/inode.c b/fs/inode.c -index 72c4c347afb7..e70ad3d2d01c 100644 ---- a/fs/inode.c -+++ b/fs/inode.c -@@ -1578,6 +1578,46 @@ int insert_inode_locked(struct inode *inode) - } - EXPORT_SYMBOL(insert_inode_locked); - -+struct inode *insert_inode_locked2(struct inode *inode) -+{ -+ struct super_block *sb = inode->i_sb; -+ ino_t ino = inode->i_ino; -+ struct hlist_head *head = inode_hashtable + hash(sb, ino); -+ -+ while (1) { -+ struct inode *old = NULL; -+ spin_lock(&inode_hash_lock); -+ hlist_for_each_entry(old, head, i_hash) { -+ if (old->i_ino != ino) -+ continue; -+ if (old->i_sb != sb) -+ continue; -+ spin_lock(&old->i_lock); -+ if (old->i_state & (I_FREEING|I_WILL_FREE)) { -+ spin_unlock(&old->i_lock); -+ continue; -+ } -+ break; -+ } -+ if (likely(!old)) { -+ spin_lock(&inode->i_lock); -+ inode->i_state |= I_NEW | I_CREATING; -+ hlist_add_head(&inode->i_hash, head); -+ spin_unlock(&inode->i_lock); -+ spin_unlock(&inode_hash_lock); -+ return NULL; -+ } -+ __iget(old); -+ spin_unlock(&old->i_lock); -+ spin_unlock(&inode_hash_lock); -+ wait_on_inode(old); -+ if (unlikely(!inode_unhashed(old))) -+ return old; -+ iput(old); -+ } -+} -+EXPORT_SYMBOL(insert_inode_locked2); -+ - int insert_inode_locked4(struct inode *inode, unsigned long hashval, - int (*test)(struct inode *, void *), void *data) - { -diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index 868e11face00..d9e3b7b0175e 100644 ---- a/include/linux/blkdev.h -+++ b/include/linux/blkdev.h -@@ -936,6 +936,7 @@ extern const char *blk_op_str(unsigned int op); - - int blk_status_to_errno(blk_status_t status); - blk_status_t errno_to_blk_status(int errno); -+const char *blk_status_to_str(blk_status_t status); - - int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin); - -diff --git 
a/include/linux/closure.h b/include/linux/closure.h -new file mode 100644 -index 000000000000..36b4a83f9b77 ---- /dev/null -+++ b/include/linux/closure.h -@@ -0,0 +1,399 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _LINUX_CLOSURE_H -+#define _LINUX_CLOSURE_H -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * Closure is perhaps the most overused and abused term in computer science, but -+ * since I've been unable to come up with anything better you're stuck with it -+ * again. -+ * -+ * What are closures? -+ * -+ * They embed a refcount. The basic idea is they count "things that are in -+ * progress" - in flight bios, some other thread that's doing something else - -+ * anything you might want to wait on. -+ * -+ * The refcount may be manipulated with closure_get() and closure_put(). -+ * closure_put() is where many of the interesting things happen, when it causes -+ * the refcount to go to 0. -+ * -+ * Closures can be used to wait on things both synchronously and asynchronously, -+ * and synchronous and asynchronous use can be mixed without restriction. To -+ * wait synchronously, use closure_sync() - you will sleep until your closure's -+ * refcount hits 1. -+ * -+ * To wait asynchronously, use -+ * continue_at(cl, next_function, workqueue); -+ * -+ * passing it, as you might expect, the function to run when nothing is pending -+ * and the workqueue to run that function out of. -+ * -+ * continue_at() also, critically, requires a 'return' immediately following the -+ * location where this macro is referenced, to return to the calling function. -+ * There's good reason for this. -+ * -+ * To use safely closures asynchronously, they must always have a refcount while -+ * they are running owned by the thread that is running them. Otherwise, suppose -+ * you submit some bios and wish to have a function run when they all complete: -+ * -+ * foo_endio(struct bio *bio) -+ * { -+ * closure_put(cl); -+ * } -+ * -+ * closure_init(cl); -+ * -+ * do_stuff(); -+ * closure_get(cl); -+ * bio1->bi_endio = foo_endio; -+ * bio_submit(bio1); -+ * -+ * do_more_stuff(); -+ * closure_get(cl); -+ * bio2->bi_endio = foo_endio; -+ * bio_submit(bio2); -+ * -+ * continue_at(cl, complete_some_read, system_wq); -+ * -+ * If closure's refcount started at 0, complete_some_read() could run before the -+ * second bio was submitted - which is almost always not what you want! More -+ * importantly, it wouldn't be possible to say whether the original thread or -+ * complete_some_read()'s thread owned the closure - and whatever state it was -+ * associated with! -+ * -+ * So, closure_init() initializes a closure's refcount to 1 - and when a -+ * closure_fn is run, the refcount will be reset to 1 first. -+ * -+ * Then, the rule is - if you got the refcount with closure_get(), release it -+ * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount -+ * on a closure because you called closure_init() or you were run out of a -+ * closure - _always_ use continue_at(). Doing so consistently will help -+ * eliminate an entire class of particularly pernicious races. -+ * -+ * Lastly, you might have a wait list dedicated to a specific event, and have no -+ * need for specifying the condition - you just want to wait until someone runs -+ * closure_wake_up() on the appropriate wait list. In that case, just use -+ * closure_wait(). It will return either true or false, depending on whether the -+ * closure was already on a wait list or not - a closure can only be on one wait -+ * list at a time. 
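A minimal sketch of the wait-list pattern just described; struct foo, its data_wait field (a struct closure_waitlist) and foo_has_data() are hypothetical names used only for illustration, and the loop simply mirrors the closure_wait_event() helper defined further down in this header:

static void foo_wait_for_data(struct foo *foo)
{
	struct closure cl;

	closure_init_stack(&cl);

	while (1) {
		/* Get on the wait list (a closure can only be on one list): */
		closure_wait(&foo->data_wait, &cl);

		/* Recheck the condition after queueing to avoid a lost wakeup: */
		if (foo_has_data(foo))
			break;

		/* Sleep until closure_wake_up(&foo->data_wait) drops our ref: */
		closure_sync(&cl);
	}

	/*
	 * We may still be on the wait list: wake it so our entry gets dropped,
	 * then wait for our refcount to return to 1 before cl goes out of scope.
	 */
	closure_wake_up(&foo->data_wait);
	closure_sync(&cl);
}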
-+ * -+ * Parents: -+ * -+ * closure_init() takes two arguments - it takes the closure to initialize, and -+ * a (possibly null) parent. -+ * -+ * If parent is non null, the new closure will have a refcount for its lifetime; -+ * a closure is considered to be "finished" when its refcount hits 0 and the -+ * function to run is null. Hence -+ * -+ * continue_at(cl, NULL, NULL); -+ * -+ * returns up the (spaghetti) stack of closures, precisely like normal return -+ * returns up the C stack. continue_at() with non null fn is better thought of -+ * as doing a tail call. -+ * -+ * All this implies that a closure should typically be embedded in a particular -+ * struct (which its refcount will normally control the lifetime of), and that -+ * struct can very much be thought of as a stack frame. -+ */ -+ -+struct closure; -+struct closure_syncer; -+typedef void (closure_fn) (struct closure *); -+extern struct dentry *bcache_debug; -+ -+struct closure_waitlist { -+ struct llist_head list; -+}; -+ -+enum closure_state { -+ /* -+ * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by -+ * the thread that owns the closure, and cleared by the thread that's -+ * waking up the closure. -+ * -+ * The rest are for debugging and don't affect behaviour: -+ * -+ * CLOSURE_RUNNING: Set when a closure is running (i.e. by -+ * closure_init() and when closure_put() runs then next function), and -+ * must be cleared before remaining hits 0. Primarily to help guard -+ * against incorrect usage and accidentally transferring references. -+ * continue_at() and closure_return() clear it for you, if you're doing -+ * something unusual you can use closure_set_dead() which also helps -+ * annotate where references are being transferred. -+ */ -+ -+ CLOSURE_BITS_START = (1U << 26), -+ CLOSURE_DESTRUCTOR = (1U << 26), -+ CLOSURE_WAITING = (1U << 28), -+ CLOSURE_RUNNING = (1U << 30), -+}; -+ -+#define CLOSURE_GUARD_MASK \ -+ ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1) -+ -+#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) -+#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) -+ -+struct closure { -+ union { -+ struct { -+ struct workqueue_struct *wq; -+ struct closure_syncer *s; -+ struct llist_node list; -+ closure_fn *fn; -+ }; -+ struct work_struct work; -+ }; -+ -+ struct closure *parent; -+ -+ atomic_t remaining; -+ -+#ifdef CONFIG_DEBUG_CLOSURES -+#define CLOSURE_MAGIC_DEAD 0xc054dead -+#define CLOSURE_MAGIC_ALIVE 0xc054a11e -+ -+ unsigned int magic; -+ struct list_head all; -+ unsigned long ip; -+ unsigned long waiting_on; -+#endif -+}; -+ -+void closure_sub(struct closure *cl, int v); -+void closure_put(struct closure *cl); -+void __closure_wake_up(struct closure_waitlist *list); -+bool closure_wait(struct closure_waitlist *list, struct closure *cl); -+void __closure_sync(struct closure *cl); -+ -+/** -+ * closure_sync - sleep until a closure a closure has nothing left to wait on -+ * -+ * Sleeps until the refcount hits 1 - the thread that's running the closure owns -+ * the last refcount. 
-+ */ -+static inline void closure_sync(struct closure *cl) -+{ -+ if ((atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK) != 1) -+ __closure_sync(cl); -+} -+ -+#ifdef CONFIG_DEBUG_CLOSURES -+ -+void closure_debug_create(struct closure *cl); -+void closure_debug_destroy(struct closure *cl); -+ -+#else -+ -+static inline void closure_debug_create(struct closure *cl) {} -+static inline void closure_debug_destroy(struct closure *cl) {} -+ -+#endif -+ -+static inline void closure_set_ip(struct closure *cl) -+{ -+#ifdef CONFIG_DEBUG_CLOSURES -+ cl->ip = _THIS_IP_; -+#endif -+} -+ -+static inline void closure_set_ret_ip(struct closure *cl) -+{ -+#ifdef CONFIG_DEBUG_CLOSURES -+ cl->ip = _RET_IP_; -+#endif -+} -+ -+static inline void closure_set_waiting(struct closure *cl, unsigned long f) -+{ -+#ifdef CONFIG_DEBUG_CLOSURES -+ cl->waiting_on = f; -+#endif -+} -+ -+static inline void closure_set_stopped(struct closure *cl) -+{ -+ atomic_sub(CLOSURE_RUNNING, &cl->remaining); -+} -+ -+static inline void set_closure_fn(struct closure *cl, closure_fn *fn, -+ struct workqueue_struct *wq) -+{ -+ closure_set_ip(cl); -+ cl->fn = fn; -+ cl->wq = wq; -+ /* between atomic_dec() in closure_put() */ -+ smp_mb__before_atomic(); -+} -+ -+static inline void closure_queue(struct closure *cl) -+{ -+ struct workqueue_struct *wq = cl->wq; -+ /** -+ * Changes made to closure, work_struct, or a couple of other structs -+ * may cause work.func not pointing to the right location. -+ */ -+ BUILD_BUG_ON(offsetof(struct closure, fn) -+ != offsetof(struct work_struct, func)); -+ -+ if (wq) { -+ INIT_WORK(&cl->work, cl->work.func); -+ BUG_ON(!queue_work(wq, &cl->work)); -+ } else -+ cl->fn(cl); -+} -+ -+/** -+ * closure_get - increment a closure's refcount -+ */ -+static inline void closure_get(struct closure *cl) -+{ -+#ifdef CONFIG_DEBUG_CLOSURES -+ BUG_ON((atomic_inc_return(&cl->remaining) & -+ CLOSURE_REMAINING_MASK) <= 1); -+#else -+ atomic_inc(&cl->remaining); -+#endif -+} -+ -+/** -+ * closure_init - Initialize a closure, setting the refcount to 1 -+ * @cl: closure to initialize -+ * @parent: parent of the new closure. cl will take a refcount on it for its -+ * lifetime; may be NULL. -+ */ -+static inline void closure_init(struct closure *cl, struct closure *parent) -+{ -+ cl->fn = NULL; -+ cl->parent = parent; -+ if (parent) -+ closure_get(parent); -+ -+ atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); -+ -+ closure_debug_create(cl); -+ closure_set_ip(cl); -+} -+ -+static inline void closure_init_stack(struct closure *cl) -+{ -+ memset(cl, 0, sizeof(struct closure)); -+ atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); -+} -+ -+/** -+ * closure_wake_up - wake up all closures on a wait list, -+ * with memory barrier -+ */ -+static inline void closure_wake_up(struct closure_waitlist *list) -+{ -+ /* Memory barrier for the wait list */ -+ smp_mb(); -+ __closure_wake_up(list); -+} -+ -+/** -+ * continue_at - jump to another function with barrier -+ * -+ * After @cl is no longer waiting on anything (i.e. all outstanding refs have -+ * been dropped with closure_put()), it will resume execution at @fn running out -+ * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). -+ * -+ * This is because after calling continue_at() you no longer have a ref on @cl, -+ * and whatever @cl owns may be freed out from under you - a running closure fn -+ * has a ref on its own closure which continue_at() drops. -+ * -+ * Note you are expected to immediately return after using this macro. 
-+ */ -+#define continue_at(_cl, _fn, _wq) \ -+do { \ -+ set_closure_fn(_cl, _fn, _wq); \ -+ closure_sub(_cl, CLOSURE_RUNNING + 1); \ -+} while (0) -+ -+/** -+ * closure_return - finish execution of a closure -+ * -+ * This is used to indicate that @cl is finished: when all outstanding refs on -+ * @cl have been dropped @cl's ref on its parent closure (as passed to -+ * closure_init()) will be dropped, if one was specified - thus this can be -+ * thought of as returning to the parent closure. -+ */ -+#define closure_return(_cl) continue_at((_cl), NULL, NULL) -+ -+/** -+ * continue_at_nobarrier - jump to another function without barrier -+ * -+ * Causes @fn to be executed out of @cl, in @wq context (or called directly if -+ * @wq is NULL). -+ * -+ * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, -+ * thus it's not safe to touch anything protected by @cl after a -+ * continue_at_nobarrier(). -+ */ -+#define continue_at_nobarrier(_cl, _fn, _wq) \ -+do { \ -+ set_closure_fn(_cl, _fn, _wq); \ -+ closure_queue(_cl); \ -+} while (0) -+ -+/** -+ * closure_return_with_destructor - finish execution of a closure, -+ * with destructor -+ * -+ * Works like closure_return(), except @destructor will be called when all -+ * outstanding refs on @cl have been dropped; @destructor may be used to safely -+ * free the memory occupied by @cl, and it is called with the ref on the parent -+ * closure still held - so @destructor could safely return an item to a -+ * freelist protected by @cl's parent. -+ */ -+#define closure_return_with_destructor(_cl, _destructor) \ -+do { \ -+ set_closure_fn(_cl, _destructor, NULL); \ -+ closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ -+} while (0) -+ -+/** -+ * closure_call - execute @fn out of a new, uninitialized closure -+ * -+ * Typically used when running out of one closure, and we want to run @fn -+ * asynchronously out of a new closure - @parent will then wait for @cl to -+ * finish. 
-+ */ -+static inline void closure_call(struct closure *cl, closure_fn fn, -+ struct workqueue_struct *wq, -+ struct closure *parent) -+{ -+ closure_init(cl, parent); -+ continue_at_nobarrier(cl, fn, wq); -+} -+ -+#define __closure_wait_event(waitlist, _cond) \ -+do { \ -+ struct closure cl; \ -+ \ -+ closure_init_stack(&cl); \ -+ \ -+ while (1) { \ -+ closure_wait(waitlist, &cl); \ -+ if (_cond) \ -+ break; \ -+ closure_sync(&cl); \ -+ } \ -+ closure_wake_up(waitlist); \ -+ closure_sync(&cl); \ -+} while (0) -+ -+#define closure_wait_event(waitlist, _cond) \ -+do { \ -+ if (!(_cond)) \ -+ __closure_wait_event(waitlist, _cond); \ -+} while (0) -+ -+#endif /* _LINUX_CLOSURE_H */ -diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h -index ea7b756b1c8f..51658b72de72 100644 ---- a/include/linux/compiler_attributes.h -+++ b/include/linux/compiler_attributes.h -@@ -278,4 +278,9 @@ - */ - #define __weak __attribute__((__weak__)) - -+/* -+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-flatten-function-attribute -+ */ -+#define __flatten __attribute__((flatten)) -+ - #endif /* __LINUX_COMPILER_ATTRIBUTES_H */ -diff --git a/include/linux/dcache.h b/include/linux/dcache.h -index 65d975bf9390..008573618071 100644 ---- a/include/linux/dcache.h -+++ b/include/linux/dcache.h -@@ -256,6 +256,7 @@ extern struct dentry * d_make_root(struct inode *); - /* - the ramfs-type tree */ - extern void d_genocide(struct dentry *); - -+extern void d_mark_tmpfile(struct dentry *, struct inode *); - extern void d_tmpfile(struct dentry *, struct inode *); - - extern struct dentry *d_find_alias(struct inode *); -diff --git a/include/linux/fs.h b/include/linux/fs.h -index 7519ae003a08..305d316f01f3 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -2953,6 +2953,7 @@ extern struct inode *find_inode_rcu(struct super_block *, unsigned long, - extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long); - extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); - extern int insert_inode_locked(struct inode *); -+extern struct inode *insert_inode_locked2(struct inode *); - #ifdef CONFIG_DEBUG_LOCK_ALLOC - extern void lockdep_annotate_inode_mutex_key(struct inode *inode); - #else -diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h -index 434c9c34aeb6..620535006624 100644 ---- a/include/linux/pagemap.h -+++ b/include/linux/pagemap.h -@@ -689,10 +689,15 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size) - return 0; - } - --int add_to_page_cache_locked(struct page *page, struct address_space *mapping, -- pgoff_t index, gfp_t gfp_mask); -+int add_to_page_cache(struct page *page, struct address_space *mapping, -+ pgoff_t index, gfp_t gfp_mask); - int add_to_page_cache_lru(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); -+int add_to_page_cache_lru_vec(struct address_space *mapping, -+ struct page **pages, -+ unsigned nr_pages, -+ pgoff_t offset, gfp_t gfp_mask); -+ - extern void delete_from_page_cache(struct page *page); - extern void __delete_from_page_cache(struct page *page, void *shadow); - int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); -@@ -710,22 +715,6 @@ void page_cache_readahead_unbounded(struct address_space *, struct file *, - pgoff_t index, unsigned long nr_to_read, - unsigned long lookahead_count); - --/* -- * Like add_to_page_cache_locked, but used to add 
newly allocated pages: -- * the page is new, so we can just run __SetPageLocked() against it. -- */ --static inline int add_to_page_cache(struct page *page, -- struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) --{ -- int error; -- -- __SetPageLocked(page); -- error = add_to_page_cache_locked(page, mapping, offset, gfp_mask); -- if (unlikely(error)) -- __ClearPageLocked(page); -- return error; --} -- - /** - * struct readahead_control - Describes a readahead request. - * -diff --git a/include/linux/sched.h b/include/linux/sched.h -index afe01e232935..793b07788062 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -747,6 +747,7 @@ struct task_struct { - - struct mm_struct *mm; - struct mm_struct *active_mm; -+ struct address_space *faults_disabled_mapping; - - /* Per-thread vma caching: */ - struct vmacache vmacache; -diff --git a/include/linux/six.h b/include/linux/six.h -new file mode 100644 -index 000000000000..a16e94f482e9 ---- /dev/null -+++ b/include/linux/six.h -@@ -0,0 +1,197 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+#ifndef _LINUX_SIX_H -+#define _LINUX_SIX_H -+ -+/* -+ * Shared/intent/exclusive locks: sleepable read/write locks, much like rw -+ * semaphores, except with a third intermediate state, intent. Basic operations -+ * are: -+ * -+ * six_lock_read(&foo->lock); -+ * six_unlock_read(&foo->lock); -+ * -+ * six_lock_intent(&foo->lock); -+ * six_unlock_intent(&foo->lock); -+ * -+ * six_lock_write(&foo->lock); -+ * six_unlock_write(&foo->lock); -+ * -+ * Intent locks block other intent locks, but do not block read locks, and you -+ * must have an intent lock held before taking a write lock, like so: -+ * -+ * six_lock_intent(&foo->lock); -+ * six_lock_write(&foo->lock); -+ * six_unlock_write(&foo->lock); -+ * six_unlock_intent(&foo->lock); -+ * -+ * Other operations: -+ * -+ * six_trylock_read() -+ * six_trylock_intent() -+ * six_trylock_write() -+ * -+ * six_lock_downgrade(): convert from intent to read -+ * six_lock_tryupgrade(): attempt to convert from read to intent -+ * -+ * Locks also embed a sequence number, which is incremented when the lock is -+ * locked or unlocked for write. The current sequence number can be grabbed -+ * while a lock is held from lock->state.seq; then, if you drop the lock you can -+ * use six_relock_(read|intent_write)(lock, seq) to attempt to retake the lock -+ * iff it hasn't been locked for write in the meantime. -+ * -+ * There are also operations that take the lock type as a parameter, where the -+ * type is one of SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write: -+ * -+ * six_lock_type(lock, type) -+ * six_unlock_type(lock, type) -+ * six_relock(lock, type, seq) -+ * six_trylock_type(lock, type) -+ * six_trylock_convert(lock, from, to) -+ * -+ * A lock may be held multiple types by the same thread (for read or intent, -+ * not write). However, the six locks code does _not_ implement the actual -+ * recursive checks itself though - rather, if your code (e.g. btree iterator -+ * code) knows that the current thread already has a lock held, and for the -+ * correct type, six_lock_increment() may be used to bump up the counter for -+ * that type - the only effect is that one more call to unlock will be required -+ * before the lock is unlocked. 
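A short usage sketch of the intent-then-write pattern and of relocking by sequence number, written against the full prototypes declared below; struct foo, its lock field and foo_update() are hypothetical names for illustration only:

static void foo_modify(struct foo *foo)
{
	/* Intent blocks other intent/write holders, but not readers: */
	six_lock_intent(&foo->lock, NULL, NULL);

	/* Write may only be taken while holding intent; it excludes readers: */
	six_lock_write(&foo->lock, NULL, NULL);
	foo_update(foo);
	six_unlock_write(&foo->lock);

	six_unlock_intent(&foo->lock);
}

static bool foo_drop_and_relock(struct foo *foo)
{
	u32 seq;

	six_lock_read(&foo->lock, NULL, NULL);
	seq = foo->lock.state.seq;
	six_unlock_read(&foo->lock);

	/* ... work without the lock held ... */

	/* Succeeds only if no write lock was taken in the meantime: */
	return six_relock_read(&foo->lock, seq);
}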
-+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define SIX_LOCK_SEPARATE_LOCKFNS -+ -+union six_lock_state { -+ struct { -+ atomic64_t counter; -+ }; -+ -+ struct { -+ u64 v; -+ }; -+ -+ struct { -+ /* for waitlist_bitnr() */ -+ unsigned long l; -+ }; -+ -+ struct { -+ unsigned read_lock:28; -+ unsigned intent_lock:1; -+ unsigned waiters:3; -+ /* -+ * seq works much like in seqlocks: it's incremented every time -+ * we lock and unlock for write. -+ * -+ * If it's odd write lock is held, even unlocked. -+ * -+ * Thus readers can unlock, and then lock again later iff it -+ * hasn't been modified in the meantime. -+ */ -+ u32 seq; -+ }; -+}; -+ -+enum six_lock_type { -+ SIX_LOCK_read, -+ SIX_LOCK_intent, -+ SIX_LOCK_write, -+}; -+ -+struct six_lock { -+ union six_lock_state state; -+ unsigned intent_lock_recurse; -+ struct task_struct *owner; -+ struct optimistic_spin_queue osq; -+ -+ raw_spinlock_t wait_lock; -+ struct list_head wait_list[2]; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+}; -+ -+typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *); -+ -+static __always_inline void __six_lock_init(struct six_lock *lock, -+ const char *name, -+ struct lock_class_key *key) -+{ -+ atomic64_set(&lock->state.counter, 0); -+ raw_spin_lock_init(&lock->wait_lock); -+ INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_read]); -+ INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_intent]); -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ debug_check_no_locks_freed((void *) lock, sizeof(*lock)); -+ lockdep_init_map(&lock->dep_map, name, key, 0); -+#endif -+} -+ -+#define six_lock_init(lock) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ __six_lock_init((lock), #lock, &__key); \ -+} while (0) -+ -+#define __SIX_VAL(field, _v) (((union six_lock_state) { .field = _v }).v) -+ -+#define __SIX_LOCK(type) \ -+bool six_trylock_##type(struct six_lock *); \ -+bool six_relock_##type(struct six_lock *, u32); \ -+int six_lock_##type(struct six_lock *, six_lock_should_sleep_fn, void *);\ -+void six_unlock_##type(struct six_lock *); -+ -+__SIX_LOCK(read) -+__SIX_LOCK(intent) -+__SIX_LOCK(write) -+#undef __SIX_LOCK -+ -+#define SIX_LOCK_DISPATCH(type, fn, ...) 
\ -+ switch (type) { \ -+ case SIX_LOCK_read: \ -+ return fn##_read(__VA_ARGS__); \ -+ case SIX_LOCK_intent: \ -+ return fn##_intent(__VA_ARGS__); \ -+ case SIX_LOCK_write: \ -+ return fn##_write(__VA_ARGS__); \ -+ default: \ -+ BUG(); \ -+ } -+ -+static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type) -+{ -+ SIX_LOCK_DISPATCH(type, six_trylock, lock); -+} -+ -+static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type, -+ unsigned seq) -+{ -+ SIX_LOCK_DISPATCH(type, six_relock, lock, seq); -+} -+ -+static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p) -+{ -+ SIX_LOCK_DISPATCH(type, six_lock, lock, should_sleep_fn, p); -+} -+ -+static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type) -+{ -+ SIX_LOCK_DISPATCH(type, six_unlock, lock); -+} -+ -+void six_lock_downgrade(struct six_lock *); -+bool six_lock_tryupgrade(struct six_lock *); -+bool six_trylock_convert(struct six_lock *, enum six_lock_type, -+ enum six_lock_type); -+ -+void six_lock_increment(struct six_lock *, enum six_lock_type); -+ -+void six_lock_wakeup_all(struct six_lock *); -+ -+#endif /* _LINUX_SIX_H */ -diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h -index 0221f852a7e1..f81f60d891ac 100644 ---- a/include/linux/vmalloc.h -+++ b/include/linux/vmalloc.h -@@ -106,6 +106,7 @@ extern void *vzalloc(unsigned long size); - extern void *vmalloc_user(unsigned long size); - extern void *vmalloc_node(unsigned long size, int node); - extern void *vzalloc_node(unsigned long size, int node); -+extern void *vmalloc_exec(unsigned long size, gfp_t gfp_mask); - extern void *vmalloc_32(unsigned long size); - extern void *vmalloc_32_user(unsigned long size); - extern void *__vmalloc(unsigned long size, gfp_t gfp_mask); -diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h -new file mode 100644 -index 000000000000..9b4e8295ed75 ---- /dev/null -+++ b/include/trace/events/bcachefs.h -@@ -0,0 +1,664 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#undef TRACE_SYSTEM -+#define TRACE_SYSTEM bcachefs -+ -+#if !defined(_TRACE_BCACHE_H) || defined(TRACE_HEADER_MULTI_READ) -+#define _TRACE_BCACHE_H -+ -+#include -+ -+DECLARE_EVENT_CLASS(bpos, -+ TP_PROTO(struct bpos *p), -+ TP_ARGS(p), -+ -+ TP_STRUCT__entry( -+ __field(u64, inode ) -+ __field(u64, offset ) -+ ), -+ -+ TP_fast_assign( -+ __entry->inode = p->inode; -+ __entry->offset = p->offset; -+ ), -+ -+ TP_printk("%llu:%llu", __entry->inode, __entry->offset) -+); -+ -+DECLARE_EVENT_CLASS(bkey, -+ TP_PROTO(const struct bkey *k), -+ TP_ARGS(k), -+ -+ TP_STRUCT__entry( -+ __field(u64, inode ) -+ __field(u64, offset ) -+ __field(u32, size ) -+ ), -+ -+ TP_fast_assign( -+ __entry->inode = k->p.inode; -+ __entry->offset = k->p.offset; -+ __entry->size = k->size; -+ ), -+ -+ TP_printk("%llu:%llu len %u", __entry->inode, -+ __entry->offset, __entry->size) -+); -+ -+DECLARE_EVENT_CLASS(bch_fs, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ ), -+ -+ TP_printk("%pU", __entry->uuid) -+); -+ -+DECLARE_EVENT_CLASS(bio, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio), -+ -+ TP_STRUCT__entry( -+ __field(dev_t, dev ) -+ __field(sector_t, sector ) -+ __field(unsigned int, nr_sector ) -+ __array(char, rwbs, 6 ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = bio->bi_disk ? 
bio_dev(bio) : 0; -+ __entry->sector = bio->bi_iter.bi_sector; -+ __entry->nr_sector = bio->bi_iter.bi_size >> 9; -+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); -+ ), -+ -+ TP_printk("%d,%d %s %llu + %u", -+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, -+ (unsigned long long)__entry->sector, __entry->nr_sector) -+); -+ -+/* io.c: */ -+ -+DEFINE_EVENT(bio, read_split, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+DEFINE_EVENT(bio, read_bounce, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+DEFINE_EVENT(bio, read_retry, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+DEFINE_EVENT(bio, promote, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+/* Journal */ -+ -+DEFINE_EVENT(bch_fs, journal_full, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, journal_entry_full, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bio, journal_write, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+/* bset.c: */ -+ -+DEFINE_EVENT(bpos, bkey_pack_pos_fail, -+ TP_PROTO(struct bpos *p), -+ TP_ARGS(p) -+); -+ -+/* Btree */ -+ -+DECLARE_EVENT_CLASS(btree_node, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(u8, level ) -+ __field(u8, id ) -+ __field(u64, inode ) -+ __field(u64, offset ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ __entry->level = b->c.level; -+ __entry->id = b->c.btree_id; -+ __entry->inode = b->key.k.p.inode; -+ __entry->offset = b->key.k.p.offset; -+ ), -+ -+ TP_printk("%pU %u id %u %llu:%llu", -+ __entry->uuid, __entry->level, __entry->id, -+ __entry->inode, __entry->offset) -+); -+ -+DEFINE_EVENT(btree_node, btree_read, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+TRACE_EVENT(btree_write, -+ TP_PROTO(struct btree *b, unsigned bytes, unsigned sectors), -+ TP_ARGS(b, bytes, sectors), -+ -+ TP_STRUCT__entry( -+ __field(enum btree_node_type, type) -+ __field(unsigned, bytes ) -+ __field(unsigned, sectors ) -+ ), -+ -+ TP_fast_assign( -+ __entry->type = btree_node_type(b); -+ __entry->bytes = bytes; -+ __entry->sectors = sectors; -+ ), -+ -+ TP_printk("bkey type %u bytes %u sectors %u", -+ __entry->type , __entry->bytes, __entry->sectors) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_alloc, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_free, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_reap, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DECLARE_EVENT_CLASS(btree_node_cannibalize_lock, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ ), -+ -+ TP_printk("%pU", __entry->uuid) -+); -+ -+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock_fail, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, btree_node_cannibalize_unlock, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+TRACE_EVENT(btree_reserve_get_fail, -+ TP_PROTO(struct bch_fs *c, size_t required, struct closure 
*cl), -+ TP_ARGS(c, required, cl), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(size_t, required ) -+ __field(struct closure *, cl ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ __entry->required = required; -+ __entry->cl = cl; -+ ), -+ -+ TP_printk("%pU required %zu by %p", __entry->uuid, -+ __entry->required, __entry->cl) -+); -+ -+TRACE_EVENT(btree_insert_key, -+ TP_PROTO(struct bch_fs *c, struct btree *b, struct bkey_i *k), -+ TP_ARGS(c, b, k), -+ -+ TP_STRUCT__entry( -+ __field(u8, id ) -+ __field(u64, inode ) -+ __field(u64, offset ) -+ __field(u32, size ) -+ ), -+ -+ TP_fast_assign( -+ __entry->id = b->c.btree_id; -+ __entry->inode = k->k.p.inode; -+ __entry->offset = k->k.p.offset; -+ __entry->size = k->k.size; -+ ), -+ -+ TP_printk("btree %u: %llu:%llu len %u", __entry->id, -+ __entry->inode, __entry->offset, __entry->size) -+); -+ -+DEFINE_EVENT(btree_node, btree_split, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_compact, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_merge, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_set_root, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+/* Garbage collection */ -+ -+DEFINE_EVENT(btree_node, btree_gc_coalesce, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+TRACE_EVENT(btree_gc_coalesce_fail, -+ TP_PROTO(struct bch_fs *c, int reason), -+ TP_ARGS(c, reason), -+ -+ TP_STRUCT__entry( -+ __field(u8, reason ) -+ __array(char, uuid, 16 ) -+ ), -+ -+ TP_fast_assign( -+ __entry->reason = reason; -+ memcpy(__entry->uuid, c->disk_sb.sb->user_uuid.b, 16); -+ ), -+ -+ TP_printk("%pU: %u", __entry->uuid, __entry->reason) -+); -+ -+DEFINE_EVENT(btree_node, btree_gc_rewrite_node, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_gc_rewrite_node_fail, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(bch_fs, gc_start, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, gc_end, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, gc_coalesce_start, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, gc_coalesce_end, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, gc_cannot_inc_gens, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+/* Allocator */ -+ -+TRACE_EVENT(alloc_batch, -+ TP_PROTO(struct bch_dev *ca, size_t free, size_t total), -+ TP_ARGS(ca, free, total), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(size_t, free ) -+ __field(size_t, total ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, ca->uuid.b, 16); -+ __entry->free = free; -+ __entry->total = total; -+ ), -+ -+ TP_printk("%pU free %zu total %zu", -+ __entry->uuid, __entry->free, __entry->total) -+); -+ -+TRACE_EVENT(invalidate, -+ TP_PROTO(struct bch_dev *ca, u64 offset, unsigned sectors), -+ TP_ARGS(ca, offset, sectors), -+ -+ TP_STRUCT__entry( -+ __field(unsigned, sectors ) -+ __field(dev_t, dev ) -+ __field(__u64, offset ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = ca->disk_sb.bdev->bd_dev; -+ __entry->offset = offset, -+ __entry->sectors = sectors; -+ ), -+ -+ TP_printk("invalidated %u sectors at %d,%d sector=%llu", -+ __entry->sectors, MAJOR(__entry->dev), -+ MINOR(__entry->dev), 
__entry->offset) -+); -+ -+DEFINE_EVENT(bch_fs, rescale_prios, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DECLARE_EVENT_CLASS(bucket_alloc, -+ TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), -+ TP_ARGS(ca, reserve), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16) -+ __field(enum alloc_reserve, reserve ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, ca->uuid.b, 16); -+ __entry->reserve = reserve; -+ ), -+ -+ TP_printk("%pU reserve %d", __entry->uuid, __entry->reserve) -+); -+ -+DEFINE_EVENT(bucket_alloc, bucket_alloc, -+ TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), -+ TP_ARGS(ca, reserve) -+); -+ -+DEFINE_EVENT(bucket_alloc, bucket_alloc_fail, -+ TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), -+ TP_ARGS(ca, reserve) -+); -+ -+DEFINE_EVENT(bucket_alloc, open_bucket_alloc_fail, -+ TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), -+ TP_ARGS(ca, reserve) -+); -+ -+/* Moving IO */ -+ -+DEFINE_EVENT(bkey, move_extent, -+ TP_PROTO(const struct bkey *k), -+ TP_ARGS(k) -+); -+ -+DEFINE_EVENT(bkey, move_alloc_fail, -+ TP_PROTO(const struct bkey *k), -+ TP_ARGS(k) -+); -+ -+DEFINE_EVENT(bkey, move_race, -+ TP_PROTO(const struct bkey *k), -+ TP_ARGS(k) -+); -+ -+TRACE_EVENT(move_data, -+ TP_PROTO(struct bch_fs *c, u64 sectors_moved, -+ u64 keys_moved), -+ TP_ARGS(c, sectors_moved, keys_moved), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(u64, sectors_moved ) -+ __field(u64, keys_moved ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ __entry->sectors_moved = sectors_moved; -+ __entry->keys_moved = keys_moved; -+ ), -+ -+ TP_printk("%pU sectors_moved %llu keys_moved %llu", -+ __entry->uuid, __entry->sectors_moved, __entry->keys_moved) -+); -+ -+TRACE_EVENT(copygc, -+ TP_PROTO(struct bch_fs *c, -+ u64 sectors_moved, u64 sectors_not_moved, -+ u64 buckets_moved, u64 buckets_not_moved), -+ TP_ARGS(c, -+ sectors_moved, sectors_not_moved, -+ buckets_moved, buckets_not_moved), -+ -+ TP_STRUCT__entry( -+ __array(char, uuid, 16 ) -+ __field(u64, sectors_moved ) -+ __field(u64, sectors_not_moved ) -+ __field(u64, buckets_moved ) -+ __field(u64, buckets_not_moved ) -+ ), -+ -+ TP_fast_assign( -+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16); -+ __entry->sectors_moved = sectors_moved; -+ __entry->sectors_not_moved = sectors_not_moved; -+ __entry->buckets_moved = buckets_moved; -+ __entry->buckets_not_moved = buckets_moved; -+ ), -+ -+ TP_printk("%pU sectors moved %llu remain %llu buckets moved %llu remain %llu", -+ __entry->uuid, -+ __entry->sectors_moved, __entry->sectors_not_moved, -+ __entry->buckets_moved, __entry->buckets_not_moved) -+); -+ -+TRACE_EVENT(transaction_restart_ip, -+ TP_PROTO(unsigned long caller, unsigned long ip), -+ TP_ARGS(caller, ip), -+ -+ TP_STRUCT__entry( -+ __field(unsigned long, caller ) -+ __field(unsigned long, ip ) -+ ), -+ -+ TP_fast_assign( -+ __entry->caller = caller; -+ __entry->ip = ip; -+ ), -+ -+ TP_printk("%pF %pF", (void *) __entry->caller, (void *) __entry->ip) -+); -+ -+DECLARE_EVENT_CLASS(transaction_restart, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip), -+ -+ TP_STRUCT__entry( -+ __field(unsigned long, ip ) -+ ), -+ -+ TP_fast_assign( -+ __entry->ip = ip; -+ ), -+ -+ TP_printk("%pf", (void *) __entry->ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_btree_node_reused, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_would_deadlock, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ 
-+TRACE_EVENT(trans_restart_iters_realloced, -+ TP_PROTO(unsigned long ip, unsigned nr), -+ TP_ARGS(ip, nr), -+ -+ TP_STRUCT__entry( -+ __field(unsigned long, ip ) -+ __field(unsigned, nr ) -+ ), -+ -+ TP_fast_assign( -+ __entry->ip = ip; -+ __entry->nr = nr; -+ ), -+ -+ TP_printk("%pf nr %u", (void *) __entry->ip, __entry->nr) -+); -+ -+TRACE_EVENT(trans_restart_mem_realloced, -+ TP_PROTO(unsigned long ip, unsigned long bytes), -+ TP_ARGS(ip, bytes), -+ -+ TP_STRUCT__entry( -+ __field(unsigned long, ip ) -+ __field(unsigned long, bytes ) -+ ), -+ -+ TP_fast_assign( -+ __entry->ip = ip; -+ __entry->bytes = bytes; -+ ), -+ -+ TP_printk("%pf bytes %lu", (void *) __entry->ip, __entry->bytes) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_journal_preres_get, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_mark_replicas, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_fault_inject, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_btree_node_split, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_mark, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_upgrade, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_iter_upgrade, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_traverse, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DEFINE_EVENT(transaction_restart, trans_restart_atomic, -+ TP_PROTO(unsigned long ip), -+ TP_ARGS(ip) -+); -+ -+DECLARE_EVENT_CLASS(node_lock_fail, -+ TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq), -+ TP_ARGS(level, iter_seq, node, node_seq), -+ -+ TP_STRUCT__entry( -+ __field(u32, level) -+ __field(u32, iter_seq) -+ __field(u32, node) -+ __field(u32, node_seq) -+ ), -+ -+ TP_fast_assign( -+ __entry->level = level; -+ __entry->iter_seq = iter_seq; -+ __entry->node = node; -+ __entry->node_seq = node_seq; -+ ), -+ -+ TP_printk("level %u iter seq %u node %u node seq %u", -+ __entry->level, __entry->iter_seq, -+ __entry->node, __entry->node_seq) -+); -+ -+DEFINE_EVENT(node_lock_fail, node_upgrade_fail, -+ TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq), -+ TP_ARGS(level, iter_seq, node, node_seq) -+); -+ -+DEFINE_EVENT(node_lock_fail, node_relock_fail, -+ TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq), -+ TP_ARGS(level, iter_seq, node, node_seq) -+); -+ -+#endif /* _TRACE_BCACHE_H */ -+ -+/* This part must be outside protection */ -+#include -diff --git a/init/init_task.c b/init/init_task.c -index f6889fce64af..94706c45bb6a 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -84,6 +84,7 @@ struct task_struct init_task - .nr_cpus_allowed= NR_CPUS, - .mm = NULL, - .active_mm = &init_mm, -+ .faults_disabled_mapping = NULL, - .restart_block = { - .fn = do_no_restart_syscall, - }, -diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks -index 3de8fd11873b..ab8aa082ce56 100644 ---- a/kernel/Kconfig.locks -+++ b/kernel/Kconfig.locks -@@ -259,3 +259,6 @@ config ARCH_HAS_MMIOWB - config MMIOWB - def_bool y if ARCH_HAS_MMIOWB - depends on SMP -+ -+config SIXLOCKS -+ bool -diff --git 
a/kernel/locking/Makefile b/kernel/locking/Makefile -index 6d11cfb9b41f..4c13937e8f37 100644 ---- a/kernel/locking/Makefile -+++ b/kernel/locking/Makefile -@@ -32,3 +32,4 @@ obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o - obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o - obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o - obj-$(CONFIG_LOCK_EVENT_COUNTS) += lock_events.o -+obj-$(CONFIG_SIXLOCKS) += six.o -diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h -index b0be1560ed17..6388e42cfd68 100644 ---- a/kernel/locking/lockdep_internals.h -+++ b/kernel/locking/lockdep_internals.h -@@ -98,7 +98,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = - #else - #define MAX_LOCKDEP_ENTRIES 32768UL - --#define MAX_LOCKDEP_CHAINS_BITS 16 -+#define MAX_LOCKDEP_CHAINS_BITS 18 - - /* - * Stack-trace: tightly packed array of stack backtrace -@@ -116,7 +116,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = - - #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - --#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) -+#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*10) - - extern struct list_head all_lock_classes; - extern struct lock_chain lock_chains[]; -diff --git a/kernel/locking/six.c b/kernel/locking/six.c -new file mode 100644 -index 000000000000..49d46ed2e18e ---- /dev/null -+++ b/kernel/locking/six.c -@@ -0,0 +1,553 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef DEBUG -+#define EBUG_ON(cond) BUG_ON(cond) -+#else -+#define EBUG_ON(cond) do {} while (0) -+#endif -+ -+#define six_acquire(l, t) lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_) -+#define six_release(l) lock_release(l, _RET_IP_) -+ -+struct six_lock_vals { -+ /* Value we add to the lock in order to take the lock: */ -+ u64 lock_val; -+ -+ /* If the lock has this value (used as a mask), taking the lock fails: */ -+ u64 lock_fail; -+ -+ /* Value we add to the lock in order to release the lock: */ -+ u64 unlock_val; -+ -+ /* Mask that indicates lock is held for this type: */ -+ u64 held_mask; -+ -+ /* Waitlist we wakeup when releasing the lock: */ -+ enum six_lock_type unlock_wakeup; -+}; -+ -+#define __SIX_LOCK_HELD_read __SIX_VAL(read_lock, ~0) -+#define __SIX_LOCK_HELD_intent __SIX_VAL(intent_lock, ~0) -+#define __SIX_LOCK_HELD_write __SIX_VAL(seq, 1) -+ -+#define LOCK_VALS { \ -+ [SIX_LOCK_read] = { \ -+ .lock_val = __SIX_VAL(read_lock, 1), \ -+ .lock_fail = __SIX_LOCK_HELD_write, \ -+ .unlock_val = -__SIX_VAL(read_lock, 1), \ -+ .held_mask = __SIX_LOCK_HELD_read, \ -+ .unlock_wakeup = SIX_LOCK_write, \ -+ }, \ -+ [SIX_LOCK_intent] = { \ -+ .lock_val = __SIX_VAL(intent_lock, 1), \ -+ .lock_fail = __SIX_LOCK_HELD_intent, \ -+ .unlock_val = -__SIX_VAL(intent_lock, 1), \ -+ .held_mask = __SIX_LOCK_HELD_intent, \ -+ .unlock_wakeup = SIX_LOCK_intent, \ -+ }, \ -+ [SIX_LOCK_write] = { \ -+ .lock_val = __SIX_VAL(seq, 1), \ -+ .lock_fail = __SIX_LOCK_HELD_read, \ -+ .unlock_val = __SIX_VAL(seq, 1), \ -+ .held_mask = __SIX_LOCK_HELD_write, \ -+ .unlock_wakeup = SIX_LOCK_read, \ -+ }, \ -+} -+ -+static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type, -+ union six_lock_state old) -+{ -+ if (type != SIX_LOCK_intent) -+ return; -+ -+ if (!old.intent_lock) { -+ EBUG_ON(lock->owner); -+ lock->owner = current; -+ } else { -+ EBUG_ON(lock->owner != current); -+ } -+} -+ -+static __always_inline bool do_six_trylock_type(struct six_lock *lock, -+ enum six_lock_type type) -+{ -+ const struct 
six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state old; -+ u64 v = READ_ONCE(lock->state.v); -+ -+ EBUG_ON(type == SIX_LOCK_write && lock->owner != current); -+ -+ do { -+ old.v = v; -+ -+ EBUG_ON(type == SIX_LOCK_write && -+ ((old.v & __SIX_LOCK_HELD_write) || -+ !(old.v & __SIX_LOCK_HELD_intent))); -+ -+ if (old.v & l[type].lock_fail) -+ return false; -+ } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter, -+ old.v, -+ old.v + l[type].lock_val)) != old.v); -+ -+ six_set_owner(lock, type, old); -+ return true; -+} -+ -+__always_inline __flatten -+static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type) -+{ -+ if (!do_six_trylock_type(lock, type)) -+ return false; -+ -+ if (type != SIX_LOCK_write) -+ six_acquire(&lock->dep_map, 1); -+ return true; -+} -+ -+__always_inline __flatten -+static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type, -+ unsigned seq) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state old; -+ u64 v = READ_ONCE(lock->state.v); -+ -+ do { -+ old.v = v; -+ -+ if (old.seq != seq || old.v & l[type].lock_fail) -+ return false; -+ } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter, -+ old.v, -+ old.v + l[type].lock_val)) != old.v); -+ -+ six_set_owner(lock, type, old); -+ if (type != SIX_LOCK_write) -+ six_acquire(&lock->dep_map, 1); -+ return true; -+} -+ -+struct six_lock_waiter { -+ struct list_head list; -+ struct task_struct *task; -+}; -+ -+/* This is probably up there with the more evil things I've done */ -+#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l)) -+ -+#ifdef CONFIG_LOCK_SPIN_ON_OWNER -+ -+static inline int six_can_spin_on_owner(struct six_lock *lock) -+{ -+ struct task_struct *owner; -+ int retval = 1; -+ -+ if (need_resched()) -+ return 0; -+ -+ rcu_read_lock(); -+ owner = READ_ONCE(lock->owner); -+ if (owner) -+ retval = owner->on_cpu; -+ rcu_read_unlock(); -+ /* -+ * if lock->owner is not set, the mutex owner may have just acquired -+ * it and not set the owner yet or the mutex has been released. -+ */ -+ return retval; -+} -+ -+static inline bool six_spin_on_owner(struct six_lock *lock, -+ struct task_struct *owner) -+{ -+ bool ret = true; -+ -+ rcu_read_lock(); -+ while (lock->owner == owner) { -+ /* -+ * Ensure we emit the owner->on_cpu, dereference _after_ -+ * checking lock->owner still matches owner. If that fails, -+ * owner might point to freed memory. If it still matches, -+ * the rcu_read_lock() ensures the memory stays valid. -+ */ -+ barrier(); -+ -+ if (!owner->on_cpu || need_resched()) { -+ ret = false; -+ break; -+ } -+ -+ cpu_relax(); -+ } -+ rcu_read_unlock(); -+ -+ return ret; -+} -+ -+static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) -+{ -+ struct task_struct *task = current; -+ -+ if (type == SIX_LOCK_write) -+ return false; -+ -+ preempt_disable(); -+ if (!six_can_spin_on_owner(lock)) -+ goto fail; -+ -+ if (!osq_lock(&lock->osq)) -+ goto fail; -+ -+ while (1) { -+ struct task_struct *owner; -+ -+ /* -+ * If there's an owner, wait for it to either -+ * release the lock or go to sleep. -+ */ -+ owner = READ_ONCE(lock->owner); -+ if (owner && !six_spin_on_owner(lock, owner)) -+ break; -+ -+ if (do_six_trylock_type(lock, type)) { -+ osq_unlock(&lock->osq); -+ preempt_enable(); -+ return true; -+ } -+ -+ /* -+ * When there's no owner, we might have preempted between the -+ * owner acquiring the lock and setting the owner field. 
If -+ * we're an RT task that will live-lock because we won't let -+ * the owner complete. -+ */ -+ if (!owner && (need_resched() || rt_task(task))) -+ break; -+ -+ /* -+ * The cpu_relax() call is a compiler barrier which forces -+ * everything in this loop to be re-loaded. We don't need -+ * memory barriers as we'll eventually observe the right -+ * values at the cost of a few extra spins. -+ */ -+ cpu_relax(); -+ } -+ -+ osq_unlock(&lock->osq); -+fail: -+ preempt_enable(); -+ -+ /* -+ * If we fell out of the spin path because of need_resched(), -+ * reschedule now, before we try-lock again. This avoids getting -+ * scheduled out right after we obtained the lock. -+ */ -+ if (need_resched()) -+ schedule(); -+ -+ return false; -+} -+ -+#else /* CONFIG_LOCK_SPIN_ON_OWNER */ -+ -+static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) -+{ -+ return false; -+} -+ -+#endif -+ -+noinline -+static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state old, new; -+ struct six_lock_waiter wait; -+ int ret = 0; -+ u64 v; -+ -+ ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0; -+ if (ret) -+ return ret; -+ -+ if (six_optimistic_spin(lock, type)) -+ return 0; -+ -+ lock_contended(&lock->dep_map, _RET_IP_); -+ -+ INIT_LIST_HEAD(&wait.list); -+ wait.task = current; -+ -+ while (1) { -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ if (type == SIX_LOCK_write) -+ EBUG_ON(lock->owner != current); -+ else if (list_empty_careful(&wait.list)) { -+ raw_spin_lock(&lock->wait_lock); -+ list_add_tail(&wait.list, &lock->wait_list[type]); -+ raw_spin_unlock(&lock->wait_lock); -+ } -+ -+ ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0; -+ if (ret) -+ break; -+ -+ v = READ_ONCE(lock->state.v); -+ do { -+ new.v = old.v = v; -+ -+ if (!(old.v & l[type].lock_fail)) -+ new.v += l[type].lock_val; -+ else if (!(new.waiters & (1 << type))) -+ new.waiters |= 1 << type; -+ else -+ break; /* waiting bit already set */ -+ } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter, -+ old.v, new.v)) != old.v); -+ -+ if (!(old.v & l[type].lock_fail)) -+ break; -+ -+ schedule(); -+ } -+ -+ if (!ret) -+ six_set_owner(lock, type, old); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ if (!list_empty_careful(&wait.list)) { -+ raw_spin_lock(&lock->wait_lock); -+ list_del_init(&wait.list); -+ raw_spin_unlock(&lock->wait_lock); -+ } -+ -+ return ret; -+} -+ -+__always_inline -+static int __six_lock_type(struct six_lock *lock, enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p) -+{ -+ int ret; -+ -+ if (type != SIX_LOCK_write) -+ six_acquire(&lock->dep_map, 0); -+ -+ ret = do_six_trylock_type(lock, type) ? 
0 -+ : __six_lock_type_slowpath(lock, type, should_sleep_fn, p); -+ -+ if (ret && type != SIX_LOCK_write) -+ six_release(&lock->dep_map); -+ if (!ret) -+ lock_acquired(&lock->dep_map, _RET_IP_); -+ -+ return ret; -+} -+ -+static inline void six_lock_wakeup(struct six_lock *lock, -+ union six_lock_state state, -+ unsigned waitlist_id) -+{ -+ struct list_head *wait_list = &lock->wait_list[waitlist_id]; -+ struct six_lock_waiter *w, *next; -+ -+ if (waitlist_id == SIX_LOCK_write && state.read_lock) -+ return; -+ -+ if (!(state.waiters & (1 << waitlist_id))) -+ return; -+ -+ clear_bit(waitlist_bitnr(waitlist_id), -+ (unsigned long *) &lock->state.v); -+ -+ if (waitlist_id == SIX_LOCK_write) { -+ struct task_struct *p = READ_ONCE(lock->owner); -+ -+ if (p) -+ wake_up_process(p); -+ return; -+ } -+ -+ raw_spin_lock(&lock->wait_lock); -+ -+ list_for_each_entry_safe(w, next, wait_list, list) { -+ list_del_init(&w->list); -+ -+ if (wake_up_process(w->task) && -+ waitlist_id != SIX_LOCK_read) { -+ if (!list_empty(wait_list)) -+ set_bit(waitlist_bitnr(waitlist_id), -+ (unsigned long *) &lock->state.v); -+ break; -+ } -+ } -+ -+ raw_spin_unlock(&lock->wait_lock); -+} -+ -+__always_inline __flatten -+static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state state; -+ -+ EBUG_ON(!(lock->state.v & l[type].held_mask)); -+ EBUG_ON(type == SIX_LOCK_write && -+ !(lock->state.v & __SIX_LOCK_HELD_intent)); -+ -+ if (type != SIX_LOCK_write) -+ six_release(&lock->dep_map); -+ -+ if (type == SIX_LOCK_intent) { -+ EBUG_ON(lock->owner != current); -+ -+ if (lock->intent_lock_recurse) { -+ --lock->intent_lock_recurse; -+ return; -+ } -+ -+ lock->owner = NULL; -+ } -+ -+ state.v = atomic64_add_return_release(l[type].unlock_val, -+ &lock->state.counter); -+ six_lock_wakeup(lock, state, l[type].unlock_wakeup); -+} -+ -+#define __SIX_LOCK(type) \ -+bool six_trylock_##type(struct six_lock *lock) \ -+{ \ -+ return __six_trylock_type(lock, SIX_LOCK_##type); \ -+} \ -+EXPORT_SYMBOL_GPL(six_trylock_##type); \ -+ \ -+bool six_relock_##type(struct six_lock *lock, u32 seq) \ -+{ \ -+ return __six_relock_type(lock, SIX_LOCK_##type, seq); \ -+} \ -+EXPORT_SYMBOL_GPL(six_relock_##type); \ -+ \ -+int six_lock_##type(struct six_lock *lock, \ -+ six_lock_should_sleep_fn should_sleep_fn, void *p) \ -+{ \ -+ return __six_lock_type(lock, SIX_LOCK_##type, should_sleep_fn, p);\ -+} \ -+EXPORT_SYMBOL_GPL(six_lock_##type); \ -+ \ -+void six_unlock_##type(struct six_lock *lock) \ -+{ \ -+ __six_unlock_type(lock, SIX_LOCK_##type); \ -+} \ -+EXPORT_SYMBOL_GPL(six_unlock_##type); -+ -+__SIX_LOCK(read) -+__SIX_LOCK(intent) -+__SIX_LOCK(write) -+ -+#undef __SIX_LOCK -+ -+/* Convert from intent to read: */ -+void six_lock_downgrade(struct six_lock *lock) -+{ -+ six_lock_increment(lock, SIX_LOCK_read); -+ six_unlock_intent(lock); -+} -+EXPORT_SYMBOL_GPL(six_lock_downgrade); -+ -+bool six_lock_tryupgrade(struct six_lock *lock) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ union six_lock_state old, new; -+ u64 v = READ_ONCE(lock->state.v); -+ -+ do { -+ new.v = old.v = v; -+ -+ EBUG_ON(!(old.v & l[SIX_LOCK_read].held_mask)); -+ -+ new.v += l[SIX_LOCK_read].unlock_val; -+ -+ if (new.v & l[SIX_LOCK_intent].lock_fail) -+ return false; -+ -+ new.v += l[SIX_LOCK_intent].lock_val; -+ } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter, -+ old.v, new.v)) != old.v); -+ -+ six_set_owner(lock, SIX_LOCK_intent, old); -+ six_lock_wakeup(lock, new, 
l[SIX_LOCK_read].unlock_wakeup); -+ -+ return true; -+} -+EXPORT_SYMBOL_GPL(six_lock_tryupgrade); -+ -+bool six_trylock_convert(struct six_lock *lock, -+ enum six_lock_type from, -+ enum six_lock_type to) -+{ -+ EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write); -+ -+ if (to == from) -+ return true; -+ -+ if (to == SIX_LOCK_read) { -+ six_lock_downgrade(lock); -+ return true; -+ } else { -+ return six_lock_tryupgrade(lock); -+ } -+} -+EXPORT_SYMBOL_GPL(six_trylock_convert); -+ -+/* -+ * Increment read/intent lock count, assuming we already have it read or intent -+ * locked: -+ */ -+void six_lock_increment(struct six_lock *lock, enum six_lock_type type) -+{ -+ const struct six_lock_vals l[] = LOCK_VALS; -+ -+ EBUG_ON(type == SIX_LOCK_write); -+ six_acquire(&lock->dep_map, 0); -+ -+ /* XXX: assert already locked, and that we don't overflow: */ -+ -+ switch (type) { -+ case SIX_LOCK_read: -+ atomic64_add(l[type].lock_val, &lock->state.counter); -+ break; -+ case SIX_LOCK_intent: -+ lock->intent_lock_recurse++; -+ break; -+ case SIX_LOCK_write: -+ BUG(); -+ break; -+ } -+} -+EXPORT_SYMBOL_GPL(six_lock_increment); -+ -+void six_lock_wakeup_all(struct six_lock *lock) -+{ -+ struct six_lock_waiter *w; -+ -+ raw_spin_lock(&lock->wait_lock); -+ -+ list_for_each_entry(w, &lock->wait_list[0], list) -+ wake_up_process(w->task); -+ list_for_each_entry(w, &lock->wait_list[1], list) -+ wake_up_process(w->task); -+ -+ raw_spin_unlock(&lock->wait_lock); -+} -+EXPORT_SYMBOL_GPL(six_lock_wakeup_all); -diff --git a/kernel/module.c b/kernel/module.c -index 1c5cff34d9f2..8f9f37b0bfaa 100644 ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -2830,9 +2830,7 @@ static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug) - - void * __weak module_alloc(unsigned long size) - { -- return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, -- GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, -- NUMA_NO_NODE, __builtin_return_address(0)); -+ return vmalloc_exec(size, GFP_KERNEL); - } - - bool __weak module_init_section(const char *name) -diff --git a/lib/Kconfig b/lib/Kconfig -index b4b98a03ff98..7ec0b400c545 100644 ---- a/lib/Kconfig -+++ b/lib/Kconfig -@@ -461,6 +461,9 @@ config ASSOCIATIVE_ARRAY - - for more information. - -+config CLOSURES -+ bool -+ - config HAS_IOMEM - bool - depends on !NO_IOMEM -diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug -index 0c781f912f9f..efe645766784 100644 ---- a/lib/Kconfig.debug -+++ b/lib/Kconfig.debug -@@ -1517,6 +1517,15 @@ config DEBUG_CREDENTIALS - - source "kernel/rcu/Kconfig.debug" - -+config DEBUG_CLOSURES -+ bool "Debug closures (bcache async widgits)" -+ depends on CLOSURES -+ select DEBUG_FS -+ help -+ Keeps all active closures in a linked list and provides a debugfs -+ interface to list them, which makes it possible to see asynchronous -+ operations that get stuck. 
-+ - config DEBUG_WQ_FORCE_RR_CPU - bool "Force round-robin CPU selection for unbound work items" - depends on DEBUG_KERNEL -diff --git a/lib/Makefile b/lib/Makefile -index a4a4c6864f51..dfefe98c29ec 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -234,6 +234,8 @@ obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o - - obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o - -+obj-$(CONFIG_CLOSURES) += closure.o -+ - obj-$(CONFIG_DQL) += dynamic_queue_limits.o - - obj-$(CONFIG_GLOB) += glob.o -diff --git a/lib/closure.c b/lib/closure.c -new file mode 100644 -index 000000000000..3e6366c26209 ---- /dev/null -+++ b/lib/closure.c -@@ -0,0 +1,214 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Asynchronous refcounty things -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+static inline void closure_put_after_sub(struct closure *cl, int flags) -+{ -+ int r = flags & CLOSURE_REMAINING_MASK; -+ -+ BUG_ON(flags & CLOSURE_GUARD_MASK); -+ BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR)); -+ -+ if (!r) { -+ if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { -+ atomic_set(&cl->remaining, -+ CLOSURE_REMAINING_INITIALIZER); -+ closure_queue(cl); -+ } else { -+ struct closure *parent = cl->parent; -+ closure_fn *destructor = cl->fn; -+ -+ closure_debug_destroy(cl); -+ -+ if (destructor) -+ destructor(cl); -+ -+ if (parent) -+ closure_put(parent); -+ } -+ } -+} -+ -+/* For clearing flags with the same atomic op as a put */ -+void closure_sub(struct closure *cl, int v) -+{ -+ closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining)); -+} -+EXPORT_SYMBOL(closure_sub); -+ -+/* -+ * closure_put - decrement a closure's refcount -+ */ -+void closure_put(struct closure *cl) -+{ -+ closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); -+} -+EXPORT_SYMBOL(closure_put); -+ -+/* -+ * closure_wake_up - wake up all closures on a wait list, without memory barrier -+ */ -+void __closure_wake_up(struct closure_waitlist *wait_list) -+{ -+ struct llist_node *list; -+ struct closure *cl, *t; -+ struct llist_node *reverse = NULL; -+ -+ list = llist_del_all(&wait_list->list); -+ -+ /* We first reverse the list to preserve FIFO ordering and fairness */ -+ reverse = llist_reverse_order(list); -+ -+ /* Then do the wakeups */ -+ llist_for_each_entry_safe(cl, t, reverse, list) { -+ closure_set_waiting(cl, 0); -+ closure_sub(cl, CLOSURE_WAITING + 1); -+ } -+} -+EXPORT_SYMBOL(__closure_wake_up); -+ -+/** -+ * closure_wait - add a closure to a waitlist -+ * @waitlist: will own a ref on @cl, which will be released when -+ * closure_wake_up() is called on @waitlist. -+ * @cl: closure pointer. 
-+ * -+ */ -+bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) -+{ -+ if (atomic_read(&cl->remaining) & CLOSURE_WAITING) -+ return false; -+ -+ closure_set_waiting(cl, _RET_IP_); -+ atomic_add(CLOSURE_WAITING + 1, &cl->remaining); -+ llist_add(&cl->list, &waitlist->list); -+ -+ return true; -+} -+EXPORT_SYMBOL(closure_wait); -+ -+struct closure_syncer { -+ struct task_struct *task; -+ int done; -+}; -+ -+static void closure_sync_fn(struct closure *cl) -+{ -+ struct closure_syncer *s = cl->s; -+ struct task_struct *p; -+ -+ rcu_read_lock(); -+ p = READ_ONCE(s->task); -+ s->done = 1; -+ wake_up_process(p); -+ rcu_read_unlock(); -+} -+ -+void __sched __closure_sync(struct closure *cl) -+{ -+ struct closure_syncer s = { .task = current }; -+ -+ cl->s = &s; -+ continue_at(cl, closure_sync_fn, NULL); -+ -+ while (1) { -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ if (s.done) -+ break; -+ schedule(); -+ } -+ -+ __set_current_state(TASK_RUNNING); -+} -+EXPORT_SYMBOL(__closure_sync); -+ -+#ifdef CONFIG_DEBUG_CLOSURES -+ -+static LIST_HEAD(closure_list); -+static DEFINE_SPINLOCK(closure_list_lock); -+ -+void closure_debug_create(struct closure *cl) -+{ -+ unsigned long flags; -+ -+ BUG_ON(cl->magic == CLOSURE_MAGIC_ALIVE); -+ cl->magic = CLOSURE_MAGIC_ALIVE; -+ -+ spin_lock_irqsave(&closure_list_lock, flags); -+ list_add(&cl->all, &closure_list); -+ spin_unlock_irqrestore(&closure_list_lock, flags); -+} -+EXPORT_SYMBOL(closure_debug_create); -+ -+void closure_debug_destroy(struct closure *cl) -+{ -+ unsigned long flags; -+ -+ BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE); -+ cl->magic = CLOSURE_MAGIC_DEAD; -+ -+ spin_lock_irqsave(&closure_list_lock, flags); -+ list_del(&cl->all); -+ spin_unlock_irqrestore(&closure_list_lock, flags); -+} -+EXPORT_SYMBOL(closure_debug_destroy); -+ -+static int debug_seq_show(struct seq_file *f, void *data) -+{ -+ struct closure *cl; -+ -+ spin_lock_irq(&closure_list_lock); -+ -+ list_for_each_entry(cl, &closure_list, all) { -+ int r = atomic_read(&cl->remaining); -+ -+ seq_printf(f, "%p: %pS -> %pS p %p r %i ", -+ cl, (void *) cl->ip, cl->fn, cl->parent, -+ r & CLOSURE_REMAINING_MASK); -+ -+ seq_printf(f, "%s%s\n", -+ test_bit(WORK_STRUCT_PENDING_BIT, -+ work_data_bits(&cl->work)) ? "Q" : "", -+ r & CLOSURE_RUNNING ? 
"R" : ""); -+ -+ if (r & CLOSURE_WAITING) -+ seq_printf(f, " W %pS\n", -+ (void *) cl->waiting_on); -+ -+ seq_puts(f, "\n"); -+ } -+ -+ spin_unlock_irq(&closure_list_lock); -+ return 0; -+} -+ -+static int debug_seq_open(struct inode *inode, struct file *file) -+{ -+ return single_open(file, debug_seq_show, NULL); -+} -+ -+static const struct file_operations debug_ops = { -+ .owner = THIS_MODULE, -+ .open = debug_seq_open, -+ .read = seq_read, -+ .release = single_release -+}; -+ -+static int __init closure_debug_init(void) -+{ -+ debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops); -+ return 0; -+} -+late_initcall(closure_debug_init) -+ -+#endif -diff --git a/mm/filemap.c b/mm/filemap.c -index 99c49eeae71b..a5a07767a2eb 100644 ---- a/mm/filemap.c -+++ b/mm/filemap.c -@@ -117,6 +117,69 @@ - * ->tasklist_lock (memory_failure, collect_procs_ao) - */ - -+static int page_cache_tree_insert_vec(struct page *pages[], -+ unsigned nr_pages, -+ struct address_space *mapping, -+ pgoff_t index, -+ gfp_t gfp_mask, -+ void *shadow[]) -+{ -+ XA_STATE(xas, &mapping->i_pages, index); -+ void *old; -+ int i = 0, error = 0; -+ -+ mapping_set_update(&xas, mapping); -+ -+ if (!nr_pages) -+ return 0; -+ -+ xa_lock_irq(&mapping->i_pages); -+ -+ while (1) { -+ old = xas_load(&xas); -+ if (old && !xa_is_value(old)) { -+ error = -EEXIST; -+ break; -+ } -+ -+ xas_store(&xas, pages[i]); -+ error = xas_error(&xas); -+ -+ if (error == -ENOMEM) { -+ xa_unlock_irq(&mapping->i_pages); -+ if (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK)) -+ error = 0; -+ xa_lock_irq(&mapping->i_pages); -+ -+ if (!error) -+ continue; -+ break; -+ } -+ -+ if (error) -+ break; -+ -+ if (shadow) -+ shadow[i] = old; -+ if (xa_is_value(old)) -+ mapping->nrexceptional--; -+ mapping->nrpages++; -+ -+ /* hugetlb pages do not participate in page cache accounting. 
*/ -+ if (!PageHuge(pages[i])) -+ __inc_lruvec_page_state(pages[i], NR_FILE_PAGES); -+ -+ if (++i == nr_pages) -+ break; -+ -+ xas_next(&xas); -+ } -+ -+ xa_unlock_irq(&mapping->i_pages); -+ -+ return i ?: error; -+} -+ - static void page_cache_delete(struct address_space *mapping, - struct page *page, void *shadow) - { -@@ -827,114 +890,148 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) - } - EXPORT_SYMBOL_GPL(replace_page_cache_page); - --static int __add_to_page_cache_locked(struct page *page, -- struct address_space *mapping, -- pgoff_t offset, gfp_t gfp_mask, -- void **shadowp) -+static int add_to_page_cache_vec(struct page **pages, unsigned nr_pages, -+ struct address_space *mapping, -+ pgoff_t index, gfp_t gfp_mask, -+ void *shadow[]) - { -- XA_STATE(xas, &mapping->i_pages, offset); -- int huge = PageHuge(page); -- int error; -- void *old; -+ int i, nr_added = 0, error = 0; - -- VM_BUG_ON_PAGE(!PageLocked(page), page); -- VM_BUG_ON_PAGE(PageSwapBacked(page), page); -- mapping_set_update(&xas, mapping); -+ for (i = 0; i < nr_pages; i++) { -+ struct page *page = pages[i]; - -- get_page(page); -- page->mapping = mapping; -- page->index = offset; -+ VM_BUG_ON_PAGE(PageSwapBacked(page), page); -+ VM_BUG_ON_PAGE(PageSwapCache(page), page); - -- if (!huge) { -- error = mem_cgroup_charge(page, current->mm, gfp_mask); -- if (error) -- goto error; -+ __SetPageLocked(page); -+ get_page(page); -+ page->mapping = mapping; -+ page->index = index + i; -+ -+ if (!PageHuge(page)) { -+ error = mem_cgroup_charge(page, current->mm, gfp_mask); -+ if (error) { -+ page->mapping = NULL; -+ /* Leave page->index set: truncation relies upon it */ -+ put_page(page); -+ __ClearPageLocked(page); -+ if (!i) -+ return error; -+ nr_pages = i; -+ break; -+ } -+ } - } - -- do { -- xas_lock_irq(&xas); -- old = xas_load(&xas); -- if (old && !xa_is_value(old)) -- xas_set_err(&xas, -EEXIST); -- xas_store(&xas, page); -- if (xas_error(&xas)) -- goto unlock; -+ error = page_cache_tree_insert_vec(pages, nr_pages, mapping, -+ index, gfp_mask, shadow); -+ if (error > 0) { -+ nr_added = error; -+ error = 0; -+ } - -- if (xa_is_value(old)) { -- mapping->nrexceptional--; -- if (shadowp) -- *shadowp = old; -- } -- mapping->nrpages++; -+ for (i = 0; i < nr_added; i++) -+ trace_mm_filemap_add_to_page_cache(pages[i]); - -- /* hugetlb pages do not participate in page cache accounting */ -- if (!huge) -- __inc_lruvec_page_state(page, NR_FILE_PAGES); --unlock: -- xas_unlock_irq(&xas); -- } while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK)); -+ for (i = nr_added; i < nr_pages; i++) { -+ struct page *page = pages[i]; - -- if (xas_error(&xas)) { -- error = xas_error(&xas); -- goto error; -+ /* Leave page->index set: truncation relies upon it */ -+ page->mapping = NULL; -+ put_page(page); -+ __ClearPageLocked(page); - } - -- trace_mm_filemap_add_to_page_cache(page); -- return 0; --error: -- page->mapping = NULL; -- /* Leave page->index set: truncation relies upon it */ -- put_page(page); -- return error; -+ return nr_added ?: error; - } --ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO); -+ALLOW_ERROR_INJECTION(__add_to_page_cache, ERRNO); - - /** -- * add_to_page_cache_locked - add a locked page to the pagecache -+ * add_to_page_cache - add a newly allocated page to the pagecache - * @page: page to add - * @mapping: the page's address_space - * @offset: page index - * @gfp_mask: page allocation mode - * -- * This function is used to add a page to the pagecache. It must be locked. 
-- * This function does not add the page to the LRU. The caller must do that. -+ * This function is used to add a page to the pagecache. It must be newly -+ * allocated. This function does not add the page to the LRU. The caller must -+ * do that. - * - * Return: %0 on success, negative error code otherwise. - */ --int add_to_page_cache_locked(struct page *page, struct address_space *mapping, -- pgoff_t offset, gfp_t gfp_mask) -+int add_to_page_cache(struct page *page, struct address_space *mapping, -+ pgoff_t offset, gfp_t gfp_mask) - { -- return __add_to_page_cache_locked(page, mapping, offset, -- gfp_mask, NULL); -+ int ret = add_to_page_cache_vec(&page, 1, mapping, offset, -+ gfp_mask, NULL); -+ if (ret < 0) -+ return ret; -+ return 0; - } --EXPORT_SYMBOL(add_to_page_cache_locked); -+EXPORT_SYMBOL(add_to_page_cache); -+ALLOW_ERROR_INJECTION(add_to_page_cache, ERRNO); - --int add_to_page_cache_lru(struct page *page, struct address_space *mapping, -- pgoff_t offset, gfp_t gfp_mask) -+int add_to_page_cache_lru_vec(struct address_space *mapping, -+ struct page **pages, -+ unsigned nr_pages, -+ pgoff_t offset, gfp_t gfp_mask) - { -- void *shadow = NULL; -- int ret; -+ void *shadow_stack[8], **shadow = shadow_stack; -+ int i, ret = 0, err = 0, nr_added; -+ -+ if (nr_pages > ARRAY_SIZE(shadow_stack)) { -+ shadow = kmalloc_array(nr_pages, sizeof(void *), gfp_mask); -+ if (!shadow) -+ goto slowpath; -+ } -+ -+ for (i = 0; i < nr_pages; i++) -+ VM_BUG_ON_PAGE(PageActive(pages[i]), pages[i]); -+ -+ ret = add_to_page_cache_vec(pages, nr_pages, mapping, -+ offset, gfp_mask, shadow); -+ nr_added = ret > 0 ? ret : 0; -+ -+ /* -+ * The page might have been evicted from cache only recently, in which -+ * case it should be activated like any other repeatedly accessed page. -+ * The exception is pages getting rewritten; evicting other data from -+ * the working set, only to cache data that will get overwritten with -+ * something else, is a waste of memory. -+ */ -+ for (i = 0; i < nr_added; i++) { -+ struct page *page = pages[i]; -+ void *s = shadow[i]; - -- __SetPageLocked(page); -- ret = __add_to_page_cache_locked(page, mapping, offset, -- gfp_mask, &shadow); -- if (unlikely(ret)) -- __ClearPageLocked(page); -- else { -- /* -- * The page might have been evicted from cache only -- * recently, in which case it should be activated like -- * any other repeatedly accessed page. -- * The exception is pages getting rewritten; evicting other -- * data from the working set, only to cache data that will -- * get overwritten with something else, is a waste of memory. 
-- */ - WARN_ON_ONCE(PageActive(page)); -- if (!(gfp_mask & __GFP_WRITE) && shadow) -- workingset_refault(page, shadow); -+ if (!(gfp_mask & __GFP_WRITE) && s) -+ workingset_refault(page, s); - lru_cache_add(page); - } -+ -+ if (shadow != shadow_stack) -+ kfree(shadow); -+ - return ret; -+slowpath: -+ for (i = 0; i < nr_pages; i++) { -+ err = add_to_page_cache_lru(pages[i], mapping, -+ offset + i, gfp_mask); -+ if (err) -+ break; -+ } -+ -+ return i ?: err; -+} -+EXPORT_SYMBOL_GPL(add_to_page_cache_lru_vec); -+ -+int add_to_page_cache_lru(struct page *page, struct address_space *mapping, -+ pgoff_t offset, gfp_t gfp_mask) -+{ -+ int ret = add_to_page_cache_lru_vec(mapping, &page, 1, offset, gfp_mask); -+ if (ret < 0) -+ return ret; -+ return 0; - } - EXPORT_SYMBOL_GPL(add_to_page_cache_lru); - -@@ -1990,6 +2087,7 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, - - return ret; - } -+EXPORT_SYMBOL(find_get_pages_range); - - /** - * find_get_pages_contig - gang contiguous pagecache lookup -@@ -2138,6 +2236,259 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra) - ra->ra_pages /= 4; - } - -+static int lock_page_for_iocb(struct kiocb *iocb, struct page *page) -+{ -+ if (iocb->ki_flags & IOCB_WAITQ) -+ return lock_page_async(page, iocb->ki_waitq); -+ else if (iocb->ki_flags & IOCB_NOWAIT) -+ return trylock_page(page) ? 0 : -EAGAIN; -+ else -+ return lock_page_killable(page); -+} -+ -+static struct page * -+generic_file_buffered_read_readpage(struct kiocb *iocb, -+ struct file *filp, -+ struct address_space *mapping, -+ struct page *page) -+{ -+ struct file_ra_state *ra = &filp->f_ra; -+ int error; -+ -+ if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) { -+ unlock_page(page); -+ put_page(page); -+ return ERR_PTR(-EAGAIN); -+ } -+ -+ /* -+ * A previous I/O error may have been due to temporary -+ * failures, eg. multipath errors. -+ * PG_error will be set again if readpage fails. -+ */ -+ ClearPageError(page); -+ /* Start the actual read. The read will unlock the page. */ -+ error = mapping->a_ops->readpage(filp, page); -+ -+ if (unlikely(error)) { -+ put_page(page); -+ return error != AOP_TRUNCATED_PAGE ? ERR_PTR(error) : NULL; -+ } -+ -+ if (!PageUptodate(page)) { -+ error = lock_page_for_iocb(iocb, page); -+ if (unlikely(error)) { -+ put_page(page); -+ return ERR_PTR(error); -+ } -+ if (!PageUptodate(page)) { -+ if (page->mapping == NULL) { -+ /* -+ * invalidate_mapping_pages got it -+ */ -+ unlock_page(page); -+ put_page(page); -+ return NULL; -+ } -+ unlock_page(page); -+ shrink_readahead_size_eio(ra); -+ put_page(page); -+ return ERR_PTR(-EIO); -+ } -+ unlock_page(page); -+ } -+ -+ return page; -+} -+ -+static struct page * -+generic_file_buffered_read_pagenotuptodate(struct kiocb *iocb, -+ struct file *filp, -+ struct iov_iter *iter, -+ struct page *page, -+ loff_t pos, loff_t count) -+{ -+ struct address_space *mapping = filp->f_mapping; -+ struct inode *inode = mapping->host; -+ int error; -+ -+ /* -+ * See comment in do_read_cache_page on why -+ * wait_on_page_locked is used to avoid unnecessarily -+ * serialisations and why it's safe. 
-+ */ -+ if (iocb->ki_flags & IOCB_WAITQ) { -+ error = wait_on_page_locked_async(page, -+ iocb->ki_waitq); -+ } else { -+ error = wait_on_page_locked_killable(page); -+ } -+ if (unlikely(error)) { -+ put_page(page); -+ return ERR_PTR(error); -+ } -+ if (PageUptodate(page)) -+ return page; -+ -+ if (inode->i_blkbits == PAGE_SHIFT || -+ !mapping->a_ops->is_partially_uptodate) -+ goto page_not_up_to_date; -+ /* pipes can't handle partially uptodate pages */ -+ if (unlikely(iov_iter_is_pipe(iter))) -+ goto page_not_up_to_date; -+ if (!trylock_page(page)) -+ goto page_not_up_to_date; -+ /* Did it get truncated before we got the lock? */ -+ if (!page->mapping) -+ goto page_not_up_to_date_locked; -+ if (!mapping->a_ops->is_partially_uptodate(page, -+ pos & ~PAGE_MASK, count)) -+ goto page_not_up_to_date_locked; -+ unlock_page(page); -+ return page; -+ -+page_not_up_to_date: -+ /* Get exclusive access to the page ... */ -+ error = lock_page_for_iocb(iocb, page); -+ if (unlikely(error)) { -+ put_page(page); -+ return ERR_PTR(error); -+ } -+ -+page_not_up_to_date_locked: -+ /* Did it get truncated before we got the lock? */ -+ if (!page->mapping) { -+ unlock_page(page); -+ put_page(page); -+ return NULL; -+ } -+ -+ /* Did somebody else fill it already? */ -+ if (PageUptodate(page)) { -+ unlock_page(page); -+ return page; -+ } -+ -+ return generic_file_buffered_read_readpage(iocb, filp, mapping, page); -+} -+ -+static struct page * -+generic_file_buffered_read_no_cached_page(struct kiocb *iocb, -+ struct iov_iter *iter) -+{ -+ struct file *filp = iocb->ki_filp; -+ struct address_space *mapping = filp->f_mapping; -+ pgoff_t index = iocb->ki_pos >> PAGE_SHIFT; -+ struct page *page; -+ int error; -+ -+ if (iocb->ki_flags & IOCB_NOIO) -+ return ERR_PTR(-EAGAIN); -+ -+ /* -+ * Ok, it wasn't cached, so we need to create a new -+ * page.. -+ */ -+ page = page_cache_alloc(mapping); -+ if (!page) -+ return ERR_PTR(-ENOMEM); -+ -+ error = add_to_page_cache_lru(page, mapping, index, -+ mapping_gfp_constraint(mapping, GFP_KERNEL)); -+ if (error) { -+ put_page(page); -+ return error != -EEXIST ? 
ERR_PTR(error) : NULL; -+ } -+ -+ return generic_file_buffered_read_readpage(iocb, filp, mapping, page); -+} -+ -+static int generic_file_buffered_read_get_pages(struct kiocb *iocb, -+ struct iov_iter *iter, -+ struct page **pages, -+ unsigned int nr) -+{ -+ struct file *filp = iocb->ki_filp; -+ struct address_space *mapping = filp->f_mapping; -+ struct file_ra_state *ra = &filp->f_ra; -+ pgoff_t index = iocb->ki_pos >> PAGE_SHIFT; -+ pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; -+ int i, j, nr_got, err = 0; -+ -+ nr = min_t(unsigned long, last_index - index, nr); -+find_page: -+ if (fatal_signal_pending(current)) -+ return -EINTR; -+ -+ nr_got = find_get_pages_contig(mapping, index, nr, pages); -+ if (nr_got) -+ goto got_pages; -+ -+ if (iocb->ki_flags & IOCB_NOIO) -+ return -EAGAIN; -+ -+ page_cache_sync_readahead(mapping, ra, filp, index, last_index - index); -+ -+ nr_got = find_get_pages_contig(mapping, index, nr, pages); -+ if (nr_got) -+ goto got_pages; -+ -+ pages[0] = generic_file_buffered_read_no_cached_page(iocb, iter); -+ err = PTR_ERR_OR_ZERO(pages[0]); -+ if (!IS_ERR_OR_NULL(pages[0])) -+ nr_got = 1; -+got_pages: -+ for (i = 0; i < nr_got; i++) { -+ struct page *page = pages[i]; -+ pgoff_t pg_index = index + i; -+ loff_t pg_pos = max(iocb->ki_pos, -+ (loff_t) pg_index << PAGE_SHIFT); -+ loff_t pg_count = iocb->ki_pos + iter->count - pg_pos; -+ -+ if (PageReadahead(page)) { -+ if (iocb->ki_flags & IOCB_NOIO) { -+ for (j = i; j < nr_got; j++) -+ put_page(pages[j]); -+ nr_got = i; -+ err = -EAGAIN; -+ break; -+ } -+ page_cache_async_readahead(mapping, ra, filp, page, -+ pg_index, last_index - pg_index); -+ } -+ -+ if (!PageUptodate(page)) { -+ if ((iocb->ki_flags & IOCB_NOWAIT) || -+ ((iocb->ki_flags & IOCB_WAITQ) && i)) { -+ for (j = i; j < nr_got; j++) -+ put_page(pages[j]); -+ nr_got = i; -+ err = -EAGAIN; -+ break; -+ } -+ -+ page = generic_file_buffered_read_pagenotuptodate(iocb, -+ filp, iter, page, pg_pos, pg_count); -+ if (IS_ERR_OR_NULL(page)) { -+ for (j = i + 1; j < nr_got; j++) -+ put_page(pages[j]); -+ nr_got = i; -+ err = PTR_ERR_OR_ZERO(page); -+ break; -+ } -+ } -+ } -+ -+ if (likely(nr_got)) -+ return nr_got; -+ if (err) -+ return err; -+ /* -+ * No pages and no error means we raced and should retry: -+ */ -+ goto find_page; -+} -+ - /** - * generic_file_buffered_read - generic file read routine - * @iocb: the iocb to read -@@ -2158,276 +2509,116 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb, - struct iov_iter *iter, ssize_t written) - { - struct file *filp = iocb->ki_filp; -+ struct file_ra_state *ra = &filp->f_ra; - struct address_space *mapping = filp->f_mapping; - struct inode *inode = mapping->host; -- struct file_ra_state *ra = &filp->f_ra; -- loff_t *ppos = &iocb->ki_pos; -- pgoff_t index; -- pgoff_t last_index; -- pgoff_t prev_index; -- unsigned long offset; /* offset into pagecache page */ -- unsigned int prev_offset; -- int error = 0; -- -- if (unlikely(*ppos >= inode->i_sb->s_maxbytes)) -+ struct page *pages_onstack[PAGEVEC_SIZE], **pages = NULL; -+ unsigned int nr_pages = min_t(unsigned int, 512, -+ ((iocb->ki_pos + iter->count + PAGE_SIZE - 1) >> PAGE_SHIFT) - -+ (iocb->ki_pos >> PAGE_SHIFT)); -+ int i, pg_nr, error = 0; -+ bool writably_mapped; -+ loff_t isize, end_offset; -+ -+ if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes)) - return 0; - iov_iter_truncate(iter, inode->i_sb->s_maxbytes); - -- index = *ppos >> PAGE_SHIFT; -- prev_index = ra->prev_pos >> PAGE_SHIFT; -- prev_offset = ra->prev_pos & 
(PAGE_SIZE-1); -- last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; -- offset = *ppos & ~PAGE_MASK; -+ if (nr_pages > ARRAY_SIZE(pages_onstack)) -+ pages = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL); - -- for (;;) { -- struct page *page; -- pgoff_t end_index; -- loff_t isize; -- unsigned long nr, ret; -+ if (!pages) { -+ pages = pages_onstack; -+ nr_pages = min_t(unsigned int, nr_pages, ARRAY_SIZE(pages_onstack)); -+ } - -+ do { - cond_resched(); --find_page: -- if (fatal_signal_pending(current)) { -- error = -EINTR; -- goto out; -- } - -- page = find_get_page(mapping, index); -- if (!page) { -- if (iocb->ki_flags & IOCB_NOIO) -- goto would_block; -- page_cache_sync_readahead(mapping, -- ra, filp, -- index, last_index - index); -- page = find_get_page(mapping, index); -- if (unlikely(page == NULL)) -- goto no_cached_page; -- } -- if (PageReadahead(page)) { -- if (iocb->ki_flags & IOCB_NOIO) { -- put_page(page); -- goto out; -- } -- page_cache_async_readahead(mapping, -- ra, filp, page, -- index, last_index - index); -- } -- if (!PageUptodate(page)) { -- /* -- * See comment in do_read_cache_page on why -- * wait_on_page_locked is used to avoid unnecessarily -- * serialisations and why it's safe. -- */ -- if (iocb->ki_flags & IOCB_WAITQ) { -- if (written) { -- put_page(page); -- goto out; -- } -- error = wait_on_page_locked_async(page, -- iocb->ki_waitq); -- } else { -- if (iocb->ki_flags & IOCB_NOWAIT) { -- put_page(page); -- goto would_block; -- } -- error = wait_on_page_locked_killable(page); -- } -- if (unlikely(error)) -- goto readpage_error; -- if (PageUptodate(page)) -- goto page_ok; -- -- if (inode->i_blkbits == PAGE_SHIFT || -- !mapping->a_ops->is_partially_uptodate) -- goto page_not_up_to_date; -- /* pipes can't handle partially uptodate pages */ -- if (unlikely(iov_iter_is_pipe(iter))) -- goto page_not_up_to_date; -- if (!trylock_page(page)) -- goto page_not_up_to_date; -- /* Did it get truncated before we got the lock? */ -- if (!page->mapping) -- goto page_not_up_to_date_locked; -- if (!mapping->a_ops->is_partially_uptodate(page, -- offset, iter->count)) -- goto page_not_up_to_date_locked; -- unlock_page(page); -+ /* -+ * We can't return -EIOCBQUEUED once we've done some work, so -+ * ensure we don't block: -+ */ -+ if ((iocb->ki_flags & IOCB_WAITQ) && written) -+ iocb->ki_flags |= IOCB_NOWAIT; -+ -+ i = 0; -+ pg_nr = generic_file_buffered_read_get_pages(iocb, iter, -+ pages, nr_pages); -+ if (pg_nr < 0) { -+ error = pg_nr; -+ break; - } --page_ok: -+ - /* -- * i_size must be checked after we know the page is Uptodate. -+ * i_size must be checked after we know the pages are Uptodate. - * - * Checking i_size after the check allows us to calculate - * the correct value for "nr", which means the zero-filled - * part of the page is not copied back to userspace (unless - * another truncate extends the file - this is desired though). 
- */ -- - isize = i_size_read(inode); -- end_index = (isize - 1) >> PAGE_SHIFT; -- if (unlikely(!isize || index > end_index)) { -- put_page(page); -- goto out; -- } -+ if (unlikely(iocb->ki_pos >= isize)) -+ goto put_pages; - -- /* nr is the maximum number of bytes to copy from this page */ -- nr = PAGE_SIZE; -- if (index == end_index) { -- nr = ((isize - 1) & ~PAGE_MASK) + 1; -- if (nr <= offset) { -- put_page(page); -- goto out; -- } -- } -- nr = nr - offset; -+ end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count); - -- /* If users can be writing to this page using arbitrary -- * virtual addresses, take care about potential aliasing -- * before reading the page on the kernel side. -- */ -- if (mapping_writably_mapped(mapping)) -- flush_dcache_page(page); -+ while ((iocb->ki_pos >> PAGE_SHIFT) + pg_nr > -+ (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT) -+ put_page(pages[--pg_nr]); - - /* -- * When a sequential read accesses a page several times, -- * only mark it as accessed the first time. -+ * Once we start copying data, we don't want to be touching any -+ * cachelines that might be contended: - */ -- if (prev_index != index || offset != prev_offset) -- mark_page_accessed(page); -- prev_index = index; -+ writably_mapped = mapping_writably_mapped(mapping); - - /* -- * Ok, we have the page, and it's up-to-date, so -- * now we can copy it to user space... -+ * When a sequential read accesses a page several times, only -+ * mark it as accessed the first time. - */ -+ if (iocb->ki_pos >> PAGE_SHIFT != -+ ra->prev_pos >> PAGE_SHIFT) -+ mark_page_accessed(pages[0]); -+ for (i = 1; i < pg_nr; i++) -+ mark_page_accessed(pages[i]); -+ -+ for (i = 0; i < pg_nr; i++) { -+ unsigned int offset = iocb->ki_pos & ~PAGE_MASK; -+ unsigned int bytes = min_t(loff_t, end_offset - iocb->ki_pos, -+ PAGE_SIZE - offset); -+ unsigned int copied; - -- ret = copy_page_to_iter(page, offset, nr, iter); -- offset += ret; -- index += offset >> PAGE_SHIFT; -- offset &= ~PAGE_MASK; -- prev_offset = offset; -- -- put_page(page); -- written += ret; -- if (!iov_iter_count(iter)) -- goto out; -- if (ret < nr) { -- error = -EFAULT; -- goto out; -- } -- continue; -- --page_not_up_to_date: -- /* Get exclusive access to the page ... */ -- if (iocb->ki_flags & IOCB_WAITQ) -- error = lock_page_async(page, iocb->ki_waitq); -- else -- error = lock_page_killable(page); -- if (unlikely(error)) -- goto readpage_error; -- --page_not_up_to_date_locked: -- /* Did it get truncated before we got the lock? */ -- if (!page->mapping) { -- unlock_page(page); -- put_page(page); -- continue; -- } -- -- /* Did somebody else fill it already? */ -- if (PageUptodate(page)) { -- unlock_page(page); -- goto page_ok; -- } -+ /* -+ * If users can be writing to this page using arbitrary -+ * virtual addresses, take care about potential aliasing -+ * before reading the page on the kernel side. -+ */ -+ if (writably_mapped) -+ flush_dcache_page(pages[i]); - --readpage: -- if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) { -- unlock_page(page); -- put_page(page); -- goto would_block; -- } -- /* -- * A previous I/O error may have been due to temporary -- * failures, eg. multipath errors. -- * PG_error will be set again if readpage fails. -- */ -- ClearPageError(page); -- /* Start the actual read. The read will unlock the page. 
*/ -- error = mapping->a_ops->readpage(filp, page); -+ copied = copy_page_to_iter(pages[i], offset, bytes, iter); - -- if (unlikely(error)) { -- if (error == AOP_TRUNCATED_PAGE) { -- put_page(page); -- error = 0; -- goto find_page; -- } -- goto readpage_error; -- } -+ written += copied; -+ iocb->ki_pos += copied; -+ ra->prev_pos = iocb->ki_pos; - -- if (!PageUptodate(page)) { -- if (iocb->ki_flags & IOCB_WAITQ) -- error = lock_page_async(page, iocb->ki_waitq); -- else -- error = lock_page_killable(page); -- -- if (unlikely(error)) -- goto readpage_error; -- if (!PageUptodate(page)) { -- if (page->mapping == NULL) { -- /* -- * invalidate_mapping_pages got it -- */ -- unlock_page(page); -- put_page(page); -- goto find_page; -- } -- unlock_page(page); -- shrink_readahead_size_eio(ra); -- error = -EIO; -- goto readpage_error; -+ if (copied < bytes) { -+ error = -EFAULT; -+ break; - } -- unlock_page(page); - } -+put_pages: -+ for (i = 0; i < pg_nr; i++) -+ put_page(pages[i]); -+ } while (iov_iter_count(iter) && iocb->ki_pos < isize && !error); - -- goto page_ok; -- --readpage_error: -- /* UHHUH! A synchronous read error occurred. Report it */ -- put_page(page); -- goto out; -- --no_cached_page: -- /* -- * Ok, it wasn't cached, so we need to create a new -- * page.. -- */ -- page = page_cache_alloc(mapping); -- if (!page) { -- error = -ENOMEM; -- goto out; -- } -- error = add_to_page_cache_lru(page, mapping, index, -- mapping_gfp_constraint(mapping, GFP_KERNEL)); -- if (error) { -- put_page(page); -- if (error == -EEXIST) { -- error = 0; -- goto find_page; -- } -- goto out; -- } -- goto readpage; -- } -+ file_accessed(filp); - --would_block: -- error = -EAGAIN; --out: -- ra->prev_pos = prev_index; -- ra->prev_pos <<= PAGE_SHIFT; -- ra->prev_pos |= prev_offset; -+ if (pages != pages_onstack) -+ kfree(pages); - -- *ppos = ((loff_t)index << PAGE_SHIFT) + offset; -- file_accessed(filp); - return written ? written : error; - } - EXPORT_SYMBOL_GPL(generic_file_buffered_read); -diff --git a/mm/gup.c b/mm/gup.c -index e869c634cc9a..9bfb3e933deb 100644 ---- a/mm/gup.c -+++ b/mm/gup.c -@@ -1085,6 +1085,13 @@ static long __get_user_pages(struct mm_struct *mm, - } - cond_resched(); - -+ if (current->faults_disabled_mapping && -+ vma->vm_file && -+ vma->vm_file->f_mapping == current->faults_disabled_mapping) { -+ ret = -EFAULT; -+ goto out; -+ } -+ - page = follow_page_mask(vma, start, foll_flags, &ctx); - if (!page) { - ret = faultin_page(vma, start, &foll_flags, locked); -diff --git a/mm/nommu.c b/mm/nommu.c -index 75a327149af1..fe0a77d01656 100644 ---- a/mm/nommu.c -+++ b/mm/nommu.c -@@ -290,6 +290,24 @@ void *vzalloc_node(unsigned long size, int node) - } - EXPORT_SYMBOL(vzalloc_node); - -+/** -+ * vmalloc_exec - allocate virtually contiguous, executable memory -+ * @size: allocation size -+ * -+ * Kernel-internal function to allocate enough pages to cover @size -+ * the page level allocator and map them into contiguous and -+ * executable kernel virtual space. -+ * -+ * For tight control over page level allocator and protection flags -+ * use __vmalloc() instead. 
-+ */ -+ -+void *vmalloc_exec(unsigned long size, gfp_t gfp_mask) -+{ -+ return __vmalloc(size, gfp_mask); -+} -+EXPORT_SYMBOL_GPL(vmalloc_exec); -+ - /** - * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) - * @size: allocation size -diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index 4e4ddd67b71e..563cc766f511 100644 ---- a/mm/page-writeback.c -+++ b/mm/page-writeback.c -@@ -2475,20 +2475,19 @@ int __set_page_dirty_nobuffers(struct page *page) - lock_page_memcg(page); - if (!TestSetPageDirty(page)) { - struct address_space *mapping = page_mapping(page); -- unsigned long flags; - - if (!mapping) { - unlock_page_memcg(page); - return 1; - } - -- xa_lock_irqsave(&mapping->i_pages, flags); -+ xa_lock_irq(&mapping->i_pages); - BUG_ON(page_mapping(page) != mapping); - WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); - account_page_dirtied(page, mapping); - __xa_set_mark(&mapping->i_pages, page_index(page), - PAGECACHE_TAG_DIRTY); -- xa_unlock_irqrestore(&mapping->i_pages, flags); -+ xa_unlock_irq(&mapping->i_pages); - unlock_page_memcg(page); - - if (mapping->host) { -diff --git a/mm/vmalloc.c b/mm/vmalloc.c -index be4724b916b3..efd7f9dd1eb8 100644 ---- a/mm/vmalloc.c -+++ b/mm/vmalloc.c -@@ -2665,6 +2665,27 @@ void *vzalloc_node(unsigned long size, int node) - } - EXPORT_SYMBOL(vzalloc_node); - -+/** -+ * vmalloc_exec - allocate virtually contiguous, executable memory -+ * @size: allocation size -+ * -+ * Kernel-internal function to allocate enough pages to cover @size -+ * the page level allocator and map them into contiguous and -+ * executable kernel virtual space. -+ * -+ * For tight control over page level allocator and protection flags -+ * use __vmalloc() instead. -+ * -+ * Return: pointer to the allocated memory or %NULL on error -+ */ -+void *vmalloc_exec(unsigned long size, gfp_t gfp_mask) -+{ -+ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, -+ gfp_mask, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, -+ NUMA_NO_NODE, __builtin_return_address(0)); -+} -+EXPORT_SYMBOL_GPL(vmalloc_exec); -+ - #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) - #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL) - #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) diff --git a/linux59-tkg/linux59-tkg-patches/0009-glitched-bmq.patch b/linux59-tkg/linux59-tkg-patches/0009-glitched-bmq.patch deleted file mode 100644 index e42e522..0000000 --- a/linux59-tkg/linux59-tkg-patches/0009-glitched-bmq.patch +++ /dev/null @@ -1,90 +0,0 @@ -From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched - BMQ - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_250 -+ default HZ_500 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -39,6 +39,13 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with great smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. 
-+ - config HZ_1000 - bool "1000 HZ" - help -@@ -52,6 +59,7 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 2a202a846757..1d9c7ed79b11 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -4,7 +4,7 @@ - - choice - prompt "Timer frequency" -- default HZ_500 -+ default HZ_750 - help - Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 Hz but 100 Hz may be more -@@ -46,6 +46,13 @@ choice - on desktops with great smoothness without increasing CPU power - consumption and sacrificing the battery life on laptops. - -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a good timer frequency for desktops. Provides fast -+ interactivity with great smoothness without sacrificing too -+ much throughput. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -60,6 +67,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 500 if HZ_500 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 9270a4370d54..30d01e647417 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -169,7 +169,7 @@ - /* - * From 0 .. 200. Higher means more swappy. - */ --int vm_swappiness = 60; -+int vm_swappiness = 20; - - static void set_task_reclaim_state(struct task_struct *task, - struct reclaim_state *rs) diff --git a/linux59-tkg/linux59-tkg-patches/0009-glitched-ondemand-bmq.patch b/linux59-tkg/linux59-tkg-patches/0009-glitched-ondemand-bmq.patch deleted file mode 100644 index a926040..0000000 --- a/linux59-tkg/linux59-tkg-patches/0009-glitched-ondemand-bmq.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 6b423eebfd5d..61e3271675d6 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -21,10 +21,10 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) --#define DEF_SAMPLING_DOWN_FACTOR (1) -+#define DEF_FREQUENCY_UP_THRESHOLD (55) -+#define DEF_SAMPLING_DOWN_FACTOR (5) - #define MAX_SAMPLING_DOWN_FACTOR (100000) --#define MICRO_FREQUENCY_UP_THRESHOLD (95) -+#define MICRO_FREQUENCY_UP_THRESHOLD (63) - #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) - #define MIN_FREQUENCY_UP_THRESHOLD (1) - #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/linux59-tkg/linux59-tkg-patches/0009-prjc_v5.9-r0.patch b/linux59-tkg/linux59-tkg-patches/0009-prjc_v5.9-r0.patch deleted file mode 100644 index 550d29c..0000000 --- a/linux59-tkg/linux59-tkg-patches/0009-prjc_v5.9-r0.patch +++ /dev/null @@ -1,8809 +0,0 @@ -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index a1068742a6df..b97a9697fde4 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -4611,6 +4611,12 @@ - - sbni= [NET] Granch SBNI12 leased line adapter - -+ sched_timeslice= -+ [KNL] Time slice in us for BMQ/PDS scheduler. -+ Format: (must be >= 1000) -+ Default: 4000 -+ See Documentation/scheduler/sched-BMQ.txt -+ - sched_debug [KNL] Enables verbose scheduler debug messages. - - schedstats= [KNL,X86] Enable or disable scheduled statistics. 
-diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index d4b32cc32bb7..14118e5168ef 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -1515,3 +1515,13 @@ is 10 seconds. - - The softlockup threshold is (``2 * watchdog_thresh``). Setting this - tunable to zero will disable lockup detection altogether. -+ -+yield_type: -+=========== -+ -+BMQ/PDS CPU scheduler only. This determines what type of yield calls -+to sched_yield will perform. -+ -+ 0 - No yield. -+ 1 - Deboost and requeue task. (default) -+ 2 - Set run queue skip task. -diff --git a/Documentation/scheduler/sched-BMQ.txt b/Documentation/scheduler/sched-BMQ.txt -new file mode 100644 -index 000000000000..05c84eec0f31 ---- /dev/null -+++ b/Documentation/scheduler/sched-BMQ.txt -@@ -0,0 +1,110 @@ -+ BitMap queue CPU Scheduler -+ -------------------------- -+ -+CONTENT -+======== -+ -+ Background -+ Design -+ Overview -+ Task policy -+ Priority management -+ BitMap Queue -+ CPU Assignment and Migration -+ -+ -+Background -+========== -+ -+BitMap Queue CPU scheduler, referred to as BMQ from here on, is an evolution -+of previous Priority and Deadline based Skiplist multiple queue scheduler(PDS), -+and inspired by Zircon scheduler. The goal of it is to keep the scheduler code -+simple, while efficiency and scalable for interactive tasks, such as desktop, -+movie playback and gaming etc. -+ -+Design -+====== -+ -+Overview -+-------- -+ -+BMQ use per CPU run queue design, each CPU(logical) has it's own run queue, -+each CPU is responsible for scheduling the tasks that are putting into it's -+run queue. -+ -+The run queue is a set of priority queues. Note that these queues are fifo -+queue for non-rt tasks or priority queue for rt tasks in data structure. See -+BitMap Queue below for details. BMQ is optimized for non-rt tasks in the fact -+that most applications are non-rt tasks. No matter the queue is fifo or -+priority, In each queue is an ordered list of runnable tasks awaiting execution -+and the data structures are the same. When it is time for a new task to run, -+the scheduler simply looks the lowest numbered queueue that contains a task, -+and runs the first task from the head of that queue. And per CPU idle task is -+also in the run queue, so the scheduler can always find a task to run on from -+its run queue. -+ -+Each task will assigned the same timeslice(default 4ms) when it is picked to -+start running. Task will be reinserted at the end of the appropriate priority -+queue when it uses its whole timeslice. When the scheduler selects a new task -+from the priority queue it sets the CPU's preemption timer for the remainder of -+the previous timeslice. When that timer fires the scheduler will stop execution -+on that task, select another task and start over again. -+ -+If a task blocks waiting for a shared resource then it's taken out of its -+priority queue and is placed in a wait queue for the shared resource. When it -+is unblocked it will be reinserted in the appropriate priority queue of an -+eligible CPU. -+ -+Task policy -+----------- -+ -+BMQ supports DEADLINE, FIFO, RR, NORMAL, BATCH and IDLE task policy like the -+mainline CFS scheduler. But BMQ is heavy optimized for non-rt task, that's -+NORMAL/BATCH/IDLE policy tasks. Below is the implementation detail of each -+policy. -+ -+DEADLINE -+ It is squashed as priority 0 FIFO task. 
-+ -+FIFO/RR -+ All RT tasks share one single priority queue in BMQ run queue designed. The -+complexity of insert operation is O(n). BMQ is not designed for system runs -+with major rt policy tasks. -+ -+NORMAL/BATCH/IDLE -+ BATCH and IDLE tasks are treated as the same policy. They compete CPU with -+NORMAL policy tasks, but they just don't boost. To control the priority of -+NORMAL/BATCH/IDLE tasks, simply use nice level. -+ -+ISO -+ ISO policy is not supported in BMQ. Please use nice level -20 NORMAL policy -+task instead. -+ -+Priority management -+------------------- -+ -+RT tasks have priority from 0-99. For non-rt tasks, there are three different -+factors used to determine the effective priority of a task. The effective -+priority being what is used to determine which queue it will be in. -+ -+The first factor is simply the task’s static priority. Which is assigned from -+task's nice level, within [-20, 19] in userland's point of view and [0, 39] -+internally. -+ -+The second factor is the priority boost. This is a value bounded between -+[-MAX_PRIORITY_ADJ, MAX_PRIORITY_ADJ] used to offset the base priority, it is -+modified by the following cases: -+ -+*When a thread has used up its entire timeslice, always deboost its boost by -+increasing by one. -+*When a thread gives up cpu control(voluntary or non-voluntary) to reschedule, -+and its switch-in time(time after last switch and run) below the thredhold -+based on its priority boost, will boost its boost by decreasing by one buti is -+capped at 0 (won’t go negative). -+ -+The intent in this system is to ensure that interactive threads are serviced -+quickly. These are usually the threads that interact directly with the user -+and cause user-perceivable latency. These threads usually do little work and -+spend most of their time blocked awaiting another user event. So they get the -+priority boost from unblocking while background threads that do most of the -+processing receive the priority penalty for using their entire timeslice. 
-diff --git a/fs/proc/base.c b/fs/proc/base.c -index 617db4e0faa0..f85926764f9a 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -479,7 +479,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, - seq_puts(m, "0 0 0\n"); - else - seq_printf(m, "%llu %llu %lu\n", -- (unsigned long long)task->se.sum_exec_runtime, -+ (unsigned long long)tsk_seruntime(task), - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); - -diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h -index 8874f681b056..59eb72bf7d5f 100644 ---- a/include/asm-generic/resource.h -+++ b/include/asm-generic/resource.h -@@ -23,7 +23,7 @@ - [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, \ - [RLIMIT_SIGPENDING] = { 0, 0 }, \ - [RLIMIT_MSGQUEUE] = { MQ_BYTES_MAX, MQ_BYTES_MAX }, \ -- [RLIMIT_NICE] = { 0, 0 }, \ -+ [RLIMIT_NICE] = { 30, 30 }, \ - [RLIMIT_RTPRIO] = { 0, 0 }, \ - [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \ - } -diff --git a/include/linux/sched.h b/include/linux/sched.h -index afe01e232935..8918609cb9f0 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -34,6 +34,7 @@ - #include - #include - #include -+#include - - /* task_struct member predeclarations (sorted alphabetically): */ - struct audit_context; -@@ -652,12 +653,18 @@ struct task_struct { - unsigned int ptrace; - - #ifdef CONFIG_SMP -- int on_cpu; - struct __call_single_node wake_entry; -+#endif -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) -+ int on_cpu; -+#endif -+ -+#ifdef CONFIG_SMP - #ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ - unsigned int cpu; - #endif -+#ifndef CONFIG_SCHED_ALT - unsigned int wakee_flips; - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; -@@ -671,6 +678,7 @@ struct task_struct { - */ - int recent_used_cpu; - int wake_cpu; -+#endif /* !CONFIG_SCHED_ALT */ - #endif - int on_rq; - -@@ -679,13 +687,33 @@ struct task_struct { - int normal_prio; - unsigned int rt_priority; - -+#ifdef CONFIG_SCHED_ALT -+ u64 last_ran; -+ s64 time_slice; -+#ifdef CONFIG_SCHED_BMQ -+ int boost_prio; -+ int bmq_idx; -+ struct list_head bmq_node; -+#endif /* CONFIG_SCHED_BMQ */ -+#ifdef CONFIG_SCHED_PDS -+ u64 deadline; -+ u64 priodl; -+ /* skip list level */ -+ int sl_level; -+ /* skip list node */ -+ struct skiplist_node sl_node; -+#endif /* CONFIG_SCHED_PDS */ -+ /* sched_clock time spent running */ -+ u64 sched_time; -+#else /* !CONFIG_SCHED_ALT */ - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; -+ struct sched_dl_entity dl; -+#endif - #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; - #endif -- struct sched_dl_entity dl; - - #ifdef CONFIG_UCLAMP_TASK - /* -@@ -1332,6 +1360,15 @@ struct task_struct { - */ - }; - -+#ifdef CONFIG_SCHED_ALT -+#define tsk_seruntime(t) ((t)->sched_time) -+/* replace the uncertian rt_timeout with 0UL */ -+#define tsk_rttimeout(t) (0UL) -+#else /* CFS */ -+#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) -+#define tsk_rttimeout(t) ((t)->rt.timeout) -+#endif /* !CONFIG_SCHED_ALT */ -+ - static inline struct pid *task_pid(struct task_struct *task) - { - return task->thread_pid; -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 1aff00b65f3c..179d77c8360e 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -1,5 +1,24 @@ - /* SPDX-License-Identifier: GPL-2.0 */ - -+#ifdef CONFIG_SCHED_ALT -+ -+static inline int dl_task(struct task_struct *p) -+{ -+ 
return 0; -+} -+ -+#ifdef CONFIG_SCHED_BMQ -+#define __tsk_deadline(p) (0UL) -+#endif -+ -+#ifdef CONFIG_SCHED_PDS -+#define __tsk_deadline(p) ((p)->priodl) -+#endif -+ -+#else -+ -+#define __tsk_deadline(p) ((p)->dl.deadline) -+ - /* - * SCHED_DEADLINE tasks has negative priorities, reflecting - * the fact that any of them has higher prio than RT and -@@ -19,6 +38,7 @@ static inline int dl_task(struct task_struct *p) - { - return dl_prio(p->prio); - } -+#endif /* CONFIG_SCHED_ALT */ - - static inline bool dl_time_before(u64 a, u64 b) - { -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 7d64feafc408..42730d27ceb5 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -20,11 +20,20 @@ - */ - - #define MAX_USER_RT_PRIO 100 -+ - #define MAX_RT_PRIO MAX_USER_RT_PRIO - - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - -+/* +/- priority levels from the base priority */ -+#ifdef CONFIG_SCHED_BMQ -+#define MAX_PRIORITY_ADJ 7 -+#endif -+#ifdef CONFIG_SCHED_PDS -+#define MAX_PRIORITY_ADJ 0 -+#endif -+ - /* - * Convert user-nice values [ -20 ... 0 ... 19 ] - * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..0a7565d0d3cf 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return true; -+#ifndef CONFIG_SCHED_ALT - if (policy == SCHED_DEADLINE) - return true; -+#endif - return false; - } - -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -new file mode 100644 -index 000000000000..47ca955a451d ---- /dev/null -+++ b/include/linux/skip_list.h -@@ -0,0 +1,177 @@ -+/* -+ * Copyright (C) 2016 Alfred Chen. -+ * -+ * Code based on Con Kolivas's skip list implementation for BFS, and -+ * which is based on example originally by William Pugh. -+ * -+ * Skip Lists are a probabilistic alternative to balanced trees, as -+ * described in the June 1990 issue of CACM and were invented by -+ * William Pugh in 1987. -+ * -+ * A couple of comments about this implementation: -+ * -+ * This file only provides a infrastructure of skip list. -+ * -+ * skiplist_node is embedded into container data structure, to get rid -+ * the dependency of kmalloc/kfree operation in scheduler code. -+ * -+ * A customized search function should be defined using DEFINE_SKIPLIST_INSERT -+ * macro and be used for skip list insert operation. -+ * -+ * Random Level is also not defined in this file, instead, it should be -+ * customized implemented and set to node->level then pass to the customized -+ * skiplist_insert function. -+ * -+ * Levels start at zero and go up to (NUM_SKIPLIST_LEVEL -1) -+ * -+ * NUM_SKIPLIST_LEVEL in this implementation is 8 instead of origin 16, -+ * considering that there will be 256 entries to enable the top level when using -+ * random level p=0.5, and that number is more than enough for a run queue usage -+ * in a scheduler usage. And it also help to reduce the memory usage of the -+ * embedded skip list node in task_struct to about 50%. -+ * -+ * The insertion routine has been implemented so as to use the -+ * dirty hack described in the CACM paper: if a random level is -+ * generated that is more than the current maximum level, the -+ * current maximum level plus one is used instead. 
-+ * -+ * BFS Notes: In this implementation of skiplists, there are bidirectional -+ * next/prev pointers and the insert function returns a pointer to the actual -+ * node the value is stored. The key here is chosen by the scheduler so as to -+ * sort tasks according to the priority list requirements and is no longer used -+ * by the scheduler after insertion. The scheduler lookup, however, occurs in -+ * O(1) time because it is always the first item in the level 0 linked list. -+ * Since the task struct stores a copy of the node pointer upon skiplist_insert, -+ * it can also remove it much faster than the original implementation with the -+ * aid of prev<->next pointer manipulation and no searching. -+ */ -+#ifndef _LINUX_SKIP_LIST_H -+#define _LINUX_SKIP_LIST_H -+ -+#include -+ -+#define NUM_SKIPLIST_LEVEL (8) -+ -+struct skiplist_node { -+ int level; /* Levels in this node */ -+ struct skiplist_node *next[NUM_SKIPLIST_LEVEL]; -+ struct skiplist_node *prev[NUM_SKIPLIST_LEVEL]; -+}; -+ -+#define SKIPLIST_NODE_INIT(name) { 0,\ -+ {&name, &name, &name, &name,\ -+ &name, &name, &name, &name},\ -+ {&name, &name, &name, &name,\ -+ &name, &name, &name, &name},\ -+ } -+ -+static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) -+{ -+ /* only level 0 ->next matters in skiplist_empty() */ -+ WRITE_ONCE(node->next[0], node); -+} -+ -+/** -+ * FULL_INIT_SKIPLIST_NODE -- fully init a skiplist_node, expecially for header -+ * @node: the skip list node to be inited. -+ */ -+static inline void FULL_INIT_SKIPLIST_NODE(struct skiplist_node *node) -+{ -+ int i; -+ -+ node->level = 0; -+ for (i = 0; i < NUM_SKIPLIST_LEVEL; i++) { -+ WRITE_ONCE(node->next[i], node); -+ node->prev[i] = node; -+ } -+} -+ -+/** -+ * skiplist_empty - test whether a skip list is empty -+ * @head: the skip list to test. -+ */ -+static inline int skiplist_empty(const struct skiplist_node *head) -+{ -+ return READ_ONCE(head->next[0]) == head; -+} -+ -+/** -+ * skiplist_entry - get the struct for this entry -+ * @ptr: the &struct skiplist_node pointer. -+ * @type: the type of the struct this is embedded in. -+ * @member: the name of the skiplist_node within the struct. -+ */ -+#define skiplist_entry(ptr, type, member) \ -+ container_of(ptr, type, member) -+ -+/** -+ * DEFINE_SKIPLIST_INSERT_FUNC -- macro to define a customized skip list insert -+ * function, which takes two parameters, first one is the header node of the -+ * skip list, second one is the skip list node to be inserted -+ * @func_name: the customized skip list insert function name -+ * @search_func: the search function to be used, which takes two parameters, -+ * 1st one is the itrator of skiplist_node in the list, the 2nd is the skip list -+ * node to be inserted, the function should return true if search should be -+ * continued, otherwise return false. 
-+ * Returns 1 if @node is inserted as the first item of skip list at level zero, -+ * otherwise 0 -+ */ -+#define DEFINE_SKIPLIST_INSERT_FUNC(func_name, search_func)\ -+static inline int func_name(struct skiplist_node *head, struct skiplist_node *node)\ -+{\ -+ struct skiplist_node *update[NUM_SKIPLIST_LEVEL];\ -+ struct skiplist_node *p, *q;\ -+ int k = head->level;\ -+\ -+ p = head;\ -+ do {\ -+ while (q = p->next[k], q != head && search_func(q, node))\ -+ p = q;\ -+ update[k] = p;\ -+ } while (--k >= 0);\ -+\ -+ k = node->level;\ -+ if (unlikely(k > head->level)) {\ -+ node->level = k = ++head->level;\ -+ update[k] = head;\ -+ }\ -+\ -+ do {\ -+ p = update[k];\ -+ q = p->next[k];\ -+ node->next[k] = q;\ -+ p->next[k] = node;\ -+ node->prev[k] = p;\ -+ q->prev[k] = node;\ -+ } while (--k >= 0);\ -+\ -+ return (p == head);\ -+} -+ -+/** -+ * skiplist_del_init -- delete skip list node from a skip list and reset it's -+ * init state -+ * @head: the header node of the skip list to be deleted from. -+ * @node: the skip list node to be deleted, the caller need to ensure @node is -+ * in skip list which @head represent. -+ * Returns 1 if @node is the first item of skip level at level zero, otherwise 0 -+ */ -+static inline int -+skiplist_del_init(struct skiplist_node *head, struct skiplist_node *node) -+{ -+ int l, m = node->level; -+ -+ for (l = 0; l <= m; l++) { -+ node->prev[l]->next[l] = node->next[l]; -+ node->next[l]->prev[l] = node->prev[l]; -+ } -+ if (m == head->level && m > 0) { -+ while (head->next[m] == head && m > 0) -+ m--; -+ head->level = m; -+ } -+ INIT_SKIPLIST_NODE(node); -+ -+ return (node->prev[0] == head); -+} -+#endif /* _LINUX_SKIP_LIST_H */ -diff --git a/init/Kconfig b/init/Kconfig -index d6a0b31b13dc..2122dba5596f 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -770,9 +770,39 @@ config GENERIC_SCHED_CLOCK - - menu "Scheduler features" - -+menuconfig SCHED_ALT -+ bool "Alternative CPU Schedulers" -+ default y -+ help -+ This feature enable alternative CPU scheduler" -+ -+if SCHED_ALT -+ -+choice -+ prompt "Alternative CPU Scheduler" -+ default SCHED_BMQ -+ -+config SCHED_BMQ -+ bool "BMQ CPU scheduler" -+ help -+ The BitMap Queue CPU scheduler for excellent interactivity and -+ responsiveness on the desktop and solid scalability on normal -+ hardware and commodity servers. -+ -+config SCHED_PDS -+ bool "PDS CPU scheduler" -+ help -+ The Priority and Deadline based Skip list multiple queue CPU -+ Scheduler. -+ -+endchoice -+ -+endif -+ - config UCLAMP_TASK - bool "Enable utilization clamping for RT/FAIR tasks" - depends on CPU_FREQ_GOV_SCHEDUTIL -+ depends on !SCHED_ALT - help - This feature enables the scheduler to track the clamped utilization - of each CPU based on RUNNABLE tasks scheduled on that CPU. -@@ -858,6 +888,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION -+ depends on !SCHED_ALT - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -944,7 +975,7 @@ menuconfig CGROUP_SCHED - bandwidth allocation to such task groups. It uses cgroups to group - tasks. 
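A usage sketch for the skip_list.h interface above: the node is embedded in the container structure and the insert function is generated by DEFINE_SKIPLIST_INSERT_FUNC. The container struct and search function are made up here, and READ_ONCE/WRITE_ONCE/container_of come from kernel headers, so this only builds inside the tree (or with user-space stand-ins for those macros).

#include <stdio.h>
#include "skip_list.h"                  /* the header added by this patch */

struct demo_item {
        unsigned long long key;         /* what the search function compares */
        struct skiplist_node sl_node;
};

/* return non-zero while the search should keep walking right */
static inline int demo_search(struct skiplist_node *it,
                              struct skiplist_node *node)
{
        return skiplist_entry(it, struct demo_item, sl_node)->key <=
               skiplist_entry(node, struct demo_item, sl_node)->key;
}

DEFINE_SKIPLIST_INSERT_FUNC(demo_insert, demo_search)

int main(void)
{
        struct skiplist_node head;
        struct demo_item a = { .key = 7 };

        FULL_INIT_SKIPLIST_NODE(&head); /* headers get the full init */
        INIT_SKIPLIST_NODE(&a.sl_node);
        a.sl_node.level = 0;            /* caller picks the (random) level */

        printf("inserted at head of level 0: %d\n",
               demo_insert(&head, &a.sl_node));
        skiplist_del_init(&head, &a.sl_node);
        printf("empty again: %d\n", skiplist_empty(&head));
        return 0;
}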
- --if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_ALT - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED -@@ -1200,6 +1231,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -+ depends on !SCHED_ALT - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -diff --git a/init/init_task.c b/init/init_task.c -index f6889fce64af..5a23122f3d2c 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -75,9 +75,15 @@ struct task_struct init_task - .stack = init_stack, - .usage = REFCOUNT_INIT(2), - .flags = PF_KTHREAD, -+#ifdef CONFIG_SCHED_ALT -+ .prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, -+ .static_prio = DEFAULT_PRIO, -+ .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, -+#else - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, - .normal_prio = MAX_PRIO - 20, -+#endif - .policy = SCHED_NORMAL, - .cpus_ptr = &init_task.cpus_mask, - .cpus_mask = CPU_MASK_ALL, -@@ -87,6 +93,19 @@ struct task_struct init_task - .restart_block = { - .fn = do_no_restart_syscall, - }, -+#ifdef CONFIG_SCHED_ALT -+#ifdef CONFIG_SCHED_BMQ -+ .boost_prio = 0, -+ .bmq_idx = 15, -+ .bmq_node = LIST_HEAD_INIT(init_task.bmq_node), -+#endif -+#ifdef CONFIG_SCHED_PDS -+ .deadline = 0, -+ .sl_level = 0, -+ .sl_node = SKIPLIST_NODE_INIT(init_task.sl_node), -+#endif -+ .time_slice = HZ, -+#else - .se = { - .group_node = LIST_HEAD_INIT(init_task.se.group_node), - }, -@@ -94,6 +113,7 @@ struct task_struct init_task - .run_list = LIST_HEAD_INIT(init_task.rt.run_list), - .time_slice = RR_TIMESLICE, - }, -+#endif - .tasks = LIST_HEAD_INIT(init_task.tasks), - #ifdef CONFIG_SMP - .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), -diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index 642415b8c3c9..7e0e1fe18035 100644 ---- a/kernel/cgroup/cpuset.c -+++ b/kernel/cgroup/cpuset.c -@@ -636,7 +636,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) - return ret; - } - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_ALT) - /* - * Helper routine for generate_sched_domains(). - * Do cpusets a, b have overlapping effective cpus_allowed masks? 
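As a quick sanity check of the init_task values in the hunk above, assuming the stock MAX_RT_PRIO and NICE_WIDTH constants from sched/prio.h:

#include <stdio.h>

#define MAX_RT_PRIO  100
#define NICE_WIDTH   40
#define MAX_PRIO     (MAX_RT_PRIO + NICE_WIDTH)         /* 140 */
#define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2)     /* 120 */

int main(void)
{
        printf("CFS init prio: %d\n", MAX_PRIO - 20);       /* 120 */
        printf("BMQ init prio: %d\n", DEFAULT_PRIO + 7);    /* 127, MAX_PRIORITY_ADJ = 7 */
        printf("PDS init prio: %d\n", DEFAULT_PRIO + 0);    /* 120, MAX_PRIORITY_ADJ = 0 */
        return 0;
}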
-@@ -1009,7 +1009,7 @@ static void rebuild_sched_domains_locked(void) - /* Have scheduler rebuild the domains */ - partition_and_rebuild_sched_domains(ndoms, doms, attr); - } --#else /* !CONFIG_SMP */ -+#else /* !CONFIG_SMP || CONFIG_SCHED_ALT */ - static void rebuild_sched_domains_locked(void) - { - } -diff --git a/kernel/delayacct.c b/kernel/delayacct.c -index 27725754ac99..769d773c7182 100644 ---- a/kernel/delayacct.c -+++ b/kernel/delayacct.c -@@ -106,7 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) - */ - t1 = tsk->sched_info.pcount; - t2 = tsk->sched_info.run_delay; -- t3 = tsk->se.sum_exec_runtime; -+ t3 = tsk_seruntime(tsk); - - d->cpu_count += t1; - -diff --git a/kernel/exit.c b/kernel/exit.c -index 733e80f334e7..3f3506c851fd 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -121,7 +121,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->curr_target = next_thread(tsk); - } - -- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, -+ add_device_randomness((const void*) &tsk_seruntime(tsk), - sizeof(unsigned long long)); - - /* -@@ -142,7 +142,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->inblock += task_io_get_inblock(tsk); - sig->oublock += task_io_get_oublock(tsk); - task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; -+ sig->sum_sched_runtime += tsk_seruntime(tsk); - sig->nr_threads--; - __unhash_process(tsk, group_dead); - write_sequnlock(&sig->stats_lock); -diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c -index f6310f848f34..4176ad070bc9 100644 ---- a/kernel/livepatch/transition.c -+++ b/kernel/livepatch/transition.c -@@ -306,7 +306,11 @@ static bool klp_try_switch_task(struct task_struct *task) - */ - rq = task_rq_lock(task, &flags); - -+#ifdef CONFIG_SCHED_ALT -+ if (task_running(task) && task != current) { -+#else - if (task_running(rq, task) && task != current) { -+#endif - snprintf(err_buf, STACK_ERR_BUF_SIZE, - "%s: %s:%d is running\n", __func__, task->comm, - task->pid); -diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index cfdd5b93264d..84c284eb544a 100644 ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -227,15 +227,19 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, - * Only use with rt_mutex_waiter_{less,equal}() - */ - #define task_to_waiter(p) \ -- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } -+ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = __tsk_deadline(p) } - - static inline int - rt_mutex_waiter_less(struct rt_mutex_waiter *left, - struct rt_mutex_waiter *right) - { -+#ifdef CONFIG_SCHED_PDS -+ return (left->deadline < right->deadline); -+#else - if (left->prio < right->prio) - return 1; - -+#ifndef CONFIG_SCHED_BMQ - /* - * If both waiters have dl_prio(), we check the deadlines of the - * associated tasks. -@@ -244,17 +248,23 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left, - */ - if (dl_prio(left->prio)) - return dl_time_before(left->deadline, right->deadline); -+#endif - - return 0; -+#endif - } - - static inline int - rt_mutex_waiter_equal(struct rt_mutex_waiter *left, - struct rt_mutex_waiter *right) - { -+#ifdef CONFIG_SCHED_PDS -+ return (left->deadline == right->deadline); -+#else - if (left->prio != right->prio) - return 0; - -+#ifndef CONFIG_SCHED_BMQ - /* - * If both waiters have dl_prio(), we check the deadlines of the - * associated tasks. 
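The rtmutex hunk above changes how waiters are ordered: PDS compares the tasks' deadlines directly, while BMQ compares priority only and compiles out the SCHED_DEADLINE comparison. A standalone restatement of those two orderings, with an illustrative struct rather than the kernel's rt_mutex_waiter:

#include <stdio.h>

struct waiter { int prio; unsigned long long deadline; };

static int less_pds(struct waiter *l, struct waiter *r)
{
        return l->deadline < r->deadline;       /* PDS: pure deadline order */
}

static int less_bmq(struct waiter *l, struct waiter *r)
{
        return l->prio < r->prio;               /* BMQ: prio only, no dl check */
}

int main(void)
{
        struct waiter a = { .prio = 120, .deadline = 100 };
        struct waiter b = { .prio = 110, .deadline = 200 };

        printf("PDS: a before b? %d\n", less_pds(&a, &b));  /* 1 */
        printf("BMQ: a before b? %d\n", less_bmq(&a, &b));  /* 0 */
        return 0;
}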
-@@ -263,8 +273,10 @@ rt_mutex_waiter_equal(struct rt_mutex_waiter *left, - */ - if (dl_prio(left->prio)) - return left->deadline == right->deadline; -+#endif - - return 1; -+#endif - } - - static void -@@ -678,7 +690,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, - * the values of the node being removed. - */ - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - rt_mutex_enqueue(lock, waiter); - -@@ -951,7 +963,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, - waiter->task = task; - waiter->lock = lock; - waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - - /* Get the top priority waiter on the lock */ - if (rt_mutex_has_waiters(lock)) -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 5fc9c9b70862..eb6d7d87779f 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -22,14 +22,20 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) - CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer - endif - --obj-y += core.o loadavg.o clock.o cputime.o --obj-y += idle.o fair.o rt.o deadline.o --obj-y += wait.o wait_bit.o swait.o completion.o -- --obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o -+ifdef CONFIG_SCHED_ALT -+obj-y += alt_core.o alt_debug.o -+else -+obj-y += core.o -+obj-y += fair.o rt.o deadline.o -+obj-$(CONFIG_SMP) += cpudeadline.o stop_task.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o --obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o -+endif -+obj-y += loadavg.o clock.o cputime.o -+obj-y += idle.o -+obj-y += wait.o wait_bit.o swait.o completion.o -+obj-$(CONFIG_SMP) += cpupri.o pelt.o topology.o -+obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -new file mode 100644 -index 000000000000..f36264fea75c ---- /dev/null -+++ b/kernel/sched/alt_core.c -@@ -0,0 +1,6360 @@ -+/* -+ * kernel/sched/alt_core.c -+ * -+ * Core alternative kernel scheduler code and related syscalls -+ * -+ * Copyright (C) 1991-2002 Linus Torvalds -+ * -+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes -+ * a whole lot of those previous things. -+ * 2017-09-06 Priority and Deadline based Skip list multiple queue kernel -+ * scheduler by Alfred Chen. -+ * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. 
-+ */ -+#include "sched.h" -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+ -+#include "../workqueue_internal.h" -+#include "../../fs/io-wq.h" -+#include "../smpboot.h" -+ -+#include "pelt.h" -+#include "smp.h" -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+#define ALT_SCHED_VERSION "v5.9-r0" -+ -+/* rt_prio(prio) defined in include/linux/sched/rt.h */ -+#define rt_task(p) rt_prio((p)->prio) -+#define rt_policy(policy) ((policy) == SCHED_FIFO || (policy) == SCHED_RR) -+#define task_has_rt_policy(p) (rt_policy((p)->policy)) -+ -+#define STOP_PRIO (MAX_RT_PRIO - 1) -+ -+/* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ -+u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000); -+ -+static int __init sched_timeslice(char *str) -+{ -+ int timeslice_us; -+ -+ get_option(&str, ×lice_us); -+ if (timeslice_us >= 1000) -+ sched_timeslice_ns = timeslice_us * 1000; -+ -+ return 0; -+} -+early_param("sched_timeslice", sched_timeslice); -+ -+/* Reschedule if less than this many μs left */ -+#define RESCHED_NS (100 * 1000) -+ -+/** -+ * sched_yield_type - Choose what sort of yield sched_yield will perform. -+ * 0: No yield. -+ * 1: Deboost and requeue task. (default) -+ * 2: Set rq skip task. -+ */ -+int sched_yield_type __read_mostly = 1; -+ -+#ifdef CONFIG_SMP -+static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; -+ -+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); -+ -+#ifdef CONFIG_SCHED_SMT -+DEFINE_STATIC_KEY_FALSE(sched_smt_present); -+EXPORT_SYMBOL_GPL(sched_smt_present); -+#endif -+ -+/* -+ * Keep a unique ID per domain (we use the first CPUs number in the cpumask of -+ * the domain), this allows us to quickly tell if two cpus are in the same cache -+ * domain, see cpus_share_cache(). 
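The "sched_timeslice" boot parameter parsed above is given in microseconds and values below 1000 are ignored, so for example sched_timeslice=2000 selects a 2 ms slice. A standalone restatement of that check:

#include <stdio.h>

static unsigned long long timeslice_ns = 4ULL * 1000 * 1000;    /* 4 ms default */

static void parse_sched_timeslice(int timeslice_us)
{
        if (timeslice_us >= 1000)       /* anything shorter is rejected */
                timeslice_ns = (unsigned long long)timeslice_us * 1000;
}

int main(void)
{
        parse_sched_timeslice(500);             /* too small: ignored */
        printf("%llu ns\n", timeslice_ns);      /* still 4000000 */
        parse_sched_timeslice(2000);            /* sched_timeslice=2000 */
        printf("%llu ns\n", timeslice_ns);      /* 2000000 */
        return 0;
}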
-+ */ -+DEFINE_PER_CPU(int, sd_llc_id); -+#endif /* CONFIG_SMP */ -+ -+static DEFINE_MUTEX(sched_hotcpu_mutex); -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+#define IDLE_WM (IDLE_TASK_SCHED_PRIO) -+ -+#ifdef CONFIG_SCHED_SMT -+static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; -+#endif -+static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; -+ -+#ifdef CONFIG_SCHED_BMQ -+#include "bmq_imp.h" -+#endif -+#ifdef CONFIG_SCHED_PDS -+#include "pds_imp.h" -+#endif -+ -+static inline void update_sched_rq_watermark(struct rq *rq) -+{ -+ unsigned long watermark = sched_queue_watermark(rq); -+ unsigned long last_wm = rq->watermark; -+ unsigned long i; -+ int cpu; -+ -+ /*printk(KERN_INFO "sched: watermark(%d) %d, last %d\n", -+ cpu_of(rq), watermark, last_wm);*/ -+ if (watermark == last_wm) -+ return; -+ -+ rq->watermark = watermark; -+ cpu = cpu_of(rq); -+ if (watermark < last_wm) { -+ for (i = watermark + 1; i <= last_wm; i++) -+ cpumask_andnot(&sched_rq_watermark[i], -+ &sched_rq_watermark[i], cpumask_of(cpu)); -+#ifdef CONFIG_SCHED_SMT -+ if (!static_branch_likely(&sched_smt_present)) -+ return; -+ if (IDLE_WM == last_wm) -+ cpumask_andnot(&sched_sg_idle_mask, -+ &sched_sg_idle_mask, cpu_smt_mask(cpu)); -+#endif -+ return; -+ } -+ /* last_wm < watermark */ -+ for (i = last_wm + 1; i <= watermark; i++) -+ cpumask_set_cpu(cpu, &sched_rq_watermark[i]); -+#ifdef CONFIG_SCHED_SMT -+ if (!static_branch_likely(&sched_smt_present)) -+ return; -+ if (IDLE_WM == watermark) { -+ cpumask_t tmp; -+ cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[IDLE_WM]); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), -+ &sched_sg_idle_mask); -+ } -+#endif -+} -+ -+static inline struct task_struct *rq_runnable_task(struct rq *rq) -+{ -+ struct task_struct *next = sched_rq_first_task(rq); -+ -+ if (unlikely(next == rq->skip)) -+ next = sched_rq_next_task(next, rq); -+ -+ return next; -+} -+ -+/* -+ * Serialization rules: -+ * -+ * Lock order: -+ * -+ * p->pi_lock -+ * rq->lock -+ * hrtimer_cpu_base->lock (hrtimer_start() for bandwidth controls) -+ * -+ * rq1->lock -+ * rq2->lock where: rq1 < rq2 -+ * -+ * Regular state: -+ * -+ * Normal scheduling state is serialized by rq->lock. __schedule() takes the -+ * local CPU's rq->lock, it optionally removes the task from the runqueue and -+ * always looks at the local rq data structures to find the most elegible task -+ * to run next. -+ * -+ * Task enqueue is also under rq->lock, possibly taken from another CPU. -+ * Wakeups from another LLC domain might use an IPI to transfer the enqueue to -+ * the local CPU to avoid bouncing the runqueue state around [ see -+ * ttwu_queue_wakelist() ] -+ * -+ * Task wakeup, specifically wakeups that involve migration, are horribly -+ * complicated to avoid having to take two rq->locks. -+ * -+ * Special state: -+ * -+ * System-calls and anything external will use task_rq_lock() which acquires -+ * both p->pi_lock and rq->lock. 
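A toy version of the sched_rq_watermark bookkeeping in update_sched_rq_watermark() above: one mask per priority level, and a CPU's bit stays set for every level up to its runqueue's current watermark. Sizes are made up, plain unsigned masks stand in for cpumask_t, and the SMT sibling handling is left out.

#include <stdio.h>

#define LEVELS 8
#define CPUS   4

static unsigned int watermark_mask[LEVELS];     /* bit n == CPU n */

static void update_watermark(int cpu, int last_wm, int new_wm)
{
        int i;

        if (new_wm < last_wm)                   /* rq got busier: clear levels */
                for (i = new_wm + 1; i <= last_wm; i++)
                        watermark_mask[i] &= ~(1u << cpu);
        else                                    /* rq got idler: set levels */
                for (i = last_wm + 1; i <= new_wm; i++)
                        watermark_mask[i] |= 1u << cpu;
}

int main(void)
{
        update_watermark(2, 0, 5);              /* CPU2 idles up to level 5 */
        update_watermark(2, 5, 3);              /* then picks up some work */
        for (int i = 0; i < LEVELS; i++)
                printf("level %d: %#x\n", i, watermark_mask[i]);
        return 0;
}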
As a consequence the state they change is -+ * stable while holding either lock: -+ * -+ * - sched_setaffinity()/ -+ * set_cpus_allowed_ptr(): p->cpus_ptr, p->nr_cpus_allowed -+ * - set_user_nice(): p->se.load, p->*prio -+ * - __sched_setscheduler(): p->sched_class, p->policy, p->*prio, -+ * p->se.load, p->rt_priority, -+ * p->dl.dl_{runtime, deadline, period, flags, bw, density} -+ * - sched_setnuma(): p->numa_preferred_nid -+ * - sched_move_task()/ -+ * cpu_cgroup_fork(): p->sched_task_group -+ * - uclamp_update_active() p->uclamp* -+ * -+ * p->state <- TASK_*: -+ * -+ * is changed locklessly using set_current_state(), __set_current_state() or -+ * set_special_state(), see their respective comments, or by -+ * try_to_wake_up(). This latter uses p->pi_lock to serialize against -+ * concurrent self. -+ * -+ * p->on_rq <- { 0, 1 = TASK_ON_RQ_QUEUED, 2 = TASK_ON_RQ_MIGRATING }: -+ * -+ * is set by activate_task() and cleared by deactivate_task(), under -+ * rq->lock. Non-zero indicates the task is runnable, the special -+ * ON_RQ_MIGRATING state is used for migration without holding both -+ * rq->locks. It indicates task_cpu() is not stable, see task_rq_lock(). -+ * -+ * p->on_cpu <- { 0, 1 }: -+ * -+ * is set by prepare_task() and cleared by finish_task() such that it will be -+ * set before p is scheduled-in and cleared after p is scheduled-out, both -+ * under rq->lock. Non-zero indicates the task is running on its CPU. -+ * -+ * [ The astute reader will observe that it is possible for two tasks on one -+ * CPU to have ->on_cpu = 1 at the same time. ] -+ * -+ * task_cpu(p): is changed by set_task_cpu(), the rules are: -+ * -+ * - Don't call set_task_cpu() on a blocked task: -+ * -+ * We don't care what CPU we're not running on, this simplifies hotplug, -+ * the CPU assignment of blocked tasks isn't required to be valid. -+ * -+ * - for try_to_wake_up(), called under p->pi_lock: -+ * -+ * This allows try_to_wake_up() to only take one rq->lock, see its comment. 
-+ * -+ * - for migration called under rq->lock: -+ * [ see task_on_rq_migrating() in task_rq_lock() ] -+ * -+ * o move_queued_task() -+ * o detach_task() -+ * -+ * - for migration called under double_rq_lock(): -+ * -+ * o __migrate_swap_task() -+ * o push_rt_task() / pull_rt_task() -+ * o push_dl_task() / pull_dl_task() -+ * o dl_task_offline_migration() -+ * -+ */ -+ -+/* -+ * Context: p->pi_lock -+ */ -+static inline struct rq -+*__task_access_lock(struct task_struct *p, raw_spinlock_t **plock) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock(&rq->lock); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ *plock = NULL; -+ return rq; -+ } -+ } -+} -+ -+static inline void -+__task_access_unlock(struct task_struct *p, raw_spinlock_t *lock) -+{ -+ if (NULL != lock) -+ raw_spin_unlock(lock); -+} -+ -+static inline struct rq -+*task_access_lock_irqsave(struct task_struct *p, raw_spinlock_t **plock, -+ unsigned long *flags) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock_irqsave(&rq->lock, *flags); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&rq->lock, *flags); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ raw_spin_lock_irqsave(&p->pi_lock, *flags); -+ if (likely(!p->on_cpu && !p->on_rq && -+ rq == task_rq(p))) { -+ *plock = &p->pi_lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags); -+ } -+ } -+} -+ -+static inline void -+task_access_unlock_irqrestore(struct task_struct *p, raw_spinlock_t *lock, -+ unsigned long *flags) -+{ -+ raw_spin_unlock_irqrestore(lock, *flags); -+} -+ -+/* -+ * __task_rq_lock - lock the rq @p resides on. -+ */ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ for (;;) { -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) -+ return rq; -+ raw_spin_unlock(&rq->lock); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+/* -+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. -+ */ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ for (;;) { -+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ /* -+ * move_queued_task() task_rq_lock() -+ * -+ * ACQUIRE (rq->lock) -+ * [S] ->on_rq = MIGRATING [L] rq = task_rq() -+ * WMB (__set_task_cpu()) ACQUIRE (rq->lock); -+ * [S] ->cpu = new_cpu [L] task_rq() -+ * [L] ->on_rq -+ * RELEASE (rq->lock) -+ * -+ * If we observe the old CPU in task_rq_lock(), the acquire of -+ * the old rq->lock will fully serialize against the stores. -+ * -+ * If we observe the new CPU in task_rq_lock(), the address -+ * dependency headed by '[L] rq = task_rq()' and the acquire -+ * will pair with the WMB to ensure we then also see migrating. 
-+ */ -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+static inline void -+rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irqsave(&rq->lock, rf->flags); -+} -+ -+static inline void -+rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irqrestore(&rq->lock, rf->flags); -+} -+ -+/* -+ * RQ-clock updating methods: -+ */ -+ -+static void update_rq_clock_task(struct rq *rq, s64 delta) -+{ -+/* -+ * In theory, the compile should just see 0 here, and optimize out the call -+ * to sched_rt_avg_update. But I don't trust it... -+ */ -+ s64 __maybe_unused steal = 0, irq_delta = 0; -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; -+ -+ /* -+ * Since irq_time is only updated on {soft,}irq_exit, we might run into -+ * this case when a previous update_rq_clock() happened inside a -+ * {soft,}irq region. -+ * -+ * When this happens, we stop ->clock_task and only update the -+ * prev_irq_time stamp to account for the part that fit, so that a next -+ * update will consume the rest. This ensures ->clock_task is -+ * monotonic. -+ * -+ * It does however cause some slight miss-attribution of {soft,}irq -+ * time, a more accurate solution would be to update the irq_time using -+ * the current rq->clock timestamp, except that would require using -+ * atomic ops. -+ */ -+ if (irq_delta > delta) -+ irq_delta = delta; -+ -+ rq->prev_irq_time += irq_delta; -+ delta -= irq_delta; -+#endif -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ if (static_key_false((¶virt_steal_rq_enabled))) { -+ steal = paravirt_steal_clock(cpu_of(rq)); -+ steal -= rq->prev_steal_time_rq; -+ -+ if (unlikely(steal > delta)) -+ steal = delta; -+ -+ rq->prev_steal_time_rq += steal; -+ delta -= steal; -+ } -+#endif -+ -+ rq->clock_task += delta; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ if ((irq_delta + steal)) -+ update_irq_load_avg(rq, irq_delta + steal); -+#endif -+} -+ -+static inline void update_rq_clock(struct rq *rq) -+{ -+ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; -+ -+ if (unlikely(delta <= 0)) -+ return; -+ rq->clock += delta; -+ update_rq_clock_task(rq, delta); -+} -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out -+ * of nohz mode if necessary. 
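The clock bookkeeping in update_rq_clock_task() above boils down to clamped subtraction: irq time and paravirt steal time are capped at the remaining delta and removed from what the task is charged for. The values below are made up for illustration.

#include <stdio.h>

int main(void)
{
        long long delta = 1000000;      /* 1 ms of wall clock on this rq */
        long long irq_delta = 300000;   /* time spent in irq context */
        long long steal = 1500000;      /* hypervisor steal, larger than delta */

        if (irq_delta > delta)
                irq_delta = delta;
        delta -= irq_delta;
        if (steal > delta)              /* clamp, like the kernel does */
                steal = delta;
        delta -= steal;

        printf("clock_task advances by %lld ns\n", delta);  /* 0 here */
        return 0;
}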
-+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (rq->nr_running < 2) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+/* -+ * Add/Remove/Requeue task to/from the runqueue routines -+ * Context: rq->lock -+ */ -+static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ /*printk(KERN_INFO "sched: dequeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ -+ WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ __SCHED_DEQUEUE_TASK(p, rq, flags, update_sched_rq_watermark(rq)); -+ --rq->nr_running; -+#ifdef CONFIG_SMP -+ if (1 == rq->nr_running) -+ cpumask_clear_cpu(cpu_of(rq), &sched_rq_pending_mask); -+#endif -+ -+ sched_update_tick_dependency(rq); -+} -+ -+static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ /*printk(KERN_INFO "sched: enqueue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ -+ WARN_ONCE(task_rq(p) != rq, "sched: enqueue task reside on cpu%d to cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ __SCHED_ENQUEUE_TASK(p, rq, flags); -+ update_sched_rq_watermark(rq); -+ ++rq->nr_running; -+#ifdef CONFIG_SMP -+ if (2 == rq->nr_running) -+ cpumask_set_cpu(cpu_of(rq), &sched_rq_pending_mask); -+#endif -+ -+ sched_update_tick_dependency(rq); -+ -+ /* -+ * If in_iowait is set, the code below may not trigger any cpufreq -+ * utilization updates, so do it here explicitly with the IOWAIT flag -+ * passed. -+ */ -+ if (p->in_iowait) -+ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq) -+{ -+ lockdep_assert_held(&rq->lock); -+ /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ -+ WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", -+ cpu_of(rq), task_cpu(p)); -+ -+ __SCHED_REQUEUE_TASK(p, rq, update_sched_rq_watermark(rq)); -+} -+ -+/* -+ * cmpxchg based fetch_or, macro so it works for different integer types -+ */ -+#define fetch_or(ptr, mask) \ -+ ({ \ -+ typeof(ptr) _ptr = (ptr); \ -+ typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _old, _val = *_ptr; \ -+ \ -+ for (;;) { \ -+ _old = cmpxchg(_ptr, _val, _val | _mask); \ -+ if (_old == _val) \ -+ break; \ -+ _val = _old; \ -+ } \ -+ _old; \ -+}) -+ -+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) -+/* -+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * this avoids any races wrt polling state changes and thereby avoids -+ * spurious IPIs. -+ */ -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+} -+ -+/* -+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. -+ * -+ * If this returns true, then the idle task promises to call -+ * sched_ttwu_pending() and reschedule soon. 
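The fetch_or() macro above is an open-coded cmpxchg loop; in standalone C11 the same "set bits, return the old value" operation is atomic_fetch_or(), which is how set_nr_and_not_polling() decides whether an IPI can be skipped. The flag bit values here are illustrative, not the real TIF_* values.

#include <stdatomic.h>
#include <stdio.h>

#define TIF_NEED_RESCHED   (1u << 1)    /* illustrative bit positions */
#define TIF_POLLING_NRFLAG (1u << 2)

int main(void)
{
        atomic_uint flags = TIF_POLLING_NRFLAG;

        /* set NEED_RESCHED and learn whether the CPU was polling;
         * if it was, the idle loop will notice and no IPI is needed */
        unsigned int old = atomic_fetch_or(&flags, TIF_NEED_RESCHED);
        printf("was polling, skip IPI: %d\n", !!(old & TIF_POLLING_NRFLAG));
        return 0;
}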
-+ */ -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); -+ -+ for (;;) { -+ if (!(val & _TIF_POLLING_NRFLAG)) -+ return false; -+ if (val & _TIF_NEED_RESCHED) -+ return true; -+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); -+ if (old == val) -+ break; -+ val = old; -+ } -+ return true; -+} -+ -+#else -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ return false; -+} -+#endif -+#endif -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. -+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. -+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. -+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); -+ /* task can safely be re-inserted now: */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ -+/* -+ * resched_curr - mark rq's current task 'to be rescheduled now'. 
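Typical caller pattern for the wake_q helpers defined above: wakeups are queued while a lock is held and issued afterwards in one batch. This is a kernel-context fragment shown only to illustrate the calling convention; it will not build outside a kernel tree.

#include <linux/sched.h>
#include <linux/sched/wake_q.h>

static void wake_two_waiters(struct task_struct *a, struct task_struct *b)
{
        DEFINE_WAKE_Q(wake_q);

        /* queue while (typically) still holding the lock that found them */
        wake_q_add(&wake_q, a);
        wake_q_add(&wake_q, b);

        /* ... then issue the actual wakeups after dropping it */
        wake_up_q(&wake_q);
}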
-+ * -+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. -+ */ -+void resched_curr(struct rq *rq) -+{ -+ struct task_struct *curr = rq->curr; -+ int cpu; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ if (test_tsk_need_resched(curr)) -+ return; -+ -+ cpu = cpu_of(rq); -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(curr); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(curr)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(cpu_rq(cpu)); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+} -+ -+#ifdef CONFIG_SMP -+#ifdef CONFIG_NO_HZ_COMMON -+void nohz_balance_enter_idle(int cpu) {} -+ -+void select_nohz_load_balancer(int stop_tick) {} -+ -+void set_cpu_sd_state_idle(void) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. -+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). -+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(), default_cpu = -1; -+ struct cpumask *mask; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { -+ if (!idle_cpu(cpu)) -+ return cpu; -+ default_cpu = cpu; -+ } -+ -+ for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) -+ for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) -+ if (!idle_cpu(i)) -+ return i; -+ -+ if (default_cpu == -1) -+ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+ cpu = default_cpu; -+ -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. -+ */ -+static inline void wake_up_idle_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ if (set_nr_and_not_polling(rq->idle)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+static inline bool wake_up_full_nohz_cpu(int cpu) -+{ -+ /* -+ * We just need the target to call irq_exit() and re-evaluate -+ * the next tick. The nohz full kick at least implies that. -+ * If needed we can still optimize that later with an -+ * empty IRQ. -+ */ -+ if (cpu_is_offline(cpu)) -+ return true; /* Don't try to wake offline CPUs. 
*/ -+ if (tick_nohz_full_cpu(cpu)) { -+ if (cpu != smp_processor_id() || -+ tick_nohz_tick_stopped()) -+ tick_nohz_full_kick_cpu(cpu); -+ return true; -+ } -+ -+ return false; -+} -+ -+void wake_up_nohz_cpu(int cpu) -+{ -+ if (!wake_up_full_nohz_cpu(cpu)) -+ wake_up_idle_cpu(cpu); -+} -+ -+static void nohz_csd_func(void *info) -+{ -+ struct rq *rq = info; -+ int cpu = cpu_of(rq); -+ unsigned int flags; -+ -+ /* -+ * Release the rq::nohz_csd. -+ */ -+ flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); -+ WARN_ON(!(flags & NOHZ_KICK_MASK)); -+ -+ rq->idle_balance = idle_cpu(cpu); -+ if (rq->idle_balance && !need_resched()) { -+ rq->nohz_idle_balance = flags; -+ raise_softirq_irqoff(SCHED_SOFTIRQ); -+ } -+} -+ -+#endif /* CONFIG_NO_HZ_COMMON */ -+#endif /* CONFIG_SMP */ -+ -+static inline void check_preempt_curr(struct rq *rq) -+{ -+ if (sched_rq_first_task(rq) != rq->curr) -+ resched_curr(rq); -+} -+ -+static inline void -+rq_csd_init(struct rq *rq, call_single_data_t *csd, smp_call_func_t func) -+{ -+ csd->flags = 0; -+ csd->func = func; -+ csd->info = rq; -+} -+ -+#ifdef CONFIG_SCHED_HRTICK -+/* -+ * Use HR-timers to deliver accurate preemption points. -+ */ -+ -+static void hrtick_clear(struct rq *rq) -+{ -+ if (hrtimer_active(&rq->hrtick_timer)) -+ hrtimer_cancel(&rq->hrtick_timer); -+} -+ -+/* -+ * High-resolution timer tick. -+ * Runs from hardirq context with interrupts disabled. -+ */ -+static enum hrtimer_restart hrtick(struct hrtimer *timer) -+{ -+ struct rq *rq = container_of(timer, struct rq, hrtick_timer); -+ struct task_struct *p; -+ -+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); -+ -+ raw_spin_lock(&rq->lock); -+ p = rq->curr; -+ p->time_slice = 0; -+ resched_curr(rq); -+ raw_spin_unlock(&rq->lock); -+ -+ return HRTIMER_NORESTART; -+} -+ -+/* -+ * Use hrtick when: -+ * - enabled by features -+ * - hrtimer is actually high res -+ */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ /** -+ * Alt schedule FW doesn't support sched_feat yet -+ if (!sched_feat(HRTICK)) -+ return 0; -+ */ -+ if (!cpu_active(cpu_of(rq))) -+ return 0; -+ return hrtimer_is_hres_active(&rq->hrtick_timer); -+} -+ -+#ifdef CONFIG_SMP -+ -+static void __hrtick_restart(struct rq *rq) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ -+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); -+} -+ -+/* -+ * called from hardirq (IPI) context -+ */ -+static void __hrtick_start(void *arg) -+{ -+ struct rq *rq = arg; -+ -+ raw_spin_lock(&rq->lock); -+ __hrtick_restart(rq); -+ raw_spin_unlock(&rq->lock); -+} -+ -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ ktime_t time; -+ s64 delta; -+ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense and can cause timer DoS. -+ */ -+ delta = max_t(s64, delay, 10000LL); -+ time = ktime_add_ns(timer->base->get_time(), delta); -+ -+ hrtimer_set_expires(timer, time); -+ -+ if (rq == this_rq()) -+ __hrtick_restart(rq); -+ else -+ smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); -+} -+ -+#else -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense. Rely on vruntime for fairness. 
-+ */ -+ delay = max_t(u64, delay, 10000LL); -+ hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), -+ HRTIMER_MODE_REL_PINNED_HARD); -+} -+#endif /* CONFIG_SMP */ -+ -+static void hrtick_rq_init(struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start); -+#endif -+ -+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); -+ rq->hrtick_timer.function = hrtick; -+} -+#else /* CONFIG_SCHED_HRTICK */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline void hrtick_clear(struct rq *rq) -+{ -+} -+ -+static inline void hrtick_rq_init(struct rq *rq) -+{ -+} -+#endif /* CONFIG_SCHED_HRTICK */ -+ -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (task_has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ -+ return p->static_prio + MAX_PRIORITY_ADJ; -+} -+ -+/* -+ * Calculate the current priority, i.e. the priority -+ * taken into account by the scheduler. This value might -+ * be boosted by RT tasks as it will be RT if the task got -+ * RT-boosted. If not then it returns p->normal_prio. -+ */ -+static int effective_prio(struct task_struct *p) -+{ -+ p->normal_prio = normal_prio(p); -+ /* -+ * If we are RT tasks or we were boosted to RT priority, -+ * keep the priority unchanged. Otherwise, update priority -+ * to the normal priority: -+ */ -+ if (!rt_prio(p->prio)) -+ return p->normal_prio; -+ return p->prio; -+} -+ -+/* -+ * activate_task - move a task to the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static void activate_task(struct task_struct *p, struct rq *rq) -+{ -+ enqueue_task(p, rq, ENQUEUE_WAKEUP); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ cpufreq_update_util(rq, 0); -+} -+ -+/* -+ * deactivate_task - remove a task from the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static inline void deactivate_task(struct task_struct *p, struct rq *rq) -+{ -+ dequeue_task(p, rq, DEQUEUE_SLEEP); -+ p->on_rq = 0; -+ cpufreq_update_util(rq, 0); -+} -+ -+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->cpu is set up to a new value, task_access_lock(p, ...) can be -+ * successfully executed on another CPU. We must ensure that updates of -+ * per-task data have been completed by this moment. -+ */ -+ smp_wmb(); -+ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ WRITE_ONCE(p->cpu, cpu); -+#else -+ WRITE_ONCE(task_thread_info(p)->cpu, cpu); -+#endif -+#endif -+} -+ -+#ifdef CONFIG_SMP -+void set_task_cpu(struct task_struct *p, unsigned int new_cpu) -+{ -+#ifdef CONFIG_SCHED_DEBUG -+ /* -+ * We should never call set_task_cpu() on a blocked task, -+ * ttwu() will sort out the placement. -+ */ -+ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && -+ !p->on_rq); -+#ifdef CONFIG_LOCKDEP -+ /* -+ * The caller should hold either p->pi_lock or rq->lock, when changing -+ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. -+ * -+ * sched_move_task() holds both and thus holding either pins the cgroup, -+ * see task_group(). -+ */ -+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || -+ lockdep_is_held(&task_rq(p)->lock))); -+#endif -+ /* -+ * Clearly, migrating tasks to offline CPUs is a fairly daft thing. 
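The mapping used by normal_prio() above, restated standalone: RT tasks map rt_priority 1..99 down to prio 98..0, and everything else is static_prio plus the BMQ adjustment. The helper signature here is simplified for illustration.

#include <stdio.h>

#define MAX_RT_PRIO      100
#define MAX_PRIORITY_ADJ 7      /* BMQ */

static int normal_prio(int is_rt, int rt_priority, int static_prio)
{
        if (is_rt)
                return MAX_RT_PRIO - 1 - rt_priority;
        return static_prio + MAX_PRIORITY_ADJ;
}

int main(void)
{
        printf("SCHED_FIFO rt_priority 50 -> prio %d\n", normal_prio(1, 50, 0));
        printf("nice 0 (static 120)      -> prio %d\n", normal_prio(0, 0, 120));
        return 0;
}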
-+ */ -+ WARN_ON_ONCE(!cpu_online(new_cpu)); -+#endif -+ if (task_cpu(p) == new_cpu) -+ return; -+ trace_sched_migrate_task(p, new_cpu); -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ -+ __set_task_cpu(p, new_cpu); -+} -+ -+static inline bool is_per_cpu_kthread(struct task_struct *p) -+{ -+ return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); -+} -+ -+/* -+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see -+ * __set_cpus_allowed_ptr() and select_fallback_rq(). -+ */ -+static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -+{ -+ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ -+ if (is_per_cpu_kthread(p)) -+ return cpu_online(cpu); -+ -+ return cpu_active(cpu); -+} -+ -+/* -+ * This is how migration works: -+ * -+ * 1) we invoke migration_cpu_stop() on the target CPU using -+ * stop_one_cpu(). -+ * 2) stopper starts to run (implicitly forcing the migrated thread -+ * off the CPU) -+ * 3) it checks whether the migrated task is still in the wrong runqueue. -+ * 4) if it's in the wrong runqueue then the migration thread removes -+ * it and puts it into the right queue. -+ * 5) stopper completes and stop_one_cpu() returns and the migration -+ * is done. -+ */ -+ -+/* -+ * move_queued_task - move a queued task to new rq. -+ * -+ * Returns (locked) new rq. Old rq's lock is released. -+ */ -+static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int -+ new_cpu) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); -+ dequeue_task(p, rq, 0); -+ set_task_cpu(p, new_cpu); -+ raw_spin_unlock(&rq->lock); -+ -+ rq = cpu_rq(new_cpu); -+ -+ raw_spin_lock(&rq->lock); -+ BUG_ON(task_cpu(p) != new_cpu); -+ enqueue_task(p, rq, 0); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ check_preempt_curr(rq); -+ -+ return rq; -+} -+ -+struct migration_arg { -+ struct task_struct *task; -+ int dest_cpu; -+}; -+ -+/* -+ * Move (not current) task off this CPU, onto the destination CPU. We're doing -+ * this because either it can't run here any more (set_cpus_allowed() -+ * away from this CPU, or CPU going down), or because we're -+ * attempting to rebalance this task on exec (sched_exec). -+ * -+ * So we race with normal scheduler movements, but that's OK, as long -+ * as the task is no longer on this CPU. -+ */ -+static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int -+ dest_cpu) -+{ -+ /* Affinity changed (again). */ -+ if (!is_cpu_allowed(p, dest_cpu)) -+ return rq; -+ -+ update_rq_clock(rq); -+ return move_queued_task(rq, p, dest_cpu); -+} -+ -+/* -+ * migration_cpu_stop - this will be executed by a highprio stopper thread -+ * and performs thread migration by bumping thread off CPU then -+ * 'pushing' onto another runqueue. -+ */ -+static int migration_cpu_stop(void *data) -+{ -+ struct migration_arg *arg = data; -+ struct task_struct *p = arg->task; -+ struct rq *rq = this_rq(); -+ -+ /* -+ * The original target CPU might have gone down and we might -+ * be on another CPU but it doesn't matter. -+ */ -+ local_irq_disable(); -+ /* -+ * We need to explicitly wake pending tasks before running -+ * __migrate_task() such that we will not miss enforcing cpus_ptr -+ * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. 
-+ */ -+ flush_smp_call_function_from_idle(); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ /* -+ * If task_rq(p) != rq, it cannot be migrated here, because we're -+ * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because -+ * we're holding p->pi_lock. -+ */ -+ if (task_rq(p) == rq && task_on_rq_queued(p)) -+ rq = __migrate_task(rq, p, arg->dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_enable(); -+ return 0; -+} -+ -+static inline void -+set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ p->nr_cpus_allowed = cpumask_weight(new_mask); -+} -+ -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ set_cpus_allowed_common(p, new_mask); -+} -+#endif -+ -+/** -+ * task_curr - is this task currently executing on a CPU? -+ * @p: the task in question. -+ * -+ * Return: 1 if the task is currently executing. 0 otherwise. -+ */ -+inline int task_curr(const struct task_struct *p) -+{ -+ return cpu_curr(task_cpu(p)) == p; -+} -+ -+#ifdef CONFIG_SMP -+/* -+ * wait_task_inactive - wait for a thread to unschedule. -+ * -+ * If @match_state is nonzero, it's the @p->state value just checked and -+ * not expected to change. If it changes, i.e. @p might have woken up, -+ * then return zero. When we succeed in waiting for @p to be off its CPU, -+ * we return a positive number (its total switch count). If a second call -+ * a short while later returns the same number, the caller can be sure that -+ * @p has remained unscheduled the whole time. -+ * -+ * The caller must ensure that the task *will* unschedule sometime soon, -+ * else this function might spin for a *long* time. This function can't -+ * be called with interrupts off, or it may introduce deadlock with -+ * smp_call_function() if an IPI is sent by the same process we are -+ * waiting to become inactive. -+ */ -+unsigned long wait_task_inactive(struct task_struct *p, long match_state) -+{ -+ unsigned long flags; -+ bool running, on_rq; -+ unsigned long ncsw; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ for (;;) { -+ rq = task_rq(p); -+ -+ /* -+ * If the task is actively running on another CPU -+ * still, just relax and busy-wait without holding -+ * any locks. -+ * -+ * NOTE! Since we don't hold any locks, it's not -+ * even sure that "rq" stays as the right runqueue! -+ * But we don't care, since this will return false -+ * if the runqueue has changed and p is actually now -+ * running somewhere else! -+ */ -+ while (task_running(p) && p == rq->curr) { -+ if (match_state && unlikely(p->state != match_state)) -+ return 0; -+ cpu_relax(); -+ } -+ -+ /* -+ * Ok, time to look more closely! We need the rq -+ * lock now, to be *sure*. If we're wrong, we'll -+ * just go back and repeat. -+ */ -+ task_access_lock_irqsave(p, &lock, &flags); -+ trace_sched_wait_task(p); -+ running = task_running(p); -+ on_rq = p->on_rq; -+ ncsw = 0; -+ if (!match_state || p->state == match_state) -+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ /* -+ * If it changed from the expected state, bail out now. -+ */ -+ if (unlikely(!ncsw)) -+ break; -+ -+ /* -+ * Was it really running after all now that we -+ * checked with the proper locks actually held? -+ * -+ * Oops. Go back and try again.. 
-+ */ -+ if (unlikely(running)) { -+ cpu_relax(); -+ continue; -+ } -+ -+ /* -+ * It's not enough that it's not actively running, -+ * it must be off the runqueue _entirely_, and not -+ * preempted! -+ * -+ * So if it was still runnable (but just not actively -+ * running right now), it's preempted, and we should -+ * yield - it could be a while. -+ */ -+ if (unlikely(on_rq)) { -+ ktime_t to = NSEC_PER_SEC / HZ; -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ continue; -+ } -+ -+ /* -+ * Ahh, all good. It wasn't running, and it wasn't -+ * runnable, which means that it will never become -+ * running in the future either. We're all done! -+ */ -+ break; -+ } -+ -+ return ncsw; -+} -+ -+/*** -+ * kick_process - kick a running thread to enter/exit the kernel -+ * @p: the to-be-kicked thread -+ * -+ * Cause a process which is running on another CPU to enter -+ * kernel-mode, without any delay. (to get signals handled.) -+ * -+ * NOTE: this function doesn't have to take the runqueue lock, -+ * because all it wants to ensure is that the remote task enters -+ * the kernel. If the IPI races and the task has been migrated -+ * to another CPU then no harm is done and the purpose has been -+ * achieved as well. -+ */ -+void kick_process(struct task_struct *p) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ cpu = task_cpu(p); -+ if ((cpu != smp_processor_id()) && task_curr(p)) -+ smp_send_reschedule(cpu); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(kick_process); -+ -+/* -+ * ->cpus_ptr is protected by both rq->lock and p->pi_lock -+ * -+ * A few notes on cpu_active vs cpu_online: -+ * -+ * - cpu_active must be a subset of cpu_online -+ * -+ * - on CPU-up we allow per-CPU kthreads on the online && !active CPU, -+ * see __set_cpus_allowed_ptr(). At this point the newly online -+ * CPU isn't yet part of the sched domains, and balancing will not -+ * see it. -+ * -+ * - on cpu-down we clear cpu_active() to mask the sched domains and -+ * avoid the load balancer to place new tasks on the to be removed -+ * CPU. Existing tasks will remain running there and will be taken -+ * off. -+ * -+ * This means that fallback selection must not select !active CPUs. -+ * And can assume that any active CPU must be online. Conversely -+ * select_task_rq() below may allow selection of !active CPUs in order -+ * to satisfy the above rules. -+ */ -+static int select_fallback_rq(int cpu, struct task_struct *p) -+{ -+ int nid = cpu_to_node(cpu); -+ const struct cpumask *nodemask = NULL; -+ enum { cpuset, possible, fail } state = cpuset; -+ int dest_cpu; -+ -+ /* -+ * If the node that the CPU is on has been offlined, cpu_to_node() -+ * will return -1. There is no CPU on the node, and we should -+ * select the CPU on the other node. -+ */ -+ if (nid != -1) { -+ nodemask = cpumask_of_node(nid); -+ -+ /* Look for allowed, online CPU in same node. */ -+ for_each_cpu(dest_cpu, nodemask) { -+ if (!cpu_active(dest_cpu)) -+ continue; -+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) -+ return dest_cpu; -+ } -+ } -+ -+ for (;;) { -+ /* Any allowed, online CPU? */ -+ for_each_cpu(dest_cpu, p->cpus_ptr) { -+ if (!is_cpu_allowed(p, dest_cpu)) -+ continue; -+ goto out; -+ } -+ -+ /* No more Mr. Nice Guy. 
*/ -+ switch (state) { -+ case cpuset: -+ if (IS_ENABLED(CONFIG_CPUSETS)) { -+ cpuset_cpus_allowed_fallback(p); -+ state = possible; -+ break; -+ } -+ fallthrough; -+ case possible: -+ do_set_cpus_allowed(p, cpu_possible_mask); -+ state = fail; -+ break; -+ -+ case fail: -+ BUG(); -+ break; -+ } -+ } -+ -+out: -+ if (state != cpuset) { -+ /* -+ * Don't tell them about moving exiting tasks or -+ * kernel threads (both mm NULL), since they never -+ * leave kernel. -+ */ -+ if (p->mm && printk_ratelimit()) { -+ printk_deferred("process %d (%s) no longer affine to cpu%d\n", -+ task_pid_nr(p), p->comm, cpu); -+ } -+ } -+ -+ return dest_cpu; -+} -+ -+static inline int select_task_rq(struct task_struct *p, struct rq *rq) -+{ -+ cpumask_t chk_mask, tmp; -+ -+ if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_online_mask))) -+ return select_fallback_rq(task_cpu(p), p); -+ -+ if ( -+#ifdef CONFIG_SCHED_SMT -+ cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || -+#endif -+ cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || -+ cpumask_and(&tmp, &chk_mask, -+ &sched_rq_watermark[task_sched_prio(p, rq) + 1])) -+ return best_mask_cpu(task_cpu(p), &tmp); -+ -+ return best_mask_cpu(task_cpu(p), &chk_mask); -+} -+ -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. -+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. -+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ int dest_cpu; -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. 
-+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (cpumask_equal(&p->cpus_mask, new_mask)) -+ goto out; -+ -+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ do_set_cpus_allowed(p, new_mask); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. -+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(p) || p->state == TASK_WAKING) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ /* Need help from migration thread: drop lock and wait. */ -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -+ return 0; -+ } -+ if (task_on_rq_queued(p)) { -+ /* -+ * OK, since we're going to drop the lock immediately -+ * afterwards anyway. -+ */ -+ update_rq_clock(rq); -+ rq = move_queued_task(rq, p, dest_cpu); -+ lock = &rq->lock; -+ } -+ -+out: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ return ret; -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+#else /* CONFIG_SMP */ -+ -+static inline int select_task_rq(struct task_struct *p, struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline int -+__set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+ -+#endif /* CONFIG_SMP */ -+ -+static void -+ttwu_stat(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq; -+ -+ if (!schedstat_enabled()) -+ return; -+ -+ rq= this_rq(); -+ -+#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) -+ __schedstat_inc(rq->ttwu_local); -+ else { -+ /** Alt schedule FW ToDo: -+ * How to do ttwu_wake_remote -+ */ -+ } -+#endif /* CONFIG_SMP */ -+ -+ __schedstat_inc(rq->ttwu_count); -+} -+ -+/* -+ * Mark the task runnable and perform wakeup-preemption. -+ */ -+static inline void -+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ check_preempt_curr(rq); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+} -+ -+static inline void -+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ if (p->sched_contributes_to_load) -+ rq->nr_uninterruptible--; -+ -+ activate_task(p, rq); -+ ttwu_do_wakeup(rq, p, 0); -+} -+ -+/* -+ * Consider @p being inside a wait loop: -+ * -+ * for (;;) { -+ * set_current_state(TASK_UNINTERRUPTIBLE); -+ * -+ * if (CONDITION) -+ * break; -+ * -+ * schedule(); -+ * } -+ * __set_current_state(TASK_RUNNING); -+ * -+ * between set_current_state() and schedule(). In this case @p is still -+ * runnable, so all that needs doing is change p->state back to TASK_RUNNING in -+ * an atomic manner. -+ * -+ * By taking task_rq(p)->lock we serialize against schedule(), if @p->on_rq -+ * then schedule() must still happen and p->state can be changed to -+ * TASK_RUNNING. Otherwise we lost the race, schedule() has happened, and we -+ * need to do a full wakeup with enqueue. 
-+ * -+ * Returns: %true when the wakeup is done, -+ * %false otherwise. -+ */ -+static int ttwu_runnable(struct task_struct *p, int wake_flags) -+{ -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ rq = __task_access_lock(p, &lock); -+ if (task_on_rq_queued(p)) { -+ /* check_preempt_curr() may use rq clock */ -+ update_rq_clock(rq); -+ ttwu_do_wakeup(rq, p, wake_flags); -+ ret = 1; -+ } -+ __task_access_unlock(p, lock); -+ -+ return ret; -+} -+ -+#ifdef CONFIG_SMP -+void sched_ttwu_pending(void *arg) -+{ -+ struct llist_node *llist = arg; -+ struct rq *rq = this_rq(); -+ struct task_struct *p, *t; -+ struct rq_flags rf; -+ -+ if (!llist) -+ return; -+ -+ /* -+ * rq::ttwu_pending racy indication of out-standing wakeups. -+ * Races such that false-negatives are possible, since they -+ * are shorter lived that false-positives would be. -+ */ -+ WRITE_ONCE(rq->ttwu_pending, 0); -+ -+ rq_lock_irqsave(rq, &rf); -+ update_rq_clock(rq); -+ -+ llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { -+ if (WARN_ON_ONCE(p->on_cpu)) -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq))) -+ set_task_cpu(p, cpu_of(rq)); -+ -+ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0); -+ } -+ -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+void send_call_function_single_ipi(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (!set_nr_if_polling(rq->idle)) -+ arch_send_call_function_single_ipi(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+/* -+ * Queue a task on the target CPUs wake_list and wake the CPU via IPI if -+ * necessary. The wakee CPU on receipt of the IPI will queue the task -+ * via sched_ttwu_wakeup() for activation so the wakee incurs the cost -+ * of the wakeup instead of the waker. -+ */ -+static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); -+ -+ WRITE_ONCE(rq->ttwu_pending, 1); -+ __smp_call_single_queue(cpu, &p->wake_entry.llist); -+} -+ -+static inline bool ttwu_queue_cond(int cpu, int wake_flags) -+{ -+ /* -+ * If the CPU does not share cache, then queue the task on the -+ * remote rqs wakelist to avoid accessing remote data. -+ */ -+ if (!cpus_share_cache(smp_processor_id(), cpu)) -+ return true; -+ -+ /* -+ * If the task is descheduling and the only running task on the -+ * CPU then use the wakelist to offload the task activation to -+ * the soon-to-be-idle CPU as the current CPU is likely busy. -+ * nr_running is checked to avoid unnecessary task stacking. 
-+ */ -+ if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) -+ return true; -+ -+ return false; -+} -+ -+static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ if (__is_defined(ALT_SCHED_TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) { -+ if (WARN_ON_ONCE(cpu == smp_processor_id())) -+ return false; -+ -+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ -+ __ttwu_queue_wakelist(p, cpu, wake_flags); -+ return true; -+ } -+ -+ return false; -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (is_idle_task(rq->curr)) -+ smp_send_reschedule(cpu); -+ /* Else CPU is not idle, do nothing here */ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); -+} -+#else /* !CONFIG_SMP */ -+ -+static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ return false; -+} -+ -+#endif /* CONFIG_SMP */ -+ -+static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (ttwu_queue_wakelist(p, cpu, wake_flags)) -+ return; -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ raw_spin_unlock(&rq->lock); -+} -+ -+/* -+ * Notes on Program-Order guarantees on SMP systems. -+ * -+ * MIGRATION -+ * -+ * The basic program-order guarantee on SMP systems is that when a task [t] -+ * migrates, all its activity on its old CPU [c0] happens-before any subsequent -+ * execution on its new CPU [c1]. -+ * -+ * For migration (of runnable tasks) this is provided by the following means: -+ * -+ * A) UNLOCK of the rq(c0)->lock scheduling out task t -+ * B) migration for t is required to synchronize *both* rq(c0)->lock and -+ * rq(c1)->lock (if not at the same time, then in that order). -+ * C) LOCK of the rq(c1)->lock scheduling in task -+ * -+ * Transitivity guarantees that B happens after A and C after B. -+ * Note: we only require RCpc transitivity. -+ * Note: the CPU doing B need not be c0 or c1 -+ * -+ * Example: -+ * -+ * CPU0 CPU1 CPU2 -+ * -+ * LOCK rq(0)->lock -+ * sched-out X -+ * sched-in Y -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(0)->lock // orders against CPU0 -+ * dequeue X -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(1)->lock -+ * enqueue X -+ * UNLOCK rq(1)->lock -+ * -+ * LOCK rq(1)->lock // orders against CPU2 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(1)->lock -+ * -+ * -+ * BLOCKING -- aka. SLEEP + WAKEUP -+ * -+ * For blocking we (obviously) need to provide the same guarantee as for -+ * migration. However the means are completely different as there is no lock -+ * chain to provide order. 
Instead we do: -+ * -+ * 1) smp_store_release(X->on_cpu, 0) -- finish_task() -+ * 2) smp_cond_load_acquire(!X->on_cpu) -- try_to_wake_up() -+ * -+ * Example: -+ * -+ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule) -+ * -+ * LOCK rq(0)->lock LOCK X->pi_lock -+ * dequeue X -+ * sched-out X -+ * smp_store_release(X->on_cpu, 0); -+ * -+ * smp_cond_load_acquire(&X->on_cpu, !VAL); -+ * X->state = WAKING -+ * set_task_cpu(X,2) -+ * -+ * LOCK rq(2)->lock -+ * enqueue X -+ * X->state = RUNNING -+ * UNLOCK rq(2)->lock -+ * -+ * LOCK rq(2)->lock // orders against CPU1 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(2)->lock -+ * -+ * UNLOCK X->pi_lock -+ * UNLOCK rq(0)->lock -+ * -+ * -+ * However; for wakeups there is a second guarantee we must provide, namely we -+ * must observe the state that lead to our wakeup. That is, not only must our -+ * task observe its own prior state, it must also observe the stores prior to -+ * its wakeup. -+ * -+ * This means that any means of doing remote wakeups must order the CPU doing -+ * the wakeup against the CPU the task is going to end up running on. This, -+ * however, is already required for the regular Program-Order guarantee above, -+ * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire). -+ * -+ */ -+ -+/** -+ * try_to_wake_up - wake up a thread -+ * @p: the thread to be awakened -+ * @state: the mask of task states that can be woken -+ * @wake_flags: wake modifier flags (WF_*) -+ * -+ * Conceptually does: -+ * -+ * If (@state & @p->state) @p->state = TASK_RUNNING. -+ * -+ * If the task was not queued/runnable, also place it back on a runqueue. -+ * -+ * This function is atomic against schedule() which would dequeue the task. -+ * -+ * It issues a full memory barrier before accessing @p->state, see the comment -+ * with set_current_state(). -+ * -+ * Uses p->pi_lock to serialize against concurrent wake-ups. -+ * -+ * Relies on p->pi_lock stabilizing: -+ * - p->sched_class -+ * - p->cpus_ptr -+ * - p->sched_task_group -+ * in order to do migration, see its use of select_task_rq()/set_task_cpu(). -+ * -+ * Tries really hard to only take one task_rq(p)->lock for performance. -+ * Takes rq->lock in: -+ * - ttwu_runnable() -- old rq, unavoidable, see comment there; -+ * - ttwu_queue() -- new rq, for enqueue of the task; -+ * - psi_ttwu_dequeue() -- much sadness :-( accounting will kill us. -+ * -+ * As a consequence we race really badly with just about everything. See the -+ * many memory barriers and their comments for details. -+ * -+ * Return: %true if @p->state changes (an actual wakeup was done), -+ * %false otherwise. -+ */ -+static int try_to_wake_up(struct task_struct *p, unsigned int state, -+ int wake_flags) -+{ -+ unsigned long flags; -+ int cpu, success = 0; -+ -+ preempt_disable(); -+ if (p == current) { -+ /* -+ * We're waking current, this means 'p->on_rq' and 'task_cpu(p) -+ * == smp_processor_id()'. Together this means we can special -+ * case the whole 'p->on_rq && ttwu_runnable()' case below -+ * without taking any locks. -+ * -+ * In particular: -+ * - we rely on Program-Order guarantees for all the ordering, -+ * - we're serialized against set_special_state() by virtue of -+ * it disabling IRQs (this allows not taking ->pi_lock). 
-+ */ -+ if (!(p->state & state)) -+ goto out; -+ -+ success = 1; -+ trace_sched_waking(p); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+ goto out; -+ } -+ -+ /* -+ * If we are going to wake up a thread waiting for CONDITION we -+ * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with smp_store_mb() -+ * in set_current_state() that the waiting thread does. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ smp_mb__after_spinlock(); -+ if (!(p->state & state)) -+ goto unlock; -+ -+ trace_sched_waking(p); -+ -+ /* We're going to change ->state: */ -+ success = 1; -+ -+ /* -+ * Ensure we load p->on_rq _after_ p->state, otherwise it would -+ * be possible to, falsely, observe p->on_rq == 0 and get stuck -+ * in smp_cond_load_acquire() below. -+ * -+ * sched_ttwu_pending() try_to_wake_up() -+ * STORE p->on_rq = 1 LOAD p->state -+ * UNLOCK rq->lock -+ * -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * UNLOCK rq->lock -+ * -+ * [task p] -+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ * -+ * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). -+ */ -+ smp_rmb(); -+ if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) -+ goto unlock; -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -+ * possible to, falsely, observe p->on_cpu == 0. -+ * -+ * One must be running (->on_cpu == 1) in order to remove oneself -+ * from the runqueue. -+ * -+ * __schedule() (switch to task 'p') try_to_wake_up() -+ * STORE p->on_cpu = 1 LOAD p->on_rq -+ * UNLOCK rq->lock -+ * -+ * __schedule() (put 'p' to sleep) -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * STORE p->on_rq = 0 LOAD p->on_cpu -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ * -+ * Form a control-dep-acquire with p->on_rq == 0 above, to ensure -+ * schedule()'s deactivate_task() has 'happened' and p will no longer -+ * care about it's own p->state. See the comment in __schedule(). -+ */ -+ smp_acquire__after_ctrl_dep(); -+ -+ /* -+ * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq -+ * == 0), which means we need to do an enqueue, change p->state to -+ * TASK_WAKING such that we can unlock p->pi_lock before doing the -+ * enqueue, such as ttwu_queue_wakelist(). -+ */ -+ p->state = TASK_WAKING; -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, considering queueing p on the remote CPUs wake_list -+ * which potentially sends an IPI instead of spinning on p->on_cpu to -+ * let the waker make forward progress. This is safe because IRQs are -+ * disabled and the IPI will deliver after on_cpu is cleared. -+ * -+ * Ensure we load task_cpu(p) after p->on_cpu: -+ * -+ * set_task_cpu(p, cpu); -+ * STORE p->cpu = @cpu -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock -+ * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu) -+ * STORE p->on_cpu = 1 LOAD p->cpu -+ * -+ * to ensure we observe the correct CPU on which the task is currently -+ * scheduling. 
-+ */ -+ if (smp_load_acquire(&p->on_cpu) && -+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) -+ goto unlock; -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until its done referencing the task. -+ * -+ * Pairs with the smp_store_release() in finish_task(). -+ * -+ * This ensures that tasks getting woken will be fully ordered against -+ * their previous state and preserve Program Order. -+ */ -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ sched_task_ttwu(p); -+ -+ cpu = select_task_rq(p, this_rq()); -+ -+ if (cpu != task_cpu(p)) { -+ wake_flags |= WF_MIGRATED; -+ psi_ttwu_dequeue(p); -+ set_task_cpu(p, cpu); -+ } -+#else -+ cpu = task_cpu(p); -+#endif /* CONFIG_SMP */ -+ -+ ttwu_queue(p, cpu, wake_flags); -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+out: -+ if (success) -+ ttwu_stat(p, task_cpu(p), wake_flags); -+ preempt_enable(); -+ -+ return success; -+} -+ -+/** -+ * try_invoke_on_locked_down_task - Invoke a function on task in fixed state -+ * @p: Process for which the function is to be invoked. -+ * @func: Function to invoke. -+ * @arg: Argument to function. -+ * -+ * If the specified task can be quickly locked into a definite state -+ * (either sleeping or on a given runqueue), arrange to keep it in that -+ * state while invoking @func(@arg). This function can use ->on_rq and -+ * task_curr() to work out what the state is, if required. Given that -+ * @func can be invoked with a runqueue lock held, it had better be quite -+ * lightweight. -+ * -+ * Returns: -+ * @false if the task slipped out from under the locks. -+ * @true if the task was locked onto a runqueue or is sleeping. -+ * However, @func can override this by returning @false. -+ */ -+bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) -+{ -+ bool ret = false; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ lockdep_assert_irqs_enabled(); -+ raw_spin_lock_irq(&p->pi_lock); -+ if (p->on_rq) { -+ rq = __task_rq_lock(p, &rf); -+ if (task_rq(p) == rq) -+ ret = func(p, arg); -+ __task_rq_unlock(rq, &rf); -+ } else { -+ switch (p->state) { -+ case TASK_RUNNING: -+ case TASK_WAKING: -+ break; -+ default: -+ smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). -+ if (!p->on_rq) -+ ret = func(p, arg); -+ } -+ } -+ raw_spin_unlock_irq(&p->pi_lock); -+ return ret; -+} -+ -+/** -+ * wake_up_process - Wake up a specific process -+ * @p: The process to be woken up. -+ * -+ * Attempt to wake up the nominated process and move it to the set of runnable -+ * processes. -+ * -+ * Return: 1 if the process was woken up, 0 if it was already running. -+ * -+ * This function executes a full memory barrier before accessing the task state. -+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+int wake_up_state(struct task_struct *p, unsigned int state) -+{ -+ return try_to_wake_up(p, state, 0); -+} -+ -+/* -+ * Perform scheduler related setup for a newly forked process p. -+ * p is forked by current. 
-+ * -+ * __sched_fork() is basic setup used by init_idle() too: -+ */ -+static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p) -+{ -+ p->on_rq = 0; -+ p->on_cpu = 0; -+ p->utime = 0; -+ p->stime = 0; -+ p->sched_time = 0; -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ INIT_HLIST_HEAD(&p->preempt_notifiers); -+#endif -+ -+#ifdef CONFIG_COMPACTION -+ p->capture_control = NULL; -+#endif -+#ifdef CONFIG_SMP -+ p->wake_entry.u_flags = CSD_TYPE_TTWU; -+#endif -+} -+ -+/* -+ * fork()/clone()-time setup: -+ */ -+int sched_fork(unsigned long clone_flags, struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ -+ __sched_fork(clone_flags, p); -+ /* -+ * We mark the process as NEW here. This guarantees that -+ * nobody will actually run it, and a signal or other external -+ * event cannot wake it up and insert it on the runqueue either. -+ */ -+ p->state = TASK_NEW; -+ -+ /* -+ * Make sure we do not leak PI boosting priority to the child. -+ */ -+ p->prio = current->normal_prio; -+ -+ /* -+ * Revert to default priority/policy on fork if requested. -+ */ -+ if (unlikely(p->sched_reset_on_fork)) { -+ if (task_has_rt_policy(p)) { -+ p->policy = SCHED_NORMAL; -+ p->static_prio = NICE_TO_PRIO(0); -+ p->rt_priority = 0; -+ } else if (PRIO_TO_NICE(p->static_prio) < 0) -+ p->static_prio = NICE_TO_PRIO(0); -+ -+ p->prio = p->normal_prio = normal_prio(p); -+ -+ /* -+ * We don't need the reset flag anymore after the fork. It has -+ * fulfilled its duty: -+ */ -+ p->sched_reset_on_fork = 0; -+ } -+ -+ /* -+ * The child is not yet in the pid-hash so no cgroup attach races, -+ * and the cgroup is pinned to this child due to cgroup_fork() -+ * is ran before sched_fork(). -+ * -+ * Silence PROVE_RCU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. -+ */ -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ -+ rq->curr->time_slice /= 2; -+ p->time_slice = rq->curr->time_slice; -+#ifdef CONFIG_SCHED_HRTICK -+ hrtick_start(rq, rq->curr->time_slice); -+#endif -+ -+ if (p->time_slice < RESCHED_NS) { -+ p->time_slice = sched_timeslice_ns; -+ resched_curr(rq); -+ } -+ sched_task_fork(p, rq); -+ raw_spin_unlock(&rq->lock); -+ -+ rseq_migrate(p); -+ /* -+ * We're setting the CPU for the first time, we don't migrate, -+ * so use __set_task_cpu(). -+ */ -+ __set_task_cpu(p, cpu_of(rq)); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ return 0; -+} -+ -+void sched_post_fork(struct task_struct *p) {} -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+static bool __initdata __sched_schedstats = false; -+ -+static void set_schedstats(bool enabled) -+{ -+ if (enabled) -+ static_branch_enable(&sched_schedstats); -+ else -+ static_branch_disable(&sched_schedstats); -+} -+ -+void force_schedstat_enabled(void) -+{ -+ if (!schedstat_enabled()) { -+ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); -+ static_branch_enable(&sched_schedstats); -+ } -+} -+ -+static int __init setup_schedstats(char *str) -+{ -+ int ret = 0; -+ if (!str) -+ goto out; -+ -+ /* -+ * This code is called before jump labels have been set up, so we can't -+ * change the static branch directly just yet. 
Instead set a temporary -+ * variable so init_schedstats() can do it later. -+ */ -+ if (!strcmp(str, "enable")) { -+ __sched_schedstats = true; -+ ret = 1; -+ } else if (!strcmp(str, "disable")) { -+ __sched_schedstats = false; -+ ret = 1; -+ } -+out: -+ if (!ret) -+ pr_warn("Unable to parse schedstats=\n"); -+ -+ return ret; -+} -+__setup("schedstats=", setup_schedstats); -+ -+static void __init init_schedstats(void) -+{ -+ set_schedstats(__sched_schedstats); -+} -+ -+#ifdef CONFIG_PROC_SYSCTL -+int sysctl_schedstats(struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) -+{ -+ struct ctl_table t; -+ int err; -+ int state = static_branch_likely(&sched_schedstats); -+ -+ if (write && !capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ t = *table; -+ t.data = &state; -+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); -+ if (err < 0) -+ return err; -+ if (write) -+ set_schedstats(state); -+ return err; -+} -+#endif /* CONFIG_PROC_SYSCTL */ -+#else /* !CONFIG_SCHEDSTATS */ -+static inline void init_schedstats(void) {} -+#endif /* CONFIG_SCHEDSTATS */ -+ -+/* -+ * wake_up_new_task - wake up a newly created task for the first time. -+ * -+ * This function will do some initial scheduler statistics housekeeping -+ * that must be done for every newly created context, then puts the task -+ * on the runqueue and wakes it. -+ */ -+void wake_up_new_task(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ p->state = TASK_RUNNING; -+ -+ rq = cpu_rq(select_task_rq(p, this_rq())); -+#ifdef CONFIG_SMP -+ rseq_migrate(p); -+ /* -+ * Fork balancing, do it here and not earlier because: -+ * - cpus_ptr can change in the fork path -+ * - any previously selected CPU might disappear through hotplug -+ * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, -+ * as we're not fully set-up yet. -+ */ -+ __set_task_cpu(p, cpu_of(rq)); -+#endif -+ -+ raw_spin_lock(&rq->lock); -+ -+ update_rq_clock(rq); -+ activate_task(p, rq); -+ trace_sched_wakeup_new(p); -+ check_preempt_curr(rq); -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ -+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); -+ -+void preempt_notifier_inc(void) -+{ -+ static_branch_inc(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_inc); -+ -+void preempt_notifier_dec(void) -+{ -+ static_branch_dec(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_dec); -+ -+/** -+ * preempt_notifier_register - tell me when current is being preempted & rescheduled -+ * @notifier: notifier struct to register -+ */ -+void preempt_notifier_register(struct preempt_notifier *notifier) -+{ -+ if (!static_branch_unlikely(&preempt_notifier_key)) -+ WARN(1, "registering preempt_notifier while notifiers disabled\n"); -+ -+ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_register); -+ -+/** -+ * preempt_notifier_unregister - no longer interested in preemption notifications -+ * @notifier: notifier struct to unregister -+ * -+ * This is *not* safe to call from within a preemption notifier. 
-+ */ -+void preempt_notifier_unregister(struct preempt_notifier *notifier) -+{ -+ hlist_del(¬ifier->link); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_unregister); -+ -+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_in(notifier, raw_smp_processor_id()); -+} -+ -+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_in_preempt_notifiers(curr); -+} -+ -+static void -+__fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_out(notifier, next); -+} -+ -+static __always_inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_out_preempt_notifiers(curr, next); -+} -+ -+#else /* !CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+} -+ -+static inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+} -+ -+#endif /* CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void prepare_task(struct task_struct *next) -+{ -+ /* -+ * Claim the task as running, we do this before switching to it -+ * such that any running task will have this set. -+ * -+ * See the ttwu() WF_ON_CPU case and its ordering comment. -+ */ -+ WRITE_ONCE(next->on_cpu, 1); -+} -+ -+static inline void finish_task(struct task_struct *prev) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * This must be the very last reference to @prev from this CPU. After -+ * p->on_cpu is cleared, the task can be moved to a different CPU. We -+ * must ensure this doesn't happen until the switch is completely -+ * finished. -+ * -+ * In particular, the load of prev->state in finish_task_switch() must -+ * happen before this. -+ * -+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). -+ */ -+ smp_store_release(&prev->on_cpu, 0); -+#else -+ prev->on_cpu = 0; -+#endif -+} -+ -+static inline void -+prepare_lock_switch(struct rq *rq, struct task_struct *next) -+{ -+ /* -+ * Since the runqueue lock will be released by the next -+ * task (which is an invalid locking op but in the case -+ * of the scheduler it's an obvious special-case), so we -+ * do an early lockdep release here: -+ */ -+ spin_release(&rq->lock.dep_map, _THIS_IP_); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* this is a valid case when another task releases the spinlock */ -+ rq->lock.owner = next; -+#endif -+} -+ -+static inline void finish_lock_switch(struct rq *rq) -+{ -+ /* -+ * If we are tracking spinlock dependencies then we have to -+ * fix up the runqueue lock - which gets 'carried over' from -+ * prev into current: -+ */ -+ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+/** -+ * prepare_task_switch - prepare to switch tasks -+ * @rq: the runqueue preparing to switch -+ * @next: the task we are going to switch to. -+ * -+ * This is called with the rq lock held and interrupts off. It must -+ * be paired with a subsequent finish_task_switch after the context -+ * switch. -+ * -+ * prepare_task_switch sets up locking and calls architecture specific -+ * hooks. 
-+ */ -+static inline void -+prepare_task_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ kcov_prepare_switch(prev); -+ sched_info_switch(rq, prev, next); -+ perf_event_task_sched_out(prev, next); -+ rseq_preempt(prev); -+ fire_sched_out_preempt_notifiers(prev, next); -+ prepare_task(next); -+ prepare_arch_switch(next); -+} -+ -+/** -+ * finish_task_switch - clean up after a task-switch -+ * @rq: runqueue associated with task-switch -+ * @prev: the thread we just switched away from. -+ * -+ * finish_task_switch must be called after the context switch, paired -+ * with a prepare_task_switch call before the context switch. -+ * finish_task_switch will reconcile locking set up by prepare_task_switch, -+ * and do any other architecture-specific cleanup actions. -+ * -+ * Note that we may have delayed dropping an mm in context_switch(). If -+ * so, we finish that here outside of the runqueue lock. (Doing it -+ * with the lock held can cause deadlocks; see schedule() for -+ * details.) -+ * -+ * The context switch have flipped the stack from under us and restored the -+ * local variables which were saved when this task called schedule() in the -+ * past. prev == current is still correct but we need to recalculate this_rq -+ * because prev may have moved to another CPU. -+ */ -+static struct rq *finish_task_switch(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq = this_rq(); -+ struct mm_struct *mm = rq->prev_mm; -+ long prev_state; -+ -+ /* -+ * The previous task will have left us with a preempt_count of 2 -+ * because it left us after: -+ * -+ * schedule() -+ * preempt_disable(); // 1 -+ * __schedule() -+ * raw_spin_lock_irq(&rq->lock) // 2 -+ * -+ * Also, see FORK_PREEMPT_COUNT. -+ */ -+ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, -+ "corrupted preempt_count: %s/%d/0x%x\n", -+ current->comm, current->pid, preempt_count())) -+ preempt_count_set(FORK_PREEMPT_COUNT); -+ -+ rq->prev_mm = NULL; -+ -+ /* -+ * A task struct has one reference for the use as "current". -+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls -+ * schedule one last time. The schedule call will never return, and -+ * the scheduled task must drop that reference. -+ * -+ * We must observe prev->state before clearing prev->on_cpu (in -+ * finish_task), otherwise a concurrent wakeup can get prev -+ * running on another CPU and we could rave with its RUNNING -> DEAD -+ * transition, resulting in a double drop. -+ */ -+ prev_state = prev->state; -+ vtime_task_switch(prev); -+ perf_event_task_sched_in(prev, current); -+ finish_task(prev); -+ finish_lock_switch(rq); -+ finish_arch_post_lock_switch(); -+ kcov_finish_switch(current); -+ -+ fire_sched_in_preempt_notifiers(current); -+ /* -+ * When switching through a kernel thread, the loop in -+ * membarrier_{private,global}_expedited() may have observed that -+ * kernel thread and not issued an IPI. It is therefore possible to -+ * schedule between user->kernel->user threads without passing though -+ * switch_mm(). Membarrier requires a barrier after storing to -+ * rq->curr, before returning to userspace, so provide them here: -+ * -+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly -+ * provided by mmdrop(), -+ * - a sync_core for SYNC_CORE. 
-+ */ -+ if (mm) { -+ membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop(mm); -+ } -+ if (unlikely(prev_state == TASK_DEAD)) { -+ /* -+ * Remove function-return probe instances associated with this -+ * task and put them back on the free list. -+ */ -+ kprobe_flush_task(prev); -+ -+ /* Task is done with its stack. */ -+ put_task_stack(prev); -+ -+ put_task_struct_rcu_user(prev); -+ } -+ -+ tick_nohz_task_switch(); -+ return rq; -+} -+ -+/** -+ * schedule_tail - first thing a freshly forked thread must call. -+ * @prev: the thread we just switched away from. -+ */ -+asmlinkage __visible void schedule_tail(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq; -+ -+ /* -+ * New tasks start with FORK_PREEMPT_COUNT, see there and -+ * finish_task_switch() for details. -+ * -+ * finish_task_switch() will drop rq->lock() and lower preempt_count -+ * and the preempt_enable() will end up enabling preemption (on -+ * PREEMPT_COUNT kernels). -+ */ -+ -+ rq = finish_task_switch(prev); -+ preempt_enable(); -+ -+ if (current->set_child_tid) -+ put_user(task_pid_vnr(current), current->set_child_tid); -+ -+ calculate_sigpending(); -+} -+ -+/* -+ * context_switch - switch to the new MM and the new thread's register state. -+ */ -+static __always_inline struct rq * -+context_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ prepare_task_switch(rq, prev, next); -+ -+ /* -+ * For paravirt, this is coupled with an exit in switch_to to -+ * combine the page table reload and the switch backend into -+ * one hypercall. -+ */ -+ arch_start_context_switch(prev); -+ -+ /* -+ * kernel -> kernel lazy + transfer active -+ * user -> kernel lazy + mmgrab() active -+ * -+ * kernel -> user switch + mmdrop() active -+ * user -> user switch -+ */ -+ if (!next->mm) { // to kernel -+ enter_lazy_tlb(prev->active_mm, next); -+ -+ next->active_mm = prev->active_mm; -+ if (prev->mm) // from user -+ mmgrab(prev->active_mm); -+ else -+ prev->active_mm = NULL; -+ } else { // to user -+ membarrier_switch_mm(rq, prev->active_mm, next->mm); -+ /* -+ * sys_membarrier() requires an smp_mb() between setting -+ * rq->curr / membarrier_switch_mm() and returning to userspace. -+ * -+ * The below provides this either through switch_mm(), or in -+ * case 'prev->active_mm == next->mm' through -+ * finish_task_switch()'s mmdrop(). -+ */ -+ switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ -+ if (!prev->mm) { // from kernel -+ /* will mmdrop() in finish_task_switch(). */ -+ rq->prev_mm = prev->active_mm; -+ prev->active_mm = NULL; -+ } -+ } -+ -+ prepare_lock_switch(rq, next); -+ -+ /* Here we just switch the register state and the stack. */ -+ switch_to(prev, next, prev); -+ barrier(); -+ -+ return finish_task_switch(prev); -+} -+ -+/* -+ * nr_running, nr_uninterruptible and nr_context_switches: -+ * -+ * externally visible scheduler statistics: current number of runnable -+ * threads, total number of context switches performed since bootup. -+ */ -+unsigned long nr_running(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_running; -+ -+ return sum; -+} -+ -+/* -+ * Check if only the current task is running on the CPU. -+ * -+ * Caution: this function does not check that the caller has disabled -+ * preemption, thus the result might have a time-of-check-to-time-of-use -+ * race. 
The caller is responsible to use it correctly, for example: -+ * -+ * - from a non-preemptible section (of course) -+ * -+ * - from a thread that is bound to a single CPU -+ * -+ * - in a loop with very short iterations (e.g. a polling loop) -+ */ -+bool single_task_running(void) -+{ -+ return raw_rq()->nr_running == 1; -+} -+EXPORT_SYMBOL(single_task_running); -+ -+unsigned long long nr_context_switches(void) -+{ -+ int i; -+ unsigned long long sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += cpu_rq(i)->nr_switches; -+ -+ return sum; -+} -+ -+/* -+ * Consumers of these two interfaces, like for example the cpuidle menu -+ * governor, are using nonsensical data. Preferring shallow idle state selection -+ * for a CPU that has IO-wait which might not even end up running the task when -+ * it does become runnable. -+ */ -+ -+unsigned long nr_iowait_cpu(int cpu) -+{ -+ return atomic_read(&cpu_rq(cpu)->nr_iowait); -+} -+ -+/* -+ * IO-wait accounting, and how its mostly bollocks (on SMP). -+ * -+ * The idea behind IO-wait account is to account the idle time that we could -+ * have spend running if it were not for IO. That is, if we were to improve the -+ * storage performance, we'd have a proportional reduction in IO-wait time. -+ * -+ * This all works nicely on UP, where, when a task blocks on IO, we account -+ * idle time as IO-wait, because if the storage were faster, it could've been -+ * running and we'd not be idle. -+ * -+ * This has been extended to SMP, by doing the same for each CPU. This however -+ * is broken. -+ * -+ * Imagine for instance the case where two tasks block on one CPU, only the one -+ * CPU will have IO-wait accounted, while the other has regular idle. Even -+ * though, if the storage were faster, both could've ran at the same time, -+ * utilising both CPUs. -+ * -+ * This means, that when looking globally, the current IO-wait accounting on -+ * SMP is a lower bound, by reason of under accounting. -+ * -+ * Worse, since the numbers are provided per CPU, they are sometimes -+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly -+ * associated with any one particular CPU, it can wake to another CPU than it -+ * blocked on. This means the per CPU IO-wait number is meaningless. -+ * -+ * Task CPU affinities can make all that even more 'interesting'. -+ */ -+ -+unsigned long nr_iowait(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += nr_iowait_cpu(i); -+ -+ return sum; -+} -+ -+#ifdef CONFIG_SMP -+ -+/* -+ * sched_exec - execve() is a valuable balancing opportunity, because at -+ * this point the task has the smallest effective memory and cache -+ * footprint. 
-+ */ -+void sched_exec(void) -+{ -+ struct task_struct *p = current; -+ unsigned long flags; -+ int dest_cpu; -+ struct rq *rq; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = this_rq(); -+ -+ if (rq != task_rq(p) || rq->nr_running < 2) -+ goto unlock; -+ -+ dest_cpu = select_task_rq(p, task_rq(p)); -+ if (dest_cpu == smp_processor_id()) -+ goto unlock; -+ -+ if (likely(cpu_active(dest_cpu))) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); -+ return; -+ } -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#endif -+ -+DEFINE_PER_CPU(struct kernel_stat, kstat); -+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -+ -+EXPORT_PER_CPU_SYMBOL(kstat); -+EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -+ -+static inline void update_curr(struct rq *rq, struct task_struct *p) -+{ -+ s64 ns = rq->clock_task - p->last_ran; -+ -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ p->time_slice -= ns; -+ p->last_ran = rq->clock_task; -+} -+ -+/* -+ * Return accounted runtime for the task. -+ * Return separately the current's pending runtime that have not been -+ * accounted yet. -+ */ -+unsigned long long task_sched_runtime(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ u64 ns; -+ -+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) -+ /* -+ * 64-bit doesn't need locks to atomically read a 64-bit value. -+ * So we have a optimization chance when the task's delta_exec is 0. -+ * Reading ->on_cpu is racy, but this is ok. -+ * -+ * If we race with it leaving CPU, we'll take a lock. So we're correct. -+ * If we race with it entering CPU, unaccounted time is 0. This is -+ * indistinguishable from the read occurring a few cycles earlier. -+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has -+ * been accounted, so we're correct here as well. -+ */ -+ if (!p->on_cpu || !task_on_rq_queued(p)) -+ return tsk_seruntime(p); -+#endif -+ -+ rq = task_access_lock_irqsave(p, &lock, &flags); -+ /* -+ * Must be ->curr _and_ ->on_rq. If dequeued, we would -+ * project cycles that may never be accounted to this -+ * thread, breaking clock_gettime(). -+ */ -+ if (p == rq->curr && task_on_rq_queued(p)) { -+ update_rq_clock(rq); -+ update_curr(rq, p); -+ } -+ ns = tsk_seruntime(p); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ return ns; -+} -+ -+/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static inline void scheduler_task_tick(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ if (is_idle_task(p)) -+ return; -+ -+ update_curr(rq, p); -+ cpufreq_update_util(rq, 0); -+ -+ /* -+ * Tasks have less than RESCHED_NS of time slice left they will be -+ * rescheduled. -+ */ -+ if (p->time_slice >= RESCHED_NS) -+ return; -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ -+/* -+ * This function gets called by the timer code, with HZ frequency. -+ * We call it with interrupts disabled. 
-+ */ -+void scheduler_tick(void) -+{ -+ int cpu __maybe_unused = smp_processor_id(); -+ struct rq *rq = cpu_rq(cpu); -+ -+ arch_scale_freq_tick(); -+ sched_clock_tick(); -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ scheduler_task_tick(rq); -+ calc_global_load_tick(rq); -+ psi_task_tick(rq); -+ -+ rq->last_tick = rq->clock; -+ raw_spin_unlock(&rq->lock); -+ -+ perf_event_task_tick(); -+} -+ -+#ifdef CONFIG_SCHED_SMT -+static inline int active_load_balance_cpu_stop(void *data) -+{ -+ struct rq *rq = this_rq(); -+ struct task_struct *p = data; -+ cpumask_t tmp; -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ rq->active_balance = 0; -+ /* _something_ may have changed the task, double check again */ -+ if (task_on_rq_queued(p) && task_rq(p) == rq && -+ cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) { -+ int cpu = cpu_of(rq); -+ int dcpu = __best_mask_cpu(cpu, &tmp, -+ per_cpu(sched_cpu_llc_mask, cpu)); -+ rq = move_queued_task(rq, p, dcpu); -+ } -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_restore(flags); -+ -+ return 0; -+} -+ -+/* sg_balance_trigger - trigger slibing group balance for @cpu */ -+static inline int sg_balance_trigger(const int cpu) -+{ -+ struct rq *rq= cpu_rq(cpu); -+ unsigned long flags; -+ struct task_struct *curr; -+ int res; -+ -+ if (!raw_spin_trylock_irqsave(&rq->lock, flags)) -+ return 0; -+ curr = rq->curr; -+ res = (!is_idle_task(curr)) && (1 == rq->nr_running) &&\ -+ cpumask_intersects(curr->cpus_ptr, &sched_sg_idle_mask) &&\ -+ (!rq->active_balance); -+ -+ if (res) -+ rq->active_balance = 1; -+ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ if (res) -+ stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop, -+ curr, &rq->active_balance_work); -+ return res; -+} -+ -+/* -+ * sg_balance_check - slibing group balance check for run queue @rq -+ */ -+static inline void sg_balance_check(struct rq *rq) -+{ -+ cpumask_t chk; -+ int cpu; -+ -+ /* exit when no sg in idle */ -+ if (cpumask_empty(&sched_sg_idle_mask)) -+ return; -+ -+ cpu = cpu_of(rq); -+ /* -+ * Only cpu in slibing idle group will do the checking and then -+ * find potential cpus which can migrate the current running task -+ */ -+ if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && -+ cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) && -+ cpumask_andnot(&chk, &chk, &sched_rq_watermark[IDLE_WM])) { -+ int i, tried = 0; -+ -+ for_each_cpu_wrap(i, &chk, cpu) { -+ if (cpumask_subset(cpu_smt_mask(i), &chk)) { -+ if (sg_balance_trigger(i)) -+ return; -+ if (tried) -+ return; -+ tried++; -+ } -+ } -+ } -+} -+#endif /* CONFIG_SCHED_SMT */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+ -+struct tick_work { -+ int cpu; -+ atomic_t state; -+ struct delayed_work work; -+}; -+/* Values for ->state, see diagram below. */ -+#define TICK_SCHED_REMOTE_OFFLINE 0 -+#define TICK_SCHED_REMOTE_OFFLINING 1 -+#define TICK_SCHED_REMOTE_RUNNING 2 -+ -+/* -+ * State diagram for ->state: -+ * -+ * -+ * TICK_SCHED_REMOTE_OFFLINE -+ * | ^ -+ * | | -+ * | | sched_tick_remote() -+ * | | -+ * | | -+ * +--TICK_SCHED_REMOTE_OFFLINING -+ * | ^ -+ * | | -+ * sched_tick_start() | | sched_tick_stop() -+ * | | -+ * V | -+ * TICK_SCHED_REMOTE_RUNNING -+ * -+ * -+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() -+ * and sched_tick_start() are happy to leave the state in RUNNING. 
-+ */ -+ -+static struct tick_work __percpu *tick_work_cpu; -+ -+static void sched_tick_remote(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct tick_work *twork = container_of(dwork, struct tick_work, work); -+ int cpu = twork->cpu; -+ struct rq *rq = cpu_rq(cpu); -+ struct task_struct *curr; -+ unsigned long flags; -+ u64 delta; -+ int os; -+ -+ /* -+ * Handle the tick only if it appears the remote CPU is running in full -+ * dynticks mode. The check is racy by nature, but missing a tick or -+ * having one too much is no big deal because the scheduler tick updates -+ * statistics and checks timeslices in a time-independent way, regardless -+ * of when exactly it is running. -+ */ -+ if (!tick_nohz_tick_stopped_cpu(cpu)) -+ goto out_requeue; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ curr = rq->curr; -+ if (cpu_is_offline(cpu)) -+ goto out_unlock; -+ -+ update_rq_clock(rq); -+ if (!is_idle_task(curr)) { -+ /* -+ * Make sure the next tick runs within a reasonable -+ * amount of time. -+ */ -+ delta = rq_clock_task(rq) - curr->last_ran; -+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); -+ } -+ scheduler_task_tick(rq); -+ -+ calc_load_nohz_remote(rq); -+out_unlock: -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+out_requeue: -+ /* -+ * Run the remote tick once per second (1Hz). This arbitrary -+ * frequency is large enough to avoid overload but short enough -+ * to keep scheduler internal stats reasonably up to date. But -+ * first update state to reflect hotplug activity if required. -+ */ -+ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); -+ if (os == TICK_SCHED_REMOTE_RUNNING) -+ queue_delayed_work(system_unbound_wq, dwork, HZ); -+} -+ -+static void sched_tick_start(int cpu) -+{ -+ int os; -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); -+ if (os == TICK_SCHED_REMOTE_OFFLINE) { -+ twork->cpu = cpu; -+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); -+ queue_delayed_work(system_unbound_wq, &twork->work, HZ); -+ } -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void sched_tick_stop(int cpu) -+{ -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ cancel_delayed_work_sync(&twork->work); -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+int __init sched_tick_offload_init(void) -+{ -+ tick_work_cpu = alloc_percpu(struct tick_work); -+ BUG_ON(!tick_work_cpu); -+ return 0; -+} -+ -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_tick_start(int cpu) { } -+static inline void sched_tick_stop(int cpu) { } -+#endif -+ -+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ -+ defined(CONFIG_PREEMPT_TRACER)) -+/* -+ * If the value passed in is equal to the current preempt count -+ * then we just disabled preemption. Start timing the latency. -+ */ -+static inline void preempt_latency_start(int val) -+{ -+ if (preempt_count() == val) { -+ unsigned long ip = get_lock_parent_ip(); -+#ifdef CONFIG_DEBUG_PREEMPT -+ current->preempt_disable_ip = ip; -+#endif -+ trace_preempt_off(CALLER_ADDR0, ip); -+ } -+} -+ -+void preempt_count_add(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? 
-+ */ -+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) -+ return; -+#endif -+ __preempt_count_add(val); -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Spinlock count overflowing soon? -+ */ -+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= -+ PREEMPT_MASK - 10); -+#endif -+ preempt_latency_start(val); -+} -+EXPORT_SYMBOL(preempt_count_add); -+NOKPROBE_SYMBOL(preempt_count_add); -+ -+/* -+ * If the value passed in equals to the current preempt count -+ * then we just enabled preemption. Stop timing the latency. -+ */ -+static inline void preempt_latency_stop(int val) -+{ -+ if (preempt_count() == val) -+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); -+} -+ -+void preempt_count_sub(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) -+ return; -+ /* -+ * Is the spinlock portion underflowing? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && -+ !(preempt_count() & PREEMPT_MASK))) -+ return; -+#endif -+ -+ preempt_latency_stop(val); -+ __preempt_count_sub(val); -+} -+EXPORT_SYMBOL(preempt_count_sub); -+NOKPROBE_SYMBOL(preempt_count_sub); -+ -+#else -+static inline void preempt_latency_start(int val) { } -+static inline void preempt_latency_stop(int val) { } -+#endif -+ -+static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ return p->preempt_disable_ip; -+#else -+ return 0; -+#endif -+} -+ -+/* -+ * Print scheduling while atomic bug: -+ */ -+static noinline void __schedule_bug(struct task_struct *prev) -+{ -+ /* Save this before calling printk(), since that will clobber it */ -+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ if (oops_in_progress) -+ return; -+ -+ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", -+ prev->comm, prev->pid, preempt_count()); -+ -+ debug_show_held_locks(prev); -+ print_modules(); -+ if (irqs_disabled()) -+ print_irqtrace_events(prev); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && in_atomic_preempt_off()) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); -+ } -+ if (panic_on_warn) -+ panic("scheduling while atomic\n"); -+ -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+ -+/* -+ * Various schedule()-time debugging checks and statistics: -+ */ -+static inline void schedule_debug(struct task_struct *prev, bool preempt) -+{ -+#ifdef CONFIG_SCHED_STACK_END_CHECK -+ if (task_stack_end_corrupted(prev)) -+ panic("corrupted stack end detected inside scheduler\n"); -+ -+ if (task_scs_end_corrupted(prev)) -+ panic("corrupted shadow stack detected inside scheduler\n"); -+#endif -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && prev->state && prev->non_block_count) { -+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", -+ prev->comm, prev->pid, prev->non_block_count); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+ } -+#endif -+ -+ if (unlikely(in_atomic_preempt_off())) { -+ __schedule_bug(prev); -+ preempt_count_set(PREEMPT_DISABLED); -+ } -+ rcu_sleep_check(); -+ -+ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -+ -+ schedstat_inc(this_rq()->sched_count); -+} -+ -+/* -+ * Compile time debug macro -+ * #define ALT_SCHED_DEBUG -+ */ -+ -+#ifdef ALT_SCHED_DEBUG -+void alt_sched_debug(void) -+{ -+ printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n", -+ sched_rq_pending_mask.bits[0], -+ sched_rq_watermark[IDLE_WM].bits[0], -+ sched_sg_idle_mask.bits[0]); -+} 
-+#else -+inline void alt_sched_debug(void) {} -+#endif -+ -+#ifdef CONFIG_SMP -+ -+#define SCHED_RQ_NR_MIGRATION (32UL) -+/* -+ * Migrate pending tasks in @rq to @dest_cpu -+ * Will try to migrate mininal of half of @rq nr_running tasks and -+ * SCHED_RQ_NR_MIGRATION to @dest_cpu -+ */ -+static inline int -+migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) -+{ -+ struct task_struct *p, *skip = rq->curr; -+ int nr_migrated = 0; -+ int nr_tries = min(rq->nr_running / 2, SCHED_RQ_NR_MIGRATION); -+ -+ while (skip != rq->idle && nr_tries && -+ (p = sched_rq_next_task(skip, rq)) != rq->idle) { -+ skip = sched_rq_next_task(p, rq); -+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { -+ __SCHED_DEQUEUE_TASK(p, rq, 0, ); -+ set_task_cpu(p, dest_cpu); -+ __SCHED_ENQUEUE_TASK(p, dest_rq, 0); -+ nr_migrated++; -+ } -+ nr_tries--; -+ } -+ -+ return nr_migrated; -+} -+ -+static inline int take_other_rq_tasks(struct rq *rq, int cpu) -+{ -+ struct cpumask *affinity_mask, *end_mask; -+ -+ if (unlikely(!rq->online)) -+ return 0; -+ -+ if (cpumask_empty(&sched_rq_pending_mask)) -+ return 0; -+ -+ affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); -+ do { -+ int i; -+ for_each_cpu_and(i, &sched_rq_pending_mask, affinity_mask) { -+ int nr_migrated; -+ struct rq *src_rq; -+ -+ src_rq = cpu_rq(i); -+ if (!do_raw_spin_trylock(&src_rq->lock)) -+ continue; -+ spin_acquire(&src_rq->lock.dep_map, -+ SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ -+ if ((nr_migrated = migrate_pending_tasks(src_rq, rq, cpu))) { -+ src_rq->nr_running -= nr_migrated; -+#ifdef CONFIG_SMP -+ if (src_rq->nr_running < 2) -+ cpumask_clear_cpu(i, &sched_rq_pending_mask); -+#endif -+ rq->nr_running += nr_migrated; -+#ifdef CONFIG_SMP -+ if (rq->nr_running > 1) -+ cpumask_set_cpu(cpu, &sched_rq_pending_mask); -+#endif -+ update_sched_rq_watermark(rq); -+ cpufreq_update_util(rq, 0); -+ -+ spin_release(&src_rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ -+ return 1; -+ } -+ -+ spin_release(&src_rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ } -+ } while (++affinity_mask < end_mask); -+ -+ return 0; -+} -+#endif -+ -+/* -+ * Timeslices below RESCHED_NS are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. 
-+ */ -+static inline void check_curr(struct task_struct *p, struct rq *rq) -+{ -+ if (unlikely(rq->idle == p)) -+ return; -+ -+ update_curr(rq, p); -+ -+ if (p->time_slice < RESCHED_NS) -+ time_slice_expired(p, rq); -+} -+ -+static inline struct task_struct * -+choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) -+{ -+ struct task_struct *next; -+ -+ if (unlikely(rq->skip)) { -+ next = rq_runnable_task(rq); -+ if (next == rq->idle) { -+#ifdef CONFIG_SMP -+ if (!take_other_rq_tasks(rq, cpu)) { -+#endif -+ rq->skip = NULL; -+ schedstat_inc(rq->sched_goidle); -+ return next; -+#ifdef CONFIG_SMP -+ } -+ next = rq_runnable_task(rq); -+#endif -+ } -+ rq->skip = NULL; -+#ifdef CONFIG_HIGH_RES_TIMERS -+ hrtick_start(rq, next->time_slice); -+#endif -+ return next; -+ } -+ -+ next = sched_rq_first_task(rq); -+ if (next == rq->idle) { -+#ifdef CONFIG_SMP -+ if (!take_other_rq_tasks(rq, cpu)) { -+#endif -+ schedstat_inc(rq->sched_goidle); -+ /*printk(KERN_INFO "sched: choose_next_task(%d) idle %px\n", cpu, next);*/ -+ return next; -+#ifdef CONFIG_SMP -+ } -+ next = sched_rq_first_task(rq); -+#endif -+ } -+#ifdef CONFIG_HIGH_RES_TIMERS -+ hrtick_start(rq, next->time_slice); -+#endif -+ /*printk(KERN_INFO "sched: choose_next_task(%d) next %px\n", cpu, -+ * next);*/ -+ return next; -+} -+ -+/* -+ * schedule() is the main scheduler function. -+ * -+ * The main means of driving the scheduler and thus entering this function are: -+ * -+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. -+ * -+ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return -+ * paths. For example, see arch/x86/entry_64.S. -+ * -+ * To drive preemption between tasks, the scheduler sets the flag in timer -+ * interrupt handler scheduler_tick(). -+ * -+ * 3. Wakeups don't really cause entry into schedule(). They add a -+ * task to the run-queue and that's it. -+ * -+ * Now, if the new task added to the run-queue preempts the current -+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets -+ * called on the nearest possible occasion: -+ * -+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): -+ * -+ * - in syscall or exception context, at the next outmost -+ * preempt_enable(). (this might be as soon as the wake_up()'s -+ * spin_unlock()!) -+ * -+ * - in IRQ context, return from interrupt-handler to -+ * preemptible context -+ * -+ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) -+ * then at the next: -+ * -+ * - cond_resched() call -+ * - explicit schedule() call -+ * - return from syscall or exception to user-space -+ * - return from interrupt-handler to user-space -+ * -+ * WARNING: must be called with preemption disabled! 
-+ */ -+static void __sched notrace __schedule(bool preempt) -+{ -+ struct task_struct *prev, *next; -+ unsigned long *switch_count; -+ unsigned long prev_state; -+ struct rq *rq; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ rq = cpu_rq(cpu); -+ prev = rq->curr; -+ -+ schedule_debug(prev, preempt); -+ -+ /* by passing sched_feat(HRTICK) checking which Alt schedule FW doesn't support */ -+ hrtick_clear(rq); -+ -+ local_irq_disable(); -+ rcu_note_context_switch(preempt); -+ -+ /* -+ * Make sure that signal_pending_state()->signal_pending() below -+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(): -+ * -+ * __set_current_state(@state) signal_wake_up() -+ * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) -+ * wake_up_state(p, state) -+ * LOCK rq->lock LOCK p->pi_state -+ * smp_mb__after_spinlock() smp_mb__after_spinlock() -+ * if (signal_pending_state()) if (p->state & @state) -+ * -+ * Also, the membarrier system call requires a full memory barrier -+ * after coming from user-space, before storing to rq->curr. -+ */ -+ raw_spin_lock(&rq->lock); -+ smp_mb__after_spinlock(); -+ -+ update_rq_clock(rq); -+ -+ switch_count = &prev->nivcsw; -+ /* -+ * We must load prev->state once (task_struct::state is volatile), such -+ * that: -+ * -+ * - we form a control dependency vs deactivate_task() below. -+ * - ptrace_{,un}freeze_traced() can change ->state underneath us. -+ */ -+ prev_state = prev->state; -+ if (!preempt && prev_state && prev_state == prev->state) { -+ if (signal_pending_state(prev_state, prev)) { -+ prev->state = TASK_RUNNING; -+ } else { -+ prev->sched_contributes_to_load = -+ (prev_state & TASK_UNINTERRUPTIBLE) && -+ !(prev_state & TASK_NOLOAD) && -+ !(prev->flags & PF_FROZEN); -+ -+ if (prev->sched_contributes_to_load) -+ rq->nr_uninterruptible++; -+ -+ /* -+ * __schedule() ttwu() -+ * prev_state = prev->state; if (p->on_rq && ...) -+ * if (prev_state) goto out; -+ * p->on_rq = 0; smp_acquire__after_ctrl_dep(); -+ * p->state = TASK_WAKING -+ * -+ * Where __schedule() and ttwu() have matching control dependencies. -+ * -+ * After this, schedule() must not care about p->state any more. -+ */ -+ sched_task_deactivate(prev, rq); -+ deactivate_task(prev, rq); -+ -+ if (prev->in_iowait) { -+ atomic_inc(&rq->nr_iowait); -+ delayacct_blkio_start(); -+ } -+ } -+ switch_count = &prev->nvcsw; -+ } -+ -+ check_curr(prev, rq); -+ -+ next = choose_next_task(rq, cpu, prev); -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ -+ -+ if (likely(prev != next)) { -+ next->last_ran = rq->clock_task; -+ rq->last_ts_switch = rq->clock; -+ -+ rq->nr_switches++; -+ /* -+ * RCU users of rcu_dereference(rq->curr) may not see -+ * changes to task_struct made by pick_next_task(). -+ */ -+ RCU_INIT_POINTER(rq->curr, next); -+ /* -+ * The membarrier system call requires each architecture -+ * to have a full memory barrier after updating -+ * rq->curr, before returning to user-space. -+ * -+ * Here are the schemes providing that barrier on the -+ * various architectures: -+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. -+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
-+ * - finish_lock_switch() for weakly-ordered -+ * architectures where spin_unlock is a full barrier, -+ * - switch_to() for arm64 (weakly-ordered, spin_unlock -+ * is a RELEASE barrier), -+ */ -+ ++*switch_count; -+ -+ psi_sched_switch(prev, next, !task_on_rq_queued(prev)); -+ -+ trace_sched_switch(preempt, prev, next); -+ -+ /* Also unlocks the rq: */ -+ rq = context_switch(rq, prev, next); -+ } else -+ raw_spin_unlock_irq(&rq->lock); -+ -+#ifdef CONFIG_SCHED_SMT -+ sg_balance_check(rq); -+#endif -+} -+ -+void __noreturn do_task_dead(void) -+{ -+ /* Causes final put_task_struct in finish_task_switch(): */ -+ set_special_state(TASK_DEAD); -+ -+ /* Tell freezer to ignore us: */ -+ current->flags |= PF_NOFREEZE; -+ -+ __schedule(false); -+ BUG(); -+ -+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -+ for (;;) -+ cpu_relax(); -+} -+ -+static inline void sched_submit_work(struct task_struct *tsk) -+{ -+ if (!tsk->state) -+ return; -+ -+ /* -+ * If a worker went to sleep, notify and ask workqueue whether -+ * it wants to wake up a task to maintain concurrency. -+ * As this function is called inside the schedule() context, -+ * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker and because wq_worker_sleeping() -+ * requires it. -+ */ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ preempt_disable(); -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_sleeping(tsk); -+ else -+ io_wq_worker_sleeping(tsk); -+ preempt_enable_no_resched(); -+ } -+ -+ if (tsk_is_pi_blocked(tsk)) -+ return; -+ -+ /* -+ * If we are going to sleep and we have plugged IO queued, -+ * make sure to submit it to avoid deadlocks. -+ */ -+ if (blk_needs_flush_plug(tsk)) -+ blk_schedule_flush_plug(tsk); -+} -+ -+static void sched_update_worker(struct task_struct *tsk) -+{ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_running(tsk); -+ else -+ io_wq_worker_running(tsk); -+ } -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ struct task_struct *tsk = current; -+ -+ sched_submit_work(tsk); -+ do { -+ preempt_disable(); -+ __schedule(false); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ sched_update_worker(tsk); -+} -+EXPORT_SYMBOL(schedule); -+ -+/* -+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted -+ * state (have scheduled out non-voluntarily) by making sure that all -+ * tasks have either left the run queue or have gone into user space. -+ * As idle tasks do not do either, they must not ever be preempted -+ * (schedule out non-voluntarily). -+ * -+ * schedule_idle() is similar to schedule_preempt_disable() except that it -+ * never enables preemption because it does not call sched_submit_work(). -+ */ -+void __sched schedule_idle(void) -+{ -+ /* -+ * As this skips calling sched_submit_work(), which the idle task does -+ * regardless because that function is a nop when the task is in a -+ * TASK_RUNNING state, make sure this isn't used someplace that the -+ * current task can be in any other state. Note, idle is always in the -+ * TASK_RUNNING state. 
-+ */ -+ WARN_ON_ONCE(current->state); -+ do { -+ __schedule(false); -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_CONTEXT_TRACKING -+asmlinkage __visible void __sched schedule_user(void) -+{ -+ /* -+ * If we come here after a random call to set_need_resched(), -+ * or we have been woken up remotely but the IPI has not yet arrived, -+ * we haven't yet exited the RCU idle mode. Do it here manually until -+ * we find a better solution. -+ * -+ * NB: There are buggy callers of this function. Ideally we -+ * should warn if prev_state != CONTEXT_USER, but that will trigger -+ * too frequently to make sense yet. -+ */ -+ enum ctx_state prev_state = exception_enter(); -+ schedule(); -+ exception_exit(prev_state); -+} -+#endif -+ -+/** -+ * schedule_preempt_disabled - called with preemption disabled -+ * -+ * Returns with preemption disabled. Note: preempt_count must be 1 -+ */ -+void __sched schedule_preempt_disabled(void) -+{ -+ sched_preempt_enable_no_resched(); -+ schedule(); -+ preempt_disable(); -+} -+ -+static void __sched notrace preempt_schedule_common(void) -+{ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ __schedule(true); -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ -+ /* -+ * Check again in case we missed a preemption opportunity -+ * between schedule and now. -+ */ -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_PREEMPTION -+/* -+ * This is the entry point to schedule() from in-kernel preemption -+ * off of preempt_enable. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule(void) -+{ -+ /* -+ * If there is a non-zero preempt_count or interrupts are disabled, -+ * we do not want to preempt the current task. Just return.. -+ */ -+ if (likely(!preemptible())) -+ return; -+ -+ preempt_schedule_common(); -+} -+NOKPROBE_SYMBOL(preempt_schedule); -+EXPORT_SYMBOL(preempt_schedule); -+ -+/** -+ * preempt_schedule_notrace - preempt_schedule called by tracing -+ * -+ * The tracing infrastructure uses preempt_enable_notrace to prevent -+ * recursion and tracing preempt enabling caused by the tracing -+ * infrastructure itself. But as tracing can happen in areas coming -+ * from userspace or just about to enter userspace, a preempt enable -+ * can occur before user_exit() is called. This will cause the scheduler -+ * to be called when the system is still in usermode. -+ * -+ * To prevent this, the preempt_enable_notrace will use this function -+ * instead of preempt_schedule() to exit user context if needed before -+ * calling the scheduler. 
-+ */ -+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) -+{ -+ enum ctx_state prev_ctx; -+ -+ if (likely(!preemptible())) -+ return; -+ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ /* -+ * Needs preempt disabled in case user_exit() is traced -+ * and the tracer calls preempt_enable_notrace() causing -+ * an infinite recursion. -+ */ -+ prev_ctx = exception_enter(); -+ __schedule(true); -+ exception_exit(prev_ctx); -+ -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ } while (need_resched()); -+} -+EXPORT_SYMBOL_GPL(preempt_schedule_notrace); -+ -+#endif /* CONFIG_PREEMPTION */ -+ -+/* -+ * This is the entry point to schedule() from kernel preemption -+ * off of irq context. -+ * Note, that this is called and return with irqs disabled. This will -+ * protect us against recursive calling from irq. -+ */ -+asmlinkage __visible void __sched preempt_schedule_irq(void) -+{ -+ enum ctx_state prev_state; -+ -+ /* Catch callers which need to be fixed */ -+ BUG_ON(preempt_count() || !irqs_disabled()); -+ -+ prev_state = exception_enter(); -+ -+ do { -+ preempt_disable(); -+ local_irq_enable(); -+ __schedule(true); -+ local_irq_disable(); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ -+ exception_exit(prev_state); -+} -+ -+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, -+ void *key) -+{ -+ WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC); -+ return try_to_wake_up(curr->private, mode, wake_flags); -+} -+EXPORT_SYMBOL(default_wake_function); -+ -+static inline void check_task_changed(struct rq *rq, struct task_struct *p) -+{ -+ /* Trigger resched if task sched_prio has been modified. */ -+ if (task_on_rq_queued(p) && sched_task_need_requeue(p, rq)) { -+ requeue_task(p, rq); -+ check_preempt_curr(rq); -+ } -+} -+ -+#ifdef CONFIG_RT_MUTEXES -+ -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ -+/* -+ * rt_mutex_setprio - set the current priority of a task -+ * @p: task to boost -+ * @pi_task: donor task -+ * -+ * This function changes the 'effective' priority of a task. It does -+ * not touch ->normal_prio like __setscheduler(). -+ * -+ * Used by the rt_mutex code to implement priority inheritance -+ * logic. Call site only calls if the priority of the task changed. -+ */ -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) -+{ -+ int prio; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. 
-+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio) -+ return; -+ -+ rq = __task_access_lock(p, &lock); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio) -+ goto out_unlock; -+ -+ /* -+ * Idle task boosting is a nono in general. There is one -+ * exception, when PREEMPT_RT and NOHZ is active: -+ * -+ * The idle task calls get_next_timer_interrupt() and holds -+ * the timer wheel base->lock on the CPU and another CPU wants -+ * to access the timer (probably to cancel it). We can safely -+ * ignore the boosting request, as the idle CPU runs this code -+ * with interrupts disabled and will complete the lock -+ * protected section without being interrupted. So there is no -+ * real need to boost. -+ */ -+ if (unlikely(p == rq->idle)) { -+ WARN_ON(p != rq->curr); -+ WARN_ON(p->pi_blocked_on); -+ goto out_unlock; -+ } -+ -+ trace_sched_pi_setprio(p, pi_task); -+ p->prio = prio; -+ update_task_priodl(p); -+ -+ check_task_changed(rq, p); -+out_unlock: -+ __task_access_unlock(p, lock); -+} -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} -+#endif -+ -+void set_user_nice(struct task_struct *p, long nice) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) -+ return; -+ /* -+ * We have to be careful, if called from sys_setpriority(), -+ * the task might be in the middle of scheduling on another CPU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ p->static_prio = NICE_TO_PRIO(nice); -+ /* -+ * The RT priorities are set via sched_setscheduler(), but we still -+ * allow the 'normal' nice value to be set - but as expected -+ * it wont have any effect on scheduling until the task is -+ * not SCHED_NORMAL/SCHED_BATCH: -+ */ -+ if (task_has_rt_policy(p)) -+ goto out_unlock; -+ -+ p->prio = effective_prio(p); -+ update_task_priodl(p); -+ -+ check_task_changed(rq, p); -+out_unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+EXPORT_SYMBOL(set_user_nice); -+ -+/* -+ * can_nice - check if a task can reduce its nice value -+ * @p: task -+ * @nice: nice value -+ */ -+int can_nice(const struct task_struct *p, const int nice) -+{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); -+} -+ -+#ifdef __ARCH_WANT_SYS_NICE -+ -+/* -+ * sys_nice - change the priority of the current process. -+ * @increment: priority increment -+ * -+ * sys_setpriority is a more generic, but much slower function that -+ * does similar things. -+ */ -+SYSCALL_DEFINE1(nice, int, increment) -+{ -+ long nice, retval; -+ -+ /* -+ * Setpriority might change our priority at the same moment. -+ * We don't have to worry. Conceptually one call occurs first -+ * and we have a single winner. 
-+ */ -+ -+ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); -+ nice = task_nice(current) + increment; -+ -+ nice = clamp_val(nice, MIN_NICE, MAX_NICE); -+ if (increment < 0 && !can_nice(current, nice)) -+ return -EPERM; -+ -+ retval = security_task_setnice(current, nice); -+ if (retval) -+ return retval; -+ -+ set_user_nice(current, nice); -+ return 0; -+} -+ -+#endif -+ -+/** -+ * idle_cpu - is a given CPU idle currently? -+ * @cpu: the processor in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int idle_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (rq->curr != rq->idle) -+ return 0; -+ -+ if (rq->nr_running) -+ return 0; -+ -+#ifdef CONFIG_SMP -+ if (rq->ttwu_pending) -+ return 0; -+#endif -+ -+ return 1; -+} -+ -+/** -+ * idle_task - return the idle task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * Return: The idle task for the cpu @cpu. -+ */ -+struct task_struct *idle_task(int cpu) -+{ -+ return cpu_rq(cpu)->idle; -+} -+ -+/** -+ * find_process_by_pid - find a process with a matching PID value. -+ * @pid: the pid in question. -+ * -+ * The task of @pid, if found. %NULL otherwise. -+ */ -+static inline struct task_struct *find_process_by_pid(pid_t pid) -+{ -+ return pid ? find_task_by_vpid(pid) : current; -+} -+ -+/* -+ * sched_setparam() passes in -1 for its policy, to let the functions -+ * it calls know not to change it. -+ */ -+#define SETPARAM_POLICY -1 -+ -+static void __setscheduler_params(struct task_struct *p, -+ const struct sched_attr *attr) -+{ -+ int policy = attr->sched_policy; -+ -+ if (policy == SETPARAM_POLICY) -+ policy = p->policy; -+ -+ p->policy = policy; -+ -+ /* -+ * allow normal nice value to be set, but will not have any -+ * effect on scheduling until the task not SCHED_NORMAL/ -+ * SCHED_BATCH -+ */ -+ p->static_prio = NICE_TO_PRIO(attr->sched_nice); -+ -+ /* -+ * __sched_setscheduler() ensures attr->sched_priority == 0 when -+ * !rt_policy. Always setting this ensures that things like -+ * getparam()/getattr() don't report silly values for !rt tasks. -+ */ -+ p->rt_priority = attr->sched_priority; -+ p->normal_prio = normal_prio(p); -+} -+ -+/* Actually do priority change: must hold rq lock. */ -+static void __setscheduler(struct rq *rq, struct task_struct *p, -+ const struct sched_attr *attr, bool keep_boost) -+{ -+ __setscheduler_params(p, attr); -+ -+ /* -+ * Keep a potential priority boosting if called from -+ * sched_setscheduler(). 
-+ */ -+ p->prio = normal_prio(p); -+ if (keep_boost) -+ p->prio = rt_effective_prio(p, p->prio); -+ update_task_priodl(p); -+} -+ -+/* -+ * check the target process has a UID that matches the current process's -+ */ -+static bool check_same_owner(struct task_struct *p) -+{ -+ const struct cred *cred = current_cred(), *pcred; -+ bool match; -+ -+ rcu_read_lock(); -+ pcred = __task_cred(p); -+ match = (uid_eq(cred->euid, pcred->euid) || -+ uid_eq(cred->euid, pcred->uid)); -+ rcu_read_unlock(); -+ return match; -+} -+ -+static int __sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, -+ bool user, bool pi) -+{ -+ const struct sched_attr dl_squash_attr = { -+ .size = sizeof(struct sched_attr), -+ .sched_policy = SCHED_FIFO, -+ .sched_nice = 0, -+ .sched_priority = 99, -+ }; -+ int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ int retval, oldpolicy = -1; -+ int policy = attr->sched_policy; -+ unsigned long flags; -+ struct rq *rq; -+ int reset_on_fork; -+ raw_spinlock_t *lock; -+ -+ /* The pi code expects interrupts enabled */ -+ BUG_ON(pi && in_interrupt()); -+ -+ /* -+ * Alt schedule FW supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO -+ */ -+ if (unlikely(SCHED_DEADLINE == policy)) { -+ attr = &dl_squash_attr; -+ policy = attr->sched_policy; -+ newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ } -+recheck: -+ /* Double check policy once rq lock held */ -+ if (policy < 0) { -+ reset_on_fork = p->sched_reset_on_fork; -+ policy = oldpolicy = p->policy; -+ } else { -+ reset_on_fork = !!(attr->sched_flags & SCHED_RESET_ON_FORK); -+ -+ if (policy > SCHED_IDLE) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & ~(SCHED_FLAG_ALL)) -+ return -EINVAL; -+ -+ /* -+ * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * SCHED_BATCH and SCHED_IDLE is 0. -+ */ -+ if (attr->sched_priority < 0 || -+ (p->mm && attr->sched_priority > MAX_USER_RT_PRIO - 1) || -+ (!p->mm && attr->sched_priority > MAX_RT_PRIO - 1)) -+ return -EINVAL; -+ if ((SCHED_RR == policy || SCHED_FIFO == policy) != -+ (attr->sched_priority != 0)) -+ return -EINVAL; -+ -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (SCHED_FIFO == policy || SCHED_RR == policy) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (attr->sched_priority > p->rt_priority && -+ attr->sched_priority > rlim_rtprio) -+ return -EPERM; -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ -+ if (user) { -+ retval = security_task_setscheduler(p); -+ if (retval) -+ return retval; -+ } -+ -+ if (pi) -+ cpuset_read_lock(); -+ -+ /* -+ * Make sure no PI-waiters arrive (or leave) while we are -+ * changing the priority of the task: -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ /* -+ * To be able to change p->policy safely, task_access_lock() -+ * must be called. -+ * IF use task_access_lock() here: -+ * For the task p which is not running, reading rq->stop is -+ * racy but acceptable as ->stop doesn't change much. -+ * An enhancemnet can be made to read rq->stop saftly. 
-+ */ -+ rq = __task_access_lock(p, &lock); -+ -+ /* -+ * Changing the policy of the stop threads its a very bad idea -+ */ -+ if (p == rq->stop) { -+ retval = -EINVAL; -+ goto unlock; -+ } -+ -+ /* -+ * If not changing anything there's no need to proceed further: -+ */ -+ if (unlikely(policy == p->policy)) { -+ if (rt_policy(policy) && attr->sched_priority != p->rt_priority) -+ goto change; -+ if (!rt_policy(policy) && -+ NICE_TO_PRIO(attr->sched_nice) != p->static_prio) -+ goto change; -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ retval = 0; -+ goto unlock; -+ } -+change: -+ -+ /* Re-check policy now with rq lock held */ -+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { -+ policy = oldpolicy = -1; -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ goto recheck; -+ } -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ -+ if (pi) { -+ /* -+ * Take priority boosted tasks into account. If the new -+ * effective priority is unchanged, we just store the new -+ * normal parameters and do not touch the scheduler class and -+ * the runqueue. This will be done when the task deboost -+ * itself. -+ */ -+ if (rt_effective_prio(p, newprio) == p->prio) { -+ __setscheduler_params(p, attr); -+ retval = 0; -+ goto unlock; -+ } -+ } -+ -+ __setscheduler(rq, p, attr, pi); -+ -+ check_task_changed(rq, p); -+ -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ if (pi) { -+ cpuset_read_unlock(); -+ rt_mutex_adjust_pi(p); -+ } -+ -+ preempt_enable(); -+ -+ return 0; -+ -+unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ return retval; -+} -+ -+static int _sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param, bool check) -+{ -+ struct sched_attr attr = { -+ .sched_policy = policy, -+ .sched_priority = param->sched_priority, -+ .sched_nice = PRIO_TO_NICE(p->static_prio), -+ }; -+ -+ /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ -+ if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { -+ attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ policy &= ~SCHED_RESET_ON_FORK; -+ attr.sched_policy = policy; -+ } -+ -+ return __sched_setscheduler(p, &attr, check, true); -+} -+ -+/** -+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Use sched_set_fifo(), read its comment. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * -+ * NOTE that the task may be already dead. -+ */ -+int sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, true); -+} -+ -+int sched_setattr(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, true, true); -+} -+ -+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, false, true); -+} -+ -+/** -+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. 
-+ * -+ * Just like sched_setscheduler, only don't bother checking if the -+ * current context has permission. For example, this is needed in -+ * stop_machine(): we create temporary high priority worker threads, -+ * but our caller might not have that capability. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+int sched_setscheduler_nocheck(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, false); -+} -+ -+/* -+ * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally -+ * incapable of resource management, which is the one thing an OS really should -+ * be doing. -+ * -+ * This is of course the reason it is limited to privileged users only. -+ * -+ * Worse still; it is fundamentally impossible to compose static priority -+ * workloads. You cannot take two correctly working static prio workloads -+ * and smash them together and still expect them to work. -+ * -+ * For this reason 'all' FIFO tasks the kernel creates are basically at: -+ * -+ * MAX_RT_PRIO / 2 -+ * -+ * The administrator _MUST_ configure the system, the kernel simply doesn't -+ * know enough information to make a sensible choice. -+ */ -+void sched_set_fifo(struct task_struct *p) -+{ -+ struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 }; -+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_fifo); -+ -+/* -+ * For when you don't much care about FIFO, but want to be above SCHED_NORMAL. -+ */ -+void sched_set_fifo_low(struct task_struct *p) -+{ -+ struct sched_param sp = { .sched_priority = 1 }; -+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_fifo_low); -+ -+void sched_set_normal(struct task_struct *p, int nice) -+{ -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ .sched_nice = nice, -+ }; -+ WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_normal); -+ -+static int -+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) -+{ -+ struct sched_param lparam; -+ struct task_struct *p; -+ int retval; -+ -+ if (!param || pid < 0) -+ return -EINVAL; -+ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) -+ return -EFAULT; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setscheduler(p, policy, &lparam); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/* -+ * Mimics kernel/events/core.c perf_copy_attr(). -+ */ -+static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr) -+{ -+ u32 size; -+ int ret; -+ -+ /* Zero the full structure, so that a short copy will be nice: */ -+ memset(attr, 0, sizeof(*attr)); -+ -+ ret = get_user(size, &uattr->size); -+ if (ret) -+ return ret; -+ -+ /* ABI compatibility quirk: */ -+ if (!size) -+ size = SCHED_ATTR_SIZE_VER0; -+ -+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) -+ goto err_size; -+ -+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); -+ if (ret) { -+ if (ret == -E2BIG) -+ goto err_size; -+ return ret; -+ } -+ -+ /* -+ * XXX: Do we want to be lenient like existing syscalls; or do we want -+ * to be strict and return an error on out-of-bounds values? 
-+ */ -+ attr->sched_nice = clamp(attr->sched_nice, -20, 19); -+ -+ /* sched/core.c uses zero here but we already know ret is zero */ -+ return 0; -+ -+err_size: -+ put_user(sizeof(*attr), &uattr->size); -+ return -E2BIG; -+} -+ -+/** -+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority -+ * @pid: the pid in question. -+ * @policy: new policy. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * @param: structure containing the new RT priority. -+ */ -+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) -+{ -+ if (policy < 0) -+ return -EINVAL; -+ -+ return do_sched_setscheduler(pid, policy, param); -+} -+ -+/** -+ * sys_sched_setparam - set/change the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); -+} -+ -+/** -+ * sys_sched_setattr - same as above, but with extended sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ */ -+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, flags) -+{ -+ struct sched_attr attr; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || flags) -+ return -EINVAL; -+ -+ retval = sched_copy_attr(uattr, &attr); -+ if (retval) -+ return retval; -+ -+ if ((int)attr.sched_policy < 0) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (p != NULL) -+ retval = sched_setattr(p, &attr); -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread -+ * @pid: the pid in question. -+ * -+ * Return: On success, the policy of the thread. Otherwise, a negative error -+ * code. -+ */ -+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) -+{ -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (pid < 0) -+ goto out_nounlock; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (p) { -+ retval = security_task_getscheduler(p); -+ if (!retval) -+ retval = p->policy; -+ } -+ rcu_read_unlock(); -+ -+out_nounlock: -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the RT priority. -+ * -+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error -+ * code. -+ */ -+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ struct sched_param lp = { .sched_priority = 0 }; -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (!param || pid < 0) -+ goto out_nounlock; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ if (task_has_rt_policy(p)) -+ lp.sched_priority = p->rt_priority; -+ rcu_read_unlock(); -+ -+ /* -+ * This one might sleep, we cannot do it with a spinlock held ... -+ */ -+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; -+ -+out_nounlock: -+ return retval; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/* -+ * Copy the kernel size attribute structure (which might be larger -+ * than what user-space knows about) to user-space. 
-+ * -+ * Note that all cases are valid: user-space buffer can be larger or -+ * smaller than the kernel-space buffer. The usual case is that both -+ * have the same size. -+ */ -+static int -+sched_attr_copy_to_user(struct sched_attr __user *uattr, -+ struct sched_attr *kattr, -+ unsigned int usize) -+{ -+ unsigned int ksize = sizeof(*kattr); -+ -+ if (!access_ok(uattr, usize)) -+ return -EFAULT; -+ -+ /* -+ * sched_getattr() ABI forwards and backwards compatibility: -+ * -+ * If usize == ksize then we just copy everything to user-space and all is good. -+ * -+ * If usize < ksize then we only copy as much as user-space has space for, -+ * this keeps ABI compatibility as well. We skip the rest. -+ * -+ * If usize > ksize then user-space is using a newer version of the ABI, -+ * which part the kernel doesn't know about. Just ignore it - tooling can -+ * detect the kernel's knowledge of attributes from the attr->size value -+ * which is set to ksize in this case. -+ */ -+ kattr->size = min(usize, ksize); -+ -+ if (copy_to_user(uattr, kattr, kattr->size)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ * @usize: sizeof(attr) for fwd/bwd comp. -+ * @flags: for future extension. -+ */ -+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, usize, unsigned int, flags) -+{ -+ struct sched_attr kattr = { }; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || usize > PAGE_SIZE || -+ usize < SCHED_ATTR_SIZE_VER0 || flags) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ kattr.sched_policy = p->policy; -+ if (p->sched_reset_on_fork) -+ kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ if (task_has_rt_policy(p)) -+ kattr.sched_priority = p->rt_priority; -+ else -+ kattr.sched_nice = task_nice(p); -+ -+#ifdef CONFIG_UCLAMP_TASK -+ kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; -+ kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; -+#endif -+ -+ rcu_read_unlock(); -+ -+ return sched_attr_copy_to_user(uattr, &kattr, usize); -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ -+ cpumask_var_t cpus_allowed, new_mask; -+ struct task_struct *p; -+ int retval; -+ -+ get_online_cpus(); -+ rcu_read_lock(); -+ -+ p = find_process_by_pid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ put_online_cpus(); -+ return -ESRCH; -+ } -+ -+ /* Prevent p going away */ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->flags & PF_NO_SETAFFINITY) { -+ retval = -EINVAL; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ retval = -EPERM; -+ if (!check_same_owner(p)) { -+ rcu_read_lock(); -+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { -+ rcu_read_unlock(); -+ goto out_unlock; -+ } -+ rcu_read_unlock(); -+ } -+ -+ retval = security_task_setscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ cpuset_cpus_allowed(p, cpus_allowed); -+ cpumask_and(new_mask, in_mask, cpus_allowed); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ 
-+ if (!retval) { -+ cpuset_cpus_allowed(p, cpus_allowed); -+ if (!cpumask_subset(new_mask, cpus_allowed)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. Just reset the cpus_allowed to the -+ * cpuset's cpus_allowed -+ */ -+ cpumask_copy(new_mask, cpus_allowed); -+ goto again; -+ } -+ } -+out_unlock: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_allowed); -+out_put_task: -+ put_task_struct(p); -+ put_online_cpus(); -+ return retval; -+} -+ -+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, -+ struct cpumask *new_mask) -+{ -+ if (len < cpumask_size()) -+ cpumask_clear(new_mask); -+ else if (len > cpumask_size()) -+ len = cpumask_size(); -+ -+ return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; -+} -+ -+/** -+ * sys_sched_setaffinity - set the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to the new CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ cpumask_var_t new_mask; -+ int retval; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); -+ if (retval == 0) -+ retval = sched_setaffinity(pid, new_mask); -+ free_cpumask_var(new_mask); -+ return retval; -+} -+ -+long sched_getaffinity(pid_t pid, cpumask_t *mask) -+{ -+ struct task_struct *p; -+ raw_spinlock_t *lock; -+ unsigned long flags; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ task_access_lock_irqsave(p, &lock, &flags); -+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+out_unlock: -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getaffinity - get the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to hold the current CPU mask -+ * -+ * Return: size of CPU mask copied to user_mask_ptr on success. An -+ * error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ int ret; -+ cpumask_var_t mask; -+ -+ if ((len * BITS_PER_BYTE) < nr_cpu_ids) -+ return -EINVAL; -+ if (len & (sizeof(unsigned long)-1)) -+ return -EINVAL; -+ -+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ ret = sched_getaffinity(pid, mask); -+ if (ret == 0) { -+ unsigned int retlen = min_t(size_t, len, cpumask_size()); -+ -+ if (copy_to_user(user_mask_ptr, mask, retlen)) -+ ret = -EFAULT; -+ else -+ ret = retlen; -+ } -+ free_cpumask_var(mask); -+ -+ return ret; -+} -+ -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. It does this by -+ * scheduling away the current task. If it still has the earliest deadline -+ * it will be scheduled again as the next task. -+ * -+ * Return: 0. 
-+ */ -+static void do_sched_yield(void) -+{ -+ struct rq *rq; -+ struct rq_flags rf; -+ -+ if (!sched_yield_type) -+ return; -+ -+ rq = this_rq_lock_irq(&rf); -+ -+ schedstat_inc(rq->yld_count); -+ -+ if (1 == sched_yield_type) { -+ if (!rt_task(current)) -+ do_sched_yield_type_1(current, rq); -+ } else if (2 == sched_yield_type) { -+ if (rq->nr_running > 1) -+ rq->skip = current; -+ } -+ -+ /* -+ * Since we are going to call schedule() anyway, there's -+ * no need to preempt or enable interrupts: -+ */ -+ preempt_disable(); -+ raw_spin_unlock(&rq->lock); -+ sched_preempt_enable_no_resched(); -+ -+ schedule(); -+} -+ -+SYSCALL_DEFINE0(sched_yield) -+{ -+ do_sched_yield(); -+ return 0; -+} -+ -+#ifndef CONFIG_PREEMPTION -+int __sched _cond_resched(void) -+{ -+ if (should_resched(0)) { -+ preempt_schedule_common(); -+ return 1; -+ } -+ rcu_all_qs(); -+ return 0; -+} -+EXPORT_SYMBOL(_cond_resched); -+#endif -+ -+/* -+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, -+ * call schedule, and on return reacquire the lock. -+ * -+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level -+ * operations here to prevent schedule() from being called twice (once via -+ * spin_unlock(), once by hand). -+ */ -+int __cond_resched_lock(spinlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held(lock); -+ -+ if (spin_needbreak(lock) || resched) { -+ spin_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ spin_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_lock); -+ -+/** -+ * yield - yield the current processor to other threads. -+ * -+ * Do not ever use this function, there's a 99% chance you're doing it wrong. -+ * -+ * The scheduler is at all times free to pick the calling task as the most -+ * eligible task to run, if removing the yield() call from your code breaks -+ * it, its already broken. -+ * -+ * Typical broken usage is: -+ * -+ * while (!event) -+ * yield(); -+ * -+ * where one assumes that yield() will let 'the other' process run that will -+ * make event true. If the current task is a SCHED_FIFO task that will never -+ * happen. Never use yield() as a progress guarantee!! -+ * -+ * If you want to use yield() to wait for something, use wait_event(). -+ * If you want to use yield() to be 'nice' for others, use cond_resched(). -+ * If you still want to use yield(), do not! -+ */ -+void __sched yield(void) -+{ -+ set_current_state(TASK_RUNNING); -+ do_sched_yield(); -+} -+EXPORT_SYMBOL(yield); -+ -+/** -+ * yield_to - yield the current processor to another thread in -+ * your thread group, or accelerate that thread toward the -+ * processor it's on. -+ * @p: target task -+ * @preempt: whether task preemption is allowed or not -+ * -+ * It's the caller's job to ensure that the target task struct -+ * can't go away on us before we can do any checks. -+ * -+ * In Alt schedule FW, yield_to is not supported. -+ * -+ * Return: -+ * true (>0) if we indeed boosted the target task. -+ * false (0) if we failed to boost the target. -+ * -ESRCH if there's no task to yield to. 
-+ */ -+int __sched yield_to(struct task_struct *p, bool preempt) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(yield_to); -+ -+int io_schedule_prepare(void) -+{ -+ int old_iowait = current->in_iowait; -+ -+ current->in_iowait = 1; -+ blk_schedule_flush_plug(current); -+ -+ return old_iowait; -+} -+ -+void io_schedule_finish(int token) -+{ -+ current->in_iowait = token; -+} -+ -+/* -+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so -+ * that process accounting knows that this is a task in IO wait state. -+ * -+ * But don't do that if it is a deliberate, throttling IO wait (this task -+ * has set its backing_dev_info: the queue against which it should throttle) -+ */ -+ -+long __sched io_schedule_timeout(long timeout) -+{ -+ int token; -+ long ret; -+ -+ token = io_schedule_prepare(); -+ ret = schedule_timeout(timeout); -+ io_schedule_finish(token); -+ -+ return ret; -+} -+EXPORT_SYMBOL(io_schedule_timeout); -+ -+void __sched io_schedule(void) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ schedule(); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(io_schedule); -+ -+/** -+ * sys_sched_get_priority_max - return maximum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the maximum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_max, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = MAX_USER_RT_PRIO-1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * sys_sched_get_priority_min - return minimum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the minimum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_min, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) -+{ -+ struct task_struct *p; -+ int retval; -+ -+ alt_sched_debug(); -+ -+ if (pid < 0) -+ return -EINVAL; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ rcu_read_unlock(); -+ -+ *t = ns_to_timespec64(sched_timeslice_ns); -+ return 0; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/** -+ * sys_sched_rr_get_interval - return the default timeslice of a process. -+ * @pid: pid of the process. -+ * @interval: userspace pointer to the timeslice value. -+ * -+ * -+ * Return: On success, 0 and the timeslice is in @interval. Otherwise, -+ * an error code. 
-+ */ -+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, -+ struct __kernel_timespec __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_timespec64(&t, interval); -+ -+ return retval; -+} -+ -+#ifdef CONFIG_COMPAT_32BIT_TIME -+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, -+ struct old_timespec32 __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_old_timespec32(&t, interval); -+ return retval; -+} -+#endif -+ -+void sched_show_task(struct task_struct *p) -+{ -+ unsigned long free = 0; -+ int ppid; -+ -+ if (!try_get_task_stack(p)) -+ return; -+ -+ pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p)); -+ -+ if (p->state == TASK_RUNNING) -+ pr_cont(" running task "); -+#ifdef CONFIG_DEBUG_STACK_USAGE -+ free = stack_not_used(p); -+#endif -+ ppid = 0; -+ rcu_read_lock(); -+ if (pid_alive(p)) -+ ppid = task_pid_nr(rcu_dereference(p->real_parent)); -+ rcu_read_unlock(); -+ pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", -+ free, task_pid_nr(p), ppid, -+ (unsigned long)task_thread_info(p)->flags); -+ -+ print_worker_info(KERN_INFO, p); -+ show_stack(p, NULL, KERN_INFO); -+ put_task_stack(p); -+} -+EXPORT_SYMBOL_GPL(sched_show_task); -+ -+static inline bool -+state_filter_match(unsigned long state_filter, struct task_struct *p) -+{ -+ /* no filter, everything matches */ -+ if (!state_filter) -+ return true; -+ -+ /* filter, but doesn't match */ -+ if (!(p->state & state_filter)) -+ return false; -+ -+ /* -+ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows -+ * TASK_KILLABLE). -+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) -+ return false; -+ -+ return true; -+} -+ -+ -+void show_state_filter(unsigned long state_filter) -+{ -+ struct task_struct *g, *p; -+ -+ rcu_read_lock(); -+ for_each_process_thread(g, p) { -+ /* -+ * reset the NMI-timeout, listing all files on a slow -+ * console might take a lot of time: -+ * Also, reset softlockup watchdogs on all CPUs, because -+ * another CPU might be blocked waiting for us to process -+ * an IPI. -+ */ -+ touch_nmi_watchdog(); -+ touch_all_softlockup_watchdogs(); -+ if (state_filter_match(state_filter, p)) -+ sched_show_task(p); -+ } -+ -+#ifdef CONFIG_SCHED_DEBUG -+ /* TODO: Alt schedule FW should support this -+ if (!state_filter) -+ sysrq_sched_debug_show(); -+ */ -+#endif -+ rcu_read_unlock(); -+ /* -+ * Only show locks if all tasks are dumped: -+ */ -+ if (!state_filter) -+ debug_show_all_locks(); -+} -+ -+void dump_cpu_task(int cpu) -+{ -+ pr_info("Task dump for CPU %d:\n", cpu); -+ sched_show_task(cpu_curr(cpu)); -+} -+ -+/** -+ * init_idle - set up an idle thread for a given CPU -+ * @idle: task in question -+ * @cpu: CPU the idle task belongs to -+ * -+ * NOTE: this function does not set the idle thread's NEED_RESCHED -+ * flag, to make booting more robust. 
-+ */ -+void init_idle(struct task_struct *idle, int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ __sched_fork(0, idle); -+ -+ raw_spin_lock_irqsave(&idle->pi_lock, flags); -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ idle->last_ran = rq->clock_task; -+ idle->state = TASK_RUNNING; -+ idle->flags |= PF_IDLE; -+ sched_queue_init_idle(rq, idle); -+ -+ scs_task_reset(idle); -+ kasan_unpoison_task_stack(idle); -+ -+#ifdef CONFIG_SMP -+ /* -+ * It's possible that init_idle() gets called multiple times on a task, -+ * in that case do_set_cpus_allowed() will not do the right thing. -+ * -+ * And since this is boot we can forgo the serialisation. -+ */ -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); -+#endif -+ -+ /* Silence PROVE_RCU */ -+ rcu_read_lock(); -+ __set_task_cpu(idle, cpu); -+ rcu_read_unlock(); -+ -+ rq->idle = idle; -+ rcu_assign_pointer(rq->curr, idle); -+ idle->on_cpu = 1; -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); -+ -+ /* Set the preempt count _outside_ the spinlocks! */ -+ init_idle_preempt_count(idle, cpu); -+ -+ ftrace_graph_init_idle_task(idle, cpu); -+ vtime_init_idle(idle, cpu); -+#ifdef CONFIG_SMP -+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -+#endif -+} -+ -+#ifdef CONFIG_SMP -+ -+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, -+ const struct cpumask __maybe_unused *trial) -+{ -+ return 1; -+} -+ -+int task_can_attach(struct task_struct *p, -+ const struct cpumask *cs_cpus_allowed) -+{ -+ int ret = 0; -+ -+ /* -+ * Kthreads which disallow setaffinity shouldn't be moved -+ * to a new cpuset; we don't want to change their CPU -+ * affinity and isolating such threads by their set of -+ * allowed nodes is unnecessary. Thus, cpusets are not -+ * applicable for such threads. This prevents checking for -+ * success of set_cpus_allowed_ptr() on all attached tasks -+ * before cpus_mask may be changed. -+ */ -+ if (p->flags & PF_NO_SETAFFINITY) -+ ret = -EINVAL; -+ -+ return ret; -+} -+ -+bool sched_smp_initialized __read_mostly; -+ -+#ifdef CONFIG_HOTPLUG_CPU -+/* -+ * Ensures that the idle task is using init_mm right before its CPU goes -+ * offline. -+ */ -+void idle_task_exit(void) -+{ -+ struct mm_struct *mm = current->active_mm; -+ -+ BUG_ON(current != this_rq()->idle); -+ -+ if (mm != &init_mm) { -+ switch_mm(mm, &init_mm, current); -+ finish_arch_post_lock_switch(); -+ } -+ -+ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ -+} -+ -+/* -+ * Migrate all tasks from the rq, sleeping tasks will be migrated by -+ * try_to_wake_up()->select_task_rq(). -+ * -+ * Called with rq->lock held even though we'er in stop_machine() and -+ * there's no concurrency possible, we hold the required locks anyway -+ * because of lock validation efforts. -+ */ -+static void migrate_tasks(struct rq *dead_rq) -+{ -+ struct rq *rq = dead_rq; -+ struct task_struct *p, *stop = rq->stop; -+ int count = 0; -+ -+ /* -+ * Fudge the rq selection such that the below task selection loop -+ * doesn't get stuck on the currently eligible stop task. -+ * -+ * We're currently inside stop_machine() and the rq is either stuck -+ * in the stop_machine_cpu_stop() loop, or we're executing this code, -+ * either way we should never end up calling schedule() until we're -+ * done here. 
-+ */ -+ rq->stop = NULL; -+ -+ p = sched_rq_first_task(rq); -+ while (p != rq->idle) { -+ int dest_cpu; -+ -+ /* skip the running task */ -+ if (task_running(p) || 1 == p->nr_cpus_allowed) { -+ p = sched_rq_next_task(p, rq); -+ continue; -+ } -+ -+ /* -+ * Rules for changing task_struct::cpus_allowed are holding -+ * both pi_lock and rq->lock, such that holding either -+ * stabilizes the mask. -+ * -+ * Drop rq->lock is not quite as disastrous as it usually is -+ * because !cpu_active at this point, which means load-balance -+ * will not interfere. Also, stop-machine. -+ */ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ /* -+ * Since we're inside stop-machine, _nothing_ should have -+ * changed the task, WARN if weird stuff happened, because in -+ * that case the above rq->lock drop is a fail too. -+ */ -+ if (WARN_ON(task_rq(p) != rq || !task_on_rq_queued(p))) { -+ raw_spin_unlock(&p->pi_lock); -+ p = sched_rq_next_task(p, rq); -+ continue; -+ } -+ -+ count++; -+ /* Find suitable destination for @next, with force if needed. */ -+ dest_cpu = select_fallback_rq(dead_rq->cpu, p); -+ rq = __migrate_task(rq, p, dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ rq = dead_rq; -+ raw_spin_lock(&rq->lock); -+ /* Check queued task all over from the header again */ -+ p = sched_rq_first_task(rq); -+ } -+ -+ rq->stop = stop; -+} -+ -+static void set_rq_offline(struct rq *rq) -+{ -+ if (rq->online) -+ rq->online = false; -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+static void set_rq_online(struct rq *rq) -+{ -+ if (!rq->online) -+ rq->online = true; -+} -+ -+/* -+ * used to mark begin/end of suspend/resume: -+ */ -+static int num_cpus_frozen; -+ -+/* -+ * Update cpusets according to cpu_active mask. If cpusets are -+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper -+ * around partition_sched_domains(). -+ * -+ * If we come here as part of a suspend/resume, don't touch cpusets because we -+ * want to restore it back to its original state upon resume anyway. -+ */ -+static void cpuset_cpu_active(void) -+{ -+ if (cpuhp_tasks_frozen) { -+ /* -+ * num_cpus_frozen tracks how many CPUs are involved in suspend -+ * resume sequence. As long as this is not the last online -+ * operation in the resume sequence, just build a single sched -+ * domain, ignoring cpusets. -+ */ -+ partition_sched_domains(1, NULL, NULL); -+ if (--num_cpus_frozen) -+ return; -+ /* -+ * This is the last CPU online operation. So fall through and -+ * restore the original sched domains by considering the -+ * cpuset configurations. -+ */ -+ cpuset_force_rebuild(); -+ } -+ -+ cpuset_update_active_cpus(); -+} -+ -+static int cpuset_cpu_inactive(unsigned int cpu) -+{ -+ if (!cpuhp_tasks_frozen) { -+ cpuset_update_active_cpus(); -+ } else { -+ num_cpus_frozen++; -+ partition_sched_domains(1, NULL, NULL); -+ } -+ return 0; -+} -+ -+int sched_cpu_activate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going up, increment the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_inc_cpuslocked(&sched_smt_present); -+#endif -+ set_cpu_active(cpu, true); -+ -+ if (sched_smp_initialized) -+ cpuset_cpu_active(); -+ -+ /* -+ * Put the rq online, if not already. This happens: -+ * -+ * 1) In the early boot process, because we build the real domains -+ * after all cpus have been brought up. 
-+ * -+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the -+ * domains. -+ */ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_online(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ int ret; -+ -+ set_cpu_active(cpu, false); -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. -+ */ -+ synchronize_rcu(); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going down, decrement the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) { -+ static_branch_dec_cpuslocked(&sched_smt_present); -+ if (!static_branch_likely(&sched_smt_present)) -+ cpumask_clear(&sched_sg_idle_mask); -+ } -+#endif -+ -+ if (!sched_smp_initialized) -+ return 0; -+ -+ ret = cpuset_cpu_inactive(cpu); -+ if (ret) { -+ set_cpu_active(cpu, true); -+ return ret; -+ } -+ return 0; -+} -+ -+static void sched_rq_cpu_starting(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ rq->calc_load_update = calc_load_update; -+} -+ -+int sched_cpu_starting(unsigned int cpu) -+{ -+ sched_rq_cpu_starting(cpu); -+ sched_tick_start(cpu); -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+int sched_cpu_dying(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ /* Handle pending wakeups and then migrate everything off */ -+ sched_tick_stop(cpu); -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_offline(rq); -+ migrate_tasks(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ hrtick_clear(rq); -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_SMP -+static void sched_init_topology_cpumask_early(void) -+{ -+ int cpu, level; -+ cpumask_t *tmp; -+ -+ for_each_possible_cpu(cpu) { -+ for (level = 0; level < NR_CPU_AFFINITY_CHK_LEVEL; level++) { -+ tmp = &(per_cpu(sched_cpu_affinity_masks, cpu)[level]); -+ cpumask_copy(tmp, cpu_possible_mask); -+ cpumask_clear_cpu(cpu, tmp); -+ } -+ per_cpu(sched_cpu_llc_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ per_cpu(sched_cpu_affinity_end_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); -+ /*per_cpu(sd_llc_id, cpu) = cpu;*/ -+ } -+} -+ -+#define TOPOLOGY_CPUMASK(name, mask, last) \ -+ if (cpumask_and(chk, chk, mask)) \ -+ printk(KERN_INFO "sched: cpu#%02d affinity mask: 0x%08lx - "#name,\ -+ cpu, (chk++)->bits[0]); \ -+ if (!last) \ -+ cpumask_complement(chk, mask) -+ -+static void sched_init_topology_cpumask(void) -+{ -+ int cpu; -+ cpumask_t *chk; -+ -+ for_each_online_cpu(cpu) { -+ /* take chance to reset time slice for idle tasks */ -+ cpu_rq(cpu)->idle->time_slice = sched_timeslice_ns; -+ -+ chk = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ -+ cpumask_complement(chk, cpumask_of(cpu)); -+#ifdef CONFIG_SCHED_SMT -+ TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); -+#endif -+ per_cpu(sd_llc_id, cpu) = cpumask_first(cpu_coregroup_mask(cpu)); -+ per_cpu(sched_cpu_llc_mask, cpu) = chk; -+ TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask(cpu), false); -+ -+ TOPOLOGY_CPUMASK(core, topology_core_cpumask(cpu), false); -+ -+ TOPOLOGY_CPUMASK(others, cpu_online_mask, true); -+ -+ per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; -+ printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", -+ cpu, per_cpu(sd_llc_id, cpu), -+ (int) (per_cpu(sched_cpu_llc_mask, cpu) - -+ 
&(per_cpu(sched_cpu_affinity_masks, cpu)[0]))); -+ } -+} -+#endif -+ -+void __init sched_init_smp(void) -+{ -+ /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -+ BUG(); -+ -+ sched_init_topology_cpumask(); -+ -+ sched_smp_initialized = true; -+} -+#else -+void __init sched_init_smp(void) -+{ -+ cpu_rq(0)->idle->time_slice = sched_timeslice_ns; -+} -+#endif /* CONFIG_SMP */ -+ -+int in_sched_functions(unsigned long addr) -+{ -+ return in_lock_functions(addr) || -+ (addr >= (unsigned long)__sched_text_start -+ && addr < (unsigned long)__sched_text_end); -+} -+ -+#ifdef CONFIG_CGROUP_SCHED -+/* task group related information */ -+struct task_group { -+ struct cgroup_subsys_state css; -+ -+ struct rcu_head rcu; -+ struct list_head list; -+ -+ struct task_group *parent; -+ struct list_head siblings; -+ struct list_head children; -+}; -+ -+/* -+ * Default task group. -+ * Every task in system belongs to this group at bootup. -+ */ -+struct task_group root_task_group; -+LIST_HEAD(task_groups); -+ -+/* Cacheline aligned slab cache for task_group */ -+static struct kmem_cache *task_group_cache __read_mostly; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void __init sched_init(void) -+{ -+ int i; -+ struct rq *rq; -+ -+ printk(KERN_INFO ALT_SCHED_VERSION_MSG); -+ -+ wait_bit_init(); -+ -+#ifdef CONFIG_SMP -+ for (i = 0; i < SCHED_BITS; i++) -+ cpumask_copy(&sched_rq_watermark[i], cpu_present_mask); -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+ task_group_cache = KMEM_CACHE(task_group, 0); -+ -+ list_add(&root_task_group.list, &task_groups); -+ INIT_LIST_HEAD(&root_task_group.children); -+ INIT_LIST_HEAD(&root_task_group.siblings); -+#endif /* CONFIG_CGROUP_SCHED */ -+ for_each_possible_cpu(i) { -+ rq = cpu_rq(i); -+ -+ sched_queue_init(rq); -+ rq->watermark = IDLE_WM; -+ rq->skip = NULL; -+ -+ raw_spin_lock_init(&rq->lock); -+ rq->nr_running = rq->nr_uninterruptible = 0; -+ rq->calc_load_active = 0; -+ rq->calc_load_update = jiffies + LOAD_FREQ; -+#ifdef CONFIG_SMP -+ rq->online = false; -+ rq->cpu = i; -+ -+#ifdef CONFIG_SCHED_SMT -+ rq->active_balance = 0; -+#endif -+ -+#ifdef CONFIG_NO_HZ_COMMON -+ rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); -+#endif -+#endif /* CONFIG_SMP */ -+ rq->nr_switches = 0; -+ -+ hrtick_rq_init(rq); -+ atomic_set(&rq->nr_iowait, 0); -+ } -+#ifdef CONFIG_SMP -+ /* Set rq->online for cpu 0 */ -+ cpu_rq(0)->online = true; -+#endif -+ /* -+ * The boot idle thread does lazy MMU switching as well: -+ */ -+ mmgrab(&init_mm); -+ enter_lazy_tlb(&init_mm, current); -+ -+ /* -+ * Make us the idle thread. Technically, schedule() should not be -+ * called from this thread, however somewhere below it might be, -+ * but because we are the idle thread, we just pick up running again -+ * when this runqueue becomes "idle". 
-+ */ -+ init_idle(current, smp_processor_id()); -+ -+ calc_load_update = jiffies + LOAD_FREQ; -+ -+#ifdef CONFIG_SMP -+ idle_thread_set_boot_cpu(); -+ -+ sched_init_topology_cpumask_early(); -+#endif /* SMP */ -+ -+ init_schedstats(); -+ -+ psi_init(); -+} -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+static inline int preempt_count_equals(int preempt_offset) -+{ -+ int nested = preempt_count() + rcu_preempt_depth(); -+ -+ return (nested == preempt_offset); -+} -+ -+void __might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* -+ * Blocking primitives will set (and therefore destroy) current->state, -+ * since we will exit with TASK_RUNNING make sure we enter with it, -+ * otherwise we will destroy state. -+ */ -+ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, -+ "do not call blocking ops when !TASK_RUNNING; " -+ "state=%lx set at [<%p>] %pS\n", -+ current->state, -+ (void *)current->task_state_change, -+ (void *)current->task_state_change); -+ -+ ___might_sleep(file, line, preempt_offset); -+} -+EXPORT_SYMBOL(__might_sleep); -+ -+void ___might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* Ratelimiting timestamp: */ -+ static unsigned long prev_jiffy; -+ -+ unsigned long preempt_disable_ip; -+ -+ /* WARN_ON_ONCE() by default, no rate limit required: */ -+ rcu_sleep_check(); -+ -+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ !is_idle_task(current) && !current->non_block_count) || -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) -+ return; -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ /* Save this before calling printk(), since that will clobber it: */ -+ preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ printk(KERN_ERR -+ "BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ printk(KERN_ERR -+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); -+ -+ if (task_stack_end_corrupted(current)) -+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ -+ debug_show_held_locks(current); -+ if (irqs_disabled()) -+ print_irqtrace_events(current); -+#ifdef CONFIG_DEBUG_PREEMPT -+ if (!preempt_count_equals(preempt_offset)) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); -+ } -+#endif -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL(___might_sleep); -+ -+void __cant_sleep(const char *file, int line, int preempt_offset) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > preempt_offset) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); -+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_sleep); -+#endif -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+void normalize_rt_tasks(void) -+{ -+ struct task_struct *g, *p; -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ }; -+ -+ read_lock(&tasklist_lock); -+ for_each_process_thread(g, p) { 
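-+		/*
-+		 * SysRq-n ("nice all RT tasks") path: every user-space
-+		 * real-time task is pushed back to SCHED_NORMAL through
-+		 * __sched_setscheduler() with user/pi checks disabled, so a
-+		 * runaway RT task can no longer starve the rest of the system.
-+		 */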
-+ /* -+ * Only normalize user tasks: -+ */ -+ if (p->flags & PF_KTHREAD) -+ continue; -+ -+ if (!rt_task(p)) { -+ /* -+ * Renice negative nice level userspace -+ * tasks back to 0: -+ */ -+ if (task_nice(p) < 0) -+ set_user_nice(p, 0); -+ continue; -+ } -+ -+ __sched_setscheduler(p, &attr, false, false); -+ } -+ read_unlock(&tasklist_lock); -+} -+#endif /* CONFIG_MAGIC_SYSRQ */ -+ -+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) -+/* -+ * These functions are only useful for the IA64 MCA handling, or kdb. -+ * -+ * They can only be called when the whole system has been -+ * stopped - every CPU needs to be quiescent, and no scheduling -+ * activity can take place. Using them for anything else would -+ * be a serious bug, and as a result, they aren't even visible -+ * under any other configuration. -+ */ -+ -+/** -+ * curr_task - return the current task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ * -+ * Return: The current task for @cpu. -+ */ -+struct task_struct *curr_task(int cpu) -+{ -+ return cpu_curr(cpu); -+} -+ -+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ -+ -+#ifdef CONFIG_IA64 -+/** -+ * ia64_set_curr_task - set the current task for a given CPU. -+ * @cpu: the processor in question. -+ * @p: the task pointer to set. -+ * -+ * Description: This function must only be used when non-maskable interrupts -+ * are serviced on a separate stack. It allows the architecture to switch the -+ * notion of the current task on a CPU in a non-blocking manner. This function -+ * must be called with all CPU's synchronised, and interrupts disabled, the -+ * and caller must save the original value of the current task (see -+ * curr_task() above) and restore that value before reenabling interrupts and -+ * re-starting the system. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ */ -+void ia64_set_curr_task(int cpu, struct task_struct *p) -+{ -+ cpu_curr(cpu) = p; -+} -+ -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+static void sched_free_group(struct task_group *tg) -+{ -+ kmem_cache_free(task_group_cache, tg); -+} -+ -+/* allocate runqueue etc for a new task group */ -+struct task_group *sched_create_group(struct task_group *parent) -+{ -+ struct task_group *tg; -+ -+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); -+ if (!tg) -+ return ERR_PTR(-ENOMEM); -+ -+ return tg; -+} -+ -+void sched_online_group(struct task_group *tg, struct task_group *parent) -+{ -+} -+ -+/* rcu callback to free various structures associated with a task group */ -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ /* Now it should be safe to free those cfs_rqs */ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+void sched_destroy_group(struct task_group *tg) -+{ -+ /* Wait for possible concurrent references to cfs_rqs complete */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ -+void sched_offline_group(struct task_group *tg) -+{ -+} -+ -+static inline struct task_group *css_tg(struct cgroup_subsys_state *css) -+{ -+ return css ? 
container_of(css, struct task_group, css) : NULL; -+} -+ -+static struct cgroup_subsys_state * -+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -+{ -+ struct task_group *parent = css_tg(parent_css); -+ struct task_group *tg; -+ -+ if (!parent) { -+ /* This is early initialization for the top cgroup */ -+ return &root_task_group.css; -+ } -+ -+ tg = sched_create_group(parent); -+ if (IS_ERR(tg)) -+ return ERR_PTR(-ENOMEM); -+ return &tg->css; -+} -+ -+/* Expose task group only after completing cgroup initialization */ -+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ struct task_group *parent = css_tg(css->parent); -+ -+ if (parent) -+ sched_online_group(tg, parent); -+ return 0; -+} -+ -+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ sched_offline_group(tg); -+} -+ -+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ /* -+ * Relies on the RCU grace period between css_released() and this. -+ */ -+ sched_free_group(tg); -+} -+ -+static void cpu_cgroup_fork(struct task_struct *task) -+{ -+} -+ -+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) -+{ -+ return 0; -+} -+ -+static void cpu_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+ -+static struct cftype cpu_legacy_files[] = { -+ { } /* Terminate */ -+}; -+ -+ -+static struct cftype cpu_files[] = { -+ { } /* terminate */ -+}; -+ -+static int cpu_extra_stat_show(struct seq_file *sf, -+ struct cgroup_subsys_state *css) -+{ -+ return 0; -+} -+ -+struct cgroup_subsys cpu_cgrp_subsys = { -+ .css_alloc = cpu_cgroup_css_alloc, -+ .css_online = cpu_cgroup_css_online, -+ .css_released = cpu_cgroup_css_released, -+ .css_free = cpu_cgroup_css_free, -+ .css_extra_stat_show = cpu_extra_stat_show, -+ .fork = cpu_cgroup_fork, -+ .can_attach = cpu_cgroup_can_attach, -+ .attach = cpu_cgroup_attach, -+ .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, -+ .early_init = true, -+ .threaded = true, -+}; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+#undef CREATE_TRACE_POINTS -diff --git a/kernel/sched/alt_debug.c b/kernel/sched/alt_debug.c -new file mode 100644 -index 000000000000..1212a031700e ---- /dev/null -+++ b/kernel/sched/alt_debug.c -@@ -0,0 +1,31 @@ -+/* -+ * kernel/sched/alt_debug.c -+ * -+ * Print the alt scheduler debugging details -+ * -+ * Author: Alfred Chen -+ * Date : 2020 -+ */ -+#include "sched.h" -+ -+/* -+ * This allows printing both to /proc/sched_debug and -+ * to the console -+ */ -+#define SEQ_printf(m, x...) 
\ -+ do { \ -+ if (m) \ -+ seq_printf(m, x); \ -+ else \ -+ pr_cont(x); \ -+ } while (0) -+ -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{ -+ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -new file mode 100644 -index 000000000000..99be2c51c88d ---- /dev/null -+++ b/kernel/sched/alt_sched.h -@@ -0,0 +1,555 @@ -+#ifndef ALT_SCHED_H -+#define ALT_SCHED_H -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#ifdef CONFIG_PARAVIRT -+# include -+#endif -+ -+#include "cpupri.h" -+ -+#ifdef CONFIG_SCHED_BMQ -+#include "bmq.h" -+#endif -+#ifdef CONFIG_SCHED_PDS -+#include "pds.h" -+#endif -+ -+/* task_struct::on_rq states: */ -+#define TASK_ON_RQ_QUEUED 1 -+#define TASK_ON_RQ_MIGRATING 2 -+ -+static inline int task_on_rq_queued(struct task_struct *p) -+{ -+ return p->on_rq == TASK_ON_RQ_QUEUED; -+} -+ -+static inline int task_on_rq_migrating(struct task_struct *p) -+{ -+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+} -+ -+/* -+ * wake flags -+ */ -+#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -+#define WF_FORK 0x02 /* child wakeup after fork */ -+#define WF_MIGRATED 0x04 /* internal use, task got migrated */ -+#define WF_ON_CPU 0x08 /* Wakee is on_rq */ -+ -+/* -+ * This is the main, per-CPU runqueue data structure. -+ * This data should only be modified by the local cpu. 
-+ */ -+struct rq { -+ /* runqueue lock: */ -+ raw_spinlock_t lock; -+ -+ struct task_struct __rcu *curr; -+ struct task_struct *idle, *stop, *skip; -+ struct mm_struct *prev_mm; -+ -+#ifdef CONFIG_SCHED_BMQ -+ struct bmq queue; -+#endif -+#ifdef CONFIG_SCHED_PDS -+ struct skiplist_node sl_header; -+#endif -+ unsigned long watermark; -+ -+ /* switch count */ -+ u64 nr_switches; -+ -+ atomic_t nr_iowait; -+ -+#ifdef CONFIG_MEMBARRIER -+ int membarrier_state; -+#endif -+ -+#ifdef CONFIG_SMP -+ int cpu; /* cpu of this runqueue */ -+ bool online; -+ -+ unsigned int ttwu_pending; -+ unsigned char nohz_idle_balance; -+ unsigned char idle_balance; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ struct sched_avg avg_irq; -+#endif -+ -+#ifdef CONFIG_SCHED_SMT -+ int active_balance; -+ struct cpu_stop_work active_balance_work; -+#endif -+#endif /* CONFIG_SMP */ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ u64 prev_irq_time; -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+#ifdef CONFIG_PARAVIRT -+ u64 prev_steal_time; -+#endif /* CONFIG_PARAVIRT */ -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ u64 prev_steal_time_rq; -+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ -+ -+ /* calc_load related fields */ -+ unsigned long calc_load_update; -+ long calc_load_active; -+ -+ u64 clock, last_tick; -+ u64 last_ts_switch; -+ u64 clock_task; -+ -+ unsigned long nr_running; -+ unsigned long nr_uninterruptible; -+ -+#ifdef CONFIG_SCHED_HRTICK -+#ifdef CONFIG_SMP -+ call_single_data_t hrtick_csd; -+#endif -+ struct hrtimer hrtick_timer; -+#endif -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+ /* latency stats */ -+ struct sched_info rq_sched_info; -+ unsigned long long rq_cpu_time; -+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ -+ -+ /* sys_sched_yield() stats */ -+ unsigned int yld_count; -+ -+ /* schedule() stats */ -+ unsigned int sched_switch; -+ unsigned int sched_count; -+ unsigned int sched_goidle; -+ -+ /* try_to_wake_up() stats */ -+ unsigned int ttwu_count; -+ unsigned int ttwu_local; -+#endif /* CONFIG_SCHEDSTATS */ -+ -+#ifdef CONFIG_CPU_IDLE -+ /* Must be inspected within a rcu lock section */ -+ struct cpuidle_state *idle_state; -+#endif -+ -+#ifdef CONFIG_NO_HZ_COMMON -+#ifdef CONFIG_SMP -+ call_single_data_t nohz_csd; -+#endif -+ atomic_t nohz_flags; -+#endif /* CONFIG_NO_HZ_COMMON */ -+}; -+ -+extern unsigned long calc_load_update; -+extern atomic_long_t calc_load_tasks; -+ -+extern void calc_global_load_tick(struct rq *this_rq); -+extern long calc_load_fold_active(struct rq *this_rq, long adjust); -+ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) -+#define this_rq() this_cpu_ptr(&runqueues) -+#define task_rq(p) cpu_rq(task_cpu(p)) -+#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -+#define raw_rq() raw_cpu_ptr(&runqueues) -+ -+#ifdef CONFIG_SMP -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+void register_sched_domain_sysctl(void); -+void unregister_sched_domain_sysctl(void); -+#else -+static inline void register_sched_domain_sysctl(void) -+{ -+} -+static inline void unregister_sched_domain_sysctl(void) -+{ -+} -+#endif -+ -+extern bool sched_smp_initialized; -+ -+enum { -+ BASE_CPU_AFFINITY_CHK_LEVEL = 1, -+#ifdef CONFIG_SCHED_SMT -+ SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+#ifdef CONFIG_SCHED_MC -+ MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+ NR_CPU_AFFINITY_CHK_LEVEL -+}; -+ -+DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); -+ -+static inline int __best_mask_cpu(int cpu, const 
cpumask_t *cpumask, -+ const cpumask_t *mask) -+{ -+ while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) -+ mask++; -+ return cpu; -+} -+ -+static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) -+{ -+ return cpumask_test_cpu(cpu, cpumask)? cpu : -+ __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); -+} -+ -+extern void flush_smp_call_function_from_idle(void); -+ -+#else /* !CONFIG_SMP */ -+static inline void flush_smp_call_function_from_idle(void) { } -+#endif -+ -+#ifndef arch_scale_freq_tick -+static __always_inline -+void arch_scale_freq_tick(void) -+{ -+} -+#endif -+ -+#ifndef arch_scale_freq_capacity -+static __always_inline -+unsigned long arch_scale_freq_capacity(int cpu) -+{ -+ return SCHED_CAPACITY_SCALE; -+} -+#endif -+ -+static inline u64 __rq_clock_broken(struct rq *rq) -+{ -+ return READ_ONCE(rq->clock); -+} -+ -+static inline u64 rq_clock(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock; -+} -+ -+static inline u64 rq_clock_task(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock_task; -+} -+ -+/* -+ * {de,en}queue flags: -+ * -+ * DEQUEUE_SLEEP - task is no longer runnable -+ * ENQUEUE_WAKEUP - task just became runnable -+ * -+ */ -+ -+#define DEQUEUE_SLEEP 0x01 -+ -+#define ENQUEUE_WAKEUP 0x01 -+ -+ -+/* -+ * Below are scheduler API which using in other kernel code -+ * It use the dummy rq_flags -+ * ToDo : BMQ need to support these APIs for compatibility with mainline -+ * scheduler code. -+ */ -+struct rq_flags { -+ unsigned long flags; -+}; -+ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock); -+ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock); -+ -+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(&rq->lock); -+} -+ -+static inline void -+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) -+ __releases(rq->lock) -+ __releases(p->pi_lock) -+{ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+} -+ -+static inline void -+rq_unlock_irq(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+static inline struct rq * -+this_rq_lock_irq(struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ -+ return rq; -+} -+ -+static inline int task_current(struct rq *rq, struct task_struct *p) -+{ -+ return rq->curr == p; -+} -+ -+static inline bool task_running(struct task_struct *p) -+{ -+ return p->on_cpu; -+} -+ -+extern struct static_key_false sched_schedstats; -+ -+#ifdef CONFIG_CPU_IDLE -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+ rq->idle_state = idle_state; -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ WARN_ON(!rcu_read_lock_held()); -+ return rq->idle_state; -+} -+#else -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ return NULL; -+} -+#endif -+ -+static inline 
int cpu_of(const struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ return rq->cpu; -+#else -+ return 0; -+#endif -+} -+ -+#include "stats.h" -+ -+#ifdef CONFIG_NO_HZ_COMMON -+#define NOHZ_BALANCE_KICK_BIT 0 -+#define NOHZ_STATS_KICK_BIT 1 -+ -+#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) -+#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) -+ -+#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK) -+ -+#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) -+ -+/* TODO: needed? -+extern void nohz_balance_exit_idle(struct rq *rq); -+#else -+static inline void nohz_balance_exit_idle(struct rq *rq) { } -+*/ -+#endif -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+struct irqtime { -+ u64 total; -+ u64 tick_delta; -+ u64 irq_start_time; -+ struct u64_stats_sync sync; -+}; -+ -+DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -+ -+/* -+ * Returns the irqtime minus the softirq time computed by ksoftirqd. -+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime -+ * and never move forward. -+ */ -+static inline u64 irq_time_read(int cpu) -+{ -+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); -+ unsigned int seq; -+ u64 total; -+ -+ do { -+ seq = __u64_stats_fetch_begin(&irqtime->sync); -+ total = irqtime->total; -+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); -+ -+ return total; -+} -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+ -+#ifdef CONFIG_CPU_FREQ -+DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); -+ -+/** -+ * cpufreq_update_util - Take a note about CPU utilization changes. -+ * @rq: Runqueue to carry out the update for. -+ * @flags: Update reason flags. -+ * -+ * This function is called by the scheduler on the CPU whose utilization is -+ * being updated. -+ * -+ * It can only be called from RCU-sched read-side critical sections. -+ * -+ * The way cpufreq is currently arranged requires it to evaluate the CPU -+ * performance state (frequency/voltage) on a regular basis to prevent it from -+ * being stuck in a completely inadequate performance level for too long. -+ * That is not guaranteed to happen if the updates are only triggered from CFS -+ * and DL, though, because they may not be coming in if only RT tasks are -+ * active all the time (or there are RT tasks only). -+ * -+ * As a workaround for that issue, this function is called periodically by the -+ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, -+ * but that really is a band-aid. Going forward it should be replaced with -+ * solutions targeted more specifically at RT tasks. -+ */ -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+ data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); -+ if (data) -+ data->func(data, rq_clock(rq), flags); -+} -+#else -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} -+#endif /* CONFIG_CPU_FREQ */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+extern int __init sched_tick_offload_init(void); -+#else -+static inline int sched_tick_offload_init(void) { return 0; } -+#endif -+ -+#ifdef arch_scale_freq_capacity -+#ifndef arch_scale_freq_invariant -+#define arch_scale_freq_invariant() (true) -+#endif -+#else /* arch_scale_freq_capacity */ -+#define arch_scale_freq_invariant() (false) -+#endif -+ -+extern void schedule_idle(void); -+ -+/* -+ * !! For sched_setattr_nocheck() (kernel) only !! -+ * -+ * This is actually gross. 
:( -+ * -+ * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE -+ * tasks, but still be able to sleep. We need this on platforms that cannot -+ * atomically change clock frequency. Remove once fast switching will be -+ * available on such platforms. -+ * -+ * SUGOV stands for SchedUtil GOVernor. -+ */ -+#define SCHED_FLAG_SUGOV 0x10000000 -+ -+#ifdef CONFIG_MEMBARRIER -+/* -+ * The scheduler provides memory barriers required by membarrier between: -+ * - prior user-space memory accesses and store to rq->membarrier_state, -+ * - store to rq->membarrier_state and following user-space memory accesses. -+ * In the same way it provides those guarantees around store to rq->curr. -+ */ -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+ int membarrier_state; -+ -+ if (prev_mm == next_mm) -+ return; -+ -+ membarrier_state = atomic_read(&next_mm->membarrier_state); -+ if (READ_ONCE(rq->membarrier_state) == membarrier_state) -+ return; -+ -+ WRITE_ONCE(rq->membarrier_state, membarrier_state); -+} -+#else -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+} -+#endif -+ -+#ifdef CONFIG_NUMA -+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); -+#else -+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return nr_cpu_ids; -+} -+#endif -+ -+void swake_up_all_locked(struct swait_queue_head *q); -+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+#endif /* ALT_SCHED_H */ -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -new file mode 100644 -index 000000000000..aff0bb30a884 ---- /dev/null -+++ b/kernel/sched/bmq.h -@@ -0,0 +1,20 @@ -+#ifndef BMQ_H -+#define BMQ_H -+ -+/* bits: -+ * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ -+#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1) -+#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) -+ -+struct bmq { -+ DECLARE_BITMAP(bitmap, SCHED_BITS); -+ struct list_head heads[SCHED_BITS]; -+}; -+ -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); -+} -+ -+#endif -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -new file mode 100644 -index 000000000000..ad9a7c448da7 ---- /dev/null -+++ b/kernel/sched/bmq_imp.h -@@ -0,0 +1,185 @@ -+#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" -+ -+/* -+ * BMQ only routines -+ */ -+#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) -+#define boost_threshold(p) (sched_timeslice_ns >>\ -+ (15 - MAX_PRIORITY_ADJ - (p)->boost_prio)) -+ -+static inline void boost_task(struct task_struct *p) -+{ -+ int limit; -+ -+ switch (p->policy) { -+ case SCHED_NORMAL: -+ limit = -MAX_PRIORITY_ADJ; -+ break; -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ limit = 0; -+ break; -+ default: -+ return; -+ } -+ -+ if (p->boost_prio > limit) -+ p->boost_prio--; -+} -+ -+static inline void deboost_task(struct task_struct *p) -+{ -+ if (p->boost_prio < MAX_PRIORITY_ADJ) -+ p->boost_prio++; -+} -+ -+/* -+ * Common interfaces -+ */ -+static inline int task_sched_prio(struct task_struct *p, struct rq *rq) -+{ -+ return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq); -+ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ p->time_slice = sched_timeslice_ns; -+ -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { -+ if (SCHED_RR != p->policy) -+ deboost_task(p); -+ requeue_task(p, rq); -+ } -+} -+ -+static inline void update_task_priodl(struct task_struct *p) {} -+ -+static inline unsigned long sched_queue_watermark(struct rq *rq) -+{ -+ return find_first_bit(rq->queue.bitmap, SCHED_BITS); -+} -+ -+static inline void sched_queue_init(struct rq *rq) -+{ -+ struct bmq *q = &rq->queue; -+ int i; -+ -+ bitmap_zero(q->bitmap, SCHED_BITS); -+ for(i = 0; i < SCHED_BITS; i++) -+ INIT_LIST_HEAD(&q->heads[i]); -+} -+ -+static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) -+{ -+ struct bmq *q = &rq->queue; -+ -+ idle->bmq_idx = IDLE_TASK_SCHED_PRIO; -+ INIT_LIST_HEAD(&q->heads[idle->bmq_idx]); -+ list_add(&idle->bmq_node, &q->heads[idle->bmq_idx]); -+ set_bit(idle->bmq_idx, q->bitmap); -+} -+ -+/* -+ * This routine used in bmq scheduler only which assume the idle task in the bmq -+ */ -+static inline struct task_struct *sched_rq_first_task(struct rq *rq) -+{ -+ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); -+ const struct list_head *head = &rq->queue.heads[idx]; -+ -+ return list_first_entry(head, struct task_struct, bmq_node); -+} -+ -+static inline struct task_struct * -+sched_rq_next_task(struct task_struct *p, struct rq *rq) -+{ -+ unsigned long idx = p->bmq_idx; -+ struct list_head *head = &rq->queue.heads[idx]; -+ -+ if (list_is_last(&p->bmq_node, head)) { -+ idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); -+ head = &rq->queue.heads[idx]; -+ -+ return list_first_entry(head, struct task_struct, bmq_node); -+ } -+ -+ return list_next_entry(p, bmq_node); -+} -+ -+#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -+ sched_info_dequeued(rq, p); \ -+ \ -+ list_del(&p->bmq_node); \ -+ if (list_empty(&rq->queue.heads[p->bmq_idx])) { \ -+ clear_bit(p->bmq_idx, rq->queue.bitmap);\ -+ func; \ -+ } -+ -+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -+ sched_info_queued(rq, p); \ -+ psi_enqueue(p, flags); \ -+ \ -+ p->bmq_idx = task_sched_prio(p, rq); \ -+ list_add_tail(&p->bmq_node, &rq->queue.heads[p->bmq_idx]); \ -+ set_bit(p->bmq_idx, rq->queue.bitmap) -+ -+#define __SCHED_REQUEUE_TASK(p, rq, func) \ -+{ \ -+ int idx = task_sched_prio(p, rq); \ -+\ -+ list_del(&p->bmq_node); \ -+ list_add_tail(&p->bmq_node, &rq->queue.heads[idx]); \ -+ if (idx != p->bmq_idx) { \ -+ if (list_empty(&rq->queue.heads[p->bmq_idx])) \ -+ clear_bit(p->bmq_idx, rq->queue.bitmap); \ -+ p->bmq_idx = idx; \ -+ set_bit(p->bmq_idx, rq->queue.bitmap); \ -+ func; \ -+ } \ -+} -+ -+static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) -+{ -+ return (task_sched_prio(p, rq) != p->bmq_idx); -+} -+ -+static void sched_task_fork(struct task_struct *p, struct rq *rq) -+{ -+ p->boost_prio = (p->boost_prio < 0) ? -+ p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; -+} -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). 
-+ */ -+int task_prio(const struct task_struct *p) -+{ -+ if (p->prio < MAX_RT_PRIO) -+ return (p->prio - MAX_RT_PRIO); -+ return (p->prio - MAX_RT_PRIO + p->boost_prio); -+} -+ -+static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) -+{ -+ p->boost_prio = MAX_PRIORITY_ADJ; -+} -+ -+static void sched_task_ttwu(struct task_struct *p) -+{ -+ if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) -+ boost_task(p); -+} -+ -+static void sched_task_deactivate(struct task_struct *p, struct rq *rq) -+{ -+ if (rq_switch_time(rq) < boost_threshold(p)) -+ boost_task(p); -+} -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index e39008242cf4..5963716fe391 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -183,6 +183,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - -+#ifndef CONFIG_SCHED_ALT - /* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. -@@ -300,6 +301,13 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) - - return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); - } -+#else /* CONFIG_SCHED_ALT */ -+static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) -+{ -+ sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); -+ return sg_cpu->max; -+} -+#endif - - /** - * sugov_iowait_reset() - Reset the IO boost status of a CPU. -@@ -443,7 +451,9 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } - */ - static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) - { -+#ifndef CONFIG_SCHED_ALT - if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) -+#endif - sg_policy->limits_changed = true; - } - -@@ -686,6 +696,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - } - - ret = sched_setattr_nocheck(thread, &attr); -+ - if (ret) { - kthread_stop(thread); - pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); -@@ -912,6 +923,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) - cpufreq_governor_init(schedutil_gov); - - #ifdef CONFIG_ENERGY_MODEL -+#ifndef CONFIG_SCHED_ALT - extern bool sched_energy_update; - extern struct mutex sched_energy_mutex; - -@@ -942,4 +954,10 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - } - - } -+#else /* CONFIG_SCHED_ALT */ -+void sched_cpufreq_governor_change(struct cpufreq_policy *policy, -+ struct cpufreq_governor *old_gov) -+{ -+} -+#endif - #endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index 5a55d2300452..66a0ab7165f0 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -122,7 +122,7 @@ void account_user_time(struct task_struct *p, u64 cputime) - p->utime += cputime; - account_group_user_time(p, cputime); - -- index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; -+ index = task_running_nice(p) ? CPUTIME_NICE : CPUTIME_USER; - - /* Add user time to cpustat. */ - task_group_account_field(p, index, cputime); -@@ -146,7 +146,7 @@ void account_guest_time(struct task_struct *p, u64 cputime) - p->gtime += cputime; - - /* Add guest time to cpustat. 
*/ -- if (task_nice(p) > 0) { -+ if (task_running_nice(p)) { - cpustat[CPUTIME_NICE] += cputime; - cpustat[CPUTIME_GUEST_NICE] += cputime; - } else { -@@ -269,7 +269,7 @@ static inline u64 account_other_time(u64 max) - #ifdef CONFIG_64BIT - static inline u64 read_sum_exec_runtime(struct task_struct *t) - { -- return t->se.sum_exec_runtime; -+ return tsk_seruntime(t); - } - #else - static u64 read_sum_exec_runtime(struct task_struct *t) -@@ -279,7 +279,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) - struct rq *rq; - - rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -+ ns = tsk_seruntime(t); - task_rq_unlock(rq, t, &rf); - - return ns; -@@ -614,7 +614,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - task_cputime(p, &cputime.utime, &cputime.stime); -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index f324dc36fc43..a6b566bda65b 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -369,6 +369,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_ALT - /* - * idle-task scheduling class. - */ -@@ -482,3 +483,4 @@ const struct sched_class idle_sched_class - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -new file mode 100644 -index 000000000000..7fdeace7e8a5 ---- /dev/null -+++ b/kernel/sched/pds.h -@@ -0,0 +1,14 @@ -+#ifndef PDS_H -+#define PDS_H -+ -+/* bits: -+ * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ -+#define SCHED_BITS (MAX_RT_PRIO + 20 + 1) -+#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio > DEFAULT_PRIO); -+} -+ -+#endif -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -new file mode 100644 -index 000000000000..6baee5e961b9 ---- /dev/null -+++ b/kernel/sched/pds_imp.h -@@ -0,0 +1,257 @@ -+#define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" -+ -+static const u64 user_prio2deadline[NICE_WIDTH] = { -+/* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, -+/* -15 */ 6754965, 7430461, 8173507, 8990857, 9889942, -+/* -10 */ 10878936, 11966829, 13163511, 14479862, 15927848, -+/* -5 */ 17520632, 19272695, 21199964, 23319960, 25651956, -+/* 0 */ 28217151, 31038866, 34142752, 37557027, 41312729, -+/* 5 */ 45444001, 49988401, 54987241, 60485965, 66534561, -+/* 10 */ 73188017, 80506818, 88557499, 97413248, 107154572, -+/* 15 */ 117870029, 129657031, 142622734, 156885007, 172573507 -+}; -+ -+static const unsigned char dl_level_map[] = { -+/* 0 4 8 12 */ -+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, -+/* 16 20 24 28 */ -+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, -+/* 32 36 40 44 */ -+ 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, -+/* 48 52 56 60 */ -+ 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, -+/* 64 68 72 76 */ -+ 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 7, 6, 5, 4, 3, 2, -+/* 80 84 88 92 */ -+ 1, 0 -+}; -+ -+static inline int -+task_sched_prio(const struct task_struct *p, const struct rq *rq) -+{ -+ size_t delta; -+ -+ if (p == rq->idle) -+ return IDLE_TASK_SCHED_PRIO; -+ -+ if (p->prio < MAX_RT_PRIO) -+ return p->prio; -+ -+ 
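-+	/*
-+	 * Map the remaining deadline window onto one of the 20 normal
-+	 * priority levels: the distance from the task's deadline to the
-+	 * farthest possible deadline (user_prio2deadline[39], i.e. nice +19)
-+	 * is scaled down in ~2ms steps (>> 21) and looked up in dl_level_map.
-+	 * For example, a nice-0 task with a freshly set deadline lands at
-+	 * delta = (172573507 - 28217151) >> 21 = 68 -> level 10, and moves
-+	 * towards level 0 (the highest non-RT level) as its deadline
-+	 * approaches and expires.
-+	 */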
delta = (rq->clock + user_prio2deadline[39] - p->deadline) >> 21; -+ delta = min((size_t)delta, ARRAY_SIZE(dl_level_map) - 1); -+ -+ return MAX_RT_PRIO + dl_level_map[delta]; -+} -+ -+static inline void update_task_priodl(struct task_struct *p) -+{ -+ p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq); -+ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ -+ p->time_slice = sched_timeslice_ns; -+ -+ if (p->prio >= MAX_RT_PRIO) -+ p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; -+ update_task_priodl(p); -+ -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) -+ requeue_task(p, rq); -+} -+ -+/* -+ * pds_skiplist_task_search -- search function used in PDS run queue skip list -+ * node insert operation. -+ * @it: iterator pointer to the node in the skip list -+ * @node: pointer to the skiplist_node to be inserted -+ * -+ * Returns true if key of @it is less or equal to key value of @node, otherwise -+ * false. -+ */ -+static inline bool -+pds_skiplist_task_search(struct skiplist_node *it, struct skiplist_node *node) -+{ -+ return (skiplist_entry(it, struct task_struct, sl_node)->priodl <= -+ skiplist_entry(node, struct task_struct, sl_node)->priodl); -+} -+ -+/* -+ * Define the skip list insert function for PDS -+ */ -+DEFINE_SKIPLIST_INSERT_FUNC(pds_skiplist_insert, pds_skiplist_task_search); -+ -+/* -+ * Init the queue structure in rq -+ */ -+static inline void sched_queue_init(struct rq *rq) -+{ -+ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); -+} -+ -+/* -+ * Init idle task and put into queue structure of rq -+ * IMPORTANT: may be called multiple times for a single cpu -+ */ -+static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) -+{ -+ /*printk(KERN_INFO "sched: init(%d) - %px\n", cpu_of(rq), idle);*/ -+ int default_prio = idle->prio; -+ -+ idle->prio = MAX_PRIO; -+ idle->deadline = 0ULL; -+ update_task_priodl(idle); -+ -+ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); -+ -+ idle->sl_node.level = idle->sl_level; -+ pds_skiplist_insert(&rq->sl_header, &idle->sl_node); -+ -+ idle->prio = default_prio; -+} -+ -+/* -+ * This routine assume that the idle task always in queue -+ */ -+static inline struct task_struct *sched_rq_first_task(struct rq *rq) -+{ -+ struct skiplist_node *node = rq->sl_header.next[0]; -+ -+ BUG_ON(node == &rq->sl_header); -+ return skiplist_entry(node, struct task_struct, sl_node); -+} -+ -+static inline struct task_struct * -+sched_rq_next_task(struct task_struct *p, struct rq *rq) -+{ -+ struct skiplist_node *next = p->sl_node.next[0]; -+ -+ BUG_ON(next == &rq->sl_header); -+ return skiplist_entry(next, struct task_struct, sl_node); -+} -+ -+static inline unsigned long sched_queue_watermark(struct rq *rq) -+{ -+ return task_sched_prio(sched_rq_first_task(rq), rq); -+} -+ -+#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -+ sched_info_dequeued(rq, p); \ -+ \ -+ if (skiplist_del_init(&rq->sl_header, &p->sl_node)) { \ -+ func; \ -+ } -+ -+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -+ sched_info_queued(rq, p); \ -+ psi_enqueue(p, flags); \ -+ \ -+ p->sl_node.level = p->sl_level; \ -+ pds_skiplist_insert(&rq->sl_header, &p->sl_node) -+ -+/* -+ * Requeue a task @p to @rq -+ */ -+#define __SCHED_REQUEUE_TASK(p, rq, func) \ -+{\ -+ bool b_first = skiplist_del_init(&rq->sl_header, &p->sl_node); \ 
-+\ -+ p->sl_node.level = p->sl_level; \ -+ if (pds_skiplist_insert(&rq->sl_header, &p->sl_node) || b_first) { \ -+ func; \ -+ } \ -+} -+ -+static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) -+{ -+ struct skiplist_node *node = p->sl_node.prev[0]; -+ -+ if (node != &rq->sl_header) { -+ struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); -+ -+ if (t->priodl > p->priodl) -+ return true; -+ } -+ -+ node = p->sl_node.next[0]; -+ if (node != &rq->sl_header) { -+ struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); -+ -+ if (t->priodl < p->priodl) -+ return true; -+ } -+ -+ return false; -+} -+ -+/* -+ * pds_skiplist_random_level -- Returns a pseudo-random level number for skip -+ * list node which is used in PDS run queue. -+ * -+ * In current implementation, based on testing, the first 8 bits in microseconds -+ * of niffies are suitable for random level population. -+ * find_first_bit() is used to satisfy p = 0.5 between each levels, and there -+ * should be platform hardware supported instruction(known as ctz/clz) to speed -+ * up this function. -+ * The skiplist level for a task is populated when task is created and doesn't -+ * change in task's life time. When task is being inserted into run queue, this -+ * skiplist level is set to task's sl_node->level, the skiplist insert function -+ * may change it based on current level of the skip lsit. -+ */ -+static inline int pds_skiplist_random_level(const struct task_struct *p) -+{ -+ long unsigned int randseed; -+ -+ /* -+ * 1. Some architectures don't have better than microsecond resolution -+ * so mask out ~microseconds as a factor of the random seed for skiplist -+ * insertion. -+ * 2. Use address of task structure pointer as another factor of the -+ * random seed for task burst forking scenario. -+ */ -+ randseed = (task_rq(p)->clock ^ (long unsigned int)p) >> 10; -+ -+ return find_first_bit(&randseed, NUM_SKIPLIST_LEVEL - 1); -+} -+ -+static void sched_task_fork(struct task_struct *p, struct rq *rq) -+{ -+ p->sl_level = pds_skiplist_random_level(p); -+ if (p->prio >= MAX_RT_PRIO) -+ p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; -+ update_task_priodl(p); -+} -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). 
-+ */ -+int task_prio(const struct task_struct *p) -+{ -+ int ret; -+ -+ if (p->prio < MAX_RT_PRIO) -+ return (p->prio - MAX_RT_PRIO); -+ -+ preempt_disable(); -+ ret = task_sched_prio(p, this_rq()) - MAX_RT_PRIO; -+ preempt_enable(); -+ -+ return ret; -+} -+ -+static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) -+{ -+ time_slice_expired(p, rq); -+} -+ -+static void sched_task_ttwu(struct task_struct *p) {} -+static void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} -diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c -index 2c613e1cff3a..0103b2a7201d 100644 ---- a/kernel/sched/pelt.c -+++ b/kernel/sched/pelt.c -@@ -270,6 +270,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load) - WRITE_ONCE(sa->util_avg, sa->util_sum / divider); - } - -+#ifndef CONFIG_SCHED_ALT - /* - * sched_entity: - * -@@ -387,8 +388,9 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - - return 0; - } -+#endif - --#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) - /* - * thermal: - * -diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h -index 795e43e02afc..856163dac896 100644 ---- a/kernel/sched/pelt.h -+++ b/kernel/sched/pelt.h -@@ -1,13 +1,15 @@ - #ifdef CONFIG_SMP - #include "sched-pelt.h" - -+#ifndef CONFIG_SCHED_ALT - int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); - int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); - int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); - int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); - int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); -+#endif - --#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) - int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity); - - static inline u64 thermal_load_avg(struct rq *rq) -@@ -42,6 +44,7 @@ static inline u32 get_pelt_divider(struct sched_avg *avg) - return LOAD_AVG_MAX - 1024 + avg->period_contrib; - } - -+#ifndef CONFIG_SCHED_ALT - /* - * When a task is dequeued, its estimated utilization should not be update if - * its util_avg has not been updated at least once. 
-@@ -162,9 +165,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) - return rq_clock_pelt(rq_of(cfs_rq)); - } - #endif -+#endif /* CONFIG_SCHED_ALT */ - - #else - -+#ifndef CONFIG_SCHED_ALT - static inline int - update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) - { -@@ -182,6 +187,7 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - { - return 0; - } -+#endif - - static inline int - update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 28709f6b0975..6bc68bacbac8 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2,6 +2,10 @@ - /* - * Scheduler internal types and methods: - */ -+#ifdef CONFIG_SCHED_ALT -+#include "alt_sched.h" -+#else -+ - #include - - #include -@@ -2626,3 +2630,9 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) - - void swake_up_all_locked(struct swait_queue_head *q); - void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (task_nice(p) > 0); -+} -+#endif /* !CONFIG_SCHED_ALT */ -diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c -index 750fb3c67eed..108422ebc7bf 100644 ---- a/kernel/sched/stats.c -+++ b/kernel/sched/stats.c -@@ -22,8 +22,10 @@ static int show_schedstat(struct seq_file *seq, void *v) - } else { - struct rq *rq; - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_ALT - struct sched_domain *sd; - int dcount = 0; -+#endif - #endif - cpu = (unsigned long)(v - 2); - rq = cpu_rq(cpu); -@@ -40,6 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - seq_printf(seq, "\n"); - - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_ALT - /* domain-specific stats */ - rcu_read_lock(); - for_each_domain(cpu, sd) { -@@ -68,6 +71,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - sd->ttwu_move_balance); - } - rcu_read_unlock(); -+#endif - #endif - } - return 0; -diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 1bd7e3af904f..cc946a9bd550 100644 ---- a/kernel/sched/topology.c -+++ b/kernel/sched/topology.c -@@ -4,6 +4,7 @@ - */ - #include "sched.h" - -+#ifndef CONFIG_SCHED_ALT - DEFINE_MUTEX(sched_domains_mutex); - - /* Protected by sched_domains_mutex: */ -@@ -1180,8 +1181,10 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) - */ - - static int default_relax_domain_level = -1; -+#endif /* CONFIG_SCHED_ALT */ - int sched_domain_level_max; - -+#ifndef CONFIG_SCHED_ALT - static int __init setup_relax_domain_level(char *str) - { - if (kstrtoint(str, 0, &default_relax_domain_level)) -@@ -1413,6 +1416,7 @@ sd_init(struct sched_domain_topology_level *tl, - - return sd; - } -+#endif /* CONFIG_SCHED_ALT */ - - /* - * Topology list, bottom-up. 
-@@ -1442,6 +1446,7 @@ void set_sched_topology(struct sched_domain_topology_level *tl) - sched_domain_topology = tl; - } - -+#ifndef CONFIG_SCHED_ALT - #ifdef CONFIG_NUMA - - static const struct cpumask *sd_numa_mask(int cpu) -@@ -2316,3 +2321,17 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); - mutex_unlock(&sched_domains_mutex); - } -+#else /* CONFIG_SCHED_ALT */ -+void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], -+ struct sched_domain_attr *dattr_new) -+{} -+ -+#ifdef CONFIG_NUMA -+int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; -+ -+int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return best_mask_cpu(cpu, cpus); -+} -+#endif /* CONFIG_NUMA */ -+#endif -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index afad085960b8..e91b4cb3042b 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -120,6 +120,10 @@ static unsigned long long_max = LONG_MAX; - static int one_hundred = 100; - static int two_hundred = 200; - static int one_thousand = 1000; -+#ifdef CONFIG_SCHED_ALT -+static int __maybe_unused zero = 0; -+extern int sched_yield_type; -+#endif - #ifdef CONFIG_PRINTK - static int ten_thousand = 10000; - #endif -@@ -184,7 +188,7 @@ static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT; - int sysctl_legacy_va_layout; - #endif - --#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_ALT) - static int min_sched_granularity_ns = 100000; /* 100 usecs */ - static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_wakeup_granularity_ns; /* 0 usecs */ -@@ -1652,6 +1656,7 @@ int proc_do_static_key(struct ctl_table *table, int write, - } - - static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_ALT - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, -@@ -1854,6 +1859,7 @@ static struct ctl_table kern_table[] = { - .extra2 = SYSCTL_ONE, - }, - #endif -+#endif /* !CONFIG_SCHED_ALT */ - #ifdef CONFIG_PROVE_LOCKING - { - .procname = "prove_locking", -@@ -2430,6 +2436,17 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_SCHED_ALT -+ { -+ .procname = "yield_type", -+ .data = &sched_yield_type, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &two, -+ }, -+#endif - #if defined(CONFIG_S390) && defined(CONFIG_SMP) - { - .procname = "spin_retry", -diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index 95b6a708b040..81f2ee62c807 100644 ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -1927,8 +1927,10 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, - int ret = 0; - u64 slack; - -+#ifndef CONFIG_SCHED_ALT - slack = current->timer_slack_ns; - if (dl_task(current) || rt_task(current)) -+#endif - slack = 0; - - hrtimer_init_sleeper_on_stack(&t, clockid, mode); -diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c -index a71758e34e45..d20c347df861 100644 ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -216,7 +216,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) - u64 stime, utime; - - task_cputime(p, &utime, &stime); -- store_samples(samples, stime, utime, p->se.sum_exec_runtime); -+ store_samples(samples, stime, utime, tsk_seruntime(p)); - } - - static void proc_sample_cputime_atomic(struct 
task_cputime_atomic *at, -@@ -801,6 +801,7 @@ static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, - } - } - -+#ifndef CONFIG_SCHED_ALT - static inline void check_dl_overrun(struct task_struct *tsk) - { - if (tsk->dl.dl_overrun) { -@@ -808,6 +809,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) - __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); - } - } -+#endif - - static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) - { -@@ -835,8 +837,10 @@ static void check_thread_timers(struct task_struct *tsk, - u64 samples[CPUCLOCK_MAX]; - unsigned long soft; - -+#ifndef CONFIG_SCHED_ALT - if (dl_task(tsk)) - check_dl_overrun(tsk); -+#endif - - if (expiry_cache_is_inactive(pct)) - return; -@@ -850,7 +854,7 @@ static void check_thread_timers(struct task_struct *tsk, - soft = task_rlimit(tsk, RLIMIT_RTTIME); - if (soft != RLIM_INFINITY) { - /* Task RT timeout is accounted in jiffies. RTTIME is usec */ -- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); -+ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); - unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - - /* At the hard limit, send SIGKILL. No further action. */ -@@ -1086,8 +1090,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) - return true; - } - -+#ifndef CONFIG_SCHED_ALT - if (dl_task(tsk) && tsk->dl.dl_overrun) - return true; -+#endif - - return false; - } -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index b5e3496cf803..65f60c77bc50 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) - { - /* Make this a -deadline thread */ - static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_ALT -+ /* No deadline on BMQ/PDS, use RR */ -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, - .sched_runtime = 100000ULL, - .sched_deadline = 10000000ULL, - .sched_period = 10000000ULL -+#endif - }; - struct wakeup_test_data *x = data; - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f36264fea75c6ca7c34eaa259c0bff829cbf6ac0..d43ca62fd00fe442bda9b4ad548fae432a7436de 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -11,6 +11,10 @@ - * scheduler by Alfred Chen. - * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. - */ -+#define CREATE_TRACE_POINTS -+#include -+#undef CREATE_TRACE_POINTS -+ - #include "sched.h" - - #include -@@ -42,8 +46,11 @@ - #include "pelt.h" - #include "smp.h" - --#define CREATE_TRACE_POINTS --#include -+/* -+ * Export tracepoints that act as a bare tracehook (ie: have no trace event -+ * associated with them) to allow external modules to probe them. -+ */ -+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - - #define ALT_SCHED_VERSION "v5.9-r0" - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 99be2c51c88d0406cced20b36d7230da12930a5c..03f8b8b1aa27eeb15989af25b4050c767da12aad 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -46,6 +46,8 @@ - - #include "cpupri.h" - -+#include -+ - #ifdef CONFIG_SCHED_BMQ - #include "bmq.h" - #endif -@@ -496,6 +498,8 @@ static inline int sched_tick_offload_init(void) { return 0; } - - extern void schedule_idle(void); - -+#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) -+ - /* - * !! For sched_setattr_nocheck() (kernel) only !! 
- * diff --git a/linux59-tkg/linux59-tkg-patches/0011-ZFS-fix.patch b/linux59-tkg/linux59-tkg-patches/0011-ZFS-fix.patch deleted file mode 100644 index af71d04..0000000 --- a/linux59-tkg/linux59-tkg-patches/0011-ZFS-fix.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 1e010beda2896bdf3082fb37a3e49f8ce20e04d8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= -Date: Thu, 2 May 2019 05:28:08 +0100 -Subject: [PATCH] x86/fpu: Export kernel_fpu_{begin,end}() with - EXPORT_SYMBOL_GPL -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -We need these symbols in zfs as the fpu implementation breaks userspace: - -https://github.com/zfsonlinux/zfs/issues/9346 -Signed-off-by: Jörg Thalheim ---- - arch/x86/kernel/fpu/core.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c -index 12c70840980e..352538b3bb5d 100644 ---- a/arch/x86/kernel/fpu/core.c -+++ b/arch/x86/kernel/fpu/core.c -@@ -102,7 +102,7 @@ void kernel_fpu_begin(void) - } - __cpu_invalidate_fpregs_state(); - } --EXPORT_SYMBOL_GPL(kernel_fpu_begin); -+EXPORT_SYMBOL(kernel_fpu_begin); - - void kernel_fpu_end(void) - { -@@ -111,7 +111,7 @@ void kernel_fpu_end(void) - this_cpu_write(in_kernel_fpu, false); - preempt_enable(); - } --EXPORT_SYMBOL_GPL(kernel_fpu_end); -+EXPORT_SYMBOL(kernel_fpu_end); - - /* - * Save the FPU state (mark it for reload if necessary): --- -2.23.0 - - diff --git a/linux59-tkg/linux59-tkg-patches/0012-misc-additions.patch b/linux59-tkg/linux59-tkg-patches/0012-misc-additions.patch deleted file mode 100644 index a4efaef..0000000 --- a/linux59-tkg/linux59-tkg-patches/0012-misc-additions.patch +++ /dev/null @@ -1,54 +0,0 @@ -diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig -index 0840d27381ea..73aba9a31064 100644 ---- a/drivers/tty/Kconfig -+++ b/drivers/tty/Kconfig -@@ -75,6 +75,19 @@ config VT_CONSOLE_SLEEP - def_bool y - depends on VT_CONSOLE && PM_SLEEP - -+config NR_TTY_DEVICES -+ int "Maximum tty device number" -+ depends on VT -+ range 12 63 -+ default 63 -+ help -+ This option is used to change the number of tty devices in /dev. -+ The default value is 63. The lowest number you can set is 12, -+ 63 is also the upper limit so we don't overrun the serial -+ consoles. -+ -+ If unsure, say 63. -+ - config HW_CONSOLE - bool - depends on VT && !UML -diff --git a/include/uapi/linux/vt.h b/include/uapi/linux/vt.h -index e9d39c48520a..3bceead8da40 100644 ---- a/include/uapi/linux/vt.h -+++ b/include/uapi/linux/vt.h -@@ -3,12 +3,25 @@ - #define _UAPI_LINUX_VT_H - - -+/* -+ * We will make this definition solely for the purpose of making packages -+ * such as splashutils build, because they can not understand that -+ * NR_TTY_DEVICES is defined in the kernel configuration. -+ */ -+#ifndef CONFIG_NR_TTY_DEVICES -+#define CONFIG_NR_TTY_DEVICES 63 -+#endif -+ - /* - * These constants are also useful for user-level apps (e.g., VC - * resizing). - */ - #define MIN_NR_CONSOLES 1 /* must be at least 1 */ --#define MAX_NR_CONSOLES 63 /* serial lines start at 64 */ -+/* -+ * NR_TTY_DEVICES: -+ * Value MUST be at least 12 and must never be higher then 63 -+ */ -+#define MAX_NR_CONSOLES CONFIG_NR_TTY_DEVICES /* serial lines start above this */ - /* Note: the ioctl VT_GETSTATE does not work for - consoles 16 and higher (since it returns a short) */ \ No newline at end of file
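
The vt.h hunk above only changes where the console limit comes from: MAX_NR_CONSOLES now follows CONFIG_NR_TTY_DEVICES, with 63 as the fallback so user-space consumers of the header (e.g. splashutils) still build without a kernel config. A minimal stand-alone sketch of that preprocessor behaviour, illustrative only and not part of any patch above:

	/* Illustrative only: mirrors the MAX_NR_CONSOLES fallback shown above. */
	#include <stdio.h>

	#ifndef CONFIG_NR_TTY_DEVICES
	#define CONFIG_NR_TTY_DEVICES 63	/* same default the Kconfig option uses */
	#endif

	#define MAX_NR_CONSOLES CONFIG_NR_TTY_DEVICES	/* serial lines start above this */

	int main(void)
	{
		/* Prints 63 unless built with -DCONFIG_NR_TTY_DEVICES=<12..63>. */
		printf("MAX_NR_CONSOLES = %d\n", MAX_NR_CONSOLES);
		return 0;
	}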